@vellumai/assistant 0.3.2 → 0.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +82 -21
- package/package.json +1 -1
- package/src/__tests__/__snapshots__/ipc-snapshot.test.ts.snap +16 -0
- package/src/__tests__/app-git-history.test.ts +22 -27
- package/src/__tests__/app-git-service.test.ts +44 -78
- package/src/__tests__/call-orchestrator.test.ts +321 -0
- package/src/__tests__/channel-approval-routes.test.ts +1267 -93
- package/src/__tests__/channel-approval.test.ts +2 -0
- package/src/__tests__/channel-approvals.test.ts +51 -2
- package/src/__tests__/channel-delivery-store.test.ts +130 -1
- package/src/__tests__/channel-guardian.test.ts +371 -1
- package/src/__tests__/config-schema.test.ts +1 -1
- package/src/__tests__/credential-security-invariants.test.ts +1 -0
- package/src/__tests__/daemon-lifecycle.test.ts +635 -0
- package/src/__tests__/daemon-server-session-init.test.ts +5 -0
- package/src/__tests__/gateway-only-enforcement.test.ts +106 -21
- package/src/__tests__/handlers-telegram-config.test.ts +82 -0
- package/src/__tests__/handlers-twilio-config.test.ts +738 -5
- package/src/__tests__/ingress-url-consistency.test.ts +64 -0
- package/src/__tests__/ipc-snapshot.test.ts +10 -0
- package/src/__tests__/run-orchestrator.test.ts +1 -1
- package/src/__tests__/secret-scanner.test.ts +223 -0
- package/src/__tests__/session-process-bridge.test.ts +2 -0
- package/src/__tests__/shell-parser-property.test.ts +357 -2
- package/src/__tests__/system-prompt.test.ts +25 -1
- package/src/__tests__/tool-executor-lifecycle-events.test.ts +34 -1
- package/src/__tests__/tool-permission-simulate-handler.test.ts +2 -2
- package/src/__tests__/user-reference.test.ts +68 -0
- package/src/calls/call-orchestrator.ts +63 -11
- package/src/calls/twilio-config.ts +10 -1
- package/src/calls/twilio-rest.ts +70 -0
- package/src/cli/map.ts +6 -0
- package/src/commands/__tests__/cc-command-registry.test.ts +67 -0
- package/src/commands/cc-command-registry.ts +14 -1
- package/src/config/bundled-skills/claude-code/TOOLS.json +10 -3
- package/src/config/bundled-skills/email-setup/SKILL.md +56 -0
- package/src/config/bundled-skills/messaging/SKILL.md +4 -0
- package/src/config/bundled-skills/subagent/SKILL.md +4 -0
- package/src/config/bundled-skills/subagent/TOOLS.json +4 -0
- package/src/config/defaults.ts +1 -1
- package/src/config/schema.ts +6 -3
- package/src/config/skills.ts +5 -32
- package/src/config/system-prompt.ts +16 -0
- package/src/config/user-reference.ts +29 -0
- package/src/config/vellum-skills/catalog.json +52 -0
- package/src/config/vellum-skills/telegram-setup/SKILL.md +6 -1
- package/src/config/vellum-skills/twilio-setup/SKILL.md +49 -4
- package/src/daemon/auth-manager.ts +103 -0
- package/src/daemon/computer-use-session.ts +8 -1
- package/src/daemon/config-watcher.ts +253 -0
- package/src/daemon/handlers/config.ts +193 -17
- package/src/daemon/handlers/sessions.ts +5 -3
- package/src/daemon/handlers/skills.ts +60 -17
- package/src/daemon/ipc-contract-inventory.json +4 -0
- package/src/daemon/ipc-contract.ts +16 -0
- package/src/daemon/ipc-handler.ts +87 -0
- package/src/daemon/lifecycle.ts +16 -4
- package/src/daemon/ride-shotgun-handler.ts +11 -1
- package/src/daemon/server.ts +105 -502
- package/src/daemon/session-agent-loop.ts +9 -14
- package/src/daemon/session-process.ts +20 -3
- package/src/daemon/session-runtime-assembly.ts +60 -44
- package/src/daemon/session-slash.ts +50 -2
- package/src/daemon/session-surfaces.ts +17 -1
- package/src/daemon/session.ts +8 -1
- package/src/inbound/public-ingress-urls.ts +20 -3
- package/src/index.ts +1 -23
- package/src/memory/app-git-service.ts +24 -0
- package/src/memory/app-store.ts +0 -21
- package/src/memory/channel-delivery-store.ts +74 -3
- package/src/memory/channel-guardian-store.ts +54 -26
- package/src/memory/conversation-key-store.ts +20 -0
- package/src/memory/conversation-store.ts +14 -2
- package/src/memory/db-connection.ts +28 -0
- package/src/memory/db-init.ts +1019 -0
- package/src/memory/db.ts +2 -1995
- package/src/memory/embedding-backend.ts +79 -11
- package/src/memory/indexer.ts +2 -0
- package/src/memory/job-utils.ts +64 -4
- package/src/memory/jobs-worker.ts +7 -1
- package/src/memory/recall-cache.ts +107 -0
- package/src/memory/retriever.ts +30 -1
- package/src/memory/schema-migration.ts +984 -0
- package/src/memory/schema.ts +6 -0
- package/src/memory/search/types.ts +2 -0
- package/src/permissions/prompter.ts +14 -3
- package/src/permissions/trust-store.ts +7 -0
- package/src/runtime/channel-approvals.ts +17 -3
- package/src/runtime/gateway-client.ts +2 -1
- package/src/runtime/http-server.ts +28 -9
- package/src/runtime/routes/channel-routes.ts +279 -100
- package/src/runtime/routes/run-routes.ts +7 -1
- package/src/runtime/run-orchestrator.ts +8 -1
- package/src/security/secret-scanner.ts +218 -0
- package/src/skills/clawhub.ts +6 -2
- package/src/skills/frontmatter.ts +63 -0
- package/src/skills/slash-commands.ts +23 -0
- package/src/skills/vellum-catalog-remote.ts +107 -0
- package/src/subagent/manager.ts +4 -1
- package/src/subagent/types.ts +2 -0
- package/src/tools/browser/auto-navigate.ts +132 -24
- package/src/tools/browser/browser-manager.ts +67 -61
- package/src/tools/claude-code/claude-code.ts +55 -3
- package/src/tools/executor.ts +10 -2
- package/src/tools/skills/vellum-catalog.ts +75 -127
- package/src/tools/subagent/spawn.ts +2 -0
- package/src/tools/terminal/parser.ts +21 -5
- package/src/util/platform.ts +8 -1
- package/src/util/retry.ts +4 -4
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { createHash } from 'node:crypto';
|
|
1
2
|
import type { AssistantConfig } from '../config/types.js';
|
|
2
3
|
import { getLogger } from '../util/logger.js';
|
|
3
4
|
import { GeminiEmbeddingBackend } from './embedding-gemini.js';
|
|
@@ -10,9 +11,41 @@ const log = getLogger('memory-embeddings');
|
|
|
10
11
|
/** Global cache of embedding backend instances, keyed by "provider:model". */
|
|
11
12
|
const backendCache = new Map<string, EmbeddingBackend>();
|
|
12
13
|
|
|
13
|
-
|
|
14
|
+
// ── In-memory embedding vector cache ──────────────────────────────
// Maps sha256(provider, model, text) to the embedding vector produced for
// that text, so identical content can skip a repeat API call or local
// compute. Entries are kept in least-recently-used order (Map insertion order).

/** Upper bound on the number of vectors held in memory at once. */
const VECTOR_CACHE_MAX_ENTRIES = 4096;

/** Digest → vector store; the first key is the least recently used. */
const vectorCache: Map<string, number[]> = new Map();
|
|
19
|
+
|
|
20
|
+
/**
 * Derive the vector-cache key for one piece of text.
 *
 * Provider, model and text are joined with NUL separators before hashing so
 * that adjacent fields do not run together.
 *
 * @returns Hex-encoded SHA-256 digest of the joined fields.
 */
function vectorCacheKey(provider: string, model: string, text: string): string {
  const material = [provider, model, text].join('\0');
  return createHash('sha256').update(material).digest('hex');
}
|
|
23
|
+
|
|
24
|
+
/**
 * Look up a cached embedding vector.
 *
 * A hit also marks the entry as most recently used by re-inserting it at the
 * end of the Map's insertion order.
 *
 * @returns The cached vector, or `undefined` when there is no entry.
 */
function getFromVectorCache(provider: string, model: string, text: string): number[] | undefined {
  const entryKey = vectorCacheKey(provider, model, text);
  const hit = vectorCache.get(entryKey);
  if (hit === undefined) return undefined;

  // LRU refresh: delete + set moves the key to the tail of the iteration order.
  vectorCache.delete(entryKey);
  vectorCache.set(entryKey, hit);
  return hit;
}
|
|
34
|
+
|
|
35
|
+
/**
 * Store an embedding vector under the key derived from (provider, model, text).
 *
 * When the cache is already at `VECTOR_CACHE_MAX_ENTRIES`, the entry at the
 * head of the Map's insertion order (the least recently used one) is evicted
 * to make room.
 */
function putInVectorCache(provider: string, model: string, text: string, vector: number[]): void {
  const entryKey = vectorCacheKey(provider, model, text);

  // Remove any previous entry first so the size check below does not count it
  // and the new insert lands at the most-recently-used end.
  vectorCache.delete(entryKey);

  if (vectorCache.size >= VECTOR_CACHE_MAX_ENTRIES) {
    // Only the first key in iteration order is evicted.
    for (const lruKey of vectorCache.keys()) {
      vectorCache.delete(lruKey);
      break;
    }
  }

  vectorCache.set(entryKey, vector);
}
|
|
44
|
+
|
|
45
|
+
/**
 * Reset this module's in-memory caches: the backend instances held in
 * `backendCache` and the embedding vectors held in `vectorCache`.
 */
export function clearEmbeddingBackendCache(): void {
  vectorCache.clear();
  backendCache.clear();
}
|
|
17
50
|
|
|
18
51
|
function cacheKey(provider: string, model: string): string {
|
|
@@ -153,22 +186,44 @@ export async function embedWithBackend(
|
|
|
153
186
|
throw new Error(selection.reason ?? 'No memory embedding backend configured');
|
|
154
187
|
}
|
|
155
188
|
|
|
156
|
-
|
|
157
|
-
const
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
189
|
+
const expectedDim = config.memory.qdrant.vectorSize;
|
|
190
|
+
const { provider: primaryProvider, model: primaryModel } = selection.backend;
|
|
191
|
+
|
|
192
|
+
// ── Build fallback backends list (needed for embed fallback) ──
|
|
193
|
+
const fallbacks: EmbeddingBackend[] =
|
|
194
|
+
config.memory.embeddings.provider === 'auto' && selection.backend.provider === 'local'
|
|
195
|
+
? selectFallbackBackends(config, 'local')
|
|
196
|
+
: [];
|
|
197
|
+
|
|
198
|
+
// ── In-memory cache check (primary provider only) ──────────────
|
|
199
|
+
const cached: (number[] | null)[] = texts.map(t => {
|
|
200
|
+
const v = getFromVectorCache(primaryProvider, primaryModel, t);
|
|
201
|
+
if (v && v.length === expectedDim) return v;
|
|
202
|
+
return null;
|
|
203
|
+
});
|
|
204
|
+
const uncachedIndices: number[] = [];
|
|
205
|
+
for (let i = 0; i < cached.length; i++) {
|
|
206
|
+
if (!cached[i]) uncachedIndices.push(i);
|
|
207
|
+
}
|
|
208
|
+
if (uncachedIndices.length === 0) {
|
|
209
|
+
return { provider: primaryProvider, model: primaryModel, vectors: cached as number[][] };
|
|
162
210
|
}
|
|
163
211
|
|
|
212
|
+
// ── Embed uncached texts ────────────────────────────────────────
|
|
213
|
+
const backends: EmbeddingBackend[] = [selection.backend, ...fallbacks];
|
|
214
|
+
|
|
164
215
|
let lastErr: unknown;
|
|
165
216
|
for (const backend of backends) {
|
|
217
|
+
const isPrimary = backend === selection.backend;
|
|
218
|
+
// For the primary backend, only embed uncached texts and merge with cached.
|
|
219
|
+
// For fallback backends, embed ALL texts since the cache was keyed to the primary.
|
|
220
|
+
const textsToEmbed = isPrimary ? uncachedIndices.map(i => texts[i]) : texts;
|
|
221
|
+
|
|
166
222
|
try {
|
|
167
|
-
const vectors = await backend.embed(
|
|
168
|
-
if (vectors.length !==
|
|
169
|
-
throw new Error(`Embedding backend returned ${vectors.length} vectors for ${
|
|
223
|
+
const vectors = await backend.embed(textsToEmbed, options);
|
|
224
|
+
if (vectors.length !== textsToEmbed.length) {
|
|
225
|
+
throw new Error(`Embedding backend returned ${vectors.length} vectors for ${textsToEmbed.length} texts`);
|
|
170
226
|
}
|
|
171
|
-
const expectedDim = config.memory.qdrant.vectorSize;
|
|
172
227
|
for (const vec of vectors) {
|
|
173
228
|
if (vec.length !== expectedDim) {
|
|
174
229
|
throw new Error(
|
|
@@ -176,6 +231,19 @@ export async function embedWithBackend(
|
|
|
176
231
|
);
|
|
177
232
|
}
|
|
178
233
|
}
|
|
234
|
+
|
|
235
|
+
// Populate cache with freshly embedded vectors
|
|
236
|
+
for (let i = 0; i < textsToEmbed.length; i++) {
|
|
237
|
+
putInVectorCache(backend.provider, backend.model, textsToEmbed[i], vectors[i]);
|
|
238
|
+
}
|
|
239
|
+
|
|
240
|
+
if (isPrimary) {
|
|
241
|
+
const merged = [...cached] as number[][];
|
|
242
|
+
for (let i = 0; i < uncachedIndices.length; i++) {
|
|
243
|
+
merged[uncachedIndices[i]] = vectors[i];
|
|
244
|
+
}
|
|
245
|
+
return { provider: backend.provider, model: backend.model, vectors: merged };
|
|
246
|
+
}
|
|
179
247
|
return { provider: backend.provider, model: backend.model, vectors };
|
|
180
248
|
} catch (err) {
|
|
181
249
|
lastErr = err;
|
package/src/memory/indexer.ts
CHANGED
|
@@ -7,6 +7,7 @@ import { getDb } from './db.js';
|
|
|
7
7
|
import { enqueueMemoryJob, enqueueResolvePendingConflictsForMessageJob } from './jobs-store.js';
|
|
8
8
|
import { extractTextFromStoredMessageContent } from './message-content.js';
|
|
9
9
|
import { segmentText } from './segmenter.js';
|
|
10
|
+
import { bumpMemoryVersion } from './recall-cache.js';
|
|
10
11
|
import { memorySegments } from './schema.js';
|
|
11
12
|
|
|
12
13
|
const log = getLogger('memory-indexer');
|
|
@@ -108,6 +109,7 @@ export function indexMessageNow(
|
|
|
108
109
|
log.debug(`Skipped ${skippedEmbedJobs}/${segments.length} embed_segment jobs (content unchanged)`);
|
|
109
110
|
}
|
|
110
111
|
|
|
112
|
+
bumpMemoryVersion();
|
|
111
113
|
enqueueSummaryRollupJobsIfDue();
|
|
112
114
|
|
|
113
115
|
const enqueuedJobs = (segments.length - skippedEmbedJobs) + (shouldExtract ? 2 : 1) + (shouldResolveConflicts ? 1 : 0);
|
package/src/memory/job-utils.ts
CHANGED
|
@@ -1,6 +1,10 @@
|
|
|
1
|
+
import { createHash, randomUUID } from 'node:crypto';
|
|
2
|
+
import { eq, and } from 'drizzle-orm';
|
|
1
3
|
import { getLogger } from '../util/logger.js';
|
|
2
4
|
import { embedWithBackend, getMemoryBackendStatus } from './embedding-backend.js';
|
|
5
|
+
import { getDb } from './db.js';
|
|
3
6
|
import { getQdrantClient } from './qdrant-client.js';
|
|
7
|
+
import { memoryEmbeddings } from './schema.js';
|
|
4
8
|
import type { AssistantConfig } from '../config/types.js';
|
|
5
9
|
|
|
6
10
|
const log = getLogger('memory-jobs-worker');
|
|
@@ -111,9 +115,66 @@ export async function embedAndUpsert(
|
|
|
111
115
|
);
|
|
112
116
|
}
|
|
113
117
|
|
|
114
|
-
const
|
|
115
|
-
|
|
116
|
-
|
|
118
|
+
const contentHash = createHash('sha256').update(text).digest('hex');
|
|
119
|
+
let provider = status.provider;
|
|
120
|
+
let model = status.model!;
|
|
121
|
+
let vector: number[];
|
|
122
|
+
|
|
123
|
+
// Check SQLite embedding cache for a matching content hash (primary provider only).
|
|
124
|
+
const db = getDb();
|
|
125
|
+
const expectedDim = config.memory.qdrant.vectorSize;
|
|
126
|
+
let cachedRow = db
|
|
127
|
+
.select({ vectorJson: memoryEmbeddings.vectorJson, dimensions: memoryEmbeddings.dimensions })
|
|
128
|
+
.from(memoryEmbeddings)
|
|
129
|
+
.where(
|
|
130
|
+
and(
|
|
131
|
+
eq(memoryEmbeddings.contentHash, contentHash),
|
|
132
|
+
eq(memoryEmbeddings.provider, provider),
|
|
133
|
+
eq(memoryEmbeddings.model, model),
|
|
134
|
+
),
|
|
135
|
+
)
|
|
136
|
+
.get();
|
|
137
|
+
if (cachedRow && cachedRow.dimensions !== expectedDim) cachedRow = undefined;
|
|
138
|
+
|
|
139
|
+
if (cachedRow) {
|
|
140
|
+
vector = JSON.parse(cachedRow.vectorJson);
|
|
141
|
+
} else {
|
|
142
|
+
const embedded = await embedWithBackend(config, [text]);
|
|
143
|
+
vector = embedded.vectors[0];
|
|
144
|
+
if (!vector) return;
|
|
145
|
+
provider = embedded.provider;
|
|
146
|
+
model = embedded.model;
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
// Persist embedding in SQLite for cross-restart cache
|
|
150
|
+
const now = Date.now();
|
|
151
|
+
try {
|
|
152
|
+
db.insert(memoryEmbeddings)
|
|
153
|
+
.values({
|
|
154
|
+
id: randomUUID(),
|
|
155
|
+
targetType,
|
|
156
|
+
targetId,
|
|
157
|
+
provider,
|
|
158
|
+
model,
|
|
159
|
+
dimensions: vector.length,
|
|
160
|
+
vectorJson: JSON.stringify(vector),
|
|
161
|
+
contentHash,
|
|
162
|
+
createdAt: now,
|
|
163
|
+
updatedAt: now,
|
|
164
|
+
})
|
|
165
|
+
.onConflictDoUpdate({
|
|
166
|
+
target: [memoryEmbeddings.targetType, memoryEmbeddings.targetId, memoryEmbeddings.provider, memoryEmbeddings.model],
|
|
167
|
+
set: {
|
|
168
|
+
vectorJson: JSON.stringify(vector),
|
|
169
|
+
dimensions: vector.length,
|
|
170
|
+
contentHash,
|
|
171
|
+
updatedAt: now,
|
|
172
|
+
},
|
|
173
|
+
})
|
|
174
|
+
.run();
|
|
175
|
+
} catch (err) {
|
|
176
|
+
log.warn({ err, targetType, targetId }, 'Failed to write embedding cache');
|
|
177
|
+
}
|
|
117
178
|
|
|
118
179
|
let qdrant;
|
|
119
180
|
try {
|
|
@@ -123,7 +184,6 @@ export async function embedAndUpsert(
|
|
|
123
184
|
}
|
|
124
185
|
|
|
125
186
|
try {
|
|
126
|
-
const now = Date.now();
|
|
127
187
|
await qdrant.upsert(targetType, targetId, vector, {
|
|
128
188
|
text,
|
|
129
189
|
created_at: (extraPayload?.created_at as number) ?? now,
|
|
@@ -18,6 +18,7 @@ import {
|
|
|
18
18
|
retryDelayForAttempt,
|
|
19
19
|
RETRY_MAX_ATTEMPTS,
|
|
20
20
|
} from './job-utils.js';
|
|
21
|
+
import { bumpMemoryVersion } from './recall-cache.js';
|
|
21
22
|
|
|
22
23
|
// ── Per-job-type handlers ──────────────────────────────────────────
|
|
23
24
|
|
|
@@ -121,9 +122,14 @@ export async function runMemoryJobsOnce(
|
|
|
121
122
|
try {
|
|
122
123
|
await processJob(job, config);
|
|
123
124
|
completeMemoryJob(job.id);
|
|
125
|
+
bumpMemoryVersion();
|
|
124
126
|
groupProcessed += 1;
|
|
125
127
|
} catch (err) {
|
|
126
|
-
|
|
128
|
+
try {
|
|
129
|
+
handleJobError(job, err);
|
|
130
|
+
} catch (handlerErr) {
|
|
131
|
+
log.error({ err: handlerErr, jobId: job.id, type: job.type }, 'handleJobError itself threw, job left in running status');
|
|
132
|
+
}
|
|
127
133
|
}
|
|
128
134
|
}
|
|
129
135
|
return groupProcessed;
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
import { createHash } from 'crypto';
|
|
2
|
+
import type { MemoryRecallResult, MemoryRecallOptions } from './search/types.js';
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* In-memory cache for memory recall results.
|
|
6
|
+
*
|
|
7
|
+
* The full retrieval pipeline (FTS5 + Qdrant + entity graph + RRF merge) is
|
|
8
|
+
* expensive. When the same query is issued multiple turns in a row (common
|
|
9
|
+
* when the conversation context hasn't changed), we can serve the cached
|
|
10
|
+
* result instantly.
|
|
11
|
+
*
|
|
12
|
+
* Invalidation: a monotonic version counter is bumped whenever new memory
|
|
13
|
+
* is indexed (segments, items, embeddings). Cache entries are only valid
|
|
14
|
+
* when their version matches the current global version.
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
/** One stored recall result plus the metadata used to decide whether it is still valid. */
interface CacheEntry {
  /** The recall result handed back on a cache hit. */
  result: MemoryRecallResult;
  /** Value of the global memory version when the entry was stored. */
  version: number;
  /** `Date.now()` timestamp (milliseconds) taken when the entry was stored. */
  createdAt: number;
}
|
|
22
|
+
|
|
23
|
+
/** Maximum number of recall results kept at once. */
const MAX_ENTRIES = 32;
/** Maximum age of an entry before it is treated as expired: 60 seconds. */
const TTL_MS = 60 * 1000;

/** Global memory version; entries stored under a different value are stale. */
let _version = 0;
/** Recall results keyed by the digest from `buildCacheKey`, in insertion order. */
const _cache: Map<string, CacheEntry> = new Map();
|
|
28
|
+
|
|
29
|
+
/**
 * Advance the global memory version by one.
 *
 * Cached recall results record the version they were stored under, so after
 * this call every existing entry is treated as stale on its next lookup.
 */
export function bumpMemoryVersion(): void {
  _version += 1;
}
|
|
33
|
+
|
|
34
|
+
/**
 * Read the current global memory version.
 *
 * Callers can capture this before a retrieval and later pass it to
 * `setCachedRecall` as `snapshotVersion` to detect a bump that happened
 * while the retrieval was running.
 */
export function getMemoryVersion(): number {
  const current = _version;
  return current;
}
|
|
38
|
+
|
|
39
|
+
/**
 * Build a deterministic cache key from the recall inputs.
 *
 * Fields that influence the key: the query, the conversation id, `scopeId`,
 * `scopePolicyOverride` (its scope id and fallback flag), `excludeMessageIds`
 * and `maxInjectTokensOverride`. Absent options contribute an empty string.
 *
 * `excludeMessageIds` is normalised before hashing — empty ids are dropped,
 * duplicates are collapsed and the remainder is sorted — so option sets that
 * exclude the same messages share one cache entry regardless of ordering or
 * padding. (The retriever likewise discards empty ids before searching.)
 *
 * @returns Hex-encoded SHA-256 digest of the NUL-joined fields.
 */
function buildCacheKey(
  query: string,
  conversationId: string,
  options?: MemoryRecallOptions,
): string {
  const override = options?.scopePolicyOverride;
  const excluded = [...new Set(options?.excludeMessageIds ?? [])]
    .filter((id) => id.length > 0)
    .sort();

  const parts = [
    query,
    conversationId,
    options?.scopeId ?? '',
    override ? `${override.scopeId}:${override.fallbackToDefault}` : '',
    excluded.join(','),
    options?.maxInjectTokensOverride != null ? String(options.maxInjectTokensOverride) : '',
  ];
  return createHash('sha256').update(parts.join('\0')).digest('hex');
}
|
|
57
|
+
|
|
58
|
+
/**
 * Fetch the cached recall result for these inputs, if one is still valid.
 *
 * An entry is discarded (and `undefined` returned) when it was stored under a
 * different memory version or is older than `TTL_MS`.
 *
 * @returns The cached result, or `undefined` on a miss or stale entry.
 */
export function getCachedRecall(
  query: string,
  conversationId: string,
  options?: MemoryRecallOptions,
): MemoryRecallResult | undefined {
  const key = buildCacheKey(query, conversationId, options);
  const entry = _cache.get(key);
  if (entry === undefined) return undefined;

  const isFresh = entry.version === _version && !(Date.now() - entry.createdAt > TTL_MS);
  if (isFresh) return entry.result;

  // Stale entries are removed eagerly so they stop occupying a slot.
  _cache.delete(key);
  return undefined;
}
|
|
73
|
+
|
|
74
|
+
/**
 * Store a recall result in the cache, evicting the oldest entry when full.
 *
 * When `snapshotVersion` is provided, the entry is only stored if the
 * snapshot still matches the current global version — this prevents a
 * stale result from being cached under a version that was bumped while
 * the retrieval pipeline was in flight.
 *
 * @param query            Recall query text (part of the cache key).
 * @param conversationId   Conversation the recall belongs to (part of the cache key).
 * @param options          Recall options; the fields read by `buildCacheKey` affect the key.
 * @param result           The recall result to store.
 * @param snapshotVersion  Memory version captured before retrieval started, if any.
 */
export function setCachedRecall(
  query: string,
  conversationId: string,
  options: MemoryRecallOptions | undefined,
  result: MemoryRecallResult,
  snapshotVersion?: number,
): void {
  // A version bump during retrieval means the result may already be stale.
  if (snapshotVersion !== undefined && snapshotVersion !== _version) return;

  const key = buildCacheKey(query, conversationId, options);

  // Remove any existing entry first. `Map.set` on an existing key keeps its
  // original insertion position, which would leave a freshly refreshed entry
  // at the front of the eviction order. Deleting also keeps it out of the
  // capacity check below.
  _cache.delete(key);

  // Evict the oldest entry (head of insertion order) if at capacity.
  if (_cache.size >= MAX_ENTRIES) {
    const oldest = _cache.keys().next().value;
    if (oldest !== undefined) _cache.delete(oldest);
  }

  _cache.set(key, { version: _version, createdAt: Date.now(), result });
}
|
|
103
|
+
|
|
104
|
+
/**
 * Drop every cached recall result; the global memory version is left as-is.
 * Useful for testing.
 */
export function clearRecallCache(): void {
  _cache.clear();
}
|
package/src/memory/retriever.ts
CHANGED
|
@@ -19,6 +19,7 @@ import { semanticSearch, isQdrantConnectionError } from './search/semantic.js';
|
|
|
19
19
|
import { entitySearch } from './search/entity.js';
|
|
20
20
|
import { mergeCandidates, applySourceCaps, rerankWithLLM, trimToTokenBudget, markItemUsage } from './search/ranking.js';
|
|
21
21
|
import { buildInjectedText, MEMORY_CONTEXT_ACK } from './search/formatting.js';
|
|
22
|
+
import { getCachedRecall, setCachedRecall, getMemoryVersion } from './recall-cache.js';
|
|
22
23
|
|
|
23
24
|
// Re-export public types and functions so existing importers continue to work
|
|
24
25
|
export type {
|
|
@@ -161,10 +162,12 @@ async function collectAndMergeCandidates(
|
|
|
161
162
|
|
|
162
163
|
// -- Phase 2: expensive searches (skipped on early termination) --
|
|
163
164
|
let semantic: Candidate[] = [];
|
|
165
|
+
let semanticSearchFailed = false;
|
|
164
166
|
if (queryVector && !canTerminateEarly) {
|
|
165
167
|
try {
|
|
166
168
|
semantic = await semanticSearch(queryVector, opts?.provider ?? 'unknown', opts?.model ?? 'unknown', config.memory.retrieval.semanticTopK, excludeMessageIds, scopeIds);
|
|
167
169
|
} catch (err) {
|
|
170
|
+
semanticSearchFailed = true;
|
|
168
171
|
if (isQdrantConnectionError(err)) {
|
|
169
172
|
log.warn({ err }, 'Qdrant is unavailable — semantic search disabled, memory recall will be degraded');
|
|
170
173
|
} else {
|
|
@@ -214,6 +217,7 @@ async function collectAndMergeCandidates(
|
|
|
214
217
|
relationNeighborEntityCount,
|
|
215
218
|
relationExpandedItemCount,
|
|
216
219
|
earlyTerminated: canTerminateEarly,
|
|
220
|
+
semanticSearchFailed,
|
|
217
221
|
merged,
|
|
218
222
|
};
|
|
219
223
|
}
|
|
@@ -225,6 +229,7 @@ export async function buildMemoryRecall(
|
|
|
225
229
|
options?: MemoryRecallOptions,
|
|
226
230
|
): Promise<MemoryRecallResult> {
|
|
227
231
|
const start = Date.now();
|
|
232
|
+
const versionSnapshot = getMemoryVersion();
|
|
228
233
|
const excludeMessageIds = options?.excludeMessageIds?.filter((id) => id.length > 0) ?? [];
|
|
229
234
|
const signal = options?.signal;
|
|
230
235
|
if (!config.memory.enabled) {
|
|
@@ -234,6 +239,14 @@ export async function buildMemoryRecall(
|
|
|
234
239
|
return emptyResult({ enabled: true, degraded: false, reason: 'memory.aborted', latencyMs: Date.now() - start });
|
|
235
240
|
}
|
|
236
241
|
|
|
242
|
+
// Check recall cache — serves identical results instantly when the query
|
|
243
|
+
// and memory state haven't changed since the last recall.
|
|
244
|
+
const cached = getCachedRecall(query, conversationId, options);
|
|
245
|
+
if (cached) {
|
|
246
|
+
log.debug({ query: truncate(query, 120), latencyMs: Date.now() - start }, 'Memory recall served from cache');
|
|
247
|
+
return { ...cached, latencyMs: Date.now() - start };
|
|
248
|
+
}
|
|
249
|
+
|
|
237
250
|
const backendStatus = getMemoryBackendStatus(config);
|
|
238
251
|
let queryVector: number[] | null = null;
|
|
239
252
|
let provider: string | undefined;
|
|
@@ -326,7 +339,15 @@ export async function buildMemoryRecall(
|
|
|
326
339
|
relationNeighborEntityCount,
|
|
327
340
|
relationExpandedItemCount,
|
|
328
341
|
earlyTerminated,
|
|
342
|
+
semanticSearchFailed,
|
|
329
343
|
} = collected;
|
|
344
|
+
|
|
345
|
+
// Mark as degraded when semantic search failed — the recall is based on
|
|
346
|
+
// lexical/recency only and should not be cached.
|
|
347
|
+
if (semanticSearchFailed) {
|
|
348
|
+
degraded = true;
|
|
349
|
+
reason = reason ?? 'memory.semantic_search_failure';
|
|
350
|
+
}
|
|
330
351
|
let merged = applySourceCaps(collected.merged, config);
|
|
331
352
|
|
|
332
353
|
// LLM re-ranking: send top candidates to Haiku for relevance scoring
|
|
@@ -395,7 +416,7 @@ export async function buildMemoryRecall(
|
|
|
395
416
|
latencyMs,
|
|
396
417
|
}, 'Memory recall completed');
|
|
397
418
|
|
|
398
|
-
|
|
419
|
+
const result: MemoryRecallResult = {
|
|
399
420
|
enabled: true,
|
|
400
421
|
degraded,
|
|
401
422
|
reason,
|
|
@@ -418,6 +439,14 @@ export async function buildMemoryRecall(
|
|
|
418
439
|
latencyMs,
|
|
419
440
|
topCandidates,
|
|
420
441
|
};
|
|
442
|
+
|
|
443
|
+
// Only cache non-degraded results — degraded results (e.g. lexical-only
|
|
444
|
+
// fallback when embeddings fail) would delay quality recovery once the
|
|
445
|
+
// embedding backend comes back.
|
|
446
|
+
if (!result.degraded) {
|
|
447
|
+
setCachedRecall(query, conversationId, options, result, versionSnapshot);
|
|
448
|
+
}
|
|
449
|
+
return result;
|
|
421
450
|
}
|
|
422
451
|
|
|
423
452
|
export function stripMemoryRecallMessages<T extends { role: 'user' | 'assistant'; content: Array<{ type: string; text?: string }> }>(
|