@vellumai/assistant 0.7.3 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ARCHITECTURE.md +29 -28
- package/Dockerfile +1 -0
- package/__tests__/permissions/gateway-threshold-reader.test.ts +236 -9
- package/bun.lock +3 -0
- package/knip.json +1 -0
- package/node_modules/@vellumai/ipc-server-utils/bun.lock +24 -0
- package/node_modules/@vellumai/ipc-server-utils/package.json +18 -0
- package/node_modules/@vellumai/ipc-server-utils/src/index.ts +6 -0
- package/node_modules/@vellumai/ipc-server-utils/src/socket-watchdog.test.ts +430 -0
- package/node_modules/@vellumai/ipc-server-utils/src/socket-watchdog.ts +221 -0
- package/node_modules/@vellumai/ipc-server-utils/tsconfig.json +20 -0
- package/openapi.yaml +22 -4
- package/package.json +3 -1
- package/src/__tests__/annotate-risk-options.test.ts +291 -0
- package/src/__tests__/approval-cascade.test.ts +8 -16
- package/src/__tests__/approval-routes-http.test.ts +6 -0
- package/src/__tests__/auto-analysis-end-to-end.test.ts +12 -25
- package/src/__tests__/call-constants.test.ts +10 -1
- package/src/__tests__/call-controller.test.ts +127 -0
- package/src/__tests__/cli-memory-v2-reembed-skills.test.ts +58 -28
- package/src/__tests__/config-loader-platform-defaults.test.ts +284 -1
- package/src/__tests__/context-search-memory-source.test.ts +3 -26
- package/src/__tests__/context-search-pkb-source.test.ts +12 -6
- package/src/__tests__/conversation-abort-tool-results.test.ts +1 -6
- package/src/__tests__/conversation-agent-loop-inference-profile.test.ts +1 -1
- package/src/__tests__/conversation-agent-loop-overflow.test.ts +1 -1
- package/src/__tests__/conversation-agent-loop.test.ts +3 -3
- package/src/__tests__/conversation-confirmation-signals.test.ts +5 -13
- package/src/__tests__/conversation-init.benchmark.test.ts +1 -1
- package/src/__tests__/conversation-process-callsite.test.ts +1 -6
- package/src/__tests__/conversation-provider-retry-repair.test.ts +1 -6
- package/src/__tests__/conversation-runtime-assembly.test.ts +15 -6
- package/src/__tests__/conversation-slash-unknown.test.ts +1 -6
- package/src/__tests__/conversation-surfaces-action-delivery.test.ts +170 -9
- package/src/__tests__/conversation-surfaces-data-persist.test.ts +73 -1
- package/src/__tests__/conversation-tool-setup-app-refresh.test.ts +59 -0
- package/src/__tests__/conversation-workspace-injection.test.ts +1 -7
- package/src/__tests__/conversation-workspace-tool-tracking.test.ts +1 -7
- package/src/__tests__/filing-service.test.ts +2 -19
- package/src/__tests__/handlers-skills-memory-v2-reseed.test.ts +10 -26
- package/src/__tests__/injector-chain.test.ts +24 -16
- package/src/__tests__/injector-pkb-v2-silenced.test.ts +10 -7
- package/src/__tests__/lifecycle-memory-v2-seed.test.ts +154 -67
- package/src/__tests__/notification-decision-fallback.test.ts +91 -0
- package/src/__tests__/notification-decision-strategy.test.ts +22 -0
- package/src/__tests__/oauth-cli.test.ts +121 -0
- package/src/__tests__/relay-server.test.ts +46 -2
- package/src/__tests__/secret-prompt-log-hygiene.test.ts +7 -5
- package/src/__tests__/secret-prompter-channel-fallback.test.ts +7 -5
- package/src/__tests__/secret-response-routing.test.ts +7 -5
- package/src/__tests__/server-history-render.test.ts +82 -0
- package/src/__tests__/skill-include-graph.test.ts +31 -0
- package/src/__tests__/skill-load-tool.test.ts +44 -16
- package/src/__tests__/skills.test.ts +39 -0
- package/src/__tests__/tool-execution-pipeline.benchmark.test.ts +0 -42
- package/src/__tests__/tool-executor.test.ts +155 -0
- package/src/__tests__/voice-session-bridge.test.ts +3 -0
- package/src/__tests__/workspace-migration-069-seed-onboarding-threads.test.ts +120 -0
- package/src/__tests__/workspace-migration-071-remove-safe-storage-release-note.test.ts +206 -0
- package/src/__tests__/workspace-migration-safe-storage-limits-release.test.ts +15 -27
- package/src/agent/loop.ts +11 -0
- package/src/approvals/guardian-decision-primitive.ts +0 -13
- package/src/approvals/guardian-request-resolvers.ts +4 -32
- package/src/calls/call-constants.ts +5 -8
- package/src/calls/call-controller.ts +130 -67
- package/src/calls/relay-server.ts +7 -1
- package/src/calls/voice-session-bridge.ts +1 -1
- package/src/cli/commands/memory-v2.ts +7 -7
- package/src/cli/commands/oauth/__tests__/connect.test.ts +0 -254
- package/src/cli/commands/oauth/connect.ts +10 -52
- package/src/config/bundled-skills/app-builder/SKILL.md +1 -3
- package/src/config/feature-flag-registry.json +1 -17
- package/src/config/loader.ts +72 -19
- package/src/config/schemas/memory-v2.ts +1 -1
- package/src/daemon/__tests__/conversation-lifecycle-auto-analyze.test.ts +32 -0
- package/src/daemon/conversation-agent-loop-handlers.ts +32 -0
- package/src/daemon/conversation-agent-loop.ts +13 -10
- package/src/daemon/conversation-lifecycle.ts +22 -8
- package/src/daemon/conversation-surfaces.ts +16 -14
- package/src/daemon/conversation-tool-setup.ts +9 -5
- package/src/daemon/conversation.ts +1 -1
- package/src/daemon/handlers/shared.ts +26 -0
- package/src/daemon/host-bash-proxy.ts +1 -1
- package/src/daemon/host-browser-proxy.ts +1 -1
- package/src/daemon/host-cu-proxy.ts +1 -1
- package/src/daemon/host-file-proxy.ts +1 -1
- package/src/daemon/host-transfer-proxy.ts +2 -2
- package/src/daemon/lifecycle.ts +88 -73
- package/src/daemon/memory-v2-startup.ts +55 -14
- package/src/daemon/message-types/messages.ts +19 -1
- package/src/documents/document-store.ts +35 -1
- package/src/filing/filing-service.ts +2 -3
- package/src/heartbeat/heartbeat-service.ts +1 -1
- package/src/ipc/assistant-server.ts +93 -36
- package/src/ipc/skill-server.ts +99 -42
- package/src/memory/__tests__/jobs-worker-v2-schedule.test.ts +10 -57
- package/src/memory/context-search/sources/memory-v2.ts +1 -17
- package/src/memory/context-search/sources/memory.ts +2 -2
- package/src/memory/context-search/sources/pkb.ts +2 -3
- package/src/memory/graph/__tests__/conversation-graph-memory-v2-routing.test.ts +104 -61
- package/src/memory/graph/__tests__/handle-remember-v2.test.ts +11 -26
- package/src/memory/graph/conversation-graph-memory.ts +32 -9
- package/src/memory/graph/graph-search.test.ts +6 -5
- package/src/memory/graph/graph-search.ts +3 -4
- package/src/memory/graph/retriever.test.ts +12 -7
- package/src/memory/graph/retriever.ts +4 -5
- package/src/memory/graph/tool-handlers.ts +3 -4
- package/src/memory/graph/tools.ts +4 -4
- package/src/memory/indexer.ts +1 -2
- package/src/memory/jobs/__tests__/embed-concept-page.test.ts +116 -0
- package/src/memory/jobs/embed-concept-page.ts +223 -87
- package/src/memory/jobs-worker.ts +8 -4
- package/src/memory/pkb/pkb-search.test.ts +6 -5
- package/src/memory/pkb/pkb-search.ts +4 -5
- package/src/memory/qdrant-client.ts +3 -0
- package/src/memory/search/semantic.ts +4 -5
- package/src/memory/v2/__tests__/activation.test.ts +35 -5
- package/src/memory/v2/__tests__/consolidation-job.test.ts +21 -32
- package/src/memory/v2/__tests__/injection.test.ts +140 -23
- package/src/memory/v2/__tests__/qdrant.test.ts +310 -9
- package/src/memory/v2/__tests__/sim.test.ts +118 -7
- package/src/memory/v2/__tests__/static-context.test.ts +1 -13
- package/src/memory/v2/__tests__/sweep-job.test.ts +19 -33
- package/src/memory/v2/consolidation-job.ts +7 -8
- package/src/memory/v2/injection.ts +32 -12
- package/src/memory/v2/page-store.ts +39 -0
- package/src/memory/v2/prompts/consolidation.ts +5 -0
- package/src/memory/v2/qdrant.ts +209 -48
- package/src/memory/v2/sim.ts +67 -26
- package/src/memory/v2/static-context.ts +4 -8
- package/src/memory/v2/sweep-job.ts +5 -6
- package/src/memory/v2/types.ts +7 -0
- package/src/notifications/copy-composer.ts +46 -12
- package/src/notifications/decision-engine.ts +46 -0
- package/src/permissions/gateway-threshold-reader.ts +116 -8
- package/src/permissions/prompter.ts +86 -96
- package/src/permissions/secret-prompter.ts +31 -31
- package/src/plugins/defaults/injectors.ts +1 -2
- package/src/proactive-artifact/job.test.ts +51 -4
- package/src/proactive-artifact/job.ts +16 -2
- package/src/proactive-artifact/message-copy.ts +18 -1
- package/src/prompts/templates/SOUL.md +13 -28
- package/src/runtime/auth/route-policy.ts +1 -0
- package/src/runtime/channel-approvals.ts +3 -2
- package/src/runtime/guardian-reply-router.ts +0 -10
- package/src/runtime/pending-interactions.ts +19 -15
- package/src/runtime/routes/__tests__/memory-v2-routes.test.ts +147 -0
- package/src/runtime/routes/approval-routes.ts +7 -3
- package/src/runtime/routes/consolidation-routes.ts +8 -9
- package/src/runtime/routes/conversation-query-routes.ts +44 -1
- package/src/runtime/routes/debug-bash-routes.ts +2 -0
- package/src/runtime/routes/filing-routes.ts +2 -3
- package/src/runtime/routes/inbound-stages/guardian-reply-intercept.ts +0 -3
- package/src/runtime/routes/memory-item-routes.test.ts +3 -9
- package/src/runtime/routes/memory-item-routes.ts +5 -6
- package/src/runtime/routes/memory-v2-routes.ts +103 -17
- package/src/skills/include-graph.ts +35 -13
- package/src/tools/document/document-tool.ts +20 -0
- package/src/tools/executor.ts +18 -2
- package/src/tools/memory/register.test.ts +7 -5
- package/src/tools/permission-checker.ts +15 -0
- package/src/tools/skills/load.ts +24 -20
- package/src/tools/tool-name-aliases.ts +19 -0
- package/src/tools/types.ts +19 -1
- package/src/workspace/migrations/067-release-notes-safe-storage-limits.ts +4 -62
- package/src/workspace/migrations/069-seed-onboarding-threads.ts +28 -0
- package/src/workspace/migrations/070-memory-v2-summary-schema-rebuild.ts +31 -0
- package/src/workspace/migrations/071-remove-safe-storage-release-note.ts +111 -0
- package/src/workspace/migrations/registry.ts +6 -0
|
@@ -86,6 +86,10 @@ const upsertCalls: Array<{
|
|
|
86
86
|
slug: string;
|
|
87
87
|
dense: number[];
|
|
88
88
|
sparse: { indices: number[]; values: number[] };
|
|
89
|
+
summary?: {
|
|
90
|
+
dense: number[];
|
|
91
|
+
sparse: { indices: number[]; values: number[] };
|
|
92
|
+
};
|
|
89
93
|
updatedAt: number;
|
|
90
94
|
}> = [];
|
|
91
95
|
|
|
@@ -96,6 +100,10 @@ mock.module("../../v2/qdrant.js", () => ({
|
|
|
96
100
|
slug: string;
|
|
97
101
|
dense: number[];
|
|
98
102
|
sparse: { indices: number[]; values: number[] };
|
|
103
|
+
summary?: {
|
|
104
|
+
dense: number[];
|
|
105
|
+
sparse: { indices: number[]; values: number[] };
|
|
106
|
+
};
|
|
99
107
|
updatedAt: number;
|
|
100
108
|
}) => {
|
|
101
109
|
upsertCalls.push(params);
|
|
@@ -242,6 +250,114 @@ describe("embedConceptPageJob — happy path", () => {
|
|
|
242
250
|
});
|
|
243
251
|
});
|
|
244
252
|
|
|
253
|
+
describe("embedConceptPageJob — summary embedding", () => {
|
|
254
|
+
test("embeds the summary when present and forwards summary vectors to upsert", async () => {
|
|
255
|
+
await writePage(tmpWorkspace, {
|
|
256
|
+
slug: "summarized-page",
|
|
257
|
+
frontmatter: {
|
|
258
|
+
edges: [],
|
|
259
|
+
ref_files: [],
|
|
260
|
+
summary: "A short prose summary that retrieval indexes separately.",
|
|
261
|
+
},
|
|
262
|
+
body: "Long-form body content.\n",
|
|
263
|
+
});
|
|
264
|
+
|
|
265
|
+
await embedConceptPageJob(
|
|
266
|
+
makeJob({ slug: "summarized-page" }),
|
|
267
|
+
TEST_CONFIG,
|
|
268
|
+
);
|
|
269
|
+
|
|
270
|
+
// Body and summary are batched into one backend call (saves a round-trip).
|
|
271
|
+
expect(embedWithBackendCalls).toHaveLength(1);
|
|
272
|
+
expect(embedWithBackendCalls[0].inputs).toHaveLength(2);
|
|
273
|
+
expect(upsertCalls).toHaveLength(1);
|
|
274
|
+
const call = upsertCalls[0];
|
|
275
|
+
expect(call.slug).toBe("summarized-page");
|
|
276
|
+
expect(call.dense).toEqual([0.1, 0.2, 0.3, 0.4]);
|
|
277
|
+
expect(call.sparse).toBeDefined();
|
|
278
|
+
expect(call.summary?.dense).toEqual([0.1, 0.2, 0.3, 0.4]);
|
|
279
|
+
expect(call.summary?.sparse).toBeDefined();
|
|
280
|
+
});
|
|
281
|
+
|
|
282
|
+
test("skips summary embedding when the page has no summary in frontmatter", async () => {
|
|
283
|
+
await writePage(tmpWorkspace, {
|
|
284
|
+
slug: "legacy-page",
|
|
285
|
+
frontmatter: { edges: [], ref_files: [] },
|
|
286
|
+
body: "Body only — no summary in frontmatter.\n",
|
|
287
|
+
});
|
|
288
|
+
|
|
289
|
+
await embedConceptPageJob(makeJob({ slug: "legacy-page" }), TEST_CONFIG);
|
|
290
|
+
|
|
291
|
+
// Only the body was embedded.
|
|
292
|
+
expect(embedWithBackendCalls).toHaveLength(1);
|
|
293
|
+
expect(upsertCalls).toHaveLength(1);
|
|
294
|
+
const call = upsertCalls[0];
|
|
295
|
+
expect(call.summary).toBeUndefined();
|
|
296
|
+
});
|
|
297
|
+
|
|
298
|
+
test("skips summary embedding when the summary is whitespace-only", async () => {
|
|
299
|
+
// Whitespace-only summaries (` `, `\n`) are equivalent to absent — the
|
|
300
|
+
// embedding backend would reject the empty input downstream anyway.
|
|
301
|
+
await writePage(tmpWorkspace, {
|
|
302
|
+
slug: "whitespace-summary",
|
|
303
|
+
frontmatter: {
|
|
304
|
+
edges: [],
|
|
305
|
+
ref_files: [],
|
|
306
|
+
summary: " ",
|
|
307
|
+
},
|
|
308
|
+
body: "Body content.\n",
|
|
309
|
+
});
|
|
310
|
+
|
|
311
|
+
await embedConceptPageJob(
|
|
312
|
+
makeJob({ slug: "whitespace-summary" }),
|
|
313
|
+
TEST_CONFIG,
|
|
314
|
+
);
|
|
315
|
+
|
|
316
|
+
expect(embedWithBackendCalls).toHaveLength(1);
|
|
317
|
+
expect(upsertCalls[0].summary).toBeUndefined();
|
|
318
|
+
});
|
|
319
|
+
|
|
320
|
+
test("body and summary cache rows are independent (summary edit doesn't invalidate body)", async () => {
|
|
321
|
+
// Write a page with a summary, run the job to prime caches.
|
|
322
|
+
await writePage(tmpWorkspace, {
|
|
323
|
+
slug: "cached-summary",
|
|
324
|
+
frontmatter: {
|
|
325
|
+
edges: [],
|
|
326
|
+
ref_files: [],
|
|
327
|
+
summary: "First version of the summary.",
|
|
328
|
+
},
|
|
329
|
+
body: "Stable body that never changes.\n",
|
|
330
|
+
});
|
|
331
|
+
await embedConceptPageJob(
|
|
332
|
+
makeJob({ slug: "cached-summary" }),
|
|
333
|
+
TEST_CONFIG,
|
|
334
|
+
);
|
|
335
|
+
// Body + summary batched into a single backend call on first run.
|
|
336
|
+
expect(embedWithBackendCalls).toHaveLength(1);
|
|
337
|
+
expect(embedWithBackendCalls[0].inputs).toHaveLength(2);
|
|
338
|
+
|
|
339
|
+
// Edit only the summary — body stays identical, only the summary text
|
|
340
|
+
// changes. Re-running the job should hit the body cache (no re-embed)
|
|
341
|
+
// but recompute the summary embedding.
|
|
342
|
+
await writePage(tmpWorkspace, {
|
|
343
|
+
slug: "cached-summary",
|
|
344
|
+
frontmatter: {
|
|
345
|
+
edges: [],
|
|
346
|
+
ref_files: [],
|
|
347
|
+
summary: "Second version of the summary, different wording.",
|
|
348
|
+
},
|
|
349
|
+
body: "Stable body that never changes.\n",
|
|
350
|
+
});
|
|
351
|
+
await embedConceptPageJob(
|
|
352
|
+
makeJob({ slug: "cached-summary" }),
|
|
353
|
+
TEST_CONFIG,
|
|
354
|
+
);
|
|
355
|
+
// One additional backend call with only the summary text — body hit the cache.
|
|
356
|
+
expect(embedWithBackendCalls).toHaveLength(2);
|
|
357
|
+
expect(embedWithBackendCalls[1].inputs).toHaveLength(1);
|
|
358
|
+
});
|
|
359
|
+
});
|
|
360
|
+
|
|
245
361
|
describe("embedConceptPageJob — cache hit", () => {
|
|
246
362
|
test("reuses the cached dense vector when content hash matches", async () => {
|
|
247
363
|
await writePage(tmpWorkspace, {
|
|
@@ -98,52 +98,93 @@ export async function embedConceptPageJob(
|
|
|
98
98
|
);
|
|
99
99
|
}
|
|
100
100
|
|
|
101
|
-
const contentHash = embeddingInputContentHash({ type: "text", text });
|
|
102
101
|
const expectedDim = config.memory.qdrant.vectorSize;
|
|
103
|
-
|
|
104
|
-
|
|
102
|
+
// The status provider is the cache lookup key for any prior row; the
|
|
103
|
+
// *actual* provider/model come back on the embedded result. They usually
|
|
104
|
+
// match, but a backend swap mid-run would surface here — body and summary
|
|
105
|
+
// are then re-embedded together so both rows write under the same identity.
|
|
106
|
+
const cacheProvider = status.provider;
|
|
107
|
+
const cacheModel = status.model!;
|
|
108
|
+
|
|
109
|
+
const db = getDb();
|
|
105
110
|
|
|
106
111
|
// Cache lookup: same (targetType, targetId, provider, model) row gets
|
|
107
112
|
// reused across runs as long as `contentHash` matches. The dim mismatch
|
|
108
113
|
// check guards against a config change (vectorSize bumped) since the last
|
|
109
|
-
// write — in that case we treat the row as stale and re-embed.
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
114
|
+
// write — in that case we treat the row as stale and re-embed. The body
|
|
115
|
+
// and (optional) summary share the same provider/model — but each gets
|
|
116
|
+
// its own cache row keyed by a distinct targetId so summary edits don't
|
|
117
|
+
// invalidate the body cache and vice versa.
|
|
118
|
+
const bodyContentHash = embeddingInputContentHash({ type: "text", text });
|
|
119
|
+
const bodyCache = readEmbeddingCache(
|
|
120
|
+
db,
|
|
121
|
+
slug,
|
|
122
|
+
cacheProvider,
|
|
123
|
+
cacheModel,
|
|
124
|
+
expectedDim,
|
|
125
|
+
);
|
|
126
|
+
const bodyCacheHit = bodyCache?.contentHash === bodyContentHash;
|
|
127
|
+
|
|
128
|
+
// Optional summary embedding — only when the page has a `summary` in its
|
|
129
|
+
// frontmatter. Pages without one fall back to body-only retrieval at
|
|
130
|
+
// query time (the activation pipeline reads the summary score as
|
|
131
|
+
// undefined and uses the body score directly).
|
|
132
|
+
const summaryText = page.frontmatter.summary?.trim() ?? "";
|
|
133
|
+
const hasSummary = summaryText.length > 0;
|
|
134
|
+
const summaryCacheId = `${slug}#summary`;
|
|
135
|
+
const summaryContentHash = hasSummary
|
|
136
|
+
? embeddingInputContentHash({ type: "text", text: summaryText })
|
|
137
|
+
: undefined;
|
|
138
|
+
const summaryCache = hasSummary
|
|
139
|
+
? readEmbeddingCache(
|
|
140
|
+
db,
|
|
141
|
+
summaryCacheId,
|
|
142
|
+
cacheProvider,
|
|
143
|
+
cacheModel,
|
|
144
|
+
expectedDim,
|
|
145
|
+
)
|
|
146
|
+
: null;
|
|
147
|
+
const summaryCacheHit =
|
|
148
|
+
hasSummary && summaryCache?.contentHash === summaryContentHash;
|
|
149
|
+
|
|
150
|
+
// Batch all cache misses into one `embedWithBackend` call. Each backend
|
|
151
|
+
// round-trip is the dominant cost — fresh body + fresh summary in a
|
|
152
|
+
// single batch saves a round-trip vs serial calls and gives both vectors
|
|
153
|
+
// the same provider/model regardless of any backend rotation mid-run.
|
|
154
|
+
type Slot = "body" | "summary";
|
|
155
|
+
const toEmbed: Array<{ type: "text"; text: string }> = [];
|
|
156
|
+
const slots: Slot[] = [];
|
|
157
|
+
if (!bodyCacheHit) {
|
|
158
|
+
toEmbed.push({ type: "text", text });
|
|
159
|
+
slots.push("body");
|
|
160
|
+
}
|
|
161
|
+
if (hasSummary && !summaryCacheHit) {
|
|
162
|
+
toEmbed.push({ type: "text", text: summaryText });
|
|
163
|
+
slots.push("summary");
|
|
145
164
|
}
|
|
146
165
|
|
|
166
|
+
let bodyDense: number[] | undefined = bodyCacheHit ? bodyCache!.dense : undefined;
|
|
167
|
+
let summaryDense: number[] | undefined = summaryCacheHit
|
|
168
|
+
? summaryCache!.dense
|
|
169
|
+
: undefined;
|
|
170
|
+
let writeProvider = cacheProvider;
|
|
171
|
+
let writeModel = cacheModel;
|
|
172
|
+
if (toEmbed.length > 0) {
|
|
173
|
+
const embedded = await embedWithBackend(config, toEmbed);
|
|
174
|
+
writeProvider = embedded.provider;
|
|
175
|
+
writeModel = embedded.model;
|
|
176
|
+
for (let i = 0; i < slots.length; i++) {
|
|
177
|
+
const vector = embedded.vectors[i];
|
|
178
|
+
if (!vector) continue;
|
|
179
|
+
if (slots[i] === "body") bodyDense = vector;
|
|
180
|
+
else summaryDense = vector;
|
|
181
|
+
}
|
|
182
|
+
}
|
|
183
|
+
// Body embedding is the ground truth — without it the page can't surface.
|
|
184
|
+
// (Cache hit paths populate `bodyDense` above; a fresh embed that returned
|
|
185
|
+
// no vectors short-circuits here too.)
|
|
186
|
+
if (!bodyDense) return;
|
|
187
|
+
|
|
147
188
|
// Sparse is cheap (in-process tokenization) and changes any time the body
|
|
148
189
|
// changes, so we always recompute it rather than caching alongside dense.
|
|
149
190
|
// BM25 weights live on the doc side; queries embed binary occurrence in
|
|
@@ -151,57 +192,42 @@ export async function embedConceptPageJob(
|
|
|
151
192
|
// corpus for the first time), fall back to the legacy TF-only encoding —
|
|
152
193
|
// the next reembed pass overwrites the page once stats are available.
|
|
153
194
|
const corpusStats = getConceptPageCorpusStats();
|
|
154
|
-
const
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
195
|
+
const encodeSparse = (input: string) =>
|
|
196
|
+
corpusStats
|
|
197
|
+
? generateBm25DocEmbedding(input, corpusStats, {
|
|
198
|
+
k1: config.memory.v2.bm25_k1,
|
|
199
|
+
b: config.memory.v2.bm25_b,
|
|
200
|
+
})
|
|
201
|
+
: generateSparseEmbedding(input);
|
|
202
|
+
const sparse = encodeSparse(text);
|
|
203
|
+
const summarySparse = hasSummary ? encodeSparse(summaryText) : undefined;
|
|
160
204
|
|
|
161
205
|
const now = Date.now();
|
|
162
206
|
// Persist freshly embedded vectors for cross-restart reuse. On cache hit
|
|
163
207
|
// the existing row already has identical content + hash, so the write
|
|
164
208
|
// would be a no-op — skip it. Best-effort: write failure is not fatal,
|
|
165
209
|
// we still want the Qdrant upsert below to fire.
|
|
166
|
-
if (!
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
memoryEmbeddings.provider,
|
|
188
|
-
memoryEmbeddings.model,
|
|
189
|
-
],
|
|
190
|
-
set: {
|
|
191
|
-
vectorBlob: blobValue,
|
|
192
|
-
vectorJson: null,
|
|
193
|
-
dimensions: dense.length,
|
|
194
|
-
contentHash,
|
|
195
|
-
updatedAt: now,
|
|
196
|
-
},
|
|
197
|
-
})
|
|
198
|
-
.run();
|
|
199
|
-
} catch (err) {
|
|
200
|
-
log.warn(
|
|
201
|
-
{ err, slug },
|
|
202
|
-
"Failed to write concept-page embedding cache row",
|
|
203
|
-
);
|
|
204
|
-
}
|
|
210
|
+
if (!bodyCacheHit) {
|
|
211
|
+
writeEmbeddingCache(db, {
|
|
212
|
+
slug,
|
|
213
|
+
cacheId: slug,
|
|
214
|
+
dense: bodyDense,
|
|
215
|
+
contentHash: bodyContentHash,
|
|
216
|
+
provider: writeProvider,
|
|
217
|
+
model: writeModel,
|
|
218
|
+
now,
|
|
219
|
+
});
|
|
220
|
+
}
|
|
221
|
+
if (hasSummary && !summaryCacheHit && summaryDense && summaryContentHash) {
|
|
222
|
+
writeEmbeddingCache(db, {
|
|
223
|
+
slug,
|
|
224
|
+
cacheId: summaryCacheId,
|
|
225
|
+
dense: summaryDense,
|
|
226
|
+
contentHash: summaryContentHash,
|
|
227
|
+
provider: writeProvider,
|
|
228
|
+
model: writeModel,
|
|
229
|
+
now,
|
|
230
|
+
});
|
|
205
231
|
}
|
|
206
232
|
|
|
207
233
|
// Apply anisotropy correction at the boundary between the (raw) cached
|
|
@@ -210,19 +236,129 @@ export async function embedConceptPageJob(
|
|
|
210
236
|
// the cache survives and the (cheap) correction math reruns over each
|
|
211
237
|
// cached vector. Pass-through when no calibration is fit yet.
|
|
212
238
|
const correctedDense = await applyCorrectionIfCalibrated(
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
239
|
+
bodyDense,
|
|
240
|
+
writeProvider,
|
|
241
|
+
writeModel,
|
|
216
242
|
);
|
|
243
|
+
const correctedSummaryDense = summaryDense
|
|
244
|
+
? await applyCorrectionIfCalibrated(summaryDense, writeProvider, writeModel)
|
|
245
|
+
: undefined;
|
|
217
246
|
|
|
218
247
|
await upsertConceptPageEmbedding({
|
|
219
248
|
slug,
|
|
220
249
|
dense: correctedDense,
|
|
221
250
|
sparse,
|
|
251
|
+
summary:
|
|
252
|
+
correctedSummaryDense && summarySparse
|
|
253
|
+
? { dense: correctedSummaryDense, sparse: summarySparse }
|
|
254
|
+
: undefined,
|
|
222
255
|
updatedAt: now,
|
|
223
256
|
});
|
|
224
257
|
}
|
|
225
258
|
|
|
259
|
+
/** SQLite cache row shape returned by `readEmbeddingCache`. */
|
|
260
|
+
interface EmbeddingCacheEntry {
|
|
261
|
+
dense: number[];
|
|
262
|
+
contentHash: string;
|
|
263
|
+
}
|
|
264
|
+
|
|
265
|
+
/**
|
|
266
|
+
* Look up a cached dense vector keyed on `(targetType, targetId, provider,
|
|
267
|
+
* model)`. Returns the row only when the persisted dimensions match the
|
|
268
|
+
* configured expectation — a stale row from a previous `vectorSize` is
|
|
269
|
+
* treated as a cache miss so the caller re-embeds.
|
|
270
|
+
*/
|
|
271
|
+
function readEmbeddingCache(
|
|
272
|
+
db: ReturnType<typeof getDb>,
|
|
273
|
+
cacheId: string,
|
|
274
|
+
provider: string,
|
|
275
|
+
model: string,
|
|
276
|
+
expectedDim: number,
|
|
277
|
+
): EmbeddingCacheEntry | null {
|
|
278
|
+
const row = db
|
|
279
|
+
.select({
|
|
280
|
+
vectorBlob: memoryEmbeddings.vectorBlob,
|
|
281
|
+
vectorJson: memoryEmbeddings.vectorJson,
|
|
282
|
+
dimensions: memoryEmbeddings.dimensions,
|
|
283
|
+
contentHash: memoryEmbeddings.contentHash,
|
|
284
|
+
})
|
|
285
|
+
.from(memoryEmbeddings)
|
|
286
|
+
.where(
|
|
287
|
+
and(
|
|
288
|
+
eq(memoryEmbeddings.targetType, CONCEPT_PAGE_TARGET_TYPE),
|
|
289
|
+
eq(memoryEmbeddings.targetId, cacheId),
|
|
290
|
+
eq(memoryEmbeddings.provider, provider),
|
|
291
|
+
eq(memoryEmbeddings.model, model),
|
|
292
|
+
),
|
|
293
|
+
)
|
|
294
|
+
.get();
|
|
295
|
+
if (!row || row.dimensions !== expectedDim) return null;
|
|
296
|
+
// A row without a contentHash is a legacy/corrupt entry — treat as a miss
|
|
297
|
+
// and force a re-embed rather than misalign the cache key.
|
|
298
|
+
if (row.contentHash === null) return null;
|
|
299
|
+
const dense = row.vectorBlob
|
|
300
|
+
? blobToVector(row.vectorBlob as Buffer)
|
|
301
|
+
: (JSON.parse(row.vectorJson!) as number[]);
|
|
302
|
+
return { dense, contentHash: row.contentHash };
|
|
303
|
+
}
|
|
304
|
+
|
|
305
|
+
/**
|
|
306
|
+
* Persist a freshly embedded dense vector in the SQLite cache. Best-effort:
|
|
307
|
+
* a write failure is logged and swallowed so the Qdrant upsert still runs.
|
|
308
|
+
*/
|
|
309
|
+
function writeEmbeddingCache(
|
|
310
|
+
db: ReturnType<typeof getDb>,
|
|
311
|
+
params: {
|
|
312
|
+
slug: string;
|
|
313
|
+
cacheId: string;
|
|
314
|
+
dense: number[];
|
|
315
|
+
contentHash: string;
|
|
316
|
+
provider: string;
|
|
317
|
+
model: string;
|
|
318
|
+
now: number;
|
|
319
|
+
},
|
|
320
|
+
): void {
|
|
321
|
+
const { slug, cacheId, dense, contentHash, provider, model, now } = params;
|
|
322
|
+
try {
|
|
323
|
+
const blobValue = vectorToBlob(dense);
|
|
324
|
+
db.insert(memoryEmbeddings)
|
|
325
|
+
.values({
|
|
326
|
+
id: randomUUID(),
|
|
327
|
+
targetType: CONCEPT_PAGE_TARGET_TYPE,
|
|
328
|
+
targetId: cacheId,
|
|
329
|
+
provider,
|
|
330
|
+
model,
|
|
331
|
+
dimensions: dense.length,
|
|
332
|
+
vectorBlob: blobValue,
|
|
333
|
+
vectorJson: null,
|
|
334
|
+
contentHash,
|
|
335
|
+
createdAt: now,
|
|
336
|
+
updatedAt: now,
|
|
337
|
+
})
|
|
338
|
+
.onConflictDoUpdate({
|
|
339
|
+
target: [
|
|
340
|
+
memoryEmbeddings.targetType,
|
|
341
|
+
memoryEmbeddings.targetId,
|
|
342
|
+
memoryEmbeddings.provider,
|
|
343
|
+
memoryEmbeddings.model,
|
|
344
|
+
],
|
|
345
|
+
set: {
|
|
346
|
+
vectorBlob: blobValue,
|
|
347
|
+
vectorJson: null,
|
|
348
|
+
dimensions: dense.length,
|
|
349
|
+
contentHash,
|
|
350
|
+
updatedAt: now,
|
|
351
|
+
},
|
|
352
|
+
})
|
|
353
|
+
.run();
|
|
354
|
+
} catch (err) {
|
|
355
|
+
log.warn(
|
|
356
|
+
{ err, slug, cacheId },
|
|
357
|
+
"Failed to write concept-page embedding cache row",
|
|
358
|
+
);
|
|
359
|
+
}
|
|
360
|
+
}
|
|
361
|
+
|
|
226
362
|
/**
|
|
227
363
|
* Enqueue an `embed_concept_page` job (async, fire-and-forget). Modeled on
|
|
228
364
|
* `enqueuePkbIndexJob` — callers that want a slug re-embedded after a write
|
|
@@ -11,7 +11,6 @@ import {
|
|
|
11
11
|
getLastScheduledCleanupEnqueueMs,
|
|
12
12
|
markScheduledCleanupEnqueued,
|
|
13
13
|
} from "./cleanup-schedule-state.js";
|
|
14
|
-
import { isMemoryV2ReadActive } from "./context-search/sources/memory-v2.js";
|
|
15
14
|
import { conversationAnalyzeJob } from "./conversation-analyze-job.js";
|
|
16
15
|
import { maybeRunDbMaintenance } from "./db-maintenance.js";
|
|
17
16
|
import { bootstrapFromHistory } from "./graph/bootstrap.js";
|
|
@@ -510,6 +509,11 @@ async function processJob(
|
|
|
510
509
|
await embedGraphTriggerJob(job, config);
|
|
511
510
|
return;
|
|
512
511
|
case "graph_extract":
|
|
512
|
+
// Stale rows enqueued before v2 was enabled (or by any unguarded v1
|
|
513
|
+
// path) must not consume embedding/extraction budget when v2 is on.
|
|
514
|
+
if (config.memory.v2.enabled) {
|
|
515
|
+
return;
|
|
516
|
+
}
|
|
513
517
|
await graphExtractJob(job, config);
|
|
514
518
|
return;
|
|
515
519
|
case "conversation_analyze":
|
|
@@ -623,8 +627,8 @@ export const GRAPH_MAINTENANCE_CHECKPOINTS = {
|
|
|
623
627
|
* Enqueue periodic graph maintenance jobs.
|
|
624
628
|
*
|
|
625
629
|
* Mutually exclusive between v1 and v2:
|
|
626
|
-
* - v2 active (
|
|
627
|
-
*
|
|
630
|
+
* - v2 active (`memory.v2.enabled` on) → only `memory_v2_consolidate` is
|
|
631
|
+
* scheduled.
|
|
628
632
|
* - v2 inactive → the four v1 entries (decay, consolidate, pattern_scan,
|
|
629
633
|
* narrative) are scheduled instead.
|
|
630
634
|
*
|
|
@@ -643,7 +647,7 @@ export function maybeEnqueueGraphMaintenanceJobs(
|
|
|
643
647
|
config: AssistantConfig,
|
|
644
648
|
nowMs = Date.now(),
|
|
645
649
|
): void {
|
|
646
|
-
const v2Active =
|
|
650
|
+
const v2Active = config.memory.v2.enabled;
|
|
647
651
|
|
|
648
652
|
const schedule: Array<{
|
|
649
653
|
key: string;
|
|
@@ -1,12 +1,13 @@
|
|
|
1
1
|
import { beforeEach, describe, expect, mock, test } from "bun:test";
|
|
2
2
|
|
|
3
3
|
import { makeMockLogger } from "../../__tests__/helpers/mock-logger.js";
|
|
4
|
-
import { _setOverridesForTesting } from "../../config/assistant-feature-flags.js";
|
|
5
4
|
|
|
6
|
-
// This test exercises the v1 PKB search path.
|
|
7
|
-
// (
|
|
8
|
-
//
|
|
9
|
-
|
|
5
|
+
// This test exercises the v1 PKB search path. `config.memory.v2.enabled`
|
|
6
|
+
// (default `true`) makes pkb-search short-circuit to keep traffic off the
|
|
7
|
+
// legacy collection — force it off so the v1 path stays under test.
|
|
8
|
+
mock.module("../../config/loader.js", () => ({
|
|
9
|
+
getConfig: () => ({ memory: { v2: { enabled: false } } }),
|
|
10
|
+
}));
|
|
10
11
|
|
|
11
12
|
mock.module("../../util/logger.js", () => ({
|
|
12
13
|
getLogger: () => makeMockLogger(),
|
|
@@ -4,7 +4,6 @@
|
|
|
4
4
|
|
|
5
5
|
import { getConfig } from "../../config/loader.js";
|
|
6
6
|
import { getLogger } from "../../util/logger.js";
|
|
7
|
-
import { isMemoryV2ReadActive } from "../context-search/sources/memory-v2.js";
|
|
8
7
|
import {
|
|
9
8
|
isQdrantBreakerOpen,
|
|
10
9
|
withQdrantBreaker,
|
|
@@ -42,10 +41,10 @@ export async function searchPkbFiles(
|
|
|
42
41
|
limit: number,
|
|
43
42
|
scopeIds?: string[],
|
|
44
43
|
): Promise<PkbSearchResult[]> {
|
|
45
|
-
// v2 owns the read path when
|
|
46
|
-
//
|
|
47
|
-
//
|
|
48
|
-
if (
|
|
44
|
+
// v2 owns the read path when enabled; v2 absorbs PKB as a read source,
|
|
45
|
+
// so PKB hint search short-circuits to keep traffic off the v1 collection
|
|
46
|
+
// (avoiding OOM-crash risk from a corrupted sparse segment).
|
|
47
|
+
if (getConfig().memory.v2.enabled) return [];
|
|
49
48
|
|
|
50
49
|
if (isQdrantBreakerOpen()) {
|
|
51
50
|
log.warn("Qdrant circuit breaker open, skipping PKB search");
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
import { inArray } from "drizzle-orm";
|
|
2
2
|
|
|
3
3
|
import { getConfig } from "../../config/loader.js";
|
|
4
|
-
import { isMemoryV2ReadActive } from "../context-search/sources/memory-v2.js";
|
|
5
4
|
import { getDb } from "../db-connection.js";
|
|
6
5
|
import { withQdrantBreaker } from "../qdrant-circuit-breaker.js";
|
|
7
6
|
import type {
|
|
@@ -56,10 +55,10 @@ export async function semanticSearch(
|
|
|
56
55
|
): Promise<Candidate[]> {
|
|
57
56
|
if (limit <= 0) return [];
|
|
58
57
|
|
|
59
|
-
// v2 owns the read path when
|
|
60
|
-
//
|
|
61
|
-
//
|
|
62
|
-
if (
|
|
58
|
+
// v2 owns the read path when enabled; the v1 `memory` collection is in
|
|
59
|
+
// active retirement, and routing semantic recall there would re-enter the
|
|
60
|
+
// same corrupted sparse segments that can OOM-crash Qdrant.
|
|
61
|
+
if (getConfig().memory.v2.enabled) return [];
|
|
63
62
|
|
|
64
63
|
const qdrant = getQdrantClient();
|
|
65
64
|
|
|
@@ -114,7 +114,10 @@ class MockQdrantClient {
|
|
|
114
114
|
limit: params.limit,
|
|
115
115
|
filter: params.filter,
|
|
116
116
|
});
|
|
117
|
-
|
|
117
|
+
// The four-channel hybrid query fires body-dense, body-sparse,
|
|
118
|
+
// summary-dense, summary-sparse in order; both dense channels share
|
|
119
|
+
// the dense queue and both sparse channels share the sparse queue.
|
|
120
|
+
const channel = params.using.endsWith("sparse") ? "sparse" : "dense";
|
|
118
121
|
return state.queryResponses[channel].shift() ?? { points: [] };
|
|
119
122
|
}
|
|
120
123
|
}
|
|
@@ -223,9 +226,20 @@ function makeConfig(
|
|
|
223
226
|
} as unknown as AssistantConfig;
|
|
224
227
|
}
|
|
225
228
|
|
|
226
|
-
/**
|
|
229
|
+
/**
|
|
230
|
+
* Stage a single hybrid-query response — body channels first, then summary
|
|
231
|
+
* channels (which default to empty). The four-channel hybrid query fires
|
|
232
|
+
* body-dense, body-sparse, summary-dense, summary-sparse in that order, so
|
|
233
|
+
* each logical call consumes 2 dense + 2 sparse queue entries.
|
|
234
|
+
*/
|
|
227
235
|
function stageHybridResponse(
|
|
228
|
-
hits: Array<{
|
|
236
|
+
hits: Array<{
|
|
237
|
+
slug: string;
|
|
238
|
+
denseScore?: number;
|
|
239
|
+
sparseScore?: number;
|
|
240
|
+
summaryDenseScore?: number;
|
|
241
|
+
summarySparseScore?: number;
|
|
242
|
+
}>,
|
|
229
243
|
): void {
|
|
230
244
|
state.queryResponses.dense.push({
|
|
231
245
|
points: hits
|
|
@@ -237,6 +251,22 @@ function stageHybridResponse(
|
|
|
237
251
|
.filter((h) => h.sparseScore !== undefined)
|
|
238
252
|
.map((h) => ({ score: h.sparseScore, payload: { slug: h.slug } })),
|
|
239
253
|
});
|
|
254
|
+
state.queryResponses.dense.push({
|
|
255
|
+
points: hits
|
|
256
|
+
.filter((h) => h.summaryDenseScore !== undefined)
|
|
257
|
+
.map((h) => ({
|
|
258
|
+
score: h.summaryDenseScore,
|
|
259
|
+
payload: { slug: h.slug },
|
|
260
|
+
})),
|
|
261
|
+
});
|
|
262
|
+
state.queryResponses.sparse.push({
|
|
263
|
+
points: hits
|
|
264
|
+
.filter((h) => h.summarySparseScore !== undefined)
|
|
265
|
+
.map((h) => ({
|
|
266
|
+
score: h.summarySparseScore,
|
|
267
|
+
payload: { slug: h.slug },
|
|
268
|
+
})),
|
|
269
|
+
});
|
|
240
270
|
}
|
|
241
271
|
|
|
242
272
|
beforeEach(resetState);
|
|
@@ -369,7 +399,7 @@ describe("selectCandidates", () => {
|
|
|
369
399
|
nowText: "",
|
|
370
400
|
config: makeConfig(),
|
|
371
401
|
});
|
|
372
|
-
expect(state.queryCalls).toHaveLength(
|
|
402
|
+
expect(state.queryCalls).toHaveLength(4);
|
|
373
403
|
for (const call of state.queryCalls) {
|
|
374
404
|
expect(call.limit).toBe(1_000_000);
|
|
375
405
|
expect(call.filter).toBeUndefined();
|
|
@@ -385,7 +415,7 @@ describe("selectCandidates", () => {
|
|
|
385
415
|
nowText: "",
|
|
386
416
|
config: makeConfig({ ann_candidate_limit: 25 }),
|
|
387
417
|
});
|
|
388
|
-
expect(state.queryCalls).toHaveLength(
|
|
418
|
+
expect(state.queryCalls).toHaveLength(4);
|
|
389
419
|
for (const call of state.queryCalls) {
|
|
390
420
|
expect(call.limit).toBe(25);
|
|
391
421
|
expect(call.filter).toBeUndefined();
|