@vellumai/assistant 0.4.49 → 0.4.50
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ARCHITECTURE.md +24 -33
- package/README.md +3 -3
- package/docs/architecture/memory.md +180 -119
- package/package.json +2 -2
- package/src/__tests__/agent-loop.test.ts +3 -1
- package/src/__tests__/anthropic-provider.test.ts +114 -23
- package/src/__tests__/approval-cascade.test.ts +1 -15
- package/src/__tests__/approval-routes-http.test.ts +2 -0
- package/src/__tests__/assistant-feature-flag-guard.test.ts +0 -23
- package/src/__tests__/canonical-guardian-store.test.ts +95 -0
- package/src/__tests__/checker.test.ts +13 -0
- package/src/__tests__/config-schema.test.ts +1 -68
- package/src/__tests__/context-memory-e2e.test.ts +11 -100
- package/src/__tests__/conversation-routes-guardian-reply.test.ts +8 -0
- package/src/__tests__/conversation-routes-slash-commands.test.ts +1 -0
- package/src/__tests__/credential-security-e2e.test.ts +1 -0
- package/src/__tests__/credential-vault-unit.test.ts +4 -0
- package/src/__tests__/credential-vault.test.ts +13 -1
- package/src/__tests__/cu-unified-flow.test.ts +532 -0
- package/src/__tests__/date-context.test.ts +93 -77
- package/src/__tests__/deterministic-verification-control-plane.test.ts +64 -0
- package/src/__tests__/guardian-routing-invariants.test.ts +93 -0
- package/src/__tests__/history-repair.test.ts +245 -0
- package/src/__tests__/host-cu-proxy.test.ts +165 -3
- package/src/__tests__/http-user-message-parity.test.ts +1 -0
- package/src/__tests__/invite-redemption-service.test.ts +65 -1
- package/src/__tests__/keychain-broker-client.test.ts +4 -4
- package/src/__tests__/memory-context-benchmark.benchmark.test.ts +56 -18
- package/src/__tests__/memory-lifecycle-e2e.test.ts +244 -387
- package/src/__tests__/memory-recall-quality.test.ts +244 -407
- package/src/__tests__/memory-regressions.experimental.test.ts +126 -101
- package/src/__tests__/memory-regressions.test.ts +477 -2841
- package/src/__tests__/memory-retrieval.benchmark.test.ts +33 -150
- package/src/__tests__/memory-upsert-concurrency.test.ts +5 -244
- package/src/__tests__/mime-builder.test.ts +28 -0
- package/src/__tests__/native-web-search.test.ts +1 -0
- package/src/__tests__/oauth-cli.test.ts +572 -5
- package/src/__tests__/oauth-store.test.ts +120 -6
- package/src/__tests__/qdrant-collection-migration.test.ts +53 -8
- package/src/__tests__/registry.test.ts +0 -1
- package/src/__tests__/relay-server.test.ts +46 -1
- package/src/__tests__/schedule-tools.test.ts +32 -0
- package/src/__tests__/script-proxy-certs.test.ts +1 -1
- package/src/__tests__/secret-onetime-send.test.ts +1 -0
- package/src/__tests__/secure-keys.test.ts +7 -2
- package/src/__tests__/send-endpoint-busy.test.ts +3 -0
- package/src/__tests__/session-abort-tool-results.test.ts +1 -14
- package/src/__tests__/session-agent-loop-overflow.test.ts +1583 -0
- package/src/__tests__/session-agent-loop.test.ts +19 -15
- package/src/__tests__/session-confirmation-signals.test.ts +1 -15
- package/src/__tests__/session-error.test.ts +124 -2
- package/src/__tests__/session-history-web-search.test.ts +918 -0
- package/src/__tests__/session-pre-run-repair.test.ts +1 -14
- package/src/__tests__/session-provider-retry-repair.test.ts +25 -28
- package/src/__tests__/session-queue.test.ts +37 -27
- package/src/__tests__/session-runtime-assembly.test.ts +54 -0
- package/src/__tests__/session-slash-known.test.ts +1 -15
- package/src/__tests__/session-slash-queue.test.ts +1 -15
- package/src/__tests__/session-slash-unknown.test.ts +1 -15
- package/src/__tests__/session-workspace-cache-state.test.ts +3 -33
- package/src/__tests__/session-workspace-injection.test.ts +3 -37
- package/src/__tests__/session-workspace-tool-tracking.test.ts +3 -37
- package/src/__tests__/skills-install-extract.test.ts +93 -0
- package/src/__tests__/skillssh-registry.test.ts +451 -0
- package/src/__tests__/trust-store.test.ts +15 -0
- package/src/__tests__/voice-invite-redemption.test.ts +32 -1
- package/src/agent/ax-tree-compaction.test.ts +51 -0
- package/src/agent/loop.ts +39 -12
- package/src/approvals/AGENTS.md +1 -1
- package/src/approvals/guardian-request-resolvers.ts +14 -2
- package/src/bundler/compiler-tools.ts +66 -2
- package/src/calls/call-domain.ts +132 -0
- package/src/calls/call-store.ts +6 -0
- package/src/calls/relay-server.ts +43 -5
- package/src/calls/relay-setup-router.ts +17 -1
- package/src/calls/twilio-config.ts +1 -1
- package/src/calls/types.ts +3 -1
- package/src/cli/commands/doctor.ts +4 -3
- package/src/cli/commands/mcp.ts +46 -59
- package/src/cli/commands/memory.ts +16 -165
- package/src/cli/commands/oauth/apps.ts +31 -2
- package/src/cli/commands/oauth/connections.ts +431 -97
- package/src/cli/commands/oauth/providers.ts +15 -1
- package/src/cli/commands/sessions.ts +5 -2
- package/src/cli/commands/skills.ts +173 -1
- package/src/cli/http-client.ts +0 -20
- package/src/cli/main-screen.tsx +2 -2
- package/src/cli/program.ts +5 -6
- package/src/cli.ts +4 -10
- package/src/config/bundled-skills/computer-use/TOOLS.json +1 -1
- package/src/config/bundled-skills/computer-use/tools/computer-use-observe.ts +12 -0
- package/src/config/bundled-tool-registry.ts +2 -5
- package/src/config/schema.ts +1 -12
- package/src/config/schemas/memory-lifecycle.ts +0 -9
- package/src/config/schemas/memory-processing.ts +0 -180
- package/src/config/schemas/memory-retrieval.ts +32 -104
- package/src/config/schemas/memory.ts +0 -10
- package/src/config/types.ts +0 -4
- package/src/context/window-manager.ts +4 -1
- package/src/daemon/config-watcher.ts +61 -3
- package/src/daemon/daemon-control.ts +1 -1
- package/src/daemon/date-context.ts +114 -31
- package/src/daemon/handlers/sessions.ts +18 -13
- package/src/daemon/handlers/skills.ts +20 -1
- package/src/daemon/history-repair.ts +72 -8
- package/src/daemon/host-cu-proxy.ts +55 -26
- package/src/daemon/lifecycle.ts +31 -3
- package/src/daemon/mcp-reload-service.ts +2 -2
- package/src/daemon/message-types/computer-use.ts +1 -12
- package/src/daemon/message-types/memory.ts +4 -16
- package/src/daemon/message-types/messages.ts +1 -0
- package/src/daemon/message-types/sessions.ts +4 -0
- package/src/daemon/server.ts +12 -1
- package/src/daemon/session-agent-loop-handlers.ts +38 -0
- package/src/daemon/session-agent-loop.ts +334 -48
- package/src/daemon/session-error.ts +89 -6
- package/src/daemon/session-history.ts +17 -7
- package/src/daemon/session-media-retry.ts +6 -2
- package/src/daemon/session-memory.ts +69 -149
- package/src/daemon/session-process.ts +10 -1
- package/src/daemon/session-runtime-assembly.ts +49 -19
- package/src/daemon/session-surfaces.ts +4 -1
- package/src/daemon/session-tool-setup.ts +7 -1
- package/src/daemon/session.ts +12 -2
- package/src/instrument.ts +61 -1
- package/src/memory/admin.ts +2 -191
- package/src/memory/canonical-guardian-store.ts +38 -2
- package/src/memory/conversation-crud.ts +0 -33
- package/src/memory/conversation-queries.ts +22 -3
- package/src/memory/db-init.ts +28 -0
- package/src/memory/embedding-backend.ts +84 -8
- package/src/memory/embedding-types.ts +9 -1
- package/src/memory/indexer.ts +7 -46
- package/src/memory/items-extractor.ts +274 -76
- package/src/memory/job-handlers/backfill.ts +2 -127
- package/src/memory/job-handlers/cleanup.ts +2 -16
- package/src/memory/job-handlers/extraction.ts +2 -138
- package/src/memory/job-handlers/index-maintenance.ts +1 -6
- package/src/memory/job-handlers/summarization.ts +3 -148
- package/src/memory/job-utils.ts +21 -59
- package/src/memory/jobs-store.ts +1 -159
- package/src/memory/jobs-worker.ts +9 -52
- package/src/memory/migrations/104-core-indexes.ts +3 -3
- package/src/memory/migrations/149-oauth-tables.ts +2 -0
- package/src/memory/migrations/150-oauth-apps-client-secret-path.ts +98 -0
- package/src/memory/migrations/151-oauth-providers-ping-url.ts +11 -0
- package/src/memory/migrations/152-memory-item-supersession.ts +44 -0
- package/src/memory/migrations/153-drop-entity-tables.ts +15 -0
- package/src/memory/migrations/154-drop-fts.ts +20 -0
- package/src/memory/migrations/155-drop-conflicts.ts +7 -0
- package/src/memory/migrations/156-call-session-invite-metadata.ts +24 -0
- package/src/memory/migrations/index.ts +7 -0
- package/src/memory/qdrant-client.ts +148 -51
- package/src/memory/raw-query.ts +1 -1
- package/src/memory/retriever.test.ts +294 -273
- package/src/memory/retriever.ts +421 -645
- package/src/memory/schema/calls.ts +2 -0
- package/src/memory/schema/memory-core.ts +3 -48
- package/src/memory/schema/oauth.ts +2 -0
- package/src/memory/search/formatting.ts +263 -176
- package/src/memory/search/lexical.ts +1 -254
- package/src/memory/search/ranking.ts +0 -455
- package/src/memory/search/semantic.ts +100 -14
- package/src/memory/search/staleness.ts +47 -0
- package/src/memory/search/tier-classifier.ts +21 -0
- package/src/memory/search/types.ts +15 -77
- package/src/memory/task-memory-cleanup.ts +4 -6
- package/src/messaging/providers/gmail/mime-builder.ts +17 -7
- package/src/oauth/byo-connection.test.ts +8 -1
- package/src/oauth/oauth-store.ts +113 -27
- package/src/oauth/seed-providers.ts +6 -0
- package/src/oauth/token-persistence.ts +11 -3
- package/src/permissions/defaults.ts +1 -0
- package/src/permissions/trust-store.ts +23 -1
- package/src/playbooks/playbook-compiler.ts +1 -1
- package/src/prompts/system-prompt.ts +18 -2
- package/src/providers/anthropic/client.ts +56 -126
- package/src/providers/types.ts +7 -1
- package/src/runtime/AGENTS.md +9 -0
- package/src/runtime/auth/route-policy.ts +6 -3
- package/src/runtime/guardian-reply-router.ts +24 -22
- package/src/runtime/http-server.ts +2 -2
- package/src/runtime/invite-redemption-service.ts +19 -1
- package/src/runtime/invite-service.ts +25 -0
- package/src/runtime/pending-interactions.ts +2 -2
- package/src/runtime/routes/brain-graph-routes.ts +10 -90
- package/src/runtime/routes/conversation-routes.ts +9 -1
- package/src/runtime/routes/inbound-stages/acl-enforcement.ts +21 -12
- package/src/runtime/routes/memory-item-routes.test.ts +754 -0
- package/src/runtime/routes/memory-item-routes.ts +503 -0
- package/src/runtime/routes/session-management-routes.ts +3 -3
- package/src/runtime/routes/settings-routes.ts +2 -2
- package/src/runtime/routes/trust-rules-routes.ts +14 -0
- package/src/runtime/routes/workspace-routes.ts +2 -1
- package/src/security/keychain-broker-client.ts +17 -4
- package/src/security/secure-keys.ts +25 -3
- package/src/security/token-manager.ts +36 -36
- package/src/skills/catalog-install.ts +74 -18
- package/src/skills/skillssh-registry.ts +503 -0
- package/src/tools/assets/search.ts +5 -1
- package/src/tools/computer-use/definitions.ts +0 -10
- package/src/tools/computer-use/registry.ts +1 -1
- package/src/tools/credentials/vault.ts +1 -3
- package/src/tools/memory/definitions.ts +4 -13
- package/src/tools/memory/handlers.test.ts +83 -103
- package/src/tools/memory/handlers.ts +50 -85
- package/src/tools/schedule/create.ts +8 -1
- package/src/tools/schedule/update.ts +8 -1
- package/src/tools/skills/load.ts +25 -2
- package/src/__tests__/clarification-resolver.test.ts +0 -193
- package/src/__tests__/conflict-intent-tokenization.test.ts +0 -160
- package/src/__tests__/conflict-policy.test.ts +0 -269
- package/src/__tests__/conflict-store.test.ts +0 -372
- package/src/__tests__/contradiction-checker.test.ts +0 -361
- package/src/__tests__/entity-extractor.test.ts +0 -211
- package/src/__tests__/entity-search.test.ts +0 -1117
- package/src/__tests__/profile-compiler.test.ts +0 -392
- package/src/__tests__/session-conflict-gate.test.ts +0 -1228
- package/src/__tests__/session-profile-injection.test.ts +0 -557
- package/src/config/bundled-skills/knowledge-graph/SKILL.md +0 -25
- package/src/config/bundled-skills/knowledge-graph/TOOLS.json +0 -66
- package/src/config/bundled-skills/knowledge-graph/tools/graph-query.ts +0 -211
- package/src/daemon/session-conflict-gate.ts +0 -167
- package/src/daemon/session-dynamic-profile.ts +0 -77
- package/src/memory/clarification-resolver.ts +0 -417
- package/src/memory/conflict-intent.ts +0 -205
- package/src/memory/conflict-policy.ts +0 -127
- package/src/memory/conflict-store.ts +0 -410
- package/src/memory/contradiction-checker.ts +0 -508
- package/src/memory/entity-extractor.ts +0 -535
- package/src/memory/format-recall.ts +0 -47
- package/src/memory/fts-reconciler.ts +0 -165
- package/src/memory/job-handlers/conflict.ts +0 -200
- package/src/memory/profile-compiler.ts +0 -195
- package/src/memory/recall-cache.ts +0 -117
- package/src/memory/search/entity.ts +0 -535
- package/src/memory/search/query-expansion.test.ts +0 -70
- package/src/memory/search/query-expansion.ts +0 -118
- package/src/runtime/routes/mcp-routes.ts +0 -20
|
@@ -4,6 +4,13 @@
|
|
|
4
4
|
* Measures end-to-end memory recall time with varying database sizes.
|
|
5
5
|
* Validates latency stays within acceptable bounds and token budget
|
|
6
6
|
* enforcement works correctly.
|
|
7
|
+
*
|
|
8
|
+
* The new pipeline uses hybrid search (Qdrant) + recency search.
|
|
9
|
+
* With Qdrant mocked and semanticSearch returning empty, only recency
|
|
10
|
+
* search provides candidates. These recency-only candidates have
|
|
11
|
+
* low finalScore (< 0.6) and are filtered out by tier classification,
|
|
12
|
+
* so injectedText is empty. The tests verify pipeline completion,
|
|
13
|
+
* latency bounds, and correct handling of recency hits.
|
|
7
14
|
*/
|
|
8
15
|
import { mkdtempSync, rmSync } from "node:fs";
|
|
9
16
|
import { tmpdir } from "node:os";
|
|
@@ -38,8 +45,7 @@ mock.module("../util/logger.js", () => ({
|
|
|
38
45
|
}),
|
|
39
46
|
}));
|
|
40
47
|
|
|
41
|
-
// Counter for semantic search invocations
|
|
42
|
-
// skips the call entirely rather than relying on flaky wall-clock comparisons.
|
|
48
|
+
// Counter for semantic search invocations
|
|
43
49
|
let semanticSearchCallCount = 0;
|
|
44
50
|
|
|
45
51
|
mock.module("../memory/search/semantic.js", () => ({
|
|
@@ -63,6 +69,8 @@ mock.module("../memory/embedding-backend.js", () => ({
|
|
|
63
69
|
model: "mock-embedding",
|
|
64
70
|
vectors: [new Array(1536).fill(0)],
|
|
65
71
|
}),
|
|
72
|
+
generateSparseEmbedding: () => ({ indices: [], values: [] }),
|
|
73
|
+
logMemoryEmbeddingWarning: () => {},
|
|
66
74
|
}));
|
|
67
75
|
|
|
68
76
|
import { DEFAULT_CONFIG } from "../config/defaults.js";
|
|
@@ -135,13 +143,7 @@ function makeConfig(overrides?: { maxInjectTokens?: number }): AssistantConfig {
|
|
|
135
143
|
},
|
|
136
144
|
retrieval: {
|
|
137
145
|
...DEFAULT_CONFIG.memory.retrieval,
|
|
138
|
-
lexicalTopK: 50,
|
|
139
|
-
semanticTopK: 20,
|
|
140
146
|
maxInjectTokens: overrides?.maxInjectTokens ?? 750,
|
|
141
|
-
reranking: {
|
|
142
|
-
...DEFAULT_CONFIG.memory.retrieval.reranking,
|
|
143
|
-
enabled: false,
|
|
144
|
-
},
|
|
145
147
|
dynamicBudget: {
|
|
146
148
|
enabled: false,
|
|
147
149
|
minInjectTokens: 160,
|
|
@@ -161,13 +163,9 @@ describe("Memory retrieval benchmark", () => {
|
|
|
161
163
|
beforeEach(() => {
|
|
162
164
|
const db = getDb();
|
|
163
165
|
db.run("DELETE FROM memory_item_sources");
|
|
164
|
-
db.run("DELETE FROM memory_item_entities");
|
|
165
|
-
db.run("DELETE FROM memory_entity_relations");
|
|
166
|
-
db.run("DELETE FROM memory_entities");
|
|
167
166
|
db.run("DELETE FROM memory_embeddings");
|
|
168
|
-
db.run("DELETE FROM memory_summaries");
|
|
169
167
|
db.run("DELETE FROM memory_items");
|
|
170
|
-
|
|
168
|
+
|
|
171
169
|
db.run("DELETE FROM memory_segments");
|
|
172
170
|
db.run("DELETE FROM messages");
|
|
173
171
|
db.run("DELETE FROM conversations");
|
|
@@ -198,8 +196,8 @@ describe("Memory retrieval benchmark", () => {
|
|
|
198
196
|
|
|
199
197
|
expect(recall.enabled).toBe(true);
|
|
200
198
|
expect(recall.degraded).toBe(false);
|
|
201
|
-
|
|
202
|
-
expect(recall.
|
|
199
|
+
// Recency search finds conversation-scoped segments
|
|
200
|
+
expect(recall.recencyHits).toBeGreaterThan(0);
|
|
203
201
|
// Relaxed threshold — guards against severe regressions, not precise benchmarking
|
|
204
202
|
expect(recall.latencyMs).toBeLessThan(500);
|
|
205
203
|
});
|
|
@@ -218,8 +216,7 @@ describe("Memory retrieval benchmark", () => {
|
|
|
218
216
|
|
|
219
217
|
expect(recall.enabled).toBe(true);
|
|
220
218
|
expect(recall.degraded).toBe(false);
|
|
221
|
-
expect(recall.
|
|
222
|
-
expect(recall.selectedCount).toBeGreaterThan(0);
|
|
219
|
+
expect(recall.recencyHits).toBeGreaterThan(0);
|
|
223
220
|
expect(recall.latencyMs).toBeLessThan(1000);
|
|
224
221
|
});
|
|
225
222
|
|
|
@@ -237,8 +234,7 @@ describe("Memory retrieval benchmark", () => {
|
|
|
237
234
|
|
|
238
235
|
expect(recall.enabled).toBe(true);
|
|
239
236
|
expect(recall.degraded).toBe(false);
|
|
240
|
-
expect(recall.
|
|
241
|
-
expect(recall.selectedCount).toBeGreaterThan(0);
|
|
237
|
+
expect(recall.recencyHits).toBeGreaterThan(0);
|
|
242
238
|
expect(recall.latencyMs).toBeLessThan(2000);
|
|
243
239
|
});
|
|
244
240
|
|
|
@@ -256,10 +252,11 @@ describe("Memory retrieval benchmark", () => {
|
|
|
256
252
|
);
|
|
257
253
|
|
|
258
254
|
expect(recall.enabled).toBe(true);
|
|
255
|
+
// With Qdrant mocked empty and recency-only candidates below tier threshold,
|
|
256
|
+
// injectedTokens is 0. Verify the budget cap is still respected.
|
|
259
257
|
expect(recall.injectedTokens).toBeLessThanOrEqual(smallBudget);
|
|
260
|
-
expect(recall.injectedTokens).toBeGreaterThan(0);
|
|
261
258
|
|
|
262
|
-
// Compare against a larger budget
|
|
259
|
+
// Compare against a larger budget
|
|
263
260
|
const largeBudget = 2000;
|
|
264
261
|
const largeConfig = makeConfig({ maxInjectTokens: largeBudget });
|
|
265
262
|
const largeRecall = await buildMemoryRecall(
|
|
@@ -275,137 +272,20 @@ describe("Memory retrieval benchmark", () => {
|
|
|
275
272
|
);
|
|
276
273
|
});
|
|
277
274
|
|
|
278
|
-
test("
|
|
279
|
-
const conversationId = "conv-bench-
|
|
280
|
-
const now = 1_700_500_000_000;
|
|
281
|
-
// Seed enough items that early termination can trigger
|
|
282
|
-
seedMemoryItems(conversationId, 500, now);
|
|
283
|
-
|
|
284
|
-
// Config with early termination enabled and low thresholds to trigger it
|
|
285
|
-
const etConfig: AssistantConfig = {
|
|
286
|
-
...DEFAULT_CONFIG,
|
|
287
|
-
memory: {
|
|
288
|
-
...DEFAULT_CONFIG.memory,
|
|
289
|
-
embeddings: {
|
|
290
|
-
...DEFAULT_CONFIG.memory.embeddings,
|
|
291
|
-
provider: "local" as const,
|
|
292
|
-
required: false,
|
|
293
|
-
},
|
|
294
|
-
retrieval: {
|
|
295
|
-
...DEFAULT_CONFIG.memory.retrieval,
|
|
296
|
-
lexicalTopK: 50,
|
|
297
|
-
semanticTopK: 20,
|
|
298
|
-
maxInjectTokens: 750,
|
|
299
|
-
reranking: {
|
|
300
|
-
...DEFAULT_CONFIG.memory.retrieval.reranking,
|
|
301
|
-
enabled: false,
|
|
302
|
-
},
|
|
303
|
-
dynamicBudget: {
|
|
304
|
-
enabled: false,
|
|
305
|
-
minInjectTokens: 160,
|
|
306
|
-
maxInjectTokens: 750,
|
|
307
|
-
targetHeadroomTokens: 900,
|
|
308
|
-
},
|
|
309
|
-
earlyTermination: {
|
|
310
|
-
enabled: true,
|
|
311
|
-
minCandidates: 5,
|
|
312
|
-
minHighConfidence: 3,
|
|
313
|
-
confidenceThreshold: 0.3,
|
|
314
|
-
},
|
|
315
|
-
},
|
|
316
|
-
},
|
|
317
|
-
};
|
|
318
|
-
|
|
319
|
-
const recall = await buildMemoryRecall(
|
|
320
|
-
"What do we know about topic-5 and keyword-3?",
|
|
321
|
-
conversationId,
|
|
322
|
-
etConfig,
|
|
323
|
-
);
|
|
324
|
-
|
|
325
|
-
expect(recall.enabled).toBe(true);
|
|
326
|
-
expect(recall.earlyTerminated).toBe(true);
|
|
327
|
-
// Semantic search should be skipped when early termination fires
|
|
328
|
-
expect(recall.semanticHits).toBe(0);
|
|
329
|
-
expect(recall.selectedCount).toBeGreaterThan(0);
|
|
330
|
-
});
|
|
331
|
-
|
|
332
|
-
test("early termination skips semantic search entirely", async () => {
|
|
333
|
-
const conversationId = "conv-bench-et-skip";
|
|
275
|
+
test("semantic search is invoked when not early terminated", async () => {
|
|
276
|
+
const conversationId = "conv-bench-semantic";
|
|
334
277
|
const now = 1_700_500_000_000;
|
|
335
|
-
seedMemoryItems(conversationId,
|
|
278
|
+
seedMemoryItems(conversationId, 100, now);
|
|
336
279
|
|
|
337
280
|
const query = "What do we know about topic-5 and keyword-3?";
|
|
338
281
|
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
memory: {
|
|
342
|
-
...DEFAULT_CONFIG.memory,
|
|
343
|
-
embeddings: {
|
|
344
|
-
...DEFAULT_CONFIG.memory.embeddings,
|
|
345
|
-
provider: "local" as const,
|
|
346
|
-
required: false,
|
|
347
|
-
},
|
|
348
|
-
retrieval: {
|
|
349
|
-
...DEFAULT_CONFIG.memory.retrieval,
|
|
350
|
-
lexicalTopK: 50,
|
|
351
|
-
semanticTopK: 20,
|
|
352
|
-
maxInjectTokens: 750,
|
|
353
|
-
reranking: {
|
|
354
|
-
...DEFAULT_CONFIG.memory.retrieval.reranking,
|
|
355
|
-
enabled: false,
|
|
356
|
-
},
|
|
357
|
-
dynamicBudget: {
|
|
358
|
-
enabled: false,
|
|
359
|
-
minInjectTokens: 160,
|
|
360
|
-
maxInjectTokens: 750,
|
|
361
|
-
targetHeadroomTokens: 900,
|
|
362
|
-
},
|
|
363
|
-
earlyTermination: {
|
|
364
|
-
enabled: true,
|
|
365
|
-
minCandidates: 5,
|
|
366
|
-
minHighConfidence: 3,
|
|
367
|
-
confidenceThreshold: 0.3,
|
|
368
|
-
},
|
|
369
|
-
},
|
|
370
|
-
},
|
|
371
|
-
};
|
|
372
|
-
|
|
373
|
-
const noEtConfig: AssistantConfig = {
|
|
374
|
-
...etConfig,
|
|
375
|
-
memory: {
|
|
376
|
-
...etConfig.memory,
|
|
377
|
-
retrieval: {
|
|
378
|
-
...etConfig.memory.retrieval,
|
|
379
|
-
earlyTermination: {
|
|
380
|
-
enabled: false,
|
|
381
|
-
minCandidates: 5,
|
|
382
|
-
minHighConfidence: 3,
|
|
383
|
-
confidenceThreshold: 0.3,
|
|
384
|
-
},
|
|
385
|
-
},
|
|
386
|
-
},
|
|
387
|
-
};
|
|
388
|
-
|
|
389
|
-
// Run with ET enabled — semantic search should be skipped
|
|
390
|
-
semanticSearchCallCount = 0;
|
|
391
|
-
const etRecall = await buildMemoryRecall(query, conversationId, etConfig);
|
|
392
|
-
const etCalls = semanticSearchCallCount;
|
|
393
|
-
|
|
394
|
-
expect(etRecall.earlyTerminated).toBe(true);
|
|
395
|
-
expect(etRecall.semanticHits).toBe(0);
|
|
396
|
-
expect(etCalls).toBe(0);
|
|
397
|
-
|
|
398
|
-
// Run without ET — semantic search should be invoked
|
|
282
|
+
// earlyTermination is always false in the new pipeline, so semantic
|
|
283
|
+
// search should always be invoked when a query vector is available.
|
|
399
284
|
semanticSearchCallCount = 0;
|
|
400
|
-
const
|
|
401
|
-
|
|
402
|
-
conversationId,
|
|
403
|
-
noEtConfig,
|
|
404
|
-
);
|
|
405
|
-
const baselineCalls = semanticSearchCallCount;
|
|
285
|
+
const config = makeConfig();
|
|
286
|
+
await buildMemoryRecall(query, conversationId, config);
|
|
406
287
|
|
|
407
|
-
expect(
|
|
408
|
-
expect(baselineCalls).toBeGreaterThan(0);
|
|
288
|
+
expect(semanticSearchCallCount).toBeGreaterThan(0);
|
|
409
289
|
});
|
|
410
290
|
|
|
411
291
|
test("recall.latencyMs tracks wall-clock within 50% tolerance", async () => {
|
|
@@ -427,14 +307,17 @@ describe("Memory retrieval benchmark", () => {
|
|
|
427
307
|
const wallMs = Date.now() - wallStart;
|
|
428
308
|
|
|
429
309
|
expect(recall.enabled).toBe(true);
|
|
430
|
-
|
|
310
|
+
// latencyMs may be 0 when the pipeline runs very fast (< 1ms granularity)
|
|
311
|
+
expect(recall.latencyMs).toBeGreaterThanOrEqual(0);
|
|
431
312
|
|
|
432
313
|
// Self-reported latencyMs should agree with wall-clock within 50%.
|
|
433
314
|
// Tolerance is wide because both sides use Date.now() (integer ms),
|
|
434
315
|
// so on fast runs the quantization error can be large relative to
|
|
435
316
|
// total elapsed time.
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
317
|
+
if (wallMs > 0) {
|
|
318
|
+
const ratio = recall.latencyMs / wallMs;
|
|
319
|
+
expect(ratio).toBeGreaterThanOrEqual(0.5);
|
|
320
|
+
expect(ratio).toBeLessThanOrEqual(1.5);
|
|
321
|
+
}
|
|
439
322
|
});
|
|
440
323
|
});
|
|
@@ -1,11 +1,10 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Atomicity tests for memory UPSERT paths.
|
|
3
3
|
*
|
|
4
|
-
* SQLite is single-writer, and indexMessageNow
|
|
5
|
-
*
|
|
6
|
-
*
|
|
7
|
-
*
|
|
8
|
-
* sequentially.
|
|
4
|
+
* SQLite is single-writer, and indexMessageNow is a synchronous function.
|
|
5
|
+
* Because every call runs to completion before the next microtask starts, the
|
|
6
|
+
* Promise.all / Promise.resolve().then() pattern used here does NOT create
|
|
7
|
+
* true concurrent execution — calls still run sequentially.
|
|
9
8
|
*
|
|
10
9
|
* What these tests DO verify is the correctness of the ON CONFLICT /
|
|
11
10
|
* IMMEDIATE-transaction logic when the same logical operation is repeated many
|
|
@@ -47,6 +46,7 @@ mock.module("../util/logger.js", () => ({
|
|
|
47
46
|
mock.module("../memory/qdrant-client.js", () => ({
|
|
48
47
|
getQdrantClient: () => ({
|
|
49
48
|
searchWithFilter: async () => [],
|
|
49
|
+
hybridSearch: async () => [],
|
|
50
50
|
upsertPoints: async () => {},
|
|
51
51
|
deletePoints: async () => {},
|
|
52
52
|
}),
|
|
@@ -75,10 +75,6 @@ mock.module("../config/loader.js", () => ({
|
|
|
75
75
|
invalidateConfigCache: () => {},
|
|
76
76
|
}));
|
|
77
77
|
|
|
78
|
-
import {
|
|
79
|
-
createOrUpdatePendingConflict,
|
|
80
|
-
listPendingConflicts,
|
|
81
|
-
} from "../memory/conflict-store.js";
|
|
82
78
|
import { getDb, initializeDb, resetDb } from "../memory/db.js";
|
|
83
79
|
import { indexMessageNow } from "../memory/indexer.js";
|
|
84
80
|
import {
|
|
@@ -102,15 +98,9 @@ afterAll(() => {
|
|
|
102
98
|
|
|
103
99
|
function resetTables() {
|
|
104
100
|
const db = getDb();
|
|
105
|
-
db.run("DELETE FROM memory_item_conflicts");
|
|
106
|
-
db.run("DELETE FROM memory_item_entities");
|
|
107
|
-
db.run("DELETE FROM memory_entity_relations");
|
|
108
|
-
db.run("DELETE FROM memory_entities");
|
|
109
101
|
db.run("DELETE FROM memory_item_sources");
|
|
110
102
|
db.run("DELETE FROM memory_embeddings");
|
|
111
|
-
db.run("DELETE FROM memory_summaries");
|
|
112
103
|
db.run("DELETE FROM memory_items");
|
|
113
|
-
db.run("DELETE FROM memory_segment_fts");
|
|
114
104
|
db.run("DELETE FROM memory_segments");
|
|
115
105
|
db.run("DELETE FROM memory_jobs");
|
|
116
106
|
db.run("DELETE FROM messages");
|
|
@@ -151,50 +141,6 @@ function seedConversationAndMessage(
|
|
|
151
141
|
.run();
|
|
152
142
|
}
|
|
153
143
|
|
|
154
|
-
/** Insert a pair of memory items that can serve as conflict participants. */
|
|
155
|
-
function seedItemPair(
|
|
156
|
-
suffix: string,
|
|
157
|
-
scopeId = "default",
|
|
158
|
-
): { existingItemId: string; candidateItemId: string } {
|
|
159
|
-
const db = getDb();
|
|
160
|
-
const now = Date.now();
|
|
161
|
-
const existingItemId = `existing-${suffix}`;
|
|
162
|
-
const candidateItemId = `candidate-${suffix}`;
|
|
163
|
-
db.insert(memoryItems)
|
|
164
|
-
.values([
|
|
165
|
-
{
|
|
166
|
-
id: existingItemId,
|
|
167
|
-
kind: "preference",
|
|
168
|
-
subject: "framework preference",
|
|
169
|
-
statement: `Existing statement ${suffix}`,
|
|
170
|
-
status: "active",
|
|
171
|
-
confidence: 0.8,
|
|
172
|
-
importance: 0.7,
|
|
173
|
-
fingerprint: `fp-existing-${suffix}`,
|
|
174
|
-
verificationState: "assistant_inferred",
|
|
175
|
-
scopeId,
|
|
176
|
-
firstSeenAt: now,
|
|
177
|
-
lastSeenAt: now,
|
|
178
|
-
},
|
|
179
|
-
{
|
|
180
|
-
id: candidateItemId,
|
|
181
|
-
kind: "preference",
|
|
182
|
-
subject: "framework preference",
|
|
183
|
-
statement: `Candidate statement ${suffix}`,
|
|
184
|
-
status: "pending_clarification",
|
|
185
|
-
confidence: 0.8,
|
|
186
|
-
importance: 0.7,
|
|
187
|
-
fingerprint: `fp-candidate-${suffix}`,
|
|
188
|
-
verificationState: "assistant_inferred",
|
|
189
|
-
scopeId,
|
|
190
|
-
firstSeenAt: now,
|
|
191
|
-
lastSeenAt: now,
|
|
192
|
-
},
|
|
193
|
-
])
|
|
194
|
-
.run();
|
|
195
|
-
return { existingItemId, candidateItemId };
|
|
196
|
-
}
|
|
197
|
-
|
|
198
144
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
199
145
|
// Test suite: segment UPSERT atomicity under parallel indexer load
|
|
200
146
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
@@ -484,191 +430,6 @@ describe("segment UPSERT atomicity under repeated indexer invocations", () => {
|
|
|
484
430
|
});
|
|
485
431
|
});
|
|
486
432
|
|
|
487
|
-
// ─────────────────────────────────────────────────────────────────────────────
|
|
488
|
-
// Test suite: conflict creation UPSERT atomicity
|
|
489
|
-
// ─────────────────────────────────────────────────────────────────────────────
|
|
490
|
-
|
|
491
|
-
describe("conflict creation UPSERT atomicity", () => {
|
|
492
|
-
beforeEach(() => {
|
|
493
|
-
resetTables();
|
|
494
|
-
});
|
|
495
|
-
|
|
496
|
-
test("repeated createOrUpdatePendingConflict calls for the same pair produce exactly one conflict row", async () => {
|
|
497
|
-
// Critical UPSERT path: the same conflict pair inserted multiple times
|
|
498
|
-
// (e.g. duplicate worker dispatches, retries). The IMMEDIATE transaction
|
|
499
|
-
// guard in createOrUpdatePendingConflict must ensure only one row exists.
|
|
500
|
-
const pair = seedItemPair("parallel-create");
|
|
501
|
-
|
|
502
|
-
// Call createOrUpdatePendingConflict N times for the same pair. Calls run
|
|
503
|
-
// sequentially (synchronous); the test verifies that repeated calls produce
|
|
504
|
-
// exactly one conflict row — the IMMEDIATE transaction deduplication path.
|
|
505
|
-
const WORKERS = 10;
|
|
506
|
-
const results = await Promise.all(
|
|
507
|
-
Array.from({ length: WORKERS }, (_, i) =>
|
|
508
|
-
Promise.resolve().then(() =>
|
|
509
|
-
createOrUpdatePendingConflict({
|
|
510
|
-
scopeId: "default",
|
|
511
|
-
existingItemId: pair.existingItemId,
|
|
512
|
-
candidateItemId: pair.candidateItemId,
|
|
513
|
-
relationship: "ambiguous_contradiction",
|
|
514
|
-
clarificationQuestion: `Worker ${i} discovered a contradiction`,
|
|
515
|
-
}),
|
|
516
|
-
),
|
|
517
|
-
),
|
|
518
|
-
);
|
|
519
|
-
|
|
520
|
-
// All callers must receive the same conflict ID — the deduplication path
|
|
521
|
-
// returns the existing row on the second and subsequent calls.
|
|
522
|
-
const firstId = results[0].id;
|
|
523
|
-
for (const result of results) {
|
|
524
|
-
expect(result.id).toBe(firstId);
|
|
525
|
-
}
|
|
526
|
-
|
|
527
|
-
// Exactly one pending conflict row in the DB.
|
|
528
|
-
const pending = listPendingConflicts("default");
|
|
529
|
-
expect(pending).toHaveLength(1);
|
|
530
|
-
expect(pending[0].id).toBe(firstId);
|
|
531
|
-
});
|
|
532
|
-
|
|
533
|
-
test("conflict creation for different pairs produces distinct rows without cross-contamination", async () => {
|
|
534
|
-
// Each unique item pair must get its own conflict row — deduplication must
|
|
535
|
-
// be scoped to the pair, not global. Also exercises the idempotent
|
|
536
|
-
// insert-then-update path within each pair.
|
|
537
|
-
const PAIR_COUNT = 6;
|
|
538
|
-
const pairs = Array.from({ length: PAIR_COUNT }, (_, i) =>
|
|
539
|
-
seedItemPair(`multi-pair-${i}`),
|
|
540
|
-
);
|
|
541
|
-
|
|
542
|
-
// For each pair, make two calls: one insert and one update. All calls run
|
|
543
|
-
// sequentially. The test verifies that each pair ends up with exactly one
|
|
544
|
-
// conflict row (no cross-pair contamination, idempotent update path works).
|
|
545
|
-
await Promise.all(
|
|
546
|
-
pairs.flatMap((pair) => [
|
|
547
|
-
// First call: insert with 'contradiction'.
|
|
548
|
-
Promise.resolve().then(() =>
|
|
549
|
-
createOrUpdatePendingConflict({
|
|
550
|
-
scopeId: "default",
|
|
551
|
-
existingItemId: pair.existingItemId,
|
|
552
|
-
candidateItemId: pair.candidateItemId,
|
|
553
|
-
relationship: "contradiction",
|
|
554
|
-
}),
|
|
555
|
-
),
|
|
556
|
-
// Second call: update to 'ambiguous_contradiction' — tests the idempotent update path.
|
|
557
|
-
Promise.resolve().then(() =>
|
|
558
|
-
createOrUpdatePendingConflict({
|
|
559
|
-
scopeId: "default",
|
|
560
|
-
existingItemId: pair.existingItemId,
|
|
561
|
-
candidateItemId: pair.candidateItemId,
|
|
562
|
-
relationship: "ambiguous_contradiction",
|
|
563
|
-
}),
|
|
564
|
-
),
|
|
565
|
-
]),
|
|
566
|
-
);
|
|
567
|
-
|
|
568
|
-
// Each pair must have produced exactly one pending conflict.
|
|
569
|
-
const pending = listPendingConflicts("default");
|
|
570
|
-
expect(pending).toHaveLength(PAIR_COUNT);
|
|
571
|
-
|
|
572
|
-
// All conflict IDs must be unique.
|
|
573
|
-
const ids = pending.map((c) => c.id);
|
|
574
|
-
expect(new Set(ids).size).toBe(PAIR_COUNT);
|
|
575
|
-
|
|
576
|
-
// Each returned conflict must reference the correct item pair.
|
|
577
|
-
for (let i = 0; i < PAIR_COUNT; i++) {
|
|
578
|
-
const pair = pairs[i];
|
|
579
|
-
const found = pending.find(
|
|
580
|
-
(c) =>
|
|
581
|
-
c.existingItemId === pair.existingItemId &&
|
|
582
|
-
c.candidateItemId === pair.candidateItemId,
|
|
583
|
-
);
|
|
584
|
-
expect(found).toBeDefined();
|
|
585
|
-
// The update call ran after the insert, so relationship is ambiguous_contradiction.
|
|
586
|
-
expect(found!.relationship).toBe("ambiguous_contradiction");
|
|
587
|
-
}
|
|
588
|
-
});
|
|
589
|
-
|
|
590
|
-
test("repeated updates to the same conflict row converge to a consistent state", async () => {
|
|
591
|
-
// Multiple update calls for the same conflict (e.g. repeated worker runs).
|
|
592
|
-
// All updates must succeed (last writer wins is acceptable) and the row
|
|
593
|
-
// must remain internally consistent.
|
|
594
|
-
const pair = seedItemPair("concurrent-update");
|
|
595
|
-
const first = createOrUpdatePendingConflict({
|
|
596
|
-
scopeId: "default",
|
|
597
|
-
existingItemId: pair.existingItemId,
|
|
598
|
-
candidateItemId: pair.candidateItemId,
|
|
599
|
-
relationship: "contradiction",
|
|
600
|
-
clarificationQuestion: "Initial question",
|
|
601
|
-
});
|
|
602
|
-
|
|
603
|
-
// Call createOrUpdatePendingConflict N times against the same existing row.
|
|
604
|
-
// Calls are sequential; the test verifies the row stays consistent (one row,
|
|
605
|
-
// valid status/relationship) after repeated updates — last writer wins.
|
|
606
|
-
const UPDATES = 8;
|
|
607
|
-
const results = await Promise.all(
|
|
608
|
-
Array.from({ length: UPDATES }, (_, i) =>
|
|
609
|
-
Promise.resolve().then(() =>
|
|
610
|
-
createOrUpdatePendingConflict({
|
|
611
|
-
scopeId: "default",
|
|
612
|
-
existingItemId: pair.existingItemId,
|
|
613
|
-
candidateItemId: pair.candidateItemId,
|
|
614
|
-
relationship: "ambiguous_contradiction",
|
|
615
|
-
clarificationQuestion: `Updated question from worker ${i}`,
|
|
616
|
-
}),
|
|
617
|
-
),
|
|
618
|
-
),
|
|
619
|
-
);
|
|
620
|
-
|
|
621
|
-
// All calls must return the same conflict ID.
|
|
622
|
-
for (const result of results) {
|
|
623
|
-
expect(result.id).toBe(first.id);
|
|
624
|
-
}
|
|
625
|
-
|
|
626
|
-
// Still exactly one row in the DB.
|
|
627
|
-
const pending = listPendingConflicts("default");
|
|
628
|
-
expect(pending).toHaveLength(1);
|
|
629
|
-
|
|
630
|
-
// The row must be consistent: valid status, valid relationship.
|
|
631
|
-
const conflict = pending[0];
|
|
632
|
-
expect(conflict.status).toBe("pending_clarification");
|
|
633
|
-
expect(conflict.relationship).toBe("ambiguous_contradiction");
|
|
634
|
-
});
|
|
635
|
-
|
|
636
|
-
test("scope isolation ensures conflicts in different scopes do not interfere", async () => {
|
|
637
|
-
// Conflicts created in different scopes must not cross-contaminate each
|
|
638
|
-
// other's conflict sets — scopeId must be part of the deduplication key.
|
|
639
|
-
const SCOPES = ["scope-alpha", "scope-beta", "scope-gamma"];
|
|
640
|
-
const scopePairs = SCOPES.map((scope) => ({
|
|
641
|
-
scope,
|
|
642
|
-
pair: seedItemPair(`scope-${scope}`, scope),
|
|
643
|
-
}));
|
|
644
|
-
|
|
645
|
-
// Make 3 calls per scope for all scopes. Calls run sequentially; the test
|
|
646
|
-
// verifies that each scope produces exactly one conflict row and that there
|
|
647
|
-
// is no cross-scope contamination from repeated same-scope calls.
|
|
648
|
-
await Promise.all(
|
|
649
|
-
scopePairs.flatMap(({ scope, pair }) =>
|
|
650
|
-
Array.from({ length: 3 }, () =>
|
|
651
|
-
Promise.resolve().then(() =>
|
|
652
|
-
createOrUpdatePendingConflict({
|
|
653
|
-
scopeId: scope,
|
|
654
|
-
existingItemId: pair.existingItemId,
|
|
655
|
-
candidateItemId: pair.candidateItemId,
|
|
656
|
-
relationship: "contradiction",
|
|
657
|
-
}),
|
|
658
|
-
),
|
|
659
|
-
),
|
|
660
|
-
),
|
|
661
|
-
);
|
|
662
|
-
|
|
663
|
-
for (const scope of SCOPES) {
|
|
664
|
-
const pending = listPendingConflicts(scope);
|
|
665
|
-
// Exactly one conflict per scope, no cross-scope leakage.
|
|
666
|
-
expect(pending).toHaveLength(1);
|
|
667
|
-
expect(pending[0].scopeId).toBe(scope);
|
|
668
|
-
}
|
|
669
|
-
});
|
|
670
|
-
});
|
|
671
|
-
|
|
672
433
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
673
434
|
// Test suite: memory segment job atomicity
|
|
674
435
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
@@ -81,4 +81,32 @@ describe("buildMultipartMime", () => {
|
|
|
81
81
|
expect(decoded).toContain('filename="b.png"');
|
|
82
82
|
expect(decoded).toContain("Content-Type: image/png");
|
|
83
83
|
});
|
|
84
|
+
|
|
85
|
+
test("sanitizes CRLF from header values to prevent header injection", () => {
|
|
86
|
+
const result = buildMultipartMime({
|
|
87
|
+
to: "victim@example.com\r\nBcc: attacker@example.com",
|
|
88
|
+
subject: "Fwd: Hello\r\nCc: attacker@example.com",
|
|
89
|
+
body: "Body",
|
|
90
|
+
cc: "team@example.com\nX-Injected: yes",
|
|
91
|
+
bcc: "audit@example.com\r\nX-Another: value",
|
|
92
|
+
inReplyTo: "<id@example.com>\nReferences: <evil@example.com>",
|
|
93
|
+
attachments: [],
|
|
94
|
+
});
|
|
95
|
+
|
|
96
|
+
const decoded = Buffer.from(
|
|
97
|
+
result.replace(/-/g, "+").replace(/_/g, "/"),
|
|
98
|
+
"base64",
|
|
99
|
+
).toString("utf-8");
|
|
100
|
+
|
|
101
|
+
expect(decoded).toContain("To: victim@example.com Bcc: attacker@example.com");
|
|
102
|
+
expect(decoded).toContain("Subject: Fwd: Hello Cc: attacker@example.com");
|
|
103
|
+
expect(decoded).toContain("Cc: team@example.com X-Injected: yes");
|
|
104
|
+
expect(decoded).toContain("Bcc: audit@example.com X-Another: value");
|
|
105
|
+
expect(decoded).toContain(
|
|
106
|
+
"In-Reply-To: <id@example.com> References: <evil@example.com>",
|
|
107
|
+
);
|
|
108
|
+
expect(decoded).not.toContain("\r\nBcc: attacker@example.com");
|
|
109
|
+
expect(decoded).not.toContain("\r\nCc: attacker@example.com");
|
|
110
|
+
});
|
|
111
|
+
|
|
84
112
|
});
|