@vellumai/assistant 0.5.7 → 0.5.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Dockerfile +2 -1
- package/docker-entrypoint.sh +9 -0
- package/docs/architecture/memory.md +13 -11
- package/node_modules/@vellumai/ces-contracts/src/error.ts +1 -1
- package/node_modules/@vellumai/ces-contracts/src/grants.ts +1 -1
- package/node_modules/@vellumai/ces-contracts/src/handles.ts +1 -1
- package/node_modules/@vellumai/ces-contracts/src/index.ts +1 -1
- package/node_modules/@vellumai/ces-contracts/src/rpc.ts +1 -1
- package/package.json +1 -1
- package/src/__tests__/approval-cascade.test.ts +0 -1
- package/src/__tests__/browser-fill-credential.test.ts +1 -1
- package/src/__tests__/call-controller.test.ts +0 -1
- package/src/__tests__/ces-rpc-credential-backend.test.ts +3 -3
- package/src/__tests__/ces-startup-timeout.test.ts +40 -0
- package/src/__tests__/config-schema-cmd.test.ts +0 -1
- package/src/__tests__/config-schema.test.ts +2 -0
- package/src/__tests__/conversation-abort-tool-results.test.ts +0 -1
- package/src/__tests__/conversation-agent-loop-overflow.test.ts +0 -2
- package/src/__tests__/conversation-agent-loop.test.ts +2 -4
- package/src/__tests__/conversation-confirmation-signals.test.ts +0 -1
- package/src/__tests__/conversation-error.test.ts +15 -1
- package/src/__tests__/conversation-messaging-secret-redirect.test.ts +1 -1
- package/src/__tests__/conversation-pre-run-repair.test.ts +0 -1
- package/src/__tests__/conversation-provider-retry-repair.test.ts +0 -1
- package/src/__tests__/conversation-queue.test.ts +0 -1
- package/src/__tests__/conversation-slash-queue.test.ts +0 -1
- package/src/__tests__/conversation-slash-unknown.test.ts +0 -1
- package/src/__tests__/conversation-workspace-injection.test.ts +0 -1
- package/src/__tests__/conversation-workspace-tool-tracking.test.ts +0 -1
- package/src/__tests__/credential-execution-client.test.ts +5 -2
- package/src/__tests__/credential-execution-feature-gates.test.ts +31 -16
- package/src/__tests__/credential-execution-managed-contract.test.ts +2 -2
- package/src/__tests__/credential-security-e2e.test.ts +1 -1
- package/src/__tests__/credential-security-invariants.test.ts +2 -5
- package/src/__tests__/credentials-cli.test.ts +4 -3
- package/src/__tests__/daemon-credential-client.test.ts +123 -0
- package/src/__tests__/deterministic-verification-control-plane.test.ts +1 -0
- package/src/__tests__/gateway-client-managed-outbound.test.ts +79 -1
- package/src/__tests__/journal-context.test.ts +335 -0
- package/src/__tests__/memory-context-benchmark.benchmark.test.ts +0 -3
- package/src/__tests__/memory-lifecycle-e2e.test.ts +70 -25
- package/src/__tests__/memory-recall-quality.test.ts +48 -17
- package/src/__tests__/memory-regressions.test.ts +408 -363
- package/src/__tests__/memory-retrieval.benchmark.test.ts +0 -3
- package/src/__tests__/non-member-access-request.test.ts +2 -2
- package/src/__tests__/notification-decision-strategy.test.ts +71 -0
- package/src/__tests__/oauth-cli.test.ts +5 -1
- package/src/__tests__/provider-commit-message-generator.test.ts +0 -37
- package/src/__tests__/provider-error-scenarios.test.ts +0 -267
- package/src/__tests__/provider-streaming.benchmark.test.ts +2 -81
- package/src/__tests__/relay-server.test.ts +1 -2
- package/src/__tests__/script-proxy-injection-runtime.test.ts +1 -1
- package/src/__tests__/secret-onetime-send.test.ts +1 -1
- package/src/__tests__/secure-keys.test.ts +18 -15
- package/src/__tests__/skill-memory.test.ts +17 -3
- package/src/__tests__/stale-approval-dedup.test.ts +171 -0
- package/src/__tests__/stt-hints.test.ts +437 -0
- package/src/__tests__/task-memory-cleanup.test.ts +14 -0
- package/src/__tests__/twilio-routes-twiml.test.ts +139 -1
- package/src/__tests__/voice-quality.test.ts +58 -0
- package/src/__tests__/voice-scoped-grant-consumer.test.ts +0 -1
- package/src/__tests__/workspace-migration-016-migrate-credentials-from-keychain.test.ts +5 -3
- package/src/acp/agent-process.ts +9 -1
- package/src/agent/loop.ts +1 -1
- package/src/approvals/guardian-request-resolvers.ts +164 -38
- package/src/calls/__tests__/tts-text-sanitizer.test.ts +254 -0
- package/src/calls/call-controller.ts +9 -5
- package/src/calls/fish-audio-client.ts +26 -14
- package/src/calls/stt-hints.ts +189 -0
- package/src/calls/tts-text-sanitizer.ts +61 -0
- package/src/calls/twilio-routes.ts +32 -4
- package/src/calls/voice-quality.ts +15 -3
- package/src/calls/voice-session-bridge.ts +1 -0
- package/src/cli/commands/avatar.ts +2 -2
- package/src/cli/commands/credentials.ts +110 -94
- package/src/cli/commands/doctor.ts +2 -2
- package/src/cli/commands/keys.ts +7 -7
- package/src/cli/commands/memory.ts +1 -1
- package/src/cli/commands/oauth/connections.ts +11 -29
- package/src/cli/commands/oauth/platform.ts +389 -43
- package/src/cli/lib/daemon-credential-client.ts +284 -0
- package/src/cli.ts +1 -1
- package/src/config/bundled-skills/AGENTS.md +34 -0
- package/src/config/bundled-skills/acp/SKILL.md +10 -0
- package/src/config/bundled-skills/app-builder/SKILL.md +0 -4
- package/src/config/bundled-skills/messaging/tools/messaging-analyze-style.ts +2 -2
- package/src/config/bundled-skills/playbooks/tools/playbook-create.ts +1 -0
- package/src/config/bundled-skills/playbooks/tools/playbook-update.ts +1 -0
- package/src/config/bundled-skills/settings/SKILL.md +15 -2
- package/src/config/bundled-skills/settings/TOOLS.json +46 -1
- package/src/config/bundled-skills/settings/tools/avatar-remove.ts +59 -0
- package/src/config/bundled-skills/settings/tools/avatar-update.ts +80 -0
- package/src/config/bundled-skills/slack/SKILL.md +1 -1
- package/src/config/bundled-tool-registry.ts +4 -0
- package/src/config/defaults.ts +0 -2
- package/src/config/env-registry.ts +4 -4
- package/src/config/env.ts +14 -1
- package/src/config/feature-flag-registry.json +1 -1
- package/src/config/loader.ts +8 -11
- package/src/config/schema.ts +5 -16
- package/src/config/schemas/calls.ts +17 -0
- package/src/config/schemas/inference.ts +2 -2
- package/src/config/schemas/journal.ts +16 -0
- package/src/config/schemas/memory-processing.ts +2 -2
- package/src/config/types.ts +1 -0
- package/src/contacts/contact-store.ts +2 -2
- package/src/credential-execution/executable-discovery.ts +1 -1
- package/src/credential-execution/startup-timeout.ts +36 -0
- package/src/daemon/approval-generators.ts +3 -9
- package/src/daemon/conversation-error.ts +13 -1
- package/src/daemon/conversation-memory.ts +1 -2
- package/src/daemon/conversation-process.ts +18 -1
- package/src/daemon/conversation-surfaces.ts +30 -1
- package/src/daemon/conversation.ts +20 -9
- package/src/daemon/guardian-action-generators.ts +3 -9
- package/src/daemon/lifecycle.ts +18 -11
- package/src/daemon/message-types/conversations.ts +1 -0
- package/src/daemon/server.ts +2 -3
- package/src/memory/app-store.ts +31 -0
- package/src/memory/db-init.ts +4 -0
- package/src/memory/indexer.ts +19 -10
- package/src/memory/items-extractor.ts +315 -322
- package/src/memory/job-handlers/summarization.ts +26 -16
- package/src/memory/jobs-store.ts +33 -1
- package/src/memory/journal-memory.ts +214 -0
- package/src/memory/migrations/193-add-source-type-columns.ts +81 -0
- package/src/memory/migrations/index.ts +1 -0
- package/src/memory/migrations/registry.ts +8 -0
- package/src/memory/retriever.test.ts +37 -25
- package/src/memory/retriever.ts +24 -49
- package/src/memory/schema/memory-core.ts +2 -0
- package/src/memory/search/formatting.ts +7 -44
- package/src/memory/search/staleness.ts +4 -0
- package/src/memory/search/tier-classifier.ts +10 -2
- package/src/memory/search/types.ts +2 -5
- package/src/memory/task-memory-cleanup.ts +4 -3
- package/src/notifications/adapters/slack.ts +168 -6
- package/src/notifications/broadcaster.ts +1 -0
- package/src/notifications/copy-composer.ts +59 -2
- package/src/notifications/signal.ts +2 -0
- package/src/notifications/types.ts +2 -0
- package/src/prompts/journal-context.ts +133 -0
- package/src/prompts/persona-resolver.ts +80 -24
- package/src/prompts/system-prompt.ts +8 -0
- package/src/prompts/templates/SOUL.md +10 -0
- package/src/providers/provider-send-message.ts +3 -32
- package/src/providers/registry.ts +2 -139
- package/src/providers/types.ts +1 -1
- package/src/runtime/access-request-helper.ts +4 -0
- package/src/runtime/auth/__tests__/guard-tests.test.ts +9 -50
- package/src/runtime/auth/route-policy.ts +2 -0
- package/src/runtime/gateway-client.ts +47 -4
- package/src/runtime/guardian-decision-types.ts +45 -4
- package/src/runtime/http-server.ts +5 -2
- package/src/runtime/routes/access-request-decision.ts +2 -2
- package/src/runtime/routes/app-management-routes.ts +2 -1
- package/src/runtime/routes/approval-strategies/guardian-callback-strategy.ts +219 -30
- package/src/runtime/routes/approval-strategies/guardian-text-engine-strategy.ts +37 -14
- package/src/runtime/routes/channel-readiness-routes.ts +9 -4
- package/src/runtime/routes/debug-routes.ts +12 -9
- package/src/runtime/routes/guardian-approval-interception.ts +168 -11
- package/src/runtime/routes/guardian-approval-prompt.ts +6 -1
- package/src/runtime/routes/guardian-approval-reply-helpers.ts +103 -21
- package/src/runtime/routes/identity-routes.ts +1 -1
- package/src/runtime/routes/inbound-message-handler.ts +31 -1
- package/src/runtime/routes/inbound-stages/acl-enforcement.ts +64 -5
- package/src/runtime/routes/inbound-stages/background-dispatch.ts +52 -40
- package/src/runtime/routes/integrations/twilio.ts +52 -10
- package/src/runtime/routes/memory-item-routes.test.ts +3 -3
- package/src/runtime/routes/memory-item-routes.ts +25 -11
- package/src/runtime/routes/secret-routes.ts +141 -10
- package/src/runtime/routes/tts-routes.ts +11 -1
- package/src/security/ces-credential-client.ts +18 -9
- package/src/security/ces-rpc-credential-backend.ts +4 -3
- package/src/security/credential-backend.ts +10 -4
- package/src/security/secure-keys.ts +21 -4
- package/src/skills/catalog-install.ts +4 -36
- package/src/skills/skill-memory.ts +1 -0
- package/src/subagent/manager.ts +2 -5
- package/src/tools/acp/spawn.ts +78 -1
- package/src/tools/credentials/vault.ts +5 -3
- package/src/tools/memory/definitions.ts +3 -2
- package/src/tools/memory/handlers.ts +10 -7
- package/src/tools/terminal/safe-env.ts +1 -0
- package/src/util/browser.ts +15 -0
- package/src/util/platform.ts +1 -1
- package/src/workspace/migrations/016-migrate-credentials-from-keychain.ts +4 -4
- package/src/workspace/migrations/017-seed-persona-dirs.ts +2 -1
- package/src/workspace/migrations/018-rekey-compound-credential-keys.ts +184 -0
- package/src/workspace/migrations/019-scope-journal-to-guardian.ts +103 -0
- package/src/workspace/migrations/migrate-to-workspace-volume.ts +4 -4
- package/src/workspace/migrations/registry.ts +4 -0
- package/src/workspace/provider-commit-message-generator.ts +12 -21
- package/src/__tests__/provider-fail-open-selection.test.ts +0 -271
- package/src/__tests__/provider-failover-actual-provider.test.ts +0 -66
- package/src/memory/search/lexical.ts +0 -48
- package/src/providers/failover.ts +0 -186
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* Memory lifecycle E2E regression test.
|
|
3
3
|
*
|
|
4
4
|
* Verifies the new memory pipeline end-to-end:
|
|
5
|
-
* -
|
|
5
|
+
* - Standard-kind enum items (identity, preference, project, decision, constraint, event, journal, capability, ...)
|
|
6
6
|
* - Supersession chains (supersedes/supersededBy fields)
|
|
7
7
|
* - Hybrid search retrieval
|
|
8
8
|
* - Two-layer XML injection format (<memory_context> with sections)
|
|
@@ -44,10 +44,31 @@ mock.module("../util/logger.js", () => ({
|
|
|
44
44
|
}),
|
|
45
45
|
}));
|
|
46
46
|
|
|
47
|
+
// Stub the local embedding backend so the real ONNX model never loads
|
|
48
|
+
mock.module("../memory/embedding-local.js", () => ({
|
|
49
|
+
LocalEmbeddingBackend: class {
|
|
50
|
+
readonly provider = "local" as const;
|
|
51
|
+
readonly model: string;
|
|
52
|
+
constructor(model: string) {
|
|
53
|
+
this.model = model;
|
|
54
|
+
}
|
|
55
|
+
async embed(texts: string[]): Promise<number[][]> {
|
|
56
|
+
return texts.map(() => new Array(384).fill(0));
|
|
57
|
+
}
|
|
58
|
+
},
|
|
59
|
+
}));
|
|
60
|
+
|
|
61
|
+
// Dynamic Qdrant mock: tests can push results to be returned by searchWithFilter/hybridSearch
|
|
62
|
+
let mockQdrantResults: Array<{
|
|
63
|
+
id: string;
|
|
64
|
+
score: number;
|
|
65
|
+
payload: Record<string, unknown>;
|
|
66
|
+
}> = [];
|
|
67
|
+
|
|
47
68
|
mock.module("../memory/qdrant-client.js", () => ({
|
|
48
69
|
getQdrantClient: () => ({
|
|
49
|
-
searchWithFilter: async () =>
|
|
50
|
-
hybridSearch: async () =>
|
|
70
|
+
searchWithFilter: async () => mockQdrantResults,
|
|
71
|
+
hybridSearch: async () => mockQdrantResults,
|
|
51
72
|
upsertPoints: async () => {},
|
|
52
73
|
deletePoints: async () => {},
|
|
53
74
|
}),
|
|
@@ -60,7 +81,6 @@ const TEST_CONFIG = {
|
|
|
60
81
|
...DEFAULT_CONFIG.memory,
|
|
61
82
|
embeddings: {
|
|
62
83
|
...DEFAULT_CONFIG.memory.embeddings,
|
|
63
|
-
provider: "openai" as const,
|
|
64
84
|
required: false,
|
|
65
85
|
},
|
|
66
86
|
extraction: {
|
|
@@ -115,6 +135,7 @@ describe("Memory lifecycle E2E regression", () => {
|
|
|
115
135
|
db.run("DELETE FROM conversations");
|
|
116
136
|
db.run("DELETE FROM memory_jobs");
|
|
117
137
|
db.run("DELETE FROM memory_checkpoints");
|
|
138
|
+
mockQdrantResults = [];
|
|
118
139
|
resetCleanupScheduleThrottle();
|
|
119
140
|
resetStaleSweepThrottle();
|
|
120
141
|
});
|
|
@@ -128,7 +149,7 @@ describe("Memory lifecycle E2E regression", () => {
|
|
|
128
149
|
}
|
|
129
150
|
});
|
|
130
151
|
|
|
131
|
-
test("extraction produces items with
|
|
152
|
+
test("extraction produces items with standard-kind enum and supersession chains form correctly", async () => {
|
|
132
153
|
const db = getDb();
|
|
133
154
|
const now = 1_701_100_000_000;
|
|
134
155
|
const conversationId = "conv-memory-lifecycle";
|
|
@@ -165,7 +186,7 @@ describe("Memory lifecycle E2E regression", () => {
|
|
|
165
186
|
])
|
|
166
187
|
.run();
|
|
167
188
|
|
|
168
|
-
// Seed items using the
|
|
189
|
+
// Seed items using the standard-kind enum
|
|
169
190
|
const kinds = [
|
|
170
191
|
"identity",
|
|
171
192
|
"preference",
|
|
@@ -299,12 +320,8 @@ describe("Memory lifecycle E2E regression", () => {
|
|
|
299
320
|
TEST_CONFIG,
|
|
300
321
|
);
|
|
301
322
|
|
|
302
|
-
//
|
|
303
|
-
//
|
|
304
|
-
// (threshold > 0.6) because semantic=0 with Qdrant mocked empty.
|
|
305
|
-
// Verify recency search ran successfully.
|
|
306
|
-
expect(recall.recencyHits).toBeGreaterThan(0);
|
|
307
|
-
// Candidates exist but don't pass tier classification, so injectedText is empty
|
|
323
|
+
// Without semantic search (Qdrant mocked empty), no candidates pass
|
|
324
|
+
// tier classification (threshold > 0.6).
|
|
308
325
|
expect(recall.enabled).toBe(true);
|
|
309
326
|
});
|
|
310
327
|
|
|
@@ -343,15 +360,47 @@ describe("Memory lifecycle E2E regression", () => {
|
|
|
343
360
|
})
|
|
344
361
|
.run();
|
|
345
362
|
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
363
|
+
// Seed a memory item so the semantic search path can find it
|
|
364
|
+
db.insert(memoryItems)
|
|
365
|
+
.values({
|
|
366
|
+
id: "item-timezone-pref",
|
|
367
|
+
kind: "preference",
|
|
368
|
+
subject: "timezone preference",
|
|
369
|
+
statement: "My preferred timezone is America/Los_Angeles.",
|
|
370
|
+
status: "active",
|
|
371
|
+
confidence: 0.9,
|
|
372
|
+
importance: 0.8,
|
|
373
|
+
fingerprint: "fp-item-timezone-pref",
|
|
374
|
+
firstSeenAt: now + 10,
|
|
375
|
+
lastSeenAt: now + 10,
|
|
376
|
+
})
|
|
377
|
+
.run();
|
|
378
|
+
|
|
379
|
+
db.insert(memoryItemSources)
|
|
380
|
+
.values({
|
|
381
|
+
memoryItemId: "item-timezone-pref",
|
|
382
|
+
messageId: "msg-injection-seed",
|
|
383
|
+
evidence: "timezone preference evidence",
|
|
384
|
+
createdAt: now + 10,
|
|
385
|
+
})
|
|
386
|
+
.run();
|
|
387
|
+
|
|
388
|
+
// Mock Qdrant to return the timezone preference item
|
|
389
|
+
mockQdrantResults = [
|
|
390
|
+
{
|
|
391
|
+
id: "emb-timezone-pref",
|
|
392
|
+
score: 0.92,
|
|
393
|
+
payload: {
|
|
394
|
+
target_type: "item",
|
|
395
|
+
target_id: "item-timezone-pref",
|
|
396
|
+
text: "My preferred timezone is America/Los_Angeles.",
|
|
397
|
+
kind: "preference",
|
|
398
|
+
status: "active",
|
|
399
|
+
created_at: now + 10,
|
|
400
|
+
last_seen_at: now + 10,
|
|
401
|
+
},
|
|
402
|
+
},
|
|
403
|
+
];
|
|
355
404
|
|
|
356
405
|
const recall = await buildMemoryRecall(
|
|
357
406
|
"timezone",
|
|
@@ -359,10 +408,6 @@ describe("Memory lifecycle E2E regression", () => {
|
|
|
359
408
|
TEST_CONFIG,
|
|
360
409
|
);
|
|
361
410
|
|
|
362
|
-
// The recency-only promotion path (Step 6 in retriever) ensures the
|
|
363
|
-
// seeded segment reaches tier 2 and is injected even without semantic
|
|
364
|
-
// search. Verify structure of the two-layer XML format.
|
|
365
|
-
expect(recall.recencyHits).toBeGreaterThan(0);
|
|
366
411
|
expect(recall.enabled).toBe(true);
|
|
367
412
|
expect(recall.injectedText.length).toBeGreaterThan(0);
|
|
368
413
|
expect(recall.injectedTokens).toBeGreaterThan(0);
|
|
@@ -397,7 +397,6 @@ describe("Memory Recall Quality", () => {
|
|
|
397
397
|
TEST_CONFIG,
|
|
398
398
|
);
|
|
399
399
|
|
|
400
|
-
expect(recall.recencyHits).toBeGreaterThan(0);
|
|
401
400
|
expect(recall.enabled).toBe(true);
|
|
402
401
|
// With high-scoring Qdrant results, items should be injected
|
|
403
402
|
expect(recall.semanticHits).toBeGreaterThan(0);
|
|
@@ -502,7 +501,6 @@ describe("Memory Recall Quality", () => {
|
|
|
502
501
|
TEST_CONFIG,
|
|
503
502
|
);
|
|
504
503
|
|
|
505
|
-
expect(recall.recencyHits).toBeGreaterThan(0);
|
|
506
504
|
expect(recall.enabled).toBe(true);
|
|
507
505
|
// High-importance preference should be injected
|
|
508
506
|
expect(recall.injectedText).toContain("TypeScript");
|
|
@@ -566,7 +564,6 @@ describe("Memory Recall Quality", () => {
|
|
|
566
564
|
);
|
|
567
565
|
|
|
568
566
|
// Recency search finds the segment but tier classification filters it
|
|
569
|
-
expect(recall.recencyHits).toBeGreaterThan(0);
|
|
570
567
|
// Superseded items should not leak into injected text
|
|
571
568
|
expect(recall.injectedText).not.toContain("vim for editing code");
|
|
572
569
|
});
|
|
@@ -623,7 +620,6 @@ describe("Memory Recall Quality", () => {
|
|
|
623
620
|
|
|
624
621
|
// Recency search finds segments but tier classification filters them.
|
|
625
622
|
// Key assertion: superseded MySQL item should not leak.
|
|
626
|
-
expect(recall.recencyHits).toBeGreaterThan(0);
|
|
627
623
|
expect(recall.injectedText).not.toContain("MySQL");
|
|
628
624
|
});
|
|
629
625
|
|
|
@@ -671,13 +667,29 @@ describe("Memory Recall Quality", () => {
|
|
|
671
667
|
firstSeenAt: now - 50_000,
|
|
672
668
|
});
|
|
673
669
|
|
|
670
|
+
// Mock Qdrant to return the active item as a semantic search result
|
|
671
|
+
mockQdrantResults = [
|
|
672
|
+
{
|
|
673
|
+
id: "emb-framework-active",
|
|
674
|
+
score: 0.92,
|
|
675
|
+
payload: {
|
|
676
|
+
target_type: "item",
|
|
677
|
+
target_id: "item-framework-active",
|
|
678
|
+
text: "Framework preference is React for this codebase",
|
|
679
|
+
kind: "preference",
|
|
680
|
+
status: "active",
|
|
681
|
+
created_at: now,
|
|
682
|
+
last_seen_at: now,
|
|
683
|
+
},
|
|
684
|
+
},
|
|
685
|
+
];
|
|
686
|
+
|
|
674
687
|
const recall = await buildMemoryRecall(
|
|
675
688
|
"framework preference",
|
|
676
689
|
"conv-invalid-status",
|
|
677
690
|
TEST_CONFIG,
|
|
678
691
|
);
|
|
679
|
-
|
|
680
|
-
// Active segment content should be injected; invalidated item should not leak
|
|
692
|
+
// Active item should be injected via semantic search; invalidated item should not leak
|
|
681
693
|
expect(recall.injectedText).toContain("React");
|
|
682
694
|
expect(recall.injectedText).not.toContain("Angular");
|
|
683
695
|
});
|
|
@@ -765,7 +777,6 @@ describe("Memory Recall Quality", () => {
|
|
|
765
777
|
TEST_CONFIG,
|
|
766
778
|
);
|
|
767
779
|
|
|
768
|
-
expect(recall.recencyHits).toBeGreaterThan(0);
|
|
769
780
|
expect(recall.enabled).toBe(true);
|
|
770
781
|
// Recent Bun item should be injected, old Node reference should not
|
|
771
782
|
expect(recall.injectedText).toContain("Bun");
|
|
@@ -870,7 +881,6 @@ describe("Memory Recall Quality", () => {
|
|
|
870
881
|
TEST_CONFIG,
|
|
871
882
|
);
|
|
872
883
|
|
|
873
|
-
expect(recall.recencyHits).toBeGreaterThan(0);
|
|
874
884
|
expect(recall.enabled).toBe(true);
|
|
875
885
|
// Frequently accessed timezone item should be in injected text
|
|
876
886
|
expect(recall.injectedText).toContain("America/Los_Angeles");
|
|
@@ -940,7 +950,6 @@ describe("Memory Recall Quality", () => {
|
|
|
940
950
|
TEST_CONFIG,
|
|
941
951
|
);
|
|
942
952
|
|
|
943
|
-
expect(recall.recencyHits).toBeGreaterThan(0);
|
|
944
953
|
expect(recall.enabled).toBe(true);
|
|
945
954
|
// Deployment rule should be injected
|
|
946
955
|
expect(recall.injectedText).toContain("staging");
|
|
@@ -987,7 +996,7 @@ describe("Memory Recall Quality", () => {
|
|
|
987
996
|
);
|
|
988
997
|
});
|
|
989
998
|
|
|
990
|
-
test("precision@k guard verifies pipeline completes with seeded
|
|
999
|
+
test("precision@k guard verifies pipeline completes with seeded items", async () => {
|
|
991
1000
|
const db = getDb();
|
|
992
1001
|
const now = 1_700_000_700_000;
|
|
993
1002
|
insertConversation(db, "conv-pk", now, 3);
|
|
@@ -995,17 +1004,17 @@ describe("Memory Recall Quality", () => {
|
|
|
995
1004
|
const prefs = [
|
|
996
1005
|
{
|
|
997
1006
|
msg: "msg-pk-1",
|
|
998
|
-
|
|
1007
|
+
item: "item-pk-1",
|
|
999
1008
|
text: "I prefer dark mode over light mode",
|
|
1000
1009
|
},
|
|
1001
1010
|
{
|
|
1002
1011
|
msg: "msg-pk-2",
|
|
1003
|
-
|
|
1012
|
+
item: "item-pk-2",
|
|
1004
1013
|
text: "I like using TypeScript for all projects",
|
|
1005
1014
|
},
|
|
1006
1015
|
{
|
|
1007
1016
|
msg: "msg-pk-3",
|
|
1008
|
-
|
|
1017
|
+
item: "item-pk-3",
|
|
1009
1018
|
text: "I prefer tabs over spaces for indentation",
|
|
1010
1019
|
},
|
|
1011
1020
|
];
|
|
@@ -1014,18 +1023,40 @@ describe("Memory Recall Quality", () => {
|
|
|
1014
1023
|
const p = prefs[i]!;
|
|
1015
1024
|
const t = now + i * 1000;
|
|
1016
1025
|
insertMessage(db, p.msg, "conv-pk", "user", p.text, t);
|
|
1017
|
-
|
|
1026
|
+
insertItem(db, {
|
|
1027
|
+
id: p.item,
|
|
1028
|
+
kind: "preference",
|
|
1029
|
+
subject: `preference-${i}`,
|
|
1030
|
+
statement: p.text,
|
|
1031
|
+
importance: 0.8,
|
|
1032
|
+
firstSeenAt: t,
|
|
1033
|
+
});
|
|
1034
|
+
insertItemSource(db, p.item, p.msg, t);
|
|
1018
1035
|
}
|
|
1019
1036
|
|
|
1037
|
+
// Mock Qdrant to return all three preference items
|
|
1038
|
+
mockQdrantResults = prefs.map((p, i) => ({
|
|
1039
|
+
id: `emb-pk-${i}`,
|
|
1040
|
+
score: 0.9 - i * 0.05,
|
|
1041
|
+
payload: {
|
|
1042
|
+
target_type: "item",
|
|
1043
|
+
target_id: p.item,
|
|
1044
|
+
text: p.text,
|
|
1045
|
+
kind: "preference",
|
|
1046
|
+
status: "active",
|
|
1047
|
+
created_at: now + i * 1000,
|
|
1048
|
+
last_seen_at: now + i * 1000,
|
|
1049
|
+
},
|
|
1050
|
+
}));
|
|
1051
|
+
|
|
1020
1052
|
const recall = await buildMemoryRecall(
|
|
1021
1053
|
"what do I prefer",
|
|
1022
1054
|
"conv-pk",
|
|
1023
1055
|
TEST_CONFIG,
|
|
1024
1056
|
);
|
|
1025
1057
|
|
|
1026
|
-
//
|
|
1027
|
-
//
|
|
1028
|
-
expect(recall.recencyHits).toBeGreaterThan(0);
|
|
1058
|
+
// Semantic search returns all three preference items which pass
|
|
1059
|
+
// tier classification and are injected.
|
|
1029
1060
|
expect(recall.enabled).toBe(true);
|
|
1030
1061
|
assertPrecisionAtK(
|
|
1031
1062
|
recall.injectedText,
|