vellum 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +15 -2
- package/bun.lock +5 -2
- package/package.json +4 -2
- package/scripts/capture-x-graphql.ts +562 -0
- package/scripts/ipc/check-swift-decoder-drift.ts +2 -1
- package/scripts/test.sh +5 -0
- package/src/__tests__/__snapshots__/ipc-snapshot.test.ts.snap +133 -34
- package/src/__tests__/account-registry.test.ts +2 -1
- package/src/__tests__/agent-heartbeat-service.test.ts +250 -0
- package/src/__tests__/asset-materialize-tool.test.ts +16 -15
- package/src/__tests__/asset-search-tool.test.ts +23 -22
- package/src/__tests__/attachments-store.test.ts +56 -127
- package/src/__tests__/browser-skill-baseline-tool-payload.test.ts +5 -4
- package/src/__tests__/browser-skill-endstate.test.ts +4 -3
- package/src/__tests__/call-bridge.test.ts +385 -0
- package/src/__tests__/call-constants.test.ts +40 -0
- package/src/__tests__/call-orchestrator.test.ts +130 -4
- package/src/__tests__/call-recovery.test.ts +518 -0
- package/src/__tests__/call-routes-http.test.ts +459 -0
- package/src/__tests__/call-state-machine.test.ts +143 -0
- package/src/__tests__/call-store.test.ts +216 -1
- package/src/__tests__/cli-discover.test.ts +1 -1
- package/src/__tests__/commit-message-enrichment-service.test.ts +148 -7
- package/src/__tests__/compaction.benchmark.test.ts +176 -0
- package/src/__tests__/computer-use-tools.test.ts +250 -0
- package/src/__tests__/config-schema.test.ts +299 -3
- package/src/__tests__/conflict-store.test.ts +2 -1
- package/src/__tests__/contacts-tools.test.ts +331 -0
- package/src/__tests__/conversation-store.test.ts +30 -32
- package/src/__tests__/credential-security-invariants.test.ts +4 -0
- package/src/__tests__/date-context.test.ts +373 -0
- package/src/__tests__/db-schedule-syntax-migration.test.ts +129 -0
- package/src/__tests__/fixtures/media-reuse-fixtures.ts +3 -3
- package/src/__tests__/followup-tools.test.ts +303 -0
- package/src/__tests__/handlers-twitter-config.test.ts +718 -0
- package/src/__tests__/intent-routing.test.ts +64 -57
- package/src/__tests__/ipc-roundtrip.benchmark.test.ts +237 -0
- package/src/__tests__/ipc-snapshot.test.ts +62 -28
- package/src/__tests__/llm-usage-store.test.ts +3 -8
- package/src/__tests__/media-generate-image.test.ts +1 -1
- package/src/__tests__/media-reuse-story.e2e.test.ts +7 -7
- package/src/__tests__/memory-retrieval.benchmark.test.ts +430 -0
- package/src/__tests__/parallel-tool.benchmark.test.ts +294 -0
- package/src/__tests__/playbook-tools.test.ts +342 -0
- package/src/__tests__/profile-compiler.test.ts +2 -1
- package/src/__tests__/provider-streaming.benchmark.test.ts +773 -0
- package/src/__tests__/recurrence-engine-rruleset.test.ts +78 -0
- package/src/__tests__/recurrence-engine.test.ts +69 -0
- package/src/__tests__/recurrence-types.test.ts +71 -0
- package/src/__tests__/registry.test.ts +5 -3
- package/src/__tests__/relay-server.test.ts +633 -0
- package/src/__tests__/reminder-store.test.ts +6 -3
- package/src/__tests__/reminder.test.ts +43 -77
- package/src/__tests__/run-orchestrator-assistant-events.test.ts +8 -4
- package/src/__tests__/run-orchestrator.test.ts +4 -4
- package/src/__tests__/runtime-attachment-metadata.test.ts +7 -6
- package/src/__tests__/runtime-runs-http.test.ts +4 -4
- package/src/__tests__/runtime-runs.test.ts +4 -4
- package/src/__tests__/schedule-store.test.ts +482 -0
- package/src/__tests__/schedule-tools.test.ts +700 -0
- package/src/__tests__/scheduler-recurrence.test.ts +329 -0
- package/src/__tests__/server-history-render.test.ts +14 -13
- package/src/__tests__/session-error.test.ts +28 -0
- package/src/__tests__/session-init.benchmark.test.ts +462 -0
- package/src/__tests__/session-queue.test.ts +71 -48
- package/src/__tests__/session-runtime-assembly.test.ts +161 -0
- package/src/__tests__/session-surfaces-task-progress.test.ts +104 -0
- package/src/__tests__/signup-e2e.test.ts +2 -1
- package/src/__tests__/skill-projection.benchmark.test.ts +328 -0
- package/src/__tests__/skill-script-runner.test.ts +159 -0
- package/src/__tests__/speaker-identification.test.ts +52 -0
- package/src/__tests__/subagent-manager-notify.test.ts +42 -10
- package/src/__tests__/subagent-tools.test.ts +141 -41
- package/src/__tests__/task-compiler.test.ts +2 -1
- package/src/__tests__/task-runner.test.ts +2 -1
- package/src/__tests__/task-scheduler.test.ts +2 -1
- package/src/__tests__/task-tools.test.ts +49 -56
- package/src/__tests__/tool-audit-listener.test.ts +1 -0
- package/src/__tests__/tool-domain-event-publisher.test.ts +2 -0
- package/src/__tests__/tool-execution-pipeline.benchmark.test.ts +500 -0
- package/src/__tests__/tool-executor.test.ts +13 -17
- package/src/__tests__/turn-commit.test.ts +218 -3
- package/src/__tests__/twilio-provider.test.ts +143 -0
- package/src/__tests__/twilio-routes.test.ts +789 -0
- package/src/__tests__/twitter-auth-handler.test.ts +581 -0
- package/src/__tests__/view-image-tool.test.ts +217 -0
- package/src/__tests__/workspace-git-service.test.ts +186 -0
- package/src/__tests__/workspace-heartbeat-service.test.ts +13 -3
- package/src/agent-heartbeat/agent-heartbeat-service.ts +155 -0
- package/src/bundler/app-bundler.ts +12 -8
- package/src/calls/call-bridge.ts +95 -0
- package/src/calls/call-constants.ts +43 -5
- package/src/calls/call-domain.ts +276 -0
- package/src/calls/call-orchestrator.ts +43 -17
- package/src/calls/call-recovery.ts +207 -0
- package/src/calls/call-state-machine.ts +68 -0
- package/src/calls/call-store.ts +192 -5
- package/src/calls/relay-server.ts +41 -4
- package/src/calls/speaker-identification.ts +213 -0
- package/src/calls/twilio-provider.ts +10 -6
- package/src/calls/twilio-routes.ts +90 -76
- package/src/calls/types.ts +1 -1
- package/src/cli/config-commands.ts +334 -0
- package/src/cli/core-commands.ts +776 -0
- package/src/cli/doordash.ts +251 -1
- package/src/cli/ipc-client.ts +82 -0
- package/src/cli/map.ts +246 -0
- package/src/cli/twitter.ts +575 -0
- package/src/cli.ts +7 -5
- package/src/commands/__tests__/cc-command-registry.test.ts +319 -0
- package/src/commands/cc-command-registry.ts +209 -0
- package/src/config/bundled-skills/contacts/SKILL.md +39 -0
- package/src/config/bundled-skills/contacts/TOOLS.json +122 -0
- package/src/config/bundled-skills/contacts/tools/contact-merge.ts +9 -0
- package/src/config/bundled-skills/contacts/tools/contact-search.ts +9 -0
- package/src/config/bundled-skills/contacts/tools/contact-upsert.ts +9 -0
- package/src/config/bundled-skills/document/SKILL.md +18 -0
- package/src/config/bundled-skills/document/TOOLS.json +53 -0
- package/src/config/bundled-skills/document/tools/document-create.ts +9 -0
- package/src/config/bundled-skills/document/tools/document-update.ts +9 -0
- package/src/config/bundled-skills/doordash/SKILL.md +82 -23
- package/src/config/bundled-skills/followups/SKILL.md +32 -0
- package/src/config/bundled-skills/followups/TOOLS.json +100 -0
- package/src/config/bundled-skills/followups/tools/followup-create.ts +9 -0
- package/src/config/bundled-skills/followups/tools/followup-list.ts +9 -0
- package/src/config/bundled-skills/followups/tools/followup-resolve.ts +9 -0
- package/src/config/bundled-skills/image-studio/tools/media-generate-image.ts +1 -23
- package/src/config/bundled-skills/messaging/tools/messaging-analyze-style.ts +2 -1
- package/src/config/bundled-skills/playbooks/SKILL.md +31 -0
- package/src/config/bundled-skills/playbooks/TOOLS.json +126 -0
- package/src/config/bundled-skills/playbooks/tools/playbook-create.ts +9 -0
- package/src/config/bundled-skills/playbooks/tools/playbook-delete.ts +9 -0
- package/src/config/bundled-skills/playbooks/tools/playbook-list.ts +9 -0
- package/src/config/bundled-skills/playbooks/tools/playbook-update.ts +9 -0
- package/src/config/bundled-skills/reminder/SKILL.md +20 -0
- package/src/config/bundled-skills/reminder/TOOLS.json +67 -0
- package/src/config/bundled-skills/reminder/tools/reminder-cancel.ts +9 -0
- package/src/config/bundled-skills/reminder/tools/reminder-create.ts +9 -0
- package/src/config/bundled-skills/reminder/tools/reminder-list.ts +9 -0
- package/src/config/bundled-skills/schedule/SKILL.md +74 -0
- package/src/config/bundled-skills/schedule/TOOLS.json +135 -0
- package/src/config/bundled-skills/schedule/tools/schedule-create.ts +9 -0
- package/src/config/bundled-skills/schedule/tools/schedule-delete.ts +9 -0
- package/src/config/bundled-skills/schedule/tools/schedule-list.ts +9 -0
- package/src/config/bundled-skills/schedule/tools/schedule-update.ts +9 -0
- package/src/config/bundled-skills/subagent/SKILL.md +25 -0
- package/src/config/bundled-skills/subagent/TOOLS.json +107 -0
- package/src/config/bundled-skills/subagent/tools/subagent-abort.ts +9 -0
- package/src/config/bundled-skills/subagent/tools/subagent-message.ts +9 -0
- package/src/config/bundled-skills/subagent/tools/subagent-read.ts +9 -0
- package/src/config/bundled-skills/subagent/tools/subagent-spawn.ts +9 -0
- package/src/config/bundled-skills/subagent/tools/subagent-status.ts +9 -0
- package/src/config/bundled-skills/tasks/SKILL.md +28 -0
- package/src/config/bundled-skills/tasks/TOOLS.json +256 -0
- package/src/config/bundled-skills/tasks/tools/task-delete.ts +9 -0
- package/src/config/bundled-skills/tasks/tools/task-list-add.ts +9 -0
- package/src/config/bundled-skills/tasks/tools/task-list-remove.ts +9 -0
- package/src/config/bundled-skills/tasks/tools/task-list-show.ts +9 -0
- package/src/config/bundled-skills/tasks/tools/task-list-update.ts +9 -0
- package/src/config/bundled-skills/tasks/tools/task-list.ts +9 -0
- package/src/config/bundled-skills/tasks/tools/task-run.ts +9 -0
- package/src/config/bundled-skills/tasks/tools/task-save.ts +9 -0
- package/src/config/bundled-skills/twitter/SKILL.md +134 -0
- package/src/config/bundled-skills/watcher/SKILL.md +27 -0
- package/src/config/bundled-skills/watcher/TOOLS.json +147 -0
- package/src/config/bundled-skills/watcher/tools/watcher-create.ts +9 -0
- package/src/config/bundled-skills/watcher/tools/watcher-delete.ts +9 -0
- package/src/config/bundled-skills/watcher/tools/watcher-digest.ts +9 -0
- package/src/config/bundled-skills/watcher/tools/watcher-list.ts +9 -0
- package/src/config/bundled-skills/watcher/tools/watcher-update.ts +9 -0
- package/src/config/defaults.ts +33 -0
- package/src/config/loader.ts +4 -1
- package/src/config/schema.ts +161 -1
- package/src/config/system-prompt.ts +61 -16
- package/src/config/templates/IDENTITY.md +7 -0
- package/src/config/types.ts +4 -0
- package/src/contacts/contact-store.ts +4 -4
- package/src/daemon/assistant-attachments.ts +10 -0
- package/src/daemon/classifier.ts +3 -1
- package/src/daemon/computer-use-session.ts +3 -1
- package/src/daemon/date-context.ts +136 -0
- package/src/daemon/handlers/apps.ts +16 -1
- package/src/daemon/handlers/browser.ts +54 -0
- package/src/daemon/handlers/computer-use.ts +7 -1
- package/src/daemon/handlers/config.ts +163 -5
- package/src/daemon/handlers/diagnostics.ts +5 -1
- package/src/daemon/handlers/documents.ts +18 -29
- package/src/daemon/handlers/home-base.ts +5 -1
- package/src/daemon/handlers/index.ts +40 -277
- package/src/daemon/handlers/misc.ts +9 -1
- package/src/daemon/handlers/publish.ts +6 -1
- package/src/daemon/handlers/sessions.ts +65 -12
- package/src/daemon/handlers/shared.ts +36 -1
- package/src/daemon/handlers/signing.ts +37 -0
- package/src/daemon/handlers/skills.ts +20 -6
- package/src/daemon/handlers/subagents.ts +8 -3
- package/src/daemon/handlers/twitter-auth.ts +169 -0
- package/src/daemon/handlers/work-items.ts +384 -68
- package/src/daemon/ipc-contract-inventory.json +28 -4
- package/src/daemon/ipc-contract.ts +133 -37
- package/src/daemon/ipc-protocol.ts +7 -2
- package/src/daemon/lifecycle.ts +21 -0
- package/src/daemon/main.ts +10 -4
- package/src/daemon/ride-shotgun-handler.ts +74 -10
- package/src/daemon/server.ts +143 -26
- package/src/daemon/session-agent-loop.ts +887 -0
- package/src/daemon/session-attachments.ts +28 -5
- package/src/daemon/session-error.ts +24 -3
- package/src/daemon/session-lifecycle.ts +147 -0
- package/src/daemon/session-media-retry.ts +147 -0
- package/src/daemon/session-messaging.ts +145 -0
- package/src/daemon/session-notifiers.ts +164 -0
- package/src/daemon/session-process.ts +2 -2
- package/src/daemon/session-queue-manager.ts +1 -0
- package/src/daemon/session-runtime-assembly.ts +52 -0
- package/src/daemon/session-skill-tools.ts +124 -5
- package/src/daemon/session-slash.ts +3 -0
- package/src/daemon/session-surfaces.ts +77 -2
- package/src/daemon/session-tool-setup.ts +216 -2
- package/src/daemon/session-usage.ts +0 -2
- package/src/daemon/session.ts +114 -1404
- package/src/daemon/video-thumbnail.ts +60 -0
- package/src/doordash/client.ts +121 -27
- package/src/doordash/queries.ts +1 -2
- package/src/export/formatter.ts +3 -1
- package/src/followups/followup-store.ts +4 -2
- package/src/followups/types.ts +6 -0
- package/src/hooks/templates.ts +1 -1
- package/src/index.ts +32 -1153
- package/src/memory/attachments-store.ts +28 -83
- package/src/memory/channel-delivery-store.ts +7 -21
- package/src/memory/clarification-resolver.ts +6 -5
- package/src/memory/contradiction-checker.ts +3 -2
- package/src/memory/conversation-key-store.ts +10 -29
- package/src/memory/conversation-store.ts +2 -1
- package/src/memory/db.ts +96 -2
- package/src/memory/entity-extractor.ts +6 -3
- package/src/memory/items-extractor.ts +5 -4
- package/src/memory/jobs-store.ts +3 -2
- package/src/memory/llm-usage-store.ts +1 -2
- package/src/memory/runs-store.ts +1 -2
- package/src/memory/schema.ts +23 -2
- package/src/messaging/style-analyzer.ts +3 -2
- package/src/messaging/thread-summarizer.ts +8 -12
- package/src/messaging/triage-engine.ts +4 -2
- package/src/providers/openrouter/client.ts +20 -0
- package/src/providers/registry.ts +8 -0
- package/src/runtime/http-server.ts +108 -20
- package/src/runtime/routes/attachment-routes.ts +2 -3
- package/src/runtime/routes/call-routes.ts +140 -0
- package/src/runtime/routes/channel-routes.ts +5 -10
- package/src/runtime/routes/conversation-routes.ts +5 -5
- package/src/runtime/routes/run-routes.ts +2 -2
- package/src/runtime/run-orchestrator.ts +9 -3
- package/src/schedule/recurrence-engine.ts +138 -0
- package/src/schedule/recurrence-types.ts +67 -0
- package/src/schedule/schedule-store.ts +102 -57
- package/src/schedule/scheduler.ts +9 -6
- package/src/security/oauth2.ts +29 -4
- package/src/security/secret-allowlist.ts +46 -0
- package/src/skills/clawhub.ts +1 -1
- package/src/subagent/manager.ts +40 -8
- package/src/swarm/backend-claude-code.ts +64 -9
- package/src/swarm/worker-prompts.ts +2 -1
- package/src/tasks/SPEC.md +34 -28
- package/src/tasks/ephemeral-permissions.ts +16 -7
- package/src/tasks/task-compiler.ts +5 -4
- package/src/tasks/task-runner.ts +10 -5
- package/src/tasks/task-scheduler.ts +1 -1
- package/src/tasks/tool-sanitizer.ts +36 -0
- package/src/tools/assets/search.ts +4 -4
- package/src/tools/browser/api-map.ts +220 -0
- package/src/tools/browser/auto-navigate.ts +270 -0
- package/src/tools/browser/browser-execution.ts +2 -1
- package/src/tools/browser/browser-manager.ts +2 -2
- package/src/tools/browser/network-recorder.ts +5 -4
- package/src/tools/browser/x-auto-navigate.ts +207 -0
- package/src/tools/calls/call-end.ts +17 -67
- package/src/tools/calls/call-start.ts +24 -85
- package/src/tools/calls/call-status.ts +35 -51
- package/src/tools/claude-code/claude-code.ts +77 -11
- package/src/tools/contacts/contact-merge.ts +46 -78
- package/src/tools/contacts/contact-search.ts +35 -79
- package/src/tools/contacts/contact-upsert.ts +35 -108
- package/src/tools/credentials/vault.ts +20 -4
- package/src/tools/document/document-tool.ts +71 -144
- package/src/tools/executor.ts +129 -10
- package/src/tools/followups/followup_create.ts +46 -88
- package/src/tools/followups/followup_list.ts +34 -74
- package/src/tools/followups/followup_resolve.ts +31 -66
- package/src/tools/host-terminal/cli-discover.ts +2 -1
- package/src/tools/host-terminal/host-shell.ts +10 -0
- package/src/tools/memory/handlers.ts +5 -4
- package/src/tools/network/__tests__/web-search.test.ts +427 -0
- package/src/tools/network/script-proxy/__tests__/logging.test.ts +248 -0
- package/src/tools/network/script-proxy/__tests__/policy.test.ts +234 -0
- package/src/tools/network/script-proxy/__tests__/router.test.ts +76 -0
- package/src/tools/network/web-fetch.ts +18 -6
- package/src/tools/playbooks/index.ts +4 -5
- package/src/tools/playbooks/playbook-create.ts +3 -47
- package/src/tools/playbooks/playbook-delete.ts +1 -25
- package/src/tools/playbooks/playbook-list.ts +1 -28
- package/src/tools/playbooks/playbook-update.ts +3 -51
- package/src/tools/reminder/reminder.ts +5 -78
- package/src/tools/schedule/create.ts +69 -74
- package/src/tools/schedule/delete.ts +21 -47
- package/src/tools/schedule/list.ts +55 -74
- package/src/tools/schedule/update.ts +77 -84
- package/src/tools/subagent/abort.ts +29 -58
- package/src/tools/subagent/message.ts +30 -63
- package/src/tools/subagent/read.ts +53 -84
- package/src/tools/subagent/spawn.ts +43 -82
- package/src/tools/subagent/status.ts +42 -71
- package/src/tools/swarm/delegate.ts +2 -1
- package/src/tools/tasks/index.ts +8 -8
- package/src/tools/tasks/task-delete.ts +60 -88
- package/src/tools/tasks/task-list.ts +31 -52
- package/src/tools/tasks/task-run.ts +72 -108
- package/src/tools/tasks/task-save.ts +33 -65
- package/src/tools/tasks/work-item-enqueue.ts +183 -215
- package/src/tools/tasks/work-item-list.ts +33 -63
- package/src/tools/tasks/work-item-remove.ts +45 -97
- package/src/tools/tasks/work-item-update.ts +91 -163
- package/src/tools/terminal/backends/native.ts +3 -1
- package/src/tools/tool-manifest.ts +0 -62
- package/src/tools/types.ts +6 -0
- package/src/tools/ui-surface/definitions.ts +3 -1
- package/src/tools/watch/screen-watch.ts +3 -1
- package/src/tools/watcher/create.ts +52 -98
- package/src/tools/watcher/delete.ts +20 -46
- package/src/tools/watcher/digest.ts +36 -70
- package/src/tools/watcher/list.ts +49 -79
- package/src/tools/watcher/update.ts +45 -91
- package/src/twitter/client.ts +690 -0
- package/src/twitter/session.ts +91 -0
- package/src/usage/types.ts +0 -1
- package/src/util/truncate.ts +6 -0
- package/src/watcher/providers/slack.ts +2 -1
- package/src/watcher/watcher-store.ts +3 -2
- package/src/work-items/work-item-store.ts +27 -2
- package/src/workspace/commit-message-enrichment-service.ts +31 -7
- package/src/workspace/git-service.ts +87 -22
- package/src/workspace/provider-commit-message-generator.ts +242 -0
- package/src/workspace/turn-commit.ts +62 -3
- package/src/tools/contacts/index.ts +0 -4
- package/src/tools/document/index.ts +0 -5
- package/src/tools/followups/index.ts +0 -3
- package/src/tools/subagent/index.ts +0 -5
- /package/src/__tests__/{memory-context-benchmark.test.ts → memory-context-benchmark.benchmark.test.ts} +0 -0
|
@@ -232,7 +232,7 @@ describe('image-studio TOOLS.json manifest', () => {
|
|
|
232
232
|
const props = manifest.tools[0].input_schema.properties;
|
|
233
233
|
expect(props.mode.enum).toEqual(['generate', 'edit']);
|
|
234
234
|
expect(props.attachment_ids.type).toBe('array');
|
|
235
|
-
expect(props.model.enum).toEqual(['gemini-2.5-flash-image', 'gemini-3-pro-image']);
|
|
235
|
+
expect(props.model.enum).toEqual(['gemini-2.5-flash-image', 'gemini-3-pro-image', 'gemini-3-pro-image-preview']);
|
|
236
236
|
expect(props.variants.type).toBe('number');
|
|
237
237
|
});
|
|
238
238
|
});
|
|
@@ -96,7 +96,7 @@ mock.module('../tools/network/script-proxy/certs.js', () => ({
|
|
|
96
96
|
// Source imports (after mocks)
|
|
97
97
|
// ---------------------------------------------------------------------------
|
|
98
98
|
|
|
99
|
-
import { initializeDb, getDb } from '../memory/db.js';
|
|
99
|
+
import { initializeDb, getDb, resetDb } from '../memory/db.js';
|
|
100
100
|
import { uploadAttachment, linkAttachmentToMessage } from '../memory/attachments-store.js';
|
|
101
101
|
import { createConversation, addMessage } from '../memory/conversation-store.js';
|
|
102
102
|
import { assetSearchTool, searchAttachments } from '../tools/assets/search.js';
|
|
@@ -114,6 +114,7 @@ initializeDb();
|
|
|
114
114
|
mkdirSync(sandboxDir, { recursive: true });
|
|
115
115
|
|
|
116
116
|
afterAll(async () => {
|
|
117
|
+
resetDb();
|
|
117
118
|
await stopAllSessions();
|
|
118
119
|
resolveByIdResults = new Map();
|
|
119
120
|
secureKeyValues = new Map();
|
|
@@ -211,7 +212,7 @@ describe('Story E2E: selfie yesterday -> generated image today', () => {
|
|
|
211
212
|
|
|
212
213
|
// -- Step 2: Selfie uploaded in Thread A (standard) --
|
|
213
214
|
threadA = createConversation({ title: 'Thread A — selfie upload' });
|
|
214
|
-
selfieAttachment = uploadAttachment('
|
|
215
|
+
selfieAttachment = uploadAttachment('selfie.png', 'image/png', TINY_PNG_BASE64);
|
|
215
216
|
selfieId = selfieAttachment.id;
|
|
216
217
|
|
|
217
218
|
const msgA = addMessage(threadA.id, 'user', 'Here is my selfie from yesterday');
|
|
@@ -358,7 +359,6 @@ describe('Story E2E: selfie yesterday -> generated image today', () => {
|
|
|
358
359
|
// in the attachment store (same hash = returns existing row).
|
|
359
360
|
const generatedImageBase64 = Buffer.from('generated-portrait-data-unique').toString('base64');
|
|
360
361
|
const outputAttachment = uploadAttachment(
|
|
361
|
-
'asst-story-01',
|
|
362
362
|
'generated-portrait.png',
|
|
363
363
|
'image/png',
|
|
364
364
|
generatedImageBase64,
|
|
@@ -473,7 +473,7 @@ describe('Private-thread variant: cross-thread media blocking', () => {
|
|
|
473
473
|
test('selfie in private thread A is NOT discoverable via search from Thread B', async () => {
|
|
474
474
|
// Upload selfie in a private thread
|
|
475
475
|
const privateThread = createConversation({ title: 'Private selfie thread', threadType: 'private' });
|
|
476
|
-
const selfie = uploadAttachment('
|
|
476
|
+
const selfie = uploadAttachment('private-selfie.png', 'image/png', TINY_PNG_BASE64);
|
|
477
477
|
const msg = addMessage(privateThread.id, 'user', 'My private selfie');
|
|
478
478
|
linkAttachmentToMessage(msg.id, selfie.id, 0);
|
|
479
479
|
|
|
@@ -497,7 +497,7 @@ describe('Private-thread variant: cross-thread media blocking', () => {
|
|
|
497
497
|
test('selfie in private thread A is NOT materializable from Thread B', async () => {
|
|
498
498
|
const privateThread = createConversation({ title: 'Private selfie thread', threadType: 'private' });
|
|
499
499
|
const base64 = Buffer.from('private image data').toString('base64');
|
|
500
|
-
const selfie = uploadAttachment('
|
|
500
|
+
const selfie = uploadAttachment('private-selfie.png', 'image/png', base64);
|
|
501
501
|
const msg = addMessage(privateThread.id, 'user', 'My private selfie');
|
|
502
502
|
linkAttachmentToMessage(msg.id, selfie.id, 0);
|
|
503
503
|
|
|
@@ -521,7 +521,7 @@ describe('Private-thread variant: cross-thread media blocking', () => {
|
|
|
521
521
|
|
|
522
522
|
test('selfie in private thread IS accessible from the same private thread', async () => {
|
|
523
523
|
const privateThread = createConversation({ title: 'Private selfie thread', threadType: 'private' });
|
|
524
|
-
const selfie = uploadAttachment('
|
|
524
|
+
const selfie = uploadAttachment('private-selfie.png', 'image/png', TINY_PNG_BASE64);
|
|
525
525
|
const msg = addMessage(privateThread.id, 'user', 'My private selfie');
|
|
526
526
|
linkAttachmentToMessage(msg.id, selfie.id, 0);
|
|
527
527
|
|
|
@@ -550,7 +550,7 @@ describe('Private-thread variant: cross-thread media blocking', () => {
|
|
|
550
550
|
|
|
551
551
|
test('selfie in private thread A is NOT accessible from private thread B', async () => {
|
|
552
552
|
const privateThreadA = createConversation({ title: 'Private thread A', threadType: 'private' });
|
|
553
|
-
const selfie = uploadAttachment('
|
|
553
|
+
const selfie = uploadAttachment('thread-a-selfie.png', 'image/png', TINY_PNG_BASE64);
|
|
554
554
|
const msgA = addMessage(privateThreadA.id, 'user', 'Selfie in thread A');
|
|
555
555
|
linkAttachmentToMessage(msgA.id, selfie.id, 0);
|
|
556
556
|
|
|
@@ -0,0 +1,430 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Memory Retrieval Benchmark
|
|
3
|
+
*
|
|
4
|
+
* Measures end-to-end memory recall time with varying database sizes.
|
|
5
|
+
* Validates latency stays within acceptable bounds and token budget
|
|
6
|
+
* enforcement works correctly.
|
|
7
|
+
*/
|
|
8
|
+
import { afterAll, beforeAll, beforeEach, describe, expect, mock, test } from 'bun:test';
|
|
9
|
+
import { mkdtempSync, rmSync } from 'node:fs';
|
|
10
|
+
import { tmpdir } from 'node:os';
|
|
11
|
+
import { join } from 'node:path';
|
|
12
|
+
|
|
13
|
+
const testDir = mkdtempSync(join(tmpdir(), 'mem-retrieval-bench-'));
|
|
14
|
+
|
|
15
|
+
mock.module('../util/platform.js', () => ({
|
|
16
|
+
getDataDir: () => testDir,
|
|
17
|
+
isMacOS: () => process.platform === 'darwin',
|
|
18
|
+
isLinux: () => process.platform === 'linux',
|
|
19
|
+
isWindows: () => process.platform === 'win32',
|
|
20
|
+
getSocketPath: () => join(testDir, 'test.sock'),
|
|
21
|
+
getPidPath: () => join(testDir, 'test.pid'),
|
|
22
|
+
getDbPath: () => join(testDir, 'test.db'),
|
|
23
|
+
getLogPath: () => join(testDir, 'test.log'),
|
|
24
|
+
ensureDataDir: () => {},
|
|
25
|
+
}));
|
|
26
|
+
|
|
27
|
+
mock.module('../util/logger.js', () => ({
|
|
28
|
+
getLogger: () => new Proxy({} as Record<string, unknown>, {
|
|
29
|
+
get: () => () => {},
|
|
30
|
+
}),
|
|
31
|
+
}));
|
|
32
|
+
|
|
33
|
+
// Simulated network delay for semantic search (ms). When > 0, the mock
|
|
34
|
+
// semantic search sleeps for this duration before returning, simulating the
|
|
35
|
+
// Qdrant network round-trip that early termination is designed to skip.
|
|
36
|
+
let semanticSearchDelayMs = 0;
|
|
37
|
+
|
|
38
|
+
mock.module('../memory/search/semantic.js', () => ({
|
|
39
|
+
semanticSearch: async () => {
|
|
40
|
+
if (semanticSearchDelayMs > 0) {
|
|
41
|
+
await Bun.sleep(semanticSearchDelayMs);
|
|
42
|
+
}
|
|
43
|
+
return [];
|
|
44
|
+
},
|
|
45
|
+
isQdrantConnectionError: () => false,
|
|
46
|
+
}));
|
|
47
|
+
|
|
48
|
+
mock.module('../memory/embedding-backend.js', () => ({
|
|
49
|
+
getMemoryBackendStatus: (config: { memory: { enabled: boolean } }) => ({
|
|
50
|
+
enabled: config.memory.enabled,
|
|
51
|
+
degraded: false,
|
|
52
|
+
provider: 'local',
|
|
53
|
+
model: 'mock-embedding',
|
|
54
|
+
reason: null,
|
|
55
|
+
}),
|
|
56
|
+
embedWithBackend: async () => ({
|
|
57
|
+
provider: 'local' as const,
|
|
58
|
+
model: 'mock-embedding',
|
|
59
|
+
vectors: [new Array(1536).fill(0)],
|
|
60
|
+
}),
|
|
61
|
+
}));
|
|
62
|
+
|
|
63
|
+
import { DEFAULT_CONFIG } from '../config/defaults.js';
|
|
64
|
+
import { getDb, initializeDb, resetDb } from '../memory/db.js';
|
|
65
|
+
import { buildMemoryRecall } from '../memory/retriever.js';
|
|
66
|
+
import { conversations, memorySegments, messages } from '../memory/schema.js';
|
|
67
|
+
import type { AssistantConfig } from '../config/types.js';
|
|
68
|
+
|
|
69
|
+
function seedMemoryItems(conversationId: string, count: number, now: number): void {
|
|
70
|
+
const db = getDb();
|
|
71
|
+
db.insert(conversations).values({
|
|
72
|
+
id: conversationId,
|
|
73
|
+
title: null,
|
|
74
|
+
createdAt: now,
|
|
75
|
+
updatedAt: now,
|
|
76
|
+
totalInputTokens: 0,
|
|
77
|
+
totalOutputTokens: 0,
|
|
78
|
+
totalEstimatedCost: 0,
|
|
79
|
+
contextSummary: null,
|
|
80
|
+
contextCompactedMessageCount: 0,
|
|
81
|
+
contextCompactedAt: null,
|
|
82
|
+
}).run();
|
|
83
|
+
|
|
84
|
+
for (let i = 0; i < count; i++) {
|
|
85
|
+
const msgId = `msg-${conversationId}-${i}`;
|
|
86
|
+
const text = `Memory item ${i}: information about topic-${i % 20} including keyword-${i % 10} details.`;
|
|
87
|
+
db.insert(messages).values({
|
|
88
|
+
id: msgId,
|
|
89
|
+
conversationId,
|
|
90
|
+
role: i % 2 === 0 ? 'user' : 'assistant',
|
|
91
|
+
content: JSON.stringify([{ type: 'text', text }]),
|
|
92
|
+
createdAt: now + i,
|
|
93
|
+
}).run();
|
|
94
|
+
db.insert(memorySegments).values({
|
|
95
|
+
id: `seg-${conversationId}-${i}`,
|
|
96
|
+
messageId: msgId,
|
|
97
|
+
conversationId,
|
|
98
|
+
role: i % 2 === 0 ? 'user' : 'assistant',
|
|
99
|
+
segmentIndex: 0,
|
|
100
|
+
text,
|
|
101
|
+
tokenEstimate: 20,
|
|
102
|
+
scopeId: 'default',
|
|
103
|
+
createdAt: now + i,
|
|
104
|
+
updatedAt: now + i,
|
|
105
|
+
}).run();
|
|
106
|
+
}
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
function makeConfig(overrides?: { maxInjectTokens?: number }): AssistantConfig {
|
|
110
|
+
return {
|
|
111
|
+
...DEFAULT_CONFIG,
|
|
112
|
+
memory: {
|
|
113
|
+
...DEFAULT_CONFIG.memory,
|
|
114
|
+
embeddings: {
|
|
115
|
+
...DEFAULT_CONFIG.memory.embeddings,
|
|
116
|
+
provider: 'local' as const,
|
|
117
|
+
required: false,
|
|
118
|
+
},
|
|
119
|
+
retrieval: {
|
|
120
|
+
...DEFAULT_CONFIG.memory.retrieval,
|
|
121
|
+
lexicalTopK: 50,
|
|
122
|
+
semanticTopK: 20,
|
|
123
|
+
maxInjectTokens: overrides?.maxInjectTokens ?? 750,
|
|
124
|
+
reranking: { ...DEFAULT_CONFIG.memory.retrieval.reranking, enabled: false },
|
|
125
|
+
dynamicBudget: {
|
|
126
|
+
enabled: false,
|
|
127
|
+
minInjectTokens: 160,
|
|
128
|
+
maxInjectTokens: overrides?.maxInjectTokens ?? 750,
|
|
129
|
+
targetHeadroomTokens: 900,
|
|
130
|
+
},
|
|
131
|
+
},
|
|
132
|
+
},
|
|
133
|
+
};
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
describe('Memory retrieval benchmark', () => {
|
|
137
|
+
beforeAll(() => {
|
|
138
|
+
initializeDb();
|
|
139
|
+
});
|
|
140
|
+
|
|
141
|
+
beforeEach(() => {
|
|
142
|
+
const db = getDb();
|
|
143
|
+
db.run('DELETE FROM memory_item_sources');
|
|
144
|
+
db.run('DELETE FROM memory_item_entities');
|
|
145
|
+
db.run('DELETE FROM memory_entity_relations');
|
|
146
|
+
db.run('DELETE FROM memory_entities');
|
|
147
|
+
db.run('DELETE FROM memory_embeddings');
|
|
148
|
+
db.run('DELETE FROM memory_summaries');
|
|
149
|
+
db.run('DELETE FROM memory_items');
|
|
150
|
+
db.run('DELETE FROM memory_segment_fts');
|
|
151
|
+
db.run('DELETE FROM memory_segments');
|
|
152
|
+
db.run('DELETE FROM messages');
|
|
153
|
+
db.run('DELETE FROM conversations');
|
|
154
|
+
db.run('DELETE FROM memory_jobs');
|
|
155
|
+
db.run('DELETE FROM memory_checkpoints');
|
|
156
|
+
});
|
|
157
|
+
|
|
158
|
+
afterAll(() => {
|
|
159
|
+
resetDb();
|
|
160
|
+
try {
|
|
161
|
+
rmSync(testDir, { recursive: true });
|
|
162
|
+
} catch {
|
|
163
|
+
// best effort cleanup
|
|
164
|
+
}
|
|
165
|
+
});
|
|
166
|
+
|
|
167
|
+
test('retrieval completes under 500ms for 100 items', async () => {
|
|
168
|
+
const conversationId = 'conv-bench-100';
|
|
169
|
+
const now = 1_700_500_000_000;
|
|
170
|
+
seedMemoryItems(conversationId, 100, now);
|
|
171
|
+
|
|
172
|
+
const config = makeConfig();
|
|
173
|
+
const recall = await buildMemoryRecall(
|
|
174
|
+
'What do we know about topic-5 and keyword-3?',
|
|
175
|
+
conversationId,
|
|
176
|
+
config,
|
|
177
|
+
);
|
|
178
|
+
|
|
179
|
+
expect(recall.enabled).toBe(true);
|
|
180
|
+
expect(recall.degraded).toBe(false);
|
|
181
|
+
expect(recall.lexicalHits).toBeGreaterThan(0);
|
|
182
|
+
expect(recall.selectedCount).toBeGreaterThan(0);
|
|
183
|
+
// Relaxed threshold — guards against severe regressions, not precise benchmarking
|
|
184
|
+
expect(recall.latencyMs).toBeLessThan(500);
|
|
185
|
+
});
|
|
186
|
+
|
|
187
|
+
// Regression guard: recall over 500 seeded items must report latency below 1000ms.
test('retrieval completes under 1000ms for 500 items', async () => {
  const benchConversation = 'conv-bench-500';
  const fixedNow = 1_700_500_000_000;
  seedMemoryItems(benchConversation, 500, fixedNow);

  const benchConfig = makeConfig();
  const query = 'What do we know about topic-5 and keyword-3?';
  const result = await buildMemoryRecall(query, benchConversation, benchConfig);

  // Sanity checks first: recall ran, was not degraded, and found/selected something.
  expect(result.enabled).toBe(true);
  expect(result.degraded).toBe(false);
  expect(result.lexicalHits).toBeGreaterThan(0);
  expect(result.selectedCount).toBeGreaterThan(0);
  // Self-reported latency must stay under the 1000ms ceiling for this size.
  expect(result.latencyMs).toBeLessThan(1000);
});
|
|
205
|
+
|
|
206
|
+
// Regression guard: recall over 2000 seeded items must report latency below 2000ms.
test('retrieval completes under 2000ms for 2000 items', async () => {
  const benchConversation = 'conv-bench-2000';
  const fixedNow = 1_700_500_000_000;
  seedMemoryItems(benchConversation, 2000, fixedNow);

  const benchConfig = makeConfig();
  const query = 'What do we know about topic-5 and keyword-3?';
  const result = await buildMemoryRecall(query, benchConversation, benchConfig);

  // Sanity checks first: recall ran, was not degraded, and found/selected something.
  expect(result.enabled).toBe(true);
  expect(result.degraded).toBe(false);
  expect(result.lexicalHits).toBeGreaterThan(0);
  expect(result.selectedCount).toBeGreaterThan(0);
  // Self-reported latency must stay under the 2000ms ceiling for this size.
  expect(result.latencyMs).toBeLessThan(2000);
});
|
|
224
|
+
|
|
225
|
+
// Checks that injectedTokens never exceeds the configured maxInjectTokens, using
// the same query against a small (200) and a large (2000) budget.
test('token budget enforcement: maxInjectTokens is respected', async () => {
  const benchConversation = 'conv-bench-budget';
  const fixedNow = 1_700_500_000_000;
  seedMemoryItems(benchConversation, 500, fixedNow);

  const query = 'What do we know about topic-5 and keyword-3?';

  // Tight budget: something must be injected, but never more than the cap.
  const tightBudget = 200;
  const tightConfig = makeConfig({ maxInjectTokens: tightBudget });
  const tightRecall = await buildMemoryRecall(query, benchConversation, tightConfig);

  expect(tightRecall.enabled).toBe(true);
  expect(tightRecall.injectedTokens).toBeLessThanOrEqual(tightBudget);
  expect(tightRecall.injectedTokens).toBeGreaterThan(0);

  // Roomy budget for comparison, to confirm the cap is what limits the output.
  const roomyBudget = 2000;
  const roomyConfig = makeConfig({ maxInjectTokens: roomyBudget });
  const roomyRecall = await buildMemoryRecall(query, benchConversation, roomyConfig);

  expect(roomyRecall.injectedTokens).toBeLessThanOrEqual(roomyBudget);
  // A larger budget must yield at least as many injected tokens as the small one.
  expect(roomyRecall.injectedTokens).toBeGreaterThanOrEqual(tightRecall.injectedTokens);
});
|
|
255
|
+
|
|
256
|
+
// Verifies that early termination (ET) fires with low thresholds over 500 items.
// Despite the title, this test does not time anything: it asserts the
// earlyTerminated flag, zero semantic hits, and a non-empty selection.
test('early termination reduces latency when applicable', async () => {
  const benchConversation = 'conv-bench-et';
  const fixedNow = 1_700_500_000_000;
  // 500 items gives ET enough candidates to trigger.
  seedMemoryItems(benchConversation, 500, fixedNow);

  // ET is switched on with deliberately low thresholds so that it triggers;
  // reranking and the dynamic budget are switched off.
  const memoryDefaults = DEFAULT_CONFIG.memory;
  const retrievalDefaults = memoryDefaults.retrieval;
  const etConfig: AssistantConfig = {
    ...DEFAULT_CONFIG,
    memory: {
      ...memoryDefaults,
      embeddings: {
        ...memoryDefaults.embeddings,
        provider: 'local' as const,
        required: false,
      },
      retrieval: {
        ...retrievalDefaults,
        lexicalTopK: 50,
        semanticTopK: 20,
        maxInjectTokens: 750,
        reranking: { ...retrievalDefaults.reranking, enabled: false },
        dynamicBudget: {
          enabled: false,
          minInjectTokens: 160,
          maxInjectTokens: 750,
          targetHeadroomTokens: 900,
        },
        earlyTermination: {
          enabled: true,
          minCandidates: 5,
          minHighConfidence: 3,
          confidenceThreshold: 0.3,
        },
      },
    },
  };

  const query = 'What do we know about topic-5 and keyword-3?';
  const result = await buildMemoryRecall(query, benchConversation, etConfig);

  expect(result.enabled).toBe(true);
  expect(result.earlyTerminated).toBe(true);
  // When ET fires, semantic search is expected to be skipped entirely.
  expect(result.semanticHits).toBe(0);
  expect(result.selectedCount).toBeGreaterThan(0);
});
|
|
306
|
+
|
|
307
|
+
// Compares median wall-clock time of recall with early termination (ET) on vs. off
// while a mocked semantic-search delay is active, and requires ET to be >= 15% faster.
test('early termination is measurably faster than baseline', async () => {
  const benchConversation = 'conv-bench-et-delta';
  const fixedNow = 1_700_500_000_000;
  seedMemoryItems(benchConversation, 500, fixedNow);

  // Stand-in for the Qdrant network round-trip that ET is meant to skip.
  // 100ms is large enough to dominate variable CPU-bound work on slower hosts.
  semanticSearchDelayMs = 100;

  const query = 'What do we know about topic-5 and keyword-3?';

  // Thresholds shared by both configs; only the `enabled` flag differs.
  const etThresholds = {
    minCandidates: 5,
    minHighConfidence: 3,
    confidenceThreshold: 0.3,
  };

  const memoryDefaults = DEFAULT_CONFIG.memory;
  const retrievalDefaults = memoryDefaults.retrieval;
  const etConfig: AssistantConfig = {
    ...DEFAULT_CONFIG,
    memory: {
      ...memoryDefaults,
      embeddings: {
        ...memoryDefaults.embeddings,
        provider: 'local' as const,
        required: false,
      },
      retrieval: {
        ...retrievalDefaults,
        lexicalTopK: 50,
        semanticTopK: 20,
        maxInjectTokens: 750,
        reranking: { ...retrievalDefaults.reranking, enabled: false },
        dynamicBudget: {
          enabled: false,
          minInjectTokens: 160,
          maxInjectTokens: 750,
          targetHeadroomTokens: 900,
        },
        earlyTermination: { enabled: true, ...etThresholds },
      },
    },
  };

  // Baseline: identical to etConfig except that ET is disabled.
  const noEtConfig: AssistantConfig = {
    ...etConfig,
    memory: {
      ...etConfig.memory,
      retrieval: {
        ...etConfig.memory.retrieval,
        earlyTermination: { enabled: false, ...etThresholds },
      },
    },
  };

  // Runs one recall and returns its result together with the elapsed wall-clock ms.
  const timeRecall = async (config: AssistantConfig) => {
    const startedAt = performance.now();
    const recall = await buildMemoryRecall(query, benchConversation, config);
    const elapsedMs = performance.now() - startedAt;
    return { recall, elapsedMs };
  };

  // Middle element of the numerically sorted samples.
  const medianOf = (samples: number[]) => {
    const ordered = [...samples].sort((a, b) => a - b);
    return ordered[Math.floor(samples.length / 2)];
  };

  try {
    // One untimed run of each variant to avoid cold-start bias.
    await buildMemoryRecall(query, benchConversation, etConfig);
    await buildMemoryRecall(query, benchConversation, noEtConfig);

    const iterations = 5;
    const etTimes: number[] = [];
    const baselineTimes: number[] = [];

    // Alternate ET and baseline runs within each iteration.
    for (let round = 0; round < iterations; round += 1) {
      const etRun = await timeRecall(etConfig);
      etTimes.push(etRun.elapsedMs);
      expect(etRun.recall.earlyTerminated).toBe(true);

      const baselineRun = await timeRecall(noEtConfig);
      baselineTimes.push(baselineRun.elapsedMs);
      expect(baselineRun.recall.earlyTerminated).toBe(false);
    }

    const medianEt = medianOf(etTimes);
    const medianBaseline = medianOf(baselineTimes);

    // ET skips the mocked network delay, so it should be measurably faster.
    // The 15% threshold tolerates slower CI hosts, where CPU-bound work takes
    // longer relative to the fixed mock delay.
    const speedup = 1 - medianEt / medianBaseline;
    expect(speedup).toBeGreaterThanOrEqual(0.15);
  } finally {
    // Always restore the mock delay so later tests are unaffected.
    semanticSearchDelayMs = 0;
  }
});
|
|
400
|
+
|
|
401
|
+
// Verifies that the latency reported by buildMemoryRecall (recall.latencyMs) is
// consistent with wall-clock time measured around the call.
// The title previously said "20% tolerance" while the assertions enforce a
// 0.5–1.5 ratio; the title now states the tolerance that is actually checked.
test('recall.latencyMs tracks wall-clock within 50% tolerance', async () => {
  const conversationId = 'conv-bench-wallclock';
  const now = 1_700_500_000_000;
  seedMemoryItems(conversationId, 500, now);

  const config = makeConfig();

  // Use Date.now() to match the timer source used by buildMemoryRecall
  // (which also uses Date.now()), avoiding precision mismatches between
  // integer-ms Date.now() and sub-ms performance.now().
  const wallStart = Date.now();
  const recall = await buildMemoryRecall(
    'What do we know about topic-5 and keyword-3?',
    conversationId,
    config,
  );
  const wallMs = Date.now() - wallStart;

  expect(recall.enabled).toBe(true);
  expect(recall.latencyMs).toBeGreaterThan(0);

  // Self-reported latencyMs should agree with wall-clock within 50%.
  // Tolerance is wide because both sides use Date.now() (integer ms),
  // so on fast runs the quantization error can be large relative to
  // total elapsed time.
  // Math.max(wallMs, 1) guards against dividing by zero when the call
  // completes within a single millisecond tick.
  const ratio = recall.latencyMs / Math.max(wallMs, 1);
  expect(ratio).toBeGreaterThanOrEqual(0.5);
  expect(ratio).toBeLessThanOrEqual(1.5);
});
|
|
430
|
+
});
|