vellum 0.2.1 → 0.2.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +15 -2
- package/bun.lock +71 -100
- package/package.json +5 -3
- package/scripts/capture-x-graphql.ts +562 -0
- package/scripts/ipc/check-swift-decoder-drift.ts +2 -1
- package/scripts/test.sh +5 -0
- package/src/__tests__/__snapshots__/ipc-snapshot.test.ts.snap +133 -34
- package/src/__tests__/account-registry.test.ts +2 -1
- package/src/__tests__/agent-heartbeat-service.test.ts +250 -0
- package/src/__tests__/asset-materialize-tool.test.ts +16 -15
- package/src/__tests__/asset-search-tool.test.ts +23 -22
- package/src/__tests__/attachments-store.test.ts +56 -127
- package/src/__tests__/browser-skill-baseline-tool-payload.test.ts +5 -4
- package/src/__tests__/browser-skill-endstate.test.ts +4 -3
- package/src/__tests__/call-bridge.test.ts +385 -0
- package/src/__tests__/call-constants.test.ts +40 -0
- package/src/__tests__/call-orchestrator.test.ts +130 -4
- package/src/__tests__/call-recovery.test.ts +518 -0
- package/src/__tests__/call-routes-http.test.ts +459 -0
- package/src/__tests__/call-state-machine.test.ts +143 -0
- package/src/__tests__/call-store.test.ts +216 -1
- package/src/__tests__/cli-discover.test.ts +1 -1
- package/src/__tests__/commit-message-enrichment-service.test.ts +148 -7
- package/src/__tests__/compaction.benchmark.test.ts +176 -0
- package/src/__tests__/computer-use-tools.test.ts +250 -0
- package/src/__tests__/config-schema.test.ts +305 -3
- package/src/__tests__/conflict-store.test.ts +2 -1
- package/src/__tests__/contacts-tools.test.ts +331 -0
- package/src/__tests__/conversation-store.test.ts +30 -32
- package/src/__tests__/credential-security-invariants.test.ts +4 -0
- package/src/__tests__/date-context.test.ts +373 -0
- package/src/__tests__/db-schedule-syntax-migration.test.ts +129 -0
- package/src/__tests__/fixtures/media-reuse-fixtures.ts +3 -3
- package/src/__tests__/followup-tools.test.ts +303 -0
- package/src/__tests__/handlers-twilio-config.test.ts +221 -0
- package/src/__tests__/handlers-twitter-config.test.ts +718 -0
- package/src/__tests__/intent-routing.test.ts +64 -57
- package/src/__tests__/ipc-roundtrip.benchmark.test.ts +237 -0
- package/src/__tests__/ipc-snapshot.test.ts +71 -28
- package/src/__tests__/llm-usage-store.test.ts +3 -8
- package/src/__tests__/media-generate-image.test.ts +1 -1
- package/src/__tests__/media-reuse-story.e2e.test.ts +7 -7
- package/src/__tests__/memory-regressions.test.ts +100 -2
- package/src/__tests__/memory-retrieval.benchmark.test.ts +430 -0
- package/src/__tests__/parallel-tool.benchmark.test.ts +294 -0
- package/src/__tests__/playbook-tools.test.ts +342 -0
- package/src/__tests__/profile-compiler.test.ts +2 -1
- package/src/__tests__/provider-commit-message-generator.test.ts +303 -0
- package/src/__tests__/provider-streaming.benchmark.test.ts +773 -0
- package/src/__tests__/recurrence-engine-rruleset.test.ts +78 -0
- package/src/__tests__/recurrence-engine.test.ts +69 -0
- package/src/__tests__/recurrence-types.test.ts +71 -0
- package/src/__tests__/registry.test.ts +5 -3
- package/src/__tests__/relay-server.test.ts +633 -0
- package/src/__tests__/reminder-store.test.ts +6 -3
- package/src/__tests__/reminder.test.ts +43 -77
- package/src/__tests__/run-orchestrator-assistant-events.test.ts +8 -4
- package/src/__tests__/run-orchestrator.test.ts +4 -4
- package/src/__tests__/runtime-attachment-metadata.test.ts +7 -6
- package/src/__tests__/runtime-runs-http.test.ts +4 -4
- package/src/__tests__/runtime-runs.test.ts +4 -4
- package/src/__tests__/schedule-store.test.ts +482 -0
- package/src/__tests__/schedule-tools.test.ts +700 -0
- package/src/__tests__/scheduler-recurrence.test.ts +329 -0
- package/src/__tests__/server-history-render.test.ts +14 -13
- package/src/__tests__/session-conflict-gate.test.ts +28 -25
- package/src/__tests__/session-error.test.ts +28 -0
- package/src/__tests__/session-init.benchmark.test.ts +462 -0
- package/src/__tests__/session-queue.test.ts +71 -48
- package/src/__tests__/session-runtime-assembly.test.ts +161 -0
- package/src/__tests__/session-surfaces-task-progress.test.ts +104 -0
- package/src/__tests__/signup-e2e.test.ts +2 -1
- package/src/__tests__/skill-projection.benchmark.test.ts +328 -0
- package/src/__tests__/skill-script-runner.test.ts +159 -0
- package/src/__tests__/speaker-identification.test.ts +52 -0
- package/src/__tests__/subagent-manager-notify.test.ts +42 -10
- package/src/__tests__/subagent-tools.test.ts +141 -41
- package/src/__tests__/task-compiler.test.ts +2 -1
- package/src/__tests__/task-runner.test.ts +2 -1
- package/src/__tests__/task-scheduler.test.ts +2 -1
- package/src/__tests__/task-tools.test.ts +49 -56
- package/src/__tests__/tool-audit-listener.test.ts +1 -0
- package/src/__tests__/tool-domain-event-publisher.test.ts +2 -0
- package/src/__tests__/tool-execution-pipeline.benchmark.test.ts +500 -0
- package/src/__tests__/tool-executor.test.ts +13 -17
- package/src/__tests__/turn-commit.test.ts +218 -3
- package/src/__tests__/twilio-provider.test.ts +143 -0
- package/src/__tests__/twilio-routes.test.ts +789 -0
- package/src/__tests__/twitter-auth-handler.test.ts +581 -0
- package/src/__tests__/view-image-tool.test.ts +217 -0
- package/src/__tests__/workspace-git-service.test.ts +186 -0
- package/src/__tests__/workspace-heartbeat-service.test.ts +13 -3
- package/src/agent-heartbeat/agent-heartbeat-service.ts +155 -0
- package/src/bundler/app-bundler.ts +12 -8
- package/src/calls/__tests__/twilio-webhook-urls.test.ts +162 -0
- package/src/calls/call-bridge.ts +95 -0
- package/src/calls/call-constants.ts +43 -5
- package/src/calls/call-domain.ts +276 -0
- package/src/calls/call-orchestrator.ts +43 -17
- package/src/calls/call-recovery.ts +207 -0
- package/src/calls/call-state-machine.ts +68 -0
- package/src/calls/call-store.ts +192 -5
- package/src/calls/relay-server.ts +41 -4
- package/src/calls/speaker-identification.ts +213 -0
- package/src/calls/twilio-config.ts +8 -8
- package/src/calls/twilio-provider.ts +13 -9
- package/src/calls/twilio-routes.ts +90 -76
- package/src/calls/twilio-webhook-urls.ts +50 -0
- package/src/calls/types.ts +1 -1
- package/src/cli/config-commands.ts +334 -0
- package/src/cli/core-commands.ts +776 -0
- package/src/cli/doordash.ts +251 -1
- package/src/cli/ipc-client.ts +82 -0
- package/src/cli/map.ts +270 -0
- package/src/cli/twitter.ts +575 -0
- package/src/cli.ts +7 -5
- package/src/commands/__tests__/cc-command-registry.test.ts +319 -0
- package/src/commands/cc-command-registry.ts +209 -0
- package/src/config/bundled-skills/contacts/SKILL.md +39 -0
- package/src/config/bundled-skills/contacts/TOOLS.json +122 -0
- package/src/config/bundled-skills/contacts/tools/contact-merge.ts +9 -0
- package/src/config/bundled-skills/contacts/tools/contact-search.ts +9 -0
- package/src/config/bundled-skills/contacts/tools/contact-upsert.ts +9 -0
- package/src/config/bundled-skills/document/SKILL.md +18 -0
- package/src/config/bundled-skills/document/TOOLS.json +53 -0
- package/src/config/bundled-skills/document/tools/document-create.ts +9 -0
- package/src/config/bundled-skills/document/tools/document-update.ts +9 -0
- package/src/config/bundled-skills/doordash/SKILL.md +82 -23
- package/src/config/bundled-skills/followups/SKILL.md +32 -0
- package/src/config/bundled-skills/followups/TOOLS.json +100 -0
- package/src/config/bundled-skills/followups/tools/followup-create.ts +9 -0
- package/src/config/bundled-skills/followups/tools/followup-list.ts +9 -0
- package/src/config/bundled-skills/followups/tools/followup-resolve.ts +9 -0
- package/src/config/bundled-skills/image-studio/tools/media-generate-image.ts +1 -23
- package/src/config/bundled-skills/messaging/tools/messaging-analyze-style.ts +2 -1
- package/src/config/bundled-skills/playbooks/SKILL.md +31 -0
- package/src/config/bundled-skills/playbooks/TOOLS.json +126 -0
- package/src/config/bundled-skills/playbooks/tools/playbook-create.ts +9 -0
- package/src/config/bundled-skills/playbooks/tools/playbook-delete.ts +9 -0
- package/src/config/bundled-skills/playbooks/tools/playbook-list.ts +9 -0
- package/src/config/bundled-skills/playbooks/tools/playbook-update.ts +9 -0
- package/src/config/bundled-skills/reminder/SKILL.md +20 -0
- package/src/config/bundled-skills/reminder/TOOLS.json +67 -0
- package/src/config/bundled-skills/reminder/tools/reminder-cancel.ts +9 -0
- package/src/config/bundled-skills/reminder/tools/reminder-create.ts +9 -0
- package/src/config/bundled-skills/reminder/tools/reminder-list.ts +9 -0
- package/src/config/bundled-skills/schedule/SKILL.md +74 -0
- package/src/config/bundled-skills/schedule/TOOLS.json +135 -0
- package/src/config/bundled-skills/schedule/tools/schedule-create.ts +9 -0
- package/src/config/bundled-skills/schedule/tools/schedule-delete.ts +9 -0
- package/src/config/bundled-skills/schedule/tools/schedule-list.ts +9 -0
- package/src/config/bundled-skills/schedule/tools/schedule-update.ts +9 -0
- package/src/config/bundled-skills/subagent/SKILL.md +25 -0
- package/src/config/bundled-skills/subagent/TOOLS.json +107 -0
- package/src/config/bundled-skills/subagent/tools/subagent-abort.ts +9 -0
- package/src/config/bundled-skills/subagent/tools/subagent-message.ts +9 -0
- package/src/config/bundled-skills/subagent/tools/subagent-read.ts +9 -0
- package/src/config/bundled-skills/subagent/tools/subagent-spawn.ts +9 -0
- package/src/config/bundled-skills/subagent/tools/subagent-status.ts +9 -0
- package/src/config/bundled-skills/tasks/SKILL.md +28 -0
- package/src/config/bundled-skills/tasks/TOOLS.json +256 -0
- package/src/config/bundled-skills/tasks/tools/task-delete.ts +9 -0
- package/src/config/bundled-skills/tasks/tools/task-list-add.ts +9 -0
- package/src/config/bundled-skills/tasks/tools/task-list-remove.ts +9 -0
- package/src/config/bundled-skills/tasks/tools/task-list-show.ts +9 -0
- package/src/config/bundled-skills/tasks/tools/task-list-update.ts +9 -0
- package/src/config/bundled-skills/tasks/tools/task-list.ts +9 -0
- package/src/config/bundled-skills/tasks/tools/task-run.ts +9 -0
- package/src/config/bundled-skills/tasks/tools/task-save.ts +9 -0
- package/src/config/bundled-skills/twitter/SKILL.md +134 -0
- package/src/config/bundled-skills/watcher/SKILL.md +27 -0
- package/src/config/bundled-skills/watcher/TOOLS.json +147 -0
- package/src/config/bundled-skills/watcher/tools/watcher-create.ts +9 -0
- package/src/config/bundled-skills/watcher/tools/watcher-delete.ts +9 -0
- package/src/config/bundled-skills/watcher/tools/watcher-digest.ts +9 -0
- package/src/config/bundled-skills/watcher/tools/watcher-list.ts +9 -0
- package/src/config/bundled-skills/watcher/tools/watcher-update.ts +9 -0
- package/src/config/defaults.ts +34 -0
- package/src/config/loader.ts +4 -1
- package/src/config/schema.ts +165 -1
- package/src/config/system-prompt.ts +61 -16
- package/src/config/templates/IDENTITY.md +7 -0
- package/src/config/types.ts +4 -0
- package/src/config/vellum-skills/telegram-setup/SKILL.md +1 -5
- package/src/contacts/contact-store.ts +4 -4
- package/src/daemon/assistant-attachments.ts +10 -0
- package/src/daemon/classifier.ts +3 -1
- package/src/daemon/computer-use-session.ts +3 -1
- package/src/daemon/date-context.ts +136 -0
- package/src/daemon/handlers/apps.ts +16 -1
- package/src/daemon/handlers/browser.ts +54 -0
- package/src/daemon/handlers/computer-use.ts +7 -1
- package/src/daemon/handlers/config.ts +205 -5
- package/src/daemon/handlers/diagnostics.ts +5 -1
- package/src/daemon/handlers/documents.ts +18 -29
- package/src/daemon/handlers/home-base.ts +5 -1
- package/src/daemon/handlers/index.ts +40 -277
- package/src/daemon/handlers/misc.ts +9 -1
- package/src/daemon/handlers/publish.ts +6 -1
- package/src/daemon/handlers/sessions.ts +65 -12
- package/src/daemon/handlers/shared.ts +36 -1
- package/src/daemon/handlers/signing.ts +37 -0
- package/src/daemon/handlers/skills.ts +20 -6
- package/src/daemon/handlers/subagents.ts +8 -3
- package/src/daemon/handlers/twitter-auth.ts +169 -0
- package/src/daemon/handlers/work-items.ts +384 -68
- package/src/daemon/ipc-contract-inventory.json +32 -4
- package/src/daemon/ipc-contract.ts +156 -37
- package/src/daemon/ipc-protocol.ts +7 -2
- package/src/daemon/lifecycle.ts +21 -0
- package/src/daemon/main.ts +10 -4
- package/src/daemon/ride-shotgun-handler.ts +75 -10
- package/src/daemon/server.ts +143 -26
- package/src/daemon/session-agent-loop.ts +922 -0
- package/src/daemon/session-attachments.ts +28 -5
- package/src/daemon/session-conflict-gate.ts +18 -109
- package/src/daemon/session-error.ts +24 -3
- package/src/daemon/session-lifecycle.ts +147 -0
- package/src/daemon/session-media-retry.ts +147 -0
- package/src/daemon/session-messaging.ts +145 -0
- package/src/daemon/session-notifiers.ts +164 -0
- package/src/daemon/session-process.ts +2 -2
- package/src/daemon/session-queue-manager.ts +1 -0
- package/src/daemon/session-runtime-assembly.ts +52 -0
- package/src/daemon/session-skill-tools.ts +124 -5
- package/src/daemon/session-slash.ts +3 -0
- package/src/daemon/session-surfaces.ts +77 -2
- package/src/daemon/session-tool-setup.ts +216 -2
- package/src/daemon/session-usage.ts +0 -2
- package/src/daemon/session.ts +114 -1404
- package/src/daemon/video-thumbnail.ts +60 -0
- package/src/doordash/client.ts +121 -27
- package/src/doordash/queries.ts +1 -2
- package/src/export/formatter.ts +3 -1
- package/src/followups/followup-store.ts +4 -2
- package/src/followups/types.ts +6 -0
- package/src/hooks/templates.ts +1 -1
- package/src/index.ts +32 -1153
- package/src/memory/attachments-store.ts +28 -83
- package/src/memory/channel-delivery-store.ts +7 -21
- package/src/memory/clarification-resolver.ts +6 -5
- package/src/memory/conflict-intent.ts +114 -0
- package/src/memory/contradiction-checker.ts +3 -2
- package/src/memory/conversation-key-store.ts +10 -29
- package/src/memory/conversation-store.ts +2 -1
- package/src/memory/db.ts +96 -2
- package/src/memory/entity-extractor.ts +6 -3
- package/src/memory/items-extractor.ts +5 -4
- package/src/memory/job-handlers/conflict.ts +23 -1
- package/src/memory/jobs-store.ts +3 -2
- package/src/memory/llm-usage-store.ts +1 -2
- package/src/memory/runs-store.ts +1 -2
- package/src/memory/schema.ts +23 -2
- package/src/messaging/style-analyzer.ts +3 -2
- package/src/messaging/thread-summarizer.ts +8 -12
- package/src/messaging/triage-engine.ts +4 -2
- package/src/providers/openrouter/client.ts +20 -0
- package/src/providers/registry.ts +8 -0
- package/src/runtime/gateway-client.ts +36 -0
- package/src/runtime/http-server.ts +166 -22
- package/src/runtime/routes/attachment-routes.ts +2 -3
- package/src/runtime/routes/call-routes.ts +140 -0
- package/src/runtime/routes/channel-routes.ts +125 -88
- package/src/runtime/routes/conversation-routes.ts +5 -5
- package/src/runtime/routes/run-routes.ts +2 -2
- package/src/runtime/run-orchestrator.ts +9 -3
- package/src/schedule/recurrence-engine.ts +138 -0
- package/src/schedule/recurrence-types.ts +67 -0
- package/src/schedule/schedule-store.ts +102 -57
- package/src/schedule/scheduler.ts +9 -6
- package/src/security/oauth2.ts +29 -4
- package/src/security/secret-allowlist.ts +46 -0
- package/src/skills/clawhub.ts +1 -1
- package/src/subagent/manager.ts +40 -8
- package/src/swarm/backend-claude-code.ts +64 -9
- package/src/swarm/worker-prompts.ts +2 -1
- package/src/tasks/SPEC.md +34 -28
- package/src/tasks/ephemeral-permissions.ts +16 -7
- package/src/tasks/task-compiler.ts +5 -4
- package/src/tasks/task-runner.ts +10 -5
- package/src/tasks/task-scheduler.ts +1 -1
- package/src/tasks/tool-sanitizer.ts +36 -0
- package/src/tools/assets/search.ts +4 -4
- package/src/tools/browser/api-map.ts +293 -0
- package/src/tools/browser/auto-navigate.ts +270 -0
- package/src/tools/browser/browser-execution.ts +2 -1
- package/src/tools/browser/browser-manager.ts +2 -2
- package/src/tools/browser/network-recorder.ts +5 -4
- package/src/tools/browser/x-auto-navigate.ts +207 -0
- package/src/tools/calls/call-end.ts +17 -67
- package/src/tools/calls/call-start.ts +24 -85
- package/src/tools/calls/call-status.ts +35 -51
- package/src/tools/claude-code/claude-code.ts +207 -11
- package/src/tools/contacts/contact-merge.ts +46 -78
- package/src/tools/contacts/contact-search.ts +35 -79
- package/src/tools/contacts/contact-upsert.ts +35 -108
- package/src/tools/credentials/vault.ts +20 -4
- package/src/tools/document/document-tool.ts +71 -144
- package/src/tools/executor.ts +129 -10
- package/src/tools/followups/followup_create.ts +46 -88
- package/src/tools/followups/followup_list.ts +34 -74
- package/src/tools/followups/followup_resolve.ts +31 -66
- package/src/tools/host-terminal/cli-discover.ts +2 -1
- package/src/tools/host-terminal/host-shell.ts +10 -0
- package/src/tools/memory/handlers.ts +5 -4
- package/src/tools/network/__tests__/web-search.test.ts +427 -0
- package/src/tools/network/script-proxy/__tests__/logging.test.ts +248 -0
- package/src/tools/network/script-proxy/__tests__/policy.test.ts +234 -0
- package/src/tools/network/script-proxy/__tests__/router.test.ts +76 -0
- package/src/tools/network/web-fetch.ts +18 -6
- package/src/tools/playbooks/index.ts +4 -5
- package/src/tools/playbooks/playbook-create.ts +3 -47
- package/src/tools/playbooks/playbook-delete.ts +1 -25
- package/src/tools/playbooks/playbook-list.ts +1 -28
- package/src/tools/playbooks/playbook-update.ts +3 -51
- package/src/tools/reminder/reminder.ts +5 -78
- package/src/tools/schedule/create.ts +69 -74
- package/src/tools/schedule/delete.ts +21 -47
- package/src/tools/schedule/list.ts +55 -74
- package/src/tools/schedule/update.ts +77 -84
- package/src/tools/subagent/abort.ts +29 -58
- package/src/tools/subagent/message.ts +30 -63
- package/src/tools/subagent/read.ts +53 -84
- package/src/tools/subagent/spawn.ts +43 -82
- package/src/tools/subagent/status.ts +42 -71
- package/src/tools/swarm/delegate.ts +2 -1
- package/src/tools/tasks/index.ts +8 -8
- package/src/tools/tasks/task-delete.ts +60 -88
- package/src/tools/tasks/task-list.ts +31 -52
- package/src/tools/tasks/task-run.ts +72 -108
- package/src/tools/tasks/task-save.ts +33 -65
- package/src/tools/tasks/work-item-enqueue.ts +183 -215
- package/src/tools/tasks/work-item-list.ts +33 -63
- package/src/tools/tasks/work-item-remove.ts +45 -97
- package/src/tools/tasks/work-item-update.ts +91 -163
- package/src/tools/terminal/backends/native.ts +3 -1
- package/src/tools/tool-manifest.ts +0 -62
- package/src/tools/types.ts +6 -0
- package/src/tools/ui-surface/definitions.ts +3 -1
- package/src/tools/watch/screen-watch.ts +3 -1
- package/src/tools/watcher/create.ts +52 -98
- package/src/tools/watcher/delete.ts +20 -46
- package/src/tools/watcher/digest.ts +36 -70
- package/src/tools/watcher/list.ts +49 -79
- package/src/tools/watcher/update.ts +45 -91
- package/src/twitter/client.ts +690 -0
- package/src/twitter/session.ts +91 -0
- package/src/usage/types.ts +0 -1
- package/src/util/truncate.ts +6 -0
- package/src/watcher/providers/slack.ts +2 -1
- package/src/watcher/watcher-store.ts +3 -2
- package/src/work-items/work-item-store.ts +27 -2
- package/src/workspace/commit-message-enrichment-service.ts +31 -7
- package/src/workspace/git-service.ts +87 -22
- package/src/workspace/provider-commit-message-generator.ts +269 -0
- package/src/workspace/turn-commit.ts +62 -3
- package/src/tools/contacts/index.ts +0 -4
- package/src/tools/document/index.ts +0 -5
- package/src/tools/followups/index.ts +0 -3
- package/src/tools/subagent/index.ts +0 -5
- /package/src/__tests__/{memory-context-benchmark.test.ts → memory-context-benchmark.benchmark.test.ts} +0 -0
|
@@ -0,0 +1,430 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Memory Retrieval Benchmark
|
|
3
|
+
*
|
|
4
|
+
* Measures end-to-end memory recall time with varying database sizes.
|
|
5
|
+
* Validates latency stays within acceptable bounds and token budget
|
|
6
|
+
* enforcement works correctly.
|
|
7
|
+
*/
|
|
8
|
+
import { afterAll, beforeAll, beforeEach, describe, expect, mock, test } from 'bun:test';
|
|
9
|
+
import { mkdtempSync, rmSync } from 'node:fs';
|
|
10
|
+
import { tmpdir } from 'node:os';
|
|
11
|
+
import { join } from 'node:path';
|
|
12
|
+
|
|
13
|
+
// Fresh temporary directory for this benchmark file.
const testDir = mkdtempSync(join(tmpdir(), 'mem-retrieval-bench-'));

// Stub the platform module: the data dir is testDir, the socket/pid/db/log
// paths all resolve to files inside testDir, and ensureDataDir is a no-op.
mock.module('../util/platform.js', () => {
  // Builds a predicate that compares the live process.platform on each call.
  const runningOn = (platformName: string) => () => process.platform === platformName;
  // Builds a getter that resolves a file name inside the temporary directory.
  const insideTestDir = (fileName: string) => () => join(testDir, fileName);

  return {
    getDataDir: () => testDir,
    isMacOS: runningOn('darwin'),
    isLinux: runningOn('linux'),
    isWindows: runningOn('win32'),
    getSocketPath: insideTestDir('test.sock'),
    getPidPath: insideTestDir('test.pid'),
    getDbPath: insideTestDir('test.db'),
    getLogPath: insideTestDir('test.log'),
    ensureDataDir: () => {},
  };
});
|
|
26
|
+
|
|
27
|
+
// Stub the logger module: getLogger() hands back a proxy on which every
// property lookup yields a function that does nothing.
mock.module('../util/logger.js', () => {
  const createSilentLogger = () => {
    const handler = {
      get() {
        return () => {};
      },
    };
    return new Proxy({} as Record<string, unknown>, handler);
  };

  return { getLogger: createSilentLogger };
});
|
|
32
|
+
|
|
33
|
+
// Artificial latency (ms) for the mocked semantic search. While it is above
// zero the mock sleeps that long before answering, standing in for the Qdrant
// network round-trip that early termination is designed to skip.
let semanticSearchDelayMs = 0;

// Stub the semantic search module: searches always come back empty (after the
// optional delay) and no error is ever classified as a Qdrant connection error.
mock.module('../memory/search/semantic.js', () => {
  async function semanticSearch() {
    const delayMs = semanticSearchDelayMs;
    if (delayMs > 0) {
      await Bun.sleep(delayMs);
    }
    return [];
  }

  const isQdrantConnectionError = () => false;

  return { semanticSearch, isQdrantConnectionError };
});
|
|
47
|
+
|
|
48
|
+
// Stub the embedding backend: status mirrors config.memory.enabled and is
// never degraded; embedding returns a single all-zero vector of length 1536.
mock.module('../memory/embedding-backend.js', () => {
  function getMemoryBackendStatus(config: { memory: { enabled: boolean } }) {
    const { enabled } = config.memory;
    return {
      enabled,
      degraded: false,
      provider: 'local',
      model: 'mock-embedding',
      reason: null,
    };
  }

  async function embedWithBackend() {
    const zeroVector = Array.from({ length: 1536 }, () => 0);
    return {
      provider: 'local' as const,
      model: 'mock-embedding',
      vectors: [zeroVector],
    };
  }

  return { getMemoryBackendStatus, embedWithBackend };
});
|
|
62
|
+
|
|
63
|
+
import { DEFAULT_CONFIG } from '../config/defaults.js';
|
|
64
|
+
import { getDb, initializeDb, resetDb } from '../memory/db.js';
|
|
65
|
+
import { buildMemoryRecall } from '../memory/retriever.js';
|
|
66
|
+
import { conversations, memorySegments, messages } from '../memory/schema.js';
|
|
67
|
+
import type { AssistantConfig } from '../config/types.js';
|
|
68
|
+
|
|
69
|
+
/**
 * Inserts one conversation row and then `count` message rows, each paired
 * with one memory segment row carrying the same text.
 *
 * Rows alternate between the 'user' and 'assistant' roles (even indexes are
 * 'user'), and row `i` is stamped with `now + i`. The text cycles through 20
 * topic labels and 10 keyword labels so that queries naming a topic/keyword
 * have matching rows.
 *
 * @param conversationId id of the conversation row; also embedded in the
 *   generated message and segment ids
 * @param count number of message/segment pairs to insert
 * @param now base timestamp used for the conversation and offset per row
 */
function seedMemoryItems(conversationId: string, count: number, now: number): void {
  const db = getDb();

  db.insert(conversations)
    .values({
      id: conversationId,
      title: null,
      createdAt: now,
      updatedAt: now,
      totalInputTokens: 0,
      totalOutputTokens: 0,
      totalEstimatedCost: 0,
      contextSummary: null,
      contextCompactedMessageCount: 0,
      contextCompactedAt: null,
    })
    .run();

  let index = 0;
  while (index < count) {
    const messageId = `msg-${conversationId}-${index}`;
    const role = index % 2 === 0 ? 'user' : 'assistant';
    const timestamp = now + index;
    const text = `Memory item ${index}: information about topic-${index % 20} including keyword-${index % 10} details.`;

    db.insert(messages)
      .values({
        id: messageId,
        conversationId,
        role,
        content: JSON.stringify([{ type: 'text', text }]),
        createdAt: timestamp,
      })
      .run();

    db.insert(memorySegments)
      .values({
        id: `seg-${conversationId}-${index}`,
        messageId,
        conversationId,
        role,
        segmentIndex: 0,
        text,
        tokenEstimate: 20,
        scopeId: 'default',
        createdAt: timestamp,
        updatedAt: timestamp,
      })
      .run();

    index += 1;
  }
}
|
|
108
|
+
|
|
109
|
+
/**
 * Builds the assistant config used by these benchmarks on top of
 * DEFAULT_CONFIG: local, non-required embeddings; lexicalTopK 50 and
 * semanticTopK 20; reranking and the dynamic budget both disabled.
 *
 * @param overrides optional `maxInjectTokens`; applied to both the retrieval
 *   cap and the dynamic-budget cap, defaulting to 750 when omitted
 * @returns a new config object; DEFAULT_CONFIG itself is not modified
 */
function makeConfig(overrides?: { maxInjectTokens?: number }): AssistantConfig {
  const injectBudget = overrides?.maxInjectTokens ?? 750;
  const defaultMemory = DEFAULT_CONFIG.memory;
  const defaultRetrieval = defaultMemory.retrieval;

  const embeddings = {
    ...defaultMemory.embeddings,
    provider: 'local' as const,
    required: false,
  };

  const retrieval = {
    ...defaultRetrieval,
    lexicalTopK: 50,
    semanticTopK: 20,
    maxInjectTokens: injectBudget,
    reranking: { ...defaultRetrieval.reranking, enabled: false },
    dynamicBudget: {
      enabled: false,
      minInjectTokens: 160,
      maxInjectTokens: injectBudget,
      targetHeadroomTokens: 900,
    },
  };

  return {
    ...DEFAULT_CONFIG,
    memory: { ...defaultMemory, embeddings, retrieval },
  };
}
|
|
135
|
+
|
|
136
|
+
describe('Memory retrieval benchmark', () => {
|
|
137
|
+
// Initialize the database once before any test in this suite runs.
beforeAll(() => {
  initializeDb();
});

// Start every test from empty tables and default mock state.
beforeEach(() => {
  // Tests may raise the simulated semantic-search latency. Restore the default
  // here so a test that fails before cleaning up cannot slow down or skew the
  // tests that run after it.
  semanticSearchDelayMs = 0;

  // Keep this order when adding tables.
  // NOTE(review): presumably dependent rows must go before the rows they
  // reference — confirm against the schema.
  const tablesToClear = [
    'memory_item_sources',
    'memory_item_entities',
    'memory_entity_relations',
    'memory_entities',
    'memory_embeddings',
    'memory_summaries',
    'memory_items',
    'memory_segment_fts',
    'memory_segments',
    'messages',
    'conversations',
    'memory_jobs',
    'memory_checkpoints',
  ];

  const db = getDb();
  for (const table of tablesToClear) {
    db.run(`DELETE FROM ${table}`);
  }
});

// Reset the database handle and remove the temporary directory.
afterAll(() => {
  resetDb();
  try {
    rmSync(testDir, { recursive: true });
  } catch {
    // best effort cleanup
  }
});
|
|
166
|
+
|
|
167
|
+
// Latency ceiling per seeded row count. The thresholds are deliberately
// relaxed — they guard against severe regressions, not precise benchmarking.
const latencyCases = [
  { itemCount: 100, maxLatencyMs: 500 },
  { itemCount: 500, maxLatencyMs: 1000 },
  { itemCount: 2000, maxLatencyMs: 2000 },
];

for (const { itemCount, maxLatencyMs } of latencyCases) {
  test(`retrieval completes under ${maxLatencyMs}ms for ${itemCount} items`, async () => {
    const conversationId = `conv-bench-${itemCount}`;
    seedMemoryItems(conversationId, itemCount, 1_700_500_000_000);

    const recall = await buildMemoryRecall(
      'What do we know about topic-5 and keyword-3?',
      conversationId,
      makeConfig(),
    );

    // Recall must be active, healthy, and actually find something...
    expect(recall.enabled).toBe(true);
    expect(recall.degraded).toBe(false);
    expect(recall.lexicalHits).toBeGreaterThan(0);
    expect(recall.selectedCount).toBeGreaterThan(0);
    // ...within the ceiling for this data size.
    expect(recall.latencyMs).toBeLessThan(maxLatencyMs);
  });
}
|
|
224
|
+
|
|
225
|
+
test('token budget enforcement: maxInjectTokens is respected', async () => {
  const conversationId = 'conv-bench-budget';
  seedMemoryItems(conversationId, 500, 1_700_500_000_000);

  const query = 'What do we know about topic-5 and keyword-3?';
  // Runs the same recall with only the injection cap varied.
  const recallWithBudget = (maxInjectTokens: number) =>
    buildMemoryRecall(query, conversationId, makeConfig({ maxInjectTokens }));

  const smallBudget = 200;
  const largeBudget = 2000;

  const constrained = await recallWithBudget(smallBudget);
  expect(constrained.enabled).toBe(true);
  expect(constrained.injectedTokens).toBeLessThanOrEqual(smallBudget);
  expect(constrained.injectedTokens).toBeGreaterThan(0);

  // Repeat with a larger cap to verify the small cap actually constrains.
  const roomy = await recallWithBudget(largeBudget);
  expect(roomy.injectedTokens).toBeLessThanOrEqual(largeBudget);
  // More budget must never yield fewer injected tokens.
  expect(roomy.injectedTokens).toBeGreaterThanOrEqual(constrained.injectedTokens);
});
|
|
255
|
+
|
|
256
|
+
test('early termination reduces latency when applicable', async () => {
  const conversationId = 'conv-bench-et';
  // Seed enough items that early termination can trigger.
  seedMemoryItems(conversationId, 500, 1_700_500_000_000);

  // Start from the shared benchmark config (750-token cap, reranking and
  // dynamic budget off) and switch early termination on with low thresholds
  // so it triggers.
  const baseConfig = makeConfig();
  const etConfig: AssistantConfig = {
    ...baseConfig,
    memory: {
      ...baseConfig.memory,
      retrieval: {
        ...baseConfig.memory.retrieval,
        earlyTermination: {
          enabled: true,
          minCandidates: 5,
          minHighConfidence: 3,
          confidenceThreshold: 0.3,
        },
      },
    },
  };

  const recall = await buildMemoryRecall(
    'What do we know about topic-5 and keyword-3?',
    conversationId,
    etConfig,
  );

  expect(recall.enabled).toBe(true);
  expect(recall.earlyTerminated).toBe(true);
  // Semantic search should be skipped when early termination fires.
  expect(recall.semanticHits).toBe(0);
  expect(recall.selectedCount).toBeGreaterThan(0);
});
|
|
306
|
+
|
|
307
|
+
// Compares median recall time with early termination (ET) on versus off
// while the semantic search mock is slowed down, and expects ET to win by
// at least 15%.
test('early termination is measurably faster than baseline', async () => {
  const conversationId = 'conv-bench-et-delta';
  const seedTimestamp = 1_700_500_000_000;
  seedMemoryItems(conversationId, 500, seedTimestamp);

  const query = 'What do we know about topic-5 and keyword-3?';
  const iterations = 5;

  // Builds the benchmark config; the two variants differ only in whether
  // early termination is enabled.
  const buildConfig = (earlyTerminationEnabled: boolean): AssistantConfig => ({
    ...DEFAULT_CONFIG,
    memory: {
      ...DEFAULT_CONFIG.memory,
      embeddings: {
        ...DEFAULT_CONFIG.memory.embeddings,
        provider: 'local' as const,
        required: false,
      },
      retrieval: {
        ...DEFAULT_CONFIG.memory.retrieval,
        lexicalTopK: 50,
        semanticTopK: 20,
        maxInjectTokens: 750,
        reranking: { ...DEFAULT_CONFIG.memory.retrieval.reranking, enabled: false },
        dynamicBudget: {
          enabled: false,
          minInjectTokens: 160,
          maxInjectTokens: 750,
          targetHeadroomTokens: 900,
        },
        earlyTermination: {
          enabled: earlyTerminationEnabled,
          minCandidates: 5,
          minHighConfidence: 3,
          confidenceThreshold: 0.3,
        },
      },
    },
  });

  const etConfig = buildConfig(true);
  const noEtConfig = buildConfig(false);

  // Runs one recall and reports how long it took alongside the result.
  const timeRecall = async (config: AssistantConfig) => {
    const startedAt = performance.now();
    const result = await buildMemoryRecall(query, conversationId, config);
    return { elapsedMs: performance.now() - startedAt, result };
  };

  // Middle element of the sorted samples (sample count equals `iterations`).
  const medianOf = (samples: number[]) =>
    [...samples].sort((a, b) => a - b)[Math.floor(iterations / 2)];

  // Simulate the Qdrant network round-trip that ET is designed to skip.
  // 100ms is chosen to dominate variable CPU-bound work on slower hosts.
  semanticSearchDelayMs = 100;

  try {
    // Warm-up pass for both variants so cold-start cost does not skew timings.
    await buildMemoryRecall(query, conversationId, etConfig);
    await buildMemoryRecall(query, conversationId, noEtConfig);

    const etTimes: number[] = [];
    const baselineTimes: number[] = [];

    // Alternate ET and baseline runs within each iteration.
    for (let round = 0; round < iterations; round += 1) {
      const etRun = await timeRecall(etConfig);
      etTimes.push(etRun.elapsedMs);
      expect(etRun.result.earlyTerminated).toBe(true);

      const baselineRun = await timeRecall(noEtConfig);
      baselineTimes.push(baselineRun.elapsedMs);
      expect(baselineRun.result.earlyTerminated).toBe(false);
    }

    const medianEt = medianOf(etTimes);
    const medianBaseline = medianOf(baselineTimes);

    // ET skips the mocked network delay, so it should be measurably faster.
    // The 15% threshold tolerates slower CI hosts, where CPU-bound work
    // takes longer relative to the fixed mock delay.
    const speedup = 1 - medianEt / medianBaseline;
    expect(speedup).toBeGreaterThanOrEqual(0.15);
  } finally {
    // Always restore the mock delay so later tests are unaffected.
    semanticSearchDelayMs = 0;
  }
});
|
|
400
|
+
|
|
401
|
+
// Checks that the latency self-reported by buildMemoryRecall roughly agrees
// with the wall-clock time measured around the call. The title states the
// 50% tolerance that the assertions below actually enforce.
test('recall.latencyMs tracks wall-clock within 50% tolerance', async () => {
  const conversationId = 'conv-bench-wallclock';
  const now = 1_700_500_000_000;
  seedMemoryItems(conversationId, 500, now);

  const config = makeConfig();

  // Use Date.now() to match the timer source used by buildMemoryRecall
  // (which also uses Date.now()), avoiding precision mismatches between
  // integer-ms Date.now() and sub-ms performance.now().
  const wallStart = Date.now();
  const recall = await buildMemoryRecall(
    'What do we know about topic-5 and keyword-3?',
    conversationId,
    config,
  );
  const wallMs = Date.now() - wallStart;

  expect(recall.enabled).toBe(true);
  expect(recall.latencyMs).toBeGreaterThan(0);

  // Self-reported latencyMs should agree with wall-clock within 50%.
  // Tolerance is wide because both sides use Date.now() (integer ms),
  // so on fast runs the quantization error can be large relative to
  // total elapsed time.
  // Math.max(wallMs, 1) guards against dividing by zero when the measured
  // wall-clock difference rounds down to 0ms.
  const ratio = recall.latencyMs / Math.max(wallMs, 1);
  expect(ratio).toBeGreaterThanOrEqual(0.5);
  expect(ratio).toBeLessThanOrEqual(1.5);
});
|
|
430
|
+
});
|