@vellumai/assistant 0.4.16 → 0.4.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Dockerfile +6 -6
- package/README.md +1 -2
- package/eslint.config.mjs +2 -2
- package/package.json +1 -1
- package/src/__tests__/access-request-decision.test.ts +128 -120
- package/src/__tests__/account-registry.test.ts +121 -110
- package/src/__tests__/active-skill-tools.test.ts +200 -172
- package/src/__tests__/actor-token-service.test.ts +341 -274
- package/src/__tests__/agent-loop-thinking.test.ts +28 -19
- package/src/__tests__/agent-loop.test.ts +798 -378
- package/src/__tests__/anthropic-provider.test.ts +405 -247
- package/src/__tests__/app-builder-tool-scripts.test.ts +97 -97
- package/src/__tests__/app-bundler.test.ts +112 -79
- package/src/__tests__/app-executors.test.ts +205 -178
- package/src/__tests__/app-git-history.test.ts +90 -73
- package/src/__tests__/app-git-service.test.ts +67 -53
- package/src/__tests__/app-open-proxy.test.ts +29 -25
- package/src/__tests__/approval-conversation-turn.test.ts +100 -81
- package/src/__tests__/approval-hardcoded-copy-guard.test.ts +45 -17
- package/src/__tests__/approval-message-composer.test.ts +119 -119
- package/src/__tests__/approval-primitive.test.ts +264 -233
- package/src/__tests__/approval-routes-http.test.ts +4 -3
- package/src/__tests__/asset-materialize-tool.test.ts +250 -178
- package/src/__tests__/asset-search-tool.test.ts +251 -191
- package/src/__tests__/assistant-attachment-directive.test.ts +187 -142
- package/src/__tests__/assistant-attachments.test.ts +254 -186
- package/src/__tests__/assistant-event-hub.test.ts +105 -63
- package/src/__tests__/assistant-event.test.ts +66 -58
- package/src/__tests__/assistant-events-sse-hardening.test.ts +113 -73
- package/src/__tests__/assistant-feature-flag-guard.test.ts +78 -52
- package/src/__tests__/assistant-feature-flag-guardrails.test.ts +48 -45
- package/src/__tests__/assistant-feature-flags-integration.test.ts +118 -77
- package/src/__tests__/assistant-id-boundary-guard.test.ts +158 -104
- package/src/__tests__/attachments-store.test.ts +240 -183
- package/src/__tests__/attachments.test.ts +70 -62
- package/src/__tests__/audit-log-rotation.test.ts +50 -35
- package/src/__tests__/browser-fill-credential.test.ts +169 -101
- package/src/__tests__/browser-manager.test.ts +97 -75
- package/src/__tests__/browser-runtime-check.test.ts +16 -15
- package/src/__tests__/browser-skill-baseline-tool-payload.test.ts +12 -10
- package/src/__tests__/browser-skill-endstate.test.ts +97 -72
- package/src/__tests__/bundle-scanner.test.ts +47 -22
- package/src/__tests__/bundled-asset.test.ts +74 -47
- package/src/__tests__/call-constants.test.ts +19 -19
- package/src/__tests__/call-controller.test.ts +1073 -751
- package/src/__tests__/call-conversation-messages.test.ts +90 -65
- package/src/__tests__/call-domain.test.ts +149 -121
- package/src/__tests__/call-pointer-message-composer.test.ts +113 -83
- package/src/__tests__/call-pointer-messages.test.ts +213 -154
- package/src/__tests__/call-pointer-no-hardcoded-copy.guard.test.ts +9 -10
- package/src/__tests__/call-recovery.test.ts +232 -212
- package/src/__tests__/call-routes-http.test.ts +328 -279
- package/src/__tests__/call-start-guardian-guard.test.ts +32 -30
- package/src/__tests__/call-state-machine.test.ts +62 -51
- package/src/__tests__/call-state.test.ts +89 -75
- package/src/__tests__/call-store.test.ts +387 -316
- package/src/__tests__/callback-handoff-copy.test.ts +84 -82
- package/src/__tests__/canonical-guardian-store.test.ts +331 -280
- package/src/__tests__/channel-approval-routes.test.ts +1643 -1126
- package/src/__tests__/channel-approval.test.ts +139 -137
- package/src/__tests__/channel-approvals.test.ts +226 -182
- package/src/__tests__/channel-delivery-store.test.ts +232 -194
- package/src/__tests__/channel-guardian.test.ts +6 -3
- package/src/__tests__/channel-invite-transport.test.ts +107 -92
- package/src/__tests__/channel-policy.test.ts +42 -38
- package/src/__tests__/channel-readiness-service.test.ts +119 -102
- package/src/__tests__/channel-reply-delivery.test.ts +147 -118
- package/src/__tests__/channel-retry-sweep.test.ts +153 -110
- package/src/__tests__/checker.test.ts +3309 -1850
- package/src/__tests__/clarification-resolver.test.ts +91 -79
- package/src/__tests__/classifier.test.ts +64 -54
- package/src/__tests__/claude-code-skill-regression.test.ts +42 -37
- package/src/__tests__/claude-code-tool-profiles.test.ts +31 -29
- package/src/__tests__/clawhub.test.ts +92 -82
- package/src/__tests__/cli.test.ts +30 -30
- package/src/__tests__/clipboard.test.ts +53 -46
- package/src/__tests__/commit-guarantee.test.ts +59 -52
- package/src/__tests__/commit-message-enrichment-service.test.ts +203 -75
- package/src/__tests__/compaction.benchmark.test.ts +33 -31
- package/src/__tests__/computer-use-session-compaction.test.ts +60 -50
- package/src/__tests__/computer-use-session-lifecycle.test.ts +145 -117
- package/src/__tests__/computer-use-session-working-dir.test.ts +62 -48
- package/src/__tests__/computer-use-skill-baseline.test.ts +22 -19
- package/src/__tests__/computer-use-skill-endstate.test.ts +45 -31
- package/src/__tests__/computer-use-skill-lifecycle-cleanup.test.ts +121 -88
- package/src/__tests__/computer-use-skill-manifest-regression.test.ts +65 -42
- package/src/__tests__/computer-use-skill-proxy-bridge.test.ts +33 -18
- package/src/__tests__/computer-use-tools.test.ts +121 -98
- package/src/__tests__/config-schema.test.ts +443 -347
- package/src/__tests__/config-watcher.test.ts +96 -81
- package/src/__tests__/confirmation-request-guardian-bridge.test.ts +148 -133
- package/src/__tests__/conflict-intent-tokenization.test.ts +96 -78
- package/src/__tests__/conflict-policy.test.ts +151 -80
- package/src/__tests__/conflict-store.test.ts +203 -157
- package/src/__tests__/connection-policy.test.ts +89 -59
- package/src/__tests__/contacts-tools.test.ts +247 -178
- package/src/__tests__/context-memory-e2e.test.ts +306 -214
- package/src/__tests__/context-token-estimator.test.ts +114 -74
- package/src/__tests__/context-window-manager.test.ts +269 -167
- package/src/__tests__/contradiction-checker.test.ts +161 -135
- package/src/__tests__/conversation-attention-store.test.ts +350 -290
- package/src/__tests__/conversation-attention-telegram.test.ts +156 -114
- package/src/__tests__/conversation-pairing.test.ts +220 -113
- package/src/__tests__/conversation-routes-guardian-reply.test.ts +164 -104
- package/src/__tests__/conversation-routes.test.ts +71 -41
- package/src/__tests__/conversation-store.test.ts +390 -235
- package/src/__tests__/credential-broker-browser-fill.test.ts +325 -250
- package/src/__tests__/credential-broker-server-use.test.ts +283 -243
- package/src/__tests__/credential-broker.test.ts +128 -74
- package/src/__tests__/credential-host-pattern-match.test.ts +64 -44
- package/src/__tests__/credential-metadata-store.test.ts +360 -311
- package/src/__tests__/credential-policy-validate.test.ts +81 -65
- package/src/__tests__/credential-resolve.test.ts +212 -145
- package/src/__tests__/credential-security-e2e.test.ts +144 -103
- package/src/__tests__/credential-security-invariants.test.ts +253 -208
- package/src/__tests__/credential-selection.test.ts +254 -146
- package/src/__tests__/credential-vault-unit.test.ts +531 -341
- package/src/__tests__/credential-vault.test.ts +761 -484
- package/src/__tests__/daemon-assistant-events.test.ts +91 -66
- package/src/__tests__/daemon-lifecycle.test.ts +258 -190
- package/src/__tests__/daemon-server-session-init.test.ts +257 -191
- package/src/__tests__/date-context.test.ts +314 -249
- package/src/__tests__/db-migration-rollback.test.ts +259 -130
- package/src/__tests__/db-schedule-syntax-migration.test.ts +78 -41
- package/src/__tests__/delete-managed-skill-tool.test.ts +77 -53
- package/src/__tests__/deterministic-verification-control-plane.test.ts +183 -135
- package/src/__tests__/dictation-mode-detection.test.ts +77 -55
- package/src/__tests__/dictation-profile-store.test.ts +70 -56
- package/src/__tests__/dictation-text-processing.test.ts +53 -35
- package/src/__tests__/diff.test.ts +102 -98
- package/src/__tests__/domain-normalize.test.ts +54 -54
- package/src/__tests__/domain-policy.test.ts +71 -55
- package/src/__tests__/dynamic-page-surface.test.ts +31 -33
- package/src/__tests__/dynamic-skill-workflow-prompt.test.ts +69 -69
- package/src/__tests__/edit-engine.test.ts +56 -56
- package/src/__tests__/elevenlabs-client.test.ts +117 -91
- package/src/__tests__/elevenlabs-config.test.ts +32 -31
- package/src/__tests__/email-classifier.test.ts +15 -12
- package/src/__tests__/email-cli.test.ts +121 -108
- package/src/__tests__/emit-signal-routing-intent.test.ts +76 -69
- package/src/__tests__/encrypted-store.test.ts +180 -154
- package/src/__tests__/entity-extractor.test.ts +108 -87
- package/src/__tests__/entity-search.test.ts +664 -258
- package/src/__tests__/ephemeral-permissions.test.ts +224 -188
- package/src/__tests__/event-bus.test.ts +81 -77
- package/src/__tests__/extract-email.test.ts +51 -0
- package/src/__tests__/file-edit-tool.test.ts +62 -44
- package/src/__tests__/file-ops-service.test.ts +131 -114
- package/src/__tests__/file-read-tool.test.ts +48 -31
- package/src/__tests__/file-write-tool.test.ts +43 -37
- package/src/__tests__/filesystem-tools.test.ts +238 -209
- package/src/__tests__/followup-tools.test.ts +237 -162
- package/src/__tests__/forbidden-legacy-symbols.test.ts +19 -20
- package/src/__tests__/frontmatter.test.ts +96 -81
- package/src/__tests__/fuzzy-match-property.test.ts +75 -81
- package/src/__tests__/fuzzy-match.test.ts +71 -65
- package/src/__tests__/gateway-client-managed-outbound.test.ts +76 -57
- package/src/__tests__/gateway-only-enforcement.test.ts +467 -369
- package/src/__tests__/gateway-only-guard.test.ts +54 -56
- package/src/__tests__/gemini-image-service.test.ts +113 -100
- package/src/__tests__/gemini-provider.test.ts +297 -220
- package/src/__tests__/get-weather.test.ts +188 -114
- package/src/__tests__/gmail-integration.test.ts +47 -46
- package/src/__tests__/guardian-action-conversation-turn.test.ts +226 -171
- package/src/__tests__/guardian-action-copy-generator.test.ts +111 -93
- package/src/__tests__/guardian-action-followup-executor.test.ts +215 -151
- package/src/__tests__/guardian-action-followup-store.test.ts +199 -167
- package/src/__tests__/guardian-action-grant-mint-consume.test.ts +297 -250
- package/src/__tests__/guardian-action-late-reply.test.ts +462 -316
- package/src/__tests__/guardian-action-no-hardcoded-copy.test.ts +23 -18
- package/src/__tests__/guardian-action-store.test.ts +158 -109
- package/src/__tests__/guardian-action-sweep.test.ts +114 -100
- package/src/__tests__/guardian-actions-endpoint.test.ts +440 -256
- package/src/__tests__/guardian-control-plane-policy.test.ts +497 -331
- package/src/__tests__/guardian-decision-primitive-canonical.test.ts +217 -215
- package/src/__tests__/guardian-dispatch.test.ts +316 -256
- package/src/__tests__/guardian-grant-minting.test.ts +247 -178
- package/src/__tests__/guardian-outbound-http.test.ts +337 -209
- package/src/__tests__/guardian-principal-id-roundtrip.test.ts +99 -96
- package/src/__tests__/guardian-question-copy.test.ts +17 -17
- package/src/__tests__/guardian-question-mode.test.ts +134 -100
- package/src/__tests__/guardian-routing-invariants.test.ts +679 -613
- package/src/__tests__/guardian-routing-state.test.ts +256 -209
- package/src/__tests__/guardian-verification-intent-routing.test.ts +94 -88
- package/src/__tests__/guardian-verification-voice-binding.test.ts +47 -41
- package/src/__tests__/guardian-verify-setup-skill-regression.test.ts +0 -1
- package/src/__tests__/handle-user-message-secret-resume.test.ts +43 -21
- package/src/__tests__/handlers-add-trust-rule-metadata.test.ts +92 -76
- package/src/__tests__/handlers-cu-observation-blob.test.ts +103 -70
- package/src/__tests__/handlers-ipc-blob-probe.test.ts +77 -51
- package/src/__tests__/handlers-slack-config.test.ts +63 -54
- package/src/__tests__/handlers-task-submit-slash.test.ts +18 -18
- package/src/__tests__/handlers-telegram-config.test.ts +662 -329
- package/src/__tests__/handlers-twitter-config.test.ts +525 -298
- package/src/__tests__/handlers-user-message-approval-consumption.test.ts +270 -195
- package/src/__tests__/headless-browser-interactions.test.ts +444 -280
- package/src/__tests__/headless-browser-navigate.test.ts +116 -79
- package/src/__tests__/headless-browser-read-tools.test.ts +123 -86
- package/src/__tests__/headless-browser-snapshot.test.ts +71 -56
- package/src/__tests__/heartbeat-service.test.ts +76 -58
- package/src/__tests__/history-repair-observability.test.ts +14 -14
- package/src/__tests__/history-repair.test.ts +171 -167
- package/src/__tests__/home-base-bootstrap.test.ts +30 -27
- package/src/__tests__/hooks-blocking.test.ts +86 -37
- package/src/__tests__/hooks-cli.test.ts +104 -68
- package/src/__tests__/hooks-config.test.ts +81 -43
- package/src/__tests__/hooks-discovery.test.ts +106 -96
- package/src/__tests__/hooks-integration.test.ts +78 -72
- package/src/__tests__/hooks-manager.test.ts +99 -61
- package/src/__tests__/hooks-runner.test.ts +94 -71
- package/src/__tests__/hooks-settings.test.ts +69 -64
- package/src/__tests__/hooks-templates.test.ts +85 -54
- package/src/__tests__/hooks-ts-runner.test.ts +82 -45
- package/src/__tests__/hooks-watch.test.ts +32 -22
- package/src/__tests__/host-file-edit-tool.test.ts +190 -148
- package/src/__tests__/host-file-read-tool.test.ts +86 -63
- package/src/__tests__/host-file-write-tool.test.ts +98 -64
- package/src/__tests__/host-shell-tool.test.ts +342 -233
- package/src/__tests__/inbound-invite-redemption.test.ts +194 -152
- package/src/__tests__/ingress-member-store.test.ts +163 -159
- package/src/__tests__/ingress-reconcile.test.ts +183 -142
- package/src/__tests__/ingress-routes-http.test.ts +441 -356
- package/src/__tests__/ingress-url-consistency.test.ts +125 -64
- package/src/__tests__/integration-status.test.ts +93 -73
- package/src/__tests__/intent-routing.test.ts +148 -118
- package/src/__tests__/invite-redemption-service.test.ts +163 -121
- package/src/__tests__/ipc-blob-store.test.ts +104 -91
- package/src/__tests__/ipc-contract-inventory.test.ts +27 -15
- package/src/__tests__/ipc-contract.test.ts +24 -23
- package/src/__tests__/ipc-protocol.test.ts +52 -46
- package/src/__tests__/ipc-roundtrip.benchmark.test.ts +61 -50
- package/src/__tests__/ipc-snapshot.test.ts +1135 -1056
- package/src/__tests__/ipc-validate.test.ts +240 -179
- package/src/__tests__/key-migration.test.ts +123 -90
- package/src/__tests__/keychain.test.ts +150 -123
- package/src/__tests__/lifecycle-docs-guard.test.ts +65 -64
- package/src/__tests__/llm-usage-store.test.ts +112 -87
- package/src/__tests__/managed-skill-lifecycle.test.ts +147 -108
- package/src/__tests__/managed-store.test.ts +411 -360
- package/src/__tests__/mcp-cli.test.ts +189 -123
- package/src/__tests__/mcp-health-check.test.ts +26 -21
- package/src/__tests__/media-generate-image.test.ts +122 -99
- package/src/__tests__/media-reuse-story.e2e.test.ts +282 -214
- package/src/__tests__/media-visibility-policy.test.ts +86 -38
- package/src/__tests__/memory-context-benchmark.benchmark.test.ts +146 -100
- package/src/__tests__/memory-lifecycle-e2e.test.ts +385 -297
- package/src/__tests__/memory-query-builder.test.ts +32 -33
- package/src/__tests__/memory-recall-quality.test.ts +761 -407
- package/src/__tests__/memory-regressions.experimental.test.ts +443 -380
- package/src/__tests__/memory-regressions.test.ts +3725 -2642
- package/src/__tests__/memory-retrieval-budget.test.ts +7 -8
- package/src/__tests__/memory-retrieval.benchmark.test.ts +144 -109
- package/src/__tests__/memory-upsert-concurrency.test.ts +292 -201
- package/src/__tests__/messaging-send-tool.test.ts +36 -29
- package/src/__tests__/migration-cli-flows.test.ts +69 -53
- package/src/__tests__/migration-ordering.test.ts +103 -86
- package/src/__tests__/mime-builder.test.ts +55 -32
- package/src/__tests__/mock-signup-server.test.ts +384 -246
- package/src/__tests__/model-intents.test.ts +61 -37
- package/src/__tests__/no-direct-anthropic-sdk-imports.test.ts +9 -12
- package/src/__tests__/no-is-trusted-guard.test.ts +24 -21
- package/src/__tests__/non-member-access-request.test.ts +294 -249
- package/src/__tests__/notification-broadcaster.test.ts +99 -81
- package/src/__tests__/notification-decision-fallback.test.ts +223 -178
- package/src/__tests__/notification-decision-strategy.test.ts +375 -337
- package/src/__tests__/notification-deep-link.test.ts +67 -61
- package/src/__tests__/notification-guardian-path.test.ts +248 -206
- package/src/__tests__/notification-routing-intent.test.ts +166 -93
- package/src/__tests__/notification-telegram-adapter.test.ts +60 -46
- package/src/__tests__/notification-thread-candidate-validation.test.ts +78 -75
- package/src/__tests__/notification-thread-candidates.test.ts +64 -61
- package/src/__tests__/oauth-callback-registry.test.ts +40 -30
- package/src/__tests__/oauth-connect-handler.test.ts +109 -89
- package/src/__tests__/oauth-scope-policy.test.ts +63 -55
- package/src/__tests__/oauth2-gateway-transport.test.ts +252 -174
- package/src/__tests__/onboarding-starter-tasks.test.ts +93 -89
- package/src/__tests__/onboarding-template-contract.test.ts +93 -94
- package/src/__tests__/openai-provider.test.ts +366 -274
- package/src/__tests__/pairing-concurrent.test.ts +18 -12
- package/src/__tests__/pairing-routes.test.ts +45 -41
- package/src/__tests__/parallel-tool.benchmark.test.ts +108 -58
- package/src/__tests__/parser.test.ts +316 -226
- package/src/__tests__/path-classifier.test.ts +24 -25
- package/src/__tests__/path-policy.test.ts +187 -147
- package/src/__tests__/phone.test.ts +36 -36
- package/src/__tests__/platform-move-helper.test.ts +48 -40
- package/src/__tests__/platform-socket-path.test.ts +23 -24
- package/src/__tests__/platform-workspace-migration.test.ts +464 -414
- package/src/__tests__/platform.test.ts +61 -53
- package/src/__tests__/playbook-execution.test.ts +397 -265
- package/src/__tests__/playbook-tools.test.ts +267 -196
- package/src/__tests__/prebuilt-home-base-seed.test.ts +30 -27
- package/src/__tests__/pricing.test.ts +316 -136
- package/src/__tests__/profile-compiler.test.ts +206 -188
- package/src/__tests__/provider-commit-message-generator.test.ts +114 -106
- package/src/__tests__/provider-error-scenarios.test.ts +212 -158
- package/src/__tests__/provider-fail-open-selection.test.ts +51 -44
- package/src/__tests__/provider-registry-ollama.test.ts +13 -9
- package/src/__tests__/provider-streaming.benchmark.test.ts +232 -183
- package/src/__tests__/proxy-approval-callback.test.ts +180 -119
- package/src/__tests__/public-ingress-urls.test.ts +112 -94
- package/src/__tests__/qdrant-manager.test.ts +147 -98
- package/src/__tests__/ratelimit.test.ts +152 -82
- package/src/__tests__/recording-handler.test.ts +273 -151
- package/src/__tests__/recording-intent-fallback.test.ts +94 -75
- package/src/__tests__/recording-intent-handler.test.ts +422 -292
- package/src/__tests__/recording-intent.test.ts +578 -379
- package/src/__tests__/recording-state-machine.test.ts +530 -316
- package/src/__tests__/recurrence-engine-rruleset.test.ts +150 -92
- package/src/__tests__/recurrence-engine.test.ts +81 -41
- package/src/__tests__/recurrence-types.test.ts +63 -44
- package/src/__tests__/relay-server.test.ts +2131 -1602
- package/src/__tests__/reminder-store.test.ts +158 -80
- package/src/__tests__/reminder.test.ts +113 -109
- package/src/__tests__/remote-skill-policy.test.ts +96 -72
- package/src/__tests__/request-file-tool.test.ts +74 -67
- package/src/__tests__/response-tier.test.ts +131 -74
- package/src/__tests__/runtime-attachment-metadata.test.ts +107 -70
- package/src/__tests__/runtime-events-sse-parity.test.ts +167 -145
- package/src/__tests__/runtime-events-sse.test.ts +67 -51
- package/src/__tests__/sandbox-diagnostics.test.ts +66 -56
- package/src/__tests__/sandbox-host-parity.test.ts +377 -301
- package/src/__tests__/scaffold-managed-skill-tool.test.ts +213 -161
- package/src/__tests__/schedule-store.test.ts +268 -205
- package/src/__tests__/schedule-tools.test.ts +702 -524
- package/src/__tests__/scheduler-recurrence.test.ts +240 -130
- package/src/__tests__/scoped-approval-grants.test.ts +258 -168
- package/src/__tests__/scoped-grant-security-matrix.test.ts +160 -146
- package/src/__tests__/script-proxy-certs.test.ts +38 -35
- package/src/__tests__/script-proxy-connect-tunnel.test.ts +71 -46
- package/src/__tests__/script-proxy-decision-trace.test.ts +161 -84
- package/src/__tests__/script-proxy-http-forwarder.test.ts +146 -129
- package/src/__tests__/script-proxy-injection-runtime.test.ts +139 -113
- package/src/__tests__/script-proxy-mitm-handler.test.ts +226 -142
- package/src/__tests__/script-proxy-policy-runtime.test.ts +126 -86
- package/src/__tests__/script-proxy-policy.test.ts +308 -153
- package/src/__tests__/script-proxy-rewrite-specificity.test.ts +74 -62
- package/src/__tests__/script-proxy-router.test.ts +111 -77
- package/src/__tests__/script-proxy-session-manager.test.ts +156 -113
- package/src/__tests__/script-proxy-session-runtime.test.ts +28 -24
- package/src/__tests__/secret-allowlist.test.ts +105 -90
- package/src/__tests__/secret-ingress-handler.test.ts +41 -30
- package/src/__tests__/secret-onetime-send.test.ts +67 -50
- package/src/__tests__/secret-prompt-log-hygiene.test.ts +35 -31
- package/src/__tests__/secret-response-routing.test.ts +50 -41
- package/src/__tests__/secret-scanner-executor.test.ts +152 -111
- package/src/__tests__/secret-scanner.test.ts +495 -413
- package/src/__tests__/secure-keys.test.ts +132 -121
- package/src/__tests__/send-endpoint-busy.test.ts +313 -232
- package/src/__tests__/send-notification-tool.test.ts +43 -42
- package/src/__tests__/sensitive-output-placeholders.test.ts +72 -64
- package/src/__tests__/sequence-store.test.ts +335 -167
- package/src/__tests__/server-history-render.test.ts +341 -202
- package/src/__tests__/session-abort-tool-results.test.ts +133 -70
- package/src/__tests__/session-approval-overrides.test.ts +93 -91
- package/src/__tests__/session-confirmation-signals.test.ts +252 -160
- package/src/__tests__/session-conflict-gate.test.ts +775 -585
- package/src/__tests__/session-error.test.ts +222 -191
- package/src/__tests__/session-evictor.test.ts +79 -62
- package/src/__tests__/session-init.benchmark.test.ts +170 -108
- package/src/__tests__/session-load-history-repair.test.ts +273 -139
- package/src/__tests__/session-messaging-secret-redirect.test.ts +130 -90
- package/src/__tests__/session-pre-run-repair.test.ts +106 -59
- package/src/__tests__/session-profile-injection.test.ts +198 -130
- package/src/__tests__/session-provider-retry-repair.test.ts +223 -141
- package/src/__tests__/session-queue.test.ts +624 -321
- package/src/__tests__/session-runtime-assembly.test.ts +425 -329
- package/src/__tests__/session-runtime-workspace.test.ts +69 -61
- package/src/__tests__/session-skill-tools.test.ts +973 -678
- package/src/__tests__/session-slash-known.test.ts +185 -133
- package/src/__tests__/session-slash-queue.test.ts +147 -81
- package/src/__tests__/session-slash-unknown.test.ts +135 -90
- package/src/__tests__/session-surfaces-task-progress.test.ts +122 -87
- package/src/__tests__/session-tool-setup-app-refresh.test.ts +338 -177
- package/src/__tests__/session-tool-setup-memory-scope.test.ts +63 -40
- package/src/__tests__/session-tool-setup-side-effect-flag.test.ts +60 -37
- package/src/__tests__/session-tool-setup-tools-disabled.test.ts +28 -26
- package/src/__tests__/session-undo.test.ts +43 -30
- package/src/__tests__/session-workspace-cache-state.test.ts +108 -67
- package/src/__tests__/session-workspace-injection.test.ts +245 -117
- package/src/__tests__/session-workspace-tool-tracking.test.ts +260 -93
- package/src/__tests__/shared-filesystem-errors.test.ts +47 -47
- package/src/__tests__/shell-credential-ref.test.ts +126 -90
- package/src/__tests__/shell-identity.test.ts +134 -111
- package/src/__tests__/shell-parser-fuzz.test.ts +263 -179
- package/src/__tests__/shell-parser-property.test.ts +435 -288
- package/src/__tests__/shell-tool-proxy-mode.test.ts +142 -70
- package/src/__tests__/size-guard.test.ts +42 -44
- package/src/__tests__/skill-feature-flags-integration.test.ts +79 -52
- package/src/__tests__/skill-feature-flags.test.ts +75 -47
- package/src/__tests__/skill-include-graph.test.ts +143 -148
- package/src/__tests__/skill-load-feature-flag.test.ts +94 -59
- package/src/__tests__/skill-load-tool.test.ts +371 -199
- package/src/__tests__/skill-projection-feature-flag.test.ts +131 -88
- package/src/__tests__/skill-projection.benchmark.test.ts +93 -65
- package/src/__tests__/skill-script-runner-host.test.ts +460 -250
- package/src/__tests__/skill-script-runner-sandbox.test.ts +168 -108
- package/src/__tests__/skill-script-runner.test.ts +115 -74
- package/src/__tests__/skill-tool-factory.test.ts +140 -96
- package/src/__tests__/skill-tool-manifest.test.ts +306 -210
- package/src/__tests__/skill-version-hash.test.ts +70 -56
- package/src/__tests__/skills.test.ts +0 -1
- package/src/__tests__/slack-channel-config.test.ts +127 -84
- package/src/__tests__/slack-skill.test.ts +60 -47
- package/src/__tests__/slash-commands-catalog.test.ts +37 -31
- package/src/__tests__/slash-commands-parser.test.ts +71 -64
- package/src/__tests__/slash-commands-resolver.test.ts +143 -107
- package/src/__tests__/slash-commands-rewrite.test.ts +22 -22
- package/src/__tests__/sms-messaging-provider.test.ts +74 -47
- package/src/__tests__/speaker-identification.test.ts +28 -25
- package/src/__tests__/starter-bundle.test.ts +27 -23
- package/src/__tests__/starter-task-flow.test.ts +67 -52
- package/src/__tests__/subagent-manager-notify.test.ts +154 -108
- package/src/__tests__/subagent-tools.test.ts +311 -270
- package/src/__tests__/subagent-types.test.ts +40 -40
- package/src/__tests__/surface-mutex-cleanup.test.ts +42 -30
- package/src/__tests__/swarm-dag-pathological.test.ts +122 -111
- package/src/__tests__/swarm-orchestrator.test.ts +135 -101
- package/src/__tests__/swarm-plan-validator.test.ts +125 -73
- package/src/__tests__/swarm-recursion.test.ts +58 -46
- package/src/__tests__/swarm-router-planner.test.ts +99 -74
- package/src/__tests__/swarm-session-integration.test.ts +148 -91
- package/src/__tests__/swarm-tool.test.ts +65 -45
- package/src/__tests__/swarm-worker-backend.test.ts +59 -45
- package/src/__tests__/swarm-worker-runner.test.ts +133 -118
- package/src/__tests__/system-prompt.test.ts +290 -256
- package/src/__tests__/task-compiler.test.ts +176 -120
- package/src/__tests__/task-management-tools.test.ts +561 -456
- package/src/__tests__/task-memory-cleanup.test.ts +627 -362
- package/src/__tests__/task-runner.test.ts +117 -94
- package/src/__tests__/task-scheduler.test.ts +113 -84
- package/src/__tests__/task-tools.test.ts +349 -264
- package/src/__tests__/terminal-sandbox.test.ts +138 -108
- package/src/__tests__/terminal-tools.test.ts +350 -305
- package/src/__tests__/thread-seed-composer.test.ts +307 -180
- package/src/__tests__/tool-approval-handler.test.ts +238 -137
- package/src/__tests__/tool-audit-listener.test.ts +69 -69
- package/src/__tests__/tool-domain-event-publisher.test.ts +142 -132
- package/src/__tests__/tool-execution-abort-cleanup.test.ts +153 -146
- package/src/__tests__/tool-execution-pipeline.benchmark.test.ts +136 -105
- package/src/__tests__/tool-executor-lifecycle-events.test.ts +355 -239
- package/src/__tests__/tool-executor-redaction.test.ts +112 -109
- package/src/__tests__/tool-executor-shell-integration.test.ts +130 -79
- package/src/__tests__/tool-executor.test.ts +1274 -674
- package/src/__tests__/tool-grant-request-escalation.test.ts +401 -283
- package/src/__tests__/tool-metrics-listener.test.ts +97 -85
- package/src/__tests__/tool-notification-listener.test.ts +42 -25
- package/src/__tests__/tool-permission-simulate-handler.test.ts +137 -113
- package/src/__tests__/tool-policy.test.ts +44 -25
- package/src/__tests__/tool-profiling-listener.test.ts +99 -93
- package/src/__tests__/tool-result-truncation.test.ts +5 -4
- package/src/__tests__/tool-trace-listener.test.ts +131 -111
- package/src/__tests__/top-level-renderer.test.ts +62 -58
- package/src/__tests__/top-level-scanner.test.ts +68 -64
- package/src/__tests__/trace-emitter.test.ts +56 -56
- package/src/__tests__/trust-context-guards.test.ts +65 -65
- package/src/__tests__/trust-store.test.ts +1239 -806
- package/src/__tests__/trusted-contact-approval-notifier.test.ts +339 -275
- package/src/__tests__/trusted-contact-inline-approval-integration.test.ts +484 -373
- package/src/__tests__/trusted-contact-lifecycle-notifications.test.ts +264 -241
- package/src/__tests__/trusted-contact-multichannel.test.ts +182 -142
- package/src/__tests__/trusted-contact-verification.test.ts +251 -231
- package/src/__tests__/turn-commit.test.ts +259 -200
- package/src/__tests__/twilio-config.test.ts +49 -41
- package/src/__tests__/twilio-provider.test.ts +140 -126
- package/src/__tests__/twilio-rest.test.ts +22 -18
- package/src/__tests__/twilio-routes-elevenlabs.test.ts +188 -162
- package/src/__tests__/twilio-routes-twiml.test.ts +55 -55
- package/src/__tests__/twilio-routes.test.ts +389 -281
- package/src/__tests__/twitter-auth-handler.test.ts +184 -139
- package/src/__tests__/twitter-cli-error-shaping.test.ts +88 -73
- package/src/__tests__/twitter-cli-routing.test.ts +146 -99
- package/src/__tests__/twitter-oauth-client.test.ts +82 -65
- package/src/__tests__/update-bulletin-format.test.ts +69 -66
- package/src/__tests__/update-bulletin-state.test.ts +66 -60
- package/src/__tests__/update-bulletin.test.ts +150 -114
- package/src/__tests__/update-template-contract.test.ts +15 -10
- package/src/__tests__/url-safety.test.ts +288 -265
- package/src/__tests__/user-reference.test.ts +32 -32
- package/src/__tests__/view-image-tool.test.ts +118 -96
- package/src/__tests__/voice-invite-redemption.test.ts +111 -106
- package/src/__tests__/voice-quality.test.ts +117 -102
- package/src/__tests__/voice-scoped-grant-consumer.test.ts +204 -146
- package/src/__tests__/voice-session-bridge.test.ts +351 -216
- package/src/__tests__/weather-skill-regression.test.ts +170 -120
- package/src/__tests__/web-fetch.test.ts +664 -526
- package/src/__tests__/web-search.test.ts +379 -213
- package/src/__tests__/work-item-output.test.ts +90 -53
- package/src/__tests__/workspace-git-service.test.ts +437 -356
- package/src/__tests__/workspace-heartbeat-service.test.ts +125 -91
- package/src/__tests__/workspace-lifecycle.test.ts +98 -64
- package/src/__tests__/workspace-policy.test.ts +139 -71
- package/src/commands/__tests__/cc-command-registry.test.ts +142 -134
- package/src/config/__tests__/feature-flag-registry-guard.test.ts +48 -39
- package/src/config/bundled-skills/chatgpt-import/tools/chatgpt-import.ts +44 -4
- package/src/config/bundled-skills/doordash/__tests__/doordash-session.test.ts +0 -1
- package/src/config/bundled-skills/messaging/SKILL.md +9 -7
- package/src/config/bundled-skills/messaging/tools/gmail-outreach-scan.ts +15 -5
- package/src/config/bundled-skills/messaging/tools/gmail-sender-digest.ts +16 -5
- package/src/config/bundled-skills/messaging/tools/messaging-reply.ts +11 -7
- package/src/config/bundled-skills/slack/tools/slack-scan-digest.ts +34 -32
- package/src/config/bundled-tool-registry.ts +2 -0
- package/src/config/env.ts +38 -29
- package/src/daemon/handlers/skills.ts +18 -10
- package/src/daemon/ipc-contract/messages.ts +1 -0
- package/src/daemon/ipc-contract/surfaces.ts +7 -1
- package/src/daemon/session-agent-loop-handlers.ts +5 -0
- package/src/daemon/session-agent-loop.ts +1 -1
- package/src/daemon/session-process.ts +1 -1
- package/src/daemon/session-surfaces.ts +42 -2
- package/src/memory/db-connection.ts +16 -10
- package/src/messaging/providers/gmail/adapter.ts +10 -3
- package/src/messaging/providers/gmail/client.ts +280 -72
- package/src/runtime/auth/__tests__/context.test.ts +75 -65
- package/src/runtime/auth/__tests__/credential-service.test.ts +137 -114
- package/src/runtime/auth/__tests__/guard-tests.test.ts +84 -90
- package/src/runtime/auth/__tests__/ipc-auth-context.test.ts +40 -40
- package/src/runtime/auth/__tests__/middleware.test.ts +80 -74
- package/src/runtime/auth/__tests__/policy.test.ts +9 -9
- package/src/runtime/auth/__tests__/route-policy.test.ts +76 -65
- package/src/runtime/auth/__tests__/scopes.test.ts +68 -60
- package/src/runtime/auth/__tests__/subject.test.ts +54 -54
- package/src/runtime/auth/__tests__/token-service.test.ts +115 -108
- package/src/runtime/auth/scopes.ts +3 -0
- package/src/runtime/auth/token-service.ts +78 -48
- package/src/runtime/auth/types.ts +2 -1
- package/src/runtime/http-server.ts +2 -1
- package/src/security/secure-keys.ts +103 -53
- package/src/sequence/reply-matcher.ts +10 -6
- package/src/skills/frontmatter.ts +9 -6
- package/src/tools/browser/__tests__/auth-cache.test.ts +69 -63
- package/src/tools/browser/__tests__/auth-detector.test.ts +218 -157
- package/src/tools/browser/__tests__/jit-auth.test.ts +83 -99
- package/src/tools/ui-surface/definitions.ts +2 -1
- package/src/util/platform.ts +0 -12
- package/docs/architecture/http-token-refresh.md +0 -274
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
import { describe, expect, test } from
|
|
1
|
+
import { describe, expect, test } from "bun:test";
|
|
2
2
|
|
|
3
|
-
import { computeRecallBudget } from
|
|
3
|
+
import { computeRecallBudget } from "../memory/retrieval-budget.js";
|
|
4
4
|
|
|
5
|
-
describe(
|
|
6
|
-
test(
|
|
5
|
+
describe("memory retrieval budget", () => {
|
|
6
|
+
test("clamps to maxInjectTokens when headroom is large", () => {
|
|
7
7
|
const budget = computeRecallBudget({
|
|
8
8
|
estimatedPromptTokens: 20_000,
|
|
9
9
|
maxInputTokens: 180_000,
|
|
@@ -14,7 +14,7 @@ describe('memory retrieval budget', () => {
|
|
|
14
14
|
expect(budget).toBe(10_000);
|
|
15
15
|
});
|
|
16
16
|
|
|
17
|
-
test(
|
|
17
|
+
test("clamps to minInjectTokens when headroom is tight", () => {
|
|
18
18
|
const budget = computeRecallBudget({
|
|
19
19
|
estimatedPromptTokens: 172_000,
|
|
20
20
|
maxInputTokens: 180_000,
|
|
@@ -25,7 +25,7 @@ describe('memory retrieval budget', () => {
|
|
|
25
25
|
expect(budget).toBe(1_200);
|
|
26
26
|
});
|
|
27
27
|
|
|
28
|
-
test(
|
|
28
|
+
test("returns computed value when between min and max", () => {
|
|
29
29
|
const budget = computeRecallBudget({
|
|
30
30
|
estimatedPromptTokens: 165_000,
|
|
31
31
|
maxInputTokens: 180_000,
|
|
@@ -36,7 +36,7 @@ describe('memory retrieval budget', () => {
|
|
|
36
36
|
expect(budget).toBe(7_000);
|
|
37
37
|
});
|
|
38
38
|
|
|
39
|
-
test(
|
|
39
|
+
test("normalizes invalid min/max ordering safely", () => {
|
|
40
40
|
const budget = computeRecallBudget({
|
|
41
41
|
estimatedPromptTokens: 150_000,
|
|
42
42
|
maxInputTokens: 180_000,
|
|
@@ -47,4 +47,3 @@ describe('memory retrieval budget', () => {
|
|
|
47
47
|
expect(budget).toBe(12_000);
|
|
48
48
|
});
|
|
49
49
|
});
|
|
50
|
-
|
|
@@ -5,37 +5,45 @@
|
|
|
5
5
|
* Validates latency stays within acceptable bounds and token budget
|
|
6
6
|
* enforcement works correctly.
|
|
7
7
|
*/
|
|
8
|
-
import { mkdtempSync, rmSync } from
|
|
9
|
-
import { tmpdir } from
|
|
10
|
-
import { join } from
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
8
|
+
import { mkdtempSync, rmSync } from "node:fs";
|
|
9
|
+
import { tmpdir } from "node:os";
|
|
10
|
+
import { join } from "node:path";
|
|
11
|
+
import {
|
|
12
|
+
afterAll,
|
|
13
|
+
beforeAll,
|
|
14
|
+
beforeEach,
|
|
15
|
+
describe,
|
|
16
|
+
expect,
|
|
17
|
+
mock,
|
|
18
|
+
test,
|
|
19
|
+
} from "bun:test";
|
|
20
|
+
|
|
21
|
+
const testDir = mkdtempSync(join(tmpdir(), "mem-retrieval-bench-"));
|
|
22
|
+
|
|
23
|
+
mock.module("../util/platform.js", () => ({
|
|
17
24
|
getDataDir: () => testDir,
|
|
18
|
-
isMacOS: () => process.platform ===
|
|
19
|
-
isLinux: () => process.platform ===
|
|
20
|
-
isWindows: () => process.platform ===
|
|
21
|
-
getSocketPath: () => join(testDir,
|
|
22
|
-
getPidPath: () => join(testDir,
|
|
23
|
-
getDbPath: () => join(testDir,
|
|
24
|
-
getLogPath: () => join(testDir,
|
|
25
|
+
isMacOS: () => process.platform === "darwin",
|
|
26
|
+
isLinux: () => process.platform === "linux",
|
|
27
|
+
isWindows: () => process.platform === "win32",
|
|
28
|
+
getSocketPath: () => join(testDir, "test.sock"),
|
|
29
|
+
getPidPath: () => join(testDir, "test.pid"),
|
|
30
|
+
getDbPath: () => join(testDir, "test.db"),
|
|
31
|
+
getLogPath: () => join(testDir, "test.log"),
|
|
25
32
|
ensureDataDir: () => {},
|
|
26
33
|
}));
|
|
27
34
|
|
|
28
|
-
mock.module(
|
|
29
|
-
getLogger: () =>
|
|
30
|
-
|
|
31
|
-
|
|
35
|
+
mock.module("../util/logger.js", () => ({
|
|
36
|
+
getLogger: () =>
|
|
37
|
+
new Proxy({} as Record<string, unknown>, {
|
|
38
|
+
get: () => () => {},
|
|
39
|
+
}),
|
|
32
40
|
}));
|
|
33
41
|
|
|
34
42
|
// Counter for semantic search invocations — used to verify early termination
|
|
35
43
|
// skips the call entirely rather than relying on flaky wall-clock comparisons.
|
|
36
44
|
let semanticSearchCallCount = 0;
|
|
37
45
|
|
|
38
|
-
mock.module(
|
|
46
|
+
mock.module("../memory/search/semantic.js", () => ({
|
|
39
47
|
semanticSearch: async () => {
|
|
40
48
|
semanticSearchCallCount++;
|
|
41
49
|
return [];
|
|
@@ -43,64 +51,76 @@ mock.module('../memory/search/semantic.js', () => ({
|
|
|
43
51
|
isQdrantConnectionError: () => false,
|
|
44
52
|
}));
|
|
45
53
|
|
|
46
|
-
mock.module(
|
|
54
|
+
mock.module("../memory/embedding-backend.js", () => ({
|
|
47
55
|
getMemoryBackendStatus: (config: { memory: { enabled: boolean } }) => ({
|
|
48
56
|
enabled: config.memory.enabled,
|
|
49
57
|
degraded: false,
|
|
50
|
-
provider:
|
|
51
|
-
model:
|
|
58
|
+
provider: "local",
|
|
59
|
+
model: "mock-embedding",
|
|
52
60
|
reason: null,
|
|
53
61
|
}),
|
|
54
62
|
embedWithBackend: async () => ({
|
|
55
|
-
provider:
|
|
56
|
-
model:
|
|
63
|
+
provider: "local" as const,
|
|
64
|
+
model: "mock-embedding",
|
|
57
65
|
vectors: [new Array(1536).fill(0)],
|
|
58
66
|
}),
|
|
59
67
|
}));
|
|
60
68
|
|
|
61
|
-
import { DEFAULT_CONFIG } from
|
|
62
|
-
import type { AssistantConfig } from
|
|
63
|
-
import { getDb, initializeDb, resetDb } from
|
|
64
|
-
import { buildMemoryRecall } from
|
|
65
|
-
import { conversations, memorySegments, messages } from
|
|
66
|
-
|
|
67
|
-
function seedMemoryItems(
|
|
69
|
+
import { DEFAULT_CONFIG } from "../config/defaults.js";
|
|
70
|
+
import type { AssistantConfig } from "../config/types.js";
|
|
71
|
+
import { getDb, initializeDb, resetDb } from "../memory/db.js";
|
|
72
|
+
import { buildMemoryRecall } from "../memory/retriever.js";
|
|
73
|
+
import { conversations, memorySegments, messages } from "../memory/schema.js";
|
|
74
|
+
|
|
75
|
+
function seedMemoryItems(
|
|
76
|
+
conversationId: string,
|
|
77
|
+
count: number,
|
|
78
|
+
now: number,
|
|
79
|
+
): void {
|
|
68
80
|
const db = getDb();
|
|
69
|
-
db.insert(conversations)
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
+
db.insert(conversations)
|
|
82
|
+
.values({
|
|
83
|
+
id: conversationId,
|
|
84
|
+
title: null,
|
|
85
|
+
createdAt: now,
|
|
86
|
+
updatedAt: now,
|
|
87
|
+
totalInputTokens: 0,
|
|
88
|
+
totalOutputTokens: 0,
|
|
89
|
+
totalEstimatedCost: 0,
|
|
90
|
+
contextSummary: null,
|
|
91
|
+
contextCompactedMessageCount: 0,
|
|
92
|
+
contextCompactedAt: null,
|
|
93
|
+
})
|
|
94
|
+
.run();
|
|
81
95
|
|
|
82
96
|
for (let i = 0; i < count; i++) {
|
|
83
97
|
const msgId = `msg-${conversationId}-${i}`;
|
|
84
|
-
const text = `Memory item ${i}: information about topic-${
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
98
|
+
const text = `Memory item ${i}: information about topic-${
|
|
99
|
+
i % 20
|
|
100
|
+
} including keyword-${i % 10} details.`;
|
|
101
|
+
db.insert(messages)
|
|
102
|
+
.values({
|
|
103
|
+
id: msgId,
|
|
104
|
+
conversationId,
|
|
105
|
+
role: i % 2 === 0 ? "user" : "assistant",
|
|
106
|
+
content: JSON.stringify([{ type: "text", text }]),
|
|
107
|
+
createdAt: now + i,
|
|
108
|
+
})
|
|
109
|
+
.run();
|
|
110
|
+
db.insert(memorySegments)
|
|
111
|
+
.values({
|
|
112
|
+
id: `seg-${conversationId}-${i}`,
|
|
113
|
+
messageId: msgId,
|
|
114
|
+
conversationId,
|
|
115
|
+
role: i % 2 === 0 ? "user" : "assistant",
|
|
116
|
+
segmentIndex: 0,
|
|
117
|
+
text,
|
|
118
|
+
tokenEstimate: 20,
|
|
119
|
+
scopeId: "default",
|
|
120
|
+
createdAt: now + i,
|
|
121
|
+
updatedAt: now + i,
|
|
122
|
+
})
|
|
123
|
+
.run();
|
|
104
124
|
}
|
|
105
125
|
}
|
|
106
126
|
|
|
@@ -111,7 +131,7 @@ function makeConfig(overrides?: { maxInjectTokens?: number }): AssistantConfig {
|
|
|
111
131
|
...DEFAULT_CONFIG.memory,
|
|
112
132
|
embeddings: {
|
|
113
133
|
...DEFAULT_CONFIG.memory.embeddings,
|
|
114
|
-
provider:
|
|
134
|
+
provider: "local" as const,
|
|
115
135
|
required: false,
|
|
116
136
|
},
|
|
117
137
|
retrieval: {
|
|
@@ -119,7 +139,10 @@ function makeConfig(overrides?: { maxInjectTokens?: number }): AssistantConfig {
|
|
|
119
139
|
lexicalTopK: 50,
|
|
120
140
|
semanticTopK: 20,
|
|
121
141
|
maxInjectTokens: overrides?.maxInjectTokens ?? 750,
|
|
122
|
-
reranking: {
|
|
142
|
+
reranking: {
|
|
143
|
+
...DEFAULT_CONFIG.memory.retrieval.reranking,
|
|
144
|
+
enabled: false,
|
|
145
|
+
},
|
|
123
146
|
dynamicBudget: {
|
|
124
147
|
enabled: false,
|
|
125
148
|
minInjectTokens: 160,
|
|
@@ -131,26 +154,26 @@ function makeConfig(overrides?: { maxInjectTokens?: number }): AssistantConfig {
|
|
|
131
154
|
};
|
|
132
155
|
}
|
|
133
156
|
|
|
134
|
-
describe(
|
|
157
|
+
describe("Memory retrieval benchmark", () => {
|
|
135
158
|
beforeAll(() => {
|
|
136
159
|
initializeDb();
|
|
137
160
|
});
|
|
138
161
|
|
|
139
162
|
beforeEach(() => {
|
|
140
163
|
const db = getDb();
|
|
141
|
-
db.run(
|
|
142
|
-
db.run(
|
|
143
|
-
db.run(
|
|
144
|
-
db.run(
|
|
145
|
-
db.run(
|
|
146
|
-
db.run(
|
|
147
|
-
db.run(
|
|
148
|
-
db.run(
|
|
149
|
-
db.run(
|
|
150
|
-
db.run(
|
|
151
|
-
db.run(
|
|
152
|
-
db.run(
|
|
153
|
-
db.run(
|
|
164
|
+
db.run("DELETE FROM memory_item_sources");
|
|
165
|
+
db.run("DELETE FROM memory_item_entities");
|
|
166
|
+
db.run("DELETE FROM memory_entity_relations");
|
|
167
|
+
db.run("DELETE FROM memory_entities");
|
|
168
|
+
db.run("DELETE FROM memory_embeddings");
|
|
169
|
+
db.run("DELETE FROM memory_summaries");
|
|
170
|
+
db.run("DELETE FROM memory_items");
|
|
171
|
+
db.run("DELETE FROM memory_segment_fts");
|
|
172
|
+
db.run("DELETE FROM memory_segments");
|
|
173
|
+
db.run("DELETE FROM messages");
|
|
174
|
+
db.run("DELETE FROM conversations");
|
|
175
|
+
db.run("DELETE FROM memory_jobs");
|
|
176
|
+
db.run("DELETE FROM memory_checkpoints");
|
|
154
177
|
});
|
|
155
178
|
|
|
156
179
|
afterAll(() => {
|
|
@@ -162,14 +185,14 @@ describe('Memory retrieval benchmark', () => {
|
|
|
162
185
|
}
|
|
163
186
|
});
|
|
164
187
|
|
|
165
|
-
test(
|
|
166
|
-
const conversationId =
|
|
188
|
+
test("retrieval completes under 500ms for 100 items", async () => {
|
|
189
|
+
const conversationId = "conv-bench-100";
|
|
167
190
|
const now = 1_700_500_000_000;
|
|
168
191
|
seedMemoryItems(conversationId, 100, now);
|
|
169
192
|
|
|
170
193
|
const config = makeConfig();
|
|
171
194
|
const recall = await buildMemoryRecall(
|
|
172
|
-
|
|
195
|
+
"What do we know about topic-5 and keyword-3?",
|
|
173
196
|
conversationId,
|
|
174
197
|
config,
|
|
175
198
|
);
|
|
@@ -182,14 +205,14 @@ describe('Memory retrieval benchmark', () => {
|
|
|
182
205
|
expect(recall.latencyMs).toBeLessThan(500);
|
|
183
206
|
});
|
|
184
207
|
|
|
185
|
-
test(
|
|
186
|
-
const conversationId =
|
|
208
|
+
test("retrieval completes under 1000ms for 500 items", async () => {
|
|
209
|
+
const conversationId = "conv-bench-500";
|
|
187
210
|
const now = 1_700_500_000_000;
|
|
188
211
|
seedMemoryItems(conversationId, 500, now);
|
|
189
212
|
|
|
190
213
|
const config = makeConfig();
|
|
191
214
|
const recall = await buildMemoryRecall(
|
|
192
|
-
|
|
215
|
+
"What do we know about topic-5 and keyword-3?",
|
|
193
216
|
conversationId,
|
|
194
217
|
config,
|
|
195
218
|
);
|
|
@@ -201,14 +224,14 @@ describe('Memory retrieval benchmark', () => {
|
|
|
201
224
|
expect(recall.latencyMs).toBeLessThan(1000);
|
|
202
225
|
});
|
|
203
226
|
|
|
204
|
-
test(
|
|
205
|
-
const conversationId =
|
|
227
|
+
test("retrieval completes under 2000ms for 2000 items", async () => {
|
|
228
|
+
const conversationId = "conv-bench-2000";
|
|
206
229
|
const now = 1_700_500_000_000;
|
|
207
230
|
seedMemoryItems(conversationId, 2000, now);
|
|
208
231
|
|
|
209
232
|
const config = makeConfig();
|
|
210
233
|
const recall = await buildMemoryRecall(
|
|
211
|
-
|
|
234
|
+
"What do we know about topic-5 and keyword-3?",
|
|
212
235
|
conversationId,
|
|
213
236
|
config,
|
|
214
237
|
);
|
|
@@ -220,15 +243,15 @@ describe('Memory retrieval benchmark', () => {
|
|
|
220
243
|
expect(recall.latencyMs).toBeLessThan(2000);
|
|
221
244
|
});
|
|
222
245
|
|
|
223
|
-
test(
|
|
224
|
-
const conversationId =
|
|
246
|
+
test("token budget enforcement: maxInjectTokens is respected", async () => {
|
|
247
|
+
const conversationId = "conv-bench-budget";
|
|
225
248
|
const now = 1_700_500_000_000;
|
|
226
249
|
seedMemoryItems(conversationId, 500, now);
|
|
227
250
|
|
|
228
251
|
const smallBudget = 200;
|
|
229
252
|
const config = makeConfig({ maxInjectTokens: smallBudget });
|
|
230
253
|
const recall = await buildMemoryRecall(
|
|
231
|
-
|
|
254
|
+
"What do we know about topic-5 and keyword-3?",
|
|
232
255
|
conversationId,
|
|
233
256
|
config,
|
|
234
257
|
);
|
|
@@ -241,18 +264,20 @@ describe('Memory retrieval benchmark', () => {
|
|
|
241
264
|
const largeBudget = 2000;
|
|
242
265
|
const largeConfig = makeConfig({ maxInjectTokens: largeBudget });
|
|
243
266
|
const largeRecall = await buildMemoryRecall(
|
|
244
|
-
|
|
267
|
+
"What do we know about topic-5 and keyword-3?",
|
|
245
268
|
conversationId,
|
|
246
269
|
largeConfig,
|
|
247
270
|
);
|
|
248
271
|
|
|
249
272
|
expect(largeRecall.injectedTokens).toBeLessThanOrEqual(largeBudget);
|
|
250
273
|
// With more budget, we should get at least as many tokens
|
|
251
|
-
expect(largeRecall.injectedTokens).toBeGreaterThanOrEqual(
|
|
274
|
+
expect(largeRecall.injectedTokens).toBeGreaterThanOrEqual(
|
|
275
|
+
recall.injectedTokens,
|
|
276
|
+
);
|
|
252
277
|
});
|
|
253
278
|
|
|
254
|
-
test(
|
|
255
|
-
const conversationId =
|
|
279
|
+
test("early termination reduces latency when applicable", async () => {
|
|
280
|
+
const conversationId = "conv-bench-et";
|
|
256
281
|
const now = 1_700_500_000_000;
|
|
257
282
|
// Seed enough items that early termination can trigger
|
|
258
283
|
seedMemoryItems(conversationId, 500, now);
|
|
@@ -264,7 +289,7 @@ describe('Memory retrieval benchmark', () => {
|
|
|
264
289
|
...DEFAULT_CONFIG.memory,
|
|
265
290
|
embeddings: {
|
|
266
291
|
...DEFAULT_CONFIG.memory.embeddings,
|
|
267
|
-
provider:
|
|
292
|
+
provider: "local" as const,
|
|
268
293
|
required: false,
|
|
269
294
|
},
|
|
270
295
|
retrieval: {
|
|
@@ -272,7 +297,10 @@ describe('Memory retrieval benchmark', () => {
|
|
|
272
297
|
lexicalTopK: 50,
|
|
273
298
|
semanticTopK: 20,
|
|
274
299
|
maxInjectTokens: 750,
|
|
275
|
-
reranking: {
|
|
300
|
+
reranking: {
|
|
301
|
+
...DEFAULT_CONFIG.memory.retrieval.reranking,
|
|
302
|
+
enabled: false,
|
|
303
|
+
},
|
|
276
304
|
dynamicBudget: {
|
|
277
305
|
enabled: false,
|
|
278
306
|
minInjectTokens: 160,
|
|
@@ -290,7 +318,7 @@ describe('Memory retrieval benchmark', () => {
|
|
|
290
318
|
};
|
|
291
319
|
|
|
292
320
|
const recall = await buildMemoryRecall(
|
|
293
|
-
|
|
321
|
+
"What do we know about topic-5 and keyword-3?",
|
|
294
322
|
conversationId,
|
|
295
323
|
etConfig,
|
|
296
324
|
);
|
|
@@ -302,12 +330,12 @@ describe('Memory retrieval benchmark', () => {
|
|
|
302
330
|
expect(recall.selectedCount).toBeGreaterThan(0);
|
|
303
331
|
});
|
|
304
332
|
|
|
305
|
-
test(
|
|
306
|
-
const conversationId =
|
|
333
|
+
test("early termination skips semantic search entirely", async () => {
|
|
334
|
+
const conversationId = "conv-bench-et-skip";
|
|
307
335
|
const now = 1_700_500_000_000;
|
|
308
336
|
seedMemoryItems(conversationId, 500, now);
|
|
309
337
|
|
|
310
|
-
const query =
|
|
338
|
+
const query = "What do we know about topic-5 and keyword-3?";
|
|
311
339
|
|
|
312
340
|
const etConfig: AssistantConfig = {
|
|
313
341
|
...DEFAULT_CONFIG,
|
|
@@ -315,7 +343,7 @@ describe('Memory retrieval benchmark', () => {
|
|
|
315
343
|
...DEFAULT_CONFIG.memory,
|
|
316
344
|
embeddings: {
|
|
317
345
|
...DEFAULT_CONFIG.memory.embeddings,
|
|
318
|
-
provider:
|
|
346
|
+
provider: "local" as const,
|
|
319
347
|
required: false,
|
|
320
348
|
},
|
|
321
349
|
retrieval: {
|
|
@@ -323,7 +351,10 @@ describe('Memory retrieval benchmark', () => {
|
|
|
323
351
|
lexicalTopK: 50,
|
|
324
352
|
semanticTopK: 20,
|
|
325
353
|
maxInjectTokens: 750,
|
|
326
|
-
reranking: {
|
|
354
|
+
reranking: {
|
|
355
|
+
...DEFAULT_CONFIG.memory.retrieval.reranking,
|
|
356
|
+
enabled: false,
|
|
357
|
+
},
|
|
327
358
|
dynamicBudget: {
|
|
328
359
|
enabled: false,
|
|
329
360
|
minInjectTokens: 160,
|
|
@@ -367,15 +398,19 @@ describe('Memory retrieval benchmark', () => {
|
|
|
367
398
|
|
|
368
399
|
// Run without ET — semantic search should be invoked
|
|
369
400
|
semanticSearchCallCount = 0;
|
|
370
|
-
const baselineRecall = await buildMemoryRecall(
|
|
401
|
+
const baselineRecall = await buildMemoryRecall(
|
|
402
|
+
query,
|
|
403
|
+
conversationId,
|
|
404
|
+
noEtConfig,
|
|
405
|
+
);
|
|
371
406
|
const baselineCalls = semanticSearchCallCount;
|
|
372
407
|
|
|
373
408
|
expect(baselineRecall.earlyTerminated).toBe(false);
|
|
374
409
|
expect(baselineCalls).toBeGreaterThan(0);
|
|
375
410
|
});
|
|
376
411
|
|
|
377
|
-
test(
|
|
378
|
-
const conversationId =
|
|
412
|
+
test("recall.latencyMs tracks wall-clock within 50% tolerance", async () => {
|
|
413
|
+
const conversationId = "conv-bench-wallclock";
|
|
379
414
|
const now = 1_700_500_000_000;
|
|
380
415
|
seedMemoryItems(conversationId, 500, now);
|
|
381
416
|
|
|
@@ -386,7 +421,7 @@ describe('Memory retrieval benchmark', () => {
|
|
|
386
421
|
// integer-ms Date.now() and sub-ms performance.now().
|
|
387
422
|
const wallStart = Date.now();
|
|
388
423
|
const recall = await buildMemoryRecall(
|
|
389
|
-
|
|
424
|
+
"What do we know about topic-5 and keyword-3?",
|
|
390
425
|
conversationId,
|
|
391
426
|
config,
|
|
392
427
|
);
|