@vellumai/assistant 0.7.2 → 0.7.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ARCHITECTURE.md +16 -1
- package/docs/architecture/memory.md +5 -2
- package/node_modules/@vellumai/gateway-client/src/ipc-client.ts +13 -4
- package/node_modules/@vellumai/skill-host-contracts/src/assistant-event.ts +0 -9
- package/node_modules/@vellumai/slack-text/src/index.test.ts +18 -35
- package/node_modules/@vellumai/slack-text/src/index.ts +2 -48
- package/openapi.yaml +449 -22
- package/package.json +1 -1
- package/src/__tests__/app-control-flow.test.ts +21 -11
- package/src/__tests__/assistant-event-hub.test.ts +48 -0
- package/src/__tests__/assistant-event.test.ts +0 -10
- package/src/__tests__/assistant-events-sse-hardening.test.ts +2 -7
- package/src/__tests__/assistant-feature-flags-integration.test.ts +18 -0
- package/src/__tests__/auto-analysis-end-to-end.test.ts +62 -1
- package/src/__tests__/background-workers-disk-pressure.test.ts +268 -0
- package/src/__tests__/call-conversation-messages.test.ts +8 -2
- package/src/__tests__/channel-inbound-disk-pressure.test.ts +537 -0
- package/src/__tests__/channel-readiness-service.test.ts +4 -2
- package/src/__tests__/config-loader-backfill.test.ts +379 -0
- package/src/__tests__/config-schema.test.ts +1 -0
- package/src/__tests__/config-watcher-cleanup-throttle.test.ts +18 -9
- package/src/__tests__/config-watcher.test.ts +140 -69
- package/src/__tests__/context-search-agent-runner.test.ts +61 -3
- package/src/__tests__/context-search-conversations-source.test.ts +0 -24
- package/src/__tests__/context-search-fanout.test.ts +0 -1
- package/src/__tests__/context-search-memory-source.test.ts +3 -7
- package/src/__tests__/context-search-memory-v2-source.test.ts +0 -2
- package/src/__tests__/context-search-pkb-source.test.ts +0 -1
- package/src/__tests__/context-search-workspace-source.test.ts +0 -1
- package/src/__tests__/conversation-abort-tool-results.test.ts +6 -0
- package/src/__tests__/conversation-agent-loop-disk-pressure.test.ts +223 -0
- package/src/__tests__/conversation-agent-loop.test.ts +454 -5
- package/src/__tests__/conversation-error.test.ts +150 -3
- package/src/__tests__/conversation-process-callsite.test.ts +43 -0
- package/src/__tests__/conversation-provider-retry-repair.test.ts +6 -0
- package/src/__tests__/conversation-runtime-assembly.test.ts +65 -0
- package/src/__tests__/conversation-slash-unknown.test.ts +6 -0
- package/src/__tests__/conversation-speed-override.test.ts +0 -3
- package/src/__tests__/conversation-store.test.ts +0 -18
- package/src/__tests__/conversation-surfaces-app-control.test.ts +15 -4
- package/src/__tests__/conversation-surfaces-data-persist.test.ts +404 -0
- package/src/__tests__/conversation-tool-setup-app-refresh.test.ts +2 -5
- package/src/__tests__/conversation-workspace-injection.test.ts +6 -0
- package/src/__tests__/conversation-workspace-tool-tracking.test.ts +6 -0
- package/src/__tests__/credentials-cli.test.ts +7 -0
- package/src/__tests__/cu-unified-flow.test.ts +176 -10
- package/src/__tests__/date-context.test.ts +164 -2
- package/src/__tests__/disk-pressure-guard.test.ts +262 -0
- package/src/__tests__/disk-pressure-lifecycle.test.ts +168 -0
- package/src/__tests__/disk-pressure-policy.test.ts +241 -0
- package/src/__tests__/disk-pressure-routes.test.ts +379 -0
- package/src/__tests__/disk-pressure-tools.test.ts +277 -0
- package/src/__tests__/disk-usage.test.ts +150 -0
- package/src/__tests__/events-client-registration.test.ts +52 -0
- package/src/__tests__/events-dev-bypass-actor.test.ts +162 -0
- package/src/__tests__/file-write-tool.test.ts +4 -10
- package/src/__tests__/filing-service.test.ts +3 -4
- package/src/__tests__/heartbeat-disk-pressure.test.ts +183 -0
- package/src/__tests__/heartbeat-service.test.ts +260 -11
- package/src/__tests__/host-app-control-proxy.test.ts +195 -25
- package/src/__tests__/host-bash-proxy.test.ts +227 -34
- package/src/__tests__/host-bash-routes.test.ts +178 -13
- package/src/__tests__/host-cu-proxy.test.ts +210 -3
- package/src/__tests__/host-cu-routes-targeted.test.ts +141 -12
- package/src/__tests__/host-file-proxy-targeted.test.ts +48 -9
- package/src/__tests__/host-file-proxy.test.ts +268 -6
- package/src/__tests__/host-file-routes-targeted.test.ts +175 -17
- package/src/__tests__/host-transfer-proxy-targeted.test.ts +408 -59
- package/src/__tests__/host-transfer-routes-targeted.test.ts +232 -17
- package/src/__tests__/http-user-message-parity.test.ts +107 -1
- package/src/__tests__/injector-chain.test.ts +18 -6
- package/src/__tests__/injector-disk-pressure.test.ts +224 -0
- package/src/__tests__/managed-profile-guard.test.ts +18 -0
- package/src/__tests__/mcp-abort-signal.test.ts +130 -0
- package/src/__tests__/memory-admin-recall.test.ts +3 -11
- package/src/__tests__/memory-retrieval-pipeline.test.ts +22 -1
- package/src/__tests__/normalize-onboarding.test.ts +180 -0
- package/src/__tests__/oauth-connect-routes.test.ts +316 -0
- package/src/__tests__/oauth-provider-seed-logos.test.ts +24 -2
- package/src/__tests__/onboarding-persona-write.test.ts +308 -0
- package/src/__tests__/openai-provider.test.ts +45 -8
- package/src/__tests__/persist-onboarding-artifacts.test.ts +44 -64
- package/src/__tests__/platform-callback-registration.test.ts +21 -4
- package/src/__tests__/platform.test.ts +2 -1
- package/src/__tests__/playbook-execution.test.ts +0 -43
- package/src/__tests__/plugin-tool-contribution.test.ts +47 -0
- package/src/__tests__/prechat-onboarding-contract.test.ts +214 -27
- package/src/__tests__/provider-tool-name.test.ts +23 -0
- package/src/__tests__/relay-server.test.ts +15 -4
- package/src/__tests__/runtime-events-sse.test.ts +4 -8
- package/src/__tests__/scheduler-disk-pressure.test.ts +148 -0
- package/src/__tests__/secret-ingress-http.test.ts +0 -1
- package/src/__tests__/suggestion-routes.test.ts +46 -0
- package/src/__tests__/twilio-validation.test.ts +2 -2
- package/src/__tests__/workspace-migration-065-bump-stale-heartbeat-interval.test.ts +122 -0
- package/src/__tests__/workspace-migration-066-seed-heartbeat-callsite-cost-default.test.ts +285 -0
- package/src/__tests__/workspace-migration-068-release-notes-local-timezone.test.ts +90 -0
- package/src/__tests__/workspace-migration-safe-storage-limits-release.test.ts +90 -0
- package/src/approvals/guardian-decision-primitive.ts +13 -0
- package/src/approvals/guardian-request-resolvers.ts +16 -17
- package/src/backup/snapshot-lock.ts +2 -27
- package/src/bundler/compiler-tools.ts +3 -2
- package/src/calls/call-conversation-messages.ts +46 -10
- package/src/cli/commands/__tests__/webhooks.test.ts +0 -4
- package/src/cli/commands/bash.ts +35 -108
- package/src/cli/commands/contacts.ts +64 -25
- package/src/cli/commands/credentials.ts +56 -0
- package/src/cli/commands/memory-v2.ts +7 -6
- package/src/cli/commands/oauth/__tests__/connect.test.ts +437 -1
- package/src/cli/commands/oauth/connect.ts +127 -1
- package/src/cli/commands/platform/__tests__/callback-routes-list.test.ts +0 -3
- package/src/cli/commands/platform/__tests__/connect.test.ts +7 -1
- package/src/cli/commands/platform/__tests__/disconnect.test.ts +7 -1
- package/src/cli/commands/platform/__tests__/status.test.ts +103 -6
- package/src/cli/commands/platform/index.ts +16 -7
- package/src/cli/commands/status.ts +57 -0
- package/src/cli/program.ts +4 -2
- package/src/config/assistant-feature-flags.ts +13 -3
- package/src/config/bundled-skills/messaging/tools/messaging-analyze-style.ts +4 -3
- package/src/config/bundled-skills/phone-calls/references/TROUBLESHOOTING.md +13 -7
- package/src/config/bundled-skills/playbooks/tools/playbook-create.ts +2 -2
- package/src/config/bundled-skills/playbooks/tools/playbook-delete.ts +2 -2
- package/src/config/bundled-skills/playbooks/tools/playbook-list.ts +2 -2
- package/src/config/bundled-skills/playbooks/tools/playbook-update.ts +2 -2
- package/src/config/env.ts +0 -8
- package/src/config/feature-flag-registry.json +27 -3
- package/src/config/loader.ts +127 -8
- package/src/config/schemas/__tests__/memory-v2.test.ts +10 -5
- package/src/config/schemas/call-site-catalog.ts +14 -0
- package/src/config/schemas/channels.ts +0 -5
- package/src/config/schemas/heartbeat.ts +1 -1
- package/src/config/schemas/llm.ts +2 -0
- package/src/config/schemas/memory-lifecycle.ts +13 -0
- package/src/config/schemas/memory-v2.ts +75 -11
- package/src/config/schemas/platform.ts +43 -3
- package/src/config/schemas/services.ts +28 -0
- package/src/config/seed-inference-profiles.ts +230 -33
- package/src/contacts/contact-store.ts +0 -25
- package/src/daemon/__tests__/conversation-tool-setup.test.ts +86 -25
- package/src/daemon/assistant-attachments.ts +4 -4
- package/src/daemon/config-watcher.ts +85 -57
- package/src/daemon/conversation-agent-loop-handlers.ts +6 -0
- package/src/daemon/conversation-agent-loop.ts +170 -33
- package/src/daemon/conversation-error.ts +87 -15
- package/src/daemon/conversation-lifecycle.ts +1 -3
- package/src/daemon/conversation-process.ts +8 -0
- package/src/daemon/conversation-runtime-assembly.ts +26 -0
- package/src/daemon/conversation-store.ts +2 -2
- package/src/daemon/conversation-surfaces.ts +195 -15
- package/src/daemon/conversation-tool-setup.ts +57 -14
- package/src/daemon/conversation.ts +17 -22
- package/src/daemon/date-context.ts +71 -22
- package/src/daemon/disk-pressure-background-gate.ts +73 -0
- package/src/daemon/disk-pressure-guard.ts +343 -0
- package/src/daemon/disk-pressure-policy.ts +163 -0
- package/src/daemon/handlers/shared.ts +0 -1
- package/src/daemon/handlers/skills.ts +3 -4
- package/src/daemon/host-app-control-proxy.ts +137 -41
- package/src/daemon/host-bash-proxy.ts +46 -21
- package/src/daemon/host-cu-proxy.ts +49 -3
- package/src/daemon/host-file-proxy.ts +43 -7
- package/src/daemon/host-transfer-proxy.ts +95 -4
- package/src/daemon/lifecycle.ts +79 -28
- package/src/daemon/meet-host-supervisor.ts +4 -4
- package/src/daemon/meet-manifest-loader.ts +0 -1
- package/src/daemon/memory-v2-startup.ts +14 -4
- package/src/daemon/message-protocol.ts +3 -0
- package/src/daemon/message-types/conversations.ts +4 -0
- package/src/daemon/message-types/disk-pressure.ts +9 -0
- package/src/daemon/message-types/messages.ts +3 -0
- package/src/daemon/profiler-run-store.ts +5 -5
- package/src/daemon/tool-setup-types.ts +2 -2
- package/src/documents/document-store.ts +85 -0
- package/src/filing/filing-service.ts +30 -5
- package/src/heartbeat/__tests__/heartbeat-feed-event.test.ts +9 -16
- package/src/heartbeat/__tests__/heartbeat-run-store.test.ts +36 -0
- package/src/heartbeat/heartbeat-run-store.ts +13 -0
- package/src/heartbeat/heartbeat-service.ts +205 -31
- package/src/home/feed-scheduler.ts +18 -0
- package/src/inbound/platform-callback-registration.ts +8 -15
- package/src/ipc/__tests__/clients-list-ipc.test.ts +169 -0
- package/src/ipc/assistant-server.ts +56 -2
- package/src/ipc/gateway-client.ts +37 -3
- package/src/live-voice/live-voice-archive.ts +4 -4
- package/src/live-voice/protocol.ts +5 -7
- package/src/media/image-service.ts +1 -7
- package/src/memory/__tests__/fixtures/memory-v2-activation-fixtures.ts +21 -13
- package/src/memory/__tests__/jobs-worker-v2-schedule.test.ts +52 -22
- package/src/memory/__tests__/memory-v2-activation-log-store.test.ts +0 -6
- package/src/memory/__tests__/memory-v2-concept-frequency.test.ts +272 -0
- package/src/memory/admin.ts +5 -9
- package/src/memory/context-search/agent-runner.ts +19 -2
- package/src/memory/context-search/sources/conversations.ts +2 -11
- package/src/memory/context-search/sources/memory-v2.ts +5 -4
- package/src/memory/context-search/sources/memory.ts +0 -1
- package/src/memory/context-search/types.ts +0 -1
- package/src/memory/conversation-crud.ts +4 -12
- package/src/memory/db-init.ts +2 -0
- package/src/memory/embedding-runtime-manager.ts +119 -5
- package/src/memory/graph/__tests__/conversation-graph-memory-v2-routing.test.ts +32 -21
- package/src/memory/graph/conversation-graph-memory.ts +42 -54
- package/src/memory/graph/extraction.ts +1 -3
- package/src/memory/graph/graph-search.test.ts +10 -67
- package/src/memory/graph/graph-search.ts +1 -20
- package/src/memory/graph/retriever.test.ts +6 -0
- package/src/memory/graph/retriever.ts +6 -10
- package/src/memory/indexer.ts +54 -45
- package/src/memory/job-handlers/backfill.ts +2 -11
- package/src/memory/job-handlers/cleanup.ts +43 -0
- package/src/memory/job-handlers/embedding.ts +6 -8
- package/src/memory/job-handlers/summarization.ts +2 -7
- package/src/memory/jobs-store.ts +48 -0
- package/src/memory/jobs-worker.ts +81 -43
- package/src/memory/memory-v2-activation-log-store.ts +32 -14
- package/src/memory/memory-v2-concept-frequency.ts +169 -0
- package/src/memory/migrations/239-trace-events-created-at-index.ts +18 -0
- package/src/memory/migrations/index.ts +1 -0
- package/src/memory/pkb/pkb-search.test.ts +6 -0
- package/src/memory/qdrant-client.ts +0 -13
- package/src/memory/rerank-local.ts +374 -0
- package/src/memory/search/semantic.ts +6 -67
- package/src/memory/trace-event-store.ts +1 -17
- package/src/memory/v2/__tests__/activation.test.ts +311 -250
- package/src/memory/v2/__tests__/consolidation-job.test.ts +40 -8
- package/src/memory/v2/__tests__/injection.test.ts +157 -167
- package/src/memory/v2/__tests__/prompts-consolidation.test.ts +61 -2
- package/src/memory/v2/__tests__/qdrant.test.ts +16 -0
- package/src/memory/v2/__tests__/reranker.test.ts +338 -0
- package/src/memory/v2/__tests__/sim.test.ts +5 -199
- package/src/memory/v2/__tests__/skill-store.test.ts +71 -65
- package/src/memory/v2/__tests__/static-context.test.ts +76 -1
- package/src/memory/v2/activation.ts +149 -156
- package/src/memory/v2/consolidation-job.ts +62 -12
- package/src/memory/v2/injection.ts +47 -60
- package/src/memory/v2/prompts/consolidation.ts +36 -1
- package/src/memory/v2/qdrant.ts +99 -0
- package/src/memory/v2/reranker.ts +177 -0
- package/src/memory/v2/sim.ts +10 -84
- package/src/memory/v2/skill-content.ts +4 -3
- package/src/memory/v2/skill-store.ts +82 -59
- package/src/memory/v2/static-context.ts +22 -0
- package/src/memory/v2/types.ts +10 -10
- package/src/notifications/copy-composer.ts +13 -0
- package/src/notifications/signal.ts +4 -0
- package/src/oauth/AGENTS.md +3 -1
- package/src/oauth/__tests__/oauth-connect-state.test.ts +137 -0
- package/src/oauth/connect-orchestrator.ts +2 -0
- package/src/oauth/connection-resolver.test.ts +66 -1
- package/src/oauth/connection-resolver.ts +55 -1
- package/src/oauth/oauth-connect-state.ts +77 -0
- package/src/oauth/seed-providers.ts +58 -1
- package/src/plugins/defaults/injectors.ts +35 -2
- package/src/plugins/defaults/memory-retrieval.ts +5 -6
- package/src/plugins/types.ts +7 -0
- package/src/proactive-artifact/aux-message-injector.ts +74 -0
- package/src/proactive-artifact/decision.test.ts +226 -0
- package/src/proactive-artifact/decision.ts +165 -0
- package/src/proactive-artifact/index.ts +7 -0
- package/src/proactive-artifact/job.test.ts +867 -0
- package/src/proactive-artifact/job.ts +352 -0
- package/src/proactive-artifact/message-copy.ts +41 -0
- package/src/proactive-artifact/trigger-state.test.ts +277 -0
- package/src/proactive-artifact/trigger-state.ts +119 -0
- package/src/prompts/normalize-onboarding.ts +80 -0
- package/src/prompts/persona-resolver.ts +101 -9
- package/src/prompts/system-prompt.ts +21 -7
- package/src/prompts/templates/BOOTSTRAP.md +13 -5
- package/src/providers/__tests__/retry-callsite.test.ts +222 -1
- package/src/providers/model-intents.ts +7 -0
- package/src/providers/openrouter/client.ts +8 -0
- package/src/providers/retry.ts +50 -0
- package/src/providers/types.ts +1 -0
- package/src/runtime/__tests__/agent-wake.test.ts +456 -3
- package/src/runtime/agent-wake.ts +238 -100
- package/src/runtime/assistant-event-hub.ts +36 -6
- package/src/runtime/assistant-event.ts +0 -1
- package/src/runtime/auth/__tests__/route-policy.test.ts +64 -0
- package/src/runtime/auth/route-policy.ts +14 -1
- package/src/runtime/auth/same-actor.ts +216 -0
- package/src/runtime/channel-retry-sweep.ts +65 -1
- package/src/runtime/guardian-reply-router.ts +10 -0
- package/src/runtime/local-actor-identity.ts +52 -11
- package/src/runtime/pending-interactions.ts +8 -0
- package/src/runtime/routes/__tests__/client-routes.test.ts +155 -0
- package/src/runtime/routes/__tests__/conversation-query-routes.test.ts +0 -5
- package/src/runtime/routes/__tests__/heartbeat-routes.test.ts +1 -1
- package/src/runtime/routes/client-routes.ts +20 -2
- package/src/runtime/routes/contact-routes.ts +0 -25
- package/src/runtime/routes/conversation-routes.ts +35 -26
- package/src/runtime/routes/debug-bash-routes.ts +163 -0
- package/src/runtime/routes/disk-pressure-routes.ts +121 -0
- package/src/runtime/routes/document-pdf-renderer.ts +6 -2
- package/src/runtime/routes/documents-routes.ts +2 -75
- package/src/runtime/routes/events-routes.ts +41 -9
- package/src/runtime/routes/host-bash-routes.ts +23 -3
- package/src/runtime/routes/host-cu-routes.ts +33 -6
- package/src/runtime/routes/host-file-routes.ts +32 -6
- package/src/runtime/routes/host-transfer-routes.ts +79 -16
- package/src/runtime/routes/identity-routes.ts +7 -138
- package/src/runtime/routes/inbound-message-handler.ts +77 -12
- package/src/runtime/routes/inbound-stages/guardian-reply-intercept.ts +3 -0
- package/src/runtime/routes/index.ts +6 -0
- package/src/runtime/routes/memory-item-routes.test.ts +41 -15
- package/src/runtime/routes/memory-v2-routes.ts +33 -0
- package/src/runtime/routes/oauth-connect-routes.ts +153 -0
- package/src/runtime/verification-outbound-actions.ts +4 -4
- package/src/schedule/run-script.ts +37 -5
- package/src/schedule/scheduler.ts +20 -1
- package/src/security/encrypted-store.ts +2 -0
- package/src/security/secure-keys.ts +55 -0
- package/src/skills/remote-skill-policy.ts +4 -10
- package/src/subagent/index.ts +1 -7
- package/src/subagent/manager.ts +1 -15
- package/src/tasks/task-runner.ts +0 -1
- package/src/tasks/task-store.ts +0 -3
- package/src/tools/background-tool-registry.ts +17 -3
- package/src/tools/host-filesystem/edit.test.ts +151 -0
- package/src/tools/host-filesystem/edit.ts +43 -1
- package/src/tools/host-filesystem/read.test.ts +129 -0
- package/src/tools/host-filesystem/read.ts +43 -1
- package/src/tools/host-filesystem/transfer.test.ts +127 -2
- package/src/tools/host-filesystem/transfer.ts +56 -11
- package/src/tools/host-filesystem/write.test.ts +134 -0
- package/src/tools/host-filesystem/write.ts +43 -1
- package/src/tools/host-terminal/host-shell.ts +13 -6
- package/src/tools/mcp/mcp-tool-factory.ts +2 -1
- package/src/tools/memory/register.test.ts +12 -9
- package/src/tools/memory/register.ts +1 -2
- package/src/tools/provider-tool-name.ts +28 -0
- package/src/tools/registry.ts +30 -9
- package/src/tools/terminal/shell.ts +9 -1
- package/src/tools/tool-approval-handler.ts +31 -6
- package/src/tools/types.ts +24 -2
- package/src/tts/provider-catalog.ts +3 -5
- package/src/util/disk-usage.ts +138 -0
- package/src/util/platform.ts +21 -11
- package/src/util/process-liveness.ts +26 -0
- package/src/workspace/heartbeat-service.ts +19 -0
- package/src/workspace/migrations/065-bump-stale-heartbeat-interval.ts +60 -0
- package/src/workspace/migrations/066-seed-heartbeat-callsite-cost-default.ts +146 -0
- package/src/workspace/migrations/067-release-notes-safe-storage-limits.ts +72 -0
- package/src/workspace/migrations/068-release-notes-local-timezone.ts +65 -0
- package/src/workspace/migrations/registry.ts +8 -0
- package/src/__tests__/conversation-tool-setup-memory-scope.test.ts +0 -167
- package/src/memory/v2/__tests__/skill-qdrant.test.ts +0 -657
- package/src/memory/v2/skill-qdrant.ts +0 -404
- package/src/signals/bash.ts +0 -198
|
@@ -38,10 +38,16 @@ const ONNXRUNTIME_COMMON_VERSION = "1.21.0";
|
|
|
38
38
|
const TRANSFORMERS_VERSION = "3.8.1";
|
|
39
39
|
const JINJA_VERSION = "0.5.5";
|
|
40
40
|
|
|
41
|
-
/**
|
|
42
|
-
|
|
41
|
+
/**
|
|
42
|
+
* Composite version string for cache invalidation. Bumping the trailing
|
|
43
|
+
* `_workers-vN` suffix forces existing installs to regenerate the worker
|
|
44
|
+
* scripts when the worker IPC contract or spawn-args list changes (without
|
|
45
|
+
* requiring an `@huggingface/transformers` version bump).
|
|
46
|
+
*/
|
|
47
|
+
const RUNTIME_VERSION = `ort-${ONNXRUNTIME_NODE_VERSION}_hf-${TRANSFORMERS_VERSION}_jinja-${JINJA_VERSION}_workers-v2`;
|
|
43
48
|
|
|
44
49
|
const WORKER_FILENAME = "embed-worker.mjs";
|
|
50
|
+
const RERANK_WORKER_FILENAME = "rerank-worker.mjs";
|
|
45
51
|
|
|
46
52
|
/** Module-level guard so concurrent in-process calls share one download. */
|
|
47
53
|
const installGuard = new PromiseGuard<void>();
|
|
@@ -171,6 +177,101 @@ process.stdin.on('end', () => process.exit(0));
|
|
|
171
177
|
`;
|
|
172
178
|
}
|
|
173
179
|
|
|
180
|
+
function generateRerankWorkerScript(): string {
|
|
181
|
+
// Cross-encoder rerank worker. Loads a sequence-classification model and
|
|
182
|
+
// scores paired (queries[i], passages[i]) tuples in one batched ONNX
|
|
183
|
+
// inference call. Mirrors the embed worker's lifecycle (ready signal,
|
|
184
|
+
// JSON-lines IPC, sequential queue) so LocalRerankBackend can reuse the
|
|
185
|
+
// same supervisor pattern.
|
|
186
|
+
//
|
|
187
|
+
// Request shape: { id, queries: string[], passages: string[] } with
|
|
188
|
+
// queries.length === passages.length. Each pair is one (query, passage)
|
|
189
|
+
// tuple; multiple distinct queries can ride in a single batch so the
|
|
190
|
+
// activation pipeline can score the user-channel and assistant-channel
|
|
191
|
+
// queries against a shared candidate set in one tokenizer + ONNX call.
|
|
192
|
+
return `\
|
|
193
|
+
// rerank-worker.mjs — Auto-generated by EmbeddingRuntimeManager
|
|
194
|
+
// Runs in a separate bun process, communicates via JSON-lines over stdin/stdout.
|
|
195
|
+
process.title = 'rerank-worker';
|
|
196
|
+
import {
|
|
197
|
+
AutoModelForSequenceClassification,
|
|
198
|
+
AutoTokenizer,
|
|
199
|
+
env,
|
|
200
|
+
} from '@huggingface/transformers';
|
|
201
|
+
|
|
202
|
+
const model = process.argv[2];
|
|
203
|
+
const cacheDir = process.argv[3];
|
|
204
|
+
const dtype = process.argv[4] || 'q8';
|
|
205
|
+
if (cacheDir && env) env.cacheDir = cacheDir;
|
|
206
|
+
|
|
207
|
+
let tokenizer;
|
|
208
|
+
let session;
|
|
209
|
+
try {
|
|
210
|
+
tokenizer = await AutoTokenizer.from_pretrained(model);
|
|
211
|
+
session = await AutoModelForSequenceClassification.from_pretrained(model, { dtype });
|
|
212
|
+
process.stdout.write(JSON.stringify({ type: 'ready' }) + '\\n');
|
|
213
|
+
} catch (err) {
|
|
214
|
+
process.stdout.write(JSON.stringify({ type: 'error', error: err.message || String(err) }) + '\\n');
|
|
215
|
+
process.exit(1);
|
|
216
|
+
}
|
|
217
|
+
|
|
218
|
+
const sigmoid = (x) => 1 / (1 + Math.exp(-x));
|
|
219
|
+
|
|
220
|
+
const decoder = new TextDecoder();
|
|
221
|
+
let buffer = '';
|
|
222
|
+
let processing = false;
|
|
223
|
+
const queue = [];
|
|
224
|
+
|
|
225
|
+
process.stdin.on('data', (chunk) => {
|
|
226
|
+
buffer += typeof chunk === 'string' ? chunk : decoder.decode(chunk, { stream: true });
|
|
227
|
+
let idx;
|
|
228
|
+
while ((idx = buffer.indexOf('\\n')) !== -1) {
|
|
229
|
+
const line = buffer.slice(0, idx);
|
|
230
|
+
buffer = buffer.slice(idx + 1);
|
|
231
|
+
if (line.trim()) queue.push(line);
|
|
232
|
+
}
|
|
233
|
+
if (!processing) processQueue();
|
|
234
|
+
});
|
|
235
|
+
|
|
236
|
+
async function processQueue() {
|
|
237
|
+
processing = true;
|
|
238
|
+
while (queue.length > 0) {
|
|
239
|
+
const line = queue.shift();
|
|
240
|
+
let req;
|
|
241
|
+
try { req = JSON.parse(line); } catch { continue; }
|
|
242
|
+
try {
|
|
243
|
+
const { id, queries, passages } = req;
|
|
244
|
+
if (
|
|
245
|
+
!Array.isArray(queries) || !Array.isArray(passages) ||
|
|
246
|
+
queries.length !== passages.length || passages.length === 0
|
|
247
|
+
) {
|
|
248
|
+
process.stdout.write(JSON.stringify({ id, scores: [] }) + '\\n');
|
|
249
|
+
continue;
|
|
250
|
+
}
|
|
251
|
+
const inputs = await tokenizer(queries, {
|
|
252
|
+
text_pair: passages,
|
|
253
|
+
padding: true,
|
|
254
|
+
truncation: true,
|
|
255
|
+
return_tensors: 'pt',
|
|
256
|
+
});
|
|
257
|
+
const out = await session(inputs);
|
|
258
|
+
const logits = out.logits.data;
|
|
259
|
+
const scores = new Array(passages.length);
|
|
260
|
+
for (let i = 0; i < passages.length; i++) {
|
|
261
|
+
scores[i] = sigmoid(Number(logits[i]));
|
|
262
|
+
}
|
|
263
|
+
process.stdout.write(JSON.stringify({ id, scores }) + '\\n');
|
|
264
|
+
} catch (err) {
|
|
265
|
+
process.stdout.write(JSON.stringify({ id: req?.id, error: err.message || String(err) }) + '\\n');
|
|
266
|
+
}
|
|
267
|
+
}
|
|
268
|
+
processing = false;
|
|
269
|
+
}
|
|
270
|
+
|
|
271
|
+
process.stdin.on('end', () => process.exit(0));
|
|
272
|
+
`;
|
|
273
|
+
}
|
|
274
|
+
|
|
174
275
|
// ── Main manager ────────────────────────────────────────────────────
|
|
175
276
|
|
|
176
277
|
export class EmbeddingRuntimeManager {
|
|
@@ -186,8 +287,12 @@ export class EmbeddingRuntimeManager {
|
|
|
186
287
|
if (!manifest) return false;
|
|
187
288
|
if (manifest.runtimeVersion !== RUNTIME_VERSION) return false;
|
|
188
289
|
|
|
189
|
-
// Verify
|
|
190
|
-
return
|
|
290
|
+
// Verify both worker scripts exist and a bun binary is available
|
|
291
|
+
return (
|
|
292
|
+
existsSync(this.getWorkerPath()) &&
|
|
293
|
+
existsSync(this.getRerankWorkerPath()) &&
|
|
294
|
+
this.getBunPath() !== undefined
|
|
295
|
+
);
|
|
191
296
|
}
|
|
192
297
|
|
|
193
298
|
/** Path to the embed worker script. */
|
|
@@ -195,6 +300,11 @@ export class EmbeddingRuntimeManager {
|
|
|
195
300
|
return join(this.baseDir, WORKER_FILENAME);
|
|
196
301
|
}
|
|
197
302
|
|
|
303
|
+
/** Path to the rerank worker script. */
|
|
304
|
+
getRerankWorkerPath(): string {
|
|
305
|
+
return join(this.baseDir, RERANK_WORKER_FILENAME);
|
|
306
|
+
}
|
|
307
|
+
|
|
198
308
|
/**
|
|
199
309
|
* Find a usable bun binary.
|
|
200
310
|
* Delegates to the shared bun-runtime helper, also checking
|
|
@@ -375,8 +485,12 @@ export class EmbeddingRuntimeManager {
|
|
|
375
485
|
].join("\n"),
|
|
376
486
|
);
|
|
377
487
|
|
|
378
|
-
// Step 4: Write embed worker
|
|
488
|
+
// Step 4: Write embed + rerank worker scripts
|
|
379
489
|
writeFileSync(join(tmpDir, WORKER_FILENAME), generateWorkerScript());
|
|
490
|
+
writeFileSync(
|
|
491
|
+
join(tmpDir, RERANK_WORKER_FILENAME),
|
|
492
|
+
generateRerankWorkerScript(),
|
|
493
|
+
);
|
|
380
494
|
|
|
381
495
|
// Step 5: Write version manifest
|
|
382
496
|
const manifest: VersionManifest = {
|
|
@@ -45,25 +45,28 @@ mock.module("../../../util/logger.js", () => ({
|
|
|
45
45
|
|
|
46
46
|
// Stub the v1 retriever so we don't reach Qdrant. Both modes return zero
|
|
47
47
|
// nodes — the v1 injection branch becomes a no-op, isolating the assertion
|
|
48
|
-
// to "did the v2 routing fire?".
|
|
48
|
+
// to "did the v2 routing fire?". Tracked via `mock()` so tests can also
|
|
49
|
+
// assert that v1 retrieval is *not* called when v2 is enabled.
|
|
50
|
+
const loadContextMemoryMock = mock(async () => ({
|
|
51
|
+
nodes: [],
|
|
52
|
+
serendipityNodes: [],
|
|
53
|
+
latencyMs: 1,
|
|
54
|
+
metrics: null,
|
|
55
|
+
queryVector: undefined,
|
|
56
|
+
sparseVector: undefined,
|
|
57
|
+
userQueryVector: undefined,
|
|
58
|
+
userQuerySparseVector: undefined,
|
|
59
|
+
}));
|
|
60
|
+
const retrieveForTurnMock = mock(async () => ({
|
|
61
|
+
nodes: [],
|
|
62
|
+
latencyMs: 1,
|
|
63
|
+
metrics: null,
|
|
64
|
+
queryVector: undefined,
|
|
65
|
+
sparseVector: undefined,
|
|
66
|
+
}));
|
|
49
67
|
mock.module("../retriever.js", () => ({
|
|
50
|
-
loadContextMemory:
|
|
51
|
-
|
|
52
|
-
serendipityNodes: [],
|
|
53
|
-
latencyMs: 1,
|
|
54
|
-
metrics: null,
|
|
55
|
-
queryVector: undefined,
|
|
56
|
-
sparseVector: undefined,
|
|
57
|
-
userQueryVector: undefined,
|
|
58
|
-
userQuerySparseVector: undefined,
|
|
59
|
-
}),
|
|
60
|
-
retrieveForTurn: async () => ({
|
|
61
|
-
nodes: [],
|
|
62
|
-
latencyMs: 1,
|
|
63
|
-
metrics: null,
|
|
64
|
-
queryVector: undefined,
|
|
65
|
-
sparseVector: undefined,
|
|
66
|
-
}),
|
|
68
|
+
loadContextMemory: loadContextMemoryMock,
|
|
69
|
+
retrieveForTurn: retrieveForTurnMock,
|
|
67
70
|
}));
|
|
68
71
|
|
|
69
72
|
// Programmable embedding + Qdrant state. Mirrors the pattern in
|
|
@@ -232,7 +235,7 @@ function makeMemory(): InstanceType<typeof ConversationGraphMemory> {
|
|
|
232
235
|
// `initialized = true` skips the context-load branch and the
|
|
233
236
|
// `fetchRecentSummaries` DB read it depends on, isolating the per-turn path
|
|
234
237
|
// for these unit tests. Context-load is covered by its own block below.
|
|
235
|
-
const m = new ConversationGraphMemory("
|
|
238
|
+
const m = new ConversationGraphMemory("conv-test-1");
|
|
236
239
|
(m as unknown as { initialized: boolean }).initialized = true;
|
|
237
240
|
return m;
|
|
238
241
|
}
|
|
@@ -262,6 +265,8 @@ beforeEach(() => {
|
|
|
262
265
|
testDbHandle = createTestDb();
|
|
263
266
|
qdrantState.queryResponses.dense.length = 0;
|
|
264
267
|
qdrantState.queryResponses.sparse.length = 0;
|
|
268
|
+
loadContextMemoryMock.mockClear();
|
|
269
|
+
retrieveForTurnMock.mockClear();
|
|
265
270
|
_resetMemoryV2QdrantForTests();
|
|
266
271
|
});
|
|
267
272
|
|
|
@@ -347,6 +352,9 @@ describe("ConversationGraphMemory.prepareMemory — v2 routing (per-turn path)",
|
|
|
347
352
|
expect(firstBlock.text.endsWith("\n</memory>")).toBe(true);
|
|
348
353
|
// No nested wrapper.
|
|
349
354
|
expect(firstBlock.text.match(/<memory>/g)?.length).toBe(1);
|
|
355
|
+
|
|
356
|
+
// v1 retrieval is fully bypassed when v2 is enabled.
|
|
357
|
+
expect(retrieveForTurnMock).not.toHaveBeenCalled();
|
|
350
358
|
});
|
|
351
359
|
|
|
352
360
|
test("reinjectCachedMemory after v2 injection wraps exactly once (no double-wrap)", async () => {
|
|
@@ -409,7 +417,7 @@ describe("ConversationGraphMemory.prepareMemory — v2 routing (context-load pat
|
|
|
409
417
|
stageTurn([{ slug: "alice-vscode", denseScore: 0.9 }]);
|
|
410
418
|
|
|
411
419
|
// Fresh memory → initialized=false → runContextLoad branch.
|
|
412
|
-
const memory = new ConversationGraphMemory("
|
|
420
|
+
const memory = new ConversationGraphMemory("conv-test-cl");
|
|
413
421
|
const config = makeConfig(true);
|
|
414
422
|
const messages = makeMessages("first message of the conversation here");
|
|
415
423
|
|
|
@@ -430,13 +438,16 @@ describe("ConversationGraphMemory.prepareMemory — v2 routing (context-load pat
|
|
|
430
438
|
const firstBlock = lastMsg?.content[0];
|
|
431
439
|
if (firstBlock?.type !== "text") throw new Error("unexpected block type");
|
|
432
440
|
expect(firstBlock.text.match(/<memory>/g)?.length).toBe(1);
|
|
441
|
+
|
|
442
|
+
// v1 retrieval is fully bypassed when v2 is enabled.
|
|
443
|
+
expect(loadContextMemoryMock).not.toHaveBeenCalled();
|
|
433
444
|
});
|
|
434
445
|
|
|
435
446
|
test("flag off → v2 not run on first turn either", async () => {
|
|
436
447
|
_setOverridesForTesting({ "memory-v2-enabled": false });
|
|
437
448
|
stageTurn([{ slug: "alice-vscode", denseScore: 0.9 }]);
|
|
438
449
|
|
|
439
|
-
const memory = new ConversationGraphMemory("
|
|
450
|
+
const memory = new ConversationGraphMemory("conv-test-cl-off");
|
|
440
451
|
const config = makeConfig(true);
|
|
441
452
|
const messages = makeMessages("first message of the conversation here");
|
|
442
453
|
|
|
@@ -62,14 +62,12 @@ export class ConversationGraphMemory {
|
|
|
62
62
|
private initialized = false;
|
|
63
63
|
private needsReload = false;
|
|
64
64
|
private stateRestored = false;
|
|
65
|
-
private scopeId: string;
|
|
66
65
|
private conversationId: string;
|
|
67
66
|
private lastInjectedBlock: string | null = null;
|
|
68
67
|
private lastInjectedNodeIds: string[] = [];
|
|
69
68
|
private lastInjectedImages: Map<string, ResolvedImage> = new Map();
|
|
70
69
|
|
|
71
|
-
constructor(
|
|
72
|
-
this.scopeId = scopeId;
|
|
70
|
+
constructor(conversationId: string) {
|
|
73
71
|
this.conversationId = conversationId;
|
|
74
72
|
}
|
|
75
73
|
|
|
@@ -147,7 +145,6 @@ export class ConversationGraphMemory {
|
|
|
147
145
|
const db = getDb();
|
|
148
146
|
const baseWhere = and(
|
|
149
147
|
eq(memorySummaries.scope, "conversation"),
|
|
150
|
-
eq(memorySummaries.scopeId, this.scopeId),
|
|
151
148
|
ne(memorySummaries.scopeKey, this.conversationId),
|
|
152
149
|
);
|
|
153
150
|
|
|
@@ -385,29 +382,11 @@ export class ConversationGraphMemory {
|
|
|
385
382
|
signal: AbortSignal,
|
|
386
383
|
onEvent: (msg: ServerMessage) => void,
|
|
387
384
|
) {
|
|
388
|
-
const result = await loadContextMemory({
|
|
389
|
-
scopeId: this.scopeId,
|
|
390
|
-
recentSummaries,
|
|
391
|
-
userQuery,
|
|
392
|
-
config,
|
|
393
|
-
signal,
|
|
394
|
-
});
|
|
395
|
-
|
|
396
|
-
this.initialized = true;
|
|
397
|
-
this.needsReload = false;
|
|
398
|
-
|
|
399
|
-
// v2 routing: when the feature flag and workspace config are both on,
|
|
400
|
-
// replace v1's injection with the activation-pipeline output. v1
|
|
401
|
-
// retrieval still runs above so its tracker stays warm — keeps the
|
|
402
|
-
// off→on→off flag flip cheap and avoids invalidating cached metrics.
|
|
403
|
-
// assistantMessage is empty: context-load fires on turn 1 / post-
|
|
404
|
-
// compaction, so there is no immediately-prior assistant turn to
|
|
405
|
-
// weight the activation against.
|
|
406
|
-
//
|
|
407
385
|
// Use the raw user text (no >10-char filter) so even short greetings
|
|
408
386
|
// ("hi") get a fresh top-K activation dump on the first user message.
|
|
409
|
-
// The activation pipeline is robust to weak ANN signal — it
|
|
410
|
-
//
|
|
387
|
+
// The activation pipeline is robust to weak ANN signal — it falls back
|
|
388
|
+
// to spreading + nowText to surface candidates.
|
|
389
|
+
const startedAt = Date.now();
|
|
411
390
|
const rawUserText = readRawUserText(messages[messages.length - 1]);
|
|
412
391
|
const v2 = await this.maybeRouteV2Injection(
|
|
413
392
|
messages,
|
|
@@ -415,7 +394,11 @@ export class ConversationGraphMemory {
|
|
|
415
394
|
"context-load",
|
|
416
395
|
rawUserText ?? userQuery ?? "",
|
|
417
396
|
"",
|
|
397
|
+
signal,
|
|
418
398
|
);
|
|
399
|
+
this.initialized = true;
|
|
400
|
+
this.needsReload = false;
|
|
401
|
+
|
|
419
402
|
if (v2.routed) {
|
|
420
403
|
this.lastInjectedBlock = v2.injectedBlockText;
|
|
421
404
|
this.lastInjectedNodeIds = [];
|
|
@@ -425,17 +408,22 @@ export class ConversationGraphMemory {
|
|
|
425
408
|
injectedTokens: v2.injectedBlockText
|
|
426
409
|
? estimateTextTokens(v2.injectedBlockText)
|
|
427
410
|
: 0,
|
|
428
|
-
latencyMs:
|
|
411
|
+
latencyMs: Date.now() - startedAt,
|
|
429
412
|
mode: "context-load" as const,
|
|
430
413
|
injectedBlockText: v2.injectedBlockText,
|
|
431
|
-
metrics:
|
|
432
|
-
queryVector: result.queryVector,
|
|
433
|
-
sparseVector: result.sparseVector,
|
|
434
|
-
userQueryVector: result.userQueryVector,
|
|
435
|
-
userQuerySparseVector: result.userQuerySparseVector,
|
|
414
|
+
metrics: null,
|
|
436
415
|
};
|
|
437
416
|
}
|
|
438
417
|
|
|
418
|
+
// v1 fallback — only reached when the v2 flag or workspace config is off.
|
|
419
|
+
const result = await loadContextMemory({
|
|
420
|
+
scopeId: "default",
|
|
421
|
+
recentSummaries,
|
|
422
|
+
userQuery,
|
|
423
|
+
config,
|
|
424
|
+
signal,
|
|
425
|
+
});
|
|
426
|
+
|
|
439
427
|
if (result.nodes.length === 0) {
|
|
440
428
|
this.lastInjectedBlock = null;
|
|
441
429
|
this.lastInjectedNodeIds = [];
|
|
@@ -543,27 +531,16 @@ export class ConversationGraphMemory {
|
|
|
543
531
|
if (userLastBlocks.length > 0 && assistantLast) break;
|
|
544
532
|
}
|
|
545
533
|
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
userLastMessageBlocks: userLastBlocks,
|
|
550
|
-
scopeId: this.scopeId,
|
|
551
|
-
config,
|
|
552
|
-
tracker: this.tracker,
|
|
553
|
-
signal,
|
|
554
|
-
});
|
|
555
|
-
|
|
556
|
-
// v2 routing: same gating as `runContextLoad` — when the flag and config
|
|
557
|
-
// are both on, the v2 activation pipeline produces the injection block
|
|
558
|
-
// (or `null` for the cache-stable empty path). v1 retrieval above runs
|
|
559
|
-
// unconditionally so the tracker stays in sync with the v1 nodes —
|
|
560
|
-
// cheap insurance for an off→on→off flag flip mid-conversation.
|
|
534
|
+
// v2 path — skip v1 retrieval entirely when v2 is enabled. See the
|
|
535
|
+
// matching comment in `runContextLoad` for rationale.
|
|
536
|
+
const startedAt = Date.now();
|
|
561
537
|
const v2 = await this.maybeRouteV2Injection(
|
|
562
538
|
messages,
|
|
563
539
|
config,
|
|
564
540
|
"per-turn",
|
|
565
541
|
userLast,
|
|
566
542
|
assistantLast,
|
|
543
|
+
signal,
|
|
567
544
|
);
|
|
568
545
|
if (v2.routed) {
|
|
569
546
|
this.lastInjectedBlock = v2.injectedBlockText;
|
|
@@ -574,15 +551,24 @@ export class ConversationGraphMemory {
|
|
|
574
551
|
injectedTokens: v2.injectedBlockText
|
|
575
552
|
? estimateTextTokens(v2.injectedBlockText)
|
|
576
553
|
: 0,
|
|
577
|
-
latencyMs:
|
|
554
|
+
latencyMs: Date.now() - startedAt,
|
|
578
555
|
mode: "per-turn" as const,
|
|
579
556
|
injectedBlockText: v2.injectedBlockText,
|
|
580
|
-
metrics:
|
|
581
|
-
queryVector: result.queryVector,
|
|
582
|
-
sparseVector: result.sparseVector,
|
|
557
|
+
metrics: null,
|
|
583
558
|
};
|
|
584
559
|
}
|
|
585
560
|
|
|
561
|
+
// v1 path (only reached when the v2 flag or workspace config is off).
|
|
562
|
+
const result = await retrieveForTurn({
|
|
563
|
+
assistantLastMessage: assistantLast,
|
|
564
|
+
userLastMessage: userLast,
|
|
565
|
+
userLastMessageBlocks: userLastBlocks,
|
|
566
|
+
scopeId: "default",
|
|
567
|
+
config,
|
|
568
|
+
tracker: this.tracker,
|
|
569
|
+
signal,
|
|
570
|
+
});
|
|
571
|
+
|
|
586
572
|
if (result.nodes.length === 0) {
|
|
587
573
|
this.lastInjectedBlock = null;
|
|
588
574
|
this.lastInjectedNodeIds = [];
|
|
@@ -641,12 +627,12 @@ export class ConversationGraphMemory {
|
|
|
641
627
|
}
|
|
642
628
|
|
|
643
629
|
/**
|
|
644
|
-
*
|
|
645
|
-
*
|
|
646
|
-
* config (`memory.v2.enabled`) are both on.
|
|
630
|
+
* Run the v2 activation pipeline when the `memory-v2-enabled` feature flag
|
|
631
|
+
* *and* the workspace config (`memory.v2.enabled`) are both on.
|
|
647
632
|
*
|
|
648
633
|
* The two outcomes the caller distinguishes via `routed`:
|
|
649
|
-
* - `routed: false` — v2 disabled; caller
|
|
634
|
+
* - `routed: false` — v2 disabled; caller falls through to the legacy v1
|
|
635
|
+
* retrieval path.
|
|
650
636
|
* - `routed: true` — v2 ran. `runMessages` is either the v2-prepended
|
|
651
637
|
* message list (block was non-null) or the input
|
|
652
638
|
* messages unchanged (cache-stable empty path).
|
|
@@ -658,6 +644,7 @@ export class ConversationGraphMemory {
|
|
|
658
644
|
mode: InjectMemoryV2Mode,
|
|
659
645
|
userMessage: string,
|
|
660
646
|
assistantMessage: string,
|
|
647
|
+
signal: AbortSignal,
|
|
661
648
|
): Promise<{
|
|
662
649
|
routed: boolean;
|
|
663
650
|
runMessages: Message[];
|
|
@@ -683,6 +670,7 @@ export class ConversationGraphMemory {
|
|
|
683
670
|
messageId: `${this.conversationId}:turn:${currentTurn}`,
|
|
684
671
|
mode,
|
|
685
672
|
config,
|
|
673
|
+
signal,
|
|
686
674
|
});
|
|
687
675
|
|
|
688
676
|
if (!result.block) {
|
|
@@ -1443,9 +1443,7 @@ async function findCandidateNodes(
|
|
|
1443
1443
|
const embedding = await embedWithRetry(config, [searchText]);
|
|
1444
1444
|
const queryVector = embedding.vectors[0];
|
|
1445
1445
|
if (queryVector) {
|
|
1446
|
-
const searchResults = await searchGraphNodes(queryVector, 100
|
|
1447
|
-
scopeId,
|
|
1448
|
-
]);
|
|
1446
|
+
const searchResults = await searchGraphNodes(queryVector, 100);
|
|
1449
1447
|
for (const r of searchResults) allNodeIds.add(r.nodeId);
|
|
1450
1448
|
}
|
|
1451
1449
|
} catch (err) {
|
|
@@ -1,6 +1,12 @@
|
|
|
1
1
|
import { beforeEach, describe, expect, mock, test } from "bun:test";
|
|
2
2
|
|
|
3
3
|
import { makeMockLogger } from "../../__tests__/helpers/mock-logger.js";
|
|
4
|
+
import { _setOverridesForTesting } from "../../config/assistant-feature-flags.js";
|
|
5
|
+
|
|
6
|
+
// This test exercises the v1 graph search path. The `memory-v2-enabled` flag
|
|
7
|
+
// (registry default `true`) makes graph-search short-circuit to keep traffic
|
|
8
|
+
// off the legacy collection — disable it so the v1 path stays under test.
|
|
9
|
+
_setOverridesForTesting({ "memory-v2-enabled": false });
|
|
4
10
|
|
|
5
11
|
mock.module("../../util/logger.js", () => ({
|
|
6
12
|
getLogger: () => makeMockLogger(),
|
|
@@ -64,7 +70,7 @@ describe("searchGraphNodes — _meta filter parity", () => {
|
|
|
64
70
|
});
|
|
65
71
|
|
|
66
72
|
test("hybrid path excludes _meta sentinel points", async () => {
|
|
67
|
-
await searchGraphNodes([0.1], 5,
|
|
73
|
+
await searchGraphNodes([0.1], 5, {
|
|
68
74
|
indices: [1],
|
|
69
75
|
values: [1],
|
|
70
76
|
});
|
|
@@ -80,7 +86,7 @@ describe("searchGraphNodes — _meta filter parity", () => {
|
|
|
80
86
|
});
|
|
81
87
|
|
|
82
88
|
test("dense-only path also excludes _meta sentinel points", async () => {
|
|
83
|
-
await searchGraphNodes([0.1], 5
|
|
89
|
+
await searchGraphNodes([0.1], 5);
|
|
84
90
|
|
|
85
91
|
expect(searchCalls).toHaveLength(1);
|
|
86
92
|
const filter = searchCalls[0]?.filter as {
|
|
@@ -93,69 +99,6 @@ describe("searchGraphNodes — _meta filter parity", () => {
|
|
|
93
99
|
});
|
|
94
100
|
});
|
|
95
101
|
|
|
96
|
-
describe("searchGraphNodes — excludeScopeIds", () => {
|
|
97
|
-
beforeEach(() => {
|
|
98
|
-
breakerOpen = false;
|
|
99
|
-
hybridSearchCalls.length = 0;
|
|
100
|
-
searchCalls.length = 0;
|
|
101
|
-
});
|
|
102
|
-
|
|
103
|
-
test("hybrid path adds memory_scope_id must_not when excludeScopeIds provided", async () => {
|
|
104
|
-
await searchGraphNodes(
|
|
105
|
-
[0.1],
|
|
106
|
-
5,
|
|
107
|
-
undefined,
|
|
108
|
-
{ indices: [1], values: [1] },
|
|
109
|
-
undefined,
|
|
110
|
-
["scope:abc", "scope:xyz"],
|
|
111
|
-
);
|
|
112
|
-
|
|
113
|
-
expect(hybridSearchCalls).toHaveLength(1);
|
|
114
|
-
const filter = hybridSearchCalls[0]?.filter as {
|
|
115
|
-
must_not: Array<Record<string, unknown>>;
|
|
116
|
-
};
|
|
117
|
-
const scopeExclude = filter.must_not.find(
|
|
118
|
-
(c) => c.key === "memory_scope_id",
|
|
119
|
-
) as { match: { any: string[] } } | undefined;
|
|
120
|
-
expect(scopeExclude?.match.any).toEqual(["scope:abc", "scope:xyz"]);
|
|
121
|
-
});
|
|
122
|
-
|
|
123
|
-
test("dense-only path adds memory_scope_id must_not when excludeScopeIds provided", async () => {
|
|
124
|
-
await searchGraphNodes([0.1], 5, undefined, undefined, undefined, [
|
|
125
|
-
"scope:abc",
|
|
126
|
-
]);
|
|
127
|
-
|
|
128
|
-
expect(searchCalls).toHaveLength(1);
|
|
129
|
-
const filter = searchCalls[0]?.filter as {
|
|
130
|
-
must_not: Array<Record<string, unknown>>;
|
|
131
|
-
};
|
|
132
|
-
const scopeExclude = filter.must_not.find(
|
|
133
|
-
(c) => c.key === "memory_scope_id",
|
|
134
|
-
) as { match: { any: string[] } } | undefined;
|
|
135
|
-
expect(scopeExclude?.match.any).toEqual(["scope:abc"]);
|
|
136
|
-
});
|
|
137
|
-
|
|
138
|
-
test("hybrid path omits memory_scope_id must_not when excludeScopeIds is empty", async () => {
|
|
139
|
-
await searchGraphNodes(
|
|
140
|
-
[0.1],
|
|
141
|
-
5,
|
|
142
|
-
undefined,
|
|
143
|
-
{ indices: [1], values: [1] },
|
|
144
|
-
undefined,
|
|
145
|
-
[],
|
|
146
|
-
);
|
|
147
|
-
|
|
148
|
-
expect(hybridSearchCalls).toHaveLength(1);
|
|
149
|
-
const filter = hybridSearchCalls[0]?.filter as {
|
|
150
|
-
must_not: Array<Record<string, unknown>>;
|
|
151
|
-
};
|
|
152
|
-
const scopeExclude = filter.must_not.find(
|
|
153
|
-
(c) => c.key === "memory_scope_id",
|
|
154
|
-
);
|
|
155
|
-
expect(scopeExclude).toBeUndefined();
|
|
156
|
-
});
|
|
157
|
-
});
|
|
158
|
-
|
|
159
102
|
describe("searchGraphNodes — prefetch floor", () => {
|
|
160
103
|
beforeEach(() => {
|
|
161
104
|
breakerOpen = false;
|
|
@@ -164,7 +107,7 @@ describe("searchGraphNodes — prefetch floor", () => {
|
|
|
164
107
|
});
|
|
165
108
|
|
|
166
109
|
test("hybrid prefetchLimit floors at 200 for small limits", async () => {
|
|
167
|
-
await searchGraphNodes([0.1], 10,
|
|
110
|
+
await searchGraphNodes([0.1], 10, {
|
|
168
111
|
indices: [1],
|
|
169
112
|
values: [1],
|
|
170
113
|
});
|
|
@@ -174,7 +117,7 @@ describe("searchGraphNodes — prefetch floor", () => {
|
|
|
174
117
|
});
|
|
175
118
|
|
|
176
119
|
test("hybrid prefetchLimit scales with limit when limit*10 exceeds floor", async () => {
|
|
177
|
-
await searchGraphNodes([0.1], 50,
|
|
120
|
+
await searchGraphNodes([0.1], 50, {
|
|
178
121
|
indices: [1],
|
|
179
122
|
values: [1],
|
|
180
123
|
});
|
|
@@ -38,17 +38,13 @@ export interface GraphSearchResult {
|
|
|
38
38
|
* Semantic search across graph nodes in Qdrant. Returns scored node IDs
|
|
39
39
|
* that the caller can hydrate from the graph store.
|
|
40
40
|
*
|
|
41
|
-
* Filters to `target_type: "graph_node"
|
|
42
|
-
* `excludeScopeIds` adds a `must_not` against `memory_scope_id` for callers
|
|
43
|
-
* that need to omit specific scopes from a broader recall search.
|
|
41
|
+
* Filters to `target_type: "graph_node"`.
|
|
44
42
|
*/
|
|
45
43
|
export async function searchGraphNodes(
|
|
46
44
|
queryVector: number[],
|
|
47
45
|
limit: number,
|
|
48
|
-
scopeIds?: string[],
|
|
49
46
|
sparseVector?: QdrantSparseVector,
|
|
50
47
|
dateRange?: { afterMs?: number; beforeMs?: number },
|
|
51
|
-
excludeScopeIds?: string[],
|
|
52
48
|
): Promise<GraphSearchResult[]> {
|
|
53
49
|
// v2 owns the read path when both gates are on. The v1 `memory` collection
|
|
54
50
|
// is in active retirement and a corrupted sparse segment can OOM-crash the
|
|
@@ -66,21 +62,12 @@ export async function searchGraphNodes(
|
|
|
66
62
|
const mustNot: Record<string, unknown>[] = [
|
|
67
63
|
{ key: "_meta", match: { value: true } },
|
|
68
64
|
];
|
|
69
|
-
if (excludeScopeIds && excludeScopeIds.length > 0) {
|
|
70
|
-
mustNot.push({
|
|
71
|
-
key: "memory_scope_id",
|
|
72
|
-
match: { any: excludeScopeIds },
|
|
73
|
-
});
|
|
74
|
-
}
|
|
75
65
|
|
|
76
66
|
// Use hybrid search (dense + sparse with RRF fusion) when a non-empty
|
|
77
67
|
// sparse vector is available; otherwise fall back to dense-only search.
|
|
78
68
|
if (sparseVector && sparseVector.indices.length > 0) {
|
|
79
69
|
const must: Record<string, unknown>[] = [
|
|
80
70
|
{ key: "target_type", match: { value: "graph_node" } },
|
|
81
|
-
...(scopeIds && scopeIds.length > 0
|
|
82
|
-
? [{ key: "memory_scope_id", match: { any: scopeIds } }]
|
|
83
|
-
: []),
|
|
84
71
|
];
|
|
85
72
|
if (dateRange?.afterMs != null) {
|
|
86
73
|
must.push({ key: "created_at", range: { gte: dateRange.afterMs } });
|
|
@@ -120,12 +107,6 @@ export async function searchGraphNodes(
|
|
|
120
107
|
},
|
|
121
108
|
];
|
|
122
109
|
|
|
123
|
-
if (scopeIds && scopeIds.length > 0) {
|
|
124
|
-
denseMusts.push({
|
|
125
|
-
key: "memory_scope_id",
|
|
126
|
-
match: { any: scopeIds },
|
|
127
|
-
});
|
|
128
|
-
}
|
|
129
110
|
if (dateRange?.afterMs != null) {
|
|
130
111
|
denseMusts.push({ key: "created_at", range: { gte: dateRange.afterMs } });
|
|
131
112
|
}
|
|
@@ -71,6 +71,7 @@ mock.module("../../providers/provider-send-message.js", () => ({
|
|
|
71
71
|
extractToolUse: () => null,
|
|
72
72
|
}));
|
|
73
73
|
|
|
74
|
+
import { _setOverridesForTesting } from "../../config/assistant-feature-flags.js";
|
|
74
75
|
import { DEFAULT_CONFIG } from "../../config/defaults.js";
|
|
75
76
|
import type { AssistantConfig } from "../../config/types.js";
|
|
76
77
|
import { resetDb } from "../db-connection.js";
|
|
@@ -81,6 +82,11 @@ import { loadContextMemory, retrieveForTurn } from "./retriever.js";
|
|
|
81
82
|
import { createNode } from "./store.js";
|
|
82
83
|
import type { NewNode } from "./types.js";
|
|
83
84
|
|
|
85
|
+
// These tests exercise v1 retrieval. v2 takeover (both `memory-v2-enabled`
|
|
86
|
+
// flag *and* `memory.v2.enabled` schema field) makes `loadContextMemory`
|
|
87
|
+
// short-circuit, so disable the flag here to keep the v1 path under test.
|
|
88
|
+
_setOverridesForTesting({ "memory-v2-enabled": false });
|
|
89
|
+
|
|
84
90
|
const TEST_CONFIG: AssistantConfig = { ...DEFAULT_CONFIG };
|
|
85
91
|
|
|
86
92
|
function makeCapabilityNode(content: string, capId: string): NewNode {
|