@vellumai/assistant 0.7.2 → 0.7.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ARCHITECTURE.md +16 -1
- package/docs/architecture/memory.md +5 -2
- package/node_modules/@vellumai/gateway-client/src/ipc-client.ts +13 -4
- package/node_modules/@vellumai/skill-host-contracts/src/assistant-event.ts +0 -9
- package/node_modules/@vellumai/slack-text/src/index.test.ts +18 -35
- package/node_modules/@vellumai/slack-text/src/index.ts +2 -48
- package/openapi.yaml +449 -22
- package/package.json +1 -1
- package/src/__tests__/app-control-flow.test.ts +21 -11
- package/src/__tests__/assistant-event-hub.test.ts +48 -0
- package/src/__tests__/assistant-event.test.ts +0 -10
- package/src/__tests__/assistant-events-sse-hardening.test.ts +2 -7
- package/src/__tests__/assistant-feature-flags-integration.test.ts +18 -0
- package/src/__tests__/auto-analysis-end-to-end.test.ts +62 -1
- package/src/__tests__/background-workers-disk-pressure.test.ts +268 -0
- package/src/__tests__/call-conversation-messages.test.ts +8 -2
- package/src/__tests__/channel-inbound-disk-pressure.test.ts +537 -0
- package/src/__tests__/channel-readiness-service.test.ts +4 -2
- package/src/__tests__/config-loader-backfill.test.ts +379 -0
- package/src/__tests__/config-schema.test.ts +1 -0
- package/src/__tests__/config-watcher-cleanup-throttle.test.ts +18 -9
- package/src/__tests__/config-watcher.test.ts +140 -69
- package/src/__tests__/context-search-agent-runner.test.ts +61 -3
- package/src/__tests__/context-search-conversations-source.test.ts +0 -24
- package/src/__tests__/context-search-fanout.test.ts +0 -1
- package/src/__tests__/context-search-memory-source.test.ts +3 -7
- package/src/__tests__/context-search-memory-v2-source.test.ts +0 -2
- package/src/__tests__/context-search-pkb-source.test.ts +0 -1
- package/src/__tests__/context-search-workspace-source.test.ts +0 -1
- package/src/__tests__/conversation-abort-tool-results.test.ts +6 -0
- package/src/__tests__/conversation-agent-loop-disk-pressure.test.ts +223 -0
- package/src/__tests__/conversation-agent-loop.test.ts +454 -5
- package/src/__tests__/conversation-error.test.ts +150 -3
- package/src/__tests__/conversation-process-callsite.test.ts +43 -0
- package/src/__tests__/conversation-provider-retry-repair.test.ts +6 -0
- package/src/__tests__/conversation-runtime-assembly.test.ts +65 -0
- package/src/__tests__/conversation-slash-unknown.test.ts +6 -0
- package/src/__tests__/conversation-speed-override.test.ts +0 -3
- package/src/__tests__/conversation-store.test.ts +0 -18
- package/src/__tests__/conversation-surfaces-app-control.test.ts +15 -4
- package/src/__tests__/conversation-surfaces-data-persist.test.ts +404 -0
- package/src/__tests__/conversation-tool-setup-app-refresh.test.ts +2 -5
- package/src/__tests__/conversation-workspace-injection.test.ts +6 -0
- package/src/__tests__/conversation-workspace-tool-tracking.test.ts +6 -0
- package/src/__tests__/credentials-cli.test.ts +7 -0
- package/src/__tests__/cu-unified-flow.test.ts +176 -10
- package/src/__tests__/date-context.test.ts +164 -2
- package/src/__tests__/disk-pressure-guard.test.ts +262 -0
- package/src/__tests__/disk-pressure-lifecycle.test.ts +168 -0
- package/src/__tests__/disk-pressure-policy.test.ts +241 -0
- package/src/__tests__/disk-pressure-routes.test.ts +379 -0
- package/src/__tests__/disk-pressure-tools.test.ts +277 -0
- package/src/__tests__/disk-usage.test.ts +150 -0
- package/src/__tests__/events-client-registration.test.ts +52 -0
- package/src/__tests__/events-dev-bypass-actor.test.ts +162 -0
- package/src/__tests__/file-write-tool.test.ts +4 -10
- package/src/__tests__/filing-service.test.ts +3 -4
- package/src/__tests__/heartbeat-disk-pressure.test.ts +183 -0
- package/src/__tests__/heartbeat-service.test.ts +260 -11
- package/src/__tests__/host-app-control-proxy.test.ts +195 -25
- package/src/__tests__/host-bash-proxy.test.ts +227 -34
- package/src/__tests__/host-bash-routes.test.ts +178 -13
- package/src/__tests__/host-cu-proxy.test.ts +210 -3
- package/src/__tests__/host-cu-routes-targeted.test.ts +141 -12
- package/src/__tests__/host-file-proxy-targeted.test.ts +48 -9
- package/src/__tests__/host-file-proxy.test.ts +268 -6
- package/src/__tests__/host-file-routes-targeted.test.ts +175 -17
- package/src/__tests__/host-transfer-proxy-targeted.test.ts +408 -59
- package/src/__tests__/host-transfer-routes-targeted.test.ts +232 -17
- package/src/__tests__/http-user-message-parity.test.ts +107 -1
- package/src/__tests__/injector-chain.test.ts +18 -6
- package/src/__tests__/injector-disk-pressure.test.ts +224 -0
- package/src/__tests__/managed-profile-guard.test.ts +18 -0
- package/src/__tests__/mcp-abort-signal.test.ts +130 -0
- package/src/__tests__/memory-admin-recall.test.ts +3 -11
- package/src/__tests__/memory-retrieval-pipeline.test.ts +22 -1
- package/src/__tests__/normalize-onboarding.test.ts +180 -0
- package/src/__tests__/oauth-connect-routes.test.ts +316 -0
- package/src/__tests__/oauth-provider-seed-logos.test.ts +24 -2
- package/src/__tests__/onboarding-persona-write.test.ts +308 -0
- package/src/__tests__/openai-provider.test.ts +45 -8
- package/src/__tests__/persist-onboarding-artifacts.test.ts +44 -64
- package/src/__tests__/platform-callback-registration.test.ts +21 -4
- package/src/__tests__/platform.test.ts +2 -1
- package/src/__tests__/playbook-execution.test.ts +0 -43
- package/src/__tests__/plugin-tool-contribution.test.ts +47 -0
- package/src/__tests__/prechat-onboarding-contract.test.ts +214 -27
- package/src/__tests__/provider-tool-name.test.ts +23 -0
- package/src/__tests__/relay-server.test.ts +15 -4
- package/src/__tests__/runtime-events-sse.test.ts +4 -8
- package/src/__tests__/scheduler-disk-pressure.test.ts +148 -0
- package/src/__tests__/secret-ingress-http.test.ts +0 -1
- package/src/__tests__/suggestion-routes.test.ts +46 -0
- package/src/__tests__/twilio-validation.test.ts +2 -2
- package/src/__tests__/workspace-migration-065-bump-stale-heartbeat-interval.test.ts +122 -0
- package/src/__tests__/workspace-migration-066-seed-heartbeat-callsite-cost-default.test.ts +285 -0
- package/src/__tests__/workspace-migration-068-release-notes-local-timezone.test.ts +90 -0
- package/src/__tests__/workspace-migration-safe-storage-limits-release.test.ts +90 -0
- package/src/approvals/guardian-decision-primitive.ts +13 -0
- package/src/approvals/guardian-request-resolvers.ts +16 -17
- package/src/backup/snapshot-lock.ts +2 -27
- package/src/bundler/compiler-tools.ts +3 -2
- package/src/calls/call-conversation-messages.ts +46 -10
- package/src/cli/commands/__tests__/webhooks.test.ts +0 -4
- package/src/cli/commands/bash.ts +35 -108
- package/src/cli/commands/contacts.ts +64 -25
- package/src/cli/commands/credentials.ts +56 -0
- package/src/cli/commands/memory-v2.ts +7 -6
- package/src/cli/commands/oauth/__tests__/connect.test.ts +437 -1
- package/src/cli/commands/oauth/connect.ts +127 -1
- package/src/cli/commands/platform/__tests__/callback-routes-list.test.ts +0 -3
- package/src/cli/commands/platform/__tests__/connect.test.ts +7 -1
- package/src/cli/commands/platform/__tests__/disconnect.test.ts +7 -1
- package/src/cli/commands/platform/__tests__/status.test.ts +103 -6
- package/src/cli/commands/platform/index.ts +16 -7
- package/src/cli/commands/status.ts +57 -0
- package/src/cli/program.ts +4 -2
- package/src/config/assistant-feature-flags.ts +13 -3
- package/src/config/bundled-skills/messaging/tools/messaging-analyze-style.ts +4 -3
- package/src/config/bundled-skills/phone-calls/references/TROUBLESHOOTING.md +13 -7
- package/src/config/bundled-skills/playbooks/tools/playbook-create.ts +2 -2
- package/src/config/bundled-skills/playbooks/tools/playbook-delete.ts +2 -2
- package/src/config/bundled-skills/playbooks/tools/playbook-list.ts +2 -2
- package/src/config/bundled-skills/playbooks/tools/playbook-update.ts +2 -2
- package/src/config/env.ts +0 -8
- package/src/config/feature-flag-registry.json +27 -3
- package/src/config/loader.ts +127 -8
- package/src/config/schemas/__tests__/memory-v2.test.ts +10 -5
- package/src/config/schemas/call-site-catalog.ts +14 -0
- package/src/config/schemas/channels.ts +0 -5
- package/src/config/schemas/heartbeat.ts +1 -1
- package/src/config/schemas/llm.ts +2 -0
- package/src/config/schemas/memory-lifecycle.ts +13 -0
- package/src/config/schemas/memory-v2.ts +75 -11
- package/src/config/schemas/platform.ts +43 -3
- package/src/config/schemas/services.ts +28 -0
- package/src/config/seed-inference-profiles.ts +230 -33
- package/src/contacts/contact-store.ts +0 -25
- package/src/daemon/__tests__/conversation-tool-setup.test.ts +86 -25
- package/src/daemon/assistant-attachments.ts +4 -4
- package/src/daemon/config-watcher.ts +85 -57
- package/src/daemon/conversation-agent-loop-handlers.ts +6 -0
- package/src/daemon/conversation-agent-loop.ts +170 -33
- package/src/daemon/conversation-error.ts +87 -15
- package/src/daemon/conversation-lifecycle.ts +1 -3
- package/src/daemon/conversation-process.ts +8 -0
- package/src/daemon/conversation-runtime-assembly.ts +26 -0
- package/src/daemon/conversation-store.ts +2 -2
- package/src/daemon/conversation-surfaces.ts +195 -15
- package/src/daemon/conversation-tool-setup.ts +57 -14
- package/src/daemon/conversation.ts +17 -22
- package/src/daemon/date-context.ts +71 -22
- package/src/daemon/disk-pressure-background-gate.ts +73 -0
- package/src/daemon/disk-pressure-guard.ts +343 -0
- package/src/daemon/disk-pressure-policy.ts +163 -0
- package/src/daemon/handlers/shared.ts +0 -1
- package/src/daemon/handlers/skills.ts +3 -4
- package/src/daemon/host-app-control-proxy.ts +137 -41
- package/src/daemon/host-bash-proxy.ts +46 -21
- package/src/daemon/host-cu-proxy.ts +49 -3
- package/src/daemon/host-file-proxy.ts +43 -7
- package/src/daemon/host-transfer-proxy.ts +95 -4
- package/src/daemon/lifecycle.ts +79 -28
- package/src/daemon/meet-host-supervisor.ts +4 -4
- package/src/daemon/meet-manifest-loader.ts +0 -1
- package/src/daemon/memory-v2-startup.ts +14 -4
- package/src/daemon/message-protocol.ts +3 -0
- package/src/daemon/message-types/conversations.ts +4 -0
- package/src/daemon/message-types/disk-pressure.ts +9 -0
- package/src/daemon/message-types/messages.ts +3 -0
- package/src/daemon/profiler-run-store.ts +5 -5
- package/src/daemon/tool-setup-types.ts +2 -2
- package/src/documents/document-store.ts +85 -0
- package/src/filing/filing-service.ts +30 -5
- package/src/heartbeat/__tests__/heartbeat-feed-event.test.ts +9 -16
- package/src/heartbeat/__tests__/heartbeat-run-store.test.ts +36 -0
- package/src/heartbeat/heartbeat-run-store.ts +13 -0
- package/src/heartbeat/heartbeat-service.ts +205 -31
- package/src/home/feed-scheduler.ts +18 -0
- package/src/inbound/platform-callback-registration.ts +8 -15
- package/src/ipc/__tests__/clients-list-ipc.test.ts +169 -0
- package/src/ipc/assistant-server.ts +56 -2
- package/src/ipc/gateway-client.ts +37 -3
- package/src/live-voice/live-voice-archive.ts +4 -4
- package/src/live-voice/protocol.ts +5 -7
- package/src/media/image-service.ts +1 -7
- package/src/memory/__tests__/fixtures/memory-v2-activation-fixtures.ts +21 -13
- package/src/memory/__tests__/jobs-worker-v2-schedule.test.ts +52 -22
- package/src/memory/__tests__/memory-v2-activation-log-store.test.ts +0 -6
- package/src/memory/__tests__/memory-v2-concept-frequency.test.ts +272 -0
- package/src/memory/admin.ts +5 -9
- package/src/memory/context-search/agent-runner.ts +19 -2
- package/src/memory/context-search/sources/conversations.ts +2 -11
- package/src/memory/context-search/sources/memory-v2.ts +5 -4
- package/src/memory/context-search/sources/memory.ts +0 -1
- package/src/memory/context-search/types.ts +0 -1
- package/src/memory/conversation-crud.ts +4 -12
- package/src/memory/db-init.ts +2 -0
- package/src/memory/embedding-runtime-manager.ts +119 -5
- package/src/memory/graph/__tests__/conversation-graph-memory-v2-routing.test.ts +32 -21
- package/src/memory/graph/conversation-graph-memory.ts +42 -54
- package/src/memory/graph/extraction.ts +1 -3
- package/src/memory/graph/graph-search.test.ts +10 -67
- package/src/memory/graph/graph-search.ts +1 -20
- package/src/memory/graph/retriever.test.ts +6 -0
- package/src/memory/graph/retriever.ts +6 -10
- package/src/memory/indexer.ts +54 -45
- package/src/memory/job-handlers/backfill.ts +2 -11
- package/src/memory/job-handlers/cleanup.ts +43 -0
- package/src/memory/job-handlers/embedding.ts +6 -8
- package/src/memory/job-handlers/summarization.ts +2 -7
- package/src/memory/jobs-store.ts +48 -0
- package/src/memory/jobs-worker.ts +81 -43
- package/src/memory/memory-v2-activation-log-store.ts +32 -14
- package/src/memory/memory-v2-concept-frequency.ts +169 -0
- package/src/memory/migrations/239-trace-events-created-at-index.ts +18 -0
- package/src/memory/migrations/index.ts +1 -0
- package/src/memory/pkb/pkb-search.test.ts +6 -0
- package/src/memory/qdrant-client.ts +0 -13
- package/src/memory/rerank-local.ts +374 -0
- package/src/memory/search/semantic.ts +6 -67
- package/src/memory/trace-event-store.ts +1 -17
- package/src/memory/v2/__tests__/activation.test.ts +311 -250
- package/src/memory/v2/__tests__/consolidation-job.test.ts +40 -8
- package/src/memory/v2/__tests__/injection.test.ts +157 -167
- package/src/memory/v2/__tests__/prompts-consolidation.test.ts +61 -2
- package/src/memory/v2/__tests__/qdrant.test.ts +16 -0
- package/src/memory/v2/__tests__/reranker.test.ts +338 -0
- package/src/memory/v2/__tests__/sim.test.ts +5 -199
- package/src/memory/v2/__tests__/skill-store.test.ts +71 -65
- package/src/memory/v2/__tests__/static-context.test.ts +76 -1
- package/src/memory/v2/activation.ts +149 -156
- package/src/memory/v2/consolidation-job.ts +62 -12
- package/src/memory/v2/injection.ts +47 -60
- package/src/memory/v2/prompts/consolidation.ts +36 -1
- package/src/memory/v2/qdrant.ts +99 -0
- package/src/memory/v2/reranker.ts +177 -0
- package/src/memory/v2/sim.ts +10 -84
- package/src/memory/v2/skill-content.ts +4 -3
- package/src/memory/v2/skill-store.ts +82 -59
- package/src/memory/v2/static-context.ts +22 -0
- package/src/memory/v2/types.ts +10 -10
- package/src/notifications/copy-composer.ts +13 -0
- package/src/notifications/signal.ts +4 -0
- package/src/oauth/AGENTS.md +3 -1
- package/src/oauth/__tests__/oauth-connect-state.test.ts +137 -0
- package/src/oauth/connect-orchestrator.ts +2 -0
- package/src/oauth/connection-resolver.test.ts +66 -1
- package/src/oauth/connection-resolver.ts +55 -1
- package/src/oauth/oauth-connect-state.ts +77 -0
- package/src/oauth/seed-providers.ts +58 -1
- package/src/plugins/defaults/injectors.ts +35 -2
- package/src/plugins/defaults/memory-retrieval.ts +5 -6
- package/src/plugins/types.ts +7 -0
- package/src/proactive-artifact/aux-message-injector.ts +74 -0
- package/src/proactive-artifact/decision.test.ts +226 -0
- package/src/proactive-artifact/decision.ts +165 -0
- package/src/proactive-artifact/index.ts +7 -0
- package/src/proactive-artifact/job.test.ts +867 -0
- package/src/proactive-artifact/job.ts +352 -0
- package/src/proactive-artifact/message-copy.ts +41 -0
- package/src/proactive-artifact/trigger-state.test.ts +277 -0
- package/src/proactive-artifact/trigger-state.ts +119 -0
- package/src/prompts/normalize-onboarding.ts +80 -0
- package/src/prompts/persona-resolver.ts +101 -9
- package/src/prompts/system-prompt.ts +21 -7
- package/src/prompts/templates/BOOTSTRAP.md +13 -5
- package/src/providers/__tests__/retry-callsite.test.ts +222 -1
- package/src/providers/model-intents.ts +7 -0
- package/src/providers/openrouter/client.ts +8 -0
- package/src/providers/retry.ts +50 -0
- package/src/providers/types.ts +1 -0
- package/src/runtime/__tests__/agent-wake.test.ts +456 -3
- package/src/runtime/agent-wake.ts +238 -100
- package/src/runtime/assistant-event-hub.ts +36 -6
- package/src/runtime/assistant-event.ts +0 -1
- package/src/runtime/auth/__tests__/route-policy.test.ts +64 -0
- package/src/runtime/auth/route-policy.ts +14 -1
- package/src/runtime/auth/same-actor.ts +216 -0
- package/src/runtime/channel-retry-sweep.ts +65 -1
- package/src/runtime/guardian-reply-router.ts +10 -0
- package/src/runtime/local-actor-identity.ts +52 -11
- package/src/runtime/pending-interactions.ts +8 -0
- package/src/runtime/routes/__tests__/client-routes.test.ts +155 -0
- package/src/runtime/routes/__tests__/conversation-query-routes.test.ts +0 -5
- package/src/runtime/routes/__tests__/heartbeat-routes.test.ts +1 -1
- package/src/runtime/routes/client-routes.ts +20 -2
- package/src/runtime/routes/contact-routes.ts +0 -25
- package/src/runtime/routes/conversation-routes.ts +35 -26
- package/src/runtime/routes/debug-bash-routes.ts +163 -0
- package/src/runtime/routes/disk-pressure-routes.ts +121 -0
- package/src/runtime/routes/document-pdf-renderer.ts +6 -2
- package/src/runtime/routes/documents-routes.ts +2 -75
- package/src/runtime/routes/events-routes.ts +41 -9
- package/src/runtime/routes/host-bash-routes.ts +23 -3
- package/src/runtime/routes/host-cu-routes.ts +33 -6
- package/src/runtime/routes/host-file-routes.ts +32 -6
- package/src/runtime/routes/host-transfer-routes.ts +79 -16
- package/src/runtime/routes/identity-routes.ts +7 -138
- package/src/runtime/routes/inbound-message-handler.ts +77 -12
- package/src/runtime/routes/inbound-stages/guardian-reply-intercept.ts +3 -0
- package/src/runtime/routes/index.ts +6 -0
- package/src/runtime/routes/memory-item-routes.test.ts +41 -15
- package/src/runtime/routes/memory-v2-routes.ts +33 -0
- package/src/runtime/routes/oauth-connect-routes.ts +153 -0
- package/src/runtime/verification-outbound-actions.ts +4 -4
- package/src/schedule/run-script.ts +37 -5
- package/src/schedule/scheduler.ts +20 -1
- package/src/security/encrypted-store.ts +2 -0
- package/src/security/secure-keys.ts +55 -0
- package/src/skills/remote-skill-policy.ts +4 -10
- package/src/subagent/index.ts +1 -7
- package/src/subagent/manager.ts +1 -15
- package/src/tasks/task-runner.ts +0 -1
- package/src/tasks/task-store.ts +0 -3
- package/src/tools/background-tool-registry.ts +17 -3
- package/src/tools/host-filesystem/edit.test.ts +151 -0
- package/src/tools/host-filesystem/edit.ts +43 -1
- package/src/tools/host-filesystem/read.test.ts +129 -0
- package/src/tools/host-filesystem/read.ts +43 -1
- package/src/tools/host-filesystem/transfer.test.ts +127 -2
- package/src/tools/host-filesystem/transfer.ts +56 -11
- package/src/tools/host-filesystem/write.test.ts +134 -0
- package/src/tools/host-filesystem/write.ts +43 -1
- package/src/tools/host-terminal/host-shell.ts +13 -6
- package/src/tools/mcp/mcp-tool-factory.ts +2 -1
- package/src/tools/memory/register.test.ts +12 -9
- package/src/tools/memory/register.ts +1 -2
- package/src/tools/provider-tool-name.ts +28 -0
- package/src/tools/registry.ts +30 -9
- package/src/tools/terminal/shell.ts +9 -1
- package/src/tools/tool-approval-handler.ts +31 -6
- package/src/tools/types.ts +24 -2
- package/src/tts/provider-catalog.ts +3 -5
- package/src/util/disk-usage.ts +138 -0
- package/src/util/platform.ts +21 -11
- package/src/util/process-liveness.ts +26 -0
- package/src/workspace/heartbeat-service.ts +19 -0
- package/src/workspace/migrations/065-bump-stale-heartbeat-interval.ts +60 -0
- package/src/workspace/migrations/066-seed-heartbeat-callsite-cost-default.ts +146 -0
- package/src/workspace/migrations/067-release-notes-safe-storage-limits.ts +72 -0
- package/src/workspace/migrations/068-release-notes-local-timezone.ts +65 -0
- package/src/workspace/migrations/registry.ts +8 -0
- package/src/__tests__/conversation-tool-setup-memory-scope.test.ts +0 -167
- package/src/memory/v2/__tests__/skill-qdrant.test.ts +0 -657
- package/src/memory/v2/skill-qdrant.ts +0 -404
- package/src/signals/bash.ts +0 -198
|
@@ -29,21 +29,18 @@ import { getWorkspaceDir } from "../../util/platform.js";
|
|
|
29
29
|
import type { DrizzleDb } from "../db-connection.js";
|
|
30
30
|
import {
|
|
31
31
|
type MemoryV2ConceptRowRecord,
|
|
32
|
-
type MemoryV2SkillRowRecord,
|
|
33
32
|
recordMemoryV2ActivationLog,
|
|
34
33
|
} from "../memory-v2-activation-log-store.js";
|
|
35
34
|
import {
|
|
36
35
|
computeOwnActivation,
|
|
37
|
-
computeSkillActivation,
|
|
38
36
|
selectCandidates,
|
|
39
37
|
selectInjections,
|
|
40
|
-
selectSkillInjections,
|
|
41
38
|
spreadActivation,
|
|
42
39
|
} from "./activation.js";
|
|
43
40
|
import { hydrate, save } from "./activation-store.js";
|
|
44
41
|
import { getEdgeIndex } from "./edge-index.js";
|
|
45
42
|
import { readPage, renderPageContent } from "./page-store.js";
|
|
46
|
-
import {
|
|
43
|
+
import { getSkillCapability, isSkillSlug } from "./skill-store.js";
|
|
47
44
|
import type { ActivationState, EverInjectedEntry } from "./types.js";
|
|
48
45
|
|
|
49
46
|
const log = getLogger("memory-v2-injection");
|
|
@@ -84,6 +81,7 @@ export interface InjectMemoryV2BlockParams {
|
|
|
84
81
|
*/
|
|
85
82
|
mode?: InjectMemoryV2Mode;
|
|
86
83
|
config: AssistantConfig;
|
|
84
|
+
signal?: AbortSignal;
|
|
87
85
|
}
|
|
88
86
|
|
|
89
87
|
export interface InjectMemoryV2BlockResult {
|
|
@@ -127,30 +125,36 @@ export async function injectMemoryV2Block(
|
|
|
127
125
|
nowText,
|
|
128
126
|
messageId,
|
|
129
127
|
config,
|
|
128
|
+
signal,
|
|
130
129
|
} = params;
|
|
131
130
|
|
|
132
131
|
const workspaceDir = getWorkspaceDir();
|
|
133
132
|
|
|
134
133
|
// (1) Hydrate. Missing rows are normal at conversation start — proceed
|
|
135
134
|
// with an effective empty prior state so the first turn can still inject.
|
|
135
|
+
throwIfAborted(signal);
|
|
136
136
|
const priorState = await hydrate(database, conversationId);
|
|
137
137
|
|
|
138
138
|
// (2) Topology. `getEdgeIndex` walks concept-page frontmatter and caches
|
|
139
139
|
// the result module-locally; an empty workspace yields an empty index.
|
|
140
|
+
throwIfAborted(signal);
|
|
140
141
|
const edgeIndex = await getEdgeIndex(workspaceDir);
|
|
141
142
|
|
|
142
143
|
// (3) Candidate set: prior-state survivors above epsilon ∪ ANN top-50.
|
|
143
144
|
// `selectCandidates` also returns `fromPrior` / `fromAnn` provenance sets so
|
|
144
145
|
// telemetry can attribute each candidate back to its source.
|
|
146
|
+
throwIfAborted(signal);
|
|
145
147
|
const { candidates, fromPrior, fromAnn } = await selectCandidates({
|
|
146
148
|
priorState,
|
|
147
149
|
userText: userMessage,
|
|
148
150
|
assistantText: assistantMessage,
|
|
149
151
|
nowText,
|
|
150
152
|
config,
|
|
153
|
+
signal,
|
|
151
154
|
});
|
|
152
155
|
|
|
153
156
|
// (4) Own activation: A_o = d·prev + c_user·sim_u + c_a·sim_a + c_now·sim_n.
|
|
157
|
+
throwIfAborted(signal);
|
|
154
158
|
const { activation: ownActivation, breakdown: ownBreakdown } =
|
|
155
159
|
await computeOwnActivation({
|
|
156
160
|
candidates,
|
|
@@ -159,9 +163,11 @@ export async function injectMemoryV2Block(
|
|
|
159
163
|
assistantText: assistantMessage,
|
|
160
164
|
nowText,
|
|
161
165
|
config,
|
|
166
|
+
signal,
|
|
162
167
|
});
|
|
163
168
|
|
|
164
169
|
// (5) Spreading activation across the edge graph (k, hops from config).
|
|
170
|
+
throwIfAborted(signal);
|
|
165
171
|
const { k, hops, top_k, epsilon } = config.memory.v2;
|
|
166
172
|
const { final: finalActivation, contribution: spreadContribution } =
|
|
167
173
|
spreadActivation(ownActivation, edgeIndex, k, hops);
|
|
@@ -182,25 +188,6 @@ export async function injectMemoryV2Block(
|
|
|
182
188
|
});
|
|
183
189
|
const slugsToRender = mode === "context-load" ? topNow : toInject;
|
|
184
190
|
|
|
185
|
-
// (6b) Skill pipeline — a sibling pipeline to the concept-page one above.
|
|
186
|
-
// Skills are stateless (no decay, no spread, no `everInjected` dedup) and
|
|
187
|
-
// the catalog is small, so every known skill is scored every turn. The
|
|
188
|
-
// top-K injection slate is re-presented every turn so the agent can drop
|
|
189
|
-
// and pick skills up freely; the inspector renders the full ranked list.
|
|
190
|
-
const skillCandidates = new Set(getAllSkillIds());
|
|
191
|
-
const { activation: skillActivation, breakdown: skillBreakdown } =
|
|
192
|
-
await computeSkillActivation({
|
|
193
|
-
candidates: skillCandidates,
|
|
194
|
-
userText: userMessage,
|
|
195
|
-
assistantText: assistantMessage,
|
|
196
|
-
nowText,
|
|
197
|
-
config,
|
|
198
|
-
});
|
|
199
|
-
const { topNow: topSkillIds } = selectSkillInjections({
|
|
200
|
-
A: skillActivation,
|
|
201
|
-
topK: config.memory.v2.top_k_skills,
|
|
202
|
-
});
|
|
203
|
-
|
|
204
191
|
// Build the next persisted state regardless of whether we render anything:
|
|
205
192
|
// even on a "no new injection" turn, prior-state activations decay via the
|
|
206
193
|
// candidate-set carry-forward and need to be rewritten so `epsilon`-trimmed
|
|
@@ -215,8 +202,10 @@ export async function injectMemoryV2Block(
|
|
|
215
202
|
// just rendered all of them); on per-turn it's just the newly added slugs.
|
|
216
203
|
// We append rather than reset so that compaction-driven eviction
|
|
217
204
|
// (`evictCompactedTurns`) is the only path that can re-enable a previously-
|
|
218
|
-
// injected slug.
|
|
219
|
-
//
|
|
205
|
+
// injected slug. Skill slugs (`skills/<id>`) participate in this dedup just
|
|
206
|
+
// like concept slugs — once attached on a turn, the cached attachment lives
|
|
207
|
+
// on that user message and the agent keeps seeing it across subsequent turns
|
|
208
|
+
// until compaction evicts the turn.
|
|
220
209
|
const everInjectedSet = new Set(priorEverInjected.map((entry) => entry.slug));
|
|
221
210
|
const newlyInjected = slugsToRender.filter(
|
|
222
211
|
(slug) => !everInjectedSet.has(slug),
|
|
@@ -243,7 +232,6 @@ export async function injectMemoryV2Block(
|
|
|
243
232
|
const { block, missingSlugs } = await renderInjectionBlock(
|
|
244
233
|
workspaceDir,
|
|
245
234
|
slugsToRender,
|
|
246
|
-
topSkillIds,
|
|
247
235
|
);
|
|
248
236
|
const missingSlugSet = new Set(missingSlugs);
|
|
249
237
|
if (missingSlugs.length > 0) {
|
|
@@ -262,7 +250,6 @@ export async function injectMemoryV2Block(
|
|
|
262
250
|
// block memory injection.
|
|
263
251
|
const toInjectSet = new Set(toInject);
|
|
264
252
|
const renderedSet = new Set(slugsToRender);
|
|
265
|
-
const topSkillIdSet = new Set(topSkillIds);
|
|
266
253
|
const conceptRows: MemoryV2ConceptRowRecord[] = [...candidates].map(
|
|
267
254
|
(slug) => {
|
|
268
255
|
const breakdown = ownBreakdown.get(slug);
|
|
@@ -301,6 +288,9 @@ export async function injectMemoryV2Block(
|
|
|
301
288
|
simUser: breakdown?.simUser ?? 0,
|
|
302
289
|
simAssistant: breakdown?.simAssistant ?? 0,
|
|
303
290
|
simNow: breakdown?.simNow ?? 0,
|
|
291
|
+
simUserRerankBoost: breakdown?.simUserRerankBoost ?? 0,
|
|
292
|
+
simAssistantRerankBoost: breakdown?.simAssistantRerankBoost ?? 0,
|
|
293
|
+
inRerankPool: breakdown?.inRerankPool ?? false,
|
|
304
294
|
spreadContribution: spreadContribution.get(slug) ?? 0,
|
|
305
295
|
source:
|
|
306
296
|
inPrior && inAnn ? "both" : inPrior ? "prior_state" : "ann_top50",
|
|
@@ -310,19 +300,6 @@ export async function injectMemoryV2Block(
|
|
|
310
300
|
);
|
|
311
301
|
conceptRows.sort((a, b) => b.finalActivation - a.finalActivation);
|
|
312
302
|
|
|
313
|
-
const skillRows: MemoryV2SkillRowRecord[] = [...skillCandidates].map((id) => {
|
|
314
|
-
const breakdown = skillBreakdown.get(id);
|
|
315
|
-
return {
|
|
316
|
-
id,
|
|
317
|
-
activation: skillActivation.get(id) ?? 0,
|
|
318
|
-
simUser: breakdown?.simUser ?? 0,
|
|
319
|
-
simAssistant: breakdown?.simAssistant ?? 0,
|
|
320
|
-
simNow: breakdown?.simNow ?? 0,
|
|
321
|
-
status: topSkillIdSet.has(id) ? "injected" : "not_injected",
|
|
322
|
-
};
|
|
323
|
-
});
|
|
324
|
-
skillRows.sort((a, b) => b.activation - a.activation);
|
|
325
|
-
|
|
326
303
|
const v2Cfg = config.memory.v2;
|
|
327
304
|
try {
|
|
328
305
|
recordMemoryV2ActivationLog({
|
|
@@ -330,7 +307,6 @@ export async function injectMemoryV2Block(
|
|
|
330
307
|
turn: currentTurn,
|
|
331
308
|
mode,
|
|
332
309
|
concepts: conceptRows,
|
|
333
|
-
skills: skillRows,
|
|
334
310
|
config: {
|
|
335
311
|
d: v2Cfg.d,
|
|
336
312
|
c_user: v2Cfg.c_user,
|
|
@@ -339,7 +315,6 @@ export async function injectMemoryV2Block(
|
|
|
339
315
|
k: v2Cfg.k,
|
|
340
316
|
hops: v2Cfg.hops,
|
|
341
317
|
top_k: v2Cfg.top_k,
|
|
342
|
-
top_k_skills: v2Cfg.top_k_skills,
|
|
343
318
|
epsilon: v2Cfg.epsilon,
|
|
344
319
|
},
|
|
345
320
|
});
|
|
@@ -353,6 +328,12 @@ export async function injectMemoryV2Block(
|
|
|
353
328
|
return { block, toInject: newlyInjected };
|
|
354
329
|
}
|
|
355
330
|
|
|
331
|
+
function throwIfAborted(signal: AbortSignal | undefined): void {
|
|
332
|
+
if (signal?.aborted) {
|
|
333
|
+
throw new DOMException("Aborted", "AbortError");
|
|
334
|
+
}
|
|
335
|
+
}
|
|
336
|
+
|
|
356
337
|
// ---------------------------------------------------------------------------
|
|
357
338
|
// Internal helpers
|
|
358
339
|
// ---------------------------------------------------------------------------
|
|
@@ -380,9 +361,14 @@ interface RenderInjectionBlockResult {
|
|
|
380
361
|
}
|
|
381
362
|
|
|
382
363
|
/**
|
|
383
|
-
* Render the inner content of the `<memory>` block for a list of slugs
|
|
384
|
-
*
|
|
385
|
-
*
|
|
364
|
+
* Render the inner content of the `<memory>` block for a list of slugs.
|
|
365
|
+
* The caller wraps the result in `<memory>...</memory>` exactly once at
|
|
366
|
+
* injection time.
|
|
367
|
+
*
|
|
368
|
+
* The slug list is partitioned by prefix: slugs starting with `skills/`
|
|
369
|
+
* resolve to a `SkillEntry` via `getSkillCapability` and render under the
|
|
370
|
+
* trailing `### Skills You Can Use` subsection; everything else is read
|
|
371
|
+
* from disk via `readPage` and rendered as a concept-page section.
|
|
386
372
|
*
|
|
387
373
|
* Concept pages are read in parallel via `readPage`. Pages whose file has
|
|
388
374
|
* gone missing between selection and render (e.g. consolidation deleted
|
|
@@ -390,17 +376,17 @@ interface RenderInjectionBlockResult {
|
|
|
390
376
|
* block but reported back via `missingSlugs` so callers can surface the
|
|
391
377
|
* divergence.
|
|
392
378
|
*
|
|
393
|
-
* Skill
|
|
394
|
-
*
|
|
395
|
-
* the
|
|
379
|
+
* Skill slugs whose entry the cache no longer knows (e.g. uninstalled
|
|
380
|
+
* mid-run) are silently dropped, mirroring the missing-pages behavior but
|
|
381
|
+
* without entering `missingSlugs` — the skill catalog is the source of
|
|
382
|
+
* truth for skill availability, not on-disk concept pages, so a missing
|
|
383
|
+
* skill is an expected catalog-level outcome rather than a stale-index
|
|
384
|
+
* bug.
|
|
396
385
|
*
|
|
397
|
-
* The block shape
|
|
398
|
-
*
|
|
399
|
-
* as it lives on disk — frontmatter (`edges`, `ref_files`) plus body — so
|
|
400
|
-
* the agent sees the page's edges and any referenced media paths alongside
|
|
401
|
-
* the prose:
|
|
386
|
+
* The block shape mirrors the §5 layout — concept-page sections first,
|
|
387
|
+
* skills subsection last — preserving the prompt format the agent sees:
|
|
402
388
|
*
|
|
403
|
-
* ### <slug-1>
|
|
389
|
+
* ### <concept-slug-1>
|
|
404
390
|
* ---
|
|
405
391
|
* edges:
|
|
406
392
|
* - <neighbor-slug>
|
|
@@ -409,7 +395,7 @@ interface RenderInjectionBlockResult {
|
|
|
409
395
|
* ---
|
|
410
396
|
* <body-1>
|
|
411
397
|
*
|
|
412
|
-
* ### <slug-2>
|
|
398
|
+
* ### <concept-slug-2>
|
|
413
399
|
* ---
|
|
414
400
|
* edges: []
|
|
415
401
|
* ref_files: []
|
|
@@ -423,10 +409,12 @@ interface RenderInjectionBlockResult {
|
|
|
423
409
|
async function renderInjectionBlock(
|
|
424
410
|
workspaceDir: string,
|
|
425
411
|
slugs: string[],
|
|
426
|
-
skillIds: string[],
|
|
427
412
|
): Promise<RenderInjectionBlockResult> {
|
|
413
|
+
const conceptSlugs = slugs.filter((s) => !isSkillSlug(s));
|
|
414
|
+
const skillSlugs = slugs.filter((s) => isSkillSlug(s));
|
|
415
|
+
|
|
428
416
|
const pages = await Promise.all(
|
|
429
|
-
|
|
417
|
+
conceptSlugs.map(async (slug) => {
|
|
430
418
|
const page = await readPage(workspaceDir, slug);
|
|
431
419
|
return { slug, page };
|
|
432
420
|
}),
|
|
@@ -444,10 +432,9 @@ async function renderInjectionBlock(
|
|
|
444
432
|
sections.push(`### ${slug}\n${content}`);
|
|
445
433
|
}
|
|
446
434
|
|
|
447
|
-
// v2's skills collection is skills-only, so the activation suffix always applies.
|
|
448
435
|
const skillLines: string[] = [];
|
|
449
|
-
for (const
|
|
450
|
-
const entry = getSkillCapability(
|
|
436
|
+
for (const slug of skillSlugs) {
|
|
437
|
+
const entry = getSkillCapability(slug);
|
|
451
438
|
if (!entry) continue;
|
|
452
439
|
skillLines.push(`- ${entry.content} → use skill_load to activate`);
|
|
453
440
|
}
|
|
@@ -16,7 +16,7 @@
|
|
|
16
16
|
* the convention established for the sweep prompt.
|
|
17
17
|
*/
|
|
18
18
|
|
|
19
|
-
import { readFileSync } from "node:fs";
|
|
19
|
+
import { lstatSync, readFileSync } from "node:fs";
|
|
20
20
|
import { homedir } from "node:os";
|
|
21
21
|
import { isAbsolute, join } from "node:path";
|
|
22
22
|
|
|
@@ -28,6 +28,14 @@ const log = getLogger("memory-v2-consolidate-prompt");
|
|
|
28
28
|
/** Sentinel substituted with the cutoff timestamp at runtime. */
|
|
29
29
|
export const CUTOFF_PLACEHOLDER = "{{CUTOFF}}";
|
|
30
30
|
|
|
31
|
+
/**
 * Maximum size, in bytes, accepted for the prompt override file. Real
 * consolidation prompts are a few kilobytes, so 1 MiB leaves generous headroom
 * while stopping a `settings.write` principal from pointing the field at a
 * multi-gigabyte file and exfiltrating it through the wake hint. A size check
 * alone cannot bound stream-like special files such as `/dev/zero`, so this
 * cap is meant to be paired with the regular-file check performed in
 * `resolveConsolidationPrompt`.
 */
const MAX_PROMPT_BYTES = 1024 * 1024;
|
|
38
|
+
|
|
31
39
|
/**
|
|
32
40
|
* Consolidation prompt — live-mode only. The agent runs as itself (full
|
|
33
41
|
* SOUL.md + IDENTITY.md + persona + memory autoloads) with the standard
|
|
@@ -447,6 +455,33 @@ export function resolveConsolidationPrompt(
|
|
|
447
455
|
const resolvedPath = resolveOverridePath(overridePath);
|
|
448
456
|
let contents: string;
|
|
449
457
|
try {
|
|
458
|
+
const stat = lstatSync(resolvedPath);
|
|
459
|
+
if (!stat.isFile()) {
|
|
460
|
+
log.warn(
|
|
461
|
+
{
|
|
462
|
+
configuredPath: overridePath,
|
|
463
|
+
resolvedPath,
|
|
464
|
+
reason: "not_regular_file",
|
|
465
|
+
fallback: "bundled",
|
|
466
|
+
},
|
|
467
|
+
"consolidation prompt override is not a regular file; using bundled prompt",
|
|
468
|
+
);
|
|
469
|
+
return renderConsolidationPrompt(cutoff);
|
|
470
|
+
}
|
|
471
|
+
if (stat.size > MAX_PROMPT_BYTES) {
|
|
472
|
+
log.warn(
|
|
473
|
+
{
|
|
474
|
+
configuredPath: overridePath,
|
|
475
|
+
resolvedPath,
|
|
476
|
+
size: stat.size,
|
|
477
|
+
limit: MAX_PROMPT_BYTES,
|
|
478
|
+
reason: "oversized_override",
|
|
479
|
+
fallback: "bundled",
|
|
480
|
+
},
|
|
481
|
+
"consolidation prompt override exceeds size limit; using bundled prompt",
|
|
482
|
+
);
|
|
483
|
+
return renderConsolidationPrompt(cutoff);
|
|
484
|
+
}
|
|
450
485
|
contents = readFileSync(resolvedPath, "utf-8");
|
|
451
486
|
} catch (err) {
|
|
452
487
|
const code = (err as NodeJS.ErrnoException).code;
|
package/src/memory/v2/qdrant.ts
CHANGED
|
@@ -62,6 +62,7 @@ export interface ConceptPageQueryResult {
|
|
|
62
62
|
|
|
63
63
|
let _client: QdrantRestClient | null = null;
|
|
64
64
|
let _collectionReady = false;
|
|
65
|
+
let _collectionReadyPromise: Promise<void> | null = null;
|
|
65
66
|
|
|
66
67
|
/** Lazily create a Qdrant REST client bound to the resolved URL. */
|
|
67
68
|
function getClient(): QdrantRestClient {
|
|
@@ -85,7 +86,15 @@ function getClient(): QdrantRestClient {
|
|
|
85
86
|
*/
|
|
86
87
|
export async function ensureConceptPageCollection(): Promise<void> {
  // Fast path: setup already completed earlier in this process.
  if (_collectionReady) return;

  // Start the setup only when none is in flight; concurrent callers share the
  // same promise instead of each running the setup routine. The slot is
  // cleared on settle (success or failure) so a failed attempt can be retried
  // by a later call.
  if (_collectionReadyPromise === null) {
    _collectionReadyPromise = ensureConceptPageCollectionOnce().finally(() => {
      _collectionReadyPromise = null;
    });
  }

  return _collectionReadyPromise;
}
|
|
96
|
+
|
|
97
|
+
async function ensureConceptPageCollectionOnce(): Promise<void> {
|
|
89
98
|
const client = getClient();
|
|
90
99
|
const config = getConfig();
|
|
91
100
|
const vectorSize = config.memory.qdrant.vectorSize;
|
|
@@ -215,6 +224,95 @@ export async function deleteConceptPageEmbedding(slug: string): Promise<void> {
|
|
|
215
224
|
}
|
|
216
225
|
}
|
|
217
226
|
|
|
227
|
+
/**
 * Delete every point in the concept-page collection whose payload `slug`
 * starts with `prefix` and whose remainder (the slug with the prefix removed)
 * is not listed in `activeSuffixes`. Used by the skill-seed flow to drop stale
 * `skills/<id>` slugs after a skill is uninstalled or disabled, since skills
 * now share the concept-page collection rather than living in a dedicated one.
 *
 * Idempotent: when the live `<prefix>*` slugs already match `activeSuffixes`,
 * the function only scrolls the collection and issues no delete.
 *
 * If the collection turns out to be missing mid-operation, the readiness flag
 * is reset, the collection is re-ensured, and the prune is attempted once
 * more; any other error propagates to the caller.
 *
 * @param prefix Slug prefix that scopes the prune (for example `skills/`).
 * @param activeSuffixes Suffixes (slug minus prefix) that must be kept.
 */
export async function pruneSlugsWithPrefixExcept(
  prefix: string,
  activeSuffixes: readonly string[],
): Promise<void> {
  await ensureConceptPageCollection();

  const client = getClient();
  const keep = new Set(activeSuffixes);
  const pageSize = 256;
  // Hard cap on scroll pages so a misbehaving cursor cannot loop forever.
  const maxPages = 10_000;

  /** True when the payload carries a prefixed slug that is no longer active. */
  const isStale = (payload: unknown): boolean => {
    const slug = (payload as { slug?: unknown } | null)?.slug;
    if (typeof slug !== "string") return false;
    if (!slug.startsWith(prefix)) return false;
    return !keep.has(slug.slice(prefix.length));
  };

  /** Scroll the collection, gather stale point ids, then delete them in one call. */
  const pruneOnce = async (): Promise<void> => {
    const staleIds: Array<string | number> = [];
    let cursor: string | number | undefined = undefined;
    let pagesRead = 0;

    while (pagesRead < maxPages) {
      pagesRead += 1;
      const page = await client.scroll(MEMORY_V2_COLLECTION, {
        limit: pageSize,
        with_payload: true,
        with_vector: false,
        ...(cursor !== undefined ? { offset: cursor } : {}),
      });

      for (const point of page.points) {
        if (isStale(point.payload)) staleIds.push(point.id);
      }

      const next = page.next_page_offset;
      if (next == null) break;
      cursor = typeof next === "string" ? next : (next as number);
    }

    if (staleIds.length > 0) {
      await client.delete(MEMORY_V2_COLLECTION, {
        wait: true,
        points: staleIds,
      });
    }
  };

  try {
    await pruneOnce();
  } catch (err) {
    if (!isCollectionMissing(err)) throw err;
    // The collection vanished underneath us: recreate it and retry once.
    _collectionReady = false;
    await ensureConceptPageCollection();
    await pruneOnce();
  }
}
|
|
292
|
+
|
|
293
|
+
/**
 * Best-effort removal of the legacy `memory_v2_skills` Qdrant collection.
 * Skill embeddings now live next to concept pages in
 * `memory_v2_concept_pages` under the `skills/<id>` slug prefix, so the
 * dedicated collection is dead weight on installs upgraded from the
 * split-collection era.
 *
 * Never throws: if the collection does not exist (fresh install) the function
 * returns quietly, and any error raised while checking or deleting is logged
 * as a warning and swallowed.
 */
export async function dropLegacySkillsCollection(): Promise<void> {
  const legacyCollection = "memory_v2_skills";
  try {
    const client = getClient();
    const { exists } = await client.collectionExists(legacyCollection);
    if (exists) {
      await client.deleteCollection(legacyCollection);
      log.info("Deleted legacy memory_v2_skills Qdrant collection");
    }
  } catch (err) {
    log.warn(
      { err },
      "Failed to drop legacy memory_v2_skills collection — non-fatal",
    );
  }
}
|
|
315
|
+
|
|
218
316
|
/**
|
|
219
317
|
* Run separate dense and sparse queries against the concept-page collection
|
|
220
318
|
* and return per-channel scores per slug. Callers fuse these — typically via
|
|
@@ -437,4 +535,5 @@ function pointIdForSlug(slug: string): string {
|
|
|
437
535
|
export function _resetMemoryV2QdrantForTests(): void {
  // Put every piece of module-level state back to its initial value so the
  // next test starts with no client, no readiness flag and no shared setup
  // promise.
  _collectionReadyPromise = null;
  _collectionReady = false;
  _client = null;
}
|
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
/** Memory v2 cross-encoder rerank — `(query, page-preview)` pairs scored by a local model. */
|
|
2
|
+
|
|
3
|
+
import { createHash } from "node:crypto";
|
|
4
|
+
|
|
5
|
+
import type { AssistantConfig } from "../../config/types.js";
|
|
6
|
+
import { getLogger } from "../../util/logger.js";
|
|
7
|
+
import { getWorkspaceDir } from "../../util/platform.js";
|
|
8
|
+
import { getOrCreateRerankBackend } from "../rerank-local.js";
|
|
9
|
+
import { readPage } from "./page-store.js";
|
|
10
|
+
|
|
11
|
+
const log = getLogger("memory-v2-reranker");
|
|
12
|
+
|
|
13
|
+
// Maximum number of preview characters kept per passage by `buildPassage`.
// bge-reranker-base has a ~512-token model context; capping the input bounds
// the payload sent to the reranker.
const PASSAGE_CHAR_CAP = 240;
|
|
15
|
+
|
|
16
|
+
/** One cached rerank result for a single `(query, candidate set)` pair. */
interface CacheEntry {
  /** Cross-encoder score per candidate slug, as stored by `rerankCandidates`. */
  scores: Map<string, number>;
  /** `Date.now()` timestamp (ms) of when the entry was stored or last served. */
  ts: number;
}
|
|
20
|
+
|
|
21
|
+
/** Lifetime of a cache entry since it was stored or last hit: two minutes, in ms. */
const CACHE_TTL_MS = 120 * 1000;
/** Entry count that `evictExpired` trims the cache back down to. */
const CACHE_MAX_ENTRIES = 64;
/** Score cache keyed by `cacheKey(query, slugs)`; insertion order doubles as recency order. */
const cache = new Map<string, CacheEntry>();
|
|
24
|
+
|
|
25
|
+
/**
 * Build the cache key for a `(query, candidate set)` pair.
 *
 * The key is order-insensitive with respect to `slugs` (they are sorted on a
 * copy, so the caller's array is never mutated). The query and slugs are
 * serialised as a JSON array before hashing so the encoding is unambiguous:
 * a query or slug that happens to contain the separator character can no
 * longer collide with a different query/slug split.
 *
 * @param query Query text exactly as it will be scored.
 * @param slugs Candidate slugs the scores apply to.
 * @returns Hex-encoded SHA-256 digest identifying the pair.
 */
function cacheKey(query: string, slugs: readonly string[]): string {
  const sortedSlugs = [...slugs].sort();
  return createHash("sha256")
    .update(JSON.stringify([query, sortedSlugs]))
    .digest("hex");
}
|
|
29
|
+
|
|
30
|
+
/**
 * Trim the score cache: first remove every entry older than `CACHE_TTL_MS`
 * relative to `now`, then, if the cache still holds more than
 * `CACHE_MAX_ENTRIES` entries, remove entries from the front of the Map's
 * insertion order until it is back at the limit.
 *
 * @param now Current time in milliseconds (same clock as `CacheEntry.ts`).
 */
function evictExpired(now: number): void {
  cache.forEach((entry, key) => {
    if (now - entry.ts > CACHE_TTL_MS) cache.delete(key);
  });

  let overflow = cache.size - CACHE_MAX_ENTRIES;
  if (overflow <= 0) return;

  // Map iteration follows insertion order, so the first keys are the ones
  // inserted (or refreshed) longest ago.
  for (const key of cache.keys()) {
    if (overflow === 0) break;
    cache.delete(key);
    overflow -= 1;
  }
}
|
|
43
|
+
|
|
44
|
+
/**
 * Build the passage text the cross-encoder scores for one page: the slug on
 * the first line, followed by a compact preview of the page's first prose
 * paragraph.
 *
 * Leading Markdown heading lines (`# ...`) are removed before the first
 * paragraph is located, so a page laid out as `# Title`, blank line, prose
 * previews the prose rather than just the title. If the page consists of
 * headings only, the heading text is kept so the preview is not empty.
 * Whitespace runs are collapsed to single spaces and the preview is cut to
 * `PASSAGE_CHAR_CAP` characters.
 *
 * @param slug Page slug, emitted verbatim as the first line.
 * @param body Page body text.
 * @returns `"<slug>\n<preview>"`.
 */
function buildPassage(slug: string, body: string): string {
  const trimmed = body.replace(/^\s+/, "");
  // Strip one or more leading heading lines together with the whitespace
  // (including blank lines) that follows each of them.
  const afterHeadings = trimmed.replace(/^(?:#+[ \t]+.*(?:\s+|$))+/, "");
  // Heading-only page: fall back to the heading text instead of an empty preview.
  const source = afterHeadings.trim().length > 0 ? afterHeadings : trimmed;
  const blank = source.search(/\n\s*\n/);
  const para = blank === -1 ? source : source.slice(0, blank);
  const compact = para.trim().replace(/\s+/g, " ").slice(0, PASSAGE_CHAR_CAP);
  return `${slug}\n${compact}`;
}
|
|
52
|
+
|
|
53
|
+
/**
 * Run the cross-encoder over each candidate's first-paragraph preview for
 * one or more queries against the same candidate set. Returns one
 * `Map<slug, score>` per query, in the same order as the `queries` array.
 *
 * Multi-query batching: the user-channel and assistant-channel queries share
 * a candidate set per turn, so every uncached query × passage pair is scored
 * in a single backend call (pairs laid out query-major). Cache hits
 * short-circuit per query independently.
 *
 * Each of the following yields an empty Map for the affected queries so
 * callers can fall back to pure fused scores, and none of them is cached:
 * - a whitespace-only query (never reaches the backend);
 * - no candidate page could be read;
 * - the backend throws;
 * - the backend returns a score array whose length differs from the number of
 *   pairs submitted (a malformed response must not be served from cache for
 *   the rest of the TTL).
 *
 * Per-batch normalisation and boost math live in `computeOwnActivation`.
 *
 * @param queries Query strings; the returned array is index-aligned with it.
 * @param candidates Slugs of the pages to score.
 * @param config Supplies `memory.v2.rerank.model` and `memory.v2.rerank.dtype`.
 * @returns One map per query with scores clamped to [0, 1]. Slugs whose page
 *   failed to load, or whose score is not a number, are omitted.
 */
export async function rerankCandidates(
  queries: readonly string[],
  candidates: readonly string[],
  config: AssistantConfig,
): Promise<Array<Map<string, number>>> {
  if (queries.length === 0) return [];
  if (candidates.length === 0) return queries.map(() => new Map());

  const now = Date.now();
  evictExpired(now);

  const results: Array<Map<string, number> | null> = queries.map(() => null);
  // Queries that still need backend scores. The cache key is computed once
  // here and reused when the fresh scores are stored.
  const pending: Array<{ index: number; query: string; key: string }> = [];
  for (const [index, query] of queries.entries()) {
    if (query.trim().length === 0) {
      results[index] = new Map();
      continue;
    }
    const key = cacheKey(query, candidates);
    const cached = cache.get(key);
    if (!cached) {
      pending.push({ index, query, key });
      continue;
    }
    // Refresh insertion order so frequently-hit entries survive eviction.
    cache.delete(key);
    cache.set(key, { ...cached, ts: now });
    results[index] = new Map(cached.scores);
  }

  const finalize = (): Array<Map<string, number>> =>
    results.map((r) => r ?? new Map());
  // Shared fallback: every still-pending query gets an empty Map.
  const failPending = (): Array<Map<string, number>> => {
    for (const { index } of pending) results[index] = new Map();
    return finalize();
  };

  if (pending.length === 0) return finalize();

  const workspaceDir = getWorkspaceDir();
  const pages = await Promise.all(
    candidates.map((slug) =>
      readPage(workspaceDir, slug).catch((err) => {
        log.debug({ err, slug }, "Reranker skipping page that failed to load");
        return null;
      }),
    ),
  );
  const passages: string[] = [];
  const slugsForPassages: string[] = [];
  for (let i = 0; i < candidates.length; i++) {
    const page = pages[i];
    if (!page) continue;
    passages.push(buildPassage(candidates[i], page.body));
    slugsForPassages.push(candidates[i]);
  }

  if (passages.length === 0) return failPending();

  // One tokenizer + ONNX forward pass over every pending query × passage
  // pair. Pairs are laid out query-major: pending[0] × passages, then
  // pending[1] × passages, etc.
  const batchQueries: string[] = [];
  const batchPassages: string[] = [];
  for (const { query } of pending) {
    for (const passage of passages) {
      batchQueries.push(query);
      batchPassages.push(passage);
    }
  }

  const { model, dtype } = config.memory.v2.rerank;
  let scores: number[];
  try {
    const backend = getOrCreateRerankBackend(model, dtype);
    scores = await backend.score(batchQueries, batchPassages);
  } catch (err) {
    log.warn(
      { err, model, n: batchPassages.length },
      "Rerank backend failed; falling back to pure fused scores",
    );
    return failPending();
  }

  // A response that does not contain exactly one score per pair is treated
  // like a backend failure rather than being partially applied and cached.
  if (scores.length !== batchPassages.length) {
    log.warn(
      { model, n: batchPassages.length, received: scores.length },
      "Rerank backend returned an unexpected number of scores; falling back to pure fused scores",
    );
    return failPending();
  }

  for (let j = 0; j < pending.length; j++) {
    const { index, key } = pending[j];
    const offset = j * passages.length;
    const result = new Map<string, number>();
    for (let i = 0; i < slugsForPassages.length; i++) {
      const s = scores[offset + i];
      if (typeof s !== "number" || Number.isNaN(s)) continue;
      // sigmoid output should already be in [0, 1]; clamp defensively.
      result.set(slugsForPassages[i], Math.max(0, Math.min(1, s)));
    }
    results[index] = result;
    cache.set(key, {
      scores: new Map(result),
      ts: now,
    });
  }

  return finalize();
}
|
|
173
|
+
|
|
174
|
+
/**
 * @internal Test-only: clear the LRU cache so scores cached by one test
 * cannot be served to the next.
 */
export function _resetRerankCacheForTests(): void {
  cache.clear();
}
|