@vellumai/assistant 0.4.49 → 0.4.51
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ARCHITECTURE.md +24 -33
- package/README.md +3 -3
- package/docs/architecture/integrations.md +2 -2
- package/docs/architecture/keychain-broker.md +6 -6
- package/docs/architecture/memory.md +180 -119
- package/knip.json +32 -0
- package/package.json +3 -2
- package/src/__tests__/agent-loop.test.ts +3 -1
- package/src/__tests__/anthropic-provider.test.ts +114 -23
- package/src/__tests__/approval-cascade.test.ts +1 -15
- package/src/__tests__/approval-routes-http.test.ts +2 -0
- package/src/__tests__/assistant-feature-flag-guard.test.ts +0 -23
- package/src/__tests__/btw-routes.test.ts +61 -5
- package/src/__tests__/canonical-guardian-store.test.ts +95 -0
- package/src/__tests__/checker.test.ts +13 -0
- package/src/__tests__/config-schema.test.ts +1 -68
- package/src/__tests__/config-watcher.test.ts +8 -0
- package/src/__tests__/context-memory-e2e.test.ts +11 -100
- package/src/__tests__/conversation-routes-guardian-reply.test.ts +8 -0
- package/src/__tests__/conversation-routes-slash-commands.test.ts +1 -0
- package/src/__tests__/credential-security-e2e.test.ts +1 -0
- package/src/__tests__/credential-security-invariants.test.ts +8 -7
- package/src/__tests__/credential-vault-unit.test.ts +23 -18
- package/src/__tests__/credential-vault.test.ts +30 -18
- package/src/__tests__/credentials-cli.test.ts +257 -82
- package/src/__tests__/cu-unified-flow.test.ts +532 -0
- package/src/__tests__/date-context.test.ts +93 -77
- package/src/__tests__/deterministic-verification-control-plane.test.ts +64 -0
- package/src/__tests__/guardian-routing-invariants.test.ts +93 -0
- package/src/__tests__/history-repair.test.ts +245 -0
- package/src/__tests__/host-cu-proxy.test.ts +165 -3
- package/src/__tests__/http-user-message-parity.test.ts +1 -0
- package/src/__tests__/inbound-invite-redemption.test.ts +36 -7
- package/src/__tests__/integration-status.test.ts +31 -30
- package/src/__tests__/invite-redemption-service.test.ts +166 -13
- package/src/__tests__/invite-routes-http.test.ts +166 -5
- package/src/__tests__/keychain-broker-client.test.ts +4 -4
- package/src/__tests__/list-messages-attachments.test.ts +193 -0
- package/src/__tests__/memory-context-benchmark.benchmark.test.ts +56 -18
- package/src/__tests__/memory-lifecycle-e2e.test.ts +244 -387
- package/src/__tests__/memory-recall-quality.test.ts +244 -407
- package/src/__tests__/memory-regressions.experimental.test.ts +126 -101
- package/src/__tests__/memory-regressions.test.ts +477 -2841
- package/src/__tests__/memory-retrieval.benchmark.test.ts +33 -150
- package/src/__tests__/memory-upsert-concurrency.test.ts +5 -244
- package/src/__tests__/mime-builder.test.ts +28 -0
- package/src/__tests__/native-web-search.test.ts +1 -0
- package/src/__tests__/oauth-cli.test.ts +824 -31
- package/src/__tests__/oauth-provider-profiles.test.ts +1 -1
- package/src/__tests__/oauth-store.test.ts +363 -17
- package/src/__tests__/qdrant-collection-migration.test.ts +53 -8
- package/src/__tests__/registry.test.ts +0 -1
- package/src/__tests__/relay-server.test.ts +55 -1
- package/src/__tests__/schedule-tools.test.ts +32 -0
- package/src/__tests__/script-proxy-certs.test.ts +1 -1
- package/src/__tests__/secret-onetime-send.test.ts +1 -0
- package/src/__tests__/secret-routes-managed-proxy.test.ts +183 -0
- package/src/__tests__/secure-keys.test.ts +78 -18
- package/src/__tests__/send-endpoint-busy.test.ts +3 -0
- package/src/__tests__/server-history-render.test.ts +2 -2
- package/src/__tests__/session-abort-tool-results.test.ts +1 -14
- package/src/__tests__/session-agent-loop-overflow.test.ts +1583 -0
- package/src/__tests__/session-agent-loop.test.ts +19 -15
- package/src/__tests__/session-confirmation-signals.test.ts +1 -15
- package/src/__tests__/session-error.test.ts +124 -2
- package/src/__tests__/session-history-web-search.test.ts +918 -0
- package/src/__tests__/session-pre-run-repair.test.ts +1 -14
- package/src/__tests__/session-provider-retry-repair.test.ts +25 -28
- package/src/__tests__/session-queue.test.ts +37 -27
- package/src/__tests__/session-runtime-assembly.test.ts +54 -0
- package/src/__tests__/session-slash-known.test.ts +1 -15
- package/src/__tests__/session-slash-queue.test.ts +1 -15
- package/src/__tests__/session-slash-unknown.test.ts +1 -15
- package/src/__tests__/session-workspace-cache-state.test.ts +3 -33
- package/src/__tests__/session-workspace-injection.test.ts +3 -37
- package/src/__tests__/session-workspace-tool-tracking.test.ts +3 -37
- package/src/__tests__/skills-install-extract.test.ts +93 -0
- package/src/__tests__/skills.test.ts +2 -2
- package/src/__tests__/skillssh-registry.test.ts +451 -0
- package/src/__tests__/slack-channel-config.test.ts +10 -8
- package/src/__tests__/trust-store.test.ts +15 -0
- package/src/__tests__/twilio-config.test.ts +11 -10
- package/src/__tests__/twilio-provider.test.ts +9 -4
- package/src/__tests__/voice-invite-redemption.test.ts +85 -5
- package/src/agent/ax-tree-compaction.test.ts +51 -0
- package/src/agent/loop.ts +39 -12
- package/src/approvals/AGENTS.md +1 -1
- package/src/approvals/guardian-request-resolvers.ts +14 -2
- package/src/bundler/compiler-tools.ts +66 -2
- package/src/calls/call-domain.ts +134 -3
- package/src/calls/call-store.ts +6 -0
- package/src/calls/relay-server.ts +44 -6
- package/src/calls/relay-setup-router.ts +17 -1
- package/src/calls/twilio-config.ts +5 -4
- package/src/calls/twilio-provider.ts +14 -9
- package/src/calls/twilio-rest.ts +10 -7
- package/src/calls/types.ts +3 -1
- package/src/cli/commands/config.ts +14 -9
- package/src/cli/commands/contacts.ts +3 -0
- package/src/cli/commands/credentials.ts +170 -174
- package/src/cli/commands/doctor.ts +11 -8
- package/src/cli/commands/keys.ts +9 -9
- package/src/cli/commands/mcp.ts +46 -59
- package/src/cli/commands/memory.ts +16 -165
- package/src/cli/commands/oauth/apps.ts +68 -10
- package/src/cli/commands/oauth/connections.ts +475 -105
- package/src/cli/commands/oauth/index.ts +3 -3
- package/src/cli/commands/oauth/providers.ts +18 -4
- package/src/cli/commands/sessions.ts +5 -2
- package/src/cli/commands/skills.ts +173 -1
- package/src/cli/http-client.ts +0 -20
- package/src/cli/main-screen.tsx +2 -2
- package/src/cli/program.ts +5 -6
- package/src/cli.ts +20 -22
- package/src/config/__tests__/feature-flag-registry-bundled.test.ts +39 -0
- package/src/config/bundled-skills/computer-use/TOOLS.json +1 -1
- package/src/config/bundled-skills/computer-use/tools/computer-use-observe.ts +12 -0
- package/src/config/bundled-skills/contacts/SKILL.md +35 -11
- package/src/config/bundled-skills/contacts/tools/google-contacts.ts +1 -1
- package/src/config/bundled-skills/gmail/SKILL.md +1 -1
- package/src/config/bundled-skills/gmail/TOOLS.json +52 -0
- package/src/config/bundled-skills/gmail/tools/gmail-archive.ts +13 -3
- package/src/config/bundled-skills/gmail/tools/gmail-attachments.ts +9 -2
- package/src/config/bundled-skills/gmail/tools/gmail-draft.ts +5 -1
- package/src/config/bundled-skills/gmail/tools/gmail-filters.ts +5 -1
- package/src/config/bundled-skills/gmail/tools/gmail-follow-up.ts +5 -1
- package/src/config/bundled-skills/gmail/tools/gmail-forward.ts +5 -1
- package/src/config/bundled-skills/gmail/tools/gmail-label.ts +9 -2
- package/src/config/bundled-skills/gmail/tools/gmail-outreach-scan.ts +5 -1
- package/src/config/bundled-skills/gmail/tools/gmail-send-draft.ts +5 -1
- package/src/config/bundled-skills/gmail/tools/gmail-sender-digest.ts +5 -1
- package/src/config/bundled-skills/gmail/tools/gmail-trash.ts +5 -1
- package/src/config/bundled-skills/gmail/tools/gmail-unsubscribe.ts +5 -1
- package/src/config/bundled-skills/gmail/tools/gmail-vacation.ts +5 -1
- package/src/config/bundled-skills/google-calendar/TOOLS.json +20 -0
- package/src/config/bundled-skills/google-calendar/tools/calendar-check-availability.ts +2 -1
- package/src/config/bundled-skills/google-calendar/tools/calendar-create-event.ts +2 -1
- package/src/config/bundled-skills/google-calendar/tools/calendar-get-event.ts +2 -1
- package/src/config/bundled-skills/google-calendar/tools/calendar-list-events.ts +2 -1
- package/src/config/bundled-skills/google-calendar/tools/calendar-rsvp.ts +2 -1
- package/src/config/bundled-skills/google-calendar/tools/shared.ts +8 -2
- package/src/config/bundled-skills/messaging/SKILL.md +1 -1
- package/src/config/bundled-skills/messaging/tools/messaging-analyze-style.ts +2 -2
- package/src/config/bundled-skills/messaging/tools/messaging-archive-by-sender.ts +2 -2
- package/src/config/bundled-skills/messaging/tools/messaging-auth-test.ts +2 -2
- package/src/config/bundled-skills/messaging/tools/messaging-list-conversations.ts +2 -2
- package/src/config/bundled-skills/messaging/tools/messaging-mark-read.ts +2 -2
- package/src/config/bundled-skills/messaging/tools/messaging-read.ts +2 -2
- package/src/config/bundled-skills/messaging/tools/messaging-search.ts +2 -2
- package/src/config/bundled-skills/messaging/tools/messaging-send.ts +2 -2
- package/src/config/bundled-skills/messaging/tools/messaging-sender-digest.ts +2 -2
- package/src/config/bundled-skills/messaging/tools/shared.ts +7 -5
- package/src/config/bundled-skills/slack/tools/shared.ts +1 -1
- package/src/config/bundled-skills/slack/tools/slack-add-reaction.ts +1 -1
- package/src/config/bundled-skills/slack/tools/slack-channel-details.ts +1 -1
- package/src/config/bundled-skills/slack/tools/slack-delete-message.ts +1 -1
- package/src/config/bundled-skills/slack/tools/slack-edit-message.ts +1 -1
- package/src/config/bundled-skills/slack/tools/slack-leave-channel.ts +1 -1
- package/src/config/bundled-skills/slack/tools/slack-scan-digest.ts +1 -1
- package/src/config/bundled-tool-registry.ts +2 -5
- package/src/config/loader.ts +6 -42
- package/src/config/schema.ts +1 -12
- package/src/config/schemas/memory-lifecycle.ts +0 -9
- package/src/config/schemas/memory-processing.ts +0 -180
- package/src/config/schemas/memory-retrieval.ts +32 -104
- package/src/config/schemas/memory.ts +0 -10
- package/src/config/types.ts +0 -4
- package/src/contacts/contact-store.ts +39 -2
- package/src/contacts/contacts-write.ts +9 -0
- package/src/context/window-manager.ts +4 -1
- package/src/daemon/config-watcher.ts +55 -2
- package/src/daemon/daemon-control.ts +1 -1
- package/src/daemon/date-context.ts +114 -31
- package/src/daemon/handlers/config-ingress.ts +2 -2
- package/src/daemon/handlers/config-slack-channel.ts +59 -39
- package/src/daemon/handlers/config-telegram.ts +23 -14
- package/src/daemon/handlers/session-history.ts +1 -358
- package/src/daemon/handlers/sessions.ts +18 -13
- package/src/daemon/handlers/shared.ts +3 -17
- package/src/daemon/handlers/skills.ts +20 -1
- package/src/daemon/history-repair.ts +72 -8
- package/src/daemon/host-cu-proxy.ts +55 -26
- package/src/daemon/lifecycle.ts +39 -4
- package/src/daemon/mcp-reload-service.ts +2 -2
- package/src/daemon/message-types/computer-use.ts +1 -12
- package/src/daemon/message-types/memory.ts +4 -16
- package/src/daemon/message-types/messages.ts +1 -0
- package/src/daemon/message-types/sessions.ts +4 -42
- package/src/daemon/server.ts +6 -1
- package/src/daemon/session-agent-loop-handlers.ts +38 -0
- package/src/daemon/session-agent-loop.ts +334 -48
- package/src/daemon/session-error.ts +89 -6
- package/src/daemon/session-history.ts +17 -7
- package/src/daemon/session-media-retry.ts +6 -2
- package/src/daemon/session-memory.ts +69 -149
- package/src/daemon/session-process.ts +10 -1
- package/src/daemon/session-runtime-assembly.ts +49 -19
- package/src/daemon/session-slash.ts +3 -5
- package/src/daemon/session-surfaces.ts +4 -1
- package/src/daemon/session-tool-setup.ts +7 -1
- package/src/daemon/session.ts +12 -2
- package/src/email/providers/index.ts +2 -2
- package/src/instrument.ts +61 -1
- package/src/media/avatar-router.ts +1 -1
- package/src/memory/admin.ts +2 -191
- package/src/memory/canonical-guardian-store.ts +38 -2
- package/src/memory/conversation-crud.ts +0 -33
- package/src/memory/conversation-queries.ts +25 -83
- package/src/memory/db-init.ts +32 -0
- package/src/memory/embedding-backend.ts +84 -8
- package/src/memory/embedding-types.ts +9 -1
- package/src/memory/indexer.ts +7 -46
- package/src/memory/invite-store.ts +19 -0
- package/src/memory/items-extractor.ts +274 -76
- package/src/memory/job-handlers/backfill.ts +2 -127
- package/src/memory/job-handlers/cleanup.ts +2 -16
- package/src/memory/job-handlers/extraction.ts +2 -138
- package/src/memory/job-handlers/index-maintenance.ts +1 -6
- package/src/memory/job-handlers/summarization.ts +3 -148
- package/src/memory/job-utils.ts +21 -59
- package/src/memory/jobs-store.ts +1 -159
- package/src/memory/jobs-worker.ts +9 -52
- package/src/memory/migrations/104-core-indexes.ts +3 -3
- package/src/memory/migrations/149-oauth-tables.ts +2 -0
- package/src/memory/migrations/150-oauth-apps-client-secret-path.ts +98 -0
- package/src/memory/migrations/151-oauth-providers-ping-url.ts +11 -0
- package/src/memory/migrations/152-memory-item-supersession.ts +44 -0
- package/src/memory/migrations/153-drop-entity-tables.ts +15 -0
- package/src/memory/migrations/154-drop-fts.ts +20 -0
- package/src/memory/migrations/155-drop-conflicts.ts +7 -0
- package/src/memory/migrations/156-call-session-invite-metadata.ts +24 -0
- package/src/memory/migrations/157-invite-contact-id.ts +104 -0
- package/src/memory/migrations/index.ts +8 -0
- package/src/memory/migrations/registry.ts +6 -0
- package/src/memory/qdrant-client.ts +148 -51
- package/src/memory/raw-query.ts +1 -1
- package/src/memory/retriever.test.ts +294 -273
- package/src/memory/retriever.ts +421 -645
- package/src/memory/schema/calls.ts +2 -0
- package/src/memory/schema/contacts.ts +1 -0
- package/src/memory/schema/memory-core.ts +3 -48
- package/src/memory/schema/oauth.ts +2 -0
- package/src/memory/search/formatting.ts +263 -176
- package/src/memory/search/lexical.ts +1 -254
- package/src/memory/search/ranking.ts +0 -455
- package/src/memory/search/semantic.ts +100 -14
- package/src/memory/search/staleness.ts +47 -0
- package/src/memory/search/tier-classifier.ts +21 -0
- package/src/memory/search/types.ts +15 -77
- package/src/memory/task-memory-cleanup.ts +4 -6
- package/src/messaging/provider.ts +1 -1
- package/src/messaging/providers/gmail/adapter.ts +1 -1
- package/src/messaging/providers/gmail/mime-builder.ts +17 -7
- package/src/messaging/providers/telegram-bot/adapter.ts +17 -8
- package/src/messaging/providers/whatsapp/adapter.ts +13 -9
- package/src/messaging/registry.ts +9 -5
- package/src/oauth/byo-connection.test.ts +40 -25
- package/src/oauth/connect-orchestrator.ts +4 -10
- package/src/oauth/connection-resolver.ts +20 -6
- package/src/oauth/manual-token-connection.ts +5 -5
- package/src/oauth/oauth-store.ts +183 -31
- package/src/oauth/platform-connection.test.ts +1 -1
- package/src/oauth/provider-behaviors.ts +503 -4
- package/src/oauth/seed-providers.ts +214 -8
- package/src/oauth/token-persistence.ts +31 -16
- package/src/permissions/defaults.ts +1 -0
- package/src/permissions/trust-store.ts +23 -1
- package/src/playbooks/playbook-compiler.ts +1 -1
- package/src/prompts/system-prompt.ts +18 -2
- package/src/providers/anthropic/client.ts +56 -126
- package/src/providers/types.ts +7 -1
- package/src/runtime/AGENTS.md +9 -0
- package/src/runtime/auth/route-policy.ts +6 -3
- package/src/runtime/channel-readiness-service.ts +48 -40
- package/src/runtime/guardian-reply-router.ts +24 -22
- package/src/runtime/http-server.ts +2 -2
- package/src/runtime/http-types.ts +2 -0
- package/src/runtime/invite-redemption-service.ts +72 -12
- package/src/runtime/invite-service.ts +43 -0
- package/src/runtime/middleware/twilio-validation.ts +1 -1
- package/src/runtime/pending-interactions.ts +2 -2
- package/src/runtime/routes/brain-graph-routes.ts +10 -90
- package/src/runtime/routes/btw-routes.ts +10 -5
- package/src/runtime/routes/conversation-routes.ts +56 -11
- package/src/runtime/routes/inbound-stages/acl-enforcement.ts +21 -12
- package/src/runtime/routes/integrations/slack/channel.ts +2 -2
- package/src/runtime/routes/integrations/telegram.ts +2 -2
- package/src/runtime/routes/integrations/twilio.ts +17 -17
- package/src/runtime/routes/invite-routes.ts +29 -4
- package/src/runtime/routes/memory-item-routes.test.ts +754 -0
- package/src/runtime/routes/memory-item-routes.ts +503 -0
- package/src/runtime/routes/secret-routes.ts +17 -0
- package/src/runtime/routes/session-management-routes.ts +3 -3
- package/src/runtime/routes/settings-routes.ts +3 -3
- package/src/runtime/routes/trust-rules-routes.ts +14 -0
- package/src/runtime/routes/workspace-routes.ts +9 -4
- package/src/runtime/routes/workspace-utils.ts +8 -2
- package/src/schedule/integration-status.ts +26 -19
- package/src/security/keychain-broker-client.ts +17 -4
- package/src/security/oauth2.ts +6 -7
- package/src/security/secure-keys.ts +44 -19
- package/src/security/token-manager.ts +46 -39
- package/src/services/vercel-deploy.ts +0 -24
- package/src/signals/confirm.ts +78 -0
- package/src/signals/mcp-reload.ts +18 -0
- package/src/skills/catalog-install.ts +74 -18
- package/src/skills/skillssh-registry.ts +503 -0
- package/src/tools/assets/search.ts +5 -1
- package/src/tools/computer-use/definitions.ts +0 -10
- package/src/tools/computer-use/registry.ts +1 -1
- package/src/tools/credentials/vault.ts +22 -7
- package/src/tools/memory/definitions.ts +4 -13
- package/src/tools/memory/handlers.test.ts +83 -103
- package/src/tools/memory/handlers.ts +50 -85
- package/src/tools/network/script-proxy/session-manager.ts +8 -8
- package/src/tools/schedule/create.ts +10 -3
- package/src/tools/schedule/update.ts +8 -1
- package/src/tools/skills/load.ts +25 -2
- package/src/watcher/provider-types.ts +1 -1
- package/src/watcher/providers/github.ts +1 -1
- package/src/watcher/providers/gmail.ts +3 -3
- package/src/watcher/providers/google-calendar.ts +3 -3
- package/src/watcher/providers/linear.ts +1 -1
- package/src/__tests__/clarification-resolver.test.ts +0 -193
- package/src/__tests__/conflict-intent-tokenization.test.ts +0 -160
- package/src/__tests__/conflict-policy.test.ts +0 -269
- package/src/__tests__/conflict-store.test.ts +0 -372
- package/src/__tests__/contradiction-checker.test.ts +0 -361
- package/src/__tests__/entity-extractor.test.ts +0 -211
- package/src/__tests__/entity-search.test.ts +0 -1117
- package/src/__tests__/profile-compiler.test.ts +0 -392
- package/src/__tests__/session-conflict-gate.test.ts +0 -1228
- package/src/__tests__/session-profile-injection.test.ts +0 -557
- package/src/config/bundled-skills/knowledge-graph/SKILL.md +0 -25
- package/src/config/bundled-skills/knowledge-graph/TOOLS.json +0 -66
- package/src/config/bundled-skills/knowledge-graph/tools/graph-query.ts +0 -211
- package/src/daemon/session-conflict-gate.ts +0 -167
- package/src/daemon/session-dynamic-profile.ts +0 -77
- package/src/memory/clarification-resolver.ts +0 -417
- package/src/memory/conflict-intent.ts +0 -205
- package/src/memory/conflict-policy.ts +0 -127
- package/src/memory/conflict-store.ts +0 -410
- package/src/memory/contradiction-checker.ts +0 -508
- package/src/memory/entity-extractor.ts +0 -535
- package/src/memory/format-recall.ts +0 -47
- package/src/memory/fts-reconciler.ts +0 -165
- package/src/memory/job-handlers/conflict.ts +0 -200
- package/src/memory/profile-compiler.ts +0 -195
- package/src/memory/recall-cache.ts +0 -117
- package/src/memory/search/entity.ts +0 -535
- package/src/memory/search/query-expansion.test.ts +0 -70
- package/src/memory/search/query-expansion.ts +0 -118
- package/src/runtime/routes/mcp-routes.ts +0 -20
package/src/memory/retriever.ts
CHANGED
|
@@ -1,5 +1,4 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import { inArray } from "drizzle-orm";
|
|
1
|
+
import { inArray, sql } from "drizzle-orm";
|
|
3
2
|
|
|
4
3
|
import type { AssistantConfig } from "../config/types.js";
|
|
5
4
|
import { estimateTextTokens } from "../context/token-estimator.js";
|
|
@@ -12,40 +11,34 @@ import {
|
|
|
12
11
|
import { getDb } from "./db.js";
|
|
13
12
|
import {
|
|
14
13
|
embedWithBackend,
|
|
14
|
+
generateSparseEmbedding,
|
|
15
15
|
getMemoryBackendStatus,
|
|
16
16
|
logMemoryEmbeddingWarning,
|
|
17
17
|
} from "./embedding-backend.js";
|
|
18
|
-
import {
|
|
18
|
+
import { isQdrantBreakerOpen } from "./qdrant-circuit-breaker.js";
|
|
19
19
|
import {
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
20
|
+
conversations,
|
|
21
|
+
memoryItems,
|
|
22
|
+
memoryItemSources,
|
|
23
|
+
messages,
|
|
24
|
+
} from "./schema.js";
|
|
23
25
|
import {
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
import {
|
|
30
|
-
import { MEMORY_CONTEXT_ACK } from "./search/formatting.js";
|
|
31
|
-
import {
|
|
32
|
-
directItemSearch,
|
|
33
|
-
lexicalSearch,
|
|
34
|
-
recencySearch,
|
|
35
|
-
} from "./search/lexical.js";
|
|
36
|
-
import { buildFTSQuery, expandQueryForFTS } from "./search/query-expansion.js";
|
|
37
|
-
import {
|
|
38
|
-
applySourceCaps,
|
|
39
|
-
mergeCandidates,
|
|
40
|
-
rerankWithLLM,
|
|
41
|
-
} from "./search/ranking.js";
|
|
26
|
+
buildTwoLayerInjection,
|
|
27
|
+
IDENTITY_KINDS,
|
|
28
|
+
MEMORY_CONTEXT_ACK,
|
|
29
|
+
PREFERENCE_KINDS,
|
|
30
|
+
} from "./search/formatting.js";
|
|
31
|
+
import { recencySearch } from "./search/lexical.js";
|
|
42
32
|
import { isQdrantConnectionError, semanticSearch } from "./search/semantic.js";
|
|
33
|
+
import { applyStaleDemotion, computeStaleness } from "./search/staleness.js";
|
|
34
|
+
import {
|
|
35
|
+
classifyTiers,
|
|
36
|
+
type TieredCandidate,
|
|
37
|
+
} from "./search/tier-classifier.js";
|
|
43
38
|
import type {
|
|
44
39
|
Candidate,
|
|
45
|
-
CollectedCandidates,
|
|
46
40
|
DegradationReason,
|
|
47
41
|
DegradationStatus,
|
|
48
|
-
FallbackSource,
|
|
49
42
|
MemoryRecallCandiateDebug,
|
|
50
43
|
MemoryRecallOptions,
|
|
51
44
|
MemoryRecallResult,
|
|
@@ -61,7 +54,6 @@ export {
|
|
|
61
54
|
export type {
|
|
62
55
|
DegradationReason,
|
|
63
56
|
DegradationStatus,
|
|
64
|
-
FallbackSource,
|
|
65
57
|
MemoryRecallCandiateDebug,
|
|
66
58
|
MemoryRecallResult,
|
|
67
59
|
ScopePolicyOverride,
|
|
@@ -69,22 +61,6 @@ export type {
|
|
|
69
61
|
|
|
70
62
|
const log = getLogger("memory-retriever");
|
|
71
63
|
|
|
72
|
-
/** Hash the retrieval-relevant config fields so the recall cache distinguishes different configs. */
|
|
73
|
-
function buildConfigFingerprint(config: AssistantConfig): string {
|
|
74
|
-
const relevant = {
|
|
75
|
-
r: config.memory.retrieval,
|
|
76
|
-
e: {
|
|
77
|
-
provider: config.memory.embeddings.provider,
|
|
78
|
-
required: config.memory.embeddings.required,
|
|
79
|
-
},
|
|
80
|
-
ent: config.memory.entity.enabled,
|
|
81
|
-
};
|
|
82
|
-
return createHash("sha256")
|
|
83
|
-
.update(JSON.stringify(relevant))
|
|
84
|
-
.digest("hex")
|
|
85
|
-
.slice(0, 16);
|
|
86
|
-
}
|
|
87
|
-
|
|
88
64
|
const EMBED_MAX_RETRIES = 3;
|
|
89
65
|
const EMBED_BASE_DELAY_MS = 500;
|
|
90
66
|
|
|
@@ -151,336 +127,18 @@ function buildScopeFilter(
|
|
|
151
127
|
return [scopeId];
|
|
152
128
|
}
|
|
153
129
|
|
|
154
|
-
/**
|
|
155
|
-
* Shared retrieval pipeline: collect candidates from all available sources
|
|
156
|
-
* (lexical, recency, semantic, entity, direct item search) and merge them
|
|
157
|
-
* using RRF.
|
|
158
|
-
*/
|
|
159
|
-
export async function collectAndMergeCandidates(
|
|
160
|
-
query: string,
|
|
161
|
-
config: AssistantConfig,
|
|
162
|
-
opts?: {
|
|
163
|
-
queryVector?: number[] | null;
|
|
164
|
-
provider?: string;
|
|
165
|
-
model?: string;
|
|
166
|
-
conversationId?: string;
|
|
167
|
-
excludeMessageIds?: string[];
|
|
168
|
-
scopeId?: string;
|
|
169
|
-
scopePolicyOverride?: ScopePolicyOverride;
|
|
170
|
-
},
|
|
171
|
-
): Promise<CollectedCandidates> {
|
|
172
|
-
const queryVector = opts?.queryVector ?? null;
|
|
173
|
-
const excludeMessageIds = opts?.excludeMessageIds ?? [];
|
|
174
|
-
const scopeId = opts?.scopeId;
|
|
175
|
-
const scopePolicy = config.memory.retrieval.scopePolicy;
|
|
176
|
-
// Build the list of scope IDs to include in queries.
|
|
177
|
-
// A per-call scopePolicyOverride takes precedence over the global policy.
|
|
178
|
-
const scopeIds = buildScopeFilter(
|
|
179
|
-
scopeId,
|
|
180
|
-
scopePolicy,
|
|
181
|
-
opts?.scopePolicyOverride,
|
|
182
|
-
);
|
|
183
|
-
|
|
184
|
-
let semanticSearchFailed = false;
|
|
185
|
-
let semanticSearchError: unknown;
|
|
186
|
-
|
|
187
|
-
// Detect when semantic search won't be available so we can compensate
|
|
188
|
-
// by boosting lexical/recency/direct item limits.
|
|
189
|
-
const semanticUnavailable = !queryVector || isQdrantBreakerOpen();
|
|
190
|
-
if (semanticUnavailable) {
|
|
191
|
-
log.debug("Semantic search unavailable — boosting lexical limits");
|
|
192
|
-
}
|
|
193
|
-
|
|
194
|
-
// -- Phase 1: cheap local searches (always run) --
|
|
195
|
-
const lexicalTopK = semanticUnavailable
|
|
196
|
-
? config.memory.retrieval.lexicalTopK * 2
|
|
197
|
-
: config.memory.retrieval.lexicalTopK;
|
|
198
|
-
|
|
199
|
-
// When semantic search is unavailable, expand the conversational query
|
|
200
|
-
// into meaningful keywords for better FTS recall. This compensates for
|
|
201
|
-
// the lack of vector-based semantic matching.
|
|
202
|
-
const expandedFtsQuery = semanticUnavailable
|
|
203
|
-
? buildFTSQuery(expandQueryForFTS(query))
|
|
204
|
-
: undefined;
|
|
205
|
-
|
|
206
|
-
const lexical = lexicalSearch(
|
|
207
|
-
query,
|
|
208
|
-
lexicalTopK,
|
|
209
|
-
excludeMessageIds,
|
|
210
|
-
scopeIds,
|
|
211
|
-
expandedFtsQuery,
|
|
212
|
-
);
|
|
213
|
-
|
|
214
|
-
const baseRecencyLimit = Math.max(
|
|
215
|
-
10,
|
|
216
|
-
Math.floor(config.memory.retrieval.semanticTopK / 2),
|
|
217
|
-
);
|
|
218
|
-
const recencyLimit = semanticUnavailable
|
|
219
|
-
? Math.ceil(baseRecencyLimit * 1.5)
|
|
220
|
-
: baseRecencyLimit;
|
|
221
|
-
const recency = opts?.conversationId
|
|
222
|
-
? recencySearch(
|
|
223
|
-
opts.conversationId,
|
|
224
|
-
recencyLimit,
|
|
225
|
-
excludeMessageIds,
|
|
226
|
-
scopeIds,
|
|
227
|
-
)
|
|
228
|
-
: [];
|
|
229
|
-
|
|
230
|
-
// Direct item search supplements FTS with LIKE-based matching.
|
|
231
|
-
// When exclusions are present, adaptively increase the fetch size until
|
|
232
|
-
// we collect directLimit valid (non-excluded) items or exhaust the DB.
|
|
233
|
-
const baseDirectLimit = Math.max(10, config.memory.retrieval.lexicalTopK);
|
|
234
|
-
const directLimit = semanticUnavailable
|
|
235
|
-
? baseDirectLimit * 2
|
|
236
|
-
: baseDirectLimit;
|
|
237
|
-
|
|
238
|
-
// Helper: filter fetched direct items to those with at least one non-excluded source.
|
|
239
|
-
const filterDirectItems = (items: Candidate[]): Candidate[] => {
|
|
240
|
-
if (items.length === 0) return items;
|
|
241
|
-
const db = getDb();
|
|
242
|
-
const excludedSet = new Set(excludeMessageIds);
|
|
243
|
-
const allSources = db
|
|
244
|
-
.select({
|
|
245
|
-
memoryItemId: memoryItemSources.memoryItemId,
|
|
246
|
-
messageId: memoryItemSources.messageId,
|
|
247
|
-
})
|
|
248
|
-
.from(memoryItemSources)
|
|
249
|
-
.where(
|
|
250
|
-
inArray(
|
|
251
|
-
memoryItemSources.memoryItemId,
|
|
252
|
-
items.map((c) => c.id),
|
|
253
|
-
),
|
|
254
|
-
)
|
|
255
|
-
.all();
|
|
256
|
-
const hasNonExcluded = new Set<string>();
|
|
257
|
-
const hasSources = new Set<string>();
|
|
258
|
-
for (const s of allSources) {
|
|
259
|
-
hasSources.add(s.memoryItemId);
|
|
260
|
-
if (!excludedSet.has(s.messageId)) {
|
|
261
|
-
hasNonExcluded.add(s.memoryItemId);
|
|
262
|
-
}
|
|
263
|
-
}
|
|
264
|
-
return items.filter(
|
|
265
|
-
(c) => !hasSources.has(c.id) || hasNonExcluded.has(c.id),
|
|
266
|
-
);
|
|
267
|
-
};
|
|
268
|
-
|
|
269
|
-
let directItems: Candidate[];
|
|
270
|
-
if (excludeMessageIds.length > 0) {
|
|
271
|
-
const MAX_FETCH = directLimit * 8;
|
|
272
|
-
|
|
273
|
-
// Probe: fetch directLimit items and measure how many survive filtering.
|
|
274
|
-
const probe = directItemSearch(query, directLimit, scopeIds);
|
|
275
|
-
const probeFiltered = filterDirectItems(probe);
|
|
276
|
-
const probeExhausted = probe.length < directLimit;
|
|
277
|
-
|
|
278
|
-
if (probeFiltered.length >= directLimit || probeExhausted) {
|
|
279
|
-
directItems = probeFiltered.slice(0, directLimit);
|
|
280
|
-
} else {
|
|
281
|
-
// Compute exclusion ratio from probe and extrapolate the fetch size
|
|
282
|
-
// needed to yield directLimit surviving items in a single query.
|
|
283
|
-
const exclusionRatio =
|
|
284
|
-
probe.length > 0 ? 1 - probeFiltered.length / probe.length : 0;
|
|
285
|
-
// Fetch enough to compensate for the observed exclusion rate, with
|
|
286
|
-
// a 1.5x safety margin to avoid a second round in most cases.
|
|
287
|
-
const estimatedFetch =
|
|
288
|
-
exclusionRatio < 1
|
|
289
|
-
? Math.ceil((directLimit / (1 - exclusionRatio)) * 1.5)
|
|
290
|
-
: MAX_FETCH;
|
|
291
|
-
let fetchSize = Math.min(
|
|
292
|
-
Math.max(estimatedFetch, directLimit + 24),
|
|
293
|
-
MAX_FETCH,
|
|
294
|
-
);
|
|
295
|
-
|
|
296
|
-
let fetched = directItemSearch(query, fetchSize, scopeIds);
|
|
297
|
-
directItems = filterDirectItems(fetched).slice(0, directLimit);
|
|
298
|
-
|
|
299
|
-
// Retry loop: when the estimate under-fetched (uneven exclusion
|
|
300
|
-
// distribution), keep increasing fetchSize until quota is met or
|
|
301
|
-
// the DB is exhausted.
|
|
302
|
-
while (
|
|
303
|
-
directItems.length < directLimit &&
|
|
304
|
-
fetched.length === fetchSize &&
|
|
305
|
-
fetchSize < MAX_FETCH
|
|
306
|
-
) {
|
|
307
|
-
fetchSize = Math.min(fetchSize * 2, MAX_FETCH);
|
|
308
|
-
fetched = directItemSearch(query, fetchSize, scopeIds);
|
|
309
|
-
directItems = filterDirectItems(fetched).slice(0, directLimit);
|
|
310
|
-
}
|
|
311
|
-
}
|
|
312
|
-
} else {
|
|
313
|
-
directItems = directItemSearch(query, directLimit, scopeIds);
|
|
314
|
-
}
|
|
315
|
-
|
|
316
|
-
// -- Early termination check --
|
|
317
|
-
// If cheap sources already produced enough high-relevance candidates,
|
|
318
|
-
// skip semantic and entity search entirely.
|
|
319
|
-
//
|
|
320
|
-
// Deduplicate before counting: lexical and recency can return the same
|
|
321
|
-
// segment (common when recent messages match the query), so checking raw
|
|
322
|
-
// counts would inflate the total and trigger early termination prematurely.
|
|
323
|
-
const etConfig = config.memory.retrieval.earlyTermination;
|
|
324
|
-
const cheapCandidateMap = new Map<string, Candidate>();
|
|
325
|
-
for (const c of [...lexical, ...recency, ...directItems]) {
|
|
326
|
-
const existing = cheapCandidateMap.get(c.key);
|
|
327
|
-
// Keep the candidate with higher query relevance (lexical score is the
|
|
328
|
-
// best proxy we have at this stage; confidence reflects extraction
|
|
329
|
-
// certainty, not query-match strength).
|
|
330
|
-
if (!existing || c.lexical > existing.lexical) {
|
|
331
|
-
cheapCandidateMap.set(c.key, c);
|
|
332
|
-
}
|
|
333
|
-
}
|
|
334
|
-
const cheapCandidates = [...cheapCandidateMap.values()];
|
|
335
|
-
|
|
336
|
-
// Gate on relevance instead of confidence: for direct item candidates,
|
|
337
|
-
// c.confidence reflects extraction certainty (memory_items.confidence),
|
|
338
|
-
// not query-match relevance. Common tokens can produce many high-confidence
|
|
339
|
-
// but weakly relevant items that would skip semantic search exactly when
|
|
340
|
-
// it's needed most. Instead, check lexical score (query-match relevance).
|
|
341
|
-
//
|
|
342
|
-
// Disable early termination when semantic search is unavailable: boosted
|
|
343
|
-
// limits inflate cheap candidate counts, making this gate trigger more
|
|
344
|
-
// easily. Skipping entity retrieval on top of losing semantic search
|
|
345
|
-
// would reduce recall quality further.
|
|
346
|
-
const canTerminateEarly =
|
|
347
|
-
etConfig.enabled &&
|
|
348
|
-
!semanticUnavailable &&
|
|
349
|
-
cheapCandidates.length >= etConfig.minCandidates &&
|
|
350
|
-
cheapCandidates.filter((c) => c.lexical >= etConfig.confidenceThreshold)
|
|
351
|
-
.length >= etConfig.minHighConfidence;
|
|
352
|
-
|
|
353
|
-
// -- Phase 2: entity search + await semantic (skipped on early termination) --
|
|
354
|
-
let semantic: Candidate[] = [];
|
|
355
|
-
let entity: Candidate[] = [];
|
|
356
|
-
let candidateDepths: Map<string, number> | undefined;
|
|
357
|
-
let relationSeedEntityCount = 0;
|
|
358
|
-
let relationTraversedEdgeCount = 0;
|
|
359
|
-
let relationNeighborEntityCount = 0;
|
|
360
|
-
let relationExpandedItemCount = 0;
|
|
361
|
-
|
|
362
|
-
if (!canTerminateEarly) {
|
|
363
|
-
// Start semantic search now that we know early termination won't apply.
|
|
364
|
-
// The network round-trip overlaps with entity search below.
|
|
365
|
-
const semanticPromise = queryVector
|
|
366
|
-
? semanticSearch(
|
|
367
|
-
queryVector,
|
|
368
|
-
opts?.provider ?? "unknown",
|
|
369
|
-
opts?.model ?? "unknown",
|
|
370
|
-
config.memory.retrieval.semanticTopK,
|
|
371
|
-
excludeMessageIds,
|
|
372
|
-
scopeIds,
|
|
373
|
-
).catch((err): Candidate[] => {
|
|
374
|
-
semanticSearchFailed = true;
|
|
375
|
-
semanticSearchError = err;
|
|
376
|
-
if (isQdrantConnectionError(err)) {
|
|
377
|
-
log.warn(
|
|
378
|
-
{ err },
|
|
379
|
-
"Qdrant is unavailable — semantic search disabled, memory recall will be degraded",
|
|
380
|
-
);
|
|
381
|
-
} else {
|
|
382
|
-
log.warn(
|
|
383
|
-
{ err },
|
|
384
|
-
"Semantic search failed, continuing with other retrieval methods",
|
|
385
|
-
);
|
|
386
|
-
}
|
|
387
|
-
return [];
|
|
388
|
-
})
|
|
389
|
-
: null;
|
|
390
|
-
|
|
391
|
-
// Entity search is synchronous — run it while the semantic promise
|
|
392
|
-
// is in flight.
|
|
393
|
-
if (config.memory.entity.enabled) {
|
|
394
|
-
const entitySearchResult = entitySearch(
|
|
395
|
-
query,
|
|
396
|
-
config.memory.entity,
|
|
397
|
-
scopeIds,
|
|
398
|
-
excludeMessageIds,
|
|
399
|
-
);
|
|
400
|
-
entity = entitySearchResult.candidates;
|
|
401
|
-
candidateDepths = entitySearchResult.candidateDepths;
|
|
402
|
-
relationSeedEntityCount = entitySearchResult.relationSeedEntityCount;
|
|
403
|
-
relationTraversedEdgeCount =
|
|
404
|
-
entitySearchResult.relationTraversedEdgeCount;
|
|
405
|
-
relationNeighborEntityCount =
|
|
406
|
-
entitySearchResult.relationNeighborEntityCount;
|
|
407
|
-
relationExpandedItemCount = entitySearchResult.relationExpandedItemCount;
|
|
408
|
-
}
|
|
409
|
-
|
|
410
|
-
if (semanticPromise) {
|
|
411
|
-
semantic = await semanticPromise;
|
|
412
|
-
}
|
|
413
|
-
}
|
|
414
|
-
|
|
415
|
-
if (canTerminateEarly) {
|
|
416
|
-
log.debug(
|
|
417
|
-
{
|
|
418
|
-
cheapCandidateCount: cheapCandidates.length,
|
|
419
|
-
highRelevanceCount: cheapCandidates.filter(
|
|
420
|
-
(c) => c.lexical >= etConfig.confidenceThreshold,
|
|
421
|
-
).length,
|
|
422
|
-
},
|
|
423
|
-
"Early termination: skipping semantic and entity search — sufficient high-relevance candidates from cheap sources",
|
|
424
|
-
);
|
|
425
|
-
}
|
|
426
|
-
|
|
427
|
-
const relationScoreMultiplier =
|
|
428
|
-
config.memory.entity.enabled &&
|
|
429
|
-
config.memory.entity.relationRetrieval.enabled
|
|
430
|
-
? config.memory.entity.relationRetrieval.neighborScoreMultiplier
|
|
431
|
-
: undefined;
|
|
432
|
-
const depthMap =
|
|
433
|
-
config.memory.entity.enabled &&
|
|
434
|
-
config.memory.entity.relationRetrieval.depthDecay
|
|
435
|
-
? candidateDepths
|
|
436
|
-
: undefined;
|
|
437
|
-
const merged = mergeCandidates(
|
|
438
|
-
lexical,
|
|
439
|
-
semantic,
|
|
440
|
-
recency,
|
|
441
|
-
[...entity, ...directItems],
|
|
442
|
-
config.memory.retrieval.freshness,
|
|
443
|
-
relationScoreMultiplier,
|
|
444
|
-
depthMap,
|
|
445
|
-
);
|
|
446
|
-
|
|
447
|
-
return {
|
|
448
|
-
lexical,
|
|
449
|
-
recency,
|
|
450
|
-
semantic,
|
|
451
|
-
entity,
|
|
452
|
-
relationSeedEntityCount,
|
|
453
|
-
relationTraversedEdgeCount,
|
|
454
|
-
relationNeighborEntityCount,
|
|
455
|
-
relationExpandedItemCount,
|
|
456
|
-
earlyTerminated: canTerminateEarly,
|
|
457
|
-
semanticSearchFailed,
|
|
458
|
-
semanticUnavailable,
|
|
459
|
-
semanticSearchError,
|
|
460
|
-
merged,
|
|
461
|
-
};
|
|
462
|
-
}
|
|
463
|
-
|
|
464
130
|
/**
|
|
465
131
|
* Build a structured degradation status describing which retrieval
|
|
466
132
|
* capabilities are unavailable and what fallback sources remain.
|
|
467
133
|
*/
|
|
468
134
|
function buildDegradationStatus(
|
|
469
135
|
reason: DegradationReason,
|
|
470
|
-
|
|
136
|
+
_config: AssistantConfig,
|
|
471
137
|
): DegradationStatus {
|
|
472
|
-
const fallbackSources: FallbackSource[] = [
|
|
473
|
-
"lexical",
|
|
474
|
-
"recency",
|
|
475
|
-
"direct_item",
|
|
476
|
-
];
|
|
477
|
-
if (config.memory.entity.enabled) {
|
|
478
|
-
fallbackSources.push("entity");
|
|
479
|
-
}
|
|
480
138
|
return {
|
|
481
139
|
semanticUnavailable: true,
|
|
482
140
|
reason,
|
|
483
|
-
fallbackSources,
|
|
141
|
+
fallbackSources: ["recency"],
|
|
484
142
|
};
|
|
485
143
|
}
|
|
486
144
|
|
|
@@ -576,86 +234,184 @@ async function generateQueryEmbedding(
|
|
|
576
234
|
return { queryVector, provider, model, degraded, degradation, reason };
|
|
577
235
|
}
|
|
578
236
|
|
|
579
|
-
/** Result of the re-ranking stage. */
|
|
580
|
-
interface RerankResult {
|
|
581
|
-
merged: Candidate[];
|
|
582
|
-
rerankApplied: boolean;
|
|
583
|
-
}
|
|
584
|
-
|
|
585
237
|
/**
|
|
586
|
-
*
|
|
587
|
-
*
|
|
588
|
-
*
|
|
238
|
+
* Memory recall pipeline: hybrid search → tier classification →
|
|
239
|
+
* staleness annotation → two-layer XML injection.
|
|
240
|
+
*
|
|
241
|
+
* Pipeline steps:
|
|
242
|
+
* 1. Build query text (caller provides via buildMemoryQuery)
|
|
243
|
+
* 2. Generate dense + sparse embeddings
|
|
244
|
+
* 3. Hybrid search on Qdrant (dense + sparse RRF fusion)
|
|
245
|
+
* 4. Supplement with recency search (conversation-scoped, DB only)
|
|
246
|
+
* 5. Merge + deduplicate results
|
|
247
|
+
* 6. Classify tiers (score > 0.8 → tier 1, > 0.6 → tier 2); recency-only hits are promoted to tier 2, then source labels are attached
|
|
248
|
+
* 7. Enrich item candidates with metadata for staleness
|
|
249
|
+
* 8. Compute staleness per item
|
|
250
|
+
* 9. Demote very_stale tier 1 → tier 2
|
|
251
|
+
* 10. Build two-layer XML injection with budget allocation
|
|
589
252
|
*/
|
|
590
|
-
async function
|
|
253
|
+
export async function buildMemoryRecall(
|
|
591
254
|
query: string,
|
|
592
|
-
|
|
255
|
+
conversationId: string,
|
|
593
256
|
config: AssistantConfig,
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
)
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
257
|
+
options?: MemoryRecallOptions,
|
|
258
|
+
): Promise<MemoryRecallResult> {
|
|
259
|
+
const start = Date.now();
|
|
260
|
+
const excludeMessageIds =
|
|
261
|
+
options?.excludeMessageIds?.filter((id) => id.length > 0) ?? [];
|
|
262
|
+
const signal = options?.signal;
|
|
263
|
+
|
|
264
|
+
if (!config.memory.enabled) {
|
|
265
|
+
return emptyResult({
|
|
266
|
+
enabled: false,
|
|
267
|
+
degraded: false,
|
|
268
|
+
reason: "memory.disabled",
|
|
269
|
+
latencyMs: Date.now() - start,
|
|
270
|
+
});
|
|
271
|
+
}
|
|
272
|
+
if (signal?.aborted) {
|
|
273
|
+
return emptyResult({
|
|
274
|
+
enabled: true,
|
|
275
|
+
degraded: false,
|
|
276
|
+
reason: "memory.aborted",
|
|
277
|
+
latencyMs: Date.now() - start,
|
|
278
|
+
});
|
|
279
|
+
}
|
|
280
|
+
|
|
281
|
+
// ── Step 1+2: Generate dense and sparse embeddings ──────────────
|
|
282
|
+
const embeddingResult = await generateQueryEmbedding(
|
|
283
|
+
query,
|
|
284
|
+
config,
|
|
285
|
+
signal,
|
|
286
|
+
start,
|
|
287
|
+
);
|
|
288
|
+
if ("earlyExit" in embeddingResult) return embeddingResult.earlyExit;
|
|
289
|
+
|
|
290
|
+
const { queryVector, provider, model } = embeddingResult;
|
|
291
|
+
|
|
292
|
+
// Generate sparse embedding for the query text (TF-IDF based)
|
|
293
|
+
const sparseVector = generateSparseEmbedding(query);
|
|
294
|
+
const sparseVectorAvailable = sparseVector.indices.length > 0;
|
|
295
|
+
|
|
296
|
+
// ── Step 3: Hybrid search on Qdrant ─────────────────────────────
|
|
297
|
+
const scopePolicy = config.memory.retrieval.scopePolicy;
|
|
298
|
+
const scopeIds = buildScopeFilter(
|
|
299
|
+
options?.scopeId,
|
|
300
|
+
scopePolicy,
|
|
301
|
+
options?.scopePolicyOverride,
|
|
302
|
+
);
|
|
303
|
+
|
|
304
|
+
const HYBRID_LIMIT = 20;
|
|
305
|
+
|
|
306
|
+
let hybridCandidates: Candidate[] = [];
|
|
307
|
+
let semanticSearchFailed = false;
|
|
308
|
+
let sparseVectorUsed = false;
|
|
309
|
+
const hybridSearchStart = Date.now();
|
|
310
|
+
|
|
311
|
+
const qdrantBreakerOpen = isQdrantBreakerOpen();
|
|
312
|
+
if (queryVector && !qdrantBreakerOpen) {
|
|
606
313
|
try {
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
{
|
|
616
|
-
rerankLatencyMs: Date.now() - rerankStart,
|
|
617
|
-
rerankedCount: reranked.length,
|
|
618
|
-
},
|
|
619
|
-
"LLM re-ranking completed",
|
|
314
|
+
hybridCandidates = await semanticSearch(
|
|
315
|
+
queryVector,
|
|
316
|
+
provider ?? "unknown",
|
|
317
|
+
model ?? "unknown",
|
|
318
|
+
HYBRID_LIMIT,
|
|
319
|
+
excludeMessageIds,
|
|
320
|
+
scopeIds,
|
|
321
|
+
sparseVectorAvailable ? sparseVector : undefined,
|
|
620
322
|
);
|
|
323
|
+
sparseVectorUsed = sparseVectorAvailable;
|
|
621
324
|
} catch (err) {
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
reason: "memory.aborted",
|
|
628
|
-
provider,
|
|
629
|
-
model,
|
|
630
|
-
latencyMs: Date.now() - start,
|
|
631
|
-
}),
|
|
632
|
-
};
|
|
325
|
+
semanticSearchFailed = true;
|
|
326
|
+
if (isQdrantConnectionError(err)) {
|
|
327
|
+
log.warn({ err }, "Qdrant unavailable — hybrid search disabled");
|
|
328
|
+
} else {
|
|
329
|
+
log.warn({ err }, "Hybrid search failed, continuing with recency only");
|
|
633
330
|
}
|
|
634
|
-
log.warn(
|
|
635
|
-
{ err, rerankLatencyMs: Date.now() - rerankStart },
|
|
636
|
-
"LLM re-ranking failed, using RRF order",
|
|
637
|
-
);
|
|
638
331
|
}
|
|
639
332
|
}
|
|
333
|
+
const hybridSearchMs = Date.now() - hybridSearchStart;
|
|
640
334
|
|
|
641
|
-
|
|
642
|
-
|
|
335
|
+
// ── Step 4: Recency supplement (DB only, conversation-scoped) ───
|
|
336
|
+
const recencyLimit = 5;
|
|
337
|
+
const recencyCandidates = conversationId
|
|
338
|
+
? recencySearch(conversationId, recencyLimit, excludeMessageIds, scopeIds)
|
|
339
|
+
: [];
|
|
643
340
|
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
)
|
|
658
|
-
|
|
341
|
+
// ── Step 5: Merge and deduplicate ──────────────────────────────
|
|
342
|
+
const candidateMap = new Map<string, Candidate>();
|
|
343
|
+
for (const c of [...hybridCandidates, ...recencyCandidates]) {
|
|
344
|
+
const existing = candidateMap.get(c.key);
|
|
345
|
+
if (!existing) {
|
|
346
|
+
candidateMap.set(c.key, { ...c });
|
|
347
|
+
continue;
|
|
348
|
+
}
|
|
349
|
+
// Keep highest scores from each source
|
|
350
|
+
existing.semantic = Math.max(existing.semantic, c.semantic);
|
|
351
|
+
existing.recency = Math.max(existing.recency, c.recency);
|
|
352
|
+
existing.confidence = Math.max(existing.confidence, c.confidence);
|
|
353
|
+
existing.importance = Math.max(existing.importance, c.importance);
|
|
354
|
+
if (c.text.length > existing.text.length) {
|
|
355
|
+
existing.text = c.text;
|
|
356
|
+
}
|
|
357
|
+
}
|
|
358
|
+
|
|
359
|
+
// Compute final scores for the merged candidates as a weighted sum
|
|
360
|
+
const allCandidates = [...candidateMap.values()];
|
|
361
|
+
for (const c of allCandidates) {
|
|
362
|
+
// Simple weighted combination — hybrid search already applies RRF fusion
|
|
363
|
+
// at the Qdrant level; here we combine the fused semantic score with recency.
|
|
364
|
+
c.finalScore = c.semantic * 0.7 + c.recency * 0.2 + c.confidence * 0.1;
|
|
365
|
+
}
|
|
366
|
+
allCandidates.sort((a, b) => b.finalScore - a.finalScore);
|
|
367
|
+
|
|
368
|
+
// ── Step 6: Tier classification ─────────────────────────────────
|
|
369
|
+
// Recency-only candidates (semantic=0) can never reach the tier 2 threshold
|
|
370
|
+
// (>0.6) since their max finalScore is 0.3. Promote them directly to tier 2
|
|
371
|
+
// so recent conversation context is preserved even without semantic signal.
|
|
372
|
+
const recencyOnlyKeys = new Set(
|
|
373
|
+
allCandidates
|
|
374
|
+
.filter((c) => c.semantic === 0 && c.recency > 0)
|
|
375
|
+
.map((c) => c.key),
|
|
376
|
+
);
|
|
377
|
+
const tiered = classifyTiers(allCandidates);
|
|
378
|
+
if (recencyOnlyKeys.size > 0) {
|
|
379
|
+
const alreadyTiered = new Set(tiered.map((c) => c.key));
|
|
380
|
+
for (const c of allCandidates) {
|
|
381
|
+
if (recencyOnlyKeys.has(c.key) && !alreadyTiered.has(c.key)) {
|
|
382
|
+
tiered.push({ ...c, tier: 2 });
|
|
383
|
+
}
|
|
384
|
+
}
|
|
385
|
+
}
|
|
386
|
+
|
|
387
|
+
// ── Step 6b: Enrich candidates with source labels ──────────────
|
|
388
|
+
enrichSourceLabels(tiered);
|
|
389
|
+
|
|
390
|
+
// ── Step 7: Enrich with item metadata for staleness ─────────────
|
|
391
|
+
const itemIds = tiered.filter((c) => c.type === "item").map((c) => c.id);
|
|
392
|
+
const itemMetadataMap = enrichItemMetadata(itemIds);
|
|
393
|
+
|
|
394
|
+
// ── Step 8: Compute staleness per item ──────────────────────────
|
|
395
|
+
const now = Date.now();
|
|
396
|
+
for (const c of tiered) {
|
|
397
|
+
if (c.type !== "item") continue;
|
|
398
|
+
const meta = itemMetadataMap.get(c.id);
|
|
399
|
+
if (!meta) continue;
|
|
400
|
+
const { level } = computeStaleness(
|
|
401
|
+
{
|
|
402
|
+
kind: c.kind,
|
|
403
|
+
firstSeenAt: meta.firstSeenAt,
|
|
404
|
+
sourceConversationCount: meta.sourceConversationCount,
|
|
405
|
+
},
|
|
406
|
+
now,
|
|
407
|
+
);
|
|
408
|
+
c.staleness = level;
|
|
409
|
+
}
|
|
410
|
+
|
|
411
|
+
// ── Step 9: Demote very_stale tier 1 → tier 2 ──────────────────
|
|
412
|
+
const afterDemotion = applyStaleDemotion(tiered);
|
|
413
|
+
|
|
414
|
+
// ── Step 10: Budget allocation and two-layer injection ──────────
|
|
659
415
|
const maxInjectTokens = Math.max(
|
|
660
416
|
1,
|
|
661
417
|
Math.floor(
|
|
@@ -664,241 +420,265 @@ function formatRecallResult(
|
|
|
664
420
|
),
|
|
665
421
|
);
|
|
666
422
|
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
423
|
+
// Split into sections for two-layer injection
|
|
424
|
+
const identityItems = afterDemotion.filter(
|
|
425
|
+
(c) => c.tier === 1 && IDENTITY_KINDS.has(c.kind),
|
|
426
|
+
);
|
|
427
|
+
const preferences = afterDemotion.filter(
|
|
428
|
+
(c) => c.tier === 1 && PREFERENCE_KINDS.has(c.kind),
|
|
429
|
+
);
|
|
430
|
+
const tier1Candidates = afterDemotion.filter(
|
|
431
|
+
(c) =>
|
|
432
|
+
c.tier === 1 &&
|
|
433
|
+
!IDENTITY_KINDS.has(c.kind) &&
|
|
434
|
+
!PREFERENCE_KINDS.has(c.kind),
|
|
435
|
+
);
|
|
436
|
+
const tier2Candidates = afterDemotion.filter((c) => c.tier === 2);
|
|
437
|
+
|
|
438
|
+
const injectedText = buildTwoLayerInjection({
|
|
439
|
+
identityItems,
|
|
440
|
+
tier1Candidates,
|
|
441
|
+
tier2Candidates,
|
|
442
|
+
preferences,
|
|
443
|
+
totalBudgetTokens: maxInjectTokens,
|
|
670
444
|
});
|
|
671
|
-
const { selected } = formatted;
|
|
672
|
-
const injectedText = formatted.text;
|
|
673
445
|
|
|
674
|
-
|
|
446
|
+
// ── Assemble result ─────────────────────────────────────────────
|
|
447
|
+
const selectedCount =
|
|
448
|
+
identityItems.length +
|
|
449
|
+
tier1Candidates.length +
|
|
450
|
+
tier2Candidates.length +
|
|
451
|
+
preferences.length;
|
|
452
|
+
|
|
453
|
+
const tier1Count = afterDemotion.filter((c) => c.tier === 1).length;
|
|
454
|
+
const tier2Count = afterDemotion.filter((c) => c.tier === 2).length;
|
|
455
|
+
const stalenessStats = {
|
|
456
|
+
fresh: afterDemotion.filter((c) => c.staleness === "fresh").length,
|
|
457
|
+
aging: afterDemotion.filter((c) => c.staleness === "aging").length,
|
|
458
|
+
stale: afterDemotion.filter((c) => c.staleness === "stale").length,
|
|
459
|
+
very_stale: afterDemotion.filter((c) => c.staleness === "very_stale")
|
|
460
|
+
.length,
|
|
461
|
+
};
|
|
462
|
+
|
|
463
|
+
const topCandidates: MemoryRecallCandiateDebug[] = afterDemotion
|
|
675
464
|
.slice(0, 10)
|
|
676
465
|
.map((c) => ({
|
|
677
466
|
key: c.key,
|
|
678
467
|
type: c.type,
|
|
679
468
|
kind: c.kind,
|
|
680
469
|
finalScore: c.finalScore,
|
|
681
|
-
lexical: c.lexical,
|
|
682
470
|
semantic: c.semantic,
|
|
683
471
|
recency: c.recency,
|
|
684
472
|
}));
|
|
685
473
|
|
|
686
474
|
const latencyMs = Date.now() - start;
|
|
475
|
+
|
|
476
|
+
// Propagate degradation from hybrid search failure, breaker-open skip, or a missing required query vector
|
|
477
|
+
if (
|
|
478
|
+
semanticSearchFailed ||
|
|
479
|
+
qdrantBreakerOpen ||
|
|
480
|
+
(!queryVector && config.memory.embeddings.required)
|
|
481
|
+
) {
|
|
482
|
+
embeddingResult.degraded = true;
|
|
483
|
+
embeddingResult.reason =
|
|
484
|
+
embeddingResult.reason ??
|
|
485
|
+
(qdrantBreakerOpen
|
|
486
|
+
? "memory.qdrant_breaker_open"
|
|
487
|
+
: "memory.hybrid_search_failure");
|
|
488
|
+
}
|
|
489
|
+
|
|
687
490
|
log.debug(
|
|
688
491
|
{
|
|
689
492
|
query: truncate(query, 120),
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
relationExpandedItemCount: collected.relationExpandedItemCount,
|
|
698
|
-
earlyTerminated: collected.earlyTerminated,
|
|
699
|
-
mergedCount,
|
|
700
|
-
selected: selected.length,
|
|
493
|
+
hybridHits: hybridCandidates.length,
|
|
494
|
+
recencyHits: recencyCandidates.length,
|
|
495
|
+
mergedCount: allCandidates.length,
|
|
496
|
+
tier1Count,
|
|
497
|
+
tier2Count,
|
|
498
|
+
stalenessStats,
|
|
499
|
+
selectedCount,
|
|
701
500
|
maxInjectTokens,
|
|
702
|
-
rerankApplied,
|
|
703
501
|
injectedTokens: estimateTextTokens(injectedText),
|
|
704
502
|
latencyMs,
|
|
705
503
|
},
|
|
706
504
|
"Memory recall completed",
|
|
707
505
|
);
|
|
708
506
|
|
|
709
|
-
|
|
507
|
+
const result: MemoryRecallResult = {
|
|
710
508
|
enabled: true,
|
|
711
|
-
degraded:
|
|
712
|
-
degradation:
|
|
713
|
-
reason:
|
|
714
|
-
provider:
|
|
715
|
-
model:
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
relationSeedEntityCount: collected.relationSeedEntityCount,
|
|
721
|
-
relationTraversedEdgeCount: collected.relationTraversedEdgeCount,
|
|
722
|
-
relationNeighborEntityCount: collected.relationNeighborEntityCount,
|
|
723
|
-
relationExpandedItemCount: collected.relationExpandedItemCount,
|
|
724
|
-
earlyTerminated: collected.earlyTerminated,
|
|
725
|
-
mergedCount,
|
|
726
|
-
selectedCount: selected.length,
|
|
727
|
-
rerankApplied,
|
|
509
|
+
degraded: embeddingResult.degraded,
|
|
510
|
+
degradation: embeddingResult.degradation,
|
|
511
|
+
reason: embeddingResult.reason,
|
|
512
|
+
provider: embeddingResult.provider,
|
|
513
|
+
model: embeddingResult.model,
|
|
514
|
+
semanticHits: hybridCandidates.length,
|
|
515
|
+
recencyHits: recencyCandidates.length,
|
|
516
|
+
mergedCount: allCandidates.length,
|
|
517
|
+
selectedCount,
|
|
728
518
|
injectedTokens: estimateTextTokens(injectedText),
|
|
729
519
|
injectedText,
|
|
730
520
|
latencyMs,
|
|
731
521
|
topCandidates,
|
|
522
|
+
tier1Count,
|
|
523
|
+
tier2Count,
|
|
524
|
+
hybridSearchMs,
|
|
525
|
+
sparseVectorUsed,
|
|
732
526
|
};
|
|
527
|
+
|
|
528
|
+
return result;
|
|
733
529
|
}
|
|
734
530
|
|
|
735
|
-
|
|
736
|
-
|
|
737
|
-
|
|
738
|
-
|
|
739
|
-
|
|
740
|
-
|
|
741
|
-
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
latencyMs: Date.now() - start,
|
|
752
|
-
});
|
|
753
|
-
}
|
|
754
|
-
if (signal?.aborted) {
|
|
755
|
-
return emptyResult({
|
|
756
|
-
enabled: true,
|
|
757
|
-
degraded: false,
|
|
758
|
-
reason: "memory.aborted",
|
|
759
|
-
latencyMs: Date.now() - start,
|
|
760
|
-
});
|
|
761
|
-
}
|
|
531
|
+
/**
|
|
532
|
+
* Enrich item candidates with metadata needed for staleness computation:
|
|
533
|
+
* - firstSeenAt: when the item was first extracted
|
|
534
|
+
* - sourceConversationCount: number of distinct conversations that sourced this item
|
|
535
|
+
*/
|
|
536
|
+
function enrichItemMetadata(
|
|
537
|
+
itemIds: string[],
|
|
538
|
+
): Map<
|
|
539
|
+
string,
|
|
540
|
+
{ firstSeenAt: number; sourceConversationCount: number; kind: string }
|
|
541
|
+
> {
|
|
542
|
+
const result = new Map<
|
|
543
|
+
string,
|
|
544
|
+
{ firstSeenAt: number; sourceConversationCount: number; kind: string }
|
|
545
|
+
>();
|
|
546
|
+
if (itemIds.length === 0) return result;
|
|
762
547
|
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
const cached = getCachedRecall(
|
|
766
|
-
query,
|
|
767
|
-
conversationId,
|
|
768
|
-
options,
|
|
769
|
-
configFingerprint,
|
|
770
|
-
);
|
|
771
|
-
if (cached) {
|
|
772
|
-
log.debug(
|
|
773
|
-
{ query: truncate(query, 120), latencyMs: Date.now() - start },
|
|
774
|
-
"Memory recall served from cache",
|
|
775
|
-
);
|
|
776
|
-
return { ...cached, latencyMs: Date.now() - start };
|
|
777
|
-
}
|
|
548
|
+
try {
|
|
549
|
+
const db = getDb();
|
|
778
550
|
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
|
|
551
|
+
// Fetch firstSeenAt and kind from memory_items
|
|
552
|
+
const items = db
|
|
553
|
+
.select({
|
|
554
|
+
id: memoryItems.id,
|
|
555
|
+
firstSeenAt: memoryItems.firstSeenAt,
|
|
556
|
+
kind: memoryItems.kind,
|
|
557
|
+
})
|
|
558
|
+
.from(memoryItems)
|
|
559
|
+
.where(inArray(memoryItems.id, itemIds))
|
|
560
|
+
.all();
|
|
787
561
|
|
|
788
|
-
|
|
789
|
-
|
|
790
|
-
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
provider: embeddingResult.provider,
|
|
794
|
-
model: embeddingResult.model,
|
|
795
|
-
conversationId,
|
|
796
|
-
excludeMessageIds,
|
|
797
|
-
scopeId: options?.scopeId,
|
|
798
|
-
scopePolicyOverride: options?.scopePolicyOverride,
|
|
799
|
-
});
|
|
800
|
-
} catch (err) {
|
|
801
|
-
if (signal?.aborted || isAbortError(err)) {
|
|
802
|
-
return emptyResult({
|
|
803
|
-
enabled: true,
|
|
804
|
-
degraded: false,
|
|
805
|
-
reason: "memory.aborted",
|
|
806
|
-
provider: embeddingResult.provider,
|
|
807
|
-
model: embeddingResult.model,
|
|
808
|
-
latencyMs: Date.now() - start,
|
|
562
|
+
for (const item of items) {
|
|
563
|
+
result.set(item.id, {
|
|
564
|
+
firstSeenAt: item.firstSeenAt,
|
|
565
|
+
kind: item.kind,
|
|
566
|
+
sourceConversationCount: 1, // default, updated below
|
|
809
567
|
});
|
|
810
568
|
}
|
|
569
|
+
|
|
570
|
+
// Compute sourceConversationCount: count distinct conversation IDs
|
|
571
|
+
// across the memory_item_sources → messages join.
|
|
572
|
+
const sourceCountRows = db
|
|
573
|
+
.select({
|
|
574
|
+
memoryItemId: memoryItemSources.memoryItemId,
|
|
575
|
+
conversationCount:
|
|
576
|
+
sql<number>`COUNT(DISTINCT ${messages.conversationId})`.as(
|
|
577
|
+
"conversation_count",
|
|
578
|
+
),
|
|
579
|
+
})
|
|
580
|
+
.from(memoryItemSources)
|
|
581
|
+
.innerJoin(messages, sql`${memoryItemSources.messageId} = ${messages.id}`)
|
|
582
|
+
.where(inArray(memoryItemSources.memoryItemId, itemIds))
|
|
583
|
+
.groupBy(memoryItemSources.memoryItemId)
|
|
584
|
+
.all();
|
|
585
|
+
|
|
586
|
+
for (const row of sourceCountRows) {
|
|
587
|
+
const existing = result.get(row.memoryItemId);
|
|
588
|
+
if (existing) {
|
|
589
|
+
existing.sourceConversationCount = row.conversationCount;
|
|
590
|
+
}
|
|
591
|
+
}
|
|
592
|
+
} catch (err) {
|
|
811
593
|
log.warn(
|
|
812
594
|
{ err },
|
|
813
|
-
"
|
|
595
|
+
"Failed to enrich item metadata for staleness computation",
|
|
814
596
|
);
|
|
815
|
-
return emptyResult({
|
|
816
|
-
enabled: true,
|
|
817
|
-
degraded: true,
|
|
818
|
-
reason: `memory.retrieval_failure: ${
|
|
819
|
-
err instanceof Error ? err.message : String(err)
|
|
820
|
-
}`,
|
|
821
|
-
provider: embeddingResult.provider,
|
|
822
|
-
model: embeddingResult.model,
|
|
823
|
-
latencyMs: Date.now() - start,
|
|
824
|
-
});
|
|
825
597
|
}
|
|
826
598
|
|
|
827
|
-
|
|
828
|
-
|
|
829
|
-
// are marked degraded and excluded from the recall cache — preventing
|
|
830
|
-
// stale boosted results from being served after the breaker closes.
|
|
831
|
-
//
|
|
832
|
-
// Exception: when semanticUnavailable is solely because no embedding
|
|
833
|
-
// provider is configured (queryVector == null) and embeddings are not
|
|
834
|
-
// required, lexical-only results are the expected steady state — do not
|
|
835
|
-
// mark as degraded.
|
|
836
|
-
const semanticActuallyFailed =
|
|
837
|
-
collected.semanticSearchFailed ||
|
|
838
|
-
(collected.semanticUnavailable &&
|
|
839
|
-
(embeddingResult.queryVector != null ||
|
|
840
|
-
config.memory.embeddings.required));
|
|
841
|
-
if (semanticActuallyFailed) {
|
|
842
|
-
embeddingResult.degraded = true;
|
|
843
|
-
embeddingResult.reason =
|
|
844
|
-
embeddingResult.reason ??
|
|
845
|
-
(collected.semanticUnavailable
|
|
846
|
-
? embeddingResult.queryVector != null
|
|
847
|
-
? "memory.qdrant_circuit_open"
|
|
848
|
-
: "memory.embedding_unavailable"
|
|
849
|
-
: "memory.semantic_search_failure");
|
|
850
|
-
if (!embeddingResult.degradation) {
|
|
851
|
-
const isQdrantIssue =
|
|
852
|
-
embeddingResult.queryVector != null ||
|
|
853
|
-
isQdrantConnectionError(collected.semanticSearchError) ||
|
|
854
|
-
collected.semanticSearchError instanceof QdrantCircuitOpenError;
|
|
855
|
-
const reason: DegradationReason = isQdrantIssue
|
|
856
|
-
? "qdrant_unavailable"
|
|
857
|
-
: "embedding_generation_failed";
|
|
858
|
-
embeddingResult.degradation = buildDegradationStatus(reason, config);
|
|
859
|
-
}
|
|
860
|
-
}
|
|
599
|
+
return result;
|
|
600
|
+
}
|
|
861
601
|
|
|
862
|
-
|
|
863
|
-
|
|
864
|
-
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
602
|
+
/**
|
|
603
|
+
* Enrich tiered candidates with source labels (conversation titles).
|
|
604
|
+
*
|
|
605
|
+
* For "item" candidates: joins through memoryItemSources → messages → conversations
|
|
606
|
+
* to find the most recent conversation title associated with the item.
|
|
607
|
+
* "segment" / "summary" candidates are currently left unlabeled: resolving a segment's
|
|
608
|
+
* conversation title would need an extra memory_segments query, which is not done yet.
|
|
609
|
+
*
|
|
610
|
+
* Mutates the candidates in-place for efficiency.
|
|
611
|
+
*/
|
|
612
|
+
function enrichSourceLabels(candidates: TieredCandidate[]): void {
|
|
613
|
+
if (candidates.length === 0) return;
|
|
873
614
|
|
|
874
|
-
|
|
875
|
-
|
|
876
|
-
query,
|
|
877
|
-
collected,
|
|
878
|
-
rerankResult.merged,
|
|
879
|
-
rerankResult.rerankApplied,
|
|
880
|
-
config,
|
|
881
|
-
options,
|
|
882
|
-
embeddingResult,
|
|
883
|
-
start,
|
|
884
|
-
);
|
|
615
|
+
try {
|
|
616
|
+
const db = getDb();
|
|
885
617
|
|
|
886
|
-
|
|
887
|
-
|
|
888
|
-
|
|
889
|
-
|
|
890
|
-
|
|
891
|
-
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
|
|
895
|
-
|
|
896
|
-
|
|
897
|
-
|
|
618
|
+
// Collect item IDs for items that need source label lookup
|
|
619
|
+
const itemCandidates = candidates.filter((c) => c.type === "item");
|
|
620
|
+
const itemIds = itemCandidates.map((c) => c.id);
|
|
621
|
+
|
|
622
|
+
if (itemIds.length > 0) {
|
|
623
|
+
// For items: find conversation titles via memoryItemSources → messages → conversations.
|
|
624
|
+
// Pick the most recent conversation title per item.
|
|
625
|
+
const rows = db
|
|
626
|
+
.select({
|
|
627
|
+
memoryItemId: memoryItemSources.memoryItemId,
|
|
628
|
+
title: conversations.title,
|
|
629
|
+
conversationUpdatedAt: conversations.updatedAt,
|
|
630
|
+
})
|
|
631
|
+
.from(memoryItemSources)
|
|
632
|
+
.innerJoin(
|
|
633
|
+
messages,
|
|
634
|
+
sql`${memoryItemSources.messageId} = ${messages.id}`,
|
|
635
|
+
)
|
|
636
|
+
.innerJoin(
|
|
637
|
+
conversations,
|
|
638
|
+
sql`${messages.conversationId} = ${conversations.id}`,
|
|
639
|
+
)
|
|
640
|
+
.where(inArray(memoryItemSources.memoryItemId, itemIds))
|
|
641
|
+
.all();
|
|
642
|
+
|
|
643
|
+
// Group by item ID and pick the most recently updated conversation title
|
|
644
|
+
const titleMap = new Map<string, string>();
|
|
645
|
+
const updatedAtMap = new Map<string, number>();
|
|
646
|
+
for (const row of rows) {
|
|
647
|
+
if (!row.title) continue;
|
|
648
|
+
const existing = updatedAtMap.get(row.memoryItemId);
|
|
649
|
+
if (existing === undefined || row.conversationUpdatedAt > existing) {
|
|
650
|
+
titleMap.set(row.memoryItemId, row.title);
|
|
651
|
+
updatedAtMap.set(row.memoryItemId, row.conversationUpdatedAt);
|
|
652
|
+
}
|
|
653
|
+
}
|
|
654
|
+
|
|
655
|
+
for (const c of itemCandidates) {
|
|
656
|
+
const title = titleMap.get(c.id);
|
|
657
|
+
if (title) {
|
|
658
|
+
c.sourceLabel = title;
|
|
659
|
+
}
|
|
660
|
+
}
|
|
661
|
+
}
|
|
662
|
+
|
|
663
|
+
// For segment candidates: the key format is "seg:<segmentId>" and the id is the segment's id.
|
|
664
|
+
// We can look up the conversation title via the segment's conversationId in memory_segments.
|
|
665
|
+
// However, segments already reference a conversationId in the schema — but the Candidate type
|
|
666
|
+
// doesn't carry it. For now, skip segment source labels as the join path would require
|
|
667
|
+
// importing memorySegments and an additional query. The primary value is item source labels.
|
|
668
|
+
} catch (err) {
|
|
669
|
+
log.warn({ err }, "Failed to enrich candidates with source labels");
|
|
898
670
|
}
|
|
899
|
-
return result;
|
|
900
671
|
}
|
|
901
672
|
|
|
673
|
+
/**
|
|
674
|
+
* Strip memory recall messages from the conversation history.
|
|
675
|
+
*
|
|
676
|
+
* Handles both exact text matching and `<memory_context>` XML wrapper
|
|
677
|
+
* detection: when the recall text starts with `<memory_context>`, we
|
|
678
|
+
* also match user messages whose sole text block starts with the same
|
|
679
|
+
* tag (covering cases where the exact text differs slightly due to
|
|
680
|
+
* dynamic content).
|
|
681
|
+
*/
|
|
902
682
|
export function stripMemoryRecallMessages<
|
|
903
683
|
T extends {
|
|
904
684
|
role: "user" | "assistant";
|
|
@@ -918,6 +698,25 @@ export function stripMemoryRecallMessages<
|
|
|
918
698
|
msg.content[0].type === "text" &&
|
|
919
699
|
msg.content[0].text === MEMORY_CONTEXT_ACK;
|
|
920
700
|
|
|
701
|
+
// Check if the recall text uses the <memory_context> XML format
|
|
702
|
+
const isMemoryContextFormat = recallText
|
|
703
|
+
.trimStart()
|
|
704
|
+
.startsWith("<memory_context>");
|
|
705
|
+
|
|
706
|
+
// Helper: does a text block match the recall text?
|
|
707
|
+
const textMatches = (text: string | undefined): boolean => {
|
|
708
|
+
if (!text) return false;
|
|
709
|
+
if (text === recallText) return true;
|
|
710
|
+
// For <memory_context> format, match any block that starts with the tag
|
|
711
|
+
if (
|
|
712
|
+
isMemoryContextFormat &&
|
|
713
|
+
text.trimStart().startsWith("<memory_context>")
|
|
714
|
+
) {
|
|
715
|
+
return true;
|
|
716
|
+
}
|
|
717
|
+
return false;
|
|
718
|
+
};
|
|
719
|
+
|
|
921
720
|
// Prefer the canonical separate_context_message pair: a user message whose
|
|
922
721
|
// sole text block is the recall text, followed by an assistant ack. This
|
|
923
722
|
// must be checked first so that a real user message that happens to contain
|
|
@@ -928,7 +727,7 @@ export function stripMemoryRecallMessages<
|
|
|
928
727
|
if (msg.role !== "user") continue;
|
|
929
728
|
if (msg.content.length !== 1) continue;
|
|
930
729
|
const block = msg.content[0];
|
|
931
|
-
if (block.type !== "text" || block.text
|
|
730
|
+
if (block.type !== "text" || !textMatches(block.text)) continue;
|
|
932
731
|
const next = messages[i + 1];
|
|
933
732
|
if (next && isAck(next)) {
|
|
934
733
|
return [...messages.slice(0, i), ...messages.slice(i + 2)];
|
|
@@ -937,7 +736,7 @@ export function stripMemoryRecallMessages<
|
|
|
937
736
|
}
|
|
938
737
|
|
|
939
738
|
// Fall back to generic text-match removal: find the last user message
|
|
940
|
-
// containing the recall text block
|
|
739
|
+
// containing the recall text block.
|
|
941
740
|
let targetIndex = -1;
|
|
942
741
|
let blockIndex = -1;
|
|
943
742
|
for (let i = messages.length - 1; i >= 0; i--) {
|
|
@@ -945,7 +744,7 @@ export function stripMemoryRecallMessages<
|
|
|
945
744
|
if (msg.role !== "user" || msg.content.length === 0) continue;
|
|
946
745
|
for (let bi = msg.content.length - 1; bi >= 0; bi--) {
|
|
947
746
|
const block = msg.content[bi];
|
|
948
|
-
if (block.type === "text" && block.text
|
|
747
|
+
if (block.type === "text" && textMatches(block.text)) {
|
|
949
748
|
targetIndex = i;
|
|
950
749
|
blockIndex = bi;
|
|
951
750
|
break;
|
|
@@ -983,21 +782,6 @@ export function stripMemoryRecallMessages<
|
|
|
983
782
|
return cleaned;
|
|
984
783
|
}
|
|
985
784
|
|
|
986
|
-
export function injectMemoryRecallIntoUserMessage<
|
|
987
|
-
T extends {
|
|
988
|
-
role: "user" | "assistant";
|
|
989
|
-
content: Array<{ type: string; text?: string }>;
|
|
990
|
-
},
|
|
991
|
-
>(message: T, memoryRecallText: string): T {
|
|
992
|
-
if (message.role !== "user") return message;
|
|
993
|
-
if (memoryRecallText.trim().length === 0) return message;
|
|
994
|
-
const memoryBlock = { type: "text", text: memoryRecallText } as const;
|
|
995
|
-
return {
|
|
996
|
-
...message,
|
|
997
|
-
content: [memoryBlock, ...message.content] as T["content"],
|
|
998
|
-
} as T;
|
|
999
|
-
}
|
|
1000
|
-
|
|
1001
785
|
/**
|
|
1002
786
|
* Inject memory recall as a separate user+assistant message pair before the
|
|
1003
787
|
* last user message. This separates memory context from the user's actual
|
|
@@ -1049,18 +833,10 @@ function emptyResult(
|
|
|
1049
833
|
reason: init.reason,
|
|
1050
834
|
provider: init.provider,
|
|
1051
835
|
model: init.model,
|
|
1052
|
-
lexicalHits: 0,
|
|
1053
836
|
semanticHits: 0,
|
|
1054
837
|
recencyHits: 0,
|
|
1055
|
-
entityHits: 0,
|
|
1056
|
-
relationSeedEntityCount: 0,
|
|
1057
|
-
relationTraversedEdgeCount: 0,
|
|
1058
|
-
relationNeighborEntityCount: 0,
|
|
1059
|
-
relationExpandedItemCount: 0,
|
|
1060
|
-
earlyTerminated: false,
|
|
1061
838
|
mergedCount: 0,
|
|
1062
839
|
selectedCount: 0,
|
|
1063
|
-
rerankApplied: false,
|
|
1064
840
|
injectedTokens: 0,
|
|
1065
841
|
injectedText: "",
|
|
1066
842
|
latencyMs: init.latencyMs,
|