@vellumai/assistant 0.4.49 → 0.4.50
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ARCHITECTURE.md +24 -33
- package/README.md +3 -3
- package/docs/architecture/memory.md +180 -119
- package/package.json +2 -2
- package/src/__tests__/agent-loop.test.ts +3 -1
- package/src/__tests__/anthropic-provider.test.ts +114 -23
- package/src/__tests__/approval-cascade.test.ts +1 -15
- package/src/__tests__/approval-routes-http.test.ts +2 -0
- package/src/__tests__/assistant-feature-flag-guard.test.ts +0 -23
- package/src/__tests__/canonical-guardian-store.test.ts +95 -0
- package/src/__tests__/checker.test.ts +13 -0
- package/src/__tests__/config-schema.test.ts +1 -68
- package/src/__tests__/context-memory-e2e.test.ts +11 -100
- package/src/__tests__/conversation-routes-guardian-reply.test.ts +8 -0
- package/src/__tests__/conversation-routes-slash-commands.test.ts +1 -0
- package/src/__tests__/credential-security-e2e.test.ts +1 -0
- package/src/__tests__/credential-vault-unit.test.ts +4 -0
- package/src/__tests__/credential-vault.test.ts +13 -1
- package/src/__tests__/cu-unified-flow.test.ts +532 -0
- package/src/__tests__/date-context.test.ts +93 -77
- package/src/__tests__/deterministic-verification-control-plane.test.ts +64 -0
- package/src/__tests__/guardian-routing-invariants.test.ts +93 -0
- package/src/__tests__/history-repair.test.ts +245 -0
- package/src/__tests__/host-cu-proxy.test.ts +165 -3
- package/src/__tests__/http-user-message-parity.test.ts +1 -0
- package/src/__tests__/invite-redemption-service.test.ts +65 -1
- package/src/__tests__/keychain-broker-client.test.ts +4 -4
- package/src/__tests__/memory-context-benchmark.benchmark.test.ts +56 -18
- package/src/__tests__/memory-lifecycle-e2e.test.ts +244 -387
- package/src/__tests__/memory-recall-quality.test.ts +244 -407
- package/src/__tests__/memory-regressions.experimental.test.ts +126 -101
- package/src/__tests__/memory-regressions.test.ts +477 -2841
- package/src/__tests__/memory-retrieval.benchmark.test.ts +33 -150
- package/src/__tests__/memory-upsert-concurrency.test.ts +5 -244
- package/src/__tests__/mime-builder.test.ts +28 -0
- package/src/__tests__/native-web-search.test.ts +1 -0
- package/src/__tests__/oauth-cli.test.ts +572 -5
- package/src/__tests__/oauth-store.test.ts +120 -6
- package/src/__tests__/qdrant-collection-migration.test.ts +53 -8
- package/src/__tests__/registry.test.ts +0 -1
- package/src/__tests__/relay-server.test.ts +46 -1
- package/src/__tests__/schedule-tools.test.ts +32 -0
- package/src/__tests__/script-proxy-certs.test.ts +1 -1
- package/src/__tests__/secret-onetime-send.test.ts +1 -0
- package/src/__tests__/secure-keys.test.ts +7 -2
- package/src/__tests__/send-endpoint-busy.test.ts +3 -0
- package/src/__tests__/session-abort-tool-results.test.ts +1 -14
- package/src/__tests__/session-agent-loop-overflow.test.ts +1583 -0
- package/src/__tests__/session-agent-loop.test.ts +19 -15
- package/src/__tests__/session-confirmation-signals.test.ts +1 -15
- package/src/__tests__/session-error.test.ts +124 -2
- package/src/__tests__/session-history-web-search.test.ts +918 -0
- package/src/__tests__/session-pre-run-repair.test.ts +1 -14
- package/src/__tests__/session-provider-retry-repair.test.ts +25 -28
- package/src/__tests__/session-queue.test.ts +37 -27
- package/src/__tests__/session-runtime-assembly.test.ts +54 -0
- package/src/__tests__/session-slash-known.test.ts +1 -15
- package/src/__tests__/session-slash-queue.test.ts +1 -15
- package/src/__tests__/session-slash-unknown.test.ts +1 -15
- package/src/__tests__/session-workspace-cache-state.test.ts +3 -33
- package/src/__tests__/session-workspace-injection.test.ts +3 -37
- package/src/__tests__/session-workspace-tool-tracking.test.ts +3 -37
- package/src/__tests__/skills-install-extract.test.ts +93 -0
- package/src/__tests__/skillssh-registry.test.ts +451 -0
- package/src/__tests__/trust-store.test.ts +15 -0
- package/src/__tests__/voice-invite-redemption.test.ts +32 -1
- package/src/agent/ax-tree-compaction.test.ts +51 -0
- package/src/agent/loop.ts +39 -12
- package/src/approvals/AGENTS.md +1 -1
- package/src/approvals/guardian-request-resolvers.ts +14 -2
- package/src/bundler/compiler-tools.ts +66 -2
- package/src/calls/call-domain.ts +132 -0
- package/src/calls/call-store.ts +6 -0
- package/src/calls/relay-server.ts +43 -5
- package/src/calls/relay-setup-router.ts +17 -1
- package/src/calls/twilio-config.ts +1 -1
- package/src/calls/types.ts +3 -1
- package/src/cli/commands/doctor.ts +4 -3
- package/src/cli/commands/mcp.ts +46 -59
- package/src/cli/commands/memory.ts +16 -165
- package/src/cli/commands/oauth/apps.ts +31 -2
- package/src/cli/commands/oauth/connections.ts +431 -97
- package/src/cli/commands/oauth/providers.ts +15 -1
- package/src/cli/commands/sessions.ts +5 -2
- package/src/cli/commands/skills.ts +173 -1
- package/src/cli/http-client.ts +0 -20
- package/src/cli/main-screen.tsx +2 -2
- package/src/cli/program.ts +5 -6
- package/src/cli.ts +4 -10
- package/src/config/bundled-skills/computer-use/TOOLS.json +1 -1
- package/src/config/bundled-skills/computer-use/tools/computer-use-observe.ts +12 -0
- package/src/config/bundled-tool-registry.ts +2 -5
- package/src/config/schema.ts +1 -12
- package/src/config/schemas/memory-lifecycle.ts +0 -9
- package/src/config/schemas/memory-processing.ts +0 -180
- package/src/config/schemas/memory-retrieval.ts +32 -104
- package/src/config/schemas/memory.ts +0 -10
- package/src/config/types.ts +0 -4
- package/src/context/window-manager.ts +4 -1
- package/src/daemon/config-watcher.ts +61 -3
- package/src/daemon/daemon-control.ts +1 -1
- package/src/daemon/date-context.ts +114 -31
- package/src/daemon/handlers/sessions.ts +18 -13
- package/src/daemon/handlers/skills.ts +20 -1
- package/src/daemon/history-repair.ts +72 -8
- package/src/daemon/host-cu-proxy.ts +55 -26
- package/src/daemon/lifecycle.ts +31 -3
- package/src/daemon/mcp-reload-service.ts +2 -2
- package/src/daemon/message-types/computer-use.ts +1 -12
- package/src/daemon/message-types/memory.ts +4 -16
- package/src/daemon/message-types/messages.ts +1 -0
- package/src/daemon/message-types/sessions.ts +4 -0
- package/src/daemon/server.ts +12 -1
- package/src/daemon/session-agent-loop-handlers.ts +38 -0
- package/src/daemon/session-agent-loop.ts +334 -48
- package/src/daemon/session-error.ts +89 -6
- package/src/daemon/session-history.ts +17 -7
- package/src/daemon/session-media-retry.ts +6 -2
- package/src/daemon/session-memory.ts +69 -149
- package/src/daemon/session-process.ts +10 -1
- package/src/daemon/session-runtime-assembly.ts +49 -19
- package/src/daemon/session-surfaces.ts +4 -1
- package/src/daemon/session-tool-setup.ts +7 -1
- package/src/daemon/session.ts +12 -2
- package/src/instrument.ts +61 -1
- package/src/memory/admin.ts +2 -191
- package/src/memory/canonical-guardian-store.ts +38 -2
- package/src/memory/conversation-crud.ts +0 -33
- package/src/memory/conversation-queries.ts +22 -3
- package/src/memory/db-init.ts +28 -0
- package/src/memory/embedding-backend.ts +84 -8
- package/src/memory/embedding-types.ts +9 -1
- package/src/memory/indexer.ts +7 -46
- package/src/memory/items-extractor.ts +274 -76
- package/src/memory/job-handlers/backfill.ts +2 -127
- package/src/memory/job-handlers/cleanup.ts +2 -16
- package/src/memory/job-handlers/extraction.ts +2 -138
- package/src/memory/job-handlers/index-maintenance.ts +1 -6
- package/src/memory/job-handlers/summarization.ts +3 -148
- package/src/memory/job-utils.ts +21 -59
- package/src/memory/jobs-store.ts +1 -159
- package/src/memory/jobs-worker.ts +9 -52
- package/src/memory/migrations/104-core-indexes.ts +3 -3
- package/src/memory/migrations/149-oauth-tables.ts +2 -0
- package/src/memory/migrations/150-oauth-apps-client-secret-path.ts +98 -0
- package/src/memory/migrations/151-oauth-providers-ping-url.ts +11 -0
- package/src/memory/migrations/152-memory-item-supersession.ts +44 -0
- package/src/memory/migrations/153-drop-entity-tables.ts +15 -0
- package/src/memory/migrations/154-drop-fts.ts +20 -0
- package/src/memory/migrations/155-drop-conflicts.ts +7 -0
- package/src/memory/migrations/156-call-session-invite-metadata.ts +24 -0
- package/src/memory/migrations/index.ts +7 -0
- package/src/memory/qdrant-client.ts +148 -51
- package/src/memory/raw-query.ts +1 -1
- package/src/memory/retriever.test.ts +294 -273
- package/src/memory/retriever.ts +421 -645
- package/src/memory/schema/calls.ts +2 -0
- package/src/memory/schema/memory-core.ts +3 -48
- package/src/memory/schema/oauth.ts +2 -0
- package/src/memory/search/formatting.ts +263 -176
- package/src/memory/search/lexical.ts +1 -254
- package/src/memory/search/ranking.ts +0 -455
- package/src/memory/search/semantic.ts +100 -14
- package/src/memory/search/staleness.ts +47 -0
- package/src/memory/search/tier-classifier.ts +21 -0
- package/src/memory/search/types.ts +15 -77
- package/src/memory/task-memory-cleanup.ts +4 -6
- package/src/messaging/providers/gmail/mime-builder.ts +17 -7
- package/src/oauth/byo-connection.test.ts +8 -1
- package/src/oauth/oauth-store.ts +113 -27
- package/src/oauth/seed-providers.ts +6 -0
- package/src/oauth/token-persistence.ts +11 -3
- package/src/permissions/defaults.ts +1 -0
- package/src/permissions/trust-store.ts +23 -1
- package/src/playbooks/playbook-compiler.ts +1 -1
- package/src/prompts/system-prompt.ts +18 -2
- package/src/providers/anthropic/client.ts +56 -126
- package/src/providers/types.ts +7 -1
- package/src/runtime/AGENTS.md +9 -0
- package/src/runtime/auth/route-policy.ts +6 -3
- package/src/runtime/guardian-reply-router.ts +24 -22
- package/src/runtime/http-server.ts +2 -2
- package/src/runtime/invite-redemption-service.ts +19 -1
- package/src/runtime/invite-service.ts +25 -0
- package/src/runtime/pending-interactions.ts +2 -2
- package/src/runtime/routes/brain-graph-routes.ts +10 -90
- package/src/runtime/routes/conversation-routes.ts +9 -1
- package/src/runtime/routes/inbound-stages/acl-enforcement.ts +21 -12
- package/src/runtime/routes/memory-item-routes.test.ts +754 -0
- package/src/runtime/routes/memory-item-routes.ts +503 -0
- package/src/runtime/routes/session-management-routes.ts +3 -3
- package/src/runtime/routes/settings-routes.ts +2 -2
- package/src/runtime/routes/trust-rules-routes.ts +14 -0
- package/src/runtime/routes/workspace-routes.ts +2 -1
- package/src/security/keychain-broker-client.ts +17 -4
- package/src/security/secure-keys.ts +25 -3
- package/src/security/token-manager.ts +36 -36
- package/src/skills/catalog-install.ts +74 -18
- package/src/skills/skillssh-registry.ts +503 -0
- package/src/tools/assets/search.ts +5 -1
- package/src/tools/computer-use/definitions.ts +0 -10
- package/src/tools/computer-use/registry.ts +1 -1
- package/src/tools/credentials/vault.ts +1 -3
- package/src/tools/memory/definitions.ts +4 -13
- package/src/tools/memory/handlers.test.ts +83 -103
- package/src/tools/memory/handlers.ts +50 -85
- package/src/tools/schedule/create.ts +8 -1
- package/src/tools/schedule/update.ts +8 -1
- package/src/tools/skills/load.ts +25 -2
- package/src/__tests__/clarification-resolver.test.ts +0 -193
- package/src/__tests__/conflict-intent-tokenization.test.ts +0 -160
- package/src/__tests__/conflict-policy.test.ts +0 -269
- package/src/__tests__/conflict-store.test.ts +0 -372
- package/src/__tests__/contradiction-checker.test.ts +0 -361
- package/src/__tests__/entity-extractor.test.ts +0 -211
- package/src/__tests__/entity-search.test.ts +0 -1117
- package/src/__tests__/profile-compiler.test.ts +0 -392
- package/src/__tests__/session-conflict-gate.test.ts +0 -1228
- package/src/__tests__/session-profile-injection.test.ts +0 -557
- package/src/config/bundled-skills/knowledge-graph/SKILL.md +0 -25
- package/src/config/bundled-skills/knowledge-graph/TOOLS.json +0 -66
- package/src/config/bundled-skills/knowledge-graph/tools/graph-query.ts +0 -211
- package/src/daemon/session-conflict-gate.ts +0 -167
- package/src/daemon/session-dynamic-profile.ts +0 -77
- package/src/memory/clarification-resolver.ts +0 -417
- package/src/memory/conflict-intent.ts +0 -205
- package/src/memory/conflict-policy.ts +0 -127
- package/src/memory/conflict-store.ts +0 -410
- package/src/memory/contradiction-checker.ts +0 -508
- package/src/memory/entity-extractor.ts +0 -535
- package/src/memory/format-recall.ts +0 -47
- package/src/memory/fts-reconciler.ts +0 -165
- package/src/memory/job-handlers/conflict.ts +0 -200
- package/src/memory/profile-compiler.ts +0 -195
- package/src/memory/recall-cache.ts +0 -117
- package/src/memory/search/entity.ts +0 -535
- package/src/memory/search/query-expansion.test.ts +0 -70
- package/src/memory/search/query-expansion.ts +0 -118
- package/src/runtime/routes/mcp-routes.ts +0 -20
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { and, eq, sql } from "drizzle-orm";
|
|
1
|
+
import { and, eq, like, sql } from "drizzle-orm";
|
|
2
2
|
import { v4 as uuid } from "uuid";
|
|
3
3
|
|
|
4
4
|
import { getConfig } from "../config/loader.js";
|
|
@@ -15,30 +15,23 @@ import { getDb } from "./db.js";
|
|
|
15
15
|
import { computeMemoryFingerprint } from "./fingerprint.js";
|
|
16
16
|
import { enqueueMemoryJob } from "./jobs-store.js";
|
|
17
17
|
import { extractTextFromStoredMessageContent } from "./message-content.js";
|
|
18
|
-
import {
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
memoryItemSources,
|
|
22
|
-
messages,
|
|
23
|
-
} from "./schema.js";
|
|
18
|
+
import { withQdrantBreaker } from "./qdrant-circuit-breaker.js";
|
|
19
|
+
import { getQdrantClient } from "./qdrant-client.js";
|
|
20
|
+
import { memoryItems, memoryItemSources, messages } from "./schema.js";
|
|
24
21
|
import { isConversationFailed } from "./task-memory-cleanup.js";
|
|
25
22
|
import { clampUnitInterval } from "./validation.js";
|
|
26
23
|
|
|
27
24
|
const log = getLogger("memory-items-extractor");
|
|
28
25
|
|
|
29
26
|
export type MemoryItemKind =
|
|
27
|
+
| "identity"
|
|
30
28
|
| "preference"
|
|
31
|
-
| "profile"
|
|
32
29
|
| "project"
|
|
33
30
|
| "decision"
|
|
34
|
-
| "todo"
|
|
35
|
-
| "fact"
|
|
36
31
|
| "constraint"
|
|
37
|
-
| "relationship"
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
| "instruction"
|
|
41
|
-
| "style";
|
|
32
|
+
| "event";
|
|
33
|
+
|
|
34
|
+
export type OverrideConfidence = "explicit" | "tentative" | "inferred";
|
|
42
35
|
|
|
43
36
|
interface ExtractedItem {
|
|
44
37
|
kind: MemoryItemKind;
|
|
@@ -47,26 +40,37 @@ interface ExtractedItem {
|
|
|
47
40
|
confidence: number;
|
|
48
41
|
importance: number;
|
|
49
42
|
fingerprint: string;
|
|
43
|
+
supersedes: string | null;
|
|
44
|
+
overrideConfidence: OverrideConfidence;
|
|
45
|
+
/** True when the LLM emitted a supersedes ID that was rejected (hallucinated). */
|
|
46
|
+
supersedesRejected?: boolean;
|
|
50
47
|
}
|
|
51
48
|
|
|
52
49
|
const VALID_KINDS = new Set<string>([
|
|
50
|
+
"identity",
|
|
53
51
|
"preference",
|
|
54
|
-
"profile",
|
|
55
52
|
"project",
|
|
56
53
|
"decision",
|
|
57
|
-
"todo",
|
|
58
|
-
"fact",
|
|
59
54
|
"constraint",
|
|
60
|
-
"relationship",
|
|
61
55
|
"event",
|
|
62
|
-
"opinion",
|
|
63
|
-
"instruction",
|
|
64
|
-
"style",
|
|
65
56
|
]);
|
|
66
57
|
|
|
58
|
+
/** Maps old kind names to their new equivalents for graceful migration. */
|
|
59
|
+
const KIND_MIGRATION_MAP: Record<string, MemoryItemKind> = {
|
|
60
|
+
profile: "identity",
|
|
61
|
+
fact: "identity",
|
|
62
|
+
relationship: "identity",
|
|
63
|
+
opinion: "preference",
|
|
64
|
+
todo: "project",
|
|
65
|
+
instruction: "constraint",
|
|
66
|
+
style: "preference",
|
|
67
|
+
};
|
|
68
|
+
|
|
67
69
|
const SUPERSEDE_KINDS = new Set<MemoryItemKind>([
|
|
68
|
-
"
|
|
70
|
+
"identity",
|
|
69
71
|
"preference",
|
|
72
|
+
"project",
|
|
73
|
+
"decision",
|
|
70
74
|
"constraint",
|
|
71
75
|
]);
|
|
72
76
|
|
|
@@ -129,32 +133,39 @@ function hasSemanticDensity(text: string): boolean {
|
|
|
129
133
|
|
|
130
134
|
// ── LLM-powered extraction ────────────────────────────────────────────
|
|
131
135
|
|
|
132
|
-
|
|
136
|
+
function buildExtractionSystemPrompt(
|
|
137
|
+
existingItems: Array<{
|
|
138
|
+
id: string;
|
|
139
|
+
kind: string;
|
|
140
|
+
subject: string;
|
|
141
|
+
statement: string;
|
|
142
|
+
}>,
|
|
143
|
+
): string {
|
|
144
|
+
let prompt = `You are a memory extraction system. Given a message from a conversation, extract structured memory items that would be valuable to remember for future interactions.
|
|
133
145
|
|
|
134
146
|
Extract items in these categories:
|
|
135
|
-
-
|
|
136
|
-
-
|
|
137
|
-
- project: Project names, repos, tech stacks, architecture details
|
|
147
|
+
- identity: Personal info (name, role, location, timezone, background), notable facts, relationships between people/teams/systems
|
|
148
|
+
- preference: User likes, dislikes, preferred approaches/tools/styles, communication style patterns, opinions and evaluations
|
|
149
|
+
- project: Project names, repos, tech stacks, architecture details, action items, follow-ups, things to do later
|
|
138
150
|
- decision: Choices made, approaches selected, trade-offs resolved
|
|
139
|
-
-
|
|
140
|
-
- fact: Notable facts, definitions, technical details worth remembering
|
|
141
|
-
- constraint: Rules, requirements, things that must/must not be done
|
|
142
|
-
- relationship: Connections between people, teams, projects, systems
|
|
151
|
+
- constraint: Rules, requirements, things that must/must not be done, explicit directives on how the assistant should behave
|
|
143
152
|
- event: Deadlines, milestones, meetings, releases, dates
|
|
144
|
-
- opinion: Viewpoints, assessments, evaluations of tools/approaches
|
|
145
|
-
- instruction: Explicit directives on how the assistant should behave
|
|
146
|
-
- style: Communication style patterns — writing tone, formatting habits, vocabulary choices, greeting/sign-off conventions
|
|
147
153
|
|
|
148
154
|
For each item, provide:
|
|
149
155
|
- kind: One of the categories above
|
|
150
156
|
- subject: A short label (2-8 words) identifying what this is about
|
|
151
|
-
- statement:
|
|
157
|
+
- statement: A relationship-rich factual statement to remember (1-2 sentences). Include relational context — who recommended it, why it matters, how it connects to other facts. For example, write "Data processing library that Sarah from Marketing recommended for the Q4 pipeline rewrite" instead of just "Uses pandas".
|
|
152
158
|
- confidence: How confident you are this is accurate (0.0-1.0)
|
|
153
159
|
- importance: How valuable this is to remember (0.0-1.0)
|
|
154
160
|
- 1.0: Explicit user instructions about assistant behavior
|
|
155
161
|
- 0.8-0.9: Personal facts, strong preferences, key decisions
|
|
156
162
|
- 0.6-0.7: Project details, constraints, opinions
|
|
157
163
|
- 0.3-0.5: Contextual details, minor preferences
|
|
164
|
+
- supersedes: If this item replaces an existing memory item, set this to the ID of the item it replaces. Use null if it does not replace anything. Determine supersession by understanding the semantic meaning — do not rely on keyword matching.
|
|
165
|
+
- overrideConfidence: How confident you are that this overrides an existing item:
|
|
166
|
+
- "explicit": Clear override signal (e.g., "Actually I now prefer X", "I changed my mind about Y", "We switched from A to B")
|
|
167
|
+
- "tentative": Ambiguous — the new information might override the old, but it's not certain
|
|
168
|
+
- "inferred": Weak signal — possibly related to an existing item but no clear override intent
|
|
158
169
|
|
|
159
170
|
Rules:
|
|
160
171
|
- Only extract genuinely memorable information. Skip pleasantries, filler, and transient discussion.
|
|
@@ -163,12 +174,94 @@ Rules:
|
|
|
163
174
|
- Prefer fewer high-quality items over many low-quality ones.
|
|
164
175
|
- If the message contains no memorable information, return an empty array.`;
|
|
165
176
|
|
|
177
|
+
if (existingItems.length > 0) {
|
|
178
|
+
prompt += `\n\nExisting memory items (use these to identify supersession targets — set \`supersedes\` to the item ID if the new information replaces one of these):\n`;
|
|
179
|
+
for (const item of existingItems) {
|
|
180
|
+
prompt += `- [${item.id}] (${item.kind}) ${item.subject}: ${item.statement}\n`;
|
|
181
|
+
}
|
|
182
|
+
}
|
|
183
|
+
|
|
184
|
+
return prompt;
|
|
185
|
+
}
|
|
186
|
+
|
|
187
|
+
const VALID_OVERRIDE_CONFIDENCES = new Set<string>([
|
|
188
|
+
"explicit",
|
|
189
|
+
"tentative",
|
|
190
|
+
"inferred",
|
|
191
|
+
]);
|
|
192
|
+
|
|
166
193
|
interface LLMExtractedItem {
|
|
167
194
|
kind: string;
|
|
168
195
|
subject: string;
|
|
169
196
|
statement: string;
|
|
170
197
|
confidence: number;
|
|
171
198
|
importance: number;
|
|
199
|
+
supersedes: string | null;
|
|
200
|
+
overrideConfidence: string;
|
|
201
|
+
}
|
|
202
|
+
|
|
203
|
+
/**
|
|
204
|
+
* Query up to 10 active items in the scope — subject-prefix matches first, then other active items as backfill — to give the
|
|
205
|
+
* extraction LLM awareness of existing items it might supersede.
|
|
206
|
+
* This is a write-path-only heuristic — not used at read time.
|
|
207
|
+
*/
|
|
208
|
+
function queryExistingItemsForContext(
|
|
209
|
+
scopeId: string,
|
|
210
|
+
text: string,
|
|
211
|
+
): Array<{ id: string; kind: string; subject: string; statement: string }> {
|
|
212
|
+
const db = getDb();
|
|
213
|
+
|
|
214
|
+
// Extract a rough subject prefix from the first few words of the text
|
|
215
|
+
const words = text.trim().split(/\s+/).slice(0, 3).join(" ");
|
|
216
|
+
// Strip LIKE wildcards (% and _) so user text containing them doesn't alter query semantics
|
|
217
|
+
const escaped = words.replace(/%/g, "").replace(/_/g, "");
|
|
218
|
+
const subjectPrefix = escaped.length > 0 ? `${escaped}%` : "%";
|
|
219
|
+
|
|
220
|
+
// Query active items matching subject prefix, limited to 10
|
|
221
|
+
const rows = db
|
|
222
|
+
.select({
|
|
223
|
+
id: memoryItems.id,
|
|
224
|
+
kind: memoryItems.kind,
|
|
225
|
+
subject: memoryItems.subject,
|
|
226
|
+
statement: memoryItems.statement,
|
|
227
|
+
})
|
|
228
|
+
.from(memoryItems)
|
|
229
|
+
.where(
|
|
230
|
+
and(
|
|
231
|
+
eq(memoryItems.scopeId, scopeId),
|
|
232
|
+
eq(memoryItems.status, "active"),
|
|
233
|
+
like(memoryItems.subject, subjectPrefix),
|
|
234
|
+
),
|
|
235
|
+
)
|
|
236
|
+
.limit(10)
|
|
237
|
+
.all();
|
|
238
|
+
|
|
239
|
+
// If the prefix match yielded fewer than 10 results, backfill with other active items (no ordering is applied)
|
|
240
|
+
if (rows.length < 10) {
|
|
241
|
+
const existingIds = new Set(rows.map((r) => r.id));
|
|
242
|
+
const backfill = db
|
|
243
|
+
.select({
|
|
244
|
+
id: memoryItems.id,
|
|
245
|
+
kind: memoryItems.kind,
|
|
246
|
+
subject: memoryItems.subject,
|
|
247
|
+
statement: memoryItems.statement,
|
|
248
|
+
})
|
|
249
|
+
.from(memoryItems)
|
|
250
|
+
.where(
|
|
251
|
+
and(eq(memoryItems.scopeId, scopeId), eq(memoryItems.status, "active")),
|
|
252
|
+
)
|
|
253
|
+
.limit(10 - rows.length)
|
|
254
|
+
.all();
|
|
255
|
+
|
|
256
|
+
for (const row of backfill) {
|
|
257
|
+
if (!existingIds.has(row.id)) {
|
|
258
|
+
rows.push(row);
|
|
259
|
+
existingIds.add(row.id);
|
|
260
|
+
}
|
|
261
|
+
}
|
|
262
|
+
}
|
|
263
|
+
|
|
264
|
+
return rows;
|
|
172
265
|
}
|
|
173
266
|
|
|
174
267
|
async function extractItemsWithLLM(
|
|
@@ -188,6 +281,10 @@ async function extractItemsWithLLM(
|
|
|
188
281
|
const { signal, cleanup } = createTimeout(15000);
|
|
189
282
|
|
|
190
283
|
try {
|
|
284
|
+
// Query existing items to give the LLM supersession context
|
|
285
|
+
const existingItems = queryExistingItemsForContext(scopeId, text);
|
|
286
|
+
const systemPrompt = buildExtractionSystemPrompt(existingItems);
|
|
287
|
+
|
|
191
288
|
const response = await provider.sendMessage(
|
|
192
289
|
[userMessage(text)],
|
|
193
290
|
[
|
|
@@ -215,7 +312,7 @@ async function extractItemsWithLLM(
|
|
|
215
312
|
statement: {
|
|
216
313
|
type: "string",
|
|
217
314
|
description:
|
|
218
|
-
"
|
|
315
|
+
"Relationship-rich factual statement to remember (1-2 sentences). Include relational context.",
|
|
219
316
|
},
|
|
220
317
|
confidence: {
|
|
221
318
|
type: "number",
|
|
@@ -227,6 +324,17 @@ async function extractItemsWithLLM(
|
|
|
227
324
|
description:
|
|
228
325
|
"How valuable this is to remember (0.0-1.0)",
|
|
229
326
|
},
|
|
327
|
+
supersedes: {
|
|
328
|
+
type: ["string", "null"],
|
|
329
|
+
description:
|
|
330
|
+
"ID of the existing memory item this replaces, or null if not replacing anything",
|
|
331
|
+
},
|
|
332
|
+
overrideConfidence: {
|
|
333
|
+
type: "string",
|
|
334
|
+
enum: ["explicit", "tentative", "inferred"],
|
|
335
|
+
description:
|
|
336
|
+
"How confident you are that this overrides an existing item: explicit (clear override), tentative (ambiguous), inferred (weak signal)",
|
|
337
|
+
},
|
|
230
338
|
},
|
|
231
339
|
required: [
|
|
232
340
|
"kind",
|
|
@@ -234,6 +342,8 @@ async function extractItemsWithLLM(
|
|
|
234
342
|
"statement",
|
|
235
343
|
"confidence",
|
|
236
344
|
"importance",
|
|
345
|
+
"supersedes",
|
|
346
|
+
"overrideConfidence",
|
|
237
347
|
],
|
|
238
348
|
},
|
|
239
349
|
},
|
|
@@ -242,7 +352,7 @@ async function extractItemsWithLLM(
|
|
|
242
352
|
},
|
|
243
353
|
},
|
|
244
354
|
],
|
|
245
|
-
|
|
355
|
+
systemPrompt,
|
|
246
356
|
{
|
|
247
357
|
config: {
|
|
248
358
|
modelIntent: extractionConfig.modelIntent,
|
|
@@ -270,9 +380,14 @@ async function extractItemsWithLLM(
|
|
|
270
380
|
return extractItemsPatternBased(text, scopeId);
|
|
271
381
|
}
|
|
272
382
|
|
|
383
|
+
// Build set of known existing item IDs for supersession validation
|
|
384
|
+
const existingItemIds = new Set(existingItems.map((e) => e.id));
|
|
385
|
+
|
|
273
386
|
const items: ExtractedItem[] = [];
|
|
274
387
|
for (const raw of input.items) {
|
|
275
|
-
|
|
388
|
+
// Apply kind migration map for old kind names, then validate
|
|
389
|
+
const resolvedKind = KIND_MIGRATION_MAP[raw.kind] ?? raw.kind;
|
|
390
|
+
if (!VALID_KINDS.has(resolvedKind)) continue;
|
|
276
391
|
if (!raw.subject || !raw.statement) continue;
|
|
277
392
|
const subject = truncate(String(raw.subject), 80, "");
|
|
278
393
|
const statement = truncate(String(raw.statement), 500, "");
|
|
@@ -280,17 +395,38 @@ async function extractItemsWithLLM(
|
|
|
280
395
|
const importance = clampUnitInterval(parseScore(raw.importance, 0.5));
|
|
281
396
|
const fingerprint = computeMemoryFingerprint(
|
|
282
397
|
scopeId,
|
|
283
|
-
|
|
398
|
+
resolvedKind,
|
|
284
399
|
subject,
|
|
285
400
|
statement,
|
|
286
401
|
);
|
|
402
|
+
|
|
403
|
+
// Validate supersedes: must reference a known existing item ID.
|
|
404
|
+
// Reject hallucinated IDs that don't match any item we showed the LLM.
|
|
405
|
+
const rawSupersedes =
|
|
406
|
+
typeof raw.supersedes === "string" && raw.supersedes.length > 0
|
|
407
|
+
? raw.supersedes
|
|
408
|
+
: null;
|
|
409
|
+
const supersedes =
|
|
410
|
+
rawSupersedes && existingItemIds.has(rawSupersedes)
|
|
411
|
+
? rawSupersedes
|
|
412
|
+
: null;
|
|
413
|
+
const supersedesRejected = !!rawSupersedes && !supersedes;
|
|
414
|
+
const overrideConfidence = VALID_OVERRIDE_CONFIDENCES.has(
|
|
415
|
+
raw.overrideConfidence,
|
|
416
|
+
)
|
|
417
|
+
? (raw.overrideConfidence as OverrideConfidence)
|
|
418
|
+
: "inferred";
|
|
419
|
+
|
|
287
420
|
items.push({
|
|
288
|
-
kind:
|
|
421
|
+
kind: resolvedKind as MemoryItemKind,
|
|
289
422
|
subject,
|
|
290
423
|
statement,
|
|
291
424
|
confidence,
|
|
292
425
|
importance,
|
|
293
426
|
fingerprint,
|
|
427
|
+
supersedes,
|
|
428
|
+
overrideConfidence,
|
|
429
|
+
supersedesRejected,
|
|
294
430
|
});
|
|
295
431
|
}
|
|
296
432
|
|
|
@@ -387,12 +523,7 @@ export async function extractAndUpsertMemoryItemsForMessage(
|
|
|
387
523
|
verificationState === "user_reported"
|
|
388
524
|
? "user_reported"
|
|
389
525
|
: existing.verificationState;
|
|
390
|
-
|
|
391
|
-
effectiveStatus =
|
|
392
|
-
existing.status === "pending_clarification" &&
|
|
393
|
-
hasPendingConflict(existing.id)
|
|
394
|
-
? "pending_clarification"
|
|
395
|
-
: "active";
|
|
526
|
+
effectiveStatus = "active";
|
|
396
527
|
db.update(memoryItems)
|
|
397
528
|
.set({
|
|
398
529
|
status: effectiveStatus,
|
|
@@ -424,16 +555,103 @@ export async function extractAndUpsertMemoryItemsForMessage(
|
|
|
424
555
|
firstSeenAt: message.createdAt,
|
|
425
556
|
lastSeenAt: seenAt,
|
|
426
557
|
lastUsedAt: null,
|
|
558
|
+
supersedes: item.supersedes,
|
|
559
|
+
overrideConfidence: item.overrideConfidence,
|
|
427
560
|
})
|
|
428
561
|
.run();
|
|
429
562
|
upserted += 1;
|
|
430
563
|
}
|
|
431
564
|
|
|
432
|
-
//
|
|
433
|
-
//
|
|
434
|
-
//
|
|
435
|
-
|
|
436
|
-
|
|
565
|
+
// Handle LLM-directed supersession based on overrideConfidence.
|
|
566
|
+
// Guard: skip if supersedes targets the current item (self-supersession on
|
|
567
|
+
// fingerprint re-hit would incorrectly remove an active memory).
|
|
568
|
+
if (
|
|
569
|
+
item.supersedes &&
|
|
570
|
+
item.supersedes !== memoryItemId &&
|
|
571
|
+
item.overrideConfidence === "explicit" &&
|
|
572
|
+
effectiveStatus === "active"
|
|
573
|
+
) {
|
|
574
|
+
// Explicit supersession: mark old item as superseded and link both items
|
|
575
|
+
const oldItem = db
|
|
576
|
+
.select({ id: memoryItems.id })
|
|
577
|
+
.from(memoryItems)
|
|
578
|
+
.where(
|
|
579
|
+
and(
|
|
580
|
+
eq(memoryItems.id, item.supersedes),
|
|
581
|
+
eq(memoryItems.scopeId, effectiveScopeId),
|
|
582
|
+
eq(memoryItems.status, "active"),
|
|
583
|
+
),
|
|
584
|
+
)
|
|
585
|
+
.get();
|
|
586
|
+
|
|
587
|
+
if (oldItem) {
|
|
588
|
+
db.update(memoryItems)
|
|
589
|
+
.set({
|
|
590
|
+
status: "superseded",
|
|
591
|
+
supersededBy: memoryItemId,
|
|
592
|
+
})
|
|
593
|
+
.where(eq(memoryItems.id, oldItem.id))
|
|
594
|
+
.run();
|
|
595
|
+
|
|
596
|
+
// Update new item's supersedes link
|
|
597
|
+
db.update(memoryItems)
|
|
598
|
+
.set({ supersedes: oldItem.id })
|
|
599
|
+
.where(eq(memoryItems.id, memoryItemId))
|
|
600
|
+
.run();
|
|
601
|
+
|
|
602
|
+
// Remove superseded item from Qdrant vector index
|
|
603
|
+
try {
|
|
604
|
+
const qdrant = getQdrantClient();
|
|
605
|
+
await withQdrantBreaker(() =>
|
|
606
|
+
qdrant.deleteByTarget("item", oldItem.id),
|
|
607
|
+
);
|
|
608
|
+
} catch (err) {
|
|
609
|
+
const errMsg = err instanceof Error ? err.message : String(err);
|
|
610
|
+
log.warn(
|
|
611
|
+
{ err: errMsg, oldItemId: oldItem.id },
|
|
612
|
+
"Failed to remove superseded item from Qdrant — will be cleaned up by index maintenance",
|
|
613
|
+
);
|
|
614
|
+
}
|
|
615
|
+
|
|
616
|
+
log.debug(
|
|
617
|
+
{ newItemId: memoryItemId, oldItemId: oldItem.id },
|
|
618
|
+
"Explicitly superseded memory item",
|
|
619
|
+
);
|
|
620
|
+
}
|
|
621
|
+
} else if (item.supersedes && item.overrideConfidence === "tentative") {
|
|
622
|
+
// Tentative: insert as active but don't supersede — both coexist
|
|
623
|
+
log.debug(
|
|
624
|
+
{
|
|
625
|
+
newItemId: memoryItemId,
|
|
626
|
+
supersedes: item.supersedes,
|
|
627
|
+
overrideConfidence: "tentative",
|
|
628
|
+
},
|
|
629
|
+
"Tentative override — both items coexist",
|
|
630
|
+
);
|
|
631
|
+
} else if (item.supersedes && item.overrideConfidence === "inferred") {
|
|
632
|
+
// Inferred: insert as active, don't supersede, log for observability
|
|
633
|
+
log.debug(
|
|
634
|
+
{
|
|
635
|
+
newItemId: memoryItemId,
|
|
636
|
+
supersedes: item.supersedes,
|
|
637
|
+
overrideConfidence: "inferred",
|
|
638
|
+
},
|
|
639
|
+
"Inferred override — both items coexist (weak signal)",
|
|
640
|
+
);
|
|
641
|
+
}
|
|
642
|
+
|
|
643
|
+
// Fallback subject-match supersession: only when the LLM did not
|
|
644
|
+
// explicitly handle supersession for this item. This preserves the
|
|
645
|
+
// original behavior for pattern-based extraction and items without
|
|
646
|
+
// LLM-directed supersession. Skip items whose supersedes ID was
|
|
647
|
+
// rejected (hallucinated) — they should coexist, not trigger
|
|
648
|
+
// subject-based replacement.
|
|
649
|
+
if (
|
|
650
|
+
!item.supersedes &&
|
|
651
|
+
!item.supersedesRejected &&
|
|
652
|
+
SUPERSEDE_KINDS.has(item.kind) &&
|
|
653
|
+
effectiveStatus === "active"
|
|
654
|
+
) {
|
|
437
655
|
db.update(memoryItems)
|
|
438
656
|
.set({ status: "superseded" })
|
|
439
657
|
.where(
|
|
@@ -459,11 +677,6 @@ export async function extractAndUpsertMemoryItemsForMessage(
|
|
|
459
677
|
.run();
|
|
460
678
|
|
|
461
679
|
enqueueMemoryJob("embed_item", { itemId: memoryItemId });
|
|
462
|
-
|
|
463
|
-
// Queue contradiction check for newly inserted items
|
|
464
|
-
if (!existing) {
|
|
465
|
-
enqueueMemoryJob("check_contradictions", { itemId: memoryItemId });
|
|
466
|
-
}
|
|
467
680
|
}
|
|
468
681
|
|
|
469
682
|
log.debug(
|
|
@@ -504,6 +717,8 @@ function extractItemsPatternBased(
|
|
|
504
717
|
confidence: classification.confidence,
|
|
505
718
|
importance: classification.importance,
|
|
506
719
|
fingerprint,
|
|
720
|
+
supersedes: null,
|
|
721
|
+
overrideConfidence: "inferred" as OverrideConfidence,
|
|
507
722
|
});
|
|
508
723
|
}
|
|
509
724
|
|
|
@@ -533,7 +748,7 @@ function classifySentence(
|
|
|
533
748
|
"timezone",
|
|
534
749
|
])
|
|
535
750
|
) {
|
|
536
|
-
return { kind: "
|
|
751
|
+
return { kind: "identity", confidence: 0.72, importance: 0.8 };
|
|
537
752
|
}
|
|
538
753
|
if (includesAny(lower, ["project", "repository", "repo", "codebase"])) {
|
|
539
754
|
return { kind: "project", confidence: 0.68, importance: 0.6 };
|
|
@@ -546,7 +761,7 @@ function classifySentence(
|
|
|
546
761
|
if (
|
|
547
762
|
includesAny(lower, ["todo", "to do", "next step", "follow up", "need to"])
|
|
548
763
|
) {
|
|
549
|
-
return { kind: "
|
|
764
|
+
return { kind: "project", confidence: 0.74, importance: 0.6 };
|
|
550
765
|
}
|
|
551
766
|
if (
|
|
552
767
|
includesAny(lower, [
|
|
@@ -560,7 +775,7 @@ function classifySentence(
|
|
|
560
775
|
return { kind: "constraint", confidence: 0.7, importance: 0.7 };
|
|
561
776
|
}
|
|
562
777
|
if (includesAny(lower, ["remember", "important", "fact", "noted"])) {
|
|
563
|
-
return { kind: "
|
|
778
|
+
return { kind: "identity", confidence: 0.62, importance: 0.5 };
|
|
564
779
|
}
|
|
565
780
|
return null;
|
|
566
781
|
}
|
|
@@ -603,20 +818,3 @@ function parseScore(value: unknown, fallback: number): number {
|
|
|
603
818
|
const n = Number(value);
|
|
604
819
|
return Number.isFinite(n) ? n : fallback;
|
|
605
820
|
}
|
|
606
|
-
|
|
607
|
-
/** Returns true if the given memory item is the candidate in an unresolved conflict. */
|
|
608
|
-
function hasPendingConflict(itemId: string): boolean {
|
|
609
|
-
const db = getDb();
|
|
610
|
-
const row = db
|
|
611
|
-
.select({ id: memoryItemConflicts.id })
|
|
612
|
-
.from(memoryItemConflicts)
|
|
613
|
-
.where(
|
|
614
|
-
and(
|
|
615
|
-
eq(memoryItemConflicts.candidateItemId, itemId),
|
|
616
|
-
eq(memoryItemConflicts.status, "pending_clarification"),
|
|
617
|
-
),
|
|
618
|
-
)
|
|
619
|
-
.limit(1)
|
|
620
|
-
.get();
|
|
621
|
-
return row != null;
|
|
622
|
-
}
|
|
@@ -1,8 +1,7 @@
|
|
|
1
|
-
import { and, asc, eq, gt,
|
|
1
|
+
import { and, asc, eq, gt, or } from "drizzle-orm";
|
|
2
2
|
|
|
3
3
|
import type { AssistantConfig } from "../../config/types.js";
|
|
4
4
|
import type { TrustClass } from "../../runtime/actor-trust-resolver.js";
|
|
5
|
-
import { getLogger } from "../../util/logger.js";
|
|
6
5
|
import {
|
|
7
6
|
readMessageCursorCheckpoint,
|
|
8
7
|
resetMessageCursorCheckpoint,
|
|
@@ -14,21 +13,11 @@ import {
|
|
|
14
13
|
} from "../conversation-crud.js";
|
|
15
14
|
import { getDb } from "../db.js";
|
|
16
15
|
import { indexMessageNow } from "../indexer.js";
|
|
17
|
-
import {
|
|
18
|
-
enqueueBackfillEntityRelationsJob,
|
|
19
|
-
enqueueMemoryJob,
|
|
20
|
-
type MemoryJob,
|
|
21
|
-
} from "../jobs-store.js";
|
|
16
|
+
import { enqueueMemoryJob, type MemoryJob } from "../jobs-store.js";
|
|
22
17
|
import { messages } from "../schema.js";
|
|
23
18
|
|
|
24
|
-
const log = getLogger("memory-jobs-worker");
|
|
25
|
-
|
|
26
19
|
const BACKFILL_CHECKPOINT_KEY = "memory:backfill:last_created_at";
|
|
27
20
|
const BACKFILL_CHECKPOINT_ID_KEY = "memory:backfill:last_message_id";
|
|
28
|
-
const RELATION_BACKFILL_CHECKPOINT_KEY =
|
|
29
|
-
"memory:relation_backfill:last_created_at";
|
|
30
|
-
const RELATION_BACKFILL_CHECKPOINT_ID_KEY =
|
|
31
|
-
"memory:relation_backfill:last_message_id";
|
|
32
21
|
|
|
33
22
|
function parseProvenanceTrustClass(
|
|
34
23
|
rawMetadata: string | null,
|
|
@@ -43,10 +32,6 @@ function parseProvenanceTrustClass(
|
|
|
43
32
|
}
|
|
44
33
|
}
|
|
45
34
|
|
|
46
|
-
function isTrustedTrustClass(trustClass: TrustClass | undefined): boolean {
|
|
47
|
-
return trustClass === "guardian" || trustClass === undefined;
|
|
48
|
-
}
|
|
49
|
-
|
|
50
35
|
export function backfillJob(job: MemoryJob, config: AssistantConfig): void {
|
|
51
36
|
const db = getDb();
|
|
52
37
|
const force = job.payload.force === true;
|
|
@@ -114,115 +99,5 @@ export function backfillJob(job: MemoryJob, config: AssistantConfig): void {
|
|
|
114
99
|
|
|
115
100
|
if (batch.length === 200) {
|
|
116
101
|
enqueueMemoryJob("backfill", {});
|
|
117
|
-
} else if (
|
|
118
|
-
config.memory.entity.enabled &&
|
|
119
|
-
config.memory.entity.extractRelations.enabled
|
|
120
|
-
) {
|
|
121
|
-
// Enqueue after the terminal batch (including an empty batch when total
|
|
122
|
-
// messages are an exact multiple of 200) so the relation backfill does not
|
|
123
|
-
// overlap with messages the normal backfill already covered via
|
|
124
|
-
// indexMessageNow → extract_items → extract_entities.
|
|
125
|
-
enqueueBackfillEntityRelationsJob();
|
|
126
102
|
}
|
|
127
103
|
}
|
|
128
|
-
|
|
129
|
-
export function backfillEntityRelationsJob(
|
|
130
|
-
job: MemoryJob,
|
|
131
|
-
config: AssistantConfig,
|
|
132
|
-
): void {
|
|
133
|
-
if (!config.memory.entity.enabled) return;
|
|
134
|
-
if (!config.memory.entity.extractRelations.enabled) return;
|
|
135
|
-
|
|
136
|
-
const force = job.payload.force === true;
|
|
137
|
-
if (force) {
|
|
138
|
-
resetMessageCursorCheckpoint(
|
|
139
|
-
RELATION_BACKFILL_CHECKPOINT_KEY,
|
|
140
|
-
RELATION_BACKFILL_CHECKPOINT_ID_KEY,
|
|
141
|
-
);
|
|
142
|
-
}
|
|
143
|
-
|
|
144
|
-
const db = getDb();
|
|
145
|
-
const cursor = readMessageCursorCheckpoint(
|
|
146
|
-
RELATION_BACKFILL_CHECKPOINT_KEY,
|
|
147
|
-
RELATION_BACKFILL_CHECKPOINT_ID_KEY,
|
|
148
|
-
);
|
|
149
|
-
const batchSize = Math.max(
|
|
150
|
-
1,
|
|
151
|
-
config.memory.entity.extractRelations.backfillBatchSize,
|
|
152
|
-
);
|
|
153
|
-
|
|
154
|
-
const afterCursor = or(
|
|
155
|
-
gt(messages.createdAt, cursor.createdAt),
|
|
156
|
-
and(
|
|
157
|
-
eq(messages.createdAt, cursor.createdAt),
|
|
158
|
-
gt(messages.id, cursor.messageId),
|
|
159
|
-
),
|
|
160
|
-
);
|
|
161
|
-
|
|
162
|
-
// Honor extractFromAssistant config — same role filter as indexMessageNow
|
|
163
|
-
const roleFilter = config.memory.extraction.extractFromAssistant
|
|
164
|
-
? undefined
|
|
165
|
-
: ne(messages.role, "assistant");
|
|
166
|
-
|
|
167
|
-
const conditions = roleFilter ? and(afterCursor, roleFilter) : afterCursor;
|
|
168
|
-
|
|
169
|
-
const batch = db
|
|
170
|
-
.select({
|
|
171
|
-
id: messages.id,
|
|
172
|
-
conversationId: messages.conversationId,
|
|
173
|
-
role: messages.role,
|
|
174
|
-
createdAt: messages.createdAt,
|
|
175
|
-
metadata: messages.metadata,
|
|
176
|
-
})
|
|
177
|
-
.from(messages)
|
|
178
|
-
.where(conditions)
|
|
179
|
-
.orderBy(asc(messages.createdAt), asc(messages.id))
|
|
180
|
-
.limit(batchSize)
|
|
181
|
-
.all();
|
|
182
|
-
if (batch.length === 0) return;
|
|
183
|
-
|
|
184
|
-
const scopeCache = new Map<string, string>();
|
|
185
|
-
let queuedExtractEntityJobs = 0;
|
|
186
|
-
let skippedUntrusted = 0;
|
|
187
|
-
for (const message of batch) {
|
|
188
|
-
const provenanceTrustClass = parseProvenanceTrustClass(
|
|
189
|
-
message.metadata ?? null,
|
|
190
|
-
);
|
|
191
|
-
if (!isTrustedTrustClass(provenanceTrustClass)) {
|
|
192
|
-
skippedUntrusted += 1;
|
|
193
|
-
continue;
|
|
194
|
-
}
|
|
195
|
-
let scopeId = scopeCache.get(message.conversationId);
|
|
196
|
-
if (scopeId === undefined) {
|
|
197
|
-
scopeId = getConversationMemoryScopeId(message.conversationId);
|
|
198
|
-
scopeCache.set(message.conversationId, scopeId);
|
|
199
|
-
}
|
|
200
|
-
enqueueMemoryJob("extract_entities", { messageId: message.id, scopeId });
|
|
201
|
-
queuedExtractEntityJobs += 1;
|
|
202
|
-
}
|
|
203
|
-
|
|
204
|
-
const lastMessage = batch[batch.length - 1];
|
|
205
|
-
writeMessageCursorCheckpoint(
|
|
206
|
-
RELATION_BACKFILL_CHECKPOINT_KEY,
|
|
207
|
-
RELATION_BACKFILL_CHECKPOINT_ID_KEY,
|
|
208
|
-
{
|
|
209
|
-
createdAt: lastMessage.createdAt,
|
|
210
|
-
messageId: lastMessage.id,
|
|
211
|
-
},
|
|
212
|
-
);
|
|
213
|
-
|
|
214
|
-
if (batch.length === batchSize) {
|
|
215
|
-
enqueueBackfillEntityRelationsJob();
|
|
216
|
-
}
|
|
217
|
-
|
|
218
|
-
log.debug(
|
|
219
|
-
{
|
|
220
|
-
queuedExtractEntityJobs,
|
|
221
|
-
skippedUntrusted,
|
|
222
|
-
batchSize,
|
|
223
|
-
lastCreatedAt: lastMessage.createdAt,
|
|
224
|
-
lastMessageId: lastMessage.id,
|
|
225
|
-
},
|
|
226
|
-
"Queued relation backfill batch",
|
|
227
|
-
);
|
|
228
|
-
}
|