@vellumai/assistant 0.6.0 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +4 -0
- package/ARCHITECTURE.md +68 -15
- package/Dockerfile +2 -2
- package/bun.lock +6 -2
- package/docker-entrypoint.sh +32 -1
- package/docs/architecture/integrations.md +1 -1
- package/docs/architecture/memory.md +21 -24
- package/openapi.yaml +538 -3
- package/package.json +5 -1
- package/src/__tests__/anthropic-provider.test.ts +160 -95
- package/src/__tests__/app-dir-path-guard.test.ts +1 -0
- package/src/__tests__/app-executors.test.ts +47 -1
- package/src/__tests__/app-source-watcher.test.ts +159 -0
- package/src/__tests__/checker.test.ts +38 -6
- package/src/__tests__/config-schema.test.ts +5 -0
- package/src/__tests__/conversation-agent-loop-overflow.test.ts +4 -6
- package/src/__tests__/conversation-agent-loop.test.ts +4 -51
- package/src/__tests__/conversation-history-web-search.test.ts +1 -1
- package/src/__tests__/conversation-runtime-assembly.test.ts +653 -832
- package/src/__tests__/conversation-runtime-workspace.test.ts +1 -93
- package/src/__tests__/conversation-tool-setup-app-refresh.test.ts +17 -4
- package/src/__tests__/conversation-wipe.test.ts +2 -6
- package/src/__tests__/conversation-workspace-cache-state.test.ts +6 -12
- package/src/__tests__/conversation-workspace-injection.test.ts +25 -26
- package/src/__tests__/conversation-workspace-tool-tracking.test.ts +1 -1
- package/src/__tests__/copy-composer-tc-templates.test.ts +335 -0
- package/src/__tests__/date-context.test.ts +76 -210
- package/src/__tests__/db-schedule-syntax-migration.test.ts +16 -1
- package/src/__tests__/file-list-tool.test.ts +219 -0
- package/src/__tests__/first-greeting.test.ts +1 -1
- package/src/__tests__/heartbeat-service.test.ts +180 -3
- package/src/__tests__/identity-routes.test.ts +328 -0
- package/src/__tests__/injection-block.test.ts +24 -0
- package/src/__tests__/install-skill-routing.test.ts +7 -6
- package/src/__tests__/jobs-store-qdrant-breaker.test.ts +15 -14
- package/src/__tests__/list-messages-tool-merge.test.ts +300 -0
- package/src/__tests__/llm-context-normalization.test.ts +18 -18
- package/src/__tests__/llm-context-route-provider.test.ts +101 -0
- package/src/__tests__/llm-request-log-turn-query.test.ts +162 -0
- package/src/__tests__/log-export-workspace.test.ts +72 -105
- package/src/__tests__/mcp-abort-signal.test.ts +5 -0
- package/src/__tests__/mcp-client-auth.test.ts +5 -0
- package/src/__tests__/memory-recall-log-store.test.ts +132 -0
- package/src/__tests__/migration-export-streaming.test.ts +304 -0
- package/src/__tests__/migration-import-commit-http.test.ts +11 -10
- package/src/__tests__/mock-fetch.ts +87 -0
- package/src/__tests__/notification-decision-recipient-context.test.ts +282 -0
- package/src/__tests__/onboarding-template-contract.test.ts +62 -14
- package/src/__tests__/parser.test.ts +32 -0
- package/src/__tests__/permission-checker-host-gate.test.ts +452 -0
- package/src/__tests__/permission-controls-v2-flag.test.ts +55 -0
- package/src/__tests__/permission-mode-sse.test.ts +418 -0
- package/src/__tests__/permission-mode-store.test.ts +277 -0
- package/src/__tests__/permission-mode.test.ts +101 -0
- package/src/__tests__/platform-bash-auto-approve.test.ts +359 -0
- package/src/__tests__/profiler-routes.test.ts +502 -0
- package/src/__tests__/profiler-run-store.test.ts +441 -0
- package/src/__tests__/proxy-approval-callback.test.ts +4 -75
- package/src/__tests__/registry.test.ts +1 -1
- package/src/__tests__/sandbox-host-parity.test.ts +5 -4
- package/src/__tests__/scheduler-reuse-conversation.test.ts +368 -0
- package/src/__tests__/scrub-corrupted-image-attachments.test.ts +278 -0
- package/src/__tests__/search-skills-unified.test.ts +4 -3
- package/src/__tests__/send-endpoint-busy.test.ts +42 -3
- package/src/__tests__/set-permission-mode.test.ts +274 -0
- package/src/__tests__/skill-load-feature-flag.test.ts +12 -0
- package/src/__tests__/skill-memory.test.ts +2 -783
- package/src/__tests__/strip-memory-injections.test.ts +187 -0
- package/src/__tests__/subagent-detail.test.ts +84 -0
- package/src/__tests__/subagent-disposal.test.ts +308 -0
- package/src/__tests__/subagent-manager-notify.test.ts +19 -10
- package/src/__tests__/subagent-notify-parent.test.ts +390 -0
- package/src/__tests__/subagent-role-registry.test.ts +108 -0
- package/src/__tests__/subagent-tool-filtering.test.ts +71 -0
- package/src/__tests__/subagent-tools.test.ts +464 -4
- package/src/__tests__/system-prompt-ask-mode.test.ts +139 -0
- package/src/__tests__/task-memory-cleanup.test.ts +12 -12
- package/src/__tests__/terminal-tools.test.ts +17 -27
- package/src/__tests__/test-preload.ts +4 -0
- package/src/__tests__/tool-executor.test.ts +4 -26
- package/src/__tests__/tool-side-effects-slack-dm.test.ts +1 -0
- package/src/__tests__/top-level-renderer.test.ts +10 -13
- package/src/__tests__/trusted-contact-lifecycle-notifications.test.ts +116 -2
- package/src/__tests__/workspace-migration-028-recover-conversations-from-disk-view.test.ts +387 -0
- package/src/agent/loop.ts +6 -0
- package/src/approvals/guardian-request-resolvers.ts +24 -0
- package/src/avatar/traits-png-sync.ts +3 -3
- package/src/cli/__tests__/run-assistant-command.ts +29 -0
- package/src/cli/commands/__tests__/email-download.test.ts +245 -0
- package/src/cli/commands/__tests__/email-list.test.ts +192 -0
- package/src/cli/commands/__tests__/email-register.test.ts +186 -0
- package/src/cli/commands/__tests__/email-send.test.ts +291 -0
- package/src/cli/commands/__tests__/email-status.test.ts +181 -0
- package/src/cli/commands/__tests__/email-unregister.test.ts +139 -0
- package/src/cli/commands/__tests__/routes.test.ts +562 -0
- package/src/cli/commands/conversations.ts +1 -8
- package/src/cli/commands/email.ts +584 -835
- package/src/cli/commands/memory.ts +1 -34
- package/src/cli/commands/notifications.ts +7 -2
- package/src/cli/commands/oauth/connect.ts +14 -5
- package/src/cli/commands/routes.ts +396 -0
- package/src/cli/commands/skills.ts +130 -20
- package/src/cli/program.ts +2 -0
- package/src/cli.ts +1 -120
- package/src/config/bundled-skills/app-builder/SKILL.md +4 -1
- package/src/config/bundled-skills/gmail/SKILL.md +2 -2
- package/src/config/bundled-skills/messaging/SKILL.md +7 -0
- package/src/config/bundled-skills/schedule/SKILL.md +22 -2
- package/src/config/bundled-skills/schedule/TOOLS.json +8 -0
- package/src/config/bundled-skills/settings/tools/avatar-get.ts +3 -13
- package/src/config/bundled-skills/settings/tools/avatar-remove.ts +2 -4
- package/src/config/bundled-skills/settings/tools/avatar-update.ts +5 -2
- package/src/config/bundled-skills/slack/SKILL.md +2 -0
- package/src/config/bundled-skills/subagent/SKILL.md +43 -3
- package/src/config/bundled-skills/subagent/TOOLS.json +29 -4
- package/src/config/env-registry.ts +63 -0
- package/src/config/feature-flag-registry.json +17 -1
- package/src/config/schema.ts +8 -0
- package/src/config/schemas/filing.ts +51 -0
- package/src/config/schemas/heartbeat.ts +15 -12
- package/src/config/schemas/memory-lifecycle.ts +12 -0
- package/src/config/schemas/security.ts +14 -0
- package/src/daemon/app-source-watcher.ts +93 -0
- package/src/daemon/config-watcher.ts +79 -1
- package/src/daemon/conversation-agent-loop-handlers.ts +20 -0
- package/src/daemon/conversation-agent-loop.ts +158 -65
- package/src/daemon/conversation-history.ts +4 -19
- package/src/daemon/conversation-lifecycle.ts +8 -14
- package/src/daemon/conversation-process.ts +13 -7
- package/src/daemon/conversation-runtime-assembly.ts +300 -306
- package/src/daemon/conversation-tool-setup.ts +44 -14
- package/src/daemon/conversation-workspace.ts +1 -2
- package/src/daemon/conversation.ts +18 -0
- package/src/daemon/date-context.ts +26 -53
- package/src/daemon/first-greeting.ts +1 -1
- package/src/daemon/handlers/conversations.ts +4 -7
- package/src/daemon/handlers/shared.test.ts +143 -0
- package/src/daemon/handlers/shared.ts +63 -5
- package/src/daemon/handlers/skills.ts +11 -18
- package/src/daemon/lifecycle.ts +199 -157
- package/src/daemon/message-types/conversations.ts +25 -6
- package/src/daemon/message-types/messages.ts +9 -1
- package/src/daemon/message-types/schedules.ts +1 -0
- package/src/daemon/message-types/settings.ts +6 -0
- package/src/daemon/profiler-run-store.ts +557 -0
- package/src/daemon/server.ts +89 -9
- package/src/daemon/shutdown-handlers.ts +5 -0
- package/src/daemon/tool-side-effects.ts +23 -3
- package/src/export/transcript-formatter.ts +148 -0
- package/src/filing/filing-service.ts +228 -0
- package/src/heartbeat/heartbeat-service.ts +96 -7
- package/src/mcp/client.ts +6 -0
- package/src/mcp/mcp-oauth-provider.ts +149 -27
- package/src/memory/admin.ts +33 -32
- package/src/memory/app-store.ts +69 -0
- package/src/memory/conversation-bootstrap.ts +1 -1
- package/src/memory/conversation-crud.ts +136 -107
- package/src/memory/conversation-group-migration.ts +1 -1
- package/src/memory/conversation-queries.ts +58 -12
- package/src/memory/conversation-title-service.ts +1 -0
- package/src/memory/db-init.ts +182 -376
- package/src/memory/graph/bootstrap.ts +75 -66
- package/src/memory/graph/capability-seed.ts +167 -15
- package/src/memory/graph/consolidation.ts +38 -4
- package/src/memory/graph/conversation-graph-memory.ts +133 -104
- package/src/memory/graph/extraction-job.ts +9 -4
- package/src/memory/graph/extraction.ts +66 -23
- package/src/memory/graph/graph-memory-state-store.ts +37 -0
- package/src/memory/graph/graph-search.ts +29 -15
- package/src/memory/graph/injection.ts +38 -8
- package/src/memory/graph/inspect.ts +12 -3
- package/src/memory/graph/retriever.ts +365 -262
- package/src/memory/graph/store.test.ts +48 -0
- package/src/memory/graph/store.ts +150 -11
- package/src/memory/graph/tool-handlers.ts +84 -209
- package/src/memory/graph/tools.ts +8 -52
- package/src/memory/graph/types.ts +24 -0
- package/src/memory/job-handlers/cleanup.ts +44 -1
- package/src/memory/jobs-store.ts +70 -60
- package/src/memory/jobs-worker.ts +44 -28
- package/src/memory/llm-request-log-store.ts +96 -12
- package/src/memory/memory-recall-log-store.ts +49 -5
- package/src/memory/migrations/203-drop-memory-items-tables.ts +33 -1
- package/src/memory/migrations/206-memory-graph-node-edits.ts +19 -0
- package/src/memory/migrations/206-scrub-corrupted-image-attachments.ts +131 -0
- package/src/memory/migrations/207-conversation-graph-memory-state.ts +20 -0
- package/src/memory/migrations/208-conversations-last-message-at.ts +35 -0
- package/src/memory/migrations/209-strip-thinking-from-consolidated.ts +85 -0
- package/src/memory/migrations/210-schedule-reuse-conversation.ts +13 -0
- package/src/memory/migrations/211-memory-recall-logs-query-context.ts +21 -0
- package/src/memory/migrations/212-llm-request-logs-created-at-index.ts +19 -0
- package/src/memory/migrations/index.ts +8 -0
- package/src/memory/migrations/registry.ts +8 -0
- package/src/memory/schema/conversations.ts +14 -0
- package/src/memory/schema/infrastructure.ts +8 -1
- package/src/memory/schema/memory-core.ts +0 -51
- package/src/memory/schema/memory-graph.ts +15 -0
- package/src/memory/task-memory-cleanup.ts +30 -11
- package/src/notifications/copy-composer.ts +86 -0
- package/src/notifications/decision-engine.ts +35 -0
- package/src/permissions/checker.ts +12 -1
- package/src/permissions/permission-mode-store.ts +180 -0
- package/src/permissions/permission-mode.ts +31 -0
- package/src/permissions/workspace-policy.ts +9 -0
- package/src/prompts/system-prompt.ts +59 -7
- package/src/prompts/templates/BOOTSTRAP-REFERENCE.md +100 -0
- package/src/prompts/templates/BOOTSTRAP.md +70 -165
- package/src/prompts/templates/HEARTBEAT.md +3 -1
- package/src/prompts/templates/SOUL.md +25 -4
- package/src/prompts/templates/UPDATES.md +8 -0
- package/src/providers/anthropic/client.ts +107 -219
- package/src/runtime/auth/route-policy.ts +23 -0
- package/src/runtime/http-server.ts +32 -2
- package/src/runtime/http-types.ts +12 -1
- package/src/runtime/migrations/vbundle-builder.ts +389 -3
- package/src/runtime/migrations/vbundle-importer.ts +8 -6
- package/src/runtime/routes/__tests__/user-route-dispatcher.test.ts +378 -0
- package/src/runtime/routes/app-management-routes.ts +1 -11
- package/src/runtime/routes/approval-strategies/guardian-callback-strategy.ts +26 -0
- package/src/runtime/routes/archive-utils.ts +29 -0
- package/src/runtime/routes/avatar-routes.ts +2 -9
- package/src/runtime/routes/btw-routes.ts +14 -1
- package/src/runtime/routes/conversation-analysis-routes.ts +173 -0
- package/src/runtime/routes/conversation-management-routes.ts +1 -14
- package/src/runtime/routes/conversation-query-routes.ts +49 -3
- package/src/runtime/routes/conversation-routes.ts +264 -44
- package/src/runtime/routes/heartbeat-routes.ts +4 -10
- package/src/runtime/routes/identity-routes.ts +53 -18
- package/src/runtime/routes/llm-context-normalization.ts +14 -10
- package/src/runtime/routes/log-export-routes.ts +23 -275
- package/src/runtime/routes/memory-item-routes.test.ts +168 -233
- package/src/runtime/routes/migration-routes.ts +18 -7
- package/src/runtime/routes/profiler-routes.ts +350 -0
- package/src/runtime/routes/schedule-routes.ts +27 -12
- package/src/runtime/routes/settings-routes.ts +95 -8
- package/src/runtime/routes/subagents-routes.ts +28 -7
- package/src/runtime/routes/user-route-dispatcher.ts +223 -0
- package/src/runtime/routes/user-routes.ts +41 -0
- package/src/runtime/routes/workspace-routes.ts +0 -1
- package/src/schedule/schedule-store.ts +30 -0
- package/src/schedule/scheduler.ts +45 -18
- package/src/skills/catalog-install.ts +10 -2
- package/src/skills/managed-store.ts +2 -2
- package/src/skills/skill-memory.ts +1 -293
- package/src/subagent/index.ts +13 -3
- package/src/subagent/manager.ts +308 -29
- package/src/subagent/types.ts +68 -0
- package/src/tasks/task-runner.ts +4 -4
- package/src/tools/apps/executors.ts +29 -4
- package/src/tools/filesystem/list.ts +93 -0
- package/src/tools/permission-checker.ts +78 -0
- package/src/tools/registry.ts +4 -0
- package/src/tools/schedule/create.ts +3 -0
- package/src/tools/schedule/list.ts +1 -0
- package/src/tools/schedule/update.ts +6 -0
- package/src/tools/shared/filesystem/errors.ts +5 -0
- package/src/tools/shared/filesystem/file-ops-service.ts +90 -2
- package/src/tools/shared/filesystem/types.ts +17 -0
- package/src/tools/shared/shell-output.ts +31 -2
- package/src/tools/subagent/abort.ts +12 -2
- package/src/tools/subagent/message.ts +9 -2
- package/src/tools/subagent/notify-parent.ts +79 -0
- package/src/tools/subagent/read.ts +29 -8
- package/src/tools/subagent/resolve.ts +21 -0
- package/src/tools/subagent/spawn.ts +2 -0
- package/src/tools/subagent/status.ts +11 -1
- package/src/tools/system/avatar-generator.ts +3 -3
- package/src/tools/system/register.ts +23 -0
- package/src/tools/system/set-permission-mode.ts +103 -0
- package/src/tools/terminal/parser.ts +30 -5
- package/src/tools/terminal/safe-env.ts +16 -1
- package/src/tools/tool-manifest.ts +6 -0
- package/src/tools/types.ts +2 -0
- package/src/util/logger.ts +1 -1
- package/src/util/platform.ts +50 -17
- package/src/workspace/migrations/023-move-config-files-to-workspace.ts +2 -2
- package/src/workspace/migrations/024-move-runtime-files-to-workspace.ts +2 -2
- package/src/workspace/migrations/028-recover-conversations-from-disk-view.ts +270 -0
- package/src/workspace/migrations/029-seed-pkb.ts +84 -0
- package/src/workspace/migrations/registry.ts +4 -0
- package/src/workspace/top-level-renderer.ts +5 -9
- package/src/__tests__/cli-memory.test.ts +0 -377
- package/src/__tests__/clipboard.test.ts +0 -88
- package/src/cli/cli-memory.ts +0 -179
- package/src/util/clipboard.ts +0 -34
|
@@ -35,7 +35,12 @@ import {
|
|
|
35
35
|
evaluateTemporalTriggers,
|
|
36
36
|
type TriggeredResult,
|
|
37
37
|
} from "./triggers.js";
|
|
38
|
-
import type {
|
|
38
|
+
import type {
|
|
39
|
+
MemoryEdge,
|
|
40
|
+
MemoryNode,
|
|
41
|
+
RetrievalMetrics,
|
|
42
|
+
ScoredNode,
|
|
43
|
+
} from "./types.js";
|
|
39
44
|
import { isCapabilityNode } from "./types.js";
|
|
40
45
|
|
|
41
46
|
const log = getLogger("graph-retriever");
|
|
@@ -78,7 +83,7 @@ async function rerankAndDedup(
|
|
|
78
83
|
const provider = await getConfiguredProvider();
|
|
79
84
|
if (!provider) return candidates.slice(0, maxNodes);
|
|
80
85
|
|
|
81
|
-
//
|
|
86
|
+
// Numbered listing for the LLM: index + age + full content
|
|
82
87
|
const now = Date.now();
|
|
83
88
|
const listing = candidates
|
|
84
89
|
.map((s, i) => {
|
|
@@ -87,11 +92,7 @@ async function rerankAndDedup(
|
|
|
87
92
|
ageDays < 1
|
|
88
93
|
? `${Math.floor(ageDays * 24)}h`
|
|
89
94
|
: `${Math.floor(ageDays)}d`;
|
|
90
|
-
|
|
91
|
-
s.node.content.length > 100
|
|
92
|
-
? s.node.content.slice(0, 100) + "…"
|
|
93
|
-
: s.node.content;
|
|
94
|
-
return `${i + 1}. (${age}) ${preview}`;
|
|
95
|
+
return `${i + 1}. (${age}) ${s.node.content}`;
|
|
95
96
|
})
|
|
96
97
|
.join("\n");
|
|
97
98
|
|
|
@@ -176,10 +177,11 @@ async function dedupForTurn(
|
|
|
176
177
|
candidates: ScoredNode[],
|
|
177
178
|
maxNodes: number,
|
|
178
179
|
query: string,
|
|
179
|
-
): Promise<ScoredNode[]> {
|
|
180
|
+
): Promise<{ nodes: ScoredNode[]; llmApplied: boolean }> {
|
|
180
181
|
try {
|
|
181
182
|
const provider = await getConfiguredProvider();
|
|
182
|
-
if (!provider)
|
|
183
|
+
if (!provider)
|
|
184
|
+
return { nodes: candidates.slice(0, maxNodes), llmApplied: false };
|
|
183
185
|
|
|
184
186
|
const now = Date.now();
|
|
185
187
|
const listing = candidates
|
|
@@ -189,11 +191,7 @@ async function dedupForTurn(
|
|
|
189
191
|
ageDays < 1
|
|
190
192
|
? `${Math.floor(ageDays * 24)}h`
|
|
191
193
|
: `${Math.floor(ageDays)}d`;
|
|
192
|
-
|
|
193
|
-
s.node.content.length > 150
|
|
194
|
-
? s.node.content.slice(0, 150) + "…"
|
|
195
|
-
: s.node.content;
|
|
196
|
-
return `${i + 1}. (${age}) ${preview}`;
|
|
194
|
+
return `${i + 1}. (${age}) ${s.node.content}`;
|
|
197
195
|
})
|
|
198
196
|
.join("\n");
|
|
199
197
|
|
|
@@ -211,6 +209,98 @@ async function dedupForTurn(
|
|
|
211
209
|
},
|
|
212
210
|
);
|
|
213
211
|
|
|
212
|
+
const toolBlock = extractToolUse(response);
|
|
213
|
+
if (!toolBlock)
|
|
214
|
+
return { nodes: candidates.slice(0, maxNodes), llmApplied: false };
|
|
215
|
+
|
|
216
|
+
const input = toolBlock.input as { items?: number[] };
|
|
217
|
+
if (!input.items?.length)
|
|
218
|
+
return { nodes: candidates.slice(0, maxNodes), llmApplied: false };
|
|
219
|
+
|
|
220
|
+
const reranked: ScoredNode[] = [];
|
|
221
|
+
const seen = new Set<number>();
|
|
222
|
+
for (const num of input.items) {
|
|
223
|
+
const idx = num - 1;
|
|
224
|
+
if (idx >= 0 && idx < candidates.length && !seen.has(idx)) {
|
|
225
|
+
reranked.push(candidates[idx]);
|
|
226
|
+
seen.add(idx);
|
|
227
|
+
}
|
|
228
|
+
}
|
|
229
|
+
|
|
230
|
+
return reranked.length > 0
|
|
231
|
+
? { nodes: reranked.slice(0, maxNodes), llmApplied: true }
|
|
232
|
+
: { nodes: candidates.slice(0, maxNodes), llmApplied: false };
|
|
233
|
+
} catch (err) {
|
|
234
|
+
log.warn(
|
|
235
|
+
{ err: err instanceof Error ? err.message : String(err) },
|
|
236
|
+
"Per-turn dedup+rerank failed, using scored order",
|
|
237
|
+
);
|
|
238
|
+
return { nodes: candidates.slice(0, maxNodes), llmApplied: false };
|
|
239
|
+
}
|
|
240
|
+
}
|
|
241
|
+
|
|
242
|
+
// ---------------------------------------------------------------------------
|
|
243
|
+
// Cross-category dedup — dedup-only (no relevance filtering)
|
|
244
|
+
// ---------------------------------------------------------------------------
|
|
245
|
+
|
|
246
|
+
const DEDUP_ITEMS_TOOL = {
|
|
247
|
+
name: "select_items",
|
|
248
|
+
description:
|
|
249
|
+
"Select ALL items that survive deduplication. When multiple items describe the same event/fact, keep only the richest version. Do not filter by relevance — keep everything that is not a duplicate.",
|
|
250
|
+
input_schema: {
|
|
251
|
+
type: "object" as const,
|
|
252
|
+
properties: {
|
|
253
|
+
items: {
|
|
254
|
+
type: "array" as const,
|
|
255
|
+
description:
|
|
256
|
+
"Item numbers to keep (1-indexed). Remove duplicates — when multiple entries describe the same event/fact, keep ONLY the richest version. Keep all non-duplicate items.",
|
|
257
|
+
items: { type: "number" as const },
|
|
258
|
+
},
|
|
259
|
+
},
|
|
260
|
+
required: ["items"] as const,
|
|
261
|
+
},
|
|
262
|
+
};
|
|
263
|
+
|
|
264
|
+
/**
|
|
265
|
+
* Dedup-only pass for cross-category duplicate removal. Unlike `dedupForTurn`,
|
|
266
|
+
* this does NOT filter by relevance to a query — it ONLY removes duplicates
|
|
267
|
+
* and keeps everything else. Used after context load to catch topic-level
|
|
268
|
+
* duplicates across reserved categories and serendipity.
|
|
269
|
+
*/
|
|
270
|
+
async function dedupCrossCategory(
|
|
271
|
+
candidates: ScoredNode[],
|
|
272
|
+
maxNodes: number,
|
|
273
|
+
): Promise<ScoredNode[]> {
|
|
274
|
+
try {
|
|
275
|
+
const provider = await getConfiguredProvider();
|
|
276
|
+
if (!provider) return candidates.slice(0, maxNodes);
|
|
277
|
+
|
|
278
|
+
const now = Date.now();
|
|
279
|
+
const listing = candidates
|
|
280
|
+
.map((s, i) => {
|
|
281
|
+
const ageDays = (now - s.node.created) / (1000 * 60 * 60 * 24);
|
|
282
|
+
const age =
|
|
283
|
+
ageDays < 1
|
|
284
|
+
? `${Math.floor(ageDays * 24)}h`
|
|
285
|
+
: `${Math.floor(ageDays)}d`;
|
|
286
|
+
return `${i + 1}. (${age}) ${s.node.content}`;
|
|
287
|
+
})
|
|
288
|
+
.join("\n");
|
|
289
|
+
|
|
290
|
+
const response = await provider.sendMessage(
|
|
291
|
+
[userMessage(listing)],
|
|
292
|
+
[DEDUP_ITEMS_TOOL],
|
|
293
|
+
`Deduplicate the following numbered items. When multiple items describe the same event, fact, or status, keep ONLY the richest version. Keep ALL items that are not duplicates — do not filter by relevance or topic. Call the select_items tool with every item that survives dedup.`,
|
|
294
|
+
{
|
|
295
|
+
config: {
|
|
296
|
+
modelIntent: "latency-optimized" as const,
|
|
297
|
+
tool_choice: { type: "tool" as const, name: "select_items" },
|
|
298
|
+
thinking: { type: "disabled" },
|
|
299
|
+
temperature: 0,
|
|
300
|
+
},
|
|
301
|
+
},
|
|
302
|
+
);
|
|
303
|
+
|
|
214
304
|
const toolBlock = extractToolUse(response);
|
|
215
305
|
if (!toolBlock) return candidates.slice(0, maxNodes);
|
|
216
306
|
|
|
@@ -233,7 +323,7 @@ async function dedupForTurn(
|
|
|
233
323
|
} catch (err) {
|
|
234
324
|
log.warn(
|
|
235
325
|
{ err: err instanceof Error ? err.message : String(err) },
|
|
236
|
-
"
|
|
326
|
+
"Cross-category dedup failed, using original order",
|
|
237
327
|
);
|
|
238
328
|
return candidates.slice(0, maxNodes);
|
|
239
329
|
}
|
|
@@ -263,6 +353,7 @@ export interface ContextLoadResult {
|
|
|
263
353
|
serendipityNodes: ScoredNode[];
|
|
264
354
|
triggeredNodes: TriggeredResult[];
|
|
265
355
|
latencyMs: number;
|
|
356
|
+
metrics: RetrievalMetrics;
|
|
266
357
|
}
|
|
267
358
|
|
|
268
359
|
/**
|
|
@@ -287,15 +378,21 @@ export async function loadContextMemory(
|
|
|
287
378
|
|
|
288
379
|
// 1. Embed recent conversation summaries as retrieval queries
|
|
289
380
|
let queryVector: number[] | null = null;
|
|
381
|
+
let embeddingProvider: string | null = null;
|
|
382
|
+
let embeddingModel: string | null = null;
|
|
383
|
+
let contextQueryText: string | null = null;
|
|
290
384
|
if (opts.recentSummaries.length > 0) {
|
|
291
385
|
try {
|
|
292
386
|
const queryText = opts.recentSummaries.join("\n\n");
|
|
293
387
|
const truncated =
|
|
294
388
|
queryText.length > 3000 ? queryText.slice(0, 3000) : queryText;
|
|
389
|
+
contextQueryText = truncated;
|
|
295
390
|
const result = await embedWithRetry(opts.config, [truncated], {
|
|
296
391
|
signal: opts.signal,
|
|
297
392
|
});
|
|
298
393
|
queryVector = result.vectors[0] ?? null;
|
|
394
|
+
embeddingProvider = result.provider;
|
|
395
|
+
embeddingModel = result.model;
|
|
299
396
|
} catch (err) {
|
|
300
397
|
log.warn({ err }, "Failed to embed summaries for context load");
|
|
301
398
|
}
|
|
@@ -303,7 +400,9 @@ export async function loadContextMemory(
|
|
|
303
400
|
|
|
304
401
|
// 2. Hybrid retrieval from Qdrant (dense search on graph_node points)
|
|
305
402
|
const semanticCandidateIds = new Map<string, number>(); // nodeId → score
|
|
403
|
+
let hybridSearchLatencyMs = 0;
|
|
306
404
|
if (queryVector) {
|
|
405
|
+
const searchStart = Date.now();
|
|
307
406
|
try {
|
|
308
407
|
const results = await searchGraphNodes(queryVector, maxNodes * 3, [
|
|
309
408
|
opts.scopeId,
|
|
@@ -313,8 +412,11 @@ export async function loadContextMemory(
|
|
|
313
412
|
}
|
|
314
413
|
} catch (err) {
|
|
315
414
|
log.warn({ err }, "Qdrant search failed for context load");
|
|
415
|
+
} finally {
|
|
416
|
+
hybridSearchLatencyMs = Date.now() - searchStart;
|
|
316
417
|
}
|
|
317
418
|
}
|
|
419
|
+
const pureSemanticHits = semanticCandidateIds.size;
|
|
318
420
|
|
|
319
421
|
// Also include top-significance nodes as a fallback
|
|
320
422
|
const topSignificance = queryNodes({
|
|
@@ -498,105 +600,15 @@ export async function loadContextMemory(
|
|
|
498
600
|
},
|
|
499
601
|
);
|
|
500
602
|
|
|
501
|
-
// 6.
|
|
502
|
-
//
|
|
503
|
-
//
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
types: ["prospective"],
|
|
508
|
-
fidelityNot: ["gone"],
|
|
509
|
-
createdAfter: nowMs - 3 * 24 * 60 * 60 * 1000, // last 3 days
|
|
510
|
-
limit: PROSPECTIVE_RESERVE,
|
|
511
|
-
});
|
|
512
|
-
|
|
513
|
-
// Filter out prospective nodes that have been superseded or resolved.
|
|
514
|
-
// A "supersedes" or "resolved-by" edge targeting a node means its
|
|
515
|
-
// content has been replaced by a newer memory — stop force-surfacing it.
|
|
516
|
-
const unresolvedProspective = recentProspective.filter((node) => {
|
|
517
|
-
const incoming = getEdgesForNode(node.id, "incoming");
|
|
518
|
-
return !incoming.some(
|
|
519
|
-
(e) =>
|
|
520
|
-
e.relationship === "supersedes" || e.relationship === "resolved-by",
|
|
521
|
-
);
|
|
522
|
-
});
|
|
523
|
-
|
|
524
|
-
// Score them so they have breakdowns, but they're guaranteed inclusion
|
|
525
|
-
const prospectiveIds = new Set(unresolvedProspective.map((n) => n.id));
|
|
526
|
-
const reservedNodes: ScoredNode[] = unresolvedProspective.map((node) => {
|
|
527
|
-
const existing = scored.find((s) => s.node.id === node.id);
|
|
528
|
-
if (existing) return existing;
|
|
529
|
-
return scoreCandidate(node, {
|
|
530
|
-
semanticSimilarity: 0,
|
|
531
|
-
effectiveSignificance: computeEffectiveSignificance(node, nowMs),
|
|
532
|
-
emotionalIntensity: node.emotionalCharge.intensity,
|
|
533
|
-
temporalBoost: (computeTemporalBoost(node, now) + 1) / 2,
|
|
534
|
-
recencyBoost: computeRecencyBoost(node, nowMs),
|
|
535
|
-
triggerBoost: 0,
|
|
536
|
-
activationBoost: 0,
|
|
537
|
-
});
|
|
538
|
-
});
|
|
539
|
-
|
|
540
|
-
// Reserve slots for upcoming events (nodes with event dates in the future).
|
|
541
|
-
// Like prospective reservation, these MUST surface — if the user said
|
|
542
|
-
// "I have a flight Tuesday," the assistant must remember it regardless of score.
|
|
543
|
-
const UPCOMING_RESERVE = 5;
|
|
544
|
-
const upcomingEvents = queryNodes({
|
|
545
|
-
scopeId: opts.scopeId,
|
|
546
|
-
fidelityNot: ["gone"],
|
|
547
|
-
hasEventDate: true,
|
|
548
|
-
eventDateAfter: nowMs,
|
|
549
|
-
eventDateBefore: nowMs + 30 * 24 * 60 * 60 * 1000, // next 30 days
|
|
550
|
-
limit: 20, // Fetch extra candidates — post-sort by proximity below
|
|
551
|
-
});
|
|
552
|
-
|
|
553
|
-
// Sort by event date ascending so soonest events get reserved first
|
|
554
|
-
// (queryNodes sorts by significance, which would drop a tomorrow-event
|
|
555
|
-
// with low significance in favor of a 3-weeks-away high-significance one)
|
|
556
|
-
upcomingEvents.sort((a, b) => (a.eventDate ?? 0) - (b.eventDate ?? 0));
|
|
557
|
-
|
|
558
|
-
const unresolvedUpcoming = upcomingEvents
|
|
559
|
-
.filter((node) => {
|
|
560
|
-
if (prospectiveIds.has(node.id)) return false; // already reserved as prospective
|
|
561
|
-
const incoming = getEdgesForNode(node.id, "incoming");
|
|
562
|
-
return !incoming.some(
|
|
563
|
-
(e) =>
|
|
564
|
-
e.relationship === "supersedes" || e.relationship === "resolved-by",
|
|
565
|
-
);
|
|
566
|
-
})
|
|
567
|
-
.slice(0, UPCOMING_RESERVE);
|
|
568
|
-
|
|
569
|
-
const upcomingIds = new Set(unresolvedUpcoming.map((n) => n.id));
|
|
570
|
-
const reservedUpcoming: ScoredNode[] = unresolvedUpcoming.map((node) => {
|
|
571
|
-
const existing = scored.find((s) => s.node.id === node.id);
|
|
572
|
-
if (existing) return existing;
|
|
573
|
-
return scoreCandidate(node, {
|
|
574
|
-
semanticSimilarity: 0,
|
|
575
|
-
effectiveSignificance: computeEffectiveSignificance(node, nowMs),
|
|
576
|
-
emotionalIntensity: node.emotionalCharge.intensity,
|
|
577
|
-
temporalBoost: (computeTemporalBoost(node, now) + 1) / 2,
|
|
578
|
-
recencyBoost: computeRecencyBoost(node, nowMs),
|
|
579
|
-
triggerBoost: 0,
|
|
580
|
-
activationBoost: 0,
|
|
581
|
-
});
|
|
582
|
-
});
|
|
583
|
-
|
|
584
|
-
// Remove reserved nodes and all procedural nodes from the main pool.
|
|
585
|
-
// Procedural nodes have dedicated reserved slots — any that didn't make
|
|
586
|
-
// the cut shouldn't compete with organic memories for general slots.
|
|
587
|
-
const mainPool = scored.filter(
|
|
588
|
-
(s) =>
|
|
589
|
-
!isCapabilityNode(s.node) &&
|
|
590
|
-
!prospectiveIds.has(s.node.id) &&
|
|
591
|
-
!upcomingIds.has(s.node.id),
|
|
592
|
-
);
|
|
603
|
+
// 6. Remove procedural nodes from the main pool — they have dedicated
|
|
604
|
+
// reserved slots and shouldn't compete with organic memories.
|
|
605
|
+
// Prospective/upcoming reserves were removed in favor of the PKB
|
|
606
|
+
// (personal knowledge base) which handles commitments and schedule
|
|
607
|
+
// via always-loaded flat files.
|
|
608
|
+
const mainPool = scored.filter((s) => !isCapabilityNode(s.node));
|
|
593
609
|
const mainSlots = Math.max(
|
|
594
610
|
0,
|
|
595
|
-
maxNodes -
|
|
596
|
-
serendipitySlots -
|
|
597
|
-
reservedNodes.length -
|
|
598
|
-
reservedUpcoming.length -
|
|
599
|
-
reservedCapabilities.length,
|
|
611
|
+
maxNodes - serendipitySlots - reservedCapabilities.length,
|
|
600
612
|
);
|
|
601
613
|
|
|
602
614
|
// 7. LLM re-ranking on the main pool: dedup + select
|
|
@@ -606,18 +618,14 @@ export async function loadContextMemory(
|
|
|
606
618
|
opts.config,
|
|
607
619
|
);
|
|
608
620
|
|
|
609
|
-
// 8. Combine: reserved
|
|
610
|
-
const deterministic = [
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
...reranked,
|
|
615
|
-
].slice(0, maxNodes - serendipitySlots);
|
|
621
|
+
// 8. Combine: reserved capabilities + reranked main pool
|
|
622
|
+
const deterministic = [...reservedCapabilities, ...reranked].slice(
|
|
623
|
+
0,
|
|
624
|
+
maxNodes - serendipitySlots,
|
|
625
|
+
);
|
|
616
626
|
// Exclude procedural nodes from serendipity — they have reserved slots
|
|
617
627
|
// and shouldn't appear as random wildcard picks.
|
|
618
|
-
const serendipityPool = scored.filter(
|
|
619
|
-
(s) => !isCapabilityNode(s.node),
|
|
620
|
-
);
|
|
628
|
+
const serendipityPool = scored.filter((s) => !isCapabilityNode(s.node));
|
|
621
629
|
const serendipityPicks = sampleSerendipity(serendipityPool, serendipitySlots);
|
|
622
630
|
|
|
623
631
|
// Deduplicate serendipity against deterministic
|
|
@@ -626,11 +634,56 @@ export async function loadContextMemory(
|
|
|
626
634
|
(s) => !deterministicIds.has(s.node.id),
|
|
627
635
|
);
|
|
628
636
|
|
|
637
|
+
// 9. Cross-category dedup: catch topic-level duplicates across reserved
|
|
638
|
+
// categories (prospective, upcoming, capabilities) and serendipity.
|
|
639
|
+
// Only runs when the combined set is large enough to warrant an LLM call.
|
|
640
|
+
const CROSS_DEDUP_THRESHOLD = 15;
|
|
641
|
+
const combined = [...deterministic, ...uniqueSerendipity];
|
|
642
|
+
let dedupedDeterministic = deterministic;
|
|
643
|
+
let dedupedSerendipity = uniqueSerendipity;
|
|
644
|
+
|
|
645
|
+
if (combined.length > CROSS_DEDUP_THRESHOLD) {
|
|
646
|
+
const deduped = await dedupCrossCategory(
|
|
647
|
+
combined,
|
|
648
|
+
combined.length, // preserve all non-duplicate nodes
|
|
649
|
+
);
|
|
650
|
+
|
|
651
|
+
// Re-split into deterministic vs serendipity by checking original membership
|
|
652
|
+
dedupedDeterministic = deduped.filter((s) =>
|
|
653
|
+
deterministicIds.has(s.node.id),
|
|
654
|
+
);
|
|
655
|
+
dedupedSerendipity = deduped.filter(
|
|
656
|
+
(s) => !deterministicIds.has(s.node.id),
|
|
657
|
+
);
|
|
658
|
+
}
|
|
659
|
+
|
|
660
|
+
const TOP_N = 20;
|
|
661
|
+
const topCandidates = scored.slice(0, TOP_N).map((s) => ({
|
|
662
|
+
nodeId: s.node.id,
|
|
663
|
+
type: s.node.type,
|
|
664
|
+
score: s.score,
|
|
665
|
+
semanticSimilarity: s.scoreBreakdown.semanticSimilarity,
|
|
666
|
+
recencyBoost: s.scoreBreakdown.recencyBoost,
|
|
667
|
+
}));
|
|
668
|
+
|
|
629
669
|
return {
|
|
630
|
-
nodes:
|
|
631
|
-
serendipityNodes:
|
|
670
|
+
nodes: dedupedDeterministic,
|
|
671
|
+
serendipityNodes: dedupedSerendipity,
|
|
632
672
|
triggeredNodes: allTriggered,
|
|
633
673
|
latencyMs: Date.now() - start,
|
|
674
|
+
metrics: {
|
|
675
|
+
semanticHits: pureSemanticHits,
|
|
676
|
+
mergedCount: scored.length,
|
|
677
|
+
selectedCount: dedupedDeterministic.length + dedupedSerendipity.length,
|
|
678
|
+
tier1Count: 0,
|
|
679
|
+
tier2Count: reservedCapabilities.length,
|
|
680
|
+
hybridSearchLatencyMs,
|
|
681
|
+
sparseVectorUsed: false,
|
|
682
|
+
embeddingProvider,
|
|
683
|
+
embeddingModel,
|
|
684
|
+
queryContext: contextQueryText,
|
|
685
|
+
topCandidates,
|
|
686
|
+
},
|
|
634
687
|
};
|
|
635
688
|
}
|
|
636
689
|
|
|
@@ -654,9 +707,12 @@ export interface TurnRetrievalOpts {
|
|
|
654
707
|
export interface TurnRetrievalResult {
|
|
655
708
|
/** New nodes to inject (not already in context). */
|
|
656
709
|
nodes: ScoredNode[];
|
|
710
|
+
/** Serendipity picks included in nodes. */
|
|
711
|
+
serendipityNodes: ScoredNode[];
|
|
657
712
|
/** Triggers that fired this turn. */
|
|
658
713
|
triggeredNodes: TriggeredResult[];
|
|
659
714
|
latencyMs: number;
|
|
715
|
+
metrics: RetrievalMetrics;
|
|
660
716
|
}
|
|
661
717
|
|
|
662
718
|
/**
|
|
@@ -674,6 +730,24 @@ export async function retrieveForTurn(
|
|
|
674
730
|
const now = new Date();
|
|
675
731
|
const nowMs = now.getTime();
|
|
676
732
|
|
|
733
|
+
let embeddingProvider: string | null = null;
|
|
734
|
+
let embeddingModel: string | null = null;
|
|
735
|
+
let hybridSearchLatencyMs = 0;
|
|
736
|
+
|
|
737
|
+
const ZERO_METRICS: RetrievalMetrics = {
|
|
738
|
+
semanticHits: 0,
|
|
739
|
+
mergedCount: 0,
|
|
740
|
+
selectedCount: 0,
|
|
741
|
+
tier1Count: 0,
|
|
742
|
+
tier2Count: 0,
|
|
743
|
+
hybridSearchLatencyMs: 0,
|
|
744
|
+
sparseVectorUsed: false,
|
|
745
|
+
embeddingProvider: null,
|
|
746
|
+
embeddingModel: null,
|
|
747
|
+
queryContext: null,
|
|
748
|
+
topCandidates: [],
|
|
749
|
+
};
|
|
750
|
+
|
|
677
751
|
// 1. Build query from last exchange
|
|
678
752
|
const queryText = [opts.assistantLastMessage, opts.userLastMessage]
|
|
679
753
|
.filter((m) => m.length > 0)
|
|
@@ -685,6 +759,7 @@ export async function retrieveForTurn(
|
|
|
685
759
|
(b): b is ImageContent => b.type === "image",
|
|
686
760
|
);
|
|
687
761
|
const allCandidateIds = new Map<string, number>(); // nodeId → best score
|
|
762
|
+
const searchStart = Date.now();
|
|
688
763
|
|
|
689
764
|
if (imageBlocks.length > 0) {
|
|
690
765
|
try {
|
|
@@ -705,6 +780,10 @@ export async function retrieveForTurn(
|
|
|
705
780
|
const imgResult = await embedWithRetry(opts.config, [imageInput], {
|
|
706
781
|
signal: opts.signal,
|
|
707
782
|
});
|
|
783
|
+
if (!embeddingProvider) {
|
|
784
|
+
embeddingProvider = imgResult.provider;
|
|
785
|
+
embeddingModel = imgResult.model;
|
|
786
|
+
}
|
|
708
787
|
const imgVector = imgResult.vectors[0];
|
|
709
788
|
if (imgVector) {
|
|
710
789
|
const imgResults = await searchGraphNodes(imgVector, 40, [
|
|
@@ -723,7 +802,20 @@ export async function retrieveForTurn(
|
|
|
723
802
|
}
|
|
724
803
|
|
|
725
804
|
if (queryText.trim().length === 0 && allCandidateIds.size === 0) {
|
|
726
|
-
return {
|
|
805
|
+
return {
|
|
806
|
+
nodes: [],
|
|
807
|
+
serendipityNodes: [],
|
|
808
|
+
triggeredNodes: [],
|
|
809
|
+
latencyMs: Date.now() - start,
|
|
810
|
+
metrics: {
|
|
811
|
+
...ZERO_METRICS,
|
|
812
|
+
hybridSearchLatencyMs:
|
|
813
|
+
imageBlocks.length > 0 ? Date.now() - searchStart : 0,
|
|
814
|
+
embeddingProvider,
|
|
815
|
+
embeddingModel,
|
|
816
|
+
queryContext: queryText || null,
|
|
817
|
+
},
|
|
818
|
+
};
|
|
727
819
|
}
|
|
728
820
|
|
|
729
821
|
// Chunk if too large (8k token ≈ 32k chars conservative estimate)
|
|
@@ -764,6 +856,8 @@ export async function retrieveForTurn(
|
|
|
764
856
|
const embedResults = await embedWithRetry(opts.config, chunks, {
|
|
765
857
|
signal: opts.signal,
|
|
766
858
|
});
|
|
859
|
+
embeddingProvider = embedResults.provider;
|
|
860
|
+
embeddingModel = embedResults.model;
|
|
767
861
|
queryEmbeddings = embedResults.vectors;
|
|
768
862
|
|
|
769
863
|
const searchPromises = queryEmbeddings.map((vec) =>
|
|
@@ -777,14 +871,35 @@ export async function retrieveForTurn(
|
|
|
777
871
|
allCandidateIds.set(r.nodeId, Math.max(current, r.score));
|
|
778
872
|
}
|
|
779
873
|
}
|
|
874
|
+
hybridSearchLatencyMs = Date.now() - searchStart;
|
|
780
875
|
} catch (err) {
|
|
781
876
|
log.warn({ err }, "Embedding/search failed for turn retrieval");
|
|
782
877
|
if (allCandidateIds.size === 0) {
|
|
783
|
-
return {
|
|
878
|
+
return {
|
|
879
|
+
nodes: [],
|
|
880
|
+
serendipityNodes: [],
|
|
881
|
+
triggeredNodes: [],
|
|
882
|
+
latencyMs: Date.now() - start,
|
|
883
|
+
metrics: {
|
|
884
|
+
...ZERO_METRICS,
|
|
885
|
+
hybridSearchLatencyMs: Date.now() - searchStart,
|
|
886
|
+
embeddingProvider,
|
|
887
|
+
embeddingModel,
|
|
888
|
+
queryContext: queryText || null,
|
|
889
|
+
},
|
|
890
|
+
};
|
|
784
891
|
}
|
|
785
892
|
}
|
|
786
893
|
}
|
|
787
894
|
|
|
895
|
+
// Capture search latency for image-only searches (text path sets it inside its try block)
|
|
896
|
+
if (hybridSearchLatencyMs === 0 && allCandidateIds.size > 0) {
|
|
897
|
+
hybridSearchLatencyMs = Date.now() - searchStart;
|
|
898
|
+
}
|
|
899
|
+
|
|
900
|
+
// Snapshot pure vector-search results before triggers inflate the set
|
|
901
|
+
const pureSemanticHits = allCandidateIds.size;
|
|
902
|
+
|
|
788
903
|
// 3. Evaluate semantic triggers
|
|
789
904
|
const semanticTriggers = getActiveTriggersByType("semantic", opts.scopeId);
|
|
790
905
|
const triggeredSemantic =
|
|
@@ -813,20 +928,36 @@ export async function retrieveForTurn(
|
|
|
813
928
|
if (newCandidateIds.length === 0) {
|
|
814
929
|
return {
|
|
815
930
|
nodes: [],
|
|
931
|
+
serendipityNodes: [],
|
|
816
932
|
triggeredNodes: triggeredSemantic,
|
|
817
933
|
latencyMs: Date.now() - start,
|
|
934
|
+
metrics: {
|
|
935
|
+
...ZERO_METRICS,
|
|
936
|
+
semanticHits: pureSemanticHits,
|
|
937
|
+
hybridSearchLatencyMs,
|
|
938
|
+
embeddingProvider,
|
|
939
|
+
embeddingModel,
|
|
940
|
+
queryContext: queryText || null,
|
|
941
|
+
},
|
|
818
942
|
};
|
|
819
943
|
}
|
|
820
944
|
|
|
821
945
|
// 5. Hydrate and score
|
|
822
946
|
const nodes = getNodesByIds(newCandidateIds);
|
|
823
947
|
const scored: ScoredNode[] = [];
|
|
948
|
+
const capabilityCandidates: { node: MemoryNode; sim: number }[] = [];
|
|
824
949
|
|
|
825
950
|
for (const node of nodes) {
|
|
826
951
|
if (node.fidelity === "gone") continue;
|
|
827
|
-
//
|
|
828
|
-
//
|
|
829
|
-
if (isCapabilityNode(node))
|
|
952
|
+
// Capability nodes (auto-seeded skills/CLI) are excluded from the general
|
|
953
|
+
// scoring pool — they compete in the dedicated procedural reserve below.
|
|
954
|
+
if (isCapabilityNode(node)) {
|
|
955
|
+
capabilityCandidates.push({
|
|
956
|
+
node,
|
|
957
|
+
sim: allCandidateIds.get(node.id) ?? 0,
|
|
958
|
+
});
|
|
959
|
+
continue;
|
|
960
|
+
}
|
|
830
961
|
|
|
831
962
|
const semanticSim = allCandidateIds.get(node.id) ?? 0;
|
|
832
963
|
const effectiveSig = computeEffectiveSignificance(node, nowMs);
|
|
@@ -853,156 +984,128 @@ export async function retrieveForTurn(
|
|
|
853
984
|
);
|
|
854
985
|
}
|
|
855
986
|
|
|
987
|
+
// 5b. Reserve slots for capability nodes (skills/CLI).
|
|
988
|
+
// Sourced from vector search candidates — only semantically relevant
|
|
989
|
+
// capabilities compete for reserved slots.
|
|
990
|
+
const PROCEDURAL_RESERVE = 3;
|
|
991
|
+
|
|
992
|
+
const proceduralCandidates = capabilityCandidates
|
|
993
|
+
.filter(({ node }) => !opts.tracker.isInContext(node.id))
|
|
994
|
+
.sort((a, b) => b.sim - a.sim);
|
|
995
|
+
|
|
996
|
+
const seenProcCapIds = new Set<string>();
|
|
997
|
+
const rankedProcedural = proceduralCandidates
|
|
998
|
+
.filter(({ node }) => {
|
|
999
|
+
const match = node.content.match(
|
|
1000
|
+
/^skill:(\S+)\n|^cli:(\S+)\n|^\s*The ".*?" skill \(([^)]+)\)|^\s*The "assistant (\S+)" CLI command/,
|
|
1001
|
+
);
|
|
1002
|
+
const capId = match?.[1] ?? match?.[2] ?? match?.[3] ?? match?.[4];
|
|
1003
|
+
if (capId) {
|
|
1004
|
+
if (seenProcCapIds.has(capId)) return false;
|
|
1005
|
+
seenProcCapIds.add(capId);
|
|
1006
|
+
}
|
|
1007
|
+
return true;
|
|
1008
|
+
})
|
|
1009
|
+
.slice(0, PROCEDURAL_RESERVE);
|
|
1010
|
+
|
|
1011
|
+
const proceduralScored: ScoredNode[] = rankedProcedural.map(({ node, sim }) =>
|
|
1012
|
+
scoreCandidate(
|
|
1013
|
+
node,
|
|
1014
|
+
{
|
|
1015
|
+
semanticSimilarity: sim,
|
|
1016
|
+
effectiveSignificance: computeEffectiveSignificance(node, nowMs),
|
|
1017
|
+
emotionalIntensity: node.emotionalCharge.intensity,
|
|
1018
|
+
temporalBoost: (computeTemporalBoost(node, now) + 1) / 2,
|
|
1019
|
+
recencyBoost: computeRecencyBoost(node, nowMs),
|
|
1020
|
+
triggerBoost: triggerBoostMap.get(node.id) ?? 0,
|
|
1021
|
+
activationBoost: 0,
|
|
1022
|
+
},
|
|
1023
|
+
PER_TURN_WEIGHTS,
|
|
1024
|
+
),
|
|
1025
|
+
);
|
|
1026
|
+
|
|
1027
|
+
const PROCEDURAL_SIM_FLOOR = 0.15;
|
|
1028
|
+
const proceduralInjected = proceduralScored.filter(
|
|
1029
|
+
(s) => s.scoreBreakdown.semanticSimilarity >= PROCEDURAL_SIM_FLOOR,
|
|
1030
|
+
);
|
|
1031
|
+
const proceduralIds = new Set(proceduralInjected.map((s) => s.node.id));
|
|
1032
|
+
|
|
856
1033
|
// Sort and apply threshold — pull a wider pool for dedup, then trim
|
|
857
1034
|
scored.sort((a, b) => b.score - a.score);
|
|
858
1035
|
const INJECTION_THRESHOLD = 0.3;
|
|
859
|
-
const PRE_DEDUP_POOL =
|
|
860
|
-
const MAX_INJECTED =
|
|
1036
|
+
const PRE_DEDUP_POOL = 20;
|
|
1037
|
+
const MAX_INJECTED = 4;
|
|
861
1038
|
const pool = scored
|
|
862
1039
|
.filter((s) => s.score >= INJECTION_THRESHOLD)
|
|
863
1040
|
.slice(0, PRE_DEDUP_POOL);
|
|
864
1041
|
|
|
865
1042
|
// Dedup + rerank with a fast model when the pool is large enough to warrant it
|
|
866
|
-
|
|
867
|
-
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
latencyMs: Date.now() - start,
|
|
875
|
-
};
|
|
876
|
-
}
|
|
877
|
-
|
|
878
|
-
// ---------------------------------------------------------------------------
|
|
879
|
-
// Periodic refresh — every N turns, replenish memory context
|
|
880
|
-
// ---------------------------------------------------------------------------
|
|
881
|
-
|
|
882
|
-
export interface RefreshOpts {
|
|
883
|
-
/** Recent turns (last 5-6) concatenated as text. */
|
|
884
|
-
recentTurnsText: string;
|
|
885
|
-
scopeId: string;
|
|
886
|
-
config: AssistantConfig;
|
|
887
|
-
tracker: InContextTracker;
|
|
888
|
-
signal?: AbortSignal;
|
|
889
|
-
/** Max new nodes to inject (default 10). */
|
|
890
|
-
maxNodes?: number;
|
|
891
|
-
}
|
|
892
|
-
|
|
893
|
-
export interface RefreshResult {
|
|
894
|
-
nodes: ScoredNode[];
|
|
895
|
-
latencyMs: number;
|
|
896
|
-
}
|
|
897
|
-
|
|
898
|
-
/** Default interval between refresh cycles. */
|
|
899
|
-
export const REFRESH_INTERVAL_TURNS = 5;
|
|
900
|
-
|
|
901
|
-
/**
|
|
902
|
-
* Periodic context refresh. Runs every N turns to catch memories that
|
|
903
|
-
* the per-turn injection missed due to its high threshold.
|
|
904
|
-
*
|
|
905
|
-
* Uses a wider window (recent 5-6 turns) as the query to capture the
|
|
906
|
-
* evolved conversational vibe. No LLM re-ranking — pure embedding +
|
|
907
|
-
* scoring for speed (~200ms).
|
|
908
|
-
*
|
|
909
|
-
* Also runs after compaction to replenish lost memory context.
|
|
910
|
-
*/
|
|
911
|
-
export async function refreshContextMemory(
|
|
912
|
-
opts: RefreshOpts,
|
|
913
|
-
): Promise<RefreshResult> {
|
|
914
|
-
const start = Date.now();
|
|
915
|
-
const now = new Date();
|
|
916
|
-
const nowMs = now.getTime();
|
|
917
|
-
const maxNodes = opts.maxNodes ?? 10;
|
|
918
|
-
|
|
919
|
-
if (opts.recentTurnsText.trim().length === 0) {
|
|
920
|
-
return { nodes: [], latencyMs: Date.now() - start };
|
|
921
|
-
}
|
|
922
|
-
|
|
923
|
-
// 1. Embed recent turns window
|
|
924
|
-
const queryText =
|
|
925
|
-
opts.recentTurnsText.length > 6000
|
|
926
|
-
? opts.recentTurnsText.slice(-6000)
|
|
927
|
-
: opts.recentTurnsText;
|
|
928
|
-
|
|
929
|
-
let queryVector: number[] | null = null;
|
|
930
|
-
try {
|
|
931
|
-
const result = await embedWithRetry(opts.config, [queryText], {
|
|
932
|
-
signal: opts.signal,
|
|
933
|
-
});
|
|
934
|
-
queryVector = result.vectors[0] ?? null;
|
|
935
|
-
} catch (err) {
|
|
936
|
-
log.warn({ err }, "Embedding failed for context refresh");
|
|
937
|
-
return { nodes: [], latencyMs: Date.now() - start };
|
|
938
|
-
}
|
|
939
|
-
|
|
940
|
-
if (!queryVector) {
|
|
941
|
-
return { nodes: [], latencyMs: Date.now() - start };
|
|
1043
|
+
let injected: ScoredNode[];
|
|
1044
|
+
let llmDedupApplied = false;
|
|
1045
|
+
if (pool.length > MAX_INJECTED) {
|
|
1046
|
+
const result = await dedupForTurn(pool, MAX_INJECTED, opts.userLastMessage);
|
|
1047
|
+
injected = result.nodes;
|
|
1048
|
+
llmDedupApplied = result.llmApplied;
|
|
1049
|
+
} else {
|
|
1050
|
+
injected = pool;
|
|
942
1051
|
}
|
|
943
1052
|
|
|
944
|
-
//
|
|
945
|
-
|
|
946
|
-
|
|
947
|
-
|
|
948
|
-
|
|
1053
|
+
// Remove procedural-reserved nodes from general set to avoid double-counting
|
|
1054
|
+
const generalInjected = injected.filter((s) => !proceduralIds.has(s.node.id));
|
|
1055
|
+
|
|
1056
|
+
// Backfill vacated general slots from the remaining pool so we always
|
|
1057
|
+
// return up to MAX_INJECTED general memories when eligible candidates exist.
|
|
1058
|
+
// Only skip backfill when LLM dedup genuinely ran — it intentionally rejected
|
|
1059
|
+
// items as duplicates/irrelevant. When dedupForTurn fell back to a plain
|
|
1060
|
+
// top-N slice (no provider, tool call failure), backfill is still appropriate.
|
|
1061
|
+
if (generalInjected.length < MAX_INJECTED && !llmDedupApplied) {
|
|
1062
|
+
const usedIds = new Set([
|
|
1063
|
+
...generalInjected.map((s) => s.node.id),
|
|
1064
|
+
...proceduralIds,
|
|
949
1065
|
]);
|
|
950
|
-
|
|
951
|
-
|
|
952
|
-
|
|
1066
|
+
const backfillCandidates = pool.filter((s) => !usedIds.has(s.node.id));
|
|
1067
|
+
const needed = MAX_INJECTED - generalInjected.length;
|
|
1068
|
+
for (let i = 0; i < Math.min(needed, backfillCandidates.length); i++) {
|
|
1069
|
+
generalInjected.push(backfillCandidates[i]);
|
|
1070
|
+
}
|
|
953
1071
|
}
|
|
954
1072
|
|
|
955
|
-
|
|
956
|
-
const
|
|
957
|
-
(c) => !opts.tracker.isInContext(c.nodeId),
|
|
958
|
-
);
|
|
1073
|
+
const allDeterministic = [...generalInjected, ...proceduralInjected];
|
|
1074
|
+
const deterministicIds = new Set(allDeterministic.map((n) => n.node.id));
|
|
959
1075
|
|
|
960
|
-
|
|
961
|
-
|
|
962
|
-
|
|
963
|
-
|
|
964
|
-
// 4. Hydrate and score
|
|
965
|
-
const nodes = getNodesByIds(newCandidates.map((c) => c.nodeId));
|
|
966
|
-
const candidateScoreMap = new Map(
|
|
967
|
-
newCandidates.map((c) => [c.nodeId, c.score]),
|
|
1076
|
+
// Reserve 1 serendipity slot from scored candidates not in the deterministic set
|
|
1077
|
+
const serendipityPool = scored.filter(
|
|
1078
|
+
(s) => s.score >= INJECTION_THRESHOLD && !deterministicIds.has(s.node.id),
|
|
968
1079
|
);
|
|
969
|
-
|
|
970
|
-
const
|
|
971
|
-
|
|
972
|
-
|
|
973
|
-
|
|
974
|
-
|
|
975
|
-
|
|
976
|
-
|
|
977
|
-
|
|
978
|
-
|
|
979
|
-
|
|
980
|
-
scored.push(
|
|
981
|
-
scoreCandidate(
|
|
982
|
-
node,
|
|
983
|
-
{
|
|
984
|
-
semanticSimilarity: semanticSim,
|
|
985
|
-
effectiveSignificance: effectiveSig,
|
|
986
|
-
emotionalIntensity: node.emotionalCharge.intensity,
|
|
987
|
-
temporalBoost: (temporal + 1) / 2,
|
|
988
|
-
recencyBoost: recency,
|
|
989
|
-
triggerBoost: 0,
|
|
990
|
-
activationBoost: 0,
|
|
991
|
-
},
|
|
992
|
-
PER_TURN_WEIGHTS,
|
|
993
|
-
),
|
|
994
|
-
);
|
|
995
|
-
}
|
|
996
|
-
|
|
997
|
-
// 5. Return top N — lower threshold than per-turn since this is a periodic refresh
|
|
998
|
-
scored.sort((a, b) => b.score - a.score);
|
|
999
|
-
const REFRESH_THRESHOLD = 0.15;
|
|
1000
|
-
const refreshed = scored
|
|
1001
|
-
.filter((s) => s.score >= REFRESH_THRESHOLD)
|
|
1002
|
-
.slice(0, maxNodes);
|
|
1080
|
+
const serendipityPicks = sampleSerendipity(serendipityPool, 1);
|
|
1081
|
+
const allInjected = [...allDeterministic, ...serendipityPicks];
|
|
1082
|
+
|
|
1083
|
+
const TOP_N = 20;
|
|
1084
|
+
const topCandidates = scored.slice(0, TOP_N).map((s) => ({
|
|
1085
|
+
nodeId: s.node.id,
|
|
1086
|
+
type: s.node.type,
|
|
1087
|
+
score: s.score,
|
|
1088
|
+
semanticSimilarity: s.scoreBreakdown.semanticSimilarity,
|
|
1089
|
+
recencyBoost: s.scoreBreakdown.recencyBoost,
|
|
1090
|
+
}));
|
|
1003
1091
|
|
|
1004
1092
|
return {
|
|
1005
|
-
nodes:
|
|
1093
|
+
nodes: allInjected,
|
|
1094
|
+
serendipityNodes: serendipityPicks,
|
|
1095
|
+
triggeredNodes: triggeredSemantic,
|
|
1006
1096
|
latencyMs: Date.now() - start,
|
|
1097
|
+
metrics: {
|
|
1098
|
+
semanticHits: pureSemanticHits,
|
|
1099
|
+
mergedCount: scored.length,
|
|
1100
|
+
selectedCount: allInjected.length,
|
|
1101
|
+
tier1Count: 0,
|
|
1102
|
+
tier2Count: 0,
|
|
1103
|
+
hybridSearchLatencyMs,
|
|
1104
|
+
sparseVectorUsed: false,
|
|
1105
|
+
embeddingProvider,
|
|
1106
|
+
embeddingModel,
|
|
1107
|
+
queryContext: queryText || null,
|
|
1108
|
+
topCandidates,
|
|
1109
|
+
},
|
|
1007
1110
|
};
|
|
1008
1111
|
}
|