@vellumai/assistant 0.6.0 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (285) hide show
  1. package/AGENTS.md +4 -0
  2. package/ARCHITECTURE.md +68 -15
  3. package/Dockerfile +2 -2
  4. package/bun.lock +6 -2
  5. package/docker-entrypoint.sh +32 -1
  6. package/docs/architecture/integrations.md +1 -1
  7. package/docs/architecture/memory.md +21 -24
  8. package/openapi.yaml +538 -3
  9. package/package.json +5 -1
  10. package/src/__tests__/anthropic-provider.test.ts +160 -95
  11. package/src/__tests__/app-dir-path-guard.test.ts +1 -0
  12. package/src/__tests__/app-executors.test.ts +47 -1
  13. package/src/__tests__/app-source-watcher.test.ts +159 -0
  14. package/src/__tests__/checker.test.ts +38 -6
  15. package/src/__tests__/config-schema.test.ts +5 -0
  16. package/src/__tests__/conversation-agent-loop-overflow.test.ts +4 -6
  17. package/src/__tests__/conversation-agent-loop.test.ts +4 -51
  18. package/src/__tests__/conversation-history-web-search.test.ts +1 -1
  19. package/src/__tests__/conversation-runtime-assembly.test.ts +653 -832
  20. package/src/__tests__/conversation-runtime-workspace.test.ts +1 -93
  21. package/src/__tests__/conversation-tool-setup-app-refresh.test.ts +17 -4
  22. package/src/__tests__/conversation-wipe.test.ts +2 -6
  23. package/src/__tests__/conversation-workspace-cache-state.test.ts +6 -12
  24. package/src/__tests__/conversation-workspace-injection.test.ts +25 -26
  25. package/src/__tests__/conversation-workspace-tool-tracking.test.ts +1 -1
  26. package/src/__tests__/copy-composer-tc-templates.test.ts +335 -0
  27. package/src/__tests__/date-context.test.ts +76 -210
  28. package/src/__tests__/db-schedule-syntax-migration.test.ts +16 -1
  29. package/src/__tests__/file-list-tool.test.ts +219 -0
  30. package/src/__tests__/first-greeting.test.ts +1 -1
  31. package/src/__tests__/heartbeat-service.test.ts +180 -3
  32. package/src/__tests__/identity-routes.test.ts +328 -0
  33. package/src/__tests__/injection-block.test.ts +24 -0
  34. package/src/__tests__/install-skill-routing.test.ts +7 -6
  35. package/src/__tests__/jobs-store-qdrant-breaker.test.ts +15 -14
  36. package/src/__tests__/list-messages-tool-merge.test.ts +300 -0
  37. package/src/__tests__/llm-context-normalization.test.ts +18 -18
  38. package/src/__tests__/llm-context-route-provider.test.ts +101 -0
  39. package/src/__tests__/llm-request-log-turn-query.test.ts +162 -0
  40. package/src/__tests__/log-export-workspace.test.ts +72 -105
  41. package/src/__tests__/mcp-abort-signal.test.ts +5 -0
  42. package/src/__tests__/mcp-client-auth.test.ts +5 -0
  43. package/src/__tests__/memory-recall-log-store.test.ts +132 -0
  44. package/src/__tests__/migration-export-streaming.test.ts +304 -0
  45. package/src/__tests__/migration-import-commit-http.test.ts +11 -10
  46. package/src/__tests__/mock-fetch.ts +87 -0
  47. package/src/__tests__/notification-decision-recipient-context.test.ts +282 -0
  48. package/src/__tests__/onboarding-template-contract.test.ts +62 -14
  49. package/src/__tests__/parser.test.ts +32 -0
  50. package/src/__tests__/permission-checker-host-gate.test.ts +452 -0
  51. package/src/__tests__/permission-controls-v2-flag.test.ts +55 -0
  52. package/src/__tests__/permission-mode-sse.test.ts +418 -0
  53. package/src/__tests__/permission-mode-store.test.ts +277 -0
  54. package/src/__tests__/permission-mode.test.ts +101 -0
  55. package/src/__tests__/platform-bash-auto-approve.test.ts +359 -0
  56. package/src/__tests__/profiler-routes.test.ts +502 -0
  57. package/src/__tests__/profiler-run-store.test.ts +441 -0
  58. package/src/__tests__/proxy-approval-callback.test.ts +4 -75
  59. package/src/__tests__/registry.test.ts +1 -1
  60. package/src/__tests__/sandbox-host-parity.test.ts +5 -4
  61. package/src/__tests__/scheduler-reuse-conversation.test.ts +368 -0
  62. package/src/__tests__/scrub-corrupted-image-attachments.test.ts +278 -0
  63. package/src/__tests__/search-skills-unified.test.ts +4 -3
  64. package/src/__tests__/send-endpoint-busy.test.ts +42 -3
  65. package/src/__tests__/set-permission-mode.test.ts +274 -0
  66. package/src/__tests__/skill-load-feature-flag.test.ts +12 -0
  67. package/src/__tests__/skill-memory.test.ts +2 -783
  68. package/src/__tests__/strip-memory-injections.test.ts +187 -0
  69. package/src/__tests__/subagent-detail.test.ts +84 -0
  70. package/src/__tests__/subagent-disposal.test.ts +308 -0
  71. package/src/__tests__/subagent-manager-notify.test.ts +19 -10
  72. package/src/__tests__/subagent-notify-parent.test.ts +390 -0
  73. package/src/__tests__/subagent-role-registry.test.ts +108 -0
  74. package/src/__tests__/subagent-tool-filtering.test.ts +71 -0
  75. package/src/__tests__/subagent-tools.test.ts +464 -4
  76. package/src/__tests__/system-prompt-ask-mode.test.ts +139 -0
  77. package/src/__tests__/task-memory-cleanup.test.ts +12 -12
  78. package/src/__tests__/terminal-tools.test.ts +17 -27
  79. package/src/__tests__/test-preload.ts +4 -0
  80. package/src/__tests__/tool-executor.test.ts +4 -26
  81. package/src/__tests__/tool-side-effects-slack-dm.test.ts +1 -0
  82. package/src/__tests__/top-level-renderer.test.ts +10 -13
  83. package/src/__tests__/trusted-contact-lifecycle-notifications.test.ts +116 -2
  84. package/src/__tests__/workspace-migration-028-recover-conversations-from-disk-view.test.ts +387 -0
  85. package/src/agent/loop.ts +6 -0
  86. package/src/approvals/guardian-request-resolvers.ts +24 -0
  87. package/src/avatar/traits-png-sync.ts +3 -3
  88. package/src/cli/__tests__/run-assistant-command.ts +29 -0
  89. package/src/cli/commands/__tests__/email-download.test.ts +245 -0
  90. package/src/cli/commands/__tests__/email-list.test.ts +192 -0
  91. package/src/cli/commands/__tests__/email-register.test.ts +186 -0
  92. package/src/cli/commands/__tests__/email-send.test.ts +291 -0
  93. package/src/cli/commands/__tests__/email-status.test.ts +181 -0
  94. package/src/cli/commands/__tests__/email-unregister.test.ts +139 -0
  95. package/src/cli/commands/__tests__/routes.test.ts +562 -0
  96. package/src/cli/commands/conversations.ts +1 -8
  97. package/src/cli/commands/email.ts +584 -835
  98. package/src/cli/commands/memory.ts +1 -34
  99. package/src/cli/commands/notifications.ts +7 -2
  100. package/src/cli/commands/oauth/connect.ts +14 -5
  101. package/src/cli/commands/routes.ts +396 -0
  102. package/src/cli/commands/skills.ts +130 -20
  103. package/src/cli/program.ts +2 -0
  104. package/src/cli.ts +1 -120
  105. package/src/config/bundled-skills/app-builder/SKILL.md +4 -1
  106. package/src/config/bundled-skills/gmail/SKILL.md +2 -2
  107. package/src/config/bundled-skills/messaging/SKILL.md +7 -0
  108. package/src/config/bundled-skills/schedule/SKILL.md +22 -2
  109. package/src/config/bundled-skills/schedule/TOOLS.json +8 -0
  110. package/src/config/bundled-skills/settings/tools/avatar-get.ts +3 -13
  111. package/src/config/bundled-skills/settings/tools/avatar-remove.ts +2 -4
  112. package/src/config/bundled-skills/settings/tools/avatar-update.ts +5 -2
  113. package/src/config/bundled-skills/slack/SKILL.md +2 -0
  114. package/src/config/bundled-skills/subagent/SKILL.md +43 -3
  115. package/src/config/bundled-skills/subagent/TOOLS.json +29 -4
  116. package/src/config/env-registry.ts +63 -0
  117. package/src/config/feature-flag-registry.json +17 -1
  118. package/src/config/schema.ts +8 -0
  119. package/src/config/schemas/filing.ts +51 -0
  120. package/src/config/schemas/heartbeat.ts +15 -12
  121. package/src/config/schemas/memory-lifecycle.ts +12 -0
  122. package/src/config/schemas/security.ts +14 -0
  123. package/src/daemon/app-source-watcher.ts +93 -0
  124. package/src/daemon/config-watcher.ts +79 -1
  125. package/src/daemon/conversation-agent-loop-handlers.ts +20 -0
  126. package/src/daemon/conversation-agent-loop.ts +158 -65
  127. package/src/daemon/conversation-history.ts +4 -19
  128. package/src/daemon/conversation-lifecycle.ts +8 -14
  129. package/src/daemon/conversation-process.ts +13 -7
  130. package/src/daemon/conversation-runtime-assembly.ts +300 -306
  131. package/src/daemon/conversation-tool-setup.ts +44 -14
  132. package/src/daemon/conversation-workspace.ts +1 -2
  133. package/src/daemon/conversation.ts +18 -0
  134. package/src/daemon/date-context.ts +26 -53
  135. package/src/daemon/first-greeting.ts +1 -1
  136. package/src/daemon/handlers/conversations.ts +4 -7
  137. package/src/daemon/handlers/shared.test.ts +143 -0
  138. package/src/daemon/handlers/shared.ts +63 -5
  139. package/src/daemon/handlers/skills.ts +11 -18
  140. package/src/daemon/lifecycle.ts +199 -157
  141. package/src/daemon/message-types/conversations.ts +25 -6
  142. package/src/daemon/message-types/messages.ts +9 -1
  143. package/src/daemon/message-types/schedules.ts +1 -0
  144. package/src/daemon/message-types/settings.ts +6 -0
  145. package/src/daemon/profiler-run-store.ts +557 -0
  146. package/src/daemon/server.ts +89 -9
  147. package/src/daemon/shutdown-handlers.ts +5 -0
  148. package/src/daemon/tool-side-effects.ts +23 -3
  149. package/src/export/transcript-formatter.ts +148 -0
  150. package/src/filing/filing-service.ts +228 -0
  151. package/src/heartbeat/heartbeat-service.ts +96 -7
  152. package/src/mcp/client.ts +6 -0
  153. package/src/mcp/mcp-oauth-provider.ts +149 -27
  154. package/src/memory/admin.ts +33 -32
  155. package/src/memory/app-store.ts +69 -0
  156. package/src/memory/conversation-bootstrap.ts +1 -1
  157. package/src/memory/conversation-crud.ts +136 -107
  158. package/src/memory/conversation-group-migration.ts +1 -1
  159. package/src/memory/conversation-queries.ts +58 -12
  160. package/src/memory/conversation-title-service.ts +1 -0
  161. package/src/memory/db-init.ts +182 -376
  162. package/src/memory/graph/bootstrap.ts +75 -66
  163. package/src/memory/graph/capability-seed.ts +167 -15
  164. package/src/memory/graph/consolidation.ts +38 -4
  165. package/src/memory/graph/conversation-graph-memory.ts +133 -104
  166. package/src/memory/graph/extraction-job.ts +9 -4
  167. package/src/memory/graph/extraction.ts +66 -23
  168. package/src/memory/graph/graph-memory-state-store.ts +37 -0
  169. package/src/memory/graph/graph-search.ts +29 -15
  170. package/src/memory/graph/injection.ts +38 -8
  171. package/src/memory/graph/inspect.ts +12 -3
  172. package/src/memory/graph/retriever.ts +365 -262
  173. package/src/memory/graph/store.test.ts +48 -0
  174. package/src/memory/graph/store.ts +150 -11
  175. package/src/memory/graph/tool-handlers.ts +84 -209
  176. package/src/memory/graph/tools.ts +8 -52
  177. package/src/memory/graph/types.ts +24 -0
  178. package/src/memory/job-handlers/cleanup.ts +44 -1
  179. package/src/memory/jobs-store.ts +70 -60
  180. package/src/memory/jobs-worker.ts +44 -28
  181. package/src/memory/llm-request-log-store.ts +96 -12
  182. package/src/memory/memory-recall-log-store.ts +49 -5
  183. package/src/memory/migrations/203-drop-memory-items-tables.ts +33 -1
  184. package/src/memory/migrations/206-memory-graph-node-edits.ts +19 -0
  185. package/src/memory/migrations/206-scrub-corrupted-image-attachments.ts +131 -0
  186. package/src/memory/migrations/207-conversation-graph-memory-state.ts +20 -0
  187. package/src/memory/migrations/208-conversations-last-message-at.ts +35 -0
  188. package/src/memory/migrations/209-strip-thinking-from-consolidated.ts +85 -0
  189. package/src/memory/migrations/210-schedule-reuse-conversation.ts +13 -0
  190. package/src/memory/migrations/211-memory-recall-logs-query-context.ts +21 -0
  191. package/src/memory/migrations/212-llm-request-logs-created-at-index.ts +19 -0
  192. package/src/memory/migrations/index.ts +8 -0
  193. package/src/memory/migrations/registry.ts +8 -0
  194. package/src/memory/schema/conversations.ts +14 -0
  195. package/src/memory/schema/infrastructure.ts +8 -1
  196. package/src/memory/schema/memory-core.ts +0 -51
  197. package/src/memory/schema/memory-graph.ts +15 -0
  198. package/src/memory/task-memory-cleanup.ts +30 -11
  199. package/src/notifications/copy-composer.ts +86 -0
  200. package/src/notifications/decision-engine.ts +35 -0
  201. package/src/permissions/checker.ts +12 -1
  202. package/src/permissions/permission-mode-store.ts +180 -0
  203. package/src/permissions/permission-mode.ts +31 -0
  204. package/src/permissions/workspace-policy.ts +9 -0
  205. package/src/prompts/system-prompt.ts +59 -7
  206. package/src/prompts/templates/BOOTSTRAP-REFERENCE.md +100 -0
  207. package/src/prompts/templates/BOOTSTRAP.md +70 -165
  208. package/src/prompts/templates/HEARTBEAT.md +3 -1
  209. package/src/prompts/templates/SOUL.md +25 -4
  210. package/src/prompts/templates/UPDATES.md +8 -0
  211. package/src/providers/anthropic/client.ts +107 -219
  212. package/src/runtime/auth/route-policy.ts +23 -0
  213. package/src/runtime/http-server.ts +32 -2
  214. package/src/runtime/http-types.ts +12 -1
  215. package/src/runtime/migrations/vbundle-builder.ts +389 -3
  216. package/src/runtime/migrations/vbundle-importer.ts +8 -6
  217. package/src/runtime/routes/__tests__/user-route-dispatcher.test.ts +378 -0
  218. package/src/runtime/routes/app-management-routes.ts +1 -11
  219. package/src/runtime/routes/approval-strategies/guardian-callback-strategy.ts +26 -0
  220. package/src/runtime/routes/archive-utils.ts +29 -0
  221. package/src/runtime/routes/avatar-routes.ts +2 -9
  222. package/src/runtime/routes/btw-routes.ts +14 -1
  223. package/src/runtime/routes/conversation-analysis-routes.ts +173 -0
  224. package/src/runtime/routes/conversation-management-routes.ts +1 -14
  225. package/src/runtime/routes/conversation-query-routes.ts +49 -3
  226. package/src/runtime/routes/conversation-routes.ts +264 -44
  227. package/src/runtime/routes/heartbeat-routes.ts +4 -10
  228. package/src/runtime/routes/identity-routes.ts +53 -18
  229. package/src/runtime/routes/llm-context-normalization.ts +14 -10
  230. package/src/runtime/routes/log-export-routes.ts +23 -275
  231. package/src/runtime/routes/memory-item-routes.test.ts +168 -233
  232. package/src/runtime/routes/migration-routes.ts +18 -7
  233. package/src/runtime/routes/profiler-routes.ts +350 -0
  234. package/src/runtime/routes/schedule-routes.ts +27 -12
  235. package/src/runtime/routes/settings-routes.ts +95 -8
  236. package/src/runtime/routes/subagents-routes.ts +28 -7
  237. package/src/runtime/routes/user-route-dispatcher.ts +223 -0
  238. package/src/runtime/routes/user-routes.ts +41 -0
  239. package/src/runtime/routes/workspace-routes.ts +0 -1
  240. package/src/schedule/schedule-store.ts +30 -0
  241. package/src/schedule/scheduler.ts +45 -18
  242. package/src/skills/catalog-install.ts +10 -2
  243. package/src/skills/managed-store.ts +2 -2
  244. package/src/skills/skill-memory.ts +1 -293
  245. package/src/subagent/index.ts +13 -3
  246. package/src/subagent/manager.ts +308 -29
  247. package/src/subagent/types.ts +68 -0
  248. package/src/tasks/task-runner.ts +4 -4
  249. package/src/tools/apps/executors.ts +29 -4
  250. package/src/tools/filesystem/list.ts +93 -0
  251. package/src/tools/permission-checker.ts +78 -0
  252. package/src/tools/registry.ts +4 -0
  253. package/src/tools/schedule/create.ts +3 -0
  254. package/src/tools/schedule/list.ts +1 -0
  255. package/src/tools/schedule/update.ts +6 -0
  256. package/src/tools/shared/filesystem/errors.ts +5 -0
  257. package/src/tools/shared/filesystem/file-ops-service.ts +90 -2
  258. package/src/tools/shared/filesystem/types.ts +17 -0
  259. package/src/tools/shared/shell-output.ts +31 -2
  260. package/src/tools/subagent/abort.ts +12 -2
  261. package/src/tools/subagent/message.ts +9 -2
  262. package/src/tools/subagent/notify-parent.ts +79 -0
  263. package/src/tools/subagent/read.ts +29 -8
  264. package/src/tools/subagent/resolve.ts +21 -0
  265. package/src/tools/subagent/spawn.ts +2 -0
  266. package/src/tools/subagent/status.ts +11 -1
  267. package/src/tools/system/avatar-generator.ts +3 -3
  268. package/src/tools/system/register.ts +23 -0
  269. package/src/tools/system/set-permission-mode.ts +103 -0
  270. package/src/tools/terminal/parser.ts +30 -5
  271. package/src/tools/terminal/safe-env.ts +16 -1
  272. package/src/tools/tool-manifest.ts +6 -0
  273. package/src/tools/types.ts +2 -0
  274. package/src/util/logger.ts +1 -1
  275. package/src/util/platform.ts +50 -17
  276. package/src/workspace/migrations/023-move-config-files-to-workspace.ts +2 -2
  277. package/src/workspace/migrations/024-move-runtime-files-to-workspace.ts +2 -2
  278. package/src/workspace/migrations/028-recover-conversations-from-disk-view.ts +270 -0
  279. package/src/workspace/migrations/029-seed-pkb.ts +84 -0
  280. package/src/workspace/migrations/registry.ts +4 -0
  281. package/src/workspace/top-level-renderer.ts +5 -9
  282. package/src/__tests__/cli-memory.test.ts +0 -377
  283. package/src/__tests__/clipboard.test.ts +0 -88
  284. package/src/cli/cli-memory.ts +0 -179
  285. package/src/util/clipboard.ts +0 -34
@@ -35,7 +35,12 @@ import {
35
35
  evaluateTemporalTriggers,
36
36
  type TriggeredResult,
37
37
  } from "./triggers.js";
38
- import type { MemoryEdge, MemoryNode, ScoredNode } from "./types.js";
38
+ import type {
39
+ MemoryEdge,
40
+ MemoryNode,
41
+ RetrievalMetrics,
42
+ ScoredNode,
43
+ } from "./types.js";
39
44
  import { isCapabilityNode } from "./types.js";
40
45
 
41
46
  const log = getLogger("graph-retriever");
@@ -78,7 +83,7 @@ async function rerankAndDedup(
78
83
  const provider = await getConfiguredProvider();
79
84
  if (!provider) return candidates.slice(0, maxNodes);
80
85
 
81
- // Compact listing for the LLM: numbered index + age + first 100 chars
86
+ // Numbered listing for the LLM: index + age + full content
82
87
  const now = Date.now();
83
88
  const listing = candidates
84
89
  .map((s, i) => {
@@ -87,11 +92,7 @@ async function rerankAndDedup(
87
92
  ageDays < 1
88
93
  ? `${Math.floor(ageDays * 24)}h`
89
94
  : `${Math.floor(ageDays)}d`;
90
- const preview =
91
- s.node.content.length > 100
92
- ? s.node.content.slice(0, 100) + "…"
93
- : s.node.content;
94
- return `${i + 1}. (${age}) ${preview}`;
95
+ return `${i + 1}. (${age}) ${s.node.content}`;
95
96
  })
96
97
  .join("\n");
97
98
 
@@ -176,10 +177,11 @@ async function dedupForTurn(
176
177
  candidates: ScoredNode[],
177
178
  maxNodes: number,
178
179
  query: string,
179
- ): Promise<ScoredNode[]> {
180
+ ): Promise<{ nodes: ScoredNode[]; llmApplied: boolean }> {
180
181
  try {
181
182
  const provider = await getConfiguredProvider();
182
- if (!provider) return candidates.slice(0, maxNodes);
183
+ if (!provider)
184
+ return { nodes: candidates.slice(0, maxNodes), llmApplied: false };
183
185
 
184
186
  const now = Date.now();
185
187
  const listing = candidates
@@ -189,11 +191,7 @@ async function dedupForTurn(
189
191
  ageDays < 1
190
192
  ? `${Math.floor(ageDays * 24)}h`
191
193
  : `${Math.floor(ageDays)}d`;
192
- const preview =
193
- s.node.content.length > 150
194
- ? s.node.content.slice(0, 150) + "…"
195
- : s.node.content;
196
- return `${i + 1}. (${age}) ${preview}`;
194
+ return `${i + 1}. (${age}) ${s.node.content}`;
197
195
  })
198
196
  .join("\n");
199
197
 
@@ -211,6 +209,98 @@ async function dedupForTurn(
211
209
  },
212
210
  );
213
211
 
212
+ const toolBlock = extractToolUse(response);
213
+ if (!toolBlock)
214
+ return { nodes: candidates.slice(0, maxNodes), llmApplied: false };
215
+
216
+ const input = toolBlock.input as { items?: number[] };
217
+ if (!input.items?.length)
218
+ return { nodes: candidates.slice(0, maxNodes), llmApplied: false };
219
+
220
+ const reranked: ScoredNode[] = [];
221
+ const seen = new Set<number>();
222
+ for (const num of input.items) {
223
+ const idx = num - 1;
224
+ if (idx >= 0 && idx < candidates.length && !seen.has(idx)) {
225
+ reranked.push(candidates[idx]);
226
+ seen.add(idx);
227
+ }
228
+ }
229
+
230
+ return reranked.length > 0
231
+ ? { nodes: reranked.slice(0, maxNodes), llmApplied: true }
232
+ : { nodes: candidates.slice(0, maxNodes), llmApplied: false };
233
+ } catch (err) {
234
+ log.warn(
235
+ { err: err instanceof Error ? err.message : String(err) },
236
+ "Per-turn dedup+rerank failed, using scored order",
237
+ );
238
+ return { nodes: candidates.slice(0, maxNodes), llmApplied: false };
239
+ }
240
+ }
241
+
242
+ // ---------------------------------------------------------------------------
243
+ // Cross-category dedup — dedup-only (no relevance filtering)
244
+ // ---------------------------------------------------------------------------
245
+
246
+ const DEDUP_ITEMS_TOOL = {
247
+ name: "select_items",
248
+ description:
249
+ "Select ALL items that survive deduplication. When multiple items describe the same event/fact, keep only the richest version. Do not filter by relevance — keep everything that is not a duplicate.",
250
+ input_schema: {
251
+ type: "object" as const,
252
+ properties: {
253
+ items: {
254
+ type: "array" as const,
255
+ description:
256
+ "Item numbers to keep (1-indexed). Remove duplicates — when multiple entries describe the same event/fact, keep ONLY the richest version. Keep all non-duplicate items.",
257
+ items: { type: "number" as const },
258
+ },
259
+ },
260
+ required: ["items"] as const,
261
+ },
262
+ };
263
+
264
+ /**
265
+ * Dedup-only pass for cross-category duplicate removal. Unlike `dedupForTurn`,
266
+ * this does NOT filter by relevance to a query — it ONLY removes duplicates
267
+ * and keeps everything else. Used after context load to catch topic-level
268
+ * duplicates across reserved categories and serendipity.
269
+ */
270
+ async function dedupCrossCategory(
271
+ candidates: ScoredNode[],
272
+ maxNodes: number,
273
+ ): Promise<ScoredNode[]> {
274
+ try {
275
+ const provider = await getConfiguredProvider();
276
+ if (!provider) return candidates.slice(0, maxNodes);
277
+
278
+ const now = Date.now();
279
+ const listing = candidates
280
+ .map((s, i) => {
281
+ const ageDays = (now - s.node.created) / (1000 * 60 * 60 * 24);
282
+ const age =
283
+ ageDays < 1
284
+ ? `${Math.floor(ageDays * 24)}h`
285
+ : `${Math.floor(ageDays)}d`;
286
+ return `${i + 1}. (${age}) ${s.node.content}`;
287
+ })
288
+ .join("\n");
289
+
290
+ const response = await provider.sendMessage(
291
+ [userMessage(listing)],
292
+ [DEDUP_ITEMS_TOOL],
293
+ `Deduplicate the following numbered items. When multiple items describe the same event, fact, or status, keep ONLY the richest version. Keep ALL items that are not duplicates — do not filter by relevance or topic. Call the select_items tool with every item that survives dedup.`,
294
+ {
295
+ config: {
296
+ modelIntent: "latency-optimized" as const,
297
+ tool_choice: { type: "tool" as const, name: "select_items" },
298
+ thinking: { type: "disabled" },
299
+ temperature: 0,
300
+ },
301
+ },
302
+ );
303
+
214
304
  const toolBlock = extractToolUse(response);
215
305
  if (!toolBlock) return candidates.slice(0, maxNodes);
216
306
 
@@ -233,7 +323,7 @@ async function dedupForTurn(
233
323
  } catch (err) {
234
324
  log.warn(
235
325
  { err: err instanceof Error ? err.message : String(err) },
236
- "Per-turn dedup+rerank failed, using scored order",
326
+ "Cross-category dedup failed, using original order",
237
327
  );
238
328
  return candidates.slice(0, maxNodes);
239
329
  }
@@ -263,6 +353,7 @@ export interface ContextLoadResult {
263
353
  serendipityNodes: ScoredNode[];
264
354
  triggeredNodes: TriggeredResult[];
265
355
  latencyMs: number;
356
+ metrics: RetrievalMetrics;
266
357
  }
267
358
 
268
359
  /**
@@ -287,15 +378,21 @@ export async function loadContextMemory(
287
378
 
288
379
  // 1. Embed recent conversation summaries as retrieval queries
289
380
  let queryVector: number[] | null = null;
381
+ let embeddingProvider: string | null = null;
382
+ let embeddingModel: string | null = null;
383
+ let contextQueryText: string | null = null;
290
384
  if (opts.recentSummaries.length > 0) {
291
385
  try {
292
386
  const queryText = opts.recentSummaries.join("\n\n");
293
387
  const truncated =
294
388
  queryText.length > 3000 ? queryText.slice(0, 3000) : queryText;
389
+ contextQueryText = truncated;
295
390
  const result = await embedWithRetry(opts.config, [truncated], {
296
391
  signal: opts.signal,
297
392
  });
298
393
  queryVector = result.vectors[0] ?? null;
394
+ embeddingProvider = result.provider;
395
+ embeddingModel = result.model;
299
396
  } catch (err) {
300
397
  log.warn({ err }, "Failed to embed summaries for context load");
301
398
  }
@@ -303,7 +400,9 @@ export async function loadContextMemory(
303
400
 
304
401
  // 2. Hybrid retrieval from Qdrant (dense search on graph_node points)
305
402
  const semanticCandidateIds = new Map<string, number>(); // nodeId → score
403
+ let hybridSearchLatencyMs = 0;
306
404
  if (queryVector) {
405
+ const searchStart = Date.now();
307
406
  try {
308
407
  const results = await searchGraphNodes(queryVector, maxNodes * 3, [
309
408
  opts.scopeId,
@@ -313,8 +412,11 @@ export async function loadContextMemory(
313
412
  }
314
413
  } catch (err) {
315
414
  log.warn({ err }, "Qdrant search failed for context load");
415
+ } finally {
416
+ hybridSearchLatencyMs = Date.now() - searchStart;
316
417
  }
317
418
  }
419
+ const pureSemanticHits = semanticCandidateIds.size;
318
420
 
319
421
  // Also include top-significance nodes as a fallback
320
422
  const topSignificance = queryNodes({
@@ -498,105 +600,15 @@ export async function loadContextMemory(
498
600
  },
499
601
  );
500
602
 
501
- // 6. Reserve slots for recent prospective nodes (commitments, tasks, plans).
502
- // These MUST surface at conversation start regardless of score — if the user
503
- // said "I have a doctor appointment tomorrow," Velissa must remember it.
504
- const PROSPECTIVE_RESERVE = 10;
505
- const recentProspective = queryNodes({
506
- scopeId: opts.scopeId,
507
- types: ["prospective"],
508
- fidelityNot: ["gone"],
509
- createdAfter: nowMs - 3 * 24 * 60 * 60 * 1000, // last 3 days
510
- limit: PROSPECTIVE_RESERVE,
511
- });
512
-
513
- // Filter out prospective nodes that have been superseded or resolved.
514
- // A "supersedes" or "resolved-by" edge targeting a node means its
515
- // content has been replaced by a newer memory — stop force-surfacing it.
516
- const unresolvedProspective = recentProspective.filter((node) => {
517
- const incoming = getEdgesForNode(node.id, "incoming");
518
- return !incoming.some(
519
- (e) =>
520
- e.relationship === "supersedes" || e.relationship === "resolved-by",
521
- );
522
- });
523
-
524
- // Score them so they have breakdowns, but they're guaranteed inclusion
525
- const prospectiveIds = new Set(unresolvedProspective.map((n) => n.id));
526
- const reservedNodes: ScoredNode[] = unresolvedProspective.map((node) => {
527
- const existing = scored.find((s) => s.node.id === node.id);
528
- if (existing) return existing;
529
- return scoreCandidate(node, {
530
- semanticSimilarity: 0,
531
- effectiveSignificance: computeEffectiveSignificance(node, nowMs),
532
- emotionalIntensity: node.emotionalCharge.intensity,
533
- temporalBoost: (computeTemporalBoost(node, now) + 1) / 2,
534
- recencyBoost: computeRecencyBoost(node, nowMs),
535
- triggerBoost: 0,
536
- activationBoost: 0,
537
- });
538
- });
539
-
540
- // Reserve slots for upcoming events (nodes with event dates in the future).
541
- // Like prospective reservation, these MUST surface — if the user said
542
- // "I have a flight Tuesday," the assistant must remember it regardless of score.
543
- const UPCOMING_RESERVE = 5;
544
- const upcomingEvents = queryNodes({
545
- scopeId: opts.scopeId,
546
- fidelityNot: ["gone"],
547
- hasEventDate: true,
548
- eventDateAfter: nowMs,
549
- eventDateBefore: nowMs + 30 * 24 * 60 * 60 * 1000, // next 30 days
550
- limit: 20, // Fetch extra candidates — post-sort by proximity below
551
- });
552
-
553
- // Sort by event date ascending so soonest events get reserved first
554
- // (queryNodes sorts by significance, which would drop a tomorrow-event
555
- // with low significance in favor of a 3-weeks-away high-significance one)
556
- upcomingEvents.sort((a, b) => (a.eventDate ?? 0) - (b.eventDate ?? 0));
557
-
558
- const unresolvedUpcoming = upcomingEvents
559
- .filter((node) => {
560
- if (prospectiveIds.has(node.id)) return false; // already reserved as prospective
561
- const incoming = getEdgesForNode(node.id, "incoming");
562
- return !incoming.some(
563
- (e) =>
564
- e.relationship === "supersedes" || e.relationship === "resolved-by",
565
- );
566
- })
567
- .slice(0, UPCOMING_RESERVE);
568
-
569
- const upcomingIds = new Set(unresolvedUpcoming.map((n) => n.id));
570
- const reservedUpcoming: ScoredNode[] = unresolvedUpcoming.map((node) => {
571
- const existing = scored.find((s) => s.node.id === node.id);
572
- if (existing) return existing;
573
- return scoreCandidate(node, {
574
- semanticSimilarity: 0,
575
- effectiveSignificance: computeEffectiveSignificance(node, nowMs),
576
- emotionalIntensity: node.emotionalCharge.intensity,
577
- temporalBoost: (computeTemporalBoost(node, now) + 1) / 2,
578
- recencyBoost: computeRecencyBoost(node, nowMs),
579
- triggerBoost: 0,
580
- activationBoost: 0,
581
- });
582
- });
583
-
584
- // Remove reserved nodes and all procedural nodes from the main pool.
585
- // Procedural nodes have dedicated reserved slots — any that didn't make
586
- // the cut shouldn't compete with organic memories for general slots.
587
- const mainPool = scored.filter(
588
- (s) =>
589
- !isCapabilityNode(s.node) &&
590
- !prospectiveIds.has(s.node.id) &&
591
- !upcomingIds.has(s.node.id),
592
- );
603
+ // 6. Remove procedural nodes from the main pool — they have dedicated
604
+ // reserved slots and shouldn't compete with organic memories.
605
+ // Prospective/upcoming reserves were removed in favor of the PKB
606
+ // (personal knowledge base) which handles commitments and schedule
607
+ // via always-loaded flat files.
608
+ const mainPool = scored.filter((s) => !isCapabilityNode(s.node));
593
609
  const mainSlots = Math.max(
594
610
  0,
595
- maxNodes -
596
- serendipitySlots -
597
- reservedNodes.length -
598
- reservedUpcoming.length -
599
- reservedCapabilities.length,
611
+ maxNodes - serendipitySlots - reservedCapabilities.length,
600
612
  );
601
613
 
602
614
  // 7. LLM re-ranking on the main pool: dedup + select
@@ -606,18 +618,14 @@ export async function loadContextMemory(
606
618
  opts.config,
607
619
  );
608
620
 
609
- // 8. Combine: reserved prospective + reserved upcoming + reserved capabilities + reranked main pool
610
- const deterministic = [
611
- ...reservedNodes,
612
- ...reservedUpcoming,
613
- ...reservedCapabilities,
614
- ...reranked,
615
- ].slice(0, maxNodes - serendipitySlots);
621
+ // 8. Combine: reserved capabilities + reranked main pool
622
+ const deterministic = [...reservedCapabilities, ...reranked].slice(
623
+ 0,
624
+ maxNodes - serendipitySlots,
625
+ );
616
626
  // Exclude procedural nodes from serendipity — they have reserved slots
617
627
  // and shouldn't appear as random wildcard picks.
618
- const serendipityPool = scored.filter(
619
- (s) => !isCapabilityNode(s.node),
620
- );
628
+ const serendipityPool = scored.filter((s) => !isCapabilityNode(s.node));
621
629
  const serendipityPicks = sampleSerendipity(serendipityPool, serendipitySlots);
622
630
 
623
631
  // Deduplicate serendipity against deterministic
@@ -626,11 +634,56 @@ export async function loadContextMemory(
626
634
  (s) => !deterministicIds.has(s.node.id),
627
635
  );
628
636
 
637
+ // 9. Cross-category dedup: catch topic-level duplicates across reserved
638
+ // capabilities, the reranked main pool, and serendipity.
639
+ // Only runs when the combined set is large enough to warrant an LLM call.
640
+ const CROSS_DEDUP_THRESHOLD = 15;
641
+ const combined = [...deterministic, ...uniqueSerendipity];
642
+ let dedupedDeterministic = deterministic;
643
+ let dedupedSerendipity = uniqueSerendipity;
644
+
645
+ if (combined.length > CROSS_DEDUP_THRESHOLD) {
646
+ const deduped = await dedupCrossCategory(
647
+ combined,
648
+ combined.length, // preserve all non-duplicate nodes
649
+ );
650
+
651
+ // Re-split into deterministic vs serendipity by checking original membership
652
+ dedupedDeterministic = deduped.filter((s) =>
653
+ deterministicIds.has(s.node.id),
654
+ );
655
+ dedupedSerendipity = deduped.filter(
656
+ (s) => !deterministicIds.has(s.node.id),
657
+ );
658
+ }
659
+
660
+ const TOP_N = 20;
661
+ const topCandidates = scored.slice(0, TOP_N).map((s) => ({
662
+ nodeId: s.node.id,
663
+ type: s.node.type,
664
+ score: s.score,
665
+ semanticSimilarity: s.scoreBreakdown.semanticSimilarity,
666
+ recencyBoost: s.scoreBreakdown.recencyBoost,
667
+ }));
668
+
629
669
  return {
630
- nodes: deterministic,
631
- serendipityNodes: uniqueSerendipity,
670
+ nodes: dedupedDeterministic,
671
+ serendipityNodes: dedupedSerendipity,
632
672
  triggeredNodes: allTriggered,
633
673
  latencyMs: Date.now() - start,
674
+ metrics: {
675
+ semanticHits: pureSemanticHits,
676
+ mergedCount: scored.length,
677
+ selectedCount: dedupedDeterministic.length + dedupedSerendipity.length,
678
+ tier1Count: 0,
679
+ tier2Count: reservedCapabilities.length,
680
+ hybridSearchLatencyMs,
681
+ sparseVectorUsed: false,
682
+ embeddingProvider,
683
+ embeddingModel,
684
+ queryContext: contextQueryText,
685
+ topCandidates,
686
+ },
634
687
  };
635
688
  }
636
689
 
@@ -654,9 +707,12 @@ export interface TurnRetrievalOpts {
654
707
  export interface TurnRetrievalResult {
655
708
  /** New nodes to inject (not already in context). */
656
709
  nodes: ScoredNode[];
710
+ /** Serendipity picks included in nodes. */
711
+ serendipityNodes: ScoredNode[];
657
712
  /** Triggers that fired this turn. */
658
713
  triggeredNodes: TriggeredResult[];
659
714
  latencyMs: number;
715
+ metrics: RetrievalMetrics;
660
716
  }
661
717
 
662
718
  /**
@@ -674,6 +730,24 @@ export async function retrieveForTurn(
674
730
  const now = new Date();
675
731
  const nowMs = now.getTime();
676
732
 
733
+ let embeddingProvider: string | null = null;
734
+ let embeddingModel: string | null = null;
735
+ let hybridSearchLatencyMs = 0;
736
+
737
+ const ZERO_METRICS: RetrievalMetrics = {
738
+ semanticHits: 0,
739
+ mergedCount: 0,
740
+ selectedCount: 0,
741
+ tier1Count: 0,
742
+ tier2Count: 0,
743
+ hybridSearchLatencyMs: 0,
744
+ sparseVectorUsed: false,
745
+ embeddingProvider: null,
746
+ embeddingModel: null,
747
+ queryContext: null,
748
+ topCandidates: [],
749
+ };
750
+
677
751
  // 1. Build query from last exchange
678
752
  const queryText = [opts.assistantLastMessage, opts.userLastMessage]
679
753
  .filter((m) => m.length > 0)
@@ -685,6 +759,7 @@ export async function retrieveForTurn(
685
759
  (b): b is ImageContent => b.type === "image",
686
760
  );
687
761
  const allCandidateIds = new Map<string, number>(); // nodeId → best score
762
+ const searchStart = Date.now();
688
763
 
689
764
  if (imageBlocks.length > 0) {
690
765
  try {
@@ -705,6 +780,10 @@ export async function retrieveForTurn(
705
780
  const imgResult = await embedWithRetry(opts.config, [imageInput], {
706
781
  signal: opts.signal,
707
782
  });
783
+ if (!embeddingProvider) {
784
+ embeddingProvider = imgResult.provider;
785
+ embeddingModel = imgResult.model;
786
+ }
708
787
  const imgVector = imgResult.vectors[0];
709
788
  if (imgVector) {
710
789
  const imgResults = await searchGraphNodes(imgVector, 40, [
@@ -723,7 +802,20 @@ export async function retrieveForTurn(
723
802
  }
724
803
 
725
804
  if (queryText.trim().length === 0 && allCandidateIds.size === 0) {
726
- return { nodes: [], triggeredNodes: [], latencyMs: Date.now() - start };
805
+ return {
806
+ nodes: [],
807
+ serendipityNodes: [],
808
+ triggeredNodes: [],
809
+ latencyMs: Date.now() - start,
810
+ metrics: {
811
+ ...ZERO_METRICS,
812
+ hybridSearchLatencyMs:
813
+ imageBlocks.length > 0 ? Date.now() - searchStart : 0,
814
+ embeddingProvider,
815
+ embeddingModel,
816
+ queryContext: queryText || null,
817
+ },
818
+ };
727
819
  }
728
820
 
729
821
  // Chunk if too large (8k token ≈ 32k chars conservative estimate)
@@ -764,6 +856,8 @@ export async function retrieveForTurn(
764
856
  const embedResults = await embedWithRetry(opts.config, chunks, {
765
857
  signal: opts.signal,
766
858
  });
859
+ embeddingProvider = embedResults.provider;
860
+ embeddingModel = embedResults.model;
767
861
  queryEmbeddings = embedResults.vectors;
768
862
 
769
863
  const searchPromises = queryEmbeddings.map((vec) =>
@@ -777,14 +871,35 @@ export async function retrieveForTurn(
777
871
  allCandidateIds.set(r.nodeId, Math.max(current, r.score));
778
872
  }
779
873
  }
874
+ hybridSearchLatencyMs = Date.now() - searchStart;
780
875
  } catch (err) {
781
876
  log.warn({ err }, "Embedding/search failed for turn retrieval");
782
877
  if (allCandidateIds.size === 0) {
783
- return { nodes: [], triggeredNodes: [], latencyMs: Date.now() - start };
878
+ return {
879
+ nodes: [],
880
+ serendipityNodes: [],
881
+ triggeredNodes: [],
882
+ latencyMs: Date.now() - start,
883
+ metrics: {
884
+ ...ZERO_METRICS,
885
+ hybridSearchLatencyMs: Date.now() - searchStart,
886
+ embeddingProvider,
887
+ embeddingModel,
888
+ queryContext: queryText || null,
889
+ },
890
+ };
784
891
  }
785
892
  }
786
893
  }
787
894
 
895
+ // Capture search latency for image-only searches (text path sets it inside its try block)
896
+ if (hybridSearchLatencyMs === 0 && allCandidateIds.size > 0) {
897
+ hybridSearchLatencyMs = Date.now() - searchStart;
898
+ }
899
+
900
+ // Snapshot pure vector-search results before triggers inflate the set
901
+ const pureSemanticHits = allCandidateIds.size;
902
+
788
903
  // 3. Evaluate semantic triggers
789
904
  const semanticTriggers = getActiveTriggersByType("semantic", opts.scopeId);
790
905
  const triggeredSemantic =
@@ -813,20 +928,36 @@ export async function retrieveForTurn(
813
928
  if (newCandidateIds.length === 0) {
814
929
  return {
815
930
  nodes: [],
931
+ serendipityNodes: [],
816
932
  triggeredNodes: triggeredSemantic,
817
933
  latencyMs: Date.now() - start,
934
+ metrics: {
935
+ ...ZERO_METRICS,
936
+ semanticHits: pureSemanticHits,
937
+ hybridSearchLatencyMs,
938
+ embeddingProvider,
939
+ embeddingModel,
940
+ queryContext: queryText || null,
941
+ },
818
942
  };
819
943
  }
820
944
 
821
945
  // 5. Hydrate and score
822
946
  const nodes = getNodesByIds(newCandidateIds);
823
947
  const scored: ScoredNode[] = [];
948
+ const capabilityCandidates: { node: MemoryNode; sim: number }[] = [];
824
949
 
825
950
  for (const node of nodes) {
826
951
  if (node.fidelity === "gone") continue;
827
- // Procedural nodes (capabilities) have reserved slots at context-load
828
- // and shouldn't compete with organic memories in per-turn injection.
829
- if (isCapabilityNode(node)) continue;
952
+ // Capability nodes (auto-seeded skills/CLI) are excluded from the general
953
+ // scoring pool — they compete in the dedicated procedural reserve below.
954
+ if (isCapabilityNode(node)) {
955
+ capabilityCandidates.push({
956
+ node,
957
+ sim: allCandidateIds.get(node.id) ?? 0,
958
+ });
959
+ continue;
960
+ }
830
961
 
831
962
  const semanticSim = allCandidateIds.get(node.id) ?? 0;
832
963
  const effectiveSig = computeEffectiveSignificance(node, nowMs);
@@ -853,156 +984,128 @@ export async function retrieveForTurn(
853
984
  );
854
985
  }
855
986
 
987
+ // 5b. Reserve slots for capability nodes (skills/CLI).
988
+ // Sourced from vector search candidates — only semantically relevant
989
+ // capabilities compete for reserved slots.
990
+ const PROCEDURAL_RESERVE = 3;
991
+
992
+ const proceduralCandidates = capabilityCandidates
993
+ .filter(({ node }) => !opts.tracker.isInContext(node.id))
994
+ .sort((a, b) => b.sim - a.sim);
995
+
996
+ const seenProcCapIds = new Set<string>();
997
+ const rankedProcedural = proceduralCandidates
998
+ .filter(({ node }) => {
999
+ const match = node.content.match(
1000
+ /^skill:(\S+)\n|^cli:(\S+)\n|^\s*The ".*?" skill \(([^)]+)\)|^\s*The "assistant (\S+)" CLI command/,
1001
+ );
1002
+ const capId = match?.[1] ?? match?.[2] ?? match?.[3] ?? match?.[4];
1003
+ if (capId) {
1004
+ if (seenProcCapIds.has(capId)) return false;
1005
+ seenProcCapIds.add(capId);
1006
+ }
1007
+ return true;
1008
+ })
1009
+ .slice(0, PROCEDURAL_RESERVE);
1010
+
1011
+ const proceduralScored: ScoredNode[] = rankedProcedural.map(({ node, sim }) =>
1012
+ scoreCandidate(
1013
+ node,
1014
+ {
1015
+ semanticSimilarity: sim,
1016
+ effectiveSignificance: computeEffectiveSignificance(node, nowMs),
1017
+ emotionalIntensity: node.emotionalCharge.intensity,
1018
+ temporalBoost: (computeTemporalBoost(node, now) + 1) / 2,
1019
+ recencyBoost: computeRecencyBoost(node, nowMs),
1020
+ triggerBoost: triggerBoostMap.get(node.id) ?? 0,
1021
+ activationBoost: 0,
1022
+ },
1023
+ PER_TURN_WEIGHTS,
1024
+ ),
1025
+ );
1026
+
1027
+ const PROCEDURAL_SIM_FLOOR = 0.15;
1028
+ const proceduralInjected = proceduralScored.filter(
1029
+ (s) => s.scoreBreakdown.semanticSimilarity >= PROCEDURAL_SIM_FLOOR,
1030
+ );
1031
+ const proceduralIds = new Set(proceduralInjected.map((s) => s.node.id));
1032
+
856
1033
  // Sort and apply threshold — pull a wider pool for dedup, then trim
857
1034
  scored.sort((a, b) => b.score - a.score);
858
1035
  const INJECTION_THRESHOLD = 0.3;
859
- const PRE_DEDUP_POOL = 40;
860
- const MAX_INJECTED = 8;
1036
+ const PRE_DEDUP_POOL = 20;
1037
+ const MAX_INJECTED = 4;
861
1038
  const pool = scored
862
1039
  .filter((s) => s.score >= INJECTION_THRESHOLD)
863
1040
  .slice(0, PRE_DEDUP_POOL);
864
1041
 
865
1042
  // Dedup + rerank with a fast model when the pool is large enough to warrant it
866
- const injected =
867
- pool.length > MAX_INJECTED
868
- ? await dedupForTurn(pool, MAX_INJECTED, opts.userLastMessage)
869
- : pool;
870
-
871
- return {
872
- nodes: injected,
873
- triggeredNodes: triggeredSemantic,
874
- latencyMs: Date.now() - start,
875
- };
876
- }
877
-
878
- // ---------------------------------------------------------------------------
879
- // Periodic refresh — every N turns, replenish memory context
880
- // ---------------------------------------------------------------------------
881
-
882
- export interface RefreshOpts {
883
- /** Recent turns (last 5-6) concatenated as text. */
884
- recentTurnsText: string;
885
- scopeId: string;
886
- config: AssistantConfig;
887
- tracker: InContextTracker;
888
- signal?: AbortSignal;
889
- /** Max new nodes to inject (default 10). */
890
- maxNodes?: number;
891
- }
892
-
893
- export interface RefreshResult {
894
- nodes: ScoredNode[];
895
- latencyMs: number;
896
- }
897
-
898
- /** Default interval between refresh cycles. */
899
- export const REFRESH_INTERVAL_TURNS = 5;
900
-
901
- /**
902
- * Periodic context refresh. Runs every N turns to catch memories that
903
- * the per-turn injection missed due to its high threshold.
904
- *
905
- * Uses a wider window (recent 5-6 turns) as the query to capture the
906
- * evolved conversational vibe. No LLM re-ranking — pure embedding +
907
- * scoring for speed (~200ms).
908
- *
909
- * Also runs after compaction to replenish lost memory context.
910
- */
911
- export async function refreshContextMemory(
912
- opts: RefreshOpts,
913
- ): Promise<RefreshResult> {
914
- const start = Date.now();
915
- const now = new Date();
916
- const nowMs = now.getTime();
917
- const maxNodes = opts.maxNodes ?? 10;
918
-
919
- if (opts.recentTurnsText.trim().length === 0) {
920
- return { nodes: [], latencyMs: Date.now() - start };
921
- }
922
-
923
- // 1. Embed recent turns window
924
- const queryText =
925
- opts.recentTurnsText.length > 6000
926
- ? opts.recentTurnsText.slice(-6000)
927
- : opts.recentTurnsText;
928
-
929
- let queryVector: number[] | null = null;
930
- try {
931
- const result = await embedWithRetry(opts.config, [queryText], {
932
- signal: opts.signal,
933
- });
934
- queryVector = result.vectors[0] ?? null;
935
- } catch (err) {
936
- log.warn({ err }, "Embedding failed for context refresh");
937
- return { nodes: [], latencyMs: Date.now() - start };
938
- }
939
-
940
- if (!queryVector) {
941
- return { nodes: [], latencyMs: Date.now() - start };
1043
+ let injected: ScoredNode[];
1044
+ let llmDedupApplied = false;
1045
+ if (pool.length > MAX_INJECTED) {
1046
+ const result = await dedupForTurn(pool, MAX_INJECTED, opts.userLastMessage);
1047
+ injected = result.nodes;
1048
+ llmDedupApplied = result.llmApplied;
1049
+ } else {
1050
+ injected = pool;
942
1051
  }
943
1052
 
944
- // 2. Search — cast a wider net than per-turn
945
- let candidates: Array<{ nodeId: string; score: number }>;
946
- try {
947
- candidates = await searchGraphNodes(queryVector, maxNodes * 3, [
948
- opts.scopeId,
1053
+ // Remove procedural-reserved nodes from general set to avoid double-counting
1054
+ const generalInjected = injected.filter((s) => !proceduralIds.has(s.node.id));
1055
+
1056
+ // Backfill vacated general slots from the remaining pool so we always
1057
+ // return up to MAX_INJECTED general memories when eligible candidates exist.
1058
+ // Only skip backfill when LLM dedup genuinely ran — it intentionally rejected
1059
+ // items as duplicates/irrelevant. When dedupForTurn fell back to a plain
1060
+ // top-N slice (no provider, tool call failure), backfill is still appropriate.
1061
+ if (generalInjected.length < MAX_INJECTED && !llmDedupApplied) {
1062
+ const usedIds = new Set([
1063
+ ...generalInjected.map((s) => s.node.id),
1064
+ ...proceduralIds,
949
1065
  ]);
950
- } catch (err) {
951
- log.warn({ err }, "Qdrant search failed for context refresh");
952
- return { nodes: [], latencyMs: Date.now() - start };
1066
+ const backfillCandidates = pool.filter((s) => !usedIds.has(s.node.id));
1067
+ const needed = MAX_INJECTED - generalInjected.length;
1068
+ for (let i = 0; i < Math.min(needed, backfillCandidates.length); i++) {
1069
+ generalInjected.push(backfillCandidates[i]);
1070
+ }
953
1071
  }
954
1072
 
955
- // 3. Filter to nodes NOT already in context
956
- const newCandidates = candidates.filter(
957
- (c) => !opts.tracker.isInContext(c.nodeId),
958
- );
1073
+ const allDeterministic = [...generalInjected, ...proceduralInjected];
1074
+ const deterministicIds = new Set(allDeterministic.map((n) => n.node.id));
959
1075
 
960
- if (newCandidates.length === 0) {
961
- return { nodes: [], latencyMs: Date.now() - start };
962
- }
963
-
964
- // 4. Hydrate and score
965
- const nodes = getNodesByIds(newCandidates.map((c) => c.nodeId));
966
- const candidateScoreMap = new Map(
967
- newCandidates.map((c) => [c.nodeId, c.score]),
1076
+ // Reserve 1 serendipity slot from scored candidates not in the deterministic set
1077
+ const serendipityPool = scored.filter(
1078
+ (s) => s.score >= INJECTION_THRESHOLD && !deterministicIds.has(s.node.id),
968
1079
  );
969
-
970
- const scored: ScoredNode[] = [];
971
- for (const node of nodes) {
972
- if (node.fidelity === "gone") continue;
973
- if (isCapabilityNode(node)) continue;
974
-
975
- const semanticSim = candidateScoreMap.get(node.id) ?? 0;
976
- const effectiveSig = computeEffectiveSignificance(node, nowMs);
977
- const temporal = computeTemporalBoost(node, now);
978
- const recency = computeRecencyBoost(node, nowMs);
979
-
980
- scored.push(
981
- scoreCandidate(
982
- node,
983
- {
984
- semanticSimilarity: semanticSim,
985
- effectiveSignificance: effectiveSig,
986
- emotionalIntensity: node.emotionalCharge.intensity,
987
- temporalBoost: (temporal + 1) / 2,
988
- recencyBoost: recency,
989
- triggerBoost: 0,
990
- activationBoost: 0,
991
- },
992
- PER_TURN_WEIGHTS,
993
- ),
994
- );
995
- }
996
-
997
- // 5. Return top N — lower threshold than per-turn since this is a periodic refresh
998
- scored.sort((a, b) => b.score - a.score);
999
- const REFRESH_THRESHOLD = 0.15;
1000
- const refreshed = scored
1001
- .filter((s) => s.score >= REFRESH_THRESHOLD)
1002
- .slice(0, maxNodes);
1080
+ const serendipityPicks = sampleSerendipity(serendipityPool, 1);
1081
+ const allInjected = [...allDeterministic, ...serendipityPicks];
1082
+
1083
+ const TOP_N = 20;
1084
+ const topCandidates = scored.slice(0, TOP_N).map((s) => ({
1085
+ nodeId: s.node.id,
1086
+ type: s.node.type,
1087
+ score: s.score,
1088
+ semanticSimilarity: s.scoreBreakdown.semanticSimilarity,
1089
+ recencyBoost: s.scoreBreakdown.recencyBoost,
1090
+ }));
1003
1091
 
1004
1092
  return {
1005
- nodes: refreshed,
1093
+ nodes: allInjected,
1094
+ serendipityNodes: serendipityPicks,
1095
+ triggeredNodes: triggeredSemantic,
1006
1096
  latencyMs: Date.now() - start,
1097
+ metrics: {
1098
+ semanticHits: pureSemanticHits,
1099
+ mergedCount: scored.length,
1100
+ selectedCount: allInjected.length,
1101
+ tier1Count: 0,
1102
+ tier2Count: 0,
1103
+ hybridSearchLatencyMs,
1104
+ sparseVectorUsed: false,
1105
+ embeddingProvider,
1106
+ embeddingModel,
1107
+ queryContext: queryText || null,
1108
+ topCandidates,
1109
+ },
1007
1110
  };
1008
1111
  }