vellum 0.2.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (349) hide show
  1. package/README.md +15 -2
  2. package/bun.lock +5 -2
  3. package/package.json +4 -2
  4. package/scripts/capture-x-graphql.ts +562 -0
  5. package/scripts/ipc/check-swift-decoder-drift.ts +2 -1
  6. package/scripts/test.sh +5 -0
  7. package/src/__tests__/__snapshots__/ipc-snapshot.test.ts.snap +133 -34
  8. package/src/__tests__/account-registry.test.ts +2 -1
  9. package/src/__tests__/agent-heartbeat-service.test.ts +250 -0
  10. package/src/__tests__/asset-materialize-tool.test.ts +16 -15
  11. package/src/__tests__/asset-search-tool.test.ts +23 -22
  12. package/src/__tests__/attachments-store.test.ts +56 -127
  13. package/src/__tests__/browser-skill-baseline-tool-payload.test.ts +5 -4
  14. package/src/__tests__/browser-skill-endstate.test.ts +4 -3
  15. package/src/__tests__/call-bridge.test.ts +385 -0
  16. package/src/__tests__/call-constants.test.ts +40 -0
  17. package/src/__tests__/call-orchestrator.test.ts +130 -4
  18. package/src/__tests__/call-recovery.test.ts +518 -0
  19. package/src/__tests__/call-routes-http.test.ts +459 -0
  20. package/src/__tests__/call-state-machine.test.ts +143 -0
  21. package/src/__tests__/call-store.test.ts +216 -1
  22. package/src/__tests__/cli-discover.test.ts +1 -1
  23. package/src/__tests__/commit-message-enrichment-service.test.ts +148 -7
  24. package/src/__tests__/compaction.benchmark.test.ts +176 -0
  25. package/src/__tests__/computer-use-tools.test.ts +250 -0
  26. package/src/__tests__/config-schema.test.ts +299 -3
  27. package/src/__tests__/conflict-store.test.ts +2 -1
  28. package/src/__tests__/contacts-tools.test.ts +331 -0
  29. package/src/__tests__/conversation-store.test.ts +30 -32
  30. package/src/__tests__/credential-security-invariants.test.ts +4 -0
  31. package/src/__tests__/date-context.test.ts +373 -0
  32. package/src/__tests__/db-schedule-syntax-migration.test.ts +129 -0
  33. package/src/__tests__/fixtures/media-reuse-fixtures.ts +3 -3
  34. package/src/__tests__/followup-tools.test.ts +303 -0
  35. package/src/__tests__/handlers-twitter-config.test.ts +718 -0
  36. package/src/__tests__/intent-routing.test.ts +64 -57
  37. package/src/__tests__/ipc-roundtrip.benchmark.test.ts +237 -0
  38. package/src/__tests__/ipc-snapshot.test.ts +62 -28
  39. package/src/__tests__/llm-usage-store.test.ts +3 -8
  40. package/src/__tests__/media-generate-image.test.ts +1 -1
  41. package/src/__tests__/media-reuse-story.e2e.test.ts +7 -7
  42. package/src/__tests__/memory-retrieval.benchmark.test.ts +430 -0
  43. package/src/__tests__/parallel-tool.benchmark.test.ts +294 -0
  44. package/src/__tests__/playbook-tools.test.ts +342 -0
  45. package/src/__tests__/profile-compiler.test.ts +2 -1
  46. package/src/__tests__/provider-streaming.benchmark.test.ts +773 -0
  47. package/src/__tests__/recurrence-engine-rruleset.test.ts +78 -0
  48. package/src/__tests__/recurrence-engine.test.ts +69 -0
  49. package/src/__tests__/recurrence-types.test.ts +71 -0
  50. package/src/__tests__/registry.test.ts +5 -3
  51. package/src/__tests__/relay-server.test.ts +633 -0
  52. package/src/__tests__/reminder-store.test.ts +6 -3
  53. package/src/__tests__/reminder.test.ts +43 -77
  54. package/src/__tests__/run-orchestrator-assistant-events.test.ts +8 -4
  55. package/src/__tests__/run-orchestrator.test.ts +4 -4
  56. package/src/__tests__/runtime-attachment-metadata.test.ts +7 -6
  57. package/src/__tests__/runtime-runs-http.test.ts +4 -4
  58. package/src/__tests__/runtime-runs.test.ts +4 -4
  59. package/src/__tests__/schedule-store.test.ts +482 -0
  60. package/src/__tests__/schedule-tools.test.ts +700 -0
  61. package/src/__tests__/scheduler-recurrence.test.ts +329 -0
  62. package/src/__tests__/server-history-render.test.ts +14 -13
  63. package/src/__tests__/session-error.test.ts +28 -0
  64. package/src/__tests__/session-init.benchmark.test.ts +462 -0
  65. package/src/__tests__/session-queue.test.ts +71 -48
  66. package/src/__tests__/session-runtime-assembly.test.ts +161 -0
  67. package/src/__tests__/session-surfaces-task-progress.test.ts +104 -0
  68. package/src/__tests__/signup-e2e.test.ts +2 -1
  69. package/src/__tests__/skill-projection.benchmark.test.ts +328 -0
  70. package/src/__tests__/skill-script-runner.test.ts +159 -0
  71. package/src/__tests__/speaker-identification.test.ts +52 -0
  72. package/src/__tests__/subagent-manager-notify.test.ts +42 -10
  73. package/src/__tests__/subagent-tools.test.ts +141 -41
  74. package/src/__tests__/task-compiler.test.ts +2 -1
  75. package/src/__tests__/task-runner.test.ts +2 -1
  76. package/src/__tests__/task-scheduler.test.ts +2 -1
  77. package/src/__tests__/task-tools.test.ts +49 -56
  78. package/src/__tests__/tool-audit-listener.test.ts +1 -0
  79. package/src/__tests__/tool-domain-event-publisher.test.ts +2 -0
  80. package/src/__tests__/tool-execution-pipeline.benchmark.test.ts +500 -0
  81. package/src/__tests__/tool-executor.test.ts +13 -17
  82. package/src/__tests__/turn-commit.test.ts +218 -3
  83. package/src/__tests__/twilio-provider.test.ts +143 -0
  84. package/src/__tests__/twilio-routes.test.ts +789 -0
  85. package/src/__tests__/twitter-auth-handler.test.ts +581 -0
  86. package/src/__tests__/view-image-tool.test.ts +217 -0
  87. package/src/__tests__/workspace-git-service.test.ts +186 -0
  88. package/src/__tests__/workspace-heartbeat-service.test.ts +13 -3
  89. package/src/agent-heartbeat/agent-heartbeat-service.ts +155 -0
  90. package/src/bundler/app-bundler.ts +12 -8
  91. package/src/calls/call-bridge.ts +95 -0
  92. package/src/calls/call-constants.ts +43 -5
  93. package/src/calls/call-domain.ts +276 -0
  94. package/src/calls/call-orchestrator.ts +43 -17
  95. package/src/calls/call-recovery.ts +207 -0
  96. package/src/calls/call-state-machine.ts +68 -0
  97. package/src/calls/call-store.ts +192 -5
  98. package/src/calls/relay-server.ts +41 -4
  99. package/src/calls/speaker-identification.ts +213 -0
  100. package/src/calls/twilio-provider.ts +10 -6
  101. package/src/calls/twilio-routes.ts +90 -76
  102. package/src/calls/types.ts +1 -1
  103. package/src/cli/config-commands.ts +334 -0
  104. package/src/cli/core-commands.ts +776 -0
  105. package/src/cli/doordash.ts +251 -1
  106. package/src/cli/ipc-client.ts +82 -0
  107. package/src/cli/map.ts +246 -0
  108. package/src/cli/twitter.ts +575 -0
  109. package/src/cli.ts +7 -5
  110. package/src/commands/__tests__/cc-command-registry.test.ts +319 -0
  111. package/src/commands/cc-command-registry.ts +209 -0
  112. package/src/config/bundled-skills/contacts/SKILL.md +39 -0
  113. package/src/config/bundled-skills/contacts/TOOLS.json +122 -0
  114. package/src/config/bundled-skills/contacts/tools/contact-merge.ts +9 -0
  115. package/src/config/bundled-skills/contacts/tools/contact-search.ts +9 -0
  116. package/src/config/bundled-skills/contacts/tools/contact-upsert.ts +9 -0
  117. package/src/config/bundled-skills/document/SKILL.md +18 -0
  118. package/src/config/bundled-skills/document/TOOLS.json +53 -0
  119. package/src/config/bundled-skills/document/tools/document-create.ts +9 -0
  120. package/src/config/bundled-skills/document/tools/document-update.ts +9 -0
  121. package/src/config/bundled-skills/doordash/SKILL.md +82 -23
  122. package/src/config/bundled-skills/followups/SKILL.md +32 -0
  123. package/src/config/bundled-skills/followups/TOOLS.json +100 -0
  124. package/src/config/bundled-skills/followups/tools/followup-create.ts +9 -0
  125. package/src/config/bundled-skills/followups/tools/followup-list.ts +9 -0
  126. package/src/config/bundled-skills/followups/tools/followup-resolve.ts +9 -0
  127. package/src/config/bundled-skills/image-studio/tools/media-generate-image.ts +1 -23
  128. package/src/config/bundled-skills/messaging/tools/messaging-analyze-style.ts +2 -1
  129. package/src/config/bundled-skills/playbooks/SKILL.md +31 -0
  130. package/src/config/bundled-skills/playbooks/TOOLS.json +126 -0
  131. package/src/config/bundled-skills/playbooks/tools/playbook-create.ts +9 -0
  132. package/src/config/bundled-skills/playbooks/tools/playbook-delete.ts +9 -0
  133. package/src/config/bundled-skills/playbooks/tools/playbook-list.ts +9 -0
  134. package/src/config/bundled-skills/playbooks/tools/playbook-update.ts +9 -0
  135. package/src/config/bundled-skills/reminder/SKILL.md +20 -0
  136. package/src/config/bundled-skills/reminder/TOOLS.json +67 -0
  137. package/src/config/bundled-skills/reminder/tools/reminder-cancel.ts +9 -0
  138. package/src/config/bundled-skills/reminder/tools/reminder-create.ts +9 -0
  139. package/src/config/bundled-skills/reminder/tools/reminder-list.ts +9 -0
  140. package/src/config/bundled-skills/schedule/SKILL.md +74 -0
  141. package/src/config/bundled-skills/schedule/TOOLS.json +135 -0
  142. package/src/config/bundled-skills/schedule/tools/schedule-create.ts +9 -0
  143. package/src/config/bundled-skills/schedule/tools/schedule-delete.ts +9 -0
  144. package/src/config/bundled-skills/schedule/tools/schedule-list.ts +9 -0
  145. package/src/config/bundled-skills/schedule/tools/schedule-update.ts +9 -0
  146. package/src/config/bundled-skills/subagent/SKILL.md +25 -0
  147. package/src/config/bundled-skills/subagent/TOOLS.json +107 -0
  148. package/src/config/bundled-skills/subagent/tools/subagent-abort.ts +9 -0
  149. package/src/config/bundled-skills/subagent/tools/subagent-message.ts +9 -0
  150. package/src/config/bundled-skills/subagent/tools/subagent-read.ts +9 -0
  151. package/src/config/bundled-skills/subagent/tools/subagent-spawn.ts +9 -0
  152. package/src/config/bundled-skills/subagent/tools/subagent-status.ts +9 -0
  153. package/src/config/bundled-skills/tasks/SKILL.md +28 -0
  154. package/src/config/bundled-skills/tasks/TOOLS.json +256 -0
  155. package/src/config/bundled-skills/tasks/tools/task-delete.ts +9 -0
  156. package/src/config/bundled-skills/tasks/tools/task-list-add.ts +9 -0
  157. package/src/config/bundled-skills/tasks/tools/task-list-remove.ts +9 -0
  158. package/src/config/bundled-skills/tasks/tools/task-list-show.ts +9 -0
  159. package/src/config/bundled-skills/tasks/tools/task-list-update.ts +9 -0
  160. package/src/config/bundled-skills/tasks/tools/task-list.ts +9 -0
  161. package/src/config/bundled-skills/tasks/tools/task-run.ts +9 -0
  162. package/src/config/bundled-skills/tasks/tools/task-save.ts +9 -0
  163. package/src/config/bundled-skills/twitter/SKILL.md +134 -0
  164. package/src/config/bundled-skills/watcher/SKILL.md +27 -0
  165. package/src/config/bundled-skills/watcher/TOOLS.json +147 -0
  166. package/src/config/bundled-skills/watcher/tools/watcher-create.ts +9 -0
  167. package/src/config/bundled-skills/watcher/tools/watcher-delete.ts +9 -0
  168. package/src/config/bundled-skills/watcher/tools/watcher-digest.ts +9 -0
  169. package/src/config/bundled-skills/watcher/tools/watcher-list.ts +9 -0
  170. package/src/config/bundled-skills/watcher/tools/watcher-update.ts +9 -0
  171. package/src/config/defaults.ts +33 -0
  172. package/src/config/loader.ts +4 -1
  173. package/src/config/schema.ts +161 -1
  174. package/src/config/system-prompt.ts +61 -16
  175. package/src/config/templates/IDENTITY.md +7 -0
  176. package/src/config/types.ts +4 -0
  177. package/src/contacts/contact-store.ts +4 -4
  178. package/src/daemon/assistant-attachments.ts +10 -0
  179. package/src/daemon/classifier.ts +3 -1
  180. package/src/daemon/computer-use-session.ts +3 -1
  181. package/src/daemon/date-context.ts +136 -0
  182. package/src/daemon/handlers/apps.ts +16 -1
  183. package/src/daemon/handlers/browser.ts +54 -0
  184. package/src/daemon/handlers/computer-use.ts +7 -1
  185. package/src/daemon/handlers/config.ts +163 -5
  186. package/src/daemon/handlers/diagnostics.ts +5 -1
  187. package/src/daemon/handlers/documents.ts +18 -29
  188. package/src/daemon/handlers/home-base.ts +5 -1
  189. package/src/daemon/handlers/index.ts +40 -277
  190. package/src/daemon/handlers/misc.ts +9 -1
  191. package/src/daemon/handlers/publish.ts +6 -1
  192. package/src/daemon/handlers/sessions.ts +65 -12
  193. package/src/daemon/handlers/shared.ts +36 -1
  194. package/src/daemon/handlers/signing.ts +37 -0
  195. package/src/daemon/handlers/skills.ts +20 -6
  196. package/src/daemon/handlers/subagents.ts +8 -3
  197. package/src/daemon/handlers/twitter-auth.ts +169 -0
  198. package/src/daemon/handlers/work-items.ts +384 -68
  199. package/src/daemon/ipc-contract-inventory.json +28 -4
  200. package/src/daemon/ipc-contract.ts +133 -37
  201. package/src/daemon/ipc-protocol.ts +7 -2
  202. package/src/daemon/lifecycle.ts +21 -0
  203. package/src/daemon/main.ts +10 -4
  204. package/src/daemon/ride-shotgun-handler.ts +74 -10
  205. package/src/daemon/server.ts +143 -26
  206. package/src/daemon/session-agent-loop.ts +887 -0
  207. package/src/daemon/session-attachments.ts +28 -5
  208. package/src/daemon/session-error.ts +24 -3
  209. package/src/daemon/session-lifecycle.ts +147 -0
  210. package/src/daemon/session-media-retry.ts +147 -0
  211. package/src/daemon/session-messaging.ts +145 -0
  212. package/src/daemon/session-notifiers.ts +164 -0
  213. package/src/daemon/session-process.ts +2 -2
  214. package/src/daemon/session-queue-manager.ts +1 -0
  215. package/src/daemon/session-runtime-assembly.ts +52 -0
  216. package/src/daemon/session-skill-tools.ts +124 -5
  217. package/src/daemon/session-slash.ts +3 -0
  218. package/src/daemon/session-surfaces.ts +77 -2
  219. package/src/daemon/session-tool-setup.ts +216 -2
  220. package/src/daemon/session-usage.ts +0 -2
  221. package/src/daemon/session.ts +114 -1404
  222. package/src/daemon/video-thumbnail.ts +60 -0
  223. package/src/doordash/client.ts +121 -27
  224. package/src/doordash/queries.ts +1 -2
  225. package/src/export/formatter.ts +3 -1
  226. package/src/followups/followup-store.ts +4 -2
  227. package/src/followups/types.ts +6 -0
  228. package/src/hooks/templates.ts +1 -1
  229. package/src/index.ts +32 -1153
  230. package/src/memory/attachments-store.ts +28 -83
  231. package/src/memory/channel-delivery-store.ts +7 -21
  232. package/src/memory/clarification-resolver.ts +6 -5
  233. package/src/memory/contradiction-checker.ts +3 -2
  234. package/src/memory/conversation-key-store.ts +10 -29
  235. package/src/memory/conversation-store.ts +2 -1
  236. package/src/memory/db.ts +96 -2
  237. package/src/memory/entity-extractor.ts +6 -3
  238. package/src/memory/items-extractor.ts +5 -4
  239. package/src/memory/jobs-store.ts +3 -2
  240. package/src/memory/llm-usage-store.ts +1 -2
  241. package/src/memory/runs-store.ts +1 -2
  242. package/src/memory/schema.ts +23 -2
  243. package/src/messaging/style-analyzer.ts +3 -2
  244. package/src/messaging/thread-summarizer.ts +8 -12
  245. package/src/messaging/triage-engine.ts +4 -2
  246. package/src/providers/openrouter/client.ts +20 -0
  247. package/src/providers/registry.ts +8 -0
  248. package/src/runtime/http-server.ts +108 -20
  249. package/src/runtime/routes/attachment-routes.ts +2 -3
  250. package/src/runtime/routes/call-routes.ts +140 -0
  251. package/src/runtime/routes/channel-routes.ts +5 -10
  252. package/src/runtime/routes/conversation-routes.ts +5 -5
  253. package/src/runtime/routes/run-routes.ts +2 -2
  254. package/src/runtime/run-orchestrator.ts +9 -3
  255. package/src/schedule/recurrence-engine.ts +138 -0
  256. package/src/schedule/recurrence-types.ts +67 -0
  257. package/src/schedule/schedule-store.ts +102 -57
  258. package/src/schedule/scheduler.ts +9 -6
  259. package/src/security/oauth2.ts +29 -4
  260. package/src/security/secret-allowlist.ts +46 -0
  261. package/src/skills/clawhub.ts +1 -1
  262. package/src/subagent/manager.ts +40 -8
  263. package/src/swarm/backend-claude-code.ts +64 -9
  264. package/src/swarm/worker-prompts.ts +2 -1
  265. package/src/tasks/SPEC.md +34 -28
  266. package/src/tasks/ephemeral-permissions.ts +16 -7
  267. package/src/tasks/task-compiler.ts +5 -4
  268. package/src/tasks/task-runner.ts +10 -5
  269. package/src/tasks/task-scheduler.ts +1 -1
  270. package/src/tasks/tool-sanitizer.ts +36 -0
  271. package/src/tools/assets/search.ts +4 -4
  272. package/src/tools/browser/api-map.ts +220 -0
  273. package/src/tools/browser/auto-navigate.ts +270 -0
  274. package/src/tools/browser/browser-execution.ts +2 -1
  275. package/src/tools/browser/browser-manager.ts +2 -2
  276. package/src/tools/browser/network-recorder.ts +5 -4
  277. package/src/tools/browser/x-auto-navigate.ts +207 -0
  278. package/src/tools/calls/call-end.ts +17 -67
  279. package/src/tools/calls/call-start.ts +24 -85
  280. package/src/tools/calls/call-status.ts +35 -51
  281. package/src/tools/claude-code/claude-code.ts +77 -11
  282. package/src/tools/contacts/contact-merge.ts +46 -78
  283. package/src/tools/contacts/contact-search.ts +35 -79
  284. package/src/tools/contacts/contact-upsert.ts +35 -108
  285. package/src/tools/credentials/vault.ts +20 -4
  286. package/src/tools/document/document-tool.ts +71 -144
  287. package/src/tools/executor.ts +129 -10
  288. package/src/tools/followups/followup_create.ts +46 -88
  289. package/src/tools/followups/followup_list.ts +34 -74
  290. package/src/tools/followups/followup_resolve.ts +31 -66
  291. package/src/tools/host-terminal/cli-discover.ts +2 -1
  292. package/src/tools/host-terminal/host-shell.ts +10 -0
  293. package/src/tools/memory/handlers.ts +5 -4
  294. package/src/tools/network/__tests__/web-search.test.ts +427 -0
  295. package/src/tools/network/script-proxy/__tests__/logging.test.ts +248 -0
  296. package/src/tools/network/script-proxy/__tests__/policy.test.ts +234 -0
  297. package/src/tools/network/script-proxy/__tests__/router.test.ts +76 -0
  298. package/src/tools/network/web-fetch.ts +18 -6
  299. package/src/tools/playbooks/index.ts +4 -5
  300. package/src/tools/playbooks/playbook-create.ts +3 -47
  301. package/src/tools/playbooks/playbook-delete.ts +1 -25
  302. package/src/tools/playbooks/playbook-list.ts +1 -28
  303. package/src/tools/playbooks/playbook-update.ts +3 -51
  304. package/src/tools/reminder/reminder.ts +5 -78
  305. package/src/tools/schedule/create.ts +69 -74
  306. package/src/tools/schedule/delete.ts +21 -47
  307. package/src/tools/schedule/list.ts +55 -74
  308. package/src/tools/schedule/update.ts +77 -84
  309. package/src/tools/subagent/abort.ts +29 -58
  310. package/src/tools/subagent/message.ts +30 -63
  311. package/src/tools/subagent/read.ts +53 -84
  312. package/src/tools/subagent/spawn.ts +43 -82
  313. package/src/tools/subagent/status.ts +42 -71
  314. package/src/tools/swarm/delegate.ts +2 -1
  315. package/src/tools/tasks/index.ts +8 -8
  316. package/src/tools/tasks/task-delete.ts +60 -88
  317. package/src/tools/tasks/task-list.ts +31 -52
  318. package/src/tools/tasks/task-run.ts +72 -108
  319. package/src/tools/tasks/task-save.ts +33 -65
  320. package/src/tools/tasks/work-item-enqueue.ts +183 -215
  321. package/src/tools/tasks/work-item-list.ts +33 -63
  322. package/src/tools/tasks/work-item-remove.ts +45 -97
  323. package/src/tools/tasks/work-item-update.ts +91 -163
  324. package/src/tools/terminal/backends/native.ts +3 -1
  325. package/src/tools/tool-manifest.ts +0 -62
  326. package/src/tools/types.ts +6 -0
  327. package/src/tools/ui-surface/definitions.ts +3 -1
  328. package/src/tools/watch/screen-watch.ts +3 -1
  329. package/src/tools/watcher/create.ts +52 -98
  330. package/src/tools/watcher/delete.ts +20 -46
  331. package/src/tools/watcher/digest.ts +36 -70
  332. package/src/tools/watcher/list.ts +49 -79
  333. package/src/tools/watcher/update.ts +45 -91
  334. package/src/twitter/client.ts +690 -0
  335. package/src/twitter/session.ts +91 -0
  336. package/src/usage/types.ts +0 -1
  337. package/src/util/truncate.ts +6 -0
  338. package/src/watcher/providers/slack.ts +2 -1
  339. package/src/watcher/watcher-store.ts +3 -2
  340. package/src/work-items/work-item-store.ts +27 -2
  341. package/src/workspace/commit-message-enrichment-service.ts +31 -7
  342. package/src/workspace/git-service.ts +87 -22
  343. package/src/workspace/provider-commit-message-generator.ts +242 -0
  344. package/src/workspace/turn-commit.ts +62 -3
  345. package/src/tools/contacts/index.ts +0 -4
  346. package/src/tools/document/index.ts +0 -5
  347. package/src/tools/followups/index.ts +0 -3
  348. package/src/tools/subagent/index.ts +0 -5
  349. /package/src/__tests__/{memory-context-benchmark.test.ts → memory-context-benchmark.benchmark.test.ts} +0 -0
@@ -232,7 +232,7 @@ describe('image-studio TOOLS.json manifest', () => {
232
232
  const props = manifest.tools[0].input_schema.properties;
233
233
  expect(props.mode.enum).toEqual(['generate', 'edit']);
234
234
  expect(props.attachment_ids.type).toBe('array');
235
- expect(props.model.enum).toEqual(['gemini-2.5-flash-image', 'gemini-3-pro-image']);
235
+ expect(props.model.enum).toEqual(['gemini-2.5-flash-image', 'gemini-3-pro-image', 'gemini-3-pro-image-preview']);
236
236
  expect(props.variants.type).toBe('number');
237
237
  });
238
238
  });
@@ -96,7 +96,7 @@ mock.module('../tools/network/script-proxy/certs.js', () => ({
96
96
  // Source imports (after mocks)
97
97
  // ---------------------------------------------------------------------------
98
98
 
99
- import { initializeDb, getDb } from '../memory/db.js';
99
+ import { initializeDb, getDb, resetDb } from '../memory/db.js';
100
100
  import { uploadAttachment, linkAttachmentToMessage } from '../memory/attachments-store.js';
101
101
  import { createConversation, addMessage } from '../memory/conversation-store.js';
102
102
  import { assetSearchTool, searchAttachments } from '../tools/assets/search.js';
@@ -114,6 +114,7 @@ initializeDb();
114
114
  mkdirSync(sandboxDir, { recursive: true });
115
115
 
116
116
  afterAll(async () => {
117
+ resetDb();
117
118
  await stopAllSessions();
118
119
  resolveByIdResults = new Map();
119
120
  secureKeyValues = new Map();
@@ -211,7 +212,7 @@ describe('Story E2E: selfie yesterday -> generated image today', () => {
211
212
 
212
213
  // -- Step 2: Selfie uploaded in Thread A (standard) --
213
214
  threadA = createConversation({ title: 'Thread A — selfie upload' });
214
- selfieAttachment = uploadAttachment('asst-story-01', 'selfie.png', 'image/png', TINY_PNG_BASE64);
215
+ selfieAttachment = uploadAttachment('selfie.png', 'image/png', TINY_PNG_BASE64);
215
216
  selfieId = selfieAttachment.id;
216
217
 
217
218
  const msgA = addMessage(threadA.id, 'user', 'Here is my selfie from yesterday');
@@ -358,7 +359,6 @@ describe('Story E2E: selfie yesterday -> generated image today', () => {
358
359
  // in the attachment store (same hash = returns existing row).
359
360
  const generatedImageBase64 = Buffer.from('generated-portrait-data-unique').toString('base64');
360
361
  const outputAttachment = uploadAttachment(
361
- 'asst-story-01',
362
362
  'generated-portrait.png',
363
363
  'image/png',
364
364
  generatedImageBase64,
@@ -473,7 +473,7 @@ describe('Private-thread variant: cross-thread media blocking', () => {
473
473
  test('selfie in private thread A is NOT discoverable via search from Thread B', async () => {
474
474
  // Upload selfie in a private thread
475
475
  const privateThread = createConversation({ title: 'Private selfie thread', threadType: 'private' });
476
- const selfie = uploadAttachment('asst-priv', 'private-selfie.png', 'image/png', TINY_PNG_BASE64);
476
+ const selfie = uploadAttachment('private-selfie.png', 'image/png', TINY_PNG_BASE64);
477
477
  const msg = addMessage(privateThread.id, 'user', 'My private selfie');
478
478
  linkAttachmentToMessage(msg.id, selfie.id, 0);
479
479
 
@@ -497,7 +497,7 @@ describe('Private-thread variant: cross-thread media blocking', () => {
497
497
  test('selfie in private thread A is NOT materializable from Thread B', async () => {
498
498
  const privateThread = createConversation({ title: 'Private selfie thread', threadType: 'private' });
499
499
  const base64 = Buffer.from('private image data').toString('base64');
500
- const selfie = uploadAttachment('asst-priv', 'private-selfie.png', 'image/png', base64);
500
+ const selfie = uploadAttachment('private-selfie.png', 'image/png', base64);
501
501
  const msg = addMessage(privateThread.id, 'user', 'My private selfie');
502
502
  linkAttachmentToMessage(msg.id, selfie.id, 0);
503
503
 
@@ -521,7 +521,7 @@ describe('Private-thread variant: cross-thread media blocking', () => {
521
521
 
522
522
  test('selfie in private thread IS accessible from the same private thread', async () => {
523
523
  const privateThread = createConversation({ title: 'Private selfie thread', threadType: 'private' });
524
- const selfie = uploadAttachment('asst-priv', 'private-selfie.png', 'image/png', TINY_PNG_BASE64);
524
+ const selfie = uploadAttachment('private-selfie.png', 'image/png', TINY_PNG_BASE64);
525
525
  const msg = addMessage(privateThread.id, 'user', 'My private selfie');
526
526
  linkAttachmentToMessage(msg.id, selfie.id, 0);
527
527
 
@@ -550,7 +550,7 @@ describe('Private-thread variant: cross-thread media blocking', () => {
550
550
 
551
551
  test('selfie in private thread A is NOT accessible from private thread B', async () => {
552
552
  const privateThreadA = createConversation({ title: 'Private thread A', threadType: 'private' });
553
- const selfie = uploadAttachment('asst-priv', 'thread-a-selfie.png', 'image/png', TINY_PNG_BASE64);
553
+ const selfie = uploadAttachment('thread-a-selfie.png', 'image/png', TINY_PNG_BASE64);
554
554
  const msgA = addMessage(privateThreadA.id, 'user', 'Selfie in thread A');
555
555
  linkAttachmentToMessage(msgA.id, selfie.id, 0);
556
556
 
@@ -0,0 +1,430 @@
1
+ /**
2
+ * Memory Retrieval Benchmark
3
+ *
4
+ * Measures end-to-end memory recall time with varying database sizes.
5
+ * Validates latency stays within acceptable bounds and token budget
6
+ * enforcement works correctly.
7
+ */
8
+ import { afterAll, beforeAll, beforeEach, describe, expect, mock, test } from 'bun:test';
9
+ import { mkdtempSync, rmSync } from 'node:fs';
10
+ import { tmpdir } from 'node:os';
11
+ import { join } from 'node:path';
12
+
13
+ const testDir = mkdtempSync(join(tmpdir(), 'mem-retrieval-bench-'));
14
+
15
+ mock.module('../util/platform.js', () => ({
16
+ getDataDir: () => testDir,
17
+ isMacOS: () => process.platform === 'darwin',
18
+ isLinux: () => process.platform === 'linux',
19
+ isWindows: () => process.platform === 'win32',
20
+ getSocketPath: () => join(testDir, 'test.sock'),
21
+ getPidPath: () => join(testDir, 'test.pid'),
22
+ getDbPath: () => join(testDir, 'test.db'),
23
+ getLogPath: () => join(testDir, 'test.log'),
24
+ ensureDataDir: () => {},
25
+ }));
26
+
27
+ mock.module('../util/logger.js', () => ({
28
+ getLogger: () => new Proxy({} as Record<string, unknown>, {
29
+ get: () => () => {},
30
+ }),
31
+ }));
32
+
33
+ // Simulated network delay for semantic search (ms). When > 0, the mock
34
+ // semantic search sleeps for this duration before returning, simulating the
35
+ // Qdrant network round-trip that early termination is designed to skip.
36
+ let semanticSearchDelayMs = 0;
37
+
38
+ mock.module('../memory/search/semantic.js', () => ({
39
+ semanticSearch: async () => {
40
+ if (semanticSearchDelayMs > 0) {
41
+ await Bun.sleep(semanticSearchDelayMs);
42
+ }
43
+ return [];
44
+ },
45
+ isQdrantConnectionError: () => false,
46
+ }));
47
+
48
+ mock.module('../memory/embedding-backend.js', () => ({
49
+ getMemoryBackendStatus: (config: { memory: { enabled: boolean } }) => ({
50
+ enabled: config.memory.enabled,
51
+ degraded: false,
52
+ provider: 'local',
53
+ model: 'mock-embedding',
54
+ reason: null,
55
+ }),
56
+ embedWithBackend: async () => ({
57
+ provider: 'local' as const,
58
+ model: 'mock-embedding',
59
+ vectors: [new Array(1536).fill(0)],
60
+ }),
61
+ }));
62
+
63
+ import { DEFAULT_CONFIG } from '../config/defaults.js';
64
+ import { getDb, initializeDb, resetDb } from '../memory/db.js';
65
+ import { buildMemoryRecall } from '../memory/retriever.js';
66
+ import { conversations, memorySegments, messages } from '../memory/schema.js';
67
+ import type { AssistantConfig } from '../config/types.js';
68
+
69
+ function seedMemoryItems(conversationId: string, count: number, now: number): void {
70
+ const db = getDb();
71
+ db.insert(conversations).values({
72
+ id: conversationId,
73
+ title: null,
74
+ createdAt: now,
75
+ updatedAt: now,
76
+ totalInputTokens: 0,
77
+ totalOutputTokens: 0,
78
+ totalEstimatedCost: 0,
79
+ contextSummary: null,
80
+ contextCompactedMessageCount: 0,
81
+ contextCompactedAt: null,
82
+ }).run();
83
+
84
+ for (let i = 0; i < count; i++) {
85
+ const msgId = `msg-${conversationId}-${i}`;
86
+ const text = `Memory item ${i}: information about topic-${i % 20} including keyword-${i % 10} details.`;
87
+ db.insert(messages).values({
88
+ id: msgId,
89
+ conversationId,
90
+ role: i % 2 === 0 ? 'user' : 'assistant',
91
+ content: JSON.stringify([{ type: 'text', text }]),
92
+ createdAt: now + i,
93
+ }).run();
94
+ db.insert(memorySegments).values({
95
+ id: `seg-${conversationId}-${i}`,
96
+ messageId: msgId,
97
+ conversationId,
98
+ role: i % 2 === 0 ? 'user' : 'assistant',
99
+ segmentIndex: 0,
100
+ text,
101
+ tokenEstimate: 20,
102
+ scopeId: 'default',
103
+ createdAt: now + i,
104
+ updatedAt: now + i,
105
+ }).run();
106
+ }
107
+ }
108
+
109
+ function makeConfig(overrides?: { maxInjectTokens?: number }): AssistantConfig {
110
+ return {
111
+ ...DEFAULT_CONFIG,
112
+ memory: {
113
+ ...DEFAULT_CONFIG.memory,
114
+ embeddings: {
115
+ ...DEFAULT_CONFIG.memory.embeddings,
116
+ provider: 'local' as const,
117
+ required: false,
118
+ },
119
+ retrieval: {
120
+ ...DEFAULT_CONFIG.memory.retrieval,
121
+ lexicalTopK: 50,
122
+ semanticTopK: 20,
123
+ maxInjectTokens: overrides?.maxInjectTokens ?? 750,
124
+ reranking: { ...DEFAULT_CONFIG.memory.retrieval.reranking, enabled: false },
125
+ dynamicBudget: {
126
+ enabled: false,
127
+ minInjectTokens: 160,
128
+ maxInjectTokens: overrides?.maxInjectTokens ?? 750,
129
+ targetHeadroomTokens: 900,
130
+ },
131
+ },
132
+ },
133
+ };
134
+ }
135
+
136
+ describe('Memory retrieval benchmark', () => {
137
+ beforeAll(() => {
138
+ initializeDb();
139
+ });
140
+
141
+ beforeEach(() => {
142
+ const db = getDb();
143
+ db.run('DELETE FROM memory_item_sources');
144
+ db.run('DELETE FROM memory_item_entities');
145
+ db.run('DELETE FROM memory_entity_relations');
146
+ db.run('DELETE FROM memory_entities');
147
+ db.run('DELETE FROM memory_embeddings');
148
+ db.run('DELETE FROM memory_summaries');
149
+ db.run('DELETE FROM memory_items');
150
+ db.run('DELETE FROM memory_segment_fts');
151
+ db.run('DELETE FROM memory_segments');
152
+ db.run('DELETE FROM messages');
153
+ db.run('DELETE FROM conversations');
154
+ db.run('DELETE FROM memory_jobs');
155
+ db.run('DELETE FROM memory_checkpoints');
156
+ });
157
+
158
+ afterAll(() => {
159
+ resetDb();
160
+ try {
161
+ rmSync(testDir, { recursive: true });
162
+ } catch {
163
+ // best effort cleanup
164
+ }
165
+ });
166
+
167
// Seeds 100 memory items, runs one recall, and checks that it is enabled,
// not degraded, produced lexical hits and selections, and reported a
// latency below 500ms.
test('retrieval completes under 500ms for 100 items', async () => {
  const benchConversation = 'conv-bench-100';
  const seedNow = 1_700_500_000_000;
  seedMemoryItems(benchConversation, 100, seedNow);

  const benchConfig = makeConfig();
  const question = 'What do we know about topic-5 and keyword-3?';
  const result = await buildMemoryRecall(question, benchConversation, benchConfig);

  expect(result.enabled).toBe(true);
  expect(result.degraded).toBe(false);
  expect(result.lexicalHits).toBeGreaterThan(0);
  expect(result.selectedCount).toBeGreaterThan(0);
  // The limit is intentionally loose: it catches severe regressions and is
  // not meant as a precise benchmark.
  expect(result.latencyMs).toBeLessThan(500);
});
186
+
187
// Seeds 500 memory items, runs one recall, and checks that it is enabled,
// not degraded, produced lexical hits and selections, and reported a
// latency below 1000ms.
test('retrieval completes under 1000ms for 500 items', async () => {
  const benchConversation = 'conv-bench-500';
  const seedNow = 1_700_500_000_000;
  seedMemoryItems(benchConversation, 500, seedNow);

  const benchConfig = makeConfig();
  const question = 'What do we know about topic-5 and keyword-3?';
  const result = await buildMemoryRecall(question, benchConversation, benchConfig);

  expect(result.enabled).toBe(true);
  expect(result.degraded).toBe(false);
  expect(result.lexicalHits).toBeGreaterThan(0);
  expect(result.selectedCount).toBeGreaterThan(0);
  expect(result.latencyMs).toBeLessThan(1000);
});
205
+
206
// Seeds 2000 memory items, runs one recall, and checks that it is enabled,
// not degraded, produced lexical hits and selections, and reported a
// latency below 2000ms.
test('retrieval completes under 2000ms for 2000 items', async () => {
  const benchConversation = 'conv-bench-2000';
  const seedNow = 1_700_500_000_000;
  seedMemoryItems(benchConversation, 2000, seedNow);

  const benchConfig = makeConfig();
  const question = 'What do we know about topic-5 and keyword-3?';
  const result = await buildMemoryRecall(question, benchConversation, benchConfig);

  expect(result.enabled).toBe(true);
  expect(result.degraded).toBe(false);
  expect(result.lexicalHits).toBeGreaterThan(0);
  expect(result.selectedCount).toBeGreaterThan(0);
  expect(result.latencyMs).toBeLessThan(2000);
});
224
+
225
// Runs the same query twice over 500 seeded items, first with a 200-token
// cap and then with a 2000-token cap, and checks that each run stays within
// its cap and that the larger cap never yields fewer injected tokens.
test('token budget enforcement: maxInjectTokens is respected', async () => {
  const benchConversation = 'conv-bench-budget';
  const seedNow = 1_700_500_000_000;
  seedMemoryItems(benchConversation, 500, seedNow);

  const question = 'What do we know about topic-5 and keyword-3?';

  const tightCap = 200;
  const tightConfig = makeConfig({ maxInjectTokens: tightCap });
  const tightRecall = await buildMemoryRecall(question, benchConversation, tightConfig);

  expect(tightRecall.enabled).toBe(true);
  expect(tightRecall.injectedTokens).toBeLessThanOrEqual(tightCap);
  expect(tightRecall.injectedTokens).toBeGreaterThan(0);

  // A second run with a much larger cap shows the small cap really was the
  // limiting factor.
  const roomyCap = 2000;
  const roomyConfig = makeConfig({ maxInjectTokens: roomyCap });
  const roomyRecall = await buildMemoryRecall(question, benchConversation, roomyConfig);

  expect(roomyRecall.injectedTokens).toBeLessThanOrEqual(roomyCap);
  // More budget must give at least as many injected tokens.
  expect(roomyRecall.injectedTokens).toBeGreaterThanOrEqual(tightRecall.injectedTokens);
});
255
+
256
// Runs one recall over 500 seeded items with early termination enabled and
// low thresholds, then checks that the recall reports earlyTerminated, zero
// semantic hits, and at least one selected item.
test('early termination reduces latency when applicable', async () => {
  const benchConversation = 'conv-bench-et';
  const seedNow = 1_700_500_000_000;
  // 500 items gives early termination enough candidates to trigger.
  seedMemoryItems(benchConversation, 500, seedNow);

  const defaultMemory = DEFAULT_CONFIG.memory;
  const defaultRetrieval = defaultMemory.retrieval;

  // Same retrieval settings as the other benchmarks, plus early termination
  // switched on with low thresholds so that it fires.
  const etConfig: AssistantConfig = {
    ...DEFAULT_CONFIG,
    memory: {
      ...defaultMemory,
      embeddings: {
        ...defaultMemory.embeddings,
        provider: 'local' as const,
        required: false,
      },
      retrieval: {
        ...defaultRetrieval,
        lexicalTopK: 50,
        semanticTopK: 20,
        maxInjectTokens: 750,
        reranking: { ...defaultRetrieval.reranking, enabled: false },
        dynamicBudget: {
          enabled: false,
          minInjectTokens: 160,
          maxInjectTokens: 750,
          targetHeadroomTokens: 900,
        },
        earlyTermination: {
          enabled: true,
          minCandidates: 5,
          minHighConfidence: 3,
          confidenceThreshold: 0.3,
        },
      },
    },
  };

  const question = 'What do we know about topic-5 and keyword-3?';
  const result = await buildMemoryRecall(question, benchConversation, etConfig);

  expect(result.enabled).toBe(true);
  expect(result.earlyTerminated).toBe(true);
  // When early termination fires, semantic search is expected to be skipped.
  expect(result.semanticHits).toBe(0);
  expect(result.selectedCount).toBeGreaterThan(0);
});
306
+
307
// Times recall with and without early termination (ET) over 500 seeded
// items, five alternating rounds after one warm-up each, and requires the
// median ET time to be at least 15% below the median baseline time.
test('early termination is measurably faster than baseline', async () => {
  const benchConversation = 'conv-bench-et-delta';
  const seedNow = 1_700_500_000_000;
  seedMemoryItems(benchConversation, 500, seedNow);

  // Stand-in for the Qdrant network round-trip that ET is meant to skip.
  // 100ms is large enough to dominate variable CPU-bound work on slow hosts.
  semanticSearchDelayMs = 100;

  const query = 'What do we know about topic-5 and keyword-3?';

  const defaultMemory = DEFAULT_CONFIG.memory;
  const defaultRetrieval = defaultMemory.retrieval;
  // Thresholds shared by both configs; only `enabled` differs between them.
  const etThresholds = {
    minCandidates: 5,
    minHighConfidence: 3,
    confidenceThreshold: 0.3,
  };

  const etConfig: AssistantConfig = {
    ...DEFAULT_CONFIG,
    memory: {
      ...defaultMemory,
      embeddings: {
        ...defaultMemory.embeddings,
        provider: 'local' as const,
        required: false,
      },
      retrieval: {
        ...defaultRetrieval,
        lexicalTopK: 50,
        semanticTopK: 20,
        maxInjectTokens: 750,
        reranking: { ...defaultRetrieval.reranking, enabled: false },
        dynamicBudget: {
          enabled: false,
          minInjectTokens: 160,
          maxInjectTokens: 750,
          targetHeadroomTokens: 900,
        },
        earlyTermination: { enabled: true, ...etThresholds },
      },
    },
  };

  // Identical to etConfig except that early termination is turned off.
  const noEtConfig: AssistantConfig = {
    ...etConfig,
    memory: {
      ...etConfig.memory,
      retrieval: {
        ...etConfig.memory.retrieval,
        earlyTermination: { enabled: false, ...etThresholds },
      },
    },
  };

  try {
    // One untimed call per config so cold-start cost does not skew samples.
    await buildMemoryRecall(query, benchConversation, etConfig);
    await buildMemoryRecall(query, benchConversation, noEtConfig);

    const iterations = 5;
    const etSamples: number[] = [];
    const baselineSamples: number[] = [];

    for (let round = 0; round < iterations; round += 1) {
      const etStart = performance.now();
      const etRecall = await buildMemoryRecall(query, benchConversation, etConfig);
      etSamples.push(performance.now() - etStart);
      expect(etRecall.earlyTerminated).toBe(true);

      const baselineStart = performance.now();
      const baselineRecall = await buildMemoryRecall(query, benchConversation, noEtConfig);
      baselineSamples.push(performance.now() - baselineStart);
      expect(baselineRecall.earlyTerminated).toBe(false);
    }

    const ascending = (a: number, b: number) => a - b;
    etSamples.sort(ascending);
    baselineSamples.sort(ascending);
    const middle = Math.floor(iterations / 2);
    const medianEt = etSamples[middle];
    const medianBaseline = baselineSamples[middle];

    // ET avoids the mocked network delay, so it should be measurably faster.
    // The 15% bar leaves room for slower CI hosts, where CPU-bound work is a
    // larger share of the total relative to the fixed mock delay.
    const speedup = 1 - medianEt / medianBaseline;
    expect(speedup).toBeGreaterThanOrEqual(0.15);
  } finally {
    // Always restore the delay so later tests are unaffected.
    semanticSearchDelayMs = 0;
  }
});
400
+
401
// Checks that the latency a recall reports about itself agrees with the
// wall-clock time measured around the call. The title previously said "20%"
// while the assertions enforce a 50% band; the title now matches them.
test('recall.latencyMs tracks wall-clock within 50% tolerance', async () => {
  const conversationId = 'conv-bench-wallclock';
  const now = 1_700_500_000_000;
  seedMemoryItems(conversationId, 500, now);

  const config = makeConfig();

  // Measure with Date.now() to match the timer source used by
  // buildMemoryRecall, avoiding precision mismatches between integer-ms
  // Date.now() and sub-ms performance.now().
  const wallStart = Date.now();
  const recall = await buildMemoryRecall(
    'What do we know about topic-5 and keyword-3?',
    conversationId,
    config,
  );
  const wallMs = Date.now() - wallStart;

  expect(recall.enabled).toBe(true);
  expect(recall.latencyMs).toBeGreaterThan(0);

  // Self-reported latencyMs should agree with wall-clock within 50%.
  // The band is wide because both sides use Date.now() (integer ms), so on
  // fast runs the quantization error can be large relative to total elapsed
  // time. Math.max guards against dividing by a 0ms wall-clock reading.
  const ratio = recall.latencyMs / Math.max(wallMs, 1);
  expect(ratio).toBeGreaterThanOrEqual(0.5);
  expect(ratio).toBeLessThanOrEqual(1.5);
});
430
+ });