@vellumai/assistant 0.5.16 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (407)
  1. package/ARCHITECTURE.md +1 -1
  2. package/Dockerfile +0 -3
  3. package/knip.json +2 -1
  4. package/openapi.yaml +660 -80
  5. package/package.json +1 -1
  6. package/src/__tests__/actor-token-service.test.ts +68 -0
  7. package/src/__tests__/agent-loop.test.ts +0 -32
  8. package/src/__tests__/always-loaded-tools-guard.test.ts +2 -2
  9. package/src/__tests__/anthropic-provider.test.ts +57 -3
  10. package/src/__tests__/app-compiler.test.ts +120 -0
  11. package/src/__tests__/assistant-feature-flags-integration.test.ts +2 -2
  12. package/src/__tests__/call-conversation-messages.test.ts +2 -6
  13. package/src/__tests__/call-domain.test.ts +2 -6
  14. package/src/__tests__/call-pointer-messages.test.ts +2 -14
  15. package/src/__tests__/call-recovery.test.ts +2 -6
  16. package/src/__tests__/call-routes-http.test.ts +2 -6
  17. package/src/__tests__/call-store.test.ts +2 -6
  18. package/src/__tests__/cancel-resolves-conversation-key.test.ts +2 -6
  19. package/src/__tests__/canonical-guardian-store.test.ts +2 -6
  20. package/src/__tests__/channel-delivery-store.test.ts +2 -6
  21. package/src/__tests__/channel-retry-sweep.test.ts +2 -6
  22. package/src/__tests__/checker.test.ts +25 -3
  23. package/src/__tests__/clawhub.test.ts +54 -24
  24. package/src/__tests__/cli-command-risk-guard.test.ts +14 -0
  25. package/src/__tests__/cli-memory.test.ts +74 -69
  26. package/src/__tests__/config-schema.test.ts +1 -1
  27. package/src/__tests__/config-set-platform-guard.test.ts +302 -0
  28. package/src/__tests__/confirmation-request-guardian-bridge.test.ts +2 -6
  29. package/src/__tests__/contacts-tools.test.ts +31 -0
  30. package/src/__tests__/context-overflow-reducer.test.ts +86 -0
  31. package/src/__tests__/context-token-estimator.test.ts +175 -10
  32. package/src/__tests__/conversation-agent-loop-overflow.test.ts +9 -0
  33. package/src/__tests__/conversation-agent-loop.test.ts +9 -0
  34. package/src/__tests__/conversation-attachments.test.ts +2 -6
  35. package/src/__tests__/conversation-attention-store.test.ts +2 -6
  36. package/src/__tests__/conversation-clear-safety.test.ts +2 -6
  37. package/src/__tests__/conversation-delete-schedule-cleanup.test.ts +4 -10
  38. package/src/__tests__/conversation-disk-view-integration.test.ts +2 -6
  39. package/src/__tests__/conversation-disk-view.test.ts +2 -6
  40. package/src/__tests__/conversation-error.test.ts +33 -2
  41. package/src/__tests__/conversation-fork-crud.test.ts +2 -6
  42. package/src/__tests__/conversation-history-web-search.test.ts +5 -0
  43. package/src/__tests__/conversation-load-history-repair.test.ts +5 -1
  44. package/src/__tests__/conversation-media-retry.test.ts +91 -0
  45. package/src/__tests__/conversation-starter-routes.test.ts +20 -11
  46. package/src/__tests__/conversation-store.test.ts +2 -6
  47. package/src/__tests__/conversation-usage.test.ts +2 -6
  48. package/src/__tests__/conversation-wipe.test.ts +11 -408
  49. package/src/__tests__/credential-execution-feature-gates.test.ts +3 -3
  50. package/src/__tests__/credential-execution-shell-lockdown.test.ts +2 -2
  51. package/src/__tests__/credential-security-e2e.test.ts +2 -0
  52. package/src/__tests__/followup-tools.test.ts +2 -6
  53. package/src/__tests__/graph-extraction-event-date.test.ts +186 -0
  54. package/src/__tests__/guardian-action-conversation-turn.test.ts +2 -6
  55. package/src/__tests__/guardian-action-followup-executor.test.ts +2 -6
  56. package/src/__tests__/guardian-action-followup-store.test.ts +2 -6
  57. package/src/__tests__/guardian-action-grant-mint-consume.test.ts +2 -6
  58. package/src/__tests__/guardian-action-late-reply.test.ts +2 -6
  59. package/src/__tests__/guardian-action-store.test.ts +2 -6
  60. package/src/__tests__/guardian-binding-drift-heal.test.ts +2 -6
  61. package/src/__tests__/guardian-decision-primitive-canonical.test.ts +8 -8
  62. package/src/__tests__/guardian-dispatch.test.ts +2 -6
  63. package/src/__tests__/guardian-grant-minting.test.ts +2 -14
  64. package/src/__tests__/guardian-principal-id-roundtrip.test.ts +2 -6
  65. package/src/__tests__/guardian-routing-invariants.test.ts +192 -6
  66. package/src/__tests__/guardian-routing-state.test.ts +2 -6
  67. package/src/__tests__/guardian-verification-voice-binding.test.ts +2 -6
  68. package/src/__tests__/inbound-invite-redemption.test.ts +2 -6
  69. package/src/__tests__/injection-block.test.ts +154 -0
  70. package/src/__tests__/install-meta.test.ts +506 -0
  71. package/src/__tests__/install-skill-routing.test.ts +292 -0
  72. package/src/__tests__/invite-redemption-service.test.ts +2 -6
  73. package/src/__tests__/invite-routes-http.test.ts +2 -6
  74. package/src/__tests__/jobs-store-qdrant-breaker.test.ts +2 -14
  75. package/src/__tests__/list-messages-attachments.test.ts +2 -6
  76. package/src/__tests__/llm-context-route-provider.test.ts +2 -6
  77. package/src/__tests__/llm-request-log-turn-query.test.ts +2 -6
  78. package/src/__tests__/llm-usage-store.test.ts +2 -6
  79. package/src/__tests__/log-export-workspace.test.ts +2 -6
  80. package/src/__tests__/managed-store.test.ts +38 -11
  81. package/src/__tests__/memory-jobs-worker-backoff.test.ts +2 -8
  82. package/src/__tests__/memory-recall-log-store.test.ts +2 -6
  83. package/src/__tests__/memory-upsert-concurrency.test.ts +4 -112
  84. package/src/__tests__/non-member-access-request.test.ts +2 -6
  85. package/src/__tests__/notification-guardian-path.test.ts +2 -6
  86. package/src/__tests__/oauth-cli.test.ts +364 -2
  87. package/src/__tests__/oauth2-gateway-transport.test.ts +18 -3
  88. package/src/__tests__/outlook-attachments.test.ts +301 -0
  89. package/src/__tests__/outlook-automation-tools.test.ts +425 -0
  90. package/src/__tests__/outlook-categories.test.ts +212 -0
  91. package/src/__tests__/outlook-client-automation.test.ts +246 -0
  92. package/src/__tests__/outlook-compose-tools.test.ts +325 -0
  93. package/src/__tests__/outlook-declutter-tools.test.ts +585 -0
  94. package/src/__tests__/outlook-email-watcher.test.ts +322 -0
  95. package/src/__tests__/outlook-follow-up.test.ts +196 -0
  96. package/src/__tests__/outlook-messaging-provider.test.ts +498 -3
  97. package/src/__tests__/outlook-trash.test.ts +77 -0
  98. package/src/__tests__/outlook-unsubscribe.test.ts +250 -0
  99. package/src/__tests__/platform-callback-registration.test.ts +4 -4
  100. package/src/__tests__/playbook-execution.test.ts +76 -80
  101. package/src/__tests__/playbook-tools.test.ts +5 -7
  102. package/src/__tests__/provider-error-scenarios.test.ts +21 -0
  103. package/src/__tests__/rebuild-index-graph-nodes.test.ts +273 -0
  104. package/src/__tests__/registry.test.ts +2 -2
  105. package/src/__tests__/require-fresh-approval.test.ts +64 -2
  106. package/src/__tests__/runtime-events-sse-parity.test.ts +2 -6
  107. package/src/__tests__/runtime-events-sse.test.ts +2 -6
  108. package/src/__tests__/schedule-store.test.ts +2 -6
  109. package/src/__tests__/schedule-tools.test.ts +2 -6
  110. package/src/__tests__/scheduler-recurrence.test.ts +1 -5
  111. package/src/__tests__/scoped-approval-grants.test.ts +2 -6
  112. package/src/__tests__/scoped-grant-security-matrix.test.ts +2 -6
  113. package/src/__tests__/search-skills-unified.test.ts +421 -0
  114. package/src/__tests__/secret-onetime-send.test.ts +2 -0
  115. package/src/__tests__/send-endpoint-busy.test.ts +2 -6
  116. package/src/__tests__/sequence-store.test.ts +2 -6
  117. package/src/__tests__/server-history-render.test.ts +2 -6
  118. package/src/__tests__/skill-feature-flags-integration.test.ts +38 -31
  119. package/src/__tests__/skill-feature-flags.test.ts +6 -6
  120. package/src/__tests__/skill-load-feature-flag.test.ts +11 -11
  121. package/src/__tests__/skill-memory.test.ts +140 -98
  122. package/src/__tests__/skills-uninstall.test.ts +2 -2
  123. package/src/__tests__/skills.test.ts +1 -1
  124. package/src/__tests__/slack-inbound-verification.test.ts +2 -6
  125. package/src/__tests__/task-compiler.test.ts +2 -6
  126. package/src/__tests__/task-management-tools.test.ts +2 -6
  127. package/src/__tests__/task-memory-cleanup.test.ts +173 -229
  128. package/src/__tests__/task-runner.test.ts +2 -6
  129. package/src/__tests__/task-scheduler.test.ts +2 -6
  130. package/src/__tests__/test-preload.ts +3 -0
  131. package/src/__tests__/tool-approval-handler.test.ts +2 -6
  132. package/src/__tests__/tool-grant-request-escalation.test.ts +2 -6
  133. package/src/__tests__/tool-side-effects-slack-dm.test.ts +276 -0
  134. package/src/__tests__/trust-store.test.ts +1 -1
  135. package/src/__tests__/trusted-contact-inline-approval-integration.test.ts +2 -6
  136. package/src/__tests__/trusted-contact-lifecycle-notifications.test.ts +2 -6
  137. package/src/__tests__/trusted-contact-multichannel.test.ts +2 -6
  138. package/src/__tests__/trusted-contact-verification.test.ts +2 -6
  139. package/src/__tests__/turn-boundary-resolution.test.ts +2 -6
  140. package/src/__tests__/usage-cache-backfill-migration.test.ts +1 -6
  141. package/src/__tests__/usage-routes.test.ts +2 -6
  142. package/src/__tests__/verification-control-plane-policy.test.ts +0 -2
  143. package/src/__tests__/voice-invite-redemption.test.ts +2 -6
  144. package/src/__tests__/voice-scoped-grant-consumer.test.ts +2 -6
  145. package/src/__tests__/voice-session-bridge.test.ts +2 -6
  146. package/src/__tests__/volume-security-guard.test.ts +2 -0
  147. package/src/__tests__/workspace-lifecycle.test.ts +29 -1
  148. package/src/__tests__/workspace-migration-009-backfill-conversation-disk-view.test.ts +2 -6
  149. package/src/__tests__/workspace-migration-013-repair-conversation-disk-view.test.ts +2 -6
  150. package/src/__tests__/workspace-migration-026-backfill-install-meta.test.ts +558 -0
  151. package/src/__tests__/workspace-policy.test.ts +1 -1
  152. package/src/agent/attachments.ts +7 -2
  153. package/src/agent/image-optimize.ts +165 -0
  154. package/src/agent/loop.ts +1 -15
  155. package/src/bundler/app-compiler.ts +179 -2
  156. package/src/bundler/package-resolver.ts +3 -5
  157. package/src/cli/__tests__/notifications.test.ts +1 -2
  158. package/src/cli/cli-memory.ts +67 -64
  159. package/src/cli/commands/avatar.ts +3 -3
  160. package/src/cli/commands/config.ts +26 -13
  161. package/src/cli/commands/doctor.ts +2 -2
  162. package/src/cli/commands/memory.ts +41 -55
  163. package/src/cli/commands/oauth/__tests__/connect.test.ts +2 -2
  164. package/src/cli/commands/oauth/__tests__/disconnect.test.ts +2 -2
  165. package/src/cli/commands/oauth/__tests__/mode.test.ts +8 -1
  166. package/src/cli/commands/oauth/__tests__/status.test.ts +2 -2
  167. package/src/cli/commands/oauth/connect.ts +11 -6
  168. package/src/cli/commands/oauth/mode.ts +7 -0
  169. package/src/cli/commands/oauth/shared.ts +39 -3
  170. package/src/cli/commands/platform/__tests__/connect.test.ts +1 -1
  171. package/src/cli/commands/platform/__tests__/disconnect.test.ts +1 -1
  172. package/src/cli/commands/platform/__tests__/status.test.ts +5 -5
  173. package/src/cli/commands/platform/index.ts +16 -16
  174. package/src/cli/commands/skills.ts +88 -16
  175. package/src/cli/commands/trust.ts +2 -2
  176. package/src/cli/lib/daemon-credential-client.ts +2 -3
  177. package/src/config/bundled-skills/acp/TOOLS.json +1 -1
  178. package/src/config/bundled-skills/contacts/SKILL.md +0 -1
  179. package/src/config/bundled-skills/contacts/TOOLS.json +0 -8
  180. package/src/config/bundled-skills/contacts/tools/contact-upsert.ts +0 -4
  181. package/src/config/bundled-skills/gmail/SKILL.md +2 -10
  182. package/src/config/bundled-skills/google-calendar/SKILL.md +1 -9
  183. package/src/config/bundled-skills/messaging/SKILL.md +10 -18
  184. package/src/config/bundled-skills/messaging/tools/messaging-analyze-style.ts +40 -33
  185. package/src/config/bundled-skills/outlook/SKILL.md +189 -0
  186. package/src/config/bundled-skills/outlook/TOOLS.json +530 -0
  187. package/src/config/bundled-skills/outlook/tools/outlook-attachments.ts +85 -0
  188. package/src/config/bundled-skills/outlook/tools/outlook-categories.ts +77 -0
  189. package/src/config/bundled-skills/outlook/tools/outlook-draft.ts +84 -0
  190. package/src/config/bundled-skills/outlook/tools/outlook-follow-up.ts +94 -0
  191. package/src/config/bundled-skills/outlook/tools/outlook-forward.ts +49 -0
  192. package/src/config/bundled-skills/outlook/tools/outlook-outreach-scan.ts +237 -0
  193. package/src/config/bundled-skills/outlook/tools/outlook-rules.ts +161 -0
  194. package/src/config/bundled-skills/outlook/tools/outlook-send-draft.ts +32 -0
  195. package/src/config/bundled-skills/outlook/tools/outlook-sender-digest.ts +272 -0
  196. package/src/config/bundled-skills/outlook/tools/outlook-trash.ts +29 -0
  197. package/src/config/bundled-skills/outlook/tools/outlook-unsubscribe.ts +129 -0
  198. package/src/config/bundled-skills/outlook/tools/outlook-vacation.ts +87 -0
  199. package/src/config/bundled-skills/outlook/tools/shared.ts +20 -0
  200. package/src/config/bundled-skills/outlook-calendar/SKILL.md +51 -0
  201. package/src/config/bundled-skills/outlook-calendar/TOOLS.json +221 -0
  202. package/src/config/bundled-skills/outlook-calendar/calendar-client.ts +252 -0
  203. package/src/config/bundled-skills/outlook-calendar/tools/outlook-calendar-check-availability.ts +53 -0
  204. package/src/config/bundled-skills/outlook-calendar/tools/outlook-calendar-create-event.ts +74 -0
  205. package/src/config/bundled-skills/outlook-calendar/tools/outlook-calendar-get-event.ts +18 -0
  206. package/src/config/bundled-skills/outlook-calendar/tools/outlook-calendar-list-events.ts +46 -0
  207. package/src/config/bundled-skills/outlook-calendar/tools/outlook-calendar-rsvp.ts +36 -0
  208. package/src/config/bundled-skills/outlook-calendar/tools/shared.ts +17 -0
  209. package/src/config/bundled-skills/outlook-calendar/types.ts +120 -0
  210. package/src/config/bundled-skills/playbooks/tools/playbook-create.ts +47 -40
  211. package/src/config/bundled-skills/playbooks/tools/playbook-delete.ts +16 -29
  212. package/src/config/bundled-skills/playbooks/tools/playbook-list.ts +16 -18
  213. package/src/config/bundled-skills/playbooks/tools/playbook-update.ts +39 -47
  214. package/src/config/bundled-skills/slack/SKILL.md +1 -7
  215. package/src/config/bundled-tool-registry.ts +56 -4
  216. package/src/config/env-registry.ts +15 -8
  217. package/src/config/feature-flag-registry.json +21 -124
  218. package/src/config/schemas/platform.ts +8 -0
  219. package/src/config/schemas/timeouts.ts +1 -1
  220. package/src/config/skills.ts +18 -7
  221. package/src/context/token-estimator.ts +25 -18
  222. package/src/context/window-manager.ts +6 -2
  223. package/src/credential-execution/process-manager.ts +3 -1
  224. package/src/daemon/context-overflow-reducer.ts +46 -2
  225. package/src/daemon/conversation-agent-loop-handlers.ts +123 -82
  226. package/src/daemon/conversation-agent-loop.ts +96 -61
  227. package/src/daemon/conversation-error.ts +31 -8
  228. package/src/daemon/conversation-lifecycle.ts +33 -0
  229. package/src/daemon/conversation-media-retry.ts +85 -7
  230. package/src/daemon/conversation-notifiers.ts +4 -1
  231. package/src/daemon/conversation-runtime-assembly.ts +5 -0
  232. package/src/daemon/conversation.ts +41 -2
  233. package/src/daemon/daemon-control.ts +8 -2
  234. package/src/daemon/handlers/shared.ts +22 -12
  235. package/src/daemon/handlers/skills.ts +416 -202
  236. package/src/daemon/lifecycle.ts +40 -1
  237. package/src/daemon/main.ts +5 -1
  238. package/src/daemon/message-types/conversations.ts +4 -1
  239. package/src/daemon/message-types/messages.ts +3 -1
  240. package/src/daemon/message-types/skills.ts +97 -36
  241. package/src/daemon/providers-setup.ts +5 -0
  242. package/src/daemon/server.ts +11 -2
  243. package/src/daemon/tool-side-effects.ts +27 -5
  244. package/src/heartbeat/heartbeat-service.ts +1 -0
  245. package/src/hooks/cli.ts +2 -2
  246. package/src/hooks/runner.ts +15 -38
  247. package/src/inbound/platform-callback-registration.ts +14 -14
  248. package/src/memory/admin.ts +11 -45
  249. package/src/memory/conversation-bootstrap.ts +2 -0
  250. package/src/memory/conversation-crud.ts +242 -348
  251. package/src/memory/conversation-group-migration.ts +157 -0
  252. package/src/memory/conversation-queries.ts +4 -2
  253. package/src/memory/db-init.ts +30 -3
  254. package/src/memory/embed.ts +73 -0
  255. package/src/memory/embedding-backend.ts +8 -14
  256. package/src/memory/embedding-runtime-manager.ts +12 -114
  257. package/src/memory/fingerprint.ts +2 -2
  258. package/src/memory/graph/bootstrap.ts +512 -0
  259. package/src/memory/graph/capability-seed.ts +297 -0
  260. package/src/memory/graph/consolidation.ts +691 -0
  261. package/src/memory/graph/conversation-graph-memory.ts +630 -0
  262. package/src/memory/graph/decay.test.ts +208 -0
  263. package/src/memory/graph/decay.ts +195 -0
  264. package/src/memory/graph/extraction-job.ts +69 -0
  265. package/src/memory/graph/extraction.test.ts +936 -0
  266. package/src/memory/graph/extraction.ts +1254 -0
  267. package/src/memory/graph/graph-search.ts +266 -0
  268. package/src/memory/graph/image-ref-utils.ts +29 -0
  269. package/src/memory/graph/injection.test.ts +513 -0
  270. package/src/memory/graph/injection.ts +439 -0
  271. package/src/memory/graph/inspect.ts +534 -0
  272. package/src/memory/graph/narrative.ts +267 -0
  273. package/src/memory/graph/pattern-scan.ts +269 -0
  274. package/src/memory/graph/retriever.ts +1008 -0
  275. package/src/memory/graph/scoring.test.ts +548 -0
  276. package/src/memory/graph/scoring.ts +232 -0
  277. package/src/memory/graph/serendipity.ts +65 -0
  278. package/src/memory/graph/store.test.ts +1050 -0
  279. package/src/memory/graph/store.ts +699 -0
  280. package/src/memory/graph/tool-handlers.ts +426 -0
  281. package/src/memory/graph/tools.ts +141 -0
  282. package/src/memory/graph/triggers.test.ts +487 -0
  283. package/src/memory/graph/triggers.ts +223 -0
  284. package/src/memory/graph/types.ts +271 -0
  285. package/src/memory/group-crud.ts +191 -0
  286. package/src/memory/indexer.ts +37 -19
  287. package/src/memory/job-handlers/cleanup.ts +0 -53
  288. package/src/memory/job-handlers/conversation-starters.ts +91 -53
  289. package/src/memory/job-handlers/embedding.ts +5 -31
  290. package/src/memory/job-handlers/index-maintenance.ts +23 -11
  291. package/src/memory/job-handlers/summarization.ts +32 -17
  292. package/src/memory/job-utils.ts +1 -1
  293. package/src/memory/jobs-store.ts +50 -70
  294. package/src/memory/jobs-worker.ts +147 -112
  295. package/src/memory/message-content.ts +1 -0
  296. package/src/memory/migrations/202-memory-graph-tables.ts +130 -0
  297. package/src/memory/migrations/203-drop-memory-items-tables.ts +23 -0
  298. package/src/memory/migrations/204-rename-memory-graph-type-values.ts +46 -0
  299. package/src/memory/migrations/205-memory-graph-image-refs.ts +11 -0
  300. package/src/memory/migrations/index.ts +4 -0
  301. package/src/memory/migrations/registry.ts +8 -0
  302. package/src/memory/qdrant-client.ts +44 -17
  303. package/src/memory/schema/index.ts +1 -0
  304. package/src/memory/schema/memory-graph.ts +139 -0
  305. package/src/memory/search/semantic.ts +47 -91
  306. package/src/memory/task-memory-cleanup.ts +28 -50
  307. package/src/messaging/providers/outlook/adapter.ts +8 -1
  308. package/src/messaging/providers/outlook/client.ts +299 -0
  309. package/src/messaging/providers/outlook/types.ts +118 -0
  310. package/src/notifications/adapters/macos.ts +1 -0
  311. package/src/notifications/copy-composer.ts +9 -0
  312. package/src/notifications/signal.ts +16 -0
  313. package/src/oauth/seed-providers.ts +2 -1
  314. package/src/permissions/checker.ts +24 -3
  315. package/src/permissions/defaults.ts +4 -4
  316. package/src/permissions/workspace-policy.ts +1 -1
  317. package/src/playbooks/playbook-compiler.ts +19 -18
  318. package/src/playbooks/types.ts +4 -3
  319. package/src/prompts/system-prompt.ts +3 -29
  320. package/src/providers/anthropic/client.ts +47 -19
  321. package/src/providers/gemini/client.ts +1 -1
  322. package/src/providers/openai/client.ts +1 -1
  323. package/src/providers/registry.ts +1 -1
  324. package/src/providers/retry.ts +19 -3
  325. package/src/runtime/actor-trust-resolver.ts +5 -1
  326. package/src/runtime/auth/route-policy.ts +7 -0
  327. package/src/runtime/guardian-reply-router.ts +5 -1
  328. package/src/runtime/http-server.ts +23 -3
  329. package/src/runtime/middleware/auth.ts +20 -0
  330. package/src/runtime/routes/attachment-routes.test.ts +106 -0
  331. package/src/runtime/routes/attachment-routes.ts +106 -16
  332. package/src/runtime/routes/brain-graph-routes.ts +21 -22
  333. package/src/runtime/routes/btw-routes.ts +8 -0
  334. package/src/runtime/routes/conversation-management-routes.ts +2 -0
  335. package/src/runtime/routes/conversation-starter-routes.ts +2 -2
  336. package/src/runtime/routes/debug-routes.ts +1 -1
  337. package/src/runtime/routes/global-search-routes.ts +21 -19
  338. package/src/runtime/routes/group-routes.ts +207 -0
  339. package/src/runtime/routes/guardian-action-routes.ts +21 -10
  340. package/src/runtime/routes/guardian-bootstrap-routes.ts +23 -19
  341. package/src/runtime/routes/inbound-message-handler.ts +19 -0
  342. package/src/runtime/routes/inbound-stages/guardian-activation-intercept.test.ts +292 -0
  343. package/src/runtime/routes/inbound-stages/guardian-activation-intercept.ts +207 -0
  344. package/src/runtime/routes/memory-item-routes.test.ts +2 -14
  345. package/src/runtime/routes/memory-item-routes.ts +341 -388
  346. package/src/runtime/routes/schedule-routes.ts +2 -0
  347. package/src/runtime/routes/skills-routes.ts +103 -37
  348. package/src/runtime/routes/work-items-routes.test.ts +2 -6
  349. package/src/schedule/scheduler.ts +8 -1
  350. package/src/security/oauth2.ts +1 -1
  351. package/src/security/secure-keys.ts +4 -8
  352. package/src/shared/provider-env-vars.ts +19 -0
  353. package/src/skills/catalog-cache.ts +5 -0
  354. package/src/skills/catalog-install.ts +15 -14
  355. package/src/skills/clawhub.ts +134 -154
  356. package/src/skills/install-meta.ts +208 -0
  357. package/src/skills/managed-store.ts +27 -16
  358. package/src/skills/skill-memory.ts +152 -77
  359. package/src/skills/skillssh-registry.ts +19 -17
  360. package/src/tasks/task-runner.ts +3 -1
  361. package/src/telemetry/usage-telemetry-reporter.test.ts +3 -5
  362. package/src/tools/browser/runtime-check.ts +3 -1
  363. package/src/tools/memory/register.ts +63 -46
  364. package/src/tools/permission-checker.ts +7 -1
  365. package/src/tools/shared/filesystem/image-read.ts +22 -85
  366. package/src/tools/terminal/safe-env.ts +1 -0
  367. package/src/tools/tool-manifest.ts +3 -3
  368. package/src/util/browser.ts +25 -10
  369. package/src/util/bun-runtime.ts +172 -0
  370. package/src/watcher/providers/outlook-calendar.ts +343 -0
  371. package/src/watcher/providers/outlook.ts +198 -0
  372. package/src/workspace/migrations/025-remove-oauth-app-setup-skills.ts +76 -0
  373. package/src/workspace/migrations/026-backfill-install-meta.ts +325 -0
  374. package/src/workspace/migrations/027-remove-orphaned-optimized-images-cache.ts +42 -0
  375. package/src/workspace/migrations/registry.ts +6 -0
  376. package/src/__tests__/context-memory-e2e.test.ts +0 -415
  377. package/src/__tests__/journal-context.test.ts +0 -268
  378. package/src/__tests__/memory-context-benchmark.benchmark.test.ts +0 -297
  379. package/src/__tests__/memory-lifecycle-e2e.test.ts +0 -459
  380. package/src/__tests__/memory-query-builder.test.ts +0 -59
  381. package/src/__tests__/memory-recall-quality.test.ts +0 -1046
  382. package/src/__tests__/memory-regressions.experimental.test.ts +0 -629
  383. package/src/__tests__/memory-regressions.test.ts +0 -3696
  384. package/src/__tests__/memory-retrieval.benchmark.test.ts +0 -295
  385. package/src/daemon/conversation-memory.ts +0 -207
  386. package/src/memory/conversation-starters-cadence.ts +0 -74
  387. package/src/memory/items-extractor.ts +0 -860
  388. package/src/memory/job-handlers/batch-extraction.ts +0 -753
  389. package/src/memory/job-handlers/extraction.ts +0 -40
  390. package/src/memory/job-handlers/journal-carry-forward.test.ts +0 -355
  391. package/src/memory/job-handlers/journal-carry-forward.ts +0 -255
  392. package/src/memory/journal-memory.ts +0 -224
  393. package/src/memory/query-builder.ts +0 -47
  394. package/src/memory/query-expansion.ts +0 -83
  395. package/src/memory/retriever.test.ts +0 -1592
  396. package/src/memory/retriever.ts +0 -1331
  397. package/src/memory/search/formatting.test.ts +0 -140
  398. package/src/memory/search/formatting.ts +0 -262
  399. package/src/memory/search/mmr.ts +0 -139
  400. package/src/memory/search/ranking.ts +0 -15
  401. package/src/memory/search/staleness.ts +0 -40
  402. package/src/memory/search/tier-classifier.ts +0 -18
  403. package/src/memory/search/types.ts +0 -121
  404. package/src/prompts/journal-context.ts +0 -154
  405. package/src/tools/memory/definitions.ts +0 -69
  406. package/src/tools/memory/handlers.test.ts +0 -562
  407. package/src/tools/memory/handlers.ts +0 -434
@@ -0,0 +1,302 @@
1
+ import { beforeEach, describe, expect, mock, test } from "bun:test";
2
+
3
+ import { Command } from "commander";
4
+
5
+ // ---------------------------------------------------------------------------
6
+ // Mock state
7
+ // ---------------------------------------------------------------------------
8
+
9
/** Shape used for raw config objects passed through the mocked loader. */
type RawConfig = Record<string, unknown>;

/** One recorded invocation of the mocked `setNestedValue`. */
interface SetNestedValueCall {
  obj: RawConfig;
  key: string;
  value: unknown;
}

// Swappable hook behind the mocked VellumPlatformClient.create();
// resolves to null by default.
let mockPlatformClientCreate: () => Promise<RawConfig | null> = () =>
  Promise.resolve(null);

// Swappable hook behind the mocked loadRawConfig(); yields an empty object by default.
let mockLoadRawConfig: () => RawConfig = () => ({});

// Every object handed to the mocked saveRawConfig(), in call order.
const mockSaveRawConfigCalls: RawConfig[] = [];

// Every (obj, key, value) triple handed to the mocked setNestedValue(), in call order.
const mockSetNestedValueCalls: SetNestedValueCall[] = [];

// Swappable hook behind the mocked getNestedValue(); returns undefined by default.
let mockGetNestedValue: (obj: RawConfig, key: string) => unknown = () =>
  undefined;
25
+
26
+ // ---------------------------------------------------------------------------
27
+ // Mocks — platform/client (controls requirePlatformConnection)
28
+ // ---------------------------------------------------------------------------
29
+
30
// Replace the platform client module with a stub whose create() defers to the
// mutable mockPlatformClientCreate hook. The hook is looked up on every call,
// so reassigning it inside a test takes effect immediately.
mock.module("../platform/client.js", () => {
  const VellumPlatformClient = {
    create: () => mockPlatformClientCreate(),
  };
  return { VellumPlatformClient };
});
35
+
36
+ // ---------------------------------------------------------------------------
37
+ // Mocks — config/loader
38
+ // ---------------------------------------------------------------------------
39
+
40
// Replace the config loader with in-memory stubs. Reads go through the
// mutable mockLoadRawConfig / mockGetNestedValue hooks; writes are recorded
// in mockSaveRawConfigCalls / mockSetNestedValueCalls instead of being applied.
mock.module("../config/loader.js", () => {
  // Fresh `{ services: {} }` object on every call.
  const emptyServicesConfig = () => ({ services: {} });
  const noop = () => {};
  // Returns its first argument unchanged; any further arguments are ignored.
  const passThrough = (first: unknown) => first;

  return {
    getConfig: emptyServicesConfig,
    loadConfig: emptyServicesConfig,
    saveConfig: noop,
    invalidateConfigCache: noop,
    loadRawConfig: () => mockLoadRawConfig(),
    saveRawConfig: (raw: Record<string, unknown>) => {
      mockSaveRawConfigCalls.push(raw);
    },
    applyNestedDefaults: passThrough,
    deepMergeMissing: passThrough,
    deepMergeOverwrite: passThrough,
    mergeDefaultWorkspaceConfig: noop,
    getNestedValue: (obj: Record<string, unknown>, key: string) =>
      mockGetNestedValue(obj, key),
    // Records the call only; `obj` itself is left untouched.
    setNestedValue: (
      obj: Record<string, unknown>,
      key: string,
      value: unknown,
    ) => mockSetNestedValueCalls.push({ obj, key, value }),
    API_KEY_PROVIDERS: [
      "anthropic",
      "openai",
      "gemini",
      "ollama",
      "fireworks",
      "openrouter",
      "brave",
      "perplexity",
    ],
  };
});
68
+
69
+ // ---------------------------------------------------------------------------
70
+ // Mocks — util/logger (suppress log output)
71
+ // ---------------------------------------------------------------------------
72
+
73
// Replace the logger module so that both getLogger() and getCliLogger() hand
// back a logger whose info/warn/error/debug methods do nothing.
mock.module("../util/logger.js", () => {
  const ignore = () => {};
  // Builds a new silent logger object on each call.
  const createSilentLogger = () => ({
    info: ignore,
    warn: ignore,
    error: ignore,
    debug: ignore,
  });

  return {
    getLogger: createSilentLogger,
    getCliLogger: createSilentLogger,
  };
});
87
+
88
+ // ---------------------------------------------------------------------------
89
+ // Mocks — oauth/oauth-store (transitive dep of oauth/shared.ts)
90
+ // ---------------------------------------------------------------------------
91
+
92
// Replace the OAuth store with inert stubs: lookups return undefined, list
// calls return an empty array, create/update calls return an empty object,
// and delete/connected checks return false.
mock.module("../oauth/oauth-store.js", () => {
  const nothing = () => undefined;
  const emptyList = () => [];
  const emptyRecord = () => ({});
  const no = () => false;

  return {
    disconnectOAuthProvider: async () => "not-found" as const,
    getConnection: nothing,
    getConnectionByProvider: nothing,
    listConnections: emptyList,
    deleteConnection: no,
    upsertApp: async () => ({}),
    getApp: nothing,
    getAppByProviderAndClientId: nothing,
    getMostRecentAppByProvider: nothing,
    listApps: emptyList,
    deleteApp: async () => false,
    getProvider: nothing,
    listProviders: emptyList,
    registerProvider: emptyRecord,
    updateProvider: nothing,
    deleteProvider: no,
    seedProviders: () => {},
    getActiveConnection: nothing,
    listActiveConnectionsByProvider: emptyList,
    createConnection: emptyRecord,
    isProviderConnected: no,
    updateConnection: emptyRecord,
  };
});
116
+
117
+ // ---------------------------------------------------------------------------
118
+ // Import the module under test (after mocks are registered)
119
+ // ---------------------------------------------------------------------------
120
+
121
+ const { registerConfigCommand } = await import("../cli/commands/config.js");
122
+
123
+ // ---------------------------------------------------------------------------
124
+ // Test helper
125
+ // ---------------------------------------------------------------------------
126
+
127
/**
 * Runs the `config` command tree against `args` in-process and reports what
 * it printed and how it exited.
 *
 * While the command runs, `process.stdout.write` is redirected into an
 * in-memory buffer and `process.stderr.write` is silenced; both are restored
 * afterwards, even when parsing throws. `process.exitCode` is zeroed before
 * the run and again after it has been read.
 *
 * @param args Full argv-style array passed to commander's `parseAsync`.
 * @returns The exit code observed after the run (1 when parsing threw and no
 *   other code had been set) together with everything captured on stdout.
 */
async function runCli(
  args: string[],
): Promise<{ exitCode: number; stdout: string }> {
  const captured: string[] = [];
  const realStdoutWrite = process.stdout.write.bind(process.stdout);
  const realStderrWrite = process.stderr.write.bind(process.stderr);

  const captureChunk = (chunk: unknown): boolean => {
    // String() leaves string chunks as-is and stringifies everything else.
    captured.push(String(chunk));
    return true;
  };
  const swallow = (): boolean => true;

  process.stdout.write = captureChunk as typeof process.stdout.write;
  process.stderr.write = swallow as typeof process.stderr.write;
  process.exitCode = 0;

  try {
    // exitOverride() makes commander throw instead of calling process.exit.
    const cli = new Command().option("--json", "JSON output").exitOverride();
    cli.configureOutput({
      writeErr: () => {},
      writeOut: (text: string) => captured.push(text),
    });
    registerConfigCommand(cli);
    await cli.parseAsync(args);
  } catch {
    // A throw with no explicit exit code set still counts as a failure.
    if (process.exitCode === 0) {
      process.exitCode = 1;
    }
  } finally {
    process.stdout.write = realStdoutWrite;
    process.stderr.write = realStderrWrite;
  }

  const observedExitCode = process.exitCode ?? 0;
  // Reset so a failing CLI run does not leave a non-zero exit code on the process.
  process.exitCode = 0;

  return { exitCode: observedExitCode, stdout: captured.join("") };
}
168
+
169
+ // ---------------------------------------------------------------------------
170
+ // Tests
171
+ // ---------------------------------------------------------------------------
172
+
173
describe("config set — platform connection guard for service mode paths", () => {
  /** Builds the argv passed to runCli: `node assistant` followed by the given CLI arguments. */
  const argv = (...cliArgs: string[]): string[] => [
    "node",
    "assistant",
    ...cliArgs,
  ];

  beforeEach(() => {
    // Baseline for every test: no platform connection, an empty raw config,
    // cleared write logs, and every nested lookup resolving to undefined.
    mockSaveRawConfigCalls.length = 0;
    mockSetNestedValueCalls.length = 0;
    mockPlatformClientCreate = async () => null;
    mockLoadRawConfig = () => ({});
    mockGetNestedValue = () => undefined;
  });

  test("config set services.inference.mode managed — fails when not connected", async () => {
    const outcome = await runCli(
      argv("--json", "config", "set", "services.inference.mode", "managed"),
    );

    expect(outcome.exitCode).toBe(1);
    const payload = JSON.parse(outcome.stdout);
    expect(payload.ok).toBe(false);
    expect(payload.error).toContain("vellum platform connect");
    expect(payload.error).toContain("Not connected");
    // Rejected before anything was set or saved.
    expect(mockSaveRawConfigCalls).toHaveLength(0);
    expect(mockSetNestedValueCalls).toHaveLength(0);
  });

  test("config set services.image-generation.mode your-own — succeeds without platform connection", async () => {
    const outcome = await runCli(
      argv(
        "--json",
        "config",
        "set",
        "services.image-generation.mode",
        "your-own",
      ),
    );

    expect(outcome.exitCode).toBe(0);
    // "your-own" does not need the platform, so the write goes through.
    expect(mockSetNestedValueCalls).toHaveLength(1);
    expect(mockSetNestedValueCalls[0]!.key).toBe(
      "services.image-generation.mode",
    );
    expect(mockSetNestedValueCalls[0]!.value).toBe("your-own");
    expect(mockSaveRawConfigCalls).toHaveLength(1);
  });

  test("config set calls.enabled true — succeeds without platform connection", async () => {
    const outcome = await runCli(argv("config", "set", "calls.enabled", "true"));

    expect(outcome.exitCode).toBe(0);
    // The write goes through, and the recorded value is the boolean true.
    expect(mockSetNestedValueCalls).toHaveLength(1);
    expect(mockSetNestedValueCalls[0]!.key).toBe("calls.enabled");
    expect(mockSetNestedValueCalls[0]!.value).toBe(true);
    expect(mockSaveRawConfigCalls).toHaveLength(1);
  });

  test("config get services.inference.mode — works without platform connection", async () => {
    mockGetNestedValue = (_obj, key) =>
      key === "services.inference.mode" ? "your-own" : undefined;

    const outcome = await runCli(
      argv("config", "get", "services.inference.mode"),
    );

    expect(outcome.exitCode).toBe(0);
    // Reading must not write anything.
    expect(mockSaveRawConfigCalls).toHaveLength(0);
    expect(mockSetNestedValueCalls).toHaveLength(0);
  });

  test("config set services.web-search.mode managed — fails when not connected", async () => {
    const outcome = await runCli(
      argv("--json", "config", "set", "services.web-search.mode", "managed"),
    );

    expect(outcome.exitCode).toBe(1);
    const payload = JSON.parse(outcome.stdout);
    expect(payload.ok).toBe(false);
    expect(payload.error).toContain("vellum platform connect");
    expect(mockSaveRawConfigCalls).toHaveLength(0);
  });

  test("config set services.inference.mode managed — succeeds when connected", async () => {
    // Simulate an established platform connection.
    mockPlatformClientCreate = async () => ({
      platformAssistantId: "asst-123",
      fetch: async () => new Response(),
    });

    const outcome = await runCli(
      argv("config", "set", "services.inference.mode", "managed"),
    );

    expect(outcome.exitCode).toBe(0);
    expect(mockSetNestedValueCalls).toHaveLength(1);
    expect(mockSetNestedValueCalls[0]!.key).toBe("services.inference.mode");
    expect(mockSetNestedValueCalls[0]!.value).toBe("managed");
    expect(mockSaveRawConfigCalls).toHaveLength(1);
  });
});
@@ -8,7 +8,7 @@
8
8
  * 4. Missing guardian binding causes a skip
9
9
  */
10
10
 
11
- import { afterAll, beforeEach, describe, expect, mock, test } from "bun:test";
11
+ import { beforeEach, describe, expect, mock, test } from "bun:test";
12
12
 
13
13
  mock.module("../util/logger.js", () => ({
14
14
  getLogger: () =>
@@ -76,7 +76,7 @@ import {
76
76
  generateCanonicalRequestCode,
77
77
  listCanonicalGuardianDeliveries,
78
78
  } from "../memory/canonical-guardian-store.js";
79
- import { getDb, initializeDb, resetDb } from "../memory/db.js";
79
+ import { getDb, initializeDb } from "../memory/db.js";
80
80
  import { bridgeConfirmationRequestToGuardian } from "../runtime/confirmation-request-guardian-bridge.js";
81
81
 
82
82
  initializeDb();
@@ -87,10 +87,6 @@ function resetTables(): void {
87
87
  db.run("DELETE FROM canonical_guardian_requests");
88
88
  }
89
89
 
90
- afterAll(() => {
91
- resetDb();
92
- });
93
-
94
90
  // ---------------------------------------------------------------------------
95
91
  // Helpers
96
92
  // ---------------------------------------------------------------------------
@@ -137,6 +137,37 @@ describe("contact_upsert tool", () => {
137
137
  expect(result.content).toContain("slack: @bob");
138
138
  });
139
139
 
140
test("ignores external identity bindings supplied through tool input", async () => {
  // Shape of the two columns read back from contact_channels below.
  type ChannelBindingRow = {
    external_user_id: string | null;
    external_chat_id: string | null;
  };

  // Upsert a contact whose Slack channel carries caller-supplied external IDs.
  const upsertResult = await executeContactUpsert(
    {
      display_name: "Eve",
      channels: [
        {
          type: "slack",
          address: "@eve",
          external_user_id: "UATTACKER",
          external_chat_id: "DATTACKER",
        },
      ],
    },
    ctx,
  );
  expect(upsertResult.isError).toBe(false);

  // The upsert itself succeeds, but neither supplied ID may reach the stored row.
  const storedBindings = getRawDb()
    .query(
      "SELECT external_user_id, external_chat_id FROM contact_channels WHERE type = 'slack' AND address = '@eve'",
    )
    .get() as ChannelBindingRow;

  expect(storedBindings.external_user_id).toBeNull();
  expect(storedBindings.external_chat_id).toBeNull();
});
170
+
140
171
  test("updates an existing contact by ID", async () => {
141
172
  const createResult = await executeContactUpsert(
142
173
  { display_name: "Charlie" },
@@ -467,6 +467,92 @@ describe("context-overflow-reducer", () => {
467
467
  });
468
468
  });
469
469
 
470
describe("budget-aware media stubbing", () => {
  test("media stubbing tier retains images within budget", async () => {
    // Small base64 payload so each image doesn't cost many tokens.
    const makeImageBlock = () => ({
      type: "image" as const,
      source: {
        type: "base64" as const,
        media_type: "image/png" as const,
        data: "A".repeat(1_000),
      },
    });

    /** A user message whose content is `count` freshly built image blocks. */
    const imageOnlyMessage = (count: number): Message => ({
      role: "user",
      content: Array.from({ length: count }, () => makeImageBlock()),
    });

    // Two older image-only user messages, then a latest user message with
    // five images. The old hardcoded retention limit was 3.
    const messages: Message[] = [
      msg("user", "Here are some old images"),
      imageOnlyMessage(2),
      msg("assistant", "I see the old images."),
      msg("user", "And some more old images"),
      imageOnlyMessage(1),
      msg("assistant", "Got those too."),
      imageOnlyMessage(5),
    ];

    // A very high target so every image in the latest message fits the budget.
    const config = makeConfig({ targetTokens: 500_000 });
    const compactFn = makeNoOpCompactFn();

    // Walk the reducer forward one tier per call: forced compaction first,
    // then tool-result truncation, and finally the media-stubbing tier that
    // this test is about.
    let step = await reduceContextOverflow(
      messages,
      config,
      undefined,
      compactFn,
    );
    expect(step.tier).toBe("forced_compaction");

    for (const expectedTier of ["tool_result_truncation", "media_stubbing"]) {
      step = await reduceContextOverflow(
        step.messages,
        config,
        step.state,
        compactFn,
      );
      expect(step.tier).toBe(expectedTier);
    }

    // Inspect what survived in the latest user message.
    const latestUserMsg = step.messages[step.messages.length - 1];
    expect(latestUserMsg.role).toBe("user");
    const survivingImages = latestUserMsg.content.filter(
      (b) => b.type === "image",
    );

    // Budget-aware retention with a high target must keep more than the old
    // hardcoded limit of 3 images.
    expect(survivingImages.length).toBeGreaterThan(3);
  });
});
555
+
470
556
  describe("createInitialReducerState", () => {
471
557
  test("returns a clean state with no applied tiers", () => {
472
558
  const state = createInitialReducerState();
@@ -9,6 +9,27 @@ import {
9
9
  } from "../context/token-estimator.js";
10
10
  import type { Message } from "../providers/types.js";
11
11
 
12
/**
 * Produces the first 24 bytes of a PNG file as base64: the 8-byte signature,
 * the IHDR chunk length (13), the "IHDR" tag, then width and height as
 * big-endian 32-bit unsigned integers. No image data follows the header.
 *
 * @param width Pixel width written at byte offset 16.
 * @param height Pixel height written at byte offset 20.
 * @returns The 24-byte header encoded as base64.
 */
function makePngBase64(width: number, height: number): string {
  const signature = Buffer.from([
    0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a,
  ]);

  const ihdrLength = Buffer.alloc(4);
  ihdrLength.writeUInt32BE(13);

  const ihdrTag = Buffer.from("IHDR", "ascii");

  const dimensions = Buffer.alloc(8);
  dimensions.writeUInt32BE(width, 0);
  dimensions.writeUInt32BE(height, 4);

  return Buffer.concat([signature, ihdrLength, ihdrTag, dimensions]).toString(
    "base64",
  );
}
32
+
12
33
  describe("token estimator", () => {
13
34
  test("estimates text tokens from character length", () => {
14
35
  expect(estimateTextTokens("")).toBe(0);
@@ -48,7 +69,7 @@ describe("token estimator", () => {
48
69
  data: "a".repeat(100),
49
70
  },
50
71
  }),
51
- ).toBeGreaterThan(500);
72
+ ).toBeGreaterThan(0);
52
73
  });
53
74
 
54
75
  test("estimates message and prompt totals", () => {
@@ -264,9 +285,11 @@ describe("token estimator", () => {
264
285
  { providerName: "anthropic" },
265
286
  );
266
287
 
267
- // 1920x1080 scaled to fit 1568x1568: scale = 1568/1920 = 0.8167
288
+ // 1920x1080 scaled to fit 1568px bounding box: dimScale = 1568/1920 = 0.8167
268
289
  // scaledWidth = round(1920 * 0.8167) = 1568, scaledHeight = round(1080 * 0.8167) = 882
269
- // tokens = ceil(1568 * 882 / 750) = ceil(1843.968) = ~1844
290
+ // pixels = 1568 * 882 = 1,382,976 > 1,200,000 → mpScale = sqrt(1200000/1382976) = 0.9315
291
+ // scaledWidth = round(1568 * 0.9315) = 1461, scaledHeight = round(882 * 0.9315) = 822
292
+ // tokens = ceil(1461 * 822 / 750) = ceil(1601.26) = ~1,602
270
293
  // With IMAGE_BLOCK_OVERHEAD_TOKENS and media_type overhead, still well under 5000
271
294
  expect(anthropicTokens).toBeLessThan(5_000);
272
295
 
@@ -299,13 +322,10 @@ describe("token estimator", () => {
299
322
  { providerName: "anthropic" },
300
323
  );
301
324
 
302
- // Should fall back to ANTHROPIC_IMAGE_MAX_TOKENS (~3,277)
303
- // The total will include IMAGE_BLOCK_OVERHEAD_TOKENS + media_type overhead,
304
- // but the max is applied at the outer Math.max(IMAGE_BLOCK_TOKENS, ...) level
305
- // ANTHROPIC_IMAGE_MAX_TOKENS = ceil(1568*1568/750) = 3277
306
- // Total = max(1024, 16 + ceil(9/4) + 3277) = max(1024, 3296) = 3296
307
- expect(tokens).toBeGreaterThanOrEqual(3_277);
308
- expect(tokens).toBeLessThan(4_000);
325
+ // Should fall back to ANTHROPIC_IMAGE_MAX_TOKENS (1,600)
326
+ // Total = 16 (block overhead) + ceil(9/4) (media_type) + 1600 = 1619
327
+ expect(tokens).toBeGreaterThanOrEqual(1_600);
328
+ expect(tokens).toBeLessThan(2_000);
309
329
  });
310
330
 
311
331
  test("Anthropic image tokens are the same for same-dimension images regardless of payload size", () => {
@@ -356,4 +376,149 @@ describe("token estimator", () => {
356
376
  // For Anthropic, same dimensions should produce the same estimate
357
377
  expect(largeTokens).toBe(smallTokens);
358
378
  });
379
+
380
test("applies megapixel cap for square images on Anthropic", () => {
  // Reuse the file's makePngBase64 helper rather than hand-building the same
  // 24-byte header: a PNG header that declares 2000x2000 dimensions.
  const tokens = estimateContentBlockTokens(
    {
      type: "image",
      source: {
        type: "base64",
        media_type: "image/png",
        data: makePngBase64(2000, 2000),
      },
    },
    { providerName: "anthropic" },
  );

  // 2000x2000 → dimScale = 1568/2000 = 0.784 → 1568x1568 = 2,458,624 pixels
  // 2,458,624 > 1,200,000 → mpScale = sqrt(1200000/2458624) ≈ 0.6986
  // Each side ≈ 1568 * 0.6986 ≈ 1095.4, i.e. 1095 or 1096 px depending on how
  // the estimator rounds → ceil(1095² / 750) = 1599 or ceil(1096² / 750) = 1602
  // tokens, before the per-block and media_type overhead.
  // Without the megapixel cap it would have been ceil(1568 * 1568 / 750) = 3279.
  // The lower bound guards against the estimate collapsing to a tiny value;
  // the upper bound proves the cap was applied.
  expect(tokens).toBeGreaterThan(1_400);
  expect(tokens).toBeLessThanOrEqual(1_700);
});
421
+
422
test("small Anthropic images are not inflated to 1024 tokens", () => {
  // A 200x200 PNG: dimension-based cost is ceil(200 * 200 / 750) = 54 tokens.
  const smallImage = {
    type: "image" as const,
    source: {
      type: "base64" as const,
      media_type: "image/png" as const,
      data: makePngBase64(200, 200),
    },
  };

  const tokens = estimateContentBlockTokens(smallImage, {
    providerName: "anthropic",
  });

  // Expected total: 54 (dimensions) + 16 (block overhead) + 3 (media type) = 73.
  expect(tokens).toBeLessThan(100);
  expect(tokens).toBeGreaterThan(50);
});
440
+
441
test("thumbnail Anthropic images estimate accurately", () => {
  // A 150x150 PNG: dimension-based cost is ceil(150 * 150 / 750) = 30 tokens.
  const thumbnail = {
    type: "image" as const,
    source: {
      type: "base64" as const,
      media_type: "image/png" as const,
      data: makePngBase64(150, 150),
    },
  };

  const tokens = estimateContentBlockTokens(thumbnail, {
    providerName: "anthropic",
  });

  // Expected total: 30 (dimensions) + 16 (block overhead) + 3 (media type) = 49.
  expect(tokens).toBeLessThan(70);
  expect(tokens).toBeGreaterThan(30);
});
459
+
460
test("many small Anthropic images do not trigger phantom token inflation", () => {
  const imageCount = 100;

  /** One 200x200 PNG image block. */
  const makeSmallImage = () => ({
    type: "image" as const,
    source: {
      type: "base64" as const,
      media_type: "image/png" as const,
      data: makePngBase64(200, 200),
    },
  });

  // A single user message carrying all 100 small images.
  const messages: Message[] = [
    {
      role: "user",
      content: Array.from({ length: imageCount }, () => makeSmallImage()),
    },
  ];

  const total = estimateMessagesTokens(messages, {
    providerName: "anthropic",
  });

  // At ~73 tokens per image, 100 images plus message overhead is ≈ 7,304.
  // The old behavior charged ~1,043 per image, i.e. ~104,300 in total
  // (a 14x overestimate).
  expect(total).toBeLessThan(10_000);
});
481
+
482
test("matches Anthropic's published table for common aspect ratios", () => {
  /** Anthropic token estimate for a PNG header declaring the given dimensions. */
  const estimateFor = (width: number, height: number): number =>
    estimateContentBlockTokens(
      {
        type: "image",
        source: {
          type: "base64",
          media_type: "image/png",
          data: makePngBase64(width, height),
        },
      },
      { providerName: "anthropic" },
    );

  // 1:1 → 1092x1092 (~1,590 tokens)
  const squareTokens = estimateFor(1092, 1092);
  // 1:2 → 784x1568 (~1,639 tokens)
  const tallTokens = estimateFor(784, 1568);

  // 1092x1092 = 1,192,464 pixels, which is under 1,200,000, so no megapixel
  // scaling is applied.
  // tokens = ceil(1092 * 1092 / 750) = ceil(1589.95) = 1590
  // With overhead: 16 + 3 + 1590 = 1609
  expect(squareTokens).toBeGreaterThan(1_400);
  expect(squareTokens).toBeLessThan(1_800);

  // 784 * 1568 = 1,229,312 pixels, which is just over 1,200,000, so a slight
  // scale-down applies:
  // mpScale = sqrt(1200000/1229312) ≈ 0.9881
  // scaledWidth = round(784 * 0.9881) = 775, scaledHeight = round(1568 * 0.9881) = 1549
  // tokens = ceil(775 * 1549 / 750) = ceil(1600.6) = 1601
  // With overhead: 16 + 3 + 1601 = 1620
  expect(tallTokens).toBeGreaterThan(1_400);
  expect(tallTokens).toBeLessThan(1_800);
});
359
524
  });
@@ -457,6 +457,15 @@ function makeCtx(
457
457
  assistantMessageChannel: "vellum" as const,
458
458
  }),
459
459
 
460
+ graphMemory: {
461
+ onCompacted: () => {},
462
+ prepareMemory: async () => ({ runMessages: [], injectedTokens: 0, latencyMs: 0, mode: "none" as const }),
463
+ reinjectCachedMemory: (messages: Message[]) => ({
464
+ runMessages: messages,
465
+ injectedTokens: 0,
466
+ }),
467
+ } as unknown as AgentLoopConversationContext["graphMemory"],
468
+
460
469
  ...overrides,
461
470
  } as AgentLoopConversationContext;
462
471
  }