@vellumai/assistant 0.6.5 → 0.6.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (443) hide show
  1. package/AGENTS.md +9 -1
  2. package/ARCHITECTURE.md +15 -17
  3. package/Dockerfile +6 -4
  4. package/__tests__/permissions/gateway-threshold-reader.test.ts +283 -0
  5. package/docs/architecture/integrations.md +32 -39
  6. package/docs/architecture/memory.md +25 -30
  7. package/docs/architecture/security.md +7 -6
  8. package/docs/browser-use-architecture-phase2.md +63 -20
  9. package/docs/plugins.md +761 -0
  10. package/examples/plugins/echo/README.md +132 -0
  11. package/examples/plugins/echo/package.json +17 -0
  12. package/examples/plugins/echo/register.ts +187 -0
  13. package/node_modules/@vellumai/egress-proxy/src/types.ts +19 -0
  14. package/openapi.yaml +212 -68
  15. package/package.json +1 -1
  16. package/src/__tests__/app-compiler.test.ts +57 -0
  17. package/src/__tests__/approval-cascade.test.ts +7 -2
  18. package/src/__tests__/auto-analysis-end-to-end.test.ts +1 -0
  19. package/src/__tests__/avatar-generator.test.ts +4 -2
  20. package/src/__tests__/bundled-asset.test.ts +6 -6
  21. package/src/__tests__/catalog-cache.test.ts +69 -0
  22. package/src/__tests__/checker.test.ts +459 -171
  23. package/src/__tests__/circuit-breaker-pipeline.test.ts +406 -0
  24. package/src/__tests__/compaction-events.test.ts +501 -0
  25. package/src/__tests__/compaction-pipeline.test.ts +210 -0
  26. package/src/__tests__/compaction-strip-metadata-clear.test.ts +181 -0
  27. package/src/__tests__/compaction-timeout-recovery.test.ts +262 -0
  28. package/src/__tests__/config-model-image-provider.test.ts +110 -0
  29. package/src/__tests__/config-schema.test.ts +22 -9
  30. package/src/__tests__/config-watcher-cleanup-throttle.test.ts +0 -4
  31. package/src/__tests__/contacts-tools.test.ts +26 -0
  32. package/src/__tests__/context-overflow-policy.test.ts +7 -7
  33. package/src/__tests__/context-window-manager.test.ts +355 -4
  34. package/src/__tests__/conversation-abort-tool-results.test.ts +4 -1
  35. package/src/__tests__/conversation-agent-loop-overflow.test.ts +26 -30
  36. package/src/__tests__/conversation-agent-loop.test.ts +30 -141
  37. package/src/__tests__/conversation-confirmation-signals.test.ts +6 -1
  38. package/src/__tests__/conversation-history-web-search.test.ts +1 -0
  39. package/src/__tests__/conversation-init.benchmark.test.ts +2 -16
  40. package/src/__tests__/conversation-pairing.test.ts +174 -10
  41. package/src/__tests__/conversation-pre-run-repair.test.ts +4 -1
  42. package/src/__tests__/conversation-process-callsite.test.ts +3 -0
  43. package/src/__tests__/conversation-provider-retry-repair.test.ts +16 -7
  44. package/src/__tests__/conversation-queue.test.ts +29 -14
  45. package/src/__tests__/conversation-routes-disk-view.test.ts +7 -6
  46. package/src/__tests__/conversation-runtime-assembly.test.ts +155 -110
  47. package/src/__tests__/conversation-runtime-workspace.test.ts +23 -38
  48. package/src/__tests__/conversation-seed-composer.test.ts +2 -2
  49. package/src/__tests__/conversation-slash-queue.test.ts +7 -2
  50. package/src/__tests__/conversation-slash-unknown.test.ts +25 -2
  51. package/src/__tests__/conversation-speed-override.test.ts +6 -1
  52. package/src/__tests__/conversation-title-service.test.ts +116 -0
  53. package/src/__tests__/conversation-tool-setup-app-refresh.test.ts +41 -2
  54. package/src/__tests__/conversation-usage.test.ts +1 -1
  55. package/src/__tests__/conversation-workspace-cache-state.test.ts +4 -1
  56. package/src/__tests__/conversation-workspace-injection.test.ts +3 -0
  57. package/src/__tests__/conversation-workspace-tool-tracking.test.ts +4 -1
  58. package/src/__tests__/credential-health-service.test.ts +78 -9
  59. package/src/__tests__/credential-security-invariants.test.ts +2 -2
  60. package/src/__tests__/db-schedule-syntax-migration.test.ts +1 -0
  61. package/src/__tests__/empty-response-pipeline.test.ts +305 -0
  62. package/src/__tests__/extension-id-sync-guard.test.ts +3 -3
  63. package/src/__tests__/first-greeting.test.ts +247 -5
  64. package/src/__tests__/headless-browser-mode.test.ts +57 -0
  65. package/src/__tests__/history-repair-pipeline.test.ts +399 -0
  66. package/src/__tests__/host-browser-e2e-cloud.test.ts +307 -0
  67. package/src/__tests__/host-browser-e2e-self-hosted.test.ts +3 -3
  68. package/src/__tests__/host-proxy-interface.test.ts +36 -2
  69. package/src/__tests__/image-credentials.test.ts +137 -0
  70. package/src/__tests__/image-service-dispatcher.test.ts +186 -0
  71. package/src/__tests__/injector-chain.test.ts +526 -0
  72. package/src/__tests__/intent-routing.test.ts +0 -26
  73. package/src/__tests__/llm-call-pipeline.test.ts +285 -0
  74. package/src/__tests__/llm-schema.test.ts +1 -1
  75. package/src/__tests__/media-generate-image.test.ts +119 -13
  76. package/src/__tests__/memory-retrieval-pipeline.test.ts +401 -0
  77. package/src/__tests__/memory-upsert-concurrency.test.ts +1 -0
  78. package/src/__tests__/migration-import-from-url.test.ts +5 -68
  79. package/src/__tests__/model-intents.test.ts +4 -2
  80. package/src/__tests__/notification-broadcaster.test.ts +3 -3
  81. package/src/__tests__/notification-decision-strategy.test.ts +0 -11
  82. package/src/__tests__/notification-schedule-notify-dedup.test.ts +108 -0
  83. package/src/__tests__/oauth-apps-routes.test.ts +1 -1
  84. package/src/__tests__/oauth-cli.test.ts +14 -12
  85. package/src/__tests__/oauth-connect-orchestrator.test.ts +4 -13
  86. package/src/__tests__/oauth-provider-serializer.test.ts +6 -4
  87. package/src/__tests__/oauth-provider-visibility.test.ts +3 -5
  88. package/src/__tests__/oauth-providers-routes.test.ts +3 -2
  89. package/src/__tests__/oauth-store.test.ts +41 -76
  90. package/src/__tests__/onboarding-template-contract.test.ts +16 -64
  91. package/src/__tests__/openai-image-service.test.ts +368 -0
  92. package/src/__tests__/overflow-reduce-pipeline.test.ts +676 -0
  93. package/src/__tests__/permission-checker-host-gate.test.ts +0 -24
  94. package/src/__tests__/persist-onboarding-artifacts.test.ts +266 -0
  95. package/src/__tests__/persistence-pipeline.test.ts +377 -0
  96. package/src/__tests__/pipeline-runner.test.ts +565 -0
  97. package/src/__tests__/platform.test.ts +5 -2
  98. package/src/__tests__/plugin-bootstrap.test.ts +483 -0
  99. package/src/__tests__/plugin-registry.test.ts +273 -0
  100. package/src/__tests__/plugin-route-contribution.test.ts +288 -0
  101. package/src/__tests__/plugin-skill-contribution.test.ts +367 -0
  102. package/src/__tests__/plugin-tool-contribution.test.ts +286 -0
  103. package/src/__tests__/plugin-types.test.ts +320 -0
  104. package/src/__tests__/pricing.test.ts +44 -12
  105. package/src/__tests__/proxy-approval-callback.test.ts +69 -8
  106. package/src/__tests__/reaction-persistence.test.ts +1 -0
  107. package/src/__tests__/regenerate-fire-and-forget-trace.test.ts +1 -0
  108. package/src/__tests__/registry.test.ts +0 -2
  109. package/src/__tests__/schedule-routes.test.ts +131 -1
  110. package/src/__tests__/scheduler-recurrence.test.ts +14 -70
  111. package/src/__tests__/scheduler-reuse-conversation.test.ts +10 -50
  112. package/src/__tests__/secret-detection-handler.test.ts +0 -10
  113. package/src/__tests__/shell-identity.test.ts +0 -134
  114. package/src/__tests__/suggestion-routes.test.ts +103 -4
  115. package/src/__tests__/task-memory-cleanup.test.ts +1 -0
  116. package/src/__tests__/task-scheduler.test.ts +3 -15
  117. package/src/__tests__/test-preload.ts +11 -0
  118. package/src/__tests__/title-generate-pipeline.test.ts +224 -0
  119. package/src/__tests__/token-estimate-pipeline.test.ts +431 -0
  120. package/src/__tests__/tool-error-pipeline.test.ts +244 -0
  121. package/src/__tests__/tool-execute-pipeline.test.ts +431 -0
  122. package/src/__tests__/tool-execution-pipeline.benchmark.test.ts +0 -6
  123. package/src/__tests__/tool-executor-shell-integration.test.ts +7 -10
  124. package/src/__tests__/tool-executor.test.ts +141 -0
  125. package/src/__tests__/tool-result-truncate-pipeline.test.ts +356 -0
  126. package/src/__tests__/tool-result-truncation.test.ts +0 -110
  127. package/src/__tests__/user-plugin-loader.test.ts +191 -0
  128. package/src/__tests__/workspace-migration-046-seed-conversation-starters-callsite.test.ts +185 -0
  129. package/src/__tests__/workspace-migration-049-release-notes-default-sonnet.test.ts +100 -0
  130. package/src/__tests__/workspace-migration-050-seed-main-agent-opus-callsite.test.ts +171 -0
  131. package/src/__tests__/workspace-migration-051-seed-conversation-summarization-callsite.test.ts +252 -0
  132. package/src/__tests__/workspace-migration-remove-hooks.test.ts +99 -0
  133. package/src/__tests__/workspace-policy.test.ts +21 -3
  134. package/src/agent/loop.ts +340 -102
  135. package/src/approvals/__tests__/guardian-feed-event.test.ts +304 -0
  136. package/src/approvals/guardian-request-resolvers.ts +80 -0
  137. package/src/backup/__tests__/backup-worker.test.ts +2 -13
  138. package/src/backup/backup-worker.ts +3 -15
  139. package/src/bundler/app-compiler.ts +84 -1
  140. package/src/calls/call-state.ts +2 -2
  141. package/src/channels/__tests__/types.test.ts +3 -3
  142. package/src/channels/types.ts +6 -4
  143. package/src/cli/__tests__/notifications.test.ts +87 -211
  144. package/src/cli/commands/__tests__/backup.test.ts +1 -1
  145. package/src/cli/commands/__tests__/image-generation.test.ts +255 -35
  146. package/src/cli/commands/__tests__/inference-send.test.ts +12 -0
  147. package/src/cli/commands/__tests__/tts-synthesize.test.ts +12 -0
  148. package/src/cli/commands/backup.ts +2 -2
  149. package/src/cli/commands/clients.ts +138 -0
  150. package/src/cli/commands/completions.ts +2 -9
  151. package/src/cli/commands/conversations.ts +55 -7
  152. package/src/cli/commands/image-generation.ts +33 -34
  153. package/src/cli/commands/notifications.ts +68 -103
  154. package/src/cli/commands/oauth/__tests__/providers-register.test.ts +1 -1
  155. package/src/cli/commands/oauth/__tests__/providers-update.test.ts +1 -1
  156. package/src/cli/commands/oauth/connect.ts +2 -2
  157. package/src/cli/commands/oauth/providers.ts +176 -8
  158. package/src/cli/commands/oauth/status.ts +46 -36
  159. package/src/cli/commands/skills.ts +3 -4
  160. package/src/cli/program.ts +25 -29
  161. package/src/config/__tests__/backup-schema.test.ts +7 -2
  162. package/src/config/bundled-skills/app-builder/SKILL.md +2 -2
  163. package/src/config/bundled-skills/app-builder/references/WIDGETS.md +10 -10
  164. package/src/config/bundled-skills/contacts/tools/contact-merge.ts +66 -87
  165. package/src/config/bundled-skills/contacts/tools/contact-search.ts +28 -51
  166. package/src/config/bundled-skills/contacts/tools/contact-upsert.ts +22 -40
  167. package/src/config/bundled-skills/image-studio/SKILL.md +2 -1
  168. package/src/config/bundled-skills/image-studio/TOOLS.json +2 -1
  169. package/src/config/bundled-skills/image-studio/tools/media-generate-image.ts +23 -39
  170. package/src/config/bundled-skills/messaging/SKILL.md +3 -3
  171. package/src/config/bundled-skills/messaging/tools/__tests__/messaging-feed-events.test.ts +207 -0
  172. package/src/config/bundled-skills/messaging/tools/messaging-archive-by-sender.ts +12 -0
  173. package/src/config/bundled-skills/messaging/tools/messaging-send.ts +58 -0
  174. package/src/config/bundled-skills/schedule/SKILL.md +8 -3
  175. package/src/config/bundled-skills/schedule/TOOLS.json +15 -7
  176. package/src/config/bundled-skills/schedule/references/SCRIPT_MODE_PATTERNS.md +59 -0
  177. package/src/config/bundled-tool-registry.ts +0 -15
  178. package/src/config/feature-flag-registry.json +17 -1
  179. package/src/config/schema.ts +19 -0
  180. package/src/config/schemas/backup.ts +1 -1
  181. package/src/config/schemas/conversations.ts +16 -0
  182. package/src/config/schemas/llm.ts +2 -3
  183. package/src/config/schemas/security.ts +6 -6
  184. package/src/config/schemas/tts.ts +11 -0
  185. package/src/config/skill-state.ts +6 -2
  186. package/src/config/skills.ts +94 -5
  187. package/src/context/__tests__/compact-prompt.test.ts +27 -9
  188. package/src/context/prompts/compact.md +26 -12
  189. package/src/context/tool-result-truncation.ts +3 -63
  190. package/src/context/window-manager.ts +190 -16
  191. package/src/credential-health/credential-health-service.ts +19 -6
  192. package/src/daemon/__tests__/conversation-feed-event.test.ts +317 -0
  193. package/src/daemon/__tests__/conversation-lifecycle-auto-analyze.test.ts +4 -12
  194. package/src/daemon/__tests__/conversation-tool-setup.test.ts +14 -15
  195. package/src/daemon/config-watcher.ts +0 -2
  196. package/src/daemon/context-overflow-policy.ts +4 -13
  197. package/src/daemon/conversation-agent-loop-handlers.ts +83 -22
  198. package/src/daemon/conversation-agent-loop.ts +984 -683
  199. package/src/daemon/conversation-history.ts +10 -19
  200. package/src/daemon/conversation-lifecycle.ts +37 -19
  201. package/src/daemon/conversation-notifiers.ts +2 -110
  202. package/src/daemon/conversation-process.ts +14 -7
  203. package/src/daemon/conversation-runtime-assembly.ts +532 -411
  204. package/src/daemon/conversation-tool-setup.ts +41 -4
  205. package/src/daemon/conversation.ts +80 -35
  206. package/src/daemon/external-plugins-bootstrap.ts +478 -0
  207. package/src/daemon/first-greeting.ts +191 -14
  208. package/src/daemon/handlers/config-model.ts +11 -0
  209. package/src/daemon/handlers/skills.ts +5 -1
  210. package/src/daemon/lifecycle.ts +33 -68
  211. package/src/daemon/message-types/computer-use.ts +2 -34
  212. package/src/daemon/message-types/conversations.ts +49 -0
  213. package/src/daemon/message-types/messages.ts +12 -0
  214. package/src/daemon/server.ts +5 -3
  215. package/src/daemon/shutdown-handlers.ts +2 -12
  216. package/src/daemon/tool-side-effects.ts +14 -56
  217. package/src/heartbeat/__tests__/heartbeat-feed-event.test.ts +160 -0
  218. package/src/heartbeat/heartbeat-service.ts +24 -1
  219. package/src/home/__tests__/feed-population-integration.test.ts +312 -0
  220. package/src/home/emit-feed-event.ts +7 -0
  221. package/src/home/feed-types.ts +41 -2
  222. package/src/home/rewrite-command-preview.ts +66 -0
  223. package/src/ipc/__tests__/socket-path.test.ts +11 -50
  224. package/src/ipc/cli-client.ts +1 -1
  225. package/src/ipc/cli-server.ts +3 -3
  226. package/src/ipc/gateway-client.ts +4 -1
  227. package/src/ipc/routes/browser-context.ts +2 -0
  228. package/src/ipc/routes/browser.ts +1 -0
  229. package/src/ipc/routes/get-contact.ts +16 -0
  230. package/src/ipc/routes/index.ts +14 -0
  231. package/src/ipc/routes/list-clients.ts +31 -0
  232. package/src/ipc/routes/merge-contacts.ts +17 -0
  233. package/src/ipc/routes/notification.ts +133 -0
  234. package/src/ipc/routes/rename-conversation.ts +59 -0
  235. package/src/ipc/routes/search-contacts.ts +19 -0
  236. package/src/ipc/routes/upsert-contact.ts +25 -0
  237. package/src/ipc/socket-path.ts +14 -38
  238. package/src/media/app-icon-generator.ts +23 -46
  239. package/src/media/avatar-router.ts +26 -41
  240. package/src/media/gemini-image-service.ts +8 -41
  241. package/src/media/image-credentials.ts +73 -0
  242. package/src/media/image-service.ts +85 -0
  243. package/src/media/openai-image-service.ts +131 -0
  244. package/src/media/types.ts +46 -0
  245. package/src/memory/conversation-crud.ts +48 -18
  246. package/src/memory/conversation-queries.ts +57 -4
  247. package/src/memory/conversation-title-service.ts +25 -0
  248. package/src/memory/db-init.ts +8 -0
  249. package/src/memory/embedding-gemini.test.ts +41 -2
  250. package/src/memory/embedding-gemini.ts +6 -1
  251. package/src/memory/graph/bootstrap.test.ts +282 -0
  252. package/src/memory/graph/bootstrap.ts +8 -5
  253. package/src/memory/graph/extraction.ts +10 -2
  254. package/src/memory/graph/graph-search.test.ts +1 -0
  255. package/src/memory/graph/inspect.ts +2 -2
  256. package/src/memory/graph/retriever.ts +10 -3
  257. package/src/memory/migrations/041-approval-prompt-ts-tracker.ts +26 -0
  258. package/src/memory/migrations/149-oauth-tables.ts +1 -0
  259. package/src/memory/migrations/223-schedule-script-column.ts +11 -0
  260. package/src/memory/migrations/224-oauth-providers-managed-service-is-paid.ts +24 -0
  261. package/src/memory/migrations/225-oauth-providers-available-scopes.ts +13 -0
  262. package/src/memory/migrations/index.ts +4 -0
  263. package/src/memory/pkb/pkb-index.test.ts +1 -0
  264. package/src/memory/pkb/pkb-reconcile.test.ts +1 -0
  265. package/src/memory/pkb/pkb-search.test.ts +65 -4
  266. package/src/memory/pkb/pkb-search.ts +40 -18
  267. package/src/memory/qdrant-client.test.ts +60 -0
  268. package/src/memory/qdrant-client.ts +25 -0
  269. package/src/memory/schema/infrastructure.ts +1 -0
  270. package/src/memory/schema/oauth.ts +4 -1
  271. package/src/messaging/providers/slack/render-transcript.test.ts +77 -29
  272. package/src/messaging/providers/slack/render-transcript.ts +58 -0
  273. package/src/notifications/conversation-pairing.ts +78 -19
  274. package/src/notifications/copy-composer.ts +0 -5
  275. package/src/notifications/emit-signal.ts +1 -1
  276. package/src/notifications/signal.ts +1 -2
  277. package/src/oauth/AGENTS.md +1 -1
  278. package/src/oauth/__tests__/identity-verifier.test.ts +2 -1
  279. package/src/oauth/connect-orchestrator.ts +8 -34
  280. package/src/oauth/connect-types.ts +6 -10
  281. package/src/oauth/manual-token-connection.ts +23 -0
  282. package/src/oauth/oauth-store.ts +30 -14
  283. package/src/oauth/provider-serializer.ts +6 -1
  284. package/src/oauth/seed-providers.ts +56 -108
  285. package/src/outbound-proxy/http-forwarder.ts +9 -0
  286. package/src/permissions/approval-policy.test.ts +293 -18
  287. package/src/permissions/approval-policy.ts +110 -58
  288. package/src/permissions/arg-parser.test.ts +161 -0
  289. package/src/permissions/arg-parser.ts +141 -0
  290. package/src/permissions/bash-risk-classifier.test.ts +414 -2
  291. package/src/permissions/bash-risk-classifier.ts +303 -60
  292. package/src/permissions/checker.ts +157 -29
  293. package/src/permissions/command-registry.test.ts +239 -0
  294. package/src/permissions/command-registry.ts +234 -54
  295. package/src/permissions/defaults.ts +5 -4
  296. package/src/permissions/gateway-threshold-reader.ts +196 -0
  297. package/src/permissions/prompter.ts +4 -0
  298. package/src/permissions/risk-types.ts +61 -4
  299. package/src/permissions/schedule-risk-classifier.test.ts +129 -0
  300. package/src/permissions/schedule-risk-classifier.ts +85 -0
  301. package/src/permissions/shell-identity.ts +2 -42
  302. package/src/permissions/types.ts +2 -0
  303. package/src/permissions/workspace-policy.ts +8 -3
  304. package/src/plugins/defaults/circuit-breaker.ts +146 -0
  305. package/src/plugins/defaults/compaction.ts +145 -0
  306. package/src/plugins/defaults/empty-response.ts +126 -0
  307. package/src/plugins/defaults/history-repair.ts +85 -0
  308. package/src/plugins/defaults/index.ts +116 -0
  309. package/src/plugins/defaults/injectors.ts +491 -0
  310. package/src/plugins/defaults/llm-call.ts +82 -0
  311. package/src/plugins/defaults/memory-retrieval.ts +226 -0
  312. package/src/plugins/defaults/overflow-reduce.ts +181 -0
  313. package/src/plugins/defaults/persistence.ts +129 -0
  314. package/src/plugins/defaults/title-generate.ts +95 -0
  315. package/src/plugins/defaults/token-estimate.ts +104 -0
  316. package/src/plugins/defaults/tool-error.ts +126 -0
  317. package/src/plugins/defaults/tool-execute.ts +89 -0
  318. package/src/plugins/defaults/tool-result-truncate.ts +88 -0
  319. package/src/plugins/pipeline.ts +316 -0
  320. package/src/plugins/plugin-skill-contributions.ts +292 -0
  321. package/src/plugins/registry.ts +241 -0
  322. package/src/plugins/types.ts +1134 -0
  323. package/src/plugins/user-loader.ts +177 -0
  324. package/src/prompts/templates/BOOTSTRAP.md +27 -77
  325. package/src/providers/model-catalog.ts +52 -29
  326. package/src/providers/model-intents.ts +1 -1
  327. package/src/providers/openrouter/client.ts +5 -1
  328. package/src/providers/speech-to-text/deepgram-realtime.test.ts +61 -0
  329. package/src/providers/speech-to-text/deepgram-realtime.ts +57 -0
  330. package/src/providers/speech-to-text/xai-realtime.test.ts +72 -4
  331. package/src/providers/speech-to-text/xai-realtime.ts +39 -14
  332. package/src/runtime/AGENTS.md +25 -16
  333. package/src/runtime/__tests__/browser-extension-pair-routes.test.ts +3 -3
  334. package/src/runtime/__tests__/client-registry.test.ts +293 -0
  335. package/src/runtime/client-registry.ts +261 -0
  336. package/src/runtime/http-server.ts +77 -8
  337. package/src/runtime/http-types.ts +0 -2
  338. package/src/runtime/migrations/vbundle-builder.ts +1 -22
  339. package/src/runtime/routes/approval-prompt-ts-tracker.ts +51 -31
  340. package/src/runtime/routes/approval-routes.ts +17 -0
  341. package/src/runtime/routes/browser-extension-pair-routes.ts +27 -8
  342. package/src/runtime/routes/conversation-routes.ts +223 -116
  343. package/src/runtime/routes/inbound-message-handler.ts +88 -13
  344. package/src/runtime/routes/memory-item-routes.test.ts +1 -0
  345. package/src/runtime/routes/migration-routes.ts +0 -3
  346. package/src/runtime/routes/playground/__tests__/force-compact.test.ts +284 -0
  347. package/src/runtime/routes/playground/__tests__/guard.test.ts +80 -0
  348. package/src/runtime/routes/playground/__tests__/inject-failures.test.ts +294 -0
  349. package/src/runtime/routes/playground/__tests__/reset-circuit.test.ts +271 -0
  350. package/src/runtime/routes/playground/__tests__/seed-conversation.test.ts +202 -0
  351. package/src/runtime/routes/playground/__tests__/seeded-conversations.test.ts +309 -0
  352. package/src/runtime/routes/playground/__tests__/state.test.ts +224 -0
  353. package/src/runtime/routes/playground/conversation-not-found.ts +29 -0
  354. package/src/runtime/routes/playground/deps.ts +56 -0
  355. package/src/runtime/routes/playground/force-compact.ts +73 -0
  356. package/src/runtime/routes/playground/guard.ts +37 -0
  357. package/src/runtime/routes/playground/index.ts +28 -0
  358. package/src/runtime/routes/playground/inject-failures.ts +159 -0
  359. package/src/runtime/routes/playground/reset-circuit.ts +115 -0
  360. package/src/runtime/routes/playground/seed-conversation.ts +139 -0
  361. package/src/runtime/routes/playground/seeded-conversations.ts +78 -0
  362. package/src/runtime/routes/playground/state.ts +78 -0
  363. package/src/runtime/routes/schedule-routes.ts +89 -8
  364. package/src/runtime/skill-route-registry.ts +75 -15
  365. package/src/schedule/run-script.ts +68 -0
  366. package/src/schedule/schedule-store.ts +7 -1
  367. package/src/schedule/scheduler.ts +48 -8
  368. package/src/skills/catalog-cache.ts +12 -5
  369. package/src/tools/browser/__tests__/browser-status.test.ts +189 -0
  370. package/src/tools/browser/browser-execution.ts +88 -19
  371. package/src/tools/browser/cdp-client/__tests__/extension-cdp-client.test.ts +230 -0
  372. package/src/tools/browser/cdp-client/__tests__/factory.test.ts +146 -3
  373. package/src/tools/browser/cdp-client/extension-cdp-client.ts +54 -3
  374. package/src/tools/browser/cdp-client/factory.ts +15 -4
  375. package/src/tools/executor.ts +126 -74
  376. package/src/tools/network/script-proxy/session-manager.ts +37 -1
  377. package/src/tools/permission-checker.ts +98 -49
  378. package/src/tools/policy-context.ts +4 -0
  379. package/src/tools/registry.ts +140 -3
  380. package/src/tools/schedule/create.ts +23 -8
  381. package/src/tools/schedule/update.ts +3 -1
  382. package/src/tools/secret-detection-handler.ts +0 -51
  383. package/src/tools/system/avatar-generator.ts +6 -2
  384. package/src/tools/types.ts +28 -2
  385. package/src/util/platform.ts +7 -2
  386. package/src/util/pricing.ts +26 -3
  387. package/src/workspace/migrations/006-services-config.ts +2 -4
  388. package/src/workspace/migrations/022-move-hooks-to-workspace.ts +2 -3
  389. package/src/workspace/migrations/041-backfill-google-gmail-settings-scope.ts +3 -4
  390. package/src/workspace/migrations/046-seed-conversation-starters-callsite.ts +108 -0
  391. package/src/workspace/migrations/047-remove-watch-callsites.ts +54 -0
  392. package/src/workspace/migrations/048-remove-workspace-hooks.ts +81 -0
  393. package/src/workspace/migrations/049-release-notes-default-sonnet.ts +80 -0
  394. package/src/workspace/migrations/050-seed-main-agent-opus-callsite.ts +86 -0
  395. package/src/workspace/migrations/051-seed-conversation-summarization-callsite.ts +128 -0
  396. package/src/workspace/migrations/registry.ts +12 -0
  397. package/tsconfig.json +1 -1
  398. package/hook-templates/debug-prompt-logger/hook.json +0 -7
  399. package/hook-templates/debug-prompt-logger/run.sh +0 -66
  400. package/src/__tests__/compaction-circuit-breaker.test.ts +0 -336
  401. package/src/__tests__/context-overflow-approval.test.ts +0 -156
  402. package/src/__tests__/hooks-blocking.test.ts +0 -178
  403. package/src/__tests__/hooks-cli.test.ts +0 -182
  404. package/src/__tests__/hooks-config.test.ts +0 -108
  405. package/src/__tests__/hooks-discovery.test.ts +0 -211
  406. package/src/__tests__/hooks-integration.test.ts +0 -196
  407. package/src/__tests__/hooks-manager.test.ts +0 -226
  408. package/src/__tests__/hooks-runner.test.ts +0 -175
  409. package/src/__tests__/hooks-settings.test.ts +0 -160
  410. package/src/__tests__/hooks-templates.test.ts +0 -169
  411. package/src/__tests__/hooks-ts-runner.test.ts +0 -170
  412. package/src/__tests__/hooks-watch.test.ts +0 -112
  413. package/src/__tests__/notification-schedule-dedup.test.ts +0 -213
  414. package/src/__tests__/oauth-scope-policy.test.ts +0 -180
  415. package/src/__tests__/send-notification-tool.test.ts +0 -83
  416. package/src/cli/commands/shotgun.ts +0 -266
  417. package/src/config/bundled-skills/conversations/SKILL.md +0 -20
  418. package/src/config/bundled-skills/conversations/TOOLS.json +0 -23
  419. package/src/config/bundled-skills/conversations/tools/rename-conversation.ts +0 -88
  420. package/src/config/bundled-skills/heartbeat/SKILL.md +0 -43
  421. package/src/config/bundled-skills/notifications/SKILL.md +0 -40
  422. package/src/config/bundled-skills/notifications/TOOLS.json +0 -80
  423. package/src/config/bundled-skills/notifications/tools/send-notification.ts +0 -152
  424. package/src/config/bundled-skills/notifications/tools/shared.ts +0 -13
  425. package/src/config/bundled-skills/screen-watch/SKILL.md +0 -27
  426. package/src/config/bundled-skills/screen-watch/TOOLS.json +0 -35
  427. package/src/config/bundled-skills/screen-watch/tools/start-screen-watch.ts +0 -12
  428. package/src/config/bundled-skills/skills-catalog/SKILL.md +0 -84
  429. package/src/daemon/context-overflow-approval.ts +0 -52
  430. package/src/daemon/watch-handler.ts +0 -399
  431. package/src/hooks/cli.ts +0 -253
  432. package/src/hooks/config.ts +0 -100
  433. package/src/hooks/discovery.ts +0 -135
  434. package/src/hooks/manager.ts +0 -179
  435. package/src/hooks/runner.ts +0 -117
  436. package/src/hooks/templates.ts +0 -77
  437. package/src/hooks/types.ts +0 -75
  438. package/src/oauth/scope-policy.ts +0 -89
  439. package/src/runtime/gateway-internal-client.ts +0 -94
  440. package/src/runtime/routes/watch-routes.ts +0 -156
  441. package/src/signals/shotgun.ts +0 -203
  442. package/src/tools/watch/screen-watch.ts +0 -144
  443. package/src/tools/watch/watch-state.ts +0 -142
@@ -0,0 +1,128 @@
1
+ import { existsSync, readFileSync, writeFileSync } from "node:fs";
2
+ import { join } from "node:path";
3
+
4
+ import type { WorkspaceMigration } from "./types.js";
5
+
6
/**
 * Workspace migration 051 — give the `conversationSummarization` LLM call
 * site its own inexpensive defaults.
 *
 * Why: `ContextWindowManager.updateSummary()` uses this call site during
 * mid-loop compaction. With no call-site entry it inherits `llm.default`
 * (opus, `effort: "max"`, thinking enabled, `maxTokens: 64000`), which is
 * too slow for summarizing a ~150k-token transcript within the agent-loop
 * plugin pipeline's 30s budget; the result was `PluginTimeoutError` and a
 * hard-failed turn.
 *
 * What: for each of `model`, `effort` and `thinking`, write a default only
 * when the key is absent from the existing call-site entry. The merge is
 * additive (rather than skip-when-present, as in migrations 040 and 046)
 * because migration 038 may already have seeded a bare `{ model: ... }`
 * entry that still lacks `effort` / `thinking`.
 *
 * Skipped entirely when:
 *   - `VELLUM_DEFAULT_WORKSPACE_CONFIG_PATH` is set (the platform overlay
 *     owns call-site seeds);
 *   - `config.json` exists but cannot be read/parsed, or its root is not a
 *     plain object;
 *   - `llm.default.provider` names something other than Anthropic or
 *     OpenRouter (the seeded model IDs are Anthropic-shaped).
 *
 * Idempotent: once all three keys are present nothing is written.
 *
 * NOTE(review): only `llm.default.provider` is consulted. If call-site
 * entries can carry their own `provider` override, a non-Anthropic override
 * with no `model` would still receive the Anthropic model ID — confirm
 * against the config schema.
 */
export const seedConversationSummarizationCallsiteMigration: WorkspaceMigration =
  {
    id: "051-seed-conversation-summarization-callsite",
    description:
      "Seed conversationSummarization LLM call-site defaults so summary runs stay inside the agent-loop budget",
    run(workspaceDir: string): void {
      if (process.env.VELLUM_DEFAULT_WORKSPACE_CONFIG_PATH) return;

      const configPath = join(workspaceDir, "config.json");

      // A missing config file is treated as an empty config; an unreadable
      // or malformed one aborts the migration without touching it.
      let config: Record<string, unknown> = {};
      if (existsSync(configPath)) {
        let parsed: unknown;
        try {
          parsed = JSON.parse(readFileSync(configPath, "utf-8"));
        } catch {
          return;
        }
        const root = readObject(parsed);
        if (root === null) return;
        config = root;
      }

      const llm = readObject(config.llm) ?? {};

      // An unset (or empty / non-string) provider is treated as Anthropic.
      const provider =
        readString(readObject(llm.default)?.provider) ?? "anthropic";
      if (provider !== "anthropic" && provider !== "openrouter") return;

      const qualityModel = resolveQualityModel(provider);
      if (qualityModel === undefined) return;

      const callSites = readObject(llm.callSites) ?? {};
      const existing = readObject(callSites.conversationSummarization) ?? {};

      // Insertion order here is the order in which missing keys are
      // appended to the entry.
      const defaults: Record<string, unknown> = {
        model: qualityModel,
        effort: "low",
        thinking: { enabled: false },
      };

      // Merge-missing per leaf. A key that is present — even with a falsy
      // value such as `false` — counts as user intent and is left alone.
      const seeded: Record<string, unknown> = { ...existing };
      let changed = false;
      for (const [key, value] of Object.entries(defaults)) {
        if (key in seeded) continue;
        seeded[key] = value;
        changed = true;
      }
      if (!changed) return;

      callSites.conversationSummarization = seeded;
      llm.callSites = callSites;
      config.llm = llm;
      writeFileSync(configPath, JSON.stringify(config, null, 2) + "\n");
    },
    down(_workspaceDir: string): void {
      // Intentionally a no-op (forward-only): stripping the seeded defaults
      // would bring back the 30s pipeline-budget timeout this migration
      // exists to fix.
    },
  };
105
+
106
+ // ---------------------------------------------------------------------------
107
+ // Helpers — self-contained per workspace migrations AGENTS.md
108
+ // ---------------------------------------------------------------------------
109
+
110
/** Model ID seeded for each provider this migration supports. */
const PROVIDER_QUALITY_MODELS: Record<string, string> = {
  anthropic: "claude-opus-4-7",
  openrouter: "anthropic/claude-opus-4.7",
};

/**
 * Look up the model ID to seed for `provider`.
 *
 * @param provider - Provider identifier (e.g. `"anthropic"`).
 * @returns The model ID from `PROVIDER_QUALITY_MODELS`, or `undefined` when
 *   the provider has no entry of its own.
 */
function resolveQualityModel(provider: string): string | undefined {
  // Own-property check: a bare `table[provider]` walks the prototype chain,
  // so keys like "constructor" or "__proto__" would yield inherited
  // non-string values despite the declared return type.
  return Object.prototype.hasOwnProperty.call(PROVIDER_QUALITY_MODELS, provider)
    ? PROVIDER_QUALITY_MODELS[provider]
    : undefined;
}
118
+
119
/**
 * Narrow an arbitrary value to an object-shaped record.
 *
 * @param value - Any value, typically a node from parsed JSON.
 * @returns `value` itself (same reference) when it is a non-null object
 *   that is not an array; `null` for everything else.
 */
function readObject(value: unknown): Record<string, unknown> | null {
  const isRecord =
    typeof value === "object" && value !== null && !Array.isArray(value);
  return isRecord ? (value as Record<string, unknown>) : null;
}
125
+
126
/**
 * Read a non-empty string out of an arbitrary value.
 *
 * @param value - Any value, typically a node from parsed JSON.
 * @returns `value` when it is a string of length > 0; otherwise `undefined`
 *   (non-strings and the empty string are both rejected).
 */
function readString(value: unknown): string | undefined {
  if (typeof value !== "string") return undefined;
  return value === "" ? undefined : value;
}
@@ -43,6 +43,12 @@ import { fixBackfillGoogleGmailSettingsScopeMigration } from "./042-fix-backfill
43
43
  import { releaseNotesLatexRenderingMigration } from "./043-release-notes-latex-rendering.js";
44
44
  import { bumpStaleProviderStreamTimeoutMigration } from "./044-bump-stale-provider-stream-timeout.js";
45
45
  import { releaseNotesMeetAvatarMigration } from "./045-release-notes-meet-avatar.js";
46
+ import { seedConversationStartersCallsiteMigration } from "./046-seed-conversation-starters-callsite.js";
47
+ import { removeWatchCallsitesMigration } from "./047-remove-watch-callsites.js";
48
+ import { removeWorkspaceHooksMigration } from "./048-remove-workspace-hooks.js";
49
+ import { releaseNotesDefaultSonnetMigration } from "./049-release-notes-default-sonnet.js";
50
+ import { seedMainAgentOpusCallsiteMigration } from "./050-seed-main-agent-opus-callsite.js";
51
+ import { seedConversationSummarizationCallsiteMigration } from "./051-seed-conversation-summarization-callsite.js";
46
52
  import { migrateToWorkspaceVolumeMigration } from "./migrate-to-workspace-volume.js";
47
53
  import type { WorkspaceMigration } from "./types.js";
48
54
 
@@ -97,4 +103,10 @@ export const WORKSPACE_MIGRATIONS: WorkspaceMigration[] = [
97
103
  releaseNotesLatexRenderingMigration,
98
104
  bumpStaleProviderStreamTimeoutMigration,
99
105
  releaseNotesMeetAvatarMigration,
106
+ seedConversationStartersCallsiteMigration,
107
+ removeWatchCallsitesMigration,
108
+ removeWorkspaceHooksMigration,
109
+ releaseNotesDefaultSonnetMigration,
110
+ seedMainAgentOpusCallsiteMigration,
111
+ seedConversationSummarizationCallsiteMigration,
100
112
  ];
package/tsconfig.json CHANGED
@@ -15,6 +15,6 @@
15
15
  "jsx": "react-jsx",
16
16
  "types": ["bun-types"]
17
17
  },
18
- "include": ["src/**/*", "src/**/*.tsx"],
18
+ "include": ["src/**/*", "src/**/*.tsx", "examples/**/*.ts"],
19
19
  "exclude": ["node_modules", "dist", "drizzle"]
20
20
  }
@@ -1,7 +0,0 @@
1
- {
2
- "name": "debug-prompt-logger",
3
- "description": "Logs system prompt and conversation history to stderr before each LLM call",
4
- "version": "1.0.0",
5
- "events": ["pre-llm-call"],
6
- "script": "run.sh"
7
- }
@@ -1,66 +0,0 @@
1
- #!/usr/bin/env bash
2
- # Debug Prompt Logger — prints the system prompt and conversation
3
- # history before each LLM call. Runs whenever the hook is installed.
4
-
5
- data=$(cat)
6
-
7
- echo "" >&2
8
- echo "════════════════════════════════════════════════════════════════" >&2
9
- echo " PRE-LLM-CALL" >&2
10
- echo "════════════════════════════════════════════════════════════════" >&2
11
- echo "" >&2
12
-
13
- # Total output cap (bytes) — prevents unbounded stderr when jq is missing.
14
- MAX_OUTPUT=200000
15
-
16
- if ! command -v jq >/dev/null 2>&1; then
17
- echo "(jq not found — install jq for formatted output)" >&2
18
- printf '%s' "$data" | head -c "$MAX_OUTPUT" >&2
19
- echo "" >&2
20
- echo "════════════════════════════════════════════════════════════════" >&2
21
- echo "" >&2
22
- exit 0
23
- fi
24
-
25
- # System prompt (capped at 5000 chars to avoid flooding stderr)
26
- echo "── System Prompt ──────────────────────────────────────────────" >&2
27
- printf '%s' "$data" | jq -r '.systemPrompt // "N/A" | if length > 5000 then .[:5000] + "\n…[truncated]" else . end' >&2
28
- echo "" >&2
29
- echo "" >&2
30
-
31
- # Message count and model
32
- model=$(printf '%s' "$data" | jq -r '.model // "unknown"')
33
- msgCount=$(printf '%s' "$data" | jq '.messages | length')
34
- toolCount=$(printf '%s' "$data" | jq -r '.toolCount // 0')
35
- echo "Model: $model | Messages: $msgCount | Tools: $toolCount" >&2
36
- echo "" >&2
37
-
38
- # All messages — truncate per-field to keep output bounded.
39
- # Max chars per text/tool field (images/files already capped at 1000).
40
- MAX_FIELD=2000
41
-
42
- echo "── Messages ───────────────────────────────────────────────────" >&2
43
- printf '%s' "$data" | jq -r --argjson maxf "$MAX_FIELD" '
44
- def trunc(n): if length > n then .[:n] + "…[truncated]" else . end;
45
- .messages[] |
46
- "\(.role): " + (
47
- if (.content | type) == "string" then
48
- .content | trunc($maxf)
49
- elif (.content | type) == "array" then
50
- [.content[] |
51
- if .type == "text" then (.text | trunc($maxf))
52
- elif .type == "image" then "[image: \(.source.media_type // "unknown")] \(.source.data[:1000])..."
53
- elif .type == "file" then "[file: \(.source.filename // "unknown")] \(.source.data[:1000])..."
54
- elif .type == "tool_use" then "[tool_use: \(.name)] \(.input | tostring | trunc($maxf))"
55
- elif .type == "tool_result" then "[tool_result: \(.content // "" | tostring | trunc($maxf))]"
56
- else "[" + .type + "] " + (. | tostring | trunc($maxf))
57
- end
58
- ] | join(" | ")
59
- else
60
- "(empty)"
61
- end
62
- )
63
- ' >&2
64
- echo "" >&2
65
- echo "════════════════════════════════════════════════════════════════" >&2
66
- echo "" >&2
@@ -1,336 +0,0 @@
1
- /**
2
- * Circuit-breaker tests for the compaction path.
3
- *
4
- * These exercise the tiny helpers (`isCompactionCircuitOpen`,
5
- * `trackCompactionOutcome`) that `conversation-agent-loop.ts` uses at every
6
- * `maybeCompact()` call site. Covering the helpers — rather than wiring up a
7
- * full `Conversation` — keeps the test fast and isolates the breaker logic
8
- * from the rest of the loop, which is where bugs actually hide.
9
- *
10
- * Acceptance criteria:
11
- * (a) counter increments on `summaryFailed`
12
- * (b) circuit opens after exactly 3 failures
13
- * (c) successful compaction resets counter and circuit
14
- * (d) open circuit skips auto-compaction but admits `force: true`
15
- * (e) circuit re-opens after cooldown expiry when 3 more failures accumulate
16
- * (f) call sites guard `undefined summaryFailed` so early returns do not
17
- * reset the counter
18
- * (g) forceCompact-style tracking: resets counter on success, increments on
19
- * failure, preserves state on early returns
20
- */
21
- import { afterEach, beforeEach, describe, expect, test } from "bun:test";
22
-
23
- import {
24
- isCompactionCircuitOpen,
25
- trackCompactionOutcome,
26
- } from "../daemon/conversation-agent-loop.js";
27
- import type { ServerMessage } from "../daemon/message-protocol.js";
28
-
29
- interface BreakerState {
30
- consecutiveCompactionFailures: number;
31
- compactionCircuitOpenUntil: number | null;
32
- }
33
-
34
- function makeState(): BreakerState {
35
- return {
36
- consecutiveCompactionFailures: 0,
37
- compactionCircuitOpenUntil: null,
38
- };
39
- }
40
-
41
- function collectEvents(): {
42
- events: ServerMessage[];
43
- onEvent: (msg: ServerMessage) => void;
44
- } {
45
- const events: ServerMessage[] = [];
46
- return { events, onEvent: (msg) => events.push(msg) };
47
- }
48
-
49
- describe("compaction circuit breaker", () => {
50
- let originalDateNow: () => number;
51
-
52
- beforeEach(() => {
53
- originalDateNow = Date.now;
54
- });
55
-
56
- afterEach(() => {
57
- Date.now = originalDateNow;
58
- });
59
-
60
- test("(a) counter increments on each summaryFailed outcome", () => {
61
- const state = makeState();
62
- const { onEvent, events } = collectEvents();
63
-
64
- trackCompactionOutcome(state, true, onEvent);
65
- expect(state.consecutiveCompactionFailures).toBe(1);
66
- expect(state.compactionCircuitOpenUntil).toBeNull();
67
- expect(events).toHaveLength(0);
68
-
69
- trackCompactionOutcome(state, true, onEvent);
70
- expect(state.consecutiveCompactionFailures).toBe(2);
71
- expect(state.compactionCircuitOpenUntil).toBeNull();
72
- expect(events).toHaveLength(0);
73
- });
74
-
75
- test("(b) circuit opens after exactly 3 consecutive failures", () => {
76
- const fixedNow = 1_700_000_000_000;
77
- Date.now = () => fixedNow;
78
-
79
- const state = makeState();
80
- const { onEvent, events } = collectEvents();
81
-
82
- trackCompactionOutcome(state, true, onEvent);
83
- trackCompactionOutcome(state, true, onEvent);
84
- // Two failures — circuit still closed.
85
- expect(state.compactionCircuitOpenUntil).toBeNull();
86
- expect(events).toHaveLength(0);
87
-
88
- trackCompactionOutcome(state, true, onEvent);
89
- // Third failure — circuit trips and fires the event exactly once.
90
- expect(state.consecutiveCompactionFailures).toBe(3);
91
- expect(state.compactionCircuitOpenUntil).toBe(fixedNow + 60 * 60 * 1000);
92
- expect(events).toHaveLength(1);
93
- expect(events[0]).toEqual({
94
- type: "compaction_circuit_open",
95
- reason: "3_consecutive_failures",
96
- openUntil: fixedNow + 60 * 60 * 1000,
97
- });
98
-
99
- // Further failures do not re-fire the event while the circuit is open.
100
- trackCompactionOutcome(state, true, onEvent);
101
- expect(state.consecutiveCompactionFailures).toBe(4);
102
- expect(events).toHaveLength(1);
103
- });
104
-
105
- test("(c) successful compaction resets counter and clears circuit", () => {
106
- const fixedNow = 1_700_000_000_000;
107
- Date.now = () => fixedNow;
108
-
109
- const state = makeState();
110
- const { onEvent } = collectEvents();
111
-
112
- // Trip the breaker.
113
- trackCompactionOutcome(state, true, onEvent);
114
- trackCompactionOutcome(state, true, onEvent);
115
- trackCompactionOutcome(state, true, onEvent);
116
- expect(state.compactionCircuitOpenUntil).not.toBeNull();
117
-
118
- // Success resets state.
119
- trackCompactionOutcome(state, false, onEvent);
120
- expect(state.consecutiveCompactionFailures).toBe(0);
121
- expect(state.compactionCircuitOpenUntil).toBeNull();
122
-
123
- // `summaryFailed` undefined (never attempted the LLM call) currently
124
- // takes the "not failed" branch, which is why callers must guard the
125
- // helper with `summaryFailed !== undefined` — otherwise an early-return
126
- // `maybeCompact()` would silently reset the counter. The regression test
127
- // below documents that invariant from the caller's perspective.
128
- trackCompactionOutcome(state, undefined, onEvent);
129
- expect(state.consecutiveCompactionFailures).toBe(0);
130
- expect(state.compactionCircuitOpenUntil).toBeNull();
131
- });
132
-
133
- test("(d) isCompactionCircuitOpen reflects state and expiry", () => {
134
- const fixedNow = 1_700_000_000_000;
135
- Date.now = () => fixedNow;
136
-
137
- const state = makeState();
138
- expect(isCompactionCircuitOpen(state)).toBe(false);
139
-
140
- // Trip the breaker — now open.
141
- const { onEvent } = collectEvents();
142
- trackCompactionOutcome(state, true, onEvent);
143
- trackCompactionOutcome(state, true, onEvent);
144
- trackCompactionOutcome(state, true, onEvent);
145
- expect(isCompactionCircuitOpen(state)).toBe(true);
146
-
147
- // After cooldown expires the helper reports closed again, even without an
148
- // explicit reset — the open-until timestamp is the only source of truth
149
- // for the gate.
150
- Date.now = () => fixedNow + 60 * 60 * 1000 + 1;
151
- expect(isCompactionCircuitOpen(state)).toBe(false);
152
- });
153
-
154
- test("(d) open circuit skips auto-compaction but admits force:true", () => {
155
- // Simulate the decision the agent-loop site makes with a counter that
156
- // only increments when compaction actually runs.
157
- const fixedNow = 1_700_000_000_000;
158
- Date.now = () => fixedNow;
159
-
160
- const state = makeState();
161
- const { onEvent } = collectEvents();
162
-
163
- let compactionCalls = 0;
164
- const runCompactionIfAllowed = (opts: { force?: boolean }) => {
165
- // Mirror conversation-agent-loop.ts site 1:
166
- // auto paths gate on !isCompactionCircuitOpen(ctx);
167
- // force paths bypass the gate.
168
- if (!opts.force && isCompactionCircuitOpen(state)) {
169
- return { ran: false };
170
- }
171
- compactionCalls += 1;
172
- return { ran: true };
173
- };
174
-
175
- // Trip the breaker.
176
- trackCompactionOutcome(state, true, onEvent);
177
- trackCompactionOutcome(state, true, onEvent);
178
- trackCompactionOutcome(state, true, onEvent);
179
- expect(isCompactionCircuitOpen(state)).toBe(true);
180
-
181
- // Auto-path is skipped while the circuit is open.
182
- const autoAttempt = runCompactionIfAllowed({});
183
- expect(autoAttempt.ran).toBe(false);
184
- expect(compactionCalls).toBe(0);
185
-
186
- // Force-path always runs, even with the breaker open.
187
- const forceAttempt = runCompactionIfAllowed({ force: true });
188
- expect(forceAttempt.ran).toBe(true);
189
- expect(compactionCalls).toBe(1);
190
-
191
- // After a forced compaction succeeds, the counter resets and the circuit
192
- // closes, unblocking future auto attempts.
193
- trackCompactionOutcome(state, false, onEvent);
194
- expect(isCompactionCircuitOpen(state)).toBe(false);
195
- expect(state.consecutiveCompactionFailures).toBe(0);
196
-
197
- const autoRetry = runCompactionIfAllowed({});
198
- expect(autoRetry.ran).toBe(true);
199
- expect(compactionCalls).toBe(2);
200
- });
201
-
202
- test("(e) circuit re-opens after cooldown expiry when 3 more failures accumulate", () => {
203
- // Regression: before the fix, `trackCompactionOutcome` required
204
- // `compactionCircuitOpenUntil === null` to open the circuit. Once a
205
- // cooldown expired, `isCompactionCircuitOpen()` correctly reported
206
- // "closed" but the stale past-timestamp stayed on the state, so the
207
- // next 3-strike window could never trip a new cooldown. The fix
208
- // treats any expired timestamp the same as null.
209
- const t0 = 1_700_000_000_000;
210
- Date.now = () => t0;
211
-
212
- const state = makeState();
213
- const { onEvent, events } = collectEvents();
214
-
215
- // Trip the breaker the first time.
216
- trackCompactionOutcome(state, true, onEvent);
217
- trackCompactionOutcome(state, true, onEvent);
218
- trackCompactionOutcome(state, true, onEvent);
219
- expect(state.compactionCircuitOpenUntil).toBe(t0 + 60 * 60 * 1000);
220
- expect(events).toHaveLength(1);
221
-
222
- // Advance past the cooldown window. Manually reset the counter — in
223
- // production this happens when a subsequent `maybeCompact()` call
224
- // succeeds (`summaryFailed: false`) after the cooldown elapses, but
225
- // the bug manifests even when the counter is reset: the stale
226
- // `compactionCircuitOpenUntil` is what breaks re-opening.
227
- const t1 = t0 + 60 * 60 * 1000 + 1;
228
- Date.now = () => t1;
229
- expect(isCompactionCircuitOpen(state)).toBe(false);
230
- state.consecutiveCompactionFailures = 0;
231
- // `compactionCircuitOpenUntil` is deliberately left as the old
232
- // timestamp to reproduce the bug condition — in practice the null
233
- // reset only happens on `summaryFailed: false`.
234
- expect(state.compactionCircuitOpenUntil).toBe(t0 + 60 * 60 * 1000);
235
-
236
- // Three more failures must trip a fresh cooldown even though the
237
- // old timestamp is still set.
238
- trackCompactionOutcome(state, true, onEvent);
239
- trackCompactionOutcome(state, true, onEvent);
240
- trackCompactionOutcome(state, true, onEvent);
241
- expect(state.consecutiveCompactionFailures).toBe(3);
242
- expect(state.compactionCircuitOpenUntil).toBe(t1 + 60 * 60 * 1000);
243
- expect(events).toHaveLength(2);
244
- expect(events[1]).toEqual({
245
- type: "compaction_circuit_open",
246
- reason: "3_consecutive_failures",
247
- openUntil: t1 + 60 * 60 * 1000,
248
- });
249
- });
250
-
251
- test("(f) call sites guard undefined summaryFailed so early returns don't reset the counter", () => {
252
- // Regression: `maybeCompact()` returns `summaryFailed: undefined` on
253
- // early-return paths (no eligible messages, below threshold, cooldown
254
- // active, truncation-only). Before the fix, the agent loop called
255
- // `trackCompactionOutcome(ctx, compacted.summaryFailed, onEvent)`
256
- // unconditionally — `undefined` took the else branch and silently
257
- // reset the 3-strike counter. Callers must now guard with
258
- // `summaryFailed !== undefined` at every call site.
259
- const state = makeState();
260
- const { onEvent } = collectEvents();
261
-
262
- // Accumulate two failures, close to tripping the breaker.
263
- trackCompactionOutcome(state, true, onEvent);
264
- trackCompactionOutcome(state, true, onEvent);
265
- expect(state.consecutiveCompactionFailures).toBe(2);
266
-
267
- // Simulate an early-return result from maybeCompact() (e.g. below
268
- // threshold) — callers must skip the tracking call entirely.
269
- const earlyReturn = {
270
- compacted: false,
271
- summaryFailed: undefined as boolean | undefined,
272
- };
273
- if (earlyReturn.summaryFailed !== undefined) {
274
- trackCompactionOutcome(state, earlyReturn.summaryFailed, onEvent);
275
- }
276
- // Counter preserved — the early return did not reset progress toward
277
- // tripping the breaker.
278
- expect(state.consecutiveCompactionFailures).toBe(2);
279
-
280
- // A third real failure then trips the breaker as expected.
281
- trackCompactionOutcome(state, true, onEvent);
282
- expect(state.consecutiveCompactionFailures).toBe(3);
283
- expect(state.compactionCircuitOpenUntil).not.toBeNull();
284
- });
285
-
286
- test("(g) forceCompact-style tracking resets counter on success, increments on failure", () => {
287
- // Regression: `Conversation.forceCompact()` previously didn't track
288
- // circuit-breaker outcomes. A successful user `/compact` wouldn't clear
289
- // an accumulating counter and a failed forced compaction wouldn't
290
- // contribute to tripping the breaker. The fix calls
291
- // `trackCompactionOutcome(this, result.summaryFailed, this.sendToClient)`
292
- // after `maybeCompact` — guarded by `summaryFailed !== undefined` so
293
- // early-return paths don't reset the counter.
294
- const state = makeState();
295
- const { onEvent } = collectEvents();
296
-
297
- // Simulate forceCompact: call maybeCompact with force:true, then
298
- // track the outcome the same way forceCompact now does.
299
- const trackForceCompact = (result: {
300
- summaryFailed?: boolean;
301
- compacted: boolean;
302
- }): void => {
303
- if (result.summaryFailed !== undefined) {
304
- trackCompactionOutcome(state, result.summaryFailed, onEvent);
305
- }
306
- };
307
-
308
- // Two failures via the auto path …
309
- trackCompactionOutcome(state, true, onEvent);
310
- trackCompactionOutcome(state, true, onEvent);
311
- expect(state.consecutiveCompactionFailures).toBe(2);
312
-
313
- // … then the user hits /compact and the forced call succeeds. This
314
- // must clear the stuck counter so the conversation isn't one
315
- // auto-failure away from a cooldown.
316
- trackForceCompact({ summaryFailed: false, compacted: true });
317
- expect(state.consecutiveCompactionFailures).toBe(0);
318
- expect(state.compactionCircuitOpenUntil).toBeNull();
319
-
320
- // Conversely, three forced failures must trip the breaker too — a
321
- // run of broken summaries is a provider-health signal regardless of
322
- // whether the caller bypassed the breaker.
323
- trackForceCompact({ summaryFailed: true, compacted: true });
324
- trackForceCompact({ summaryFailed: true, compacted: true });
325
- trackForceCompact({ summaryFailed: true, compacted: true });
326
- expect(state.consecutiveCompactionFailures).toBe(3);
327
- expect(state.compactionCircuitOpenUntil).not.toBeNull();
328
-
329
- // An early-return forceCompact (e.g. no eligible messages) must not
330
- // reset the counter — the breaker should stay open.
331
- const wasOpenUntil = state.compactionCircuitOpenUntil;
332
- trackForceCompact({ summaryFailed: undefined, compacted: false });
333
- expect(state.consecutiveCompactionFailures).toBe(3);
334
- expect(state.compactionCircuitOpenUntil).toBe(wasOpenUntil);
335
- });
336
- });