@vellumai/assistant 0.6.5 → 0.6.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (443)
  1. package/AGENTS.md +9 -1
  2. package/ARCHITECTURE.md +15 -17
  3. package/Dockerfile +6 -4
  4. package/__tests__/permissions/gateway-threshold-reader.test.ts +283 -0
  5. package/docs/architecture/integrations.md +32 -39
  6. package/docs/architecture/memory.md +25 -30
  7. package/docs/architecture/security.md +7 -6
  8. package/docs/browser-use-architecture-phase2.md +63 -20
  9. package/docs/plugins.md +761 -0
  10. package/examples/plugins/echo/README.md +132 -0
  11. package/examples/plugins/echo/package.json +17 -0
  12. package/examples/plugins/echo/register.ts +187 -0
  13. package/node_modules/@vellumai/egress-proxy/src/types.ts +19 -0
  14. package/openapi.yaml +212 -68
  15. package/package.json +1 -1
  16. package/src/__tests__/app-compiler.test.ts +57 -0
  17. package/src/__tests__/approval-cascade.test.ts +7 -2
  18. package/src/__tests__/auto-analysis-end-to-end.test.ts +1 -0
  19. package/src/__tests__/avatar-generator.test.ts +4 -2
  20. package/src/__tests__/bundled-asset.test.ts +6 -6
  21. package/src/__tests__/catalog-cache.test.ts +69 -0
  22. package/src/__tests__/checker.test.ts +459 -171
  23. package/src/__tests__/circuit-breaker-pipeline.test.ts +406 -0
  24. package/src/__tests__/compaction-events.test.ts +501 -0
  25. package/src/__tests__/compaction-pipeline.test.ts +210 -0
  26. package/src/__tests__/compaction-strip-metadata-clear.test.ts +181 -0
  27. package/src/__tests__/compaction-timeout-recovery.test.ts +262 -0
  28. package/src/__tests__/config-model-image-provider.test.ts +110 -0
  29. package/src/__tests__/config-schema.test.ts +22 -9
  30. package/src/__tests__/config-watcher-cleanup-throttle.test.ts +0 -4
  31. package/src/__tests__/contacts-tools.test.ts +26 -0
  32. package/src/__tests__/context-overflow-policy.test.ts +7 -7
  33. package/src/__tests__/context-window-manager.test.ts +355 -4
  34. package/src/__tests__/conversation-abort-tool-results.test.ts +4 -1
  35. package/src/__tests__/conversation-agent-loop-overflow.test.ts +26 -30
  36. package/src/__tests__/conversation-agent-loop.test.ts +30 -141
  37. package/src/__tests__/conversation-confirmation-signals.test.ts +6 -1
  38. package/src/__tests__/conversation-history-web-search.test.ts +1 -0
  39. package/src/__tests__/conversation-init.benchmark.test.ts +2 -16
  40. package/src/__tests__/conversation-pairing.test.ts +174 -10
  41. package/src/__tests__/conversation-pre-run-repair.test.ts +4 -1
  42. package/src/__tests__/conversation-process-callsite.test.ts +3 -0
  43. package/src/__tests__/conversation-provider-retry-repair.test.ts +16 -7
  44. package/src/__tests__/conversation-queue.test.ts +29 -14
  45. package/src/__tests__/conversation-routes-disk-view.test.ts +7 -6
  46. package/src/__tests__/conversation-runtime-assembly.test.ts +155 -110
  47. package/src/__tests__/conversation-runtime-workspace.test.ts +23 -38
  48. package/src/__tests__/conversation-seed-composer.test.ts +2 -2
  49. package/src/__tests__/conversation-slash-queue.test.ts +7 -2
  50. package/src/__tests__/conversation-slash-unknown.test.ts +25 -2
  51. package/src/__tests__/conversation-speed-override.test.ts +6 -1
  52. package/src/__tests__/conversation-title-service.test.ts +116 -0
  53. package/src/__tests__/conversation-tool-setup-app-refresh.test.ts +41 -2
  54. package/src/__tests__/conversation-usage.test.ts +1 -1
  55. package/src/__tests__/conversation-workspace-cache-state.test.ts +4 -1
  56. package/src/__tests__/conversation-workspace-injection.test.ts +3 -0
  57. package/src/__tests__/conversation-workspace-tool-tracking.test.ts +4 -1
  58. package/src/__tests__/credential-health-service.test.ts +78 -9
  59. package/src/__tests__/credential-security-invariants.test.ts +2 -2
  60. package/src/__tests__/db-schedule-syntax-migration.test.ts +1 -0
  61. package/src/__tests__/empty-response-pipeline.test.ts +305 -0
  62. package/src/__tests__/extension-id-sync-guard.test.ts +3 -3
  63. package/src/__tests__/first-greeting.test.ts +247 -5
  64. package/src/__tests__/headless-browser-mode.test.ts +57 -0
  65. package/src/__tests__/history-repair-pipeline.test.ts +399 -0
  66. package/src/__tests__/host-browser-e2e-cloud.test.ts +307 -0
  67. package/src/__tests__/host-browser-e2e-self-hosted.test.ts +3 -3
  68. package/src/__tests__/host-proxy-interface.test.ts +36 -2
  69. package/src/__tests__/image-credentials.test.ts +137 -0
  70. package/src/__tests__/image-service-dispatcher.test.ts +186 -0
  71. package/src/__tests__/injector-chain.test.ts +526 -0
  72. package/src/__tests__/intent-routing.test.ts +0 -26
  73. package/src/__tests__/llm-call-pipeline.test.ts +285 -0
  74. package/src/__tests__/llm-schema.test.ts +1 -1
  75. package/src/__tests__/media-generate-image.test.ts +119 -13
  76. package/src/__tests__/memory-retrieval-pipeline.test.ts +401 -0
  77. package/src/__tests__/memory-upsert-concurrency.test.ts +1 -0
  78. package/src/__tests__/migration-import-from-url.test.ts +5 -68
  79. package/src/__tests__/model-intents.test.ts +4 -2
  80. package/src/__tests__/notification-broadcaster.test.ts +3 -3
  81. package/src/__tests__/notification-decision-strategy.test.ts +0 -11
  82. package/src/__tests__/notification-schedule-notify-dedup.test.ts +108 -0
  83. package/src/__tests__/oauth-apps-routes.test.ts +1 -1
  84. package/src/__tests__/oauth-cli.test.ts +14 -12
  85. package/src/__tests__/oauth-connect-orchestrator.test.ts +4 -13
  86. package/src/__tests__/oauth-provider-serializer.test.ts +6 -4
  87. package/src/__tests__/oauth-provider-visibility.test.ts +3 -5
  88. package/src/__tests__/oauth-providers-routes.test.ts +3 -2
  89. package/src/__tests__/oauth-store.test.ts +41 -76
  90. package/src/__tests__/onboarding-template-contract.test.ts +16 -64
  91. package/src/__tests__/openai-image-service.test.ts +368 -0
  92. package/src/__tests__/overflow-reduce-pipeline.test.ts +676 -0
  93. package/src/__tests__/permission-checker-host-gate.test.ts +0 -24
  94. package/src/__tests__/persist-onboarding-artifacts.test.ts +266 -0
  95. package/src/__tests__/persistence-pipeline.test.ts +377 -0
  96. package/src/__tests__/pipeline-runner.test.ts +565 -0
  97. package/src/__tests__/platform.test.ts +5 -2
  98. package/src/__tests__/plugin-bootstrap.test.ts +483 -0
  99. package/src/__tests__/plugin-registry.test.ts +273 -0
  100. package/src/__tests__/plugin-route-contribution.test.ts +288 -0
  101. package/src/__tests__/plugin-skill-contribution.test.ts +367 -0
  102. package/src/__tests__/plugin-tool-contribution.test.ts +286 -0
  103. package/src/__tests__/plugin-types.test.ts +320 -0
  104. package/src/__tests__/pricing.test.ts +44 -12
  105. package/src/__tests__/proxy-approval-callback.test.ts +69 -8
  106. package/src/__tests__/reaction-persistence.test.ts +1 -0
  107. package/src/__tests__/regenerate-fire-and-forget-trace.test.ts +1 -0
  108. package/src/__tests__/registry.test.ts +0 -2
  109. package/src/__tests__/schedule-routes.test.ts +131 -1
  110. package/src/__tests__/scheduler-recurrence.test.ts +14 -70
  111. package/src/__tests__/scheduler-reuse-conversation.test.ts +10 -50
  112. package/src/__tests__/secret-detection-handler.test.ts +0 -10
  113. package/src/__tests__/shell-identity.test.ts +0 -134
  114. package/src/__tests__/suggestion-routes.test.ts +103 -4
  115. package/src/__tests__/task-memory-cleanup.test.ts +1 -0
  116. package/src/__tests__/task-scheduler.test.ts +3 -15
  117. package/src/__tests__/test-preload.ts +11 -0
  118. package/src/__tests__/title-generate-pipeline.test.ts +224 -0
  119. package/src/__tests__/token-estimate-pipeline.test.ts +431 -0
  120. package/src/__tests__/tool-error-pipeline.test.ts +244 -0
  121. package/src/__tests__/tool-execute-pipeline.test.ts +431 -0
  122. package/src/__tests__/tool-execution-pipeline.benchmark.test.ts +0 -6
  123. package/src/__tests__/tool-executor-shell-integration.test.ts +7 -10
  124. package/src/__tests__/tool-executor.test.ts +141 -0
  125. package/src/__tests__/tool-result-truncate-pipeline.test.ts +356 -0
  126. package/src/__tests__/tool-result-truncation.test.ts +0 -110
  127. package/src/__tests__/user-plugin-loader.test.ts +191 -0
  128. package/src/__tests__/workspace-migration-046-seed-conversation-starters-callsite.test.ts +185 -0
  129. package/src/__tests__/workspace-migration-049-release-notes-default-sonnet.test.ts +100 -0
  130. package/src/__tests__/workspace-migration-050-seed-main-agent-opus-callsite.test.ts +171 -0
  131. package/src/__tests__/workspace-migration-051-seed-conversation-summarization-callsite.test.ts +252 -0
  132. package/src/__tests__/workspace-migration-remove-hooks.test.ts +99 -0
  133. package/src/__tests__/workspace-policy.test.ts +21 -3
  134. package/src/agent/loop.ts +340 -102
  135. package/src/approvals/__tests__/guardian-feed-event.test.ts +304 -0
  136. package/src/approvals/guardian-request-resolvers.ts +80 -0
  137. package/src/backup/__tests__/backup-worker.test.ts +2 -13
  138. package/src/backup/backup-worker.ts +3 -15
  139. package/src/bundler/app-compiler.ts +84 -1
  140. package/src/calls/call-state.ts +2 -2
  141. package/src/channels/__tests__/types.test.ts +3 -3
  142. package/src/channels/types.ts +6 -4
  143. package/src/cli/__tests__/notifications.test.ts +87 -211
  144. package/src/cli/commands/__tests__/backup.test.ts +1 -1
  145. package/src/cli/commands/__tests__/image-generation.test.ts +255 -35
  146. package/src/cli/commands/__tests__/inference-send.test.ts +12 -0
  147. package/src/cli/commands/__tests__/tts-synthesize.test.ts +12 -0
  148. package/src/cli/commands/backup.ts +2 -2
  149. package/src/cli/commands/clients.ts +138 -0
  150. package/src/cli/commands/completions.ts +2 -9
  151. package/src/cli/commands/conversations.ts +55 -7
  152. package/src/cli/commands/image-generation.ts +33 -34
  153. package/src/cli/commands/notifications.ts +68 -103
  154. package/src/cli/commands/oauth/__tests__/providers-register.test.ts +1 -1
  155. package/src/cli/commands/oauth/__tests__/providers-update.test.ts +1 -1
  156. package/src/cli/commands/oauth/connect.ts +2 -2
  157. package/src/cli/commands/oauth/providers.ts +176 -8
  158. package/src/cli/commands/oauth/status.ts +46 -36
  159. package/src/cli/commands/skills.ts +3 -4
  160. package/src/cli/program.ts +25 -29
  161. package/src/config/__tests__/backup-schema.test.ts +7 -2
  162. package/src/config/bundled-skills/app-builder/SKILL.md +2 -2
  163. package/src/config/bundled-skills/app-builder/references/WIDGETS.md +10 -10
  164. package/src/config/bundled-skills/contacts/tools/contact-merge.ts +66 -87
  165. package/src/config/bundled-skills/contacts/tools/contact-search.ts +28 -51
  166. package/src/config/bundled-skills/contacts/tools/contact-upsert.ts +22 -40
  167. package/src/config/bundled-skills/image-studio/SKILL.md +2 -1
  168. package/src/config/bundled-skills/image-studio/TOOLS.json +2 -1
  169. package/src/config/bundled-skills/image-studio/tools/media-generate-image.ts +23 -39
  170. package/src/config/bundled-skills/messaging/SKILL.md +3 -3
  171. package/src/config/bundled-skills/messaging/tools/__tests__/messaging-feed-events.test.ts +207 -0
  172. package/src/config/bundled-skills/messaging/tools/messaging-archive-by-sender.ts +12 -0
  173. package/src/config/bundled-skills/messaging/tools/messaging-send.ts +58 -0
  174. package/src/config/bundled-skills/schedule/SKILL.md +8 -3
  175. package/src/config/bundled-skills/schedule/TOOLS.json +15 -7
  176. package/src/config/bundled-skills/schedule/references/SCRIPT_MODE_PATTERNS.md +59 -0
  177. package/src/config/bundled-tool-registry.ts +0 -15
  178. package/src/config/feature-flag-registry.json +17 -1
  179. package/src/config/schema.ts +19 -0
  180. package/src/config/schemas/backup.ts +1 -1
  181. package/src/config/schemas/conversations.ts +16 -0
  182. package/src/config/schemas/llm.ts +2 -3
  183. package/src/config/schemas/security.ts +6 -6
  184. package/src/config/schemas/tts.ts +11 -0
  185. package/src/config/skill-state.ts +6 -2
  186. package/src/config/skills.ts +94 -5
  187. package/src/context/__tests__/compact-prompt.test.ts +27 -9
  188. package/src/context/prompts/compact.md +26 -12
  189. package/src/context/tool-result-truncation.ts +3 -63
  190. package/src/context/window-manager.ts +190 -16
  191. package/src/credential-health/credential-health-service.ts +19 -6
  192. package/src/daemon/__tests__/conversation-feed-event.test.ts +317 -0
  193. package/src/daemon/__tests__/conversation-lifecycle-auto-analyze.test.ts +4 -12
  194. package/src/daemon/__tests__/conversation-tool-setup.test.ts +14 -15
  195. package/src/daemon/config-watcher.ts +0 -2
  196. package/src/daemon/context-overflow-policy.ts +4 -13
  197. package/src/daemon/conversation-agent-loop-handlers.ts +83 -22
  198. package/src/daemon/conversation-agent-loop.ts +984 -683
  199. package/src/daemon/conversation-history.ts +10 -19
  200. package/src/daemon/conversation-lifecycle.ts +37 -19
  201. package/src/daemon/conversation-notifiers.ts +2 -110
  202. package/src/daemon/conversation-process.ts +14 -7
  203. package/src/daemon/conversation-runtime-assembly.ts +532 -411
  204. package/src/daemon/conversation-tool-setup.ts +41 -4
  205. package/src/daemon/conversation.ts +80 -35
  206. package/src/daemon/external-plugins-bootstrap.ts +478 -0
  207. package/src/daemon/first-greeting.ts +191 -14
  208. package/src/daemon/handlers/config-model.ts +11 -0
  209. package/src/daemon/handlers/skills.ts +5 -1
  210. package/src/daemon/lifecycle.ts +33 -68
  211. package/src/daemon/message-types/computer-use.ts +2 -34
  212. package/src/daemon/message-types/conversations.ts +49 -0
  213. package/src/daemon/message-types/messages.ts +12 -0
  214. package/src/daemon/server.ts +5 -3
  215. package/src/daemon/shutdown-handlers.ts +2 -12
  216. package/src/daemon/tool-side-effects.ts +14 -56
  217. package/src/heartbeat/__tests__/heartbeat-feed-event.test.ts +160 -0
  218. package/src/heartbeat/heartbeat-service.ts +24 -1
  219. package/src/home/__tests__/feed-population-integration.test.ts +312 -0
  220. package/src/home/emit-feed-event.ts +7 -0
  221. package/src/home/feed-types.ts +41 -2
  222. package/src/home/rewrite-command-preview.ts +66 -0
  223. package/src/ipc/__tests__/socket-path.test.ts +11 -50
  224. package/src/ipc/cli-client.ts +1 -1
  225. package/src/ipc/cli-server.ts +3 -3
  226. package/src/ipc/gateway-client.ts +4 -1
  227. package/src/ipc/routes/browser-context.ts +2 -0
  228. package/src/ipc/routes/browser.ts +1 -0
  229. package/src/ipc/routes/get-contact.ts +16 -0
  230. package/src/ipc/routes/index.ts +14 -0
  231. package/src/ipc/routes/list-clients.ts +31 -0
  232. package/src/ipc/routes/merge-contacts.ts +17 -0
  233. package/src/ipc/routes/notification.ts +133 -0
  234. package/src/ipc/routes/rename-conversation.ts +59 -0
  235. package/src/ipc/routes/search-contacts.ts +19 -0
  236. package/src/ipc/routes/upsert-contact.ts +25 -0
  237. package/src/ipc/socket-path.ts +14 -38
  238. package/src/media/app-icon-generator.ts +23 -46
  239. package/src/media/avatar-router.ts +26 -41
  240. package/src/media/gemini-image-service.ts +8 -41
  241. package/src/media/image-credentials.ts +73 -0
  242. package/src/media/image-service.ts +85 -0
  243. package/src/media/openai-image-service.ts +131 -0
  244. package/src/media/types.ts +46 -0
  245. package/src/memory/conversation-crud.ts +48 -18
  246. package/src/memory/conversation-queries.ts +57 -4
  247. package/src/memory/conversation-title-service.ts +25 -0
  248. package/src/memory/db-init.ts +8 -0
  249. package/src/memory/embedding-gemini.test.ts +41 -2
  250. package/src/memory/embedding-gemini.ts +6 -1
  251. package/src/memory/graph/bootstrap.test.ts +282 -0
  252. package/src/memory/graph/bootstrap.ts +8 -5
  253. package/src/memory/graph/extraction.ts +10 -2
  254. package/src/memory/graph/graph-search.test.ts +1 -0
  255. package/src/memory/graph/inspect.ts +2 -2
  256. package/src/memory/graph/retriever.ts +10 -3
  257. package/src/memory/migrations/041-approval-prompt-ts-tracker.ts +26 -0
  258. package/src/memory/migrations/149-oauth-tables.ts +1 -0
  259. package/src/memory/migrations/223-schedule-script-column.ts +11 -0
  260. package/src/memory/migrations/224-oauth-providers-managed-service-is-paid.ts +24 -0
  261. package/src/memory/migrations/225-oauth-providers-available-scopes.ts +13 -0
  262. package/src/memory/migrations/index.ts +4 -0
  263. package/src/memory/pkb/pkb-index.test.ts +1 -0
  264. package/src/memory/pkb/pkb-reconcile.test.ts +1 -0
  265. package/src/memory/pkb/pkb-search.test.ts +65 -4
  266. package/src/memory/pkb/pkb-search.ts +40 -18
  267. package/src/memory/qdrant-client.test.ts +60 -0
  268. package/src/memory/qdrant-client.ts +25 -0
  269. package/src/memory/schema/infrastructure.ts +1 -0
  270. package/src/memory/schema/oauth.ts +4 -1
  271. package/src/messaging/providers/slack/render-transcript.test.ts +77 -29
  272. package/src/messaging/providers/slack/render-transcript.ts +58 -0
  273. package/src/notifications/conversation-pairing.ts +78 -19
  274. package/src/notifications/copy-composer.ts +0 -5
  275. package/src/notifications/emit-signal.ts +1 -1
  276. package/src/notifications/signal.ts +1 -2
  277. package/src/oauth/AGENTS.md +1 -1
  278. package/src/oauth/__tests__/identity-verifier.test.ts +2 -1
  279. package/src/oauth/connect-orchestrator.ts +8 -34
  280. package/src/oauth/connect-types.ts +6 -10
  281. package/src/oauth/manual-token-connection.ts +23 -0
  282. package/src/oauth/oauth-store.ts +30 -14
  283. package/src/oauth/provider-serializer.ts +6 -1
  284. package/src/oauth/seed-providers.ts +56 -108
  285. package/src/outbound-proxy/http-forwarder.ts +9 -0
  286. package/src/permissions/approval-policy.test.ts +293 -18
  287. package/src/permissions/approval-policy.ts +110 -58
  288. package/src/permissions/arg-parser.test.ts +161 -0
  289. package/src/permissions/arg-parser.ts +141 -0
  290. package/src/permissions/bash-risk-classifier.test.ts +414 -2
  291. package/src/permissions/bash-risk-classifier.ts +303 -60
  292. package/src/permissions/checker.ts +157 -29
  293. package/src/permissions/command-registry.test.ts +239 -0
  294. package/src/permissions/command-registry.ts +234 -54
  295. package/src/permissions/defaults.ts +5 -4
  296. package/src/permissions/gateway-threshold-reader.ts +196 -0
  297. package/src/permissions/prompter.ts +4 -0
  298. package/src/permissions/risk-types.ts +61 -4
  299. package/src/permissions/schedule-risk-classifier.test.ts +129 -0
  300. package/src/permissions/schedule-risk-classifier.ts +85 -0
  301. package/src/permissions/shell-identity.ts +2 -42
  302. package/src/permissions/types.ts +2 -0
  303. package/src/permissions/workspace-policy.ts +8 -3
  304. package/src/plugins/defaults/circuit-breaker.ts +146 -0
  305. package/src/plugins/defaults/compaction.ts +145 -0
  306. package/src/plugins/defaults/empty-response.ts +126 -0
  307. package/src/plugins/defaults/history-repair.ts +85 -0
  308. package/src/plugins/defaults/index.ts +116 -0
  309. package/src/plugins/defaults/injectors.ts +491 -0
  310. package/src/plugins/defaults/llm-call.ts +82 -0
  311. package/src/plugins/defaults/memory-retrieval.ts +226 -0
  312. package/src/plugins/defaults/overflow-reduce.ts +181 -0
  313. package/src/plugins/defaults/persistence.ts +129 -0
  314. package/src/plugins/defaults/title-generate.ts +95 -0
  315. package/src/plugins/defaults/token-estimate.ts +104 -0
  316. package/src/plugins/defaults/tool-error.ts +126 -0
  317. package/src/plugins/defaults/tool-execute.ts +89 -0
  318. package/src/plugins/defaults/tool-result-truncate.ts +88 -0
  319. package/src/plugins/pipeline.ts +316 -0
  320. package/src/plugins/plugin-skill-contributions.ts +292 -0
  321. package/src/plugins/registry.ts +241 -0
  322. package/src/plugins/types.ts +1134 -0
  323. package/src/plugins/user-loader.ts +177 -0
  324. package/src/prompts/templates/BOOTSTRAP.md +27 -77
  325. package/src/providers/model-catalog.ts +52 -29
  326. package/src/providers/model-intents.ts +1 -1
  327. package/src/providers/openrouter/client.ts +5 -1
  328. package/src/providers/speech-to-text/deepgram-realtime.test.ts +61 -0
  329. package/src/providers/speech-to-text/deepgram-realtime.ts +57 -0
  330. package/src/providers/speech-to-text/xai-realtime.test.ts +72 -4
  331. package/src/providers/speech-to-text/xai-realtime.ts +39 -14
  332. package/src/runtime/AGENTS.md +25 -16
  333. package/src/runtime/__tests__/browser-extension-pair-routes.test.ts +3 -3
  334. package/src/runtime/__tests__/client-registry.test.ts +293 -0
  335. package/src/runtime/client-registry.ts +261 -0
  336. package/src/runtime/http-server.ts +77 -8
  337. package/src/runtime/http-types.ts +0 -2
  338. package/src/runtime/migrations/vbundle-builder.ts +1 -22
  339. package/src/runtime/routes/approval-prompt-ts-tracker.ts +51 -31
  340. package/src/runtime/routes/approval-routes.ts +17 -0
  341. package/src/runtime/routes/browser-extension-pair-routes.ts +27 -8
  342. package/src/runtime/routes/conversation-routes.ts +223 -116
  343. package/src/runtime/routes/inbound-message-handler.ts +88 -13
  344. package/src/runtime/routes/memory-item-routes.test.ts +1 -0
  345. package/src/runtime/routes/migration-routes.ts +0 -3
  346. package/src/runtime/routes/playground/__tests__/force-compact.test.ts +284 -0
  347. package/src/runtime/routes/playground/__tests__/guard.test.ts +80 -0
  348. package/src/runtime/routes/playground/__tests__/inject-failures.test.ts +294 -0
  349. package/src/runtime/routes/playground/__tests__/reset-circuit.test.ts +271 -0
  350. package/src/runtime/routes/playground/__tests__/seed-conversation.test.ts +202 -0
  351. package/src/runtime/routes/playground/__tests__/seeded-conversations.test.ts +309 -0
  352. package/src/runtime/routes/playground/__tests__/state.test.ts +224 -0
  353. package/src/runtime/routes/playground/conversation-not-found.ts +29 -0
  354. package/src/runtime/routes/playground/deps.ts +56 -0
  355. package/src/runtime/routes/playground/force-compact.ts +73 -0
  356. package/src/runtime/routes/playground/guard.ts +37 -0
  357. package/src/runtime/routes/playground/index.ts +28 -0
  358. package/src/runtime/routes/playground/inject-failures.ts +159 -0
  359. package/src/runtime/routes/playground/reset-circuit.ts +115 -0
  360. package/src/runtime/routes/playground/seed-conversation.ts +139 -0
  361. package/src/runtime/routes/playground/seeded-conversations.ts +78 -0
  362. package/src/runtime/routes/playground/state.ts +78 -0
  363. package/src/runtime/routes/schedule-routes.ts +89 -8
  364. package/src/runtime/skill-route-registry.ts +75 -15
  365. package/src/schedule/run-script.ts +68 -0
  366. package/src/schedule/schedule-store.ts +7 -1
  367. package/src/schedule/scheduler.ts +48 -8
  368. package/src/skills/catalog-cache.ts +12 -5
  369. package/src/tools/browser/__tests__/browser-status.test.ts +189 -0
  370. package/src/tools/browser/browser-execution.ts +88 -19
  371. package/src/tools/browser/cdp-client/__tests__/extension-cdp-client.test.ts +230 -0
  372. package/src/tools/browser/cdp-client/__tests__/factory.test.ts +146 -3
  373. package/src/tools/browser/cdp-client/extension-cdp-client.ts +54 -3
  374. package/src/tools/browser/cdp-client/factory.ts +15 -4
  375. package/src/tools/executor.ts +126 -74
  376. package/src/tools/network/script-proxy/session-manager.ts +37 -1
  377. package/src/tools/permission-checker.ts +98 -49
  378. package/src/tools/policy-context.ts +4 -0
  379. package/src/tools/registry.ts +140 -3
  380. package/src/tools/schedule/create.ts +23 -8
  381. package/src/tools/schedule/update.ts +3 -1
  382. package/src/tools/secret-detection-handler.ts +0 -51
  383. package/src/tools/system/avatar-generator.ts +6 -2
  384. package/src/tools/types.ts +28 -2
  385. package/src/util/platform.ts +7 -2
  386. package/src/util/pricing.ts +26 -3
  387. package/src/workspace/migrations/006-services-config.ts +2 -4
  388. package/src/workspace/migrations/022-move-hooks-to-workspace.ts +2 -3
  389. package/src/workspace/migrations/041-backfill-google-gmail-settings-scope.ts +3 -4
  390. package/src/workspace/migrations/046-seed-conversation-starters-callsite.ts +108 -0
  391. package/src/workspace/migrations/047-remove-watch-callsites.ts +54 -0
  392. package/src/workspace/migrations/048-remove-workspace-hooks.ts +81 -0
  393. package/src/workspace/migrations/049-release-notes-default-sonnet.ts +80 -0
  394. package/src/workspace/migrations/050-seed-main-agent-opus-callsite.ts +86 -0
  395. package/src/workspace/migrations/051-seed-conversation-summarization-callsite.ts +128 -0
  396. package/src/workspace/migrations/registry.ts +12 -0
  397. package/tsconfig.json +1 -1
  398. package/hook-templates/debug-prompt-logger/hook.json +0 -7
  399. package/hook-templates/debug-prompt-logger/run.sh +0 -66
  400. package/src/__tests__/compaction-circuit-breaker.test.ts +0 -336
  401. package/src/__tests__/context-overflow-approval.test.ts +0 -156
  402. package/src/__tests__/hooks-blocking.test.ts +0 -178
  403. package/src/__tests__/hooks-cli.test.ts +0 -182
  404. package/src/__tests__/hooks-config.test.ts +0 -108
  405. package/src/__tests__/hooks-discovery.test.ts +0 -211
  406. package/src/__tests__/hooks-integration.test.ts +0 -196
  407. package/src/__tests__/hooks-manager.test.ts +0 -226
  408. package/src/__tests__/hooks-runner.test.ts +0 -175
  409. package/src/__tests__/hooks-settings.test.ts +0 -160
  410. package/src/__tests__/hooks-templates.test.ts +0 -169
  411. package/src/__tests__/hooks-ts-runner.test.ts +0 -170
  412. package/src/__tests__/hooks-watch.test.ts +0 -112
  413. package/src/__tests__/notification-schedule-dedup.test.ts +0 -213
  414. package/src/__tests__/oauth-scope-policy.test.ts +0 -180
  415. package/src/__tests__/send-notification-tool.test.ts +0 -83
  416. package/src/cli/commands/shotgun.ts +0 -266
  417. package/src/config/bundled-skills/conversations/SKILL.md +0 -20
  418. package/src/config/bundled-skills/conversations/TOOLS.json +0 -23
  419. package/src/config/bundled-skills/conversations/tools/rename-conversation.ts +0 -88
  420. package/src/config/bundled-skills/heartbeat/SKILL.md +0 -43
  421. package/src/config/bundled-skills/notifications/SKILL.md +0 -40
  422. package/src/config/bundled-skills/notifications/TOOLS.json +0 -80
  423. package/src/config/bundled-skills/notifications/tools/send-notification.ts +0 -152
  424. package/src/config/bundled-skills/notifications/tools/shared.ts +0 -13
  425. package/src/config/bundled-skills/screen-watch/SKILL.md +0 -27
  426. package/src/config/bundled-skills/screen-watch/TOOLS.json +0 -35
  427. package/src/config/bundled-skills/screen-watch/tools/start-screen-watch.ts +0 -12
  428. package/src/config/bundled-skills/skills-catalog/SKILL.md +0 -84
  429. package/src/daemon/context-overflow-approval.ts +0 -52
  430. package/src/daemon/watch-handler.ts +0 -399
  431. package/src/hooks/cli.ts +0 -253
  432. package/src/hooks/config.ts +0 -100
  433. package/src/hooks/discovery.ts +0 -135
  434. package/src/hooks/manager.ts +0 -179
  435. package/src/hooks/runner.ts +0 -117
  436. package/src/hooks/templates.ts +0 -77
  437. package/src/hooks/types.ts +0 -75
  438. package/src/oauth/scope-policy.ts +0 -89
  439. package/src/runtime/gateway-internal-client.ts +0 -94
  440. package/src/runtime/routes/watch-routes.ts +0 -156
  441. package/src/signals/shotgun.ts +0 -203
  442. package/src/tools/watch/screen-watch.ts +0 -144
  443. package/src/tools/watch/watch-state.ts +0 -142
@@ -0,0 +1,676 @@
1
+ /**
2
+ * Unit tests for the default `overflowReduce` plugin (PR 23).
3
+ *
4
+ * Two goals:
5
+ * 1. The default middleware produces results **identical** to the historical
6
+ * inline tier loop for a golden set of over-budget histories. We exercise
7
+ * this by running the same inputs through two paths — the pipeline and a
8
+ * faithful re-implementation of the pre-PR-23 inline loop — and asserting
9
+ * the final `(messages, runMessages, injectionMode, reducerState,
10
+ * reducerCompacted, attempts)` tuple matches byte-for-byte.
11
+ * 2. A user-registered spy middleware wrapped around the default observes
12
+ * every pipeline call. This covers the onion-composition contract: the
13
+ * spy is invoked once per call (not once per reducer iteration) and reads
14
+ * the iteration count from `result.attempts` without changing reducer behavior.
15
+ *
16
+ * The test creates its own plugin registry via
17
+ * `resetPluginRegistryForTests()` and re-registers the default before each
18
+ * case so the registry is deterministic across runs.
19
+ */
20
+
21
+ import { beforeEach, describe, expect, test } from "bun:test";
22
+
23
+ import { estimatePromptTokens } from "../context/token-estimator.js";
24
+ import type {
25
+ ContextWindowCompactOptions,
26
+ ContextWindowResult,
27
+ } from "../context/window-manager.js";
28
+ import { createContextSummaryMessage } from "../context/window-manager.js";
29
+ import {
30
+ createInitialReducerState,
31
+ reduceContextOverflow,
32
+ type ReducerState,
33
+ } from "../daemon/context-overflow-reducer.js";
34
+ import type {
35
+ InjectionMode,
36
+ TrustContext,
37
+ } from "../daemon/conversation-runtime-assembly.js";
38
+ import {
39
+ defaultOverflowReduceMiddleware,
40
+ defaultOverflowReducePlugin,
41
+ } from "../plugins/defaults/overflow-reduce.js";
42
+ import { runPipeline } from "../plugins/pipeline.js";
43
+ import {
44
+ getMiddlewaresFor,
45
+ registerPlugin,
46
+ resetPluginRegistryForTests,
47
+ } from "../plugins/registry.js";
48
+ import type {
49
+ Middleware,
50
+ OverflowReduceArgs,
51
+ OverflowReduceResult,
52
+ Plugin,
53
+ TurnContext,
54
+ } from "../plugins/types.js";
55
+ import type { Message } from "../providers/types.js";
56
+
57
+ // ── Fixtures ────────────────────────────────────────────────────────────────
58
+
59
+ function msg(role: "user" | "assistant", text: string): Message {
60
+ return { role, content: [{ type: "text", text }] };
61
+ }
62
+
63
+ function toolUseMsg(id: string, name: string): Message {
64
+ return {
65
+ role: "assistant",
66
+ content: [{ type: "tool_use", id, name, input: { path: "/tmp/test" } }],
67
+ };
68
+ }
69
+
70
+ function toolResultMsg(toolUseId: string, content: string): Message {
71
+ return {
72
+ role: "user",
73
+ content: [{ type: "tool_result", tool_use_id: toolUseId, content }],
74
+ };
75
+ }
76
+
77
+ const SYSTEM_PROMPT = "You are a helpful assistant.";
78
+
79
+ const CONTEXT_WINDOW = {
80
+ enabled: true,
81
+ maxInputTokens: 2000,
82
+ targetBudgetRatio: 0.65,
83
+ compactThreshold: 0.6,
84
+ summaryBudgetRatio: 0.05,
85
+ overflowRecovery: {
86
+ enabled: true,
87
+ safetyMarginRatio: 0.05,
88
+ maxAttempts: 3,
89
+ interactiveLatestTurnCompression: "summarize" as const,
90
+ nonInteractiveLatestTurnCompression: "truncate" as const,
91
+ },
92
+ };
93
+
94
+ const TRUST: TrustContext = {
95
+ sourceChannel: "vellum",
96
+ trustClass: "guardian",
97
+ };
98
+
99
+ function makeTurnContext(overrides: Partial<TurnContext> = {}): TurnContext {
100
+ return {
101
+ requestId: "req-overflow-test",
102
+ conversationId: "conv-overflow-test",
103
+ turnIndex: 0,
104
+ trust: TRUST,
105
+ ...overrides,
106
+ };
107
+ }
108
+
109
+ /**
110
+ * Minimal compaction stub — always compacts to a one-message summary so the
111
+ * reducer's forced-compaction tier succeeds. Mirrors `makeCompactFn` from
112
+ * `context-overflow-reducer.test.ts` so the two test suites exercise the
113
+ * reducer under comparable conditions.
114
+ */
115
+ function makeCompactFn(
116
+ summaryText = "## Goals\n- compacted summary",
117
+ ): (
118
+ messages: Message[],
119
+ signal: AbortSignal | undefined,
120
+ options: ContextWindowCompactOptions,
121
+ ) => Promise<ContextWindowResult> {
122
+ return async (messages, _signal, _options) => {
123
+ const summaryMsg = createContextSummaryMessage(summaryText);
124
+ const compactedMessages = [summaryMsg];
125
+ const estimatedInputTokens = estimatePromptTokens(
126
+ compactedMessages,
127
+ SYSTEM_PROMPT,
128
+ { providerName: "mock" },
129
+ );
130
+ return {
131
+ messages: compactedMessages,
132
+ compacted: true,
133
+ previousEstimatedInputTokens: estimatePromptTokens(
134
+ messages,
135
+ SYSTEM_PROMPT,
136
+ { providerName: "mock" },
137
+ ),
138
+ estimatedInputTokens,
139
+ maxInputTokens: 2000,
140
+ thresholdTokens: 1200,
141
+ compactedMessages: messages.length,
142
+ compactedPersistedMessages: messages.length,
143
+ summaryCalls: 1,
144
+ summaryInputTokens: 100,
145
+ summaryOutputTokens: 50,
146
+ summaryModel: "mock-model",
147
+ summaryText,
148
+ };
149
+ };
150
+ }
151
+
152
+ /**
153
+ * Faithful re-implementation of the pre-PR-23 inline tier loop — lives in
154
+ * this test file rather than the production module so we have an immutable
155
+ * baseline the default middleware can be diffed against. If either
156
+ * implementation drifts, the golden-output cases below fail.
157
+ *
158
+ * The function intentionally avoids any side effects on external state — no
159
+ * circuit-breaker tracking, no activity emission, no `applyCompactionResult`.
160
+ * The production orchestrator still runs those through callbacks; this
161
+ * baseline only needs the *message mutation* behavior so we can compare
162
+ * reducer output.
163
+ */
164
+ async function runInlineBaseline(args: {
165
+ readonly messages: Message[];
166
+ readonly runMessages: Message[];
167
+ readonly systemPrompt: string;
168
+ readonly providerName: string;
169
+ readonly preflightBudget: number;
170
+ readonly toolTokenBudget?: number;
171
+ readonly maxAttempts: number;
172
+ readonly abortSignal?: AbortSignal;
173
+ readonly compactFn: (
174
+ messages: Message[],
175
+ signal: AbortSignal | undefined,
176
+ options: ContextWindowCompactOptions,
177
+ ) => Promise<ContextWindowResult>;
178
+ readonly contextWindow: typeof CONTEXT_WINDOW;
179
+ readonly reinjectForMode: (
180
+ reducedMessages: Message[],
181
+ mode: InjectionMode,
182
+ stepCompacted: boolean,
183
+ accumulatedCompacted: boolean,
184
+ ) => Promise<Message[]>;
185
+ readonly estimatePostInjection: (runMsgs: Message[]) => number;
186
+ }): Promise<{
187
+ messages: Message[];
188
+ runMessages: Message[];
189
+ injectionMode: InjectionMode;
190
+ reducerState: ReducerState;
191
+ reducerCompacted: boolean;
192
+ attempts: number;
193
+ }> {
194
+ let messages = args.messages;
195
+ let runMessages = args.runMessages;
196
+ let injectionMode: InjectionMode = "full";
197
+ let reducerState: ReducerState = createInitialReducerState();
198
+ let reducerCompacted = false;
199
+ let attempts = 0;
200
+
201
+ while (attempts < args.maxAttempts && !reducerState.exhausted) {
202
+ args.abortSignal?.throwIfAborted();
203
+ attempts++;
204
+ const step = await reduceContextOverflow(
205
+ messages,
206
+ {
207
+ providerName: args.providerName,
208
+ systemPrompt: args.systemPrompt,
209
+ contextWindow: args.contextWindow,
210
+ targetTokens: args.preflightBudget,
211
+ toolTokenBudget: args.toolTokenBudget,
212
+ },
213
+ reducerState,
214
+ args.compactFn,
215
+ args.abortSignal,
216
+ );
217
+
218
+ reducerState = step.state;
219
+ messages = step.messages;
220
+ injectionMode = step.state.injectionMode;
221
+
222
+ const stepCompacted = step.compactionResult?.compacted === true;
223
+ if (stepCompacted) {
224
+ reducerCompacted = true;
225
+ }
226
+
227
+ args.abortSignal?.throwIfAborted();
228
+
229
+ runMessages = await args.reinjectForMode(
230
+ messages,
231
+ injectionMode,
232
+ stepCompacted,
233
+ reducerCompacted,
234
+ );
235
+
236
+ const postInjectionTokens = args.estimatePostInjection(runMessages);
237
+ if (postInjectionTokens <= args.preflightBudget) break;
238
+ }
239
+
240
+ return {
241
+ messages,
242
+ runMessages,
243
+ injectionMode,
244
+ reducerState,
245
+ reducerCompacted,
246
+ attempts,
247
+ };
248
+ }
249
+
250
+ function buildArgs(messages: Message[]): {
251
+ args: OverflowReduceArgs;
252
+ reinjectCalls: Array<{
253
+ mode: InjectionMode;
254
+ stepCompacted: boolean;
255
+ accumulatedCompacted: boolean;
256
+ }>;
257
+ compactionResults: ContextWindowResult[];
258
+ rawCompactFn: (
259
+ messages: Message[],
260
+ signal: AbortSignal | undefined,
261
+ options: ContextWindowCompactOptions,
262
+ ) => Promise<ContextWindowResult>;
263
+ } {
264
+ const reinjectCalls: Array<{
265
+ mode: InjectionMode;
266
+ stepCompacted: boolean;
267
+ accumulatedCompacted: boolean;
268
+ }> = [];
269
+ const compactionResults: ContextWindowResult[] = [];
270
+ const compactFn = makeCompactFn();
271
+
272
+ // Identity reinject: the test harness does not exercise the full
273
+ // `applyRuntimeInjections` pipeline; it simply tracks how many times the
274
+ // orchestrator would have been asked to rebuild `runMessages` so the spy
275
+ // middleware can attribute each iteration. Returns the reducer's latest
276
+ // `messages` untouched — real orchestrator code re-injects runtime blocks.
277
+ const reinjectForMode = async (
278
+ reducedMessages: Message[],
279
+ mode: InjectionMode,
280
+ stepCompacted: boolean,
281
+ accumulatedCompacted: boolean,
282
+ ): Promise<Message[]> => {
283
+ reinjectCalls.push({ mode, stepCompacted, accumulatedCompacted });
284
+ return reducedMessages;
285
+ };
286
+
287
+ const estimatePostInjection = (runMsgs: Message[]): number =>
288
+ estimatePromptTokens(runMsgs, SYSTEM_PROMPT, {
289
+ providerName: "mock",
290
+ });
291
+
292
+ const args: OverflowReduceArgs = {
293
+ messages,
294
+ runMessages: messages,
295
+ systemPrompt: SYSTEM_PROMPT,
296
+ providerName: "mock",
297
+ contextWindow: CONTEXT_WINDOW,
298
+ preflightBudget: 1000,
299
+ toolTokenBudget: 0,
300
+ maxAttempts: CONTEXT_WINDOW.overflowRecovery.maxAttempts,
301
+ // `OverflowReduceArgs.compactFn` types `options` as `unknown` to avoid
302
+ // leaking the `ContextWindowCompactOptions` shape into the plugin
303
+ // surface. The test helper produces a real `ContextWindowCompactOptions`
304
+ // signature, so we trampoline through a widened wrapper.
305
+ compactFn: (msgs, signal, opts) =>
306
+ compactFn(msgs, signal, opts as ContextWindowCompactOptions),
307
+ emitActivityState: () => {
308
+ /* no-op — the orchestrator owns activity emission */
309
+ },
310
+ onCompactionResult: (result) => {
311
+ compactionResults.push(result);
312
+ },
313
+ reinjectForMode,
314
+ estimatePostInjection,
315
+ };
316
+
317
+ return { args, reinjectCalls, compactionResults, rawCompactFn: compactFn };
318
+ }
319
+
320
+ // ── Test suite ──────────────────────────────────────────────────────────────
321
+
322
+ describe("overflow-reduce pipeline", () => {
323
+ beforeEach(() => {
324
+ resetPluginRegistryForTests();
325
+ registerPlugin(defaultOverflowReducePlugin);
326
+ });
327
+
328
+ describe("default middleware matches historical inline loop", () => {
329
+ test("large tool-result history — identical reduced output", async () => {
330
+ const longToolResult = "r".repeat(8000);
331
+ const goldenHistory: Message[] = [
332
+ msg("user", "Start"),
333
+ toolUseMsg("tu_1", "read_file"),
334
+ toolResultMsg("tu_1", longToolResult),
335
+ msg("assistant", "Result"),
336
+ msg("user", "Next"),
337
+ ];
338
+
339
+ const pipelineBuild = buildArgs(goldenHistory);
340
+ const inlineBuild = buildArgs(goldenHistory);
341
+
342
+ // Run both paths against the SAME fixture. `buildArgs` gives each
343
+ // call its own `compactFn` instance so nothing leaks between runs.
344
+ const pipelineResult = await runPipeline<
345
+ OverflowReduceArgs,
346
+ OverflowReduceResult
347
+ >(
348
+ "overflowReduce",
349
+ getMiddlewaresFor("overflowReduce"),
350
+ // Sentinel terminal — the default middleware doesn't call next,
351
+ // so this must never fire. Assert that invariant here.
352
+ async () => {
353
+ throw new Error("terminal unexpectedly reached");
354
+ },
355
+ pipelineBuild.args,
356
+ makeTurnContext(),
357
+ 30000,
358
+ );
359
+
360
+ const inlineResult = await runInlineBaseline({
361
+ messages: goldenHistory,
362
+ runMessages: goldenHistory,
363
+ systemPrompt: SYSTEM_PROMPT,
364
+ providerName: "mock",
365
+ preflightBudget: inlineBuild.args.preflightBudget,
366
+ toolTokenBudget: inlineBuild.args.toolTokenBudget,
367
+ maxAttempts: inlineBuild.args.maxAttempts,
368
+ compactFn: inlineBuild.rawCompactFn,
369
+ contextWindow: CONTEXT_WINDOW,
370
+ reinjectForMode: inlineBuild.args.reinjectForMode,
371
+ estimatePostInjection: inlineBuild.args.estimatePostInjection,
372
+ });
373
+
374
+ // Byte-for-byte match across every field the orchestrator relies on.
375
+ expect(pipelineResult.messages).toEqual(inlineResult.messages);
376
+ expect(pipelineResult.runMessages).toEqual(inlineResult.runMessages);
377
+ expect(pipelineResult.injectionMode).toBe(inlineResult.injectionMode);
378
+ expect(pipelineResult.reducerState).toEqual(inlineResult.reducerState);
379
+ expect(pipelineResult.reducerCompacted).toBe(
380
+ inlineResult.reducerCompacted,
381
+ );
382
+ expect(pipelineResult.attempts).toBe(inlineResult.attempts);
383
+ });
384
+
385
+ test("small conversation that fits after first reduction — single attempt", async () => {
386
+ // A history that is already far below the preflight budget: after the
387
+ // first reduction step the estimate is under budget, so the loop must exit.
388
+ const smallHistory: Message[] = [
389
+ msg("user", "Hello"),
390
+ msg("assistant", "Hi there — how can I help?"),
391
+ ];
392
+
393
+ const pipelineBuild = buildArgs(smallHistory);
394
+ const inlineBuild = buildArgs(smallHistory);
395
+
396
+ const pipelineResult = await runPipeline<
397
+ OverflowReduceArgs,
398
+ OverflowReduceResult
399
+ >(
400
+ "overflowReduce",
401
+ getMiddlewaresFor("overflowReduce"),
402
+ async () => {
403
+ throw new Error("terminal unexpectedly reached");
404
+ },
405
+ pipelineBuild.args,
406
+ makeTurnContext(),
407
+ 30000,
408
+ );
409
+ const inlineResult = await runInlineBaseline({
410
+ messages: smallHistory,
411
+ runMessages: smallHistory,
412
+ systemPrompt: SYSTEM_PROMPT,
413
+ providerName: "mock",
414
+ preflightBudget: inlineBuild.args.preflightBudget,
415
+ toolTokenBudget: inlineBuild.args.toolTokenBudget,
416
+ maxAttempts: inlineBuild.args.maxAttempts,
417
+ compactFn: inlineBuild.rawCompactFn,
418
+ contextWindow: CONTEXT_WINDOW,
419
+ reinjectForMode: inlineBuild.args.reinjectForMode,
420
+ estimatePostInjection: inlineBuild.args.estimatePostInjection,
421
+ });
422
+
423
+ expect(pipelineResult.attempts).toBe(inlineResult.attempts);
424
+ expect(pipelineResult.attempts).toBe(1);
425
+ expect(pipelineResult.messages).toEqual(inlineResult.messages);
426
+ expect(pipelineResult.reducerCompacted).toBe(
427
+ inlineResult.reducerCompacted,
428
+ );
429
+ });
430
+ });
431
+
432
+ describe("spy middleware observes each reduction attempt", () => {
433
+ test("spy sees one invocation when the default converges in one step", async () => {
434
+ const history: Message[] = [msg("user", "Hello"), msg("assistant", "Hi")];
435
+
436
+ // Spy tracks the args passed into its layer. It must forward via
437
+ // `next` so the default still fires.
438
+ const spyCalls: Array<{
439
+ hadMessages: number;
440
+ budget: number;
441
+ attempts: number;
442
+ }> = [];
443
+ const spy: Middleware<OverflowReduceArgs, OverflowReduceResult> =
444
+ async function spyMiddleware(args, next, _ctx) {
445
+ spyCalls.push({
446
+ hadMessages: args.messages.length,
447
+ budget: args.preflightBudget,
448
+ attempts: 0, // populated after next() from the result
449
+ });
450
+ const result = await next(args);
451
+ spyCalls[spyCalls.length - 1]!.attempts = result.attempts;
452
+ return result;
453
+ };
454
+ const spyPlugin: Plugin = {
455
+ manifest: {
456
+ name: "spy-overflow",
457
+ version: "0.0.1",
458
+ requires: { pluginRuntime: "v1", overflowReduceApi: "v1" },
459
+ },
460
+ middleware: { overflowReduce: spy },
461
+ };
462
+ // Register spy first so it wraps the default (registration order =
463
+ // outer→inner). The default therefore runs as the spy's downstream.
464
+ resetPluginRegistryForTests();
465
+ registerPlugin(spyPlugin);
466
+ registerPlugin(defaultOverflowReducePlugin);
467
+
468
+ const { args } = buildArgs(history);
469
+ const result = await runPipeline<
470
+ OverflowReduceArgs,
471
+ OverflowReduceResult
472
+ >(
473
+ "overflowReduce",
474
+ getMiddlewaresFor("overflowReduce"),
475
+ async () => {
476
+ throw new Error("terminal unexpectedly reached");
477
+ },
478
+ args,
479
+ makeTurnContext(),
480
+ 30000,
481
+ );
482
+
483
+ // Spy was called exactly once — the pipeline invokes each middleware
484
+ // once per pipeline call, not once per reducer iteration. Iteration
485
+ // count shows up in the result.attempts field.
486
+ expect(spyCalls).toHaveLength(1);
487
+ expect(spyCalls[0]?.hadMessages).toBe(2);
488
+ expect(spyCalls[0]?.budget).toBe(1000);
489
+ expect(spyCalls[0]?.attempts).toBe(result.attempts);
490
+ expect(result.attempts).toBeGreaterThanOrEqual(1);
491
+ });
492
+
493
+ test("spy can short-circuit the default by not calling next", async () => {
494
+ const history: Message[] = [msg("user", "Hi")];
495
+
496
+ const shortCircuit: Middleware<OverflowReduceArgs, OverflowReduceResult> =
497
+ async function shortCircuitMiddleware(args, _next, _ctx) {
498
+ // Returns a synthetic "no-op" result — the default is never invoked.
499
+ return {
500
+ messages: args.messages,
501
+ runMessages: args.runMessages,
502
+ injectionMode: "minimal",
503
+ reducerState: {
504
+ appliedTiers: ["injection_downgrade"],
505
+ injectionMode: "minimal",
506
+ exhausted: true,
507
+ },
508
+ reducerCompacted: false,
509
+ attempts: 0,
510
+ };
511
+ };
512
+ resetPluginRegistryForTests();
513
+ registerPlugin({
514
+ manifest: {
515
+ name: "short-circuit-overflow",
516
+ version: "0.0.1",
517
+ requires: { pluginRuntime: "v1", overflowReduceApi: "v1" },
518
+ },
519
+ middleware: { overflowReduce: shortCircuit },
520
+ });
521
+ registerPlugin(defaultOverflowReducePlugin);
522
+
523
+ const { args, compactionResults, reinjectCalls } = buildArgs(history);
524
+ const result = await runPipeline<
525
+ OverflowReduceArgs,
526
+ OverflowReduceResult
527
+ >(
528
+ "overflowReduce",
529
+ getMiddlewaresFor("overflowReduce"),
530
+ async () => {
531
+ throw new Error("terminal unexpectedly reached");
532
+ },
533
+ args,
534
+ makeTurnContext(),
535
+ 30000,
536
+ );
537
+
538
+ // Because the outer middleware short-circuited, the default never
539
+ // ran — no compactFn invocations, no reinject callbacks.
540
+ expect(result.injectionMode).toBe("minimal");
541
+ expect(result.attempts).toBe(0);
542
+ expect(compactionResults).toHaveLength(0);
543
+ expect(reinjectCalls).toHaveLength(0);
544
+ });
545
+ });
546
+
547
+ describe("direct middleware invocation", () => {
548
+ test("default middleware without the pipeline runner still executes the tier loop", async () => {
549
+ const history: Message[] = [msg("user", "Hi")];
550
+ const { args } = buildArgs(history);
551
+
552
+ const result = await defaultOverflowReduceMiddleware(
553
+ args,
554
+ async () => {
555
+ throw new Error("next should not be invoked by the default");
556
+ },
557
+ makeTurnContext(),
558
+ );
559
+
560
+ expect(result.attempts).toBeGreaterThanOrEqual(1);
561
+ expect(result.reducerState.appliedTiers.length).toBeGreaterThanOrEqual(1);
562
+ });
563
+ });
564
+
565
+ describe("abort signal propagation", () => {
566
+ test("middleware bails between iterations when abortSignal fires", async () => {
567
+ // History that won't converge in one step — multiple iterations.
568
+ const longToolResult = "r".repeat(8000);
569
+ const history: Message[] = [
570
+ msg("user", "Start"),
571
+ toolUseMsg("tu_1", "read_file"),
572
+ toolResultMsg("tu_1", longToolResult),
573
+ msg("user", "Next"),
574
+ ];
575
+
576
+ const controller = new AbortController();
577
+ const build = buildArgs(history);
578
+ // Abort on the first `estimatePostInjection` — simulates the
579
+ // pipeline-level timeout firing mid-turn. The next loop iteration
580
+ // must see the signal and throw rather than starting another round.
581
+ let estimateCalls = 0;
582
+ const aborting: OverflowReduceArgs = {
583
+ ...build.args,
584
+ abortSignal: controller.signal,
585
+ estimatePostInjection: () => {
586
+ estimateCalls++;
587
+ if (estimateCalls === 1) controller.abort();
588
+ // Return a value that guarantees another iteration would fire
589
+ // without the abort gate.
590
+ return build.args.preflightBudget + 1_000_000;
591
+ },
592
+ };
593
+
594
+ await expect(
595
+ defaultOverflowReduceMiddleware(
596
+ aborting,
597
+ async () => {
598
+ throw new Error("next should not be invoked");
599
+ },
600
+ makeTurnContext(),
601
+ ),
602
+ ).rejects.toThrow();
603
+ // The `rejects` assertion is awaited so a non-rejecting middleware fails
604
+ // this test and the promise has settled before the counter is read.
605
+ // Exactly one iteration ran; the abort gate stopped the next round.
606
+ expect(estimateCalls).toBe(1);
607
+ });
608
+
609
+ test("middleware refuses to start when abortSignal is already aborted", async () => {
610
+ const history: Message[] = [msg("user", "Hi")];
611
+ const controller = new AbortController();
612
+ controller.abort();
613
+ const build = buildArgs(history);
614
+ const args: OverflowReduceArgs = {
615
+ ...build.args,
616
+ abortSignal: controller.signal,
617
+ };
618
+
619
+ await expect(
620
+ defaultOverflowReduceMiddleware(
621
+ args,
622
+ async () => {
623
+ throw new Error("next should not be invoked");
624
+ },
625
+ makeTurnContext(),
626
+ ),
627
+ ).rejects.toThrow();
628
+ // The awaited `rejects` assertion above has already settled the promise.
629
+ // Reducer never ran — zero compaction and reinject callbacks observed.
630
+ expect(build.compactionResults).toHaveLength(0);
631
+ expect(build.reinjectCalls).toHaveLength(0);
632
+ });
633
+ });
634
+
635
+ describe("reinjectForMode two-flag semantics", () => {
636
+ test("stepCompacted reflects current iteration; accumulatedCompacted stays sticky", async () => {
637
+ // Force multiple iterations by returning over-budget until the loop
638
+ // exits on maxAttempts. First iteration compacts (stepCompacted=true);
639
+ // subsequent iterations run other tiers (stepCompacted=false), but
640
+ // accumulatedCompacted must remain true for slack suppression.
641
+ const longToolResult = "r".repeat(8000);
642
+ const history: Message[] = [
643
+ msg("user", "Start"),
644
+ toolUseMsg("tu_1", "read_file"),
645
+ toolResultMsg("tu_1", longToolResult),
646
+ msg("user", "Next"),
647
+ ];
648
+ const build = buildArgs(history);
649
+ const overBudget: OverflowReduceArgs = {
650
+ ...build.args,
651
+ estimatePostInjection: () => build.args.preflightBudget + 1_000_000,
652
+ };
653
+
654
+ await defaultOverflowReduceMiddleware(
655
+ overBudget,
656
+ async () => {
657
+ throw new Error("next should not be invoked");
658
+ },
659
+ makeTurnContext(),
660
+ );
661
+
662
+ // At least one compaction attempt happened.
663
+ expect(build.reinjectCalls.length).toBeGreaterThanOrEqual(1);
664
+ // The first iteration that compacted set accumulatedCompacted=true,
665
+ // and every subsequent call continues to see it true — even when
666
+ // that iteration's own step did NOT compact.
667
+ const firstCompactedAt = build.reinjectCalls.findIndex(
668
+ (c) => c.stepCompacted,
669
+ );
670
+ expect(firstCompactedAt).toBeGreaterThanOrEqual(0);
671
+ for (let i = firstCompactedAt; i < build.reinjectCalls.length; i++) {
672
+ expect(build.reinjectCalls[i]!.accumulatedCompacted).toBe(true);
673
+ }
674
+ });
675
+ });
676
+ });