@vellumai/assistant 0.6.5 → 0.6.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (443)
  1. package/AGENTS.md +9 -1
  2. package/ARCHITECTURE.md +15 -17
  3. package/Dockerfile +6 -4
  4. package/__tests__/permissions/gateway-threshold-reader.test.ts +283 -0
  5. package/docs/architecture/integrations.md +32 -39
  6. package/docs/architecture/memory.md +25 -30
  7. package/docs/architecture/security.md +7 -6
  8. package/docs/browser-use-architecture-phase2.md +63 -20
  9. package/docs/plugins.md +761 -0
  10. package/examples/plugins/echo/README.md +132 -0
  11. package/examples/plugins/echo/package.json +17 -0
  12. package/examples/plugins/echo/register.ts +187 -0
  13. package/node_modules/@vellumai/egress-proxy/src/types.ts +19 -0
  14. package/openapi.yaml +212 -68
  15. package/package.json +1 -1
  16. package/src/__tests__/app-compiler.test.ts +57 -0
  17. package/src/__tests__/approval-cascade.test.ts +7 -2
  18. package/src/__tests__/auto-analysis-end-to-end.test.ts +1 -0
  19. package/src/__tests__/avatar-generator.test.ts +4 -2
  20. package/src/__tests__/bundled-asset.test.ts +6 -6
  21. package/src/__tests__/catalog-cache.test.ts +69 -0
  22. package/src/__tests__/checker.test.ts +459 -171
  23. package/src/__tests__/circuit-breaker-pipeline.test.ts +406 -0
  24. package/src/__tests__/compaction-events.test.ts +501 -0
  25. package/src/__tests__/compaction-pipeline.test.ts +210 -0
  26. package/src/__tests__/compaction-strip-metadata-clear.test.ts +181 -0
  27. package/src/__tests__/compaction-timeout-recovery.test.ts +262 -0
  28. package/src/__tests__/config-model-image-provider.test.ts +110 -0
  29. package/src/__tests__/config-schema.test.ts +22 -9
  30. package/src/__tests__/config-watcher-cleanup-throttle.test.ts +0 -4
  31. package/src/__tests__/contacts-tools.test.ts +26 -0
  32. package/src/__tests__/context-overflow-policy.test.ts +7 -7
  33. package/src/__tests__/context-window-manager.test.ts +355 -4
  34. package/src/__tests__/conversation-abort-tool-results.test.ts +4 -1
  35. package/src/__tests__/conversation-agent-loop-overflow.test.ts +26 -30
  36. package/src/__tests__/conversation-agent-loop.test.ts +30 -141
  37. package/src/__tests__/conversation-confirmation-signals.test.ts +6 -1
  38. package/src/__tests__/conversation-history-web-search.test.ts +1 -0
  39. package/src/__tests__/conversation-init.benchmark.test.ts +2 -16
  40. package/src/__tests__/conversation-pairing.test.ts +174 -10
  41. package/src/__tests__/conversation-pre-run-repair.test.ts +4 -1
  42. package/src/__tests__/conversation-process-callsite.test.ts +3 -0
  43. package/src/__tests__/conversation-provider-retry-repair.test.ts +16 -7
  44. package/src/__tests__/conversation-queue.test.ts +29 -14
  45. package/src/__tests__/conversation-routes-disk-view.test.ts +7 -6
  46. package/src/__tests__/conversation-runtime-assembly.test.ts +155 -110
  47. package/src/__tests__/conversation-runtime-workspace.test.ts +23 -38
  48. package/src/__tests__/conversation-seed-composer.test.ts +2 -2
  49. package/src/__tests__/conversation-slash-queue.test.ts +7 -2
  50. package/src/__tests__/conversation-slash-unknown.test.ts +25 -2
  51. package/src/__tests__/conversation-speed-override.test.ts +6 -1
  52. package/src/__tests__/conversation-title-service.test.ts +116 -0
  53. package/src/__tests__/conversation-tool-setup-app-refresh.test.ts +41 -2
  54. package/src/__tests__/conversation-usage.test.ts +1 -1
  55. package/src/__tests__/conversation-workspace-cache-state.test.ts +4 -1
  56. package/src/__tests__/conversation-workspace-injection.test.ts +3 -0
  57. package/src/__tests__/conversation-workspace-tool-tracking.test.ts +4 -1
  58. package/src/__tests__/credential-health-service.test.ts +78 -9
  59. package/src/__tests__/credential-security-invariants.test.ts +2 -2
  60. package/src/__tests__/db-schedule-syntax-migration.test.ts +1 -0
  61. package/src/__tests__/empty-response-pipeline.test.ts +305 -0
  62. package/src/__tests__/extension-id-sync-guard.test.ts +3 -3
  63. package/src/__tests__/first-greeting.test.ts +247 -5
  64. package/src/__tests__/headless-browser-mode.test.ts +57 -0
  65. package/src/__tests__/history-repair-pipeline.test.ts +399 -0
  66. package/src/__tests__/host-browser-e2e-cloud.test.ts +307 -0
  67. package/src/__tests__/host-browser-e2e-self-hosted.test.ts +3 -3
  68. package/src/__tests__/host-proxy-interface.test.ts +36 -2
  69. package/src/__tests__/image-credentials.test.ts +137 -0
  70. package/src/__tests__/image-service-dispatcher.test.ts +186 -0
  71. package/src/__tests__/injector-chain.test.ts +526 -0
  72. package/src/__tests__/intent-routing.test.ts +0 -26
  73. package/src/__tests__/llm-call-pipeline.test.ts +285 -0
  74. package/src/__tests__/llm-schema.test.ts +1 -1
  75. package/src/__tests__/media-generate-image.test.ts +119 -13
  76. package/src/__tests__/memory-retrieval-pipeline.test.ts +401 -0
  77. package/src/__tests__/memory-upsert-concurrency.test.ts +1 -0
  78. package/src/__tests__/migration-import-from-url.test.ts +5 -68
  79. package/src/__tests__/model-intents.test.ts +4 -2
  80. package/src/__tests__/notification-broadcaster.test.ts +3 -3
  81. package/src/__tests__/notification-decision-strategy.test.ts +0 -11
  82. package/src/__tests__/notification-schedule-notify-dedup.test.ts +108 -0
  83. package/src/__tests__/oauth-apps-routes.test.ts +1 -1
  84. package/src/__tests__/oauth-cli.test.ts +14 -12
  85. package/src/__tests__/oauth-connect-orchestrator.test.ts +4 -13
  86. package/src/__tests__/oauth-provider-serializer.test.ts +6 -4
  87. package/src/__tests__/oauth-provider-visibility.test.ts +3 -5
  88. package/src/__tests__/oauth-providers-routes.test.ts +3 -2
  89. package/src/__tests__/oauth-store.test.ts +41 -76
  90. package/src/__tests__/onboarding-template-contract.test.ts +16 -64
  91. package/src/__tests__/openai-image-service.test.ts +368 -0
  92. package/src/__tests__/overflow-reduce-pipeline.test.ts +676 -0
  93. package/src/__tests__/permission-checker-host-gate.test.ts +0 -24
  94. package/src/__tests__/persist-onboarding-artifacts.test.ts +266 -0
  95. package/src/__tests__/persistence-pipeline.test.ts +377 -0
  96. package/src/__tests__/pipeline-runner.test.ts +565 -0
  97. package/src/__tests__/platform.test.ts +5 -2
  98. package/src/__tests__/plugin-bootstrap.test.ts +483 -0
  99. package/src/__tests__/plugin-registry.test.ts +273 -0
  100. package/src/__tests__/plugin-route-contribution.test.ts +288 -0
  101. package/src/__tests__/plugin-skill-contribution.test.ts +367 -0
  102. package/src/__tests__/plugin-tool-contribution.test.ts +286 -0
  103. package/src/__tests__/plugin-types.test.ts +320 -0
  104. package/src/__tests__/pricing.test.ts +44 -12
  105. package/src/__tests__/proxy-approval-callback.test.ts +69 -8
  106. package/src/__tests__/reaction-persistence.test.ts +1 -0
  107. package/src/__tests__/regenerate-fire-and-forget-trace.test.ts +1 -0
  108. package/src/__tests__/registry.test.ts +0 -2
  109. package/src/__tests__/schedule-routes.test.ts +131 -1
  110. package/src/__tests__/scheduler-recurrence.test.ts +14 -70
  111. package/src/__tests__/scheduler-reuse-conversation.test.ts +10 -50
  112. package/src/__tests__/secret-detection-handler.test.ts +0 -10
  113. package/src/__tests__/shell-identity.test.ts +0 -134
  114. package/src/__tests__/suggestion-routes.test.ts +103 -4
  115. package/src/__tests__/task-memory-cleanup.test.ts +1 -0
  116. package/src/__tests__/task-scheduler.test.ts +3 -15
  117. package/src/__tests__/test-preload.ts +11 -0
  118. package/src/__tests__/title-generate-pipeline.test.ts +224 -0
  119. package/src/__tests__/token-estimate-pipeline.test.ts +431 -0
  120. package/src/__tests__/tool-error-pipeline.test.ts +244 -0
  121. package/src/__tests__/tool-execute-pipeline.test.ts +431 -0
  122. package/src/__tests__/tool-execution-pipeline.benchmark.test.ts +0 -6
  123. package/src/__tests__/tool-executor-shell-integration.test.ts +7 -10
  124. package/src/__tests__/tool-executor.test.ts +141 -0
  125. package/src/__tests__/tool-result-truncate-pipeline.test.ts +356 -0
  126. package/src/__tests__/tool-result-truncation.test.ts +0 -110
  127. package/src/__tests__/user-plugin-loader.test.ts +191 -0
  128. package/src/__tests__/workspace-migration-046-seed-conversation-starters-callsite.test.ts +185 -0
  129. package/src/__tests__/workspace-migration-049-release-notes-default-sonnet.test.ts +100 -0
  130. package/src/__tests__/workspace-migration-050-seed-main-agent-opus-callsite.test.ts +171 -0
  131. package/src/__tests__/workspace-migration-051-seed-conversation-summarization-callsite.test.ts +252 -0
  132. package/src/__tests__/workspace-migration-remove-hooks.test.ts +99 -0
  133. package/src/__tests__/workspace-policy.test.ts +21 -3
  134. package/src/agent/loop.ts +340 -102
  135. package/src/approvals/__tests__/guardian-feed-event.test.ts +304 -0
  136. package/src/approvals/guardian-request-resolvers.ts +80 -0
  137. package/src/backup/__tests__/backup-worker.test.ts +2 -13
  138. package/src/backup/backup-worker.ts +3 -15
  139. package/src/bundler/app-compiler.ts +84 -1
  140. package/src/calls/call-state.ts +2 -2
  141. package/src/channels/__tests__/types.test.ts +3 -3
  142. package/src/channels/types.ts +6 -4
  143. package/src/cli/__tests__/notifications.test.ts +87 -211
  144. package/src/cli/commands/__tests__/backup.test.ts +1 -1
  145. package/src/cli/commands/__tests__/image-generation.test.ts +255 -35
  146. package/src/cli/commands/__tests__/inference-send.test.ts +12 -0
  147. package/src/cli/commands/__tests__/tts-synthesize.test.ts +12 -0
  148. package/src/cli/commands/backup.ts +2 -2
  149. package/src/cli/commands/clients.ts +138 -0
  150. package/src/cli/commands/completions.ts +2 -9
  151. package/src/cli/commands/conversations.ts +55 -7
  152. package/src/cli/commands/image-generation.ts +33 -34
  153. package/src/cli/commands/notifications.ts +68 -103
  154. package/src/cli/commands/oauth/__tests__/providers-register.test.ts +1 -1
  155. package/src/cli/commands/oauth/__tests__/providers-update.test.ts +1 -1
  156. package/src/cli/commands/oauth/connect.ts +2 -2
  157. package/src/cli/commands/oauth/providers.ts +176 -8
  158. package/src/cli/commands/oauth/status.ts +46 -36
  159. package/src/cli/commands/skills.ts +3 -4
  160. package/src/cli/program.ts +25 -29
  161. package/src/config/__tests__/backup-schema.test.ts +7 -2
  162. package/src/config/bundled-skills/app-builder/SKILL.md +2 -2
  163. package/src/config/bundled-skills/app-builder/references/WIDGETS.md +10 -10
  164. package/src/config/bundled-skills/contacts/tools/contact-merge.ts +66 -87
  165. package/src/config/bundled-skills/contacts/tools/contact-search.ts +28 -51
  166. package/src/config/bundled-skills/contacts/tools/contact-upsert.ts +22 -40
  167. package/src/config/bundled-skills/image-studio/SKILL.md +2 -1
  168. package/src/config/bundled-skills/image-studio/TOOLS.json +2 -1
  169. package/src/config/bundled-skills/image-studio/tools/media-generate-image.ts +23 -39
  170. package/src/config/bundled-skills/messaging/SKILL.md +3 -3
  171. package/src/config/bundled-skills/messaging/tools/__tests__/messaging-feed-events.test.ts +207 -0
  172. package/src/config/bundled-skills/messaging/tools/messaging-archive-by-sender.ts +12 -0
  173. package/src/config/bundled-skills/messaging/tools/messaging-send.ts +58 -0
  174. package/src/config/bundled-skills/schedule/SKILL.md +8 -3
  175. package/src/config/bundled-skills/schedule/TOOLS.json +15 -7
  176. package/src/config/bundled-skills/schedule/references/SCRIPT_MODE_PATTERNS.md +59 -0
  177. package/src/config/bundled-tool-registry.ts +0 -15
  178. package/src/config/feature-flag-registry.json +17 -1
  179. package/src/config/schema.ts +19 -0
  180. package/src/config/schemas/backup.ts +1 -1
  181. package/src/config/schemas/conversations.ts +16 -0
  182. package/src/config/schemas/llm.ts +2 -3
  183. package/src/config/schemas/security.ts +6 -6
  184. package/src/config/schemas/tts.ts +11 -0
  185. package/src/config/skill-state.ts +6 -2
  186. package/src/config/skills.ts +94 -5
  187. package/src/context/__tests__/compact-prompt.test.ts +27 -9
  188. package/src/context/prompts/compact.md +26 -12
  189. package/src/context/tool-result-truncation.ts +3 -63
  190. package/src/context/window-manager.ts +190 -16
  191. package/src/credential-health/credential-health-service.ts +19 -6
  192. package/src/daemon/__tests__/conversation-feed-event.test.ts +317 -0
  193. package/src/daemon/__tests__/conversation-lifecycle-auto-analyze.test.ts +4 -12
  194. package/src/daemon/__tests__/conversation-tool-setup.test.ts +14 -15
  195. package/src/daemon/config-watcher.ts +0 -2
  196. package/src/daemon/context-overflow-policy.ts +4 -13
  197. package/src/daemon/conversation-agent-loop-handlers.ts +83 -22
  198. package/src/daemon/conversation-agent-loop.ts +984 -683
  199. package/src/daemon/conversation-history.ts +10 -19
  200. package/src/daemon/conversation-lifecycle.ts +37 -19
  201. package/src/daemon/conversation-notifiers.ts +2 -110
  202. package/src/daemon/conversation-process.ts +14 -7
  203. package/src/daemon/conversation-runtime-assembly.ts +532 -411
  204. package/src/daemon/conversation-tool-setup.ts +41 -4
  205. package/src/daemon/conversation.ts +80 -35
  206. package/src/daemon/external-plugins-bootstrap.ts +478 -0
  207. package/src/daemon/first-greeting.ts +191 -14
  208. package/src/daemon/handlers/config-model.ts +11 -0
  209. package/src/daemon/handlers/skills.ts +5 -1
  210. package/src/daemon/lifecycle.ts +33 -68
  211. package/src/daemon/message-types/computer-use.ts +2 -34
  212. package/src/daemon/message-types/conversations.ts +49 -0
  213. package/src/daemon/message-types/messages.ts +12 -0
  214. package/src/daemon/server.ts +5 -3
  215. package/src/daemon/shutdown-handlers.ts +2 -12
  216. package/src/daemon/tool-side-effects.ts +14 -56
  217. package/src/heartbeat/__tests__/heartbeat-feed-event.test.ts +160 -0
  218. package/src/heartbeat/heartbeat-service.ts +24 -1
  219. package/src/home/__tests__/feed-population-integration.test.ts +312 -0
  220. package/src/home/emit-feed-event.ts +7 -0
  221. package/src/home/feed-types.ts +41 -2
  222. package/src/home/rewrite-command-preview.ts +66 -0
  223. package/src/ipc/__tests__/socket-path.test.ts +11 -50
  224. package/src/ipc/cli-client.ts +1 -1
  225. package/src/ipc/cli-server.ts +3 -3
  226. package/src/ipc/gateway-client.ts +4 -1
  227. package/src/ipc/routes/browser-context.ts +2 -0
  228. package/src/ipc/routes/browser.ts +1 -0
  229. package/src/ipc/routes/get-contact.ts +16 -0
  230. package/src/ipc/routes/index.ts +14 -0
  231. package/src/ipc/routes/list-clients.ts +31 -0
  232. package/src/ipc/routes/merge-contacts.ts +17 -0
  233. package/src/ipc/routes/notification.ts +133 -0
  234. package/src/ipc/routes/rename-conversation.ts +59 -0
  235. package/src/ipc/routes/search-contacts.ts +19 -0
  236. package/src/ipc/routes/upsert-contact.ts +25 -0
  237. package/src/ipc/socket-path.ts +14 -38
  238. package/src/media/app-icon-generator.ts +23 -46
  239. package/src/media/avatar-router.ts +26 -41
  240. package/src/media/gemini-image-service.ts +8 -41
  241. package/src/media/image-credentials.ts +73 -0
  242. package/src/media/image-service.ts +85 -0
  243. package/src/media/openai-image-service.ts +131 -0
  244. package/src/media/types.ts +46 -0
  245. package/src/memory/conversation-crud.ts +48 -18
  246. package/src/memory/conversation-queries.ts +57 -4
  247. package/src/memory/conversation-title-service.ts +25 -0
  248. package/src/memory/db-init.ts +8 -0
  249. package/src/memory/embedding-gemini.test.ts +41 -2
  250. package/src/memory/embedding-gemini.ts +6 -1
  251. package/src/memory/graph/bootstrap.test.ts +282 -0
  252. package/src/memory/graph/bootstrap.ts +8 -5
  253. package/src/memory/graph/extraction.ts +10 -2
  254. package/src/memory/graph/graph-search.test.ts +1 -0
  255. package/src/memory/graph/inspect.ts +2 -2
  256. package/src/memory/graph/retriever.ts +10 -3
  257. package/src/memory/migrations/041-approval-prompt-ts-tracker.ts +26 -0
  258. package/src/memory/migrations/149-oauth-tables.ts +1 -0
  259. package/src/memory/migrations/223-schedule-script-column.ts +11 -0
  260. package/src/memory/migrations/224-oauth-providers-managed-service-is-paid.ts +24 -0
  261. package/src/memory/migrations/225-oauth-providers-available-scopes.ts +13 -0
  262. package/src/memory/migrations/index.ts +4 -0
  263. package/src/memory/pkb/pkb-index.test.ts +1 -0
  264. package/src/memory/pkb/pkb-reconcile.test.ts +1 -0
  265. package/src/memory/pkb/pkb-search.test.ts +65 -4
  266. package/src/memory/pkb/pkb-search.ts +40 -18
  267. package/src/memory/qdrant-client.test.ts +60 -0
  268. package/src/memory/qdrant-client.ts +25 -0
  269. package/src/memory/schema/infrastructure.ts +1 -0
  270. package/src/memory/schema/oauth.ts +4 -1
  271. package/src/messaging/providers/slack/render-transcript.test.ts +77 -29
  272. package/src/messaging/providers/slack/render-transcript.ts +58 -0
  273. package/src/notifications/conversation-pairing.ts +78 -19
  274. package/src/notifications/copy-composer.ts +0 -5
  275. package/src/notifications/emit-signal.ts +1 -1
  276. package/src/notifications/signal.ts +1 -2
  277. package/src/oauth/AGENTS.md +1 -1
  278. package/src/oauth/__tests__/identity-verifier.test.ts +2 -1
  279. package/src/oauth/connect-orchestrator.ts +8 -34
  280. package/src/oauth/connect-types.ts +6 -10
  281. package/src/oauth/manual-token-connection.ts +23 -0
  282. package/src/oauth/oauth-store.ts +30 -14
  283. package/src/oauth/provider-serializer.ts +6 -1
  284. package/src/oauth/seed-providers.ts +56 -108
  285. package/src/outbound-proxy/http-forwarder.ts +9 -0
  286. package/src/permissions/approval-policy.test.ts +293 -18
  287. package/src/permissions/approval-policy.ts +110 -58
  288. package/src/permissions/arg-parser.test.ts +161 -0
  289. package/src/permissions/arg-parser.ts +141 -0
  290. package/src/permissions/bash-risk-classifier.test.ts +414 -2
  291. package/src/permissions/bash-risk-classifier.ts +303 -60
  292. package/src/permissions/checker.ts +157 -29
  293. package/src/permissions/command-registry.test.ts +239 -0
  294. package/src/permissions/command-registry.ts +234 -54
  295. package/src/permissions/defaults.ts +5 -4
  296. package/src/permissions/gateway-threshold-reader.ts +196 -0
  297. package/src/permissions/prompter.ts +4 -0
  298. package/src/permissions/risk-types.ts +61 -4
  299. package/src/permissions/schedule-risk-classifier.test.ts +129 -0
  300. package/src/permissions/schedule-risk-classifier.ts +85 -0
  301. package/src/permissions/shell-identity.ts +2 -42
  302. package/src/permissions/types.ts +2 -0
  303. package/src/permissions/workspace-policy.ts +8 -3
  304. package/src/plugins/defaults/circuit-breaker.ts +146 -0
  305. package/src/plugins/defaults/compaction.ts +145 -0
  306. package/src/plugins/defaults/empty-response.ts +126 -0
  307. package/src/plugins/defaults/history-repair.ts +85 -0
  308. package/src/plugins/defaults/index.ts +116 -0
  309. package/src/plugins/defaults/injectors.ts +491 -0
  310. package/src/plugins/defaults/llm-call.ts +82 -0
  311. package/src/plugins/defaults/memory-retrieval.ts +226 -0
  312. package/src/plugins/defaults/overflow-reduce.ts +181 -0
  313. package/src/plugins/defaults/persistence.ts +129 -0
  314. package/src/plugins/defaults/title-generate.ts +95 -0
  315. package/src/plugins/defaults/token-estimate.ts +104 -0
  316. package/src/plugins/defaults/tool-error.ts +126 -0
  317. package/src/plugins/defaults/tool-execute.ts +89 -0
  318. package/src/plugins/defaults/tool-result-truncate.ts +88 -0
  319. package/src/plugins/pipeline.ts +316 -0
  320. package/src/plugins/plugin-skill-contributions.ts +292 -0
  321. package/src/plugins/registry.ts +241 -0
  322. package/src/plugins/types.ts +1134 -0
  323. package/src/plugins/user-loader.ts +177 -0
  324. package/src/prompts/templates/BOOTSTRAP.md +27 -77
  325. package/src/providers/model-catalog.ts +52 -29
  326. package/src/providers/model-intents.ts +1 -1
  327. package/src/providers/openrouter/client.ts +5 -1
  328. package/src/providers/speech-to-text/deepgram-realtime.test.ts +61 -0
  329. package/src/providers/speech-to-text/deepgram-realtime.ts +57 -0
  330. package/src/providers/speech-to-text/xai-realtime.test.ts +72 -4
  331. package/src/providers/speech-to-text/xai-realtime.ts +39 -14
  332. package/src/runtime/AGENTS.md +25 -16
  333. package/src/runtime/__tests__/browser-extension-pair-routes.test.ts +3 -3
  334. package/src/runtime/__tests__/client-registry.test.ts +293 -0
  335. package/src/runtime/client-registry.ts +261 -0
  336. package/src/runtime/http-server.ts +77 -8
  337. package/src/runtime/http-types.ts +0 -2
  338. package/src/runtime/migrations/vbundle-builder.ts +1 -22
  339. package/src/runtime/routes/approval-prompt-ts-tracker.ts +51 -31
  340. package/src/runtime/routes/approval-routes.ts +17 -0
  341. package/src/runtime/routes/browser-extension-pair-routes.ts +27 -8
  342. package/src/runtime/routes/conversation-routes.ts +223 -116
  343. package/src/runtime/routes/inbound-message-handler.ts +88 -13
  344. package/src/runtime/routes/memory-item-routes.test.ts +1 -0
  345. package/src/runtime/routes/migration-routes.ts +0 -3
  346. package/src/runtime/routes/playground/__tests__/force-compact.test.ts +284 -0
  347. package/src/runtime/routes/playground/__tests__/guard.test.ts +80 -0
  348. package/src/runtime/routes/playground/__tests__/inject-failures.test.ts +294 -0
  349. package/src/runtime/routes/playground/__tests__/reset-circuit.test.ts +271 -0
  350. package/src/runtime/routes/playground/__tests__/seed-conversation.test.ts +202 -0
  351. package/src/runtime/routes/playground/__tests__/seeded-conversations.test.ts +309 -0
  352. package/src/runtime/routes/playground/__tests__/state.test.ts +224 -0
  353. package/src/runtime/routes/playground/conversation-not-found.ts +29 -0
  354. package/src/runtime/routes/playground/deps.ts +56 -0
  355. package/src/runtime/routes/playground/force-compact.ts +73 -0
  356. package/src/runtime/routes/playground/guard.ts +37 -0
  357. package/src/runtime/routes/playground/index.ts +28 -0
  358. package/src/runtime/routes/playground/inject-failures.ts +159 -0
  359. package/src/runtime/routes/playground/reset-circuit.ts +115 -0
  360. package/src/runtime/routes/playground/seed-conversation.ts +139 -0
  361. package/src/runtime/routes/playground/seeded-conversations.ts +78 -0
  362. package/src/runtime/routes/playground/state.ts +78 -0
  363. package/src/runtime/routes/schedule-routes.ts +89 -8
  364. package/src/runtime/skill-route-registry.ts +75 -15
  365. package/src/schedule/run-script.ts +68 -0
  366. package/src/schedule/schedule-store.ts +7 -1
  367. package/src/schedule/scheduler.ts +48 -8
  368. package/src/skills/catalog-cache.ts +12 -5
  369. package/src/tools/browser/__tests__/browser-status.test.ts +189 -0
  370. package/src/tools/browser/browser-execution.ts +88 -19
  371. package/src/tools/browser/cdp-client/__tests__/extension-cdp-client.test.ts +230 -0
  372. package/src/tools/browser/cdp-client/__tests__/factory.test.ts +146 -3
  373. package/src/tools/browser/cdp-client/extension-cdp-client.ts +54 -3
  374. package/src/tools/browser/cdp-client/factory.ts +15 -4
  375. package/src/tools/executor.ts +126 -74
  376. package/src/tools/network/script-proxy/session-manager.ts +37 -1
  377. package/src/tools/permission-checker.ts +98 -49
  378. package/src/tools/policy-context.ts +4 -0
  379. package/src/tools/registry.ts +140 -3
  380. package/src/tools/schedule/create.ts +23 -8
  381. package/src/tools/schedule/update.ts +3 -1
  382. package/src/tools/secret-detection-handler.ts +0 -51
  383. package/src/tools/system/avatar-generator.ts +6 -2
  384. package/src/tools/types.ts +28 -2
  385. package/src/util/platform.ts +7 -2
  386. package/src/util/pricing.ts +26 -3
  387. package/src/workspace/migrations/006-services-config.ts +2 -4
  388. package/src/workspace/migrations/022-move-hooks-to-workspace.ts +2 -3
  389. package/src/workspace/migrations/041-backfill-google-gmail-settings-scope.ts +3 -4
  390. package/src/workspace/migrations/046-seed-conversation-starters-callsite.ts +108 -0
  391. package/src/workspace/migrations/047-remove-watch-callsites.ts +54 -0
  392. package/src/workspace/migrations/048-remove-workspace-hooks.ts +81 -0
  393. package/src/workspace/migrations/049-release-notes-default-sonnet.ts +80 -0
  394. package/src/workspace/migrations/050-seed-main-agent-opus-callsite.ts +86 -0
  395. package/src/workspace/migrations/051-seed-conversation-summarization-callsite.ts +128 -0
  396. package/src/workspace/migrations/registry.ts +12 -0
  397. package/tsconfig.json +1 -1
  398. package/hook-templates/debug-prompt-logger/hook.json +0 -7
  399. package/hook-templates/debug-prompt-logger/run.sh +0 -66
  400. package/src/__tests__/compaction-circuit-breaker.test.ts +0 -336
  401. package/src/__tests__/context-overflow-approval.test.ts +0 -156
  402. package/src/__tests__/hooks-blocking.test.ts +0 -178
  403. package/src/__tests__/hooks-cli.test.ts +0 -182
  404. package/src/__tests__/hooks-config.test.ts +0 -108
  405. package/src/__tests__/hooks-discovery.test.ts +0 -211
  406. package/src/__tests__/hooks-integration.test.ts +0 -196
  407. package/src/__tests__/hooks-manager.test.ts +0 -226
  408. package/src/__tests__/hooks-runner.test.ts +0 -175
  409. package/src/__tests__/hooks-settings.test.ts +0 -160
  410. package/src/__tests__/hooks-templates.test.ts +0 -169
  411. package/src/__tests__/hooks-ts-runner.test.ts +0 -170
  412. package/src/__tests__/hooks-watch.test.ts +0 -112
  413. package/src/__tests__/notification-schedule-dedup.test.ts +0 -213
  414. package/src/__tests__/oauth-scope-policy.test.ts +0 -180
  415. package/src/__tests__/send-notification-tool.test.ts +0 -83
  416. package/src/cli/commands/shotgun.ts +0 -266
  417. package/src/config/bundled-skills/conversations/SKILL.md +0 -20
  418. package/src/config/bundled-skills/conversations/TOOLS.json +0 -23
  419. package/src/config/bundled-skills/conversations/tools/rename-conversation.ts +0 -88
  420. package/src/config/bundled-skills/heartbeat/SKILL.md +0 -43
  421. package/src/config/bundled-skills/notifications/SKILL.md +0 -40
  422. package/src/config/bundled-skills/notifications/TOOLS.json +0 -80
  423. package/src/config/bundled-skills/notifications/tools/send-notification.ts +0 -152
  424. package/src/config/bundled-skills/notifications/tools/shared.ts +0 -13
  425. package/src/config/bundled-skills/screen-watch/SKILL.md +0 -27
  426. package/src/config/bundled-skills/screen-watch/TOOLS.json +0 -35
  427. package/src/config/bundled-skills/screen-watch/tools/start-screen-watch.ts +0 -12
  428. package/src/config/bundled-skills/skills-catalog/SKILL.md +0 -84
  429. package/src/daemon/context-overflow-approval.ts +0 -52
  430. package/src/daemon/watch-handler.ts +0 -399
  431. package/src/hooks/cli.ts +0 -253
  432. package/src/hooks/config.ts +0 -100
  433. package/src/hooks/discovery.ts +0 -135
  434. package/src/hooks/manager.ts +0 -179
  435. package/src/hooks/runner.ts +0 -117
  436. package/src/hooks/templates.ts +0 -77
  437. package/src/hooks/types.ts +0 -75
  438. package/src/oauth/scope-policy.ts +0 -89
  439. package/src/runtime/gateway-internal-client.ts +0 -94
  440. package/src/runtime/routes/watch-routes.ts +0 -156
  441. package/src/signals/shotgun.ts +0 -203
  442. package/src/tools/watch/screen-watch.ts +0 -144
  443. package/src/tools/watch/watch-state.ts +0 -142
@@ -0,0 +1,431 @@
1
+ /**
2
+ * Tests for the `tokenEstimate` plugin pipeline (PR 22 of the
3
+ * agent-plugin-system plan).
4
+ *
5
+ * Covers:
6
+ * - The default terminal handler (`defaultTokenEstimateTerminal`) matches
7
+ * {@link estimatePromptTokensRaw} output exactly across a set of golden
8
+ * inputs (empty history, text-only, tools, provider-specific image sizing).
9
+ * - Running the pipeline end-to-end with the default plugin registered produces
10
+ * the same numeric result as calling `estimatePromptTokensRaw` directly.
11
+ * - A custom plugin that short-circuits the chain can override the default,
12
+ * proving the extension point works.
13
+ *
14
+ * These tests exercise the registry + runner directly. They do not touch
15
+ * `bootstrapPlugins` — the default registration path is covered by the
16
+ * bootstrap suite.
17
+ */
18
+
19
+ import { afterEach, beforeEach, describe, expect, test } from "bun:test";
20
+
21
+ import {
22
+ estimatePromptTokensRaw,
23
+ estimateToolsTokens,
24
+ } from "../context/token-estimator.js";
25
+ import type { TrustContext } from "../daemon/conversation-runtime-assembly.js";
26
+ import {
27
+ defaultTokenEstimatePlugin,
28
+ defaultTokenEstimateTerminal,
29
+ } from "../plugins/defaults/token-estimate.js";
30
+ import { DEFAULT_TIMEOUTS, runPipeline } from "../plugins/pipeline.js";
31
+ import {
32
+ getMiddlewaresFor,
33
+ registerPlugin,
34
+ resetPluginRegistryForTests,
35
+ } from "../plugins/registry.js";
36
+ import type {
37
+ EstimateArgs,
38
+ EstimateResult,
39
+ Middleware,
40
+ Plugin,
41
+ TurnContext,
42
+ } from "../plugins/types.js";
43
+ import type { Message, ToolDefinition } from "../providers/types.js";
44
+
45
+ // ── Fixtures ─────────────────────────────────────────────────────────────
46
+
47
+ const trust: TrustContext = {
48
+ sourceChannel: "vellum",
49
+ trustClass: "guardian",
50
+ };
51
+
52
+ function makeCtx(overrides: Partial<TurnContext> = {}): TurnContext {
53
+ return {
54
+ requestId: "req-token-estimate-test",
55
+ conversationId: "conv-token-estimate-test",
56
+ turnIndex: 0,
57
+ trust,
58
+ ...overrides,
59
+ };
60
+ }
61
+
62
+ const EMPTY_HISTORY: Message[] = [];
63
+
64
+ const TEXT_HISTORY: Message[] = [
65
+ { role: "user", content: [{ type: "text", text: "hello there" }] },
66
+ {
67
+ role: "assistant",
68
+ content: [
69
+ { type: "text", text: "hi! how can I help you today?" },
70
+ { type: "text", text: "a second text block for good measure" },
71
+ ],
72
+ },
73
+ ];
74
+
75
+ const TOOL_USE_HISTORY: Message[] = [
76
+ { role: "user", content: [{ type: "text", text: "what's in the log?" }] },
77
+ {
78
+ role: "assistant",
79
+ content: [
80
+ {
81
+ type: "tool_use",
82
+ id: "tu-1",
83
+ name: "bash",
84
+ input: { command: "tail -n 5 server.log" },
85
+ },
86
+ ],
87
+ },
88
+ {
89
+ role: "user",
90
+ content: [
91
+ {
92
+ type: "tool_result",
93
+ tool_use_id: "tu-1",
94
+ content: "line1\nline2\nline3",
95
+ },
96
+ ],
97
+ },
98
+ ];
99
+
100
+ const SYSTEM_PROMPT = "You are a helpful assistant with a long preamble.";
101
+
102
+ const SAMPLE_TOOLS: ToolDefinition[] = [
103
+ {
104
+ name: "bash",
105
+ description: "Execute a shell command and return its output.",
106
+ input_schema: {
107
+ type: "object",
108
+ properties: { command: { type: "string" } },
109
+ required: ["command"],
110
+ },
111
+ },
112
+ {
113
+ name: "file_read",
114
+ description: "Read a file from the workspace.",
115
+ input_schema: {
116
+ type: "object",
117
+ properties: { path: { type: "string" } },
118
+ required: ["path"],
119
+ },
120
+ },
121
+ ];
122
+
123
+ // ── Helpers ──────────────────────────────────────────────────────────────
124
+
125
+ function registerDefault(): void {
126
+ registerPlugin(defaultTokenEstimatePlugin);
127
+ }
128
+
129
/**
 * Reference value for the parity tests: calls `estimatePromptTokensRaw`
 * directly instead of going through the plugin pipeline.
 *
 * The tool budget passed along is `estimateToolsTokens(tools)` when at least
 * one tool is supplied, and 0 for an empty tool list.
 */
function rawEstimate(
  args: Pick<EstimateArgs, "history" | "systemPrompt" | "providerName"> & {
    tools: ToolDefinition[];
  },
): number {
  const { history, systemPrompt, providerName, tools } = args;

  let toolTokenBudget = 0;
  if (tools.length > 0) {
    toolTokenBudget = estimateToolsTokens(tools);
  }

  return estimatePromptTokensRaw(history, systemPrompt, {
    providerName,
    toolTokenBudget,
  });
}
141
+
142
/**
 * Runs `args` through the "tokenEstimate" pipeline with whatever middleware
 * is currently registered, a fresh default context, and the pipeline's
 * default timeout.
 */
async function runViaPipeline(args: EstimateArgs): Promise<EstimateResult> {
  const middlewares = getMiddlewaresFor("tokenEstimate");
  // Same terminal the production call site in
  // `daemon/conversation-agent-loop.ts` is said to wire in: the default
  // plugin's middleware is a passthrough, so the estimate itself comes from
  // the terminal. Reusing it keeps the tests on the composition that ships.
  const terminal = defaultTokenEstimateTerminal;
  const ctx = makeCtx();

  return runPipeline<EstimateArgs, EstimateResult>(
    "tokenEstimate",
    middlewares,
    terminal,
    args,
    ctx,
    DEFAULT_TIMEOUTS.tokenEstimate,
  );
}
156
+
157
+ // ── Tests ────────────────────────────────────────────────────────────────
158
+
159
// Reset the plugin registry both before and after every test so that no
// registration leaks into, or out of, an individual test case.
for (const registerHook of [beforeEach, afterEach]) {
  registerHook(() => {
    resetPluginRegistryForTests();
  });
}
166
+
167
describe("tokenEstimate pipeline — default plugin parity", () => {
  /**
   * Sends `input` through the pipeline and asserts the result equals the
   * direct raw estimate for the same input. Returns the pipeline result so
   * callers can make follow-up assertions.
   */
  const expectParity = async (input: EstimateArgs): Promise<EstimateResult> => {
    const viaPipeline = await runViaPipeline(input);
    expect(viaPipeline).toBe(rawEstimate(input));
    return viaPipeline;
  };

  test("default matches estimatePromptTokensRaw on empty history", async () => {
    registerDefault();
    await expectParity({
      history: EMPTY_HISTORY,
      systemPrompt: undefined,
      tools: [],
      providerName: undefined,
    });
  });

  test("default matches estimatePromptTokensRaw on text-only history", async () => {
    registerDefault();
    const input: EstimateArgs = {
      history: TEXT_HISTORY,
      systemPrompt: SYSTEM_PROMPT,
      tools: [],
      providerName: "anthropic",
    };
    const viaPipeline = await expectParity(input);

    // Sanity check: the system prompt must cost tokens, so the estimate has
    // to be strictly larger than the same history without a system prompt.
    const withoutSystemPrompt = rawEstimate({
      ...input,
      systemPrompt: undefined,
    });
    expect(viaPipeline).toBeGreaterThan(withoutSystemPrompt);
  });

  test("default matches estimatePromptTokensRaw with tool_use/tool_result blocks", async () => {
    registerDefault();
    await expectParity({
      history: TOOL_USE_HISTORY,
      systemPrompt: SYSTEM_PROMPT,
      tools: SAMPLE_TOOLS,
      providerName: "anthropic",
    });
  });

  test("default folds tool definition tokens into the result", async () => {
    registerDefault();
    const toolless: EstimateArgs = {
      history: TEXT_HISTORY,
      systemPrompt: SYSTEM_PROMPT,
      tools: [],
      providerName: "anthropic",
    };
    const withoutTools = await runViaPipeline(toolless);
    const withTools = await runViaPipeline({ ...toolless, tools: SAMPLE_TOOLS });

    // The tool definitions must carry a non-zero cost, and the pipeline
    // result must grow by exactly that cost when they are supplied.
    const toolBudget = estimateToolsTokens(SAMPLE_TOOLS);
    expect(toolBudget).toBeGreaterThan(0);
    expect(withTools - withoutTools).toBe(toolBudget);
  });

  test("provider-specific image sizing flows through the default", async () => {
    registerDefault();
    // The raw estimator is the source of truth for per-provider image cost,
    // so the pipeline has to agree with it under each provider name.
    const imageHistory: Message[] = [
      {
        role: "user",
        content: [
          {
            type: "image",
            source: {
              type: "base64",
              media_type: "image/png",
              // Not a real PNG. Per the original author's note the estimator
              // falls back when it cannot parse image dimensions; that is
              // acceptable here because only parity with the raw estimator
              // is asserted.
              data: "a".repeat(128),
            },
          },
        ],
      },
    ];
    const forProvider = (
      providerName: EstimateArgs["providerName"],
    ): EstimateArgs => ({
      history: imageHistory,
      systemPrompt: undefined,
      tools: [],
      providerName,
    });
    const anthropicInput = forProvider("anthropic");
    const openaiInput = forProvider("openai");

    const anthropicEstimate = await runViaPipeline(anthropicInput);
    const openaiEstimate = await runViaPipeline(openaiInput);

    expect(anthropicEstimate).toBe(rawEstimate(anthropicInput));
    expect(openaiEstimate).toBe(rawEstimate(openaiInput));
  });
});
272
+
273
describe("tokenEstimate pipeline — custom override", () => {
  type EstimateMiddleware = Middleware<EstimateArgs, EstimateResult>;

  /** Wraps a `tokenEstimate` middleware in a minimal plugin definition. */
  const pluginWith = (
    name: string,
    tokenEstimate: EstimateMiddleware,
  ): Plugin => ({
    manifest: {
      name,
      version: "1.0.0",
      requires: { pluginRuntime: "v1", tokenEstimateApi: "v1" },
    },
    middleware: { tokenEstimate },
  });

  /** Input used by both tests: text history, system prompt and tools. */
  const input: EstimateArgs = {
    history: TEXT_HISTORY,
    systemPrompt: SYSTEM_PROMPT,
    tools: SAMPLE_TOOLS,
    providerName: "anthropic",
  };

  test("custom plugin short-circuit returns a different value than the default", async () => {
    // The override never calls `next`; it replaces the estimate with a fixed
    // value. This shows a plugin can substitute its own tokenizer (e.g. a
    // provider-native `countTokens`) without touching orchestrator code.
    const FIXED = 424242;
    const shortCircuit: EstimateMiddleware = async () => FIXED;

    // Registered FIRST so it sits outermost and answers before the default's
    // terminal is ever reached.
    registerPlugin(pluginWith("custom-token-estimate", shortCircuit));
    registerDefault();

    const viaPipeline = await runViaPipeline(input);
    expect(viaPipeline).toBe(FIXED);
    // For contrast: the fixed value is not what the raw estimator returns.
    expect(viaPipeline).not.toBe(rawEstimate(input));
  });

  test("wrapper middleware that scales the downstream result composes with the default", async () => {
    // Onion composition: the outer middleware awaits the downstream estimate
    // and returns a modified copy of it (here, twice the value).
    const doubler: EstimateMiddleware = async (args, next) =>
      (await next(args)) * 2;

    registerPlugin(pluginWith("doubling-token-estimate", doubler));
    registerDefault();

    const viaPipeline = await runViaPipeline(input);
    expect(viaPipeline).toBe(rawEstimate(input) * 2);
  });
});
344
+
345
describe("tokenEstimate pipeline — default does not shadow late plugins", () => {
  test("user middleware registered AFTER the default still runs", async () => {
    // Regression guard for default-first shadowing. Defaults are registered
    // before user plugins in `bootstrapPlugins()`, which places the default
    // at the OUTERMOST onion position. Were the default middleware to compute
    // the estimate itself rather than call `next(args)`, every plugin loaded
    // after it would be invisible. The default is meant to be a passthrough;
    // this test fails loudly if that stops being true.
    const SENTINEL = 999_999;
    const seenArgs: EstimateArgs[] = [];
    const observer: Middleware<EstimateArgs, EstimateResult> = async (
      args,
      next,
    ) => {
      seenArgs.push(args);
      // The downstream value is discarded on purpose: returning a sentinel
      // makes the observer's result distinguishable from the default's.
      await next(args);
      return SENTINEL;
    };

    registerDefault();
    registerPlugin({
      manifest: {
        name: "late-registered-observer",
        version: "1.0.0",
        requires: { pluginRuntime: "v1", tokenEstimateApi: "v1" },
      },
      middleware: { tokenEstimate: observer },
    });

    const input: EstimateArgs = {
      history: TEXT_HISTORY,
      systemPrompt: SYSTEM_PROMPT,
      tools: [],
      providerName: "anthropic",
    };
    const outcome = await runViaPipeline(input);

    expect(seenArgs).toHaveLength(1);
    expect(outcome).toBe(SENTINEL);
  });
});
387
+
388
describe("tokenEstimate pipeline — args are immutable to middleware", () => {
  test("frozen history/tools reject in-place mutation attempts", () => {
    // According to the original author, the production call site freezes
    // shallow clones of `history` and `tools` before handing them to the
    // pipeline (that code is not visible from this file). This test mirrors
    // the protection locally: a middleware that tried to trim `args.history`
    // in place, and so silently drop prompt context from the orchestrator's
    // live `runMessages` array, would get a TypeError instead.
    const frozenHistory = Object.freeze(TEXT_HISTORY.slice());
    const frozenTools = Object.freeze(SAMPLE_TOOLS.slice());

    const dropLastMessage = (): void => {
      (frozenHistory as Message[]).pop();
    };
    const appendTool = (): void => {
      (frozenTools as ToolDefinition[]).push({
        name: "extra",
        description: "",
        input_schema: { type: "object", properties: {} },
      });
    };

    expect(dropLastMessage).toThrow(TypeError);
    expect(appendTool).toThrow(TypeError);
  });
});
409
+
410
describe("tokenEstimate pipeline — empty registry fallback", () => {
  test("without any plugin registered, the terminal receives the call", async () => {
    // Nothing is registered in this test (the `beforeEach` hook resets the
    // registry), so the call is expected to fall straight through to the
    // terminal. `runViaPipeline` always wires in
    // `defaultTokenEstimateTerminal`, whose result would not prove which code
    // produced it, so the pipeline is invoked directly here with a terminal
    // that returns a recognizable sentinel.
    const SENTINEL = 12345;
    const input: EstimateArgs = {
      history: TEXT_HISTORY,
      systemPrompt: SYSTEM_PROMPT,
      tools: [],
      providerName: "anthropic",
    };
    const middlewares = getMiddlewaresFor("tokenEstimate");

    const outcome = await runPipeline<EstimateArgs, EstimateResult>(
      "tokenEstimate",
      middlewares,
      async () => SENTINEL,
      input,
      makeCtx(),
      DEFAULT_TIMEOUTS.tokenEstimate,
    );

    expect(outcome).toBe(SENTINEL);
  });
});
@@ -0,0 +1,244 @@
1
+ /**
2
+ * Tests for the `toolError` pipeline (PR 19).
3
+ *
4
+ * Covers:
5
+ * - Default plugin nudges on the first error turn and keeps nudging up to the
6
+ * `maxConsecutiveErrorNudges` cap.
7
+ * - Default plugin suppresses the nudge once the cap is exceeded (the error is
8
+ * likely unrecoverable — burning tokens on more nudges is wasteful).
9
+ * - Default plugin uses the canonical {@link DEFAULT_TOOL_ERROR_NUDGE_TEXT}.
10
+ * - Default plugin skips when `hasToolError` is false, regardless of the
11
+ * consecutive counter (no error this turn → nothing to nudge).
12
+ * - Swapping in a user plugin that provides its own `toolError` middleware
13
+ * changes the nudge text end-to-end through `runPipeline`.
14
+ */
15
+
16
+ import { beforeEach, describe, expect, test } from "bun:test";
17
+
18
+ import type { TrustContext } from "../daemon/conversation-runtime-assembly.js";
19
+ import {
20
+ DEFAULT_TOOL_ERROR_NUDGE_TEXT,
21
+ defaultToolErrorPlugin,
22
+ defaultToolErrorTerminal,
23
+ } from "../plugins/defaults/tool-error.js";
24
+ import { runPipeline } from "../plugins/pipeline.js";
25
+ import {
26
+ getMiddlewaresFor,
27
+ registerPlugin,
28
+ resetPluginRegistryForTests,
29
+ } from "../plugins/registry.js";
30
+ import {
31
+ type Middleware,
32
+ type Plugin,
33
+ type ToolErrorArgs,
34
+ type ToolErrorDecision,
35
+ type TurnContext,
36
+ } from "../plugins/types.js";
37
+
38
/** Trust fixture attached to every `TurnContext` built in this file. */
const trust: TrustContext = { sourceChannel: "vellum", trustClass: "guardian" };
42
+
43
/**
 * Builds a fresh `TurnContext` for a pipeline run: fixed request and
 * conversation ids, turn index 0, and the shared `trust` fixture.
 */
function makeCtx(): TurnContext {
  const ctx: TurnContext = {
    requestId: "req-tool-error-test",
    conversationId: "conv-tool-error-test",
    turnIndex: 0,
    trust,
  };
  return ctx;
}
51
+
52
/**
 * Runs `args` through the "toolError" pipeline using the currently
 * registered middleware, a fresh context and a 500 timeout value.
 *
 * The terminal is `defaultToolErrorTerminal` rather than a no-op, mirroring
 * what the production call site in `agent/loop.ts` is described as doing: the
 * default plugin's middleware only forwards to `next(args)`, so the decision
 * logic lives in the terminal.
 */
async function runToolErrorPipeline(
  args: ToolErrorArgs,
): Promise<ToolErrorDecision> {
  const TIMEOUT = 500;
  const middlewares = getMiddlewaresFor("toolError");
  const ctx = makeCtx();

  return runPipeline<ToolErrorArgs, ToolErrorDecision>(
    "toolError",
    middlewares,
    async (pipelineArgs) => defaultToolErrorTerminal(pipelineArgs),
    args,
    ctx,
    TIMEOUT,
  );
}
68
+
69
describe("toolError pipeline", () => {
  type ToolErrorMiddleware = Middleware<ToolErrorArgs, ToolErrorDecision>;

  /**
   * Args for a turn that DID hit a tool error, `consecutiveErrorTurns` turns
   * into the current error streak. The cap defaults to 3.
   */
  const erroredTurn = (
    consecutiveErrorTurns: number,
    maxConsecutiveErrorNudges = 3,
  ): ToolErrorArgs => ({
    hasToolError: true,
    consecutiveErrorTurns,
    maxConsecutiveErrorNudges,
  });

  /** Wraps a `toolError` middleware in a minimal plugin definition. */
  const pluginWith = (
    name: string,
    toolError: ToolErrorMiddleware,
  ): Plugin => ({
    manifest: {
      name,
      version: "0.0.1",
      requires: { pluginRuntime: "v1", toolErrorApi: "v1" },
    },
    middleware: { toolError },
  });

  /** Asserts the decision is a nudge carrying exactly `text`. */
  const expectNudgeWith = (decision: ToolErrorDecision, text: string): void => {
    expect(decision.action).toBe("nudge");
    if (decision.action === "nudge") {
      expect(decision.nudgeText).toBe(text);
    }
  };

  describe("default plugin", () => {
    beforeEach(() => {
      resetPluginRegistryForTests();
      registerPlugin(defaultToolErrorPlugin);
    });

    test("nudges on first error turn with canonical text", async () => {
      const decision = await runToolErrorPipeline(erroredTurn(1));
      expectNudgeWith(decision, DEFAULT_TOOL_ERROR_NUDGE_TEXT);
    });

    test("keeps nudging up to and including the cap", async () => {
      // With a cap of 3, turns 1, 2 and 3 all nudge; turn 4 is past the cap
      // and is covered by the next test.
      for (const turn of [1, 2, 3]) {
        const decision = await runToolErrorPipeline(erroredTurn(turn));
        expect(decision.action).toBe("nudge");
      }
    });

    test("suppresses the nudge once the consecutive counter exceeds the cap", async () => {
      const decision = await runToolErrorPipeline(erroredTurn(4));
      expect(decision.action).toBe("skip");
    });

    test("skips when there is no tool error this turn, regardless of counter", async () => {
      // The counter is non-zero because the previous turn errored, but this
      // turn succeeded, so there is nothing to nudge about.
      const decision = await runToolErrorPipeline({
        ...erroredTurn(2),
        hasToolError: false,
      });
      expect(decision.action).toBe("skip");
    });

    test("honors a caller-supplied cap of zero (never nudges)", async () => {
      // A call site can switch nudging off by passing cap = 0. The cap is
      // inclusive, so the very first error turn (counter 1) is already past
      // it and the decision is `skip`. Only counters >= 1 are exercised here.
      const firstErrorTurn = await runToolErrorPipeline(erroredTurn(1, 0));
      expect(firstErrorTurn.action).toBe("skip");
    });
  });

  describe("user-supplied plugin", () => {
    beforeEach(() => {
      resetPluginRegistryForTests();
    });

    test("swapping in a plugin changes the nudge text", async () => {
      const customText = "<system_notice>Custom error hint.</system_notice>";
      const customMiddleware: ToolErrorMiddleware = async (args) =>
        args.hasToolError
          ? { action: "nudge", nudgeText: customText }
          : { action: "skip" };
      registerPlugin(pluginWith("custom-tool-error", customMiddleware));

      const decision = await runToolErrorPipeline(erroredTurn(1));
      expectNudgeWith(decision, customText);
    });

    test("swapping in a plugin can suppress nudges even when the default would nudge", async () => {
      const alwaysSkip: ToolErrorMiddleware = async () => ({ action: "skip" });
      registerPlugin(pluginWith("no-nudge", alwaysSkip));

      const decision = await runToolErrorPipeline(erroredTurn(1));
      expect(decision.action).toBe("skip");
    });

    test("terminal still produces the legacy nudge when no plugin is registered", async () => {
      // Nothing is registered for this slot. Because the pipeline terminal is
      // `defaultToolErrorTerminal` (not an inline `() => skip`), callers that
      // drive AgentLoop directly without `bootstrapPlugins()` still get the
      // legacy nudge.
      const decision = await runToolErrorPipeline(erroredTurn(1));
      expectNudgeWith(decision, DEFAULT_TOOL_ERROR_NUDGE_TEXT);
    });

    test("user plugin registered AFTER the default still runs (no shadowing)", async () => {
      // Production registration order: defaults load first through the
      // side-effect imports in `defaults/index.ts`, and user plugins are then
      // registered on top by `bootstrapPlugins()`. That leaves the user's
      // middleware at a deeper onion layer than the default. If the default
      // middleware bypassed `next` and ran the decision logic itself, the
      // user middleware would never execute; this test guards against that.
      registerPlugin(defaultToolErrorPlugin);

      let sawCall = false;
      const userMiddleware: ToolErrorMiddleware = async (args, next) => {
        sawCall = true;
        return next(args);
      };
      registerPlugin(pluginWith("late-user-plugin", userMiddleware));

      await runToolErrorPipeline(erroredTurn(1));

      expect(sawCall).toBe(true);
    });
  });
});