@vellumai/assistant 0.6.5 → 0.6.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (443)
  1. package/AGENTS.md +9 -1
  2. package/ARCHITECTURE.md +15 -17
  3. package/Dockerfile +6 -4
  4. package/__tests__/permissions/gateway-threshold-reader.test.ts +283 -0
  5. package/docs/architecture/integrations.md +32 -39
  6. package/docs/architecture/memory.md +25 -30
  7. package/docs/architecture/security.md +7 -6
  8. package/docs/browser-use-architecture-phase2.md +63 -20
  9. package/docs/plugins.md +761 -0
  10. package/examples/plugins/echo/README.md +132 -0
  11. package/examples/plugins/echo/package.json +17 -0
  12. package/examples/plugins/echo/register.ts +187 -0
  13. package/node_modules/@vellumai/egress-proxy/src/types.ts +19 -0
  14. package/openapi.yaml +212 -68
  15. package/package.json +1 -1
  16. package/src/__tests__/app-compiler.test.ts +57 -0
  17. package/src/__tests__/approval-cascade.test.ts +7 -2
  18. package/src/__tests__/auto-analysis-end-to-end.test.ts +1 -0
  19. package/src/__tests__/avatar-generator.test.ts +4 -2
  20. package/src/__tests__/bundled-asset.test.ts +6 -6
  21. package/src/__tests__/catalog-cache.test.ts +69 -0
  22. package/src/__tests__/checker.test.ts +459 -171
  23. package/src/__tests__/circuit-breaker-pipeline.test.ts +406 -0
  24. package/src/__tests__/compaction-events.test.ts +501 -0
  25. package/src/__tests__/compaction-pipeline.test.ts +210 -0
  26. package/src/__tests__/compaction-strip-metadata-clear.test.ts +181 -0
  27. package/src/__tests__/compaction-timeout-recovery.test.ts +262 -0
  28. package/src/__tests__/config-model-image-provider.test.ts +110 -0
  29. package/src/__tests__/config-schema.test.ts +22 -9
  30. package/src/__tests__/config-watcher-cleanup-throttle.test.ts +0 -4
  31. package/src/__tests__/contacts-tools.test.ts +26 -0
  32. package/src/__tests__/context-overflow-policy.test.ts +7 -7
  33. package/src/__tests__/context-window-manager.test.ts +355 -4
  34. package/src/__tests__/conversation-abort-tool-results.test.ts +4 -1
  35. package/src/__tests__/conversation-agent-loop-overflow.test.ts +26 -30
  36. package/src/__tests__/conversation-agent-loop.test.ts +30 -141
  37. package/src/__tests__/conversation-confirmation-signals.test.ts +6 -1
  38. package/src/__tests__/conversation-history-web-search.test.ts +1 -0
  39. package/src/__tests__/conversation-init.benchmark.test.ts +2 -16
  40. package/src/__tests__/conversation-pairing.test.ts +174 -10
  41. package/src/__tests__/conversation-pre-run-repair.test.ts +4 -1
  42. package/src/__tests__/conversation-process-callsite.test.ts +3 -0
  43. package/src/__tests__/conversation-provider-retry-repair.test.ts +16 -7
  44. package/src/__tests__/conversation-queue.test.ts +29 -14
  45. package/src/__tests__/conversation-routes-disk-view.test.ts +7 -6
  46. package/src/__tests__/conversation-runtime-assembly.test.ts +155 -110
  47. package/src/__tests__/conversation-runtime-workspace.test.ts +23 -38
  48. package/src/__tests__/conversation-seed-composer.test.ts +2 -2
  49. package/src/__tests__/conversation-slash-queue.test.ts +7 -2
  50. package/src/__tests__/conversation-slash-unknown.test.ts +25 -2
  51. package/src/__tests__/conversation-speed-override.test.ts +6 -1
  52. package/src/__tests__/conversation-title-service.test.ts +116 -0
  53. package/src/__tests__/conversation-tool-setup-app-refresh.test.ts +41 -2
  54. package/src/__tests__/conversation-usage.test.ts +1 -1
  55. package/src/__tests__/conversation-workspace-cache-state.test.ts +4 -1
  56. package/src/__tests__/conversation-workspace-injection.test.ts +3 -0
  57. package/src/__tests__/conversation-workspace-tool-tracking.test.ts +4 -1
  58. package/src/__tests__/credential-health-service.test.ts +78 -9
  59. package/src/__tests__/credential-security-invariants.test.ts +2 -2
  60. package/src/__tests__/db-schedule-syntax-migration.test.ts +1 -0
  61. package/src/__tests__/empty-response-pipeline.test.ts +305 -0
  62. package/src/__tests__/extension-id-sync-guard.test.ts +3 -3
  63. package/src/__tests__/first-greeting.test.ts +247 -5
  64. package/src/__tests__/headless-browser-mode.test.ts +57 -0
  65. package/src/__tests__/history-repair-pipeline.test.ts +399 -0
  66. package/src/__tests__/host-browser-e2e-cloud.test.ts +307 -0
  67. package/src/__tests__/host-browser-e2e-self-hosted.test.ts +3 -3
  68. package/src/__tests__/host-proxy-interface.test.ts +36 -2
  69. package/src/__tests__/image-credentials.test.ts +137 -0
  70. package/src/__tests__/image-service-dispatcher.test.ts +186 -0
  71. package/src/__tests__/injector-chain.test.ts +526 -0
  72. package/src/__tests__/intent-routing.test.ts +0 -26
  73. package/src/__tests__/llm-call-pipeline.test.ts +285 -0
  74. package/src/__tests__/llm-schema.test.ts +1 -1
  75. package/src/__tests__/media-generate-image.test.ts +119 -13
  76. package/src/__tests__/memory-retrieval-pipeline.test.ts +401 -0
  77. package/src/__tests__/memory-upsert-concurrency.test.ts +1 -0
  78. package/src/__tests__/migration-import-from-url.test.ts +5 -68
  79. package/src/__tests__/model-intents.test.ts +4 -2
  80. package/src/__tests__/notification-broadcaster.test.ts +3 -3
  81. package/src/__tests__/notification-decision-strategy.test.ts +0 -11
  82. package/src/__tests__/notification-schedule-notify-dedup.test.ts +108 -0
  83. package/src/__tests__/oauth-apps-routes.test.ts +1 -1
  84. package/src/__tests__/oauth-cli.test.ts +14 -12
  85. package/src/__tests__/oauth-connect-orchestrator.test.ts +4 -13
  86. package/src/__tests__/oauth-provider-serializer.test.ts +6 -4
  87. package/src/__tests__/oauth-provider-visibility.test.ts +3 -5
  88. package/src/__tests__/oauth-providers-routes.test.ts +3 -2
  89. package/src/__tests__/oauth-store.test.ts +41 -76
  90. package/src/__tests__/onboarding-template-contract.test.ts +16 -64
  91. package/src/__tests__/openai-image-service.test.ts +368 -0
  92. package/src/__tests__/overflow-reduce-pipeline.test.ts +676 -0
  93. package/src/__tests__/permission-checker-host-gate.test.ts +0 -24
  94. package/src/__tests__/persist-onboarding-artifacts.test.ts +266 -0
  95. package/src/__tests__/persistence-pipeline.test.ts +377 -0
  96. package/src/__tests__/pipeline-runner.test.ts +565 -0
  97. package/src/__tests__/platform.test.ts +5 -2
  98. package/src/__tests__/plugin-bootstrap.test.ts +483 -0
  99. package/src/__tests__/plugin-registry.test.ts +273 -0
  100. package/src/__tests__/plugin-route-contribution.test.ts +288 -0
  101. package/src/__tests__/plugin-skill-contribution.test.ts +367 -0
  102. package/src/__tests__/plugin-tool-contribution.test.ts +286 -0
  103. package/src/__tests__/plugin-types.test.ts +320 -0
  104. package/src/__tests__/pricing.test.ts +44 -12
  105. package/src/__tests__/proxy-approval-callback.test.ts +69 -8
  106. package/src/__tests__/reaction-persistence.test.ts +1 -0
  107. package/src/__tests__/regenerate-fire-and-forget-trace.test.ts +1 -0
  108. package/src/__tests__/registry.test.ts +0 -2
  109. package/src/__tests__/schedule-routes.test.ts +131 -1
  110. package/src/__tests__/scheduler-recurrence.test.ts +14 -70
  111. package/src/__tests__/scheduler-reuse-conversation.test.ts +10 -50
  112. package/src/__tests__/secret-detection-handler.test.ts +0 -10
  113. package/src/__tests__/shell-identity.test.ts +0 -134
  114. package/src/__tests__/suggestion-routes.test.ts +103 -4
  115. package/src/__tests__/task-memory-cleanup.test.ts +1 -0
  116. package/src/__tests__/task-scheduler.test.ts +3 -15
  117. package/src/__tests__/test-preload.ts +11 -0
  118. package/src/__tests__/title-generate-pipeline.test.ts +224 -0
  119. package/src/__tests__/token-estimate-pipeline.test.ts +431 -0
  120. package/src/__tests__/tool-error-pipeline.test.ts +244 -0
  121. package/src/__tests__/tool-execute-pipeline.test.ts +431 -0
  122. package/src/__tests__/tool-execution-pipeline.benchmark.test.ts +0 -6
  123. package/src/__tests__/tool-executor-shell-integration.test.ts +7 -10
  124. package/src/__tests__/tool-executor.test.ts +141 -0
  125. package/src/__tests__/tool-result-truncate-pipeline.test.ts +356 -0
  126. package/src/__tests__/tool-result-truncation.test.ts +0 -110
  127. package/src/__tests__/user-plugin-loader.test.ts +191 -0
  128. package/src/__tests__/workspace-migration-046-seed-conversation-starters-callsite.test.ts +185 -0
  129. package/src/__tests__/workspace-migration-049-release-notes-default-sonnet.test.ts +100 -0
  130. package/src/__tests__/workspace-migration-050-seed-main-agent-opus-callsite.test.ts +171 -0
  131. package/src/__tests__/workspace-migration-051-seed-conversation-summarization-callsite.test.ts +252 -0
  132. package/src/__tests__/workspace-migration-remove-hooks.test.ts +99 -0
  133. package/src/__tests__/workspace-policy.test.ts +21 -3
  134. package/src/agent/loop.ts +340 -102
  135. package/src/approvals/__tests__/guardian-feed-event.test.ts +304 -0
  136. package/src/approvals/guardian-request-resolvers.ts +80 -0
  137. package/src/backup/__tests__/backup-worker.test.ts +2 -13
  138. package/src/backup/backup-worker.ts +3 -15
  139. package/src/bundler/app-compiler.ts +84 -1
  140. package/src/calls/call-state.ts +2 -2
  141. package/src/channels/__tests__/types.test.ts +3 -3
  142. package/src/channels/types.ts +6 -4
  143. package/src/cli/__tests__/notifications.test.ts +87 -211
  144. package/src/cli/commands/__tests__/backup.test.ts +1 -1
  145. package/src/cli/commands/__tests__/image-generation.test.ts +255 -35
  146. package/src/cli/commands/__tests__/inference-send.test.ts +12 -0
  147. package/src/cli/commands/__tests__/tts-synthesize.test.ts +12 -0
  148. package/src/cli/commands/backup.ts +2 -2
  149. package/src/cli/commands/clients.ts +138 -0
  150. package/src/cli/commands/completions.ts +2 -9
  151. package/src/cli/commands/conversations.ts +55 -7
  152. package/src/cli/commands/image-generation.ts +33 -34
  153. package/src/cli/commands/notifications.ts +68 -103
  154. package/src/cli/commands/oauth/__tests__/providers-register.test.ts +1 -1
  155. package/src/cli/commands/oauth/__tests__/providers-update.test.ts +1 -1
  156. package/src/cli/commands/oauth/connect.ts +2 -2
  157. package/src/cli/commands/oauth/providers.ts +176 -8
  158. package/src/cli/commands/oauth/status.ts +46 -36
  159. package/src/cli/commands/skills.ts +3 -4
  160. package/src/cli/program.ts +25 -29
  161. package/src/config/__tests__/backup-schema.test.ts +7 -2
  162. package/src/config/bundled-skills/app-builder/SKILL.md +2 -2
  163. package/src/config/bundled-skills/app-builder/references/WIDGETS.md +10 -10
  164. package/src/config/bundled-skills/contacts/tools/contact-merge.ts +66 -87
  165. package/src/config/bundled-skills/contacts/tools/contact-search.ts +28 -51
  166. package/src/config/bundled-skills/contacts/tools/contact-upsert.ts +22 -40
  167. package/src/config/bundled-skills/image-studio/SKILL.md +2 -1
  168. package/src/config/bundled-skills/image-studio/TOOLS.json +2 -1
  169. package/src/config/bundled-skills/image-studio/tools/media-generate-image.ts +23 -39
  170. package/src/config/bundled-skills/messaging/SKILL.md +3 -3
  171. package/src/config/bundled-skills/messaging/tools/__tests__/messaging-feed-events.test.ts +207 -0
  172. package/src/config/bundled-skills/messaging/tools/messaging-archive-by-sender.ts +12 -0
  173. package/src/config/bundled-skills/messaging/tools/messaging-send.ts +58 -0
  174. package/src/config/bundled-skills/schedule/SKILL.md +8 -3
  175. package/src/config/bundled-skills/schedule/TOOLS.json +15 -7
  176. package/src/config/bundled-skills/schedule/references/SCRIPT_MODE_PATTERNS.md +59 -0
  177. package/src/config/bundled-tool-registry.ts +0 -15
  178. package/src/config/feature-flag-registry.json +17 -1
  179. package/src/config/schema.ts +19 -0
  180. package/src/config/schemas/backup.ts +1 -1
  181. package/src/config/schemas/conversations.ts +16 -0
  182. package/src/config/schemas/llm.ts +2 -3
  183. package/src/config/schemas/security.ts +6 -6
  184. package/src/config/schemas/tts.ts +11 -0
  185. package/src/config/skill-state.ts +6 -2
  186. package/src/config/skills.ts +94 -5
  187. package/src/context/__tests__/compact-prompt.test.ts +27 -9
  188. package/src/context/prompts/compact.md +26 -12
  189. package/src/context/tool-result-truncation.ts +3 -63
  190. package/src/context/window-manager.ts +190 -16
  191. package/src/credential-health/credential-health-service.ts +19 -6
  192. package/src/daemon/__tests__/conversation-feed-event.test.ts +317 -0
  193. package/src/daemon/__tests__/conversation-lifecycle-auto-analyze.test.ts +4 -12
  194. package/src/daemon/__tests__/conversation-tool-setup.test.ts +14 -15
  195. package/src/daemon/config-watcher.ts +0 -2
  196. package/src/daemon/context-overflow-policy.ts +4 -13
  197. package/src/daemon/conversation-agent-loop-handlers.ts +83 -22
  198. package/src/daemon/conversation-agent-loop.ts +984 -683
  199. package/src/daemon/conversation-history.ts +10 -19
  200. package/src/daemon/conversation-lifecycle.ts +37 -19
  201. package/src/daemon/conversation-notifiers.ts +2 -110
  202. package/src/daemon/conversation-process.ts +14 -7
  203. package/src/daemon/conversation-runtime-assembly.ts +532 -411
  204. package/src/daemon/conversation-tool-setup.ts +41 -4
  205. package/src/daemon/conversation.ts +80 -35
  206. package/src/daemon/external-plugins-bootstrap.ts +478 -0
  207. package/src/daemon/first-greeting.ts +191 -14
  208. package/src/daemon/handlers/config-model.ts +11 -0
  209. package/src/daemon/handlers/skills.ts +5 -1
  210. package/src/daemon/lifecycle.ts +33 -68
  211. package/src/daemon/message-types/computer-use.ts +2 -34
  212. package/src/daemon/message-types/conversations.ts +49 -0
  213. package/src/daemon/message-types/messages.ts +12 -0
  214. package/src/daemon/server.ts +5 -3
  215. package/src/daemon/shutdown-handlers.ts +2 -12
  216. package/src/daemon/tool-side-effects.ts +14 -56
  217. package/src/heartbeat/__tests__/heartbeat-feed-event.test.ts +160 -0
  218. package/src/heartbeat/heartbeat-service.ts +24 -1
  219. package/src/home/__tests__/feed-population-integration.test.ts +312 -0
  220. package/src/home/emit-feed-event.ts +7 -0
  221. package/src/home/feed-types.ts +41 -2
  222. package/src/home/rewrite-command-preview.ts +66 -0
  223. package/src/ipc/__tests__/socket-path.test.ts +11 -50
  224. package/src/ipc/cli-client.ts +1 -1
  225. package/src/ipc/cli-server.ts +3 -3
  226. package/src/ipc/gateway-client.ts +4 -1
  227. package/src/ipc/routes/browser-context.ts +2 -0
  228. package/src/ipc/routes/browser.ts +1 -0
  229. package/src/ipc/routes/get-contact.ts +16 -0
  230. package/src/ipc/routes/index.ts +14 -0
  231. package/src/ipc/routes/list-clients.ts +31 -0
  232. package/src/ipc/routes/merge-contacts.ts +17 -0
  233. package/src/ipc/routes/notification.ts +133 -0
  234. package/src/ipc/routes/rename-conversation.ts +59 -0
  235. package/src/ipc/routes/search-contacts.ts +19 -0
  236. package/src/ipc/routes/upsert-contact.ts +25 -0
  237. package/src/ipc/socket-path.ts +14 -38
  238. package/src/media/app-icon-generator.ts +23 -46
  239. package/src/media/avatar-router.ts +26 -41
  240. package/src/media/gemini-image-service.ts +8 -41
  241. package/src/media/image-credentials.ts +73 -0
  242. package/src/media/image-service.ts +85 -0
  243. package/src/media/openai-image-service.ts +131 -0
  244. package/src/media/types.ts +46 -0
  245. package/src/memory/conversation-crud.ts +48 -18
  246. package/src/memory/conversation-queries.ts +57 -4
  247. package/src/memory/conversation-title-service.ts +25 -0
  248. package/src/memory/db-init.ts +8 -0
  249. package/src/memory/embedding-gemini.test.ts +41 -2
  250. package/src/memory/embedding-gemini.ts +6 -1
  251. package/src/memory/graph/bootstrap.test.ts +282 -0
  252. package/src/memory/graph/bootstrap.ts +8 -5
  253. package/src/memory/graph/extraction.ts +10 -2
  254. package/src/memory/graph/graph-search.test.ts +1 -0
  255. package/src/memory/graph/inspect.ts +2 -2
  256. package/src/memory/graph/retriever.ts +10 -3
  257. package/src/memory/migrations/041-approval-prompt-ts-tracker.ts +26 -0
  258. package/src/memory/migrations/149-oauth-tables.ts +1 -0
  259. package/src/memory/migrations/223-schedule-script-column.ts +11 -0
  260. package/src/memory/migrations/224-oauth-providers-managed-service-is-paid.ts +24 -0
  261. package/src/memory/migrations/225-oauth-providers-available-scopes.ts +13 -0
  262. package/src/memory/migrations/index.ts +4 -0
  263. package/src/memory/pkb/pkb-index.test.ts +1 -0
  264. package/src/memory/pkb/pkb-reconcile.test.ts +1 -0
  265. package/src/memory/pkb/pkb-search.test.ts +65 -4
  266. package/src/memory/pkb/pkb-search.ts +40 -18
  267. package/src/memory/qdrant-client.test.ts +60 -0
  268. package/src/memory/qdrant-client.ts +25 -0
  269. package/src/memory/schema/infrastructure.ts +1 -0
  270. package/src/memory/schema/oauth.ts +4 -1
  271. package/src/messaging/providers/slack/render-transcript.test.ts +77 -29
  272. package/src/messaging/providers/slack/render-transcript.ts +58 -0
  273. package/src/notifications/conversation-pairing.ts +78 -19
  274. package/src/notifications/copy-composer.ts +0 -5
  275. package/src/notifications/emit-signal.ts +1 -1
  276. package/src/notifications/signal.ts +1 -2
  277. package/src/oauth/AGENTS.md +1 -1
  278. package/src/oauth/__tests__/identity-verifier.test.ts +2 -1
  279. package/src/oauth/connect-orchestrator.ts +8 -34
  280. package/src/oauth/connect-types.ts +6 -10
  281. package/src/oauth/manual-token-connection.ts +23 -0
  282. package/src/oauth/oauth-store.ts +30 -14
  283. package/src/oauth/provider-serializer.ts +6 -1
  284. package/src/oauth/seed-providers.ts +56 -108
  285. package/src/outbound-proxy/http-forwarder.ts +9 -0
  286. package/src/permissions/approval-policy.test.ts +293 -18
  287. package/src/permissions/approval-policy.ts +110 -58
  288. package/src/permissions/arg-parser.test.ts +161 -0
  289. package/src/permissions/arg-parser.ts +141 -0
  290. package/src/permissions/bash-risk-classifier.test.ts +414 -2
  291. package/src/permissions/bash-risk-classifier.ts +303 -60
  292. package/src/permissions/checker.ts +157 -29
  293. package/src/permissions/command-registry.test.ts +239 -0
  294. package/src/permissions/command-registry.ts +234 -54
  295. package/src/permissions/defaults.ts +5 -4
  296. package/src/permissions/gateway-threshold-reader.ts +196 -0
  297. package/src/permissions/prompter.ts +4 -0
  298. package/src/permissions/risk-types.ts +61 -4
  299. package/src/permissions/schedule-risk-classifier.test.ts +129 -0
  300. package/src/permissions/schedule-risk-classifier.ts +85 -0
  301. package/src/permissions/shell-identity.ts +2 -42
  302. package/src/permissions/types.ts +2 -0
  303. package/src/permissions/workspace-policy.ts +8 -3
  304. package/src/plugins/defaults/circuit-breaker.ts +146 -0
  305. package/src/plugins/defaults/compaction.ts +145 -0
  306. package/src/plugins/defaults/empty-response.ts +126 -0
  307. package/src/plugins/defaults/history-repair.ts +85 -0
  308. package/src/plugins/defaults/index.ts +116 -0
  309. package/src/plugins/defaults/injectors.ts +491 -0
  310. package/src/plugins/defaults/llm-call.ts +82 -0
  311. package/src/plugins/defaults/memory-retrieval.ts +226 -0
  312. package/src/plugins/defaults/overflow-reduce.ts +181 -0
  313. package/src/plugins/defaults/persistence.ts +129 -0
  314. package/src/plugins/defaults/title-generate.ts +95 -0
  315. package/src/plugins/defaults/token-estimate.ts +104 -0
  316. package/src/plugins/defaults/tool-error.ts +126 -0
  317. package/src/plugins/defaults/tool-execute.ts +89 -0
  318. package/src/plugins/defaults/tool-result-truncate.ts +88 -0
  319. package/src/plugins/pipeline.ts +316 -0
  320. package/src/plugins/plugin-skill-contributions.ts +292 -0
  321. package/src/plugins/registry.ts +241 -0
  322. package/src/plugins/types.ts +1134 -0
  323. package/src/plugins/user-loader.ts +177 -0
  324. package/src/prompts/templates/BOOTSTRAP.md +27 -77
  325. package/src/providers/model-catalog.ts +52 -29
  326. package/src/providers/model-intents.ts +1 -1
  327. package/src/providers/openrouter/client.ts +5 -1
  328. package/src/providers/speech-to-text/deepgram-realtime.test.ts +61 -0
  329. package/src/providers/speech-to-text/deepgram-realtime.ts +57 -0
  330. package/src/providers/speech-to-text/xai-realtime.test.ts +72 -4
  331. package/src/providers/speech-to-text/xai-realtime.ts +39 -14
  332. package/src/runtime/AGENTS.md +25 -16
  333. package/src/runtime/__tests__/browser-extension-pair-routes.test.ts +3 -3
  334. package/src/runtime/__tests__/client-registry.test.ts +293 -0
  335. package/src/runtime/client-registry.ts +261 -0
  336. package/src/runtime/http-server.ts +77 -8
  337. package/src/runtime/http-types.ts +0 -2
  338. package/src/runtime/migrations/vbundle-builder.ts +1 -22
  339. package/src/runtime/routes/approval-prompt-ts-tracker.ts +51 -31
  340. package/src/runtime/routes/approval-routes.ts +17 -0
  341. package/src/runtime/routes/browser-extension-pair-routes.ts +27 -8
  342. package/src/runtime/routes/conversation-routes.ts +223 -116
  343. package/src/runtime/routes/inbound-message-handler.ts +88 -13
  344. package/src/runtime/routes/memory-item-routes.test.ts +1 -0
  345. package/src/runtime/routes/migration-routes.ts +0 -3
  346. package/src/runtime/routes/playground/__tests__/force-compact.test.ts +284 -0
  347. package/src/runtime/routes/playground/__tests__/guard.test.ts +80 -0
  348. package/src/runtime/routes/playground/__tests__/inject-failures.test.ts +294 -0
  349. package/src/runtime/routes/playground/__tests__/reset-circuit.test.ts +271 -0
  350. package/src/runtime/routes/playground/__tests__/seed-conversation.test.ts +202 -0
  351. package/src/runtime/routes/playground/__tests__/seeded-conversations.test.ts +309 -0
  352. package/src/runtime/routes/playground/__tests__/state.test.ts +224 -0
  353. package/src/runtime/routes/playground/conversation-not-found.ts +29 -0
  354. package/src/runtime/routes/playground/deps.ts +56 -0
  355. package/src/runtime/routes/playground/force-compact.ts +73 -0
  356. package/src/runtime/routes/playground/guard.ts +37 -0
  357. package/src/runtime/routes/playground/index.ts +28 -0
  358. package/src/runtime/routes/playground/inject-failures.ts +159 -0
  359. package/src/runtime/routes/playground/reset-circuit.ts +115 -0
  360. package/src/runtime/routes/playground/seed-conversation.ts +139 -0
  361. package/src/runtime/routes/playground/seeded-conversations.ts +78 -0
  362. package/src/runtime/routes/playground/state.ts +78 -0
  363. package/src/runtime/routes/schedule-routes.ts +89 -8
  364. package/src/runtime/skill-route-registry.ts +75 -15
  365. package/src/schedule/run-script.ts +68 -0
  366. package/src/schedule/schedule-store.ts +7 -1
  367. package/src/schedule/scheduler.ts +48 -8
  368. package/src/skills/catalog-cache.ts +12 -5
  369. package/src/tools/browser/__tests__/browser-status.test.ts +189 -0
  370. package/src/tools/browser/browser-execution.ts +88 -19
  371. package/src/tools/browser/cdp-client/__tests__/extension-cdp-client.test.ts +230 -0
  372. package/src/tools/browser/cdp-client/__tests__/factory.test.ts +146 -3
  373. package/src/tools/browser/cdp-client/extension-cdp-client.ts +54 -3
  374. package/src/tools/browser/cdp-client/factory.ts +15 -4
  375. package/src/tools/executor.ts +126 -74
  376. package/src/tools/network/script-proxy/session-manager.ts +37 -1
  377. package/src/tools/permission-checker.ts +98 -49
  378. package/src/tools/policy-context.ts +4 -0
  379. package/src/tools/registry.ts +140 -3
  380. package/src/tools/schedule/create.ts +23 -8
  381. package/src/tools/schedule/update.ts +3 -1
  382. package/src/tools/secret-detection-handler.ts +0 -51
  383. package/src/tools/system/avatar-generator.ts +6 -2
  384. package/src/tools/types.ts +28 -2
  385. package/src/util/platform.ts +7 -2
  386. package/src/util/pricing.ts +26 -3
  387. package/src/workspace/migrations/006-services-config.ts +2 -4
  388. package/src/workspace/migrations/022-move-hooks-to-workspace.ts +2 -3
  389. package/src/workspace/migrations/041-backfill-google-gmail-settings-scope.ts +3 -4
  390. package/src/workspace/migrations/046-seed-conversation-starters-callsite.ts +108 -0
  391. package/src/workspace/migrations/047-remove-watch-callsites.ts +54 -0
  392. package/src/workspace/migrations/048-remove-workspace-hooks.ts +81 -0
  393. package/src/workspace/migrations/049-release-notes-default-sonnet.ts +80 -0
  394. package/src/workspace/migrations/050-seed-main-agent-opus-callsite.ts +86 -0
  395. package/src/workspace/migrations/051-seed-conversation-summarization-callsite.ts +128 -0
  396. package/src/workspace/migrations/registry.ts +12 -0
  397. package/tsconfig.json +1 -1
  398. package/hook-templates/debug-prompt-logger/hook.json +0 -7
  399. package/hook-templates/debug-prompt-logger/run.sh +0 -66
  400. package/src/__tests__/compaction-circuit-breaker.test.ts +0 -336
  401. package/src/__tests__/context-overflow-approval.test.ts +0 -156
  402. package/src/__tests__/hooks-blocking.test.ts +0 -178
  403. package/src/__tests__/hooks-cli.test.ts +0 -182
  404. package/src/__tests__/hooks-config.test.ts +0 -108
  405. package/src/__tests__/hooks-discovery.test.ts +0 -211
  406. package/src/__tests__/hooks-integration.test.ts +0 -196
  407. package/src/__tests__/hooks-manager.test.ts +0 -226
  408. package/src/__tests__/hooks-runner.test.ts +0 -175
  409. package/src/__tests__/hooks-settings.test.ts +0 -160
  410. package/src/__tests__/hooks-templates.test.ts +0 -169
  411. package/src/__tests__/hooks-ts-runner.test.ts +0 -170
  412. package/src/__tests__/hooks-watch.test.ts +0 -112
  413. package/src/__tests__/notification-schedule-dedup.test.ts +0 -213
  414. package/src/__tests__/oauth-scope-policy.test.ts +0 -180
  415. package/src/__tests__/send-notification-tool.test.ts +0 -83
  416. package/src/cli/commands/shotgun.ts +0 -266
  417. package/src/config/bundled-skills/conversations/SKILL.md +0 -20
  418. package/src/config/bundled-skills/conversations/TOOLS.json +0 -23
  419. package/src/config/bundled-skills/conversations/tools/rename-conversation.ts +0 -88
  420. package/src/config/bundled-skills/heartbeat/SKILL.md +0 -43
  421. package/src/config/bundled-skills/notifications/SKILL.md +0 -40
  422. package/src/config/bundled-skills/notifications/TOOLS.json +0 -80
  423. package/src/config/bundled-skills/notifications/tools/send-notification.ts +0 -152
  424. package/src/config/bundled-skills/notifications/tools/shared.ts +0 -13
  425. package/src/config/bundled-skills/screen-watch/SKILL.md +0 -27
  426. package/src/config/bundled-skills/screen-watch/TOOLS.json +0 -35
  427. package/src/config/bundled-skills/screen-watch/tools/start-screen-watch.ts +0 -12
  428. package/src/config/bundled-skills/skills-catalog/SKILL.md +0 -84
  429. package/src/daemon/context-overflow-approval.ts +0 -52
  430. package/src/daemon/watch-handler.ts +0 -399
  431. package/src/hooks/cli.ts +0 -253
  432. package/src/hooks/config.ts +0 -100
  433. package/src/hooks/discovery.ts +0 -135
  434. package/src/hooks/manager.ts +0 -179
  435. package/src/hooks/runner.ts +0 -117
  436. package/src/hooks/templates.ts +0 -77
  437. package/src/hooks/types.ts +0 -75
  438. package/src/oauth/scope-policy.ts +0 -89
  439. package/src/runtime/gateway-internal-client.ts +0 -94
  440. package/src/runtime/routes/watch-routes.ts +0 -156
  441. package/src/signals/shotgun.ts +0 -203
  442. package/src/tools/watch/screen-watch.ts +0 -144
  443. package/src/tools/watch/watch-state.ts +0 -142
package/src/agent/loop.ts CHANGED
@@ -6,8 +6,23 @@ import {
6
6
  estimateToolsTokens,
7
7
  getCalibrationProviderKey,
8
8
  } from "../context/token-estimator.js";
9
- import { truncateOversizedToolResults } from "../context/tool-result-truncation.js";
10
- import { getHookManager } from "../hooks/manager.js";
9
+ import { calculateMaxToolResultChars } from "../context/tool-result-truncation.js";
10
+ import { defaultEmptyResponseTerminal } from "../plugins/defaults/empty-response.js";
11
+ import { defaultToolErrorTerminal } from "../plugins/defaults/tool-error.js";
12
+ import { defaultToolResultTruncateTerminal } from "../plugins/defaults/tool-result-truncate.js";
13
+ import { DEFAULT_TIMEOUTS, runPipeline } from "../plugins/pipeline.js";
14
+ import { getMiddlewaresFor } from "../plugins/registry.js";
15
+ import type {
16
+ EmptyResponseArgs,
17
+ EmptyResponseDecision,
18
+ LLMCallArgs,
19
+ LLMCallResult,
20
+ ToolErrorArgs,
21
+ ToolErrorDecision,
22
+ ToolResultTruncateArgs,
23
+ ToolResultTruncateResult,
24
+ TurnContext,
25
+ } from "../plugins/types.js";
11
26
  import type {
12
27
  ContentBlock,
13
28
  Message,
@@ -20,7 +35,7 @@ import {
20
35
  applyStreamingSubstitution,
21
36
  applySubstitutions,
22
37
  } from "../tools/sensitive-output-placeholders.js";
23
- import { ProviderError } from "../util/errors.js";
38
+ import { AssistantError, ErrorCode, ProviderError } from "../util/errors.js";
24
39
  import { getLogger } from "../util/logger.js";
25
40
  import { isRetryableNetworkError } from "../util/retry.js";
26
41
 
@@ -75,6 +90,10 @@ export type AgentEvent =
75
90
  };
76
91
  status?: string;
77
92
  contentBlocks?: ContentBlock[];
93
+ riskLevel?: string;
94
+ riskReason?: string;
95
+ isContainerized?: boolean;
96
+ riskScopeOptions?: Array<{ pattern: string; label: string }>;
78
97
  }
79
98
  | { type: "tool_use_preview_start"; toolUseId: string; toolName: string }
80
99
  | {
@@ -125,6 +144,61 @@ const DEFAULT_CONFIG: AgentLoopConfig = {
125
144
  const MAX_CONSECUTIVE_ERROR_NUDGES = 3;
126
145
  const MAX_EMPTY_RESPONSE_RETRIES = 1;
127
146
 
147
+ /**
148
+ * Build a minimal {@link TurnContext} for pipeline invocations inside the
149
+ * agent loop. Real production call sites thread a full `TurnContext` into
150
+ * `AgentLoop.run()` (see the `turnContext` parameter on
151
+ * {@link AgentLoop.run}); this helper is the fallback used only by unit
152
+ * tests that construct `AgentLoop` directly without an orchestrator.
153
+ *
154
+ * When the orchestrator-supplied context is present, {@link resolveLoopTurnContext}
155
+ * is used instead of this helper so the pipeline sees the real
156
+ * `conversationId`, trust, and `contextWindowManager`. In the fallback path
157
+ * the returned context is still useful for pipeline logging: `requestId`
158
+ * surfaces in every structured record, and `turnIndex` reflects the
159
+ * current tool-use iteration.
160
+ */
161
+ function buildLoopTurnContext(
162
+ requestId: string | undefined,
163
+ turnIndex: number,
164
+ ): TurnContext {
165
+ return {
166
+ requestId: requestId ?? "agent-loop",
167
+ // Loop-scoped pipelines do not currently carry a conversation ID; the
168
+ // outer orchestrator owns that dimension. Use a fixed sentinel so log
169
+ // consumers can filter loop-origin records out of conversation queries.
170
+ conversationId: "agent-loop",
171
+ turnIndex,
172
+ trust: {
173
+ sourceChannel: "vellum",
174
+ trustClass: "unknown",
175
+ },
176
+ };
177
+ }
178
+
179
+ /**
180
+ * Produce a `TurnContext` for a pipeline call inside {@link AgentLoop.run}.
181
+ *
182
+ * When the orchestrator supplied a `turnContext`, clone it and overwrite
183
+ * `requestId` + `turnIndex` with the loop-scoped values so plugin log
184
+ * records correctly attribute the call to the current tool-use iteration
185
+ * while preserving the real `conversationId`, trust context, and
186
+ * `contextWindowManager` the orchestrator assembled for the turn. Without
187
+ * an orchestrator context (unit tests that instantiate `AgentLoop` with no
188
+ * `turnContext`), fall back to {@link buildLoopTurnContext}'s synthesized
189
+ * placeholder.
190
+ */
191
+ function resolveLoopTurnContext(
192
+ base: TurnContext | undefined,
193
+ requestId: string | undefined,
194
+ turnIndex: number,
195
+ ): TurnContext {
196
+ if (base) {
197
+ return { ...base, requestId: requestId ?? base.requestId, turnIndex };
198
+ }
199
+ return buildLoopTurnContext(requestId, turnIndex);
200
+ }
201
+
128
202
  /**
129
203
  * User-config HTTP status codes that should never page the on-call: billing
130
204
  * exhaustion (402), invalid credentials (401), and forbidden/plan-gated (403).
@@ -167,6 +241,42 @@ export interface ResolvedSystemPrompt {
167
241
  model?: string;
168
242
  }
169
243
 
244
+ /**
245
+ * Callback shape the loop uses to execute a tool invocation.
246
+ *
247
+ * The trailing `turnContext` is optional so in-process tests that wire the
248
+ * callback without an orchestrator keep working. Production sites (the
249
+ * `Conversation`'s `createToolExecutor`) forward the supplied context into
250
+ * `ToolExecutor.execute` so the `toolExecute` pipeline sees the orchestrator's
251
+ * real conversation identity/trust/contextWindowManager instead of the
252
+ * synthesized placeholder `ToolExecutor` would otherwise build from the
253
+ * `ToolContext` alone.
254
+ */
255
+ export type LoopToolExecutor = (
256
+ name: string,
257
+ input: Record<string, unknown>,
258
+ onOutput?: (chunk: string) => void,
259
+ toolUseId?: string,
260
+ turnContext?: TurnContext,
261
+ ) => Promise<{
262
+ content: string;
263
+ isError: boolean;
264
+ diff?: {
265
+ filePath: string;
266
+ oldContent: string;
267
+ newContent: string;
268
+ isNewFile: boolean;
269
+ };
270
+ status?: string;
271
+ contentBlocks?: ContentBlock[];
272
+ sensitiveBindings?: SensitiveOutputBinding[];
273
+ yieldToUser?: boolean;
274
+ riskLevel?: string;
275
+ riskReason?: string;
276
+ isContainerized?: boolean;
277
+ riskScopeOptions?: Array<{ pattern: string; label: string }>;
278
+ }>;
279
+
170
280
  export class AgentLoop {
171
281
  private provider: Provider;
172
282
  private systemPrompt: string;
@@ -176,52 +286,14 @@ export class AgentLoop {
176
286
  private resolveSystemPrompt:
177
287
  | ((history: Message[]) => ResolvedSystemPrompt)
178
288
  | null;
179
- private toolExecutor:
180
- | ((
181
- name: string,
182
- input: Record<string, unknown>,
183
- onOutput?: (chunk: string) => void,
184
- toolUseId?: string,
185
- ) => Promise<{
186
- content: string;
187
- isError: boolean;
188
- diff?: {
189
- filePath: string;
190
- oldContent: string;
191
- newContent: string;
192
- isNewFile: boolean;
193
- };
194
- status?: string;
195
- contentBlocks?: ContentBlock[];
196
- sensitiveBindings?: SensitiveOutputBinding[];
197
- yieldToUser?: boolean;
198
- }>)
199
- | null;
289
+ private toolExecutor: LoopToolExecutor | null;
200
290
 
201
291
  constructor(
202
292
  provider: Provider,
203
293
  systemPrompt: string,
204
294
  config?: Partial<AgentLoopConfig>,
205
295
  tools?: ToolDefinition[],
206
- toolExecutor?: (
207
- name: string,
208
- input: Record<string, unknown>,
209
- onOutput?: (chunk: string) => void,
210
- toolUseId?: string,
211
- ) => Promise<{
212
- content: string;
213
- isError: boolean;
214
- diff?: {
215
- filePath: string;
216
- oldContent: string;
217
- newContent: string;
218
- isNewFile: boolean;
219
- };
220
- status?: string;
221
- contentBlocks?: ContentBlock[];
222
- sensitiveBindings?: SensitiveOutputBinding[];
223
- yieldToUser?: boolean;
224
- }>,
296
+ toolExecutor?: LoopToolExecutor,
225
297
  resolveTools?: (history: Message[]) => ToolDefinition[],
226
298
  resolveSystemPrompt?: (history: Message[]) => ResolvedSystemPrompt,
227
299
  ) {
@@ -234,6 +306,21 @@ export class AgentLoop {
234
306
  this.toolExecutor = toolExecutor ?? null;
235
307
  }
236
308
 
309
+ /**
310
+ * Resolve the tool definitions sent to the provider for the given turn.
311
+ *
312
+ * Mirrors the logic of {@link getToolTokenBudget} but returns the tool
313
+ * array itself — callers that need to thread the tool set into a plugin
314
+ * pipeline (e.g. `tokenEstimate`, where the pipeline's args include
315
+ * `tools`) use this rather than re-implementing the dynamic-vs-static
316
+ * resolver fork.
317
+ */
318
+ getResolvedTools(history?: Message[]): ToolDefinition[] {
319
+ return history && this.resolveTools
320
+ ? this.resolveTools(history)
321
+ : this.tools;
322
+ }
323
+
237
324
  /**
238
325
  * Estimate token cost of the tool definitions sent to the provider.
239
326
  *
@@ -243,9 +330,7 @@ export class AgentLoop {
243
330
  * without a resolver), falls back to the static `this.tools`.
244
331
  */
245
332
  getToolTokenBudget(history?: Message[]): number {
246
- const tools =
247
- history && this.resolveTools ? this.resolveTools(history) : this.tools;
248
- return estimateToolsTokens(tools);
333
+ return estimateToolsTokens(this.getResolvedTools(history));
249
334
  }
250
335
 
251
336
  async run(
@@ -253,8 +338,19 @@ export class AgentLoop {
253
338
  onEvent: (event: AgentEvent) => void | Promise<void>,
254
339
  signal?: AbortSignal,
255
340
  requestId?: string,
256
- onCheckpoint?: (checkpoint: CheckpointInfo) => CheckpointDecision,
341
+ onCheckpoint?: (
342
+ checkpoint: CheckpointInfo,
343
+ ) => CheckpointDecision | Promise<CheckpointDecision>,
257
344
  callSite?: LLMCallSite,
345
+ /**
346
+ * Optional per-turn context supplied by the orchestrator. Every pipeline
347
+ * invocation inside the loop clones from this value (overwriting only
348
+ * `turnIndex`/`requestId`) so middleware sees the real conversation
349
+ * identity, trust class, and `contextWindowManager` rather than the
350
+ * `"agent-loop"` sentinel used when the loop is instantiated standalone
351
+ * in unit tests.
352
+ */
353
+ turnContext?: TurnContext,
258
354
  ): Promise<Message[]> {
259
355
  const history = [...messages];
260
356
  const initialHistoryLength = messages.length;
@@ -354,22 +450,6 @@ export class AgentLoop {
354
450
  providerConfig.callSite = callSite;
355
451
  }
356
452
 
357
- const preLlmResult = await getHookManager().trigger("pre-llm-call", {
358
- systemPrompt: turnSystemPrompt,
359
- messages: history,
360
- toolCount: currentTools.length,
361
- });
362
-
363
- if (preLlmResult.blocked) {
364
- onEvent({
365
- type: "error",
366
- error: new Error(
367
- `LLM call blocked by hook "${preLlmResult.blockedBy}"`,
368
- ),
369
- });
370
- break;
371
- }
372
-
373
453
  // Rate-limit consecutive LLM calls to prevent spin when tools return instantly
374
454
  const minInterval = this.config.minTurnIntervalMs ?? 0;
375
455
  if (minInterval > 0 && lastLlmCallTime > 0) {
@@ -411,11 +491,26 @@ export class AgentLoop {
411
491
  stripOldImageBlocks(history),
412
492
  );
413
493
 
414
- const response = await this.provider.sendMessage(
415
- providerHistory,
416
- currentTools.length > 0 ? currentTools : undefined,
417
- turnSystemPrompt,
418
- {
494
+ // Wrap the provider call in the `llmCall` pipeline so middleware
495
+ // contributed by plugins may observe, rewrite, short-circuit, or
496
+ // post-process every LLM request. The terminal below is the real
497
+ // `provider.sendMessage(...)` call; middleware that call `next(args)`
498
+ // eventually reach it. The default `defaultLlmCallPlugin` contributes
499
+ // only a passthrough middleware that forwards to `next(args)` —
500
+ // registered at module load, it sits at the outermost layer in the
501
+ // onion, so short-circuiting there would silently disable every
502
+ // user-registered `llmCall` middleware. Timeout is `null`
503
+ // (`DEFAULT_TIMEOUTS.llmCall`) — the provider layer already enforces
504
+ // its own HTTP-level budgets.
505
+ //
506
+ // The `onEvent` wrapping is kept inside `args.options` so substitution
507
+ // and streaming behavior exactly match the pre-pipeline call site.
508
+ const llmCallArgs: LLMCallArgs = {
509
+ provider: this.provider,
510
+ messages: providerHistory,
511
+ tools: currentTools.length > 0 ? currentTools : undefined,
512
+ systemPrompt: turnSystemPrompt,
513
+ options: {
419
514
  config: providerConfig,
420
515
  onEvent: (event) => {
421
516
  if (event.type === "text_delta") {
@@ -466,6 +561,36 @@ export class AgentLoop {
466
561
  },
467
562
  signal,
468
563
  },
564
+ };
565
+
566
+ // Per-turn pipeline context. When the orchestrator threaded a full
567
+ // `turnContext` into `run()`, use it (overwriting `turnIndex` with
568
+ // the loop-scoped tool-use iteration) so middleware sees the real
569
+ // conversation identity, trust, and `contextWindowManager`. The
570
+ // synthesized fallback is only reached by standalone unit-test
571
+ // instantiations that never plumb a context through.
572
+ const turnCtx = resolveLoopTurnContext(
573
+ turnContext,
574
+ requestId,
575
+ toolUseTurns,
576
+ );
577
+
578
+ const response: LLMCallResult = await runPipeline<
579
+ LLMCallArgs,
580
+ LLMCallResult
581
+ >(
582
+ "llmCall",
583
+ getMiddlewaresFor("llmCall"),
584
+ (args) =>
585
+ args.provider.sendMessage(
586
+ args.messages,
587
+ args.tools,
588
+ args.systemPrompt,
589
+ args.options,
590
+ ),
591
+ llmCallArgs,
592
+ turnCtx,
593
+ DEFAULT_TIMEOUTS.llmCall,
469
594
  );
470
595
 
471
596
  const providerDurationMs = Date.now() - providerStart;
@@ -484,14 +609,6 @@ export class AgentLoop {
484
609
  estimatedInputTokens: preSendEstimatedTokens,
485
610
  });
486
611
 
487
- void getHookManager().trigger("post-llm-call", {
488
- model: response.model,
489
- inputTokens: response.usage.inputTokens,
490
- outputTokens: response.usage.outputTokens,
491
- contentBlockCount: response.content.length,
492
- durationMs: providerDurationMs,
493
- });
494
-
495
612
  // Flush any buffered streaming text from the substitution pipeline
496
613
  if (streamingPending.length > 0) {
497
614
  const flushed = applySubstitutions(streamingPending, substitutionMap);
@@ -554,6 +671,12 @@ export class AgentLoop {
554
671
  // invocations passed in via `messages`) must NOT suppress the
555
672
  // nudge — those turns completed long ago and have no bearing on
556
673
  // whether the current tool-use chain has delivered text yet.
674
+ //
675
+ // The actual decision (nudge vs. accept vs. error) is delegated to
676
+ // the `emptyResponse` plugin pipeline. The pipeline returns a
677
+ // decision; the loop carries out the side-effect (pushing the nudge
678
+ // or surfacing the error). See `plugins/defaults/empty-response.ts`
679
+ // for the default decision logic.
557
680
  const hasVisibleText = response.content.some(
558
681
  (block) => block.type === "text" && block.text.trim().length > 0,
559
682
  );
@@ -571,13 +694,37 @@ export class AgentLoop {
571
694
  }
572
695
  return false;
573
696
  })();
574
- if (
575
- !hasVisibleText &&
576
- toolUseBlocks.length === 0 &&
577
- toolUseTurns > 0 &&
578
- !priorAssistantHadVisibleText &&
579
- emptyResponseRetries < MAX_EMPTY_RESPONSE_RETRIES
580
- ) {
697
+
698
+ const emptyResponseArgs: EmptyResponseArgs = {
699
+ responseContent: response.content,
700
+ toolUseBlocksLength: toolUseBlocks.length,
701
+ toolUseTurns,
702
+ emptyResponseRetries,
703
+ maxEmptyResponseRetries: MAX_EMPTY_RESPONSE_RETRIES,
704
+ priorAssistantHadVisibleText,
705
+ };
706
+ const emptyResponseCtx = resolveLoopTurnContext(
707
+ turnContext,
708
+ requestId,
709
+ toolUseTurns,
710
+ );
711
+ const emptyResponseDecision: EmptyResponseDecision = await runPipeline(
712
+ "emptyResponse",
713
+ getMiddlewaresFor("emptyResponse"),
714
+ async (args) => defaultEmptyResponseTerminal(args),
715
+ emptyResponseArgs,
716
+ emptyResponseCtx,
717
+ DEFAULT_TIMEOUTS.emptyResponse,
718
+ );
719
+
720
+ if (emptyResponseDecision.action === "nudge") {
721
+ // Fall back to the canonical nudge text if the plugin returned
722
+ // `action: "nudge"` but forgot `nudgeText`. Keeps a misbehaving
723
+ // plugin from silently breaking the loop invariant that the
724
+ // model sees a coherent prompt.
725
+ const nudgeText =
726
+ emptyResponseDecision.nudgeText ??
727
+ "<system_notice>Your previous response was empty. You must respond to the user with a summary of what you found or did. Do not use any tools — just respond with text.</system_notice>";
581
728
  emptyResponseRetries++;
582
729
  rlog.warn(
583
730
  { turn: toolUseTurns, retry: emptyResponseRetries },
@@ -585,16 +732,25 @@ export class AgentLoop {
585
732
  );
586
733
  history.push({
587
734
  role: "user",
588
- content: [
589
- {
590
- type: "text",
591
- text: "<system_notice>Your previous response was empty. You must respond to the user with a summary of what you found or did. Do not use any tools — just respond with text.</system_notice>",
592
- },
593
- ],
735
+ content: [{ type: "text", text: nudgeText }],
594
736
  });
595
737
  continue;
596
738
  }
597
739
 
740
+ if (emptyResponseDecision.action === "error") {
741
+ rlog.error(
742
+ { turn: toolUseTurns, retries: emptyResponseRetries },
743
+ "emptyResponse pipeline requested error surface",
744
+ );
745
+ throw new AssistantError(
746
+ "Model returned empty response after tool results",
747
+ ErrorCode.INTERNAL_ERROR,
748
+ );
749
+ }
750
+
751
+ // action === "accept" — fall through. Emit a dedicated log line for
752
+ // the specific "empty turn after tool results, retries exhausted"
753
+ // case so ops dashboards that grep on this line keep working.
598
754
  if (
599
755
  !hasVisibleText &&
600
756
  toolUseBlocks.length === 0 &&
@@ -664,6 +820,14 @@ export class AgentLoop {
664
820
  });
665
821
  },
666
822
  toolUse.id,
823
+ // Forward the loop's resolved `TurnContext` through the
824
+ // executor callback so `ToolExecutor.execute` can thread the
825
+ // real orchestrator context into the `toolExecute` pipeline.
826
+ // Standalone tests that don't wire a `turnContext` into
827
+ // `AgentLoop.run()` pass `undefined` here and the executor
828
+ // falls back to the synthesized placeholder — preserving the
829
+ // existing unit-test behavior.
830
+ turnCtx,
667
831
  );
668
832
 
669
833
  return { toolUse, result };
@@ -727,12 +891,52 @@ export class AgentLoop {
727
891
  }),
728
892
  );
729
893
 
730
- // Pre-emptively truncate oversized tool results to prevent context overflow
731
- const { blocks: resultBlocks, truncatedCount } =
732
- truncateOversizedToolResults(
733
- rawResultBlocks,
734
- this.config.maxInputTokens ?? 180_000,
894
+ // Pre-emptively truncate oversized tool results to prevent context
895
+ // overflow. The work is delegated to the `toolResultTruncate`
896
+ // plugin pipeline so downstream plugins can swap in a smarter
897
+ // truncation strategy (e.g. a summariser) while the default
898
+ // middleware preserves the historical tail-drop behaviour.
899
+ const contextWindowTokens = this.config.maxInputTokens ?? 180_000;
900
+ const maxChars = calculateMaxToolResultChars(contextWindowTokens);
901
+ const truncateMiddlewares = getMiddlewaresFor("toolResultTruncate");
902
+
903
+ let truncatedCount = 0;
904
+ const truncatedBlocks: ContentBlock[] = [];
905
+ for (const block of rawResultBlocks) {
906
+ if (block.type !== "tool_result") {
907
+ truncatedBlocks.push(block);
908
+ continue;
909
+ }
910
+ const toolBlock = block as ToolResultContent;
911
+ if (
912
+ typeof toolBlock.content !== "string" ||
913
+ toolBlock.content.length <= maxChars
914
+ ) {
915
+ truncatedBlocks.push(block);
916
+ continue;
917
+ }
918
+ const pipelineResult = await runPipeline<
919
+ ToolResultTruncateArgs,
920
+ ToolResultTruncateResult
921
+ >(
922
+ "toolResultTruncate",
923
+ truncateMiddlewares,
924
+ async (args) => defaultToolResultTruncateTerminal(args),
925
+ { content: toolBlock.content, maxChars },
926
+ turnCtx,
927
+ DEFAULT_TIMEOUTS.toolResultTruncate,
735
928
  );
929
+ if (pipelineResult.truncated) {
930
+ truncatedCount++;
931
+ truncatedBlocks.push({
932
+ ...toolBlock,
933
+ content: pipelineResult.content,
934
+ });
935
+ } else {
936
+ truncatedBlocks.push(block);
937
+ }
938
+ }
939
+ const resultBlocks = truncatedBlocks;
736
940
  if (truncatedCount > 0) {
737
941
  log.warn(
738
942
  `Truncated ${truncatedCount} oversized tool result(s) to prevent context overflow`,
@@ -758,6 +962,10 @@ export class AgentLoop {
758
962
  diff: result.diff,
759
963
  status: result.status,
760
964
  contentBlocks: result.contentBlocks,
965
+ riskLevel: result.riskLevel,
966
+ riskReason: result.riskReason,
967
+ isContainerized: result.isContainerized,
968
+ riskScopeOptions: result.riskScopeOptions,
761
969
  });
762
970
  }
763
971
 
@@ -779,29 +987,59 @@ export class AgentLoop {
779
987
  // When any tool returned an error, nudge the LLM to retry with
780
988
  // corrected parameters instead of ending its turn. Skip the nudge
781
989
  // after MAX_CONSECUTIVE_ERROR_NUDGES consecutive error turns
782
- // (the error is likely unrecoverable at that point).
990
+ // (the error is likely unrecoverable at that point). The nudge
991
+ // decision is delegated to the `toolError` plugin pipeline so user
992
+ // plugins can change the text, observe the event, or suppress it.
783
993
  const hasToolError = toolResults.some(({ result }) => result.isError);
784
994
  if (hasToolError) {
785
995
  consecutiveErrorTurns++;
786
996
  } else {
787
997
  consecutiveErrorTurns = 0;
788
998
  }
789
- if (
790
- hasToolError &&
791
- consecutiveErrorTurns <= MAX_CONSECUTIVE_ERROR_NUDGES
792
- ) {
999
+ const toolErrorArgs: ToolErrorArgs = {
1000
+ hasToolError,
1001
+ consecutiveErrorTurns,
1002
+ maxConsecutiveErrorNudges: MAX_CONSECUTIVE_ERROR_NUDGES,
1003
+ };
1004
+ const toolErrorCtx: TurnContext = resolveLoopTurnContext(
1005
+ turnContext,
1006
+ requestId,
1007
+ toolUseTurns - 1,
1008
+ );
1009
+ const toolErrorDecision = await runPipeline<
1010
+ ToolErrorArgs,
1011
+ ToolErrorDecision
1012
+ >(
1013
+ "toolError",
1014
+ getMiddlewaresFor("toolError"),
1015
+ // Terminal: the canonical nudge decision. The default plugin's
1016
+ // middleware is a passthrough (so later-registered user plugins
1017
+ // aren't shadowed), so this terminal is what actually produces
1018
+ // the decision when no user plugin overrides it. Wiring the
1019
+ // decision here — rather than inside the default plugin's
1020
+ // middleware — also preserves the legacy nudge for direct
1021
+ // AgentLoop callers (tests, benchmarks) that skip
1022
+ // `bootstrapPlugins()` and therefore never register the default.
1023
+ async (args) => defaultToolErrorTerminal(args),
1024
+ toolErrorArgs,
1025
+ toolErrorCtx,
1026
+ DEFAULT_TIMEOUTS.toolError,
1027
+ );
1028
+ if (toolErrorDecision.action === "nudge") {
793
1029
  resultBlocks.push({
794
1030
  type: "text",
795
- text: "<system_notice>One or more tool calls returned an error. If the error looks recoverable (e.g. missing or invalid parameters), fix the parameters and retry. If the error is clearly unrecoverable (e.g. a service is down, a resource does not exist, or a permission is permanently denied), report it to the user.</system_notice>",
1031
+ text: toolErrorDecision.nudgeText,
796
1032
  });
797
1033
  }
798
1034
 
799
1035
  // Add tool results as a user message and continue the loop
800
1036
  history.push({ role: "user", content: resultBlocks });
801
1037
 
802
- // Invoke checkpoint callback after tool results are in history
1038
+ // Invoke checkpoint callback after tool results are in history.
1039
+ // The callback may be async — the mid-loop budget check delegates
1040
+ // to the `tokenEstimate` plugin pipeline, which is asynchronous.
803
1041
  if (onCheckpoint) {
804
- const decision = onCheckpoint({
1042
+ const decision = await onCheckpoint({
805
1043
  turnIndex: toolUseTurns - 1, // 0-based (toolUseTurns was already incremented)
806
1044
  toolCount: toolUseBlocks.length,
807
1045
  hasToolUse: true,