@vellumai/assistant 0.4.17 → 0.4.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (528) hide show
  1. package/docs/runbook-trusted-contacts.md +5 -3
  2. package/eslint.config.mjs +2 -2
  3. package/package.json +1 -1
  4. package/src/__tests__/access-request-decision.test.ts +128 -120
  5. package/src/__tests__/account-registry.test.ts +121 -110
  6. package/src/__tests__/active-skill-tools.test.ts +200 -172
  7. package/src/__tests__/actor-token-service.test.ts +341 -274
  8. package/src/__tests__/agent-loop-thinking.test.ts +28 -19
  9. package/src/__tests__/agent-loop.test.ts +798 -378
  10. package/src/__tests__/anthropic-provider.test.ts +405 -247
  11. package/src/__tests__/app-builder-tool-scripts.test.ts +97 -97
  12. package/src/__tests__/app-bundler.test.ts +112 -79
  13. package/src/__tests__/app-executors.test.ts +205 -178
  14. package/src/__tests__/app-git-history.test.ts +90 -73
  15. package/src/__tests__/app-git-service.test.ts +67 -53
  16. package/src/__tests__/app-open-proxy.test.ts +29 -25
  17. package/src/__tests__/approval-conversation-turn.test.ts +100 -81
  18. package/src/__tests__/approval-hardcoded-copy-guard.test.ts +45 -17
  19. package/src/__tests__/approval-message-composer.test.ts +119 -119
  20. package/src/__tests__/approval-primitive.test.ts +264 -233
  21. package/src/__tests__/approval-routes-http.test.ts +4 -3
  22. package/src/__tests__/asset-materialize-tool.test.ts +250 -178
  23. package/src/__tests__/asset-search-tool.test.ts +251 -191
  24. package/src/__tests__/assistant-attachment-directive.test.ts +187 -142
  25. package/src/__tests__/assistant-attachments.test.ts +254 -186
  26. package/src/__tests__/assistant-event-hub.test.ts +105 -63
  27. package/src/__tests__/assistant-event.test.ts +66 -58
  28. package/src/__tests__/assistant-events-sse-hardening.test.ts +113 -73
  29. package/src/__tests__/assistant-feature-flag-guard.test.ts +78 -52
  30. package/src/__tests__/assistant-feature-flag-guardrails.test.ts +48 -45
  31. package/src/__tests__/assistant-feature-flags-integration.test.ts +118 -77
  32. package/src/__tests__/assistant-id-boundary-guard.test.ts +158 -104
  33. package/src/__tests__/attachments-store.test.ts +240 -183
  34. package/src/__tests__/attachments.test.ts +70 -62
  35. package/src/__tests__/audit-log-rotation.test.ts +50 -35
  36. package/src/__tests__/browser-fill-credential.test.ts +169 -101
  37. package/src/__tests__/browser-manager.test.ts +97 -75
  38. package/src/__tests__/browser-runtime-check.test.ts +16 -15
  39. package/src/__tests__/browser-skill-baseline-tool-payload.test.ts +12 -10
  40. package/src/__tests__/browser-skill-endstate.test.ts +97 -72
  41. package/src/__tests__/bundle-scanner.test.ts +47 -22
  42. package/src/__tests__/bundled-asset.test.ts +74 -47
  43. package/src/__tests__/call-constants.test.ts +19 -19
  44. package/src/__tests__/call-controller.test.ts +0 -1
  45. package/src/__tests__/call-conversation-messages.test.ts +90 -65
  46. package/src/__tests__/call-domain.test.ts +149 -121
  47. package/src/__tests__/call-pointer-message-composer.test.ts +113 -83
  48. package/src/__tests__/call-pointer-messages.test.ts +213 -154
  49. package/src/__tests__/call-pointer-no-hardcoded-copy.guard.test.ts +9 -10
  50. package/src/__tests__/call-recovery.test.ts +232 -212
  51. package/src/__tests__/call-routes-http.test.ts +0 -1
  52. package/src/__tests__/call-start-guardian-guard.test.ts +32 -30
  53. package/src/__tests__/call-state-machine.test.ts +62 -51
  54. package/src/__tests__/call-state.test.ts +89 -75
  55. package/src/__tests__/call-store.test.ts +387 -316
  56. package/src/__tests__/callback-handoff-copy.test.ts +84 -82
  57. package/src/__tests__/canonical-guardian-store.test.ts +331 -280
  58. package/src/__tests__/channel-approval-routes.test.ts +1643 -1115
  59. package/src/__tests__/channel-approval.test.ts +139 -137
  60. package/src/__tests__/channel-approvals.test.ts +7 -2
  61. package/src/__tests__/channel-delivery-store.test.ts +232 -194
  62. package/src/__tests__/channel-guardian.test.ts +5 -3
  63. package/src/__tests__/channel-invite-transport.test.ts +107 -92
  64. package/src/__tests__/channel-policy.test.ts +42 -38
  65. package/src/__tests__/channel-readiness-service.test.ts +119 -102
  66. package/src/__tests__/channel-reply-delivery.test.ts +147 -118
  67. package/src/__tests__/channel-retry-sweep.test.ts +153 -110
  68. package/src/__tests__/checker.test.ts +3309 -1850
  69. package/src/__tests__/clarification-resolver.test.ts +91 -79
  70. package/src/__tests__/classifier.test.ts +64 -54
  71. package/src/__tests__/claude-code-skill-regression.test.ts +42 -37
  72. package/src/__tests__/claude-code-tool-profiles.test.ts +31 -29
  73. package/src/__tests__/clawhub.test.ts +92 -82
  74. package/src/__tests__/cli.test.ts +30 -30
  75. package/src/__tests__/clipboard.test.ts +53 -46
  76. package/src/__tests__/commit-guarantee.test.ts +59 -52
  77. package/src/__tests__/commit-message-enrichment-service.test.ts +203 -75
  78. package/src/__tests__/compaction.benchmark.test.ts +33 -31
  79. package/src/__tests__/computer-use-session-compaction.test.ts +60 -50
  80. package/src/__tests__/computer-use-session-lifecycle.test.ts +145 -117
  81. package/src/__tests__/computer-use-session-working-dir.test.ts +62 -48
  82. package/src/__tests__/computer-use-skill-baseline.test.ts +22 -19
  83. package/src/__tests__/computer-use-skill-endstate.test.ts +45 -31
  84. package/src/__tests__/computer-use-skill-lifecycle-cleanup.test.ts +121 -88
  85. package/src/__tests__/computer-use-skill-manifest-regression.test.ts +65 -42
  86. package/src/__tests__/computer-use-skill-proxy-bridge.test.ts +33 -18
  87. package/src/__tests__/computer-use-tools.test.ts +121 -98
  88. package/src/__tests__/config-schema.test.ts +443 -347
  89. package/src/__tests__/config-watcher.test.ts +96 -81
  90. package/src/__tests__/confirmation-request-guardian-bridge.test.ts +148 -133
  91. package/src/__tests__/conflict-intent-tokenization.test.ts +96 -78
  92. package/src/__tests__/conflict-policy.test.ts +151 -80
  93. package/src/__tests__/conflict-store.test.ts +203 -157
  94. package/src/__tests__/connection-policy.test.ts +89 -59
  95. package/src/__tests__/contacts-tools.test.ts +247 -178
  96. package/src/__tests__/context-memory-e2e.test.ts +306 -214
  97. package/src/__tests__/context-token-estimator.test.ts +114 -74
  98. package/src/__tests__/context-window-manager.test.ts +269 -167
  99. package/src/__tests__/contradiction-checker.test.ts +161 -135
  100. package/src/__tests__/conversation-attention-store.test.ts +350 -290
  101. package/src/__tests__/conversation-attention-telegram.test.ts +0 -1
  102. package/src/__tests__/conversation-pairing.test.ts +220 -113
  103. package/src/__tests__/conversation-routes-guardian-reply.test.ts +8 -0
  104. package/src/__tests__/conversation-store.test.ts +390 -235
  105. package/src/__tests__/credential-broker-browser-fill.test.ts +325 -250
  106. package/src/__tests__/credential-broker-server-use.test.ts +283 -243
  107. package/src/__tests__/credential-broker.test.ts +128 -74
  108. package/src/__tests__/credential-host-pattern-match.test.ts +64 -44
  109. package/src/__tests__/credential-metadata-store.test.ts +360 -311
  110. package/src/__tests__/credential-policy-validate.test.ts +81 -65
  111. package/src/__tests__/credential-resolve.test.ts +212 -145
  112. package/src/__tests__/credential-security-e2e.test.ts +144 -103
  113. package/src/__tests__/credential-security-invariants.test.ts +253 -208
  114. package/src/__tests__/credential-selection.test.ts +254 -146
  115. package/src/__tests__/credential-vault-unit.test.ts +531 -341
  116. package/src/__tests__/credential-vault.test.ts +761 -484
  117. package/src/__tests__/daemon-assistant-events.test.ts +91 -66
  118. package/src/__tests__/daemon-lifecycle.test.ts +258 -190
  119. package/src/__tests__/daemon-server-session-init.test.ts +2 -1
  120. package/src/__tests__/date-context.test.ts +314 -249
  121. package/src/__tests__/db-migration-rollback.test.ts +259 -130
  122. package/src/__tests__/db-schedule-syntax-migration.test.ts +78 -41
  123. package/src/__tests__/delete-managed-skill-tool.test.ts +77 -53
  124. package/src/__tests__/deterministic-verification-control-plane.test.ts +0 -1
  125. package/src/__tests__/dictation-mode-detection.test.ts +77 -55
  126. package/src/__tests__/dictation-profile-store.test.ts +70 -56
  127. package/src/__tests__/dictation-text-processing.test.ts +53 -35
  128. package/src/__tests__/diff.test.ts +102 -98
  129. package/src/__tests__/domain-normalize.test.ts +54 -54
  130. package/src/__tests__/domain-policy.test.ts +71 -55
  131. package/src/__tests__/dynamic-page-surface.test.ts +31 -33
  132. package/src/__tests__/dynamic-skill-workflow-prompt.test.ts +69 -69
  133. package/src/__tests__/edit-engine.test.ts +56 -56
  134. package/src/__tests__/elevenlabs-client.test.ts +117 -91
  135. package/src/__tests__/elevenlabs-config.test.ts +32 -31
  136. package/src/__tests__/email-classifier.test.ts +15 -12
  137. package/src/__tests__/email-cli.test.ts +121 -108
  138. package/src/__tests__/emit-signal-routing-intent.test.ts +76 -69
  139. package/src/__tests__/encrypted-store.test.ts +180 -154
  140. package/src/__tests__/entity-extractor.test.ts +108 -87
  141. package/src/__tests__/entity-search.test.ts +664 -258
  142. package/src/__tests__/ephemeral-permissions.test.ts +224 -188
  143. package/src/__tests__/event-bus.test.ts +81 -77
  144. package/src/__tests__/extract-email.test.ts +29 -20
  145. package/src/__tests__/file-edit-tool.test.ts +62 -44
  146. package/src/__tests__/file-ops-service.test.ts +131 -114
  147. package/src/__tests__/file-read-tool.test.ts +48 -31
  148. package/src/__tests__/file-write-tool.test.ts +43 -37
  149. package/src/__tests__/filesystem-tools.test.ts +238 -209
  150. package/src/__tests__/followup-tools.test.ts +237 -162
  151. package/src/__tests__/forbidden-legacy-symbols.test.ts +19 -20
  152. package/src/__tests__/frontmatter.test.ts +96 -81
  153. package/src/__tests__/fuzzy-match-property.test.ts +75 -81
  154. package/src/__tests__/fuzzy-match.test.ts +71 -65
  155. package/src/__tests__/gateway-client-managed-outbound.test.ts +76 -57
  156. package/src/__tests__/gateway-only-enforcement.test.ts +0 -1
  157. package/src/__tests__/gateway-only-guard.test.ts +0 -1
  158. package/src/__tests__/gemini-image-service.test.ts +113 -100
  159. package/src/__tests__/gemini-provider.test.ts +297 -220
  160. package/src/__tests__/get-weather.test.ts +188 -114
  161. package/src/__tests__/gmail-integration.test.ts +13 -5
  162. package/src/__tests__/guardian-action-conversation-turn.test.ts +226 -171
  163. package/src/__tests__/guardian-action-copy-generator.test.ts +111 -93
  164. package/src/__tests__/guardian-action-followup-executor.test.ts +0 -1
  165. package/src/__tests__/guardian-action-followup-store.test.ts +199 -167
  166. package/src/__tests__/guardian-action-grant-mint-consume.test.ts +297 -250
  167. package/src/__tests__/guardian-action-late-reply.test.ts +462 -316
  168. package/src/__tests__/guardian-action-no-hardcoded-copy.test.ts +23 -18
  169. package/src/__tests__/guardian-action-store.test.ts +158 -109
  170. package/src/__tests__/guardian-action-sweep.test.ts +114 -100
  171. package/src/__tests__/guardian-actions-endpoint.test.ts +440 -256
  172. package/src/__tests__/guardian-control-plane-policy.test.ts +497 -331
  173. package/src/__tests__/guardian-decision-primitive-canonical.test.ts +217 -215
  174. package/src/__tests__/guardian-dispatch.test.ts +316 -256
  175. package/src/__tests__/guardian-grant-minting.test.ts +247 -178
  176. package/src/__tests__/guardian-outbound-http.test.ts +5 -3
  177. package/src/__tests__/guardian-principal-id-roundtrip.test.ts +99 -96
  178. package/src/__tests__/guardian-question-copy.test.ts +17 -17
  179. package/src/__tests__/guardian-question-mode.test.ts +134 -100
  180. package/src/__tests__/guardian-routing-invariants.test.ts +0 -1
  181. package/src/__tests__/guardian-routing-state.test.ts +0 -1
  182. package/src/__tests__/guardian-verification-intent-routing.test.ts +94 -88
  183. package/src/__tests__/guardian-verification-voice-binding.test.ts +0 -1
  184. package/src/__tests__/guardian-verify-setup-skill-regression.test.ts +0 -1
  185. package/src/__tests__/handle-user-message-secret-resume.test.ts +7 -2
  186. package/src/__tests__/handlers-add-trust-rule-metadata.test.ts +92 -76
  187. package/src/__tests__/handlers-cu-observation-blob.test.ts +103 -70
  188. package/src/__tests__/handlers-ipc-blob-probe.test.ts +77 -51
  189. package/src/__tests__/handlers-slack-config.test.ts +63 -54
  190. package/src/__tests__/handlers-task-submit-slash.test.ts +18 -18
  191. package/src/__tests__/handlers-telegram-config.test.ts +662 -329
  192. package/src/__tests__/handlers-twitter-config.test.ts +525 -298
  193. package/src/__tests__/handlers-user-message-approval-consumption.test.ts +5 -2
  194. package/src/__tests__/headless-browser-interactions.test.ts +444 -280
  195. package/src/__tests__/headless-browser-navigate.test.ts +116 -79
  196. package/src/__tests__/headless-browser-read-tools.test.ts +123 -86
  197. package/src/__tests__/headless-browser-snapshot.test.ts +71 -56
  198. package/src/__tests__/heartbeat-service.test.ts +76 -58
  199. package/src/__tests__/history-repair-observability.test.ts +14 -14
  200. package/src/__tests__/history-repair.test.ts +171 -167
  201. package/src/__tests__/home-base-bootstrap.test.ts +30 -27
  202. package/src/__tests__/hooks-blocking.test.ts +86 -37
  203. package/src/__tests__/hooks-cli.test.ts +104 -68
  204. package/src/__tests__/hooks-config.test.ts +81 -43
  205. package/src/__tests__/hooks-discovery.test.ts +106 -96
  206. package/src/__tests__/hooks-integration.test.ts +78 -72
  207. package/src/__tests__/hooks-manager.test.ts +99 -61
  208. package/src/__tests__/hooks-runner.test.ts +94 -71
  209. package/src/__tests__/hooks-settings.test.ts +69 -64
  210. package/src/__tests__/hooks-templates.test.ts +85 -54
  211. package/src/__tests__/hooks-ts-runner.test.ts +82 -45
  212. package/src/__tests__/hooks-watch.test.ts +32 -22
  213. package/src/__tests__/host-file-edit-tool.test.ts +190 -148
  214. package/src/__tests__/host-file-read-tool.test.ts +86 -63
  215. package/src/__tests__/host-file-write-tool.test.ts +98 -64
  216. package/src/__tests__/host-shell-tool.test.ts +342 -233
  217. package/src/__tests__/inbound-invite-redemption.test.ts +0 -1
  218. package/src/__tests__/ingress-member-store.test.ts +163 -159
  219. package/src/__tests__/ingress-reconcile.test.ts +13 -6
  220. package/src/__tests__/ingress-routes-http.test.ts +441 -356
  221. package/src/__tests__/ingress-url-consistency.test.ts +125 -64
  222. package/src/__tests__/integration-status.test.ts +93 -73
  223. package/src/__tests__/intent-routing.test.ts +148 -118
  224. package/src/__tests__/invite-redemption-service.test.ts +163 -121
  225. package/src/__tests__/ipc-blob-store.test.ts +104 -91
  226. package/src/__tests__/ipc-contract-inventory.test.ts +27 -15
  227. package/src/__tests__/ipc-contract.test.ts +24 -23
  228. package/src/__tests__/ipc-protocol.test.ts +52 -46
  229. package/src/__tests__/ipc-roundtrip.benchmark.test.ts +61 -50
  230. package/src/__tests__/ipc-snapshot.test.ts +1135 -1056
  231. package/src/__tests__/ipc-validate.test.ts +240 -179
  232. package/src/__tests__/key-migration.test.ts +123 -90
  233. package/src/__tests__/keychain.test.ts +150 -123
  234. package/src/__tests__/lifecycle-docs-guard.test.ts +65 -64
  235. package/src/__tests__/llm-usage-store.test.ts +112 -87
  236. package/src/__tests__/managed-skill-lifecycle.test.ts +147 -108
  237. package/src/__tests__/managed-store.test.ts +411 -360
  238. package/src/__tests__/mcp-cli.test.ts +190 -124
  239. package/src/__tests__/mcp-health-check.test.ts +26 -21
  240. package/src/__tests__/media-generate-image.test.ts +122 -99
  241. package/src/__tests__/media-reuse-story.e2e.test.ts +282 -214
  242. package/src/__tests__/media-visibility-policy.test.ts +86 -38
  243. package/src/__tests__/memory-context-benchmark.benchmark.test.ts +146 -100
  244. package/src/__tests__/memory-lifecycle-e2e.test.ts +385 -297
  245. package/src/__tests__/memory-query-builder.test.ts +32 -33
  246. package/src/__tests__/memory-recall-quality.test.ts +761 -407
  247. package/src/__tests__/memory-regressions.experimental.test.ts +443 -380
  248. package/src/__tests__/memory-regressions.test.ts +3725 -2642
  249. package/src/__tests__/memory-retrieval-budget.test.ts +7 -8
  250. package/src/__tests__/memory-retrieval.benchmark.test.ts +144 -109
  251. package/src/__tests__/memory-upsert-concurrency.test.ts +292 -201
  252. package/src/__tests__/messaging-send-tool.test.ts +36 -29
  253. package/src/__tests__/migration-cli-flows.test.ts +69 -53
  254. package/src/__tests__/migration-ordering.test.ts +103 -86
  255. package/src/__tests__/mime-builder.test.ts +55 -32
  256. package/src/__tests__/mock-signup-server.test.ts +384 -246
  257. package/src/__tests__/model-intents.test.ts +61 -37
  258. package/src/__tests__/no-direct-anthropic-sdk-imports.test.ts +9 -12
  259. package/src/__tests__/no-is-trusted-guard.test.ts +24 -21
  260. package/src/__tests__/non-member-access-request.test.ts +3 -2
  261. package/src/__tests__/notification-broadcaster.test.ts +99 -81
  262. package/src/__tests__/notification-decision-fallback.test.ts +223 -178
  263. package/src/__tests__/notification-decision-strategy.test.ts +375 -337
  264. package/src/__tests__/notification-deep-link.test.ts +67 -61
  265. package/src/__tests__/notification-guardian-path.test.ts +248 -206
  266. package/src/__tests__/notification-routing-intent.test.ts +166 -93
  267. package/src/__tests__/notification-thread-candidate-validation.test.ts +78 -75
  268. package/src/__tests__/notification-thread-candidates.test.ts +64 -61
  269. package/src/__tests__/oauth-callback-registry.test.ts +40 -30
  270. package/src/__tests__/oauth-connect-handler.test.ts +109 -89
  271. package/src/__tests__/oauth-scope-policy.test.ts +63 -55
  272. package/src/__tests__/oauth2-gateway-transport.test.ts +252 -174
  273. package/src/__tests__/onboarding-starter-tasks.test.ts +93 -89
  274. package/src/__tests__/onboarding-template-contract.test.ts +93 -94
  275. package/src/__tests__/openai-provider.test.ts +366 -274
  276. package/src/__tests__/pairing-concurrent.test.ts +18 -12
  277. package/src/__tests__/pairing-routes.test.ts +45 -41
  278. package/src/__tests__/parallel-tool.benchmark.test.ts +108 -58
  279. package/src/__tests__/parser.test.ts +316 -226
  280. package/src/__tests__/path-classifier.test.ts +24 -25
  281. package/src/__tests__/path-policy.test.ts +187 -147
  282. package/src/__tests__/phone.test.ts +36 -36
  283. package/src/__tests__/platform-move-helper.test.ts +48 -40
  284. package/src/__tests__/platform-socket-path.test.ts +23 -24
  285. package/src/__tests__/platform-workspace-migration.test.ts +464 -414
  286. package/src/__tests__/platform.test.ts +61 -53
  287. package/src/__tests__/playbook-execution.test.ts +397 -265
  288. package/src/__tests__/playbook-tools.test.ts +267 -196
  289. package/src/__tests__/prebuilt-home-base-seed.test.ts +30 -27
  290. package/src/__tests__/pricing.test.ts +316 -136
  291. package/src/__tests__/profile-compiler.test.ts +206 -188
  292. package/src/__tests__/provider-commit-message-generator.test.ts +114 -106
  293. package/src/__tests__/provider-error-scenarios.test.ts +212 -158
  294. package/src/__tests__/provider-fail-open-selection.test.ts +51 -44
  295. package/src/__tests__/provider-registry-ollama.test.ts +13 -9
  296. package/src/__tests__/provider-streaming.benchmark.test.ts +232 -183
  297. package/src/__tests__/proxy-approval-callback.test.ts +180 -119
  298. package/src/__tests__/public-ingress-urls.test.ts +112 -94
  299. package/src/__tests__/qdrant-manager.test.ts +147 -98
  300. package/src/__tests__/ratelimit.test.ts +152 -82
  301. package/src/__tests__/recording-handler.test.ts +273 -151
  302. package/src/__tests__/recording-intent-fallback.test.ts +94 -75
  303. package/src/__tests__/recording-intent-handler.test.ts +9 -2
  304. package/src/__tests__/recording-intent.test.ts +578 -379
  305. package/src/__tests__/recording-state-machine.test.ts +530 -316
  306. package/src/__tests__/recurrence-engine-rruleset.test.ts +150 -92
  307. package/src/__tests__/recurrence-engine.test.ts +81 -41
  308. package/src/__tests__/recurrence-types.test.ts +63 -44
  309. package/src/__tests__/relay-server.test.ts +2131 -1602
  310. package/src/__tests__/reminder-store.test.ts +158 -80
  311. package/src/__tests__/reminder.test.ts +113 -109
  312. package/src/__tests__/remote-skill-policy.test.ts +96 -72
  313. package/src/__tests__/request-file-tool.test.ts +74 -67
  314. package/src/__tests__/response-tier.test.ts +131 -74
  315. package/src/__tests__/runtime-attachment-metadata.test.ts +0 -1
  316. package/src/__tests__/runtime-events-sse-parity.test.ts +167 -145
  317. package/src/__tests__/runtime-events-sse.test.ts +0 -1
  318. package/src/__tests__/sandbox-diagnostics.test.ts +66 -56
  319. package/src/__tests__/sandbox-host-parity.test.ts +377 -301
  320. package/src/__tests__/scaffold-managed-skill-tool.test.ts +213 -161
  321. package/src/__tests__/schedule-store.test.ts +268 -205
  322. package/src/__tests__/schedule-tools.test.ts +702 -524
  323. package/src/__tests__/scheduler-recurrence.test.ts +240 -130
  324. package/src/__tests__/scoped-approval-grants.test.ts +258 -168
  325. package/src/__tests__/scoped-grant-security-matrix.test.ts +160 -146
  326. package/src/__tests__/script-proxy-certs.test.ts +38 -35
  327. package/src/__tests__/script-proxy-connect-tunnel.test.ts +71 -46
  328. package/src/__tests__/script-proxy-decision-trace.test.ts +161 -84
  329. package/src/__tests__/script-proxy-http-forwarder.test.ts +146 -129
  330. package/src/__tests__/script-proxy-injection-runtime.test.ts +139 -113
  331. package/src/__tests__/script-proxy-mitm-handler.test.ts +226 -142
  332. package/src/__tests__/script-proxy-policy-runtime.test.ts +126 -86
  333. package/src/__tests__/script-proxy-policy.test.ts +308 -153
  334. package/src/__tests__/script-proxy-rewrite-specificity.test.ts +74 -62
  335. package/src/__tests__/script-proxy-router.test.ts +111 -77
  336. package/src/__tests__/script-proxy-session-manager.test.ts +156 -113
  337. package/src/__tests__/script-proxy-session-runtime.test.ts +28 -24
  338. package/src/__tests__/secret-allowlist.test.ts +105 -90
  339. package/src/__tests__/secret-ingress-handler.test.ts +41 -30
  340. package/src/__tests__/secret-onetime-send.test.ts +67 -50
  341. package/src/__tests__/secret-prompt-log-hygiene.test.ts +35 -31
  342. package/src/__tests__/secret-response-routing.test.ts +50 -41
  343. package/src/__tests__/secret-scanner-executor.test.ts +152 -111
  344. package/src/__tests__/secret-scanner.test.ts +495 -413
  345. package/src/__tests__/secure-keys.test.ts +132 -121
  346. package/src/__tests__/send-endpoint-busy.test.ts +8 -3
  347. package/src/__tests__/send-notification-tool.test.ts +43 -42
  348. package/src/__tests__/sensitive-output-placeholders.test.ts +72 -64
  349. package/src/__tests__/sequence-store.test.ts +335 -167
  350. package/src/__tests__/server-history-render.test.ts +341 -202
  351. package/src/__tests__/session-abort-tool-results.test.ts +133 -70
  352. package/src/__tests__/session-confirmation-signals.test.ts +252 -160
  353. package/src/__tests__/session-conflict-gate.test.ts +775 -585
  354. package/src/__tests__/session-error.test.ts +222 -191
  355. package/src/__tests__/session-evictor.test.ts +79 -62
  356. package/src/__tests__/session-init.benchmark.test.ts +170 -108
  357. package/src/__tests__/session-load-history-repair.test.ts +273 -139
  358. package/src/__tests__/session-messaging-secret-redirect.test.ts +130 -90
  359. package/src/__tests__/session-pre-run-repair.test.ts +106 -59
  360. package/src/__tests__/session-profile-injection.test.ts +198 -130
  361. package/src/__tests__/session-provider-retry-repair.test.ts +223 -141
  362. package/src/__tests__/session-queue.test.ts +624 -321
  363. package/src/__tests__/session-runtime-assembly.test.ts +425 -329
  364. package/src/__tests__/session-runtime-workspace.test.ts +69 -61
  365. package/src/__tests__/session-skill-tools.test.ts +973 -678
  366. package/src/__tests__/session-slash-known.test.ts +185 -133
  367. package/src/__tests__/session-slash-queue.test.ts +147 -81
  368. package/src/__tests__/session-slash-unknown.test.ts +135 -90
  369. package/src/__tests__/session-surfaces-task-progress.test.ts +122 -87
  370. package/src/__tests__/session-tool-setup-app-refresh.test.ts +338 -177
  371. package/src/__tests__/session-tool-setup-memory-scope.test.ts +63 -40
  372. package/src/__tests__/session-tool-setup-side-effect-flag.test.ts +60 -37
  373. package/src/__tests__/session-tool-setup-tools-disabled.test.ts +28 -26
  374. package/src/__tests__/session-undo.test.ts +43 -30
  375. package/src/__tests__/session-workspace-cache-state.test.ts +108 -67
  376. package/src/__tests__/session-workspace-injection.test.ts +245 -117
  377. package/src/__tests__/session-workspace-tool-tracking.test.ts +260 -93
  378. package/src/__tests__/shared-filesystem-errors.test.ts +47 -47
  379. package/src/__tests__/shell-credential-ref.test.ts +126 -90
  380. package/src/__tests__/shell-identity.test.ts +134 -111
  381. package/src/__tests__/shell-parser-fuzz.test.ts +263 -179
  382. package/src/__tests__/shell-parser-property.test.ts +435 -288
  383. package/src/__tests__/shell-tool-proxy-mode.test.ts +142 -70
  384. package/src/__tests__/size-guard.test.ts +42 -44
  385. package/src/__tests__/skill-feature-flags-integration.test.ts +79 -52
  386. package/src/__tests__/skill-feature-flags.test.ts +75 -47
  387. package/src/__tests__/skill-include-graph.test.ts +143 -148
  388. package/src/__tests__/skill-load-feature-flag.test.ts +94 -59
  389. package/src/__tests__/skill-load-tool.test.ts +371 -199
  390. package/src/__tests__/skill-projection-feature-flag.test.ts +131 -88
  391. package/src/__tests__/skill-projection.benchmark.test.ts +93 -65
  392. package/src/__tests__/skill-script-runner-host.test.ts +460 -250
  393. package/src/__tests__/skill-script-runner-sandbox.test.ts +168 -108
  394. package/src/__tests__/skill-script-runner.test.ts +115 -74
  395. package/src/__tests__/skill-tool-factory.test.ts +140 -96
  396. package/src/__tests__/skill-tool-manifest.test.ts +306 -210
  397. package/src/__tests__/skill-version-hash.test.ts +70 -56
  398. package/src/__tests__/skills.test.ts +0 -1
  399. package/src/__tests__/slack-channel-config.test.ts +127 -84
  400. package/src/__tests__/slack-skill.test.ts +60 -47
  401. package/src/__tests__/slash-commands-catalog.test.ts +37 -31
  402. package/src/__tests__/slash-commands-parser.test.ts +71 -64
  403. package/src/__tests__/slash-commands-resolver.test.ts +143 -107
  404. package/src/__tests__/slash-commands-rewrite.test.ts +22 -22
  405. package/src/__tests__/sms-messaging-provider.test.ts +4 -0
  406. package/src/__tests__/speaker-identification.test.ts +28 -25
  407. package/src/__tests__/starter-bundle.test.ts +27 -23
  408. package/src/__tests__/starter-task-flow.test.ts +67 -52
  409. package/src/__tests__/subagent-manager-notify.test.ts +154 -108
  410. package/src/__tests__/subagent-tools.test.ts +311 -270
  411. package/src/__tests__/subagent-types.test.ts +40 -40
  412. package/src/__tests__/surface-mutex-cleanup.test.ts +42 -30
  413. package/src/__tests__/swarm-dag-pathological.test.ts +122 -111
  414. package/src/__tests__/swarm-orchestrator.test.ts +135 -101
  415. package/src/__tests__/swarm-plan-validator.test.ts +125 -73
  416. package/src/__tests__/swarm-recursion.test.ts +58 -46
  417. package/src/__tests__/swarm-router-planner.test.ts +99 -74
  418. package/src/__tests__/swarm-session-integration.test.ts +148 -91
  419. package/src/__tests__/swarm-tool.test.ts +65 -45
  420. package/src/__tests__/swarm-worker-backend.test.ts +59 -45
  421. package/src/__tests__/swarm-worker-runner.test.ts +133 -118
  422. package/src/__tests__/system-prompt.test.ts +311 -256
  423. package/src/__tests__/task-compiler.test.ts +176 -120
  424. package/src/__tests__/task-management-tools.test.ts +561 -456
  425. package/src/__tests__/task-memory-cleanup.test.ts +627 -362
  426. package/src/__tests__/task-runner.test.ts +117 -94
  427. package/src/__tests__/task-scheduler.test.ts +113 -84
  428. package/src/__tests__/task-tools.test.ts +349 -264
  429. package/src/__tests__/terminal-sandbox.test.ts +138 -108
  430. package/src/__tests__/terminal-tools.test.ts +350 -305
  431. package/src/__tests__/thread-seed-composer.test.ts +307 -180
  432. package/src/__tests__/tool-approval-handler.test.ts +238 -137
  433. package/src/__tests__/tool-audit-listener.test.ts +69 -69
  434. package/src/__tests__/tool-domain-event-publisher.test.ts +142 -132
  435. package/src/__tests__/tool-execution-abort-cleanup.test.ts +155 -146
  436. package/src/__tests__/tool-execution-pipeline.benchmark.test.ts +136 -105
  437. package/src/__tests__/tool-executor-lifecycle-events.test.ts +355 -239
  438. package/src/__tests__/tool-executor-redaction.test.ts +112 -109
  439. package/src/__tests__/tool-executor-shell-integration.test.ts +130 -79
  440. package/src/__tests__/tool-executor.test.ts +1274 -674
  441. package/src/__tests__/tool-grant-request-escalation.test.ts +401 -283
  442. package/src/__tests__/tool-metrics-listener.test.ts +97 -85
  443. package/src/__tests__/tool-notification-listener.test.ts +42 -25
  444. package/src/__tests__/tool-permission-simulate-handler.test.ts +137 -113
  445. package/src/__tests__/tool-policy.test.ts +44 -25
  446. package/src/__tests__/tool-profiling-listener.test.ts +99 -93
  447. package/src/__tests__/tool-result-truncation.test.ts +5 -4
  448. package/src/__tests__/tool-trace-listener.test.ts +131 -111
  449. package/src/__tests__/top-level-renderer.test.ts +62 -58
  450. package/src/__tests__/top-level-scanner.test.ts +68 -64
  451. package/src/__tests__/trace-emitter.test.ts +56 -56
  452. package/src/__tests__/trust-context-guards.test.ts +65 -65
  453. package/src/__tests__/trust-store.test.ts +1239 -806
  454. package/src/__tests__/trusted-contact-approval-notifier.test.ts +0 -1
  455. package/src/__tests__/trusted-contact-inline-approval-integration.test.ts +0 -1
  456. package/src/__tests__/trusted-contact-lifecycle-notifications.test.ts +3 -2
  457. package/src/__tests__/trusted-contact-multichannel.test.ts +3 -2
  458. package/src/__tests__/trusted-contact-verification.test.ts +251 -231
  459. package/src/__tests__/turn-commit.test.ts +259 -200
  460. package/src/__tests__/twilio-provider.test.ts +140 -126
  461. package/src/__tests__/twilio-rest.test.ts +22 -18
  462. package/src/__tests__/twilio-routes-elevenlabs.test.ts +0 -1
  463. package/src/__tests__/twilio-routes-twiml.test.ts +55 -55
  464. package/src/__tests__/twilio-routes.test.ts +0 -1
  465. package/src/__tests__/twitter-auth-handler.test.ts +184 -139
  466. package/src/__tests__/twitter-cli-error-shaping.test.ts +88 -73
  467. package/src/__tests__/twitter-cli-routing.test.ts +146 -99
  468. package/src/__tests__/twitter-oauth-client.test.ts +82 -65
  469. package/src/__tests__/update-bulletin-format.test.ts +69 -66
  470. package/src/__tests__/update-bulletin-state.test.ts +66 -60
  471. package/src/__tests__/update-bulletin.test.ts +150 -114
  472. package/src/__tests__/update-template-contract.test.ts +15 -10
  473. package/src/__tests__/url-safety.test.ts +288 -265
  474. package/src/__tests__/user-reference.test.ts +32 -32
  475. package/src/__tests__/view-image-tool.test.ts +118 -96
  476. package/src/__tests__/voice-invite-redemption.test.ts +111 -106
  477. package/src/__tests__/voice-quality.test.ts +117 -102
  478. package/src/__tests__/voice-scoped-grant-consumer.test.ts +204 -146
  479. package/src/__tests__/voice-session-bridge.test.ts +351 -216
  480. package/src/__tests__/weather-skill-regression.test.ts +170 -120
  481. package/src/__tests__/web-fetch.test.ts +664 -526
  482. package/src/__tests__/web-search.test.ts +379 -213
  483. package/src/__tests__/work-item-output.test.ts +90 -53
  484. package/src/__tests__/workspace-git-service.test.ts +437 -356
  485. package/src/__tests__/workspace-heartbeat-service.test.ts +125 -91
  486. package/src/__tests__/workspace-lifecycle.test.ts +98 -64
  487. package/src/__tests__/workspace-policy.test.ts +139 -71
  488. package/src/cli/mcp.ts +81 -28
  489. package/src/commands/__tests__/cc-command-registry.test.ts +142 -134
  490. package/src/config/__tests__/feature-flag-registry-guard.test.ts +48 -39
  491. package/src/config/bundled-skills/chatgpt-import/tools/chatgpt-import.ts +25 -10
  492. package/src/config/bundled-skills/doordash/__tests__/doordash-session.test.ts +0 -1
  493. package/src/config/bundled-skills/guardian-verify-setup/SKILL.md +6 -11
  494. package/src/config/bundled-skills/messaging/SKILL.md +4 -3
  495. package/src/config/bundled-skills/messaging/tools/gmail-outreach-scan.ts +15 -5
  496. package/src/config/bundled-skills/messaging/tools/gmail-sender-digest.ts +16 -5
  497. package/src/config/bundled-skills/phone-calls/SKILL.md +1 -2
  498. package/src/config/bundled-skills/slack/tools/slack-scan-digest.ts +34 -32
  499. package/src/config/bundled-skills/sms-setup/SKILL.md +8 -16
  500. package/src/config/bundled-skills/telegram-setup/SKILL.md +3 -3
  501. package/src/config/bundled-skills/trusted-contacts/SKILL.md +13 -25
  502. package/src/config/bundled-skills/twilio-setup/SKILL.md +13 -23
  503. package/src/config/bundled-tool-registry.ts +2 -0
  504. package/src/config/env.ts +3 -4
  505. package/src/config/system-prompt.ts +32 -0
  506. package/src/mcp/client.ts +2 -7
  507. package/src/memory/db-connection.ts +16 -10
  508. package/src/messaging/providers/gmail/adapter.ts +10 -3
  509. package/src/messaging/providers/gmail/client.ts +280 -72
  510. package/src/runtime/auth/__tests__/context.test.ts +75 -65
  511. package/src/runtime/auth/__tests__/credential-service.test.ts +137 -114
  512. package/src/runtime/auth/__tests__/guard-tests.test.ts +84 -90
  513. package/src/runtime/auth/__tests__/ipc-auth-context.test.ts +40 -40
  514. package/src/runtime/auth/__tests__/middleware.test.ts +80 -74
  515. package/src/runtime/auth/__tests__/policy.test.ts +9 -9
  516. package/src/runtime/auth/__tests__/route-policy.test.ts +76 -65
  517. package/src/runtime/auth/__tests__/scopes.test.ts +68 -60
  518. package/src/runtime/auth/__tests__/subject.test.ts +54 -54
  519. package/src/runtime/auth/__tests__/token-service.test.ts +115 -108
  520. package/src/runtime/auth/scopes.ts +3 -0
  521. package/src/runtime/auth/token-service.ts +4 -1
  522. package/src/runtime/auth/types.ts +2 -1
  523. package/src/runtime/http-server.ts +2 -1
  524. package/src/security/secure-keys.ts +120 -54
  525. package/src/tools/browser/__tests__/auth-cache.test.ts +69 -63
  526. package/src/tools/browser/__tests__/auth-detector.test.ts +218 -157
  527. package/src/tools/browser/__tests__/jit-auth.test.ts +83 -99
  528. package/src/tools/terminal/safe-env.ts +7 -0
@@ -1,7 +1,11 @@
1
- import { describe, expect,test } from 'bun:test';
1
+ import { describe, expect, test } from "bun:test";
2
2
 
3
- import type { AgentEvent, CheckpointDecision,CheckpointInfo } from '../agent/loop.js';
4
- import { AgentLoop } from '../agent/loop.js';
3
+ import type {
4
+ AgentEvent,
5
+ CheckpointDecision,
6
+ CheckpointInfo,
7
+ } from "../agent/loop.js";
8
+ import { AgentLoop } from "../agent/loop.js";
5
9
  import type {
6
10
  ContentBlock,
7
11
  Message,
@@ -9,21 +13,30 @@ import type {
9
13
  ProviderResponse,
10
14
  SendMessageOptions,
11
15
  ToolDefinition,
12
- } from '../providers/types.js';
16
+ } from "../providers/types.js";
13
17
 
14
18
  // ---------------------------------------------------------------------------
15
19
  // Helpers
16
20
  // ---------------------------------------------------------------------------
17
21
 
18
22
  /** A mock provider that returns pre-configured responses in sequence. */
19
- function createMockProvider(
20
- responses: ProviderResponse[],
21
- ): { provider: Provider; calls: { messages: Message[]; tools?: ToolDefinition[]; systemPrompt?: string }[] } {
22
- const calls: { messages: Message[]; tools?: ToolDefinition[]; systemPrompt?: string }[] = [];
23
+ function createMockProvider(responses: ProviderResponse[]): {
24
+ provider: Provider;
25
+ calls: {
26
+ messages: Message[];
27
+ tools?: ToolDefinition[];
28
+ systemPrompt?: string;
29
+ }[];
30
+ } {
31
+ const calls: {
32
+ messages: Message[];
33
+ tools?: ToolDefinition[];
34
+ systemPrompt?: string;
35
+ }[] = [];
23
36
  let callIndex = 0;
24
37
 
25
38
  const provider: Provider = {
26
- name: 'mock',
39
+ name: "mock",
27
40
  async sendMessage(
28
41
  messages: Message[],
29
42
  tools?: ToolDefinition[],
@@ -37,8 +50,8 @@ function createMockProvider(
37
50
  // Emit streaming events if the response has text blocks
38
51
  if (options?.onEvent) {
39
52
  for (const block of response.content) {
40
- if (block.type === 'text') {
41
- options.onEvent({ type: 'text_delta', text: block.text });
53
+ if (block.type === "text") {
54
+ options.onEvent({ type: "text_delta", text: block.text });
42
55
  }
43
56
  }
44
57
  }
@@ -52,29 +65,37 @@ function createMockProvider(
52
65
 
53
66
  function textResponse(text: string): ProviderResponse {
54
67
  return {
55
- content: [{ type: 'text', text }],
56
- model: 'mock-model',
68
+ content: [{ type: "text", text }],
69
+ model: "mock-model",
57
70
  usage: { inputTokens: 10, outputTokens: 5 },
58
- stopReason: 'end_turn',
71
+ stopReason: "end_turn",
59
72
  };
60
73
  }
61
74
 
62
- function toolUseResponse(id: string, name: string, input: Record<string, unknown>): ProviderResponse {
75
+ function toolUseResponse(
76
+ id: string,
77
+ name: string,
78
+ input: Record<string, unknown>,
79
+ ): ProviderResponse {
63
80
  return {
64
- content: [{ type: 'tool_use', id, name, input }],
65
- model: 'mock-model',
81
+ content: [{ type: "tool_use", id, name, input }],
82
+ model: "mock-model",
66
83
  usage: { inputTokens: 10, outputTokens: 5 },
67
- stopReason: 'tool_use',
84
+ stopReason: "tool_use",
68
85
  };
69
86
  }
70
87
 
71
88
  const dummyTools: ToolDefinition[] = [
72
- { name: 'read_file', description: 'Read a file', input_schema: { type: 'object', properties: { path: { type: 'string' } } } },
89
+ {
90
+ name: "read_file",
91
+ description: "Read a file",
92
+ input_schema: { type: "object", properties: { path: { type: "string" } } },
93
+ },
73
94
  ];
74
95
 
75
96
  const userMessage: Message = {
76
- role: 'user',
77
- content: [{ type: 'text', text: 'Hello' }],
97
+ role: "user",
98
+ content: [{ type: "text", text: "Hello" }],
78
99
  };
79
100
 
80
101
  function collectEvents(events: AgentEvent[]): (event: AgentEvent) => void {
@@ -85,11 +106,11 @@ function collectEvents(events: AgentEvent[]): (event: AgentEvent) => void {
85
106
  // Tests
86
107
  // ---------------------------------------------------------------------------
87
108
 
88
- describe('AgentLoop', () => {
109
+ describe("AgentLoop", () => {
89
110
  // 1. Basic text response
90
- test('returns history with assistant message for simple text response', async () => {
91
- const { provider } = createMockProvider([textResponse('Hi there!')]);
92
- const loop = new AgentLoop(provider, 'system prompt');
111
+ test("returns history with assistant message for simple text response", async () => {
112
+ const { provider } = createMockProvider([textResponse("Hi there!")]);
113
+ const loop = new AgentLoop(provider, "system prompt");
93
114
 
94
115
  const events: AgentEvent[] = [];
95
116
  const history = await loop.run([userMessage], collectEvents(events));
@@ -97,32 +118,41 @@ describe('AgentLoop', () => {
97
118
  // History should contain original user message + assistant response
98
119
  expect(history).toHaveLength(2);
99
120
  expect(history[0]).toEqual(userMessage);
100
- expect(history[1].role).toBe('assistant');
101
- expect(history[1].content).toEqual([{ type: 'text', text: 'Hi there!' }]);
121
+ expect(history[1].role).toBe("assistant");
122
+ expect(history[1].content).toEqual([{ type: "text", text: "Hi there!" }]);
102
123
  });
103
124
 
104
125
  // 2. Tool execution — provider returns tool_use, verify tool executor is called
105
- test('executes tool and passes result back to provider', async () => {
106
- const toolCallId = 'tool-1';
126
+ test("executes tool and passes result back to provider", async () => {
127
+ const toolCallId = "tool-1";
107
128
  const { provider, calls } = createMockProvider([
108
- toolUseResponse(toolCallId, 'read_file', { path: '/tmp/test.txt' }),
109
- textResponse('File contents received.'),
129
+ toolUseResponse(toolCallId, "read_file", { path: "/tmp/test.txt" }),
130
+ textResponse("File contents received."),
110
131
  ]);
111
132
 
112
133
  const toolCalls: { name: string; input: Record<string, unknown> }[] = [];
113
- const toolExecutor = async (name: string, input: Record<string, unknown>) => {
134
+ const toolExecutor = async (
135
+ name: string,
136
+ input: Record<string, unknown>,
137
+ ) => {
114
138
  toolCalls.push({ name, input });
115
- return { content: 'file data here', isError: false };
139
+ return { content: "file data here", isError: false };
116
140
  };
117
141
 
118
- const loop = new AgentLoop(provider, 'system', {}, dummyTools, toolExecutor);
142
+ const loop = new AgentLoop(
143
+ provider,
144
+ "system",
145
+ {},
146
+ dummyTools,
147
+ toolExecutor,
148
+ );
119
149
  const events: AgentEvent[] = [];
120
150
  const history = await loop.run([userMessage], collectEvents(events));
121
151
 
122
152
  // Tool executor was called with correct args
123
153
  expect(toolCalls).toHaveLength(1);
124
- expect(toolCalls[0].name).toBe('read_file');
125
- expect(toolCalls[0].input).toEqual({ path: '/tmp/test.txt' });
154
+ expect(toolCalls[0].name).toBe("read_file");
155
+ expect(toolCalls[0].input).toEqual({ path: "/tmp/test.txt" });
126
156
 
127
157
  // Provider was called twice (initial + after tool result)
128
158
  expect(calls).toHaveLength(2);
@@ -130,35 +160,50 @@ describe('AgentLoop', () => {
130
160
  // Second call should include the tool result as a user message
131
161
  const secondCallMessages = calls[1].messages;
132
162
  const lastMsg = secondCallMessages[secondCallMessages.length - 1];
133
- expect(lastMsg.role).toBe('user');
163
+ expect(lastMsg.role).toBe("user");
134
164
 
135
165
  const toolResultBlock = lastMsg.content.find(
136
- (b): b is Extract<ContentBlock, { type: 'tool_result' }> => b.type === 'tool_result',
166
+ (b): b is Extract<ContentBlock, { type: "tool_result" }> =>
167
+ b.type === "tool_result",
137
168
  );
138
169
  expect(toolResultBlock).toBeDefined();
139
170
  expect(toolResultBlock!.tool_use_id).toBe(toolCallId);
140
- expect(toolResultBlock!.content).toBe('file data here');
171
+ expect(toolResultBlock!.content).toBe("file data here");
141
172
  expect(toolResultBlock!.is_error).toBe(false);
142
173
 
143
174
  // Final history: user, assistant(tool_use), user(tool_result), assistant(text)
144
175
  expect(history).toHaveLength(4);
145
- expect(history[3].role).toBe('assistant');
146
- expect(history[3].content).toEqual([{ type: 'text', text: 'File contents received.' }]);
176
+ expect(history[3].role).toBe("assistant");
177
+ expect(history[3].content).toEqual([
178
+ { type: "text", text: "File contents received." },
179
+ ]);
147
180
  });
148
181
 
149
182
  // 3. Multi-turn tool loop
150
- test('supports multi-turn tool execution', async () => {
183
+ test("supports multi-turn tool execution", async () => {
151
184
  const { provider, calls } = createMockProvider([
152
- toolUseResponse('t1', 'read_file', { path: '/a.txt' }),
153
- toolUseResponse('t2', 'read_file', { path: '/b.txt' }),
154
- textResponse('Done reading both files.'),
185
+ toolUseResponse("t1", "read_file", { path: "/a.txt" }),
186
+ toolUseResponse("t2", "read_file", { path: "/b.txt" }),
187
+ textResponse("Done reading both files."),
155
188
  ]);
156
189
 
157
- const toolExecutor = async (name: string, input: Record<string, unknown>) => {
158
- return { content: `contents of ${(input as { path: string }).path}`, isError: false };
190
+ const toolExecutor = async (
191
+ name: string,
192
+ input: Record<string, unknown>,
193
+ ) => {
194
+ return {
195
+ content: `contents of ${(input as { path: string }).path}`,
196
+ isError: false,
197
+ };
159
198
  };
160
199
 
161
- const loop = new AgentLoop(provider, 'system', {}, dummyTools, toolExecutor);
200
+ const loop = new AgentLoop(
201
+ provider,
202
+ "system",
203
+ {},
204
+ dummyTools,
205
+ toolExecutor,
206
+ );
162
207
  const history = await loop.run([userMessage], () => {});
163
208
 
164
209
  // Provider called 3 times (two tool rounds + final text)
@@ -166,35 +211,37 @@ describe('AgentLoop', () => {
166
211
 
167
212
  // History: user, assistant(t1), user(result1), assistant(t2), user(result2), assistant(text)
168
213
  expect(history).toHaveLength(6);
169
- expect(history[5].content).toEqual([{ type: 'text', text: 'Done reading both files.' }]);
214
+ expect(history[5].content).toEqual([
215
+ { type: "text", text: "Done reading both files." },
216
+ ]);
170
217
  });
171
218
 
172
219
  // 4. Loop stops when provider returns tool_use but no executor is configured
173
- test('stops when tool_use returned but no tool executor configured', async () => {
220
+ test("stops when tool_use returned but no tool executor configured", async () => {
174
221
  const { provider } = createMockProvider([
175
- toolUseResponse('t1', 'read_file', { path: '/a.txt' }),
222
+ toolUseResponse("t1", "read_file", { path: "/a.txt" }),
176
223
  ]);
177
224
 
178
225
  // No tool executor provided
179
- const loop = new AgentLoop(provider, 'system', {}, dummyTools);
226
+ const loop = new AgentLoop(provider, "system", {}, dummyTools);
180
227
  const history = await loop.run([userMessage], () => {});
181
228
 
182
229
  // Should stop after first response (no executor to handle tool use)
183
230
  expect(history).toHaveLength(2);
184
- expect(history[1].role).toBe('assistant');
231
+ expect(history[1].role).toBe("assistant");
185
232
  });
186
233
 
187
234
  // 5. Error handling — provider throws, verify error event and loop stops
188
- test('emits error event and stops when provider throws', async () => {
189
- const error = new Error('API rate limit exceeded');
235
+ test("emits error event and stops when provider throws", async () => {
236
+ const error = new Error("API rate limit exceeded");
190
237
  const provider: Provider = {
191
- name: 'mock',
238
+ name: "mock",
192
239
  async sendMessage(): Promise<ProviderResponse> {
193
240
  throw error;
194
241
  },
195
242
  };
196
243
 
197
- const loop = new AgentLoop(provider, 'system');
244
+ const loop = new AgentLoop(provider, "system");
198
245
  const events: AgentEvent[] = [];
199
246
  const history = await loop.run([userMessage], collectEvents(events));
200
247
 
@@ -202,32 +249,34 @@ describe('AgentLoop', () => {
202
249
  expect(history).toHaveLength(1);
203
250
 
204
251
  // Error event was emitted
205
- const errorEvents = events.filter((e) => e.type === 'error');
252
+ const errorEvents = events.filter((e) => e.type === "error");
206
253
  expect(errorEvents).toHaveLength(1);
207
- expect((errorEvents[0] as { type: 'error'; error: Error }).error.message).toBe('API rate limit exceeded');
254
+ expect(
255
+ (errorEvents[0] as { type: "error"; error: Error }).error.message,
256
+ ).toBe("API rate limit exceeded");
208
257
  });
209
258
 
210
259
  // 6. Abort signal — verify the loop respects AbortSignal
211
- test('stops when abort signal is triggered before provider call', async () => {
260
+ test("stops when abort signal is triggered before provider call", async () => {
212
261
  const controller = new AbortController();
213
262
  controller.abort(); // abort immediately
214
263
 
215
- const { provider } = createMockProvider([textResponse('Should not reach')]);
216
- const loop = new AgentLoop(provider, 'system');
264
+ const { provider } = createMockProvider([textResponse("Should not reach")]);
265
+ const loop = new AgentLoop(provider, "system");
217
266
  const history = await loop.run([userMessage], () => {}, controller.signal);
218
267
 
219
268
  // Loop should exit immediately, returning only original messages
220
269
  expect(history).toHaveLength(1);
221
270
  });
222
271
 
223
- test('stops when abort signal is triggered between turns', async () => {
272
+ test("stops when abort signal is triggered between turns", async () => {
224
273
  const controller = new AbortController();
225
274
  let turnCount = 0;
226
275
 
227
276
  const { provider } = createMockProvider([
228
- toolUseResponse('t1', 'read_file', { path: '/a.txt' }),
229
- toolUseResponse('t2', 'read_file', { path: '/b.txt' }),
230
- textResponse('Should not reach'),
277
+ toolUseResponse("t1", "read_file", { path: "/a.txt" }),
278
+ toolUseResponse("t2", "read_file", { path: "/b.txt" }),
279
+ textResponse("Should not reach"),
231
280
  ]);
232
281
 
233
282
  const toolExecutor = async () => {
@@ -236,10 +285,16 @@ describe('AgentLoop', () => {
236
285
  // Abort after the first tool turn completes
237
286
  controller.abort();
238
287
  }
239
- return { content: 'data', isError: false };
288
+ return { content: "data", isError: false };
240
289
  };
241
290
 
242
- const loop = new AgentLoop(provider, 'system', {}, dummyTools, toolExecutor);
291
+ const loop = new AgentLoop(
292
+ provider,
293
+ "system",
294
+ {},
295
+ dummyTools,
296
+ toolExecutor,
297
+ );
243
298
  const history = await loop.run([userMessage], () => {}, controller.signal);
244
299
 
245
300
  // After the first tool turn, abort fires. The while loop checks signal at the
@@ -250,20 +305,24 @@ describe('AgentLoop', () => {
250
305
  expect(history.length).toBeLessThanOrEqual(4);
251
306
 
252
307
  // Verify the loop didn't reach the final text response
253
- const lastAssistant = [...history].reverse().find(m => m.role === 'assistant');
308
+ const lastAssistant = [...history]
309
+ .reverse()
310
+ .find((m) => m.role === "assistant");
254
311
  expect(lastAssistant).toBeDefined();
255
- const hasToolUse = lastAssistant!.content.some(b => b.type === 'tool_use');
312
+ const hasToolUse = lastAssistant!.content.some(
313
+ (b) => b.type === "tool_use",
314
+ );
256
315
  // The last assistant message should be a tool_use, not the final text
257
316
  expect(hasToolUse).toBe(true);
258
317
  });
259
318
 
260
319
  // 6b. Abort signal during long-running tool execution — loop exits immediately
261
- test('stops immediately when abort fires during a stuck tool execution', async () => {
320
+ test("stops immediately when abort fires during a stuck tool execution", async () => {
262
321
  const controller = new AbortController();
263
322
 
264
323
  const { provider } = createMockProvider([
265
- toolUseResponse('t1', 'read_file', { path: '/stuck.txt' }),
266
- textResponse('Should not reach'),
324
+ toolUseResponse("t1", "read_file", { path: "/stuck.txt" }),
325
+ textResponse("Should not reach"),
267
326
  ]);
268
327
 
269
328
  // Simulate a stuck tool that never resolves — abort fires while it's running
@@ -271,11 +330,17 @@ describe('AgentLoop', () => {
271
330
  // Abort from a timer while this tool is "stuck"
272
331
  setTimeout(() => controller.abort(), 50);
273
332
  // Simulate being stuck for a long time
274
- await new Promise(resolve => setTimeout(resolve, 10_000));
275
- return { content: 'should never return', isError: false };
333
+ await new Promise((resolve) => setTimeout(resolve, 10_000));
334
+ return { content: "should never return", isError: false };
276
335
  };
277
336
 
278
- const loop = new AgentLoop(provider, 'system', {}, dummyTools, toolExecutor);
337
+ const loop = new AgentLoop(
338
+ provider,
339
+ "system",
340
+ {},
341
+ dummyTools,
342
+ toolExecutor,
343
+ );
279
344
  const start = Date.now();
280
345
  const history = await loop.run([userMessage], () => {}, controller.signal);
281
346
  const elapsed = Date.now() - start;
@@ -286,96 +351,142 @@ describe('AgentLoop', () => {
286
351
  // User message + assistant tool_use + synthesized cancellation tool_result
287
352
  expect(history).toHaveLength(3);
288
353
  const lastMsg = history[2];
289
- expect(lastMsg.role).toBe('user');
354
+ expect(lastMsg.role).toBe("user");
290
355
  expect(lastMsg.content).toHaveLength(1);
291
- expect(lastMsg.content[0].type).toBe('tool_result');
292
- expect((lastMsg.content[0] as { type: 'tool_result'; tool_use_id: string; content: string; is_error: boolean }).content).toBe('Cancelled by user');
293
- expect((lastMsg.content[0] as { type: 'tool_result'; tool_use_id: string; content: string; is_error: boolean }).is_error).toBe(true);
356
+ expect(lastMsg.content[0].type).toBe("tool_result");
357
+ expect(
358
+ (
359
+ lastMsg.content[0] as {
360
+ type: "tool_result";
361
+ tool_use_id: string;
362
+ content: string;
363
+ is_error: boolean;
364
+ }
365
+ ).content,
366
+ ).toBe("Cancelled by user");
367
+ expect(
368
+ (
369
+ lastMsg.content[0] as {
370
+ type: "tool_result";
371
+ tool_use_id: string;
372
+ content: string;
373
+ is_error: boolean;
374
+ }
375
+ ).is_error,
376
+ ).toBe(true);
294
377
  });
295
378
 
296
379
  // 7. Events — verify text_delta and other events are emitted
297
- test('emits text_delta events during streaming', async () => {
298
- const { provider } = createMockProvider([textResponse('Hello world')]);
299
- const loop = new AgentLoop(provider, 'system');
380
+ test("emits text_delta events during streaming", async () => {
381
+ const { provider } = createMockProvider([textResponse("Hello world")]);
382
+ const loop = new AgentLoop(provider, "system");
300
383
 
301
384
  const events: AgentEvent[] = [];
302
385
  await loop.run([userMessage], collectEvents(events));
303
386
 
304
- const textDeltas = events.filter((e) => e.type === 'text_delta');
387
+ const textDeltas = events.filter((e) => e.type === "text_delta");
305
388
  expect(textDeltas).toHaveLength(1);
306
- expect((textDeltas[0] as { type: 'text_delta'; text: string }).text).toBe('Hello world');
389
+ expect((textDeltas[0] as { type: "text_delta"; text: string }).text).toBe(
390
+ "Hello world",
391
+ );
307
392
  });
308
393
 
309
- test('emits usage events', async () => {
310
- const { provider } = createMockProvider([textResponse('Hi')]);
311
- const loop = new AgentLoop(provider, 'system');
394
+ test("emits usage events", async () => {
395
+ const { provider } = createMockProvider([textResponse("Hi")]);
396
+ const loop = new AgentLoop(provider, "system");
312
397
 
313
398
  const events: AgentEvent[] = [];
314
399
  await loop.run([userMessage], collectEvents(events));
315
400
 
316
- const usageEvents = events.filter((e) => e.type === 'usage');
401
+ const usageEvents = events.filter((e) => e.type === "usage");
317
402
  expect(usageEvents).toHaveLength(1);
318
- const usage = usageEvents[0] as Extract<AgentEvent, { type: 'usage' }>;
319
- expect(usage.type).toBe('usage');
403
+ const usage = usageEvents[0] as Extract<AgentEvent, { type: "usage" }>;
404
+ expect(usage.type).toBe("usage");
320
405
  expect(usage.inputTokens).toBe(10);
321
406
  expect(usage.outputTokens).toBe(5);
322
- expect(usage.model).toBe('mock-model');
323
- expect(typeof usage.providerDurationMs).toBe('number');
407
+ expect(usage.model).toBe("mock-model");
408
+ expect(typeof usage.providerDurationMs).toBe("number");
324
409
  expect(usage.providerDurationMs).toBeGreaterThanOrEqual(0);
325
410
  });
326
411
 
327
- test('emits message_complete events', async () => {
328
- const { provider } = createMockProvider([textResponse('Done')]);
329
- const loop = new AgentLoop(provider, 'system');
412
+ test("emits message_complete events", async () => {
413
+ const { provider } = createMockProvider([textResponse("Done")]);
414
+ const loop = new AgentLoop(provider, "system");
330
415
 
331
416
  const events: AgentEvent[] = [];
332
417
  await loop.run([userMessage], collectEvents(events));
333
418
 
334
- const completeEvents = events.filter((e) => e.type === 'message_complete');
419
+ const completeEvents = events.filter((e) => e.type === "message_complete");
335
420
  expect(completeEvents).toHaveLength(1);
336
- expect((completeEvents[0] as { type: 'message_complete'; message: Message }).message.role).toBe('assistant');
421
+ expect(
422
+ (completeEvents[0] as { type: "message_complete"; message: Message })
423
+ .message.role,
424
+ ).toBe("assistant");
337
425
  });
338
426
 
339
- test('emits tool_use and tool_result events during tool execution', async () => {
427
+ test("emits tool_use and tool_result events during tool execution", async () => {
340
428
  const { provider } = createMockProvider([
341
- toolUseResponse('t1', 'read_file', { path: '/test.txt' }),
342
- textResponse('Done'),
429
+ toolUseResponse("t1", "read_file", { path: "/test.txt" }),
430
+ textResponse("Done"),
343
431
  ]);
344
432
 
345
- const toolExecutor = async () => ({ content: 'file data', isError: false });
346
- const loop = new AgentLoop(provider, 'system', {}, dummyTools, toolExecutor);
433
+ const toolExecutor = async () => ({ content: "file data", isError: false });
434
+ const loop = new AgentLoop(
435
+ provider,
436
+ "system",
437
+ {},
438
+ dummyTools,
439
+ toolExecutor,
440
+ );
347
441
 
348
442
  const events: AgentEvent[] = [];
349
443
  await loop.run([userMessage], collectEvents(events));
350
444
 
351
- const toolUseEvents = events.filter((e) => e.type === 'tool_use');
445
+ const toolUseEvents = events.filter((e) => e.type === "tool_use");
352
446
  expect(toolUseEvents).toHaveLength(1);
353
447
  expect(toolUseEvents[0]).toEqual({
354
- type: 'tool_use',
355
- id: 't1',
356
- name: 'read_file',
357
- input: { path: '/test.txt' },
448
+ type: "tool_use",
449
+ id: "t1",
450
+ name: "read_file",
451
+ input: { path: "/test.txt" },
358
452
  });
359
453
 
360
- const toolResultEvents = events.filter((e) => e.type === 'tool_result');
454
+ const toolResultEvents = events.filter((e) => e.type === "tool_result");
361
455
  expect(toolResultEvents).toHaveLength(1);
362
- expect((toolResultEvents[0] as Extract<AgentEvent, { type: 'tool_result' }>).toolUseId).toBe('t1');
363
- expect((toolResultEvents[0] as Extract<AgentEvent, { type: 'tool_result' }>).content).toBe('file data');
364
- expect((toolResultEvents[0] as Extract<AgentEvent, { type: 'tool_result' }>).isError).toBe(false);
456
+ expect(
457
+ (toolResultEvents[0] as Extract<AgentEvent, { type: "tool_result" }>)
458
+ .toolUseId,
459
+ ).toBe("t1");
460
+ expect(
461
+ (toolResultEvents[0] as Extract<AgentEvent, { type: "tool_result" }>)
462
+ .content,
463
+ ).toBe("file data");
464
+ expect(
465
+ (toolResultEvents[0] as Extract<AgentEvent, { type: "tool_result" }>)
466
+ .isError,
467
+ ).toBe(false);
365
468
  });
366
469
 
367
470
  // 8. Progress reminder injection every 5 tool-use turns
368
- test('injects progress reminder after every 5 tool-use turns', async () => {
471
+ test("injects progress reminder after every 5 tool-use turns", async () => {
369
472
  // Create 6 tool responses followed by a text response
370
473
  const responses: ProviderResponse[] = [];
371
474
  for (let i = 0; i < 6; i++) {
372
- responses.push(toolUseResponse(`t${i}`, 'read_file', { path: `/file${i}.txt` }));
475
+ responses.push(
476
+ toolUseResponse(`t${i}`, "read_file", { path: `/file${i}.txt` }),
477
+ );
373
478
  }
374
- responses.push(textResponse('Finally done'));
479
+ responses.push(textResponse("Finally done"));
375
480
 
376
481
  const { provider, calls } = createMockProvider(responses);
377
- const toolExecutor = async () => ({ content: 'data', isError: false });
378
- const loop = new AgentLoop(provider, 'system', {}, dummyTools, toolExecutor);
482
+ const toolExecutor = async () => ({ content: "data", isError: false });
483
+ const loop = new AgentLoop(
484
+ provider,
485
+ "system",
486
+ {},
487
+ dummyTools,
488
+ toolExecutor,
489
+ );
379
490
 
380
491
  await loop.run([userMessage], () => {});
381
492
 
@@ -383,24 +494,24 @@ describe('AgentLoop', () => {
383
494
  // calls[5] is the 6th provider call; its messages[-1] should have the reminder
384
495
  const fifthTurnResultMsg = calls[5].messages[calls[5].messages.length - 1];
385
496
  const reminderBlock = fifthTurnResultMsg.content.find(
386
- (b): b is Extract<ContentBlock, { type: 'text' }> =>
387
- b.type === 'text' && b.text.includes('making meaningful progress'),
497
+ (b): b is Extract<ContentBlock, { type: "text" }> =>
498
+ b.type === "text" && b.text.includes("making meaningful progress"),
388
499
  );
389
500
  expect(reminderBlock).toBeDefined();
390
501
  });
391
502
 
392
- test('stops after configured maxToolUseTurns to prevent runaway loops', async () => {
503
+ test("stops after configured maxToolUseTurns to prevent runaway loops", async () => {
393
504
  const responses: ProviderResponse[] = [
394
- toolUseResponse('t1', 'read_file', { path: '/one.txt' }),
395
- toolUseResponse('t2', 'read_file', { path: '/two.txt' }),
396
- toolUseResponse('t3', 'read_file', { path: '/three.txt' }),
397
- textResponse('Should never be requested'),
505
+ toolUseResponse("t1", "read_file", { path: "/one.txt" }),
506
+ toolUseResponse("t2", "read_file", { path: "/two.txt" }),
507
+ toolUseResponse("t3", "read_file", { path: "/three.txt" }),
508
+ textResponse("Should never be requested"),
398
509
  ];
399
510
  const { provider, calls } = createMockProvider(responses);
400
- const toolExecutor = async () => ({ content: 'data', isError: false });
511
+ const toolExecutor = async () => ({ content: "data", isError: false });
401
512
  const loop = new AgentLoop(
402
513
  provider,
403
- 'system',
514
+ "system",
404
515
  { maxToolUseTurns: 3 },
405
516
  dummyTools,
406
517
  toolExecutor,
@@ -413,32 +524,36 @@ describe('AgentLoop', () => {
413
524
  expect(calls).toHaveLength(3);
414
525
 
415
526
  const errorEvents = events.filter(
416
- (e): e is Extract<AgentEvent, { type: 'error' }> => e.type === 'error',
527
+ (e): e is Extract<AgentEvent, { type: "error" }> => e.type === "error",
417
528
  );
418
529
  expect(errorEvents).toHaveLength(1);
419
- expect(errorEvents[0].error.message).toContain('Tool-use turn limit reached (3)');
530
+ expect(errorEvents[0].error.message).toContain(
531
+ "Tool-use turn limit reached (3)",
532
+ );
420
533
 
421
534
  const lastMessage = history[history.length - 1];
422
- expect(lastMessage.role).toBe('user');
535
+ expect(lastMessage.role).toBe("user");
423
536
  const limitText = lastMessage.content.find(
424
- (b): b is Extract<ContentBlock, { type: 'text' }> =>
425
- b.type === 'text' && b.text.includes('Tool-use turn limit reached (3)'),
537
+ (b): b is Extract<ContentBlock, { type: "text" }> =>
538
+ b.type === "text" && b.text.includes("Tool-use turn limit reached (3)"),
426
539
  );
427
540
  expect(limitText).toBeDefined();
428
541
  });
429
542
 
430
- test('injects approaching-limit warning before the hard stop', async () => {
543
+ test("injects approaching-limit warning before the hard stop", async () => {
431
544
  // maxToolUseTurns: 8, soft warning at turn 3 (8 - 5 = 3)
432
545
  const responses: ProviderResponse[] = [];
433
546
  for (let i = 0; i < 8; i++) {
434
- responses.push(toolUseResponse(`t${i}`, 'read_file', { path: `/${i}.txt` }));
547
+ responses.push(
548
+ toolUseResponse(`t${i}`, "read_file", { path: `/${i}.txt` }),
549
+ );
435
550
  }
436
- responses.push(textResponse('done'));
551
+ responses.push(textResponse("done"));
437
552
  const { provider, calls } = createMockProvider(responses);
438
- const toolExecutor = async () => ({ content: 'data', isError: false });
553
+ const toolExecutor = async () => ({ content: "data", isError: false });
439
554
  const loop = new AgentLoop(
440
555
  provider,
441
- 'system',
556
+ "system",
442
557
  { maxToolUseTurns: 8 },
443
558
  dummyTools,
444
559
  toolExecutor,
@@ -455,25 +570,28 @@ describe('AgentLoop', () => {
455
570
  const turn4Messages = calls[3].messages;
456
571
  const lastMsg = turn4Messages[turn4Messages.length - 1];
457
572
  const warningBlock = lastMsg.content.find(
458
- (b): b is Extract<ContentBlock, { type: 'text' }> =>
459
- b.type === 'text' && b.text.includes('approaching the tool-use turn limit'),
573
+ (b): b is Extract<ContentBlock, { type: "text" }> =>
574
+ b.type === "text" &&
575
+ b.text.includes("approaching the tool-use turn limit"),
460
576
  );
461
577
  expect(warningBlock).toBeDefined();
462
578
  });
463
579
 
464
- test('runs without limit when maxToolUseTurns is 0', async () => {
580
+ test("runs without limit when maxToolUseTurns is 0", async () => {
465
581
  // Use 20 turns (beyond old default of 8 used in other tests) to verify no cap
466
582
  const turnCount = 20;
467
583
  const responses: ProviderResponse[] = [];
468
584
  for (let i = 0; i < turnCount; i++) {
469
- responses.push(toolUseResponse(`t${i}`, 'read_file', { path: `/${i}.txt` }));
585
+ responses.push(
586
+ toolUseResponse(`t${i}`, "read_file", { path: `/${i}.txt` }),
587
+ );
470
588
  }
471
- responses.push(textResponse('done'));
589
+ responses.push(textResponse("done"));
472
590
  const { provider, calls } = createMockProvider(responses);
473
- const toolExecutor = async () => ({ content: 'data', isError: false });
591
+ const toolExecutor = async () => ({ content: "data", isError: false });
474
592
  const loop = new AgentLoop(
475
593
  provider,
476
- 'system',
594
+ "system",
477
595
  { maxToolUseTurns: 0, minTurnIntervalMs: 0 },
478
596
  dummyTools,
479
597
  toolExecutor,
@@ -487,7 +605,7 @@ describe('AgentLoop', () => {
487
605
 
488
606
  // No hard-limit error events should have been emitted
489
607
  const errorEvents = events.filter(
490
- (e): e is Extract<AgentEvent, { type: 'error' }> => e.type === 'error',
608
+ (e): e is Extract<AgentEvent, { type: "error" }> => e.type === "error",
491
609
  );
492
610
  expect(errorEvents).toHaveLength(0);
493
611
 
@@ -495,8 +613,8 @@ describe('AgentLoop', () => {
495
613
  const progressChecks = calls.filter((call) => {
496
614
  const lastMsg = call.messages[call.messages.length - 1];
497
615
  return lastMsg.content.some(
498
- (b): b is Extract<ContentBlock, { type: 'text' }> =>
499
- b.type === 'text' && b.text.includes('making meaningful progress'),
616
+ (b): b is Extract<ContentBlock, { type: "text" }> =>
617
+ b.type === "text" && b.text.includes("making meaningful progress"),
500
618
  );
501
619
  });
502
620
  expect(progressChecks.length).toBeGreaterThanOrEqual(3);
@@ -505,40 +623,51 @@ describe('AgentLoop', () => {
505
623
  const limitWarnings = calls.filter((call) => {
506
624
  const lastMsg = call.messages[call.messages.length - 1];
507
625
  return lastMsg.content.some(
508
- (b): b is Extract<ContentBlock, { type: 'text' }> =>
509
- b.type === 'text' && b.text.includes('approaching the tool-use turn limit'),
626
+ (b): b is Extract<ContentBlock, { type: "text" }> =>
627
+ b.type === "text" &&
628
+ b.text.includes("approaching the tool-use turn limit"),
510
629
  );
511
630
  });
512
631
  expect(limitWarnings).toHaveLength(0);
513
632
  });
514
633
 
515
634
  // 9. Tool executor error results are forwarded correctly
516
- test('forwards tool error results to provider', async () => {
635
+ test("forwards tool error results to provider", async () => {
517
636
  const { provider, calls } = createMockProvider([
518
- toolUseResponse('t1', 'read_file', { path: '/nonexistent.txt' }),
519
- textResponse('File not found, sorry.'),
637
+ toolUseResponse("t1", "read_file", { path: "/nonexistent.txt" }),
638
+ textResponse("File not found, sorry."),
520
639
  ]);
521
640
 
522
- const toolExecutor = async () => ({ content: 'ENOENT: file not found', isError: true });
523
- const loop = new AgentLoop(provider, 'system', {}, dummyTools, toolExecutor);
641
+ const toolExecutor = async () => ({
642
+ content: "ENOENT: file not found",
643
+ isError: true,
644
+ });
645
+ const loop = new AgentLoop(
646
+ provider,
647
+ "system",
648
+ {},
649
+ dummyTools,
650
+ toolExecutor,
651
+ );
524
652
 
525
653
  await loop.run([userMessage], () => {});
526
654
 
527
655
  const secondCallMessages = calls[1].messages;
528
656
  const lastMsg = secondCallMessages[secondCallMessages.length - 1];
529
657
  const toolResultBlock = lastMsg.content.find(
530
- (b): b is Extract<ContentBlock, { type: 'tool_result' }> => b.type === 'tool_result',
658
+ (b): b is Extract<ContentBlock, { type: "tool_result" }> =>
659
+ b.type === "tool_result",
531
660
  );
532
661
  expect(toolResultBlock).toBeDefined();
533
662
  expect(toolResultBlock!.is_error).toBe(true);
534
- expect(toolResultBlock!.content).toBe('ENOENT: file not found');
663
+ expect(toolResultBlock!.content).toBe("ENOENT: file not found");
535
664
  });
536
665
 
537
666
  // 10. Tool output chunks are forwarded via onEvent
538
- test('emits tool_output_chunk events during tool execution', async () => {
667
+ test("emits tool_output_chunk events during tool execution", async () => {
539
668
  const { provider } = createMockProvider([
540
- toolUseResponse('t1', 'read_file', { path: '/test.txt' }),
541
- textResponse('Done'),
669
+ toolUseResponse("t1", "read_file", { path: "/test.txt" }),
670
+ textResponse("Done"),
542
671
  ]);
543
672
 
544
673
  const toolExecutor = async (
@@ -546,36 +675,48 @@ describe('AgentLoop', () => {
546
675
  _input: Record<string, unknown>,
547
676
  onOutput?: (chunk: string) => void,
548
677
  ) => {
549
- onOutput?.('chunk1');
550
- onOutput?.('chunk2');
551
- return { content: 'full output', isError: false };
678
+ onOutput?.("chunk1");
679
+ onOutput?.("chunk2");
680
+ return { content: "full output", isError: false };
552
681
  };
553
682
 
554
- const loop = new AgentLoop(provider, 'system', {}, dummyTools, toolExecutor);
683
+ const loop = new AgentLoop(
684
+ provider,
685
+ "system",
686
+ {},
687
+ dummyTools,
688
+ toolExecutor,
689
+ );
555
690
  const events: AgentEvent[] = [];
556
691
  await loop.run([userMessage], collectEvents(events));
557
692
 
558
- const chunkEvents = events.filter((e) => e.type === 'tool_output_chunk');
693
+ const chunkEvents = events.filter((e) => e.type === "tool_output_chunk");
559
694
  expect(chunkEvents).toHaveLength(2);
560
- expect((chunkEvents[0] as Extract<AgentEvent, { type: 'tool_output_chunk' }>).chunk).toBe('chunk1');
561
- expect((chunkEvents[1] as Extract<AgentEvent, { type: 'tool_output_chunk' }>).chunk).toBe('chunk2');
695
+ expect(
696
+ (chunkEvents[0] as Extract<AgentEvent, { type: "tool_output_chunk" }>)
697
+ .chunk,
698
+ ).toBe("chunk1");
699
+ expect(
700
+ (chunkEvents[1] as Extract<AgentEvent, { type: "tool_output_chunk" }>)
701
+ .chunk,
702
+ ).toBe("chunk2");
562
703
  });
563
704
 
564
705
  // 11. System prompt and tools are passed to provider
565
- test('passes system prompt and tools to provider', async () => {
566
- const { provider, calls } = createMockProvider([textResponse('Hi')]);
567
- const loop = new AgentLoop(provider, 'My system prompt', {}, dummyTools);
706
+ test("passes system prompt and tools to provider", async () => {
707
+ const { provider, calls } = createMockProvider([textResponse("Hi")]);
708
+ const loop = new AgentLoop(provider, "My system prompt", {}, dummyTools);
568
709
 
569
710
  await loop.run([userMessage], () => {});
570
711
 
571
- expect(calls[0].systemPrompt).toBe('My system prompt');
712
+ expect(calls[0].systemPrompt).toBe("My system prompt");
572
713
  expect(calls[0].tools).toEqual(dummyTools);
573
714
  });
574
715
 
575
716
  // 12. No tools configured — tools are not passed to provider
576
- test('does not pass tools to provider when none are configured', async () => {
577
- const { provider, calls } = createMockProvider([textResponse('Hi')]);
578
- const loop = new AgentLoop(provider, 'system');
717
+ test("does not pass tools to provider when none are configured", async () => {
718
+ const { provider, calls } = createMockProvider([textResponse("Hi")]);
719
+ const loop = new AgentLoop(provider, "system");
579
720
 
580
721
  await loop.run([userMessage], () => {});
581
722
 
@@ -583,33 +724,60 @@ describe('AgentLoop', () => {
583
724
  });
584
725
 
585
726
  // 13. Parallel tool execution — multiple tool_use blocks in a single response
586
- test('executes multiple tools in parallel', async () => {
727
+ test("executes multiple tools in parallel", async () => {
587
728
  const { provider, calls } = createMockProvider([
588
729
  // Provider returns 3 tool_use blocks in a single response
589
730
  {
590
731
  content: [
591
- { type: 'tool_use' as const, id: 't1', name: 'read_file', input: { path: '/a.txt' } },
592
- { type: 'tool_use' as const, id: 't2', name: 'read_file', input: { path: '/b.txt' } },
593
- { type: 'tool_use' as const, id: 't3', name: 'read_file', input: { path: '/c.txt' } },
732
+ {
733
+ type: "tool_use" as const,
734
+ id: "t1",
735
+ name: "read_file",
736
+ input: { path: "/a.txt" },
737
+ },
738
+ {
739
+ type: "tool_use" as const,
740
+ id: "t2",
741
+ name: "read_file",
742
+ input: { path: "/b.txt" },
743
+ },
744
+ {
745
+ type: "tool_use" as const,
746
+ id: "t3",
747
+ name: "read_file",
748
+ input: { path: "/c.txt" },
749
+ },
594
750
  ],
595
- model: 'mock-model',
751
+ model: "mock-model",
596
752
  usage: { inputTokens: 10, outputTokens: 5 },
597
- stopReason: 'tool_use' as const,
753
+ stopReason: "tool_use" as const,
598
754
  },
599
- textResponse('Got all three files.'),
755
+ textResponse("Got all three files."),
600
756
  ]);
601
757
 
602
758
  const executionLog: { path: string; start: number; end: number }[] = [];
603
- const toolExecutor = async (_name: string, input: Record<string, unknown>) => {
759
+ const toolExecutor = async (
760
+ _name: string,
761
+ input: Record<string, unknown>,
762
+ ) => {
604
763
  const start = Date.now();
605
764
  // Simulate async work — all tools should overlap in time
606
- await new Promise(resolve => setTimeout(resolve, 50));
765
+ await new Promise((resolve) => setTimeout(resolve, 50));
607
766
  const end = Date.now();
608
767
  executionLog.push({ path: (input as { path: string }).path, start, end });
609
- return { content: `contents of ${(input as { path: string }).path}`, isError: false };
768
+ return {
769
+ content: `contents of ${(input as { path: string }).path}`,
770
+ isError: false,
771
+ };
610
772
  };
611
773
 
612
- const loop = new AgentLoop(provider, 'system', {}, dummyTools, toolExecutor);
774
+ const loop = new AgentLoop(
775
+ provider,
776
+ "system",
777
+ {},
778
+ dummyTools,
779
+ toolExecutor,
780
+ );
613
781
  const events: AgentEvent[] = [];
614
782
  const history = await loop.run([userMessage], collectEvents(events));
615
783
 
@@ -618,8 +786,8 @@ describe('AgentLoop', () => {
618
786
 
619
787
  // Verify parallel execution: all tools should start before any finishes
620
788
  // (with 50ms delay each, sequential would take 150ms+, parallel ~50ms)
621
- const allStarts = executionLog.map(e => e.start);
622
- const allEnds = executionLog.map(e => e.end);
789
+ const allStarts = executionLog.map((e) => e.start);
790
+ const allEnds = executionLog.map((e) => e.end);
623
791
  const firstEnd = Math.min(...allEnds);
624
792
  const lastStart = Math.max(...allStarts);
625
793
  // In parallel execution, the last tool starts before the first tool ends
@@ -632,19 +800,21 @@ describe('AgentLoop', () => {
632
800
  const secondCallMessages = calls[1].messages;
633
801
  const lastMsg = secondCallMessages[secondCallMessages.length - 1];
634
802
  const toolResultBlocks = lastMsg.content.filter(
635
- (b): b is Extract<ContentBlock, { type: 'tool_result' }> => b.type === 'tool_result',
803
+ (b): b is Extract<ContentBlock, { type: "tool_result" }> =>
804
+ b.type === "tool_result",
636
805
  );
637
806
  expect(toolResultBlocks).toHaveLength(3);
638
- expect(toolResultBlocks[0].tool_use_id).toBe('t1');
639
- expect(toolResultBlocks[1].tool_use_id).toBe('t2');
640
- expect(toolResultBlocks[2].tool_use_id).toBe('t3');
807
+ expect(toolResultBlocks[0].tool_use_id).toBe("t1");
808
+ expect(toolResultBlocks[1].tool_use_id).toBe("t2");
809
+ expect(toolResultBlocks[2].tool_use_id).toBe("t3");
641
810
 
642
811
  // All tool_use events should be emitted before any tool_result events
643
812
  let lastToolUseIdx = -1;
644
813
  let firstToolResultIdx = events.length;
645
814
  events.forEach((e, i) => {
646
- if (e.type === 'tool_use') lastToolUseIdx = i;
647
- if (e.type === 'tool_result' && i < firstToolResultIdx) firstToolResultIdx = i;
815
+ if (e.type === "tool_use") lastToolUseIdx = i;
816
+ if (e.type === "tool_result" && i < firstToolResultIdx)
817
+ firstToolResultIdx = i;
648
818
  });
649
819
  expect(lastToolUseIdx).toBeLessThan(firstToolResultIdx);
650
820
 
@@ -653,96 +823,151 @@ describe('AgentLoop', () => {
653
823
  });
654
824
 
655
825
  // 14. Abort before parallel tool execution synthesizes cancelled results
656
- test('synthesizes cancelled results when aborted before tool execution', async () => {
826
+ test("synthesizes cancelled results when aborted before tool execution", async () => {
657
827
  const controller = new AbortController();
658
828
 
659
829
  const { provider } = createMockProvider([
660
830
  {
661
831
  content: [
662
- { type: 'tool_use' as const, id: 't1', name: 'read_file', input: { path: '/a.txt' } },
663
- { type: 'tool_use' as const, id: 't2', name: 'read_file', input: { path: '/b.txt' } },
832
+ {
833
+ type: "tool_use" as const,
834
+ id: "t1",
835
+ name: "read_file",
836
+ input: { path: "/a.txt" },
837
+ },
838
+ {
839
+ type: "tool_use" as const,
840
+ id: "t2",
841
+ name: "read_file",
842
+ input: { path: "/b.txt" },
843
+ },
664
844
  ],
665
- model: 'mock-model',
845
+ model: "mock-model",
666
846
  usage: { inputTokens: 10, outputTokens: 5 },
667
- stopReason: 'tool_use' as const,
847
+ stopReason: "tool_use" as const,
668
848
  },
669
849
  ]);
670
850
 
671
851
  // Abort during the provider call so the signal is already aborted
672
852
  // before tool execution begins
673
853
  const originalSendMessage = provider.sendMessage.bind(provider);
674
- provider.sendMessage = async (...args: Parameters<typeof provider.sendMessage>) => {
854
+ provider.sendMessage = async (
855
+ ...args: Parameters<typeof provider.sendMessage>
856
+ ) => {
675
857
  const result = await originalSendMessage(...args);
676
858
  controller.abort();
677
859
  return result;
678
860
  };
679
861
 
680
862
  const toolCalls: string[] = [];
681
- const toolExecutor = async (_name: string, input: Record<string, unknown>) => {
863
+ const toolExecutor = async (
864
+ _name: string,
865
+ input: Record<string, unknown>,
866
+ ) => {
682
867
  toolCalls.push((input as { path: string }).path);
683
- return { content: 'data', isError: false };
868
+ return { content: "data", isError: false };
684
869
  };
685
870
 
686
- const loop = new AgentLoop(provider, 'system', {}, dummyTools, toolExecutor);
871
+ const loop = new AgentLoop(
872
+ provider,
873
+ "system",
874
+ {},
875
+ dummyTools,
876
+ toolExecutor,
877
+ );
687
878
  const events: AgentEvent[] = [];
688
- const history = await loop.run([userMessage], collectEvents(events), controller.signal);
879
+ const history = await loop.run(
880
+ [userMessage],
881
+ collectEvents(events),
882
+ controller.signal,
883
+ );
689
884
 
690
885
  // No tools should have been executed
691
886
  expect(toolCalls).toHaveLength(0);
692
887
 
693
888
  // History should contain cancelled tool_result blocks
694
889
  const lastMsg = history[history.length - 1];
695
- expect(lastMsg.role).toBe('user');
890
+ expect(lastMsg.role).toBe("user");
696
891
  const toolResultBlocks = lastMsg.content.filter(
697
- (b): b is Extract<ContentBlock, { type: 'tool_result' }> => b.type === 'tool_result',
892
+ (b): b is Extract<ContentBlock, { type: "tool_result" }> =>
893
+ b.type === "tool_result",
698
894
  );
699
895
  expect(toolResultBlocks).toHaveLength(2);
700
- expect(toolResultBlocks[0].tool_use_id).toBe('t1');
701
- expect(toolResultBlocks[0].content).toBe('Cancelled by user');
896
+ expect(toolResultBlocks[0].tool_use_id).toBe("t1");
897
+ expect(toolResultBlocks[0].content).toBe("Cancelled by user");
702
898
  expect(toolResultBlocks[0].is_error).toBe(true);
703
- expect(toolResultBlocks[1].tool_use_id).toBe('t2');
704
- expect(toolResultBlocks[1].content).toBe('Cancelled by user');
899
+ expect(toolResultBlocks[1].tool_use_id).toBe("t2");
900
+ expect(toolResultBlocks[1].content).toBe("Cancelled by user");
705
901
  expect(toolResultBlocks[1].is_error).toBe(true);
706
902
  });
707
903
 
708
904
  // 15. Parallel tool_result events are emitted in deterministic tool_use order
709
- test('emits tool_result events in tool_use order regardless of completion timing', async () => {
905
+ test("emits tool_result events in tool_use order regardless of completion timing", async () => {
710
906
  const { provider } = createMockProvider([
711
907
  {
712
908
  content: [
713
- { type: 'tool_use' as const, id: 't1', name: 'read_file', input: { path: '/slow.txt' } },
714
- { type: 'tool_use' as const, id: 't2', name: 'read_file', input: { path: '/fast.txt' } },
715
- { type: 'tool_use' as const, id: 't3', name: 'read_file', input: { path: '/medium.txt' } },
909
+ {
910
+ type: "tool_use" as const,
911
+ id: "t1",
912
+ name: "read_file",
913
+ input: { path: "/slow.txt" },
914
+ },
915
+ {
916
+ type: "tool_use" as const,
917
+ id: "t2",
918
+ name: "read_file",
919
+ input: { path: "/fast.txt" },
920
+ },
921
+ {
922
+ type: "tool_use" as const,
923
+ id: "t3",
924
+ name: "read_file",
925
+ input: { path: "/medium.txt" },
926
+ },
716
927
  ],
717
- model: 'mock-model',
928
+ model: "mock-model",
718
929
  usage: { inputTokens: 10, outputTokens: 5 },
719
- stopReason: 'tool_use' as const,
930
+ stopReason: "tool_use" as const,
720
931
  },
721
- textResponse('Done'),
932
+ textResponse("Done"),
722
933
  ]);
723
934
 
724
935
  // Tools complete in different order than they were called: t2 first, t3 second, t1 last
725
- const toolExecutor = async (_name: string, input: Record<string, unknown>) => {
936
+ const toolExecutor = async (
937
+ _name: string,
938
+ input: Record<string, unknown>,
939
+ ) => {
726
940
  const path = (input as { path: string }).path;
727
- const delays: Record<string, number> = { '/slow.txt': 80, '/fast.txt': 10, '/medium.txt': 40 };
728
- await new Promise(resolve => setTimeout(resolve, delays[path] ?? 10));
941
+ const delays: Record<string, number> = {
942
+ "/slow.txt": 80,
943
+ "/fast.txt": 10,
944
+ "/medium.txt": 40,
945
+ };
946
+ await new Promise((resolve) => setTimeout(resolve, delays[path] ?? 10));
729
947
  return { content: `contents of ${path}`, isError: false };
730
948
  };
731
949
 
732
- const loop = new AgentLoop(provider, 'system', {}, dummyTools, toolExecutor);
950
+ const loop = new AgentLoop(
951
+ provider,
952
+ "system",
953
+ {},
954
+ dummyTools,
955
+ toolExecutor,
956
+ );
733
957
  const events: AgentEvent[] = [];
734
958
  await loop.run([userMessage], collectEvents(events));
735
959
 
736
960
  // Collect tool_result events in order
737
961
  const toolResultEvents = events.filter(
738
- (e): e is Extract<AgentEvent, { type: 'tool_result' }> => e.type === 'tool_result',
962
+ (e): e is Extract<AgentEvent, { type: "tool_result" }> =>
963
+ e.type === "tool_result",
739
964
  );
740
965
  expect(toolResultEvents).toHaveLength(3);
741
966
 
742
967
  // Results must be in tool_use order (t1, t2, t3), NOT completion order (t2, t3, t1)
743
- expect(toolResultEvents[0].toolUseId).toBe('t1');
744
- expect(toolResultEvents[1].toolUseId).toBe('t2');
745
- expect(toolResultEvents[2].toolUseId).toBe('t3');
968
+ expect(toolResultEvents[0].toolUseId).toBe("t1");
969
+ expect(toolResultEvents[1].toolUseId).toBe("t2");
970
+ expect(toolResultEvents[2].toolUseId).toBe("t3");
746
971
  });
747
972
 
748
973
  // ---------------------------------------------------------------------------
@@ -750,19 +975,25 @@ describe('AgentLoop', () => {
750
975
  // ---------------------------------------------------------------------------
751
976
 
752
977
  // 16. Checkpoint callback is called after tool results with correct info
753
- test('checkpoint callback is called after tool results with correct info', async () => {
978
+ test("checkpoint callback is called after tool results with correct info", async () => {
754
979
  const { provider } = createMockProvider([
755
- toolUseResponse('t1', 'read_file', { path: '/test.txt' }),
756
- textResponse('Done'),
980
+ toolUseResponse("t1", "read_file", { path: "/test.txt" }),
981
+ textResponse("Done"),
757
982
  ]);
758
983
 
759
- const toolExecutor = async () => ({ content: 'file data', isError: false });
760
- const loop = new AgentLoop(provider, 'system', {}, dummyTools, toolExecutor);
984
+ const toolExecutor = async () => ({ content: "file data", isError: false });
985
+ const loop = new AgentLoop(
986
+ provider,
987
+ "system",
988
+ {},
989
+ dummyTools,
990
+ toolExecutor,
991
+ );
761
992
 
762
993
  const checkpoints: CheckpointInfo[] = [];
763
994
  const onCheckpoint = (checkpoint: CheckpointInfo): CheckpointDecision => {
764
995
  checkpoints.push(checkpoint);
765
- return 'continue';
996
+ return "continue";
766
997
  };
767
998
 
768
999
  await loop.run([userMessage], () => {}, undefined, undefined, onCheckpoint);
@@ -776,84 +1007,120 @@ describe('AgentLoop', () => {
776
1007
  });
777
1008
 
778
1009
  // 17. Returning 'continue' lets the loop proceed normally
779
- test('checkpoint returning continue lets the loop proceed normally', async () => {
1010
+ test("checkpoint returning continue lets the loop proceed normally", async () => {
780
1011
  const { provider, calls } = createMockProvider([
781
- toolUseResponse('t1', 'read_file', { path: '/a.txt' }),
782
- toolUseResponse('t2', 'read_file', { path: '/b.txt' }),
783
- textResponse('All done'),
1012
+ toolUseResponse("t1", "read_file", { path: "/a.txt" }),
1013
+ toolUseResponse("t2", "read_file", { path: "/b.txt" }),
1014
+ textResponse("All done"),
784
1015
  ]);
785
1016
 
786
- const toolExecutor = async () => ({ content: 'data', isError: false });
787
- const loop = new AgentLoop(provider, 'system', {}, dummyTools, toolExecutor);
1017
+ const toolExecutor = async () => ({ content: "data", isError: false });
1018
+ const loop = new AgentLoop(
1019
+ provider,
1020
+ "system",
1021
+ {},
1022
+ dummyTools,
1023
+ toolExecutor,
1024
+ );
788
1025
 
789
- const onCheckpoint = (): CheckpointDecision => 'continue';
1026
+ const onCheckpoint = (): CheckpointDecision => "continue";
790
1027
 
791
- const history = await loop.run([userMessage], () => {}, undefined, undefined, onCheckpoint);
1028
+ const history = await loop.run(
1029
+ [userMessage],
1030
+ () => {},
1031
+ undefined,
1032
+ undefined,
1033
+ onCheckpoint,
1034
+ );
792
1035
 
793
1036
  // All 3 provider calls should happen (2 tool turns + final text)
794
1037
  expect(calls).toHaveLength(3);
795
1038
  // Full history: user, assistant(t1), user(result1), assistant(t2), user(result2), assistant(text)
796
1039
  expect(history).toHaveLength(6);
797
- expect(history[5].content).toEqual([{ type: 'text', text: 'All done' }]);
1040
+ expect(history[5].content).toEqual([{ type: "text", text: "All done" }]);
798
1041
  });
799
1042
 
800
1043
  // 18. Returning 'yield' causes the loop to stop after that turn
801
- test('checkpoint returning yield causes the loop to stop', async () => {
1044
+ test("checkpoint returning yield causes the loop to stop", async () => {
802
1045
  const { provider, calls } = createMockProvider([
803
- toolUseResponse('t1', 'read_file', { path: '/a.txt' }),
804
- toolUseResponse('t2', 'read_file', { path: '/b.txt' }),
805
- textResponse('Should not reach'),
1046
+ toolUseResponse("t1", "read_file", { path: "/a.txt" }),
1047
+ toolUseResponse("t2", "read_file", { path: "/b.txt" }),
1048
+ textResponse("Should not reach"),
806
1049
  ]);
807
1050
 
808
- const toolExecutor = async () => ({ content: 'data', isError: false });
809
- const loop = new AgentLoop(provider, 'system', {}, dummyTools, toolExecutor);
1051
+ const toolExecutor = async () => ({ content: "data", isError: false });
1052
+ const loop = new AgentLoop(
1053
+ provider,
1054
+ "system",
1055
+ {},
1056
+ dummyTools,
1057
+ toolExecutor,
1058
+ );
810
1059
 
811
- const onCheckpoint = (): CheckpointDecision => 'yield';
1060
+ const onCheckpoint = (): CheckpointDecision => "yield";
812
1061
 
813
- const history = await loop.run([userMessage], () => {}, undefined, undefined, onCheckpoint);
1062
+ const history = await loop.run(
1063
+ [userMessage],
1064
+ () => {},
1065
+ undefined,
1066
+ undefined,
1067
+ onCheckpoint,
1068
+ );
814
1069
 
815
1070
  // Only 1 provider call should happen — loop yields after first tool turn
816
1071
  expect(calls).toHaveLength(1);
817
1072
  // History: user, assistant(t1), user(result1)
818
1073
  expect(history).toHaveLength(3);
819
- expect(history[1].role).toBe('assistant');
820
- expect(history[2].role).toBe('user');
1074
+ expect(history[1].role).toBe("assistant");
1075
+ expect(history[2].role).toBe("user");
821
1076
  });
822
1077
 
823
1078
  // 19. Without a checkpoint callback, behavior is unchanged
824
- test('without checkpoint callback behavior is unchanged', async () => {
1079
+ test("without checkpoint callback behavior is unchanged", async () => {
825
1080
  const { provider, calls } = createMockProvider([
826
- toolUseResponse('t1', 'read_file', { path: '/a.txt' }),
827
- textResponse('Done'),
1081
+ toolUseResponse("t1", "read_file", { path: "/a.txt" }),
1082
+ textResponse("Done"),
828
1083
  ]);
829
1084
 
830
- const toolExecutor = async () => ({ content: 'data', isError: false });
831
- const loop = new AgentLoop(provider, 'system', {}, dummyTools, toolExecutor);
1085
+ const toolExecutor = async () => ({ content: "data", isError: false });
1086
+ const loop = new AgentLoop(
1087
+ provider,
1088
+ "system",
1089
+ {},
1090
+ dummyTools,
1091
+ toolExecutor,
1092
+ );
832
1093
 
833
1094
  const history = await loop.run([userMessage], () => {});
834
1095
 
835
1096
  // Normal behavior: 2 provider calls, full history
836
1097
  expect(calls).toHaveLength(2);
837
1098
  expect(history).toHaveLength(4);
838
- expect(history[3].content).toEqual([{ type: 'text', text: 'Done' }]);
1099
+ expect(history[3].content).toEqual([{ type: "text", text: "Done" }]);
839
1100
  });
840
1101
 
841
1102
  // 20. turnIndex increments correctly across turns
842
- test('turnIndex increments correctly across multiple turns', async () => {
1103
+ test("turnIndex increments correctly across multiple turns", async () => {
843
1104
  const { provider } = createMockProvider([
844
- toolUseResponse('t1', 'read_file', { path: '/a.txt' }),
845
- toolUseResponse('t2', 'read_file', { path: '/b.txt' }),
846
- toolUseResponse('t3', 'read_file', { path: '/c.txt' }),
847
- textResponse('Done'),
1105
+ toolUseResponse("t1", "read_file", { path: "/a.txt" }),
1106
+ toolUseResponse("t2", "read_file", { path: "/b.txt" }),
1107
+ toolUseResponse("t3", "read_file", { path: "/c.txt" }),
1108
+ textResponse("Done"),
848
1109
  ]);
849
1110
 
850
- const toolExecutor = async () => ({ content: 'data', isError: false });
851
- const loop = new AgentLoop(provider, 'system', {}, dummyTools, toolExecutor);
1111
+ const toolExecutor = async () => ({ content: "data", isError: false });
1112
+ const loop = new AgentLoop(
1113
+ provider,
1114
+ "system",
1115
+ {},
1116
+ dummyTools,
1117
+ toolExecutor,
1118
+ );
852
1119
 
853
1120
  const checkpoints: CheckpointInfo[] = [];
854
1121
  const onCheckpoint = (checkpoint: CheckpointInfo): CheckpointDecision => {
855
1122
  checkpoints.push(checkpoint);
856
- return 'continue';
1123
+ return "continue";
857
1124
  };
858
1125
 
859
1126
  await loop.run([userMessage], () => {}, undefined, undefined, onCheckpoint);
@@ -865,48 +1132,79 @@ describe('AgentLoop', () => {
865
1132
  });
866
1133
 
867
1134
  // 21. Checkpoint is NOT called when there's no tool use
868
- test('checkpoint is not called when assistant responds with text only', async () => {
869
- const { provider } = createMockProvider([textResponse('Just a text response')]);
870
- const loop = new AgentLoop(provider, 'system', {}, dummyTools);
1135
+ test("checkpoint is not called when assistant responds with text only", async () => {
1136
+ const { provider } = createMockProvider([
1137
+ textResponse("Just a text response"),
1138
+ ]);
1139
+ const loop = new AgentLoop(provider, "system", {}, dummyTools);
871
1140
 
872
1141
  const checkpoints: CheckpointInfo[] = [];
873
1142
  const onCheckpoint = (checkpoint: CheckpointInfo): CheckpointDecision => {
874
1143
  checkpoints.push(checkpoint);
875
- return 'continue';
1144
+ return "continue";
876
1145
  };
877
1146
 
878
- const history = await loop.run([userMessage], () => {}, undefined, undefined, onCheckpoint);
1147
+ const history = await loop.run(
1148
+ [userMessage],
1149
+ () => {},
1150
+ undefined,
1151
+ undefined,
1152
+ onCheckpoint,
1153
+ );
879
1154
 
880
1155
  // Checkpoint should never be called for a text-only response
881
1156
  expect(checkpoints).toHaveLength(0);
882
1157
  // Normal response
883
1158
  expect(history).toHaveLength(2);
884
- expect(history[1].content).toEqual([{ type: 'text', text: 'Just a text response' }]);
1159
+ expect(history[1].content).toEqual([
1160
+ { type: "text", text: "Just a text response" },
1161
+ ]);
885
1162
  });
886
1163
 
887
1164
  // 22. Checkpoint reports correct toolCount for parallel tool execution
888
- test('checkpoint reports correct toolCount for parallel tools', async () => {
1165
+ test("checkpoint reports correct toolCount for parallel tools", async () => {
889
1166
  const { provider } = createMockProvider([
890
1167
  {
891
1168
  content: [
892
- { type: 'tool_use' as const, id: 't1', name: 'read_file', input: { path: '/a.txt' } },
893
- { type: 'tool_use' as const, id: 't2', name: 'read_file', input: { path: '/b.txt' } },
894
- { type: 'tool_use' as const, id: 't3', name: 'read_file', input: { path: '/c.txt' } },
1169
+ {
1170
+ type: "tool_use" as const,
1171
+ id: "t1",
1172
+ name: "read_file",
1173
+ input: { path: "/a.txt" },
1174
+ },
1175
+ {
1176
+ type: "tool_use" as const,
1177
+ id: "t2",
1178
+ name: "read_file",
1179
+ input: { path: "/b.txt" },
1180
+ },
1181
+ {
1182
+ type: "tool_use" as const,
1183
+ id: "t3",
1184
+ name: "read_file",
1185
+ input: { path: "/c.txt" },
1186
+ },
895
1187
  ],
896
- model: 'mock-model',
1188
+ model: "mock-model",
897
1189
  usage: { inputTokens: 10, outputTokens: 5 },
898
- stopReason: 'tool_use' as const,
1190
+ stopReason: "tool_use" as const,
899
1191
  },
900
- textResponse('Got all three'),
1192
+ textResponse("Got all three"),
901
1193
  ]);
902
1194
 
903
- const toolExecutor = async () => ({ content: 'data', isError: false });
904
- const loop = new AgentLoop(provider, 'system', {}, dummyTools, toolExecutor);
1195
+ const toolExecutor = async () => ({ content: "data", isError: false });
1196
+ const loop = new AgentLoop(
1197
+ provider,
1198
+ "system",
1199
+ {},
1200
+ dummyTools,
1201
+ toolExecutor,
1202
+ );
905
1203
 
906
1204
  const checkpoints: CheckpointInfo[] = [];
907
1205
  const onCheckpoint = (checkpoint: CheckpointInfo): CheckpointDecision => {
908
1206
  checkpoints.push(checkpoint);
909
- return 'continue';
1207
+ return "continue";
910
1208
  };
911
1209
 
912
1210
  await loop.run([userMessage], () => {}, undefined, undefined, onCheckpoint);
@@ -917,27 +1215,41 @@ describe('AgentLoop', () => {
917
1215
  });
918
1216
 
919
1217
  // 23. Multiple checkpoints across a multi-turn run with selective yield on turn 3
920
- test('multiple checkpoints with selective yield — executes turns 0-2, yields at turn 3, never runs 4+', async () => {
1218
+ test("multiple checkpoints with selective yield — executes turns 0-2, yields at turn 3, never runs 4+", async () => {
921
1219
  // Mock provider to return tool_use for 5 turns, then text
922
1220
  const responses: ProviderResponse[] = [];
923
1221
  for (let i = 0; i < 5; i++) {
924
- responses.push(toolUseResponse(`t${i}`, 'read_file', { path: `/file${i}.txt` }));
1222
+ responses.push(
1223
+ toolUseResponse(`t${i}`, "read_file", { path: `/file${i}.txt` }),
1224
+ );
925
1225
  }
926
- responses.push(textResponse('Should never reach this'));
1226
+ responses.push(textResponse("Should never reach this"));
927
1227
 
928
1228
  const { provider, calls } = createMockProvider(responses);
929
- const toolExecutor = async () => ({ content: 'data', isError: false });
930
- const loop = new AgentLoop(provider, 'system', {}, dummyTools, toolExecutor);
1229
+ const toolExecutor = async () => ({ content: "data", isError: false });
1230
+ const loop = new AgentLoop(
1231
+ provider,
1232
+ "system",
1233
+ {},
1234
+ dummyTools,
1235
+ toolExecutor,
1236
+ );
931
1237
 
932
1238
  const checkpoints: CheckpointInfo[] = [];
933
1239
  const onCheckpoint = (checkpoint: CheckpointInfo): CheckpointDecision => {
934
1240
  checkpoints.push(checkpoint);
935
1241
  // Yield on turn 3 (0-indexed)
936
- return checkpoint.turnIndex === 3 ? 'yield' : 'continue';
1242
+ return checkpoint.turnIndex === 3 ? "yield" : "continue";
937
1243
  };
938
1244
 
939
1245
  const events: AgentEvent[] = [];
940
- const history = await loop.run([userMessage], collectEvents(events), undefined, undefined, onCheckpoint);
1246
+ const history = await loop.run(
1247
+ [userMessage],
1248
+ collectEvents(events),
1249
+ undefined,
1250
+ undefined,
1251
+ onCheckpoint,
1252
+ );
941
1253
 
942
1254
  // Turns 0, 1, 2, 3 execute (4 provider calls). Turn 3 yields, so turns 4+ never execute.
943
1255
  expect(calls).toHaveLength(4);
@@ -956,45 +1268,61 @@ describe('AgentLoop', () => {
956
1268
  expect(history).toHaveLength(9);
957
1269
 
958
1270
  // Verify the last two messages are from turn 3
959
- expect(history[7].role).toBe('assistant');
960
- const lastAssistantToolUse = history[7].content.find((b) => b.type === 'tool_use');
1271
+ expect(history[7].role).toBe("assistant");
1272
+ const lastAssistantToolUse = history[7].content.find(
1273
+ (b) => b.type === "tool_use",
1274
+ );
961
1275
  expect(lastAssistantToolUse).toBeDefined();
962
- if (lastAssistantToolUse && lastAssistantToolUse.type === 'tool_use') {
963
- expect(lastAssistantToolUse.id).toBe('t3');
1276
+ if (lastAssistantToolUse && lastAssistantToolUse.type === "tool_use") {
1277
+ expect(lastAssistantToolUse.id).toBe("t3");
964
1278
  }
965
- expect(history[8].role).toBe('user');
1279
+ expect(history[8].role).toBe("user");
966
1280
  const lastToolResult = history[8].content.find(
967
- (b): b is Extract<ContentBlock, { type: 'tool_result' }> => b.type === 'tool_result',
1281
+ (b): b is Extract<ContentBlock, { type: "tool_result" }> =>
1282
+ b.type === "tool_result",
968
1283
  );
969
1284
  expect(lastToolResult).toBeDefined();
970
- expect(lastToolResult!.tool_use_id).toBe('t3');
1285
+ expect(lastToolResult!.tool_use_id).toBe("t3");
971
1286
 
972
1287
  // Verify turns 4+ never executed — no tool_use event for t4
973
1288
  const toolUseEvents = events.filter(
974
- (e): e is Extract<AgentEvent, { type: 'tool_use' }> => e.type === 'tool_use',
1289
+ (e): e is Extract<AgentEvent, { type: "tool_use" }> =>
1290
+ e.type === "tool_use",
975
1291
  );
976
1292
  const toolUseNames = toolUseEvents.map((e) => e.id);
977
- expect(toolUseNames).toEqual(['t0', 't1', 't2', 't3']);
978
- expect(toolUseNames).not.toContain('t4');
1293
+ expect(toolUseNames).toEqual(["t0", "t1", "t2", "t3"]);
1294
+ expect(toolUseNames).not.toContain("t4");
979
1295
  });
980
1296
 
981
1297
  // 24. Yield on second turn — first turn proceeds, second stops
982
- test('yield on second turn lets first turn proceed and stops on second', async () => {
1298
+ test("yield on second turn lets first turn proceed and stops on second", async () => {
983
1299
  const { provider, calls } = createMockProvider([
984
- toolUseResponse('t1', 'read_file', { path: '/a.txt' }),
985
- toolUseResponse('t2', 'read_file', { path: '/b.txt' }),
986
- textResponse('Should not reach'),
1300
+ toolUseResponse("t1", "read_file", { path: "/a.txt" }),
1301
+ toolUseResponse("t2", "read_file", { path: "/b.txt" }),
1302
+ textResponse("Should not reach"),
987
1303
  ]);
988
1304
 
989
- const toolExecutor = async () => ({ content: 'data', isError: false });
990
- const loop = new AgentLoop(provider, 'system', {}, dummyTools, toolExecutor);
1305
+ const toolExecutor = async () => ({ content: "data", isError: false });
1306
+ const loop = new AgentLoop(
1307
+ provider,
1308
+ "system",
1309
+ {},
1310
+ dummyTools,
1311
+ toolExecutor,
1312
+ );
991
1313
 
992
1314
  const onCheckpoint = (checkpoint: CheckpointInfo): CheckpointDecision => {
993
1315
  // Yield on the second turn (turnIndex 1)
994
- return checkpoint.turnIndex === 1 ? 'yield' : 'continue';
1316
+ return checkpoint.turnIndex === 1 ? "yield" : "continue";
995
1317
  };
996
1318
 
997
- const history = await loop.run([userMessage], () => {}, undefined, undefined, onCheckpoint);
1319
+ const history = await loop.run(
1320
+ [userMessage],
1321
+ () => {},
1322
+ undefined,
1323
+ undefined,
1324
+ onCheckpoint,
1325
+ );
998
1326
 
999
1327
  // 2 provider calls: first tool turn + second tool turn (yield after second)
1000
1328
  expect(calls).toHaveLength(2);
@@ -1007,9 +1335,9 @@ describe('AgentLoop', () => {
1007
1335
  // ---------------------------------------------------------------------------
1008
1336
 
1009
1337
  // 25. Without resolveTools, static tools are used (backward compatible)
1010
- test('without resolveTools, static tools are passed to provider', async () => {
1011
- const { provider, calls } = createMockProvider([textResponse('Hi')]);
1012
- const loop = new AgentLoop(provider, 'system', {}, dummyTools);
1338
+ test("without resolveTools, static tools are passed to provider", async () => {
1339
+ const { provider, calls } = createMockProvider([textResponse("Hi")]);
1340
+ const loop = new AgentLoop(provider, "system", {}, dummyTools);
1013
1341
 
1014
1342
  await loop.run([userMessage], () => {});
1015
1343
 
@@ -1017,25 +1345,39 @@ describe('AgentLoop', () => {
1017
1345
  });
1018
1346
 
1019
1347
  // 26. resolveTools callback is invoked before each provider call
1020
- test('resolveTools is invoked before each provider call', async () => {
1348
+ test("resolveTools is invoked before each provider call", async () => {
1021
1349
  const resolverCalls: Message[][] = [];
1022
1350
  const resolvedTools: ToolDefinition[] = [
1023
- { name: 'search', description: 'Search files', input_schema: { type: 'object', properties: { query: { type: 'string' } } } },
1351
+ {
1352
+ name: "search",
1353
+ description: "Search files",
1354
+ input_schema: {
1355
+ type: "object",
1356
+ properties: { query: { type: "string" } },
1357
+ },
1358
+ },
1024
1359
  ];
1025
1360
 
1026
1361
  const { provider } = createMockProvider([
1027
- toolUseResponse('t1', 'search', { query: 'foo' }),
1028
- textResponse('Found it'),
1362
+ toolUseResponse("t1", "search", { query: "foo" }),
1363
+ textResponse("Found it"),
1029
1364
  ]);
1030
1365
 
1031
- const toolExecutor = async () => ({ content: 'result', isError: false });
1366
+ const toolExecutor = async () => ({ content: "result", isError: false });
1032
1367
 
1033
1368
  const resolveTools = (history: Message[]): ToolDefinition[] => {
1034
1369
  resolverCalls.push([...history]);
1035
1370
  return resolvedTools;
1036
1371
  };
1037
1372
 
1038
- const loop = new AgentLoop(provider, 'system', {}, [], toolExecutor, resolveTools);
1373
+ const loop = new AgentLoop(
1374
+ provider,
1375
+ "system",
1376
+ {},
1377
+ [],
1378
+ toolExecutor,
1379
+ resolveTools,
1380
+ );
1039
1381
  await loop.run([userMessage], () => {});
1040
1382
 
1041
1383
  // resolveTools should be called once per provider turn (2 turns total)
@@ -1050,17 +1392,28 @@ describe('AgentLoop', () => {
1050
1392
  });
1051
1393
 
1052
1394
  // 27. Resolved tool list is passed to the provider
1053
- test('resolved tools are passed to the provider instead of static tools', async () => {
1395
+ test("resolved tools are passed to the provider instead of static tools", async () => {
1054
1396
  const dynamicTools: ToolDefinition[] = [
1055
- { name: 'dynamic_tool', description: 'Dynamic', input_schema: { type: 'object' } },
1397
+ {
1398
+ name: "dynamic_tool",
1399
+ description: "Dynamic",
1400
+ input_schema: { type: "object" },
1401
+ },
1056
1402
  ];
1057
1403
 
1058
- const { provider, calls } = createMockProvider([textResponse('Hi')]);
1404
+ const { provider, calls } = createMockProvider([textResponse("Hi")]);
1059
1405
 
1060
1406
  const resolveTools = (): ToolDefinition[] => dynamicTools;
1061
1407
 
1062
1408
  // Pass different static tools to verify they are overridden
1063
- const loop = new AgentLoop(provider, 'system', {}, dummyTools, undefined, resolveTools);
1409
+ const loop = new AgentLoop(
1410
+ provider,
1411
+ "system",
1412
+ {},
1413
+ dummyTools,
1414
+ undefined,
1415
+ resolveTools,
1416
+ );
1064
1417
  await loop.run([userMessage], () => {});
1065
1418
 
1066
1419
  // Provider should receive the dynamically resolved tools, not the static ones
@@ -1069,31 +1422,59 @@ describe('AgentLoop', () => {
1069
1422
  });
1070
1423
 
1071
1424
  // 28. Tool list can change between turns
1072
- test('resolveTools can return different tools on each turn', async () => {
1425
+ test("resolveTools can return different tools on each turn", async () => {
1073
1426
  const toolsPerTurn: ToolDefinition[][] = [
1074
- [{ name: 'tool_a', description: 'Tool A', input_schema: { type: 'object' } }],
1075
1427
  [
1076
- { name: 'tool_a', description: 'Tool A', input_schema: { type: 'object' } },
1077
- { name: 'tool_b', description: 'Tool B', input_schema: { type: 'object' } },
1428
+ {
1429
+ name: "tool_a",
1430
+ description: "Tool A",
1431
+ input_schema: { type: "object" },
1432
+ },
1433
+ ],
1434
+ [
1435
+ {
1436
+ name: "tool_a",
1437
+ description: "Tool A",
1438
+ input_schema: { type: "object" },
1439
+ },
1440
+ {
1441
+ name: "tool_b",
1442
+ description: "Tool B",
1443
+ input_schema: { type: "object" },
1444
+ },
1445
+ ],
1446
+ [
1447
+ {
1448
+ name: "tool_c",
1449
+ description: "Tool C",
1450
+ input_schema: { type: "object" },
1451
+ },
1078
1452
  ],
1079
- [{ name: 'tool_c', description: 'Tool C', input_schema: { type: 'object' } }],
1080
1453
  ];
1081
1454
 
1082
1455
  let turnIndex = 0;
1083
1456
  const resolveTools = (): ToolDefinition[] => {
1084
- const tools = toolsPerTurn[turnIndex] ?? toolsPerTurn[toolsPerTurn.length - 1];
1457
+ const tools =
1458
+ toolsPerTurn[turnIndex] ?? toolsPerTurn[toolsPerTurn.length - 1];
1085
1459
  turnIndex++;
1086
1460
  return tools;
1087
1461
  };
1088
1462
 
1089
1463
  const { provider, calls } = createMockProvider([
1090
- toolUseResponse('t1', 'tool_a', {}),
1091
- toolUseResponse('t2', 'tool_a', {}),
1092
- textResponse('Done'),
1464
+ toolUseResponse("t1", "tool_a", {}),
1465
+ toolUseResponse("t2", "tool_a", {}),
1466
+ textResponse("Done"),
1093
1467
  ]);
1094
1468
 
1095
- const toolExecutor = async () => ({ content: 'ok', isError: false });
1096
- const loop = new AgentLoop(provider, 'system', {}, [], toolExecutor, resolveTools);
1469
+ const toolExecutor = async () => ({ content: "ok", isError: false });
1470
+ const loop = new AgentLoop(
1471
+ provider,
1472
+ "system",
1473
+ {},
1474
+ [],
1475
+ toolExecutor,
1476
+ resolveTools,
1477
+ );
1097
1478
  await loop.run([userMessage], () => {});
1098
1479
 
1099
1480
  // Provider should have been called 3 times
@@ -1106,12 +1487,21 @@ describe('AgentLoop', () => {
1106
1487
  });
1107
1488
 
1108
1489
  // 29. resolveTools returning empty array means no tools passed to provider
1109
- test('resolveTools returning empty array sends no tools to provider', async () => {
1490
+ test("resolveTools returning empty array sends no tools to provider", async () => {
1110
1491
  const resolveTools = (): ToolDefinition[] => [];
1111
1492
 
1112
- const { provider, calls } = createMockProvider([textResponse('No tools available')]);
1493
+ const { provider, calls } = createMockProvider([
1494
+ textResponse("No tools available"),
1495
+ ]);
1113
1496
 
1114
- const loop = new AgentLoop(provider, 'system', {}, dummyTools, undefined, resolveTools);
1497
+ const loop = new AgentLoop(
1498
+ provider,
1499
+ "system",
1500
+ {},
1501
+ dummyTools,
1502
+ undefined,
1503
+ resolveTools,
1504
+ );
1115
1505
  await loop.run([userMessage], () => {});
1116
1506
 
1117
1507
  // Empty array should result in undefined tools (same as no-tools behavior)
@@ -1123,13 +1513,13 @@ describe('AgentLoop', () => {
1123
1513
  // ---------------------------------------------------------------------------
1124
1514
 
1125
1515
  // 30. Oversized tool results are truncated before entering history
1126
- test('truncates oversized tool results before adding to history', async () => {
1127
- const toolCallId = 'tool-large';
1128
- const largeContent = 'x'.repeat(500_000);
1516
+ test("truncates oversized tool results before adding to history", async () => {
1517
+ const toolCallId = "tool-large";
1518
+ const largeContent = "x".repeat(500_000);
1129
1519
 
1130
1520
  const { provider, calls } = createMockProvider([
1131
- toolUseResponse(toolCallId, 'read_file', { path: '/huge.txt' }),
1132
- textResponse('Got it.'),
1521
+ toolUseResponse(toolCallId, "read_file", { path: "/huge.txt" }),
1522
+ textResponse("Got it."),
1133
1523
  ]);
1134
1524
 
1135
1525
  const toolExecutor = async () => {
@@ -1138,7 +1528,7 @@ describe('AgentLoop', () => {
1138
1528
 
1139
1529
  const loop = new AgentLoop(
1140
1530
  provider,
1141
- 'system',
1531
+ "system",
1142
1532
  { maxInputTokens: 180_000 },
1143
1533
  dummyTools,
1144
1534
  toolExecutor,
@@ -1148,10 +1538,11 @@ describe('AgentLoop', () => {
1148
1538
 
1149
1539
  // The tool result user message is at index 2 in history
1150
1540
  const toolResultMsg = history[2];
1151
- expect(toolResultMsg.role).toBe('user');
1541
+ expect(toolResultMsg.role).toBe("user");
1152
1542
 
1153
1543
  const toolResultBlock = toolResultMsg.content.find(
1154
- (b): b is Extract<ContentBlock, { type: 'tool_result' }> => b.type === 'tool_result',
1544
+ (b): b is Extract<ContentBlock, { type: "tool_result" }> =>
1545
+ b.type === "tool_result",
1155
1546
  );
1156
1547
  expect(toolResultBlock).toBeDefined();
1157
1548
 
@@ -1159,28 +1550,27 @@ describe('AgentLoop', () => {
1159
1550
  expect(toolResultBlock!.content.length).toBeLessThan(500_000);
1160
1551
 
1161
1552
  // Content should end with the truncation suffix
1162
- expect(toolResultBlock!.content).toContain(
1163
- '[Content truncated',
1164
- );
1553
+ expect(toolResultBlock!.content).toContain("[Content truncated");
1165
1554
 
1166
1555
  // The second provider call should also have the truncated content in messages
1167
1556
  const secondCallMessages = calls[1].messages;
1168
1557
  const lastMsg = secondCallMessages[secondCallMessages.length - 1];
1169
1558
  const sentBlock = lastMsg.content.find(
1170
- (b): b is Extract<ContentBlock, { type: 'tool_result' }> => b.type === 'tool_result',
1559
+ (b): b is Extract<ContentBlock, { type: "tool_result" }> =>
1560
+ b.type === "tool_result",
1171
1561
  );
1172
1562
  expect(sentBlock).toBeDefined();
1173
1563
  expect(sentBlock!.content.length).toBeLessThan(500_000);
1174
1564
  });
1175
1565
 
1176
1566
  // 31. Non-oversized tool results pass through unchanged
1177
- test('non-oversized tool results pass through unchanged', async () => {
1178
- const toolCallId = 'tool-small';
1179
- const smallContent = 'small content';
1567
+ test("non-oversized tool results pass through unchanged", async () => {
1568
+ const toolCallId = "tool-small";
1569
+ const smallContent = "small content";
1180
1570
 
1181
1571
  const { provider, calls } = createMockProvider([
1182
- toolUseResponse(toolCallId, 'read_file', { path: '/small.txt' }),
1183
- textResponse('Got it.'),
1572
+ toolUseResponse(toolCallId, "read_file", { path: "/small.txt" }),
1573
+ textResponse("Got it."),
1184
1574
  ]);
1185
1575
 
1186
1576
  const toolExecutor = async () => {
@@ -1189,7 +1579,7 @@ describe('AgentLoop', () => {
1189
1579
 
1190
1580
  const loop = new AgentLoop(
1191
1581
  provider,
1192
- 'system',
1582
+ "system",
1193
1583
  { maxInputTokens: 180_000 },
1194
1584
  dummyTools,
1195
1585
  toolExecutor,
@@ -1199,10 +1589,11 @@ describe('AgentLoop', () => {
1199
1589
 
1200
1590
  // The tool result user message is at index 2 in history
1201
1591
  const toolResultMsg = history[2];
1202
- expect(toolResultMsg.role).toBe('user');
1592
+ expect(toolResultMsg.role).toBe("user");
1203
1593
 
1204
1594
  const toolResultBlock = toolResultMsg.content.find(
1205
- (b): b is Extract<ContentBlock, { type: 'tool_result' }> => b.type === 'tool_result',
1595
+ (b): b is Extract<ContentBlock, { type: "tool_result" }> =>
1596
+ b.type === "tool_result",
1206
1597
  );
1207
1598
  expect(toolResultBlock).toBeDefined();
1208
1599
 
@@ -1213,7 +1604,8 @@ describe('AgentLoop', () => {
1213
1604
  const secondCallMessages = calls[1].messages;
1214
1605
  const lastMsg = secondCallMessages[secondCallMessages.length - 1];
1215
1606
  const sentBlock = lastMsg.content.find(
1216
- (b): b is Extract<ContentBlock, { type: 'tool_result' }> => b.type === 'tool_result',
1607
+ (b): b is Extract<ContentBlock, { type: "tool_result" }> =>
1608
+ b.type === "tool_result",
1217
1609
  );
1218
1610
  expect(sentBlock).toBeDefined();
1219
1611
  expect(sentBlock!.content).toBe(smallContent);
@@ -1225,32 +1617,42 @@ describe('AgentLoop', () => {
1225
1617
 
1226
1618
  // 32. Tool results with sensitiveBindings populate substitution map and
1227
1619
  // final assistant message text is resolved with real values.
1228
- test('resolves sensitive output placeholders in final assistant message', async () => {
1229
- const placeholder = 'VELLUM_ASSISTANT_INVITE_CODE_TEST1234';
1230
- const realToken = 'realInviteToken999';
1620
+ test("resolves sensitive output placeholders in final assistant message", async () => {
1621
+ const placeholder = "VELLUM_ASSISTANT_INVITE_CODE_TEST1234";
1622
+ const realToken = "realInviteToken999";
1231
1623
 
1232
1624
  const { provider, calls } = createMockProvider([
1233
- toolUseResponse('t1', 'bash', { command: 'create invite' }),
1625
+ toolUseResponse("t1", "bash", { command: "create invite" }),
1234
1626
  // The LLM responds using the placeholder (it never saw the real token)
1235
- textResponse(`Here is your invite link: https://t.me/bot?start=iv_${placeholder}`),
1627
+ textResponse(
1628
+ `Here is your invite link: https://t.me/bot?start=iv_${placeholder}`,
1629
+ ),
1236
1630
  ]);
1237
1631
 
1238
1632
  const toolExecutor = async () => ({
1239
1633
  content: `https://t.me/bot?start=iv_${placeholder}`,
1240
1634
  isError: false,
1241
- sensitiveBindings: [{ kind: 'invite_code' as const, placeholder, value: realToken }],
1635
+ sensitiveBindings: [
1636
+ { kind: "invite_code" as const, placeholder, value: realToken },
1637
+ ],
1242
1638
  });
1243
1639
 
1244
- const loop = new AgentLoop(provider, 'system', {}, dummyTools, toolExecutor);
1640
+ const loop = new AgentLoop(
1641
+ provider,
1642
+ "system",
1643
+ {},
1644
+ dummyTools,
1645
+ toolExecutor,
1646
+ );
1245
1647
  const events: AgentEvent[] = [];
1246
1648
  const history = await loop.run([userMessage], collectEvents(events));
1247
1649
 
1248
1650
  // The final assistant message in HISTORY should retain placeholders
1249
1651
  // (so the model never sees real values on subsequent turns)
1250
1652
  const lastAssistant = history[history.length - 1];
1251
- expect(lastAssistant.role).toBe('assistant');
1653
+ expect(lastAssistant.role).toBe("assistant");
1252
1654
  const historyTextBlock = lastAssistant.content.find(
1253
- (b): b is Extract<ContentBlock, { type: 'text' }> => b.type === 'text',
1655
+ (b): b is Extract<ContentBlock, { type: "text" }> => b.type === "text",
1254
1656
  );
1255
1657
  expect(historyTextBlock).toBeDefined();
1256
1658
  expect(historyTextBlock!.text).toContain(placeholder);
@@ -1259,11 +1661,12 @@ describe('AgentLoop', () => {
1259
1661
  // The message_complete EVENT should also retain placeholders (persisted
1260
1662
  // to conversation store; real values leak on session reload otherwise)
1261
1663
  const completeEvents = events.filter(
1262
- (e): e is Extract<AgentEvent, { type: 'message_complete' }> => e.type === 'message_complete',
1664
+ (e): e is Extract<AgentEvent, { type: "message_complete" }> =>
1665
+ e.type === "message_complete",
1263
1666
  );
1264
1667
  const lastComplete = completeEvents[completeEvents.length - 1];
1265
1668
  const completeText = lastComplete.message.content.find(
1266
- (b): b is Extract<ContentBlock, { type: 'text' }> => b.type === 'text',
1669
+ (b): b is Extract<ContentBlock, { type: "text" }> => b.type === "text",
1267
1670
  );
1268
1671
  expect(completeText!.text).toContain(placeholder);
1269
1672
  expect(completeText!.text).not.toContain(realToken);
@@ -1272,23 +1675,25 @@ describe('AgentLoop', () => {
1272
1675
  // NOT the raw token (model never sees the real value)
1273
1676
  const secondCallMessages = calls[1].messages;
1274
1677
  const toolResultMsg = secondCallMessages.find(
1275
- (m) => m.role === 'user' && m.content.some((b) => b.type === 'tool_result'),
1678
+ (m) =>
1679
+ m.role === "user" && m.content.some((b) => b.type === "tool_result"),
1276
1680
  );
1277
1681
  expect(toolResultMsg).toBeDefined();
1278
1682
  const toolResultBlock = toolResultMsg!.content.find(
1279
- (b): b is Extract<ContentBlock, { type: 'tool_result' }> => b.type === 'tool_result',
1683
+ (b): b is Extract<ContentBlock, { type: "tool_result" }> =>
1684
+ b.type === "tool_result",
1280
1685
  );
1281
1686
  expect(toolResultBlock!.content).toContain(placeholder);
1282
1687
  expect(toolResultBlock!.content).not.toContain(realToken);
1283
1688
  });
1284
1689
 
1285
1690
  // 33. Streamed text_delta events have placeholders resolved to real values
1286
- test('resolves sensitive output placeholders in streamed text_delta events', async () => {
1287
- const placeholder = 'VELLUM_ASSISTANT_INVITE_CODE_STRM5678';
1288
- const realToken = 'streamedRealToken';
1691
+ test("resolves sensitive output placeholders in streamed text_delta events", async () => {
1692
+ const placeholder = "VELLUM_ASSISTANT_INVITE_CODE_STRM5678";
1693
+ const realToken = "streamedRealToken";
1289
1694
 
1290
1695
  const { provider } = createMockProvider([
1291
- toolUseResponse('t1', 'bash', { command: 'invite' }),
1696
+ toolUseResponse("t1", "bash", { command: "invite" }),
1292
1697
  // Response text includes the placeholder
1293
1698
  textResponse(`Link: https://t.me/bot?start=iv_${placeholder}`),
1294
1699
  ]);
@@ -1296,18 +1701,27 @@ describe('AgentLoop', () => {
1296
1701
  const toolExecutor = async () => ({
1297
1702
  content: `https://t.me/bot?start=iv_${placeholder}`,
1298
1703
  isError: false,
1299
- sensitiveBindings: [{ kind: 'invite_code' as const, placeholder, value: realToken }],
1704
+ sensitiveBindings: [
1705
+ { kind: "invite_code" as const, placeholder, value: realToken },
1706
+ ],
1300
1707
  });
1301
1708
 
1302
- const loop = new AgentLoop(provider, 'system', {}, dummyTools, toolExecutor);
1709
+ const loop = new AgentLoop(
1710
+ provider,
1711
+ "system",
1712
+ {},
1713
+ dummyTools,
1714
+ toolExecutor,
1715
+ );
1303
1716
  const events: AgentEvent[] = [];
1304
1717
  await loop.run([userMessage], collectEvents(events));
1305
1718
 
1306
1719
  // Collect all text_delta events from the final turn (after tool result)
1307
1720
  const textDeltas = events.filter(
1308
- (e): e is Extract<AgentEvent, { type: 'text_delta' }> => e.type === 'text_delta',
1721
+ (e): e is Extract<AgentEvent, { type: "text_delta" }> =>
1722
+ e.type === "text_delta",
1309
1723
  );
1310
- const allStreamedText = textDeltas.map((e) => e.text).join('');
1724
+ const allStreamedText = textDeltas.map((e) => e.text).join("");
1311
1725
 
1312
1726
  // Streamed text should contain the real token, not the placeholder
1313
1727
  expect(allStreamedText).toContain(realToken);
@@ -1315,26 +1729,32 @@ describe('AgentLoop', () => {
1315
1729
  });
1316
1730
 
1317
1731
  // 34. Without sensitive bindings, text passes through unchanged
1318
- test('text passes through unchanged when no sensitive bindings exist', async () => {
1732
+ test("text passes through unchanged when no sensitive bindings exist", async () => {
1319
1733
  const { provider } = createMockProvider([
1320
- toolUseResponse('t1', 'read_file', { path: '/test.txt' }),
1321
- textResponse('Normal response with no placeholders.'),
1734
+ toolUseResponse("t1", "read_file", { path: "/test.txt" }),
1735
+ textResponse("Normal response with no placeholders."),
1322
1736
  ]);
1323
1737
 
1324
1738
  const toolExecutor = async () => ({
1325
- content: 'file contents',
1739
+ content: "file contents",
1326
1740
  isError: false,
1327
1741
  // No sensitiveBindings
1328
1742
  });
1329
1743
 
1330
- const loop = new AgentLoop(provider, 'system', {}, dummyTools, toolExecutor);
1744
+ const loop = new AgentLoop(
1745
+ provider,
1746
+ "system",
1747
+ {},
1748
+ dummyTools,
1749
+ toolExecutor,
1750
+ );
1331
1751
  const events: AgentEvent[] = [];
1332
1752
  const history = await loop.run([userMessage], collectEvents(events));
1333
1753
 
1334
1754
  const lastAssistant = history[history.length - 1];
1335
1755
  const textBlock = lastAssistant.content.find(
1336
- (b): b is Extract<ContentBlock, { type: 'text' }> => b.type === 'text',
1756
+ (b): b is Extract<ContentBlock, { type: "text" }> => b.type === "text",
1337
1757
  );
1338
- expect(textBlock!.text).toBe('Normal response with no placeholders.');
1758
+ expect(textBlock!.text).toBe("Normal response with no placeholders.");
1339
1759
  });
1340
1760
  });