vellum 0.2.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (349) hide show
  1. package/README.md +15 -2
  2. package/bun.lock +5 -2
  3. package/package.json +4 -2
  4. package/scripts/capture-x-graphql.ts +562 -0
  5. package/scripts/ipc/check-swift-decoder-drift.ts +2 -1
  6. package/scripts/test.sh +5 -0
  7. package/src/__tests__/__snapshots__/ipc-snapshot.test.ts.snap +133 -34
  8. package/src/__tests__/account-registry.test.ts +2 -1
  9. package/src/__tests__/agent-heartbeat-service.test.ts +250 -0
  10. package/src/__tests__/asset-materialize-tool.test.ts +16 -15
  11. package/src/__tests__/asset-search-tool.test.ts +23 -22
  12. package/src/__tests__/attachments-store.test.ts +56 -127
  13. package/src/__tests__/browser-skill-baseline-tool-payload.test.ts +5 -4
  14. package/src/__tests__/browser-skill-endstate.test.ts +4 -3
  15. package/src/__tests__/call-bridge.test.ts +385 -0
  16. package/src/__tests__/call-constants.test.ts +40 -0
  17. package/src/__tests__/call-orchestrator.test.ts +130 -4
  18. package/src/__tests__/call-recovery.test.ts +518 -0
  19. package/src/__tests__/call-routes-http.test.ts +459 -0
  20. package/src/__tests__/call-state-machine.test.ts +143 -0
  21. package/src/__tests__/call-store.test.ts +216 -1
  22. package/src/__tests__/cli-discover.test.ts +1 -1
  23. package/src/__tests__/commit-message-enrichment-service.test.ts +148 -7
  24. package/src/__tests__/compaction.benchmark.test.ts +176 -0
  25. package/src/__tests__/computer-use-tools.test.ts +250 -0
  26. package/src/__tests__/config-schema.test.ts +299 -3
  27. package/src/__tests__/conflict-store.test.ts +2 -1
  28. package/src/__tests__/contacts-tools.test.ts +331 -0
  29. package/src/__tests__/conversation-store.test.ts +30 -32
  30. package/src/__tests__/credential-security-invariants.test.ts +4 -0
  31. package/src/__tests__/date-context.test.ts +373 -0
  32. package/src/__tests__/db-schedule-syntax-migration.test.ts +129 -0
  33. package/src/__tests__/fixtures/media-reuse-fixtures.ts +3 -3
  34. package/src/__tests__/followup-tools.test.ts +303 -0
  35. package/src/__tests__/handlers-twitter-config.test.ts +718 -0
  36. package/src/__tests__/intent-routing.test.ts +64 -57
  37. package/src/__tests__/ipc-roundtrip.benchmark.test.ts +237 -0
  38. package/src/__tests__/ipc-snapshot.test.ts +62 -28
  39. package/src/__tests__/llm-usage-store.test.ts +3 -8
  40. package/src/__tests__/media-generate-image.test.ts +1 -1
  41. package/src/__tests__/media-reuse-story.e2e.test.ts +7 -7
  42. package/src/__tests__/memory-retrieval.benchmark.test.ts +430 -0
  43. package/src/__tests__/parallel-tool.benchmark.test.ts +294 -0
  44. package/src/__tests__/playbook-tools.test.ts +342 -0
  45. package/src/__tests__/profile-compiler.test.ts +2 -1
  46. package/src/__tests__/provider-streaming.benchmark.test.ts +773 -0
  47. package/src/__tests__/recurrence-engine-rruleset.test.ts +78 -0
  48. package/src/__tests__/recurrence-engine.test.ts +69 -0
  49. package/src/__tests__/recurrence-types.test.ts +71 -0
  50. package/src/__tests__/registry.test.ts +5 -3
  51. package/src/__tests__/relay-server.test.ts +633 -0
  52. package/src/__tests__/reminder-store.test.ts +6 -3
  53. package/src/__tests__/reminder.test.ts +43 -77
  54. package/src/__tests__/run-orchestrator-assistant-events.test.ts +8 -4
  55. package/src/__tests__/run-orchestrator.test.ts +4 -4
  56. package/src/__tests__/runtime-attachment-metadata.test.ts +7 -6
  57. package/src/__tests__/runtime-runs-http.test.ts +4 -4
  58. package/src/__tests__/runtime-runs.test.ts +4 -4
  59. package/src/__tests__/schedule-store.test.ts +482 -0
  60. package/src/__tests__/schedule-tools.test.ts +700 -0
  61. package/src/__tests__/scheduler-recurrence.test.ts +329 -0
  62. package/src/__tests__/server-history-render.test.ts +14 -13
  63. package/src/__tests__/session-error.test.ts +28 -0
  64. package/src/__tests__/session-init.benchmark.test.ts +462 -0
  65. package/src/__tests__/session-queue.test.ts +71 -48
  66. package/src/__tests__/session-runtime-assembly.test.ts +161 -0
  67. package/src/__tests__/session-surfaces-task-progress.test.ts +104 -0
  68. package/src/__tests__/signup-e2e.test.ts +2 -1
  69. package/src/__tests__/skill-projection.benchmark.test.ts +328 -0
  70. package/src/__tests__/skill-script-runner.test.ts +159 -0
  71. package/src/__tests__/speaker-identification.test.ts +52 -0
  72. package/src/__tests__/subagent-manager-notify.test.ts +42 -10
  73. package/src/__tests__/subagent-tools.test.ts +141 -41
  74. package/src/__tests__/task-compiler.test.ts +2 -1
  75. package/src/__tests__/task-runner.test.ts +2 -1
  76. package/src/__tests__/task-scheduler.test.ts +2 -1
  77. package/src/__tests__/task-tools.test.ts +49 -56
  78. package/src/__tests__/tool-audit-listener.test.ts +1 -0
  79. package/src/__tests__/tool-domain-event-publisher.test.ts +2 -0
  80. package/src/__tests__/tool-execution-pipeline.benchmark.test.ts +500 -0
  81. package/src/__tests__/tool-executor.test.ts +13 -17
  82. package/src/__tests__/turn-commit.test.ts +218 -3
  83. package/src/__tests__/twilio-provider.test.ts +143 -0
  84. package/src/__tests__/twilio-routes.test.ts +789 -0
  85. package/src/__tests__/twitter-auth-handler.test.ts +581 -0
  86. package/src/__tests__/view-image-tool.test.ts +217 -0
  87. package/src/__tests__/workspace-git-service.test.ts +186 -0
  88. package/src/__tests__/workspace-heartbeat-service.test.ts +13 -3
  89. package/src/agent-heartbeat/agent-heartbeat-service.ts +155 -0
  90. package/src/bundler/app-bundler.ts +12 -8
  91. package/src/calls/call-bridge.ts +95 -0
  92. package/src/calls/call-constants.ts +43 -5
  93. package/src/calls/call-domain.ts +276 -0
  94. package/src/calls/call-orchestrator.ts +43 -17
  95. package/src/calls/call-recovery.ts +207 -0
  96. package/src/calls/call-state-machine.ts +68 -0
  97. package/src/calls/call-store.ts +192 -5
  98. package/src/calls/relay-server.ts +41 -4
  99. package/src/calls/speaker-identification.ts +213 -0
  100. package/src/calls/twilio-provider.ts +10 -6
  101. package/src/calls/twilio-routes.ts +90 -76
  102. package/src/calls/types.ts +1 -1
  103. package/src/cli/config-commands.ts +334 -0
  104. package/src/cli/core-commands.ts +776 -0
  105. package/src/cli/doordash.ts +251 -1
  106. package/src/cli/ipc-client.ts +82 -0
  107. package/src/cli/map.ts +246 -0
  108. package/src/cli/twitter.ts +575 -0
  109. package/src/cli.ts +7 -5
  110. package/src/commands/__tests__/cc-command-registry.test.ts +319 -0
  111. package/src/commands/cc-command-registry.ts +209 -0
  112. package/src/config/bundled-skills/contacts/SKILL.md +39 -0
  113. package/src/config/bundled-skills/contacts/TOOLS.json +122 -0
  114. package/src/config/bundled-skills/contacts/tools/contact-merge.ts +9 -0
  115. package/src/config/bundled-skills/contacts/tools/contact-search.ts +9 -0
  116. package/src/config/bundled-skills/contacts/tools/contact-upsert.ts +9 -0
  117. package/src/config/bundled-skills/document/SKILL.md +18 -0
  118. package/src/config/bundled-skills/document/TOOLS.json +53 -0
  119. package/src/config/bundled-skills/document/tools/document-create.ts +9 -0
  120. package/src/config/bundled-skills/document/tools/document-update.ts +9 -0
  121. package/src/config/bundled-skills/doordash/SKILL.md +82 -23
  122. package/src/config/bundled-skills/followups/SKILL.md +32 -0
  123. package/src/config/bundled-skills/followups/TOOLS.json +100 -0
  124. package/src/config/bundled-skills/followups/tools/followup-create.ts +9 -0
  125. package/src/config/bundled-skills/followups/tools/followup-list.ts +9 -0
  126. package/src/config/bundled-skills/followups/tools/followup-resolve.ts +9 -0
  127. package/src/config/bundled-skills/image-studio/tools/media-generate-image.ts +1 -23
  128. package/src/config/bundled-skills/messaging/tools/messaging-analyze-style.ts +2 -1
  129. package/src/config/bundled-skills/playbooks/SKILL.md +31 -0
  130. package/src/config/bundled-skills/playbooks/TOOLS.json +126 -0
  131. package/src/config/bundled-skills/playbooks/tools/playbook-create.ts +9 -0
  132. package/src/config/bundled-skills/playbooks/tools/playbook-delete.ts +9 -0
  133. package/src/config/bundled-skills/playbooks/tools/playbook-list.ts +9 -0
  134. package/src/config/bundled-skills/playbooks/tools/playbook-update.ts +9 -0
  135. package/src/config/bundled-skills/reminder/SKILL.md +20 -0
  136. package/src/config/bundled-skills/reminder/TOOLS.json +67 -0
  137. package/src/config/bundled-skills/reminder/tools/reminder-cancel.ts +9 -0
  138. package/src/config/bundled-skills/reminder/tools/reminder-create.ts +9 -0
  139. package/src/config/bundled-skills/reminder/tools/reminder-list.ts +9 -0
  140. package/src/config/bundled-skills/schedule/SKILL.md +74 -0
  141. package/src/config/bundled-skills/schedule/TOOLS.json +135 -0
  142. package/src/config/bundled-skills/schedule/tools/schedule-create.ts +9 -0
  143. package/src/config/bundled-skills/schedule/tools/schedule-delete.ts +9 -0
  144. package/src/config/bundled-skills/schedule/tools/schedule-list.ts +9 -0
  145. package/src/config/bundled-skills/schedule/tools/schedule-update.ts +9 -0
  146. package/src/config/bundled-skills/subagent/SKILL.md +25 -0
  147. package/src/config/bundled-skills/subagent/TOOLS.json +107 -0
  148. package/src/config/bundled-skills/subagent/tools/subagent-abort.ts +9 -0
  149. package/src/config/bundled-skills/subagent/tools/subagent-message.ts +9 -0
  150. package/src/config/bundled-skills/subagent/tools/subagent-read.ts +9 -0
  151. package/src/config/bundled-skills/subagent/tools/subagent-spawn.ts +9 -0
  152. package/src/config/bundled-skills/subagent/tools/subagent-status.ts +9 -0
  153. package/src/config/bundled-skills/tasks/SKILL.md +28 -0
  154. package/src/config/bundled-skills/tasks/TOOLS.json +256 -0
  155. package/src/config/bundled-skills/tasks/tools/task-delete.ts +9 -0
  156. package/src/config/bundled-skills/tasks/tools/task-list-add.ts +9 -0
  157. package/src/config/bundled-skills/tasks/tools/task-list-remove.ts +9 -0
  158. package/src/config/bundled-skills/tasks/tools/task-list-show.ts +9 -0
  159. package/src/config/bundled-skills/tasks/tools/task-list-update.ts +9 -0
  160. package/src/config/bundled-skills/tasks/tools/task-list.ts +9 -0
  161. package/src/config/bundled-skills/tasks/tools/task-run.ts +9 -0
  162. package/src/config/bundled-skills/tasks/tools/task-save.ts +9 -0
  163. package/src/config/bundled-skills/twitter/SKILL.md +134 -0
  164. package/src/config/bundled-skills/watcher/SKILL.md +27 -0
  165. package/src/config/bundled-skills/watcher/TOOLS.json +147 -0
  166. package/src/config/bundled-skills/watcher/tools/watcher-create.ts +9 -0
  167. package/src/config/bundled-skills/watcher/tools/watcher-delete.ts +9 -0
  168. package/src/config/bundled-skills/watcher/tools/watcher-digest.ts +9 -0
  169. package/src/config/bundled-skills/watcher/tools/watcher-list.ts +9 -0
  170. package/src/config/bundled-skills/watcher/tools/watcher-update.ts +9 -0
  171. package/src/config/defaults.ts +33 -0
  172. package/src/config/loader.ts +4 -1
  173. package/src/config/schema.ts +161 -1
  174. package/src/config/system-prompt.ts +61 -16
  175. package/src/config/templates/IDENTITY.md +7 -0
  176. package/src/config/types.ts +4 -0
  177. package/src/contacts/contact-store.ts +4 -4
  178. package/src/daemon/assistant-attachments.ts +10 -0
  179. package/src/daemon/classifier.ts +3 -1
  180. package/src/daemon/computer-use-session.ts +3 -1
  181. package/src/daemon/date-context.ts +136 -0
  182. package/src/daemon/handlers/apps.ts +16 -1
  183. package/src/daemon/handlers/browser.ts +54 -0
  184. package/src/daemon/handlers/computer-use.ts +7 -1
  185. package/src/daemon/handlers/config.ts +163 -5
  186. package/src/daemon/handlers/diagnostics.ts +5 -1
  187. package/src/daemon/handlers/documents.ts +18 -29
  188. package/src/daemon/handlers/home-base.ts +5 -1
  189. package/src/daemon/handlers/index.ts +40 -277
  190. package/src/daemon/handlers/misc.ts +9 -1
  191. package/src/daemon/handlers/publish.ts +6 -1
  192. package/src/daemon/handlers/sessions.ts +65 -12
  193. package/src/daemon/handlers/shared.ts +36 -1
  194. package/src/daemon/handlers/signing.ts +37 -0
  195. package/src/daemon/handlers/skills.ts +20 -6
  196. package/src/daemon/handlers/subagents.ts +8 -3
  197. package/src/daemon/handlers/twitter-auth.ts +169 -0
  198. package/src/daemon/handlers/work-items.ts +384 -68
  199. package/src/daemon/ipc-contract-inventory.json +28 -4
  200. package/src/daemon/ipc-contract.ts +133 -37
  201. package/src/daemon/ipc-protocol.ts +7 -2
  202. package/src/daemon/lifecycle.ts +21 -0
  203. package/src/daemon/main.ts +10 -4
  204. package/src/daemon/ride-shotgun-handler.ts +74 -10
  205. package/src/daemon/server.ts +143 -26
  206. package/src/daemon/session-agent-loop.ts +887 -0
  207. package/src/daemon/session-attachments.ts +28 -5
  208. package/src/daemon/session-error.ts +24 -3
  209. package/src/daemon/session-lifecycle.ts +147 -0
  210. package/src/daemon/session-media-retry.ts +147 -0
  211. package/src/daemon/session-messaging.ts +145 -0
  212. package/src/daemon/session-notifiers.ts +164 -0
  213. package/src/daemon/session-process.ts +2 -2
  214. package/src/daemon/session-queue-manager.ts +1 -0
  215. package/src/daemon/session-runtime-assembly.ts +52 -0
  216. package/src/daemon/session-skill-tools.ts +124 -5
  217. package/src/daemon/session-slash.ts +3 -0
  218. package/src/daemon/session-surfaces.ts +77 -2
  219. package/src/daemon/session-tool-setup.ts +216 -2
  220. package/src/daemon/session-usage.ts +0 -2
  221. package/src/daemon/session.ts +114 -1404
  222. package/src/daemon/video-thumbnail.ts +60 -0
  223. package/src/doordash/client.ts +121 -27
  224. package/src/doordash/queries.ts +1 -2
  225. package/src/export/formatter.ts +3 -1
  226. package/src/followups/followup-store.ts +4 -2
  227. package/src/followups/types.ts +6 -0
  228. package/src/hooks/templates.ts +1 -1
  229. package/src/index.ts +32 -1153
  230. package/src/memory/attachments-store.ts +28 -83
  231. package/src/memory/channel-delivery-store.ts +7 -21
  232. package/src/memory/clarification-resolver.ts +6 -5
  233. package/src/memory/contradiction-checker.ts +3 -2
  234. package/src/memory/conversation-key-store.ts +10 -29
  235. package/src/memory/conversation-store.ts +2 -1
  236. package/src/memory/db.ts +96 -2
  237. package/src/memory/entity-extractor.ts +6 -3
  238. package/src/memory/items-extractor.ts +5 -4
  239. package/src/memory/jobs-store.ts +3 -2
  240. package/src/memory/llm-usage-store.ts +1 -2
  241. package/src/memory/runs-store.ts +1 -2
  242. package/src/memory/schema.ts +23 -2
  243. package/src/messaging/style-analyzer.ts +3 -2
  244. package/src/messaging/thread-summarizer.ts +8 -12
  245. package/src/messaging/triage-engine.ts +4 -2
  246. package/src/providers/openrouter/client.ts +20 -0
  247. package/src/providers/registry.ts +8 -0
  248. package/src/runtime/http-server.ts +108 -20
  249. package/src/runtime/routes/attachment-routes.ts +2 -3
  250. package/src/runtime/routes/call-routes.ts +140 -0
  251. package/src/runtime/routes/channel-routes.ts +5 -10
  252. package/src/runtime/routes/conversation-routes.ts +5 -5
  253. package/src/runtime/routes/run-routes.ts +2 -2
  254. package/src/runtime/run-orchestrator.ts +9 -3
  255. package/src/schedule/recurrence-engine.ts +138 -0
  256. package/src/schedule/recurrence-types.ts +67 -0
  257. package/src/schedule/schedule-store.ts +102 -57
  258. package/src/schedule/scheduler.ts +9 -6
  259. package/src/security/oauth2.ts +29 -4
  260. package/src/security/secret-allowlist.ts +46 -0
  261. package/src/skills/clawhub.ts +1 -1
  262. package/src/subagent/manager.ts +40 -8
  263. package/src/swarm/backend-claude-code.ts +64 -9
  264. package/src/swarm/worker-prompts.ts +2 -1
  265. package/src/tasks/SPEC.md +34 -28
  266. package/src/tasks/ephemeral-permissions.ts +16 -7
  267. package/src/tasks/task-compiler.ts +5 -4
  268. package/src/tasks/task-runner.ts +10 -5
  269. package/src/tasks/task-scheduler.ts +1 -1
  270. package/src/tasks/tool-sanitizer.ts +36 -0
  271. package/src/tools/assets/search.ts +4 -4
  272. package/src/tools/browser/api-map.ts +220 -0
  273. package/src/tools/browser/auto-navigate.ts +270 -0
  274. package/src/tools/browser/browser-execution.ts +2 -1
  275. package/src/tools/browser/browser-manager.ts +2 -2
  276. package/src/tools/browser/network-recorder.ts +5 -4
  277. package/src/tools/browser/x-auto-navigate.ts +207 -0
  278. package/src/tools/calls/call-end.ts +17 -67
  279. package/src/tools/calls/call-start.ts +24 -85
  280. package/src/tools/calls/call-status.ts +35 -51
  281. package/src/tools/claude-code/claude-code.ts +77 -11
  282. package/src/tools/contacts/contact-merge.ts +46 -78
  283. package/src/tools/contacts/contact-search.ts +35 -79
  284. package/src/tools/contacts/contact-upsert.ts +35 -108
  285. package/src/tools/credentials/vault.ts +20 -4
  286. package/src/tools/document/document-tool.ts +71 -144
  287. package/src/tools/executor.ts +129 -10
  288. package/src/tools/followups/followup_create.ts +46 -88
  289. package/src/tools/followups/followup_list.ts +34 -74
  290. package/src/tools/followups/followup_resolve.ts +31 -66
  291. package/src/tools/host-terminal/cli-discover.ts +2 -1
  292. package/src/tools/host-terminal/host-shell.ts +10 -0
  293. package/src/tools/memory/handlers.ts +5 -4
  294. package/src/tools/network/__tests__/web-search.test.ts +427 -0
  295. package/src/tools/network/script-proxy/__tests__/logging.test.ts +248 -0
  296. package/src/tools/network/script-proxy/__tests__/policy.test.ts +234 -0
  297. package/src/tools/network/script-proxy/__tests__/router.test.ts +76 -0
  298. package/src/tools/network/web-fetch.ts +18 -6
  299. package/src/tools/playbooks/index.ts +4 -5
  300. package/src/tools/playbooks/playbook-create.ts +3 -47
  301. package/src/tools/playbooks/playbook-delete.ts +1 -25
  302. package/src/tools/playbooks/playbook-list.ts +1 -28
  303. package/src/tools/playbooks/playbook-update.ts +3 -51
  304. package/src/tools/reminder/reminder.ts +5 -78
  305. package/src/tools/schedule/create.ts +69 -74
  306. package/src/tools/schedule/delete.ts +21 -47
  307. package/src/tools/schedule/list.ts +55 -74
  308. package/src/tools/schedule/update.ts +77 -84
  309. package/src/tools/subagent/abort.ts +29 -58
  310. package/src/tools/subagent/message.ts +30 -63
  311. package/src/tools/subagent/read.ts +53 -84
  312. package/src/tools/subagent/spawn.ts +43 -82
  313. package/src/tools/subagent/status.ts +42 -71
  314. package/src/tools/swarm/delegate.ts +2 -1
  315. package/src/tools/tasks/index.ts +8 -8
  316. package/src/tools/tasks/task-delete.ts +60 -88
  317. package/src/tools/tasks/task-list.ts +31 -52
  318. package/src/tools/tasks/task-run.ts +72 -108
  319. package/src/tools/tasks/task-save.ts +33 -65
  320. package/src/tools/tasks/work-item-enqueue.ts +183 -215
  321. package/src/tools/tasks/work-item-list.ts +33 -63
  322. package/src/tools/tasks/work-item-remove.ts +45 -97
  323. package/src/tools/tasks/work-item-update.ts +91 -163
  324. package/src/tools/terminal/backends/native.ts +3 -1
  325. package/src/tools/tool-manifest.ts +0 -62
  326. package/src/tools/types.ts +6 -0
  327. package/src/tools/ui-surface/definitions.ts +3 -1
  328. package/src/tools/watch/screen-watch.ts +3 -1
  329. package/src/tools/watcher/create.ts +52 -98
  330. package/src/tools/watcher/delete.ts +20 -46
  331. package/src/tools/watcher/digest.ts +36 -70
  332. package/src/tools/watcher/list.ts +49 -79
  333. package/src/tools/watcher/update.ts +45 -91
  334. package/src/twitter/client.ts +690 -0
  335. package/src/twitter/session.ts +91 -0
  336. package/src/usage/types.ts +0 -1
  337. package/src/util/truncate.ts +6 -0
  338. package/src/watcher/providers/slack.ts +2 -1
  339. package/src/watcher/watcher-store.ts +3 -2
  340. package/src/work-items/work-item-store.ts +27 -2
  341. package/src/workspace/commit-message-enrichment-service.ts +31 -7
  342. package/src/workspace/git-service.ts +87 -22
  343. package/src/workspace/provider-commit-message-generator.ts +242 -0
  344. package/src/workspace/turn-commit.ts +62 -3
  345. package/src/tools/contacts/index.ts +0 -4
  346. package/src/tools/document/index.ts +0 -5
  347. package/src/tools/followups/index.ts +0 -3
  348. package/src/tools/subagent/index.ts +0 -5
  349. package/src/__tests__/{memory-context-benchmark.test.ts → memory-context-benchmark.benchmark.test.ts} +0 -0
@@ -0,0 +1,462 @@
1
+ /**
2
+ * Session Initialization Benchmark
3
+ *
4
+ * Measures latency of key session startup components and end-to-end
5
+ * session creation timing (request to first-tool-ready state).
6
+ *
7
+ * Uses multi-sample median timing with warm-up runs to reduce sensitivity
8
+ * to host load and machine class. Thresholds are intentionally loose
9
+ * guardrails for catching regressions, not precise performance targets.
10
+ *
11
+ * Component targets (median of 5 runs):
12
+ * - initializeTools: < 100ms
13
+ * - buildSystemPrompt: < 50ms
14
+ * - getAllToolDefinitions: < 10ms
15
+ *
16
+ * End-to-end targets (median of 3 runs):
17
+ * - Session creation (no preactivated skills): < 200ms
18
+ * - Session creation (3 preactivated skills): < 300ms
19
+ * - Session constructor (sync, no loadFromDb): < 10ms
20
+ */
21
+ import { afterAll, describe, expect, mock, test } from 'bun:test';
22
+ import { mkdtempSync, mkdirSync, rmSync, writeFileSync } from 'node:fs';
23
+ import { tmpdir } from 'node:os';
24
+ import { join } from 'node:path';
25
+
26
/**
 * Return the median of a list of numeric samples.
 *
 * The input is copied and sorted ascending before the middle element is
 * selected, so the result is correct for both pre-sorted and unsorted input
 * and the caller's array is never mutated.
 *
 * @param sorted - Samples to summarize. The parameter name is kept for
 *   backward compatibility; pre-sorting is no longer required.
 * @returns The middle value for odd-length input, or the arithmetic mean of
 *   the two middle values for even-length input.
 * @throws RangeError when `sorted` is empty. An empty sample set has no
 *   median, and indexing into it would otherwise silently yield NaN.
 */
function median(sorted: number[]): number {
  if (sorted.length === 0) {
    throw new RangeError('median() requires at least one sample');
  }

  // Numeric comparator is required: the default sort is lexicographic.
  const ascending = [...sorted].sort((a, b) => a - b);
  const mid = Math.floor(ascending.length / 2);

  return ascending.length % 2 === 0
    ? (ascending[mid - 1] + ascending[mid]) / 2
    : ascending[mid];
}
33
+
34
// Scratch workspace for this benchmark file; the mocked platform helpers
// below all resolve their paths inside it.
const testDir = mkdtempSync(join(tmpdir(), 'session-init-bench-'));

// Create subdirectories expected by platform helpers
for (const subdir of ['data', 'logs', 'skills', 'hooks']) {
  mkdirSync(join(testDir, subdir), { recursive: true });
}

// Seed minimal prompt files so buildSystemPrompt doesn't bail on missing files
const promptSeeds: Array<[string, string]> = [
  ['IDENTITY.md', '# Test Identity\nYou are a test assistant.'],
  ['SOUL.md', '# Test Soul\nBe helpful.'],
  ['USER.md', '# Test User\nName: Benchmark Runner'],
];
for (const [fileName, contents] of promptSeeds) {
  writeFileSync(join(testDir, fileName), contents);
}

// Create real skill directories so projectSkillTools can load them from the catalog.
// Each skill gets a SKILL.md with front matter, a one-tool TOOLS.json manifest,
// and the run.sh script that the manifest names as its executor.
const testSkillIds = ['bench-skill-a', 'bench-skill-b', 'bench-skill-c'];
testSkillIds.forEach((skillId) => {
  const skillDir = join(testDir, 'skills', skillId);
  mkdirSync(skillDir, { recursive: true });

  const skillDocLines = [
    '---',
    `name: ${skillId}`,
    `description: Benchmark test skill ${skillId}`,
    '---',
    `# ${skillId}`,
    'A test skill for benchmarking.',
  ];
  writeFileSync(join(skillDir, 'SKILL.md'), skillDocLines.join('\n'));

  const toolEntry = {
    name: `${skillId}_tool`,
    description: `Tool for ${skillId}`,
    category: 'benchmark',
    risk: 'low',
    input_schema: { type: 'object', properties: {} },
    executor: 'run.sh',
    execution_target: 'host',
  };
  const manifest = { version: 1, tools: [toolEntry] };
  writeFileSync(join(skillDir, 'TOOLS.json'), JSON.stringify(manifest));

  writeFileSync(join(skillDir, 'run.sh'), '#!/bin/sh\necho ok');
});
74
+
75
// Replace the platform helpers so every path resolves inside the temporary
// benchmark directory and the migration/setup hooks do nothing.
mock.module('../util/platform.js', () => {
  /** Build a path underneath the temporary benchmark directory. */
  const inTestDir = (...segments: string[]): string => join(testDir, ...segments);
  /** Build a predicate comparing the running platform against `name`. */
  const platformIs = (name: string) => (): boolean => process.platform === name;
  const noop = (): void => {};

  return {
    // Directory roots all collapse onto the scratch directory itself.
    getDataDir: () => testDir,
    getRootDir: () => testDir,
    getWorkspaceDir: () => testDir,
    getWorkspaceConfigPath: () => inTestDir('config.json'),
    getWorkspaceSkillsDir: () => inTestDir('skills'),
    getWorkspaceHooksDir: () => inTestDir('hooks'),
    getWorkspacePromptPath: (file: string) => inTestDir(file),
    getSocketPath: () => inTestDir('test.sock'),
    getSessionTokenPath: () => inTestDir('session-token'),
    getPidPath: () => inTestDir('test.pid'),
    getDbPath: () => inTestDir('data', 'test.db'),
    getLogPath: () => inTestDir('logs', 'test.log'),
    getHistoryPath: () => inTestDir('history'),
    getHooksDir: () => inTestDir('hooks'),
    getIpcBlobDir: () => inTestDir('ipc-blobs'),
    getSandboxRootDir: () => inTestDir('sandbox'),
    getSandboxWorkingDir: () => testDir,
    getInterfacesDir: () => inTestDir('interfaces'),

    // Platform detection still reflects the real host.
    isMacOS: platformIs('darwin'),
    isLinux: platformIs('linux'),
    isWindows: platformIs('win32'),
    getPlatformName: () => process.platform,
    getClipboardCommand: () => null,

    // Side-effecting helpers are stubbed out.
    removeSocketFile: noop,
    migratePath: noop,
    migrateToWorkspaceLayout: noop,
    migrateToDataLayout: noop,
    ensureDataDir: noop,
  };
});
105
+
106
// Silence logging: every property read on a logger yields a function that
// does nothing, so any logging call made through it is a no-op.
mock.module('../util/logger.js', () => {
  /** Create a logger stand-in whose every property is a do-nothing function. */
  const createSilentLogger = () =>
    new Proxy({} as Record<string, unknown>, {
      get() {
        return () => {};
      },
    });

  /** Cut `value` to `maxLen` characters and append an ellipsis when it is longer. */
  function truncateForLog(value: string, maxLen = 500): string {
    if (value.length > maxLen) {
      return value.slice(0, maxLen) + '...';
    }
    return value;
  }

  return {
    getLogger: () => createSilentLogger(),
    getCliLogger: () => createSilentLogger(),
    isDebug: () => false,
    truncateForLog,
    initLogger: () => {},
    pruneOldLogFiles: () => 0,
  };
});
121
+
122
// Context-window settings embedded in the mock configuration below.
const mockContextWindow = {
  enabled: true,
  maxInputTokens: 180000,
  targetInputTokens: 110000,
  compactThreshold: 0.8,
  preserveRecentUserTurns: 8,
  summaryMaxTokens: 1200,
};

// Fixed configuration object handed out by the mocked config loader.
const mockConfig = {
  model: 'mock-model',
  provider: 'mock',
  sandbox: { enabled: false, backend: 'native' },
  contextWindow: mockContextWindow,
  thinking: { enabled: false, budgetTokens: 10000 },
};

// Config loader stand-in: reads always return mockConfig, writes are dropped.
mock.module('../config/loader.js', () => {
  const readConfig = () => mockConfig;
  const discard = (): void => {};

  return {
    API_KEY_PROVIDERS: ['anthropic', 'openai', 'gemini', 'ollama', 'fireworks', 'brave', 'perplexity'],
    getConfig: readConfig,
    loadConfig: readConfig,
    saveConfig: discard,
    invalidateConfigCache: discard,
    loadRawConfig: () => ({}),
    saveRawConfig: discard,
    getNestedValue: () => undefined,
    setNestedValue: discard,
  };
});
148
+
149
// Additional mocks required for Session constructor and end-to-end tests

// Conversation store stand-in: lookups return nothing, creation returns a
// fixed conversation record, and mutations are ignored.
mock.module('../memory/conversation-store.js', () => {
  const noop = (): void => {};
  const emptyList = () => [];
  const nothing = () => null;

  return {
    addMessage: () => ({ id: 'msg-1' }),
    getMessages: emptyList,
    listConversations: emptyList,
    getConversation: nothing,
    getLatestConversation: nothing,
    createConversation: () => ({ id: 'bench-conv', title: 'Bench', threadType: 'standard' }),
    clearAll: noop,
    getConversationThreadType: () => 'standard',
    getConversationMemoryScopeId: () => 'default',
    updateConversationTitle: noop,
  };
});
163
+
164
// Hook manager stand-in: triggering a hook resolves immediately and
// initialization does nothing.
mock.module('../hooks/manager.js', () => {
  const createHookManager = () => ({
    trigger: async (): Promise<void> => {},
    initialize: (): void => {},
  });

  return { getHookManager: () => createHookManager() };
});
170
+
171
// Watch-state stand-in: an empty session map, no active session, and
// notifier registration/firing reduced to no-ops.
mock.module('../tools/watch/watch-state.js', () => {
  const noop = (): void => {};

  return {
    watchSessions: new Map(),

    // Start notifier
    registerWatchStartNotifier: noop,
    unregisterWatchStartNotifier: noop,
    fireWatchStartNotifier: noop,

    // Commentary notifier
    registerWatchCommentaryNotifier: noop,
    unregisterWatchCommentaryNotifier: noop,
    fireWatchCommentaryNotifier: noop,

    // Completion notifier
    registerWatchCompletionNotifier: noop,
    unregisterWatchCompletionNotifier: noop,
    fireWatchCompletionNotifier: noop,

    getActiveWatchSession: () => undefined,
    addObservation: noop,
    pruneWatchSessions: noop,
  };
});
186
+
187
// Call-state stand-in: all notifier and orchestrator registration is a
// no-op, and no orchestrator is ever found.
mock.module('../calls/call-state.js', () => {
  const noop = (): void => {};

  return {
    // Question notifier
    registerCallQuestionNotifier: noop,
    unregisterCallQuestionNotifier: noop,
    fireCallQuestionNotifier: noop,

    // Completion notifier
    registerCallCompletionNotifier: noop,
    unregisterCallCompletionNotifier: noop,
    fireCallCompletionNotifier: noop,

    // Orchestrator registry
    registerCallOrchestrator: noop,
    unregisterCallOrchestrator: noop,
    getCallOrchestrator: () => undefined,
  };
});
198
+
199
// Call-store stand-in: creation returns a placeholder record, lookups find
// nothing, writes are dropped, and callback claims always succeed.
mock.module('../calls/call-store.js', () => {
  const noop = (): void => {};
  const nothing = () => null;
  const emptyList = () => [];
  const placeholderRecord = () => ({ id: 'mock' });
  const succeed = () => true;

  return {
    // Call sessions
    createCallSession: placeholderRecord,
    getCallSession: nothing,
    getCallSessionByCallSid: nothing,
    getActiveCallSessionForConversation: nothing,
    updateCallSession: noop,
    listRecoverableCalls: emptyList,

    // Call events
    recordCallEvent: noop,
    getCallEvents: emptyList,

    // Pending questions
    createPendingQuestion: placeholderRecord,
    getPendingQuestion: nothing,
    answerPendingQuestion: noop,
    expirePendingQuestions: noop,

    // Callback de-duplication
    buildCallbackDedupeKey: () => '',
    isCallbackProcessed: () => false,
    recordProcessedCallback: noop,
    tryRecordProcessedCallback: succeed,
    claimCallback: succeed,
    releaseCallbackClaim: noop,
  };
});
219
+
220
// Watch-handler stand-in exposing two empty Maps under the exported names.
mock.module('../daemon/watch-handler.js', () => {
  const lastCommentaryBySession = new Map();
  const lastSummaryBySession = new Map();

  return { lastCommentaryBySession, lastSummaryBySession };
});
224
+
225
// Browser screencast stand-in: nothing is ever active, synchronous helpers
// do nothing, and asynchronous helpers resolve immediately.
mock.module('../tools/browser/browser-screencast.js', () => {
  const noop = (): void => {};
  const resolveNow = async (): Promise<void> => {};

  return {
    registerSessionSender: noop,
    unregisterSessionSender: noop,
    ensureScreencast: resolveNow,
    updateBrowserStatus: noop,
    updatePagesList: resolveNow,
    stopBrowserScreencast: resolveNow,
    getElementBounds: async () => null,
    updateHighlights: noop,
    stopAllScreencasts: resolveNow,
    isScreencastActive: () => false,
    getSender: () => undefined,
    getScreencastSurfaceId: () => null,
  };
});
239
+
240
// Published-app updater stand-in: the deployment update resolves immediately.
mock.module('../services/published-app-updater.js', () => {
  const updatePublishedAppDeployment = async (): Promise<void> => {};

  return { updatePublishedAppDeployment };
});
243
+
244
+ const { initializeTools, getAllToolDefinitions, __resetRegistryForTesting } = await import(
245
+ '../tools/registry.js'
246
+ );
247
+ const { buildSystemPrompt } = await import('../config/system-prompt.js');
248
+ const { Session } = await import('../daemon/session.js');
249
+ const { projectSkillTools, resetSkillToolProjection } = await import(
250
+ '../daemon/session-skill-tools.js'
251
+ );
252
+ import type { Provider } from '../providers/types.js';
253
+
254
// Suite teardown: reset the tool registry, then remove the scratch directory.
afterAll(() => {
  __resetRegistryForTesting();

  const removalOptions = { recursive: true };
  try {
    rmSync(testDir, removalOptions);
  } catch {
    // best-effort cleanup: a leftover temp directory must not fail the suite
  }
});
262
+
263
+ describe('Session initialization benchmark', () => {
264
+ test('initializeTools completes under 100ms (median of 5)', async () => {
265
+ // Warm-up run to eliminate JIT / lazy-load overhead
266
+ __resetRegistryForTesting();
267
+ await initializeTools();
268
+
269
+ const timings: number[] = [];
270
+ for (let i = 0; i < 5; i++) {
271
+ __resetRegistryForTesting();
272
+ const start = performance.now();
273
+ await initializeTools();
274
+ timings.push(performance.now() - start);
275
+ }
276
+
277
+ timings.sort((a, b) => a - b);
278
+ expect(median(timings)).toBeLessThan(100);
279
+ });
280
+
281
+ test('getAllToolDefinitions retrieves definitions under 10ms (median of 5)', async () => {
282
+ await initializeTools();
283
+
284
+ // Warm-up
285
+ getAllToolDefinitions();
286
+
287
+ const timings: number[] = [];
288
+ for (let i = 0; i < 5; i++) {
289
+ const start = performance.now();
290
+ const definitions = getAllToolDefinitions();
291
+ timings.push(performance.now() - start);
292
+ if (i === 0) expect(definitions.length).toBeGreaterThan(0);
293
+ }
294
+
295
+ timings.sort((a, b) => a - b);
296
+ expect(median(timings)).toBeLessThan(10);
297
+ });
298
+
299
+ test('buildSystemPrompt assembles prompt under 50ms (median of 5)', () => {
300
+ // Warm-up
301
+ buildSystemPrompt();
302
+
303
+ const timings: number[] = [];
304
+ for (let i = 0; i < 5; i++) {
305
+ const start = performance.now();
306
+ const prompt = buildSystemPrompt();
307
+ timings.push(performance.now() - start);
308
+ if (i === 0) {
309
+ expect(prompt.length).toBeGreaterThan(0);
310
+ expect(prompt).toContain('Test Identity');
311
+ }
312
+ }
313
+
314
+ timings.sort((a, b) => a - b);
315
+ expect(median(timings)).toBeLessThan(50);
316
+ });
317
+
318
+ test('repeated buildSystemPrompt calls are consistently fast (10 iterations)', () => {
319
+ const timings: number[] = [];
320
+ for (let i = 0; i < 10; i++) {
321
+ const start = performance.now();
322
+ buildSystemPrompt();
323
+ timings.push(performance.now() - start);
324
+ }
325
+
326
+ const maxTime = Math.max(...timings);
327
+ const avgTime = timings.reduce((a, b) => a + b, 0) / timings.length;
328
+
329
+ // Every individual call must finish in under 50ms, and the average across all calls must stay under 20ms
330
+ expect(maxTime).toBeLessThan(50);
331
+ expect(avgTime).toBeLessThan(20);
332
+ });
333
+
334
+ test('tool definitions count stays within expected range after init', async () => {
335
+ await initializeTools();
336
+ const definitions = getAllToolDefinitions();
337
+
338
+ // Sanity: we expect a meaningful number of core tools (at least 20)
339
+ // but not an unreasonable explosion (under 200)
340
+ expect(definitions.length).toBeGreaterThanOrEqual(20);
341
+ expect(definitions.length).toBeLessThan(200);
342
+ });
343
+ });
344
+
345
+ describe('End-to-end session creation benchmark', () => {
346
+ // Uses the real Session constructor + loadFromDb() path, which wires up
347
+ // the tool executor, event bus, agent loop, context window manager, and
348
+ // notifiers. Note: the daemon's getOrCreateSession() adds provider
349
+ // construction, rate limiting, concurrency guards, and evictor management
350
+ // on top — those are lightweight config-driven operations not benchmarked
351
+ // here.
352
+
353
+ const mockProvider: Provider = {
354
+ name: 'mock',
355
+ sendMessage: () =>
356
+ Promise.resolve({
357
+ content: [{ type: 'text' as const, text: 'ok' }],
358
+ model: 'mock-model',
359
+ usage: { inputTokens: 0, outputTokens: 0 },
360
+ stopReason: 'end_turn',
361
+ }),
362
+ };
363
+ const noop = () => {};
364
+
365
+ test('session creation without preactivated skills completes under 200ms (median of 3)', async () => {
366
+ __resetRegistryForTesting();
367
+ await initializeTools();
368
+ const systemPrompt = buildSystemPrompt();
369
+
370
+ // Warm-up run
371
+ const warmup = new Session('bench-warmup-0', mockProvider, systemPrompt, 64000, noop, testDir);
372
+ await warmup.loadFromDb();
373
+ warmup.dispose();
374
+
375
+ const timings: number[] = [];
376
+ for (let i = 0; i < 3; i++) {
377
+ const id = `bench-no-skills-${i}`;
378
+ const start = performance.now();
379
+ const session = new Session(id, mockProvider, systemPrompt, 64000, noop, testDir);
380
+ await session.loadFromDb();
381
+ timings.push(performance.now() - start);
382
+
383
+ if (i === 0) {
384
+ expect(session.conversationId).toBe(id);
385
+ expect(session.getMessages()).toHaveLength(0);
386
+ }
387
+ session.dispose();
388
+ }
389
+
390
+ timings.sort((a, b) => a - b);
391
+ expect(median(timings)).toBeLessThan(200);
392
+ });
393
+
394
+ test('session creation with 3 preactivated skills completes under 300ms (median of 3)', async () => {
395
+ __resetRegistryForTesting();
396
+ await initializeTools();
397
+ const systemPrompt = buildSystemPrompt();
398
+
399
+ // Warm-up run — includes skill projection so the manifest-loading path is already warm before timing starts
400
+ const warmup = new Session('bench-warmup-s', mockProvider, systemPrompt, 64000, noop, testDir);
401
+ warmup.preactivatedSkillIds = testSkillIds;
402
+ await warmup.loadFromDb();
403
+ projectSkillTools([], {
404
+ preactivatedSkillIds: warmup.preactivatedSkillIds,
405
+ previouslyActiveSkillIds: warmup.skillProjectionState,
406
+ cache: warmup.skillProjectionCache,
407
+ });
408
+ resetSkillToolProjection(warmup.skillProjectionState);
409
+ warmup.dispose();
410
+
411
+ const timings: number[] = [];
412
+ for (let i = 0; i < 3; i++) {
413
+ const id = `bench-with-skills-${i}`;
414
+ const start = performance.now();
415
+ const session = new Session(id, mockProvider, systemPrompt, 64000, noop, testDir);
416
+ session.preactivatedSkillIds = testSkillIds;
417
+ await session.loadFromDb();
418
+ // Skill projection runs at agent turn time, not during loadFromDb.
419
+ // Include it here to measure the full first-tool-ready path.
420
+ const projection = projectSkillTools([], {
421
+ preactivatedSkillIds: session.preactivatedSkillIds,
422
+ previouslyActiveSkillIds: session.skillProjectionState,
423
+ cache: session.skillProjectionCache,
424
+ });
425
+ timings.push(performance.now() - start);
426
+
427
+ if (i === 0) {
428
+ expect(session.conversationId).toBe(id);
429
+ expect(session.getMessages()).toHaveLength(0);
430
+ expect(projection.toolDefinitions.length).toBe(testSkillIds.length);
431
+ }
432
+ resetSkillToolProjection(session.skillProjectionState);
433
+ session.dispose();
434
+ }
435
+
436
+ timings.sort((a, b) => a - b);
437
+ expect(median(timings)).toBeLessThan(300);
438
+ });
439
+
440
+ test('Session constructor (sync, no loadFromDb) completes under 10ms (median of 5)', () => {
441
+ const systemPrompt = buildSystemPrompt();
442
+
443
+ // Warm-up
444
+ const warmup = new Session('bench-events-w', mockProvider, systemPrompt, 64000, noop, testDir);
445
+ warmup.dispose();
446
+
447
+ const timings: number[] = [];
448
+ for (let i = 0; i < 5; i++) {
449
+ const start = performance.now();
450
+ const session = new Session(`bench-events-${i}`, mockProvider, systemPrompt, 64000, noop, testDir);
451
+ timings.push(performance.now() - start);
452
+
453
+ if (i === 0) {
454
+ expect(session.eventBus.listenerCount()).toBeGreaterThan(0);
455
+ }
456
+ session.dispose();
457
+ }
458
+
459
+ timings.sort((a, b) => a - b);
460
+ expect(median(timings)).toBeLessThan(10);
461
+ });
462
+ });
@@ -1,4 +1,4 @@
1
- import { describe, expect, mock, test, beforeEach } from 'bun:test';
1
+ import { describe, expect, mock, test, beforeEach, afterAll } from 'bun:test';
2
2
  import { rmSync, writeFileSync } from 'node:fs';
3
3
  import type { Message, ProviderResponse } from '../providers/types.js';
4
4
  import type { AgentEvent, CheckpointInfo, CheckpointDecision } from '../agent/loop.js';
@@ -8,8 +8,16 @@ import type { ServerMessage } from '../daemon/ipc-protocol.js';
8
8
  // Mocks — must precede the Session import so Bun applies them at load time.
9
9
  // ---------------------------------------------------------------------------
10
10
 
11
+ function makeLoggerStub(): Record<string, unknown> {
12
+ const stub: Record<string, unknown> = {};
13
+ for (const m of ['info', 'warn', 'error', 'debug', 'trace', 'fatal', 'silent', 'child']) {
14
+ stub[m] = m === 'child' ? () => makeLoggerStub() : () => {};
15
+ }
16
+ return stub;
17
+ }
18
+
11
19
  mock.module('../util/logger.js', () => ({
12
- getLogger: () => new Proxy({} as Record<string, unknown>, { get: () => () => {} }),
20
+ getLogger: () => makeLoggerStub(),
13
21
  }));
14
22
 
15
23
  mock.module('../util/platform.js', () => ({
@@ -139,28 +147,12 @@ mock.module('../context/window-manager.js', () => ({
139
147
  }));
140
148
 
141
149
  // ---------------------------------------------------------------------------
142
- // Workspace/git + attachment mocks.
150
+ // Workspace/git turn-commit test hooks.
143
151
  // ---------------------------------------------------------------------------
144
152
 
145
153
  const turnCommitCalls: Array<{ workspaceDir: string; sessionId: string; turnNumber: number }> = [];
146
154
  let turnCommitHangForever = false;
147
155
 
148
- mock.module('../workspace/git-service.js', () => ({
149
- getWorkspaceGitService: () => ({
150
- ensureInitialized: async () => {},
151
- }),
152
- }));
153
-
154
- mock.module('../workspace/turn-commit.js', () => ({
155
- commitTurnChanges: async (workspaceDir: string, sessionId: string, turnNumber: number) => {
156
- turnCommitCalls.push({ workspaceDir, sessionId, turnNumber });
157
- if (turnCommitHangForever) {
158
- // Simulate a commit that never resolves within the timeout budget
159
- await new Promise<void>(() => {});
160
- }
161
- },
162
- }));
163
-
164
156
  // ---------------------------------------------------------------------------
165
157
  // Usage event capture for request-ID correlation tests.
166
158
  // ---------------------------------------------------------------------------
@@ -223,6 +215,17 @@ mock.module('../agent/loop.js', () => ({
223
215
  import { Session, MAX_QUEUE_DEPTH } from '../daemon/session.js';
224
216
  import type { QueueDrainReason, QueuePolicy } from '../daemon/session.js';
225
217
 
218
+ type SessionWithWorkspaceDeps = Session & {
219
+ getWorkspaceGitService?: (_workspaceDir: string) => { ensureInitialized: () => Promise<void> };
220
+ commitTurnChanges?: (
221
+ workspaceDir: string,
222
+ sessionId: string,
223
+ turnNumber: number,
224
+ provider?: unknown,
225
+ deadlineMs?: number,
226
+ ) => Promise<void>;
227
+ };
228
+
226
229
  function makeSession(sendToClient?: (msg: ServerMessage) => void): Session {
227
230
  const provider = {
228
231
  name: 'mock',
@@ -235,7 +238,19 @@ function makeSession(sendToClient?: (msg: ServerMessage) => void): Session {
235
238
  };
236
239
  },
237
240
  };
238
- return new Session('conv-1', provider, 'system prompt', 4096, sendToClient ?? (() => {}), '/tmp');
241
+ const session = new Session('conv-1', provider, 'system prompt', 4096, sendToClient ?? (() => {}), '/tmp');
242
+ const sessionWithWorkspaceDeps = session as SessionWithWorkspaceDeps;
243
+ sessionWithWorkspaceDeps.getWorkspaceGitService = () => ({
244
+ ensureInitialized: async () => {},
245
+ });
246
+ sessionWithWorkspaceDeps.commitTurnChanges = async (workspaceDir: string, sessionId: string, turnNumber: number) => {
247
+ turnCommitCalls.push({ workspaceDir, sessionId, turnNumber });
248
+ if (turnCommitHangForever) {
249
+ // Simulate a commit that never resolves within the timeout budget
250
+ await new Promise<void>(() => {});
251
+ }
252
+ };
253
+ return session;
239
254
  }
240
255
 
241
256
  /**
@@ -289,6 +304,10 @@ beforeEach(() => {
289
304
  linkAttachmentShouldThrow = false;
290
305
  });
291
306
 
307
+ afterAll(() => {
308
+ mock.restore();
309
+ });
310
+
292
311
  // ---------------------------------------------------------------------------
293
312
  // Tests
294
313
  // ---------------------------------------------------------------------------
@@ -1472,41 +1491,45 @@ describe('Regression: cancel semantics and error channel split', () => {
1472
1491
 
1473
1492
  turnCommitHangForever = true;
1474
1493
 
1475
- const events1: ServerMessage[] = [];
1476
- const events2: ServerMessage[] = [];
1494
+ try {
1495
+ const events1: ServerMessage[] = [];
1496
+ const events2: ServerMessage[] = [];
1477
1497
 
1478
- // Start first message (promise intentionally not awaited — we test queue drain behavior)
1479
- const _p1 = session.processMessage('msg-1', [], (e) => events1.push(e), 'req-1');
1480
- await waitForPendingRun(1);
1498
+ // Start first message (promise intentionally not awaited — we test queue drain behavior)
1499
+ const _p1 = session.processMessage('msg-1', [], (e) => events1.push(e), 'req-1');
1500
+ await waitForPendingRun(1);
1481
1501
 
1482
- // Enqueue a second message while the first is processing
1483
- session.enqueueMessage('msg-2', [], (e) => events2.push(e), 'req-2');
1502
+ // Enqueue a second message while the first is processing
1503
+ session.enqueueMessage('msg-2', [], (e) => events2.push(e), 'req-2');
1484
1504
 
1485
- // Complete the first agent loop run
1486
- resolveRun(0);
1505
+ // Complete the first agent loop run
1506
+ resolveRun(0);
1487
1507
 
1488
- // The turn should still complete (timeout fires) and drain the queue
1489
- // even though commitTurnChanges never resolves.
1490
- // The default turnCommitMaxWaitMs is 4000ms in the config mock,
1491
- // but the mock config doesn't set it, so it defaults to 4000ms.
1492
- // We wait for the second run to be registered, which proves the
1493
- // turn completed and the queue drained despite the hanging commit.
1494
- await waitForPendingRun(2, 10_000);
1508
+ // The turn should still complete (timeout fires) and drain the queue
1509
+ // even though commitTurnChanges never resolves.
1510
+ // The config mock does not set turnCommitMaxWaitMs, so the default
1511
+ // timeout of 4000ms applies.
1512
+ // We wait for the second run to be registered, which proves the
1513
+ // turn completed and the queue drained despite the hanging commit.
1514
+ await waitForPendingRun(2, 10_000);
1495
1515
 
1496
- // First message should have completed
1497
- const completion1 = events1.find((e) => e.type === 'message_complete');
1498
- expect(completion1).toBeDefined();
1516
+ // First message should have completed
1517
+ const completion1 = events1.find((e) => e.type === 'message_complete');
1518
+ expect(completion1).toBeDefined();
1499
1519
 
1500
- // Second message should have been dequeued
1501
- const dequeued = events2.find((e) => e.type === 'message_dequeued');
1502
- expect(dequeued).toBeDefined();
1520
+ // Second message should have been dequeued
1521
+ const dequeued = events2.find((e) => e.type === 'message_dequeued');
1522
+ expect(dequeued).toBeDefined();
1503
1523
 
1504
- // The turn commit should have been called
1505
- expect(turnCommitCalls).toHaveLength(1);
1524
+ // The turn commit should have been called
1525
+ expect(turnCommitCalls).toHaveLength(1);
1506
1526
 
1507
- // Complete the second run so the test can clean up
1508
- turnCommitHangForever = false;
1509
- resolveRun(1);
1510
- await new Promise((r) => setTimeout(r, 50));
1527
+ // Complete the second run so the test can clean up
1528
+ turnCommitHangForever = false;
1529
+ resolveRun(1);
1530
+ await new Promise((r) => setTimeout(r, 50));
1531
+ } finally {
1532
+ turnCommitHangForever = false;
1533
+ }
1511
1534
  }, 15_000);
1512
1535
  });