vellum 0.2.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (349) hide show
  1. package/README.md +15 -2
  2. package/bun.lock +5 -2
  3. package/package.json +4 -2
  4. package/scripts/capture-x-graphql.ts +562 -0
  5. package/scripts/ipc/check-swift-decoder-drift.ts +2 -1
  6. package/scripts/test.sh +5 -0
  7. package/src/__tests__/__snapshots__/ipc-snapshot.test.ts.snap +133 -34
  8. package/src/__tests__/account-registry.test.ts +2 -1
  9. package/src/__tests__/agent-heartbeat-service.test.ts +250 -0
  10. package/src/__tests__/asset-materialize-tool.test.ts +16 -15
  11. package/src/__tests__/asset-search-tool.test.ts +23 -22
  12. package/src/__tests__/attachments-store.test.ts +56 -127
  13. package/src/__tests__/browser-skill-baseline-tool-payload.test.ts +5 -4
  14. package/src/__tests__/browser-skill-endstate.test.ts +4 -3
  15. package/src/__tests__/call-bridge.test.ts +385 -0
  16. package/src/__tests__/call-constants.test.ts +40 -0
  17. package/src/__tests__/call-orchestrator.test.ts +130 -4
  18. package/src/__tests__/call-recovery.test.ts +518 -0
  19. package/src/__tests__/call-routes-http.test.ts +459 -0
  20. package/src/__tests__/call-state-machine.test.ts +143 -0
  21. package/src/__tests__/call-store.test.ts +216 -1
  22. package/src/__tests__/cli-discover.test.ts +1 -1
  23. package/src/__tests__/commit-message-enrichment-service.test.ts +148 -7
  24. package/src/__tests__/compaction.benchmark.test.ts +176 -0
  25. package/src/__tests__/computer-use-tools.test.ts +250 -0
  26. package/src/__tests__/config-schema.test.ts +299 -3
  27. package/src/__tests__/conflict-store.test.ts +2 -1
  28. package/src/__tests__/contacts-tools.test.ts +331 -0
  29. package/src/__tests__/conversation-store.test.ts +30 -32
  30. package/src/__tests__/credential-security-invariants.test.ts +4 -0
  31. package/src/__tests__/date-context.test.ts +373 -0
  32. package/src/__tests__/db-schedule-syntax-migration.test.ts +129 -0
  33. package/src/__tests__/fixtures/media-reuse-fixtures.ts +3 -3
  34. package/src/__tests__/followup-tools.test.ts +303 -0
  35. package/src/__tests__/handlers-twitter-config.test.ts +718 -0
  36. package/src/__tests__/intent-routing.test.ts +64 -57
  37. package/src/__tests__/ipc-roundtrip.benchmark.test.ts +237 -0
  38. package/src/__tests__/ipc-snapshot.test.ts +62 -28
  39. package/src/__tests__/llm-usage-store.test.ts +3 -8
  40. package/src/__tests__/media-generate-image.test.ts +1 -1
  41. package/src/__tests__/media-reuse-story.e2e.test.ts +7 -7
  42. package/src/__tests__/memory-retrieval.benchmark.test.ts +430 -0
  43. package/src/__tests__/parallel-tool.benchmark.test.ts +294 -0
  44. package/src/__tests__/playbook-tools.test.ts +342 -0
  45. package/src/__tests__/profile-compiler.test.ts +2 -1
  46. package/src/__tests__/provider-streaming.benchmark.test.ts +773 -0
  47. package/src/__tests__/recurrence-engine-rruleset.test.ts +78 -0
  48. package/src/__tests__/recurrence-engine.test.ts +69 -0
  49. package/src/__tests__/recurrence-types.test.ts +71 -0
  50. package/src/__tests__/registry.test.ts +5 -3
  51. package/src/__tests__/relay-server.test.ts +633 -0
  52. package/src/__tests__/reminder-store.test.ts +6 -3
  53. package/src/__tests__/reminder.test.ts +43 -77
  54. package/src/__tests__/run-orchestrator-assistant-events.test.ts +8 -4
  55. package/src/__tests__/run-orchestrator.test.ts +4 -4
  56. package/src/__tests__/runtime-attachment-metadata.test.ts +7 -6
  57. package/src/__tests__/runtime-runs-http.test.ts +4 -4
  58. package/src/__tests__/runtime-runs.test.ts +4 -4
  59. package/src/__tests__/schedule-store.test.ts +482 -0
  60. package/src/__tests__/schedule-tools.test.ts +700 -0
  61. package/src/__tests__/scheduler-recurrence.test.ts +329 -0
  62. package/src/__tests__/server-history-render.test.ts +14 -13
  63. package/src/__tests__/session-error.test.ts +28 -0
  64. package/src/__tests__/session-init.benchmark.test.ts +462 -0
  65. package/src/__tests__/session-queue.test.ts +71 -48
  66. package/src/__tests__/session-runtime-assembly.test.ts +161 -0
  67. package/src/__tests__/session-surfaces-task-progress.test.ts +104 -0
  68. package/src/__tests__/signup-e2e.test.ts +2 -1
  69. package/src/__tests__/skill-projection.benchmark.test.ts +328 -0
  70. package/src/__tests__/skill-script-runner.test.ts +159 -0
  71. package/src/__tests__/speaker-identification.test.ts +52 -0
  72. package/src/__tests__/subagent-manager-notify.test.ts +42 -10
  73. package/src/__tests__/subagent-tools.test.ts +141 -41
  74. package/src/__tests__/task-compiler.test.ts +2 -1
  75. package/src/__tests__/task-runner.test.ts +2 -1
  76. package/src/__tests__/task-scheduler.test.ts +2 -1
  77. package/src/__tests__/task-tools.test.ts +49 -56
  78. package/src/__tests__/tool-audit-listener.test.ts +1 -0
  79. package/src/__tests__/tool-domain-event-publisher.test.ts +2 -0
  80. package/src/__tests__/tool-execution-pipeline.benchmark.test.ts +500 -0
  81. package/src/__tests__/tool-executor.test.ts +13 -17
  82. package/src/__tests__/turn-commit.test.ts +218 -3
  83. package/src/__tests__/twilio-provider.test.ts +143 -0
  84. package/src/__tests__/twilio-routes.test.ts +789 -0
  85. package/src/__tests__/twitter-auth-handler.test.ts +581 -0
  86. package/src/__tests__/view-image-tool.test.ts +217 -0
  87. package/src/__tests__/workspace-git-service.test.ts +186 -0
  88. package/src/__tests__/workspace-heartbeat-service.test.ts +13 -3
  89. package/src/agent-heartbeat/agent-heartbeat-service.ts +155 -0
  90. package/src/bundler/app-bundler.ts +12 -8
  91. package/src/calls/call-bridge.ts +95 -0
  92. package/src/calls/call-constants.ts +43 -5
  93. package/src/calls/call-domain.ts +276 -0
  94. package/src/calls/call-orchestrator.ts +43 -17
  95. package/src/calls/call-recovery.ts +207 -0
  96. package/src/calls/call-state-machine.ts +68 -0
  97. package/src/calls/call-store.ts +192 -5
  98. package/src/calls/relay-server.ts +41 -4
  99. package/src/calls/speaker-identification.ts +213 -0
  100. package/src/calls/twilio-provider.ts +10 -6
  101. package/src/calls/twilio-routes.ts +90 -76
  102. package/src/calls/types.ts +1 -1
  103. package/src/cli/config-commands.ts +334 -0
  104. package/src/cli/core-commands.ts +776 -0
  105. package/src/cli/doordash.ts +251 -1
  106. package/src/cli/ipc-client.ts +82 -0
  107. package/src/cli/map.ts +246 -0
  108. package/src/cli/twitter.ts +575 -0
  109. package/src/cli.ts +7 -5
  110. package/src/commands/__tests__/cc-command-registry.test.ts +319 -0
  111. package/src/commands/cc-command-registry.ts +209 -0
  112. package/src/config/bundled-skills/contacts/SKILL.md +39 -0
  113. package/src/config/bundled-skills/contacts/TOOLS.json +122 -0
  114. package/src/config/bundled-skills/contacts/tools/contact-merge.ts +9 -0
  115. package/src/config/bundled-skills/contacts/tools/contact-search.ts +9 -0
  116. package/src/config/bundled-skills/contacts/tools/contact-upsert.ts +9 -0
  117. package/src/config/bundled-skills/document/SKILL.md +18 -0
  118. package/src/config/bundled-skills/document/TOOLS.json +53 -0
  119. package/src/config/bundled-skills/document/tools/document-create.ts +9 -0
  120. package/src/config/bundled-skills/document/tools/document-update.ts +9 -0
  121. package/src/config/bundled-skills/doordash/SKILL.md +82 -23
  122. package/src/config/bundled-skills/followups/SKILL.md +32 -0
  123. package/src/config/bundled-skills/followups/TOOLS.json +100 -0
  124. package/src/config/bundled-skills/followups/tools/followup-create.ts +9 -0
  125. package/src/config/bundled-skills/followups/tools/followup-list.ts +9 -0
  126. package/src/config/bundled-skills/followups/tools/followup-resolve.ts +9 -0
  127. package/src/config/bundled-skills/image-studio/tools/media-generate-image.ts +1 -23
  128. package/src/config/bundled-skills/messaging/tools/messaging-analyze-style.ts +2 -1
  129. package/src/config/bundled-skills/playbooks/SKILL.md +31 -0
  130. package/src/config/bundled-skills/playbooks/TOOLS.json +126 -0
  131. package/src/config/bundled-skills/playbooks/tools/playbook-create.ts +9 -0
  132. package/src/config/bundled-skills/playbooks/tools/playbook-delete.ts +9 -0
  133. package/src/config/bundled-skills/playbooks/tools/playbook-list.ts +9 -0
  134. package/src/config/bundled-skills/playbooks/tools/playbook-update.ts +9 -0
  135. package/src/config/bundled-skills/reminder/SKILL.md +20 -0
  136. package/src/config/bundled-skills/reminder/TOOLS.json +67 -0
  137. package/src/config/bundled-skills/reminder/tools/reminder-cancel.ts +9 -0
  138. package/src/config/bundled-skills/reminder/tools/reminder-create.ts +9 -0
  139. package/src/config/bundled-skills/reminder/tools/reminder-list.ts +9 -0
  140. package/src/config/bundled-skills/schedule/SKILL.md +74 -0
  141. package/src/config/bundled-skills/schedule/TOOLS.json +135 -0
  142. package/src/config/bundled-skills/schedule/tools/schedule-create.ts +9 -0
  143. package/src/config/bundled-skills/schedule/tools/schedule-delete.ts +9 -0
  144. package/src/config/bundled-skills/schedule/tools/schedule-list.ts +9 -0
  145. package/src/config/bundled-skills/schedule/tools/schedule-update.ts +9 -0
  146. package/src/config/bundled-skills/subagent/SKILL.md +25 -0
  147. package/src/config/bundled-skills/subagent/TOOLS.json +107 -0
  148. package/src/config/bundled-skills/subagent/tools/subagent-abort.ts +9 -0
  149. package/src/config/bundled-skills/subagent/tools/subagent-message.ts +9 -0
  150. package/src/config/bundled-skills/subagent/tools/subagent-read.ts +9 -0
  151. package/src/config/bundled-skills/subagent/tools/subagent-spawn.ts +9 -0
  152. package/src/config/bundled-skills/subagent/tools/subagent-status.ts +9 -0
  153. package/src/config/bundled-skills/tasks/SKILL.md +28 -0
  154. package/src/config/bundled-skills/tasks/TOOLS.json +256 -0
  155. package/src/config/bundled-skills/tasks/tools/task-delete.ts +9 -0
  156. package/src/config/bundled-skills/tasks/tools/task-list-add.ts +9 -0
  157. package/src/config/bundled-skills/tasks/tools/task-list-remove.ts +9 -0
  158. package/src/config/bundled-skills/tasks/tools/task-list-show.ts +9 -0
  159. package/src/config/bundled-skills/tasks/tools/task-list-update.ts +9 -0
  160. package/src/config/bundled-skills/tasks/tools/task-list.ts +9 -0
  161. package/src/config/bundled-skills/tasks/tools/task-run.ts +9 -0
  162. package/src/config/bundled-skills/tasks/tools/task-save.ts +9 -0
  163. package/src/config/bundled-skills/twitter/SKILL.md +134 -0
  164. package/src/config/bundled-skills/watcher/SKILL.md +27 -0
  165. package/src/config/bundled-skills/watcher/TOOLS.json +147 -0
  166. package/src/config/bundled-skills/watcher/tools/watcher-create.ts +9 -0
  167. package/src/config/bundled-skills/watcher/tools/watcher-delete.ts +9 -0
  168. package/src/config/bundled-skills/watcher/tools/watcher-digest.ts +9 -0
  169. package/src/config/bundled-skills/watcher/tools/watcher-list.ts +9 -0
  170. package/src/config/bundled-skills/watcher/tools/watcher-update.ts +9 -0
  171. package/src/config/defaults.ts +33 -0
  172. package/src/config/loader.ts +4 -1
  173. package/src/config/schema.ts +161 -1
  174. package/src/config/system-prompt.ts +61 -16
  175. package/src/config/templates/IDENTITY.md +7 -0
  176. package/src/config/types.ts +4 -0
  177. package/src/contacts/contact-store.ts +4 -4
  178. package/src/daemon/assistant-attachments.ts +10 -0
  179. package/src/daemon/classifier.ts +3 -1
  180. package/src/daemon/computer-use-session.ts +3 -1
  181. package/src/daemon/date-context.ts +136 -0
  182. package/src/daemon/handlers/apps.ts +16 -1
  183. package/src/daemon/handlers/browser.ts +54 -0
  184. package/src/daemon/handlers/computer-use.ts +7 -1
  185. package/src/daemon/handlers/config.ts +163 -5
  186. package/src/daemon/handlers/diagnostics.ts +5 -1
  187. package/src/daemon/handlers/documents.ts +18 -29
  188. package/src/daemon/handlers/home-base.ts +5 -1
  189. package/src/daemon/handlers/index.ts +40 -277
  190. package/src/daemon/handlers/misc.ts +9 -1
  191. package/src/daemon/handlers/publish.ts +6 -1
  192. package/src/daemon/handlers/sessions.ts +65 -12
  193. package/src/daemon/handlers/shared.ts +36 -1
  194. package/src/daemon/handlers/signing.ts +37 -0
  195. package/src/daemon/handlers/skills.ts +20 -6
  196. package/src/daemon/handlers/subagents.ts +8 -3
  197. package/src/daemon/handlers/twitter-auth.ts +169 -0
  198. package/src/daemon/handlers/work-items.ts +384 -68
  199. package/src/daemon/ipc-contract-inventory.json +28 -4
  200. package/src/daemon/ipc-contract.ts +133 -37
  201. package/src/daemon/ipc-protocol.ts +7 -2
  202. package/src/daemon/lifecycle.ts +21 -0
  203. package/src/daemon/main.ts +10 -4
  204. package/src/daemon/ride-shotgun-handler.ts +74 -10
  205. package/src/daemon/server.ts +143 -26
  206. package/src/daemon/session-agent-loop.ts +887 -0
  207. package/src/daemon/session-attachments.ts +28 -5
  208. package/src/daemon/session-error.ts +24 -3
  209. package/src/daemon/session-lifecycle.ts +147 -0
  210. package/src/daemon/session-media-retry.ts +147 -0
  211. package/src/daemon/session-messaging.ts +145 -0
  212. package/src/daemon/session-notifiers.ts +164 -0
  213. package/src/daemon/session-process.ts +2 -2
  214. package/src/daemon/session-queue-manager.ts +1 -0
  215. package/src/daemon/session-runtime-assembly.ts +52 -0
  216. package/src/daemon/session-skill-tools.ts +124 -5
  217. package/src/daemon/session-slash.ts +3 -0
  218. package/src/daemon/session-surfaces.ts +77 -2
  219. package/src/daemon/session-tool-setup.ts +216 -2
  220. package/src/daemon/session-usage.ts +0 -2
  221. package/src/daemon/session.ts +114 -1404
  222. package/src/daemon/video-thumbnail.ts +60 -0
  223. package/src/doordash/client.ts +121 -27
  224. package/src/doordash/queries.ts +1 -2
  225. package/src/export/formatter.ts +3 -1
  226. package/src/followups/followup-store.ts +4 -2
  227. package/src/followups/types.ts +6 -0
  228. package/src/hooks/templates.ts +1 -1
  229. package/src/index.ts +32 -1153
  230. package/src/memory/attachments-store.ts +28 -83
  231. package/src/memory/channel-delivery-store.ts +7 -21
  232. package/src/memory/clarification-resolver.ts +6 -5
  233. package/src/memory/contradiction-checker.ts +3 -2
  234. package/src/memory/conversation-key-store.ts +10 -29
  235. package/src/memory/conversation-store.ts +2 -1
  236. package/src/memory/db.ts +96 -2
  237. package/src/memory/entity-extractor.ts +6 -3
  238. package/src/memory/items-extractor.ts +5 -4
  239. package/src/memory/jobs-store.ts +3 -2
  240. package/src/memory/llm-usage-store.ts +1 -2
  241. package/src/memory/runs-store.ts +1 -2
  242. package/src/memory/schema.ts +23 -2
  243. package/src/messaging/style-analyzer.ts +3 -2
  244. package/src/messaging/thread-summarizer.ts +8 -12
  245. package/src/messaging/triage-engine.ts +4 -2
  246. package/src/providers/openrouter/client.ts +20 -0
  247. package/src/providers/registry.ts +8 -0
  248. package/src/runtime/http-server.ts +108 -20
  249. package/src/runtime/routes/attachment-routes.ts +2 -3
  250. package/src/runtime/routes/call-routes.ts +140 -0
  251. package/src/runtime/routes/channel-routes.ts +5 -10
  252. package/src/runtime/routes/conversation-routes.ts +5 -5
  253. package/src/runtime/routes/run-routes.ts +2 -2
  254. package/src/runtime/run-orchestrator.ts +9 -3
  255. package/src/schedule/recurrence-engine.ts +138 -0
  256. package/src/schedule/recurrence-types.ts +67 -0
  257. package/src/schedule/schedule-store.ts +102 -57
  258. package/src/schedule/scheduler.ts +9 -6
  259. package/src/security/oauth2.ts +29 -4
  260. package/src/security/secret-allowlist.ts +46 -0
  261. package/src/skills/clawhub.ts +1 -1
  262. package/src/subagent/manager.ts +40 -8
  263. package/src/swarm/backend-claude-code.ts +64 -9
  264. package/src/swarm/worker-prompts.ts +2 -1
  265. package/src/tasks/SPEC.md +34 -28
  266. package/src/tasks/ephemeral-permissions.ts +16 -7
  267. package/src/tasks/task-compiler.ts +5 -4
  268. package/src/tasks/task-runner.ts +10 -5
  269. package/src/tasks/task-scheduler.ts +1 -1
  270. package/src/tasks/tool-sanitizer.ts +36 -0
  271. package/src/tools/assets/search.ts +4 -4
  272. package/src/tools/browser/api-map.ts +220 -0
  273. package/src/tools/browser/auto-navigate.ts +270 -0
  274. package/src/tools/browser/browser-execution.ts +2 -1
  275. package/src/tools/browser/browser-manager.ts +2 -2
  276. package/src/tools/browser/network-recorder.ts +5 -4
  277. package/src/tools/browser/x-auto-navigate.ts +207 -0
  278. package/src/tools/calls/call-end.ts +17 -67
  279. package/src/tools/calls/call-start.ts +24 -85
  280. package/src/tools/calls/call-status.ts +35 -51
  281. package/src/tools/claude-code/claude-code.ts +77 -11
  282. package/src/tools/contacts/contact-merge.ts +46 -78
  283. package/src/tools/contacts/contact-search.ts +35 -79
  284. package/src/tools/contacts/contact-upsert.ts +35 -108
  285. package/src/tools/credentials/vault.ts +20 -4
  286. package/src/tools/document/document-tool.ts +71 -144
  287. package/src/tools/executor.ts +129 -10
  288. package/src/tools/followups/followup_create.ts +46 -88
  289. package/src/tools/followups/followup_list.ts +34 -74
  290. package/src/tools/followups/followup_resolve.ts +31 -66
  291. package/src/tools/host-terminal/cli-discover.ts +2 -1
  292. package/src/tools/host-terminal/host-shell.ts +10 -0
  293. package/src/tools/memory/handlers.ts +5 -4
  294. package/src/tools/network/__tests__/web-search.test.ts +427 -0
  295. package/src/tools/network/script-proxy/__tests__/logging.test.ts +248 -0
  296. package/src/tools/network/script-proxy/__tests__/policy.test.ts +234 -0
  297. package/src/tools/network/script-proxy/__tests__/router.test.ts +76 -0
  298. package/src/tools/network/web-fetch.ts +18 -6
  299. package/src/tools/playbooks/index.ts +4 -5
  300. package/src/tools/playbooks/playbook-create.ts +3 -47
  301. package/src/tools/playbooks/playbook-delete.ts +1 -25
  302. package/src/tools/playbooks/playbook-list.ts +1 -28
  303. package/src/tools/playbooks/playbook-update.ts +3 -51
  304. package/src/tools/reminder/reminder.ts +5 -78
  305. package/src/tools/schedule/create.ts +69 -74
  306. package/src/tools/schedule/delete.ts +21 -47
  307. package/src/tools/schedule/list.ts +55 -74
  308. package/src/tools/schedule/update.ts +77 -84
  309. package/src/tools/subagent/abort.ts +29 -58
  310. package/src/tools/subagent/message.ts +30 -63
  311. package/src/tools/subagent/read.ts +53 -84
  312. package/src/tools/subagent/spawn.ts +43 -82
  313. package/src/tools/subagent/status.ts +42 -71
  314. package/src/tools/swarm/delegate.ts +2 -1
  315. package/src/tools/tasks/index.ts +8 -8
  316. package/src/tools/tasks/task-delete.ts +60 -88
  317. package/src/tools/tasks/task-list.ts +31 -52
  318. package/src/tools/tasks/task-run.ts +72 -108
  319. package/src/tools/tasks/task-save.ts +33 -65
  320. package/src/tools/tasks/work-item-enqueue.ts +183 -215
  321. package/src/tools/tasks/work-item-list.ts +33 -63
  322. package/src/tools/tasks/work-item-remove.ts +45 -97
  323. package/src/tools/tasks/work-item-update.ts +91 -163
  324. package/src/tools/terminal/backends/native.ts +3 -1
  325. package/src/tools/tool-manifest.ts +0 -62
  326. package/src/tools/types.ts +6 -0
  327. package/src/tools/ui-surface/definitions.ts +3 -1
  328. package/src/tools/watch/screen-watch.ts +3 -1
  329. package/src/tools/watcher/create.ts +52 -98
  330. package/src/tools/watcher/delete.ts +20 -46
  331. package/src/tools/watcher/digest.ts +36 -70
  332. package/src/tools/watcher/list.ts +49 -79
  333. package/src/tools/watcher/update.ts +45 -91
  334. package/src/twitter/client.ts +690 -0
  335. package/src/twitter/session.ts +91 -0
  336. package/src/usage/types.ts +0 -1
  337. package/src/util/truncate.ts +6 -0
  338. package/src/watcher/providers/slack.ts +2 -1
  339. package/src/watcher/watcher-store.ts +3 -2
  340. package/src/work-items/work-item-store.ts +27 -2
  341. package/src/workspace/commit-message-enrichment-service.ts +31 -7
  342. package/src/workspace/git-service.ts +87 -22
  343. package/src/workspace/provider-commit-message-generator.ts +242 -0
  344. package/src/workspace/turn-commit.ts +62 -3
  345. package/src/tools/contacts/index.ts +0 -4
  346. package/src/tools/document/index.ts +0 -5
  347. package/src/tools/followups/index.ts +0 -3
  348. package/src/tools/subagent/index.ts +0 -5
  349. /package/src/__tests__/{memory-context-benchmark.test.ts → memory-context-benchmark.benchmark.test.ts} +0 -0
@@ -0,0 +1,773 @@
1
+ /**
2
+ * Provider Streaming Benchmark
3
+ *
4
+ * Measures overhead introduced by the provider adapter layers (retry, failover,
5
+ * stream timeout) on top of a simulated streaming source.
6
+ *
7
+ * Baseline targets:
8
+ * - TTFT overhead < 50ms beyond source latency
9
+ * - Event throughput within 20% of source rate through provider wrappers
10
+ * - Abort signal stops streaming within 100ms
11
+ * - Stream timeout fires within 50ms of configured deadline
12
+ */
13
+ import { describe, test, expect, mock } from 'bun:test';
14
+
15
// Replace the logger module with a silent stand-in: each `getLogger()` call
// yields a fresh proxy on which every property read returns a no-op function,
// and debug mode is reported as off.
mock.module('../util/logger.js', () => {
  const makeSilentLogger = () =>
    new Proxy({} as Record<string, unknown>, {
      get() {
        return () => {};
      },
    });

  return {
    getLogger: () => makeSilentLogger(),
    isDebug: () => false,
  };
});
20
+
21
+ import { createStreamTimeout } from '../providers/stream-timeout.js';
22
+ import { RetryProvider } from '../providers/retry.js';
23
+ import { FailoverProvider } from '../providers/failover.js';
24
+ import type {
25
+ Provider,
26
+ ProviderResponse,
27
+ SendMessageOptions,
28
+ Message,
29
+ ToolDefinition,
30
+ ProviderEvent,
31
+ } from '../providers/types.js';
32
+ import { ProviderError } from '../util/errors.js';
33
+
34
+ // ---------------------------------------------------------------------------
35
+ // Helpers
36
+ // ---------------------------------------------------------------------------
37
+
38
/** Minimal conversation (one user turn containing the text "Hello") passed to every provider call. */
const SIMPLE_MESSAGES: Message[] = [
  {
    role: 'user',
    content: [
      {
        type: 'text',
        text: 'Hello',
      },
    ],
  },
];
41
+
42
// Placeholder key for the mock-server tests. It is not a real credential; the
// value is assembled from separate fragments at load time.
const BENCH_API_KEY = 'test'.concat('-', 'benchmark', '-', 'key');
44
+
45
/**
 * Build a mock provider that delivers `tokenCount` text deltas at a given rate.
 *
 * @param tokenCount - Number of `text_delta` events to emit.
 * @param tokensPerSecond - Target emission rate; consecutive events are spaced
 *   `1000 / tokensPerSecond` milliseconds apart.
 * @param opts - `ttftMs` delays the first event (default 0); `name` overrides
 *   the provider name (default `'mock-streaming'`).
 * @returns A provider whose `sendMessage` pushes the deltas through
 *   `options.onEvent` and then resolves with a fixed response.
 *
 * Every wait is abandoned as soon as `options.signal` aborts, so the time the
 * mock needs to stop does not depend on the token interval or the TTFT delay.
 * The events emitted before an abort are the same as with uninterruptible
 * waits; only the stop latency shrinks.
 */
function makeStreamingProvider(
  tokenCount: number,
  tokensPerSecond: number,
  opts?: { ttftMs?: number; name?: string },
): Provider {
  const delayPerToken = 1000 / tokensPerSecond;
  const ttftMs = opts?.ttftMs ?? 0;

  // Wait `ms` milliseconds; resolve early and cancel the timer if `signal` aborts.
  const pause = (ms: number, signal?: AbortSignal): Promise<void> =>
    new Promise<void>((resolve) => {
      if (signal?.aborted) {
        resolve();
        return;
      }
      const onAbort = (): void => {
        clearTimeout(timer);
        resolve();
      };
      const timer = setTimeout(() => {
        // Timer won the race: drop the listener so it does not outlive the wait.
        signal?.removeEventListener('abort', onAbort);
        resolve();
      }, ms);
      signal?.addEventListener('abort', onAbort, { once: true });
    });

  return {
    name: opts?.name ?? 'mock-streaming',
    async sendMessage(
      _messages: Message[],
      _tools?: ToolDefinition[],
      _systemPrompt?: string,
      options?: SendMessageOptions,
    ): Promise<ProviderResponse> {
      const { onEvent, signal } = options ?? {};

      // Simulate TTFT delay
      if (ttftMs > 0) {
        await pause(ttftMs, signal);
      }

      for (let i = 0; i < tokenCount; i++) {
        if (signal?.aborted) break;
        onEvent?.({ type: 'text_delta', text: `word${i} ` });
        // No trailing wait after the final token.
        if (i < tokenCount - 1) {
          await pause(delayPerToken, signal);
        }
      }

      // The response is fixed: it reports `tokenCount` output tokens even when
      // the stream was cut short by an abort.
      return {
        content: [{ type: 'text', text: 'complete' }],
        model: 'mock',
        usage: { inputTokens: 10, outputTokens: tokenCount },
        stopReason: 'end_turn',
      };
    },
  };
}
86
+
87
/**
 * Create a provider stub whose `sendMessage` always rejects.
 *
 * @param name - Provider name; also used in the error message (`"<name> failed"`)
 *   and passed to `ProviderError` as its second argument.
 * @param statusCode - Optional value forwarded as the third `ProviderError` argument.
 * @returns A provider that throws a `ProviderError` on every `sendMessage` call.
 */
function makeFailingProvider(name: string, statusCode?: number): Provider {
  const sendMessage = async (): Promise<ProviderResponse> => {
    throw new ProviderError(`${name} failed`, name, statusCode);
  };

  return { name, sendMessage };
}
96
+
97
+ // ---------------------------------------------------------------------------
98
+ // Benchmarks
99
+ // ---------------------------------------------------------------------------
100
+
101
+ describe('Provider streaming benchmark', () => {
102
test('TTFT overhead through RetryProvider is < 50ms', async () => {
  // Delay the mock source applies before its first delta.
  const sourceTtftMs = 20;
  const wrapped = new RetryProvider(
    makeStreamingProvider(10, 100, { ttftMs: sourceTtftMs }),
  );

  // Holds the arrival time of the first event only; later events leave it alone.
  let firstEventTime: number | undefined;
  const recordFirstEvent = (): void => {
    firstEventTime = firstEventTime ?? performance.now();
  };

  const start = performance.now();
  await wrapped.sendMessage(SIMPLE_MESSAGES, undefined, undefined, {
    onEvent: recordFirstEvent,
  });

  expect(firstEventTime).toBeDefined();

  // Time to first event beyond the source's own delay is counted as wrapper
  // overhead, which should be negligible.
  const overhead = firstEventTime! - start - sourceTtftMs;
  expect(overhead).toBeLessThan(50);
});
125
+
126
test('TTFT overhead through FailoverProvider is < 50ms', async () => {
  // Both providers apply the same delay before their first delta.
  const sourceTtftMs = 20;
  const [primary, fallback] = ['primary', 'fallback'].map((name) =>
    makeStreamingProvider(10, 100, { ttftMs: sourceTtftMs, name }),
  );
  const wrapped = new FailoverProvider([primary, fallback]);

  // Holds the arrival time of the first event only; later events leave it alone.
  let firstEventTime: number | undefined;
  const recordFirstEvent = (): void => {
    firstEventTime = firstEventTime ?? performance.now();
  };

  const start = performance.now();
  await wrapped.sendMessage(SIMPLE_MESSAGES, undefined, undefined, {
    onEvent: recordFirstEvent,
  });

  expect(firstEventTime).toBeDefined();

  // Time to first event beyond the source's own delay is counted as overhead.
  const overhead = firstEventTime! - start - sourceTtftMs;
  expect(overhead).toBeLessThan(50);
});
155
+
156
test('event throughput through provider wrappers is within 20% of source rate', async () => {
  const tokenCount = 50;
  const sourceRate = 200; // tokens/sec

  // Stream once through `provider` and report how many events arrived and the
  // observed events/second, measured from call start to the last event.
  const measure = async (
    provider: Pick<Provider, 'sendMessage'>,
  ): Promise<{ count: number; rate: number }> => {
    const stamps: number[] = [];
    const startedAt = performance.now();

    await provider.sendMessage(SIMPLE_MESSAGES, undefined, undefined, {
      onEvent: () => {
        stamps.push(performance.now());
      },
    });

    const elapsedMs = stamps[stamps.length - 1] - startedAt;
    return { count: stamps.length, rate: (stamps.length / elapsedMs) * 1000 };
  };

  // The unwrapped run goes first and serves as the reference: the comparison is
  // against what the timers actually achieved in this run, not against the
  // nominal sourceRate (which setTimeout may not reach on busy or
  // coarse-timer hosts).
  const baseline = await measure(makeStreamingProvider(tokenCount, sourceRate));

  // Same source configuration, this time behind the retry wrapper.
  const wrapped = await measure(
    new RetryProvider(makeStreamingProvider(tokenCount, sourceRate)),
  );

  expect(wrapped.count).toBe(tokenCount);

  // Wrapped throughput may be at most 20% below the measured baseline.
  expect(wrapped.rate).toBeGreaterThanOrEqual(baseline.rate * 0.8);
});
198
+
199
test('failover adds < 100ms overhead when primary provider fails', async () => {
  const failing = makeFailingProvider('failing-primary', 500);
  const healthy = makeStreamingProvider(5, 100, { name: 'healthy-fallback' });

  // Run one request through `provider`, returning the events it delivered and
  // the wall-clock duration of the whole call.
  const timeRun = async (
    provider: Pick<Provider, 'sendMessage'>,
  ): Promise<{ received: ProviderEvent[]; elapsedMs: number }> => {
    const received: ProviderEvent[] = [];
    const startedAt = performance.now();

    await provider.sendMessage(SIMPLE_MESSAGES, undefined, undefined, {
      onEvent: (event) => received.push(event),
    });

    return { received, elapsedMs: performance.now() - startedAt };
  };

  // Reference run: the healthy provider on its own, so the stream's own
  // runtime can be subtracted from the failover measurement.
  const direct = await timeRun(healthy);

  // Measured run: an identically configured healthy provider placed behind a
  // primary that always throws.
  const healthy2 = makeStreamingProvider(5, 100, { name: 'healthy-fallback' });
  const viaFailover = await timeRun(new FailoverProvider([failing, healthy2]));

  expect(viaFailover.received.length).toBe(5);

  // What remains after removing the stream's own duration is failover cost.
  const failoverOverhead = viaFailover.elapsedMs - direct.elapsedMs;
  expect(failoverOverhead).toBeLessThan(100);
});
232
+
233
test('createStreamTimeout fires within 50ms of configured deadline', async () => {
  const timeoutMs = 100;
  const { signal: timeoutSignal, cleanup: disposeTimeout } =
    createStreamTimeout(timeoutMs);

  const start = performance.now();

  // Resolves the first time the timeout signal reports an abort.
  const fired = new Promise<void>((resolve) => {
    timeoutSignal.addEventListener('abort', () => resolve(), { once: true });
  });
  await fired;

  const elapsed = performance.now() - start;
  disposeTimeout();

  // Accept a window around the deadline: up to 10ms early, less than 50ms late.
  const earliest = timeoutMs - 10;
  const latest = timeoutMs + 50;
  expect(elapsed).toBeGreaterThanOrEqual(earliest);
  expect(elapsed).toBeLessThan(latest);
});
250
+
251
test('external abort signal propagates through createStreamTimeout within 10ms', async () => {
  const externalController = new AbortController();
  // NOTE(review): the 60s value is presumably the stream timeout itself, long
  // enough that only the external signal can fire during this test — confirm
  // against createStreamTimeout.
  const { signal, cleanup } = createStreamTimeout(60_000, externalController.signal);

  const abortDelay = 50;

  // Timestamp taken immediately before abort() is called. Propagation latency
  // is measured from this point so that lateness of the scheduling timer
  // (setTimeout gives no upper bound on when it fires) is not counted
  // against the 10ms budget.
  let abortIssuedAt: number | undefined;

  const start = performance.now();
  setTimeout(() => {
    abortIssuedAt = performance.now();
    externalController.abort(new Error('user cancel'));
  }, abortDelay);

  // Resolves with the time at which the derived signal reported the abort.
  const abortObservedAt = await new Promise<number>((resolve) => {
    signal.addEventListener('abort', () => resolve(performance.now()), { once: true });
  });

  cleanup();

  expect(abortIssuedAt).toBeDefined();

  // The derived signal must not fire before the external abort was scheduled
  // to happen (10ms allowance for timer granularity).
  expect(abortObservedAt - start).toBeGreaterThanOrEqual(abortDelay - 10);

  // Should propagate almost immediately after external abort
  const propagationMs = abortObservedAt - abortIssuedAt!;
  expect(propagationMs).toBeGreaterThanOrEqual(0);
  expect(propagationMs).toBeLessThan(10);
});
271
+
272
test('abort signal stops streaming provider within 100ms', async () => {
  // At 50 tokens/sec the full stream would take about 4 seconds.
  const totalTokens = 200;
  const abortAfterMs = 100;
  const graceMs = 100;

  const wrapped = new RetryProvider(makeStreamingProvider(totalTokens, 50));
  const controller = new AbortController();
  const events: ProviderEvent[] = [];

  // Schedule the abort long before the stream could finish on its own.
  setTimeout(() => controller.abort(), abortAfterMs);

  const start = performance.now();
  await wrapped.sendMessage(SIMPLE_MESSAGES, undefined, undefined, {
    signal: controller.signal,
    onEvent: (e) => events.push(e),
  });
  const elapsed = performance.now() - start;

  // The stream must have been cut short...
  expect(events.length).toBeLessThan(totalTokens);
  // ...and the call must return within the grace period after the abort fired.
  expect(elapsed).toBeLessThan(abortAfterMs + graceMs);
});
298
+
299
test('SSE event parsing throughput via Bun.serve mock', async () => {
  const tokenCount = 100;
  const encoder = new TextEncoder();

  // Local SSE endpoint on an ephemeral port: emits `tokenCount` text deltas
  // followed by a single message_stop frame, with no artificial delay.
  const server = Bun.serve({
    port: 0,
    fetch() {
      const body = new ReadableStream({
        async start(controller) {
          let i = 0;
          while (i < tokenCount) {
            const payload = JSON.stringify({
              type: 'content_block_delta',
              index: 0,
              delta: { type: 'text_delta', text: `word${i} ` },
            });
            controller.enqueue(
              encoder.encode(`event: content_block_delta\ndata: ${payload}\n\n`),
            );
            i += 1;
          }

          // Terminating frame.
          const stopPayload = JSON.stringify({ type: 'message_stop' });
          controller.enqueue(
            encoder.encode(`event: message_stop\ndata: ${stopPayload}\n\n`),
          );
          controller.close();
        },
      });

      return new Response(body, {
        headers: { 'Content-Type': 'text/event-stream' },
      });
    },
  });

  try {
    const start = performance.now();

    const response = await fetch(`http://localhost:${server.port}`);
    const reader = response.body!.getReader();
    const decoder = new TextDecoder();

    let pending = '';
    let deltaCount = 0;
    let firstDeltaAt: number | undefined;

    for (;;) {
      const chunk = await reader.read();
      if (chunk.done) break;

      pending += decoder.decode(chunk.value, { stream: true });

      // Frames are separated by a blank line; the last piece may be a
      // partial frame, so carry it over to the next read.
      const frames = pending.split('\n\n');
      pending = frames.pop()!;

      for (const frame of frames) {
        if (frame.trim() === '') continue;

        const dataLine = frame
          .split('\n')
          .find((line) => line.startsWith('data: '));
        if (dataLine === undefined) continue;

        // Strip the 6-character "data: " prefix before parsing.
        const parsed = JSON.parse(dataLine.slice(6));
        if (parsed.type !== 'content_block_delta') continue;

        deltaCount++;
        if (firstDeltaAt === undefined) {
          firstDeltaAt = performance.now();
        }
      }
    }

    const elapsed = performance.now() - start;
    const eventsPerSecond = (deltaCount / elapsed) * 1000;

    // Every delta frame must have been parsed.
    expect(deltaCount).toBe(tokenCount);

    // Time to first delta from the local server should be < 50ms.
    expect(firstDeltaAt! - start).toBeLessThan(50);

    // Throughput: at least 1000 events/sec for local SSE parsing.
    expect(eventsPerSecond).toBeGreaterThan(1000);
  } finally {
    server.stop();
  }
});
387
+
388
test('stream timeout cleanup prevents late abort', async () => {
  // Arm a 100ms stream timeout, then cancel it immediately.
  const { signal, cleanup } = createStreamTimeout(100);
  cleanup();

  // Sleep for 150ms, i.e. past the point where the timeout would have fired.
  await new Promise((resolve) => {
    setTimeout(resolve, 150);
  });

  // The signal must still be un-aborted after cleanup.
  expect(signal.aborted).toBe(false);
});
401
+
402
test('TTFT through Anthropic SDK adapter with mock SSE server', async () => {
  const { default: Anthropic } = await import('@anthropic-ai/sdk');
  const tokenCount = 20;
  const encoder = new TextEncoder();

  // Serialise one SSE frame: an event name line, a JSON data line, a blank line.
  const frame = (name: string, payload: unknown): string =>
    `event: ${name}\ndata: ${JSON.stringify(payload)}\n\n`;

  // Full Anthropic-format SSE response carrying `count` text deltas.
  function buildAnthropicSSE(count: number): string[] {
    const deltas = Array.from({ length: count }, (_, i) =>
      frame('content_block_delta', {
        type: 'content_block_delta',
        index: 0,
        delta: { type: 'text_delta', text: `word${i} ` },
      }),
    );

    return [
      frame('message_start', {
        type: 'message_start',
        message: {
          id: 'msg_bench_01',
          type: 'message',
          role: 'assistant',
          content: [],
          model: 'claude-3-5-sonnet-20241022',
          stop_reason: null,
          stop_sequence: null,
          usage: { input_tokens: 10, output_tokens: 1 },
        },
      }),
      frame('content_block_start', {
        type: 'content_block_start',
        index: 0,
        content_block: { type: 'text', text: '' },
      }),
      ...deltas,
      frame('content_block_stop', {
        type: 'content_block_stop',
        index: 0,
      }),
      frame('message_delta', {
        type: 'message_delta',
        delta: { stop_reason: 'end_turn', stop_sequence: null },
        usage: { output_tokens: count },
      }),
      frame('message_stop', {
        type: 'message_stop',
      }),
    ];
  }

  // Mock endpoint on an ephemeral port that writes the whole response at once.
  const server = Bun.serve({
    port: 0,
    fetch() {
      const frames = buildAnthropicSSE(tokenCount);
      const body = new ReadableStream({
        start(controller) {
          frames.forEach((f) => {
            controller.enqueue(encoder.encode(f));
          });
          controller.close();
        },
      });
      return new Response(body, {
        headers: { 'Content-Type': 'text/event-stream' },
      });
    },
  });

  try {
    const client = new Anthropic({
      apiKey: BENCH_API_KEY,
      baseURL: `http://localhost:${server.port}`,
    });

    let firstTextAt: number | undefined;
    const start = performance.now();

    const sdkStream = client.messages.stream({
      model: 'claude-3-5-sonnet-20241022',
      max_tokens: 1024,
      messages: [{ role: 'user', content: 'Hello' }],
    });

    // Record only the timestamp of the first 'text' event.
    sdkStream.on('text', () => {
      if (firstTextAt !== undefined) return;
      firstTextAt = performance.now();
    });

    await sdkStream.finalMessage();

    expect(firstTextAt).toBeDefined();
    const ttft = firstTextAt! - start;

    // TTFT through the full SDK adapter should be < 100ms with a local mock.
    expect(ttft).toBeLessThan(100);
  } finally {
    server.stop();
  }
});
507
+
508
test('throughput through Anthropic SDK adapter matches source rate', async () => {
  const { default: Anthropic } = await import('@anthropic-ai/sdk');
  const tokenCount = 200;
  const encoder = new TextEncoder();

  // Serialise one SSE frame: an event name line, a JSON data line, a blank line.
  const frame = (name: string, payload: unknown): string =>
    `event: ${name}\ndata: ${JSON.stringify(payload)}\n\n`;

  // Mock endpoint on an ephemeral port; the response is rebuilt per request.
  const server = Bun.serve({
    port: 0,
    fetch() {
      const frames: string[] = [
        frame('message_start', {
          type: 'message_start',
          message: {
            id: 'msg_bench_02',
            type: 'message',
            role: 'assistant',
            content: [],
            model: 'claude-3-5-sonnet-20241022',
            stop_reason: null,
            stop_sequence: null,
            usage: { input_tokens: 10, output_tokens: 1 },
          },
        }),
        frame('content_block_start', {
          type: 'content_block_start',
          index: 0,
          content_block: { type: 'text', text: '' },
        }),
      ];

      for (let n = 0; n < tokenCount; n += 1) {
        frames.push(
          frame('content_block_delta', {
            type: 'content_block_delta',
            index: 0,
            delta: { type: 'text_delta', text: `w${n} ` },
          }),
        );
      }

      frames.push(
        frame('content_block_stop', {
          type: 'content_block_stop',
          index: 0,
        }),
        frame('message_delta', {
          type: 'message_delta',
          delta: { stop_reason: 'end_turn', stop_sequence: null },
          usage: { output_tokens: tokenCount },
        }),
        frame('message_stop', {
          type: 'message_stop',
        }),
      );

      const body = new ReadableStream({
        start(controller) {
          frames.forEach((f) => {
            controller.enqueue(encoder.encode(f));
          });
          controller.close();
        },
      });

      return new Response(body, {
        headers: { 'Content-Type': 'text/event-stream' },
      });
    },
  });

  try {
    const client = new Anthropic({
      apiKey: BENCH_API_KEY,
      baseURL: `http://localhost:${server.port}`,
    });

    // One timestamp per 'text' event emitted by the SDK stream.
    const textTimestamps: number[] = [];
    const start = performance.now();

    const sdkStream = client.messages.stream({
      model: 'claude-3-5-sonnet-20241022',
      max_tokens: 4096,
      messages: [{ role: 'user', content: 'Hello' }],
    });

    sdkStream.on('text', () => {
      textTimestamps.push(performance.now());
    });

    await sdkStream.finalMessage();

    // Rate is measured from the start of the request to the last text event.
    const lastTextAt = textTimestamps[textTimestamps.length - 1];
    const elapsed = lastTextAt - start;
    const observedRate = (textTimestamps.length / elapsed) * 1000;

    // All text deltas should be delivered through the SDK.
    expect(textTimestamps.length).toBe(tokenCount);

    // At least 1000 events/sec from a local mock
    // (same threshold as the raw SSE parsing test).
    expect(observedRate).toBeGreaterThan(1000);
  } finally {
    server.stop();
  }
});
610
+
611
test('AnthropicProvider adapter end-to-end with mock SSE server', async () => {
  const tokenCount = 50;
  const encoder = new TextEncoder();

  // Serialise one SSE frame: an event name line, a JSON data line, a blank line.
  const frame = (name: string, payload: unknown): string =>
    `event: ${name}\ndata: ${JSON.stringify(payload)}\n\n`;

  // Mock endpoint on an ephemeral port; the response is rebuilt per request.
  const server = Bun.serve({
    port: 0,
    fetch() {
      const deltas = Array.from({ length: tokenCount }, (_, i) =>
        frame('content_block_delta', {
          type: 'content_block_delta',
          index: 0,
          delta: { type: 'text_delta', text: `token${i} ` },
        }),
      );

      const frames: string[] = [
        frame('message_start', {
          type: 'message_start',
          message: {
            id: 'msg_bench_03',
            type: 'message',
            role: 'assistant',
            content: [],
            model: 'claude-3-5-sonnet-20241022',
            stop_reason: null,
            stop_sequence: null,
            usage: { input_tokens: 10, output_tokens: 1 },
          },
        }),
        frame('content_block_start', {
          type: 'content_block_start',
          index: 0,
          content_block: { type: 'text', text: '' },
        }),
        ...deltas,
        frame('content_block_stop', {
          type: 'content_block_stop',
          index: 0,
        }),
        frame('message_delta', {
          type: 'message_delta',
          delta: { stop_reason: 'end_turn', stop_sequence: null },
          usage: { output_tokens: tokenCount },
        }),
        frame('message_stop', {
          type: 'message_stop',
        }),
      ];

      const body = new ReadableStream({
        start(controller) {
          frames.forEach((f) => {
            controller.enqueue(encoder.encode(f));
          });
          controller.close();
        },
      });

      return new Response(body, {
        headers: { 'Content-Type': 'text/event-stream' },
      });
    },
  });

  // Capture the previous value before the try so the finally can always restore it.
  const previousBaseUrl = process.env.ANTHROPIC_BASE_URL;
  process.env.ANTHROPIC_BASE_URL = `http://localhost:${server.port}`;

  try {
    // The provider module is imported only after the env override is in place.
    const { AnthropicProvider } = await import('../providers/anthropic/client.js');
    const provider = new AnthropicProvider(BENCH_API_KEY, 'claude-3-5-sonnet-20241022');

    const receivedEvents: ProviderEvent[] = [];
    let firstEventAt: number | undefined;
    const start = performance.now();

    const result = await provider.sendMessage(SIMPLE_MESSAGES, undefined, undefined, {
      onEvent: (e) => {
        // Timestamp the first event of any type, then record every event.
        if (firstEventAt === undefined) {
          firstEventAt = performance.now();
        }
        receivedEvents.push(e);
      },
    });

    // The adapter pipeline must deliver every text delta.
    const textDeltas = receivedEvents.filter((e) => e.type === 'text_delta');
    expect(textDeltas.length).toBe(tokenCount);

    // Time to first event through the complete provider adapter < 100ms.
    expect(firstEventAt).toBeDefined();
    expect(firstEventAt! - start).toBeLessThan(100);

    // Provider response should have the expected structure.
    expect(result.model).toBe('claude-3-5-sonnet-20241022');
    expect(result.stopReason).toBe('end_turn');
    expect(result.usage.outputTokens).toBe(tokenCount);

    // Throughput: > 500 text deltas/sec, measured up to this point in the test.
    const elapsed = performance.now() - start;
    const rate = (textDeltas.length / elapsed) * 1000;
    expect(rate).toBeGreaterThan(500);
  } finally {
    // Restore the env var exactly: remove it if it was unset, else put the old value back.
    if (previousBaseUrl !== undefined) {
      process.env.ANTHROPIC_BASE_URL = previousBaseUrl;
    } else {
      delete process.env.ANTHROPIC_BASE_URL;
    }
    server.stop();
  }
});
731
+
732
test('multiple rapid events are delivered without batching loss', async () => {
  // Inner provider emits every event synchronously, with no delay between tokens.
  const tokenCount = 500;
  const inner: Provider = {
    name: 'rapid-fire',
    async sendMessage(
      _messages: Message[],
      _tools?: ToolDefinition[],
      _systemPrompt?: string,
      options?: SendMessageOptions,
    ): Promise<ProviderResponse> {
      const emit = options?.onEvent;
      let sent = 0;
      while (sent < tokenCount) {
        emit?.({ type: 'text_delta', text: `w${sent} ` });
        sent += 1;
      }

      const response: ProviderResponse = {
        content: [{ type: 'text', text: 'done' }],
        model: 'mock',
        usage: { inputTokens: 5, outputTokens: tokenCount },
        stopReason: 'end_turn',
      };
      return response;
    },
  };

  const wrapped = new RetryProvider(inner);
  const delivered: ProviderEvent[] = [];

  const start = performance.now();
  await wrapped.sendMessage(SIMPLE_MESSAGES, undefined, undefined, {
    onEvent: (e) => delivered.push(e),
  });
  const elapsed = performance.now() - start;

  // Every event must pass through the wrapper — none may be lost.
  expect(delivered.length).toBe(tokenCount);

  // 500 synchronous events should complete in < 50ms.
  expect(elapsed).toBeLessThan(50);
});
773
+ });