vellum 0.2.0 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (361) hide show
  1. package/README.md +15 -2
  2. package/bun.lock +5 -2
  3. package/package.json +4 -2
  4. package/scripts/capture-x-graphql.ts +562 -0
  5. package/scripts/ipc/check-swift-decoder-drift.ts +2 -1
  6. package/scripts/test.sh +5 -0
  7. package/src/__tests__/__snapshots__/ipc-snapshot.test.ts.snap +161 -34
  8. package/src/__tests__/account-registry.test.ts +2 -1
  9. package/src/__tests__/agent-heartbeat-service.test.ts +250 -0
  10. package/src/__tests__/app-bundler.test.ts +12 -33
  11. package/src/__tests__/asset-materialize-tool.test.ts +16 -15
  12. package/src/__tests__/asset-search-tool.test.ts +23 -22
  13. package/src/__tests__/attachments-store.test.ts +56 -127
  14. package/src/__tests__/browser-skill-baseline-tool-payload.test.ts +5 -4
  15. package/src/__tests__/browser-skill-endstate.test.ts +5 -8
  16. package/src/__tests__/call-bridge.test.ts +385 -0
  17. package/src/__tests__/call-constants.test.ts +40 -0
  18. package/src/__tests__/call-orchestrator.test.ts +454 -0
  19. package/src/__tests__/call-recovery.test.ts +518 -0
  20. package/src/__tests__/call-routes-http.test.ts +459 -0
  21. package/src/__tests__/call-state-machine.test.ts +143 -0
  22. package/src/__tests__/call-state.test.ts +133 -0
  23. package/src/__tests__/call-store.test.ts +691 -0
  24. package/src/__tests__/cli-discover.test.ts +1 -1
  25. package/src/__tests__/commit-message-enrichment-service.test.ts +550 -0
  26. package/src/__tests__/compaction.benchmark.test.ts +176 -0
  27. package/src/__tests__/computer-use-tools.test.ts +250 -0
  28. package/src/__tests__/config-schema.test.ts +348 -3
  29. package/src/__tests__/conflict-store.test.ts +2 -1
  30. package/src/__tests__/contacts-tools.test.ts +331 -0
  31. package/src/__tests__/conversation-store.test.ts +30 -32
  32. package/src/__tests__/credential-security-invariants.test.ts +4 -0
  33. package/src/__tests__/date-context.test.ts +373 -0
  34. package/src/__tests__/db-schedule-syntax-migration.test.ts +129 -0
  35. package/src/__tests__/doordash-session.test.ts +9 -0
  36. package/src/__tests__/fixtures/media-reuse-fixtures.ts +3 -3
  37. package/src/__tests__/followup-tools.test.ts +303 -0
  38. package/src/__tests__/handlers-twitter-config.test.ts +718 -0
  39. package/src/__tests__/intent-routing.test.ts +64 -57
  40. package/src/__tests__/ipc-roundtrip.benchmark.test.ts +237 -0
  41. package/src/__tests__/ipc-snapshot.test.ts +96 -28
  42. package/src/__tests__/llm-usage-store.test.ts +3 -8
  43. package/src/__tests__/media-generate-image.test.ts +1 -1
  44. package/src/__tests__/media-reuse-story.e2e.test.ts +7 -7
  45. package/src/__tests__/memory-retrieval.benchmark.test.ts +430 -0
  46. package/src/__tests__/parallel-tool.benchmark.test.ts +294 -0
  47. package/src/__tests__/playbook-tools.test.ts +342 -0
  48. package/src/__tests__/profile-compiler.test.ts +2 -1
  49. package/src/__tests__/provider-streaming.benchmark.test.ts +773 -0
  50. package/src/__tests__/recurrence-engine-rruleset.test.ts +78 -0
  51. package/src/__tests__/recurrence-engine.test.ts +69 -0
  52. package/src/__tests__/recurrence-types.test.ts +71 -0
  53. package/src/__tests__/registry.test.ts +17 -10
  54. package/src/__tests__/relay-server.test.ts +633 -0
  55. package/src/__tests__/reminder-store.test.ts +6 -3
  56. package/src/__tests__/reminder.test.ts +43 -77
  57. package/src/__tests__/run-orchestrator-assistant-events.test.ts +222 -0
  58. package/src/__tests__/run-orchestrator.test.ts +7 -7
  59. package/src/__tests__/runtime-attachment-metadata.test.ts +19 -20
  60. package/src/__tests__/runtime-runs-http.test.ts +5 -23
  61. package/src/__tests__/runtime-runs.test.ts +11 -11
  62. package/src/__tests__/schedule-store.test.ts +482 -0
  63. package/src/__tests__/schedule-tools.test.ts +700 -0
  64. package/src/__tests__/scheduler-recurrence.test.ts +329 -0
  65. package/src/__tests__/server-history-render.test.ts +14 -13
  66. package/src/__tests__/session-error.test.ts +28 -0
  67. package/src/__tests__/session-init.benchmark.test.ts +462 -0
  68. package/src/__tests__/session-queue.test.ts +89 -16
  69. package/src/__tests__/session-runtime-assembly.test.ts +161 -0
  70. package/src/__tests__/session-surfaces-task-progress.test.ts +104 -0
  71. package/src/__tests__/signup-e2e.test.ts +2 -1
  72. package/src/__tests__/skill-projection.benchmark.test.ts +328 -0
  73. package/src/__tests__/skill-script-runner.test.ts +159 -0
  74. package/src/__tests__/speaker-identification.test.ts +52 -0
  75. package/src/__tests__/subagent-manager-notify.test.ts +42 -10
  76. package/src/__tests__/subagent-tools.test.ts +141 -41
  77. package/src/__tests__/task-compiler.test.ts +2 -1
  78. package/src/__tests__/task-runner.test.ts +2 -1
  79. package/src/__tests__/task-scheduler.test.ts +2 -1
  80. package/src/__tests__/task-tools.test.ts +49 -56
  81. package/src/__tests__/tool-audit-listener.test.ts +1 -0
  82. package/src/__tests__/tool-domain-event-publisher.test.ts +2 -0
  83. package/src/__tests__/tool-execution-pipeline.benchmark.test.ts +500 -0
  84. package/src/__tests__/tool-executor.test.ts +13 -17
  85. package/src/__tests__/turn-commit.test.ts +273 -2
  86. package/src/__tests__/twilio-provider.test.ts +143 -0
  87. package/src/__tests__/twilio-routes.test.ts +789 -0
  88. package/src/__tests__/twitter-auth-handler.test.ts +581 -0
  89. package/src/__tests__/view-image-tool.test.ts +217 -0
  90. package/src/__tests__/workspace-git-service.test.ts +403 -0
  91. package/src/__tests__/workspace-heartbeat-service.test.ts +141 -2
  92. package/src/agent-heartbeat/agent-heartbeat-service.ts +155 -0
  93. package/src/bundler/app-bundler.ts +35 -14
  94. package/src/calls/call-bridge.ts +95 -0
  95. package/src/calls/call-constants.ts +48 -0
  96. package/src/calls/call-domain.ts +276 -0
  97. package/src/calls/call-orchestrator.ts +390 -0
  98. package/src/calls/call-recovery.ts +207 -0
  99. package/src/calls/call-state-machine.ts +68 -0
  100. package/src/calls/call-state.ts +64 -0
  101. package/src/calls/call-store.ts +416 -0
  102. package/src/calls/relay-server.ts +335 -0
  103. package/src/calls/speaker-identification.ts +213 -0
  104. package/src/calls/twilio-config.ts +34 -0
  105. package/src/calls/twilio-provider.ts +173 -0
  106. package/src/calls/twilio-routes.ts +250 -0
  107. package/src/calls/types.ts +37 -0
  108. package/src/calls/voice-provider.ts +14 -0
  109. package/src/cli/config-commands.ts +334 -0
  110. package/src/cli/core-commands.ts +776 -0
  111. package/src/cli/doordash.ts +256 -25
  112. package/src/cli/ipc-client.ts +82 -0
  113. package/src/cli/map.ts +246 -0
  114. package/src/cli/twitter.ts +575 -0
  115. package/src/cli.ts +7 -5
  116. package/src/commands/__tests__/cc-command-registry.test.ts +319 -0
  117. package/src/commands/cc-command-registry.ts +209 -0
  118. package/src/config/bundled-skills/contacts/SKILL.md +39 -0
  119. package/src/config/bundled-skills/contacts/TOOLS.json +122 -0
  120. package/src/config/bundled-skills/contacts/tools/contact-merge.ts +9 -0
  121. package/src/config/bundled-skills/contacts/tools/contact-search.ts +9 -0
  122. package/src/config/bundled-skills/contacts/tools/contact-upsert.ts +9 -0
  123. package/src/config/bundled-skills/document/SKILL.md +18 -0
  124. package/src/config/bundled-skills/document/TOOLS.json +53 -0
  125. package/src/config/bundled-skills/document/tools/document-create.ts +9 -0
  126. package/src/config/bundled-skills/document/tools/document-update.ts +9 -0
  127. package/src/config/bundled-skills/doordash/SKILL.md +163 -0
  128. package/src/config/bundled-skills/followups/SKILL.md +32 -0
  129. package/src/config/bundled-skills/followups/TOOLS.json +100 -0
  130. package/src/config/bundled-skills/followups/tools/followup-create.ts +9 -0
  131. package/src/config/bundled-skills/followups/tools/followup-list.ts +9 -0
  132. package/src/config/bundled-skills/followups/tools/followup-resolve.ts +9 -0
  133. package/src/config/bundled-skills/image-studio/TOOLS.json +2 -2
  134. package/src/config/bundled-skills/image-studio/tools/media-generate-image.ts +2 -24
  135. package/src/config/bundled-skills/messaging/tools/messaging-analyze-style.ts +2 -1
  136. package/src/config/bundled-skills/playbooks/SKILL.md +31 -0
  137. package/src/config/bundled-skills/playbooks/TOOLS.json +126 -0
  138. package/src/config/bundled-skills/playbooks/tools/playbook-create.ts +9 -0
  139. package/src/config/bundled-skills/playbooks/tools/playbook-delete.ts +9 -0
  140. package/src/config/bundled-skills/playbooks/tools/playbook-list.ts +9 -0
  141. package/src/config/bundled-skills/playbooks/tools/playbook-update.ts +9 -0
  142. package/src/config/bundled-skills/reminder/SKILL.md +20 -0
  143. package/src/config/bundled-skills/reminder/TOOLS.json +67 -0
  144. package/src/config/bundled-skills/reminder/tools/reminder-cancel.ts +9 -0
  145. package/src/config/bundled-skills/reminder/tools/reminder-create.ts +9 -0
  146. package/src/config/bundled-skills/reminder/tools/reminder-list.ts +9 -0
  147. package/src/config/bundled-skills/schedule/SKILL.md +74 -0
  148. package/src/config/bundled-skills/schedule/TOOLS.json +135 -0
  149. package/src/config/bundled-skills/schedule/tools/schedule-create.ts +9 -0
  150. package/src/config/bundled-skills/schedule/tools/schedule-delete.ts +9 -0
  151. package/src/config/bundled-skills/schedule/tools/schedule-list.ts +9 -0
  152. package/src/config/bundled-skills/schedule/tools/schedule-update.ts +9 -0
  153. package/src/config/bundled-skills/subagent/SKILL.md +25 -0
  154. package/src/config/bundled-skills/subagent/TOOLS.json +107 -0
  155. package/src/config/bundled-skills/subagent/tools/subagent-abort.ts +9 -0
  156. package/src/config/bundled-skills/subagent/tools/subagent-message.ts +9 -0
  157. package/src/config/bundled-skills/subagent/tools/subagent-read.ts +9 -0
  158. package/src/config/bundled-skills/subagent/tools/subagent-spawn.ts +9 -0
  159. package/src/config/bundled-skills/subagent/tools/subagent-status.ts +9 -0
  160. package/src/config/bundled-skills/tasks/SKILL.md +28 -0
  161. package/src/config/bundled-skills/tasks/TOOLS.json +256 -0
  162. package/src/config/bundled-skills/tasks/tools/task-delete.ts +9 -0
  163. package/src/config/bundled-skills/tasks/tools/task-list-add.ts +9 -0
  164. package/src/config/bundled-skills/tasks/tools/task-list-remove.ts +9 -0
  165. package/src/config/bundled-skills/tasks/tools/task-list-show.ts +9 -0
  166. package/src/config/bundled-skills/tasks/tools/task-list-update.ts +9 -0
  167. package/src/config/bundled-skills/tasks/tools/task-list.ts +9 -0
  168. package/src/config/bundled-skills/tasks/tools/task-run.ts +9 -0
  169. package/src/config/bundled-skills/tasks/tools/task-save.ts +9 -0
  170. package/src/config/bundled-skills/twitter/SKILL.md +134 -0
  171. package/src/config/bundled-skills/watcher/SKILL.md +27 -0
  172. package/src/config/bundled-skills/watcher/TOOLS.json +147 -0
  173. package/src/config/bundled-skills/watcher/tools/watcher-create.ts +9 -0
  174. package/src/config/bundled-skills/watcher/tools/watcher-delete.ts +9 -0
  175. package/src/config/bundled-skills/watcher/tools/watcher-digest.ts +9 -0
  176. package/src/config/bundled-skills/watcher/tools/watcher-list.ts +9 -0
  177. package/src/config/bundled-skills/watcher/tools/watcher-update.ts +9 -0
  178. package/src/config/defaults.ts +44 -0
  179. package/src/config/loader.ts +4 -1
  180. package/src/config/schema.ts +218 -1
  181. package/src/config/system-prompt.ts +100 -6
  182. package/src/config/templates/IDENTITY.md +7 -0
  183. package/src/config/types.ts +5 -0
  184. package/src/contacts/contact-store.ts +4 -4
  185. package/src/daemon/assistant-attachments.ts +10 -0
  186. package/src/daemon/classifier.ts +3 -1
  187. package/src/daemon/computer-use-session.ts +3 -1
  188. package/src/daemon/date-context.ts +136 -0
  189. package/src/daemon/handlers/apps.ts +16 -1
  190. package/src/daemon/handlers/browser.ts +54 -0
  191. package/src/daemon/handlers/computer-use.ts +7 -1
  192. package/src/daemon/handlers/config.ts +192 -4
  193. package/src/daemon/handlers/diagnostics.ts +5 -1
  194. package/src/daemon/handlers/documents.ts +18 -29
  195. package/src/daemon/handlers/home-base.ts +5 -1
  196. package/src/daemon/handlers/index.ts +40 -271
  197. package/src/daemon/handlers/misc.ts +9 -1
  198. package/src/daemon/handlers/publish.ts +6 -1
  199. package/src/daemon/handlers/sessions.ts +65 -12
  200. package/src/daemon/handlers/shared.ts +36 -1
  201. package/src/daemon/handlers/signing.ts +37 -0
  202. package/src/daemon/handlers/skills.ts +20 -6
  203. package/src/daemon/handlers/subagents.ts +8 -3
  204. package/src/daemon/handlers/twitter-auth.ts +169 -0
  205. package/src/daemon/handlers/work-items.ts +495 -39
  206. package/src/daemon/ipc-contract-inventory.json +40 -4
  207. package/src/daemon/ipc-contract.ts +185 -37
  208. package/src/daemon/ipc-protocol.ts +7 -2
  209. package/src/daemon/lifecycle.ts +48 -5
  210. package/src/daemon/main.ts +10 -4
  211. package/src/daemon/ride-shotgun-handler.ts +74 -10
  212. package/src/daemon/server.ts +144 -29
  213. package/src/daemon/session-agent-loop.ts +887 -0
  214. package/src/daemon/session-attachments.ts +28 -5
  215. package/src/daemon/session-error.ts +24 -3
  216. package/src/daemon/session-lifecycle.ts +147 -0
  217. package/src/daemon/session-media-retry.ts +147 -0
  218. package/src/daemon/session-messaging.ts +145 -0
  219. package/src/daemon/session-notifiers.ts +164 -0
  220. package/src/daemon/session-process.ts +2 -2
  221. package/src/daemon/session-queue-manager.ts +1 -0
  222. package/src/daemon/session-runtime-assembly.ts +52 -0
  223. package/src/daemon/session-skill-tools.ts +124 -5
  224. package/src/daemon/session-slash.ts +3 -0
  225. package/src/daemon/session-surfaces.ts +77 -2
  226. package/src/daemon/session-tool-setup.ts +222 -2
  227. package/src/daemon/session-usage.ts +0 -2
  228. package/src/daemon/session.ts +114 -1365
  229. package/src/daemon/video-thumbnail.ts +60 -0
  230. package/src/doordash/client.ts +121 -27
  231. package/src/doordash/queries.ts +1 -2
  232. package/src/export/formatter.ts +3 -1
  233. package/src/followups/followup-store.ts +4 -2
  234. package/src/followups/types.ts +6 -0
  235. package/src/hooks/templates.ts +1 -1
  236. package/src/index.ts +32 -1151
  237. package/src/media/gemini-image-service.ts +1 -1
  238. package/src/memory/attachments-store.ts +28 -83
  239. package/src/memory/channel-delivery-store.ts +7 -21
  240. package/src/memory/clarification-resolver.ts +6 -5
  241. package/src/memory/contradiction-checker.ts +3 -2
  242. package/src/memory/conversation-key-store.ts +10 -29
  243. package/src/memory/conversation-store.ts +2 -1
  244. package/src/memory/db.ts +362 -2
  245. package/src/memory/entity-extractor.ts +6 -3
  246. package/src/memory/items-extractor.ts +5 -4
  247. package/src/memory/jobs-store.ts +3 -2
  248. package/src/memory/llm-usage-store.ts +1 -2
  249. package/src/memory/runs-store.ts +1 -2
  250. package/src/memory/schema.ts +65 -2
  251. package/src/messaging/style-analyzer.ts +3 -2
  252. package/src/messaging/thread-summarizer.ts +8 -12
  253. package/src/messaging/triage-engine.ts +4 -2
  254. package/src/providers/openrouter/client.ts +20 -0
  255. package/src/providers/registry.ts +8 -0
  256. package/src/runtime/http-server.ts +277 -25
  257. package/src/runtime/http-types.ts +0 -2
  258. package/src/runtime/routes/attachment-routes.ts +5 -6
  259. package/src/runtime/routes/call-routes.ts +140 -0
  260. package/src/runtime/routes/channel-routes.ts +12 -19
  261. package/src/runtime/routes/conversation-routes.ts +5 -9
  262. package/src/runtime/routes/run-routes.ts +4 -8
  263. package/src/runtime/run-orchestrator.ts +39 -6
  264. package/src/schedule/recurrence-engine.ts +138 -0
  265. package/src/schedule/recurrence-types.ts +67 -0
  266. package/src/schedule/schedule-store.ts +102 -57
  267. package/src/schedule/scheduler.ts +9 -6
  268. package/src/security/oauth2.ts +29 -4
  269. package/src/security/secret-allowlist.ts +46 -0
  270. package/src/skills/clawhub.ts +1 -1
  271. package/src/subagent/manager.ts +40 -8
  272. package/src/swarm/backend-claude-code.ts +64 -9
  273. package/src/swarm/worker-prompts.ts +2 -1
  274. package/src/tasks/SPEC.md +34 -28
  275. package/src/tasks/ephemeral-permissions.ts +16 -7
  276. package/src/tasks/task-compiler.ts +5 -4
  277. package/src/tasks/task-runner.ts +10 -5
  278. package/src/tasks/task-scheduler.ts +1 -1
  279. package/src/tasks/tool-sanitizer.ts +36 -0
  280. package/src/tools/assets/search.ts +4 -4
  281. package/src/tools/browser/api-map.ts +220 -0
  282. package/src/tools/browser/auto-navigate.ts +270 -0
  283. package/src/tools/browser/browser-execution.ts +2 -1
  284. package/src/tools/browser/browser-manager.ts +2 -2
  285. package/src/tools/browser/network-recorder.ts +5 -4
  286. package/src/tools/browser/x-auto-navigate.ts +207 -0
  287. package/src/tools/calls/call-end.ts +67 -0
  288. package/src/tools/calls/call-start.ts +73 -0
  289. package/src/tools/calls/call-status.ts +81 -0
  290. package/src/tools/claude-code/claude-code.ts +77 -11
  291. package/src/tools/contacts/contact-merge.ts +46 -78
  292. package/src/tools/contacts/contact-search.ts +35 -79
  293. package/src/tools/contacts/contact-upsert.ts +35 -108
  294. package/src/tools/credentials/vault.ts +21 -5
  295. package/src/tools/document/document-tool.ts +71 -144
  296. package/src/tools/executor.ts +129 -10
  297. package/src/tools/followups/followup_create.ts +46 -88
  298. package/src/tools/followups/followup_list.ts +34 -74
  299. package/src/tools/followups/followup_resolve.ts +31 -66
  300. package/src/tools/host-terminal/cli-discover.ts +2 -1
  301. package/src/tools/host-terminal/host-shell.ts +10 -0
  302. package/src/tools/memory/handlers.ts +5 -4
  303. package/src/tools/network/__tests__/web-search.test.ts +427 -0
  304. package/src/tools/network/script-proxy/__tests__/logging.test.ts +248 -0
  305. package/src/tools/network/script-proxy/__tests__/policy.test.ts +234 -0
  306. package/src/tools/network/script-proxy/__tests__/router.test.ts +76 -0
  307. package/src/tools/network/web-fetch.ts +18 -6
  308. package/src/tools/playbooks/index.ts +4 -5
  309. package/src/tools/playbooks/playbook-create.ts +3 -47
  310. package/src/tools/playbooks/playbook-delete.ts +1 -25
  311. package/src/tools/playbooks/playbook-list.ts +1 -28
  312. package/src/tools/playbooks/playbook-update.ts +3 -51
  313. package/src/tools/registry.ts +2 -4
  314. package/src/tools/reminder/reminder.ts +5 -78
  315. package/src/tools/schedule/create.ts +69 -74
  316. package/src/tools/schedule/delete.ts +21 -47
  317. package/src/tools/schedule/list.ts +55 -74
  318. package/src/tools/schedule/update.ts +77 -84
  319. package/src/tools/subagent/abort.ts +29 -58
  320. package/src/tools/subagent/message.ts +30 -63
  321. package/src/tools/subagent/read.ts +53 -84
  322. package/src/tools/subagent/spawn.ts +43 -82
  323. package/src/tools/subagent/status.ts +42 -71
  324. package/src/tools/swarm/delegate.ts +2 -1
  325. package/src/tools/tasks/index.ts +8 -6
  326. package/src/tools/tasks/task-delete.ts +69 -56
  327. package/src/tools/tasks/task-list.ts +31 -52
  328. package/src/tools/tasks/task-run.ts +74 -102
  329. package/src/tools/tasks/task-save.ts +33 -65
  330. package/src/tools/tasks/work-item-enqueue.ts +192 -134
  331. package/src/tools/tasks/work-item-list.ts +33 -78
  332. package/src/tools/tasks/work-item-remove.ts +60 -0
  333. package/src/tools/tasks/work-item-update.ts +114 -0
  334. package/src/tools/terminal/backends/native.ts +3 -1
  335. package/src/tools/tool-manifest.ts +20 -74
  336. package/src/tools/types.ts +6 -0
  337. package/src/tools/ui-surface/definitions.ts +6 -1
  338. package/src/tools/watch/screen-watch.ts +3 -1
  339. package/src/tools/watcher/create.ts +52 -98
  340. package/src/tools/watcher/delete.ts +20 -46
  341. package/src/tools/watcher/digest.ts +36 -70
  342. package/src/tools/watcher/list.ts +49 -79
  343. package/src/tools/watcher/update.ts +45 -91
  344. package/src/twitter/client.ts +690 -0
  345. package/src/twitter/session.ts +91 -0
  346. package/src/usage/types.ts +0 -1
  347. package/src/util/truncate.ts +6 -0
  348. package/src/watcher/providers/slack.ts +2 -1
  349. package/src/watcher/watcher-store.ts +3 -2
  350. package/src/work-items/work-item-store.ts +236 -2
  351. package/src/workspace/commit-message-enrichment-service.ts +284 -0
  352. package/src/workspace/commit-message-provider.ts +95 -0
  353. package/src/workspace/git-service.ts +272 -52
  354. package/src/workspace/heartbeat-service.ts +70 -13
  355. package/src/workspace/provider-commit-message-generator.ts +242 -0
  356. package/src/workspace/turn-commit.ts +100 -51
  357. package/src/tools/contacts/index.ts +0 -4
  358. package/src/tools/document/index.ts +0 -5
  359. package/src/tools/followups/index.ts +0 -3
  360. package/src/tools/subagent/index.ts +0 -5
  361. /package/src/__tests__/{memory-context-benchmark.test.ts → memory-context-benchmark.benchmark.test.ts} +0 -0
@@ -0,0 +1,207 @@
1
+ /**
2
+ * CDP-based auto-navigation for X.com.
3
+ *
4
+ * Drives Chrome through key X.com pages to trigger GraphQL API calls,
5
+ * so the NetworkRecorder captures the full API surface without manual browsing.
6
+ */
7
+
8
+ import { getLogger } from '../../util/logger.js';
9
+
10
+ const log = getLogger('x-auto-navigate');
11
+
12
+ const CDP_BASE = 'http://localhost:9222';
13
+
14
/**
 * One stop in the auto-navigation tour.
 *
 * A step may load a URL, click an element, or both; when both are set the
 * URL is loaded first and the click happens afterwards.
 */
interface NavStep {
  /** Human-readable name used in logs and in the list of completed steps. */
  label: string;
  /** Absolute URL to load via `Page.navigate`, if any. */
  url?: string;
  /** CSS selector of an element to click in the current page, if any. */
  clickSelector?: string;
}
19
+
20
/**
 * Minimal CDP client — connects to one page tab.
 *
 * Commands are sent as JSON `{ id, method, params }` frames; a response is
 * matched to its caller by `id`. Frames without an `id` (CDP event
 * notifications) are ignored.
 *
 * Every pending command is rejected when the socket closes, so callers
 * awaiting `send()` never hang on a dead connection.
 */
class MiniCDP {
  private ws: WebSocket | null = null;
  private nextId = 1;
  private callbacks = new Map<number, { resolve: (v: unknown) => void; reject: (e: Error) => void }>();

  /**
   * Open the WebSocket to a tab's debugger endpoint.
   *
   * @param wsUrl The tab's `webSocketDebuggerUrl`.
   * @returns Resolves once the socket is open; rejects if the socket errors
   *          or closes before opening.
   */
  async connect(wsUrl: string): Promise<void> {
    return new Promise((resolve, reject) => {
      const ws = new WebSocket(wsUrl);
      ws.onopen = () => { this.ws = ws; resolve(); };
      ws.onerror = (e) => {
        // Interpolating the event object itself would render "[object Event]".
        const message = (e as Partial<ErrorEvent>).message;
        reject(new Error(`CDP error: ${message || e.type || 'unknown'}`));
      };
      ws.onclose = () => {
        // Only clear the reference if it still points at this socket.
        if (this.ws === ws) this.ws = null;
        this.rejectPending(new Error('CDP connection closed'));
        // No-op when the connection had already opened (promise settled).
        reject(new Error('CDP connection closed before it opened'));
      };
      ws.onmessage = (event) => {
        let parsed: unknown;
        try {
          parsed = JSON.parse(String(event.data));
        } catch {
          // A malformed frame must not throw out of the event handler.
          return;
        }
        if (typeof parsed !== 'object' || parsed === null) return;
        const msg = parsed as { id?: number | null; result?: unknown; error?: { message?: string } };
        if (msg.id == null) return; // event notification, not a command response
        const cb = this.callbacks.get(msg.id);
        if (!cb) return;
        this.callbacks.delete(msg.id);
        if (msg.error) {
          cb.reject(new Error(msg.error.message ?? 'CDP command failed'));
        } else {
          cb.resolve(msg.result);
        }
      };
    });
  }

  /**
   * Send one CDP command and wait for its response.
   *
   * @param method CDP method name, e.g. `Page.navigate`.
   * @param params Optional method parameters.
   * @returns The `result` payload of the matching response.
   * @throws Error('Not connected') when no socket is open; also rejects when
   *         the response carries an error, the frame cannot be sent, or the
   *         connection closes before a response arrives.
   */
  async send(method: string, params?: Record<string, unknown>): Promise<unknown> {
    const ws = this.ws;
    if (!ws) throw new Error('Not connected');
    const id = this.nextId++;
    return new Promise((resolve, reject) => {
      this.callbacks.set(id, { resolve, reject });
      try {
        ws.send(JSON.stringify({ id, method, params }));
      } catch (err) {
        // Do not leave a callback behind for a frame that was never sent.
        this.callbacks.delete(id);
        reject(err instanceof Error ? err : new Error(String(err)));
      }
    });
  }

  /** Close the socket (if any) and reject every command still awaiting a response. */
  close() {
    const ws = this.ws;
    this.ws = null;
    ws?.close();
    this.rejectPending(new Error('CDP connection closed'));
  }

  /** Reject and forget all in-flight commands. */
  private rejectPending(reason: Error): void {
    const pending = [...this.callbacks.values()];
    this.callbacks.clear();
    for (const cb of pending) cb.reject(reason);
  }
}
56
+
57
/**
 * Whether a tab URL belongs to X (x.com / twitter.com or a subdomain).
 * Compares the parsed hostname so look-alike hosts such as `dropbox.com`
 * (which contains the substring "x.com") are not mistaken for X.
 */
function isXTabUrl(url: string): boolean {
  let hostname: string;
  try {
    hostname = new URL(url).hostname.toLowerCase();
  } catch {
    return false;
  }
  return ['x.com', 'twitter.com'].some(
    domain => hostname === domain || hostname.endsWith(`.${domain}`),
  );
}

/**
 * Navigate Chrome through X.com pages to trigger GraphQL calls.
 * The NetworkRecorder should already be attached and capturing.
 *
 * Picks an open X tab (falling back to the first page tab), loads the home
 * timeline, reads the logged-in screen name from the profile link, then walks
 * a fixed list of pages, scrolling each one to trigger lazy-loaded content.
 * A step whose click target cannot be clicked is logged as failed and is not
 * reported as completed. The CDP socket is always closed before returning.
 *
 * @param abortSignal Optional signal to stop navigation early; checked before
 *                    each step.
 * @returns List of step labels that completed successfully. Empty when Chrome
 *          is unreachable, no tab is available, or the initial navigation fails.
 */
export async function navigateXPages(abortSignal?: { aborted: boolean }): Promise<string[]> {
  let wsUrl: string | null = null;
  try {
    const res = await fetch(`${CDP_BASE}/json/list`);
    if (!res.ok) {
      log.warn('CDP not available for auto-navigation');
      return [];
    }
    const targets = (await res.json()) as Array<{ type: string; url: string; webSocketDebuggerUrl: string }>;
    const pages = targets.filter(t => t.type === 'page');
    const xTab = pages.find(t => isXTabUrl(t.url));
    // Prefer an existing X tab; otherwise reuse whichever page tab comes first.
    wsUrl = xTab?.webSocketDebuggerUrl ?? pages[0]?.webSocketDebuggerUrl ?? null;
  } catch (err) {
    log.warn({ err }, 'Failed to discover Chrome tabs');
    return [];
  }

  if (!wsUrl) {
    log.warn('No Chrome tab found for auto-navigation');
    return [];
  }

  const cdp = new MiniCDP();
  try {
    await cdp.connect(wsUrl);
  } catch (err) {
    log.warn({ err }, 'Failed to connect CDP for auto-navigation');
    return [];
  }

  const completed: string[] = [];
  try {
    await cdp.send('Page.enable').catch(() => {});

    // Navigate to home first to discover the screen name
    try {
      await cdp.send('Page.navigate', { url: 'https://x.com/home' });
      await sleep(3000);
    } catch (err) {
      log.warn({ err }, 'Failed to navigate to home');
      return [];
    }

    // Resolve screen name for profile-based URLs
    let screenName: string | null = null;
    try {
      const result = await cdp.send('Runtime.evaluate', {
        expression: `
          (function() {
            const link = document.querySelector('a[data-testid="AppTabBar_Profile_Link"]');
            if (link) return link.getAttribute('href')?.replace('/', '') ?? null;
            return null;
          })()
        `,
        awaitPromise: false,
        returnByValue: true,
      }) as { result?: { value?: string | null } };
      screenName = result?.result?.value ?? null;
    } catch { /* best effort: profile-based steps are simply skipped */ }

    log.info({ screenName }, 'Detected screen name');

    // Build steps with resolved URLs
    const steps: NavStep[] = [
      { label: 'Home timeline', url: 'https://x.com/home' },
      { label: 'Profile', clickSelector: 'a[data-testid="AppTabBar_Profile_Link"]' },
      { label: 'Tweet detail', clickSelector: 'article[data-testid="tweet"] a[href*="/status/"]' },
      { label: 'Search', url: 'https://x.com/search?q=hello&src=typed_query' },
      { label: 'Bookmarks', url: 'https://x.com/i/bookmarks' },
      { label: 'Notifications', url: 'https://x.com/notifications' },
    ];

    // Add profile-based URLs if we have the screen name
    if (screenName) {
      steps.push(
        { label: 'Likes', url: `https://x.com/${screenName}/likes` },
        { label: 'Followers', url: `https://x.com/${screenName}/followers` },
        { label: 'Following', url: `https://x.com/${screenName}/following` },
        { label: 'Media', url: `https://x.com/${screenName}/media` },
      );
    }

    for (const step of steps) {
      if (abortSignal?.aborted) break;

      log.info({ step: step.label }, 'Auto-navigate step starting');

      try {
        if (step.url) {
          await cdp.send('Page.navigate', { url: step.url });
          await sleep(3000);
        }

        if (step.clickSelector) {
          await sleep(1500);
          const clicked = await clickInPage(cdp, step.clickSelector);
          if (!clicked) {
            // Without the click the page never changed, so the step did not run.
            throw new Error(`Click target not found or click failed: ${step.clickSelector}`);
          }
          await sleep(2000);
        }

        // Scroll to trigger lazy-loaded content
        await cdp.send('Runtime.evaluate', {
          expression: 'window.scrollBy(0, 800)',
          awaitPromise: false,
        }).catch(() => {});

        await sleep(2000);

        completed.push(step.label);
        log.info({ step: step.label }, 'Auto-navigate step completed');
      } catch (err) {
        log.warn({ err, step: step.label }, 'Auto-navigate step failed');
      }
    }

    log.info({ completed: completed.length, total: steps.length }, 'Auto-navigation finished');
    return completed;
  } finally {
    // Release the debugger socket on every exit path.
    cdp.close();
  }
}
183
+
184
/**
 * Click the first element matching `selector` in the page behind `cdp`.
 *
 * The page-side script scrolls the element into view before clicking it.
 *
 * @param cdp Connected CDP client for the target tab.
 * @param selector CSS selector; JSON-encoded before being embedded in the script.
 * @returns `true` only when the page script reports a click; `false` when no
 *          element matched or the CDP call failed.
 */
async function clickInPage(cdp: MiniCDP, selector: string): Promise<boolean> {
  const quotedSelector = JSON.stringify(selector);
  const script = `
      (function() {
        const el = document.querySelector(${quotedSelector});
        if (!el) return false;
        el.scrollIntoView({ block: 'center' });
        el.click();
        return true;
      })()
    `;

  try {
    const response = await cdp.send('Runtime.evaluate', {
      expression: script,
      awaitPromise: false,
      returnByValue: true,
    });
    const outcome = (response as { result?: { value?: boolean } } | undefined)?.result?.value;
    return outcome === true;
  } catch {
    // Best effort: any CDP failure counts as "not clicked".
    return false;
  }
}
204
+
205
/**
 * Pause for a fixed duration.
 *
 * @param ms Delay in milliseconds.
 * @returns A promise that resolves (with no value) once the timer fires.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>(resolve => {
    setTimeout(resolve, ms);
  });
}
@@ -0,0 +1,67 @@
1
+ import { RiskLevel } from '../../permissions/types.js';
2
+ import type { Tool, ToolContext, ToolExecutionResult } from '../types.js';
3
+ import type { ToolDefinition } from '../../providers/types.js';
4
+ import { registerTool } from '../registry.js';
5
+ import { cancelCall } from '../../calls/call-domain.js';
6
+
7
const definition: ToolDefinition = {
  name: 'call_end',
  description: 'End an active phone call',
  input_schema: {
    type: 'object',
    properties: {
      call_session_id: {
        type: 'string',
        description: 'The call session ID to end',
      },
      reason: {
        type: 'string',
        description: 'Reason for ending the call',
      },
    },
    required: ['call_session_id'],
  },
};

/**
 * Tool that ends a phone call by delegating to `cancelCall`.
 *
 * Input is validated at runtime rather than trusted from the schema:
 * `call_session_id` must be a non-empty string, and `reason` is only
 * forwarded when it is actually a string.
 */
class CallEndTool implements Tool {
  name = 'call_end';
  description = definition.description;
  category = 'communication';
  defaultRiskLevel = RiskLevel.Medium;

  getDefinition(): ToolDefinition {
    return definition;
  }

  /**
   * End the call identified by `input.call_session_id`.
   *
   * @param input Raw tool arguments (`call_session_id`, optional `reason`).
   * @param _context Unused.
   * @returns A summary on success; an error result when the ID is missing or
   *          `cancelCall` fails. A 409 from `cancelCall` is reported as a
   *          non-error.
   */
  async execute(input: Record<string, unknown>, _context: ToolContext): Promise<ToolExecutionResult> {
    const callSessionId = input.call_session_id;
    if (typeof callSessionId !== 'string' || callSessionId.length === 0) {
      return { content: 'Error: call_session_id is required and must be a string', isError: true };
    }

    // A malformed optional reason must not block ending the call, and must not
    // be forwarded or echoed as if it were text — drop it instead.
    const reason = typeof input.reason === 'string' ? input.reason : undefined;

    const result = await cancelCall({ callSessionId, reason });

    if (!result.ok) {
      // If the call already ended, report it as a non-error for the tool
      if (result.status === 409) {
        return { content: result.error, isError: false };
      }
      return { content: `Error: ${result.error}`, isError: true };
    }

    const lines = [
      'Call ended successfully.',
      `  Call Session ID: ${callSessionId}`,
      `  Status: cancelled`,
    ];
    if (reason) {
      lines.push(`  Reason: ${reason}`);
    }

    return { content: lines.join('\n'), isError: false };
  }
}

registerTool(new CallEndTool());
@@ -0,0 +1,73 @@
1
+ import { RiskLevel } from '../../permissions/types.js';
2
+ import type { Tool, ToolContext, ToolExecutionResult } from '../types.js';
3
+ import type { ToolDefinition } from '../../providers/types.js';
4
+ import { registerTool } from '../registry.js';
5
+ import { startCall } from '../../calls/call-domain.js';
6
+ import { getConfig } from '../../config/loader.js';
7
+
8
const definition: ToolDefinition = {
  name: 'call_start',
  description:
    'Place an outbound phone call via AI voice. The assistant will converse with the callee on behalf of the user.',
  input_schema: {
    type: 'object',
    properties: {
      phone_number: {
        type: 'string',
        description: 'E.164 formatted phone number (e.g. +14155551234)',
      },
      task: {
        type: 'string',
        description: 'What the call should accomplish',
      },
      context: {
        type: 'string',
        description: 'Additional context for the conversation',
      },
    },
    required: ['phone_number', 'task'],
  },
};

/**
 * Tool that places an outbound phone call by delegating to `startCall`.
 *
 * Refuses to run when `calls.enabled` is off in the configuration. Required
 * arguments are checked at runtime before `startCall` is invoked, so a
 * missing or non-string value produces a clear tool error instead of being
 * passed through.
 */
class CallStartTool implements Tool {
  name = 'call_start';
  description = definition.description;
  category = 'communication';
  defaultRiskLevel = RiskLevel.High;

  getDefinition(): ToolDefinition {
    return definition;
  }

  /**
   * Start a call to `input.phone_number` for the given `input.task`.
   *
   * @param input Raw tool arguments (`phone_number`, `task`, optional `context`).
   * @param context Tool context; its `conversationId` is attached to the call.
   * @returns A summary with the session ID and call SID on success; an error
   *          result when calls are disabled, input is invalid, or `startCall`
   *          fails.
   */
  async execute(input: Record<string, unknown>, context: ToolContext): Promise<ToolExecutionResult> {
    if (!getConfig().calls.enabled) {
      return { content: 'Error: Calls feature is disabled via configuration. Set calls.enabled to true to use this feature.', isError: true };
    }

    const phoneNumber = input.phone_number;
    if (typeof phoneNumber !== 'string' || phoneNumber.length === 0) {
      return { content: 'Error: phone_number is required and must be a string', isError: true };
    }

    const task = input.task;
    if (typeof task !== 'string' || task.length === 0) {
      return { content: 'Error: task is required and must be a string', isError: true };
    }

    const extraContext = input.context;
    if (extraContext != null && typeof extraContext !== 'string') {
      return { content: 'Error: context must be a string when provided', isError: true };
    }

    const result = await startCall({
      phoneNumber,
      task,
      context: extraContext ?? undefined,
      conversationId: context.conversationId,
    });

    if (!result.ok) {
      return { content: `Error: ${result.error}`, isError: true };
    }

    return {
      content: [
        'Call initiated successfully.',
        `  Call Session ID: ${result.session.id}`,
        `  Call SID: ${result.callSid}`,
        `  To: ${result.session.toNumber}`,
        `  Status: initiated`,
        '',
        'The AI voice assistant is now placing the call. Use call_status to check progress.',
      ].join('\n'),
      isError: false,
    };
  }
}

registerTool(new CallStartTool());
@@ -0,0 +1,81 @@
1
+ import { RiskLevel } from '../../permissions/types.js';
2
+ import type { Tool, ToolContext, ToolExecutionResult } from '../types.js';
3
+ import type { ToolDefinition } from '../../providers/types.js';
4
+ import { registerTool } from '../registry.js';
5
+ import { getCallStatus } from '../../calls/call-domain.js';
6
+
7
+ const definition: ToolDefinition = {
8
+ name: 'call_status',
9
+ description: 'Check the status of an active or recent phone call',
10
+ input_schema: {
11
+ type: 'object',
12
+ properties: {
13
+ call_session_id: {
14
+ type: 'string',
15
+ description: 'Specific call session ID to check. If omitted, checks for an active call in the current conversation.',
16
+ },
17
+ },
18
+ required: [],
19
+ },
20
+ };
21
+
22
+ class CallStatusTool implements Tool {
23
+ name = 'call_status';
24
+ description = definition.description;
25
+ category = 'communication';
26
+ defaultRiskLevel = RiskLevel.Low;
27
+
28
+ getDefinition(): ToolDefinition {
29
+ return definition;
30
+ }
31
+
32
+ async execute(input: Record<string, unknown>, context: ToolContext): Promise<ToolExecutionResult> {
33
+ const callSessionId = input.call_session_id as string | undefined;
34
+
35
+ const result = getCallStatus(callSessionId, context.conversationId);
36
+
37
+ if (!result.ok) {
38
+ // When no active call is found and no specific ID was requested, it's not an error
39
+ if (!callSessionId && result.error === 'No active call found in the current conversation') {
40
+ return { content: result.error, isError: false };
41
+ }
42
+ return { content: `Error: ${result.error}`, isError: true };
43
+ }
44
+
45
+ const { session } = result;
46
+ const lines = [
47
+ `Call Session: ${session.id}`,
48
+ ` Status: ${session.status}`,
49
+ ` To: ${session.toNumber}`,
50
+ ` From: ${session.fromNumber}`,
51
+ ];
52
+
53
+ if (session.providerCallSid) {
54
+ lines.push(` Call SID: ${session.providerCallSid}`);
55
+ }
56
+
57
+ if (session.task) {
58
+ lines.push(` Task: ${session.task}`);
59
+ }
60
+
61
+ if (session.startedAt) {
62
+ const durationMs = (session.endedAt ?? Date.now()) - session.startedAt;
63
+ const durationSec = Math.round(durationMs / 1000);
64
+ lines.push(` Duration: ${durationSec}s`);
65
+ }
66
+
67
+ if (session.lastError) {
68
+ lines.push(` Last Error: ${session.lastError}`);
69
+ }
70
+
71
+ if (result.pendingQuestion) {
72
+ lines.push('');
73
+ lines.push(` Pending Question: ${result.pendingQuestion.questionText}`);
74
+ lines.push(` Question ID: ${result.pendingQuestion.id}`);
75
+ }
76
+
77
+ return { content: lines.join('\n'), isError: false };
78
+ }
79
+ }
80
+
81
+ registerTool(new CallStatusTool());
@@ -3,6 +3,7 @@ import type { Tool, ToolContext, ToolExecutionResult } from '../types.js';
3
3
  import type { ToolDefinition } from '../../providers/types.js';
4
4
  import { getConfig } from '../../config/loader.js';
5
5
  import { getLogger } from '../../util/logger.js';
6
+ import { truncate } from '../../util/truncate.js';
6
7
  import { getProfilePolicy } from '../../swarm/worker-backend.js';
7
8
  import type { WorkerProfile } from '../../swarm/worker-backend.js';
8
9
 
@@ -22,6 +23,11 @@ const APPROVAL_REQUIRED_TOOLS = new Set([
22
23
 
23
24
  const VALID_PROFILES: readonly WorkerProfile[] = ['general', 'researcher', 'coder', 'reviewer'];
24
25
 
26
+ // Maximum nesting depth for Claude Code subprocesses.
27
+ // Depth 0 = top-level assistant, depth 1 = first subprocess, etc.
28
+ const MAX_CLAUDE_CODE_DEPTH = 1;
29
+ const DEPTH_ENV_VAR = 'VELLUM_CLAUDE_CODE_DEPTH';
30
+
25
31
  export const claudeCodeTool: Tool = {
26
32
  name: 'claude_code',
27
33
  description: 'Delegate a coding task to Claude Code, an AI-powered coding agent that can read, write, and edit files, run shell commands, and perform complex multi-step software engineering tasks autonomously.',
@@ -108,7 +114,10 @@ export const claudeCodeTool: Tool = {
108
114
 
109
115
  const { query } = sdkModule;
110
116
 
111
- log.info({ prompt: prompt.slice(0, 100), workingDir, model, resume: !!resumeSessionId }, 'Starting Claude Code session');
117
+ // Collect stderr output from the Claude Code subprocess for debugging
118
+ const stderrLines: string[] = [];
119
+
120
+ log.info({ prompt: truncate(prompt, 100, ''), workingDir, model, resume: !!resumeSessionId }, 'Starting Claude Code session');
112
121
 
113
122
  // Build the canUseTool callback, enforcing profile-based restrictions
114
123
  const canUseTool: import('@anthropic-ai/claude-agent-sdk').CanUseTool = async (toolName, toolInput, _options) => {
@@ -151,6 +160,26 @@ export const claudeCodeTool: Tool = {
151
160
  }
152
161
  };
153
162
 
163
+ // Enforce nesting depth limit to prevent infinite recursion.
164
+ const currentDepth = parseInt(process.env[DEPTH_ENV_VAR] ?? '0', 10);
165
+ if (currentDepth >= MAX_CLAUDE_CODE_DEPTH) {
166
+ log.warn({ currentDepth, max: MAX_CLAUDE_CODE_DEPTH }, 'Claude Code nesting depth exceeded');
167
+ return {
168
+ content: `Error: Claude Code nesting depth exceeded (depth ${currentDepth}, max ${MAX_CLAUDE_CODE_DEPTH}). Cannot spawn another Claude Code subprocess.`,
169
+ isError: true,
170
+ };
171
+ }
172
+
173
+ // Build a clean env for the subprocess. Strip the SDK's own nesting guard
174
+ // (CLAUDECODE) so it can launch, but set our depth counter to enforce our limit.
175
+ const subprocessEnv: Record<string, string | undefined> = {
176
+ ...process.env,
177
+ ANTHROPIC_API_KEY: apiKey,
178
+ [DEPTH_ENV_VAR]: String(currentDepth + 1),
179
+ };
180
+ delete subprocessEnv.CLAUDECODE;
181
+ delete subprocessEnv.CLAUDE_CODE_ENTRYPOINT;
182
+
154
183
  // Build query options
155
184
  const queryOptions: import('@anthropic-ai/claude-agent-sdk').Options = {
156
185
  cwd: workingDir,
@@ -158,12 +187,16 @@ export const claudeCodeTool: Tool = {
158
187
  canUseTool,
159
188
  permissionMode: 'default',
160
189
  allowedTools: [...AUTO_APPROVE_TOOLS],
161
- env: {
162
- ...process.env,
163
- ANTHROPIC_API_KEY: apiKey,
164
- },
190
+ env: subprocessEnv,
165
191
  maxTurns: 50,
166
192
  persistSession: true,
193
+ stderr: (data: string) => {
194
+ const trimmed = data.trimEnd();
195
+ if (trimmed) {
196
+ stderrLines.push(trimmed);
197
+ log.debug({ stderr: trimmed }, 'Claude Code subprocess stderr');
198
+ }
199
+ },
167
200
  };
168
201
 
169
202
  if (resumeSessionId) {
@@ -179,6 +212,12 @@ export const claudeCodeTool: Tool = {
179
212
  for await (const message of conversation) {
180
213
  switch (message.type) {
181
214
  case 'assistant': {
215
+ // Check for SDK-level errors on the assistant message
216
+ if (message.error) {
217
+ log.error({ error: message.error, sessionId: message.session_id }, 'Claude Code assistant message error');
218
+ hasError = true;
219
+ resultText += `\n\n[Claude Code error: ${message.error}]`;
220
+ }
182
221
  // Extract text from assistant messages
183
222
  if (message.message?.content) {
184
223
  for (const block of message.message.content) {
@@ -193,22 +232,43 @@ export const claudeCodeTool: Tool = {
193
232
  }
194
233
  case 'result': {
195
234
  sessionId = message.session_id;
235
+ const resultMeta = {
236
+ subtype: message.subtype,
237
+ numTurns: message.num_turns,
238
+ durationMs: message.duration_ms,
239
+ costUsd: message.total_cost_usd,
240
+ stopReason: message.stop_reason,
241
+ };
242
+
196
243
  if (message.subtype === 'success') {
244
+ log.info(resultMeta, 'Claude Code session completed successfully');
197
245
  if (message.result && !resultText) {
198
246
  resultText = message.result;
199
247
  }
200
248
  } else {
201
- // Error result
249
+ // Error result — surface the subtype and details
202
250
  hasError = true;
203
251
  const errors = message.errors ?? [];
252
+ const denials = message.permission_denials ?? [];
253
+
254
+ log.error({ ...resultMeta, errors, permissionDenials: denials.length }, 'Claude Code session failed');
255
+
256
+ const parts: string[] = [];
257
+ parts.push(`[${message.subtype}] (${message.num_turns} turns, ${(message.duration_ms / 1000).toFixed(1)}s)`);
204
258
  if (errors.length > 0) {
205
- resultText += `\n\nErrors: ${errors.join(', ')}`;
259
+ parts.push(`Errors: ${errors.join('; ')}`);
260
+ }
261
+ if (denials.length > 0) {
262
+ const denialSummary = denials.map(d => `${d.tool_name}`).join(', ');
263
+ parts.push(`Permission denied: ${denialSummary}`);
206
264
  }
265
+ resultText += `\n\n${parts.join('\n')}`;
207
266
  }
208
267
  break;
209
268
  }
210
269
  default:
211
- // Ignore other message types (system, stream_event, etc.)
270
+ // Log unhandled message types at debug level for diagnostics
271
+ log.debug({ messageType: message.type }, 'Claude Code unhandled message type');
212
272
  break;
213
273
  }
214
274
  }
@@ -221,10 +281,16 @@ export const claudeCodeTool: Tool = {
221
281
  isError: hasError,
222
282
  };
223
283
  } catch (err) {
224
- const message = err instanceof Error ? err.message : String(err);
225
- log.error({ err }, 'Claude Code execution failed');
284
+ const errMessage = err instanceof Error ? err.message : String(err);
285
+ const recentStderr = stderrLines.slice(-20);
286
+ log.error({ err, stderrTail: recentStderr }, 'Claude Code execution failed');
287
+
288
+ const parts = [`Claude Code error: ${errMessage}`];
289
+ if (recentStderr.length > 0) {
290
+ parts.push(`\nSubprocess stderr (last ${recentStderr.length} lines):\n${recentStderr.join('\n')}`);
291
+ }
226
292
  return {
227
- content: `Claude Code error: ${message}`,
293
+ content: parts.join(''),
228
294
  isError: true,
229
295
  };
230
296
  }