vellum 0.2.0 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (361)
  1. package/README.md +15 -2
  2. package/bun.lock +5 -2
  3. package/package.json +4 -2
  4. package/scripts/capture-x-graphql.ts +562 -0
  5. package/scripts/ipc/check-swift-decoder-drift.ts +2 -1
  6. package/scripts/test.sh +5 -0
  7. package/src/__tests__/__snapshots__/ipc-snapshot.test.ts.snap +161 -34
  8. package/src/__tests__/account-registry.test.ts +2 -1
  9. package/src/__tests__/agent-heartbeat-service.test.ts +250 -0
  10. package/src/__tests__/app-bundler.test.ts +12 -33
  11. package/src/__tests__/asset-materialize-tool.test.ts +16 -15
  12. package/src/__tests__/asset-search-tool.test.ts +23 -22
  13. package/src/__tests__/attachments-store.test.ts +56 -127
  14. package/src/__tests__/browser-skill-baseline-tool-payload.test.ts +5 -4
  15. package/src/__tests__/browser-skill-endstate.test.ts +5 -8
  16. package/src/__tests__/call-bridge.test.ts +385 -0
  17. package/src/__tests__/call-constants.test.ts +40 -0
  18. package/src/__tests__/call-orchestrator.test.ts +454 -0
  19. package/src/__tests__/call-recovery.test.ts +518 -0
  20. package/src/__tests__/call-routes-http.test.ts +459 -0
  21. package/src/__tests__/call-state-machine.test.ts +143 -0
  22. package/src/__tests__/call-state.test.ts +133 -0
  23. package/src/__tests__/call-store.test.ts +691 -0
  24. package/src/__tests__/cli-discover.test.ts +1 -1
  25. package/src/__tests__/commit-message-enrichment-service.test.ts +550 -0
  26. package/src/__tests__/compaction.benchmark.test.ts +176 -0
  27. package/src/__tests__/computer-use-tools.test.ts +250 -0
  28. package/src/__tests__/config-schema.test.ts +348 -3
  29. package/src/__tests__/conflict-store.test.ts +2 -1
  30. package/src/__tests__/contacts-tools.test.ts +331 -0
  31. package/src/__tests__/conversation-store.test.ts +30 -32
  32. package/src/__tests__/credential-security-invariants.test.ts +4 -0
  33. package/src/__tests__/date-context.test.ts +373 -0
  34. package/src/__tests__/db-schedule-syntax-migration.test.ts +129 -0
  35. package/src/__tests__/doordash-session.test.ts +9 -0
  36. package/src/__tests__/fixtures/media-reuse-fixtures.ts +3 -3
  37. package/src/__tests__/followup-tools.test.ts +303 -0
  38. package/src/__tests__/handlers-twitter-config.test.ts +718 -0
  39. package/src/__tests__/intent-routing.test.ts +64 -57
  40. package/src/__tests__/ipc-roundtrip.benchmark.test.ts +237 -0
  41. package/src/__tests__/ipc-snapshot.test.ts +96 -28
  42. package/src/__tests__/llm-usage-store.test.ts +3 -8
  43. package/src/__tests__/media-generate-image.test.ts +1 -1
  44. package/src/__tests__/media-reuse-story.e2e.test.ts +7 -7
  45. package/src/__tests__/memory-retrieval.benchmark.test.ts +430 -0
  46. package/src/__tests__/parallel-tool.benchmark.test.ts +294 -0
  47. package/src/__tests__/playbook-tools.test.ts +342 -0
  48. package/src/__tests__/profile-compiler.test.ts +2 -1
  49. package/src/__tests__/provider-streaming.benchmark.test.ts +773 -0
  50. package/src/__tests__/recurrence-engine-rruleset.test.ts +78 -0
  51. package/src/__tests__/recurrence-engine.test.ts +69 -0
  52. package/src/__tests__/recurrence-types.test.ts +71 -0
  53. package/src/__tests__/registry.test.ts +17 -10
  54. package/src/__tests__/relay-server.test.ts +633 -0
  55. package/src/__tests__/reminder-store.test.ts +6 -3
  56. package/src/__tests__/reminder.test.ts +43 -77
  57. package/src/__tests__/run-orchestrator-assistant-events.test.ts +222 -0
  58. package/src/__tests__/run-orchestrator.test.ts +7 -7
  59. package/src/__tests__/runtime-attachment-metadata.test.ts +19 -20
  60. package/src/__tests__/runtime-runs-http.test.ts +5 -23
  61. package/src/__tests__/runtime-runs.test.ts +11 -11
  62. package/src/__tests__/schedule-store.test.ts +482 -0
  63. package/src/__tests__/schedule-tools.test.ts +700 -0
  64. package/src/__tests__/scheduler-recurrence.test.ts +329 -0
  65. package/src/__tests__/server-history-render.test.ts +14 -13
  66. package/src/__tests__/session-error.test.ts +28 -0
  67. package/src/__tests__/session-init.benchmark.test.ts +462 -0
  68. package/src/__tests__/session-queue.test.ts +89 -16
  69. package/src/__tests__/session-runtime-assembly.test.ts +161 -0
  70. package/src/__tests__/session-surfaces-task-progress.test.ts +104 -0
  71. package/src/__tests__/signup-e2e.test.ts +2 -1
  72. package/src/__tests__/skill-projection.benchmark.test.ts +328 -0
  73. package/src/__tests__/skill-script-runner.test.ts +159 -0
  74. package/src/__tests__/speaker-identification.test.ts +52 -0
  75. package/src/__tests__/subagent-manager-notify.test.ts +42 -10
  76. package/src/__tests__/subagent-tools.test.ts +141 -41
  77. package/src/__tests__/task-compiler.test.ts +2 -1
  78. package/src/__tests__/task-runner.test.ts +2 -1
  79. package/src/__tests__/task-scheduler.test.ts +2 -1
  80. package/src/__tests__/task-tools.test.ts +49 -56
  81. package/src/__tests__/tool-audit-listener.test.ts +1 -0
  82. package/src/__tests__/tool-domain-event-publisher.test.ts +2 -0
  83. package/src/__tests__/tool-execution-pipeline.benchmark.test.ts +500 -0
  84. package/src/__tests__/tool-executor.test.ts +13 -17
  85. package/src/__tests__/turn-commit.test.ts +273 -2
  86. package/src/__tests__/twilio-provider.test.ts +143 -0
  87. package/src/__tests__/twilio-routes.test.ts +789 -0
  88. package/src/__tests__/twitter-auth-handler.test.ts +581 -0
  89. package/src/__tests__/view-image-tool.test.ts +217 -0
  90. package/src/__tests__/workspace-git-service.test.ts +403 -0
  91. package/src/__tests__/workspace-heartbeat-service.test.ts +141 -2
  92. package/src/agent-heartbeat/agent-heartbeat-service.ts +155 -0
  93. package/src/bundler/app-bundler.ts +35 -14
  94. package/src/calls/call-bridge.ts +95 -0
  95. package/src/calls/call-constants.ts +48 -0
  96. package/src/calls/call-domain.ts +276 -0
  97. package/src/calls/call-orchestrator.ts +390 -0
  98. package/src/calls/call-recovery.ts +207 -0
  99. package/src/calls/call-state-machine.ts +68 -0
  100. package/src/calls/call-state.ts +64 -0
  101. package/src/calls/call-store.ts +416 -0
  102. package/src/calls/relay-server.ts +335 -0
  103. package/src/calls/speaker-identification.ts +213 -0
  104. package/src/calls/twilio-config.ts +34 -0
  105. package/src/calls/twilio-provider.ts +173 -0
  106. package/src/calls/twilio-routes.ts +250 -0
  107. package/src/calls/types.ts +37 -0
  108. package/src/calls/voice-provider.ts +14 -0
  109. package/src/cli/config-commands.ts +334 -0
  110. package/src/cli/core-commands.ts +776 -0
  111. package/src/cli/doordash.ts +256 -25
  112. package/src/cli/ipc-client.ts +82 -0
  113. package/src/cli/map.ts +246 -0
  114. package/src/cli/twitter.ts +575 -0
  115. package/src/cli.ts +7 -5
  116. package/src/commands/__tests__/cc-command-registry.test.ts +319 -0
  117. package/src/commands/cc-command-registry.ts +209 -0
  118. package/src/config/bundled-skills/contacts/SKILL.md +39 -0
  119. package/src/config/bundled-skills/contacts/TOOLS.json +122 -0
  120. package/src/config/bundled-skills/contacts/tools/contact-merge.ts +9 -0
  121. package/src/config/bundled-skills/contacts/tools/contact-search.ts +9 -0
  122. package/src/config/bundled-skills/contacts/tools/contact-upsert.ts +9 -0
  123. package/src/config/bundled-skills/document/SKILL.md +18 -0
  124. package/src/config/bundled-skills/document/TOOLS.json +53 -0
  125. package/src/config/bundled-skills/document/tools/document-create.ts +9 -0
  126. package/src/config/bundled-skills/document/tools/document-update.ts +9 -0
  127. package/src/config/bundled-skills/doordash/SKILL.md +163 -0
  128. package/src/config/bundled-skills/followups/SKILL.md +32 -0
  129. package/src/config/bundled-skills/followups/TOOLS.json +100 -0
  130. package/src/config/bundled-skills/followups/tools/followup-create.ts +9 -0
  131. package/src/config/bundled-skills/followups/tools/followup-list.ts +9 -0
  132. package/src/config/bundled-skills/followups/tools/followup-resolve.ts +9 -0
  133. package/src/config/bundled-skills/image-studio/TOOLS.json +2 -2
  134. package/src/config/bundled-skills/image-studio/tools/media-generate-image.ts +2 -24
  135. package/src/config/bundled-skills/messaging/tools/messaging-analyze-style.ts +2 -1
  136. package/src/config/bundled-skills/playbooks/SKILL.md +31 -0
  137. package/src/config/bundled-skills/playbooks/TOOLS.json +126 -0
  138. package/src/config/bundled-skills/playbooks/tools/playbook-create.ts +9 -0
  139. package/src/config/bundled-skills/playbooks/tools/playbook-delete.ts +9 -0
  140. package/src/config/bundled-skills/playbooks/tools/playbook-list.ts +9 -0
  141. package/src/config/bundled-skills/playbooks/tools/playbook-update.ts +9 -0
  142. package/src/config/bundled-skills/reminder/SKILL.md +20 -0
  143. package/src/config/bundled-skills/reminder/TOOLS.json +67 -0
  144. package/src/config/bundled-skills/reminder/tools/reminder-cancel.ts +9 -0
  145. package/src/config/bundled-skills/reminder/tools/reminder-create.ts +9 -0
  146. package/src/config/bundled-skills/reminder/tools/reminder-list.ts +9 -0
  147. package/src/config/bundled-skills/schedule/SKILL.md +74 -0
  148. package/src/config/bundled-skills/schedule/TOOLS.json +135 -0
  149. package/src/config/bundled-skills/schedule/tools/schedule-create.ts +9 -0
  150. package/src/config/bundled-skills/schedule/tools/schedule-delete.ts +9 -0
  151. package/src/config/bundled-skills/schedule/tools/schedule-list.ts +9 -0
  152. package/src/config/bundled-skills/schedule/tools/schedule-update.ts +9 -0
  153. package/src/config/bundled-skills/subagent/SKILL.md +25 -0
  154. package/src/config/bundled-skills/subagent/TOOLS.json +107 -0
  155. package/src/config/bundled-skills/subagent/tools/subagent-abort.ts +9 -0
  156. package/src/config/bundled-skills/subagent/tools/subagent-message.ts +9 -0
  157. package/src/config/bundled-skills/subagent/tools/subagent-read.ts +9 -0
  158. package/src/config/bundled-skills/subagent/tools/subagent-spawn.ts +9 -0
  159. package/src/config/bundled-skills/subagent/tools/subagent-status.ts +9 -0
  160. package/src/config/bundled-skills/tasks/SKILL.md +28 -0
  161. package/src/config/bundled-skills/tasks/TOOLS.json +256 -0
  162. package/src/config/bundled-skills/tasks/tools/task-delete.ts +9 -0
  163. package/src/config/bundled-skills/tasks/tools/task-list-add.ts +9 -0
  164. package/src/config/bundled-skills/tasks/tools/task-list-remove.ts +9 -0
  165. package/src/config/bundled-skills/tasks/tools/task-list-show.ts +9 -0
  166. package/src/config/bundled-skills/tasks/tools/task-list-update.ts +9 -0
  167. package/src/config/bundled-skills/tasks/tools/task-list.ts +9 -0
  168. package/src/config/bundled-skills/tasks/tools/task-run.ts +9 -0
  169. package/src/config/bundled-skills/tasks/tools/task-save.ts +9 -0
  170. package/src/config/bundled-skills/twitter/SKILL.md +134 -0
  171. package/src/config/bundled-skills/watcher/SKILL.md +27 -0
  172. package/src/config/bundled-skills/watcher/TOOLS.json +147 -0
  173. package/src/config/bundled-skills/watcher/tools/watcher-create.ts +9 -0
  174. package/src/config/bundled-skills/watcher/tools/watcher-delete.ts +9 -0
  175. package/src/config/bundled-skills/watcher/tools/watcher-digest.ts +9 -0
  176. package/src/config/bundled-skills/watcher/tools/watcher-list.ts +9 -0
  177. package/src/config/bundled-skills/watcher/tools/watcher-update.ts +9 -0
  178. package/src/config/defaults.ts +44 -0
  179. package/src/config/loader.ts +4 -1
  180. package/src/config/schema.ts +218 -1
  181. package/src/config/system-prompt.ts +100 -6
  182. package/src/config/templates/IDENTITY.md +7 -0
  183. package/src/config/types.ts +5 -0
  184. package/src/contacts/contact-store.ts +4 -4
  185. package/src/daemon/assistant-attachments.ts +10 -0
  186. package/src/daemon/classifier.ts +3 -1
  187. package/src/daemon/computer-use-session.ts +3 -1
  188. package/src/daemon/date-context.ts +136 -0
  189. package/src/daemon/handlers/apps.ts +16 -1
  190. package/src/daemon/handlers/browser.ts +54 -0
  191. package/src/daemon/handlers/computer-use.ts +7 -1
  192. package/src/daemon/handlers/config.ts +192 -4
  193. package/src/daemon/handlers/diagnostics.ts +5 -1
  194. package/src/daemon/handlers/documents.ts +18 -29
  195. package/src/daemon/handlers/home-base.ts +5 -1
  196. package/src/daemon/handlers/index.ts +40 -271
  197. package/src/daemon/handlers/misc.ts +9 -1
  198. package/src/daemon/handlers/publish.ts +6 -1
  199. package/src/daemon/handlers/sessions.ts +65 -12
  200. package/src/daemon/handlers/shared.ts +36 -1
  201. package/src/daemon/handlers/signing.ts +37 -0
  202. package/src/daemon/handlers/skills.ts +20 -6
  203. package/src/daemon/handlers/subagents.ts +8 -3
  204. package/src/daemon/handlers/twitter-auth.ts +169 -0
  205. package/src/daemon/handlers/work-items.ts +495 -39
  206. package/src/daemon/ipc-contract-inventory.json +40 -4
  207. package/src/daemon/ipc-contract.ts +185 -37
  208. package/src/daemon/ipc-protocol.ts +7 -2
  209. package/src/daemon/lifecycle.ts +48 -5
  210. package/src/daemon/main.ts +10 -4
  211. package/src/daemon/ride-shotgun-handler.ts +74 -10
  212. package/src/daemon/server.ts +144 -29
  213. package/src/daemon/session-agent-loop.ts +887 -0
  214. package/src/daemon/session-attachments.ts +28 -5
  215. package/src/daemon/session-error.ts +24 -3
  216. package/src/daemon/session-lifecycle.ts +147 -0
  217. package/src/daemon/session-media-retry.ts +147 -0
  218. package/src/daemon/session-messaging.ts +145 -0
  219. package/src/daemon/session-notifiers.ts +164 -0
  220. package/src/daemon/session-process.ts +2 -2
  221. package/src/daemon/session-queue-manager.ts +1 -0
  222. package/src/daemon/session-runtime-assembly.ts +52 -0
  223. package/src/daemon/session-skill-tools.ts +124 -5
  224. package/src/daemon/session-slash.ts +3 -0
  225. package/src/daemon/session-surfaces.ts +77 -2
  226. package/src/daemon/session-tool-setup.ts +222 -2
  227. package/src/daemon/session-usage.ts +0 -2
  228. package/src/daemon/session.ts +114 -1365
  229. package/src/daemon/video-thumbnail.ts +60 -0
  230. package/src/doordash/client.ts +121 -27
  231. package/src/doordash/queries.ts +1 -2
  232. package/src/export/formatter.ts +3 -1
  233. package/src/followups/followup-store.ts +4 -2
  234. package/src/followups/types.ts +6 -0
  235. package/src/hooks/templates.ts +1 -1
  236. package/src/index.ts +32 -1151
  237. package/src/media/gemini-image-service.ts +1 -1
  238. package/src/memory/attachments-store.ts +28 -83
  239. package/src/memory/channel-delivery-store.ts +7 -21
  240. package/src/memory/clarification-resolver.ts +6 -5
  241. package/src/memory/contradiction-checker.ts +3 -2
  242. package/src/memory/conversation-key-store.ts +10 -29
  243. package/src/memory/conversation-store.ts +2 -1
  244. package/src/memory/db.ts +362 -2
  245. package/src/memory/entity-extractor.ts +6 -3
  246. package/src/memory/items-extractor.ts +5 -4
  247. package/src/memory/jobs-store.ts +3 -2
  248. package/src/memory/llm-usage-store.ts +1 -2
  249. package/src/memory/runs-store.ts +1 -2
  250. package/src/memory/schema.ts +65 -2
  251. package/src/messaging/style-analyzer.ts +3 -2
  252. package/src/messaging/thread-summarizer.ts +8 -12
  253. package/src/messaging/triage-engine.ts +4 -2
  254. package/src/providers/openrouter/client.ts +20 -0
  255. package/src/providers/registry.ts +8 -0
  256. package/src/runtime/http-server.ts +277 -25
  257. package/src/runtime/http-types.ts +0 -2
  258. package/src/runtime/routes/attachment-routes.ts +5 -6
  259. package/src/runtime/routes/call-routes.ts +140 -0
  260. package/src/runtime/routes/channel-routes.ts +12 -19
  261. package/src/runtime/routes/conversation-routes.ts +5 -9
  262. package/src/runtime/routes/run-routes.ts +4 -8
  263. package/src/runtime/run-orchestrator.ts +39 -6
  264. package/src/schedule/recurrence-engine.ts +138 -0
  265. package/src/schedule/recurrence-types.ts +67 -0
  266. package/src/schedule/schedule-store.ts +102 -57
  267. package/src/schedule/scheduler.ts +9 -6
  268. package/src/security/oauth2.ts +29 -4
  269. package/src/security/secret-allowlist.ts +46 -0
  270. package/src/skills/clawhub.ts +1 -1
  271. package/src/subagent/manager.ts +40 -8
  272. package/src/swarm/backend-claude-code.ts +64 -9
  273. package/src/swarm/worker-prompts.ts +2 -1
  274. package/src/tasks/SPEC.md +34 -28
  275. package/src/tasks/ephemeral-permissions.ts +16 -7
  276. package/src/tasks/task-compiler.ts +5 -4
  277. package/src/tasks/task-runner.ts +10 -5
  278. package/src/tasks/task-scheduler.ts +1 -1
  279. package/src/tasks/tool-sanitizer.ts +36 -0
  280. package/src/tools/assets/search.ts +4 -4
  281. package/src/tools/browser/api-map.ts +220 -0
  282. package/src/tools/browser/auto-navigate.ts +270 -0
  283. package/src/tools/browser/browser-execution.ts +2 -1
  284. package/src/tools/browser/browser-manager.ts +2 -2
  285. package/src/tools/browser/network-recorder.ts +5 -4
  286. package/src/tools/browser/x-auto-navigate.ts +207 -0
  287. package/src/tools/calls/call-end.ts +67 -0
  288. package/src/tools/calls/call-start.ts +73 -0
  289. package/src/tools/calls/call-status.ts +81 -0
  290. package/src/tools/claude-code/claude-code.ts +77 -11
  291. package/src/tools/contacts/contact-merge.ts +46 -78
  292. package/src/tools/contacts/contact-search.ts +35 -79
  293. package/src/tools/contacts/contact-upsert.ts +35 -108
  294. package/src/tools/credentials/vault.ts +21 -5
  295. package/src/tools/document/document-tool.ts +71 -144
  296. package/src/tools/executor.ts +129 -10
  297. package/src/tools/followups/followup_create.ts +46 -88
  298. package/src/tools/followups/followup_list.ts +34 -74
  299. package/src/tools/followups/followup_resolve.ts +31 -66
  300. package/src/tools/host-terminal/cli-discover.ts +2 -1
  301. package/src/tools/host-terminal/host-shell.ts +10 -0
  302. package/src/tools/memory/handlers.ts +5 -4
  303. package/src/tools/network/__tests__/web-search.test.ts +427 -0
  304. package/src/tools/network/script-proxy/__tests__/logging.test.ts +248 -0
  305. package/src/tools/network/script-proxy/__tests__/policy.test.ts +234 -0
  306. package/src/tools/network/script-proxy/__tests__/router.test.ts +76 -0
  307. package/src/tools/network/web-fetch.ts +18 -6
  308. package/src/tools/playbooks/index.ts +4 -5
  309. package/src/tools/playbooks/playbook-create.ts +3 -47
  310. package/src/tools/playbooks/playbook-delete.ts +1 -25
  311. package/src/tools/playbooks/playbook-list.ts +1 -28
  312. package/src/tools/playbooks/playbook-update.ts +3 -51
  313. package/src/tools/registry.ts +2 -4
  314. package/src/tools/reminder/reminder.ts +5 -78
  315. package/src/tools/schedule/create.ts +69 -74
  316. package/src/tools/schedule/delete.ts +21 -47
  317. package/src/tools/schedule/list.ts +55 -74
  318. package/src/tools/schedule/update.ts +77 -84
  319. package/src/tools/subagent/abort.ts +29 -58
  320. package/src/tools/subagent/message.ts +30 -63
  321. package/src/tools/subagent/read.ts +53 -84
  322. package/src/tools/subagent/spawn.ts +43 -82
  323. package/src/tools/subagent/status.ts +42 -71
  324. package/src/tools/swarm/delegate.ts +2 -1
  325. package/src/tools/tasks/index.ts +8 -6
  326. package/src/tools/tasks/task-delete.ts +69 -56
  327. package/src/tools/tasks/task-list.ts +31 -52
  328. package/src/tools/tasks/task-run.ts +74 -102
  329. package/src/tools/tasks/task-save.ts +33 -65
  330. package/src/tools/tasks/work-item-enqueue.ts +192 -134
  331. package/src/tools/tasks/work-item-list.ts +33 -78
  332. package/src/tools/tasks/work-item-remove.ts +60 -0
  333. package/src/tools/tasks/work-item-update.ts +114 -0
  334. package/src/tools/terminal/backends/native.ts +3 -1
  335. package/src/tools/tool-manifest.ts +20 -74
  336. package/src/tools/types.ts +6 -0
  337. package/src/tools/ui-surface/definitions.ts +6 -1
  338. package/src/tools/watch/screen-watch.ts +3 -1
  339. package/src/tools/watcher/create.ts +52 -98
  340. package/src/tools/watcher/delete.ts +20 -46
  341. package/src/tools/watcher/digest.ts +36 -70
  342. package/src/tools/watcher/list.ts +49 -79
  343. package/src/tools/watcher/update.ts +45 -91
  344. package/src/twitter/client.ts +690 -0
  345. package/src/twitter/session.ts +91 -0
  346. package/src/usage/types.ts +0 -1
  347. package/src/util/truncate.ts +6 -0
  348. package/src/watcher/providers/slack.ts +2 -1
  349. package/src/watcher/watcher-store.ts +3 -2
  350. package/src/work-items/work-item-store.ts +236 -2
  351. package/src/workspace/commit-message-enrichment-service.ts +284 -0
  352. package/src/workspace/commit-message-provider.ts +95 -0
  353. package/src/workspace/git-service.ts +272 -52
  354. package/src/workspace/heartbeat-service.ts +70 -13
  355. package/src/workspace/provider-commit-message-generator.ts +242 -0
  356. package/src/workspace/turn-commit.ts +100 -51
  357. package/src/tools/contacts/index.ts +0 -4
  358. package/src/tools/document/index.ts +0 -5
  359. package/src/tools/followups/index.ts +0 -3
  360. package/src/tools/subagent/index.ts +0 -5
  361. /package/src/__tests__/{memory-context-benchmark.test.ts → memory-context-benchmark.benchmark.test.ts} +0 -0
@@ -6,7 +6,7 @@
6
6
  * keeping the constructor body focused on wiring.
7
7
  */
8
8
 
9
- import type { ToolDefinition } from '../providers/types.js';
9
+ import type { Message, ToolDefinition } from '../providers/types.js';
10
10
  import type { ToolExecutionResult, ToolLifecycleEventHandler } from '../tools/types.js';
11
11
  import type { ServerMessage, UiSurfaceShow } from './ipc-protocol.js';
12
12
  import type { ToolExecutor } from '../tools/executor.js';
@@ -26,6 +26,7 @@ import type { SurfaceSessionContext } from './session-surfaces.js';
26
26
  import { updatePublishedAppDeployment } from '../services/published-app-updater.js';
27
27
  import { registerSessionSender } from '../tools/browser/browser-screencast.js';
28
28
  import type { ProxyApprovalCallback, ProxyApprovalRequest } from '../tools/network/script-proxy/index.js';
29
+ import { projectSkillTools, type SkillProjectionCache } from './session-skill-tools.js';
29
30
 
30
31
  // ── Context Interface ────────────────────────────────────────────────
31
32
 
@@ -46,6 +47,10 @@ export interface ToolSetupContext extends SurfaceSessionContext {
46
47
  memoryPolicy: { scopeId: string; strictSideEffects: boolean };
47
48
  /** True when the session has no connected IPC client (HTTP-only path). */
48
49
  hasNoClient?: boolean;
50
+ /** When true, the session is executing a task run and must not become interactive. */
51
+ headlessLock?: boolean;
52
+ /** When set, this session is executing a task run. Used to retrieve ephemeral permission rules. */
53
+ taskRunId?: string;
49
54
  }
50
55
 
51
56
  // ── buildToolDefinitions ─────────────────────────────────────────────
@@ -65,6 +70,59 @@ export function buildToolDefinitions(): ToolDefinition[] {
65
70
  ];
66
71
  }
67
72
 
73
+ // ── DoorDash task_progress auto-update ────────────────────────────────
74
+
75
/**
 * Type guard for "record-like" values: anything whose `typeof` is
 * `'object'`, excluding `null` and arrays.
 */
function isPlainObject(value: unknown): value is Record<string, unknown> {
  if (value === null || Array.isArray(value)) {
    return false;
  }
  return typeof value === 'object';
}
78
+
79
/** One row of the DoorDash `task_progress` card. */
interface DoordashStep {
  /** Display name; also the key that CLI subcommands are matched against. */
  label: string;
  /** Progress marker; this file writes 'pending', 'in_progress', 'waiting' and 'completed'. */
  status: string;
  /** Optional extra text carried along with the step. */
  detail?: string;
}
80
+
81
/**
 * Ordered (pattern, step label) pairs for `vellum doordash <subcommand>`.
 * The first pattern that matches anywhere in the command string wins, so
 * earlier rows take priority when a command line contains several invocations.
 */
const DOORDASH_STEP_PATTERNS: ReadonlyArray<readonly [RegExp, string]> = [
  [/vellum doordash status\b/, 'Check session'],
  [/vellum doordash refresh\b/, 'Check session'],
  [/vellum doordash login\b/, 'Check session'],
  [/vellum doordash search\b/, 'Search restaurants'],
  [/vellum doordash search-items\b/, 'Search restaurants'],
  [/vellum doordash menu\b/, 'Browse menu'],
  [/vellum doordash item\b/, 'Browse menu'],
  [/vellum doordash store-search\b/, 'Browse menu'],
  [/vellum doordash cart\b/, 'Add to cart'],
  [/vellum doordash checkout\b/, 'Add to cart'],
  [/vellum doordash payment-methods\b/, 'Add to cart'],
  [/vellum doordash order\b/, 'Place order'],
];

/**
 * Resolve a shell command containing `vellum doordash <subcommand>` to the
 * progress-card step label it belongs to.
 *
 * @param cmd Raw command string as passed to the bash tool.
 * @returns The matching step label, or `null` when no known subcommand is present.
 */
function doordashCommandToStep(cmd: string): string | null {
  for (const [pattern, label] of DOORDASH_STEP_PATTERNS) {
    if (pattern.test(cmd)) {
      return label;
    }
  }
  return null;
}
93
+
94
/**
 * Compute the progress-card steps after a DoorDash CLI command has finished.
 *
 * Rules, relative to the step the command maps to:
 *  - every earlier step becomes `completed`;
 *  - the step itself becomes `completed`, or stays/turns `in_progress` when the
 *    command failed (it is expected to be retried);
 *  - on success, the step immediately after becomes `waiting` (the user may
 *    need to respond before it starts);
 *  - all later steps are left untouched.
 *
 * The input array and its elements are never mutated; changed steps are
 * shallow copies and unchanged steps are returned by reference.
 *
 * @param cmd Command string that just ran.
 * @param steps Current steps of the card.
 * @param isError Whether the command reported an error.
 * @returns The new steps array, or `null` when the command maps to no step,
 *   the step label is not present in `steps`, or no status would change
 *   (so callers can skip a redundant surface update).
 */
function updateDoordashSteps(cmd: string, steps: DoordashStep[], isError: boolean): DoordashStep[] | null {
  const stepLabel = doordashCommandToStep(cmd);
  if (!stepLabel) return null;

  const stepIndex = steps.findIndex(s => s.label === stepLabel);
  if (stepIndex < 0) return null;

  // Copy only when the status actually differs, so reference equality below
  // tells us whether anything changed.
  const withStatus = (step: DoordashStep, status: string): DoordashStep =>
    step.status === status ? step : { ...step, status };

  const updated = steps.map((s, i) => {
    if (i < stepIndex) return withStatus(s, 'completed');
    if (i === stepIndex) return withStatus(s, isError ? 'in_progress' : 'completed');
    if (i === stepIndex + 1 && !isError) return withStatus(s, 'waiting');
    return s;
  });

  // Honour the documented contract: report "no change" as null.
  const changed = updated.some((s, i) => s !== steps[i]);
  return changed ? updated : null;
}
125
+
68
126
  // ── createToolExecutor ───────────────────────────────────────────────
69
127
 
70
128
  /**
@@ -85,11 +143,47 @@ export function createToolExecutor(
85
143
  registerSessionSender(ctx.conversationId, (msg) => ctx.sendToClient(msg));
86
144
 
87
145
  return async (name: string, input: Record<string, unknown>, onOutput?: (chunk: string) => void) => {
146
+ // Pre-execution: mark the current DoorDash step as in_progress when command starts
147
+ if (name === 'bash' || name === 'host_bash') {
148
+ const preCmd = input.command as string | undefined;
149
+ if (preCmd?.includes('vellum doordash')) {
150
+ const surfaceId = 'doordash-progress';
151
+ const stored = ctx.surfaceState.get(surfaceId);
152
+ if (stored && stored.surfaceType === 'card') {
153
+ const card = stored.data as import('./ipc-contract.js').CardSurfaceData;
154
+ if (card.template === 'task_progress' && isPlainObject(card.templateData)) {
155
+ const steps = (card.templateData as Record<string, unknown>).steps;
156
+ if (Array.isArray(steps)) {
157
+ const stepLabel = doordashCommandToStep(preCmd);
158
+ if (stepLabel) {
159
+ const stepIndex = (steps as DoordashStep[]).findIndex(s => s.label === stepLabel);
160
+ if (stepIndex >= 0 && (steps as DoordashStep[])[stepIndex].status !== 'in_progress') {
161
+ const updatedSteps = (steps as DoordashStep[]).map((s, i) =>
162
+ i === stepIndex ? { ...s, status: 'in_progress' } : s
163
+ );
164
+ const updatedTemplateData = { ...card.templateData as Record<string, unknown>, steps: updatedSteps };
165
+ const updatedData = { ...card, templateData: updatedTemplateData };
166
+ stored.data = updatedData as import('./ipc-contract.js').CardSurfaceData;
167
+ ctx.sendToClient({
168
+ type: 'ui_surface_update',
169
+ sessionId: ctx.conversationId,
170
+ surfaceId,
171
+ data: updatedData,
172
+ });
173
+ }
174
+ }
175
+ }
176
+ }
177
+ }
178
+ }
179
+ }
180
+
88
181
  const result = await executor.execute(name, input, {
89
182
  workingDir: ctx.workingDir,
90
183
  sessionId: ctx.conversationId,
91
184
  conversationId: ctx.conversationId,
92
185
  requestId: ctx.currentRequestId,
186
+ taskRunId: ctx.taskRunId,
93
187
  onOutput,
94
188
  signal: ctx.abortController?.signal,
95
189
  sandboxOverride: ctx.sandboxOverride,
@@ -114,7 +208,7 @@ export function createToolExecutor(
114
208
  });
115
209
  }
116
210
  },
117
- isInteractive: !ctx.hasNoClient,
211
+ isInteractive: !ctx.hasNoClient && !ctx.headlessLock,
118
212
  proxyToolResolver: (toolName: string, proxyInput: Record<string, unknown>) => surfaceProxyResolver(ctx, toolName, proxyInput),
119
213
  proxyApprovalCallback: createProxyApprovalCallback(prompter, ctx),
120
214
  requestSecret: async (params) => {
@@ -181,6 +275,12 @@ export function createToolExecutor(
181
275
  ctx.sendToClient({ type: 'open_tasks_window' });
182
276
  }
183
277
 
278
+ // Broadcast tasks_changed so connected clients (e.g. macOS Tasks window)
279
+ // auto-refresh when the LLM mutates the task queue via tools
280
+ if ((name === 'task_list_add' || name === 'task_list_update' || name === 'task_list_remove') && !result.isError) {
281
+ broadcastToAllClients?.({ type: 'tasks_changed' });
282
+ }
283
+
184
284
  // Auto-refresh workspace surfaces when app files are edited
185
285
  if ((name === 'app_file_edit' || name === 'app_file_write') && !result.isError) {
186
286
  const appId = input.app_id as string | undefined;
@@ -192,6 +292,74 @@ export function createToolExecutor(
192
292
  }
193
293
  }
194
294
 
295
+ // Auto-emit task_progress card on first DoorDash CLI command
296
+ if (name === 'bash' || name === 'host_bash') {
297
+ const cmd = input.command as string | undefined;
298
+ if (cmd?.includes('vellum doordash')) {
299
+ const surfaceId = 'doordash-progress';
300
+
301
+ if (!ctx.surfaceState.has(surfaceId)) {
302
+ // First DoorDash command — auto-emit the task_progress card
303
+ const data = {
304
+ title: 'Ordering from DoorDash',
305
+ body: '',
306
+ template: 'task_progress' as const,
307
+ templateData: {
308
+ title: 'Ordering from DoorDash',
309
+ status: 'in_progress',
310
+ steps: [
311
+ { label: 'Check session', status: 'in_progress' },
312
+ { label: 'Search restaurants', status: 'pending' },
313
+ { label: 'Browse menu', status: 'pending' },
314
+ { label: 'Add to cart', status: 'pending' },
315
+ { label: 'Place order', status: 'pending' },
316
+ ],
317
+ },
318
+ } satisfies import('./ipc-contract.js').CardSurfaceData;
319
+ ctx.surfaceState.set(surfaceId, { surfaceType: 'card', data });
320
+ ctx.sendToClient({
321
+ type: 'ui_surface_show',
322
+ sessionId: ctx.conversationId,
323
+ surfaceId,
324
+ surfaceType: 'card',
325
+ title: 'Ordering from DoorDash',
326
+ data,
327
+ display: 'inline',
328
+ });
329
+ ctx.currentTurnSurfaces.push({
330
+ surfaceId,
331
+ surfaceType: 'card',
332
+ title: 'Ordering from DoorDash',
333
+ data,
334
+ display: 'inline',
335
+ });
336
+ }
337
+
338
+ // Auto-update step statuses based on the command that just ran
339
+ const stored = ctx.surfaceState.get(surfaceId);
340
+ if (stored && stored.surfaceType === 'card') {
341
+ const card = stored.data as import('./ipc-contract.js').CardSurfaceData;
342
+ if (card.template === 'task_progress' && isPlainObject(card.templateData)) {
343
+ const steps = (card.templateData as Record<string, unknown>).steps;
344
+ if (Array.isArray(steps)) {
345
+ const updatedSteps = updateDoordashSteps(cmd, steps as Array<{ label: string; status: string; detail?: string }>, result.isError);
346
+ if (updatedSteps) {
347
+ const updatedTemplateData = { ...card.templateData as Record<string, unknown>, steps: updatedSteps };
348
+ const updatedData = { ...card, templateData: updatedTemplateData };
349
+ stored.data = updatedData as import('./ipc-contract.js').CardSurfaceData;
350
+ ctx.sendToClient({
351
+ type: 'ui_surface_update',
352
+ sessionId: ctx.conversationId,
353
+ surfaceId,
354
+ data: updatedData,
355
+ });
356
+ }
357
+ }
358
+ }
359
+ }
360
+ }
361
+ }
362
+
195
363
  return result;
196
364
  };
197
365
  }
@@ -284,3 +452,55 @@ export function createProxyApprovalCallback(
284
452
  || response.decision === 'always_allow_high_risk';
285
453
  };
286
454
  }
455
+
456
+ // ── createResolveToolsCallback ───────────────────────────────────────
457
+
458
/**
 * Skill ids that are treated as preactivated on every turn, in addition to
 * whatever the session itself preactivates. Listing a bundled skill here makes
 * its tools available in a fresh session without a prior `skill_load` call.
 */
const DEFAULT_PREACTIVATED_SKILL_IDS: string[] = [
  'tasks',
];
464
+
465
/**
 * The slice of Session state consulted by the resolveTools callback.
 * The callback keeps a reference to this object and reads its properties
 * anew on every agent turn rather than snapshotting them up front.
 */
export interface SkillProjectionContext {
  /** Session-specific skill ids to preactivate, appended after the defaults. */
  preactivatedSkillIds?: Array<string>;
  /** Passed to skill projection as the previously-active skill ids. */
  readonly skillProjectionState: Map<string, string>;
  /** Cache object handed to skill projection on each turn. */
  readonly skillProjectionCache: SkillProjectionCache;
  /** Tool names that are always allowed; seeds the per-turn allowed set. */
  readonly coreToolNames: Set<string>;
  /** Overwritten each turn with core tool names plus projected skill tool names. */
  allowedToolNames?: Set<string>;
}
476
+
477
/**
 * Create the per-turn resolveTools callback.
 *
 * On each invocation the callback projects skill tools from the conversation
 * history, replaces `ctx.allowedToolNames` with the core tool names plus the
 * projected skill tool names (so freshly activated skill tools are not
 * rejected by an outdated allow-list), and returns the base definitions
 * followed by the projected ones.
 *
 * @param toolDefs Base tool definitions that are always offered.
 * @param ctx Session state read (and partly updated) on every turn.
 * @returns The callback, or `undefined` when there are no base definitions.
 */
export function createResolveToolsCallback(
  toolDefs: ToolDefinition[],
  ctx: SkillProjectionContext,
): ((history: Message[]) => ToolDefinition[]) | undefined {
  if (toolDefs.length === 0) {
    return undefined;
  }

  const resolveTools = (history: Message[]): ToolDefinition[] => {
    // Defaults first, then anything the session preactivated explicitly.
    const preactivatedSkillIds = DEFAULT_PREACTIVATED_SKILL_IDS.concat(ctx.preactivatedSkillIds ?? []);

    const projection = projectSkillTools(history, {
      preactivatedSkillIds,
      previouslyActiveSkillIds: ctx.skillProjectionState,
      cache: ctx.skillProjectionCache,
    });

    // Rebuild the allow-list from scratch every turn: core tools + projected skill tools.
    ctx.allowedToolNames = new Set([...ctx.coreToolNames, ...projection.allowedToolNames]);

    return [...toolDefs, ...projection.toolDefinitions];
  };

  return resolveTools;
}
@@ -11,7 +11,6 @@ const log = getLogger('session-usage');
11
11
  export interface UsageContext {
12
12
  conversationId: string;
13
13
  providerName: string;
14
- assistantId: string | null;
15
14
  usageStats: UsageStats;
16
15
  }
17
16
 
@@ -60,7 +59,6 @@ export function recordUsage(
60
59
  outputTokens,
61
60
  cacheCreationInputTokens: null,
62
61
  cacheReadInputTokens: null,
63
- assistantId: ctx.assistantId,
64
62
  conversationId: ctx.conversationId,
65
63
  runId: null,
66
64
  requestId,