vellum 0.2.0 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (361)
  1. package/README.md +15 -2
  2. package/bun.lock +5 -2
  3. package/package.json +4 -2
  4. package/scripts/capture-x-graphql.ts +562 -0
  5. package/scripts/ipc/check-swift-decoder-drift.ts +2 -1
  6. package/scripts/test.sh +5 -0
  7. package/src/__tests__/__snapshots__/ipc-snapshot.test.ts.snap +161 -34
  8. package/src/__tests__/account-registry.test.ts +2 -1
  9. package/src/__tests__/agent-heartbeat-service.test.ts +250 -0
  10. package/src/__tests__/app-bundler.test.ts +12 -33
  11. package/src/__tests__/asset-materialize-tool.test.ts +16 -15
  12. package/src/__tests__/asset-search-tool.test.ts +23 -22
  13. package/src/__tests__/attachments-store.test.ts +56 -127
  14. package/src/__tests__/browser-skill-baseline-tool-payload.test.ts +5 -4
  15. package/src/__tests__/browser-skill-endstate.test.ts +5 -8
  16. package/src/__tests__/call-bridge.test.ts +385 -0
  17. package/src/__tests__/call-constants.test.ts +40 -0
  18. package/src/__tests__/call-orchestrator.test.ts +454 -0
  19. package/src/__tests__/call-recovery.test.ts +518 -0
  20. package/src/__tests__/call-routes-http.test.ts +459 -0
  21. package/src/__tests__/call-state-machine.test.ts +143 -0
  22. package/src/__tests__/call-state.test.ts +133 -0
  23. package/src/__tests__/call-store.test.ts +691 -0
  24. package/src/__tests__/cli-discover.test.ts +1 -1
  25. package/src/__tests__/commit-message-enrichment-service.test.ts +550 -0
  26. package/src/__tests__/compaction.benchmark.test.ts +176 -0
  27. package/src/__tests__/computer-use-tools.test.ts +250 -0
  28. package/src/__tests__/config-schema.test.ts +348 -3
  29. package/src/__tests__/conflict-store.test.ts +2 -1
  30. package/src/__tests__/contacts-tools.test.ts +331 -0
  31. package/src/__tests__/conversation-store.test.ts +30 -32
  32. package/src/__tests__/credential-security-invariants.test.ts +4 -0
  33. package/src/__tests__/date-context.test.ts +373 -0
  34. package/src/__tests__/db-schedule-syntax-migration.test.ts +129 -0
  35. package/src/__tests__/doordash-session.test.ts +9 -0
  36. package/src/__tests__/fixtures/media-reuse-fixtures.ts +3 -3
  37. package/src/__tests__/followup-tools.test.ts +303 -0
  38. package/src/__tests__/handlers-twitter-config.test.ts +718 -0
  39. package/src/__tests__/intent-routing.test.ts +64 -57
  40. package/src/__tests__/ipc-roundtrip.benchmark.test.ts +237 -0
  41. package/src/__tests__/ipc-snapshot.test.ts +96 -28
  42. package/src/__tests__/llm-usage-store.test.ts +3 -8
  43. package/src/__tests__/media-generate-image.test.ts +1 -1
  44. package/src/__tests__/media-reuse-story.e2e.test.ts +7 -7
  45. package/src/__tests__/memory-retrieval.benchmark.test.ts +430 -0
  46. package/src/__tests__/parallel-tool.benchmark.test.ts +294 -0
  47. package/src/__tests__/playbook-tools.test.ts +342 -0
  48. package/src/__tests__/profile-compiler.test.ts +2 -1
  49. package/src/__tests__/provider-streaming.benchmark.test.ts +773 -0
  50. package/src/__tests__/recurrence-engine-rruleset.test.ts +78 -0
  51. package/src/__tests__/recurrence-engine.test.ts +69 -0
  52. package/src/__tests__/recurrence-types.test.ts +71 -0
  53. package/src/__tests__/registry.test.ts +17 -10
  54. package/src/__tests__/relay-server.test.ts +633 -0
  55. package/src/__tests__/reminder-store.test.ts +6 -3
  56. package/src/__tests__/reminder.test.ts +43 -77
  57. package/src/__tests__/run-orchestrator-assistant-events.test.ts +222 -0
  58. package/src/__tests__/run-orchestrator.test.ts +7 -7
  59. package/src/__tests__/runtime-attachment-metadata.test.ts +19 -20
  60. package/src/__tests__/runtime-runs-http.test.ts +5 -23
  61. package/src/__tests__/runtime-runs.test.ts +11 -11
  62. package/src/__tests__/schedule-store.test.ts +482 -0
  63. package/src/__tests__/schedule-tools.test.ts +700 -0
  64. package/src/__tests__/scheduler-recurrence.test.ts +329 -0
  65. package/src/__tests__/server-history-render.test.ts +14 -13
  66. package/src/__tests__/session-error.test.ts +28 -0
  67. package/src/__tests__/session-init.benchmark.test.ts +462 -0
  68. package/src/__tests__/session-queue.test.ts +89 -16
  69. package/src/__tests__/session-runtime-assembly.test.ts +161 -0
  70. package/src/__tests__/session-surfaces-task-progress.test.ts +104 -0
  71. package/src/__tests__/signup-e2e.test.ts +2 -1
  72. package/src/__tests__/skill-projection.benchmark.test.ts +328 -0
  73. package/src/__tests__/skill-script-runner.test.ts +159 -0
  74. package/src/__tests__/speaker-identification.test.ts +52 -0
  75. package/src/__tests__/subagent-manager-notify.test.ts +42 -10
  76. package/src/__tests__/subagent-tools.test.ts +141 -41
  77. package/src/__tests__/task-compiler.test.ts +2 -1
  78. package/src/__tests__/task-runner.test.ts +2 -1
  79. package/src/__tests__/task-scheduler.test.ts +2 -1
  80. package/src/__tests__/task-tools.test.ts +49 -56
  81. package/src/__tests__/tool-audit-listener.test.ts +1 -0
  82. package/src/__tests__/tool-domain-event-publisher.test.ts +2 -0
  83. package/src/__tests__/tool-execution-pipeline.benchmark.test.ts +500 -0
  84. package/src/__tests__/tool-executor.test.ts +13 -17
  85. package/src/__tests__/turn-commit.test.ts +273 -2
  86. package/src/__tests__/twilio-provider.test.ts +143 -0
  87. package/src/__tests__/twilio-routes.test.ts +789 -0
  88. package/src/__tests__/twitter-auth-handler.test.ts +581 -0
  89. package/src/__tests__/view-image-tool.test.ts +217 -0
  90. package/src/__tests__/workspace-git-service.test.ts +403 -0
  91. package/src/__tests__/workspace-heartbeat-service.test.ts +141 -2
  92. package/src/agent-heartbeat/agent-heartbeat-service.ts +155 -0
  93. package/src/bundler/app-bundler.ts +35 -14
  94. package/src/calls/call-bridge.ts +95 -0
  95. package/src/calls/call-constants.ts +48 -0
  96. package/src/calls/call-domain.ts +276 -0
  97. package/src/calls/call-orchestrator.ts +390 -0
  98. package/src/calls/call-recovery.ts +207 -0
  99. package/src/calls/call-state-machine.ts +68 -0
  100. package/src/calls/call-state.ts +64 -0
  101. package/src/calls/call-store.ts +416 -0
  102. package/src/calls/relay-server.ts +335 -0
  103. package/src/calls/speaker-identification.ts +213 -0
  104. package/src/calls/twilio-config.ts +34 -0
  105. package/src/calls/twilio-provider.ts +173 -0
  106. package/src/calls/twilio-routes.ts +250 -0
  107. package/src/calls/types.ts +37 -0
  108. package/src/calls/voice-provider.ts +14 -0
  109. package/src/cli/config-commands.ts +334 -0
  110. package/src/cli/core-commands.ts +776 -0
  111. package/src/cli/doordash.ts +256 -25
  112. package/src/cli/ipc-client.ts +82 -0
  113. package/src/cli/map.ts +246 -0
  114. package/src/cli/twitter.ts +575 -0
  115. package/src/cli.ts +7 -5
  116. package/src/commands/__tests__/cc-command-registry.test.ts +319 -0
  117. package/src/commands/cc-command-registry.ts +209 -0
  118. package/src/config/bundled-skills/contacts/SKILL.md +39 -0
  119. package/src/config/bundled-skills/contacts/TOOLS.json +122 -0
  120. package/src/config/bundled-skills/contacts/tools/contact-merge.ts +9 -0
  121. package/src/config/bundled-skills/contacts/tools/contact-search.ts +9 -0
  122. package/src/config/bundled-skills/contacts/tools/contact-upsert.ts +9 -0
  123. package/src/config/bundled-skills/document/SKILL.md +18 -0
  124. package/src/config/bundled-skills/document/TOOLS.json +53 -0
  125. package/src/config/bundled-skills/document/tools/document-create.ts +9 -0
  126. package/src/config/bundled-skills/document/tools/document-update.ts +9 -0
  127. package/src/config/bundled-skills/doordash/SKILL.md +163 -0
  128. package/src/config/bundled-skills/followups/SKILL.md +32 -0
  129. package/src/config/bundled-skills/followups/TOOLS.json +100 -0
  130. package/src/config/bundled-skills/followups/tools/followup-create.ts +9 -0
  131. package/src/config/bundled-skills/followups/tools/followup-list.ts +9 -0
  132. package/src/config/bundled-skills/followups/tools/followup-resolve.ts +9 -0
  133. package/src/config/bundled-skills/image-studio/TOOLS.json +2 -2
  134. package/src/config/bundled-skills/image-studio/tools/media-generate-image.ts +2 -24
  135. package/src/config/bundled-skills/messaging/tools/messaging-analyze-style.ts +2 -1
  136. package/src/config/bundled-skills/playbooks/SKILL.md +31 -0
  137. package/src/config/bundled-skills/playbooks/TOOLS.json +126 -0
  138. package/src/config/bundled-skills/playbooks/tools/playbook-create.ts +9 -0
  139. package/src/config/bundled-skills/playbooks/tools/playbook-delete.ts +9 -0
  140. package/src/config/bundled-skills/playbooks/tools/playbook-list.ts +9 -0
  141. package/src/config/bundled-skills/playbooks/tools/playbook-update.ts +9 -0
  142. package/src/config/bundled-skills/reminder/SKILL.md +20 -0
  143. package/src/config/bundled-skills/reminder/TOOLS.json +67 -0
  144. package/src/config/bundled-skills/reminder/tools/reminder-cancel.ts +9 -0
  145. package/src/config/bundled-skills/reminder/tools/reminder-create.ts +9 -0
  146. package/src/config/bundled-skills/reminder/tools/reminder-list.ts +9 -0
  147. package/src/config/bundled-skills/schedule/SKILL.md +74 -0
  148. package/src/config/bundled-skills/schedule/TOOLS.json +135 -0
  149. package/src/config/bundled-skills/schedule/tools/schedule-create.ts +9 -0
  150. package/src/config/bundled-skills/schedule/tools/schedule-delete.ts +9 -0
  151. package/src/config/bundled-skills/schedule/tools/schedule-list.ts +9 -0
  152. package/src/config/bundled-skills/schedule/tools/schedule-update.ts +9 -0
  153. package/src/config/bundled-skills/subagent/SKILL.md +25 -0
  154. package/src/config/bundled-skills/subagent/TOOLS.json +107 -0
  155. package/src/config/bundled-skills/subagent/tools/subagent-abort.ts +9 -0
  156. package/src/config/bundled-skills/subagent/tools/subagent-message.ts +9 -0
  157. package/src/config/bundled-skills/subagent/tools/subagent-read.ts +9 -0
  158. package/src/config/bundled-skills/subagent/tools/subagent-spawn.ts +9 -0
  159. package/src/config/bundled-skills/subagent/tools/subagent-status.ts +9 -0
  160. package/src/config/bundled-skills/tasks/SKILL.md +28 -0
  161. package/src/config/bundled-skills/tasks/TOOLS.json +256 -0
  162. package/src/config/bundled-skills/tasks/tools/task-delete.ts +9 -0
  163. package/src/config/bundled-skills/tasks/tools/task-list-add.ts +9 -0
  164. package/src/config/bundled-skills/tasks/tools/task-list-remove.ts +9 -0
  165. package/src/config/bundled-skills/tasks/tools/task-list-show.ts +9 -0
  166. package/src/config/bundled-skills/tasks/tools/task-list-update.ts +9 -0
  167. package/src/config/bundled-skills/tasks/tools/task-list.ts +9 -0
  168. package/src/config/bundled-skills/tasks/tools/task-run.ts +9 -0
  169. package/src/config/bundled-skills/tasks/tools/task-save.ts +9 -0
  170. package/src/config/bundled-skills/twitter/SKILL.md +134 -0
  171. package/src/config/bundled-skills/watcher/SKILL.md +27 -0
  172. package/src/config/bundled-skills/watcher/TOOLS.json +147 -0
  173. package/src/config/bundled-skills/watcher/tools/watcher-create.ts +9 -0
  174. package/src/config/bundled-skills/watcher/tools/watcher-delete.ts +9 -0
  175. package/src/config/bundled-skills/watcher/tools/watcher-digest.ts +9 -0
  176. package/src/config/bundled-skills/watcher/tools/watcher-list.ts +9 -0
  177. package/src/config/bundled-skills/watcher/tools/watcher-update.ts +9 -0
  178. package/src/config/defaults.ts +44 -0
  179. package/src/config/loader.ts +4 -1
  180. package/src/config/schema.ts +218 -1
  181. package/src/config/system-prompt.ts +100 -6
  182. package/src/config/templates/IDENTITY.md +7 -0
  183. package/src/config/types.ts +5 -0
  184. package/src/contacts/contact-store.ts +4 -4
  185. package/src/daemon/assistant-attachments.ts +10 -0
  186. package/src/daemon/classifier.ts +3 -1
  187. package/src/daemon/computer-use-session.ts +3 -1
  188. package/src/daemon/date-context.ts +136 -0
  189. package/src/daemon/handlers/apps.ts +16 -1
  190. package/src/daemon/handlers/browser.ts +54 -0
  191. package/src/daemon/handlers/computer-use.ts +7 -1
  192. package/src/daemon/handlers/config.ts +192 -4
  193. package/src/daemon/handlers/diagnostics.ts +5 -1
  194. package/src/daemon/handlers/documents.ts +18 -29
  195. package/src/daemon/handlers/home-base.ts +5 -1
  196. package/src/daemon/handlers/index.ts +40 -271
  197. package/src/daemon/handlers/misc.ts +9 -1
  198. package/src/daemon/handlers/publish.ts +6 -1
  199. package/src/daemon/handlers/sessions.ts +65 -12
  200. package/src/daemon/handlers/shared.ts +36 -1
  201. package/src/daemon/handlers/signing.ts +37 -0
  202. package/src/daemon/handlers/skills.ts +20 -6
  203. package/src/daemon/handlers/subagents.ts +8 -3
  204. package/src/daemon/handlers/twitter-auth.ts +169 -0
  205. package/src/daemon/handlers/work-items.ts +495 -39
  206. package/src/daemon/ipc-contract-inventory.json +40 -4
  207. package/src/daemon/ipc-contract.ts +185 -37
  208. package/src/daemon/ipc-protocol.ts +7 -2
  209. package/src/daemon/lifecycle.ts +48 -5
  210. package/src/daemon/main.ts +10 -4
  211. package/src/daemon/ride-shotgun-handler.ts +74 -10
  212. package/src/daemon/server.ts +144 -29
  213. package/src/daemon/session-agent-loop.ts +887 -0
  214. package/src/daemon/session-attachments.ts +28 -5
  215. package/src/daemon/session-error.ts +24 -3
  216. package/src/daemon/session-lifecycle.ts +147 -0
  217. package/src/daemon/session-media-retry.ts +147 -0
  218. package/src/daemon/session-messaging.ts +145 -0
  219. package/src/daemon/session-notifiers.ts +164 -0
  220. package/src/daemon/session-process.ts +2 -2
  221. package/src/daemon/session-queue-manager.ts +1 -0
  222. package/src/daemon/session-runtime-assembly.ts +52 -0
  223. package/src/daemon/session-skill-tools.ts +124 -5
  224. package/src/daemon/session-slash.ts +3 -0
  225. package/src/daemon/session-surfaces.ts +77 -2
  226. package/src/daemon/session-tool-setup.ts +222 -2
  227. package/src/daemon/session-usage.ts +0 -2
  228. package/src/daemon/session.ts +114 -1365
  229. package/src/daemon/video-thumbnail.ts +60 -0
  230. package/src/doordash/client.ts +121 -27
  231. package/src/doordash/queries.ts +1 -2
  232. package/src/export/formatter.ts +3 -1
  233. package/src/followups/followup-store.ts +4 -2
  234. package/src/followups/types.ts +6 -0
  235. package/src/hooks/templates.ts +1 -1
  236. package/src/index.ts +32 -1151
  237. package/src/media/gemini-image-service.ts +1 -1
  238. package/src/memory/attachments-store.ts +28 -83
  239. package/src/memory/channel-delivery-store.ts +7 -21
  240. package/src/memory/clarification-resolver.ts +6 -5
  241. package/src/memory/contradiction-checker.ts +3 -2
  242. package/src/memory/conversation-key-store.ts +10 -29
  243. package/src/memory/conversation-store.ts +2 -1
  244. package/src/memory/db.ts +362 -2
  245. package/src/memory/entity-extractor.ts +6 -3
  246. package/src/memory/items-extractor.ts +5 -4
  247. package/src/memory/jobs-store.ts +3 -2
  248. package/src/memory/llm-usage-store.ts +1 -2
  249. package/src/memory/runs-store.ts +1 -2
  250. package/src/memory/schema.ts +65 -2
  251. package/src/messaging/style-analyzer.ts +3 -2
  252. package/src/messaging/thread-summarizer.ts +8 -12
  253. package/src/messaging/triage-engine.ts +4 -2
  254. package/src/providers/openrouter/client.ts +20 -0
  255. package/src/providers/registry.ts +8 -0
  256. package/src/runtime/http-server.ts +277 -25
  257. package/src/runtime/http-types.ts +0 -2
  258. package/src/runtime/routes/attachment-routes.ts +5 -6
  259. package/src/runtime/routes/call-routes.ts +140 -0
  260. package/src/runtime/routes/channel-routes.ts +12 -19
  261. package/src/runtime/routes/conversation-routes.ts +5 -9
  262. package/src/runtime/routes/run-routes.ts +4 -8
  263. package/src/runtime/run-orchestrator.ts +39 -6
  264. package/src/schedule/recurrence-engine.ts +138 -0
  265. package/src/schedule/recurrence-types.ts +67 -0
  266. package/src/schedule/schedule-store.ts +102 -57
  267. package/src/schedule/scheduler.ts +9 -6
  268. package/src/security/oauth2.ts +29 -4
  269. package/src/security/secret-allowlist.ts +46 -0
  270. package/src/skills/clawhub.ts +1 -1
  271. package/src/subagent/manager.ts +40 -8
  272. package/src/swarm/backend-claude-code.ts +64 -9
  273. package/src/swarm/worker-prompts.ts +2 -1
  274. package/src/tasks/SPEC.md +34 -28
  275. package/src/tasks/ephemeral-permissions.ts +16 -7
  276. package/src/tasks/task-compiler.ts +5 -4
  277. package/src/tasks/task-runner.ts +10 -5
  278. package/src/tasks/task-scheduler.ts +1 -1
  279. package/src/tasks/tool-sanitizer.ts +36 -0
  280. package/src/tools/assets/search.ts +4 -4
  281. package/src/tools/browser/api-map.ts +220 -0
  282. package/src/tools/browser/auto-navigate.ts +270 -0
  283. package/src/tools/browser/browser-execution.ts +2 -1
  284. package/src/tools/browser/browser-manager.ts +2 -2
  285. package/src/tools/browser/network-recorder.ts +5 -4
  286. package/src/tools/browser/x-auto-navigate.ts +207 -0
  287. package/src/tools/calls/call-end.ts +67 -0
  288. package/src/tools/calls/call-start.ts +73 -0
  289. package/src/tools/calls/call-status.ts +81 -0
  290. package/src/tools/claude-code/claude-code.ts +77 -11
  291. package/src/tools/contacts/contact-merge.ts +46 -78
  292. package/src/tools/contacts/contact-search.ts +35 -79
  293. package/src/tools/contacts/contact-upsert.ts +35 -108
  294. package/src/tools/credentials/vault.ts +21 -5
  295. package/src/tools/document/document-tool.ts +71 -144
  296. package/src/tools/executor.ts +129 -10
  297. package/src/tools/followups/followup_create.ts +46 -88
  298. package/src/tools/followups/followup_list.ts +34 -74
  299. package/src/tools/followups/followup_resolve.ts +31 -66
  300. package/src/tools/host-terminal/cli-discover.ts +2 -1
  301. package/src/tools/host-terminal/host-shell.ts +10 -0
  302. package/src/tools/memory/handlers.ts +5 -4
  303. package/src/tools/network/__tests__/web-search.test.ts +427 -0
  304. package/src/tools/network/script-proxy/__tests__/logging.test.ts +248 -0
  305. package/src/tools/network/script-proxy/__tests__/policy.test.ts +234 -0
  306. package/src/tools/network/script-proxy/__tests__/router.test.ts +76 -0
  307. package/src/tools/network/web-fetch.ts +18 -6
  308. package/src/tools/playbooks/index.ts +4 -5
  309. package/src/tools/playbooks/playbook-create.ts +3 -47
  310. package/src/tools/playbooks/playbook-delete.ts +1 -25
  311. package/src/tools/playbooks/playbook-list.ts +1 -28
  312. package/src/tools/playbooks/playbook-update.ts +3 -51
  313. package/src/tools/registry.ts +2 -4
  314. package/src/tools/reminder/reminder.ts +5 -78
  315. package/src/tools/schedule/create.ts +69 -74
  316. package/src/tools/schedule/delete.ts +21 -47
  317. package/src/tools/schedule/list.ts +55 -74
  318. package/src/tools/schedule/update.ts +77 -84
  319. package/src/tools/subagent/abort.ts +29 -58
  320. package/src/tools/subagent/message.ts +30 -63
  321. package/src/tools/subagent/read.ts +53 -84
  322. package/src/tools/subagent/spawn.ts +43 -82
  323. package/src/tools/subagent/status.ts +42 -71
  324. package/src/tools/swarm/delegate.ts +2 -1
  325. package/src/tools/tasks/index.ts +8 -6
  326. package/src/tools/tasks/task-delete.ts +69 -56
  327. package/src/tools/tasks/task-list.ts +31 -52
  328. package/src/tools/tasks/task-run.ts +74 -102
  329. package/src/tools/tasks/task-save.ts +33 -65
  330. package/src/tools/tasks/work-item-enqueue.ts +192 -134
  331. package/src/tools/tasks/work-item-list.ts +33 -78
  332. package/src/tools/tasks/work-item-remove.ts +60 -0
  333. package/src/tools/tasks/work-item-update.ts +114 -0
  334. package/src/tools/terminal/backends/native.ts +3 -1
  335. package/src/tools/tool-manifest.ts +20 -74
  336. package/src/tools/types.ts +6 -0
  337. package/src/tools/ui-surface/definitions.ts +6 -1
  338. package/src/tools/watch/screen-watch.ts +3 -1
  339. package/src/tools/watcher/create.ts +52 -98
  340. package/src/tools/watcher/delete.ts +20 -46
  341. package/src/tools/watcher/digest.ts +36 -70
  342. package/src/tools/watcher/list.ts +49 -79
  343. package/src/tools/watcher/update.ts +45 -91
  344. package/src/twitter/client.ts +690 -0
  345. package/src/twitter/session.ts +91 -0
  346. package/src/usage/types.ts +0 -1
  347. package/src/util/truncate.ts +6 -0
  348. package/src/watcher/providers/slack.ts +2 -1
  349. package/src/watcher/watcher-store.ts +3 -2
  350. package/src/work-items/work-item-store.ts +236 -2
  351. package/src/workspace/commit-message-enrichment-service.ts +284 -0
  352. package/src/workspace/commit-message-provider.ts +95 -0
  353. package/src/workspace/git-service.ts +272 -52
  354. package/src/workspace/heartbeat-service.ts +70 -13
  355. package/src/workspace/provider-commit-message-generator.ts +242 -0
  356. package/src/workspace/turn-commit.ts +100 -51
  357. package/src/tools/contacts/index.ts +0 -4
  358. package/src/tools/document/index.ts +0 -5
  359. package/src/tools/followups/index.ts +0 -3
  360. package/src/tools/subagent/index.ts +0 -5
  361. /package/src/__tests__/{memory-context-benchmark.test.ts → memory-context-benchmark.benchmark.test.ts} +0 -0
@@ -0,0 +1,500 @@
1
+ /**
2
+ * Tool Execution Pipeline Benchmark
3
+ *
4
+ * Measures the overhead of each phase in the permission/security pipeline:
5
+ * 1. classifyRisk — risk classification
6
+ * 2. check — trust rule matching (both no-rule fallback and matched-rule paths)
7
+ * 3. scanText — secret scanning on output
8
+ * 4. ToolExecutor.execute() — full pipeline overhead with noop/slow tools
9
+ *
10
+ * Target ranges:
11
+ * - p50 pipeline overhead (classifyRisk + check) < 20ms for pre-approved tools
12
+ * - p95 pipeline overhead < 50ms
13
+ * - Overhead is constant regardless of tool execution time
14
+ * - Secret scanning < 5ms for short outputs (< 1KB)
15
+ * - Secret scanning < 50ms for large outputs (100KB)
16
+ * - ToolExecutor overhead < 20ms regardless of tool execution time
17
+ */
18
+ import { describe, test, expect, beforeAll, afterAll, mock } from 'bun:test';
19
+ import { mkdtempSync, rmSync } from 'node:fs';
20
+ import { tmpdir } from 'node:os';
21
+ import { join } from 'node:path';
22
+
23
+ const testDir = mkdtempSync(join(tmpdir(), 'tool-pipeline-bench-')); // Scratch directory for this run; removed again in afterAll.
24
+
25
+ // Local registry for ToolExecutor tests — the mock delegates to this map
26
+ // so that registerTool/getTool/getAllTools work for our benchmark tools.
27
+ const localRegistry = new Map<string, import('../tools/types.js').Tool>();
28
+
29
+ // Mocks must precede imports of modules under test.
30
+ mock.module('../util/platform.js', () => ({ // Platform stub: every path getter below resolves inside testDir.
31
+ getDataDir: () => testDir,
32
+ isMacOS: () => process.platform === 'darwin',
33
+ isLinux: () => process.platform === 'linux',
34
+ isWindows: () => process.platform === 'win32',
35
+ getSocketPath: () => join(testDir, 'test.sock'),
36
+ getPidPath: () => join(testDir, 'test.pid'),
37
+ getDbPath: () => join(testDir, 'test.db'),
38
+ getLogPath: () => join(testDir, 'test.log'),
39
+ ensureDataDir: () => {}, // No-op here; testDir was already created by mkdtempSync above.
40
+ getHooksDir: () => join(testDir, 'hooks'),
41
+ }));
42
+
43
+ mock.module('../util/logger.js', () => ({ // Logger stub: nothing is written anywhere.
44
+ getLogger: () => new Proxy({} as Record<string, unknown>, {
45
+ get: () => () => {}, // Any property read yields a function that does nothing.
46
+ }),
47
+ isDebug: () => false,
48
+ }));
49
+
50
+ // Allow toggling between no-rule and matched-rule paths
51
+ let mockRuleResponse: import('../permissions/types.js').TrustRule | null = null; // null means "no rule"; a test assigns a rule and resets it in finally.
52
+
53
+ mock.module('../permissions/trust-store.js', () => ({ // Trust-store stub driven by mockRuleResponse.
54
+ addRule: () => {},
55
+ findHighestPriorityRule: () => mockRuleResponse, // Read on every call, so reassigning the variable takes effect immediately.
56
+ clearCache: () => {},
57
+ }));
58
+
59
+ mock.module('../config/loader.js', () => ({ // Config stub: getConfig returns a fresh literal with these fixed values on each call.
60
+ getConfig: () => ({
61
+ provider: 'mock-provider',
62
+ timeouts: { permissionTimeoutSec: 5, toolExecutionTimeoutSec: 120 },
63
+ permissions: { mode: 'legacy' },
64
+ skills: { load: { extraDirs: [] } },
65
+ secretDetection: { enabled: true, entropyThreshold: 4.0, action: 'warn' },
66
+ sandbox: { enabled: false },
67
+ contextWindow: {},
68
+ memory: {},
69
+ }),
70
+ }));
71
+
72
+ mock.module('../config/skills.js', () => ({ // Skills stub: selector resolves to no skill and the catalog is empty.
73
+ resolveSkillSelector: () => ({ skill: null }),
74
+ loadSkillCatalog: () => [],
75
+ }));
76
+
77
+ mock.module('../tools/registry.js', () => ({ // Registry stub backed by localRegistry, keyed by tool.name.
78
+ getTool: (name: string) => localRegistry.get(name),
79
+ getAllTools: () => Array.from(localRegistry.values()),
80
+ registerTool: (tool: import('../tools/types.js').Tool) => { localRegistry.set(tool.name, tool); },
81
+ }));
82
+
83
+ mock.module('../hooks/manager.js', () => ({ // Hook-manager stub: trigger always resolves to { blocked: false }.
84
+ getHookManager: () => ({
85
+ trigger: () => Promise.resolve({ blocked: false }),
86
+ }),
87
+ }));
88
+
89
+ import { classifyRisk, check } from '../permissions/checker.js';
90
+ import { scanText, DEFAULT_ENTROPY_CONFIG } from '../security/secret-scanner.js';
91
+ import { RiskLevel } from '../permissions/types.js';
92
+ import { ToolExecutor } from '../tools/executor.js';
93
+ import { PermissionPrompter } from '../permissions/prompter.js';
94
+ import type { Tool, ToolContext, ToolExecutionResult } from '../tools/types.js';
95
+
96
+ // ---------------------------------------------------------------------------
97
+ // Helpers
98
+ // ---------------------------------------------------------------------------
99
+
100
/**
 * Returns the p-th percentile of `values` using the nearest-rank method.
 *
 * The input is copied before sorting, so the caller's array is never mutated.
 *
 * @param values - Samples (for example timings in milliseconds), in any order.
 * @param p - Percentile to look up on a 0–100 scale. Values outside that range
 *   are clamped to the smallest / largest sample.
 * @returns The sample at rank `ceil(p / 100 * n)`, or `NaN` when `values` is
 *   empty or `p` is `NaN`.
 */
function percentile(values: number[], p: number): number {
  if (values.length === 0) {
    // No samples: report NaN rather than leaking `undefined` through a
    // `number` return type.
    return Number.NaN;
  }

  const sorted = [...values].sort((a, b) => a - b);
  const rank = Math.ceil((p / 100) * sorted.length) - 1;
  // Clamp on both ends so p <= 0 maps to the minimum and p >= 100 to the maximum.
  const idx = Math.min(sorted.length - 1, Math.max(0, rank));
  // `idx` is NaN only when `p` is NaN; the lookup then misses and yields NaN.
  return sorted[idx] ?? Number.NaN;
}
105
+
106
/**
 * Runs an async function `iterations` times, strictly one call after another,
 * and records the wall-clock duration of every call.
 *
 * Each call is awaited before the next one starts, so the timings never
 * overlap. A rejection from `fn` propagates immediately and aborts the run.
 *
 * @param fn - Operation to measure; invoked with no arguments.
 * @param iterations - Number of timed calls. Zero or a negative value yields
 *   empty arrays.
 * @returns `timings` (milliseconds per call, from `performance.now()`) and
 *   `results` (the resolved value of each call), both in call order.
 */
async function benchmarkAsync<T>(
  fn: () => Promise<T>,
  iterations: number,
): Promise<{ timings: number[]; results: T[] }> {
  const timings: number[] = [];
  const results: T[] = [];
  for (let i = 0; i < iterations; i++) {
    const start = performance.now();
    const result = await fn();
    // Stop the clock before any bookkeeping so array growth is not measured.
    timings.push(performance.now() - start);
    results.push(result);
  }
  return { timings, results };
}
120
+
121
/**
 * Runs a synchronous function `iterations` times and records the wall-clock
 * duration of every call.
 *
 * An exception thrown by `fn` propagates immediately and aborts the run.
 *
 * @param fn - Operation to measure; invoked with no arguments.
 * @param iterations - Number of timed calls. Zero or a negative value yields
 *   empty arrays.
 * @returns `timings` (milliseconds per call, from `performance.now()`) and
 *   `results` (the return value of each call), both in call order.
 */
function benchmarkSync<T>(
  fn: () => T,
  iterations: number,
): { timings: number[]; results: T[] } {
  const timings: number[] = [];
  const results: T[] = [];
  for (let i = 0; i < iterations; i++) {
    const start = performance.now();
    const result = fn();
    // Stop the clock before any bookkeeping so array growth is not measured.
    timings.push(performance.now() - start);
    results.push(result);
  }
  return { timings, results };
}
135
+
136
/**
 * Builds a synthetic tool-output payload of exactly `sizeBytes` characters.
 *
 * The text is made of newline-separated lines, each holding ten
 * space-separated keywords picked from a fixed ASCII vocabulary. Because every
 * character is ASCII, the character count equals the UTF-8 byte count.
 *
 * @param sizeBytes - Desired length. Zero, negative and NaN produce `''`;
 *   a fractional value is truncated by the final `slice`.
 * @param random - Source of numbers in `[0, 1)` used to pick each word.
 *   Defaults to `Math.random`; pass a seeded or constant generator to get a
 *   reproducible payload.
 * @returns A string whose length is `sizeBytes` (truncated to an integer).
 * @throws RangeError when `sizeBytes` is positive infinity, since the
 *   generation loop could never terminate.
 */
function generateLargeOutput(
  sizeBytes: number,
  random: () => number = Math.random,
): string {
  if (sizeBytes === Number.POSITIVE_INFINITY) {
    throw new RangeError('sizeBytes must be finite');
  }
  if (!(sizeBytes > 0)) {
    // Covers 0, negative values and NaN without generating anything.
    return '';
  }

  const words = [
    'function', 'const', 'let', 'return', 'import', 'export',
    'class', 'interface', 'type', 'async', 'await', 'Promise',
    'string', 'number', 'boolean', 'undefined', 'null', 'void',
  ];
  const lines: string[] = [];
  // Length the lines will have once joined with '\n'. A separator is counted
  // only between lines, never after the last one; counting one per line would
  // overstate the size by one and could stop the loop a character too early.
  let joinedLength = 0;
  while (joinedLength < sizeBytes) {
    const lineWords: string[] = [];
    for (let w = 0; w < 10; w++) {
      lineWords.push(words[Math.floor(random() * words.length)]);
    }
    const line = lineWords.join(' ');
    joinedLength += line.length + (lines.length > 0 ? 1 : 0);
    lines.push(line);
  }
  // The loop overshoots by up to one line; trim back to the requested size.
  return lines.join('\n').slice(0, sizeBytes);
}
156
+
157
+ // ---------------------------------------------------------------------------
158
+ // Benchmark suite
159
+ // ---------------------------------------------------------------------------
160
+
161
+ const ITERATIONS = 100; // Number of timed samples collected per benchmark case.
162
+ const WARMUP = 5; // Number of untimed passes run in beforeAll before any measurement.
163
+
164
+ describe('Tool execution pipeline benchmark', () => {
165
+ // Warm up the parser/modules
166
+ beforeAll(async () => {
167
+ for (let i = 0; i < WARMUP; i++) {
168
+ await classifyRisk('file_read', { path: '/tmp/test.ts' }, '/tmp');
169
+ await check('file_read', { path: '/tmp/test.ts' }, '/tmp');
170
+ scanText('no secrets here');
171
+ }
172
+ });
173
+
174
+ afterAll(() => {
175
+ try {
176
+ rmSync(testDir, { recursive: true });
177
+ } catch {
178
+ // best effort cleanup
179
+ }
180
+ });
181
+
182
+ test('classifyRisk: low-risk tool (file_read) is fast', async () => {
183
+ const { timings } = await benchmarkAsync(
184
+ () => classifyRisk('file_read', { path: '/tmp/test.ts' }, '/tmp'),
185
+ ITERATIONS,
186
+ );
187
+
188
+ const p50 = percentile(timings, 50);
189
+ const p95 = percentile(timings, 95);
190
+
191
+ expect(p50).toBeLessThan(5);
192
+ expect(p95).toBeLessThan(10);
193
+ });
194
+
195
+ test('classifyRisk: bash command classification', async () => {
196
+ const { timings, results } = await benchmarkAsync(
197
+ () => classifyRisk('bash', { command: 'ls -la /tmp' }, '/tmp'),
198
+ ITERATIONS,
199
+ );
200
+
201
+ const p50 = percentile(timings, 50);
202
+ const p95 = percentile(timings, 95);
203
+
204
+ // Bash classification involves shell parsing so it is slower
205
+ expect(p50).toBeLessThan(15);
206
+ expect(p95).toBeLessThan(40);
207
+ // Verify correctness: ls should be low risk
208
+ expect(results[0]).toBe(RiskLevel.Low);
209
+ });
210
+
211
+ test('classifyRisk: medium-risk tool (file_write)', async () => {
212
+ const { timings, results } = await benchmarkAsync(
213
+ () => classifyRisk('file_write', { path: '/tmp/out.txt' }, '/tmp'),
214
+ ITERATIONS,
215
+ );
216
+
217
+ const p50 = percentile(timings, 50);
218
+ expect(p50).toBeLessThan(5);
219
+ expect(results[0]).toBe(RiskLevel.Medium);
220
+ });
221
+
222
+ test('check: full permission check for low-risk tool', async () => {
223
+ const { timings, results } = await benchmarkAsync(
224
+ () => check('file_read', { path: '/tmp/test.ts' }, '/tmp'),
225
+ ITERATIONS,
226
+ );
227
+
228
+ const p50 = percentile(timings, 50);
229
+ const p95 = percentile(timings, 95);
230
+
231
+ // Full check includes classifyRisk + trust rule lookup
232
+ expect(p50).toBeLessThan(10);
233
+ expect(p95).toBeLessThan(20);
234
+ // Low-risk with no matching rule should auto-allow
235
+ expect(results[0].decision).toBe('allow');
236
+ });
237
+
238
+ test('check: full permission check for bash command', async () => {
239
+ const { timings, results } = await benchmarkAsync(
240
+ () => check('bash', { command: 'git status' }, '/tmp'),
241
+ ITERATIONS,
242
+ );
243
+
244
+ const p50 = percentile(timings, 50);
245
+ const p95 = percentile(timings, 95);
246
+
247
+ // Bash involves shell parsing + trust rule lookup
248
+ expect(p50).toBeLessThan(20);
249
+ expect(p95).toBeLessThan(50);
250
+ // git status is low risk, should auto-allow
251
+ expect(results[0].decision).toBe('allow');
252
+ });
253
+
254
test('check: matched allow-rule path for medium-risk tool', async () => {
  // Install a mock allow rule so the check takes the matched-rule path
  // instead of always falling through to the no-rule default.
  const ruleId = 'bench:allow-file_write';
  mockRuleResponse = {
    id: ruleId,
    tool: 'file_write',
    pattern: '**',
    scope: '/tmp',
    decision: 'allow',
    priority: 90,
    createdAt: Date.now(),
  };

  try {
    const runCheck = () => check('file_write', { path: '/tmp/out.txt' }, '/tmp');
    const bench = await benchmarkAsync(runCheck, ITERATIONS);

    const median = percentile(bench.timings, 50);
    const tail = percentile(bench.timings, 95);
    expect(median).toBeLessThan(10);
    expect(tail).toBeLessThan(20);

    // Medium-risk with a matching allow rule should auto-allow and report
    // which rule matched.
    const [firstResult] = bench.results;
    expect(firstResult.decision).toBe('allow');
    expect(firstResult.matchedRule?.id).toBe('bench:allow-file_write');
  } finally {
    // Always clear the mock rule, even when an assertion above throws.
    mockRuleResponse = null;
  }
});
+
286
test('check: permission cost is stable across different input paths', async () => {
  // Compares the median cost of a `file_read` permission check for a short
  // path against a longer one; the two are sampled alternately in one loop.
  const shortSamples: number[] = [];
  const longSamples: number[] = [];

  // Times a single `file_read` permission check for the given path.
  const timeCheck = async (path: string): Promise<number> => {
    const startedAt = performance.now();
    await check('file_read', { path }, '/tmp');
    return performance.now() - startedAt;
  };

  for (let i = 0; i < ITERATIONS; i += 1) {
    shortSamples.push(await timeCheck('/tmp/fast.ts'));
    longSamples.push(await timeCheck('/tmp/slow-complex-deeply-nested-file.ts'));
  }

  const shortMedian = percentile(shortSamples, 50);
  const longMedian = percentile(longSamples, 50);

  // Ratio of the slower median to the faster one; the denominator is floored
  // at 0.001 so a near-zero median cannot blow the ratio up.
  const slower = Math.max(shortMedian, longMedian);
  const faster = Math.min(shortMedian, longMedian);
  const ratio = slower / Math.max(faster, 0.001);
  expect(ratio).toBeLessThan(5);
});
+
310
test('scanText: short output (< 1KB) completes quickly', () => {
  // A two-line build summary, well under 1KB.
  const shortOutput = 'Build succeeded. 42 tests passed, 0 failed.\nTime: 1.23s';
  const scanOnce = () => scanText(shortOutput, DEFAULT_ENTROPY_CONFIG);

  const bench = benchmarkSync(scanOnce, ITERATIONS);

  const median = percentile(bench.timings, 50);
  const tail = percentile(bench.timings, 95);
  expect(median).toBeLessThan(5);
  expect(tail).toBeLessThan(10);
});
+
325
test('scanText: large output (100KB) within budget', () => {
  // Generate the 100KB payload once, outside the timed closure.
  const payloadBytes = 100 * 1024;
  const largeOutput = generateLargeOutput(payloadBytes);
  const scanOnce = () => scanText(largeOutput, DEFAULT_ENTROPY_CONFIG);

  const bench = benchmarkSync(scanOnce, ITERATIONS);

  const median = percentile(bench.timings, 50);
  const tail = percentile(bench.timings, 95);
  expect(median).toBeLessThan(50);
  expect(tail).toBeLessThan(100);
});
+
340
test('scanText: output with secrets is detected without excessive overhead', () => {
  // Fake secrets are assembled from fragments so that pre-commit hooks do not
  // flag this file with false positives.
  const fakeGhToken = 'ghp_' + 'A1b2C3d4E5f6G7h8I9j0K1l2M3n4O5p6Q7r8';
  const fakeConnStr = 'postgres://' + 'user:s3cret@db.host.example.com:5432/mydb';
  const lines = [
    'Deploying to production...',
    'Using API key: ' + fakeGhToken,
    'Connection: ' + fakeConnStr,
    'Build complete.',
  ];
  const outputWithSecrets = lines.join('\n');

  const bench = benchmarkSync(
    () => scanText(outputWithSecrets, DEFAULT_ENTROPY_CONFIG),
    ITERATIONS,
  );

  // Timing budget: median scan time.
  expect(percentile(bench.timings, 50)).toBeLessThan(5);

  // Detection correctness: both planted secrets must be reported.
  const firstMatches = bench.results[0];
  expect(firstMatches.length).toBeGreaterThanOrEqual(2);
  const types = firstMatches.map((m) => m.type);
  expect(types).toContain('GitHub Token');
  expect(types).toContain('Database Connection String');
});
+
366
test('combined pipeline overhead (classifyRisk + check + scanText) stays under budget', async () => {
  // Each sample times the three phases back to back for one bash command.
  const command = 'git diff HEAD';
  const sampleDiff = 'diff --git a/file.ts b/file.ts\n+const x = 42;\n-const x = 41;';
  const samples: number[] = [];

  for (let i = 0; i < ITERATIONS; i += 1) {
    const startedAt = performance.now();

    // Phase 1: risk classification.
    await classifyRisk('bash', { command }, '/tmp');
    // Phase 2: permission check.
    await check('bash', { command }, '/tmp');
    // Phase 3: secret scanning on the (simulated) tool output.
    scanText(sampleDiff, DEFAULT_ENTROPY_CONFIG);

    samples.push(performance.now() - startedAt);
  }

  const median = percentile(samples, 50);
  const tail = percentile(samples, 95);

  // Combined pipeline overhead for a pre-approved tool.
  expect(median).toBeLessThan(20);
  expect(tail).toBeLessThan(50);
});
+
390
+ // -------------------------------------------------------------------------
391
+ // ToolExecutor end-to-end overhead benchmarks
392
+ // -------------------------------------------------------------------------
393
+
394
describe('ToolExecutor overhead', () => {
  // Artificial delay, in milliseconds, applied by the slow benchmark tool.
  const SLEEP_MS = 50;
  // Fewer iterations for slow-tool tests to avoid timeouts (50ms * 30 = 1.5s).
  const SLOW_ITERATIONS = 30;
  let executor: ToolExecutor;
  const ctx: ToolContext = {
    workingDir: '/tmp',
    sessionId: 'bench-session',
    conversationId: 'bench-conv',
  };

  /**
   * Builds a low-risk benchmark tool whose `execute` waits `sleepMs`
   * milliseconds (skipping the wait when `sleepMs` is not positive) and then
   * resolves with a successful 'ok' result.
   */
  function makeTool(name: string, sleepMs: number): Tool {
    const description = `Benchmark tool (${sleepMs}ms)`;
    const pause = (): Promise<void> =>
      new Promise((resolve) => {
        setTimeout(resolve, sleepMs);
      });

    return {
      name,
      description,
      category: 'benchmark',
      defaultRiskLevel: RiskLevel.Low,
      getDefinition: () => ({
        name,
        description,
        input_schema: { type: 'object' as const, properties: {} },
      }),
      execute: async (): Promise<ToolExecutionResult> => {
        if (sleepMs > 0) {
          await pause();
        }
        return { content: 'ok', isError: false };
      },
    };
  }

  /** Runs the named tool `WARMUP` times through the executor before timing. */
  async function warmUp(toolName: string): Promise<void> {
    for (let i = 0; i < WARMUP; i += 1) {
      await executor.execute(toolName, {}, ctx);
    }
  }

  beforeAll(() => {
    // Auto-allow prompter (never called for low-risk tools, but required by constructor).
    executor = new ToolExecutor(new PermissionPrompter(() => {}));

    // Register both benchmark tools under their own names.
    const benchTools = [makeTool('bench_noop', 0), makeTool('bench_slow', SLEEP_MS)];
    for (const tool of benchTools) {
      localRegistry.set(tool.name, tool);
    }
  });

  test('ToolExecutor with noop tool: pipeline overhead < 20ms', async () => {
    await warmUp('bench_noop');

    const bench = await benchmarkAsync(
      () => executor.execute('bench_noop', {}, ctx),
      ITERATIONS,
    );

    const median = percentile(bench.timings, 50);
    const tail = percentile(bench.timings, 95);

    // Full pipeline overhead for a noop tool should be minimal.
    expect(median).toBeLessThan(20);
    expect(tail).toBeLessThan(50);
  });

  test('ToolExecutor with slow tool (50ms): overhead is constant', async () => {
    await warmUp('bench_slow');

    const bench = await benchmarkAsync(
      () => executor.execute('bench_slow', {}, ctx),
      SLOW_ITERATIONS,
    );

    const median = percentile(bench.timings, 50);

    // Total time should be ~50ms + overhead: at least the sleep itself...
    expect(median).toBeGreaterThanOrEqual(SLEEP_MS);
    // ...and no more than the sleep plus a generous overhead budget.
    expect(median).toBeLessThan(SLEEP_MS + 30);
  }, 10_000);

  test('overhead subtraction: slow tool overhead matches noop overhead', async () => {
    // Alternate noop and slow executions, timing each one individually.
    const timeRun = async (toolName: string): Promise<number> => {
      const startedAt = performance.now();
      await executor.execute(toolName, {}, ctx);
      return performance.now() - startedAt;
    };

    const noopSamples: number[] = [];
    const slowSamples: number[] = [];
    for (let i = 0; i < SLOW_ITERATIONS; i += 1) {
      noopSamples.push(await timeRun('bench_noop'));
      slowSamples.push(await timeRun('bench_slow'));
    }

    const noopMedian = percentile(noopSamples, 50);
    const slowMedian = percentile(slowSamples, 50);

    // Overhead = slow_duration - sleep_time. Should be close to noop_duration.
    const slowOverhead = slowMedian - SLEEP_MS;

    // The overhead portion of the slow tool should be within 10ms of the noop total.
    const drift = Math.abs(slowOverhead - noopMedian);
    expect(drift).toBeLessThan(10);
  }, 10_000);
});
+ });
@@ -993,20 +993,16 @@ describe('isSideEffectTool', () => {
993
993
  expect(isSideEffectTool('account_manage')).toBe(false);
994
994
  });
995
995
 
996
- test('reminder create is a side-effect', () => {
997
- expect(isSideEffectTool('reminder', { action: 'create' })).toBe(true);
996
+ test('reminder_create is a side-effect', () => {
997
+ expect(isSideEffectTool('reminder_create')).toBe(true);
998
998
  });
999
999
 
1000
- test('reminder cancel is a side-effect', () => {
1001
- expect(isSideEffectTool('reminder', { action: 'cancel' })).toBe(true);
1000
+ test('reminder_cancel is a side-effect', () => {
1001
+ expect(isSideEffectTool('reminder_cancel')).toBe(true);
1002
1002
  });
1003
1003
 
1004
- test('reminder list is NOT a side-effect', () => {
1005
- expect(isSideEffectTool('reminder', { action: 'list' })).toBe(false);
1006
- });
1007
-
1008
- test('reminder without input is NOT a side-effect', () => {
1009
- expect(isSideEffectTool('reminder')).toBe(false);
1004
+ test('reminder_list is NOT a side-effect', () => {
1005
+ expect(isSideEffectTool('reminder_list')).toBe(false);
1010
1006
  });
1011
1007
 
1012
1008
  test('credential_store store is a side-effect', () => {
@@ -1262,7 +1258,7 @@ describe('ToolExecutor forcePromptSideEffects enforcement', () => {
1262
1258
  { name: 'document_create', input: { title: 'doc', content: 'body' } },
1263
1259
  { name: 'document_update', input: { id: 'doc-1', content: 'updated' } },
1264
1260
  { name: 'account_manage', input: { action: 'create', name: 'acct' } },
1265
- { name: 'reminder', input: { action: 'create', message: 'remind me' } },
1261
+ { name: 'reminder_create', input: { fire_at: '2030-01-01T00:00:00Z', label: 'test', message: 'remind me' } },
1266
1262
  { name: 'credential_store', input: { action: 'store', name: 'api-key', value: 'secret' } },
1267
1263
  ];
1268
1264
 
@@ -1550,13 +1546,13 @@ describe('ToolExecutor forcePromptSideEffects enforcement', () => {
1550
1546
  expect(promptCalled).toBe(false);
1551
1547
  });
1552
1548
 
1553
- test('reminder create forces prompt in private thread', async () => {
1549
+ test('reminder_create forces prompt in private thread', async () => {
1554
1550
  checkResultOverride = { decision: 'allow', reason: 'Matched trust rule' };
1555
1551
 
1556
1552
  const executor = new ToolExecutor(makeTrackingPrompter());
1557
1553
  const result = await executor.execute(
1558
- 'reminder',
1559
- { action: 'create', message: 'test reminder' },
1554
+ 'reminder_create',
1555
+ { fire_at: '2030-01-01T00:00:00Z', label: 'test', message: 'test reminder' },
1560
1556
  makeContext({ forcePromptSideEffects: true }),
1561
1557
  );
1562
1558
 
@@ -1564,13 +1560,13 @@ describe('ToolExecutor forcePromptSideEffects enforcement', () => {
1564
1560
  expect(promptCalled).toBe(true);
1565
1561
  });
1566
1562
 
1567
- test('reminder list does NOT force prompt in private thread', async () => {
1563
+ test('reminder_list does NOT force prompt in private thread', async () => {
1568
1564
  checkResultOverride = { decision: 'allow', reason: 'Matched trust rule' };
1569
1565
 
1570
1566
  const executor = new ToolExecutor(makeTrackingPrompter());
1571
1567
  const result = await executor.execute(
1572
- 'reminder',
1573
- { action: 'list' },
1568
+ 'reminder_list',
1569
+ {},
1574
1570
  makeContext({ forcePromptSideEffects: true }),
1575
1571
  );
1576
1572