@vellumai/assistant 0.4.48 → 0.4.50

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (423)
  1. package/ARCHITECTURE.md +26 -35
  2. package/README.md +5 -26
  3. package/docs/architecture/integrations.md +45 -41
  4. package/docs/architecture/keychain-broker.md +3 -3
  5. package/docs/architecture/memory.md +180 -119
  6. package/docs/runbook-trusted-contacts.md +3 -8
  7. package/hook-templates/debug-prompt-logger/hook.json +1 -1
  8. package/hook-templates/debug-prompt-logger/run.sh +1 -3
  9. package/package.json +2 -2
  10. package/src/__tests__/actor-token-service.test.ts +0 -1
  11. package/src/__tests__/agent-loop.test.ts +3 -1
  12. package/src/__tests__/anthropic-provider.test.ts +249 -2
  13. package/src/__tests__/approval-cascade.test.ts +796 -0
  14. package/src/__tests__/approval-primitive.test.ts +0 -1
  15. package/src/__tests__/approval-routes-http.test.ts +4 -0
  16. package/src/__tests__/assistant-attachments.test.ts +12 -34
  17. package/src/__tests__/assistant-feature-flag-guard.test.ts +0 -23
  18. package/src/__tests__/assistant-feature-flag-guardrails.test.ts +76 -0
  19. package/src/__tests__/assistant-feature-flags-integration.test.ts +0 -1
  20. package/src/__tests__/browser-skill-baseline-tool-payload.test.ts +2 -2
  21. package/src/__tests__/canonical-guardian-store.test.ts +95 -0
  22. package/src/__tests__/channel-guardian.test.ts +0 -2
  23. package/src/__tests__/channel-readiness-routes.test.ts +15 -6
  24. package/src/__tests__/channel-readiness-service.test.ts +10 -9
  25. package/src/__tests__/checker.test.ts +13 -20
  26. package/src/__tests__/computer-use-skill-manifest-regression.test.ts +1 -1
  27. package/src/__tests__/computer-use-tools.test.ts +2 -19
  28. package/src/__tests__/config-schema.test.ts +1 -68
  29. package/src/__tests__/config-watcher.test.ts +0 -1
  30. package/src/__tests__/confirmation-request-guardian-bridge.test.ts +0 -1
  31. package/src/__tests__/context-image-dimensions.test.ts +332 -0
  32. package/src/__tests__/context-memory-e2e.test.ts +11 -100
  33. package/src/__tests__/context-token-estimator.test.ts +196 -13
  34. package/src/__tests__/conversation-attention-store.test.ts +0 -1
  35. package/src/__tests__/conversation-attention-telegram.test.ts +0 -1
  36. package/src/__tests__/conversation-routes-guardian-reply.test.ts +152 -0
  37. package/src/__tests__/conversation-routes-slash-commands.test.ts +2 -0
  38. package/src/__tests__/credential-metadata-store.test.ts +64 -73
  39. package/src/__tests__/credential-security-e2e.test.ts +1 -0
  40. package/src/__tests__/credential-security-invariants.test.ts +13 -7
  41. package/src/__tests__/credential-vault-unit.test.ts +284 -49
  42. package/src/__tests__/credential-vault.test.ts +150 -16
  43. package/src/__tests__/credentials-cli.test.ts +71 -0
  44. package/src/__tests__/cu-unified-flow.test.ts +532 -0
  45. package/src/__tests__/date-context.test.ts +93 -77
  46. package/src/__tests__/deterministic-verification-control-plane.test.ts +64 -0
  47. package/src/__tests__/dynamic-skill-workflow-prompt.test.ts +0 -1
  48. package/src/__tests__/ephemeral-permissions.test.ts +3 -3
  49. package/src/__tests__/gateway-only-guard.test.ts +0 -1
  50. package/src/__tests__/guardian-action-grant-mint-consume.test.ts +0 -1
  51. package/src/__tests__/guardian-decision-primitive-canonical.test.ts +0 -1
  52. package/src/__tests__/guardian-routing-invariants.test.ts +93 -1
  53. package/src/__tests__/guardian-verification-voice-binding.test.ts +0 -1
  54. package/src/__tests__/handlers-user-message-approval-consumption.test.ts +0 -39
  55. package/src/__tests__/heartbeat-service.test.ts +0 -1
  56. package/src/__tests__/history-repair.test.ts +245 -0
  57. package/src/__tests__/host-cu-proxy.test.ts +791 -0
  58. package/src/__tests__/host-shell-tool.test.ts +27 -15
  59. package/src/__tests__/http-user-message-parity.test.ts +2 -0
  60. package/src/__tests__/ingress-url-consistency.test.ts +14 -21
  61. package/src/__tests__/integration-status.test.ts +32 -51
  62. package/src/__tests__/intent-routing.test.ts +0 -1
  63. package/src/__tests__/invite-redemption-service.test.ts +65 -1
  64. package/src/__tests__/invite-routes-http.test.ts +10 -9
  65. package/src/__tests__/keychain-broker-client.test.ts +14 -46
  66. package/src/__tests__/memory-context-benchmark.benchmark.test.ts +56 -18
  67. package/src/__tests__/memory-lifecycle-e2e.test.ts +244 -387
  68. package/src/__tests__/memory-recall-quality.test.ts +244 -407
  69. package/src/__tests__/memory-regressions.experimental.test.ts +126 -101
  70. package/src/__tests__/memory-regressions.test.ts +477 -2841
  71. package/src/__tests__/memory-retrieval.benchmark.test.ts +33 -150
  72. package/src/__tests__/memory-upsert-concurrency.test.ts +5 -244
  73. package/src/__tests__/mime-builder.test.ts +28 -0
  74. package/src/__tests__/native-web-search.test.ts +1 -0
  75. package/src/__tests__/notification-routing-intent.test.ts +0 -1
  76. package/src/__tests__/oauth-cli.test.ts +941 -15
  77. package/src/__tests__/oauth-provider-profiles.test.ts +9 -9
  78. package/src/__tests__/oauth-scope-policy.test.ts +4 -6
  79. package/src/__tests__/oauth-store.test.ts +870 -0
  80. package/src/__tests__/onboarding-starter-tasks.test.ts +0 -1
  81. package/src/__tests__/provider-error-scenarios.test.ts +0 -1
  82. package/src/__tests__/provider-streaming.benchmark.test.ts +0 -1
  83. package/src/__tests__/public-ingress-urls.test.ts +15 -21
  84. package/src/__tests__/qdrant-collection-migration.test.ts +53 -8
  85. package/src/__tests__/recording-handler.test.ts +3 -4
  86. package/src/__tests__/registry.test.ts +2 -3
  87. package/src/__tests__/relay-server.test.ts +46 -1
  88. package/src/__tests__/runtime-events-sse.test.ts +55 -7
  89. package/src/__tests__/schedule-store.test.ts +0 -1
  90. package/src/__tests__/schedule-tools.test.ts +32 -0
  91. package/src/__tests__/scheduler-recurrence.test.ts +0 -1
  92. package/src/__tests__/scoped-approval-grants.test.ts +0 -1
  93. package/src/__tests__/scoped-grant-security-matrix.test.ts +0 -1
  94. package/src/__tests__/script-proxy-certs.test.ts +1 -1
  95. package/src/__tests__/secret-ingress-handler.test.ts +0 -1
  96. package/src/__tests__/secret-onetime-send.test.ts +1 -0
  97. package/src/__tests__/secure-keys.test.ts +7 -2
  98. package/src/__tests__/send-endpoint-busy.test.ts +24 -6
  99. package/src/__tests__/sequence-store.test.ts +0 -1
  100. package/src/__tests__/session-abort-tool-results.test.ts +1 -14
  101. package/src/__tests__/session-agent-loop-overflow.test.ts +1583 -0
  102. package/src/__tests__/session-agent-loop.test.ts +19 -15
  103. package/src/__tests__/session-confirmation-signals.test.ts +1 -15
  104. package/src/__tests__/session-error.test.ts +124 -2
  105. package/src/__tests__/session-history-web-search.test.ts +918 -0
  106. package/src/__tests__/session-init.benchmark.test.ts +4 -5
  107. package/src/__tests__/session-pre-run-repair.test.ts +1 -14
  108. package/src/__tests__/session-provider-retry-repair.test.ts +25 -28
  109. package/src/__tests__/session-queue.test.ts +37 -27
  110. package/src/__tests__/session-runtime-assembly.test.ts +54 -0
  111. package/src/__tests__/session-slash-known.test.ts +1 -15
  112. package/src/__tests__/session-slash-queue.test.ts +1 -15
  113. package/src/__tests__/session-slash-unknown.test.ts +1 -15
  114. package/src/__tests__/session-workspace-cache-state.test.ts +3 -33
  115. package/src/__tests__/session-workspace-injection.test.ts +3 -37
  116. package/src/__tests__/session-workspace-tool-tracking.test.ts +3 -37
  117. package/src/__tests__/skill-include-graph.test.ts +66 -0
  118. package/src/__tests__/skill-load-feature-flag.test.ts +0 -1
  119. package/src/__tests__/skill-load-tool.test.ts +149 -1
  120. package/src/__tests__/skill-projection-feature-flag.test.ts +0 -1
  121. package/src/__tests__/skills-install-extract.test.ts +93 -0
  122. package/src/__tests__/skills-uninstall.test.ts +1 -1
  123. package/src/__tests__/skills.test.ts +3 -3
  124. package/src/__tests__/skillssh-registry.test.ts +451 -0
  125. package/src/__tests__/slack-channel-config.test.ts +67 -3
  126. package/src/__tests__/slack-share-routes.test.ts +17 -19
  127. package/src/__tests__/system-prompt.test.ts +0 -1
  128. package/src/__tests__/telegram-invite-adapter.test.ts +18 -22
  129. package/src/__tests__/terminal-tools.test.ts +4 -3
  130. package/src/__tests__/test-support/computer-use-skill-harness.ts +3 -2
  131. package/src/__tests__/tool-approval-handler.test.ts +0 -1
  132. package/src/__tests__/tool-execution-pipeline.benchmark.test.ts +0 -1
  133. package/src/__tests__/tool-executor-lifecycle-events.test.ts +0 -1
  134. package/src/__tests__/tool-executor-shell-integration.test.ts +0 -1
  135. package/src/__tests__/tool-executor.test.ts +0 -1
  136. package/src/__tests__/tool-grant-request-escalation.test.ts +0 -1
  137. package/src/__tests__/trust-store-pattern-matches.test.ts +29 -0
  138. package/src/__tests__/trust-store.test.ts +7 -13
  139. package/src/__tests__/trusted-contact-approval-notifier.test.ts +0 -1
  140. package/src/__tests__/trusted-contact-inline-approval-integration.test.ts +0 -1
  141. package/src/__tests__/twilio-routes.test.ts +0 -16
  142. package/src/__tests__/verification-control-plane-policy.test.ts +0 -1
  143. package/src/__tests__/voice-invite-redemption.test.ts +32 -1
  144. package/src/__tests__/voice-scoped-grant-consumer.test.ts +0 -1
  145. package/src/agent/ax-tree-compaction.test.ts +286 -0
  146. package/src/agent/loop.ts +104 -131
  147. package/src/approvals/AGENTS.md +1 -1
  148. package/src/approvals/guardian-request-resolvers.ts +14 -2
  149. package/src/bundler/compiler-tools.ts +66 -2
  150. package/src/calls/call-domain.ts +133 -6
  151. package/src/calls/call-store.ts +6 -0
  152. package/src/calls/relay-server.ts +52 -18
  153. package/src/calls/relay-setup-router.ts +17 -1
  154. package/src/calls/twilio-config.ts +3 -8
  155. package/src/calls/twilio-routes.ts +1 -2
  156. package/src/calls/types.ts +3 -1
  157. package/src/calls/voice-ingress-preflight.ts +1 -1
  158. package/src/cli/commands/browser-relay.ts +18 -12
  159. package/src/cli/commands/completions.ts +0 -3
  160. package/src/cli/commands/credentials.ts +101 -15
  161. package/src/cli/commands/doctor.ts +4 -3
  162. package/src/cli/commands/mcp.ts +46 -59
  163. package/src/cli/commands/memory.ts +16 -165
  164. package/src/cli/commands/oauth/apps.ts +284 -0
  165. package/src/cli/commands/oauth/connections.ts +633 -0
  166. package/src/cli/commands/oauth/index.ts +52 -0
  167. package/src/cli/commands/oauth/providers.ts +256 -0
  168. package/src/cli/commands/sessions.ts +5 -2
  169. package/src/cli/commands/skills.ts +177 -339
  170. package/src/cli/http-client.ts +0 -20
  171. package/src/cli/main-screen.tsx +2 -2
  172. package/src/cli/program.ts +6 -11
  173. package/src/cli/reference.ts +1 -3
  174. package/src/cli.ts +4 -10
  175. package/src/config/assistant-feature-flags.ts +0 -3
  176. package/src/config/bundled-skills/_shared/CLI_RETRIEVAL_PATTERN.md +1 -1
  177. package/src/config/bundled-skills/computer-use/SKILL.md +3 -6
  178. package/src/config/bundled-skills/computer-use/TOOLS.json +23 -5
  179. package/src/config/bundled-skills/computer-use/tools/{computer-use-request-control.ts → computer-use-observe.ts} +1 -5
  180. package/src/config/bundled-skills/google-calendar/calendar-client.ts +21 -16
  181. package/src/config/bundled-skills/messaging/tools/shared.ts +1 -4
  182. package/src/config/bundled-skills/settings/SKILL.md +1 -1
  183. package/src/config/bundled-skills/settings/TOOLS.json +2 -8
  184. package/src/config/bundled-skills/settings/tools/voice-config-update.ts +5 -33
  185. package/src/config/bundled-tool-registry.ts +2 -5
  186. package/src/config/env-registry.ts +14 -83
  187. package/src/config/env.ts +11 -50
  188. package/src/config/feature-flag-registry.json +16 -16
  189. package/src/config/loader.ts +0 -6
  190. package/src/config/schema.ts +4 -13
  191. package/src/config/schemas/memory-lifecycle.ts +0 -9
  192. package/src/config/schemas/memory-processing.ts +0 -180
  193. package/src/config/schemas/memory-retrieval.ts +32 -104
  194. package/src/config/schemas/memory.ts +0 -10
  195. package/src/config/skills.ts +21 -2
  196. package/src/config/types.ts +0 -4
  197. package/src/context/image-dimensions.ts +229 -0
  198. package/src/context/token-estimator.ts +75 -12
  199. package/src/context/window-manager.ts +53 -11
  200. package/src/daemon/assistant-attachments.ts +1 -13
  201. package/src/daemon/config-watcher.ts +61 -3
  202. package/src/daemon/daemon-control.ts +1 -1
  203. package/src/daemon/date-context.ts +114 -31
  204. package/src/daemon/handlers/config-ingress.ts +8 -33
  205. package/src/daemon/handlers/config-slack-channel.ts +49 -46
  206. package/src/daemon/handlers/config-telegram.ts +32 -16
  207. package/src/daemon/handlers/sessions.ts +27 -36
  208. package/src/daemon/handlers/shared.ts +0 -130
  209. package/src/daemon/handlers/skills.ts +20 -1
  210. package/src/daemon/history-repair.ts +72 -8
  211. package/src/daemon/host-cu-proxy.ts +430 -0
  212. package/src/daemon/lifecycle.ts +67 -71
  213. package/src/daemon/mcp-reload-service.ts +2 -2
  214. package/src/daemon/message-protocol.ts +3 -0
  215. package/src/daemon/message-types/computer-use.ts +1 -129
  216. package/src/daemon/message-types/host-cu.ts +19 -0
  217. package/src/daemon/message-types/memory.ts +4 -16
  218. package/src/daemon/message-types/messages.ts +4 -0
  219. package/src/daemon/message-types/sessions.ts +4 -0
  220. package/src/daemon/server.ts +25 -21
  221. package/src/daemon/session-agent-loop-handlers.ts +40 -0
  222. package/src/daemon/session-agent-loop.ts +334 -48
  223. package/src/daemon/session-attachments.ts +1 -2
  224. package/src/daemon/session-error.ts +89 -6
  225. package/src/daemon/session-history.ts +17 -7
  226. package/src/daemon/session-media-retry.ts +6 -2
  227. package/src/daemon/session-memory.ts +69 -149
  228. package/src/daemon/session-process.ts +10 -1
  229. package/src/daemon/session-runtime-assembly.ts +49 -19
  230. package/src/daemon/session-slash.ts +1 -1
  231. package/src/daemon/session-surfaces.ts +43 -28
  232. package/src/daemon/session-tool-setup.ts +9 -10
  233. package/src/daemon/session.ts +150 -17
  234. package/src/daemon/tool-side-effects.ts +2 -8
  235. package/src/daemon/watch-handler.ts +2 -2
  236. package/src/events/tool-metrics-listener.ts +2 -2
  237. package/src/hooks/manager.ts +1 -4
  238. package/src/inbound/public-ingress-urls.ts +7 -7
  239. package/src/instrument.ts +61 -1
  240. package/src/logfire.ts +16 -5
  241. package/src/memory/admin.ts +2 -191
  242. package/src/memory/canonical-guardian-store.ts +38 -2
  243. package/src/memory/conversation-crud.ts +0 -33
  244. package/src/memory/conversation-key-store.ts +21 -0
  245. package/src/memory/conversation-queries.ts +22 -3
  246. package/src/memory/db-init.ts +32 -0
  247. package/src/memory/embedding-backend.ts +84 -8
  248. package/src/memory/embedding-types.ts +9 -1
  249. package/src/memory/indexer.ts +7 -46
  250. package/src/memory/items-extractor.ts +274 -76
  251. package/src/memory/job-handlers/backfill.ts +2 -127
  252. package/src/memory/job-handlers/cleanup.ts +2 -16
  253. package/src/memory/job-handlers/extraction.ts +2 -138
  254. package/src/memory/job-handlers/index-maintenance.ts +1 -6
  255. package/src/memory/job-handlers/summarization.ts +3 -148
  256. package/src/memory/job-utils.ts +21 -59
  257. package/src/memory/jobs-store.ts +1 -159
  258. package/src/memory/jobs-worker.ts +9 -52
  259. package/src/memory/migrations/104-core-indexes.ts +3 -3
  260. package/src/memory/migrations/149-oauth-tables.ts +62 -0
  261. package/src/memory/migrations/150-oauth-apps-client-secret-path.ts +98 -0
  262. package/src/memory/migrations/151-oauth-providers-ping-url.ts +11 -0
  263. package/src/memory/migrations/152-memory-item-supersession.ts +44 -0
  264. package/src/memory/migrations/153-drop-entity-tables.ts +15 -0
  265. package/src/memory/migrations/154-drop-fts.ts +20 -0
  266. package/src/memory/migrations/155-drop-conflicts.ts +7 -0
  267. package/src/memory/migrations/156-call-session-invite-metadata.ts +24 -0
  268. package/src/memory/migrations/index.ts +8 -0
  269. package/src/memory/qdrant-client.ts +148 -51
  270. package/src/memory/raw-query.ts +1 -1
  271. package/src/memory/retriever.test.ts +294 -273
  272. package/src/memory/retriever.ts +421 -645
  273. package/src/memory/schema/calls.ts +2 -0
  274. package/src/memory/schema/index.ts +1 -0
  275. package/src/memory/schema/memory-core.ts +3 -48
  276. package/src/memory/schema/oauth.ts +67 -0
  277. package/src/memory/search/formatting.ts +263 -176
  278. package/src/memory/search/lexical.ts +1 -254
  279. package/src/memory/search/ranking.ts +0 -455
  280. package/src/memory/search/semantic.ts +100 -14
  281. package/src/memory/search/staleness.ts +47 -0
  282. package/src/memory/search/tier-classifier.ts +21 -0
  283. package/src/memory/search/types.ts +15 -77
  284. package/src/memory/task-memory-cleanup.ts +4 -6
  285. package/src/messaging/provider.ts +4 -4
  286. package/src/messaging/providers/gmail/client.ts +82 -2
  287. package/src/messaging/providers/gmail/mime-builder.ts +17 -7
  288. package/src/messaging/providers/gmail/people-client.ts +10 -10
  289. package/src/messaging/providers/telegram-bot/adapter.ts +17 -17
  290. package/src/messaging/providers/whatsapp/adapter.ts +11 -8
  291. package/src/messaging/registry.ts +2 -32
  292. package/src/notifications/copy-composer.ts +0 -5
  293. package/src/notifications/signal.ts +4 -5
  294. package/src/oauth/byo-connection.test.ts +133 -25
  295. package/src/oauth/byo-connection.ts +22 -6
  296. package/src/oauth/connect-orchestrator.ts +113 -57
  297. package/src/oauth/connect-types.ts +17 -23
  298. package/src/oauth/connection-resolver.ts +35 -11
  299. package/src/oauth/connection.ts +1 -1
  300. package/src/oauth/manual-token-connection.ts +104 -0
  301. package/src/oauth/oauth-store.ts +582 -0
  302. package/src/oauth/platform-connection.test.ts +29 -0
  303. package/src/oauth/platform-connection.ts +6 -5
  304. package/src/oauth/provider-behaviors.ts +124 -0
  305. package/src/oauth/scope-policy.ts +9 -2
  306. package/src/oauth/seed-providers.ts +167 -0
  307. package/src/oauth/token-persistence.ts +81 -77
  308. package/src/permissions/checker.ts +3 -3
  309. package/src/permissions/defaults.ts +1 -1
  310. package/src/permissions/prompter.ts +10 -1
  311. package/src/permissions/trust-store.ts +36 -1
  312. package/src/playbooks/playbook-compiler.ts +1 -1
  313. package/src/prompts/__tests__/build-cli-reference-section.test.ts +3 -1
  314. package/src/prompts/system-prompt.ts +46 -42
  315. package/src/providers/anthropic/client.ts +59 -20
  316. package/src/providers/retry.ts +1 -27
  317. package/src/providers/types.ts +7 -1
  318. package/src/runtime/AGENTS.md +9 -0
  319. package/src/runtime/auth/route-policy.ts +6 -6
  320. package/src/runtime/channel-reply-delivery.ts +0 -40
  321. package/src/runtime/gateway-client.ts +0 -7
  322. package/src/runtime/guardian-reply-router.ts +24 -22
  323. package/src/runtime/http-server.ts +10 -8
  324. package/src/runtime/http-types.ts +2 -2
  325. package/src/runtime/invite-redemption-service.ts +19 -1
  326. package/src/runtime/invite-service.ts +25 -0
  327. package/src/runtime/middleware/twilio-validation.ts +1 -11
  328. package/src/runtime/pending-interactions.ts +14 -12
  329. package/src/runtime/routes/brain-graph-routes.ts +10 -90
  330. package/src/runtime/routes/channel-delivery-routes.ts +0 -1
  331. package/src/runtime/routes/conversation-routes.ts +81 -19
  332. package/src/runtime/routes/events-routes.ts +21 -11
  333. package/src/runtime/routes/host-cu-routes.ts +97 -0
  334. package/src/runtime/routes/inbound-stages/acl-enforcement.ts +21 -12
  335. package/src/runtime/routes/inbound-stages/background-dispatch.ts +12 -111
  336. package/src/runtime/routes/integrations/slack/share.ts +6 -7
  337. package/src/runtime/routes/log-export-routes.ts +126 -8
  338. package/src/runtime/routes/memory-item-routes.test.ts +754 -0
  339. package/src/runtime/routes/memory-item-routes.ts +503 -0
  340. package/src/runtime/routes/session-management-routes.ts +3 -3
  341. package/src/runtime/routes/settings-routes.ts +55 -48
  342. package/src/runtime/routes/surface-action-routes.ts +1 -1
  343. package/src/runtime/routes/trust-rules-routes.ts +14 -0
  344. package/src/runtime/routes/watch-routes.ts +128 -0
  345. package/src/runtime/routes/workspace-routes.ts +2 -1
  346. package/src/schedule/integration-status.ts +10 -9
  347. package/src/security/credential-key.ts +0 -156
  348. package/src/security/keychain-broker-client.ts +22 -10
  349. package/src/security/oauth2.ts +1 -1
  350. package/src/security/secure-keys.ts +25 -3
  351. package/src/security/token-manager.ts +137 -64
  352. package/src/skills/catalog-install.ts +414 -0
  353. package/src/skills/include-graph.ts +32 -0
  354. package/src/skills/skillssh-registry.ts +503 -0
  355. package/src/telegram/bot-username.ts +2 -3
  356. package/src/tools/assets/search.ts +5 -1
  357. package/src/tools/browser/network-recorder.ts +1 -1
  358. package/src/tools/browser/network-recording-types.ts +1 -1
  359. package/src/tools/computer-use/definitions.ts +36 -11
  360. package/src/tools/computer-use/registry.ts +5 -6
  361. package/src/tools/credentials/broker.ts +1 -2
  362. package/src/tools/credentials/metadata-store.ts +17 -121
  363. package/src/tools/credentials/vault.ts +92 -167
  364. package/src/tools/memory/definitions.ts +4 -13
  365. package/src/tools/memory/handlers.test.ts +83 -103
  366. package/src/tools/memory/handlers.ts +50 -85
  367. package/src/tools/registry.ts +2 -7
  368. package/src/tools/schedule/create.ts +8 -1
  369. package/src/tools/schedule/update.ts +8 -1
  370. package/src/tools/skills/load.ts +85 -3
  371. package/src/tools/watch/watch-state.ts +0 -12
  372. package/src/util/logger.ts +7 -41
  373. package/src/util/platform.ts +9 -28
  374. package/src/watcher/providers/google-calendar.ts +2 -1
  375. package/src/__tests__/clarification-resolver.test.ts +0 -193
  376. package/src/__tests__/computer-use-session-compaction.test.ts +0 -143
  377. package/src/__tests__/computer-use-session-lifecycle.test.ts +0 -322
  378. package/src/__tests__/computer-use-session-working-dir.test.ts +0 -166
  379. package/src/__tests__/computer-use-skill-baseline.test.ts +0 -78
  380. package/src/__tests__/computer-use-skill-endstate.test.ts +0 -105
  381. package/src/__tests__/computer-use-skill-lifecycle-cleanup.test.ts +0 -249
  382. package/src/__tests__/conflict-intent-tokenization.test.ts +0 -160
  383. package/src/__tests__/conflict-policy.test.ts +0 -269
  384. package/src/__tests__/conflict-store.test.ts +0 -372
  385. package/src/__tests__/contradiction-checker.test.ts +0 -361
  386. package/src/__tests__/entity-extractor.test.ts +0 -211
  387. package/src/__tests__/entity-search.test.ts +0 -1117
  388. package/src/__tests__/profile-compiler.test.ts +0 -392
  389. package/src/__tests__/ride-shotgun-handler.test.ts +0 -452
  390. package/src/__tests__/session-conflict-gate.test.ts +0 -1228
  391. package/src/__tests__/session-profile-injection.test.ts +0 -557
  392. package/src/cli/commands/dev.ts +0 -129
  393. package/src/cli/commands/map.ts +0 -391
  394. package/src/cli/commands/oauth.ts +0 -77
  395. package/src/config/bundled-skills/knowledge-graph/SKILL.md +0 -25
  396. package/src/config/bundled-skills/knowledge-graph/TOOLS.json +0 -66
  397. package/src/config/bundled-skills/knowledge-graph/tools/graph-query.ts +0 -211
  398. package/src/daemon/computer-use-session.ts +0 -1026
  399. package/src/daemon/ride-shotgun-handler.ts +0 -569
  400. package/src/daemon/session-conflict-gate.ts +0 -167
  401. package/src/daemon/session-dynamic-profile.ts +0 -77
  402. package/src/memory/clarification-resolver.ts +0 -417
  403. package/src/memory/conflict-intent.ts +0 -205
  404. package/src/memory/conflict-policy.ts +0 -127
  405. package/src/memory/conflict-store.ts +0 -410
  406. package/src/memory/contradiction-checker.ts +0 -508
  407. package/src/memory/entity-extractor.ts +0 -535
  408. package/src/memory/format-recall.ts +0 -47
  409. package/src/memory/fts-reconciler.ts +0 -165
  410. package/src/memory/job-handlers/conflict.ts +0 -200
  411. package/src/memory/profile-compiler.ts +0 -195
  412. package/src/memory/recall-cache.ts +0 -117
  413. package/src/memory/search/entity.ts +0 -535
  414. package/src/memory/search/query-expansion.test.ts +0 -70
  415. package/src/memory/search/query-expansion.ts +0 -118
  416. package/src/oauth/provider-base-urls.ts +0 -21
  417. package/src/oauth/provider-profiles.ts +0 -192
  418. package/src/prompts/computer-use-prompt.ts +0 -98
  419. package/src/runtime/routes/computer-use-routes.ts +0 -641
  420. package/src/runtime/routes/mcp-routes.ts +0 -20
  421. package/src/runtime/telegram-streaming-delivery.test.ts +0 -729
  422. package/src/runtime/telegram-streaming-delivery.ts +0 -393
  423. package/src/tools/computer-use/request-computer-control.ts +0 -56
@@ -9,6 +9,11 @@ import { loadSkillBySelector, loadSkillCatalog } from "../../config/skills.js";
9
9
  import { RiskLevel } from "../../permissions/types.js";
10
10
  import type { ToolDefinition } from "../../providers/types.js";
11
11
  import {
12
+ autoInstallFromCatalog,
13
+ resolveCatalog,
14
+ } from "../../skills/catalog-install.js";
15
+ import {
16
+ collectAllMissing,
12
17
  indexCatalogById,
13
18
  validateIncludes,
14
19
  } from "../../skills/include-graph.js";
@@ -137,7 +142,32 @@ export class SkillLoadTool implements Tool {
137
142
  };
138
143
  }
139
144
 
140
- const loaded = loadSkillBySelector(selector);
145
+ let loaded = loadSkillBySelector(selector);
146
+
147
+ // Auto-install from catalog if the skill isn't found locally
148
+ if (
149
+ !loaded.skill &&
150
+ (loaded.errorCode === "not_found" || loaded.errorCode === "empty_catalog")
151
+ ) {
152
+ try {
153
+ const installed = await autoInstallFromCatalog(selector);
154
+ if (installed) {
155
+ log.info({ skillId: selector }, "Auto-installed skill from catalog");
156
+ loaded = loadSkillBySelector(selector);
157
+ }
158
+ } catch (err) {
159
+ const installError = err instanceof Error ? err.message : String(err);
160
+ log.warn(
161
+ { err, skillId: selector },
162
+ "Auto-install from catalog failed",
163
+ );
164
+ return {
165
+ content: `Error: skill "${selector}" was found in the catalog but installation failed: ${installError}`,
166
+ isError: true,
167
+ };
168
+ }
169
+ }
170
+
141
171
  if (!loaded.skill) {
142
172
  return {
143
173
  content: `Error: ${loaded.error ?? "Failed to load skill"}`,
@@ -160,10 +190,62 @@ export class SkillLoadTool implements Tool {
160
190
  // Load catalog for include validation and child metadata output
161
191
  let catalogIndex: Map<string, SkillSummary> | undefined;
162
192
  if (skill.includes && skill.includes.length > 0) {
163
- const catalog = loadSkillCatalog();
193
+ let catalog = loadSkillCatalog();
164
194
  catalogIndex = indexCatalogById(catalog);
165
195
 
166
- // Validate recursive includes (fail-closed)
196
+ // Auto-install missing includes before validation (max 5 rounds for transitive deps)
197
+ // Defer catalog resolution until we confirm there are missing includes,
198
+ // then cache the result to avoid redundant network requests per dependency.
199
+ let remoteCatalog: Awaited<ReturnType<typeof resolveCatalog>> | undefined;
200
+
201
+ const MAX_INSTALL_ROUNDS = 5;
202
+ for (let round = 0; round < MAX_INSTALL_ROUNDS; round++) {
203
+ const missing = collectAllMissing(skill.id, catalogIndex);
204
+ if (missing.size === 0) break;
205
+
206
+ // Lazily resolve catalog on first round with missing includes
207
+ if (!remoteCatalog) {
208
+ try {
209
+ remoteCatalog = await resolveCatalog([...missing][0]);
210
+ } catch (err) {
211
+ log.warn(
212
+ { err, skillId: skill.id },
213
+ "Failed to resolve catalog for include auto-install",
214
+ );
215
+ break;
216
+ }
217
+ }
218
+
219
+ let installedAny = false;
220
+ for (const missingId of missing) {
221
+ try {
222
+ const installed = await autoInstallFromCatalog(
223
+ missingId,
224
+ remoteCatalog,
225
+ );
226
+ if (installed) {
227
+ log.info(
228
+ { skillId: missingId, parentSkillId: skill.id },
229
+ "Auto-installed missing include",
230
+ );
231
+ installedAny = true;
232
+ }
233
+ } catch (err) {
234
+ log.warn(
235
+ { err, skillId: missingId },
236
+ "Failed to auto-install missing include",
237
+ );
238
+ }
239
+ }
240
+
241
+ if (!installedAny) break; // Nothing could be installed, stop trying
242
+
243
+ // Reload catalog to pick up newly installed skills
244
+ catalog = loadSkillCatalog();
245
+ catalogIndex = indexCatalogById(catalog);
246
+ }
247
+
248
+ // Validate (fail-closed — catches genuinely missing deps + cycles)
167
249
  const validation = validateIncludes(skill.id, catalogIndex);
168
250
  if (!validation.ok) {
169
251
  if (validation.error === "missing") {
@@ -24,18 +24,6 @@ export interface WatchSession {
24
24
  timeoutHandle?: ReturnType<typeof setTimeout>;
25
25
  /** Guards against concurrent generateSummary calls */
26
26
  summaryInFlight?: boolean;
27
- /** Whether this session was started via ride shotgun (no live commentary) */
28
- isRideShotgun?: boolean;
29
- /** Learn mode records network traffic alongside screen observations */
30
- isLearnMode?: boolean;
31
- /** Domain filter for network recording in learn mode */
32
- targetDomain?: string;
33
- /** Recording ID for learn mode sessions */
34
- recordingId?: string;
35
- /** Path where the learn recording was successfully saved (undefined if save failed) */
36
- savedRecordingPath?: string;
37
- /** Reason the learn-mode bootstrap failed (CDP launch vs recorder attach) */
38
- bootstrapFailureReason?: string;
39
27
  }
40
28
 
41
29
  /** Module-level map of watch sessions keyed by watchId. */
@@ -12,11 +12,7 @@ import pino from "pino";
12
12
  import type { PrettyOptions } from "pino-pretty";
13
13
  import pinoPretty from "pino-pretty";
14
14
 
15
- import {
16
- getDebugMode,
17
- getDebugStdoutLogs,
18
- getLogStderr,
19
- } from "../config/env-registry.js";
15
+ import { getDebugStdoutLogs } from "../config/env-registry.js";
20
16
  import { logSerializers } from "./log-redact.js";
21
17
  import { getLogPath } from "./platform.js";
22
18
 
@@ -110,31 +106,18 @@ function buildRotatingLogger(config: LogFileConfig): pino.Logger {
110
106
  activeLogDate = today;
111
107
  activeLogFileConfig = config;
112
108
 
113
- const level = getDebugMode() ? "debug" : "info";
114
-
115
- if (getDebugMode()) {
116
- const prettyStream = pinoPretty(prettyOpts({ destination: 2 }));
117
- return pino(
118
- { name: "assistant", level, serializers: logSerializers },
119
- pino.multistream([
120
- { stream: fileStream, level: "info" as const },
121
- { stream: prettyStream, level: "debug" as const },
122
- ]),
123
- );
124
- }
125
-
126
109
  // When stdout is not a TTY (e.g. desktop app redirects to a hatch log file),
127
110
  // write to the rotating file only — the hatch log already captured early
128
111
  // startup output and echoing pino output there is unnecessary duplication.
129
112
  if (!process.stdout.isTTY) {
130
113
  return pino(
131
- { name: "assistant", level, serializers: logSerializers },
114
+ { name: "assistant", level: "info", serializers: logSerializers },
132
115
  fileStream,
133
116
  );
134
117
  }
135
118
 
136
119
  return pino(
137
- { name: "assistant", level, serializers: logSerializers },
120
+ { name: "assistant", level: "info", serializers: logSerializers },
138
121
  pino.multistream([
139
122
  { stream: fileStream, level: "info" as const },
140
123
  {
@@ -173,13 +156,11 @@ function getRootLogger(): pino.Logger {
173
156
  }
174
157
  if (!rootLogger) {
175
158
  const forceStderr =
176
- process.env.BUN_TEST === "1" ||
177
- process.env.NODE_ENV === "test" ||
178
- getLogStderr();
159
+ process.env.BUN_TEST === "1" || process.env.NODE_ENV === "test";
179
160
  if (forceStderr) {
180
161
  rootLogger = pino(
181
162
  {
182
- level: getDebugMode() ? "debug" : "info",
163
+ level: "info",
183
164
  serializers: logSerializers,
184
165
  },
185
166
  pino.destination(2),
@@ -208,17 +189,7 @@ function getRootLogger(): pino.Logger {
208
189
  prettyOpts({ destination: fileDest, colorize: false }),
209
190
  );
210
191
 
211
- if (getDebugMode()) {
212
- const prettyStream = pinoPretty(prettyOpts({ destination: 2 }));
213
- const multi = pino.multistream([
214
- { stream: fileStream, level: "info" as const },
215
- { stream: prettyStream, level: "debug" as const },
216
- ]);
217
- rootLogger = pino(
218
- { level: "debug", serializers: logSerializers },
219
- multi,
220
- );
221
- } else if (getDebugStdoutLogs()) {
192
+ if (getDebugStdoutLogs()) {
222
193
  rootLogger = pino(
223
194
  { level: "info", serializers: logSerializers },
224
195
  pino.multistream([
@@ -238,7 +209,7 @@ function getRootLogger(): pino.Logger {
238
209
  } catch {
239
210
  rootLogger = pino(
240
211
  {
241
- level: getDebugMode() ? "debug" : "info",
212
+ level: "info",
242
213
  serializers: logSerializers,
243
214
  },
244
215
  pinoPretty(prettyOpts({ destination: 2 })),
@@ -248,11 +219,6 @@ function getRootLogger(): pino.Logger {
248
219
  return rootLogger;
249
220
  }
250
221
 
251
- /** Returns true when VELLUM_DEBUG=1 is set. */
252
- export function isDebug(): boolean {
253
- return getDebugMode();
254
- }
255
-
256
222
  /**
257
223
  * Truncate a string for debug logging. Returns the original if under maxLen,
258
224
  * otherwise returns the first maxLen chars with a suffix indicating how much was cut.
@@ -8,13 +8,7 @@ import {
8
8
  import { homedir } from "node:os";
9
9
  import { join } from "node:path";
10
10
 
11
- import {
12
- getBaseDataDir,
13
- getDaemonIosPairing,
14
- getDaemonTcpEnabled,
15
- getDaemonTcpHost,
16
- getDaemonTcpPort,
17
- } from "../config/env-registry.js";
11
+ import { getBaseDataDir } from "../config/env-registry.js";
18
12
 
19
13
  export function isMacOS(): boolean {
20
14
  return process.platform === "darwin";
@@ -245,39 +239,30 @@ export function getInterfacesDir(): string {
245
239
 
246
240
  /**
247
241
  * Returns the TCP port the daemon should listen on for iOS clients.
248
- * Reads VELLUM_DAEMON_TCP_PORT env var; defaults to 8765.
242
+ * Hardcoded default: 8765.
249
243
  */
250
244
  export function getTCPPort(): number {
251
- return getDaemonTcpPort();
245
+ return 8765;
252
246
  }
253
247
 
254
248
  /**
255
249
  * Returns whether the daemon TCP listener should be enabled.
256
- * Resolution order (first match wins):
257
- * 1. VELLUM_DAEMON_TCP_ENABLED env var ('true'/'1' → on, 'false'/'0' → off)
258
- * 2. Presence of the flag file ~/.vellum/tcp-enabled (exists → on)
259
- * 3. Default: false
250
+ * Checks for the presence of the flag file ~/.vellum/tcp-enabled.
251
+ * Default: false.
260
252
  *
261
253
  * The flag-file check makes it easy to enable TCP in dev without restarting
262
254
  * the shell: `touch ~/.vellum/tcp-enabled && kill -USR1 <daemon-pid>`.
263
- * The macOS CLI (AssistantCli) also sets the env var for bundled-binary deployments.
264
255
  */
265
256
  export function isTCPEnabled(): boolean {
266
- const envValue = getDaemonTcpEnabled();
267
- if (envValue !== undefined) return envValue;
268
257
  return existsSync(join(getRootDir(), "tcp-enabled"));
269
258
  }
270
259
 
271
260
  /**
272
261
  * Returns the hostname/address for the TCP listener.
273
- * Resolution order (first match wins):
274
- * 1. VELLUM_DAEMON_TCP_HOST env var (explicit override)
275
- * 2. If iOS pairing is enabled: '0.0.0.0' (LAN-accessible)
276
- * 3. Default: '127.0.0.1' (localhost only)
262
+ * If iOS pairing is enabled (flag file): '0.0.0.0' (LAN-accessible).
263
+ * Default: '127.0.0.1' (localhost only).
277
264
  */
278
265
  export function getTCPHost(): string {
279
- const override = getDaemonTcpHost();
280
- if (override) return override;
281
266
  if (isIOSPairingEnabled()) return "0.0.0.0";
282
267
  return "127.0.0.1";
283
268
  }
@@ -288,17 +273,13 @@ export function getTCPHost(): string {
288
273
  * instead of 127.0.0.1 (localhost only), making the daemon reachable
289
274
  * from iOS devices on the same local network.
290
275
  *
291
- * Resolution order (first match wins):
292
- * 1. VELLUM_DAEMON_IOS_PAIRING env var ('true'/'1' → on, 'false'/'0' → off)
293
- * 2. Presence of the flag file ~/.vellum/ios-pairing-enabled (exists → on)
294
- * 3. Default: false
276
+ * Checks for the presence of the flag file ~/.vellum/ios-pairing-enabled.
277
+ * Default: false.
295
278
  *
296
279
  * This is separate from isTCPEnabled() — TCP can be enabled for localhost-only
297
280
  * access without exposing the daemon to the LAN.
298
281
  */
299
282
  export function isIOSPairingEnabled(): boolean {
300
- const envValue = getDaemonIosPairing();
301
- if (envValue !== undefined) return envValue;
302
283
  return existsSync(join(getRootDir(), "ios-pairing-enabled"));
303
284
  }
304
285
 
@@ -13,7 +13,8 @@ import {
13
13
  import type { CalendarEvent } from "../../config/bundled-skills/google-calendar/types.js";
14
14
  import type { OAuthConnection } from "../../oauth/connection.js";
15
15
  import { resolveOAuthConnection } from "../../oauth/connection-resolver.js";
16
- import { GOOGLE_CALENDAR_BASE_URL } from "../../oauth/provider-base-urls.js";
16
+
17
+ const GOOGLE_CALENDAR_BASE_URL = "https://www.googleapis.com/calendar/v3";
17
18
  import { getLogger } from "../../util/logger.js";
18
19
  import type {
19
20
  FetchResult,
@@ -1,193 +0,0 @@
1
- import { beforeEach, describe, expect, mock, test } from "bun:test";
2
-
3
- let llmCallCount = 0;
4
- let llmDelayMs = 0;
5
- let llmResolution:
6
- | "keep_existing"
7
- | "keep_candidate"
8
- | "merge"
9
- | "still_unclear" = "still_unclear";
10
- let llmResolvedStatement = "";
11
- let llmExplanation = "Unclear response from user.";
12
-
13
- mock.module("../providers/provider-send-message.js", () => ({
14
- getConfiguredProvider: () => ({
15
- sendMessage: async (
16
- _messages: unknown,
17
- _tools: unknown,
18
- _system: unknown,
19
- opts?: { signal?: AbortSignal },
20
- ) => {
21
- llmCallCount += 1;
22
- if (llmDelayMs > 0) {
23
- await new Promise((resolve, reject) => {
24
- const timer = setTimeout(resolve, llmDelayMs);
25
- opts?.signal?.addEventListener("abort", () => {
26
- clearTimeout(timer);
27
- reject(new Error("Request was aborted."));
28
- });
29
- });
30
- }
31
- return {
32
- content: [
33
- {
34
- type: "tool_use" as const,
35
- id: "test-tool-use-id",
36
- name: "resolve_conflict",
37
- input: {
38
- resolution: llmResolution,
39
- resolved_statement: llmResolvedStatement,
40
- explanation: llmExplanation,
41
- },
42
- },
43
- ],
44
- model: "claude-haiku-4-5-20251001",
45
- stopReason: "tool_use",
46
- usage: { inputTokens: 0, outputTokens: 0 },
47
- };
48
- },
49
- }),
50
- createTimeout: (ms: number) => {
51
- const controller = new AbortController();
52
- const timer = setTimeout(() => controller.abort(), ms);
53
- return {
54
- signal: controller.signal,
55
- cleanup: () => clearTimeout(timer),
56
- };
57
- },
58
- extractToolUse: (response: { content: Array<{ type: string }> }) => {
59
- return response.content.find(
60
- (b: { type: string }) => b.type === "tool_use",
61
- );
62
- },
63
- userMessage: (text: string) => ({
64
- role: "user",
65
- content: [{ type: "text", text }],
66
- }),
67
- }));
68
-
69
- mock.module("../config/loader.js", () => ({
70
- getConfig: () => ({
71
- ui: {},
72
-
73
- apiKeys: {
74
- anthropic: "test-key",
75
- },
76
- }),
77
- }));
78
-
79
- import { resolveConflictClarification } from "../memory/clarification-resolver.js";
80
-
81
- beforeEach(() => {
82
- llmCallCount = 0;
83
- llmDelayMs = 0;
84
- llmResolution = "still_unclear";
85
- llmResolvedStatement = "";
86
- llmExplanation = "Unclear response from user.";
87
- });
88
-
89
- describe("resolveConflictClarification", () => {
90
- test("returns keep_existing from deterministic heuristic", async () => {
91
- const result = await resolveConflictClarification({
92
- existingStatement: "Use React for frontend work.",
93
- candidateStatement: "Use Vue for frontend work.",
94
- userMessage: "Keep the old React preference.",
95
- });
96
-
97
- expect(result.resolution).toBe("keep_existing");
98
- expect(result.strategy).toBe("heuristic");
99
- expect(llmCallCount).toBe(0);
100
- });
101
-
102
- test("returns keep_candidate from deterministic heuristic", async () => {
103
- const result = await resolveConflictClarification({
104
- existingStatement: "Use React for frontend work.",
105
- candidateStatement: "Use Vue for frontend work.",
106
- userMessage: "Use the new Vue note going forward.",
107
- });
108
-
109
- expect(result.resolution).toBe("keep_candidate");
110
- expect(result.strategy).toBe("heuristic");
111
- expect(llmCallCount).toBe(0);
112
- });
113
-
114
- test("returns merge from deterministic heuristic", async () => {
115
- const result = await resolveConflictClarification({
116
- existingStatement: "React is preferred for dashboards.",
117
- candidateStatement: "Vue is preferred for marketing pages.",
118
- userMessage:
119
- "Both are true: React for dashboards and Vue for marketing pages.",
120
- });
121
-
122
- expect(result.resolution).toBe("merge");
123
- expect(result.strategy).toBe("heuristic");
124
- expect(result.resolvedStatement).toContain("Both are true");
125
- expect(llmCallCount).toBe(0);
126
- });
127
-
128
- test("uses LLM fallback when heuristics are inconclusive", async () => {
129
- llmResolution = "still_unclear";
130
- llmExplanation = "The user message does not pick a side.";
131
-
132
- const result = await resolveConflictClarification({
133
- existingStatement: "Use React for frontend work.",
134
- candidateStatement: "Use Vue for frontend work.",
135
- userMessage: "Not sure yet.",
136
- });
137
-
138
- expect(result.resolution).toBe("still_unclear");
139
- expect(result.strategy).toBe("llm");
140
- expect(llmCallCount).toBe(1);
141
- });
142
-
143
- test("does not match cue substrings inside unrelated words", async () => {
144
- llmResolution = "keep_candidate";
145
- llmExplanation = "User wants Vue.";
146
-
147
- // "told" contains "old" as a substring but not as a whole word
148
- const result = await resolveConflictClarification({
149
- existingStatement: "Use React for frontend work.",
150
- candidateStatement: "Use Vue for frontend work.",
151
- userMessage: "I told you, use Vue.",
152
- });
153
-
154
- expect(result.resolution).toBe("keep_candidate");
155
- expect(result.strategy).toBe("llm");
156
- expect(llmCallCount).toBe(1);
157
- });
158
-
159
- test("delegates to LLM when multiple cue categories match", async () => {
160
- llmResolution = "keep_existing";
161
- llmExplanation = "User wants the old one.";
162
-
163
- // "either" is a merge cue, "old" is an existing cue — ambiguous
164
- const result = await resolveConflictClarification({
165
- existingStatement: "Use React for frontend work.",
166
- candidateStatement: "Use Vue for frontend work.",
167
- userMessage: "I don't want either, keep the old one.",
168
- });
169
-
170
- expect(result.resolution).toBe("keep_existing");
171
- expect(result.strategy).toBe("llm");
172
- expect(llmCallCount).toBe(1);
173
- });
174
-
175
- test("enforces timeout bound on LLM fallback", async () => {
176
- llmResolution = "keep_candidate";
177
- llmExplanation = "Prefer the newer statement.";
178
- llmDelayMs = 50;
179
-
180
- const result = await resolveConflictClarification(
181
- {
182
- existingStatement: "Use React for frontend work.",
183
- candidateStatement: "Use Vue for frontend work.",
184
- userMessage: "I cannot decide right now.",
185
- },
186
- { timeoutMs: 5 },
187
- );
188
-
189
- expect(result.resolution).toBe("still_unclear");
190
- expect(result.strategy).toBe("llm_timeout");
191
- expect(llmCallCount).toBe(1);
192
- });
193
- });
@@ -1,143 +0,0 @@
1
- import { describe, expect, test } from "bun:test";
2
-
3
- import { ComputerUseSession } from "../daemon/computer-use-session.js";
4
- import type { Message } from "../providers/types.js";
5
-
6
- /**
7
- * Helper to create a user message with a tool_result block containing
8
- * an AX tree wrapped in markers.
9
- */
10
- function toolResultMsg(content: string): Message {
11
- return {
12
- role: "user",
13
- content: [
14
- {
15
- type: "tool_result",
16
- tool_use_id: "test-id",
17
- content,
18
- },
19
- ],
20
- };
21
- }
22
-
23
- describe("ComputerUseSession.escapeAxTreeContent", () => {
24
- test("escapes a literal closing tag in the content", () => {
25
- const input = "some text </ax-tree> more text";
26
- const escaped = ComputerUseSession.escapeAxTreeContent(input);
27
- expect(escaped).toBe("some text &lt;/ax-tree&gt; more text");
28
- });
29
-
30
- test("escapes multiple occurrences", () => {
31
- const input = "</ax-tree> hello </ax-tree>";
32
- const escaped = ComputerUseSession.escapeAxTreeContent(input);
33
- expect(escaped).toBe("&lt;/ax-tree&gt; hello &lt;/ax-tree&gt;");
34
- });
35
-
36
- test("is case-insensitive", () => {
37
- const input = "</AX-TREE> and </Ax-Tree>";
38
- const escaped = ComputerUseSession.escapeAxTreeContent(input);
39
- expect(escaped).toBe("&lt;/ax-tree&gt; and &lt;/ax-tree&gt;");
40
- });
41
-
42
- test("leaves content without closing tags unchanged", () => {
43
- const input = 'Window "My App" [1]\n Button "OK" [2]';
44
- expect(ComputerUseSession.escapeAxTreeContent(input)).toBe(input);
45
- });
46
- });
47
-
48
- describe("ComputerUseSession.compactHistory", () => {
49
- test("[experimental] strips old AX trees and keeps the most recent ones", () => {
50
- const messages: Message[] = [
51
- { role: "assistant", content: [{ type: "text", text: "thinking..." }] },
52
- toolResultMsg(
53
- '<ax-tree>CURRENT SCREEN STATE:\nWindow "App" [1]</ax-tree>',
54
- ),
55
- { role: "assistant", content: [{ type: "text", text: "action 1" }] },
56
- toolResultMsg(
57
- '<ax-tree>CURRENT SCREEN STATE:\nWindow "App" [2]</ax-tree>',
58
- ),
59
- { role: "assistant", content: [{ type: "text", text: "action 2" }] },
60
- toolResultMsg(
61
- '<ax-tree>CURRENT SCREEN STATE:\nWindow "App" [3]</ax-tree>',
62
- ),
63
- ];
64
-
65
- const compacted = ComputerUseSession.compactHistory(messages);
66
-
67
- // First AX tree (index 1) should be stripped
68
- const firstToolResult = compacted[1].content[0];
69
- expect(firstToolResult.type).toBe("tool_result");
70
- if (firstToolResult.type === "tool_result") {
71
- expect(firstToolResult.content).toContain("<ax_tree_omitted />");
72
- expect(firstToolResult.content).not.toContain("<ax-tree>");
73
- }
74
-
75
- // Last two AX trees should be preserved
76
- const secondToolResult = compacted[3].content[0];
77
- if (secondToolResult.type === "tool_result") {
78
- expect(secondToolResult.content).toContain("<ax-tree>");
79
- }
80
- const thirdToolResult = compacted[5].content[0];
81
- if (thirdToolResult.type === "tool_result") {
82
- expect(thirdToolResult.content).toContain("<ax-tree>");
83
- }
84
- });
85
-
86
- test("[experimental] handles AX tree content containing literal </ax-tree> (escaped)", () => {
87
- // Simulate content where the AX tree text includes an escaped closing tag,
88
- // e.g. user is viewing XML source code with "</ax-tree>" in it.
89
- const escapedContent =
90
- '<ax-tree>CURRENT SCREEN STATE:\nTextArea "editor" [1]\n ' +
91
- "Line: &lt;/ax-tree&gt; some xml\n</ax-tree>";
92
-
93
- const messages: Message[] = [
94
- { role: "assistant", content: [{ type: "text", text: "action 0" }] },
95
- toolResultMsg(escapedContent),
96
- { role: "assistant", content: [{ type: "text", text: "action 1" }] },
97
- toolResultMsg(escapedContent),
98
- { role: "assistant", content: [{ type: "text", text: "action 2" }] },
99
- toolResultMsg(
100
- '<ax-tree>CURRENT SCREEN STATE:\nWindow "App" [3]</ax-tree>',
101
- ),
102
- ];
103
-
104
- const compacted = ComputerUseSession.compactHistory(messages);
105
-
106
- // The first message with escaped content should be fully stripped
107
- const firstToolResult = compacted[1].content[0];
108
- if (firstToolResult.type === "tool_result") {
109
- expect(firstToolResult.content).not.toContain("<ax-tree>");
110
- expect(firstToolResult.content).toContain("<ax_tree_omitted />");
111
- }
112
- });
113
-
114
- test("regex fails on unescaped </ax-tree> inside content (demonstrating the bug)", () => {
115
- // This test demonstrates what happens WITHOUT escaping: the regex
116
- // only partially removes the AX tree block.
117
- const unescapedContent =
118
- '<ax-tree>CURRENT SCREEN STATE:\nTextArea "editor" [1]\n ' +
119
- "Line: </ax-tree> some xml leftover\n</ax-tree>";
120
-
121
- const messages: Message[] = [
122
- { role: "assistant", content: [{ type: "text", text: "action 0" }] },
123
- toolResultMsg(unescapedContent),
124
- { role: "assistant", content: [{ type: "text", text: "action 1" }] },
125
- toolResultMsg(unescapedContent),
126
- { role: "assistant", content: [{ type: "text", text: "action 2" }] },
127
- toolResultMsg(
128
- '<ax-tree>CURRENT SCREEN STATE:\nWindow "App" [3]</ax-tree>',
129
- ),
130
- ];
131
-
132
- const compacted = ComputerUseSession.compactHistory(messages);
133
-
134
- // Without escaping, the first tool result has leftover content after
135
- // the regex only matched up to the FIRST </ax-tree>.
136
- const firstToolResult = compacted[1].content[0];
137
- if (firstToolResult.type === "tool_result") {
138
- // The non-greedy regex stops at the first </ax-tree>, leaving
139
- // " some xml leftover\n</ax-tree>" behind.
140
- expect(firstToolResult.content).toContain("some xml leftover");
141
- }
142
- });
143
- });