@vellumai/assistant 0.7.2 → 0.7.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (347) hide show
  1. package/ARCHITECTURE.md +16 -1
  2. package/docs/architecture/memory.md +5 -2
  3. package/node_modules/@vellumai/gateway-client/src/ipc-client.ts +13 -4
  4. package/node_modules/@vellumai/skill-host-contracts/src/assistant-event.ts +0 -9
  5. package/node_modules/@vellumai/slack-text/src/index.test.ts +18 -35
  6. package/node_modules/@vellumai/slack-text/src/index.ts +2 -48
  7. package/openapi.yaml +449 -22
  8. package/package.json +1 -1
  9. package/src/__tests__/app-control-flow.test.ts +21 -11
  10. package/src/__tests__/assistant-event-hub.test.ts +48 -0
  11. package/src/__tests__/assistant-event.test.ts +0 -10
  12. package/src/__tests__/assistant-events-sse-hardening.test.ts +2 -7
  13. package/src/__tests__/assistant-feature-flags-integration.test.ts +18 -0
  14. package/src/__tests__/auto-analysis-end-to-end.test.ts +62 -1
  15. package/src/__tests__/background-workers-disk-pressure.test.ts +268 -0
  16. package/src/__tests__/call-conversation-messages.test.ts +8 -2
  17. package/src/__tests__/channel-inbound-disk-pressure.test.ts +537 -0
  18. package/src/__tests__/channel-readiness-service.test.ts +4 -2
  19. package/src/__tests__/config-loader-backfill.test.ts +379 -0
  20. package/src/__tests__/config-schema.test.ts +1 -0
  21. package/src/__tests__/config-watcher-cleanup-throttle.test.ts +18 -9
  22. package/src/__tests__/config-watcher.test.ts +140 -69
  23. package/src/__tests__/context-search-agent-runner.test.ts +61 -3
  24. package/src/__tests__/context-search-conversations-source.test.ts +0 -24
  25. package/src/__tests__/context-search-fanout.test.ts +0 -1
  26. package/src/__tests__/context-search-memory-source.test.ts +3 -7
  27. package/src/__tests__/context-search-memory-v2-source.test.ts +0 -2
  28. package/src/__tests__/context-search-pkb-source.test.ts +0 -1
  29. package/src/__tests__/context-search-workspace-source.test.ts +0 -1
  30. package/src/__tests__/conversation-abort-tool-results.test.ts +6 -0
  31. package/src/__tests__/conversation-agent-loop-disk-pressure.test.ts +223 -0
  32. package/src/__tests__/conversation-agent-loop.test.ts +454 -5
  33. package/src/__tests__/conversation-error.test.ts +150 -3
  34. package/src/__tests__/conversation-process-callsite.test.ts +43 -0
  35. package/src/__tests__/conversation-provider-retry-repair.test.ts +6 -0
  36. package/src/__tests__/conversation-runtime-assembly.test.ts +65 -0
  37. package/src/__tests__/conversation-slash-unknown.test.ts +6 -0
  38. package/src/__tests__/conversation-speed-override.test.ts +0 -3
  39. package/src/__tests__/conversation-store.test.ts +0 -18
  40. package/src/__tests__/conversation-surfaces-app-control.test.ts +15 -4
  41. package/src/__tests__/conversation-surfaces-data-persist.test.ts +404 -0
  42. package/src/__tests__/conversation-tool-setup-app-refresh.test.ts +2 -5
  43. package/src/__tests__/conversation-workspace-injection.test.ts +6 -0
  44. package/src/__tests__/conversation-workspace-tool-tracking.test.ts +6 -0
  45. package/src/__tests__/credentials-cli.test.ts +7 -0
  46. package/src/__tests__/cu-unified-flow.test.ts +176 -10
  47. package/src/__tests__/date-context.test.ts +164 -2
  48. package/src/__tests__/disk-pressure-guard.test.ts +262 -0
  49. package/src/__tests__/disk-pressure-lifecycle.test.ts +168 -0
  50. package/src/__tests__/disk-pressure-policy.test.ts +241 -0
  51. package/src/__tests__/disk-pressure-routes.test.ts +379 -0
  52. package/src/__tests__/disk-pressure-tools.test.ts +277 -0
  53. package/src/__tests__/disk-usage.test.ts +150 -0
  54. package/src/__tests__/events-client-registration.test.ts +52 -0
  55. package/src/__tests__/events-dev-bypass-actor.test.ts +162 -0
  56. package/src/__tests__/file-write-tool.test.ts +4 -10
  57. package/src/__tests__/filing-service.test.ts +3 -4
  58. package/src/__tests__/heartbeat-disk-pressure.test.ts +183 -0
  59. package/src/__tests__/heartbeat-service.test.ts +260 -11
  60. package/src/__tests__/host-app-control-proxy.test.ts +195 -25
  61. package/src/__tests__/host-bash-proxy.test.ts +227 -34
  62. package/src/__tests__/host-bash-routes.test.ts +178 -13
  63. package/src/__tests__/host-cu-proxy.test.ts +210 -3
  64. package/src/__tests__/host-cu-routes-targeted.test.ts +141 -12
  65. package/src/__tests__/host-file-proxy-targeted.test.ts +48 -9
  66. package/src/__tests__/host-file-proxy.test.ts +268 -6
  67. package/src/__tests__/host-file-routes-targeted.test.ts +175 -17
  68. package/src/__tests__/host-transfer-proxy-targeted.test.ts +408 -59
  69. package/src/__tests__/host-transfer-routes-targeted.test.ts +232 -17
  70. package/src/__tests__/http-user-message-parity.test.ts +107 -1
  71. package/src/__tests__/injector-chain.test.ts +18 -6
  72. package/src/__tests__/injector-disk-pressure.test.ts +224 -0
  73. package/src/__tests__/managed-profile-guard.test.ts +18 -0
  74. package/src/__tests__/mcp-abort-signal.test.ts +130 -0
  75. package/src/__tests__/memory-admin-recall.test.ts +3 -11
  76. package/src/__tests__/memory-retrieval-pipeline.test.ts +22 -1
  77. package/src/__tests__/normalize-onboarding.test.ts +180 -0
  78. package/src/__tests__/oauth-connect-routes.test.ts +316 -0
  79. package/src/__tests__/oauth-provider-seed-logos.test.ts +24 -2
  80. package/src/__tests__/onboarding-persona-write.test.ts +308 -0
  81. package/src/__tests__/openai-provider.test.ts +45 -8
  82. package/src/__tests__/persist-onboarding-artifacts.test.ts +44 -64
  83. package/src/__tests__/platform-callback-registration.test.ts +21 -4
  84. package/src/__tests__/platform.test.ts +2 -1
  85. package/src/__tests__/playbook-execution.test.ts +0 -43
  86. package/src/__tests__/plugin-tool-contribution.test.ts +47 -0
  87. package/src/__tests__/prechat-onboarding-contract.test.ts +214 -27
  88. package/src/__tests__/provider-tool-name.test.ts +23 -0
  89. package/src/__tests__/relay-server.test.ts +15 -4
  90. package/src/__tests__/runtime-events-sse.test.ts +4 -8
  91. package/src/__tests__/scheduler-disk-pressure.test.ts +148 -0
  92. package/src/__tests__/secret-ingress-http.test.ts +0 -1
  93. package/src/__tests__/suggestion-routes.test.ts +46 -0
  94. package/src/__tests__/twilio-validation.test.ts +2 -2
  95. package/src/__tests__/workspace-migration-065-bump-stale-heartbeat-interval.test.ts +122 -0
  96. package/src/__tests__/workspace-migration-066-seed-heartbeat-callsite-cost-default.test.ts +285 -0
  97. package/src/__tests__/workspace-migration-068-release-notes-local-timezone.test.ts +90 -0
  98. package/src/__tests__/workspace-migration-safe-storage-limits-release.test.ts +90 -0
  99. package/src/approvals/guardian-decision-primitive.ts +13 -0
  100. package/src/approvals/guardian-request-resolvers.ts +16 -17
  101. package/src/backup/snapshot-lock.ts +2 -27
  102. package/src/bundler/compiler-tools.ts +3 -2
  103. package/src/calls/call-conversation-messages.ts +46 -10
  104. package/src/cli/commands/__tests__/webhooks.test.ts +0 -4
  105. package/src/cli/commands/bash.ts +35 -108
  106. package/src/cli/commands/contacts.ts +64 -25
  107. package/src/cli/commands/credentials.ts +56 -0
  108. package/src/cli/commands/memory-v2.ts +7 -6
  109. package/src/cli/commands/oauth/__tests__/connect.test.ts +437 -1
  110. package/src/cli/commands/oauth/connect.ts +127 -1
  111. package/src/cli/commands/platform/__tests__/callback-routes-list.test.ts +0 -3
  112. package/src/cli/commands/platform/__tests__/connect.test.ts +7 -1
  113. package/src/cli/commands/platform/__tests__/disconnect.test.ts +7 -1
  114. package/src/cli/commands/platform/__tests__/status.test.ts +103 -6
  115. package/src/cli/commands/platform/index.ts +16 -7
  116. package/src/cli/commands/status.ts +57 -0
  117. package/src/cli/program.ts +4 -2
  118. package/src/config/assistant-feature-flags.ts +13 -3
  119. package/src/config/bundled-skills/messaging/tools/messaging-analyze-style.ts +4 -3
  120. package/src/config/bundled-skills/phone-calls/references/TROUBLESHOOTING.md +13 -7
  121. package/src/config/bundled-skills/playbooks/tools/playbook-create.ts +2 -2
  122. package/src/config/bundled-skills/playbooks/tools/playbook-delete.ts +2 -2
  123. package/src/config/bundled-skills/playbooks/tools/playbook-list.ts +2 -2
  124. package/src/config/bundled-skills/playbooks/tools/playbook-update.ts +2 -2
  125. package/src/config/env.ts +0 -8
  126. package/src/config/feature-flag-registry.json +27 -3
  127. package/src/config/loader.ts +127 -8
  128. package/src/config/schemas/__tests__/memory-v2.test.ts +10 -5
  129. package/src/config/schemas/call-site-catalog.ts +14 -0
  130. package/src/config/schemas/channels.ts +0 -5
  131. package/src/config/schemas/heartbeat.ts +1 -1
  132. package/src/config/schemas/llm.ts +2 -0
  133. package/src/config/schemas/memory-lifecycle.ts +13 -0
  134. package/src/config/schemas/memory-v2.ts +75 -11
  135. package/src/config/schemas/platform.ts +43 -3
  136. package/src/config/schemas/services.ts +28 -0
  137. package/src/config/seed-inference-profiles.ts +230 -33
  138. package/src/contacts/contact-store.ts +0 -25
  139. package/src/daemon/__tests__/conversation-tool-setup.test.ts +86 -25
  140. package/src/daemon/assistant-attachments.ts +4 -4
  141. package/src/daemon/config-watcher.ts +85 -57
  142. package/src/daemon/conversation-agent-loop-handlers.ts +6 -0
  143. package/src/daemon/conversation-agent-loop.ts +170 -33
  144. package/src/daemon/conversation-error.ts +87 -15
  145. package/src/daemon/conversation-lifecycle.ts +1 -3
  146. package/src/daemon/conversation-process.ts +8 -0
  147. package/src/daemon/conversation-runtime-assembly.ts +26 -0
  148. package/src/daemon/conversation-store.ts +2 -2
  149. package/src/daemon/conversation-surfaces.ts +195 -15
  150. package/src/daemon/conversation-tool-setup.ts +57 -14
  151. package/src/daemon/conversation.ts +17 -22
  152. package/src/daemon/date-context.ts +71 -22
  153. package/src/daemon/disk-pressure-background-gate.ts +73 -0
  154. package/src/daemon/disk-pressure-guard.ts +343 -0
  155. package/src/daemon/disk-pressure-policy.ts +163 -0
  156. package/src/daemon/handlers/shared.ts +0 -1
  157. package/src/daemon/handlers/skills.ts +3 -4
  158. package/src/daemon/host-app-control-proxy.ts +137 -41
  159. package/src/daemon/host-bash-proxy.ts +46 -21
  160. package/src/daemon/host-cu-proxy.ts +49 -3
  161. package/src/daemon/host-file-proxy.ts +43 -7
  162. package/src/daemon/host-transfer-proxy.ts +95 -4
  163. package/src/daemon/lifecycle.ts +79 -28
  164. package/src/daemon/meet-host-supervisor.ts +4 -4
  165. package/src/daemon/meet-manifest-loader.ts +0 -1
  166. package/src/daemon/memory-v2-startup.ts +14 -4
  167. package/src/daemon/message-protocol.ts +3 -0
  168. package/src/daemon/message-types/conversations.ts +4 -0
  169. package/src/daemon/message-types/disk-pressure.ts +9 -0
  170. package/src/daemon/message-types/messages.ts +3 -0
  171. package/src/daemon/profiler-run-store.ts +5 -5
  172. package/src/daemon/tool-setup-types.ts +2 -2
  173. package/src/documents/document-store.ts +85 -0
  174. package/src/filing/filing-service.ts +30 -5
  175. package/src/heartbeat/__tests__/heartbeat-feed-event.test.ts +9 -16
  176. package/src/heartbeat/__tests__/heartbeat-run-store.test.ts +36 -0
  177. package/src/heartbeat/heartbeat-run-store.ts +13 -0
  178. package/src/heartbeat/heartbeat-service.ts +205 -31
  179. package/src/home/feed-scheduler.ts +18 -0
  180. package/src/inbound/platform-callback-registration.ts +8 -15
  181. package/src/ipc/__tests__/clients-list-ipc.test.ts +169 -0
  182. package/src/ipc/assistant-server.ts +56 -2
  183. package/src/ipc/gateway-client.ts +37 -3
  184. package/src/live-voice/live-voice-archive.ts +4 -4
  185. package/src/live-voice/protocol.ts +5 -7
  186. package/src/media/image-service.ts +1 -7
  187. package/src/memory/__tests__/fixtures/memory-v2-activation-fixtures.ts +21 -13
  188. package/src/memory/__tests__/jobs-worker-v2-schedule.test.ts +52 -22
  189. package/src/memory/__tests__/memory-v2-activation-log-store.test.ts +0 -6
  190. package/src/memory/__tests__/memory-v2-concept-frequency.test.ts +272 -0
  191. package/src/memory/admin.ts +5 -9
  192. package/src/memory/context-search/agent-runner.ts +19 -2
  193. package/src/memory/context-search/sources/conversations.ts +2 -11
  194. package/src/memory/context-search/sources/memory-v2.ts +5 -4
  195. package/src/memory/context-search/sources/memory.ts +0 -1
  196. package/src/memory/context-search/types.ts +0 -1
  197. package/src/memory/conversation-crud.ts +4 -12
  198. package/src/memory/db-init.ts +2 -0
  199. package/src/memory/embedding-runtime-manager.ts +119 -5
  200. package/src/memory/graph/__tests__/conversation-graph-memory-v2-routing.test.ts +32 -21
  201. package/src/memory/graph/conversation-graph-memory.ts +42 -54
  202. package/src/memory/graph/extraction.ts +1 -3
  203. package/src/memory/graph/graph-search.test.ts +10 -67
  204. package/src/memory/graph/graph-search.ts +1 -20
  205. package/src/memory/graph/retriever.test.ts +6 -0
  206. package/src/memory/graph/retriever.ts +6 -10
  207. package/src/memory/indexer.ts +54 -45
  208. package/src/memory/job-handlers/backfill.ts +2 -11
  209. package/src/memory/job-handlers/cleanup.ts +43 -0
  210. package/src/memory/job-handlers/embedding.ts +6 -8
  211. package/src/memory/job-handlers/summarization.ts +2 -7
  212. package/src/memory/jobs-store.ts +48 -0
  213. package/src/memory/jobs-worker.ts +81 -43
  214. package/src/memory/memory-v2-activation-log-store.ts +32 -14
  215. package/src/memory/memory-v2-concept-frequency.ts +169 -0
  216. package/src/memory/migrations/239-trace-events-created-at-index.ts +18 -0
  217. package/src/memory/migrations/index.ts +1 -0
  218. package/src/memory/pkb/pkb-search.test.ts +6 -0
  219. package/src/memory/qdrant-client.ts +0 -13
  220. package/src/memory/rerank-local.ts +374 -0
  221. package/src/memory/search/semantic.ts +6 -67
  222. package/src/memory/trace-event-store.ts +1 -17
  223. package/src/memory/v2/__tests__/activation.test.ts +311 -250
  224. package/src/memory/v2/__tests__/consolidation-job.test.ts +40 -8
  225. package/src/memory/v2/__tests__/injection.test.ts +157 -167
  226. package/src/memory/v2/__tests__/prompts-consolidation.test.ts +61 -2
  227. package/src/memory/v2/__tests__/qdrant.test.ts +16 -0
  228. package/src/memory/v2/__tests__/reranker.test.ts +338 -0
  229. package/src/memory/v2/__tests__/sim.test.ts +5 -199
  230. package/src/memory/v2/__tests__/skill-store.test.ts +71 -65
  231. package/src/memory/v2/__tests__/static-context.test.ts +76 -1
  232. package/src/memory/v2/activation.ts +149 -156
  233. package/src/memory/v2/consolidation-job.ts +62 -12
  234. package/src/memory/v2/injection.ts +47 -60
  235. package/src/memory/v2/prompts/consolidation.ts +36 -1
  236. package/src/memory/v2/qdrant.ts +99 -0
  237. package/src/memory/v2/reranker.ts +177 -0
  238. package/src/memory/v2/sim.ts +10 -84
  239. package/src/memory/v2/skill-content.ts +4 -3
  240. package/src/memory/v2/skill-store.ts +82 -59
  241. package/src/memory/v2/static-context.ts +22 -0
  242. package/src/memory/v2/types.ts +10 -10
  243. package/src/notifications/copy-composer.ts +13 -0
  244. package/src/notifications/signal.ts +4 -0
  245. package/src/oauth/AGENTS.md +3 -1
  246. package/src/oauth/__tests__/oauth-connect-state.test.ts +137 -0
  247. package/src/oauth/connect-orchestrator.ts +2 -0
  248. package/src/oauth/connection-resolver.test.ts +66 -1
  249. package/src/oauth/connection-resolver.ts +55 -1
  250. package/src/oauth/oauth-connect-state.ts +77 -0
  251. package/src/oauth/seed-providers.ts +58 -1
  252. package/src/plugins/defaults/injectors.ts +35 -2
  253. package/src/plugins/defaults/memory-retrieval.ts +5 -6
  254. package/src/plugins/types.ts +7 -0
  255. package/src/proactive-artifact/aux-message-injector.ts +74 -0
  256. package/src/proactive-artifact/decision.test.ts +226 -0
  257. package/src/proactive-artifact/decision.ts +165 -0
  258. package/src/proactive-artifact/index.ts +7 -0
  259. package/src/proactive-artifact/job.test.ts +867 -0
  260. package/src/proactive-artifact/job.ts +352 -0
  261. package/src/proactive-artifact/message-copy.ts +41 -0
  262. package/src/proactive-artifact/trigger-state.test.ts +277 -0
  263. package/src/proactive-artifact/trigger-state.ts +119 -0
  264. package/src/prompts/normalize-onboarding.ts +80 -0
  265. package/src/prompts/persona-resolver.ts +101 -9
  266. package/src/prompts/system-prompt.ts +21 -7
  267. package/src/prompts/templates/BOOTSTRAP.md +13 -5
  268. package/src/providers/__tests__/retry-callsite.test.ts +222 -1
  269. package/src/providers/model-intents.ts +7 -0
  270. package/src/providers/openrouter/client.ts +8 -0
  271. package/src/providers/retry.ts +50 -0
  272. package/src/providers/types.ts +1 -0
  273. package/src/runtime/__tests__/agent-wake.test.ts +456 -3
  274. package/src/runtime/agent-wake.ts +238 -100
  275. package/src/runtime/assistant-event-hub.ts +36 -6
  276. package/src/runtime/assistant-event.ts +0 -1
  277. package/src/runtime/auth/__tests__/route-policy.test.ts +64 -0
  278. package/src/runtime/auth/route-policy.ts +14 -1
  279. package/src/runtime/auth/same-actor.ts +216 -0
  280. package/src/runtime/channel-retry-sweep.ts +65 -1
  281. package/src/runtime/guardian-reply-router.ts +10 -0
  282. package/src/runtime/local-actor-identity.ts +52 -11
  283. package/src/runtime/pending-interactions.ts +8 -0
  284. package/src/runtime/routes/__tests__/client-routes.test.ts +155 -0
  285. package/src/runtime/routes/__tests__/conversation-query-routes.test.ts +0 -5
  286. package/src/runtime/routes/__tests__/heartbeat-routes.test.ts +1 -1
  287. package/src/runtime/routes/client-routes.ts +20 -2
  288. package/src/runtime/routes/contact-routes.ts +0 -25
  289. package/src/runtime/routes/conversation-routes.ts +35 -26
  290. package/src/runtime/routes/debug-bash-routes.ts +163 -0
  291. package/src/runtime/routes/disk-pressure-routes.ts +121 -0
  292. package/src/runtime/routes/document-pdf-renderer.ts +6 -2
  293. package/src/runtime/routes/documents-routes.ts +2 -75
  294. package/src/runtime/routes/events-routes.ts +41 -9
  295. package/src/runtime/routes/host-bash-routes.ts +23 -3
  296. package/src/runtime/routes/host-cu-routes.ts +33 -6
  297. package/src/runtime/routes/host-file-routes.ts +32 -6
  298. package/src/runtime/routes/host-transfer-routes.ts +79 -16
  299. package/src/runtime/routes/identity-routes.ts +7 -138
  300. package/src/runtime/routes/inbound-message-handler.ts +77 -12
  301. package/src/runtime/routes/inbound-stages/guardian-reply-intercept.ts +3 -0
  302. package/src/runtime/routes/index.ts +6 -0
  303. package/src/runtime/routes/memory-item-routes.test.ts +41 -15
  304. package/src/runtime/routes/memory-v2-routes.ts +33 -0
  305. package/src/runtime/routes/oauth-connect-routes.ts +153 -0
  306. package/src/runtime/verification-outbound-actions.ts +4 -4
  307. package/src/schedule/run-script.ts +37 -5
  308. package/src/schedule/scheduler.ts +20 -1
  309. package/src/security/encrypted-store.ts +2 -0
  310. package/src/security/secure-keys.ts +55 -0
  311. package/src/skills/remote-skill-policy.ts +4 -10
  312. package/src/subagent/index.ts +1 -7
  313. package/src/subagent/manager.ts +1 -15
  314. package/src/tasks/task-runner.ts +0 -1
  315. package/src/tasks/task-store.ts +0 -3
  316. package/src/tools/background-tool-registry.ts +17 -3
  317. package/src/tools/host-filesystem/edit.test.ts +151 -0
  318. package/src/tools/host-filesystem/edit.ts +43 -1
  319. package/src/tools/host-filesystem/read.test.ts +129 -0
  320. package/src/tools/host-filesystem/read.ts +43 -1
  321. package/src/tools/host-filesystem/transfer.test.ts +127 -2
  322. package/src/tools/host-filesystem/transfer.ts +56 -11
  323. package/src/tools/host-filesystem/write.test.ts +134 -0
  324. package/src/tools/host-filesystem/write.ts +43 -1
  325. package/src/tools/host-terminal/host-shell.ts +13 -6
  326. package/src/tools/mcp/mcp-tool-factory.ts +2 -1
  327. package/src/tools/memory/register.test.ts +12 -9
  328. package/src/tools/memory/register.ts +1 -2
  329. package/src/tools/provider-tool-name.ts +28 -0
  330. package/src/tools/registry.ts +30 -9
  331. package/src/tools/terminal/shell.ts +9 -1
  332. package/src/tools/tool-approval-handler.ts +31 -6
  333. package/src/tools/types.ts +24 -2
  334. package/src/tts/provider-catalog.ts +3 -5
  335. package/src/util/disk-usage.ts +138 -0
  336. package/src/util/platform.ts +21 -11
  337. package/src/util/process-liveness.ts +26 -0
  338. package/src/workspace/heartbeat-service.ts +19 -0
  339. package/src/workspace/migrations/065-bump-stale-heartbeat-interval.ts +60 -0
  340. package/src/workspace/migrations/066-seed-heartbeat-callsite-cost-default.ts +146 -0
  341. package/src/workspace/migrations/067-release-notes-safe-storage-limits.ts +72 -0
  342. package/src/workspace/migrations/068-release-notes-local-timezone.ts +65 -0
  343. package/src/workspace/migrations/registry.ts +8 -0
  344. package/src/__tests__/conversation-tool-setup-memory-scope.test.ts +0 -167
  345. package/src/memory/v2/__tests__/skill-qdrant.test.ts +0 -657
  346. package/src/memory/v2/skill-qdrant.ts +0 -404
  347. package/src/signals/bash.ts +0 -198
@@ -0,0 +1,119 @@
1
+ import { existsSync, mkdirSync, rmSync, writeFileSync } from "node:fs";
2
+ import { dirname, join } from "node:path";
3
+
4
+ import { rawGet } from "../memory/raw-query.js";
5
+ import { getLogger } from "../util/logger.js";
6
+ import { getDataDir } from "../util/platform.js";
7
+
8
const log = getLogger("proactive-artifact-trigger");

/** Smallest user-message count at which the trigger is allowed to fire. */
const TRIGGER_MIN = 4;
/** Largest user-message count at which the trigger is allowed to fire. */
const TRIGGER_MAX = 10;

/**
 * Location of the marker file whose presence means the proactive artifact
 * trigger has been claimed (or permanently closed). The file lives directly
 * under the directory returned by `getDataDir()`.
 */
function guardPath(): string {
  const dataDir = getDataDir();
  return join(dataDir, ".proactive-artifact-completed");
}
16
+
17
/**
 * Returns how many `user`-role messages in `standard` conversations have a
 * `created_at` at or before `beforeOrAt`.
 *
 * The inner SELECT is limited to `TRIGGER_MAX + 1` rows, so the returned
 * value saturates at `TRIGGER_MAX + 1`: callers only need to know whether the
 * count is below, inside, or past the trigger window, and the cap keeps the
 * scan cheap.
 *
 * @param beforeOrAt Upper bound (inclusive) compared against `m.created_at`.
 * @returns The capped count, or 0 when the query yields no row.
 */
export function getUserMessageCountUpTo(beforeOrAt: number): number {
  const rowCap = TRIGGER_MAX + 1;
  const sql = `SELECT COUNT(*) AS c FROM (
       SELECT 1 FROM messages m
       JOIN conversations c ON m.conversation_id = c.id
       WHERE m.role = 'user'
         AND c.conversation_type = 'standard'
         AND m.created_at <= ?
       LIMIT ${rowCap}
     ) sub`;
  const result = rawGet<{ c: number }>(sql, beforeOrAt);
  return result?.c ?? 0;
}
35
+
36
/**
 * Cheap existence check on the guard file, used so callers can skip the
 * COUNT query on every turn.
 *
 * @returns `true` when the guard file is present, i.e. the trigger has
 *          already been claimed or closed.
 */
export function hasProactiveArtifactCompleted(): boolean {
  const marker = guardPath();
  return existsSync(marker);
}
43
+
44
/** Result of one exclusive attempt to create the guard file. */
type GuardWriteOutcome = "created" | "exists" | "failed";

/**
 * Creates the guard file with the exclusive `wx` flag, creating its parent
 * directory first. An `EEXIST` failure means somebody else already holds the
 * guard; any other failure is logged so that a persistently unwritable data
 * directory is visible in the logs.
 */
function writeGuardExclusive(): GuardWriteOutcome {
  const path = guardPath();
  try {
    mkdirSync(dirname(path), { recursive: true });
    writeFileSync(path, new Date().toISOString(), { flag: "wx" });
    return "created";
  } catch (err: unknown) {
    if (err instanceof Error && "code" in err && err.code === "EEXIST") {
      return "exists";
    }
    log.warn({ err }, "Failed to write proactive artifact guard file");
    return "failed";
  }
}

/**
 * Count-first check followed by an exclusive claim of the guard file.
 *
 * Trigger window: user-message counts TRIGGER_MIN–TRIGGER_MAX (4–10).
 * - Below the window: nothing is written; returns false.
 * - Past the window: the guard is written permanently (best effort) so later
 *   turns can skip the COUNT query; returns false.
 * - Inside the window: returns true only if this call created the guard file.
 *   The guard then acts as an in-flight lock — the job releases it on
 *   decision-skip so the next turn can retry.
 *
 * Only the file creation is exclusive; the count is read beforehand and is
 * not part of the same atomic step.
 *
 * @param userMessageCreatedAt Timestamp of the triggering user message;
 *        messages created at or before it are counted.
 * @returns `true` when the caller now owns the claim, otherwise `false`.
 */
export function tryClaimProactiveArtifactTrigger(
  userMessageCreatedAt: number,
): boolean {
  const count = getUserMessageCountUpTo(userMessageCreatedAt);

  if (count < TRIGGER_MIN) {
    return false;
  }

  if (count > TRIGGER_MAX) {
    // Window is closed regardless of the outcome; failures other than EEXIST
    // are logged by the helper instead of being silently dropped.
    writeGuardExclusive();
    return false;
  }

  // count in [TRIGGER_MIN, TRIGGER_MAX] — only the creator of the file wins.
  return writeGuardExclusive() === "created";
}
84
+
85
/**
 * Drops the in-flight claim by deleting the guard file, allowing a later
 * turn to claim again. Intended for the case where the decision phase skips
 * and no build was committed.
 *
 * Removal is best-effort: any error is ignored, in which case the guard stays
 * in place and no retry happens.
 */
export function releaseProactiveArtifactClaim(): void {
  try {
    const marker = guardPath();
    rmSync(marker, { force: true });
  } catch {
    // Intentionally ignored — a leftover guard only means no retry next turn.
  }
}
96
+
97
/**
 * Startup hook for the daemon. When no guard file exists yet but the user
 * already has more than TRIGGER_MAX messages, the guard is written so the
 * trigger never fires for users whose history predates this feature.
 *
 * A guard that appears concurrently (`EEXIST`) is treated as success; any
 * other write failure is logged as a warning and otherwise ignored.
 */
export function backfillGuardIfNeeded(): void {
  if (hasProactiveArtifactCompleted()) {
    return;
  }

  const userMessages = getUserMessageCountUpTo(Date.now());
  if (!(userMessages > TRIGGER_MAX)) {
    return;
  }

  try {
    const marker = guardPath();
    mkdirSync(dirname(marker), { recursive: true });
    writeFileSync(marker, new Date().toISOString(), { flag: "wx" });
  } catch (err: unknown) {
    const alreadyExists =
      err instanceof Error && "code" in err && err.code === "EEXIST";
    if (!alreadyExists) {
      log.warn({ err }, "Failed to backfill proactive artifact guard file");
    }
  }
}
@@ -0,0 +1,80 @@
1
+ import type { OnboardingContext } from "../types/onboarding-context.js";
2
+
3
+ /**
4
+ * Map of known tool IDs (from the client onboarding UI) to display labels.
5
+ * Unknown IDs pass through with first-letter capitalization via `normalizeTools`.
6
+ */
7
+ export const TOOL_DISPLAY_NAMES: Record<string, string> = {
8
+ gmail: "Gmail",
9
+ outlook: "Outlook",
10
+ "google-calendar": "Google Calendar",
11
+ slack: "Slack",
12
+ notion: "Notion",
13
+ linear: "Linear",
14
+ jira: "Jira",
15
+ github: "GitHub",
16
+ figma: "Figma",
17
+ "google-drive": "Google Drive",
18
+ excel: "Excel",
19
+ "apple-notes": "Apple Notes",
20
+ };
21
+
22
+ /**
23
+ * Map of known task IDs to plain-language labels describing what the assistant
24
+ * does for each task category.
25
+ */
26
+ export const TASK_DISPLAY_LABELS: Record<string, string> = {
27
+ "code-building": "builds code, apps, or tools",
28
+ writing: "writes docs, emails, or content",
29
+ research: "does research and analysis",
30
+ "project-management": "plans and coordinates work",
31
+ scheduling: "handles meetings, calendar, and logistics",
32
+ personal: "handles life admin",
33
+ };
34
+
35
+ /**
36
+ * Capitalize the first letter of a string (fallback for unknown IDs).
37
+ */
38
+ function capitalizeFirst(s: string): string {
39
+ if (!s) return s;
40
+ return s.charAt(0).toUpperCase() + s.slice(1);
41
+ }
42
+
43
+ /**
44
+ * Maps each tool ID through `TOOL_DISPLAY_NAMES`, falling back to the raw
45
+ * string for unknown IDs.
46
+ */
47
+ export function normalizeTools(tools: string[]): string[] {
48
+ return tools.map((id) => TOOL_DISPLAY_NAMES[id] ?? capitalizeFirst(id));
49
+ }
50
+
51
+ /**
52
+ * Maps each task ID through `TASK_DISPLAY_LABELS`, falling back to the raw
53
+ * string for unknown IDs.
54
+ */
55
+ export function normalizeTasks(tasks: string[]): string[] {
56
+ return tasks.map((id) => TASK_DISPLAY_LABELS[id] ?? id);
57
+ }
58
+
59
/** Display-ready onboarding data produced by `normalizeOnboardingContext`. */
export interface NormalizedOnboarding {
  /** Trimmed user name; absent when the name is missing or blank. */
  preferredName?: string;
  /** Task labels, one per task ID supplied by the client. */
  commonWork: string[];
  /** Tool display names, one per tool ID supplied by the client. */
  dailyTools: string[];
  /** Tone value, passed through unchanged. */
  tone?: string;
  /** Assistant name, passed through unchanged. */
  assistantName?: string;
}

/**
 * Converts the raw onboarding context sent by the client into display-ready
 * data: the user name is trimmed (blank becomes absent), task and tool IDs are
 * mapped to labels, and `tone` / `assistantName` are copied as-is.
 */
export function normalizeOnboardingContext(
  ctx: OnboardingContext,
): NormalizedOnboarding {
  const trimmedName = ctx.userName?.trim();
  const commonWork = normalizeTasks(ctx.tasks);
  const dailyTools = normalizeTools(ctx.tools);

  return {
    preferredName: trimmedName ? trimmedName : undefined,
    commonWork,
    dailyTools,
    tone: ctx.tone,
    assistantName: ctx.assistantName,
  };
}
@@ -1,9 +1,4 @@
1
- import {
2
- existsSync,
3
- mkdirSync,
4
- readFileSync,
5
- writeFileSync,
6
- } from "node:fs";
1
+ import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
7
2
  import { basename, dirname, join } from "node:path";
8
3
 
9
4
  import {
@@ -14,8 +9,9 @@ import {
14
9
  import type { ChannelCapabilities } from "../daemon/conversation-runtime-assembly.js";
15
10
  import type { TrustContext } from "../daemon/trust-context.js";
16
11
  import { getLogger } from "../util/logger.js";
17
- import { getWorkspaceDir } from "../util/platform.js";
12
+ import { getWorkspaceDir, getWorkspacePromptPath } from "../util/platform.js";
18
13
  import { stripCommentLines } from "../util/strip-comment-lines.js";
14
+ import type { NormalizedOnboarding } from "./normalize-onboarding.js";
19
15
 
20
16
  const log = getLogger("persona-resolver");
21
17
 
@@ -110,7 +106,11 @@ function resolveUserFilename(
110
106
 
111
107
  // Validate basename to prevent path traversal
112
108
  if (filename) {
113
- if (basename(filename) !== filename || filename === ".." || filename === ".") {
109
+ if (
110
+ basename(filename) !== filename ||
111
+ filename === ".." ||
112
+ filename === "."
113
+ ) {
114
114
  log.warn(
115
115
  { userFile: filename },
116
116
  "Contact userFile contains path traversal; ignoring",
@@ -268,7 +268,11 @@ export function resolveGuardianPersonaStrict(): string | null {
268
268
  * Creates the parent `users/` directory if missing.
269
269
  */
270
270
  export function ensureGuardianPersonaFile(userFile: string): void {
271
- if (basename(userFile) !== userFile || userFile === ".." || userFile === ".") {
271
+ if (
272
+ basename(userFile) !== userFile ||
273
+ userFile === ".." ||
274
+ userFile === "."
275
+ ) {
272
276
  log.warn(
273
277
  { userFile },
274
278
  "Guardian persona userFile contains path traversal; refusing to write",
@@ -314,3 +318,91 @@ export function isGuardianPersonaCustomized(filePath: string): boolean {
314
318
  const templateStripped = stripCommentLines(GUARDIAN_PERSONA_TEMPLATE);
315
319
  return stripped !== templateStripped;
316
320
  }
321
+
322
+ // ── Onboarding section writer ────────────────────────────────────
323
+
324
const ONBOARDING_HEADING = "## Onboarding Context";

/**
 * Renders the managed onboarding section as markdown: the heading, a blank
 * line, one bullet per populated field, and a trailing newline. Fields that
 * are empty or absent produce no bullet.
 */
function buildOnboardingSection(normalized: NormalizedOnboarding): string {
  const { preferredName, commonWork, dailyTools } = normalized;
  const bullets: string[] = [];

  if (preferredName) {
    bullets.push(`- **Preferred name:** ${preferredName}`);
  }
  if (commonWork.length > 0) {
    bullets.push(`- **Common work:** ${commonWork.join("; ")}`);
  }
  if (dailyTools.length > 0) {
    bullets.push(`- **Daily tools:** ${dailyTools.join(", ")}`);
  }

  // The final empty entry yields the trailing "\n" after the last line.
  return [ONBOARDING_HEADING, "", ...bullets, ""].join("\n");
}
346
+
347
/**
 * Picks the file the onboarding section is written to. Order of preference:
 * the guardian persona path when one resolves, then `users/default.md` in the
 * workspace if that file exists, and finally the workspace `USER.md` prompt.
 */
function resolveOnboardingWriteTarget(): string {
  const guardianPersona = resolveGuardianPersonaPath();
  if (guardianPersona) {
    return guardianPersona;
  }

  const fallbackUserFile = join(getWorkspaceDir(), "users", "default.md");
  return existsSync(fallbackUserFile)
    ? fallbackUserFile
    : getWorkspacePromptPath("USER.md");
}
360
+
361
/**
 * Replace the section introduced by `heading` with `section`, or append
 * `section` when the document has no such heading line.
 *
 * The heading must occupy a whole line (trailing spaces/tabs allowed), so
 * `### Onboarding Context` or an inline mention of the heading text is not
 * mistaken for the managed section. The replaced section runs up to the next
 * line starting with `## ` or to the end of the document, and a blank line is
 * kept between the new section and a following heading.
 */
function upsertMarkdownSection(
  content: string,
  heading: string,
  section: string,
): string {
  const escapedHeading = heading.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const headingLine = new RegExp(`^${escapedHeading}[ \\t]*$`, "m");
  const found = headingLine.exec(content);

  if (!found) {
    // Append, separated from existing text by exactly one blank line. An
    // empty document needs no separator at all.
    let base = content;
    if (base.length > 0) {
      if (!base.endsWith("\n")) base += "\n";
      if (!base.endsWith("\n\n")) base += "\n";
    }
    return base + section;
  }

  const before = content.slice(0, found.index);
  // `tail` starts at the line break that ends the heading line (or is empty).
  const tail = content.slice(found.index + found[0].length);
  const nextHeadingOffset = tail.search(/^## /m);
  if (nextHeadingOffset === -1) {
    return before + section;
  }
  return `${before}${section}\n${tail.slice(nextHeadingOffset)}`;
}

/**
 * Write a managed `## Onboarding Context` section to the guardian persona
 * file (or fallback target). Idempotent: replaces the section in-place if
 * it already exists, appends if not, and creates the file (with a
 * `# User Profile` header and any missing parent directories) when missing.
 *
 * Never throws — logs a warning on failure (fire-and-forget pattern).
 *
 * @param normalized Display-ready onboarding data to render into the section.
 */
export function writeOnboardingSection(normalized: NormalizedOnboarding): void {
  try {
    const targetPath = resolveOnboardingWriteTarget();
    const section = buildOnboardingSection(normalized);

    let existing: string;
    if (existsSync(targetPath)) {
      existing = readFileSync(targetPath, "utf-8");
    } else {
      // Create parent directories and start with a header
      mkdirSync(dirname(targetPath), { recursive: true });
      existing = "# User Profile\n\n";
    }

    const updated = upsertMarkdownSection(
      existing,
      ONBOARDING_HEADING,
      section,
    );

    writeFileSync(targetPath, updated, "utf-8");
    log.debug({ path: targetPath }, "Wrote onboarding section to persona file");
  } catch (err: unknown) {
    log.warn({ err }, "Failed to write onboarding section to persona file");
  }
}
@@ -21,6 +21,7 @@ import {
21
21
  import { stripCommentLines } from "../util/strip-comment-lines.js";
22
22
  import { cleanupBootstrapFiles } from "./bootstrap-cleanup.js";
23
23
  import { SYSTEM_PROMPT_CACHE_BOUNDARY } from "./cache-boundary.js";
24
+ import { normalizeOnboardingContext } from "./normalize-onboarding.js";
24
25
 
25
26
  export { SYSTEM_PROMPT_CACHE_BOUNDARY };
26
27
 
@@ -322,14 +323,27 @@ export function buildSystemPrompt(options?: BuildSystemPromptOptions): string {
322
323
  );
323
324
 
324
325
  if (options?.onboardingContext) {
325
- dynamicParts.push(
326
- "## Pre-chat Onboarding Context\n\n" +
327
- "The user completed the native pre-chat onboarding. Here is their context:\n\n" +
328
- "```json\n" +
329
- JSON.stringify(options.onboardingContext, null, 2) +
330
- "\n```\n\n" +
331
- "Use this to personalize your opener and skip redundant discovery. If `assistantName` is present, it is the name the user chose for you; preserve it in IDENTITY.md.",
326
+ const n = normalizeOnboardingContext(options.onboardingContext);
327
+ const lines: string[] = [
328
+ "## First-Run User Context",
329
+ "",
330
+ "The user completed setup before this conversation.",
331
+ "",
332
+ "Known context:",
333
+ ];
334
+ if (n.preferredName) lines.push(`- Name: ${n.preferredName}`);
335
+ if (n.commonWork.length)
336
+ lines.push(`- Common work: ${n.commonWork.join("; ")}`);
337
+ if (n.dailyTools.length)
338
+ lines.push(`- Daily tools: ${n.dailyTools.join(", ")}`);
339
+ if (n.assistantName)
340
+ lines.push(`- Chosen assistant name: ${n.assistantName}`);
341
+ if (n.tone) lines.push(`- Preferred initial voice: ${n.tone}`);
342
+ lines.push(
343
+ "",
344
+ "Apply this context quietly. Do not recap it as a list unless the user asks.",
332
345
  );
346
+ dynamicParts.push(lines.join("\n"));
333
347
  }
334
348
  }
335
349
  // Configuration section removed — workspace files are self-describing,
@@ -16,6 +16,14 @@ That's the whole job. Everything below is subordinate.
16
16
 
17
17
  The goal is for the user to feel seen — not just helped. Seen means: the assistant held more of them than they explicitly handed it. It noticed something they were hedging. It named what they did more precisely than they named it themselves. It offered a frame they hadn't found themselves. That's what earns the second conversation.
18
18
 
19
+ ## First-response speed
20
+
21
+ The first visible responses are part of the product. For the first two real user turns, default to visible text first. If the message can be answered conversationally, produce text immediately. Do not call tools for hidden setup, memory, style notes, IDENTITY.md/SOUL.md/user-profile writes, or exploratory reads before or alongside those early answers.
22
+
23
+ This does not ban real work. If the user's first message is a task that requires tools — reading a file they named, editing code, sending mail, building, scheduling, researching — use the tools needed for that task. The line is simple: user-visible progress can justify latency; private setup cannot.
24
+
25
+ Private setup waits until there is enough signal to justify it. Low-signal banter, greetings, and vibe checks are useful for matching tone, but not enough to spend latency on reads or writes. If you're unsure whether the setup is justified, defer it.
26
+
19
27
  ## Opening move
20
28
 
21
29
  The first message in your conversation context is a system trigger used to generate the canned greeting. Don't reference it, quote it, or respond to it as if the user said it.
@@ -62,17 +70,17 @@ Task-first users may become conversation-first users by their second or third co
62
70
 
63
71
  You're not a blank tool and not a service rep running intake. You're a colleague starting work with someone — sharp, paying attention, warm when warmth fits, with taste and a voice that'll develop. That shows up in how you do the work, not in announcements about it.
64
72
 
65
- Run the ritual on the first message that contains real signal — a request, a question, a problem, anything with enough content to write a meaningful observation. A short acknowledgment ("ok," "thanks," "cool," "got it") is not the trigger. Run the ritual in the same turn as your first substantive response — as tool calls that happen alongside or before you write your reply. Do not defer the ritual to a subsequent turn. By the time the user says "thanks," it should already be done.
73
+ The personalization ritual is deferred, not cancelled. A short acknowledgment ("ok," "thanks," "cool," "got it") is not the trigger. Low-signal openers ("hey," "sup," "not much") are not the trigger either. Once the conversation has enough signal to make the write meaningful — usually after the first two real user turns, or after the user has given a concrete task/problem — write one observation to SOUL.md about how they communicate. Do it at a natural lull, after task progress, or during wrap-up.
66
74
 
67
- The ritual: write one observation to SOUL.md about how they communicate. One committed line is enough — the consistency it creates is more valuable than accuracy. Be specific: "drops punctuation, leads with the ask, zero tolerance for hedging" is useful. "User is direct" is not. Vague observations don't change behavior. Specific ones do. Then write to IDENTITY.md (name, emoji, tagline under `## Identity Intro`).
75
+ One committed line is enough — the consistency it creates is more valuable than accuracy. Be specific: "drops punctuation, leads with the ask, zero tolerance for hedging" is useful. "User is direct" is not. Vague observations don't change behavior. Specific ones do. Then write to IDENTITY.md if it still needs a name, emoji, or tagline under `## Identity Intro`.
68
76
 
69
- That is the entire turn-1 ritual: one SOUL.md write, one IDENTITY.md write. Nothing else happens on turn 1. No journal entry. No NOW.md update. No scratchpad setup. No file deletion. Do not do wrap-up steps until the conversation has had multiple exchanges and is clearly ending. If you catch yourself about to make a third tool call on turn 1, stop. The cap is two. The third call can wait.
77
+ When you do the ritual, keep it small: one SOUL.md write and one IDENTITY.md write. No journal entry. No NOW.md update. No scratchpad setup. No file deletion. Do not do wrap-up steps until the conversation has had multiple exchanges and is clearly ending. If it would delay an answer the user is waiting for, wait.
70
78
 
71
79
  This is also how personality evolves. If the user is drier than your starting register, shift. If they're warmer, match it. The SOUL.md write is the mechanism.
72
80
 
73
81
  ## Learning as byproduct
74
82
 
75
- You'll pick things up while helping. Save them quietly with `file_edit` — never mention files or tools.
83
+ You'll pick things up while helping. Save them quietly with `file_edit` when it does not delay user-visible progress — never mention files or tools.
76
84
 
77
85
  **Files to update:** IDENTITY.md, SOUL.md, users/{{USER_PERSONA_FILE}}
78
86
 
@@ -100,4 +108,4 @@ Only after multiple exchanges — not on turn 1, not on turn 2. When the convers
100
108
 
101
109
  Do not announce that you've completed the ritual. Do not say "I've set things up" or reference anything you did. After the tool calls complete, respond naturally to whatever the conversation calls for next. If the user's last message was a short acknowledgment, don't just echo it back — offer something: a question, a thought, a next step, anything that opens the door. Silence after completing the ritual is the worst possible outcome.
102
110
 
103
- One-shot. The files go regardless of how far you got.
111
+ One-shot. The files go once there is real signal; speed wins before that.
@@ -391,7 +391,14 @@ describe("RetryProvider — callSite resolution", () => {
391
391
 
392
392
  test("propagates temperature when explicitly set in resolved config", async () => {
393
393
  setLlmConfig({
394
- default: { provider: "anthropic", model: "claude-opus-4-7" },
394
+ default: {
395
+ provider: "anthropic",
396
+ model: "claude-opus-4-7",
397
+ // Thinking defaults to enabled in the schema. Disable here so the
398
+ // thinking/temperature conflict guard doesn't fire — that guard
399
+ // (Anthropic 400 backstop) has dedicated coverage further down.
400
+ thinking: { enabled: false },
401
+ },
395
402
  callSites: {
396
403
  mainAgent: { temperature: 0.5 },
397
404
  },
@@ -466,6 +473,220 @@ describe("RetryProvider — callSite resolution", () => {
466
473
  });
467
474
  });
468
475
 
476
+ // ── RetryProvider — Anthropic thinking + temperature conflict guard ─────────
477
+ //
478
+ // Anthropic 400s with "temperature may only be set to 1 when thinking is
479
+ // enabled or in adaptive mode" if a request combines extended thinking with
480
+ // `temperature` ≠ 1. We had three call sites ship with hardcoded
481
+ // per-call temperatures that exploded for Opus 4.x effort=high/xhigh
482
+ // profiles (PR #29560 fixed the call sites). This guard is a backstop: if a
483
+ // future call site reintroduces the same pattern, retry.ts drops the
484
+ // offending temperature instead of letting the request fail at the wire.
485
+
486
+ describe("RetryProvider — thinking/temperature conflict guard", () => {
487
+ test("drops explicit non-1 temperature when thinking is enabled (Anthropic)", async () => {
488
+ setLlmConfig({
489
+ default: {
490
+ provider: "anthropic",
491
+ model: "claude-opus-4-7",
492
+ thinking: { enabled: true, streamThinking: true },
493
+ },
494
+ callSites: { mainAgent: {} },
495
+ });
496
+
497
+ let seen: SendMessageOptions | undefined;
498
+ const wrapped = new RetryProvider(
499
+ makeProvider("anthropic", (options) => {
500
+ seen = options;
501
+ }),
502
+ );
503
+
504
+ await wrapped.sendMessage(DUMMY_MESSAGES, undefined, undefined, {
505
+ // Hardcoded per-call temperature — the pattern that caused the PR
506
+ // #29560 bug class. Without the guard this would forward to Anthropic
507
+ // and 400.
508
+ config: { callSite: "mainAgent", temperature: 0.7 },
509
+ });
510
+
511
+ const config = seen?.config as Record<string, unknown>;
512
+ expect(config.thinking).toEqual({ type: "adaptive" });
513
+ expect(config.temperature).toBeUndefined();
514
+ expect("temperature" in config).toBe(false);
515
+ });
516
+
517
+ test("drops explicit temperature: 0 when thinking is enabled (Anthropic)", async () => {
518
+ // Mirrors the recall-agent / retriever shape: `temperature: 0` for
519
+ // determinism on a thinking-enabled profile. Same 400 risk, same fix.
520
+ setLlmConfig({
521
+ default: {
522
+ provider: "anthropic",
523
+ model: "claude-opus-4-7",
524
+ thinking: { enabled: true, streamThinking: true },
525
+ },
526
+ callSites: { recall: {} },
527
+ });
528
+
529
+ let seen: SendMessageOptions | undefined;
530
+ const wrapped = new RetryProvider(
531
+ makeProvider("anthropic", (options) => {
532
+ seen = options;
533
+ }),
534
+ );
535
+
536
+ await wrapped.sendMessage(DUMMY_MESSAGES, undefined, undefined, {
537
+ config: { callSite: "recall", temperature: 0 },
538
+ });
539
+
540
+ const config = seen?.config as Record<string, unknown>;
541
+ expect(config.thinking).toEqual({ type: "adaptive" });
542
+ expect(config.temperature).toBeUndefined();
543
+ });
544
+
545
+ test("preserves temperature: 1 when thinking is enabled (Anthropic accepts it)", async () => {
546
+ setLlmConfig({
547
+ default: {
548
+ provider: "anthropic",
549
+ model: "claude-opus-4-7",
550
+ thinking: { enabled: true, streamThinking: true },
551
+ },
552
+ callSites: { mainAgent: {} },
553
+ });
554
+
555
+ let seen: SendMessageOptions | undefined;
556
+ const wrapped = new RetryProvider(
557
+ makeProvider("anthropic", (options) => {
558
+ seen = options;
559
+ }),
560
+ );
561
+
562
+ await wrapped.sendMessage(DUMMY_MESSAGES, undefined, undefined, {
563
+ config: { callSite: "mainAgent", temperature: 1 },
564
+ });
565
+
566
+ const config = seen?.config as Record<string, unknown>;
567
+ expect(config.thinking).toEqual({ type: "adaptive" });
568
+ expect(config.temperature).toBe(1);
569
+ });
570
+
571
+ test("preserves explicit temperature when thinking is disabled (Anthropic)", async () => {
572
+ // The bug class only exists when thinking resolves enabled. With
573
+ // thinking disabled, every temperature value is valid — the guard must
574
+ // not fire.
575
+ setLlmConfig({
576
+ default: {
577
+ provider: "anthropic",
578
+ model: "claude-opus-4-7",
579
+ thinking: { enabled: false, streamThinking: false },
580
+ },
581
+ callSites: { mainAgent: {} },
582
+ });
583
+
584
+ let seen: SendMessageOptions | undefined;
585
+ const wrapped = new RetryProvider(
586
+ makeProvider("anthropic", (options) => {
587
+ seen = options;
588
+ }),
589
+ );
590
+
591
+ await wrapped.sendMessage(DUMMY_MESSAGES, undefined, undefined, {
592
+ config: { callSite: "mainAgent", temperature: 0.7 },
593
+ });
594
+
595
+ const config = seen?.config as Record<string, unknown>;
596
+ expect(config.thinking).toEqual({ type: "disabled" });
597
+ expect(config.temperature).toBe(0.7);
598
+ });
599
+
600
+ test("drops temperature for OpenRouter when fronting an `anthropic/*` model", async () => {
601
+ setLlmConfig({
602
+ default: {
603
+ provider: "openrouter",
604
+ model: "anthropic/claude-opus-4-7",
605
+ thinking: { enabled: true, streamThinking: true },
606
+ },
607
+ callSites: { mainAgent: {} },
608
+ });
609
+
610
+ let seen: SendMessageOptions | undefined;
611
+ const wrapped = new RetryProvider(
612
+ makeProvider("openrouter", (options) => {
613
+ seen = options;
614
+ }),
615
+ );
616
+
617
+ await wrapped.sendMessage(DUMMY_MESSAGES, undefined, undefined, {
618
+ config: { callSite: "mainAgent", temperature: 0.7 },
619
+ });
620
+
621
+ const config = seen?.config as Record<string, unknown>;
622
+ expect(config.model).toBe("anthropic/claude-opus-4-7");
623
+ expect(config.temperature).toBeUndefined();
624
+ });
625
+
626
+ test("preserves temperature for OpenRouter when fronting a non-Anthropic reasoning model", async () => {
627
+ // OpenRouter's other reasoning models (xAI Grok, etc.) translate
628
+ // `thinking` into the `reasoning` parameter via `buildExtraCreateParams`
629
+ // and don't share Anthropic's temperature-must-be-1 constraint. The
630
+ // guard must not over-reach.
631
+ setLlmConfig({
632
+ default: {
633
+ provider: "openrouter",
634
+ model: "x-ai/grok-4",
635
+ thinking: { enabled: true, streamThinking: true },
636
+ },
637
+ callSites: { mainAgent: {} },
638
+ });
639
+
640
+ let seen: SendMessageOptions | undefined;
641
+ const wrapped = new RetryProvider(
642
+ makeProvider("openrouter", (options) => {
643
+ seen = options;
644
+ }),
645
+ );
646
+
647
+ await wrapped.sendMessage(DUMMY_MESSAGES, undefined, undefined, {
648
+ config: { callSite: "mainAgent", temperature: 0.7 },
649
+ });
650
+
651
+ const config = seen?.config as Record<string, unknown>;
652
+ expect(config.model).toBe("x-ai/grok-4");
653
+ expect(config.temperature).toBe(0.7);
654
+ });
655
+
656
+ test("guard does not fire when thinking has already been stripped by forced tool_choice", async () => {
657
+ // `retry.ts` strips `thinking` when forced `tool_choice: { type: "tool" }`
658
+ // is set on Anthropic — the guard runs after that step, so by the time
659
+ // we check, `thinking` is gone and the temperature can stay.
660
+ setLlmConfig({
661
+ default: {
662
+ provider: "anthropic",
663
+ model: "claude-opus-4-7",
664
+ thinking: { enabled: true, streamThinking: true },
665
+ },
666
+ callSites: { trustRuleSuggestion: {} },
667
+ });
668
+
669
+ let seen: SendMessageOptions | undefined;
670
+ const wrapped = new RetryProvider(
671
+ makeProvider("anthropic", (options) => {
672
+ seen = options;
673
+ }),
674
+ );
675
+
676
+ await wrapped.sendMessage(DUMMY_MESSAGES, undefined, undefined, {
677
+ config: {
678
+ callSite: "trustRuleSuggestion",
679
+ temperature: 0.7,
680
+ tool_choice: { type: "tool", name: "suggest_trust_rule" },
681
+ },
682
+ });
683
+
684
+ const config = seen?.config as Record<string, unknown>;
685
+ expect(config.thinking).toBeUndefined();
686
+ expect(config.temperature).toBe(0.7);
687
+ });
688
+ });
689
+
469
690
  // ── RetryProvider — pre-resolved model fast-path ────────────────────────────
470
691
 
471
692
  describe("RetryProvider — no callSite (pre-resolved config passes through)", () => {