vellum 0.2.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (349) hide show
  1. package/README.md +15 -2
  2. package/bun.lock +5 -2
  3. package/package.json +4 -2
  4. package/scripts/capture-x-graphql.ts +562 -0
  5. package/scripts/ipc/check-swift-decoder-drift.ts +2 -1
  6. package/scripts/test.sh +5 -0
  7. package/src/__tests__/__snapshots__/ipc-snapshot.test.ts.snap +133 -34
  8. package/src/__tests__/account-registry.test.ts +2 -1
  9. package/src/__tests__/agent-heartbeat-service.test.ts +250 -0
  10. package/src/__tests__/asset-materialize-tool.test.ts +16 -15
  11. package/src/__tests__/asset-search-tool.test.ts +23 -22
  12. package/src/__tests__/attachments-store.test.ts +56 -127
  13. package/src/__tests__/browser-skill-baseline-tool-payload.test.ts +5 -4
  14. package/src/__tests__/browser-skill-endstate.test.ts +4 -3
  15. package/src/__tests__/call-bridge.test.ts +385 -0
  16. package/src/__tests__/call-constants.test.ts +40 -0
  17. package/src/__tests__/call-orchestrator.test.ts +130 -4
  18. package/src/__tests__/call-recovery.test.ts +518 -0
  19. package/src/__tests__/call-routes-http.test.ts +459 -0
  20. package/src/__tests__/call-state-machine.test.ts +143 -0
  21. package/src/__tests__/call-store.test.ts +216 -1
  22. package/src/__tests__/cli-discover.test.ts +1 -1
  23. package/src/__tests__/commit-message-enrichment-service.test.ts +148 -7
  24. package/src/__tests__/compaction.benchmark.test.ts +176 -0
  25. package/src/__tests__/computer-use-tools.test.ts +250 -0
  26. package/src/__tests__/config-schema.test.ts +299 -3
  27. package/src/__tests__/conflict-store.test.ts +2 -1
  28. package/src/__tests__/contacts-tools.test.ts +331 -0
  29. package/src/__tests__/conversation-store.test.ts +30 -32
  30. package/src/__tests__/credential-security-invariants.test.ts +4 -0
  31. package/src/__tests__/date-context.test.ts +373 -0
  32. package/src/__tests__/db-schedule-syntax-migration.test.ts +129 -0
  33. package/src/__tests__/fixtures/media-reuse-fixtures.ts +3 -3
  34. package/src/__tests__/followup-tools.test.ts +303 -0
  35. package/src/__tests__/handlers-twitter-config.test.ts +718 -0
  36. package/src/__tests__/intent-routing.test.ts +64 -57
  37. package/src/__tests__/ipc-roundtrip.benchmark.test.ts +237 -0
  38. package/src/__tests__/ipc-snapshot.test.ts +62 -28
  39. package/src/__tests__/llm-usage-store.test.ts +3 -8
  40. package/src/__tests__/media-generate-image.test.ts +1 -1
  41. package/src/__tests__/media-reuse-story.e2e.test.ts +7 -7
  42. package/src/__tests__/memory-retrieval.benchmark.test.ts +430 -0
  43. package/src/__tests__/parallel-tool.benchmark.test.ts +294 -0
  44. package/src/__tests__/playbook-tools.test.ts +342 -0
  45. package/src/__tests__/profile-compiler.test.ts +2 -1
  46. package/src/__tests__/provider-streaming.benchmark.test.ts +773 -0
  47. package/src/__tests__/recurrence-engine-rruleset.test.ts +78 -0
  48. package/src/__tests__/recurrence-engine.test.ts +69 -0
  49. package/src/__tests__/recurrence-types.test.ts +71 -0
  50. package/src/__tests__/registry.test.ts +5 -3
  51. package/src/__tests__/relay-server.test.ts +633 -0
  52. package/src/__tests__/reminder-store.test.ts +6 -3
  53. package/src/__tests__/reminder.test.ts +43 -77
  54. package/src/__tests__/run-orchestrator-assistant-events.test.ts +8 -4
  55. package/src/__tests__/run-orchestrator.test.ts +4 -4
  56. package/src/__tests__/runtime-attachment-metadata.test.ts +7 -6
  57. package/src/__tests__/runtime-runs-http.test.ts +4 -4
  58. package/src/__tests__/runtime-runs.test.ts +4 -4
  59. package/src/__tests__/schedule-store.test.ts +482 -0
  60. package/src/__tests__/schedule-tools.test.ts +700 -0
  61. package/src/__tests__/scheduler-recurrence.test.ts +329 -0
  62. package/src/__tests__/server-history-render.test.ts +14 -13
  63. package/src/__tests__/session-error.test.ts +28 -0
  64. package/src/__tests__/session-init.benchmark.test.ts +462 -0
  65. package/src/__tests__/session-queue.test.ts +71 -48
  66. package/src/__tests__/session-runtime-assembly.test.ts +161 -0
  67. package/src/__tests__/session-surfaces-task-progress.test.ts +104 -0
  68. package/src/__tests__/signup-e2e.test.ts +2 -1
  69. package/src/__tests__/skill-projection.benchmark.test.ts +328 -0
  70. package/src/__tests__/skill-script-runner.test.ts +159 -0
  71. package/src/__tests__/speaker-identification.test.ts +52 -0
  72. package/src/__tests__/subagent-manager-notify.test.ts +42 -10
  73. package/src/__tests__/subagent-tools.test.ts +141 -41
  74. package/src/__tests__/task-compiler.test.ts +2 -1
  75. package/src/__tests__/task-runner.test.ts +2 -1
  76. package/src/__tests__/task-scheduler.test.ts +2 -1
  77. package/src/__tests__/task-tools.test.ts +49 -56
  78. package/src/__tests__/tool-audit-listener.test.ts +1 -0
  79. package/src/__tests__/tool-domain-event-publisher.test.ts +2 -0
  80. package/src/__tests__/tool-execution-pipeline.benchmark.test.ts +500 -0
  81. package/src/__tests__/tool-executor.test.ts +13 -17
  82. package/src/__tests__/turn-commit.test.ts +218 -3
  83. package/src/__tests__/twilio-provider.test.ts +143 -0
  84. package/src/__tests__/twilio-routes.test.ts +789 -0
  85. package/src/__tests__/twitter-auth-handler.test.ts +581 -0
  86. package/src/__tests__/view-image-tool.test.ts +217 -0
  87. package/src/__tests__/workspace-git-service.test.ts +186 -0
  88. package/src/__tests__/workspace-heartbeat-service.test.ts +13 -3
  89. package/src/agent-heartbeat/agent-heartbeat-service.ts +155 -0
  90. package/src/bundler/app-bundler.ts +12 -8
  91. package/src/calls/call-bridge.ts +95 -0
  92. package/src/calls/call-constants.ts +43 -5
  93. package/src/calls/call-domain.ts +276 -0
  94. package/src/calls/call-orchestrator.ts +43 -17
  95. package/src/calls/call-recovery.ts +207 -0
  96. package/src/calls/call-state-machine.ts +68 -0
  97. package/src/calls/call-store.ts +192 -5
  98. package/src/calls/relay-server.ts +41 -4
  99. package/src/calls/speaker-identification.ts +213 -0
  100. package/src/calls/twilio-provider.ts +10 -6
  101. package/src/calls/twilio-routes.ts +90 -76
  102. package/src/calls/types.ts +1 -1
  103. package/src/cli/config-commands.ts +334 -0
  104. package/src/cli/core-commands.ts +776 -0
  105. package/src/cli/doordash.ts +251 -1
  106. package/src/cli/ipc-client.ts +82 -0
  107. package/src/cli/map.ts +246 -0
  108. package/src/cli/twitter.ts +575 -0
  109. package/src/cli.ts +7 -5
  110. package/src/commands/__tests__/cc-command-registry.test.ts +319 -0
  111. package/src/commands/cc-command-registry.ts +209 -0
  112. package/src/config/bundled-skills/contacts/SKILL.md +39 -0
  113. package/src/config/bundled-skills/contacts/TOOLS.json +122 -0
  114. package/src/config/bundled-skills/contacts/tools/contact-merge.ts +9 -0
  115. package/src/config/bundled-skills/contacts/tools/contact-search.ts +9 -0
  116. package/src/config/bundled-skills/contacts/tools/contact-upsert.ts +9 -0
  117. package/src/config/bundled-skills/document/SKILL.md +18 -0
  118. package/src/config/bundled-skills/document/TOOLS.json +53 -0
  119. package/src/config/bundled-skills/document/tools/document-create.ts +9 -0
  120. package/src/config/bundled-skills/document/tools/document-update.ts +9 -0
  121. package/src/config/bundled-skills/doordash/SKILL.md +82 -23
  122. package/src/config/bundled-skills/followups/SKILL.md +32 -0
  123. package/src/config/bundled-skills/followups/TOOLS.json +100 -0
  124. package/src/config/bundled-skills/followups/tools/followup-create.ts +9 -0
  125. package/src/config/bundled-skills/followups/tools/followup-list.ts +9 -0
  126. package/src/config/bundled-skills/followups/tools/followup-resolve.ts +9 -0
  127. package/src/config/bundled-skills/image-studio/tools/media-generate-image.ts +1 -23
  128. package/src/config/bundled-skills/messaging/tools/messaging-analyze-style.ts +2 -1
  129. package/src/config/bundled-skills/playbooks/SKILL.md +31 -0
  130. package/src/config/bundled-skills/playbooks/TOOLS.json +126 -0
  131. package/src/config/bundled-skills/playbooks/tools/playbook-create.ts +9 -0
  132. package/src/config/bundled-skills/playbooks/tools/playbook-delete.ts +9 -0
  133. package/src/config/bundled-skills/playbooks/tools/playbook-list.ts +9 -0
  134. package/src/config/bundled-skills/playbooks/tools/playbook-update.ts +9 -0
  135. package/src/config/bundled-skills/reminder/SKILL.md +20 -0
  136. package/src/config/bundled-skills/reminder/TOOLS.json +67 -0
  137. package/src/config/bundled-skills/reminder/tools/reminder-cancel.ts +9 -0
  138. package/src/config/bundled-skills/reminder/tools/reminder-create.ts +9 -0
  139. package/src/config/bundled-skills/reminder/tools/reminder-list.ts +9 -0
  140. package/src/config/bundled-skills/schedule/SKILL.md +74 -0
  141. package/src/config/bundled-skills/schedule/TOOLS.json +135 -0
  142. package/src/config/bundled-skills/schedule/tools/schedule-create.ts +9 -0
  143. package/src/config/bundled-skills/schedule/tools/schedule-delete.ts +9 -0
  144. package/src/config/bundled-skills/schedule/tools/schedule-list.ts +9 -0
  145. package/src/config/bundled-skills/schedule/tools/schedule-update.ts +9 -0
  146. package/src/config/bundled-skills/subagent/SKILL.md +25 -0
  147. package/src/config/bundled-skills/subagent/TOOLS.json +107 -0
  148. package/src/config/bundled-skills/subagent/tools/subagent-abort.ts +9 -0
  149. package/src/config/bundled-skills/subagent/tools/subagent-message.ts +9 -0
  150. package/src/config/bundled-skills/subagent/tools/subagent-read.ts +9 -0
  151. package/src/config/bundled-skills/subagent/tools/subagent-spawn.ts +9 -0
  152. package/src/config/bundled-skills/subagent/tools/subagent-status.ts +9 -0
  153. package/src/config/bundled-skills/tasks/SKILL.md +28 -0
  154. package/src/config/bundled-skills/tasks/TOOLS.json +256 -0
  155. package/src/config/bundled-skills/tasks/tools/task-delete.ts +9 -0
  156. package/src/config/bundled-skills/tasks/tools/task-list-add.ts +9 -0
  157. package/src/config/bundled-skills/tasks/tools/task-list-remove.ts +9 -0
  158. package/src/config/bundled-skills/tasks/tools/task-list-show.ts +9 -0
  159. package/src/config/bundled-skills/tasks/tools/task-list-update.ts +9 -0
  160. package/src/config/bundled-skills/tasks/tools/task-list.ts +9 -0
  161. package/src/config/bundled-skills/tasks/tools/task-run.ts +9 -0
  162. package/src/config/bundled-skills/tasks/tools/task-save.ts +9 -0
  163. package/src/config/bundled-skills/twitter/SKILL.md +134 -0
  164. package/src/config/bundled-skills/watcher/SKILL.md +27 -0
  165. package/src/config/bundled-skills/watcher/TOOLS.json +147 -0
  166. package/src/config/bundled-skills/watcher/tools/watcher-create.ts +9 -0
  167. package/src/config/bundled-skills/watcher/tools/watcher-delete.ts +9 -0
  168. package/src/config/bundled-skills/watcher/tools/watcher-digest.ts +9 -0
  169. package/src/config/bundled-skills/watcher/tools/watcher-list.ts +9 -0
  170. package/src/config/bundled-skills/watcher/tools/watcher-update.ts +9 -0
  171. package/src/config/defaults.ts +33 -0
  172. package/src/config/loader.ts +4 -1
  173. package/src/config/schema.ts +161 -1
  174. package/src/config/system-prompt.ts +61 -16
  175. package/src/config/templates/IDENTITY.md +7 -0
  176. package/src/config/types.ts +4 -0
  177. package/src/contacts/contact-store.ts +4 -4
  178. package/src/daemon/assistant-attachments.ts +10 -0
  179. package/src/daemon/classifier.ts +3 -1
  180. package/src/daemon/computer-use-session.ts +3 -1
  181. package/src/daemon/date-context.ts +136 -0
  182. package/src/daemon/handlers/apps.ts +16 -1
  183. package/src/daemon/handlers/browser.ts +54 -0
  184. package/src/daemon/handlers/computer-use.ts +7 -1
  185. package/src/daemon/handlers/config.ts +163 -5
  186. package/src/daemon/handlers/diagnostics.ts +5 -1
  187. package/src/daemon/handlers/documents.ts +18 -29
  188. package/src/daemon/handlers/home-base.ts +5 -1
  189. package/src/daemon/handlers/index.ts +40 -277
  190. package/src/daemon/handlers/misc.ts +9 -1
  191. package/src/daemon/handlers/publish.ts +6 -1
  192. package/src/daemon/handlers/sessions.ts +65 -12
  193. package/src/daemon/handlers/shared.ts +36 -1
  194. package/src/daemon/handlers/signing.ts +37 -0
  195. package/src/daemon/handlers/skills.ts +20 -6
  196. package/src/daemon/handlers/subagents.ts +8 -3
  197. package/src/daemon/handlers/twitter-auth.ts +169 -0
  198. package/src/daemon/handlers/work-items.ts +384 -68
  199. package/src/daemon/ipc-contract-inventory.json +28 -4
  200. package/src/daemon/ipc-contract.ts +133 -37
  201. package/src/daemon/ipc-protocol.ts +7 -2
  202. package/src/daemon/lifecycle.ts +21 -0
  203. package/src/daemon/main.ts +10 -4
  204. package/src/daemon/ride-shotgun-handler.ts +74 -10
  205. package/src/daemon/server.ts +143 -26
  206. package/src/daemon/session-agent-loop.ts +887 -0
  207. package/src/daemon/session-attachments.ts +28 -5
  208. package/src/daemon/session-error.ts +24 -3
  209. package/src/daemon/session-lifecycle.ts +147 -0
  210. package/src/daemon/session-media-retry.ts +147 -0
  211. package/src/daemon/session-messaging.ts +145 -0
  212. package/src/daemon/session-notifiers.ts +164 -0
  213. package/src/daemon/session-process.ts +2 -2
  214. package/src/daemon/session-queue-manager.ts +1 -0
  215. package/src/daemon/session-runtime-assembly.ts +52 -0
  216. package/src/daemon/session-skill-tools.ts +124 -5
  217. package/src/daemon/session-slash.ts +3 -0
  218. package/src/daemon/session-surfaces.ts +77 -2
  219. package/src/daemon/session-tool-setup.ts +216 -2
  220. package/src/daemon/session-usage.ts +0 -2
  221. package/src/daemon/session.ts +114 -1404
  222. package/src/daemon/video-thumbnail.ts +60 -0
  223. package/src/doordash/client.ts +121 -27
  224. package/src/doordash/queries.ts +1 -2
  225. package/src/export/formatter.ts +3 -1
  226. package/src/followups/followup-store.ts +4 -2
  227. package/src/followups/types.ts +6 -0
  228. package/src/hooks/templates.ts +1 -1
  229. package/src/index.ts +32 -1153
  230. package/src/memory/attachments-store.ts +28 -83
  231. package/src/memory/channel-delivery-store.ts +7 -21
  232. package/src/memory/clarification-resolver.ts +6 -5
  233. package/src/memory/contradiction-checker.ts +3 -2
  234. package/src/memory/conversation-key-store.ts +10 -29
  235. package/src/memory/conversation-store.ts +2 -1
  236. package/src/memory/db.ts +96 -2
  237. package/src/memory/entity-extractor.ts +6 -3
  238. package/src/memory/items-extractor.ts +5 -4
  239. package/src/memory/jobs-store.ts +3 -2
  240. package/src/memory/llm-usage-store.ts +1 -2
  241. package/src/memory/runs-store.ts +1 -2
  242. package/src/memory/schema.ts +23 -2
  243. package/src/messaging/style-analyzer.ts +3 -2
  244. package/src/messaging/thread-summarizer.ts +8 -12
  245. package/src/messaging/triage-engine.ts +4 -2
  246. package/src/providers/openrouter/client.ts +20 -0
  247. package/src/providers/registry.ts +8 -0
  248. package/src/runtime/http-server.ts +108 -20
  249. package/src/runtime/routes/attachment-routes.ts +2 -3
  250. package/src/runtime/routes/call-routes.ts +140 -0
  251. package/src/runtime/routes/channel-routes.ts +5 -10
  252. package/src/runtime/routes/conversation-routes.ts +5 -5
  253. package/src/runtime/routes/run-routes.ts +2 -2
  254. package/src/runtime/run-orchestrator.ts +9 -3
  255. package/src/schedule/recurrence-engine.ts +138 -0
  256. package/src/schedule/recurrence-types.ts +67 -0
  257. package/src/schedule/schedule-store.ts +102 -57
  258. package/src/schedule/scheduler.ts +9 -6
  259. package/src/security/oauth2.ts +29 -4
  260. package/src/security/secret-allowlist.ts +46 -0
  261. package/src/skills/clawhub.ts +1 -1
  262. package/src/subagent/manager.ts +40 -8
  263. package/src/swarm/backend-claude-code.ts +64 -9
  264. package/src/swarm/worker-prompts.ts +2 -1
  265. package/src/tasks/SPEC.md +34 -28
  266. package/src/tasks/ephemeral-permissions.ts +16 -7
  267. package/src/tasks/task-compiler.ts +5 -4
  268. package/src/tasks/task-runner.ts +10 -5
  269. package/src/tasks/task-scheduler.ts +1 -1
  270. package/src/tasks/tool-sanitizer.ts +36 -0
  271. package/src/tools/assets/search.ts +4 -4
  272. package/src/tools/browser/api-map.ts +220 -0
  273. package/src/tools/browser/auto-navigate.ts +270 -0
  274. package/src/tools/browser/browser-execution.ts +2 -1
  275. package/src/tools/browser/browser-manager.ts +2 -2
  276. package/src/tools/browser/network-recorder.ts +5 -4
  277. package/src/tools/browser/x-auto-navigate.ts +207 -0
  278. package/src/tools/calls/call-end.ts +17 -67
  279. package/src/tools/calls/call-start.ts +24 -85
  280. package/src/tools/calls/call-status.ts +35 -51
  281. package/src/tools/claude-code/claude-code.ts +77 -11
  282. package/src/tools/contacts/contact-merge.ts +46 -78
  283. package/src/tools/contacts/contact-search.ts +35 -79
  284. package/src/tools/contacts/contact-upsert.ts +35 -108
  285. package/src/tools/credentials/vault.ts +20 -4
  286. package/src/tools/document/document-tool.ts +71 -144
  287. package/src/tools/executor.ts +129 -10
  288. package/src/tools/followups/followup_create.ts +46 -88
  289. package/src/tools/followups/followup_list.ts +34 -74
  290. package/src/tools/followups/followup_resolve.ts +31 -66
  291. package/src/tools/host-terminal/cli-discover.ts +2 -1
  292. package/src/tools/host-terminal/host-shell.ts +10 -0
  293. package/src/tools/memory/handlers.ts +5 -4
  294. package/src/tools/network/__tests__/web-search.test.ts +427 -0
  295. package/src/tools/network/script-proxy/__tests__/logging.test.ts +248 -0
  296. package/src/tools/network/script-proxy/__tests__/policy.test.ts +234 -0
  297. package/src/tools/network/script-proxy/__tests__/router.test.ts +76 -0
  298. package/src/tools/network/web-fetch.ts +18 -6
  299. package/src/tools/playbooks/index.ts +4 -5
  300. package/src/tools/playbooks/playbook-create.ts +3 -47
  301. package/src/tools/playbooks/playbook-delete.ts +1 -25
  302. package/src/tools/playbooks/playbook-list.ts +1 -28
  303. package/src/tools/playbooks/playbook-update.ts +3 -51
  304. package/src/tools/reminder/reminder.ts +5 -78
  305. package/src/tools/schedule/create.ts +69 -74
  306. package/src/tools/schedule/delete.ts +21 -47
  307. package/src/tools/schedule/list.ts +55 -74
  308. package/src/tools/schedule/update.ts +77 -84
  309. package/src/tools/subagent/abort.ts +29 -58
  310. package/src/tools/subagent/message.ts +30 -63
  311. package/src/tools/subagent/read.ts +53 -84
  312. package/src/tools/subagent/spawn.ts +43 -82
  313. package/src/tools/subagent/status.ts +42 -71
  314. package/src/tools/swarm/delegate.ts +2 -1
  315. package/src/tools/tasks/index.ts +8 -8
  316. package/src/tools/tasks/task-delete.ts +60 -88
  317. package/src/tools/tasks/task-list.ts +31 -52
  318. package/src/tools/tasks/task-run.ts +72 -108
  319. package/src/tools/tasks/task-save.ts +33 -65
  320. package/src/tools/tasks/work-item-enqueue.ts +183 -215
  321. package/src/tools/tasks/work-item-list.ts +33 -63
  322. package/src/tools/tasks/work-item-remove.ts +45 -97
  323. package/src/tools/tasks/work-item-update.ts +91 -163
  324. package/src/tools/terminal/backends/native.ts +3 -1
  325. package/src/tools/tool-manifest.ts +0 -62
  326. package/src/tools/types.ts +6 -0
  327. package/src/tools/ui-surface/definitions.ts +3 -1
  328. package/src/tools/watch/screen-watch.ts +3 -1
  329. package/src/tools/watcher/create.ts +52 -98
  330. package/src/tools/watcher/delete.ts +20 -46
  331. package/src/tools/watcher/digest.ts +36 -70
  332. package/src/tools/watcher/list.ts +49 -79
  333. package/src/tools/watcher/update.ts +45 -91
  334. package/src/twitter/client.ts +690 -0
  335. package/src/twitter/session.ts +91 -0
  336. package/src/usage/types.ts +0 -1
  337. package/src/util/truncate.ts +6 -0
  338. package/src/watcher/providers/slack.ts +2 -1
  339. package/src/watcher/watcher-store.ts +3 -2
  340. package/src/work-items/work-item-store.ts +27 -2
  341. package/src/workspace/commit-message-enrichment-service.ts +31 -7
  342. package/src/workspace/git-service.ts +87 -22
  343. package/src/workspace/provider-commit-message-generator.ts +242 -0
  344. package/src/workspace/turn-commit.ts +62 -3
  345. package/src/tools/contacts/index.ts +0 -4
  346. package/src/tools/document/index.ts +0 -5
  347. package/src/tools/followups/index.ts +0 -3
  348. package/src/tools/subagent/index.ts +0 -5
  349. /package/src/__tests__/{memory-context-benchmark.test.ts → memory-context-benchmark.benchmark.test.ts} +0 -0
@@ -23,7 +23,7 @@ mock.module('../util/logger.js', () => ({
23
23
  }),
24
24
  }));
25
25
 
26
- import { initializeDb, getDb } from '../memory/db.js';
26
+ import { initializeDb, getDb, resetDb } from '../memory/db.js';
27
27
  import { conversations } from '../memory/schema.js';
28
28
  import {
29
29
  createCallSession,
@@ -37,11 +37,15 @@ import {
37
37
  getPendingQuestion,
38
38
  answerPendingQuestion,
39
39
  expirePendingQuestions,
40
+ claimCallback,
41
+ releaseCallbackClaim,
42
+ finalizeCallbackClaim,
40
43
  } from '../calls/call-store.js';
41
44
 
42
45
  initializeDb();
43
46
 
44
47
  afterAll(() => {
48
+ resetDb();
45
49
  try { rmSync(testDir, { recursive: true }); } catch { /* best effort */ }
46
50
  });
47
51
 
@@ -65,6 +69,7 @@ function resetTables() {
65
69
  db.run('DELETE FROM call_pending_questions');
66
70
  db.run('DELETE FROM call_events');
67
71
  db.run('DELETE FROM call_sessions');
72
+ db.run('DELETE FROM processed_callbacks');
68
73
  db.run('DELETE FROM conversations');
69
74
  ensuredConvIds = new Set();
70
75
  }
@@ -473,4 +478,214 @@ describe('call-store', () => {
473
478
  const q1Row = raw.query('SELECT status FROM call_pending_questions WHERE id = ?').get(q1.id) as { status: string };
474
479
  expect(q1Row.status).toBe('answered');
475
480
  });
481
+
482
+ // ── Callback Claim ──────────────────────────────────────────────
483
+
484
+ test('claimCallback returns a claim ID on first call', () => {
485
+ const session = createTestCallSession({
486
+ conversationId: 'conv-22',
487
+ provider: 'twilio',
488
+ fromNumber: '+15551111111',
489
+ toNumber: '+15552222222',
490
+ });
491
+
492
+ const result = claimCallback('test-dedupe-key-1', session.id);
493
+ expect(result).toBeTypeOf('string');
494
+ expect(result!.length).toBeGreaterThan(0);
495
+ });
496
+
497
+ test('claimCallback returns null on duplicate key', () => {
498
+ const session = createTestCallSession({
499
+ conversationId: 'conv-23',
500
+ provider: 'twilio',
501
+ fromNumber: '+15551111111',
502
+ toNumber: '+15552222222',
503
+ });
504
+
505
+ const first = claimCallback('test-dedupe-key-2', session.id);
506
+ const second = claimCallback('test-dedupe-key-2', session.id);
507
+
508
+ expect(first).toBeTypeOf('string');
509
+ expect(second).toBeNull();
510
+ });
511
+
512
+ test('releaseCallbackClaim allows re-claim', () => {
513
+ const session = createTestCallSession({
514
+ conversationId: 'conv-24',
515
+ provider: 'twilio',
516
+ fromNumber: '+15551111111',
517
+ toNumber: '+15552222222',
518
+ });
519
+
520
+ const first = claimCallback('test-dedupe-key-3', session.id);
521
+ expect(first).toBeTypeOf('string');
522
+
523
+ releaseCallbackClaim('test-dedupe-key-3', first!);
524
+
525
+ const second = claimCallback('test-dedupe-key-3', session.id);
526
+ expect(second).toBeTypeOf('string');
527
+ });
528
+
529
+ test('releaseCallbackClaim with wrong claimId does not release', () => {
530
+ const session = createTestCallSession({
531
+ conversationId: 'conv-24b',
532
+ provider: 'twilio',
533
+ fromNumber: '+15551111111',
534
+ toNumber: '+15552222222',
535
+ });
536
+
537
+ const claimId = claimCallback('test-dedupe-key-3b', session.id);
538
+ expect(claimId).toBeTypeOf('string');
539
+
540
+ // Attempt to release with a wrong claim ID — should be a no-op
541
+ releaseCallbackClaim('test-dedupe-key-3b', 'wrong-claim-id');
542
+
543
+ // The claim should still be held, so re-claiming should fail
544
+ const second = claimCallback('test-dedupe-key-3b', session.id);
545
+ expect(second).toBeNull();
546
+ });
547
+
548
+ test('claimCallback INSERT OR IGNORE pattern is safe for same key', () => {
549
+ const session = createTestCallSession({
550
+ conversationId: 'conv-25',
551
+ provider: 'twilio',
552
+ fromNumber: '+15551111111',
553
+ toNumber: '+15552222222',
554
+ });
555
+
556
+ // Claim the key
557
+ const first = claimCallback('test-dedupe-key-4', session.id);
558
+ expect(first).toBeTypeOf('string');
559
+
560
+ // Subsequent claims with the same key should all return null without throwing
561
+ expect(claimCallback('test-dedupe-key-4', session.id)).toBeNull();
562
+ expect(claimCallback('test-dedupe-key-4', session.id)).toBeNull();
563
+
564
+ // Only one row should exist in the table for this key
565
+ const raw = (getDb() as unknown as { $client: import('bun:sqlite').Database }).$client;
566
+ const rows = raw.query('SELECT COUNT(*) as cnt FROM processed_callbacks WHERE dedupe_key = ?').get('test-dedupe-key-4') as { cnt: number };
567
+ expect(rows.cnt).toBe(1);
568
+ });
569
+
570
+ test('claimCallback reclaims expired orphaned claims', () => {
571
+ const session = createTestCallSession({
572
+ conversationId: 'conv-26',
573
+ provider: 'twilio',
574
+ fromNumber: '+15551111111',
575
+ toNumber: '+15552222222',
576
+ });
577
+
578
+ // Claim the key
579
+ const first = claimCallback('test-dedupe-key-expired', session.id);
580
+ expect(first).toBeTypeOf('string');
581
+
582
+ // Simulate an orphaned claim by backdating the created_at to well past expiry
583
+ const raw = (getDb() as unknown as { $client: import('bun:sqlite').Database }).$client;
584
+ const oldTimestamp = Date.now() - 120_000; // 2 minutes ago, well past 60s expiry
585
+ raw.query('UPDATE processed_callbacks SET created_at = ? WHERE dedupe_key = ?').run(oldTimestamp, 'test-dedupe-key-expired');
586
+
587
+ // Reclaim should succeed because the old claim has expired
588
+ const second = claimCallback('test-dedupe-key-expired', session.id);
589
+ expect(second).toBeTypeOf('string');
590
+
591
+ // The new claim should have a different claim ID
592
+ expect(second).not.toBe(first);
593
+ });
594
+
595
+ test('claimCallback does not reclaim finalized claims', () => {
596
+ const session = createTestCallSession({
597
+ conversationId: 'conv-27',
598
+ provider: 'twilio',
599
+ fromNumber: '+15551111111',
600
+ toNumber: '+15552222222',
601
+ });
602
+
603
+ // Claim and finalize
604
+ const first = claimCallback('test-dedupe-key-finalized', session.id);
605
+ expect(first).toBeTypeOf('string');
606
+ finalizeCallbackClaim('test-dedupe-key-finalized', first!);
607
+
608
+ // Attempting to reclaim a finalized key should fail because the far-future
609
+ // timestamp means it will never be considered expired
610
+ const second = claimCallback('test-dedupe-key-finalized', session.id);
611
+ expect(second).toBeNull();
612
+ });
613
+
614
+ test('finalizeCallbackClaim makes claim permanent', () => {
615
+ const session = createTestCallSession({
616
+ conversationId: 'conv-28',
617
+ provider: 'twilio',
618
+ fromNumber: '+15551111111',
619
+ toNumber: '+15552222222',
620
+ });
621
+
622
+ // Claim and finalize
623
+ const claimId = claimCallback('test-dedupe-key-permanent', session.id)!;
624
+ finalizeCallbackClaim('test-dedupe-key-permanent', claimId);
625
+
626
+ // Verify the created_at is set far in the future
627
+ const raw = (getDb() as unknown as { $client: import('bun:sqlite').Database }).$client;
628
+ const row = raw.query('SELECT created_at FROM processed_callbacks WHERE dedupe_key = ?').get('test-dedupe-key-permanent') as { created_at: number };
629
+ // Should be at least 50 years in the future from now
630
+ const fiftyYearsMs = 50 * 365 * 24 * 60 * 60 * 1000;
631
+ expect(row.created_at).toBeGreaterThan(Date.now() + fiftyYearsMs);
632
+ });
633
+
634
+ test('finalizeCallbackClaim with wrong claimId does not finalize', () => {
635
+ const session = createTestCallSession({
636
+ conversationId: 'conv-28b',
637
+ provider: 'twilio',
638
+ fromNumber: '+15551111111',
639
+ toNumber: '+15552222222',
640
+ });
641
+
642
+ // Claim the key
643
+ const claimId = claimCallback('test-dedupe-key-permanent-b', session.id)!;
644
+ expect(claimId).toBeTypeOf('string');
645
+
646
+ // Try to finalize with wrong claimId — should be a no-op
647
+ finalizeCallbackClaim('test-dedupe-key-permanent-b', 'wrong-claim-id');
648
+
649
+ // Verify the created_at was NOT set to far-future (it should still be close to now)
650
+ const raw = (getDb() as unknown as { $client: import('bun:sqlite').Database }).$client;
651
+ const row = raw.query('SELECT created_at FROM processed_callbacks WHERE dedupe_key = ?').get('test-dedupe-key-permanent-b') as { created_at: number };
652
+ const oneMinuteMs = 60 * 1000;
653
+ expect(row.created_at).toBeLessThan(Date.now() + oneMinuteMs);
654
+ });
655
+
656
+ test('handler A cannot release handler B claim after reclaim', () => {
657
+ const session = createTestCallSession({
658
+ conversationId: 'conv-29',
659
+ provider: 'twilio',
660
+ fromNumber: '+15551111111',
661
+ toNumber: '+15552222222',
662
+ });
663
+
664
+ // Handler A claims
665
+ const claimA = claimCallback('test-dedupe-key-ownership', session.id)!;
666
+ expect(claimA).toBeTypeOf('string');
667
+
668
+ // Simulate handler A taking too long: backdate the claim so it expires
669
+ const raw = (getDb() as unknown as { $client: import('bun:sqlite').Database }).$client;
670
+ const oldTimestamp = Date.now() - 120_000;
671
+ raw.query('UPDATE processed_callbacks SET created_at = ? WHERE dedupe_key = ?').run(oldTimestamp, 'test-dedupe-key-ownership');
672
+
673
+ // Handler B reclaims (succeeds because the old claim expired)
674
+ const claimB = claimCallback('test-dedupe-key-ownership', session.id)!;
675
+ expect(claimB).toBeTypeOf('string');
676
+ expect(claimB).not.toBe(claimA);
677
+
678
+ // Handler B finalizes
679
+ finalizeCallbackClaim('test-dedupe-key-ownership', claimB);
680
+
681
+ // Handler A tries to release using its old claimId — should be a no-op
682
+ releaseCallbackClaim('test-dedupe-key-ownership', claimA);
683
+
684
+ // Verify B's finalized claim is still intact
685
+ const row = raw.query('SELECT created_at, claim_id FROM processed_callbacks WHERE dedupe_key = ?').get('test-dedupe-key-ownership') as { created_at: number; claim_id: string };
686
+ expect(row).not.toBeNull();
687
+ expect(row.claim_id).toBe(claimB);
688
+ const fiftyYearsMs = 50 * 365 * 24 * 60 * 60 * 1000;
689
+ expect(row.created_at).toBeGreaterThan(Date.now() + fiftyYearsMs);
690
+ });
476
691
  });
@@ -71,7 +71,7 @@ describe('cliDiscoverTool', () => {
71
71
  expect(result.isError).toBe(false);
72
72
  // Should at least find git which is nearly universally available
73
73
  expect(result.content).toContain('**git**');
74
- }, 30_000);
74
+ }, 60_000);
75
75
 
76
76
  test('includes version info for found CLIs', async () => {
77
77
  const result = await cliDiscoverTool.execute(
@@ -3,12 +3,13 @@ import { mkdirSync, rmSync, writeFileSync, existsSync } from 'node:fs';
3
3
  import { join } from 'node:path';
4
4
  import { tmpdir } from 'node:os';
5
5
  import { execFileSync } from 'node:child_process';
6
+ import type { CommitContext } from '../workspace/commit-message-provider.js';
7
+
6
8
  import {
7
9
  CommitEnrichmentService,
8
10
  _resetEnrichmentService,
9
11
  } from '../workspace/commit-message-enrichment-service.js';
10
12
  import { WorkspaceGitService, _resetGitServiceRegistry } from '../workspace/git-service.js';
11
- import type { CommitContext } from '../workspace/commit-message-provider.js';
12
13
 
13
14
  describe('CommitEnrichmentService', () => {
14
15
  let testDir: string;
@@ -48,6 +49,16 @@ describe('CommitEnrichmentService', () => {
48
49
  return await gitService.getHeadHash();
49
50
  }
50
51
 
52
+ async function waitForDrain(service: CommitEnrichmentService, timeoutMs = 5000): Promise<void> {
53
+ const started = Date.now();
54
+ while (service._getQueueSize() > 0 || service._getActiveWorkers() > 0) {
55
+ if (Date.now() - started > timeoutMs) {
56
+ throw new Error(`Timed out waiting for enrichment queue to drain after ${timeoutMs}ms`);
57
+ }
58
+ await new Promise(resolve => setTimeout(resolve, 50));
59
+ }
60
+ }
61
+
51
62
  test('enqueue and execute writes git note on success', async () => {
52
63
  const commitHash = await createCommit();
53
64
  const service = new CommitEnrichmentService({
@@ -280,9 +291,7 @@ describe('CommitEnrichmentService', () => {
280
291
  });
281
292
 
282
293
  // Wait for queue to drain before shutdown (avoids discarding pending jobs)
283
- while (service._getQueueSize() > 0 || service._getActiveWorkers() > 0) {
284
- await new Promise(resolve => setTimeout(resolve, 50));
285
- }
294
+ await waitForDrain(service, 5000);
286
295
  await service.shutdown();
287
296
 
288
297
  // Both notes should exist
@@ -318,9 +327,7 @@ describe('CommitEnrichmentService', () => {
318
327
  // Wait for all retries to complete (initial + 2 retries, with backoff)
319
328
  // Backoff: 1s after attempt 1, 2s after attempt 2 = ~3s total
320
329
  // But since the job itself is very fast to time out, total time is dominated by backoff
321
- while (service._getActiveWorkers() > 0 || service._getQueueSize() > 0) {
322
- await new Promise(resolve => setTimeout(resolve, 100));
323
- }
330
+ await waitForDrain(service, 10000);
324
331
  await service.shutdown();
325
332
 
326
333
  // After 1 initial attempt + 2 retries (3 total), the job should be counted as failed
@@ -373,6 +380,140 @@ describe('CommitEnrichmentService', () => {
373
380
  expect(service._getDroppedCount()).toBe(4);
374
381
  });
375
382
 
383
+ test('timed-out enrichment work is cancelled via AbortSignal', async () => {
384
+ // Track whether the slow enrichment work actually ran to completion
385
+ let enrichmentCompleted = false;
386
+ const commitHash = await createCommit();
387
+
388
+ const service = new CommitEnrichmentService({
389
+ maxQueueSize: 10,
390
+ maxConcurrency: 1,
391
+ jobTimeoutMs: 50, // Very short timeout
392
+ maxRetries: 0,
393
+ });
394
+
395
+ // Monkey-patch writeNote to simulate slow work that respects the abort signal.
396
+ // The real writeNote now passes the signal to execFileAsync which kills the
397
+ // child process on abort. This mock replicates that behavior by rejecting
398
+ // when the signal fires.
399
+ const originalWriteNote = gitService.writeNote.bind(gitService);
400
+ gitService.writeNote = async (_hash: string, _note: string, signal?: AbortSignal) => {
401
+ // Simulate slow work that is cancellable via AbortSignal
402
+ await new Promise<void>((resolve, reject) => {
403
+ const timer = setTimeout(() => {
404
+ enrichmentCompleted = true;
405
+ resolve();
406
+ }, 2000);
407
+ signal?.addEventListener('abort', () => {
408
+ clearTimeout(timer);
409
+ reject(new Error('aborted'));
410
+ }, { once: true });
411
+ });
412
+ };
413
+
414
+ service.enqueue({
415
+ workspaceDir: testDir,
416
+ commitHash,
417
+ context: makeContext(),
418
+ gitService,
419
+ });
420
+
421
+ await waitForDrain(service, 5000);
422
+ await service.shutdown();
423
+
424
+ // Allow any zombie work to settle — if abort didn't work, the 2s timer
425
+ // would still be running and would set enrichmentCompleted=true. Wait
426
+ // longer than the 2000ms mock delay to reliably catch the regression.
427
+ await new Promise(resolve => setTimeout(resolve, 2500));
428
+
429
+ // The job should have timed out and been counted as failed
430
+ expect(service._getFailedCount()).toBe(1);
431
+ expect(service._getSucceededCount()).toBe(0);
432
+ // The slow enrichment work should NOT have completed since the signal was aborted
433
+ expect(enrichmentCompleted).toBe(false);
434
+
435
+ // Restore original
436
+ gitService.writeNote = originalWriteNote;
437
+ });
438
+
439
+ test('shutdown does not hang on timed-out jobs', async () => {
440
+ const commitHash = await createCommit();
441
+
442
+ const service = new CommitEnrichmentService({
443
+ maxQueueSize: 10,
444
+ maxConcurrency: 1,
445
+ jobTimeoutMs: 50, // Short timeout
446
+ maxRetries: 0,
447
+ });
448
+
449
+ // Make writeNote artificially slow so the job will always time out.
450
+ // The mock respects the abort signal so the subprocess is killed on timeout.
451
+ const originalWriteNote = gitService.writeNote.bind(gitService);
452
+ gitService.writeNote = async (_hash: string, _note: string, signal?: AbortSignal) => {
453
+ await new Promise<void>((resolve, reject) => {
454
+ const timer = setTimeout(resolve, 5000);
455
+ signal?.addEventListener('abort', () => {
456
+ clearTimeout(timer);
457
+ reject(new Error('aborted'));
458
+ }, { once: true });
459
+ });
460
+ };
461
+
462
+ service.enqueue({
463
+ workspaceDir: testDir,
464
+ commitHash,
465
+ context: makeContext(),
466
+ gitService,
467
+ });
468
+
469
+ // Shutdown should complete promptly, not hang for 5s waiting on the slow writeNote
470
+ const shutdownStart = Date.now();
471
+ await service.shutdown();
472
+ const shutdownElapsed = Date.now() - shutdownStart;
473
+
474
+ // Shutdown should complete well under the 5s slow-work duration
475
+ expect(shutdownElapsed).toBeLessThan(3000);
476
+ expect(service._getFailedCount()).toBe(1);
477
+
478
+ gitService.writeNote = originalWriteNote;
479
+ }, 10000);
480
+
481
+ test('abort signal is triggered on non-timeout errors before retry', async () => {
482
+ const commitHash = await createCommit();
483
+
484
+ const service = new CommitEnrichmentService({
485
+ maxQueueSize: 10,
486
+ maxConcurrency: 1,
487
+ jobTimeoutMs: 5000,
488
+ maxRetries: 0,
489
+ });
490
+
491
+ // Make writeNote throw an error and observe whether the signal gets aborted
492
+ const originalWriteNote = gitService.writeNote.bind(gitService);
493
+ gitService.writeNote = async (_hash: string, _note: string) => {
494
+ // Set up a listener on the abort controller's signal to track abortion.
495
+ // We access the signal indirectly by throwing, which triggers the catch
496
+ // block in executeJob where controller.abort() is called.
497
+ throw new Error('Simulated writeNote failure');
498
+ };
499
+
500
+ service.enqueue({
501
+ workspaceDir: testDir,
502
+ commitHash,
503
+ context: makeContext(),
504
+ gitService,
505
+ });
506
+
507
+ await waitForDrain(service, 5000);
508
+ await service.shutdown();
509
+
510
+ // The job should have failed (no retries configured)
511
+ expect(service._getFailedCount()).toBe(1);
512
+ expect(service._getSucceededCount()).toBe(0);
513
+
514
+ gitService.writeNote = originalWriteNote;
515
+ });
516
+
376
517
  test('enqueue is fire-and-forget and never throws even when called rapidly', async () => {
377
518
  const service = new CommitEnrichmentService({
378
519
  maxQueueSize: 3,
@@ -0,0 +1,176 @@
1
+ /**
2
+ * Context Window Compaction Benchmark
3
+ *
4
+ * Measures compaction cost with a mock provider:
5
+ * - compaction latency under threshold pressure
6
+ * - no-op fast path for below-threshold histories
7
+ * - token reduction ratio after compaction
8
+ * - summary call count within expected range
9
+ * - severe pressure overriding cooldown
10
+ */
11
+ import { describe, expect, mock, test } from 'bun:test';
12
+
13
+ import { DEFAULT_CONFIG } from '../config/defaults.js';
14
+ import { ContextWindowManager } from '../context/window-manager.js';
15
+ import { estimatePromptTokens } from '../context/token-estimator.js';
16
+ import type { Message, Provider } from '../providers/types.js';
17
+
18
+ mock.module('../util/logger.js', () => ({
19
+ getLogger: () =>
20
+ new Proxy({} as Record<string, unknown>, { get: () => () => {} }),
21
+ }));
22
+
23
+ function makeSummaryProvider(counter: { calls: number }): Provider {
24
+ return {
25
+ name: 'mock',
26
+ async sendMessage() {
27
+ counter.calls += 1;
28
+ return {
29
+ content: [
30
+ {
31
+ type: 'text',
32
+ text: `## Goals\n- Preserve state\n## Constraints\n- Keep PRs small\n## Decisions\n- Call ${counter.calls}`,
33
+ },
34
+ ],
35
+ model: 'mock-model',
36
+ usage: { inputTokens: 420, outputTokens: 85 },
37
+ stopReason: 'end_turn',
38
+ };
39
+ },
40
+ };
41
+ }
42
+
43
+ function makeLongMessages(turns: number): Message[] {
44
+ const rows: Message[] = [];
45
+ for (let i = 0; i < turns; i++) {
46
+ rows.push({
47
+ role: 'user',
48
+ content: [
49
+ {
50
+ type: 'text',
51
+ text: `[U${i}] User message with enough content to estimate tokens. Topic ${i % 9}.`,
52
+ },
53
+ ],
54
+ });
55
+ rows.push({
56
+ role: 'assistant',
57
+ content: [
58
+ {
59
+ type: 'text',
60
+ text: `[A${i}] Assistant response with relevant content. Result ${i % 7}.`,
61
+ },
62
+ ],
63
+ });
64
+ }
65
+ return rows;
66
+ }
67
+
68
+ function makeConfig() {
69
+ return {
70
+ ...DEFAULT_CONFIG.contextWindow,
71
+ maxInputTokens: 6000,
72
+ targetInputTokens: 3200,
73
+ compactThreshold: 0.6,
74
+ preserveRecentUserTurns: 8,
75
+ chunkTokens: 1200,
76
+ };
77
+ }
78
+
79
+ describe('Compaction benchmark', () => {
80
+ test('compaction with mock provider completes under 500ms', async () => {
81
+ const counter = { calls: 0 };
82
+ const provider = makeSummaryProvider(counter);
83
+ const config = makeConfig();
84
+ const manager = new ContextWindowManager(provider, 'system prompt', config);
85
+
86
+ // 90 turns = 180 messages, well above 60% of 6000 = 3600 threshold
87
+ const messages = makeLongMessages(90);
88
+ const before = estimatePromptTokens(messages, 'system prompt', {
89
+ providerName: 'mock',
90
+ });
91
+ expect(before).toBeGreaterThan(config.maxInputTokens * config.compactThreshold);
92
+
93
+ const start = performance.now();
94
+ const result = await manager.maybeCompact(messages);
95
+ const elapsed = performance.now() - start;
96
+
97
+ expect(result.compacted).toBe(true);
98
+ expect(elapsed).toBeLessThan(500);
99
+ });
100
+
101
+ test('below-threshold check returns in under 50ms (no-op)', async () => {
102
+ const counter = { calls: 0 };
103
+ const provider = makeSummaryProvider(counter);
104
+ const config = makeConfig();
105
+ const manager = new ContextWindowManager(provider, 'system prompt', config);
106
+
107
+ // 3 turns = 6 messages, well below threshold
108
+ const messages = makeLongMessages(3);
109
+
110
+ const start = performance.now();
111
+ const result = await manager.maybeCompact(messages);
112
+ const elapsed = performance.now() - start;
113
+
114
+ expect(result.compacted).toBe(false);
115
+ expect(result.reason).toBe('below compaction threshold');
116
+ expect(elapsed).toBeLessThan(50);
117
+ expect(counter.calls).toBe(0);
118
+ });
119
+
120
+ test('token reduction ratio exceeds 30% after compaction', async () => {
121
+ const counter = { calls: 0 };
122
+ const provider = makeSummaryProvider(counter);
123
+ const config = makeConfig();
124
+ const manager = new ContextWindowManager(provider, 'system prompt', config);
125
+
126
+ const messages = makeLongMessages(90);
127
+ const result = await manager.maybeCompact(messages);
128
+
129
+ expect(result.compacted).toBe(true);
130
+ const reductionRatio =
131
+ (result.previousEstimatedInputTokens - result.estimatedInputTokens) /
132
+ result.previousEstimatedInputTokens;
133
+ expect(reductionRatio).toBeGreaterThan(0.3);
134
+ });
135
+
136
+ test('summary calls fall within 2-6 range', async () => {
137
+ const counter = { calls: 0 };
138
+ const provider = makeSummaryProvider(counter);
139
+ const config = makeConfig();
140
+ const manager = new ContextWindowManager(provider, 'system prompt', config);
141
+
142
+ const messages = makeLongMessages(90);
143
+ const result = await manager.maybeCompact(messages);
144
+
145
+ expect(result.compacted).toBe(true);
146
+ expect(result.summaryCalls).toBeGreaterThanOrEqual(2);
147
+ expect(result.summaryCalls).toBeLessThanOrEqual(6);
148
+ expect(result.summaryCalls).toBe(counter.calls);
149
+ });
150
+
151
+ test('severe pressure triggers compaction even during cooldown', async () => {
152
+ const counter = { calls: 0 };
153
+ const provider = makeSummaryProvider(counter);
154
+ // Use a tighter maxInputTokens so 90 turns exceeds the 95% severe threshold
155
+ const config = {
156
+ ...makeConfig(),
157
+ maxInputTokens: 4000,
158
+ targetInputTokens: 2000,
159
+ };
160
+ const manager = new ContextWindowManager(provider, 'system prompt', config);
161
+
162
+ const messages = makeLongMessages(90);
163
+ const estimated = estimatePromptTokens(messages, 'system prompt', {
164
+ providerName: 'mock',
165
+ });
166
+ expect(estimated).toBeGreaterThan(config.maxInputTokens * 0.95);
167
+
168
+ // Simulate being within cooldown by setting lastCompactedAt to now
169
+ const result = await manager.maybeCompact(messages, undefined, {
170
+ lastCompactedAt: Date.now(),
171
+ });
172
+
173
+ expect(result.compacted).toBe(true);
174
+ expect(result.summaryCalls).toBeGreaterThan(0);
175
+ });
176
+ });