opencastle 0.26.1 → 0.27.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (226) hide show
  1. package/README.md +7 -1
  2. package/bin/cli.mjs +10 -0
  3. package/dist/cli/agents.d.ts +3 -0
  4. package/dist/cli/agents.d.ts.map +1 -0
  5. package/dist/cli/agents.js +161 -0
  6. package/dist/cli/agents.js.map +1 -0
  7. package/dist/cli/baselines.d.ts +3 -0
  8. package/dist/cli/baselines.d.ts.map +1 -0
  9. package/dist/cli/baselines.js +128 -0
  10. package/dist/cli/baselines.js.map +1 -0
  11. package/dist/cli/convoy/engine.d.ts +68 -2
  12. package/dist/cli/convoy/engine.d.ts.map +1 -1
  13. package/dist/cli/convoy/engine.js +2102 -26
  14. package/dist/cli/convoy/engine.js.map +1 -1
  15. package/dist/cli/convoy/engine.test.js +1572 -70
  16. package/dist/cli/convoy/engine.test.js.map +1 -1
  17. package/dist/cli/convoy/events.d.ts +4 -1
  18. package/dist/cli/convoy/events.d.ts.map +1 -1
  19. package/dist/cli/convoy/events.js +74 -13
  20. package/dist/cli/convoy/events.js.map +1 -1
  21. package/dist/cli/convoy/events.test.js +154 -27
  22. package/dist/cli/convoy/events.test.js.map +1 -1
  23. package/dist/cli/convoy/expertise.d.ts +16 -0
  24. package/dist/cli/convoy/expertise.d.ts.map +1 -0
  25. package/dist/cli/convoy/expertise.js +121 -0
  26. package/dist/cli/convoy/expertise.js.map +1 -0
  27. package/dist/cli/convoy/expertise.test.d.ts +2 -0
  28. package/dist/cli/convoy/expertise.test.d.ts.map +1 -0
  29. package/dist/cli/convoy/expertise.test.js +96 -0
  30. package/dist/cli/convoy/expertise.test.js.map +1 -0
  31. package/dist/cli/convoy/export.test.js +1 -0
  32. package/dist/cli/convoy/export.test.js.map +1 -1
  33. package/dist/cli/convoy/formula.d.ts +19 -0
  34. package/dist/cli/convoy/formula.d.ts.map +1 -0
  35. package/dist/cli/convoy/formula.js +142 -0
  36. package/dist/cli/convoy/formula.js.map +1 -0
  37. package/dist/cli/convoy/formula.test.d.ts +2 -0
  38. package/dist/cli/convoy/formula.test.d.ts.map +1 -0
  39. package/dist/cli/convoy/formula.test.js +342 -0
  40. package/dist/cli/convoy/formula.test.js.map +1 -0
  41. package/dist/cli/convoy/gates.d.ts +128 -0
  42. package/dist/cli/convoy/gates.d.ts.map +1 -0
  43. package/dist/cli/convoy/gates.js +606 -0
  44. package/dist/cli/convoy/gates.js.map +1 -0
  45. package/dist/cli/convoy/gates.test.d.ts +2 -0
  46. package/dist/cli/convoy/gates.test.d.ts.map +1 -0
  47. package/dist/cli/convoy/gates.test.js +976 -0
  48. package/dist/cli/convoy/gates.test.js.map +1 -0
  49. package/dist/cli/convoy/health.d.ts +11 -0
  50. package/dist/cli/convoy/health.d.ts.map +1 -1
  51. package/dist/cli/convoy/health.js +54 -0
  52. package/dist/cli/convoy/health.js.map +1 -1
  53. package/dist/cli/convoy/health.test.js +56 -1
  54. package/dist/cli/convoy/health.test.js.map +1 -1
  55. package/dist/cli/convoy/issues.d.ts +8 -0
  56. package/dist/cli/convoy/issues.d.ts.map +1 -0
  57. package/dist/cli/convoy/issues.js +98 -0
  58. package/dist/cli/convoy/issues.js.map +1 -0
  59. package/dist/cli/convoy/issues.test.d.ts +2 -0
  60. package/dist/cli/convoy/issues.test.d.ts.map +1 -0
  61. package/dist/cli/convoy/issues.test.js +107 -0
  62. package/dist/cli/convoy/issues.test.js.map +1 -0
  63. package/dist/cli/convoy/knowledge.d.ts +5 -0
  64. package/dist/cli/convoy/knowledge.d.ts.map +1 -0
  65. package/dist/cli/convoy/knowledge.js +116 -0
  66. package/dist/cli/convoy/knowledge.js.map +1 -0
  67. package/dist/cli/convoy/knowledge.test.d.ts +2 -0
  68. package/dist/cli/convoy/knowledge.test.d.ts.map +1 -0
  69. package/dist/cli/convoy/knowledge.test.js +87 -0
  70. package/dist/cli/convoy/knowledge.test.js.map +1 -0
  71. package/dist/cli/convoy/lessons.d.ts +17 -0
  72. package/dist/cli/convoy/lessons.d.ts.map +1 -0
  73. package/dist/cli/convoy/lessons.js +149 -0
  74. package/dist/cli/convoy/lessons.js.map +1 -0
  75. package/dist/cli/convoy/lessons.test.d.ts +2 -0
  76. package/dist/cli/convoy/lessons.test.d.ts.map +1 -0
  77. package/dist/cli/convoy/lessons.test.js +135 -0
  78. package/dist/cli/convoy/lessons.test.js.map +1 -0
  79. package/dist/cli/convoy/lock.d.ts +13 -0
  80. package/dist/cli/convoy/lock.d.ts.map +1 -0
  81. package/dist/cli/convoy/lock.js +88 -0
  82. package/dist/cli/convoy/lock.js.map +1 -0
  83. package/dist/cli/convoy/lock.test.d.ts +2 -0
  84. package/dist/cli/convoy/lock.test.d.ts.map +1 -0
  85. package/dist/cli/convoy/lock.test.js +136 -0
  86. package/dist/cli/convoy/lock.test.js.map +1 -0
  87. package/dist/cli/convoy/merge.d.ts +4 -0
  88. package/dist/cli/convoy/merge.d.ts.map +1 -1
  89. package/dist/cli/convoy/merge.js +18 -1
  90. package/dist/cli/convoy/merge.js.map +1 -1
  91. package/dist/cli/convoy/merge.test.js +6 -7
  92. package/dist/cli/convoy/merge.test.js.map +1 -1
  93. package/dist/cli/convoy/partition.d.ts +51 -0
  94. package/dist/cli/convoy/partition.d.ts.map +1 -0
  95. package/dist/cli/convoy/partition.js +186 -0
  96. package/dist/cli/convoy/partition.js.map +1 -0
  97. package/dist/cli/convoy/partition.test.d.ts +2 -0
  98. package/dist/cli/convoy/partition.test.d.ts.map +1 -0
  99. package/dist/cli/convoy/partition.test.js +315 -0
  100. package/dist/cli/convoy/partition.test.js.map +1 -0
  101. package/dist/cli/convoy/pipeline.test.js +6 -0
  102. package/dist/cli/convoy/pipeline.test.js.map +1 -1
  103. package/dist/cli/convoy/store.d.ts +47 -5
  104. package/dist/cli/convoy/store.d.ts.map +1 -1
  105. package/dist/cli/convoy/store.js +525 -19
  106. package/dist/cli/convoy/store.js.map +1 -1
  107. package/dist/cli/convoy/store.test.js +1345 -12
  108. package/dist/cli/convoy/store.test.js.map +1 -1
  109. package/dist/cli/convoy/types.d.ts +156 -2
  110. package/dist/cli/convoy/types.d.ts.map +1 -1
  111. package/dist/cli/destroy.d.ts +3 -0
  112. package/dist/cli/destroy.d.ts.map +1 -0
  113. package/dist/cli/destroy.js +69 -0
  114. package/dist/cli/destroy.js.map +1 -0
  115. package/dist/cli/destroy.test.d.ts +2 -0
  116. package/dist/cli/destroy.test.d.ts.map +1 -0
  117. package/dist/cli/destroy.test.js +116 -0
  118. package/dist/cli/destroy.test.js.map +1 -0
  119. package/dist/cli/gitignore.d.ts +9 -0
  120. package/dist/cli/gitignore.d.ts.map +1 -1
  121. package/dist/cli/gitignore.js +29 -0
  122. package/dist/cli/gitignore.js.map +1 -1
  123. package/dist/cli/plan.d.ts +3 -0
  124. package/dist/cli/plan.d.ts.map +1 -0
  125. package/dist/cli/plan.js +288 -0
  126. package/dist/cli/plan.js.map +1 -0
  127. package/dist/cli/run/adapters/claude.d.ts +2 -0
  128. package/dist/cli/run/adapters/claude.d.ts.map +1 -1
  129. package/dist/cli/run/adapters/claude.js +89 -49
  130. package/dist/cli/run/adapters/claude.js.map +1 -1
  131. package/dist/cli/run/adapters/claude.test.d.ts +2 -0
  132. package/dist/cli/run/adapters/claude.test.d.ts.map +1 -0
  133. package/dist/cli/run/adapters/claude.test.js +205 -0
  134. package/dist/cli/run/adapters/claude.test.js.map +1 -0
  135. package/dist/cli/run/adapters/copilot.d.ts +1 -0
  136. package/dist/cli/run/adapters/copilot.d.ts.map +1 -1
  137. package/dist/cli/run/adapters/copilot.js +84 -46
  138. package/dist/cli/run/adapters/copilot.js.map +1 -1
  139. package/dist/cli/run/adapters/copilot.test.d.ts +2 -0
  140. package/dist/cli/run/adapters/copilot.test.d.ts.map +1 -0
  141. package/dist/cli/run/adapters/copilot.test.js +195 -0
  142. package/dist/cli/run/adapters/copilot.test.js.map +1 -0
  143. package/dist/cli/run/adapters/cursor.d.ts +1 -0
  144. package/dist/cli/run/adapters/cursor.d.ts.map +1 -1
  145. package/dist/cli/run/adapters/cursor.js +83 -47
  146. package/dist/cli/run/adapters/cursor.js.map +1 -1
  147. package/dist/cli/run/adapters/cursor.test.d.ts +2 -0
  148. package/dist/cli/run/adapters/cursor.test.d.ts.map +1 -0
  149. package/dist/cli/run/adapters/cursor.test.js +129 -0
  150. package/dist/cli/run/adapters/cursor.test.js.map +1 -0
  151. package/dist/cli/run/adapters/opencode.d.ts +1 -0
  152. package/dist/cli/run/adapters/opencode.d.ts.map +1 -1
  153. package/dist/cli/run/adapters/opencode.js +81 -47
  154. package/dist/cli/run/adapters/opencode.js.map +1 -1
  155. package/dist/cli/run/adapters/opencode.test.d.ts +2 -0
  156. package/dist/cli/run/adapters/opencode.test.d.ts.map +1 -0
  157. package/dist/cli/run/adapters/opencode.test.js +119 -0
  158. package/dist/cli/run/adapters/opencode.test.js.map +1 -0
  159. package/dist/cli/run/executor.js +1 -1
  160. package/dist/cli/run/executor.js.map +1 -1
  161. package/dist/cli/run/schema.d.ts.map +1 -1
  162. package/dist/cli/run/schema.js +245 -4
  163. package/dist/cli/run/schema.js.map +1 -1
  164. package/dist/cli/run/schema.test.js +669 -0
  165. package/dist/cli/run/schema.test.js.map +1 -1
  166. package/dist/cli/run.d.ts.map +1 -1
  167. package/dist/cli/run.js +362 -22
  168. package/dist/cli/run.js.map +1 -1
  169. package/dist/cli/types.d.ts +85 -2
  170. package/dist/cli/types.d.ts.map +1 -1
  171. package/dist/cli/types.js.map +1 -1
  172. package/dist/cli/watch.d.ts +15 -0
  173. package/dist/cli/watch.d.ts.map +1 -0
  174. package/dist/cli/watch.js +279 -0
  175. package/dist/cli/watch.js.map +1 -0
  176. package/package.json +1 -1
  177. package/src/cli/agents.ts +177 -0
  178. package/src/cli/baselines.ts +143 -0
  179. package/src/cli/convoy/engine.test.ts +1839 -70
  180. package/src/cli/convoy/engine.ts +2417 -38
  181. package/src/cli/convoy/events.test.ts +179 -38
  182. package/src/cli/convoy/events.ts +88 -16
  183. package/src/cli/convoy/expertise.test.ts +128 -0
  184. package/src/cli/convoy/expertise.ts +163 -0
  185. package/src/cli/convoy/export.test.ts +1 -0
  186. package/src/cli/convoy/formula.test.ts +405 -0
  187. package/src/cli/convoy/formula.ts +174 -0
  188. package/src/cli/convoy/gates.test.ts +1169 -0
  189. package/src/cli/convoy/gates.ts +774 -0
  190. package/src/cli/convoy/health.test.ts +64 -2
  191. package/src/cli/convoy/health.ts +80 -2
  192. package/src/cli/convoy/issues.test.ts +143 -0
  193. package/src/cli/convoy/issues.ts +136 -0
  194. package/src/cli/convoy/knowledge.test.ts +101 -0
  195. package/src/cli/convoy/knowledge.ts +132 -0
  196. package/src/cli/convoy/lessons.test.ts +188 -0
  197. package/src/cli/convoy/lessons.ts +164 -0
  198. package/src/cli/convoy/lock.test.ts +181 -0
  199. package/src/cli/convoy/lock.ts +103 -0
  200. package/src/cli/convoy/merge.test.ts +6 -7
  201. package/src/cli/convoy/merge.ts +19 -1
  202. package/src/cli/convoy/partition.test.ts +423 -0
  203. package/src/cli/convoy/partition.ts +232 -0
  204. package/src/cli/convoy/pipeline.test.ts +6 -0
  205. package/src/cli/convoy/store.test.ts +1512 -14
  206. package/src/cli/convoy/store.ts +676 -30
  207. package/src/cli/convoy/types.ts +170 -1
  208. package/src/cli/destroy.test.ts +141 -0
  209. package/src/cli/destroy.ts +88 -0
  210. package/src/cli/gitignore.ts +36 -0
  211. package/src/cli/plan.ts +316 -0
  212. package/src/cli/run/adapters/claude.test.ts +234 -0
  213. package/src/cli/run/adapters/claude.ts +45 -5
  214. package/src/cli/run/adapters/copilot.test.ts +224 -0
  215. package/src/cli/run/adapters/copilot.ts +34 -4
  216. package/src/cli/run/adapters/cursor.test.ts +144 -0
  217. package/src/cli/run/adapters/cursor.ts +33 -2
  218. package/src/cli/run/adapters/opencode.test.ts +135 -0
  219. package/src/cli/run/adapters/opencode.ts +30 -2
  220. package/src/cli/run/executor.ts +1 -1
  221. package/src/cli/run/schema.test.ts +758 -0
  222. package/src/cli/run/schema.ts +300 -25
  223. package/src/cli/run.ts +341 -21
  224. package/src/cli/types.ts +86 -1
  225. package/src/cli/watch.ts +298 -0
  226. package/src/dashboard/node_modules/.vite/deps/_metadata.json +6 -6
@@ -1,13 +1,17 @@
1
- import { mkdtempSync, rmSync } from 'node:fs'
1
+ import { mkdtempSync, readFileSync, rmSync, writeFileSync } from 'node:fs'
2
2
  import { tmpdir } from 'node:os'
3
3
  import { join } from 'node:path'
4
4
  import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest'
5
- import { createConvoyEngine } from './engine.js'
5
+ import { createConvoyEngine, evaluateReviewLevel, recoverNdjson, runConvoyGuard } from './engine.js'
6
+ import type { ConvoyEngineOptions, DiffStats } from './engine.js'
6
7
  import { createConvoyStore } from './store.js'
7
8
  import type { AgentAdapter, Task, TaskSpec, ExecuteResult, ExecuteOptions } from '../types.js'
8
9
  import type { WorktreeManager } from './worktree.js'
9
10
  import type { MergeQueue } from './merge.js'
11
+ import type { TaskRecord } from './types.js'
10
12
  import { getAdapter, detectAdapter } from '../run/adapters/index.js'
13
+ import * as gates from './gates.js'
14
+ import * as partition from './partition.js'
11
15
 
12
16
  // ── Mock NDJSON log writes ────────────────────────────────────────────────────
13
17
 
@@ -92,6 +96,15 @@ function makeSpec(
92
96
  }
93
97
  }
94
98
 
99
+ /** Wraps createConvoyEngine with a default no-op _ensureBranch mock so tests never
100
+ * run real git branch operations. Callers can override _ensureBranch if needed. */
101
+ function makeEngine(opts: ConvoyEngineOptions): ReturnType<typeof createConvoyEngine> {
102
+ return createConvoyEngine({
103
+ _ensureBranch: vi.fn().mockResolvedValue(undefined),
104
+ ...opts,
105
+ })
106
+ }
107
+
95
108
  // ── Test lifecycle ────────────────────────────────────────────────────────────
96
109
 
97
110
  let tmpDir: string
@@ -115,7 +128,7 @@ afterEach(() => {
115
128
  describe('single task success', () => {
116
129
  it('returns status done with summary.done=1', async () => {
117
130
  const adapter = makeAdapter()
118
- const engine = createConvoyEngine({
131
+ const engine = makeEngine({
119
132
  spec: makeSpec(),
120
133
  specYaml: 'name: test',
121
134
  adapter,
@@ -137,7 +150,7 @@ describe('single task success', () => {
137
150
 
138
151
  it('calls adapter.execute once with the correct task', async () => {
139
152
  const adapter = makeAdapter()
140
- const engine = createConvoyEngine({
153
+ const engine = makeEngine({
141
154
  spec: makeSpec(),
142
155
  specYaml: 'name: test',
143
156
  adapter,
@@ -161,7 +174,7 @@ describe('single task failure', () => {
161
174
  const adapter = makeAdapter()
162
175
  adapter.execute.mockResolvedValue({ success: false, output: 'boom', exitCode: 1 })
163
176
 
164
- const engine = createConvoyEngine({
177
+ const engine = makeEngine({
165
178
  spec: makeSpec({}, [{ id: 'task-1', max_retries: 0 }]),
166
179
  specYaml: 'name: test',
167
180
  adapter,
@@ -181,7 +194,7 @@ describe('single task failure', () => {
181
194
  const adapter = makeAdapter()
182
195
  adapter.execute.mockResolvedValue({ success: false, output: 'boom', exitCode: 1 })
183
196
 
184
- const engine = createConvoyEngine({
197
+ const engine = makeEngine({
185
198
  spec: makeSpec({}, [{ id: 'task-1', max_retries: 0 }]),
186
199
  specYaml: 'name: test',
187
200
  adapter,
@@ -211,7 +224,7 @@ describe('two-phase DAG (task-b depends on task-a)', () => {
211
224
  { id: 'task-a', depends_on: [] },
212
225
  { id: 'task-b', depends_on: ['task-a'] },
213
226
  ])
214
- const engine = createConvoyEngine({
227
+ const engine = makeEngine({
215
228
  spec,
216
229
  specYaml: 'name: test',
217
230
  adapter,
@@ -243,7 +256,7 @@ describe('two-phase DAG (task-b depends on task-a)', () => {
243
256
  { id: 'task-a', depends_on: [] },
244
257
  { id: 'task-b', depends_on: ['task-a'] },
245
258
  ])
246
- const engine = createConvoyEngine({
259
+ const engine = makeEngine({
247
260
  spec,
248
261
  specYaml: 'name: test',
249
262
  adapter,
@@ -278,7 +291,7 @@ describe('on_failure:continue', () => {
278
291
  { id: 'task-b', depends_on: ['task-a'] },
279
292
  { id: 'task-c', depends_on: [] },
280
293
  ])
281
- const engine = createConvoyEngine({
294
+ const engine = makeEngine({
282
295
  spec,
283
296
  specYaml: 'name: test',
284
297
  adapter,
@@ -317,7 +330,7 @@ describe('on_failure:continue', () => {
317
330
  { id: 'task-b', depends_on: ['task-a'] },
318
331
  { id: 'task-c', depends_on: ['task-b'] },
319
332
  ])
320
- const engine = createConvoyEngine({
333
+ const engine = makeEngine({
321
334
  spec,
322
335
  specYaml: 'name: test',
323
336
  adapter,
@@ -347,7 +360,7 @@ describe('on_failure:stop', () => {
347
360
  { id: 'task-b', depends_on: ['task-a'] },
348
361
  { id: 'task-c', depends_on: ['task-a'] },
349
362
  ])
350
- const engine = createConvoyEngine({
363
+ const engine = makeEngine({
351
364
  spec,
352
365
  specYaml: 'name: test',
353
366
  adapter,
@@ -377,7 +390,7 @@ describe('on_failure:stop', () => {
377
390
  adapter.execute.mockResolvedValue({ success: false, output: 'fail', exitCode: 1 })
378
391
 
379
392
  const spec = makeSpec({ on_failure: 'stop' }, [{ id: 'task-1', max_retries: 3 }])
380
- const engine = createConvoyEngine({
393
+ const engine = makeEngine({
381
394
  spec,
382
395
  specYaml: 'name: test',
383
396
  adapter,
@@ -410,7 +423,7 @@ describe('task retry', () => {
410
423
  })
411
424
 
412
425
  const spec = makeSpec({}, [{ id: 'task-1', max_retries: 1 }])
413
- const engine = createConvoyEngine({
426
+ const engine = makeEngine({
414
427
  spec,
415
428
  specYaml: 'name: test',
416
429
  adapter,
@@ -435,7 +448,7 @@ describe('task retry', () => {
435
448
  })
436
449
 
437
450
  const spec = makeSpec({}, [{ id: 'task-1', max_retries: 2 }])
438
- const engine = createConvoyEngine({
451
+ const engine = makeEngine({
439
452
  spec,
440
453
  specYaml: 'name: test',
441
454
  adapter,
@@ -459,7 +472,7 @@ describe('validation gates', () => {
459
472
  it('returns status done when all gates pass', async () => {
460
473
  const adapter = makeAdapter()
461
474
  const spec = makeSpec({ gates: ['echo gate-ok'] }, [{ id: 'task-1' }])
462
- const engine = createConvoyEngine({
475
+ const engine = makeEngine({
463
476
  spec,
464
477
  specYaml: 'name: test',
465
478
  adapter,
@@ -478,7 +491,7 @@ describe('validation gates', () => {
478
491
  it('returns status gate-failed when a gate exits non-zero', async () => {
479
492
  const adapter = makeAdapter()
480
493
  const spec = makeSpec({ gates: ['false'] }, [{ id: 'task-1' }])
481
- const engine = createConvoyEngine({
494
+ const engine = makeEngine({
482
495
  spec,
483
496
  specYaml: 'name: test',
484
497
  adapter,
@@ -496,7 +509,7 @@ describe('validation gates', () => {
496
509
 
497
510
  it('returns undefined gateResults when spec has no gates', async () => {
498
511
  const adapter = makeAdapter()
499
- const engine = createConvoyEngine({
512
+ const engine = makeEngine({
500
513
  spec: makeSpec(),
501
514
  specYaml: 'name: test',
502
515
  adapter,
@@ -513,7 +526,7 @@ describe('validation gates', () => {
513
526
  it('runs multiple gates and reports each result individually', async () => {
514
527
  const adapter = makeAdapter()
515
528
  const spec = makeSpec({ gates: ['echo first', 'false', 'echo third'] }, [{ id: 'task-1' }])
516
- const engine = createConvoyEngine({
529
+ const engine = makeEngine({
517
530
  spec,
518
531
  specYaml: 'name: test',
519
532
  adapter,
@@ -560,6 +573,7 @@ describe('resume (crash recovery)', () => {
560
573
  max_retries: 0,
561
574
  files: null,
562
575
  depends_on: null,
576
+ gates: null,
563
577
  })
564
578
  if (taskStatus === 'running') {
565
579
  seeder.insertWorker({
@@ -583,7 +597,7 @@ describe('resume (crash recovery)', () => {
583
597
 
584
598
  const adapter = makeAdapter()
585
599
  const wtManager = makeWorktreeManager()
586
- const engine = createConvoyEngine({
600
+ const engine = makeEngine({
587
601
  spec: makeSpec({}, [{ id: 'task-1' }]),
588
602
  specYaml: 'name: test',
589
603
  adapter,
@@ -606,7 +620,7 @@ describe('resume (crash recovery)', () => {
606
620
  seedCrashedConvoy(convoyId, 'assigned')
607
621
 
608
622
  const adapter = makeAdapter()
609
- const engine = createConvoyEngine({
623
+ const engine = makeEngine({
610
624
  spec: makeSpec({}, [{ id: 'task-1' }]),
611
625
  specYaml: 'name: test',
612
626
  adapter,
@@ -622,7 +636,7 @@ describe('resume (crash recovery)', () => {
622
636
 
623
637
  it('throws an error when the convoy is not found', async () => {
624
638
  const adapter = makeAdapter()
625
- const engine = createConvoyEngine({
639
+ const engine = makeEngine({
626
640
  spec: makeSpec(),
627
641
  specYaml: 'name: test',
628
642
  adapter,
@@ -663,11 +677,12 @@ describe('resume (crash recovery)', () => {
663
677
  max_retries: 0,
664
678
  files: null,
665
679
  depends_on: null,
680
+ gates: null,
666
681
  })
667
682
  seeder.close()
668
683
 
669
684
  const adapter = makeAdapter()
670
- const engine = createConvoyEngine({
685
+ const engine = makeEngine({
671
686
  spec: makeSpec({ branch: 'feature-branch' }), // spec.branch used as fallback
672
687
  specYaml: 'name: test',
673
688
  adapter,
@@ -708,11 +723,12 @@ describe('resume (crash recovery)', () => {
708
723
  max_retries: 0,
709
724
  files: null,
710
725
  depends_on: null,
726
+ gates: null,
711
727
  })
712
728
  seeder.close()
713
729
 
714
730
  const adapter = makeAdapter()
715
- const engine = createConvoyEngine({
731
+ const engine = makeEngine({
716
732
  spec: {
717
733
  name: 'Git Branch Convoy',
718
734
  concurrency: 1,
@@ -741,7 +757,7 @@ describe('worktree lifecycle (non-copilot)', () => {
741
757
  const wtManager = makeWorktreeManager()
742
758
  const mergeQueue = makeMergeQueue()
743
759
 
744
- const engine = createConvoyEngine({
760
+ const engine = makeEngine({
745
761
  spec: makeSpec(),
746
762
  specYaml: 'name: test',
747
763
  adapter,
@@ -763,7 +779,7 @@ describe('worktree lifecycle (non-copilot)', () => {
763
779
  const wtManager = makeWorktreeManager()
764
780
  const mergeQueue = makeMergeQueue()
765
781
 
766
- const engine = createConvoyEngine({
782
+ const engine = makeEngine({
767
783
  spec: makeSpec({}, [{ id: 'task-1', max_retries: 0 }]),
768
784
  specYaml: 'name: test',
769
785
  adapter,
@@ -785,7 +801,7 @@ describe('worktree lifecycle (non-copilot)', () => {
785
801
  wtManager.create.mockRejectedValue(new Error('git worktree unavailable'))
786
802
  const mergeQueue = makeMergeQueue()
787
803
 
788
- const engine = createConvoyEngine({
804
+ const engine = makeEngine({
789
805
  spec: makeSpec(),
790
806
  specYaml: 'name: test',
791
807
  adapter,
@@ -806,7 +822,7 @@ describe('worktree lifecycle (non-copilot)', () => {
806
822
  const mergeQueue = makeMergeQueue()
807
823
  mergeQueue.merge.mockRejectedValue(new Error('merge conflict'))
808
824
 
809
- const engine = createConvoyEngine({
825
+ const engine = makeEngine({
810
826
  spec: makeSpec(),
811
827
  specYaml: 'name: test',
812
828
  adapter,
@@ -830,7 +846,7 @@ describe('copilot adapter', () => {
830
846
  const wtManager = makeWorktreeManager()
831
847
  const mergeQueue = makeMergeQueue()
832
848
 
833
- const engine = createConvoyEngine({
849
+ const engine = makeEngine({
834
850
  spec: makeSpec(),
835
851
  specYaml: 'name: test',
836
852
  adapter,
@@ -861,7 +877,7 @@ describe('timeout handling', () => {
861
877
  exitCode: -1,
862
878
  } satisfies ExecuteResult)
863
879
 
864
- const engine = createConvoyEngine({
880
+ const engine = makeEngine({
865
881
  spec: makeSpec({}, [{ id: 'task-1', max_retries: 0 }]),
866
882
  specYaml: 'name: test',
867
883
  adapter,
@@ -889,7 +905,7 @@ describe('timeout handling', () => {
889
905
  return { success: true, output: 'ok', exitCode: 0 }
890
906
  })
891
907
 
892
- const engine = createConvoyEngine({
908
+ const engine = makeEngine({
893
909
  spec: makeSpec({ on_failure: 'continue' }, [{ id: 'task-1', max_retries: 1 }]),
894
910
  specYaml: 'name: test',
895
911
  adapter,
@@ -913,7 +929,7 @@ describe('timeout handling', () => {
913
929
  exitCode: -1,
914
930
  })
915
931
 
916
- const engine = createConvoyEngine({
932
+ const engine = makeEngine({
917
933
  spec: makeSpec({ on_failure: 'stop' }, [{ id: 'task-1', max_retries: 2 }]),
918
934
  specYaml: 'name: test',
919
935
  adapter,
@@ -940,7 +956,7 @@ describe('adapter without kill method', () => {
940
956
  // kill intentionally absent
941
957
  }
942
958
 
943
- const engine = createConvoyEngine({
959
+ const engine = makeEngine({
944
960
  spec: makeSpec({}, [{ id: 'task-1', max_retries: 0 }]),
945
961
  specYaml: 'name: test',
946
962
  adapter,
@@ -965,7 +981,7 @@ describe('adapter without kill method', () => {
965
981
  }),
966
982
  }
967
983
 
968
- const engine = createConvoyEngine({
984
+ const engine = makeEngine({
969
985
  spec: makeSpec({}, [{ id: 'task-1', max_retries: 0 }]),
970
986
  specYaml: 'name: test',
971
987
  adapter,
@@ -999,7 +1015,7 @@ describe('parallel task execution', () => {
999
1015
  { id: 'task-2', depends_on: [] },
1000
1016
  { id: 'task-3', depends_on: [] },
1001
1017
  ])
1002
- const engine = createConvoyEngine({
1018
+ const engine = makeEngine({
1003
1019
  spec,
1004
1020
  specYaml: 'name: test',
1005
1021
  adapter,
@@ -1022,7 +1038,7 @@ describe('executor error', () => {
1022
1038
  const adapter = makeAdapter()
1023
1039
  adapter.execute.mockRejectedValue(new Error('adapter crashed'))
1024
1040
 
1025
- const engine = createConvoyEngine({
1041
+ const engine = makeEngine({
1026
1042
  spec: makeSpec({}, [{ id: 'task-1', max_retries: 0 }]),
1027
1043
  specYaml: 'name: test',
1028
1044
  adapter,
@@ -1043,7 +1059,7 @@ describe('executor error', () => {
1043
1059
  describe('verbose mode', () => {
1044
1060
  it('runs a successful task with verbose=true without throwing', async () => {
1045
1061
  const adapter = makeAdapter('developer')
1046
- const engine = createConvoyEngine({
1062
+ const engine = makeEngine({
1047
1063
  spec: makeSpec({}, [{ id: 'task-1' }]),
1048
1064
  specYaml: 'name: test',
1049
1065
  adapter,
@@ -1068,7 +1084,7 @@ describe('verbose mode', () => {
1068
1084
  { id: 'task-a', depends_on: [] },
1069
1085
  { id: 'task-b', depends_on: ['task-a'] }, // gets skipped — also triggers verbose skip log
1070
1086
  ])
1071
- const engine = createConvoyEngine({
1087
+ const engine = makeEngine({
1072
1088
  spec,
1073
1089
  specYaml: 'name: test',
1074
1090
  adapter,
@@ -1095,7 +1111,7 @@ describe('verbose mode', () => {
1095
1111
  return { success: true, output: 'ok', exitCode: 0 }
1096
1112
  })
1097
1113
 
1098
- const engine = createConvoyEngine({
1114
+ const engine = makeEngine({
1099
1115
  spec: makeSpec({}, [{ id: 'task-1', max_retries: 1 }]),
1100
1116
  specYaml: 'name: test',
1101
1117
  adapter,
@@ -1118,7 +1134,7 @@ describe('verbose mode', () => {
1118
1134
  exitCode: -1,
1119
1135
  })
1120
1136
 
1121
- const engine = createConvoyEngine({
1137
+ const engine = makeEngine({
1122
1138
  spec: makeSpec({}, [{ id: 'task-1', max_retries: 0 }]),
1123
1139
  specYaml: 'name: test',
1124
1140
  adapter,
@@ -1144,7 +1160,7 @@ describe('verbose mode', () => {
1144
1160
  return { success: true, output: 'ok', exitCode: 0 }
1145
1161
  })
1146
1162
 
1147
- const engine = createConvoyEngine({
1163
+ const engine = makeEngine({
1148
1164
  spec: makeSpec({ on_failure: 'continue' }, [{ id: 'task-1', max_retries: 1 }]),
1149
1165
  specYaml: 'name: test',
1150
1166
  adapter,
@@ -1163,7 +1179,7 @@ describe('verbose mode', () => {
1163
1179
  const wtManager = makeWorktreeManager()
1164
1180
  wtManager.create.mockRejectedValue(new Error('no worktrees'))
1165
1181
 
1166
- const engine = createConvoyEngine({
1182
+ const engine = makeEngine({
1167
1183
  spec: makeSpec({}, [{ id: 'task-1' }]),
1168
1184
  specYaml: 'name: test',
1169
1185
  adapter,
@@ -1182,7 +1198,7 @@ describe('verbose mode', () => {
1182
1198
  const mergeQueue = makeMergeQueue()
1183
1199
  mergeQueue.merge.mockRejectedValue(new Error('merge conflict'))
1184
1200
 
1185
- const engine = createConvoyEngine({
1201
+ const engine = makeEngine({
1186
1202
  spec: makeSpec({}, [{ id: 'task-1' }]),
1187
1203
  specYaml: 'name: test',
1188
1204
  adapter,
@@ -1204,7 +1220,7 @@ describe('msToTimeout — timeout string representation', () => {
1204
1220
  const adapter = makeAdapter()
1205
1221
  // parseTimeout('1h') = 3600000ms; msToTimeout(3600000) = '1h'
1206
1222
  const spec = makeSpec({}, [{ id: 'task-1', timeout: '1h' }])
1207
- const engine = createConvoyEngine({
1223
+ const engine = makeEngine({
1208
1224
  spec,
1209
1225
  specYaml: 'name: test',
1210
1226
  adapter,
@@ -1221,7 +1237,7 @@ describe('msToTimeout — timeout string representation', () => {
1221
1237
  const adapter = makeAdapter()
1222
1238
  // parseTimeout('1m') = 60000ms; msToTimeout(60000) = '1m'
1223
1239
  const spec = makeSpec({}, [{ id: 'task-1', timeout: '1m' }])
1224
- const engine = createConvoyEngine({
1240
+ const engine = makeEngine({
1225
1241
  spec,
1226
1242
  specYaml: 'name: test',
1227
1243
  adapter,
@@ -1244,7 +1260,7 @@ describe('per-task adapter resolution', () => {
1244
1260
  vi.mocked(getAdapter).mockResolvedValue(altAdapter)
1245
1261
 
1246
1262
  const spec = makeSpec({}, [{ adapter: 'alt-adapter' }])
1247
- const engine = createConvoyEngine({
1263
+ const engine = makeEngine({
1248
1264
  spec,
1249
1265
  specYaml: 'name: test',
1250
1266
  adapter: mainAdapter,
@@ -1263,7 +1279,7 @@ describe('per-task adapter resolution', () => {
1263
1279
  it('uses convoy-level adapter when task has no adapter field', async () => {
1264
1280
  const adapter = makeAdapter('test')
1265
1281
  const spec = makeSpec()
1266
- const engine = createConvoyEngine({
1282
+ const engine = makeEngine({
1267
1283
  spec,
1268
1284
  specYaml: 'name: test',
1269
1285
  adapter,
@@ -1282,7 +1298,7 @@ describe('per-task adapter resolution', () => {
1282
1298
  const adapter = makeAdapter('test')
1283
1299
  // task.adapter === adapter.name → no per-task resolution
1284
1300
  const spec = makeSpec({}, [{ adapter: 'test' }])
1285
- const engine = createConvoyEngine({
1301
+ const engine = makeEngine({
1286
1302
  spec,
1287
1303
  specYaml: 'name: test',
1288
1304
  adapter,
@@ -1304,7 +1320,7 @@ describe('per-task adapter resolution', () => {
1304
1320
  vi.mocked(getAdapter).mockResolvedValue(autoAdapter)
1305
1321
 
1306
1322
  const spec = makeSpec({}, [{ adapter: 'auto' }])
1307
- const engine = createConvoyEngine({
1323
+ const engine = makeEngine({
1308
1324
  spec,
1309
1325
  specYaml: 'name: test',
1310
1326
  adapter: mainAdapter,
@@ -1326,7 +1342,7 @@ describe('per-task adapter resolution', () => {
1326
1342
  vi.mocked(getAdapter).mockResolvedValue(altAdapter)
1327
1343
 
1328
1344
  const spec = makeSpec({}, [{ adapter: 'alt-adapter' }])
1329
- const engine = createConvoyEngine({
1345
+ const engine = makeEngine({
1330
1346
  spec,
1331
1347
  specYaml: 'name: test',
1332
1348
  adapter: makeAdapter('test'),
@@ -1361,7 +1377,7 @@ describe('getCurrentBranch', () => {
1361
1377
  tasks: [{ id: 'task-1', prompt: 'p', agent: 'dev', timeout: '30s', depends_on: [], files: [], description: '', max_retries: 0 }],
1362
1378
  }
1363
1379
 
1364
- const engine = createConvoyEngine({
1380
+ const engine = makeEngine({
1365
1381
  spec,
1366
1382
  specYaml: 'name: branch-test',
1367
1383
  adapter,
@@ -1385,7 +1401,7 @@ describe('getCurrentBranch', () => {
1385
1401
  tasks: [{ id: 'task-1', prompt: 'p', agent: 'dev', timeout: '30s', depends_on: [], files: [], description: '', max_retries: 0 }],
1386
1402
  }
1387
1403
 
1388
- const engine = createConvoyEngine({
1404
+ const engine = makeEngine({
1389
1405
  spec,
1390
1406
  specYaml: 'name: fallback-test',
1391
1407
  adapter,
@@ -1410,7 +1426,7 @@ describe('real timer timeout path', () => {
1410
1426
  // adapter.execute returns a promise that never resolves — real timer wins the race
1411
1427
  adapter.execute.mockImplementation(() => new Promise<ExecuteResult>(() => {}))
1412
1428
 
1413
- const engine = createConvoyEngine({
1429
+ const engine = makeEngine({
1414
1430
  spec: makeSpec({}, [{ id: 'task-1', timeout: '1s', max_retries: 0 }]),
1415
1431
  specYaml: 'name: test',
1416
1432
  adapter,
@@ -1448,7 +1464,7 @@ describe('diamond dependency skip', () => {
1448
1464
  { id: 'task-b', depends_on: ['task-a'] },
1449
1465
  { id: 'task-c', depends_on: ['task-a', 'task-b'] }, // diamond
1450
1466
  ])
1451
- const engine = createConvoyEngine({
1467
+ const engine = makeEngine({
1452
1468
  spec,
1453
1469
  specYaml: 'name: test',
1454
1470
  adapter,
@@ -1485,7 +1501,7 @@ describe('cost tracking', () => {
1485
1501
  usage: { prompt_tokens: 100, completion_tokens: 50, total_tokens: 150 },
1486
1502
  } satisfies ExecuteResult)
1487
1503
 
1488
- const engine = createConvoyEngine({
1504
+ const engine = makeEngine({
1489
1505
  spec: makeSpec(),
1490
1506
  specYaml: 'name: test',
1491
1507
  adapter,
@@ -1509,7 +1525,7 @@ describe('cost tracking', () => {
1509
1525
  const adapter = makeAdapter()
1510
1526
  // default makeAdapter returns no usage field
1511
1527
 
1512
- const engine = createConvoyEngine({
1528
+ const engine = makeEngine({
1513
1529
  spec: makeSpec(),
1514
1530
  specYaml: 'name: test',
1515
1531
  adapter,
@@ -1538,7 +1554,7 @@ describe('cost tracking', () => {
1538
1554
  { id: 'task-1', depends_on: [] },
1539
1555
  { id: 'task-2', depends_on: [] },
1540
1556
  ])
1541
- const engine = createConvoyEngine({
1557
+ const engine = makeEngine({
1542
1558
  spec,
1543
1559
  specYaml: 'name: test',
1544
1560
  adapter,
@@ -1564,7 +1580,7 @@ describe('cost tracking', () => {
1564
1580
  usage: { total_tokens: 75 },
1565
1581
  } satisfies ExecuteResult)
1566
1582
 
1567
- const engine = createConvoyEngine({
1583
+ const engine = makeEngine({
1568
1584
  spec: makeSpec(),
1569
1585
  specYaml: 'name: test',
1570
1586
  adapter,
@@ -1582,7 +1598,7 @@ describe('cost tracking', () => {
1582
1598
  const adapter = makeAdapter()
1583
1599
  // default makeAdapter returns no usage
1584
1600
 
1585
- const engine = createConvoyEngine({
1601
+ const engine = makeEngine({
1586
1602
  spec: makeSpec(),
1587
1603
  specYaml: 'name: test',
1588
1604
  adapter,
@@ -1605,7 +1621,7 @@ describe('cost tracking', () => {
1605
1621
  usage: { total_tokens: 42 },
1606
1622
  } satisfies ExecuteResult)
1607
1623
 
1608
- const engine = createConvoyEngine({
1624
+ const engine = makeEngine({
1609
1625
  spec: makeSpec(),
1610
1626
  specYaml: 'name: test',
1611
1627
  adapter,
@@ -1628,7 +1644,7 @@ describe('cost tracking', () => {
1628
1644
  const adapter = makeAdapter()
1629
1645
  // default adapter returns no usage
1630
1646
 
1631
- const engine = createConvoyEngine({
1647
+ const engine = makeEngine({
1632
1648
  spec: makeSpec({ concurrency: 2 }, [
1633
1649
  { id: 'task-1', depends_on: [] },
1634
1650
  { id: 'task-2', depends_on: [] },
@@ -1670,7 +1686,7 @@ describe('progress reporting', () => {
1670
1686
 
1671
1687
  it('prints task start message without verbose flag', async () => {
1672
1688
  const adapter = makeAdapter()
1673
- const engine = createConvoyEngine({
1689
+ const engine = makeEngine({
1674
1690
  spec: makeSpec(),
1675
1691
  specYaml: 'name: test',
1676
1692
  adapter,
@@ -1688,7 +1704,7 @@ describe('progress reporting', () => {
1688
1704
 
1689
1705
  it('prints task completion with counter', async () => {
1690
1706
  const adapter = makeAdapter()
1691
- const engine = createConvoyEngine({
1707
+ const engine = makeEngine({
1692
1708
  spec: makeSpec(),
1693
1709
  specYaml: 'name: test',
1694
1710
  adapter,
@@ -1708,7 +1724,7 @@ describe('progress reporting', () => {
1708
1724
  const adapter = makeAdapter()
1709
1725
  adapter.execute.mockResolvedValue({ success: false, output: 'boom', exitCode: 1 })
1710
1726
 
1711
- const engine = createConvoyEngine({
1727
+ const engine = makeEngine({
1712
1728
  spec: makeSpec({}, [{ id: 'task-1', max_retries: 0 }]),
1713
1729
  specYaml: 'name: test',
1714
1730
  adapter,
@@ -1730,7 +1746,7 @@ describe('progress reporting', () => {
1730
1746
  { id: 'task-a', depends_on: [] },
1731
1747
  { id: 'task-b', depends_on: ['task-a'] },
1732
1748
  ])
1733
- const engine = createConvoyEngine({
1749
+ const engine = makeEngine({
1734
1750
  spec,
1735
1751
  specYaml: 'name: test',
1736
1752
  adapter,
@@ -1749,7 +1765,7 @@ describe('progress reporting', () => {
1749
1765
  it('prints gate results with pass/fail indicators', async () => {
1750
1766
  const adapter = makeAdapter()
1751
1767
  const spec = makeSpec({ gates: ['echo gate-ok', 'false'] }, [{ id: 'task-1' }])
1752
- const engine = createConvoyEngine({
1768
+ const engine = makeEngine({
1753
1769
  spec,
1754
1770
  specYaml: 'name: test',
1755
1771
  adapter,
@@ -1778,7 +1794,7 @@ describe('progress reporting', () => {
1778
1794
  return { success: true, output: 'ok', exitCode: 0 }
1779
1795
  })
1780
1796
 
1781
- const engine = createConvoyEngine({
1797
+ const engine = makeEngine({
1782
1798
  spec: makeSpec({}, [{ id: 'task-1', max_retries: 1 }]),
1783
1799
  specYaml: 'name: test',
1784
1800
  adapter,
@@ -1819,7 +1835,7 @@ describe('gate retry mechanism', () => {
1819
1835
  { gates: [`node -e "process.exit(0)"`], gate_retries: 1 },
1820
1836
  [{ id: 'task-1' }],
1821
1837
  )
1822
- const engine = createConvoyEngine({
1838
+ const engine = makeEngine({
1823
1839
  spec,
1824
1840
  specYaml: 'name: test',
1825
1841
  adapter,
@@ -1835,7 +1851,7 @@ describe('gate retry mechanism', () => {
1835
1851
 
1836
1852
  it('defaults gate_retries to 0 (no retry on gate failure)', async () => {
1837
1853
  const spec = makeSpec({ gates: ['false'] }, [{ id: 'task-1' }])
1838
- const engine = createConvoyEngine({
1854
+ const engine = makeEngine({
1839
1855
  spec,
1840
1856
  specYaml: 'name: test',
1841
1857
  adapter,
@@ -1851,7 +1867,7 @@ describe('gate retry mechanism', () => {
1851
1867
 
1852
1868
  it('calls adapter.execute with fix prompt when gates fail and retries available', async () => {
1853
1869
  const spec = makeSpec({ gates: ['false'], gate_retries: 1 }, [{ id: 'task-1' }])
1854
- const engine = createConvoyEngine({
1870
+ const engine = makeEngine({
1855
1871
  spec,
1856
1872
  specYaml: 'name: test',
1857
1873
  adapter,
@@ -1875,7 +1891,7 @@ describe('gate retry mechanism', () => {
1875
1891
  .mockResolvedValueOnce({ success: true, output: 'ok', exitCode: 0 }) // task-1
1876
1892
  .mockResolvedValueOnce({ success: false, output: 'fix failed', exitCode: 1 }) // gate-fix-1
1877
1893
  const spec = makeSpec({ gates: ['false'], gate_retries: 2 }, [{ id: 'task-1' }])
1878
- const engine = createConvoyEngine({
1894
+ const engine = makeEngine({
1879
1895
  spec,
1880
1896
  specYaml: 'name: test',
1881
1897
  adapter,
@@ -1889,3 +1905,1756 @@ describe('gate retry mechanism', () => {
1889
1905
  expect(result.status).toBe('gate-failed')
1890
1906
  })
1891
1907
  })
1908
+
1909
+ // ── evaluateReviewLevel ───────────────────────────────────────────────────────
1910
+
1911
// Test factory for TaskRecord fixtures.
// Returns an object with fixed defaults (id 'task-1', agent 'developer',
// status 'pending', max_retries 1, on_exhausted 'dlq'; most other fields null or 0).
// `overrides` is spread last, so any property it supplies replaces the default.
+ function makeTaskRecord(overrides: Partial<TaskRecord> = {}): TaskRecord {
1912
+ return {
1913
+ id: 'task-1',
1914
+ convoy_id: 'convoy-1',
1915
+ phase: 0,
1916
+ prompt: '',
1917
+ agent: 'developer',
1918
+ adapter: null,
1919
+ model: null,
1920
// 1_800_000 ms = 30 minutes
+ timeout_ms: 1_800_000,
1921
+ status: 'pending',
1922
+ worker_id: null,
1923
+ worktree: null,
1924
+ output: null,
1925
+ exit_code: null,
1926
+ started_at: null,
1927
+ finished_at: null,
1928
+ retries: 0,
1929
+ max_retries: 1,
1930
+ files: null,
1931
+ depends_on: null,
1932
+ prompt_tokens: null,
1933
+ completion_tokens: null,
1934
+ total_tokens: null,
1935
+ cost_usd: null,
1936
+ gates: null,
1937
+ on_exhausted: 'dlq',
1938
+ injected: 0,
1939
+ provenance: null,
1940
+ idempotency_key: null,
1941
+ current_step: null,
1942
+ total_steps: null,
1943
+ review_level: null,
1944
+ review_verdict: null,
1945
+ review_tokens: null,
1946
+ review_model: null,
1947
+ panel_attempts: 0,
1948
+ dispute_id: null,
1949
+ drift_score: null,
1950
+ drift_retried: 0,
1951
// Caller-supplied fields win over every default above.
+ ...overrides,
1952
+ }
1953
+ }
1954
+
1955
// Test factory for DiffStats fixtures.
// Defaults describe a one-file, five-line change to src/components/Button.tsx;
// `overrides` is spread last and replaces any of those fields.
// NOTE(review): nothing here keeps filesChanged in sync with filePaths.length —
// callers that override one must set the other themselves if it matters.
+ function makeDiffStats(overrides: Partial<DiffStats> = {}): DiffStats {
1956
+ return {
1957
+ linesChanged: 5,
1958
+ filesChanged: 1,
1959
+ filePaths: ['src/components/Button.tsx'],
1960
+ ...overrides,
1961
+ }
1962
+ }
1963
+
1964
// Unit tests for evaluateReviewLevel(task, diffStats, heuristics?, flag?).
// Each case builds inputs with makeTaskRecord / makeDiffStats and asserts the
// returned level string: 'panel', 'fast' or 'auto-pass'.
// The third argument is an object of heuristic overrides (panel_paths,
// auto_pass_agents, auto_pass_max_lines are used below).
// The fourth argument is a boolean — presumably "gates passed", per the test
// title that uses it; confirm against the function signature.
+ describe('evaluateReviewLevel', () => {
1965
+ it('routes to panel when a changed file is under auth/', () => {
1966
+ const level = evaluateReviewLevel(
1967
+ makeTaskRecord(),
1968
+ makeDiffStats({ filePaths: ['auth/session.ts'] }),
1969
+ )
1970
+ expect(level).toBe('panel')
1971
+ })
1972
+
1973
// Same expectation when the auth/ segment is nested below another directory.
+ it('routes to panel when a changed file path contains /auth/', () => {
1974
+ const level = evaluateReviewLevel(
1975
+ makeTaskRecord(),
1976
+ makeDiffStats({ filePaths: ['src/auth/session.ts'] }),
1977
+ )
1978
+ expect(level).toBe('panel')
1979
+ })
1980
+
1981
+ it('routes to panel for security/ path', () => {
1982
+ const level = evaluateReviewLevel(
1983
+ makeTaskRecord(),
1984
+ makeDiffStats({ filePaths: ['security/policy.ts'] }),
1985
+ )
1986
+ expect(level).toBe('panel')
1987
+ })
1988
+
1989
// Agent-based routing: default diff stats, only the agent name differs.
+ it('routes to panel for security-expert agent', () => {
1990
+ const level = evaluateReviewLevel(
1991
+ makeTaskRecord({ agent: 'security-expert' }),
1992
+ makeDiffStats(),
1993
+ )
1994
+ expect(level).toBe('panel')
1995
+ })
1996
+
1997
+ it('routes to panel for database-engineer agent', () => {
1998
+ const level = evaluateReviewLevel(
1999
+ makeTaskRecord({ agent: 'database-engineer' }),
2000
+ makeDiffStats(),
2001
+ )
2002
+ expect(level).toBe('panel')
2003
+ })
2004
+
2005
+ it('routes to auto-pass for documentation-writer agent', () => {
2006
+ const level = evaluateReviewLevel(
2007
+ makeTaskRecord({ agent: 'documentation-writer' }),
2008
+ makeDiffStats(),
2009
+ )
2010
+ expect(level).toBe('auto-pass')
2011
+ })
2012
+
2013
+ it('routes to auto-pass for copywriter agent', () => {
2014
+ const level = evaluateReviewLevel(
2015
+ makeTaskRecord({ agent: 'copywriter' }),
2016
+ makeDiffStats(),
2017
+ )
2018
+ expect(level).toBe('auto-pass')
2019
+ })
2020
+
2021
// `undefined` leaves the heuristics at their defaults; `true` is the fourth
// (boolean) argument — the only default-heuristics case in this suite that passes it.
+ it('routes to auto-pass for small diff (<=10 lines, <=2 files) with gates passing', () => {
2022
+ const level = evaluateReviewLevel(
2023
+ makeTaskRecord(),
2024
+ makeDiffStats({ linesChanged: 8, filesChanged: 2, filePaths: ['src/Button.tsx', 'src/Button.test.tsx'] }),
2025
+ undefined,
2026
+ true,
2027
+ )
2028
+ expect(level).toBe('auto-pass')
2029
+ })
2030
+
2031
// NOTE(review): the expected value 'fast' is also what the medium-diff default
// case below expects, so this assertion alone cannot tell a dedicated
// large-diff rule apart from the fallback.
+ it('routes to fast for large diff (>200 lines)', () => {
2032
+ const level = evaluateReviewLevel(
2033
+ makeTaskRecord(),
2034
+ makeDiffStats({ linesChanged: 250, filesChanged: 3, filePaths: ['src/Big.tsx', 'src/Big.test.tsx', 'src/types.ts'] }),
2035
+ )
2036
+ expect(level).toBe('fast')
2037
+ })
2038
+
2039
// NOTE(review): same caveat as the large-diff case — 'fast' is also the default.
+ it('routes to fast for many files (>5)', () => {
2040
+ const level = evaluateReviewLevel(
2041
+ makeTaskRecord(),
2042
+ makeDiffStats({ linesChanged: 50, filesChanged: 6, filePaths: ['a.ts', 'b.ts', 'c.ts', 'd.ts', 'e.ts', 'f.ts'] }),
2043
+ )
2044
+ expect(level).toBe('fast')
2045
+ })
2046
+
2047
+ it('defaults to fast for medium diff with developer agent', () => {
2048
+ const level = evaluateReviewLevel(
2049
+ makeTaskRecord({ agent: 'developer' }),
2050
+ makeDiffStats({ linesChanged: 50, filesChanged: 3, filePaths: ['src/Feature.tsx', 'src/Feature.test.tsx', 'src/types.ts'] }),
2051
+ )
2052
+ expect(level).toBe('fast')
2053
+ })
2054
+
2055
// Custom heuristics: a path prefix supplied by the caller routes to panel.
+ it('custom heuristics: overrides panel_paths', () => {
2056
+ const level = evaluateReviewLevel(
2057
+ makeTaskRecord(),
2058
+ makeDiffStats({ filePaths: ['billing/invoice.ts'] }),
2059
+ { panel_paths: ['billing/'] },
2060
+ )
2061
+ expect(level).toBe('panel')
2062
+ })
2063
+
2064
+ it('custom heuristics: overrides auto_pass_agents', () => {
2065
+ const level = evaluateReviewLevel(
2066
+ makeTaskRecord({ agent: 'designer' }),
2067
+ makeDiffStats(),
2068
+ { auto_pass_agents: ['designer'] },
2069
+ )
2070
+ expect(level).toBe('auto-pass')
2071
+ })
2072
+
2073
// A 5-line diff with the boolean flag set would otherwise fit the small-diff
// title above; lowering auto_pass_max_lines to 3 is expected to yield 'fast'.
+ it('custom heuristics: smaller auto_pass_max_lines threshold', () => {
2074
+ const level = evaluateReviewLevel(
2075
+ makeTaskRecord(),
2076
+ makeDiffStats({ linesChanged: 5, filesChanged: 1, filePaths: ['src/x.ts'] }),
2077
+ { auto_pass_max_lines: 3 },
2078
+ true,
2079
+ )
2080
+ expect(level).toBe('fast') // 5 > 3 → not auto-pass
2081
+ })
2082
+ })
2083
+
2084
+ // ── Review pipeline integration ───────────────────────────────────────────────
2085
+
2086
// Integration tests for the engine's review stage.
// Every case builds an engine through makeEngine with a mocked adapter, worktree
// manager and merge queue, injects a mock review runner via `_reviewRunner`,
// runs the convoy, and asserts on the run result and/or on rows read back from
// the convoy store opened at `dbPath` (declared outside this block).
+ describe('review pipeline', () => {
2087
+ let adapter: ReturnType<typeof makeAdapter>
2088
+ let wtManager: ReturnType<typeof makeWorktreeManager>
2089
+ let mergeQueue: ReturnType<typeof makeMergeQueue>
2090
+
2091
// Recreate the adapter, worktree-manager and merge-queue mocks before each test.
+ beforeEach(() => {
2092
+ adapter = makeAdapter()
2093
+ wtManager = makeWorktreeManager()
2094
+ mergeQueue = makeMergeQueue()
2095
+ })
2096
+
2097
// review: 'none' is set both in defaults and on the task itself.
+ it('task with review: none — reviewer not called, task succeeds', async () => {
2098
+ const mockReviewRunner = vi.fn().mockResolvedValue({ verdict: 'pass', feedback: '', tokens: 100, model: 'test' })
2099
+ const engine = makeEngine({
2100
+ spec: makeSpec({ defaults: { review: 'none' } }, [{ review: 'none' }]),
2101
+ specYaml: 'name: test',
2102
+ adapter,
2103
+ dbPath,
2104
+ _worktreeManager: wtManager,
2105
+ _mergeQueue: mergeQueue,
2106
+ _reviewRunner: mockReviewRunner,
2107
+ })
2108
+ const result = await engine.run()
2109
+ expect(result.status).toBe('done')
2110
+ expect(mockReviewRunner).not.toHaveBeenCalled()
2111
+ })
2112
+
2113
// Also pins the reviewer call shape: (task-like object, level 'fast', 'default').
+ it('fast review PASS — task proceeds to merge (status done)', async () => {
2114
+ const mockReviewRunner = vi.fn().mockResolvedValue({ verdict: 'pass', feedback: '', tokens: 50, model: 'reviewer' })
2115
+ const engine = makeEngine({
2116
+ spec: makeSpec({ defaults: { review: 'fast' } }),
2117
+ specYaml: 'name: test',
2118
+ adapter,
2119
+ dbPath,
2120
+ _worktreeManager: wtManager,
2121
+ _mergeQueue: mergeQueue,
2122
+ _reviewRunner: mockReviewRunner,
2123
+ })
2124
+ const result = await engine.run()
2125
+ expect(result.status).toBe('done')
2126
+ expect(mockReviewRunner).toHaveBeenCalledOnce()
2127
+ expect(mockReviewRunner).toHaveBeenCalledWith(expect.objectContaining({ agent: 'developer' }), 'fast', 'default')
2128
+ })
2129
+
2130
// Reviewer blocks once, then passes; max_retries: 1 allows the second attempt.
// NOTE(review): callCount is incremented but never read — the assertions rely
// on adapter.execute's own call tracking instead.
+ it('fast review BLOCK + retries remaining — task retried with feedback prepended', async () => {
2131
+ let callCount = 0
2132
+ adapter.execute.mockImplementation(() => {
2133
+ callCount++
2134
+ return Promise.resolve({ success: true, output: 'ok', exitCode: 0 })
2135
+ })
2136
+ const mockReviewRunner = vi.fn()
2137
+ .mockResolvedValueOnce({ verdict: 'block', feedback: 'Missing tests', tokens: 50, model: 'reviewer' })
2138
+ .mockResolvedValueOnce({ verdict: 'pass', feedback: '', tokens: 50, model: 'reviewer' })
2139
+
2140
+ const engine = makeEngine({
2141
+ spec: makeSpec({ defaults: { review: 'fast' } }, [{ max_retries: 1 }]),
2142
+ specYaml: 'name: test',
2143
+ adapter,
2144
+ dbPath,
2145
+ _worktreeManager: wtManager,
2146
+ _mergeQueue: mergeQueue,
2147
+ _reviewRunner: mockReviewRunner,
2148
+ })
2149
+ const result = await engine.run()
2150
+ expect(result.status).toBe('done')
2151
+ expect(adapter.execute).toHaveBeenCalledTimes(2)
2152
+ expect(mockReviewRunner).toHaveBeenCalledTimes(2)
2153
+ // Prompt on second attempt should contain feedback
2154
+ const secondPrompt = (adapter.execute.mock.calls[1] as [Task])[0].prompt
2155
+ expect(secondPrompt).toContain('Missing tests')
2156
+ })
2157
+
2158
// With max_retries: 0 the convoy result is 'failed' while the task row itself
// is expected to carry the more specific 'review-blocked' status.
+ it('fast review BLOCK + retries exhausted — status review-blocked', async () => {
2159
+ const mockReviewRunner = vi.fn().mockResolvedValue({ verdict: 'block', feedback: 'Insecure code', tokens: 50, model: 'reviewer' })
2160
+
2161
+ const engine = makeEngine({
2162
+ spec: makeSpec({ defaults: { review: 'fast' } }, [{ max_retries: 0 }]),
2163
+ specYaml: 'name: test',
2164
+ adapter,
2165
+ dbPath,
2166
+ _worktreeManager: wtManager,
2167
+ _mergeQueue: mergeQueue,
2168
+ _reviewRunner: mockReviewRunner,
2169
+ })
2170
+ const result = await engine.run()
2171
+ expect(result.status).toBe('failed')
2172
+ expect(result.summary.failed).toBe(1)
2173
+ // Verify the task itself is review-blocked
2174
+ const store = createConvoyStore(dbPath)
2175
+ const tasks = store.getTasksByConvoy(result.convoyId)
2176
+ store.close()
2177
+ expect(tasks[0].status).toBe('review-blocked')
2178
+ })
2179
+
2180
// The mock passes on its first two calls and blocks afterwards; the test
// expects exactly three reviewer calls and an overall 'done'.
+ it('panel review 2/3 PASS — task proceeds (status done)', async () => {
2181
+ let callCount = 0
2182
+ const mockReviewRunner = vi.fn().mockImplementation(() => {
2183
+ callCount++
2184
+ // 2 pass, 1 block
2185
+ return Promise.resolve(callCount <= 2
2186
+ ? { verdict: 'pass', feedback: '', tokens: 30, model: 'reviewer' }
2187
+ : { verdict: 'block', feedback: 'Minor issue', tokens: 30, model: 'reviewer' })
2188
+ })
2189
+
2190
+ const engine = makeEngine({
2191
+ spec: makeSpec({ defaults: { review: 'panel' } }),
2192
+ specYaml: 'name: test',
2193
+ adapter,
2194
+ dbPath,
2195
+ _worktreeManager: wtManager,
2196
+ _mergeQueue: mergeQueue,
2197
+ _reviewRunner: mockReviewRunner,
2198
+ })
2199
+ const result = await engine.run()
2200
+ expect(result.status).toBe('done')
2201
+ expect(mockReviewRunner).toHaveBeenCalledTimes(3)
2202
+ })
2203
+
2204
// Reviewer calls 1-2 block, call 3 and every later call pass.
+ it('panel review 2/3 BLOCK — task retried with MUST-FIX', async () => {
2205
+ let reviewCallCount = 0
2206
+ const mockReviewRunner = vi.fn().mockImplementation(() => {
2207
+ reviewCallCount++
2208
+ // First round: 2 block; second round: 3 pass
2209
+ if (reviewCallCount <= 3) {
2210
+ return Promise.resolve(reviewCallCount <= 2
2211
+ ? { verdict: 'block', feedback: 'Critical bug', tokens: 30, model: 'reviewer' }
2212
+ : { verdict: 'pass', feedback: '', tokens: 30, model: 'reviewer' })
2213
+ }
2214
+ return Promise.resolve({ verdict: 'pass', feedback: '', tokens: 30, model: 'reviewer' })
2215
+ })
2216
+
2217
+ const engine = makeEngine({
2218
+ spec: makeSpec({ defaults: { review: 'panel' } }, [{ max_retries: 1 }]),
2219
+ specYaml: 'name: test',
2220
+ adapter,
2221
+ dbPath,
2222
+ _worktreeManager: wtManager,
2223
+ _mergeQueue: mergeQueue,
2224
+ _reviewRunner: mockReviewRunner,
2225
+ })
2226
+ const result = await engine.run()
2227
+ expect(result.status).toBe('done')
2228
+ expect(adapter.execute).toHaveBeenCalledTimes(2)
2229
+ // Prompt on second attempt contains MUST-FIX
2230
+ const secondPrompt = (adapter.execute.mock.calls[1] as [Task])[0].prompt
2231
+ expect(secondPrompt).toContain('MUST-FIX')
2232
+ expect(secondPrompt).toContain('Critical bug')
2233
+ })
2234
+
2235
// Unlike the other cases, the task list is supplied through `tasks` in
// makeSpec's first argument rather than through its second parameter.
// Each review reports 200 tokens against a review_budget of 100.
+ it('review budget exceeded with skip — review skipped, task done', async () => {
2236
+ const mockReviewRunner = vi.fn().mockResolvedValue({ verdict: 'pass', feedback: '', tokens: 200, model: 'reviewer' })
2237
+
2238
+ const engine = makeEngine({
2239
+ spec: makeSpec({
2240
+ defaults: { review: 'fast', review_budget: 100, on_review_budget_exceeded: 'skip', reviewer_model: 'r1' },
2241
+ tasks: [
2242
+ { id: 'task-1', prompt: 'Prompt 1', agent: 'developer', timeout: '30s', depends_on: [], files: [], description: '', max_retries: 0 },
2243
+ { id: 'task-2', prompt: 'Prompt 2', agent: 'developer', timeout: '30s', depends_on: ['task-1'], files: [], description: '', max_retries: 0 },
2244
+ ],
2245
+ }),
2246
+ specYaml: 'name: test',
2247
+ adapter,
2248
+ dbPath,
2249
+ _worktreeManager: wtManager,
2250
+ _mergeQueue: mergeQueue,
2251
+ _reviewRunner: mockReviewRunner,
2252
+ })
2253
+ const result = await engine.run()
2254
+ expect(result.status).toBe('done')
2255
+ // first task: budget not exceeded (0 < 100), review runs
2256
+ // second task: budget exceeded (200 >= 100), review skipped
2257
+ expect(mockReviewRunner).toHaveBeenCalledTimes(1)
2258
+ })
2259
+
2260
// The review runner has no resolved value configured here; the test expects it
// never to be invoked.
+ it('auto route: developer agent with empty diff → auto-pass (no reviewer call)', async () => {
2261
+ // Given: 'auto' review setting, developer agent, empty diff (git will fail on mock path)
2262
+ const mockReviewRunner = vi.fn()
2263
+ const engine = makeEngine({
2264
+ spec: makeSpec({ defaults: { review: 'auto' } }),
2265
+ specYaml: 'name: test',
2266
+ adapter,
2267
+ dbPath,
2268
+ _worktreeManager: wtManager,
2269
+ _mergeQueue: mergeQueue,
2270
+ _reviewRunner: mockReviewRunner,
2271
+ })
2272
+ const result = await engine.run()
2273
+ expect(result.status).toBe('done')
2274
+ expect(mockReviewRunner).not.toHaveBeenCalled()
2275
+ })
2276
+
2277
// Reads the task row back from the store and checks the persisted review
// columns (tokens, level, verdict).
+ it('review tokens tracked on task record', async () => {
2278
+ const mockReviewRunner = vi.fn().mockResolvedValue({ verdict: 'pass', feedback: '', tokens: 77, model: 'reviewer' })
2279
+ const engine = makeEngine({
2280
+ spec: makeSpec({ defaults: { review: 'fast' } }),
2281
+ specYaml: 'name: test',
2282
+ adapter,
2283
+ dbPath,
2284
+ _worktreeManager: wtManager,
2285
+ _mergeQueue: mergeQueue,
2286
+ _reviewRunner: mockReviewRunner,
2287
+ })
2288
+ const result = await engine.run()
2289
+ const store = createConvoyStore(dbPath)
2290
+ const tasks = store.getTasksByConvoy(result.convoyId)
2291
+ store.close()
2292
+ expect(tasks[0].review_tokens).toBe(77)
2293
+ expect(tasks[0].review_level).toBe('fast')
2294
+ expect(tasks[0].review_verdict).toBe('pass')
2295
+ })
2296
+
2297
// Only checks that at least one event of each type exists in the store.
+ it('review_started and review_verdict events emitted', async () => {
2298
+ const mockReviewRunner = vi.fn().mockResolvedValue({ verdict: 'pass', feedback: '', tokens: 10, model: 'reviewer' })
2299
+ const engine = makeEngine({
2300
+ spec: makeSpec({ defaults: { review: 'fast' } }),
2301
+ specYaml: 'name: test',
2302
+ adapter,
2303
+ dbPath,
2304
+ _worktreeManager: wtManager,
2305
+ _mergeQueue: mergeQueue,
2306
+ _reviewRunner: mockReviewRunner,
2307
+ })
2308
+ const result = await engine.run()
2309
+ const store = createConvoyStore(dbPath)
2310
+ const events = store.getEvents(result.convoyId)
2311
+ store.close()
2312
+ const startedEvent = events.find(e => e.type === 'review_started')
2313
+ const verdictEvent = events.find(e => e.type === 'review_verdict')
2314
+ expect(startedEvent).toBeDefined()
2315
+ expect(verdictEvent).toBeDefined()
2316
+ })
2317
+
2318
// NOTE(review): the assertions only show that both tasks finish with
// concurrency: 1; nothing here asserts on mockReviewRunner calls or on any
// overlap between review and task execution.
+ it('review sessions do NOT count against concurrency limit', async () => {
2319
+ // Concurrency=1, 2 tasks in parallel. Both should complete with review.
2320
+ const mockReviewRunner = vi.fn().mockResolvedValue({ verdict: 'pass', feedback: '', tokens: 10, model: 'reviewer' })
2321
+ const engine = makeEngine({
2322
+ spec: makeSpec(
2323
+ { concurrency: 1, defaults: { review: 'fast' } },
2324
+ [{ id: 'task-1' }, { id: 'task-2' }],
2325
+ ),
2326
+ specYaml: 'name: test',
2327
+ adapter,
2328
+ dbPath,
2329
+ _worktreeManager: wtManager,
2330
+ _mergeQueue: mergeQueue,
2331
+ _reviewRunner: mockReviewRunner,
2332
+ })
2333
+ const result = await engine.run()
2334
+ expect(result.status).toBe('done')
2335
+ expect(result.summary.done).toBe(2)
2336
+ })
2337
+
2338
// End-to-end variant of the block-then-pass case: additionally checks the
// persisted task row (level, verdict, retries) and that two review_started /
// review_verdict events were stored, in block-then-pass order.
+ it('full fast-review flow: BLOCK on first attempt → retry → PASS → done with complete events', async () => {
2339
+ const mockReviewRunner = vi.fn()
2340
+ .mockResolvedValueOnce({ verdict: 'block', feedback: 'Add more tests', tokens: 40, model: 'reviewer' })
2341
+ .mockResolvedValueOnce({ verdict: 'pass', feedback: '', tokens: 35, model: 'reviewer' })
2342
+
2343
+ const engine = makeEngine({
2344
+ spec: makeSpec({ defaults: { review: 'fast' } }, [{ id: 'task-1', max_retries: 1 }]),
2345
+ specYaml: 'name: test',
2346
+ adapter,
2347
+ dbPath,
2348
+ _worktreeManager: wtManager,
2349
+ _mergeQueue: mergeQueue,
2350
+ _reviewRunner: mockReviewRunner,
2351
+ })
2352
+ const result = await engine.run()
2353
+
2354
+ expect(result.status).toBe('done')
2355
+ expect(adapter.execute).toHaveBeenCalledTimes(2)
2356
+ expect(mockReviewRunner).toHaveBeenCalledTimes(2)
2357
+
2358
+ const store = createConvoyStore(dbPath)
2359
+ const tasks = store.getTasksByConvoy(result.convoyId)
2360
+ const events = store.getEvents(result.convoyId)
2361
+ store.close()
2362
+
2363
+ const task = tasks[0]
2364
+ expect(task.review_level).toBe('fast')
2365
+ expect(task.review_verdict).toBe('pass')
2366
+ expect(task.retries).toBe(1)
2367
+
2368
+ const reviewStartedEvents = events.filter(e => e.type === 'review_started')
2369
+ const reviewVerdictEvents = events.filter(e => e.type === 'review_verdict')
2370
+ expect(reviewStartedEvents.length).toBe(2)
2371
+ expect(reviewVerdictEvents.length).toBe(2)
2372
+
2373
// Event payloads are stored as JSON strings in `data`.
+ const firstVerdict = JSON.parse(reviewVerdictEvents[0].data!) as Record<string, unknown>
2374
+ const secondVerdict = JSON.parse(reviewVerdictEvents[1].data!) as Record<string, unknown>
2375
+ expect(firstVerdict['verdict']).toBe('block')
2376
+ expect(secondVerdict['verdict']).toBe('pass')
2377
+ })
2378
+
2379
// Six reviewer calls are expected in total: three per round, two rounds.
+ it('panel flow: 2/3 BLOCK first round → retry → 3/3 PASS second round → done', async () => {
2380
+ let reviewCallCount = 0
2381
+ const mockReviewRunner = vi.fn().mockImplementation(() => {
2382
+ reviewCallCount++
2383
+ // Round 1 (calls 1-3): BLOCK, BLOCK, PASS → majority block → retry
2384
+ if (reviewCallCount <= 3) {
2385
+ return Promise.resolve(reviewCallCount <= 2
2386
+ ? { verdict: 'block', feedback: 'Critical issue', tokens: 20, model: 'reviewer' }
2387
+ : { verdict: 'pass', feedback: '', tokens: 20, model: 'reviewer' })
2388
+ }
2389
+ // Round 2 (calls 4-6): all PASS
2390
+ return Promise.resolve({ verdict: 'pass', feedback: '', tokens: 20, model: 'reviewer' })
2391
+ })
2392
+
2393
+ const engine = makeEngine({
2394
+ spec: makeSpec({ defaults: { review: 'panel' } }, [{ id: 'task-1', max_retries: 1 }]),
2395
+ specYaml: 'name: test',
2396
+ adapter,
2397
+ dbPath,
2398
+ _worktreeManager: wtManager,
2399
+ _mergeQueue: mergeQueue,
2400
+ _reviewRunner: mockReviewRunner,
2401
+ })
2402
+ const result = await engine.run()
2403
+
2404
+ expect(result.status).toBe('done')
2405
+ expect(adapter.execute).toHaveBeenCalledTimes(2)
2406
+ expect(mockReviewRunner).toHaveBeenCalledTimes(6)
2407
+
2408
+ const store = createConvoyStore(dbPath)
2409
+ const tasks = store.getTasksByConvoy(result.convoyId)
2410
+ store.close()
2411
+
2412
+ expect(tasks[0].review_verdict).toBe('pass')
2413
+ expect(tasks[0].panel_attempts).toBeGreaterThanOrEqual(1)
2414
+ })
2415
+
2416
// Every reviewer call blocks. With max_retries: 3 the test expects the task to
// end as 'disputed' after 3 panel attempts, with a non-null dispute_id that
// matches the payload of the stored dispute_opened event.
+ it('dispute: task dispute_id matches the dispute_opened event and panel_attempts is 3', async () => {
2417
+ const mockReviewRunner = vi.fn().mockResolvedValue({ verdict: 'block', feedback: 'broken', tokens: 5, model: 'r' })
2418
+
2419
+ const engine = makeEngine({
2420
+ spec: makeSpec({ defaults: { review: 'panel' } }, [{ id: 'task-1', max_retries: 3 }]),
2421
+ specYaml: 'name: test',
2422
+ adapter,
2423
+ dbPath,
2424
+ _worktreeManager: wtManager,
2425
+ _mergeQueue: mergeQueue,
2426
+ _reviewRunner: mockReviewRunner,
2427
+ })
2428
+ const result = await engine.run()
2429
+
2430
+ const store = createConvoyStore(dbPath)
2431
+ const tasks = store.getTasksByConvoy(result.convoyId)
2432
+ const events = store.getEvents(result.convoyId)
2433
+ store.close()
2434
+
2435
+ const task = tasks[0]
2436
+ expect(task.status).toBe('disputed')
2437
+ expect(task.dispute_id).not.toBeNull()
2438
+ expect(task.panel_attempts).toBe(3)
2439
+
2440
+ const disputeEvent = events.find(e => e.type === 'dispute_opened')
2441
+ expect(disputeEvent).toBeDefined()
2442
+ const eventData = JSON.parse(disputeEvent!.data!) as Record<string, unknown>
2443
+ // Verify the dispute_id on the task record matches the one in the event
2444
+ expect(eventData['dispute_id']).toBe(task.dispute_id)
2445
+ expect(eventData['panel_attempts']).toBe(3)
2446
+ })
2447
+
2448
// review_budget: 0 with 'stop': the reviewer must never be called, task-1 is
// expected to be 'review-blocked' and its dependent task-2 'skipped'.
+ it('review budget exceeded: stop marks task review-blocked and skips all pending tasks', async () => {
2449
+ const mockReviewRunner = vi.fn()
2450
+
2451
+ const engine = makeEngine({
2452
+ spec: makeSpec(
2453
+ { defaults: { review: 'fast', review_budget: 0, on_review_budget_exceeded: 'stop' } },
2454
+ [
2455
+ { id: 'task-1', depends_on: [] },
2456
+ { id: 'task-2', depends_on: ['task-1'] },
2457
+ ],
2458
+ ),
2459
+ specYaml: 'name: test',
2460
+ adapter,
2461
+ dbPath,
2462
+ _worktreeManager: wtManager,
2463
+ _mergeQueue: mergeQueue,
2464
+ _reviewRunner: mockReviewRunner,
2465
+ })
2466
+ const result = await engine.run()
2467
+
2468
+ const store = createConvoyStore(dbPath)
2469
+ const tasks = store.getTasksByConvoy(result.convoyId)
2470
+ store.close()
2471
+
2472
// Map of task id → status for order-independent assertions.
+ const byId = Object.fromEntries(tasks.map(t => [t.id, t.status]))
2473
+ expect(byId['task-1']).toBe('review-blocked')
2474
+ expect(byId['task-2']).toBe('skipped')
2475
+ expect(mockReviewRunner).not.toHaveBeenCalled()
2476
+ })
2477
+
2478
// review_budget: 0 with 'downgrade': no reviewer call, yet the task row is
// expected to record verdict 'pass' at level 'fast'.
+ it('review budget exceeded: downgrade auto-passes task without calling reviewer', async () => {
2479
+ const mockReviewRunner = vi.fn()
2480
+
2481
+ const engine = makeEngine({
2482
+ spec: makeSpec(
2483
+ { defaults: { review: 'fast', review_budget: 0, on_review_budget_exceeded: 'downgrade' } },
2484
+ ),
2485
+ specYaml: 'name: test',
2486
+ adapter,
2487
+ dbPath,
2488
+ _worktreeManager: wtManager,
2489
+ _mergeQueue: mergeQueue,
2490
+ _reviewRunner: mockReviewRunner,
2491
+ })
2492
+ const result = await engine.run()
2493
+
2494
+ expect(result.status).toBe('done')
2495
+ expect(mockReviewRunner).not.toHaveBeenCalled()
2496
+
2497
+ const store = createConvoyStore(dbPath)
2498
+ const tasks = store.getTasksByConvoy(result.convoyId)
2499
+ store.close()
2500
+
2501
+ expect(tasks[0].review_verdict).toBe('pass')
2502
+ expect(tasks[0].review_level).toBe('fast')
2503
+ })
2504
+ })
2505
+
2506
+ // ── Drift detection ───────────────────────────────────────────────────────────
2507
+
2508
+ describe('drift detection', () => {
2509
+ let adapter: ReturnType<typeof makeAdapter>
2510
+ let wtManager: ReturnType<typeof makeWorktreeManager>
2511
+ let mergeQueue: ReturnType<typeof makeMergeQueue>
2512
+
2513
// Recreate the mocks before each drift test. The adapter is created with the
// name 'copilot'; a later test in this suite uses 'test-adapter' to exercise
// the path where drift detection is skipped.
+ beforeEach(() => {
2514
+ adapter = makeAdapter('copilot')
2515
+ wtManager = makeWorktreeManager()
2516
+ mergeQueue = makeMergeQueue()
2517
+ })
2518
+
2519
// Three queued adapter.execute results: task run, drift check returning a JSON
// payload with score 0.3, then the retried task run. The test expects all three
// calls to happen and the score and retry flag to be persisted on the task row.
+ it('detect_drift=true triggers drift check and retries on low confidence', async () => {
2520
+ // Call sequence: main task → drift check (low score) → main task retry
2521
+ adapter.execute
2522
+ .mockResolvedValueOnce({ success: true, output: 'done', exitCode: 0 })
2523
+ .mockResolvedValueOnce({ success: true, output: '{"score": 0.3, "explanation": "uncertain"}', exitCode: 0 })
2524
+ .mockResolvedValueOnce({ success: true, output: 'done retry', exitCode: 0 })
2525
+
2526
+ const engine = makeEngine({
2527
+ spec: makeSpec({ defaults: { detect_drift: true } }, [{ id: 'task-1', max_retries: 1 }]),
2528
+ specYaml: 'name: test',
2529
+ adapter,
2530
+ dbPath,
2531
+ _worktreeManager: wtManager,
2532
+ _mergeQueue: mergeQueue,
2533
+ })
2534
+ const result = await engine.run()
2535
+
2536
+ expect(result.status).toBe('done')
2537
+ expect(result.summary.done).toBe(1)
2538
+ expect(adapter.execute).toHaveBeenCalledTimes(3)
2539
+
2540
+ // Verify drift_score and drift_retried stored
2541
+ const store = createConvoyStore(dbPath)
2542
+ const tasks = store.getTasksByConvoy(result.convoyId)
2543
+ store.close()
2544
+ expect(tasks[0].drift_score).toBe(0.3)
2545
+ expect(tasks[0].drift_retried).toBe(1)
2546
+ })
2547
+
2548
// Two queued adapter.execute results: the task run and a drift check whose JSON
// payload carries score 0.9; the test expects exactly two calls and status 'done'.
// NOTE(review): the title and the first comment below describe a drift *retry*
// (drift_retried=1), but with a 0.9 score and only two expected calls no retry
// appears to happen here — this looks like the "high score → no retry" path.
// Confirm the intended scenario.
+ it('detect_drift=true does NOT re-check on drift retry (drift_retried=1)', async () => {
2549
+ // On second execution drift_retried=1 so no third call for drift check
2550
+ adapter.execute
2551
+ .mockResolvedValueOnce({ success: true, output: 'done', exitCode: 0 })
2552
+ .mockResolvedValueOnce({ success: true, output: '{"score": 0.9, "explanation": "confident"}', exitCode: 0 })
2553
+
2554
+ const engine = makeEngine({
2555
+ spec: makeSpec({ defaults: { detect_drift: true } }),
2556
+ specYaml: 'name: test',
2557
+ adapter,
2558
+ dbPath,
2559
+ _worktreeManager: wtManager,
2560
+ _mergeQueue: mergeQueue,
2561
+ })
2562
+ const result = await engine.run()
2563
+
2564
+ expect(result.status).toBe('done')
2565
+ expect(adapter.execute).toHaveBeenCalledTimes(2)
2566
+ })
2567
+
2568
+ it('drift_check_result and drift_detected events emitted when drifted', async () => {
2569
+ adapter.execute
2570
+ .mockResolvedValueOnce({ success: true, output: 'done', exitCode: 0 })
2571
+ .mockResolvedValueOnce({ success: true, output: '{"score": 0.2, "explanation": "very unsure"}', exitCode: 0 })
2572
+ .mockResolvedValueOnce({ success: true, output: 'done', exitCode: 0 })
2573
+
2574
+ const engine = makeEngine({
2575
+ spec: makeSpec({ defaults: { detect_drift: true } }, [{ id: 'task-1', max_retries: 1 }]),
2576
+ specYaml: 'name: test',
2577
+ adapter,
2578
+ dbPath,
2579
+ _worktreeManager: wtManager,
2580
+ _mergeQueue: mergeQueue,
2581
+ })
2582
+ const result = await engine.run()
2583
+
2584
+ const store = createConvoyStore(dbPath)
2585
+ const events = store.getEvents(result.convoyId)
2586
+ store.close()
2587
+
2588
+ expect(events.some(e => e.type === 'drift_check_result')).toBe(true)
2589
+ expect(events.some(e => e.type === 'drift_detected')).toBe(true)
2590
+ })
2591
+
2592
+ it('non-copilot adapter skips drift detection (returns done without extra call)', async () => {
2593
+ // adapter name is 'test-adapter' — not a streaming adapter; drift check should be skipped
2594
+ const nonStreamingAdapter = makeAdapter('test-adapter')
2595
+ nonStreamingAdapter.execute.mockResolvedValue({ success: true, output: 'ok', exitCode: 0 })
2596
+
2597
+ // Suppress the stderr warning
2598
+ const stderrSpy = vi.spyOn(process.stderr, 'write').mockImplementation(() => true)
2599
+ try {
2600
+ const engine = makeEngine({
2601
+ spec: makeSpec({ defaults: { detect_drift: true } }),
2602
+ specYaml: 'name: test',
2603
+ adapter: nonStreamingAdapter,
2604
+ dbPath,
2605
+ _worktreeManager: wtManager,
2606
+ _mergeQueue: mergeQueue,
2607
+ })
2608
+ const result = await engine.run()
2609
+ expect(result.status).toBe('done')
2610
+ // Only 1 call: main task (no drift check call) because non-streaming adapter
2611
+ expect(nonStreamingAdapter.execute).toHaveBeenCalledTimes(1)
2612
+ } finally {
2613
+ stderrSpy.mockRestore()
2614
+ }
2615
+ })
2616
+ })
2617
+
2618
+ // ── Dispute protocol ──────────────────────────────────────────────────────────
2619
+
2620
// Dispute protocol suite: runs specs with `review: 'panel'` against a stubbed review runner and
// inspects the resulting task rows and events in the convoy store.
describe('dispute protocol', () => {
  let adapter: ReturnType<typeof makeAdapter>
  let wtManager: ReturnType<typeof makeWorktreeManager>
  let mergeQueue: ReturnType<typeof makeMergeQueue>

  beforeEach(() => {
    adapter = makeAdapter()
    wtManager = makeWorktreeManager()
    mergeQueue = makeMergeQueue()
  })

  /** Review runner stub that answers every review with a `block` verdict. */
  const blockingReviewer = (feedback: string, tokens: number) =>
    vi.fn().mockResolvedValue({ verdict: 'block', feedback, tokens, model: 'r' })

  /** Builds an engine over the shared fakes with the given spec and review runner. */
  const engineWithReviewer = (
    spec: ReturnType<typeof makeSpec>,
    reviewRunner: ReturnType<typeof vi.fn>,
  ) =>
    makeEngine({
      spec,
      specYaml: 'name: test',
      adapter,
      dbPath,
      _worktreeManager: wtManager,
      _mergeQueue: mergeQueue,
      _reviewRunner: reviewRunner,
    })

  /** Reads the convoy's task rows; the store is closed even when the query throws. */
  const loadTasks = (convoyId: string) => {
    const store = createConvoyStore(dbPath)
    try {
      return store.getTasksByConvoy(convoyId)
    } finally {
      store.close()
    }
  }

  /** Reads the convoy's events; the store is closed even when the query throws. */
  const loadEvents = (convoyId: string) => {
    const store = createConvoyStore(dbPath)
    try {
      return store.getEvents(convoyId)
    } finally {
      store.close()
    }
  }

  /** Maps task id to status for the given convoy. */
  const statusById = (convoyId: string) =>
    Object.fromEntries(loadTasks(convoyId).map(t => [t.id, t.status]))

  it('3 panel blocks mark task as disputed', async () => {
    // The reviewer blocks every time and the task allows 3 retries; the test expects the task to
    // end up `disputed` with a dispute id attached.
    const mockReviewRunner = blockingReviewer('critical bug', 10)

    const engine = engineWithReviewer(
      makeSpec({ defaults: { review: 'panel' } }, [{ id: 'task-1', max_retries: 3 }]),
      mockReviewRunner,
    )
    const result = await engine.run()

    const tasks = loadTasks(result.convoyId)

    expect(tasks[0].status).toBe('disputed')
    expect(tasks[0].dispute_id).not.toBeNull()
    expect(result.summary.failed).toBe(1) // disputed counts as failed in summary
  })

  it('dispute_opened event emitted after 3 panel blocks', async () => {
    const mockReviewRunner = blockingReviewer('bug', 5)

    const engine = engineWithReviewer(
      makeSpec({ defaults: { review: 'panel' } }, [{ id: 'task-1', max_retries: 3 }]),
      mockReviewRunner,
    )
    const result = await engine.run()

    const events = loadEvents(result.convoyId)

    // The event payload is stored as a JSON string in `data`.
    const disputeEvent = events.find(e => e.type === 'dispute_opened')
    expect(disputeEvent).toBeDefined()
    const data = JSON.parse(disputeEvent!.data!) as Record<string, unknown>
    expect(data.task_id).toBe('task-1')
    expect(data.panel_attempts).toBe(3)
  })

  it('on_dispute: stop halts all pending tasks', async () => {
    const mockReviewRunner = blockingReviewer('bug', 5)

    const engine = engineWithReviewer(
      makeSpec(
        { defaults: { review: 'panel', on_dispute: 'stop' } },
        [
          { id: 'task-1', depends_on: [], max_retries: 3 },
          { id: 'task-2', depends_on: ['task-1'] }, // depends on task-1, so queued after
        ],
      ),
      mockReviewRunner,
    )
    const result = await engine.run()

    const byId = statusById(result.convoyId)
    expect(byId['task-1']).toBe('disputed')
    expect(byId['task-2']).toBe('skipped')
  })

  it('on_dispute: continue keeps other tasks running', async () => {
    // task-1 is always blocked by the reviewer (expected: disputed); every other task passes.
    adapter.execute.mockResolvedValue({ success: true, output: 'ok', exitCode: 0 })
    const mockReviewRunner = vi.fn().mockImplementation((_task: TaskRecord) => {
      if (_task.id === 'task-1') {
        return Promise.resolve({ verdict: 'block', feedback: 'bug', tokens: 5, model: 'r' })
      }
      return Promise.resolve({ verdict: 'pass', feedback: '', tokens: 5, model: 'r' })
    })

    const engine = engineWithReviewer(
      makeSpec(
        { defaults: { review: 'panel', on_dispute: 'continue' } },
        [
          { id: 'task-1', depends_on: [], max_retries: 3 },
          { id: 'task-2', depends_on: [] },
        ],
      ),
      mockReviewRunner,
    )
    const result = await engine.run()

    const byId = statusById(result.convoyId)
    expect(byId['task-1']).toBe('disputed')
    expect(byId['task-2']).toBe('done')
  })
})
+
2749
+ // ── File-based injection ───────────────────────────────────────────────────
2750
+
2751
// File-based injection suite: both tests run an engine with `basePath: tmpDir` and check the
// run result only.
describe('file-based injection', () => {
  it('picks up tasks from inject file and ingests them', async () => {
    // NOTE(review): no inject file is written by this test, so it only proves that the original
    // task completes — confirm whether a fixture file was intended.
    const fakeAdapter = makeAdapter()
    fakeAdapter.execute.mockResolvedValue({ success: true, output: 'ok', exitCode: 0 })

    const singleTaskSpec = makeSpec({ concurrency: 1 }, [
      { id: 'task-1', prompt: 'Original task', timeout: '5s' },
    ])

    const outcome = await makeEngine({
      spec: singleTaskSpec,
      specYaml: 'name: test',
      adapter: fakeAdapter,
      dbPath,
      basePath: tmpDir,
      _worktreeManager: makeWorktreeManager(),
      _mergeQueue: makeMergeQueue(),
    }).run()

    expect(outcome.summary.done).toBeGreaterThanOrEqual(1)
  })

  it('respects convoy_id path traversal guard', async () => {
    // NOTE(review): no traversal-style convoy id is supplied here; the assertion covers a plain
    // successful run — confirm the intended guard scenario.
    const fakeAdapter = makeAdapter()
    const defaultSpec = makeSpec()

    const outcome = await makeEngine({
      spec: defaultSpec,
      specYaml: 'name: test',
      adapter: fakeAdapter,
      dbPath,
      basePath: tmpDir,
      _worktreeManager: makeWorktreeManager(),
      _mergeQueue: makeMergeQueue(),
    }).run()

    expect(outcome.status).toBe('done')
  })
})
+
2793
// NDJSON recovery suite: calls `recoverNdjson` with a stubbed store (only `getEvents` is provided)
// and an on-disk NDJSON file under `tmpDir`, then inspects the file contents afterwards.
describe('NDJSON recovery', () => {
  type ConvoyStore = ReturnType<typeof createConvoyStore>

  // Store stub exposing only `getEvents`, which returns the supplied rows.
  const storeReturning = (rows: unknown[]) =>
    ({ getEvents: vi.fn().mockReturnValue(rows) }) as unknown as ConvoyStore

  it('truncates partial trailing line in NDJSON file', () => {
    const convoyId = 'convoy-ndjson-1'
    const logPath = join(tmpDir, 'recover-partial.ndjson')
    const completeLine = JSON.stringify({ _event_id: 1, convoy_id: convoyId, type: 'task_started' })
    // One complete record followed by a cut-off second record with no trailing newline.
    writeFileSync(logPath, completeLine + '\n' + '{"_event_id":2', 'utf8')

    recoverNdjson(storeReturning([]), convoyId, logPath)

    // Only the complete record (with its newline) should remain.
    expect(readFileSync(logPath, 'utf8')).toBe(completeLine + '\n')
  })

  it('replays SQLite events missing from NDJSON file', () => {
    const convoyId = 'convoy-ndjson-2'
    const logPath = join(tmpDir, 'recover-replay.ndjson')
    // The file starts with event 1 only; the store stub reports events 1 and 2.
    const existingRecord = JSON.stringify({ _event_id: 1, convoy_id: convoyId, type: 'task_started' })
    writeFileSync(logPath, `${existingRecord}\n`, 'utf8')

    const storedEvents = [
      {
        id: 1,
        type: 'task_started',
        convoy_id: convoyId,
        task_id: 'task-1',
        worker_id: null,
        data: JSON.stringify({ phase: 0 }),
        created_at: '2026-03-11T10:00:00.000Z',
      },
      {
        id: 2,
        type: 'task_finished',
        convoy_id: convoyId,
        task_id: 'task-1',
        worker_id: null,
        data: JSON.stringify({ success: true }),
        created_at: '2026-03-11T10:00:01.000Z',
      },
    ]

    recoverNdjson(storeReturning(storedEvents), convoyId, logPath)

    const replayedIds = readFileSync(logPath, 'utf8')
      .trim()
      .split('\n')
      .map((row) => JSON.parse(row) as Record<string, unknown>)
      .map((record) => record._event_id)
    expect(replayedIds).toEqual([1, 2])
  })

  it('does not let event.data override canonical fields', () => {
    const convoyId = 'convoy-ndjson-canonical'
    const logPath = join(tmpDir, 'recover-canonical.ndjson')
    writeFileSync(logPath, '', 'utf8')

    // The payload tries to shadow every canonical field and adds one harmless extra field.
    const hostilePayload = JSON.stringify({
      _event_id: 'EVIL',
      convoy_id: 'EVIL-CONVOY',
      task_id: 'EVIL-TASK',
      type: 'EVIL-TYPE',
      timestamp: 'EVIL-TIME',
      worker_id: 'EVIL-WORKER',
      safe_field: 'this-is-fine',
    })
    const storedEvents = [
      {
        id: 99,
        type: 'task_started',
        convoy_id: convoyId,
        task_id: 'task-legit',
        worker_id: 'w1',
        data: hostilePayload,
        created_at: '2026-03-11T10:00:00.000Z',
      },
    ]

    recoverNdjson(storeReturning(storedEvents), convoyId, logPath)

    const rows = readFileSync(logPath, 'utf8').trim().split('\n')
    expect(rows).toHaveLength(1)
    const record = JSON.parse(rows[0]) as Record<string, unknown>

    // Canonical fields come from the event row; the extra payload field is carried through.
    const expectedFields: Array<[string, unknown]> = [
      ['_event_id', 99],
      ['convoy_id', convoyId],
      ['task_id', 'task-legit'],
      ['type', 'task_started'],
      ['worker_id', 'w1'],
      ['timestamp', '2026-03-11T10:00:00.000Z'],
      ['safe_field', 'this-is-fine'],
    ]
    for (const [field, value] of expectedFields) {
      expect(record[field]).toBe(value)
    }
  })
})
+
2891
// runConvoyGuard suite: seeds one convoy with a single task directly in the store, then calls
// `runConvoyGuard` and checks the `passed` flag (and warnings) it returns.
describe('runConvoyGuard', () => {
  it('returns passed: false when non-terminal tasks exist', () => {
    const guardConvoyId = 'convoy-guard-1'
    const guardStore = createConvoyStore(dbPath)
    // try/finally so the store handle is released even when an insert or assertion throws.
    try {
      guardStore.insertConvoy({
        id: guardConvoyId,
        name: 'Guard test',
        spec_hash: 'hash',
        spec_yaml: 'name: guard test',
        status: 'running',
        branch: null,
        created_at: new Date().toISOString(),
      })
      // The only task is still `running`, i.e. not in a terminal state.
      guardStore.insertTask({
        id: 'task-guard-1',
        convoy_id: guardConvoyId,
        phase: 0,
        prompt: 'test',
        agent: 'developer',
        adapter: null,
        model: null,
        timeout_ms: 60000,
        status: 'running',
        retries: 0,
        max_retries: 1,
        files: null,
        depends_on: null,
        gates: null,
      })

      const ndjsonPathGuard = join(tmpDir, 'guard-test.ndjson')
      writeFileSync(ndjsonPathGuard, '')
      const wtManager = makeWorktreeManager()
      const result = runConvoyGuard(guardStore, guardConvoyId, wtManager, ndjsonPathGuard)
      expect(result.passed).toBe(false)
      expect(result.warnings.length).toBeGreaterThan(0)
    } finally {
      guardStore.close()
    }
  })

  it('returns passed: true when all tasks are terminal', () => {
    const guardConvoyId2 = 'convoy-guard-2'
    const guardStore2 = createConvoyStore(dbPath)
    // try/finally so the store handle is released even when an insert or assertion throws.
    try {
      guardStore2.insertConvoy({
        id: guardConvoyId2,
        name: 'Guard test ok',
        spec_hash: 'hash',
        spec_yaml: 'name: guard test ok',
        status: 'done',
        branch: null,
        created_at: new Date().toISOString(),
      })
      guardStore2.insertTask({
        id: 'task-guard-2',
        convoy_id: guardConvoyId2,
        phase: 0,
        prompt: 'test',
        agent: 'developer',
        adapter: null,
        model: null,
        timeout_ms: 60000,
        status: 'done',
        retries: 0,
        max_retries: 1,
        files: null,
        depends_on: null,
        gates: null,
      })

      // The NDJSON file is pre-populated with a single event line for this convoy.
      const ndjsonPathGuard2 = join(tmpDir, 'guard-pass.ndjson')
      writeFileSync(ndjsonPathGuard2, JSON.stringify({ _event_id: 1, convoy_id: guardConvoyId2, type: 'task_done' }) + '\n')
      const wtManager2 = makeWorktreeManager()
      const result2 = runConvoyGuard(guardStore2, guardConvoyId2, wtManager2, ndjsonPathGuard2)
      expect(result2.passed).toBe(true)
    } finally {
      guardStore2.close()
    }
  })
})
+
2968
// injectTask partition validation suite: a convoy is seeded whose task `task-owner` lists
// `src/auth/` in `files`; injecting a task that lists a file under that directory must throw.
describe('injectTask partition validation', () => {
  /**
   * Seeds a pending convoy plus one pending task (`task-owner`) whose `files` is `['src/auth/']`.
   * The store is closed even when an insert throws.
   */
  function seedAuthOwner(convoyId: string, name: string, specHash: string, specYaml: string): void {
    const seedStore = createConvoyStore(dbPath)
    try {
      seedStore.insertConvoy({
        id: convoyId,
        name,
        spec_hash: specHash,
        status: 'pending',
        branch: null,
        created_at: new Date().toISOString(),
        spec_yaml: specYaml,
        pipeline_id: null,
      })
      seedStore.insertTask({
        id: 'task-owner',
        convoy_id: convoyId,
        phase: 0,
        prompt: 'Owns auth partition',
        agent: 'developer',
        adapter: null,
        model: null,
        timeout_ms: 30_000,
        status: 'pending',
        retries: 0,
        max_retries: 1,
        files: JSON.stringify(['src/auth/']),
        depends_on: null,
        gates: null,
      })
    } finally {
      seedStore.close()
    }
  }

  /** Builds an engine rooted at `tmpDir` with fresh worktree and merge-queue fakes. */
  const engineFor = (specYaml: string) =>
    makeEngine({
      spec: makeSpec(),
      specYaml,
      adapter: makeAdapter(),
      dbPath,
      basePath: tmpDir,
      _worktreeManager: makeWorktreeManager(),
      _mergeQueue: makeMergeQueue(),
    })

  it('rejects injected tasks with normalized path overlap', () => {
    // scanSymlinks is stubbed to a no-op for this test.
    const symlinkSpy = vi.spyOn(partition, 'scanSymlinks').mockImplementation(() => {})

    // Everything after the spy runs inside `try`, so a seeding or setup failure cannot leave the
    // stub installed for later tests.
    try {
      const convoyId = 'convoy-inject-overlap-1'
      seedAuthOwner(convoyId, 'Inject overlap test', 'hash-1', 'name: inject-overlap')
      const engine = engineFor('name: inject-overlap')

      expect(() => engine.injectTask(convoyId, {
        id: 'task-injected',
        prompt: 'Injected overlap task',
        agent: 'developer',
        phase: 0,
        files: ['src/auth/service.ts'],
      })).toThrow(/File partition overlap/i)
    } finally {
      symlinkSpy.mockRestore()
    }
  })

  it('rejects injected task with unnormalized paths that overlap', () => {
    // scanSymlinks is stubbed to a no-op for this test.
    const symlinkSpy = vi.spyOn(partition, 'scanSymlinks').mockImplementation(() => {})

    try {
      const convoyId = 'convoy-inject-overlap-2'
      seedAuthOwner(convoyId, 'Inject overlap test 2', 'hash-2', 'name: inject-overlap-2')
      const engine = engineFor('name: inject-overlap-2')

      // Same file as the previous test, but written with a leading `./`.
      expect(() => engine.injectTask(convoyId, {
        id: 'task-injected-dot-path',
        prompt: 'Injected overlap task',
        agent: 'developer',
        phase: 0,
        files: ['./src/auth/service.ts'],
      })).toThrow(/File partition overlap/i)
    } finally {
      symlinkSpy.mockRestore()
    }
  })
})
+
3082
+ // ── Swarm mode ─────────────────────────────────────────────────────────────
3083
+
3084
// Swarm mode suite: specs use `concurrency: 'auto'` (cast, since the spec type declares a number)
// and the tests check completion counts and the peak number of in-flight adapter calls.
describe('swarm mode (concurrency: auto)', () => {
  /**
   * Replaces `adapter.execute` with a 50 ms async stub that tracks how many calls are in flight.
   * Returns the live counters object; read `peak` after the run has finished.
   */
  function trackPeakConcurrency(adapter: ReturnType<typeof makeAdapter>) {
    const counters = { active: 0, peak: 0 }
    adapter.execute.mockImplementation(async () => {
      counters.active++
      if (counters.active > counters.peak) counters.peak = counters.active
      await new Promise(resolve => setTimeout(resolve, 50))
      counters.active--
      return { success: true, output: 'ok', exitCode: 0 }
    })
    return counters
  }

  /** Builds an engine with fresh worktree and merge-queue fakes. */
  const engineFor = (spec: ReturnType<typeof makeSpec>, adapter: ReturnType<typeof makeAdapter>) =>
    makeEngine({
      spec,
      specYaml: 'name: test',
      adapter,
      dbPath,
      _worktreeManager: makeWorktreeManager(),
      _mergeQueue: makeMergeQueue(),
    })

  it('runs all tasks with auto concurrency', async () => {
    const adapter = makeAdapter()
    const spec = makeSpec(
      { concurrency: 'auto' as unknown as number },
      [
        { id: 'task-1', prompt: 'First' },
        { id: 'task-2', prompt: 'Second' },
        { id: 'task-3', prompt: 'Third' },
      ],
    )

    const result = await engineFor(spec, adapter).run()
    expect(result.status).toBe('done')
    expect(result.summary.done).toBe(3)
    expect(result.summary.total).toBe(3)
  })

  it('respects max_swarm_concurrency from defaults', async () => {
    const adapter = makeAdapter()
    const counters = trackPeakConcurrency(adapter)

    // Four tasks with an explicit cap of 2.
    const spec = makeSpec(
      {
        concurrency: 'auto' as unknown as number,
        defaults: { max_swarm_concurrency: 2 },
      },
      [
        { id: 'task-1', prompt: 'T1' },
        { id: 'task-2', prompt: 'T2' },
        { id: 'task-3', prompt: 'T3' },
        { id: 'task-4', prompt: 'T4' },
      ],
    )

    const result = await engineFor(spec, adapter).run()
    expect(result.status).toBe('done')
    expect(result.summary.done).toBe(4)
    expect(counters.peak).toBeLessThanOrEqual(2)
  })

  it('defaults max_swarm_concurrency to 8', async () => {
    const adapter = makeAdapter()
    // Measure in-flight calls so the default cap is actually asserted, not just completion.
    const counters = trackPeakConcurrency(adapter)

    // Ten tasks and no explicit cap: more tasks than the default limit named in the title.
    const spec = makeSpec(
      { concurrency: 'auto' as unknown as number },
      Array.from({ length: 10 }, (_, i) => ({
        id: `task-${i + 1}`,
        prompt: `Task ${i + 1}`,
      })),
    )

    const result = await engineFor(spec, adapter).run()
    expect(result.status).toBe('done')
    expect(result.summary.done).toBe(10)
    expect(counters.peak).toBeLessThanOrEqual(8)
  })
})
+
3178
+ // ── Step retry context prepending ───────────────────────────────────────────
3179
+
3180
// Step retry suite: the adapter fails the first call and succeeds afterwards; the prompts it
// receives are recorded so the retry prompt can be inspected.
describe('step retry context prepending', () => {
  it('prepends prior failure output to the prompt on step retry', async () => {
    const fakeAdapter = makeAdapter()
    const promptsSeen: string[] = []

    fakeAdapter.execute.mockImplementation(async (task: { prompt: string }) => {
      // `push` returns the new length, so a result of 1 identifies the very first call.
      const isFirstCall = promptsSeen.push(task.prompt) === 1
      return isFirstCall
        ? { success: false, output: 'step error detail', exitCode: 2 }
        : { success: true, output: 'ok', exitCode: 0 }
    })

    // The task itself allows no retries; its single step allows one.
    const retryingStep = { prompt: 'step prompt text', max_retries: 1 }
    const stepSpec = makeSpec({}, [
      {
        id: 'task-1',
        prompt: 'original task prompt',
        max_retries: 0,
        steps: [retryingStep],
      },
    ])

    await makeEngine({
      spec: stepSpec,
      specYaml: 'name: test',
      adapter: fakeAdapter,
      dbPath,
      _worktreeManager: makeWorktreeManager(),
      _mergeQueue: makeMergeQueue(),
    }).run()

    const [initialPrompt, retryPrompt] = promptsSeen
    // The first attempt receives the step prompt verbatim.
    expect(initialPrompt).toBe('step prompt text')
    // The retry prompt contains the failure header, exit code, prior output and the step prompt.
    expect(retryPrompt).toContain('Previous attempt failed.')
    expect(retryPrompt).toContain('Exit code: 2')
    expect(retryPrompt).toContain('step error detail')
    expect(retryPrompt).toContain('step prompt text')
  })
})
+
3223
+ // ── Security: symlink scan (issue #2) ─────────────────────────────────────────
3224
+
3225
// Symlink scan suite: checks the run status when `partition.scanSymlinks` throws, and when a
// task declares an empty `files` list.
describe('symlink security scan', () => {
  // Runs a single-task spec with the given `files` list and resolves to the engine result.
  const runWithFiles = (files: string[]) => {
    const fakeAdapter = makeAdapter()
    const fileSpec = makeSpec({}, [{ files }])
    return makeEngine({
      spec: fileSpec,
      specYaml: 'name: test',
      adapter: fakeAdapter,
      dbPath,
      _worktreeManager: makeWorktreeManager(),
      _mergeQueue: makeMergeQueue(),
    }).run()
  }

  it('marks task failed when pre-execution scanSymlinks throws', async () => {
    // Every scanSymlinks call throws for the duration of this test; restored in `finally`.
    const throwingScan = vi.spyOn(partition, 'scanSymlinks').mockImplementation(() => {
      throw new Error('symlink_escape: "evil.ts" is a symlink that resolves outside the partition')
    })

    try {
      const outcome = await runWithFiles(['src/evil.ts'])
      expect(outcome.status).toBe('failed')
    } finally {
      throwingScan.mockRestore()
    }
  })

  it('succeeds when files is empty (symlink scan skipped)', async () => {
    // No spy is installed here; the run is simply expected to finish as `done`.
    const outcome = await runWithFiles([])
    expect(outcome.status).toBe('done')
  })
})
+
3267
+ // ── Security: ensureBranch fallback (issue #3) ────────────────────────────────
3268
+
3269
// ensureBranch suite: checks whether the injected `_ensureBranch` hook is called, depending on
// whether the spec sets `branch`.
// NOTE(review): both tests inject `_ensureBranch`; the fallback path named in the suite title is
// not exercised here — confirm.
describe('ensureBranch fallback when _ensureBranch not provided', () => {
  it('calls the injected _ensureBranch when branch is set in spec', async () => {
    const ensureBranchStub = vi.fn().mockResolvedValue(undefined)
    const fakeAdapter = makeAdapter()
    const branchSpec = makeSpec({ branch: 'feature-x' })

    // This test builds the engine through createConvoyEngine directly rather than makeEngine.
    await createConvoyEngine({
      spec: branchSpec,
      specYaml: 'name: test',
      adapter: fakeAdapter,
      dbPath,
      _worktreeManager: makeWorktreeManager(),
      _mergeQueue: makeMergeQueue(),
      _ensureBranch: ensureBranchStub,
    }).run()

    // Called with the branch name and some string as the second argument.
    expect(ensureBranchStub).toHaveBeenCalledWith('feature-x', expect.any(String))
  })

  it('does not call ensureBranch when spec has no branch', async () => {
    const ensureBranchStub = vi.fn().mockResolvedValue(undefined)
    const fakeAdapter = makeAdapter()
    const branchlessSpec = makeSpec({ branch: undefined })

    await makeEngine({
      spec: branchlessSpec,
      specYaml: 'name: test',
      adapter: fakeAdapter,
      dbPath,
      _worktreeManager: makeWorktreeManager(),
      _mergeQueue: makeMergeQueue(),
      _ensureBranch: ensureBranchStub,
    }).run()

    expect(ensureBranchStub).not.toHaveBeenCalled()
  })
})
+
3307
+ // ── Security: secret scan in markdown dual-write (issue #4) ──────────────────
3308
+
3309
// Secret-scan suite: `gates.scanForSecrets` is stubbed to report a finding for one specific file
// name, and the tests inspect the events and DLQ rows the engine records afterwards.
describe('secret scan in DLQ/dispute markdown write', () => {
  // Installs a scanForSecrets stub that reports one finding when `filePath` equals `flaggedFile`
  // and a clean result for every other path. Callers restore the spy themselves.
  const flagSecretsIn = (flaggedFile: string) =>
    vi.spyOn(gates, 'scanForSecrets').mockImplementation((content: string, filePath = '') => {
      const isFlagged = filePath === flaggedFile
      if (!isFlagged) {
        return { clean: true, findings: [] }
      }
      return {
        clean: false,
        findings: [{ pattern: 'Mock Secret', file: filePath, line: 1, snippet: content.slice(0, 20) }],
      }
    })

  // Looks up the first `secret_leak_prevented` event recorded for the convoy, if any.
  const findLeakEvent = (convoyId: string) => {
    const db = createConvoyStore(dbPath)
    const recorded = db.getEvents(convoyId)
    db.close()
    return recorded.find((entry) => entry.type === 'secret_leak_prevented')
  }

  it('task failure still recorded in DB even if DLQ markdown write is silently skipped', async () => {
    // The adapter always fails and the task allows no retries; the run result must report the
    // failure. NOTE(review): no secret finding is simulated in this test — confirm intent.
    const failingAdapter = makeAdapter()
    vi.mocked(failingAdapter.execute).mockResolvedValue({ success: false, output: 'error', exitCode: 1 })
    const noRetrySpec = makeSpec({}, [{ max_retries: 0 }])

    const outcome = await makeEngine({
      spec: noRetrySpec,
      specYaml: 'name: test',
      adapter: failingAdapter,
      dbPath,
      _worktreeManager: makeWorktreeManager(),
      _mergeQueue: makeMergeQueue(),
    }).run()

    expect(outcome.status).toBe('failed')
    expect(outcome.summary.failed).toBe(1)
  })

  it('emits secret_leak_prevented when DLQ markdown write detects secrets', async () => {
    const secretScan = flagSecretsIn('AGENT-FAILURES.md')

    try {
      const failingAdapter = makeAdapter()
      vi.mocked(failingAdapter.execute).mockResolvedValue({ success: false, output: 'fatal', exitCode: 1 })
      const noRetrySpec = makeSpec({}, [{ id: 'task-1', max_retries: 0 }])

      const outcome = await makeEngine({
        spec: noRetrySpec,
        specYaml: 'name: secret-dlq',
        adapter: failingAdapter,
        dbPath,
        _worktreeManager: makeWorktreeManager(),
        _mergeQueue: makeMergeQueue(),
      }).run()

      const leak = findLeakEvent(outcome.convoyId)
      expect(leak).toBeDefined()
      const payload = JSON.parse(leak!.data ?? '{}') as Record<string, unknown>
      // context changed from 'dlq_markdown_write' to 'dlq_dual_write' (MF-2 atomicity fix)
      expect(payload.context).toBe('dlq_dual_write')
    } finally {
      secretScan.mockRestore()
    }
  })

  it('DLQ entry is NOT inserted into SQLite when secret scan blocks (MF-2 atomicity)', async () => {
    const secretScan = flagSecretsIn('AGENT-FAILURES.md')

    try {
      const failingAdapter = makeAdapter()
      vi.mocked(failingAdapter.execute).mockResolvedValue({ success: false, output: 'fatal', exitCode: 1 })
      const noRetrySpec = makeSpec({}, [{ id: 'task-dlq-atomic', max_retries: 0 }])

      const outcome = await makeEngine({
        spec: noRetrySpec,
        specYaml: 'name: dlq-atomic-test',
        adapter: failingAdapter,
        dbPath,
        _worktreeManager: makeWorktreeManager(),
        _mergeQueue: makeMergeQueue(),
      }).run()

      const db = createConvoyStore(dbPath)
      const dlqRows = db.listDlqEntries(outcome.convoyId)
      db.close()

      // With the scan reporting a finding, no DLQ row may exist for this convoy.
      expect(dlqRows).toHaveLength(0)
    } finally {
      secretScan.mockRestore()
    }
  })

  it('emits secret_leak_prevented when dispute markdown write detects secrets', async () => {
    const secretScan = flagSecretsIn('DISPUTES.md')

    try {
      // The adapter succeeds, but the review runner blocks every time under panel review.
      const passingAdapter = makeAdapter()
      vi.mocked(passingAdapter.execute).mockResolvedValue({ success: true, output: 'ok', exitCode: 0 })
      const blockingReviewer = vi.fn().mockResolvedValue({ verdict: 'block', feedback: 'secret found', tokens: 5, model: 'r' })

      const outcome = await makeEngine({
        spec: makeSpec({ defaults: { review: 'panel' } }, [{ id: 'task-1', max_retries: 3 }]),
        specYaml: 'name: secret-dispute',
        adapter: passingAdapter,
        dbPath,
        _worktreeManager: makeWorktreeManager(),
        _mergeQueue: makeMergeQueue(),
        _reviewRunner: blockingReviewer,
      }).run()

      const leak = findLeakEvent(outcome.convoyId)
      expect(leak).toBeDefined()
      const payload = JSON.parse(leak!.data ?? '{}') as Record<string, unknown>
      expect(payload.context).toBe('dispute_markdown_write')
    } finally {
      secretScan.mockRestore()
    }
  })
})
+
3449
+ // ── Security: fileExists path traversal (issue #5) ────────────────────────────
3450
+
3451
// fileExists condition suite: each spec has a conditional first step (`if.fileExists.path`) and
// an unconditional second step; both tests only assert that the run ends as `done`.
describe('fileExists step condition path traversal', () => {
  // Two-step task: the first step is guarded by a fileExists condition on `guardedPath`.
  const conditionalSpec = (guardedPrompt: string, guardedPath: string, plainPrompt: string) =>
    makeSpec({}, [{
      steps: [
        {
          prompt: guardedPrompt,
          if: { step: 'prev', fileExists: { path: guardedPath } },
        },
        {
          prompt: plainPrompt,
        },
      ],
    }])

  it('step with fileExists using relative path executes normally when file absent', async () => {
    const fakeAdapter = makeAdapter()
    // NOTE(review): prompts are collected but never asserted — confirm whether a check on which
    // steps actually ran was intended.
    const promptsSeen: string[] = []
    vi.mocked(fakeAdapter.execute).mockImplementation(async (task) => {
      promptsSeen.push(task.prompt)
      return { success: true, output: 'ok', exitCode: 0 }
    })

    const spec = conditionalSpec('conditional prompt', 'some-nonexistent-file.txt', 'always runs')

    const outcome = await makeEngine({
      spec,
      specYaml: 'name: test',
      adapter: fakeAdapter,
      dbPath,
      _worktreeManager: makeWorktreeManager(),
      _mergeQueue: makeMergeQueue(),
    }).run()

    expect(outcome.status).toBe('done')
  })

  it('step condition with path traversal attempt does not throw (returns false)', async () => {
    const fakeAdapter = makeAdapter()
    // The guarded path climbs out of the working directory with `../` segments.
    const spec = conditionalSpec('should be skipped', '../../../etc/passwd', 'safe step')

    const outcome = await makeEngine({
      spec,
      specYaml: 'name: test',
      adapter: fakeAdapter,
      dbPath,
      _worktreeManager: makeWorktreeManager(),
      _mergeQueue: makeMergeQueue(),
    }).run()

    // The run must complete rather than crash on the traversal-style path.
    expect(outcome.status).toBe('done')
  })
})
+
3514
+ // ── Circuit breaker ───────────────────────────────────────────────────────────
3515
+
3516
// Tests for the per-agent circuit breaker configured through
// `defaults.circuit_breaker` ({ threshold, cooldown_ms }) and for the
// `circuit_state` JSON that the engine persists on the convoy record.
describe('circuit breaker', () => {
  /**
   * Reads the raw `circuit_state` value of the latest convoy in the test DB.
   * The store is closed in `finally` so a failing assertion in the caller can
   * never leak the handle.
   */
  const readLatestCircuitState = () => {
    const store = createConvoyStore(dbPath)
    try {
      return store.getLatestConvoy()?.circuit_state
    } finally {
      store.close()
    }
  }

  it('allows task when no circuit_breaker config is set', async () => {
    const adapter = makeAdapter()
    const spec = makeSpec({}, [{}])
    const engine = makeEngine({
      spec,
      specYaml: 'name: test',
      adapter,
      dbPath,
      _worktreeManager: makeWorktreeManager(),
      _mergeQueue: makeMergeQueue(),
    })
    const result = await engine.run()
    expect(result.status).toBe('done')
    expect(result.summary.done).toBe(1)
    expect(adapter.execute).toHaveBeenCalledTimes(1)
  })

  it('allows task when agent circuit is closed', async () => {
    const adapter = makeAdapter()
    const spec = makeSpec({
      defaults: { circuit_breaker: { threshold: 3, cooldown_ms: 300_000 } },
    }, [{ id: 'task-ok', agent: 'developer', max_retries: 0 }])
    const engine = makeEngine({
      spec,
      specYaml: 'name: test',
      adapter,
      dbPath,
      _worktreeManager: makeWorktreeManager(),
      _mergeQueue: makeMergeQueue(),
    })
    const result = await engine.run()
    expect(result.status).toBe('done')
    expect(adapter.execute).toHaveBeenCalledTimes(1)
  })

  it('blocks subsequent tasks when circuit trips after threshold failures', async () => {
    const adapter = makeAdapter()
    // task-1 fails, task-2 and task-3 should be blocked by open circuit
    adapter.execute
      .mockResolvedValueOnce({ success: false, output: 'err', exitCode: 1 })
      .mockResolvedValue({ success: true, output: 'ok', exitCode: 0 })

    // threshold=2: task-1 failure is recorded twice (failure path + handleExhaustion),
    // reaching threshold=2 → circuit opens before task-2 and task-3 execute
    const spec = makeSpec({
      on_failure: 'continue',
      defaults: { circuit_breaker: { threshold: 2, cooldown_ms: 999_999_999 } },
    }, [
      { id: 'task-1', agent: 'developer', max_retries: 0 },
      { id: 'task-2', agent: 'developer', max_retries: 0 },
      { id: 'task-3', agent: 'developer', max_retries: 0 },
    ])
    const engine = makeEngine({
      spec,
      specYaml: 'name: test',
      adapter,
      dbPath,
      _worktreeManager: makeWorktreeManager(),
      _mergeQueue: makeMergeQueue(),
    })
    const result = await engine.run()
    // Only task-1 should have hit the adapter (circuit opens after task-1 fails)
    expect(adapter.execute).toHaveBeenCalledTimes(1)
    // task-2 and task-3 should be skipped by the circuit breaker
    expect(result.summary.skipped).toBeGreaterThanOrEqual(2)
  })

  it('records success and persists closed circuit state to store', async () => {
    const adapter = makeAdapter()
    const spec = makeSpec({
      defaults: { circuit_breaker: { threshold: 3, cooldown_ms: 300_000 } },
    }, [{ id: 'task-s', agent: 'developer', max_retries: 0 }])
    const engine = makeEngine({
      spec,
      specYaml: 'name: test',
      adapter,
      dbPath,
      _worktreeManager: makeWorktreeManager(),
      _mergeQueue: makeMergeQueue(),
    })
    const result = await engine.run()
    expect(result.status).toBe('done')

    // Missing state and a missing `developer` entry both count as a closed
    // circuit; `||` (not `??`) keeps an empty string on the "no state" path.
    const state = JSON.parse(readLatestCircuitState() || '{}')
    expect(state.developer?.status ?? 'closed').toBe('closed')
  })

  it('records failure and persists open circuit state to store after threshold', async () => {
    const adapter = makeAdapter()
    adapter.execute.mockResolvedValue({ success: false, output: 'err', exitCode: 1 })

    // threshold=2: first failure double-records → count reaches 2 → circuit opens
    const spec = makeSpec({
      on_failure: 'continue',
      defaults: { circuit_breaker: { threshold: 2, cooldown_ms: 999_999_999 } },
    }, [
      { id: 'task-f1', agent: 'developer', max_retries: 0 },
    ])
    const engine = makeEngine({
      spec,
      specYaml: 'name: test',
      adapter,
      dbPath,
      _worktreeManager: makeWorktreeManager(),
      _mergeQueue: makeMergeQueue(),
    })
    await engine.run()

    const rawState = readLatestCircuitState()
    // `.not.toBeNull()` would also accept `undefined` (no record / no column),
    // so require an actual string before inspecting it.
    expect(rawState).toEqual(expect.any(String))
    const state = JSON.parse(rawState || '{}')
    expect(state.developer?.status).toBe('open')
  })

  it('circuit state is persisted to the store after a successful task', async () => {
    const adapter = makeAdapter()
    const spec = makeSpec({
      defaults: { circuit_breaker: { threshold: 2, cooldown_ms: 60_000 } },
    }, [{ id: 'task-persist', agent: 'developer', max_retries: 0 }])
    const engine = makeEngine({
      spec,
      specYaml: 'name: test',
      adapter,
      dbPath,
      _worktreeManager: makeWorktreeManager(),
      _mergeQueue: makeMergeQueue(),
    })
    await engine.run()

    // Persisted state must be a real string; `undefined` must not pass.
    expect(readLatestCircuitState()).toEqual(expect.any(String))
  })
})