opencastle 0.31.7 → 0.32.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220)
  1. package/README.md +4 -1
  2. package/bin/cli.mjs +15 -0
  3. package/dist/cli/agents.d.ts.map +1 -1
  4. package/dist/cli/agents.js +19 -5
  5. package/dist/cli/agents.js.map +1 -1
  6. package/dist/cli/artifacts-cli.d.ts +3 -0
  7. package/dist/cli/artifacts-cli.d.ts.map +1 -0
  8. package/dist/cli/artifacts-cli.js +36 -0
  9. package/dist/cli/artifacts-cli.js.map +1 -0
  10. package/dist/cli/baselines.d.ts.map +1 -1
  11. package/dist/cli/baselines.js +11 -0
  12. package/dist/cli/baselines.js.map +1 -1
  13. package/dist/cli/convoy/artifacts.d.ts +25 -0
  14. package/dist/cli/convoy/artifacts.d.ts.map +1 -0
  15. package/dist/cli/convoy/artifacts.js +129 -0
  16. package/dist/cli/convoy/artifacts.js.map +1 -0
  17. package/dist/cli/convoy/artifacts.test.d.ts +2 -0
  18. package/dist/cli/convoy/artifacts.test.d.ts.map +1 -0
  19. package/dist/cli/convoy/artifacts.test.js +169 -0
  20. package/dist/cli/convoy/artifacts.test.js.map +1 -0
  21. package/dist/cli/convoy/compaction.d.ts +23 -0
  22. package/dist/cli/convoy/compaction.d.ts.map +1 -0
  23. package/dist/cli/convoy/compaction.js +117 -0
  24. package/dist/cli/convoy/compaction.js.map +1 -0
  25. package/dist/cli/convoy/compaction.test.d.ts +2 -0
  26. package/dist/cli/convoy/compaction.test.d.ts.map +1 -0
  27. package/dist/cli/convoy/compaction.test.js +205 -0
  28. package/dist/cli/convoy/compaction.test.js.map +1 -0
  29. package/dist/cli/convoy/contracts.d.ts +22 -0
  30. package/dist/cli/convoy/contracts.d.ts.map +1 -0
  31. package/dist/cli/convoy/contracts.js +254 -0
  32. package/dist/cli/convoy/contracts.js.map +1 -0
  33. package/dist/cli/convoy/contracts.test.d.ts +2 -0
  34. package/dist/cli/convoy/contracts.test.d.ts.map +1 -0
  35. package/dist/cli/convoy/contracts.test.js +239 -0
  36. package/dist/cli/convoy/contracts.test.js.map +1 -0
  37. package/dist/cli/convoy/dag-analysis.d.ts +40 -0
  38. package/dist/cli/convoy/dag-analysis.d.ts.map +1 -0
  39. package/dist/cli/convoy/dag-analysis.js +282 -0
  40. package/dist/cli/convoy/dag-analysis.js.map +1 -0
  41. package/dist/cli/convoy/dag-analysis.test.d.ts +2 -0
  42. package/dist/cli/convoy/dag-analysis.test.d.ts.map +1 -0
  43. package/dist/cli/convoy/dag-analysis.test.js +289 -0
  44. package/dist/cli/convoy/dag-analysis.test.js.map +1 -0
  45. package/dist/cli/convoy/effort-scaling.d.ts +20 -0
  46. package/dist/cli/convoy/effort-scaling.d.ts.map +1 -0
  47. package/dist/cli/convoy/effort-scaling.js +82 -0
  48. package/dist/cli/convoy/effort-scaling.js.map +1 -0
  49. package/dist/cli/convoy/effort-scaling.test.d.ts +2 -0
  50. package/dist/cli/convoy/effort-scaling.test.d.ts.map +1 -0
  51. package/dist/cli/convoy/effort-scaling.test.js +120 -0
  52. package/dist/cli/convoy/effort-scaling.test.js.map +1 -0
  53. package/dist/cli/convoy/engine.d.ts.map +1 -1
  54. package/dist/cli/convoy/engine.js +280 -6
  55. package/dist/cli/convoy/engine.js.map +1 -1
  56. package/dist/cli/convoy/engine.test.js +155 -18
  57. package/dist/cli/convoy/engine.test.js.map +1 -1
  58. package/dist/cli/convoy/event-schemas.d.ts.map +1 -1
  59. package/dist/cli/convoy/event-schemas.js +55 -0
  60. package/dist/cli/convoy/event-schemas.js.map +1 -1
  61. package/dist/cli/convoy/isolation.d.ts +27 -0
  62. package/dist/cli/convoy/isolation.d.ts.map +1 -0
  63. package/dist/cli/convoy/isolation.js +120 -0
  64. package/dist/cli/convoy/isolation.js.map +1 -0
  65. package/dist/cli/convoy/isolation.test.d.ts +2 -0
  66. package/dist/cli/convoy/isolation.test.d.ts.map +1 -0
  67. package/dist/cli/convoy/isolation.test.js +105 -0
  68. package/dist/cli/convoy/isolation.test.js.map +1 -0
  69. package/dist/cli/convoy/review-stages.d.ts +9 -0
  70. package/dist/cli/convoy/review-stages.d.ts.map +1 -0
  71. package/dist/cli/convoy/review-stages.js +134 -0
  72. package/dist/cli/convoy/review-stages.js.map +1 -0
  73. package/dist/cli/convoy/review-stages.test.d.ts +2 -0
  74. package/dist/cli/convoy/review-stages.test.d.ts.map +1 -0
  75. package/dist/cli/convoy/review-stages.test.js +197 -0
  76. package/dist/cli/convoy/review-stages.test.js.map +1 -0
  77. package/dist/cli/convoy/skill-refinement.d.ts +39 -0
  78. package/dist/cli/convoy/skill-refinement.d.ts.map +1 -0
  79. package/dist/cli/convoy/skill-refinement.js +239 -0
  80. package/dist/cli/convoy/skill-refinement.js.map +1 -0
  81. package/dist/cli/convoy/skill-refinement.test.d.ts +2 -0
  82. package/dist/cli/convoy/skill-refinement.test.d.ts.map +1 -0
  83. package/dist/cli/convoy/skill-refinement.test.js +230 -0
  84. package/dist/cli/convoy/skill-refinement.test.js.map +1 -0
  85. package/dist/cli/convoy/spec-builder.d.ts +1 -0
  86. package/dist/cli/convoy/spec-builder.d.ts.map +1 -1
  87. package/dist/cli/convoy/spec-builder.js +11 -0
  88. package/dist/cli/convoy/spec-builder.js.map +1 -1
  89. package/dist/cli/convoy/spec-builder.test.js +54 -0
  90. package/dist/cli/convoy/spec-builder.test.js.map +1 -1
  91. package/dist/cli/convoy/store.d.ts +3 -2
  92. package/dist/cli/convoy/store.d.ts.map +1 -1
  93. package/dist/cli/convoy/store.js +20 -2
  94. package/dist/cli/convoy/store.js.map +1 -1
  95. package/dist/cli/convoy/store.test.js +15 -15
  96. package/dist/cli/convoy/store.test.js.map +1 -1
  97. package/dist/cli/convoy/tdd-gate.d.ts +15 -0
  98. package/dist/cli/convoy/tdd-gate.d.ts.map +1 -0
  99. package/dist/cli/convoy/tdd-gate.js +119 -0
  100. package/dist/cli/convoy/tdd-gate.js.map +1 -0
  101. package/dist/cli/convoy/tdd-gate.test.d.ts +2 -0
  102. package/dist/cli/convoy/tdd-gate.test.d.ts.map +1 -0
  103. package/dist/cli/convoy/tdd-gate.test.js +227 -0
  104. package/dist/cli/convoy/tdd-gate.test.js.map +1 -0
  105. package/dist/cli/convoy/types.d.ts +91 -0
  106. package/dist/cli/convoy/types.d.ts.map +1 -1
  107. package/dist/cli/convoy/types.js +8 -0
  108. package/dist/cli/convoy/types.js.map +1 -1
  109. package/dist/cli/insights.d.ts +3 -0
  110. package/dist/cli/insights.d.ts.map +1 -0
  111. package/dist/cli/insights.js +94 -0
  112. package/dist/cli/insights.js.map +1 -0
  113. package/dist/cli/lesson.d.ts.map +1 -1
  114. package/dist/cli/lesson.js +7 -0
  115. package/dist/cli/lesson.js.map +1 -1
  116. package/dist/cli/log.d.ts.map +1 -1
  117. package/dist/cli/log.js +7 -0
  118. package/dist/cli/log.js.map +1 -1
  119. package/dist/cli/package-config.d.ts +12 -0
  120. package/dist/cli/package-config.d.ts.map +1 -0
  121. package/dist/cli/package-config.js +37 -0
  122. package/dist/cli/package-config.js.map +1 -0
  123. package/dist/cli/package.d.ts +23 -0
  124. package/dist/cli/package.d.ts.map +1 -0
  125. package/dist/cli/package.js +285 -0
  126. package/dist/cli/package.js.map +1 -0
  127. package/dist/cli/package.test.d.ts +2 -0
  128. package/dist/cli/package.test.d.ts.map +1 -0
  129. package/dist/cli/package.test.js +236 -0
  130. package/dist/cli/package.test.js.map +1 -0
  131. package/dist/cli/pipeline.d.ts +6 -0
  132. package/dist/cli/pipeline.d.ts.map +1 -1
  133. package/dist/cli/pipeline.js +15 -2
  134. package/dist/cli/pipeline.js.map +1 -1
  135. package/dist/cli/run/schema.d.ts.map +1 -1
  136. package/dist/cli/run/schema.js +32 -0
  137. package/dist/cli/run/schema.js.map +1 -1
  138. package/dist/cli/run/schema.test.js +51 -0
  139. package/dist/cli/run/schema.test.js.map +1 -1
  140. package/dist/cli/skills.d.ts +3 -0
  141. package/dist/cli/skills.d.ts.map +1 -0
  142. package/dist/cli/skills.js +107 -0
  143. package/dist/cli/skills.js.map +1 -0
  144. package/dist/cli/types.d.ts +4 -1
  145. package/dist/cli/types.d.ts.map +1 -1
  146. package/dist/dashboard/scripts/etl.d.ts.map +1 -1
  147. package/dist/dashboard/scripts/etl.js +44 -11
  148. package/dist/dashboard/scripts/etl.js.map +1 -1
  149. package/package.json +2 -1
  150. package/src/cli/agents.ts +20 -5
  151. package/src/cli/artifacts-cli.ts +41 -0
  152. package/src/cli/baselines.ts +12 -0
  153. package/src/cli/convoy/artifacts.test.ts +201 -0
  154. package/src/cli/convoy/artifacts.ts +186 -0
  155. package/src/cli/convoy/compaction.test.ts +245 -0
  156. package/src/cli/convoy/compaction.ts +164 -0
  157. package/src/cli/convoy/contracts.test.ts +279 -0
  158. package/src/cli/convoy/contracts.ts +280 -0
  159. package/src/cli/convoy/dag-analysis.test.ts +349 -0
  160. package/src/cli/convoy/dag-analysis.ts +371 -0
  161. package/src/cli/convoy/effort-scaling.test.ts +140 -0
  162. package/src/cli/convoy/effort-scaling.ts +90 -0
  163. package/src/cli/convoy/engine.test.ts +175 -18
  164. package/src/cli/convoy/engine.ts +301 -7
  165. package/src/cli/convoy/event-schemas.ts +55 -0
  166. package/src/cli/convoy/isolation.test.ts +137 -0
  167. package/src/cli/convoy/isolation.ts +165 -0
  168. package/src/cli/convoy/review-stages.test.ts +235 -0
  169. package/src/cli/convoy/review-stages.ts +166 -0
  170. package/src/cli/convoy/skill-refinement.test.ts +277 -0
  171. package/src/cli/convoy/skill-refinement.ts +306 -0
  172. package/src/cli/convoy/spec-builder.test.ts +61 -0
  173. package/src/cli/convoy/spec-builder.ts +9 -0
  174. package/src/cli/convoy/store.test.ts +15 -15
  175. package/src/cli/convoy/store.ts +26 -4
  176. package/src/cli/convoy/tdd-gate.test.ts +281 -0
  177. package/src/cli/convoy/tdd-gate.ts +154 -0
  178. package/src/cli/convoy/types.ts +51 -0
  179. package/src/cli/insights.ts +99 -0
  180. package/src/cli/lesson.ts +8 -0
  181. package/src/cli/log.ts +8 -0
  182. package/src/cli/package-config.ts +48 -0
  183. package/src/cli/package.test.ts +276 -0
  184. package/src/cli/package.ts +329 -0
  185. package/src/cli/pipeline.ts +21 -2
  186. package/src/cli/run/schema.test.ts +58 -0
  187. package/src/cli/run/schema.ts +33 -0
  188. package/src/cli/skills.ts +121 -0
  189. package/src/cli/types.ts +4 -1
  190. package/src/dashboard/dist/_astro/index.D6quLrA6.css +1 -0
  191. package/src/dashboard/dist/data/convoy-list.json +21 -7
  192. package/src/dashboard/dist/data/convoys/demo-api-v2.json +3 -3
  193. package/src/dashboard/dist/data/convoys/demo-auth-revamp.json +5 -5
  194. package/src/dashboard/dist/data/convoys/demo-convoy-1.json +2 -2
  195. package/src/dashboard/dist/data/convoys/demo-convoy-2.json +1 -1
  196. package/src/dashboard/dist/data/convoys/demo-dashboard-ui.json +7 -7
  197. package/src/dashboard/dist/data/convoys/demo-data-pipeline.json +3 -3
  198. package/src/dashboard/dist/data/convoys/demo-deploy-ci.json +2 -2
  199. package/src/dashboard/dist/data/convoys/demo-docs-update.json +2 -2
  200. package/src/dashboard/dist/data/convoys/demo-perf-opt.json +4 -4
  201. package/src/dashboard/dist/index.html +306 -33
  202. package/src/dashboard/node_modules/.vite/deps/_metadata.json +6 -6
  203. package/src/dashboard/public/data/convoy-list.json +21 -7
  204. package/src/dashboard/public/data/convoys/demo-api-v2.json +3 -3
  205. package/src/dashboard/public/data/convoys/demo-auth-revamp.json +5 -5
  206. package/src/dashboard/public/data/convoys/demo-convoy-1.json +2 -2
  207. package/src/dashboard/public/data/convoys/demo-convoy-2.json +1 -1
  208. package/src/dashboard/public/data/convoys/demo-dashboard-ui.json +7 -7
  209. package/src/dashboard/public/data/convoys/demo-data-pipeline.json +3 -3
  210. package/src/dashboard/public/data/convoys/demo-deploy-ci.json +2 -2
  211. package/src/dashboard/public/data/convoys/demo-docs-update.json +2 -2
  212. package/src/dashboard/public/data/convoys/demo-perf-opt.json +4 -4
  213. package/src/dashboard/scripts/etl.test.ts +14 -0
  214. package/src/dashboard/scripts/etl.ts +48 -16
  215. package/src/dashboard/scripts/generate-demo-db.ts +18 -10
  216. package/src/dashboard/src/pages/index.astro +348 -45
  217. package/src/dashboard/src/styles/dashboard.css +56 -0
  218. package/src/orchestrator/prompts/assess-complexity.prompt.md +13 -0
  219. package/src/orchestrator/prompts/generate-convoy.prompt.md +19 -0
  220. package/src/dashboard/dist/_astro/index.BRDFmNzR.css +0 -1
@@ -1950,6 +1950,7 @@ function makeTaskRecord(overrides: Partial<TaskRecord> = {}): TaskRecord {
1950
1950
  dispute_id: null,
1951
1951
  drift_score: null,
1952
1952
  drift_retried: 0,
1953
+ compaction_count: 0,
1953
1954
  ...overrides,
1954
1955
  }
1955
1956
  }
@@ -2125,7 +2126,8 @@ describe('review pipeline', () => {
2125
2126
  })
2126
2127
  const result = await engine.run()
2127
2128
  expect(result.status).toBe('done')
2128
- expect(mockReviewRunner).toHaveBeenCalledOnce()
2129
+ // Two-stage review: stage 1 (spec compliance) + stage 2 (code quality) = 2 calls
2130
+ expect(mockReviewRunner).toHaveBeenCalledTimes(2)
2129
2131
  expect(mockReviewRunner).toHaveBeenCalledWith(expect.objectContaining({ agent: 'developer' }), 'fast', 'default')
2130
2132
  })
2131
2133
 
@@ -2136,8 +2138,9 @@ describe('review pipeline', () => {
2136
2138
  return Promise.resolve({ success: true, output: 'ok', exitCode: 0 })
2137
2139
  })
2138
2140
  const mockReviewRunner = vi.fn()
2139
- .mockResolvedValueOnce({ verdict: 'block', feedback: 'Missing tests', tokens: 50, model: 'reviewer' })
2140
- .mockResolvedValueOnce({ verdict: 'pass', feedback: '', tokens: 50, model: 'reviewer' })
2141
+ .mockResolvedValueOnce({ verdict: 'block', feedback: 'Missing tests', tokens: 50, model: 'reviewer' }) // round 1 stage 1 → block (short-circuits)
2142
+ .mockResolvedValueOnce({ verdict: 'pass', feedback: '', tokens: 50, model: 'reviewer' }) // round 2 stage 1 → pass
2143
+ .mockResolvedValueOnce({ verdict: 'pass', feedback: '', tokens: 50, model: 'reviewer' }) // round 2 stage 2 → pass
2141
2144
 
2142
2145
  const engine = makeEngine({
2143
2146
  spec: makeSpec({ defaults: { review: 'fast' } }, [{ max_retries: 1 }]),
@@ -2151,7 +2154,8 @@ describe('review pipeline', () => {
2151
2154
  const result = await engine.run()
2152
2155
  expect(result.status).toBe('done')
2153
2156
  expect(adapter.execute).toHaveBeenCalledTimes(2)
2154
- expect(mockReviewRunner).toHaveBeenCalledTimes(2)
2157
+ // Round 1: stage 1 blocks (1 call). Round 2: stage 1 pass + stage 2 pass (2 calls). Total: 3
2158
+ expect(mockReviewRunner).toHaveBeenCalledTimes(3)
2155
2159
  // Prompt on second attempt should contain feedback
2156
2160
  const secondPrompt = (adapter.execute.mock.calls[1] as [Task])[0].prompt
2157
2161
  expect(secondPrompt).toContain('Missing tests')
@@ -2183,10 +2187,10 @@ describe('review pipeline', () => {
2183
2187
  let callCount = 0
2184
2188
  const mockReviewRunner = vi.fn().mockImplementation(() => {
2185
2189
  callCount++
2186
- // 2 pass, 1 block
2187
- return Promise.resolve(callCount <= 2
2188
- ? { verdict: 'pass', feedback: '', tokens: 30, model: 'reviewer' }
2189
- : { verdict: 'block', feedback: 'Minor issue', tokens: 30, model: 'reviewer' })
2190
+ // Reviewer C blocks at stage 1 (call 3); reviewers A and B pass both stages (calls 1,2,4,5)
2191
+ return Promise.resolve(callCount === 3
2192
+ ? { verdict: 'block', feedback: 'Minor issue', tokens: 30, model: 'reviewer' }
2193
+ : { verdict: 'pass', feedback: '', tokens: 30, model: 'reviewer' })
2190
2194
  })
2191
2195
 
2192
2196
  const engine = makeEngine({
@@ -2200,7 +2204,8 @@ describe('review pipeline', () => {
2200
2204
  })
2201
2205
  const result = await engine.run()
2202
2206
  expect(result.status).toBe('done')
2203
- expect(mockReviewRunner).toHaveBeenCalledTimes(3)
2207
+ // Two-stage panel: 2 pass reviewers × 2 stages + 1 block reviewer × 1 stage = 5 calls
2208
+ expect(mockReviewRunner).toHaveBeenCalledTimes(5)
2204
2209
  })
2205
2210
 
2206
2211
  it('panel review 2/3 BLOCK — task retried with MUST-FIX', async () => {
@@ -2254,9 +2259,9 @@ describe('review pipeline', () => {
2254
2259
  })
2255
2260
  const result = await engine.run()
2256
2261
  expect(result.status).toBe('done')
2257
- // first task: budget not exceeded (0 < 100), review runs
2258
- // second task: budget exceeded (200 >= 100), review skipped
2259
- expect(mockReviewRunner).toHaveBeenCalledTimes(1)
2262
+ // first task: budget not exceeded (0 < 100), two-stage review runs (2 calls, total 400 tokens)
2263
+ // second task: budget exceeded (400 >= 100), review skipped
2264
+ expect(mockReviewRunner).toHaveBeenCalledTimes(2)
2260
2265
  })
2261
2266
 
2262
2267
  it('auto route: developer agent with empty diff → auto-pass (no reviewer call)', async () => {
@@ -2291,7 +2296,7 @@ describe('review pipeline', () => {
2291
2296
  const store = createConvoyStore(dbPath)
2292
2297
  const tasks = store.getTasksByConvoy(result.convoyId)
2293
2298
  store.close()
2294
- expect(tasks[0].review_tokens).toBe(77)
2299
+ expect(tasks[0].review_tokens).toBe(154) // two-stage: 77 (stage 1) + 77 (stage 2)
2295
2300
  expect(tasks[0].review_level).toBe('fast')
2296
2301
  expect(tasks[0].review_verdict).toBe('pass')
2297
2302
  })
@@ -2339,8 +2344,9 @@ describe('review pipeline', () => {
2339
2344
 
2340
2345
  it('full fast-review flow: BLOCK on first attempt → retry → PASS → done with complete events', async () => {
2341
2346
  const mockReviewRunner = vi.fn()
2342
- .mockResolvedValueOnce({ verdict: 'block', feedback: 'Add more tests', tokens: 40, model: 'reviewer' })
2343
- .mockResolvedValueOnce({ verdict: 'pass', feedback: '', tokens: 35, model: 'reviewer' })
2347
+ .mockResolvedValueOnce({ verdict: 'block', feedback: 'Add more tests', tokens: 40, model: 'reviewer' }) // round 1 stage 1 → block
2348
+ .mockResolvedValueOnce({ verdict: 'pass', feedback: '', tokens: 35, model: 'reviewer' }) // round 2 stage 1 → pass
2349
+ .mockResolvedValueOnce({ verdict: 'pass', feedback: '', tokens: 35, model: 'reviewer' }) // round 2 stage 2 → pass
2344
2350
 
2345
2351
  const engine = makeEngine({
2346
2352
  spec: makeSpec({ defaults: { review: 'fast' } }, [{ id: 'task-1', max_retries: 1 }]),
@@ -2355,7 +2361,8 @@ describe('review pipeline', () => {
2355
2361
 
2356
2362
  expect(result.status).toBe('done')
2357
2363
  expect(adapter.execute).toHaveBeenCalledTimes(2)
2358
- expect(mockReviewRunner).toHaveBeenCalledTimes(2)
2364
+ // Round 1: 1 call (block short-circuits). Round 2: 2 calls (stage 1 + stage 2). Total: 3
2365
+ expect(mockReviewRunner).toHaveBeenCalledTimes(3)
2359
2366
 
2360
2367
  const store = createConvoyStore(dbPath)
2361
2368
  const tasks = store.getTasksByConvoy(result.convoyId)
@@ -2405,7 +2412,9 @@ describe('review pipeline', () => {
2405
2412
 
2406
2413
  expect(result.status).toBe('done')
2407
2414
  expect(adapter.execute).toHaveBeenCalledTimes(2)
2408
- expect(mockReviewRunner).toHaveBeenCalledTimes(6)
2415
+ // Two-stage panel round 1: 2 blocks ×1 call + 1 pass ×2 calls = 4 calls
2416
+ // Two-stage panel round 2: 3 pass ×2 calls = 6 calls. Total: 10
2417
+ expect(mockReviewRunner).toHaveBeenCalledTimes(10)
2409
2418
 
2410
2419
  const store = createConvoyStore(dbPath)
2411
2420
  const tasks = store.getTasksByConvoy(result.convoyId)
@@ -2520,10 +2529,16 @@ describe('drift detection', () => {
2520
2529
 
2521
2530
  it('detect_drift=true triggers drift check and retries on low confidence', async () => {
2522
2531
  // Call sequence: main task → drift check (low score) → main task retry
2532
+ const driftRetryOutput = [
2533
+ 'done retry',
2534
+ '<!-- OUTPUT_CONTRACT',
2535
+ '{ "files_changed": ["src/foo.ts"], "tests_added": ["src/foo.test.ts"], "summary": "done" }',
2536
+ '-->',
2537
+ ].join('\n')
2523
2538
  adapter.execute
2524
2539
  .mockResolvedValueOnce({ success: true, output: 'done', exitCode: 0 })
2525
2540
  .mockResolvedValueOnce({ success: true, output: '{"score": 0.3, "explanation": "uncertain"}', exitCode: 0 })
2526
- .mockResolvedValueOnce({ success: true, output: 'done retry', exitCode: 0 })
2541
+ .mockResolvedValueOnce({ success: true, output: driftRetryOutput, exitCode: 0 })
2527
2542
 
2528
2543
  const engine = makeEngine({
2529
2544
  spec: makeSpec({ defaults: { detect_drift: true } }, [{ id: 'task-1', max_retries: 1 }]),
@@ -3757,3 +3772,145 @@ describe('createEventEmitter callsite safety', () => {
3757
3772
  testStore.close()
3758
3773
  })
3759
3774
  })
3775
+
3776
+ // ── Contract retry ────────────────────────────────────────────────────────────
3777
+
3778
+ describe('contract retry', () => {
3779
+ it('retries when output is missing OUTPUT_CONTRACT and retries remain', async () => {
3780
+ const validContractOutput = [
3781
+ 'Work done.',
3782
+ '<!-- OUTPUT_CONTRACT',
3783
+ '{ "files_changed": ["src/foo.ts"], "tests_added": ["src/foo.test.ts"], "summary": "implemented" }',
3784
+ '-->',
3785
+ ].join('\n')
3786
+
3787
+ const adapter = makeAdapter()
3788
+ adapter.execute
3789
+ .mockResolvedValueOnce({ success: true, output: 'no contract here', exitCode: 0 })
3790
+ .mockResolvedValueOnce({ success: true, output: validContractOutput, exitCode: 0 })
3791
+
3792
+ const engine = makeEngine({
3793
+ spec: makeSpec({}, [{ agent: 'developer', max_retries: 1 }]),
3794
+ specYaml: 'name: test',
3795
+ adapter,
3796
+ dbPath,
3797
+ _worktreeManager: makeWorktreeManager(),
3798
+ _mergeQueue: makeMergeQueue(),
3799
+ })
3800
+ const result = await engine.run()
3801
+ expect(result.status).toBe('done')
3802
+ expect(adapter.execute).toHaveBeenCalledTimes(2)
3803
+
3804
+ // Second prompt should contain the contract retry message
3805
+ const secondPrompt = (adapter.execute.mock.calls[1] as [Task])[0].prompt
3806
+ expect(secondPrompt).toContain('OUTPUT_CONTRACT')
3807
+ expect(secondPrompt).toContain('Missing fields')
3808
+
3809
+ const store = createConvoyStore(dbPath)
3810
+ const tasks = store.getTasksByConvoy(result.convoyId)
3811
+ store.close()
3812
+ expect(tasks[0].status).toBe('done')
3813
+ })
3814
+
3815
+ it('emits contract_violation and marks done when retries exhausted', async () => {
3816
+ const adapter = makeAdapter()
3817
+ adapter.execute.mockResolvedValue({ success: true, output: 'no contract here', exitCode: 0 })
3818
+
3819
+ const engine = makeEngine({
3820
+ spec: makeSpec({}, [{ agent: 'developer', max_retries: 0 }]),
3821
+ specYaml: 'name: test',
3822
+ adapter,
3823
+ dbPath,
3824
+ _worktreeManager: makeWorktreeManager(),
3825
+ _mergeQueue: makeMergeQueue(),
3826
+ })
3827
+ const result = await engine.run()
3828
+ expect(result.status).toBe('done')
3829
+ expect(adapter.execute).toHaveBeenCalledTimes(1)
3830
+
3831
+ const store = createConvoyStore(dbPath)
3832
+ const events = store.getEvents(result.convoyId)
3833
+ const tasks = store.getTasksByConvoy(result.convoyId)
3834
+ store.close()
3835
+
3836
+ const violationEvent = events.find(e => e.type === 'contract_violation')
3837
+ expect(violationEvent).toBeDefined()
3838
+ expect(tasks[0].status).toBe('done')
3839
+ })
3840
+ })
3841
+
3842
+ // ── Compaction continuation ───────────────────────────────────────────────────
3843
+
3844
+ describe('compaction continuation', () => {
3845
+ it('re-enqueues without incrementing retries when threshold exceeded', async () => {
3846
+ const adapter = makeAdapter()
3847
+ adapter.execute
3848
+ .mockResolvedValueOnce({ success: true, output: 'phase 1 done', exitCode: 0, usage: { total_tokens: 170_000 } })
3849
+ .mockResolvedValueOnce({ success: true, output: 'all done', exitCode: 0, usage: { total_tokens: 1_000 } })
3850
+
3851
+ const engine = makeEngine({
3852
+ spec: makeSpec(
3853
+ { defaults: { compaction: { enabled: true, token_threshold_pct: 80, summary_max_tokens: 2000 } } },
3854
+ [{ model: 'claude-sonnet-4-6', max_retries: 0 }],
3855
+ ),
3856
+ specYaml: 'name: test',
3857
+ adapter,
3858
+ dbPath,
3859
+ _worktreeManager: makeWorktreeManager(),
3860
+ _mergeQueue: makeMergeQueue(),
3861
+ })
3862
+ const result = await engine.run()
3863
+ expect(result.status).toBe('done')
3864
+ expect(adapter.execute).toHaveBeenCalledTimes(2)
3865
+
3866
+ const store = createConvoyStore(dbPath)
3867
+ const events = store.getEvents(result.convoyId)
3868
+ const tasks = store.getTasksByConvoy(result.convoyId)
3869
+ store.close()
3870
+
3871
+ // Compaction event emitted
3872
+ const compactedEvent = events.find(e => e.type === 'context_compacted')
3873
+ expect(compactedEvent).toBeDefined()
3874
+
3875
+ // Task completed successfully and retries were NOT incremented by compaction
3876
+ expect(tasks[0].status).toBe('done')
3877
+ expect(tasks[0].retries).toBe(0)
3878
+ })
3879
+
3880
+ it('fails with context_exhausted when max compactions reached', async () => {
3881
+ const adapter = makeAdapter()
3882
+ // All calls return high token count — will exhaust compaction budget after 3+1 calls
3883
+ adapter.execute.mockResolvedValue({
3884
+ success: true,
3885
+ output: 'partial work',
3886
+ exitCode: 0,
3887
+ usage: { total_tokens: 170_000 },
3888
+ })
3889
+
3890
+ const engine = makeEngine({
3891
+ spec: makeSpec(
3892
+ { defaults: { compaction: { enabled: true, token_threshold_pct: 80, summary_max_tokens: 2000 } } },
3893
+ [{ model: 'claude-sonnet-4-6', max_retries: 0 }],
3894
+ ),
3895
+ specYaml: 'name: test',
3896
+ adapter,
3897
+ dbPath,
3898
+ _worktreeManager: makeWorktreeManager(),
3899
+ _mergeQueue: makeMergeQueue(),
3900
+ })
3901
+ const result = await engine.run()
3902
+ expect(result.status).toBe('failed')
3903
+
3904
+ const store = createConvoyStore(dbPath)
3905
+ const events = store.getEvents(result.convoyId)
3906
+ const tasks = store.getTasksByConvoy(result.convoyId)
3907
+ store.close()
3908
+
3909
+ const exhaustedEvent = events.find(e => {
3910
+ if (e.type !== 'task_failed') return false
3911
+ try { return (JSON.parse(e.data as string) as { reason: string }).reason === 'context_exhausted' } catch { return false }
3912
+ })
3913
+ expect(exhaustedEvent).toBeDefined()
3914
+ expect(tasks[0].status).toBe('failed')
3915
+ })
3916
+ })