opencastle 0.27.0 → 0.27.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (242)
  1. package/bin/cli.mjs +6 -0
  2. package/dist/cli/agents.d.ts +3 -0
  3. package/dist/cli/agents.d.ts.map +1 -0
  4. package/dist/cli/agents.js +161 -0
  5. package/dist/cli/agents.js.map +1 -0
  6. package/dist/cli/baselines.d.ts +3 -0
  7. package/dist/cli/baselines.d.ts.map +1 -0
  8. package/dist/cli/baselines.js +128 -0
  9. package/dist/cli/baselines.js.map +1 -0
  10. package/dist/cli/convoy/dashboard-types.d.ts +146 -0
  11. package/dist/cli/convoy/dashboard-types.d.ts.map +1 -0
  12. package/dist/cli/convoy/dashboard-types.js +2 -0
  13. package/dist/cli/convoy/dashboard-types.js.map +1 -0
  14. package/dist/cli/convoy/engine.d.ts +67 -2
  15. package/dist/cli/convoy/engine.d.ts.map +1 -1
  16. package/dist/cli/convoy/engine.js +2036 -28
  17. package/dist/cli/convoy/engine.js.map +1 -1
  18. package/dist/cli/convoy/engine.test.js +1659 -70
  19. package/dist/cli/convoy/engine.test.js.map +1 -1
  20. package/dist/cli/convoy/event-schemas.d.ts +9 -0
  21. package/dist/cli/convoy/event-schemas.d.ts.map +1 -0
  22. package/dist/cli/convoy/event-schemas.js +185 -0
  23. package/dist/cli/convoy/event-schemas.js.map +1 -0
  24. package/dist/cli/convoy/events.d.ts +12 -1
  25. package/dist/cli/convoy/events.d.ts.map +1 -1
  26. package/dist/cli/convoy/events.js +186 -13
  27. package/dist/cli/convoy/events.js.map +1 -1
  28. package/dist/cli/convoy/events.test.js +325 -28
  29. package/dist/cli/convoy/events.test.js.map +1 -1
  30. package/dist/cli/convoy/expertise.d.ts +16 -0
  31. package/dist/cli/convoy/expertise.d.ts.map +1 -0
  32. package/dist/cli/convoy/expertise.js +121 -0
  33. package/dist/cli/convoy/expertise.js.map +1 -0
  34. package/dist/cli/convoy/expertise.test.d.ts +2 -0
  35. package/dist/cli/convoy/expertise.test.d.ts.map +1 -0
  36. package/dist/cli/convoy/expertise.test.js +96 -0
  37. package/dist/cli/convoy/expertise.test.js.map +1 -0
  38. package/dist/cli/convoy/export.test.js +1 -0
  39. package/dist/cli/convoy/export.test.js.map +1 -1
  40. package/dist/cli/convoy/formula.d.ts +19 -0
  41. package/dist/cli/convoy/formula.d.ts.map +1 -0
  42. package/dist/cli/convoy/formula.js +142 -0
  43. package/dist/cli/convoy/formula.js.map +1 -0
  44. package/dist/cli/convoy/formula.test.d.ts +2 -0
  45. package/dist/cli/convoy/formula.test.d.ts.map +1 -0
  46. package/dist/cli/convoy/formula.test.js +342 -0
  47. package/dist/cli/convoy/formula.test.js.map +1 -0
  48. package/dist/cli/convoy/gates.d.ts +128 -0
  49. package/dist/cli/convoy/gates.d.ts.map +1 -0
  50. package/dist/cli/convoy/gates.js +606 -0
  51. package/dist/cli/convoy/gates.js.map +1 -0
  52. package/dist/cli/convoy/gates.test.d.ts +2 -0
  53. package/dist/cli/convoy/gates.test.d.ts.map +1 -0
  54. package/dist/cli/convoy/gates.test.js +976 -0
  55. package/dist/cli/convoy/gates.test.js.map +1 -0
  56. package/dist/cli/convoy/health.d.ts +11 -0
  57. package/dist/cli/convoy/health.d.ts.map +1 -1
  58. package/dist/cli/convoy/health.js +54 -0
  59. package/dist/cli/convoy/health.js.map +1 -1
  60. package/dist/cli/convoy/health.test.js +56 -1
  61. package/dist/cli/convoy/health.test.js.map +1 -1
  62. package/dist/cli/convoy/issues.d.ts +8 -0
  63. package/dist/cli/convoy/issues.d.ts.map +1 -0
  64. package/dist/cli/convoy/issues.js +98 -0
  65. package/dist/cli/convoy/issues.js.map +1 -0
  66. package/dist/cli/convoy/issues.test.d.ts +2 -0
  67. package/dist/cli/convoy/issues.test.d.ts.map +1 -0
  68. package/dist/cli/convoy/issues.test.js +107 -0
  69. package/dist/cli/convoy/issues.test.js.map +1 -0
  70. package/dist/cli/convoy/knowledge.d.ts +5 -0
  71. package/dist/cli/convoy/knowledge.d.ts.map +1 -0
  72. package/dist/cli/convoy/knowledge.js +116 -0
  73. package/dist/cli/convoy/knowledge.js.map +1 -0
  74. package/dist/cli/convoy/knowledge.test.d.ts +2 -0
  75. package/dist/cli/convoy/knowledge.test.d.ts.map +1 -0
  76. package/dist/cli/convoy/knowledge.test.js +87 -0
  77. package/dist/cli/convoy/knowledge.test.js.map +1 -0
  78. package/dist/cli/convoy/lessons.d.ts +17 -0
  79. package/dist/cli/convoy/lessons.d.ts.map +1 -0
  80. package/dist/cli/convoy/lessons.js +149 -0
  81. package/dist/cli/convoy/lessons.js.map +1 -0
  82. package/dist/cli/convoy/lessons.test.d.ts +2 -0
  83. package/dist/cli/convoy/lessons.test.d.ts.map +1 -0
  84. package/dist/cli/convoy/lessons.test.js +135 -0
  85. package/dist/cli/convoy/lessons.test.js.map +1 -0
  86. package/dist/cli/convoy/lock.d.ts +13 -0
  87. package/dist/cli/convoy/lock.d.ts.map +1 -0
  88. package/dist/cli/convoy/lock.js +88 -0
  89. package/dist/cli/convoy/lock.js.map +1 -0
  90. package/dist/cli/convoy/lock.test.d.ts +2 -0
  91. package/dist/cli/convoy/lock.test.d.ts.map +1 -0
  92. package/dist/cli/convoy/lock.test.js +136 -0
  93. package/dist/cli/convoy/lock.test.js.map +1 -0
  94. package/dist/cli/convoy/log-merge.test.d.ts +2 -0
  95. package/dist/cli/convoy/log-merge.test.d.ts.map +1 -0
  96. package/dist/cli/convoy/log-merge.test.js +147 -0
  97. package/dist/cli/convoy/log-merge.test.js.map +1 -0
  98. package/dist/cli/convoy/merge.d.ts +4 -0
  99. package/dist/cli/convoy/merge.d.ts.map +1 -1
  100. package/dist/cli/convoy/merge.js +18 -1
  101. package/dist/cli/convoy/merge.js.map +1 -1
  102. package/dist/cli/convoy/merge.test.js +6 -7
  103. package/dist/cli/convoy/merge.test.js.map +1 -1
  104. package/dist/cli/convoy/partition.d.ts +51 -0
  105. package/dist/cli/convoy/partition.d.ts.map +1 -0
  106. package/dist/cli/convoy/partition.js +186 -0
  107. package/dist/cli/convoy/partition.js.map +1 -0
  108. package/dist/cli/convoy/partition.test.d.ts +2 -0
  109. package/dist/cli/convoy/partition.test.d.ts.map +1 -0
  110. package/dist/cli/convoy/partition.test.js +315 -0
  111. package/dist/cli/convoy/partition.test.js.map +1 -0
  112. package/dist/cli/convoy/pipeline.test.js +6 -0
  113. package/dist/cli/convoy/pipeline.test.js.map +1 -1
  114. package/dist/cli/convoy/store.d.ts +99 -7
  115. package/dist/cli/convoy/store.d.ts.map +1 -1
  116. package/dist/cli/convoy/store.js +764 -31
  117. package/dist/cli/convoy/store.js.map +1 -1
  118. package/dist/cli/convoy/store.test.js +1810 -18
  119. package/dist/cli/convoy/store.test.js.map +1 -1
  120. package/dist/cli/convoy/types.d.ts +427 -5
  121. package/dist/cli/convoy/types.d.ts.map +1 -1
  122. package/dist/cli/convoy/types.js +42 -1
  123. package/dist/cli/convoy/types.js.map +1 -1
  124. package/dist/cli/log.d.ts +11 -0
  125. package/dist/cli/log.d.ts.map +1 -1
  126. package/dist/cli/log.js +114 -2
  127. package/dist/cli/log.js.map +1 -1
  128. package/dist/cli/run/adapters/claude.d.ts +2 -0
  129. package/dist/cli/run/adapters/claude.d.ts.map +1 -1
  130. package/dist/cli/run/adapters/claude.js +89 -49
  131. package/dist/cli/run/adapters/claude.js.map +1 -1
  132. package/dist/cli/run/adapters/claude.test.d.ts +2 -0
  133. package/dist/cli/run/adapters/claude.test.d.ts.map +1 -0
  134. package/dist/cli/run/adapters/claude.test.js +205 -0
  135. package/dist/cli/run/adapters/claude.test.js.map +1 -0
  136. package/dist/cli/run/adapters/copilot.d.ts +1 -0
  137. package/dist/cli/run/adapters/copilot.d.ts.map +1 -1
  138. package/dist/cli/run/adapters/copilot.js +84 -46
  139. package/dist/cli/run/adapters/copilot.js.map +1 -1
  140. package/dist/cli/run/adapters/copilot.test.d.ts +2 -0
  141. package/dist/cli/run/adapters/copilot.test.d.ts.map +1 -0
  142. package/dist/cli/run/adapters/copilot.test.js +195 -0
  143. package/dist/cli/run/adapters/copilot.test.js.map +1 -0
  144. package/dist/cli/run/adapters/cursor.d.ts +1 -0
  145. package/dist/cli/run/adapters/cursor.d.ts.map +1 -1
  146. package/dist/cli/run/adapters/cursor.js +83 -47
  147. package/dist/cli/run/adapters/cursor.js.map +1 -1
  148. package/dist/cli/run/adapters/cursor.test.d.ts +2 -0
  149. package/dist/cli/run/adapters/cursor.test.d.ts.map +1 -0
  150. package/dist/cli/run/adapters/cursor.test.js +129 -0
  151. package/dist/cli/run/adapters/cursor.test.js.map +1 -0
  152. package/dist/cli/run/adapters/opencode.d.ts +1 -0
  153. package/dist/cli/run/adapters/opencode.d.ts.map +1 -1
  154. package/dist/cli/run/adapters/opencode.js +81 -47
  155. package/dist/cli/run/adapters/opencode.js.map +1 -1
  156. package/dist/cli/run/adapters/opencode.test.d.ts +2 -0
  157. package/dist/cli/run/adapters/opencode.test.d.ts.map +1 -0
  158. package/dist/cli/run/adapters/opencode.test.js +119 -0
  159. package/dist/cli/run/adapters/opencode.test.js.map +1 -0
  160. package/dist/cli/run/executor.js +1 -1
  161. package/dist/cli/run/executor.js.map +1 -1
  162. package/dist/cli/run/schema.d.ts.map +1 -1
  163. package/dist/cli/run/schema.js +245 -4
  164. package/dist/cli/run/schema.js.map +1 -1
  165. package/dist/cli/run/schema.test.js +669 -0
  166. package/dist/cli/run/schema.test.js.map +1 -1
  167. package/dist/cli/run.d.ts.map +1 -1
  168. package/dist/cli/run.js +362 -22
  169. package/dist/cli/run.js.map +1 -1
  170. package/dist/cli/types.d.ts +85 -2
  171. package/dist/cli/types.d.ts.map +1 -1
  172. package/dist/cli/types.js.map +1 -1
  173. package/dist/cli/watch.d.ts +15 -0
  174. package/dist/cli/watch.d.ts.map +1 -0
  175. package/dist/cli/watch.js +279 -0
  176. package/dist/cli/watch.js.map +1 -0
  177. package/package.json +5 -1
  178. package/src/cli/agents.ts +177 -0
  179. package/src/cli/baselines.ts +143 -0
  180. package/src/cli/convoy/TELEMETRY.md +203 -0
  181. package/src/cli/convoy/dashboard-types.ts +141 -0
  182. package/src/cli/convoy/engine.test.ts +1937 -70
  183. package/src/cli/convoy/engine.ts +2350 -40
  184. package/src/cli/convoy/event-schemas.ts +195 -0
  185. package/src/cli/convoy/events.test.ts +384 -39
  186. package/src/cli/convoy/events.ts +202 -16
  187. package/src/cli/convoy/expertise.test.ts +128 -0
  188. package/src/cli/convoy/expertise.ts +163 -0
  189. package/src/cli/convoy/export.test.ts +1 -0
  190. package/src/cli/convoy/formula.test.ts +405 -0
  191. package/src/cli/convoy/formula.ts +174 -0
  192. package/src/cli/convoy/gates.test.ts +1169 -0
  193. package/src/cli/convoy/gates.ts +774 -0
  194. package/src/cli/convoy/health.test.ts +64 -2
  195. package/src/cli/convoy/health.ts +80 -2
  196. package/src/cli/convoy/issues.test.ts +143 -0
  197. package/src/cli/convoy/issues.ts +136 -0
  198. package/src/cli/convoy/knowledge.test.ts +101 -0
  199. package/src/cli/convoy/knowledge.ts +132 -0
  200. package/src/cli/convoy/lessons.test.ts +188 -0
  201. package/src/cli/convoy/lessons.ts +164 -0
  202. package/src/cli/convoy/lock.test.ts +181 -0
  203. package/src/cli/convoy/lock.ts +103 -0
  204. package/src/cli/convoy/log-merge.test.ts +179 -0
  205. package/src/cli/convoy/merge.test.ts +6 -7
  206. package/src/cli/convoy/merge.ts +19 -1
  207. package/src/cli/convoy/partition.test.ts +423 -0
  208. package/src/cli/convoy/partition.ts +232 -0
  209. package/src/cli/convoy/pipeline.test.ts +6 -0
  210. package/src/cli/convoy/store.test.ts +2041 -20
  211. package/src/cli/convoy/store.ts +945 -46
  212. package/src/cli/convoy/types.ts +278 -4
  213. package/src/cli/log.ts +120 -2
  214. package/src/cli/run/adapters/claude.test.ts +234 -0
  215. package/src/cli/run/adapters/claude.ts +45 -5
  216. package/src/cli/run/adapters/copilot.test.ts +224 -0
  217. package/src/cli/run/adapters/copilot.ts +34 -4
  218. package/src/cli/run/adapters/cursor.test.ts +144 -0
  219. package/src/cli/run/adapters/cursor.ts +33 -2
  220. package/src/cli/run/adapters/opencode.test.ts +135 -0
  221. package/src/cli/run/adapters/opencode.ts +30 -2
  222. package/src/cli/run/executor.ts +1 -1
  223. package/src/cli/run/schema.test.ts +758 -0
  224. package/src/cli/run/schema.ts +300 -25
  225. package/src/cli/run.ts +341 -21
  226. package/src/cli/types.ts +86 -1
  227. package/src/cli/watch.ts +298 -0
  228. package/src/dashboard/dist/_astro/{index.DtnyD8a5.css → index.6L3_HsPT.css} +1 -1
  229. package/src/dashboard/dist/data/.gitkeep +0 -0
  230. package/src/dashboard/dist/data/convoy-list.json +1 -0
  231. package/src/dashboard/dist/data/overall-stats.json +24 -0
  232. package/src/dashboard/dist/index.html +701 -3
  233. package/src/dashboard/node_modules/.vite/deps/_metadata.json +6 -6
  234. package/src/dashboard/public/data/.gitkeep +0 -0
  235. package/src/dashboard/public/data/convoy-list.json +1 -0
  236. package/src/dashboard/public/data/overall-stats.json +24 -0
  237. package/src/dashboard/scripts/etl.test.ts +210 -0
  238. package/src/dashboard/scripts/etl.ts +108 -0
  239. package/src/dashboard/scripts/integration-test.ts +504 -0
  240. package/src/dashboard/src/pages/index.astro +854 -15
  241. package/src/dashboard/src/styles/dashboard.css +557 -1
  242. package/src/orchestrator/prompts/generate-convoy.prompt.md +212 -13
@@ -1,20 +1,39 @@
1
1
  import { execFile as execFileCb } from 'node:child_process'
2
2
  import { createHash } from 'node:crypto'
3
- import { mkdirSync } from 'node:fs'
3
+ import {
4
+ appendFileSync,
5
+ closeSync,
6
+ existsSync,
7
+ fsyncSync,
8
+ mkdirSync,
9
+ openSync,
10
+ readFileSync,
11
+ renameSync,
12
+ unlinkSync,
13
+ writeFileSync,
14
+ } from 'node:fs'
4
15
  import { dirname, join, resolve } from 'node:path'
16
+ import { DatabaseSync } from 'node:sqlite'
5
17
  import { promisify } from 'node:util'
6
- import type { Task, TaskSpec, AgentAdapter, ExecuteResult } from '../types.js'
7
- import { createConvoyStore, type ConvoyStore } from './store.js'
8
- import { createEventEmitter, type ConvoyEventEmitter } from './events.js'
18
+ import type { Task, TaskSpec, AgentAdapter, ExecuteResult, ReviewHeuristics } from '../types.js'
19
+ import { createConvoyStore, ConvoyArtifactLimitError, type ConvoyStore } from './store.js'
20
+ import { acquireEngineLock } from './lock.js'
21
+ import { createEventEmitter, ndjsonPathForConvoy, recoverNdjson, type ConvoyEventEmitter } from './events.js'
9
22
  import { createWorktreeManager, type WorktreeManager } from './worktree.js'
10
- import { createMergeQueue, type MergeQueue } from './merge.js'
11
- import { createHealthMonitor } from './health.js'
23
+ import { createMergeQueue, MergeConflictError, type MergeQueue } from './merge.js'
24
+ import { createHealthMonitor, detectDrift } from './health.js'
12
25
  import { exportConvoyToNdjson } from './export.js'
13
- import type { TaskRecord, ConvoyStatus } from './types.js'
26
+ import type { TaskRecord, ConvoyStatus, ConvoyTaskStatus, GuardConfig, CircuitBreakerConfig, TaskStep, Hook, TaskOutput, TaskInput } from './types.js'
14
27
  import { buildPhases, formatDuration } from '../run/executor.js'
15
- import { parseTimeout } from '../run/schema.js'
28
+ import { parseTimeout, parseYaml } from '../run/schema.js'
16
29
  import { getAdapter, detectAdapter } from '../run/adapters/index.js'
17
30
  import { c } from '../prompt.js'
31
+ import { validateFilePartitions, scanSymlinks, scanNewSymlinks, normalizePath, pathsOverlap } from './partition.js'
32
+ import { scanForSecrets, runSecretScanGate, runBlastRadiusGate, browserTestGate } from './gates.js'
33
+ import { readLessons, captureLessons, consolidateLessons } from './lessons.js'
34
+ import { updateExpertise, feedCircuitBreaker } from './expertise.js'
35
+ import { buildKnowledgeGraph } from './knowledge.js'
36
+ import { injectDiscoveredIssuesInstruction, checkDiscoveredIssues, consolidateIssues } from './issues.js'
18
37
 
19
38
  const execFile = promisify(execFileCb)
20
39
 
@@ -31,6 +50,10 @@ export interface ConvoyEngineOptions {
31
50
  pipelineId?: string
32
51
  _worktreeManager?: WorktreeManager
33
52
  _mergeQueue?: MergeQueue
53
+ /** Override for test injection. Pass `ensureBranch` for real behavior, or a mock. */
54
+ _ensureBranch?: (branchName: string, basePath: string) => Promise<void>
55
+ /** Injectable for test injection of the review pipeline. */
56
+ _reviewRunner?: (task: TaskRecord, level: ReviewLevel, reviewerModel: string) => Promise<ReviewResult>
34
57
  }
35
58
 
36
59
  export interface ConvoyResult {
@@ -45,9 +68,322 @@ export interface ConvoyResult {
45
68
  export interface ConvoyEngine {
46
69
  run(): Promise<ConvoyResult>
47
70
  resume(convoyId: string): Promise<ConvoyResult>
71
+ retryFailed(convoyId: string, taskIds?: string[]): Promise<void>
72
+ injectTask(convoyId: string, task: {
73
+ id: string
74
+ prompt: string
75
+ agent: string
76
+ phase: number
77
+ timeout_ms?: number
78
+ depends_on?: string[]
79
+ files?: string[]
80
+ max_retries?: number
81
+ provenance?: string
82
+ idempotency_key?: string
83
+ on_exhausted?: 'dlq' | 'skip' | 'stop'
84
+ }): TaskRecord
48
85
  }
49
86
 
50
- // ── Internal helpers ──────────────────────────────────────────────────────────
87
+ // ── Circuit Breaker ────────────────────────────────────────────────────────────
88
+
89
+ export interface CircuitBreakerState {
90
+ status: 'closed' | 'open' | 'half-open'
91
+ failures: number
92
+ last_failure_at: string | null
93
+ opened_at: string | null
94
+ }
95
+
96
+ export class CircuitBreakerManager {
97
+ private states: Map<string, CircuitBreakerState> = new Map()
98
+ private threshold: number
99
+ private cooldownMs: number
100
+ private fallbackAgent: string | null
101
+
102
+ constructor(config?: CircuitBreakerConfig, initialState?: Record<string, CircuitBreakerState>) {
103
+ this.threshold = config?.threshold ?? 3
104
+ this.cooldownMs = config?.cooldown_ms ?? 300_000
105
+ this.fallbackAgent = config?.fallback_agent ?? null
106
+
107
+ if (initialState) {
108
+ for (const [agent, state] of Object.entries(initialState)) {
109
+ this.states.set(agent, state)
110
+ }
111
+ }
112
+ }
113
+
114
+ getState(agent: string): CircuitBreakerState {
115
+ return this.states.get(agent) ?? { status: 'closed', failures: 0, last_failure_at: null, opened_at: null }
116
+ }
117
+
118
+ recordFailure(agent: string): { tripped: boolean; state: CircuitBreakerState } {
119
+ const state = this.getState(agent)
120
+ const now = new Date().toISOString()
121
+
122
+ if (state.status === 'half-open') {
123
+ // Probe failed — back to open, reset cooldown
124
+ state.status = 'open'
125
+ state.opened_at = now
126
+ state.last_failure_at = now
127
+ this.states.set(agent, state)
128
+ return { tripped: true, state }
129
+ }
130
+
131
+ state.failures += 1
132
+ state.last_failure_at = now
133
+
134
+ if (state.failures >= this.threshold) {
135
+ state.status = 'open'
136
+ state.opened_at = now
137
+ this.states.set(agent, state)
138
+ return { tripped: true, state }
139
+ }
140
+
141
+ this.states.set(agent, state)
142
+ return { tripped: false, state }
143
+ }
144
+
145
+ recordSuccess(agent: string): CircuitBreakerState {
146
+ const state = this.getState(agent)
147
+
148
+ if (state.status === 'half-open') {
149
+ // Probe succeeded — close circuit
150
+ state.status = 'closed'
151
+ state.failures = 0
152
+ state.opened_at = null
153
+ } else if (state.status === 'closed') {
154
+ state.failures = 0
155
+ }
156
+
157
+ this.states.set(agent, state)
158
+ return state
159
+ }
160
+
161
+ canAssign(agent: string): boolean {
162
+ const state = this.getState(agent)
163
+
164
+ if (state.status === 'closed') return true
165
+ if (state.status === 'half-open') return true // allow 1 probe
166
+
167
+ // Open — check cooldown
168
+ if (state.opened_at) {
169
+ const elapsed = Date.now() - new Date(state.opened_at).getTime()
170
+ if (elapsed >= this.cooldownMs) {
171
+ state.status = 'half-open'
172
+ this.states.set(agent, state)
173
+ return true
174
+ }
175
+ }
176
+
177
+ return false
178
+ }
179
+
180
+ get fallback(): string | null {
181
+ return this.fallbackAgent
182
+ }
183
+
184
+ serialize(): string {
185
+ return JSON.stringify(Object.fromEntries(this.states))
186
+ }
187
+ }
188
+
189
+ // ── Branch management ───────────────────────────────────────────────────────
190
+
191
+ /**
192
+ * Ensure the given branch exists and is checked out.
193
+ * Creates the branch from HEAD if it does not yet exist.
194
+ * Fails fast if there are uncommitted changes.
195
+ */
196
+ export async function ensureBranch(branchName: string, basePath: string): Promise<void> {
197
+ // Validate refspec — reject shell metacharacters
198
+ if (!/^[a-zA-Z0-9\-/_\.]+$/.test(branchName)) {
199
+ throw new Error(
200
+ `Invalid branch name "${branchName}": only alphanumeric, -, /, _, and . are allowed`,
201
+ )
202
+ }
203
+
204
+ // Refuse to switch branches with uncommitted changes
205
+ const { stdout: statusOut } = await execFile('git', ['status', '--porcelain'], {
206
+ cwd: basePath,
207
+ })
208
+ if (statusOut.trim()) {
209
+ throw new Error(
210
+ `Uncommitted changes detected in "${basePath}". Commit or stash before switching branches.`,
211
+ )
212
+ }
213
+
214
+ // Check if branch already exists
215
+ try {
216
+ await execFile('git', ['rev-parse', '--verify', branchName], { cwd: basePath })
217
+ // Branch exists — check it out
218
+ await execFile('git', ['checkout', branchName], { cwd: basePath })
219
+ } catch {
220
+ // Branch does not exist — create from current HEAD
221
+ await execFile('git', ['checkout', '-b', branchName], { cwd: basePath })
222
+ }
223
+ }
224
+
225
+ // ── Convoy guard ──────────────────────────────────────────────────────────────
226
+
227
/** Outcome of the post-run convoy guard: passes only when no warnings fired. */
export interface ConvoyGuardResult {
  passed: boolean
  warnings: string[]
}

/**
 * Post-run consistency audit for a finished convoy. Runs a fixed sequence of
 * checks against the store and the per-convoy NDJSON export, collecting
 * human-readable warnings; any warning means `passed: false`.
 *
 * Checks are advisory — this function never throws and performs no writes.
 *
 * @param store - Convoy store used to read tasks, events, and convoy totals.
 * @param convoyId - Convoy under audit.
 * @param _wtManager - Unused; worktree cleanup is the engine's responsibility
 *   (see Check 6 below).
 * @param ndjsonPath - Path to this convoy's NDJSON event/record file.
 * @param guardConfig - Optional config; `enabled: false` skips every check.
 */
export function runConvoyGuard(
  store: ConvoyStore,
  convoyId: string,
  _wtManager: WorktreeManager,
  ndjsonPath: string,
  guardConfig?: GuardConfig,
): ConvoyGuardResult {
  // If guard is explicitly disabled, skip all checks
  if (guardConfig?.enabled === false) {
    return { passed: true, warnings: [] }
  }

  const warnings: string[] = []
  const tasks = store.getTasksByConvoy(convoyId)

  // Check 1: All task statuses are terminal
  const terminalStatuses = new Set(['done', 'failed', 'skipped', 'timed-out', 'gate-failed', 'review-blocked', 'hook-failed', 'disputed'])
  const nonTerminal = tasks.filter(t => !terminalStatuses.has(t.status))
  if (nonTerminal.length > 0) {
    warnings.push(
      `Non-terminal tasks: ${nonTerminal.map(t => `${t.id}(${t.status})`).join(', ')}`,
    )
  }

  // Check 2: NDJSON file exists and record count >= completed task count
  const completedTasks = tasks.filter(t => t.status === 'done')
  try {
    const content = readFileSync(ndjsonPath, 'utf8')
    // Blank lines are ignored so trailing newlines don't skew the count.
    const lines = content.split('\n').filter(l => l.trim())
    // Per-convoy file — all records belong to this convoy, no need to filter by convoy_id
    if (lines.length < completedTasks.length) {
      warnings.push(
        `NDJSON record count (${lines.length}) < completed tasks (${completedTasks.length})`,
      )
    }
  } catch {
    // readFileSync failing is treated as "file missing" — only a problem if
    // tasks actually completed and should have produced records.
    if (completedTasks.length > 0) {
      warnings.push(
        `NDJSON file not found at ${ndjsonPath} but ${completedTasks.length} tasks completed`,
      )
    }
  }

  // Check 3: Every retried task has events for each attempt
  // NOTE(review): compares task_started event count against `retries`;
  // presumably each retry re-emits task_started — confirm emitter behavior.
  const retriedTasks = tasks.filter(t => t.retries > 0)
  const events = store.getEvents(convoyId)
  for (const task of retriedTasks) {
    const taskEvents = events.filter(e => e.task_id === task.id && e.type === 'task_started')
    if (taskEvents.length < task.retries) {
      warnings.push(
        `Task ${task.id} has ${task.retries} retries but only ${taskEvents.length} task_started events`,
      )
    }
  }

  // Check 4: Gate results recorded for all gates that ran
  // An event counts as a gate result if it is the built-in type, or if its
  // JSON data payload carries a "gate" key; malformed payloads are ignored.
  const gateEvents = events.filter(e => {
    if (e.type === 'built_in_gate_result') return true
    if (e.data == null) return false
    try {
      const parsed = JSON.parse(e.data) as Record<string, unknown>
      return 'gate' in parsed
    } catch {
      return false
    }
  })
  const tasksWithGates = tasks.filter(t => t.gates)
  if (tasksWithGates.length > 0 && gateEvents.length === 0) {
    warnings.push('Tasks have gates configured but no gate result events found')
  }

  // Check 5: Token/cost totals computed
  // `== null` is deliberate: matches both null and undefined totals.
  const convoy = store.getConvoy(convoyId)
  if (convoy && convoy.total_tokens == null) {
    const totalTokens = tasks.reduce((sum, t) => sum + (t.total_tokens ?? 0), 0)
    if (totalTokens > 0) {
      warnings.push('Convoy total_tokens not persisted despite tasks having token data')
    }
  }

  // Check 6: No orphaned worktrees — engine already calls removeAll() during cleanup.
  // Synchronous check is not possible; the engine handles this.

  return { passed: warnings.length === 0, warnings }
}
317
+
318
+ // ── Review routing ────────────────────────────────────────────────────────────
319
+
320
+ export interface DiffStats {
321
+ linesChanged: number
322
+ filesChanged: number
323
+ filePaths: string[]
324
+ }
325
+
326
+ export type ReviewLevel = 'auto-pass' | 'fast' | 'panel'
327
+
328
+ export interface ReviewResult {
329
+ verdict: 'pass' | 'block'
330
+ feedback: string
331
+ tokens: number
332
+ model: string
333
+ }
334
+
335
+ export function evaluateReviewLevel(
336
+ task: TaskRecord,
337
+ diff: DiffStats,
338
+ heuristics?: ReviewHeuristics,
339
+ allGatesPassed?: boolean,
340
+ ): ReviewLevel {
341
+ const panelPaths = heuristics?.panel_paths ?? ['auth/', 'security/', 'migrations/', 'rls/']
342
+ const panelAgents = heuristics?.panel_agents ?? ['security-expert', 'database-engineer']
343
+ const autoPassAgents = heuristics?.auto_pass_agents ?? ['documentation-writer', 'copywriter']
344
+ const autoPassMaxLines = heuristics?.auto_pass_max_lines ?? 10
345
+ const autoPassMaxFiles = heuristics?.auto_pass_max_files ?? 2
346
+
347
+ // Panel: sensitive paths or agents
348
+ if (panelPaths.some(p => diff.filePaths.some(fp => fp.startsWith(p) || fp.includes('/' + p)))) return 'panel'
349
+ if (panelAgents.includes(task.agent)) return 'panel'
350
+
351
+ // Auto-pass: documentation/copy agents
352
+ if (autoPassAgents.includes(task.agent)) return 'auto-pass'
353
+
354
+ // Auto-pass: small diffs with all gates passing
355
+ if (diff.linesChanged <= autoPassMaxLines && diff.filesChanged <= autoPassMaxFiles && allGatesPassed !== false) return 'auto-pass'
356
+
357
+ // Large diffs → fast review
358
+ if (diff.linesChanged > 200 || diff.filesChanged > 5) return 'fast'
359
+
360
+ // Default → fast review
361
+ return 'fast'
362
+ }
363
+
364
+ class ReviewSemaphore {
365
+ private current = 0
366
+ private queue: Array<() => void> = []
367
+ constructor(private max: number) {}
368
+
369
+ async acquire(): Promise<void> {
370
+ if (this.current < this.max) {
371
+ this.current++
372
+ return
373
+ }
374
+ return new Promise<void>(resolve => {
375
+ this.queue.push(() => { this.current++; resolve() })
376
+ })
377
+ }
378
+
379
+ release(): void {
380
+ this.current--
381
+ if (this.queue.length > 0) {
382
+ const next = this.queue.shift()!
383
+ next()
384
+ }
385
+ }
386
+ }
51
387
 
52
388
  function msToTimeout(ms: number): string {
53
389
  if (ms >= 3_600_000 && ms % 3_600_000 === 0) return `${ms / 3_600_000}h`
@@ -55,6 +391,80 @@ function msToTimeout(ms: number): string {
55
391
  return `${ms / 1_000}s`
56
392
  }
57
393
 
394
+ // ── DLQ markdown dual-write ───────────────────────────────────────────────────
395
+
396
+ // Builds the DLQ markdown entry text (no I/O, no scanning).
397
+ function buildDlqMarkdownEntry(
398
+ dlqId: string,
399
+ task: TaskRecord,
400
+ failureType: string,
401
+ errorOutput: string | null,
402
+ ): { marker: string; entry: string } {
403
+ const marker = `<!-- dlq:${dlqId} -->`
404
+ const entry = `\n${marker}\n### ${dlqId}\n\n| Field | Value |\n|-------|-------|\n| Task | ${task.id} |\n| Agent | ${task.agent} |\n| Type | ${failureType} |\n| Attempts | ${task.retries + 1} |\n| Date | ${new Date().toISOString()} |\n\n**Error:**\n\`\`\`\n${(errorOutput ?? '(no output)').slice(0, 2000)}\n\`\`\`\n`
405
+ return { marker, entry }
406
+ }
407
+
408
+ // Appends a pre-scanned DLQ entry to AGENT-FAILURES.md. The caller must have
409
+ // already verified the entry is clean via scanForSecrets — no re-scan here.
410
+ function appendDlqMarkdownClean(marker: string, entry: string): void {
411
+ const mdPath = join(resolve(process.cwd()), '.opencastle', 'AGENT-FAILURES.md')
412
+ try {
413
+ const existing = readFileSync(mdPath, 'utf8')
414
+ if (existing.includes(marker)) return
415
+ } catch {
416
+ // File doesn't exist yet — will create
417
+ }
418
+ mkdirSync(dirname(mdPath), { recursive: true })
419
+ appendFileSync(mdPath, entry)
420
+ }
421
+
422
/**
 * Append a dispute entry for a panel-blocked task to DISPUTES.md in the
 * current working directory. Idempotent via an HTML-comment marker; the entry
 * is secret-scanned before writing and silently dropped (with an optional
 * telemetry event) if the scan finds anything.
 *
 * NOTE(review): DISPUTES.md is written at the repo root while the DLQ file
 * goes under .opencastle/ — confirm the differing locations are intentional.
 *
 * @param disputeId - Unique id embedded in the dedupe marker.
 * @param convoyId - Convoy the disputed task belongs to.
 * @param task - The disputed task; id, agent, and panel_attempts are recorded.
 * @param panelResults - All panel verdicts; only 'block' feedback is written.
 * @param events - Optional emitter used to report a prevented secret leak.
 */
function writeDisputeToMarkdown(
  disputeId: string,
  convoyId: string,
  task: TaskRecord,
  panelResults: ReviewResult[],
  events?: ConvoyEventEmitter | null,
): void {
  const mdPath = join(resolve(process.cwd()), 'DISPUTES.md')
  const marker = `<!-- dispute:${disputeId} -->`

  // Dedupe: if this dispute was already written, do nothing.
  try {
    const existing = readFileSync(mdPath, 'utf8')
    if (existing.includes(marker)) return
  } catch {
    // File doesn't exist yet
  }

  // Only blocking verdicts contribute to the written reasons.
  const blockingReasons = panelResults
    .filter(r => r.verdict === 'block')
    .map(r => r.feedback)
    .join('\n\n')

  const entry = `\n${marker}\n## Dispute: ${task.id}\n\n| Field | Value |\n|-------|-------|\n| Convoy | ${convoyId} |\n| Task | ${task.id} |\n| Date | ${new Date().toISOString()} |\n| Panel attempts | ${task.panel_attempts + 1} |\n| Agent | ${task.agent} |\n| Status | Open |\n\n**Blocking reasons:**\n\n${blockingReasons}\n`

  // Never write reviewer feedback containing secrets; emit telemetry instead
  // of the file entry when the scan flags anything.
  const scanResult = scanForSecrets(entry, 'DISPUTES.md')
  if (!scanResult.clean) {
    if (events) {
      events.emit(
        'secret_leak_prevented',
        {
          task_id: task.id,
          findings_count: scanResult.findings.length,
          patterns: scanResult.findings.map((f) => f.pattern),
          context: 'dispute_markdown_write',
        },
        { convoy_id: convoyId, task_id: task.id },
      )
    }
    return
  }

  appendFileSync(mdPath, entry)
}
465
+
466
+
467
+
58
468
  function taskRecordToTask(record: TaskRecord): Task {
59
469
  return {
60
470
  id: record.id,
@@ -67,6 +477,7 @@ function taskRecordToTask(record: TaskRecord): Task {
67
477
  model: record.model ?? undefined,
68
478
  max_retries: record.max_retries,
69
479
  adapter: record.adapter ?? undefined,
480
+ gates: record.gates ? (JSON.parse(record.gates) as string[]) : undefined,
70
481
  }
71
482
  }
72
483
 
@@ -81,6 +492,354 @@ function makeTimeoutPromise(ms: number): { promise: Promise<ExecuteResult>; clea
81
492
  return { promise, clear: () => { if (timerId !== undefined) clearTimeout(timerId) } }
82
493
  }
83
494
 
495
+ // ── Step condition evaluation ─────────────────────────────────────────────────
496
+
497
+ function evaluateStepCondition(
498
+ condition: TaskStep['if'],
499
+ stepResults: Map<string, { exitCode: number }>,
500
+ worktreePath: string | null,
501
+ basePath: string,
502
+ ): boolean {
503
+ if (!condition) return true
504
+
505
+ if (condition.exitCode) {
506
+ const prevResult = stepResults.get(condition.step)
507
+ if (!prevResult) return false
508
+ const code = prevResult.exitCode
509
+ const ec = condition.exitCode
510
+ if (ec.eq !== undefined && code !== ec.eq) return false
511
+ if (ec.ne !== undefined && code === ec.ne) return false
512
+ if (ec.gt !== undefined && !(code > ec.gt)) return false
513
+ if (ec.lt !== undefined && !(code < ec.lt)) return false
514
+ }
515
+
516
+ if (condition.fileExists) {
517
+ const base = worktreePath ?? basePath
518
+ if (condition.fileExists.path.startsWith('/')) {
519
+ return false // Absolute paths not allowed in step conditions
520
+ }
521
+ const filePath = join(base, condition.fileExists.path)
522
+ const resolved = resolve(filePath)
523
+ const resolvedBase = resolve(base)
524
+ if (!resolved.startsWith(resolvedBase + '/') && resolved !== resolvedBase) {
525
+ return false // path escapes the worktree — treat as "file doesn't exist"
526
+ }
527
+ if (!existsSync(filePath)) return false
528
+ }
529
+
530
+ return true
531
+ }
532
+
533
/**
 * Execute a task's multi-step plan sequentially.
 *
 * For each step: evaluates its optional `if:` condition (skipping and
 * recording a 'skipped' row when unmet), runs the step through the agent
 * adapter with per-step retries (failure output from the previous attempt is
 * prepended to the retry prompt), then runs any step-level gate commands.
 * Each step gets its own row in the task_steps table (running → done/failed).
 * Execution stops at the first step that ends unsuccessful; the combined
 * output of all steps run so far and the last exit code are returned either way.
 *
 * NOTE(review): gate/hook commands executed here via `sh -c` come from the
 * .convoy.yml spec, which this codebase treats as operator-controlled — see
 * the trust-model comment in runConvoy.
 */
async function executeSteps(
  taskRecord: TaskRecord,
  steps: TaskStep[],
  adapter: AgentAdapter,
  worktreePath: string | null,
  basePath: string,
  store: ConvoyStore,
  convoyId: string,
  verbose: boolean,
): Promise<ExecuteResult> {
  const now = () => new Date().toISOString()
  // Results of completed/skipped steps keyed by step id, for `if:` conditions.
  const stepResults = new Map<string, { exitCode: number }>()
  let combinedOutput = ''
  let lastExitCode = 0

  // Track total_steps in DB
  // NOTE(review): empty patch object — presumably updateTaskStatus refreshes
  // step-progress columns internally; confirm against ConvoyStore.
  store.updateTaskStatus(taskRecord.id, convoyId, 'running', {})

  for (let i = 0; i < steps.length; i++) {
    const step = steps[i]

    // Evaluate condition — skip step if condition is not met
    if (step.if) {
      const condMet = evaluateStepCondition(step.if, stepResults, worktreePath, basePath)
      if (!condMet) {
        // NOTE(review): stepId is unused — insertTaskStep is called for its
        // side effect of recording the skipped step.
        const stepId = store.insertTaskStep({
          task_id: taskRecord.id,
          step_index: i,
          prompt: step.prompt,
          gates: step.gates ? JSON.stringify(step.gates) : null,
          status: 'skipped',
          exit_code: null,
          output: 'Skipped: condition not met',
          started_at: now(),
          finished_at: now(),
        })
        if (step.id) {
          // A skipped step registers as exit code 0 for downstream conditions.
          stepResults.set(step.id, { exitCode: 0 })
        }
        combinedOutput += `\n[Step ${i + 1} skipped: condition not met]`
        continue
      }
    }

    // Insert step record as running
    const stepDbId = store.insertTaskStep({
      task_id: taskRecord.id,
      step_index: i,
      prompt: step.prompt,
      gates: step.gates ? JSON.stringify(step.gates) : null,
      status: 'running',
      exit_code: null,
      output: null,
      started_at: now(),
      finished_at: null,
    })

    // Update current_step on the task record
    store.updateTaskStatus(taskRecord.id, convoyId, 'running', {})

    // Per-step retry budget falls back to the task-level budget.
    const stepMaxRetries = step.max_retries ?? taskRecord.max_retries
    let stepResult: ExecuteResult = { success: false, output: '', exitCode: -1 }
    let stepAttempt = 0

    while (stepAttempt <= stepMaxRetries) {
      // Prepend prior failure context on retries
      let stepPrompt = step.prompt
      if (stepAttempt > 0 && stepResult) {
        const failedOutput = stepResult.output || '(no output)'
        stepPrompt = `Previous attempt failed.\nExit code: ${stepResult.exitCode}\nError output:\n${failedOutput}\n\nFix the issues and try again.\n\n` + step.prompt
      }

      // Synthesize a Task-shaped object for the adapter; the id is reused from
      // the parent task so adapter-side logging stays attributable.
      const stepTask = {
        id: taskRecord.id,
        prompt: stepPrompt,
        agent: taskRecord.agent,
        timeout: `${taskRecord.timeout_ms}ms`,
        depends_on: [],
        files: taskRecord.files ? JSON.parse(taskRecord.files) as string[] : [],
        description: `step ${i + 1}`,
        max_retries: stepMaxRetries,
      }

      try {
        stepResult = await adapter.execute(stepTask, { verbose, cwd: worktreePath ?? basePath })
      } catch (err) {
        // Adapter threw rather than returning a result — normalize to a failure.
        stepResult = { success: false, output: (err as Error).message, exitCode: -1 }
      }

      if (stepResult.success) break

      stepAttempt++
      if (stepAttempt <= stepMaxRetries) {
        process.stdout.write(` ↺ step ${i + 1}/${steps.length} failed, retry ${stepAttempt}/${stepMaxRetries}\n`)
      }
    }

    lastExitCode = stepResult.exitCode
    combinedOutput += `\n[Step ${i + 1}]\n${stepResult.output}`

    if (step.id) {
      stepResults.set(step.id, { exitCode: stepResult.exitCode })
    }

    // Run step-level gates if present
    if (step.gates && step.gates.length > 0 && stepResult.success) {
      let gateFailure: { command: string; exitCode: number; output: string } | null = null
      const execFileCb = (await import('node:child_process')).execFile
      const execFileP = (await import('node:util')).promisify(execFileCb)
      for (const command of step.gates) {
        try {
          // SECURITY: Gate/hook commands come from the .convoy.yml spec file, which is operator-controlled.
          // They are NOT user-supplied and are part of the trusted build configuration.
          await execFileP('sh', ['-c', command], { cwd: worktreePath ?? basePath })
        } catch (gateErr) {
          const ge = gateErr as Error & { code?: unknown; stderr?: string; stdout?: string }
          // Non-numeric `code` (e.g. spawn error strings) is normalized to 1.
          const code = typeof ge.code === 'number' ? ge.code : 1
          const output = ge.stderr || ge.stdout || ge.message || ''
          gateFailure = { command, exitCode: code, output }
          break
        }
      }
      if (gateFailure !== null) {
        // A gate failure converts the otherwise-successful step into a failure.
        stepResult = { success: false, output: `Gate failed: ${gateFailure.command}\nExit code: ${gateFailure.exitCode}\n${gateFailure.output}`, exitCode: gateFailure.exitCode }
        lastExitCode = gateFailure.exitCode
        combinedOutput += `\n[Step ${i + 1} gate failed: ${gateFailure.command}]`
      }
    }

    // Update step record
    store.updateTaskStep(stepDbId, {
      status: stepResult.success ? 'done' : 'failed',
      exit_code: stepResult.exitCode,
      output: stepResult.output,
      finished_at: now(),
    })

    // First failing step aborts the remainder of the plan.
    if (!stepResult.success) {
      return {
        success: false,
        output: combinedOutput.trim(),
        exitCode: lastExitCode,
      }
    }
  }

  return {
    success: true,
    output: combinedOutput.trim(),
    exitCode: lastExitCode,
  }
}
685
+
686
+ // ── File-based injection ──────────────────────────────────────────────────────
687
+
688
+ const INJECT_DIR = '.opencastle/convoy-inject'
689
+ const CONVOY_ID_RE = /^[a-zA-Z0-9-]+$/
690
+ const MAX_FILE_INJECTED_TASKS = 10
691
+
692
+ function pollInjectFile(
693
+ convoyId: string,
694
+ store: ConvoyStore,
695
+ events: ConvoyEventEmitter,
696
+ basePath: string,
697
+ ): number {
698
+ // Path traversal guard: convoy_id must be alphanumeric + hyphens only
699
+ if (!CONVOY_ID_RE.test(convoyId)) return 0
700
+
701
+ const injectDir = join(basePath, INJECT_DIR, convoyId)
702
+ const injectPath = join(injectDir, 'inject.yml')
703
+
704
+ if (!existsSync(injectPath)) return 0
705
+
706
+ // Atomic rename to prevent double-read
707
+ const processingPath = injectPath + '.processing'
708
+ try {
709
+ renameSync(injectPath, processingPath)
710
+ } catch {
711
+ return 0 // Another process may have grabbed it
712
+ }
713
+
714
+ let raw: string
715
+ try {
716
+ raw = readFileSync(processingPath, 'utf8')
717
+ } catch {
718
+ return 0
719
+ } finally {
720
+ try { unlinkSync(processingPath) } catch { /* ignore */ }
721
+ }
722
+
723
+ let parsed: Record<string, unknown>
724
+ try {
725
+ parsed = parseYaml(raw)
726
+ if (!parsed || typeof parsed !== 'object' || !Array.isArray(parsed.tasks)) {
727
+ process.stderr.write(`Warning: inject file has invalid format (expected { tasks: [...] })\n`)
728
+ return 0
729
+ }
730
+ } catch (err) {
731
+ process.stderr.write(`Warning: failed to parse inject file: ${(err as Error).message}\n`)
732
+ return 0
733
+ }
734
+
735
+ const tasks = parsed.tasks as Array<Record<string, unknown>>
736
+ const allExisting = store.getTasksByConvoy(convoyId)
737
+ const existingFileInjected = allExisting.filter(t => t.provenance === 'file-injection').length
738
+ const remaining = MAX_FILE_INJECTED_TASKS - existingFileInjected
739
+ let injectedCount = 0
740
+
741
+ for (const rawTask of tasks) {
742
+ if (injectedCount >= remaining) {
743
+ process.stderr.write(`Warning: file injection limit reached (${MAX_FILE_INJECTED_TASKS}), skipping remaining tasks\n`)
744
+ break
745
+ }
746
+
747
+ // Validate required fields
748
+ if (!rawTask.id || typeof rawTask.id !== 'string') {
749
+ process.stderr.write(`Warning: skipping injected task with missing/invalid id\n`)
750
+ continue
751
+ }
752
+ if (!rawTask.prompt || typeof rawTask.prompt !== 'string') {
753
+ process.stderr.write(`Warning: skipping injected task "${rawTask.id}": missing prompt\n`)
754
+ continue
755
+ }
756
+ if (!rawTask.agent || typeof rawTask.agent !== 'string') {
757
+ process.stderr.write(`Warning: skipping injected task "${rawTask.id}": missing agent\n`)
758
+ continue
759
+ }
760
+
761
+ // Check ID uniqueness
762
+ if (allExisting.some(t => t.id === rawTask.id as string)) {
763
+ process.stderr.write(`Warning: skipping injected task "${rawTask.id}": ID already exists\n`)
764
+ continue
765
+ }
766
+
767
+ // Determine phase — inject into last scheduled phase
768
+ const maxPhase = allExisting.reduce((max, t) => Math.max(max, t.phase), 0)
769
+
770
+ // Validate file paths before building the record
771
+ let validatedFiles: string | null = null
772
+ if (rawTask.files && Array.isArray(rawTask.files)) {
773
+ try {
774
+ validatedFiles = JSON.stringify((rawTask.files as string[]).map(f => normalizePath(f as string)))
775
+ } catch (err) {
776
+ process.stderr.write(`Warning: skipping injected task "${rawTask.id as string}": invalid file path: ${(err as Error).message}\n`)
777
+ continue
778
+ }
779
+ }
780
+
781
+ const record: TaskRecord = {
782
+ id: rawTask.id as string,
783
+ convoy_id: convoyId,
784
+ phase: maxPhase,
785
+ prompt: rawTask.prompt as string,
786
+ agent: rawTask.agent as string,
787
+ adapter: null,
788
+ model: null,
789
+ timeout_ms: typeof rawTask.timeout_ms === 'number' ? rawTask.timeout_ms : 1_800_000,
790
+ status: 'pending',
791
+ worker_id: null,
792
+ worktree: null,
793
+ output: null,
794
+ exit_code: null,
795
+ started_at: null,
796
+ finished_at: null,
797
+ retries: 0,
798
+ max_retries: typeof rawTask.max_retries === 'number' ? rawTask.max_retries : 1,
799
+ files: validatedFiles,
800
+ depends_on: null,
801
+ prompt_tokens: null,
802
+ completion_tokens: null,
803
+ total_tokens: null,
804
+ cost_usd: null,
805
+ gates: null,
806
+ on_exhausted: 'dlq',
807
+ injected: 1,
808
+ provenance: 'file-injection',
809
+ idempotency_key: null,
810
+ current_step: null,
811
+ total_steps: null,
812
+ review_level: null,
813
+ review_verdict: null,
814
+ review_tokens: null,
815
+ review_model: null,
816
+ panel_attempts: 0,
817
+ dispute_id: null,
818
+ drift_score: null,
819
+ drift_retried: 0,
820
+ outputs: null,
821
+ inputs: null,
822
+ discovered_issues: null,
823
+ }
824
+
825
+ try {
826
+ store.insertInjectedTask(record)
827
+ injectedCount++
828
+ } catch (err) {
829
+ process.stderr.write(`Warning: failed to inject task "${rawTask.id}": ${(err as Error).message}\n`)
830
+ }
831
+ }
832
+
833
+ if (injectedCount > 0) {
834
+ events.emit('file_injection_received', {
835
+ task_count: injectedCount,
836
+ source: injectPath,
837
+ }, { convoy_id: convoyId })
838
+ }
839
+
840
+ return injectedCount
841
+ }
842
+
84
843
  // ── Core convoy execution ─────────────────────────────────────────────────────
85
844
 
86
845
  async function runConvoy(
@@ -95,10 +854,14 @@ async function runConvoy(
95
854
  baseBranch: string,
96
855
  verbose: boolean,
97
856
  startTime: number,
857
+ ndjsonPath: string,
858
+ reviewRunner?: (task: TaskRecord, level: ReviewLevel, reviewerModel: string) => Promise<ReviewResult>,
98
859
  ): Promise<ConvoyResult> {
99
860
  const totalTasks = spec.tasks?.length ?? 0
100
861
  let completedCount = 0
101
862
  const activeTaskMap = new Map<string, Task>()
863
+ const reviewSemaphore = new ReviewSemaphore(spec.defaults?.max_concurrent_reviews ?? 3)
864
+ let reviewTokensTotal = 0
102
865
  const taskAdapterMap = new Map<string, AgentAdapter>()
103
866
 
104
867
  const healthMonitor = createHealthMonitor({
@@ -117,6 +880,19 @@ async function runConvoy(
117
880
  })
118
881
  healthMonitor.start()
119
882
 
883
+ // ── Circuit breaker ────────────────────────────────────────────────────────
884
+ const circuitBreakerConfig = spec.defaults?.circuit_breaker
885
+ const convoyRecord = store.getConvoy(convoyId)
886
+ const initialCircuitState = convoyRecord?.circuit_state ? JSON.parse(convoyRecord.circuit_state) : undefined
887
+ const circuitBreaker = new CircuitBreakerManager(circuitBreakerConfig, initialCircuitState)
888
+
889
+ // ── Trust model ────────────────────────────────────────────────────────────
890
+ // Gate commands, hook commands, and step commands in .convoy.yml are treated
891
+ // as operator-controlled build configuration (analogous to Makefiles, CI
892
+ // configs, or package.json scripts). They are executed via sh -c and must
893
+ // NOT contain user-supplied input. The spec file itself is the trust boundary.
894
+ // ──────────────────────────────────────────────────────────────────────────
895
+
120
896
  // ── Task skipping ─────────────────────────────────────────────────────────
121
897
 
122
898
  function skipTask(taskId: string, reason: string, visited: Set<string> = new Set()): void {
@@ -153,6 +929,147 @@ async function runConvoy(
153
929
  }
154
930
  }
155
931
 
932
  // Handle a task that has exhausted its retries, according to the task's
  // on_exhausted policy (default 'dlq'):
  //   'dlq'  — record a DLQ entry + markdown row, cascade failure to dependents
  //   'stop' — record a DLQ entry, then skip all pending tasks and fail the convoy
  //   'skip' — no DLQ entry; just cascade failure to dependents
  // Finally, when a circuit breaker is configured, the failure is recorded
  // against the task's agent (possibly tripping the breaker).
  function handleExhaustion(taskRecord: TaskRecord, failureType: string, errorOutput: string | null): void {
    const exhausted = taskRecord.on_exhausted ?? 'dlq'

    if (exhausted === 'dlq' || exhausted === 'stop') {
      const dlqId = `dlq-${taskRecord.id}-${Date.now()}`

      // Pre-scan: build the markdown entry and check for secrets BEFORE any
      // writes. This keeps the SQLite DLQ row and the Markdown file in sync —
      // either both are written or neither is (MF-2 dual-write atomicity).
      const { marker: dlqMarker, entry: dlqMdEntry } = buildDlqMarkdownEntry(
        dlqId,
        taskRecord,
        failureType,
        errorOutput,
      )
      const dlqScanResult = scanForSecrets(dlqMdEntry, 'AGENT-FAILURES.md')

      if (!dlqScanResult.clean) {
        // Block BOTH writes to maintain consistent state
        events.emit(
          'secret_leak_prevented',
          {
            task_id: taskRecord.id,
            findings_count: dlqScanResult.findings.length,
            patterns: dlqScanResult.findings.map((f) => f.pattern),
            context: 'dlq_dual_write',
          },
          { convoy_id: convoyId, task_id: taskRecord.id },
        )
      } else {
        // Clean — proceed with both writes atomically
        store.insertDlqEntry({
          id: dlqId,
          convoy_id: convoyId,
          task_id: taskRecord.id,
          agent: taskRecord.agent,
          failure_type: failureType,
          error_output: errorOutput,
          // retries counts re-attempts, so total attempts is retries + 1.
          attempts: taskRecord.retries + 1,
          tokens_spent: taskRecord.total_tokens,
          escalation_task_id: null,
          resolved: 0,
          resolution: null,
          created_at: new Date().toISOString(),
          resolved_at: null,
        })
        appendDlqMarkdownClean(dlqMarker, dlqMdEntry)
        events.emit('dlq_entry_created', {
          dlq_id: dlqId,
          task_id: taskRecord.id,
          agent: taskRecord.agent,
          failure_type: failureType,
        }, { convoy_id: convoyId, task_id: taskRecord.id })
      }
    }

    if (exhausted === 'stop') {
      // Skip all remaining pending tasks + set convoy to failed
      const allPending = store.getTasksByConvoy(convoyId).filter(t => t.status === 'pending')
      for (const t of allPending) {
        skipTask(t.id, `on_exhausted: stop — task "${taskRecord.id}" exhausted retries`)
      }
      store.updateConvoyStatus(convoyId, 'failed')
      events.emit('convoy_failed', { status: 'failed', reason: `on_exhausted: stop — task "${taskRecord.id}" exhausted retries` }, { convoy_id: convoyId })
    } else if (exhausted === 'dlq' || exhausted === 'skip') {
      // Default behavior: cascade failure to dependents only
      cascadeFailure(taskRecord.id)
    }

    // ── Circuit breaker: record exhaustion failure ──────────────────────────
    if (circuitBreakerConfig) {
      const { tripped } = circuitBreaker.recordFailure(taskRecord.agent)
      // Persisting breaker state is best-effort; a failed write is non-fatal.
      try { store.updateConvoyCircuitState(convoyId, circuitBreaker.serialize()) } catch { /* non-critical */ }
      if (tripped) {
        events.emit('circuit_breaker_tripped', {
          agent: taskRecord.agent,
          state: circuitBreaker.getState(taskRecord.agent),
        }, { convoy_id: convoyId, task_id: taskRecord.id })
      }
    }
  }
+
1014
+ // ── Hook execution ────────────────────────────────────────────────────────
1015
+
1016
+ async function runHooks(
1017
+ hooks: Hook[],
1018
+ lifecycle: 'pre_task' | 'post_task' | 'post_convoy',
1019
+ context: { taskId?: string; convoyId: string; cwd: string },
1020
+ ): Promise<{ passed: boolean; failedHook?: Hook; error?: string }> {
1021
+ const filtered = hooks.filter(h => (h.on ?? 'post_task') === lifecycle)
1022
+ for (const hook of filtered) {
1023
+ if (hook.type === 'command' || hook.type === 'guard' || hook.type === 'validate') {
1024
+ const cmd = hook.command
1025
+ if (!cmd) continue
1026
+ try {
1027
+ // SECURITY: Gate/hook commands come from the .convoy.yml spec file, which is operator-controlled.
1028
+ // They are NOT user-supplied and are part of the trusted build configuration.
1029
+ await execFile('sh', ['-c', cmd], { cwd: context.cwd })
1030
+ } catch (err) {
1031
+ const execErr = err as Error & { stderr?: string; stdout?: string }
1032
+ const errorMsg = execErr.stderr || execErr.stdout || execErr.message || ''
1033
+ return { passed: false, failedHook: hook, error: errorMsg }
1034
+ }
1035
+ } else if (hook.type === 'agent') {
1036
+ if (!hook.prompt) continue
1037
+ const hookTask: Task = {
1038
+ id: `hook-${lifecycle}-${context.taskId ?? 'convoy'}-${Date.now()}`,
1039
+ prompt: hook.prompt,
1040
+ agent: hook.name ?? 'developer',
1041
+ timeout: '10m',
1042
+ depends_on: [],
1043
+ files: [],
1044
+ description: `Hook: ${hook.name ?? hook.type}`,
1045
+ max_retries: 0,
1046
+ }
1047
+ try {
1048
+ const hookResult = await adapter.execute(hookTask, { verbose, cwd: context.cwd })
1049
+ if (!hookResult.success) {
1050
+ return { passed: false, failedHook: hook, error: hookResult.output }
1051
+ }
1052
+ } catch (err) {
1053
+ return { passed: false, failedHook: hook, error: (err as Error).message }
1054
+ }
1055
+ } else if (hook.type === 'review') {
1056
+ if (!context.taskId || !reviewRunner) continue
1057
+ const reviewTaskRecord = store.getTask(context.taskId, context.convoyId)
1058
+ if (reviewTaskRecord) {
1059
+ const reviewResult = await reviewRunner(
1060
+ reviewTaskRecord,
1061
+ 'fast',
1062
+ spec.defaults?.reviewer_model ?? 'default',
1063
+ )
1064
+ if (reviewResult.verdict !== 'pass') {
1065
+ return { passed: false, failedHook: hook, error: reviewResult.feedback }
1066
+ }
1067
+ }
1068
+ }
1069
+ }
1070
+ return { passed: true }
1071
+ }
1072
+
156
1073
  // ── Single-task executor ──────────────────────────────────────────────────
157
1074
 
158
1075
  async function executeOneTask(taskRecord: TaskRecord): Promise<void> {
@@ -173,6 +1090,68 @@ async function runConvoy(
173
1090
  }
174
1091
  taskAdapterMap.set(taskRecord.id, taskAdapter)
175
1092
 
1093
+ // ── Check inputs availability ────────────────────────────────────────────
1094
+ if (taskRecord.inputs) {
1095
+ const inputs: TaskInput[] = JSON.parse(taskRecord.inputs)
1096
+ for (const input of inputs) {
1097
+ const artifact = store.getArtifact(convoyId, input.name)
1098
+ if (!artifact) {
1099
+ store.updateTaskStatus(taskRecord.id, convoyId, 'wait-for-input')
1100
+ events.emit('task_waiting_input', {
1101
+ task_id: taskRecord.id,
1102
+ missing_artifact: input.name,
1103
+ from_task: input.from,
1104
+ }, { convoy_id: convoyId, task_id: taskRecord.id })
1105
+ taskAdapterMap.delete(taskRecord.id)
1106
+ return
1107
+ }
1108
+ }
1109
+ }
1110
+
1111
+ // ── Circuit breaker check ──────────────────────────────────────────────
1112
+ if (circuitBreakerConfig) {
1113
+ if (!circuitBreaker.canAssign(taskRecord.agent)) {
1114
+ const fallback = circuitBreaker.fallback
1115
+ if (fallback) {
1116
+ events.emit('circuit_breaker_fallback', {
1117
+ original_agent: taskRecord.agent,
1118
+ fallback_agent: fallback,
1119
+ state: circuitBreaker.getState(taskRecord.agent),
1120
+ }, { convoy_id: convoyId, task_id: taskRecord.id })
1121
+ } else {
1122
+ events.emit('circuit_breaker_blocked', {
1123
+ agent: taskRecord.agent,
1124
+ state: circuitBreaker.getState(taskRecord.agent),
1125
+ }, { convoy_id: convoyId, task_id: taskRecord.id })
1126
+ }
1127
+ store.updateTaskStatus(taskRecord.id, convoyId, 'skipped', {
1128
+ output: `Circuit breaker open for agent "${taskRecord.agent}". ${fallback ? `No fallback available.` : `No fallback configured.`}`,
1129
+ })
1130
+ completedCount++
1131
+ taskAdapterMap.delete(taskRecord.id)
1132
+ cascadeFailure(taskRecord.id)
1133
+ return
1134
+ }
1135
+ }
1136
+
1137
+ // ── Intelligence: circuit breaker weak-area avoidance (Phase 18.2) ─────
1138
+ if (spec.defaults?.avoid_weak_agents) {
1139
+ try {
1140
+ const weakAreas = feedCircuitBreaker(taskRecord.agent, basePath)
1141
+ const taskFiles = taskRecord.files ? JSON.parse(taskRecord.files) as string[] : []
1142
+ const matchesWeakArea = weakAreas.some(area =>
1143
+ taskFiles.some(f => f.toLowerCase().includes(area.toLowerCase()))
1144
+ )
1145
+ if (matchesWeakArea && taskRecord.retries === 0) {
1146
+ events.emit('weak_area_skipped', { agent: taskRecord.agent, weak_areas: weakAreas, task_files: taskFiles }, { convoy_id: convoyId, task_id: taskRecord.id })
1147
+ store.updateTaskStatus(taskRecord.id, convoyId, 'skipped', { output: `Agent "${taskRecord.agent}" has weak-area match for task files. Skipped by avoid_weak_agents policy.` })
1148
+ completedCount++
1149
+ taskAdapterMap.delete(taskRecord.id)
1150
+ return
1151
+ }
1152
+ } catch { /* non-critical */ }
1153
+ }
1154
+
176
1155
  // Create worktree (skip for copilot adapter)
177
1156
  let worktreePath: string | null = null
178
1157
  if (taskAdapter.name !== 'copilot') {
@@ -209,21 +1188,166 @@ async function runConvoy(
209
1188
  const task = taskRecordToTask(taskRecord)
210
1189
  activeTaskMap.set(taskRecord.id, task)
211
1190
 
212
- process.stdout.write(` ${c.cyan('▶')} ${c.bold(`[${taskRecord.id}]`)} ${taskRecord.agent}${worktreePath ? c.dim(' (worktree)') : ''}\n`)
213
- events.emit(
214
- 'task_started',
215
- { worker_id: workerId },
216
- { convoy_id: convoyId, task_id: taskRecord.id, worker_id: workerId },
217
- )
1191
+ // ── Inject inputs into prompt ────────────────────────────────────────────
1192
+ if (taskRecord.inputs) {
1193
+ const inputs: TaskInput[] = JSON.parse(taskRecord.inputs)
1194
+ for (const input of inputs) {
1195
+ const artifact = store.getArtifact(convoyId, input.name)!
1196
+ const templateVar = input.as ?? input.name
1197
+ task.prompt = task.prompt.replaceAll(`{{input.${templateVar}}}`, artifact.content)
1198
+ }
1199
+ }
1200
+
1201
+ // ── Scratchpad template substitution (Phase 17.1) ───────────────────────
1202
+ const scratchpadRe = /\{\{scratchpad\.([a-zA-Z0-9_.-]+)\}\}/g
1203
+ let scratchpadMatch: RegExpExecArray | null
1204
+ while ((scratchpadMatch = scratchpadRe.exec(task.prompt)) !== null) {
1205
+ const spKey = scratchpadMatch[1]
1206
+ const spVal = store.getScratchpadValue(spKey)
1207
+ if (spVal !== null) {
1208
+ task.prompt = task.prompt.replaceAll(`{{scratchpad.${spKey}}}`, spVal)
1209
+ scratchpadRe.lastIndex = 0 // reset after replaceAll
1210
+ }
1211
+ }
1212
+
1213
+ process.stdout.write(` ${c.cyan('▶')} ${c.bold(`[${taskRecord.id}]`)} ${taskRecord.agent}${worktreePath ? c.dim(' (worktree)') : ''}\n`)
1214
+ events.emit(
1215
+ 'task_started',
1216
+ { worker_id: workerId },
1217
+ { convoy_id: convoyId, task_id: taskRecord.id, worker_id: workerId },
1218
+ )
1219
+
1220
+ const taskStartTime = Date.now()
1221
+
1222
+ // ── Outbound prompt scan — NEVER send a prompt containing secrets ─────────
1223
+ const promptScan = scanForSecrets(taskRecord.prompt, `task:${taskRecord.id}`)
1224
+ if (!promptScan.clean) {
1225
+ store.updateTaskStatus(taskRecord.id, convoyId, 'failed', {
1226
+ finished_at: now(),
1227
+ output: `Secret detected in prompt — task blocked before execution.\nFindings:\n${
1228
+ promptScan.findings
1229
+ .map((f) => ` ${f.pattern} at line ${f.line}: ${f.snippet}`)
1230
+ .join('\n')
1231
+ }`,
1232
+ })
1233
+ store.updateWorkerStatus(workerId, 'failed', { finished_at: now() })
1234
+ completedCount++
1235
+ events.emit(
1236
+ 'secret_leak_prevented',
1237
+ {
1238
+ task_id: taskRecord.id,
1239
+ findings_count: promptScan.findings.length,
1240
+ patterns: promptScan.findings.map((f) => f.pattern),
1241
+ },
1242
+ { convoy_id: convoyId, task_id: taskRecord.id },
1243
+ )
1244
+ cascadeFailure(taskRecord.id)
1245
+ taskAdapterMap.delete(taskRecord.id)
1246
+ return
1247
+ }
1248
+
1249
+ const timeout = makeTimeoutPromise(taskRecord.timeout_ms)
1250
+ let result: ExecuteResult
1251
+
1252
+ // Retrieve steps from spec if defined
1253
+ const specTask = (spec.tasks ?? []).find(t => t.id === taskRecord.id)
1254
+ const steps: TaskStep[] | undefined = specTask?.steps
1255
+ const taskHooks: Hook[] = specTask?.hooks ?? []
1256
+
1257
+ // ── Intelligence: inject lessons (Phase 18.1) ─────────────────────────
1258
+ if (spec.defaults?.inject_lessons !== false) {
1259
+ try {
1260
+ const taskFiles = taskRecord.files ? JSON.parse(taskRecord.files) as string[] : []
1261
+ const lessons = readLessons(taskRecord.agent, taskFiles, basePath)
1262
+ if (lessons.length > 0) {
1263
+ const lessonsBlock
1264
+ = '\n\n---\nRelevant lessons from previous sessions:\n'
1265
+ + lessons.join('\n\n')
1266
+ + '\n---\n\n'
1267
+ task.prompt = lessonsBlock + task.prompt
1268
+ }
1269
+ } catch { /* non-critical */ }
1270
+ }
1271
+ // ── Intelligence: inject persistent agent identity (Phase 17.2) ────────
1272
+ const specTaskForPersistent = (spec.tasks ?? []).find(t => t.id === taskRecord.id)
1273
+ if (specTaskForPersistent?.persistent) {
1274
+ try {
1275
+ const identities = store.getAgentIdentities(taskRecord.agent, 3)
1276
+ if (identities.length > 0) {
1277
+ const contextBlock = '\n\n[Previous work context]\n'
1278
+ + identities.map(id => id.summary).join('\n\n')
1279
+ + '\n[End previous context]\n\n'
1280
+ task.prompt = contextBlock + task.prompt
1281
+ }
1282
+ } catch { /* non-critical */ }
1283
+ }
1284
+ // ── Intelligence: inject discovered issues instruction (Phase 18.4) ────
1285
+ if (spec.defaults?.track_discovered_issues) {
1286
+ task.prompt = injectDiscoveredIssuesInstruction(task.prompt)
1287
+ }
1288
+
1289
+ // ── pre_task hooks ────────────────────────────────────────────────────────
1290
+ if (taskHooks.length > 0) {
1291
+ const preResult = await runHooks(taskHooks, 'pre_task', {
1292
+ taskId: taskRecord.id,
1293
+ convoyId,
1294
+ cwd: worktreePath ?? basePath,
1295
+ })
1296
+ if (!preResult.passed) {
1297
+ await removeWorktree()
1298
+ const hookLabel = preResult.failedHook?.name ?? preResult.failedHook?.type ?? 'unknown'
1299
+ store.withTransaction(() => {
1300
+ store.updateTaskStatus(taskRecord.id, convoyId, 'hook-failed', {
1301
+ finished_at: now(),
1302
+ output: `pre_task hook "${hookLabel}" failed: ${preResult.error ?? ''}`,
1303
+ exit_code: 1,
1304
+ })
1305
+ store.updateWorkerStatus(workerId, 'failed', { finished_at: now() })
1306
+ })
1307
+ completedCount++
1308
+ process.stdout.write(` ${c.red('✗')} ${c.bold(`[${taskRecord.id}]`)} pre_task hook failed ${c.dim(`[${completedCount}/${totalTasks}]`)}\n`)
1309
+ events.emit('task_failed', { reason: 'hook-failed', hook: hookLabel, worker_id: workerId }, { convoy_id: convoyId, task_id: taskRecord.id, worker_id: workerId })
1310
+ cascadeFailure(taskRecord.id)
1311
+ taskAdapterMap.delete(taskRecord.id)
1312
+ return
1313
+ }
1314
+ }
1315
+
1316
+ // ── Symlink security scan (pre-execution) ────────────────────────────────
1317
+ const taskFiles = taskRecord.files ? JSON.parse(taskRecord.files) as string[] : []
1318
+ if (taskFiles.length > 0 && worktreePath) {
1319
+ try {
1320
+ scanSymlinks(taskFiles, worktreePath)
1321
+ } catch (err) {
1322
+ await removeWorktree()
1323
+ store.withTransaction(() => {
1324
+ store.updateTaskStatus(taskRecord.id, convoyId, 'failed', {
1325
+ finished_at: now(),
1326
+ output: `Symlink security check failed: ${(err as Error).message}`,
1327
+ exit_code: 1,
1328
+ })
1329
+ store.updateWorkerStatus(workerId, 'failed', { finished_at: now() })
1330
+ })
1331
+ completedCount++
1332
+ events.emit('task_failed', { reason: 'symlink-escape', worker_id: workerId }, { convoy_id: convoyId, task_id: taskRecord.id, worker_id: workerId })
1333
+ cascadeFailure(taskRecord.id)
1334
+ taskAdapterMap.delete(taskRecord.id)
1335
+ return
1336
+ }
1337
+ }
218
1338
 
219
- const taskStartTime = Date.now()
220
- const timeout = makeTimeoutPromise(taskRecord.timeout_ms)
221
- let result: ExecuteResult
222
1339
  try {
223
- result = await Promise.race([
224
- taskAdapter.execute(task, { verbose, cwd: worktreePath ?? basePath }),
225
- timeout.promise,
226
- ])
1340
+ if (steps && steps.length > 0) {
1341
+ result = await Promise.race([
1342
+ executeSteps(taskRecord, steps, taskAdapter, worktreePath, basePath, store, convoyId, verbose),
1343
+ timeout.promise,
1344
+ ])
1345
+ } else {
1346
+ result = await Promise.race([
1347
+ taskAdapter.execute(task, { verbose, cwd: worktreePath ?? basePath }),
1348
+ timeout.promise,
1349
+ ])
1350
+ }
227
1351
  timeout.clear()
228
1352
  } catch (err) {
229
1353
  timeout.clear()
@@ -247,12 +1371,14 @@ async function runConvoy(
247
1371
 
248
1372
  const freshRecord = store.getTask(taskRecord.id, convoyId)!
249
1373
  if (freshRecord.retries < freshRecord.max_retries && spec.on_failure !== 'stop') {
1374
+ const contextPrefix = `Previous attempt timed out.\n\nFix the issues and try again.\n\n`
250
1375
  store.updateTaskStatus(taskRecord.id, convoyId, 'pending', {
251
1376
  retries: freshRecord.retries + 1,
252
1377
  worker_id: null,
253
1378
  worktree: null,
254
1379
  started_at: null,
255
1380
  finished_at: null,
1381
+ prompt: contextPrefix + taskRecord.prompt,
256
1382
  })
257
1383
  store.updateWorkerStatus(workerId, 'killed', { finished_at: finishedAt })
258
1384
  process.stdout.write(` ${c.yellow('⟳')} ${c.bold(`[${taskRecord.id}]`)} timed out, retry ${freshRecord.retries + 1}/${freshRecord.max_retries}\n`)
@@ -292,25 +1418,727 @@ async function runConvoy(
292
1418
  phase: taskRecord.phase,
293
1419
  convoy_id: convoyId,
294
1420
  }, { convoy_id: convoyId, task_id: taskRecord.id })
295
- cascadeFailure(taskRecord.id)
1421
+ handleExhaustion(freshRecord, 'timeout', result.output || null)
296
1422
  }
297
1423
  taskAdapterMap.delete(taskRecord.id)
298
1424
  return
299
1425
  }
300
1426
 
301
1427
  // ── Success ─────────────────────────────────────────────────────────────
302
- if (result.success) {
303
- if (worktreePath) {
304
- try {
305
- await mergeQueue.merge(worktreePath, `convoy-${workerId}`, baseBranch)
306
- } catch (err) {
307
- if (verbose) {
1428
+ if (result.success) { // ── Per-task gates ─────────────────────────────────────────────────────
1429
+ const taskGates = taskRecord.gates ? (JSON.parse(taskRecord.gates) as string[]) : []
1430
+ if (taskGates.length > 0) {
1431
+ let gateFailure: { command: string; exitCode: number; output: string } | null = null
1432
+ for (const command of taskGates) {
1433
+ try {
1434
+ // SECURITY: Gate/hook commands come from the .convoy.yml spec file, which is operator-controlled.
1435
+ // They are NOT user-supplied and are part of the trusted build configuration.
1436
+ await execFile('sh', ['-c', command], { cwd: worktreePath ?? basePath })
1437
+ } catch (err) {
1438
+ const execErr = err as Error & { code?: unknown; stderr?: string; stdout?: string }
1439
+ const code = typeof execErr.code === 'number' ? execErr.code : 1
1440
+ const output = execErr.stderr || execErr.stdout || execErr.message || ''
1441
+ gateFailure = { command, exitCode: code, output }
1442
+ break
1443
+ }
1444
+ }
1445
+
1446
+ if (gateFailure !== null) {
1447
+ await removeWorktree()
1448
+ const freshRecord = store.getTask(taskRecord.id, convoyId)!
1449
+ if (freshRecord.retries < freshRecord.max_retries && spec.on_failure !== 'stop') {
1450
+ const contextPrefix = `Previous attempt's gate check failed.\nGate: ${gateFailure.command}\nExit code: ${gateFailure.exitCode}\nOutput:\n${gateFailure.output || '(no output)'}\n\nFix the issues and try again.\n\n`
1451
+ store.updateTaskStatus(taskRecord.id, convoyId, 'pending', {
1452
+ retries: freshRecord.retries + 1,
1453
+ worker_id: null,
1454
+ worktree: null,
1455
+ started_at: null,
1456
+ finished_at: null,
1457
+ prompt: contextPrefix + taskRecord.prompt,
1458
+ })
1459
+ store.updateWorkerStatus(workerId, 'failed', { finished_at: finishedAt })
1460
+ process.stdout.write(` ${c.yellow('⟳')} ${c.bold(`[${taskRecord.id}]`)} gate failed, retry ${freshRecord.retries + 1}/${freshRecord.max_retries}\n`)
1461
+ } else {
1462
+ store.withTransaction(() => {
1463
+ store.updateTaskStatus(taskRecord.id, convoyId, 'gate-failed', {
1464
+ finished_at: finishedAt,
1465
+ output: `Gate failed: ${gateFailure!.command}\nExit code: ${gateFailure!.exitCode}\n${gateFailure!.output}`,
1466
+ exit_code: gateFailure!.exitCode,
1467
+ })
1468
+ store.updateWorkerStatus(workerId, 'failed', { finished_at: finishedAt })
1469
+ })
1470
+ completedCount++
1471
+ process.stdout.write(` ${c.red('✗')} ${c.bold(`[${taskRecord.id}]`)} gate failed ${elapsed} ${c.dim(`[${completedCount}/${totalTasks}]`)}\n`)
1472
+ events.emit(
1473
+ 'task_failed',
1474
+ { reason: 'gate-failed', gate: gateFailure.command, exit_code: gateFailure.exitCode, worker_id: workerId },
1475
+ { convoy_id: convoyId, task_id: taskRecord.id, worker_id: workerId },
1476
+ )
1477
+ events.emit('session', {
1478
+ agent: taskRecord.agent,
1479
+ model: taskRecord.model ?? taskAdapter.name,
1480
+ task: taskRecord.id,
1481
+ outcome: 'failed',
1482
+ duration_min: Math.round((Date.now() - taskStartTime) / 60_000),
1483
+ files_changed: 0,
1484
+ retries: freshRecord.retries,
1485
+ convoy_id: convoyId,
1486
+ }, { convoy_id: convoyId, task_id: taskRecord.id })
1487
+ events.emit('delegation', {
1488
+ session_id: convoyId,
1489
+ agent: taskRecord.agent,
1490
+ model: taskRecord.model ?? taskAdapter.name,
1491
+ tier: 'standard',
1492
+ mechanism: 'convoy',
1493
+ outcome: 'failed',
1494
+ retries: freshRecord.retries,
1495
+ phase: taskRecord.phase,
1496
+ convoy_id: convoyId,
1497
+ }, { convoy_id: convoyId, task_id: taskRecord.id })
1498
+ handleExhaustion(freshRecord, 'gate-failed', gateFailure!.output || null)
1499
+ }
1500
+ taskAdapterMap.delete(taskRecord.id)
1501
+ return
1502
+ }
1503
+ }
1504
+
1505
+ // ── Built-in gates ────────────────────────────────────────────────────
1506
+ const builtInGates = spec.defaults?.built_in_gates
1507
+ if (builtInGates && worktreePath) {
1508
+ if (builtInGates.browser_test) {
1509
+ const specTask = (spec.tasks ?? []).find(t => t.id === taskRecord.id)
1510
+ const taskBrowserConfig = specTask?.browser_test ?? spec.defaults?.browser_test
1511
+ if (!taskBrowserConfig) {
308
1512
  process.stderr.write(
309
- `Warning: merge failed for ${taskRecord.id}: ${(err as Error).message}\n`,
1513
+ `Warning: browser_test gate enabled but no browser_test config (urls) found — skipping\n`,
1514
+ )
1515
+ } else {
1516
+ const browserResult = await browserTestGate({
1517
+ mcpServers: spec.defaults?.mcp_servers ?? [],
1518
+ taskConfig: taskBrowserConfig,
1519
+ worktreePath,
1520
+ approvalTimeout: spec.defaults?.mcp_server_approval_timeout,
1521
+ })
1522
+ events.emit(
1523
+ 'built_in_gate_result',
1524
+ { gate: 'browser_test', passed: browserResult.passed, output: browserResult.output },
1525
+ { convoy_id: convoyId, task_id: taskRecord.id },
1526
+ )
1527
+ if (!browserResult.passed) {
1528
+ await removeWorktree()
1529
+ const freshRecord = store.getTask(taskRecord.id, convoyId)!
1530
+ if (freshRecord.retries < freshRecord.max_retries && spec.on_failure !== 'stop') {
1531
+ store.updateTaskStatus(taskRecord.id, convoyId, 'pending', {
1532
+ retries: freshRecord.retries + 1,
1533
+ worker_id: null,
1534
+ worktree: null,
1535
+ started_at: null,
1536
+ finished_at: null,
1537
+ })
1538
+ store.updateWorkerStatus(workerId, 'failed', { finished_at: finishedAt })
1539
+ process.stdout.write(
1540
+ ` ${c.yellow('⟳')} ${c.bold(`[${taskRecord.id}]`)} browser test gate failed, retry ${freshRecord.retries + 1}/${freshRecord.max_retries}\n`,
1541
+ )
1542
+ } else {
1543
+ store.withTransaction(() => {
1544
+ store.updateTaskStatus(taskRecord.id, convoyId, 'gate-failed', {
1545
+ finished_at: finishedAt,
1546
+ output: `Built-in gate (browser_test) failed:\n${browserResult.output}`,
1547
+ exit_code: 1,
1548
+ })
1549
+ store.updateWorkerStatus(workerId, 'failed', { finished_at: finishedAt })
1550
+ })
1551
+ completedCount++
1552
+ process.stdout.write(
1553
+ ` ${c.red('✗')} ${c.bold(`[${taskRecord.id}]`)} browser test gate failed ${elapsed} ${c.dim(`[${completedCount}/${totalTasks}]`)}\n`,
1554
+ )
1555
+ events.emit(
1556
+ 'task_failed',
1557
+ { reason: 'gate-failed', gate: 'browser_test', worker_id: workerId },
1558
+ { convoy_id: convoyId, task_id: taskRecord.id, worker_id: workerId },
1559
+ )
1560
+ handleExhaustion(freshRecord, 'browser-test', browserResult.output)
1561
+ }
1562
+ taskAdapterMap.delete(taskRecord.id)
1563
+ return
1564
+ }
1565
+ }
1566
+ }
1567
+
1568
+ let changedFiles: string[] = []
1569
+ let diff = ''
1570
+ try {
1571
+ const { stdout: filesOut } = await execFile(
1572
+ 'git', ['diff', '--name-only', `${baseBranch}..HEAD`],
1573
+ { cwd: worktreePath },
1574
+ )
1575
+ changedFiles = filesOut.split('\n').filter(Boolean)
1576
+ const { stdout: diffOut } = await execFile(
1577
+ 'git', ['diff', `${baseBranch}..HEAD`],
1578
+ { cwd: worktreePath },
1579
+ )
1580
+ diff = diffOut
1581
+ } catch { /* no commits in worktree yet — skip */ }
1582
+
1583
+ // Secret scan gate
1584
+ if (builtInGates.secret_scan && changedFiles.length > 0) {
1585
+ const scanResult = await runSecretScanGate(changedFiles, worktreePath)
1586
+ events.emit(
1587
+ 'built_in_gate_result',
1588
+ { gate: 'secret_scan', passed: scanResult.passed, output: scanResult.output },
1589
+ { convoy_id: convoyId, task_id: taskRecord.id },
1590
+ )
1591
+ if (!scanResult.passed) {
1592
+ await removeWorktree()
1593
+ const freshRecord = store.getTask(taskRecord.id, convoyId)!
1594
+ if (freshRecord.retries < freshRecord.max_retries && spec.on_failure !== 'stop') {
1595
+ store.updateTaskStatus(taskRecord.id, convoyId, 'pending', {
1596
+ retries: freshRecord.retries + 1,
1597
+ worker_id: null,
1598
+ worktree: null,
1599
+ started_at: null,
1600
+ finished_at: null,
1601
+ prompt: `Secret scan gate failed.\n${scanResult.output}\n\nFix the issues and try again.\n\n${taskRecord.prompt}`,
1602
+ })
1603
+ store.updateWorkerStatus(workerId, 'failed', { finished_at: finishedAt })
1604
+ process.stdout.write(
1605
+ ` ${c.yellow('⟳')} ${c.bold(`[${taskRecord.id}]`)} secret scan gate failed, retry ${freshRecord.retries + 1}/${freshRecord.max_retries}\n`,
1606
+ )
1607
+ } else {
1608
+ store.withTransaction(() => {
1609
+ store.updateTaskStatus(taskRecord.id, convoyId, 'gate-failed', {
1610
+ finished_at: finishedAt,
1611
+ output: `Built-in gate (secret_scan) failed:\n${scanResult.output}`,
1612
+ exit_code: 1,
1613
+ })
1614
+ store.updateWorkerStatus(workerId, 'failed', { finished_at: finishedAt })
1615
+ })
1616
+ completedCount++
1617
+ process.stdout.write(
1618
+ ` ${c.red('✗')} ${c.bold(`[${taskRecord.id}]`)} secret scan gate failed ${elapsed} ${c.dim(`[${completedCount}/${totalTasks}]`)}\n`,
1619
+ )
1620
+ events.emit(
1621
+ 'task_failed',
1622
+ { reason: 'gate-failed', gate: 'secret_scan', worker_id: workerId },
1623
+ { convoy_id: convoyId, task_id: taskRecord.id, worker_id: workerId },
1624
+ )
1625
+ handleExhaustion(freshRecord, 'secret-scan', scanResult.output)
1626
+ }
1627
+ taskAdapterMap.delete(taskRecord.id)
1628
+ return
1629
+ }
1630
+ }
1631
+
1632
+ // Blast radius gate
1633
+ if (builtInGates.blast_radius && diff) {
1634
+ const blastResult = runBlastRadiusGate(diff)
1635
+ events.emit(
1636
+ 'built_in_gate_result',
1637
+ { gate: 'blast_radius', level: blastResult.level, passed: blastResult.passed, output: blastResult.output },
1638
+ { convoy_id: convoyId, task_id: taskRecord.id },
1639
+ )
1640
+ if (!blastResult.passed) {
1641
+ await removeWorktree()
1642
+ const freshRecord = store.getTask(taskRecord.id, convoyId)!
1643
+ if (freshRecord.retries < freshRecord.max_retries && spec.on_failure !== 'stop') {
1644
+ store.updateTaskStatus(taskRecord.id, convoyId, 'pending', {
1645
+ retries: freshRecord.retries + 1,
1646
+ worker_id: null,
1647
+ worktree: null,
1648
+ started_at: null,
1649
+ finished_at: null,
1650
+ prompt: `Blast radius gate failed.\n${blastResult.output}\n\nFix the issues and try again.\n\n${taskRecord.prompt}`,
1651
+ })
1652
+ store.updateWorkerStatus(workerId, 'failed', { finished_at: finishedAt })
1653
+ process.stdout.write(
1654
+ ` ${c.yellow('⟳')} ${c.bold(`[${taskRecord.id}]`)} blast radius gate failed, retry ${freshRecord.retries + 1}/${freshRecord.max_retries}\n`,
1655
+ )
1656
+ } else {
1657
+ store.withTransaction(() => {
1658
+ store.updateTaskStatus(taskRecord.id, convoyId, 'gate-failed', {
1659
+ finished_at: finishedAt,
1660
+ output: `Built-in gate (blast_radius) failed:\n${blastResult.output}`,
1661
+ exit_code: 1,
1662
+ })
1663
+ store.updateWorkerStatus(workerId, 'failed', { finished_at: finishedAt })
1664
+ })
1665
+ completedCount++
1666
+ process.stdout.write(
1667
+ ` ${c.red('✗')} ${c.bold(`[${taskRecord.id}]`)} blast radius gate failed ${elapsed} ${c.dim(`[${completedCount}/${totalTasks}]`)}\n`,
1668
+ )
1669
+ events.emit(
1670
+ 'task_failed',
1671
+ { reason: 'gate-failed', gate: 'blast_radius', worker_id: workerId },
1672
+ { convoy_id: convoyId, task_id: taskRecord.id, worker_id: workerId },
1673
+ )
1674
+ handleExhaustion(freshRecord, 'gate-failed', blastResult.output)
1675
+ }
1676
+ taskAdapterMap.delete(taskRecord.id)
1677
+ return
1678
+ }
1679
+ }
1680
+ }
1681
+
1682
+ // ── Drift detection ──────────────────────────────────────────────────
1683
+ const specTaskForDrift = (spec.tasks ?? []).find(t => t.id === taskRecord.id)
1684
+ const isDriftEnabled = specTaskForDrift?.detect_drift ?? spec.defaults?.detect_drift ?? false
1685
+
1686
+ if (isDriftEnabled && taskRecord.drift_retried === 0) {
1687
+ const driftResult = await detectDrift(taskRecord, taskAdapter)
1688
+
1689
+ events.emit('drift_check_result', {
1690
+ task_id: taskRecord.id,
1691
+ score: driftResult.score,
1692
+ threshold: driftResult.threshold,
1693
+ explanation: driftResult.explanation,
1694
+ drifted: driftResult.drifted,
1695
+ }, { convoy_id: convoyId, task_id: taskRecord.id })
1696
+
1697
+ store.updateTaskDrift(taskRecord.id, convoyId, { drift_score: driftResult.score })
1698
+
1699
+ if (driftResult.drifted) {
1700
+ events.emit('drift_detected', {
1701
+ task_id: taskRecord.id,
1702
+ score: driftResult.score,
1703
+ threshold: driftResult.threshold,
1704
+ }, { convoy_id: convoyId, task_id: taskRecord.id })
1705
+
1706
+ await removeWorktree()
1707
+ store.updateTaskDrift(taskRecord.id, convoyId, { drift_retried: 1 })
1708
+ store.updateTaskStatus(taskRecord.id, convoyId, 'pending', {
1709
+ worker_id: null,
1710
+ worktree: null,
1711
+ started_at: null,
1712
+ finished_at: null,
1713
+ })
1714
+ store.updateWorkerStatus(workerId, 'failed', { finished_at: finishedAt })
1715
+ process.stdout.write(` ${c.yellow('⟳')} ${c.bold(`[${taskRecord.id}]`)} drift detected (score: ${driftResult.score.toFixed(2)}), retrying\n`)
1716
+ taskAdapterMap.delete(taskRecord.id)
1717
+ return
1718
+ }
1719
+ }
1720
+
1721
+ // ── Review pipeline ──────────────────────────────────────────────────
1722
+ const specTaskForReview = (spec.tasks ?? []).find(t => t.id === taskRecord.id)
1723
+ const taskReviewSetting: string = specTaskForReview?.review ?? spec.defaults?.review ?? 'auto'
1724
+
1725
+ if (taskReviewSetting !== 'none') {
1726
+ // Compute diff stats from worktree
1727
+ let reviewChangedFiles: string[] = []
1728
+ let reviewDiffLines = 0
1729
+
1730
+ if (worktreePath) {
1731
+ try {
1732
+ const { stdout: filesOut } = await execFile(
1733
+ 'git', ['diff', '--name-only', `${baseBranch}..HEAD`],
1734
+ { cwd: worktreePath },
1735
+ )
1736
+ reviewChangedFiles = filesOut.split('\n').filter(Boolean)
1737
+ const { stdout: diffOut } = await execFile(
1738
+ 'git', ['diff', `${baseBranch}..HEAD`],
1739
+ { cwd: worktreePath },
310
1740
  )
1741
+ reviewDiffLines = diffOut.split('\n').filter(l => l.startsWith('+') || l.startsWith('-')).filter(l => !l.startsWith('+++') && !l.startsWith('---')).length
1742
+ } catch { /* no commits yet */ }
1743
+ }
1744
+
1745
+ const diffStats: DiffStats = {
1746
+ linesChanged: reviewDiffLines,
1747
+ filesChanged: reviewChangedFiles.length,
1748
+ filePaths: reviewChangedFiles,
1749
+ }
1750
+
1751
+ // Determine review level
1752
+ let reviewLevel: ReviewLevel
1753
+ if (taskReviewSetting === 'fast') {
1754
+ reviewLevel = 'fast'
1755
+ } else if (taskReviewSetting === 'panel') {
1756
+ reviewLevel = 'panel'
1757
+ } else {
1758
+ reviewLevel = evaluateReviewLevel(taskRecord, diffStats, spec.defaults?.review_heuristics, true)
1759
+ }
1760
+
1761
+ const reviewerModel = spec.defaults?.reviewer_model ?? 'default'
1762
+ events.emit('review_started', { level: reviewLevel, task_id: taskRecord.id, model: reviewerModel }, { convoy_id: convoyId, task_id: taskRecord.id })
1763
+
1764
+ if (reviewLevel === 'auto-pass') {
1765
+ store.updateTaskReview(taskRecord.id, convoyId, {
1766
+ review_level: 'auto-pass',
1767
+ review_verdict: 'pass',
1768
+ review_tokens: 0,
1769
+ review_model: reviewerModel,
1770
+ })
1771
+ events.emit('review_verdict', { level: 'auto-pass', verdict: 'pass', tokens: 0, model: reviewerModel, feedback_length: 0 }, { convoy_id: convoyId, task_id: taskRecord.id })
1772
+ } else if (reviewLevel === 'fast') {
1773
+ // Check review budget
1774
+ const reviewBudget = spec.defaults?.review_budget
1775
+ const onBudgetExceeded = spec.defaults?.on_review_budget_exceeded ?? 'skip'
1776
+
1777
+ if (reviewBudget != null && reviewTokensTotal >= reviewBudget) {
1778
+ if (onBudgetExceeded === 'stop') {
1779
+ const allPending = store.getTasksByConvoy(convoyId).filter(t => t.status === 'pending')
1780
+ for (const t of allPending) skipTask(t.id, 'review_budget exceeded with on_review_budget_exceeded: stop')
1781
+ store.withTransaction(() => {
1782
+ store.updateTaskStatus(taskRecord.id, convoyId, 'review-blocked', { finished_at: finishedAt, output: 'Review budget exceeded', exit_code: 1 })
1783
+ store.updateWorkerStatus(workerId, 'failed', { finished_at: finishedAt })
1784
+ })
1785
+ completedCount++
1786
+ process.stdout.write(` ${c.red('✗')} ${c.bold(`[${taskRecord.id}]`)} review budget exceeded (stop) ${elapsed} ${c.dim(`[${completedCount}/${totalTasks}]`)}\n`)
1787
+ events.emit('review_verdict', { level: 'fast', verdict: 'skip', tokens: 0, model: reviewerModel, feedback_length: 0, budget_exceeded: true }, { convoy_id: convoyId, task_id: taskRecord.id })
1788
+ taskAdapterMap.delete(taskRecord.id)
1789
+ return
1790
+ } else if (onBudgetExceeded === 'downgrade') {
1791
+ store.updateTaskReview(taskRecord.id, convoyId, { review_level: 'fast', review_verdict: 'pass', review_tokens: 0, review_model: reviewerModel })
1792
+ events.emit('review_verdict', { level: 'fast', verdict: 'pass', tokens: 0, model: reviewerModel, feedback_length: 0, budget_downgrade: true }, { convoy_id: convoyId, task_id: taskRecord.id })
1793
+ } else {
1794
+ // 'skip': treat as passed
1795
+ events.emit('review_verdict', { level: 'fast', verdict: 'pass', tokens: 0, model: reviewerModel, feedback_length: 0, budget_skip: true }, { convoy_id: convoyId, task_id: taskRecord.id })
1796
+ }
1797
+ } else {
1798
+ await reviewSemaphore.acquire()
1799
+ let reviewResult: ReviewResult
1800
+ try {
1801
+ if (reviewRunner) {
1802
+ reviewResult = await reviewRunner(taskRecord, 'fast', reviewerModel)
1803
+ } else {
1804
+ reviewResult = { verdict: 'pass', feedback: '', tokens: 0, model: reviewerModel }
1805
+ }
1806
+ } finally {
1807
+ reviewSemaphore.release()
1808
+ }
1809
+
1810
+ reviewTokensTotal += reviewResult.tokens
1811
+ store.updateTaskReview(taskRecord.id, convoyId, {
1812
+ review_level: 'fast',
1813
+ review_verdict: reviewResult.verdict,
1814
+ review_tokens: reviewResult.tokens,
1815
+ review_model: reviewResult.model,
1816
+ })
1817
+ store.updateConvoyReviewTokens(convoyId, reviewTokensTotal)
1818
+ events.emit('review_verdict', { level: 'fast', verdict: reviewResult.verdict, tokens: reviewResult.tokens, model: reviewResult.model, feedback_length: reviewResult.feedback.length }, { convoy_id: convoyId, task_id: taskRecord.id })
1819
+
1820
+ if (reviewResult.verdict === 'block') {
1821
+ await removeWorktree()
1822
+ const freshRecord = store.getTask(taskRecord.id, convoyId)!
1823
+ if (freshRecord.retries < freshRecord.max_retries && spec.on_failure !== 'stop') {
1824
+ const contextPrefix = `Previous attempt was blocked by review.\nFeedback:\n${reviewResult.feedback}\n\nFix the issues and try again.\n\n`
1825
+ store.updateTaskStatus(taskRecord.id, convoyId, 'pending', {
1826
+ retries: freshRecord.retries + 1,
1827
+ worker_id: null,
1828
+ worktree: null,
1829
+ started_at: null,
1830
+ finished_at: null,
1831
+ prompt: contextPrefix + taskRecord.prompt,
1832
+ })
1833
+ store.updateWorkerStatus(workerId, 'failed', { finished_at: finishedAt })
1834
+ process.stdout.write(` ${c.yellow('⟳')} ${c.bold(`[${taskRecord.id}]`)} review blocked, retry ${freshRecord.retries + 1}/${freshRecord.max_retries}\n`)
1835
+ taskAdapterMap.delete(taskRecord.id)
1836
+ return
1837
+ } else {
1838
+ store.withTransaction(() => {
1839
+ store.updateTaskStatus(taskRecord.id, convoyId, 'review-blocked', {
1840
+ finished_at: finishedAt,
1841
+ output: `Review blocked: ${reviewResult.feedback}`,
1842
+ exit_code: 1,
1843
+ })
1844
+ store.updateWorkerStatus(workerId, 'failed', { finished_at: finishedAt })
1845
+ })
1846
+ completedCount++
1847
+ process.stdout.write(` ${c.red('✗')} ${c.bold(`[${taskRecord.id}]`)} review blocked ${elapsed} ${c.dim(`[${completedCount}/${totalTasks}]`)}\n`)
1848
+ events.emit('task_failed', { reason: 'review-blocked', worker_id: workerId }, { convoy_id: convoyId, task_id: taskRecord.id, worker_id: workerId })
1849
+ handleExhaustion(freshRecord, 'review-blocked', reviewResult.feedback || null)
1850
+ taskAdapterMap.delete(taskRecord.id)
1851
+ return
1852
+ }
1853
+ }
1854
+ }
1855
+ } else {
1856
+ // panel: 3 concurrent reviewer calls, majority vote
1857
+ await reviewSemaphore.acquire()
1858
+ let panelResults: ReviewResult[]
1859
+ try {
1860
+ const noopRunner = (_t: TaskRecord, _l: ReviewLevel, m: string) => Promise.resolve({ verdict: 'pass' as const, feedback: '', tokens: 0, model: m })
1861
+ const runner = reviewRunner ?? noopRunner
1862
+ panelResults = await Promise.all([
1863
+ runner(taskRecord, 'panel', reviewerModel),
1864
+ runner(taskRecord, 'panel', reviewerModel),
1865
+ runner(taskRecord, 'panel', reviewerModel),
1866
+ ])
1867
+ } finally {
1868
+ reviewSemaphore.release()
1869
+ }
1870
+
1871
+ const panelPasses = panelResults.filter(r => r.verdict === 'pass').length
1872
+ const panelBlocks = panelResults.filter(r => r.verdict === 'block').length
1873
+ const totalPanelTokens = panelResults.reduce((sum, r) => sum + r.tokens, 0)
1874
+ reviewTokensTotal += totalPanelTokens
1875
+
1876
+ const freshForPanel = store.getTask(taskRecord.id, convoyId)!
1877
+ store.updateTaskReview(taskRecord.id, convoyId, {
1878
+ review_level: 'panel',
1879
+ review_verdict: panelPasses >= 2 ? 'pass' : 'block',
1880
+ review_tokens: totalPanelTokens,
1881
+ review_model: reviewerModel,
1882
+ panel_attempts: freshForPanel.panel_attempts + 1,
1883
+ })
1884
+ if (totalPanelTokens > 0) store.updateConvoyReviewTokens(convoyId, reviewTokensTotal)
1885
+ events.emit('review_verdict', { level: 'panel', verdict: panelPasses >= 2 ? 'pass' : 'block', tokens: totalPanelTokens, model: reviewerModel, feedback_length: panelResults.map(r => r.feedback).join('').length, passes: panelPasses, blocks: panelBlocks }, { convoy_id: convoyId, task_id: taskRecord.id })
1886
+
1887
+ if (panelBlocks >= 2) {
1888
+ const blockFeedback = panelResults.filter(r => r.verdict === 'block').map(r => r.feedback).join('\n\n---\n\n')
1889
+ await removeWorktree()
1890
+
1891
+ // Check for dispute trigger
1892
+ const updatedTask = store.getTask(taskRecord.id, convoyId)!
1893
+ if (updatedTask.panel_attempts >= 3) {
1894
+ const disputeId = `dispute-${taskRecord.id}-${Date.now()}`
1895
+ const onDispute = spec.defaults?.on_dispute ?? 'stop'
1896
+
1897
+ store.updateTaskDisputeStatus(taskRecord.id, convoyId, 'disputed', disputeId)
1898
+ writeDisputeToMarkdown(disputeId, convoyId, taskRecord, panelResults, events)
1899
+
1900
+ events.emit('dispute_opened', {
1901
+ dispute_id: disputeId,
1902
+ task_id: taskRecord.id,
1903
+ agent: taskRecord.agent,
1904
+ panel_attempts: updatedTask.panel_attempts,
1905
+ }, { convoy_id: convoyId, task_id: taskRecord.id })
1906
+
1907
+ if (onDispute === 'stop') {
1908
+ const allPending = store.getTasksByConvoy(convoyId).filter(t => t.status === 'pending')
1909
+ for (const t of allPending) {
1910
+ skipTask(t.id, `on_dispute: stop — task "${taskRecord.id}" disputed`)
1911
+ }
1912
+ }
1913
+
1914
+ completedCount++
1915
+ process.stdout.write(` ${c.red('⚡')} ${c.bold(`[${taskRecord.id}]`)} disputed after ${updatedTask.panel_attempts} panel attempts\n`)
1916
+ taskAdapterMap.delete(taskRecord.id)
1917
+ return
1918
+ }
1919
+
1920
+ const freshRecord = store.getTask(taskRecord.id, convoyId)!
1921
+ if (freshRecord.retries < freshRecord.max_retries && spec.on_failure !== 'stop') {
1922
+ const contextPrefix = `Previous attempt was blocked by panel review (${panelBlocks}/3 reviewers).\nMUST-FIX:\n${blockFeedback}\n\nFix the issues and try again.\n\n`
1923
+ store.updateTaskStatus(taskRecord.id, convoyId, 'pending', {
1924
+ retries: freshRecord.retries + 1,
1925
+ worker_id: null,
1926
+ worktree: null,
1927
+ started_at: null,
1928
+ finished_at: null,
1929
+ prompt: contextPrefix + taskRecord.prompt,
1930
+ })
1931
+ store.updateWorkerStatus(workerId, 'failed', { finished_at: finishedAt })
1932
+ process.stdout.write(` ${c.yellow('⟳')} ${c.bold(`[${taskRecord.id}]`)} panel blocked (${panelBlocks}/3), retry ${freshRecord.retries + 1}/${freshRecord.max_retries}\n`)
1933
+ taskAdapterMap.delete(taskRecord.id)
1934
+ return
1935
+ } else {
1936
+ store.withTransaction(() => {
1937
+ store.updateTaskStatus(taskRecord.id, convoyId, 'review-blocked', {
1938
+ finished_at: finishedAt,
1939
+ output: `Panel review blocked (${panelBlocks}/3): ${blockFeedback}`,
1940
+ exit_code: 1,
1941
+ })
1942
+ store.updateWorkerStatus(workerId, 'failed', { finished_at: finishedAt })
1943
+ })
1944
+ completedCount++
1945
+ process.stdout.write(` ${c.red('✗')} ${c.bold(`[${taskRecord.id}]`)} panel blocked ${elapsed} ${c.dim(`[${completedCount}/${totalTasks}]`)}\n`)
1946
+ events.emit('task_failed', { reason: 'review-blocked', worker_id: workerId }, { convoy_id: convoyId, task_id: taskRecord.id, worker_id: workerId })
1947
+ handleExhaustion(freshRecord, 'review-blocked', blockFeedback || null)
1948
+ taskAdapterMap.delete(taskRecord.id)
1949
+ return
1950
+ }
1951
+ }
1952
+ }
1953
+ }
1954
+
1955
+ // ── Intelligence: check discovered issues (Phase 18.4) ─────────────
1956
+ if (spec.defaults?.track_discovered_issues) {
1957
+ try {
1958
+ checkDiscoveredIssues(taskRecord.id, events, convoyId, worktreePath ?? basePath)
1959
+ } catch { /* non-critical */ }
1960
+ }
1961
+
1962
+ // ── post_task hooks ───────────────────────────────────────────────────
1963
+ if (taskHooks.length > 0) {
1964
+ const postResult = await runHooks(taskHooks, 'post_task', {
1965
+ taskId: taskRecord.id,
1966
+ convoyId,
1967
+ cwd: worktreePath ?? basePath,
1968
+ })
1969
+ if (!postResult.passed) {
1970
+ await removeWorktree()
1971
+ const hookLabel = postResult.failedHook?.name ?? postResult.failedHook?.type ?? 'unknown'
1972
+ store.withTransaction(() => {
1973
+ store.updateTaskStatus(taskRecord.id, convoyId, 'hook-failed', {
1974
+ finished_at: finishedAt,
1975
+ output: `post_task hook "${hookLabel}" failed: ${postResult.error ?? ''}`,
1976
+ exit_code: 1,
1977
+ })
1978
+ store.updateWorkerStatus(workerId, 'failed', { finished_at: finishedAt })
1979
+ })
1980
+ completedCount++
1981
+ process.stdout.write(` ${c.red('✗')} ${c.bold(`[${taskRecord.id}]`)} post_task hook failed ${elapsed} ${c.dim(`[${completedCount}/${totalTasks}]`)}\n`)
1982
+ events.emit('task_failed', { reason: 'hook-failed', hook: hookLabel, worker_id: workerId }, { convoy_id: convoyId, task_id: taskRecord.id, worker_id: workerId })
1983
+ cascadeFailure(taskRecord.id)
1984
+ taskAdapterMap.delete(taskRecord.id)
1985
+ return
1986
+ }
1987
+ }
1988
+
1989
+ // ── Symlink security scan (post-execution) ───────────────────────────
1990
+ if (taskFiles.length > 0 && worktreePath) {
1991
+ try {
1992
+ scanNewSymlinks(worktreePath, taskFiles)
1993
+ } catch (err) {
1994
+ await removeWorktree()
1995
+ store.withTransaction(() => {
1996
+ store.updateTaskStatus(taskRecord.id, convoyId, 'failed', {
1997
+ finished_at: finishedAt,
1998
+ output: `Post-execution symlink security check failed: ${(err as Error).message}`,
1999
+ exit_code: 1,
2000
+ })
2001
+ store.updateWorkerStatus(workerId, 'failed', { finished_at: finishedAt })
2002
+ })
2003
+ completedCount++
2004
+ events.emit('task_failed', { reason: 'symlink-escape-post', worker_id: workerId }, { convoy_id: convoyId, task_id: taskRecord.id, worker_id: workerId })
2005
+ cascadeFailure(taskRecord.id)
2006
+ taskAdapterMap.delete(taskRecord.id)
2007
+ return
2008
+ }
2009
+ }
2010
+
2011
+ if (worktreePath) {
2012
+ let mergeAttempt = 0
2013
+ const maxMergeAttempts = 2
2014
+ let merged = false
2015
+
2016
+ while (mergeAttempt < maxMergeAttempts && !merged) {
2017
+ try {
2018
+ await mergeQueue.merge(worktreePath, `convoy-${workerId}`, baseBranch)
2019
+ merged = true
2020
+ } catch (err) {
2021
+ if (err instanceof MergeConflictError) {
2022
+ mergeAttempt++
2023
+ events.emit('merge_conflict_detected', {
2024
+ attempt: mergeAttempt,
2025
+ conflicting_files: err.conflictingFiles,
2026
+ }, { convoy_id: convoyId, task_id: taskRecord.id })
2027
+
2028
+ if (mergeAttempt >= maxMergeAttempts) {
2029
+ events.emit('merge_conflict_failed', {
2030
+ attempts: mergeAttempt,
2031
+ conflicting_files: err.conflictingFiles,
2032
+ }, { convoy_id: convoyId, task_id: taskRecord.id })
2033
+
2034
+ const freshRecord = store.getTask(taskRecord.id, convoyId)!
2035
+ store.withTransaction(() => {
2036
+ store.updateTaskStatus(taskRecord.id, convoyId, 'failed', {
2037
+ finished_at: now(),
2038
+ output: `Merge conflict could not be resolved after ${mergeAttempt} attempts. Files: ${err.conflictingFiles.join(', ')}`,
2039
+ exit_code: 1,
2040
+ })
2041
+ store.updateWorkerStatus(workerId, 'failed', { finished_at: now() })
2042
+ })
2043
+ completedCount++
2044
+ process.stdout.write(` ${c.red('✗')} ${c.bold(`[${taskRecord.id}]`)} merge conflict unresolved ${elapsed} ${c.dim(`[${completedCount}/${totalTasks}]`)}\n`)
2045
+ events.emit('task_failed', { reason: 'merge-conflict', worker_id: workerId }, { convoy_id: convoyId, task_id: taskRecord.id, worker_id: workerId })
2046
+ cascadeFailure(taskRecord.id)
2047
+ handleExhaustion(freshRecord, 'merge-conflict', err.conflictingFiles.join(', '))
2048
+ break
2049
+ }
2050
+
2051
+ // Per spec: backoff on second attempt (unreachable with maxMergeAttempts=2 but follows spec)
2052
+ if (mergeAttempt === 2) {
2053
+ await new Promise<void>(resolve => setTimeout(resolve, 30_000))
2054
+ }
2055
+
2056
+ // Inject a resolution task
2057
+ const fileHash = createHash('sha256')
2058
+ .update(err.conflictingFiles.sort().join(','))
2059
+ .digest('hex')
2060
+ .slice(0, 12)
2061
+ const idempotencyKey = `merge-conflict:${taskRecord.phase}:${fileHash}`
2062
+ const resolutionTaskId = `merge-fix-${taskRecord.id}-${mergeAttempt}`
2063
+ const conflictPrompt = `Resolve merge conflicts in: ${err.conflictingFiles.join(', ')}. Ensure no conflict markers remain (<<<<<<<, =======, >>>>>>>), syntax is valid, no duplicate imports.`
2064
+
2065
+ const resolutionRecord: TaskRecord = {
2066
+ id: resolutionTaskId,
2067
+ convoy_id: convoyId,
2068
+ phase: taskRecord.phase,
2069
+ prompt: conflictPrompt,
2070
+ agent: taskRecord.agent,
2071
+ adapter: null,
2072
+ model: null,
2073
+ timeout_ms: 600_000,
2074
+ status: 'pending',
2075
+ worker_id: null,
2076
+ worktree: null,
2077
+ output: null,
2078
+ exit_code: null,
2079
+ started_at: null,
2080
+ finished_at: null,
2081
+ retries: 0,
2082
+ max_retries: 1,
2083
+ files: JSON.stringify(err.conflictingFiles),
2084
+ depends_on: null,
2085
+ prompt_tokens: null,
2086
+ completion_tokens: null,
2087
+ total_tokens: null,
2088
+ cost_usd: null,
2089
+ gates: null,
2090
+ on_exhausted: 'dlq',
2091
+ injected: 1,
2092
+ provenance: 'merge-conflict',
2093
+ idempotency_key: idempotencyKey,
2094
+ current_step: null,
2095
+ total_steps: null,
2096
+ review_level: null,
2097
+ review_verdict: null,
2098
+ review_tokens: null,
2099
+ review_model: null,
2100
+ panel_attempts: 0,
2101
+ dispute_id: null,
2102
+ drift_score: null,
2103
+ drift_retried: 0,
2104
+ outputs: null,
2105
+ inputs: null,
2106
+ discovered_issues: null,
2107
+ }
2108
+
2109
+ store.insertInjectedTask(resolutionRecord)
2110
+ const storedResolutionRecord = store.getTask(resolutionTaskId, convoyId)!
2111
+ await executeOneTask(storedResolutionRecord)
2112
+ // Next loop iteration will retry the merge
2113
+ } else {
2114
+ // Non-conflict merge error — log warning and continue to done path
2115
+ if (verbose) {
2116
+ process.stderr.write(
2117
+ `Warning: merge failed for ${taskRecord.id}: ${(err as Error).message}\n`,
2118
+ )
2119
+ }
2120
+ merged = true // Preserve original behavior: continue despite error
2121
+ break
2122
+ }
311
2123
  }
312
2124
  }
2125
+
313
2126
  await removeWorktree()
2127
+
2128
+ if (!merged) {
2129
+ taskAdapterMap.delete(taskRecord.id)
2130
+ return
2131
+ }
2132
+
2133
+ // ── Intelligence: update expertise post-merge (Phase 18.2) ─────────
2134
+ try {
2135
+ updateExpertise(taskRecord.agent, { taskId: taskRecord.id, success: true, retries: taskRecord.retries, files: taskRecord.files ? JSON.parse(taskRecord.files) as string[] : [] }, basePath)
2136
+ } catch { /* non-critical */ }
2137
+ // ── Intelligence: build knowledge graph post-merge (Phase 18.3) ────
2138
+ try {
2139
+ const { stdout: diffOut } = await execFile('git', ['diff', 'HEAD~1'], { cwd: basePath })
2140
+ buildKnowledgeGraph(diffOut, convoyId, basePath)
2141
+ } catch { /* non-critical */ }
314
2142
  }
315
2143
 
316
2144
  const usageExtra: Partial<{ prompt_tokens: number; completion_tokens: number; total_tokens: number }> = {}
@@ -320,6 +2148,77 @@ async function runConvoy(
320
2148
  if (result.usage.total_tokens != null) usageExtra.total_tokens = result.usage.total_tokens
321
2149
  }
322
2150
 
2151
+ // ── Capture outputs as artifacts ────────────────────────────────────────
2152
+ if (taskRecord.outputs) {
2153
+ const outputs: TaskOutput[] = JSON.parse(taskRecord.outputs)
2154
+ for (const output of outputs) {
2155
+ let content: string
2156
+ if (output.type === 'summary') {
2157
+ content = result.output.slice(-4096)
2158
+ } else if (output.type === 'json') {
2159
+ const jsonMatch = result.output.match(/```json\n([\s\S]*?)```/)
2160
+ content = jsonMatch ? jsonMatch[1].trim() : result.output
2161
+ } else {
2162
+ content = result.output
2163
+ }
2164
+ try {
2165
+ store.insertArtifact({
2166
+ id: `artifact-${taskRecord.id}-${output.name}-${Date.now()}`,
2167
+ convoy_id: convoyId,
2168
+ task_id: taskRecord.id,
2169
+ name: output.name,
2170
+ type: output.type,
2171
+ content,
2172
+ created_at: new Date().toISOString(),
2173
+ })
2174
+ } catch (err) {
2175
+ if (err instanceof ConvoyArtifactLimitError) {
2176
+ events.emit('artifact_limit_reached', {
2177
+ task_id: taskRecord.id,
2178
+ artifact_name: output.name,
2179
+ }, { convoy_id: convoyId, task_id: taskRecord.id })
2180
+ } else {
2181
+ throw err
2182
+ }
2183
+ }
2184
+ }
2185
+ }
2186
+
2187
+ // ── Intelligence: capture persistent agent identity (Phase 17.2) ─────
2188
+ const specTaskForCapture = (spec.tasks ?? []).find(t => t.id === taskRecord.id)
2189
+ if (specTaskForCapture?.persistent && result.output) {
2190
+ try {
2191
+ // Extract last 300 words, cap at 4KB
2192
+ const words = result.output.split(/\s+/)
2193
+ const lastWords = words.slice(-300).join(' ')
2194
+ let summary = lastWords.length > 4096 ? lastWords.slice(-4096) : lastWords
2195
+
2196
+ // Secret-scan the summary before storing
2197
+ const summaryScan = scanForSecrets(summary, `identity:${taskRecord.id}`)
2198
+ if (summaryScan.clean) {
2199
+ store.insertAgentIdentity({
2200
+ id: `identity-${taskRecord.id}-${Date.now()}`,
2201
+ agent: taskRecord.agent,
2202
+ convoy_id: convoyId,
2203
+ task_id: taskRecord.id,
2204
+ summary,
2205
+ created_at: new Date().toISOString(),
2206
+ retention_days: 90,
2207
+ })
2208
+ events.emit('agent_identity_captured', {
2209
+ agent: taskRecord.agent,
2210
+ summary_length: summary.length,
2211
+ }, { convoy_id: convoyId, task_id: taskRecord.id })
2212
+ } else {
2213
+ events.emit('agent_identity_rejected', {
2214
+ agent: taskRecord.agent,
2215
+ reason: 'secrets_detected',
2216
+ findings_count: summaryScan.findings.length,
2217
+ }, { convoy_id: convoyId, task_id: taskRecord.id })
2218
+ }
2219
+ } catch { /* non-critical */ }
2220
+ }
2221
+
323
2222
  store.withTransaction(() => {
324
2223
  store.updateTaskStatus(taskRecord.id, convoyId, 'done', {
325
2224
  finished_at: finishedAt,
@@ -329,6 +2228,24 @@ async function runConvoy(
329
2228
  })
330
2229
  store.updateWorkerStatus(workerId, 'done', { finished_at: finishedAt })
331
2230
  })
2231
+ // ── Circuit breaker: record success ────────────────────────────────────
2232
+ if (circuitBreakerConfig) {
2233
+ circuitBreaker.recordSuccess(taskRecord.agent)
2234
+ try { store.updateConvoyCircuitState(convoyId, circuitBreaker.serialize()) } catch { /* non-critical */ }
2235
+ }
2236
+ // ── Intelligence: capture retry lesson (Phase 18.1) ─────────────────
2237
+ if (taskRecord.retries > 0 && spec.defaults?.inject_lessons !== false) {
2238
+ try {
2239
+ captureLessons({
2240
+ title: `Retry success for ${taskRecord.agent} on ${taskRecord.id}`,
2241
+ category: 'convoy',
2242
+ agent: taskRecord.agent,
2243
+ problem: `Task ${taskRecord.id} required ${taskRecord.retries} retries`,
2244
+ solution: 'Succeeded after retry with adjusted approach',
2245
+ files: taskRecord.files ? JSON.parse(taskRecord.files) as string[] : undefined,
2246
+ }, basePath)
2247
+ } catch { /* non-critical */ }
2248
+ }
332
2249
  completedCount++
333
2250
  process.stdout.write(` ${c.green('✓')} ${c.bold(`[${taskRecord.id}]`)} ${elapsed} ${c.dim(`[${completedCount}/${totalTasks}]`)}\n`)
334
2251
  events.emit(
@@ -367,12 +2284,15 @@ async function runConvoy(
367
2284
 
368
2285
  const freshRecord = store.getTask(taskRecord.id, convoyId)!
369
2286
  if (freshRecord.retries < freshRecord.max_retries && spec.on_failure !== 'stop') {
2287
+ const failedOutput = result.output || '(no output)'
2288
+ const contextPrefix = `Previous attempt failed.\nExit code: ${result.exitCode}\nError output:\n${failedOutput}\n\nFix the issues and try again.\n\n`
370
2289
  store.updateTaskStatus(taskRecord.id, convoyId, 'pending', {
371
2290
  retries: freshRecord.retries + 1,
372
2291
  worker_id: null,
373
2292
  worktree: null,
374
2293
  started_at: null,
375
2294
  finished_at: null,
2295
+ prompt: contextPrefix + taskRecord.prompt,
376
2296
  })
377
2297
  store.updateWorkerStatus(workerId, 'failed', { finished_at: finishedAt })
378
2298
  process.stdout.write(` ${c.yellow('⟳')} ${c.bold(`[${taskRecord.id}]`)} retry ${freshRecord.retries + 1}/${freshRecord.max_retries}\n`)
@@ -385,6 +2305,21 @@ async function runConvoy(
385
2305
  })
386
2306
  store.updateWorkerStatus(workerId, 'failed', { finished_at: finishedAt })
387
2307
  })
2308
+ // ── Intelligence: record failure in expertise (Phase 18.2) ──────────
2309
+ try {
2310
+ updateExpertise(taskRecord.agent, { taskId: taskRecord.id, success: false, retries: freshRecord.retries, files: taskRecord.files ? JSON.parse(taskRecord.files) as string[] : [] }, basePath)
2311
+ } catch { /* non-critical */ }
2312
+ // ── Circuit breaker: record failure ────────────────────────────────────
2313
+ if (circuitBreakerConfig) {
2314
+ const { tripped } = circuitBreaker.recordFailure(taskRecord.agent)
2315
+ try { store.updateConvoyCircuitState(convoyId, circuitBreaker.serialize()) } catch { /* non-critical */ }
2316
+ if (tripped) {
2317
+ events.emit('circuit_breaker_tripped', {
2318
+ agent: taskRecord.agent,
2319
+ state: circuitBreaker.getState(taskRecord.agent),
2320
+ }, { convoy_id: convoyId, task_id: taskRecord.id })
2321
+ }
2322
+ }
388
2323
  completedCount++
389
2324
  process.stdout.write(` ${c.red('✗')} ${c.bold(`[${taskRecord.id}]`)} failed ${elapsed} ${c.dim(`[${completedCount}/${totalTasks}]`)}\n`)
390
2325
  if (verbose) {
@@ -417,7 +2352,7 @@ async function runConvoy(
417
2352
  phase: taskRecord.phase,
418
2353
  convoy_id: convoyId,
419
2354
  }, { convoy_id: convoyId, task_id: taskRecord.id })
420
- cascadeFailure(taskRecord.id)
2355
+ handleExhaustion(freshRecord, 'error', result.output || null)
421
2356
  }
422
2357
  taskAdapterMap.delete(taskRecord.id)
423
2358
  }
@@ -425,20 +2360,47 @@ async function runConvoy(
425
2360
  // ── Main execution loop ───────────────────────────────────────────────────
426
2361
 
427
2362
  let lastPhase = -1
2363
+ const isSwarmMode = spec.concurrency === 'auto'
2364
+ const maxSwarmConcurrency = spec.defaults?.max_swarm_concurrency ?? 8
2365
+ let lastInjectPoll = 0
2366
+ const INJECT_POLL_INTERVAL = 2000 // 2 seconds
428
2367
  try {
429
2368
  let ready = store.getReadyTasks(convoyId)
430
- const concurrency = spec.concurrency ?? 1
431
2369
  while (ready.length > 0) {
2370
+ // Compute effective concurrency for this phase
2371
+ const effectiveConcurrency = isSwarmMode
2372
+ ? Math.min(ready.length, maxSwarmConcurrency)
2373
+ : (typeof spec.concurrency === 'number' ? spec.concurrency : 1)
2374
+
432
2375
  for (const t of ready) {
433
2376
  if (t.phase !== lastPhase) {
434
2377
  lastPhase = t.phase
435
2378
  const tasksInPhase = ready.filter(r => r.phase === t.phase)
436
2379
  const ids = tasksInPhase.map(r => r.id).join(', ')
437
2380
  process.stdout.write(`\n ${c.bold(`Phase ${t.phase + 1}:`)} ${c.dim(ids)}\n`)
2381
+ if (isSwarmMode) {
2382
+ events.emit('swarm_concurrency_update', {
2383
+ phase: t.phase,
2384
+ pending_count: ready.length,
2385
+ effective_concurrency: effectiveConcurrency,
2386
+ }, { convoy_id: convoyId })
2387
+ }
2388
+ }
2389
+ }
2390
+ for (let i = 0; i < ready.length; i += effectiveConcurrency) {
2391
+ // Poll for file-based injection between batches
2392
+ const now = Date.now()
2393
+ if (now - lastInjectPoll >= INJECT_POLL_INTERVAL) {
2394
+ pollInjectFile(convoyId, store, events, basePath)
2395
+ lastInjectPoll = now
438
2396
  }
2397
+ await Promise.all(ready.slice(i, i + effectiveConcurrency).map(t => executeOneTask(t)))
439
2398
  }
440
- for (let i = 0; i < ready.length; i += concurrency) {
441
- await Promise.all(ready.slice(i, i + concurrency).map(t => executeOneTask(t)))
2399
+ // Reset wait-for-input tasks to pending so they are re-evaluated after
2400
+ // upstream artifacts may have been captured in this batch
2401
+ const waitingTasks = store.getTasksByConvoy(convoyId).filter(t => t.status === ('wait-for-input' as ConvoyTaskStatus))
2402
+ for (const wt of waitingTasks) {
2403
+ store.updateTaskStatus(wt.id, convoyId, 'pending')
442
2404
  }
443
2405
  ready = store.getReadyTasks(convoyId)
444
2406
  }
@@ -460,6 +2422,8 @@ async function runConvoy(
460
2422
 
461
2423
  for (const command of spec.gates) {
462
2424
  try {
2425
+ // SECURITY: Gate/hook commands come from the .convoy.yml spec file, which is operator-controlled.
2426
+ // They are NOT user-supplied and are part of the trusted build configuration.
463
2427
  await execFile('sh', ['-c', command], { cwd: basePath })
464
2428
  gateResults.push({ command, exitCode: 0, passed: true })
465
2429
  process.stdout.write(` ${c.green('✓')} ${c.dim(command)}\n`)
@@ -510,13 +2474,39 @@ async function runConvoy(
510
2474
  }
511
2475
  }
512
2476
 
2477
+ // ── post_convoy hooks ─────────────────────────────────────────────────────
2478
+
2479
+ const specLevelHooks: Hook[] = spec.hooks ?? []
2480
+ if (specLevelHooks.length > 0) {
2481
+ const postConvoyResult = await runHooks(specLevelHooks, 'post_convoy', {
2482
+ convoyId,
2483
+ cwd: basePath,
2484
+ })
2485
+ if (!postConvoyResult.passed) {
2486
+ const hookLabel = postConvoyResult.failedHook?.name ?? postConvoyResult.failedHook?.type ?? 'unknown'
2487
+ events.emit('post_convoy_hook_failed', {
2488
+ hook: hookLabel,
2489
+ error: postConvoyResult.error,
2490
+ }, { convoy_id: convoyId })
2491
+ process.stdout.write(` ${c.red('✗')} post_convoy hook "${hookLabel}" failed\n`)
2492
+ }
2493
+ }
2494
+
2495
+ // ── Intelligence: post-convoy consolidation ──────────────────────────────
2496
+ if (spec.defaults?.inject_lessons !== false) {
2497
+ try { consolidateLessons(basePath) } catch { /* non-critical */ }
2498
+ }
2499
+ if (spec.defaults?.track_discovered_issues) {
2500
+ try { consolidateIssues(basePath) } catch { /* non-critical */ }
2501
+ }
2502
+
513
2503
  // ── Final status & summary ────────────────────────────────────────────────
514
2504
 
515
2505
  const allTasksFinal = store.getTasksByConvoy(convoyId)
516
2506
  const summary = {
517
2507
  total: allTasksFinal.length,
518
2508
  done: allTasksFinal.filter(t => t.status === 'done').length,
519
- failed: allTasksFinal.filter(t => t.status === 'failed').length,
2509
+ failed: allTasksFinal.filter(t => t.status === 'failed' || t.status === 'gate-failed' || t.status === 'review-blocked' || t.status === 'disputed').length,
520
2510
  skipped: allTasksFinal.filter(t => t.status === 'skipped').length,
521
2511
  timedOut: allTasksFinal.filter(t => t.status === 'timed-out').length,
522
2512
  }
@@ -541,6 +2531,25 @@ async function runConvoy(
541
2531
  total_tokens: convoyTotalTokens,
542
2532
  })
543
2533
 
2534
+ if (finalStatus === 'done') {
2535
+ events.emit('convoy_finished', { status: 'done' }, { convoy_id: convoyId })
2536
+ } else {
2537
+ events.emit('convoy_failed', { status: finalStatus, reason: finalStatus === 'gate-failed' ? 'Gate check failed' : 'One or more tasks failed' }, { convoy_id: convoyId })
2538
+ }
2539
+
2540
+ // Run convoy guard checks
2541
+ const guardResult = runConvoyGuard(store, convoyId, wtManager, ndjsonPath, spec.guard)
2542
+ if (guardResult.warnings.length > 0) {
2543
+ process.stdout.write(`\n ${c.yellow('Guard warnings:')}\n`)
2544
+ for (const w of guardResult.warnings) {
2545
+ process.stdout.write(` ${c.dim('⚠')} ${w}\n`)
2546
+ }
2547
+ events.emit('convoy_guard', {
2548
+ passed: guardResult.passed,
2549
+ warnings: guardResult.warnings,
2550
+ }, { convoy_id: convoyId })
2551
+ }
2552
+
544
2553
  return {
545
2554
  convoyId,
546
2555
  status: finalStatus,
@@ -575,9 +2584,49 @@ export function createConvoyEngine(options: ConvoyEngineOptions): ConvoyEngine {
575
2584
  const specHash = createHash('sha256').update(specYaml).digest('hex')
576
2585
  const baseBranch = spec.branch ?? (await getCurrentBranch())
577
2586
 
2587
+ // Ensure target branch exists before acquiring any locks.
2588
+ // Uses _ensureBranch injection so callers/tests can override.
2589
+ if (spec.branch !== undefined) {
2590
+ const branchFn = options._ensureBranch ?? ensureBranch
2591
+ await branchFn(spec.branch, basePath)
2592
+ }
2593
+
578
2594
  mkdirSync(dirname(dbPath), { recursive: true })
2595
+
2596
+ const lockDb = new DatabaseSync(dbPath)
2597
+ lockDb.exec('PRAGMA journal_mode = WAL')
2598
+ lockDb.exec(`CREATE TABLE IF NOT EXISTS engine_lock (
2599
+ id INTEGER PRIMARY KEY CHECK (id = 1),
2600
+ pid INTEGER NOT NULL,
2601
+ hostname TEXT NOT NULL,
2602
+ started_at TEXT NOT NULL,
2603
+ last_heartbeat TEXT NOT NULL
2604
+ )`)
2605
+
2606
+ const lock = (() => {
2607
+ try {
2608
+ return acquireEngineLock(lockDb, dbPath)
2609
+ } catch (err) {
2610
+ lockDb.close()
2611
+ throw err
2612
+ }
2613
+ })()
2614
+
2615
+ const versionRow = lockDb.prepare('SELECT sqlite_version() as v').get() as { v: string }
2616
+ const [major, minor] = versionRow.v.split('.').map(Number)
2617
+ if (major < 3 || (major === 3 && minor < 35)) {
2618
+ lock.release()
2619
+ lockDb.close()
2620
+ throw new Error(`SQLite version ${versionRow.v} is too old. Requires >= 3.35.0`)
2621
+ }
2622
+
2623
+ lock.startHeartbeat()
2624
+
579
2625
  const store = createConvoyStore(dbPath)
580
- const events = createEventEmitter(store, options.logsDir)
2626
+ const ndjsonPath = options.logsDir
2627
+ ? join(options.logsDir, 'convoys', `${convoyId}.ndjson`)
2628
+ : ndjsonPathForConvoy(convoyId, basePath)
2629
+ const events = createEventEmitter(store, { ndjsonPath })
581
2630
  const wtManager = options._worktreeManager ?? createWorktreeManager(basePath)
582
2631
  const mergeQueue = options._mergeQueue ?? createMergeQueue(basePath)
583
2632
 
@@ -596,6 +2645,24 @@ export function createConvoyEngine(options: ConvoyEngineOptions): ConvoyEngine {
596
2645
 
597
2646
  const tasks = spec.tasks ?? []
598
2647
  const phases = buildPhases(tasks)
2648
+
2649
+ // Validate file partitions before inserting tasks
2650
+ const partitionResult = validateFilePartitions(tasks, phases)
2651
+ if (!partitionResult.valid) {
2652
+ const conflictSummary = partitionResult.conflicts
2653
+ .map(
2654
+ (cf) =>
2655
+ `Phase ${cf.phase}: tasks "${cf.taskA}" and "${cf.taskB}" overlap on [${cf.overlapping.join(', ')}]`,
2656
+ )
2657
+ .join('\n')
2658
+ events.emit(
2659
+ 'file_partition_conflict',
2660
+ { conflicts: partitionResult.conflicts },
2661
+ { convoy_id: convoyId },
2662
+ )
2663
+ throw new Error(`File partition conflicts detected:\n${conflictSummary}`)
2664
+ }
2665
+
599
2666
  for (let phaseIdx = 0; phaseIdx < phases.length; phaseIdx++) {
600
2667
  for (const task of phases[phaseIdx]) {
601
2668
  store.insertTask({
@@ -612,6 +2679,9 @@ export function createConvoyEngine(options: ConvoyEngineOptions): ConvoyEngine {
612
2679
  max_retries: task.max_retries,
613
2680
  files: task.files.length > 0 ? JSON.stringify(task.files) : null,
614
2681
  depends_on: task.depends_on.length > 0 ? JSON.stringify(task.depends_on) : null,
2682
+ gates: task.gates && task.gates.length > 0 ? JSON.stringify(task.gates) : null,
2683
+ outputs: task.outputs && task.outputs.length > 0 ? JSON.stringify(task.outputs) : null,
2684
+ inputs: task.inputs && task.inputs.length > 0 ? JSON.stringify(task.inputs) : null,
615
2685
  })
616
2686
  }
617
2687
  }
@@ -621,11 +2691,15 @@ export function createConvoyEngine(options: ConvoyEngineOptions): ConvoyEngine {
621
2691
 
622
2692
  result = await runConvoy(
623
2693
  convoyId, spec, adapter, store, events,
624
- wtManager, mergeQueue, basePath, baseBranch, verbose, startTime,
2694
+ wtManager, mergeQueue, basePath, baseBranch, verbose, startTime, ndjsonPath,
2695
+ options._reviewRunner,
625
2696
  )
626
2697
  } finally {
627
2698
  try { await exportConvoyToNdjson(store, convoyId, options.logsDir) } catch { /* silent */ }
2699
+ events.close()
628
2700
  store.close()
2701
+ lock.release()
2702
+ lockDb.close()
629
2703
  }
630
2704
  return result
631
2705
  }
@@ -634,8 +2708,41 @@ export function createConvoyEngine(options: ConvoyEngineOptions): ConvoyEngine {
634
2708
  const startTime = Date.now()
635
2709
 
636
2710
  mkdirSync(dirname(dbPath), { recursive: true })
2711
+
2712
+ const lockDb = new DatabaseSync(dbPath)
2713
+ lockDb.exec('PRAGMA journal_mode = WAL')
2714
+ lockDb.exec(`CREATE TABLE IF NOT EXISTS engine_lock (
2715
+ id INTEGER PRIMARY KEY CHECK (id = 1),
2716
+ pid INTEGER NOT NULL,
2717
+ hostname TEXT NOT NULL,
2718
+ started_at TEXT NOT NULL,
2719
+ last_heartbeat TEXT NOT NULL
2720
+ )`)
2721
+
2722
+ const lock = (() => {
2723
+ try {
2724
+ return acquireEngineLock(lockDb, dbPath)
2725
+ } catch (err) {
2726
+ lockDb.close()
2727
+ throw err
2728
+ }
2729
+ })()
2730
+
2731
+ const versionRow = lockDb.prepare('SELECT sqlite_version() as v').get() as { v: string }
2732
+ const [major, minor] = versionRow.v.split('.').map(Number)
2733
+ if (major < 3 || (major === 3 && minor < 35)) {
2734
+ lock.release()
2735
+ lockDb.close()
2736
+ throw new Error(`SQLite version ${versionRow.v} is too old. Requires >= 3.35.0`)
2737
+ }
2738
+
2739
+ lock.startHeartbeat()
2740
+
637
2741
  const store = createConvoyStore(dbPath)
638
- const events = createEventEmitter(store, options.logsDir)
2742
+ const ndjsonPath = options.logsDir
2743
+ ? join(options.logsDir, 'convoys', `${convoyId}.ndjson`)
2744
+ : ndjsonPathForConvoy(convoyId, basePath)
2745
+ const events = createEventEmitter(store, { ndjsonPath })
639
2746
  const wtManager = options._worktreeManager ?? createWorktreeManager(basePath)
640
2747
  const mergeQueue = options._mergeQueue ?? createMergeQueue(basePath)
641
2748
 
@@ -673,6 +2780,9 @@ export function createConvoyEngine(options: ConvoyEngineOptions): ConvoyEngine {
673
2780
  // Remove all orphaned worktrees from the crashed run
674
2781
  await wtManager.removeAll()
675
2782
 
2783
+ // NDJSON recovery: truncate partial lines, replay missing events
2784
+ recoverNdjson(store, convoyId, ndjsonPath)
2785
+
676
2786
  events.emit(
677
2787
  'convoy_resumed',
678
2788
  { original_created_at: convoy.created_at },
@@ -681,14 +2791,214 @@ export function createConvoyEngine(options: ConvoyEngineOptions): ConvoyEngine {
681
2791
 
682
2792
  result = await runConvoy(
683
2793
  convoyId, spec, adapter, store, events,
684
- wtManager, mergeQueue, basePath, baseBranch, verbose, startTime,
2794
+ wtManager, mergeQueue, basePath, baseBranch, verbose, startTime, ndjsonPath,
2795
+ options._reviewRunner,
685
2796
  )
686
2797
  } finally {
687
2798
  try { await exportConvoyToNdjson(store, convoyId, options.logsDir) } catch { /* silent */ }
2799
+ events.close()
688
2800
  store.close()
2801
+ lock.release()
2802
+ lockDb.close()
689
2803
  }
690
2804
  return result
691
2805
  }
692
2806
 
693
- return { run, resume }
2807
+ async function retryFailed(convoyId: string, taskIds?: string[]): Promise<void> {
2808
+ mkdirSync(dirname(dbPath), { recursive: true })
2809
+ const store = createConvoyStore(dbPath)
2810
+ const ndjsonPath = options.logsDir
2811
+ ? join(options.logsDir, 'convoys', `${convoyId}.ndjson`)
2812
+ : ndjsonPathForConvoy(convoyId, basePath)
2813
+ const events = createEventEmitter(store, { ndjsonPath })
2814
+ try {
2815
+ const allTasks = store.getTasksByConvoy(convoyId)
2816
+ const retryableStatuses = ['failed', 'gate-failed', 'timed-out', 'review-blocked', 'disputed']
2817
+
2818
+ const tasksToRetry = allTasks.filter(t => {
2819
+ if (!retryableStatuses.includes(t.status)) return false
2820
+ if (taskIds && taskIds.length > 0) return taskIds.includes(t.id)
2821
+ return true
2822
+ })
2823
+
2824
+ for (const task of tasksToRetry) {
2825
+ store.updateTaskStatus(task.id, convoyId, 'pending', {
2826
+ worker_id: null,
2827
+ worktree: null,
2828
+ started_at: null,
2829
+ finished_at: null,
2830
+ })
2831
+ events.emit('task_retried', { previous_status: task.status }, { convoy_id: convoyId, task_id: task.id })
2832
+ }
2833
+
2834
+ // Reset convoy status to running so resume can pick it up
2835
+ store.updateConvoyStatus(convoyId, 'running', {})
2836
+ } finally {
2837
+ events.close()
2838
+ store.close()
2839
+ }
2840
+ }
2841
+
2842
+ function injectTask(convoyId: string, task: {
2843
+ id: string
2844
+ prompt: string
2845
+ agent: string
2846
+ phase: number
2847
+ timeout_ms?: number
2848
+ depends_on?: string[]
2849
+ files?: string[]
2850
+ max_retries?: number
2851
+ provenance?: string
2852
+ idempotency_key?: string
2853
+ on_exhausted?: 'dlq' | 'skip' | 'stop'
2854
+ }): TaskRecord {
2855
+ mkdirSync(dirname(dbPath), { recursive: true })
2856
+ const store = createConvoyStore(dbPath)
2857
+ try {
2858
+ // Idempotency check
2859
+ if (task.idempotency_key) {
2860
+ const existing = store.getTaskByIdempotencyKey(convoyId, task.idempotency_key)
2861
+ if (existing) return existing
2862
+ }
2863
+
2864
+ const allTasks = store.getTasksByConvoy(convoyId)
2865
+
2866
+ // Check max injectable tasks (10)
2867
+ const injectedCount = allTasks.filter(t => t.injected === 1).length
2868
+ if (injectedCount >= 10) {
2869
+ throw new Error(`Max injectable tasks (10) reached for convoy ${convoyId}`)
2870
+ }
2871
+
2872
+ // Validate ID uniqueness
2873
+ if (allTasks.some(t => t.id === task.id)) {
2874
+ throw new Error(`Task ID "${task.id}" already exists in convoy ${convoyId}`)
2875
+ }
2876
+
2877
+ // Validate depends_on references exist
2878
+ const deps = task.depends_on ?? []
2879
+ for (const dep of deps) {
2880
+ if (!allTasks.some(t => t.id === dep)) {
2881
+ throw new Error(`Dependency "${dep}" not found in convoy ${convoyId}`)
2882
+ }
2883
+ }
2884
+
2885
+ // Validate no file partition overlap with pending/running tasks
2886
+ const taskFiles = task.files ?? []
2887
+ if (taskFiles.length > 0) {
2888
+ // Normalize injected task file paths
2889
+ const normalizedTaskFiles = taskFiles.map(normalizePath)
2890
+
2891
+ // Symlink pre-scan on injected files
2892
+ const basePath = options.basePath ?? process.cwd()
2893
+ try {
2894
+ scanSymlinks(normalizedTaskFiles, basePath)
2895
+ } catch (err) {
2896
+ throw new Error(`Injected task "${task.id}" failed symlink check: ${(err as Error).message}`)
2897
+ }
2898
+
2899
+ // Full partition validation against active tasks
2900
+ const activeTasks = allTasks.filter(t => t.status === 'pending' || t.status === 'running' || t.status === 'assigned')
2901
+ for (const other of activeTasks) {
2902
+ const otherFiles = other.files ? (JSON.parse(other.files) as string[]) : []
2903
+ if (otherFiles.length === 0) continue
2904
+ const normalizedOther = otherFiles.map(normalizePath)
2905
+ const overlapping: string[] = []
2906
+ for (const fileA of normalizedTaskFiles) {
2907
+ for (const fileB of normalizedOther) {
2908
+ if (pathsOverlap(fileA, fileB) && !overlapping.includes(fileA)) {
2909
+ overlapping.push(fileA)
2910
+ }
2911
+ }
2912
+ }
2913
+ if (overlapping.length > 0) {
2914
+ throw new Error(`File partition overlap with task "${other.id}": ${overlapping.join(', ')}`)
2915
+ }
2916
+ }
2917
+ }
2918
+
2919
+ // Detect dependency cycles
2920
+ const depGraph = new Map<string, string[]>()
2921
+ for (const t of allTasks) {
2922
+ depGraph.set(t.id, t.depends_on ? (JSON.parse(t.depends_on) as string[]) : [])
2923
+ }
2924
+ depGraph.set(task.id, deps)
2925
+
2926
+ function hasCycle(nodeId: string, visited: Set<string>, stack: Set<string>): boolean {
2927
+ visited.add(nodeId)
2928
+ stack.add(nodeId)
2929
+ for (const dep of depGraph.get(nodeId) ?? []) {
2930
+ if (!visited.has(dep)) {
2931
+ if (hasCycle(dep, visited, stack)) return true
2932
+ } else if (stack.has(dep)) {
2933
+ return true
2934
+ }
2935
+ }
2936
+ stack.delete(nodeId)
2937
+ return false
2938
+ }
2939
+
2940
+ const visited = new Set<string>()
2941
+ const stack = new Set<string>()
2942
+ for (const nodeId of depGraph.keys()) {
2943
+ if (!visited.has(nodeId)) {
2944
+ if (hasCycle(nodeId, visited, stack)) {
2945
+ throw new Error(`Dependency cycle detected when injecting task "${task.id}"`)
2946
+ }
2947
+ }
2948
+ }
2949
+
2950
+ // Insert the task
2951
+ const record: TaskRecord = {
2952
+ id: task.id,
2953
+ convoy_id: convoyId,
2954
+ phase: task.phase,
2955
+ prompt: task.prompt,
2956
+ agent: task.agent,
2957
+ adapter: null,
2958
+ model: null,
2959
+ timeout_ms: task.timeout_ms ?? 1_800_000,
2960
+ status: 'pending',
2961
+ worker_id: null,
2962
+ worktree: null,
2963
+ output: null,
2964
+ exit_code: null,
2965
+ started_at: null,
2966
+ finished_at: null,
2967
+ retries: 0,
2968
+ max_retries: task.max_retries ?? 1,
2969
+ files: taskFiles.length > 0 ? JSON.stringify(taskFiles) : null,
2970
+ depends_on: deps.length > 0 ? JSON.stringify(deps) : null,
2971
+ prompt_tokens: null,
2972
+ completion_tokens: null,
2973
+ total_tokens: null,
2974
+ cost_usd: null,
2975
+ gates: null,
2976
+ on_exhausted: task.on_exhausted ?? 'dlq',
2977
+ injected: 1,
2978
+ provenance: task.provenance ?? null,
2979
+ idempotency_key: task.idempotency_key ?? null,
2980
+ current_step: null,
2981
+ total_steps: null,
2982
+ review_level: null,
2983
+ review_verdict: null,
2984
+ review_tokens: null,
2985
+ review_model: null,
2986
+ panel_attempts: 0,
2987
+ dispute_id: null,
2988
+ drift_score: null,
2989
+ drift_retried: 0,
2990
+ outputs: null,
2991
+ inputs: null,
2992
+ discovered_issues: null,
2993
+ }
2994
+
2995
+ store.insertInjectedTask(record)
2996
+
2997
+ return record
2998
+ } finally {
2999
+ store.close()
3000
+ }
3001
+ }
3002
+
3003
+ return { run, resume, retryFailed, injectTask }
694
3004
  }