@litmers/cursorflow-orchestrator 0.1.20 → 0.1.26
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +9 -0
- package/commands/cursorflow-clean.md +19 -0
- package/commands/cursorflow-runs.md +59 -0
- package/commands/cursorflow-stop.md +55 -0
- package/dist/cli/clean.js +171 -0
- package/dist/cli/clean.js.map +1 -1
- package/dist/cli/index.js +7 -0
- package/dist/cli/index.js.map +1 -1
- package/dist/cli/init.js +1 -1
- package/dist/cli/init.js.map +1 -1
- package/dist/cli/logs.js +83 -42
- package/dist/cli/logs.js.map +1 -1
- package/dist/cli/monitor.d.ts +7 -0
- package/dist/cli/monitor.js +1007 -189
- package/dist/cli/monitor.js.map +1 -1
- package/dist/cli/prepare.js +4 -3
- package/dist/cli/prepare.js.map +1 -1
- package/dist/cli/resume.js +188 -236
- package/dist/cli/resume.js.map +1 -1
- package/dist/cli/run.js +8 -3
- package/dist/cli/run.js.map +1 -1
- package/dist/cli/runs.d.ts +5 -0
- package/dist/cli/runs.js +214 -0
- package/dist/cli/runs.js.map +1 -0
- package/dist/cli/setup-commands.js +0 -0
- package/dist/cli/signal.js +1 -1
- package/dist/cli/signal.js.map +1 -1
- package/dist/cli/stop.d.ts +5 -0
- package/dist/cli/stop.js +215 -0
- package/dist/cli/stop.js.map +1 -0
- package/dist/cli/tasks.d.ts +10 -0
- package/dist/cli/tasks.js +165 -0
- package/dist/cli/tasks.js.map +1 -0
- package/dist/core/auto-recovery.d.ts +212 -0
- package/dist/core/auto-recovery.js +737 -0
- package/dist/core/auto-recovery.js.map +1 -0
- package/dist/core/failure-policy.d.ts +156 -0
- package/dist/core/failure-policy.js +488 -0
- package/dist/core/failure-policy.js.map +1 -0
- package/dist/core/orchestrator.d.ts +15 -2
- package/dist/core/orchestrator.js +392 -15
- package/dist/core/orchestrator.js.map +1 -1
- package/dist/core/reviewer.d.ts +2 -0
- package/dist/core/reviewer.js +2 -0
- package/dist/core/reviewer.js.map +1 -1
- package/dist/core/runner.d.ts +33 -10
- package/dist/core/runner.js +321 -146
- package/dist/core/runner.js.map +1 -1
- package/dist/services/logging/buffer.d.ts +67 -0
- package/dist/services/logging/buffer.js +309 -0
- package/dist/services/logging/buffer.js.map +1 -0
- package/dist/services/logging/console.d.ts +89 -0
- package/dist/services/logging/console.js +169 -0
- package/dist/services/logging/console.js.map +1 -0
- package/dist/services/logging/file-writer.d.ts +71 -0
- package/dist/services/logging/file-writer.js +516 -0
- package/dist/services/logging/file-writer.js.map +1 -0
- package/dist/services/logging/formatter.d.ts +39 -0
- package/dist/services/logging/formatter.js +227 -0
- package/dist/services/logging/formatter.js.map +1 -0
- package/dist/services/logging/index.d.ts +11 -0
- package/dist/services/logging/index.js +30 -0
- package/dist/services/logging/index.js.map +1 -0
- package/dist/services/logging/parser.d.ts +31 -0
- package/dist/services/logging/parser.js +222 -0
- package/dist/services/logging/parser.js.map +1 -0
- package/dist/services/process/index.d.ts +59 -0
- package/dist/services/process/index.js +257 -0
- package/dist/services/process/index.js.map +1 -0
- package/dist/types/agent.d.ts +20 -0
- package/dist/types/agent.js +6 -0
- package/dist/types/agent.js.map +1 -0
- package/dist/types/config.d.ts +65 -0
- package/dist/types/config.js +6 -0
- package/dist/types/config.js.map +1 -0
- package/dist/types/events.d.ts +125 -0
- package/dist/types/events.js +6 -0
- package/dist/types/events.js.map +1 -0
- package/dist/types/index.d.ts +12 -0
- package/dist/types/index.js +37 -0
- package/dist/types/index.js.map +1 -0
- package/dist/types/lane.d.ts +43 -0
- package/dist/types/lane.js +6 -0
- package/dist/types/lane.js.map +1 -0
- package/dist/types/logging.d.ts +71 -0
- package/dist/types/logging.js +16 -0
- package/dist/types/logging.js.map +1 -0
- package/dist/types/review.d.ts +17 -0
- package/dist/types/review.js +6 -0
- package/dist/types/review.js.map +1 -0
- package/dist/types/run.d.ts +32 -0
- package/dist/types/run.js +6 -0
- package/dist/types/run.js.map +1 -0
- package/dist/types/task.d.ts +71 -0
- package/dist/types/task.js +6 -0
- package/dist/types/task.js.map +1 -0
- package/dist/ui/components.d.ts +134 -0
- package/dist/ui/components.js +389 -0
- package/dist/ui/components.js.map +1 -0
- package/dist/ui/log-viewer.d.ts +49 -0
- package/dist/ui/log-viewer.js +449 -0
- package/dist/ui/log-viewer.js.map +1 -0
- package/dist/utils/checkpoint.d.ts +87 -0
- package/dist/utils/checkpoint.js +317 -0
- package/dist/utils/checkpoint.js.map +1 -0
- package/dist/utils/config.d.ts +4 -0
- package/dist/utils/config.js +11 -2
- package/dist/utils/config.js.map +1 -1
- package/dist/utils/cursor-agent.js.map +1 -1
- package/dist/utils/dependency.d.ts +74 -0
- package/dist/utils/dependency.js +420 -0
- package/dist/utils/dependency.js.map +1 -0
- package/dist/utils/doctor.js +10 -5
- package/dist/utils/doctor.js.map +1 -1
- package/dist/utils/enhanced-logger.d.ts +10 -33
- package/dist/utils/enhanced-logger.js +94 -9
- package/dist/utils/enhanced-logger.js.map +1 -1
- package/dist/utils/git.d.ts +121 -0
- package/dist/utils/git.js +322 -2
- package/dist/utils/git.js.map +1 -1
- package/dist/utils/health.d.ts +91 -0
- package/dist/utils/health.js +556 -0
- package/dist/utils/health.js.map +1 -0
- package/dist/utils/lock.d.ts +95 -0
- package/dist/utils/lock.js +332 -0
- package/dist/utils/lock.js.map +1 -0
- package/dist/utils/log-buffer.d.ts +17 -0
- package/dist/utils/log-buffer.js +14 -0
- package/dist/utils/log-buffer.js.map +1 -0
- package/dist/utils/log-constants.d.ts +23 -0
- package/dist/utils/log-constants.js +28 -0
- package/dist/utils/log-constants.js.map +1 -0
- package/dist/utils/log-formatter.d.ts +9 -0
- package/dist/utils/log-formatter.js +113 -70
- package/dist/utils/log-formatter.js.map +1 -1
- package/dist/utils/log-service.d.ts +19 -0
- package/dist/utils/log-service.js +47 -0
- package/dist/utils/log-service.js.map +1 -0
- package/dist/utils/logger.d.ts +46 -27
- package/dist/utils/logger.js +82 -60
- package/dist/utils/logger.js.map +1 -1
- package/dist/utils/process-manager.d.ts +21 -0
- package/dist/utils/process-manager.js +138 -0
- package/dist/utils/process-manager.js.map +1 -0
- package/dist/utils/retry.d.ts +121 -0
- package/dist/utils/retry.js +374 -0
- package/dist/utils/retry.js.map +1 -0
- package/dist/utils/run-service.d.ts +88 -0
- package/dist/utils/run-service.js +412 -0
- package/dist/utils/run-service.js.map +1 -0
- package/dist/utils/state.d.ts +58 -2
- package/dist/utils/state.js +306 -3
- package/dist/utils/state.js.map +1 -1
- package/dist/utils/task-service.d.ts +82 -0
- package/dist/utils/task-service.js +348 -0
- package/dist/utils/task-service.js.map +1 -0
- package/dist/utils/types.d.ts +2 -272
- package/dist/utils/types.js +16 -0
- package/dist/utils/types.js.map +1 -1
- package/package.json +38 -23
- package/scripts/ai-security-check.js +0 -1
- package/scripts/local-security-gate.sh +0 -0
- package/scripts/monitor-lanes.sh +94 -0
- package/scripts/patches/test-cursor-agent.js +0 -1
- package/scripts/release.sh +0 -0
- package/scripts/setup-security.sh +0 -0
- package/scripts/stream-logs.sh +72 -0
- package/scripts/verify-and-fix.sh +0 -0
- package/src/cli/clean.ts +180 -0
- package/src/cli/index.ts +7 -0
- package/src/cli/init.ts +1 -1
- package/src/cli/logs.ts +79 -42
- package/src/cli/monitor.ts +1815 -899
- package/src/cli/prepare.ts +4 -3
- package/src/cli/resume.ts +220 -277
- package/src/cli/run.ts +9 -3
- package/src/cli/runs.ts +212 -0
- package/src/cli/setup-commands.ts +0 -0
- package/src/cli/signal.ts +1 -1
- package/src/cli/stop.ts +209 -0
- package/src/cli/tasks.ts +154 -0
- package/src/core/auto-recovery.ts +909 -0
- package/src/core/failure-policy.ts +592 -0
- package/src/core/orchestrator.ts +1131 -675
- package/src/core/reviewer.ts +4 -0
- package/src/core/runner.ts +388 -162
- package/src/services/logging/buffer.ts +326 -0
- package/src/services/logging/console.ts +193 -0
- package/src/services/logging/file-writer.ts +526 -0
- package/src/services/logging/formatter.ts +268 -0
- package/src/services/logging/index.ts +16 -0
- package/src/services/logging/parser.ts +232 -0
- package/src/services/process/index.ts +261 -0
- package/src/types/agent.ts +24 -0
- package/src/types/config.ts +79 -0
- package/src/types/events.ts +156 -0
- package/src/types/index.ts +29 -0
- package/src/types/lane.ts +56 -0
- package/src/types/logging.ts +96 -0
- package/src/types/review.ts +20 -0
- package/src/types/run.ts +37 -0
- package/src/types/task.ts +79 -0
- package/src/ui/components.ts +430 -0
- package/src/ui/log-viewer.ts +485 -0
- package/src/utils/checkpoint.ts +374 -0
- package/src/utils/config.ts +11 -2
- package/src/utils/cursor-agent.ts +1 -1
- package/src/utils/dependency.ts +482 -0
- package/src/utils/doctor.ts +11 -5
- package/src/utils/enhanced-logger.ts +108 -49
- package/src/utils/git.ts +374 -2
- package/src/utils/health.ts +596 -0
- package/src/utils/lock.ts +346 -0
- package/src/utils/log-buffer.ts +28 -0
- package/src/utils/log-constants.ts +26 -0
- package/src/utils/log-formatter.ts +120 -37
- package/src/utils/log-service.ts +49 -0
- package/src/utils/logger.ts +100 -51
- package/src/utils/process-manager.ts +100 -0
- package/src/utils/retry.ts +413 -0
- package/src/utils/run-service.ts +433 -0
- package/src/utils/state.ts +369 -3
- package/src/utils/task-service.ts +370 -0
- package/src/utils/types.ts +2 -315
package/src/core/orchestrator.ts
CHANGED
|
@@ -1,675 +1,1131 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Orchestrator - Parallel lane execution with dependency management
|
|
3
|
-
*
|
|
4
|
-
*
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
import * as
|
|
12
|
-
import
|
|
13
|
-
import {
|
|
14
|
-
|
|
15
|
-
import
|
|
16
|
-
import {
|
|
17
|
-
import
|
|
18
|
-
import {
|
|
19
|
-
import {
|
|
20
|
-
import {
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
}
|
|
273
|
-
}
|
|
274
|
-
|
|
275
|
-
/**
|
|
276
|
-
*
|
|
277
|
-
*/
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
}
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
}
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
}
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
const
|
|
499
|
-
|
|
500
|
-
if (
|
|
501
|
-
logger.
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
}
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
}
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
1
|
+
/**
|
|
2
|
+
* Orchestrator - Parallel lane execution with dependency management
|
|
3
|
+
*
|
|
4
|
+
* Features:
|
|
5
|
+
* - Multi-layer stall detection
|
|
6
|
+
* - Cyclic dependency detection
|
|
7
|
+
* - Enhanced recovery strategies
|
|
8
|
+
* - Health checks before start
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import * as fs from 'fs';
|
|
12
|
+
import * as path from 'path';
|
|
13
|
+
import { spawn, ChildProcess } from 'child_process';
|
|
14
|
+
|
|
15
|
+
import * as logger from '../utils/logger';
|
|
16
|
+
import { loadState, saveState, createLaneState, validateLaneState } from '../utils/state';
|
|
17
|
+
import { LaneState, RunnerConfig, WebhookConfig, DependencyRequestPlan, EnhancedLogConfig } from '../utils/types';
|
|
18
|
+
import { events } from '../utils/events';
|
|
19
|
+
import { registerWebhooks } from '../utils/webhook';
|
|
20
|
+
import { loadConfig, getLogsDir } from '../utils/config';
|
|
21
|
+
import * as git from '../utils/git';
|
|
22
|
+
import { execSync } from 'child_process';
|
|
23
|
+
import { safeJoin } from '../utils/path';
|
|
24
|
+
import {
|
|
25
|
+
EnhancedLogManager,
|
|
26
|
+
createLogManager,
|
|
27
|
+
DEFAULT_LOG_CONFIG,
|
|
28
|
+
ParsedMessage
|
|
29
|
+
} from '../utils/enhanced-logger';
|
|
30
|
+
import { formatMessageForConsole } from '../utils/log-formatter';
|
|
31
|
+
import { analyzeStall, RecoveryAction, logFailure, DEFAULT_STALL_CONFIG, StallDetectionConfig, FailureType } from './failure-policy';
|
|
32
|
+
import {
|
|
33
|
+
getAutoRecoveryManager,
|
|
34
|
+
DEFAULT_AUTO_RECOVERY_CONFIG,
|
|
35
|
+
AutoRecoveryConfig,
|
|
36
|
+
savePOF,
|
|
37
|
+
createPOFFromRecoveryState,
|
|
38
|
+
getGitPushFailureGuidance,
|
|
39
|
+
getMergeConflictGuidance,
|
|
40
|
+
getGitErrorGuidance,
|
|
41
|
+
} from './auto-recovery';
|
|
42
|
+
import { detectCyclicDependencies, validateDependencies, printDependencyGraph, DependencyInfo } from '../utils/dependency';
|
|
43
|
+
import { preflightCheck, printPreflightReport, autoRepair } from '../utils/health';
|
|
44
|
+
import { getLatestCheckpoint } from '../utils/checkpoint';
|
|
45
|
+
import { cleanStaleLocks, getLockDir } from '../utils/lock';
|
|
46
|
+
|
|
47
|
+
/**
 * Stall detection settings used by the orchestrator.
 *
 * Starts from DEFAULT_STALL_CONFIG and overrides three fields; the idle
 * timeout is kept at one minute so an idle lane is noticed quickly.
 */
const DEFAULT_ORCHESTRATOR_STALL_CONFIG: StallDetectionConfig = {
  ...DEFAULT_STALL_CONFIG,
  // 1 minute (quick detection for continue signal)
  idleTimeoutMs: 1000 * 60,
  // 10 minutes
  progressTimeoutMs: 1000 * 60 * 10,
  maxRestarts: 2,
};
|
|
54
|
+
|
|
55
|
+
/**
 * Description of a single lane: its task file, the lanes it depends on, and
 * optional bookkeeping used when a lane is resumed or restarted.
 */
export interface LaneInfo {
  /** Lane name (listLaneFiles uses the task file's base name without `.json`). */
  name: string;
  /** Path to the lane's task file. */
  path: string;
  /** Names of the lanes this lane depends on. */
  dependsOn: string[];
  /** Current task index to resume from. */
  startIndex?: number;
  /** Number of times restarted due to stall. */
  restartCount?: number;
  /** Timestamp of last state file update. */
  lastStateUpdate?: number;
  /** When current task started. */
  taskStartTime?: number;
}
|
|
64
|
+
|
|
65
|
+
/**
 * Value returned by spawnLane.
 */
export interface SpawnLaneResult {
  /** The spawned lane runner process. */
  child: ChildProcess;
  /** Path of the log file the lane output is written to. */
  logPath: string;
  /** Log manager for the lane; only set when enhanced logging is enabled. */
  logManager?: EnhancedLogManager;
}
|
|
70
|
+
|
|
71
|
+
/**
 * Tracking record kept for a lane while its process is running.
 */
interface RunningLaneInfo {
  child: ChildProcess;
  logPath: string;
  logManager?: EnhancedLogManager;
  lastActivity: number;
  lastStateUpdate: number;
  /** 0: normal, 1: continued, 2: stronger_prompt, 3: restarted */
  stallPhase: number;
  taskStartTime: number;
  lastOutput: string;
  statePath: string;
  /** Total bytes received from agent. */
  bytesReceived: number;
  /** Bytes at last check (for delta calculation). */
  lastBytesCheck: number;
  /** Number of continue signals sent. */
  continueSignalsSent: number;
}
|
|
88
|
+
|
|
89
|
+
/**
 * Spawn a lane runner as a child `node` process.
 *
 * The runner module is started with the lane's task file plus `--run-dir`,
 * `--executor` and `--start-index`; `--pipeline-branch`, `--worktree-dir` and
 * `--no-git` are appended only when the matching option is set.
 *
 * Logging has two modes, selected by the merged log config
 * (DEFAULT_LOG_CONFIG overridden by `enhancedLogConfig`):
 * - enabled: stdout/stderr are piped through an EnhancedLogManager and a
 *   filtered, lane-prefixed copy is echoed to this process's console;
 * - disabled: stdout/stderr are appended directly to `terminal.log` inside
 *   `laneRunDir`.
 *
 * @param laneName Lane name, used as the console prefix and for the log manager.
 * @param tasksFile Path to the lane's task file, passed to the runner.
 * @param laneRunDir Run directory for the lane; created if missing.
 * @param executor Executor name forwarded to the runner.
 * @param startIndex Task index forwarded as `--start-index` (default 0).
 * @param pipelineBranch Optional branch forwarded as `--pipeline-branch`.
 * @param worktreeDir Optional directory forwarded as `--worktree-dir`.
 * @param enhancedLogConfig Optional overrides for DEFAULT_LOG_CONFIG.
 * @param noGit When true, `--no-git` is passed to the runner.
 * @param onActivity Optional callback invoked whenever lane output is seen
 *   (parsed messages, non-JSON stdout lines, and stderr lines that are not
 *   recognised as plain status output).
 * @returns The child process, the log path, and the log manager (enhanced mode only).
 */
export function spawnLane({
  laneName,
  tasksFile,
  laneRunDir,
  executor,
  startIndex = 0,
  pipelineBranch,
  worktreeDir,
  enhancedLogConfig,
  noGit = false,
  onActivity,
}: {
  laneName: string;
  tasksFile: string;
  laneRunDir: string;
  executor: string;
  startIndex?: number;
  pipelineBranch?: string;
  worktreeDir?: string;
  enhancedLogConfig?: Partial<EnhancedLogConfig>;
  noGit?: boolean;
  onActivity?: () => void;
}): SpawnLaneResult {
  fs.mkdirSync(laneRunDir, { recursive: true });

  // Use extension-less resolve to handle both .ts (dev) and .js (dist)
  const runnerPath = require.resolve('./runner');

  const args = [
    runnerPath,
    tasksFile,
    '--run-dir', laneRunDir,
    '--executor', executor,
    '--start-index', startIndex.toString(),
  ];

  if (pipelineBranch) {
    args.push('--pipeline-branch', pipelineBranch);
  }

  if (worktreeDir) {
    args.push('--worktree-dir', worktreeDir);
  }

  if (noGit) {
    args.push('--no-git');
  }

  // Create enhanced log manager if enabled
  const logConfig = { ...DEFAULT_LOG_CONFIG, ...enhancedLogConfig };
  let logManager: EnhancedLogManager | undefined;
  let logPath: string;
  let child: ChildProcess;

  // Build environment for child process
  const childEnv = {
    ...process.env,
  };

  if (logConfig.enabled) {
    // Matches a line that already starts with "[YYYY-MM-DDT..." or "[HH:MM:SS]".
    // Both alternatives are anchored at the start of the line; the previous
    // pattern escaped the second caret ("\^"), which matched a literal '^'
    // and therefore never recognised "[HH:MM:SS]" prefixes.
    const timestampPrefix = /^\[(?:\d{4}-\d{2}-\d{2}T|\d{2}:\d{2}:\d{2}\])/;

    // Console prefixes shared by the stdout and stderr handlers.
    const laneTag = `${logger.COLORS.magenta}[${laneName}]${logger.COLORS.reset}`;
    const timeTag = (): string =>
      `${logger.COLORS.gray}[${new Date().toLocaleTimeString('en-US', { hour12: false })}]${logger.COLORS.reset}`;

    // Create callback for clean console output
    const onParsedMessage = (msg: ParsedMessage) => {
      if (onActivity) onActivity();
      const formatted = formatMessageForConsole(msg, {
        laneLabel: `[${laneName}]`,
        includeTimestamp: true
      });
      process.stdout.write(formatted + '\n');
    };

    const manager = createLogManager(laneRunDir, laneName, logConfig, onParsedMessage);
    logManager = manager;
    logPath = manager.getLogPaths().clean;

    // Spawn with pipe for enhanced logging
    child = spawn('node', args, {
      stdio: ['ignore', 'pipe', 'pipe'],
      env: childEnv,
      detached: false,
    });

    // Holds the trailing partial stdout line until its newline arrives
    let lineBuffer = '';

    // Pipe stdout and stderr through enhanced logger
    if (child.stdout) {
      child.stdout.on('data', (data: Buffer) => {
        manager.writeStdout(data);

        // Filter out JSON lines from console output to keep it clean
        lineBuffer += data.toString();
        const lines = lineBuffer.split('\n');
        lineBuffer = lines.pop() || '';

        for (const line of lines) {
          const trimmed = line.trim();
          const isJson = trimmed.startsWith('{') || trimmed.includes('{"type"');

          if (!trimmed || isJson) {
            continue;
          }

          if (onActivity) onActivity();

          if (timestampPrefix.test(trimmed)) {
            // Line already carries a timestamp: insert lane name after it
            const formatted = trimmed.replace(/^(\[[^\]]+\])/, `$1 ${laneTag}`);
            process.stdout.write(formatted + '\n');
          } else {
            // Add full prefix: timestamp + lane
            process.stdout.write(`${timeTag()} ${laneTag} ${line}\n`);
          }
        }
      });
    }

    if (child.stderr) {
      child.stderr.on('data', (data: Buffer) => {
        manager.writeStderr(data);

        // NOTE: stderr is split per chunk (no carry-over buffer), so a line
        // that spans two chunks is printed as two console lines.
        for (const line of data.toString().split('\n')) {
          const trimmed = line.trim();
          if (!trimmed) {
            continue;
          }

          // Check if it's a real error or just git/status output on stderr
          const isStatus = trimmed.startsWith('Preparing worktree') ||
                           trimmed.startsWith('Switched to a new branch') ||
                           trimmed.startsWith('HEAD is now at') ||
                           trimmed.includes('actual output');

          if (isStatus) {
            process.stdout.write(`${timeTag()} ${laneTag} ${trimmed}\n`);
          } else {
            if (onActivity) onActivity();
            process.stderr.write(`${timeTag()} ${laneTag} ${logger.COLORS.red}❌ ERR ${trimmed}${logger.COLORS.reset}\n`);
          }
        }
      });
    }

    // Close log manager when process exits
    child.on('exit', () => {
      manager.close();
    });
  } else {
    // Fallback to simple file logging
    logPath = safeJoin(laneRunDir, 'terminal.log');
    const logFd = fs.openSync(logPath, 'a');

    try {
      child = spawn('node', args, {
        stdio: ['ignore', logFd, logFd],
        env: childEnv,
        detached: false,
      });
    } finally {
      // This process no longer needs its copy of the descriptor; close it
      // even when spawn() throws so the descriptor is not leaked.
      try {
        fs.closeSync(logFd);
      } catch {
        // Ignore
      }
    }
  }

  return { child, logPath, logManager };
}
|
|
259
|
+
|
|
260
|
+
/**
 * Wait for a child process to finish and report its exit code.
 *
 * @param proc Child process to wait on.
 * @returns A promise that resolves (never rejects) with the process exit
 *   code. It resolves with 1 when the process was terminated by a signal
 *   (no numeric exit code) or when the process emits an 'error' event.
 */
export function waitChild(proc: ChildProcess): Promise<number> {
  return new Promise((resolve) => {
    // Already exited normally: the 'exit' event will not fire again.
    if (proc.exitCode !== null) {
      resolve(proc.exitCode);
      return;
    }

    // Already terminated by a signal: exitCode stays null in that case, so
    // without this check the promise would wait for an 'exit' event that has
    // already been emitted. `!= null` also tolerates an undefined signalCode.
    if (proc.signalCode != null) {
      resolve(1);
      return;
    }

    proc.once('exit', (code) => resolve(code ?? 1));
    proc.once('error', () => resolve(1));
  });
}
|
|
274
|
+
|
|
275
|
+
/**
 * Build the lane list for a tasks directory.
 *
 * Every `*.json` entry in `tasksDir` becomes one lane, in sorted file-name
 * order. The lane name is the file name without `.json`, and `dependsOn` is
 * read from the file's JSON content. A file that cannot be read or parsed is
 * still listed, with no dependencies, after a warning is logged.
 *
 * @param tasksDir Directory containing the lane task files.
 * @returns One LaneInfo per task file; an empty array when the directory does not exist.
 */
export function listLaneFiles(tasksDir: string): LaneInfo[] {
  if (!fs.existsSync(tasksDir)) {
    return [];
  }

  const laneFiles = fs.readdirSync(tasksDir).filter(entry => entry.endsWith('.json'));
  laneFiles.sort();

  const lanes: LaneInfo[] = [];

  for (const fileName of laneFiles) {
    const lanePath = safeJoin(tasksDir, fileName);
    const laneName = path.basename(fileName, '.json');
    let dependsOn: string[] = [];

    try {
      const parsed = JSON.parse(fs.readFileSync(lanePath, 'utf8')) as RunnerConfig;
      dependsOn = parsed.dependsOn || [];
    } catch (e) {
      logger.warn(`Failed to parse config for lane ${laneName}: ${e}`);
    }

    lanes.push({ name: laneName, path: lanePath, dependsOn });
  }

  return lanes;
}
|
|
306
|
+
|
|
307
|
+
/**
 * Print a one-line status summary for every lane.
 *
 * For each lane the state is loaded from `state.json` in its run directory:
 * - no run directory registered: status `(unknown)`;
 * - no state available: `waiting` when the lane has dependencies, otherwise `pending`;
 * - otherwise: the recorded status and a 1-based `current/total` task counter.
 *
 * All rows are collected first and then printed under a section header.
 *
 * @param lanes Lanes to report on.
 * @param laneRunDirs Map from lane name to that lane's run directory.
 */
export function printLaneStatus(lanes: LaneInfo[], laneRunDirs: Record<string, string>): void {
  const summaries: Array<{ lane: string; status: string; task: string }> = [];

  for (const { name, dependsOn } of lanes) {
    const runDir = laneRunDirs[name];
    if (!runDir) {
      summaries.push({ lane: name, status: '(unknown)', task: '-' });
      continue;
    }

    const laneState = loadState<LaneState>(safeJoin(runDir, 'state.json'));
    if (!laneState) {
      const hasDependencies = dependsOn.length > 0;
      summaries.push({ lane: name, status: hasDependencies ? 'waiting' : 'pending', task: '-' });
      continue;
    }

    // currentTaskIndex is zero-based; show it one-based
    const currentTask = (laneState.currentTaskIndex || 0) + 1;
    const totalTasks = laneState.totalTasks || '?';
    summaries.push({
      lane: name,
      status: laneState.status || 'unknown',
      task: `${currentTask}/${totalTasks}`,
    });
  }

  logger.section('📡 Lane Status');
  for (const { lane, status, task } of summaries) {
    console.log(`- ${lane}: ${status} (${task})`);
  }
}
|
|
336
|
+
|
|
337
|
+
/**
 * Resolve dependencies for all blocked lanes and sync with all active lanes.
 *
 * Steps:
 * 1. Collect the de-duplicated `changes` and `commands` of every blocked lane's
 *    dependency request. Nothing is done when there are no commands.
 * 2. Pick a worktree: the first blocked lane's `worktreeDir` from its state file,
 *    or `<runRoot>/resolution-worktree` (created on demand).
 * 3. Check out `pipelineBranch` there, run every command, then commit and push.
 * 4. Merge `pipelineBranch` into the current task branch of every lane whose state
 *    is neither `completed` nor `failed`, and push that branch.
 * 5. Leave the worktree on `pipelineBranch`.
 *
 * NOTE(review): the commands come verbatim from lane state files and are executed
 * through the shell via `execSync`; they must be treated as trusted input.
 *
 * @param blockedLanes - Lane name -> dependency request plan of each blocked lane.
 * @param allLanes - Every lane of the run (used for the sync step).
 * @param laneRunDirs - Lane name -> run directory holding that lane's `state.json`.
 * @param pipelineBranch - Branch on which dependency changes are committed.
 * @param runRoot - Run directory; parent of the fallback resolution worktree.
 * @throws Error when a resolution command fails (`Command failed: ...`), or when the
 *   initial/final checkout of `pipelineBranch` fails.
 */
async function resolveAllDependencies(
  blockedLanes: Map<string, DependencyRequestPlan>,
  allLanes: LaneInfo[],
  laneRunDirs: Record<string, string>,
  pipelineBranch: string,
  runRoot: string
) {
  // 1. Collect all unique changes and commands from blocked lanes
  const allChanges: string[] = [];
  const allCommands: string[] = [];

  for (const [, plan] of blockedLanes) {
    if (plan.changes) allChanges.push(...plan.changes);
    if (plan.commands) allCommands.push(...plan.commands);
  }

  const uniqueChanges = Array.from(new Set(allChanges));
  const uniqueCommands = Array.from(new Set(allCommands));

  if (uniqueCommands.length === 0) return;

  // 2. Setup a temporary worktree for resolution if needed, or use the first available one
  const firstLaneName = Array.from(blockedLanes.keys())[0]!;
  const statePath = safeJoin(laneRunDirs[firstLaneName]!, 'state.json');
  const state = loadState<LaneState>(statePath);
  const worktreeDir = state?.worktreeDir || safeJoin(runRoot, 'resolution-worktree');

  if (!fs.existsSync(worktreeDir)) {
    logger.info(`Creating resolution worktree at ${worktreeDir}`);
    git.createWorktree(worktreeDir, pipelineBranch, { baseBranch: git.getCurrentBranch() });
  }

  // 3. Resolve on pipeline branch
  logger.info(`Resolving dependencies on ${pipelineBranch}`);
  git.runGit(['checkout', pipelineBranch], { cwd: worktreeDir });

  for (const cmd of uniqueCommands) {
    logger.info(`Running: ${cmd}`);
    try {
      execSync(cmd, { cwd: worktreeDir, stdio: 'inherit' });
    } catch (e: any) {
      throw new Error(`Command failed: ${cmd}. ${e.message}`);
    }
  }

  // Commit separately from push so that the expected "nothing to commit" case and a
  // genuine push failure are reported differently instead of both being swallowed.
  let committed = false;
  try {
    git.runGit(['add', '.'], { cwd: worktreeDir });
    git.runGit(['commit', '-m', `chore: auto-resolve dependencies\n\n${uniqueChanges.join('\n')}`], { cwd: worktreeDir });
    committed = true;
  } catch (e: any) {
    // Typically the commands produced no file changes; still best-effort, but visible
    logger.info(`No dependency changes committed on ${pipelineBranch}: ${e.message}`);
  }

  if (committed) {
    try {
      // Log changed files
      const stats = git.getLastOperationStats(worktreeDir);
      if (stats) {
        logger.info('Changed files:\n' + stats);
      }

      git.push(pipelineBranch, { cwd: worktreeDir });
    } catch (e: any) {
      logger.warn(`Failed to push ${pipelineBranch}: ${e.message}`);
    }
  }

  // 4. Sync ALL active lanes (blocked + pending + running)
  // Since we only call this when running.size === 0, "active" means not completed/failed
  for (const lane of allLanes) {
    const laneDir = laneRunDirs[lane.name];
    if (!laneDir) continue;

    const laneState = loadState<LaneState>(safeJoin(laneDir, 'state.json'));
    if (!laneState || laneState.status === 'completed' || laneState.status === 'failed') continue;

    // Merge pipelineBranch into the lane's current task branch
    const currentIdx = laneState.currentTaskIndex;

    // One unreadable lane file must not abort the sync of the remaining lanes:
    // by this point the dependency commit has already been made.
    let taskConfig: RunnerConfig;
    try {
      taskConfig = JSON.parse(fs.readFileSync(lane.path, 'utf8')) as RunnerConfig;
    } catch (e: any) {
      logger.warn(`Failed to read tasks for lane ${lane.name}: ${e.message}`);
      continue;
    }

    const task = taskConfig.tasks[currentIdx];
    if (!task) continue;

    const lanePipelineBranch = `${pipelineBranch}/${lane.name}`;
    const taskBranch = `${lanePipelineBranch}--${String(currentIdx + 1).padStart(2, '0')}-${task.name}`;
    logger.info(`Syncing lane ${lane.name} branch ${taskBranch}`);

    // True only while `git merge` itself is executing, so we know when an abort is needed
    let mergeInProgress = false;
    try {
      // If task branch doesn't exist yet, it will be created from pipelineBranch when the lane starts
      if (git.branchExists(taskBranch, { cwd: worktreeDir })) {
        git.runGit(['checkout', taskBranch], { cwd: worktreeDir });

        mergeInProgress = true;
        git.runGit(['merge', pipelineBranch, '--no-edit'], { cwd: worktreeDir });
        mergeInProgress = false;

        // Log changed files
        const stats = git.getLastOperationStats(worktreeDir);
        if (stats) {
          logger.info(`Sync results for ${lane.name}:\n` + stats);
        }

        git.push(taskBranch, { cwd: worktreeDir });
      }
    } catch (e: any) {
      logger.warn(`Failed to sync branch ${taskBranch}: ${e.message}`);

      if (mergeInProgress) {
        // A conflicted merge leaves the shared worktree mid-merge, which would make
        // every following checkout (other lanes, and the final one below) fail.
        try {
          git.runGit(['merge', '--abort'], { cwd: worktreeDir });
        } catch {
          // Merge failed before it started; nothing to abort
        }
      }
    }
  }

  git.runGit(['checkout', pipelineBranch], { cwd: worktreeDir });
}
|
|
439
|
+
|
|
440
|
+
/**
 * Run orchestration with dependency management.
 *
 * Discovers lane task files in `tasksDir`, optionally runs preflight checks,
 * validates the lane dependency graph, prepares a run directory and an initial
 * `state.json` per lane, then spawns lanes (up to `maxConcurrentLanes` at a time)
 * once all of their dependencies have completed. While lanes run, every 10 seconds
 * each one is checked for stalls and an escalating recovery action is applied
 * (continue signal, stronger prompt, kill and restart, diagnostics, abort).
 * Lanes exiting with code 2 are treated as blocked on a dependency change request
 * and, unless disabled, resolved via `resolveAllDependencies`.
 *
 * NOTE: this function terminates the process with `process.exit(1)` when any lane
 * failed and with `process.exit(2)` when lanes remain blocked.
 *
 * @param tasksDir - Directory containing the lane task files.
 * @param options - Orchestration options.
 * @param options.runDir - Run directory; relative paths are resolved against `process.cwd()`.
 *   Defaults to `<logsDir>/runs/<runId>`.
 * @param options.executor - Executor passed to each lane (default `'cursor-agent'`).
 * @param options.pollInterval - Interval in ms between periodic status printouts (default 60000).
 * @param options.maxConcurrentLanes - Maximum lanes running at once (default 10).
 * @param options.webhooks - Webhooks to register with the event system.
 * @param options.autoResolveDependencies - Set to `false` to leave blocked lanes unresolved.
 * @param options.enhancedLogging - Logging configuration forwarded to each lane.
 * @param options.noGit - Disables git usage for lanes and auto-resolution.
 * @param options.skipPreflight - Skip the preflight checks.
 * @param options.stallConfig - Overrides for the stall detection defaults.
 * @param options.autoRecoveryConfig - Overrides for the auto-recovery defaults.
 * @returns The lanes, their last exit codes, and the run directory.
 * @throws Error when no lane files exist, preflight fails, or dependencies are invalid.
 */
export async function orchestrate(tasksDir: string, options: {
  runDir?: string;
  executor?: string;
  pollInterval?: number;
  maxConcurrentLanes?: number;
  webhooks?: WebhookConfig[];
  autoResolveDependencies?: boolean;
  enhancedLogging?: Partial<EnhancedLogConfig>;
  noGit?: boolean;
  skipPreflight?: boolean;
  stallConfig?: Partial<StallDetectionConfig>;
  autoRecoveryConfig?: Partial<AutoRecoveryConfig>;
} = {}): Promise<{ lanes: LaneInfo[]; exitCodes: Record<string, number>; runRoot: string }> {
  const lanes = listLaneFiles(tasksDir);

  if (lanes.length === 0) {
    throw new Error(`No lane task files found in ${tasksDir}`);
  }

  // Run preflight checks
  if (!options.skipPreflight) {
    logger.section('🔍 Preflight Checks');

    const preflight = await preflightCheck({
      requireRemote: !options.noGit,
      requireAuth: true,
    });

    if (!preflight.canProceed) {
      printPreflightReport(preflight);
      throw new Error('Preflight check failed. Please fix the blockers above.');
    }

    // Auto-repair if there are warnings
    if (preflight.warnings.length > 0) {
      logger.info('Attempting auto-repair...');
      const repair = await autoRepair();
      if (repair.repaired.length > 0) {
        for (const r of repair.repaired) {
          logger.success(`✓ ${r}`);
        }
      }
    }

    logger.success('✓ Preflight checks passed');
  }

  // Validate dependencies and detect cycles
  logger.section('📊 Dependency Analysis');

  const depInfos: DependencyInfo[] = lanes.map(l => ({
    name: l.name,
    dependsOn: l.dependsOn,
  }));

  const depValidation = validateDependencies(depInfos);

  if (!depValidation.valid) {
    logger.error('❌ Dependency validation failed:');
    for (const err of depValidation.errors) {
      logger.error(` • ${err}`);
    }
    throw new Error('Invalid dependency configuration');
  }

  if (depValidation.warnings.length > 0) {
    for (const warn of depValidation.warnings) {
      logger.warn(`⚠️ ${warn}`);
    }
  }

  // Print dependency graph
  printDependencyGraph(depInfos);

  const config = loadConfig();
  const logsDir = getLogsDir(config);
  const runId = `run-${Date.now()}`;
  // Use absolute path for runRoot to avoid issues with subfolders
  const runRoot = options.runDir
    ? (path.isAbsolute(options.runDir) ? options.runDir : path.resolve(process.cwd(), options.runDir)) // nosemgrep
    : safeJoin(logsDir, 'runs', runId);

  fs.mkdirSync(runRoot, { recursive: true });

  // Clean stale locks before starting
  try {
    const lockDir = getLockDir(git.getRepoRoot());
    const cleaned = cleanStaleLocks(lockDir);
    if (cleaned > 0) {
      logger.info(`Cleaned ${cleaned} stale lock(s)`);
    }
  } catch {
    // Ignore lock cleanup errors
  }

  const randomSuffix = Math.random().toString(36).substring(2, 7);
  const pipelineBranch = `cursorflow/run-${Date.now().toString(36)}-${randomSuffix}`;

  // Stall detection configuration
  const stallConfig: StallDetectionConfig = {
    ...DEFAULT_ORCHESTRATOR_STALL_CONFIG,
    ...options.stallConfig,
  };

  // Initialize auto-recovery manager
  const autoRecoveryManager = getAutoRecoveryManager({
    ...DEFAULT_AUTO_RECOVERY_CONFIG,
    idleTimeoutMs: stallConfig.idleTimeoutMs, // Sync with stall config
    ...options.autoRecoveryConfig,
  });

  // Initialize event system
  events.setRunId(runId);
  if (options.webhooks) {
    registerWebhooks(options.webhooks);
  }

  events.emit('orchestration.started', {
    runId,
    tasksDir,
    laneCount: lanes.length,
    runRoot,
  });

  const maxConcurrent = options.maxConcurrentLanes || 10;
  const running: Map<string, RunningLaneInfo> = new Map();
  const exitCodes: Record<string, number> = {};
  const completedLanes = new Set<string>();
  const failedLanes = new Set<string>();
  const blockedLanes: Map<string, DependencyRequestPlan> = new Map();

  // One exit promise per running lane, created once at spawn time. Re-creating it on
  // every poll would presumably stack listeners on the child and could miss an exit
  // that happened between two iterations (waitChild is defined elsewhere).
  const exitWaiters: Map<string, Promise<{ name: string; code: number }>> = new Map();

  // Lanes that were deliberately killed in order to be restarted. Tracked explicitly
  // so the restart decision does not depend on the numeric stall phase.
  const restartRequested = new Set<string>();

  // Name lookup used by the stall check
  const lanesByName = new Map<string, LaneInfo>();

  // Track start index for each lane (initially 0)
  for (const lane of lanes) {
    lane.startIndex = 0;
    lane.restartCount = 0;
    lanesByName.set(lane.name, lane);
  }

  const laneRunDirs: Record<string, string> = {};
  const laneWorktreeDirs: Record<string, string> = {};
  const repoRoot = git.getRepoRoot();

  for (const lane of lanes) {
    laneRunDirs[lane.name] = safeJoin(runRoot, 'lanes', lane.name);
    fs.mkdirSync(laneRunDirs[lane.name]!, { recursive: true });

    // Create initial state for ALL lanes so resume can find them even if they didn't start
    try {
      const taskConfig = JSON.parse(fs.readFileSync(lane.path, 'utf8')) as RunnerConfig;

      // Calculate unique branch and worktree for this lane
      const lanePipelineBranch = `${pipelineBranch}/${lane.name}`;

      // Use a flat worktree directory name to avoid race conditions in parent directory creation
      // repoRoot/_cursorflow/worktrees/cursorflow-run-xxx-lane-name
      const laneWorktreeDir = safeJoin(
        repoRoot,
        taskConfig.worktreeRoot || '_cursorflow/worktrees',
        lanePipelineBranch.replace(/\//g, '-')
      );

      // Ensure the parent directory exists before spawning the runner
      // to avoid race conditions in git worktree add or fs operations
      const worktreeParent = path.dirname(laneWorktreeDir);
      if (!fs.existsSync(worktreeParent)) {
        fs.mkdirSync(worktreeParent, { recursive: true });
      }

      laneWorktreeDirs[lane.name] = laneWorktreeDir;

      const initialState = createLaneState(lane.name, taskConfig, lane.path, {
        pipelineBranch: lanePipelineBranch,
        worktreeDir: laneWorktreeDir
      });
      saveState(safeJoin(laneRunDirs[lane.name]!, 'state.json'), initialState);
    } catch (e) {
      logger.warn(`Failed to create initial state for lane ${lane.name}: ${e}`);
    }
  }

  logger.section('🧭 Starting Orchestration');
  logger.info(`Tasks directory: ${tasksDir}`);
  logger.info(`Run directory: ${runRoot}`);
  logger.info(`Lanes: ${lanes.length}`);

  // Display dependency graph
  logger.info('\n📊 Dependency Graph:');
  for (const lane of lanes) {
    const deps = lane.dependsOn.length > 0 ? ` [depends on: ${lane.dependsOn.join(', ')}]` : '';
    console.log(` ${logger.COLORS.cyan}${lane.name}${logger.COLORS.reset}${deps}`);

    // Simple tree-like visualization for deep dependencies
    if (lane.dependsOn.length > 0) {
      for (const dep of lane.dependsOn) {
        console.log(` └─ ${dep}`);
      }
    }
  }
  console.log('');

  // Disable auto-resolve when noGit mode is enabled
  const autoResolve = !options.noGit && options.autoResolveDependencies !== false;

  if (options.noGit) {
    logger.info('🚫 Git operations disabled (--no-git mode)');
  }

  // Best-effort SIGKILL of a lane's child process.
  const killLaneProcess = (info: RunningLaneInfo): void => {
    try {
      info.child.kill('SIGKILL');
    } catch {
      // Process might already be dead
    }
  };

  // Persist a POF entry for a lane whose recovery failed. Uses the given diagnostic
  // info when provided, otherwise the one stored in the lane's recovery state.
  const saveRecoveryPOF = (
    laneName: string,
    diagnosticOverride?: Parameters<typeof createPOFFromRecoveryState>[5]
  ): void => {
    const recoveryState = autoRecoveryManager.getState(laneName);
    if (!recoveryState) return;

    try {
      const laneStatePath = safeJoin(laneRunDirs[laneName]!, 'state.json');
      const laneState = loadState<LaneState>(laneStatePath);
      const pofDir = safeJoin(runRoot, '..', '..', 'pof');
      const pofEntry = createPOFFromRecoveryState(
        runId,
        runRoot,
        laneName,
        recoveryState,
        laneState,
        diagnosticOverride ?? recoveryState.diagnosticInfo
      );
      savePOF(runId, pofDir, pofEntry);
    } catch (pofError: any) {
      logger.warn(`[${laneName}] Failed to save POF: ${pofError.message}`);
    }
  };

  // Monitor lanes
  const monitorInterval = setInterval(() => {
    printLaneStatus(lanes, laneRunDirs);
  }, options.pollInterval || 60000);

  // try/finally: the status timer must not outlive this function when anything
  // inside the scheduling loop throws.
  try {
    while (completedLanes.size + failedLanes.size + blockedLanes.size < lanes.length || (blockedLanes.size > 0 && running.size === 0)) {
      // 1. Identify lanes ready to start
      const readyToStart = lanes.filter(lane => {
        // Not already running or completed or failed or blocked
        if (running.has(lane.name) || completedLanes.has(lane.name) || failedLanes.has(lane.name) || blockedLanes.has(lane.name)) {
          return false;
        }

        // Check dependencies
        for (const dep of lane.dependsOn) {
          if (failedLanes.has(dep)) {
            logger.error(`Lane ${lane.name} will not start because dependency ${dep} failed`);
            failedLanes.add(lane.name);
            exitCodes[lane.name] = 1;
            return false;
          }
          if (blockedLanes.has(dep)) {
            // If a dependency is blocked, wait
            return false;
          }
          if (!completedLanes.has(dep)) {
            return false;
          }
        }
        return true;
      });

      // 2. Spawn ready lanes up to maxConcurrent
      for (const lane of readyToStart) {
        if (running.size >= maxConcurrent) break;

        const laneStatePath = safeJoin(laneRunDirs[lane.name]!, 'state.json');

        // Validate and repair state before starting
        const validation = validateLaneState(laneStatePath, { autoRepair: true });
        if (!validation.valid && !validation.repaired) {
          logger.warn(`[${lane.name}] State validation issues: ${validation.issues.join(', ')}`);
        }

        logger.info(`Lane started: ${lane.name}${lane.startIndex ? ` (resuming from ${lane.startIndex})` : ''}`);

        const spawnResult = spawnLane({
          laneName: lane.name,
          tasksFile: lane.path,
          laneRunDir: laneRunDirs[lane.name]!,
          executor: options.executor || 'cursor-agent',
          startIndex: lane.startIndex,
          pipelineBranch: `${pipelineBranch}/${lane.name}`,
          worktreeDir: laneWorktreeDirs[lane.name],
          enhancedLogConfig: options.enhancedLogging,
          noGit: options.noGit,
          onActivity: () => {
            const info = running.get(lane.name);
            if (info) {
              info.lastActivity = Date.now();
            }
          }
        });

        // Track last output and bytes received for long operation and stall detection
        if (spawnResult.child.stdout) {
          spawnResult.child.stdout.on('data', (data: Buffer) => {
            const info = running.get(lane.name);
            if (info) {
              info.lastOutput = data.toString().trim().split('\n').pop() || '';
              info.bytesReceived += data.length;

              // Update auto-recovery manager
              autoRecoveryManager.recordActivity(lane.name, data.length, info.lastOutput);
            }
          });
        }

        const now = Date.now();
        running.set(lane.name, {
          ...spawnResult,
          lastActivity: now,
          lastStateUpdate: now,
          stallPhase: 0,
          taskStartTime: now,
          lastOutput: '',
          statePath: laneStatePath,
          bytesReceived: 0,
          lastBytesCheck: 0,
          continueSignalsSent: 0,
        });

        // Start waiting for this child exactly once; the promise is reused by every poll
        exitWaiters.set(
          lane.name,
          waitChild(spawnResult.child).then(code => ({ name: lane.name, code }))
        );

        // Register lane with auto-recovery manager
        autoRecoveryManager.registerLane(lane.name);

        // Update lane tracking
        lane.taskStartTime = now;

        events.emit('lane.started', {
          laneName: lane.name,
          pid: spawnResult.child.pid,
          logPath: spawnResult.logPath,
        });
      }

      // 3. Wait for any running lane to finish OR check for stalls
      if (running.size > 0) {
        // Polling timeout for stall detection
        let pollTimeout: NodeJS.Timeout | undefined;
        const pollPromise = new Promise<{ name: string; code: number }>(resolve => {
          pollTimeout = setTimeout(() => resolve({ name: '__poll__', code: 0 }), 10000);
        });

        const exitPromises = Array.from(running.keys(), name => exitWaiters.get(name)!);

        let result: { name: string; code: number };
        try {
          result = await Promise.race([...exitPromises, pollPromise]);
        } finally {
          if (pollTimeout) clearTimeout(pollTimeout);
        }

        if (result.name === '__poll__') {
          // Periodic stall check with multi-layer detection and escalating recovery
          for (const [laneName, info] of running.entries()) {
            const now = Date.now();
            const idleTime = now - info.lastActivity;
            const lane = lanesByName.get(laneName)!;

            // Check state file for progress updates
            let progressTime = 0;
            try {
              const stateStat = fs.statSync(info.statePath);
              const stateUpdateTime = stateStat.mtimeMs;
              if (stateUpdateTime > info.lastStateUpdate) {
                info.lastStateUpdate = stateUpdateTime;
              }
              progressTime = now - info.lastStateUpdate;
            } catch {
              // State file might not exist yet
            }

            // Calculate bytes received since last check
            const bytesDelta = info.bytesReceived - info.lastBytesCheck;
            info.lastBytesCheck = info.bytesReceived;

            // Use multi-layer stall analysis with enhanced context
            const analysis = analyzeStall({
              stallPhase: info.stallPhase,
              idleTimeMs: idleTime,
              progressTimeMs: progressTime,
              lastOutput: info.lastOutput,
              restartCount: lane.restartCount || 0,
              taskStartTimeMs: info.taskStartTime,
              bytesReceived: bytesDelta, // Bytes since last check
              continueSignalsSent: info.continueSignalsSent,
            }, stallConfig);

            // Only act if action is not NONE
            if (analysis.action === RecoveryAction.NONE) {
              continue;
            }

            logFailure(laneName, analysis);
            info.logManager?.log('error', analysis.message);

            if (analysis.action === RecoveryAction.CONTINUE_SIGNAL) {
              const interventionPath = safeJoin(laneRunDirs[laneName]!, 'intervention.txt');
              try {
                fs.writeFileSync(interventionPath, 'continue');
                info.stallPhase = 1;
                info.lastActivity = now;
                info.continueSignalsSent++;
                logger.info(`[${laneName}] Sent continue signal (#${info.continueSignalsSent})`);

                events.emit('recovery.continue_signal', {
                  laneName,
                  idleSeconds: Math.round(idleTime / 1000),
                  signalCount: info.continueSignalsSent,
                });
              } catch (e) {
                logger.error(`Failed to write intervention file for ${laneName}: ${e}`);
              }
            } else if (analysis.action === RecoveryAction.STRONGER_PROMPT) {
              const interventionPath = safeJoin(laneRunDirs[laneName]!, 'intervention.txt');
              const strongerPrompt = `[SYSTEM INTERVENTION] You seem to be stuck. Please continue with your current task immediately. If you're waiting for something, explain what you need and proceed with what you can do now. If you've completed the task, summarize your work and finish.`;
              try {
                fs.writeFileSync(interventionPath, strongerPrompt);
                info.stallPhase = 2;
                info.lastActivity = now;
                logger.warn(`[${laneName}] Sent stronger prompt after continue signal failed`);

                events.emit('recovery.stronger_prompt', { laneName });
              } catch (e) {
                logger.error(`Failed to write intervention file for ${laneName}: ${e}`);
              }
            } else if (analysis.action === RecoveryAction.KILL_AND_RESTART ||
                       analysis.action === RecoveryAction.RESTART_LANE ||
                       analysis.action === RecoveryAction.RESTART_LANE_FROM_CHECKPOINT) {
              lane.restartCount = (lane.restartCount || 0) + 1;
              info.stallPhase = 3;

              // Remember that the upcoming non-zero exit is intentional, so the lane
              // is re-queued instead of being recorded as failed.
              restartRequested.add(laneName);

              // Try to get checkpoint info
              const checkpoint = getLatestCheckpoint(laneRunDirs[laneName]!);
              if (checkpoint) {
                logger.info(`[${laneName}] Checkpoint available: ${checkpoint.id} (task ${checkpoint.taskIndex})`);
              }

              // Kill the process
              killLaneProcess(info);

              logger.warn(`[${laneName}] Killing and restarting lane (restart #${lane.restartCount})`);

              events.emit('recovery.restart', {
                laneName,
                restartCount: lane.restartCount,
                maxRestarts: stallConfig.maxRestarts,
              });
            } else if (analysis.action === RecoveryAction.RUN_DOCTOR) {
              info.stallPhase = 4;

              // Run diagnostics
              logger.error(`[${laneName}] Running diagnostics due to persistent failures...`);

              // Import health check dynamically to avoid circular dependency
              const { checkAgentHealth, checkAuthHealth } = await import('../utils/health');

              const [agentHealth, authHealth] = await Promise.all([
                checkAgentHealth(),
                checkAuthHealth(),
              ]);

              const issues: string[] = [];
              if (!agentHealth.ok) issues.push(`Agent: ${agentHealth.message}`);
              if (!authHealth.ok) issues.push(`Auth: ${authHealth.message}`);

              if (issues.length > 0) {
                logger.error(`[${laneName}] Diagnostic issues found:\n ${issues.join('\n ')}`);
              } else {
                logger.warn(`[${laneName}] No obvious issues found. The problem may be with the AI model or network.`);
              }

              // Save diagnostic to file
              const diagnosticPath = safeJoin(laneRunDirs[laneName]!, 'diagnostic.json');
              fs.writeFileSync(diagnosticPath, JSON.stringify({
                timestamp: Date.now(),
                agentHealthy: agentHealth.ok,
                authHealthy: authHealth.ok,
                issues,
                analysis,
              }, null, 2));

              // Kill the process
              killLaneProcess(info);

              logger.error(`[${laneName}] Aborting lane after diagnostic. Check ${diagnosticPath} for details.`);

              // Save POF for failed recovery
              saveRecoveryPOF(laneName, {
                timestamp: Date.now(),
                agentHealthy: agentHealth.ok,
                authHealthy: authHealth.ok,
                systemHealthy: true,
                suggestedAction: issues.length > 0 ? 'Fix the issues above and retry' : 'Try with a different model',
                details: issues.join('\n') || 'No obvious issues found',
              });

              events.emit('recovery.diagnosed', {
                laneName,
                diagnostic: { agentHealthy: agentHealth.ok, authHealthy: authHealth.ok, issues },
              });
            } else if (analysis.action === RecoveryAction.ABORT_LANE) {
              info.stallPhase = 5;

              killLaneProcess(info);

              logger.error(`[${laneName}] Aborting lane due to repeated stalls`);

              // Save POF for failed recovery
              saveRecoveryPOF(laneName);
            } else if (analysis.action === RecoveryAction.SEND_GIT_GUIDANCE) {
              // Send guidance message to agent for git issues
              const interventionPath = safeJoin(laneRunDirs[laneName]!, 'intervention.txt');

              // Determine which guidance to send based on the failure type
              let guidance: string;
              if (analysis.type === FailureType.GIT_PUSH_REJECTED) {
                guidance = getGitPushFailureGuidance();
              } else if (analysis.type === FailureType.MERGE_CONFLICT) {
                guidance = getMergeConflictGuidance();
              } else {
                guidance = getGitErrorGuidance(analysis.message);
              }

              try {
                fs.writeFileSync(interventionPath, guidance);
                info.lastActivity = now;
                logger.info(`[${laneName}] Sent git issue guidance to agent`);
              } catch (e: any) {
                logger.error(`[${laneName}] Failed to send guidance: ${e.message}`);
              }
            }
          }
          continue;
        }

        const finished = result;
        const info = running.get(finished.name)!;
        running.delete(finished.name);
        exitWaiters.delete(finished.name);
        exitCodes[finished.name] = finished.code;

        // Consume the restart marker regardless of the exit code so it never leaks
        // into a later run of the same lane.
        const restartWasRequested = restartRequested.delete(finished.name);

        // Unregister from auto-recovery manager
        autoRecoveryManager.unregisterLane(finished.name);

        if (finished.code === 0) {
          completedLanes.add(finished.name);
          events.emit('lane.completed', {
            laneName: finished.name,
            exitCode: finished.code,
          });
        } else if (finished.code === 2) {
          // Blocked by dependency
          const statePath = safeJoin(laneRunDirs[finished.name]!, 'state.json');
          const state = loadState<LaneState>(statePath);

          if (state && state.dependencyRequest) {
            blockedLanes.set(finished.name, state.dependencyRequest);
            const lane = lanesByName.get(finished.name);
            if (lane) {
              lane.startIndex = Math.max(0, state.currentTaskIndex - 1); // Task was blocked, retry it
            }

            events.emit('lane.blocked', {
              laneName: finished.name,
              dependencyRequest: state.dependencyRequest,
            });
            logger.warn(`Lane ${finished.name} is blocked on dependency change request`);
          } else {
            failedLanes.add(finished.name);
            logger.error(`Lane ${finished.name} exited with code 2 but no dependency request found`);
          }
        } else {
          // Check if it was a restart request
          if (restartWasRequested) {
            logger.info(`🔄 Lane ${finished.name} is being restarted due to stall...`);

            // Update startIndex from current state to resume from the same task
            const statePath = safeJoin(laneRunDirs[finished.name]!, 'state.json');
            const state = loadState<LaneState>(statePath);
            if (state) {
              const lane = lanesByName.get(finished.name);
              if (lane) {
                lane.startIndex = state.currentTaskIndex;
              }
            }

            // Note: we don't add to failedLanes or completedLanes,
            // so it will be eligible to start again in the next iteration.
            continue;
          }

          failedLanes.add(finished.name);
          events.emit('lane.failed', {
            laneName: finished.name,
            exitCode: finished.code,
            // Phases 4 (diagnostics) and 5 (abort) are the stall-driven terminations
            error: info.stallPhase >= 4 ? 'Stopped due to repeated stall' : 'Process exited with non-zero code',
          });
        }

        printLaneStatus(lanes, laneRunDirs);
      } else {
        // Nothing running. Are we blocked?
        if (blockedLanes.size > 0 && autoResolve) {
          logger.section('🛠 Auto-Resolving Dependencies');

          try {
            await resolveAllDependencies(blockedLanes, lanes, laneRunDirs, pipelineBranch, runRoot);

            // Clear blocked status
            blockedLanes.clear();
            logger.success('Dependencies resolved and synced across all active lanes. Resuming...');
          } catch (error: any) {
            logger.error(`Auto-resolution failed: ${error.message}`);
            // Move blocked to failed
            for (const name of blockedLanes.keys()) {
              failedLanes.add(name);
            }
            blockedLanes.clear();
          }
        } else if (readyToStart.length === 0 && completedLanes.size + failedLanes.size + blockedLanes.size < lanes.length) {
          const remaining = lanes.filter(l => !completedLanes.has(l.name) && !failedLanes.has(l.name) && !blockedLanes.has(l.name));
          logger.error(`Deadlock detected! Remaining lanes cannot start: ${remaining.map(l => l.name).join(', ')}`);
          for (const l of remaining) {
            failedLanes.add(l.name);
            exitCodes[l.name] = 1;
          }
        } else {
          // All finished
          break;
        }
      }
    }
  } finally {
    clearInterval(monitorInterval);
  }

  printLaneStatus(lanes, laneRunDirs);

  // Check for failures. failedLanes is the source of truth: a lane can be failed while
  // its exit code is 2 (no dependency request found, or auto-resolution failed), which
  // an exit-code-only check would silently treat as success.
  const failedNames = new Set<string>(failedLanes);
  for (const [name, code] of Object.entries(exitCodes)) {
    if (code !== 0 && code !== 2) failedNames.add(name);
  }
  const failed = Array.from(failedNames, name => [name, exitCodes[name] ?? 1] as const);

  if (failed.length > 0) {
    logger.error(`Lanes failed: ${failed.map(([l, c]) => `${l}(${c})`).join(', ')}`);
    process.exit(1);
  }

  // Check for blocked lanes (if autoResolve was false)
  const blocked = Array.from(blockedLanes.keys());

  if (blocked.length > 0) {
    logger.warn(`Lanes blocked on dependency: ${blocked.join(', ')}`);
    logger.info('Handle dependency changes manually and resume lanes');
    events.emit('orchestration.failed', {
      error: 'Some lanes blocked on dependency change requests',
      blockedLanes: blocked,
    });
    process.exit(2);
  }

  logger.success('All lanes completed successfully!');
  events.emit('orchestration.completed', {
    runId,
    laneCount: lanes.length,
    completedCount: completedLanes.size,
    failedCount: failedLanes.size,
  });
  return { lanes, exitCodes, runRoot };
}
|