@nbardy/oompa 0.7.2 → 0.7.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -4
- package/agentnet/src/agentnet/agent.clj +9 -8
- package/agentnet/src/agentnet/cli.clj +268 -77
- package/agentnet/src/agentnet/cli.clj.bak +1384 -0
- package/agentnet/src/agentnet/core.clj +17 -2
- package/agentnet/src/agentnet/harness.clj +69 -36
- package/agentnet/src/agentnet/runs.clj +5 -3
- package/agentnet/src/agentnet/tasks.clj +6 -0
- package/agentnet/src/agentnet/worker.clj +676 -226
- package/bin/oompa.js +5 -1
- package/config/prompts/_task_header.md +2 -2
- package/oompa.example.json +4 -4
- package/package.json +5 -3
- package/scripts/README.md +6 -0
- package/scripts/__pycache__/stream_bridge.cpython-314.pyc +0 -0
- package/scripts/copy-repo-code.sh +110 -0
- package/scripts/install-babashka.js +97 -0
- package/scripts/test-harness-resume.sh +229 -0
|
@@ -21,6 +21,7 @@
|
|
|
21
21
|
[babashka.process :as process]
|
|
22
22
|
[clojure.java.io :as io]
|
|
23
23
|
[clojure.set]
|
|
24
|
+
[clojure.pprint :refer [print-table]]
|
|
24
25
|
[clojure.string :as str]))
|
|
25
26
|
|
|
26
27
|
;; =============================================================================
|
|
@@ -40,6 +41,13 @@
|
|
|
40
41
|
(def ^:private shutdown-requested? (atom false))
|
|
41
42
|
|
|
42
43
|
(declare task-root-for-cwd)
|
|
44
|
+
(declare verify-mergeable?)
|
|
45
|
+
|
|
46
|
+
(defn- log-ts
|
|
47
|
+
"Readable wall-clock timestamp for worker log lines."
|
|
48
|
+
[]
|
|
49
|
+
(.format (java.time.format.DateTimeFormatter/ofPattern "yyyy-MM-dd HH:mm:ss")
|
|
50
|
+
(java.time.LocalDateTime/now)))
|
|
43
51
|
|
|
44
52
|
(defn- load-prompt
|
|
45
53
|
"Load a prompt file. Tries path as-is first, then from package root."
|
|
@@ -47,6 +55,11 @@
|
|
|
47
55
|
(or (agent/load-custom-prompt path)
|
|
48
56
|
(agent/load-custom-prompt (str package-root "/" path))))
|
|
49
57
|
|
|
58
|
+
(defn- snippet
|
|
59
|
+
[s limit]
|
|
60
|
+
(let [s (or s "")]
|
|
61
|
+
(subs s 0 (min limit (count s)))))
|
|
62
|
+
|
|
50
63
|
(defn- build-template-tokens
|
|
51
64
|
"Build token map for prompt template {var} substitution.
|
|
52
65
|
Merges core/build-context (rich YAML header, queue, hotspots, etc.)
|
|
@@ -90,6 +103,7 @@
|
|
|
90
103
|
(str/replace "{TASKS_ROOT}" task-root))))
|
|
91
104
|
|
|
92
105
|
(def ^:private default-max-working-resumes 5)
|
|
106
|
+
(def ^:private default-max-needs-followups 1)
|
|
93
107
|
(def ^:private default-max-wait-for-tasks 600)
|
|
94
108
|
|
|
95
109
|
(defn create-worker
|
|
@@ -100,10 +114,11 @@
|
|
|
100
114
|
:review-prompts paths to reviewer prompt files (loaded and concatenated for review).
|
|
101
115
|
:wait-between seconds to sleep between cycles (nil or 0 = no wait).
|
|
102
116
|
:max-wait-for-tasks max seconds a non-planner waits for tasks before giving up (default 600).
|
|
103
|
-
:max-working-resumes max consecutive working resumes before nudge+kill (default 5).
|
|
117
|
+
:max-working-resumes max consecutive working resumes before nudge+kill (default 5).
|
|
118
|
+
:max-needs-followups max NEEDS_FOLLOWUP continuations in one cycle (default 1)."
|
|
104
119
|
[{:keys [id swarm-id harness model runs max-cycles iterations prompts can-plan reasoning
|
|
105
120
|
reviewers wait-between
|
|
106
|
-
max-working-resumes max-wait-for-tasks]}]
|
|
121
|
+
max-working-resumes max-needs-followups max-wait-for-tasks]}]
|
|
107
122
|
(let [cycle-cap (or max-cycles iterations runs 10)
|
|
108
123
|
run-goal (or runs iterations 10)]
|
|
109
124
|
{:id id
|
|
@@ -127,6 +142,7 @@
|
|
|
127
142
|
default-max-wait-for-tasks))
|
|
128
143
|
:reviewers reviewers
|
|
129
144
|
:max-working-resumes (or max-working-resumes default-max-working-resumes)
|
|
145
|
+
:max-needs-followups (or max-needs-followups default-max-needs-followups)
|
|
130
146
|
:completed 0
|
|
131
147
|
:status :idle}))
|
|
132
148
|
|
|
@@ -145,7 +161,9 @@
|
|
|
145
161
|
"1. If you have meaningful changes: commit them and signal COMPLETE_AND_READY_FOR_MERGE\n"
|
|
146
162
|
"2. If scope is too large: create follow-up tasks in tasks/pending/ for remaining work,\n"
|
|
147
163
|
" commit what you have (even partial notes/design docs), and signal COMPLETE_AND_READY_FOR_MERGE\n"
|
|
148
|
-
"3. If you
|
|
164
|
+
"3. If you truly cannot produce a merge-ready artifact this turn, signal NEEDS_FOLLOWUP\n"
|
|
165
|
+
" and explain the remaining work. The framework will keep your claimed tasks and give you\n"
|
|
166
|
+
" one targeted follow-up prompt. This is not success.\n\n"
|
|
149
167
|
"Do NOT continue working without producing a signal."))
|
|
150
168
|
|
|
151
169
|
(defn- build-context
|
|
@@ -187,11 +205,63 @@
|
|
|
187
205
|
"\n\n"
|
|
188
206
|
(if (seq claimed-ids)
|
|
189
207
|
"Work on your claimed tasks. Signal COMPLETE_AND_READY_FOR_MERGE when done."
|
|
190
|
-
"No claims succeeded. CLAIM different tasks,
|
|
208
|
+
"No claims succeeded. CLAIM different tasks. If you cannot finish a mergeable artifact after trying hard, signal NEEDS_FOLLOWUP with a short explanation."))]
|
|
191
209
|
{:claimed claimed-ids
|
|
192
210
|
:failed failed-ids
|
|
193
211
|
:resume-prompt prompt}))
|
|
194
212
|
|
|
213
|
+
(defn- active-claimed-task-ids
|
|
214
|
+
"Union of tasks claimed earlier in the cycle and tasks moved into current/
|
|
215
|
+
during the latest attempt."
|
|
216
|
+
[claimed-ids mv-claimed-tasks]
|
|
217
|
+
(-> (set claimed-ids)
|
|
218
|
+
(into mv-claimed-tasks)))
|
|
219
|
+
|
|
220
|
+
(defn- recycle-task-id-set!
|
|
221
|
+
"Recycle a set of claimed task IDs from current/ back to pending/.
|
|
222
|
+
Returns a vector of recycled IDs."
|
|
223
|
+
[worker-id task-ids]
|
|
224
|
+
(let [task-ids (set (remove nil? task-ids))
|
|
225
|
+
recycled (when (seq task-ids)
|
|
226
|
+
(tasks/recycle-tasks! task-ids))]
|
|
227
|
+
(when (seq recycled)
|
|
228
|
+
(println (format "[%s] Recycled %d claimed task(s): %s"
|
|
229
|
+
worker-id (count recycled) (str/join ", " recycled))))
|
|
230
|
+
(vec (or recycled []))))
|
|
231
|
+
|
|
232
|
+
(defn- recycle-active-claims!
|
|
233
|
+
"Recycle all claims active in the current cycle."
|
|
234
|
+
[worker-id claimed-ids mv-claimed-tasks]
|
|
235
|
+
(recycle-task-id-set! worker-id (active-claimed-task-ids claimed-ids mv-claimed-tasks)))
|
|
236
|
+
|
|
237
|
+
(defn- build-needs-followup-prompt
|
|
238
|
+
"Prompt injected after NEEDS_FOLLOWUP so the worker keeps ownership and
|
|
239
|
+
closes the loop in the same cycle."
|
|
240
|
+
[claimed-ids output]
|
|
241
|
+
(let [context (build-context)
|
|
242
|
+
explanation (some-> output
|
|
243
|
+
(str/replace #"(?is)^\s*NEEDS_FOLLOWUP\b[\s:.-]*" "")
|
|
244
|
+
str/trim)]
|
|
245
|
+
(str "## NEEDS_FOLLOWUP Follow-up\n\n"
|
|
246
|
+
(if (seq claimed-ids)
|
|
247
|
+
(str "You still own these claimed tasks: "
|
|
248
|
+
(str/join ", " (sort claimed-ids))
|
|
249
|
+
"\n\n")
|
|
250
|
+
"You do not currently own any claimed tasks.\n\n")
|
|
251
|
+
"Continue the SAME cycle and finish a merge-ready artifact.\n"
|
|
252
|
+
"Do not output NEEDS_FOLLOWUP again unless you are still blocked after this follow-up.\n"
|
|
253
|
+
"Prefer the smallest useful diff. If scope is too large, create concrete follow-up tasks in the pending queue and still ship the artifact you have.\n\n"
|
|
254
|
+
(when (seq explanation)
|
|
255
|
+
(str "Your previous explanation:\n"
|
|
256
|
+
explanation
|
|
257
|
+
"\n\n"))
|
|
258
|
+
"Task Status: " (:task_status context) "\n"
|
|
259
|
+
"Remaining Pending:\n"
|
|
260
|
+
(if (str/blank? (:pending_tasks context))
|
|
261
|
+
"(none)"
|
|
262
|
+
(:pending_tasks context))
|
|
263
|
+
"\n\nWhen ready, signal COMPLETE_AND_READY_FOR_MERGE.")))
|
|
264
|
+
|
|
195
265
|
(defn- run-agent!
|
|
196
266
|
"Run agent with prompt, return {:output :done? :merge? :claim-ids :exit :session-id}.
|
|
197
267
|
When resume? is true, continues the existing session with a lighter prompt.
|
|
@@ -245,28 +315,31 @@
|
|
|
245
315
|
tagged-prompt (str "[oompa:" swarm-id* ":" id "] " prompt)
|
|
246
316
|
abs-worktree (.getAbsolutePath (io/file worktree-path))
|
|
247
317
|
|
|
248
|
-
cmd (harness/build-cmd harness
|
|
249
|
-
{:cwd abs-worktree :model model :reasoning reasoning
|
|
250
|
-
:session-id session-id :resume? resume?
|
|
251
|
-
:prompt tagged-prompt :format? true})
|
|
252
|
-
|
|
253
318
|
result (try
|
|
254
|
-
(
|
|
255
|
-
|
|
256
|
-
|
|
319
|
+
(harness/run-command! harness
|
|
320
|
+
{:cwd abs-worktree :model model :reasoning reasoning
|
|
321
|
+
:session-id session-id :resume? resume?
|
|
322
|
+
:prompt tagged-prompt :format? true})
|
|
257
323
|
(catch Exception e
|
|
258
324
|
(println (format "[%s] Agent exception: %s" id (.getMessage e)))
|
|
259
325
|
{:exit -1 :out "" :err (.getMessage e)}))
|
|
260
326
|
|
|
261
|
-
{:keys [output session-id]}
|
|
262
|
-
(harness/parse-output harness (:out result) session-id)
|
|
327
|
+
{:keys [output session-id warning raw-snippet]}
|
|
328
|
+
(harness/parse-output harness (:out result) session-id)
|
|
329
|
+
stderr-snippet (let [stderr (some-> (:err result) str/trim)]
|
|
330
|
+
(when (seq stderr)
|
|
331
|
+
(subs stderr 0 (min 400 (count stderr)))))]
|
|
263
332
|
|
|
264
333
|
{:output output
|
|
265
334
|
:exit (:exit result)
|
|
266
335
|
:done? (agent/done-signal? output)
|
|
267
336
|
:merge? (agent/merge-signal? output)
|
|
337
|
+
:needs-followup? (agent/needs-followup-signal? output)
|
|
268
338
|
:claim-ids (agent/parse-claim-signal output)
|
|
269
|
-
:session-id session-id
|
|
339
|
+
:session-id session-id
|
|
340
|
+
:parse-warning warning
|
|
341
|
+
:raw-snippet raw-snippet
|
|
342
|
+
:stderr-snippet stderr-snippet}))
|
|
270
343
|
|
|
271
344
|
(defn- run-reviewer!
|
|
272
345
|
"Run reviewer on worktree changes.
|
|
@@ -274,7 +347,8 @@
|
|
|
274
347
|
prev-feedback: vector of previous review outputs (for multi-round context).
|
|
275
348
|
Returns {:verdict :approved|:needs-changes|:rejected, :comments [...], :output string}"
|
|
276
349
|
[{:keys [id swarm-id reviewers]} worktree-path prev-feedback]
|
|
277
|
-
(let [
|
|
350
|
+
(let [start-ms (System/currentTimeMillis)
|
|
351
|
+
;; Get actual diff content (not just stat) — truncate to 8000 chars for prompt budget
|
|
278
352
|
diff-result (process/sh ["git" "diff" "main"]
|
|
279
353
|
{:dir worktree-path :out :string :err :string})
|
|
280
354
|
diff-content (let [d (:out diff-result)]
|
|
@@ -320,14 +394,12 @@
|
|
|
320
394
|
"After your verdict line, list every issue as a numbered item with "
|
|
321
395
|
"the file path and what needs to change.\n")
|
|
322
396
|
review-prompt (str "[oompa:" swarm-id* ":" id "] " review-body)
|
|
323
|
-
cmd (harness/build-cmd harness {:cwd abs-wt :model model :prompt review-prompt})
|
|
324
397
|
res (try
|
|
325
|
-
(
|
|
326
|
-
:in (harness/process-stdin harness review-prompt)
|
|
327
|
-
:out :string :err :string})
|
|
398
|
+
(harness/run-command! harness {:cwd abs-wt :model model :prompt review-prompt})
|
|
328
399
|
(catch Exception e
|
|
329
400
|
{:exit -1 :out "" :err (.getMessage e)}))
|
|
330
|
-
|
|
401
|
+
parsed (harness/parse-output harness (:out res) nil)
|
|
402
|
+
output (or (:output parsed) "")
|
|
331
403
|
has-verdict? (or (re-find #"VERDICT:\s*APPROVED" output)
|
|
332
404
|
(re-find #"VERDICT:\s*NEEDS_CHANGES" output)
|
|
333
405
|
(re-find #"VERDICT:\s*REJECTED" output)
|
|
@@ -348,7 +420,8 @@
|
|
|
348
420
|
(re-find #"VERDICT:\s*NEEDS_CHANGES" output) :needs-changes
|
|
349
421
|
(re-find #"VERDICT:\s*REJECTED" output) :needs-changes
|
|
350
422
|
(re-find #"(?i)\bAPPROVED\b" output) :approved
|
|
351
|
-
:else :needs-changes)
|
|
423
|
+
:else :needs-changes)
|
|
424
|
+
duration-ms (- (System/currentTimeMillis) start-ms)]
|
|
352
425
|
|
|
353
426
|
(println (format "[%s] Reviewer verdict: %s" id (name verdict)))
|
|
354
427
|
(let [summary (subs output 0 (min 300 (count output)))]
|
|
@@ -358,14 +431,16 @@
|
|
|
358
431
|
{:verdict verdict
|
|
359
432
|
:comments (when (not= (:exit result) 0)
|
|
360
433
|
[(:err result)])
|
|
361
|
-
:output output
|
|
434
|
+
:output output
|
|
435
|
+
:duration-ms duration-ms}))
|
|
362
436
|
|
|
363
437
|
(defn- run-fix!
|
|
364
438
|
"Ask worker to fix issues based on reviewer feedback.
|
|
365
439
|
all-feedback: vector of all reviewer outputs so far (accumulated across rounds).
|
|
366
440
|
Returns {:output string, :exit int}"
|
|
367
441
|
[{:keys [id swarm-id harness model]} worktree-path all-feedback]
|
|
368
|
-
(let [
|
|
442
|
+
(let [start-ms (System/currentTimeMillis)
|
|
443
|
+
swarm-id* (or swarm-id "unknown")
|
|
369
444
|
feedback-text (if (> (count all-feedback) 1)
|
|
370
445
|
(str "The reviewer has given feedback across " (count all-feedback) " rounds.\n"
|
|
371
446
|
"Fix ALL outstanding issues:\n\n"
|
|
@@ -381,18 +456,17 @@
|
|
|
381
456
|
|
|
382
457
|
abs-wt (.getAbsolutePath (io/file worktree-path))
|
|
383
458
|
|
|
384
|
-
cmd (harness/build-cmd harness
|
|
385
|
-
{:cwd abs-wt :model model :prompt fix-prompt})
|
|
386
|
-
|
|
387
459
|
result (try
|
|
388
|
-
(
|
|
389
|
-
|
|
390
|
-
:out :string :err :string})
|
|
460
|
+
(harness/run-command! harness
|
|
461
|
+
{:cwd abs-wt :model model :prompt fix-prompt})
|
|
391
462
|
(catch Exception e
|
|
392
|
-
{:exit -1 :out "" :err (.getMessage e)}))
|
|
463
|
+
{:exit -1 :out "" :err (.getMessage e)}))
|
|
464
|
+
parsed (harness/parse-output harness (:out result) nil)
|
|
465
|
+
duration-ms (- (System/currentTimeMillis) start-ms)]
|
|
393
466
|
|
|
394
|
-
{:output (:
|
|
395
|
-
:exit (:exit result)
|
|
467
|
+
{:output (:output parsed)
|
|
468
|
+
:exit (:exit result)
|
|
469
|
+
:duration-ms duration-ms}))
|
|
396
470
|
|
|
397
471
|
(defn- collect-divergence-context
|
|
398
472
|
"Collect context about how a worktree branch has diverged from main.
|
|
@@ -406,6 +480,65 @@
|
|
|
406
480
|
:main-log (or main-log "(none)")
|
|
407
481
|
:diff-stat (or diff-stat "(none)")}))
|
|
408
482
|
|
|
483
|
+
(defn- first-nonblank-line
|
|
484
|
+
"Return first non-blank line from text for compact logging."
|
|
485
|
+
[s]
|
|
486
|
+
(some->> (or s "")
|
|
487
|
+
str/split-lines
|
|
488
|
+
(remove str/blank?)
|
|
489
|
+
first))
|
|
490
|
+
|
|
491
|
+
(defn- classify-merge-failure
|
|
492
|
+
"Classify git merge/checkout failure text for better logs."
|
|
493
|
+
[failure-text]
|
|
494
|
+
(cond
|
|
495
|
+
(re-find #"untracked working tree files would be overwritten by merge" (or failure-text ""))
|
|
496
|
+
:untracked-overwrite
|
|
497
|
+
(re-find #"CONFLICT|Merge conflict" (or failure-text ""))
|
|
498
|
+
:conflict
|
|
499
|
+
(re-find #"Your local changes to the following files would be overwritten" (or failure-text ""))
|
|
500
|
+
:local-changes-overwrite
|
|
501
|
+
:else
|
|
502
|
+
:unknown))
|
|
503
|
+
|
|
504
|
+
(defn- run-resolver-agent!
|
|
505
|
+
"Run resolver agent with divergence + failure context.
|
|
506
|
+
Returns :resolved when branch verifies as mergeable, else :failed."
|
|
507
|
+
[worker wt-path worker-id reason-details]
|
|
508
|
+
(println (format "[%s] Branch diverged from main, launching resolver agent%s"
|
|
509
|
+
worker-id
|
|
510
|
+
(if (str/blank? reason-details)
|
|
511
|
+
""
|
|
512
|
+
(str " (" reason-details ")"))))
|
|
513
|
+
(let [{:keys [branch-log main-log diff-stat]} (collect-divergence-context wt-path)
|
|
514
|
+
resolve-prompt (str "[oompa:" (or (:swarm-id worker) "unknown") ":" worker-id "] "
|
|
515
|
+
"Your branch cannot currently be merged safely into main.\n\n"
|
|
516
|
+
(when-not (str/blank? reason-details)
|
|
517
|
+
(str "Failure context from previous merge attempt:\n"
|
|
518
|
+
reason-details "\n\n"))
|
|
519
|
+
"Your branch's commits (not on main):\n" branch-log "\n\n"
|
|
520
|
+
"Commits on main since you branched:\n" main-log "\n\n"
|
|
521
|
+
"Divergence scope:\n" diff-stat "\n\n"
|
|
522
|
+
"Make this branch cleanly mergeable into main. "
|
|
523
|
+
"Preserve the intent of your branch's changes.\n"
|
|
524
|
+
"You have full git access — rebase, cherry-pick, resolve conflicts, "
|
|
525
|
+
"or clean up merge blockers.\n"
|
|
526
|
+
"When done, verify with: git diff main --stat")
|
|
527
|
+
abs-wt (.getAbsolutePath (io/file wt-path))
|
|
528
|
+
result (try
|
|
529
|
+
(harness/run-command! (:harness worker)
|
|
530
|
+
{:cwd abs-wt :model (:model worker) :prompt resolve-prompt})
|
|
531
|
+
(catch Exception e
|
|
532
|
+
{:exit -1 :out "" :err (.getMessage e)}))]
|
|
533
|
+
(if (zero? (:exit result))
|
|
534
|
+
(if (verify-mergeable? wt-path)
|
|
535
|
+
(do (println (format "[%s] Agent resolved divergence, branch is mergeable" worker-id))
|
|
536
|
+
:resolved)
|
|
537
|
+
(do (println (format "[%s] Agent ran but branch still can't merge cleanly" worker-id))
|
|
538
|
+
:failed))
|
|
539
|
+
(do (println (format "[%s] Resolver agent failed (exit %d)" worker-id (:exit result)))
|
|
540
|
+
:failed))))
|
|
541
|
+
|
|
409
542
|
(defn- verify-mergeable?
|
|
410
543
|
"Dry-run merge to verify a worktree branch merges cleanly into main.
|
|
411
544
|
Does NOT leave merge state behind — always cleans up the dry-run.
|
|
@@ -437,36 +570,10 @@
|
|
|
437
570
|
;; Conflict — abort merge to restore clean worktree state, then
|
|
438
571
|
;; hand the problem to the agent with full divergence context.
|
|
439
572
|
(let [_ (process/sh ["git" "merge" "--abort"] {:dir wt-path})
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
"Your branch's commits (not on main):\n" branch-log "\n\n"
|
|
445
|
-
"Commits on main since you branched:\n" main-log "\n\n"
|
|
446
|
-
"Divergence scope:\n" diff-stat "\n\n"
|
|
447
|
-
"Make this branch cleanly mergeable into main. "
|
|
448
|
-
"Preserve the intent of your branch's changes.\n"
|
|
449
|
-
"You have full git access — rebase, cherry-pick, resolve conflicts, "
|
|
450
|
-
"whatever works.\n"
|
|
451
|
-
"When done, verify with: git diff main --stat")
|
|
452
|
-
abs-wt (.getAbsolutePath (io/file wt-path))
|
|
453
|
-
cmd (harness/build-cmd (:harness worker)
|
|
454
|
-
{:cwd abs-wt :model (:model worker) :prompt resolve-prompt})
|
|
455
|
-
result (try
|
|
456
|
-
(process/sh cmd {:dir abs-wt
|
|
457
|
-
:in (harness/process-stdin (:harness worker) resolve-prompt)
|
|
458
|
-
:out :string :err :string})
|
|
459
|
-
(catch Exception e
|
|
460
|
-
{:exit -1 :out "" :err (.getMessage e)}))]
|
|
461
|
-
(if (zero? (:exit result))
|
|
462
|
-
;; Agent ran — verify the branch actually merges cleanly now
|
|
463
|
-
(if (verify-mergeable? wt-path)
|
|
464
|
-
(do (println (format "[%s] Agent resolved divergence, branch is mergeable" worker-id))
|
|
465
|
-
:resolved)
|
|
466
|
-
(do (println (format "[%s] Agent ran but branch still can't merge cleanly" worker-id))
|
|
467
|
-
:failed))
|
|
468
|
-
(do (println (format "[%s] Resolver agent failed (exit %d)" worker-id (:exit result)))
|
|
469
|
-
:failed))))))
|
|
573
|
+
failure-snippet (first-nonblank-line (str (:out merge-result) "\n" (:err merge-result)))]
|
|
574
|
+
(run-resolver-agent! worker wt-path worker-id
|
|
575
|
+
(str "sync_worktree_to_main failed"
|
|
576
|
+
(when failure-snippet (str ": " failure-snippet))))))))
|
|
470
577
|
|
|
471
578
|
(defn- worktree-has-changes?
|
|
472
579
|
"Check if worktree has committed OR uncommitted changes vs main.
|
|
@@ -486,9 +593,11 @@
|
|
|
486
593
|
(defn- create-iteration-worktree!
|
|
487
594
|
"Create a fresh worktree for an iteration. Returns {:dir :branch :path}.
|
|
488
595
|
Force-removes stale worktree+branch from previous failed runs first."
|
|
489
|
-
[project-root worker-id iteration]
|
|
490
|
-
(let [
|
|
491
|
-
|
|
596
|
+
[project-root swarm-id worker-id iteration]
|
|
597
|
+
(let [swarm-token (or swarm-id (subs (str (java.util.UUID/randomUUID)) 0 8))
|
|
598
|
+
work-id (format "s%s-%s-i%d" swarm-token worker-id iteration)
|
|
599
|
+
wt-dir (format ".w%s" work-id)
|
|
600
|
+
wt-branch (format "oompa/%s" work-id)
|
|
492
601
|
wt-path (str project-root "/" wt-dir)]
|
|
493
602
|
;; Clean stale worktree/branch from previous failed runs
|
|
494
603
|
(process/sh ["git" "worktree" "remove" wt-dir "--force"] {:dir project-root})
|
|
@@ -507,39 +616,201 @@
|
|
|
507
616
|
(let [post-ids (tasks/current-task-ids)]
|
|
508
617
|
(clojure.set/difference post-ids pre-current-ids)))
|
|
509
618
|
|
|
619
|
+
(defn- now-ms
|
|
620
|
+
[]
|
|
621
|
+
(System/currentTimeMillis))
|
|
622
|
+
|
|
623
|
+
(defn- ms->seconds
|
|
624
|
+
[ms]
|
|
625
|
+
(/ ms 1000.0))
|
|
626
|
+
|
|
627
|
+
(defn- pct-of
|
|
628
|
+
[part total]
|
|
629
|
+
(if (pos? total)
|
|
630
|
+
(* 100.0 (/ part (double total)))
|
|
631
|
+
0.0))
|
|
632
|
+
|
|
633
|
+
(defn- init-cycle-timing
|
|
634
|
+
[]
|
|
635
|
+
{:implementation-rounds-ms []
|
|
636
|
+
:reviewer-response-ms []
|
|
637
|
+
:review-fixes-ms []
|
|
638
|
+
:optional-review-ms []
|
|
639
|
+
:llm-calls []})
|
|
640
|
+
|
|
641
|
+
(defn- add-llm-call
|
|
642
|
+
[timing section-name call-name duration-ms]
|
|
643
|
+
(let [timing (or timing (init-cycle-timing))
|
|
644
|
+
duration-ms (max 0 (long (or duration-ms 0)))]
|
|
645
|
+
(-> timing
|
|
646
|
+
(update section-name (fnil conj []) duration-ms)
|
|
647
|
+
(update :llm-calls conj {:name call-name
|
|
648
|
+
:section section-name
|
|
649
|
+
:duration-ms duration-ms}))))
|
|
650
|
+
|
|
651
|
+
(defn- cycle-llm-total-ms
|
|
652
|
+
[timing]
|
|
653
|
+
(let [sections [:implementation-rounds-ms :reviewer-response-ms :review-fixes-ms :optional-review-ms]]
|
|
654
|
+
(->> sections
|
|
655
|
+
(map #(reduce + 0 (or (get timing %) [])))
|
|
656
|
+
(reduce + 0))))
|
|
657
|
+
|
|
658
|
+
(defn- with-call-percent
|
|
659
|
+
[timing total-ms]
|
|
660
|
+
(update timing :llm-calls
|
|
661
|
+
(fn [calls]
|
|
662
|
+
(mapv (fn [{:keys [duration-ms] :as call}]
|
|
663
|
+
(assoc call :percent (pct-of duration-ms total-ms)))
|
|
664
|
+
calls))))
|
|
665
|
+
|
|
666
|
+
(defn- format-timing-segment
|
|
667
|
+
[label durations total-ms]
|
|
668
|
+
(let [durations (vec (or durations []))
|
|
669
|
+
items (if (seq durations)
|
|
670
|
+
(str/join ", "
|
|
671
|
+
(map #(format "%.2fs (%.1f%%)"
|
|
672
|
+
(ms->seconds %) (pct-of % total-ms))
|
|
673
|
+
durations))
|
|
674
|
+
"-")
|
|
675
|
+
section-ms (reduce + 0 durations)]
|
|
676
|
+
(format "%s=[%s] %.2fs (%.1f%%)"
|
|
677
|
+
label
|
|
678
|
+
items
|
|
679
|
+
(ms->seconds section-ms)
|
|
680
|
+
(pct-of section-ms total-ms))))
|
|
681
|
+
|
|
682
|
+
(defn- format-cycle-timing
|
|
683
|
+
[{:keys [implementation-rounds-ms reviewer-response-ms review-fixes-ms optional-review-ms]}
|
|
684
|
+
total-ms]
|
|
685
|
+
(let [llm-ms (cycle-llm-total-ms {:implementation-rounds-ms implementation-rounds-ms
|
|
686
|
+
:reviewer-response-ms reviewer-response-ms
|
|
687
|
+
:review-fixes-ms review-fixes-ms
|
|
688
|
+
:optional-review-ms optional-review-ms})
|
|
689
|
+
harness-ms (max 0 (- total-ms llm-ms))]
|
|
690
|
+
(str "timing: "
|
|
691
|
+
(format-timing-segment "Implementation" implementation-rounds-ms total-ms)
|
|
692
|
+
" | "
|
|
693
|
+
(format-timing-segment "Reviewer" reviewer-response-ms total-ms)
|
|
694
|
+
" | "
|
|
695
|
+
(format-timing-segment "Fixes" review-fixes-ms total-ms)
|
|
696
|
+
" | "
|
|
697
|
+
(format-timing-segment "OptionalReview" optional-review-ms total-ms)
|
|
698
|
+
" | LLM="
|
|
699
|
+
(format "%.2fs (%.1f%%)" (ms->seconds llm-ms) (pct-of llm-ms total-ms))
|
|
700
|
+
" | Harness="
|
|
701
|
+
(format "%.2fs (%.1f%%)" (ms->seconds harness-ms) (pct-of harness-ms total-ms))
|
|
702
|
+
" | Total="
|
|
703
|
+
(format "%.2fs" (ms->seconds total-ms)))))
|
|
704
|
+
|
|
705
|
+
(defn- safe-number
|
|
706
|
+
[v]
|
|
707
|
+
(if (number? v) (long v) 0))
|
|
708
|
+
|
|
709
|
+
(defn- safe-sum
|
|
710
|
+
[v]
|
|
711
|
+
(reduce + 0 (or v [])))
|
|
712
|
+
|
|
713
|
+
(defn- format-ms
|
|
714
|
+
[ms]
|
|
715
|
+
(format "%.2fs" (ms->seconds (safe-number ms))))
|
|
716
|
+
|
|
717
|
+
(defn- cycle-time-sum
|
|
718
|
+
[{:keys [implementation-rounds-ms reviewer-response-ms review-fixes-ms optional-review-ms] :as timing-ms}
|
|
719
|
+
duration-ms]
|
|
720
|
+
(let [impl (safe-sum implementation-rounds-ms)
|
|
721
|
+
review (safe-sum reviewer-response-ms)
|
|
722
|
+
fixes (safe-sum review-fixes-ms)
|
|
723
|
+
optional (safe-sum optional-review-ms)
|
|
724
|
+
total (safe-number duration-ms)
|
|
725
|
+
llm (+ impl review fixes optional)
|
|
726
|
+
harness (max 0 (- total llm))]
|
|
727
|
+
{:implementation-ms impl
|
|
728
|
+
:review-ms review
|
|
729
|
+
:fixes-ms fixes
|
|
730
|
+
:optional-review-ms optional
|
|
731
|
+
:llm-ms llm
|
|
732
|
+
:harness-ms harness
|
|
733
|
+
:total-ms total}))
|
|
734
|
+
|
|
735
|
+
(def ^:private empty-cycle-total
|
|
736
|
+
{:implementation-ms 0
|
|
737
|
+
:review-ms 0
|
|
738
|
+
:fixes-ms 0
|
|
739
|
+
:optional-review-ms 0
|
|
740
|
+
:llm-ms 0
|
|
741
|
+
:harness-ms 0
|
|
742
|
+
:total-ms 0})
|
|
743
|
+
|
|
744
|
+
(defn- aggregate-cycle-timings-by-worker
|
|
745
|
+
[swarm-id]
|
|
746
|
+
(reduce (fn [acc {:keys [worker-id timing-ms duration-ms]}]
|
|
747
|
+
(update acc worker-id
|
|
748
|
+
(fn [current]
|
|
749
|
+
(merge-with + (or current empty-cycle-total)
|
|
750
|
+
(cycle-time-sum timing-ms duration-ms)))))
|
|
751
|
+
{}
|
|
752
|
+
(or (when swarm-id (runs/list-cycles swarm-id)) [])))
|
|
753
|
+
|
|
754
|
+
(defn- worker-summary-row
|
|
755
|
+
[{:keys [id status completed cycles-completed merges claims rejections errors recycled review-rounds-total] :as _worker}
|
|
756
|
+
{:keys [implementation-ms review-ms fixes-ms harness-ms total-ms]}]
|
|
757
|
+
{:Worker id
|
|
758
|
+
:Runs (or completed cycles-completed 0)
|
|
759
|
+
:Cycles (or cycles-completed 0)
|
|
760
|
+
:Status (name status)
|
|
761
|
+
:Merges (or merges 0)
|
|
762
|
+
:Claims (or claims 0)
|
|
763
|
+
:Rejects (or rejections 0)
|
|
764
|
+
:Errors (or errors 0)
|
|
765
|
+
:Recycled (or recycled 0)
|
|
766
|
+
:ReviewRounds (or review-rounds-total 0)
|
|
767
|
+
:ImplMs (format-ms implementation-ms)
|
|
768
|
+
:ReviewMs (format-ms review-ms)
|
|
769
|
+
:FixMs (format-ms fixes-ms)
|
|
770
|
+
:HarnessMs (format-ms harness-ms)
|
|
771
|
+
:TotalMs (format-ms total-ms)})
|
|
772
|
+
|
|
510
773
|
(defn- emit-cycle-log!
|
|
511
|
-
"Write cycle event log. Called at every cycle exit point.
|
|
774
|
+
"Write cycle event log. Called at every cycle attempt exit point.
|
|
512
775
|
session-id links to the Claude CLI conversation transcript on disk.
|
|
513
776
|
No mutable summary state — all state is derived from immutable cycle logs."
|
|
514
|
-
[swarm-id worker-id cycle run start-ms session-id
|
|
515
|
-
{:keys [outcome claimed-task-ids recycled-tasks error-snippet review-rounds
|
|
516
|
-
|
|
777
|
+
[swarm-id worker-id cycle attempt run start-ms session-id
|
|
778
|
+
{:keys [outcome claimed-task-ids recycled-tasks error-snippet review-rounds timing-ms
|
|
779
|
+
worktree-path signals]}]
|
|
780
|
+
(let [duration-ms (- (now-ms) start-ms)
|
|
781
|
+
timing-ms (or timing-ms (init-cycle-timing))
|
|
782
|
+
harness-ms (max 0 (- duration-ms (cycle-llm-total-ms timing-ms)))
|
|
783
|
+
timing-ms (with-call-percent (assoc timing-ms
|
|
784
|
+
:harness-ms harness-ms
|
|
785
|
+
:llm-calls (or (:llm-calls timing-ms) []))
|
|
786
|
+
duration-ms)]
|
|
517
787
|
(runs/write-cycle-log!
|
|
518
788
|
swarm-id worker-id cycle
|
|
519
|
-
{:run run
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
789
|
+
(cond-> {:run run
|
|
790
|
+
:attempt attempt
|
|
791
|
+
:outcome outcome
|
|
792
|
+
:duration-ms duration-ms
|
|
793
|
+
:claimed-task-ids (vec (or claimed-task-ids []))
|
|
794
|
+
:recycled-tasks (or recycled-tasks [])
|
|
795
|
+
:error-snippet error-snippet
|
|
796
|
+
:review-rounds (or review-rounds 0)
|
|
797
|
+
:session-id session-id
|
|
798
|
+
:timing-ms timing-ms}
|
|
799
|
+
worktree-path (assoc :worktree-path worktree-path)
|
|
800
|
+
(seq signals) (assoc :signals (vec signals))))
|
|
801
|
+
(let [terminal-outcomes #{:merged :merge-failed :rejected :sync-failed :no-changes
|
|
802
|
+
:executor-done :stuck :error :interrupted :needs-followup}]
|
|
803
|
+
(if (and outcome (contains? terminal-outcomes outcome))
|
|
804
|
+
(do
|
|
805
|
+
(println (format "[%s] %s" worker-id (format-cycle-timing timing-ms duration-ms)))
|
|
806
|
+
(when worktree-path
|
|
807
|
+
(println (format "[%s] worktree: %s" worker-id worktree-path)))
|
|
808
|
+
(when (seq signals)
|
|
809
|
+
(println (format "[%s] signals: %s" worker-id (str/join " → " signals)))))
|
|
810
|
+
(println (format "[%s] Cycle %d attempt %d continuing"
|
|
811
|
+
worker-id cycle attempt))))))
|
|
812
|
+
|
|
813
|
+
|
|
543
814
|
|
|
544
815
|
(defn- cleanup-worktree!
|
|
545
816
|
"Remove worktree and branch."
|
|
@@ -580,7 +851,8 @@
|
|
|
580
851
|
(defn- merge-to-main!
|
|
581
852
|
"Merge worktree changes to main branch. Serialized via merge-lock to prevent
|
|
582
853
|
concurrent workers from corrupting the git index. On success, moves claimed
|
|
583
|
-
tasks current→complete and annotates metadata. Returns
|
|
854
|
+
tasks current→complete and annotates metadata. Returns
|
|
855
|
+
{:ok? bool :reason keyword :message string}.
|
|
584
856
|
claimed-task-ids: set of task IDs this worker claimed (framework owns completion)."
|
|
585
857
|
[wt-path wt-id worker-id project-root review-rounds claimed-task-ids]
|
|
586
858
|
(locking merge-lock
|
|
@@ -599,31 +871,66 @@
|
|
|
599
871
|
(process/sh ["git" "merge" wt-id "--no-edit"]
|
|
600
872
|
{:dir project-root :out :string :err :string}))
|
|
601
873
|
success (and (zero? (:exit checkout-result))
|
|
602
|
-
(zero? (:exit merge-result)))
|
|
874
|
+
(zero? (:exit merge-result)))
|
|
875
|
+
failure-text (str/join "\n"
|
|
876
|
+
(remove str/blank?
|
|
877
|
+
[(:out checkout-result)
|
|
878
|
+
(:err checkout-result)
|
|
879
|
+
(when merge-result (:out merge-result))
|
|
880
|
+
(when merge-result (:err merge-result))]))
|
|
881
|
+
failure-reason (if (not (zero? (:exit checkout-result)))
|
|
882
|
+
:checkout-failed
|
|
883
|
+
(classify-merge-failure failure-text))]
|
|
603
884
|
(if success
|
|
604
|
-
(
|
|
885
|
+
(let [completed (when (seq claimed-task-ids)
|
|
886
|
+
(tasks/complete-by-ids! claimed-task-ids))
|
|
887
|
+
completed-count (count (or completed []))]
|
|
605
888
|
(println (format "[%s] Merge successful" worker-id))
|
|
606
889
|
;; Framework-owned completion: move claimed tasks current→complete
|
|
607
|
-
(when (seq
|
|
608
|
-
(
|
|
609
|
-
|
|
610
|
-
(println (format "[%s] Completed %d task(s): %s"
|
|
611
|
-
worker-id (count completed) (str/join ", " completed))))))
|
|
890
|
+
(when (seq completed)
|
|
891
|
+
(println (format "[%s] Completed %d task(s): %s"
|
|
892
|
+
worker-id completed-count (str/join ", " completed))))
|
|
612
893
|
;; Annotate completed tasks with metadata while still holding merge-lock
|
|
613
|
-
(annotate-completed-tasks! project-root worker-id review-rounds)
|
|
894
|
+
(annotate-completed-tasks! project-root worker-id review-rounds)
|
|
895
|
+
{:ok? true
|
|
896
|
+
:reason :merged
|
|
897
|
+
:message "merge successful"
|
|
898
|
+
:completed-count completed-count})
|
|
614
899
|
;; FAILED: Clean up git state before releasing merge-lock.
|
|
615
900
|
;; Without this, a conflict leaves .git/MERGE_HEAD and poisons the
|
|
616
901
|
;; shared index — every subsequent worker fails on `git checkout main`.
|
|
617
902
|
(do
|
|
618
|
-
(println (format "[%s] MERGE FAILED: %s"
|
|
619
|
-
|
|
903
|
+
(println (format "[%s] MERGE FAILED (%s): %s"
|
|
904
|
+
worker-id
|
|
905
|
+
(name failure-reason)
|
|
906
|
+
(or (first-nonblank-line failure-text)
|
|
907
|
+
"no output")))
|
|
620
908
|
(let [abort-result (process/sh ["git" "merge" "--abort"]
|
|
621
909
|
{:dir project-root :out :string :err :string})]
|
|
622
910
|
(when-not (zero? (:exit abort-result))
|
|
623
911
|
;; Abort failed (no merge in progress, or other issue) — hard reset.
|
|
624
912
|
(process/sh ["git" "reset" "--hard" "HEAD"]
|
|
625
|
-
{:dir project-root :out :string :err :string})))
|
|
626
|
-
|
|
913
|
+
{:dir project-root :out :string :err :string})))
|
|
914
|
+
{:ok? false
|
|
915
|
+
:reason failure-reason
|
|
916
|
+
:message (or (first-nonblank-line failure-text) "merge failed")})))))
|
|
917
|
+
|
|
918
|
+
(defn- recover-merge-failure!
|
|
919
|
+
"On merge-to-main failure, launch resolver agent and retry merge once.
|
|
920
|
+
Must run outside merge-lock to avoid blocking other workers."
|
|
921
|
+
[worker wt-path wt-id worker-id project-root review-rounds claimed-task-ids merge-result]
|
|
922
|
+
(let [reason (:reason merge-result)
|
|
923
|
+
msg (:message merge-result)
|
|
924
|
+
_ (println (format "[%s] Launching resolver after merge failure (%s): %s"
|
|
925
|
+
worker-id (name (or reason :unknown)) (or msg "merge failed")))
|
|
926
|
+
resolve-status (run-resolver-agent! worker wt-path worker-id
|
|
927
|
+
(str "merge_to_main failed (" (name (or reason :unknown)) ")"
|
|
928
|
+
(when msg (str ": " msg))))]
|
|
929
|
+
(if (= :failed resolve-status)
|
|
930
|
+
merge-result
|
|
931
|
+
(do
|
|
932
|
+
(println (format "[%s] Retrying merge after resolver" worker-id))
|
|
933
|
+
(merge-to-main! wt-path wt-id worker-id project-root review-rounds claimed-task-ids)))))
|
|
627
934
|
|
|
628
935
|
(defn- task-only-diff?
|
|
629
936
|
"Check if all changes in worktree are task files only (no code changes).
|
|
@@ -653,16 +960,21 @@
|
|
|
653
960
|
and fixer has full context of all prior feedback.
|
|
654
961
|
Writes review logs to runs/{swarm-id}/reviews/ for post-mortem analysis.
|
|
655
962
|
Returns {:approved? bool, :attempts int}"
|
|
656
|
-
[worker wt-path worker-id iteration]
|
|
963
|
+
[worker wt-path worker-id iteration & [cycle-timing]]
|
|
657
964
|
(if (empty? (:reviewers worker))
|
|
658
965
|
;; No reviewer configured, auto-approve
|
|
659
|
-
{:approved? true :attempts 0}
|
|
966
|
+
{:approved? true :attempts 0 :timing (or cycle-timing (init-cycle-timing))}
|
|
660
967
|
|
|
661
968
|
;; Run review loop with accumulated feedback
|
|
662
969
|
(loop [attempt 1
|
|
663
|
-
prev-feedback []
|
|
970
|
+
prev-feedback []
|
|
971
|
+
timing (or cycle-timing (init-cycle-timing))]
|
|
664
972
|
(println (format "[%s] Review attempt %d/%d" worker-id attempt max-review-retries))
|
|
665
|
-
(let [{:keys [verdict output]} (run-reviewer! worker wt-path prev-feedback)
|
|
973
|
+
(let [{:keys [verdict output duration-ms]} (run-reviewer! worker wt-path prev-feedback)
|
|
974
|
+
timing (add-llm-call timing
|
|
975
|
+
:reviewer-response-ms
|
|
976
|
+
(str "review_" attempt)
|
|
977
|
+
(or duration-ms 0))
|
|
666
978
|
diff-files (diff-file-names wt-path)]
|
|
667
979
|
|
|
668
980
|
;; Persist review log for this round
|
|
@@ -670,13 +982,14 @@
|
|
|
670
982
|
(runs/write-review-log! (:swarm-id worker) worker-id iteration attempt
|
|
671
983
|
{:verdict verdict
|
|
672
984
|
:output output
|
|
985
|
+
:duration-ms (or duration-ms 0)
|
|
673
986
|
:diff-files (or diff-files [])}))
|
|
674
987
|
|
|
675
988
|
(case verdict
|
|
676
989
|
:approved
|
|
677
990
|
(do
|
|
678
991
|
(println (format "[%s] Reviewer APPROVED (attempt %d)" worker-id attempt))
|
|
679
|
-
{:approved? true :attempts attempt})
|
|
992
|
+
{:approved? true :attempts attempt :timing timing})
|
|
680
993
|
|
|
681
994
|
;; :needs-changes — always give the worker a chance to fix.
|
|
682
995
|
;; Hard rejection only happens when max review rounds are exhausted.
|
|
@@ -684,11 +997,15 @@
|
|
|
684
997
|
(if (>= attempt max-review-retries)
|
|
685
998
|
(do
|
|
686
999
|
(println (format "[%s] Max review retries reached (%d rounds)" worker-id attempt))
|
|
687
|
-
{:approved? false :attempts attempt})
|
|
1000
|
+
{:approved? false :attempts attempt :timing timing})
|
|
688
1001
|
(do
|
|
689
1002
|
(println (format "[%s] Reviewer requested changes, fixing..." worker-id))
|
|
690
|
-
(run-fix! worker wt-path all-feedback)
|
|
691
|
-
|
|
1003
|
+
(let [{:keys [duration-ms]} (run-fix! worker wt-path all-feedback)
|
|
1004
|
+
timing (add-llm-call timing
|
|
1005
|
+
:review-fixes-ms
|
|
1006
|
+
(str "fix_" attempt)
|
|
1007
|
+
(or duration-ms 0))]
|
|
1008
|
+
(recur (inc attempt) all-feedback timing))))))))))
|
|
692
1009
|
|
|
693
1010
|
;; =============================================================================
|
|
694
1011
|
;; Worker Loop
|
|
@@ -716,11 +1033,13 @@
|
|
|
716
1033
|
(pos? (tasks/pending-count)) true
|
|
717
1034
|
(pos? (tasks/current-count)) true
|
|
718
1035
|
(>= waited max-wait-seconds)
|
|
719
|
-
(do (println (format "[%s] No tasks after %ds, giving up"
|
|
1036
|
+
(do (println (format "[%s] [%s] No tasks after %ds, giving up"
|
|
1037
|
+
worker-id (log-ts) waited))
|
|
720
1038
|
false)
|
|
721
1039
|
:else
|
|
722
1040
|
(do (when (zero? (mod waited 60))
|
|
723
|
-
(println (format "[%s] Waiting for tasks... (%ds/%ds)"
|
|
1041
|
+
(println (format "[%s] [%s] Waiting for tasks... (%ds/%ds)"
|
|
1042
|
+
worker-id (log-ts) waited max-wait-seconds)))
|
|
724
1043
|
(Thread/sleep (* wait-poll-interval 1000))
|
|
725
1044
|
(recur (+ waited wait-poll-interval))))))
|
|
726
1045
|
|
|
@@ -740,7 +1059,8 @@
|
|
|
740
1059
|
Cycle cap is controlled by :max-cycles (legacy key: :iterations)."
|
|
741
1060
|
[worker]
|
|
742
1061
|
(tasks/ensure-dirs!)
|
|
743
|
-
(let [{:keys [id runs max-cycles iterations swarm-id wait-between
|
|
1062
|
+
(let [{:keys [id runs max-cycles iterations swarm-id wait-between
|
|
1063
|
+
max-wait-for-tasks max-needs-followups]} worker
|
|
744
1064
|
cycle-cap (or max-cycles iterations 10)
|
|
745
1065
|
run-goal (or runs iterations 10)
|
|
746
1066
|
project-root (System/getProperty "user.dir")]
|
|
@@ -759,6 +1079,7 @@
|
|
|
759
1079
|
(wait-for-tasks! id max-wait-for-tasks))
|
|
760
1080
|
|
|
761
1081
|
(loop [cycle 1
|
|
1082
|
+
attempt 1
|
|
762
1083
|
completed-runs 0
|
|
763
1084
|
consec-errors 0
|
|
764
1085
|
metrics {:merges 0 :rejections 0 :errors 0 :recycled 0 :review-rounds-total 0 :claims 0}
|
|
@@ -766,7 +1087,9 @@
|
|
|
766
1087
|
wt-state nil
|
|
767
1088
|
claimed-ids #{}
|
|
768
1089
|
claim-resume-prompt nil
|
|
769
|
-
working-resumes 0
|
|
1090
|
+
working-resumes 0
|
|
1091
|
+
needs-followups 0
|
|
1092
|
+
signals []]
|
|
770
1093
|
(let [finish (fn [status]
|
|
771
1094
|
(assoc worker :completed completed-runs
|
|
772
1095
|
:runs-completed completed-runs
|
|
@@ -783,6 +1106,8 @@
|
|
|
783
1106
|
(> cycle cycle-cap)
|
|
784
1107
|
(do
|
|
785
1108
|
(when wt-state
|
|
1109
|
+
(when (seq claimed-ids)
|
|
1110
|
+
(recycle-task-id-set! id claimed-ids))
|
|
786
1111
|
(cleanup-worktree! project-root (:dir wt-state) (:branch wt-state)))
|
|
787
1112
|
(println (format "[%s] Completed %d/%d runs in %d cycles (%d merges, %d claims, %d rejections, %d errors, %d recycled)"
|
|
788
1113
|
id completed-runs run-goal (dec cycle)
|
|
@@ -792,6 +1117,8 @@
|
|
|
792
1117
|
(>= completed-runs run-goal)
|
|
793
1118
|
(do
|
|
794
1119
|
(when wt-state
|
|
1120
|
+
(when (seq claimed-ids)
|
|
1121
|
+
(recycle-task-id-set! id claimed-ids))
|
|
795
1122
|
(cleanup-worktree! project-root (:dir wt-state) (:branch wt-state)))
|
|
796
1123
|
(println (format "[%s] Reached run goal: %d/%d runs in %d cycles"
|
|
797
1124
|
id completed-runs run-goal (dec cycle)))
|
|
@@ -806,8 +1133,9 @@
|
|
|
806
1133
|
(when (seq recycled)
|
|
807
1134
|
(println (format "[%s] Recycled %d claimed task(s) on shutdown" id (count recycled))))))
|
|
808
1135
|
(cleanup-worktree! project-root (:dir wt-state) (:branch wt-state)))
|
|
809
|
-
(emit-cycle-log! swarm-id id cycle current-run (
|
|
810
|
-
{:
|
|
1136
|
+
(emit-cycle-log! swarm-id id cycle attempt current-run (now-ms) session-id
|
|
1137
|
+
{:timing-ms (init-cycle-timing)
|
|
1138
|
+
:outcome :interrupted})
|
|
811
1139
|
(finish :interrupted))
|
|
812
1140
|
|
|
813
1141
|
:else
|
|
@@ -820,8 +1148,8 @@
|
|
|
820
1148
|
(println (format "[%s] Queue empty, waiting for tasks before cycle %d" id cycle))
|
|
821
1149
|
(wait-for-tasks! id max-wait-for-tasks))
|
|
822
1150
|
|
|
823
|
-
|
|
824
|
-
(or wt-state (create-iteration-worktree! project-root id cycle))
|
|
1151
|
+
(let [wt-state (try
|
|
1152
|
+
(or wt-state (create-iteration-worktree! project-root swarm-id id cycle))
|
|
825
1153
|
(catch Exception e
|
|
826
1154
|
(println (format "[%s] Worktree creation failed: %s" id (.getMessage e)))
|
|
827
1155
|
nil))]
|
|
@@ -832,160 +1160,268 @@
|
|
|
832
1160
|
(do
|
|
833
1161
|
(println (format "[%s] %d consecutive errors, stopping" id errors))
|
|
834
1162
|
(finish :error))
|
|
835
|
-
(do (backoff-sleep! id errors)
|
|
1163
|
+
(do (backoff-sleep! id errors)
|
|
1164
|
+
(recur (inc cycle) 1 completed-runs errors metrics nil nil #{} nil 0 0 []))))
|
|
836
1165
|
|
|
837
1166
|
(let [resume? (or (some? session-id) (some? claim-resume-prompt))
|
|
838
|
-
cycle-start-ms (
|
|
1167
|
+
cycle-start-ms (now-ms)
|
|
1168
|
+
cycle-timing (init-cycle-timing)
|
|
839
1169
|
pre-current-ids (tasks/current-task-ids)
|
|
840
|
-
_ (println (format "[%s] %s cycle %d/%d (run %d/%d)"
|
|
841
|
-
id
|
|
1170
|
+
_ (println (format "[%s] %s cycle %d/%d (run %d/%d, attempt %d)"
|
|
1171
|
+
id
|
|
1172
|
+
(if (= attempt 1) "Starting" "Resuming")
|
|
1173
|
+
cycle cycle-cap current-run run-goal attempt))
|
|
842
1174
|
context (build-context)
|
|
843
|
-
|
|
1175
|
+
agent-start-ms (now-ms)
|
|
1176
|
+
{:keys [output exit done? merge? needs-followup? claim-ids parse-warning raw-snippet] :as agent-result}
|
|
844
1177
|
(run-agent! worker (:path wt-state) context session-id resume?
|
|
845
1178
|
:resume-prompt-override claim-resume-prompt)
|
|
1179
|
+
cycle-timing (add-llm-call cycle-timing
|
|
1180
|
+
:implementation-rounds-ms
|
|
1181
|
+
"implementation"
|
|
1182
|
+
(- (now-ms) agent-start-ms))
|
|
846
1183
|
new-session-id (:session-id agent-result)
|
|
847
|
-
|
|
1184
|
+
stderr-snippet (:stderr-snippet agent-result)
|
|
1185
|
+
mv-claimed-tasks (detect-claimed-tasks pre-current-ids)
|
|
1186
|
+
active-claimed-ids (active-claimed-task-ids claimed-ids mv-claimed-tasks)
|
|
1187
|
+
wt-path (:path wt-state)
|
|
1188
|
+
;; Classify the signal for this attempt
|
|
1189
|
+
signal-label (cond
|
|
1190
|
+
(not (zero? exit)) (str "error:exit-" exit)
|
|
1191
|
+
(and (seq claim-ids) (not merge?) (not done?))
|
|
1192
|
+
(str "claim:" (str/join "," claim-ids))
|
|
1193
|
+
merge? "merge"
|
|
1194
|
+
done? "done"
|
|
1195
|
+
needs-followup? "needs-followup"
|
|
1196
|
+
:else "working")
|
|
1197
|
+
signals (conj signals signal-label)
|
|
1198
|
+
emit! (fn [opts]
|
|
1199
|
+
(emit-cycle-log! swarm-id id cycle attempt current-run cycle-start-ms new-session-id
|
|
1200
|
+
(merge {:worktree-path wt-path :signals signals} opts)))]
|
|
848
1201
|
(cond
|
|
849
1202
|
(not (zero? exit))
|
|
850
1203
|
(let [errors (inc consec-errors)
|
|
851
|
-
recycled (recycle-
|
|
852
|
-
metrics (-> metrics (update :errors inc) (update :recycled + recycled))
|
|
1204
|
+
recycled (recycle-active-claims! id claimed-ids mv-claimed-tasks)
|
|
1205
|
+
metrics (-> metrics (update :errors inc) (update :recycled + (count recycled)))
|
|
853
1206
|
error-msg (subs (or output "") 0 (min 200 (count (or output ""))))]
|
|
854
1207
|
(println (format "[%s] Agent error (exit %d): %s" id exit error-msg))
|
|
855
|
-
(
|
|
856
|
-
|
|
857
|
-
|
|
858
|
-
|
|
1208
|
+
(when (seq stderr-snippet)
|
|
1209
|
+
(println (format "[%s] Agent stderr snippet: %s"
|
|
1210
|
+
id
|
|
1211
|
+
(snippet (str/replace stderr-snippet #"\s+" " ") 240))))
|
|
1212
|
+
(emit!
|
|
1213
|
+
{:timing-ms cycle-timing
|
|
1214
|
+
:outcome :error
|
|
1215
|
+
:claimed-task-ids (vec active-claimed-ids)
|
|
1216
|
+
:recycled-tasks (seq recycled)
|
|
859
1217
|
:error-snippet error-msg})
|
|
860
1218
|
(cleanup-worktree! project-root (:dir wt-state) (:branch wt-state))
|
|
861
1219
|
(if (>= errors max-consecutive-errors)
|
|
862
1220
|
(do
|
|
863
1221
|
(println (format "[%s] %d consecutive errors, stopping" id errors))
|
|
864
1222
|
(finish :error))
|
|
865
|
-
(do (backoff-sleep! id errors)
|
|
1223
|
+
(do (backoff-sleep! id errors)
|
|
1224
|
+
(recur (inc cycle) 1 (inc completed-runs) errors metrics nil nil #{} nil 0 0 []))))
|
|
866
1225
|
|
|
867
1226
|
(and (seq claim-ids) (not merge?) (not done?))
|
|
868
1227
|
(let [_ (println (format "[%s] CLAIM signal: %s" id (str/join ", " claim-ids)))
|
|
869
1228
|
{:keys [claimed resume-prompt]} (execute-claims! claim-ids)
|
|
870
|
-
new-claimed-ids (into claimed-ids claimed)
|
|
1229
|
+
new-claimed-ids (into active-claimed-ids claimed)
|
|
871
1230
|
metrics (update metrics :claims + (count claimed))]
|
|
872
1231
|
(println (format "[%s] Claimed %d/%d tasks" id (count claimed) (count claim-ids)))
|
|
873
|
-
(emit
|
|
874
|
-
{:
|
|
875
|
-
|
|
876
|
-
|
|
1232
|
+
(emit!
|
|
1233
|
+
{:timing-ms cycle-timing
|
|
1234
|
+
:outcome :claimed :claimed-task-ids (vec claimed)})
|
|
1235
|
+
(recur cycle (inc attempt) completed-runs 0 metrics new-session-id wt-state
|
|
1236
|
+
new-claimed-ids resume-prompt 0 0 signals))
|
|
877
1237
|
|
|
878
1238
|
merge?
|
|
879
1239
|
(if (worktree-has-changes? (:path wt-state))
|
|
880
1240
|
(if (task-only-diff? (:path wt-state))
|
|
881
|
-
(
|
|
1241
|
+
(let [all-claimed active-claimed-ids]
|
|
882
1242
|
(println (format "[%s] Task-only diff, auto-merging" id))
|
|
883
|
-
(let [sync-status (sync-worktree-to-main! worker (:path wt-state) id)
|
|
884
|
-
all-claimed (into claimed-ids mv-claimed-tasks)]
|
|
1243
|
+
(let [sync-status (sync-worktree-to-main! worker (:path wt-state) id)]
|
|
885
1244
|
(if (= :failed sync-status)
|
|
886
|
-
(
|
|
1245
|
+
(let [recycled (recycle-task-id-set! id all-claimed)
|
|
1246
|
+
metrics (update metrics :recycled + (count recycled))]
|
|
887
1247
|
(println (format "[%s] Sync to main failed, skipping merge" id))
|
|
888
|
-
(emit
|
|
889
|
-
{:
|
|
1248
|
+
(emit!
|
|
1249
|
+
{:timing-ms cycle-timing
|
|
1250
|
+
:outcome :sync-failed
|
|
1251
|
+
:claimed-task-ids (vec all-claimed)
|
|
1252
|
+
:recycled-tasks (seq recycled)})
|
|
890
1253
|
(cleanup-worktree! project-root (:dir wt-state) (:branch wt-state))
|
|
891
|
-
(recur (inc cycle) (inc completed-runs) 0 metrics nil nil #{} nil 0))
|
|
892
|
-
(let [
|
|
893
|
-
|
|
1254
|
+
(recur (inc cycle) 1 (inc completed-runs) 0 metrics nil nil #{} nil 0 0 []))
|
|
1255
|
+
(let [merge-result (merge-to-main! (:path wt-state) (:branch wt-state) id project-root 0 all-claimed)
|
|
1256
|
+
merge-result (if (:ok? merge-result)
|
|
1257
|
+
merge-result
|
|
1258
|
+
(recover-merge-failure! worker (:path wt-state) (:branch wt-state)
|
|
1259
|
+
id project-root 0 all-claimed merge-result))
|
|
1260
|
+
merged? (:ok? merge-result)
|
|
1261
|
+
recycled (when-not merged?
|
|
1262
|
+
(recycle-task-id-set! id all-claimed))
|
|
1263
|
+
completed-count (or (:completed-count merge-result) 0)
|
|
1264
|
+
metrics (cond-> metrics
|
|
1265
|
+
(and merged? (pos? completed-count)) (update :merges inc)
|
|
1266
|
+
(seq recycled) (update :recycled + (count recycled)))]
|
|
894
1267
|
(println (format "[%s] Cycle %d/%d complete" id cycle cycle-cap))
|
|
895
|
-
(emit
|
|
896
|
-
{:
|
|
1268
|
+
(emit!
|
|
1269
|
+
{:timing-ms cycle-timing
|
|
1270
|
+
:outcome (if merged? :merged :merge-failed)
|
|
897
1271
|
:claimed-task-ids (vec all-claimed)
|
|
1272
|
+
:recycled-tasks (seq recycled)
|
|
898
1273
|
:review-rounds 0})
|
|
899
1274
|
(cleanup-worktree! project-root (:dir wt-state) (:branch wt-state))
|
|
900
|
-
(recur (inc cycle) (inc completed-runs) 0 metrics nil nil #{} nil 0)))))
|
|
901
|
-
|
|
902
|
-
|
|
903
|
-
|
|
904
|
-
|
|
1275
|
+
(recur (inc cycle) 1 (inc completed-runs) 0 metrics nil nil #{} nil 0 0 [])))))
|
|
1276
|
+
(let [{:keys [approved? attempts timing]} (review-loop! worker (:path wt-state) id cycle cycle-timing)
|
|
1277
|
+
cycle-timing (or timing cycle-timing)
|
|
1278
|
+
metrics (-> metrics
|
|
1279
|
+
(update :review-rounds-total + (or attempts 0))
|
|
1280
|
+
(cond-> (not approved?) (update :rejections inc)))]
|
|
905
1281
|
(if approved?
|
|
906
1282
|
(let [sync-status (sync-worktree-to-main! worker (:path wt-state) id)
|
|
907
|
-
all-claimed
|
|
1283
|
+
all-claimed active-claimed-ids]
|
|
908
1284
|
(if (= :failed sync-status)
|
|
909
|
-
(
|
|
1285
|
+
(let [recycled (recycle-task-id-set! id all-claimed)
|
|
1286
|
+
metrics (update metrics :recycled + (count recycled))]
|
|
910
1287
|
(println (format "[%s] Sync to main failed after approval, skipping merge" id))
|
|
911
|
-
(emit
|
|
912
|
-
{:
|
|
1288
|
+
(emit!
|
|
1289
|
+
{:timing-ms cycle-timing
|
|
1290
|
+
:outcome :sync-failed
|
|
913
1291
|
:claimed-task-ids (vec all-claimed)
|
|
1292
|
+
:recycled-tasks (seq recycled)
|
|
914
1293
|
:review-rounds (or attempts 0)})
|
|
915
1294
|
(cleanup-worktree! project-root (:dir wt-state) (:branch wt-state))
|
|
916
|
-
(recur (inc cycle) (inc completed-runs) 0 metrics nil nil #{} nil 0))
|
|
917
|
-
(let [
|
|
918
|
-
|
|
1295
|
+
(recur (inc cycle) 1 (inc completed-runs) 0 metrics nil nil #{} nil 0 0 []))
|
|
1296
|
+
(let [merge-result (merge-to-main! (:path wt-state) (:branch wt-state) id project-root (or attempts 0) all-claimed)
|
|
1297
|
+
merge-result (if (:ok? merge-result)
|
|
1298
|
+
merge-result
|
|
1299
|
+
(recover-merge-failure! worker (:path wt-state) (:branch wt-state)
|
|
1300
|
+
id project-root (or attempts 0) all-claimed merge-result))
|
|
1301
|
+
merged? (:ok? merge-result)
|
|
1302
|
+
recycled (when-not merged?
|
|
1303
|
+
(recycle-task-id-set! id all-claimed))
|
|
1304
|
+
completed-count (or (:completed-count merge-result) 0)
|
|
1305
|
+
metrics (cond-> metrics
|
|
1306
|
+
(and merged? (pos? completed-count)) (update :merges inc)
|
|
1307
|
+
(seq recycled) (update :recycled + (count recycled)))]
|
|
919
1308
|
(println (format "[%s] Cycle %d/%d complete" id cycle cycle-cap))
|
|
920
|
-
(emit
|
|
921
|
-
{:
|
|
1309
|
+
(emit!
|
|
1310
|
+
{:timing-ms cycle-timing
|
|
1311
|
+
:outcome (if merged? :merged :merge-failed)
|
|
922
1312
|
:claimed-task-ids (vec all-claimed)
|
|
1313
|
+
:recycled-tasks (seq recycled)
|
|
923
1314
|
:review-rounds (or attempts 0)})
|
|
924
1315
|
(cleanup-worktree! project-root (:dir wt-state) (:branch wt-state))
|
|
925
|
-
(recur (inc cycle) (inc completed-runs) 0 metrics nil nil #{} nil 0))))
|
|
926
|
-
(let [recycled (recycle-
|
|
927
|
-
metrics (update metrics :recycled + recycled)]
|
|
1316
|
+
(recur (inc cycle) 1 (inc completed-runs) 0 metrics nil nil #{} nil 0 0 []))))
|
|
1317
|
+
(let [recycled (recycle-active-claims! id claimed-ids mv-claimed-tasks)
|
|
1318
|
+
metrics (update metrics :recycled + (count recycled))]
|
|
928
1319
|
(println (format "[%s] Cycle %d/%d rejected" id cycle cycle-cap))
|
|
929
|
-
(emit
|
|
930
|
-
{:
|
|
931
|
-
:
|
|
932
|
-
:
|
|
1320
|
+
(emit!
|
|
1321
|
+
{:timing-ms cycle-timing
|
|
1322
|
+
:outcome :rejected
|
|
1323
|
+
:claimed-task-ids (vec active-claimed-ids)
|
|
1324
|
+
:recycled-tasks (seq recycled)
|
|
933
1325
|
:review-rounds (or attempts 0)})
|
|
934
1326
|
(cleanup-worktree! project-root (:dir wt-state) (:branch wt-state))
|
|
935
|
-
(recur (inc cycle) (inc completed-runs) 0 metrics nil nil #{} nil 0)))))
|
|
936
|
-
(let [recycled (recycle-
|
|
937
|
-
metrics (update metrics :recycled + recycled)]
|
|
1327
|
+
(recur (inc cycle) 1 (inc completed-runs) 0 metrics nil nil #{} nil 0 0 [])))))
|
|
1328
|
+
(let [recycled (recycle-active-claims! id claimed-ids mv-claimed-tasks)
|
|
1329
|
+
metrics (update metrics :recycled + (count recycled))]
|
|
938
1330
|
(println (format "[%s] Merge signaled but no changes, skipping" id))
|
|
939
|
-
(emit
|
|
940
|
-
{:
|
|
941
|
-
:
|
|
942
|
-
:
|
|
1331
|
+
(emit!
|
|
1332
|
+
{:timing-ms cycle-timing
|
|
1333
|
+
:outcome :no-changes
|
|
1334
|
+
:claimed-task-ids (vec active-claimed-ids)
|
|
1335
|
+
:recycled-tasks (seq recycled)})
|
|
943
1336
|
(cleanup-worktree! project-root (:dir wt-state) (:branch wt-state))
|
|
944
|
-
(recur (inc cycle) (inc completed-runs) 0 metrics nil nil #{} nil 0)))
|
|
1337
|
+
(recur (inc cycle) 1 (inc completed-runs) 0 metrics nil nil #{} nil 0 0 [])))
|
|
945
1338
|
|
|
946
1339
|
done?
|
|
947
|
-
(let [recycled (recycle-
|
|
948
|
-
metrics (
|
|
949
|
-
|
|
950
|
-
|
|
951
|
-
|
|
952
|
-
|
|
953
|
-
|
|
1340
|
+
(let [recycled (recycle-active-claims! id claimed-ids mv-claimed-tasks)
|
|
1341
|
+
metrics (-> metrics
|
|
1342
|
+
(update :recycled + (count recycled))
|
|
1343
|
+
(update :errors inc))]
|
|
1344
|
+
(println (format "[%s] Invalid __DONE__ signal from executor; stopping worker (cycle %d/%d)" id cycle cycle-cap))
|
|
1345
|
+
(emit!
|
|
1346
|
+
{:timing-ms cycle-timing
|
|
1347
|
+
:outcome :error
|
|
1348
|
+
:claimed-task-ids (vec active-claimed-ids)
|
|
1349
|
+
:recycled-tasks (seq recycled)
|
|
1350
|
+
:error-snippet "__DONE__ is not a valid executor signal; use CLAIM(...) or COMPLETE_AND_READY_FOR_MERGE"})
|
|
954
1351
|
(cleanup-worktree! project-root (:dir wt-state) (:branch wt-state))
|
|
955
|
-
(
|
|
1352
|
+
(finish :error))
|
|
1353
|
+
|
|
1354
|
+
needs-followup?
|
|
1355
|
+
(let [summary (subs (or output "") 0 (min 240 (count (or output ""))))
|
|
1356
|
+
next-followups (inc needs-followups)]
|
|
1357
|
+
(emit!
|
|
1358
|
+
{:timing-ms cycle-timing
|
|
1359
|
+
:outcome :needs-followup
|
|
1360
|
+
:claimed-task-ids (vec active-claimed-ids)
|
|
1361
|
+
:error-snippet summary})
|
|
1362
|
+
(if (> next-followups max-needs-followups)
|
|
1363
|
+
(let [recycled (recycle-active-claims! id claimed-ids mv-claimed-tasks)
|
|
1364
|
+
metrics (-> metrics
|
|
1365
|
+
(update :recycled + (count recycled))
|
|
1366
|
+
(update :errors inc))]
|
|
1367
|
+
(println (format "[%s] NEEDS_FOLLOWUP exhausted (%d/%d); stopping worker" id next-followups max-needs-followups))
|
|
1368
|
+
(cleanup-worktree! project-root (:dir wt-state) (:branch wt-state))
|
|
1369
|
+
(finish :error))
|
|
1370
|
+
(let [followup-prompt (build-needs-followup-prompt active-claimed-ids output)]
|
|
1371
|
+
(println (format "[%s] NEEDS_FOLLOWUP signal; continuing cycle with follow-up prompt (%d/%d)"
|
|
1372
|
+
id next-followups max-needs-followups))
|
|
1373
|
+
(recur cycle (inc attempt) completed-runs 0 metrics new-session-id wt-state
|
|
1374
|
+
active-claimed-ids followup-prompt 0 next-followups signals))))
|
|
956
1375
|
|
|
957
1376
|
:else
|
|
958
1377
|
(let [wr (inc working-resumes)
|
|
959
1378
|
max-wr (:max-working-resumes worker)]
|
|
1379
|
+
(when parse-warning
|
|
1380
|
+
(if (str/includes? parse-warning "AUTH_REQUIRED:")
|
|
1381
|
+
(println (format "[%s] LOGIN ISSUE: %s"
|
|
1382
|
+
id
|
|
1383
|
+
(str/replace parse-warning #"^AUTH_REQUIRED:\s*" "")))
|
|
1384
|
+
(println (format "[%s] WARNING: %s" id parse-warning))))
|
|
1385
|
+
(when (and parse-warning (seq raw-snippet))
|
|
1386
|
+
(println (format "[%s] Raw output snippet: %s"
|
|
1387
|
+
id
|
|
1388
|
+
(snippet (str/replace raw-snippet #"\s+" " ") 240))))
|
|
1389
|
+
(when (seq stderr-snippet)
|
|
1390
|
+
(println (format "[%s] Agent stderr snippet: %s"
|
|
1391
|
+
id
|
|
1392
|
+
(snippet (str/replace stderr-snippet #"\s+" " ") 240))))
|
|
960
1393
|
(cond
|
|
961
1394
|
(> wr max-wr)
|
|
962
|
-
(let [recycled (recycle-
|
|
963
|
-
metrics (update metrics :recycled + recycled)]
|
|
1395
|
+
(let [recycled (recycle-active-claims! id claimed-ids mv-claimed-tasks)
|
|
1396
|
+
metrics (update metrics :recycled + (count recycled))]
|
|
964
1397
|
(println (format "[%s] Stuck after %d working resumes + nudge, resetting session" id wr))
|
|
965
|
-
(emit
|
|
966
|
-
{:
|
|
967
|
-
:
|
|
968
|
-
:
|
|
1398
|
+
(emit!
|
|
1399
|
+
{:timing-ms cycle-timing
|
|
1400
|
+
:outcome :stuck
|
|
1401
|
+
:claimed-task-ids (vec active-claimed-ids)
|
|
1402
|
+
:recycled-tasks (seq recycled)})
|
|
969
1403
|
(cleanup-worktree! project-root (:dir wt-state) (:branch wt-state))
|
|
970
|
-
(recur (inc cycle) (inc completed-runs) 0 metrics nil nil #{} nil 0))
|
|
1404
|
+
(recur (inc cycle) 1 (inc completed-runs) 0 metrics nil nil #{} nil 0 0 []))
|
|
971
1405
|
|
|
972
1406
|
(= wr max-wr)
|
|
973
1407
|
(do
|
|
974
1408
|
(println (format "[%s] Working... %d/%d resumes, nudging agent to wrap up" id wr max-wr))
|
|
975
|
-
(emit
|
|
976
|
-
{:
|
|
977
|
-
:
|
|
978
|
-
|
|
979
|
-
|
|
1409
|
+
(emit!
|
|
1410
|
+
{:timing-ms cycle-timing
|
|
1411
|
+
:outcome :working
|
|
1412
|
+
:claimed-task-ids (vec active-claimed-ids)})
|
|
1413
|
+
(recur cycle (inc attempt) completed-runs 0 metrics new-session-id wt-state
|
|
1414
|
+
active-claimed-ids nudge-prompt wr needs-followups signals))
|
|
980
1415
|
|
|
981
1416
|
:else
|
|
982
1417
|
(do
|
|
983
1418
|
(println (format "[%s] Working... (will resume, %d/%d)" id wr max-wr))
|
|
984
|
-
(emit
|
|
985
|
-
{:
|
|
986
|
-
:
|
|
987
|
-
|
|
988
|
-
|
|
1419
|
+
(emit!
|
|
1420
|
+
{:timing-ms cycle-timing
|
|
1421
|
+
:outcome :working
|
|
1422
|
+
:claimed-task-ids (vec active-claimed-ids)})
|
|
1423
|
+
(recur cycle (inc attempt) completed-runs 0 metrics new-session-id wt-state
|
|
1424
|
+
active-claimed-ids nil wr needs-followups signals))))))))))))))
|
|
989
1425
|
|
|
990
1426
|
;; =============================================================================
|
|
991
1427
|
;; Multi-Worker Execution
|
|
@@ -1001,7 +1437,14 @@
|
|
|
1001
1437
|
Returns seq of final worker states."
|
|
1002
1438
|
[workers]
|
|
1003
1439
|
(tasks/ensure-dirs!)
|
|
1004
|
-
(let [swarm-id (-> workers first :swarm-id)
|
|
1440
|
+
(let [swarm-id (-> workers first :swarm-id)
|
|
1441
|
+
stale-current (tasks/list-current)]
|
|
1442
|
+
(when (seq stale-current)
|
|
1443
|
+
(println (format "WARNING: %d task(s) already in current/ from a previous run. These may be stale claims."
|
|
1444
|
+
(count stale-current)))
|
|
1445
|
+
(doseq [t stale-current]
|
|
1446
|
+
(println (format " - %s: %s" (:id t) (:summary t))))
|
|
1447
|
+
(println " Run `oompa requeue` to move them back to pending/ if they are stale."))
|
|
1005
1448
|
(println (format "Launching %d workers..." (count workers)))
|
|
1006
1449
|
|
|
1007
1450
|
;; Register JVM shutdown hook so SIGTERM/SIGINT triggers graceful stop.
|
|
@@ -1023,27 +1466,38 @@
|
|
|
1023
1466
|
(map-indexed
|
|
1024
1467
|
(fn [idx worker]
|
|
1025
1468
|
(let [worker (assoc worker :id (or (:id worker) (str "w" idx)))]
|
|
1026
|
-
(future
|
|
1469
|
+
(future
|
|
1470
|
+
(try
|
|
1471
|
+
(run-worker! worker)
|
|
1472
|
+
(catch Exception e
|
|
1473
|
+
(println (format "[%s] FATAL: %s" (:id worker) (.getMessage e)))
|
|
1474
|
+
(.printStackTrace e)
|
|
1475
|
+
(throw e))))))
|
|
1027
1476
|
workers))]
|
|
1028
1477
|
|
|
1029
1478
|
(println "All workers launched. Waiting for completion...")
|
|
1030
|
-
(let [results (mapv
|
|
1479
|
+
(let [results (mapv (fn [f]
|
|
1480
|
+
(try
|
|
1481
|
+
(deref f)
|
|
1482
|
+
(catch Exception e
|
|
1483
|
+
(println (format "Worker future failed: %s" (.getMessage e)))
|
|
1484
|
+
{:status :fatal-error :error (.getMessage e)})))
|
|
1485
|
+
futures)]
|
|
1031
1486
|
;; Clean exit — tell shutdown hook not to write stopped.json
|
|
1032
1487
|
(reset! shutdown-requested? false)
|
|
1033
1488
|
;; Remove the hook so it doesn't accumulate across calls
|
|
1034
1489
|
(try (.removeShutdownHook (Runtime/getRuntime) hook) (catch Exception _))
|
|
1035
1490
|
(println "\nAll workers complete.")
|
|
1036
|
-
(
|
|
1037
|
-
|
|
1038
|
-
(:id
|
|
1039
|
-
|
|
1040
|
-
|
|
1041
|
-
|
|
1042
|
-
|
|
1043
|
-
|
|
1044
|
-
|
|
1045
|
-
|
|
1046
|
-
(or (:review-rounds-total w) 0))))
|
|
1491
|
+
(let [timing-by-worker (aggregate-cycle-timings-by-worker swarm-id)
|
|
1492
|
+
rows (mapv (fn [result]
|
|
1493
|
+
(let [row-id (or (:id result) "")
|
|
1494
|
+
totals (get timing-by-worker row-id empty-cycle-total)]
|
|
1495
|
+
(worker-summary-row result totals)))
|
|
1496
|
+
results)]
|
|
1497
|
+
(println "\nWorker Summary")
|
|
1498
|
+
(print-table [:Worker :Runs :Cycles :Status :Merges :Claims :Rejects :Errors :Recycled
|
|
1499
|
+
:ReviewRounds :ImplMs :ReviewMs :FixMs :HarnessMs :TotalMs]
|
|
1500
|
+
rows))
|
|
1047
1501
|
|
|
1048
1502
|
;; Write stopped event — all state derivable from cycle logs
|
|
1049
1503
|
(when swarm-id
|
|
@@ -1091,16 +1545,12 @@
|
|
|
1091
1545
|
tagged-prompt (str "[oompa:" swarm-id* ":planner] " prompt-text)
|
|
1092
1546
|
abs-root (.getAbsolutePath (io/file project-root))
|
|
1093
1547
|
|
|
1094
|
-
cmd (harness/build-cmd harness
|
|
1095
|
-
{:cwd abs-root :model model :prompt tagged-prompt})
|
|
1096
|
-
|
|
1097
1548
|
_ (println (format "[planner] Running (%s:%s, max_pending: %d, current: %d)"
|
|
1098
1549
|
(name harness) (or model "default") max-pending pending-before))
|
|
1099
1550
|
|
|
1100
1551
|
result (try
|
|
1101
|
-
(
|
|
1102
|
-
|
|
1103
|
-
:out :string :err :string})
|
|
1552
|
+
(harness/run-command! harness
|
|
1553
|
+
{:cwd abs-root :model model :prompt tagged-prompt})
|
|
1104
1554
|
(catch Exception e
|
|
1105
1555
|
(println (format "[planner] Agent exception: %s" (.getMessage e)))
|
|
1106
1556
|
{:exit -1 :out "" :err (.getMessage e)}))
|