@nbardy/oompa 0.7.1 → 0.7.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -6
- package/agentnet/src/agentnet/agent.clj +45 -20
- package/agentnet/src/agentnet/cli.clj +852 -193
- package/agentnet/src/agentnet/cli.clj.bak +1384 -0
- package/agentnet/src/agentnet/core.clj +17 -2
- package/agentnet/src/agentnet/harness.clj +93 -37
- package/agentnet/src/agentnet/runs.clj +11 -6
- package/agentnet/src/agentnet/schema.clj +8 -1
- package/agentnet/src/agentnet/tasks.clj +6 -0
- package/agentnet/src/agentnet/worker.clj +867 -408
- package/bin/oompa.js +5 -1
- package/config/prompts/_task_header.md +9 -2
- package/config/prompts/magicgenie-executor.md +15 -0
- package/config/prompts/magicgenie-planner.md +26 -0
- package/config/prompts/magicgenie-reviewer.md +44 -0
- package/oompa.example.json +4 -4
- package/package.json +5 -3
- package/scripts/README.md +6 -0
- package/scripts/__pycache__/stream_bridge.cpython-314.pyc +0 -0
- package/scripts/copy-repo-code.sh +110 -0
- package/scripts/install-babashka.js +97 -0
- package/scripts/test-harness-resume.sh +229 -0
|
@@ -21,6 +21,7 @@
|
|
|
21
21
|
[babashka.process :as process]
|
|
22
22
|
[clojure.java.io :as io]
|
|
23
23
|
[clojure.set]
|
|
24
|
+
[clojure.pprint :refer [print-table]]
|
|
24
25
|
[clojure.string :as str]))
|
|
25
26
|
|
|
26
27
|
;; =============================================================================
|
|
@@ -40,6 +41,13 @@
|
|
|
40
41
|
(def ^:private shutdown-requested? (atom false))
|
|
41
42
|
|
|
42
43
|
(declare task-root-for-cwd)
|
|
44
|
+
(declare verify-mergeable?)
|
|
45
|
+
|
|
46
|
+
(defn- log-ts
|
|
47
|
+
"Readable wall-clock timestamp for worker log lines."
|
|
48
|
+
[]
|
|
49
|
+
(.format (java.time.format.DateTimeFormatter/ofPattern "yyyy-MM-dd HH:mm:ss")
|
|
50
|
+
(java.time.LocalDateTime/now)))
|
|
43
51
|
|
|
44
52
|
(defn- load-prompt
|
|
45
53
|
"Load a prompt file. Tries path as-is first, then from package root."
|
|
@@ -47,6 +55,11 @@
|
|
|
47
55
|
(or (agent/load-custom-prompt path)
|
|
48
56
|
(agent/load-custom-prompt (str package-root "/" path))))
|
|
49
57
|
|
|
58
|
+
(defn- snippet
|
|
59
|
+
[s limit]
|
|
60
|
+
(let [s (or s "")]
|
|
61
|
+
(subs s 0 (min limit (count s)))))
|
|
62
|
+
|
|
50
63
|
(defn- build-template-tokens
|
|
51
64
|
"Build token map for prompt template {var} substitution.
|
|
52
65
|
Merges core/build-context (rich YAML header, queue, hotspots, etc.)
|
|
@@ -75,8 +88,8 @@
|
|
|
75
88
|
local-tasks (io/file cwd-file "tasks")
|
|
76
89
|
parent-tasks (some-> cwd-file .getParentFile (io/file "tasks"))]
|
|
77
90
|
(cond
|
|
78
|
-
(.exists local-tasks) "tasks"
|
|
79
91
|
(and parent-tasks (.exists parent-tasks)) "../tasks"
|
|
92
|
+
(.exists local-tasks) "tasks"
|
|
80
93
|
:else "tasks")))
|
|
81
94
|
|
|
82
95
|
(defn- render-task-header
|
|
@@ -90,6 +103,8 @@
|
|
|
90
103
|
(str/replace "{TASKS_ROOT}" task-root))))
|
|
91
104
|
|
|
92
105
|
(def ^:private default-max-working-resumes 5)
|
|
106
|
+
(def ^:private default-max-needs-followups 1)
|
|
107
|
+
(def ^:private default-max-wait-for-tasks 600)
|
|
93
108
|
|
|
94
109
|
(defn create-worker
|
|
95
110
|
"Create a worker config.
|
|
@@ -97,16 +112,23 @@
|
|
|
97
112
|
:can-plan when false, worker waits for tasks before starting (backpressure).
|
|
98
113
|
:reasoning reasoning effort level (e.g. \"low\", \"medium\", \"high\") — codex only.
|
|
99
114
|
:review-prompts paths to reviewer prompt files (loaded and concatenated for review).
|
|
100
|
-
:wait-between seconds to sleep between
|
|
101
|
-
:max-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
115
|
+
:wait-between seconds to sleep between cycles (nil or 0 = no wait).
|
|
116
|
+
:max-wait-for-tasks max seconds a non-planner waits for tasks before giving up (default 600).
|
|
117
|
+
:max-working-resumes max consecutive working resumes before nudge+kill (default 5).
|
|
118
|
+
:max-needs-followups max NEEDS_FOLLOWUP continuations in one cycle (default 1)."
|
|
119
|
+
[{:keys [id swarm-id harness model runs max-cycles iterations prompts can-plan reasoning
|
|
120
|
+
reviewers wait-between
|
|
121
|
+
max-working-resumes max-needs-followups max-wait-for-tasks]}]
|
|
122
|
+
(let [cycle-cap (or max-cycles iterations runs 10)
|
|
123
|
+
run-goal (or runs iterations 10)]
|
|
105
124
|
{:id id
|
|
106
125
|
:swarm-id swarm-id
|
|
107
126
|
:harness (or harness :codex)
|
|
108
127
|
:model model
|
|
109
|
-
:iterations
|
|
128
|
+
;; Legacy compatibility: :iterations remains the cycle cap.
|
|
129
|
+
:iterations cycle-cap
|
|
130
|
+
:max-cycles cycle-cap
|
|
131
|
+
:runs run-goal
|
|
110
132
|
:prompts (cond
|
|
111
133
|
(vector? prompts) prompts
|
|
112
134
|
(string? prompts) [prompts]
|
|
@@ -114,15 +136,15 @@
|
|
|
114
136
|
:can-plan (if (some? can-plan) can-plan true)
|
|
115
137
|
:reasoning reasoning
|
|
116
138
|
:wait-between (when (and wait-between (pos? wait-between)) wait-between)
|
|
117
|
-
:
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
:else [])
|
|
139
|
+
:max-wait-for-tasks (let [v (or max-wait-for-tasks default-max-wait-for-tasks)]
|
|
140
|
+
(if (and (number? v) (pos? v))
|
|
141
|
+
v
|
|
142
|
+
default-max-wait-for-tasks))
|
|
143
|
+
:reviewers reviewers
|
|
123
144
|
:max-working-resumes (or max-working-resumes default-max-working-resumes)
|
|
145
|
+
:max-needs-followups (or max-needs-followups default-max-needs-followups)
|
|
124
146
|
:completed 0
|
|
125
|
-
:status :idle})
|
|
147
|
+
:status :idle}))
|
|
126
148
|
|
|
127
149
|
;; =============================================================================
|
|
128
150
|
;; Task Execution
|
|
@@ -139,7 +161,9 @@
|
|
|
139
161
|
"1. If you have meaningful changes: commit them and signal COMPLETE_AND_READY_FOR_MERGE\n"
|
|
140
162
|
"2. If scope is too large: create follow-up tasks in tasks/pending/ for remaining work,\n"
|
|
141
163
|
" commit what you have (even partial notes/design docs), and signal COMPLETE_AND_READY_FOR_MERGE\n"
|
|
142
|
-
"3. If you
|
|
164
|
+
"3. If you truly cannot produce a merge-ready artifact this turn, signal NEEDS_FOLLOWUP\n"
|
|
165
|
+
" and explain the remaining work. The framework will keep your claimed tasks and give you\n"
|
|
166
|
+
" one targeted follow-up prompt. This is not success.\n\n"
|
|
143
167
|
"Do NOT continue working without producing a signal."))
|
|
144
168
|
|
|
145
169
|
(defn- build-context
|
|
@@ -181,11 +205,63 @@
|
|
|
181
205
|
"\n\n"
|
|
182
206
|
(if (seq claimed-ids)
|
|
183
207
|
"Work on your claimed tasks. Signal COMPLETE_AND_READY_FOR_MERGE when done."
|
|
184
|
-
"No claims succeeded. CLAIM different tasks,
|
|
208
|
+
"No claims succeeded. CLAIM different tasks. If you cannot finish a mergeable artifact after trying hard, signal NEEDS_FOLLOWUP with a short explanation."))]
|
|
185
209
|
{:claimed claimed-ids
|
|
186
210
|
:failed failed-ids
|
|
187
211
|
:resume-prompt prompt}))
|
|
188
212
|
|
|
213
|
+
(defn- active-claimed-task-ids
|
|
214
|
+
"Union of tasks claimed earlier in the cycle and tasks moved into current/
|
|
215
|
+
during the latest attempt."
|
|
216
|
+
[claimed-ids mv-claimed-tasks]
|
|
217
|
+
(-> (set claimed-ids)
|
|
218
|
+
(into mv-claimed-tasks)))
|
|
219
|
+
|
|
220
|
+
(defn- recycle-task-id-set!
|
|
221
|
+
"Recycle a set of claimed task IDs from current/ back to pending/.
|
|
222
|
+
Returns a vector of recycled IDs."
|
|
223
|
+
[worker-id task-ids]
|
|
224
|
+
(let [task-ids (set (remove nil? task-ids))
|
|
225
|
+
recycled (when (seq task-ids)
|
|
226
|
+
(tasks/recycle-tasks! task-ids))]
|
|
227
|
+
(when (seq recycled)
|
|
228
|
+
(println (format "[%s] Recycled %d claimed task(s): %s"
|
|
229
|
+
worker-id (count recycled) (str/join ", " recycled))))
|
|
230
|
+
(vec (or recycled []))))
|
|
231
|
+
|
|
232
|
+
(defn- recycle-active-claims!
|
|
233
|
+
"Recycle all claims active in the current cycle."
|
|
234
|
+
[worker-id claimed-ids mv-claimed-tasks]
|
|
235
|
+
(recycle-task-id-set! worker-id (active-claimed-task-ids claimed-ids mv-claimed-tasks)))
|
|
236
|
+
|
|
237
|
+
(defn- build-needs-followup-prompt
|
|
238
|
+
"Prompt injected after NEEDS_FOLLOWUP so the worker keeps ownership and
|
|
239
|
+
closes the loop in the same cycle."
|
|
240
|
+
[claimed-ids output]
|
|
241
|
+
(let [context (build-context)
|
|
242
|
+
explanation (some-> output
|
|
243
|
+
(str/replace #"(?is)^\s*NEEDS_FOLLOWUP\b[\s:.-]*" "")
|
|
244
|
+
str/trim)]
|
|
245
|
+
(str "## NEEDS_FOLLOWUP Follow-up\n\n"
|
|
246
|
+
(if (seq claimed-ids)
|
|
247
|
+
(str "You still own these claimed tasks: "
|
|
248
|
+
(str/join ", " (sort claimed-ids))
|
|
249
|
+
"\n\n")
|
|
250
|
+
"You do not currently own any claimed tasks.\n\n")
|
|
251
|
+
"Continue the SAME cycle and finish a merge-ready artifact.\n"
|
|
252
|
+
"Do not output NEEDS_FOLLOWUP again unless you are still blocked after this follow-up.\n"
|
|
253
|
+
"Prefer the smallest useful diff. If scope is too large, create concrete follow-up tasks in the pending queue and still ship the artifact you have.\n\n"
|
|
254
|
+
(when (seq explanation)
|
|
255
|
+
(str "Your previous explanation:\n"
|
|
256
|
+
explanation
|
|
257
|
+
"\n\n"))
|
|
258
|
+
"Task Status: " (:task_status context) "\n"
|
|
259
|
+
"Remaining Pending:\n"
|
|
260
|
+
(if (str/blank? (:pending_tasks context))
|
|
261
|
+
"(none)"
|
|
262
|
+
(:pending_tasks context))
|
|
263
|
+
"\n\nWhen ready, signal COMPLETE_AND_READY_FOR_MERGE.")))
|
|
264
|
+
|
|
189
265
|
(defn- run-agent!
|
|
190
266
|
"Run agent with prompt, return {:output :done? :merge? :claim-ids :exit :session-id}.
|
|
191
267
|
When resume? is true, continues the existing session with a lighter prompt.
|
|
@@ -239,36 +315,40 @@
|
|
|
239
315
|
tagged-prompt (str "[oompa:" swarm-id* ":" id "] " prompt)
|
|
240
316
|
abs-worktree (.getAbsolutePath (io/file worktree-path))
|
|
241
317
|
|
|
242
|
-
cmd (harness/build-cmd harness
|
|
243
|
-
{:cwd abs-worktree :model model :reasoning reasoning
|
|
244
|
-
:session-id session-id :resume? resume?
|
|
245
|
-
:prompt tagged-prompt :format? true})
|
|
246
|
-
|
|
247
318
|
result (try
|
|
248
|
-
(
|
|
249
|
-
|
|
250
|
-
|
|
319
|
+
(harness/run-command! harness
|
|
320
|
+
{:cwd abs-worktree :model model :reasoning reasoning
|
|
321
|
+
:session-id session-id :resume? resume?
|
|
322
|
+
:prompt tagged-prompt :format? true})
|
|
251
323
|
(catch Exception e
|
|
252
324
|
(println (format "[%s] Agent exception: %s" id (.getMessage e)))
|
|
253
325
|
{:exit -1 :out "" :err (.getMessage e)}))
|
|
254
326
|
|
|
255
|
-
{:keys [output session-id]}
|
|
256
|
-
(harness/parse-output harness (:out result) session-id)
|
|
327
|
+
{:keys [output session-id warning raw-snippet]}
|
|
328
|
+
(harness/parse-output harness (:out result) session-id)
|
|
329
|
+
stderr-snippet (let [stderr (some-> (:err result) str/trim)]
|
|
330
|
+
(when (seq stderr)
|
|
331
|
+
(subs stderr 0 (min 400 (count stderr)))))]
|
|
257
332
|
|
|
258
333
|
{:output output
|
|
259
334
|
:exit (:exit result)
|
|
260
335
|
:done? (agent/done-signal? output)
|
|
261
336
|
:merge? (agent/merge-signal? output)
|
|
337
|
+
:needs-followup? (agent/needs-followup-signal? output)
|
|
262
338
|
:claim-ids (agent/parse-claim-signal output)
|
|
263
|
-
:session-id session-id
|
|
339
|
+
:session-id session-id
|
|
340
|
+
:parse-warning warning
|
|
341
|
+
:raw-snippet raw-snippet
|
|
342
|
+
:stderr-snippet stderr-snippet}))
|
|
264
343
|
|
|
265
344
|
(defn- run-reviewer!
|
|
266
345
|
"Run reviewer on worktree changes.
|
|
267
346
|
Uses custom review-prompts when configured, otherwise falls back to default.
|
|
268
347
|
prev-feedback: vector of previous review outputs (for multi-round context).
|
|
269
348
|
Returns {:verdict :approved|:needs-changes|:rejected, :comments [...], :output string}"
|
|
270
|
-
[{:keys [id swarm-id
|
|
271
|
-
(let [
|
|
349
|
+
[{:keys [id swarm-id reviewers]} worktree-path prev-feedback]
|
|
350
|
+
(let [start-ms (System/currentTimeMillis)
|
|
351
|
+
;; Get actual diff content (not just stat) — truncate to 8000 chars for prompt budget
|
|
272
352
|
diff-result (process/sh ["git" "diff" "main"]
|
|
273
353
|
{:dir worktree-path :out :string :err :string})
|
|
274
354
|
diff-content (let [d (:out diff-result)]
|
|
@@ -277,15 +357,9 @@
|
|
|
277
357
|
d))
|
|
278
358
|
|
|
279
359
|
swarm-id* (or swarm-id "unknown")
|
|
280
|
-
custom-prompt (when (seq review-prompts)
|
|
281
|
-
(->> review-prompts
|
|
282
|
-
(map load-prompt)
|
|
283
|
-
(remove nil?)
|
|
284
|
-
(str/join "\n\n")))
|
|
285
360
|
|
|
286
361
|
;; Only include the most recent round's feedback — the worker has already
|
|
287
362
|
;; attempted fixes based on it, so the reviewer just needs to verify.
|
|
288
|
-
;; Including all prior rounds bloats the prompt and causes empty output.
|
|
289
363
|
history-block (when (seq prev-feedback)
|
|
290
364
|
(let [latest (last prev-feedback)
|
|
291
365
|
truncated (if (> (count latest) 2000)
|
|
@@ -297,46 +371,57 @@
|
|
|
297
371
|
truncated
|
|
298
372
|
"\n\n")))
|
|
299
373
|
|
|
300
|
-
review-body (str (or custom-prompt
|
|
301
|
-
(str "Review the changes in this worktree.\n"
|
|
302
|
-
"Focus on architecture and design, not style.\n"))
|
|
303
|
-
"\n\nDiff:\n```\n" diff-content "\n```\n"
|
|
304
|
-
(when history-block history-block)
|
|
305
|
-
"\nYour verdict MUST be on its own line, exactly one of:\n"
|
|
306
|
-
"VERDICT: APPROVED\n"
|
|
307
|
-
"VERDICT: NEEDS_CHANGES\n\n"
|
|
308
|
-
"Do NOT use REJECTED. Always use NEEDS_CHANGES with specific, "
|
|
309
|
-
"actionable feedback explaining what must change and why. "
|
|
310
|
-
"The worker will attempt fixes based on your feedback.\n"
|
|
311
|
-
"After your verdict line, list every issue as a numbered item with "
|
|
312
|
-
"the file path and what needs to change.\n")
|
|
313
|
-
review-prompt (str "[oompa:" swarm-id* ":" id "] " review-body)
|
|
314
|
-
|
|
315
374
|
abs-wt (.getAbsolutePath (io/file worktree-path))
|
|
316
375
|
|
|
317
|
-
;;
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
376
|
+
;; Try each reviewer until one succeeds and returns a verdict
|
|
377
|
+
result (reduce (fn [_ {:keys [harness model prompts]}]
|
|
378
|
+
(let [custom-prompt (when (seq prompts)
|
|
379
|
+
(->> prompts
|
|
380
|
+
(map load-prompt)
|
|
381
|
+
(remove nil?)
|
|
382
|
+
(str/join "\n\n")))
|
|
383
|
+
review-body (str (or custom-prompt
|
|
384
|
+
(str "Review the changes in this worktree.\n"
|
|
385
|
+
"Focus on architecture and design, not style.\n"))
|
|
386
|
+
"\n\nDiff:\n```\n" diff-content "\n```\n"
|
|
387
|
+
(when history-block history-block)
|
|
388
|
+
"\nYour verdict MUST be on its own line, exactly one of:\n"
|
|
389
|
+
"VERDICT: APPROVED\n"
|
|
390
|
+
"VERDICT: NEEDS_CHANGES\n\n"
|
|
391
|
+
"Do NOT use REJECTED. Always use NEEDS_CHANGES with specific, "
|
|
392
|
+
"actionable feedback explaining what must change and why. "
|
|
393
|
+
"The worker will attempt fixes based on your feedback.\n"
|
|
394
|
+
"After your verdict line, list every issue as a numbered item with "
|
|
395
|
+
"the file path and what needs to change.\n")
|
|
396
|
+
review-prompt (str "[oompa:" swarm-id* ":" id "] " review-body)
|
|
397
|
+
res (try
|
|
398
|
+
(harness/run-command! harness {:cwd abs-wt :model model :prompt review-prompt})
|
|
399
|
+
(catch Exception e
|
|
400
|
+
{:exit -1 :out "" :err (.getMessage e)}))
|
|
401
|
+
parsed (harness/parse-output harness (:out res) nil)
|
|
402
|
+
output (or (:output parsed) "")
|
|
403
|
+
has-verdict? (or (re-find #"VERDICT:\s*APPROVED" output)
|
|
404
|
+
(re-find #"VERDICT:\s*NEEDS_CHANGES" output)
|
|
405
|
+
(re-find #"VERDICT:\s*REJECTED" output)
|
|
406
|
+
(re-find #"(?i)\bAPPROVED\b" output))]
|
|
407
|
+
(if (and (= (:exit res) 0) has-verdict?)
|
|
408
|
+
(reduced res)
|
|
409
|
+
(do
|
|
410
|
+
(println (format "[%s] Reviewer %s failed or returned no verdict, falling back..." id model))
|
|
411
|
+
res))))
|
|
412
|
+
{:exit -1 :out "" :err "No reviewers configured or no verdict returned"}
|
|
413
|
+
reviewers)
|
|
327
414
|
|
|
328
415
|
output (:out result)
|
|
329
416
|
|
|
330
|
-
;; Parse verdict
|
|
331
|
-
;; REJECTED is treated as NEEDS_CHANGES: the reviewer must always give
|
|
332
|
-
;; actionable feedback so the worker can attempt fixes. Hard rejection
|
|
333
|
-
;; only happens when max review rounds are exhausted.
|
|
417
|
+
;; Parse verdict
|
|
334
418
|
verdict (cond
|
|
335
419
|
(re-find #"VERDICT:\s*APPROVED" output) :approved
|
|
336
420
|
(re-find #"VERDICT:\s*NEEDS_CHANGES" output) :needs-changes
|
|
337
421
|
(re-find #"VERDICT:\s*REJECTED" output) :needs-changes
|
|
338
422
|
(re-find #"(?i)\bAPPROVED\b" output) :approved
|
|
339
|
-
:else :needs-changes)
|
|
423
|
+
:else :needs-changes)
|
|
424
|
+
duration-ms (- (System/currentTimeMillis) start-ms)]
|
|
340
425
|
|
|
341
426
|
(println (format "[%s] Reviewer verdict: %s" id (name verdict)))
|
|
342
427
|
(let [summary (subs output 0 (min 300 (count output)))]
|
|
@@ -346,14 +431,16 @@
|
|
|
346
431
|
{:verdict verdict
|
|
347
432
|
:comments (when (not= (:exit result) 0)
|
|
348
433
|
[(:err result)])
|
|
349
|
-
:output output
|
|
434
|
+
:output output
|
|
435
|
+
:duration-ms duration-ms}))
|
|
350
436
|
|
|
351
437
|
(defn- run-fix!
|
|
352
438
|
"Ask worker to fix issues based on reviewer feedback.
|
|
353
439
|
all-feedback: vector of all reviewer outputs so far (accumulated across rounds).
|
|
354
440
|
Returns {:output string, :exit int}"
|
|
355
441
|
[{:keys [id swarm-id harness model]} worktree-path all-feedback]
|
|
356
|
-
(let [
|
|
442
|
+
(let [start-ms (System/currentTimeMillis)
|
|
443
|
+
swarm-id* (or swarm-id "unknown")
|
|
357
444
|
feedback-text (if (> (count all-feedback) 1)
|
|
358
445
|
(str "The reviewer has given feedback across " (count all-feedback) " rounds.\n"
|
|
359
446
|
"Fix ALL outstanding issues:\n\n"
|
|
@@ -369,18 +456,17 @@
|
|
|
369
456
|
|
|
370
457
|
abs-wt (.getAbsolutePath (io/file worktree-path))
|
|
371
458
|
|
|
372
|
-
cmd (harness/build-cmd harness
|
|
373
|
-
{:cwd abs-wt :model model :prompt fix-prompt})
|
|
374
|
-
|
|
375
459
|
result (try
|
|
376
|
-
(
|
|
377
|
-
|
|
378
|
-
:out :string :err :string})
|
|
460
|
+
(harness/run-command! harness
|
|
461
|
+
{:cwd abs-wt :model model :prompt fix-prompt})
|
|
379
462
|
(catch Exception e
|
|
380
|
-
{:exit -1 :out "" :err (.getMessage e)}))
|
|
463
|
+
{:exit -1 :out "" :err (.getMessage e)}))
|
|
464
|
+
parsed (harness/parse-output harness (:out result) nil)
|
|
465
|
+
duration-ms (- (System/currentTimeMillis) start-ms)]
|
|
381
466
|
|
|
382
|
-
{:output (:
|
|
383
|
-
:exit (:exit result)
|
|
467
|
+
{:output (:output parsed)
|
|
468
|
+
:exit (:exit result)
|
|
469
|
+
:duration-ms duration-ms}))
|
|
384
470
|
|
|
385
471
|
(defn- collect-divergence-context
|
|
386
472
|
"Collect context about how a worktree branch has diverged from main.
|
|
@@ -394,6 +480,65 @@
|
|
|
394
480
|
:main-log (or main-log "(none)")
|
|
395
481
|
:diff-stat (or diff-stat "(none)")}))
|
|
396
482
|
|
|
483
|
+
(defn- first-nonblank-line
|
|
484
|
+
"Return first non-blank line from text for compact logging."
|
|
485
|
+
[s]
|
|
486
|
+
(some->> (or s "")
|
|
487
|
+
str/split-lines
|
|
488
|
+
(remove str/blank?)
|
|
489
|
+
first))
|
|
490
|
+
|
|
491
|
+
(defn- classify-merge-failure
|
|
492
|
+
"Classify git merge/checkout failure text for better logs."
|
|
493
|
+
[failure-text]
|
|
494
|
+
(cond
|
|
495
|
+
(re-find #"untracked working tree files would be overwritten by merge" (or failure-text ""))
|
|
496
|
+
:untracked-overwrite
|
|
497
|
+
(re-find #"CONFLICT|Merge conflict" (or failure-text ""))
|
|
498
|
+
:conflict
|
|
499
|
+
(re-find #"Your local changes to the following files would be overwritten" (or failure-text ""))
|
|
500
|
+
:local-changes-overwrite
|
|
501
|
+
:else
|
|
502
|
+
:unknown))
|
|
503
|
+
|
|
504
|
+
(defn- run-resolver-agent!
|
|
505
|
+
"Run resolver agent with divergence + failure context.
|
|
506
|
+
Returns :resolved when branch verifies as mergeable, else :failed."
|
|
507
|
+
[worker wt-path worker-id reason-details]
|
|
508
|
+
(println (format "[%s] Branch diverged from main, launching resolver agent%s"
|
|
509
|
+
worker-id
|
|
510
|
+
(if (str/blank? reason-details)
|
|
511
|
+
""
|
|
512
|
+
(str " (" reason-details ")"))))
|
|
513
|
+
(let [{:keys [branch-log main-log diff-stat]} (collect-divergence-context wt-path)
|
|
514
|
+
resolve-prompt (str "[oompa:" (or (:swarm-id worker) "unknown") ":" worker-id "] "
|
|
515
|
+
"Your branch cannot currently be merged safely into main.\n\n"
|
|
516
|
+
(when-not (str/blank? reason-details)
|
|
517
|
+
(str "Failure context from previous merge attempt:\n"
|
|
518
|
+
reason-details "\n\n"))
|
|
519
|
+
"Your branch's commits (not on main):\n" branch-log "\n\n"
|
|
520
|
+
"Commits on main since you branched:\n" main-log "\n\n"
|
|
521
|
+
"Divergence scope:\n" diff-stat "\n\n"
|
|
522
|
+
"Make this branch cleanly mergeable into main. "
|
|
523
|
+
"Preserve the intent of your branch's changes.\n"
|
|
524
|
+
"You have full git access — rebase, cherry-pick, resolve conflicts, "
|
|
525
|
+
"or clean up merge blockers.\n"
|
|
526
|
+
"When done, verify with: git diff main --stat")
|
|
527
|
+
abs-wt (.getAbsolutePath (io/file wt-path))
|
|
528
|
+
result (try
|
|
529
|
+
(harness/run-command! (:harness worker)
|
|
530
|
+
{:cwd abs-wt :model (:model worker) :prompt resolve-prompt})
|
|
531
|
+
(catch Exception e
|
|
532
|
+
{:exit -1 :out "" :err (.getMessage e)}))]
|
|
533
|
+
(if (zero? (:exit result))
|
|
534
|
+
(if (verify-mergeable? wt-path)
|
|
535
|
+
(do (println (format "[%s] Agent resolved divergence, branch is mergeable" worker-id))
|
|
536
|
+
:resolved)
|
|
537
|
+
(do (println (format "[%s] Agent ran but branch still can't merge cleanly" worker-id))
|
|
538
|
+
:failed))
|
|
539
|
+
(do (println (format "[%s] Resolver agent failed (exit %d)" worker-id (:exit result)))
|
|
540
|
+
:failed))))
|
|
541
|
+
|
|
397
542
|
(defn- verify-mergeable?
|
|
398
543
|
"Dry-run merge to verify a worktree branch merges cleanly into main.
|
|
399
544
|
Does NOT leave merge state behind — always cleans up the dry-run.
|
|
@@ -425,36 +570,10 @@
|
|
|
425
570
|
;; Conflict — abort merge to restore clean worktree state, then
|
|
426
571
|
;; hand the problem to the agent with full divergence context.
|
|
427
572
|
(let [_ (process/sh ["git" "merge" "--abort"] {:dir wt-path})
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
"Your branch's commits (not on main):\n" branch-log "\n\n"
|
|
433
|
-
"Commits on main since you branched:\n" main-log "\n\n"
|
|
434
|
-
"Divergence scope:\n" diff-stat "\n\n"
|
|
435
|
-
"Make this branch cleanly mergeable into main. "
|
|
436
|
-
"Preserve the intent of your branch's changes.\n"
|
|
437
|
-
"You have full git access — rebase, cherry-pick, resolve conflicts, "
|
|
438
|
-
"whatever works.\n"
|
|
439
|
-
"When done, verify with: git diff main --stat")
|
|
440
|
-
abs-wt (.getAbsolutePath (io/file wt-path))
|
|
441
|
-
cmd (harness/build-cmd (:harness worker)
|
|
442
|
-
{:cwd abs-wt :model (:model worker) :prompt resolve-prompt})
|
|
443
|
-
result (try
|
|
444
|
-
(process/sh cmd {:dir abs-wt
|
|
445
|
-
:in (harness/process-stdin (:harness worker) resolve-prompt)
|
|
446
|
-
:out :string :err :string})
|
|
447
|
-
(catch Exception e
|
|
448
|
-
{:exit -1 :out "" :err (.getMessage e)}))]
|
|
449
|
-
(if (zero? (:exit result))
|
|
450
|
-
;; Agent ran — verify the branch actually merges cleanly now
|
|
451
|
-
(if (verify-mergeable? wt-path)
|
|
452
|
-
(do (println (format "[%s] Agent resolved divergence, branch is mergeable" worker-id))
|
|
453
|
-
:resolved)
|
|
454
|
-
(do (println (format "[%s] Agent ran but branch still can't merge cleanly" worker-id))
|
|
455
|
-
:failed))
|
|
456
|
-
(do (println (format "[%s] Resolver agent failed (exit %d)" worker-id (:exit result)))
|
|
457
|
-
:failed))))))
|
|
573
|
+
failure-snippet (first-nonblank-line (str (:out merge-result) "\n" (:err merge-result)))]
|
|
574
|
+
(run-resolver-agent! worker wt-path worker-id
|
|
575
|
+
(str "sync_worktree_to_main failed"
|
|
576
|
+
(when failure-snippet (str ": " failure-snippet))))))))
|
|
458
577
|
|
|
459
578
|
(defn- worktree-has-changes?
|
|
460
579
|
"Check if worktree has committed OR uncommitted changes vs main.
|
|
@@ -474,9 +593,11 @@
|
|
|
474
593
|
(defn- create-iteration-worktree!
|
|
475
594
|
"Create a fresh worktree for an iteration. Returns {:dir :branch :path}.
|
|
476
595
|
Force-removes stale worktree+branch from previous failed runs first."
|
|
477
|
-
[project-root worker-id iteration]
|
|
478
|
-
(let [
|
|
479
|
-
|
|
596
|
+
[project-root swarm-id worker-id iteration]
|
|
597
|
+
(let [swarm-token (or swarm-id (subs (str (java.util.UUID/randomUUID)) 0 8))
|
|
598
|
+
work-id (format "s%s-%s-i%d" swarm-token worker-id iteration)
|
|
599
|
+
wt-dir (format ".w%s" work-id)
|
|
600
|
+
wt-branch (format "oompa/%s" work-id)
|
|
480
601
|
wt-path (str project-root "/" wt-dir)]
|
|
481
602
|
;; Clean stale worktree/branch from previous failed runs
|
|
482
603
|
(process/sh ["git" "worktree" "remove" wt-dir "--force"] {:dir project-root})
|
|
@@ -495,38 +616,201 @@
|
|
|
495
616
|
(let [post-ids (tasks/current-task-ids)]
|
|
496
617
|
(clojure.set/difference post-ids pre-current-ids)))
|
|
497
618
|
|
|
619
|
+
(defn- now-ms
|
|
620
|
+
[]
|
|
621
|
+
(System/currentTimeMillis))
|
|
622
|
+
|
|
623
|
+
(defn- ms->seconds
|
|
624
|
+
[ms]
|
|
625
|
+
(/ ms 1000.0))
|
|
626
|
+
|
|
627
|
+
(defn- pct-of
|
|
628
|
+
[part total]
|
|
629
|
+
(if (pos? total)
|
|
630
|
+
(* 100.0 (/ part (double total)))
|
|
631
|
+
0.0))
|
|
632
|
+
|
|
633
|
+
(defn- init-cycle-timing
|
|
634
|
+
[]
|
|
635
|
+
{:implementation-rounds-ms []
|
|
636
|
+
:reviewer-response-ms []
|
|
637
|
+
:review-fixes-ms []
|
|
638
|
+
:optional-review-ms []
|
|
639
|
+
:llm-calls []})
|
|
640
|
+
|
|
641
|
+
(defn- add-llm-call
|
|
642
|
+
[timing section-name call-name duration-ms]
|
|
643
|
+
(let [timing (or timing (init-cycle-timing))
|
|
644
|
+
duration-ms (max 0 (long (or duration-ms 0)))]
|
|
645
|
+
(-> timing
|
|
646
|
+
(update section-name (fnil conj []) duration-ms)
|
|
647
|
+
(update :llm-calls conj {:name call-name
|
|
648
|
+
:section section-name
|
|
649
|
+
:duration-ms duration-ms}))))
|
|
650
|
+
|
|
651
|
+
(defn- cycle-llm-total-ms
|
|
652
|
+
[timing]
|
|
653
|
+
(let [sections [:implementation-rounds-ms :reviewer-response-ms :review-fixes-ms :optional-review-ms]]
|
|
654
|
+
(->> sections
|
|
655
|
+
(map #(reduce + 0 (or (get timing %) [])))
|
|
656
|
+
(reduce + 0))))
|
|
657
|
+
|
|
658
|
+
(defn- with-call-percent
|
|
659
|
+
[timing total-ms]
|
|
660
|
+
(update timing :llm-calls
|
|
661
|
+
(fn [calls]
|
|
662
|
+
(mapv (fn [{:keys [duration-ms] :as call}]
|
|
663
|
+
(assoc call :percent (pct-of duration-ms total-ms)))
|
|
664
|
+
calls))))
|
|
665
|
+
|
|
666
|
+
(defn- format-timing-segment
|
|
667
|
+
[label durations total-ms]
|
|
668
|
+
(let [durations (vec (or durations []))
|
|
669
|
+
items (if (seq durations)
|
|
670
|
+
(str/join ", "
|
|
671
|
+
(map #(format "%.2fs (%.1f%%)"
|
|
672
|
+
(ms->seconds %) (pct-of % total-ms))
|
|
673
|
+
durations))
|
|
674
|
+
"-")
|
|
675
|
+
section-ms (reduce + 0 durations)]
|
|
676
|
+
(format "%s=[%s] %.2fs (%.1f%%)"
|
|
677
|
+
label
|
|
678
|
+
items
|
|
679
|
+
(ms->seconds section-ms)
|
|
680
|
+
(pct-of section-ms total-ms))))
|
|
681
|
+
|
|
682
|
+
(defn- format-cycle-timing
|
|
683
|
+
[{:keys [implementation-rounds-ms reviewer-response-ms review-fixes-ms optional-review-ms]}
|
|
684
|
+
total-ms]
|
|
685
|
+
(let [llm-ms (cycle-llm-total-ms {:implementation-rounds-ms implementation-rounds-ms
|
|
686
|
+
:reviewer-response-ms reviewer-response-ms
|
|
687
|
+
:review-fixes-ms review-fixes-ms
|
|
688
|
+
:optional-review-ms optional-review-ms})
|
|
689
|
+
harness-ms (max 0 (- total-ms llm-ms))]
|
|
690
|
+
(str "timing: "
|
|
691
|
+
(format-timing-segment "Implementation" implementation-rounds-ms total-ms)
|
|
692
|
+
" | "
|
|
693
|
+
(format-timing-segment "Reviewer" reviewer-response-ms total-ms)
|
|
694
|
+
" | "
|
|
695
|
+
(format-timing-segment "Fixes" review-fixes-ms total-ms)
|
|
696
|
+
" | "
|
|
697
|
+
(format-timing-segment "OptionalReview" optional-review-ms total-ms)
|
|
698
|
+
" | LLM="
|
|
699
|
+
(format "%.2fs (%.1f%%)" (ms->seconds llm-ms) (pct-of llm-ms total-ms))
|
|
700
|
+
" | Harness="
|
|
701
|
+
(format "%.2fs (%.1f%%)" (ms->seconds harness-ms) (pct-of harness-ms total-ms))
|
|
702
|
+
" | Total="
|
|
703
|
+
(format "%.2fs" (ms->seconds total-ms)))))
|
|
704
|
+
|
|
705
|
+
(defn- safe-number
|
|
706
|
+
[v]
|
|
707
|
+
(if (number? v) (long v) 0))
|
|
708
|
+
|
|
709
|
+
(defn- safe-sum
|
|
710
|
+
[v]
|
|
711
|
+
(reduce + 0 (or v [])))
|
|
712
|
+
|
|
713
|
+
(defn- format-ms
|
|
714
|
+
[ms]
|
|
715
|
+
(format "%.2fs" (ms->seconds (safe-number ms))))
|
|
716
|
+
|
|
717
|
+
(defn- cycle-time-sum
|
|
718
|
+
[{:keys [implementation-rounds-ms reviewer-response-ms review-fixes-ms optional-review-ms] :as timing-ms}
|
|
719
|
+
duration-ms]
|
|
720
|
+
(let [impl (safe-sum implementation-rounds-ms)
|
|
721
|
+
review (safe-sum reviewer-response-ms)
|
|
722
|
+
fixes (safe-sum review-fixes-ms)
|
|
723
|
+
optional (safe-sum optional-review-ms)
|
|
724
|
+
total (safe-number duration-ms)
|
|
725
|
+
llm (+ impl review fixes optional)
|
|
726
|
+
harness (max 0 (- total llm))]
|
|
727
|
+
{:implementation-ms impl
|
|
728
|
+
:review-ms review
|
|
729
|
+
:fixes-ms fixes
|
|
730
|
+
:optional-review-ms optional
|
|
731
|
+
:llm-ms llm
|
|
732
|
+
:harness-ms harness
|
|
733
|
+
:total-ms total}))
|
|
734
|
+
|
|
735
|
+
(def ^:private empty-cycle-total
|
|
736
|
+
{:implementation-ms 0
|
|
737
|
+
:review-ms 0
|
|
738
|
+
:fixes-ms 0
|
|
739
|
+
:optional-review-ms 0
|
|
740
|
+
:llm-ms 0
|
|
741
|
+
:harness-ms 0
|
|
742
|
+
:total-ms 0})
|
|
743
|
+
|
|
744
|
+
(defn- aggregate-cycle-timings-by-worker
|
|
745
|
+
[swarm-id]
|
|
746
|
+
(reduce (fn [acc {:keys [worker-id timing-ms duration-ms]}]
|
|
747
|
+
(update acc worker-id
|
|
748
|
+
(fn [current]
|
|
749
|
+
(merge-with + (or current empty-cycle-total)
|
|
750
|
+
(cycle-time-sum timing-ms duration-ms)))))
|
|
751
|
+
{}
|
|
752
|
+
(or (when swarm-id (runs/list-cycles swarm-id)) [])))
|
|
753
|
+
|
|
754
|
+
(defn- worker-summary-row
|
|
755
|
+
[{:keys [id status completed cycles-completed merges claims rejections errors recycled review-rounds-total] :as _worker}
|
|
756
|
+
{:keys [implementation-ms review-ms fixes-ms harness-ms total-ms]}]
|
|
757
|
+
{:Worker id
|
|
758
|
+
:Runs (or completed cycles-completed 0)
|
|
759
|
+
:Cycles (or cycles-completed 0)
|
|
760
|
+
:Status (name status)
|
|
761
|
+
:Merges (or merges 0)
|
|
762
|
+
:Claims (or claims 0)
|
|
763
|
+
:Rejects (or rejections 0)
|
|
764
|
+
:Errors (or errors 0)
|
|
765
|
+
:Recycled (or recycled 0)
|
|
766
|
+
:ReviewRounds (or review-rounds-total 0)
|
|
767
|
+
:ImplMs (format-ms implementation-ms)
|
|
768
|
+
:ReviewMs (format-ms review-ms)
|
|
769
|
+
:FixMs (format-ms fixes-ms)
|
|
770
|
+
:HarnessMs (format-ms harness-ms)
|
|
771
|
+
:TotalMs (format-ms total-ms)})
|
|
772
|
+
|
|
498
773
|
(defn- emit-cycle-log!
|
|
499
|
-
"Write cycle event log. Called at every cycle exit point.
|
|
774
|
+
"Write cycle event log. Called at every cycle attempt exit point.
|
|
500
775
|
session-id links to the Claude CLI conversation transcript on disk.
|
|
501
776
|
No mutable summary state — all state is derived from immutable cycle logs."
|
|
502
|
-
[swarm-id worker-id cycle start-ms session-id
|
|
503
|
-
{:keys [outcome claimed-task-ids recycled-tasks error-snippet review-rounds
|
|
504
|
-
|
|
777
|
+
[swarm-id worker-id cycle attempt run start-ms session-id
|
|
778
|
+
{:keys [outcome claimed-task-ids recycled-tasks error-snippet review-rounds timing-ms
|
|
779
|
+
worktree-path signals]}]
|
|
780
|
+
(let [duration-ms (- (now-ms) start-ms)
|
|
781
|
+
timing-ms (or timing-ms (init-cycle-timing))
|
|
782
|
+
harness-ms (max 0 (- duration-ms (cycle-llm-total-ms timing-ms)))
|
|
783
|
+
timing-ms (with-call-percent (assoc timing-ms
|
|
784
|
+
:harness-ms harness-ms
|
|
785
|
+
:llm-calls (or (:llm-calls timing-ms) []))
|
|
786
|
+
duration-ms)]
|
|
505
787
|
(runs/write-cycle-log!
|
|
506
788
|
swarm-id worker-id cycle
|
|
507
|
-
{:
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
789
|
+
(cond-> {:run run
|
|
790
|
+
:attempt attempt
|
|
791
|
+
:outcome outcome
|
|
792
|
+
:duration-ms duration-ms
|
|
793
|
+
:claimed-task-ids (vec (or claimed-task-ids []))
|
|
794
|
+
:recycled-tasks (or recycled-tasks [])
|
|
795
|
+
:error-snippet error-snippet
|
|
796
|
+
:review-rounds (or review-rounds 0)
|
|
797
|
+
:session-id session-id
|
|
798
|
+
:timing-ms timing-ms}
|
|
799
|
+
worktree-path (assoc :worktree-path worktree-path)
|
|
800
|
+
(seq signals) (assoc :signals (vec signals))))
|
|
801
|
+
(let [terminal-outcomes #{:merged :merge-failed :rejected :sync-failed :no-changes
|
|
802
|
+
:executor-done :stuck :error :interrupted :needs-followup}]
|
|
803
|
+
(if (and outcome (contains? terminal-outcomes outcome))
|
|
804
|
+
(do
|
|
805
|
+
(println (format "[%s] %s" worker-id (format-cycle-timing timing-ms duration-ms)))
|
|
806
|
+
(when worktree-path
|
|
807
|
+
(println (format "[%s] worktree: %s" worker-id worktree-path)))
|
|
808
|
+
(when (seq signals)
|
|
809
|
+
(println (format "[%s] signals: %s" worker-id (str/join " → " signals)))))
|
|
810
|
+
(println (format "[%s] Cycle %d attempt %d continuing"
|
|
811
|
+
worker-id cycle attempt))))))
|
|
812
|
+
|
|
813
|
+
|
|
530
814
|
|
|
531
815
|
(defn- cleanup-worktree!
|
|
532
816
|
"Remove worktree and branch."
|
|
@@ -567,7 +851,8 @@
|
|
|
567
851
|
(defn- merge-to-main!
|
|
568
852
|
"Merge worktree changes to main branch. Serialized via merge-lock to prevent
|
|
569
853
|
concurrent workers from corrupting the git index. On success, moves claimed
|
|
570
|
-
tasks current→complete and annotates metadata. Returns
|
|
854
|
+
tasks current→complete and annotates metadata. Returns
|
|
855
|
+
{:ok? bool :reason keyword :message string}.
|
|
571
856
|
claimed-task-ids: set of task IDs this worker claimed (framework owns completion)."
|
|
572
857
|
[wt-path wt-id worker-id project-root review-rounds claimed-task-ids]
|
|
573
858
|
(locking merge-lock
|
|
@@ -586,31 +871,66 @@
|
|
|
586
871
|
(process/sh ["git" "merge" wt-id "--no-edit"]
|
|
587
872
|
{:dir project-root :out :string :err :string}))
|
|
588
873
|
success (and (zero? (:exit checkout-result))
|
|
589
|
-
(zero? (:exit merge-result)))
|
|
874
|
+
(zero? (:exit merge-result)))
|
|
875
|
+
failure-text (str/join "\n"
|
|
876
|
+
(remove str/blank?
|
|
877
|
+
[(:out checkout-result)
|
|
878
|
+
(:err checkout-result)
|
|
879
|
+
(when merge-result (:out merge-result))
|
|
880
|
+
(when merge-result (:err merge-result))]))
|
|
881
|
+
failure-reason (if (not (zero? (:exit checkout-result)))
|
|
882
|
+
:checkout-failed
|
|
883
|
+
(classify-merge-failure failure-text))]
|
|
590
884
|
(if success
|
|
591
|
-
(
|
|
885
|
+
(let [completed (when (seq claimed-task-ids)
|
|
886
|
+
(tasks/complete-by-ids! claimed-task-ids))
|
|
887
|
+
completed-count (count (or completed []))]
|
|
592
888
|
(println (format "[%s] Merge successful" worker-id))
|
|
593
889
|
;; Framework-owned completion: move claimed tasks current→complete
|
|
594
|
-
(when (seq
|
|
595
|
-
(
|
|
596
|
-
|
|
597
|
-
(println (format "[%s] Completed %d task(s): %s"
|
|
598
|
-
worker-id (count completed) (str/join ", " completed))))))
|
|
890
|
+
(when (seq completed)
|
|
891
|
+
(println (format "[%s] Completed %d task(s): %s"
|
|
892
|
+
worker-id completed-count (str/join ", " completed))))
|
|
599
893
|
;; Annotate completed tasks with metadata while still holding merge-lock
|
|
600
|
-
(annotate-completed-tasks! project-root worker-id review-rounds)
|
|
894
|
+
(annotate-completed-tasks! project-root worker-id review-rounds)
|
|
895
|
+
{:ok? true
|
|
896
|
+
:reason :merged
|
|
897
|
+
:message "merge successful"
|
|
898
|
+
:completed-count completed-count})
|
|
601
899
|
;; FAILED: Clean up git state before releasing merge-lock.
|
|
602
900
|
;; Without this, a conflict leaves .git/MERGE_HEAD and poisons the
|
|
603
901
|
;; shared index — every subsequent worker fails on `git checkout main`.
|
|
604
902
|
(do
|
|
605
|
-
(println (format "[%s] MERGE FAILED: %s"
|
|
606
|
-
|
|
903
|
+
(println (format "[%s] MERGE FAILED (%s): %s"
|
|
904
|
+
worker-id
|
|
905
|
+
(name failure-reason)
|
|
906
|
+
(or (first-nonblank-line failure-text)
|
|
907
|
+
"no output")))
|
|
607
908
|
(let [abort-result (process/sh ["git" "merge" "--abort"]
|
|
608
909
|
{:dir project-root :out :string :err :string})]
|
|
609
910
|
(when-not (zero? (:exit abort-result))
|
|
610
911
|
;; Abort failed (no merge in progress, or other issue) — hard reset.
|
|
611
912
|
(process/sh ["git" "reset" "--hard" "HEAD"]
|
|
612
|
-
{:dir project-root :out :string :err :string})))
|
|
613
|
-
|
|
913
|
+
{:dir project-root :out :string :err :string})))
|
|
914
|
+
{:ok? false
|
|
915
|
+
:reason failure-reason
|
|
916
|
+
:message (or (first-nonblank-line failure-text) "merge failed")})))))
|
|
917
|
+
|
|
918
|
+
(defn- recover-merge-failure!
  "Second-chance path for a failed merge-to-main!.

  Logs the failure, hands the worktree to the resolver agent together with a
  short description of the failure (reason keyword plus message, when present),
  and — unless the resolver reports :failed — retries merge-to-main! exactly
  once with the same arguments.

  merge-result is the failed result map; its :reason and :message are read.
  Returns the original merge-result when the resolver fails, otherwise the
  result of the retried merge.

  Must run outside merge-lock to avoid blocking other workers."
  [worker wt-path wt-id worker-id project-root review-rounds claimed-task-ids merge-result]
  (let [{failure-kind :reason detail :message} merge-result
        ;; A missing reason is reported as "unknown" in both the log and the prompt.
        kind-label (name (or failure-kind :unknown))]
    (println (format "[%s] Launching resolver after merge failure (%s): %s"
                     worker-id kind-label (or detail "merge failed")))
    (let [resolver-context (cond-> (str "merge_to_main failed (" kind-label ")")
                             detail (str ": " detail))
          resolver-outcome (run-resolver-agent! worker wt-path worker-id resolver-context)]
      (if-not (= :failed resolver-outcome)
        (do
          (println (format "[%s] Retrying merge after resolver" worker-id))
          (merge-to-main! wt-path wt-id worker-id project-root review-rounds claimed-task-ids))
        ;; Resolver gave up: surface the original failure unchanged.
        merge-result))))
|
|
614
934
|
|
|
615
935
|
(defn- task-only-diff?
|
|
616
936
|
"Check if all changes in worktree are task files only (no code changes).
|
|
@@ -640,16 +960,21 @@
|
|
|
640
960
|
and fixer has full context of all prior feedback.
|
|
641
961
|
Writes review logs to runs/{swarm-id}/reviews/ for post-mortem analysis.
|
|
642
962
|
Returns {:approved? bool, :attempts int}"
|
|
643
|
-
[worker wt-path worker-id iteration]
|
|
644
|
-
(if
|
|
963
|
+
[worker wt-path worker-id iteration & [cycle-timing]]
|
|
964
|
+
(if (empty? (:reviewers worker))
|
|
645
965
|
;; No reviewer configured, auto-approve
|
|
646
|
-
{:approved? true :attempts 0}
|
|
966
|
+
{:approved? true :attempts 0 :timing (or cycle-timing (init-cycle-timing))}
|
|
647
967
|
|
|
648
968
|
;; Run review loop with accumulated feedback
|
|
649
969
|
(loop [attempt 1
|
|
650
|
-
prev-feedback []
|
|
970
|
+
prev-feedback []
|
|
971
|
+
timing (or cycle-timing (init-cycle-timing))]
|
|
651
972
|
(println (format "[%s] Review attempt %d/%d" worker-id attempt max-review-retries))
|
|
652
|
-
(let [{:keys [verdict output]} (run-reviewer! worker wt-path prev-feedback)
|
|
973
|
+
(let [{:keys [verdict output duration-ms]} (run-reviewer! worker wt-path prev-feedback)
|
|
974
|
+
timing (add-llm-call timing
|
|
975
|
+
:reviewer-response-ms
|
|
976
|
+
(str "review_" attempt)
|
|
977
|
+
(or duration-ms 0))
|
|
653
978
|
diff-files (diff-file-names wt-path)]
|
|
654
979
|
|
|
655
980
|
;; Persist review log for this round
|
|
@@ -657,13 +982,14 @@
|
|
|
657
982
|
(runs/write-review-log! (:swarm-id worker) worker-id iteration attempt
|
|
658
983
|
{:verdict verdict
|
|
659
984
|
:output output
|
|
985
|
+
:duration-ms (or duration-ms 0)
|
|
660
986
|
:diff-files (or diff-files [])}))
|
|
661
987
|
|
|
662
988
|
(case verdict
|
|
663
989
|
:approved
|
|
664
990
|
(do
|
|
665
991
|
(println (format "[%s] Reviewer APPROVED (attempt %d)" worker-id attempt))
|
|
666
|
-
{:approved? true :attempts attempt})
|
|
992
|
+
{:approved? true :attempts attempt :timing timing})
|
|
667
993
|
|
|
668
994
|
;; :needs-changes — always give the worker a chance to fix.
|
|
669
995
|
;; Hard rejection only happens when max review rounds are exhausted.
|
|
@@ -671,37 +997,49 @@
|
|
|
671
997
|
(if (>= attempt max-review-retries)
|
|
672
998
|
(do
|
|
673
999
|
(println (format "[%s] Max review retries reached (%d rounds)" worker-id attempt))
|
|
674
|
-
{:approved? false :attempts attempt})
|
|
1000
|
+
{:approved? false :attempts attempt :timing timing})
|
|
675
1001
|
(do
|
|
676
1002
|
(println (format "[%s] Reviewer requested changes, fixing..." worker-id))
|
|
677
|
-
(run-fix! worker wt-path all-feedback)
|
|
678
|
-
|
|
1003
|
+
(let [{:keys [duration-ms]} (run-fix! worker wt-path all-feedback)
|
|
1004
|
+
timing (add-llm-call timing
|
|
1005
|
+
:review-fixes-ms
|
|
1006
|
+
(str "fix_" attempt)
|
|
1007
|
+
(or duration-ms 0))]
|
|
1008
|
+
(recur (inc attempt) all-feedback timing))))))))))
|
|
679
1009
|
|
|
680
1010
|
;; =============================================================================
|
|
681
1011
|
;; Worker Loop
|
|
682
1012
|
;; =============================================================================
|
|
683
1013
|
|
|
684
|
-
;; Workers wait
|
|
1014
|
+
;; Workers can wait for tasks before giving up; default is 10 minutes.
|
|
685
1015
|
;; This keeps workers alive while planners/designers ramp up the queue.
|
|
686
|
-
(def ^:private max-wait-for-tasks 600)
|
|
687
1016
|
;; Seconds between queue polls in wait-for-tasks! (multiplied by 1000 for Thread/sleep).
(def ^:private wait-poll-interval 10)
|
|
688
|
-
(def ^:private max-consecutive-errors
|
|
1017
|
+
;; A worker stops with an error status once this many errors occur back to back;
;; backoff-sleep! also uses it as the upper bound for retries.
(def ^:private max-consecutive-errors 5)
|
|
1018
|
+
|
|
1019
|
+
(defn- backoff-sleep!
  "Exponential backoff before the next retry.

  id     - worker id, used only as the log prefix.
  errors - number of consecutive errors so far.

  Sleeps 60 * 2^(errors - 1) seconds (60s, 120s, 240s, ... for errors = 1, 2,
  3, ...) after logging the wait. Does nothing when errors has already reached
  max-consecutive-errors. Returns nil."
  [id errors]
  (when-not (>= errors max-consecutive-errors)
    (let [multiplier    (int (Math/pow 2 (dec errors)))
          delay-seconds (* 60 multiplier)
          retry-limit   (dec max-consecutive-errors)
          message       (format "[%s] Backing off for %d seconds before next retry (%d/%d)..."
                                id delay-seconds errors retry-limit)]
      (println message)
      (Thread/sleep (* 1000 delay-seconds)))))
|
|
1024
|
+
|
|
689
1025
|
|
|
690
1026
|
(defn- wait-for-tasks!
  "Wait up to max-wait-seconds for pending/current tasks to appear.
   Used for backpressure on workers that can't create their own tasks (can_plan: false).

   worker-id        - id used as the log prefix.
   max-wait-seconds - give-up threshold in seconds; nil falls back to 600
                      (10 minutes) so an unconfigured worker does not crash
                      on the numeric comparison.

   Polls every wait-poll-interval seconds and logs progress whenever the
   elapsed time is a multiple of 60 seconds.
   Returns true as soon as a pending or current task exists, false on timeout."
  [worker-id max-wait-seconds]
  (let [max-wait-seconds (or max-wait-seconds 600)]
    (loop [waited 0]
      (cond
        (pos? (tasks/pending-count)) true
        (pos? (tasks/current-count)) true
        (>= waited max-wait-seconds)
        (do (println (format "[%s] [%s] No tasks after %ds, giving up"
                             worker-id (log-ts) waited))
            false)
        :else
        (do (when (zero? (mod waited 60))
              (println (format "[%s] [%s] Waiting for tasks... (%ds/%ds)"
                               worker-id (log-ts) waited max-wait-seconds)))
            (Thread/sleep (* wait-poll-interval 1000))
            (recur (+ waited wait-poll-interval)))))))
|
|
707
1045
|
|
|
@@ -716,267 +1054,374 @@
|
|
|
716
1054
|
(defn run-worker!
  "Run worker loop with persistent sessions.

   A run is a terminal outcome (merged/rejected/error-like).
   A cycle is one worker turn/resume. Multiple cycles may occur in one run.
   Cycle cap is controlled by :max-cycles (legacy key: :iterations).

   Loop bookkeeping: `cycle` advances after every terminal outcome and after a
   failed worktree creation; resumes within a cycle (claim, needs-followup,
   still working) only advance `attempt` and keep the session and worktree.

   Returns the worker map with :status (:exhausted, :completed, :interrupted
   or :error), :completed / :runs-completed, :cycles-completed and the metric
   counters (:merges :rejections :errors :recycled :review-rounds-total :claims).
   The counters reflect the metrics as updated by the branch that stops the
   worker, including the final error/recycle increments."
  [worker]
  (tasks/ensure-dirs!)
  (let [{:keys [id runs max-cycles iterations swarm-id wait-between
                max-wait-for-tasks max-needs-followups]} worker
        cycle-cap (or max-cycles iterations 10)
        run-goal (or runs iterations 10)
        project-root (System/getProperty "user.dir")]
    (println (format "[%s] Starting worker (%s:%s%s, goal=%d runs, cap=%d cycles%s)"
                     id
                     (name (:harness worker))
                     (or (:model worker) "default")
                     (if (:reasoning worker) (str ":" (:reasoning worker)) "")
                     run-goal
                     cycle-cap
                     (if wait-between (format ", %ds between" wait-between) "")))

    ;; Workers that cannot plan wait for the queue to fill before the first cycle.
    (when (and (not (:can-plan worker))
               (not (pos? (tasks/pending-count)))
               (not (pos? (tasks/current-count))))
      (wait-for-tasks! id max-wait-for-tasks))

    (loop [cycle 1
           attempt 1
           completed-runs 0
           consec-errors 0
           metrics {:merges 0 :rejections 0 :errors 0 :recycled 0 :review-rounds-total 0 :claims 0}
           session-id nil
           wt-state nil
           claimed-ids #{}
           claim-resume-prompt nil
           working-resumes 0
           needs-followups 0
           signals []]
      ;; finish takes the metrics explicitly: branches below shadow `metrics`
      ;; with updated counters, and a closure over the loop binding would
      ;; silently report the stale values.
      (let [finish (fn [status final-metrics]
                     (assoc worker :completed completed-runs
                            :runs-completed completed-runs
                            :cycles-completed (dec cycle)
                            :status status
                            :merges (:merges final-metrics)
                            :rejections (:rejections final-metrics)
                            :errors (:errors final-metrics)
                            :recycled (:recycled final-metrics)
                            :review-rounds-total (:review-rounds-total final-metrics)
                            :claims (:claims final-metrics)))
            current-run (inc completed-runs)]
        (cond
          (> cycle cycle-cap)
          (do
            (when wt-state
              (when (seq claimed-ids)
                (recycle-task-id-set! id claimed-ids))
              (cleanup-worktree! project-root (:dir wt-state) (:branch wt-state)))
            (println (format "[%s] Completed %d/%d runs in %d cycles (%d merges, %d claims, %d rejections, %d errors, %d recycled)"
                             id completed-runs run-goal (dec cycle)
                             (:merges metrics) (:claims metrics) (:rejections metrics) (:errors metrics) (:recycled metrics)))
            (finish :exhausted metrics))

          (>= completed-runs run-goal)
          (do
            (when wt-state
              (when (seq claimed-ids)
                (recycle-task-id-set! id claimed-ids))
              (cleanup-worktree! project-root (:dir wt-state) (:branch wt-state)))
            (println (format "[%s] Reached run goal: %d/%d runs in %d cycles"
                             id completed-runs run-goal (dec cycle)))
            (finish :completed metrics))

          @shutdown-requested?
          (do
            (println (format "[%s] Shutdown requested, stopping after %d cycles" id (dec cycle)))
            (when wt-state
              (when (seq claimed-ids)
                (let [recycled (tasks/recycle-tasks! claimed-ids)]
                  (when (seq recycled)
                    (println (format "[%s] Recycled %d claimed task(s) on shutdown" id (count recycled))))))
              (cleanup-worktree! project-root (:dir wt-state) (:branch wt-state)))
            (emit-cycle-log! swarm-id id cycle attempt current-run (now-ms) session-id
                             {:timing-ms (init-cycle-timing)
                              :outcome :interrupted})
            (finish :interrupted metrics))

          :else
          (do
            (maybe-sleep-between! id wait-between cycle)

            (when (and (not (:can-plan worker))
                       (not (pos? (tasks/pending-count)))
                       (not (pos? (tasks/current-count))))
              (println (format "[%s] Queue empty, waiting for tasks before cycle %d" id cycle))
              (wait-for-tasks! id max-wait-for-tasks))

            ;; Reuse the worktree when resuming; create a fresh one otherwise.
            (let [wt-state (try
                             (or wt-state (create-iteration-worktree! project-root swarm-id id cycle))
                             (catch Exception e
                               (println (format "[%s] Worktree creation failed: %s" id (.getMessage e)))
                               nil))]
              (if (nil? wt-state)
                (let [errors (inc consec-errors)
                      metrics (update metrics :errors inc)]
                  (if (>= errors max-consecutive-errors)
                    (do
                      (println (format "[%s] %d consecutive errors, stopping" id errors))
                      (finish :error metrics))
                    (do (backoff-sleep! id errors)
                        (recur (inc cycle) 1 completed-runs errors metrics nil nil #{} nil 0 0 []))))

                (let [resume? (or (some? session-id) (some? claim-resume-prompt))
                      cycle-start-ms (now-ms)
                      cycle-timing (init-cycle-timing)
                      pre-current-ids (tasks/current-task-ids)
                      _ (println (format "[%s] %s cycle %d/%d (run %d/%d, attempt %d)"
                                         id
                                         (if (= attempt 1) "Starting" "Resuming")
                                         cycle cycle-cap current-run run-goal attempt))
                      context (build-context)
                      agent-start-ms (now-ms)
                      {:keys [output exit done? merge? needs-followup? claim-ids parse-warning raw-snippet] :as agent-result}
                      (run-agent! worker (:path wt-state) context session-id resume?
                                  :resume-prompt-override claim-resume-prompt)
                      cycle-timing (add-llm-call cycle-timing
                                                 :implementation-rounds-ms
                                                 "implementation"
                                                 (- (now-ms) agent-start-ms))
                      new-session-id (:session-id agent-result)
                      stderr-snippet (:stderr-snippet agent-result)
                      mv-claimed-tasks (detect-claimed-tasks pre-current-ids)
                      active-claimed-ids (active-claimed-task-ids claimed-ids mv-claimed-tasks)
                      wt-path (:path wt-state)
                      ;; Classify the signal for this attempt
                      signal-label (cond
                                     (not (zero? exit)) (str "error:exit-" exit)
                                     (and (seq claim-ids) (not merge?) (not done?))
                                     (str "claim:" (str/join "," claim-ids))
                                     merge? "merge"
                                     done? "done"
                                     needs-followup? "needs-followup"
                                     :else "working")
                      signals (conj signals signal-label)
                      emit! (fn [opts]
                              (emit-cycle-log! swarm-id id cycle attempt current-run cycle-start-ms new-session-id
                                               (merge {:worktree-path wt-path :signals signals} opts)))]
                  (cond
                    ;; Agent process failed: recycle claims, drop the worktree, back off.
                    (not (zero? exit))
                    (let [errors (inc consec-errors)
                          recycled (recycle-active-claims! id claimed-ids mv-claimed-tasks)
                          metrics (-> metrics (update :errors inc) (update :recycled + (count recycled)))
                          error-msg (subs (or output "") 0 (min 200 (count (or output ""))))]
                      (println (format "[%s] Agent error (exit %d): %s" id exit error-msg))
                      (when (seq stderr-snippet)
                        (println (format "[%s] Agent stderr snippet: %s"
                                         id
                                         (snippet (str/replace stderr-snippet #"\s+" " ") 240))))
                      (emit!
                       {:timing-ms cycle-timing
                        :outcome :error
                        :claimed-task-ids (vec active-claimed-ids)
                        :recycled-tasks (seq recycled)
                        :error-snippet error-msg})
                      (cleanup-worktree! project-root (:dir wt-state) (:branch wt-state))
                      (if (>= errors max-consecutive-errors)
                        (do
                          (println (format "[%s] %d consecutive errors, stopping" id errors))
                          (finish :error metrics))
                        (do (backoff-sleep! id errors)
                            (recur (inc cycle) 1 (inc completed-runs) errors metrics nil nil #{} nil 0 0 []))))

                    ;; CLAIM signal: claim tasks, then resume the same cycle.
                    (and (seq claim-ids) (not merge?) (not done?))
                    (let [_ (println (format "[%s] CLAIM signal: %s" id (str/join ", " claim-ids)))
                          {:keys [claimed resume-prompt]} (execute-claims! claim-ids)
                          new-claimed-ids (into active-claimed-ids claimed)
                          metrics (update metrics :claims + (count claimed))]
                      (println (format "[%s] Claimed %d/%d tasks" id (count claimed) (count claim-ids)))
                      (emit!
                       {:timing-ms cycle-timing
                        :outcome :claimed :claimed-task-ids (vec claimed)})
                      (recur cycle (inc attempt) completed-runs 0 metrics new-session-id wt-state
                             new-claimed-ids resume-prompt 0 0 signals))

                    merge?
                    (if (worktree-has-changes? (:path wt-state))
                      (if (task-only-diff? (:path wt-state))
                        ;; Task-file-only changes skip review and merge directly.
                        (let [all-claimed active-claimed-ids]
                          (println (format "[%s] Task-only diff, auto-merging" id))
                          (let [sync-status (sync-worktree-to-main! worker (:path wt-state) id)]
                            (if (= :failed sync-status)
                              (let [recycled (recycle-task-id-set! id all-claimed)
                                    metrics (update metrics :recycled + (count recycled))]
                                (println (format "[%s] Sync to main failed, skipping merge" id))
                                (emit!
                                 {:timing-ms cycle-timing
                                  :outcome :sync-failed
                                  :claimed-task-ids (vec all-claimed)
                                  :recycled-tasks (seq recycled)})
                                (cleanup-worktree! project-root (:dir wt-state) (:branch wt-state))
                                (recur (inc cycle) 1 (inc completed-runs) 0 metrics nil nil #{} nil 0 0 []))
                              (let [merge-result (merge-to-main! (:path wt-state) (:branch wt-state) id project-root 0 all-claimed)
                                    merge-result (if (:ok? merge-result)
                                                   merge-result
                                                   (recover-merge-failure! worker (:path wt-state) (:branch wt-state)
                                                                           id project-root 0 all-claimed merge-result))
                                    merged? (:ok? merge-result)
                                    recycled (when-not merged?
                                               (recycle-task-id-set! id all-claimed))
                                    completed-count (or (:completed-count merge-result) 0)
                                    metrics (cond-> metrics
                                              (and merged? (pos? completed-count)) (update :merges inc)
                                              (seq recycled) (update :recycled + (count recycled)))]
                                (println (format "[%s] Cycle %d/%d complete" id cycle cycle-cap))
                                (emit!
                                 {:timing-ms cycle-timing
                                  :outcome (if merged? :merged :merge-failed)
                                  :claimed-task-ids (vec all-claimed)
                                  :recycled-tasks (seq recycled)
                                  :review-rounds 0})
                                (cleanup-worktree! project-root (:dir wt-state) (:branch wt-state))
                                (recur (inc cycle) 1 (inc completed-runs) 0 metrics nil nil #{} nil 0 0 [])))))
                        ;; Code changes go through the review loop before merging.
                        (let [{:keys [approved? attempts timing]} (review-loop! worker (:path wt-state) id cycle cycle-timing)
                              cycle-timing (or timing cycle-timing)
                              metrics (-> metrics
                                          (update :review-rounds-total + (or attempts 0))
                                          (cond-> (not approved?) (update :rejections inc)))]
                          (if approved?
                            (let [sync-status (sync-worktree-to-main! worker (:path wt-state) id)
                                  all-claimed active-claimed-ids]
                              (if (= :failed sync-status)
                                (let [recycled (recycle-task-id-set! id all-claimed)
                                      metrics (update metrics :recycled + (count recycled))]
                                  (println (format "[%s] Sync to main failed after approval, skipping merge" id))
                                  (emit!
                                   {:timing-ms cycle-timing
                                    :outcome :sync-failed
                                    :claimed-task-ids (vec all-claimed)
                                    :recycled-tasks (seq recycled)
                                    :review-rounds (or attempts 0)})
                                  (cleanup-worktree! project-root (:dir wt-state) (:branch wt-state))
                                  (recur (inc cycle) 1 (inc completed-runs) 0 metrics nil nil #{} nil 0 0 []))
                                (let [merge-result (merge-to-main! (:path wt-state) (:branch wt-state) id project-root (or attempts 0) all-claimed)
                                      merge-result (if (:ok? merge-result)
                                                     merge-result
                                                     (recover-merge-failure! worker (:path wt-state) (:branch wt-state)
                                                                             id project-root (or attempts 0) all-claimed merge-result))
                                      merged? (:ok? merge-result)
                                      recycled (when-not merged?
                                                 (recycle-task-id-set! id all-claimed))
                                      completed-count (or (:completed-count merge-result) 0)
                                      metrics (cond-> metrics
                                                (and merged? (pos? completed-count)) (update :merges inc)
                                                (seq recycled) (update :recycled + (count recycled)))]
                                  (println (format "[%s] Cycle %d/%d complete" id cycle cycle-cap))
                                  (emit!
                                   {:timing-ms cycle-timing
                                    :outcome (if merged? :merged :merge-failed)
                                    :claimed-task-ids (vec all-claimed)
                                    :recycled-tasks (seq recycled)
                                    :review-rounds (or attempts 0)})
                                  (cleanup-worktree! project-root (:dir wt-state) (:branch wt-state))
                                  (recur (inc cycle) 1 (inc completed-runs) 0 metrics nil nil #{} nil 0 0 []))))
                            (let [recycled (recycle-active-claims! id claimed-ids mv-claimed-tasks)
                                  metrics (update metrics :recycled + (count recycled))]
                              (println (format "[%s] Cycle %d/%d rejected" id cycle cycle-cap))
                              (emit!
                               {:timing-ms cycle-timing
                                :outcome :rejected
                                :claimed-task-ids (vec active-claimed-ids)
                                :recycled-tasks (seq recycled)
                                :review-rounds (or attempts 0)})
                              (cleanup-worktree! project-root (:dir wt-state) (:branch wt-state))
                              (recur (inc cycle) 1 (inc completed-runs) 0 metrics nil nil #{} nil 0 0 [])))))
                      (let [recycled (recycle-active-claims! id claimed-ids mv-claimed-tasks)
                            metrics (update metrics :recycled + (count recycled))]
                        (println (format "[%s] Merge signaled but no changes, skipping" id))
                        (emit!
                         {:timing-ms cycle-timing
                          :outcome :no-changes
                          :claimed-task-ids (vec active-claimed-ids)
                          :recycled-tasks (seq recycled)})
                        (cleanup-worktree! project-root (:dir wt-state) (:branch wt-state))
                        (recur (inc cycle) 1 (inc completed-runs) 0 metrics nil nil #{} nil 0 0 [])))

                    ;; __DONE__ is treated as an invalid executor signal and stops the worker.
                    done?
                    (let [recycled (recycle-active-claims! id claimed-ids mv-claimed-tasks)
                          metrics (-> metrics
                                      (update :recycled + (count recycled))
                                      (update :errors inc))]
                      (println (format "[%s] Invalid __DONE__ signal from executor; stopping worker (cycle %d/%d)" id cycle cycle-cap))
                      (emit!
                       {:timing-ms cycle-timing
                        :outcome :error
                        :claimed-task-ids (vec active-claimed-ids)
                        :recycled-tasks (seq recycled)
                        :error-snippet "__DONE__ is not a valid executor signal; use CLAIM(...) or COMPLETE_AND_READY_FOR_MERGE"})
                      (cleanup-worktree! project-root (:dir wt-state) (:branch wt-state))
                      (finish :error metrics))

                    needs-followup?
                    (let [summary (subs (or output "") 0 (min 240 (count (or output ""))))
                          next-followups (inc needs-followups)]
                      (emit!
                       {:timing-ms cycle-timing
                        :outcome :needs-followup
                        :claimed-task-ids (vec active-claimed-ids)
                        :error-snippet summary})
                      (if (> next-followups max-needs-followups)
                        (let [recycled (recycle-active-claims! id claimed-ids mv-claimed-tasks)
                              metrics (-> metrics
                                          (update :recycled + (count recycled))
                                          (update :errors inc))]
                          (println (format "[%s] NEEDS_FOLLOWUP exhausted (%d/%d); stopping worker" id next-followups max-needs-followups))
                          (cleanup-worktree! project-root (:dir wt-state) (:branch wt-state))
                          (finish :error metrics))
                        (let [followup-prompt (build-needs-followup-prompt active-claimed-ids output)]
                          (println (format "[%s] NEEDS_FOLLOWUP signal; continuing cycle with follow-up prompt (%d/%d)"
                                           id next-followups max-needs-followups))
                          (recur cycle (inc attempt) completed-runs 0 metrics new-session-id wt-state
                                 active-claimed-ids followup-prompt 0 next-followups signals))))

                    ;; No signal: the agent is still working. Resume, nudge, or give up.
                    :else
                    (let [wr (inc working-resumes)
                          max-wr (:max-working-resumes worker)]
                      (when parse-warning
                        (if (str/includes? parse-warning "AUTH_REQUIRED:")
                          (println (format "[%s] LOGIN ISSUE: %s"
                                           id
                                           (str/replace parse-warning #"^AUTH_REQUIRED:\s*" "")))
                          (println (format "[%s] WARNING: %s" id parse-warning))))
                      (when (and parse-warning (seq raw-snippet))
                        (println (format "[%s] Raw output snippet: %s"
                                         id
                                         (snippet (str/replace raw-snippet #"\s+" " ") 240))))
                      (when (seq stderr-snippet)
                        (println (format "[%s] Agent stderr snippet: %s"
                                         id
                                         (snippet (str/replace stderr-snippet #"\s+" " ") 240))))
                      (cond
                        (> wr max-wr)
                        (let [recycled (recycle-active-claims! id claimed-ids mv-claimed-tasks)
                              metrics (update metrics :recycled + (count recycled))]
                          (println (format "[%s] Stuck after %d working resumes + nudge, resetting session" id wr))
                          (emit!
                           {:timing-ms cycle-timing
                            :outcome :stuck
                            :claimed-task-ids (vec active-claimed-ids)
                            :recycled-tasks (seq recycled)})
                          (cleanup-worktree! project-root (:dir wt-state) (:branch wt-state))
                          (recur (inc cycle) 1 (inc completed-runs) 0 metrics nil nil #{} nil 0 0 []))

                        (= wr max-wr)
                        (do
                          (println (format "[%s] Working... %d/%d resumes, nudging agent to wrap up" id wr max-wr))
                          (emit!
                           {:timing-ms cycle-timing
                            :outcome :working
                            :claimed-task-ids (vec active-claimed-ids)})
                          (recur cycle (inc attempt) completed-runs 0 metrics new-session-id wt-state
                                 active-claimed-ids nudge-prompt wr needs-followups signals))

                        :else
                        (do
                          (println (format "[%s] Working... (will resume, %d/%d)" id wr max-wr))
                          (emit!
                           {:timing-ms cycle-timing
                            :outcome :working
                            :claimed-task-ids (vec active-claimed-ids)})
                          (recur cycle (inc attempt) completed-runs 0 metrics new-session-id wt-state
                                 active-claimed-ids nil wr needs-followups signals))))))))))))))
|
|
980
1425
|
|
|
981
1426
|
;; =============================================================================
|
|
982
1427
|
;; Multi-Worker Execution
|
|
@@ -992,7 +1437,14 @@
|
|
|
992
1437
|
Returns seq of final worker states."
|
|
993
1438
|
[workers]
|
|
994
1439
|
(tasks/ensure-dirs!)
|
|
995
|
-
(let [swarm-id (-> workers first :swarm-id)
|
|
1440
|
+
(let [swarm-id (-> workers first :swarm-id)
|
|
1441
|
+
stale-current (tasks/list-current)]
|
|
1442
|
+
(when (seq stale-current)
|
|
1443
|
+
(println (format "WARNING: %d task(s) already in current/ from a previous run. These may be stale claims."
|
|
1444
|
+
(count stale-current)))
|
|
1445
|
+
(doseq [t stale-current]
|
|
1446
|
+
(println (format " - %s: %s" (:id t) (:summary t))))
|
|
1447
|
+
(println " Run `oompa requeue` to move them back to pending/ if they are stale."))
|
|
996
1448
|
(println (format "Launching %d workers..." (count workers)))
|
|
997
1449
|
|
|
998
1450
|
;; Register JVM shutdown hook so SIGTERM/SIGINT triggers graceful stop.
|
|
@@ -1014,27 +1466,38 @@
|
|
|
1014
1466
|
(map-indexed
|
|
1015
1467
|
(fn [idx worker]
|
|
1016
1468
|
(let [worker (assoc worker :id (or (:id worker) (str "w" idx)))]
|
|
1017
|
-
(future
|
|
1469
|
+
(future
|
|
1470
|
+
(try
|
|
1471
|
+
(run-worker! worker)
|
|
1472
|
+
(catch Exception e
|
|
1473
|
+
(println (format "[%s] FATAL: %s" (:id worker) (.getMessage e)))
|
|
1474
|
+
(.printStackTrace e)
|
|
1475
|
+
(throw e))))))
|
|
1018
1476
|
workers))]
|
|
1019
1477
|
|
|
1020
1478
|
(println "All workers launched. Waiting for completion...")
|
|
1021
|
-
(let [results (mapv
|
|
1479
|
+
(let [results (mapv (fn [f]
|
|
1480
|
+
(try
|
|
1481
|
+
(deref f)
|
|
1482
|
+
(catch Exception e
|
|
1483
|
+
(println (format "Worker future failed: %s" (.getMessage e)))
|
|
1484
|
+
{:status :fatal-error :error (.getMessage e)})))
|
|
1485
|
+
futures)]
|
|
1022
1486
|
;; Clean exit — tell shutdown hook not to write stopped.json
|
|
1023
1487
|
(reset! shutdown-requested? false)
|
|
1024
1488
|
;; Remove the hook so it doesn't accumulate across calls
|
|
1025
1489
|
(try (.removeShutdownHook (Runtime/getRuntime) hook) (catch Exception _))
|
|
1026
1490
|
(println "\nAll workers complete.")
|
|
1027
|
-
(
|
|
1028
|
-
|
|
1029
|
-
(:id
|
|
1030
|
-
|
|
1031
|
-
|
|
1032
|
-
|
|
1033
|
-
|
|
1034
|
-
|
|
1035
|
-
|
|
1036
|
-
|
|
1037
|
-
(or (:review-rounds-total w) 0))))
|
|
1491
|
+
(let [timing-by-worker (aggregate-cycle-timings-by-worker swarm-id)
|
|
1492
|
+
rows (mapv (fn [result]
|
|
1493
|
+
(let [row-id (or (:id result) "")
|
|
1494
|
+
totals (get timing-by-worker row-id empty-cycle-total)]
|
|
1495
|
+
(worker-summary-row result totals)))
|
|
1496
|
+
results)]
|
|
1497
|
+
(println "\nWorker Summary")
|
|
1498
|
+
(print-table [:Worker :Runs :Cycles :Status :Merges :Claims :Rejects :Errors :Recycled
|
|
1499
|
+
:ReviewRounds :ImplMs :ReviewMs :FixMs :HarnessMs :TotalMs]
|
|
1500
|
+
rows))
|
|
1038
1501
|
|
|
1039
1502
|
;; Write stopped event — all state derivable from cycle logs
|
|
1040
1503
|
(when swarm-id
|
|
@@ -1082,16 +1545,12 @@
|
|
|
1082
1545
|
tagged-prompt (str "[oompa:" swarm-id* ":planner] " prompt-text)
|
|
1083
1546
|
abs-root (.getAbsolutePath (io/file project-root))
|
|
1084
1547
|
|
|
1085
|
-
cmd (harness/build-cmd harness
|
|
1086
|
-
{:cwd abs-root :model model :prompt tagged-prompt})
|
|
1087
|
-
|
|
1088
1548
|
_ (println (format "[planner] Running (%s:%s, max_pending: %d, current: %d)"
|
|
1089
1549
|
(name harness) (or model "default") max-pending pending-before))
|
|
1090
1550
|
|
|
1091
1551
|
result (try
|
|
1092
|
-
(
|
|
1093
|
-
|
|
1094
|
-
:out :string :err :string})
|
|
1552
|
+
(harness/run-command! harness
|
|
1553
|
+
{:cwd abs-root :model model :prompt tagged-prompt})
|
|
1095
1554
|
(catch Exception e
|
|
1096
1555
|
(println (format "[planner] Agent exception: %s" (.getMessage e)))
|
|
1097
1556
|
{:exit -1 :out "" :err (.getMessage e)}))
|