@nbardy/oompa 0.7.1 → 0.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -75,8 +75,8 @@
75
75
  local-tasks (io/file cwd-file "tasks")
76
76
  parent-tasks (some-> cwd-file .getParentFile (io/file "tasks"))]
77
77
  (cond
78
- (.exists local-tasks) "tasks"
79
78
  (and parent-tasks (.exists parent-tasks)) "../tasks"
79
+ (.exists local-tasks) "tasks"
80
80
  :else "tasks")))
81
81
 
82
82
  (defn- render-task-header
@@ -90,6 +90,7 @@
90
90
  (str/replace "{TASKS_ROOT}" task-root))))
91
91
 
92
92
  (def ^:private default-max-working-resumes 5)
93
+ (def ^:private default-max-wait-for-tasks 600)
93
94
 
94
95
  (defn create-worker
95
96
  "Create a worker config.
@@ -97,16 +98,22 @@
97
98
  :can-plan when false, worker waits for tasks before starting (backpressure).
98
99
  :reasoning reasoning effort level (e.g. \"low\", \"medium\", \"high\") — codex only.
99
100
  :review-prompts paths to reviewer prompt files (loaded and concatenated for review).
100
- :wait-between seconds to sleep between iterations (nil or 0 = no wait).
101
+ :wait-between seconds to sleep between cycles (nil or 0 = no wait).
102
+ :max-wait-for-tasks max seconds a non-planner waits for tasks before giving up (default 600).
101
103
  :max-working-resumes max consecutive working resumes before nudge+kill (default 5)."
102
- [{:keys [id swarm-id harness model iterations prompts can-plan reasoning
103
- review-harness review-model review-prompts wait-between
104
- max-working-resumes]}]
104
+ [{:keys [id swarm-id harness model runs max-cycles iterations prompts can-plan reasoning
105
+ reviewers wait-between
106
+ max-working-resumes max-wait-for-tasks]}]
107
+ (let [cycle-cap (or max-cycles iterations runs 10)
108
+ run-goal (or runs iterations 10)]
105
109
  {:id id
106
110
  :swarm-id swarm-id
107
111
  :harness (or harness :codex)
108
112
  :model model
109
- :iterations (or iterations 10)
113
+ ;; Legacy compatibility: :iterations remains the cycle cap.
114
+ :iterations cycle-cap
115
+ :max-cycles cycle-cap
116
+ :runs run-goal
110
117
  :prompts (cond
111
118
  (vector? prompts) prompts
112
119
  (string? prompts) [prompts]
@@ -114,15 +121,14 @@
114
121
  :can-plan (if (some? can-plan) can-plan true)
115
122
  :reasoning reasoning
116
123
  :wait-between (when (and wait-between (pos? wait-between)) wait-between)
117
- :review-harness review-harness
118
- :review-model review-model
119
- :review-prompts (cond
120
- (vector? review-prompts) review-prompts
121
- (string? review-prompts) [review-prompts]
122
- :else [])
124
+ :max-wait-for-tasks (let [v (or max-wait-for-tasks default-max-wait-for-tasks)]
125
+ (if (and (number? v) (pos? v))
126
+ v
127
+ default-max-wait-for-tasks))
128
+ :reviewers reviewers
123
129
  :max-working-resumes (or max-working-resumes default-max-working-resumes)
124
130
  :completed 0
125
- :status :idle})
131
+ :status :idle}))
126
132
 
127
133
  ;; =============================================================================
128
134
  ;; Task Execution
@@ -267,7 +273,7 @@
267
273
  Uses custom review-prompts when configured, otherwise falls back to default.
268
274
  prev-feedback: vector of previous review outputs (for multi-round context).
269
275
  Returns {:verdict :approved|:needs-changes|:rejected, :comments [...], :output string}"
270
- [{:keys [id swarm-id review-harness review-model review-prompts]} worktree-path prev-feedback]
276
+ [{:keys [id swarm-id reviewers]} worktree-path prev-feedback]
271
277
  (let [;; Get actual diff content (not just stat) — truncate to 8000 chars for prompt budget
272
278
  diff-result (process/sh ["git" "diff" "main"]
273
279
  {:dir worktree-path :out :string :err :string})
@@ -277,15 +283,9 @@
277
283
  d))
278
284
 
279
285
  swarm-id* (or swarm-id "unknown")
280
- custom-prompt (when (seq review-prompts)
281
- (->> review-prompts
282
- (map load-prompt)
283
- (remove nil?)
284
- (str/join "\n\n")))
285
286
 
286
287
  ;; Only include the most recent round's feedback — the worker has already
287
288
  ;; attempted fixes based on it, so the reviewer just needs to verify.
288
- ;; Including all prior rounds bloats the prompt and causes empty output.
289
289
  history-block (when (seq prev-feedback)
290
290
  (let [latest (last prev-feedback)
291
291
  truncated (if (> (count latest) 2000)
@@ -297,40 +297,52 @@
297
297
  truncated
298
298
  "\n\n")))
299
299
 
300
- review-body (str (or custom-prompt
301
- (str "Review the changes in this worktree.\n"
302
- "Focus on architecture and design, not style.\n"))
303
- "\n\nDiff:\n```\n" diff-content "\n```\n"
304
- (when history-block history-block)
305
- "\nYour verdict MUST be on its own line, exactly one of:\n"
306
- "VERDICT: APPROVED\n"
307
- "VERDICT: NEEDS_CHANGES\n\n"
308
- "Do NOT use REJECTED. Always use NEEDS_CHANGES with specific, "
309
- "actionable feedback explaining what must change and why. "
310
- "The worker will attempt fixes based on your feedback.\n"
311
- "After your verdict line, list every issue as a numbered item with "
312
- "the file path and what needs to change.\n")
313
- review-prompt (str "[oompa:" swarm-id* ":" id "] " review-body)
314
-
315
300
  abs-wt (.getAbsolutePath (io/file worktree-path))
316
301
 
317
- ;; No session, no resume, no format flags — reviewer is stateless one-shot
318
- cmd (harness/build-cmd review-harness
319
- {:cwd abs-wt :model review-model :prompt review-prompt})
320
-
321
- result (try
322
- (process/sh cmd {:dir abs-wt
323
- :in (harness/process-stdin review-harness review-prompt)
324
- :out :string :err :string})
325
- (catch Exception e
326
- {:exit -1 :out "" :err (.getMessage e)}))
302
+ ;; Try each reviewer until one succeeds and returns a verdict
303
+ result (reduce (fn [_ {:keys [harness model prompts]}]
304
+ (let [custom-prompt (when (seq prompts)
305
+ (->> prompts
306
+ (map load-prompt)
307
+ (remove nil?)
308
+ (str/join "\n\n")))
309
+ review-body (str (or custom-prompt
310
+ (str "Review the changes in this worktree.\n"
311
+ "Focus on architecture and design, not style.\n"))
312
+ "\n\nDiff:\n```\n" diff-content "\n```\n"
313
+ (when history-block history-block)
314
+ "\nYour verdict MUST be on its own line, exactly one of:\n"
315
+ "VERDICT: APPROVED\n"
316
+ "VERDICT: NEEDS_CHANGES\n\n"
317
+ "Do NOT use REJECTED. Always use NEEDS_CHANGES with specific, "
318
+ "actionable feedback explaining what must change and why. "
319
+ "The worker will attempt fixes based on your feedback.\n"
320
+ "After your verdict line, list every issue as a numbered item with "
321
+ "the file path and what needs to change.\n")
322
+ review-prompt (str "[oompa:" swarm-id* ":" id "] " review-body)
323
+ cmd (harness/build-cmd harness {:cwd abs-wt :model model :prompt review-prompt})
324
+ res (try
325
+ (process/sh cmd {:dir abs-wt
326
+ :in (harness/process-stdin harness review-prompt)
327
+ :out :string :err :string})
328
+ (catch Exception e
329
+ {:exit -1 :out "" :err (.getMessage e)}))
330
+ output (or (:out res) "")
331
+ has-verdict? (or (re-find #"VERDICT:\s*APPROVED" output)
332
+ (re-find #"VERDICT:\s*NEEDS_CHANGES" output)
333
+ (re-find #"VERDICT:\s*REJECTED" output)
334
+ (re-find #"(?i)\bAPPROVED\b" output))]
335
+ (if (and (= (:exit res) 0) has-verdict?)
336
+ (reduced res)
337
+ (do
338
+ (println (format "[%s] Reviewer %s failed or returned no verdict, falling back..." id model))
339
+ res))))
340
+ {:exit -1 :out "" :err "No reviewers configured or no verdict returned"}
341
+ reviewers)
327
342
 
328
343
  output (:out result)
329
344
 
330
- ;; Parse verdict — require explicit VERDICT: prefix to avoid false matches.
331
- ;; REJECTED is treated as NEEDS_CHANGES: the reviewer must always give
332
- ;; actionable feedback so the worker can attempt fixes. Hard rejection
333
- ;; only happens when max review rounds are exhausted.
345
+ ;; Parse verdict
334
346
  verdict (cond
335
347
  (re-find #"VERDICT:\s*APPROVED" output) :approved
336
348
  (re-find #"VERDICT:\s*NEEDS_CHANGES" output) :needs-changes
@@ -499,12 +511,13 @@
499
511
  "Write cycle event log. Called at every cycle exit point.
500
512
  session-id links to the Claude CLI conversation transcript on disk.
501
513
  No mutable summary state — all state is derived from immutable cycle logs."
502
- [swarm-id worker-id cycle start-ms session-id
514
+ [swarm-id worker-id cycle run start-ms session-id
503
515
  {:keys [outcome claimed-task-ids recycled-tasks error-snippet review-rounds]}]
504
516
  (let [duration-ms (- (System/currentTimeMillis) start-ms)]
505
517
  (runs/write-cycle-log!
506
518
  swarm-id worker-id cycle
507
- {:outcome outcome
519
+ {:run run
520
+ :outcome outcome
508
521
  :duration-ms duration-ms
509
522
  :claimed-task-ids (vec (or claimed-task-ids []))
510
523
  :recycled-tasks (or recycled-tasks [])
@@ -641,7 +654,7 @@
641
654
  Writes review logs to runs/{swarm-id}/reviews/ for post-mortem analysis.
642
655
  Returns {:approved? bool, :attempts int}"
643
656
  [worker wt-path worker-id iteration]
644
- (if-not (and (:review-harness worker) (:review-model worker))
657
+ (if (empty? (:reviewers worker))
645
658
  ;; No reviewer configured, auto-approve
646
659
  {:approved? true :attempts 0}
647
660
 
@@ -681,27 +694,33 @@
681
694
  ;; Worker Loop
682
695
  ;; =============================================================================
683
696
 
684
- ;; Workers wait up to 10 minutes for tasks to appear before giving up.
697
+ ;; Workers can wait for tasks before giving up; default is 10 minutes.
685
698
  ;; This keeps workers alive while planners/designers ramp up the queue.
686
- (def ^:private max-wait-for-tasks 600)
687
699
  (def ^:private wait-poll-interval 10)
688
- (def ^:private max-consecutive-errors 3)
700
+ (def ^:private max-consecutive-errors 5)
701
+
702
+ (defn- backoff-sleep! [id errors]
703
+ (when (< errors max-consecutive-errors)
704
+ (let [wait-sec (* 60 (int (Math/pow 2 (dec errors))))]
705
+ (println (format "[%s] Backing off for %d seconds before next retry (%d/%d)..." id wait-sec errors (dec max-consecutive-errors)))
706
+ (Thread/sleep (* 1000 wait-sec)))))
707
+
689
708
 
690
709
  (defn- wait-for-tasks!
691
- "Wait up to 10 minutes for pending/current tasks to appear. Used for
692
- backpressure on workers that can't create their own tasks (can_plan: false).
710
+ "Wait up to max-wait-seconds for pending/current tasks to appear.
711
+ Used for backpressure on workers that can't create their own tasks (can_plan: false).
693
712
  Polls every 10 seconds, logs every 60 seconds."
694
- [worker-id]
713
+ [worker-id max-wait-seconds]
695
714
  (loop [waited 0]
696
715
  (cond
697
716
  (pos? (tasks/pending-count)) true
698
717
  (pos? (tasks/current-count)) true
699
- (>= waited max-wait-for-tasks)
718
+ (>= waited max-wait-seconds)
700
719
  (do (println (format "[%s] No tasks after %ds, giving up" worker-id waited))
701
720
  false)
702
721
  :else
703
722
  (do (when (zero? (mod waited 60))
704
- (println (format "[%s] Waiting for tasks... (%ds/%ds)" worker-id waited max-wait-for-tasks)))
723
+ (println (format "[%s] Waiting for tasks... (%ds/%ds)" worker-id waited max-wait-seconds)))
705
724
  (Thread/sleep (* wait-poll-interval 1000))
706
725
  (recur (+ waited wait-poll-interval))))))
707
726
 
@@ -716,267 +735,257 @@
716
735
  (defn run-worker!
717
736
  "Run worker loop with persistent sessions.
718
737
 
719
- Sessions persist across iterations — agents resume where they left off.
720
- Worktrees persist until COMPLETE_AND_READY_FOR_MERGE triggers review+merge.
721
- __DONE__ stops the worker entirely (planners only).
722
-
723
- Tracks per-worker metrics: merges, rejections, errors, review-rounds-total.
724
- Returns final worker state with metrics attached."
738
+ A run is a terminal outcome (merged/rejected/error-like).
739
+ A cycle is one worker turn/resume. Multiple cycles may occur in one run.
740
+ Cycle cap is controlled by :max-cycles (legacy key: :iterations)."
725
741
  [worker]
726
742
  (tasks/ensure-dirs!)
727
- (let [{:keys [id iterations swarm-id wait-between]} worker
743
+ (let [{:keys [id runs max-cycles iterations swarm-id wait-between max-wait-for-tasks]} worker
744
+ cycle-cap (or max-cycles iterations 10)
745
+ run-goal (or runs iterations 10)
728
746
  project-root (System/getProperty "user.dir")]
729
- (println (format "[%s] Starting worker (%s:%s%s, %d iterations%s)"
747
+ (println (format "[%s] Starting worker (%s:%s%s, goal=%d runs, cap=%d cycles%s)"
730
748
  id
731
749
  (name (:harness worker))
732
750
  (or (:model worker) "default")
733
751
  (if (:reasoning worker) (str ":" (:reasoning worker)) "")
734
- iterations
752
+ run-goal
753
+ cycle-cap
735
754
  (if wait-between (format ", %ds between" wait-between) "")))
736
755
 
737
- ;; Backpressure: workers that can't create tasks wait for tasks to exist
738
- (when-not (:can-plan worker)
739
- (wait-for-tasks! id))
756
+ (when (and (not (:can-plan worker))
757
+ (not (pos? (tasks/pending-count)))
758
+ (not (pos? (tasks/current-count))))
759
+ (wait-for-tasks! id max-wait-for-tasks))
740
760
 
741
- ;; metrics tracks: {:merges N :rejections N :errors N :recycled N :review-rounds-total N :claims N}
742
- (loop [iter 1
743
- completed 0
761
+ (loop [cycle 1
762
+ completed-runs 0
744
763
  consec-errors 0
745
764
  metrics {:merges 0 :rejections 0 :errors 0 :recycled 0 :review-rounds-total 0 :claims 0}
746
- session-id nil ;; persistent session-id (nil = start fresh)
747
- wt-state nil ;; {:dir :branch :path} or nil
748
- claimed-ids #{} ;; task IDs claimed this session (reset on worktree destroy)
749
- claim-resume-prompt nil ;; override prompt for next iteration (from CLAIM results)
750
- working-resumes 0] ;; consecutive "working" outcomes in current session
765
+ session-id nil
766
+ wt-state nil
767
+ claimed-ids #{}
768
+ claim-resume-prompt nil
769
+ working-resumes 0]
751
770
  (let [finish (fn [status]
752
- (assoc worker :completed completed :status status
771
+ (assoc worker :completed completed-runs
772
+ :runs-completed completed-runs
773
+ :cycles-completed (dec cycle)
774
+ :status status
753
775
  :merges (:merges metrics)
754
776
  :rejections (:rejections metrics)
755
777
  :errors (:errors metrics)
756
778
  :recycled (:recycled metrics)
757
779
  :review-rounds-total (:review-rounds-total metrics)
758
- :claims (:claims metrics)))]
780
+ :claims (:claims metrics)))
781
+ current-run (inc completed-runs)]
759
782
  (cond
760
- (> iter iterations)
783
+ (> cycle cycle-cap)
761
784
  (do
762
- ;; Cleanup any lingering worktree
763
785
  (when wt-state
764
786
  (cleanup-worktree! project-root (:dir wt-state) (:branch wt-state)))
765
- (println (format "[%s] Completed %d iterations (%d merges, %d claims, %d rejections, %d errors, %d recycled)"
766
- id completed (:merges metrics) (:claims metrics) (:rejections metrics) (:errors metrics) (:recycled metrics)))
787
+ (println (format "[%s] Completed %d/%d runs in %d cycles (%d merges, %d claims, %d rejections, %d errors, %d recycled)"
788
+ id completed-runs run-goal (dec cycle)
789
+ (:merges metrics) (:claims metrics) (:rejections metrics) (:errors metrics) (:recycled metrics)))
767
790
  (finish :exhausted))
768
791
 
792
+ (>= completed-runs run-goal)
793
+ (do
794
+ (when wt-state
795
+ (cleanup-worktree! project-root (:dir wt-state) (:branch wt-state)))
796
+ (println (format "[%s] Reached run goal: %d/%d runs in %d cycles"
797
+ id completed-runs run-goal (dec cycle)))
798
+ (finish :completed))
799
+
769
800
  @shutdown-requested?
770
801
  (do
771
- (println (format "[%s] Shutdown requested, stopping after %d iterations" id (dec iter)))
802
+ (println (format "[%s] Shutdown requested, stopping after %d cycles" id (dec cycle)))
772
803
  (when wt-state
773
- ;; Recycle any claimed tasks back to pending so other workers can pick them up
774
804
  (when (seq claimed-ids)
775
805
  (let [recycled (tasks/recycle-tasks! claimed-ids)]
776
806
  (when (seq recycled)
777
807
  (println (format "[%s] Recycled %d claimed task(s) on shutdown" id (count recycled))))))
778
808
  (cleanup-worktree! project-root (:dir wt-state) (:branch wt-state)))
779
- (emit-cycle-log! swarm-id id iter (System/currentTimeMillis) session-id
780
- {:outcome :interrupted})
809
+ (emit-cycle-log! swarm-id id cycle current-run (System/currentTimeMillis) session-id
810
+ {:outcome :interrupted})
781
811
  (finish :interrupted))
782
812
 
783
813
  :else
784
814
  (do
785
- ;; Sleep between iterations when wait_between is configured
786
- (maybe-sleep-between! id wait-between iter)
787
-
788
- ;; Backpressure: non-planner workers wait for tasks between iterations too
789
- (when (and (not (:can-plan worker))
790
- (not (pos? (tasks/pending-count)))
791
- (not (pos? (tasks/current-count))))
792
- (println (format "[%s] Queue empty, waiting for tasks before iteration %d" id iter))
793
- (wait-for-tasks! id))
794
-
795
- ;; Ensure worktree exists (create fresh if nil, reuse if persisted)
796
- (let [wt-state (try
797
- (or wt-state (create-iteration-worktree! project-root id iter))
798
- (catch Exception e
799
- (println (format "[%s] Worktree creation failed: %s" id (.getMessage e)))
800
- nil))]
801
- (if (nil? wt-state)
802
- ;; Worktree creation failed — count as error
803
- (let [errors (inc consec-errors)
804
- metrics (update metrics :errors inc)]
805
- (if (>= errors max-consecutive-errors)
806
- (do
807
- (println (format "[%s] %d consecutive errors, stopping" id errors))
808
- (finish :error))
809
- (recur (inc iter) completed errors metrics nil nil #{} nil 0)))
810
-
811
- ;; Worktree ready — run agent
812
- (let [resume? (or (some? session-id) (some? claim-resume-prompt))
813
- iter-start-ms (System/currentTimeMillis)
814
- ;; Snapshot current/ task IDs before agent runs so we can
815
- ;; detect any direct mv claims (safety net for old behavior).
816
- pre-current-ids (tasks/current-task-ids)
817
- _ (println (format "[%s] %s iteration %d/%d"
818
- id (if resume? "Resuming" "Starting") iter iterations))
819
- context (build-context)
820
- {:keys [output exit done? merge? claim-ids] :as agent-result}
821
- (run-agent! worker (:path wt-state) context session-id resume?
822
- :resume-prompt-override claim-resume-prompt)
823
- new-session-id (:session-id agent-result)
824
- ;; Safety net: detect any direct mv claims (old behavior)
825
- mv-claimed-tasks (detect-claimed-tasks pre-current-ids)]
826
-
827
- (cond
828
- ;; Agent errored — recycle claimed tasks, cleanup, reset session
829
- (not (zero? exit))
830
- (let [errors (inc consec-errors)
831
- recycled (recycle-orphaned-tasks! id pre-current-ids)
832
- metrics (-> metrics
833
- (update :errors inc)
834
- (update :recycled + recycled))
835
- error-msg (subs (or output "") 0 (min 200 (count (or output ""))))]
836
- (println (format "[%s] Agent error (exit %d): %s" id exit error-msg))
837
- (emit-cycle-log! swarm-id id iter iter-start-ms new-session-id
838
- {:outcome :error :claimed-task-ids (vec (into claimed-ids mv-claimed-tasks))
839
- :recycled-tasks (when (pos? recycled) (vec mv-claimed-tasks))
840
- :error-snippet error-msg})
841
- (cleanup-worktree! project-root (:dir wt-state) (:branch wt-state))
842
- (if (>= errors max-consecutive-errors)
843
- (do
844
- (println (format "[%s] %d consecutive errors, stopping" id errors))
845
- (finish :error))
846
- (recur (inc iter) completed errors metrics nil nil #{} nil 0)))
847
-
848
- ;; CLAIM signal — framework claims tasks, resumes agent with results
849
- ;; Only honored when no MERGE or DONE signal (lowest priority)
850
- (and (seq claim-ids) (not merge?) (not done?))
851
- (let [_ (println (format "[%s] CLAIM signal: %s" id (str/join ", " claim-ids)))
852
- {:keys [claimed failed resume-prompt]} (execute-claims! claim-ids)
853
- new-claimed-ids (into claimed-ids claimed)
854
- metrics (update metrics :claims + (count claimed))]
855
- (println (format "[%s] Claimed %d/%d tasks" id (count claimed) (count claim-ids)))
856
- (emit-cycle-log! swarm-id id iter iter-start-ms new-session-id
857
- {:outcome :claimed :claimed-task-ids (vec claimed)})
858
- (recur (inc iter) completed 0 metrics new-session-id wt-state
859
- new-claimed-ids resume-prompt 0))
860
-
861
- ;; COMPLETE_AND_READY_FOR_MERGE — review, merge, reset session
862
- merge?
863
- (if (worktree-has-changes? (:path wt-state))
864
- (if (task-only-diff? (:path wt-state))
865
- ;; Task-only changes — skip review, sync to main, auto-merge
866
- (do
867
- (println (format "[%s] Task-only diff, auto-merging" id))
868
- (let [sync-status (sync-worktree-to-main! worker (:path wt-state) id)
869
- all-claimed (into claimed-ids mv-claimed-tasks)]
870
- (if (= :failed sync-status)
871
- ;; Sync failed — cannot merge safely, skip
872
- (do
873
- (println (format "[%s] Sync to main failed, skipping merge" id))
874
- (emit-cycle-log! swarm-id id iter iter-start-ms new-session-id
875
- {:outcome :sync-failed :claimed-task-ids (vec all-claimed)})
876
- (cleanup-worktree! project-root (:dir wt-state) (:branch wt-state))
877
- (recur (inc iter) completed 0 metrics nil nil #{} nil 0))
878
- ;; Synced — proceed with merge
879
- (let [merged? (merge-to-main! (:path wt-state) (:branch wt-state) id project-root 0 all-claimed)
880
- metrics (if merged? (update metrics :merges inc) metrics)]
881
- (println (format "[%s] Cycle %d/%d complete" id iter iterations))
882
- (emit-cycle-log! swarm-id id iter iter-start-ms new-session-id
883
- {:outcome :merged :claimed-task-ids (vec all-claimed) :review-rounds 0})
884
- (cleanup-worktree! project-root (:dir wt-state) (:branch wt-state))
885
- (recur (inc iter) (inc completed) 0 metrics nil nil #{} nil 0)))))
886
- ;; Code changes — full review loop
887
- (let [{:keys [approved? attempts]} (review-loop! worker (:path wt-state) id iter)
888
- ;; Don't pre-increment :merges — defer to after actual merge succeeds
889
- metrics (-> metrics
890
- (update :review-rounds-total + (or attempts 0))
891
- (cond-> (not approved?) (update :rejections inc)))]
892
- (if approved?
815
+ (maybe-sleep-between! id wait-between cycle)
816
+
817
+ (when (and (not (:can-plan worker))
818
+ (not (pos? (tasks/pending-count)))
819
+ (not (pos? (tasks/current-count))))
820
+ (println (format "[%s] Queue empty, waiting for tasks before cycle %d" id cycle))
821
+ (wait-for-tasks! id max-wait-for-tasks))
822
+
823
+ (let [wt-state (try
824
+ (or wt-state (create-iteration-worktree! project-root id cycle))
825
+ (catch Exception e
826
+ (println (format "[%s] Worktree creation failed: %s" id (.getMessage e)))
827
+ nil))]
828
+ (if (nil? wt-state)
829
+ (let [errors (inc consec-errors)
830
+ metrics (update metrics :errors inc)]
831
+ (if (>= errors max-consecutive-errors)
832
+ (do
833
+ (println (format "[%s] %d consecutive errors, stopping" id errors))
834
+ (finish :error))
835
+ (do (backoff-sleep! id errors) (recur (inc cycle) completed-runs errors metrics nil nil #{} nil 0))))
836
+
837
+ (let [resume? (or (some? session-id) (some? claim-resume-prompt))
838
+ cycle-start-ms (System/currentTimeMillis)
839
+ pre-current-ids (tasks/current-task-ids)
840
+ _ (println (format "[%s] %s cycle %d/%d (run %d/%d)"
841
+ id (if resume? "Resuming" "Starting") cycle cycle-cap current-run run-goal))
842
+ context (build-context)
843
+ {:keys [output exit done? merge? claim-ids] :as agent-result}
844
+ (run-agent! worker (:path wt-state) context session-id resume?
845
+ :resume-prompt-override claim-resume-prompt)
846
+ new-session-id (:session-id agent-result)
847
+ mv-claimed-tasks (detect-claimed-tasks pre-current-ids)]
848
+ (cond
849
+ (not (zero? exit))
850
+ (let [errors (inc consec-errors)
851
+ recycled (recycle-orphaned-tasks! id pre-current-ids)
852
+ metrics (-> metrics (update :errors inc) (update :recycled + recycled))
853
+ error-msg (subs (or output "") 0 (min 200 (count (or output ""))))]
854
+ (println (format "[%s] Agent error (exit %d): %s" id exit error-msg))
855
+ (emit-cycle-log! swarm-id id cycle current-run cycle-start-ms new-session-id
856
+ {:outcome :error
857
+ :claimed-task-ids (vec (into claimed-ids mv-claimed-tasks))
858
+ :recycled-tasks (when (pos? recycled) (vec mv-claimed-tasks))
859
+ :error-snippet error-msg})
860
+ (cleanup-worktree! project-root (:dir wt-state) (:branch wt-state))
861
+ (if (>= errors max-consecutive-errors)
862
+ (do
863
+ (println (format "[%s] %d consecutive errors, stopping" id errors))
864
+ (finish :error))
865
+ (do (backoff-sleep! id errors) (recur (inc cycle) (inc completed-runs) errors metrics nil nil #{} nil 0))))
866
+
867
+ (and (seq claim-ids) (not merge?) (not done?))
868
+ (let [_ (println (format "[%s] CLAIM signal: %s" id (str/join ", " claim-ids)))
869
+ {:keys [claimed resume-prompt]} (execute-claims! claim-ids)
870
+ new-claimed-ids (into claimed-ids claimed)
871
+ metrics (update metrics :claims + (count claimed))]
872
+ (println (format "[%s] Claimed %d/%d tasks" id (count claimed) (count claim-ids)))
873
+ (emit-cycle-log! swarm-id id cycle current-run cycle-start-ms new-session-id
874
+ {:outcome :claimed :claimed-task-ids (vec claimed)})
875
+ (recur (inc cycle) completed-runs 0 metrics new-session-id wt-state
876
+ new-claimed-ids resume-prompt 0))
877
+
878
+ merge?
879
+ (if (worktree-has-changes? (:path wt-state))
880
+ (if (task-only-diff? (:path wt-state))
881
+ (do
882
+ (println (format "[%s] Task-only diff, auto-merging" id))
893
883
  (let [sync-status (sync-worktree-to-main! worker (:path wt-state) id)
894
884
  all-claimed (into claimed-ids mv-claimed-tasks)]
895
885
  (if (= :failed sync-status)
896
- ;; Sync failed after approval — treat as sync failure, skip merge
897
886
  (do
898
- (println (format "[%s] Sync to main failed after approval, skipping merge" id))
899
- (emit-cycle-log! swarm-id id iter iter-start-ms new-session-id
900
- {:outcome :sync-failed :claimed-task-ids (vec all-claimed)
901
- :review-rounds (or attempts 0)})
887
+ (println (format "[%s] Sync to main failed, skipping merge" id))
888
+ (emit-cycle-log! swarm-id id cycle current-run cycle-start-ms new-session-id
889
+ {:outcome :sync-failed :claimed-task-ids (vec all-claimed)})
902
890
  (cleanup-worktree! project-root (:dir wt-state) (:branch wt-state))
903
- (recur (inc iter) completed 0 metrics nil nil #{} nil 0))
904
- ;; Synced — proceed with merge, capture return value
905
- (let [merged? (merge-to-main! (:path wt-state) (:branch wt-state) id project-root (or attempts 0) all-claimed)
891
+ (recur (inc cycle) (inc completed-runs) 0 metrics nil nil #{} nil 0))
892
+ (let [merged? (merge-to-main! (:path wt-state) (:branch wt-state) id project-root 0 all-claimed)
906
893
  metrics (if merged? (update metrics :merges inc) metrics)]
907
- (println (format "[%s] Cycle %d/%d complete" id iter iterations))
908
- (emit-cycle-log! swarm-id id iter iter-start-ms new-session-id
909
- {:outcome (if merged? :merged :merge-failed)
910
- :claimed-task-ids (vec all-claimed)
911
- :review-rounds (or attempts 0)})
894
+ (println (format "[%s] Cycle %d/%d complete" id cycle cycle-cap))
895
+ (emit-cycle-log! swarm-id id cycle current-run cycle-start-ms new-session-id
896
+ {:outcome (if merged? :merged :merge-failed)
897
+ :claimed-task-ids (vec all-claimed)
898
+ :review-rounds 0})
912
899
  (cleanup-worktree! project-root (:dir wt-state) (:branch wt-state))
913
- (recur (inc iter) (inc completed) 0 metrics nil nil #{} nil 0))))
914
- (let [recycled (recycle-orphaned-tasks! id pre-current-ids)
915
- metrics (update metrics :recycled + recycled)]
916
- (println (format "[%s] Cycle %d/%d rejected" id iter iterations))
917
- (emit-cycle-log! swarm-id id iter iter-start-ms new-session-id
918
- {:outcome :rejected :claimed-task-ids (vec (into claimed-ids mv-claimed-tasks))
919
- :recycled-tasks (when (pos? recycled) (vec mv-claimed-tasks))
920
- :review-rounds (or attempts 0)})
921
- (cleanup-worktree! project-root (:dir wt-state) (:branch wt-state))
922
- (recur (inc iter) completed 0 metrics nil nil #{} nil 0)))))
923
- (let [recycled (recycle-orphaned-tasks! id pre-current-ids)
924
- metrics (update metrics :recycled + recycled)]
925
- (println (format "[%s] Merge signaled but no changes, skipping" id))
926
- (emit-cycle-log! swarm-id id iter iter-start-ms new-session-id
927
- {:outcome :no-changes :claimed-task-ids (vec (into claimed-ids mv-claimed-tasks))
928
- :recycled-tasks (when (pos? recycled) (vec mv-claimed-tasks))})
929
- (cleanup-worktree! project-root (:dir wt-state) (:branch wt-state))
930
- (recur (inc iter) completed 0 metrics nil nil #{} nil 0)))
931
-
932
- ;; __DONE__ — agent signaled it finished this cycle's work.
933
- ;; Always reset session and continue to next iteration.
934
- ;; Planners re-plan as tasks complete; executors pick up new tasks.
935
- done?
936
- (let [recycled (recycle-orphaned-tasks! id pre-current-ids)
937
- metrics (update metrics :recycled + recycled)]
938
- (println (format "[%s] __DONE__ signal, resetting session (iter %d/%d)" id iter iterations))
939
- (emit-cycle-log! swarm-id id iter iter-start-ms new-session-id
940
- {:outcome :executor-done :claimed-task-ids (vec (into claimed-ids mv-claimed-tasks))
941
- :recycled-tasks (when (pos? recycled) (vec mv-claimed-tasks))})
942
- (cleanup-worktree! project-root (:dir wt-state) (:branch wt-state))
943
- (recur (inc iter) completed 0 metrics nil nil #{} nil 0))
944
-
945
- ;; No signal — agent still working, resume next iteration.
946
- ;; Track consecutive working resumes. After max-working-resumes,
947
- ;; inject a nudge prompt. If still no signal after nudge, kill session.
948
- :else
949
- (let [wr (inc working-resumes)
950
- max-wr (:max-working-resumes worker)]
951
- (cond
952
- ;; Already nudged last iteration, still no signal — stuck
953
- (> wr max-wr)
900
+ (recur (inc cycle) (inc completed-runs) 0 metrics nil nil #{} nil 0)))))
901
+ (let [{:keys [approved? attempts]} (review-loop! worker (:path wt-state) id cycle)
902
+ metrics (-> metrics
903
+ (update :review-rounds-total + (or attempts 0))
904
+ (cond-> (not approved?) (update :rejections inc)))]
905
+ (if approved?
906
+ (let [sync-status (sync-worktree-to-main! worker (:path wt-state) id)
907
+ all-claimed (into claimed-ids mv-claimed-tasks)]
908
+ (if (= :failed sync-status)
909
+ (do
910
+ (println (format "[%s] Sync to main failed after approval, skipping merge" id))
911
+ (emit-cycle-log! swarm-id id cycle current-run cycle-start-ms new-session-id
912
+ {:outcome :sync-failed
913
+ :claimed-task-ids (vec all-claimed)
914
+ :review-rounds (or attempts 0)})
915
+ (cleanup-worktree! project-root (:dir wt-state) (:branch wt-state))
916
+ (recur (inc cycle) (inc completed-runs) 0 metrics nil nil #{} nil 0))
917
+ (let [merged? (merge-to-main! (:path wt-state) (:branch wt-state) id project-root (or attempts 0) all-claimed)
918
+ metrics (if merged? (update metrics :merges inc) metrics)]
919
+ (println (format "[%s] Cycle %d/%d complete" id cycle cycle-cap))
920
+ (emit-cycle-log! swarm-id id cycle current-run cycle-start-ms new-session-id
921
+ {:outcome (if merged? :merged :merge-failed)
922
+ :claimed-task-ids (vec all-claimed)
923
+ :review-rounds (or attempts 0)})
924
+ (cleanup-worktree! project-root (:dir wt-state) (:branch wt-state))
925
+ (recur (inc cycle) (inc completed-runs) 0 metrics nil nil #{} nil 0))))
926
+ (let [recycled (recycle-orphaned-tasks! id pre-current-ids)
927
+ metrics (update metrics :recycled + recycled)]
928
+ (println (format "[%s] Cycle %d/%d rejected" id cycle cycle-cap))
929
+ (emit-cycle-log! swarm-id id cycle current-run cycle-start-ms new-session-id
930
+ {:outcome :rejected
931
+ :claimed-task-ids (vec (into claimed-ids mv-claimed-tasks))
932
+ :recycled-tasks (when (pos? recycled) (vec mv-claimed-tasks))
933
+ :review-rounds (or attempts 0)})
934
+ (cleanup-worktree! project-root (:dir wt-state) (:branch wt-state))
935
+ (recur (inc cycle) (inc completed-runs) 0 metrics nil nil #{} nil 0)))))
954
936
  (let [recycled (recycle-orphaned-tasks! id pre-current-ids)
955
937
  metrics (update metrics :recycled + recycled)]
956
- (println (format "[%s] Stuck after %d working resumes + nudge, resetting session" id wr))
957
- (emit-cycle-log! swarm-id id iter iter-start-ms new-session-id
958
- {:outcome :stuck :claimed-task-ids (vec (into claimed-ids mv-claimed-tasks))
959
- :recycled-tasks (when (pos? recycled) (vec mv-claimed-tasks))})
938
+ (println (format "[%s] Merge signaled but no changes, skipping" id))
939
+ (emit-cycle-log! swarm-id id cycle current-run cycle-start-ms new-session-id
940
+ {:outcome :no-changes
941
+ :claimed-task-ids (vec (into claimed-ids mv-claimed-tasks))
942
+ :recycled-tasks (when (pos? recycled) (vec mv-claimed-tasks))})
960
943
  (cleanup-worktree! project-root (:dir wt-state) (:branch wt-state))
961
- (recur (inc iter) completed 0 metrics nil nil #{} nil 0))
962
-
963
- ;; Hit the limit — nudge on next resume
964
- (= wr max-wr)
965
- (do
966
- (println (format "[%s] Working... %d/%d resumes, nudging agent to wrap up" id wr max-wr))
967
- (emit-cycle-log! swarm-id id iter iter-start-ms new-session-id
968
- {:outcome :working :claimed-task-ids (vec (into claimed-ids mv-claimed-tasks))})
969
- (recur (inc iter) completed 0 metrics new-session-id wt-state
970
- claimed-ids nudge-prompt wr))
971
-
972
- ;; Under limit normal resume
973
- :else
974
- (do
975
- (println (format "[%s] Working... (will resume, %d/%d)" id wr max-wr))
976
- (emit-cycle-log! swarm-id id iter iter-start-ms new-session-id
977
- {:outcome :working :claimed-task-ids (vec (into claimed-ids mv-claimed-tasks))})
978
- (recur (inc iter) completed 0 metrics new-session-id wt-state
979
- claimed-ids nil wr))))))))))))))
944
+ (recur (inc cycle) (inc completed-runs) 0 metrics nil nil #{} nil 0)))
945
+
946
+ done?
947
+ (let [recycled (recycle-orphaned-tasks! id pre-current-ids)
948
+ metrics (update metrics :recycled + recycled)]
949
+ (println (format "[%s] __DONE__ signal, resetting session (cycle %d/%d)" id cycle cycle-cap))
950
+ (emit-cycle-log! swarm-id id cycle current-run cycle-start-ms new-session-id
951
+ {:outcome :executor-done
952
+ :claimed-task-ids (vec (into claimed-ids mv-claimed-tasks))
953
+ :recycled-tasks (when (pos? recycled) (vec mv-claimed-tasks))})
954
+ (cleanup-worktree! project-root (:dir wt-state) (:branch wt-state))
955
+ (recur (inc cycle) completed-runs 0 metrics nil nil #{} nil 0))
956
+
957
+ :else
958
+ (let [wr (inc working-resumes)
959
+ max-wr (:max-working-resumes worker)]
960
+ (cond
961
+ (> wr max-wr)
962
+ (let [recycled (recycle-orphaned-tasks! id pre-current-ids)
963
+ metrics (update metrics :recycled + recycled)]
964
+ (println (format "[%s] Stuck after %d working resumes + nudge, resetting session" id wr))
965
+ (emit-cycle-log! swarm-id id cycle current-run cycle-start-ms new-session-id
966
+ {:outcome :stuck
967
+ :claimed-task-ids (vec (into claimed-ids mv-claimed-tasks))
968
+ :recycled-tasks (when (pos? recycled) (vec mv-claimed-tasks))})
969
+ (cleanup-worktree! project-root (:dir wt-state) (:branch wt-state))
970
+ (recur (inc cycle) (inc completed-runs) 0 metrics nil nil #{} nil 0))
971
+
972
+ (= wr max-wr)
973
+ (do
974
+ (println (format "[%s] Working... %d/%d resumes, nudging agent to wrap up" id wr max-wr))
975
+ (emit-cycle-log! swarm-id id cycle current-run cycle-start-ms new-session-id
976
+ {:outcome :working
977
+ :claimed-task-ids (vec (into claimed-ids mv-claimed-tasks))})
978
+ (recur (inc cycle) completed-runs 0 metrics new-session-id wt-state
979
+ claimed-ids nudge-prompt wr))
980
+
981
+ :else
982
+ (do
983
+ (println (format "[%s] Working... (will resume, %d/%d)" id wr max-wr))
984
+ (emit-cycle-log! swarm-id id cycle current-run cycle-start-ms new-session-id
985
+ {:outcome :working
986
+ :claimed-task-ids (vec (into claimed-ids mv-claimed-tasks))})
987
+ (recur (inc cycle) completed-runs 0 metrics new-session-id wt-state
988
+ claimed-ids nil wr))))))))))))))
980
989
 
981
990
  ;; =============================================================================
982
991
  ;; Multi-Worker Execution