@nbardy/oompa 0.7.1 → 0.7.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,8 +2,8 @@
2
2
  "Command-line interface for AgentNet orchestrator.
3
3
 
4
4
  Usage:
5
- ./swarm.bb run # Run all tasks once
6
- ./swarm.bb run --workers 4 # With 4 parallel workers
5
+ ./swarm.bb run # Run swarm from config (oompa.json)
6
+ ./swarm.bb run --detach --config oompa.json # Run in background with startup validation
7
7
  ./swarm.bb loop 20 --harness claude # 20 iterations with Claude
8
8
  ./swarm.bb loop --workers claude:5 opencode:2 --iterations 20 # Mixed harnesses
9
9
  ./swarm.bb swarm oompa.json # Multi-model from config
@@ -46,7 +46,7 @@
46
46
  (let [[harness count-str] (str/split s #":" 2)
47
47
  h (keyword harness)
48
48
  cnt (parse-int count-str 0)]
49
- (when-not (harnesses h)
49
+ (when-not (harness/valid-harness? h)
50
50
  (throw (ex-info (str "Unknown harness in worker spec: " s ". Known: " (str/join ", " (map name (sort harnesses)))) {})))
51
51
  (when (zero? cnt)
52
52
  (throw (ex-info (str "Invalid count in worker spec: " s ". Use format 'harness:count'") {})))
@@ -75,6 +75,10 @@
75
75
  :harness :codex
76
76
  :model nil
77
77
  :dry-run false
78
+ :detach false
79
+ :all false
80
+ :config-file nil
81
+ :startup-timeout nil
78
82
  :iterations 1
79
83
  :worker-specs nil}
80
84
  remaining args]
@@ -97,7 +101,7 @@
97
101
 
98
102
  (= arg "--harness")
99
103
  (let [h (keyword (second remaining))]
100
- (when-not (harnesses h)
104
+ (when-not (harness/valid-harness? h)
101
105
  (throw (ex-info (str "Unknown harness: " (second remaining) ". Known: " (str/join ", " (map name (sort harnesses)))) {})))
102
106
  (recur (assoc opts :harness h)
103
107
  (nnext remaining)))
@@ -106,6 +110,28 @@
106
110
  (recur (assoc opts :model (second remaining))
107
111
  (nnext remaining))
108
112
 
113
+ (= arg "--config")
114
+ (let [config-file (second remaining)]
115
+ (when (str/blank? config-file)
116
+ (throw (ex-info "--config requires a path" {:arg arg})))
117
+ (recur (assoc opts :config-file config-file)
118
+ (nnext remaining)))
119
+
120
+ (or (= arg "--detach") (= arg "--dettach"))
121
+ (recur (assoc opts :detach true)
122
+ (next remaining))
123
+
124
+ (= arg "--all")
125
+ (recur (assoc opts :all true)
126
+ (next remaining))
127
+
128
+ (= arg "--startup-timeout")
129
+ (let [seconds (parse-int (second remaining) nil)]
130
+ (when-not (and (number? seconds) (pos? seconds))
131
+ (throw (ex-info "--startup-timeout requires a positive integer (seconds)" {:arg arg})))
132
+ (recur (assoc opts :startup-timeout seconds)
133
+ (nnext remaining)))
134
+
109
135
  ;; Legacy flags (still supported)
110
136
  (= arg "--claude")
111
137
  (recur (assoc opts :harness :claude)
@@ -137,71 +163,169 @@
137
163
  ;; Commands
138
164
  ;; =============================================================================
139
165
 
140
- (declare cmd-swarm parse-model-string)
166
+ (declare cmd-swarm parse-model-string pid-alive?)
141
167
 
142
168
(defn- check-git-clean!
  "Print a warning when `git status --porcelain` reports uncommitted changes.

  Advisory only: nothing is aborted. No warning is printed when git itself
  exits non-zero. Returns nil."
  []
  (let [{:keys [exit out]} (process/sh ["git" "status" "--porcelain"]
                                       {:out :string :err :string})
        changes (str/trim out)]
    (when-not (or (not (zero? exit)) (str/blank? changes))
      (println "WARNING: Git working tree is dirty. You may experience merge conflicts.")
      (println changes))))
156
177
 
157
- (defn- check-stale-worktrees!
158
- "Abort if stale oompa worktrees or branches exist from a prior run.
159
- Corrupted .git/worktrees/ entries poison git worktree add for ALL workers,
160
- not just the worker whose entry is stale. (See swarm af32b180 — kimi-k2.5
161
- w9 went 20/20 doing nothing because w10's corrupt commondir blocked it.)"
162
- []
163
- ;; Prune orphaned metadata first — cleans entries whose directories are gone
178
(defn- dirty-worktree?
  "True when `git status --porcelain`, run inside the worktree at `path`,
  succeeds and prints at least one non-blank entry. False when git fails."
  [path]
  (let [res (process/sh ["git" "-C" path "status" "--porcelain"]
                        {:out :string :err :string})]
    (if (zero? (:exit res))
      (not (str/blank? (:out res)))
      false)))
184
+
185
(defn- worktree-branch-name
  "Ask git (`rev-parse --abbrev-ref HEAD`) which branch the worktree at `path`
  has checked out. Returns the trimmed output, or nil when git exits non-zero."
  [path]
  (let [git-args ["git" "-C" path "rev-parse" "--abbrev-ref" "HEAD"]
        res (process/sh git-args {:out :string :err :string})]
    (if (zero? (:exit res))
      (str/trim (:out res))
      nil)))
191
+
192
(defn- remove-stale-worktree!
  "Force-remove the worktree at `path`, then delete `branch` with
  `git branch -D`.

  The branch deletion is skipped when `branch` is nil, blank, or the literal
  \"HEAD\" (what `rev-parse --abbrev-ref` prints for a detached HEAD).
  Exit codes of both git commands are ignored."
  [path branch]
  (let [sh-opts {:out :string :err :string}]
    (process/sh ["git" "worktree" "remove" "--force" path] sh-opts)
    ;; str/blank? is also true for nil, so one check covers nil and blank.
    (when-not (or (str/blank? branch) (= branch "HEAD"))
      (process/sh ["git" "branch" "-D" branch] sh-opts))))
198
+
199
(defn- run-stale-review!
  "Invoke the reviewer model on partial worktree changes.

  Builds a prompt from `git diff HEAD` (truncated to 8000 chars) and
  `git status --short` of `worktree-path`, then tries each entry of
  `reviewer-configs` (maps with :harness and :model) in order until one
  produces a usable verdict.

  A verdict is accepted only when the reviewer process exited with status 0
  AND its output contains a `VERDICT: MERGE` / `VERDICT: DISCARD` line on its
  own. When several such lines are present (for example because the harness
  echoes the prompt, which itself lists both options), the LAST one wins.

  Returns :merge or :discard. Falls back to :discard when no reviewer gives a
  usable verdict, including when `reviewer-configs` is empty or every
  reviewer throws."
  [reviewer-configs worktree-path branch]
  (let [diff-out (or (:out (process/sh ["git" "-C" worktree-path "diff" "HEAD"]
                                       {:out :string :err :string}))
                     "")
        diff (if (> (count diff-out) 8000)
               (str (subs diff-out 0 8000) "\n...[diff truncated at 8000 chars]")
               diff-out)
        status-out (:out (process/sh ["git" "-C" worktree-path "status" "--short"]
                                     {:out :string}))
        prompt (str "You are reviewing partial/incomplete changes from an interrupted swarm run.\n\n"
                    "Branch: " branch "\n"
                    "Status:\n" status-out "\n\n"
                    "Diff:\n```\n" diff "\n```\n\n"
                    "Should these changes be merged into main or discarded?\n"
                    "MERGE if: changes are correct, complete, or valuable enough to keep.\n"
                    "DISCARD if: changes are broken, trivial, or not worth merging.\n\n"
                    "Your verdict MUST appear on its own line, exactly one of:\n"
                    "VERDICT: MERGE\n"
                    "VERDICT: DISCARD\n\n"
                    "Then briefly explain why.\n")
        ;; (?m) makes ^/$ match at line boundaries so the verdict must stand alone.
        verdict-re #"(?m)^\s*VERDICT:\s*(MERGE|DISCARD)\s*$"
        parse-verdict (fn [output]
                        (when-let [[_ word] (last (re-seq verdict-re (or output "")))]
                          (if (= word "MERGE") :merge :discard)))
        ask (fn [{:keys [harness model]}]
              (try
                (let [cmd (harness/build-cmd harness {:model model :prompt prompt})
                      res (process/sh cmd
                                      {:in (harness/process-stdin harness prompt)
                                       :out :string :err :string})]
                  ;; A failed reviewer never decides the outcome; move on to the next one.
                  (when (zero? (:exit res))
                    (parse-verdict (:out res))))
                (catch Exception _
                  nil)))]
    ;; `some` stops at the first reviewer that yields a verdict.
    (or (some ask reviewer-configs)
        :discard)))
243
+
244
(defn- handle-stale-worktrees!
  "Non-destructive startup check for existing oompa worktrees.

  - Always runs `git worktree prune` to clear orphaned metadata.
  - Never auto-removes/merges/discards worktrees at startup.
  - Prints a warning summary so concurrent swarms are not disrupted.

  This avoids clobbering active work from another swarm in the same repo.

  Branches are listed with `--format=%(refname:short)` so that the `*` / `+`
  markers plain `git branch --list` puts in front of checked-out branches do
  not end up inside the names; otherwise a branch owned by a worktree would
  never match and would be miscounted as an orphan.

  The argument is accepted but not used. Returns nil."
  [_reviewer-configs]
  ;; Step 1: prune orphaned git metadata first
  (let [prune-result (process/sh ["git" "worktree" "prune"] {:out :string :err :string})]
    (when-not (zero? (:exit prune-result))
      (println "WARNING: git worktree prune failed:")
      (println (:err prune-result))))

  ;; Step 2: discover existing oompa worktree dirs and oompa/* branches
  (let [ls-result (process/sh ["find" "." "-maxdepth" "1" "-type" "d" "-name" ".w*-i*"]
                              {:out :string})
        stale-dirs (when (zero? (:exit ls-result))
                     (->> (str/split-lines (:out ls-result))
                          (remove str/blank?)))
        br-result (process/sh ["git" "branch" "--list" "--format=%(refname:short)" "oompa/*"]
                              {:out :string})
        all-oompa-branches (when (zero? (:exit br-result))
                             (->> (str/split-lines (:out br-result))
                                  (map str/trim)
                                  (remove str/blank?)))]

    (when (or (seq stale-dirs) (seq all-oompa-branches))
      ;; Step 3: classify for warning output only (no mutation)
      (let [classified (mapv (fn [dir]
                               {:dir dir
                                :branch (worktree-branch-name dir)
                                :dirty? (dirty-worktree? dir)})
                             stale-dirs)
            clean (filter (complement :dirty?) classified)
            dirty (filter :dirty? classified)
            dir-branches (set (keep :branch classified))
            ;; Orphans are oompa/* branches that no discovered worktree has checked out.
            orphan-branches (remove #(contains? dir-branches %) all-oompa-branches)]
        (println)
        (println "WARNING: Existing oompa worktrees/branches detected; leaving them untouched.")
        (println (format " Worktrees: %d (%d dirty, %d clean)"
                         (count classified) (count dirty) (count clean)))
        (println (format " Orphan branches: %d" (count orphan-branches)))
        (when (seq dirty)
          (println " Dirty worktrees:")
          (doseq [{:keys [dir branch]} dirty]
            (println (format " %s (branch: %s)" dir (or branch "unknown")))))
        (println " Run `oompa cleanup` manually when you want to reclaim them.")))))
292
+
293
(defn- cleanup-iteration-worktrees!
  "Remove swarm iteration worktree dirs (.w*-i*) and oompa/* branches.

  Each directory found by `find` is passed to `remove-stale-worktree!`
  together with its checked-out branch. Afterwards every remaining `oompa/*`
  branch is deleted with `git branch -D`.

  Branch names come from `--format=%(refname:short)`, so the `*` / `+`
  markers that plain `git branch --list` prints cannot corrupt the name
  handed to `git branch -D`.

  Returns {:dirs-removed n :branches-removed n}. :dirs-removed is the number
  of directories processed; :branches-removed counts only the deletions for
  which git exited 0 (a branch that is still checked out is not counted)."
  []
  (let [sh-opts {:out :string :err :string}
        ls-result (process/sh ["find" "." "-maxdepth" "1" "-type" "d" "-name" ".w*-i*"]
                              {:out :string})
        dirs (if (zero? (:exit ls-result))
               (->> (str/split-lines (:out ls-result))
                    (remove str/blank?))
               [])
        _ (doseq [dir dirs]
            (remove-stale-worktree! dir (worktree-branch-name dir)))
        ;; Listed after the worktrees are gone, so only leftover branches remain.
        br-result (process/sh ["git" "branch" "--list" "--format=%(refname:short)" "oompa/*"]
                              {:out :string})
        branches (if (zero? (:exit br-result))
                   (->> (str/split-lines (:out br-result))
                        (map str/trim)
                        (remove str/blank?))
                   [])
        ;; Failures are tolerated (best effort) but not reported as removals.
        deleted (count (filter (fn [b]
                                 (zero? (:exit (process/sh ["git" "branch" "-D" b] sh-opts))))
                               branches))]
    {:dirs-removed (count dirs)
     :branches-removed deleted}))
316
+
196
317
 
197
318
  (defn- probe-model
198
319
  "Send 'say ok' to a model via its harness CLI. Returns true if model responds.
199
- Uses harness/build-probe-cmd for the command, /dev/null stdin to prevent hang."
320
+ Uses harness/build-probe-cmd for the command.
321
+ For stdin-based harnesses (e.g. claude), delivers the probe prompt via stdin.
322
+ For close-stdin harnesses, uses /dev/null to prevent hang."
200
323
  [harness-kw model]
201
324
  (try
202
325
  (let [cmd (harness/build-probe-cmd harness-kw model)
203
- null-in (io/input-stream (io/file "/dev/null"))
204
- proc (process/process cmd {:out :string :err :string :in null-in})
326
+ probe-prompt "[_HIDE_TEST_] say ok"
327
+ stdin-val (harness/process-stdin harness-kw probe-prompt)
328
+ proc (process/process cmd {:out :string :err :string :in stdin-val})
205
329
  result (deref proc 30000 :timeout)]
206
330
  (if (= result :timeout)
207
331
  (do (.destroyForcibly (:proc proc)) false)
@@ -210,14 +334,14 @@
210
334
 
211
335
  (defn- validate-models!
212
336
  "Probe each unique harness:model pair. Prints results and exits if any fail."
213
- [worker-configs review-model]
337
+ [worker-configs review-models]
214
338
  (let [;; Deduplicate by harness:model only (ignore reasoning level)
215
- models (cond-> (->> worker-configs
339
+ models (into (->> worker-configs
216
340
  (map (fn [wc]
217
341
  (let [{:keys [harness model]} (parse-model-string (:model wc))]
218
342
  {:harness harness :model model})))
219
343
  set)
220
- review-model (conj (select-keys review-model [:harness :model])))
344
+ (map #(select-keys % [:harness :model]) review-models))
221
345
  _ (println "Validating models...")
222
346
  results (pmap (fn [{:keys [harness model]}]
223
347
  (let [ok (probe-model harness model)]
@@ -237,36 +361,261 @@
237
361
  (System/exit 1))
238
362
  (println)))
239
363
 
240
- (defn cmd-run
241
- "Run orchestrator — uses oompa.json if present, otherwise simple mode"
364
(def ^:private default-detach-startup-timeout
  "Fallback number of seconds to wait for a detached swarm to report startup
  when neither --startup-timeout nor OOMPA_DETACH_STARTUP_TIMEOUT supplies one."
  20)
365
+
366
(defn- run-id
  "Return a fresh 8-character id: the first 8 characters of a random UUID."
  []
  (-> (java.util.UUID/randomUUID)
      str
      (subs 0 8)))
368
+
369
(defn- run-ts
  "Return the current local date-time formatted as yyyyMMdd-HHmmss."
  []
  (let [fmt (java.time.format.DateTimeFormatter/ofPattern "yyyyMMdd-HHmmss")
        now (java.time.LocalDateTime/now)]
    (.format now fmt)))
372
+
373
(defn- default-config-file
  "Return the first existing default config path, checked in order:
  \"oompa.json\", then \"oompa/oompa.json\". Returns nil when neither exists."
  []
  (some (fn [candidate]
          (when (.exists (io/file candidate))
            candidate))
        ["oompa.json" "oompa/oompa.json"]))
379
+
380
(defn- resolve-config-file
  "Pick the config file to run and return its canonical path.

  Precedence: the :config-file option, then the first positional argument,
  then whatever `default-config-file` finds. Returns nil when none applies.
  The chosen path is not checked for existence here."
  [opts args]
  (when-let [candidate (or (:config-file opts)
                           (first args)
                           (default-config-file))]
    (-> candidate
        io/file
        .getCanonicalPath)))
387
+
388
(defn- prepare-log-file!
  "Ensure the log directory exists and return the absolute path for this
  run's log file.

  Logs go under oompa/logs when an `oompa` entry exists in the working
  directory, otherwise under runs/logs. The file name is
  <timestamp>_<rid>.log; the file itself is not created here."
  [rid]
  (let [parent (if (.exists (io/file "oompa")) "oompa" "runs")
        log-dir (io/file parent "logs")
        file-name (str (run-ts) "_" rid ".log")]
    (.mkdirs log-dir)
    (.getCanonicalPath (io/file log-dir file-name))))
396
+
397
(defn- read-file-safe
  "Return the contents of `path` as a string, or \"\" when the file does not
  exist or reading it throws."
  [path]
  (try
    (let [f (io/file path)]
      (if-not (.exists f)
        ""
        (slurp path)))
    (catch Exception _
      "")))
405
+
406
(defn- tail-lines
  "Return the last `n` lines of `text` joined with newlines.
  A nil `text` is treated as the empty string."
  [text n]
  (let [lines (str/split-lines (or text ""))
        kept (take-last n lines)]
    (str/join "\n" kept)))
411
+
412
(defn- extract-swarm-id
  "Find the first `Swarm ID: <8 lowercase hex chars>` occurrence in `text`
  and return the id. Returns nil when `text` is nil or nothing matches."
  [text]
  (when text
    (when-let [[_ id] (re-find #"Swarm ID:\s*([0-9a-f]{8})" text)]
      id)))
417
+
418
(defn- startup-diagnostic-lines
  "Return up to the last 20 lines of `text` that contain \"ERROR:\", \"FAIL\"
  or \"WARNING:\". A nil `text` is treated as the empty string."
  [text]
  (let [diagnostic? (fn [line] (re-find #"ERROR:|FAIL|WARNING:" line))
        lines (str/split-lines (or text ""))]
    (take-last 20 (filter diagnostic? lines))))
423
+
424
(defn- print-preflight-warnings!
  "Print advisory warnings before a detached run. Never aborts.

  1. Warns when `which agent-cli` exits non-zero.
  2. Warns when `git status --porcelain` prints any entries, listing at most
     the first 20 and a total count when there are more.

  Returns nil."
  []
  (let [sh-opts {:out :string :err :string}
        which-result (process/sh ["which" "agent-cli"] sh-opts)]
    (when-not (zero? (:exit which-result))
      (println "WARNING: 'agent-cli' is not on PATH.")
      (println " Model validation may report false model-access failures."))
    (let [changed (->> (process/sh ["git" "status" "--porcelain"] sh-opts)
                       :out
                       str/split-lines
                       (remove str/blank?))
          total (count changed)]
      (when (pos? total)
        (println (format "WARNING: Git working tree is dirty (%d changed paths)." total))
        (println " Swarm startup may fail until changes are committed/stashed.")
        (doseq [entry (take 20 changed)]
          (println entry))
        (when (> total 20)
          (println (format "... (%d total changed paths)" total)))))))
443
+
444
(defn- runtime-classpath-entry
  "Best-effort classpath root for agentnet sources.

  Tried in order:
  1. $OOMPA_PACKAGE_ROOT/agentnet/src, when the env var is set.
  2. The first existing java.class.path entry whose canonical path ends with
     agentnet<sep>src.
  3. agentnet/src relative to the working directory.

  Always returns a canonical path string."
  []
  (letfn [(from-env []
            (some-> (System/getenv "OOMPA_PACKAGE_ROOT")
                    (io/file "agentnet" "src")
                    .getCanonicalPath))
          (from-classpath []
            (let [suffix (str "agentnet" java.io.File/separator "src")
                  separator (re-pattern java.io.File/pathSeparator)
                  entries (str/split (or (System/getProperty "java.class.path") "")
                                     separator)]
              (->> entries
                   (map str/trim)
                   (remove str/blank?)
                   (map io/file)
                   (filter #(.exists %))
                   (map #(.getCanonicalPath %))
                   (some (fn [p] (when (str/ends-with? p suffix) p))))))]
    (or (from-env)
        (from-classpath)
        (.getCanonicalPath (io/file "agentnet" "src")))))
461
+
462
(defn- run-classpath
  "Classpath string passed to the detached `bb` process; currently just the
  single entry returned by `runtime-classpath-entry`."
  []
  (let [entry (runtime-classpath-entry)]
    entry))
465
+
466
(defn- run-script-path
  "Locate swarm.bb and return its canonical path.

  With OOMPA_PACKAGE_ROOT set, the script is taken from that directory.
  Otherwise it is looked up two levels above the agentnet source root
  (<repo>/agentnet/src -> <repo>/swarm.bb); if that file does not exist the
  result falls back to swarm.bb in the working directory."
  []
  (let [pkg-root (System/getenv "OOMPA_PACKAGE_ROOT")
        script (if pkg-root
                 (io/file pkg-root "swarm.bb")
                 (let [repo-root (some-> (runtime-classpath-entry)
                                         io/file
                                         .getParentFile
                                         .getParentFile)
                       candidate (some-> repo-root (io/file "swarm.bb"))]
                   (if (and candidate (.exists candidate))
                     candidate
                     (io/file "swarm.bb"))))]
    (.getCanonicalPath script)))
477
+
478
(defn- detached-cmd
  "Build the argv vector for the background process:
  nohup bb --classpath <cp> <swarm.bb> swarm [--dry-run] <config-file>.
  `--dry-run` is included only when (:dry-run opts) is truthy."
  [opts config-file]
  (let [base ["nohup" "bb" "--classpath" (run-classpath) (run-script-path) "swarm"]
        flags (if (:dry-run opts) ["--dry-run"] [])]
    (-> base
        (into flags)
        (conj config-file))))
483
+
484
(defn- spawn-detached!
  "Start `cmd` (a list of strings) in the working directory with stdin read
  from /dev/null and both stdout and stderr appended to `log-file`.
  Sleeps 100 ms after the launch and returns the child's pid."
  [cmd log-file]
  (let [log (io/file log-file)
        builder (ProcessBuilder. ^java.util.List cmd)]
    (.directory builder (io/file "."))
    (.redirectInput builder (java.lang.ProcessBuilder$Redirect/from (io/file "/dev/null")))
    (.redirectOutput builder (java.lang.ProcessBuilder$Redirect/appendTo log))
    (.redirectError builder (java.lang.ProcessBuilder$Redirect/appendTo log))
    (let [pid (.pid (.start builder))]
      ;; Give spawn a short window before validation checks liveness.
      (Thread/sleep 100)
      pid)))
497
+
498
(defn- pid-alive?
  "True when `kill -0 <pid>` exits with status 0, false otherwise."
  [pid]
  (let [{:keys [exit]} (process/sh ["kill" "-0" (str pid)]
                                   {:out :string :err :string})]
    (zero? exit)))
502
+
503
(defn- wait-for-startup!
  "Poll `log-file` once per second until the detached swarm reports startup.

  Each round reads the log and checks whether `pid` is alive, then decides in
  this order:
  - log contains \"Started event written to runs/\" -> {:status :started
    :content <log> :swarm-id <id or nil>}
  - process is no longer alive -> {:status :failed :content <log>}
  - `timeout-sec` rounds have already been waited -> {:status :timeout
    :content <log>}
  Otherwise it sleeps 1000 ms and tries again."
  [pid log-file timeout-sec]
  (loop [elapsed 0]
    (let [log-text (read-file-safe log-file)
          alive? (pid-alive? pid)
          outcome (cond
                    (str/includes? log-text "Started event written to runs/") :started
                    (not alive?) :failed
                    (>= elapsed timeout-sec) :timeout)]
      (case outcome
        :started {:status :started
                  :content log-text
                  :swarm-id (extract-swarm-id log-text)}
        (:failed :timeout) {:status outcome
                            :content log-text}
        (do
          (Thread/sleep 1000)
          (recur (inc elapsed)))))))
527
+
528
(defn- cmd-run-detached
  "Launch the swarm for `config-file` as a background process and report on
  its startup.

  Steps: print preflight warnings; exit 1 with a hint when the config file
  does not exist; spawn the detached command with output appended to a fresh
  log file; wait for startup (timeout from :startup-timeout, else
  OOMPA_DETACH_STARTUP_TIMEOUT, else the default). Exits 1 when the process
  died during startup, warns on timeout, then prints any diagnostic log
  lines and a summary box with run id, pid, log file and swarm id."
  [opts config-file]
  (print-preflight-warnings!)
  (let [cfg (io/file config-file)]
    (when-not (.exists cfg)
      (println (format "ERROR: Config file not found: %s" (.getCanonicalPath cfg)))
      (println (format " Working directory: %s" (.getCanonicalPath (io/file "."))))
      (println)
      (println "Tip: paths are relative to the working directory. Did you mean:")
      (println (format " oompa run --config oompa/%s" (.getName cfg)))
      (System/exit 1)))
  (let [timeout-sec (or (:startup-timeout opts)
                        (parse-int (System/getenv "OOMPA_DETACH_STARTUP_TIMEOUT")
                                   default-detach-startup-timeout))
        rid (run-id)
        log-file (prepare-log-file! rid)
        pid (spawn-detached! (detached-cmd opts config-file) log-file)]
    (println (format "Config: %s" config-file))
    (when (:dry-run opts)
      (println "Merge mode: dry-run"))
    (let [startup (wait-for-startup! pid log-file timeout-sec)
          status (:status startup)
          content (:content startup)
          swarm-id (:swarm-id startup)]
      (cond
        (= status :failed)
        (do
          (println)
          (println "ERROR: Detached swarm exited during startup validation.")
          (println "Startup log excerpt:")
          (println (tail-lines content 120))
          (System/exit 1))

        (= status :timeout)
        (do
          (println)
          (println (format "WARNING: Detached swarm still initializing after %ss." timeout-sec))
          (println "Recent startup log lines:")
          (println (tail-lines content 40))))
      (when-let [diag (seq (startup-diagnostic-lines content))]
        (println)
        (println "Startup diagnostics:")
        (doseq [line diag]
          (println line)))
      (println)
      (doseq [row [" ┌──────────────────────────────────────────────────────────────┐"
                   " │ OOMPA SWARM RUN (DETACHED) │"
                   (format " │ Run id: %-46s│" rid)
                   (format " │ PID: %-46s│" pid)
                   (format " │ Log file: %-46s│" log-file)
                   (format " │ Swarm ID: %-46s│" (or swarm-id "(pending)"))
                   " └──────────────────────────────────────────────────────────────┘"]]
        (println row))
      (println))))
581
+
582
(defn- cmd-run-legacy
  "Run the orchestrator once from --workers specs (legacy, config-less mode).

  For every spec {:harness h :count n} it creates n single-iteration workers
  with ids <harness>-<index>, all sharing one new swarm id and the :model
  option, prints a summary and runs them. Without :worker-specs it prints a
  notice that simple mode is retired and exits 1. `args` is not used."
  [opts args]
  (let [swarm-id (make-swarm-id)]
    (if-let [specs (:worker-specs opts)]
      ;; Mixed worker specs: --workers claude:5 opencode:2
      (let [workers (for [{:keys [harness count]} specs
                          idx (range count)]
                      (worker/create-worker
                       {:id (format "%s-%d" (name harness) idx)
                        :swarm-id swarm-id
                        :harness harness
                        :model (:model opts)
                        :iterations 1}))]
        (println (format "Running once with mixed workers (swarm %s):" swarm-id))
        (doseq [{n :count h :harness} specs]
          (println (format " %dx %s" n (name h))))
        (println)
        (worker/run-workers! workers))
      ;; Simple mode retired — use oompa.json or --workers harness:count
      (do
        (println "Simple mode is no longer supported. Use oompa.json or --workers harness:count.")
        (System/exit 1)))))
610
+
611
(defn cmd-run
  "Run swarm from config. Use --detach for background mode.

  When no config file can be resolved from the options, the positional
  arguments or the default locations, falls back to the legacy
  worker-spec mode."
  [opts args]
  (let [config-file (resolve-config-file opts args)]
    (cond
      (nil? config-file) (cmd-run-legacy opts args)
      (:detach opts) (cmd-run-detached opts config-file)
      :else (cmd-swarm opts [config-file]))))
270
619
 
271
620
  (defn cmd-loop
272
621
  "Run orchestrator N times"
@@ -318,46 +667,72 @@
318
667
  (orchestrator/run-once! opts))))
319
668
 
320
669
(defn cmd-status
  "Show running swarms.

  A run counts as running when it has a started event, no stopped event, a
  recorded pid, and that pid is alive. Prints one line per running swarm with
  its id, pid, worker count and number of recorded cycles."
  [opts args]
  (let [run-ids (runs/list-runs)]
    (if-not (seq run-ids)
      (println "No swarms found.")
      (let [describe (fn [id]
                       (let [started (runs/read-started id)
                             stopped (runs/read-stopped id)
                             pid (:pid started)]
                         (when (and started (not stopped) pid (pid-alive? pid))
                           {:id id
                            :pid pid
                            :workers (count (:workers started))
                            :work-count (count (runs/list-cycles id))})))
            running (keep describe run-ids)]
        (if-not (seq running)
          (println "No running swarms.")
          (do
            (println (format "Running Swarms: %d" (count running)))
            (doseq [{:keys [id pid workers work-count]} running]
              (println (format " Swarm: %s | PID: %s | Workers: %d | Work Count: %d"
                               id pid workers work-count)))))))))
691
+
692
+ (defn cmd-info
693
+ "Show detailed information of a swarm run — reads event-sourced runs/{swarm-id}/ data."
694
+ [opts args]
695
+ (let [run-ids (runs/list-runs)]
696
+ (if (seq run-ids)
697
+ (let [target-ids (if (seq args) [(first args)] run-ids)]
698
+ (doseq [swarm-id target-ids]
699
+ (let [started (runs/read-started swarm-id)
700
+ stopped (runs/read-stopped swarm-id)
701
+ cycles (runs/list-cycles swarm-id)
702
+ reviews (runs/list-reviews swarm-id)]
703
+ (println "--------------------------------------------------")
704
+ (println (format "Swarm: %s" swarm-id))
705
+ (when started
706
+ (println (format " Started: %s" (:started-at started)))
707
+ (println (format " PID: %s" (or (:pid started) "N/A")))
708
+ (println (format " Config: %s" (or (:config-file started) "N/A")))
709
+ (println (format " Workers: %d" (count (:workers started)))))
710
+ (println)
711
+ (if stopped
712
+ (println (format "Stopped: %s (reason: %s%s)"
713
+ (:stopped-at stopped)
714
+ (:reason stopped)
715
+ (if (:error stopped)
716
+ (str ", error: " (:error stopped))
717
+ "")))
718
+ (println " (still running — no stopped event yet)"))
719
+ (when (seq cycles)
720
+ (println)
721
+ (println (format "Cycles: %d total" (count cycles)))
722
+ (doseq [c cycles]
723
+ (println (format " %s-c%d: %s (%dms, claimed: %s)"
724
+ (:worker-id c) (:cycle c)
725
+ (:outcome c)
726
+ (or (:duration-ms c) 0)
727
+ (str/join ", " (or (:claimed-task-ids c) []))))))
728
+ (when (seq reviews)
729
+ (println)
730
+ (println (format "Reviews: %d total" (count reviews)))
731
+ (doseq [r reviews]
732
+ (println (format " %s-c%d-r%d: %s"
733
+ (:worker-id r) (:cycle r) (:round r)
734
+ (:verdict r)))))
735
+ (println))))
361
736
  ;; Fall back to legacy JSONL format
362
737
  (let [runs-dir (io/file "runs")
363
738
  files (when (.exists runs-dir)
@@ -377,6 +752,166 @@
377
752
  (println (format "Total: %d tasks" (count entries))))))
378
753
  (println "No runs found."))))))
379
754
 
755
(def ^:private error-outcomes
  "Cycle :outcome values that are counted as failures in run metrics."
  #{"error"
    "merge-failed"
    "rejected"
    "stuck"
    "needs-followup"})

(def ^:private terminal-run-outcomes
  "Cycle :outcome values that count as one finished run for a worker."
  #{"merged"
    "rejected"
    "error"
    "merge-failed"
    "sync-failed"
    "stuck"
    "no-changes"
    "needs-followup"})
760
+
761
(defn- run-state
  "Derive a run's lifecycle label from its started/stopped events.

  Returns \"missing-started\" when there is no started event,
  \"stopped/<reason>\" when a stopped event exists, \"running\" when the
  recorded pid is alive, and \"stale\" otherwise."
  [started stopped]
  (if (nil? started)
    "missing-started"
    (if stopped
      (str "stopped/" (:reason stopped))
      (if (pid-alive? (:pid started))
        "running"
        "stale"))))
769
+
770
(defn- latest-cycles-by-worker
  "Return a map of worker-id -> the cycle entry with the highest :cycle for
  that worker. A missing :cycle counts as 0; on ties the entry seen first is
  kept."
  [cycles]
  (let [rank (fn [entry] (or (:cycle entry) 0))]
    (reduce (fn [latest entry]
              (let [wid (:worker-id entry)
                    prev (get latest wid)]
                (cond
                  (nil? prev) (assoc latest wid entry)
                  (> (rank entry) (rank prev)) (assoc latest wid entry)
                  :else latest)))
            {}
            cycles)))
782
+
783
(defn- worker-runtime
  "Best-effort runtime label for one worker in the view output.

  Checked in order: \"completed\" once the worker's finished runs reach its
  quota (:runs, else :iterations, else 0); \"stopped\" / \"stale\" following
  the run state; \"starting\" when the worker has no cycle yet; \"working\"
  and \"idle\" for the latest outcomes \"working\" and \"executor-done\";
  otherwise the latest outcome itself (\"-\" when absent)."
  [worker latest-cycle worker-cycles run-state*]
  (let [quota (or (:runs worker) (:iterations worker) 0)
        finished (->> worker-cycles
                      (filter (comp terminal-run-outcomes :outcome))
                      count)
        last-outcome (or (:outcome latest-cycle) "-")]
    (cond
      (<= quota finished) "completed"
      (str/starts-with? run-state* "stopped/") "stopped"
      (= "stale" run-state*) "stale"
      (nil? latest-cycle) "starting"
      :else (case last-outcome
              "working" "working"
              "executor-done" "idle"
              last-outcome))))
797
+
798
(defn- model-label
  "Format a worker's model as harness:model, with :reasoning appended as a
  third segment when present."
  [{:keys [harness model reasoning]}]
  (cond-> (str harness ":" model)
    reasoning (str ":" reasoning)))
801
+
802
(defn- run-metrics
  "Summarize cycle metrics for a run.

  Returns {:merged n :failed n :claimed n :completed n} where :merged counts
  cycles with outcome \"merged\", :failed counts cycles whose outcome is in
  `error-outcomes`, :claimed is the number of distinct non-blank claimed task
  ids over all cycles, and :completed is the same count restricted to merged
  cycles."
  [cycles]
  (let [merged-cycles (filter #(= "merged" (:outcome %)) cycles)
        failed-cycles (filter #(error-outcomes (:outcome %)) cycles)
        distinct-task-ids (fn [cs]
                            (->> cs
                                 (mapcat #(or (:claimed-task-ids %) []))
                                 (remove str/blank?)
                                 set))]
    {:merged (count merged-cycles)
     :failed (count failed-cycles)
     :claimed (count (distinct-task-ids cycles))
     :completed (count (distinct-task-ids merged-cycles))}))
819
+
820
(defn- cmd-view-one
  "Print a detailed view of one swarm run: header fields (state, start time,
  pid, config, stop time), aggregate metrics, and one table row per worker
  sorted by id. Prints a not-found message and exits 1 when the run has no
  started event."
  [swarm-id]
  (let [started (runs/read-started swarm-id)]
    (if-not started
      (do
        (println (format "Swarm not found: %s" swarm-id))
        (System/exit 1))
      (let [stopped (runs/read-stopped swarm-id)
            cycles (or (runs/list-cycles swarm-id) [])
            reviews (or (runs/list-reviews swarm-id) [])
            workers (or (:workers started) [])
            run-state* (run-state started stopped)
            metrics (run-metrics cycles)
            latest-by-worker (latest-cycles-by-worker cycles)
            cycles-by-worker (group-by :worker-id cycles)
            terminal? (fn [cycle] (terminal-run-outcomes (:outcome cycle)))]
        (println (format "Swarm: %s" swarm-id))
        (println (format "State: %s" run-state*))
        (println (format "Started: %s" (:started-at started)))
        (println (format "PID: %s" (or (:pid started) "N/A")))
        (println (format "Config: %s" (or (:config-file started) "N/A")))
        (when stopped
          (println (format "Stopped: %s" (:stopped-at stopped))))
        (println (format "Cycles: %d" (count cycles)))
        (println (format "PRs: merged=%d failed=%d" (:merged metrics) (:failed metrics)))
        (println (format "Tasks: claimed=%d completed=%d created=n/a"
                         (:claimed metrics) (:completed metrics)))
        (println (format "Reviews: %d" (count reviews)))
        (println)
        (println "Workers:")
        (println "ID | Runtime | Runs | Cycles | Last Outcome | Claimed | Model")
        (println "----+-----------+--------+---------+----------------+---------+------------------------------")
        (doseq [w (sort-by :id workers)]
          (let [wid (:id w)
                latest (get latest-by-worker wid)
                worker-cycles (or (get cycles-by-worker wid) [])
                run-max (or (:runs w) (:iterations w) 0)
                runs-done (count (filter terminal? worker-cycles))
                cycles-done (or (:cycle latest) 0)
                runtime (worker-runtime w latest worker-cycles run-state*)
                outcome (or (:outcome latest) "-")
                claimed (count (or (:claimed-task-ids latest) []))]
            (println (format "%-3s | %-9s | %4d/%-3d | %7d | %-14s | %-7d | %s"
                             wid runtime runs-done run-max cycles-done outcome claimed
                             (model-label w)))))))))
862
+
863
(defn cmd-list
  "List recent swarms with liveness + activity metrics.
  Default: 20 most recent. Use --all for full history.

  One table row per run: id, state, pid, worker count, active workers, cycle
  count, merged / failed cycles, completed task count and start time. A
  worker is counted as active only while the run state is \"running\" and
  its finished runs are below its quota (:runs, else :iterations, else 0).
  `args` is not used."
  [opts args]
  (let [run-ids (or (runs/list-runs) [])]
    (if (empty? run-ids)
      (println "No swarm runs found.")
      (let [shown (if (:all opts) run-ids (take 20 run-ids))
            terminal? (fn [cycle] (terminal-run-outcomes (:outcome cycle)))]
        (println "Swarm Runs:")
        (println "ID | State | PID | Workers | Active | Cycles | Merged | Failed | Done | Started")
        (println "---------+------------------+--------+---------+--------+--------+--------+--------+------+-------------------------")
        (doseq [rid shown]
          (let [started (runs/read-started rid)
                stopped (runs/read-stopped rid)
                cycles (or (runs/list-cycles rid) [])
                workers (or (:workers started) [])
                metrics (run-metrics cycles)
                cycles-by-worker (group-by :worker-id cycles)
                state* (run-state started stopped)
                ;; A worker is still active while it has runs left in its quota.
                unfinished? (fn [w]
                              (let [run-max (or (:runs w) (:iterations w) 0)
                                    runs-done (count (filter terminal?
                                                             (or (get cycles-by-worker (:id w)) [])))]
                                (< runs-done run-max)))
                active-count (if (= state* "running")
                               (count (filter unfinished? workers))
                               0)]
            (println (format "%-8s | %-16s | %-6s | %7d | %6d | %6d | %6d | %6d | %4d | %s"
                             rid
                             state*
                             (or (:pid started) "-")
                             (count workers)
                             active-count
                             (count cycles)
                             (:merged metrics)
                             (:failed metrics)
                             (:completed metrics)
                             (or (:started-at started) "-")))))
        (when (and (not (:all opts)) (> (count run-ids) 20))
          (println (format "\nShowing 20 of %d runs. Use --all for full history." (count run-ids))))
        (println)
        (println "Use `oompa view <swarm-id>` for detailed single-swarm info.")))))
907
+
908
(defn cmd-view
  "Show detailed runtime for one swarm: the id given as the first argument,
  or else the first id returned by `runs/list-runs`. Prints a notice when
  there is no run to show."
  [opts args]
  (let [swarm-id (or (first args)
                     (first (runs/list-runs)))]
    (if-not swarm-id
      (println "No swarm runs found.")
      (cmd-view-one swarm-id))))
914
+
380
915
  (defn cmd-worktrees
381
916
  "List worktree status"
382
917
  [opts args]
@@ -395,15 +930,18 @@
395
930
  (println "No worktrees initialized."))))
396
931
 
397
932
  (defn cmd-cleanup
398
- "Remove all worktrees"
933
+ "Remove all worktrees (legacy pool + swarm iteration worktrees)."
399
934
  [opts args]
400
935
  (let [state-file (io/file ".workers/state.edn")]
936
+ (println "Removing worktrees...")
401
937
  (if (.exists state-file)
402
938
  (let [pool (read-string (slurp state-file))]
403
- (println "Removing worktrees...")
404
- (worktree/cleanup-pool! pool)
405
- (println "Done."))
406
- (println "No worktrees to clean up."))))
939
+ (worktree/cleanup-pool! pool))
940
+ (println "No legacy pool worktrees to clean up."))
941
+ (let [{:keys [dirs-removed branches-removed]} (cleanup-iteration-worktrees!)]
942
+ (println (format "Removed %d swarm worktree dir(s) and %d oompa branch(es)."
943
+ dirs-removed branches-removed)))
944
+ (println "Done.")))
407
945
 
408
946
  (defn cmd-context
409
947
  "Print current context (for debugging prompts)"
@@ -429,32 +967,52 @@
429
967
 
430
968
  Supported formats:
431
969
  - harness:model
432
- - harness:model:reasoning (codex only)
970
+ - harness:model:reasoning (if reasoning is in reasoning-variants)
433
971
  - model (defaults harness to :codex)
434
972
 
435
- Note: non-codex model identifiers may contain ':' (for example
436
- openrouter/...:free). Those suffixes are preserved in :model."
973
+ Note: model identifiers may contain ':' (for example openrouter/...:free).
974
+ Those suffixes are preserved in :model if not a known reasoning variant."
437
975
  [s]
438
976
  (if (and s (str/includes? s ":"))
439
977
  (let [[harness-str rest*] (str/split s #":" 2)
440
978
  harness (keyword harness-str)]
441
- (if (contains? harnesses harness)
442
- (if (= harness :codex)
443
- ;; Codex may include a reasoning suffix at the end. Only treat the
444
- ;; last segment as reasoning if it matches a known variant.
445
- (if-let [idx (str/last-index-of rest* ":")]
446
- (let [model* (subs rest* 0 idx)
447
- reasoning* (subs rest* (inc idx))]
448
- (if (contains? reasoning-variants reasoning*)
449
- {:harness harness :model model* :reasoning reasoning*}
450
- {:harness harness :model rest*}))
451
- {:harness harness :model rest*})
452
- ;; Non-codex: preserve full model string (including any ':suffix').
979
+ (if (harness/valid-harness? harness)
980
+ ;; Check for reasoning suffix for any valid harness
981
+ (if-let [idx (str/last-index-of rest* ":")]
982
+ (let [model* (subs rest* 0 idx)
983
+ reasoning* (subs rest* (inc idx))]
984
+ (if (contains? reasoning-variants reasoning*)
985
+ {:harness harness :model model* :reasoning reasoning*}
986
+ {:harness harness :model rest*}))
453
987
  {:harness harness :model rest*})
454
988
  ;; Not a known harness prefix, treat as raw model on default harness.
455
989
  {:harness :codex :model s}))
456
990
  {:harness :codex :model s}))
457
991
 
992
(defn- parse-reviewer-entry
  "Turn one reviewer config entry into a parsed reviewer map.

  Accepted shapes:
    - a string model spec, \"harness:model[:reasoning]\", which is passed
      straight to `parse-model-string`;
    - a map with a string :model and an optional :prompt, where :prompt
      may be a vector of paths or a single path string.

  For map entries the result of `parse-model-string` is extended with
  :prompts (a vector of paths, a one-element vector for a single string,
  or [] when :prompt is absent or of any other type). String entries are
  returned exactly as `parse-model-string` produces them.

  Returns nil for anything else, including maps whose :model is not a
  string."
  [entry]
  (if (string? entry)
    (parse-model-string entry)
    (when (map? entry)
      (let [model-spec (:model entry)
            raw-prompt (:prompt entry)]
        (when (string? model-spec)
          (assoc (parse-model-string model-spec)
                 :prompts (cond
                            (vector? raw-prompt) raw-prompt
                            (string? raw-prompt) [raw-prompt]
                            :else                [])))))))
1015
+
458
1016
  (defn cmd-swarm
459
1017
  "Run multiple worker configs from oompa.json in parallel"
460
1018
  [opts args]
@@ -462,42 +1020,37 @@
462
1020
  f (io/file config-file)
463
1021
  swarm-id (make-swarm-id)]
464
1022
  (when-not (.exists f)
465
- (println (format "Config file not found: %s" config-file))
466
- (println)
467
- (println "Create oompa.json with format:")
468
- (println "{")
469
- (println " \"workers\": [")
470
- (println " {\"model\": \"codex:gpt-5.3-codex:medium\", \"prompt\": \"prompts/executor.md\", \"iterations\": 10, \"count\": 3, \"can_plan\": false},")
471
- (println " {\"model\": \"claude:opus\", \"prompt\": [\"prompts/base.md\", \"prompts/planner.md\"], \"count\": 1},")
472
- (println " {\"model\": \"gemini:gemini-3-pro-preview\", \"prompt\": [\"prompts/executor.md\"], \"count\": 1}")
473
- (println " ]")
474
- (println "}")
1023
+ (println (format "ERROR: Config file not found: %s" (.getCanonicalPath f)))
1024
+ (println (format " Working directory: %s" (.getCanonicalPath (io/file "."))))
475
1025
  (println)
476
- (println "prompt: string or array of paths concatenated into one prompt.")
1026
+ (println "Tip: paths are relative to the working directory. Did you mean:")
1027
+ (println (format " oompa run --config oompa/%s" (.getName f)))
477
1028
  (System/exit 1))
478
1029
  ;; Preflight: abort if git is dirty to prevent merge conflicts
479
1030
  (check-git-clean!)
480
- ;; Preflight: abort if stale worktrees from prior runs would poison git
481
- (check-stale-worktrees!)
482
1031
 
483
1032
  (let [config (json/parse-string (slurp f) true)
484
- ;; Parse reviewer config — supports both formats:
485
- ;; Legacy: {"review_model": "harness:model:reasoning"}
486
- ;; New: {"reviewer": {"model": "harness:model:reasoning", "prompt": ["path.md"]}}
487
- reviewer-config (:reviewer config)
488
- review-parsed (cond
489
- reviewer-config
490
- (let [parsed (parse-model-string (:model reviewer-config))
491
- prompts (let [p (:prompt reviewer-config)]
492
- (cond (vector? p) p
493
- (string? p) [p]
494
- :else []))]
495
- (assoc parsed :prompts prompts))
496
-
497
- (:review_model config)
498
- (parse-model-string (:review_model config))
499
-
500
- :else nil)
1033
+ ;; Parse reviewer config — supports legacy + new formats.
1034
+ generic-reviewers (cond
1035
+ (:review_models config)
1036
+ (->> (:review_models config)
1037
+ (map parse-reviewer-entry)
1038
+ (remove nil?)
1039
+ vec)
1040
+
1041
+ (:review_model config)
1042
+ (->> [(:review_model config)]
1043
+ (map parse-reviewer-entry)
1044
+ (remove nil?)
1045
+ vec)
1046
+
1047
+ (:reviewer config)
1048
+ (->> [(:reviewer config)]
1049
+ (map parse-reviewer-entry)
1050
+ (remove nil?)
1051
+ vec)
1052
+
1053
+ :else [])
501
1054
 
502
1055
  ;; Parse planner config — optional dedicated planner
503
1056
  ;; Runs in project root, no worktree/review/merge, respects max_pending backpressure
@@ -512,8 +1065,19 @@
512
1065
  :prompts prompts
513
1066
  :max-pending (or (:max_pending planner-config) 10))))
514
1067
 
1068
+
515
1069
  worker-configs (:workers config)
516
1070
 
1071
+ ;; Require max_cycle to be present on all workers
1072
+ _ (doseq [[idx wc] (map-indexed vector worker-configs)]
1073
+ (when (or (:iterations wc) (:max_cycles wc))
1074
+ (println (format "ERROR: Worker %d uses deprecated 'iterations' or 'max_cycles'. Consolidate strictly on 'max_cycle'." idx))
1075
+ (System/exit 1))
1076
+ (when-not (:max_cycle wc)
1077
+ (println (format "ERROR: Worker %d missing 'max_cycle' in config." idx))
1078
+ (System/exit 1)))
1079
+
1080
+
517
1081
  ;; Expand worker configs by count
518
1082
  expanded-workers (mapcat (fn [wc]
519
1083
  (let [cnt (or (:count wc) 1)]
@@ -523,23 +1087,63 @@
523
1087
  ;; Convert to worker format
524
1088
  workers (map-indexed
525
1089
  (fn [idx wc]
526
- (let [{:keys [harness model reasoning]} (parse-model-string (:model wc))]
1090
+ (let [{:keys [harness model reasoning]} (parse-model-string (:model wc))
1091
+ ;; Support per-worker reviewer override (legacy + new):
1092
+ ;; - review_model: "harness:model"
1093
+ ;; - review_models: ["harness:model", ...]
1094
+ ;; - reviewer: {model, prompt}
1095
+ ;; - reviewers: [string|map, ...]
1096
+ worker-reviewers (cond
1097
+ (:reviewers wc)
1098
+ (->> (:reviewers wc)
1099
+ (map parse-reviewer-entry)
1100
+ (remove nil?)
1101
+ vec)
1102
+
1103
+ (:review_models wc)
1104
+ (->> (:review_models wc)
1105
+ (map parse-reviewer-entry)
1106
+ (remove nil?)
1107
+ vec)
1108
+
1109
+ (:reviewer wc)
1110
+ (->> [(:reviewer wc)]
1111
+ (map parse-reviewer-entry)
1112
+ (remove nil?)
1113
+ vec)
1114
+
1115
+ (:review_model wc)
1116
+ (->> [(:review_model wc)]
1117
+ (map parse-reviewer-entry)
1118
+ (remove nil?)
1119
+ vec)
1120
+
1121
+ :else [])
1122
+ all-reviewers (->> (concat worker-reviewers generic-reviewers)
1123
+ (map #(select-keys % [:harness :model :reasoning :prompts]))
1124
+ (distinct)
1125
+ (vec))]
527
1126
  (worker/create-worker
528
1127
  {:id (str "w" idx)
529
1128
  :swarm-id swarm-id
530
1129
  :harness harness
531
1130
  :model model
532
1131
  :reasoning reasoning
533
- :iterations (or (:iterations wc) 10)
1132
+ :runs (or (:runs wc) 10)
1133
+ :max-cycles (:max_cycle wc)
1134
+ :iterations (:max_cycle wc)
534
1135
  :prompts (:prompt wc)
535
1136
  :can-plan (:can_plan wc)
536
1137
  :wait-between (:wait_between wc)
1138
+ :max-wait-for-tasks (:max_wait_for_tasks wc)
537
1139
  :max-working-resumes (:max_working_resumes wc)
538
- :review-harness (:harness review-parsed)
539
- :review-model (:model review-parsed)
540
- :review-prompts (:prompts review-parsed)})))
1140
+ :reviewers all-reviewers})))
541
1141
  expanded-workers)]
542
1142
 
1143
+ ;; Preflight: handle stale worktrees from prior runs before launching workers.
1144
+ ;; Empty ones are auto-cleaned silently; dirty ones trigger an interactive review.
1145
+ (handle-stale-worktrees! generic-reviewers)
1146
+
543
1147
  (println (format "Swarm config from %s:" config-file))
544
1148
  (println (format " Swarm ID: %s" swarm-id))
545
1149
  (when planner-parsed
@@ -550,22 +1154,19 @@
550
1154
  (if (seq (:prompts planner-parsed))
551
1155
  (str ", prompts: " (str/join ", " (:prompts planner-parsed)))
552
1156
  ""))))
553
- (when review-parsed
554
- (println (format " Reviewer: %s:%s%s"
555
- (name (:harness review-parsed))
556
- (:model review-parsed)
557
- (if (seq (:prompts review-parsed))
558
- (str " (prompts: " (str/join ", " (:prompts review-parsed)) ")")
559
- ""))))
1157
+ (when (seq generic-reviewers)
1158
+ (println (format " Generic Reviewers: %s"
1159
+ (str/join ", " (map #(str (name (:harness %)) ":" (:model %)) generic-reviewers)))))
560
1160
  (println (format " Workers: %d total" (count workers)))
561
1161
  (doseq [[idx wc] (map-indexed vector worker-configs)]
562
1162
  (let [{:keys [harness model reasoning]} (parse-model-string (:model wc))]
563
- (println (format " - %dx %s:%s%s (%d iters%s)"
1163
+ (println (format " - %dx %s:%s%s (%d runs, %d cycle cap%s)"
564
1164
  (or (:count wc) 1)
565
1165
  (name harness)
566
1166
  model
567
1167
  (if reasoning (str ":" reasoning) "")
568
- (or (:iterations wc) 10)
1168
+ (or (:runs wc) 10)
1169
+ (:max_cycle wc)
569
1170
  (if (:prompt wc) (str ", " (:prompt wc)) "")))))
570
1171
  (println)
571
1172
 
@@ -573,13 +1174,13 @@
573
1174
  ;; Include planner model in validation if configured
574
1175
  (validate-models! (cond-> worker-configs
575
1176
  planner-config (conj planner-config))
576
- review-parsed)
1177
+ generic-reviewers)
577
1178
 
578
1179
  ;; Write started event to runs/{swarm-id}/started.json
579
1180
  (runs/write-started! swarm-id
580
1181
  {:workers workers
581
1182
  :planner-config planner-parsed
582
- :reviewer-config review-parsed
1183
+ :reviewer-configs generic-reviewers
583
1184
  :config-file config-file})
584
1185
  (println (format "\nStarted event written to runs/%s/started.json" swarm-id))
585
1186
 
@@ -614,6 +1215,26 @@
614
1215
  (doseq [t (tasks/list-current)]
615
1216
  (println (format " - %s: %s" (:id t) (:summary t)))))))
616
1217
 
1218
(defn cmd-requeue
  "Move tasks from current/ back to pending/.

  With positional task IDs, only the requested IDs that are presently
  listed by `tasks/list-current` are handed to `tasks/recycle-tasks!`;
  requested IDs that are not in current/ are reported afterwards.
  Without IDs, `tasks/recycle-all-current!` is called.

  The summary line is built from the value returned by the recycle call.
  `opts` is accepted for the command-handler signature and is not read."
  [opts args]
  (tasks/ensure-dirs!)
  (let [explicit?  (seq args)
        in-current (into #{} (map :id) (tasks/list-current))
        ;; No explicit IDs means every current task is requested.
        wanted     (if explicit? (set args) in-current)
        eligible   (into #{} (filter in-current) wanted)
        moved      (if explicit?
                     (tasks/recycle-tasks! eligible)
                     (tasks/recycle-all-current!))
        absent     (sort (remove eligible wanted))]
    (if-not (seq moved)
      (println "No current tasks were requeued.")
      (println (format "Requeued %d task(s): %s"
                       (count moved)
                       (str/join ", " moved))))
    (when-not (empty? absent)
      (println (format "Not in current/: %s" (str/join ", " absent))))))
1237
+
617
1238
  (defn- find-latest-swarm-id
618
1239
  "Find the most recent swarm ID from runs/ directory."
619
1240
  []
@@ -685,12 +1306,16 @@
685
1306
  (println "Usage: ./swarm.bb <command> [options]")
686
1307
  (println)
687
1308
  (println "Commands:")
688
- (println " run Run all tasks once")
1309
+ (println " run [file] Run swarm from config (default: oompa.json, oompa/oompa.json)")
689
1310
  (println " loop N Run N iterations")
690
1311
  (println " swarm [file] Run multiple worker configs from oompa.json (parallel)")
691
1312
  (println " tasks Show task status (pending/current/complete)")
1313
+ (println " requeue [ids..] Move current tasks back to pending")
692
1314
  (println " prompt \"...\" Run ad-hoc prompt")
693
- (println " status Show last run summary")
1315
+ (println " status Show running swarms")
1316
+ (println " info Show detailed summary of the last run")
1317
+ (println " list List recent swarms (default: 20, --all for full history)")
1318
+ (println " view [swarm-id] Show detailed single-swarm runtime (default: latest)")
694
1319
  (println " worktrees List worktree status")
695
1320
  (println " stop [swarm-id] Stop swarm gracefully (finish current cycle)")
696
1321
  (println " kill [swarm-id] Kill swarm immediately (SIGKILL)")
@@ -698,10 +1323,15 @@
698
1323
  (println " context Print context block")
699
1324
  (println " check Check agent backends")
700
1325
  (println " help Show this help")
1326
+ (println " docs Dump all core architecture and swarm design docs")
701
1327
  (println)
702
1328
  (println "Options:")
703
1329
  (println " --workers N Number of parallel workers (default: 2)")
704
1330
  (println " --workers H:N [H:N ...] Mixed workers by harness (e.g., claude:5 opencode:2)")
1331
+ (println " --all Show full history for list command")
1332
+ (println " --config PATH Config file for run/swarm")
1333
+ (println " --detach Run in background (run command)")
1334
+ (println " --startup-timeout N Detached startup validation window in seconds")
705
1335
  (println " --iterations N Number of iterations per worker (default: 1)")
706
1336
  (println (str " --harness {" (str/join "," (map name (sort harnesses))) "} Agent harness to use (default: codex)"))
707
1337
  (println " --model MODEL Model to use (e.g., codex:gpt-5.3-codex:medium, claude:opus, gemini:gemini-3-pro-preview)")
@@ -709,10 +1339,34 @@
709
1339
  (println " --keep-worktrees Don't cleanup worktrees after run")
710
1340
  (println)
711
1341
  (println "Examples:")
1342
+ (println " ./swarm.bb list")
1343
+ (println " ./swarm.bb list --all")
1344
+ (println " ./swarm.bb view 6cd50f5a")
1345
+ (println " ./swarm.bb run --detach --config oompa/oompa_overnight_self_healing.json")
712
1346
  (println " ./swarm.bb loop 10 --harness codex --model gpt-5.3-codex --workers 3")
713
1347
  (println " ./swarm.bb loop --workers claude:5 opencode:2 --iterations 20")
714
1348
  (println " ./swarm.bb swarm oompa.json # Run multi-model config"))
715
1349
 
1350
(defn cmd-docs
  "Print the core documentation files to stdout as one Markdown document.

  Files are read from `<root>/docs/`, where <root> is the value of the
  OOMPA_PACKAGE_ROOT environment variable, or \".\" when it is unset.
  Each file is emitted under a `## <path>` heading inside a fenced
  markdown block. A file that cannot be read does not abort the dump:
  a \"Could not read ...\" line is printed and the next file is tried.
  `opts` and `args` are accepted for the command-handler signature and
  are not read."
  [opts args]
  (let [docs-dir  "docs"
        root      (or (System/getenv "OOMPA_PACKAGE_ROOT") ".")
        filenames ["SWARM_PHILOSOPHY.md" "SWARM_GUIDE.md" "EDN_TICKETS.md" "SYSTEMS_DESIGN.md" "OOMPA.md"]]
    (println "# Oompa Loompas Core Documentation")
    (println)
    (doseq [filename filenames
            :let [path (str root "/" docs-dir "/" filename)]]
      (try
        ;; Read first so that nothing is printed for an unreadable file
        ;; other than the error line below.
        (let [body (slurp path)]
          (println (str "## " path))
          (println "```markdown")
          (println body)
          (println "```")
          (println))
        (catch Exception e
          (println (str "Could not read " path ": " (.getMessage e))))))))
1369
+
716
1370
  ;; =============================================================================
717
1371
  ;; Main Entry Point
718
1372
  ;; =============================================================================
@@ -722,26 +1376,31 @@
722
1376
  "loop" cmd-loop
723
1377
  "swarm" cmd-swarm
724
1378
  "tasks" cmd-tasks
1379
+ "requeue" cmd-requeue
725
1380
  "prompt" cmd-prompt
726
1381
  "status" cmd-status
1382
+ "info" cmd-info
1383
+ "list" cmd-list
1384
+ "view" cmd-view
727
1385
  "stop" cmd-stop
728
1386
  "kill" cmd-kill
729
1387
  "worktrees" cmd-worktrees
730
1388
  "cleanup" cmd-cleanup
731
1389
  "context" cmd-context
732
1390
  "check" cmd-check
733
- "help" cmd-help})
1391
+ "help" cmd-help
1392
+ "docs" cmd-docs})
734
1393
 
735
1394
  (defn -main [& args]
736
1395
  (let [[cmd & rest-args] args]
737
1396
  (if-let [handler (get commands cmd)]
738
- (let [{:keys [opts args]} (parse-args rest-args)]
739
- (try
740
- (handler opts args)
741
- (catch Exception e
742
- (binding [*out* *err*]
743
- (println (format "Error: %s" (.getMessage e))))
744
- (System/exit 1))))
1397
+ (try
1398
+ (let [{:keys [opts args]} (parse-args rest-args)]
1399
+ (handler opts args))
1400
+ (catch Exception e
1401
+ (binding [*out* *err*]
1402
+ (println (format "Error: %s" (.getMessage e))))
1403
+ (System/exit 1)))
745
1404
  (do
746
1405
  (cmd-help {} [])
747
1406
  (when cmd