@nbardy/oompa 0.7.1 → 0.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,8 +2,8 @@
2
2
  "Command-line interface for AgentNet orchestrator.
3
3
 
4
4
  Usage:
5
- ./swarm.bb run # Run all tasks once
6
- ./swarm.bb run --workers 4 # With 4 parallel workers
5
+ ./swarm.bb run # Run swarm from config (oompa.json)
6
+ ./swarm.bb run --detach --config oompa.json # Run in background with startup validation
7
7
  ./swarm.bb loop 20 --harness claude # 20 iterations with Claude
8
8
  ./swarm.bb loop --workers claude:5 opencode:2 --iterations 20 # Mixed harnesses
9
9
  ./swarm.bb swarm oompa.json # Multi-model from config
@@ -46,7 +46,7 @@
46
46
  (let [[harness count-str] (str/split s #":" 2)
47
47
  h (keyword harness)
48
48
  cnt (parse-int count-str 0)]
49
- (when-not (harnesses h)
49
+ (when-not (harness/valid-harness? h)
50
50
  (throw (ex-info (str "Unknown harness in worker spec: " s ". Known: " (str/join ", " (map name (sort harnesses)))) {})))
51
51
  (when (zero? cnt)
52
52
  (throw (ex-info (str "Invalid count in worker spec: " s ". Use format 'harness:count'") {})))
@@ -75,6 +75,10 @@
75
75
  :harness :codex
76
76
  :model nil
77
77
  :dry-run false
78
+ :detach false
79
+ :all false
80
+ :config-file nil
81
+ :startup-timeout nil
78
82
  :iterations 1
79
83
  :worker-specs nil}
80
84
  remaining args]
@@ -97,7 +101,7 @@
97
101
 
98
102
  (= arg "--harness")
99
103
  (let [h (keyword (second remaining))]
100
- (when-not (harnesses h)
104
+ (when-not (harness/valid-harness? h)
101
105
  (throw (ex-info (str "Unknown harness: " (second remaining) ". Known: " (str/join ", " (map name (sort harnesses)))) {})))
102
106
  (recur (assoc opts :harness h)
103
107
  (nnext remaining)))
@@ -106,6 +110,28 @@
106
110
  (recur (assoc opts :model (second remaining))
107
111
  (nnext remaining))
108
112
 
113
+ (= arg "--config")
114
+ (let [config-file (second remaining)]
115
+ (when (str/blank? config-file)
116
+ (throw (ex-info "--config requires a path" {:arg arg})))
117
+ (recur (assoc opts :config-file config-file)
118
+ (nnext remaining)))
119
+
120
+ (or (= arg "--detach") (= arg "--dettach"))
121
+ (recur (assoc opts :detach true)
122
+ (next remaining))
123
+
124
+ (= arg "--all")
125
+ (recur (assoc opts :all true)
126
+ (next remaining))
127
+
128
+ (= arg "--startup-timeout")
129
+ (let [seconds (parse-int (second remaining) nil)]
130
+ (when-not (and (number? seconds) (pos? seconds))
131
+ (throw (ex-info "--startup-timeout requires a positive integer (seconds)" {:arg arg})))
132
+ (recur (assoc opts :startup-timeout seconds)
133
+ (nnext remaining)))
134
+
109
135
  ;; Legacy flags (still supported)
110
136
  (= arg "--claude")
111
137
  (recur (assoc opts :harness :claude)
@@ -137,22 +163,17 @@
137
163
  ;; Commands
138
164
  ;; =============================================================================
139
165
 
140
- (declare cmd-swarm parse-model-string)
166
+ (declare cmd-swarm parse-model-string pid-alive?)
141
167
 
142
168
  (defn- check-git-clean!
143
- "Abort if git working tree is dirty. Dirty index causes merge conflicts
144
- and wasted worker iterations."
169
+ "Warn if git working tree is dirty. Dirty index may cause merge conflicts."
145
170
  []
146
171
  (let [result (process/sh ["git" "status" "--porcelain"]
147
172
  {:out :string :err :string})
148
173
  output (str/trim (:out result))]
149
174
  (when (and (zero? (:exit result)) (not (str/blank? output)))
150
- (println "ERROR: Git working tree is dirty. Resolve before running swarm.")
151
- (println)
152
- (println output)
153
- (println)
154
- (println "Run 'git stash' or 'git commit' first.")
155
- (System/exit 1))))
175
+ (println "WARNING: Git working tree is dirty. You may experience merge conflicts.")
176
+ (println output))))
156
177
 
157
178
  (defn- check-stale-worktrees!
158
179
  "Abort if stale oompa worktrees or branches exist from a prior run.
@@ -196,12 +217,15 @@
196
217
 
197
218
  (defn- probe-model
198
219
  "Send 'say ok' to a model via its harness CLI. Returns true if model responds.
199
- Uses harness/build-probe-cmd for the command, /dev/null stdin to prevent hang."
220
+ Uses harness/build-probe-cmd for the command.
221
+ For stdin-based harnesses (e.g. claude), delivers the probe prompt via stdin.
222
+ For close-stdin harnesses, uses /dev/null to prevent hang."
200
223
  [harness-kw model]
201
224
  (try
202
225
  (let [cmd (harness/build-probe-cmd harness-kw model)
203
- null-in (io/input-stream (io/file "/dev/null"))
204
- proc (process/process cmd {:out :string :err :string :in null-in})
226
+ probe-prompt "[_HIDE_TEST_] say ok"
227
+ stdin-val (harness/process-stdin harness-kw probe-prompt)
228
+ proc (process/process cmd {:out :string :err :string :in stdin-val})
205
229
  result (deref proc 30000 :timeout)]
206
230
  (if (= result :timeout)
207
231
  (do (.destroyForcibly (:proc proc)) false)
@@ -210,14 +234,14 @@
210
234
 
211
235
  (defn- validate-models!
212
236
  "Probe each unique harness:model pair. Prints results and exits if any fail."
213
- [worker-configs review-model]
237
+ [worker-configs review-models]
214
238
  (let [;; Deduplicate by harness:model only (ignore reasoning level)
215
- models (cond-> (->> worker-configs
239
+ models (into (->> worker-configs
216
240
  (map (fn [wc]
217
241
  (let [{:keys [harness model]} (parse-model-string (:model wc))]
218
242
  {:harness harness :model model})))
219
243
  set)
220
- review-model (conj (select-keys review-model [:harness :model])))
244
+ (map #(select-keys % [:harness :model]) review-models))
221
245
  _ (println "Validating models...")
222
246
  results (pmap (fn [{:keys [harness model]}]
223
247
  (let [ok (probe-model harness model)]
@@ -237,36 +261,257 @@
237
261
  (System/exit 1))
238
262
  (println)))
239
263
 
240
- (defn cmd-run
241
- "Run orchestrator — uses oompa.json if present, otherwise simple mode"
264
+ (def ^:private default-detach-startup-timeout 20)
265
+
266
+ (defn- run-id []
267
+ (subs (str (java.util.UUID/randomUUID)) 0 8))
268
+
269
+ (defn- run-ts []
270
+ (.format (java.time.format.DateTimeFormatter/ofPattern "yyyyMMdd-HHmmss")
271
+ (java.time.LocalDateTime/now)))
272
+
273
+ (defn- default-config-file
274
+ []
275
+ (cond
276
+ (.exists (io/file "oompa.json")) "oompa.json"
277
+ (.exists (io/file "oompa/oompa.json")) "oompa/oompa.json"
278
+ :else nil))
279
+
280
+ (defn- resolve-config-file
242
281
  [opts args]
243
- (if (.exists (io/file "oompa.json"))
244
- (cmd-swarm opts (or (seq args) ["oompa.json"]))
245
- (let [swarm-id (make-swarm-id)]
246
- (if-let [specs (:worker-specs opts)]
247
- ;; Mixed worker specs: --workers claude:5 opencode:2
248
- (let [workers (mapcat
249
- (fn [spec]
250
- (let [{:keys [harness count]} spec]
251
- (map-indexed
252
- (fn [idx _]
253
- (worker/create-worker
254
- {:id (format "%s-%d" (name harness) idx)
255
- :swarm-id swarm-id
256
- :harness harness
257
- :model (:model opts)
258
- :iterations 1}))
259
- (range count))))
260
- specs)]
261
- (println (format "Running once with mixed workers (swarm %s):" swarm-id))
262
- (doseq [spec specs]
263
- (println (format " %dx %s" (:count spec) (name (:harness spec)))))
282
+ (let [candidate (or (:config-file opts)
283
+ (first args)
284
+ (default-config-file))]
285
+ (when candidate
286
+ (.getCanonicalPath (io/file candidate)))))
287
+
288
+ (defn- prepare-log-file!
289
+ "Create oompa/logs and return absolute log path."
290
+ [rid]
291
+ (let [dir (if (.exists (io/file "oompa"))
292
+ (io/file "oompa" "logs")
293
+ (io/file "runs" "logs"))]
294
+ (.mkdirs dir)
295
+ (.getCanonicalPath (io/file dir (str (run-ts) "_" rid ".log")))))
296
+
297
+ (defn- read-file-safe
298
+ [path]
299
+ (try
300
+ (if (.exists (io/file path))
301
+ (slurp path)
302
+ "")
303
+ (catch Exception _
304
+ "")))
305
+
306
+ (defn- tail-lines
307
+ [text n]
308
+ (->> (str/split-lines (or text ""))
309
+ (take-last n)
310
+ (str/join "\n")))
311
+
312
+ (defn- extract-swarm-id
313
+ [text]
314
+ (some->> text
315
+ (re-find #"Swarm ID:\s*([0-9a-f]{8})")
316
+ second))
317
+
318
+ (defn- startup-diagnostic-lines
319
+ [text]
320
+ (->> (str/split-lines (or text ""))
321
+ (filter #(re-find #"ERROR:|FAIL|WARNING:" %))
322
+ (take-last 20)))
323
+
324
+ (defn- print-preflight-warnings!
325
+ []
326
+ (let [agent-cli? (zero? (:exit (process/sh ["which" "agent-cli"]
327
+ {:out :string :err :string})))]
328
+ (when-not agent-cli?
329
+ (println "WARNING: 'agent-cli' is not on PATH.")
330
+ (println " Model validation may report false model-access failures.")))
331
+ (let [dirty (process/sh ["git" "status" "--porcelain"]
332
+ {:out :string :err :string})
333
+ lines (->> (:out dirty)
334
+ str/split-lines
335
+ (remove str/blank?))]
336
+ (when (seq lines)
337
+ (println (format "WARNING: Git working tree is dirty (%d changed paths)." (count lines)))
338
+ (println " Swarm startup may fail until changes are committed/stashed.")
339
+ (doseq [line (take 20 lines)]
340
+ (println line))
341
+ (when (> (count lines) 20)
342
+ (println (format "... (%d total changed paths)" (count lines)))))))
343
+
344
+ (defn- runtime-classpath-entry
345
+ "Best-effort classpath root for agentnet sources."
346
+ []
347
+ (or
348
+ (some-> (System/getenv "OOMPA_PACKAGE_ROOT")
349
+ (io/file "agentnet" "src")
350
+ .getCanonicalPath)
351
+ (->> (str/split (or (System/getProperty "java.class.path") "")
352
+ (re-pattern (java.io.File/pathSeparator)))
353
+ (map str/trim)
354
+ (remove str/blank?)
355
+ (map io/file)
356
+ (filter #(.exists %))
357
+ (map #(.getCanonicalPath %))
358
+ (some #(when (str/ends-with? % (str "agentnet" java.io.File/separator "src"))
359
+ %)))
360
+ (.getCanonicalPath (io/file "agentnet" "src"))))
361
+
362
+ (defn- run-classpath
363
+ []
364
+ (runtime-classpath-entry))
365
+
366
+ (defn- run-script-path
367
+ []
368
+ (if-let [pkg-root (System/getenv "OOMPA_PACKAGE_ROOT")]
369
+ (.getCanonicalPath (io/file pkg-root "swarm.bb"))
370
+ (let [cp (io/file (runtime-classpath-entry))
371
+ ;; cp = <repo>/agentnet/src -> <repo>/swarm.bb
372
+ repo-root (some-> cp .getParentFile .getParentFile)
373
+ candidate (when repo-root (io/file repo-root "swarm.bb"))]
374
+ (if (and candidate (.exists candidate))
375
+ (.getCanonicalPath candidate)
376
+ (.getCanonicalPath (io/file "swarm.bb"))))))
377
+
378
+ (defn- detached-cmd
379
+ [opts config-file]
380
+ (cond-> ["nohup" "bb" "--classpath" (run-classpath) (run-script-path) "swarm"]
381
+ (:dry-run opts) (conj "--dry-run")
382
+ true (conj config-file)))
383
+
384
+ (defn- spawn-detached!
385
+ [cmd log-file]
386
+ (let [log (io/file log-file)
387
+ pb (doto (ProcessBuilder. ^java.util.List cmd)
388
+ (.directory (io/file "."))
389
+ (.redirectInput (java.lang.ProcessBuilder$Redirect/from (io/file "/dev/null")))
390
+ (.redirectOutput (java.lang.ProcessBuilder$Redirect/appendTo log))
391
+ (.redirectError (java.lang.ProcessBuilder$Redirect/appendTo log)))
392
+ proc (.start pb)
393
+ pid (.pid proc)]
394
+ ;; Give spawn a short window before validation checks liveness.
395
+ (Thread/sleep 100)
396
+ pid))
397
+
398
+ (defn- pid-alive?
399
+ [pid]
400
+ (zero? (:exit (process/sh ["kill" "-0" (str pid)]
401
+ {:out :string :err :string}))))
402
+
403
+ (defn- wait-for-startup!
404
+ [pid log-file timeout-sec]
405
+ (loop [waited 0]
406
+ (let [content (read-file-safe log-file)
407
+ started? (str/includes? content "Started event written to runs/")
408
+ alive? (pid-alive? pid)]
409
+ (cond
410
+ started?
411
+ {:status :started
412
+ :content content
413
+ :swarm-id (extract-swarm-id content)}
414
+
415
+ (not alive?)
416
+ {:status :failed
417
+ :content content}
418
+
419
+ (>= waited timeout-sec)
420
+ {:status :timeout
421
+ :content content}
422
+
423
+ :else
424
+ (do
425
+ (Thread/sleep 1000)
426
+ (recur (inc waited)))))))
427
+
428
+ (defn- cmd-run-detached
429
+ [opts config-file]
430
+ (print-preflight-warnings!)
431
+ (when-not (.exists (io/file config-file))
432
+ (println (format "Config not found: %s" config-file))
433
+ (System/exit 1))
434
+ (let [timeout-sec (or (:startup-timeout opts)
435
+ (parse-int (System/getenv "OOMPA_DETACH_STARTUP_TIMEOUT")
436
+ default-detach-startup-timeout))
437
+ rid (run-id)
438
+ log-file (prepare-log-file! rid)
439
+ cmd (detached-cmd opts config-file)
440
+ pid (spawn-detached! cmd log-file)]
441
+ (println (format "Config: %s" config-file))
442
+ (when (:dry-run opts)
443
+ (println "Merge mode: dry-run"))
444
+ (let [{:keys [status content swarm-id]} (wait-for-startup! pid log-file timeout-sec)]
445
+ (case status
446
+ :failed
447
+ (do
264
448
  (println)
265
- (worker/run-workers! workers))
266
- ;; Simple mode retired — use oompa.json or --workers harness:count
449
+ (println "ERROR: Detached swarm exited during startup validation.")
450
+ (println "Startup log excerpt:")
451
+ (println (tail-lines content 120))
452
+ (System/exit 1))
453
+
454
+ :timeout
267
455
  (do
268
- (println "Simple mode is no longer supported. Use oompa.json or --workers harness:count.")
269
- (System/exit 1))))))
456
+ (println)
457
+ (println (format "WARNING: Detached swarm still initializing after %ss." timeout-sec))
458
+ (println "Recent startup log lines:")
459
+ (println (tail-lines content 40)))
460
+
461
+ nil)
462
+ (let [diag (startup-diagnostic-lines content)]
463
+ (when (seq diag)
464
+ (println)
465
+ (println "Startup diagnostics:")
466
+ (doseq [line diag]
467
+ (println line))))
468
+ (println)
469
+ (println " ┌──────────────────────────────────────────────────────────────┐")
470
+ (println " │ OOMPA SWARM RUN (DETACHED) │")
471
+ (println (format " │ Run id: %-46s│" rid))
472
+ (println (format " │ PID: %-46s│" pid))
473
+ (println (format " │ Log file: %-46s│" log-file))
474
+ (println (format " │ Swarm ID: %-46s│" (or swarm-id "(pending)")))
475
+ (println " └──────────────────────────────────────────────────────────────┘")
476
+ (println))))
477
+
478
+ (defn- cmd-run-legacy
479
+ "Run orchestrator once from worker specs (legacy mode)."
480
+ [opts args]
481
+ (let [swarm-id (make-swarm-id)]
482
+ (if-let [specs (:worker-specs opts)]
483
+ ;; Mixed worker specs: --workers claude:5 opencode:2
484
+ (let [workers (mapcat
485
+ (fn [spec]
486
+ (let [{:keys [harness count]} spec]
487
+ (map-indexed
488
+ (fn [idx _]
489
+ (worker/create-worker
490
+ {:id (format "%s-%d" (name harness) idx)
491
+ :swarm-id swarm-id
492
+ :harness harness
493
+ :model (:model opts)
494
+ :iterations 1}))
495
+ (range count))))
496
+ specs)]
497
+ (println (format "Running once with mixed workers (swarm %s):" swarm-id))
498
+ (doseq [spec specs]
499
+ (println (format " %dx %s" (:count spec) (name (:harness spec)))))
500
+ (println)
501
+ (worker/run-workers! workers))
502
+ ;; Simple mode retired — use oompa.json or --workers harness:count
503
+ (do
504
+ (println "Simple mode is no longer supported. Use oompa.json or --workers harness:count.")
505
+ (System/exit 1)))))
506
+
507
+ (defn cmd-run
508
+ "Run swarm from config. Use --detach for background mode."
509
+ [opts args]
510
+ (if-let [config-file (resolve-config-file opts args)]
511
+ (if (:detach opts)
512
+ (cmd-run-detached opts config-file)
513
+ (cmd-swarm opts [config-file]))
514
+ (cmd-run-legacy opts args)))
270
515
 
271
516
  (defn cmd-loop
272
517
  "Run orchestrator N times"
@@ -318,46 +563,72 @@
318
563
  (orchestrator/run-once! opts))))
319
564
 
320
565
  (defn cmd-status
321
- "Show status of last run — reads event-sourced runs/{swarm-id}/ data."
566
+ "Show running swarms."
322
567
  [opts args]
323
568
  (let [run-ids (runs/list-runs)]
324
569
  (if (seq run-ids)
325
- (let [swarm-id (or (first args) (first run-ids))
326
- started (runs/read-started swarm-id)
327
- stopped (runs/read-stopped swarm-id)
328
- cycles (runs/list-cycles swarm-id)
329
- reviews (runs/list-reviews swarm-id)]
330
- (println (format "Swarm: %s" swarm-id))
331
- (when started
332
- (println (format " Started: %s" (:started-at started)))
333
- (println (format " PID: %s" (or (:pid started) "N/A")))
334
- (println (format " Config: %s" (or (:config-file started) "N/A")))
335
- (println (format " Workers: %d" (count (:workers started)))))
336
- (println)
337
- (if stopped
338
- (println (format "Stopped: %s (reason: %s%s)"
339
- (:stopped-at stopped)
340
- (:reason stopped)
341
- (if (:error stopped)
342
- (str ", error: " (:error stopped))
343
- "")))
344
- (println " (still runningno stopped event yet)"))
345
- (when (seq cycles)
346
- (println)
347
- (println (format "Cycles: %d total" (count cycles)))
348
- (doseq [c cycles]
349
- (println (format " %s-c%d: %s (%dms, claimed: %s)"
350
- (:worker-id c) (:cycle c)
351
- (:outcome c)
352
- (or (:duration-ms c) 0)
353
- (str/join ", " (or (:claimed-task-ids c) []))))))
354
- (when (seq reviews)
355
- (println)
356
- (println (format "Reviews: %d total" (count reviews)))
357
- (doseq [r reviews]
358
- (println (format " %s-c%d-r%d: %s"
359
- (:worker-id r) (:cycle r) (:round r)
360
- (:verdict r))))))
570
+ (let [running (for [id run-ids
571
+ :let [started (runs/read-started id)
572
+ stopped (runs/read-stopped id)
573
+ pid (:pid started)]
574
+ :when (and started (not stopped) pid (pid-alive? pid))]
575
+ {:id id
576
+ :pid pid
577
+ :workers (count (:workers started))
578
+ :work-count (count (runs/list-cycles id))})]
579
+ (if (seq running)
580
+ (do
581
+ (println (format "Running Swarms: %d" (count running)))
582
+ (doseq [r running]
583
+ (println (format " Swarm: %s | PID: %s | Workers: %d | Work Count: %d"
584
+ (:id r) (:pid r) (:workers r) (:work-count r)))))
585
+ (println "No running swarms.")))
586
+ (println "No swarms found."))))
587
+
588
+ (defn cmd-info
589
+ "Show detailed information of a swarm run reads event-sourced runs/{swarm-id}/ data."
590
+ [opts args]
591
+ (let [run-ids (runs/list-runs)]
592
+ (if (seq run-ids)
593
+ (let [target-ids (if (seq args) [(first args)] run-ids)]
594
+ (doseq [swarm-id target-ids]
595
+ (let [started (runs/read-started swarm-id)
596
+ stopped (runs/read-stopped swarm-id)
597
+ cycles (runs/list-cycles swarm-id)
598
+ reviews (runs/list-reviews swarm-id)]
599
+ (println "--------------------------------------------------")
600
+ (println (format "Swarm: %s" swarm-id))
601
+ (when started
602
+ (println (format " Started: %s" (:started-at started)))
603
+ (println (format " PID: %s" (or (:pid started) "N/A")))
604
+ (println (format " Config: %s" (or (:config-file started) "N/A")))
605
+ (println (format " Workers: %d" (count (:workers started)))))
606
+ (println)
607
+ (if stopped
608
+ (println (format "Stopped: %s (reason: %s%s)"
609
+ (:stopped-at stopped)
610
+ (:reason stopped)
611
+ (if (:error stopped)
612
+ (str ", error: " (:error stopped))
613
+ "")))
614
+ (println " (still running — no stopped event yet)"))
615
+ (when (seq cycles)
616
+ (println)
617
+ (println (format "Cycles: %d total" (count cycles)))
618
+ (doseq [c cycles]
619
+ (println (format " %s-c%d: %s (%dms, claimed: %s)"
620
+ (:worker-id c) (:cycle c)
621
+ (:outcome c)
622
+ (or (:duration-ms c) 0)
623
+ (str/join ", " (or (:claimed-task-ids c) []))))))
624
+ (when (seq reviews)
625
+ (println)
626
+ (println (format "Reviews: %d total" (count reviews)))
627
+ (doseq [r reviews]
628
+ (println (format " %s-c%d-r%d: %s"
629
+ (:worker-id r) (:cycle r) (:round r)
630
+ (:verdict r)))))
631
+ (println))))
361
632
  ;; Fall back to legacy JSONL format
362
633
  (let [runs-dir (io/file "runs")
363
634
  files (when (.exists runs-dir)
@@ -377,6 +648,166 @@
377
648
  (println (format "Total: %d tasks" (count entries))))))
378
649
  (println "No runs found."))))))
379
650
 
651
+ (def ^:private error-outcomes
652
+ #{"error" "merge-failed" "rejected" "stuck"})
653
+
654
+ (def ^:private terminal-run-outcomes
655
+ #{"merged" "rejected" "error" "merge-failed" "sync-failed" "stuck" "no-changes"})
656
+
657
+ (defn- run-state
658
+ "Derive run lifecycle state from started/stopped events + PID liveness."
659
+ [started stopped]
660
+ (cond
661
+ (nil? started) "missing-started"
662
+ stopped (str "stopped/" (:reason stopped))
663
+ (pid-alive? (:pid started)) "running"
664
+ :else "stale"))
665
+
666
+ (defn- latest-cycles-by-worker
667
+ "Return map of worker-id -> latest cycle entry."
668
+ [cycles]
669
+ (reduce (fn [acc c]
670
+ (let [wid (:worker-id c)
671
+ prev (get acc wid)]
672
+ (if (or (nil? prev)
673
+ (> (or (:cycle c) 0) (or (:cycle prev) 0)))
674
+ (assoc acc wid c)
675
+ acc)))
676
+ {}
677
+ cycles))
678
+
679
+ (defn- worker-runtime
680
+ "Best-effort worker runtime classification for view output."
681
+ [worker latest-cycle worker-cycles run-state*]
682
+ (let [run-max (or (:runs worker) (:iterations worker) 0)
683
+ runs-done (count (filter #(terminal-run-outcomes (:outcome %)) worker-cycles))
684
+ outcome (or (:outcome latest-cycle) "-")]
685
+ (cond
686
+ (>= runs-done run-max) "completed"
687
+ (str/starts-with? run-state* "stopped/") "stopped"
688
+ (= run-state* "stale") "stale"
689
+ (nil? latest-cycle) "starting"
690
+ (= outcome "working") "working"
691
+ (= outcome "executor-done") "idle"
692
+ :else outcome)))
693
+
694
+ (defn- model-label
695
+ [{:keys [harness model reasoning]}]
696
+ (str harness ":" model (when reasoning (str ":" reasoning))))
697
+
698
+ (defn- run-metrics
699
+ "Summarize cycle metrics for a run."
700
+ [cycles]
701
+ (let [merged (count (filter #(= "merged" (:outcome %)) cycles))
702
+ failed (count (filter #(error-outcomes (:outcome %)) cycles))
703
+ claimed-all (->> cycles
704
+ (mapcat #(or (:claimed-task-ids %) []))
705
+ (remove str/blank?))
706
+ completed-ids (->> cycles
707
+ (filter #(= "merged" (:outcome %)))
708
+ (mapcat #(or (:claimed-task-ids %) []))
709
+ (remove str/blank?)
710
+ set)]
711
+ {:merged merged
712
+ :failed failed
713
+ :claimed (count (set claimed-all))
714
+ :completed (count completed-ids)}))
715
+
716
+ (defn- cmd-view-one
717
+ [swarm-id]
718
+ (if-let [started (runs/read-started swarm-id)]
719
+ (let [stopped (runs/read-stopped swarm-id)
720
+ cycles (or (runs/list-cycles swarm-id) [])
721
+ reviews (or (runs/list-reviews swarm-id) [])
722
+ workers (or (:workers started) [])
723
+ run-state* (run-state started stopped)
724
+ metrics (run-metrics cycles)
725
+ latest-by-worker (latest-cycles-by-worker cycles)
726
+ cycles-by-worker (group-by :worker-id cycles)]
727
+ (println (format "Swarm: %s" swarm-id))
728
+ (println (format "State: %s" run-state*))
729
+ (println (format "Started: %s" (:started-at started)))
730
+ (println (format "PID: %s" (or (:pid started) "N/A")))
731
+ (println (format "Config: %s" (or (:config-file started) "N/A")))
732
+ (when stopped
733
+ (println (format "Stopped: %s" (:stopped-at stopped))))
734
+ (println (format "Cycles: %d" (count cycles)))
735
+ (println (format "PRs: merged=%d failed=%d" (:merged metrics) (:failed metrics)))
736
+ (println (format "Tasks: claimed=%d completed=%d created=n/a"
737
+ (:claimed metrics) (:completed metrics)))
738
+ (println (format "Reviews: %d" (count reviews)))
739
+ (println)
740
+ (println "Workers:")
741
+ (println "ID | Runtime | Runs | Cycles | Last Outcome | Claimed | Model")
742
+ (println "----+-----------+--------+---------+----------------+---------+------------------------------")
743
+ (doseq [w (sort-by :id workers)]
744
+ (let [wid (:id w)
745
+ latest (get latest-by-worker wid)
746
+ worker-cycles (or (get cycles-by-worker wid) [])
747
+ run-max (or (:runs w) (:iterations w) 0)
748
+ runs-done (count (filter #(terminal-run-outcomes (:outcome %)) worker-cycles))
749
+ cycles-done (or (:cycle latest) 0)
750
+ runtime (worker-runtime w latest worker-cycles run-state*)
751
+ outcome (or (:outcome latest) "-")
752
+ claimed (count (or (:claimed-task-ids latest) []))]
753
+ (println (format "%-3s | %-9s | %4d/%-3d | %7d | %-14s | %-7d | %s"
754
+ wid runtime runs-done run-max cycles-done outcome claimed (model-label w))))))
755
+ (do
756
+ (println (format "Swarm not found: %s" swarm-id))
757
+ (System/exit 1))))
758
+
759
+ (defn cmd-list
760
+ "List recent swarms with liveness + activity metrics.
761
+ Default: 20 most recent. Use --all for full history."
762
+ [opts args]
763
+ (let [run-ids (or (runs/list-runs) [])]
764
+ (if-not (seq run-ids)
765
+ (println "No swarm runs found.")
766
+ (let [shown (if (:all opts) run-ids (take 20 run-ids))]
767
+ (println "Swarm Runs:")
768
+ (println "ID | State | PID | Workers | Active | Cycles | Merged | Failed | Done | Started")
769
+ (println "---------+------------------+--------+---------+--------+--------+--------+--------+------+-------------------------")
770
+ (doseq [rid shown]
771
+ (let [started (runs/read-started rid)
772
+ stopped (runs/read-stopped rid)
773
+ cycles (or (runs/list-cycles rid) [])
774
+ workers (or (:workers started) [])
775
+ metrics (run-metrics cycles)
776
+ latest-by-worker (latest-cycles-by-worker cycles)
777
+ cycles-by-worker (group-by :worker-id cycles)
778
+ state* (run-state started stopped)
779
+ active-count (if (= state* "running")
780
+ (count (filter (fn [w]
781
+ (let [wid (:id w)
782
+ run-max (or (:runs w) (:iterations w) 0)
783
+ runs-done (count (filter #(terminal-run-outcomes (:outcome %))
784
+ (or (get cycles-by-worker wid) [])))]
785
+ (< runs-done run-max)))
786
+ workers))
787
+ 0)]
788
+ (println (format "%-8s | %-16s | %-6s | %7d | %6d | %6d | %6d | %6d | %4d | %s"
789
+ rid
790
+ state*
791
+ (or (:pid started) "-")
792
+ (count workers)
793
+ active-count
794
+ (count cycles)
795
+ (:merged metrics)
796
+ (:failed metrics)
797
+ (:completed metrics)
798
+ (or (:started-at started) "-")))))
799
+ (when (and (not (:all opts)) (> (count run-ids) 20))
800
+ (println (format "\nShowing 20 of %d runs. Use --all for full history." (count run-ids))))
801
+ (println)
802
+ (println "Use `oompa view <swarm-id>` for detailed single-swarm info.")))))
803
+
804
+ (defn cmd-view
805
+ "Show detailed runtime for one swarm (default: latest run)."
806
+ [opts args]
807
+ (if-let [swarm-id (or (first args) (first (runs/list-runs)))]
808
+ (cmd-view-one swarm-id)
809
+ (println "No swarm runs found.")))
810
+
380
811
  (defn cmd-worktrees
381
812
  "List worktree status"
382
813
  [opts args]
@@ -429,27 +860,23 @@
429
860
 
430
861
  Supported formats:
431
862
  - harness:model
432
- - harness:model:reasoning (codex only)
863
+ - harness:model:reasoning (if reasoning is in reasoning-variants)
433
864
  - model (defaults harness to :codex)
434
865
 
435
- Note: non-codex model identifiers may contain ':' (for example
436
- openrouter/...:free). Those suffixes are preserved in :model."
866
+ Note: model identifiers may contain ':' (for example openrouter/...:free).
867
+ Those suffixes are preserved in :model if not a known reasoning variant."
437
868
  [s]
438
869
  (if (and s (str/includes? s ":"))
439
870
  (let [[harness-str rest*] (str/split s #":" 2)
440
871
  harness (keyword harness-str)]
441
- (if (contains? harnesses harness)
442
- (if (= harness :codex)
443
- ;; Codex may include a reasoning suffix at the end. Only treat the
444
- ;; last segment as reasoning if it matches a known variant.
445
- (if-let [idx (str/last-index-of rest* ":")]
446
- (let [model* (subs rest* 0 idx)
447
- reasoning* (subs rest* (inc idx))]
448
- (if (contains? reasoning-variants reasoning*)
449
- {:harness harness :model model* :reasoning reasoning*}
450
- {:harness harness :model rest*}))
451
- {:harness harness :model rest*})
452
- ;; Non-codex: preserve full model string (including any ':suffix').
872
+ (if (harness/valid-harness? harness)
873
+ ;; Check for reasoning suffix for any valid harness
874
+ (if-let [idx (str/last-index-of rest* ":")]
875
+ (let [model* (subs rest* 0 idx)
876
+ reasoning* (subs rest* (inc idx))]
877
+ (if (contains? reasoning-variants reasoning*)
878
+ {:harness harness :model model* :reasoning reasoning*}
879
+ {:harness harness :model rest*}))
453
880
  {:harness harness :model rest*})
454
881
  ;; Not a known harness prefix, treat as raw model on default harness.
455
882
  {:harness :codex :model s}))
@@ -484,20 +911,14 @@
484
911
  ;; Parse reviewer config — supports both formats:
485
912
  ;; Legacy: {"review_model": "harness:model:reasoning"}
486
913
  ;; New: {"reviewer": {"model": "harness:model:reasoning", "prompt": ["path.md"]}}
487
- reviewer-config (:reviewer config)
488
- review-parsed (cond
489
- reviewer-config
490
- (let [parsed (parse-model-string (:model reviewer-config))
491
- prompts (let [p (:prompt reviewer-config)]
492
- (cond (vector? p) p
493
- (string? p) [p]
494
- :else []))]
495
- (assoc parsed :prompts prompts))
496
-
497
- (:review_model config)
498
- (parse-model-string (:review_model config))
499
-
500
- :else nil)
914
+ generic-reviewers (cond
915
+ (:review_models config)
916
+ (mapv parse-model-string (:review_models config))
917
+
918
+ (:review_model config)
919
+ [(parse-model-string (:review_model config))]
920
+
921
+ :else [])
501
922
 
502
923
  ;; Parse planner config — optional dedicated planner
503
924
  ;; Runs in project root, no worktree/review/merge, respects max_pending backpressure
@@ -523,21 +944,35 @@
523
944
  ;; Convert to worker format
524
945
  workers (map-indexed
525
946
  (fn [idx wc]
526
- (let [{:keys [harness model reasoning]} (parse-model-string (:model wc))]
947
+ (let [{:keys [harness model reasoning]} (parse-model-string (:model wc))
948
+ ;; Support per-worker reviewer override
949
+ worker-reviewer-config (:reviewer wc)
950
+ specific-reviewer (when worker-reviewer-config
951
+ (let [parsed (parse-model-string (:model worker-reviewer-config))
952
+ prompts (let [p (:prompt worker-reviewer-config)]
953
+ (cond (vector? p) p
954
+ (string? p) [p]
955
+ :else []))]
956
+ (assoc parsed :prompts prompts)))
957
+ all-reviewers (->> (concat (if specific-reviewer [specific-reviewer] []) generic-reviewers)
958
+ (map #(select-keys % [:harness :model :reasoning :prompts]))
959
+ (distinct)
960
+ (vec))]
527
961
  (worker/create-worker
528
962
  {:id (str "w" idx)
529
963
  :swarm-id swarm-id
530
964
  :harness harness
531
965
  :model model
532
966
  :reasoning reasoning
967
+ :runs (or (:runs wc) (:iterations wc) 10)
968
+ :max-cycles (or (:max_cycles wc) (:iterations wc) (:runs wc) 10)
533
969
  :iterations (or (:iterations wc) 10)
534
970
  :prompts (:prompt wc)
535
971
  :can-plan (:can_plan wc)
536
972
  :wait-between (:wait_between wc)
973
+ :max-wait-for-tasks (:max_wait_for_tasks wc)
537
974
  :max-working-resumes (:max_working_resumes wc)
538
- :review-harness (:harness review-parsed)
539
- :review-model (:model review-parsed)
540
- :review-prompts (:prompts review-parsed)})))
975
+ :reviewers all-reviewers})))
541
976
  expanded-workers)]
542
977
 
543
978
  (println (format "Swarm config from %s:" config-file))
@@ -550,22 +985,19 @@
550
985
  (if (seq (:prompts planner-parsed))
551
986
  (str ", prompts: " (str/join ", " (:prompts planner-parsed)))
552
987
  ""))))
553
- (when review-parsed
554
- (println (format " Reviewer: %s:%s%s"
555
- (name (:harness review-parsed))
556
- (:model review-parsed)
557
- (if (seq (:prompts review-parsed))
558
- (str " (prompts: " (str/join ", " (:prompts review-parsed)) ")")
559
- ""))))
988
+ (when (seq generic-reviewers)
989
+ (println (format " Generic Reviewers: %s"
990
+ (str/join ", " (map #(str (name (:harness %)) ":" (:model %)) generic-reviewers)))))
560
991
  (println (format " Workers: %d total" (count workers)))
561
992
  (doseq [[idx wc] (map-indexed vector worker-configs)]
562
993
  (let [{:keys [harness model reasoning]} (parse-model-string (:model wc))]
563
- (println (format " - %dx %s:%s%s (%d iters%s)"
994
+ (println (format " - %dx %s:%s%s (%d runs, %d cycle cap%s)"
564
995
  (or (:count wc) 1)
565
996
  (name harness)
566
997
  model
567
998
  (if reasoning (str ":" reasoning) "")
568
- (or (:iterations wc) 10)
999
+ (or (:runs wc) (:iterations wc) 10)
1000
+ (or (:max_cycles wc) (:iterations wc) (:runs wc) 10)
569
1001
  (if (:prompt wc) (str ", " (:prompt wc)) "")))))
570
1002
  (println)
571
1003
 
@@ -573,13 +1005,13 @@
573
1005
  ;; Include planner model in validation if configured
574
1006
  (validate-models! (cond-> worker-configs
575
1007
  planner-config (conj planner-config))
576
- review-parsed)
1008
+ generic-reviewers)
577
1009
 
578
1010
  ;; Write started event to runs/{swarm-id}/started.json
579
1011
  (runs/write-started! swarm-id
580
1012
  {:workers workers
581
1013
  :planner-config planner-parsed
582
- :reviewer-config review-parsed
1014
+ :reviewer-configs generic-reviewers
583
1015
  :config-file config-file})
584
1016
  (println (format "\nStarted event written to runs/%s/started.json" swarm-id))
585
1017
 
@@ -685,12 +1117,15 @@
685
1117
  (println "Usage: ./swarm.bb <command> [options]")
686
1118
  (println)
687
1119
  (println "Commands:")
688
- (println " run Run all tasks once")
1120
+ (println " run [file] Run swarm from config (default: oompa.json, oompa/oompa.json)")
689
1121
  (println " loop N Run N iterations")
690
1122
  (println " swarm [file] Run multiple worker configs from oompa.json (parallel)")
691
1123
  (println " tasks Show task status (pending/current/complete)")
692
1124
  (println " prompt \"...\" Run ad-hoc prompt")
693
- (println " status Show last run summary")
1125
+ (println " status Show running swarms")
1126
+ (println " info Show detailed summary of the last run")
1127
+ (println " list List recent swarms (default: 20, --all for full history)")
1128
+ (println " view [swarm-id] Show detailed single-swarm runtime (default: latest)")
694
1129
  (println " worktrees List worktree status")
695
1130
  (println " stop [swarm-id] Stop swarm gracefully (finish current cycle)")
696
1131
  (println " kill [swarm-id] Kill swarm immediately (SIGKILL)")
@@ -698,10 +1133,15 @@
698
1133
  (println " context Print context block")
699
1134
  (println " check Check agent backends")
700
1135
  (println " help Show this help")
1136
+ (println " docs Dump all core architecture and swarm design docs")
701
1137
  (println)
702
1138
  (println "Options:")
703
1139
  (println " --workers N Number of parallel workers (default: 2)")
704
1140
  (println " --workers H:N [H:N ...] Mixed workers by harness (e.g., claude:5 opencode:2)")
1141
+ (println " --all Show full history for list command")
1142
+ (println " --config PATH Config file for run/swarm")
1143
+ (println " --detach Run in background (run command)")
1144
+ (println " --startup-timeout N Detached startup validation window in seconds")
705
1145
  (println " --iterations N Number of iterations per worker (default: 1)")
706
1146
  (println (str " --harness {" (str/join "," (map name (sort harnesses))) "} Agent harness to use (default: codex)"))
707
1147
  (println " --model MODEL Model to use (e.g., codex:gpt-5.3-codex:medium, claude:opus, gemini:gemini-3-pro-preview)")
@@ -709,10 +1149,34 @@
709
1149
  (println " --keep-worktrees Don't cleanup worktrees after run")
710
1150
  (println)
711
1151
  (println "Examples:")
1152
+ (println " ./swarm.bb list")
1153
+ (println " ./swarm.bb list --all")
1154
+ (println " ./swarm.bb view 6cd50f5a")
1155
+ (println " ./swarm.bb run --detach --config oompa/oompa_overnight_self_healing.json")
712
1156
  (println " ./swarm.bb loop 10 --harness codex --model gpt-5.3-codex --workers 3")
713
1157
  (println " ./swarm.bb loop --workers claude:5 opencode:2 --iterations 20")
714
1158
  (println " ./swarm.bb swarm oompa.json # Run multi-model config"))
715
1159
 
1160
+ (defn cmd-docs
1161
+ "Dump core architecture and design documents"
1162
+ [opts args]
1163
+ (let [docs-dir "docs"
1164
+ core-docs ["SWARM_PHILOSOPHY.md" "SWARM_GUIDE.md" "EDN_TICKETS.md" "SYSTEMS_DESIGN.md" "OOMPA.md"]
1165
+ package-dir (or (System/getenv "OOMPA_PACKAGE_ROOT") ".")
1166
+ doc-paths (map #(str package-dir "/" docs-dir "/" %) core-docs)]
1167
+ (println "# Oompa Loompas Core Documentation")
1168
+ (println)
1169
+ (doseq [path doc-paths]
1170
+ (try
1171
+ (let [content (slurp path)]
1172
+ (println (str "## " path))
1173
+ (println "```markdown")
1174
+ (println content)
1175
+ (println "```")
1176
+ (println))
1177
+ (catch Exception e
1178
+ (println (str "Could not read " path ": " (.getMessage e))))))))
1179
+
716
1180
  ;; =============================================================================
717
1181
  ;; Main Entry Point
718
1182
  ;; =============================================================================
@@ -724,24 +1188,28 @@
724
1188
  "tasks" cmd-tasks
725
1189
  "prompt" cmd-prompt
726
1190
  "status" cmd-status
1191
+ "info" cmd-info
1192
+ "list" cmd-list
1193
+ "view" cmd-view
727
1194
  "stop" cmd-stop
728
1195
  "kill" cmd-kill
729
1196
  "worktrees" cmd-worktrees
730
1197
  "cleanup" cmd-cleanup
731
1198
  "context" cmd-context
732
1199
  "check" cmd-check
733
- "help" cmd-help})
1200
+ "help" cmd-help
1201
+ "docs" cmd-docs})
734
1202
 
735
1203
  (defn -main [& args]
736
1204
  (let [[cmd & rest-args] args]
737
1205
  (if-let [handler (get commands cmd)]
738
- (let [{:keys [opts args]} (parse-args rest-args)]
739
- (try
740
- (handler opts args)
741
- (catch Exception e
742
- (binding [*out* *err*]
743
- (println (format "Error: %s" (.getMessage e))))
744
- (System/exit 1))))
1206
+ (try
1207
+ (let [{:keys [opts args]} (parse-args rest-args)]
1208
+ (handler opts args))
1209
+ (catch Exception e
1210
+ (binding [*out* *err*]
1211
+ (println (format "Error: %s" (.getMessage e))))
1212
+ (System/exit 1)))
745
1213
  (do
746
1214
  (cmd-help {} [])
747
1215
  (when cmd