@nbardy/oompa 0.3.1 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/agentnet/src/agentnet/cli.clj +49 -4
- package/agentnet/src/agentnet/worker.clj +6 -3
- package/bin/test-models +130 -0
- package/package.json +3 -2
package/README.md
CHANGED
|
@@ -99,7 +99,7 @@ This spawns:
|
|
|
99
99
|
|
|
100
100
|
| Field | Required | Description |
|
|
101
101
|
|-------|----------|-------------|
|
|
102
|
-
| `model` | yes | `harness:model` (e.g. `
|
|
102
|
+
| `model` | yes | `harness:model` or `harness:model:reasoning` (e.g. `codex:o3:low`, `claude:opus-4.6`) |
|
|
103
103
|
| `prompt` | no | String or array of paths — concatenated into one prompt |
|
|
104
104
|
| `iterations` | no | Max iterations per worker (default: 10) |
|
|
105
105
|
| `count` | no | Number of workers with this config (default: 1) |
|
package/agentnet/src/agentnet/cli.clj
CHANGED
|
@@ -134,6 +134,43 @@
|
|
|
134
134
|
|
|
135
135
|
(declare cmd-swarm)
|
|
136
136
|
|
|
137
|
+
(defn- probe-model
|
|
138
|
+
"Send 'say ok' to a model via its harness CLI. Returns true if model responds."
|
|
139
|
+
[harness model]
|
|
140
|
+
(try
|
|
141
|
+
(let [cmd (case harness
|
|
142
|
+
:claude ["claude" "--model" model "-p" "say ok" "--max-turns" "1"]
|
|
143
|
+
:codex ["codex" "exec" "--model" model "--" "say ok"])
|
|
144
|
+
result (process/sh cmd {:out :string :err :string :timeout 30000})]
|
|
145
|
+
(zero? (:exit result)))
|
|
146
|
+
(catch Exception _ false)))
|
|
147
|
+
|
|
148
|
+
(defn- validate-models!
|
|
149
|
+
"Probe each unique harness:model pair. Prints results and exits if any fail."
|
|
150
|
+
[worker-configs review-model]
|
|
151
|
+
(let [models (cond-> (set (map (fn [wc]
|
|
152
|
+
(parse-model-string (:model wc)))
|
|
153
|
+
worker-configs))
|
|
154
|
+
review-model (conj review-model))
|
|
155
|
+
_ (println "Validating models...")
|
|
156
|
+
results (pmap (fn [{:keys [harness model]}]
|
|
157
|
+
(let [ok (probe-model harness model)]
|
|
158
|
+
(println (format " %s:%s %s"
|
|
159
|
+
(name harness) model
|
|
160
|
+
(if ok "OK" "FAIL")))
|
|
161
|
+
{:harness harness :model model :ok ok}))
|
|
162
|
+
models)
|
|
163
|
+
failures (filter (complement :ok) results)]
|
|
164
|
+
(when (seq failures)
|
|
165
|
+
(println)
|
|
166
|
+
(println "ERROR: The following models are not accessible:")
|
|
167
|
+
(doseq [{:keys [harness model]} failures]
|
|
168
|
+
(println (format " %s:%s" (name harness) model)))
|
|
169
|
+
(println)
|
|
170
|
+
(println "Fix model names in oompa.json and retry.")
|
|
171
|
+
(System/exit 1))
|
|
172
|
+
(println)))
|
|
173
|
+
|
|
137
174
|
(defn cmd-run
|
|
138
175
|
"Run orchestrator — uses oompa.json if present, otherwise simple mode"
|
|
139
176
|
[opts args]
|
|
@@ -284,11 +321,15 @@
|
|
|
284
321
|
(if available? "✓ available" "✗ not found"))))))
|
|
285
322
|
|
|
286
323
|
(defn- parse-model-string
|
|
287
|
-
"Parse
|
|
324
|
+
"Parse model string into {:harness :model :reasoning}.
|
|
325
|
+
Formats: 'harness:model', 'harness:model:reasoning', or just 'model'."
|
|
288
326
|
[s]
|
|
289
327
|
(if (and s (str/includes? s ":"))
|
|
290
|
-
(let [
|
|
291
|
-
|
|
328
|
+
(let [parts (str/split s #":" 3)]
|
|
329
|
+
(case (count parts)
|
|
330
|
+
2 {:harness (keyword (first parts)) :model (second parts)}
|
|
331
|
+
3 {:harness (keyword (first parts)) :model (second parts) :reasoning (nth parts 2)}
|
|
332
|
+
{:harness :codex :model s}))
|
|
292
333
|
{:harness :codex :model s}))
|
|
293
334
|
|
|
294
335
|
(defn cmd-swarm
|
|
@@ -324,12 +365,13 @@
|
|
|
324
365
|
;; Convert to worker format
|
|
325
366
|
workers (map-indexed
|
|
326
367
|
(fn [idx wc]
|
|
327
|
-
(let [{:keys [harness model]} (parse-model-string (:model wc))]
|
|
368
|
+
(let [{:keys [harness model reasoning]} (parse-model-string (:model wc))]
|
|
328
369
|
(worker/create-worker
|
|
329
370
|
{:id (str "w" idx)
|
|
330
371
|
:swarm-id swarm-id
|
|
331
372
|
:harness harness
|
|
332
373
|
:model model
|
|
374
|
+
:reasoning reasoning
|
|
333
375
|
:iterations (or (:iterations wc) 10)
|
|
334
376
|
:prompts (:prompt wc)
|
|
335
377
|
:can-plan (:can_plan wc)
|
|
@@ -352,6 +394,9 @@
|
|
|
352
394
|
(if (:prompt wc) (str ", " (:prompt wc)) "")))))
|
|
353
395
|
(println)
|
|
354
396
|
|
|
397
|
+
;; Preflight: probe each unique model before launching workers
|
|
398
|
+
(validate-models! worker-configs review-model)
|
|
399
|
+
|
|
355
400
|
;; Run workers using new worker module
|
|
356
401
|
(worker/run-workers! workers))))
|
|
357
402
|
|
package/agentnet/src/agentnet/worker.clj
CHANGED
|
@@ -106,8 +106,9 @@
|
|
|
106
106
|
(defn create-worker
|
|
107
107
|
"Create a worker config.
|
|
108
108
|
:prompts is a string or vector of strings — paths to prompt files.
|
|
109
|
-
:can-plan when false, worker waits for tasks before starting (backpressure).
|
|
110
|
-
|
|
109
|
+
:can-plan when false, worker waits for tasks before starting (backpressure).
|
|
110
|
+
:reasoning reasoning effort level (e.g. \"low\", \"medium\", \"high\") — codex only."
|
|
111
|
+
[{:keys [id swarm-id harness model iterations prompts can-plan reasoning review-harness review-model]}]
|
|
111
112
|
{:id id
|
|
112
113
|
:swarm-id swarm-id
|
|
113
114
|
:harness (or harness :codex)
|
|
@@ -118,6 +119,7 @@
|
|
|
118
119
|
(string? prompts) [prompts]
|
|
119
120
|
:else [])
|
|
120
121
|
:can-plan (if (some? can-plan) can-plan true)
|
|
122
|
+
:reasoning reasoning
|
|
121
123
|
:review-harness review-harness
|
|
122
124
|
:review-model review-model
|
|
123
125
|
:completed 0
|
|
@@ -144,7 +146,7 @@
|
|
|
144
146
|
|
|
145
147
|
(defn- run-agent!
|
|
146
148
|
"Run agent with prompt, return {:output string, :done? bool, :exit int}"
|
|
147
|
-
[{:keys [id swarm-id harness model prompts]} worktree-path context]
|
|
149
|
+
[{:keys [id swarm-id harness model prompts reasoning]} worktree-path context]
|
|
148
150
|
(let [;; 1. Task header (always, from package)
|
|
149
151
|
task-header (or (load-prompt "config/prompts/_task_header.md") "")
|
|
150
152
|
|
|
@@ -175,6 +177,7 @@
|
|
|
175
177
|
"--skip-git-repo-check"
|
|
176
178
|
"-C" abs-worktree]
|
|
177
179
|
model (into ["--model" model])
|
|
180
|
+
reasoning (into ["-c" (str "reasoning_effort=\"" reasoning "\"")])
|
|
178
181
|
true (conj "--" full-prompt))
|
|
179
182
|
:claude (cond-> ["claude" "-p" "--dangerously-skip-permissions"
|
|
180
183
|
"--session-id" session-id]
|
package/bin/test-models
ADDED
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# test-models — end-to-end validation of models in oompa.json
|
|
3
|
+
#
|
|
4
|
+
# Usage: test-models [path/to/oompa.json]
|
|
5
|
+
#
|
|
6
|
+
# For each unique model, launches the agent and asks it to write a result
|
|
7
|
+
# file. Then checks all expected files exist. This validates the full
|
|
8
|
+
# pipeline: harness CLI → model access → code execution → file I/O.
|
|
9
|
+
|
|
10
|
+
set -euo pipefail
|
|
11
|
+
|
|
12
|
+
CONFIG="${1:-oompa.json}"
|
|
13
|
+
|
|
14
|
+
if [ ! -f "$CONFIG" ]; then
|
|
15
|
+
echo "Config not found: $CONFIG"
|
|
16
|
+
echo "Usage: test-models [path/to/oompa.json]"
|
|
17
|
+
exit 1
|
|
18
|
+
fi
|
|
19
|
+
|
|
20
|
+
# Extract unique model strings from workers[] and review_model
|
|
21
|
+
MODELS=$(python3 -c "
|
|
22
|
+
import json
|
|
23
|
+
with open('$CONFIG') as f:
|
|
24
|
+
cfg = json.load(f)
|
|
25
|
+
models = set()
|
|
26
|
+
if cfg.get('review_model'):
|
|
27
|
+
models.add(cfg['review_model'])
|
|
28
|
+
for w in cfg.get('workers', []):
|
|
29
|
+
if 'model' in w:
|
|
30
|
+
models.add(w['model'])
|
|
31
|
+
for m in sorted(models):
|
|
32
|
+
print(m)
|
|
33
|
+
")
|
|
34
|
+
|
|
35
|
+
if [ -z "$MODELS" ]; then
|
|
36
|
+
echo "No models found in $CONFIG"
|
|
37
|
+
exit 1
|
|
38
|
+
fi
|
|
39
|
+
|
|
40
|
+
# Create results directory
|
|
41
|
+
RUN_ID=$(python3 -c "import uuid; print(str(uuid.uuid4())[:8])")
|
|
42
|
+
RESULTS_DIR="tst_results_${RUN_ID}"
|
|
43
|
+
mkdir -p "$RESULTS_DIR"
|
|
44
|
+
|
|
45
|
+
MODEL_COUNT=$(echo "$MODELS" | wc -l | tr -d ' ')
|
|
46
|
+
echo "Testing $MODEL_COUNT models from $CONFIG"
|
|
47
|
+
echo "Results dir: $RESULTS_DIR"
|
|
48
|
+
echo ""
|
|
49
|
+
|
|
50
|
+
# Launch all models in parallel
|
|
51
|
+
PIDS=()
|
|
52
|
+
MODEL_NAMES=()
|
|
53
|
+
|
|
54
|
+
while IFS= read -r model; do
|
|
55
|
+
HARNESS="${model%%:*}"
|
|
56
|
+
# Strip reasoning suffix for the model name passed to CLI
|
|
57
|
+
REST="${model#*:}"
|
|
58
|
+
MODEL_NAME="${REST%%:*}"
|
|
59
|
+
# Safe filename: replace slashes and dots
|
|
60
|
+
SAFE_NAME=$(echo "$model" | tr '/:.' '_')
|
|
61
|
+
|
|
62
|
+
MODEL_NAMES+=("$SAFE_NAME")
|
|
63
|
+
PROMPT="Write a file called ${RESULTS_DIR}/${SAFE_NAME}_DONE with exactly the text DONE. Nothing else. Just create that one file."
|
|
64
|
+
|
|
65
|
+
echo " launching $model ..."
|
|
66
|
+
|
|
67
|
+
case "$HARNESS" in
|
|
68
|
+
claude)
|
|
69
|
+
claude --model "$MODEL_NAME" -p "$PROMPT" --dangerously-skip-permissions --max-turns 3 \
|
|
70
|
+
> "${RESULTS_DIR}/${SAFE_NAME}.log" 2>&1 &
|
|
71
|
+
PIDS+=($!)
|
|
72
|
+
;;
|
|
73
|
+
codex)
|
|
74
|
+
codex exec --model "$MODEL_NAME" \
|
|
75
|
+
--dangerously-bypass-approvals-and-sandbox \
|
|
76
|
+
--skip-git-repo-check \
|
|
77
|
+
-- "$PROMPT" \
|
|
78
|
+
> "${RESULTS_DIR}/${SAFE_NAME}.log" 2>&1 &
|
|
79
|
+
PIDS+=($!)
|
|
80
|
+
;;
|
|
81
|
+
*)
|
|
82
|
+
echo " SKIP (unknown harness: $HARNESS)"
|
|
83
|
+
# Remove from expected list
|
|
84
|
+
unset 'MODEL_NAMES[${#MODEL_NAMES[@]}-1]'
|
|
85
|
+
;;
|
|
86
|
+
esac
|
|
87
|
+
done <<< "$MODELS"
|
|
88
|
+
|
|
89
|
+
# Wait for all
|
|
90
|
+
echo ""
|
|
91
|
+
echo "Waiting for all models to complete..."
|
|
92
|
+
for pid in "${PIDS[@]}"; do
|
|
93
|
+
wait "$pid" 2>/dev/null || true
|
|
94
|
+
done
|
|
95
|
+
|
|
96
|
+
# Check results
|
|
97
|
+
echo ""
|
|
98
|
+
echo "Results:"
|
|
99
|
+
echo ""
|
|
100
|
+
|
|
101
|
+
PASS=0
|
|
102
|
+
FAIL=0
|
|
103
|
+
|
|
104
|
+
for safe_name in "${MODEL_NAMES[@]}"; do
|
|
105
|
+
RESULT_FILE="${RESULTS_DIR}/${safe_name}_DONE"
|
|
106
|
+
printf " %-40s " "$safe_name"
|
|
107
|
+
|
|
108
|
+
if [ -f "$RESULT_FILE" ]; then
|
|
109
|
+
CONTENT=$(cat "$RESULT_FILE" | tr -d '[:space:]')
|
|
110
|
+
if [ "$CONTENT" = "DONE" ]; then
|
|
111
|
+
echo "PASS"
|
|
112
|
+
PASS=$((PASS + 1))
|
|
113
|
+
else
|
|
114
|
+
echo "FAIL (file exists but content: '$(head -1 "$RESULT_FILE")')"
|
|
115
|
+
FAIL=$((FAIL + 1))
|
|
116
|
+
fi
|
|
117
|
+
else
|
|
118
|
+
echo "FAIL (no result file)"
|
|
119
|
+
# Show first few lines of log for debugging
|
|
120
|
+
if [ -f "${RESULTS_DIR}/${safe_name}.log" ]; then
|
|
121
|
+
head -5 "${RESULTS_DIR}/${safe_name}.log" | sed 's/^/ /'
|
|
122
|
+
fi
|
|
123
|
+
FAIL=$((FAIL + 1))
|
|
124
|
+
fi
|
|
125
|
+
done
|
|
126
|
+
|
|
127
|
+
echo ""
|
|
128
|
+
echo "$PASS passed, $FAIL failed (results in $RESULTS_DIR/)"
|
|
129
|
+
|
|
130
|
+
[ "$FAIL" -eq 0 ]
|
package/package.json
CHANGED
|
@@ -1,11 +1,12 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@nbardy/oompa",
|
|
3
|
-
"version": "0.3.1",
|
|
3
|
+
"version": "0.4.1",
|
|
4
4
|
"description": "Git-worktree multi-agent swarm orchestrator for Codex and Claude",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"type": "commonjs",
|
|
7
7
|
"bin": {
|
|
8
|
-
"oompa": "bin/oompa.js"
|
|
8
|
+
"oompa": "bin/oompa.js",
|
|
9
|
+
"oompa-test-models": "bin/test-models"
|
|
9
10
|
},
|
|
10
11
|
"files": [
|
|
11
12
|
"bin/",
|