@nbardy/oompa 0.4.0 → 0.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -6
- package/agentnet/src/agentnet/cli.clj +7 -7
- package/agentnet/src/agentnet/worker.clj +1 -1
- package/bin/test-models +74 -22
- package/oompa.example.json +2 -4
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -82,24 +82,22 @@ This repo has a fleshed out version of the idea. The oompa loompas are organized
|
|
|
82
82
|
**oompa.json** — the only file you need:
|
|
83
83
|
```json
|
|
84
84
|
{
|
|
85
|
-
"review_model": "codex:codex-5.2",
|
|
86
85
|
"workers": [
|
|
87
|
-
{"model": "claude:opus
|
|
88
|
-
{"model": "codex:
|
|
86
|
+
{"model": "claude:opus", "prompt": ["config/prompts/planner.md"], "iterations": 5, "count": 1},
|
|
87
|
+
{"model": "codex:gpt-5.3-codex:medium", "prompt": ["config/prompts/executor.md"], "iterations": 10, "count": 3, "can_plan": false}
|
|
89
88
|
]
|
|
90
89
|
}
|
|
91
90
|
```
|
|
92
91
|
|
|
93
92
|
This spawns:
|
|
94
93
|
- **1 planner** (opus) — reads spec, explores codebase, creates/refines tasks
|
|
95
|
-
- **3 executors** (
|
|
96
|
-
- **Reviews** done by codex-5.2 before any merge
|
|
94
|
+
- **3 executors** (gpt-5.3-codex, medium reasoning) — claims and executes tasks fast
|
|
97
95
|
|
|
98
96
|
#### Worker fields
|
|
99
97
|
|
|
100
98
|
| Field | Required | Description |
|
|
101
99
|
|-------|----------|-------------|
|
|
102
|
-
| `model` | yes | `harness:model` or `harness:model:reasoning` (e.g. `codex:
|
|
100
|
+
| `model` | yes | `harness:model` or `harness:model:reasoning` (e.g. `codex:gpt-5.3-codex:medium`, `claude:opus`) |
|
|
103
101
|
| `prompt` | no | String or array of paths — concatenated into one prompt |
|
|
104
102
|
| `iterations` | no | Max iterations per worker (default: 10) |
|
|
105
103
|
| `count` | no | Number of workers with this config (default: 1) |
|
|
package/agentnet/src/agentnet/cli.clj
CHANGED
|
@@ -16,6 +16,7 @@
|
|
|
16
16
|
[agentnet.worker :as worker]
|
|
17
17
|
[agentnet.tasks :as tasks]
|
|
18
18
|
[agentnet.agent :as agent]
|
|
19
|
+
[babashka.process :as process]
|
|
19
20
|
[clojure.string :as str]
|
|
20
21
|
[clojure.java.io :as io]
|
|
21
22
|
[cheshire.core :as json]))
|
|
@@ -132,15 +133,15 @@
|
|
|
132
133
|
;; Commands
|
|
133
134
|
;; =============================================================================
|
|
134
135
|
|
|
135
|
-
(declare cmd-swarm)
|
|
136
|
+
(declare cmd-swarm parse-model-string)
|
|
136
137
|
|
|
137
138
|
(defn- probe-model
|
|
138
139
|
"Send 'say ok' to a model via its harness CLI. Returns true if model responds."
|
|
139
140
|
[harness model]
|
|
140
141
|
(try
|
|
141
142
|
(let [cmd (case harness
|
|
142
|
-
:claude ["claude" "--model" model "-p" "say ok"
|
|
143
|
-
:codex ["codex" "exec" "--model" model "--" "say ok"])
|
|
143
|
+
:claude ["claude" "--model" model "-p" "say ok"]
|
|
144
|
+
:codex ["codex" "exec" "--dangerously-bypass-approvals-and-sandbox" "--skip-git-repo-check" "--model" model "--" "say ok"])
|
|
144
145
|
result (process/sh cmd {:out :string :err :string :timeout 30000})]
|
|
145
146
|
(zero? (:exit result)))
|
|
146
147
|
(catch Exception _ false)))
|
|
@@ -343,10 +344,9 @@
|
|
|
343
344
|
(println)
|
|
344
345
|
(println "Create oompa.json with format:")
|
|
345
346
|
(println "{")
|
|
346
|
-
(println " \"review_model\": \"codex:codex-5.2\",")
|
|
347
347
|
(println " \"workers\": [")
|
|
348
|
-
(println " {\"model\": \"codex:
|
|
349
|
-
(println " {\"model\": \"claude:opus
|
|
348
|
+
(println " {\"model\": \"codex:gpt-5.3-codex:medium\", \"prompt\": \"prompts/executor.md\", \"iterations\": 10, \"count\": 3, \"can_plan\": false},")
|
|
349
|
+
(println " {\"model\": \"claude:opus\", \"prompt\": [\"prompts/base.md\", \"prompts/planner.md\"], \"count\": 1}")
|
|
350
350
|
(println " ]")
|
|
351
351
|
(println "}")
|
|
352
352
|
(println)
|
|
@@ -449,7 +449,7 @@
|
|
|
449
449
|
(println " --keep-worktrees Don't cleanup worktrees after run")
|
|
450
450
|
(println)
|
|
451
451
|
(println "Examples:")
|
|
452
|
-
(println " ./swarm.bb loop 10 --harness codex --model
|
|
452
|
+
(println " ./swarm.bb loop 10 --harness codex --model gpt-5.3-codex --workers 3")
|
|
453
453
|
(println " ./swarm.bb loop --workers claude:5 codex:4 --iterations 20")
|
|
454
454
|
(println " ./swarm.bb swarm oompa.json # Run multi-model config"))
|
|
455
455
|
|
|
package/agentnet/src/agentnet/worker.clj
CHANGED
|
@@ -177,7 +177,7 @@
|
|
|
177
177
|
"--skip-git-repo-check"
|
|
178
178
|
"-C" abs-worktree]
|
|
179
179
|
model (into ["--model" model])
|
|
180
|
-
reasoning (into ["-c" (str "
|
|
180
|
+
reasoning (into ["-c" (str "model_reasoning_effort=\"" reasoning "\"")])
|
|
181
181
|
true (conj "--" full-prompt))
|
|
182
182
|
:claude (cond-> ["claude" "-p" "--dangerously-skip-permissions"
|
|
183
183
|
"--session-id" session-id]
|
package/bin/test-models
CHANGED
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
#!/usr/bin/env bash
|
|
2
|
-
# test-models —
|
|
2
|
+
# test-models — end-to-end validation of models in oompa.json
|
|
3
3
|
#
|
|
4
4
|
# Usage: test-models [path/to/oompa.json]
|
|
5
5
|
#
|
|
6
|
-
#
|
|
7
|
-
#
|
|
6
|
+
# For each unique model, launches the agent and asks it to write a result
|
|
7
|
+
# file. Then checks all expected files exist. This validates the full
|
|
8
|
+
# pipeline: harness CLI → model access → code execution → file I/O.
|
|
8
9
|
|
|
9
10
|
set -euo pipefail
|
|
10
11
|
|
|
@@ -16,9 +17,9 @@ if [ ! -f "$CONFIG" ]; then
|
|
|
16
17
|
exit 1
|
|
17
18
|
fi
|
|
18
19
|
|
|
19
|
-
# Extract unique model strings
|
|
20
|
+
# Extract unique model strings from workers[] and review_model
|
|
20
21
|
MODELS=$(python3 -c "
|
|
21
|
-
import json
|
|
22
|
+
import json
|
|
22
23
|
with open('$CONFIG') as f:
|
|
23
24
|
cfg = json.load(f)
|
|
24
25
|
models = set()
|
|
@@ -36,43 +37,94 @@ if [ -z "$MODELS" ]; then
|
|
|
36
37
|
exit 1
|
|
37
38
|
fi
|
|
38
39
|
|
|
39
|
-
|
|
40
|
+
# Create results directory
|
|
41
|
+
RUN_ID=$(python3 -c "import uuid; print(str(uuid.uuid4())[:8])")
|
|
42
|
+
RESULTS_DIR="tst_results_${RUN_ID}"
|
|
43
|
+
mkdir -p "$RESULTS_DIR"
|
|
44
|
+
|
|
45
|
+
MODEL_COUNT=$(echo "$MODELS" | wc -l | tr -d ' ')
|
|
46
|
+
echo "Testing $MODEL_COUNT models from $CONFIG"
|
|
47
|
+
echo "Results dir: $RESULTS_DIR"
|
|
40
48
|
echo ""
|
|
41
49
|
|
|
42
|
-
|
|
43
|
-
|
|
50
|
+
# Launch all models in parallel
|
|
51
|
+
PIDS=()
|
|
52
|
+
MODEL_NAMES=()
|
|
44
53
|
|
|
45
54
|
while IFS= read -r model; do
|
|
46
55
|
HARNESS="${model%%:*}"
|
|
47
|
-
|
|
56
|
+
# Strip reasoning suffix for the model name passed to CLI
|
|
57
|
+
REST="${model#*:}"
|
|
58
|
+
MODEL_NAME="${REST%%:*}"
|
|
59
|
+
# Safe filename: replace slashes and dots
|
|
60
|
+
SAFE_NAME=$(echo "$model" | tr '/:.' '_')
|
|
61
|
+
|
|
62
|
+
MODEL_NAMES+=("$SAFE_NAME")
|
|
63
|
+
PROMPT="Write a file called ${RESULTS_DIR}/${SAFE_NAME}_DONE with exactly the text DONE. Nothing else. Just create that one file."
|
|
48
64
|
|
|
49
|
-
|
|
65
|
+
echo " launching $model ..."
|
|
50
66
|
|
|
51
67
|
case "$HARNESS" in
|
|
52
68
|
claude)
|
|
53
|
-
|
|
69
|
+
claude --model "$MODEL_NAME" -p "$PROMPT" --dangerously-skip-permissions --max-turns 3 \
|
|
70
|
+
> "${RESULTS_DIR}/${SAFE_NAME}.log" 2>&1 &
|
|
71
|
+
PIDS+=($!)
|
|
54
72
|
;;
|
|
55
73
|
codex)
|
|
56
|
-
|
|
74
|
+
codex exec --model "$MODEL_NAME" \
|
|
75
|
+
--dangerously-bypass-approvals-and-sandbox \
|
|
76
|
+
--skip-git-repo-check \
|
|
77
|
+
-- "$PROMPT" \
|
|
78
|
+
> "${RESULTS_DIR}/${SAFE_NAME}.log" 2>&1 &
|
|
79
|
+
PIDS+=($!)
|
|
57
80
|
;;
|
|
58
81
|
*)
|
|
59
|
-
echo "SKIP (unknown harness)"
|
|
60
|
-
|
|
82
|
+
echo " SKIP (unknown harness: $HARNESS)"
|
|
83
|
+
# Remove from expected list
|
|
84
|
+
unset 'MODEL_NAMES[${#MODEL_NAMES[@]}-1]'
|
|
61
85
|
;;
|
|
62
86
|
esac
|
|
87
|
+
done <<< "$MODELS"
|
|
88
|
+
|
|
89
|
+
# Wait for all
|
|
90
|
+
echo ""
|
|
91
|
+
echo "Waiting for all models to complete..."
|
|
92
|
+
for pid in "${PIDS[@]}"; do
|
|
93
|
+
wait "$pid" 2>/dev/null || true
|
|
94
|
+
done
|
|
63
95
|
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
96
|
+
# Check results
|
|
97
|
+
echo ""
|
|
98
|
+
echo "Results:"
|
|
99
|
+
echo ""
|
|
100
|
+
|
|
101
|
+
PASS=0
|
|
102
|
+
FAIL=0
|
|
103
|
+
|
|
104
|
+
for safe_name in "${MODEL_NAMES[@]}"; do
|
|
105
|
+
RESULT_FILE="${RESULTS_DIR}/${safe_name}_DONE"
|
|
106
|
+
printf " %-40s " "$safe_name"
|
|
107
|
+
|
|
108
|
+
if [ -f "$RESULT_FILE" ]; then
|
|
109
|
+
CONTENT=$(cat "$RESULT_FILE" | tr -d '[:space:]')
|
|
110
|
+
if [ "$CONTENT" = "DONE" ]; then
|
|
111
|
+
echo "PASS"
|
|
112
|
+
PASS=$((PASS + 1))
|
|
113
|
+
else
|
|
114
|
+
echo "FAIL (file exists but content: '$(head -1 "$RESULT_FILE")')"
|
|
115
|
+
FAIL=$((FAIL + 1))
|
|
116
|
+
fi
|
|
67
117
|
else
|
|
68
|
-
echo "FAIL"
|
|
69
|
-
#
|
|
70
|
-
|
|
118
|
+
echo "FAIL (no result file)"
|
|
119
|
+
# Show first few lines of log for debugging
|
|
120
|
+
if [ -f "${RESULTS_DIR}/${safe_name}.log" ]; then
|
|
121
|
+
head -5 "${RESULTS_DIR}/${safe_name}.log" | sed 's/^/ /'
|
|
122
|
+
fi
|
|
71
123
|
FAIL=$((FAIL + 1))
|
|
72
124
|
fi
|
|
73
|
-
done
|
|
125
|
+
done
|
|
74
126
|
|
|
75
127
|
echo ""
|
|
76
|
-
echo "$PASS passed, $FAIL failed"
|
|
128
|
+
echo "$PASS passed, $FAIL failed (results in $RESULTS_DIR/)"
|
|
77
129
|
|
|
78
130
|
[ "$FAIL" -eq 0 ]
|
package/oompa.example.json
CHANGED
|
@@ -1,15 +1,13 @@
|
|
|
1
1
|
{
|
|
2
|
-
"review_model": "codex:codex-5.2",
|
|
3
|
-
|
|
4
2
|
"workers": [
|
|
5
3
|
{
|
|
6
|
-
"model": "claude:opus
|
|
4
|
+
"model": "claude:opus",
|
|
7
5
|
"prompt": ["config/prompts/planner.md"],
|
|
8
6
|
"iterations": 5,
|
|
9
7
|
"count": 1
|
|
10
8
|
},
|
|
11
9
|
{
|
|
12
|
-
"model": "codex:
|
|
10
|
+
"model": "codex:gpt-5.3-codex:medium",
|
|
13
11
|
"prompt": ["config/prompts/executor.md"],
|
|
14
12
|
"iterations": 10,
|
|
15
13
|
"count": 3,
|