@nbardy/oompa 0.4.0 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -82,24 +82,22 @@ This repo has a fleshed out version of the idea. The oompa loompas are organized
82
82
  **oompa.json** — the only file you need:
83
83
  ```json
84
84
  {
85
- "review_model": "codex:codex-5.2",
86
85
  "workers": [
87
- {"model": "claude:opus-4.5", "prompt": ["config/prompts/planner.md"], "iterations": 5, "count": 1},
88
- {"model": "codex:codex-5.2-mini", "prompt": ["config/prompts/executor.md"], "iterations": 10, "count": 3, "can_plan": false}
86
+ {"model": "claude:opus", "prompt": ["config/prompts/planner.md"], "iterations": 5, "count": 1},
87
+ {"model": "codex:gpt-5.3-codex:medium", "prompt": ["config/prompts/executor.md"], "iterations": 10, "count": 3, "can_plan": false}
89
88
  ]
90
89
  }
91
90
  ```
92
91
 
93
92
  This spawns:
94
93
  - **1 planner** (opus) — reads spec, explores codebase, creates/refines tasks
95
- - **3 executors** (mini) — claims and executes tasks fast
96
- - **Reviews** done by codex-5.2 before any merge
94
+ - **3 executors** (gpt-5.3-codex, medium reasoning) — claim and execute tasks fast
97
95
 
98
96
  #### Worker fields
99
97
 
100
98
  | Field | Required | Description |
101
99
  |-------|----------|-------------|
102
- | `model` | yes | `harness:model` or `harness:model:reasoning` (e.g. `codex:o3:low`, `claude:opus-4.6`) |
100
+ | `model` | yes | `harness:model` or `harness:model:reasoning` (e.g. `codex:gpt-5.3-codex:medium`, `claude:opus`) |
103
101
  | `prompt` | no | String or array of paths — concatenated into one prompt |
104
102
  | `iterations` | no | Max iterations per worker (default: 10) |
105
103
  | `count` | no | Number of workers with this config (default: 1) |
@@ -16,6 +16,7 @@
16
16
  [agentnet.worker :as worker]
17
17
  [agentnet.tasks :as tasks]
18
18
  [agentnet.agent :as agent]
19
+ [babashka.process :as process]
19
20
  [clojure.string :as str]
20
21
  [clojure.java.io :as io]
21
22
  [cheshire.core :as json]))
@@ -132,15 +133,15 @@
132
133
  ;; Commands
133
134
  ;; =============================================================================
134
135
 
135
- (declare cmd-swarm)
136
+ (declare cmd-swarm parse-model-string)
136
137
 
137
138
  (defn- probe-model
138
139
  "Send 'say ok' to a model via its harness CLI. Returns true if the CLI exits with status 0; false on a non-zero exit or any exception."
139
140
  [harness model]
140
141
  (try
141
142
  (let [cmd (case harness
142
- :claude ["claude" "--model" model "-p" "say ok" "--max-turns" "1"]
143
- :codex ["codex" "exec" "--model" model "--" "say ok"])
143
+ :claude ["claude" "--model" model "-p" "say ok"]
144
+ :codex ["codex" "exec" "--dangerously-bypass-approvals-and-sandbox" "--skip-git-repo-check" "--model" model "--" "say ok"])
144
145
  result (process/sh cmd {:out :string :err :string :timeout 30000})]
145
146
  (zero? (:exit result)))
146
147
  (catch Exception _ false)))
@@ -343,10 +344,9 @@
343
344
  (println)
344
345
  (println "Create oompa.json with format:")
345
346
  (println "{")
346
- (println " \"review_model\": \"codex:codex-5.2\",")
347
347
  (println " \"workers\": [")
348
- (println " {\"model\": \"codex:codex-5.2-mini\", \"prompt\": \"prompts/executor.md\", \"iterations\": 10, \"count\": 3, \"can_plan\": false},")
349
- (println " {\"model\": \"claude:opus-4.5\", \"prompt\": [\"prompts/base.md\", \"prompts/planner.md\"], \"count\": 1}")
348
+ (println " {\"model\": \"codex:gpt-5.3-codex:medium\", \"prompt\": \"prompts/executor.md\", \"iterations\": 10, \"count\": 3, \"can_plan\": false},")
349
+ (println " {\"model\": \"claude:opus\", \"prompt\": [\"prompts/base.md\", \"prompts/planner.md\"], \"count\": 1}")
350
350
  (println " ]")
351
351
  (println "}")
352
352
  (println)
@@ -449,7 +449,7 @@
449
449
  (println " --keep-worktrees Don't cleanup worktrees after run")
450
450
  (println)
451
451
  (println "Examples:")
452
- (println " ./swarm.bb loop 10 --harness codex --model codex-5.2-mini --workers 3")
452
+ (println " ./swarm.bb loop 10 --harness codex --model gpt-5.3-codex --workers 3")
453
453
  (println " ./swarm.bb loop --workers claude:5 codex:4 --iterations 20")
454
454
  (println " ./swarm.bb swarm oompa.json # Run multi-model config"))
455
455
 
@@ -177,7 +177,7 @@
177
177
  "--skip-git-repo-check"
178
178
  "-C" abs-worktree]
179
179
  model (into ["--model" model])
180
- reasoning (into ["-c" (str "reasoning_effort=\"" reasoning "\"")])
180
+ reasoning (into ["-c" (str "model_reasoning_effort=\"" reasoning "\"")])
181
181
  true (conj "--" full-prompt))
182
182
  :claude (cond-> ["claude" "-p" "--dangerously-skip-permissions"
183
183
  "--session-id" session-id]
package/bin/test-models CHANGED
@@ -1,10 +1,11 @@
1
1
  #!/usr/bin/env bash
2
- # test-models — probe all models in oompa.json with a hello-world check
2
+ # test-models — end-to-end validation of models in oompa.json
3
3
  #
4
4
  # Usage: test-models [path/to/oompa.json]
5
5
  #
6
- # Sends "say ok" to each unique model via its harness CLI.
7
- # Reports pass/fail for each. Exits non-zero if any fail.
6
+ # For each unique model, launches the agent and asks it to write a result
7
+ # file. Then checks all expected files exist. This validates the full
8
+ # pipeline: harness CLI → model access → code execution → file I/O.
8
9
 
9
10
  set -euo pipefail
10
11
 
@@ -16,9 +17,9 @@ if [ ! -f "$CONFIG" ]; then
16
17
  exit 1
17
18
  fi
18
19
 
19
- # Extract unique model strings (harness:model) from workers[] and review_model
20
+ # Extract unique model strings from workers[] and review_model
20
21
  MODELS=$(python3 -c "
21
- import json, sys
22
+ import json
22
23
  with open('$CONFIG') as f:
23
24
  cfg = json.load(f)
24
25
  models = set()
@@ -36,43 +37,94 @@ if [ -z "$MODELS" ]; then
36
37
  exit 1
37
38
  fi
38
39
 
39
- echo "Probing models from $CONFIG"
40
+ # Create results directory
41
+ RUN_ID=$(python3 -c "import uuid; print(str(uuid.uuid4())[:8])")
42
+ RESULTS_DIR="tst_results_${RUN_ID}"
43
+ mkdir -p "$RESULTS_DIR"
44
+
45
+ MODEL_COUNT=$(echo "$MODELS" | wc -l | tr -d ' ')
46
+ echo "Testing $MODEL_COUNT models from $CONFIG"
47
+ echo "Results dir: $RESULTS_DIR"
40
48
  echo ""
41
49
 
42
- PASS=0
43
- FAIL=0
50
+ # Launch all models in parallel
51
+ PIDS=()
52
+ MODEL_NAMES=()
44
53
 
45
54
  while IFS= read -r model; do
46
55
  HARNESS="${model%%:*}"
47
- MODEL_NAME="${model#*:}"
56
+ # Strip reasoning suffix for the model name passed to CLI
57
+ REST="${model#*:}"
58
+ MODEL_NAME="${REST%%:*}"
59
+ # Safe filename: replace slashes, colons, and dots with underscores
60
+ SAFE_NAME=$(echo "$model" | tr '/:.' '_')
61
+
62
+ MODEL_NAMES+=("$SAFE_NAME")
63
+ PROMPT="Write a file called ${RESULTS_DIR}/${SAFE_NAME}_DONE with exactly the text DONE. Nothing else. Just create that one file."
48
64
 
49
- printf " %-30s " "$model"
65
+ echo " launching $model ..."
50
66
 
51
67
  case "$HARNESS" in
52
68
  claude)
53
- OUTPUT=$(claude --model "$MODEL_NAME" -p "say ok" --max-turns 1 2>&1) && EXIT=$? || EXIT=$?
69
+ claude --model "$MODEL_NAME" -p "$PROMPT" --dangerously-skip-permissions --max-turns 3 \
70
+ > "${RESULTS_DIR}/${SAFE_NAME}.log" 2>&1 &
71
+ PIDS+=($!)
54
72
  ;;
55
73
  codex)
56
- OUTPUT=$(codex exec --model "$MODEL_NAME" -- "say ok" 2>&1) && EXIT=$? || EXIT=$?
74
+ codex exec --model "$MODEL_NAME" \
75
+ --dangerously-bypass-approvals-and-sandbox \
76
+ --skip-git-repo-check \
77
+ -- "$PROMPT" \
78
+ > "${RESULTS_DIR}/${SAFE_NAME}.log" 2>&1 &
79
+ PIDS+=($!)
57
80
  ;;
58
81
  *)
59
- echo "SKIP (unknown harness)"
60
- continue
82
+ echo " SKIP (unknown harness: $HARNESS)"
83
+ # Remove from expected list
84
+ unset 'MODEL_NAMES[${#MODEL_NAMES[@]}-1]'
61
85
  ;;
62
86
  esac
87
+ done <<< "$MODELS"
88
+
89
+ # Wait for all
90
+ echo ""
91
+ echo "Waiting for all models to complete..."
92
+ for pid in "${PIDS[@]}"; do
93
+ wait "$pid" 2>/dev/null || true
94
+ done
63
95
 
64
- if [ $EXIT -eq 0 ]; then
65
- echo "OK"
66
- PASS=$((PASS + 1))
96
+ # Check results
97
+ echo ""
98
+ echo "Results:"
99
+ echo ""
100
+
101
+ PASS=0
102
+ FAIL=0
103
+
104
+ for safe_name in "${MODEL_NAMES[@]}"; do
105
+ RESULT_FILE="${RESULTS_DIR}/${safe_name}_DONE"
106
+ printf " %-40s " "$safe_name"
107
+
108
+ if [ -f "$RESULT_FILE" ]; then
109
+ CONTENT=$(cat "$RESULT_FILE" | tr -d '[:space:]')
110
+ if [ "$CONTENT" = "DONE" ]; then
111
+ echo "PASS"
112
+ PASS=$((PASS + 1))
113
+ else
114
+ echo "FAIL (file exists but content: '$(head -1 "$RESULT_FILE")')"
115
+ FAIL=$((FAIL + 1))
116
+ fi
67
117
  else
68
- echo "FAIL"
69
- # Print first line of error for context
70
- echo "$OUTPUT" | head -3 | sed 's/^/ /'
118
+ echo "FAIL (no result file)"
119
+ # Show first few lines of log for debugging
120
+ if [ -f "${RESULTS_DIR}/${safe_name}.log" ]; then
121
+ head -5 "${RESULTS_DIR}/${safe_name}.log" | sed 's/^/ /'
122
+ fi
71
123
  FAIL=$((FAIL + 1))
72
124
  fi
73
- done <<< "$MODELS"
125
+ done
74
126
 
75
127
  echo ""
76
- echo "$PASS passed, $FAIL failed"
128
+ echo "$PASS passed, $FAIL failed (results in $RESULTS_DIR/)"
77
129
 
78
130
  [ "$FAIL" -eq 0 ]
@@ -1,15 +1,13 @@
1
1
  {
2
- "review_model": "codex:codex-5.2",
3
-
4
2
  "workers": [
5
3
  {
6
- "model": "claude:opus-4.5",
4
+ "model": "claude:opus",
7
5
  "prompt": ["config/prompts/planner.md"],
8
6
  "iterations": 5,
9
7
  "count": 1
10
8
  },
11
9
  {
12
- "model": "codex:codex-5.2-mini",
10
+ "model": "codex:gpt-5.3-codex:medium",
13
11
  "prompt": ["config/prompts/executor.md"],
14
12
  "iterations": 10,
15
13
  "count": 3,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@nbardy/oompa",
3
- "version": "0.4.0",
3
+ "version": "0.4.2",
4
4
  "description": "Git-worktree multi-agent swarm orchestrator for Codex and Claude",
5
5
  "license": "MIT",
6
6
  "type": "commonjs",