npm - @nbardy/oompa - Versions diffs - 0.4.0 → 0.4.2 - Mend

@nbardy/oompa 0.4.0 → 0.4.2

This diff shows the changes between two publicly released versions of the package, as they appear in the supported public registry they were published to. It is provided for informational purposes only.

Files changed (6)

package/README.md +4 -6
package/agentnet/src/agentnet/cli.clj +7 -7
package/agentnet/src/agentnet/worker.clj +1 -1
package/bin/test-models +74 -22
package/oompa.example.json +2 -4
package/package.json +1 -1

package/README.md CHANGED Viewed

@@ -82,24 +82,22 @@ This repo has a fleshed out version of the idea. The oompa loompas are organized
 **oompa.json** — the only file you need:
 ```json
 {
-  "review_model": "codex:codex-5.2",
   "workers": [
-    {"model": "claude:opus-4.5", "prompt": ["config/prompts/planner.md"], "iterations": 5, "count": 1},
-    {"model": "codex:codex-5.2-mini", "prompt": ["config/prompts/executor.md"], "iterations": 10, "count": 3, "can_plan": false}
+    {"model": "claude:opus", "prompt": ["config/prompts/planner.md"], "iterations": 5, "count": 1},
+    {"model": "codex:gpt-5.3-codex:medium", "prompt": ["config/prompts/executor.md"], "iterations": 10, "count": 3, "can_plan": false}
   ]
 }
 ```
 This spawns:
 - **1 planner** (opus) — reads spec, explores codebase, creates/refines tasks
-- **3 executors** (mini) — claims and executes tasks fast
-- **Reviews** done by codex-5.2 before any merge
+- **3 executors** (gpt-5.3-codex, medium reasoning) — claims and executes tasks fast
 #### Worker fields
 | Field | Required | Description |
 |-------|----------|-------------|
-| `model` | yes | `harness:model` or `harness:model:reasoning` (e.g. `codex:o3:low`, `claude:opus-4.6`) |
+| `model` | yes | `harness:model` or `harness:model:reasoning` (e.g. `codex:gpt-5.3-codex:medium`, `claude:opus`) |
 | `prompt` | no | String or array of paths — concatenated into one prompt |
 | `iterations` | no | Max iterations per worker (default: 10) |
 | `count` | no | Number of workers with this config (default: 1) |

package/agentnet/src/agentnet/cli.clj CHANGED Viewed

@@ -16,6 +16,7 @@
             [agentnet.worker :as worker]
             [agentnet.tasks :as tasks]
             [agentnet.agent :as agent]
+            [babashka.process :as process]
             [clojure.string :as str]
             [clojure.java.io :as io]
             [cheshire.core :as json]))
@@ -132,15 +133,15 @@
 ;; Commands
 ;; =============================================================================
-(declare cmd-swarm)
+(declare cmd-swarm parse-model-string)
 (defn- probe-model
   "Send 'say ok' to a model via its harness CLI. Returns true if model responds."
   [harness model]
   (try
     (let [cmd (case harness
-                :claude ["claude" "--model" model "-p" "say ok" "--max-turns" "1"]
-                :codex  ["codex" "exec" "--model" model "--" "say ok"])
+                :claude ["claude" "--model" model "-p" "say ok"]
+                :codex  ["codex" "exec" "--dangerously-bypass-approvals-and-sandbox" "--skip-git-repo-check" "--model" model "--" "say ok"])
           result (process/sh cmd {:out :string :err :string :timeout 30000})]
       (zero? (:exit result)))
     (catch Exception _ false)))
@@ -343,10 +344,9 @@
       (println)
       (println "Create oompa.json with format:")
       (println "{")
-      (println "  \"review_model\": \"codex:codex-5.2\",")
       (println "  \"workers\": [")
-      (println "    {\"model\": \"codex:codex-5.2-mini\", \"prompt\": \"prompts/executor.md\", \"iterations\": 10, \"count\": 3, \"can_plan\": false},")
-      (println "    {\"model\": \"claude:opus-4.5\", \"prompt\": [\"prompts/base.md\", \"prompts/planner.md\"], \"count\": 1}")
+      (println "    {\"model\": \"codex:gpt-5.3-codex:medium\", \"prompt\": \"prompts/executor.md\", \"iterations\": 10, \"count\": 3, \"can_plan\": false},")
+      (println "    {\"model\": \"claude:opus\", \"prompt\": [\"prompts/base.md\", \"prompts/planner.md\"], \"count\": 1}")
       (println "  ]")
       (println "}")
       (println)
@@ -449,7 +449,7 @@
   (println "  --keep-worktrees         Don't cleanup worktrees after run")
   (println)
   (println "Examples:")
-  (println "  ./swarm.bb loop 10 --harness codex --model codex-5.2-mini --workers 3")
+  (println "  ./swarm.bb loop 10 --harness codex --model gpt-5.3-codex --workers 3")
   (println "  ./swarm.bb loop --workers claude:5 codex:4 --iterations 20")
   (println "  ./swarm.bb swarm oompa.json  # Run multi-model config"))

package/agentnet/src/agentnet/worker.clj CHANGED Viewed

@@ -177,7 +177,7 @@
                               "--skip-git-repo-check"
                               "-C" abs-worktree]
                        model (into ["--model" model])
-                       reasoning (into ["-c" (str "reasoning_effort=\"" reasoning "\"")])
+                       reasoning (into ["-c" (str "model_reasoning_effort=\"" reasoning "\"")])
                        true (conj "--" full-prompt))
               :claude (cond-> ["claude" "-p" "--dangerously-skip-permissions"
                                "--session-id" session-id]

package/bin/test-models CHANGED Viewed

@@ -1,10 +1,11 @@
 #!/usr/bin/env bash
-# test-models — probe all models in oompa.json with a hello-world check
+# test-models — end-to-end validation of models in oompa.json
 #
 # Usage: test-models [path/to/oompa.json]
 #
-# Sends "say ok" to each unique model via its harness CLI.
-# Reports pass/fail for each. Exits non-zero if any fail.
+# For each unique model, launches the agent and asks it to write a result
+# file. Then checks all expected files exist. This validates the full
+# pipeline: harness CLI → model access → code execution → file I/O.
 set -euo pipefail
@@ -16,9 +17,9 @@ if [ ! -f "$CONFIG" ]; then
   exit 1
 fi
-# Extract unique model strings (harness:model) from workers[] and review_model
+# Extract unique model strings from workers[] and review_model
 MODELS=$(python3 -c "
-import json, sys
+import json
 with open('$CONFIG') as f:
     cfg = json.load(f)
 models = set()
@@ -36,43 +37,94 @@ if [ -z "$MODELS" ]; then
   exit 1
 fi
-echo "Probing models from $CONFIG"
+# Create results directory
+RUN_ID=$(python3 -c "import uuid; print(str(uuid.uuid4())[:8])")
+RESULTS_DIR="tst_results_${RUN_ID}"
+mkdir -p "$RESULTS_DIR"
+MODEL_COUNT=$(echo "$MODELS" | wc -l | tr -d ' ')
+echo "Testing $MODEL_COUNT models from $CONFIG"
+echo "Results dir: $RESULTS_DIR"
 echo ""
-PASS=0
-FAIL=0
+# Launch all models in parallel
+PIDS=()
+MODEL_NAMES=()
 while IFS= read -r model; do
   HARNESS="${model%%:*}"
-  MODEL_NAME="${model#*:}"
+  # Strip reasoning suffix for the model name passed to CLI
+  REST="${model#*:}"
+  MODEL_NAME="${REST%%:*}"
+  # Safe filename: replace slashes and dots
+  SAFE_NAME=$(echo "$model" | tr '/:.' '_')
+  MODEL_NAMES+=("$SAFE_NAME")
+  PROMPT="Write a file called ${RESULTS_DIR}/${SAFE_NAME}_DONE with exactly the text DONE. Nothing else. Just create that one file."
-  printf "  %-30s " "$model"
+  echo "  launching $model ..."
   case "$HARNESS" in
     claude)
-      OUTPUT=$(claude --model "$MODEL_NAME" -p "say ok" --max-turns 1 2>&1) && EXIT=$? || EXIT=$?
+      claude --model "$MODEL_NAME" -p "$PROMPT" --dangerously-skip-permissions --max-turns 3 \
+        > "${RESULTS_DIR}/${SAFE_NAME}.log" 2>&1 &
+      PIDS+=($!)
       ;;
     codex)
-      OUTPUT=$(codex exec --model "$MODEL_NAME" -- "say ok" 2>&1) && EXIT=$? || EXIT=$?
+      codex exec --model "$MODEL_NAME" \
+        --dangerously-bypass-approvals-and-sandbox \
+        --skip-git-repo-check \
+        -- "$PROMPT" \
+        > "${RESULTS_DIR}/${SAFE_NAME}.log" 2>&1 &
+      PIDS+=($!)
       ;;
     *)
-      echo "SKIP (unknown harness)"
-      continue
+      echo "    SKIP (unknown harness: $HARNESS)"
+      # Remove from expected list
+      unset 'MODEL_NAMES[${#MODEL_NAMES[@]}-1]'
       ;;
   esac
+done <<< "$MODELS"
+# Wait for all
+echo ""
+echo "Waiting for all models to complete..."
+for pid in "${PIDS[@]}"; do
+  wait "$pid" 2>/dev/null || true
+done
-  if [ $EXIT -eq 0 ]; then
-    echo "OK"
-    PASS=$((PASS + 1))
+# Check results
+echo ""
+echo "Results:"
+echo ""
+PASS=0
+FAIL=0
+for safe_name in "${MODEL_NAMES[@]}"; do
+  RESULT_FILE="${RESULTS_DIR}/${safe_name}_DONE"
+  printf "  %-40s " "$safe_name"
+  if [ -f "$RESULT_FILE" ]; then
+    CONTENT=$(cat "$RESULT_FILE" | tr -d '[:space:]')
+    if [ "$CONTENT" = "DONE" ]; then
+      echo "PASS"
+      PASS=$((PASS + 1))
+    else
+      echo "FAIL (file exists but content: '$(head -1 "$RESULT_FILE")')"
+      FAIL=$((FAIL + 1))
+    fi
   else
-    echo "FAIL"
-    # Print first line of error for context
-    echo "$OUTPUT" | head -3 | sed 's/^/    /'
+    echo "FAIL (no result file)"
+    # Show first few lines of log for debugging
+    if [ -f "${RESULTS_DIR}/${safe_name}.log" ]; then
+      head -5 "${RESULTS_DIR}/${safe_name}.log" | sed 's/^/    /'
+    fi
     FAIL=$((FAIL + 1))
   fi
-done <<< "$MODELS"
+done
 echo ""
-echo "$PASS passed, $FAIL failed"
+echo "$PASS passed, $FAIL failed (results in $RESULTS_DIR/)"
 [ "$FAIL" -eq 0 ]

package/oompa.example.json CHANGED Viewed

@@ -1,15 +1,13 @@
 {
-  "review_model": "codex:codex-5.2",
   "workers": [
     {
-      "model": "claude:opus-4.5",
+      "model": "claude:opus",
       "prompt": ["config/prompts/planner.md"],
       "iterations": 5,
       "count": 1
     },
     {
-      "model": "codex:codex-5.2-mini",
+      "model": "codex:gpt-5.3-codex:medium",
       "prompt": ["config/prompts/executor.md"],
       "iterations": 10,
       "count": 3,

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@nbardy/oompa",
-  "version": "0.4.0",
+  "version": "0.4.2",
   "description": "Git-worktree multi-agent swarm orchestrator for Codex and Claude",
   "license": "MIT",
   "type": "commonjs",