@nbardy/oompa 0.4.0 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/bin/test-models +74 -22
  2. package/package.json +1 -1
package/bin/test-models CHANGED
@@ -1,10 +1,11 @@
1
1
  #!/usr/bin/env bash
2
- # test-models — probe all models in oompa.json with a hello-world check
2
+ # test-models — end-to-end validation of models in oompa.json
3
3
  #
4
4
  # Usage: test-models [path/to/oompa.json]
5
5
  #
6
- # Sends "say ok" to each unique model via its harness CLI.
7
- # Reports pass/fail for each. Exits non-zero if any fail.
6
+ # For each unique model, launches the agent and asks it to write a result
7
+ # file. Then checks all expected files exist. This validates the full
8
+ # pipeline: harness CLI → model access → code execution → file I/O.
8
9
 
9
10
  set -euo pipefail
10
11
 
@@ -16,9 +17,9 @@ if [ ! -f "$CONFIG" ]; then
16
17
  exit 1
17
18
  fi
18
19
 
19
- # Extract unique model strings (harness:model) from workers[] and review_model
20
+ # Extract unique model strings from workers[] and review_model
20
21
  MODELS=$(python3 -c "
21
- import json, sys
22
+ import json
22
23
  with open('$CONFIG') as f:
23
24
  cfg = json.load(f)
24
25
  models = set()
@@ -36,43 +37,94 @@ if [ -z "$MODELS" ]; then
36
37
  exit 1
37
38
  fi
38
39
 
39
- echo "Probing models from $CONFIG"
40
+ # Create results directory
41
+ RUN_ID=$(python3 -c "import uuid; print(str(uuid.uuid4())[:8])")
42
+ RESULTS_DIR="tst_results_${RUN_ID}"
43
+ mkdir -p "$RESULTS_DIR"
44
+
45
+ MODEL_COUNT=$(echo "$MODELS" | wc -l | tr -d ' ')
46
+ echo "Testing $MODEL_COUNT models from $CONFIG"
47
+ echo "Results dir: $RESULTS_DIR"
40
48
  echo ""
41
49
 
42
- PASS=0
43
- FAIL=0
50
+ # Launch all models in parallel
51
+ PIDS=()
52
+ MODEL_NAMES=()
44
53
 
45
54
  while IFS= read -r model; do
46
55
  HARNESS="${model%%:*}"
47
- MODEL_NAME="${model#*:}"
56
+ # Strip reasoning suffix for the model name passed to CLI
57
+ REST="${model#*:}"
58
+ MODEL_NAME="${REST%%:*}"
59
+ # Safe filename: replace slashes, colons, and dots
60
+ SAFE_NAME=$(echo "$model" | tr '/:.' '_')
61
+
62
+ MODEL_NAMES+=("$SAFE_NAME")
63
+ PROMPT="Write a file called ${RESULTS_DIR}/${SAFE_NAME}_DONE with exactly the text DONE. Nothing else. Just create that one file."
48
64
 
49
- printf " %-30s " "$model"
65
+ echo " launching $model ..."
50
66
 
51
67
  case "$HARNESS" in
52
68
  claude)
53
- OUTPUT=$(claude --model "$MODEL_NAME" -p "say ok" --max-turns 1 2>&1) && EXIT=$? || EXIT=$?
69
+ claude --model "$MODEL_NAME" -p "$PROMPT" --dangerously-skip-permissions --max-turns 3 \
70
+ > "${RESULTS_DIR}/${SAFE_NAME}.log" 2>&1 &
71
+ PIDS+=($!)
54
72
  ;;
55
73
  codex)
56
- OUTPUT=$(codex exec --model "$MODEL_NAME" -- "say ok" 2>&1) && EXIT=$? || EXIT=$?
74
+ codex exec --model "$MODEL_NAME" \
75
+ --dangerously-bypass-approvals-and-sandbox \
76
+ --skip-git-repo-check \
77
+ -- "$PROMPT" \
78
+ > "${RESULTS_DIR}/${SAFE_NAME}.log" 2>&1 &
79
+ PIDS+=($!)
57
80
  ;;
58
81
  *)
59
- echo "SKIP (unknown harness)"
60
- continue
82
+ echo " SKIP (unknown harness: $HARNESS)"
83
+ # Remove from expected list
84
+ unset 'MODEL_NAMES[${#MODEL_NAMES[@]}-1]'
61
85
  ;;
62
86
  esac
87
+ done <<< "$MODELS"
88
+
89
+ # Wait for all
90
+ echo ""
91
+ echo "Waiting for all models to complete..."
92
+ for pid in "${PIDS[@]}"; do
93
+ wait "$pid" 2>/dev/null || true
94
+ done
63
95
 
64
- if [ $EXIT -eq 0 ]; then
65
- echo "OK"
66
- PASS=$((PASS + 1))
96
+ # Check results
97
+ echo ""
98
+ echo "Results:"
99
+ echo ""
100
+
101
+ PASS=0
102
+ FAIL=0
103
+
104
+ for safe_name in "${MODEL_NAMES[@]}"; do
105
+ RESULT_FILE="${RESULTS_DIR}/${safe_name}_DONE"
106
+ printf " %-40s " "$safe_name"
107
+
108
+ if [ -f "$RESULT_FILE" ]; then
109
+ CONTENT=$(cat "$RESULT_FILE" | tr -d '[:space:]')
110
+ if [ "$CONTENT" = "DONE" ]; then
111
+ echo "PASS"
112
+ PASS=$((PASS + 1))
113
+ else
114
+ echo "FAIL (file exists but content: '$(head -1 "$RESULT_FILE")')"
115
+ FAIL=$((FAIL + 1))
116
+ fi
67
117
  else
68
- echo "FAIL"
69
- # Print first line of error for context
70
- echo "$OUTPUT" | head -3 | sed 's/^/ /'
118
+ echo "FAIL (no result file)"
119
+ # Show first few lines of log for debugging
120
+ if [ -f "${RESULTS_DIR}/${safe_name}.log" ]; then
121
+ head -5 "${RESULTS_DIR}/${safe_name}.log" | sed 's/^/ /'
122
+ fi
71
123
  FAIL=$((FAIL + 1))
72
124
  fi
73
- done <<< "$MODELS"
125
+ done
74
126
 
75
127
  echo ""
76
- echo "$PASS passed, $FAIL failed"
128
+ echo "$PASS passed, $FAIL failed (results in $RESULTS_DIR/)"
77
129
 
78
130
  [ "$FAIL" -eq 0 ]
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@nbardy/oompa",
3
- "version": "0.4.0",
3
+ "version": "0.4.1",
4
4
  "description": "Git-worktree multi-agent swarm orchestrator for Codex and Claude",
5
5
  "license": "MIT",
6
6
  "type": "commonjs",