@nbardy/oompa 0.7.1 → 0.7.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -6
- package/agentnet/src/agentnet/agent.clj +45 -20
- package/agentnet/src/agentnet/cli.clj +852 -193
- package/agentnet/src/agentnet/cli.clj.bak +1384 -0
- package/agentnet/src/agentnet/core.clj +17 -2
- package/agentnet/src/agentnet/harness.clj +93 -37
- package/agentnet/src/agentnet/runs.clj +11 -6
- package/agentnet/src/agentnet/schema.clj +8 -1
- package/agentnet/src/agentnet/tasks.clj +6 -0
- package/agentnet/src/agentnet/worker.clj +867 -408
- package/bin/oompa.js +5 -1
- package/config/prompts/_task_header.md +9 -2
- package/config/prompts/magicgenie-executor.md +15 -0
- package/config/prompts/magicgenie-planner.md +26 -0
- package/config/prompts/magicgenie-reviewer.md +44 -0
- package/oompa.example.json +4 -4
- package/package.json +5 -3
- package/scripts/README.md +6 -0
- package/scripts/__pycache__/stream_bridge.cpython-314.pyc +0 -0
- package/scripts/copy-repo-code.sh +110 -0
- package/scripts/install-babashka.js +97 -0
- package/scripts/test-harness-resume.sh +229 -0
package/bin/oompa.js
CHANGED
|
@@ -15,7 +15,11 @@ if (!fs.existsSync(swarmScript) || !fs.existsSync(classpath)) {
|
|
|
15
15
|
process.exit(1);
|
|
16
16
|
}
|
|
17
17
|
|
|
18
|
-
|
|
18
|
+
// Try to use the locally bundled bb if it exists (from postinstall)
|
|
19
|
+
const localBb = path.join(__dirname, "bb");
|
|
20
|
+
const bbPath = fs.existsSync(localBb) ? localBb : "bb";
|
|
21
|
+
|
|
22
|
+
const result = spawnSync(bbPath, ["--classpath", classpath, swarmScript, ...argv], {
|
|
19
23
|
stdio: "inherit",
|
|
20
24
|
cwd: process.cwd(),
|
|
21
25
|
env: { ...process.env, OOMPA_PACKAGE_ROOT: packageRoot }
|
|
@@ -21,11 +21,18 @@ CLAIM(task-001, task-003)
|
|
|
21
21
|
The framework will claim them atomically and resume you with results: what succeeded, what was already taken, and what's still pending. You can CLAIM again if needed.
|
|
22
22
|
|
|
23
23
|
Do NOT `mv` task files yourself. The framework owns all task state transitions.
|
|
24
|
+
Always read/write queue files via `{{TASKS_ROOT}}/...` (not hard-coded local `tasks/...` paths).
|
|
25
|
+
|
|
26
|
+
### Before merge signal
|
|
27
|
+
|
|
28
|
+
- Before `COMPLETE_AND_READY_FOR_MERGE`, run `git status --short` and ensure your intended deliverable is in tracked files.
|
|
29
|
+
- The framework performs final `git add -A` + `git commit`; you do not need to create the commit manually.
|
|
30
|
+
- If your deliverable is task creation, ensure those `.edn` files are present in `{{TASKS_ROOT}}/pending/` so other workers can claim them.
|
|
24
31
|
|
|
25
32
|
### Signals
|
|
26
33
|
|
|
27
34
|
- **`CLAIM(id, ...)`** — Claim one or more pending tasks. Batch related tasks together.
|
|
28
35
|
- **`COMPLETE_AND_READY_FOR_MERGE`** — Your work is done and ready for review. Framework reviews, merges, and marks your claimed tasks complete.
|
|
29
|
-
- **`
|
|
36
|
+
- **`NEEDS_FOLLOWUP`** — Last resort continuation signal. Use only if you tried hard and still cannot produce a merge-ready artifact from the current context. Explain the remaining work after the signal. The framework will keep your claimed tasks and resume you with a sharper follow-up prompt. This is not success.
|
|
30
37
|
|
|
31
|
-
One signal per output. Claim before working.
|
|
38
|
+
One signal per output. Claim before working. Do not output `DONE`.
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
You are a focused executor on Magic Genie delivery work.
|
|
2
|
+
|
|
3
|
+
Read pending tasks and claim exactly what matches your strengths.
|
|
4
|
+
- If `:execution_type` is `"spell"`, implement the route + workflow recipe + output contract.
|
|
5
|
+
- If `:execution_type` is `:wish` or `"wish"`, implement the atomic catalog or UX support item.
|
|
6
|
+
- Keep work scoped to a single task file.
|
|
7
|
+
- Use `TODO_plan.md`, `docs/MegaPacks_Idea_list.md`, and existing code as source of truth.
|
|
8
|
+
|
|
9
|
+
Acceptance requirements:
|
|
10
|
+
- Include route + schema updates when requested.
|
|
11
|
+
- Ensure `:execution_type` distinctions are persisted in code metadata.
|
|
12
|
+
- Ensure deterministic outputs, consistent filenames, and no speculative behavior.
|
|
13
|
+
|
|
14
|
+
Avoid redesigning unrelated systems. If a task is unclear, skip it and pick another.
|
|
15
|
+
#oompa_directive:include_file "config/prompts/_agent_scope_rules.md"
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
You are a planner for the Magic Genie execution stream.
|
|
2
|
+
|
|
3
|
+
Read `TODO_plan.md` and `docs/MegaPacks_Idea_list.md`, then create executable task files for gaps in `../tasks/pending`.
|
|
4
|
+
|
|
5
|
+
Priorities for this backlog:
|
|
6
|
+
- finish `execution_type`/`credit_cost` schema work first
|
|
7
|
+
- then package API planning routes
|
|
8
|
+
- then build the 125 Spell package definitions
|
|
9
|
+
- then UI polish and streaming follow-ups
|
|
10
|
+
|
|
11
|
+
For each task, write EDN with:
|
|
12
|
+
- `:id`
|
|
13
|
+
- `:summary`
|
|
14
|
+
- `:description`
|
|
15
|
+
- `:difficulty` (`:easy`, `:medium`, `:hard`)
|
|
16
|
+
- `:acceptance`
|
|
17
|
+
- optional `:execution_type` (`"wish"` or `"spell"`)
|
|
18
|
+
- optional `:route`
|
|
19
|
+
|
|
20
|
+
Scope:
|
|
21
|
+
- Keep task grain narrow: one endpoint, one schema, or one surface area.
|
|
22
|
+
- Prefer deterministic payload contracts and reuse existing workflow graph patterns.
|
|
23
|
+
- If a task is already represented in pending, do not duplicate.
|
|
24
|
+
|
|
25
|
+
If no tasks exist, continue using the existing task list and expand on this same backlog.
|
|
26
|
+
#oompa_directive:include_file "config/prompts/_agent_scope_rules.md"
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
You are a strict review specialist for Magic Genie implementation tasks.
|
|
2
|
+
|
|
3
|
+
Review each completed task for technical correctness, domain fit, and product value.
|
|
4
|
+
|
|
5
|
+
Context:
|
|
6
|
+
- Task file in `tasks/current/` or `tasks/complete/`.
|
|
7
|
+
- Source-of-truth docs:
|
|
8
|
+
- `../image_magick/docs/MegaPacks_Idea_list.md`
|
|
9
|
+
- `../image_magick/PERSONAS.md`
|
|
10
|
+
- `../image_magick/TODO_plan.md`
|
|
11
|
+
|
|
12
|
+
Review requirements:
|
|
13
|
+
- Read the task description and acceptance criteria first.
|
|
14
|
+
- Identify whether this is `wish` or `spell` and verify the implementation matches that type.
|
|
15
|
+
- For `spell` tasks:
|
|
16
|
+
- Verify route naming and slug structure align with the source package.
|
|
17
|
+
- Confirm outputs are explicitly multi-asset and include the listed IN/OUT contract items.
|
|
18
|
+
- Confirm the workflow is multi-node / deterministic rather than ad hoc single-step logic.
|
|
19
|
+
- For `wish` tasks:
|
|
20
|
+
- Verify one-off tool behavior and catalog/output metadata stay atomic and minimal.
|
|
21
|
+
- For both types:
|
|
22
|
+
- Verify persona relevance from PERSONAS.md for the target user segment.
|
|
23
|
+
- Check for SEO-aware decisions where applicable (title labels, package copy clarity, keyword alignment, upsell paths, route/category naming).
|
|
24
|
+
- Confirm code changes are scoped to the task and do not add speculative behavior.
|
|
25
|
+
- Quality checks:
|
|
26
|
+
- Is the target intent from the source spec met?
|
|
27
|
+
- Are outputs clear, reproducible, and suitable for production review?
|
|
28
|
+
- Are risk points surfaced (missing edge-case handling, broken wiring, inconsistent schema)?
|
|
29
|
+
|
|
30
|
+
Output exactly one of:
|
|
31
|
+
|
|
32
|
+
APPROVED
|
|
33
|
+
- Why it's acceptable.
|
|
34
|
+
- Optional short quality note (target fit + SEO alignment).
|
|
35
|
+
|
|
36
|
+
NEEDS_CHANGES
|
|
37
|
+
- Specific blocking issues with file paths and expected fixes.
|
|
38
|
+
- Explicit missing or incorrect persona/target-fit items.
|
|
39
|
+
- Explicit SEO gaps (if any).
|
|
40
|
+
|
|
41
|
+
REJECTED
|
|
42
|
+
- Fundamental mismatch with task intent or model assumptions.
|
|
43
|
+
|
|
44
|
+
#oompa_directive:include_file "config/prompts/_agent_scope_rules.md"
|
package/oompa.example.json
CHANGED
|
@@ -3,28 +3,28 @@
|
|
|
3
3
|
{
|
|
4
4
|
"model": "claude:opus",
|
|
5
5
|
"prompt": ["config/prompts/planner.md"],
|
|
6
|
-
"
|
|
6
|
+
"max_cycle": 5,
|
|
7
7
|
"count": 1,
|
|
8
8
|
"wait_between": 60
|
|
9
9
|
},
|
|
10
10
|
{
|
|
11
11
|
"model": "codex:gpt-5.3-codex:medium",
|
|
12
12
|
"prompt": ["config/prompts/executor.md"],
|
|
13
|
-
"
|
|
13
|
+
"max_cycle": 10,
|
|
14
14
|
"count": 2,
|
|
15
15
|
"can_plan": false
|
|
16
16
|
},
|
|
17
17
|
{
|
|
18
18
|
"model": "opencode:opencode/kimi-k2.5-free",
|
|
19
19
|
"prompt": ["config/prompts/executor.md"],
|
|
20
|
-
"
|
|
20
|
+
"max_cycle": 10,
|
|
21
21
|
"count": 1,
|
|
22
22
|
"can_plan": false
|
|
23
23
|
},
|
|
24
24
|
{
|
|
25
25
|
"model": "gemini:gemini-3-pro-preview",
|
|
26
26
|
"prompt": ["config/prompts/executor.md"],
|
|
27
|
-
"
|
|
27
|
+
"max_cycle": 10,
|
|
28
28
|
"count": 1,
|
|
29
29
|
"can_plan": false
|
|
30
30
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@nbardy/oompa",
|
|
3
|
-
"version": "0.7.1",
|
|
3
|
+
"version": "0.7.3",
|
|
4
4
|
"description": "Git-worktree multi-agent swarm orchestrator for Codex, Claude, and Opencode",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"type": "commonjs",
|
|
@@ -14,12 +14,14 @@
|
|
|
14
14
|
"agentnet/src/",
|
|
15
15
|
"config/prompts/",
|
|
16
16
|
"oompa.example.json",
|
|
17
|
-
"README.md"
|
|
17
|
+
"README.md",
|
|
18
|
+
"scripts/"
|
|
18
19
|
],
|
|
19
20
|
"scripts": {
|
|
20
21
|
"check": "node bin/oompa.js check",
|
|
21
22
|
"help": "node bin/oompa.js help",
|
|
22
|
-
"pack:dry": "npm pack --dry-run"
|
|
23
|
+
"pack:dry": "npm pack --dry-run",
|
|
24
|
+
"postinstall": "node scripts/install-babashka.js"
|
|
23
25
|
},
|
|
24
26
|
"engines": {
|
|
25
27
|
"node": ">=18"
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
# Scripts
|
|
2
|
+
|
|
3
|
+
Reserved for helper shell scripts (smoke tests, lint wiring, etc.).
|
|
4
|
+
Populate as the orchestrator matures.
|
|
5
|
+
|
|
6
|
+
- `copy-repo-code.sh`: copies implementation code files by default (or all tracked text files via `--all-files`) with filename headers to macOS clipboard (`pbcopy`).
|
|
Binary file
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# Copy tracked code files in this repo to clipboard with file headers.
|
|
3
|
+
#
|
|
4
|
+
# Usage:
|
|
5
|
+
# ./scripts/copy-repo-code.sh
|
|
6
|
+
# ./scripts/copy-repo-code.sh --all-files
|
|
7
|
+
|
|
8
|
+
set -euo pipefail
|
|
9
|
+
|
|
10
|
+
usage() {
|
|
11
|
+
cat <<'EOF'
|
|
12
|
+
Usage:
|
|
13
|
+
./scripts/copy-repo-code.sh
|
|
14
|
+
./scripts/copy-repo-code.sh --all-files
|
|
15
|
+
|
|
16
|
+
Default mode:
|
|
17
|
+
Copies implementation-oriented files only (src, scripts, bins, tests, and
|
|
18
|
+
top-level shell/babashka entrypoints).
|
|
19
|
+
|
|
20
|
+
--all-files:
|
|
21
|
+
Copies all tracked text files in the repository.
|
|
22
|
+
EOF
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
mode="implementation"
|
|
26
|
+
if [ "${1:-}" = "--all-files" ]; then
|
|
27
|
+
mode="all"
|
|
28
|
+
shift
|
|
29
|
+
fi
|
|
30
|
+
|
|
31
|
+
if [ "${1:-}" = "-h" ] || [ "${1:-}" = "--help" ]; then
|
|
32
|
+
usage
|
|
33
|
+
exit 0
|
|
34
|
+
fi
|
|
35
|
+
|
|
36
|
+
if [ "$#" -gt 0 ]; then
|
|
37
|
+
usage >&2
|
|
38
|
+
exit 1
|
|
39
|
+
fi
|
|
40
|
+
|
|
41
|
+
if ! command -v git >/dev/null 2>&1; then
|
|
42
|
+
echo "Error: git is required." >&2
|
|
43
|
+
exit 1
|
|
44
|
+
fi
|
|
45
|
+
|
|
46
|
+
if ! command -v pbcopy >/dev/null 2>&1; then
|
|
47
|
+
echo "Error: pbcopy is not available on this system." >&2
|
|
48
|
+
exit 1
|
|
49
|
+
fi
|
|
50
|
+
|
|
51
|
+
script_dir=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
|
52
|
+
repo_root=$(git -C "$script_dir" rev-parse --show-toplevel 2>/dev/null || true)
|
|
53
|
+
if [ -z "$repo_root" ]; then
|
|
54
|
+
echo "Error: could not determine git repo root." >&2
|
|
55
|
+
exit 1
|
|
56
|
+
fi
|
|
57
|
+
|
|
58
|
+
tmp_file=$(mktemp)
|
|
59
|
+
cleanup() {
|
|
60
|
+
rm -f "$tmp_file"
|
|
61
|
+
}
|
|
62
|
+
trap cleanup EXIT
|
|
63
|
+
|
|
64
|
+
copied=0
|
|
65
|
+
skipped=0
|
|
66
|
+
|
|
67
|
+
if [ "$mode" = "all" ]; then
|
|
68
|
+
file_source_cmd=(git -C "$repo_root" ls-files -z)
|
|
69
|
+
else
|
|
70
|
+
file_source_cmd=(
|
|
71
|
+
git -C "$repo_root" ls-files -z --
|
|
72
|
+
"src/**"
|
|
73
|
+
"agentnet/src/**"
|
|
74
|
+
"bin/**"
|
|
75
|
+
"scripts/**"
|
|
76
|
+
"tests/**"
|
|
77
|
+
"agentnet/test/**"
|
|
78
|
+
"*.sh"
|
|
79
|
+
"*.bb"
|
|
80
|
+
"bb.edn"
|
|
81
|
+
"package.json"
|
|
82
|
+
)
|
|
83
|
+
fi
|
|
84
|
+
|
|
85
|
+
while IFS= read -r -d '' file; do
|
|
86
|
+
full_path="$repo_root/$file"
|
|
87
|
+
[ -f "$full_path" ] || continue
|
|
88
|
+
|
|
89
|
+
# Skip files that look binary.
|
|
90
|
+
if ! grep -Iq . "$full_path"; then
|
|
91
|
+
skipped=$((skipped + 1))
|
|
92
|
+
continue
|
|
93
|
+
fi
|
|
94
|
+
|
|
95
|
+
copied=$((copied + 1))
|
|
96
|
+
{
|
|
97
|
+
printf '===== FILE: %s =====\n' "$file"
|
|
98
|
+
cat "$full_path"
|
|
99
|
+
printf '\n\n'
|
|
100
|
+
} >> "$tmp_file"
|
|
101
|
+
done < <("${file_source_cmd[@]}")
|
|
102
|
+
|
|
103
|
+
if [ "$copied" -eq 0 ]; then
|
|
104
|
+
echo "No tracked text files found to copy." >&2
|
|
105
|
+
exit 1
|
|
106
|
+
fi
|
|
107
|
+
|
|
108
|
+
pbcopy < "$tmp_file"
|
|
109
|
+
bytes=$(wc -c < "$tmp_file" | tr -d '[:space:]')
|
|
110
|
+
echo "Copied $copied files ($bytes bytes) to clipboard in '$mode' mode. Skipped $skipped binary files."
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
const os = require('os');
|
|
4
|
+
const fs = require('fs');
|
|
5
|
+
const path = require('path');
|
|
6
|
+
const https = require('https');
|
|
7
|
+
const { execSync } = require('child_process');
|
|
8
|
+
|
|
9
|
+
const BB_VERSION = '1.3.191'; // A stable release version
|
|
10
|
+
|
|
11
|
+
// Map Node.js os/arch to Babashka release asset names
|
|
12
|
+
const platforms = {
|
|
13
|
+
darwin: {
|
|
14
|
+
arm64: `babashka-${BB_VERSION}-macos-aarch64.tar.gz`,
|
|
15
|
+
x64: `babashka-${BB_VERSION}-macos-amd64.tar.gz`,
|
|
16
|
+
},
|
|
17
|
+
linux: {
|
|
18
|
+
arm64: `babashka-${BB_VERSION}-linux-aarch64-static.tar.gz`,
|
|
19
|
+
x64: `babashka-${BB_VERSION}-linux-amd64-static.tar.gz`,
|
|
20
|
+
},
|
|
21
|
+
// We can add Windows later if needed, but it requires zip extraction
|
|
22
|
+
};
|
|
23
|
+
|
|
24
|
+
async function downloadBabashka() {
|
|
25
|
+
const osType = os.platform();
|
|
26
|
+
const arch = os.arch();
|
|
27
|
+
|
|
28
|
+
// If already installed globally and accessible, we can skip downloading!
|
|
29
|
+
try {
|
|
30
|
+
const existing = execSync('which bb', { encoding: 'utf-8' }).trim();
|
|
31
|
+
if (existing) {
|
|
32
|
+
console.log(`[oompa] System Babashka found at ${existing}. Skipping download.`);
|
|
33
|
+
return;
|
|
34
|
+
}
|
|
35
|
+
} catch (e) {
|
|
36
|
+
// Not found, continue with download
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
const assetName = platforms[osType]?.[arch];
|
|
40
|
+
if (!assetName) {
|
|
41
|
+
console.warn(`[oompa] Automatic Babashka download not supported for ${osType} ${arch}.`);
|
|
42
|
+
console.warn('[oompa] Please install manually: https://github.com/babashka/babashka#installation');
|
|
43
|
+
return;
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
const downloadUrl = `https://github.com/babashka/babashka/releases/download/v${BB_VERSION}/${assetName}`;
|
|
47
|
+
const binDir = path.join(__dirname, '..', 'bin');
|
|
48
|
+
const tempTar = path.join(binDir, 'bb.tar.gz');
|
|
49
|
+
const bbDest = path.join(binDir, 'bb');
|
|
50
|
+
|
|
51
|
+
if (!fs.existsSync(binDir)) {
|
|
52
|
+
fs.mkdirSync(binDir, { recursive: true });
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
console.log(`[oompa] Downloading Babashka (native engine) from ${downloadUrl}...`);
|
|
56
|
+
|
|
57
|
+
return new Promise((resolve, reject) => {
|
|
58
|
+
// Handle GitHub's redirects (302)
|
|
59
|
+
https.get(downloadUrl, (res) => {
|
|
60
|
+
if (res.statusCode === 301 || res.statusCode === 302) {
|
|
61
|
+
https.get(res.headers.location, (redirectRes) => {
|
|
62
|
+
const fileStream = fs.createWriteStream(tempTar);
|
|
63
|
+
redirectRes.pipe(fileStream);
|
|
64
|
+
fileStream.on('finish', () => {
|
|
65
|
+
fileStream.close();
|
|
66
|
+
extract(tempTar, binDir, bbDest, resolve, reject);
|
|
67
|
+
});
|
|
68
|
+
}).on('error', reject);
|
|
69
|
+
} else {
|
|
70
|
+
reject(new Error(`Failed to download: ${res.statusCode}`));
|
|
71
|
+
}
|
|
72
|
+
}).on('error', reject);
|
|
73
|
+
});
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
function extract(tarPath, destDir, bbDest, resolve, reject) {
|
|
77
|
+
console.log('[oompa] Extracting native binary...');
|
|
78
|
+
try {
|
|
79
|
+
execSync(`tar -xzf ${tarPath} -C ${destDir}`);
|
|
80
|
+
fs.unlinkSync(tarPath); // cleanup
|
|
81
|
+
|
|
82
|
+
if (fs.existsSync(bbDest)) {
|
|
83
|
+
fs.chmodSync(bbDest, 0o755); // Make it executable
|
|
84
|
+
console.log('[oompa] ✅ Native engine installed successfully!');
|
|
85
|
+
resolve();
|
|
86
|
+
} else {
|
|
87
|
+
reject(new Error('Extraction failed: bb binary not found.'));
|
|
88
|
+
}
|
|
89
|
+
} catch (e) {
|
|
90
|
+
reject(e);
|
|
91
|
+
}
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
downloadBabashka().catch((err) => {
|
|
95
|
+
console.error(`[oompa] Failed to install Babashka natively: ${err.message}`);
|
|
96
|
+
console.warn('[oompa] Please install manually: https://github.com/babashka/babashka#installation');
|
|
97
|
+
});
|
|
@@ -0,0 +1,229 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# test-harness-resume.sh — Smoke test session create + resume for each harness.
|
|
3
|
+
#
|
|
4
|
+
# For each available harness CLI:
|
|
5
|
+
# 1. Create a session: "Remember the word FOOBAR"
|
|
6
|
+
# 2. Resume that session: "What word were you supposed to remember?"
|
|
7
|
+
# 3. Check the response contains FOOBAR
|
|
8
|
+
#
|
|
9
|
+
# Usage:
|
|
10
|
+
# ./scripts/test-harness-resume.sh # test all available
|
|
11
|
+
# ./scripts/test-harness-resume.sh claude # test one harness
|
|
12
|
+
#
|
|
13
|
+
# Requires the CLI binaries to be on PATH.
|
|
14
|
+
|
|
15
|
+
set -euo pipefail
|
|
16
|
+
|
|
17
|
+
PASS=0
|
|
18
|
+
FAIL=0
|
|
19
|
+
SKIP=0
|
|
20
|
+
|
|
21
|
+
# Unset CLAUDECODE so we can run nested claude sessions
|
|
22
|
+
unset CLAUDECODE 2>/dev/null || true
|
|
23
|
+
|
|
24
|
+
green() { printf "\033[32m%s\033[0m\n" "$*"; }
|
|
25
|
+
red() { printf "\033[31m%s\033[0m\n" "$*"; }
|
|
26
|
+
yellow() { printf "\033[33m%s\033[0m\n" "$*"; }
|
|
27
|
+
|
|
28
|
+
check_binary() {
|
|
29
|
+
command -v "$1" >/dev/null 2>&1
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
# ── Claude ────────────────────────────────────────────────────────────────────
|
|
33
|
+
|
|
34
|
+
test_claude() {
|
|
35
|
+
local uuid
|
|
36
|
+
uuid=$(python3 -c "import uuid; print(str(uuid.uuid4()))")
|
|
37
|
+
|
|
38
|
+
echo " Session ID: $uuid"
|
|
39
|
+
|
|
40
|
+
# Step 1: Create session
|
|
41
|
+
echo " Creating session..."
|
|
42
|
+
local create_out
|
|
43
|
+
create_out=$(claude --dangerously-skip-permissions \
|
|
44
|
+
--session-id "$uuid" \
|
|
45
|
+
-p "Remember the word FOOBAR. Say only: OK, remembered." 2>&1)
|
|
46
|
+
local create_exit=$?
|
|
47
|
+
|
|
48
|
+
if [ $create_exit -ne 0 ]; then
|
|
49
|
+
red " FAIL: create exited $create_exit"
|
|
50
|
+
echo " Output: $create_out"
|
|
51
|
+
return 1
|
|
52
|
+
fi
|
|
53
|
+
echo " Create output: $create_out"
|
|
54
|
+
|
|
55
|
+
# Step 2: Resume session
|
|
56
|
+
echo " Resuming session..."
|
|
57
|
+
local resume_out
|
|
58
|
+
resume_out=$(claude --dangerously-skip-permissions \
|
|
59
|
+
--resume "$uuid" \
|
|
60
|
+
-p "What word were you supposed to remember? Say only the word." 2>&1)
|
|
61
|
+
local resume_exit=$?
|
|
62
|
+
|
|
63
|
+
if [ $resume_exit -ne 0 ]; then
|
|
64
|
+
red " FAIL: resume exited $resume_exit"
|
|
65
|
+
echo " Output: $resume_out"
|
|
66
|
+
return 1
|
|
67
|
+
fi
|
|
68
|
+
echo " Resume output: $resume_out"
|
|
69
|
+
|
|
70
|
+
# Step 3: Check for FOOBAR
|
|
71
|
+
if echo "$resume_out" | grep -qi "FOOBAR"; then
|
|
72
|
+
green " PASS: resume recalled FOOBAR"
|
|
73
|
+
return 0
|
|
74
|
+
else
|
|
75
|
+
red " FAIL: resume did not recall FOOBAR"
|
|
76
|
+
return 1
|
|
77
|
+
fi
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
# ── Opencode ──────────────────────────────────────────────────────────────────
|
|
81
|
+
|
|
82
|
+
test_opencode() {
|
|
83
|
+
# Step 1: Create session, extract session-id from NDJSON
|
|
84
|
+
echo " Creating session..."
|
|
85
|
+
local create_out
|
|
86
|
+
create_out=$(opencode run --format json --print-logs --log-level WARN \
|
|
87
|
+
"Remember the word FOOBAR. Say only: OK, remembered." 2>&1)
|
|
88
|
+
local create_exit=$?
|
|
89
|
+
|
|
90
|
+
if [ $create_exit -ne 0 ]; then
|
|
91
|
+
red " FAIL: create exited $create_exit"
|
|
92
|
+
return 1
|
|
93
|
+
fi
|
|
94
|
+
|
|
95
|
+
# Extract session ID from NDJSON
|
|
96
|
+
local sid
|
|
97
|
+
sid=$(echo "$create_out" | grep -o '"sessionI[Dd]":"[^"]*"' | head -1 | cut -d'"' -f4)
|
|
98
|
+
if [ -z "$sid" ]; then
|
|
99
|
+
# Try alternate pattern
|
|
100
|
+
sid=$(echo "$create_out" | grep -oE 'ses_[A-Za-z0-9]+' | head -1)
|
|
101
|
+
fi
|
|
102
|
+
|
|
103
|
+
if [ -z "$sid" ]; then
|
|
104
|
+
red " FAIL: could not extract session ID"
|
|
105
|
+
echo " Output: $create_out"
|
|
106
|
+
return 1
|
|
107
|
+
fi
|
|
108
|
+
echo " Session ID: $sid"
|
|
109
|
+
|
|
110
|
+
# Step 2: Resume
|
|
111
|
+
echo " Resuming session..."
|
|
112
|
+
local resume_out
|
|
113
|
+
resume_out=$(opencode run --format json --print-logs --log-level WARN \
|
|
114
|
+
-s "$sid" --continue \
|
|
115
|
+
"What word were you supposed to remember? Say only the word." 2>&1)
|
|
116
|
+
local resume_exit=$?
|
|
117
|
+
|
|
118
|
+
if [ $resume_exit -ne 0 ]; then
|
|
119
|
+
red " FAIL: resume exited $resume_exit"
|
|
120
|
+
return 1
|
|
121
|
+
fi
|
|
122
|
+
|
|
123
|
+
if echo "$resume_out" | grep -qi "FOOBAR"; then
|
|
124
|
+
green " PASS: resume recalled FOOBAR"
|
|
125
|
+
return 0
|
|
126
|
+
else
|
|
127
|
+
red " FAIL: resume did not recall FOOBAR"
|
|
128
|
+
echo " Output: $resume_out"
|
|
129
|
+
return 1
|
|
130
|
+
fi
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
# ── Gemini ────────────────────────────────────────────────────────────────────
|
|
134
|
+
|
|
135
|
+
test_gemini() {
|
|
136
|
+
# Gemini uses implicit sessions (by cwd) and --resume latest
|
|
137
|
+
local tmpdir
|
|
138
|
+
tmpdir=$(mktemp -d)
|
|
139
|
+
|
|
140
|
+
echo " Working dir: $tmpdir"
|
|
141
|
+
|
|
142
|
+
# Step 1: Create session
|
|
143
|
+
echo " Creating session..."
|
|
144
|
+
local create_out
|
|
145
|
+
create_out=$(cd "$tmpdir" && gemini --yolo \
|
|
146
|
+
-p "Remember the word FOOBAR. Say only: OK, remembered." 2>&1)
|
|
147
|
+
local create_exit=$?
|
|
148
|
+
|
|
149
|
+
if [ $create_exit -ne 0 ]; then
|
|
150
|
+
red " FAIL: create exited $create_exit"
|
|
151
|
+
rm -rf "$tmpdir"
|
|
152
|
+
return 1
|
|
153
|
+
fi
|
|
154
|
+
echo " Create output: $create_out"
|
|
155
|
+
|
|
156
|
+
# Step 2: Resume
|
|
157
|
+
echo " Resuming session..."
|
|
158
|
+
local resume_out
|
|
159
|
+
resume_out=$(cd "$tmpdir" && gemini --yolo --resume latest \
|
|
160
|
+
-p "What word were you supposed to remember? Say only the word." 2>&1)
|
|
161
|
+
local resume_exit=$?
|
|
162
|
+
|
|
163
|
+
rm -rf "$tmpdir"
|
|
164
|
+
|
|
165
|
+
if [ $resume_exit -ne 0 ]; then
|
|
166
|
+
red " FAIL: resume exited $resume_exit"
|
|
167
|
+
return 1
|
|
168
|
+
fi
|
|
169
|
+
|
|
170
|
+
if echo "$resume_out" | grep -qi "FOOBAR"; then
|
|
171
|
+
green " PASS: resume recalled FOOBAR"
|
|
172
|
+
return 0
|
|
173
|
+
else
|
|
174
|
+
red " FAIL: resume did not recall FOOBAR"
|
|
175
|
+
echo " Output: $resume_out"
|
|
176
|
+
return 1
|
|
177
|
+
fi
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
# ── Codex ─────────────────────────────────────────────────────────────────────
|
|
181
|
+
|
|
182
|
+
test_codex() {
|
|
183
|
+
# Codex has resume-fn: nil — resume is not supported.
|
|
184
|
+
# This is a known limitation, not a silent skip. Fail so it stays visible.
|
|
185
|
+
red " FAIL: codex does not support session resume (resume-fn is nil in registry)"
|
|
186
|
+
return 1
|
|
187
|
+
}
|
|
188
|
+
|
|
189
|
+
# ── Runner ────────────────────────────────────────────────────────────────────
|
|
190
|
+
|
|
191
|
+
run_test() {
|
|
192
|
+
local harness=$1
|
|
193
|
+
echo ""
|
|
194
|
+
echo "=== Testing $harness ==="
|
|
195
|
+
|
|
196
|
+
if ! check_binary "$harness"; then
|
|
197
|
+
yellow " SKIP: $harness not found on PATH"
|
|
198
|
+
SKIP=$((SKIP + 1))
|
|
199
|
+
return
|
|
200
|
+
fi
|
|
201
|
+
|
|
202
|
+
local result=0
|
|
203
|
+
"test_$harness" || result=$?
|
|
204
|
+
|
|
205
|
+
if [ $result -eq 0 ]; then
|
|
206
|
+
PASS=$((PASS + 1))
|
|
207
|
+
else
|
|
208
|
+
FAIL=$((FAIL + 1))
|
|
209
|
+
fi
|
|
210
|
+
}
|
|
211
|
+
|
|
212
|
+
# ── Main ──────────────────────────────────────────────────────────────────────
|
|
213
|
+
|
|
214
|
+
HARNESSES="${1:-claude codex opencode gemini}"
|
|
215
|
+
|
|
216
|
+
echo "Harness Resume Smoke Tests"
|
|
217
|
+
echo "=========================="
|
|
218
|
+
|
|
219
|
+
for h in $HARNESSES; do
|
|
220
|
+
run_test "$h"
|
|
221
|
+
done
|
|
222
|
+
|
|
223
|
+
echo ""
|
|
224
|
+
echo "=========================="
|
|
225
|
+
echo "Results: $(green "$PASS pass"), $(red "$FAIL fail"), $(yellow "$SKIP skip")"
|
|
226
|
+
|
|
227
|
+
if [ $FAIL -gt 0 ]; then
|
|
228
|
+
exit 1
|
|
229
|
+
fi
|