svamp-cli 0.2.106 → 0.2.108
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/skills/loop/bin/inject-loop.mjs +20 -6
- package/bin/skills/loop/bin/loop-init.mjs +21 -8
- package/bin/skills/loop/bin/loop-status.mjs +17 -4
- package/bin/skills/loop/bin/precompact.mjs +5 -2
- package/bin/skills/loop/bin/state-fp.mjs +5 -7
- package/bin/skills/loop/bin/stop-gate.mjs +17 -4
- package/bin/skills/loop/test/test-loop-gate.mjs +55 -18
- package/dist/{agentCommands-V14aFkLp.mjs → agentCommands-Cpq_Yk3h.mjs} +2 -2
- package/dist/{auth-CuCvqsTH.mjs → auth-DBKbI5IE.mjs} +1 -1
- package/dist/cli.mjs +50 -50
- package/dist/{commands-BGO8YrVj.mjs → commands-4JPwWmBF.mjs} +2 -2
- package/dist/{commands-CyVLITvL.mjs → commands-B7z0Yoi7.mjs} +1 -1
- package/dist/{commands-CmnRHJ_u.mjs → commands-CjuVNGG4.mjs} +3 -3
- package/dist/{commands-Dd1kqgBk.mjs → commands-D38_YkF9.mjs} +5 -5
- package/dist/{commands-Cectk2JF.mjs → commands-DOsK9QRw.mjs} +24 -8
- package/dist/{fleet-D7A8whbP.mjs → fleet-D-YD8lYU.mjs} +1 -1
- package/dist/{frpc-D9HsqQ7e.mjs → frpc-C5Bhpsdw.mjs} +1 -1
- package/dist/{headlessCli-Bli0HPzi.mjs → headlessCli-Cwqhpbm1.mjs} +2 -2
- package/dist/index.mjs +1 -1
- package/dist/{package-1Infuho2.mjs → package-D7tAsMPM.mjs} +1 -1
- package/dist/{run-9j9_wlx5.mjs → run-LyzVTe3J.mjs} +59 -34
- package/dist/{run-Cysre19E.mjs → run-TjecLji1.mjs} +1 -1
- package/dist/{serveCommands-JHbRf6Vz.mjs → serveCommands-XlqflmVF.mjs} +5 -5
- package/dist/{serveManager-ClBIhxJK.mjs → serveManager-QZxNxQq0.mjs} +2 -2
- package/dist/{sideband-CnnaC4xC.mjs → sideband-CgiHKPJo.mjs} +1 -1
- package/package.json +1 -1
|
@@ -3,36 +3,50 @@
|
|
|
3
3
|
// Injects the current LOOP.md plus the loop protocol so every iteration starts
|
|
4
4
|
// from the latest task/plan/progress without any daemon re-injection.
|
|
5
5
|
import { readFileSync, existsSync } from 'node:fs';
|
|
6
|
-
import { dirname, join, resolve } from 'node:path';
|
|
6
|
+
import { dirname, join, resolve, relative } from 'node:path';
|
|
7
7
|
import { fileURLToPath } from 'node:url';
|
|
8
8
|
|
|
9
9
|
const HERE = dirname(fileURLToPath(import.meta.url));
|
|
10
|
-
|
|
11
|
-
|
|
10
|
+
// Resolve the loop home from the per-process env (SVAMP_SESSION_ID + CLAUDE_PROJECT_DIR)
|
|
11
|
+
// so a hook in the SHARED .claude/settings.json injects each session's OWN LOOP.md.
|
|
12
|
+
// Fallback (manual/standalone): the parent of this copied bin/ dir.
|
|
13
|
+
const SID = process.env.SVAMP_SESSION_ID || null;
|
|
14
|
+
const PROJECT_ENV = process.env.CLAUDE_PROJECT_DIR || null;
|
|
15
|
+
const LOOP_DIR = (SID && PROJECT_ENV) ? join(PROJECT_ENV, '.svamp', SID, 'loop') : resolve(HERE, '..');
|
|
12
16
|
|
|
13
17
|
function readJSON(p, f) { try { return JSON.parse(readFileSync(p, 'utf8')); } catch { return f; } }
|
|
14
18
|
const cfg = readJSON(join(LOOP_DIR, 'loop.config.json'), null);
|
|
15
19
|
const state = readJSON(join(LOOP_DIR, 'loop-state.json'), { active: false });
|
|
16
20
|
if (!cfg || state.active === false) process.exit(0); // no active loop -> inject nothing
|
|
17
21
|
|
|
22
|
+
const PROJECT = (typeof cfg.project_dir === 'string' && cfg.project_dir)
|
|
23
|
+
|| process.env.CLAUDE_PROJECT_DIR || resolve(LOOP_DIR, '..', '..', '..');
|
|
24
|
+
|
|
25
|
+
// LOOP.md now lives inside the (session-scoped) loop dir.
|
|
26
|
+
const loopPath = join(LOOP_DIR, cfg.loop_file || 'LOOP.md');
|
|
18
27
|
let loopMd = '';
|
|
19
|
-
const loopPath = join(PROJECT, cfg.loop_file || 'LOOP.md');
|
|
20
28
|
if (existsSync(loopPath)) loopMd = readFileSync(loopPath, 'utf8');
|
|
21
29
|
|
|
22
30
|
const evaluatorOn = cfg.evaluator?.enabled !== false;
|
|
23
31
|
const oracleCmd = cfg.oracle?.command || cfg.oracle?.test || cfg.oracle?.build || cfg.oracle || null;
|
|
32
|
+
// Paths the agent must use, expressed relative to where it runs (PROJECT).
|
|
33
|
+
const LOOPMD_REL = relative(PROJECT, loopPath) || 'LOOP.md';
|
|
34
|
+
const VERDICT_REL = relative(PROJECT, join(LOOP_DIR, 'evaluator-verdict.json')) || join(LOOP_DIR, 'evaluator-verdict.json');
|
|
35
|
+
const STATEFP_REL = relative(PROJECT, join(LOOP_DIR, 'bin', 'state-fp.mjs')) || join(LOOP_DIR, 'bin', 'state-fp.mjs');
|
|
24
36
|
|
|
25
37
|
const protocol = `# 🔁 LOOP MODE IS ACTIVE
|
|
26
38
|
|
|
27
39
|
You are running inside a loop. Each turn is one iteration. Work toward completing the task described in LOOP.md below. You CANNOT end the loop by simply saying you are done — a Stop gate independently re-checks the exit conditions and will send you back to work if they are not met.
|
|
28
40
|
|
|
41
|
+
Your LOOP.md (durable memory) is at \`${LOOPMD_REL}\` — read and update it there.
|
|
42
|
+
|
|
29
43
|
**Exit conditions (all must hold before the loop ends):**
|
|
30
|
-
${oracleCmd ? `1. The oracle command must pass: \`${oracleCmd}\` (exit 0). The gate runs this itself — do not fake it.\n` : '1. (No oracle configured.)\n'}${evaluatorOn ? `2. An INDEPENDENT evaluator must judge the work \"done\". Before you finish: spawn a fresh subagent (Task tool) named/acting as \`loop-evaluator\` with a skeptical reviewer prompt; give it ONLY the goal (from LOOP.md), the current diff, and the oracle output. Have IT decide. Then record its verdict to
|
|
44
|
+
${oracleCmd ? `1. The oracle command must pass: \`${oracleCmd}\` (exit 0). The gate runs this itself — do not fake it.\n` : '1. (No oracle configured.)\n'}${evaluatorOn ? `2. An INDEPENDENT evaluator must judge the work \"done\". Before you finish: spawn a fresh subagent (Task tool) named/acting as \`loop-evaluator\` with a skeptical reviewer prompt; give it ONLY the goal (from LOOP.md), the current diff, and the oracle output. Have IT decide. Then record its verdict to \`${VERDICT_REL}\`:\n {"verdict":"done"|"continue","reason":"...","guidance":"...","state_fp":"<output of: node ${STATEFP_REL}>"}\n Do NOT grade your own work — the verdict must come from the subagent, and it is only valid for the exact code state it reviewed.\n` : ''}
|
|
31
45
|
|
|
32
46
|
**Each iteration:** read LOOP.md → make real progress on the task → update the Progress section of LOOP.md → run/verify the oracle → (if you believe it's done) get the evaluator verdict → end your turn to be re-checked. Keep LOOP.md current; it is your durable memory across iterations and restarts.
|
|
33
47
|
|
|
34
48
|
---
|
|
35
|
-
## LOOP.md
|
|
49
|
+
## LOOP.md (${LOOPMD_REL})
|
|
36
50
|
${loopMd || '(LOOP.md not found — create it with the task and a Progress section.)'}
|
|
37
51
|
`;
|
|
38
52
|
|
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
// loop-init.mjs — project the loop config into Claude Code's native files.
|
|
3
|
-
// Generates, in a target project dir:
|
|
4
|
-
// LOOP.md
|
|
3
|
+
// Generates, in a target project dir, a SESSION-SCOPED loop home:
|
|
4
|
+
// .svamp/<sessionId>/loop/{LOOP.md,loop.config.json,loop-state.json,bin/*}
|
|
5
5
|
// .claude/settings.json hooks (Stop gate + LOOP.md injection),
|
|
6
6
|
// and (optional) .claude/agents/loop-evaluator.md.
|
|
7
|
+
// Session-scoping keeps sibling sessions in the same working dir from colliding.
|
|
7
8
|
// Usage:
|
|
8
9
|
// node loop-init.mjs <dir> --task "..." [--criteria "..."] [--oracle "cmd"]
|
|
9
10
|
// [--max N] [--evaluator on|off] [--model NAME] [--loop-file LOOP.md]
|
|
@@ -34,9 +35,15 @@ const criteria = typeof args.criteria === 'string' ? args.criteria : null;
|
|
|
34
35
|
// Owning session id, stamped into loop-state.json so the daemon can scope
|
|
35
36
|
// "loop active" (auto-approve / AskUserQuestion auto-dismiss / loop resume) to the
|
|
36
37
|
// session that started the loop instead of every session sharing this directory.
|
|
37
|
-
const sessionId = typeof args.session === 'string' ? args.session
|
|
38
|
-
|
|
39
|
-
|
|
38
|
+
const sessionId = typeof args.session === 'string' ? args.session
|
|
39
|
+
: (typeof process.env.SVAMP_SESSION_ID === 'string' && process.env.SVAMP_SESSION_ID) ? process.env.SVAMP_SESSION_ID
|
|
40
|
+
: null;
|
|
41
|
+
|
|
42
|
+
// Session-scoped loop home so sessions sharing a working dir never collide:
|
|
43
|
+
// <dir>/.svamp/<sessionId>/loop/ (falls back to <dir>/.svamp/loop/ with no session).
|
|
44
|
+
// The Claude Code hook DEFINITIONS still live in <dir>/.claude/settings.json (that is
|
|
45
|
+
// Claude Code's own config), but the loop's state + memory now live under .svamp.
|
|
46
|
+
const loopDir = sessionId ? join(dir, '.svamp', sessionId, 'loop') : join(dir, '.svamp', 'loop');
|
|
40
47
|
const binDir = join(loopDir, 'bin');
|
|
41
48
|
mkdirSync(binDir, { recursive: true });
|
|
42
49
|
mkdirSync(join(dir, '.claude', 'agents'), { recursive: true });
|
|
@@ -51,6 +58,10 @@ for (const f of ['state-fp.mjs', 'stop-gate.mjs', 'inject-loop.mjs', 'loop-statu
|
|
|
51
58
|
// 2. loop.config.json
|
|
52
59
|
const config = {
|
|
53
60
|
loop_file: loopFile,
|
|
61
|
+
// The repo/working root — copied hook scripts (which live under .svamp/<sid>/loop/bin
|
|
62
|
+
// and resolve their own dir relatively) read this to run the oracle + fingerprint the
|
|
63
|
+
// work product, since their depth no longer encodes the project root.
|
|
64
|
+
project_dir: dir,
|
|
54
65
|
oracle: oracle ? { command: oracle, timeout_sec: 600 } : null,
|
|
55
66
|
evaluator: { enabled: evaluatorOn, model },
|
|
56
67
|
max_iterations: max,
|
|
@@ -67,8 +78,9 @@ writeFileSync(join(loopDir, 'loop-state.json'), JSON.stringify({
|
|
|
67
78
|
...(sessionId ? { session_id: sessionId } : {}),
|
|
68
79
|
}, null, 2));
|
|
69
80
|
|
|
70
|
-
// 4. LOOP.md (agent + human editable) —
|
|
71
|
-
|
|
81
|
+
// 4. LOOP.md (agent + human editable) — lives inside the session-scoped loop dir
|
|
82
|
+
// so two sessions in one working dir keep separate memory. Only written if absent.
|
|
83
|
+
const loopPath = join(loopDir, loopFile);
|
|
72
84
|
if (!existsSync(loopPath)) {
|
|
73
85
|
writeFileSync(loopPath, `# Loop Task
|
|
74
86
|
|
|
@@ -126,7 +138,8 @@ Be strict. A false "done" is far worse than one more iteration.
|
|
|
126
138
|
}
|
|
127
139
|
|
|
128
140
|
console.log(`✅ loop initialised in ${dir}
|
|
129
|
-
|
|
141
|
+
loop dir : ${loopDir}
|
|
142
|
+
task file : ${join(loopDir, loopFile)}
|
|
130
143
|
oracle : ${oracle || '(none)'}
|
|
131
144
|
evaluator : ${evaluatorOn ? 'on' + (model ? ` (${model})` : '') : 'off'}
|
|
132
145
|
max iters : ${max}
|
|
@@ -1,15 +1,28 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
// loop-status.mjs — show a loop's current state + per-iteration history timeline.
|
|
3
|
-
// Usage: node loop-status.mjs [project-dir] [--json] [-n <count>]
|
|
4
|
-
import { readFileSync } from 'node:fs';
|
|
3
|
+
// Usage: node loop-status.mjs [project-dir] [--session <id>] [--json] [-n <count>]
|
|
4
|
+
import { readFileSync, existsSync, readdirSync } from 'node:fs';
|
|
5
5
|
import { join, resolve } from 'node:path';
|
|
6
6
|
|
|
7
7
|
const args = process.argv.slice(2);
|
|
8
8
|
const json = args.includes('--json');
|
|
9
9
|
const nIdx = args.indexOf('-n');
|
|
10
10
|
const limit = nIdx !== -1 ? Number(args[nIdx + 1]) : 20;
|
|
11
|
-
const
|
|
12
|
-
const
|
|
11
|
+
const sIdx = args.indexOf('--session');
|
|
12
|
+
const sessionId = sIdx !== -1 ? args[sIdx + 1] : (process.env.SVAMP_SESSION_ID || null);
|
|
13
|
+
const dir = resolve(args.find((a) => !a.startsWith('-') && a !== String(limit) && a !== sessionId) || process.cwd());
|
|
14
|
+
|
|
15
|
+
// Find the loop home: explicit session → .svamp/<sid>/loop; else any session-scoped
|
|
16
|
+
// loop that has state; else .svamp/loop; else legacy .claude/loop.
|
|
17
|
+
function pickLoopDir(root, sid) {
|
|
18
|
+
const cands = [];
|
|
19
|
+
if (sid) cands.push(join(root, '.svamp', sid, 'loop'));
|
|
20
|
+
try { for (const e of readdirSync(join(root, '.svamp'), { withFileTypes: true })) if (e.isDirectory() && e.name !== sid) cands.push(join(root, '.svamp', e.name, 'loop')); } catch {}
|
|
21
|
+
cands.push(join(root, '.svamp', 'loop'), join(root, '.claude', 'loop'));
|
|
22
|
+
for (const c of cands) if (existsSync(join(c, 'loop-state.json'))) return c;
|
|
23
|
+
return sid ? join(root, '.svamp', sid, 'loop') : cands[cands.length - 1];
|
|
24
|
+
}
|
|
25
|
+
const LOOP_DIR = pickLoopDir(dir, sessionId);
|
|
13
26
|
|
|
14
27
|
const readJSON = (p, f) => { try { return JSON.parse(readFileSync(p, 'utf8')); } catch { return f; } };
|
|
15
28
|
const state = readJSON(join(LOOP_DIR, 'loop.config.json'), null)
|
|
@@ -7,8 +7,11 @@ import { dirname, join, resolve } from 'node:path';
|
|
|
7
7
|
import { fileURLToPath } from 'node:url';
|
|
8
8
|
|
|
9
9
|
const HERE = dirname(fileURLToPath(import.meta.url));
|
|
10
|
-
|
|
11
|
-
|
|
10
|
+
// Resolve the loop home from the per-process env (shared-settings safe); fallback to
|
|
11
|
+
// the parent of this copied bin/ dir.
|
|
12
|
+
const SID = process.env.SVAMP_SESSION_ID || null;
|
|
13
|
+
const PROJECT_ENV = process.env.CLAUDE_PROJECT_DIR || null;
|
|
14
|
+
const LOOP_DIR = (SID && PROJECT_ENV) ? join(PROJECT_ENV, '.svamp', SID, 'loop') : resolve(HERE, '..');
|
|
12
15
|
|
|
13
16
|
let active = false;
|
|
14
17
|
try { active = JSON.parse(readFileSync(join(LOOP_DIR, 'loop-state.json'), 'utf8')).active !== false; } catch {}
|
|
@@ -24,12 +24,10 @@ function git(dir, args) {
|
|
|
24
24
|
// LOOP.md/state/verdict change every iteration and would otherwise make every
|
|
25
25
|
// verdict look stale (the agent updates LOOP.md progress as it works).
|
|
26
26
|
function excludedPaths(dir) {
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
} catch {}
|
|
32
|
-
return { loopFile, prefixes: ['.claude/loop/'] };
|
|
27
|
+
// The loop's bookkeeping + memory now live under .svamp/<sid>/loop/ (and legacy
|
|
28
|
+
// loops under .claude/loop/). Both — plus a root LOOP.md a legacy loop may have
|
|
29
|
+
// left — are excluded so the fingerprint tracks the WORK PRODUCT only.
|
|
30
|
+
return { loopFile: 'LOOP.md', prefixes: ['.svamp/', '.claude/loop/'] };
|
|
33
31
|
}
|
|
34
32
|
|
|
35
33
|
const WALK_SKIP = new Set(['.git', 'node_modules', '.svamp', '.expo', 'dist', 'build']);
|
|
@@ -80,7 +78,7 @@ export function stateFingerprint(dir) {
|
|
|
80
78
|
if (!head) return walkFingerprint(dir, isExcluded); // non-git / no commit yet
|
|
81
79
|
// Exclude bookkeeping/memory from the tracked diff too (in case they're committed).
|
|
82
80
|
const diff = git(dir, ['-c', 'core.quotepath=false', 'diff', 'HEAD', '--',
|
|
83
|
-
'.', `:(exclude)${loopFile}`, ':(exclude).claude/loop']);
|
|
81
|
+
'.', `:(exclude)${loopFile}`, ':(exclude).claude/loop', ':(exclude).svamp']);
|
|
84
82
|
// KNOWN LIMITATION (review #8): --exclude-standard omits gitignored files, so a
|
|
85
83
|
// loop whose work product lands in a gitignored path (e.g. dist/) won't change
|
|
86
84
|
// the fingerprint. Acceptable since work products are normally tracked; loops
|
|
@@ -13,13 +13,19 @@
|
|
|
13
13
|
// budgets so it can never block forever, even if loop.config.json is hand-edited.
|
|
14
14
|
import { execSync } from 'node:child_process';
|
|
15
15
|
import { readFileSync, writeFileSync, renameSync, existsSync, appendFileSync, statSync } from 'node:fs';
|
|
16
|
-
import { dirname, join, resolve } from 'node:path';
|
|
16
|
+
import { dirname, join, resolve, relative } from 'node:path';
|
|
17
17
|
import { fileURLToPath } from 'node:url';
|
|
18
18
|
import { stateFingerprint } from './state-fp.mjs';
|
|
19
19
|
|
|
20
20
|
const HERE = dirname(fileURLToPath(import.meta.url));
|
|
21
|
-
|
|
22
|
-
|
|
21
|
+
// Resolve the loop home from the per-process env the daemon injects
|
|
22
|
+
// (SVAMP_SESSION_ID + Claude Code's CLAUDE_PROJECT_DIR) so a hook defined in the
|
|
23
|
+
// SHARED .claude/settings.json gates each session against ITS OWN .svamp/<sid>/loop/
|
|
24
|
+
// — a sibling session in the same dir resolves a different (or empty) loop and no-ops.
|
|
25
|
+
// Fallback (manual run / standalone): the parent of this copied bin/ dir.
|
|
26
|
+
const SID = process.env.SVAMP_SESSION_ID || null;
|
|
27
|
+
const PROJECT_ENV = process.env.CLAUDE_PROJECT_DIR || null;
|
|
28
|
+
const LOOP_DIR = (SID && PROJECT_ENV) ? join(PROJECT_ENV, '.svamp', SID, 'loop') : resolve(HERE, '..');
|
|
23
29
|
const CONFIG = join(LOOP_DIR, 'loop.config.json');
|
|
24
30
|
const STATE = join(LOOP_DIR, 'loop-state.json');
|
|
25
31
|
const VERDICT = join(LOOP_DIR, 'evaluator-verdict.json');
|
|
@@ -73,6 +79,11 @@ const state = readJSON(STATE, { active: false, iteration: 0 });
|
|
|
73
79
|
// Safe no-op if there is no active loop here.
|
|
74
80
|
if (!cfg || state.active === false) allow();
|
|
75
81
|
|
|
82
|
+
// The repo/working root: where the oracle runs and the work-product is fingerprinted.
|
|
83
|
+
// Stamped by loop-init (loop dir depth no longer encodes it); fall back to env/cwd.
|
|
84
|
+
const PROJECT = (typeof cfg.project_dir === 'string' && cfg.project_dir)
|
|
85
|
+
|| process.env.CLAUDE_PROJECT_DIR || resolve(LOOP_DIR, '..', '..', '..');
|
|
86
|
+
|
|
76
87
|
// Hard fallback ceiling: a hand-edited/null max_iterations must never let the
|
|
77
88
|
// gate block forever (the hook would trap the session). Default to 200.
|
|
78
89
|
const HARD_MAX = 200;
|
|
@@ -164,7 +175,9 @@ writeJSONAtomic(STATE, { ...state, iteration: nextIter, phase: 'continue',
|
|
|
164
175
|
appendHistory({ ts: now, iteration: nextIter, decision: 'continue', oracle: oraclePass, evaluator: evaluatorPass, detail: oraclePass ? evaluatorDetail : oracleDetail });
|
|
165
176
|
|
|
166
177
|
const remaining = max != null ? ` (iteration ${nextIter}/${max})` : '';
|
|
178
|
+
const VERDICT_REL = relative(PROJECT, VERDICT) || VERDICT;
|
|
179
|
+
const STATEFP_REL = relative(PROJECT, join(LOOP_DIR, 'bin', 'state-fp.mjs')) || join(LOOP_DIR, 'bin', 'state-fp.mjs');
|
|
167
180
|
const evalHint = evaluatorOn && !evaluatorPass && oraclePass
|
|
168
|
-
? `\n\nThe code looks like it may be ready, but you must get an independent verdict: spawn the \`loop-evaluator\` subagent (or a fresh Task agent with a skeptical reviewer prompt) to judge the current diff against LOOP.md, then write its result to
|
|
181
|
+
? `\n\nThe code looks like it may be ready, but you must get an independent verdict: spawn the \`loop-evaluator\` subagent (or a fresh Task agent with a skeptical reviewer prompt) to judge the current diff against LOOP.md, then write its result to \`${VERDICT_REL}\` as {"verdict":"done"|"continue","reason":"...","guidance":"...","state_fp":"<run: node ${STATEFP_REL}>"}. Do not write the verdict yourself.`
|
|
169
182
|
: '';
|
|
170
183
|
block(`Loop is not complete${remaining}. Keep working on the task in LOOP.md.\n\n${oracleDetail}\n${evaluatorOn ? '\n' + evaluatorDetail : ''}${evalHint}\n\nUpdate LOOP.md progress, fix the blocking issue, then finish your turn again to be re-checked.`);
|
|
@@ -12,6 +12,10 @@ const INIT = resolve(HERE, '..', 'bin', 'loop-init.mjs');
|
|
|
12
12
|
const STATE_FP = resolve(HERE, '..', 'bin', 'state-fp.mjs');
|
|
13
13
|
const node = process.execPath;
|
|
14
14
|
|
|
15
|
+
// Loops are SESSION-SCOPED under .svamp/<sid>/loop. Run every project with a session
|
|
16
|
+
// id so the test exercises the real production layout + env-resolved hooks.
|
|
17
|
+
const SID = 'test-session-0000';
|
|
18
|
+
|
|
15
19
|
let pass = 0, fail = 0;
|
|
16
20
|
function ok(cond, msg) { if (cond) { pass++; console.log(` ✓ ${msg}`); } else { fail++; console.log(` ✗ ${msg}`); } }
|
|
17
21
|
|
|
@@ -20,17 +24,23 @@ function sh(dir, cmd) { return execFileSync('bash', ['-lc', cmd], { cwd: dir, en
|
|
|
20
24
|
|
|
21
25
|
// Run the COPIED stop-gate inside the project; return {blocked, reason, exit}.
|
|
22
26
|
function runGate(dir, stopHookActive = false, transcriptPath = undefined) {
|
|
23
|
-
const gate = join(dir, '.
|
|
27
|
+
const gate = join(dir, '.svamp', SID, 'loop', 'bin', 'stop-gate.mjs');
|
|
24
28
|
let out = '', code = 0;
|
|
25
29
|
try {
|
|
26
|
-
|
|
30
|
+
// The daemon injects SVAMP_SESSION_ID + CLAUDE_PROJECT_DIR; the gate resolves its
|
|
31
|
+
// session-scoped loop dir from them (so a shared settings.json gates the right loop).
|
|
32
|
+
out = execFileSync(node, [gate], {
|
|
33
|
+
input: JSON.stringify({ stop_hook_active: stopHookActive, hook_event_name: 'Stop', cwd: dir, transcript_path: transcriptPath }),
|
|
34
|
+
encoding: 'utf8',
|
|
35
|
+
env: { ...process.env, SVAMP_SESSION_ID: SID, CLAUDE_PROJECT_DIR: dir },
|
|
36
|
+
});
|
|
27
37
|
} catch (e) { out = e.stdout || ''; code = e.status || 1; }
|
|
28
38
|
let parsed = null; try { parsed = JSON.parse(out); } catch {}
|
|
29
39
|
return { blocked: parsed?.decision === 'block', reason: parsed?.reason || '', exit: code, raw: out };
|
|
30
40
|
}
|
|
31
41
|
function fp(dir) { return execFileSync(node, [STATE_FP, dir], { encoding: 'utf8' }).trim(); }
|
|
32
|
-
function readState(dir) { return JSON.parse(readFileSync(join(dir, '.
|
|
33
|
-
function writeVerdict(dir, obj) { writeFileSync(join(dir, '.
|
|
42
|
+
function readState(dir) { return JSON.parse(readFileSync(join(dir, '.svamp', SID, 'loop', 'loop-state.json'), 'utf8')); }
|
|
43
|
+
function writeVerdict(dir, obj) { writeFileSync(join(dir, '.svamp', SID, 'loop', 'evaluator-verdict.json'), JSON.stringify(obj)); }
|
|
34
44
|
|
|
35
45
|
function newProject({ evaluator = 'on', max = 20 } = {}) {
|
|
36
46
|
const dir = mkdtempSync(join(tmpdir(), 'loopgate-'));
|
|
@@ -38,7 +48,7 @@ function newProject({ evaluator = 'on', max = 20 } = {}) {
|
|
|
38
48
|
git(dir, ['config', 'user.email', 't@t']); git(dir, ['config', 'user.name', 't']);
|
|
39
49
|
writeFileSync(join(dir, 'answer.txt'), 'TODO\n');
|
|
40
50
|
git(dir, ['add', '-A']); git(dir, ['commit', '-qm', 'init']);
|
|
41
|
-
execFileSync(node, [INIT, dir, '--task', 'make answer.txt contain DONE',
|
|
51
|
+
execFileSync(node, [INIT, dir, '--session', SID, '--task', 'make answer.txt contain DONE',
|
|
42
52
|
'--oracle', 'grep -q DONE answer.txt', '--evaluator', evaluator, '--max', String(max)],
|
|
43
53
|
{ encoding: 'utf8' });
|
|
44
54
|
return dir;
|
|
@@ -115,7 +125,7 @@ try {
|
|
|
115
125
|
// ---- Test 8: inactive loop -> no-op allow ----
|
|
116
126
|
console.log('Test 8: inactive loop is a no-op');
|
|
117
127
|
{ const d = newProject({ evaluator: 'off' }); dirs.push(d);
|
|
118
|
-
const sp = join(d, '.
|
|
128
|
+
const sp = join(d, '.svamp', SID, 'loop', 'loop-state.json');
|
|
119
129
|
const s = JSON.parse(readFileSync(sp, 'utf8')); s.active = false; writeFileSync(sp, JSON.stringify(s));
|
|
120
130
|
const r = runGate(d);
|
|
121
131
|
ok(!r.blocked, 'gate is a safe no-op when loop inactive');
|
|
@@ -127,8 +137,9 @@ try {
|
|
|
127
137
|
{ const d = newProject(); dirs.push(d);
|
|
128
138
|
writeFileSync(join(d, 'answer.txt'), 'DONE\n');
|
|
129
139
|
writeVerdict(d, { verdict: 'done', reason: 'complete', state_fp: fp(d) });
|
|
130
|
-
// Agent updates its progress memory AFTER getting the verdict:
|
|
131
|
-
|
|
140
|
+
// Agent updates its progress memory (session-scoped LOOP.md) AFTER getting the verdict:
|
|
141
|
+
const md = join(d, '.svamp', SID, 'loop', 'LOOP.md');
|
|
142
|
+
writeFileSync(md, readFileSync(md, 'utf8') + '\n- iter note: done\n');
|
|
132
143
|
const r = runGate(d);
|
|
133
144
|
ok(!r.blocked, 'gate still allows stop after LOOP.md was updated post-verdict');
|
|
134
145
|
}
|
|
@@ -137,9 +148,9 @@ try {
|
|
|
137
148
|
console.log('Test 10: runtime budget backstop allows stop');
|
|
138
149
|
{ const d = newProject({ evaluator: 'off', max: 9999 }); dirs.push(d); // oracle keeps failing
|
|
139
150
|
// set a tiny runtime budget and an old start time
|
|
140
|
-
const cfgP = join(d, '.
|
|
151
|
+
const cfgP = join(d, '.svamp', SID, 'loop', 'loop.config.json');
|
|
141
152
|
const cfg = JSON.parse(readFileSync(cfgP, 'utf8')); cfg.budget = { max_runtime_sec: 1 }; writeFileSync(cfgP, JSON.stringify(cfg));
|
|
142
|
-
const spP = join(d, '.
|
|
153
|
+
const spP = join(d, '.svamp', SID, 'loop', 'loop-state.json');
|
|
143
154
|
const sp = JSON.parse(readFileSync(spP, 'utf8')); sp.started_at = new Date(Date.now() - 5000).toISOString(); writeFileSync(spP, JSON.stringify(sp));
|
|
144
155
|
const r = runGate(d);
|
|
145
156
|
ok(!r.blocked, 'gate allows stop once runtime budget is exceeded');
|
|
@@ -149,7 +160,7 @@ try {
|
|
|
149
160
|
// ---- Test 11: per-iteration history trail is recorded ----
|
|
150
161
|
console.log('Test 11: history.jsonl audit trail');
|
|
151
162
|
{ const d = newProject({ evaluator: 'off' }); dirs.push(d);
|
|
152
|
-
const histPath = join(d, '.
|
|
163
|
+
const histPath = join(d, '.svamp', SID, 'loop', 'history.jsonl');
|
|
153
164
|
runGate(d); // oracle fails -> continue entry
|
|
154
165
|
let lines = readFileSync(histPath, 'utf8').split('\n').filter(Boolean).map((l) => JSON.parse(l));
|
|
155
166
|
ok(lines.length === 1 && lines[0].decision === 'continue', 'continue iteration recorded in history');
|
|
@@ -162,7 +173,7 @@ try {
|
|
|
162
173
|
// ---- Test 12: token budget from transcript gives up ----
|
|
163
174
|
console.log('Test 12: token budget backstop (from transcript)');
|
|
164
175
|
{ const d = newProject({ evaluator: 'off', max: 9999 }); dirs.push(d); // oracle keeps failing
|
|
165
|
-
const cfgP = join(d, '.
|
|
176
|
+
const cfgP = join(d, '.svamp', SID, 'loop', 'loop.config.json');
|
|
166
177
|
const cfg = JSON.parse(readFileSync(cfgP, 'utf8')); cfg.budget = { max_tokens: 1000 }; writeFileSync(cfgP, JSON.stringify(cfg));
|
|
167
178
|
const tp = join(d, 'transcript.jsonl');
|
|
168
179
|
writeFileSync(tp, [
|
|
@@ -178,9 +189,9 @@ try {
|
|
|
178
189
|
// ---- Test 13: null max_iterations must NOT block forever (hard fallback ceiling) ----
|
|
179
190
|
console.log('Test 13: null max_iterations is bounded by hard ceiling');
|
|
180
191
|
{ const d = newProject({ evaluator: 'off' }); dirs.push(d); // oracle keeps failing
|
|
181
|
-
const cfgP = join(d, '.
|
|
192
|
+
const cfgP = join(d, '.svamp', SID, 'loop', 'loop.config.json');
|
|
182
193
|
const cfg = JSON.parse(readFileSync(cfgP, 'utf8')); cfg.max_iterations = null; writeFileSync(cfgP, JSON.stringify(cfg));
|
|
183
|
-
const spP = join(d, '.
|
|
194
|
+
const spP = join(d, '.svamp', SID, 'loop', 'loop-state.json');
|
|
184
195
|
const sp = JSON.parse(readFileSync(spP, 'utf8')); sp.iteration = 200; writeFileSync(spP, JSON.stringify(sp)); // at the hard ceiling
|
|
185
196
|
const r = runGate(d);
|
|
186
197
|
ok(!r.blocked, 'gate allows stop at the hard fallback ceiling even with null max_iterations');
|
|
@@ -199,7 +210,7 @@ try {
|
|
|
199
210
|
// ---- Test 15: oracle that exits 0 with >1MB output is a PASS, not a failure (#3) ----
|
|
200
211
|
console.log('Test 15: large passing-oracle output is not misread as failure');
|
|
201
212
|
{ const d = newProject({ evaluator: 'off' }); dirs.push(d);
|
|
202
|
-
const cfgP = join(d, '.
|
|
213
|
+
const cfgP = join(d, '.svamp', SID, 'loop', 'loop.config.json');
|
|
203
214
|
const cfg = JSON.parse(readFileSync(cfgP, 'utf8'));
|
|
204
215
|
cfg.oracle = { command: `node -e "console.log('x'.repeat(2000000)); process.exit(0)"` }; // 2MB, exit 0
|
|
205
216
|
writeFileSync(cfgP, JSON.stringify(cfg));
|
|
@@ -210,9 +221,9 @@ try {
|
|
|
210
221
|
// ---- Test 16: huge finite max_iterations is CLAMPED to the hard ceiling (#1) ----
|
|
211
222
|
console.log('Test 16: huge max_iterations is clamped');
|
|
212
223
|
{ const d = newProject({ evaluator: 'off' }); dirs.push(d); // oracle keeps failing
|
|
213
|
-
const cfgP = join(d, '.
|
|
224
|
+
const cfgP = join(d, '.svamp', SID, 'loop', 'loop.config.json');
|
|
214
225
|
const cfg = JSON.parse(readFileSync(cfgP, 'utf8')); cfg.max_iterations = 1e9; writeFileSync(cfgP, JSON.stringify(cfg));
|
|
215
|
-
const spP = join(d, '.
|
|
226
|
+
const spP = join(d, '.svamp', SID, 'loop', 'loop-state.json');
|
|
216
227
|
const sp = JSON.parse(readFileSync(spP, 'utf8')); sp.iteration = 201; writeFileSync(spP, JSON.stringify(sp));
|
|
217
228
|
const r = runGate(d);
|
|
218
229
|
ok(!r.blocked && /max_iterations \(200\)/.test(readState(d).gave_up_reason || ''), 'max_iterations 1e9 clamped to 200 → gives up');
|
|
@@ -221,7 +232,7 @@ try {
|
|
|
221
232
|
// ---- Test 17: non-numeric iteration is coerced (cap stays alive) (#2) ----
|
|
222
233
|
console.log('Test 17: corrupt non-numeric iteration is coerced');
|
|
223
234
|
{ const d = newProject({ evaluator: 'off' }); dirs.push(d);
|
|
224
|
-
const spP = join(d, '.
|
|
235
|
+
const spP = join(d, '.svamp', SID, 'loop', 'loop-state.json');
|
|
225
236
|
const sp = JSON.parse(readFileSync(spP, 'utf8')); sp.iteration = 'not-a-number'; writeFileSync(spP, JSON.stringify(sp));
|
|
226
237
|
runGate(d); // oracle fails -> blocks, iteration coerced to 0+1
|
|
227
238
|
ok(readState(d).iteration === 1, 'non-numeric iteration coerced to a number (cap not disabled)');
|
|
@@ -239,6 +250,32 @@ try {
|
|
|
239
250
|
ok(fpA !== fpB, 'changing a symlink target changes the non-git fingerprint (no stale-verdict hole)');
|
|
240
251
|
}
|
|
241
252
|
|
|
253
|
+
// ---- Test 19: two sessions in ONE dir get isolated loops; a non-loop sibling no-ops ----
|
|
254
|
+
console.log('Test 19: session-scoped isolation in a shared working dir');
|
|
255
|
+
{ const dir = mkdtempSync(join(tmpdir(), 'loopshare-')); dirs.push(dir);
|
|
256
|
+
git(dir, ['init', '-q']); git(dir, ['config', 'user.email', 't@t']); git(dir, ['config', 'user.name', 't']);
|
|
257
|
+
writeFileSync(join(dir, 'answer.txt'), 'TODO\n');
|
|
258
|
+
git(dir, ['add', '-A']); git(dir, ['commit', '-qm', 'init']);
|
|
259
|
+
const A = 'sess-aaaa', B = 'sess-bbbb';
|
|
260
|
+
// A failing oracle so the gate blocks (and increments) rather than completing instantly.
|
|
261
|
+
const O = 'grep -q DONE answer.txt';
|
|
262
|
+
execFileSync(node, [INIT, dir, '--session', A, '--task', 'task A', '--oracle', O, '--evaluator', 'off', '--max', '20'], { encoding: 'utf8' });
|
|
263
|
+
execFileSync(node, [INIT, dir, '--session', B, '--task', 'task B', '--oracle', O, '--evaluator', 'off', '--max', '20'], { encoding: 'utf8' });
|
|
264
|
+
const runAs = (sid) => {
|
|
265
|
+
// The SHARED settings.json now points at B's gate copy; run THAT script as each session.
|
|
266
|
+
const gate = join(dir, '.svamp', B, 'loop', 'bin', 'stop-gate.mjs');
|
|
267
|
+
let out = ''; try { out = execFileSync(node, [gate], { input: '{}', encoding: 'utf8', env: { ...process.env, SVAMP_SESSION_ID: sid, CLAUDE_PROJECT_DIR: dir } }); } catch (e) { out = e.stdout || ''; }
|
|
268
|
+
let p = null; try { p = JSON.parse(out); } catch {}
|
|
269
|
+
return { blocked: p?.decision === 'block', raw: out };
|
|
270
|
+
};
|
|
271
|
+
const stateOf = (sid) => JSON.parse(readFileSync(join(dir, '.svamp', sid, 'loop', 'loop-state.json'), 'utf8'));
|
|
272
|
+
runAs(A); // oracle-less + evaluator-off: not done -> blocks, increments A only
|
|
273
|
+
ok(stateOf(A).iteration === 1, 'session A loop advanced to iter 1');
|
|
274
|
+
ok((stateOf(B).iteration ?? 0) === 0, 'session B loop untouched (no cross-talk)');
|
|
275
|
+
const sib = runAs('sess-cccc-noloop'); // a sibling with no loop
|
|
276
|
+
ok(!sib.blocked && sib.raw === '', 'non-loop sibling session no-ops (allows stop)');
|
|
277
|
+
}
|
|
278
|
+
|
|
242
279
|
console.log(`\n${fail === 0 ? '✅' : '❌'} ${pass} passed, ${fail} failed`);
|
|
243
280
|
process.exit(fail === 0 ? 0 : 1);
|
|
244
281
|
} finally {
|
|
@@ -148,7 +148,7 @@ async function sessionBroadcast(action, args) {
|
|
|
148
148
|
console.log(`Broadcast sent: ${action}`);
|
|
149
149
|
}
|
|
150
150
|
async function connectToMachineService() {
|
|
151
|
-
const { connectAndGetMachine } = await import('./commands-
|
|
151
|
+
const { connectAndGetMachine } = await import('./commands-DOsK9QRw.mjs');
|
|
152
152
|
return connectAndGetMachine();
|
|
153
153
|
}
|
|
154
154
|
async function inboxSend(targetSessionId, opts) {
|
|
@@ -165,7 +165,7 @@ async function inboxSend(targetSessionId, opts) {
|
|
|
165
165
|
}
|
|
166
166
|
const { server, machine } = await connectToMachineService();
|
|
167
167
|
try {
|
|
168
|
-
const { resolveSessionId } = await import('./commands-
|
|
168
|
+
const { resolveSessionId } = await import('./commands-DOsK9QRw.mjs');
|
|
169
169
|
const sessions = await machine.listSessions();
|
|
170
170
|
const match = resolveSessionId(sessions, targetSessionId);
|
|
171
171
|
const fullTargetId = match.sessionId;
|