svamp-cli 0.2.118 → 0.2.120
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/skills/loop/SKILL.md +1 -1
- package/bin/skills/loop/bin/checklist.mjs +94 -0
- package/bin/skills/loop/bin/loop-init.mjs +4 -1
- package/bin/skills/loop/bin/stop-gate.mjs +24 -3
- package/bin/skills/loop/test/test-checklist.mjs +65 -0
- package/dist/{agentCommands-BTkU0PQb.mjs → agentCommands-DIfofhT-.mjs} +4 -4
- package/dist/{auth-DimbhOMP.mjs → auth-zcVYRjJ8.mjs} +1 -1
- package/dist/cli.mjs +78 -54
- package/dist/{commands-Bw2V_awn.mjs → commands-BFpGoTq8.mjs} +1 -1
- package/dist/{commands-BJfRk4KT.mjs → commands-BOCpNFZX.mjs} +2 -2
- package/dist/{commands-3FsdWpJO.mjs → commands-BYsoZ6Fn.mjs} +2 -2
- package/dist/{commands-BEjlVtvS.mjs → commands-CPsUPDnI.mjs} +1 -1
- package/dist/{commands-B5rek8XG.mjs → commands-CuY9G_88.mjs} +94 -14
- package/dist/{commands-fbQs3jLx.mjs → commands-DOtJfJG7.mjs} +5 -5
- package/dist/{fleet-D5dNVJIp.mjs → fleet-CEAB4PS0.mjs} +1 -1
- package/dist/{frpc-CdcXdQde.mjs → frpc-DlsBjcRf.mjs} +1 -1
- package/dist/{headlessCli-Lk2OU1Gh.mjs → headlessCli-DuY4WQVa.mjs} +2 -2
- package/dist/index.mjs +1 -1
- package/dist/{package-CxWiFy_P.mjs → package-DS33M8qt.mjs} +1 -1
- package/dist/{run-DIoR81Ev.mjs → run-6Pp8yTPw.mjs} +1 -1
- package/dist/{run-9C2ogsuu.mjs → run-C4BsPJ_p.mjs} +832 -66
- package/dist/{serveCommands-BqApmjmR.mjs → serveCommands-UDH0noeg.mjs} +5 -5
- package/dist/{serveManager-XsXnI804.mjs → serveManager-QZooKtI4.mjs} +2 -2
- package/dist/{sideband-BHWq1P8E.mjs → sideband-Wfli3n7U.mjs} +1 -1
- package/package.json +1 -1
package/bin/skills/loop/SKILL.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: loop
|
|
3
|
-
version: 0.
|
|
3
|
+
version: 0.4.0
|
|
4
4
|
description: Run a task as a reliable, self-verifying loop — iterate until objective exit conditions are met, with an independent evaluator instead of self-judging. Use when a task needs repeated iterations until "done" (fix until tests pass, refactor until clean, build until a spec is met, autonomous long-running work).
|
|
5
5
|
---
|
|
6
6
|
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
// checklist.mjs — the loop-engineering task/criteria atom.
|
|
2
|
+
// See docs/svamp-loop-engineering-vision.md. A checklist is a list of evaluable
|
|
3
|
+
// items persisted as JSON, in two layered scopes:
|
|
4
|
+
// session: <loopDir>/checklist.json (this session's goal)
|
|
5
|
+
// project: <projectDir>/.svamp/checklist.json (durable invariants, all sessions)
|
|
6
|
+
// The effective checklist a session enforces = project ∪ session. Each item is
|
|
7
|
+
// oracle-checked (a pass/fail command) or agent-evaluated. Done ≠ gone: a passing
|
|
8
|
+
// item STAYS in the list and is re-verified every loop, so it can regress to failing.
|
|
9
|
+
// The supervisor only lets the turn end when ALL effective items are passing.
|
|
10
|
+
import { readFileSync, writeFileSync, existsSync, mkdirSync } from 'node:fs';
|
|
11
|
+
import { join, dirname } from 'node:path';
|
|
12
|
+
import { execSync } from 'node:child_process';
|
|
13
|
+
|
|
14
|
+
// The session checklist lives at <project>/.svamp/<sid>/checklist.json — a SIBLING of
|
|
15
|
+
// the loop/ dir, not inside it (the checklist is session-scoped, independent of any
|
|
16
|
+
// active loop, and the UI/sync writes it there too: sync/ops.ts sessionChecklistRel).
|
|
17
|
+
/**
 * Resolve the session-scoped checklist file for a loop directory.
 * The file sits beside the loop directory (in its parent), not inside it.
 * @param {string} loopDir - Path of the session's loop directory.
 * @returns {string} Path of the sibling `checklist.json`.
 */
export function sessionChecklistPath(loopDir) {
  const sessionDir = dirname(loopDir);
  return join(sessionDir, 'checklist.json');
}
|
|
18
|
+
/**
 * Resolve the project-scoped checklist file.
 * @param {string} projectDir - Project root directory.
 * @returns {string} Path of `<projectDir>/.svamp/checklist.json`.
 */
export function projectChecklistPath(projectDir) {
  const svampDir = join(projectDir, '.svamp');
  return join(svampDir, 'checklist.json');
}
|
|
19
|
+
|
|
20
|
+
// Statuses accepted verbatim from disk; any other value is normalized to 'pending'
// (except the 'done' alias, which maps to 'passing').
const STATUSES = ['pending', 'passing', 'failing'];

/**
 * Read one checklist file and normalize its items.
 *
 * The file may hold either a bare array of items or an object with an `items`
 * array. Each item is reduced to `{ id, text, status, oracle, scope }`; items
 * whose trimmed text is empty are dropped. A missing, unreadable or unparsable
 * file yields an empty list (reading is best-effort and never throws).
 *
 * @param {string} path - Checklist JSON file to read.
 * @param {string} scope - Scope tag stamped on every item; also the prefix of
 *   generated ids (`<scope>-<index>`) for items without a non-empty string id.
 * @returns {Array<{id: string, text: string, status: string, oracle: (string|null), scope: string}>}
 */
function readOne(path, scope) {
  try {
    if (!existsSync(path)) return [];
    const j = JSON.parse(readFileSync(path, 'utf-8'));
    const items = Array.isArray(j) ? j : (Array.isArray(j?.items) ? j.items : []);
    return items.map((it, i) => ({
      // `it?.id`, like every other field below: a null/primitive entry must not
      // throw here, otherwise the catch would discard the WHOLE checklist and the
      // gate would treat it as empty. Such entries end up with empty text and are
      // removed by the filter instead.
      id: typeof it?.id === 'string' && it.id ? it.id : `${scope}-${i}`,
      text: String(it?.text ?? '').trim(),
      // 'done' is a friendly alias for 'passing'.
      status: it?.status === 'done' ? 'passing' : (STATUSES.includes(it?.status) ? it.status : 'pending'),
      oracle: typeof it?.oracle === 'string' && it.oracle.trim() ? it.oracle.trim() : null,
      scope,
    })).filter((it) => it.text);
  } catch { return []; }
}
|
|
37
|
+
|
|
38
|
+
/**
 * Build the effective checklist for a session: the project-scoped items
 * followed by the session-scoped items.
 * @param {string} loopDir - The session's loop directory.
 * @param {string} projectDir - The project root directory.
 * @returns {Array<object>} Project items first, then session items.
 */
export function readEffectiveChecklist(loopDir, projectDir) {
  const projectItems = readOne(projectChecklistPath(projectDir), 'project');
  const sessionItems = readOne(sessionChecklistPath(loopDir), 'session');
  return [...projectItems, ...sessionItems];
}
|
|
45
|
+
|
|
46
|
+
/**
 * Re-check every item that carries an oracle command and return the list with
 * refreshed statuses.
 *
 * An oracle is executed with `execSync` in `projectDir`; if the call returns
 * normally the item becomes 'passing', if it throws the item becomes 'failing'.
 * Items without an oracle are returned untouched (same object, stored status).
 *
 * @param {Array<object>} items - Checklist items (`oracle`, `status`, ...).
 * @param {string} projectDir - Working directory for the oracle commands.
 * @param {number} [timeoutSec=600] - Per-oracle timeout in seconds.
 * @returns {Array<object>} New array, one entry per input item, same order.
 */
export function evaluateChecklist(items, projectDir, timeoutSec = 600) {
  const timeoutMs = timeoutSec * 1000;

  const recheck = (entry) => {
    if (!entry.oracle) return entry;
    let verdict = 'passing';
    try {
      execSync(entry.oracle, {
        cwd: projectDir,
        stdio: 'pipe',
        maxBuffer: 16 * 1024 * 1024,
        timeout: timeoutMs,
      });
    } catch {
      verdict = 'failing';
    }
    return { ...entry, status: verdict };
  };

  return items.map((entry) => recheck(entry));
}
|
|
63
|
+
|
|
64
|
+
/**
 * Tell whether the checklist is fully satisfied.
 * @param {Array<{status: string}>} items - Effective checklist items.
 * @returns {boolean} `true` for an empty list or when every item's status is
 *   'passing'; `false` otherwise.
 */
export function allPassing(items) {
  if (items.length === 0) {
    return true;
  }
  const hasPassed = (entry) => entry.status === 'passing';
  return items.every(hasPassed);
}
|
|
68
|
+
|
|
69
|
+
/**
 * Produce a one-line summary of the checklist.
 * @param {Array<{status: string}>} items - Checklist items.
 * @returns {string} `"<passing>/<total> passing"`, followed by
 *   `", <n> failing"` when at least one item is failing.
 */
export function summarize(items) {
  let passing = 0;
  let failing = 0;
  items.forEach(({ status }) => {
    if (status === 'passing') passing += 1;
    else if (status === 'failing') failing += 1;
  });
  const failingNote = failing ? `, ${failing} failing` : '';
  return `${passing}/${items.length} passing${failingNote}`;
}
|
|
75
|
+
|
|
76
|
+
/**
 * Persist refreshed statuses back to each scope's file, so other readers of
 * those files see the current state. Writes the canonical `{ items: [...] }`
 * shape with the `scope` field stripped (it is implied by which file the item
 * lives in).
 *
 * A scope that contributes no items is left completely alone: there is nothing
 * to refresh, and an existing file that produced no items may simply have been
 * unreadable or mid-write, so rewriting it as `{ items: [] }` would destroy its
 * content.
 *
 * Persistence is best-effort: filesystem errors are ignored and never thrown.
 *
 * @param {string} loopDir - The session's loop directory (locates the session file).
 * @param {string} projectDir - The project root (locates the project file).
 * @param {Array<object>} items - Items tagged with `scope: 'session' | 'project'`.
 * @returns {void}
 */
export function writeChecklistStatuses(loopDir, projectDir, items) {
  const targets = [
    ['session', sessionChecklistPath(loopDir)],
    ['project', projectChecklistPath(projectDir)],
  ];
  for (const [scope, path] of targets) {
    const scoped = items
      .filter((it) => it.scope === scope)
      .map(({ scope: _s, ...rest }) => rest);
    // No items for this scope: neither create an empty file nor clobber an existing one.
    if (scoped.length === 0) continue;
    try {
      mkdirSync(dirname(path), { recursive: true });
      writeFileSync(path, JSON.stringify({ items: scoped }, null, 2));
    } catch { /* best-effort persistence */ }
  }
}
|
|
@@ -58,7 +58,7 @@ rmSync(join(loopDir, 'evaluator-verdict.json'), { force: true });
|
|
|
58
58
|
rmSync(join(loopDir, 'history.jsonl'), { force: true });
|
|
59
59
|
|
|
60
60
|
// 1. Copy hook scripts so the project is self-contained.
|
|
61
|
-
for (const f of ['state-fp.mjs', 'stop-gate.mjs', 'inject-loop.mjs', 'loop-status.mjs', 'precompact.mjs']) {
|
|
61
|
+
for (const f of ['state-fp.mjs', 'stop-gate.mjs', 'checklist.mjs', 'inject-loop.mjs', 'loop-status.mjs', 'precompact.mjs']) {
|
|
62
62
|
const dest = join(binDir, f);
|
|
63
63
|
copyFileSync(join(HERE, f), dest);
|
|
64
64
|
try { chmodSync(dest, 0o755); } catch {}
|
|
@@ -71,6 +71,9 @@ const config = {
|
|
|
71
71
|
// and resolve their own dir relatively) read this to run the oracle + fingerprint the
|
|
72
72
|
// work product, since their depth no longer encodes the project root.
|
|
73
73
|
project_dir: dir,
|
|
74
|
+
// The success contract — the durable thing the gate judges against. Read by the daemon
|
|
75
|
+
// to populate the supervision:verdict event (docs/supervisor-gate-design.md).
|
|
76
|
+
...(criteria ? { criteria: criteria.trim() } : {}),
|
|
74
77
|
oracle: oracle ? { command: oracle, timeout_sec: 600 } : null,
|
|
75
78
|
evaluator: { enabled: evaluatorOn, model },
|
|
76
79
|
max_iterations: max,
|
|
@@ -16,6 +16,7 @@ import { readFileSync, writeFileSync, renameSync, existsSync, appendFileSync, st
|
|
|
16
16
|
import { dirname, join, resolve, relative } from 'node:path';
|
|
17
17
|
import { fileURLToPath } from 'node:url';
|
|
18
18
|
import { stateFingerprint } from './state-fp.mjs';
|
|
19
|
+
import { readEffectiveChecklist, evaluateChecklist, allPassing, summarize, writeChecklistStatuses } from './checklist.mjs';
|
|
19
20
|
|
|
20
21
|
const HERE = dirname(fileURLToPath(import.meta.url));
|
|
21
22
|
// Resolve the loop home from the per-process env the daemon injects
|
|
@@ -131,7 +132,26 @@ if (evaluatorOn) {
|
|
|
131
132
|
}
|
|
132
133
|
}
|
|
133
134
|
|
|
134
|
-
|
|
135
|
+
// --- (3) Checklist (the loop-engineering criteria atom) -----------------
|
|
136
|
+
// The effective checklist = project invariants ∪ session goals. Each item with an
|
|
137
|
+
// oracle is re-evaluated here (regression check); refreshed statuses are persisted
|
|
138
|
+
// so the UI + agent see live state. No-op when no checklist.json exists anywhere
|
|
139
|
+
// (allPassing([]) === true) — fully backward-compatible with criteria-only loops.
|
|
140
|
+
let checklistPass = true;
|
|
141
|
+
let checklistDetail = 'no checklist';
|
|
142
|
+
try {
|
|
143
|
+
const items = evaluateChecklist(readEffectiveChecklist(LOOP_DIR, PROJECT), PROJECT);
|
|
144
|
+
if (items.length > 0) {
|
|
145
|
+
writeChecklistStatuses(LOOP_DIR, PROJECT, items);
|
|
146
|
+
checklistPass = allPassing(items);
|
|
147
|
+
const failing = items.filter((i) => i.status !== 'passing');
|
|
148
|
+
checklistDetail = checklistPass
|
|
149
|
+
? `checklist: ${summarize(items)} — all passing`
|
|
150
|
+
: `checklist: ${summarize(items)}\n--- not yet passing ---\n${failing.map((i) => `[${i.scope}] ${i.text}${i.oracle ? ` (oracle: ${i.oracle})` : ''}`).join('\n')}`;
|
|
151
|
+
}
|
|
152
|
+
} catch { /* checklist is best-effort; never let it trap the gate */ }
|
|
153
|
+
|
|
154
|
+
const done = oraclePass && evaluatorPass && checklistPass;
|
|
135
155
|
|
|
136
156
|
// --- Decide -------------------------------------------------------------
|
|
137
157
|
const now = new Date().toISOString();
|
|
@@ -170,7 +190,7 @@ if (giveUp) {
|
|
|
170
190
|
}
|
|
171
191
|
|
|
172
192
|
writeJSONAtomic(STATE, { ...state, iteration: nextIter, phase: 'continue',
|
|
173
|
-
last_iteration_at: now, last_oracle: oracleDetail, last_eval: evaluatorDetail, ...tokenField });
|
|
193
|
+
last_iteration_at: now, last_oracle: oracleDetail, last_eval: evaluatorDetail, last_checklist: checklistDetail, ...tokenField });
|
|
174
194
|
|
|
175
195
|
appendHistory({ ts: now, iteration: nextIter, decision: 'continue', oracle: oraclePass, evaluator: evaluatorPass, detail: oraclePass ? evaluatorDetail : oracleDetail });
|
|
176
196
|
|
|
@@ -180,4 +200,5 @@ const STATEFP_REL = relative(PROJECT, join(LOOP_DIR, 'bin', 'state-fp.mjs')) ||
|
|
|
180
200
|
const evalHint = evaluatorOn && !evaluatorPass && oraclePass
|
|
181
201
|
? `\n\nThe code looks like it may be ready, but you must get an independent verdict: spawn the \`loop-evaluator\` subagent (or a fresh Task agent with a skeptical reviewer prompt) to judge the current diff against LOOP.md, then write its result to \`${VERDICT_REL}\` as {"verdict":"done"|"continue","reason":"...","guidance":"...","state_fp":"<run: node ${STATEFP_REL}>"}. Do not write the verdict yourself.`
|
|
182
202
|
: '';
|
|
183
|
-
|
|
203
|
+
const checklistHint = !checklistPass ? `\n\n${checklistDetail}\nWork the items above until each one's oracle passes; finished items must stay green (regressions re-open).` : '';
|
|
204
|
+
block(`Loop is not complete${remaining}. Keep working on the task in LOOP.md.\n\n${oracleDetail}\n${evaluatorOn ? '\n' + evaluatorDetail : ''}${checklistHint}${evalHint}\n\nUpdate LOOP.md progress, fix the blocking issue, then finish your turn again to be re-checked.`);
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
// test-checklist.mjs — the loop-engineering checklist atom (read/merge/evaluate/persist).
// Standalone script: prints one ✓/✗ line per check and exits 1 if any check failed.
import { mkdtempSync, mkdirSync, writeFileSync, readFileSync, existsSync, rmSync } from 'node:fs';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import {
  readEffectiveChecklist, evaluateChecklist, allPassing, summarize,
  writeChecklistStatuses, sessionChecklistPath, projectChecklistPath,
} from '../bin/checklist.mjs';

let passed = 0, failed = 0;

/** Record one check and print its outcome. */
function ok(cond, msg) { if (cond) { passed++; console.log(`  ✓ ${msg}`); } else { failed++; console.log(`  ✗ ${msg}`); } }

/** Compare two values by their JSON serialization. */
function eq(a, b, msg) { ok(JSON.stringify(a) === JSON.stringify(b), `${msg} (got ${JSON.stringify(a)})`); }

// Throwaway project layout: <root>/.svamp/checklist.json (project scope) and
// <root>/.svamp/sess1/checklist.json (session scope, sibling of the loop dir).
const root = mkdtempSync(join(tmpdir(), 'cl-test-'));
try {
  const projectDir = root;
  const loopDir = join(root, '.svamp', 'sess1', 'loop');
  mkdirSync(loopDir, { recursive: true });
  mkdirSync(join(root, '.svamp'), { recursive: true });

  console.log('scope merge + normalization');
  writeFileSync(projectChecklistPath(projectDir), JSON.stringify({ items: [
    { text: 'tests pass', oracle: 'true', status: 'passing' },
  ] }));
  writeFileSync(sessionChecklistPath(loopDir), JSON.stringify({ items: [
    { text: 'add feature', status: 'done' },          // 'done' alias → passing
    { text: '   ', status: 'pending' },               // blank → dropped
    { text: 'no TODOs', oracle: 'false' },            // defaults to pending
  ] }));
  let eff = readEffectiveChecklist(loopDir, projectDir);
  eq(eff.length, 3, 'effective = project ∪ session, blanks dropped');
  eq(eff[0].scope, 'project', 'project items come first');
  eq(eff[0].text, 'tests pass', 'project item text');
  eq(eff[1].status, 'passing', "'done' normalized to passing");
  ok(eff.map(i => i.scope).join(',') === 'project,session,session', 'scope tags correct');

  console.log('evaluate — oracle pass/fail drives status (regression check)');
  const evaluated = evaluateChecklist(eff, projectDir);
  eq(evaluated.find(i => i.text === 'tests pass').status, 'passing', 'oracle `true` → passing');
  eq(evaluated.find(i => i.text === 'no TODOs').status, 'failing', 'oracle `false` → failing');
  eq(evaluated.find(i => i.text === 'add feature').status, 'passing', 'no-oracle item keeps stored status');

  console.log('allPassing gate');
  ok(!allPassing(evaluated), 'not all passing while one oracle fails');
  ok(allPassing([]), 'empty list is trivially satisfied');
  ok(allPassing(evaluated.map(i => ({ ...i, status: 'passing' }))), 'all passing → true');

  console.log('summarize');
  ok(summarize(evaluated).startsWith('2/3 passing'), `summary reads "${summarize(evaluated)}"`);

  console.log('persist statuses back to the right scope files');
  writeChecklistStatuses(loopDir, projectDir, evaluated);
  const proj = JSON.parse(readFileSync(projectChecklistPath(projectDir), 'utf-8'));
  const sess = JSON.parse(readFileSync(sessionChecklistPath(loopDir), 'utf-8'));
  eq(proj.items.length, 1, 'project file holds only project items');
  eq(sess.items.length, 2, 'session file holds only session items');
  ok(proj.items[0].scope === undefined, 'scope stripped from persisted file');
  ok(sess.items.find(i => i.text === 'no TODOs').status === 'failing', 'failing status persisted (UI will show it)');

  // regression: a re-read after persist is stable
  const reEff = readEffectiveChecklist(loopDir, projectDir);
  eq(reEff.length, 3, 're-read after persist is stable');
} finally {
  // Always remove the temp tree, even when a step above throws (e.g. a lookup
  // returning undefined), so failed runs do not leave directories in tmpdir.
  rmSync(root, { recursive: true, force: true });
}
console.log(`\nchecklist: ${passed} passed, ${failed} failed`);
process.exit(failed ? 1 : 0);
|
|
@@ -2,7 +2,7 @@ import { existsSync, readFileSync, mkdirSync, writeFileSync, renameSync } from '
|
|
|
2
2
|
import { join, dirname } from 'node:path';
|
|
3
3
|
import os from 'node:os';
|
|
4
4
|
import { requireNotSandboxed } from './sandboxDetect-DNTcbgWD.mjs';
|
|
5
|
-
import { n as shortId } from './run-9C2ogsuu.mjs';
|
|
5
|
+
import { n as shortId } from './run-C4BsPJ_p.mjs';
|
|
6
6
|
import 'os';
|
|
7
7
|
import 'fs/promises';
|
|
8
8
|
import 'fs';
|
|
@@ -96,7 +96,7 @@ async function sessionSetTitle(title) {
|
|
|
96
96
|
}
|
|
97
97
|
async function sessionSetProjectDescription(description) {
|
|
98
98
|
const dir = process.cwd();
|
|
99
|
-
const { projectName, writeProjectInfo, sanitizeDescription, projectInfoPath } = await import('./run-
|
|
99
|
+
const { projectName, writeProjectInfo, sanitizeDescription, projectInfoPath } = await import('./run-C4BsPJ_p.mjs').then(function (n) { return n.W; });
|
|
100
100
|
const desc = sanitizeDescription(description, 240);
|
|
101
101
|
if (!desc) {
|
|
102
102
|
console.error("Project description is empty.");
|
|
@@ -180,7 +180,7 @@ async function sessionBroadcast(action, args) {
|
|
|
180
180
|
console.log(`Broadcast sent: ${action}`);
|
|
181
181
|
}
|
|
182
182
|
async function connectToMachineService() {
|
|
183
|
-
const { connectAndGetMachine } = await import('./commands-B5rek8XG.mjs');
|
|
183
|
+
const { connectAndGetMachine } = await import('./commands-CuY9G_88.mjs');
|
|
184
184
|
return connectAndGetMachine();
|
|
185
185
|
}
|
|
186
186
|
async function inboxSend(targetSessionId, opts) {
|
|
@@ -197,7 +197,7 @@ async function inboxSend(targetSessionId, opts) {
|
|
|
197
197
|
}
|
|
198
198
|
const { server, machine } = await connectToMachineService();
|
|
199
199
|
try {
|
|
200
|
-
const { resolveSessionId } = await import('./commands-B5rek8XG.mjs');
|
|
200
|
+
const { resolveSessionId } = await import('./commands-CuY9G_88.mjs');
|
|
201
201
|
const sessions = await machine.listSessions();
|
|
202
202
|
const match = resolveSessionId(sessions, targetSessionId);
|
|
203
203
|
const fullTargetId = match.sessionId;
|