svamp-cli 0.2.118 → 0.2.120

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (25)
  1. package/bin/skills/loop/SKILL.md +1 -1
  2. package/bin/skills/loop/bin/checklist.mjs +94 -0
  3. package/bin/skills/loop/bin/loop-init.mjs +4 -1
  4. package/bin/skills/loop/bin/stop-gate.mjs +24 -3
  5. package/bin/skills/loop/test/test-checklist.mjs +65 -0
  6. package/dist/{agentCommands-BTkU0PQb.mjs → agentCommands-DIfofhT-.mjs} +4 -4
  7. package/dist/{auth-DimbhOMP.mjs → auth-zcVYRjJ8.mjs} +1 -1
  8. package/dist/cli.mjs +78 -54
  9. package/dist/{commands-Bw2V_awn.mjs → commands-BFpGoTq8.mjs} +1 -1
  10. package/dist/{commands-BJfRk4KT.mjs → commands-BOCpNFZX.mjs} +2 -2
  11. package/dist/{commands-3FsdWpJO.mjs → commands-BYsoZ6Fn.mjs} +2 -2
  12. package/dist/{commands-BEjlVtvS.mjs → commands-CPsUPDnI.mjs} +1 -1
  13. package/dist/{commands-B5rek8XG.mjs → commands-CuY9G_88.mjs} +94 -14
  14. package/dist/{commands-fbQs3jLx.mjs → commands-DOtJfJG7.mjs} +5 -5
  15. package/dist/{fleet-D5dNVJIp.mjs → fleet-CEAB4PS0.mjs} +1 -1
  16. package/dist/{frpc-CdcXdQde.mjs → frpc-DlsBjcRf.mjs} +1 -1
  17. package/dist/{headlessCli-Lk2OU1Gh.mjs → headlessCli-DuY4WQVa.mjs} +2 -2
  18. package/dist/index.mjs +1 -1
  19. package/dist/{package-CxWiFy_P.mjs → package-DS33M8qt.mjs} +1 -1
  20. package/dist/{run-DIoR81Ev.mjs → run-6Pp8yTPw.mjs} +1 -1
  21. package/dist/{run-9C2ogsuu.mjs → run-C4BsPJ_p.mjs} +832 -66
  22. package/dist/{serveCommands-BqApmjmR.mjs → serveCommands-UDH0noeg.mjs} +5 -5
  23. package/dist/{serveManager-XsXnI804.mjs → serveManager-QZooKtI4.mjs} +2 -2
  24. package/dist/{sideband-BHWq1P8E.mjs → sideband-Wfli3n7U.mjs} +1 -1
  25. package/package.json +1 -1
@@ -1,6 +1,6 @@
1
1
  ---
2
2
  name: loop
3
- version: 0.3.2
3
+ version: 0.4.0
4
4
  description: Run a task as a reliable, self-verifying loop — iterate until objective exit conditions are met, with an independent evaluator instead of self-judging. Use when a task needs repeated iterations until "done" (fix until tests pass, refactor until clean, build until a spec is met, autonomous long-running work).
5
5
  ---
6
6
 
@@ -0,0 +1,94 @@
1
+ // checklist.mjs — the loop-engineering task/criteria atom.
2
+ // See docs/svamp-loop-engineering-vision.md. A checklist is a list of evaluable
3
+ // items persisted as JSON, in two layered scopes:
4
+ // session: <project>/.svamp/<sid>/checklist.json (this session's goal; a sibling of <loopDir>, not inside it)
5
+ // project: <projectDir>/.svamp/checklist.json (durable invariants, all sessions)
6
+ // The effective checklist a session enforces = project ∪ session. Each item is
7
+ // oracle-checked (a pass/fail command) or agent-evaluated. Done ≠ gone: a passing
8
+ // item STAYS in the list and is re-verified every loop, so it can regress to failing.
9
+ // The supervisor only lets the turn end when ALL effective items are passing.
10
+ import { readFileSync, writeFileSync, existsSync, mkdirSync } from 'node:fs';
11
+ import { join, dirname } from 'node:path';
12
+ import { execSync } from 'node:child_process';
13
+
14
+ // The session checklist lives at <project>/.svamp/<sid>/checklist.json — a SIBLING of
15
+ // the loop/ dir, not inside it (the checklist is session-scoped, independent of any
16
+ // active loop, and the UI/sync writes it there too: sync/ops.ts sessionChecklistRel).
17
/**
 * Locate the session-scoped checklist file for a given loop directory.
 *
 * @param {string} loopDir - Path of the session's loop/ directory.
 * @returns {string} Path to `checklist.json` in the PARENT directory of `loopDir`
 *   (i.e. a sibling of the loop directory, not a file inside it).
 */
export function sessionChecklistPath(loopDir) {
  const sessionDir = dirname(loopDir);
  return join(sessionDir, 'checklist.json');
}
18
/**
 * Locate the project-scoped checklist file.
 *
 * @param {string} projectDir - Project root directory.
 * @returns {string} Path to `.svamp/checklist.json` under `projectDir`.
 */
export function projectChecklistPath(projectDir) {
  const segments = [projectDir, '.svamp', 'checklist.json'];
  return join(...segments);
}
19
+
20
// Statuses accepted verbatim from a checklist file; anything else normalizes to 'pending'.
const STATUSES = ['pending', 'passing', 'failing'];

/**
 * Read one checklist file and normalize its entries.
 *
 * Accepts either a bare JSON array of items or an object with an `items` array.
 * Each surviving entry is reduced to `{ id, text, status, oracle, scope }`:
 *   - `id`: the stored non-empty string id, else `<scope>-<index in the file>`.
 *   - `text`: stringified and trimmed; entries whose text ends up empty are dropped.
 *   - `status`: 'done' maps to 'passing'; unknown values map to 'pending'.
 *   - `oracle`: trimmed command string, or null when absent/blank/non-string.
 *
 * @param {string} path - Checklist file to read.
 * @param {string} scope - Scope tag stamped on every returned item.
 * @returns {Array<{id: string, text: string, status: string, oracle: (string|null), scope: string}>}
 *   Normalized items; an empty array when the file is missing, unreadable or not valid JSON.
 */
function readOne(path, scope) {
  try {
    if (!existsSync(path)) return [];
    const parsed = JSON.parse(readFileSync(path, 'utf-8'));
    const items = Array.isArray(parsed) ? parsed : (Array.isArray(parsed?.items) ? parsed.items : []);
    return items.map((it, i) => ({
      // `it?.id`, like every other field: a null entry must fall through to the
      // empty-text filter below instead of throwing and discarding the whole file.
      id: typeof it?.id === 'string' && it.id ? it.id : `${scope}-${i}`,
      text: String(it?.text ?? '').trim(),
      // 'done' is a friendly alias for 'passing'.
      status: it?.status === 'done' ? 'passing' : (STATUSES.includes(it?.status) ? it.status : 'pending'),
      oracle: typeof it?.oracle === 'string' && it.oracle.trim() ? it.oracle.trim() : null,
      scope,
    })).filter((it) => it.text);
  } catch {
    // Deliberately best-effort: an unreadable or malformed file counts as "no items".
    return [];
  }
}
37
+
38
/**
 * Build the effective checklist: project-scoped items followed by session-scoped items.
 *
 * @param {string} loopDir - The session's loop directory (used to locate the session file).
 * @param {string} projectDir - Project root (used to locate the project file).
 * @returns {Array<object>} Project items first, then session items, each tagged with its scope.
 */
export function readEffectiveChecklist(loopDir, projectDir) {
  const projectItems = readOne(projectChecklistPath(projectDir), 'project');
  const sessionItems = readOne(sessionChecklistPath(loopDir), 'session');
  return projectItems.concat(sessionItems);
}
45
+
46
/**
 * Run each item's oracle (if it has one) and return items with refreshed status.
 *
 * Items WITHOUT an oracle are returned as-is (same object, stored status kept); those
 * are agent-evaluated, not machine-checkable here. For items with an oracle, the command
 * is run synchronously through the shell with `projectDir` as working directory:
 * a normal zero exit yields 'passing'; a non-zero exit, a timeout, or any other
 * failure to run the command yields 'failing'. Input items are never mutated.
 * This is the per-loop regression check: a previously passing item whose oracle now
 * fails flips to 'failing'.
 *
 * @param {Array<object>} items - Normalized checklist items (`oracle` is a command string or falsy).
 * @param {string} projectDir - Working directory for every oracle command.
 * @param {number} [timeoutSec=600] - Per-oracle timeout in seconds.
 * @returns {Array<object>} New array with the same order and length as `items`.
 */
export function evaluateChecklist(items, projectDir, timeoutSec = 600) {
  return items.map((it) => {
    if (!it.oracle) return it;
    try {
      // Only the exit status matters, so discard the oracle's output instead of piping it
      // into a capped buffer: a chatty oracle that exits 0 must not be killed (and reported
      // as failing) merely for exceeding an output limit.
      execSync(it.oracle, { cwd: projectDir, stdio: 'ignore', timeout: timeoutSec * 1000 });
      return { ...it, status: 'passing' };
    } catch {
      return { ...it, status: 'failing' };
    }
  });
}
63
+
64
/**
 * Gate predicate over a checklist.
 *
 * @param {Array<{status: string}>} items - Checklist items.
 * @returns {boolean} True when no item has a status other than 'passing'
 *   (so an empty list is trivially satisfied).
 */
export function allPassing(items) {
  const hasUnfinished = items.some((entry) => entry.status !== 'passing');
  return !hasUnfinished;
}
68
+
69
/**
 * Produce a one-line summary for the gate's history/state.
 *
 * @param {Array<{status: string}>} items - Checklist items.
 * @returns {string} `"<passing>/<total> passing"`, followed by `", <n> failing"` only
 *   when at least one item is failing.
 */
export function summarize(items) {
  let passing = 0;
  let failing = 0;
  items.forEach(({ status }) => {
    if (status === 'passing') {
      passing += 1;
    } else if (status === 'failing') {
      failing += 1;
    }
  });
  const failSuffix = failing > 0 ? `, ${failing} failing` : '';
  return `${passing}/${items.length} passing${failSuffix}`;
}
75
+
76
/**
 * Persist refreshed statuses back to each scope's file, so the UI renderer + the
 * agent see live state. Writes the canonical { items: [...] } shape (scope stripped —
 * it's implied by which file the item lives in).
 *
 * A scope that contributes no items is left completely untouched: no file is created,
 * and an existing file is NOT overwritten. An existing file can yield zero items simply
 * because it could not be parsed at read time; replacing it with an empty list would
 * destroy whatever it held.
 *
 * @param {string} loopDir - The session's loop directory (locates the session file).
 * @param {string} projectDir - Project root (locates the project file).
 * @param {Array<object>} items - Effective checklist items, each carrying a `scope`
 *   of 'session' or 'project'. Items with any other scope are not written anywhere.
 * @returns {void} Write errors are swallowed on purpose (best-effort persistence).
 */
export function writeChecklistStatuses(loopDir, projectDir, items) {
  const targets = [
    ['session', sessionChecklistPath(loopDir)],
    ['project', projectChecklistPath(projectDir)],
  ];
  for (const [scope, path] of targets) {
    const scoped = items
      .filter((it) => it.scope === scope)
      .map(({ scope: _scope, ...rest }) => rest);
    // Nothing to refresh for this scope: never create an empty file, never clobber an existing one.
    if (scoped.length === 0) continue;
    try {
      mkdirSync(dirname(path), { recursive: true });
      writeFileSync(path, JSON.stringify({ items: scoped }, null, 2));
    } catch { /* best-effort persistence */ }
  }
}
@@ -58,7 +58,7 @@ rmSync(join(loopDir, 'evaluator-verdict.json'), { force: true });
58
58
  rmSync(join(loopDir, 'history.jsonl'), { force: true });
59
59
 
60
60
  // 1. Copy hook scripts so the project is self-contained.
61
- for (const f of ['state-fp.mjs', 'stop-gate.mjs', 'inject-loop.mjs', 'loop-status.mjs', 'precompact.mjs']) {
61
+ for (const f of ['state-fp.mjs', 'stop-gate.mjs', 'checklist.mjs', 'inject-loop.mjs', 'loop-status.mjs', 'precompact.mjs']) {
62
62
  const dest = join(binDir, f);
63
63
  copyFileSync(join(HERE, f), dest);
64
64
  try { chmodSync(dest, 0o755); } catch {}
@@ -71,6 +71,9 @@ const config = {
71
71
  // and resolve their own dir relatively) read this to run the oracle + fingerprint the
72
72
  // work product, since their depth no longer encodes the project root.
73
73
  project_dir: dir,
74
+ // The success contract — the durable thing the gate judges against. Read by the daemon
75
+ // to populate the supervision:verdict event (docs/supervisor-gate-design.md).
76
+ ...(criteria ? { criteria: criteria.trim() } : {}),
74
77
  oracle: oracle ? { command: oracle, timeout_sec: 600 } : null,
75
78
  evaluator: { enabled: evaluatorOn, model },
76
79
  max_iterations: max,
@@ -16,6 +16,7 @@ import { readFileSync, writeFileSync, renameSync, existsSync, appendFileSync, st
16
16
  import { dirname, join, resolve, relative } from 'node:path';
17
17
  import { fileURLToPath } from 'node:url';
18
18
  import { stateFingerprint } from './state-fp.mjs';
19
+ import { readEffectiveChecklist, evaluateChecklist, allPassing, summarize, writeChecklistStatuses } from './checklist.mjs';
19
20
 
20
21
  const HERE = dirname(fileURLToPath(import.meta.url));
21
22
  // Resolve the loop home from the per-process env the daemon injects
@@ -131,7 +132,26 @@ if (evaluatorOn) {
131
132
  }
132
133
  }
133
134
 
134
- const done = oraclePass && evaluatorPass;
135
+ // --- (3) Checklist (the loop-engineering criteria atom) -----------------
136
+ // The effective checklist = project invariants ∪ session goals. Each item with an
137
+ // oracle is re-evaluated here (regression check); refreshed statuses are persisted
138
+ // so the UI + agent see live state. No-op when no checklist.json exists anywhere
139
+ // (allPassing([]) === true) — fully backward-compatible with criteria-only loops.
140
+ let checklistPass = true;
141
+ let checklistDetail = 'no checklist';
142
+ try {
143
+ const items = evaluateChecklist(readEffectiveChecklist(LOOP_DIR, PROJECT), PROJECT);
144
+ if (items.length > 0) {
145
+ writeChecklistStatuses(LOOP_DIR, PROJECT, items);
146
+ checklistPass = allPassing(items);
147
+ const failing = items.filter((i) => i.status !== 'passing');
148
+ checklistDetail = checklistPass
149
+ ? `checklist: ${summarize(items)} — all passing`
150
+ : `checklist: ${summarize(items)}\n--- not yet passing ---\n${failing.map((i) => `[${i.scope}] ${i.text}${i.oracle ? ` (oracle: ${i.oracle})` : ''}`).join('\n')}`;
151
+ }
152
+ } catch { /* checklist is best-effort; never let it trap the gate */ }
153
+
154
+ const done = oraclePass && evaluatorPass && checklistPass;
135
155
 
136
156
  // --- Decide -------------------------------------------------------------
137
157
  const now = new Date().toISOString();
@@ -170,7 +190,7 @@ if (giveUp) {
170
190
  }
171
191
 
172
192
  writeJSONAtomic(STATE, { ...state, iteration: nextIter, phase: 'continue',
173
- last_iteration_at: now, last_oracle: oracleDetail, last_eval: evaluatorDetail, ...tokenField });
193
+ last_iteration_at: now, last_oracle: oracleDetail, last_eval: evaluatorDetail, last_checklist: checklistDetail, ...tokenField });
174
194
 
175
195
  appendHistory({ ts: now, iteration: nextIter, decision: 'continue', oracle: oraclePass, evaluator: evaluatorPass, detail: oraclePass ? evaluatorDetail : oracleDetail });
176
196
 
@@ -180,4 +200,5 @@ const STATEFP_REL = relative(PROJECT, join(LOOP_DIR, 'bin', 'state-fp.mjs')) ||
180
200
  const evalHint = evaluatorOn && !evaluatorPass && oraclePass
181
201
  ? `\n\nThe code looks like it may be ready, but you must get an independent verdict: spawn the \`loop-evaluator\` subagent (or a fresh Task agent with a skeptical reviewer prompt) to judge the current diff against LOOP.md, then write its result to \`${VERDICT_REL}\` as {"verdict":"done"|"continue","reason":"...","guidance":"...","state_fp":"<run: node ${STATEFP_REL}>"}. Do not write the verdict yourself.`
182
202
  : '';
183
- block(`Loop is not complete${remaining}. Keep working on the task in LOOP.md.\n\n${oracleDetail}\n${evaluatorOn ? '\n' + evaluatorDetail : ''}${evalHint}\n\nUpdate LOOP.md progress, fix the blocking issue, then finish your turn again to be re-checked.`);
203
+ const checklistHint = !checklistPass ? `\n\n${checklistDetail}\nWork the items above until each one's oracle passes; finished items must stay green (regressions re-open).` : '';
204
+ block(`Loop is not complete${remaining}. Keep working on the task in LOOP.md.\n\n${oracleDetail}\n${evaluatorOn ? '\n' + evaluatorDetail : ''}${checklistHint}${evalHint}\n\nUpdate LOOP.md progress, fix the blocking issue, then finish your turn again to be re-checked.`);
@@ -0,0 +1,65 @@
1
+ // test-checklist.mjs — the loop-engineering checklist atom (read/merge/evaluate/persist).
2
+ import { mkdtempSync, mkdirSync, writeFileSync, readFileSync, existsSync, rmSync } from 'node:fs';
3
+ import { tmpdir } from 'node:os';
4
+ import { join } from 'node:path';
5
+ import {
6
+ readEffectiveChecklist, evaluateChecklist, allPassing, summarize,
7
+ writeChecklistStatuses, sessionChecklistPath, projectChecklistPath,
8
+ } from '../bin/checklist.mjs';
9
+
10
// Minimal assertion helpers: each check prints a ✓/✗ line and bumps a counter; the
// process exit code at the end reflects whether any check failed.
let passed = 0;
let failed = 0;
function ok(cond, msg) {
  if (cond) {
    passed++;
    console.log(` ✓ ${msg}`);
  } else {
    failed++;
    console.log(` ✗ ${msg}`);
  }
}
function eq(a, b, msg) {
  // Compared via JSON serialization; the actual value is echoed in the message.
  ok(JSON.stringify(a) === JSON.stringify(b), `${msg} (got ${JSON.stringify(a)})`);
}

// Scratch project layout: <root>/.svamp/checklist.json (project scope) and
// <root>/.svamp/sess1/checklist.json (session scope, sibling of the loop/ dir).
const root = mkdtempSync(join(tmpdir(), 'cl-test-'));
const projectDir = root;
const loopDir = join(root, '.svamp', 'sess1', 'loop');

try {
  mkdirSync(loopDir, { recursive: true });
  mkdirSync(join(root, '.svamp'), { recursive: true });

  console.log('scope merge + normalization');
  writeFileSync(projectChecklistPath(projectDir), JSON.stringify({ items: [
    { text: 'tests pass', oracle: 'true', status: 'passing' },
  ] }));
  writeFileSync(sessionChecklistPath(loopDir), JSON.stringify({ items: [
    { text: 'add feature', status: 'done' }, // 'done' alias → passing
    { text: ' ', status: 'pending' }, // blank → dropped
    { text: 'no TODOs', oracle: 'false' }, // defaults to pending
  ] }));
  const eff = readEffectiveChecklist(loopDir, projectDir);
  eq(eff.length, 3, 'effective = project ∪ session, blanks dropped');
  eq(eff[0].scope, 'project', 'project items come first');
  eq(eff[0].text, 'tests pass', 'project item text');
  eq(eff[1].status, 'passing', "'done' normalized to passing");
  ok(eff.map(i => i.scope).join(',') === 'project,session,session', 'scope tags correct');

  console.log('evaluate — oracle pass/fail drives status (regression check)');
  const evaluated = evaluateChecklist(eff, projectDir);
  eq(evaluated.find(i => i.text === 'tests pass').status, 'passing', 'oracle `true` → passing');
  eq(evaluated.find(i => i.text === 'no TODOs').status, 'failing', 'oracle `false` → failing');
  eq(evaluated.find(i => i.text === 'add feature').status, 'passing', 'no-oracle item keeps stored status');

  console.log('allPassing gate');
  ok(!allPassing(evaluated), 'not all passing while one oracle fails');
  ok(allPassing([]), 'empty list is trivially satisfied');
  ok(allPassing(evaluated.map(i => ({ ...i, status: 'passing' }))), 'all passing → true');

  console.log('summarize');
  ok(summarize(evaluated).startsWith('2/3 passing'), `summary reads "${summarize(evaluated)}"`);

  console.log('persist statuses back to the right scope files');
  writeChecklistStatuses(loopDir, projectDir, evaluated);
  const proj = JSON.parse(readFileSync(projectChecklistPath(projectDir), 'utf-8'));
  const sess = JSON.parse(readFileSync(sessionChecklistPath(loopDir), 'utf-8'));
  eq(proj.items.length, 1, 'project file holds only project items');
  eq(sess.items.length, 2, 'session file holds only session items');
  ok(proj.items[0].scope === undefined, 'scope stripped from persisted file');
  ok(sess.items.find(i => i.text === 'no TODOs').status === 'failing', 'failing status persisted (UI will show it)');

  // regression: a re-read after persist is stable
  const reEff = readEffectiveChecklist(loopDir, projectDir);
  eq(reEff.length, 3, 're-read after persist is stable');
} finally {
  // Always remove the scratch directory, even when a step above throws
  // (e.g. a lookup returning undefined), so failed runs do not leak temp dirs.
  rmSync(root, { recursive: true, force: true });
}

console.log(`\nchecklist: ${passed} passed, ${failed} failed`);
process.exit(failed ? 1 : 0);
@@ -2,7 +2,7 @@ import { existsSync, readFileSync, mkdirSync, writeFileSync, renameSync } from '
2
2
  import { join, dirname } from 'node:path';
3
3
  import os from 'node:os';
4
4
  import { requireNotSandboxed } from './sandboxDetect-DNTcbgWD.mjs';
5
- import { n as shortId } from './run-9C2ogsuu.mjs';
5
+ import { n as shortId } from './run-C4BsPJ_p.mjs';
6
6
  import 'os';
7
7
  import 'fs/promises';
8
8
  import 'fs';
@@ -96,7 +96,7 @@ async function sessionSetTitle(title) {
96
96
  }
97
97
  async function sessionSetProjectDescription(description) {
98
98
  const dir = process.cwd();
99
- const { projectName, writeProjectInfo, sanitizeDescription, projectInfoPath } = await import('./run-9C2ogsuu.mjs').then(function (n) { return n.T; });
99
+ const { projectName, writeProjectInfo, sanitizeDescription, projectInfoPath } = await import('./run-C4BsPJ_p.mjs').then(function (n) { return n.W; });
100
100
  const desc = sanitizeDescription(description, 240);
101
101
  if (!desc) {
102
102
  console.error("Project description is empty.");
@@ -180,7 +180,7 @@ async function sessionBroadcast(action, args) {
180
180
  console.log(`Broadcast sent: ${action}`);
181
181
  }
182
182
  async function connectToMachineService() {
183
- const { connectAndGetMachine } = await import('./commands-B5rek8XG.mjs');
183
+ const { connectAndGetMachine } = await import('./commands-CuY9G_88.mjs');
184
184
  return connectAndGetMachine();
185
185
  }
186
186
  async function inboxSend(targetSessionId, opts) {
@@ -197,7 +197,7 @@ async function inboxSend(targetSessionId, opts) {
197
197
  }
198
198
  const { server, machine } = await connectToMachineService();
199
199
  try {
200
- const { resolveSessionId } = await import('./commands-B5rek8XG.mjs');
200
+ const { resolveSessionId } = await import('./commands-CuY9G_88.mjs');
201
201
  const sessions = await machine.listSessions();
202
202
  const match = resolveSessionId(sessions, targetSessionId);
203
203
  const fullTargetId = match.sessionId;
@@ -1,4 +1,4 @@
1
- import { F as resolveModel } from './run-9C2ogsuu.mjs';
1
+ import { F as resolveModel } from './run-C4BsPJ_p.mjs';
2
2
  import 'os';
3
3
  import 'fs/promises';
4
4
  import 'fs';