svamp-cli 0.2.122 → 0.2.124

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  ---
2
2
  name: loop
3
- version: 0.3.2
3
+ version: 0.4.0
4
4
  description: Run a task as a reliable, self-verifying loop — iterate until objective exit conditions are met, with an independent evaluator instead of self-judging. Use when a task needs repeated iterations until "done" (fix until tests pass, refactor until clean, build until a spec is met, autonomous long-running work).
5
5
  ---
6
6
 
@@ -1,20 +1,49 @@
1
- // checklist.mjs — the loop-engineering task/criteria atom.
2
- // See docs/svamp-loop-engineering-vision.md. A checklist is a list of evaluable
3
- // items persisted as JSON, in two layered scopes:
1
+ // checklist.mjs — the loop-engineering checklist atom, gate side.
2
+ // See docs/checklist-atom-spec.md + docs/svamp-loop-engineering-vision.md. A checklist
3
+ // is a list of evaluable goal items persisted as JSON, in two layered scopes:
4
4
  // session: <loopDir>/checklist.json (this session's goal)
5
5
  // project: <projectDir>/.svamp/checklist.json (durable invariants, all sessions)
6
- // The effective checklist a session enforces = project ∪ session. Each item is
7
- // oracle-checked (a pass/fail command) or agent-evaluated. Done ≠ gone: a passing
8
- // item STAYS in the list and is re-verified every loop, so it can regress to failing.
9
- // The supervisor only lets the turn end when ALL effective items are passing.
6
+ // Effective checklist a session enforces = project ∪ session. Each item is oracle-checked
7
+ // (an eval cmd) or agent/human-evaluated. Done ≠ gone: a 'done' item STAYS and is
8
+ // re-verified every loop, so it can regress to 'blocked'. The gate lets the turn end only
9
+ // when ALL effective items are 'done'.
10
+ //
11
+ // This is the GATE runtime (a .mjs skill — it cannot import the TS atom in
12
+ // checklist/core.ts), so it mirrors the canonical vocab by value: ItemStatus +
13
+ // canonicalChecklistStatus are kept in sync with sync/checklistModel.ts + parseMarkdown.ts.
10
14
  import { readFileSync, writeFileSync, existsSync, mkdirSync } from 'node:fs';
11
15
  import { join, dirname } from 'node:path';
12
16
  import { execSync } from 'node:child_process';
13
17
 
18
+ // CANONICAL: the session checklist lives INSIDE the loop dir at
19
+ // <project>/.svamp/<sid>/loop/checklist.json — beside the other supervisor state
20
+ // (loop-state.json, supervisor-verdict.json). Matches the daemon writer
21
+ // (checklist/core.ts checklistPath) + the frontend (sync/ops.ts sessionChecklistRel).
14
22
  export function sessionChecklistPath(loopDir) { return join(loopDir, 'checklist.json'); }
15
23
  export function projectChecklistPath(projectDir) { return join(projectDir, '.svamp', 'checklist.json'); }
16
24
 
17
- const STATUSES = ['pending', 'passing', 'failing'];
25
+ /**
26
+ * Map any accepted token — canonical OR the legacy loop aliases (pending/passing/failing) —
27
+ * to the canonical ItemStatus set. Mirrors checklistModel.canonicalChecklistStatus.
28
+ */
29
+ export function canonicalChecklistStatus(raw) {
30
+ switch (String(raw ?? '').toLowerCase()) {
31
+ case 'passing': case 'done': return 'done';
32
+ case 'failing': case 'blocked': return 'blocked';
33
+ case 'pending': case 'todo': case '': return 'todo';
34
+ case 'active': case 'in_progress': case 'in-progress': return 'active';
35
+ case 'verifying': return 'verifying';
36
+ case 'awaiting_review': case 'awaiting-review': case 'review': return 'awaiting_review';
37
+ case 'rework': return 'rework';
38
+ default: return 'todo';
39
+ }
40
+ }
41
+
42
+ /** The oracle command for an item: the atom's eval.cmd (type:'oracle'), else legacy item.oracle. */
43
+ function itemOracle(it) {
44
+ if (it?.eval?.type === 'oracle' && typeof it.eval.cmd === 'string' && it.eval.cmd.trim()) return it.eval.cmd.trim();
45
+ return typeof it?.oracle === 'string' && it.oracle.trim() ? it.oracle.trim() : null;
46
+ }
18
47
 
19
48
  function readOne(path, scope) {
20
49
  try {
@@ -22,12 +51,16 @@ function readOne(path, scope) {
22
51
  const j = JSON.parse(readFileSync(path, 'utf-8'));
23
52
  const items = Array.isArray(j) ? j : (Array.isArray(j?.items) ? j.items : []);
24
53
  return items.map((it, i) => ({
25
- id: typeof it.id === 'string' && it.id ? it.id : `${scope}-${i}`,
54
+ // Preserve the full atom item (eval, child, disposition, order, …) so the gate
55
+ // never strips fields the UI/core own; it only refreshes `status`.
56
+ ...it,
57
+ id: typeof it?.id === 'string' && it.id ? it.id : `${scope}-${i}`,
26
58
  text: String(it?.text ?? '').trim(),
27
- // 'done' is a friendly alias for 'passing'.
28
- status: it?.status === 'done' ? 'passing' : (STATUSES.includes(it?.status) ? it.status : 'pending'),
29
- oracle: typeof it?.oracle === 'string' && it.oracle.trim() ? it.oracle.trim() : null,
59
+ status: canonicalChecklistStatus(it?.status),
60
+ // transient helpers (underscored) used for evaluation, stripped before persist:
30
61
  scope,
62
+ _oracle: itemOracle(it),
63
+ _delegated: it?.disposition === 'delegated',
31
64
  })).filter((it) => it.text);
32
65
  } catch { return []; }
33
66
  }
@@ -41,47 +74,51 @@ export function readEffectiveChecklist(loopDir, projectDir) {
41
74
  }
42
75
 
43
76
  /**
44
- * Run each item's oracle (if it has one) and return items with refreshed status.
45
- * Items WITHOUT an oracle keep their stored status (those are agent-evaluated, not
46
- * machine-checkable here). This is the per-loop regression check: a previously
47
- * passing item whose oracle now fails flips to 'failing'.
77
+ * Run each INLINE item's oracle (if any) and return items with refreshed status:
78
+ * oracle pass 'done', oracle fail 'blocked'. This is the per-loop regression check
79
+ * (a previously 'done' item whose oracle now fails flips to 'blocked'). Delegated items
80
+ * (gated by their child) and non-oracle items (agent/human-evaluated) keep their status.
48
81
  */
49
82
  export function evaluateChecklist(items, projectDir, timeoutSec = 600) {
50
83
  return items.map((it) => {
51
- if (!it.oracle) return it;
84
+ if (it._delegated || !it._oracle) return it;
52
85
  try {
53
- execSync(it.oracle, { cwd: projectDir, stdio: 'pipe', maxBuffer: 16 * 1024 * 1024, timeout: timeoutSec * 1000 });
54
- return { ...it, status: 'passing' };
86
+ execSync(it._oracle, { cwd: projectDir, stdio: 'pipe', maxBuffer: 16 * 1024 * 1024, timeout: timeoutSec * 1000 });
87
+ return { ...it, status: 'done' };
55
88
  } catch {
56
- return { ...it, status: 'failing' };
89
+ return { ...it, status: 'blocked' };
57
90
  }
58
91
  });
59
92
  }
60
93
 
61
- /** True when every effective item is passing (an empty list is trivially satisfied). */
94
+ /** True when every effective item is 'done' (an empty list is trivially satisfied). */
62
95
  export function allPassing(items) {
63
- return items.length === 0 ? true : items.every((it) => it.status === 'passing');
96
+ return items.length === 0 ? true : items.every((it) => it.status === 'done');
64
97
  }
65
98
 
66
99
  /** A one-line summary for the gate's history/state. */
67
100
  export function summarize(items) {
68
- const pass = items.filter((i) => i.status === 'passing').length;
69
- const fail = items.filter((i) => i.status === 'failing').length;
70
- return `${pass}/${items.length} passing${fail ? `, ${fail} failing` : ''}`;
101
+ const done = items.filter((i) => i.status === 'done').length;
102
+ const blocked = items.filter((i) => i.status === 'blocked' || i.status === 'rework').length;
103
+ return `${done}/${items.length} done${blocked ? `, ${blocked} blocked` : ''}`;
71
104
  }
72
105
 
73
106
  /**
74
- * Persist refreshed statuses back to each scope's file, so the UI renderer + the
75
- * agent see live state. Writes the canonical { items: [...] } shape (scope stripped —
76
- * it's implied by which file the item lives in).
107
+ * Persist refreshed statuses back to each scope's file so the UI + agent see live state.
108
+ * Preserves the full atom item shape — only the transient helper fields (scope/_oracle/
109
+ * _delegated) are stripped; everything else (eval, child, disposition, order, …) round-trips.
77
110
  */
78
111
  export function writeChecklistStatuses(loopDir, projectDir, items) {
112
+ const strip = (it) => {
113
+ const { scope: _s, _oracle, _delegated, ...rest } = it;
114
+ return rest;
115
+ };
79
116
  const targets = [
80
117
  ['session', sessionChecklistPath(loopDir)],
81
118
  ['project', projectChecklistPath(projectDir)],
82
119
  ];
83
120
  for (const [scope, path] of targets) {
84
- const scoped = items.filter((it) => it.scope === scope).map(({ scope: _s, ...rest }) => rest);
121
+ const scoped = items.filter((it) => it.scope === scope).map(strip);
85
122
  if (scoped.length === 0 && !existsSync(path)) continue; // don't create empty files
86
123
  try {
87
124
  mkdirSync(dirname(path), { recursive: true });
@@ -58,7 +58,7 @@ rmSync(join(loopDir, 'evaluator-verdict.json'), { force: true });
58
58
  rmSync(join(loopDir, 'history.jsonl'), { force: true });
59
59
 
60
60
  // 1. Copy hook scripts so the project is self-contained.
61
- for (const f of ['state-fp.mjs', 'stop-gate.mjs', 'inject-loop.mjs', 'loop-status.mjs', 'precompact.mjs']) {
61
+ for (const f of ['state-fp.mjs', 'stop-gate.mjs', 'checklist.mjs', 'inject-loop.mjs', 'loop-status.mjs', 'precompact.mjs']) {
62
62
  const dest = join(binDir, f);
63
63
  copyFileSync(join(HERE, f), dest);
64
64
  try { chmodSync(dest, 0o755); } catch {}
@@ -71,6 +71,9 @@ const config = {
71
71
  // and resolve their own dir relatively) read this to run the oracle + fingerprint the
72
72
  // work product, since their depth no longer encodes the project root.
73
73
  project_dir: dir,
74
+ // The success contract — the durable thing the gate judges against. Read by the daemon
75
+ // to populate the supervision:verdict event (docs/supervisor-gate-design.md).
76
+ ...(criteria ? { criteria: criteria.trim() } : {}),
74
77
  oracle: oracle ? { command: oracle, timeout_sec: 600 } : null,
75
78
  evaluator: { enabled: evaluatorOn, model },
76
79
  max_iterations: max,
@@ -16,6 +16,7 @@ import { readFileSync, writeFileSync, renameSync, existsSync, appendFileSync, st
16
16
  import { dirname, join, resolve, relative } from 'node:path';
17
17
  import { fileURLToPath } from 'node:url';
18
18
  import { stateFingerprint } from './state-fp.mjs';
19
+ import { readEffectiveChecklist, evaluateChecklist, allPassing, summarize, writeChecklistStatuses } from './checklist.mjs';
19
20
 
20
21
  const HERE = dirname(fileURLToPath(import.meta.url));
21
22
  // Resolve the loop home from the per-process env the daemon injects
@@ -131,7 +132,26 @@ if (evaluatorOn) {
131
132
  }
132
133
  }
133
134
 
134
- const done = oraclePass && evaluatorPass;
135
+ // --- (3) Checklist (the loop-engineering criteria atom) -----------------
136
+ // The effective checklist = project invariants ∪ session goals. Each item with an
137
+ // oracle is re-evaluated here (regression check); refreshed statuses are persisted
138
+ // so the UI + agent see live state. No-op when no checklist.json exists anywhere
139
+ // (allPassing([]) === true) — fully backward-compatible with criteria-only loops.
140
+ let checklistPass = true;
141
+ let checklistDetail = 'no checklist';
142
+ try {
143
+ const items = evaluateChecklist(readEffectiveChecklist(LOOP_DIR, PROJECT), PROJECT);
144
+ if (items.length > 0) {
145
+ writeChecklistStatuses(LOOP_DIR, PROJECT, items);
146
+ checklistPass = allPassing(items);
147
+ const notDone = items.filter((i) => i.status !== 'done');
148
+ checklistDetail = checklistPass
149
+ ? `checklist: ${summarize(items)} — all done`
150
+ : `checklist: ${summarize(items)}\n--- not yet done ---\n${notDone.map((i) => `[${i.scope}] ${i.text}${i._oracle ? ` (oracle: ${i._oracle})` : ''}`).join('\n')}`;
151
+ }
152
+ } catch { /* checklist is best-effort; never let it trap the gate */ }
153
+
154
+ const done = oraclePass && evaluatorPass && checklistPass;
135
155
 
136
156
  // --- Decide -------------------------------------------------------------
137
157
  const now = new Date().toISOString();
@@ -170,7 +190,7 @@ if (giveUp) {
170
190
  }
171
191
 
172
192
  writeJSONAtomic(STATE, { ...state, iteration: nextIter, phase: 'continue',
173
- last_iteration_at: now, last_oracle: oracleDetail, last_eval: evaluatorDetail, ...tokenField });
193
+ last_iteration_at: now, last_oracle: oracleDetail, last_eval: evaluatorDetail, last_checklist: checklistDetail, ...tokenField });
174
194
 
175
195
  appendHistory({ ts: now, iteration: nextIter, decision: 'continue', oracle: oraclePass, evaluator: evaluatorPass, detail: oraclePass ? evaluatorDetail : oracleDetail });
176
196
 
@@ -180,4 +200,5 @@ const STATEFP_REL = relative(PROJECT, join(LOOP_DIR, 'bin', 'state-fp.mjs')) ||
180
200
  const evalHint = evaluatorOn && !evaluatorPass && oraclePass
181
201
  ? `\n\nThe code looks like it may be ready, but you must get an independent verdict: spawn the \`loop-evaluator\` subagent (or a fresh Task agent with a skeptical reviewer prompt) to judge the current diff against LOOP.md, then write its result to \`${VERDICT_REL}\` as {"verdict":"done"|"continue","reason":"...","guidance":"...","state_fp":"<run: node ${STATEFP_REL}>"}. Do not write the verdict yourself.`
182
202
  : '';
183
- block(`Loop is not complete${remaining}. Keep working on the task in LOOP.md.\n\n${oracleDetail}\n${evaluatorOn ? '\n' + evaluatorDetail : ''}${evalHint}\n\nUpdate LOOP.md progress, fix the blocking issue, then finish your turn again to be re-checked.`);
203
+ const checklistHint = !checklistPass ? `\n\n${checklistDetail}\nWork the items above until each one's oracle passes; finished items must stay green (regressions re-open).` : '';
204
+ block(`Loop is not complete${remaining}. Keep working on the task in LOOP.md.\n\n${oracleDetail}\n${evaluatorOn ? '\n' + evaluatorDetail : ''}${checklistHint}${evalHint}\n\nUpdate LOOP.md progress, fix the blocking issue, then finish your turn again to be re-checked.`);
@@ -19,7 +19,7 @@ mkdirSync(join(root, '.svamp'), { recursive: true });
19
19
 
20
20
  console.log('scope merge + normalization');
21
21
  writeFileSync(projectChecklistPath(projectDir), JSON.stringify({ items: [
22
- { text: 'tests pass', oracle: 'true', status: 'passing' },
22
+ { text: 'tests pass', oracle: 'true', status: 'done' },
23
23
  ] }));
24
24
  writeFileSync(sessionChecklistPath(loopDir), JSON.stringify({ items: [
25
25
  { text: 'add feature', status: 'done' }, // 'done' is canonical ('passing' is its legacy alias)
@@ -30,22 +30,22 @@ let eff = readEffectiveChecklist(loopDir, projectDir);
30
30
  eq(eff.length, 3, 'effective = project ∪ session, blanks dropped');
31
31
  eq(eff[0].scope, 'project', 'project items come first');
32
32
  eq(eff[0].text, 'tests pass', 'project item text');
33
- eq(eff[1].status, 'passing', "'done' normalized to passing");
33
+ eq(eff[1].status, 'done', "'done' normalized to done");
34
34
  ok(eff.map(i => i.scope).join(',') === 'project,session,session', 'scope tags correct');
35
35
 
36
36
  console.log('evaluate — oracle pass/fail drives status (regression check)');
37
37
  const evaluated = evaluateChecklist(eff, projectDir);
38
- eq(evaluated.find(i => i.text === 'tests pass').status, 'passing', 'oracle `true` → passing');
39
- eq(evaluated.find(i => i.text === 'no TODOs').status, 'failing', 'oracle `false` → failing');
40
- eq(evaluated.find(i => i.text === 'add feature').status, 'passing', 'no-oracle item keeps stored status');
38
+ eq(evaluated.find(i => i.text === 'tests pass').status, 'done', 'oracle `true` → passing');
39
+ eq(evaluated.find(i => i.text === 'no TODOs').status, 'blocked', 'oracle `false` → failing');
40
+ eq(evaluated.find(i => i.text === 'add feature').status, 'done', 'no-oracle item keeps stored status');
41
41
 
42
42
  console.log('allPassing gate');
43
- ok(!allPassing(evaluated), 'not all passing while one oracle fails');
43
+ ok(!allPassing(evaluated), 'not all done while one oracle fails');
44
44
  ok(allPassing([]), 'empty list is trivially satisfied');
45
- ok(allPassing(evaluated.map(i => ({ ...i, status: 'passing' }))), 'all passing → true');
45
+ ok(allPassing(evaluated.map(i => ({ ...i, status: 'done' }))), 'all done → true');
46
46
 
47
47
  console.log('summarize');
48
- ok(summarize(evaluated).startsWith('2/3 passing'), `summary reads "${summarize(evaluated)}"`);
48
+ ok(summarize(evaluated).startsWith('2/3 done'), `summary reads "${summarize(evaluated)}"`);
49
49
 
50
50
  console.log('persist statuses back to the right scope files');
51
51
  writeChecklistStatuses(loopDir, projectDir, evaluated);
@@ -54,12 +54,33 @@ const sess = JSON.parse(readFileSync(sessionChecklistPath(loopDir), 'utf-8'));
54
54
  eq(proj.items.length, 1, 'project file holds only project items');
55
55
  eq(sess.items.length, 2, 'session file holds only session items');
56
56
  ok(proj.items[0].scope === undefined, 'scope stripped from persisted file');
57
- ok(sess.items.find(i => i.text === 'no TODOs').status === 'failing', 'failing status persisted (UI will show it)');
57
+ ok(sess.items.find(i => i.text === 'no TODOs').status === 'blocked', 'blocked status persisted (UI will show it)');
58
58
 
59
59
  // regression: a re-read after persist is stable
60
60
  const reEff = readEffectiveChecklist(loopDir, projectDir);
61
61
  eq(reEff.length, 3, 're-read after persist is stable');
62
62
 
63
+ console.log('canonical atom shape — eval.cmd oracle, disposition, ItemStatus, field round-trip');
64
+ const root2 = mkdtempSync(join(tmpdir(), 'cl-atom-'));
65
+ const loopDir2 = join(root2, '.svamp', 'sessA', 'loop');
66
+ mkdirSync(loopDir2, { recursive: true });
67
+ writeFileSync(sessionChecklistPath(loopDir2), JSON.stringify({ items: [
68
+ { id: 'a', text: 'build green', disposition: 'inline', eval: { type: 'oracle', cmd: 'true' }, status: 'todo', order: 0 },
69
+ { id: 'b', text: 'lint clean', disposition: 'inline', eval: { type: 'oracle', cmd: 'false' }, status: 'todo' },
70
+ { id: 'c', text: 'ship the API', disposition: 'delegated', status: 'active', child: { sessionId: 'x', branch: 'feat/api' } },
71
+ ] }));
72
+ const atom = evaluateChecklist(readEffectiveChecklist(loopDir2, root2), root2);
73
+ eq(atom.find(i => i.id === 'a').status, 'done', 'eval.cmd `true` → done');
74
+ eq(atom.find(i => i.id === 'b').status, 'blocked', 'eval.cmd `false` → blocked');
75
+ eq(atom.find(i => i.id === 'c').status, 'active', 'delegated item NOT oracle-evaluated (child-gated), keeps status');
76
+ ok(!allPassing(atom), 'not all done while an inline oracle fails');
77
+ writeChecklistStatuses(loopDir2, root2, atom);
78
+ const persisted = JSON.parse(readFileSync(sessionChecklistPath(loopDir2), 'utf-8')).items;
79
+ const cItem = persisted.find(i => i.id === 'c');
80
+ ok(cItem.disposition === 'delegated' && cItem.child?.branch === 'feat/api', 'atom fields (disposition/child) round-trip — gate never strips them');
81
+ ok(persisted.find(i => i.id === 'a').eval?.cmd === 'true' && !('_oracle' in persisted.find(i => i.id === 'a')), 'eval preserved, transient _oracle stripped');
82
+ rmSync(root2, { recursive: true, force: true });
83
+
63
84
  rmSync(root, { recursive: true, force: true });
64
85
  console.log(`\nchecklist: ${passed} passed, ${failed} failed`);
65
86
  process.exit(failed ? 1 : 0);
package/dist/cli.mjs CHANGED
@@ -390,7 +390,7 @@ async function main() {
390
390
  } else if (!subcommand || subcommand === "start") {
391
391
  await handleInteractiveCommand();
392
392
  } else if (subcommand === "--version" || subcommand === "-v") {
393
- const pkg = await import('./package-D6yiDaV4.mjs').catch(() => ({ default: { version: "unknown" } }));
393
+ const pkg = await import('./package-B5rxAK5Z.mjs').catch(() => ({ default: { version: "unknown" } }));
394
394
  console.log(`svamp version: ${pkg.default.version}`);
395
395
  } else {
396
396
  console.error(`Unknown command: ${subcommand}`);
@@ -1,5 +1,5 @@
1
1
  var name = "svamp-cli";
2
- var version = "0.2.121";
2
+ var version = "0.2.124";
3
3
  var description = "Svamp CLI — AI workspace daemon on Hypha Cloud";
4
4
  var author = "Amun AI AB";
5
5
  var license = "SEE LICENSE IN LICENSE";
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "svamp-cli",
3
- "version": "0.2.122",
3
+ "version": "0.2.124",
4
4
  "description": "Svamp CLI — AI workspace daemon on Hypha Cloud",
5
5
  "author": "Amun AI AB",
6
6
  "license": "SEE LICENSE IN LICENSE",