npm - svamp-cli - Versions diffs - 0.2.122 → 0.2.124 - Mend

svamp-cli 0.2.122 → 0.2.124

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (8)

package/bin/skills/loop/SKILL.md +1 -1
package/bin/skills/loop/bin/checklist.mjs +66 -29
package/bin/skills/loop/bin/loop-init.mjs +4 -1
package/bin/skills/loop/bin/stop-gate.mjs +24 -3
package/bin/skills/loop/test/test-checklist.mjs +30 -9
package/dist/cli.mjs +1 -1
package/dist/{package-D6yiDaV4.mjs → package-B5rxAK5Z.mjs} +1 -1
package/package.json +1 -1

package/bin/skills/loop/SKILL.md CHANGED Viewed

@@ -1,6 +1,6 @@
 ---
 name: loop
-version: 0.3.2
+version: 0.4.0
 description: Run a task as a reliable, self-verifying loop — iterate until objective exit conditions are met, with an independent evaluator instead of self-judging. Use when a task needs repeated iterations until "done" (fix until tests pass, refactor until clean, build until a spec is met, autonomous long-running work).
 ---

package/bin/skills/loop/bin/checklist.mjs CHANGED Viewed

@@ -1,20 +1,49 @@
-// checklist.mjs — the loop-engineering task/criteria atom.
-// See docs/svamp-loop-engineering-vision.md. A checklist is a list of evaluable
-// items persisted as JSON, in two layered scopes:
+// checklist.mjs — the loop-engineering checklist atom, gate side.
+// See docs/checklist-atom-spec.md + docs/svamp-loop-engineering-vision.md. A checklist
+// is a list of evaluable goal items persisted as JSON, in two layered scopes:
 //   session: <loopDir>/checklist.json             (this session's goal)
 //   project: <projectDir>/.svamp/checklist.json   (durable invariants, all sessions)
-// The effective checklist a session enforces = project ∪ session. Each item is
-// oracle-checked (a pass/fail command) or agent-evaluated. Done ≠ gone: a passing
-// item STAYS in the list and is re-verified every loop, so it can regress to failing.
-// The supervisor only lets the turn end when ALL effective items are passing.
+// Effective checklist a session enforces = project ∪ session. Each item is oracle-checked
+// (an eval cmd) or agent/human-evaluated. Done ≠ gone: a 'done' item STAYS and is
+// re-verified every loop, so it can regress to 'blocked'. The gate lets the turn end only
+// when ALL effective items are 'done'.
+//
+// This is the GATE runtime (a .mjs skill — it cannot import the TS atom in
+// checklist/core.ts), so it mirrors the canonical vocab by value: ItemStatus +
+// canonicalChecklistStatus are kept in sync with sync/checklistModel.ts + parseMarkdown.ts.
 import { readFileSync, writeFileSync, existsSync, mkdirSync } from 'node:fs';
 import { join, dirname } from 'node:path';
 import { execSync } from 'node:child_process';
+// CANONICAL: the session checklist lives INSIDE the loop dir at
+// <project>/.svamp/<sid>/loop/checklist.json — beside the other supervisor state
+// (loop-state.json, supervisor-verdict.json). Matches the daemon writer
+// (checklist/core.ts checklistPath) + the frontend (sync/ops.ts sessionChecklistRel).
 export function sessionChecklistPath(loopDir) { return join(loopDir, 'checklist.json'); }
 export function projectChecklistPath(projectDir) { return join(projectDir, '.svamp', 'checklist.json'); }
-const STATUSES = ['pending', 'passing', 'failing'];
+/**
+ * Map any accepted token — canonical OR the legacy loop aliases (pending/passing/failing) —
+ * to the canonical ItemStatus set. Mirrors checklistModel.canonicalChecklistStatus.
+ */
+export function canonicalChecklistStatus(raw) {
+  switch (String(raw ?? '').toLowerCase()) {
+    case 'passing': case 'done': return 'done';
+    case 'failing': case 'blocked': return 'blocked';
+    case 'pending': case 'todo': case '': return 'todo';
+    case 'active': case 'in_progress': case 'in-progress': return 'active';
+    case 'verifying': return 'verifying';
+    case 'awaiting_review': case 'awaiting-review': case 'review': return 'awaiting_review';
+    case 'rework': return 'rework';
+    default: return 'todo';
+  }
+}
+/** The oracle command for an item: the atom's eval.cmd (type:'oracle'), else legacy item.oracle. */
+function itemOracle(it) {
+  if (it?.eval?.type === 'oracle' && typeof it.eval.cmd === 'string' && it.eval.cmd.trim()) return it.eval.cmd.trim();
+  return typeof it?.oracle === 'string' && it.oracle.trim() ? it.oracle.trim() : null;
+}
 function readOne(path, scope) {
   try {
@@ -22,12 +51,16 @@ function readOne(path, scope) {
     const j = JSON.parse(readFileSync(path, 'utf-8'));
     const items = Array.isArray(j) ? j : (Array.isArray(j?.items) ? j.items : []);
     return items.map((it, i) => ({
-      id: typeof it.id === 'string' && it.id ? it.id : `${scope}-${i}`,
+      // Preserve the full atom item (eval, child, disposition, order, …) so the gate
+      // never strips fields the UI/core own; it only refreshes `status`.
+      ...it,
+      id: typeof it?.id === 'string' && it.id ? it.id : `${scope}-${i}`,
       text: String(it?.text ?? '').trim(),
-      // 'done' is a friendly alias for 'passing'.
-      status: it?.status === 'done' ? 'passing' : (STATUSES.includes(it?.status) ? it.status : 'pending'),
-      oracle: typeof it?.oracle === 'string' && it.oracle.trim() ? it.oracle.trim() : null,
+      status: canonicalChecklistStatus(it?.status),
+      // transient helpers (`scope` plus the underscored fields) — used for evaluation, stripped before persist:
       scope,
+      _oracle: itemOracle(it),
+      _delegated: it?.disposition === 'delegated',
     })).filter((it) => it.text);
   } catch { return []; }
 }
@@ -41,47 +74,51 @@ export function readEffectiveChecklist(loopDir, projectDir) {
 }
 /**
- * Run each item's oracle (if it has one) and return items with refreshed status.
- * Items WITHOUT an oracle keep their stored status (those are agent-evaluated, not
- * machine-checkable here). This is the per-loop regression check: a previously
- * passing item whose oracle now fails flips to 'failing'.
+ * Run each INLINE item's oracle (if any) and return items with refreshed status:
+ * oracle pass → 'done', oracle fail → 'blocked'. This is the per-loop regression check
+ * (a previously 'done' item whose oracle now fails flips to 'blocked'). Delegated items
+ * (gated by their child) and non-oracle items (agent/human-evaluated) keep their status.
  */
 export function evaluateChecklist(items, projectDir, timeoutSec = 600) {
   return items.map((it) => {
-    if (!it.oracle) return it;
+    if (it._delegated || !it._oracle) return it;
     try {
-      execSync(it.oracle, { cwd: projectDir, stdio: 'pipe', maxBuffer: 16 * 1024 * 1024, timeout: timeoutSec * 1000 });
-      return { ...it, status: 'passing' };
+      execSync(it._oracle, { cwd: projectDir, stdio: 'pipe', maxBuffer: 16 * 1024 * 1024, timeout: timeoutSec * 1000 });
+      return { ...it, status: 'done' };
     } catch {
-      return { ...it, status: 'failing' };
+      return { ...it, status: 'blocked' };
     }
   });
 }
-/** True when every effective item is passing (an empty list is trivially satisfied). */
+/** True when every effective item is 'done' (an empty list is trivially satisfied). */
 export function allPassing(items) {
-  return items.length === 0 ? true : items.every((it) => it.status === 'passing');
+  return items.length === 0 ? true : items.every((it) => it.status === 'done');
 }
 /** A one-line summary for the gate's history/state. */
 export function summarize(items) {
-  const pass = items.filter((i) => i.status === 'passing').length;
-  const fail = items.filter((i) => i.status === 'failing').length;
-  return `${pass}/${items.length} passing${fail ? `, ${fail} failing` : ''}`;
+  const done = items.filter((i) => i.status === 'done').length;
+  const blocked = items.filter((i) => i.status === 'blocked' || i.status === 'rework').length;
+  return `${done}/${items.length} done${blocked ? `, ${blocked} blocked` : ''}`;
 }
 /**
- * Persist refreshed statuses back to each scope's file, so the UI renderer + the
- * agent see live state. Writes the canonical { items: [...] } shape (scope stripped —
- * it's implied by which file the item lives in).
+ * Persist refreshed statuses back to each scope's file so the UI + agent see live state.
+ * Preserves the full atom item shape — only the transient helper fields (scope/_oracle/
+ * _delegated) are stripped; everything else (eval, child, disposition, order, …) round-trips.
  */
 export function writeChecklistStatuses(loopDir, projectDir, items) {
+  const strip = (it) => {
+    const { scope: _s, _oracle, _delegated, ...rest } = it;
+    return rest;
+  };
   const targets = [
     ['session', sessionChecklistPath(loopDir)],
     ['project', projectChecklistPath(projectDir)],
   ];
   for (const [scope, path] of targets) {
-    const scoped = items.filter((it) => it.scope === scope).map(({ scope: _s, ...rest }) => rest);
+    const scoped = items.filter((it) => it.scope === scope).map(strip);
     if (scoped.length === 0 && !existsSync(path)) continue; // don't create empty files
     try {
       mkdirSync(dirname(path), { recursive: true });

package/bin/skills/loop/bin/loop-init.mjs CHANGED Viewed

@@ -58,7 +58,7 @@ rmSync(join(loopDir, 'evaluator-verdict.json'), { force: true });
 rmSync(join(loopDir, 'history.jsonl'), { force: true });
 // 1. Copy hook scripts so the project is self-contained.
-for (const f of ['state-fp.mjs', 'stop-gate.mjs', 'inject-loop.mjs', 'loop-status.mjs', 'precompact.mjs']) {
+for (const f of ['state-fp.mjs', 'stop-gate.mjs', 'checklist.mjs', 'inject-loop.mjs', 'loop-status.mjs', 'precompact.mjs']) {
   const dest = join(binDir, f);
   copyFileSync(join(HERE, f), dest);
   try { chmodSync(dest, 0o755); } catch {}
@@ -71,6 +71,9 @@ const config = {
   // and resolve their own dir relatively) read this to run the oracle + fingerprint the
   // work product, since their depth no longer encodes the project root.
   project_dir: dir,
+  // The success contract — the durable thing the gate judges against. Read by the daemon
+  // to populate the supervision:verdict event (docs/supervisor-gate-design.md).
+  ...(criteria ? { criteria: criteria.trim() } : {}),
   oracle: oracle ? { command: oracle, timeout_sec: 600 } : null,
   evaluator: { enabled: evaluatorOn, model },
   max_iterations: max,

package/bin/skills/loop/bin/stop-gate.mjs CHANGED Viewed

@@ -16,6 +16,7 @@ import { readFileSync, writeFileSync, renameSync, existsSync, appendFileSync, st
 import { dirname, join, resolve, relative } from 'node:path';
 import { fileURLToPath } from 'node:url';
 import { stateFingerprint } from './state-fp.mjs';
+import { readEffectiveChecklist, evaluateChecklist, allPassing, summarize, writeChecklistStatuses } from './checklist.mjs';
 const HERE = dirname(fileURLToPath(import.meta.url));
 // Resolve the loop home from the per-process env the daemon injects
@@ -131,7 +132,26 @@ if (evaluatorOn) {
   }
 }
-const done = oraclePass && evaluatorPass;
+// --- (3) Checklist (the loop-engineering criteria atom) -----------------
+// The effective checklist = project invariants ∪ session goals. Each item with an
+// oracle is re-evaluated here (regression check); refreshed statuses are persisted
+// so the UI + agent see live state. No-op when no checklist.json exists anywhere
+// (allPassing([]) === true) — fully backward-compatible with criteria-only loops.
+let checklistPass = true;
+let checklistDetail = 'no checklist';
+try {
+  const items = evaluateChecklist(readEffectiveChecklist(LOOP_DIR, PROJECT), PROJECT);
+  if (items.length > 0) {
+    writeChecklistStatuses(LOOP_DIR, PROJECT, items);
+    checklistPass = allPassing(items);
+    const notDone = items.filter((i) => i.status !== 'done');
+    checklistDetail = checklistPass
+      ? `checklist: ${summarize(items)} — all done`
+      : `checklist: ${summarize(items)}\n--- not yet done ---\n${notDone.map((i) => `[${i.scope}] ${i.text}${i._oracle ? ` (oracle: ${i._oracle})` : ''}`).join('\n')}`;
+  }
+} catch { /* checklist is best-effort; never let it trap the gate */ }
+const done = oraclePass && evaluatorPass && checklistPass;
 // --- Decide -------------------------------------------------------------
 const now = new Date().toISOString();
@@ -170,7 +190,7 @@ if (giveUp) {
 }
 writeJSONAtomic(STATE, { ...state, iteration: nextIter, phase: 'continue',
-  last_iteration_at: now, last_oracle: oracleDetail, last_eval: evaluatorDetail, ...tokenField });
+  last_iteration_at: now, last_oracle: oracleDetail, last_eval: evaluatorDetail, last_checklist: checklistDetail, ...tokenField });
 appendHistory({ ts: now, iteration: nextIter, decision: 'continue', oracle: oraclePass, evaluator: evaluatorPass, detail: oraclePass ? evaluatorDetail : oracleDetail });
@@ -180,4 +200,5 @@ const STATEFP_REL = relative(PROJECT, join(LOOP_DIR, 'bin', 'state-fp.mjs')) ||
 const evalHint = evaluatorOn && !evaluatorPass && oraclePass
   ? `\n\nThe code looks like it may be ready, but you must get an independent verdict: spawn the \`loop-evaluator\` subagent (or a fresh Task agent with a skeptical reviewer prompt) to judge the current diff against LOOP.md, then write its result to \`${VERDICT_REL}\` as {"verdict":"done"|"continue","reason":"...","guidance":"...","state_fp":"<run: node ${STATEFP_REL}>"}. Do not write the verdict yourself.`
   : '';
-block(`Loop is not complete${remaining}. Keep working on the task in LOOP.md.\n\n${oracleDetail}\n${evaluatorOn ? '\n' + evaluatorDetail : ''}${evalHint}\n\nUpdate LOOP.md progress, fix the blocking issue, then finish your turn again to be re-checked.`);
+const checklistHint = !checklistPass ? `\n\n${checklistDetail}\nWork the items above until each one's oracle passes; finished items must stay green (regressions re-open).` : '';
+block(`Loop is not complete${remaining}. Keep working on the task in LOOP.md.\n\n${oracleDetail}\n${evaluatorOn ? '\n' + evaluatorDetail : ''}${checklistHint}${evalHint}\n\nUpdate LOOP.md progress, fix the blocking issue, then finish your turn again to be re-checked.`);

package/bin/skills/loop/test/test-checklist.mjs CHANGED Viewed

@@ -19,7 +19,7 @@ mkdirSync(join(root, '.svamp'), { recursive: true });
 console.log('scope merge + normalization');
 writeFileSync(projectChecklistPath(projectDir), JSON.stringify({ items: [
-  { text: 'tests pass', oracle: 'true', status: 'passing' },
+  { text: 'tests pass', oracle: 'true', status: 'done' },
 ] }));
 writeFileSync(sessionChecklistPath(loopDir), JSON.stringify({ items: [
   { text: 'add feature', status: 'done' },              // 'done' alias → passing
@@ -30,22 +30,22 @@ let eff = readEffectiveChecklist(loopDir, projectDir);
 eq(eff.length, 3, 'effective = project ∪ session, blanks dropped');
 eq(eff[0].scope, 'project', 'project items come first');
 eq(eff[0].text, 'tests pass', 'project item text');
-eq(eff[1].status, 'passing', "'done' normalized to passing");
+eq(eff[1].status, 'done', "'done' normalized to done");
 ok(eff.map(i => i.scope).join(',') === 'project,session,session', 'scope tags correct');
 console.log('evaluate — oracle pass/fail drives status (regression check)');
 const evaluated = evaluateChecklist(eff, projectDir);
-eq(evaluated.find(i => i.text === 'tests pass').status, 'passing', 'oracle `true` → passing');
-eq(evaluated.find(i => i.text === 'no TODOs').status, 'failing', 'oracle `false` → failing');
-eq(evaluated.find(i => i.text === 'add feature').status, 'passing', 'no-oracle item keeps stored status');
+eq(evaluated.find(i => i.text === 'tests pass').status, 'done', 'oracle `true` → passing');
+eq(evaluated.find(i => i.text === 'no TODOs').status, 'blocked', 'oracle `false` → failing');
+eq(evaluated.find(i => i.text === 'add feature').status, 'done', 'no-oracle item keeps stored status');
 console.log('allPassing gate');
-ok(!allPassing(evaluated), 'not all passing while one oracle fails');
+ok(!allPassing(evaluated), 'not all done while one oracle fails');
 ok(allPassing([]), 'empty list is trivially satisfied');
-ok(allPassing(evaluated.map(i => ({ ...i, status: 'passing' }))), 'all passing → true');
+ok(allPassing(evaluated.map(i => ({ ...i, status: 'done' }))), 'all done → true');
 console.log('summarize');
-ok(summarize(evaluated).startsWith('2/3 passing'), `summary reads "${summarize(evaluated)}"`);
+ok(summarize(evaluated).startsWith('2/3 done'), `summary reads "${summarize(evaluated)}"`);
 console.log('persist statuses back to the right scope files');
 writeChecklistStatuses(loopDir, projectDir, evaluated);
@@ -54,12 +54,33 @@ const sess = JSON.parse(readFileSync(sessionChecklistPath(loopDir), 'utf-8'));
 eq(proj.items.length, 1, 'project file holds only project items');
 eq(sess.items.length, 2, 'session file holds only session items');
 ok(proj.items[0].scope === undefined, 'scope stripped from persisted file');
-ok(sess.items.find(i => i.text === 'no TODOs').status === 'failing', 'failing status persisted (UI will show it)');
+ok(sess.items.find(i => i.text === 'no TODOs').status === 'blocked', 'blocked status persisted (UI will show it)');
 // regression: a re-read after persist is stable
 const reEff = readEffectiveChecklist(loopDir, projectDir);
 eq(reEff.length, 3, 're-read after persist is stable');
+console.log('canonical atom shape — eval.cmd oracle, disposition, ItemStatus, field round-trip');
+const root2 = mkdtempSync(join(tmpdir(), 'cl-atom-'));
+const loopDir2 = join(root2, '.svamp', 'sessA', 'loop');
+mkdirSync(loopDir2, { recursive: true });
+writeFileSync(sessionChecklistPath(loopDir2), JSON.stringify({ items: [
+  { id: 'a', text: 'build green', disposition: 'inline', eval: { type: 'oracle', cmd: 'true' }, status: 'todo', order: 0 },
+  { id: 'b', text: 'lint clean', disposition: 'inline', eval: { type: 'oracle', cmd: 'false' }, status: 'todo' },
+  { id: 'c', text: 'ship the API', disposition: 'delegated', status: 'active', child: { sessionId: 'x', branch: 'feat/api' } },
+] }));
+const atom = evaluateChecklist(readEffectiveChecklist(loopDir2, root2), root2);
+eq(atom.find(i => i.id === 'a').status, 'done', 'eval.cmd `true` → done');
+eq(atom.find(i => i.id === 'b').status, 'blocked', 'eval.cmd `false` → blocked');
+eq(atom.find(i => i.id === 'c').status, 'active', 'delegated item NOT oracle-evaluated (child-gated), keeps status');
+ok(!allPassing(atom), 'not all done while an inline oracle fails');
+writeChecklistStatuses(loopDir2, root2, atom);
+const persisted = JSON.parse(readFileSync(sessionChecklistPath(loopDir2), 'utf-8')).items;
+const cItem = persisted.find(i => i.id === 'c');
+ok(cItem.disposition === 'delegated' && cItem.child?.branch === 'feat/api', 'atom fields (disposition/child) round-trip — gate never strips them');
+ok(persisted.find(i => i.id === 'a').eval?.cmd === 'true' && !('_oracle' in persisted.find(i => i.id === 'a')), 'eval preserved, transient _oracle stripped');
+rmSync(root2, { recursive: true, force: true });
 rmSync(root, { recursive: true, force: true });
 console.log(`\nchecklist: ${passed} passed, ${failed} failed`);
 process.exit(failed ? 1 : 0);

package/dist/cli.mjs CHANGED Viewed

@@ -390,7 +390,7 @@ async function main() {
   } else if (!subcommand || subcommand === "start") {
     await handleInteractiveCommand();
   } else if (subcommand === "--version" || subcommand === "-v") {
-    const pkg = await import('./package-D6yiDaV4.mjs').catch(() => ({ default: { version: "unknown" } }));
+    const pkg = await import('./package-B5rxAK5Z.mjs').catch(() => ({ default: { version: "unknown" } }));
     console.log(`svamp version: ${pkg.default.version}`);
   } else {
     console.error(`Unknown command: ${subcommand}`);

package/dist/{package-D6yiDaV4.mjs → package-B5rxAK5Z.mjs} RENAMED Viewed

@@ -1,5 +1,5 @@
 var name = "svamp-cli";
-var version = "0.2.121";
+var version = "0.2.124";
 var description = "Svamp CLI — AI workspace daemon on Hypha Cloud";
 var author = "Amun AI AB";
 var license = "SEE LICENSE IN LICENSE";

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "svamp-cli",
-  "version": "0.2.122",
+  "version": "0.2.124",
   "description": "Svamp CLI — AI workspace daemon on Hypha Cloud",
   "author": "Amun AI AB",
   "license": "SEE LICENSE IN LICENSE",