npm - svamp-cli - Versions diffs - 0.2.118 → 0.2.120 - Mend

svamp-cli 0.2.118 → 0.2.120

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25)

package/bin/skills/loop/SKILL.md CHANGED Viewed

@@ -1,6 +1,6 @@
 ---
 name: loop
-version: 0.3.2
+version: 0.4.0
 description: Run a task as a reliable, self-verifying loop — iterate until objective exit conditions are met, with an independent evaluator instead of self-judging. Use when a task needs repeated iterations until "done" (fix until tests pass, refactor until clean, build until a spec is met, autonomous long-running work).
 ---

package/bin/skills/loop/bin/checklist.mjs ADDED Viewed

@@ -0,0 +1,94 @@
+// checklist.mjs — the loop-engineering task/criteria atom.
+// See docs/svamp-loop-engineering-vision.md. A checklist is a list of evaluable
+// items persisted as JSON, in two layered scopes:
+//   session: <loopDir>/checklist.json             (this session's goal)
+//   project: <projectDir>/.svamp/checklist.json   (durable invariants, all sessions)
+// The effective checklist a session enforces = project ∪ session. Each item is
+// oracle-checked (a pass/fail command) or agent-evaluated. Done ≠ gone: a passing
+// item STAYS in the list and is re-verified every loop, so it can regress to failing.
+// The supervisor only lets the turn end when ALL effective items are passing.
+import { readFileSync, writeFileSync, existsSync, mkdirSync } from 'node:fs';
+import { join, dirname } from 'node:path';
+import { execSync } from 'node:child_process';
+// The session checklist lives at <project>/.svamp/<sid>/checklist.json — a SIBLING of
+// the loop/ dir, not inside it (the checklist is session-scoped, independent of any
+// active loop, and the UI/sync writes it there too: sync/ops.ts sessionChecklistRel).
+export function sessionChecklistPath(loopDir) { return join(dirname(loopDir), 'checklist.json'); }
+export function projectChecklistPath(projectDir) { return join(projectDir, '.svamp', 'checklist.json'); }
+const STATUSES = ['pending', 'passing', 'failing'];
+function readOne(path, scope) {
+  try {
+    if (!existsSync(path)) return [];
+    const j = JSON.parse(readFileSync(path, 'utf-8'));
+    const items = Array.isArray(j) ? j : (Array.isArray(j?.items) ? j.items : []);
+    return items.map((it, i) => ({
+      id: typeof it.id === 'string' && it.id ? it.id : `${scope}-${i}`,
+      text: String(it?.text ?? '').trim(),
+      // 'done' is a friendly alias for 'passing'.
+      status: it?.status === 'done' ? 'passing' : (STATUSES.includes(it?.status) ? it.status : 'pending'),
+      oracle: typeof it?.oracle === 'string' && it.oracle.trim() ? it.oracle.trim() : null,
+      scope,
+    })).filter((it) => it.text);
+  } catch { return []; }
+}
+/** Effective checklist = project invariants ∪ session goals (project first, then session). */
+export function readEffectiveChecklist(loopDir, projectDir) {
+  return [
+    ...readOne(projectChecklistPath(projectDir), 'project'),
+    ...readOne(sessionChecklistPath(loopDir), 'session'),
+  ];
+}
+/**
+ * Run each item's oracle (if it has one) and return items with refreshed status.
+ * Items WITHOUT an oracle keep their stored status (those are agent-evaluated, not
+ * machine-checkable here). This is the per-loop regression check: a previously
+ * passing item whose oracle now fails flips to 'failing'.
+ */
+export function evaluateChecklist(items, projectDir, timeoutSec = 600) {
+  return items.map((it) => {
+    if (!it.oracle) return it;
+    try {
+      execSync(it.oracle, { cwd: projectDir, stdio: 'pipe', maxBuffer: 16 * 1024 * 1024, timeout: timeoutSec * 1000 });
+      return { ...it, status: 'passing' };
+    } catch {
+      return { ...it, status: 'failing' };
+    }
+  });
+}
+/** True when every effective item is passing (an empty list is trivially satisfied). */
+export function allPassing(items) {
+  return items.length === 0 ? true : items.every((it) => it.status === 'passing');
+}
+/** A one-line summary for the gate's history/state. */
+export function summarize(items) {
+  const pass = items.filter((i) => i.status === 'passing').length;
+  const fail = items.filter((i) => i.status === 'failing').length;
+  return `${pass}/${items.length} passing${fail ? `, ${fail} failing` : ''}`;
+}
+/**
+ * Persist refreshed statuses back to each scope's file, so the UI renderer + the
+ * agent see live state. Writes the canonical { items: [...] } shape (scope stripped —
+ * it's implied by which file the item lives in).
+ */
+export function writeChecklistStatuses(loopDir, projectDir, items) {
+  const targets = [
+    ['session', sessionChecklistPath(loopDir)],
+    ['project', projectChecklistPath(projectDir)],
+  ];
+  for (const [scope, path] of targets) {
+    const scoped = items.filter((it) => it.scope === scope).map(({ scope: _s, ...rest }) => rest);
+    if (scoped.length === 0 && !existsSync(path)) continue; // don't create empty files
+    try {
+      mkdirSync(dirname(path), { recursive: true });
+      writeFileSync(path, JSON.stringify({ items: scoped }, null, 2));
+    } catch { /* best-effort persistence */ }
+  }
+}

package/bin/skills/loop/bin/loop-init.mjs CHANGED Viewed

@@ -58,7 +58,7 @@ rmSync(join(loopDir, 'evaluator-verdict.json'), { force: true });
 rmSync(join(loopDir, 'history.jsonl'), { force: true });
 // 1. Copy hook scripts so the project is self-contained.
-for (const f of ['state-fp.mjs', 'stop-gate.mjs', 'inject-loop.mjs', 'loop-status.mjs', 'precompact.mjs']) {
+for (const f of ['state-fp.mjs', 'stop-gate.mjs', 'checklist.mjs', 'inject-loop.mjs', 'loop-status.mjs', 'precompact.mjs']) {
   const dest = join(binDir, f);
   copyFileSync(join(HERE, f), dest);
   try { chmodSync(dest, 0o755); } catch {}
@@ -71,6 +71,9 @@ const config = {
   // and resolve their own dir relatively) read this to run the oracle + fingerprint the
   // work product, since their depth no longer encodes the project root.
   project_dir: dir,
+  // The success contract — the durable thing the gate judges against. Read by the daemon
+  // to populate the supervision:verdict event (docs/supervisor-gate-design.md).
+  ...(criteria ? { criteria: criteria.trim() } : {}),
   oracle: oracle ? { command: oracle, timeout_sec: 600 } : null,
   evaluator: { enabled: evaluatorOn, model },
   max_iterations: max,

package/bin/skills/loop/bin/stop-gate.mjs CHANGED Viewed

@@ -16,6 +16,7 @@ import { readFileSync, writeFileSync, renameSync, existsSync, appendFileSync, st
 import { dirname, join, resolve, relative } from 'node:path';
 import { fileURLToPath } from 'node:url';
 import { stateFingerprint } from './state-fp.mjs';
+import { readEffectiveChecklist, evaluateChecklist, allPassing, summarize, writeChecklistStatuses } from './checklist.mjs';
 const HERE = dirname(fileURLToPath(import.meta.url));
 // Resolve the loop home from the per-process env the daemon injects
@@ -131,7 +132,26 @@ if (evaluatorOn) {
   }
 }
-const done = oraclePass && evaluatorPass;
+// --- (3) Checklist (the loop-engineering criteria atom) -----------------
+// The effective checklist = project invariants ∪ session goals. Each item with an
+// oracle is re-evaluated here (regression check); refreshed statuses are persisted
+// so the UI + agent see live state. No-op when no checklist.json exists anywhere
+// (allPassing([]) === true) — fully backward-compatible with criteria-only loops.
+let checklistPass = true;
+let checklistDetail = 'no checklist';
+try {
+  const items = evaluateChecklist(readEffectiveChecklist(LOOP_DIR, PROJECT), PROJECT);
+  if (items.length > 0) {
+    writeChecklistStatuses(LOOP_DIR, PROJECT, items);
+    checklistPass = allPassing(items);
+    const failing = items.filter((i) => i.status !== 'passing');
+    checklistDetail = checklistPass
+      ? `checklist: ${summarize(items)} — all passing`
+      : `checklist: ${summarize(items)}\n--- not yet passing ---\n${failing.map((i) => `[${i.scope}] ${i.text}${i.oracle ? ` (oracle: ${i.oracle})` : ''}`).join('\n')}`;
+  }
+} catch { /* checklist is best-effort; never let it trap the gate */ }
+const done = oraclePass && evaluatorPass && checklistPass;
 // --- Decide -------------------------------------------------------------
 const now = new Date().toISOString();
@@ -170,7 +190,7 @@ if (giveUp) {
 }
 writeJSONAtomic(STATE, { ...state, iteration: nextIter, phase: 'continue',
-  last_iteration_at: now, last_oracle: oracleDetail, last_eval: evaluatorDetail, ...tokenField });
+  last_iteration_at: now, last_oracle: oracleDetail, last_eval: evaluatorDetail, last_checklist: checklistDetail, ...tokenField });
 appendHistory({ ts: now, iteration: nextIter, decision: 'continue', oracle: oraclePass, evaluator: evaluatorPass, detail: oraclePass ? evaluatorDetail : oracleDetail });
@@ -180,4 +200,5 @@ const STATEFP_REL = relative(PROJECT, join(LOOP_DIR, 'bin', 'state-fp.mjs')) ||
 const evalHint = evaluatorOn && !evaluatorPass && oraclePass
   ? `\n\nThe code looks like it may be ready, but you must get an independent verdict: spawn the \`loop-evaluator\` subagent (or a fresh Task agent with a skeptical reviewer prompt) to judge the current diff against LOOP.md, then write its result to \`${VERDICT_REL}\` as {"verdict":"done"|"continue","reason":"...","guidance":"...","state_fp":"<run: node ${STATEFP_REL}>"}. Do not write the verdict yourself.`
   : '';
-block(`Loop is not complete${remaining}. Keep working on the task in LOOP.md.\n\n${oracleDetail}\n${evaluatorOn ? '\n' + evaluatorDetail : ''}${evalHint}\n\nUpdate LOOP.md progress, fix the blocking issue, then finish your turn again to be re-checked.`);
+const checklistHint = !checklistPass ? `\n\n${checklistDetail}\nWork the items above until each one's oracle passes; finished items must stay green (regressions re-open).` : '';
+block(`Loop is not complete${remaining}. Keep working on the task in LOOP.md.\n\n${oracleDetail}\n${evaluatorOn ? '\n' + evaluatorDetail : ''}${checklistHint}${evalHint}\n\nUpdate LOOP.md progress, fix the blocking issue, then finish your turn again to be re-checked.`);

package/bin/skills/loop/test/test-checklist.mjs ADDED Viewed

@@ -0,0 +1,65 @@
+// test-checklist.mjs — the loop-engineering checklist atom (read/merge/evaluate/persist).
+import { mkdtempSync, mkdirSync, writeFileSync, readFileSync, existsSync, rmSync } from 'node:fs';
+import { tmpdir } from 'node:os';
+import { join } from 'node:path';
+import {
+  readEffectiveChecklist, evaluateChecklist, allPassing, summarize,
+  writeChecklistStatuses, sessionChecklistPath, projectChecklistPath,
+} from '../bin/checklist.mjs';
+let passed = 0, failed = 0;
+function ok(cond, msg) { if (cond) { passed++; console.log(`  ✓ ${msg}`); } else { failed++; console.log(`  ✗ ${msg}`); } }
+function eq(a, b, msg) { ok(JSON.stringify(a) === JSON.stringify(b), `${msg} (got ${JSON.stringify(a)})`); }
+const root = mkdtempSync(join(tmpdir(), 'cl-test-'));
+const projectDir = root;
+const loopDir = join(root, '.svamp', 'sess1', 'loop');
+mkdirSync(loopDir, { recursive: true });
+mkdirSync(join(root, '.svamp'), { recursive: true });
+console.log('scope merge + normalization');
+writeFileSync(projectChecklistPath(projectDir), JSON.stringify({ items: [
+  { text: 'tests pass', oracle: 'true', status: 'passing' },
+] }));
+writeFileSync(sessionChecklistPath(loopDir), JSON.stringify({ items: [
+  { text: 'add feature', status: 'done' },              // 'done' alias → passing
+  { text: '   ', status: 'pending' },                    // blank → dropped
+  { text: 'no TODOs', oracle: 'false' },                 // defaults to pending
+] }));
+let eff = readEffectiveChecklist(loopDir, projectDir);
+eq(eff.length, 3, 'effective = project ∪ session, blanks dropped');
+eq(eff[0].scope, 'project', 'project items come first');
+eq(eff[0].text, 'tests pass', 'project item text');
+eq(eff[1].status, 'passing', "'done' normalized to passing");
+ok(eff.map(i => i.scope).join(',') === 'project,session,session', 'scope tags correct');
+console.log('evaluate — oracle pass/fail drives status (regression check)');
+const evaluated = evaluateChecklist(eff, projectDir);
+eq(evaluated.find(i => i.text === 'tests pass').status, 'passing', 'oracle `true` → passing');
+eq(evaluated.find(i => i.text === 'no TODOs').status, 'failing', 'oracle `false` → failing');
+eq(evaluated.find(i => i.text === 'add feature').status, 'passing', 'no-oracle item keeps stored status');
+console.log('allPassing gate');
+ok(!allPassing(evaluated), 'not all passing while one oracle fails');
+ok(allPassing([]), 'empty list is trivially satisfied');
+ok(allPassing(evaluated.map(i => ({ ...i, status: 'passing' }))), 'all passing → true');
+console.log('summarize');
+ok(summarize(evaluated).startsWith('2/3 passing'), `summary reads "${summarize(evaluated)}"`);
+console.log('persist statuses back to the right scope files');
+writeChecklistStatuses(loopDir, projectDir, evaluated);
+const proj = JSON.parse(readFileSync(projectChecklistPath(projectDir), 'utf-8'));
+const sess = JSON.parse(readFileSync(sessionChecklistPath(loopDir), 'utf-8'));
+eq(proj.items.length, 1, 'project file holds only project items');
+eq(sess.items.length, 2, 'session file holds only session items');
+ok(proj.items[0].scope === undefined, 'scope stripped from persisted file');
+ok(sess.items.find(i => i.text === 'no TODOs').status === 'failing', 'failing status persisted (UI will show it)');
+// regression: a re-read after persist is stable
+const reEff = readEffectiveChecklist(loopDir, projectDir);
+eq(reEff.length, 3, 're-read after persist is stable');
+rmSync(root, { recursive: true, force: true });
+console.log(`\nchecklist: ${passed} passed, ${failed} failed`);
+process.exit(failed ? 1 : 0);

package/dist/{agentCommands-BTkU0PQb.mjs → agentCommands-DIfofhT-.mjs} RENAMED Viewed

@@ -2,7 +2,7 @@ import { existsSync, readFileSync, mkdirSync, writeFileSync, renameSync } from '
 import { join, dirname } from 'node:path';
 import os from 'node:os';
 import { requireNotSandboxed } from './sandboxDetect-DNTcbgWD.mjs';
-import { n as shortId } from './run-9C2ogsuu.mjs';
+import { n as shortId } from './run-C4BsPJ_p.mjs';
 import 'os';
 import 'fs/promises';
 import 'fs';
@@ -96,7 +96,7 @@ async function sessionSetTitle(title) {
 }
 async function sessionSetProjectDescription(description) {
   const dir = process.cwd();
-  const { projectName, writeProjectInfo, sanitizeDescription, projectInfoPath } = await import('./run-9C2ogsuu.mjs').then(function (n) { return n.T; });
+  const { projectName, writeProjectInfo, sanitizeDescription, projectInfoPath } = await import('./run-C4BsPJ_p.mjs').then(function (n) { return n.W; });
   const desc = sanitizeDescription(description, 240);
   if (!desc) {
     console.error("Project description is empty.");
@@ -180,7 +180,7 @@ async function sessionBroadcast(action, args) {
   console.log(`Broadcast sent: ${action}`);
 }
 async function connectToMachineService() {
-  const { connectAndGetMachine } = await import('./commands-B5rek8XG.mjs');
+  const { connectAndGetMachine } = await import('./commands-CuY9G_88.mjs');
   return connectAndGetMachine();
 }
 async function inboxSend(targetSessionId, opts) {
@@ -197,7 +197,7 @@ async function inboxSend(targetSessionId, opts) {
   }
   const { server, machine } = await connectToMachineService();
   try {
-    const { resolveSessionId } = await import('./commands-B5rek8XG.mjs');
+    const { resolveSessionId } = await import('./commands-CuY9G_88.mjs');
     const sessions = await machine.listSessions();
     const match = resolveSessionId(sessions, targetSessionId);
     const fullTargetId = match.sessionId;

package/dist/{auth-DimbhOMP.mjs → auth-zcVYRjJ8.mjs} RENAMED Viewed

@@ -1,4 +1,4 @@
-import { F as resolveModel } from './run-9C2ogsuu.mjs';
+import { F as resolveModel } from './run-C4BsPJ_p.mjs';
 import 'os';
 import 'fs/promises';
 import 'fs';