npm - nubos-pilot - Versions diffs - 1.2.1 → 1.2.3 - Mend

nubos-pilot 1.2.1 → 1.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (86) hide show

package/CHANGELOG.md +43 -1
package/agents/np-architect.md +2 -0
package/agents/np-executor.md +1 -1
package/agents/np-learnings-extractor.md +54 -0
package/agents/np-planner.md +1 -1
package/agents/np-security-reviewer.md +9 -0
package/bin/np-tools/_commands.cjs +5 -0
package/bin/np-tools/derive-tier.cjs +86 -0
package/bin/np-tools/derive-tier.test.cjs +83 -0
package/bin/np-tools/doctor.cjs +15 -2
package/bin/np-tools/graph-impact.cjs +111 -0
package/bin/np-tools/graph-impact.test.cjs +119 -0
package/bin/np-tools/learnings.cjs +105 -0
package/bin/np-tools/learnings.test.cjs +66 -0
package/bin/np-tools/loop-run-round.cjs +7 -1
package/bin/np-tools/scan-codebase.cjs +21 -1
package/bin/np-tools/skill-audit.cjs +79 -0
package/bin/np-tools/skill-audit.test.cjs +86 -0
package/bin/np-tools/verify-reliability.cjs +65 -0
package/bin/np-tools/verify-reliability.test.cjs +69 -0
package/lib/agents.test.cjs +1 -0
package/lib/checkpoint.cjs +3 -0
package/lib/codebase-graph.cjs +0 -0
package/lib/codebase-graph.test.cjs +174 -0
package/lib/codebase-manifest.cjs +3 -0
package/lib/config-defaults.cjs +13 -0
package/lib/config-schema.cjs +11 -0
package/lib/eval-reliability.cjs +63 -0
package/lib/eval-reliability.test.cjs +56 -0
package/lib/install/claude-hooks-learnings.test.cjs +82 -0
package/lib/install/claude-hooks.cjs +65 -4
package/lib/install/claude-hooks.test.cjs +5 -2
package/lib/learnings/capture-ledger.cjs +80 -0
package/lib/learnings/capture-ledger.test.cjs +54 -0
package/lib/learnings/extract.cjs +191 -0
package/lib/learnings/extract.test.cjs +115 -0
package/lib/learnings.cjs +19 -95
package/lib/memory.cjs +38 -33
package/lib/messaging.cjs +12 -6
package/lib/metrics-aggregate.cjs +14 -2
package/lib/migrate.cjs +29 -0
package/lib/migrate.test.cjs +91 -0
package/lib/nubosloop-audit.cjs +104 -0
package/lib/nubosloop-skill-audit.test.cjs +98 -0
package/lib/nubosloop.cjs +9 -0
package/lib/schemas/data/checkpoint.v1.json +13 -0
package/lib/schemas/data/codebase-manifest.v1.json +22 -0
package/lib/schemas/data/learnings.v1.json +28 -0
package/lib/schemas/data/memory-manifest.v1.json +14 -0
package/lib/schemas/data/memory-record.v1.json +16 -0
package/lib/schemas/data/message.v1.json +19 -0
package/lib/schemas/data/metrics-record.v1.json +11 -0
package/lib/tier-classify.cjs +67 -0
package/lib/tier-classify.test.cjs +67 -0
package/lib/validate.cjs +301 -0
package/lib/validate.test.cjs +242 -0
package/np-tools.cjs +5 -0
package/package.json +3 -1
package/skills/np-access-control/SKILL.md +42 -0
package/skills/np-accessibility-audit/SKILL.md +41 -0
package/skills/np-adr/SKILL.md +37 -0
package/skills/np-api-design/SKILL.md +34 -0
package/skills/np-caching-strategy/SKILL.md +38 -0
package/skills/np-data-modeling/SKILL.md +37 -0
package/skills/np-data-privacy/SKILL.md +39 -0
package/skills/np-dependency-audit/SKILL.md +47 -0
package/skills/np-encryption/SKILL.md +47 -0
package/skills/np-error-handling/SKILL.md +37 -0
package/skills/np-incident-response/SKILL.md +38 -0
package/skills/np-llm-app-architecture/SKILL.md +50 -0
package/skills/np-observability/SKILL.md +39 -0
package/skills/np-performance/SKILL.md +38 -0
package/skills/np-queue-design/SKILL.md +32 -0
package/skills/np-rag-design/SKILL.md +43 -0
package/skills/np-refactoring/SKILL.md +35 -0
package/skills/np-resilience-patterns/SKILL.md +39 -0
package/skills/np-secure-code-review/SKILL.md +46 -0
package/skills/np-secure-design/SKILL.md +44 -0
package/skills/np-service-boundary/SKILL.md +35 -0
package/skills/np-system-design/SKILL.md +40 -0
package/skills/np-test-strategy/SKILL.md +46 -0
package/skills/np-threat-model/SKILL.md +42 -0
package/templates/claude/payload/hooks/np-learnings-hook.cjs +55 -0
package/workflows/architect-phase.md +21 -1
package/workflows/execute-phase.md +66 -4
package/workflows/verify-work.md +17 -4

package/bin/np-tools/learnings.cjs ADDED Viewed

@@ -0,0 +1,105 @@
+'use strict';
+const path = require('node:path');
+const child_process = require('node:child_process');
+const { tryReadConfigPath } = require('../../lib/config.cjs');
+const ledger = require('../../lib/learnings/capture-ledger.cjs');
+const extract = require('../../lib/learnings/extract.cjs');
+const args = require('./_args.cjs');
+function _readStdin() {
+  return new Promise((resolve) => {
+    if (process.stdin.isTTY) return resolve('');
+    let buf = '';
+    process.stdin.setEncoding('utf-8');
+    const timer = setTimeout(() => { try { process.stdin.removeAllListeners(); } catch {} resolve(buf); }, 800);
+    process.stdin.on('data', (c) => { buf += c; });
+    process.stdin.on('end', () => { clearTimeout(timer); resolve(buf); });
+    process.stdin.on('error', () => { clearTimeout(timer); resolve(buf); });
+  });
+}
+function _safeParse(s) { try { return s ? JSON.parse(s) : {}; } catch { return {}; } }
+async function _payload(argv) {
+  const inline = args.getFlag(argv, '--payload', { allowDashValues: true });
+  if (inline !== undefined) return _safeParse(inline);
+  if (argv.includes('--stdin')) return _safeParse(await _readStdin());
+  return {};
+}
+function _cfg(cwd) {
+  return {
+    auto_capture: tryReadConfigPath(cwd, 'learnings.auto_capture', true) !== false,
+    max_per_hour: Number(tryReadConfigPath(cwd, 'learnings.max_captures_per_hour', 10)) || 10,
+    max_in_a_row: Number(tryReadConfigPath(cwd, 'learnings.max_in_a_row', 3)) || 3,
+    timeout_ms: Number(tryReadConfigPath(cwd, 'learnings.timeout_ms', 120000)) || 120000,
+    max_files: Number(tryReadConfigPath(cwd, 'learnings.max_files', 30)) || 30,
+  };
+}
+function _spawnWorker(cwd, sid) {
+  const npTools = path.join(__dirname, '..', '..', 'np-tools.cjs');
+  try {
+    const child = child_process.spawn(
+      process.execPath,
+      [npTools, 'learnings', 'run-extract', '--session', sid],
+      { cwd, detached: true, stdio: 'ignore' },
+    );
+    child.unref();
+    return true;
+  } catch { return false; }
+}
+function _emit(stdout, obj) { stdout.write(JSON.stringify(obj) + '\n'); }
+async function run(argv, ctx) {
+  const context = ctx || {};
+  const cwd = context.cwd || process.cwd();
+  const stdout = context.stdout || process.stdout;
+  const list = Array.isArray(argv) ? argv : [];
+  const verb = list[0];
+  const cfg = _cfg(cwd);
+  // 'reset' (UserPromptSubmit) and 'run-extract' (background worker) are not
+  // gated by auto_capture so they keep working coherently, but 'capture' is.
+  if (verb === 'capture') {
+    if (!cfg.auto_capture) { _emit(stdout, { captured: false, reason: 'disabled' }); return 0; }
+    const payload = await _payload(list);
+    const sid = payload.session_id || args.getFlag(list, '--session') || '';
+    if (!sid) { _emit(stdout, { captured: false, reason: 'no-session' }); return 0; }
+    const gate = ledger.tryRecordCapture(sid, { maxPerHour: cfg.max_per_hour, maxStreak: cfg.max_in_a_row });
+    if (!gate.allowed) { _emit(stdout, { captured: false, reason: gate.reason }); return 0; }
+    _spawnWorker(cwd, sid);
+    _emit(stdout, { captured: true, spawned: true });
+    return 0;
+  }
+  if (verb === 'reset') {
+    const payload = await _payload(list);
+    const sid = payload.session_id || args.getFlag(list, '--session') || '';
+    if (sid) ledger.resetStreak(sid);
+    return 0;
+  }
+  if (verb === 'run-extract') {
+    const sid = args.getFlag(list, '--session') || '';
+    try {
+      const result = extract.runExtract({ cwd, sid, config: cfg });
+      _emit(stdout, result);
+    } catch (err) {
+      _emit(stdout, { ran: false, reason: 'error', error: String(err && err.code || err) });
+    }
+    return 0;
+  }
+  _emit(stdout, { error: 'unknown-verb', verb: verb || null, verbs: ['capture', 'reset', 'run-extract'] });
+  return verb ? 1 : 0;
+}
+module.exports = { run };
+// Direct-execution entry point: exits with the code run() resolves to, and
+// with 0 if run() rejects.
+// NOTE(review): slice(3) skips argv[2], so launching this file as
+// `node learnings.cjs <verb> ...` would drop the verb; presumably the expected
+// launch form carries one extra leading token. verify-reliability.cjs uses
+// slice(2) in the same position — confirm which is intended.
+if (require.main === module) {
+  run(process.argv.slice(3)).then((c) => process.exit(c)).catch(() => process.exit(0));
+}

package/bin/np-tools/learnings.test.cjs ADDED Viewed

@@ -0,0 +1,66 @@
+'use strict';
+const { test } = require('node:test');
+const assert = require('node:assert');
+const fs = require('node:fs');
+const os = require('node:os');
+const path = require('node:path');
+const { run } = require('./learnings.cjs');
+function _capture() {
+  const out = { text: '' };
+  return { stdout: { write: (s) => { out.text += s; return true; } }, out };
+}
+// LV-1: `capture` with neither a payload nor --session, run in a fresh temp
+// directory, must exit 0 and report 'no-session'.
+test('LV-1: capture with no session → no-session, no spawn', async () => {
+  const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'np-lv-'));
+  try {
+    const c = _capture();
+    c.cwd = dir;
+    const code = await run(['capture'], c);
+    assert.strictEqual(code, 0);
+    assert.match(c.out.text, /no-session/);
+  } finally { fs.rmSync(dir, { recursive: true, force: true }); }
+});
+// LV-2: with `learnings.auto_capture: false` written to
+// .nubos-pilot/config.json, `capture` must report 'disabled' even though a
+// session id is supplied.
+test('LV-2: capture disabled via config → disabled, no spawn', async () => {
+  const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'np-lv-'));
+  try {
+    fs.mkdirSync(path.join(dir, '.nubos-pilot'), { recursive: true });
+    fs.writeFileSync(
+      path.join(dir, '.nubos-pilot', 'config.json'),
+      JSON.stringify({ learnings: { auto_capture: false } }),
+    );
+    const c = _capture();
+    c.cwd = dir;
+    const code = await run(['capture', '--payload', JSON.stringify({ session_id: 'abc' })], c);
+    assert.strictEqual(code, 0);
+    assert.match(c.out.text, /disabled/);
+  } finally { fs.rmSync(dir, { recursive: true, force: true }); }
+});
+// LV-3: `run-extract` in an empty temp directory must exit 0 and print
+// { ran: false, reason: 'not-a-repo' }.
+test('LV-3: run-extract on a non-repo cwd → ran:false not-a-repo', async () => {
+  const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'np-lv-'));
+  try {
+    const c = _capture();
+    c.cwd = dir;
+    const code = await run(['run-extract', '--session', 'abc'], c);
+    assert.strictEqual(code, 0);
+    const parsed = JSON.parse(c.out.text);
+    assert.strictEqual(parsed.ran, false);
+    assert.strictEqual(parsed.reason, 'not-a-repo');
+  } finally { fs.rmSync(dir, { recursive: true, force: true }); }
+});
+// LV-4: an unrecognised verb must exit 1 and print the 'unknown-verb' envelope.
+test('LV-4: unknown verb → error envelope, exit 1', async () => {
+  const c = _capture();
+  const code = await run(['bogus'], c);
+  assert.strictEqual(code, 1);
+  assert.match(c.out.text, /unknown-verb/);
+});
+// LV-5: `reset` without any session id must resolve to exit code 0.
+test('LV-5: reset is a no-op without a session and never throws', async () => {
+  const c = _capture();
+  const code = await run(['reset'], c);
+  assert.strictEqual(code, 0);
+});

package/bin/np-tools/loop-run-round.cjs CHANGED Viewed

@@ -324,21 +324,27 @@ function _runPostCritics(taskId, list, cwd) {
         ? nubosloop.coerceMaxRounds(override)
         : opts.maxRounds;
       const auditFindings = nubosloop.auditFindingsFromAudits(prev.tool_use_audit, round, taskId);
+      const skillFindings = nubosloop.skillFindingsFromState(prev, round, taskId);
+      const combinedAudit = skillFindings.length ? auditFindings.concat(skillFindings) : auditFindings;
       evalResult = nubosloop.evaluateLoop(
         { round },
         criticOutputs,
-        { maxRounds: effectiveMax, auditFindings },
+        { maxRounds: effectiveMax, auditFindings: combinedAudit },
       );
       const perRound = (prev.findings_per_round && typeof prev.findings_per_round === 'object')
         ? safeAssign({}, prev.findings_per_round)
         : {};
       perRound[String(round)] = evalResult.findings;
       const routed = nubosloop.markAuditsRoutedInArray(prev.tool_use_audit, round);
+      const skillRoutedRounds = skillFindings.length
+        ? nubosloop.markSkillFindingsRoutedInArray(prev.skill_routed_rounds, round)
+        : (Array.isArray(prev.skill_routed_rounds) ? prev.skill_routed_rounds : []);
       const partial = {
         last_phase: 'post-critics',
         last_action: evalResult.next_action,
         findings: evalResult.findings,
         findings_per_round: perRound,
+        skill_routed_rounds: skillRoutedRounds,
         tool_use_audit: routed.audits,
       };
       if (force) partial.forced_post_critics = true;

package/bin/np-tools/scan-codebase.cjs CHANGED Viewed

@@ -18,6 +18,7 @@ const {
   moduleDocPath,
   indexDocPath,
 } = require('../../lib/codebase-docs.cjs');
+const { buildModuleGraph } = require('../../lib/codebase-graph.cjs');
 function _parseArgs(args) {
   const flags = {
@@ -73,6 +74,16 @@ function _emitPlan(projectRoot, flags, stdout) {
       projectRoot,
       path.join(projectRoot, '.nubos-pilot', 'codebase', '.hashes.json'),
     ),
+    graph_path: path.relative(
+      projectRoot,
+      path.join(projectRoot, '.nubos-pilot', 'codebase', '.graph.json'),
+    ),
+    graph: {
+      module_count: modulesResult.graph.module_count,
+      edge_count: modulesResult.graph.edge_count,
+      cycle_count: modulesResult.graph.cycles.length,
+      unresolved_internal_deps: modulesResult.graph.metrics.unresolved_internal_deps,
+    },
   }, null, 2));
 }
@@ -103,6 +114,15 @@ function _scanAndBuild(projectRoot, flags) {
   fs.mkdirSync(path.dirname(indexMapPath), { recursive: true });
   atomicWriteFileSync(indexMapPath, JSON.stringify(docIndex, null, 2) + '\n');
+  const graph = buildModuleGraph(modules.map((m) => m.facts));
+  const graphPath = path.join(
+    projectRoot,
+    '.nubos-pilot',
+    'codebase',
+    '.graph.json',
+  );
+  atomicWriteFileSync(graphPath, JSON.stringify(graph, null, 2) + '\n');
   const indexPath = indexDocPath(projectRoot);
   fs.mkdirSync(path.dirname(indexPath), { recursive: true });
   atomicWriteFileSync(indexPath, buildIndexDoc(modules, { project_name: flags.projectName || null }));
@@ -115,7 +135,7 @@ function _scanAndBuild(projectRoot, flags) {
     atomicWriteFileSync(docPath, renderModuleDoc(mod.facts, null, hashLookup));
   }
-  return { scan: scanResult, modules, manifest, hashLookup };
+  return { scan: scanResult, modules, manifest, hashLookup, graph };
 }
 function _applyProse(projectRoot, flags, stdout) {

package/bin/np-tools/skill-audit.cjs ADDED Viewed

@@ -0,0 +1,79 @@
+'use strict';
+const nubosloop = require('../../lib/nubosloop.cjs');
+const checkpoint = require('../../lib/checkpoint.cjs');
+const { TASK_ID_RE } = require('../../lib/ids.cjs');
+const args = require('./_args.cjs');
+const { NubosPilotError } = require('../../lib/core.cjs');
+function _usage() {
+  return [
+    'Usage:',
+    '  np-tools.cjs skill-audit expect   --task <id> --skills <a,b,c>   (orchestrator: record injected skills)',
+    '  np-tools.cjs skill-audit ack      --task <id> --skill <name>     (executor: stamp a consulted skill)',
+    '  np-tools.cjs skill-audit findings --task <id> [--round <n>]      (read-only: list unmet skill bars)',
+    '',
+    'Mechanical counterpart to the Rule-9 search audit: a skill injected as a task\'s',
+    'quality bar that the executor never consulted becomes a `skill-bar-unconsulted`',
+    'finding at post-critics, routing the task back to the executor (once per round).',
+  ].join('\n');
+}
+function _assertTask(taskId) {
+  args.assertMatch(taskId, TASK_ID_RE, 'skill-audit-invalid-task-id', 'taskId');
+}
+function run(argv, ctx) {
+  const context = ctx || {};
+  const cwd = context.cwd || process.cwd();
+  const stdout = context.stdout || process.stdout;
+  const stderr = context.stderr || process.stderr;
+  const list = Array.isArray(argv) ? argv : [];
+  const verb = list[0];
+  const tail = list.slice(1);
+  if (!verb || verb === '-h' || verb === '--help') { stdout.write(_usage() + '\n'); return 0; }
+  try {
+    if (verb === 'expect') {
+      const taskId = args.getFlag(tail, '--task');
+      _assertTask(taskId);
+      const raw = args.getFlag(tail, '--skills') || '';
+      const skills = String(raw).split(',').map((s) => s.trim()).filter(Boolean);
+      const res = nubosloop.recordExpectedSkills(taskId, skills, cwd);
+      stdout.write(JSON.stringify(res) + '\n');
+      return 0;
+    }
+    if (verb === 'ack') {
+      const taskId = args.getFlag(tail, '--task');
+      _assertTask(taskId);
+      const skill = args.getFlag(tail, '--skill');
+      if (!skill) throw new NubosPilotError('skill-audit-missing-skill', 'ack requires --skill <name>', {});
+      const res = nubosloop.recordSkillEvidence(taskId, skill, cwd);
+      stdout.write(JSON.stringify(res) + '\n');
+      return 0;
+    }
+    if (verb === 'findings') {
+      const taskId = args.getFlag(tail, '--task');
+      _assertTask(taskId);
+      const cp = checkpoint.readCheckpoint(taskId, cwd) || {};
+      const prev = cp.nubosloop || {};
+      const roundArg = args.getFlag(tail, '--round');
+      const round = roundArg != null ? Number(roundArg) : (Number(prev.round) || 1);
+      const findings = nubosloop.skillFindingsFromState(prev, round, taskId);
+      stdout.write(JSON.stringify({ task_id: taskId, round, findings }) + '\n');
+      return 0;
+    }
+    stderr.write(JSON.stringify({ code: 'skill-audit-unknown-verb', message: 'Unknown verb: ' + verb, details: { verb, verbs: ['expect', 'ack', 'findings'] } }) + '\n');
+    return 1;
+  } catch (err) {
+    args.emitErrorEnvelope(err, stderr, 'skill-audit-internal-error');
+    return 1;
+  }
+}
+module.exports = { run };
+// Direct-execution entry point: exits with run()'s return code.
+// NOTE(review): slice(3) skips argv[2], so launching this file as
+// `node skill-audit.cjs <verb> ...` would drop the verb; presumably the
+// expected launch form carries one extra leading token. verify-reliability.cjs
+// uses slice(2) in the same position — confirm which is intended.
+if (require.main === module) {
+  process.exit(run(process.argv.slice(3)));
+}

package/bin/np-tools/skill-audit.test.cjs ADDED Viewed

@@ -0,0 +1,86 @@
+'use strict';
+const { test } = require('node:test');
+const assert = require('node:assert');
+const fs = require('node:fs');
+const os = require('node:os');
+const path = require('node:path');
+const { run } = require('./skill-audit.cjs');
+const checkpoint = require('../../lib/checkpoint.cjs');
+function _mkRoot() {
+  const r = fs.mkdtempSync(path.join(os.tmpdir(), 'np-skill-cli-'));
+  fs.mkdirSync(path.join(r, '.nubos-pilot', 'checkpoints'), { recursive: true });
+  fs.writeFileSync(
+    path.join(r, '.nubos-pilot', 'STATE.md'),
+    '---\nschema_version: 2\ncurrent_phase: null\ncurrent_plan: null\ncurrent_task: null\n---\n',
+    'utf-8',
+  );
+  return r;
+}
+function _cap(cwd) {
+  const out = { text: '' }; const err = { text: '' };
+  return { cwd, stdout: { write: (s) => { out.text += s; return true; } }, stderr: { write: (s) => { err.text += s; return true; } }, out, err };
+}
+// Task id shared by every test below.
+const TID = 'M001-S001-T0001';
+// SC-1: after `expect` records two skills and nothing is acked, `findings`
+// returns a single finding whose raw.missing_skills lists both.
+test('SC-1: expect then findings reports the unacked skill', () => {
+  const r = _mkRoot();
+  try {
+    checkpoint.startTask({ id: TID }, r);
+    assert.equal(run(['expect', '--task', TID, '--skills', 'np-api-design,np-encryption'], _cap(r)), 0);
+    const c = _cap(r);
+    assert.equal(run(['findings', '--task', TID], c), 0);
+    const parsed = JSON.parse(c.out.text);
+    assert.equal(parsed.findings.length, 1);
+    assert.deepEqual(parsed.findings[0].raw.missing_skills.sort(), ['np-api-design', 'np-encryption']);
+  } finally { fs.rmSync(r, { recursive: true, force: true }); }
+});
+// SC-2: acking the only expected skill leaves `findings` empty.
+test('SC-2: ack clears the finding for that skill', () => {
+  const r = _mkRoot();
+  try {
+    checkpoint.startTask({ id: TID }, r);
+    run(['expect', '--task', TID, '--skills', 'np-api-design'], _cap(r));
+    run(['ack', '--task', TID, '--skill', 'np-api-design'], _cap(r));
+    const c = _cap(r);
+    run(['findings', '--task', TID], c);
+    assert.equal(JSON.parse(c.out.text).findings.length, 0);
+  } finally { fs.rmSync(r, { recursive: true, force: true }); }
+});
+// SC-3: a task id that fails validation exits 1 with the
+// 'skill-audit-invalid-task-id' code on stderr.
+test('SC-3: invalid task id → error envelope exit 1', () => {
+  const c = _cap(process.cwd());
+  assert.equal(run(['ack', '--task', 'bogus', '--skill', 'x'], c), 1);
+  assert.match(c.err.text, /skill-audit-invalid-task-id/);
+});
+// SC-4: `ack` with a valid task but no --skill exits 1 with
+// 'skill-audit-missing-skill' on stderr.
+test('SC-4: ack without --skill → error envelope exit 1', () => {
+  const r = _mkRoot();
+  try {
+    checkpoint.startTask({ id: TID }, r);
+    const c = _cap(r);
+    assert.equal(run(['ack', '--task', TID], c), 1);
+    assert.match(c.err.text, /skill-audit-missing-skill/);
+  } finally { fs.rmSync(r, { recursive: true, force: true }); }
+});
+// SC-5: an unknown verb exits 1 with 'skill-audit-unknown-verb' on stderr;
+// --help exits 0 and prints usage text to stdout.
+test('SC-5: unknown verb → exit 1; --help → exit 0', () => {
+  const c1 = _cap(process.cwd());
+  assert.equal(run(['bogus'], c1), 1);
+  assert.match(c1.err.text, /skill-audit-unknown-verb/);
+  const c2 = _cap(process.cwd());
+  assert.equal(run(['--help'], c2), 0);
+  assert.match(c2.out.text, /skill-audit/);
+});
+// SC-6: `expect` with an empty --skills value succeeds and produces no findings.
+test('SC-6: expect with empty skills is a no-op (no findings)', () => {
+  const r = _mkRoot();
+  try {
+    checkpoint.startTask({ id: TID }, r);
+    assert.equal(run(['expect', '--task', TID, '--skills', ''], _cap(r)), 0);
+    const c = _cap(r);
+    run(['findings', '--task', TID], c);
+    assert.equal(JSON.parse(c.out.text).findings.length, 0);
+  } finally { fs.rmSync(r, { recursive: true, force: true }); }
+});

package/bin/np-tools/verify-reliability.cjs ADDED Viewed

@@ -0,0 +1,65 @@
+'use strict';
+const { summarize, describe } = require('../../lib/eval-reliability.cjs');
+const { emitErrorEnvelope } = require('./_args.cjs');
+function _usage() {
+  return [
+    'Usage:',
+    '  np-tools.cjs verify-reliability --codes <c1,c2,...>',
+    '',
+    'pass@k reliability: the orchestrator runs a task\'s <verify> command k times',
+    'and passes the collected exit codes (0 = pass). Emits a JSON summary whose',
+    '`aggregate_exit_code` is 0 only when every run passed (pass^k) — feed it to',
+    '`loop-run-round --phase post-executor --verify-exit-code`. A flaky task',
+    'aggregates to red and flows through the normal build-fixer path.',
+  ].join('\n');
+}
+function run(argv, ctx) {
+  const context = ctx || {};
+  const stdout = context.stdout || process.stdout;
+  const stderr = context.stderr || process.stderr;
+  const args = Array.isArray(argv) ? argv.slice() : [];
+  let codesRaw = null;
+  for (let i = 0; i < args.length; i++) {
+    const a = args[i];
+    if (a === '-h' || a === '--help') { stdout.write(_usage() + '\n'); return 0; }
+    else if (a === '--codes') { codesRaw = args[++i] || ''; }
+    else if (a.startsWith('--codes=')) { codesRaw = a.slice('--codes='.length); }
+    else {
+      stderr.write(JSON.stringify({
+        code: 'verify-reliability-unknown-arg',
+        message: 'Unknown argument: ' + a,
+        details: { arg: a },
+      }) + '\n');
+      return 1;
+    }
+  }
+  if (codesRaw == null) {
+    stderr.write(JSON.stringify({
+      code: 'verify-reliability-missing-codes',
+      message: '--codes <c1,c2,...> is required',
+      details: {},
+    }) + '\n');
+    return 1;
+  }
+  try {
+    const codes = String(codesRaw).split(',').map((s) => s.trim()).filter((s) => s !== '').map(Number);
+    const summary = summarize(codes);
+    stdout.write(JSON.stringify(Object.assign({}, summary, { description: describe(summary) })) + '\n');
+    return 0;
+  } catch (err) {
+    emitErrorEnvelope(err, stderr, 'verify-reliability-internal-error');
+    return 1;
+  }
+}
+module.exports = { run };
+// Direct-execution entry point: passes everything after the script path to
+// run() and exits with its return code.
+if (require.main === module) {
+  process.exit(run(process.argv.slice(2)));
+}

package/bin/np-tools/verify-reliability.test.cjs ADDED Viewed

@@ -0,0 +1,69 @@
+'use strict';
+const { test } = require('node:test');
+const assert = require('node:assert');
+const { run } = require('./verify-reliability.cjs');
+function _capture() {
+  const out = { text: '' };
+  const err = { text: '' };
+  return {
+    stdout: { write: (s) => { out.text += s; return true; } },
+    stderr: { write: (s) => { err.text += s; return true; } },
+    out, err,
+  };
+}
+// VR-1: three passing runs give aggregate_exit_code 0 and the
+// 'reliable-pass' verdict.
+test('VR-1: all-pass codes → aggregate 0, reliable-pass', () => {
+  const c = _capture();
+  const code = run(['--codes', '0,0,0'], c);
+  assert.strictEqual(code, 0);
+  const r = JSON.parse(c.out.text);
+  assert.strictEqual(r.aggregate_exit_code, 0);
+  assert.strictEqual(r.verdict, 'reliable-pass');
+});
+// VR-2: a mix of passing and failing runs still exits 0 from the CLI, but the
+// summary has aggregate_exit_code 1, flaky: true, and a description
+// containing 'FLAKY'.
+test('VR-2: flaky codes → aggregate 1, flaky verdict + loud description', () => {
+  const c = _capture();
+  const code = run(['--codes', '0,1,0'], c);
+  assert.strictEqual(code, 0);
+  const r = JSON.parse(c.out.text);
+  assert.strictEqual(r.aggregate_exit_code, 1);
+  assert.strictEqual(r.flaky, true);
+  assert.match(r.description, /FLAKY/);
+});
+// VR-3: the `--codes=<list>` spelling is accepted; all-failing runs give the
+// 'reliable-fail' verdict.
+test('VR-3: --codes= form supported', () => {
+  const c = _capture();
+  const code = run(['--codes=1,1'], c);
+  assert.strictEqual(code, 0);
+  assert.strictEqual(JSON.parse(c.out.text).verdict, 'reliable-fail');
+});
+// VR-4: no arguments at all exits 1 with 'verify-reliability-missing-codes'.
+test('VR-4: missing --codes → error envelope, exit 1', () => {
+  const c = _capture();
+  const code = run([], c);
+  assert.strictEqual(code, 1);
+  assert.match(c.err.text, /verify-reliability-missing-codes/);
+});
+// VR-5: an unrecognised flag exits 1 with 'verify-reliability-unknown-arg'.
+test('VR-5: unknown arg → error envelope, exit 1', () => {
+  const c = _capture();
+  const code = run(['--bogus'], c);
+  assert.strictEqual(code, 1);
+  assert.match(c.err.text, /verify-reliability-unknown-arg/);
+});
+// VR-6: an empty code list exits 1 and stderr carries
+// 'eval-reliability-no-runs'.
+test('VR-6: empty codes → internal error envelope, exit 1', () => {
+  const c = _capture();
+  const code = run(['--codes', ''], c);
+  assert.strictEqual(code, 1);
+  assert.match(c.err.text, /eval-reliability-no-runs/);
+});
+// VR-7: --help exits 0 and prints usage text to stdout.
+test('VR-7: --help → usage exit 0', () => {
+  const c = _capture();
+  const code = run(['--help'], c);
+  assert.strictEqual(code, 0);
+  assert.match(c.out.text, /verify-reliability/);
+});

package/lib/agents.test.cjs CHANGED Viewed

@@ -247,6 +247,7 @@ const NP_AGENTS = [
   { file: 'np-nyquist-auditor', expected_tier: 'haiku' },
   { file: 'np-sc-extractor', expected_tier: 'haiku' },
   { file: 'np-critic', expected_tier: 'sonnet' },
+  { file: 'np-learnings-extractor', expected_tier: 'haiku' },
 ];
 // Audit-surface modules — files in agents/ that carry agent-shaped frontmatter

package/lib/checkpoint.cjs CHANGED Viewed

@@ -10,8 +10,10 @@ const {
 } = require('./core.cjs');
 const { parseState, serializeState } = require('./state.cjs');
 const { TASK_ID_RE } = require('./ids.cjs');
+const { assertValid } = require('./validate.cjs');
 const CHECKPOINT_SCHEMA_VERSION = 1;
+const STORE_SCHEMA = 'checkpoint.v1';
 function _assertSafeTaskId(taskId) {
   if (typeof taskId !== 'string' || !TASK_ID_RE.test(taskId)) {
@@ -78,6 +80,7 @@ function _assertCompatibleSchema(existing, cpPath) {
       },
     );
   }
+  assertValid(existing, STORE_SCHEMA, 'checkpoint-corrupt', { path: cpPath });
 }
 function _sliceFromTaskId(taskId) {

package/lib/codebase-graph.cjs ADDED Viewed

Binary file