nubos-pilot 1.2.1 → 1.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. package/CHANGELOG.md +43 -1
  2. package/agents/np-architect.md +2 -0
  3. package/agents/np-executor.md +1 -1
  4. package/agents/np-learnings-extractor.md +54 -0
  5. package/agents/np-planner.md +1 -1
  6. package/agents/np-security-reviewer.md +9 -0
  7. package/bin/np-tools/_commands.cjs +5 -0
  8. package/bin/np-tools/derive-tier.cjs +86 -0
  9. package/bin/np-tools/derive-tier.test.cjs +83 -0
  10. package/bin/np-tools/doctor.cjs +15 -2
  11. package/bin/np-tools/graph-impact.cjs +111 -0
  12. package/bin/np-tools/graph-impact.test.cjs +119 -0
  13. package/bin/np-tools/learnings.cjs +105 -0
  14. package/bin/np-tools/learnings.test.cjs +66 -0
  15. package/bin/np-tools/loop-run-round.cjs +7 -1
  16. package/bin/np-tools/scan-codebase.cjs +21 -1
  17. package/bin/np-tools/skill-audit.cjs +79 -0
  18. package/bin/np-tools/skill-audit.test.cjs +86 -0
  19. package/bin/np-tools/verify-reliability.cjs +65 -0
  20. package/bin/np-tools/verify-reliability.test.cjs +69 -0
  21. package/lib/agents.test.cjs +1 -0
  22. package/lib/checkpoint.cjs +3 -0
  23. package/lib/codebase-graph.cjs +0 -0
  24. package/lib/codebase-graph.test.cjs +174 -0
  25. package/lib/codebase-manifest.cjs +3 -0
  26. package/lib/config-defaults.cjs +13 -0
  27. package/lib/config-schema.cjs +11 -0
  28. package/lib/eval-reliability.cjs +63 -0
  29. package/lib/eval-reliability.test.cjs +56 -0
  30. package/lib/install/claude-hooks-learnings.test.cjs +82 -0
  31. package/lib/install/claude-hooks.cjs +65 -4
  32. package/lib/install/claude-hooks.test.cjs +5 -2
  33. package/lib/learnings/capture-ledger.cjs +80 -0
  34. package/lib/learnings/capture-ledger.test.cjs +54 -0
  35. package/lib/learnings/extract.cjs +191 -0
  36. package/lib/learnings/extract.test.cjs +115 -0
  37. package/lib/learnings.cjs +19 -95
  38. package/lib/memory.cjs +38 -33
  39. package/lib/messaging.cjs +12 -6
  40. package/lib/metrics-aggregate.cjs +14 -2
  41. package/lib/migrate.cjs +29 -0
  42. package/lib/migrate.test.cjs +91 -0
  43. package/lib/nubosloop-audit.cjs +104 -0
  44. package/lib/nubosloop-skill-audit.test.cjs +98 -0
  45. package/lib/nubosloop.cjs +9 -0
  46. package/lib/schemas/data/checkpoint.v1.json +13 -0
  47. package/lib/schemas/data/codebase-manifest.v1.json +22 -0
  48. package/lib/schemas/data/learnings.v1.json +28 -0
  49. package/lib/schemas/data/memory-manifest.v1.json +14 -0
  50. package/lib/schemas/data/memory-record.v1.json +16 -0
  51. package/lib/schemas/data/message.v1.json +19 -0
  52. package/lib/schemas/data/metrics-record.v1.json +11 -0
  53. package/lib/tier-classify.cjs +67 -0
  54. package/lib/tier-classify.test.cjs +67 -0
  55. package/lib/validate.cjs +301 -0
  56. package/lib/validate.test.cjs +242 -0
  57. package/np-tools.cjs +5 -0
  58. package/package.json +3 -1
  59. package/skills/np-access-control/SKILL.md +42 -0
  60. package/skills/np-accessibility-audit/SKILL.md +41 -0
  61. package/skills/np-adr/SKILL.md +37 -0
  62. package/skills/np-api-design/SKILL.md +34 -0
  63. package/skills/np-caching-strategy/SKILL.md +38 -0
  64. package/skills/np-data-modeling/SKILL.md +37 -0
  65. package/skills/np-data-privacy/SKILL.md +39 -0
  66. package/skills/np-dependency-audit/SKILL.md +47 -0
  67. package/skills/np-encryption/SKILL.md +47 -0
  68. package/skills/np-error-handling/SKILL.md +37 -0
  69. package/skills/np-incident-response/SKILL.md +38 -0
  70. package/skills/np-llm-app-architecture/SKILL.md +50 -0
  71. package/skills/np-observability/SKILL.md +39 -0
  72. package/skills/np-performance/SKILL.md +38 -0
  73. package/skills/np-queue-design/SKILL.md +32 -0
  74. package/skills/np-rag-design/SKILL.md +43 -0
  75. package/skills/np-refactoring/SKILL.md +35 -0
  76. package/skills/np-resilience-patterns/SKILL.md +39 -0
  77. package/skills/np-secure-code-review/SKILL.md +46 -0
  78. package/skills/np-secure-design/SKILL.md +44 -0
  79. package/skills/np-service-boundary/SKILL.md +35 -0
  80. package/skills/np-system-design/SKILL.md +40 -0
  81. package/skills/np-test-strategy/SKILL.md +46 -0
  82. package/skills/np-threat-model/SKILL.md +42 -0
  83. package/templates/claude/payload/hooks/np-learnings-hook.cjs +55 -0
  84. package/workflows/architect-phase.md +21 -1
  85. package/workflows/execute-phase.md +66 -4
  86. package/workflows/verify-work.md +17 -4
@@ -0,0 +1,105 @@
1
+ 'use strict';
2
+
3
+ const path = require('node:path');
4
+ const child_process = require('node:child_process');
5
+
6
+ const { tryReadConfigPath } = require('../../lib/config.cjs');
7
+ const ledger = require('../../lib/learnings/capture-ledger.cjs');
8
+ const extract = require('../../lib/learnings/extract.cjs');
9
+ const args = require('./_args.cjs');
10
+
11
/**
 * Read everything piped to stdin as UTF-8 text.
 *
 * Resolves with '' immediately when stdin is a TTY. Otherwise resolves with
 * whatever has been buffered when the stream ends, errors, or an 800 ms guard
 * timer fires, whichever comes first. The promise never rejects.
 *
 * @returns {Promise<string>} the buffered stdin text (possibly empty)
 */
function _readStdin() {
  return new Promise((resolve) => {
    if (process.stdin.isTTY) return resolve('');
    const stdin = process.stdin;
    let buf = '';
    let timer = null;
    const onData = (chunk) => { buf += chunk; };
    // Settle with the buffered text. Only the data/end handlers installed
    // here are detached (stdin is shared, other code may own listeners on it),
    // and the stream is paused so a pipe that never closes cannot keep the
    // process alive. The 'error' handler stays attached on purpose: a late
    // stream error must not surface as an unhandled 'error' event.
    const finish = () => {
      clearTimeout(timer);
      try {
        stdin.removeListener('data', onData);
        stdin.removeListener('end', finish);
        stdin.pause();
      } catch {
        // Best-effort cleanup; the buffered text is still returned below.
      }
      resolve(buf);
    };
    stdin.setEncoding('utf-8');
    timer = setTimeout(finish, 800);
    stdin.on('data', onData);
    stdin.on('end', finish);
    stdin.on('error', finish);
  });
}
22
+
23
/**
 * Parse a JSON payload without ever throwing.
 *
 * Empty input, malformed JSON, and JSON whose top-level value is not an object
 * (`null`, numbers, strings, booleans) all yield `{}`, so callers can read
 * properties off the result unconditionally.
 *
 * @param {string} s - raw JSON text (may be empty or undefined)
 * @returns {object} the parsed object/array, or `{}` as a safe fallback
 */
function _safeParse(s) {
  if (!s) return {};
  try {
    const parsed = JSON.parse(s);
    // JSON.parse('null') succeeds and returns null; property access on it
    // would throw, so treat every non-object result like "no payload".
    return parsed !== null && typeof parsed === 'object' ? parsed : {};
  } catch {
    return {};
  }
}
24
+
25
/**
 * Resolve the JSON payload for a verb.
 *
 * An inline `--payload <json>` value takes precedence; otherwise, when the
 * `--stdin` flag is present, the payload is read from stdin. With neither,
 * the payload is an empty object.
 *
 * @param {string[]} argv - the verb's argument list
 * @returns {Promise<object>} the parsed payload
 */
async function _payload(argv) {
  const inlineJson = args.getFlag(argv, '--payload', { allowDashValues: true });
  if (inlineJson !== undefined) {
    return _safeParse(inlineJson);
  }
  if (!argv.includes('--stdin')) {
    return {};
  }
  const piped = await _readStdin();
  return _safeParse(piped);
}
31
+
32
/**
 * Load the `learnings.*` settings for a project, applying defaults.
 *
 * @param {string} cwd - project directory whose config is consulted
 * @returns {{auto_capture: boolean, max_per_hour: number, max_in_a_row: number,
 *   timeout_ms: number, max_files: number}} the effective settings
 */
function _cfg(cwd) {
  // Numeric settings: anything that coerces to 0 or NaN falls back to the default.
  const numeric = (key, fallback) => Number(tryReadConfigPath(cwd, key, fallback)) || fallback;

  // Capture stays enabled unless the config value is exactly `false`.
  const autoCapture = tryReadConfigPath(cwd, 'learnings.auto_capture', true);

  return {
    auto_capture: autoCapture !== false,
    max_per_hour: numeric('learnings.max_captures_per_hour', 10),
    max_in_a_row: numeric('learnings.max_in_a_row', 3),
    timeout_ms: numeric('learnings.timeout_ms', 120000),
    max_files: numeric('learnings.max_files', 30),
  };
}
41
+
42
/**
 * Launch the background `learnings run-extract` worker for a session.
 *
 * The worker is started detached with stdio ignored and is unref'd, so this
 * process does not wait for it.
 *
 * @param {string} cwd - working directory for the worker
 * @param {string} sid - session id forwarded as `--session`
 * @returns {boolean} true when the spawn call did not throw synchronously,
 *   false otherwise. This does not confirm that the worker actually started.
 */
function _spawnWorker(cwd, sid) {
  const npTools = path.join(__dirname, '..', '..', 'np-tools.cjs');
  try {
    const child = child_process.spawn(
      process.execPath,
      [npTools, 'learnings', 'run-extract', '--session', sid],
      { cwd, detached: true, stdio: 'ignore' },
    );
    // Spawn failures (for example a cwd that no longer exists) are reported
    // asynchronously through the 'error' event, which the try/catch cannot
    // see. Without a listener that event would be thrown as an uncaught
    // exception; the worker is fire-and-forget, so the failure is ignored.
    child.on('error', () => {});
    child.unref();
    return true;
  } catch { return false; }
}
54
+
55
/**
 * Write one value to a stream as a single newline-terminated JSON line.
 *
 * @param {{write: Function}} stdout - destination stream
 * @param {*} obj - value to serialise
 */
function _emit(stdout, obj) {
  const line = `${JSON.stringify(obj)}\n`;
  stdout.write(line);
}
56
+
57
/**
 * Entry point for the `learnings` command.
 *
 * Verbs:
 *  - `capture`     — gated by `learnings.auto_capture` and the capture ledger;
 *                    on success spawns the background extract worker.
 *  - `reset`       — resets the capture streak for the session, if one is given.
 *  - `run-extract` — runs the extraction and emits its result (or an error
 *                    record) as JSON.
 *
 * Every outcome is written to stdout as one JSON line, except `reset`, which
 * writes nothing.
 *
 * @param {string[]} argv - `[verb, ...flags]`
 * @param {{cwd?: string, stdout?: {write: Function}}} [ctx] - overrides for
 *   the working directory and output stream
 * @returns {Promise<number>} 0 for every handled verb and for a missing verb;
 *   1 only for an unrecognised, non-empty verb
 */
async function run(argv, ctx) {
  const context = ctx || {};
  const cwd = context.cwd || process.cwd();
  const stdout = context.stdout || process.stdout;
  const list = Array.isArray(argv) ? argv : [];
  const verb = list[0];
  const cfg = _cfg(cwd);

  // 'reset' (UserPromptSubmit) and 'run-extract' (background worker) are not
  // gated by auto_capture so they keep working coherently, but 'capture' is.
  if (verb === 'capture') {
    if (!cfg.auto_capture) { _emit(stdout, { captured: false, reason: 'disabled' }); return 0; }
    const payload = await _payload(list);
    // The payload wins over the --session flag; guard against a null payload.
    const sid = (payload && payload.session_id) || args.getFlag(list, '--session') || '';
    if (!sid) { _emit(stdout, { captured: false, reason: 'no-session' }); return 0; }
    const gate = ledger.tryRecordCapture(sid, { maxPerHour: cfg.max_per_hour, maxStreak: cfg.max_in_a_row });
    if (!gate.allowed) { _emit(stdout, { captured: false, reason: gate.reason }); return 0; }
    // Report what actually happened: the ledger entry was recorded either way,
    // but `spawned` is only true when the worker launch did not fail.
    const spawned = _spawnWorker(cwd, sid);
    _emit(stdout, { captured: true, spawned });
    return 0;
  }

  if (verb === 'reset') {
    const payload = await _payload(list);
    const sid = (payload && payload.session_id) || args.getFlag(list, '--session') || '';
    if (sid) ledger.resetStreak(sid);
    return 0;
  }

  if (verb === 'run-extract') {
    const sid = args.getFlag(list, '--session') || '';
    try {
      const result = extract.runExtract({ cwd, sid, config: cfg });
      _emit(stdout, result);
    } catch (err) {
      // Extraction failures are reported in-band and never fail the process.
      _emit(stdout, { ran: false, reason: 'error', error: String(err && err.code || err) });
    }
    return 0;
  }

  _emit(stdout, { error: 'unknown-verb', verb: verb || null, verbs: ['capture', 'reset', 'run-extract'] });
  return verb ? 1 : 0;
}
100
+
101
+ module.exports = { run };
102
+
103
+ if (require.main === module) {
104
+ run(process.argv.slice(3)).then((c) => process.exit(c)).catch(() => process.exit(0));
105
+ }
@@ -0,0 +1,66 @@
1
+ 'use strict';
2
+
3
+ const { test } = require('node:test');
4
+ const assert = require('node:assert');
5
+ const fs = require('node:fs');
6
+ const os = require('node:os');
7
+ const path = require('node:path');
8
+ const { run } = require('./learnings.cjs');
9
+
10
/**
 * Build a test context whose `stdout` accumulates everything written to it
 * in `out.text`.
 *
 * @returns {{stdout: {write: Function}, out: {text: string}}}
 */
function _capture() {
  const out = { text: '' };
  const stdout = {
    write(chunk) {
      out.text += chunk;
      return true;
    },
  };
  return { stdout, out };
}
14
+
15
+ test('LV-1: capture with no session → no-session, no spawn', async () => {
16
+ const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'np-lv-'));
17
+ try {
18
+ const c = _capture();
19
+ c.cwd = dir;
20
+ const code = await run(['capture'], c);
21
+ assert.strictEqual(code, 0);
22
+ assert.match(c.out.text, /no-session/);
23
+ } finally { fs.rmSync(dir, { recursive: true, force: true }); }
24
+ });
25
+
26
+ test('LV-2: capture disabled via config → disabled, no spawn', async () => {
27
+ const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'np-lv-'));
28
+ try {
29
+ fs.mkdirSync(path.join(dir, '.nubos-pilot'), { recursive: true });
30
+ fs.writeFileSync(
31
+ path.join(dir, '.nubos-pilot', 'config.json'),
32
+ JSON.stringify({ learnings: { auto_capture: false } }),
33
+ );
34
+ const c = _capture();
35
+ c.cwd = dir;
36
+ const code = await run(['capture', '--payload', JSON.stringify({ session_id: 'abc' })], c);
37
+ assert.strictEqual(code, 0);
38
+ assert.match(c.out.text, /disabled/);
39
+ } finally { fs.rmSync(dir, { recursive: true, force: true }); }
40
+ });
41
+
42
+ test('LV-3: run-extract on a non-repo cwd → ran:false not-a-repo', async () => {
43
+ const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'np-lv-'));
44
+ try {
45
+ const c = _capture();
46
+ c.cwd = dir;
47
+ const code = await run(['run-extract', '--session', 'abc'], c);
48
+ assert.strictEqual(code, 0);
49
+ const parsed = JSON.parse(c.out.text);
50
+ assert.strictEqual(parsed.ran, false);
51
+ assert.strictEqual(parsed.reason, 'not-a-repo');
52
+ } finally { fs.rmSync(dir, { recursive: true, force: true }); }
53
+ });
54
+
55
+ test('LV-4: unknown verb → error envelope, exit 1', async () => {
56
+ const c = _capture();
57
+ const code = await run(['bogus'], c);
58
+ assert.strictEqual(code, 1);
59
+ assert.match(c.out.text, /unknown-verb/);
60
+ });
61
+
62
+ test('LV-5: reset is a no-op without a session and never throws', async () => {
63
+ const c = _capture();
64
+ const code = await run(['reset'], c);
65
+ assert.strictEqual(code, 0);
66
+ });
@@ -324,21 +324,27 @@ function _runPostCritics(taskId, list, cwd) {
324
324
  ? nubosloop.coerceMaxRounds(override)
325
325
  : opts.maxRounds;
326
326
  const auditFindings = nubosloop.auditFindingsFromAudits(prev.tool_use_audit, round, taskId);
327
+ const skillFindings = nubosloop.skillFindingsFromState(prev, round, taskId);
328
+ const combinedAudit = skillFindings.length ? auditFindings.concat(skillFindings) : auditFindings;
327
329
  evalResult = nubosloop.evaluateLoop(
328
330
  { round },
329
331
  criticOutputs,
330
- { maxRounds: effectiveMax, auditFindings },
332
+ { maxRounds: effectiveMax, auditFindings: combinedAudit },
331
333
  );
332
334
  const perRound = (prev.findings_per_round && typeof prev.findings_per_round === 'object')
333
335
  ? safeAssign({}, prev.findings_per_round)
334
336
  : {};
335
337
  perRound[String(round)] = evalResult.findings;
336
338
  const routed = nubosloop.markAuditsRoutedInArray(prev.tool_use_audit, round);
339
+ const skillRoutedRounds = skillFindings.length
340
+ ? nubosloop.markSkillFindingsRoutedInArray(prev.skill_routed_rounds, round)
341
+ : (Array.isArray(prev.skill_routed_rounds) ? prev.skill_routed_rounds : []);
337
342
  const partial = {
338
343
  last_phase: 'post-critics',
339
344
  last_action: evalResult.next_action,
340
345
  findings: evalResult.findings,
341
346
  findings_per_round: perRound,
347
+ skill_routed_rounds: skillRoutedRounds,
342
348
  tool_use_audit: routed.audits,
343
349
  };
344
350
  if (force) partial.forced_post_critics = true;
@@ -18,6 +18,7 @@ const {
18
18
  moduleDocPath,
19
19
  indexDocPath,
20
20
  } = require('../../lib/codebase-docs.cjs');
21
+ const { buildModuleGraph } = require('../../lib/codebase-graph.cjs');
21
22
 
22
23
  function _parseArgs(args) {
23
24
  const flags = {
@@ -73,6 +74,16 @@ function _emitPlan(projectRoot, flags, stdout) {
73
74
  projectRoot,
74
75
  path.join(projectRoot, '.nubos-pilot', 'codebase', '.hashes.json'),
75
76
  ),
77
+ graph_path: path.relative(
78
+ projectRoot,
79
+ path.join(projectRoot, '.nubos-pilot', 'codebase', '.graph.json'),
80
+ ),
81
+ graph: {
82
+ module_count: modulesResult.graph.module_count,
83
+ edge_count: modulesResult.graph.edge_count,
84
+ cycle_count: modulesResult.graph.cycles.length,
85
+ unresolved_internal_deps: modulesResult.graph.metrics.unresolved_internal_deps,
86
+ },
76
87
  }, null, 2));
77
88
  }
78
89
 
@@ -103,6 +114,15 @@ function _scanAndBuild(projectRoot, flags) {
103
114
  fs.mkdirSync(path.dirname(indexMapPath), { recursive: true });
104
115
  atomicWriteFileSync(indexMapPath, JSON.stringify(docIndex, null, 2) + '\n');
105
116
 
117
+ const graph = buildModuleGraph(modules.map((m) => m.facts));
118
+ const graphPath = path.join(
119
+ projectRoot,
120
+ '.nubos-pilot',
121
+ 'codebase',
122
+ '.graph.json',
123
+ );
124
+ atomicWriteFileSync(graphPath, JSON.stringify(graph, null, 2) + '\n');
125
+
106
126
  const indexPath = indexDocPath(projectRoot);
107
127
  fs.mkdirSync(path.dirname(indexPath), { recursive: true });
108
128
  atomicWriteFileSync(indexPath, buildIndexDoc(modules, { project_name: flags.projectName || null }));
@@ -115,7 +135,7 @@ function _scanAndBuild(projectRoot, flags) {
115
135
  atomicWriteFileSync(docPath, renderModuleDoc(mod.facts, null, hashLookup));
116
136
  }
117
137
 
118
- return { scan: scanResult, modules, manifest, hashLookup };
138
+ return { scan: scanResult, modules, manifest, hashLookup, graph };
119
139
  }
120
140
 
121
141
  function _applyProse(projectRoot, flags, stdout) {
@@ -0,0 +1,79 @@
1
+ 'use strict';
2
+
3
+ const nubosloop = require('../../lib/nubosloop.cjs');
4
+ const checkpoint = require('../../lib/checkpoint.cjs');
5
+ const { TASK_ID_RE } = require('../../lib/ids.cjs');
6
+ const args = require('./_args.cjs');
7
+ const { NubosPilotError } = require('../../lib/core.cjs');
8
+
9
/**
 * Build the help text for the `skill-audit` command.
 *
 * @returns {string} multi-line usage text without a trailing newline
 */
function _usage() {
  const synopsis = [
    'Usage:',
    ' np-tools.cjs skill-audit expect --task <id> --skills <a,b,c> (orchestrator: record injected skills)',
    ' np-tools.cjs skill-audit ack --task <id> --skill <name> (executor: stamp a consulted skill)',
    ' np-tools.cjs skill-audit findings --task <id> [--round <n>] (read-only: list unmet skill bars)',
  ];
  const rationale = [
    'Mechanical counterpart to the Rule-9 search audit: a skill injected as a task\'s',
    'quality bar that the executor never consulted becomes a `skill-bar-unconsulted`',
    'finding at post-critics, routing the task back to the executor (once per round).',
  ];
  // A blank line separates the synopsis from the explanation.
  return [...synopsis, '', ...rationale].join('\n');
}
21
+
22
/**
 * Validate a task id against the shared task-id pattern.
 *
 * A non-matching id is reported under the `skill-audit-invalid-task-id` code.
 *
 * @param {string} taskId - value of the `--task` flag
 */
function _assertTask(taskId) {
  const errorCode = 'skill-audit-invalid-task-id';
  const fieldName = 'taskId';
  args.assertMatch(taskId, TASK_ID_RE, errorCode, fieldName);
}
25
+
26
/**
 * Entry point for the `skill-audit` command.
 *
 * Verbs:
 *  - `expect --task <id> --skills <a,b,c>` — record the skills injected for a task.
 *  - `ack --task <id> --skill <name>`      — record that a skill was consulted.
 *  - `findings --task <id> [--round <n>]`  — list unmet skill bars (read-only).
 *
 * Successful results are written to stdout as one JSON line. Failures are
 * written to stderr as a JSON error envelope.
 *
 * @param {string[]} argv - `[verb, ...flags]`
 * @param {{cwd?: string, stdout?: {write: Function}, stderr?: {write: Function}}} [ctx]
 *   overrides for the working directory and output streams
 * @returns {number} 0 on success or when printing help, 1 on any error
 */
function run(argv, ctx) {
  const context = ctx || {};
  const cwd = context.cwd || process.cwd();
  const stdout = context.stdout || process.stdout;
  const stderr = context.stderr || process.stderr;
  const list = Array.isArray(argv) ? argv : [];
  const verb = list[0];
  const tail = list.slice(1);

  if (!verb || verb === '-h' || verb === '--help') { stdout.write(_usage() + '\n'); return 0; }

  try {
    if (verb === 'expect') {
      const taskId = args.getFlag(tail, '--task');
      _assertTask(taskId);
      const raw = args.getFlag(tail, '--skills') || '';
      // Comma-separated list; blank entries are dropped.
      const skills = String(raw).split(',').map((s) => s.trim()).filter(Boolean);
      const res = nubosloop.recordExpectedSkills(taskId, skills, cwd);
      stdout.write(JSON.stringify(res) + '\n');
      return 0;
    }
    if (verb === 'ack') {
      const taskId = args.getFlag(tail, '--task');
      _assertTask(taskId);
      const skill = args.getFlag(tail, '--skill');
      if (!skill) throw new NubosPilotError('skill-audit-missing-skill', 'ack requires --skill <name>', {});
      const res = nubosloop.recordSkillEvidence(taskId, skill, cwd);
      stdout.write(JSON.stringify(res) + '\n');
      return 0;
    }
    if (verb === 'findings') {
      const taskId = args.getFlag(tail, '--task');
      _assertTask(taskId);
      const roundArg = args.getFlag(tail, '--round');
      let explicitRound = null;
      if (roundArg != null) {
        explicitRound = Number(roundArg);
        // A non-numeric --round would otherwise reach the audit as NaN and be
        // echoed back as `null`; reject it with a proper error envelope.
        if (!Number.isInteger(explicitRound) || explicitRound < 0) {
          throw new NubosPilotError(
            'skill-audit-invalid-round',
            '--round must be a non-negative integer',
            { round: String(roundArg) },
          );
        }
      }
      const cp = checkpoint.readCheckpoint(taskId, cwd) || {};
      const prev = cp.nubosloop || {};
      // Without --round, use the round stored in the checkpoint (default 1).
      const round = explicitRound != null ? explicitRound : (Number(prev.round) || 1);
      const findings = nubosloop.skillFindingsFromState(prev, round, taskId);
      stdout.write(JSON.stringify({ task_id: taskId, round, findings }) + '\n');
      return 0;
    }
    stderr.write(JSON.stringify({ code: 'skill-audit-unknown-verb', message: 'Unknown verb: ' + verb, details: { verb, verbs: ['expect', 'ack', 'findings'] } }) + '\n');
    return 1;
  } catch (err) {
    args.emitErrorEnvelope(err, stderr, 'skill-audit-internal-error');
    return 1;
  }
}
74
+
75
+ module.exports = { run };
76
+
77
+ if (require.main === module) {
78
+ process.exit(run(process.argv.slice(3)));
79
+ }
@@ -0,0 +1,86 @@
1
+ 'use strict';
2
+
3
+ const { test } = require('node:test');
4
+ const assert = require('node:assert');
5
+ const fs = require('node:fs');
6
+ const os = require('node:os');
7
+ const path = require('node:path');
8
+ const { run } = require('./skill-audit.cjs');
9
+ const checkpoint = require('../../lib/checkpoint.cjs');
10
+
11
/**
 * Create a throwaway project root for a test.
 *
 * The root is a fresh temp directory containing `.nubos-pilot/checkpoints/`
 * and a minimal `.nubos-pilot/STATE.md` (schema_version 2, with null
 * phase/plan/task). The caller is responsible for removing it.
 *
 * @returns {string} absolute path of the new root
 */
function _mkRoot() {
  const root = fs.mkdtempSync(path.join(os.tmpdir(), 'np-skill-cli-'));
  const pilotDir = path.join(root, '.nubos-pilot');
  fs.mkdirSync(path.join(pilotDir, 'checkpoints'), { recursive: true });

  // Front-matter only; the trailing '' yields the final newline.
  const stateDoc = [
    '---',
    'schema_version: 2',
    'current_phase: null',
    'current_plan: null',
    'current_task: null',
    '---',
    '',
  ].join('\n');
  fs.writeFileSync(path.join(pilotDir, 'STATE.md'), stateDoc, 'utf-8');

  return root;
}
21
/**
 * Build a test context for `run`: the given cwd plus stdout/stderr stand-ins
 * that accumulate written text in `out.text` and `err.text`.
 *
 * @param {string} cwd - working directory to hand to the command
 * @returns {{cwd: string, stdout: {write: Function}, stderr: {write: Function},
 *   out: {text: string}, err: {text: string}}}
 */
function _cap(cwd) {
  const out = { text: '' };
  const err = { text: '' };
  // A writable stand-in that appends every chunk to the given buffer.
  const sinkFor = (buffer) => ({
    write(chunk) {
      buffer.text += chunk;
      return true;
    },
  });
  return { cwd, stdout: sinkFor(out), stderr: sinkFor(err), out, err };
}
25
+ const TID = 'M001-S001-T0001';
26
+
27
+ test('SC-1: expect then findings reports the unacked skill', () => {
28
+ const r = _mkRoot();
29
+ try {
30
+ checkpoint.startTask({ id: TID }, r);
31
+ assert.equal(run(['expect', '--task', TID, '--skills', 'np-api-design,np-encryption'], _cap(r)), 0);
32
+ const c = _cap(r);
33
+ assert.equal(run(['findings', '--task', TID], c), 0);
34
+ const parsed = JSON.parse(c.out.text);
35
+ assert.equal(parsed.findings.length, 1);
36
+ assert.deepEqual(parsed.findings[0].raw.missing_skills.sort(), ['np-api-design', 'np-encryption']);
37
+ } finally { fs.rmSync(r, { recursive: true, force: true }); }
38
+ });
39
+
40
+ test('SC-2: ack clears the finding for that skill', () => {
41
+ const r = _mkRoot();
42
+ try {
43
+ checkpoint.startTask({ id: TID }, r);
44
+ run(['expect', '--task', TID, '--skills', 'np-api-design'], _cap(r));
45
+ run(['ack', '--task', TID, '--skill', 'np-api-design'], _cap(r));
46
+ const c = _cap(r);
47
+ run(['findings', '--task', TID], c);
48
+ assert.equal(JSON.parse(c.out.text).findings.length, 0);
49
+ } finally { fs.rmSync(r, { recursive: true, force: true }); }
50
+ });
51
+
52
+ test('SC-3: invalid task id → error envelope exit 1', () => {
53
+ const c = _cap(process.cwd());
54
+ assert.equal(run(['ack', '--task', 'bogus', '--skill', 'x'], c), 1);
55
+ assert.match(c.err.text, /skill-audit-invalid-task-id/);
56
+ });
57
+
58
+ test('SC-4: ack without --skill → error envelope exit 1', () => {
59
+ const r = _mkRoot();
60
+ try {
61
+ checkpoint.startTask({ id: TID }, r);
62
+ const c = _cap(r);
63
+ assert.equal(run(['ack', '--task', TID], c), 1);
64
+ assert.match(c.err.text, /skill-audit-missing-skill/);
65
+ } finally { fs.rmSync(r, { recursive: true, force: true }); }
66
+ });
67
+
68
+ test('SC-5: unknown verb → exit 1; --help → exit 0', () => {
69
+ const c1 = _cap(process.cwd());
70
+ assert.equal(run(['bogus'], c1), 1);
71
+ assert.match(c1.err.text, /skill-audit-unknown-verb/);
72
+ const c2 = _cap(process.cwd());
73
+ assert.equal(run(['--help'], c2), 0);
74
+ assert.match(c2.out.text, /skill-audit/);
75
+ });
76
+
77
+ test('SC-6: expect with empty skills is a no-op (no findings)', () => {
78
+ const r = _mkRoot();
79
+ try {
80
+ checkpoint.startTask({ id: TID }, r);
81
+ assert.equal(run(['expect', '--task', TID, '--skills', ''], _cap(r)), 0);
82
+ const c = _cap(r);
83
+ run(['findings', '--task', TID], c);
84
+ assert.equal(JSON.parse(c.out.text).findings.length, 0);
85
+ } finally { fs.rmSync(r, { recursive: true, force: true }); }
86
+ });
@@ -0,0 +1,65 @@
1
+ 'use strict';
2
+
3
+ const { summarize, describe } = require('../../lib/eval-reliability.cjs');
4
+ const { emitErrorEnvelope } = require('./_args.cjs');
5
+
6
/**
 * Build the help text for the `verify-reliability` command.
 *
 * @returns {string} multi-line usage text without a trailing newline
 */
function _usage() {
  const synopsis = [
    'Usage:',
    ' np-tools.cjs verify-reliability --codes <c1,c2,...>',
  ];
  const explanation = [
    'pass@k reliability: the orchestrator runs a task\'s <verify> command k times',
    'and passes the collected exit codes (0 = pass). Emits a JSON summary whose',
    '`aggregate_exit_code` is 0 only when every run passed (pass^k) — feed it to',
    '`loop-run-round --phase post-executor --verify-exit-code`. A flaky task',
    'aggregates to red and flows through the normal build-fixer path.',
  ];
  // A blank line separates the synopsis from the explanation.
  return [...synopsis, '', ...explanation].join('\n');
}
18
+
19
/**
 * Entry point for the `verify-reliability` command.
 *
 * Accepts `--codes <c1,c2,...>` (or `--codes=<...>`), converts the
 * comma-separated exit codes to numbers, and writes the reliability summary
 * plus a human-readable `description` to stdout as one JSON line.
 *
 * @param {string[]} argv - command-line flags
 * @param {{stdout?: {write: Function}, stderr?: {write: Function}}} [ctx]
 *   overrides for the output streams
 * @returns {number} 0 on success or when printing help; 1 for an unknown
 *   argument, a missing `--codes`, or an error raised while summarising
 */
function run(argv, ctx) {
  const io = ctx || {};
  const out = io.stdout || process.stdout;
  const errOut = io.stderr || process.stderr;
  const tokens = Array.isArray(argv) ? argv.slice() : [];

  // Write a JSON error envelope to stderr and yield the failure exit code.
  const fail = (code, message, details) => {
    errOut.write(JSON.stringify({ code, message, details }) + '\n');
    return 1;
  };

  let codesRaw = null;
  let idx = 0;
  while (idx < tokens.length) {
    const token = tokens[idx];
    if (token === '-h' || token === '--help') {
      out.write(_usage() + '\n');
      return 0;
    }
    if (token === '--codes') {
      // The value is the next token; a missing value is treated as empty.
      idx += 1;
      codesRaw = tokens[idx] || '';
    } else if (token.startsWith('--codes=')) {
      codesRaw = token.slice('--codes='.length);
    } else {
      return fail('verify-reliability-unknown-arg', 'Unknown argument: ' + token, { arg: token });
    }
    idx += 1;
  }

  if (codesRaw == null) {
    return fail('verify-reliability-missing-codes', '--codes <c1,c2,...> is required', {});
  }

  try {
    const codes = [];
    for (const piece of String(codesRaw).split(',')) {
      const trimmed = piece.trim();
      if (trimmed !== '') codes.push(Number(trimmed));
    }
    const summary = summarize(codes);
    const description = describe(summary);
    out.write(JSON.stringify(Object.assign({}, summary, { description })) + '\n');
    return 0;
  } catch (err) {
    emitErrorEnvelope(err, errOut, 'verify-reliability-internal-error');
    return 1;
  }
}
60
+
61
+ module.exports = { run };
62
+
63
+ if (require.main === module) {
64
+ process.exit(run(process.argv.slice(2)));
65
+ }
@@ -0,0 +1,69 @@
1
+ 'use strict';
2
+
3
+ const { test } = require('node:test');
4
+ const assert = require('node:assert');
5
+ const { run } = require('./verify-reliability.cjs');
6
+
7
/**
 * Build a test context whose `stdout` and `stderr` accumulate written text
 * in `out.text` and `err.text` respectively.
 *
 * @returns {{stdout: {write: Function}, stderr: {write: Function},
 *   out: {text: string}, err: {text: string}}}
 */
function _capture() {
  const out = { text: '' };
  const err = { text: '' };
  // A writable stand-in that appends every chunk to the given buffer.
  const recorder = (buffer) => ({
    write(chunk) {
      buffer.text += chunk;
      return true;
    },
  });
  return { stdout: recorder(out), stderr: recorder(err), out, err };
}
16
+
17
+ test('VR-1: all-pass codes → aggregate 0, reliable-pass', () => {
18
+ const c = _capture();
19
+ const code = run(['--codes', '0,0,0'], c);
20
+ assert.strictEqual(code, 0);
21
+ const r = JSON.parse(c.out.text);
22
+ assert.strictEqual(r.aggregate_exit_code, 0);
23
+ assert.strictEqual(r.verdict, 'reliable-pass');
24
+ });
25
+
26
+ test('VR-2: flaky codes → aggregate 1, flaky verdict + loud description', () => {
27
+ const c = _capture();
28
+ const code = run(['--codes', '0,1,0'], c);
29
+ assert.strictEqual(code, 0);
30
+ const r = JSON.parse(c.out.text);
31
+ assert.strictEqual(r.aggregate_exit_code, 1);
32
+ assert.strictEqual(r.flaky, true);
33
+ assert.match(r.description, /FLAKY/);
34
+ });
35
+
36
+ test('VR-3: --codes= form supported', () => {
37
+ const c = _capture();
38
+ const code = run(['--codes=1,1'], c);
39
+ assert.strictEqual(code, 0);
40
+ assert.strictEqual(JSON.parse(c.out.text).verdict, 'reliable-fail');
41
+ });
42
+
43
+ test('VR-4: missing --codes → error envelope, exit 1', () => {
44
+ const c = _capture();
45
+ const code = run([], c);
46
+ assert.strictEqual(code, 1);
47
+ assert.match(c.err.text, /verify-reliability-missing-codes/);
48
+ });
49
+
50
+ test('VR-5: unknown arg → error envelope, exit 1', () => {
51
+ const c = _capture();
52
+ const code = run(['--bogus'], c);
53
+ assert.strictEqual(code, 1);
54
+ assert.match(c.err.text, /verify-reliability-unknown-arg/);
55
+ });
56
+
57
+ test('VR-6: empty codes → internal error envelope, exit 1', () => {
58
+ const c = _capture();
59
+ const code = run(['--codes', ''], c);
60
+ assert.strictEqual(code, 1);
61
+ assert.match(c.err.text, /eval-reliability-no-runs/);
62
+ });
63
+
64
+ test('VR-7: --help → usage exit 0', () => {
65
+ const c = _capture();
66
+ const code = run(['--help'], c);
67
+ assert.strictEqual(code, 0);
68
+ assert.match(c.out.text, /verify-reliability/);
69
+ });
@@ -247,6 +247,7 @@ const NP_AGENTS = [
247
247
  { file: 'np-nyquist-auditor', expected_tier: 'haiku' },
248
248
  { file: 'np-sc-extractor', expected_tier: 'haiku' },
249
249
  { file: 'np-critic', expected_tier: 'sonnet' },
250
+ { file: 'np-learnings-extractor', expected_tier: 'haiku' },
250
251
  ];
251
252
 
252
253
  // Audit-surface modules — files in agents/ that carry agent-shaped frontmatter
@@ -10,8 +10,10 @@ const {
10
10
  } = require('./core.cjs');
11
11
  const { parseState, serializeState } = require('./state.cjs');
12
12
  const { TASK_ID_RE } = require('./ids.cjs');
13
+ const { assertValid } = require('./validate.cjs');
13
14
 
14
15
  const CHECKPOINT_SCHEMA_VERSION = 1;
16
+ const STORE_SCHEMA = 'checkpoint.v1';
15
17
 
16
18
  function _assertSafeTaskId(taskId) {
17
19
  if (typeof taskId !== 'string' || !TASK_ID_RE.test(taskId)) {
@@ -78,6 +80,7 @@ function _assertCompatibleSchema(existing, cpPath) {
78
80
  },
79
81
  );
80
82
  }
83
+ assertValid(existing, STORE_SCHEMA, 'checkpoint-corrupt', { path: cpPath });
81
84
  }
82
85
 
83
86
  function _sliceFromTaskId(taskId) {
Binary file