nubos-pilot 1.2.1 → 1.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +43 -1
- package/agents/np-architect.md +2 -0
- package/agents/np-executor.md +1 -1
- package/agents/np-learnings-extractor.md +54 -0
- package/agents/np-planner.md +1 -1
- package/agents/np-security-reviewer.md +9 -0
- package/bin/np-tools/_commands.cjs +5 -0
- package/bin/np-tools/derive-tier.cjs +86 -0
- package/bin/np-tools/derive-tier.test.cjs +83 -0
- package/bin/np-tools/doctor.cjs +15 -2
- package/bin/np-tools/graph-impact.cjs +111 -0
- package/bin/np-tools/graph-impact.test.cjs +119 -0
- package/bin/np-tools/learnings.cjs +105 -0
- package/bin/np-tools/learnings.test.cjs +66 -0
- package/bin/np-tools/loop-run-round.cjs +7 -1
- package/bin/np-tools/scan-codebase.cjs +21 -1
- package/bin/np-tools/skill-audit.cjs +79 -0
- package/bin/np-tools/skill-audit.test.cjs +86 -0
- package/bin/np-tools/verify-reliability.cjs +65 -0
- package/bin/np-tools/verify-reliability.test.cjs +69 -0
- package/lib/agents.test.cjs +1 -0
- package/lib/checkpoint.cjs +3 -0
- package/lib/codebase-graph.cjs +0 -0
- package/lib/codebase-graph.test.cjs +174 -0
- package/lib/codebase-manifest.cjs +3 -0
- package/lib/config-defaults.cjs +13 -0
- package/lib/config-schema.cjs +11 -0
- package/lib/eval-reliability.cjs +63 -0
- package/lib/eval-reliability.test.cjs +56 -0
- package/lib/install/claude-hooks-learnings.test.cjs +82 -0
- package/lib/install/claude-hooks.cjs +65 -4
- package/lib/install/claude-hooks.test.cjs +5 -2
- package/lib/learnings/capture-ledger.cjs +80 -0
- package/lib/learnings/capture-ledger.test.cjs +54 -0
- package/lib/learnings/extract.cjs +191 -0
- package/lib/learnings/extract.test.cjs +115 -0
- package/lib/learnings.cjs +19 -95
- package/lib/memory.cjs +38 -33
- package/lib/messaging.cjs +12 -6
- package/lib/metrics-aggregate.cjs +14 -2
- package/lib/migrate.cjs +29 -0
- package/lib/migrate.test.cjs +91 -0
- package/lib/nubosloop-audit.cjs +104 -0
- package/lib/nubosloop-skill-audit.test.cjs +98 -0
- package/lib/nubosloop.cjs +9 -0
- package/lib/schemas/data/checkpoint.v1.json +13 -0
- package/lib/schemas/data/codebase-manifest.v1.json +22 -0
- package/lib/schemas/data/learnings.v1.json +28 -0
- package/lib/schemas/data/memory-manifest.v1.json +14 -0
- package/lib/schemas/data/memory-record.v1.json +16 -0
- package/lib/schemas/data/message.v1.json +19 -0
- package/lib/schemas/data/metrics-record.v1.json +11 -0
- package/lib/tier-classify.cjs +67 -0
- package/lib/tier-classify.test.cjs +67 -0
- package/lib/validate.cjs +301 -0
- package/lib/validate.test.cjs +242 -0
- package/np-tools.cjs +5 -0
- package/package.json +3 -1
- package/skills/np-access-control/SKILL.md +42 -0
- package/skills/np-accessibility-audit/SKILL.md +41 -0
- package/skills/np-adr/SKILL.md +37 -0
- package/skills/np-api-design/SKILL.md +34 -0
- package/skills/np-caching-strategy/SKILL.md +38 -0
- package/skills/np-data-modeling/SKILL.md +37 -0
- package/skills/np-data-privacy/SKILL.md +39 -0
- package/skills/np-dependency-audit/SKILL.md +47 -0
- package/skills/np-encryption/SKILL.md +47 -0
- package/skills/np-error-handling/SKILL.md +37 -0
- package/skills/np-incident-response/SKILL.md +38 -0
- package/skills/np-llm-app-architecture/SKILL.md +50 -0
- package/skills/np-observability/SKILL.md +39 -0
- package/skills/np-performance/SKILL.md +38 -0
- package/skills/np-queue-design/SKILL.md +32 -0
- package/skills/np-rag-design/SKILL.md +43 -0
- package/skills/np-refactoring/SKILL.md +35 -0
- package/skills/np-resilience-patterns/SKILL.md +39 -0
- package/skills/np-secure-code-review/SKILL.md +46 -0
- package/skills/np-secure-design/SKILL.md +44 -0
- package/skills/np-service-boundary/SKILL.md +35 -0
- package/skills/np-system-design/SKILL.md +40 -0
- package/skills/np-test-strategy/SKILL.md +46 -0
- package/skills/np-threat-model/SKILL.md +42 -0
- package/templates/claude/payload/hooks/np-learnings-hook.cjs +55 -0
- package/workflows/architect-phase.md +21 -1
- package/workflows/execute-phase.md +66 -4
- package/workflows/verify-work.md +17 -4
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const path = require('node:path');
|
|
4
|
+
const child_process = require('node:child_process');
|
|
5
|
+
|
|
6
|
+
const { tryReadConfigPath } = require('../../lib/config.cjs');
|
|
7
|
+
const ledger = require('../../lib/learnings/capture-ledger.cjs');
|
|
8
|
+
const extract = require('../../lib/learnings/extract.cjs');
|
|
9
|
+
const args = require('./_args.cjs');
|
|
10
|
+
|
|
11
|
+
/**
 * Collects whatever arrives on stdin and resolves with it as a UTF-8 string.
 *
 * Resolves with '' immediately when stdin is a TTY. Otherwise the promise
 * settles on whichever comes first: the stream's 'end', the stream's 'error',
 * or an 800 ms timer. It never rejects; on error or timeout the caller gets
 * the text buffered so far.
 *
 * @returns {Promise<string>} The text read from stdin (possibly empty).
 */
function _readStdin() {
  const input = process.stdin;
  return new Promise((resolve) => {
    if (input.isTTY) {
      resolve('');
      return;
    }

    let collected = '';
    input.setEncoding('utf-8');

    // Timeout path: detach every listener, then hand back what we have.
    const watchdog = setTimeout(() => {
      try {
        input.removeAllListeners();
      } catch {}
      resolve(collected);
    }, 800);

    // Shared by 'end' and 'error': cancel the timer and settle.
    const settle = () => {
      clearTimeout(watchdog);
      resolve(collected);
    };

    input.on('data', (chunk) => {
      collected += chunk;
    });
    input.on('end', settle);
    input.on('error', settle);
  });
}
|
|
22
|
+
|
|
23
|
+
/**
 * Parses a JSON payload string without ever throwing.
 *
 * Callers read properties off the result directly, so anything that is not a
 * plain JSON object is normalized to `{}`. In particular the literal `null`
 * parses successfully but would make a later property access throw.
 *
 * @param {string|undefined} s - Raw JSON text; may be empty or undefined.
 * @returns {object} The parsed object, or `{}` for empty, invalid, or
 *   non-object input (null, primitives, arrays).
 */
function _safeParse(s) {
  if (!s) return {};
  try {
    const parsed = JSON.parse(s);
    const isRecord = parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed);
    return isRecord ? parsed : {};
  } catch {
    // Malformed JSON is treated the same as "no payload".
    return {};
  }
}
|
|
24
|
+
|
|
25
|
+
/**
 * Resolves the hook payload for a command invocation.
 *
 * An inline `--payload <json>` value takes precedence; otherwise, when
 * `--stdin` is present, the payload is read from stdin. With neither flag the
 * result is an empty object.
 *
 * @param {string[]} argv - Argument list for the verb.
 * @returns {Promise<object>} Parsed payload as returned by `_safeParse`.
 */
async function _payload(argv) {
  const inline = args.getFlag(argv, '--payload', { allowDashValues: true });
  if (inline === undefined) {
    if (!argv.includes('--stdin')) return {};
    const piped = await _readStdin();
    return _safeParse(piped);
  }
  return _safeParse(inline);
}
|
|
31
|
+
|
|
32
|
+
/**
 * Reads the `learnings.*` settings for a project directory.
 *
 * `auto_capture` is on unless the configured value is exactly `false`.
 * Numeric settings go through `Number(...) || fallback`, so a value that
 * coerces to 0 or NaN yields the built-in default.
 *
 * @param {string} cwd - Directory whose configuration is consulted.
 * @returns {{auto_capture: boolean, max_per_hour: number, max_in_a_row: number, timeout_ms: number, max_files: number}}
 */
function _cfg(cwd) {
  const numeric = (key, fallback) => Number(tryReadConfigPath(cwd, key, fallback)) || fallback;
  const autoCapture = tryReadConfigPath(cwd, 'learnings.auto_capture', true);
  return {
    auto_capture: autoCapture !== false,
    max_per_hour: numeric('learnings.max_captures_per_hour', 10),
    max_in_a_row: numeric('learnings.max_in_a_row', 3),
    timeout_ms: numeric('learnings.timeout_ms', 120000),
    max_files: numeric('learnings.max_files', 30),
  };
}
|
|
41
|
+
|
|
42
|
+
/**
 * Launches the background `learnings run-extract` worker as a detached child
 * of the current Node executable and lets the parent exit without waiting.
 *
 * @param {string} cwd - Working directory for the worker.
 * @param {string} sid - Session id forwarded as `--session`.
 * @returns {boolean} `true` when a child process was started; `false` when
 *   spawning threw synchronously or no pid was assigned.
 */
function _spawnWorker(cwd, sid) {
  const npTools = path.join(__dirname, '..', '..', 'np-tools.cjs');
  try {
    const child = child_process.spawn(
      process.execPath,
      [npTools, 'learnings', 'run-extract', '--session', sid],
      { cwd, detached: true, stdio: 'ignore' },
    );
    // Some spawn failures (for example a missing cwd) are reported through an
    // asynchronous 'error' event rather than a throw. Without a listener that
    // event would surface as an uncaught exception in this process.
    child.on('error', () => {});
    child.unref();
    // A child that failed to start has no pid.
    return typeof child.pid === 'number';
  } catch {
    return false;
  }
}
|
|
54
|
+
|
|
55
|
+
/**
 * Writes one value to the given stream as a single JSON line.
 *
 * @param {{write: Function}} stdout - Destination stream.
 * @param {*} obj - Value to serialize with `JSON.stringify`.
 */
function _emit(stdout, obj) {
  const line = `${JSON.stringify(obj)}\n`;
  stdout.write(line);
}
|
|
56
|
+
|
|
57
|
+
/**
 * Entry point for the `learnings` command.
 *
 * Verbs:
 *  - `capture`: gated by `learnings.auto_capture` and the capture ledger; on
 *    success spawns the background extract worker.
 *  - `reset`: resets the capture streak for the session, if one is given.
 *  - `run-extract`: runs the extraction synchronously and prints its result.
 *
 * Every outcome is reported as one JSON line on stdout.
 *
 * @param {string[]} argv - Arguments, verb first.
 * @param {{cwd?: string, stdout?: {write: Function}}} [ctx] - Optional overrides.
 * @returns {Promise<number>} 0 for known verbs and for a missing verb, 1 for
 *   an unrecognized verb.
 */
async function run(argv, ctx) {
  const context = ctx || {};
  const cwd = context.cwd || process.cwd();
  const stdout = context.stdout || process.stdout;
  const list = Array.isArray(argv) ? argv : [];
  const verb = list[0];
  const cfg = _cfg(cwd);

  // 'reset' (UserPromptSubmit) and 'run-extract' (background worker) are not
  // gated by auto_capture so they keep working coherently, but 'capture' is.
  if (verb === 'capture') {
    if (!cfg.auto_capture) {
      _emit(stdout, { captured: false, reason: 'disabled' });
      return 0;
    }
    const payload = await _payload(list);
    const sid = payload.session_id || args.getFlag(list, '--session') || '';
    if (!sid) {
      _emit(stdout, { captured: false, reason: 'no-session' });
      return 0;
    }
    const gate = ledger.tryRecordCapture(sid, { maxPerHour: cfg.max_per_hour, maxStreak: cfg.max_in_a_row });
    if (!gate.allowed) {
      _emit(stdout, { captured: false, reason: gate.reason });
      return 0;
    }
    // Report what actually happened rather than assuming the worker started.
    const spawned = _spawnWorker(cwd, sid);
    _emit(stdout, { captured: true, spawned });
    return 0;
  }

  if (verb === 'reset') {
    const payload = await _payload(list);
    const sid = payload.session_id || args.getFlag(list, '--session') || '';
    if (sid) ledger.resetStreak(sid);
    return 0;
  }

  if (verb === 'run-extract') {
    const sid = args.getFlag(list, '--session') || '';
    try {
      const result = extract.runExtract({ cwd, sid, config: cfg });
      _emit(stdout, result);
    } catch (err) {
      // Extraction failures are reported in the envelope; the exit code stays 0.
      _emit(stdout, { ran: false, reason: 'error', error: String(err && err.code || err) });
    }
    return 0;
  }

  _emit(stdout, { error: 'unknown-verb', verb: verb || null, verbs: ['capture', 'reset', 'run-extract'] });
  return verb ? 1 : 0;
}
|
|
100
|
+
|
|
101
|
+
module.exports = { run };
|
|
102
|
+
|
|
103
|
+
if (require.main === module) {
|
|
104
|
+
run(process.argv.slice(3)).then((c) => process.exit(c)).catch(() => process.exit(0));
|
|
105
|
+
}
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const { test } = require('node:test');
|
|
4
|
+
const assert = require('node:assert');
|
|
5
|
+
const fs = require('node:fs');
|
|
6
|
+
const os = require('node:os');
|
|
7
|
+
const path = require('node:path');
|
|
8
|
+
const { run } = require('./learnings.cjs');
|
|
9
|
+
|
|
10
|
+
/**
 * Builds a fake stdout that accumulates everything written to it.
 *
 * @returns {{stdout: {write: Function}, out: {text: string}}} The stream stub
 *   and the buffer it appends to.
 */
function _capture() {
  const out = { text: '' };
  const stdout = {
    write(chunk) {
      out.text += chunk;
      return true;
    },
  };
  return { stdout, out };
}
|
|
14
|
+
|
|
15
|
+
// Without a session id (no payload, no --session) capture must decline.
test('LV-1: capture with no session → no-session, no spawn', async () => {
  const workdir = fs.mkdtempSync(path.join(os.tmpdir(), 'np-lv-'));
  try {
    const ctx = _capture();
    ctx.cwd = workdir;
    const exitCode = await run(['capture'], ctx);
    assert.strictEqual(exitCode, 0);
    assert.match(ctx.out.text, /no-session/);
  } finally {
    fs.rmSync(workdir, { recursive: true, force: true });
  }
});
|
|
25
|
+
|
|
26
|
+
// learnings.auto_capture = false in the project config short-circuits capture.
test('LV-2: capture disabled via config → disabled, no spawn', async () => {
  const workdir = fs.mkdtempSync(path.join(os.tmpdir(), 'np-lv-'));
  try {
    const configDir = path.join(workdir, '.nubos-pilot');
    fs.mkdirSync(configDir, { recursive: true });
    const config = { learnings: { auto_capture: false } };
    fs.writeFileSync(path.join(configDir, 'config.json'), JSON.stringify(config));

    const ctx = _capture();
    ctx.cwd = workdir;
    const payload = JSON.stringify({ session_id: 'abc' });
    const exitCode = await run(['capture', '--payload', payload], ctx);
    assert.strictEqual(exitCode, 0);
    assert.match(ctx.out.text, /disabled/);
  } finally {
    fs.rmSync(workdir, { recursive: true, force: true });
  }
});
|
|
41
|
+
|
|
42
|
+
// A fresh temp directory is not a repository, so extraction reports not-a-repo.
test('LV-3: run-extract on a non-repo cwd → ran:false not-a-repo', async () => {
  const workdir = fs.mkdtempSync(path.join(os.tmpdir(), 'np-lv-'));
  try {
    const ctx = _capture();
    ctx.cwd = workdir;
    const exitCode = await run(['run-extract', '--session', 'abc'], ctx);
    assert.strictEqual(exitCode, 0);
    const { ran, reason } = JSON.parse(ctx.out.text);
    assert.strictEqual(ran, false);
    assert.strictEqual(reason, 'not-a-repo');
  } finally {
    fs.rmSync(workdir, { recursive: true, force: true });
  }
});
|
|
54
|
+
|
|
55
|
+
// An unrecognized verb yields the error envelope and a non-zero exit code.
test('LV-4: unknown verb → error envelope, exit 1', async () => {
  const ctx = _capture();
  const exitCode = await run(['bogus'], ctx);
  assert.strictEqual(exitCode, 1);
  assert.match(ctx.out.text, /unknown-verb/);
});
|
|
61
|
+
|
|
62
|
+
// reset with no session id must simply succeed.
test('LV-5: reset is a no-op without a session and never throws', async () => {
  const ctx = _capture();
  const exitCode = await run(['reset'], ctx);
  assert.strictEqual(exitCode, 0);
});
|
|
@@ -324,21 +324,27 @@ function _runPostCritics(taskId, list, cwd) {
|
|
|
324
324
|
? nubosloop.coerceMaxRounds(override)
|
|
325
325
|
: opts.maxRounds;
|
|
326
326
|
const auditFindings = nubosloop.auditFindingsFromAudits(prev.tool_use_audit, round, taskId);
|
|
327
|
+
const skillFindings = nubosloop.skillFindingsFromState(prev, round, taskId);
|
|
328
|
+
const combinedAudit = skillFindings.length ? auditFindings.concat(skillFindings) : auditFindings;
|
|
327
329
|
evalResult = nubosloop.evaluateLoop(
|
|
328
330
|
{ round },
|
|
329
331
|
criticOutputs,
|
|
330
|
-
{ maxRounds: effectiveMax, auditFindings },
|
|
332
|
+
{ maxRounds: effectiveMax, auditFindings: combinedAudit },
|
|
331
333
|
);
|
|
332
334
|
const perRound = (prev.findings_per_round && typeof prev.findings_per_round === 'object')
|
|
333
335
|
? safeAssign({}, prev.findings_per_round)
|
|
334
336
|
: {};
|
|
335
337
|
perRound[String(round)] = evalResult.findings;
|
|
336
338
|
const routed = nubosloop.markAuditsRoutedInArray(prev.tool_use_audit, round);
|
|
339
|
+
const skillRoutedRounds = skillFindings.length
|
|
340
|
+
? nubosloop.markSkillFindingsRoutedInArray(prev.skill_routed_rounds, round)
|
|
341
|
+
: (Array.isArray(prev.skill_routed_rounds) ? prev.skill_routed_rounds : []);
|
|
337
342
|
const partial = {
|
|
338
343
|
last_phase: 'post-critics',
|
|
339
344
|
last_action: evalResult.next_action,
|
|
340
345
|
findings: evalResult.findings,
|
|
341
346
|
findings_per_round: perRound,
|
|
347
|
+
skill_routed_rounds: skillRoutedRounds,
|
|
342
348
|
tool_use_audit: routed.audits,
|
|
343
349
|
};
|
|
344
350
|
if (force) partial.forced_post_critics = true;
|
|
@@ -18,6 +18,7 @@ const {
|
|
|
18
18
|
moduleDocPath,
|
|
19
19
|
indexDocPath,
|
|
20
20
|
} = require('../../lib/codebase-docs.cjs');
|
|
21
|
+
const { buildModuleGraph } = require('../../lib/codebase-graph.cjs');
|
|
21
22
|
|
|
22
23
|
function _parseArgs(args) {
|
|
23
24
|
const flags = {
|
|
@@ -73,6 +74,16 @@ function _emitPlan(projectRoot, flags, stdout) {
|
|
|
73
74
|
projectRoot,
|
|
74
75
|
path.join(projectRoot, '.nubos-pilot', 'codebase', '.hashes.json'),
|
|
75
76
|
),
|
|
77
|
+
graph_path: path.relative(
|
|
78
|
+
projectRoot,
|
|
79
|
+
path.join(projectRoot, '.nubos-pilot', 'codebase', '.graph.json'),
|
|
80
|
+
),
|
|
81
|
+
graph: {
|
|
82
|
+
module_count: modulesResult.graph.module_count,
|
|
83
|
+
edge_count: modulesResult.graph.edge_count,
|
|
84
|
+
cycle_count: modulesResult.graph.cycles.length,
|
|
85
|
+
unresolved_internal_deps: modulesResult.graph.metrics.unresolved_internal_deps,
|
|
86
|
+
},
|
|
76
87
|
}, null, 2));
|
|
77
88
|
}
|
|
78
89
|
|
|
@@ -103,6 +114,15 @@ function _scanAndBuild(projectRoot, flags) {
|
|
|
103
114
|
fs.mkdirSync(path.dirname(indexMapPath), { recursive: true });
|
|
104
115
|
atomicWriteFileSync(indexMapPath, JSON.stringify(docIndex, null, 2) + '\n');
|
|
105
116
|
|
|
117
|
+
const graph = buildModuleGraph(modules.map((m) => m.facts));
|
|
118
|
+
const graphPath = path.join(
|
|
119
|
+
projectRoot,
|
|
120
|
+
'.nubos-pilot',
|
|
121
|
+
'codebase',
|
|
122
|
+
'.graph.json',
|
|
123
|
+
);
|
|
124
|
+
atomicWriteFileSync(graphPath, JSON.stringify(graph, null, 2) + '\n');
|
|
125
|
+
|
|
106
126
|
const indexPath = indexDocPath(projectRoot);
|
|
107
127
|
fs.mkdirSync(path.dirname(indexPath), { recursive: true });
|
|
108
128
|
atomicWriteFileSync(indexPath, buildIndexDoc(modules, { project_name: flags.projectName || null }));
|
|
@@ -115,7 +135,7 @@ function _scanAndBuild(projectRoot, flags) {
|
|
|
115
135
|
atomicWriteFileSync(docPath, renderModuleDoc(mod.facts, null, hashLookup));
|
|
116
136
|
}
|
|
117
137
|
|
|
118
|
-
return { scan: scanResult, modules, manifest, hashLookup };
|
|
138
|
+
return { scan: scanResult, modules, manifest, hashLookup, graph };
|
|
119
139
|
}
|
|
120
140
|
|
|
121
141
|
function _applyProse(projectRoot, flags, stdout) {
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const nubosloop = require('../../lib/nubosloop.cjs');
|
|
4
|
+
const checkpoint = require('../../lib/checkpoint.cjs');
|
|
5
|
+
const { TASK_ID_RE } = require('../../lib/ids.cjs');
|
|
6
|
+
const args = require('./_args.cjs');
|
|
7
|
+
const { NubosPilotError } = require('../../lib/core.cjs');
|
|
8
|
+
|
|
9
|
+
/**
 * Builds the help text for the `skill-audit` command.
 *
 * @returns {string} Multi-line usage text without a trailing newline.
 */
function _usage() {
  const synopsis = [
    'Usage:',
    ' np-tools.cjs skill-audit expect --task <id> --skills <a,b,c> (orchestrator: record injected skills)',
    ' np-tools.cjs skill-audit ack --task <id> --skill <name> (executor: stamp a consulted skill)',
    ' np-tools.cjs skill-audit findings --task <id> [--round <n>] (read-only: list unmet skill bars)',
  ];
  const rationale = [
    "Mechanical counterpart to the Rule-9 search audit: a skill injected as a task's",
    'quality bar that the executor never consulted becomes a `skill-bar-unconsulted`',
    'finding at post-critics, routing the task back to the executor (once per round).',
  ];
  // A single empty entry produces the blank line between the two sections.
  return [...synopsis, '', ...rationale].join('\n');
}
|
|
21
|
+
|
|
22
|
+
/**
 * Validates a task id against `TASK_ID_RE` via `args.assertMatch`, using the
 * `skill-audit-invalid-task-id` error code.
 *
 * @param {string} taskId - Value of the `--task` flag.
 */
function _assertTask(taskId) {
  const errorCode = 'skill-audit-invalid-task-id';
  const fieldName = 'taskId';
  args.assertMatch(taskId, TASK_ID_RE, errorCode, fieldName);
}
|
|
25
|
+
|
|
26
|
+
/**
 * Entry point for the `skill-audit` command.
 *
 * Verbs:
 *  - `expect --task <id> --skills <a,b,c>`: record the skills expected for a task.
 *  - `ack --task <id> --skill <name>`: record evidence that a skill was consulted.
 *  - `findings --task <id> [--round <n>]`: print the skill findings derived
 *    from the task's checkpoint state.
 *
 * Results go to stdout as one JSON line; failures go to stderr as an error
 * envelope.
 *
 * @param {string[]} argv - Arguments, verb first.
 * @param {{cwd?: string, stdout?: {write: Function}, stderr?: {write: Function}}} [ctx]
 * @returns {number} 0 on success or help, 1 on any error or unknown verb.
 */
function run(argv, ctx) {
  const context = ctx || {};
  const cwd = context.cwd || process.cwd();
  const stdout = context.stdout || process.stdout;
  const stderr = context.stderr || process.stderr;
  const list = Array.isArray(argv) ? argv : [];
  const verb = list[0];
  const tail = list.slice(1);

  if (!verb || verb === '-h' || verb === '--help') { stdout.write(_usage() + '\n'); return 0; }

  try {
    if (verb === 'expect') {
      const taskId = args.getFlag(tail, '--task');
      _assertTask(taskId);
      const raw = args.getFlag(tail, '--skills') || '';
      // Comma-separated list; blanks and surrounding whitespace are dropped.
      const skills = String(raw).split(',').map((s) => s.trim()).filter(Boolean);
      const res = nubosloop.recordExpectedSkills(taskId, skills, cwd);
      stdout.write(JSON.stringify(res) + '\n');
      return 0;
    }
    if (verb === 'ack') {
      const taskId = args.getFlag(tail, '--task');
      _assertTask(taskId);
      const skill = args.getFlag(tail, '--skill');
      if (!skill) throw new NubosPilotError('skill-audit-missing-skill', 'ack requires --skill <name>', {});
      const res = nubosloop.recordSkillEvidence(taskId, skill, cwd);
      stdout.write(JSON.stringify(res) + '\n');
      return 0;
    }
    if (verb === 'findings') {
      const taskId = args.getFlag(tail, '--task');
      _assertTask(taskId);
      const cp = checkpoint.readCheckpoint(taskId, cwd) || {};
      const prev = cp.nubosloop || {};
      const roundArg = args.getFlag(tail, '--round');
      const hasRoundArg = roundArg != null;
      // Without --round, fall back to the checkpoint's round, defaulting to 1.
      const round = hasRoundArg ? Number(roundArg) : (Number(prev.round) || 1);
      // An explicit --round that is not a non-negative integer (e.g. "abc")
      // would otherwise become NaN and be serialized as `round: null`.
      if (hasRoundArg && (!Number.isInteger(round) || round < 0)) {
        throw new NubosPilotError(
          'skill-audit-invalid-round',
          '--round must be a non-negative integer',
          { round: String(roundArg) },
        );
      }
      const findings = nubosloop.skillFindingsFromState(prev, round, taskId);
      stdout.write(JSON.stringify({ task_id: taskId, round, findings }) + '\n');
      return 0;
    }
    stderr.write(JSON.stringify({ code: 'skill-audit-unknown-verb', message: 'Unknown verb: ' + verb, details: { verb, verbs: ['expect', 'ack', 'findings'] } }) + '\n');
    return 1;
  } catch (err) {
    args.emitErrorEnvelope(err, stderr, 'skill-audit-internal-error');
    return 1;
  }
}
|
|
74
|
+
|
|
75
|
+
module.exports = { run };
|
|
76
|
+
|
|
77
|
+
if (require.main === module) {
|
|
78
|
+
process.exit(run(process.argv.slice(3)));
|
|
79
|
+
}
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const { test } = require('node:test');
|
|
4
|
+
const assert = require('node:assert');
|
|
5
|
+
const fs = require('node:fs');
|
|
6
|
+
const os = require('node:os');
|
|
7
|
+
const path = require('node:path');
|
|
8
|
+
const { run } = require('./skill-audit.cjs');
|
|
9
|
+
const checkpoint = require('../../lib/checkpoint.cjs');
|
|
10
|
+
|
|
11
|
+
/**
 * Creates a throwaway project root containing `.nubos-pilot/checkpoints/` and
 * a minimal `.nubos-pilot/STATE.md`.
 *
 * @returns {string} Absolute path of the new temp directory; the caller
 *   removes it.
 */
function _mkRoot() {
  const root = fs.mkdtempSync(path.join(os.tmpdir(), 'np-skill-cli-'));
  const pilotDir = path.join(root, '.nubos-pilot');
  fs.mkdirSync(path.join(pilotDir, 'checkpoints'), { recursive: true });

  const stateFrontmatter = [
    '---',
    'schema_version: 2',
    'current_phase: null',
    'current_plan: null',
    'current_task: null',
    '---',
    '',
  ].join('\n');
  fs.writeFileSync(path.join(pilotDir, 'STATE.md'), stateFrontmatter, 'utf-8');
  return root;
}
|
|
21
|
+
/**
 * Builds a command context with in-memory stdout/stderr stubs.
 *
 * @param {string} cwd - Working directory to expose on the context.
 * @returns {{cwd: string, stdout: {write: Function}, stderr: {write: Function}, out: {text: string}, err: {text: string}}}
 */
function _cap(cwd) {
  const out = { text: '' };
  const err = { text: '' };
  // Each stub appends to its own buffer and reports success like a stream.
  const sinkInto = (buffer) => ({
    write(chunk) {
      buffer.text += chunk;
      return true;
    },
  });
  return { cwd, stdout: sinkInto(out), stderr: sinkInto(err), out, err };
}
|
|
25
|
+
const TID = 'M001-S001-T0001';
|
|
26
|
+
|
|
27
|
+
// Two skills are expected and neither is acked: one finding lists both.
test('SC-1: expect then findings reports the unacked skill', () => {
  const root = _mkRoot();
  try {
    checkpoint.startTask({ id: TID }, root);
    const expectCode = run(['expect', '--task', TID, '--skills', 'np-api-design,np-encryption'], _cap(root));
    assert.equal(expectCode, 0);

    const probe = _cap(root);
    const findingsCode = run(['findings', '--task', TID], probe);
    assert.equal(findingsCode, 0);

    const { findings } = JSON.parse(probe.out.text);
    assert.equal(findings.length, 1);
    assert.deepEqual(findings[0].raw.missing_skills.sort(), ['np-api-design', 'np-encryption']);
  } finally {
    fs.rmSync(root, { recursive: true, force: true });
  }
});
|
|
39
|
+
|
|
40
|
+
// Acking the only expected skill leaves nothing to report.
test('SC-2: ack clears the finding for that skill', () => {
  const root = _mkRoot();
  try {
    checkpoint.startTask({ id: TID }, root);
    run(['expect', '--task', TID, '--skills', 'np-api-design'], _cap(root));
    run(['ack', '--task', TID, '--skill', 'np-api-design'], _cap(root));

    const probe = _cap(root);
    run(['findings', '--task', TID], probe);
    const { findings } = JSON.parse(probe.out.text);
    assert.equal(findings.length, 0);
  } finally {
    fs.rmSync(root, { recursive: true, force: true });
  }
});
|
|
51
|
+
|
|
52
|
+
// A malformed task id is rejected before any state is touched.
test('SC-3: invalid task id → error envelope exit 1', () => {
  const ctx = _cap(process.cwd());
  const exitCode = run(['ack', '--task', 'bogus', '--skill', 'x'], ctx);
  assert.equal(exitCode, 1);
  assert.match(ctx.err.text, /skill-audit-invalid-task-id/);
});
|
|
57
|
+
|
|
58
|
+
// ack needs a skill name; omitting --skill is reported as an error.
test('SC-4: ack without --skill → error envelope exit 1', () => {
  const root = _mkRoot();
  try {
    checkpoint.startTask({ id: TID }, root);
    const ctx = _cap(root);
    const exitCode = run(['ack', '--task', TID], ctx);
    assert.equal(exitCode, 1);
    assert.match(ctx.err.text, /skill-audit-missing-skill/);
  } finally {
    fs.rmSync(root, { recursive: true, force: true });
  }
});
|
|
67
|
+
|
|
68
|
+
// Unknown verbs fail on stderr; --help prints usage on stdout and succeeds.
test('SC-5: unknown verb → exit 1; --help → exit 0', () => {
  const unknownCtx = _cap(process.cwd());
  const unknownCode = run(['bogus'], unknownCtx);
  assert.equal(unknownCode, 1);
  assert.match(unknownCtx.err.text, /skill-audit-unknown-verb/);

  const helpCtx = _cap(process.cwd());
  const helpCode = run(['--help'], helpCtx);
  assert.equal(helpCode, 0);
  assert.match(helpCtx.out.text, /skill-audit/);
});
|
|
76
|
+
|
|
77
|
+
// An empty --skills value records nothing, so no findings are produced.
test('SC-6: expect with empty skills is a no-op (no findings)', () => {
  const root = _mkRoot();
  try {
    checkpoint.startTask({ id: TID }, root);
    const expectCode = run(['expect', '--task', TID, '--skills', ''], _cap(root));
    assert.equal(expectCode, 0);

    const probe = _cap(root);
    run(['findings', '--task', TID], probe);
    const { findings } = JSON.parse(probe.out.text);
    assert.equal(findings.length, 0);
  } finally {
    fs.rmSync(root, { recursive: true, force: true });
  }
});
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const { summarize, describe } = require('../../lib/eval-reliability.cjs');
|
|
4
|
+
const { emitErrorEnvelope } = require('./_args.cjs');
|
|
5
|
+
|
|
6
|
+
/**
 * Builds the help text for the `verify-reliability` command.
 *
 * @returns {string} Multi-line usage text without a trailing newline.
 */
function _usage() {
  const synopsis = [
    'Usage:',
    ' np-tools.cjs verify-reliability --codes <c1,c2,...>',
  ];
  const explanation = [
    "pass@k reliability: the orchestrator runs a task's <verify> command k times",
    'and passes the collected exit codes (0 = pass). Emits a JSON summary whose',
    '`aggregate_exit_code` is 0 only when every run passed (pass^k) — feed it to',
    '`loop-run-round --phase post-executor --verify-exit-code`. A flaky task',
    'aggregates to red and flows through the normal build-fixer path.',
  ];
  // A single empty entry produces the blank line between the two sections.
  return [...synopsis, '', ...explanation].join('\n');
}
|
|
18
|
+
|
|
19
|
+
/**
 * Entry point for the `verify-reliability` command.
 *
 * Accepts `--codes <c1,c2,...>` or `--codes=<c1,c2,...>`, converts the
 * comma-separated entries to numbers, and prints the reliability summary plus
 * its description as one JSON line on stdout. `-h`/`--help` prints usage.
 * Argument errors and failures raised while summarizing go to stderr as an
 * error envelope.
 *
 * @param {string[]} argv - Command arguments.
 * @param {{stdout?: {write: Function}, stderr?: {write: Function}}} [ctx]
 * @returns {number} 0 on success or help, 1 on any error.
 */
function run(argv, ctx) {
  const context = ctx || {};
  const stdout = context.stdout || process.stdout;
  const stderr = context.stderr || process.stderr;
  const tokens = Array.isArray(argv) ? argv.slice() : [];

  // Writes an argument-error envelope and yields the failing exit code.
  const reject = (code, message, details) => {
    stderr.write(`${JSON.stringify({ code, message, details })}\n`);
    return 1;
  };

  let codesRaw = null;
  let index = 0;
  while (index < tokens.length) {
    const token = tokens[index];
    if (token === '-h' || token === '--help') {
      stdout.write(`${_usage()}\n`);
      return 0;
    }
    if (token === '--codes') {
      // The value is the next token; a missing value becomes ''.
      index += 1;
      codesRaw = tokens[index] || '';
    } else if (token.startsWith('--codes=')) {
      codesRaw = token.slice('--codes='.length);
    } else {
      return reject('verify-reliability-unknown-arg', `Unknown argument: ${token}`, { arg: token });
    }
    index += 1;
  }

  if (codesRaw == null) {
    return reject('verify-reliability-missing-codes', '--codes <c1,c2,...> is required', {});
  }

  try {
    const codes = [];
    for (const piece of String(codesRaw).split(',')) {
      const trimmed = piece.trim();
      if (trimmed !== '') codes.push(Number(trimmed));
    }
    const summary = summarize(codes);
    const description = describe(summary);
    stdout.write(`${JSON.stringify({ ...summary, description })}\n`);
    return 0;
  } catch (err) {
    emitErrorEnvelope(err, stderr, 'verify-reliability-internal-error');
    return 1;
  }
}
|
|
60
|
+
|
|
61
|
+
module.exports = { run };
|
|
62
|
+
|
|
63
|
+
if (require.main === module) {
|
|
64
|
+
process.exit(run(process.argv.slice(2)));
|
|
65
|
+
}
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const { test } = require('node:test');
|
|
4
|
+
const assert = require('node:assert');
|
|
5
|
+
const { run } = require('./verify-reliability.cjs');
|
|
6
|
+
|
|
7
|
+
/**
 * Builds in-memory stdout/stderr stubs together with the buffers they fill.
 *
 * @returns {{stdout: {write: Function}, stderr: {write: Function}, out: {text: string}, err: {text: string}}}
 */
function _capture() {
  const out = { text: '' };
  const err = { text: '' };
  // Each stub appends to its own buffer and reports success like a stream.
  const sinkInto = (buffer) => ({
    write(chunk) {
      buffer.text += chunk;
      return true;
    },
  });
  return { stdout: sinkInto(out), stderr: sinkInto(err), out, err };
}
|
|
16
|
+
|
|
17
|
+
// Every run passed: the aggregate is green and the verdict is reliable-pass.
test('VR-1: all-pass codes → aggregate 0, reliable-pass', () => {
  const ctx = _capture();
  const exitCode = run(['--codes', '0,0,0'], ctx);
  assert.strictEqual(exitCode, 0);
  const summary = JSON.parse(ctx.out.text);
  assert.strictEqual(summary.aggregate_exit_code, 0);
  assert.strictEqual(summary.verdict, 'reliable-pass');
});
|
|
25
|
+
|
|
26
|
+
// Mixed results: the command itself succeeds but reports a red, flaky summary.
test('VR-2: flaky codes → aggregate 1, flaky verdict + loud description', () => {
  const ctx = _capture();
  const exitCode = run(['--codes', '0,1,0'], ctx);
  assert.strictEqual(exitCode, 0);
  const summary = JSON.parse(ctx.out.text);
  assert.strictEqual(summary.aggregate_exit_code, 1);
  assert.strictEqual(summary.flaky, true);
  assert.match(summary.description, /FLAKY/);
});
|
|
35
|
+
|
|
36
|
+
// The inline --codes=<list> spelling is accepted as well.
test('VR-3: --codes= form supported', () => {
  const ctx = _capture();
  const exitCode = run(['--codes=1,1'], ctx);
  assert.strictEqual(exitCode, 0);
  const { verdict } = JSON.parse(ctx.out.text);
  assert.strictEqual(verdict, 'reliable-fail');
});
|
|
42
|
+
|
|
43
|
+
// With no arguments at all the required --codes flag is reported missing.
test('VR-4: missing --codes → error envelope, exit 1', () => {
  const ctx = _capture();
  const exitCode = run([], ctx);
  assert.strictEqual(exitCode, 1);
  assert.match(ctx.err.text, /verify-reliability-missing-codes/);
});
|
|
49
|
+
|
|
50
|
+
// Any unrecognized argument aborts with an error envelope.
test('VR-5: unknown arg → error envelope, exit 1', () => {
  const ctx = _capture();
  const exitCode = run(['--bogus'], ctx);
  assert.strictEqual(exitCode, 1);
  assert.match(ctx.err.text, /verify-reliability-unknown-arg/);
});
|
|
56
|
+
|
|
57
|
+
// An empty code list surfaces the eval-reliability-no-runs error on stderr.
test('VR-6: empty codes → internal error envelope, exit 1', () => {
  const ctx = _capture();
  const exitCode = run(['--codes', ''], ctx);
  assert.strictEqual(exitCode, 1);
  assert.match(ctx.err.text, /eval-reliability-no-runs/);
});
|
|
63
|
+
|
|
64
|
+
// --help prints the usage text on stdout and exits cleanly.
test('VR-7: --help → usage exit 0', () => {
  const ctx = _capture();
  const exitCode = run(['--help'], ctx);
  assert.strictEqual(exitCode, 0);
  assert.match(ctx.out.text, /verify-reliability/);
});
|
package/lib/agents.test.cjs
CHANGED
|
@@ -247,6 +247,7 @@ const NP_AGENTS = [
|
|
|
247
247
|
{ file: 'np-nyquist-auditor', expected_tier: 'haiku' },
|
|
248
248
|
{ file: 'np-sc-extractor', expected_tier: 'haiku' },
|
|
249
249
|
{ file: 'np-critic', expected_tier: 'sonnet' },
|
|
250
|
+
{ file: 'np-learnings-extractor', expected_tier: 'haiku' },
|
|
250
251
|
];
|
|
251
252
|
|
|
252
253
|
// Audit-surface modules — files in agents/ that carry agent-shaped frontmatter
|
package/lib/checkpoint.cjs
CHANGED
|
@@ -10,8 +10,10 @@ const {
|
|
|
10
10
|
} = require('./core.cjs');
|
|
11
11
|
const { parseState, serializeState } = require('./state.cjs');
|
|
12
12
|
const { TASK_ID_RE } = require('./ids.cjs');
|
|
13
|
+
const { assertValid } = require('./validate.cjs');
|
|
13
14
|
|
|
14
15
|
const CHECKPOINT_SCHEMA_VERSION = 1;
|
|
16
|
+
const STORE_SCHEMA = 'checkpoint.v1';
|
|
15
17
|
|
|
16
18
|
function _assertSafeTaskId(taskId) {
|
|
17
19
|
if (typeof taskId !== 'string' || !TASK_ID_RE.test(taskId)) {
|
|
@@ -78,6 +80,7 @@ function _assertCompatibleSchema(existing, cpPath) {
|
|
|
78
80
|
},
|
|
79
81
|
);
|
|
80
82
|
}
|
|
83
|
+
assertValid(existing, STORE_SCHEMA, 'checkpoint-corrupt', { path: cpPath });
|
|
81
84
|
}
|
|
82
85
|
|
|
83
86
|
function _sliceFromTaskId(taskId) {
|
|
Binary file
|