nubos-pilot 1.2.2 → 1.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +18 -0
- package/README.md +16 -0
- package/agents/np-architect.md +2 -0
- package/agents/np-executor.md +1 -1
- package/agents/np-learnings-extractor.md +54 -0
- package/agents/np-planner.md +1 -1
- package/agents/np-security-reviewer.md +9 -0
- package/bin/np-tools/_commands.cjs +4 -0
- package/bin/np-tools/derive-tier.cjs +86 -0
- package/bin/np-tools/derive-tier.test.cjs +83 -0
- package/bin/np-tools/learnings.cjs +109 -0
- package/bin/np-tools/learnings.test.cjs +66 -0
- package/bin/np-tools/loop-run-round.cjs +7 -1
- package/bin/np-tools/security.cjs +3 -0
- package/bin/np-tools/skill-audit.cjs +79 -0
- package/bin/np-tools/skill-audit.test.cjs +86 -0
- package/bin/np-tools/spawn-headless.cjs +35 -1
- package/bin/np-tools/spawn-headless.test.cjs +135 -0
- package/bin/np-tools/verify-reliability.cjs +65 -0
- package/bin/np-tools/verify-reliability.test.cjs +69 -0
- package/lib/agents.test.cjs +1 -0
- package/lib/config-defaults.cjs +13 -0
- package/lib/config-schema.cjs +11 -0
- package/lib/eval-reliability.cjs +63 -0
- package/lib/eval-reliability.test.cjs +56 -0
- package/lib/headless-guard.cjs +127 -0
- package/lib/headless-guard.test.cjs +119 -0
- package/lib/install/claude-hooks-learnings.test.cjs +82 -0
- package/lib/install/claude-hooks.cjs +65 -4
- package/lib/install/claude-hooks.test.cjs +5 -2
- package/lib/learnings/capture-ledger.cjs +80 -0
- package/lib/learnings/capture-ledger.test.cjs +54 -0
- package/lib/learnings/extract.cjs +191 -0
- package/lib/learnings/extract.test.cjs +115 -0
- package/lib/nubosloop-audit.cjs +104 -0
- package/lib/nubosloop-skill-audit.test.cjs +98 -0
- package/lib/nubosloop.cjs +9 -0
- package/lib/tier-classify.cjs +67 -0
- package/lib/tier-classify.test.cjs +67 -0
- package/np-tools.cjs +4 -0
- package/package.json +1 -1
- package/skills/np-access-control/SKILL.md +42 -0
- package/skills/np-accessibility-audit/SKILL.md +41 -0
- package/skills/np-adr/SKILL.md +37 -0
- package/skills/np-api-design/SKILL.md +34 -0
- package/skills/np-caching-strategy/SKILL.md +38 -0
- package/skills/np-data-modeling/SKILL.md +37 -0
- package/skills/np-data-privacy/SKILL.md +39 -0
- package/skills/np-dependency-audit/SKILL.md +47 -0
- package/skills/np-encryption/SKILL.md +47 -0
- package/skills/np-error-handling/SKILL.md +37 -0
- package/skills/np-incident-response/SKILL.md +38 -0
- package/skills/np-llm-app-architecture/SKILL.md +50 -0
- package/skills/np-observability/SKILL.md +39 -0
- package/skills/np-performance/SKILL.md +38 -0
- package/skills/np-queue-design/SKILL.md +32 -0
- package/skills/np-rag-design/SKILL.md +43 -0
- package/skills/np-refactoring/SKILL.md +35 -0
- package/skills/np-resilience-patterns/SKILL.md +39 -0
- package/skills/np-secure-code-review/SKILL.md +46 -0
- package/skills/np-secure-design/SKILL.md +44 -0
- package/skills/np-service-boundary/SKILL.md +35 -0
- package/skills/np-system-design/SKILL.md +40 -0
- package/skills/np-test-strategy/SKILL.md +46 -0
- package/skills/np-threat-model/SKILL.md +42 -0
- package/templates/claude/payload/hooks/np-learnings-hook.cjs +56 -0
- package/templates/claude/payload/hooks/np-security-hook.cjs +1 -0
- package/workflows/architect-phase.md +21 -1
- package/workflows/execute-phase.md +66 -4
- package/workflows/verify-work.md +17 -4
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const fs = require('node:fs');
|
|
4
|
+
const os = require('node:os');
|
|
5
|
+
const path = require('node:path');
|
|
6
|
+
const crypto = require('node:crypto');
|
|
7
|
+
|
|
8
|
+
const git = require('../git.cjs');
|
|
9
|
+
const knowledgeAdapter = require('../knowledge-adapter.cjs');
|
|
10
|
+
|
|
11
|
+
// Stop-hook learning auto-capture (ECC continuous-learning, np-native). A
|
|
12
|
+
// background worker spawns the read-only np-learnings-extractor headlessly over
|
|
13
|
+
// the turn's diff; it returns atomic {pattern, outcome} candidates which we fold
|
|
14
|
+
// into the existing learnings store via the knowledge adapter — the same store
|
|
15
|
+
// /np:execute-phase already auto-logs into. Mirrors lib/security/review.cjs.
|
|
16
|
+
|
|
17
|
+
const EXTRACTOR_AGENT = 'np-learnings-extractor';
|
|
18
|
+
const MAX_DIFF_BYTES = 64 * 1024;
|
|
19
|
+
const MAX_UNTRACKED_BYTES = 12 * 1024;
|
|
20
|
+
const MAX_CANDIDATES = 5;
|
|
21
|
+
const MAX_PATTERN_LEN = 2000;
|
|
22
|
+
const MAX_OUTCOME_LEN = 2000;
|
|
23
|
+
const VALID_OUTCOMES = new Set(['verified', 'failed', 'reverted', 'partial']);
|
|
24
|
+
|
|
25
|
+
/**
 * Report whether `cwd` lies inside a git work tree.
 *
 * Asks git via `rev-parse --is-inside-work-tree`; the answer counts only when
 * the command succeeded and printed exactly `true`.
 *
 * @param {string} cwd directory the git command runs in
 * @returns {boolean} truthy only for a confirmed work tree
 */
function isRepo(cwd) {
  const probe = git.runGit(['rev-parse', '--is-inside-work-tree'], { cwd });
  // A failed probe is passed straight back (falsy), matching `ok && …` semantics.
  if (!probe.ok) return probe.ok;
  const answer = String(probe.stdout).trim();
  return answer === 'true';
}
|
|
29
|
+
|
|
30
|
+
// Split command output into its non-empty lines. Accepts LF or CRLF line
// endings; a nullish or empty input yields an empty array.
function _lines(stdout) {
  const text = String(stdout || '');
  const rows = [];
  for (const row of text.split(/\r?\n/)) {
    if (row) rows.push(row);
  }
  return rows;
}
|
|
33
|
+
|
|
34
|
+
// "What changed this session": last commit (git show HEAD) plus any uncommitted
|
|
35
|
+
// working changes and untracked files, each capped. No baseline tracking —
|
|
36
|
+
// learnings are advisory, so a slightly wider window is acceptable.
|
|
37
|
+
// Read at most `maxBytes` bytes from the start of a file and decode them as
// UTF-8. Only the head is ever loaded, so a huge untracked file costs no more
// memory than the budget. Throws whatever the fs calls throw (missing file,
// directory, permission error); the caller treats that as "skip this file".
function _readUntrackedHead(absPath, maxBytes) {
  const fd = fs.openSync(absPath, 'r');
  try {
    const buf = Buffer.alloc(maxBytes);
    let filled = 0;
    while (filled < maxBytes) {
      // position=null reads from (and advances) the current file offset.
      const n = fs.readSync(fd, buf, filled, maxBytes - filled, null);
      if (n === 0) break; // EOF
      filled += n;
    }
    return buf.toString('utf-8', 0, filled);
  } finally {
    fs.closeSync(fd);
  }
}

/**
 * Collect the text the extractor reviews: the last commit, any uncommitted
 * working changes, and the head of each untracked file.
 *
 * @param {string} cwd repository directory the git commands run in
 * @param {number} [maxFiles] cap on the returned file list; a non-finite or
 *   negative value falls back to 30
 * @returns {{files: string[], truncatedFiles: boolean, diffText: string}}
 *   `files` is the de-duplicated, capped list of changed paths,
 *   `truncatedFiles` says whether the cap dropped entries, and `diffText` is
 *   the concatenated review text (commit + working diff capped at
 *   MAX_DIFF_BYTES, untracked content budgeted by MAX_UNTRACKED_BYTES).
 */
function computeTurnDiff(cwd, maxFiles) {
  // A negative cap would make slice() drop files from the END of the list.
  const cap = Number.isFinite(maxFiles) && maxFiles >= 0 ? maxFiles : 30;
  const committedNames = git.runGit(['diff-tree', '--no-commit-id', '--name-only', '-r', '--root', 'HEAD'], { cwd });
  const workingNames = git.runGit(['--no-pager', 'diff', '--name-only'], { cwd });
  const untracked = git.runGit(['ls-files', '--others', '--exclude-standard'], { cwd });

  const files = [...new Set([
    ..._lines(committedNames.stdout),
    ..._lines(workingNames.stdout),
    ..._lines(untracked.stdout),
  ])];
  const uniqueFiles = files.slice(0, cap);
  const truncatedFiles = files.length > cap;

  let diffText = '';
  const show = git.runGit(['--no-pager', 'show', '--no-color', 'HEAD'], { cwd });
  if (show.ok) diffText += String(show.stdout || '').slice(0, MAX_DIFF_BYTES);
  const working = git.runGit(['--no-pager', 'diff', '--no-color'], { cwd });
  // Reserve one character for the joining newline so the combined commit +
  // working text never exceeds MAX_DIFF_BYTES.
  const room = MAX_DIFF_BYTES - diffText.length - 1;
  if (working.ok && room >= 0) {
    diffText += '\n' + String(working.stdout || '').slice(0, room);
  }

  let untrackedBudget = MAX_UNTRACKED_BYTES;
  for (const f of _lines(untracked.stdout)) {
    if (untrackedBudget <= 0) break;
    let body = '';
    // Unreadable entries (deleted meanwhile, directories, permissions) are
    // skipped: learnings are advisory, one bad file must not abort capture.
    try { body = _readUntrackedHead(path.join(cwd, f), untrackedBudget); } catch { continue; }
    const chunk = '\n--- new file: ' + f + ' ---\n' + body.slice(0, untrackedBudget);
    diffText += chunk;
    untrackedBudget -= chunk.length;
  }

  return { files: uniqueFiles, truncatedFiles, diffText };
}
|
|
71
|
+
|
|
72
|
+
/**
 * Build the prompt handed to the learnings-extractor agent.
 *
 * @param {{files?: string[], truncatedFiles?: boolean, diffText?: string}} [opts]
 *   the changed-file list, whether that list was capped, and the diff text to
 *   review. Missing fields are treated as "no files" / "empty diff".
 * @returns {string} a `<learning_capture>` block containing the instructions,
 *   the file list, the fenced diff and the required JSON output shape.
 */
function buildExtractorPrompt(opts) {
  const o = opts || {};
  // `opts` is optional, so its fields must be too; the file list used to be
  // dereferenced unconditionally.
  const files = Array.isArray(o.files) ? o.files : [];
  const diffText = o.diffText == null ? '' : String(o.diffText);

  // A diff that itself contains a ``` run (markdown files, prompts) would
  // close a three-backtick fence early. Use a fence one backtick longer than
  // the longest run in the diff; it stays ``` when the diff has none.
  const backtickRuns = diffText.match(/`{3,}/g) || [];
  const longestRun = backtickRuns.reduce((max, run) => Math.max(max, run.length), 0);
  const fence = '`'.repeat(Math.max(3, longestRun + 1));

  const parts = [];
  parts.push('<learning_capture>');
  parts.push('You are running in learning-capture mode. Read the diff below — the work this session produced — and extract at most ' + MAX_CANDIDATES + ' ATOMIC, REUSABLE engineering learnings.');
  parts.push('');
  parts.push('A good learning is a durable, transferable rule a future agent on a SIMILAR task would benefit from — a convention discovered, a pitfall avoided, a fix that generalises. NOT a narration of what changed, NOT project-specific trivia, NOT anything obvious from reading the code.');
  parts.push('');
  parts.push('Each learning is one {pattern, outcome} pair:');
  parts.push('- pattern: the reusable rule, imperative and self-contained (e.g. "use jose for JWT verification, never hand-roll HS256").');
  parts.push('- outcome: one of verified | failed | reverted | partial — how it played out THIS session.');
  parts.push('');
  parts.push('If nothing meets the bar, return an empty list. Quality over quantity — zero is a valid, common answer.');
  parts.push('');
  parts.push('Changed files (' + files.length + (o.truncatedFiles ? '+, truncated' : '') + '):');
  parts.push(files.map((f) => '- ' + f).join('\n'));
  parts.push('');
  parts.push('Diff:');
  parts.push(fence + 'diff');
  parts.push(diffText);
  parts.push(fence);
  parts.push('');
  parts.push('Output ONLY a single JSON object (no prose, no markdown fence):');
  parts.push('{"learnings":[{"pattern":"...","outcome":"verified|failed|reverted|partial"}]}');
  parts.push('</learning_capture>');
  return parts.join('\n');
}
|
|
99
|
+
|
|
100
|
+
// Parse `s` as JSON; a parse failure is reported as null instead of throwing.
function _tryParseJson(s) {
  let value = null;
  try {
    value = JSON.parse(s);
  } catch {
    value = null;
  }
  return value;
}
|
|
101
|
+
// Return the contents of the first markdown code fence (optionally tagged
// `json`) found in `s`; when there is no fence, `s` is handed back unchanged.
function _stripFence(s) {
  const fenced = /```(?:json)?\s*([\s\S]*?)```/.exec(String(s));
  if (fenced === null) return s;
  return fenced[1];
}
|
|
105
|
+
|
|
106
|
+
/**
 * Turn the extractor's raw output into validated learning candidates.
 *
 * Accepted shapes, tried in order:
 *   1. a `{result: "<json string>"}` envelope, or `{result: {learnings: […]}}`;
 *   2. the bare `{learnings: […]}` object;
 *   3. that object inside a markdown code fence;
 *   4. that object surrounded by prose (first `{` to last `}`).
 *
 * @param {string} raw raw text returned by the spawn
 * @returns {{candidates: Array<{pattern: string, outcome: string}>, parse_ok: boolean}}
 *   `parse_ok` is false when no `learnings` array could be recovered.
 *   Candidates are trimmed, length-capped and limited to MAX_CANDIDATES.
 */
function parseExtractorOutput(raw) {
  if (!raw || typeof raw !== 'string') return { candidates: [], parse_ok: false };

  let resultText = raw;
  let env = null;
  const outer = _tryParseJson(raw);
  if (outer && typeof outer === 'object') {
    if (typeof outer.result === 'string') {
      resultText = outer.result;
    } else if (outer.result && typeof outer.result === 'object' && Array.isArray(outer.result.learnings)) {
      // Envelope whose payload was already parsed rather than stringified.
      env = outer.result;
    }
  }

  if (!env) env = _tryParseJson(resultText);
  if (!env) env = _tryParseJson(_stripFence(resultText));
  if (!env) {
    // Models often add a sentence around the JSON despite being told not to;
    // salvage the outermost {...} span before giving up.
    const start = resultText.indexOf('{');
    const end = resultText.lastIndexOf('}');
    if (start !== -1 && end > start) env = _tryParseJson(resultText.slice(start, end + 1));
  }
  if (!env || typeof env !== 'object' || !Array.isArray(env.learnings)) {
    return { candidates: [], parse_ok: false };
  }

  const candidates = env.learnings
    .filter((l) => l && typeof l === 'object' && typeof l.pattern === 'string' && l.pattern.trim())
    .map((l) => {
      // Normalise case and whitespace first so "Failed" is not silently
      // recorded as the 'verified' fallback.
      const outcome = String(l.outcome).trim().toLowerCase();
      return {
        pattern: l.pattern.trim().slice(0, MAX_PATTERN_LEN),
        outcome: VALID_OUTCOMES.has(outcome) ? outcome : 'verified',
      };
    })
    .slice(0, MAX_CANDIDATES);
  return { candidates, parse_ok: true };
}
|
|
126
|
+
|
|
127
|
+
/**
 * Default spawn implementation: run the extractor agent headlessly and return
 * its raw output.
 *
 * The prompt is passed through a temp file and the agent's answer is read
 * back from a second temp file; both are removed afterwards, whether or not
 * the run succeeded.
 *
 * @param {string} promptText full prompt for the agent
 * @param {{cwd: string, timeoutMs: number}} opts working directory and
 *   timeout forwarded to the headless runner
 * @returns {string} contents of the agent's output file
 * @throws if the prompt cannot be written, the headless run throws, or the
 *   output file cannot be read
 */
function _defaultSpawn(promptText, opts) {
  const spawnHeadless = require('../../bin/np-tools/spawn-headless.cjs');
  const tmp = os.tmpdir();
  const tag = process.pid + '-' + crypto.randomBytes(4).toString('hex');
  const promptPath = path.join(tmp, 'np-learn-prompt-' + tag + '.txt');
  const outputPath = path.join(tmp, 'np-learn-out-' + tag + '.json');
  // The prompt embeds the session's source diff and the temp dir is shared:
  // create the file owner-only (0600), and exclusively ('wx') so an existing
  // file or planted symlink at that path is never written through.
  fs.writeFileSync(promptPath, promptText, { encoding: 'utf-8', mode: 0o600, flag: 'wx' });
  try {
    spawnHeadless.run(
      ['--agent', EXTRACTOR_AGENT, '--prompt-path', promptPath, '--output-path', outputPath,
        '--timeout-ms', String(opts.timeoutMs)],
      // Swallow the runner's stdout; only the output file matters here.
      { cwd: opts.cwd, stdout: { write: () => {} } },
    );
    return fs.readFileSync(outputPath, 'utf-8');
  } finally {
    // Best-effort cleanup: a file that was never created is not an error.
    try { fs.unlinkSync(promptPath); } catch {}
    try { fs.unlinkSync(outputPath); } catch {}
  }
}
|
|
146
|
+
|
|
147
|
+
/**
 * Run one learning-capture pass: collect the diff, ask the extractor for
 * candidates, and log each candidate into the learnings store.
 *
 * Never throws for spawn, parse or log failures; the outcome is reported in
 * `reason`.
 *
 * @param {object} [opts]
 * @param {string} [opts.cwd] repository directory (defaults to process.cwd())
 * @param {{max_files?: number, timeout_ms?: number}} [opts.config]
 * @param {Function} [opts.spawnImpl] `(promptText, {cwd, timeoutMs}) => raw`;
 *   defaults to the headless agent spawn
 * @param {Function} [opts.logImpl] `(candidate) => void`; defaults to the
 *   knowledge adapter's `log`
 * @returns {{ran: boolean, logged: number, reason: string, candidates?: number,
 *   error?: string, log_errors?: string[]}}
 *   `reason` is one of `not-a-repo`, `empty-diff`, `spawn-failed`,
 *   `parse-failed`, `ok`. `error` accompanies `spawn-failed`; `log_errors`
 *   is present only when at least one candidate failed to log.
 */
function runExtract(opts) {
  const o = opts || {};
  const cwd = o.cwd || process.cwd();
  const config = o.config || {};
  const spawn = typeof o.spawnImpl === 'function' ? o.spawnImpl : _defaultSpawn;
  const logImpl = typeof o.logImpl === 'function'
    ? o.logImpl
    : (cand) => knowledgeAdapter.getAdapter(cwd).log({ pattern: cand.pattern, outcome: cand.outcome });
  const describe = (err) => (err && err.message ? String(err.message) : String(err));

  if (!isRepo(cwd)) return { ran: false, reason: 'not-a-repo', logged: 0 };

  const maxFiles = Number.isFinite(config.max_files) ? config.max_files : 30;
  const diff = computeTurnDiff(cwd, maxFiles);
  if (!String(diff.diffText).trim()) {
    return { ran: true, logged: 0, reason: 'empty-diff' };
  }

  // Only a positive, finite timeout is forwarded; anything else (NaN,
  // negative, empty) falls back to the two-minute default.
  const requestedTimeout = Number(config.timeout_ms);
  const timeoutMs = Number.isFinite(requestedTimeout) && requestedTimeout > 0 ? requestedTimeout : 120000;

  const promptText = buildExtractorPrompt(diff);
  let raw = '';
  try {
    raw = spawn(promptText, { cwd, timeoutMs });
  } catch (err) {
    // Capture stays best-effort, but keep the cause so a failing background
    // worker can be diagnosed instead of vanishing silently.
    return { ran: true, logged: 0, reason: 'spawn-failed', error: describe(err) };
  }

  const parsed = parseExtractorOutput(raw);
  if (!parsed.parse_ok) return { ran: true, logged: 0, reason: 'parse-failed' };

  let logged = 0;
  const logErrors = [];
  for (const cand of parsed.candidates) {
    try {
      logImpl(cand);
      logged += 1;
    } catch (err) {
      // One bad candidate must not abort the rest; remember why it failed.
      logErrors.push(describe(err));
    }
  }
  const result = { ran: true, logged, candidates: parsed.candidates.length, reason: 'ok' };
  if (logErrors.length > 0) result.log_errors = logErrors;
  return result;
}
|
|
181
|
+
|
|
182
|
+
module.exports = {
|
|
183
|
+
EXTRACTOR_AGENT,
|
|
184
|
+
isRepo,
|
|
185
|
+
computeTurnDiff,
|
|
186
|
+
buildExtractorPrompt,
|
|
187
|
+
parseExtractorOutput,
|
|
188
|
+
runExtract,
|
|
189
|
+
MAX_CANDIDATES,
|
|
190
|
+
VALID_OUTCOMES,
|
|
191
|
+
};
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const { test } = require('node:test');
|
|
4
|
+
const assert = require('node:assert');
|
|
5
|
+
const fs = require('node:fs');
|
|
6
|
+
const os = require('node:os');
|
|
7
|
+
const path = require('node:path');
|
|
8
|
+
const cp = require('node:child_process');
|
|
9
|
+
const extract = require('./extract.cjs');
|
|
10
|
+
|
|
11
|
+
// Test fixture: create a throwaway git repository under the OS temp dir and
// return its path. Identity and signing are configured locally so commits
// work on any machine. With `withCommit` truthy the repo holds one commit
// adding a.js; otherwise it stays empty. Callers remove the directory.
function _gitRepo(withCommit) {
  const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'np-extract-'));
  const git = (...args) => cp.spawnSync('git', args, { cwd: dir, encoding: 'utf-8' });

  const setup = [
    ['init', '-q'],
    ['config', 'user.email', 'test@example.com'],
    ['config', 'user.name', 'Test'],
    ['config', 'commit.gpgsign', 'false'],
  ];
  for (const args of setup) git(...args);

  if (!withCommit) return dir;

  fs.writeFileSync(path.join(dir, 'a.js'), 'function add(a,b){return a+b;}\n');
  git('add', '-A');
  git('commit', '-q', '-m', 'add helper');
  return dir;
}
|
|
25
|
+
|
|
26
|
+
test('EX-1: buildExtractorPrompt frames a learning_capture block with diff + files', () => {
|
|
27
|
+
const p = extract.buildExtractorPrompt({ files: ['a.js'], truncatedFiles: false, diffText: '+ code' });
|
|
28
|
+
assert.match(p, /<learning_capture>/);
|
|
29
|
+
assert.match(p, /a\.js/);
|
|
30
|
+
assert.match(p, /```diff/);
|
|
31
|
+
assert.match(p, /"learnings"/);
|
|
32
|
+
});
|
|
33
|
+
|
|
34
|
+
test('EX-2: parseExtractorOutput unwraps {result} envelope', () => {
|
|
35
|
+
const raw = JSON.stringify({ result: JSON.stringify({ learnings: [{ pattern: 'use jose for jwt', outcome: 'verified' }] }) });
|
|
36
|
+
const r = extract.parseExtractorOutput(raw);
|
|
37
|
+
assert.strictEqual(r.parse_ok, true);
|
|
38
|
+
assert.strictEqual(r.candidates.length, 1);
|
|
39
|
+
assert.strictEqual(r.candidates[0].pattern, 'use jose for jwt');
|
|
40
|
+
});
|
|
41
|
+
|
|
42
|
+
test('EX-3: parseExtractorOutput strips a markdown fence', () => {
|
|
43
|
+
const raw = '```json\n{"learnings":[{"pattern":"p","outcome":"failed"}]}\n```';
|
|
44
|
+
const r = extract.parseExtractorOutput(raw);
|
|
45
|
+
assert.strictEqual(r.candidates.length, 1);
|
|
46
|
+
assert.strictEqual(r.candidates[0].outcome, 'failed');
|
|
47
|
+
});
|
|
48
|
+
|
|
49
|
+
test('EX-4: invalid outcome defaults to verified; empty pattern dropped', () => {
|
|
50
|
+
const raw = JSON.stringify({ learnings: [
|
|
51
|
+
{ pattern: 'good', outcome: 'banana' },
|
|
52
|
+
{ pattern: ' ', outcome: 'verified' },
|
|
53
|
+
] });
|
|
54
|
+
const r = extract.parseExtractorOutput(raw);
|
|
55
|
+
assert.strictEqual(r.candidates.length, 1);
|
|
56
|
+
assert.strictEqual(r.candidates[0].outcome, 'verified');
|
|
57
|
+
});
|
|
58
|
+
|
|
59
|
+
test('EX-5: caps candidates at MAX_CANDIDATES', () => {
|
|
60
|
+
const many = Array.from({ length: 9 }, (_, i) => ({ pattern: 'p' + i, outcome: 'verified' }));
|
|
61
|
+
const r = extract.parseExtractorOutput(JSON.stringify({ learnings: many }));
|
|
62
|
+
assert.strictEqual(r.candidates.length, extract.MAX_CANDIDATES);
|
|
63
|
+
});
|
|
64
|
+
|
|
65
|
+
test('EX-6: non-JSON output → parse_ok false', () => {
|
|
66
|
+
assert.strictEqual(extract.parseExtractorOutput('totally not json').parse_ok, false);
|
|
67
|
+
assert.strictEqual(extract.parseExtractorOutput('').parse_ok, false);
|
|
68
|
+
});
|
|
69
|
+
|
|
70
|
+
test('EX-7: runExtract on a non-repo returns not-a-repo, logs nothing', () => {
|
|
71
|
+
const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'np-norepo-'));
|
|
72
|
+
try {
|
|
73
|
+
const logged = [];
|
|
74
|
+
const r = extract.runExtract({ cwd: dir, spawnImpl: () => '{}', logImpl: (c) => logged.push(c) });
|
|
75
|
+
assert.strictEqual(r.ran, false);
|
|
76
|
+
assert.strictEqual(r.reason, 'not-a-repo');
|
|
77
|
+
assert.strictEqual(logged.length, 0);
|
|
78
|
+
} finally { fs.rmSync(dir, { recursive: true, force: true }); }
|
|
79
|
+
});
|
|
80
|
+
|
|
81
|
+
test('EX-8: runExtract on empty repo (no commit, no changes) → empty-diff', () => {
|
|
82
|
+
const dir = _gitRepo(false);
|
|
83
|
+
try {
|
|
84
|
+
const r = extract.runExtract({ cwd: dir, spawnImpl: () => '{}', logImpl: () => {} });
|
|
85
|
+
assert.strictEqual(r.ran, true);
|
|
86
|
+
assert.strictEqual(r.reason, 'empty-diff');
|
|
87
|
+
} finally { fs.rmSync(dir, { recursive: true, force: true }); }
|
|
88
|
+
});
|
|
89
|
+
|
|
90
|
+
test('EX-9: runExtract over a commit logs parsed candidates', () => {
|
|
91
|
+
const dir = _gitRepo(true);
|
|
92
|
+
try {
|
|
93
|
+
const logged = [];
|
|
94
|
+
const r = extract.runExtract({
|
|
95
|
+
cwd: dir,
|
|
96
|
+
spawnImpl: () => JSON.stringify({ result: JSON.stringify({ learnings: [
|
|
97
|
+
{ pattern: 'keep add() pure and total', outcome: 'verified' },
|
|
98
|
+
] }) }),
|
|
99
|
+
logImpl: (c) => logged.push(c),
|
|
100
|
+
});
|
|
101
|
+
assert.strictEqual(r.ran, true);
|
|
102
|
+
assert.strictEqual(r.logged, 1);
|
|
103
|
+
assert.strictEqual(logged[0].pattern, 'keep add() pure and total');
|
|
104
|
+
} finally { fs.rmSync(dir, { recursive: true, force: true }); }
|
|
105
|
+
});
|
|
106
|
+
|
|
107
|
+
test('EX-10: runExtract with unparseable spawn output → parse-failed, no log', () => {
|
|
108
|
+
const dir = _gitRepo(true);
|
|
109
|
+
try {
|
|
110
|
+
const logged = [];
|
|
111
|
+
const r = extract.runExtract({ cwd: dir, spawnImpl: () => 'garbage', logImpl: (c) => logged.push(c) });
|
|
112
|
+
assert.strictEqual(r.reason, 'parse-failed');
|
|
113
|
+
assert.strictEqual(logged.length, 0);
|
|
114
|
+
} finally { fs.rmSync(dir, { recursive: true, force: true }); }
|
|
115
|
+
});
|
package/lib/nubosloop-audit.cjs
CHANGED
|
@@ -88,6 +88,106 @@ function searchEvidenceForRound(taskId, round, cwd) {
|
|
|
88
88
|
return evidence.filter((e) => e && (Number(e.round) || 1) === target);
|
|
89
89
|
}
|
|
90
90
|
|
|
91
|
+
// ── Skill-bar consultation evidence (additive; mirrors search-evidence) ──────
|
|
92
|
+
// The orchestrator records the skills it injected for a task (`recordExpectedSkills`);
|
|
93
|
+
// the executor stamps each skill it actually consulted (`recordSkillEvidence`, via
|
|
94
|
+
// `skill-audit ack`). skillFindingsFromState turns an unmet expectation into a
|
|
95
|
+
// `skill-bar-unconsulted` finding (ROUTE_TABLE → executor), round-stamped and
|
|
96
|
+
// emitted at most once per round via `skill_routed_rounds` — same anti-re-route
|
|
97
|
+
// guarantee as the Rule-9 path. The Rule-9 functions below are left untouched.
|
|
98
|
+
|
|
99
|
+
/**
 * Reduce a skill reference to its bare skill name.
 *
 * Accepts a bare name (`np-api-design`), a name with a stray `.md`, or a path
 * to the skill's `SKILL.md` / directory, with either `/` or `\` separators.
 *
 * @param {*} s skill reference; nullish or empty values give ''
 * @returns {string} the skill name, or '' when none can be derived
 */
function _normSkillName(s) {
  const v = String(s || '')
    .trim()
    // Windows-style paths must normalise the same way, or an ack recorded
    // from such a path never matches the expected skill name.
    .replace(/\\/g, '/')
    // A directory reference such as `skills/np-x/` names the skill np-x.
    .replace(/\/+$/, '');
  // A path like `.claude/skills/<skill>/SKILL.md` names the skill by its directory.
  const dir = v.match(/([^/]+)\/SKILL\.md$/i);
  if (dir) return dir[1];
  // Otherwise a bare name (optionally with a stray .md): take the basename.
  return v.replace(/^.*\//, '').replace(/\.md$/i, '');
}
|
|
107
|
+
|
|
108
|
+
/**
 * Stamp that a skill was consulted for a task.
 *
 * Appends `{round, skill, recorded_at}` to the checkpoint's
 * `nubosloop.skill_evidence`, using the round currently stored in the
 * checkpoint (1 when none is set).
 *
 * @param {string} taskId must match TASK_ID_RE
 * @param {string} skill skill name or SKILL.md path
 * @param {string} cwd project root forwarded to the checkpoint store
 * @returns {{task_id: string, round: number, skill: string}|null} the stamp
 *   written, or null for an invalid task id or an empty skill name
 */
function recordSkillEvidence(taskId, skill, cwd) {
  if (!TASK_ID_RE.test(taskId)) return null;
  const skillName = _normSkillName(skill);
  if (!skillName) return null;

  const receipt = { task_id: taskId, round: 1, skill: skillName };
  const appendAck = (current) => {
    const loopState = (current && current.nubosloop) || {};
    // The round is read inside the merge so the stamp reflects stored state.
    receipt.round = Number(loopState.round) || 1;
    const earlier = Array.isArray(loopState.skill_evidence) ? loopState.skill_evidence : [];
    const acks = earlier.concat([
      { round: receipt.round, skill: skillName, recorded_at: new Date().toISOString() },
    ]);
    return { nubosloop: safeAssign({}, loopState, { skill_evidence: acks }) };
  };
  checkpoint.mergeCheckpoint(taskId, appendAck, cwd);
  return receipt;
}
|
|
126
|
+
|
|
127
|
+
/**
 * Record the skills that were injected for a task as its quality bar.
 *
 * Appends `{round, skills, recorded_at}` to the checkpoint's
 * `nubosloop.skill_expect`, using the round currently stored in the
 * checkpoint (1 when none is set). Nothing is written when no usable skill
 * name remains after normalisation.
 *
 * @param {string} taskId must match TASK_ID_RE
 * @param {string[]} skills skill names or SKILL.md paths
 * @param {string} cwd project root forwarded to the checkpoint store
 * @returns {{task_id: string, round?: number, expected: string[]}|null} null
 *   for an invalid task id; `round` is absent when nothing was recorded
 */
function recordExpectedSkills(taskId, skills, cwd) {
  if (!TASK_ID_RE.test(taskId)) return null;

  const names = [];
  for (const raw of Array.isArray(skills) ? skills : []) {
    const name = _normSkillName(raw);
    if (name) names.push(name);
  }
  if (names.length === 0) return { task_id: taskId, expected: [] };

  const receipt = { task_id: taskId, round: 1, expected: names };
  const appendExpectation = (current) => {
    const loopState = (current && current.nubosloop) || {};
    // The round is read inside the merge so the stamp reflects stored state.
    receipt.round = Number(loopState.round) || 1;
    const earlier = Array.isArray(loopState.skill_expect) ? loopState.skill_expect : [];
    const expectations = earlier.concat([
      { round: receipt.round, skills: names, recorded_at: new Date().toISOString() },
    ]);
    return { nubosloop: safeAssign({}, loopState, { skill_expect: expectations }) };
  };
  checkpoint.mergeCheckpoint(taskId, appendExpectation, cwd);
  return receipt;
}
|
|
145
|
+
|
|
146
|
+
// Gather the `key` values of every entry in `listVal` stamped with `round`
// (entries without a usable round count as round 1). Array values are
// flattened one level; falsy values are skipped. A non-array `listVal`
// yields [].
function _collectForRound(listVal, round, key) {
  const collected = [];
  if (!Array.isArray(listVal)) return collected;

  listVal.forEach((entry) => {
    if (!entry) return;
    const entryRound = Number(entry.round) || 1;
    if (entryRound !== round) return;

    const value = entry[key];
    if (Array.isArray(value)) {
      for (const item of value) collected.push(item);
    } else if (value) {
      collected.push(value);
    }
  });
  return collected;
}
|
|
157
|
+
|
|
158
|
+
// Pure: derive skill-bar findings from a checkpoint's nubosloop sub-object.
|
|
159
|
+
/**
 * Derive skill-bar findings for one round from a checkpoint's `nubosloop`
 * sub-object. Pure: nothing is read or written outside the arguments.
 *
 * A single `skill-bar-unconsulted` finding is produced when the round has
 * expected skills that were not acknowledged. Nothing is produced for an
 * invalid round, a round already listed in `skill_routed_rounds`, a round
 * with no expectations, or a round whose expectations are all acknowledged.
 *
 * @param {object} prevNubosloop the checkpoint's `nubosloop` state (may be nullish)
 * @param {number} round round to evaluate (>= 1)
 * @param {string} taskId task id embedded in the remediation command
 * @returns {object[]} zero or one finding
 */
function skillFindingsFromState(prevNubosloop, round, taskId) {
  const state = prevNubosloop || {};
  const roundNo = Number(round);
  const roundIsValid = Number.isFinite(roundNo) && roundNo >= 1;
  if (!roundIsValid) return [];

  // A round that was already routed must not be emitted a second time.
  const alreadyRouted = Array.isArray(state.skill_routed_rounds)
    && state.skill_routed_rounds.includes(roundNo);
  if (alreadyRouted) return [];

  const expected = [...new Set(_collectForRound(state.skill_expect, roundNo, 'skills'))];
  if (expected.length === 0) return [];

  const acked = new Set(_collectForRound(state.skill_evidence, roundNo, 'skill'));
  const missing = [];
  for (const name of expected) {
    if (!acked.has(name)) missing.push(name);
  }
  if (missing.length === 0) return [];

  const pronoun = missing.length === 1 ? 'it' : 'them';
  const remediation = 'Spawn was given Nubos skills as the quality bar for this task but did not consult '
    + pronoun + ': [' + missing.join(', ') + ']. For each, `Read` '
    + '`.claude/skills/<skill>/SKILL.md`, satisfy its "Verification bar" in the diff, then stamp '
    + '`node np-tools.cjs skill-audit ack --task ' + taskId + ' --skill <skill>` — before editing.';

  const finding = {
    category: 'skill-bar-unconsulted',
    severity: 'fail',
    file: '-',
    line: null,
    remediation,
    raw: { missing_skills: missing, expected_skills: expected },
  };
  return [finding];
}
|
|
182
|
+
|
|
183
|
+
// Return a copy of `routedRounds` that includes `round`. The input is never
// mutated. An invalid round (non-numeric or < 1) or one already present
// leaves the copy unchanged, so repeated calls are idempotent.
function markSkillFindingsRoutedInArray(routedRounds, round) {
  const roundNo = Number(round);
  const rounds = Array.isArray(routedRounds) ? routedRounds.slice() : [];
  const isValidRound = Number.isFinite(roundNo) && roundNo >= 1;
  if (isValidRound && !rounds.includes(roundNo)) {
    rounds.push(roundNo);
  }
  return rounds;
}
|
|
190
|
+
|
|
91
191
|
function auditToolUse(taskId, agent, toolUseLog, cwd) {
|
|
92
192
|
if (!TASK_ID_RE.test(taskId)) {
|
|
93
193
|
throw new NubosPilotError(
|
|
@@ -232,6 +332,10 @@ module.exports = {
|
|
|
232
332
|
auditToolUse,
|
|
233
333
|
recordSearchEvidence,
|
|
234
334
|
searchEvidenceForRound,
|
|
335
|
+
recordSkillEvidence,
|
|
336
|
+
recordExpectedSkills,
|
|
337
|
+
skillFindingsFromState,
|
|
338
|
+
markSkillFindingsRoutedInArray,
|
|
235
339
|
readToolUseAudit,
|
|
236
340
|
auditFindingsForRound,
|
|
237
341
|
auditFindingsFromAudits,
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const { test } = require('node:test');
|
|
4
|
+
const assert = require('node:assert');
|
|
5
|
+
const fs = require('node:fs');
|
|
6
|
+
const os = require('node:os');
|
|
7
|
+
const path = require('node:path');
|
|
8
|
+
const loop = require('./nubosloop.cjs');
|
|
9
|
+
const checkpoint = require('./checkpoint.cjs');
|
|
10
|
+
|
|
11
|
+
// Test fixture: create a temporary project root containing an empty
// `.nubos-pilot/checkpoints` directory and a minimal `.nubos-pilot/STATE.md`,
// and return its path. Callers remove the directory.
function _mkRoot() {
  const root = fs.mkdtempSync(path.join(os.tmpdir(), 'np-skill-audit-'));
  const pilotDir = path.join(root, '.nubos-pilot');
  fs.mkdirSync(path.join(pilotDir, 'checkpoints'), { recursive: true });

  const stateFile = path.join(pilotDir, 'STATE.md');
  const frontMatter = '---\nschema_version: 2\ncurrent_phase: null\ncurrent_plan: null\ncurrent_task: null\n---\n';
  fs.writeFileSync(stateFile, frontMatter, 'utf-8');
  return root;
}
|
|
21
|
+
const TID = 'M001-S001-T0001';
|
|
22
|
+
// Read the test task's checkpoint under root `r` and return its `nubosloop`
// sub-object, or {} when the checkpoint or that key is missing.
function _nubosloop(r) {
  const snapshot = checkpoint.readCheckpoint(TID, r);
  const loopState = snapshot ? snapshot.nubosloop : undefined;
  return loopState || {};
}
|
|
23
|
+
|
|
24
|
+
test('SA-1: expected-but-unacked skill → skill-bar-unconsulted finding', () => {
|
|
25
|
+
const r = _mkRoot();
|
|
26
|
+
try {
|
|
27
|
+
checkpoint.startTask({ id: TID }, r);
|
|
28
|
+
loop.recordExpectedSkills(TID, ['np-secure-code-review', 'np-api-design'], r);
|
|
29
|
+
loop.recordSkillEvidence(TID, 'np-api-design', r); // only one acked
|
|
30
|
+
const findings = loop.skillFindingsFromState(_nubosloop(r), 1, TID);
|
|
31
|
+
assert.equal(findings.length, 1);
|
|
32
|
+
assert.equal(findings[0].category, 'skill-bar-unconsulted');
|
|
33
|
+
assert.deepEqual(findings[0].raw.missing_skills, ['np-secure-code-review']);
|
|
34
|
+
} finally { fs.rmSync(r, { recursive: true, force: true }); }
|
|
35
|
+
});
|
|
36
|
+
|
|
37
|
+
test('SA-2: all expected skills acked → no finding', () => {
|
|
38
|
+
const r = _mkRoot();
|
|
39
|
+
try {
|
|
40
|
+
checkpoint.startTask({ id: TID }, r);
|
|
41
|
+
loop.recordExpectedSkills(TID, ['np-api-design'], r);
|
|
42
|
+
loop.recordSkillEvidence(TID, 'np-api-design', r);
|
|
43
|
+
assert.equal(loop.skillFindingsFromState(_nubosloop(r), 1, TID).length, 0);
|
|
44
|
+
} finally { fs.rmSync(r, { recursive: true, force: true }); }
|
|
45
|
+
});
|
|
46
|
+
|
|
47
|
+
test('SA-3: no expected skills → no finding (skill block was correctly omitted)', () => {
|
|
48
|
+
const r = _mkRoot();
|
|
49
|
+
try {
|
|
50
|
+
checkpoint.startTask({ id: TID }, r);
|
|
51
|
+
assert.equal(loop.skillFindingsFromState(_nubosloop(r), 1, TID).length, 0);
|
|
52
|
+
} finally { fs.rmSync(r, { recursive: true, force: true }); }
|
|
53
|
+
});
|
|
54
|
+
|
|
55
|
+
test('SA-4: ack tolerates a SKILL.md path, not just a bare name', () => {
|
|
56
|
+
const r = _mkRoot();
|
|
57
|
+
try {
|
|
58
|
+
checkpoint.startTask({ id: TID }, r);
|
|
59
|
+
loop.recordExpectedSkills(TID, ['np-encryption'], r);
|
|
60
|
+
loop.recordSkillEvidence(TID, '.claude/skills/np-encryption/SKILL.md', r);
|
|
61
|
+
assert.equal(loop.skillFindingsFromState(_nubosloop(r), 1, TID).length, 0);
|
|
62
|
+
} finally { fs.rmSync(r, { recursive: true, force: true }); }
|
|
63
|
+
});
|
|
64
|
+
|
|
65
|
+
test('SA-5: routed round is not re-emitted (anti-spurious-loop)', () => {
|
|
66
|
+
const r = _mkRoot();
|
|
67
|
+
try {
|
|
68
|
+
checkpoint.startTask({ id: TID }, r);
|
|
69
|
+
loop.recordExpectedSkills(TID, ['np-secure-code-review'], r);
|
|
70
|
+
assert.equal(loop.skillFindingsFromState(_nubosloop(r), 1, TID).length, 1);
|
|
71
|
+
// simulate the loop marking round 1 routed
|
|
72
|
+
checkpoint.mergeCheckpoint(TID, (cur) => {
|
|
73
|
+
const prev = (cur && cur.nubosloop) || {};
|
|
74
|
+
return { nubosloop: Object.assign({}, prev, { skill_routed_rounds: [1] }) };
|
|
75
|
+
}, r);
|
|
76
|
+
assert.equal(loop.skillFindingsFromState(_nubosloop(r), 1, TID).length, 0);
|
|
77
|
+
} finally { fs.rmSync(r, { recursive: true, force: true }); }
|
|
78
|
+
});
|
|
79
|
+
|
|
80
|
+
test('SA-6: a skill finding routes to executor, never stuck (ROUTE_TABLE wired)', () => {
|
|
81
|
+
const r = _mkRoot();
|
|
82
|
+
try {
|
|
83
|
+
checkpoint.startTask({ id: TID }, r);
|
|
84
|
+
loop.recordExpectedSkills(TID, ['np-secure-code-review'], r);
|
|
85
|
+
const findings = loop.skillFindingsFromState(_nubosloop(r), 1, TID);
|
|
86
|
+
const evalRes = loop.evaluateLoop({ round: 1 }, [], { maxRounds: 3, auditFindings: findings });
|
|
87
|
+
assert.equal(evalRes.next_action, 'executor');
|
|
88
|
+
assert.equal(evalRes.stuck, false);
|
|
89
|
+
// and the merged finding kept its category (not downgraded to unknown→stuck)
|
|
90
|
+
assert.ok(evalRes.findings.some((f) => f.category === 'skill-bar-unconsulted' && f.route === 'executor'));
|
|
91
|
+
} finally { fs.rmSync(r, { recursive: true, force: true }); }
|
|
92
|
+
});
|
|
93
|
+
|
|
94
|
+
test('SA-7: markSkillFindingsRoutedInArray is idempotent', () => {
|
|
95
|
+
assert.deepEqual(loop.markSkillFindingsRoutedInArray([], 1), [1]);
|
|
96
|
+
assert.deepEqual(loop.markSkillFindingsRoutedInArray([1], 1), [1]);
|
|
97
|
+
assert.deepEqual(loop.markSkillFindingsRoutedInArray([1], 2), [1, 2]);
|
|
98
|
+
});
|
package/lib/nubosloop.cjs
CHANGED
|
@@ -24,6 +24,7 @@ const ROUTE_TABLE = {
|
|
|
24
24
|
'lint-violation': 'executor',
|
|
25
25
|
'critic-error': 'stuck',
|
|
26
26
|
'rule-9-violation': 'executor',
|
|
27
|
+
'skill-bar-unconsulted': 'executor',
|
|
27
28
|
'missing-test': 'executor',
|
|
28
29
|
'edge-case-gap': 'executor',
|
|
29
30
|
'weak-assertion': 'executor',
|
|
@@ -311,6 +312,10 @@ const {
|
|
|
311
312
|
auditToolUse,
|
|
312
313
|
recordSearchEvidence,
|
|
313
314
|
searchEvidenceForRound,
|
|
315
|
+
recordSkillEvidence,
|
|
316
|
+
recordExpectedSkills,
|
|
317
|
+
skillFindingsFromState,
|
|
318
|
+
markSkillFindingsRoutedInArray,
|
|
314
319
|
readToolUseAudit,
|
|
315
320
|
auditFindingsForRound,
|
|
316
321
|
auditFindingsFromAudits,
|
|
@@ -502,6 +507,10 @@ module.exports = {
|
|
|
502
507
|
auditToolUse,
|
|
503
508
|
recordSearchEvidence,
|
|
504
509
|
searchEvidenceForRound,
|
|
510
|
+
recordSkillEvidence,
|
|
511
|
+
recordExpectedSkills,
|
|
512
|
+
skillFindingsFromState,
|
|
513
|
+
markSkillFindingsRoutedInArray,
|
|
505
514
|
readToolUseAudit,
|
|
506
515
|
auditFindingsForRound,
|
|
507
516
|
auditFindingsFromAudits,
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const { VALID_TIERS } = require('./model-profiles.cjs');
|
|
4
|
+
|
|
5
|
+
// ADR-0013: a tier is a routing/meta property derived from OBSERVABLE task
|
|
6
|
+
// signals (files touched + risk keywords), never invented from implementation
|
|
7
|
+
// detail. classifyTier is advisory — the planner remains the decider; this
|
|
8
|
+
// helper only makes that decision evidence-based. Output is deterministic
|
|
9
|
+
// (no clock, no randomness) so a given task always classifies the same way.
|
|
10
|
+
|
|
11
|
+
const RISK_RE = /\b(auth|authn|authz|authoriz\w*|login|crypto|encrypt\w*|decrypt\w*|password|secret|credential|token|jwt|oauth|saml|session|payment|billing|invoice|permission|role|access[\s-]?control|migrat\w*|schema)\b/i;
|
|
12
|
+
const ARCH_RE = /\b(architect\w*|cross[\s-]?cutting|multi[\s-]?module|redesign|breaking[\s-]?change|public[\s-]?api|contract|interface|protocol|state[\s-]?machine|concurren\w*|distributed|orchestrat\w*)\b/i;
|
|
13
|
+
const TRIVIAL_RE = /\b(typo|comment|rename|docs?|readme|changelog|copy(?:writing)?|wording|spelling|version[\s-]?bump|bump[\s-]?version|lint|format(?:ting)?|whitespace|config[\s-]?value|constant|string[\s-]?literal)\b/i;
|
|
14
|
+
|
|
15
|
+
const SIZE_TO_TIER = Object.freeze({ trivial: 'haiku', standard: 'sonnet', large: 'opus' });
|
|
16
|
+
|
|
17
|
+
const LARGE_FILE_THRESHOLD = 6;
|
|
18
|
+
|
|
19
|
+
// Join a task's name and description into one searchable string, separated
// by a single space; a missing (falsy) part contributes an empty string.
function _text(name, desc) {
  const title = String(name || '');
  const body = String(desc || '');
  return title + ' ' + body;
}
|
|
22
|
+
|
|
23
|
+
/**
 * Classify a task into a model tier from its observable signals: how many
 * files it touches and which risk / architecture / triviality keywords
 * appear in its name, description and file paths.
 *
 * Precedence: risk keyword → large; architecture keyword or
 * >= LARGE_FILE_THRESHOLD files → large; at most one file plus a trivial
 * keyword → trivial; anything else → standard.
 *
 * @param {{files_modified?: string[], name?: string, desc?: string}} task
 * @returns {{tier: string, size: string, rationale: string, signals: {file_count: number, risk: boolean, arch: boolean, trivial: boolean}}}
 */
function classifyTier(task) {
  const t = task || {};
  const files = Array.isArray(t.files_modified) ? t.files_modified : [];
  const fileCount = files.length;
  const haystack = _text(t.name, t.desc) + ' ' + files.join(' ');

  // `\b` treats `_` and letters as word characters, so `user_auth.py` or
  // `passwordReset.ts` never matched the keyword patterns. Also test a copy
  // with snake_case and camelCase identifiers split into separate words.
  const expanded = haystack
    .replace(/([A-Z]+)([A-Z][a-z])/g, '$1 $2')
    .replace(/([a-z0-9])([A-Z])/g, '$1 $2')
    .replace(/_+/g, ' ');

  const risk = RISK_RE.test(haystack) || RISK_RE.test(expanded);
  const arch = ARCH_RE.test(haystack) || ARCH_RE.test(expanded);
  // Deliberately NOT widened: splitting identifiers may only escalate a
  // task, never push one down to the weakest tier.
  const trivial = TRIVIAL_RE.test(haystack);

  let size;
  let rationale;
  if (risk) {
    size = 'large';
    rationale = 'security/data-sensitive surface (auth, crypto, secrets, or migration) — escalate to the strongest tier';
  } else if (arch || fileCount >= LARGE_FILE_THRESHOLD) {
    size = 'large';
    rationale = arch
      ? 'architectural / cross-cutting change — invariants span multiple units'
      : 'broad change touching ' + fileCount + ' files — cross-file invariants likely';
  } else if (fileCount <= 1 && trivial) {
    size = 'trivial';
    rationale = 'single-file mechanical edit (docs/rename/format/config) — narrow, low-risk';
  } else {
    size = 'standard';
    rationale = 'ordinary single-concern implementation';
  }

  return {
    tier: SIZE_TO_TIER[size],
    size,
    rationale,
    signals: { file_count: fileCount, risk, arch, trivial },
  };
}
|
|
62
|
+
|
|
63
|
+
// Report whether `tier` is one of the tier names listed in VALID_TIERS.
function isValidTier(tier) {
  const isKnown = VALID_TIERS.includes(tier);
  return isKnown;
}
|
|
66
|
+
|
|
67
|
+
module.exports = { classifyTier, isValidTier, SIZE_TO_TIER, LARGE_FILE_THRESHOLD };
|