nubos-pilot 1.2.0 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/agents/np-executor.md +20 -0
- package/agents/np-security-reviewer.md +49 -3
- package/bin/install.js +7 -2
- package/bin/np-tools/_commands.cjs +1 -0
- package/bin/np-tools/security.cjs +177 -0
- package/bin/np-tools/security.test.cjs +82 -0
- package/lib/config-defaults.cjs +23 -0
- package/lib/config-defaults.test.cjs +15 -0
- package/lib/config-schema.cjs +19 -0
- package/lib/config-schema.test.cjs +58 -0
- package/lib/install/claude-hooks.cjs +100 -7
- package/lib/install/claude-hooks.test.cjs +96 -0
- package/lib/security/ledger.cjs +203 -0
- package/lib/security/ledger.test.cjs +139 -0
- package/lib/security/patterns.cjs +119 -0
- package/lib/security/review.cjs +220 -0
- package/lib/security/review.test.cjs +143 -0
- package/lib/security/scan.cjs +180 -0
- package/lib/security/scan.test.cjs +137 -0
- package/np-tools.cjs +1 -0
- package/package.json +1 -1
- package/templates/claude/payload/hooks/np-security-hook.cjs +50 -0
- package/workflows/execute-phase.md +11 -1
package/agents/np-executor.md
CHANGED
|
@@ -49,6 +49,25 @@ The orchestrator provides these in your prompt context. Read every path it hands
|
|
|
49
49
|
| Task summary (write on completion) | You fill this after the commit lands — describes changes, verification, follow-ups. | `.nubos-pilot/milestones/M<NNN>/slices/S<NNN>/tasks/T<NNNN>/T<NNNN>-SUMMARY.md` |
|
|
50
50
|
| Checkpoint file (managed) | Write-through state transitions via `np-tools.cjs checkpoint transition`. Do NOT read/write directly. | `.nubos-pilot/checkpoints/<task-full-id>.json` |
|
|
51
51
|
|
|
52
|
+
## Write against the success_criteria
|
|
53
|
+
|
|
54
|
+
When the orchestrator includes a `<success_criteria>` block in your prompt, those criteria are the
|
|
55
|
+
milestone's **acceptance target** — what "done right" means. Use them as your north star while you
|
|
56
|
+
implement, not just the `verify` command. `verify` proves the code runs; the criteria prove it does
|
|
57
|
+
the *right* thing. Aim for both green.
|
|
58
|
+
|
|
59
|
+
- **Intent, not a build spec (ADR-0019).** Criteria say *what* must be true, never *how* to build it
|
|
60
|
+
(no schema/filename/style is implied). Don't treat a criterion as a licence to add structure the
|
|
61
|
+
task plan didn't ask for.
|
|
62
|
+
- **Stay in scope.** A criterion is **never** a reason to edit a path outside `files_modified`. If
|
|
63
|
+
satisfying it would require touching another file, that is a planner-scope bug — emit the
|
|
64
|
+
`## SCOPE EXPANSION REQUEST` block (step 4a) and hand back; do not expand scope.
|
|
65
|
+
- **Self-check before commit.** Before `commit-task`, re-read your diff against each criterion your
|
|
66
|
+
task contributes to (cross-reference the slice `S<NNN>-UAT.md`). If your in-scope change leaves a
|
|
67
|
+
criterion it should satisfy unmet, fix it within `files_modified` before committing — don't ship a
|
|
68
|
+
known gap for the critic to bounce back.
|
|
69
|
+
- Criteria outside your task's scope are context, not your responsibility — do not chase them.
|
|
70
|
+
|
|
52
71
|
## Codebase Docs Protocol (runtime-agnostic)
|
|
53
72
|
|
|
54
73
|
nubos-pilot maintains a skill-style code documentation layer at
|
|
@@ -131,6 +150,7 @@ into the `task(…)` commit. If `workflow.commit_docs=true`, the
|
|
|
131
150
|
<scope_guardrail>
|
|
132
151
|
**Do:**
|
|
133
152
|
- Edit only files enumerated in `files_modified`.
|
|
153
|
+
- Treat any `<success_criteria>` in your prompt as the acceptance target; self-check your diff against it before commit (see "Write against the success_criteria").
|
|
134
154
|
- Commit via `node np-tools.cjs commit-task <task-id>`.
|
|
135
155
|
- Write checkpoint state transitions via the wrapper.
|
|
136
156
|
- Stay within the task's declared scope even if you spot tangential issues — log them, do not fix them.
|
|
@@ -1,15 +1,19 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: np-security-reviewer
|
|
3
|
-
description: Read-only
|
|
3
|
+
description: Read-only security auditor with two input modes. Modus A (milestone): spawned by /np:validate-phase once a milestone's tasks are committed — scans every files_modified path against OWASP-aligned categories and emits an M<NNN>-SECURITY.md draft with Pass/Risk/Defer per finding. Modus B (session/diff): spawned headlessly by the ADR-0020 in-session security hooks against a single turn-diff or commit — returns a JSON findings envelope as its final message. Detection-only in both modes — never edits source.
|
|
4
4
|
tier: sonnet
|
|
5
5
|
tools: Read, Bash, Grep, Glob
|
|
6
6
|
color: red
|
|
7
7
|
---
|
|
8
8
|
|
|
9
9
|
<role>
|
|
10
|
-
You are the nubos-pilot security reviewer. Post-execution twin of `np-verifier` for the security surface.
|
|
10
|
+
You are the nubos-pilot security reviewer. Post-execution twin of `np-verifier` for the security surface. You run in one of two modes, decided by the prompt.
|
|
11
11
|
|
|
12
|
-
|
|
12
|
+
**Modus A — milestone audit (default).** Spawned once a milestone's task commits are in place. You emit a `M<NNN>-SECURITY.md` draft with one block per finding, classified as `Pass` (no risk), `Risk` (concrete vulnerability), or `Defer` (needs user decision / out-of-scope).
|
|
13
|
+
|
|
14
|
+
**Modus B — session/diff (ADR-0020).** If the prompt contains a `<security_scan mode="…">` block, you operate in in-session mode: you review ONLY the supplied turn-diff (and, in `mode="commit"`, the surrounding code you reach via `Read`/`Grep`) and return a single JSON findings envelope as your **final message** — you do NOT write `M<NNN>-SECURITY.md`, do NOT use a milestone number, and do NOT read milestone files. See "## Session/Diff Mode (Modus B)" below for the exact contract.
|
|
15
|
+
|
|
16
|
+
You DO NOT propose patches. You DO NOT edit source. You report — in both modes.
|
|
13
17
|
|
|
14
18
|
**CRITICAL: Mandatory Initial Read**
|
|
15
19
|
If the prompt contains a `<files_to_read>` block, you MUST use the `Read` tool to load every file listed there before performing any other actions. This is your primary context.
|
|
@@ -104,6 +108,48 @@ Milestone Status resolution:
|
|
|
104
108
|
- Else any `Defer` → `deferred`.
|
|
105
109
|
- Else → `clean`.
|
|
106
110
|
|
|
111
|
+
## Session/Diff Mode (Modus B) — ADR-0020
|
|
112
|
+
|
|
113
|
+
Triggered when the prompt contains a `<security_scan mode="stop|commit">` block. This is the in-session
|
|
114
|
+
review spawned by the security hooks. It is independent by construction: you receive only the diff and a
|
|
115
|
+
fresh context — you never graded the code you are reviewing.
|
|
116
|
+
|
|
117
|
+
**Inputs (all inside the `<security_scan>` block):**
|
|
118
|
+
- The list of changed files and the diff under review.
|
|
119
|
+
- `mode="stop"` — review only what the turn changed; start from the diff, do not hunt outside it.
|
|
120
|
+
- `mode="commit"` — a deeper pass: use `Read`/`Grep`/`Glob` to inspect surrounding code (callers,
|
|
121
|
+
sanitizers, related files) before deciding a finding is real, to keep false positives low.
|
|
122
|
+
- An optional project guidance block. It is **additive** — it adds checks on top of the built-in OWASP
|
|
123
|
+
categories and never disables them. `RULES.md`/`CONTEXT.md` (if referenced) still authorize/neutralize
|
|
124
|
+
a finding the same way as in Modus A.
|
|
125
|
+
|
|
126
|
+
**Behaviour:**
|
|
127
|
+
- Apply the same OWASP-aligned categories as Modus A.
|
|
128
|
+
- Report ONLY concrete `Risk` findings. Omit `Pass`/no-risk entries entirely.
|
|
129
|
+
- Do NOT write any file. Do NOT edit source. Do NOT spawn agents. Do NOT use a milestone number.
|
|
130
|
+
|
|
131
|
+
**Output contract — your FINAL message MUST be exactly one JSON object, no prose, no code fence:**
|
|
132
|
+
|
|
133
|
+
```json
|
|
134
|
+
{
|
|
135
|
+
"status": "clean | risks-found",
|
|
136
|
+
"findings": [
|
|
137
|
+
{
|
|
138
|
+
"category": "Injection | Auth & Session | Access Control | Crypto | SSRF / Open Redirect | Deserialization | File / Path | Secrets | Logging | Dependencies",
|
|
139
|
+
"severity": "high | medium | low",
|
|
140
|
+
"file": "relative/path.ext",
|
|
141
|
+
"line": 42,
|
|
142
|
+
"title": "short finding title",
|
|
143
|
+
"evidence": "the matched line / why it is exploitable",
|
|
144
|
+
"mitigation_hint": "the real fix (a pointer, not a patch)"
|
|
145
|
+
}
|
|
146
|
+
]
|
|
147
|
+
}
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
If you find nothing, return `{"status":"clean","findings":[]}`. The orchestrator surfaces and fixes these
|
|
151
|
+
findings as a follow-up in the same conversation — it never blocks the write or commit.
|
|
152
|
+
|
|
107
153
|
## Handoff Protocol
|
|
108
154
|
|
|
109
155
|
Before reviewing, check handoffs addressed to `np-security-reviewer`:
|
package/bin/install.js
CHANGED
|
@@ -610,10 +610,15 @@ async function _runInstallLocked(ctx) {
|
|
|
610
610
|
try {
|
|
611
611
|
const claudeHooks = require('../lib/install/claude-hooks.cjs');
|
|
612
612
|
const res = claudeHooks.installClaudeHooks({
|
|
613
|
-
projectRoot, scope: resolvedScope, which: '
|
|
613
|
+
projectRoot, scope: resolvedScope, which: 'all', force: false,
|
|
614
614
|
});
|
|
615
|
+
const secAction = res.results.security
|
|
616
|
+
? Object.values(res.results.security).every((r) => r.action === 'installed') ? 'installed'
|
|
617
|
+
: Object.values(res.results.security).every((r) => r.action === 'updated') ? 'updated' : 'mixed'
|
|
618
|
+
: 'skipped';
|
|
615
619
|
console.error(dim + ' [claude-hooks] statusline: ' + res.results.statusline.action
|
|
616
|
-
+ ', ctx-monitor: ' + res.results.ctxMonitor.action
|
|
620
|
+
+ ', ctx-monitor: ' + res.results.ctxMonitor.action
|
|
621
|
+
+ ', security: ' + secAction + reset);
|
|
617
622
|
if (res.results.statusline.action === 'skipped-existing') {
|
|
618
623
|
console.error(yellow + ' [claude-hooks] foreign statusLine preserved — re-run `install-hooks --force` to overwrite' + reset);
|
|
619
624
|
}
|
|
@@ -96,6 +96,7 @@ const COMMANDS = [
|
|
|
96
96
|
{ name: 'loop-audit-tool-use', category: 'Execution', description: 'Record/read the tool-use audit per spawn (Completeness Rule 9 mechanical check)', description_de: 'Tool-use Audit pro Spawn schreiben/lesen (Completeness Rule 9 mechanische Prüfung)' },
|
|
97
97
|
{ name: 'loop-stuck', category: 'Execution', description: 'Mark a task as stuck (writes loop-state + flips checkpoint status to stuck)', description_de: 'Markiert Task als stuck (schreibt Loop-State + setzt Checkpoint-Status auf stuck)' },
|
|
98
98
|
{ name: 'spawn-headless', category: 'Execution', description: 'Spawn an agent as a headless `claude -p` subprocess (ADR-0010 §L6); writes stdout to --output-path and returns exit code', description_de: 'Spawnt einen Agent als headless `claude -p` Subprozess (ADR-0010 §L6); schreibt stdout nach --output-path und liefert Exit-Code' },
|
|
99
|
+
{ name: 'security', category: 'Review', description: 'In-session security review hook backend (ADR-0020). Verbs: session-start | baseline | scan | review | commit | run-review. Reads the Claude Code hook payload via --stdin; non-blocking, report-once, independent reviewer spawn.', description_de: 'Backend für die In-Session-Security-Review-Hooks (ADR-0020). Verben: session-start | baseline | scan | review | commit | run-review. Liest die Claude-Code-Hook-Payload via --stdin; non-blocking, report-once, unabhängiger Reviewer-Spawn.' },
|
|
99
100
|
{ name: 'loop-metrics', category: 'Utility', description: 'Aggregate Nubosloop telemetry across all checkpoints (commits, stuck, route distribution)', description_de: 'Aggregiert Nubosloop-Telemetrie über alle Checkpoints (Commits, Stuck, Routing)' },
|
|
100
101
|
{ name: 'learning-log', category: 'Execution', description: 'Persist a learning to the local store (or MCP adapter when configured)', description_de: 'Persistiert ein Learning im lokalen Store (oder MCP-Adapter falls konfiguriert)' },
|
|
101
102
|
{ name: 'learning-match', category: 'Utility', description: 'Query the learnings store for cached patterns matching a free-text query', description_de: 'Fragt den Learnings-Store nach Cached-Patterns ab' },
|
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const fs = require('node:fs');
|
|
4
|
+
const path = require('node:path');
|
|
5
|
+
const child_process = require('node:child_process');
|
|
6
|
+
|
|
7
|
+
const { tryReadConfigPath } = require('../../lib/config.cjs');
|
|
8
|
+
const scan = require('../../lib/security/scan.cjs');
|
|
9
|
+
const ledger = require('../../lib/security/ledger.cjs');
|
|
10
|
+
const review = require('../../lib/security/review.cjs');
|
|
11
|
+
const args = require('./_args.cjs');
|
|
12
|
+
|
|
13
|
+
// Heuristic used by the `commit` verb to decide whether a Bash command is a
// git commit/push: the word `git`, followed anywhere later in the command
// (newlines included) by the word `commit` or `push`.
// NOTE(review): this also matches unrelated commands such as
// `git log --grep=push` — confirm that over-matching is acceptable here.
const COMMIT_RE = /\bgit\b[\s\S]*\b(commit|push)\b/;
|
|
14
|
+
|
|
15
|
+
/**
 * Read everything available on process.stdin as UTF-8 text.
 *
 * Resolves with '' immediately when stdin is a TTY (nothing was piped in).
 * Otherwise chunks are collected until the stream ends or errors, or until an
 * 800 ms timeout elapses; in every case the promise resolves with whatever was
 * buffered so far. It never rejects.
 *
 * @returns {Promise<string>} text read from stdin (possibly partial or empty)
 */
function _readStdin() {
  return new Promise((resolve) => {
    const stdin = process.stdin;
    if (stdin.isTTY) return resolve('');

    let buf = '';
    let timer = null;
    const finish = () => {
      clearTimeout(timer);
      resolve(buf);
    };

    stdin.setEncoding('utf-8');
    timer = setTimeout(() => {
      // The writer never closed the pipe. Dropping the listeners alone leaves
      // the stream flowing, and the still-reading stdin handle would keep this
      // process alive after we resolve — so pause it as well.
      try { stdin.removeAllListeners(); } catch {}
      try { stdin.pause(); } catch {}
      finish();
    }, 800);
    stdin.on('data', (chunk) => { buf += chunk; });
    stdin.on('end', finish);
    stdin.on('error', finish);
  });
}
|
|
26
|
+
|
|
27
|
+
/**
 * Parse a JSON hook payload without ever throwing.
 *
 * @param {string|undefined} s raw JSON text (may be empty or undefined)
 * @returns {object} the parsed object, or {} when `s` is empty, is not valid
 *   JSON, or does not decode to a plain JSON object
 */
function _safeParse(s) {
  if (!s) return {};
  try {
    const parsed = JSON.parse(s);
    // Callers dereference the result directly (e.g. `payload.session_id`), so
    // `null`, primitives and arrays are normalised to {} instead of leaking
    // through and causing a TypeError downstream.
    const isPlainObject = parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed);
    return isPlainObject ? parsed : {};
  } catch {
    return {};
  }
}
|
|
28
|
+
|
|
29
|
+
/**
 * Resolve the hook payload for this invocation.
 *
 * Precedence: an inline `--payload <json>` flag wins; otherwise, when `--stdin`
 * is present, the payload is read from stdin; otherwise the payload is {}.
 *
 * @param {string[]} argv command arguments (verb first)
 * @returns {Promise<object>} the value produced by _safeParse, or {} when no source was given
 */
async function _payload(argv) {
  const fromFlag = args.getFlag(argv, '--payload', { allowDashValues: true });
  if (fromFlag !== undefined) {
    return _safeParse(fromFlag);
  }
  if (!argv.includes('--stdin')) {
    return {};
  }
  const raw = await _readStdin();
  return _safeParse(raw);
}
|
|
35
|
+
|
|
36
|
+
/**
 * Load the effective `security.*` configuration for a project directory.
 *
 * Boolean toggles are true unless the stored value is exactly `false`.
 * Numeric limits are coerced with Number(); a missing, non-numeric or zero
 * value falls back to the built-in default. Path settings default to null.
 *
 * @param {string} cwd project directory passed to tryReadConfigPath
 * @returns {object} flat settings object used by the security verbs
 */
function _cfg(cwd) {
  const read = (key, fallback) => tryReadConfigPath(cwd, 'security.' + key, fallback);
  const toggle = (key) => read(key, true) !== false;
  const limit = (key, fallback) => Number(read(key, fallback)) || fallback;

  return {
    enabled: toggle('enabled'),
    scan_on_write: toggle('scan_on_write'),
    review_on_stop: toggle('review_on_stop'),
    review_on_commit: toggle('review_on_commit'),
    custom_rules_path: read('custom_rules_path', null),
    guidance_path: read('guidance_path', null),
    review_timeout_ms: limit('review_timeout_ms', 180000),
    max_stop_reviews_in_a_row: limit('max_stop_reviews_in_a_row', 3),
    max_commit_reviews_per_hour: limit('max_commit_reviews_per_hour', 20),
    max_files_per_review: limit('max_files_per_review', 30),
  };
}
|
|
50
|
+
|
|
51
|
+
/**
 * Turn a configured path into a path anchored at `cwd`.
 *
 * @param {string} cwd base directory for relative paths
 * @param {string|null|undefined} p configured path
 * @returns {string|null} null when `p` is falsy; `p` unchanged when it is
 *   already absolute; otherwise `cwd` joined with `p`
 */
function _resolveRel(cwd, p) {
  if (!p) {
    return null;
  }
  if (path.isAbsolute(p)) {
    return p;
  }
  return path.join(cwd, p);
}
|
|
55
|
+
|
|
56
|
+
/**
 * Extract the newly written text from a tool input object.
 *
 * The first string-valued field among `content`, `new_string` and
 * `new_source` wins. Failing that, when `edits` is an array, the `new_string`
 * of each entry (or '' for entries without one) is joined with newlines.
 *
 * @param {object} toolInput tool input from the hook payload
 * @returns {string} the written text, or '' when none can be found
 */
function _editedContent(toolInput) {
  if (!toolInput || typeof toolInput !== 'object') return '';

  for (const field of ['content', 'new_string', 'new_source']) {
    if (typeof toolInput[field] === 'string') return toolInput[field];
  }

  if (!Array.isArray(toolInput.edits)) return '';

  const pieces = [];
  for (const edit of toolInput.edits) {
    pieces.push(edit && typeof edit.new_string === 'string' ? edit.new_string : '');
  }
  return pieces.join('\n');
}
|
|
66
|
+
|
|
67
|
+
/**
 * Extract the edited file's path from a tool input object.
 *
 * Uses `file_path`, falling back to `notebook_path`. Only non-empty strings
 * are accepted: any other value would make path.isAbsolute() throw, and the
 * `scan` verb calls this helper outside any try/catch.
 *
 * @param {string} cwd project directory; absolute paths are made relative to it
 * @param {object} toolInput tool input from the hook payload
 * @returns {string} the path (relative to `cwd` when it was absolute), or ''
 *   when no usable path is present
 */
function _editedPath(cwd, toolInput) {
  if (!toolInput || typeof toolInput !== 'object') return '';
  const candidates = [toolInput.file_path, toolInput.notebook_path];
  const raw = candidates.find((value) => typeof value === 'string' && value !== '');
  if (raw === undefined) return '';
  return path.isAbsolute(raw) ? path.relative(cwd, raw) : raw;
}
|
|
73
|
+
|
|
74
|
+
/**
 * Launch the background review worker (`np-tools.cjs security run-review`) as
 * a detached child process, without waiting for it.
 *
 * @param {string} cwd working directory for the child
 * @param {string} sid session id forwarded via `--session`
 * @param {string} mode review mode forwarded via `--mode`
 * @returns {boolean} true when spawn() returned without throwing (the child may
 *   still fail to start asynchronously), false when spawn() threw
 */
function _spawnWorker(cwd, sid, mode) {
  const npTools = path.join(__dirname, '..', '..', 'np-tools.cjs');
  try {
    const child = child_process.spawn(
      process.execPath,
      [npTools, 'security', 'run-review', '--session', sid, '--mode', mode],
      { cwd, detached: true, stdio: 'ignore' },
    );
    // Spawn failures (for example a cwd that no longer exists) are reported
    // asynchronously through an 'error' event that the surrounding try/catch
    // cannot see. Without a listener Node raises it as an uncaught exception.
    child.on('error', () => {});
    // Detach from the parent's event loop so the caller can exit immediately.
    child.unref();
    return true;
  } catch { return false; }
}
|
|
86
|
+
|
|
87
|
+
/**
 * Write `obj` to `stdout` as a single JSON string (no trailing newline).
 *
 * @param {{write: function(string): *}} stdout sink with a write() method
 * @param {*} obj value to serialise
 */
function _emit(stdout, obj) {
  const serialized = JSON.stringify(obj);
  stdout.write(serialized);
}
|
|
88
|
+
|
|
89
|
+
/**
 * Entry point for the `security` command. Dispatches on the verb in argv[0]
 * and always resolves to exit code 0.
 *
 * Verbs:
 *  - session-start: initialise the session ledger.
 *  - baseline:      record the current HEAD sha in the ledger.
 *  - scan:          pattern-scan the content just written; findings the ledger
 *                   has not reported yet are emitted as PostToolUse
 *                   `additionalContext`.
 *  - review:        emit any unsurfaced review findings as a `decision: 'block'`
 *                   object; when there are none, start a background 'stop' review.
 *  - commit:        for Bash commands matching COMMIT_RE, start a background
 *                   'commit' review if the ledger's hourly limit allows it.
 *  - run-review:    the background worker itself; runs the review synchronously.
 *
 * When `security.enabled` is false every verb is a no-op (for run-review the
 * payload is still read before returning).
 *
 * @param {string[]} argv verb followed by flags (--payload, --stdin, --session, --mode)
 * @param {{cwd?: string, stdout?: {write: function(string): *}}} [ctx] overrides for cwd/stdout
 * @returns {Promise<number>} always 0
 */
async function run(argv, ctx) {
  const opts = ctx || {};
  const cwd = opts.cwd || process.cwd();
  const out = opts.stdout || process.stdout;
  const argList = Array.isArray(argv) ? argv : [];
  const verb = argList[0];

  const cfg = _cfg(cwd);
  const isWorker = verb === 'run-review';
  if (!isWorker && !cfg.enabled) return 0;

  const payload = await _payload(argList);
  // Session id comes from the hook payload, else from the --session flag.
  const sid = payload.session_id || args.getFlag(argList, '--session') || '';

  switch (verb) {
    case 'session-start': {
      if (sid) {
        try { ledger.initSession(sid); } catch {}
      }
      return 0;
    }

    case 'baseline': {
      if (sid) {
        try { ledger.setBaseline(sid, { head: review.headSha(cwd) }); } catch {}
      }
      return 0;
    }

    case 'scan': {
      if (!(cfg.scan_on_write && sid)) return 0;
      const filePath = _editedPath(cwd, payload.tool_input);
      const content = _editedContent(payload.tool_input);
      if (!(filePath && content)) return 0;

      let scanned;
      try {
        scanned = scan.scanContent({ filePath, content, customRulesPath: _resolveRel(cwd, cfg.custom_rules_path) });
      } catch {
        return 0;
      }

      // If the ledger call fails, fall back to reporting every finding.
      let unseen;
      try {
        unseen = ledger.markScanReported(sid, scanned.findings);
      } catch {
        unseen = scanned.findings;
      }
      if (!unseen.length) return 0;

      const bullets = unseen.map((f) => '- [' + f.category + '] ' + path.basename(f.file) + ':' + f.line + ' — ' + f.reminder);
      _emit(out, {
        hookSpecificOutput: {
          hookEventName: 'PostToolUse',
          additionalContext: '[nubos-pilot security] potential issue(s) in just-written code:\n' + bullets.join('\n')
            + '\nConsider addressing before proceeding (non-blocking).',
        },
      });
      return 0;
    }

    case 'review': {
      if (!cfg.review_on_stop || !sid) return 0;

      let harvest = { findings: [] };
      try { harvest = ledger.takeUnsurfacedRisks(sid, { maxStreak: cfg.max_stop_reviews_in_a_row }); } catch {}

      const risks = harvest.findings;
      if (!(risks && risks.length)) {
        // Nothing to surface: start a background review of this turn instead.
        _spawnWorker(cwd, sid, 'stop');
        return 0;
      }

      const bullets = risks.map((f) => {
        const where = f.file
          ? path.basename(String(f.file)) + (f.line ? ':' + f.line : '') + ' — '
          : '';
        const hint = f.mitigation_hint ? ' (' + f.mitigation_hint + ')' : '';
        return '- [' + (f.category || 'security') + '] ' + where + (f.title || 'security finding') + hint;
      });
      _emit(out, {
        decision: 'block',
        reason: '[nubos-pilot security] An independent review of this turn\'s changes found '
          + risks.length + ' security issue(s). Address them now as a follow-up, then continue:\n'
          + bullets.join('\n'),
      });
      return 0;
    }

    case 'commit': {
      if (!cfg.review_on_commit || !sid) return 0;
      const toolInput = payload.tool_input;
      const cmd = toolInput && typeof toolInput.command === 'string' ? toolInput.command : '';
      if (!cmd || !COMMIT_RE.test(cmd)) return 0;

      // A ledger failure leaves the gate closed, so no review is started.
      let gate = { allowed: false };
      try { gate = ledger.tryRecordCommitReview(sid, { maxPerHour: cfg.max_commit_reviews_per_hour }); } catch {}
      if (gate.allowed) _spawnWorker(cwd, sid, 'commit');
      return 0;
    }

    case 'run-review': {
      if (!cfg.enabled || !sid) return 0;
      const mode = args.getFlag(argList, '--mode') === 'commit' ? 'commit' : 'stop';
      try {
        review.runReview({ cwd, sid, mode, config: { ...cfg, guidance_path: _resolveRel(cwd, cfg.guidance_path) } });
      } catch {}
      return 0;
    }

    default:
      return 0;
  }
}
|
|
176
|
+
|
|
177
|
+
module.exports = { run, COMMIT_RE, _editedContent, _editedPath };
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const { test } = require('node:test');
|
|
4
|
+
const assert = require('node:assert/strict');
|
|
5
|
+
const fs = require('node:fs');
|
|
6
|
+
const os = require('node:os');
|
|
7
|
+
const path = require('node:path');
|
|
8
|
+
|
|
9
|
+
const security = require('./security.cjs');
|
|
10
|
+
const ledger = require('../../lib/security/ledger.cjs');
|
|
11
|
+
|
|
12
|
+
// Monotonic counter so every test in this process gets its own session id.
let _c = 0;

/**
 * Produce a unique session id of the form `cmd-sec-<pid>-<n>`.
 * @returns {string}
 */
function freshSid() {
  _c += 1;
  return ['cmd-sec', process.pid, _c].join('-');
}
|
|
14
|
+
/**
 * Remove the ledger for `sid` and, best-effort, its `.lock` file.
 * @param {string} sid session id created by freshSid()
 */
function cleanup(sid) {
  ledger.removeLedger(sid);
  try {
    const lockFile = ledger.ledgerPath(sid) + '.lock';
    fs.unlinkSync(lockFile);
  } catch {}
}
|
|
15
|
+
|
|
16
|
+
/**
 * Build an in-memory stand-in for stdout.
 * @returns {{stdout: {write: function(string): number}, text: function(): string}}
 *   `stdout.write` records each chunk; `text()` returns all chunks concatenated
 */
function collector() {
  const parts = [];
  const stdout = {
    write(s) { return parts.push(s); },
  };
  const text = () => parts.join('');
  return { stdout, text };
}
|
|
20
|
+
|
|
21
|
+
/**
 * Run one `security` verb with an inline JSON payload and capture its output.
 *
 * @param {string} verb verb to run
 * @param {object} payload hook payload, passed via `--payload`
 * @param {string} [cwd] project directory (defaults to process.cwd())
 * @param {string[]} [extra] additional argv entries appended after the payload
 * @returns {Promise<string>} everything the verb wrote to stdout
 */
async function runVerb(verb, payload, cwd, extra) {
  const sink = collector();
  const argv = [verb, '--payload', JSON.stringify(payload)];
  argv.push(...(extra || []));
  const ctx = { cwd: cwd || process.cwd(), stdout: sink.stdout };
  await security.run(argv, ctx);
  return sink.text();
}
|
|
27
|
+
|
|
28
|
+
// Report-once: the same finding for the same session is surfaced only the first time.
test('SECCMD-1 scan emits additionalContext on first hit, silent on repeat (report-once)', async () => {
  const sid = freshSid();
  try {
    const writeEvent = {
      session_id: sid,
      tool_name: 'Write',
      tool_input: { file_path: 'x.js', content: 'const r = eval(q)' },
    };
    const firstOut = await runVerb('scan', writeEvent);
    const repeatOut = await runVerb('scan', writeEvent);
    assert.match(firstOut, /hookSpecificOutput/);
    assert.match(firstOut, /nubos-pilot security/);
    assert.equal(repeatOut, '');
  } finally {
    cleanup(sid);
  }
});
|
|
39
|
+
|
|
40
|
+
// A finding already in the ledger is surfaced by the `review` verb as a block decision.
test('SECCMD-2 review harvests unsurfaced risks and emits a non-blocking Stop block decision', async () => {
  const sid = freshSid();
  try {
    const seeded = [
      { file: 'a.js', line: 5, category: 'injection', severity: 'risk', title: 'SQLi', mitigation_hint: 'parameterize' },
    ];
    ledger.addReviewFindings(sid, seeded, 'stop');
    const rawOut = await runVerb('review', { session_id: sid });
    const { decision, reason } = JSON.parse(rawOut);
    assert.equal(decision, 'block');
    assert.match(reason, /nubos-pilot security/);
    assert.match(reason, /SQLi/);
  } finally {
    cleanup(sid);
  }
});
|
|
51
|
+
|
|
52
|
+
// A Bash command that is not a git commit/push must neither print nor record a review.
test('SECCMD-3 commit verb ignores non-git Bash commands', async () => {
  const sid = freshSid();
  try {
    const bashEvent = { session_id: sid, tool_name: 'Bash', tool_input: { command: 'ls -la' } };
    const printed = await runVerb('commit', bashEvent);
    assert.equal(printed, '');
    const recorded = ledger.readLedger(sid).commit_review_times;
    assert.equal(recorded.length, 0);
  } finally {
    cleanup(sid);
  }
});
|
|
60
|
+
|
|
61
|
+
// With security.enabled=false in the project config, scan and review print nothing.
test('SECCMD-4 master toggle off makes every hook verb a silent no-op', async () => {
  const root = fs.mkdtempSync(path.join(os.tmpdir(), 'np-sec-proj-'));
  const configDir = path.join(root, '.nubos-pilot');
  fs.mkdirSync(configDir, { recursive: true });
  fs.writeFileSync(path.join(configDir, 'config.json'), JSON.stringify({ security: { enabled: false } }));
  const sid = freshSid();
  try {
    const writeEvent = { session_id: sid, tool_name: 'Write', tool_input: { file_path: 'x.js', content: 'eval(q)' } };
    const scanOut = await runVerb('scan', writeEvent, root);
    // Seed a finding that `review` would surface if the feature were enabled.
    ledger.addReviewFindings(sid, [{ file: 'a.js', line: 1, category: 'x', severity: 'risk', title: 't' }], 'stop');
    const reviewOut = await runVerb('review', { session_id: sid }, root);
    assert.equal(scanOut, '');
    assert.equal(reviewOut, '');
  } finally {
    cleanup(sid);
    fs.rmSync(root, { recursive: true, force: true });
  }
});
|
|
74
|
+
|
|
75
|
+
// In a plain temp directory (no git repo) both verbs must complete without output.
test('SECCMD-5 session-start and baseline are safe no-throw no-ops without a repo', async () => {
  const sid = freshSid();
  const root = fs.mkdtempSync(path.join(os.tmpdir(), 'np-sec-nr-'));
  try {
    const startOut = await runVerb('session-start', { session_id: sid }, root);
    assert.equal(startOut, '');
    const baselineOut = await runVerb('baseline', { session_id: sid }, root);
    assert.equal(baselineOut, '');
  } finally {
    cleanup(sid);
    fs.rmSync(root, { recursive: true, force: true });
  }
});
|
package/lib/config-defaults.cjs
CHANGED
|
@@ -41,6 +41,23 @@ const DEFAULT_SWARM = Object.freeze({
|
|
|
41
41
|
knowledge_adapter: 'local',
|
|
42
42
|
});
|
|
43
43
|
|
|
44
|
+
/**
 * Default values for the `security` config section (frozen).
 * All toggles default to on; both path settings default to unset.
 */
const DEFAULT_SECURITY = Object.freeze({
  // --- toggles ---
  enabled: true,            // master switch for the whole section
  scan_on_write: true,
  review_on_stop: true,
  review_on_commit: true,

  // --- optional paths (null = not configured) ---
  custom_rules_path: null,
  guidance_path: null,

  // --- limits ---
  review_timeout_ms: 180000,        // 3 minutes
  max_stop_reviews_in_a_row: 3,
  max_commit_reviews_per_hour: 20,
  max_files_per_review: 30,
});
|
|
56
|
+
|
|
57
|
+
/**
 * Default values for the `conformance` config section (frozen).
 * NOTE(review): `inject_criteria` presumably controls whether the milestone's
 * success criteria are injected into agent prompts — confirm against the consumer.
 */
const DEFAULT_CONFORMANCE = Object.freeze({
  inject_criteria: true,
});
|
|
60
|
+
|
|
44
61
|
const DEFAULT_AUTO_LOG_LEARNING = true;
|
|
45
62
|
|
|
46
63
|
const DEFAULT_SPAWN_HEADLESS = Object.freeze({
|
|
@@ -67,6 +84,8 @@ const DEFAULT_CONFIG_TREE = Object.freeze({
|
|
|
67
84
|
loop: DEFAULT_LOOP,
|
|
68
85
|
swarm: DEFAULT_SWARM,
|
|
69
86
|
spawn: DEFAULT_SPAWN,
|
|
87
|
+
security: DEFAULT_SECURITY,
|
|
88
|
+
conformance: DEFAULT_CONFORMANCE,
|
|
70
89
|
auto_log_learning: DEFAULT_AUTO_LOG_LEARNING,
|
|
71
90
|
});
|
|
72
91
|
|
|
@@ -98,6 +117,8 @@ function buildInstallConfig(answers) {
|
|
|
98
117
|
fallback_on_error: DEFAULT_SPAWN_HEADLESS.fallback_on_error,
|
|
99
118
|
},
|
|
100
119
|
},
|
|
120
|
+
security: { ...DEFAULT_SECURITY },
|
|
121
|
+
conformance: { ...DEFAULT_CONFORMANCE },
|
|
101
122
|
auto_log_learning: DEFAULT_AUTO_LOG_LEARNING,
|
|
102
123
|
};
|
|
103
124
|
}
|
|
@@ -112,6 +133,8 @@ module.exports = {
|
|
|
112
133
|
DEFAULT_SWARM_CRITIC,
|
|
113
134
|
DEFAULT_SPAWN,
|
|
114
135
|
DEFAULT_SPAWN_HEADLESS,
|
|
136
|
+
DEFAULT_SECURITY,
|
|
137
|
+
DEFAULT_CONFORMANCE,
|
|
115
138
|
DEFAULT_AUTO_LOG_LEARNING,
|
|
116
139
|
DEFAULT_MODEL_PROFILE,
|
|
117
140
|
DEFAULT_SCOPE,
|
|
@@ -69,3 +69,18 @@ test('CFD-7: end-to-end — user answers "true" via askUser → commit_artifacts
|
|
|
69
69
|
try { fs.rmSync(root, { recursive: true, force: true }); } catch {}
|
|
70
70
|
}
|
|
71
71
|
});
|
|
72
|
+
|
|
73
|
+
// A fresh install config must carry the security section with every toggle on.
test('CFD-SEC-1: buildInstallConfig writes always-on security defaults', () => {
  const { security } = buildInstallConfig({ runtime: 'claude' });
  assert.equal(security.enabled, true);
  assert.equal(security.scan_on_write, true);
  assert.equal(security.review_on_stop, true);
  assert.equal(security.review_on_commit, true);
  assert.equal(security.custom_rules_path, null);
  assert.equal(security.max_files_per_review, 30);
});
|
|
82
|
+
|
|
83
|
+
// A fresh install config must carry conformance.inject_criteria = true.
test('CFD-CONF-1: buildInstallConfig writes conformance.inject_criteria default', () => {
  const { conformance } = buildInstallConfig({ runtime: 'claude' });
  assert.equal(conformance.inject_criteria, true);
});
|
package/lib/config-schema.cjs
CHANGED
|
@@ -67,6 +67,25 @@ const SCHEMA = Object.freeze({
|
|
|
67
67
|
},
|
|
68
68
|
},
|
|
69
69
|
},
|
|
70
|
+
security: {
|
|
71
|
+
type: 'object', optional: true, shape: {
|
|
72
|
+
enabled: { type: 'boolean', optional: true },
|
|
73
|
+
scan_on_write: { type: 'boolean', optional: true },
|
|
74
|
+
review_on_stop: { type: 'boolean', optional: true },
|
|
75
|
+
review_on_commit: { type: 'boolean', optional: true },
|
|
76
|
+
custom_rules_path: { type: 'any', optional: true }, // string | null
|
|
77
|
+
guidance_path: { type: 'any', optional: true }, // string | null
|
|
78
|
+
review_timeout_ms: { type: 'number', optional: true },
|
|
79
|
+
max_stop_reviews_in_a_row: { type: 'number', optional: true },
|
|
80
|
+
max_commit_reviews_per_hour:{ type: 'number', optional: true },
|
|
81
|
+
max_files_per_review: { type: 'number', optional: true },
|
|
82
|
+
},
|
|
83
|
+
},
|
|
84
|
+
conformance: {
|
|
85
|
+
type: 'object', optional: true, shape: {
|
|
86
|
+
inject_criteria: { type: 'boolean', optional: true },
|
|
87
|
+
},
|
|
88
|
+
},
|
|
70
89
|
});
|
|
71
90
|
|
|
72
91
|
function _typeOf(v) {
|
|
@@ -146,3 +146,61 @@ test('SCHEMA-SYNC-1 every top-level key in DEFAULT_CONFIG_TREE has a SCHEMA entr
|
|
|
146
146
|
'SCHEMA.' + key + ' is neither in DEFAULT_CONFIG_TREE nor SCHEMA_ONLY_KEYS — drift');
|
|
147
147
|
}
|
|
148
148
|
});
|
|
149
|
+
|
|
150
|
+
// Every known security key with a value of the right type validates without warnings.
test('SEC-CFG-1 valid security block produces zero warnings', () => {
  const security = {
    enabled: true,
    scan_on_write: true,
    review_on_stop: false,
    review_on_commit: true,
    custom_rules_path: '.nubos-pilot/security-rules.json',
    guidance_path: null,
    review_timeout_ms: 120000,
    max_stop_reviews_in_a_row: 3,
    max_commit_reviews_per_hour: 20,
    max_files_per_review: 30,
  };
  const warnings = validateConfig({ security });
  assert.deepEqual(warnings, []);
});
|
|
167
|
+
|
|
168
|
+
// A string where a boolean or number is expected yields one invalid-type warning per key.
test('SEC-CFG-2 wrong type in security flags is flagged', () => {
  const badConfig = { security: { enabled: 'yes', max_files_per_review: 'lots' } };
  const warnings = validateConfig(badConfig);
  assert.equal(warnings.length, 2);
  const allInvalidType = warnings.every((warning) => warning.kind === 'invalid-type');
  assert.ok(allInvalidType);
});
|
|
173
|
+
|
|
174
|
+
// A key that is not in the security schema is reported with its full dotted path.
test('SEC-CFG-3 unknown security sub-key is flagged', () => {
  const warnings = validateConfig({ security: { scan_everywhere: true } });
  assert.equal(warnings.length, 1);
  const [only] = warnings;
  assert.equal(only.kind, 'unknown-key');
  assert.equal(only.path, 'security.scan_everywhere');
});
|
|
180
|
+
|
|
181
|
+
// The shipped defaults must themselves satisfy the schema.
test('SEC-CFG-4 default security tree validates clean', () => {
  const { DEFAULT_SECURITY } = require('./config-defaults.cjs');
  const warnings = validateConfig({ security: DEFAULT_SECURITY });
  assert.deepEqual(warnings, []);
});
|
|
185
|
+
|
|
186
|
+
// A boolean inject_criteria validates without warnings.
test('CONF-CFG-1 valid conformance block produces zero warnings', () => {
  const warnings = validateConfig({ conformance: { inject_criteria: true } });
  assert.deepEqual(warnings, []);
});
|
|
189
|
+
|
|
190
|
+
// A non-boolean inject_criteria is reported as invalid-type at its dotted path.
test('CONF-CFG-2 wrong type in conformance.inject_criteria is flagged', () => {
  const warnings = validateConfig({ conformance: { inject_criteria: 'yes' } });
  assert.equal(warnings.length, 1);
  const [only] = warnings;
  assert.equal(only.kind, 'invalid-type');
  assert.equal(only.path, 'conformance.inject_criteria');
});
|
|
196
|
+
|
|
197
|
+
// A key that is not in the conformance schema is reported as unknown-key.
test('CONF-CFG-3 unknown conformance sub-key is flagged', () => {
  const strayConfig = { conformance: { review_on_executor_stop: true } };
  const warnings = validateConfig(strayConfig);
  assert.equal(warnings.length, 1);
  assert.equal(warnings[0].kind, 'unknown-key');
});
|
|
202
|
+
|
|
203
|
+
// The shipped conformance defaults must themselves satisfy the schema.
test('CONF-CFG-4 default conformance tree validates clean', () => {
  const { DEFAULT_CONFORMANCE } = require('./config-defaults.cjs');
  const warnings = validateConfig({ conformance: DEFAULT_CONFORMANCE });
  assert.deepEqual(warnings, []);
});
|