nubos-pilot 1.2.4 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. package/CHANGELOG.md +17 -1
  2. package/README.md +2 -1
  3. package/SECURITY.md +3 -4
  4. package/bin/np-tools/_commands.cjs +1 -0
  5. package/bin/np-tools/learnings.cjs +1 -1
  6. package/bin/np-tools/resolve-model.cjs +55 -1
  7. package/bin/np-tools/resolve-model.test.cjs +139 -0
  8. package/bin/np-tools/security.cjs +1 -1
  9. package/bin/np-tools/spawn-headless.cjs +100 -1
  10. package/bin/np-tools/spawn-headless.test.cjs +108 -58
  11. package/bin/np-tools/spawn-offhost.cjs +93 -0
  12. package/bin/np-tools/spawn-offhost.test.cjs +38 -0
  13. package/lib/agents.cjs +16 -2
  14. package/lib/config-schema.cjs +5 -1
  15. package/lib/learnings/extract.cjs +4 -4
  16. package/lib/learnings/extract.test.cjs +8 -8
  17. package/lib/model-providers.cjs +118 -0
  18. package/lib/model-providers.test.cjs +85 -0
  19. package/lib/runtime/agent-loop.cjs +64 -0
  20. package/lib/runtime/agent-loop.test.cjs +135 -0
  21. package/lib/runtime/dispatch.cjs +174 -0
  22. package/lib/runtime/dispatch.test.cjs +193 -0
  23. package/lib/runtime/preflight.cjs +68 -0
  24. package/lib/runtime/preflight.test.cjs +62 -0
  25. package/lib/runtime/providers/openai-compat.cjs +102 -0
  26. package/lib/runtime/providers/openai-compat.test.cjs +103 -0
  27. package/lib/runtime/tools/index.cjs +415 -0
  28. package/lib/runtime/tools/index.test.cjs +230 -0
  29. package/lib/security/review.cjs +4 -4
  30. package/lib/security/review.test.cjs +6 -6
  31. package/np-tools.cjs +1 -0
  32. package/package.json +1 -1
  33. package/workflows/add-tests.md +41 -0
  34. package/workflows/architect-phase.md +19 -0
  35. package/workflows/discuss-phase.md +29 -10
  36. package/workflows/execute-phase.md +93 -4
  37. package/workflows/plan-phase.md +57 -16
  38. package/workflows/research-phase.md +45 -0
  39. package/workflows/scan-codebase.md +21 -3
  40. package/workflows/validate-phase.md +30 -13
  41. package/workflows/verify-work.md +17 -0
@@ -0,0 +1,415 @@
1
+ 'use strict';
2
+
3
+ const fs = require('node:fs');
4
+ const path = require('node:path');
5
+ const { spawnSync } = require('node:child_process');
6
+ const { NubosPilotError } = require('../../core.cjs');
7
+ const { assertInsideBase } = require('../../safe-path.cjs');
8
+ const { scanContent, _looksCatastrophic } = require('../../security/scan.cjs');
9
+ const { search: knowledgeSearch } = require('../../knowledge.cjs');
10
+ const { recordSearchEvidence } = require('../../nubosloop-audit.cjs');
11
+
12
+ const MAX_FILE_BYTES = 1024 * 1024;
13
+ const MAX_READ_LINES = 2000;
14
+ const MAX_GREP_MATCHES = 200;
15
+ const MAX_GLOB_RESULTS = 500;
16
+ const MAX_WALK_ENTRIES = 20000;
17
+ const IGNORE_DIRS = new Set(['node_modules', '.git', 'coverage', '.next', 'dist', 'build', 'vendor']);
18
+
19
+ const DEFAULT_BASH_TIMEOUT_MS = 120000;
20
+ const MAX_BASH_OUTPUT = 30000;
21
+
22
+ const BASH_DENYLIST = [
23
+ { re: /\brm\s+(-[a-z]*\s+)*-[a-z]*[rf][a-z]*\s+(-[a-z]+\s+)*(\/|~|\/\*|\$HOME)(\s|$)/, why: 'recursive delete of / or $HOME' },
24
+ { re: /:\(\)\s*\{\s*:\s*\|\s*:\s*&\s*\}\s*;\s*:/, why: 'fork bomb' },
25
+ { re: /\bmkfs(\.\w+)?\b/, why: 'filesystem format' },
26
+ { re: /\bdd\b[^\n]*\bof=\/dev\//, why: 'raw write to a device' },
27
+ { re: />\s*\/dev\/(sd|nvme|disk)/, why: 'redirect into a block device' },
28
+ { re: /\b(curl|wget)\b[^\n|]*\|\s*(sudo\s+)?(sh|bash|zsh)\b/, why: 'pipe-from-network into a shell' },
29
+ { re: /\bsudo\b/, why: 'privilege escalation' },
30
+ { re: /\bchmod\s+(-R\s+)?0?777\s+\//, why: 'world-writable on /' },
31
+ { re: /\bgit\b[^\n]*\bpush\b/, why: 'git push (publishing is out of scope for an executor)' },
32
+ ];
33
+
34
+ function _looksBinary(buf) {
35
+ const n = Math.min(buf.length, 8000);
36
+ for (let i = 0; i < n; i++) if (buf[i] === 0) return true;
37
+ return false;
38
+ }
39
+
40
+ function _walk(root, onFile) {
41
+ let count = 0;
42
+ const stack = [root];
43
+ while (stack.length) {
44
+ const dir = stack.pop();
45
+ let entries;
46
+ try { entries = fs.readdirSync(dir, { withFileTypes: true }); } catch { continue; }
47
+ for (const ent of entries) {
48
+ if (++count > MAX_WALK_ENTRIES) return;
49
+ const full = path.join(dir, ent.name);
50
+ if (ent.isDirectory()) {
51
+ if (!IGNORE_DIRS.has(ent.name) && !ent.name.startsWith('.')) stack.push(full);
52
+ } else if (ent.isFile()) {
53
+ onFile(full);
54
+ }
55
+ }
56
+ }
57
+ }
58
+
59
+ function _globToRegex(glob) {
60
+ let re = '';
61
+ for (let i = 0; i < glob.length; i++) {
62
+ const c = glob[i];
63
+ if (c === '*') {
64
+ if (glob[i + 1] === '*') { re += '.*'; i++; if (glob[i + 1] === '/') i++; }
65
+ else re += '[^/]*';
66
+ } else if (c === '?') {
67
+ re += '[^/]';
68
+ } else {
69
+ re += c.replace(/[.+^${}()|[\]\\]/g, '\\$&');
70
+ }
71
+ }
72
+ return new RegExp('^' + re + '$');
73
+ }
74
+
75
+ function _read(args, ctx) {
76
+ const rel = args && args.path;
77
+ if (typeof rel !== 'string' || !rel) throw new NubosPilotError('tool-bad-args', 'Read requires a "path"', {});
78
+ const abs = assertInsideBase(ctx.cwd, rel, 'Read path');
79
+ const stat = fs.statSync(abs);
80
+ if (stat.size > MAX_FILE_BYTES) throw new NubosPilotError('tool-file-too-large', 'file exceeds 1MB: ' + path.basename(abs), {});
81
+ const buf = fs.readFileSync(abs);
82
+ if (_looksBinary(buf)) return '[binary file omitted: ' + path.basename(abs) + ']';
83
+ const lines = buf.toString('utf-8').split('\n');
84
+ const offset = Math.max(0, (args && Number(args.offset)) || 0);
85
+ const limit = Math.min(MAX_READ_LINES, (args && Number(args.limit)) || MAX_READ_LINES);
86
+ const slice = lines.slice(offset, offset + limit);
87
+ return slice.map((l, i) => (offset + i + 1) + '\t' + l).join('\n');
88
+ }
89
+
90
+ function _glob(args, ctx) {
91
+ const pattern = (args && args.pattern) || '**/*';
92
+ const re = _globToRegex(pattern);
93
+ const matches = [];
94
+ _walk(ctx.cwd, (full) => {
95
+ if (matches.length >= MAX_GLOB_RESULTS) return;
96
+ const relPath = path.relative(ctx.cwd, full);
97
+ if (re.test(relPath)) matches.push(relPath);
98
+ });
99
+ matches.sort();
100
+ if (!matches.length) return 'no files match: ' + pattern;
101
+ return matches.join('\n');
102
+ }
103
+
104
+ function _grep(args, ctx) {
105
+ const pattern = args && args.pattern;
106
+ if (typeof pattern !== 'string' || !pattern) throw new NubosPilotError('tool-bad-args', 'Grep requires a "pattern"', {});
107
+ if (_looksCatastrophic(pattern)) {
108
+ throw new NubosPilotError('tool-bad-args', 'Grep pattern rejected as potentially catastrophic (ReDoS)', {});
109
+ }
110
+ let re;
111
+ try { re = new RegExp(pattern, (args && args.ignore_case) ? 'i' : ''); }
112
+ catch { throw new NubosPilotError('tool-bad-args', 'Grep pattern is not a valid regex', {}); }
113
+ const globRe = (args && args.glob) ? _globToRegex(args.glob) : null;
114
+ const out = [];
115
+ _walk(ctx.cwd, (full) => {
116
+ if (out.length >= MAX_GREP_MATCHES) return;
117
+ const relPath = path.relative(ctx.cwd, full);
118
+ if (globRe && !globRe.test(relPath)) return;
119
+ let buf;
120
+ try { buf = fs.readFileSync(full); } catch { return; }
121
+ if (buf.length > MAX_FILE_BYTES || _looksBinary(buf)) return;
122
+ const lines = buf.toString('utf-8').split('\n');
123
+ for (let i = 0; i < lines.length; i++) {
124
+ if (out.length >= MAX_GREP_MATCHES) break;
125
+ if (re.test(lines[i])) out.push(relPath + ':' + (i + 1) + ':' + lines[i].slice(0, 300));
126
+ }
127
+ });
128
+ if (!out.length) return 'no matches for /' + pattern + '/';
129
+ return out.join('\n');
130
+ }
131
+
132
+ function _scanNote(relPath, content, ctx) {
133
+ let res;
134
+ try { res = scanContent({ filePath: relPath, content, customRulesPath: ctx && ctx.customRulesPath }); }
135
+ catch { return '\n[security] scan unavailable for this write (scanner error) — content NOT verified'; }
136
+ const findings = (res && res.findings) || [];
137
+ if (!findings.length) return '';
138
+ const lines = findings.slice(0, 10).map((f) => ' - ' + f.severity + ' ' + f.category + ' @ line ' + f.line + ': ' + f.rule_name);
139
+ return '\n[security] ' + findings.length + ' finding(s) in the written content — review before relying on it:\n' + lines.join('\n');
140
+ }
141
+
142
+ function _assertNotSymlinkLeaf(abs, label) {
143
+ let lst;
144
+ try { lst = fs.lstatSync(abs); } catch { return; }
145
+ if (lst.isSymbolicLink()) {
146
+ throw new NubosPilotError('tool-path-symlink', label + ' resolves to a symlink — refused (workspace confinement)', { file: path.basename(abs) });
147
+ }
148
+ }
149
+
150
+ function _write(args, ctx) {
151
+ const rel = args && args.path;
152
+ if (typeof rel !== 'string' || !rel) throw new NubosPilotError('tool-bad-args', 'Write requires a "path"', {});
153
+ if (typeof (args && args.content) !== 'string') throw new NubosPilotError('tool-bad-args', 'Write requires string "content"', {});
154
+ const abs = assertInsideBase(ctx.cwd, rel, 'Write path');
155
+ _assertNotSymlinkLeaf(abs, 'Write path');
156
+ fs.mkdirSync(path.dirname(abs), { recursive: true });
157
+ fs.writeFileSync(abs, args.content, 'utf-8');
158
+ return 'wrote ' + rel + ' (' + Buffer.byteLength(args.content, 'utf-8') + ' bytes)' + _scanNote(rel, args.content, ctx);
159
+ }
160
+
161
+ function _edit(args, ctx) {
162
+ const rel = args && args.path;
163
+ const oldStr = args && args.old_string;
164
+ const newStr = args && args.new_string;
165
+ if (typeof rel !== 'string' || !rel) throw new NubosPilotError('tool-bad-args', 'Edit requires a "path"', {});
166
+ if (typeof oldStr !== 'string' || typeof newStr !== 'string') {
167
+ throw new NubosPilotError('tool-bad-args', 'Edit requires string "old_string" and "new_string"', {});
168
+ }
169
+ const abs = assertInsideBase(ctx.cwd, rel, 'Edit path');
170
+ _assertNotSymlinkLeaf(abs, 'Edit path');
171
+ const stat = fs.statSync(abs);
172
+ if (stat.size > MAX_FILE_BYTES) throw new NubosPilotError('tool-file-too-large', 'file exceeds 1MB: ' + path.basename(abs), {});
173
+ const before = fs.readFileSync(abs, 'utf-8');
174
+ const replaceAll = !!(args && args.replace_all);
175
+ const occurrences = before.split(oldStr).length - 1;
176
+ if (occurrences === 0) throw new NubosPilotError('tool-edit-no-match', 'old_string not found in ' + rel, {});
177
+ if (occurrences > 1 && !replaceAll) {
178
+ throw new NubosPilotError('tool-edit-ambiguous', 'old_string occurs ' + occurrences + 'x in ' + rel + ' — pass replace_all or make it unique', {});
179
+ }
180
+ const after = replaceAll
181
+ ? before.split(oldStr).join(newStr)
182
+ : (() => { const idx = before.indexOf(oldStr); return before.slice(0, idx) + newStr + before.slice(idx + oldStr.length); })();
183
+ fs.writeFileSync(abs, after, 'utf-8');
184
+ return 'edited ' + rel + ' (' + (replaceAll ? occurrences + ' replacements' : '1 replacement') + ')' + _scanNote(rel, after, ctx);
185
+ }
186
+
187
+ function _bash(args, ctx) {
188
+ const cmd = args && args.command;
189
+ if (typeof cmd !== 'string' || !cmd.trim()) throw new NubosPilotError('tool-bad-args', 'Bash requires a "command"', {});
190
+ for (const entry of BASH_DENYLIST) {
191
+ if (entry.re.test(cmd)) return 'Error: bash-command-blocked: ' + entry.why;
192
+ }
193
+ const timeout = Math.min(600000, Math.max(1000, (ctx && Number(ctx.bashTimeoutMs)) || (args && Number(args.timeout_ms)) || DEFAULT_BASH_TIMEOUT_MS));
194
+ const res = spawnSync('/bin/sh', ['-c', cmd], {
195
+ cwd: ctx.cwd,
196
+ timeout,
197
+ encoding: 'utf-8',
198
+ maxBuffer: MAX_BASH_OUTPUT * 4,
199
+ stdio: ['ignore', 'pipe', 'pipe'],
200
+ });
201
+ if (res.error) {
202
+ if (res.error.code === 'ETIMEDOUT') return 'Error: bash-timeout: command exceeded ' + timeout + 'ms';
203
+ return 'Error: bash-spawn-failed: ' + (res.error.code || res.error.message);
204
+ }
205
+ let out = (res.stdout || '') + (res.stderr || '');
206
+ let truncated = '';
207
+ if (out.length > MAX_BASH_OUTPUT) { out = out.slice(0, MAX_BASH_OUTPUT); truncated = '\n[output truncated at ' + MAX_BASH_OUTPUT + ' chars]'; }
208
+ const code = res.status == null ? '?' : res.status;
209
+ return '[exit ' + code + ']\n' + out + truncated;
210
+ }
211
+
212
+ function _knowledgeSearch(args, ctx) {
213
+ const query = args && (args.query || args.q);
214
+ if (typeof query !== 'string' || !query.trim()) {
215
+ throw new NubosPilotError('tool-bad-args', 'knowledge-search requires a "query"', {});
216
+ }
217
+ const limit = Math.min(50, Math.max(1, (args && Number(args.limit)) || 10));
218
+ if (ctx && ctx.taskId) {
219
+ try { recordSearchEvidence(ctx.taskId, query, ctx.cwd); } catch {}
220
+ }
221
+ let res;
222
+ try { res = knowledgeSearch(query, ctx.cwd, { limit }); }
223
+ catch (err) { return 'knowledge-search: index unavailable (' + ((err && err.code) || 'error') + ')'; }
224
+ const hits = (res && res.hits) || [];
225
+ if (!hits.length) return 'knowledge-search: no results for "' + query + '"';
226
+ return hits.map((h) => h.rel_path + ':' + h.line_start + ' (score ' + h.score + ')\n ' + String(h.preview || '').slice(0, 200)).join('\n');
227
+ }
228
+
229
+ const TOOLS = {
230
+ 'knowledge-search': {
231
+ run: _knowledgeSearch,
232
+ schema: {
233
+ type: 'function',
234
+ function: {
235
+ name: 'knowledge-search',
236
+ description: 'Search the project knowledge base (codebase docs, prior learnings) before writing code. Returns ranked "path:line (score)" hits. Call this first — it satisfies the Rule-9 search bar.',
237
+ parameters: {
238
+ type: 'object',
239
+ properties: {
240
+ query: { type: 'string', description: 'Free-text search query.' },
241
+ limit: { type: 'integer', description: 'Max hits (default 10).' },
242
+ },
243
+ required: ['query'],
244
+ },
245
+ },
246
+ },
247
+ },
248
+ Read: {
249
+ run: _read,
250
+ schema: {
251
+ type: 'function',
252
+ function: {
253
+ name: 'Read',
254
+ description: 'Read a file from the workspace. Returns up to 2000 lines, each prefixed with its 1-based line number.',
255
+ parameters: {
256
+ type: 'object',
257
+ properties: {
258
+ path: { type: 'string', description: 'Workspace-relative file path.' },
259
+ offset: { type: 'integer', description: 'Zero-based line to start from.' },
260
+ limit: { type: 'integer', description: 'Maximum number of lines to return.' },
261
+ },
262
+ required: ['path'],
263
+ },
264
+ },
265
+ },
266
+ },
267
+ Glob: {
268
+ run: _glob,
269
+ schema: {
270
+ type: 'function',
271
+ function: {
272
+ name: 'Glob',
273
+ description: 'List workspace files matching a glob (supports *, **, ?). Ignores node_modules/.git/vendor and dotdirs.',
274
+ parameters: {
275
+ type: 'object',
276
+ properties: { pattern: { type: 'string', description: 'Glob pattern, e.g. "src/**/*.ts".' } },
277
+ required: ['pattern'],
278
+ },
279
+ },
280
+ },
281
+ },
282
+ Grep: {
283
+ run: _grep,
284
+ schema: {
285
+ type: 'function',
286
+ function: {
287
+ name: 'Grep',
288
+ description: 'Search workspace file contents by regex. Returns up to 200 "relpath:line:text" matches.',
289
+ parameters: {
290
+ type: 'object',
291
+ properties: {
292
+ pattern: { type: 'string', description: 'JavaScript regular expression.' },
293
+ glob: { type: 'string', description: 'Optional glob to restrict which files are scanned.' },
294
+ ignore_case: { type: 'boolean', description: 'Case-insensitive match.' },
295
+ },
296
+ required: ['pattern'],
297
+ },
298
+ },
299
+ },
300
+ },
301
+ Write: {
302
+ run: _write,
303
+ schema: {
304
+ type: 'function',
305
+ function: {
306
+ name: 'Write',
307
+ description: 'Create or overwrite a workspace file with the given content. Confined to the workspace; content is security-scanned.',
308
+ parameters: {
309
+ type: 'object',
310
+ properties: {
311
+ path: { type: 'string', description: 'Workspace-relative file path.' },
312
+ content: { type: 'string', description: 'Full file content to write.' },
313
+ },
314
+ required: ['path', 'content'],
315
+ },
316
+ },
317
+ },
318
+ },
319
+ Edit: {
320
+ run: _edit,
321
+ schema: {
322
+ type: 'function',
323
+ function: {
324
+ name: 'Edit',
325
+ description: 'Replace an exact string in a workspace file. old_string must be unique unless replace_all is set.',
326
+ parameters: {
327
+ type: 'object',
328
+ properties: {
329
+ path: { type: 'string', description: 'Workspace-relative file path.' },
330
+ old_string: { type: 'string', description: 'Exact text to replace.' },
331
+ new_string: { type: 'string', description: 'Replacement text.' },
332
+ replace_all: { type: 'boolean', description: 'Replace every occurrence instead of requiring uniqueness.' },
333
+ },
334
+ required: ['path', 'old_string', 'new_string'],
335
+ },
336
+ },
337
+ },
338
+ },
339
+ Bash: {
340
+ run: _bash,
341
+ schema: {
342
+ type: 'function',
343
+ function: {
344
+ name: 'Bash',
345
+ description: 'Run a shell command in the workspace. Timed out and output-capped; catastrophic commands are blocked. Returns "[exit N]\\n<output>".',
346
+ parameters: {
347
+ type: 'object',
348
+ properties: {
349
+ command: { type: 'string', description: 'Shell command to execute.' },
350
+ timeout_ms: { type: 'integer', description: 'Optional timeout (ms), capped at 600000.' },
351
+ },
352
+ required: ['command'],
353
+ },
354
+ },
355
+ },
356
+ },
357
+ };
358
+
359
+ const READ_ONLY_TOOL_NAMES = Object.freeze(['Read', 'Glob', 'Grep']);
360
+ const MUTATING_TOOL_NAMES = Object.freeze(['Write', 'Edit']);
361
+ const SEARCH_TOOL_NAME = 'knowledge-search';
362
+ const IMPLEMENTED_TOOL_NAMES = Object.freeze([...READ_ONLY_TOOL_NAMES, ...MUTATING_TOOL_NAMES, 'Bash', SEARCH_TOOL_NAME]);
363
+
364
+ function toolsetFor(declaredNames, opts) {
365
+ const o = opts || {};
366
+ const declared = Array.isArray(declaredNames) ? declaredNames : [];
367
+ let allowed = READ_ONLY_TOOL_NAMES.slice();
368
+ if (!o.readOnly) {
369
+ allowed = allowed.concat(MUTATING_TOOL_NAMES);
370
+ if (o.allowBash === true) allowed.push('Bash');
371
+ }
372
+ const names = declared.filter((n) => allowed.includes(n));
373
+ if (o.withSearch && !names.includes(SEARCH_TOOL_NAME)) names.push(SEARCH_TOOL_NAME);
374
+ const extraCtx = o.ctx || {};
375
+ return {
376
+ names,
377
+ schemas: names.map((n) => TOOLS[n].schema),
378
+ execute(name, argsJson, ctx) {
379
+ return execute(name, argsJson, Object.assign({}, extraCtx, ctx), names);
380
+ },
381
+ };
382
+ }
383
+
384
+ function execute(name, argsJson, ctx, allowed) {
385
+ if (allowed && !allowed.includes(name)) {
386
+ return 'Error: tool "' + name + '" is not available to this agent';
387
+ }
388
+ const tool = TOOLS[name];
389
+ if (!tool) return 'Error: unknown tool "' + name + '"';
390
+ let args = {};
391
+ if (typeof argsJson === 'string' && argsJson.trim()) {
392
+ try { args = JSON.parse(argsJson); }
393
+ catch { return 'Error: tool "' + name + '" received arguments that are not valid JSON'; }
394
+ } else if (argsJson && typeof argsJson === 'object') {
395
+ args = argsJson;
396
+ }
397
+ try {
398
+ return String(tool.run(args, ctx || { cwd: process.cwd() }));
399
+ } catch (err) {
400
+ if (err && err.name === 'NubosPilotError') return 'Error: ' + err.code + ': ' + err.message;
401
+ return 'Error: ' + ((err && err.message) || 'tool execution failed');
402
+ }
403
+ }
404
+
405
+ module.exports = {
406
+ TOOLS,
407
+ READ_ONLY_TOOL_NAMES,
408
+ MUTATING_TOOL_NAMES,
409
+ SEARCH_TOOL_NAME,
410
+ IMPLEMENTED_TOOL_NAMES,
411
+ BASH_DENYLIST,
412
+ toolsetFor,
413
+ execute,
414
+ _globToRegex,
415
+ };
@@ -0,0 +1,230 @@
1
+ const fs = require('node:fs');
2
+ const os = require('node:os');
3
+ const path = require('node:path');
4
+ const { test, afterEach } = require('node:test');
5
+ const assert = require('node:assert/strict');
6
+
7
+ const { toolsetFor, execute, _globToRegex, READ_ONLY_TOOL_NAMES, MUTATING_TOOL_NAMES } = require('./index.cjs');
8
+
9
+ const _dirs = [];
10
+ function _ws(files) {
11
+ const root = fs.realpathSync(fs.mkdtempSync(path.join(os.tmpdir(), 'np-tools-')));
12
+ for (const [rel, content] of Object.entries(files)) {
13
+ const abs = path.join(root, rel);
14
+ fs.mkdirSync(path.dirname(abs), { recursive: true });
15
+ fs.writeFileSync(abs, content, 'utf-8');
16
+ }
17
+ _dirs.push(root);
18
+ return root;
19
+ }
20
+ afterEach(() => { while (_dirs.length) { try { fs.rmSync(_dirs.pop(), { recursive: true, force: true }); } catch {} } });
21
+
22
+ test('TOOL-1: Read returns line-numbered content', () => {
23
+ const cwd = _ws({ 'a.txt': 'one\ntwo\nthree' });
24
+ const out = execute('Read', { path: 'a.txt' }, { cwd });
25
+ assert.equal(out, '1\tone\n2\ttwo\n3\tthree');
26
+ });
27
+
28
+ test('TOOL-2: Read honours offset/limit', () => {
29
+ const cwd = _ws({ 'a.txt': 'l1\nl2\nl3\nl4' });
30
+ const out = execute('Read', { path: 'a.txt', offset: 1, limit: 2 }, { cwd });
31
+ assert.equal(out, '2\tl2\n3\tl3');
32
+ });
33
+
34
+ test('TOOL-3: Read outside cwd is refused (safe-path)', () => {
35
+ const cwd = _ws({ 'a.txt': 'x' });
36
+ const out = execute('Read', { path: '../../etc/passwd' }, { cwd });
37
+ assert.match(out, /^Error: safe-path-outside-base/);
38
+ });
39
+
40
+ test('TOOL-4: Glob matches by pattern, ignores node_modules', () => {
41
+ const cwd = _ws({ 'src/a.ts': '', 'src/b.ts': '', 'node_modules/x/c.ts': '', 'readme.md': '' });
42
+ const out = execute('Glob', { pattern: 'src/**/*.ts' }, { cwd });
43
+ assert.equal(out, 'src/a.ts\nsrc/b.ts');
44
+ });
45
+
46
+ test('TOOL-5: Grep returns relpath:line:text matches', () => {
47
+ const cwd = _ws({ 'a.js': 'const x = 1\nfunction foo() {}\n', 'b.js': 'function bar() {}\n' });
48
+ const out = execute('Grep', { pattern: 'function (\\w+)' }, { cwd });
49
+ const lines = out.split('\n').sort();
50
+ assert.deepEqual(lines, ['a.js:2:function foo() {}', 'b.js:1:function bar() {}']);
51
+ });
52
+
53
+ test('TOOL-6: Grep with glob restricts scanned files', () => {
54
+ const cwd = _ws({ 'a.js': 'needle\n', 'b.ts': 'needle\n' });
55
+ const out = execute('Grep', { pattern: 'needle', glob: '*.ts' }, { cwd });
56
+ assert.equal(out, 'b.ts:1:needle');
57
+ });
58
+
59
+ test('TOOL-7: execute returns error string (not throw) on bad JSON args', () => {
60
+ const cwd = _ws({});
61
+ const out = execute('Read', '{not json', { cwd });
62
+ assert.match(out, /not valid JSON/);
63
+ });
64
+
65
+ test('TOOL-8: execute rejects a tool not in the allowed set', () => {
66
+ const cwd = _ws({});
67
+ const out = execute('Bash', { cmd: 'ls' }, { cwd }, ['Read', 'Glob']);
68
+ assert.match(out, /not available/);
69
+ });
70
+
71
+ test('TOOL-9: toolsetFor intersects declared tools; Bash is OPT-IN (off by default)', () => {
72
+ const ts = toolsetFor(['Read', 'Write', 'Bash', 'Grep', 'WebFetch']);
73
+ assert.deepEqual(ts.names, ['Read', 'Write', 'Grep']);
74
+ assert.equal(ts.schemas[0].function.name, 'Read');
75
+ });
76
+
77
+ test('TOOL-9b: readOnly mode excludes mutating tools and Bash', () => {
78
+ const ts = toolsetFor(['Read', 'Write', 'Edit', 'Bash', 'Grep'], { readOnly: true });
79
+ assert.deepEqual(ts.names, ['Read', 'Grep']);
80
+ });
81
+
82
+ test('TOOL-9c: allowBash:true is required to include Bash', () => {
83
+ assert.deepEqual(toolsetFor(['Read', 'Write', 'Edit', 'Bash']).names, ['Read', 'Write', 'Edit']);
84
+ assert.deepEqual(toolsetFor(['Read', 'Write', 'Edit', 'Bash'], { allowBash: true }).names, ['Read', 'Write', 'Edit', 'Bash']);
85
+ });
86
+
87
+ test('TOOL-10: toolset.execute enforces the agent allow-list', () => {
88
+ const cwd = _ws({ 'a.txt': 'hi' });
89
+ const ts = toolsetFor(['Read']);
90
+ assert.equal(ts.execute('Read', { path: 'a.txt' }, { cwd }), '1\thi');
91
+ assert.match(ts.execute('Grep', { pattern: 'x' }, { cwd }), /not available/);
92
+ });
93
+
94
+ test('TOOL-11: _globToRegex handles *, **, ?', () => {
95
+ assert.ok(_globToRegex('*.ts').test('a.ts'));
96
+ assert.ok(!_globToRegex('*.ts').test('src/a.ts'));
97
+ assert.ok(_globToRegex('src/**/*.ts').test('src/x/y/a.ts'));
98
+ assert.ok(_globToRegex('a?.js').test('ab.js'));
99
+ });
100
+
101
+ test('TOOL-12: READ_ONLY / MUTATING tool-name sets are the expected closed sets', () => {
102
+ assert.deepEqual(READ_ONLY_TOOL_NAMES, ['Read', 'Glob', 'Grep']);
103
+ assert.deepEqual(MUTATING_TOOL_NAMES, ['Write', 'Edit']);
104
+ });
105
+
106
+ test('TOOL-13: Write creates a file (and reports byte count)', () => {
107
+ const cwd = _ws({});
108
+ const out = execute('Write', { path: 'sub/new.txt', content: 'hello world' }, { cwd });
109
+ assert.match(out, /^wrote sub\/new\.txt \(11 bytes\)/);
110
+ assert.equal(fs.readFileSync(path.join(cwd, 'sub/new.txt'), 'utf-8'), 'hello world');
111
+ });
112
+
113
+ test('TOOL-14: Write outside cwd is refused', () => {
114
+ const cwd = _ws({});
115
+ const out = execute('Write', { path: '../escape.txt', content: 'x' }, { cwd });
116
+ assert.match(out, /^Error: safe-path-outside-base/);
117
+ assert.ok(!fs.existsSync(path.join(cwd, '../escape.txt')));
118
+ });
119
+
120
+ test('TOOL-15: Write surfaces a security finding without blocking the write', () => {
121
+ const cwd = _ws({});
122
+ const out = execute('Write', { path: 'danger.js', content: 'const x = eval(userInput)\n' }, { cwd });
123
+ assert.match(out, /\[security\] \d+ finding/);
124
+ assert.ok(fs.existsSync(path.join(cwd, 'danger.js')));
125
+ });
126
+
127
+ test('TOOL-16: Edit replaces a unique string', () => {
128
+ const cwd = _ws({ 'a.txt': 'foo bar baz' });
129
+ const out = execute('Edit', { path: 'a.txt', old_string: 'bar', new_string: 'QUX' }, { cwd });
130
+ assert.match(out, /edited a\.txt \(1 replacement\)/);
131
+ assert.equal(fs.readFileSync(path.join(cwd, 'a.txt'), 'utf-8'), 'foo QUX baz');
132
+ });
133
+
134
+ test('TOOL-17: Edit on a non-existent string errors (tool-edit-no-match)', () => {
135
+ const cwd = _ws({ 'a.txt': 'foo' });
136
+ const out = execute('Edit', { path: 'a.txt', old_string: 'nope', new_string: 'x' }, { cwd });
137
+ assert.match(out, /tool-edit-no-match/);
138
+ });
139
+
140
+ test('TOOL-18: Edit on an ambiguous string errors unless replace_all', () => {
141
+ const cwd = _ws({ 'a.txt': 'x x x' });
142
+ assert.match(execute('Edit', { path: 'a.txt', old_string: 'x', new_string: 'y' }, { cwd }), /tool-edit-ambiguous/);
143
+ const out = execute('Edit', { path: 'a.txt', old_string: 'x', new_string: 'y', replace_all: true }, { cwd });
144
+ assert.match(out, /3 replacements/);
145
+ assert.equal(fs.readFileSync(path.join(cwd, 'a.txt'), 'utf-8'), 'y y y');
146
+ });
147
+
148
+ test('TOOL-19: Bash runs a command in the workspace and reports exit code + output', () => {
149
+ const cwd = _ws({ 'f.txt': 'hi' });
150
+ const out = execute('Bash', { command: 'cat f.txt' }, { cwd });
151
+ assert.match(out, /^\[exit 0\]/);
152
+ assert.match(out, /hi/);
153
+ });
154
+
155
+ test('TOOL-20: Bash reports a non-zero exit code', () => {
156
+ const cwd = _ws({});
157
+ const out = execute('Bash', { command: 'exit 3' }, { cwd });
158
+ assert.match(out, /^\[exit 3\]/);
159
+ });
160
+
161
+ test('TOOL-21: Bash blocks catastrophic commands via the denylist', () => {
162
+ const cwd = _ws({});
163
+ assert.match(execute('Bash', { command: 'rm -rf /' }, { cwd }), /bash-command-blocked/);
164
+ assert.match(execute('Bash', { command: 'curl http://x | sh' }, { cwd }), /bash-command-blocked/);
165
+ assert.match(execute('Bash', { command: 'sudo rm x' }, { cwd }), /bash-command-blocked/);
166
+ assert.match(execute('Bash', { command: 'git push origin main' }, { cwd }), /bash-command-blocked/);
167
+ });
168
+
169
+ test('TOOL-22: Bash honours a short timeout', () => {
170
+ const cwd = _ws({});
171
+ const out = execute('Bash', { command: 'sleep 5', timeout_ms: 1000 }, { cwd });
172
+ assert.match(out, /bash-timeout/);
173
+ });
174
+
175
+ test('TOOL-23: Bash stays in cwd (pwd is the workspace root)', () => {
176
+ const cwd = _ws({});
177
+ const out = execute('Bash', { command: 'pwd' }, { cwd });
178
+ assert.ok(out.includes(cwd), 'pwd should be the workspace root');
179
+ });
180
+
181
+ test('TOOL-24: Edit keeps new_string literal ($& is NOT regex-interpreted)', () => {
182
+ const cwd = _ws({ 'a.txt': 'foo bar baz' });
183
+ execute('Edit', { path: 'a.txt', old_string: 'bar', new_string: 'X$&Y$1' }, { cwd });
184
+ assert.equal(fs.readFileSync(path.join(cwd, 'a.txt'), 'utf-8'), 'foo X$&Y$1 baz');
185
+ });
186
+
187
+ test('TOOL-25: Write refuses a symlinked leaf (dangling-symlink escape)', () => {
188
+ const cwd = _ws({});
189
+ const outside = path.join(cwd, '..', 'np-escape-target.txt');
190
+ try { fs.unlinkSync(outside); } catch {}
191
+ fs.symlinkSync(outside, path.join(cwd, 'evil'));
192
+ const out = execute('Write', { path: 'evil', content: 'pwned' }, { cwd });
193
+ assert.match(out, /tool-path-symlink/);
194
+ assert.ok(!fs.existsSync(outside), 'must not have written through the symlink');
195
+ });
196
+
197
+ test('TOOL-26: Edit refuses a symlinked leaf', () => {
198
+ const cwd = _ws({});
199
+ const outside = path.join(cwd, '..', 'np-escape-edit.txt');
200
+ fs.writeFileSync(outside, 'original', 'utf-8');
201
+ fs.symlinkSync(outside, path.join(cwd, 'evil'));
202
+ const out = execute('Edit', { path: 'evil', old_string: 'original', new_string: 'pwned' }, { cwd });
203
+ assert.match(out, /tool-path-symlink|safe-path-outside-base/);
204
+ assert.equal(fs.readFileSync(outside, 'utf-8'), 'original');
205
+ fs.unlinkSync(outside);
206
+ });
207
+
208
+ test('TOOL-27: Grep rejects a catastrophic (ReDoS) pattern', () => {
209
+ const cwd = _ws({ 'a.txt': 'x' });
210
+ const out = execute('Grep', { pattern: '(a+)+$' }, { cwd });
211
+ assert.match(out, /catastrophic|ReDoS|tool-bad-args/);
212
+ });
213
+
214
+ test('TOOL-28: withSearch injects knowledge-search even when undeclared', () => {
215
+ assert.ok(!toolsetFor(['Read', 'Write']).names.includes('knowledge-search'));
216
+ const ts = toolsetFor(['Read', 'Write'], { withSearch: true });
217
+ assert.ok(ts.names.includes('knowledge-search'));
218
+ assert.ok(ts.schemas.some((s) => s.function.name === 'knowledge-search'));
219
+ });
220
+
221
+ test('TOOL-29: knowledge-search records search evidence when a taskId is in ctx', () => {
222
+ const cwd = _ws({});
223
+ fs.mkdirSync(path.join(cwd, '.nubos-pilot'), { recursive: true });
224
+ const taskId = 'M001-S001-T0001';
225
+ const ts = toolsetFor(['Read'], { withSearch: true, ctx: { taskId } });
226
+ const out = ts.execute('knowledge-search', { query: 'authentication' }, { cwd });
227
+ assert.match(out, /knowledge-search:/);
228
+ const { searchEvidenceForRound } = require('../../nubosloop-audit.cjs');
229
+ assert.ok(searchEvidenceForRound(taskId, 1, cwd).length > 0, 'evidence must be recorded for the round');
230
+ });
@@ -149,7 +149,7 @@ function _stripFence(s) {
149
149
  return m ? m[1] : s;
150
150
  }
151
151
 
152
- function _defaultSpawn(promptText, opts) {
152
+ async function _defaultSpawn(promptText, opts) {
153
153
  const spawnHeadless = require('../../bin/np-tools/spawn-headless.cjs');
154
154
  const tmp = os.tmpdir();
155
155
  const tag = process.pid + '-' + crypto.randomBytes(4).toString('hex');
@@ -158,7 +158,7 @@ function _defaultSpawn(promptText, opts) {
158
158
  fs.writeFileSync(promptPath, promptText, 'utf-8');
159
159
  const captured = [];
160
160
  try {
161
- spawnHeadless.run(
161
+ await spawnHeadless.run(
162
162
  ['--agent', REVIEWER_AGENT, '--prompt-path', promptPath, '--output-path', outputPath,
163
163
  '--timeout-ms', String(opts.timeoutMs)],
164
164
  { cwd: opts.cwd, stdout: { write: (s) => captured.push(s) } },
@@ -170,7 +170,7 @@ function _defaultSpawn(promptText, opts) {
170
170
  }
171
171
  }
172
172
 
173
- function runReview(opts) {
173
+ async function runReview(opts) {
174
174
  const o = opts || {};
175
175
  const cwd = o.cwd || process.cwd();
176
176
  const sid = o.sid;
@@ -198,7 +198,7 @@ function runReview(opts) {
198
198
  diffText: diff.diffText, guidancePath: config.guidance_path,
199
199
  });
200
200
 
201
- const raw = spawn(promptText, { cwd, timeoutMs: config.review_timeout_ms || 180000 });
201
+ const raw = await spawn(promptText, { cwd, timeoutMs: config.review_timeout_ms || 180000 });
202
202
  const parsed = parseReviewerOutput(raw);
203
203
  const risks = parsed.findings.filter((f) => f.severity === 'risk');
204
204
  const merged = ledger.addReviewFindings(sid, risks, mode);