nubos-pilot 1.2.3 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. package/CHANGELOG.md +24 -0
  2. package/README.md +18 -1
  3. package/SECURITY.md +3 -4
  4. package/bin/np-tools/_commands.cjs +1 -0
  5. package/bin/np-tools/learnings.cjs +5 -1
  6. package/bin/np-tools/resolve-model.cjs +55 -1
  7. package/bin/np-tools/resolve-model.test.cjs +139 -0
  8. package/bin/np-tools/security.cjs +4 -1
  9. package/bin/np-tools/spawn-headless.cjs +135 -2
  10. package/bin/np-tools/spawn-headless.test.cjs +225 -40
  11. package/bin/np-tools/spawn-offhost.cjs +93 -0
  12. package/bin/np-tools/spawn-offhost.test.cjs +38 -0
  13. package/lib/agents.cjs +16 -2
  14. package/lib/config-schema.cjs +5 -1
  15. package/lib/headless-guard.cjs +127 -0
  16. package/lib/headless-guard.test.cjs +119 -0
  17. package/lib/learnings/extract.cjs +4 -4
  18. package/lib/learnings/extract.test.cjs +8 -8
  19. package/lib/model-providers.cjs +118 -0
  20. package/lib/model-providers.test.cjs +85 -0
  21. package/lib/runtime/agent-loop.cjs +64 -0
  22. package/lib/runtime/agent-loop.test.cjs +135 -0
  23. package/lib/runtime/dispatch.cjs +174 -0
  24. package/lib/runtime/dispatch.test.cjs +193 -0
  25. package/lib/runtime/preflight.cjs +68 -0
  26. package/lib/runtime/preflight.test.cjs +62 -0
  27. package/lib/runtime/providers/openai-compat.cjs +102 -0
  28. package/lib/runtime/providers/openai-compat.test.cjs +103 -0
  29. package/lib/runtime/tools/index.cjs +415 -0
  30. package/lib/runtime/tools/index.test.cjs +230 -0
  31. package/lib/security/review.cjs +4 -4
  32. package/lib/security/review.test.cjs +6 -6
  33. package/np-tools.cjs +1 -0
  34. package/package.json +1 -1
  35. package/templates/claude/payload/hooks/np-learnings-hook.cjs +1 -0
  36. package/templates/claude/payload/hooks/np-security-hook.cjs +1 -0
  37. package/workflows/add-tests.md +41 -0
  38. package/workflows/architect-phase.md +19 -0
  39. package/workflows/discuss-phase.md +29 -10
  40. package/workflows/execute-phase.md +93 -4
  41. package/workflows/plan-phase.md +57 -16
  42. package/workflows/research-phase.md +45 -0
  43. package/workflows/scan-codebase.md +21 -3
  44. package/workflows/validate-phase.md +30 -13
  45. package/workflows/verify-work.md +17 -0
@@ -8,6 +8,14 @@ const assert = require('node:assert/strict');
8
8
 
9
9
  const spawnHeadless = require('./spawn-headless.cjs');
10
10
  const runContext = require('../../lib/run-context.cjs');
11
+ const headlessGuard = require('../../lib/headless-guard.cjs');
12
+
13
/**
 * Create an executable stand-in for the `claude` binary inside a sandbox.
 *
 * @param {string} r - Directory the script is written into.
 * @param {string} name - File name of the script within `r`.
 * @param {string} body - Full script text (shebang included) to write.
 * @returns {string} Path of the script that was written.
 */
function _mockClaude(r, name, body) {
  const scriptPath = path.join(r, name);
  fs.writeFileSync(scriptPath, body, { encoding: 'utf-8' });
  // rwxr-xr-x so the file can be executed as a program.
  fs.chmodSync(scriptPath, 0o755);
  return scriptPath;
}
11
19
 
12
20
  const _sandboxes = [];
13
21
  const _envBackup = {};
@@ -48,30 +56,30 @@ function _setEnv(k, v) {
48
56
  else process.env[k] = v;
49
57
  }
50
58
 
51
- test('SH-1: spawn-headless requires --agent', () => {
59
+ test('SH-1: spawn-headless requires --agent', async () => {
52
60
  const r = _mkRoot();
53
61
  const cap = _cap();
54
- assert.throws(
55
- () => spawnHeadless.run([], { cwd: r, stdout: cap.stub }),
62
+ await assert.rejects(
63
+ async () => spawnHeadless.run([], { cwd: r, stdout: cap.stub }),
56
64
  (err) => err && err.code === 'spawn-headless-missing-agent',
57
65
  );
58
66
  });
59
67
 
60
- test('SH-2: spawn-headless requires --prompt-path', () => {
68
+ test('SH-2: spawn-headless requires --prompt-path', async () => {
61
69
  const r = _mkRoot();
62
70
  const cap = _cap();
63
- assert.throws(
64
- () => spawnHeadless.run(['--agent', 'np-test-critic'], { cwd: r, stdout: cap.stub }),
71
+ await assert.rejects(
72
+ async () => spawnHeadless.run(['--agent', 'np-test-critic'], { cwd: r, stdout: cap.stub }),
65
73
  (err) => err && err.code === 'spawn-headless-missing-prompt-path',
66
74
  );
67
75
  });
68
76
 
69
- test('SH-3: spawn-headless requires --output-path', () => {
77
+ test('SH-3: spawn-headless requires --output-path', async () => {
70
78
  const r = _mkRoot();
71
79
  fs.writeFileSync(path.join(r, 'p.md'), 'do the audit', 'utf-8');
72
80
  const cap = _cap();
73
- assert.throws(
74
- () => spawnHeadless.run(
81
+ await assert.rejects(
82
+ async () => spawnHeadless.run(
75
83
  ['--agent', 'np-test-critic', '--prompt-path', 'p.md'],
76
84
  { cwd: r, stdout: cap.stub },
77
85
  ),
@@ -79,11 +87,11 @@ test('SH-3: spawn-headless requires --output-path', () => {
79
87
  );
80
88
  });
81
89
 
82
- test('SH-4: spawn-headless rejects path traversal on prompt-path', () => {
90
+ test('SH-4: spawn-headless rejects path traversal on prompt-path', async () => {
83
91
  const r = _mkRoot();
84
92
  const cap = _cap();
85
- assert.throws(
86
- () => spawnHeadless.run(
93
+ await assert.rejects(
94
+ async () => spawnHeadless.run(
87
95
  ['--agent', 'np-test-critic',
88
96
  '--prompt-path', '/etc/passwd',
89
97
  '--output-path', 'out.json'],
@@ -93,12 +101,12 @@ test('SH-4: spawn-headless rejects path traversal on prompt-path', () => {
93
101
  );
94
102
  });
95
103
 
96
- test('SH-5: spawn-headless rejects unknown agent', () => {
104
+ test('SH-5: spawn-headless rejects unknown agent', async () => {
97
105
  const r = _mkRoot();
98
106
  fs.writeFileSync(path.join(r, 'p.md'), 'audit', 'utf-8');
99
107
  const cap = _cap();
100
- assert.throws(
101
- () => spawnHeadless.run(
108
+ await assert.rejects(
109
+ async () => spawnHeadless.run(
102
110
  ['--agent', 'np-does-not-exist',
103
111
  '--prompt-path', 'p.md',
104
112
  '--output-path', 'out.json'],
@@ -108,12 +116,12 @@ test('SH-5: spawn-headless rejects unknown agent', () => {
108
116
  );
109
117
  });
110
118
 
111
- test('SH-6: spawn-headless rejects invalid agent name (path-injection guard)', () => {
119
+ test('SH-6: spawn-headless rejects invalid agent name (path-injection guard)', async () => {
112
120
  const r = _mkRoot();
113
121
  fs.writeFileSync(path.join(r, 'p.md'), 'audit', 'utf-8');
114
122
  const cap = _cap();
115
- assert.throws(
116
- () => spawnHeadless.run(
123
+ await assert.rejects(
124
+ async () => spawnHeadless.run(
117
125
  ['--agent', '../../etc/passwd',
118
126
  '--prompt-path', 'p.md',
119
127
  '--output-path', 'out.json'],
@@ -123,13 +131,13 @@ test('SH-6: spawn-headless rejects invalid agent name (path-injection guard)', (
123
131
  );
124
132
  });
125
133
 
126
- test('SH-7: spawn-headless reports claude-not-found when binary missing', () => {
134
+ test('SH-7: spawn-headless reports claude-not-found when binary missing', async () => {
127
135
  const r = _mkRoot();
128
136
  fs.writeFileSync(path.join(r, 'p.md'), 'audit', 'utf-8');
129
137
  _setEnv('NUBOS_PILOT_CLAUDE_BIN', path.join(r, 'no-such-binary'));
130
138
  const cap = _cap();
131
- assert.throws(
132
- () => spawnHeadless.run(
139
+ await assert.rejects(
140
+ async () => spawnHeadless.run(
133
141
  ['--agent', 'np-test-critic',
134
142
  '--prompt-path', 'p.md',
135
143
  '--output-path', 'out.json'],
@@ -139,7 +147,7 @@ test('SH-7: spawn-headless reports claude-not-found when binary missing', () =>
139
147
  );
140
148
  });
141
149
 
142
- test('SH-8: spawn-headless captures stdout to output-path on success (mock binary)', () => {
150
+ test('SH-8: spawn-headless captures stdout to output-path on success (mock binary)', async () => {
143
151
  const r = _mkRoot();
144
152
  fs.writeFileSync(path.join(r, 'p.md'), 'audit', 'utf-8');
145
153
  const mockBin = path.join(r, 'mock-claude.sh');
@@ -147,7 +155,7 @@ test('SH-8: spawn-headless captures stdout to output-path on success (mock binar
147
155
  fs.chmodSync(mockBin, 0o755);
148
156
  _setEnv('NUBOS_PILOT_CLAUDE_BIN', mockBin);
149
157
  const cap = _cap();
150
- const rc = spawnHeadless.run(
158
+ const rc = await spawnHeadless.run(
151
159
  ['--agent', 'np-test-critic',
152
160
  '--prompt-path', 'p.md',
153
161
  '--output-path', 'out.json'],
@@ -161,7 +169,7 @@ test('SH-8: spawn-headless captures stdout to output-path on success (mock binar
161
169
  assert.match(written, /"verdict":"passed"/);
162
170
  });
163
171
 
164
- test('SH-9: spawn-headless surfaces non-zero subprocess exit (mock failure)', () => {
172
+ test('SH-9: spawn-headless surfaces non-zero subprocess exit (mock failure)', async () => {
165
173
  const r = _mkRoot();
166
174
  fs.writeFileSync(path.join(r, 'p.md'), 'audit', 'utf-8');
167
175
  const mockBin = path.join(r, 'mock-fail.sh');
@@ -169,7 +177,7 @@ test('SH-9: spawn-headless surfaces non-zero subprocess exit (mock failure)', ()
169
177
  fs.chmodSync(mockBin, 0o755);
170
178
  _setEnv('NUBOS_PILOT_CLAUDE_BIN', mockBin);
171
179
  const cap = _cap();
172
- const rc = spawnHeadless.run(
180
+ const rc = await spawnHeadless.run(
173
181
  ['--agent', 'np-test-critic',
174
182
  '--prompt-path', 'p.md',
175
183
  '--output-path', 'out.json'],
@@ -181,12 +189,12 @@ test('SH-9: spawn-headless surfaces non-zero subprocess exit (mock failure)', ()
181
189
  assert.match(payload.stderr_excerpt, /boom/);
182
190
  });
183
191
 
184
- test('SH-10: spawn-headless rejects --timeout-ms below 1000', () => {
192
+ test('SH-10: spawn-headless rejects --timeout-ms below 1000', async () => {
185
193
  const r = _mkRoot();
186
194
  fs.writeFileSync(path.join(r, 'p.md'), 'audit', 'utf-8');
187
195
  const cap = _cap();
188
- assert.throws(
189
- () => spawnHeadless.run(
196
+ await assert.rejects(
197
+ async () => spawnHeadless.run(
190
198
  ['--agent', 'np-test-critic',
191
199
  '--prompt-path', 'p.md',
192
200
  '--output-path', 'out.json',
@@ -197,7 +205,7 @@ test('SH-10: spawn-headless rejects --timeout-ms below 1000', () => {
197
205
  );
198
206
  });
199
207
 
200
- test('SH-11: spawn-headless writes output atomically (no .tmp residue)', () => {
208
+ test('SH-11: spawn-headless writes output atomically (no .tmp residue)', async () => {
201
209
  const r = _mkRoot();
202
210
  fs.writeFileSync(path.join(r, 'p.md'), 'audit', 'utf-8');
203
211
  const mockBin = path.join(r, 'mock-claude.sh');
@@ -205,7 +213,7 @@ test('SH-11: spawn-headless writes output atomically (no .tmp residue)', () => {
205
213
  fs.chmodSync(mockBin, 0o755);
206
214
  _setEnv('NUBOS_PILOT_CLAUDE_BIN', mockBin);
207
215
  const cap = _cap();
208
- const rc = spawnHeadless.run(
216
+ const rc = await spawnHeadless.run(
209
217
  ['--agent', 'np-test-critic',
210
218
  '--prompt-path', 'p.md',
211
219
  '--output-path', 'out.json'],
@@ -313,7 +321,7 @@ test('SH-REDACT-2 _redactSecrets is a no-op on safe text', () => {
313
321
  assert.equal(spawnHeadless._redactSecrets(safe), safe);
314
322
  });
315
323
 
316
- test('SH-AUDIT-FIRST spawn-trail is written BEFORE caller-visible output (audit-first)', () => {
324
+ test('SH-AUDIT-FIRST spawn-trail is written BEFORE caller-visible output (audit-first)', async () => {
317
325
  const r = _mkRoot();
318
326
  fs.writeFileSync(path.join(r, 'p.md'), 'audit', 'utf-8');
319
327
  const mockBin = path.join(r, 'mock.sh');
@@ -327,7 +335,7 @@ test('SH-AUDIT-FIRST spawn-trail is written BEFORE caller-visible output (audit-
327
335
  const cap = _cap();
328
336
  let thrown = null;
329
337
  try {
330
- spawnHeadless.run(
338
+ await spawnHeadless.run(
331
339
  ['--agent', 'np-test-critic', '--prompt-path', 'p.md', '--output-path', 'out.json'],
332
340
  { cwd: r, stdout: cap.stub },
333
341
  );
@@ -340,7 +348,7 @@ test('SH-AUDIT-FIRST spawn-trail is written BEFORE caller-visible output (audit-
340
348
  'output must NOT exist if audit append failed (audit-first invariant)');
341
349
  });
342
350
 
343
- test('SH-PARSE-OK payload_parse_ok=false when claude returns non-JSON output', () => {
351
+ test('SH-PARSE-OK payload_parse_ok=false when claude returns non-JSON output', async () => {
344
352
  const r = _mkRoot();
345
353
  fs.writeFileSync(path.join(r, 'p.md'), 'audit', 'utf-8');
346
354
  const mockBin = path.join(r, 'mock-plain.sh');
@@ -349,7 +357,7 @@ test('SH-PARSE-OK payload_parse_ok=false when claude returns non-JSON output', (
349
357
  _setEnv('NUBOS_PILOT_CLAUDE_BIN', mockBin);
350
358
  _setEnv('NUBOS_PILOT_RUN_ID', 'r-parse-test');
351
359
  const cap = _cap();
352
- spawnHeadless.run(
360
+ await spawnHeadless.run(
353
361
  ['--agent', 'np-test-critic', '--prompt-path', 'p.md', '--output-path', 'out.json'],
354
362
  { cwd: r, stdout: cap.stub },
355
363
  );
@@ -407,7 +415,7 @@ test('SH-ENV-3 NUBOS_PILOT_SPAWN_ENV_PASSTHROUGH allow-lists by exact key name',
407
415
  assert.equal(filtered.NOT_LISTED, undefined);
408
416
  });
409
417
 
410
- test('SH-TRAIL-1 spawn writes append-only spawn-trail record with run_id + prompt/response sha256 + timing', () => {
418
+ test('SH-TRAIL-1 spawn writes append-only spawn-trail record with run_id + prompt/response sha256 + timing', async () => {
411
419
  const r = _mkRoot();
412
420
  fs.writeFileSync(path.join(r, 'p.md'), 'do the audit', 'utf-8');
413
421
  const mockBin = path.join(r, 'mock-claude.sh');
@@ -416,7 +424,7 @@ test('SH-TRAIL-1 spawn writes append-only spawn-trail record with run_id + promp
416
424
  _setEnv('NUBOS_PILOT_CLAUDE_BIN', mockBin);
417
425
  _setEnv('NUBOS_PILOT_RUN_ID', 'r-traceme-deadbeef');
418
426
  const cap = _cap();
419
- const rc = spawnHeadless.run(
427
+ const rc = await spawnHeadless.run(
420
428
  ['--agent', 'np-test-critic', '--prompt-path', 'p.md', '--output-path', 'out.json'],
421
429
  { cwd: r, stdout: cap.stub },
422
430
  );
@@ -444,7 +452,7 @@ test('SH-TRAIL-1 spawn writes append-only spawn-trail record with run_id + promp
444
452
  assert.ok(Number.isFinite(rec.duration_ms) && rec.duration_ms >= 0);
445
453
  });
446
454
 
447
- test('SH-TRAIL-1b run_id is seeded BEFORE spawn so the child env inherits NUBOS_PILOT_RUN_ID', () => {
455
+ test('SH-TRAIL-1b run_id is seeded BEFORE spawn so the child env inherits NUBOS_PILOT_RUN_ID', async () => {
448
456
  const r = _mkRoot();
449
457
  fs.writeFileSync(path.join(r, 'p.md'), 'audit', 'utf-8');
450
458
  // Mock-claude echoes its own ENV var so we can prove the child saw it.
@@ -457,7 +465,7 @@ test('SH-TRAIL-1b run_id is seeded BEFORE spawn so the child env inherits NUBOS_
457
465
  // Crucially: do NOT set NUBOS_PILOT_RUN_ID; the lazy-seed must happen.
458
466
  runContext._resetForTests();
459
467
  const cap = _cap();
460
- spawnHeadless.run(
468
+ await spawnHeadless.run(
461
469
  ['--agent', 'np-test-critic', '--prompt-path', 'p.md', '--output-path', 'out.json'],
462
470
  { cwd: r, stdout: cap.stub },
463
471
  );
@@ -467,7 +475,7 @@ test('SH-TRAIL-1b run_id is seeded BEFORE spawn so the child env inherits NUBOS_
467
475
  assert.equal(childRunId, payload.run_id, 'child must inherit parent NUBOS_PILOT_RUN_ID via filtered env');
468
476
  });
469
477
 
470
- test('SH-TRAIL-2 two sequential spawns append two parseable trail lines (jsonl integrity)', () => {
478
+ test('SH-TRAIL-2 two sequential spawns append two parseable trail lines (jsonl integrity)', async () => {
471
479
  const r = _mkRoot();
472
480
  fs.writeFileSync(path.join(r, 'p.md'), 'audit X', 'utf-8');
473
481
  const mockBin = path.join(r, 'mock.sh');
@@ -477,7 +485,7 @@ test('SH-TRAIL-2 two sequential spawns append two parseable trail lines (jsonl i
477
485
  _setEnv('NUBOS_PILOT_RUN_ID', 'r-test-multi-aaa1');
478
486
  const cap = _cap();
479
487
  for (let i = 0; i < 2; i++) {
480
- spawnHeadless.run(
488
+ await spawnHeadless.run(
481
489
  ['--agent', 'np-test-critic', '--prompt-path', 'p.md', '--output-path', 'out' + i + '.json'],
482
490
  { cwd: r, stdout: cap.stub },
483
491
  );
@@ -488,6 +496,133 @@ test('SH-TRAIL-2 two sequential spawns append two parseable trail lines (jsonl i
488
496
  for (const l of lines) JSON.parse(l);
489
497
  });
490
498
 
499
+ test('SH-GUARD-1 refuses to spawn when NUBOS_PILOT_HEADLESS=1 (reentrancy guard)', async () => {
500
+ const r = _mkRoot();
501
+ fs.writeFileSync(path.join(r, 'p.md'), 'audit', 'utf-8');
502
+ const mockBin = _mockClaude(r, 'mock.sh', '#!/bin/sh\ncat > /dev/null\necho "{}"\n');
503
+ _setEnv('NUBOS_PILOT_CLAUDE_BIN', mockBin);
504
+ _setEnv('NUBOS_PILOT_HEADLESS', '1');
505
+ const cap = _cap();
506
+ await assert.rejects(
507
+ spawnHeadless.run(
508
+ ['--agent', 'np-test-critic', '--prompt-path', 'p.md', '--output-path', 'out.json'],
509
+ { cwd: r, stdout: cap.stub },
510
+ ),
511
+ (err) => err && err.code === 'spawn-headless-reentrant',
512
+ );
513
+ assert.equal(fs.existsSync(path.join(r, 'out.json')), false, 'no claude must be spawned inside a headless run');
514
+ });
515
+
516
+ test('SH-GUARD-2 refuses to spawn when hook depth has reached the cap (depth guard)', async () => {
517
+ const r = _mkRoot();
518
+ fs.writeFileSync(path.join(r, 'p.md'), 'audit', 'utf-8');
519
+ const mockBin = _mockClaude(r, 'mock.sh', '#!/bin/sh\ncat > /dev/null\necho "{}"\n');
520
+ _setEnv('NUBOS_PILOT_CLAUDE_BIN', mockBin);
521
+ _setEnv('NUBOS_PILOT_HOOK_DEPTH', '1');
522
+ const cap = _cap();
523
+ await assert.rejects(
524
+ spawnHeadless.run(
525
+ ['--agent', 'np-test-critic', '--prompt-path', 'p.md', '--output-path', 'out.json'],
526
+ { cwd: r, stdout: cap.stub },
527
+ ),
528
+ (err) => err && err.code === 'spawn-headless-depth-exceeded',
529
+ );
530
+ });
531
+
532
+ test('SH-GUARD-3 child env carries NUBOS_PILOT_HEADLESS=1 and depth=1 (one level deep only)', async () => {
533
+ const r = _mkRoot();
534
+ fs.writeFileSync(path.join(r, 'p.md'), 'audit', 'utf-8');
535
+ const mockBin = _mockClaude(r, 'mock.sh',
536
+ '#!/bin/sh\ncat > /dev/null\nprintf \'{"hl":"\'$NUBOS_PILOT_HEADLESS\'","depth":"\'$NUBOS_PILOT_HOOK_DEPTH\'"}\\n\'\n');
537
+ _setEnv('NUBOS_PILOT_CLAUDE_BIN', mockBin);
538
+ const cap = _cap();
539
+ const rc = await spawnHeadless.run(
540
+ ['--agent', 'np-test-critic', '--prompt-path', 'p.md', '--output-path', 'out.json'],
541
+ { cwd: r, stdout: cap.stub },
542
+ );
543
+ assert.equal(rc, 0);
544
+ const child = JSON.parse(fs.readFileSync(path.join(r, 'out.json'), 'utf-8'));
545
+ assert.equal(child.hl, '1', 'child claude must run with NUBOS_PILOT_HEADLESS=1');
546
+ assert.equal(child.depth, '1', 'child claude must run at hook depth 1');
547
+ });
548
+
549
+ test('SH-GUARD-4 refuses to spawn while a live lock for the same agent is held (concurrency guard)', async () => {
550
+ const r = _mkRoot();
551
+ fs.writeFileSync(path.join(r, 'p.md'), 'audit', 'utf-8');
552
+ const mockBin = _mockClaude(r, 'mock.sh', '#!/bin/sh\ncat > /dev/null\necho "{}"\n');
553
+ _setEnv('NUBOS_PILOT_CLAUDE_BIN', mockBin);
554
+ const held = headlessGuard.tryAcquireSpawnLock(r, 'np-test-critic');
555
+ assert.equal(held.acquired, true);
556
+ const cap = _cap();
557
+ try {
558
+ await assert.rejects(
559
+ spawnHeadless.run(
560
+ ['--agent', 'np-test-critic', '--prompt-path', 'p.md', '--output-path', 'out.json'],
561
+ { cwd: r, stdout: cap.stub },
562
+ ),
563
+ (err) => err && err.code === 'spawn-headless-locked',
564
+ );
565
+ } finally {
566
+ held.release();
567
+ }
568
+ });
569
+
570
+ test('SH-GUARD-5 lock is released after a successful spawn (re-spawnable)', async () => {
571
+ const r = _mkRoot();
572
+ fs.writeFileSync(path.join(r, 'p.md'), 'audit', 'utf-8');
573
+ const mockBin = _mockClaude(r, 'mock.sh', '#!/bin/sh\ncat > /dev/null\necho "{}"\n');
574
+ _setEnv('NUBOS_PILOT_CLAUDE_BIN', mockBin);
575
+ const cap = _cap();
576
+ for (let i = 0; i < 2; i++) {
577
+ const rc = await spawnHeadless.run(
578
+ ['--agent', 'np-test-critic', '--prompt-path', 'p.md', '--output-path', 'out' + i + '.json'],
579
+ { cwd: r, stdout: cap.stub },
580
+ );
581
+ assert.equal(rc, 0, 'sequential spawns must each acquire and release the lock');
582
+ }
583
+ assert.equal(fs.existsSync(headlessGuard._lockPath(r, 'np-test-critic')), false, 'no lock residue after spawns');
584
+ });
585
+
586
+ test('SH-GUARD-6 a held lock for one agent does NOT block a different agent (per-agent scope)', async () => {
587
+ const r = _mkRoot();
588
+ fs.writeFileSync(
589
+ path.join(r, '.nubos-pilot', 'agents', 'np-other-critic.md'),
590
+ '---\nname: np-other-critic\n---\n\n# Role\n',
591
+ 'utf-8',
592
+ );
593
+ fs.writeFileSync(path.join(r, 'p.md'), 'audit', 'utf-8');
594
+ const mockBin = _mockClaude(r, 'mock.sh', '#!/bin/sh\ncat > /dev/null\necho "{}"\n');
595
+ _setEnv('NUBOS_PILOT_CLAUDE_BIN', mockBin);
596
+ const held = headlessGuard.tryAcquireSpawnLock(r, 'np-test-critic');
597
+ assert.equal(held.acquired, true);
598
+ const cap = _cap();
599
+ try {
600
+ const rc = await spawnHeadless.run(
601
+ ['--agent', 'np-other-critic', '--prompt-path', 'p.md', '--output-path', 'out.json'],
602
+ { cwd: r, stdout: cap.stub },
603
+ );
604
+ assert.equal(rc, 0, 'a different agent must spawn while np-test-critic is locked');
605
+ } finally {
606
+ held.release();
607
+ }
608
+ });
609
+
610
+ test('SH-GUARD-7 lock is released even when the spawn errors (claude-not-found)', async () => {
611
+ const r = _mkRoot();
612
+ fs.writeFileSync(path.join(r, 'p.md'), 'audit', 'utf-8');
613
+ _setEnv('NUBOS_PILOT_CLAUDE_BIN', path.join(r, 'no-such-binary'));
614
+ const cap = _cap();
615
+ await assert.rejects(
616
+ spawnHeadless.run(
617
+ ['--agent', 'np-test-critic', '--prompt-path', 'p.md', '--output-path', 'out.json'],
618
+ { cwd: r, stdout: cap.stub },
619
+ ),
620
+ (err) => err && err.code === 'spawn-headless-claude-not-found',
621
+ );
622
+ assert.equal(fs.existsSync(headlessGuard._lockPath(r, 'np-test-critic')), false,
623
+ 'the per-agent lock must not leak when the spawn fails');
624
+ });
625
+
491
626
  test('SH-ENV-4 NUBOS_PILOT_/CLAUDE_/ANTHROPIC_ prefixed vars pass through (whitelisted prefix)', () => {
492
627
  const parent = {
493
628
  PATH: '/usr/bin',
@@ -504,3 +639,53 @@ test('SH-ENV-4 NUBOS_PILOT_/CLAUDE_/ANTHROPIC_ prefixed vars pass through (white
504
639
  assert.equal(filtered.ANTHROPIC_BASE_URL, 'https://api.anthropic.com');
505
640
  assert.equal(filtered.UNRELATED_FOO, undefined);
506
641
  });
642
+
643
// Off-host routing: when the resolver reports an openai-compat provider, run()
// must hand the prompt to the injected dispatcher and write a claude-compatible
// {result} envelope, without ever launching the native claude binary.
test('SH-OFFHOST-1: openai-compat routing runs dispatchOffHost and writes a {result} envelope (no claude -p)', async () => {
  const r = _mkRoot();
  fs.writeFileSync(path.join(r, 'p.md'), 'review this diff', 'utf-8');
  const cap = _cap();
  // The native binary points at a path that does not exist. A spawn attempt
  // would reject with spawn-headless-claude-not-found (see SH-7), so a clean
  // exit code 0 below is the proof that claude was never invoked.
  _setEnv('NUBOS_PILOT_CLAUDE_BIN', path.join(r, 'nonexistent-claude-should-not-run'));
  let dispatchArgs = null;
  const code = await spawnHeadless.run(
    ['--agent', 'np-test-critic', '--prompt-path', 'p.md', '--output-path', 'out.json'],
    {
      cwd: r,
      stdout: cap.stub,
      resolveImpl: () => ({ kind: 'openai-compat', provider: 'ollama', model: 'qwen2.5-coder:32b' }),
      dispatchImpl: async (o) => { dispatchArgs = o; return { content: 'REVIEW: 0 risks', model: 'qwen2.5-coder:32b', provider: 'ollama' }; },
    },
  );
  assert.equal(code, 0);
  // dispatch received the agent + the prompt body as the task
  assert.ok(dispatchArgs, 'the injected off-host dispatcher must have been called');
  assert.equal(dispatchArgs.agent, 'np-test-critic');
  assert.match(dispatchArgs.task, /review this diff/);
  // output is the claude-compatible {result} envelope so review/extract parse it unchanged
  const out = JSON.parse(fs.readFileSync(path.join(r, 'out.json'), 'utf-8'));
  assert.equal(out.result, 'REVIEW: 0 risks');
  assert.equal(out.provider, 'ollama');
  // caller-visible payload marks the run as off-host
  const payload = JSON.parse(cap.get().trim());
  assert.equal(payload.off_host, true);
  assert.equal(payload.exit_code, 0);
});
673
+
674
+ test('SH-OFFHOST-2: a failing off-host dispatch returns exit 2 with an empty result (soft-fail parity)', async () => {
675
+ const r = _mkRoot();
676
+ fs.writeFileSync(path.join(r, 'p.md'), 'review', 'utf-8');
677
+ const cap = _cap();
678
+ const code = await spawnHeadless.run(
679
+ ['--agent', 'np-test-critic', '--prompt-path', 'p.md', '--output-path', 'out.json'],
680
+ {
681
+ cwd: r,
682
+ stdout: cap.stub,
683
+ resolveImpl: () => ({ kind: 'openai-compat', provider: 'ollama', model: 'x' }),
684
+ dispatchImpl: async () => { const e = new Error('provider unreachable'); e.code = 'preflight-failed'; throw e; },
685
+ },
686
+ );
687
+ assert.equal(code, 2);
688
+ const out = JSON.parse(fs.readFileSync(path.join(r, 'out.json'), 'utf-8'));
689
+ assert.equal(out.result, '');
690
+ assert.equal(out.is_error, true);
691
+ });
@@ -0,0 +1,93 @@
1
+ 'use strict';
2
+
3
+ const fs = require('node:fs');
4
+ const { NubosPilotError } = require('../../lib/core.cjs');
5
+ const { dispatchOffHost } = require('../../lib/runtime/dispatch.cjs');
6
+
7
/**
 * Print the `spawn-offhost` usage synopsis to stderr.
 *
 * The synopsis lists every flag `_parse` recognises, including
 * `--output-schema`, which is accepted and forwarded to the dispatcher.
 */
function _usage() {
  process.stderr.write(
    'Usage: np-tools.cjs spawn-offhost --agent <name> (--task <str> | --task-file <path>) '
    + '[--cwd <dir>] [--phase P] [--plan P] [--task-id T] [--max-iterations N] [--output-schema <schema>] '
    + '[--allow-bash] [--read-only] [--no-audit]\n',
  );
}
13
+
14
/**
 * Turn the raw `spawn-offhost` argument list into an options object.
 *
 * Value flags consume the next token as-is (it may be `undefined` when the
 * flag is the last token); `--max-iterations` is converted with `Number`, so a
 * missing or non-numeric value yields `NaN`. Unrecognised tokens are ignored.
 * The input array is not modified.
 *
 * @param {string[]} argv - Arguments following the subcommand name.
 * @returns {object} Parsed options; `allowBash` and `readOnly` default to false.
 */
function _parse(argv) {
  // Flags whose following token is stored verbatim under the mapped key.
  const valueFlags = new Map([
    ['--agent', 'agent'],
    ['--task', 'task'],
    ['--task-file', 'taskFile'],
    ['--phase', 'phase'],
    ['--plan', 'plan'],
    ['--task-id', 'taskId'],
    ['--cwd', 'cwd'],
    ['--output-schema', 'outputSchema'],
  ]);
  // Flags that take no value and simply switch the mapped key to true.
  const switchFlags = new Map([
    ['--allow-bash', 'allowBash'],
    ['--read-only', 'readOnly'],
    ['--no-audit', 'skipAudit'],
  ]);

  const parsed = { allowBash: false, readOnly: false };
  const queue = argv.slice();
  while (queue.length > 0) {
    const flag = queue.shift();
    if (flag === '--max-iterations') {
      parsed.maxIterations = Number(queue.shift());
    } else if (valueFlags.has(flag)) {
      parsed[valueFlags.get(flag)] = queue.shift();
    } else if (switchFlags.has(flag)) {
      parsed[switchFlags.get(flag)] = true;
    }
  }
  return parsed;
}
34
+
35
/**
 * Entry point of the `spawn-offhost` subcommand.
 *
 * Parses the arguments, resolves the task text (inline `--task`, or the
 * contents of `--task-file`, which wins when both are given), dispatches the
 * agent through `dispatchOffHost`, prints advisory warnings to stderr and the
 * JSON-encoded result to stdout.
 *
 * @param {string[]} [argv] - Arguments after the subcommand; when not an array,
 *   `process.argv.slice(3)` is used instead.
 * @returns {Promise<number>} 0 when the dispatch completed; 1 for help/usage
 *   errors, an unreadable task file, or any error raised by the dispatch.
 */
async function run(argv) {
  const args = Array.isArray(argv) ? argv.slice() : process.argv.slice(3);
  if (args.length === 0 || args[0] === '--help') {
    _usage();
    return 1;
  }

  const opts = _parse(args);

  let task = opts.task;
  if (opts.taskFile) {
    try {
      task = fs.readFileSync(opts.taskFile, 'utf-8');
    } catch {
      // Only the base name is reported, not the full path that was passed in.
      const report = {
        code: 'spawn-offhost-task-file-unreadable',
        file: require('node:path').basename(opts.taskFile),
      };
      process.stderr.write(JSON.stringify(report) + '\n');
      return 1;
    }
  }

  if (!opts.agent || typeof task !== 'string') {
    _usage();
    return 1;
  }

  try {
    const result = await dispatchOffHost({
      agent: opts.agent,
      task,
      cwd: opts.cwd || process.cwd(),
      phase: opts.phase,
      plan: opts.plan,
      taskId: opts.taskId,
      maxIterations: opts.maxIterations,
      allowBash: opts.allowBash,
      readOnly: opts.readOnly,
      skipAudit: opts.skipAudit,
      outputSchema: opts.outputSchema,
    });

    // Advisory warnings go to stderr so stdout carries nothing but the result JSON.
    if (result) {
      if (result.metrics_recorded === false) {
        process.stderr.write('spawn-offhost: metrics row was not recorded (telemetry only; run succeeded)\n');
      }

      const rule9 = result.rule9;
      if (rule9 && rule9.ok === false) {
        process.stderr.write('spawn-offhost: Rule-9 violation (' + (rule9.violation || rule9.error)
          + ') — the agent did not satisfy the search bar. Do NOT commit this output as-is; re-run or route back to the agent.\n');
      }

      const capability = result.capability;
      if (capability && capability.ok === false) {
        const headline = 'spawn-offhost: the model advertised ' + capability.toolsAdvertised
          + ' tool(s) but made zero tool calls — the provider/model likely does NOT support OpenAI function/tool-calling. ';
        const advice = capability.mutating
          ? 'This agent edits files; off-host it produced NO changes. Route it to a tool-calling-capable model or keep it native.'
          : 'If this agent was expected to inspect the workspace, its output may be ungrounded — verify before relying on it.';
        process.stderr.write(headline + advice + '\n');
      }
    }

    process.stdout.write(JSON.stringify(result) + '\n');
    return 0;
  } catch (err) {
    // Project errors are reported as structured JSON; anything else as a stack trace.
    const isPilotError = err && err.name === 'NubosPilotError';
    if (isPilotError) {
      process.stderr.write(JSON.stringify({ code: err.code, message: err.message, details: err.details }) + '\n');
    } else {
      process.stderr.write(String((err && err.stack) || err) + '\n');
    }
    return 1;
  }
}
88
+
89
+ module.exports = { run, _parse };
90
+
91
+ if (require.main === module) {
92
+ run(process.argv.slice(2)).then((code) => process.exit(code || 0));
93
+ }
@@ -0,0 +1,38 @@
1
+ const { test } = require('node:test');
2
+ const assert = require('node:assert/strict');
3
+
4
+ const subcmd = require('./spawn-offhost.cjs');
5
+
6
/**
 * Run `fn` while collecting everything written to process.stdout/stderr.
 *
 * The real `write` functions are restored in a `finally` block, so they come
 * back no matter how `fn` ends: normal return, rejected promise, or a
 * synchronous throw.
 *
 * @param {Function} fn - Callback to run; may return a value or a promise.
 * @returns {Promise<{rc: *, stdout: string, stderr: string}>} Resolves with the
 *   callback's (awaited) return value and the captured text; rejects with
 *   whatever `fn` threw or rejected with.
 */
async function _capture(fn) {
  const out = [];
  const err = [];
  const realStdoutWrite = process.stdout.write;
  const realStderrWrite = process.stderr.write;
  process.stdout.write = (c) => { out.push(String(c)); return true; };
  process.stderr.write = (c) => { err.push(String(c)); return true; };
  try {
    const rc = await fn();
    return { rc, stdout: out.join(''), stderr: err.join('') };
  } finally {
    process.stdout.write = realStdoutWrite;
    process.stderr.write = realStderrWrite;
  }
}
17
+
18
+ test('SOH-1: _parse reads agent/task, the boolean flags, --cwd and --no-audit', () => {
19
+ const p = subcmd._parse(['--agent', 'np-executor', '--task', 'do x', '--allow-bash', '--max-iterations', '5', '--cwd', '/wt', '--no-audit']);
20
+ assert.equal(p.agent, 'np-executor');
21
+ assert.equal(p.task, 'do x');
22
+ assert.equal(p.allowBash, true);
23
+ assert.equal(p.readOnly, false);
24
+ assert.equal(p.maxIterations, 5);
25
+ assert.equal(p.cwd, '/wt');
26
+ assert.equal(p.skipAudit, true);
27
+ });
28
+
29
+ test('SOH-2: missing args prints usage and returns 1', async () => {
30
+ const { rc, stderr } = await _capture(() => subcmd.run([]));
31
+ assert.equal(rc, 1);
32
+ assert.match(stderr, /Usage:/);
33
+ });
34
+
35
+ test('SOH-3: --agent without a task returns 1', async () => {
36
+ const { rc } = await _capture(() => subcmd.run(['--agent', 'np-executor']));
37
+ assert.equal(rc, 1);
38
+ });
package/lib/agents.cjs CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  const fs = require('node:fs');
4
4
  const path = require('node:path');
5
- const { extractFrontmatter } = require('./frontmatter.cjs');
5
+ const { extractFrontmatter, stripFrontmatter } = require('./frontmatter.cjs');
6
6
  const { NubosPilotError, findProjectRoot } = require('./core.cjs');
7
7
 
8
8
  const REQUIRED = ['name', 'description', 'tier', 'tools'];
@@ -60,7 +60,7 @@ function validateAgentFrontmatter(fm, agentName) {
60
60
 
61
61
  const AGENT_NAME_RE = /^[a-zA-Z0-9_-]+$/;
62
62
 
63
- function _loadAgentFromDisk(name, cwd) {
63
+ function _resolveAgentPath(name, cwd) {
64
64
  if (typeof name !== 'string' || !AGENT_NAME_RE.test(name)) {
65
65
  throw new NubosPilotError(
66
66
  'agent-invalid-name',
@@ -83,10 +83,23 @@ function _loadAgentFromDisk(name, cwd) {
83
83
  { name, path: candidates[0], tried: candidates },
84
84
  );
85
85
  }
86
+ return found;
87
+ }
88
+
89
/**
 * Load an agent definition file and return its validated frontmatter.
 *
 * @param {string} name - Agent name, resolved to a file by `_resolveAgentPath`.
 * @param {string} cwd - Directory passed through to `_resolveAgentPath`.
 * @returns {*} Whatever `validateAgentFrontmatter` returns for the parsed frontmatter.
 */
function _loadAgentFromDisk(name, cwd) {
  const agentFile = _resolveAgentPath(name, cwd);
  const source = fs.readFileSync(agentFile, 'utf-8');
  const parsed = extractFrontmatter(source);
  return validateAgentFrontmatter(parsed.frontmatter, name);
}
89
94
 
95
/**
 * Load an agent definition and return both its frontmatter and its body text.
 *
 * `validateAgentFrontmatter` is called but its return value is discarded; the
 * frontmatter handed back is the object produced by `extractFrontmatter`.
 * NOTE(review): presumably validation throws on an invalid definition — confirm.
 *
 * @param {string} name - Agent name, resolved to a file by `_resolveAgentPath`.
 * @param {string} cwd - Directory passed through to `_resolveAgentPath`.
 * @returns {{frontmatter: *, body: *, path: string}} Parsed frontmatter, the
 *   file content with frontmatter stripped, and the resolved file path.
 */
function loadAgentSource(name, cwd) {
  const agentFile = _resolveAgentPath(name, cwd);
  const text = fs.readFileSync(agentFile, 'utf-8');
  const parsed = extractFrontmatter(text);
  validateAgentFrontmatter(parsed.frontmatter, name);
  const body = stripFrontmatter(text);
  return { frontmatter: parsed.frontmatter, body, path: agentFile };
}
102
+
90
103
  function loadAgent(name, cwd) {
91
104
  const fm = _loadAgentFromDisk(name, cwd);
92
105
  if (fm.module === true) {
@@ -143,6 +156,7 @@ module.exports = {
143
156
  validateAgentFrontmatter,
144
157
  loadAgent,
145
158
  loadAgentModule,
159
+ loadAgentSource,
146
160
  listAgents,
147
161
  getAgentSkills,
148
162
  AGENT_NAME_RE,
@@ -9,6 +9,8 @@ const VALID_TIERS = Object.freeze(['haiku', 'sonnet', 'opus']);
9
9
  const SCHEMA = Object.freeze({
10
10
  scope: { type: 'enum', values: VALID_SCOPES, optional: true },
11
11
  model_profile: { type: 'enum', values: VALID_MODEL_PROFILES, optional: true },
12
+ model_providers: { type: 'object', shape: 'any', optional: true },
13
+ agent_routing: { type: 'object', shape: 'any', optional: true },
12
14
  response_language:{ type: 'string', optional: true },
13
15
  runtime: { type: 'string', optional: true },
14
16
  runtimes: { type: 'array', element: 'string', optional: true },
@@ -220,7 +222,9 @@ function _clone(v) {
220
222
  return out;
221
223
  }
222
224
 
223
- const SCHEMA_ONLY_KEYS = Object.freeze(['runtime', 'runtimes', 'agent_skills']);
225
+ const SCHEMA_ONLY_KEYS = Object.freeze([
226
+ 'runtime', 'runtimes', 'agent_skills', 'model_providers', 'agent_routing',
227
+ ]);
224
228
 
225
229
  module.exports = {
226
230
  SCHEMA,