nubos-pilot 1.2.4 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. package/CHANGELOG.md +17 -1
  2. package/README.md +2 -1
  3. package/SECURITY.md +3 -4
  4. package/bin/np-tools/_commands.cjs +1 -0
  5. package/bin/np-tools/learnings.cjs +1 -1
  6. package/bin/np-tools/resolve-model.cjs +55 -1
  7. package/bin/np-tools/resolve-model.test.cjs +139 -0
  8. package/bin/np-tools/security.cjs +1 -1
  9. package/bin/np-tools/spawn-headless.cjs +100 -1
  10. package/bin/np-tools/spawn-headless.test.cjs +108 -58
  11. package/bin/np-tools/spawn-offhost.cjs +93 -0
  12. package/bin/np-tools/spawn-offhost.test.cjs +38 -0
  13. package/lib/agents.cjs +16 -2
  14. package/lib/config-schema.cjs +5 -1
  15. package/lib/learnings/extract.cjs +4 -4
  16. package/lib/learnings/extract.test.cjs +8 -8
  17. package/lib/model-providers.cjs +118 -0
  18. package/lib/model-providers.test.cjs +85 -0
  19. package/lib/runtime/agent-loop.cjs +64 -0
  20. package/lib/runtime/agent-loop.test.cjs +135 -0
  21. package/lib/runtime/dispatch.cjs +174 -0
  22. package/lib/runtime/dispatch.test.cjs +193 -0
  23. package/lib/runtime/preflight.cjs +68 -0
  24. package/lib/runtime/preflight.test.cjs +62 -0
  25. package/lib/runtime/providers/openai-compat.cjs +102 -0
  26. package/lib/runtime/providers/openai-compat.test.cjs +103 -0
  27. package/lib/runtime/tools/index.cjs +415 -0
  28. package/lib/runtime/tools/index.test.cjs +230 -0
  29. package/lib/security/review.cjs +4 -4
  30. package/lib/security/review.test.cjs +6 -6
  31. package/np-tools.cjs +1 -0
  32. package/package.json +1 -1
  33. package/workflows/add-tests.md +41 -0
  34. package/workflows/architect-phase.md +19 -0
  35. package/workflows/discuss-phase.md +29 -10
  36. package/workflows/execute-phase.md +93 -4
  37. package/workflows/plan-phase.md +57 -16
  38. package/workflows/research-phase.md +45 -0
  39. package/workflows/scan-codebase.md +21 -3
  40. package/workflows/validate-phase.md +30 -13
  41. package/workflows/verify-work.md +17 -0
@@ -56,30 +56,30 @@ function _setEnv(k, v) {
56
56
  else process.env[k] = v;
57
57
  }
58
58
 
59
- test('SH-1: spawn-headless requires --agent', () => {
59
+ test('SH-1: spawn-headless requires --agent', async () => {
60
60
  const r = _mkRoot();
61
61
  const cap = _cap();
62
- assert.throws(
63
- () => spawnHeadless.run([], { cwd: r, stdout: cap.stub }),
62
+ await assert.rejects(
63
+ async () => spawnHeadless.run([], { cwd: r, stdout: cap.stub }),
64
64
  (err) => err && err.code === 'spawn-headless-missing-agent',
65
65
  );
66
66
  });
67
67
 
68
- test('SH-2: spawn-headless requires --prompt-path', () => {
68
+ test('SH-2: spawn-headless requires --prompt-path', async () => {
69
69
  const r = _mkRoot();
70
70
  const cap = _cap();
71
- assert.throws(
72
- () => spawnHeadless.run(['--agent', 'np-test-critic'], { cwd: r, stdout: cap.stub }),
71
+ await assert.rejects(
72
+ async () => spawnHeadless.run(['--agent', 'np-test-critic'], { cwd: r, stdout: cap.stub }),
73
73
  (err) => err && err.code === 'spawn-headless-missing-prompt-path',
74
74
  );
75
75
  });
76
76
 
77
- test('SH-3: spawn-headless requires --output-path', () => {
77
+ test('SH-3: spawn-headless requires --output-path', async () => {
78
78
  const r = _mkRoot();
79
79
  fs.writeFileSync(path.join(r, 'p.md'), 'do the audit', 'utf-8');
80
80
  const cap = _cap();
81
- assert.throws(
82
- () => spawnHeadless.run(
81
+ await assert.rejects(
82
+ async () => spawnHeadless.run(
83
83
  ['--agent', 'np-test-critic', '--prompt-path', 'p.md'],
84
84
  { cwd: r, stdout: cap.stub },
85
85
  ),
@@ -87,11 +87,11 @@ test('SH-3: spawn-headless requires --output-path', () => {
87
87
  );
88
88
  });
89
89
 
90
- test('SH-4: spawn-headless rejects path traversal on prompt-path', () => {
90
+ test('SH-4: spawn-headless rejects path traversal on prompt-path', async () => {
91
91
  const r = _mkRoot();
92
92
  const cap = _cap();
93
- assert.throws(
94
- () => spawnHeadless.run(
93
+ await assert.rejects(
94
+ async () => spawnHeadless.run(
95
95
  ['--agent', 'np-test-critic',
96
96
  '--prompt-path', '/etc/passwd',
97
97
  '--output-path', 'out.json'],
@@ -101,12 +101,12 @@ test('SH-4: spawn-headless rejects path traversal on prompt-path', () => {
101
101
  );
102
102
  });
103
103
 
104
- test('SH-5: spawn-headless rejects unknown agent', () => {
104
+ test('SH-5: spawn-headless rejects unknown agent', async () => {
105
105
  const r = _mkRoot();
106
106
  fs.writeFileSync(path.join(r, 'p.md'), 'audit', 'utf-8');
107
107
  const cap = _cap();
108
- assert.throws(
109
- () => spawnHeadless.run(
108
+ await assert.rejects(
109
+ async () => spawnHeadless.run(
110
110
  ['--agent', 'np-does-not-exist',
111
111
  '--prompt-path', 'p.md',
112
112
  '--output-path', 'out.json'],
@@ -116,12 +116,12 @@ test('SH-5: spawn-headless rejects unknown agent', () => {
116
116
  );
117
117
  });
118
118
 
119
- test('SH-6: spawn-headless rejects invalid agent name (path-injection guard)', () => {
119
+ test('SH-6: spawn-headless rejects invalid agent name (path-injection guard)', async () => {
120
120
  const r = _mkRoot();
121
121
  fs.writeFileSync(path.join(r, 'p.md'), 'audit', 'utf-8');
122
122
  const cap = _cap();
123
- assert.throws(
124
- () => spawnHeadless.run(
123
+ await assert.rejects(
124
+ async () => spawnHeadless.run(
125
125
  ['--agent', '../../etc/passwd',
126
126
  '--prompt-path', 'p.md',
127
127
  '--output-path', 'out.json'],
@@ -131,13 +131,13 @@ test('SH-6: spawn-headless rejects invalid agent name (path-injection guard)', (
131
131
  );
132
132
  });
133
133
 
134
- test('SH-7: spawn-headless reports claude-not-found when binary missing', () => {
134
+ test('SH-7: spawn-headless reports claude-not-found when binary missing', async () => {
135
135
  const r = _mkRoot();
136
136
  fs.writeFileSync(path.join(r, 'p.md'), 'audit', 'utf-8');
137
137
  _setEnv('NUBOS_PILOT_CLAUDE_BIN', path.join(r, 'no-such-binary'));
138
138
  const cap = _cap();
139
- assert.throws(
140
- () => spawnHeadless.run(
139
+ await assert.rejects(
140
+ async () => spawnHeadless.run(
141
141
  ['--agent', 'np-test-critic',
142
142
  '--prompt-path', 'p.md',
143
143
  '--output-path', 'out.json'],
@@ -147,7 +147,7 @@ test('SH-7: spawn-headless reports claude-not-found when binary missing', () =>
147
147
  );
148
148
  });
149
149
 
150
- test('SH-8: spawn-headless captures stdout to output-path on success (mock binary)', () => {
150
+ test('SH-8: spawn-headless captures stdout to output-path on success (mock binary)', async () => {
151
151
  const r = _mkRoot();
152
152
  fs.writeFileSync(path.join(r, 'p.md'), 'audit', 'utf-8');
153
153
  const mockBin = path.join(r, 'mock-claude.sh');
@@ -155,7 +155,7 @@ test('SH-8: spawn-headless captures stdout to output-path on success (mock binar
155
155
  fs.chmodSync(mockBin, 0o755);
156
156
  _setEnv('NUBOS_PILOT_CLAUDE_BIN', mockBin);
157
157
  const cap = _cap();
158
- const rc = spawnHeadless.run(
158
+ const rc = await spawnHeadless.run(
159
159
  ['--agent', 'np-test-critic',
160
160
  '--prompt-path', 'p.md',
161
161
  '--output-path', 'out.json'],
@@ -169,7 +169,7 @@ test('SH-8: spawn-headless captures stdout to output-path on success (mock binar
169
169
  assert.match(written, /"verdict":"passed"/);
170
170
  });
171
171
 
172
- test('SH-9: spawn-headless surfaces non-zero subprocess exit (mock failure)', () => {
172
+ test('SH-9: spawn-headless surfaces non-zero subprocess exit (mock failure)', async () => {
173
173
  const r = _mkRoot();
174
174
  fs.writeFileSync(path.join(r, 'p.md'), 'audit', 'utf-8');
175
175
  const mockBin = path.join(r, 'mock-fail.sh');
@@ -177,7 +177,7 @@ test('SH-9: spawn-headless surfaces non-zero subprocess exit (mock failure)', ()
177
177
  fs.chmodSync(mockBin, 0o755);
178
178
  _setEnv('NUBOS_PILOT_CLAUDE_BIN', mockBin);
179
179
  const cap = _cap();
180
- const rc = spawnHeadless.run(
180
+ const rc = await spawnHeadless.run(
181
181
  ['--agent', 'np-test-critic',
182
182
  '--prompt-path', 'p.md',
183
183
  '--output-path', 'out.json'],
@@ -189,12 +189,12 @@ test('SH-9: spawn-headless surfaces non-zero subprocess exit (mock failure)', ()
189
189
  assert.match(payload.stderr_excerpt, /boom/);
190
190
  });
191
191
 
192
- test('SH-10: spawn-headless rejects --timeout-ms below 1000', () => {
192
+ test('SH-10: spawn-headless rejects --timeout-ms below 1000', async () => {
193
193
  const r = _mkRoot();
194
194
  fs.writeFileSync(path.join(r, 'p.md'), 'audit', 'utf-8');
195
195
  const cap = _cap();
196
- assert.throws(
197
- () => spawnHeadless.run(
196
+ await assert.rejects(
197
+ async () => spawnHeadless.run(
198
198
  ['--agent', 'np-test-critic',
199
199
  '--prompt-path', 'p.md',
200
200
  '--output-path', 'out.json',
@@ -205,7 +205,7 @@ test('SH-10: spawn-headless rejects --timeout-ms below 1000', () => {
205
205
  );
206
206
  });
207
207
 
208
- test('SH-11: spawn-headless writes output atomically (no .tmp residue)', () => {
208
+ test('SH-11: spawn-headless writes output atomically (no .tmp residue)', async () => {
209
209
  const r = _mkRoot();
210
210
  fs.writeFileSync(path.join(r, 'p.md'), 'audit', 'utf-8');
211
211
  const mockBin = path.join(r, 'mock-claude.sh');
@@ -213,7 +213,7 @@ test('SH-11: spawn-headless writes output atomically (no .tmp residue)', () => {
213
213
  fs.chmodSync(mockBin, 0o755);
214
214
  _setEnv('NUBOS_PILOT_CLAUDE_BIN', mockBin);
215
215
  const cap = _cap();
216
- const rc = spawnHeadless.run(
216
+ const rc = await spawnHeadless.run(
217
217
  ['--agent', 'np-test-critic',
218
218
  '--prompt-path', 'p.md',
219
219
  '--output-path', 'out.json'],
@@ -321,7 +321,7 @@ test('SH-REDACT-2 _redactSecrets is a no-op on safe text', () => {
321
321
  assert.equal(spawnHeadless._redactSecrets(safe), safe);
322
322
  });
323
323
 
324
- test('SH-AUDIT-FIRST spawn-trail is written BEFORE caller-visible output (audit-first)', () => {
324
+ test('SH-AUDIT-FIRST spawn-trail is written BEFORE caller-visible output (audit-first)', async () => {
325
325
  const r = _mkRoot();
326
326
  fs.writeFileSync(path.join(r, 'p.md'), 'audit', 'utf-8');
327
327
  const mockBin = path.join(r, 'mock.sh');
@@ -335,7 +335,7 @@ test('SH-AUDIT-FIRST spawn-trail is written BEFORE caller-visible output (audit-
335
335
  const cap = _cap();
336
336
  let thrown = null;
337
337
  try {
338
- spawnHeadless.run(
338
+ await spawnHeadless.run(
339
339
  ['--agent', 'np-test-critic', '--prompt-path', 'p.md', '--output-path', 'out.json'],
340
340
  { cwd: r, stdout: cap.stub },
341
341
  );
@@ -348,7 +348,7 @@ test('SH-AUDIT-FIRST spawn-trail is written BEFORE caller-visible output (audit-
348
348
  'output must NOT exist if audit append failed (audit-first invariant)');
349
349
  });
350
350
 
351
- test('SH-PARSE-OK payload_parse_ok=false when claude returns non-JSON output', () => {
351
+ test('SH-PARSE-OK payload_parse_ok=false when claude returns non-JSON output', async () => {
352
352
  const r = _mkRoot();
353
353
  fs.writeFileSync(path.join(r, 'p.md'), 'audit', 'utf-8');
354
354
  const mockBin = path.join(r, 'mock-plain.sh');
@@ -357,7 +357,7 @@ test('SH-PARSE-OK payload_parse_ok=false when claude returns non-JSON output', (
357
357
  _setEnv('NUBOS_PILOT_CLAUDE_BIN', mockBin);
358
358
  _setEnv('NUBOS_PILOT_RUN_ID', 'r-parse-test');
359
359
  const cap = _cap();
360
- spawnHeadless.run(
360
+ await spawnHeadless.run(
361
361
  ['--agent', 'np-test-critic', '--prompt-path', 'p.md', '--output-path', 'out.json'],
362
362
  { cwd: r, stdout: cap.stub },
363
363
  );
@@ -415,7 +415,7 @@ test('SH-ENV-3 NUBOS_PILOT_SPAWN_ENV_PASSTHROUGH allow-lists by exact key name',
415
415
  assert.equal(filtered.NOT_LISTED, undefined);
416
416
  });
417
417
 
418
- test('SH-TRAIL-1 spawn writes append-only spawn-trail record with run_id + prompt/response sha256 + timing', () => {
418
+ test('SH-TRAIL-1 spawn writes append-only spawn-trail record with run_id + prompt/response sha256 + timing', async () => {
419
419
  const r = _mkRoot();
420
420
  fs.writeFileSync(path.join(r, 'p.md'), 'do the audit', 'utf-8');
421
421
  const mockBin = path.join(r, 'mock-claude.sh');
@@ -424,7 +424,7 @@ test('SH-TRAIL-1 spawn writes append-only spawn-trail record with run_id + promp
424
424
  _setEnv('NUBOS_PILOT_CLAUDE_BIN', mockBin);
425
425
  _setEnv('NUBOS_PILOT_RUN_ID', 'r-traceme-deadbeef');
426
426
  const cap = _cap();
427
- const rc = spawnHeadless.run(
427
+ const rc = await spawnHeadless.run(
428
428
  ['--agent', 'np-test-critic', '--prompt-path', 'p.md', '--output-path', 'out.json'],
429
429
  { cwd: r, stdout: cap.stub },
430
430
  );
@@ -452,7 +452,7 @@ test('SH-TRAIL-1 spawn writes append-only spawn-trail record with run_id + promp
452
452
  assert.ok(Number.isFinite(rec.duration_ms) && rec.duration_ms >= 0);
453
453
  });
454
454
 
455
- test('SH-TRAIL-1b run_id is seeded BEFORE spawn so the child env inherits NUBOS_PILOT_RUN_ID', () => {
455
+ test('SH-TRAIL-1b run_id is seeded BEFORE spawn so the child env inherits NUBOS_PILOT_RUN_ID', async () => {
456
456
  const r = _mkRoot();
457
457
  fs.writeFileSync(path.join(r, 'p.md'), 'audit', 'utf-8');
458
458
  // Mock-claude echoes its own ENV var so we can prove the child saw it.
@@ -465,7 +465,7 @@ test('SH-TRAIL-1b run_id is seeded BEFORE spawn so the child env inherits NUBOS_
465
465
  // Crucially: do NOT set NUBOS_PILOT_RUN_ID; the lazy-seed must happen.
466
466
  runContext._resetForTests();
467
467
  const cap = _cap();
468
- spawnHeadless.run(
468
+ await spawnHeadless.run(
469
469
  ['--agent', 'np-test-critic', '--prompt-path', 'p.md', '--output-path', 'out.json'],
470
470
  { cwd: r, stdout: cap.stub },
471
471
  );
@@ -475,7 +475,7 @@ test('SH-TRAIL-1b run_id is seeded BEFORE spawn so the child env inherits NUBOS_
475
475
  assert.equal(childRunId, payload.run_id, 'child must inherit parent NUBOS_PILOT_RUN_ID via filtered env');
476
476
  });
477
477
 
478
- test('SH-TRAIL-2 two sequential spawns append two parseable trail lines (jsonl integrity)', () => {
478
+ test('SH-TRAIL-2 two sequential spawns append two parseable trail lines (jsonl integrity)', async () => {
479
479
  const r = _mkRoot();
480
480
  fs.writeFileSync(path.join(r, 'p.md'), 'audit X', 'utf-8');
481
481
  const mockBin = path.join(r, 'mock.sh');
@@ -485,7 +485,7 @@ test('SH-TRAIL-2 two sequential spawns append two parseable trail lines (jsonl i
485
485
  _setEnv('NUBOS_PILOT_RUN_ID', 'r-test-multi-aaa1');
486
486
  const cap = _cap();
487
487
  for (let i = 0; i < 2; i++) {
488
- spawnHeadless.run(
488
+ await spawnHeadless.run(
489
489
  ['--agent', 'np-test-critic', '--prompt-path', 'p.md', '--output-path', 'out' + i + '.json'],
490
490
  { cwd: r, stdout: cap.stub },
491
491
  );
@@ -496,15 +496,15 @@ test('SH-TRAIL-2 two sequential spawns append two parseable trail lines (jsonl i
496
496
  for (const l of lines) JSON.parse(l);
497
497
  });
498
498
 
499
- test('SH-GUARD-1 refuses to spawn when NUBOS_PILOT_HEADLESS=1 (reentrancy guard)', () => {
499
+ test('SH-GUARD-1 refuses to spawn when NUBOS_PILOT_HEADLESS=1 (reentrancy guard)', async () => {
500
500
  const r = _mkRoot();
501
501
  fs.writeFileSync(path.join(r, 'p.md'), 'audit', 'utf-8');
502
502
  const mockBin = _mockClaude(r, 'mock.sh', '#!/bin/sh\ncat > /dev/null\necho "{}"\n');
503
503
  _setEnv('NUBOS_PILOT_CLAUDE_BIN', mockBin);
504
504
  _setEnv('NUBOS_PILOT_HEADLESS', '1');
505
505
  const cap = _cap();
506
- assert.throws(
507
- () => spawnHeadless.run(
506
+ await assert.rejects(
507
+ spawnHeadless.run(
508
508
  ['--agent', 'np-test-critic', '--prompt-path', 'p.md', '--output-path', 'out.json'],
509
509
  { cwd: r, stdout: cap.stub },
510
510
  ),
@@ -513,15 +513,15 @@ test('SH-GUARD-1 refuses to spawn when NUBOS_PILOT_HEADLESS=1 (reentrancy guard)
513
513
  assert.equal(fs.existsSync(path.join(r, 'out.json')), false, 'no claude must be spawned inside a headless run');
514
514
  });
515
515
 
516
- test('SH-GUARD-2 refuses to spawn when hook depth has reached the cap (depth guard)', () => {
516
+ test('SH-GUARD-2 refuses to spawn when hook depth has reached the cap (depth guard)', async () => {
517
517
  const r = _mkRoot();
518
518
  fs.writeFileSync(path.join(r, 'p.md'), 'audit', 'utf-8');
519
519
  const mockBin = _mockClaude(r, 'mock.sh', '#!/bin/sh\ncat > /dev/null\necho "{}"\n');
520
520
  _setEnv('NUBOS_PILOT_CLAUDE_BIN', mockBin);
521
521
  _setEnv('NUBOS_PILOT_HOOK_DEPTH', '1');
522
522
  const cap = _cap();
523
- assert.throws(
524
- () => spawnHeadless.run(
523
+ await assert.rejects(
524
+ spawnHeadless.run(
525
525
  ['--agent', 'np-test-critic', '--prompt-path', 'p.md', '--output-path', 'out.json'],
526
526
  { cwd: r, stdout: cap.stub },
527
527
  ),
@@ -529,14 +529,14 @@ test('SH-GUARD-2 refuses to spawn when hook depth has reached the cap (depth gua
529
529
  );
530
530
  });
531
531
 
532
- test('SH-GUARD-3 child env carries NUBOS_PILOT_HEADLESS=1 and depth=1 (one level deep only)', () => {
532
+ test('SH-GUARD-3 child env carries NUBOS_PILOT_HEADLESS=1 and depth=1 (one level deep only)', async () => {
533
533
  const r = _mkRoot();
534
534
  fs.writeFileSync(path.join(r, 'p.md'), 'audit', 'utf-8');
535
535
  const mockBin = _mockClaude(r, 'mock.sh',
536
536
  '#!/bin/sh\ncat > /dev/null\nprintf \'{"hl":"\'$NUBOS_PILOT_HEADLESS\'","depth":"\'$NUBOS_PILOT_HOOK_DEPTH\'"}\\n\'\n');
537
537
  _setEnv('NUBOS_PILOT_CLAUDE_BIN', mockBin);
538
538
  const cap = _cap();
539
- const rc = spawnHeadless.run(
539
+ const rc = await spawnHeadless.run(
540
540
  ['--agent', 'np-test-critic', '--prompt-path', 'p.md', '--output-path', 'out.json'],
541
541
  { cwd: r, stdout: cap.stub },
542
542
  );
@@ -546,7 +546,7 @@ test('SH-GUARD-3 child env carries NUBOS_PILOT_HEADLESS=1 and depth=1 (one level
546
546
  assert.equal(child.depth, '1', 'child claude must run at hook depth 1');
547
547
  });
548
548
 
549
- test('SH-GUARD-4 refuses to spawn while a live lock for the same agent is held (concurrency guard)', () => {
549
+ test('SH-GUARD-4 refuses to spawn while a live lock for the same agent is held (concurrency guard)', async () => {
550
550
  const r = _mkRoot();
551
551
  fs.writeFileSync(path.join(r, 'p.md'), 'audit', 'utf-8');
552
552
  const mockBin = _mockClaude(r, 'mock.sh', '#!/bin/sh\ncat > /dev/null\necho "{}"\n');
@@ -555,8 +555,8 @@ test('SH-GUARD-4 refuses to spawn while a live lock for the same agent is held (
555
555
  assert.equal(held.acquired, true);
556
556
  const cap = _cap();
557
557
  try {
558
- assert.throws(
559
- () => spawnHeadless.run(
558
+ await assert.rejects(
559
+ spawnHeadless.run(
560
560
  ['--agent', 'np-test-critic', '--prompt-path', 'p.md', '--output-path', 'out.json'],
561
561
  { cwd: r, stdout: cap.stub },
562
562
  ),
@@ -567,14 +567,14 @@ test('SH-GUARD-4 refuses to spawn while a live lock for the same agent is held (
567
567
  }
568
568
  });
569
569
 
570
- test('SH-GUARD-5 lock is released after a successful spawn (re-spawnable)', () => {
570
+ test('SH-GUARD-5 lock is released after a successful spawn (re-spawnable)', async () => {
571
571
  const r = _mkRoot();
572
572
  fs.writeFileSync(path.join(r, 'p.md'), 'audit', 'utf-8');
573
573
  const mockBin = _mockClaude(r, 'mock.sh', '#!/bin/sh\ncat > /dev/null\necho "{}"\n');
574
574
  _setEnv('NUBOS_PILOT_CLAUDE_BIN', mockBin);
575
575
  const cap = _cap();
576
576
  for (let i = 0; i < 2; i++) {
577
- const rc = spawnHeadless.run(
577
+ const rc = await spawnHeadless.run(
578
578
  ['--agent', 'np-test-critic', '--prompt-path', 'p.md', '--output-path', 'out' + i + '.json'],
579
579
  { cwd: r, stdout: cap.stub },
580
580
  );
@@ -583,7 +583,7 @@ test('SH-GUARD-5 lock is released after a successful spawn (re-spawnable)', () =
583
583
  assert.equal(fs.existsSync(headlessGuard._lockPath(r, 'np-test-critic')), false, 'no lock residue after spawns');
584
584
  });
585
585
 
586
- test('SH-GUARD-6 a held lock for one agent does NOT block a different agent (per-agent scope)', () => {
586
+ test('SH-GUARD-6 a held lock for one agent does NOT block a different agent (per-agent scope)', async () => {
587
587
  const r = _mkRoot();
588
588
  fs.writeFileSync(
589
589
  path.join(r, '.nubos-pilot', 'agents', 'np-other-critic.md'),
@@ -597,7 +597,7 @@ test('SH-GUARD-6 a held lock for one agent does NOT block a different agent (per
597
597
  assert.equal(held.acquired, true);
598
598
  const cap = _cap();
599
599
  try {
600
- const rc = spawnHeadless.run(
600
+ const rc = await spawnHeadless.run(
601
601
  ['--agent', 'np-other-critic', '--prompt-path', 'p.md', '--output-path', 'out.json'],
602
602
  { cwd: r, stdout: cap.stub },
603
603
  );
@@ -607,13 +607,13 @@ test('SH-GUARD-6 a held lock for one agent does NOT block a different agent (per
607
607
  }
608
608
  });
609
609
 
610
- test('SH-GUARD-7 lock is released even when the spawn errors (claude-not-found)', () => {
610
+ test('SH-GUARD-7 lock is released even when the spawn errors (claude-not-found)', async () => {
611
611
  const r = _mkRoot();
612
612
  fs.writeFileSync(path.join(r, 'p.md'), 'audit', 'utf-8');
613
613
  _setEnv('NUBOS_PILOT_CLAUDE_BIN', path.join(r, 'no-such-binary'));
614
614
  const cap = _cap();
615
- assert.throws(
616
- () => spawnHeadless.run(
615
+ await assert.rejects(
616
+ spawnHeadless.run(
617
617
  ['--agent', 'np-test-critic', '--prompt-path', 'p.md', '--output-path', 'out.json'],
618
618
  { cwd: r, stdout: cap.stub },
619
619
  ),
@@ -639,3 +639,53 @@ test('SH-ENV-4 NUBOS_PILOT_/CLAUDE_/ANTHROPIC_ prefixed vars pass through (white
639
639
  assert.equal(filtered.ANTHROPIC_BASE_URL, 'https://api.anthropic.com');
640
640
  assert.equal(filtered.UNRELATED_FOO, undefined);
641
641
  });
642
+
643
// SH-OFFHOST-1: when the resolver routes the agent to an openai-compat
// provider, run() must hand the prompt to the injected dispatcher, write a
// claude-compatible {result} envelope to --output-path, and exit 0.
test('SH-OFFHOST-1: openai-compat routing runs dispatchOffHost and writes a {result} envelope (no claude -p)', async () => {
  const r = _mkRoot();
  fs.writeFileSync(path.join(r, 'p.md'), 'review this diff', 'utf-8');
  const cap = _cap();
  // The native binary is pointed at a path that does not exist. This is what
  // guards the "no claude -p" claim: a native spawn attempt is expected to
  // fail (cf. SH-7) instead of producing the exit code 0 asserted below.
  _setEnv('NUBOS_PILOT_CLAUDE_BIN', path.join(r, 'nonexistent-claude-should-not-run'));
  let dispatchArgs = null;
  const code = await spawnHeadless.run(
    ['--agent', 'np-test-critic', '--prompt-path', 'p.md', '--output-path', 'out.json'],
    {
      cwd: r,
      stdout: cap.stub,
      resolveImpl: () => ({ kind: 'openai-compat', provider: 'ollama', model: 'qwen2.5-coder:32b' }),
      dispatchImpl: async (o) => { dispatchArgs = o; return { content: 'REVIEW: 0 risks', model: 'qwen2.5-coder:32b', provider: 'ollama' }; },
    },
  );
  assert.equal(code, 0);
  // Fail with a readable message (not a TypeError on null) if the dispatcher
  // was never reached.
  assert.ok(dispatchArgs, 'dispatchImpl must be invoked for openai-compat routing');
  // dispatch received the agent + the prompt body as the task
  assert.equal(dispatchArgs.agent, 'np-test-critic');
  assert.match(dispatchArgs.task, /review this diff/);
  // output is the claude-compatible {result} envelope so review/extract parse it unchanged
  const out = JSON.parse(fs.readFileSync(path.join(r, 'out.json'), 'utf-8'));
  assert.equal(out.result, 'REVIEW: 0 risks');
  assert.equal(out.provider, 'ollama');
  // caller-visible payload marks the run as off-host
  const payload = JSON.parse(cap.get().trim());
  assert.equal(payload.off_host, true);
  assert.equal(payload.exit_code, 0);
});
673
+
674
// SH-OFFHOST-2: a dispatcher that throws must not crash run(); the caller
// gets exit code 2 and an error-flagged envelope with an empty result.
test('SH-OFFHOST-2: a failing off-host dispatch returns exit 2 with an empty result (soft-fail parity)', async () => {
  const root = _mkRoot();
  fs.writeFileSync(path.join(root, 'p.md'), 'review', 'utf-8');
  const capture = _cap();

  // Stand-ins injected through the options bag: the route forces the
  // off-host branch, the dispatcher always fails with a coded error.
  const offHostRoute = () => ({ kind: 'openai-compat', provider: 'ollama', model: 'x' });
  const failingDispatch = async () => {
    const failure = new Error('provider unreachable');
    failure.code = 'preflight-failed';
    throw failure;
  };

  const exitCode = await spawnHeadless.run(
    ['--agent', 'np-test-critic', '--prompt-path', 'p.md', '--output-path', 'out.json'],
    {
      cwd: root,
      stdout: capture.stub,
      resolveImpl: offHostRoute,
      dispatchImpl: failingDispatch,
    },
  );

  assert.equal(exitCode, 2);
  const envelope = JSON.parse(fs.readFileSync(path.join(root, 'out.json'), 'utf-8'));
  assert.equal(envelope.result, '');
  assert.equal(envelope.is_error, true);
});
@@ -0,0 +1,93 @@
1
+ 'use strict';
2
+
3
+ const fs = require('node:fs');
4
+ const { NubosPilotError } = require('../../lib/core.cjs');
5
+ const { dispatchOffHost } = require('../../lib/runtime/dispatch.cjs');
6
+
7
/**
 * Write the one-line usage summary for `spawn-offhost` to stderr.
 *
 * The summary lists every flag the argument parser accepts, including
 * `--output-schema`, which was parsed but previously undocumented.
 */
function _usage() {
  const required = '--agent <name> (--task <str> | --task-file <path>)';
  const optional = [
    '[--cwd <dir>]',
    '[--phase P]',
    '[--plan P]',
    '[--task-id T]',
    '[--max-iterations N]',
    '[--output-schema S]',
    '[--allow-bash]',
    '[--read-only]',
    '[--no-audit]',
  ].join(' ');
  process.stderr.write(`Usage: np-tools.cjs spawn-offhost ${required} ${optional}\n`);
}
13
+
14
// Flags that consume the next token as their value, mapped to the property
// they populate on the parsed result.
const _VALUE_FLAGS = new Map([
  ['--agent', 'agent'],
  ['--task', 'task'],
  ['--task-file', 'taskFile'],
  ['--phase', 'phase'],
  ['--plan', 'plan'],
  ['--task-id', 'taskId'],
  ['--cwd', 'cwd'],
  ['--output-schema', 'outputSchema'],
]);

// Valueless switches, mapped to the boolean property they set to true.
const _SWITCH_FLAGS = new Map([
  ['--allow-bash', 'allowBash'],
  ['--read-only', 'readOnly'],
  ['--no-audit', 'skipAudit'],
]);

/**
 * Parse `spawn-offhost` command-line arguments into an options object.
 *
 * Unknown tokens are ignored. A value flag that appears last (with no
 * following token) stores `undefined`.
 *
 * @param {string[]} argv Arguments after the subcommand name; not mutated.
 * @returns {object} Always carries `allowBash` and `readOnly` (default
 *   false); other properties are present only when their flag was given.
 */
function _parse(argv) {
  const out = { allowBash: false, readOnly: false };
  const rest = argv.slice();
  while (rest.length > 0) {
    const flag = rest.shift();
    if (_VALUE_FLAGS.has(flag)) {
      out[_VALUE_FLAGS.get(flag)] = rest.shift();
    } else if (_SWITCH_FLAGS.has(flag)) {
      out[_SWITCH_FLAGS.get(flag)] = true;
    } else if (flag === '--max-iterations') {
      // A missing or non-numeric value converts to NaN. Leave the option
      // unset in that case so a NaN never reaches the consumer as a limit.
      const limit = Number(rest.shift());
      if (Number.isFinite(limit)) out.maxIterations = limit;
    }
  }
  return out;
}
34
+
35
/**
 * CLI entry point for the `spawn-offhost` subcommand.
 *
 * Parses the arguments, resolves the task text (inline `--task` or the
 * contents of `--task-file`), hands the job to `dispatchOffHost`, and prints
 * the dispatch result as a single JSON line on stdout. Advisory conditions
 * reported on the result (metrics not recorded, Rule-9 violation, no tool
 * calls made) are written to stderr and do not change the exit code.
 *
 * @param {string[]} [argv] Arguments after the subcommand name. When this is
 *   not an array, `process.argv.slice(3)` is used instead.
 * @returns {Promise<number>} 0 when the dispatch completed; 1 for usage
 *   errors, an unreadable task file, or a dispatch failure.
 */
async function run(argv) {
  const args = Array.isArray(argv) ? argv.slice() : process.argv.slice(3);
  if (!args.length || args[0] === '--help') { _usage(); return 1; }
  const parsed = _parse(args);

  // --task-file wins over --task when both are supplied.
  let task = parsed.task;
  if (parsed.taskFile) {
    try {
      task = fs.readFileSync(parsed.taskFile, 'utf-8');
    } catch (readErr) {
      // Report only the basename of the path, plus the OS error code
      // (ENOENT, EACCES, ...) so the caller can tell why the read failed.
      process.stderr.write(JSON.stringify({
        code: 'spawn-offhost-task-file-unreadable',
        file: require('node:path').basename(parsed.taskFile),
        reason: readErr && readErr.code,
      }) + '\n');
      return 1;
    }
  }
  if (!parsed.agent || typeof task !== 'string') { _usage(); return 1; }

  try {
    const result = await dispatchOffHost({
      agent: parsed.agent,
      task,
      cwd: parsed.cwd || process.cwd(),
      phase: parsed.phase,
      plan: parsed.plan,
      taskId: parsed.taskId,
      // Never forward NaN/Infinity as an iteration limit; an omitted value
      // is passed as undefined, exactly as when the flag is absent.
      maxIterations: Number.isFinite(parsed.maxIterations) ? parsed.maxIterations : undefined,
      allowBash: parsed.allowBash,
      readOnly: parsed.readOnly,
      skipAudit: parsed.skipAudit,
      outputSchema: parsed.outputSchema,
    });

    // Advisory notices: surfaced on stderr so stdout stays one JSON line.
    if (result && result.metrics_recorded === false) {
      process.stderr.write('spawn-offhost: metrics row was not recorded (telemetry only; run succeeded)\n');
    }
    if (result && result.rule9 && result.rule9.ok === false) {
      process.stderr.write('spawn-offhost: Rule-9 violation (' + (result.rule9.violation || result.rule9.error)
        + ') — the agent did not satisfy the search bar. Do NOT commit this output as-is; re-run or route back to the agent.\n');
    }
    if (result && result.capability && result.capability.ok === false) {
      const c = result.capability;
      process.stderr.write('spawn-offhost: the model advertised ' + c.toolsAdvertised
        + ' tool(s) but made zero tool calls — the provider/model likely does NOT support OpenAI function/tool-calling. '
        + (c.mutating
          ? 'This agent edits files; off-host it produced NO changes. Route it to a tool-calling-capable model or keep it native.'
          : 'If this agent was expected to inspect the workspace, its output may be ungrounded — verify before relying on it.')
        + '\n');
    }

    process.stdout.write(JSON.stringify(result) + '\n');
    return 0;
  } catch (err) {
    // Project errors are reported as structured JSON; anything else is
    // dumped with its stack for debugging.
    if (err && err.name === 'NubosPilotError') {
      process.stderr.write(JSON.stringify({ code: err.code, message: err.message, details: err.details }) + '\n');
    } else {
      process.stderr.write(String((err && err.stack) || err) + '\n');
    }
    return 1;
  }
}
88
+
89
+ module.exports = { run, _parse };
90
+
91
+ if (require.main === module) {
92
+ run(process.argv.slice(2)).then((code) => process.exit(code || 0));
93
+ }
@@ -0,0 +1,38 @@
1
+ const { test } = require('node:test');
2
+ const assert = require('node:assert/strict');
3
+
4
+ const subcmd = require('./spawn-offhost.cjs');
5
+
6
/**
 * Run `fn` while process.stdout / process.stderr writes are redirected into
 * in-memory buffers, then report what was written.
 *
 * The original `write` functions are put back in a `finally`, so they are
 * restored whether `fn` returns, rejects, or throws synchronously.
 *
 * @param {Function} fn Callback to run; may return a value or a promise.
 * @returns {Promise<{rc: *, stdout: string, stderr: string}>} The value `fn`
 *   resolved to plus the captured output. Rejects with whatever `fn` threw.
 */
async function _capture(fn) {
  const out = [];
  const err = [];
  // Keep the exact original functions (not bound copies) so restoration
  // leaves the streams identical to how they were found.
  const realOut = process.stdout.write;
  const realErr = process.stderr.write;
  process.stdout.write = (c) => { out.push(String(c)); return true; };
  process.stderr.write = (c) => { err.push(String(c)); return true; };
  let rc;
  try {
    rc = await fn();
  } finally {
    process.stdout.write = realOut;
    process.stderr.write = realErr;
  }
  return { rc, stdout: out.join(''), stderr: err.join('') };
}
17
+
18
// SOH-1: the parser maps value flags and boolean switches onto the expected
// option properties, and leaves --read-only at its default of false.
test('SOH-1: _parse reads agent/task, the boolean flags, --cwd and --no-audit', () => {
  const argv = [
    '--agent', 'np-executor',
    '--task', 'do x',
    '--allow-bash',
    '--max-iterations', '5',
    '--cwd', '/wt',
    '--no-audit',
  ];
  const parsed = subcmd._parse(argv);

  // Checked in a fixed order, one property at a time.
  const expectations = [
    ['agent', 'np-executor'],
    ['task', 'do x'],
    ['allowBash', true],
    ['readOnly', false],
    ['maxIterations', 5],
    ['cwd', '/wt'],
    ['skipAudit', true],
  ];
  for (const [key, wanted] of expectations) {
    assert.equal(parsed[key], wanted);
  }
});
28
+
29
// SOH-2: running with an empty argument list prints the usage text on
// stderr and exits with code 1.
test('SOH-2: missing args prints usage and returns 1', async () => {
  const captured = await _capture(() => subcmd.run([]));
  assert.equal(captured.rc, 1);
  assert.match(captured.stderr, /Usage:/);
});
34
+
35
// SOH-3: an agent with neither --task nor --task-file is a usage error.
test('SOH-3: --agent without a task returns 1', async () => {
  const captured = await _capture(() => subcmd.run(['--agent', 'np-executor']));
  assert.equal(captured.rc, 1);
});
package/lib/agents.cjs CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  const fs = require('node:fs');
4
4
  const path = require('node:path');
5
- const { extractFrontmatter } = require('./frontmatter.cjs');
5
+ const { extractFrontmatter, stripFrontmatter } = require('./frontmatter.cjs');
6
6
  const { NubosPilotError, findProjectRoot } = require('./core.cjs');
7
7
 
8
8
  const REQUIRED = ['name', 'description', 'tier', 'tools'];
@@ -60,7 +60,7 @@ function validateAgentFrontmatter(fm, agentName) {
60
60
 
61
61
  const AGENT_NAME_RE = /^[a-zA-Z0-9_-]+$/;
62
62
 
63
- function _loadAgentFromDisk(name, cwd) {
63
+ function _resolveAgentPath(name, cwd) {
64
64
  if (typeof name !== 'string' || !AGENT_NAME_RE.test(name)) {
65
65
  throw new NubosPilotError(
66
66
  'agent-invalid-name',
@@ -83,10 +83,23 @@ function _loadAgentFromDisk(name, cwd) {
83
83
  { name, path: candidates[0], tried: candidates },
84
84
  );
85
85
  }
86
+ return found;
87
+ }
88
+
89
+ function _loadAgentFromDisk(name, cwd) {
90
+ const found = _resolveAgentPath(name, cwd);
86
91
  const { frontmatter } = extractFrontmatter(fs.readFileSync(found, 'utf-8'));
87
92
  return validateAgentFrontmatter(frontmatter, name);
88
93
  }
89
94
 
95
/**
 * Load an agent definition file and return both its frontmatter and the
 * remaining file content.
 *
 * The frontmatter is passed through `validateAgentFrontmatter` for its
 * checks only; the validator's return value is not used here, and the
 * frontmatter object returned is the one produced by `extractFrontmatter`.
 *
 * @param {string} name Agent name, resolved to a file by `_resolveAgentPath`.
 * @param {string} cwd Directory handed to `_resolveAgentPath` for the lookup.
 * @returns {{frontmatter: object, body: string, path: string}} Parsed
 *   frontmatter, the result of `stripFrontmatter` on the raw file text, and
 *   the resolved file path.
 */
function loadAgentSource(name, cwd) {
  const agentPath = _resolveAgentPath(name, cwd);
  const source = fs.readFileSync(agentPath, 'utf-8');
  const { frontmatter: meta } = extractFrontmatter(source);
  validateAgentFrontmatter(meta, name);
  const body = stripFrontmatter(source);
  return { frontmatter: meta, body, path: agentPath };
}
102
+
90
103
  function loadAgent(name, cwd) {
91
104
  const fm = _loadAgentFromDisk(name, cwd);
92
105
  if (fm.module === true) {
@@ -143,6 +156,7 @@ module.exports = {
143
156
  validateAgentFrontmatter,
144
157
  loadAgent,
145
158
  loadAgentModule,
159
+ loadAgentSource,
146
160
  listAgents,
147
161
  getAgentSkills,
148
162
  AGENT_NAME_RE,
@@ -9,6 +9,8 @@ const VALID_TIERS = Object.freeze(['haiku', 'sonnet', 'opus']);
9
9
  const SCHEMA = Object.freeze({
10
10
  scope: { type: 'enum', values: VALID_SCOPES, optional: true },
11
11
  model_profile: { type: 'enum', values: VALID_MODEL_PROFILES, optional: true },
12
+ model_providers: { type: 'object', shape: 'any', optional: true },
13
+ agent_routing: { type: 'object', shape: 'any', optional: true },
12
14
  response_language:{ type: 'string', optional: true },
13
15
  runtime: { type: 'string', optional: true },
14
16
  runtimes: { type: 'array', element: 'string', optional: true },
@@ -220,7 +222,9 @@ function _clone(v) {
220
222
  return out;
221
223
  }
222
224
 
223
- const SCHEMA_ONLY_KEYS = Object.freeze(['runtime', 'runtimes', 'agent_skills']);
225
+ const SCHEMA_ONLY_KEYS = Object.freeze([
226
+ 'runtime', 'runtimes', 'agent_skills', 'model_providers', 'agent_routing',
227
+ ]);
224
228
 
225
229
  module.exports = {
226
230
  SCHEMA,