nubos-pilot 1.2.4 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +17 -1
- package/README.md +2 -1
- package/SECURITY.md +3 -4
- package/bin/np-tools/_commands.cjs +1 -0
- package/bin/np-tools/learnings.cjs +1 -1
- package/bin/np-tools/resolve-model.cjs +55 -1
- package/bin/np-tools/resolve-model.test.cjs +139 -0
- package/bin/np-tools/security.cjs +1 -1
- package/bin/np-tools/spawn-headless.cjs +100 -1
- package/bin/np-tools/spawn-headless.test.cjs +108 -58
- package/bin/np-tools/spawn-offhost.cjs +93 -0
- package/bin/np-tools/spawn-offhost.test.cjs +38 -0
- package/lib/agents.cjs +16 -2
- package/lib/config-schema.cjs +5 -1
- package/lib/learnings/extract.cjs +4 -4
- package/lib/learnings/extract.test.cjs +8 -8
- package/lib/model-providers.cjs +118 -0
- package/lib/model-providers.test.cjs +85 -0
- package/lib/runtime/agent-loop.cjs +64 -0
- package/lib/runtime/agent-loop.test.cjs +135 -0
- package/lib/runtime/dispatch.cjs +174 -0
- package/lib/runtime/dispatch.test.cjs +193 -0
- package/lib/runtime/preflight.cjs +68 -0
- package/lib/runtime/preflight.test.cjs +62 -0
- package/lib/runtime/providers/openai-compat.cjs +102 -0
- package/lib/runtime/providers/openai-compat.test.cjs +103 -0
- package/lib/runtime/tools/index.cjs +415 -0
- package/lib/runtime/tools/index.test.cjs +230 -0
- package/lib/security/review.cjs +4 -4
- package/lib/security/review.test.cjs +6 -6
- package/np-tools.cjs +1 -0
- package/package.json +1 -1
- package/workflows/add-tests.md +41 -0
- package/workflows/architect-phase.md +19 -0
- package/workflows/discuss-phase.md +29 -10
- package/workflows/execute-phase.md +93 -4
- package/workflows/plan-phase.md +57 -16
- package/workflows/research-phase.md +45 -0
- package/workflows/scan-codebase.md +21 -3
- package/workflows/validate-phase.md +30 -13
- package/workflows/verify-work.md +17 -0
|
@@ -56,30 +56,30 @@ function _setEnv(k, v) {
|
|
|
56
56
|
else process.env[k] = v;
|
|
57
57
|
}
|
|
58
58
|
|
|
59
|
-
test('SH-1: spawn-headless requires --agent', () => {
|
|
59
|
+
test('SH-1: spawn-headless requires --agent', async () => {
|
|
60
60
|
const r = _mkRoot();
|
|
61
61
|
const cap = _cap();
|
|
62
|
-
assert.
|
|
63
|
-
() => spawnHeadless.run([], { cwd: r, stdout: cap.stub }),
|
|
62
|
+
await assert.rejects(
|
|
63
|
+
async () => spawnHeadless.run([], { cwd: r, stdout: cap.stub }),
|
|
64
64
|
(err) => err && err.code === 'spawn-headless-missing-agent',
|
|
65
65
|
);
|
|
66
66
|
});
|
|
67
67
|
|
|
68
|
-
test('SH-2: spawn-headless requires --prompt-path', () => {
|
|
68
|
+
test('SH-2: spawn-headless requires --prompt-path', async () => {
|
|
69
69
|
const r = _mkRoot();
|
|
70
70
|
const cap = _cap();
|
|
71
|
-
assert.
|
|
72
|
-
() => spawnHeadless.run(['--agent', 'np-test-critic'], { cwd: r, stdout: cap.stub }),
|
|
71
|
+
await assert.rejects(
|
|
72
|
+
async () => spawnHeadless.run(['--agent', 'np-test-critic'], { cwd: r, stdout: cap.stub }),
|
|
73
73
|
(err) => err && err.code === 'spawn-headless-missing-prompt-path',
|
|
74
74
|
);
|
|
75
75
|
});
|
|
76
76
|
|
|
77
|
-
test('SH-3: spawn-headless requires --output-path', () => {
|
|
77
|
+
test('SH-3: spawn-headless requires --output-path', async () => {
|
|
78
78
|
const r = _mkRoot();
|
|
79
79
|
fs.writeFileSync(path.join(r, 'p.md'), 'do the audit', 'utf-8');
|
|
80
80
|
const cap = _cap();
|
|
81
|
-
assert.
|
|
82
|
-
() => spawnHeadless.run(
|
|
81
|
+
await assert.rejects(
|
|
82
|
+
async () => spawnHeadless.run(
|
|
83
83
|
['--agent', 'np-test-critic', '--prompt-path', 'p.md'],
|
|
84
84
|
{ cwd: r, stdout: cap.stub },
|
|
85
85
|
),
|
|
@@ -87,11 +87,11 @@ test('SH-3: spawn-headless requires --output-path', () => {
|
|
|
87
87
|
);
|
|
88
88
|
});
|
|
89
89
|
|
|
90
|
-
test('SH-4: spawn-headless rejects path traversal on prompt-path', () => {
|
|
90
|
+
test('SH-4: spawn-headless rejects path traversal on prompt-path', async () => {
|
|
91
91
|
const r = _mkRoot();
|
|
92
92
|
const cap = _cap();
|
|
93
|
-
assert.
|
|
94
|
-
() => spawnHeadless.run(
|
|
93
|
+
await assert.rejects(
|
|
94
|
+
async () => spawnHeadless.run(
|
|
95
95
|
['--agent', 'np-test-critic',
|
|
96
96
|
'--prompt-path', '/etc/passwd',
|
|
97
97
|
'--output-path', 'out.json'],
|
|
@@ -101,12 +101,12 @@ test('SH-4: spawn-headless rejects path traversal on prompt-path', () => {
|
|
|
101
101
|
);
|
|
102
102
|
});
|
|
103
103
|
|
|
104
|
-
test('SH-5: spawn-headless rejects unknown agent', () => {
|
|
104
|
+
test('SH-5: spawn-headless rejects unknown agent', async () => {
|
|
105
105
|
const r = _mkRoot();
|
|
106
106
|
fs.writeFileSync(path.join(r, 'p.md'), 'audit', 'utf-8');
|
|
107
107
|
const cap = _cap();
|
|
108
|
-
assert.
|
|
109
|
-
() => spawnHeadless.run(
|
|
108
|
+
await assert.rejects(
|
|
109
|
+
async () => spawnHeadless.run(
|
|
110
110
|
['--agent', 'np-does-not-exist',
|
|
111
111
|
'--prompt-path', 'p.md',
|
|
112
112
|
'--output-path', 'out.json'],
|
|
@@ -116,12 +116,12 @@ test('SH-5: spawn-headless rejects unknown agent', () => {
|
|
|
116
116
|
);
|
|
117
117
|
});
|
|
118
118
|
|
|
119
|
-
test('SH-6: spawn-headless rejects invalid agent name (path-injection guard)', () => {
|
|
119
|
+
test('SH-6: spawn-headless rejects invalid agent name (path-injection guard)', async () => {
|
|
120
120
|
const r = _mkRoot();
|
|
121
121
|
fs.writeFileSync(path.join(r, 'p.md'), 'audit', 'utf-8');
|
|
122
122
|
const cap = _cap();
|
|
123
|
-
assert.
|
|
124
|
-
() => spawnHeadless.run(
|
|
123
|
+
await assert.rejects(
|
|
124
|
+
async () => spawnHeadless.run(
|
|
125
125
|
['--agent', '../../etc/passwd',
|
|
126
126
|
'--prompt-path', 'p.md',
|
|
127
127
|
'--output-path', 'out.json'],
|
|
@@ -131,13 +131,13 @@ test('SH-6: spawn-headless rejects invalid agent name (path-injection guard)', (
|
|
|
131
131
|
);
|
|
132
132
|
});
|
|
133
133
|
|
|
134
|
-
test('SH-7: spawn-headless reports claude-not-found when binary missing', () => {
|
|
134
|
+
test('SH-7: spawn-headless reports claude-not-found when binary missing', async () => {
|
|
135
135
|
const r = _mkRoot();
|
|
136
136
|
fs.writeFileSync(path.join(r, 'p.md'), 'audit', 'utf-8');
|
|
137
137
|
_setEnv('NUBOS_PILOT_CLAUDE_BIN', path.join(r, 'no-such-binary'));
|
|
138
138
|
const cap = _cap();
|
|
139
|
-
assert.
|
|
140
|
-
() => spawnHeadless.run(
|
|
139
|
+
await assert.rejects(
|
|
140
|
+
async () => spawnHeadless.run(
|
|
141
141
|
['--agent', 'np-test-critic',
|
|
142
142
|
'--prompt-path', 'p.md',
|
|
143
143
|
'--output-path', 'out.json'],
|
|
@@ -147,7 +147,7 @@ test('SH-7: spawn-headless reports claude-not-found when binary missing', () =>
|
|
|
147
147
|
);
|
|
148
148
|
});
|
|
149
149
|
|
|
150
|
-
test('SH-8: spawn-headless captures stdout to output-path on success (mock binary)', () => {
|
|
150
|
+
test('SH-8: spawn-headless captures stdout to output-path on success (mock binary)', async () => {
|
|
151
151
|
const r = _mkRoot();
|
|
152
152
|
fs.writeFileSync(path.join(r, 'p.md'), 'audit', 'utf-8');
|
|
153
153
|
const mockBin = path.join(r, 'mock-claude.sh');
|
|
@@ -155,7 +155,7 @@ test('SH-8: spawn-headless captures stdout to output-path on success (mock binar
|
|
|
155
155
|
fs.chmodSync(mockBin, 0o755);
|
|
156
156
|
_setEnv('NUBOS_PILOT_CLAUDE_BIN', mockBin);
|
|
157
157
|
const cap = _cap();
|
|
158
|
-
const rc = spawnHeadless.run(
|
|
158
|
+
const rc = await spawnHeadless.run(
|
|
159
159
|
['--agent', 'np-test-critic',
|
|
160
160
|
'--prompt-path', 'p.md',
|
|
161
161
|
'--output-path', 'out.json'],
|
|
@@ -169,7 +169,7 @@ test('SH-8: spawn-headless captures stdout to output-path on success (mock binar
|
|
|
169
169
|
assert.match(written, /"verdict":"passed"/);
|
|
170
170
|
});
|
|
171
171
|
|
|
172
|
-
test('SH-9: spawn-headless surfaces non-zero subprocess exit (mock failure)', () => {
|
|
172
|
+
test('SH-9: spawn-headless surfaces non-zero subprocess exit (mock failure)', async () => {
|
|
173
173
|
const r = _mkRoot();
|
|
174
174
|
fs.writeFileSync(path.join(r, 'p.md'), 'audit', 'utf-8');
|
|
175
175
|
const mockBin = path.join(r, 'mock-fail.sh');
|
|
@@ -177,7 +177,7 @@ test('SH-9: spawn-headless surfaces non-zero subprocess exit (mock failure)', ()
|
|
|
177
177
|
fs.chmodSync(mockBin, 0o755);
|
|
178
178
|
_setEnv('NUBOS_PILOT_CLAUDE_BIN', mockBin);
|
|
179
179
|
const cap = _cap();
|
|
180
|
-
const rc = spawnHeadless.run(
|
|
180
|
+
const rc = await spawnHeadless.run(
|
|
181
181
|
['--agent', 'np-test-critic',
|
|
182
182
|
'--prompt-path', 'p.md',
|
|
183
183
|
'--output-path', 'out.json'],
|
|
@@ -189,12 +189,12 @@ test('SH-9: spawn-headless surfaces non-zero subprocess exit (mock failure)', ()
|
|
|
189
189
|
assert.match(payload.stderr_excerpt, /boom/);
|
|
190
190
|
});
|
|
191
191
|
|
|
192
|
-
test('SH-10: spawn-headless rejects --timeout-ms below 1000', () => {
|
|
192
|
+
test('SH-10: spawn-headless rejects --timeout-ms below 1000', async () => {
|
|
193
193
|
const r = _mkRoot();
|
|
194
194
|
fs.writeFileSync(path.join(r, 'p.md'), 'audit', 'utf-8');
|
|
195
195
|
const cap = _cap();
|
|
196
|
-
assert.
|
|
197
|
-
() => spawnHeadless.run(
|
|
196
|
+
await assert.rejects(
|
|
197
|
+
async () => spawnHeadless.run(
|
|
198
198
|
['--agent', 'np-test-critic',
|
|
199
199
|
'--prompt-path', 'p.md',
|
|
200
200
|
'--output-path', 'out.json',
|
|
@@ -205,7 +205,7 @@ test('SH-10: spawn-headless rejects --timeout-ms below 1000', () => {
|
|
|
205
205
|
);
|
|
206
206
|
});
|
|
207
207
|
|
|
208
|
-
test('SH-11: spawn-headless writes output atomically (no .tmp residue)', () => {
|
|
208
|
+
test('SH-11: spawn-headless writes output atomically (no .tmp residue)', async () => {
|
|
209
209
|
const r = _mkRoot();
|
|
210
210
|
fs.writeFileSync(path.join(r, 'p.md'), 'audit', 'utf-8');
|
|
211
211
|
const mockBin = path.join(r, 'mock-claude.sh');
|
|
@@ -213,7 +213,7 @@ test('SH-11: spawn-headless writes output atomically (no .tmp residue)', () => {
|
|
|
213
213
|
fs.chmodSync(mockBin, 0o755);
|
|
214
214
|
_setEnv('NUBOS_PILOT_CLAUDE_BIN', mockBin);
|
|
215
215
|
const cap = _cap();
|
|
216
|
-
const rc = spawnHeadless.run(
|
|
216
|
+
const rc = await spawnHeadless.run(
|
|
217
217
|
['--agent', 'np-test-critic',
|
|
218
218
|
'--prompt-path', 'p.md',
|
|
219
219
|
'--output-path', 'out.json'],
|
|
@@ -321,7 +321,7 @@ test('SH-REDACT-2 _redactSecrets is a no-op on safe text', () => {
|
|
|
321
321
|
assert.equal(spawnHeadless._redactSecrets(safe), safe);
|
|
322
322
|
});
|
|
323
323
|
|
|
324
|
-
test('SH-AUDIT-FIRST spawn-trail is written BEFORE caller-visible output (audit-first)', () => {
|
|
324
|
+
test('SH-AUDIT-FIRST spawn-trail is written BEFORE caller-visible output (audit-first)', async () => {
|
|
325
325
|
const r = _mkRoot();
|
|
326
326
|
fs.writeFileSync(path.join(r, 'p.md'), 'audit', 'utf-8');
|
|
327
327
|
const mockBin = path.join(r, 'mock.sh');
|
|
@@ -335,7 +335,7 @@ test('SH-AUDIT-FIRST spawn-trail is written BEFORE caller-visible output (audit-
|
|
|
335
335
|
const cap = _cap();
|
|
336
336
|
let thrown = null;
|
|
337
337
|
try {
|
|
338
|
-
spawnHeadless.run(
|
|
338
|
+
await spawnHeadless.run(
|
|
339
339
|
['--agent', 'np-test-critic', '--prompt-path', 'p.md', '--output-path', 'out.json'],
|
|
340
340
|
{ cwd: r, stdout: cap.stub },
|
|
341
341
|
);
|
|
@@ -348,7 +348,7 @@ test('SH-AUDIT-FIRST spawn-trail is written BEFORE caller-visible output (audit-
|
|
|
348
348
|
'output must NOT exist if audit append failed (audit-first invariant)');
|
|
349
349
|
});
|
|
350
350
|
|
|
351
|
-
test('SH-PARSE-OK payload_parse_ok=false when claude returns non-JSON output', () => {
|
|
351
|
+
test('SH-PARSE-OK payload_parse_ok=false when claude returns non-JSON output', async () => {
|
|
352
352
|
const r = _mkRoot();
|
|
353
353
|
fs.writeFileSync(path.join(r, 'p.md'), 'audit', 'utf-8');
|
|
354
354
|
const mockBin = path.join(r, 'mock-plain.sh');
|
|
@@ -357,7 +357,7 @@ test('SH-PARSE-OK payload_parse_ok=false when claude returns non-JSON output', (
|
|
|
357
357
|
_setEnv('NUBOS_PILOT_CLAUDE_BIN', mockBin);
|
|
358
358
|
_setEnv('NUBOS_PILOT_RUN_ID', 'r-parse-test');
|
|
359
359
|
const cap = _cap();
|
|
360
|
-
spawnHeadless.run(
|
|
360
|
+
await spawnHeadless.run(
|
|
361
361
|
['--agent', 'np-test-critic', '--prompt-path', 'p.md', '--output-path', 'out.json'],
|
|
362
362
|
{ cwd: r, stdout: cap.stub },
|
|
363
363
|
);
|
|
@@ -415,7 +415,7 @@ test('SH-ENV-3 NUBOS_PILOT_SPAWN_ENV_PASSTHROUGH allow-lists by exact key name',
|
|
|
415
415
|
assert.equal(filtered.NOT_LISTED, undefined);
|
|
416
416
|
});
|
|
417
417
|
|
|
418
|
-
test('SH-TRAIL-1 spawn writes append-only spawn-trail record with run_id + prompt/response sha256 + timing', () => {
|
|
418
|
+
test('SH-TRAIL-1 spawn writes append-only spawn-trail record with run_id + prompt/response sha256 + timing', async () => {
|
|
419
419
|
const r = _mkRoot();
|
|
420
420
|
fs.writeFileSync(path.join(r, 'p.md'), 'do the audit', 'utf-8');
|
|
421
421
|
const mockBin = path.join(r, 'mock-claude.sh');
|
|
@@ -424,7 +424,7 @@ test('SH-TRAIL-1 spawn writes append-only spawn-trail record with run_id + promp
|
|
|
424
424
|
_setEnv('NUBOS_PILOT_CLAUDE_BIN', mockBin);
|
|
425
425
|
_setEnv('NUBOS_PILOT_RUN_ID', 'r-traceme-deadbeef');
|
|
426
426
|
const cap = _cap();
|
|
427
|
-
const rc = spawnHeadless.run(
|
|
427
|
+
const rc = await spawnHeadless.run(
|
|
428
428
|
['--agent', 'np-test-critic', '--prompt-path', 'p.md', '--output-path', 'out.json'],
|
|
429
429
|
{ cwd: r, stdout: cap.stub },
|
|
430
430
|
);
|
|
@@ -452,7 +452,7 @@ test('SH-TRAIL-1 spawn writes append-only spawn-trail record with run_id + promp
|
|
|
452
452
|
assert.ok(Number.isFinite(rec.duration_ms) && rec.duration_ms >= 0);
|
|
453
453
|
});
|
|
454
454
|
|
|
455
|
-
test('SH-TRAIL-1b run_id is seeded BEFORE spawn so the child env inherits NUBOS_PILOT_RUN_ID', () => {
|
|
455
|
+
test('SH-TRAIL-1b run_id is seeded BEFORE spawn so the child env inherits NUBOS_PILOT_RUN_ID', async () => {
|
|
456
456
|
const r = _mkRoot();
|
|
457
457
|
fs.writeFileSync(path.join(r, 'p.md'), 'audit', 'utf-8');
|
|
458
458
|
// Mock-claude echoes its own ENV var so we can prove the child saw it.
|
|
@@ -465,7 +465,7 @@ test('SH-TRAIL-1b run_id is seeded BEFORE spawn so the child env inherits NUBOS_
|
|
|
465
465
|
// Crucially: do NOT set NUBOS_PILOT_RUN_ID; the lazy-seed must happen.
|
|
466
466
|
runContext._resetForTests();
|
|
467
467
|
const cap = _cap();
|
|
468
|
-
spawnHeadless.run(
|
|
468
|
+
await spawnHeadless.run(
|
|
469
469
|
['--agent', 'np-test-critic', '--prompt-path', 'p.md', '--output-path', 'out.json'],
|
|
470
470
|
{ cwd: r, stdout: cap.stub },
|
|
471
471
|
);
|
|
@@ -475,7 +475,7 @@ test('SH-TRAIL-1b run_id is seeded BEFORE spawn so the child env inherits NUBOS_
|
|
|
475
475
|
assert.equal(childRunId, payload.run_id, 'child must inherit parent NUBOS_PILOT_RUN_ID via filtered env');
|
|
476
476
|
});
|
|
477
477
|
|
|
478
|
-
test('SH-TRAIL-2 two sequential spawns append two parseable trail lines (jsonl integrity)', () => {
|
|
478
|
+
test('SH-TRAIL-2 two sequential spawns append two parseable trail lines (jsonl integrity)', async () => {
|
|
479
479
|
const r = _mkRoot();
|
|
480
480
|
fs.writeFileSync(path.join(r, 'p.md'), 'audit X', 'utf-8');
|
|
481
481
|
const mockBin = path.join(r, 'mock.sh');
|
|
@@ -485,7 +485,7 @@ test('SH-TRAIL-2 two sequential spawns append two parseable trail lines (jsonl i
|
|
|
485
485
|
_setEnv('NUBOS_PILOT_RUN_ID', 'r-test-multi-aaa1');
|
|
486
486
|
const cap = _cap();
|
|
487
487
|
for (let i = 0; i < 2; i++) {
|
|
488
|
-
spawnHeadless.run(
|
|
488
|
+
await spawnHeadless.run(
|
|
489
489
|
['--agent', 'np-test-critic', '--prompt-path', 'p.md', '--output-path', 'out' + i + '.json'],
|
|
490
490
|
{ cwd: r, stdout: cap.stub },
|
|
491
491
|
);
|
|
@@ -496,15 +496,15 @@ test('SH-TRAIL-2 two sequential spawns append two parseable trail lines (jsonl i
|
|
|
496
496
|
for (const l of lines) JSON.parse(l);
|
|
497
497
|
});
|
|
498
498
|
|
|
499
|
-
test('SH-GUARD-1 refuses to spawn when NUBOS_PILOT_HEADLESS=1 (reentrancy guard)', () => {
|
|
499
|
+
test('SH-GUARD-1 refuses to spawn when NUBOS_PILOT_HEADLESS=1 (reentrancy guard)', async () => {
|
|
500
500
|
const r = _mkRoot();
|
|
501
501
|
fs.writeFileSync(path.join(r, 'p.md'), 'audit', 'utf-8');
|
|
502
502
|
const mockBin = _mockClaude(r, 'mock.sh', '#!/bin/sh\ncat > /dev/null\necho "{}"\n');
|
|
503
503
|
_setEnv('NUBOS_PILOT_CLAUDE_BIN', mockBin);
|
|
504
504
|
_setEnv('NUBOS_PILOT_HEADLESS', '1');
|
|
505
505
|
const cap = _cap();
|
|
506
|
-
assert.
|
|
507
|
-
|
|
506
|
+
await assert.rejects(
|
|
507
|
+
spawnHeadless.run(
|
|
508
508
|
['--agent', 'np-test-critic', '--prompt-path', 'p.md', '--output-path', 'out.json'],
|
|
509
509
|
{ cwd: r, stdout: cap.stub },
|
|
510
510
|
),
|
|
@@ -513,15 +513,15 @@ test('SH-GUARD-1 refuses to spawn when NUBOS_PILOT_HEADLESS=1 (reentrancy guard)
|
|
|
513
513
|
assert.equal(fs.existsSync(path.join(r, 'out.json')), false, 'no claude must be spawned inside a headless run');
|
|
514
514
|
});
|
|
515
515
|
|
|
516
|
-
test('SH-GUARD-2 refuses to spawn when hook depth has reached the cap (depth guard)', () => {
|
|
516
|
+
test('SH-GUARD-2 refuses to spawn when hook depth has reached the cap (depth guard)', async () => {
|
|
517
517
|
const r = _mkRoot();
|
|
518
518
|
fs.writeFileSync(path.join(r, 'p.md'), 'audit', 'utf-8');
|
|
519
519
|
const mockBin = _mockClaude(r, 'mock.sh', '#!/bin/sh\ncat > /dev/null\necho "{}"\n');
|
|
520
520
|
_setEnv('NUBOS_PILOT_CLAUDE_BIN', mockBin);
|
|
521
521
|
_setEnv('NUBOS_PILOT_HOOK_DEPTH', '1');
|
|
522
522
|
const cap = _cap();
|
|
523
|
-
assert.
|
|
524
|
-
|
|
523
|
+
await assert.rejects(
|
|
524
|
+
spawnHeadless.run(
|
|
525
525
|
['--agent', 'np-test-critic', '--prompt-path', 'p.md', '--output-path', 'out.json'],
|
|
526
526
|
{ cwd: r, stdout: cap.stub },
|
|
527
527
|
),
|
|
@@ -529,14 +529,14 @@ test('SH-GUARD-2 refuses to spawn when hook depth has reached the cap (depth gua
|
|
|
529
529
|
);
|
|
530
530
|
});
|
|
531
531
|
|
|
532
|
-
test('SH-GUARD-3 child env carries NUBOS_PILOT_HEADLESS=1 and depth=1 (one level deep only)', () => {
|
|
532
|
+
test('SH-GUARD-3 child env carries NUBOS_PILOT_HEADLESS=1 and depth=1 (one level deep only)', async () => {
|
|
533
533
|
const r = _mkRoot();
|
|
534
534
|
fs.writeFileSync(path.join(r, 'p.md'), 'audit', 'utf-8');
|
|
535
535
|
const mockBin = _mockClaude(r, 'mock.sh',
|
|
536
536
|
'#!/bin/sh\ncat > /dev/null\nprintf \'{"hl":"\'$NUBOS_PILOT_HEADLESS\'","depth":"\'$NUBOS_PILOT_HOOK_DEPTH\'"}\\n\'\n');
|
|
537
537
|
_setEnv('NUBOS_PILOT_CLAUDE_BIN', mockBin);
|
|
538
538
|
const cap = _cap();
|
|
539
|
-
const rc = spawnHeadless.run(
|
|
539
|
+
const rc = await spawnHeadless.run(
|
|
540
540
|
['--agent', 'np-test-critic', '--prompt-path', 'p.md', '--output-path', 'out.json'],
|
|
541
541
|
{ cwd: r, stdout: cap.stub },
|
|
542
542
|
);
|
|
@@ -546,7 +546,7 @@ test('SH-GUARD-3 child env carries NUBOS_PILOT_HEADLESS=1 and depth=1 (one level
|
|
|
546
546
|
assert.equal(child.depth, '1', 'child claude must run at hook depth 1');
|
|
547
547
|
});
|
|
548
548
|
|
|
549
|
-
test('SH-GUARD-4 refuses to spawn while a live lock for the same agent is held (concurrency guard)', () => {
|
|
549
|
+
test('SH-GUARD-4 refuses to spawn while a live lock for the same agent is held (concurrency guard)', async () => {
|
|
550
550
|
const r = _mkRoot();
|
|
551
551
|
fs.writeFileSync(path.join(r, 'p.md'), 'audit', 'utf-8');
|
|
552
552
|
const mockBin = _mockClaude(r, 'mock.sh', '#!/bin/sh\ncat > /dev/null\necho "{}"\n');
|
|
@@ -555,8 +555,8 @@ test('SH-GUARD-4 refuses to spawn while a live lock for the same agent is held (
|
|
|
555
555
|
assert.equal(held.acquired, true);
|
|
556
556
|
const cap = _cap();
|
|
557
557
|
try {
|
|
558
|
-
assert.
|
|
559
|
-
|
|
558
|
+
await assert.rejects(
|
|
559
|
+
spawnHeadless.run(
|
|
560
560
|
['--agent', 'np-test-critic', '--prompt-path', 'p.md', '--output-path', 'out.json'],
|
|
561
561
|
{ cwd: r, stdout: cap.stub },
|
|
562
562
|
),
|
|
@@ -567,14 +567,14 @@ test('SH-GUARD-4 refuses to spawn while a live lock for the same agent is held (
|
|
|
567
567
|
}
|
|
568
568
|
});
|
|
569
569
|
|
|
570
|
-
test('SH-GUARD-5 lock is released after a successful spawn (re-spawnable)', () => {
|
|
570
|
+
test('SH-GUARD-5 lock is released after a successful spawn (re-spawnable)', async () => {
|
|
571
571
|
const r = _mkRoot();
|
|
572
572
|
fs.writeFileSync(path.join(r, 'p.md'), 'audit', 'utf-8');
|
|
573
573
|
const mockBin = _mockClaude(r, 'mock.sh', '#!/bin/sh\ncat > /dev/null\necho "{}"\n');
|
|
574
574
|
_setEnv('NUBOS_PILOT_CLAUDE_BIN', mockBin);
|
|
575
575
|
const cap = _cap();
|
|
576
576
|
for (let i = 0; i < 2; i++) {
|
|
577
|
-
const rc = spawnHeadless.run(
|
|
577
|
+
const rc = await spawnHeadless.run(
|
|
578
578
|
['--agent', 'np-test-critic', '--prompt-path', 'p.md', '--output-path', 'out' + i + '.json'],
|
|
579
579
|
{ cwd: r, stdout: cap.stub },
|
|
580
580
|
);
|
|
@@ -583,7 +583,7 @@ test('SH-GUARD-5 lock is released after a successful spawn (re-spawnable)', () =
|
|
|
583
583
|
assert.equal(fs.existsSync(headlessGuard._lockPath(r, 'np-test-critic')), false, 'no lock residue after spawns');
|
|
584
584
|
});
|
|
585
585
|
|
|
586
|
-
test('SH-GUARD-6 a held lock for one agent does NOT block a different agent (per-agent scope)', () => {
|
|
586
|
+
test('SH-GUARD-6 a held lock for one agent does NOT block a different agent (per-agent scope)', async () => {
|
|
587
587
|
const r = _mkRoot();
|
|
588
588
|
fs.writeFileSync(
|
|
589
589
|
path.join(r, '.nubos-pilot', 'agents', 'np-other-critic.md'),
|
|
@@ -597,7 +597,7 @@ test('SH-GUARD-6 a held lock for one agent does NOT block a different agent (per
|
|
|
597
597
|
assert.equal(held.acquired, true);
|
|
598
598
|
const cap = _cap();
|
|
599
599
|
try {
|
|
600
|
-
const rc = spawnHeadless.run(
|
|
600
|
+
const rc = await spawnHeadless.run(
|
|
601
601
|
['--agent', 'np-other-critic', '--prompt-path', 'p.md', '--output-path', 'out.json'],
|
|
602
602
|
{ cwd: r, stdout: cap.stub },
|
|
603
603
|
);
|
|
@@ -607,13 +607,13 @@ test('SH-GUARD-6 a held lock for one agent does NOT block a different agent (per
|
|
|
607
607
|
}
|
|
608
608
|
});
|
|
609
609
|
|
|
610
|
-
test('SH-GUARD-7 lock is released even when the spawn errors (claude-not-found)', () => {
|
|
610
|
+
test('SH-GUARD-7 lock is released even when the spawn errors (claude-not-found)', async () => {
|
|
611
611
|
const r = _mkRoot();
|
|
612
612
|
fs.writeFileSync(path.join(r, 'p.md'), 'audit', 'utf-8');
|
|
613
613
|
_setEnv('NUBOS_PILOT_CLAUDE_BIN', path.join(r, 'no-such-binary'));
|
|
614
614
|
const cap = _cap();
|
|
615
|
-
assert.
|
|
616
|
-
|
|
615
|
+
await assert.rejects(
|
|
616
|
+
spawnHeadless.run(
|
|
617
617
|
['--agent', 'np-test-critic', '--prompt-path', 'p.md', '--output-path', 'out.json'],
|
|
618
618
|
{ cwd: r, stdout: cap.stub },
|
|
619
619
|
),
|
|
@@ -639,3 +639,53 @@ test('SH-ENV-4 NUBOS_PILOT_/CLAUDE_/ANTHROPIC_ prefixed vars pass through (white
|
|
|
639
639
|
assert.equal(filtered.ANTHROPIC_BASE_URL, 'https://api.anthropic.com');
|
|
640
640
|
assert.equal(filtered.UNRELATED_FOO, undefined);
|
|
641
641
|
});
|
|
642
|
+
|
|
643
|
+
test('SH-OFFHOST-1: openai-compat routing runs dispatchOffHost and writes a {result} envelope (no claude -p)', async () => {
|
|
644
|
+
const r = _mkRoot();
|
|
645
|
+
fs.writeFileSync(path.join(r, 'p.md'), 'review this diff', 'utf-8');
|
|
646
|
+
const cap = _cap();
|
|
647
|
+
let claudeWasCalled = false;
|
|
648
|
+
_setEnv('NUBOS_PILOT_CLAUDE_BIN', path.join(r, 'nonexistent-claude-should-not-run'));
|
|
649
|
+
let dispatchArgs = null;
|
|
650
|
+
const code = await spawnHeadless.run(
|
|
651
|
+
['--agent', 'np-test-critic', '--prompt-path', 'p.md', '--output-path', 'out.json'],
|
|
652
|
+
{
|
|
653
|
+
cwd: r,
|
|
654
|
+
stdout: cap.stub,
|
|
655
|
+
resolveImpl: () => ({ kind: 'openai-compat', provider: 'ollama', model: 'qwen2.5-coder:32b' }),
|
|
656
|
+
dispatchImpl: async (o) => { dispatchArgs = o; return { content: 'REVIEW: 0 risks', model: 'qwen2.5-coder:32b', provider: 'ollama' }; },
|
|
657
|
+
},
|
|
658
|
+
);
|
|
659
|
+
assert.equal(code, 0);
|
|
660
|
+
// dispatch received the agent + the prompt body as the task
|
|
661
|
+
assert.equal(dispatchArgs.agent, 'np-test-critic');
|
|
662
|
+
assert.match(dispatchArgs.task, /review this diff/);
|
|
663
|
+
// output is the claude-compatible {result} envelope so review/extract parse it unchanged
|
|
664
|
+
const out = JSON.parse(fs.readFileSync(path.join(r, 'out.json'), 'utf-8'));
|
|
665
|
+
assert.equal(out.result, 'REVIEW: 0 risks');
|
|
666
|
+
assert.equal(out.provider, 'ollama');
|
|
667
|
+
// caller-visible payload marks it off-host and the native claude bin was never invoked
|
|
668
|
+
const payload = JSON.parse(cap.get().trim());
|
|
669
|
+
assert.equal(payload.off_host, true);
|
|
670
|
+
assert.equal(payload.exit_code, 0);
|
|
671
|
+
assert.equal(claudeWasCalled, false);
|
|
672
|
+
});
|
|
673
|
+
|
|
674
|
+
test('SH-OFFHOST-2: a failing off-host dispatch returns exit 2 with an empty result (soft-fail parity)', async () => {
|
|
675
|
+
const r = _mkRoot();
|
|
676
|
+
fs.writeFileSync(path.join(r, 'p.md'), 'review', 'utf-8');
|
|
677
|
+
const cap = _cap();
|
|
678
|
+
const code = await spawnHeadless.run(
|
|
679
|
+
['--agent', 'np-test-critic', '--prompt-path', 'p.md', '--output-path', 'out.json'],
|
|
680
|
+
{
|
|
681
|
+
cwd: r,
|
|
682
|
+
stdout: cap.stub,
|
|
683
|
+
resolveImpl: () => ({ kind: 'openai-compat', provider: 'ollama', model: 'x' }),
|
|
684
|
+
dispatchImpl: async () => { const e = new Error('provider unreachable'); e.code = 'preflight-failed'; throw e; },
|
|
685
|
+
},
|
|
686
|
+
);
|
|
687
|
+
assert.equal(code, 2);
|
|
688
|
+
const out = JSON.parse(fs.readFileSync(path.join(r, 'out.json'), 'utf-8'));
|
|
689
|
+
assert.equal(out.result, '');
|
|
690
|
+
assert.equal(out.is_error, true);
|
|
691
|
+
});
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const fs = require('node:fs');
|
|
4
|
+
const { NubosPilotError } = require('../../lib/core.cjs');
|
|
5
|
+
const { dispatchOffHost } = require('../../lib/runtime/dispatch.cjs');
|
|
6
|
+
|
|
7
|
+
function _usage() {
|
|
8
|
+
process.stderr.write(
|
|
9
|
+
'Usage: np-tools.cjs spawn-offhost --agent <name> (--task <str> | --task-file <path>) '
|
|
10
|
+
+ '[--cwd <dir>] [--phase P] [--plan P] [--task-id T] [--max-iterations N] [--allow-bash] [--read-only] [--no-audit]\n',
|
|
11
|
+
);
|
|
12
|
+
}
|
|
13
|
+
|
|
14
|
+
function _parse(argv) {
|
|
15
|
+
const out = { allowBash: false, readOnly: false };
|
|
16
|
+
const a = argv.slice();
|
|
17
|
+
while (a.length) {
|
|
18
|
+
const f = a.shift();
|
|
19
|
+
if (f === '--agent') out.agent = a.shift();
|
|
20
|
+
else if (f === '--task') out.task = a.shift();
|
|
21
|
+
else if (f === '--task-file') out.taskFile = a.shift();
|
|
22
|
+
else if (f === '--phase') out.phase = a.shift();
|
|
23
|
+
else if (f === '--plan') out.plan = a.shift();
|
|
24
|
+
else if (f === '--task-id') out.taskId = a.shift();
|
|
25
|
+
else if (f === '--max-iterations') out.maxIterations = Number(a.shift());
|
|
26
|
+
else if (f === '--cwd') out.cwd = a.shift();
|
|
27
|
+
else if (f === '--output-schema') out.outputSchema = a.shift();
|
|
28
|
+
else if (f === '--allow-bash') out.allowBash = true;
|
|
29
|
+
else if (f === '--read-only') out.readOnly = true;
|
|
30
|
+
else if (f === '--no-audit') out.skipAudit = true;
|
|
31
|
+
}
|
|
32
|
+
return out;
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
async function run(argv) {
|
|
36
|
+
const args = Array.isArray(argv) ? argv.slice() : process.argv.slice(3);
|
|
37
|
+
if (!args.length || args[0] === '--help') { _usage(); return 1; }
|
|
38
|
+
const parsed = _parse(args);
|
|
39
|
+
|
|
40
|
+
let task = parsed.task;
|
|
41
|
+
if (parsed.taskFile) {
|
|
42
|
+
try { task = fs.readFileSync(parsed.taskFile, 'utf-8'); }
|
|
43
|
+
catch { process.stderr.write(JSON.stringify({ code: 'spawn-offhost-task-file-unreadable', file: require('node:path').basename(parsed.taskFile) }) + '\n'); return 1; }
|
|
44
|
+
}
|
|
45
|
+
if (!parsed.agent || typeof task !== 'string') { _usage(); return 1; }
|
|
46
|
+
|
|
47
|
+
try {
|
|
48
|
+
const result = await dispatchOffHost({
|
|
49
|
+
agent: parsed.agent,
|
|
50
|
+
task,
|
|
51
|
+
cwd: parsed.cwd || process.cwd(),
|
|
52
|
+
phase: parsed.phase,
|
|
53
|
+
plan: parsed.plan,
|
|
54
|
+
taskId: parsed.taskId,
|
|
55
|
+
maxIterations: parsed.maxIterations,
|
|
56
|
+
allowBash: parsed.allowBash,
|
|
57
|
+
readOnly: parsed.readOnly,
|
|
58
|
+
skipAudit: parsed.skipAudit,
|
|
59
|
+
outputSchema: parsed.outputSchema,
|
|
60
|
+
});
|
|
61
|
+
if (result && result.metrics_recorded === false) {
|
|
62
|
+
process.stderr.write('spawn-offhost: metrics row was not recorded (telemetry only; run succeeded)\n');
|
|
63
|
+
}
|
|
64
|
+
if (result && result.rule9 && result.rule9.ok === false) {
|
|
65
|
+
process.stderr.write('spawn-offhost: Rule-9 violation (' + (result.rule9.violation || result.rule9.error)
|
|
66
|
+
+ ') — the agent did not satisfy the search bar. Do NOT commit this output as-is; re-run or route back to the agent.\n');
|
|
67
|
+
}
|
|
68
|
+
if (result && result.capability && result.capability.ok === false) {
|
|
69
|
+
const c = result.capability;
|
|
70
|
+
process.stderr.write('spawn-offhost: the model advertised ' + c.toolsAdvertised
|
|
71
|
+
+ ' tool(s) but made zero tool calls — the provider/model likely does NOT support OpenAI function/tool-calling. '
|
|
72
|
+
+ (c.mutating
|
|
73
|
+
? 'This agent edits files; off-host it produced NO changes. Route it to a tool-calling-capable model or keep it native.'
|
|
74
|
+
: 'If this agent was expected to inspect the workspace, its output may be ungrounded — verify before relying on it.')
|
|
75
|
+
+ '\n');
|
|
76
|
+
}
|
|
77
|
+
process.stdout.write(JSON.stringify(result) + '\n');
|
|
78
|
+
return 0;
|
|
79
|
+
} catch (err) {
|
|
80
|
+
if (err && err.name === 'NubosPilotError') {
|
|
81
|
+
process.stderr.write(JSON.stringify({ code: err.code, message: err.message, details: err.details }) + '\n');
|
|
82
|
+
} else {
|
|
83
|
+
process.stderr.write(String((err && err.stack) || err) + '\n');
|
|
84
|
+
}
|
|
85
|
+
return 1;
|
|
86
|
+
}
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
module.exports = { run, _parse };
|
|
90
|
+
|
|
91
|
+
if (require.main === module) {
|
|
92
|
+
run(process.argv.slice(2)).then((code) => process.exit(code || 0));
|
|
93
|
+
}
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
const { test } = require('node:test');
|
|
2
|
+
const assert = require('node:assert/strict');
|
|
3
|
+
|
|
4
|
+
const subcmd = require('./spawn-offhost.cjs');
|
|
5
|
+
|
|
6
|
+
function _capture(fn) {
|
|
7
|
+
const out = []; const err = [];
|
|
8
|
+
const oo = process.stdout.write.bind(process.stdout);
|
|
9
|
+
const oe = process.stderr.write.bind(process.stderr);
|
|
10
|
+
process.stdout.write = (c) => { out.push(String(c)); return true; };
|
|
11
|
+
process.stderr.write = (c) => { err.push(String(c)); return true; };
|
|
12
|
+
return Promise.resolve(fn()).then((rc) => {
|
|
13
|
+
process.stdout.write = oo; process.stderr.write = oe;
|
|
14
|
+
return { rc, stdout: out.join(''), stderr: err.join('') };
|
|
15
|
+
}, (e) => { process.stdout.write = oo; process.stderr.write = oe; throw e; });
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
// SOH-1: feeds a full argv through _parse and checks every field it is
// expected to populate, one strict-equality assertion per field.
test('SOH-1: _parse reads agent/task, the boolean flags, --cwd and --no-audit', () => {
  const argv = [
    '--agent', 'np-executor',
    '--task', 'do x',
    '--allow-bash',
    '--max-iterations', '5',
    '--cwd', '/wt',
    '--no-audit',
  ];
  const parsed = subcmd._parse(argv);

  // [field, expected] pairs, checked in order.
  const expectations = [
    ['agent', 'np-executor'],
    ['task', 'do x'],
    ['allowBash', true],
    ['readOnly', false],
    ['maxIterations', 5],
    ['cwd', '/wt'],
    ['skipAudit', true],
  ];
  for (const [field, expected] of expectations) {
    assert.equal(parsed[field], expected);
  }
});
|
|
28
|
+
|
|
29
|
+
// SOH-2: running with an empty argv must fail with exit code 1 and emit a
// usage message on stderr.
test('SOH-2: missing args prints usage and returns 1', async () => {
  const captured = await _capture(() => subcmd.run([]));
  assert.equal(captured.rc, 1);
  assert.match(captured.stderr, /Usage:/);
});
|
|
34
|
+
|
|
35
|
+
// SOH-3: an --agent flag with no accompanying task must fail with exit code 1.
test('SOH-3: --agent without a task returns 1', async () => {
  const argv = ['--agent', 'np-executor'];
  const captured = await _capture(() => subcmd.run(argv));
  assert.equal(captured.rc, 1);
});
|
package/lib/agents.cjs
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
const fs = require('node:fs');
|
|
4
4
|
const path = require('node:path');
|
|
5
|
-
const { extractFrontmatter } = require('./frontmatter.cjs');
|
|
5
|
+
const { extractFrontmatter, stripFrontmatter } = require('./frontmatter.cjs');
|
|
6
6
|
const { NubosPilotError, findProjectRoot } = require('./core.cjs');
|
|
7
7
|
|
|
8
8
|
const REQUIRED = ['name', 'description', 'tier', 'tools'];
|
|
@@ -60,7 +60,7 @@ function validateAgentFrontmatter(fm, agentName) {
|
|
|
60
60
|
|
|
61
61
|
const AGENT_NAME_RE = /^[a-zA-Z0-9_-]+$/;
|
|
62
62
|
|
|
63
|
-
function
|
|
63
|
+
function _resolveAgentPath(name, cwd) {
|
|
64
64
|
if (typeof name !== 'string' || !AGENT_NAME_RE.test(name)) {
|
|
65
65
|
throw new NubosPilotError(
|
|
66
66
|
'agent-invalid-name',
|
|
@@ -83,10 +83,23 @@ function _loadAgentFromDisk(name, cwd) {
|
|
|
83
83
|
{ name, path: candidates[0], tried: candidates },
|
|
84
84
|
);
|
|
85
85
|
}
|
|
86
|
+
return found;
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
/**
 * Reads an agent definition from disk and returns its validated frontmatter.
 *
 * @param {string} name - Agent name, passed to _resolveAgentPath.
 * @param {string} cwd - Directory passed through to _resolveAgentPath.
 * @returns {*} Whatever validateAgentFrontmatter returns for the parsed
 *   frontmatter.
 */
function _loadAgentFromDisk(name, cwd) {
  const agentPath = _resolveAgentPath(name, cwd);
  const text = fs.readFileSync(agentPath, 'utf-8');
  const meta = extractFrontmatter(text).frontmatter;
  return validateAgentFrontmatter(meta, name);
}
|
|
89
94
|
|
|
95
|
+
/**
 * Loads an agent definition file and returns its frontmatter together with
 * the remaining body text and the path it was read from.
 *
 * @param {string} name - Agent name, passed to _resolveAgentPath.
 * @param {string} cwd - Directory passed through to _resolveAgentPath.
 * @returns {{frontmatter: *, body: *, path: *}} The parsed frontmatter, the
 *   file content with frontmatter stripped, and the resolved file path.
 */
function loadAgentSource(name, cwd) {
  const agentPath = _resolveAgentPath(name, cwd);
  const text = fs.readFileSync(agentPath, 'utf-8');
  const meta = extractFrontmatter(text).frontmatter;

  // Called for validation only: its return value is discarded and the
  // frontmatter as parsed is what gets returned.
  validateAgentFrontmatter(meta, name);

  return {
    frontmatter: meta,
    body: stripFrontmatter(text),
    path: agentPath,
  };
}
|
|
102
|
+
|
|
90
103
|
function loadAgent(name, cwd) {
|
|
91
104
|
const fm = _loadAgentFromDisk(name, cwd);
|
|
92
105
|
if (fm.module === true) {
|
|
@@ -143,6 +156,7 @@ module.exports = {
|
|
|
143
156
|
validateAgentFrontmatter,
|
|
144
157
|
loadAgent,
|
|
145
158
|
loadAgentModule,
|
|
159
|
+
loadAgentSource,
|
|
146
160
|
listAgents,
|
|
147
161
|
getAgentSkills,
|
|
148
162
|
AGENT_NAME_RE,
|
package/lib/config-schema.cjs
CHANGED
|
@@ -9,6 +9,8 @@ const VALID_TIERS = Object.freeze(['haiku', 'sonnet', 'opus']);
|
|
|
9
9
|
const SCHEMA = Object.freeze({
|
|
10
10
|
scope: { type: 'enum', values: VALID_SCOPES, optional: true },
|
|
11
11
|
model_profile: { type: 'enum', values: VALID_MODEL_PROFILES, optional: true },
|
|
12
|
+
model_providers: { type: 'object', shape: 'any', optional: true },
|
|
13
|
+
agent_routing: { type: 'object', shape: 'any', optional: true },
|
|
12
14
|
response_language:{ type: 'string', optional: true },
|
|
13
15
|
runtime: { type: 'string', optional: true },
|
|
14
16
|
runtimes: { type: 'array', element: 'string', optional: true },
|
|
@@ -220,7 +222,9 @@ function _clone(v) {
|
|
|
220
222
|
return out;
|
|
221
223
|
}
|
|
222
224
|
|
|
223
|
-
const SCHEMA_ONLY_KEYS = Object.freeze([
|
|
225
|
+
// Frozen list of top-level config key names.
// NOTE(review): presumably keys that are declared in SCHEMA but handled
// specially elsewhere in this module — confirm against the usages.
const SCHEMA_ONLY_KEYS = Object.freeze([
  'runtime',
  'runtimes',
  'agent_skills',
  'model_providers',
  'agent_routing',
]);
|
|
224
228
|
|
|
225
229
|
module.exports = {
|
|
226
230
|
SCHEMA,
|