npm - nubos-pilot - Versions diffs - 1.2.3 → 1.3.0 - Mend

nubos-pilot 1.2.3 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (45)

package/CHANGELOG.md +24 -0
package/README.md +18 -1
package/SECURITY.md +3 -4
package/bin/np-tools/_commands.cjs +1 -0
package/bin/np-tools/learnings.cjs +5 -1
package/bin/np-tools/resolve-model.cjs +55 -1
package/bin/np-tools/resolve-model.test.cjs +139 -0
package/bin/np-tools/security.cjs +4 -1
package/bin/np-tools/spawn-headless.cjs +135 -2
package/bin/np-tools/spawn-headless.test.cjs +225 -40
package/bin/np-tools/spawn-offhost.cjs +93 -0
package/bin/np-tools/spawn-offhost.test.cjs +38 -0
package/lib/agents.cjs +16 -2
package/lib/config-schema.cjs +5 -1
package/lib/headless-guard.cjs +127 -0
package/lib/headless-guard.test.cjs +119 -0
package/lib/learnings/extract.cjs +4 -4
package/lib/learnings/extract.test.cjs +8 -8
package/lib/model-providers.cjs +118 -0
package/lib/model-providers.test.cjs +85 -0
package/lib/runtime/agent-loop.cjs +64 -0
package/lib/runtime/agent-loop.test.cjs +135 -0
package/lib/runtime/dispatch.cjs +174 -0
package/lib/runtime/dispatch.test.cjs +193 -0
package/lib/runtime/preflight.cjs +68 -0
package/lib/runtime/preflight.test.cjs +62 -0
package/lib/runtime/providers/openai-compat.cjs +102 -0
package/lib/runtime/providers/openai-compat.test.cjs +103 -0
package/lib/runtime/tools/index.cjs +415 -0
package/lib/runtime/tools/index.test.cjs +230 -0
package/lib/security/review.cjs +4 -4
package/lib/security/review.test.cjs +6 -6
package/np-tools.cjs +1 -0
package/package.json +1 -1
package/templates/claude/payload/hooks/np-learnings-hook.cjs +1 -0
package/templates/claude/payload/hooks/np-security-hook.cjs +1 -0
package/workflows/add-tests.md +41 -0
package/workflows/architect-phase.md +19 -0
package/workflows/discuss-phase.md +29 -10
package/workflows/execute-phase.md +93 -4
package/workflows/plan-phase.md +57 -16
package/workflows/research-phase.md +45 -0
package/workflows/scan-codebase.md +21 -3
package/workflows/validate-phase.md +30 -13
package/workflows/verify-work.md +17 -0

package/bin/np-tools/spawn-headless.test.cjs CHANGED Viewed

@@ -8,6 +8,14 @@ const assert = require('node:assert/strict');
 const spawnHeadless = require('./spawn-headless.cjs');
 const runContext = require('../../lib/run-context.cjs');
+const headlessGuard = require('../../lib/headless-guard.cjs');
+// Test helper: writes `body` to a file called `name` inside directory `r`, marks it
+// executable (0o755) and returns the resulting path so it can stand in for the claude binary.
+function _mockClaude(r, name, body) {
+  const scriptPath = path.join(r, name);
+  fs.writeFileSync(scriptPath, body, 'utf-8');
+  fs.chmodSync(scriptPath, 0o755);
+  return scriptPath;
+}
 const _sandboxes = [];
 const _envBackup = {};
@@ -48,30 +56,30 @@ function _setEnv(k, v) {
   else process.env[k] = v;
 }
-test('SH-1: spawn-headless requires --agent', () => {
+test('SH-1: spawn-headless requires --agent', async () => {
   const r = _mkRoot();
   const cap = _cap();
-  assert.throws(
-    () => spawnHeadless.run([], { cwd: r, stdout: cap.stub }),
+  await assert.rejects(
+    async () => spawnHeadless.run([], { cwd: r, stdout: cap.stub }),
     (err) => err && err.code === 'spawn-headless-missing-agent',
   );
 });
-test('SH-2: spawn-headless requires --prompt-path', () => {
+test('SH-2: spawn-headless requires --prompt-path', async () => {
   const r = _mkRoot();
   const cap = _cap();
-  assert.throws(
-    () => spawnHeadless.run(['--agent', 'np-test-critic'], { cwd: r, stdout: cap.stub }),
+  await assert.rejects(
+    async () => spawnHeadless.run(['--agent', 'np-test-critic'], { cwd: r, stdout: cap.stub }),
     (err) => err && err.code === 'spawn-headless-missing-prompt-path',
   );
 });
-test('SH-3: spawn-headless requires --output-path', () => {
+test('SH-3: spawn-headless requires --output-path', async () => {
   const r = _mkRoot();
   fs.writeFileSync(path.join(r, 'p.md'), 'do the audit', 'utf-8');
   const cap = _cap();
-  assert.throws(
-    () => spawnHeadless.run(
+  await assert.rejects(
+    async () => spawnHeadless.run(
       ['--agent', 'np-test-critic', '--prompt-path', 'p.md'],
       { cwd: r, stdout: cap.stub },
     ),
@@ -79,11 +87,11 @@ test('SH-3: spawn-headless requires --output-path', () => {
   );
 });
-test('SH-4: spawn-headless rejects path traversal on prompt-path', () => {
+test('SH-4: spawn-headless rejects path traversal on prompt-path', async () => {
   const r = _mkRoot();
   const cap = _cap();
-  assert.throws(
-    () => spawnHeadless.run(
+  await assert.rejects(
+    async () => spawnHeadless.run(
       ['--agent', 'np-test-critic',
         '--prompt-path', '/etc/passwd',
         '--output-path', 'out.json'],
@@ -93,12 +101,12 @@ test('SH-4: spawn-headless rejects path traversal on prompt-path', () => {
   );
 });
-test('SH-5: spawn-headless rejects unknown agent', () => {
+test('SH-5: spawn-headless rejects unknown agent', async () => {
   const r = _mkRoot();
   fs.writeFileSync(path.join(r, 'p.md'), 'audit', 'utf-8');
   const cap = _cap();
-  assert.throws(
-    () => spawnHeadless.run(
+  await assert.rejects(
+    async () => spawnHeadless.run(
       ['--agent', 'np-does-not-exist',
         '--prompt-path', 'p.md',
         '--output-path', 'out.json'],
@@ -108,12 +116,12 @@ test('SH-5: spawn-headless rejects unknown agent', () => {
   );
 });
-test('SH-6: spawn-headless rejects invalid agent name (path-injection guard)', () => {
+test('SH-6: spawn-headless rejects invalid agent name (path-injection guard)', async () => {
   const r = _mkRoot();
   fs.writeFileSync(path.join(r, 'p.md'), 'audit', 'utf-8');
   const cap = _cap();
-  assert.throws(
-    () => spawnHeadless.run(
+  await assert.rejects(
+    async () => spawnHeadless.run(
       ['--agent', '../../etc/passwd',
         '--prompt-path', 'p.md',
         '--output-path', 'out.json'],
@@ -123,13 +131,13 @@ test('SH-6: spawn-headless rejects invalid agent name (path-injection guard)', (
   );
 });
-test('SH-7: spawn-headless reports claude-not-found when binary missing', () => {
+test('SH-7: spawn-headless reports claude-not-found when binary missing', async () => {
   const r = _mkRoot();
   fs.writeFileSync(path.join(r, 'p.md'), 'audit', 'utf-8');
   _setEnv('NUBOS_PILOT_CLAUDE_BIN', path.join(r, 'no-such-binary'));
   const cap = _cap();
-  assert.throws(
-    () => spawnHeadless.run(
+  await assert.rejects(
+    async () => spawnHeadless.run(
       ['--agent', 'np-test-critic',
         '--prompt-path', 'p.md',
         '--output-path', 'out.json'],
@@ -139,7 +147,7 @@ test('SH-7: spawn-headless reports claude-not-found when binary missing', () =>
   );
 });
-test('SH-8: spawn-headless captures stdout to output-path on success (mock binary)', () => {
+test('SH-8: spawn-headless captures stdout to output-path on success (mock binary)', async () => {
   const r = _mkRoot();
   fs.writeFileSync(path.join(r, 'p.md'), 'audit', 'utf-8');
   const mockBin = path.join(r, 'mock-claude.sh');
@@ -147,7 +155,7 @@ test('SH-8: spawn-headless captures stdout to output-path on success (mock binar
   fs.chmodSync(mockBin, 0o755);
   _setEnv('NUBOS_PILOT_CLAUDE_BIN', mockBin);
   const cap = _cap();
-  const rc = spawnHeadless.run(
+  const rc = await spawnHeadless.run(
     ['--agent', 'np-test-critic',
       '--prompt-path', 'p.md',
       '--output-path', 'out.json'],
@@ -161,7 +169,7 @@ test('SH-8: spawn-headless captures stdout to output-path on success (mock binar
   assert.match(written, /"verdict":"passed"/);
 });
-test('SH-9: spawn-headless surfaces non-zero subprocess exit (mock failure)', () => {
+test('SH-9: spawn-headless surfaces non-zero subprocess exit (mock failure)', async () => {
   const r = _mkRoot();
   fs.writeFileSync(path.join(r, 'p.md'), 'audit', 'utf-8');
   const mockBin = path.join(r, 'mock-fail.sh');
@@ -169,7 +177,7 @@ test('SH-9: spawn-headless surfaces non-zero subprocess exit (mock failure)', ()
   fs.chmodSync(mockBin, 0o755);
   _setEnv('NUBOS_PILOT_CLAUDE_BIN', mockBin);
   const cap = _cap();
-  const rc = spawnHeadless.run(
+  const rc = await spawnHeadless.run(
     ['--agent', 'np-test-critic',
       '--prompt-path', 'p.md',
       '--output-path', 'out.json'],
@@ -181,12 +189,12 @@ test('SH-9: spawn-headless surfaces non-zero subprocess exit (mock failure)', ()
   assert.match(payload.stderr_excerpt, /boom/);
 });
-test('SH-10: spawn-headless rejects --timeout-ms below 1000', () => {
+test('SH-10: spawn-headless rejects --timeout-ms below 1000', async () => {
   const r = _mkRoot();
   fs.writeFileSync(path.join(r, 'p.md'), 'audit', 'utf-8');
   const cap = _cap();
-  assert.throws(
-    () => spawnHeadless.run(
+  await assert.rejects(
+    async () => spawnHeadless.run(
       ['--agent', 'np-test-critic',
         '--prompt-path', 'p.md',
         '--output-path', 'out.json',
@@ -197,7 +205,7 @@ test('SH-10: spawn-headless rejects --timeout-ms below 1000', () => {
   );
 });
-test('SH-11: spawn-headless writes output atomically (no .tmp residue)', () => {
+test('SH-11: spawn-headless writes output atomically (no .tmp residue)', async () => {
   const r = _mkRoot();
   fs.writeFileSync(path.join(r, 'p.md'), 'audit', 'utf-8');
   const mockBin = path.join(r, 'mock-claude.sh');
@@ -205,7 +213,7 @@ test('SH-11: spawn-headless writes output atomically (no .tmp residue)', () => {
   fs.chmodSync(mockBin, 0o755);
   _setEnv('NUBOS_PILOT_CLAUDE_BIN', mockBin);
   const cap = _cap();
-  const rc = spawnHeadless.run(
+  const rc = await spawnHeadless.run(
     ['--agent', 'np-test-critic',
       '--prompt-path', 'p.md',
       '--output-path', 'out.json'],
@@ -313,7 +321,7 @@ test('SH-REDACT-2 _redactSecrets is a no-op on safe text', () => {
   assert.equal(spawnHeadless._redactSecrets(safe), safe);
 });
-test('SH-AUDIT-FIRST spawn-trail is written BEFORE caller-visible output (audit-first)', () => {
+test('SH-AUDIT-FIRST spawn-trail is written BEFORE caller-visible output (audit-first)', async () => {
   const r = _mkRoot();
   fs.writeFileSync(path.join(r, 'p.md'), 'audit', 'utf-8');
   const mockBin = path.join(r, 'mock.sh');
@@ -327,7 +335,7 @@ test('SH-AUDIT-FIRST spawn-trail is written BEFORE caller-visible output (audit-
   const cap = _cap();
   let thrown = null;
   try {
-    spawnHeadless.run(
+    await spawnHeadless.run(
       ['--agent', 'np-test-critic', '--prompt-path', 'p.md', '--output-path', 'out.json'],
       { cwd: r, stdout: cap.stub },
     );
@@ -340,7 +348,7 @@ test('SH-AUDIT-FIRST spawn-trail is written BEFORE caller-visible output (audit-
     'output must NOT exist if audit append failed (audit-first invariant)');
 });
-test('SH-PARSE-OK payload_parse_ok=false when claude returns non-JSON output', () => {
+test('SH-PARSE-OK payload_parse_ok=false when claude returns non-JSON output', async () => {
   const r = _mkRoot();
   fs.writeFileSync(path.join(r, 'p.md'), 'audit', 'utf-8');
   const mockBin = path.join(r, 'mock-plain.sh');
@@ -349,7 +357,7 @@ test('SH-PARSE-OK payload_parse_ok=false when claude returns non-JSON output', (
   _setEnv('NUBOS_PILOT_CLAUDE_BIN', mockBin);
   _setEnv('NUBOS_PILOT_RUN_ID', 'r-parse-test');
   const cap = _cap();
-  spawnHeadless.run(
+  await spawnHeadless.run(
     ['--agent', 'np-test-critic', '--prompt-path', 'p.md', '--output-path', 'out.json'],
     { cwd: r, stdout: cap.stub },
   );
@@ -407,7 +415,7 @@ test('SH-ENV-3 NUBOS_PILOT_SPAWN_ENV_PASSTHROUGH allow-lists by exact key name',
   assert.equal(filtered.NOT_LISTED, undefined);
 });
-test('SH-TRAIL-1 spawn writes append-only spawn-trail record with run_id + prompt/response sha256 + timing', () => {
+test('SH-TRAIL-1 spawn writes append-only spawn-trail record with run_id + prompt/response sha256 + timing', async () => {
   const r = _mkRoot();
   fs.writeFileSync(path.join(r, 'p.md'), 'do the audit', 'utf-8');
   const mockBin = path.join(r, 'mock-claude.sh');
@@ -416,7 +424,7 @@ test('SH-TRAIL-1 spawn writes append-only spawn-trail record with run_id + promp
   _setEnv('NUBOS_PILOT_CLAUDE_BIN', mockBin);
   _setEnv('NUBOS_PILOT_RUN_ID', 'r-traceme-deadbeef');
   const cap = _cap();
-  const rc = spawnHeadless.run(
+  const rc = await spawnHeadless.run(
     ['--agent', 'np-test-critic', '--prompt-path', 'p.md', '--output-path', 'out.json'],
     { cwd: r, stdout: cap.stub },
   );
@@ -444,7 +452,7 @@ test('SH-TRAIL-1 spawn writes append-only spawn-trail record with run_id + promp
   assert.ok(Number.isFinite(rec.duration_ms) && rec.duration_ms >= 0);
 });
-test('SH-TRAIL-1b run_id is seeded BEFORE spawn so the child env inherits NUBOS_PILOT_RUN_ID', () => {
+test('SH-TRAIL-1b run_id is seeded BEFORE spawn so the child env inherits NUBOS_PILOT_RUN_ID', async () => {
   const r = _mkRoot();
   fs.writeFileSync(path.join(r, 'p.md'), 'audit', 'utf-8');
   // Mock-claude echoes its own ENV var so we can prove the child saw it.
@@ -457,7 +465,7 @@ test('SH-TRAIL-1b run_id is seeded BEFORE spawn so the child env inherits NUBOS_
   // Crucially: do NOT set NUBOS_PILOT_RUN_ID; the lazy-seed must happen.
   runContext._resetForTests();
   const cap = _cap();
-  spawnHeadless.run(
+  await spawnHeadless.run(
     ['--agent', 'np-test-critic', '--prompt-path', 'p.md', '--output-path', 'out.json'],
     { cwd: r, stdout: cap.stub },
   );
@@ -467,7 +475,7 @@ test('SH-TRAIL-1b run_id is seeded BEFORE spawn so the child env inherits NUBOS_
   assert.equal(childRunId, payload.run_id, 'child must inherit parent NUBOS_PILOT_RUN_ID via filtered env');
 });
-test('SH-TRAIL-2 two sequential spawns append two parseable trail lines (jsonl integrity)', () => {
+test('SH-TRAIL-2 two sequential spawns append two parseable trail lines (jsonl integrity)', async () => {
   const r = _mkRoot();
   fs.writeFileSync(path.join(r, 'p.md'), 'audit X', 'utf-8');
   const mockBin = path.join(r, 'mock.sh');
@@ -477,7 +485,7 @@ test('SH-TRAIL-2 two sequential spawns append two parseable trail lines (jsonl i
   _setEnv('NUBOS_PILOT_RUN_ID', 'r-test-multi-aaa1');
   const cap = _cap();
   for (let i = 0; i < 2; i++) {
-    spawnHeadless.run(
+    await spawnHeadless.run(
       ['--agent', 'np-test-critic', '--prompt-path', 'p.md', '--output-path', 'out' + i + '.json'],
       { cwd: r, stdout: cap.stub },
     );
@@ -488,6 +496,133 @@ test('SH-TRAIL-2 two sequential spawns append two parseable trail lines (jsonl i
   for (const l of lines) JSON.parse(l);
 });
+// Reentrancy guard: with NUBOS_PILOT_HEADLESS=1 set in the parent environment the test expects
+// run() to reject with code 'spawn-headless-reentrant' and expects no out.json to be written.
+test('SH-GUARD-1 refuses to spawn when NUBOS_PILOT_HEADLESS=1 (reentrancy guard)', async () => {
+  const root = _mkRoot();
+  fs.writeFileSync(path.join(root, 'p.md'), 'audit', 'utf-8');
+  const fakeClaude = _mockClaude(root, 'mock.sh', '#!/bin/sh\ncat > /dev/null\necho "{}"\n');
+  _setEnv('NUBOS_PILOT_CLAUDE_BIN', fakeClaude);
+  _setEnv('NUBOS_PILOT_HEADLESS', '1');
+  const capture = _cap();
+  const argv = ['--agent', 'np-test-critic', '--prompt-path', 'p.md', '--output-path', 'out.json'];
+  const pending = spawnHeadless.run(argv, { cwd: root, stdout: capture.stub });
+  await assert.rejects(pending, (err) => err && err.code === 'spawn-headless-reentrant');
+  const outputExists = fs.existsSync(path.join(root, 'out.json'));
+  assert.equal(outputExists, false, 'no claude must be spawned inside a headless run');
+});
+// Depth guard: with NUBOS_PILOT_HOOK_DEPTH='1' in the parent environment the test expects
+// run() to reject with code 'spawn-headless-depth-exceeded'.
+test('SH-GUARD-2 refuses to spawn when hook depth has reached the cap (depth guard)', async () => {
+  const root = _mkRoot();
+  fs.writeFileSync(path.join(root, 'p.md'), 'audit', 'utf-8');
+  const fakeClaude = _mockClaude(root, 'mock.sh', '#!/bin/sh\ncat > /dev/null\necho "{}"\n');
+  _setEnv('NUBOS_PILOT_CLAUDE_BIN', fakeClaude);
+  _setEnv('NUBOS_PILOT_HOOK_DEPTH', '1');
+  const capture = _cap();
+  const argv = ['--agent', 'np-test-critic', '--prompt-path', 'p.md', '--output-path', 'out.json'];
+  const pending = spawnHeadless.run(argv, { cwd: root, stdout: capture.stub });
+  await assert.rejects(pending, (err) => err && err.code === 'spawn-headless-depth-exceeded');
+});
+// The mock binary prints the NUBOS_PILOT_HEADLESS and NUBOS_PILOT_HOOK_DEPTH values it sees as
+// JSON; the test reads them back from out.json and expects both to be '1'.
+test('SH-GUARD-3 child env carries NUBOS_PILOT_HEADLESS=1 and depth=1 (one level deep only)', async () => {
+  const root = _mkRoot();
+  fs.writeFileSync(path.join(root, 'p.md'), 'audit', 'utf-8');
+  const echoEnvScript =
+    '#!/bin/sh\ncat > /dev/null\nprintf \'{"hl":"\'$NUBOS_PILOT_HEADLESS\'","depth":"\'$NUBOS_PILOT_HOOK_DEPTH\'"}\\n\'\n';
+  const fakeClaude = _mockClaude(root, 'mock.sh', echoEnvScript);
+  _setEnv('NUBOS_PILOT_CLAUDE_BIN', fakeClaude);
+  const capture = _cap();
+  const argv = ['--agent', 'np-test-critic', '--prompt-path', 'p.md', '--output-path', 'out.json'];
+  const exitCode = await spawnHeadless.run(argv, { cwd: root, stdout: capture.stub });
+  assert.equal(exitCode, 0);
+  const written = fs.readFileSync(path.join(root, 'out.json'), 'utf-8');
+  const seenByChild = JSON.parse(written);
+  assert.equal(seenByChild.hl, '1', 'child claude must run with NUBOS_PILOT_HEADLESS=1');
+  assert.equal(seenByChild.depth, '1', 'child claude must run at hook depth 1');
+});
+// Concurrency guard: the test takes the spawn lock for 'np-test-critic' itself, then expects a
+// run() for the same agent to reject with code 'spawn-headless-locked'. The lock is always
+// released afterwards, whether or not the assertion passes.
+test('SH-GUARD-4 refuses to spawn while a live lock for the same agent is held (concurrency guard)', async () => {
+  const root = _mkRoot();
+  fs.writeFileSync(path.join(root, 'p.md'), 'audit', 'utf-8');
+  const fakeClaude = _mockClaude(root, 'mock.sh', '#!/bin/sh\ncat > /dev/null\necho "{}"\n');
+  _setEnv('NUBOS_PILOT_CLAUDE_BIN', fakeClaude);
+  const lock = headlessGuard.tryAcquireSpawnLock(root, 'np-test-critic');
+  assert.equal(lock.acquired, true);
+  const capture = _cap();
+  const argv = ['--agent', 'np-test-critic', '--prompt-path', 'p.md', '--output-path', 'out.json'];
+  try {
+    const pending = spawnHeadless.run(argv, { cwd: root, stdout: capture.stub });
+    await assert.rejects(pending, (err) => err && err.code === 'spawn-headless-locked');
+  } finally {
+    lock.release();
+  }
+});
+// Runs the same agent twice in a row and expects exit code 0 both times, then expects the
+// per-agent lock file to be absent.
+test('SH-GUARD-5 lock is released after a successful spawn (re-spawnable)', async () => {
+  const root = _mkRoot();
+  fs.writeFileSync(path.join(root, 'p.md'), 'audit', 'utf-8');
+  const fakeClaude = _mockClaude(root, 'mock.sh', '#!/bin/sh\ncat > /dev/null\necho "{}"\n');
+  _setEnv('NUBOS_PILOT_CLAUDE_BIN', fakeClaude);
+  const capture = _cap();
+  for (const i of [0, 1]) {
+    const argv = ['--agent', 'np-test-critic', '--prompt-path', 'p.md', '--output-path', 'out' + i + '.json'];
+    const exitCode = await spawnHeadless.run(argv, { cwd: root, stdout: capture.stub });
+    assert.equal(exitCode, 0, 'sequential spawns must each acquire and release the lock');
+  }
+  const lockFile = headlessGuard._lockPath(root, 'np-test-critic');
+  assert.equal(fs.existsSync(lockFile), false, 'no lock residue after spawns');
+});
+// Per-agent scope: the test registers a second agent file, holds the lock for
+// 'np-test-critic', and expects a run() for 'np-other-critic' to still exit with 0.
+test('SH-GUARD-6 a held lock for one agent does NOT block a different agent (per-agent scope)', async () => {
+  const root = _mkRoot();
+  const otherAgentFile = path.join(root, '.nubos-pilot', 'agents', 'np-other-critic.md');
+  fs.writeFileSync(otherAgentFile, '---\nname: np-other-critic\n---\n\n# Role\n', 'utf-8');
+  fs.writeFileSync(path.join(root, 'p.md'), 'audit', 'utf-8');
+  const fakeClaude = _mockClaude(root, 'mock.sh', '#!/bin/sh\ncat > /dev/null\necho "{}"\n');
+  _setEnv('NUBOS_PILOT_CLAUDE_BIN', fakeClaude);
+  const lock = headlessGuard.tryAcquireSpawnLock(root, 'np-test-critic');
+  assert.equal(lock.acquired, true);
+  const capture = _cap();
+  const argv = ['--agent', 'np-other-critic', '--prompt-path', 'p.md', '--output-path', 'out.json'];
+  try {
+    const exitCode = await spawnHeadless.run(argv, { cwd: root, stdout: capture.stub });
+    assert.equal(exitCode, 0, 'a different agent must spawn while np-test-critic is locked');
+  } finally {
+    lock.release();
+  }
+});
+// Points NUBOS_PILOT_CLAUDE_BIN at a path that is never created, expects run() to reject with
+// code 'spawn-headless-claude-not-found', and then expects no lock file to be left behind.
+test('SH-GUARD-7 lock is released even when the spawn errors (claude-not-found)', async () => {
+  const root = _mkRoot();
+  fs.writeFileSync(path.join(root, 'p.md'), 'audit', 'utf-8');
+  _setEnv('NUBOS_PILOT_CLAUDE_BIN', path.join(root, 'no-such-binary'));
+  const capture = _cap();
+  const argv = ['--agent', 'np-test-critic', '--prompt-path', 'p.md', '--output-path', 'out.json'];
+  const pending = spawnHeadless.run(argv, { cwd: root, stdout: capture.stub });
+  await assert.rejects(pending, (err) => err && err.code === 'spawn-headless-claude-not-found');
+  const lockFile = headlessGuard._lockPath(root, 'np-test-critic');
+  assert.equal(fs.existsSync(lockFile), false,
+    'the per-agent lock must not leak when the spawn fails');
+});
 test('SH-ENV-4 NUBOS_PILOT_/CLAUDE_/ANTHROPIC_ prefixed vars pass through (whitelisted prefix)', () => {
   const parent = {
     PATH: '/usr/bin',
@@ -504,3 +639,53 @@ test('SH-ENV-4 NUBOS_PILOT_/CLAUDE_/ANTHROPIC_ prefixed vars pass through (white
   assert.equal(filtered.ANTHROPIC_BASE_URL, 'https://api.anthropic.com');
   assert.equal(filtered.UNRELATED_FOO, undefined);
 });
+// Off-host routing: with a resolver that reports an openai-compat provider, the test expects
+// run() to pass the agent and prompt to the injected dispatcher, write a {result} envelope to
+// out.json, flag the caller-visible payload as off_host, and never execute the claude binary.
+test('SH-OFFHOST-1: openai-compat routing runs dispatchOffHost and writes a {result} envelope (no claude -p)', async () => {
+  const r = _mkRoot();
+  fs.writeFileSync(path.join(r, 'p.md'), 'review this diff', 'utf-8');
+  const cap = _cap();
+  // The stand-in claude binary leaves a marker file behind if it is ever executed, so the
+  // "never invoked" assertion at the end observes a real side effect. (The previous version
+  // asserted on a flag that nothing ever set, so it could not fail.)
+  const claudeMarker = path.join(r, 'claude-was-called');
+  const mockBin = _mockClaude(r, 'mock-claude.sh',
+    '#!/bin/sh\ncat > /dev/null\n: > "' + claudeMarker + '"\necho "{}"\n');
+  _setEnv('NUBOS_PILOT_CLAUDE_BIN', mockBin);
+  let dispatchArgs = null;
+  const code = await spawnHeadless.run(
+    ['--agent', 'np-test-critic', '--prompt-path', 'p.md', '--output-path', 'out.json'],
+    {
+      cwd: r,
+      stdout: cap.stub,
+      resolveImpl: () => ({ kind: 'openai-compat', provider: 'ollama', model: 'qwen2.5-coder:32b' }),
+      dispatchImpl: async (o) => { dispatchArgs = o; return { content: 'REVIEW: 0 risks', model: 'qwen2.5-coder:32b', provider: 'ollama' }; },
+    },
+  );
+  assert.equal(code, 0);
+  // dispatch received the agent + the prompt body as the task
+  assert.ok(dispatchArgs, 'dispatchImpl must have been called');
+  assert.equal(dispatchArgs.agent, 'np-test-critic');
+  assert.match(dispatchArgs.task, /review this diff/);
+  // output is the claude-compatible {result} envelope so review/extract parse it unchanged
+  const out = JSON.parse(fs.readFileSync(path.join(r, 'out.json'), 'utf-8'));
+  assert.equal(out.result, 'REVIEW: 0 risks');
+  assert.equal(out.provider, 'ollama');
+  // caller-visible payload marks it off-host and the native claude bin was never invoked
+  const payload = JSON.parse(cap.get().trim());
+  assert.equal(payload.off_host, true);
+  assert.equal(payload.exit_code, 0);
+  assert.equal(fs.existsSync(claudeMarker), false, 'the native claude binary must not be executed on the off-host path');
+});
+// Soft-fail parity: when the injected dispatcher throws, the test expects run() to resolve to
+// exit code 2 and out.json to hold an envelope with an empty result and is_error set.
+test('SH-OFFHOST-2: a failing off-host dispatch returns exit 2 with an empty result (soft-fail parity)', async () => {
+  const root = _mkRoot();
+  fs.writeFileSync(path.join(root, 'p.md'), 'review', 'utf-8');
+  const capture = _cap();
+  const resolveToOllama = () => ({ kind: 'openai-compat', provider: 'ollama', model: 'x' });
+  const failingDispatch = async () => {
+    const e = new Error('provider unreachable');
+    e.code = 'preflight-failed';
+    throw e;
+  };
+  const argv = ['--agent', 'np-test-critic', '--prompt-path', 'p.md', '--output-path', 'out.json'];
+  const exitCode = await spawnHeadless.run(argv, {
+    cwd: root,
+    stdout: capture.stub,
+    resolveImpl: resolveToOllama,
+    dispatchImpl: failingDispatch,
+  });
+  assert.equal(exitCode, 2);
+  const envelope = JSON.parse(fs.readFileSync(path.join(root, 'out.json'), 'utf-8'));
+  assert.equal(envelope.result, '');
+  assert.equal(envelope.is_error, true);
+});

package/bin/np-tools/spawn-offhost.cjs ADDED Viewed

@@ -0,0 +1,93 @@
+'use strict';
+const fs = require('node:fs');
+const { NubosPilotError } = require('../../lib/core.cjs');
+const { dispatchOffHost } = require('../../lib/runtime/dispatch.cjs');
+// Prints the spawn-offhost synopsis to stderr. The option list mirrors the flags _parse()
+// recognises, including --output-schema, which the synopsis previously left out.
+function _usage() {
+  process.stderr.write(
+    'Usage: np-tools.cjs spawn-offhost --agent <name> (--task <str> | --task-file <path>) '
+    + '[--cwd <dir>] [--phase P] [--plan P] [--task-id T] [--max-iterations N] [--output-schema S] '
+    + '[--allow-bash] [--read-only] [--no-audit]\n',
+  );
+}
+/**
+ * Turns a spawn-offhost argv array into an options object.
+ *
+ * Value flags consume the following token as-is (undefined when argv ends early),
+ * --max-iterations converts that token with Number(), and the three switches set a boolean.
+ * Tokens that match no known flag are skipped. When a flag repeats, the last occurrence wins.
+ *
+ * @param {string[]} argv - Flag tokens; the array itself is not modified.
+ * @returns {object} Always has allowBash and readOnly; other keys appear only when their flag was seen.
+ */
+function _parse(argv) {
+  // Maps are used instead of plain objects so that tokens such as "__proto__" are not
+  // mistaken for known flags.
+  const valueFlags = new Map([
+    ['--agent', 'agent'],
+    ['--task', 'task'],
+    ['--task-file', 'taskFile'],
+    ['--phase', 'phase'],
+    ['--plan', 'plan'],
+    ['--task-id', 'taskId'],
+    ['--cwd', 'cwd'],
+    ['--output-schema', 'outputSchema'],
+  ]);
+  const switches = new Map([
+    ['--allow-bash', 'allowBash'],
+    ['--read-only', 'readOnly'],
+    ['--no-audit', 'skipAudit'],
+  ]);
+  const out = { allowBash: false, readOnly: false };
+  const pending = argv.slice();
+  while (pending.length > 0) {
+    const flag = pending.shift();
+    if (flag === '--max-iterations') {
+      out.maxIterations = Number(pending.shift());
+    } else if (valueFlags.has(flag)) {
+      out[valueFlags.get(flag)] = pending.shift();
+    } else if (switches.has(flag)) {
+      out[switches.get(flag)] = true;
+    }
+  }
+  return out;
+}
+/**
+ * Entry point of the spawn-offhost subcommand.
+ *
+ * Parses the flags, loads the task text (the --task-file contents replace --task when a file
+ * is given), calls dispatchOffHost, prints advisory warnings to stderr and the JSON-encoded
+ * result to stdout.
+ *
+ * @param {string[]} [argv] - Flag tokens; when this is not an array, process.argv.slice(3) is used.
+ * @returns {Promise<number>} 0 after a completed dispatch; 1 for usage errors, an unreadable
+ *   task file, or any error thrown by the dispatch.
+ */
+async function run(argv) {
+  const args = Array.isArray(argv) ? argv.slice() : process.argv.slice(3);
+  if (args.length === 0 || args[0] === '--help') {
+    _usage();
+    return 1;
+  }
+  const opts = _parse(args);
+  let task = opts.task;
+  if (opts.taskFile) {
+    try {
+      task = fs.readFileSync(opts.taskFile, 'utf-8');
+    } catch {
+      // Only the base name of the file is reported, not the full path.
+      const report = {
+        code: 'spawn-offhost-task-file-unreadable',
+        file: require('node:path').basename(opts.taskFile),
+      };
+      process.stderr.write(JSON.stringify(report) + '\n');
+      return 1;
+    }
+  }
+  if (!opts.agent || typeof task !== 'string') {
+    _usage();
+    return 1;
+  }
+  let result;
+  try {
+    result = await dispatchOffHost({
+      agent: opts.agent,
+      task,
+      cwd: opts.cwd || process.cwd(),
+      phase: opts.phase,
+      plan: opts.plan,
+      taskId: opts.taskId,
+      maxIterations: opts.maxIterations,
+      allowBash: opts.allowBash,
+      readOnly: opts.readOnly,
+      skipAudit: opts.skipAudit,
+      outputSchema: opts.outputSchema,
+    });
+    // The warnings below go to stderr only; they do not change the exit code.
+    if (result && result.metrics_recorded === false) {
+      process.stderr.write('spawn-offhost: metrics row was not recorded (telemetry only; run succeeded)\n');
+    }
+    if (result && result.rule9 && result.rule9.ok === false) {
+      const reason = result.rule9.violation || result.rule9.error;
+      process.stderr.write('spawn-offhost: Rule-9 violation (' + reason
+        + ') — the agent did not satisfy the search bar. Do NOT commit this output as-is; re-run or route back to the agent.\n');
+    }
+    if (result && result.capability && result.capability.ok === false) {
+      const cap = result.capability;
+      let advice;
+      if (cap.mutating) {
+        advice = 'This agent edits files; off-host it produced NO changes. Route it to a tool-calling-capable model or keep it native.';
+      } else {
+        advice = 'If this agent was expected to inspect the workspace, its output may be ungrounded — verify before relying on it.';
+      }
+      process.stderr.write('spawn-offhost: the model advertised ' + cap.toolsAdvertised
+        + ' tool(s) but made zero tool calls — the provider/model likely does NOT support OpenAI function/tool-calling. '
+        + advice
+        + '\n');
+    }
+    process.stdout.write(JSON.stringify(result) + '\n');
+    return 0;
+  } catch (err) {
+    // NubosPilotError instances are reported as one JSON line; anything else as a stack trace.
+    const isPilotError = Boolean(err) && err.name === 'NubosPilotError';
+    if (isPilotError) {
+      process.stderr.write(JSON.stringify({ code: err.code, message: err.message, details: err.details }) + '\n');
+    } else {
+      process.stderr.write(String((err && err.stack) || err) + '\n');
+    }
+    return 1;
+  }
+}
+module.exports = { run, _parse };
+// Direct invocation (node spawn-offhost.cjs ...): the exit status is whatever run() resolves to.
+// The rejection handler covers failures that escape run()'s own try/catch, so they are reported
+// on stderr and produce exit code 1 instead of surfacing as an unhandled rejection.
+if (require.main === module) {
+  run(process.argv.slice(2)).then(
+    (code) => process.exit(code || 0),
+    (err) => {
+      process.stderr.write(String((err && err.stack) || err) + '\n');
+      process.exit(1);
+    },
+  );
+}

package/bin/np-tools/spawn-offhost.test.cjs ADDED Viewed

@@ -0,0 +1,38 @@
+const { test } = require('node:test');
+const assert = require('node:assert/strict');
+const subcmd = require('./spawn-offhost.cjs');
+/**
+ * Runs `fn` while process.stdout.write and process.stderr.write are swapped for collectors.
+ *
+ * The real writers are put back in a `finally`, so they are restored whether `fn` returns,
+ * rejects, or throws synchronously. (The earlier promise-chain form left both streams patched
+ * when `fn` threw before returning a promise.)
+ *
+ * @param {Function} fn - Sync or async callback; its (awaited) return value becomes `rc`.
+ * @returns {Promise<{rc: *, stdout: string, stderr: string}>} The callback result plus everything
+ *   written to each stream while it ran. Rejects with whatever `fn` throws.
+ */
+async function _capture(fn) {
+  const out = [];
+  const err = [];
+  const realStdoutWrite = process.stdout.write;
+  const realStderrWrite = process.stderr.write;
+  process.stdout.write = (c) => { out.push(String(c)); return true; };
+  process.stderr.write = (c) => { err.push(String(c)); return true; };
+  try {
+    const rc = await fn();
+    return { rc, stdout: out.join(''), stderr: err.join('') };
+  } finally {
+    process.stdout.write = realStdoutWrite;
+    process.stderr.write = realStderrWrite;
+  }
+}
+// Feeds _parse one argv that mixes value flags and switches, then checks each resulting field.
+// readOnly is expected to stay false because --read-only is not passed.
+test('SOH-1: _parse reads agent/task, the boolean flags, --cwd and --no-audit', () => {
+  const argv = ['--agent', 'np-executor', '--task', 'do x', '--allow-bash', '--max-iterations', '5', '--cwd', '/wt', '--no-audit'];
+  const parsed = subcmd._parse(argv);
+  const expected = [
+    ['agent', 'np-executor'],
+    ['task', 'do x'],
+    ['allowBash', true],
+    ['readOnly', false],
+    ['maxIterations', 5],
+    ['cwd', '/wt'],
+    ['skipAudit', true],
+  ];
+  for (const [key, value] of expected) {
+    assert.equal(parsed[key], value);
+  }
+});
+// An empty argv is expected to yield return code 1 and a usage line on stderr.
+test('SOH-2: missing args prints usage and returns 1', async () => {
+  const captured = await _capture(() => subcmd.run([]));
+  assert.equal(captured.rc, 1);
+  assert.match(captured.stderr, /Usage:/);
+});
+// Passing --agent with neither --task nor --task-file is expected to yield return code 1.
+test('SOH-3: --agent without a task returns 1', async () => {
+  const captured = await _capture(() => subcmd.run(['--agent', 'np-executor']));
+  assert.equal(captured.rc, 1);
+});

package/lib/agents.cjs CHANGED Viewed

@@ -2,7 +2,7 @@
 const fs = require('node:fs');
 const path = require('node:path');
-const { extractFrontmatter } = require('./frontmatter.cjs');
+const { extractFrontmatter, stripFrontmatter } = require('./frontmatter.cjs');
 const { NubosPilotError, findProjectRoot } = require('./core.cjs');
 const REQUIRED = ['name', 'description', 'tier', 'tools'];
@@ -60,7 +60,7 @@ function validateAgentFrontmatter(fm, agentName) {
 const AGENT_NAME_RE = /^[a-zA-Z0-9_-]+$/;
-function _loadAgentFromDisk(name, cwd) {
+function _resolveAgentPath(name, cwd) {
   if (typeof name !== 'string' || !AGENT_NAME_RE.test(name)) {
     throw new NubosPilotError(
       'agent-invalid-name',
@@ -83,10 +83,23 @@ function _loadAgentFromDisk(name, cwd) {
       { name, path: candidates[0], tried: candidates },
     );
   }
+  return found;
+}
+function _loadAgentFromDisk(name, cwd) {
+  const found = _resolveAgentPath(name, cwd);
   const { frontmatter } = extractFrontmatter(fs.readFileSync(found, 'utf-8'));
   return validateAgentFrontmatter(frontmatter, name);
 }
+/**
+ * Loads an agent definition file and returns its frontmatter together with its body text.
+ *
+ * The file is located with _resolveAgentPath(name, cwd). The frontmatter is passed through
+ * validateAgentFrontmatter, whose return value is discarded; the `frontmatter` returned here
+ * is the object produced by extractFrontmatter.
+ *
+ * @param {string} name - Agent name, forwarded to _resolveAgentPath.
+ * @param {string} cwd - Directory forwarded to _resolveAgentPath.
+ * @returns {{frontmatter: object, body: string, path: string}} Parsed frontmatter, the file
+ *   text after stripFrontmatter, and the resolved file path.
+ */
+function loadAgentSource(name, cwd) {
+  const agentPath = _resolveAgentPath(name, cwd);
+  const text = fs.readFileSync(agentPath, 'utf-8');
+  const parsed = extractFrontmatter(text);
+  validateAgentFrontmatter(parsed.frontmatter, name);
+  return {
+    frontmatter: parsed.frontmatter,
+    body: stripFrontmatter(text),
+    path: agentPath,
+  };
+}
 function loadAgent(name, cwd) {
   const fm = _loadAgentFromDisk(name, cwd);
   if (fm.module === true) {
@@ -143,6 +156,7 @@ module.exports = {
   validateAgentFrontmatter,
   loadAgent,
   loadAgentModule,
+  loadAgentSource,
   listAgents,
   getAgentSkills,
   AGENT_NAME_RE,

package/lib/config-schema.cjs CHANGED Viewed

@@ -9,6 +9,8 @@ const VALID_TIERS = Object.freeze(['haiku', 'sonnet', 'opus']);
 const SCHEMA = Object.freeze({
   scope:            { type: 'enum', values: VALID_SCOPES, optional: true },
   model_profile:    { type: 'enum', values: VALID_MODEL_PROFILES, optional: true },
+  model_providers:  { type: 'object', shape: 'any', optional: true },
+  agent_routing:    { type: 'object', shape: 'any', optional: true },
   response_language:{ type: 'string', optional: true },
   runtime:          { type: 'string', optional: true },
   runtimes:         { type: 'array', element: 'string', optional: true },
@@ -220,7 +222,9 @@ function _clone(v) {
   return out;
 }
-const SCHEMA_ONLY_KEYS = Object.freeze(['runtime', 'runtimes', 'agent_skills']);
+const SCHEMA_ONLY_KEYS = Object.freeze([
+  'runtime', 'runtimes', 'agent_skills', 'model_providers', 'agent_routing',
+]);
 module.exports = {
   SCHEMA,