nubos-pilot 1.2.4 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. package/CHANGELOG.md +17 -1
  2. package/README.md +2 -1
  3. package/SECURITY.md +3 -4
  4. package/bin/np-tools/_commands.cjs +1 -0
  5. package/bin/np-tools/learnings.cjs +1 -1
  6. package/bin/np-tools/resolve-model.cjs +55 -1
  7. package/bin/np-tools/resolve-model.test.cjs +139 -0
  8. package/bin/np-tools/security.cjs +1 -1
  9. package/bin/np-tools/spawn-headless.cjs +100 -1
  10. package/bin/np-tools/spawn-headless.test.cjs +108 -58
  11. package/bin/np-tools/spawn-offhost.cjs +93 -0
  12. package/bin/np-tools/spawn-offhost.test.cjs +38 -0
  13. package/lib/agents.cjs +16 -2
  14. package/lib/config-schema.cjs +5 -1
  15. package/lib/learnings/extract.cjs +4 -4
  16. package/lib/learnings/extract.test.cjs +8 -8
  17. package/lib/model-providers.cjs +118 -0
  18. package/lib/model-providers.test.cjs +85 -0
  19. package/lib/runtime/agent-loop.cjs +64 -0
  20. package/lib/runtime/agent-loop.test.cjs +135 -0
  21. package/lib/runtime/dispatch.cjs +174 -0
  22. package/lib/runtime/dispatch.test.cjs +193 -0
  23. package/lib/runtime/preflight.cjs +68 -0
  24. package/lib/runtime/preflight.test.cjs +62 -0
  25. package/lib/runtime/providers/openai-compat.cjs +102 -0
  26. package/lib/runtime/providers/openai-compat.test.cjs +103 -0
  27. package/lib/runtime/tools/index.cjs +415 -0
  28. package/lib/runtime/tools/index.test.cjs +230 -0
  29. package/lib/security/review.cjs +4 -4
  30. package/lib/security/review.test.cjs +6 -6
  31. package/np-tools.cjs +1 -0
  32. package/package.json +1 -1
  33. package/workflows/add-tests.md +41 -0
  34. package/workflows/architect-phase.md +19 -0
  35. package/workflows/discuss-phase.md +29 -10
  36. package/workflows/execute-phase.md +93 -4
  37. package/workflows/plan-phase.md +57 -16
  38. package/workflows/research-phase.md +45 -0
  39. package/workflows/scan-codebase.md +21 -3
  40. package/workflows/validate-phase.md +30 -13
  41. package/workflows/verify-work.md +17 -0
@@ -100,7 +100,7 @@ test('REV-5 parseReviewerOutput handles claude -p envelope, fences, and junk', (
100
100
  assert.equal(junk.parse_ok, false);
101
101
  });
102
102
 
103
- test('REV-6 runReview guard blocks a concurrent review (no double spawn)', () => {
103
+ test('REV-6 runReview guard blocks a concurrent review (no double spawn)', async () => {
104
104
  const dir = tempRepo();
105
105
  const sid = freshSid();
106
106
  try {
@@ -108,21 +108,21 @@ test('REV-6 runReview guard blocks a concurrent review (no double spawn)', () =>
108
108
  fs.appendFileSync(path.join(dir, 'app.js'), 'const z = eval(q);\n');
109
109
  ledger.tryBeginReview(sid, {}); // simulate an in-flight review
110
110
  let spawnCalls = 0;
111
- const r = review.runReview({ cwd: dir, sid, mode: 'stop', config: {}, spawnImpl: () => { spawnCalls++; return '{}'; } });
111
+ const r = await review.runReview({ cwd: dir, sid, mode: 'stop', config: {}, spawnImpl: () => { spawnCalls++; return '{}'; } });
112
112
  assert.equal(r.ran, false);
113
113
  assert.equal(r.reason, 'in-flight');
114
114
  assert.equal(spawnCalls, 0);
115
115
  } finally { ledger.endReview(sid); cleanup(sid); fs.rmSync(dir, { recursive: true, force: true }); }
116
116
  });
117
117
 
118
- test('REV-7 runReview spawns, parses, and merges risk findings into the ledger', () => {
118
+ test('REV-7 runReview spawns, parses, and merges risk findings into the ledger', async () => {
119
119
  const dir = tempRepo();
120
120
  const sid = freshSid();
121
121
  try {
122
122
  ledger.setBaseline(sid, { head: headOf(dir) });
123
123
  fs.appendFileSync(path.join(dir, 'app.js'), 'const z = eval(q);\n');
124
124
  const stub = () => JSON.stringify({ result: '{"status":"risks-found","findings":[{"category":"dynamic-exec","severity":"high","file":"app.js","line":2,"title":"eval"}]}' });
125
- const r = review.runReview({ cwd: dir, sid, mode: 'stop', config: {}, spawnImpl: stub });
125
+ const r = await review.runReview({ cwd: dir, sid, mode: 'stop', config: {}, spawnImpl: stub });
126
126
  assert.equal(r.ran, true);
127
127
  assert.equal(r.findings_added, 1);
128
128
  const taken = ledger.takeUnsurfacedRisks(sid, {});
@@ -130,13 +130,13 @@ test('REV-7 runReview spawns, parses, and merges risk findings into the ledger',
130
130
  } finally { cleanup(sid); fs.rmSync(dir, { recursive: true, force: true }); }
131
131
  });
132
132
 
133
- test('REV-8 runReview on an empty diff does not spawn', () => {
133
+ test('REV-8 runReview on an empty diff does not spawn', async () => {
134
134
  const dir = tempRepo();
135
135
  const sid = freshSid();
136
136
  try {
137
137
  ledger.setBaseline(sid, { head: headOf(dir) });
138
138
  let spawnCalls = 0;
139
- const r = review.runReview({ cwd: dir, sid, mode: 'stop', config: {}, spawnImpl: () => { spawnCalls++; return '{}'; } });
139
+ const r = await review.runReview({ cwd: dir, sid, mode: 'stop', config: {}, spawnImpl: () => { spawnCalls++; return '{}'; } });
140
140
  assert.equal(r.findings_added, 0);
141
141
  assert.equal(spawnCalls, 0);
142
142
  } finally { cleanup(sid); fs.rmSync(dir, { recursive: true, force: true }); }
package/np-tools.cjs CHANGED
@@ -108,6 +108,7 @@ const topLevelCommands = {
108
108
  'loop-stuck': require('./bin/np-tools/loop-stuck.cjs'),
109
109
  'loop-metrics': require('./bin/np-tools/loop-metrics.cjs'),
110
110
  'spawn-headless': require('./bin/np-tools/spawn-headless.cjs'),
111
+ 'spawn-offhost': require('./bin/np-tools/spawn-offhost.cjs'),
111
112
  'security': require('./bin/np-tools/security.cjs'),
112
113
  'learning-log': require('./bin/np-tools/learning-log.cjs'),
113
114
  'learning-match': require('./bin/np-tools/learning-match.cjs'),
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "nubos-pilot",
3
- "version": "1.2.4",
3
+ "version": "1.3.0",
4
4
  "description": "Self-hosted AI pilot for any codebase. Researcher and critic agents plan, execute and verify each change.",
5
5
  "homepage": "https://pilot.nubos.cloud",
6
6
  "repository": {
@@ -47,6 +47,47 @@ Once the UAT file is emitted and the smoke-run is green, the orchestrator spawns
47
47
  - No `test.skip(...)` without a corresponding Fail / Defer marker.
48
48
  - No vacuous assertions.
49
49
 
50
+ Off-host dispatch (ADR-0021): if `np-critic` routes to an `openai-compat` provider, run the single review read-only via `spawn-offhost` instead of a native Agent spawn. The off-host critic CANNOT write to `$TMPDIR` (off-host Write is cwd-confined), so it emits the findings object `{ "critic":"critic", "findings":[…] }` as its FINAL MESSAGE; the orchestrator writes that to the critic-report path only after asserting the `critic` axis ∈ {critic,style,tests,acceptance} — any other value is silently dropped by `mergeCriticOutputs` (project_np_critic_field_schema_bug), so fail loud instead.
51
+
52
+ ```bash
53
+ LANG_DIRECTIVE=$(node .nubos-pilot/bin/np-tools.cjs lang-directive)
54
+ mkdir -p "${TMPDIR:-/tmp}/nubos-pilot/critic-reports"
55
+ CRITIC_REPORT_PATH="${TMPDIR:-/tmp}/nubos-pilot/critic-reports/critic-addtests-${PHASE}.json"
56
+ UAT_FILE=$(echo "$INIT" | node -e "process.stdin.on('data', d => console.log(JSON.parse(d).target_path))")
57
+ VERIFICATION_FILE=$(echo "$INIT" | node -e "process.stdin.on('data', d => console.log(JSON.parse(d).verification_path))")
58
+ CRITIC_KIND=$(node .nubos-pilot/bin/np-tools.cjs resolve-model np-critic --json 2>/dev/null \
59
+ | node -e 'let s="";process.stdin.on("data",d=>s+=d).on("end",()=>{try{console.log(JSON.parse(s).kind||"native")}catch{console.log("native")}})')
60
+ if [ "$CRITIC_KIND" = "openai-compat" ]; then
61
+ # Render the tests-axis-weighted critic prompt (UAT file + VERIFICATION.md as
62
+ # files_to_read) PLUS the emit-as-final-message instruction PLUS $LANG_DIRECTIVE
63
+ # into a TMPDIR file, then spawn read-only.
64
+ ADDTESTS_CRITIC_PROMPT="${TMPDIR:-/tmp}/np-offhost-addtests-critic-${PHASE}.md"
65
+ cat > "$ADDTESTS_CRITIC_PROMPT" <<EOF
66
+ You are np-critic reviewing a UAT regression suite (tests-axis weighted).
67
+
68
+ files_to_read:
69
+ - ${UAT_FILE}
70
+ - ${VERIFICATION_FILE}
71
+
72
+ Verify, weighting the tests-axis (verify-mismatch, missing-test, weak-assertion, silenced-failure):
73
+ - Every Pass-case in VERIFICATION.md has a corresponding test in the UAT file.
74
+ - Every test name describes observable behaviour.
75
+ - No test.skip(...) without a corresponding Fail / Defer marker.
76
+ - No vacuous assertions.
77
+
78
+ Emit ONLY the findings JSON object { "critic":"critic", "findings":[…] } as your final message.
79
+
80
+ ${LANG_DIRECTIVE}
81
+ EOF
82
+ OFFHOST_CRITIC_OUT=$(node .nubos-pilot/bin/np-tools.cjs spawn-offhost \
83
+ --agent np-critic --task-file "$ADDTESTS_CRITIC_PROMPT" --read-only)
84
+ echo "$OFFHOST_CRITIC_OUT" | CRITIC_REPORT_PATH="$CRITIC_REPORT_PATH" node -e 'let s="";process.stdin.on("data",d=>s+=d).on("end",()=>{let env,f;try{env=JSON.parse(s);f=JSON.parse(env.content)}catch{console.error("off-host critic: final message is not the findings JSON object");process.exit(1)}var SUP=["critic","style","tests","acceptance"];if(!f||typeof f!=="object"||SUP.indexOf(f.critic)<0){console.error("off-host critic: `critic` must be one of "+SUP.join("/")+" — any other value is silently dropped by mergeCriticOutputs (project_np_critic_field_schema_bug)");process.exit(1)}require("fs").writeFileSync(process.env.CRITIC_REPORT_PATH,JSON.stringify(f))})' || exit 1
85
+ else
86
+ # → native Agent spawn of np-critic writes $CRITIC_REPORT_PATH (the existing path).
87
+ true
88
+ fi
89
+ ```
90
+
50
91
  Findings of category `missing-test`, `weak-assertion`, `silenced-failure`, or `verify-mismatch` route per `lib/nubosloop.cjs::routeFindings`. A single Build-Fixer-style round on `init add-tests` closes the loop. Beyond one round the workflow exits non-zero and the user resolves manually.
51
92
 
52
93
  This is intentionally a one-pass adversarial review (not the full Critic-Schwarm) — the UAT-emitter is mechanical and only one axis (test quality) needs adversarial coverage.
@@ -70,10 +70,14 @@ When the user invokes `/np:architect-phase <N> --research` (or when `swarm.resea
70
70
 
71
71
  The architect then consumes the consensus-merged `RESEARCH.md` instead of a single-spawn output. ADR-0011 details the merge rules and the `<consensus_meta>` audit block.
72
72
 
73
+ **Off-host (ADR-0021):** the Schwarm reuses the exact off-host `np-researcher` mechanism documented in `/np:research-phase` — detect `resolve-model np-researcher --kind`; when `openai-compat`, run the `k` spawns via `spawn-offhost --agent np-researcher --task-id "${MILESTONE_ID}-S000-T0000" --no-audit` (synthetic milestone-level canonical task-id for the Rule-9 ledger) and stamp one `loop-audit-tool-use` per spawn. **Offline only** — the off-host toolset has no `WebFetch`/`context7`, so route `np-researcher` native for online research.
74
+
73
75
  ## Adversarial Loop (1 round)
74
76
 
75
77
  After the architect emits `M<NNN>-ARCHITECTURE.md`, the orchestrator spawns ONE `np-critic` instance with the architecture file + `M<NNN>-CONTEXT.md` as inputs. The critic verifies that every locked decision in CONTEXT has a corresponding architecture entry and that no `Deferred` items leaked into the architecture. Findings of category `unmet-criterion`, `locked-decision-violation`, or `information-missing` route per `lib/nubosloop.cjs::routeFindings`. A single Build-Fixer-style round on the architect closes the loop. Beyond one round the workflow exits with `stuck` and the user resolves manually — architecture decisions don't merit unbounded looping.
76
78
 
79
+ **Off-host (ADR-0021):** when `np-critic` routes to an `openai-compat` provider (`resolve-model np-critic --kind`), run it `--read-only` via `spawn-offhost --agent np-critic` exactly as the `/np:execute-phase` off-host critic — the critic emits its `{ "critic":"critic", "findings":[…] }` object as the final message (off-host Write is cwd-confined, cannot reach `$TMPDIR`), and the orchestrator persists it only after asserting `critic ∈ {critic,style,tests,acceptance}` (else fail loud — `project_np_critic_field_schema_bug`).
80
+
77
81
  ## Skills (Nubos library)
78
82
 
79
83
  Nubos ships a design-time skill library under `.claude/skills/np-*/` (present only on Claude Code). These are the **quality bar for the architecture decisions you are about to commit** — each skill's "Verification bar" is the standard each ADR-style decision is held to. Before spawning `np-architect`, classify the milestone (read `M<NNN>-CONTEXT.md` + `M<NNN>-RESEARCH.md`) and inject the matching skill triggers into the architect's spawn prompt. Skills **stack** — include every row the milestone matches (cap at the most relevant ~4 if more match; always keep the security row when it applies).
@@ -113,6 +117,21 @@ If zero skills match, omit the skill-directive line — do not invent skills.
113
117
  Der Agent ist read-only auf Source — er schreibt EINE Datei:
114
118
  `.nubos-pilot/milestones/M<NNN>/M<NNN>-ARCHITECTURE.md`.
115
119
 
120
+ **Off-host (ADR-0021):** when `np-architect` routes to an `openai-compat` provider, run it via `spawn-offhost` instead of the host spawn. `np-architect` is NOT Rule-9-audited and writes only the one `M<NNN>-ARCHITECTURE.md` artefact under `.nubos-pilot/` (inside the repo cwd — never live code), so it runs off-host with the default cwd (Read/Grep/Glob over the repo + Write confined to cwd), **no `--allow-bash`, no worktree**. It writes the file exactly as the native architect; no emit-and-persist contract is needed.
121
+
122
+ ```bash
123
+ ARCHITECT_KIND=$(node .nubos-pilot/bin/np-tools.cjs resolve-model np-architect --json 2>/dev/null \
124
+ | node -e 'let s="";process.stdin.on("data",d=>s+=d).on("end",()=>{try{console.log(JSON.parse(s).kind||"native")}catch{console.log("native")}})')
125
+ if [ "$ARCHITECT_KIND" = "openai-compat" ]; then
126
+ ARCHITECT_PROMPT="${TMPDIR:-/tmp}/np-offhost-architect-M<NNN>.md"
127
+ # … render the SAME files_to_read block + Milestone/Task + (matched) skill
128
+ # directive above, PLUS $LANG_DIRECTIVE, into "$ARCHITECT_PROMPT" …
129
+ node .nubos-pilot/bin/np-tools.cjs spawn-offhost \
130
+ --agent np-architect --task-file "$ARCHITECT_PROMPT" --phase "M<NNN>" >/dev/null
131
+ fi
132
+ # else → native host spawn per the block above.
133
+ ```
134
+
116
135
  ## Post
117
136
 
118
137
  Wenn der Agent `## CONTEXT CONFLICT` emittiert statt der Datei:
@@ -372,6 +372,7 @@ CONTEXT.md now captures the decisions. Success Criteria in `roadmap.yaml` are st
372
372
  ```bash
373
373
  SC_START=$(node .nubos-pilot/bin/np-tools.cjs metrics start-timestamp)
374
374
  SC_MODEL=$(node .nubos-pilot/bin/np-tools.cjs resolve-model np-sc-extractor --profile balanced)
375
+ SC_KIND=$(node .nubos-pilot/bin/np-tools.cjs resolve-model np-sc-extractor --kind 2>/dev/null || echo native)
375
376
 
376
377
  REQS_PATH=".nubos-pilot/REQUIREMENTS.md"
377
378
 
@@ -393,17 +394,35 @@ EXISTING_SC_JSON=$(node .nubos-pilot/bin/np-tools.cjs phase-meta "$PHASE" --fiel
393
394
  # call `node .nubos-pilot/bin/np-tools.cjs update-phase-meta $PHASE --stdin`
394
395
  # with {"success_criteria": [{id:"SC-N", text:"..."}, ...]} on its stdin,
395
396
  # and print a one-line summary.
396
- # Guard: the SC_COUNT check below (lines 399-403) hard-aborts if the spawn
397
- # returns zero criteria.
397
+ # Guard: the SC_COUNT check below hard-aborts if the spawn returns zero criteria.
398
+ # Off-host (ADR-0021): when np-sc-extractor routes to an openai-compat provider,
399
+ # run it via spawn-offhost (below) INSTEAD of the Agent tool.
398
400
  # ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
399
-
400
- SC_END=$(node .nubos-pilot/bin/np-tools.cjs metrics end-timestamp)
401
- node .nubos-pilot/bin/np-tools.cjs metrics record \
402
- --agent np-sc-extractor --tier haiku --resolved-model "$SC_MODEL" \
403
- --phase "$PHASE" --plan "${MILESTONE_ID}-sc" --task "${MILESTONE_ID}-sc-extract" \
404
- --started "$SC_START" --ended "$SC_END" \
405
- --tokens-in "${TOKENS_IN:-0}" --tokens-out "${TOKENS_OUT:-0}" \
406
- --retry-count 0 --status ok --runtime "$RUNTIME"
401
+ if [ "$SC_KIND" = "openai-compat" ]; then
402
+ # np-sc-extractor is NOT Rule-9-audited and writes ONLY roadmap.yaml under
403
+ # .nubos-pilot/ (inside the repo cwd — NOT live code), so it runs off-host
404
+ # with the default cwd (repo root): Read/Grep/Glob over the repo + Write
405
+ # confined to cwd. NO --allow-bash, NO worktree (no live-code blast radius
406
+ # to isolate). It persists success_criteria into roadmap.yaml exactly as the
407
+ # native extractor does — no emit-and-persist contract needed.
408
+ # spawn-offhost records the metrics row itself.
409
+ SC_PROMPT="${TMPDIR:-/tmp}/np-offhost-sc-extractor-${MILESTONE_ID}.md"
410
+ # … render the SAME prompt the ACTION CONTRACT above describes (milestone,
411
+ # milestone_id, milestone_dir, context_path, requirements_path,
412
+ # existing_success_criteria) PLUS $LANG_DIRECTIVE into "$SC_PROMPT" …
413
+ node .nubos-pilot/bin/np-tools.cjs spawn-offhost \
414
+ --agent np-sc-extractor --task-file "$SC_PROMPT" \
415
+ --phase "$PHASE" >/dev/null
416
+ else
417
+ # → execute the Agent call per ACTION CONTRACT above (native host spawn), then:
418
+ SC_END=$(node .nubos-pilot/bin/np-tools.cjs metrics end-timestamp)
419
+ node .nubos-pilot/bin/np-tools.cjs metrics record \
420
+ --agent np-sc-extractor --tier haiku --resolved-model "$SC_MODEL" \
421
+ --phase "$PHASE" --plan "${MILESTONE_ID}-sc" --task "${MILESTONE_ID}-sc-extract" \
422
+ --started "$SC_START" --ended "$SC_END" \
423
+ --tokens-in "${TOKENS_IN:-0}" --tokens-out "${TOKENS_OUT:-0}" \
424
+ --retry-count 0 --status ok --runtime "$RUNTIME"
425
+ fi
407
426
  ```
408
427
 
409
428
  After the spawn, sanity-check that `success_criteria` is non-empty:
@@ -168,7 +168,7 @@ Every task runs through the **Nubosloop** (ADR-0010, `lib/nubosloop.cjs`) — pr
168
168
 
169
169
  1. **Pre-flight cache lookup** (Round 1 only) — `loop-run-round --phase preflight --query "$TASK_QUERY"`. A hit at similarity ≥ `swarm.research.threshold` and `occurrence ≥ swarm.research.minOccurrence` short-circuits the Researcher-Schwarm; the cached pattern enters the Executor prompt with provenance `[CACHED]`. Soft cache failures (adapter-unknown) downgrade to a miss with `cache_miss_reason` populated; hard failures (corrupt store, version mismatch) propagate.
170
170
  2. **Researcher-Schwarm (on cache miss, or on `next_action=researcher` re-route)** — orchestrator spawns `swarm.research.k=3` independent `np-researcher` agents IN PARALLEL (single message, three Agent blocks) and merges their outputs through `lib/researcher-swarm.cjs::mergeConsensus` (Mehrheit / Union / Schnittmenge). The merged consensus enters the Executor prompt with provenance.
171
- 3. **Executor (R1) or Build-Fixer (R≥2)** — single LLM spawn. Round 1 spawns `agents/np-executor.md`. Round ≥ 2 spawns `agents/np-build-fixer.md` with prior critic findings + verify output appended. Edits ONLY paths in `files_modified` (D-04 — no scope expansion). Does NOT call `commit-task`.
171
+ 3. **Executor (R1) or Build-Fixer (R≥2)** — single LLM spawn. Round 1 spawns `agents/np-executor.md`. Round ≥ 2 spawns `agents/np-build-fixer.md` with prior critic findings + verify output appended. Edits ONLY paths in `files_modified` (D-04 — no scope expansion). Does NOT call `commit-task`. **Off-host (ADR-0021):** when the executor agent routes to an `openai-compat` provider (`agent_routing`), the spawn runs through `spawn-offhost` inside the existing per-wave slice worktree instead of the host Agent tool. Worktree isolation is required, not created on demand: if `workflow.worktree_isolation` is not enabled, the off-host branch aborts — see the off-host branch in the spawn block below. It satisfies Rule 9 via an injected native `knowledge-search` tool, and the orchestrator runs the same Step-4 audit stamp.
172
172
  4. **Mechanical Checks (orchestrator, NOT the agent)** — run task's `<verify>` command + stack linters (`phpstan`, `pint`, `tsc`, `eslint`); capture exit code + output to `$VERIFY_LOG`. Then `loop-audit-tool-use "$TASK_ID" --agent "$EXECUTOR_AGENT" --tool-use-log <json>` confirms the spawn invoked a knowledge-search tool ≥ 1× (Rule 9). The audited agent satisfies Rule 9 by running `node np-tools.cjs knowledge-search "<query>" --task "$TASK_ID"` via Bash, then stamping the exact string `knowledge-search` in `--tool-use-log`. The full accepted set is the `SEARCH_TOOLS` constant in `lib/nubosloop.cjs`; that constant is the single source of truth — do not re-enumerate it here. Audit findings get round-stamped and feed `loop-evaluate` alongside critic findings. Then call `loop-run-round --phase post-executor --verify-exit-code "$VERIFY_EXIT" --verify-output-path "$VERIFY_LOG"`. On verify-red the verb returns `next_action: spawn-build-fixer` — skip critics, advance to next round directly.
173
173
  5. **Critic (verify-green only)** — one Critic agent spawns: `agents/np-critic.md` (sonnet). It writes the full findings JSON to `$CRITIC_REPORT_PATH` and emits a small verdict envelope as its final message (ADR-0010 §L5 Verdict-Only Contract, 2026-05-05). Single-critic revision per §Trust Layer 2026-05-05 — the prior 3-critic Schwarm was collapsed into a single critic because three parallel spawns added latency without proportional finding-quality gains; the Verdict-Only Contract on top reduces per-round main-context tokens by an order of magnitude (verbatim findings reports were the dominant Nubosloop cost-driver).
174
174
  6. **Route** — `loop-run-round --phase post-critics --critic-outputs-path "$CRITIC_REPORT_PATH"` (or legacy `--critic-outputs "$CRITIC_JSON"` when the Verdict-Only Contract is unavailable) returns `next_action ∈ {commit, executor, researcher, askuser, plan-checker, stuck}`:
@@ -359,7 +359,40 @@ for WAVE_INDEX in 0 1 2 ...; do
359
359
  if { [ "$ROUND" -eq 1 ] && [ "$CACHE_HIT" != "true" ]; } || [ "$NEXT_ACTION" = "researcher" ]; then
360
360
  SPAWN_SPECS=$(echo "$PREFLIGHT" | node -e \
361
361
  'process.stdin.on("data",d=>{const j=JSON.parse(d);process.stdout.write(JSON.stringify((j.swarm&&j.swarm.spawn_specs)||[]))})')
362
- # execute groups (1) + (2) per ACTION CONTRACT above, then:
362
+ # Off-host researcher swarm (ADR-0021): if np-researcher routes to an
363
+ # openai-compat provider, run $SWARM_K read-only spawns via spawn-offhost.
364
+ # np-researcher is Rule-9-audited → --task-id injects knowledge-search;
365
+ # read-only ⇒ no worktree needed. Each spawn MUST emit the per-spawn
366
+ # consensus JSON { decisions[], risks[], patterns[], open_questions[],
367
+ # sources[] } that researcher-merge consumes (NOT the researcher-output
368
+ # markdown artifact — that schema is for M<NNN>-RESEARCH.md, a different
369
+ # contract). A spawn whose output is not that JSON is substituted with an
370
+ # empty {} so researcher-merge degrades gracefully instead of aborting the
371
+ # wave (exit 4). --no-audit defers the Rule-9 stamp to the orchestrator's
372
+ # group-(2) loop-audit-tool-use (one per spawn) so the post-researcher
373
+ # SKIP-GUARD is satisfied exactly as for native spawns.
374
+ RESEARCHER_KIND=$(node .nubos-pilot/bin/np-tools.cjs resolve-model np-researcher --json 2>/dev/null \
375
+ | node -e 'let s="";process.stdin.on("data",d=>s+=d).on("end",()=>{try{console.log(JSON.parse(s).kind||"native")}catch{console.log("native")}})')
376
+ if [ "$RESEARCHER_KIND" = "openai-compat" ]; then
377
+ SWARM_K=$(echo "$PREFLIGHT" | node -e 'let s="";process.stdin.on("data",d=>s+=d).on("end",()=>{const j=JSON.parse(s);console.log((j.swarm&&j.swarm.k)||3)})')
378
+ SPAWN_OUT_PATHS=()
379
+ i=0
380
+ while [ "$i" -lt "$SWARM_K" ]; do
381
+ OUT_PATH="${TMPDIR:-/tmp}/np-spawn-${TASK_ID}-r${ROUND}-${i}.json"
382
+ R_PROMPT="${TMPDIR:-/tmp}/np-offhost-researcher-${TASK_ID}-r${ROUND}-${i}.md"
383
+ # … render researcher prompt i (task_query verbatim + seed_delta[i] + files_to_read) …
384
+ R_OUT=$(node .nubos-pilot/bin/np-tools.cjs spawn-offhost \
385
+ --agent np-researcher --task-file "$R_PROMPT" --task-id "$TASK_ID" \
386
+ --read-only --no-audit ${SLICE_CWD:+--cwd "$SLICE_CWD"})
387
+ echo "$R_OUT" | OUT_PATH="$OUT_PATH" node -e 'let s="";process.stdin.on("data",d=>s+=d).on("end",()=>{let c="";try{c=JSON.parse(s).content||""}catch{}var ok=false;try{JSON.parse(c);ok=true}catch{}if(!ok){process.stderr.write("off-host researcher: spawn output is not the {decisions,risks,patterns,open_questions,sources} JSON researcher-merge expects — substituting empty consensus for this spawn\n");c="{}"}require("fs").writeFileSync(process.env.OUT_PATH,c)})'
388
+ R_LOG=$(echo "$R_OUT" | node -e 'let s="";process.stdin.on("data",d=>s+=d).on("end",()=>{try{console.log(JSON.stringify((JSON.parse(s).toolLog||[]).map(t=>t.name)))}catch{console.log("[]")}})')
389
+ node .nubos-pilot/bin/np-tools.cjs loop-audit-tool-use "$TASK_ID" --agent np-researcher --tool-use-log "$R_LOG"
390
+ SPAWN_OUT_PATHS+=("$OUT_PATH")
391
+ i=$((i+1))
392
+ done
393
+ else
394
+ true # → execute groups (1) + (2) per ACTION CONTRACT above (native Agent spawns).
395
+ fi
363
396
  CONSENSUS_PATTERN=$(node .nubos-pilot/bin/researcher-merge.cjs \
364
397
  "${SPAWN_OUT_PATHS[@]}")
365
398
  node .nubos-pilot/bin/np-tools.cjs loop-run-round "$TASK_ID" --phase post-researcher
@@ -420,7 +453,43 @@ for WAVE_INDEX in 0 1 2 ...; do
420
453
  else
421
454
  EXECUTOR_MODEL=$(node .nubos-pilot/bin/np-tools.cjs resolve-model "$EXECUTOR_AGENT" --profile frontier)
422
455
  fi
423
- # → execute group (1) per ACTION CONTRACT above, then:
456
+
457
+ # ━━━ Off-host executor (ADR-0021) — config-driven via agent_routing ━━━
458
+ # If $EXECUTOR_AGENT routes to an openai-compat provider, run it through
459
+ # the nubos-pilot dispatch loop (spawn-offhost) instead of the host Agent
460
+ # tool. Off-host REQUIRES worktree isolation: the existing per-wave worktree
461
+ # (created at §Worktree-Isolation above) confines model-driven Write/Edit/
462
+ # Bash, and the slice-end ff-merge is what lands the work on the parent
463
+ # branch. The worktree lives under .nubos-pilot/worktrees/, so checkpoint /
464
+ # search-evidence / metrics still resolve to the project root — only file
465
+ # ops are confined. We do NOT force a worktree out of band: doing so would
466
+ # bypass the merge-back gate (commits stranded) and the orchestrator's
467
+ # cwd=worktree convention (commit-task would find nothing). The orchestrator
468
+ # runs the canonical Step-4 loop-audit-tool-use with the returned tool-log,
469
+ # so spawn-offhost is called --no-audit to avoid double-stamping the round.
470
+ EXECUTOR_KIND=$(node .nubos-pilot/bin/np-tools.cjs resolve-model "$EXECUTOR_AGENT" --json 2>/dev/null \
471
+ | node -e 'let s="";process.stdin.on("data",d=>s+=d).on("end",()=>{try{console.log(JSON.parse(s).kind||"native")}catch{console.log("native")}})')
472
+ if [ "$EXECUTOR_KIND" = "openai-compat" ]; then
473
+ if [ "$WORKTREE_ISOLATION" != "true" ] || [ -z "$SLICE_CWD" ] || [ "$SLICE_CWD" = "." ]; then
474
+ echo "[np:execute-phase] off-host executor ($EXECUTOR_AGENT) requires workflow.worktree_isolation=true so model-driven edits are confined and ff-merged back. Enable it (config-set workflow.worktree_isolation true) and re-run." >&2
475
+ exit 1
476
+ fi
477
+ # Write the SAME rendered executor prompt you would have handed the Agent
478
+ # tool (task plan + slice context + consensus + success criteria +
479
+ # language directive + skill block) to this file:
480
+ OFFHOST_PROMPT="${TMPDIR:-/tmp}/np-offhost-${TASK_ID}-r${ROUND}.md"
481
+ # … render prompt to "$OFFHOST_PROMPT" …
482
+ OFFHOST_OUT=$(node .nubos-pilot/bin/np-tools.cjs spawn-offhost \
483
+ --agent "$EXECUTOR_AGENT" --task-file "$OFFHOST_PROMPT" \
484
+ --task-id "$TASK_ID" --cwd "$SLICE_CWD" --allow-bash --no-audit)
485
+ # Harvest the tool-name log for the Layer-C audit stamp (Step 4).
486
+ EXECUTOR_TOOL_LOG=$(echo "$OFFHOST_OUT" | node -e 'let s="";process.stdin.on("data",d=>s+=d).on("end",()=>{try{console.log(JSON.stringify((JSON.parse(s).toolLog||[]).map(t=>t.name)))}catch{console.log("[]")}})')
487
+ else
488
+ # → execute group (1) per ACTION CONTRACT above (native host Agent spawn);
489
+ # EXECUTOR_TOOL_LOG is harvested from the spawn's tool_use stream.
490
+ true
491
+ fi
492
+ # ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
424
493
 
425
494
  node .nubos-pilot/bin/np-tools.cjs checkpoint transition "$TASK_ID" verifying
426
495
 
@@ -510,7 +579,27 @@ for WAVE_INDEX in 0 1 2 ...; do
510
579
  # ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
511
580
  mkdir -p "${TMPDIR:-/tmp}/nubos-pilot/critic-reports"
512
581
  CRITIC_REPORT_PATH="${TMPDIR:-/tmp}/nubos-pilot/critic-reports/critic-${TASK_ID}-r${ROUND}.json"
513
- # execute group (2) per ACTION CONTRACT above, then:
582
+ # Off-host critic (ADR-0021): if np-critic routes to an openai-compat
583
+ # provider, run it read-only via spawn-offhost. It CANNOT write to $TMPDIR
584
+ # (off-host Write is cwd-confined), so it emits the findings object
585
+ # { "critic":"critic", "findings":[…], "criteria"?:[…] } as its FINAL
586
+ # MESSAGE; the orchestrator writes that to $CRITIC_REPORT_PATH only after
587
+ # asserting the `critic` axis ∈ {critic,style,tests,acceptance} — any other
588
+ # value is silently dropped by mergeCriticOutputs
589
+ # (project_np_critic_field_schema_bug), so fail loud instead.
590
+ CRITIC_KIND=$(node .nubos-pilot/bin/np-tools.cjs resolve-model np-critic --json 2>/dev/null \
591
+ | node -e 'let s="";process.stdin.on("data",d=>s+=d).on("end",()=>{try{console.log(JSON.parse(s).kind||"native")}catch{console.log("native")}})')
592
+ if [ "$CRITIC_KIND" = "openai-compat" ]; then
593
+ OFFHOST_CRITIC_PROMPT="${TMPDIR:-/tmp}/np-offhost-critic-${TASK_ID}-r${ROUND}.md"
594
+ # … render the critic prompt (same files_to_read as group (2)) PLUS:
595
+ # "Emit ONLY the findings JSON object as your final message." …
596
+ OFFHOST_CRITIC_OUT=$(node .nubos-pilot/bin/np-tools.cjs spawn-offhost \
597
+ --agent np-critic --task-file "$OFFHOST_CRITIC_PROMPT" --read-only ${SLICE_CWD:+--cwd "$SLICE_CWD"})
598
+ echo "$OFFHOST_CRITIC_OUT" | CRITIC_REPORT_PATH="$CRITIC_REPORT_PATH" node -e 'let s="";process.stdin.on("data",d=>s+=d).on("end",()=>{let env,f;try{env=JSON.parse(s);f=JSON.parse(env.content)}catch{console.error("off-host critic: final message is not the findings JSON object");process.exit(1)}var SUP=["critic","style","tests","acceptance"];if(!f||typeof f!=="object"||SUP.indexOf(f.critic)<0){console.error("off-host critic: `critic` must be one of "+SUP.join("/")+" — any other value is silently dropped by mergeCriticOutputs (project_np_critic_field_schema_bug)");process.exit(1)}require("fs").writeFileSync(process.env.CRITIC_REPORT_PATH,JSON.stringify(f))})' || exit 1
599
+ else
600
+ # → execute group (2) per ACTION CONTRACT above (native Agent spawn writes $CRITIC_REPORT_PATH).
601
+ true
602
+ fi
514
603
  node .nubos-pilot/bin/np-tools.cjs loop-audit-tool-use "$TASK_ID" --agent np-critic --tool-use-log '[]'
515
604
  POST_CRIT=$(node .nubos-pilot/bin/np-tools.cjs loop-run-round "$TASK_ID" \
516
605
  --phase post-critics --critic-outputs-path "$CRITIC_REPORT_PATH")
@@ -241,17 +241,38 @@ for ITER in 1 2; do
241
241
  # <prior_findings>$LAST_FINDINGS</prior_findings> (path to verdict JSON, R≥2)
242
242
  # <agent_skills>$AGENT_SKILLS_PLANNER</agent_skills>
243
243
  # Agent MUST: write/update slice plans inside $milestone_dir.
244
+ # Off-host (ADR-0021): when np-planner routes to an openai-compat provider
245
+ # (agent_routing), run it via spawn-offhost (below) INSTEAD of the Agent tool.
244
246
  # ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
245
247
  PLANNER_START=$(node .nubos-pilot/bin/np-tools.cjs metrics start-timestamp)
246
248
  PLANNER_MODEL=$(node .nubos-pilot/bin/np-tools.cjs resolve-model np-planner --profile frontier)
247
- # execute the Agent call per ACTION CONTRACT above, then:
248
- PLANNER_END=$(node .nubos-pilot/bin/np-tools.cjs metrics end-timestamp)
249
- node .nubos-pilot/bin/np-tools.cjs metrics record \
250
- --agent np-planner --tier opus --resolved-model "$PLANNER_MODEL" \
251
- --phase "$PHASE" --plan "${milestone_id}-plan" --task "${milestone_id}-planner-run" \
252
- --started "$PLANNER_START" --ended "$PLANNER_END" \
253
- --tokens-in "${TOKENS_IN:-0}" --tokens-out "${TOKENS_OUT:-0}" \
254
- --retry-count 0 --status ok --runtime "$RUNTIME"
249
+ PLANNER_KIND=$(node .nubos-pilot/bin/np-tools.cjs resolve-model np-planner --json 2>/dev/null \
250
+ | node -e 'let s="";process.stdin.on("data",d=>s+=d).on("end",()=>{try{console.log(JSON.parse(s).kind||"native")}catch{console.log("native")}})')
251
+ if [ "$PLANNER_KIND" = "openai-compat" ]; then
252
+ # np-planner is NOT Rule-9-audited and writes ONLY planning artefacts under
253
+ # .nubos-pilot/ (inside the repo cwd — NOT live code), so it runs off-host
254
+ # with the default cwd (repo root): Read/Grep/Glob over the whole repo + Write
255
+ # confined to cwd. NO --allow-bash and NO worktree (there is no live-code
256
+ # blast radius to isolate, unlike the executor). It writes slice plans into
257
+ # $milestone_dir exactly as the native planner does — no emit-and-persist
258
+ # contract needed. spawn-offhost records the metrics row itself.
259
+ PLANNER_PROMPT="${TMPDIR:-/tmp}/np-offhost-planner-${milestone_id}-i${ITER}.md"
260
+ # … render the SAME prompt the ACTION CONTRACT above describes (mode,
261
+ # milestone, milestone_dir, goal, requirements, prior_findings,
262
+ # agent_skills) PLUS $LANG_DIRECTIVE into "$PLANNER_PROMPT" …
263
+ node .nubos-pilot/bin/np-tools.cjs spawn-offhost \
264
+ --agent np-planner --task-file "$PLANNER_PROMPT" \
265
+ --phase "$PHASE" --plan "${milestone_id}-plan" >/dev/null
266
+ else
267
+ # → execute the Agent call per ACTION CONTRACT above (native host spawn), then:
268
+ PLANNER_END=$(node .nubos-pilot/bin/np-tools.cjs metrics end-timestamp)
269
+ node .nubos-pilot/bin/np-tools.cjs metrics record \
270
+ --agent np-planner --tier opus --resolved-model "$PLANNER_MODEL" \
271
+ --phase "$PHASE" --plan "${milestone_id}-plan" --task "${milestone_id}-planner-run" \
272
+ --started "$PLANNER_START" --ended "$PLANNER_END" \
273
+ --tokens-in "${TOKENS_IN:-0}" --tokens-out "${TOKENS_OUT:-0}" \
274
+ --retry-count 0 --status ok --runtime "$RUNTIME"
275
+ fi
255
276
 
256
277
  # ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
257
278
  # ACTION CONTRACT — Step 2b: Spawn np-plan-checker (immediately after 2a)
@@ -265,17 +286,37 @@ for ITER in 1 2; do
265
286
  # Agent MUST: read planner output (slice plans inside $milestone_dir),
266
287
  # write YAML verdict to $milestone_dir/.tmp-verdict-$ITER.yaml. Orchestrator
267
288
  # converts YAML → JSON at $VERDICT_JSON_PATH (next bash section).
289
+ # Off-host (ADR-0021): when np-plan-checker routes to an openai-compat provider,
290
+ # run it via spawn-offhost (below) INSTEAD of the Agent tool.
268
291
  # ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
269
292
  CHECKER_START=$(node .nubos-pilot/bin/np-tools.cjs metrics start-timestamp)
270
293
  CHECKER_MODEL=$(node .nubos-pilot/bin/np-tools.cjs resolve-model np-plan-checker --profile frontier)
271
- # execute the Agent call per ACTION CONTRACT above, then:
272
- CHECKER_END=$(node .nubos-pilot/bin/np-tools.cjs metrics end-timestamp)
273
- node .nubos-pilot/bin/np-tools.cjs metrics record \
274
- --agent np-plan-checker --tier opus --resolved-model "$CHECKER_MODEL" \
275
- --phase "$PHASE" --plan "${milestone_id}-plan" --task "${milestone_id}-planner-run" \
276
- --started "$CHECKER_START" --ended "$CHECKER_END" \
277
- --tokens-in "${TOKENS_IN:-0}" --tokens-out "${TOKENS_OUT:-0}" \
278
- --retry-count 0 --status ok --runtime "$RUNTIME"
294
+ CHECKER_KIND=$(node .nubos-pilot/bin/np-tools.cjs resolve-model np-plan-checker --json 2>/dev/null \
295
+ | node -e 'let s="";process.stdin.on("data",d=>s+=d).on("end",()=>{try{console.log(JSON.parse(s).kind||"native")}catch{console.log("native")}})')
296
+ if [ "$CHECKER_KIND" = "openai-compat" ]; then
297
+ # np-plan-checker is NOT Rule-9-audited and writes ONLY the verdict YAML under
298
+ # $milestone_dir (inside the repo cwd), so it runs off-host with the default
299
+ # cwd: Read/Grep/Glob over the repo + Write confined to cwd. NO --allow-bash,
300
+ # NO worktree. It writes $milestone_dir/.tmp-verdict-$ITER.yaml exactly as the
301
+ # native checker does (the orchestrator's YAML→JSON step is unchanged).
302
+ # spawn-offhost records the metrics row itself.
303
+ CHECKER_PROMPT="${TMPDIR:-/tmp}/np-offhost-plan-checker-${milestone_id}-i${ITER}.md"
304
+ # … render the SAME prompt the ACTION CONTRACT above describes (milestone,
305
+ # milestone_dir, agent_skills) PLUS $LANG_DIRECTIVE, and MUST state the exact
306
+ # output path $milestone_dir/.tmp-verdict-$ITER.yaml, into "$CHECKER_PROMPT" …
307
+ node .nubos-pilot/bin/np-tools.cjs spawn-offhost \
308
+ --agent np-plan-checker --task-file "$CHECKER_PROMPT" \
309
+ --phase "$PHASE" --plan "${milestone_id}-plan" >/dev/null
310
+ else
311
+ # → execute the Agent call per ACTION CONTRACT above (native host spawn), then:
312
+ CHECKER_END=$(node .nubos-pilot/bin/np-tools.cjs metrics end-timestamp)
313
+ node .nubos-pilot/bin/np-tools.cjs metrics record \
314
+ --agent np-plan-checker --tier opus --resolved-model "$CHECKER_MODEL" \
315
+ --phase "$PHASE" --plan "${milestone_id}-plan" --task "${milestone_id}-planner-run" \
316
+ --started "$CHECKER_START" --ended "$CHECKER_END" \
317
+ --tokens-in "${TOKENS_IN:-0}" --tokens-out "${TOKENS_OUT:-0}" \
318
+ --retry-count 0 --status ok --runtime "$RUNTIME"
319
+ fi
279
320
 
280
321
  VERDICT_JSON_PATH="$milestone_dir/.tmp-verdict-$ITER.json"
281
322
  # (verdict JSON: {status: passed|issues_found, findings: [...] })
@@ -253,8 +253,36 @@ omit the `model:` parameter at spawn (Phase 8 D-22 inherit-pattern).
253
253
  ```bash
254
254
  RESEARCHER_START=$(node .nubos-pilot/bin/np-tools.cjs metrics start-timestamp)
255
255
  RESEARCHER_MODEL=$(node .nubos-pilot/bin/np-tools.cjs resolve-model np-researcher --profile balanced)
256
+ RESEARCHER_KIND=$(node .nubos-pilot/bin/np-tools.cjs resolve-model np-researcher --kind 2>/dev/null || echo native)
256
257
  ```
257
258
 
259
+ **Off-host (ADR-0021):** when `np-researcher` routes to an `openai-compat` provider, run the `$SWARM_K` spawns via `spawn-offhost` instead of the abstract host spawn. Two specifics for this audited agent:
260
+
261
+ - **Synthetic canonical task-id.** `np-researcher` is Rule-9-audited and `dispatchOffHost` requires a `M<NNN>-S<NNN>-T<NNNN>` id for the search-evidence ledger + audit. Research is milestone-level (no real slice/task), so mint the synthetic id `${MILESTONE_ID}-S000-T0000` (the `S000-T0000` suffix is the documented "milestone-level, no slice/task" convention). The injected native `knowledge-search` tool satisfies Rule 9; the orchestrator stamps one `loop-audit-tool-use` per spawn.
262
+ - **Offline only.** The off-host toolset has **no `WebFetch`/`context7`** — an off-host researcher can only do `$MODE == offline` (knowledge-search) research. If `$MODE == online`, keep `np-researcher` native (or accept offline-only research for that spawn). This is a capability limit that is surfaced loudly, not a silent degradation.
263
+
264
+ Each spawn writes its own `$RESEARCH_DIR/spawn-<i>.md` (inside the repo cwd), so it runs write-enabled (NOT `--read-only`), with no `--allow-bash` and no worktree.
265
+
266
+ ```bash
267
+ if [ "$RESEARCHER_KIND" = "openai-compat" ]; then
268
+ OFFHOST_TASK_ID="${MILESTONE_ID}-S000-T0000"
269
+ i=0
270
+ while [ "$i" -lt "${SWARM_K:-3}" ]; do
271
+ R_PROMPT="${TMPDIR:-/tmp}/np-offhost-researcher-${MILESTONE_ID}-${i}.md"
272
+ # … render spawn-spec i (files_to_read + goal + requirements + seed_delta[i] +
273
+ # $SPAWN_SCHEMA + the EXACT output path $RESEARCH_DIR/spawn-${i}.md) PLUS
274
+ # $LANG_DIRECTIVE into "$R_PROMPT" …
275
+ R_OUT=$(node .nubos-pilot/bin/np-tools.cjs spawn-offhost \
276
+ --agent np-researcher --task-file "$R_PROMPT" --task-id "$OFFHOST_TASK_ID" --no-audit)
277
+ R_LOG=$(echo "$R_OUT" | node -e 'let s="";process.stdin.on("data",d=>s+=d).on("end",()=>{try{console.log(JSON.stringify((JSON.parse(s).toolLog||[]).map(t=>t.name)))}catch{console.log("[]")}})')
278
+ node .nubos-pilot/bin/np-tools.cjs loop-audit-tool-use "$OFFHOST_TASK_ID" --agent np-researcher --tool-use-log "$R_LOG"
279
+ i=$((i+1))
280
+ done
281
+ fi
282
+ ```
283
+
284
+ When `$RESEARCHER_KIND = native`, use the abstract host spawn below (the Phase 8 runtime adapter binds it):
285
+
258
286
  ```text
259
287
  Spawn agent=np-researcher tier=sonnet model=$RESEARCHER_MODEL mode=$MODE phase=$PHASE context=$CONTEXT_PATH output=$RESEARCH_PATH
260
288
  ```
@@ -351,6 +379,23 @@ Spawn agent=np-researcher-reconciler tier=sonnet model=$RECONCILER_MODEL phase=$
351
379
  schema_prompt=$RECONCILER_SCHEMA
352
380
  ```
353
381
 
382
+ **Off-host (ADR-0021):** when `np-researcher-reconciler` routes to an `openai-compat` provider, run it via `spawn-offhost` instead of the host spawn. The reconciler is NOT Rule-9-audited and writes only the single `$RESEARCH_PATH` (`M<NNN>-RESEARCH.md`) artefact under `.nubos-pilot/` (inside the repo cwd — never live code), so it runs off-host with the default cwd (Read/Grep/Glob over the spawn outputs + Write confined to cwd), **no `--allow-bash`, no worktree**. It writes `$RESEARCH_PATH` exactly as the native reconciler does; no emit-and-persist contract is needed. `spawn-offhost` records the metrics row itself.
383
+
384
+ ```bash
385
+ RECONCILER_KIND=$(node .nubos-pilot/bin/np-tools.cjs resolve-model np-researcher-reconciler --json 2>/dev/null \
386
+ | node -e 'let s="";process.stdin.on("data",d=>s+=d).on("end",()=>{try{console.log(JSON.parse(s).kind||"native")}catch{console.log("native")}})')
387
+ if [ "$RECONCILER_KIND" = "openai-compat" ]; then
388
+ RECONCILER_PROMPT="${TMPDIR:-/tmp}/np-offhost-reconciler-${MILESTONE_ID}.md"
389
+ # … render the SAME reconciler input (spawn_paths + merge_path + merged_json +
390
+ # context_path + $RECONCILER_SCHEMA + the EXACT final_path=$RESEARCH_PATH) PLUS
391
+ # $LANG_DIRECTIVE into "$RECONCILER_PROMPT" …
392
+ node .nubos-pilot/bin/np-tools.cjs spawn-offhost \
393
+ --agent np-researcher-reconciler --task-file "$RECONCILER_PROMPT" \
394
+ --phase "$PHASE" --plan "$PLAN_ID" --task "$TASK_ID" >/dev/null
395
+ fi
396
+ # else → native host spawn per the block above.
397
+ ```
398
+
354
399
  The reconciler classifies each consensus decision's reasoning-trace as `identical | overlapping | orthogonal | unknown` (groupthink detection), picks each contested decision with documented reason, and writes `$RESEARCH_PATH` with `agreement_score` and `contested_count` in frontmatter.
355
400
 
356
401
  ```bash
@@ -106,9 +106,27 @@ is runtime-agnostic — pick whichever dispatch mechanism your host supports.
106
106
 
107
107
  ```bash
108
108
  PROSE_FILE=$(mktemp -t np-prose-XXXXXX.json)
109
- # Host dispatches agent with buildDocumenterPrompt(facts) and writes JSON
110
- # to $PROSE_FILE. Validate JSON before proceeding.
111
- python -c 'import json,sys; json.load(open(sys.argv[1]))' "$PROSE_FILE"
109
+ # Off-host (ADR-0021): when np-codebase-documenter routes to an openai-compat
110
+ # provider (agent_routing), run it via spawn-offhost INSTEAD of the native host
111
+ # dispatch below.
112
+ DOCUMENTER_KIND=$(node .nubos-pilot/bin/np-tools.cjs resolve-model np-codebase-documenter --kind 2>/dev/null || echo native)
113
+ if [ "$DOCUMENTER_KIND" = "openai-compat" ]; then
114
+ # np-codebase-documenter is NOT Rule-9-audited and writes ONLY
115
+ # .nubos-pilot/codebase/ artefacts (inside the repo cwd — NOT live code), so it
116
+ # runs off-host with the default cwd (repo root): Read/Grep/Glob over the repo +
117
+ # Write confined to cwd. NO --allow-bash and NO worktree (no live-code blast
118
+ # radius to isolate). The agent writes its module doc JSON itself, inside cwd.
119
+ DOC_PROMPT="${TMPDIR:-/tmp}/np-offhost-documenter-${MODULE_ID}.md"
120
+ # … render the SAME buildDocumenterPrompt(facts) prompt the native dispatch
121
+ # below describes PLUS $LANG_DIRECTIVE into "$DOC_PROMPT" …
122
+ node .nubos-pilot/bin/np-tools.cjs spawn-offhost \
123
+ --agent np-codebase-documenter --task-file "$DOC_PROMPT" \
124
+ --phase scan >/dev/null
125
+ else
126
+ # Host dispatches agent with buildDocumenterPrompt(facts) and writes JSON
127
+ # to $PROSE_FILE. Validate JSON before proceeding.
128
+ python -c 'import json,sys; json.load(open(sys.argv[1]))' "$PROSE_FILE"
129
+ fi
112
130
  ```
113
131
 
114
132
  Batch pacing: the user opted into batches during Step 1. Between batches,
@@ -129,6 +129,8 @@ The auditor reads `REQUIREMENTS.md`, filters to the milestone's declared require
129
129
  ```bash
130
130
  START=$(node .nubos-pilot/bin/np-tools.cjs metrics start-timestamp)
131
131
  MODEL=$(node .nubos-pilot/bin/np-tools.cjs resolve-model np-nyquist-auditor --profile frontier)
132
+ AUDITOR_KIND=$(node .nubos-pilot/bin/np-tools.cjs resolve-model np-nyquist-auditor --json 2>/dev/null \
133
+ | node -e 'let s="";process.stdin.on("data",d=>s+=d).on("end",()=>{try{console.log(JSON.parse(s).kind||"native")}catch{console.log("native")}})')
132
134
 
133
135
  # Build the read list from the init payload:
134
136
  SLICE_PLANS=$(find "$MILESTONE_DIR/slices" -maxdepth 2 -name 'S*-PLAN.md' 2>/dev/null)
@@ -136,19 +138,34 @@ SLICE_SUMMARIES=$(find "$MILESTONE_DIR/slices" -maxdepth 2 -name 'S*-SUMMARY.md'
136
138
  TASK_PLANS=$(find "$MILESTONE_DIR/slices" -path '*/tasks/*/T*-PLAN.md' 2>/dev/null)
137
139
  TASK_SUMMARIES=$(find "$MILESTONE_DIR/slices" -path '*/tasks/*/T*-SUMMARY.md' 2>/dev/null)
138
140
 
139
- # Spawn agent=np-nyquist-auditor model=$MODEL
140
- # input: slice_plans, slice_summaries, task_plans, task_summaries, validation_path,
141
- # template_path, requirements_path, milestone_dir, milestone, milestone_id
142
- # output: $VALIDATION_PATH with per-requirement Nyquist scoring
143
- # (COVERED / UNDER_SAMPLED / UNCOVERED), using templates/VALIDATION.md as skeleton.
144
-
145
- END=$(node .nubos-pilot/bin/np-tools.cjs metrics end-timestamp)
146
- node .nubos-pilot/bin/np-tools.cjs metrics record \
147
- --agent np-nyquist-auditor --tier haiku --resolved-model "$MODEL" \
148
- --phase "$PHASE" --plan "$PLAN_ID" --task "$TASK_ID" \
149
- --started "$START" --ended "$END" \
150
- --tokens-in "${TOKENS_IN:-0}" --tokens-out "${TOKENS_OUT:-0}" \
151
- --retry-count 0 --status ok --runtime "$RUNTIME"
141
+ if [ "$AUDITOR_KIND" = "openai-compat" ]; then
142
+ # Off-host (ADR-0021): np-nyquist-auditor is NOT Rule-9-audited and writes ONLY
143
+ # $VALIDATION_PATH (M<NNN>-VALIDATION.md) under .nubos-pilot/ (inside the repo
144
+ # cwd — NOT live code), so it runs off-host with the default cwd: Read/Grep/Glob
145
+ # over the repo + Write confined to cwd. NO --allow-bash, NO worktree. It writes
146
+ # the file from templates/VALIDATION.md exactly as the native auditor does (the
147
+ # orchestrator's output-lint check is unchanged). spawn-offhost self-records.
148
+ AUDITOR_PROMPT="${TMPDIR:-/tmp}/np-offhost-nyquist-${MILESTONE_ID}.md"
149
+ # … render the SAME auditor prompt (read list above + $VALIDATION_SCHEMA +
150
+ # template_path + requirements_path + the EXACT output path $VALIDATION_PATH)
151
+ # PLUS $LANG_DIRECTIVE into "$AUDITOR_PROMPT" …
152
+ node .nubos-pilot/bin/np-tools.cjs spawn-offhost \
153
+ --agent np-nyquist-auditor --task-file "$AUDITOR_PROMPT" \
154
+ --phase "$PHASE" --plan "$PLAN_ID" --task "$TASK_ID" >/dev/null
155
+ else
156
+ # Spawn agent=np-nyquist-auditor model=$MODEL (native host spawn)
157
+ # input: slice_plans, slice_summaries, task_plans, task_summaries, validation_path,
158
+ # template_path, requirements_path, milestone_dir, milestone, milestone_id
159
+ # output: $VALIDATION_PATH with per-requirement Nyquist scoring
160
+ # (COVERED / UNDER_SAMPLED / UNCOVERED), using templates/VALIDATION.md as skeleton.
161
+ END=$(node .nubos-pilot/bin/np-tools.cjs metrics end-timestamp)
162
+ node .nubos-pilot/bin/np-tools.cjs metrics record \
163
+ --agent np-nyquist-auditor --tier haiku --resolved-model "$MODEL" \
164
+ --phase "$PHASE" --plan "$PLAN_ID" --task "$TASK_ID" \
165
+ --started "$START" --ended "$END" \
166
+ --tokens-in "${TOKENS_IN:-0}" --tokens-out "${TOKENS_OUT:-0}" \
167
+ --retry-count 0 --status ok --runtime "$RUNTIME"
168
+ fi
152
169
  ```
153
170
 
154
171
  ## Validation Gate