nubos-pilot 1.2.4 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +17 -1
- package/README.md +2 -1
- package/SECURITY.md +3 -4
- package/bin/np-tools/_commands.cjs +1 -0
- package/bin/np-tools/learnings.cjs +1 -1
- package/bin/np-tools/resolve-model.cjs +55 -1
- package/bin/np-tools/resolve-model.test.cjs +139 -0
- package/bin/np-tools/security.cjs +1 -1
- package/bin/np-tools/spawn-headless.cjs +100 -1
- package/bin/np-tools/spawn-headless.test.cjs +108 -58
- package/bin/np-tools/spawn-offhost.cjs +93 -0
- package/bin/np-tools/spawn-offhost.test.cjs +38 -0
- package/lib/agents.cjs +16 -2
- package/lib/config-schema.cjs +5 -1
- package/lib/learnings/extract.cjs +4 -4
- package/lib/learnings/extract.test.cjs +8 -8
- package/lib/model-providers.cjs +118 -0
- package/lib/model-providers.test.cjs +85 -0
- package/lib/runtime/agent-loop.cjs +64 -0
- package/lib/runtime/agent-loop.test.cjs +135 -0
- package/lib/runtime/dispatch.cjs +174 -0
- package/lib/runtime/dispatch.test.cjs +193 -0
- package/lib/runtime/preflight.cjs +68 -0
- package/lib/runtime/preflight.test.cjs +62 -0
- package/lib/runtime/providers/openai-compat.cjs +102 -0
- package/lib/runtime/providers/openai-compat.test.cjs +103 -0
- package/lib/runtime/tools/index.cjs +415 -0
- package/lib/runtime/tools/index.test.cjs +230 -0
- package/lib/security/review.cjs +4 -4
- package/lib/security/review.test.cjs +6 -6
- package/np-tools.cjs +1 -0
- package/package.json +1 -1
- package/workflows/add-tests.md +41 -0
- package/workflows/architect-phase.md +19 -0
- package/workflows/discuss-phase.md +29 -10
- package/workflows/execute-phase.md +93 -4
- package/workflows/plan-phase.md +57 -16
- package/workflows/research-phase.md +45 -0
- package/workflows/scan-codebase.md +21 -3
- package/workflows/validate-phase.md +30 -13
- package/workflows/verify-work.md +17 -0
|
@@ -100,7 +100,7 @@ test('REV-5 parseReviewerOutput handles claude -p envelope, fences, and junk', (
|
|
|
100
100
|
assert.equal(junk.parse_ok, false);
|
|
101
101
|
});
|
|
102
102
|
|
|
103
|
-
test('REV-6 runReview guard blocks a concurrent review (no double spawn)', () => {
|
|
103
|
+
test('REV-6 runReview guard blocks a concurrent review (no double spawn)', async () => {
|
|
104
104
|
const dir = tempRepo();
|
|
105
105
|
const sid = freshSid();
|
|
106
106
|
try {
|
|
@@ -108,21 +108,21 @@ test('REV-6 runReview guard blocks a concurrent review (no double spawn)', () =>
|
|
|
108
108
|
fs.appendFileSync(path.join(dir, 'app.js'), 'const z = eval(q);\n');
|
|
109
109
|
ledger.tryBeginReview(sid, {}); // simulate an in-flight review
|
|
110
110
|
let spawnCalls = 0;
|
|
111
|
-
const r = review.runReview({ cwd: dir, sid, mode: 'stop', config: {}, spawnImpl: () => { spawnCalls++; return '{}'; } });
|
|
111
|
+
const r = await review.runReview({ cwd: dir, sid, mode: 'stop', config: {}, spawnImpl: () => { spawnCalls++; return '{}'; } });
|
|
112
112
|
assert.equal(r.ran, false);
|
|
113
113
|
assert.equal(r.reason, 'in-flight');
|
|
114
114
|
assert.equal(spawnCalls, 0);
|
|
115
115
|
} finally { ledger.endReview(sid); cleanup(sid); fs.rmSync(dir, { recursive: true, force: true }); }
|
|
116
116
|
});
|
|
117
117
|
|
|
118
|
-
test('REV-7 runReview spawns, parses, and merges risk findings into the ledger', () => {
|
|
118
|
+
test('REV-7 runReview spawns, parses, and merges risk findings into the ledger', async () => {
|
|
119
119
|
const dir = tempRepo();
|
|
120
120
|
const sid = freshSid();
|
|
121
121
|
try {
|
|
122
122
|
ledger.setBaseline(sid, { head: headOf(dir) });
|
|
123
123
|
fs.appendFileSync(path.join(dir, 'app.js'), 'const z = eval(q);\n');
|
|
124
124
|
const stub = () => JSON.stringify({ result: '{"status":"risks-found","findings":[{"category":"dynamic-exec","severity":"high","file":"app.js","line":2,"title":"eval"}]}' });
|
|
125
|
-
const r = review.runReview({ cwd: dir, sid, mode: 'stop', config: {}, spawnImpl: stub });
|
|
125
|
+
const r = await review.runReview({ cwd: dir, sid, mode: 'stop', config: {}, spawnImpl: stub });
|
|
126
126
|
assert.equal(r.ran, true);
|
|
127
127
|
assert.equal(r.findings_added, 1);
|
|
128
128
|
const taken = ledger.takeUnsurfacedRisks(sid, {});
|
|
@@ -130,13 +130,13 @@ test('REV-7 runReview spawns, parses, and merges risk findings into the ledger',
|
|
|
130
130
|
} finally { cleanup(sid); fs.rmSync(dir, { recursive: true, force: true }); }
|
|
131
131
|
});
|
|
132
132
|
|
|
133
|
-
test('REV-8 runReview on an empty diff does not spawn', () => {
|
|
133
|
+
test('REV-8 runReview on an empty diff does not spawn', async () => {
|
|
134
134
|
const dir = tempRepo();
|
|
135
135
|
const sid = freshSid();
|
|
136
136
|
try {
|
|
137
137
|
ledger.setBaseline(sid, { head: headOf(dir) });
|
|
138
138
|
let spawnCalls = 0;
|
|
139
|
-
const r = review.runReview({ cwd: dir, sid, mode: 'stop', config: {}, spawnImpl: () => { spawnCalls++; return '{}'; } });
|
|
139
|
+
const r = await review.runReview({ cwd: dir, sid, mode: 'stop', config: {}, spawnImpl: () => { spawnCalls++; return '{}'; } });
|
|
140
140
|
assert.equal(r.findings_added, 0);
|
|
141
141
|
assert.equal(spawnCalls, 0);
|
|
142
142
|
} finally { cleanup(sid); fs.rmSync(dir, { recursive: true, force: true }); }
|
package/np-tools.cjs
CHANGED
|
@@ -108,6 +108,7 @@ const topLevelCommands = {
|
|
|
108
108
|
'loop-stuck': require('./bin/np-tools/loop-stuck.cjs'),
|
|
109
109
|
'loop-metrics': require('./bin/np-tools/loop-metrics.cjs'),
|
|
110
110
|
'spawn-headless': require('./bin/np-tools/spawn-headless.cjs'),
|
|
111
|
+
'spawn-offhost': require('./bin/np-tools/spawn-offhost.cjs'),
|
|
111
112
|
'security': require('./bin/np-tools/security.cjs'),
|
|
112
113
|
'learning-log': require('./bin/np-tools/learning-log.cjs'),
|
|
113
114
|
'learning-match': require('./bin/np-tools/learning-match.cjs'),
|
package/package.json
CHANGED
package/workflows/add-tests.md
CHANGED
|
@@ -47,6 +47,47 @@ Once the UAT file is emitted and the smoke-run is green, the orchestrator spawns
|
|
|
47
47
|
- No `test.skip(...)` without a corresponding Fail / Defer marker.
|
|
48
48
|
- No vacuous assertions.
|
|
49
49
|
|
|
50
|
+
Off-host dispatch (ADR-0021): if `np-critic` routes to an `openai-compat` provider, run the single review read-only via `spawn-offhost` instead of a native Agent spawn. The off-host critic CANNOT write to `$TMPDIR` (off-host Write is cwd-confined), so it emits the findings object `{ "critic":"critic", "findings":[…] }` as its FINAL MESSAGE. The orchestrator writes that object to the critic-report path only after asserting that its `critic` axis ∈ {critic,style,tests,acceptance}. Any other value would be silently dropped by `mergeCriticOutputs` (project_np_critic_field_schema_bug), so the orchestrator fails loudly instead of persisting it.
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
LANG_DIRECTIVE=$(node .nubos-pilot/bin/np-tools.cjs lang-directive)
|
|
54
|
+
mkdir -p "${TMPDIR:-/tmp}/nubos-pilot/critic-reports"
|
|
55
|
+
CRITIC_REPORT_PATH="${TMPDIR:-/tmp}/nubos-pilot/critic-reports/critic-addtests-${PHASE}.json"
|
|
56
|
+
UAT_FILE=$(echo "$INIT" | node -e "process.stdin.on('data', d => console.log(JSON.parse(d).target_path))")
|
|
57
|
+
VERIFICATION_FILE=$(echo "$INIT" | node -e "process.stdin.on('data', d => console.log(JSON.parse(d).verification_path))")
|
|
58
|
+
CRITIC_KIND=$(node .nubos-pilot/bin/np-tools.cjs resolve-model np-critic --json 2>/dev/null \
|
|
59
|
+
| node -e 'let s="";process.stdin.on("data",d=>s+=d).on("end",()=>{try{console.log(JSON.parse(s).kind||"native")}catch{console.log("native")}})')
|
|
60
|
+
if [ "$CRITIC_KIND" = "openai-compat" ]; then
|
|
61
|
+
# Render the tests-axis-weighted critic prompt (UAT file + VERIFICATION.md as
|
|
62
|
+
# files_to_read) PLUS the emit-as-final-message instruction PLUS $LANG_DIRECTIVE
|
|
63
|
+
# into a TMPDIR file, then spawn read-only.
|
|
64
|
+
ADDTESTS_CRITIC_PROMPT="${TMPDIR:-/tmp}/np-offhost-addtests-critic-${PHASE}.md"
|
|
65
|
+
cat > "$ADDTESTS_CRITIC_PROMPT" <<EOF
|
|
66
|
+
You are np-critic reviewing a UAT regression suite (tests-axis weighted).
|
|
67
|
+
|
|
68
|
+
files_to_read:
|
|
69
|
+
- ${UAT_FILE}
|
|
70
|
+
- ${VERIFICATION_FILE}
|
|
71
|
+
|
|
72
|
+
Verify, weighting the tests-axis (verify-mismatch, missing-test, weak-assertion, silenced-failure):
|
|
73
|
+
- Every Pass-case in VERIFICATION.md has a corresponding test in the UAT file.
|
|
74
|
+
- Every test name describes observable behaviour.
|
|
75
|
+
- No test.skip(...) without a corresponding Fail / Defer marker.
|
|
76
|
+
- No vacuous assertions.
|
|
77
|
+
|
|
78
|
+
Emit ONLY the findings JSON object { "critic":"critic", "findings":[…] } as your final message.
|
|
79
|
+
|
|
80
|
+
${LANG_DIRECTIVE}
|
|
81
|
+
EOF
|
|
82
|
+
OFFHOST_CRITIC_OUT=$(node .nubos-pilot/bin/np-tools.cjs spawn-offhost \
|
|
83
|
+
--agent np-critic --task-file "$ADDTESTS_CRITIC_PROMPT" --read-only)
|
|
84
|
+
echo "$OFFHOST_CRITIC_OUT" | CRITIC_REPORT_PATH="$CRITIC_REPORT_PATH" node -e 'let s="";process.stdin.on("data",d=>s+=d).on("end",()=>{let env,f;try{env=JSON.parse(s);f=JSON.parse(env.content)}catch{console.error("off-host critic: final message is not the findings JSON object");process.exit(1)}var SUP=["critic","style","tests","acceptance"];if(!f||typeof f!=="object"||SUP.indexOf(f.critic)<0){console.error("off-host critic: `critic` must be one of "+SUP.join("/")+" — any other value is silently dropped by mergeCriticOutputs (project_np_critic_field_schema_bug)");process.exit(1)}require("fs").writeFileSync(process.env.CRITIC_REPORT_PATH,JSON.stringify(f))})' || exit 1
|
|
85
|
+
else
|
|
86
|
+
# → native Agent spawn of np-critic writes $CRITIC_REPORT_PATH (the existing path).
|
|
87
|
+
true
|
|
88
|
+
fi
|
|
89
|
+
```
|
|
90
|
+
|
|
50
91
|
Findings of category `missing-test`, `weak-assertion`, `silenced-failure`, or `verify-mismatch` route per `lib/nubosloop.cjs::routeFindings`. A single Build-Fixer-style round on `init add-tests` closes the loop. Beyond one round the workflow exits non-zero and the user resolves manually.
|
|
51
92
|
|
|
52
93
|
This is intentionally a one-pass adversarial review (not the full Critic-Schwarm) — the UAT-emitter is mechanical and only one axis (test quality) needs adversarial coverage.
|
|
@@ -70,10 +70,14 @@ When the user invokes `/np:architect-phase <N> --research` (or when `swarm.resea
|
|
|
70
70
|
|
|
71
71
|
The architect then consumes the consensus-merged `RESEARCH.md` instead of a single-spawn output. ADR-0011 details the merge rules and the `<consensus_meta>` audit block.
|
|
72
72
|
|
|
73
|
+
**Off-host (ADR-0021):** the Schwarm reuses the exact off-host `np-researcher` mechanism documented in `/np:research-phase`: detect the provider kind with `resolve-model np-researcher --kind`; when it is `openai-compat`, run the `k` spawns via `spawn-offhost --agent np-researcher --task-id "${MILESTONE_ID}-S000-T0000" --no-audit` (a synthetic milestone-level canonical task-id for the Rule-9 ledger) and stamp one `loop-audit-tool-use` per spawn. **Offline only** — the off-host toolset has no `WebFetch`/`context7`, so keep `np-researcher` on native routing for online research.
|
|
74
|
+
|
|
73
75
|
## Adversarial Loop (1 round)
|
|
74
76
|
|
|
75
77
|
After the architect emits `M<NNN>-ARCHITECTURE.md`, the orchestrator spawns ONE `np-critic` instance with the architecture file + `M<NNN>-CONTEXT.md` as inputs. The critic verifies that every locked decision in CONTEXT has a corresponding architecture entry and that no `Deferred` items leaked into the architecture. Findings of category `unmet-criterion`, `locked-decision-violation`, or `information-missing` route per `lib/nubosloop.cjs::routeFindings`. A single Build-Fixer-style round on the architect closes the loop. Beyond one round the workflow exits with `stuck` and the user resolves manually — architecture decisions don't merit unbounded looping.
|
|
76
78
|
|
|
79
|
+
**Off-host (ADR-0021):** when `np-critic` routes to an `openai-compat` provider (`resolve-model np-critic --kind`), run it `--read-only` via `spawn-offhost --agent np-critic` exactly as the `/np:execute-phase` off-host critic — the critic emits its `{ "critic":"critic", "findings":[…] }` object as the final message (off-host Write is cwd-confined, cannot reach `$TMPDIR`), and the orchestrator persists it only after asserting `critic ∈ {critic,style,tests,acceptance}` (else fail loud — `project_np_critic_field_schema_bug`).
|
|
80
|
+
|
|
77
81
|
## Skills (Nubos library)
|
|
78
82
|
|
|
79
83
|
Nubos ships a design-time skill library under `.claude/skills/np-*/` (present only on Claude Code). These are the **quality bar for the architecture decisions you are about to commit** — each skill's "Verification bar" is the standard each ADR-style decision is held to. Before spawning `np-architect`, classify the milestone (read `M<NNN>-CONTEXT.md` + `M<NNN>-RESEARCH.md`) and inject the matching skill triggers into the architect's spawn prompt. Skills **stack** — include every row the milestone matches (cap at the most relevant ~4 if more match; always keep the security row when it applies).
|
|
@@ -113,6 +117,21 @@ If zero skills match, omit the skill-directive line — do not invent skills.
|
|
|
113
117
|
Der Agent ist read-only auf Source — er schreibt EINE Datei:
|
|
114
118
|
`.nubos-pilot/milestones/M<NNN>/M<NNN>-ARCHITECTURE.md`.
|
|
115
119
|
|
|
120
|
+
**Off-host (ADR-0021):** when `np-architect` routes to an `openai-compat` provider, run it via `spawn-offhost` instead of the host spawn. `np-architect` is NOT Rule-9-audited and writes only the one `M<NNN>-ARCHITECTURE.md` artefact under `.nubos-pilot/` (inside the repo cwd — never live code), so it runs off-host with the default cwd (Read/Grep/Glob over the repo + Write confined to cwd), **no `--allow-bash`, no worktree**. It writes the file exactly as the native architect does; no emit-and-persist contract is needed.
|
|
121
|
+
|
|
122
|
+
```bash
|
|
123
|
+
ARCHITECT_KIND=$(node .nubos-pilot/bin/np-tools.cjs resolve-model np-architect --json 2>/dev/null \
|
|
124
|
+
| node -e 'let s="";process.stdin.on("data",d=>s+=d).on("end",()=>{try{console.log(JSON.parse(s).kind||"native")}catch{console.log("native")}})')
|
|
125
|
+
if [ "$ARCHITECT_KIND" = "openai-compat" ]; then
|
|
126
|
+
ARCHITECT_PROMPT="${TMPDIR:-/tmp}/np-offhost-architect-M<NNN>.md"
|
|
127
|
+
# … render the SAME files_to_read block + Milestone/Task + (matched) skill
|
|
128
|
+
# directive above, PLUS $LANG_DIRECTIVE, into "$ARCHITECT_PROMPT" …
|
|
129
|
+
node .nubos-pilot/bin/np-tools.cjs spawn-offhost \
|
|
130
|
+
--agent np-architect --task-file "$ARCHITECT_PROMPT" --phase "M<NNN>" >/dev/null
|
|
131
|
+
fi
|
|
132
|
+
# else → native host spawn per the block above.
|
|
133
|
+
```
|
|
134
|
+
|
|
116
135
|
## Post
|
|
117
136
|
|
|
118
137
|
Wenn der Agent `## CONTEXT CONFLICT` emittiert statt der Datei:
|
|
@@ -372,6 +372,7 @@ CONTEXT.md now captures the decisions. Success Criteria in `roadmap.yaml` are st
|
|
|
372
372
|
```bash
|
|
373
373
|
SC_START=$(node .nubos-pilot/bin/np-tools.cjs metrics start-timestamp)
|
|
374
374
|
SC_MODEL=$(node .nubos-pilot/bin/np-tools.cjs resolve-model np-sc-extractor --profile balanced)
|
|
375
|
+
SC_KIND=$(node .nubos-pilot/bin/np-tools.cjs resolve-model np-sc-extractor --kind 2>/dev/null || echo native)
|
|
375
376
|
|
|
376
377
|
REQS_PATH=".nubos-pilot/REQUIREMENTS.md"
|
|
377
378
|
|
|
@@ -393,17 +394,35 @@ EXISTING_SC_JSON=$(node .nubos-pilot/bin/np-tools.cjs phase-meta "$PHASE" --fiel
|
|
|
393
394
|
# call `node .nubos-pilot/bin/np-tools.cjs update-phase-meta $PHASE --stdin`
|
|
394
395
|
# with {"success_criteria": [{id:"SC-N", text:"..."}, ...]} on its stdin,
|
|
395
396
|
# and print a one-line summary.
|
|
396
|
-
# Guard: the SC_COUNT check below
|
|
397
|
-
#
|
|
397
|
+
# Guard: the SC_COUNT check below hard-aborts if the spawn returns zero criteria.
|
|
398
|
+
# Off-host (ADR-0021): when np-sc-extractor routes to an openai-compat provider,
|
|
399
|
+
# run it via spawn-offhost (below) INSTEAD of the Agent tool.
|
|
398
400
|
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
401
|
+
if [ "$SC_KIND" = "openai-compat" ]; then
|
|
402
|
+
# np-sc-extractor is NOT Rule-9-audited and writes ONLY roadmap.yaml under
|
|
403
|
+
# .nubos-pilot/ (inside the repo cwd — NOT live code), so it runs off-host
|
|
404
|
+
# with the default cwd (repo root): Read/Grep/Glob over the repo + Write
|
|
405
|
+
# confined to cwd. NO --allow-bash, NO worktree (no live-code blast radius
|
|
406
|
+
# to isolate). It persists success_criteria into roadmap.yaml exactly as the
|
|
407
|
+
# native extractor does — no emit-and-persist contract needed.
|
|
408
|
+
# spawn-offhost records the metrics row itself.
|
|
409
|
+
SC_PROMPT="${TMPDIR:-/tmp}/np-offhost-sc-extractor-${MILESTONE_ID}.md"
|
|
410
|
+
# … render the SAME prompt the ACTION CONTRACT above describes (milestone,
|
|
411
|
+
# milestone_id, milestone_dir, context_path, requirements_path,
|
|
412
|
+
# existing_success_criteria) PLUS $LANG_DIRECTIVE into "$SC_PROMPT" …
|
|
413
|
+
node .nubos-pilot/bin/np-tools.cjs spawn-offhost \
|
|
414
|
+
--agent np-sc-extractor --task-file "$SC_PROMPT" \
|
|
415
|
+
--phase "$PHASE" >/dev/null
|
|
416
|
+
else
|
|
417
|
+
# → execute the Agent call per ACTION CONTRACT above (native host spawn), then:
|
|
418
|
+
SC_END=$(node .nubos-pilot/bin/np-tools.cjs metrics end-timestamp)
|
|
419
|
+
node .nubos-pilot/bin/np-tools.cjs metrics record \
|
|
420
|
+
--agent np-sc-extractor --tier haiku --resolved-model "$SC_MODEL" \
|
|
421
|
+
--phase "$PHASE" --plan "${MILESTONE_ID}-sc" --task "${MILESTONE_ID}-sc-extract" \
|
|
422
|
+
--started "$SC_START" --ended "$SC_END" \
|
|
423
|
+
--tokens-in "${TOKENS_IN:-0}" --tokens-out "${TOKENS_OUT:-0}" \
|
|
424
|
+
--retry-count 0 --status ok --runtime "$RUNTIME"
|
|
425
|
+
fi
|
|
407
426
|
```
|
|
408
427
|
|
|
409
428
|
After the spawn, sanity-check that `success_criteria` is non-empty:
|
|
@@ -168,7 +168,7 @@ Every task runs through the **Nubosloop** (ADR-0010, `lib/nubosloop.cjs`) — pr
|
|
|
168
168
|
|
|
169
169
|
1. **Pre-flight cache lookup** (Round 1 only) — `loop-run-round --phase preflight --query "$TASK_QUERY"`. A hit at similarity ≥ `swarm.research.threshold` and `occurrence ≥ swarm.research.minOccurrence` short-circuits the Researcher-Schwarm; the cached pattern enters the Executor prompt with provenance `[CACHED]`. Soft cache failures (adapter-unknown) downgrade to a miss with `cache_miss_reason` populated; hard failures (corrupt store, version mismatch) propagate.
|
|
170
170
|
2. **Researcher-Schwarm (on cache miss, or on `next_action=researcher` re-route)** — orchestrator spawns `swarm.research.k=3` independent `np-researcher` agents IN PARALLEL (single message, three Agent blocks) and merges their outputs through `lib/researcher-swarm.cjs::mergeConsensus` (Mehrheit / Union / Schnittmenge). The merged consensus enters the Executor prompt with provenance.
|
|
171
|
-
3. **Executor (R1) or Build-Fixer (R≥2)** — single LLM spawn. Round 1 spawns `agents/np-executor.md`. Round ≥ 2 spawns `agents/np-build-fixer.md` with prior critic findings + verify output appended. Edits ONLY paths in `files_modified` (D-04 — no scope expansion). Does NOT call `commit-task`.
|
|
171
|
+
3. **Executor (R1) or Build-Fixer (R≥2)** — single LLM spawn. Round 1 spawns `agents/np-executor.md`. Round ≥ 2 spawns `agents/np-build-fixer.md` with prior critic findings + verify output appended. Edits ONLY paths in `files_modified` (D-04 — no scope expansion). Does NOT call `commit-task`. **Off-host (ADR-0021):** when the executor agent routes to an `openai-compat` provider (`agent_routing`), the spawn runs through `spawn-offhost` inside the slice worktree instead of the host Agent tool — worktree isolation is required (the workflow aborts unless `workflow.worktree_isolation=true`); see the off-host branch in the spawn block below. The off-host executor satisfies Rule 9 via an injected native `knowledge-search` tool, and the orchestrator runs the same Step-4 audit stamp.
|
|
172
172
|
4. **Mechanical Checks (orchestrator, NOT the agent)** — run task's `<verify>` command + stack linters (`phpstan`, `pint`, `tsc`, `eslint`); capture exit code + output to `$VERIFY_LOG`. Then `loop-audit-tool-use "$TASK_ID" --agent "$EXECUTOR_AGENT" --tool-use-log <json>` confirms the spawn invoked a knowledge-search tool ≥ 1× (Rule 9). The audited agent satisfies Rule 9 by running `node np-tools.cjs knowledge-search "<query>" --task "$TASK_ID"` via Bash, then stamping the exact string `knowledge-search` in `--tool-use-log`. The full accepted set is the `SEARCH_TOOLS` constant in `lib/nubosloop.cjs`; that constant is the single source of truth — do not re-enumerate it here. Audit findings get round-stamped and feed `loop-evaluate` alongside critic findings. Then call `loop-run-round --phase post-executor --verify-exit-code "$VERIFY_EXIT" --verify-output-path "$VERIFY_LOG"`. On verify-red the verb returns `next_action: spawn-build-fixer` — skip critics, advance to next round directly.
|
|
173
173
|
5. **Critic (verify-green only)** — one Critic agent spawns: `agents/np-critic.md` (sonnet). It writes the full findings JSON to `$CRITIC_REPORT_PATH` and emits a small verdict envelope as its final message (ADR-0010 §L5 Verdict-Only Contract, 2026-05-05). Single-critic revision per §Trust Layer 2026-05-05 — the prior 3-critic schwarm collapsed because three parallel spawns added latency without proportional finding-quality gains; the Verdict-Only Contract on top reduces per-round main-context tokens by an order of magnitude (verbatim findings reports were the dominant Nubosloop cost-driver).
|
|
174
174
|
6. **Route** — `loop-run-round --phase post-critics --critic-outputs-path "$CRITIC_REPORT_PATH"` (or legacy `--critic-outputs "$CRITIC_JSON"` when the Verdict-Only Contract is unavailable) returns `next_action ∈ {commit, executor, researcher, askuser, plan-checker, stuck}`:
|
|
@@ -359,7 +359,40 @@ for WAVE_INDEX in 0 1 2 ...; do
|
|
|
359
359
|
if { [ "$ROUND" -eq 1 ] && [ "$CACHE_HIT" != "true" ]; } || [ "$NEXT_ACTION" = "researcher" ]; then
|
|
360
360
|
SPAWN_SPECS=$(echo "$PREFLIGHT" | node -e \
|
|
361
361
|
'process.stdin.on("data",d=>{const j=JSON.parse(d);process.stdout.write(JSON.stringify((j.swarm&&j.swarm.spawn_specs)||[]))})')
|
|
362
|
-
#
|
|
362
|
+
# Off-host researcher swarm (ADR-0021): if np-researcher routes to an
|
|
363
|
+
# openai-compat provider, run $SWARM_K read-only spawns via spawn-offhost.
|
|
364
|
+
# np-researcher is Rule-9-audited → --task-id injects knowledge-search;
|
|
365
|
+
# read-only ⇒ no worktree needed. Each spawn MUST emit the per-spawn
|
|
366
|
+
# consensus JSON { decisions[], risks[], patterns[], open_questions[],
|
|
367
|
+
# sources[] } that researcher-merge consumes (NOT the researcher-output
|
|
368
|
+
# markdown artifact — that schema is for M<NNN>-RESEARCH.md, a different
|
|
369
|
+
# contract). A spawn whose output is not that JSON is substituted with an
|
|
370
|
+
# empty {} so researcher-merge degrades gracefully instead of aborting the
|
|
371
|
+
# wave (exit 4). --no-audit defers the Rule-9 stamp to the orchestrator's
|
|
372
|
+
# group-(2) loop-audit-tool-use (one per spawn) so the post-researcher
|
|
373
|
+
# SKIP-GUARD is satisfied exactly as for native spawns.
|
|
374
|
+
RESEARCHER_KIND=$(node .nubos-pilot/bin/np-tools.cjs resolve-model np-researcher --json 2>/dev/null \
|
|
375
|
+
| node -e 'let s="";process.stdin.on("data",d=>s+=d).on("end",()=>{try{console.log(JSON.parse(s).kind||"native")}catch{console.log("native")}})')
|
|
376
|
+
if [ "$RESEARCHER_KIND" = "openai-compat" ]; then
|
|
377
|
+
SWARM_K=$(echo "$PREFLIGHT" | node -e 'let s="";process.stdin.on("data",d=>s+=d).on("end",()=>{const j=JSON.parse(s);console.log((j.swarm&&j.swarm.k)||3)})')
|
|
378
|
+
SPAWN_OUT_PATHS=()
|
|
379
|
+
i=0
|
|
380
|
+
while [ "$i" -lt "$SWARM_K" ]; do
|
|
381
|
+
OUT_PATH="${TMPDIR:-/tmp}/np-spawn-${TASK_ID}-r${ROUND}-${i}.json"
|
|
382
|
+
R_PROMPT="${TMPDIR:-/tmp}/np-offhost-researcher-${TASK_ID}-r${ROUND}-${i}.md"
|
|
383
|
+
# … render researcher prompt i (task_query verbatim + seed_delta[i] + files_to_read) …
|
|
384
|
+
R_OUT=$(node .nubos-pilot/bin/np-tools.cjs spawn-offhost \
|
|
385
|
+
--agent np-researcher --task-file "$R_PROMPT" --task-id "$TASK_ID" \
|
|
386
|
+
--read-only --no-audit ${SLICE_CWD:+--cwd "$SLICE_CWD"})
|
|
387
|
+
echo "$R_OUT" | OUT_PATH="$OUT_PATH" node -e 'let s="";process.stdin.on("data",d=>s+=d).on("end",()=>{let c="";try{c=JSON.parse(s).content||""}catch{}var ok=false;try{JSON.parse(c);ok=true}catch{}if(!ok){process.stderr.write("off-host researcher: spawn output is not the {decisions,risks,patterns,open_questions,sources} JSON researcher-merge expects — substituting empty consensus for this spawn\n");c="{}"}require("fs").writeFileSync(process.env.OUT_PATH,c)})'
|
|
388
|
+
R_LOG=$(echo "$R_OUT" | node -e 'let s="";process.stdin.on("data",d=>s+=d).on("end",()=>{try{console.log(JSON.stringify((JSON.parse(s).toolLog||[]).map(t=>t.name)))}catch{console.log("[]")}})')
|
|
389
|
+
node .nubos-pilot/bin/np-tools.cjs loop-audit-tool-use "$TASK_ID" --agent np-researcher --tool-use-log "$R_LOG"
|
|
390
|
+
SPAWN_OUT_PATHS+=("$OUT_PATH")
|
|
391
|
+
i=$((i+1))
|
|
392
|
+
done
|
|
393
|
+
else
|
|
394
|
+
true # → execute groups (1) + (2) per ACTION CONTRACT above (native Agent spawns).
|
|
395
|
+
fi
|
|
363
396
|
CONSENSUS_PATTERN=$(node .nubos-pilot/bin/researcher-merge.cjs \
|
|
364
397
|
"${SPAWN_OUT_PATHS[@]}")
|
|
365
398
|
node .nubos-pilot/bin/np-tools.cjs loop-run-round "$TASK_ID" --phase post-researcher
|
|
@@ -420,7 +453,43 @@ for WAVE_INDEX in 0 1 2 ...; do
|
|
|
420
453
|
else
|
|
421
454
|
EXECUTOR_MODEL=$(node .nubos-pilot/bin/np-tools.cjs resolve-model "$EXECUTOR_AGENT" --profile frontier)
|
|
422
455
|
fi
|
|
423
|
-
|
|
456
|
+
|
|
457
|
+
# ━━━ Off-host executor (ADR-0021) — config-driven via agent_routing ━━━
|
|
458
|
+
# If $EXECUTOR_AGENT routes to an openai-compat provider, run it through
|
|
459
|
+
# the nubos-pilot dispatch loop (spawn-offhost) instead of the host Agent
|
|
460
|
+
# tool. Off-host REQUIRES worktree isolation: the existing per-wave worktree
|
|
461
|
+
# (created at §Worktree-Isolation above) confines model-driven Write/Edit/
|
|
462
|
+
# Bash, and the slice-end ff-merge is what lands the work on the parent
|
|
463
|
+
# branch. The worktree lives under .nubos-pilot/worktrees/, so checkpoint /
|
|
464
|
+
# search-evidence / metrics still resolve to the project root — only file
|
|
465
|
+
# ops are confined. We do NOT force a worktree out of band: doing so would
|
|
466
|
+
# bypass the merge-back gate (commits stranded) and the orchestrator's
|
|
467
|
+
# cwd=worktree convention (commit-task would find nothing). The orchestrator
|
|
468
|
+
# runs the canonical Step-4 loop-audit-tool-use with the returned tool-log,
|
|
469
|
+
# so spawn-offhost is called --no-audit to avoid double-stamping the round.
|
|
470
|
+
EXECUTOR_KIND=$(node .nubos-pilot/bin/np-tools.cjs resolve-model "$EXECUTOR_AGENT" --json 2>/dev/null \
|
|
471
|
+
| node -e 'let s="";process.stdin.on("data",d=>s+=d).on("end",()=>{try{console.log(JSON.parse(s).kind||"native")}catch{console.log("native")}})')
|
|
472
|
+
if [ "$EXECUTOR_KIND" = "openai-compat" ]; then
|
|
473
|
+
if [ "$WORKTREE_ISOLATION" != "true" ] || [ -z "$SLICE_CWD" ] || [ "$SLICE_CWD" = "." ]; then
|
|
474
|
+
echo "[np:execute-phase] off-host executor ($EXECUTOR_AGENT) requires workflow.worktree_isolation=true so model-driven edits are confined and ff-merged back. Enable it (config-set workflow.worktree_isolation true) and re-run." >&2
|
|
475
|
+
exit 1
|
|
476
|
+
fi
|
|
477
|
+
# Write the SAME rendered executor prompt you would have handed the Agent
|
|
478
|
+
# tool (task plan + slice context + consensus + success criteria +
|
|
479
|
+
# language directive + skill block) to this file:
|
|
480
|
+
OFFHOST_PROMPT="${TMPDIR:-/tmp}/np-offhost-${TASK_ID}-r${ROUND}.md"
|
|
481
|
+
# … render prompt to "$OFFHOST_PROMPT" …
|
|
482
|
+
OFFHOST_OUT=$(node .nubos-pilot/bin/np-tools.cjs spawn-offhost \
|
|
483
|
+
--agent "$EXECUTOR_AGENT" --task-file "$OFFHOST_PROMPT" \
|
|
484
|
+
--task-id "$TASK_ID" --cwd "$SLICE_CWD" --allow-bash --no-audit)
|
|
485
|
+
# Harvest the tool-name log for the Layer-C audit stamp (Step 4).
|
|
486
|
+
EXECUTOR_TOOL_LOG=$(echo "$OFFHOST_OUT" | node -e 'let s="";process.stdin.on("data",d=>s+=d).on("end",()=>{try{console.log(JSON.stringify((JSON.parse(s).toolLog||[]).map(t=>t.name)))}catch{console.log("[]")}})')
|
|
487
|
+
else
|
|
488
|
+
# → execute group (1) per ACTION CONTRACT above (native host Agent spawn);
|
|
489
|
+
# EXECUTOR_TOOL_LOG is harvested from the spawn's tool_use stream.
|
|
490
|
+
true
|
|
491
|
+
fi
|
|
492
|
+
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
424
493
|
|
|
425
494
|
node .nubos-pilot/bin/np-tools.cjs checkpoint transition "$TASK_ID" verifying
|
|
426
495
|
|
|
@@ -510,7 +579,27 @@ for WAVE_INDEX in 0 1 2 ...; do
|
|
|
510
579
|
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
511
580
|
mkdir -p "${TMPDIR:-/tmp}/nubos-pilot/critic-reports"
|
|
512
581
|
CRITIC_REPORT_PATH="${TMPDIR:-/tmp}/nubos-pilot/critic-reports/critic-${TASK_ID}-r${ROUND}.json"
|
|
513
|
-
#
|
|
582
|
+
# Off-host critic (ADR-0021): if np-critic routes to an openai-compat
|
|
583
|
+
# provider, run it read-only via spawn-offhost. It CANNOT write to $TMPDIR
|
|
584
|
+
# (off-host Write is cwd-confined), so it emits the findings object
|
|
585
|
+
# { "critic":"critic", "findings":[…], "criteria"?:[…] } as its FINAL
|
|
586
|
+
# MESSAGE; the orchestrator writes that to $CRITIC_REPORT_PATH only after
|
|
587
|
+
# asserting the `critic` axis ∈ {critic,style,tests,acceptance} — any other
|
|
588
|
+
# value is silently dropped by mergeCriticOutputs
|
|
589
|
+
# (project_np_critic_field_schema_bug), so fail loud instead.
|
|
590
|
+
CRITIC_KIND=$(node .nubos-pilot/bin/np-tools.cjs resolve-model np-critic --json 2>/dev/null \
|
|
591
|
+
| node -e 'let s="";process.stdin.on("data",d=>s+=d).on("end",()=>{try{console.log(JSON.parse(s).kind||"native")}catch{console.log("native")}})')
|
|
592
|
+
if [ "$CRITIC_KIND" = "openai-compat" ]; then
|
|
593
|
+
OFFHOST_CRITIC_PROMPT="${TMPDIR:-/tmp}/np-offhost-critic-${TASK_ID}-r${ROUND}.md"
|
|
594
|
+
# … render the critic prompt (same files_to_read as group (2)) PLUS:
|
|
595
|
+
# "Emit ONLY the findings JSON object as your final message." …
|
|
596
|
+
OFFHOST_CRITIC_OUT=$(node .nubos-pilot/bin/np-tools.cjs spawn-offhost \
|
|
597
|
+
--agent np-critic --task-file "$OFFHOST_CRITIC_PROMPT" --read-only ${SLICE_CWD:+--cwd "$SLICE_CWD"})
|
|
598
|
+
echo "$OFFHOST_CRITIC_OUT" | CRITIC_REPORT_PATH="$CRITIC_REPORT_PATH" node -e 'let s="";process.stdin.on("data",d=>s+=d).on("end",()=>{let env,f;try{env=JSON.parse(s);f=JSON.parse(env.content)}catch{console.error("off-host critic: final message is not the findings JSON object");process.exit(1)}var SUP=["critic","style","tests","acceptance"];if(!f||typeof f!=="object"||SUP.indexOf(f.critic)<0){console.error("off-host critic: `critic` must be one of "+SUP.join("/")+" — any other value is silently dropped by mergeCriticOutputs (project_np_critic_field_schema_bug)");process.exit(1)}require("fs").writeFileSync(process.env.CRITIC_REPORT_PATH,JSON.stringify(f))})' || exit 1
|
|
599
|
+
else
|
|
600
|
+
# → execute group (2) per ACTION CONTRACT above (native Agent spawn writes $CRITIC_REPORT_PATH).
|
|
601
|
+
true
|
|
602
|
+
fi
|
|
514
603
|
node .nubos-pilot/bin/np-tools.cjs loop-audit-tool-use "$TASK_ID" --agent np-critic --tool-use-log '[]'
|
|
515
604
|
POST_CRIT=$(node .nubos-pilot/bin/np-tools.cjs loop-run-round "$TASK_ID" \
|
|
516
605
|
--phase post-critics --critic-outputs-path "$CRITIC_REPORT_PATH")
|
package/workflows/plan-phase.md
CHANGED
|
@@ -241,17 +241,38 @@ for ITER in 1 2; do
|
|
|
241
241
|
# <prior_findings>$LAST_FINDINGS</prior_findings> (path to verdict JSON, R≥2)
|
|
242
242
|
# <agent_skills>$AGENT_SKILLS_PLANNER</agent_skills>
|
|
243
243
|
# Agent MUST: write/update slice plans inside $milestone_dir.
|
|
244
|
+
# Off-host (ADR-0021): when np-planner routes to an openai-compat provider
|
|
245
|
+
# (agent_routing), run it via spawn-offhost (below) INSTEAD of the Agent tool.
|
|
244
246
|
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
245
247
|
PLANNER_START=$(node .nubos-pilot/bin/np-tools.cjs metrics start-timestamp)
|
|
246
248
|
PLANNER_MODEL=$(node .nubos-pilot/bin/np-tools.cjs resolve-model np-planner --profile frontier)
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
249
|
+
PLANNER_KIND=$(node .nubos-pilot/bin/np-tools.cjs resolve-model np-planner --json 2>/dev/null \
|
|
250
|
+
| node -e 'let s="";process.stdin.on("data",d=>s+=d).on("end",()=>{try{console.log(JSON.parse(s).kind||"native")}catch{console.log("native")}})')
|
|
251
|
+
if [ "$PLANNER_KIND" = "openai-compat" ]; then
|
|
252
|
+
# np-planner is NOT Rule-9-audited and writes ONLY planning artefacts under
|
|
253
|
+
# .nubos-pilot/ (inside the repo cwd — NOT live code), so it runs off-host
|
|
254
|
+
# with the default cwd (repo root): Read/Grep/Glob over the whole repo + Write
|
|
255
|
+
# confined to cwd. NO --allow-bash and NO worktree (there is no live-code
|
|
256
|
+
# blast radius to isolate, unlike the executor). It writes slice plans into
|
|
257
|
+
# $milestone_dir exactly as the native planner does — no emit-and-persist
|
|
258
|
+
# contract needed. spawn-offhost records the metrics row itself.
|
|
259
|
+
PLANNER_PROMPT="${TMPDIR:-/tmp}/np-offhost-planner-${milestone_id}-i${ITER}.md"
|
|
260
|
+
# … render the SAME prompt the ACTION CONTRACT above describes (mode,
|
|
261
|
+
# milestone, milestone_dir, goal, requirements, prior_findings,
|
|
262
|
+
# agent_skills) PLUS $LANG_DIRECTIVE into "$PLANNER_PROMPT" …
|
|
263
|
+
node .nubos-pilot/bin/np-tools.cjs spawn-offhost \
|
|
264
|
+
--agent np-planner --task-file "$PLANNER_PROMPT" \
|
|
265
|
+
--phase "$PHASE" --plan "${milestone_id}-plan" >/dev/null
|
|
266
|
+
else
|
|
267
|
+
# → execute the Agent call per ACTION CONTRACT above (native host spawn), then:
|
|
268
|
+
PLANNER_END=$(node .nubos-pilot/bin/np-tools.cjs metrics end-timestamp)
|
|
269
|
+
node .nubos-pilot/bin/np-tools.cjs metrics record \
|
|
270
|
+
--agent np-planner --tier opus --resolved-model "$PLANNER_MODEL" \
|
|
271
|
+
--phase "$PHASE" --plan "${milestone_id}-plan" --task "${milestone_id}-planner-run" \
|
|
272
|
+
--started "$PLANNER_START" --ended "$PLANNER_END" \
|
|
273
|
+
--tokens-in "${TOKENS_IN:-0}" --tokens-out "${TOKENS_OUT:-0}" \
|
|
274
|
+
--retry-count 0 --status ok --runtime "$RUNTIME"
|
|
275
|
+
fi
|
|
255
276
|
|
|
256
277
|
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
257
278
|
# ACTION CONTRACT — Step 2b: Spawn np-plan-checker (immediately after 2a)
|
|
@@ -265,17 +286,37 @@ for ITER in 1 2; do
|
|
|
265
286
|
# Agent MUST: read planner output (slice plans inside $milestone_dir),
|
|
266
287
|
# write YAML verdict to $milestone_dir/.tmp-verdict-$ITER.yaml. Orchestrator
|
|
267
288
|
# converts YAML → JSON at $VERDICT_JSON_PATH (next bash section).
|
|
289
|
+
# Off-host (ADR-0021): when np-plan-checker routes to an openai-compat provider,
|
|
290
|
+
# run it via spawn-offhost (below) INSTEAD of the Agent tool.
|
|
268
291
|
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
269
292
|
CHECKER_START=$(node .nubos-pilot/bin/np-tools.cjs metrics start-timestamp)
|
|
270
293
|
CHECKER_MODEL=$(node .nubos-pilot/bin/np-tools.cjs resolve-model np-plan-checker --profile frontier)
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
294
|
+
CHECKER_KIND=$(node .nubos-pilot/bin/np-tools.cjs resolve-model np-plan-checker --json 2>/dev/null \
|
|
295
|
+
| node -e 'let s="";process.stdin.on("data",d=>s+=d).on("end",()=>{try{console.log(JSON.parse(s).kind||"native")}catch{console.log("native")}})')
|
|
296
|
+
if [ "$CHECKER_KIND" = "openai-compat" ]; then
|
|
297
|
+
# np-plan-checker is NOT Rule-9-audited and writes ONLY the verdict YAML under
|
|
298
|
+
# $milestone_dir (inside the repo cwd), so it runs off-host with the default
|
|
299
|
+
# cwd: Read/Grep/Glob over the repo + Write confined to cwd. NO --allow-bash,
|
|
300
|
+
# NO worktree. It writes $milestone_dir/.tmp-verdict-$ITER.yaml exactly as the
|
|
301
|
+
# native checker does (the orchestrator's YAML→JSON step is unchanged).
|
|
302
|
+
# spawn-offhost records the metrics row itself.
|
|
303
|
+
CHECKER_PROMPT="${TMPDIR:-/tmp}/np-offhost-plan-checker-${milestone_id}-i${ITER}.md"
|
|
304
|
+
# … render the SAME prompt the ACTION CONTRACT above describes (milestone,
|
|
305
|
+
# milestone_dir, agent_skills) PLUS $LANG_DIRECTIVE, and MUST state the exact
|
|
306
|
+
# output path $milestone_dir/.tmp-verdict-$ITER.yaml, into "$CHECKER_PROMPT" …
|
|
307
|
+
node .nubos-pilot/bin/np-tools.cjs spawn-offhost \
|
|
308
|
+
--agent np-plan-checker --task-file "$CHECKER_PROMPT" \
|
|
309
|
+
--phase "$PHASE" --plan "${milestone_id}-plan" >/dev/null
|
|
310
|
+
else
|
|
311
|
+
# → execute the Agent call per ACTION CONTRACT above (native host spawn), then:
|
|
312
|
+
CHECKER_END=$(node .nubos-pilot/bin/np-tools.cjs metrics end-timestamp)
|
|
313
|
+
node .nubos-pilot/bin/np-tools.cjs metrics record \
|
|
314
|
+
--agent np-plan-checker --tier opus --resolved-model "$CHECKER_MODEL" \
|
|
315
|
+
--phase "$PHASE" --plan "${milestone_id}-plan" --task "${milestone_id}-planner-run" \
|
|
316
|
+
--started "$CHECKER_START" --ended "$CHECKER_END" \
|
|
317
|
+
--tokens-in "${TOKENS_IN:-0}" --tokens-out "${TOKENS_OUT:-0}" \
|
|
318
|
+
--retry-count 0 --status ok --runtime "$RUNTIME"
|
|
319
|
+
fi
|
|
279
320
|
|
|
280
321
|
VERDICT_JSON_PATH="$milestone_dir/.tmp-verdict-$ITER.json"
|
|
281
322
|
# (verdict JSON: {status: passed|issues_found, findings: [...] })
|
|
@@ -253,8 +253,36 @@ omit the `model:` parameter at spawn (Phase 8 D-22 inherit-pattern).
|
|
|
253
253
|
```bash
|
|
254
254
|
RESEARCHER_START=$(node .nubos-pilot/bin/np-tools.cjs metrics start-timestamp)
|
|
255
255
|
RESEARCHER_MODEL=$(node .nubos-pilot/bin/np-tools.cjs resolve-model np-researcher --profile balanced)
|
|
256
|
+
RESEARCHER_KIND=$(node .nubos-pilot/bin/np-tools.cjs resolve-model np-researcher --kind 2>/dev/null || echo native)
|
|
256
257
|
```
|
|
257
258
|
|
|
259
|
+
**Off-host (ADR-0021):** when `np-researcher` routes to an `openai-compat` provider, run the `$SWARM_K` spawns via `spawn-offhost` instead of the abstract host spawn. Two specifics for this audited agent:
|
|
260
|
+
|
|
261
|
+
- **Synthetic canonical task-id.** `np-researcher` is Rule-9-audited and `dispatchOffHost` requires a `M<NNN>-S<NNN>-T<NNNN>` id for the search-evidence ledger + audit. Research is milestone-level (no real slice/task), so mint the synthetic id `${MILESTONE_ID}-S000-T0000` (the `S000-T0000` suffix is the documented "milestone-level, no slice/task" convention). The injected native `knowledge-search` tool satisfies Rule 9; the orchestrator stamps one `loop-audit-tool-use` per spawn.
|
|
262
|
+
- **Offline only.** The off-host toolset has **no `WebFetch`/`context7`** — an off-host researcher can only do `$MODE == offline` (knowledge-search) research. If `$MODE == online`, keep `np-researcher` native (or accept offline-only research for that spawn). This is a capability limit that is surfaced loudly, not a silent degradation.
|
|
263
|
+
|
|
264
|
+
Each spawn writes its own `$RESEARCH_DIR/spawn-<i>.md` (inside the repo cwd), so it runs write-enabled (NOT `--read-only`), with no `--allow-bash` and no worktree.
|
|
265
|
+
|
|
266
|
+
```bash
|
|
267
|
+
if [ "$RESEARCHER_KIND" = "openai-compat" ]; then
|
|
268
|
+
OFFHOST_TASK_ID="${MILESTONE_ID}-S000-T0000"
|
|
269
|
+
i=0
|
|
270
|
+
while [ "$i" -lt "${SWARM_K:-3}" ]; do
|
|
271
|
+
R_PROMPT="${TMPDIR:-/tmp}/np-offhost-researcher-${MILESTONE_ID}-${i}.md"
|
|
272
|
+
# … render spawn-spec i (files_to_read + goal + requirements + seed_delta[i] +
|
|
273
|
+
# $SPAWN_SCHEMA + the EXACT output path $RESEARCH_DIR/spawn-${i}.md) PLUS
|
|
274
|
+
# $LANG_DIRECTIVE into "$R_PROMPT" …
|
|
275
|
+
R_OUT=$(node .nubos-pilot/bin/np-tools.cjs spawn-offhost \
|
|
276
|
+
--agent np-researcher --task-file "$R_PROMPT" --task-id "$OFFHOST_TASK_ID" --no-audit)
|
|
277
|
+
R_LOG=$(echo "$R_OUT" | node -e 'let s="";process.stdin.on("data",d=>s+=d).on("end",()=>{try{console.log(JSON.stringify((JSON.parse(s).toolLog||[]).map(t=>t.name)))}catch{console.log("[]")}})')
|
|
278
|
+
node .nubos-pilot/bin/np-tools.cjs loop-audit-tool-use "$OFFHOST_TASK_ID" --agent np-researcher --tool-use-log "$R_LOG"
|
|
279
|
+
i=$((i+1))
|
|
280
|
+
done
|
|
281
|
+
fi
|
|
282
|
+
```
|
|
283
|
+
|
|
284
|
+
When `$RESEARCHER_KIND` is `native`, use the abstract host spawn below (the Phase 8 runtime adapter binds it):
|
|
285
|
+
|
|
258
286
|
```text
|
|
259
287
|
Spawn agent=np-researcher tier=sonnet model=$RESEARCHER_MODEL mode=$MODE phase=$PHASE context=$CONTEXT_PATH output=$RESEARCH_PATH
|
|
260
288
|
```
|
|
@@ -351,6 +379,23 @@ Spawn agent=np-researcher-reconciler tier=sonnet model=$RECONCILER_MODEL phase=$
|
|
|
351
379
|
schema_prompt=$RECONCILER_SCHEMA
|
|
352
380
|
```
|
|
353
381
|
|
|
382
|
+
**Off-host (ADR-0021):** when `np-researcher-reconciler` routes to an `openai-compat` provider, run it via `spawn-offhost` instead of the host spawn. The reconciler is NOT Rule-9-audited and writes only the single `$RESEARCH_PATH` (`M<NNN>-RESEARCH.md`) artefact under `.nubos-pilot/` (inside the repo cwd — never live code), so it runs off-host with the default cwd (Read/Grep/Glob over the spawn outputs + Write confined to cwd), **no `--allow-bash`, no worktree**. It writes `$RESEARCH_PATH` exactly as the native reconciler does; no emit-and-persist contract is needed. `spawn-offhost` records the metrics row itself.
|
|
383
|
+
|
|
384
|
+
```bash
|
|
385
|
+
RECONCILER_KIND=$(node .nubos-pilot/bin/np-tools.cjs resolve-model np-researcher-reconciler --json 2>/dev/null \
|
|
386
|
+
| node -e 'let s="";process.stdin.on("data",d=>s+=d).on("end",()=>{try{console.log(JSON.parse(s).kind||"native")}catch{console.log("native")}})')
|
|
387
|
+
if [ "$RECONCILER_KIND" = "openai-compat" ]; then
|
|
388
|
+
RECONCILER_PROMPT="${TMPDIR:-/tmp}/np-offhost-reconciler-${MILESTONE_ID}.md"
|
|
389
|
+
# … render the SAME reconciler input (spawn_paths + merge_path + merged_json +
|
|
390
|
+
# context_path + $RECONCILER_SCHEMA + the EXACT final_path=$RESEARCH_PATH) PLUS
|
|
391
|
+
# $LANG_DIRECTIVE into "$RECONCILER_PROMPT" …
|
|
392
|
+
node .nubos-pilot/bin/np-tools.cjs spawn-offhost \
|
|
393
|
+
--agent np-researcher-reconciler --task-file "$RECONCILER_PROMPT" \
|
|
394
|
+
--phase "$PHASE" --plan "$PLAN_ID" --task "$TASK_ID" >/dev/null
|
|
395
|
+
fi
|
|
396
|
+
# else → native host spawn per the block above.
|
|
397
|
+
```
|
|
398
|
+
|
|
354
399
|
The reconciler classifies each consensus decision's reasoning-trace as `identical | overlapping | orthogonal | unknown` (groupthink detection), picks each contested decision with documented reason, and writes `$RESEARCH_PATH` with `agreement_score` and `contested_count` in frontmatter.
|
|
355
400
|
|
|
356
401
|
```bash
|
|
@@ -106,9 +106,27 @@ is runtime-agnostic — pick whichever dispatch mechanism your host supports.
|
|
|
106
106
|
|
|
107
107
|
```bash
|
|
108
108
|
PROSE_FILE=$(mktemp -t np-prose-XXXXXX.json)
|
|
109
|
-
#
|
|
110
|
-
#
|
|
111
|
-
|
|
109
|
+
# Off-host (ADR-0021): when np-codebase-documenter routes to an openai-compat
|
|
110
|
+
# provider (agent_routing), run it via spawn-offhost INSTEAD of the native host
|
|
111
|
+
# dispatch below.
|
|
112
|
+
DOCUMENTER_KIND=$(node .nubos-pilot/bin/np-tools.cjs resolve-model np-codebase-documenter --kind 2>/dev/null || echo native)
|
|
113
|
+
if [ "$DOCUMENTER_KIND" = "openai-compat" ]; then
|
|
114
|
+
# np-codebase-documenter is NOT Rule-9-audited and writes ONLY
|
|
115
|
+
# .nubos-pilot/codebase/ artefacts (inside the repo cwd — NOT live code), so it
|
|
116
|
+
# runs off-host with the default cwd (repo root): Read/Grep/Glob over the repo +
|
|
117
|
+
# Write confined to cwd. NO --allow-bash and NO worktree (no live-code blast
|
|
118
|
+
# radius to isolate). The agent writes its module doc JSON itself, inside cwd.
|
|
119
|
+
DOC_PROMPT="${TMPDIR:-/tmp}/np-offhost-documenter-${MODULE_ID}.md"
|
|
120
|
+
# … render the SAME buildDocumenterPrompt(facts) prompt the native dispatch
|
|
121
|
+
# below describes PLUS $LANG_DIRECTIVE into "$DOC_PROMPT" …
|
|
122
|
+
node .nubos-pilot/bin/np-tools.cjs spawn-offhost \
|
|
123
|
+
--agent np-codebase-documenter --task-file "$DOC_PROMPT" \
|
|
124
|
+
--phase scan >/dev/null
|
|
125
|
+
else
|
|
126
|
+
# Host dispatches agent with buildDocumenterPrompt(facts) and writes JSON
|
|
127
|
+
# to $PROSE_FILE. Validate JSON before proceeding.
|
|
128
|
+
python -c 'import json,sys; json.load(open(sys.argv[1]))' "$PROSE_FILE"
|
|
129
|
+
fi
|
|
112
130
|
```
|
|
113
131
|
|
|
114
132
|
Batch pacing: the user opted into batches during Step 1. Between batches,
|
|
@@ -129,6 +129,8 @@ The auditor reads `REQUIREMENTS.md`, filters to the milestone's declared require
|
|
|
129
129
|
```bash
|
|
130
130
|
START=$(node .nubos-pilot/bin/np-tools.cjs metrics start-timestamp)
|
|
131
131
|
MODEL=$(node .nubos-pilot/bin/np-tools.cjs resolve-model np-nyquist-auditor --profile frontier)
|
|
132
|
+
AUDITOR_KIND=$(node .nubos-pilot/bin/np-tools.cjs resolve-model np-nyquist-auditor --json 2>/dev/null \
|
|
133
|
+
| node -e 'let s="";process.stdin.on("data",d=>s+=d).on("end",()=>{try{console.log(JSON.parse(s).kind||"native")}catch{console.log("native")}})')
|
|
132
134
|
|
|
133
135
|
# Build the read list from the init payload:
|
|
134
136
|
SLICE_PLANS=$(find "$MILESTONE_DIR/slices" -maxdepth 2 -name 'S*-PLAN.md' 2>/dev/null)
|
|
@@ -136,19 +138,34 @@ SLICE_SUMMARIES=$(find "$MILESTONE_DIR/slices" -maxdepth 2 -name 'S*-SUMMARY.md'
|
|
|
136
138
|
TASK_PLANS=$(find "$MILESTONE_DIR/slices" -path '*/tasks/*/T*-PLAN.md' 2>/dev/null)
|
|
137
139
|
TASK_SUMMARIES=$(find "$MILESTONE_DIR/slices" -path '*/tasks/*/T*-SUMMARY.md' 2>/dev/null)
|
|
138
140
|
|
|
139
|
-
|
|
140
|
-
#
|
|
141
|
-
#
|
|
142
|
-
#
|
|
143
|
-
#
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
141
|
+
if [ "$AUDITOR_KIND" = "openai-compat" ]; then
|
|
142
|
+
# Off-host (ADR-0021): np-nyquist-auditor is NOT Rule-9-audited and writes ONLY
|
|
143
|
+
# $VALIDATION_PATH (M<NNN>-VALIDATION.md) under .nubos-pilot/ (inside the repo
|
|
144
|
+
# cwd — NOT live code), so it runs off-host with the default cwd: Read/Grep/Glob
|
|
145
|
+
# over the repo + Write confined to cwd. NO --allow-bash, NO worktree. It writes
|
|
146
|
+
# the file from templates/VALIDATION.md exactly as the native auditor does (the
|
|
147
|
+
# orchestrator's output-lint check is unchanged). spawn-offhost self-records.
|
|
148
|
+
AUDITOR_PROMPT="${TMPDIR:-/tmp}/np-offhost-nyquist-${MILESTONE_ID}.md"
|
|
149
|
+
# … render the SAME auditor prompt (read list above + $VALIDATION_SCHEMA +
|
|
150
|
+
# template_path + requirements_path + the EXACT output path $VALIDATION_PATH)
|
|
151
|
+
# PLUS $LANG_DIRECTIVE into "$AUDITOR_PROMPT" …
|
|
152
|
+
node .nubos-pilot/bin/np-tools.cjs spawn-offhost \
|
|
153
|
+
--agent np-nyquist-auditor --task-file "$AUDITOR_PROMPT" \
|
|
154
|
+
--phase "$PHASE" --plan "$PLAN_ID" --task "$TASK_ID" >/dev/null
|
|
155
|
+
else
|
|
156
|
+
# Spawn agent=np-nyquist-auditor model=$MODEL (native host spawn)
|
|
157
|
+
# input: slice_plans, slice_summaries, task_plans, task_summaries, validation_path,
|
|
158
|
+
# template_path, requirements_path, milestone_dir, milestone, milestone_id
|
|
159
|
+
# output: $VALIDATION_PATH with per-requirement Nyquist scoring
|
|
160
|
+
# (COVERED / UNDER_SAMPLED / UNCOVERED), using templates/VALIDATION.md as skeleton.
|
|
161
|
+
END=$(node .nubos-pilot/bin/np-tools.cjs metrics end-timestamp)
|
|
162
|
+
node .nubos-pilot/bin/np-tools.cjs metrics record \
|
|
163
|
+
--agent np-nyquist-auditor --tier haiku --resolved-model "$MODEL" \
|
|
164
|
+
--phase "$PHASE" --plan "$PLAN_ID" --task "$TASK_ID" \
|
|
165
|
+
--started "$START" --ended "$END" \
|
|
166
|
+
--tokens-in "${TOKENS_IN:-0}" --tokens-out "${TOKENS_OUT:-0}" \
|
|
167
|
+
--retry-count 0 --status ok --runtime "$RUNTIME"
|
|
168
|
+
fi
|
|
152
169
|
```
|
|
153
170
|
|
|
154
171
|
## Validation Gate
|