nubos-pilot 1.0.5 → 1.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. package/agents/np-researcher-reconciler.md +115 -0
  2. package/agents/np-researcher.md +16 -0
  3. package/bin/np-tools/_commands.cjs +2 -0
  4. package/bin/np-tools/doctor.cjs +49 -0
  5. package/bin/np-tools/execute-milestone.cjs +10 -2
  6. package/bin/np-tools/execute-milestone.test.cjs +33 -0
  7. package/bin/np-tools/output-lint.cjs +113 -0
  8. package/bin/np-tools/researcher-reconcile.cjs +118 -0
  9. package/lib/agents.test.cjs +1 -0
  10. package/lib/archive-parsers.test.cjs +170 -0
  11. package/lib/archive.cjs +135 -18
  12. package/lib/fixtures/researcher/spawn-0-good.md +58 -0
  13. package/lib/fixtures/researcher/spawn-1-good.md +53 -0
  14. package/lib/fixtures/researcher/spawn-2-disagrees.md +53 -0
  15. package/lib/fixtures/validation/clean-frontmatter.md +36 -0
  16. package/lib/fixtures/validation/issues-frontmatter.md +46 -0
  17. package/lib/fixtures/validation/legacy-no-frontmatter.md +24 -0
  18. package/lib/fixtures/verification/deferred-with-rationale.md +22 -0
  19. package/lib/fixtures/verification/failed-mixed.md +17 -0
  20. package/lib/fixtures/verification/h2-emdash-verified.md +21 -0
  21. package/lib/fixtures/verification/h3-colon-verified.md +34 -0
  22. package/lib/output-lint.cjs +332 -0
  23. package/lib/output-lint.test.cjs +287 -0
  24. package/lib/researcher-reconciler.cjs +356 -0
  25. package/lib/researcher-reconciler.test.cjs +359 -0
  26. package/lib/schemas/index.cjs +40 -0
  27. package/lib/schemas/research-final.cjs +108 -0
  28. package/lib/schemas/researcher-output.cjs +91 -0
  29. package/lib/schemas/validation.cjs +72 -0
  30. package/lib/schemas/verification.cjs +62 -0
  31. package/lib/verify.cjs +133 -13
  32. package/lib/verify.test.cjs +95 -0
  33. package/np-tools.cjs +2 -0
  34. package/package.json +1 -1
  35. package/workflows/execute-phase.md +33 -5
  36. package/workflows/research-phase.md +139 -7
  37. package/workflows/validate-phase.md +28 -1
  38. package/workflows/verify-work.md +32 -0
@@ -0,0 +1,115 @@
1
+ ---
2
+ name: np-researcher-reconciler
3
+ description: Stage-2 reconciler for the researcher swarm (ADR-0018). Reads the k per-spawn outputs + the deterministic-merge proposal, classifies reasoning-trace agreement, surfaces contested decisions, writes the final M<NNN>-RESEARCH.md. READ-ONLY on inputs; single Write target.
4
+ tier: sonnet
5
+ tools: Read, Write, Bash, Grep, Glob
6
+ color: violet
7
+ ---
8
+
9
+ <role>
10
+ You are the nubos-pilot **Researcher Reconciler**. The swarm has already run: k parallel `np-researcher` spawns each produced one `spawn-<i>.md` against the same `<task_query>` with a unique `<seed_delta>`. The deterministic `lib/researcher-swarm.cjs::mergeConsensus` produced a majority/union/intersection (Mehrheit/Union/Schnittmenge) proposal. Your job is the second pass — read all of it, weigh reasoning traces (not just conclusions), and write the final `M<NNN>-RESEARCH.md` that the planner will consume.
11
+
12
+ You are READ-ONLY on inputs. You Write exactly one file: `M<NNN>-RESEARCH.md` at the path the orchestrator hands you. You never modify the per-spawn outputs, the merge proposal, or any source code.
13
+
14
+ Your output is the **truth of record** for the swarm: it includes a Reconciler Summary, a final Decisions section, an explicit Contested Decisions section, and Final-{Risks, Patterns, Open Questions, Sources}. Frontmatter exposes machine-readable signals (agreement_score, contested_count, reconciler_verdict) that the disagreement hard-gate consumes.
15
+ </role>
16
+
17
+ ## Completeness Mandate
18
+
19
+ This agent operates under [`templates/COMPLETENESS.md`](../templates/COMPLETENESS.md). The rules that bind this role:
20
+
21
+ - **Rule 5 — Aim to genuinely impress.** Surface disagreements; never bury them. A swarm with 1 contested decision and 4 agreed is more useful than a tidy 5-agreed list that papered over a real split.
22
+ - **Rule 9 — Search before building.** Your work is comparison, not new research. Do not invent decisions absent from the spawns; do not silently drop decisions the merge demoted.
23
+ - **Rule 11 — Ship the complete thing.** Every consolidated decision gets a `Reasoning-Trace-Agreement` classification (`identical | overlapping | orthogonal | unknown`). Every contested decision gets a per-spawn verdict citation and your pick + the reason.
24
+ - **Rule 12 — Boil the ocean.** If you cannot pick a contested decision deterministically, classify the reconciler_verdict as `needs_re_spawn` and document the unresolved evidence question — never coin a new claim to break the tie.
25
+
26
+ Refusal of any rule is a hard-stop. Surface the violation to the orchestrator verbatim and abort the spawn.
27
+
28
+ ## Inputs
29
+
30
+ The orchestrator provides these in your prompt context. Read every path via `Read` — do not guess.
31
+
32
+ | Input | Purpose | Typical path |
33
+ |-------|---------|--------------|
34
+ | `<spawn_paths>` (k entries, required) | Per-spawn researcher outputs. Each follows the `researcher-output` schema. | `.nubos-pilot/milestones/M<NNN>/research/spawn-<i>.md` |
35
+ | `<merge_path>` (required) | Deterministic `mergeConsensus` proposal. | `.nubos-pilot/milestones/M<NNN>/research/merge.md` |
36
+ | `<merged_json>` (in prompt) | Same deterministic data as JSON — `final_decisions`, `contested`, `agreement.decisions`, etc. — emitted by `node .nubos-pilot/bin/np-tools.cjs researcher-reconcile prepare <N>`. Use this as the structured truth; the merge.md is its human render. | inline in prompt |
37
+ | `<context_paths>` (recommended) | `M<NNN>-CONTEXT.md`, `M<NNN>-ROADMAP.md` for grounding. | `.nubos-pilot/milestones/M<NNN>/...` |
38
+ | `<task_query>` | The original research question that all spawns answered. | inline in prompt |
39
+ | `<final_path>` (required) | The exact path you write your output to. | `.nubos-pilot/milestones/M<NNN>/M<NNN>-RESEARCH.md` |
40
+ | `<schema_prompt>` (required, verbatim contract) | The `research-final` schema rendered as a Markdown contract. Your output MUST conform — the workflow lints with `output-lint --enforce` and re-spawns you on violation. | injected by workflow via `output-lint prompt --schema research-final` |
41
+
42
+ ## Decision policy
43
+
44
+ 1. **Consensus decisions** (≥ ⌈k/2⌉ spawns agree on a decision text) go into `## Final Decisions` in spawn-order of first appearance. Cite all `from_spawns: [0, 1, 2]`. Classify the `Reasoning-Trace-Agreement`:
45
+ - **identical** — same wording / same evidence chain. Possible groupthink; lower the consolidated confidence one notch.
46
+ - **overlapping** — different prose, overlapping evidence. Default classification; consolidated confidence = max of cited spawns' confidences.
47
+ - **orthogonal** — different prose, different evidence (different sources, different reasoning paths). Strongest signal; consolidated confidence = `high`.
48
+ - **unknown** — < 2 spawns provided a `**Reasoning:**` block. Cite the missing reasoning, do not promote confidence.
49
+
50
+ 2. **Contested decisions** (only one spawn proposes a decision text not in any other spawn) go into `## Contested Decisions`. For each:
51
+ - Quote the spawn-i text + Reasoning + Evidence verbatim.
52
+ - State whether you `Pick`, `Discard`, or mark `Unresolved`.
53
+ - Cite the reason: contradiction with locked decisions in CONTEXT.md, evidence conflict with another spawn, missing reasoning, etc.
54
+ - If you mark it `Unresolved`, set `reconciler_verdict: needs_re_spawn` in frontmatter.
55
+
56
+ 3. **Risks**: union of all spawn risks. Deduplicate by normalized text. Severity = max across cited spawns.
57
+
58
+ 4. **Patterns**: only patterns cited by ≥ `min(2, k)` spawns enter `## Final Patterns`. Solo patterns drop silently (they were noise, by definition).
59
+
60
+ 5. **Open Questions**: union; if ≥ 2 spawns raised the same question, it's a real blocker — note in the Summary.
61
+
62
+ 6. **Sources**: union; deduplicate by URL/path; preserve each spawn's annotation.
63
+
64
+ ## Output schema
65
+
66
+ The orchestrator injects `<schema_prompt>` — that is the binding contract. Re-stating the high-level shape here for reference:
67
+
68
+ ```yaml
69
+ ---
70
+ schema_version: 2
71
+ milestone: "M<NNN>"
72
+ type: research
73
+ agent: np-researcher-reconciler
74
+ k: <int>
75
+ agreement_score: <float 0..1>
76
+ contested_count: <int>
77
+ reconciler_verdict: clean | issues_flagged | needs_re_spawn
78
+ decision_count: <int>
79
+ risk_count: <int>
80
+ pattern_count: <int>
81
+ open_question_count: <int>
82
+ source_count: <int>
83
+ ---
84
+ ```
85
+
86
+ Body sections (each must be present, even if `_None._`):
87
+
88
+ - `## Reconciler Summary` — narrative: what k was, how many decisions consolidated, how many contested, what the reasoning-trace distribution looked like, whether the swarm should be re-spawned with a sharper task_query.
89
+ - `## Final Decisions` — `### D-N: <text>` with `**Reconciled-from:** spawn-X, spawn-Y, ...`, `**Confidence (reconciled):** high|med|low`, `**Reasoning-Trace-Agreement:** identical|overlapping|orthogonal|unknown`, `**Evidence:** ...`, `**Reasoning:** ...` (synthesized from cited spawns).
90
+ - `## Contested Decisions` — `### CD-N: <text>` with `**Spawn-X says:** ...`, `**Spawn-Y says:** ...`, `**Reconciler verdict:** Pick spawn-X | Discard | Unresolved`, `**Reason:** ...`.
91
+ - `## Final Risks` — `### R-N: <text>` with `**Severity:** ...`, `**Mitigation:** ...`, `**Reasoning:** ...`.
92
+ - `## Final Patterns` — `### P-N: <text>` with `**Description:** ...`, `**Source-Type:** ...`, `**Reasoning:** ...`.
93
+ - `## Final Open Questions` — `### Q-N: <text>` with `**Why-blocked:** ...`.
94
+ - `## Sources` — `### S-N: <url-or-path>` with `**Type:** ...`, `**Notes:** ...`.
95
+
96
+ ## Hard-fail contract
97
+
98
+ The workflow runs `output-lint check --file <final_path> --schema research-final --enforce` immediately after your Write returns. Any violation (missing frontmatter key, wrong enum, missing section, `[object Object]` titles) aborts the workflow with exit 1 and the workflow re-spawns you with the violation list as feedback. **Do not patch by hand.**
99
+
100
+ ## Reconciler verdict guidance
101
+
102
+ Set `reconciler_verdict` in frontmatter as:
103
+
104
+ - **`clean`** — `contested_count == 0` AND `agreement_score >= 0.8` AND no `Unresolved` contested entries. The swarm converged.
105
+ - **`issues_flagged`** — contested decisions exist but you picked each one with documented reasoning. Workflow may continue, but downstream consumers (planner) should weight contested picks slightly lower.
106
+ - **`needs_re_spawn`** — at least one `Unresolved` contested entry OR `agreement_score < 0.5`. The workflow's disagreement hard-gate asks the user whether to re-spawn the swarm with a sharper task_query.
107
+
108
+ The disagreement hard-gate in the workflow keys on `agreement_score` and `contested_count` from your frontmatter. Honest values make the gate work; inflated values silently break it downstream.
109
+
110
+ ## What you do NOT do
111
+
112
+ - Do not Read or Write outside the provided paths (no source code, no roadmap mutation, no commits).
113
+ - Do not invent decisions, risks, patterns, or sources that are not in any spawn.
114
+ - Do not collapse identical reasoning into "orthogonal" just to inflate confidence — call groupthink for what it is.
115
+ - Do not silently demote a `needs_re_spawn` verdict to `issues_flagged` to avoid the askuser dialog.
@@ -29,6 +29,22 @@ This agent operates under [`templates/COMPLETENESS.md`](../templates/COMPLETENES
29
29
 
30
30
  Refusal of any rule is a hard-stop. Surface the violation to the orchestrator verbatim and abort the spawn.
31
31
 
32
+ ## Output Schema (ADR-0017 / ADR-0018)
33
+
34
+ When invoked under the swarm (default), you write to `.nubos-pilot/milestones/M<NNN>/research/spawn-<i>.md`, **not** to the milestone-level `RESEARCH.md`. The reconciler agent merges your output with the other spawns and produces the consumed `M<NNN>-RESEARCH.md`.
35
+
36
+ Your per-spawn output MUST conform to the **`researcher-output`** schema. The orchestrator injects the schema as a literal `<schema_prompt>` block in your spawn input. Treat it as contract, not advice — `output-lint check --schema researcher-output --enforce` runs immediately after your Write and re-spawns you on violation.
37
+
38
+ Hard rules from the schema:
39
+
40
+ - Frontmatter must include `schema_version`, `agent: np-researcher`, `spawn_index`, `seed_delta`, `task_query_hash`, plus count fields (`decision_count`, `risk_count`, etc.).
41
+ - Five body sections are mandatory (use `_None._` if empty): `## Decisions`, `## Risks`, `## Patterns`, `## Open Questions`, `## Sources`.
42
+ - Every Decision / Risk / Pattern / Open Question / Source uses heading style `### <PREFIX>-N: <text>` where PREFIX ∈ {D, R, P, Q, S}.
43
+ - **Every entry has a `**Reasoning:**` field** (mandatory). The Reasoning field documents what you weighed, what you discarded, and why this conclusion. The reconciler compares `Reasoning` traces across spawns to detect groupthink (identical reasoning → low independent evidence) vs orthogonal evidence (different reasoning paths to same conclusion → strong signal).
44
+ - No `[object Object]` strings in headings — the linter blocks them.
45
+
46
+ If only one spawn is configured (legacy single-spawn mode), you write directly to `M<NNN>-RESEARCH.md` and the reconciler is skipped — but the schema requirements still hold.
47
+
32
48
  **First read — Codebase Docs (runtime-agnostic):** Before any external
33
49
  research, read `.nubos-pilot/codebase/INDEX.md` and the module docs for
34
50
  every area the phase will touch. Existing External Deps listed there are
@@ -8,6 +8,8 @@ const COMMANDS = [
8
8
  { name: 'research-phase', category: 'Planning', description: 'Milestone-level research (WebFetch + MCP; offline fallback)', description_de: 'Milestone-Recherche (WebFetch + MCP; Offline-Fallback)' },
9
9
  { name: 'plan-milestone', category: 'Planning', description: 'Plan a milestone: scaffolds slices + tasks', description_de: 'Plant einen Milestone: erzeugt Slices + Tasks' },
10
10
  { name: 'plan-lint', category: 'Planning', description: 'Mechanical Trust-Layer linter for PLAN.md (verify-command + parallel-race + over-specification). ADR-0013', description_de: 'Mechanischer Trust-Layer-Linter für PLAN.md (verify-command + parallel-race + Über-Spezifikation). ADR-0013' },
11
+ { name: 'output-lint', category: 'Review', description: 'Mechanical output-artifact linter (frontmatter + body + cross-field invariants). Verbs: check | prompt | list. Schemas in lib/schemas/. Hard-gates verify-work, validate-phase. ADR-0017', description_de: 'Mechanischer Output-Artefakt-Linter (Frontmatter + Body + Cross-Field-Invarianten). Verben: check | prompt | list. Schemas in lib/schemas/. Hard-Gate für verify-work, validate-phase. ADR-0017' },
12
+ { name: 'researcher-reconcile', category: 'Planning', description: 'Researcher-swarm reconciliation (ADR-0018). Verbs: parse-spawn --file | prepare <N> | gate <N>. Reads per-spawn outputs, applies reasoning-trace classification, surfaces contested decisions, hard-gates on agreement_score / contested_count.', description_de: 'Researcher-Schwarm-Reconciliation (ADR-0018). Verben: parse-spawn --file | prepare <N> | gate <N>. Liest Per-Spawn-Outputs, klassifiziert Reasoning-Trace, hebt Contested Decisions hervor, Hard-Gate auf agreement_score / contested_count.' },
11
13
  { name: 'new-project', category: 'Planning', description: 'Greenfield project init (PROJECT.md + REQUIREMENTS.md + M001 milestone)', description_de: 'Greenfield-Projekt-Init (PROJECT.md + REQUIREMENTS.md + M001-Milestone)' },
12
14
  { name: 'new-milestone', category: 'Planning', description: 'Append a new milestone (M<NNN>) to an existing project', description_de: 'Hängt einen neuen Milestone (M<NNN>) an ein bestehendes Projekt an' },
13
15
  { name: 'propose-milestones', category: 'Planning', description: 'Re-plan all not-yet-done milestones: AI proposes add/update/remove from PROJECT.md + REQUIREMENTS.md', description_de: 'Plant offene Milestones neu: KI schlägt add/update/remove aus PROJECT.md + REQUIREMENTS.md vor' },
@@ -11,6 +11,8 @@ const runtimeAssetsMod = require('../../lib/install/runtime-assets.cjs');
11
11
  const askuserMod = require('../../lib/askuser.cjs');
12
12
  const codebaseManifest = require('../../lib/codebase-manifest.cjs');
13
13
  const { scan: workspaceScan } = require('../../lib/workspace-scan.cjs');
14
+ const outputLint = require('../../lib/output-lint.cjs');
15
+ const { getSchema, inferSchemaForFile } = require('../../lib/schemas/index.cjs');
14
16
 
15
17
  const PAYLOAD_SUBPATH = path.join('.claude', 'nubos-pilot');
16
18
  const STATE_SUBPATH = '.nubos-pilot';
@@ -497,6 +499,52 @@ function _checkOrphanTmpFiles(projectRoot) {
497
499
  return issues;
498
500
  }
499
501
 
502
/**
 * Lints the per-milestone VERIFICATION / VALIDATION artifacts against their
 * output schemas and reports problems as doctor issues.
 *
 * Only directories named `M<digits>` (three or more digits) directly under
 * `<projectRoot>/<STATE_SUBPATH>/milestones` are inspected. A missing or
 * unreadable milestones directory yields an empty list.
 *
 * @param {string} projectRoot - Root directory of the project being audited.
 * @returns {Array<object>} Issues with id `output-schema-lint-failed` (the
 *   linter or schema lookup threw) or `output-schema-violation` (lint ran and
 *   reported `ok === false`).
 */
function _checkOutputSchemas(projectRoot) {
  const found = [];
  const root = path.join(projectRoot, STATE_SUBPATH, 'milestones');
  if (!fs.existsSync(root)) return found;

  let dirents;
  try {
    dirents = fs.readdirSync(root, { withFileTypes: true });
  } catch {
    // Best-effort audit: an unreadable directory is not reported here.
    return found;
  }

  const milestoneNames = dirents
    .filter((d) => d.isDirectory() && /^M\d{3,}$/.test(d.name))
    .map((d) => d.name);

  for (const name of milestoneNames) {
    const milestoneDir = path.join(root, name);
    for (const suffix of ['-VERIFICATION.md', '-VALIDATION.md']) {
      const file = path.join(milestoneDir, name + suffix);
      if (!fs.existsSync(file)) continue;

      const schemaName = inferSchemaForFile(file);
      if (!schemaName) continue;

      let outcome;
      try {
        outcome = outputLint.lintFile(file, getSchema(schemaName));
      } catch (err) {
        found.push({
          id: 'output-schema-lint-failed',
          severity: 'error',
          fixable: 'manual',
          details: { file, schema: schemaName, cause: err && err.message },
        });
        continue;
      }

      if (outcome.ok) continue;
      found.push({
        id: 'output-schema-violation',
        severity: 'error',
        fixable: 'manual',
        details: {
          file,
          schema: schemaName,
          violation_count: outcome.violations.length,
          // Cap the embedded list so the doctor report stays readable.
          violations: outcome.violations.slice(0, 10),
        },
      });
    }
  }
  return found;
}
547
+
500
548
  function _audit(projectRoot) {
501
549
  const scope = _readScope(projectRoot);
502
550
  const payloadDir = _payloadDirFor(projectRoot, scope);
@@ -514,6 +562,7 @@ function _audit(projectRoot) {
514
562
  issues.push(..._checkNubosloopKnowledgeStore(projectRoot));
515
563
  issues.push(..._checkNubosloopConfig(projectRoot));
516
564
  issues.push(..._checkOrphanTmpFiles(projectRoot));
565
+ issues.push(..._checkOutputSchemas(projectRoot));
517
566
  return { issues, _codexContent: codex.content };
518
567
  }
519
568
 
@@ -18,6 +18,10 @@ const textMode = require('../../lib/text-mode.cjs');
18
18
 
19
19
  const INLINE_THRESHOLD_BYTES = 16 * 1024;
20
20
 
21
/**
 * Reports whether the literal `--verify-work` flag is present in an argv slice.
 *
 * @param {unknown} list - Candidate argument list; non-arrays yield `false`.
 * @returns {boolean} `true` only when `list` is an array containing the exact
 *   string `'--verify-work'`.
 */
function _hasVerifyWorkFlag(list) {
  if (!Array.isArray(list)) return false;
  return list.includes('--verify-work');
}
24
+
21
25
  function _validateMilestoneArg(raw) {
22
26
  if (raw == null || raw === '') {
23
27
  throw new NubosPilotError(
@@ -85,7 +89,7 @@ function _sliceTasksSorted(mNum, sNum, cwd) {
85
89
  }).filter(Boolean);
86
90
  }
87
91
 
88
- function _initPayload(mNum, cwd) {
92
+ function _initPayload(mNum, cwd, opts) {
89
93
  let def;
90
94
  try {
91
95
  def = getPhase(mNum, cwd);
@@ -123,6 +127,8 @@ function _initPayload(mNum, cwd) {
123
127
  }
124
128
  const tmDetail = textMode.resolveTextModeDetail(cwd);
125
129
 
130
+ const autoVerify = Boolean(opts && opts.auto_verify);
131
+
126
132
  return {
127
133
  _workflow: 'execute-milestone',
128
134
  milestone: mNum,
@@ -136,6 +142,7 @@ function _initPayload(mNum, cwd) {
136
142
  total_tasks: totalTasks,
137
143
  slice_count: slices.length,
138
144
  executor_tier: 'sonnet',
145
+ auto_verify: autoVerify,
139
146
  text_mode: tmDetail.enabled,
140
147
  text_mode_source: tmDetail.source,
141
148
  agent_skills: { executor: _safeSkills('np-executor', cwd) },
@@ -279,7 +286,8 @@ function run(args, ctx) {
279
286
  switch (verb) {
280
287
  case 'init': {
281
288
  const mNum = _validateMilestoneArg(list[1]);
282
- const payload = _initPayload(mNum, cwd);
289
+ const autoVerify = _hasVerifyWorkFlag(list.slice(2));
290
+ const payload = _initPayload(mNum, cwd, { auto_verify: autoVerify });
283
291
  _emit(payload, stdout, cwd);
284
292
  return payload;
285
293
  }
@@ -231,6 +231,39 @@ test('EM-9: finalize-milestone iterates every slice and produces one summary per
231
231
  assert.ok(fs.existsSync(s2));
232
232
  });
233
233
 
234
// EM-11..EM-13: `init` reports auto_verify=true only for the exact
// `--verify-work` flag; no flag or an unknown flag leaves it false.
for (const [title, argv, expected] of [
  ['EM-11: init without --verify-work emits auto_verify=false', ['init', '1'], false],
  ['EM-12: init with --verify-work emits auto_verify=true', ['init', '1', '--verify-work'], true],
  ['EM-13: init ignores unknown flags (no --auto-verify alias)', ['init', '1', '--auto-verify'], false],
]) {
  test(title, () => {
    // Fresh sandbox per case: one milestone, one slice, one task.
    const sandbox = makeSandbox();
    seedRoadmapYaml(sandbox, _roadmap());
    seedMilestoneDir(sandbox, 1, {});
    seedSliceDir(sandbox, 1, 1, {});
    _seedTask(sandbox, 1, 1, 1, ['src/a.ts']);
    const cap = _capture();
    const payload = subcmd.run(argv, { cwd: sandbox, stdout: cap.stub });
    assert.equal(payload.auto_verify, expected);
  });
}
266
+
234
267
  test('EM-10: finalize-slice marks tasks without SUMMARY.md but does not fail', () => {
235
268
  const sandbox = makeSandbox();
236
269
  seedRoadmapYaml(sandbox, _roadmap());
@@ -0,0 +1,113 @@
1
+ 'use strict';
2
+
3
+ const fs = require('node:fs');
4
+ const path = require('node:path');
5
+
6
+ const { NubosPilotError } = require('../../lib/core.cjs');
7
+ const outputLint = require('../../lib/output-lint.cjs');
8
+ const { getSchema, listSchemas, inferSchemaForFile } = require('../../lib/schemas/index.cjs');
9
+
10
/**
 * Parses the flags that follow the output-lint verb.
 *
 * Recognised flags:
 *   --file | -f <path>     file to lint
 *   --schema | -s <name>   schema name
 *   --format <fmt>         output format
 *   --enforce              non-zero exit code on violations
 *   --md / --text          shorthands for `--format md` / `--format text`
 * Unknown tokens are ignored.
 *
 * A value-taking flag that appears as the last token (no value follows) leaves
 * the current setting untouched instead of overwriting it with `undefined`.
 *
 * @param {string[]} list - Arguments after the verb.
 * @returns {{file: (string|null), schema: (string|null), format: string, enforce: boolean}}
 */
function _parseArgs(list) {
  const out = { file: null, schema: null, format: 'json', enforce: false };
  for (let i = 0; i < list.length; i++) {
    let valueKey = null;
    switch (list[i]) {
      case '--file':
      case '-f':
        valueKey = 'file';
        break;
      case '--schema':
      case '-s':
        valueKey = 'schema';
        break;
      case '--format':
        valueKey = 'format';
        break;
      case '--enforce':
        out.enforce = true;
        break;
      case '--md':
        out.format = 'md';
        break;
      case '--text':
        out.format = 'text';
        break;
      default:
        break;
    }
    if (valueKey !== null) {
      // Always consume the value slot; only assign when a value is present.
      i += 1;
      if (i < list.length && list[i] != null) out[valueKey] = list[i];
    }
  }
  return out;
}
23
+
24
/**
 * Formats a lint result as plain text: a status header line, followed by one
 * line per violation when the result is not ok.
 *
 * @param {{ok: boolean, schema_name?: string, path?: string, violations?: Array<{code: string, path: string, message: string}>}} result
 * @returns {string} Newline-joined report without a trailing newline.
 */
function _renderText(result) {
  const status = result.ok ? 'OK' : 'FAIL';
  const header = `${status} [${result.schema_name || '?'}] ${result.path || '<inline>'}`;
  if (result.ok) return header;

  const details = (result.violations || []).map(
    (v) => ` - [${v.code}] ${v.path}: ${v.message}`,
  );
  return [header, ...details].join('\n');
}
34
+
35
/**
 * Writes a lint result to `stdout`.
 *
 * `format === 'text'` emits the text rendering plus a trailing newline; every
 * other format value emits pretty-printed JSON with no trailing newline.
 *
 * @param {object} result - Lint result to serialise.
 * @param {string} format - Requested output format.
 * @param {{write: function(string): *}} stdout - Sink with a `write` method.
 * @returns {void}
 */
function _emit(result, format, stdout) {
  const rendered = format === 'text'
    ? _renderText(result) + '\n'
    : JSON.stringify(result, null, 2);
  stdout.write(rendered);
}
42
+
43
/**
 * `check` verb: lints one file against a schema and emits the result.
 *
 * The schema comes from `--schema`, falling back to inference from the file
 * name.
 *
 * @param {{file: ?string, schema: ?string, format: string, enforce: boolean}} flags
 * @param {{write: function(string): *}} stdout
 * @returns {number} `1` when `--enforce` is set and the lint failed, else `0`.
 * @throws {NubosPilotError} `output-lint-missing-file` when no file is given;
 *   `output-lint-cannot-infer-schema` when no schema is given or inferable.
 */
function _verbCheck(flags, stdout) {
  const { file, schema, format, enforce } = flags;
  if (!file) {
    throw new NubosPilotError('output-lint-missing-file', 'check requires --file <path>', {});
  }

  const schemaName = schema || inferSchemaForFile(file);
  if (!schemaName) {
    throw new NubosPilotError(
      'output-lint-cannot-infer-schema',
      'cannot infer schema from filename; pass --schema <name>',
      { file, available: listSchemas() },
    );
  }

  const result = outputLint.lintFile(file, getSchema(schemaName));
  _emit(result, format, stdout);
  return enforce && !result.ok ? 1 : 0;
}
63
+
64
/**
 * `prompt` verb: writes the prompt rendering of a schema to `stdout`.
 *
 * @param {{schema: ?string}} flags - Must carry a schema name.
 * @param {{write: function(string): *}} stdout
 * @returns {number} Always `0`.
 * @throws {NubosPilotError} `output-lint-missing-schema` when `--schema` is absent.
 */
function _verbPrompt(flags, stdout) {
  const schemaName = flags.schema;
  if (!schemaName) {
    const details = { available: listSchemas() };
    throw new NubosPilotError('output-lint-missing-schema', 'prompt requires --schema <name>', details);
  }
  const rendered = outputLint.schemaPrompt(getSchema(schemaName));
  stdout.write(rendered);
  return 0;
}
74
+
75
/**
 * `list` verb: writes a JSON array describing every registered schema.
 *
 * Each entry carries the schema name, its `artifact` and `description`
 * (`null` when absent), and its required frontmatter keys (`[]` when absent).
 *
 * @param {object} _flags - Unused.
 * @param {{write: function(string): *}} stdout
 * @returns {number} Always `0`.
 */
function _verbList(_flags, stdout) {
  const summaries = [];
  for (const name of listSchemas()) {
    const { artifact, description, frontmatter } = getSchema(name);
    summaries.push({
      name,
      artifact: artifact || null,
      description: description || null,
      required_frontmatter: (frontmatter && frontmatter.required) || [],
    });
  }
  stdout.write(JSON.stringify(summaries, null, 2));
  return 0;
}
88
+
89
/**
 * Entry point of the `output-lint` subcommand.
 *
 * Dispatches on the first argument: `check`, `prompt` (alias `schema-prompt`)
 * or `list`. The remaining arguments are parsed as flags.
 *
 * @param {string[]} args - Verb followed by flags; non-arrays are treated as empty.
 * @param {{stdout?: {write: function(string): *}}} [ctx] - Optional context;
 *   `stdout` defaults to `process.stdout`.
 * @returns {number} The exit code returned by the selected verb handler.
 * @throws {NubosPilotError} `output-lint-unknown-verb` for any other verb.
 */
function run(args, ctx) {
  const stdout = (ctx || {}).stdout || process.stdout;
  const argv = Array.isArray(args) ? args : [];
  const [verb] = argv;
  const flags = _parseArgs(argv.slice(1));

  // A Map (not a plain object) so verbs like "constructor" stay unknown.
  const handlers = new Map([
    ['check', _verbCheck],
    ['prompt', _verbPrompt],
    ['schema-prompt', _verbPrompt],
    ['list', _verbList],
  ]);

  const handler = handlers.get(verb);
  if (!handler) {
    throw new NubosPilotError(
      'output-lint-unknown-verb',
      'output-lint: unknown verb: ' + String(verb),
      { verb, allowed: ['check', 'prompt', 'list'] },
    );
  }
  return handler(flags, stdout);
}
112
+
113
+ module.exports = { run };
@@ -0,0 +1,118 @@
1
+ 'use strict';
2
+
3
+ const fs = require('node:fs');
4
+ const path = require('node:path');
5
+ const os = require('node:os');
6
+ const crypto = require('node:crypto');
7
+
8
+ const { NubosPilotError, projectStateDir } = require('../../lib/core.cjs');
9
+ const reconciler = require('../../lib/researcher-reconciler.cjs');
10
+ const layout = require('../../lib/layout.cjs');
11
+
12
+ const INLINE_THRESHOLD = 16 * 1024;
13
+
14
/**
 * Writes a payload to `stdout` as pretty-printed JSON.
 *
 * When the UTF-8 size of the JSON exceeds `INLINE_THRESHOLD`, the JSON is
 * written to a uniquely named file under `<state dir>/.tmp` and
 * `@file:<path>` is written to `stdout` instead.
 *
 * @param {*} payload - JSON-serialisable value.
 * @param {{write: function(string): *}} stdout
 * @param {string} cwd - Working directory used to resolve the state dir.
 * @returns {void}
 */
function _emit(payload, stdout, cwd) {
  const serialized = JSON.stringify(payload, null, 2);
  const fitsInline = Buffer.byteLength(serialized, 'utf-8') <= INLINE_THRESHOLD;

  if (!fitsInline) {
    const spillDir = path.join(projectStateDir(cwd), '.tmp');
    fs.mkdirSync(spillDir, { recursive: true });
    // pid + random hex keeps concurrent invocations from colliding.
    const unique = process.pid + '-' + crypto.randomBytes(4).toString('hex');
    const spillPath = path.join(spillDir, 'researcher-reconcile-' + unique + '.json');
    fs.writeFileSync(spillPath, serialized, 'utf-8');
    stdout.write('@file:' + spillPath);
    return;
  }

  stdout.write(serialized);
}
27
+
28
/**
 * Validates the milestone argument and converts it to a number.
 *
 * Accepts only strings (or values whose string form is) made of decimal
 * digits that denote an integer >= 1 within the safe-integer range. The
 * previous check let `0` through despite the "positive integer" contract, and
 * very long digit strings silently lost precision in `Number()`.
 *
 * @param {*} raw - Raw CLI argument.
 * @returns {number} The milestone number (>= 1).
 * @throws {NubosPilotError} `researcher-reconcile-invalid-milestone` when the
 *   value is missing, non-numeric, zero, or not a safe integer.
 */
function _validateMilestoneArg(raw) {
  const text = raw == null ? '' : String(raw);
  const value = /^\d+$/.test(text) ? Number(text) : NaN;
  if (!Number.isSafeInteger(value) || value < 1) {
    throw new NubosPilotError(
      'researcher-reconcile-invalid-milestone',
      'milestone must be a positive integer',
      { value: raw },
    );
  }
  return value;
}
38
+
39
/**
 * Parses the flags shared by the researcher-reconcile verbs.
 *
 * Recognised flags:
 *   --min-agreement-score <number>
 *   --max-contested <number>
 *   --file <path>
 * Unknown tokens are ignored.
 *
 * Numeric flags must carry a finite numeric value. Previously a missing or
 * non-numeric value became `NaN`, which passed the callers' `!= null` checks
 * and reached the gate as a threshold that no comparison can satisfy.
 *
 * @param {string[]} list - Arguments to scan.
 * @returns {{min_agreement_score: (number|null), max_contested: (number|null), file: (string|null|undefined)}}
 * @throws {NubosPilotError} `researcher-reconcile-invalid-flag` when a numeric
 *   flag has no value or a non-numeric value.
 */
function _parseFlags(list) {
  const out = { min_agreement_score: null, max_contested: null, file: null };

  // Converts a raw flag value to a finite number or throws a typed error.
  const toNumber = (flag, rawValue) => {
    const blank = rawValue == null || String(rawValue).trim() === '';
    const parsed = blank ? NaN : Number(rawValue);
    if (!Number.isFinite(parsed)) {
      throw new NubosPilotError(
        'researcher-reconcile-invalid-flag',
        flag + ' requires a numeric value',
        { flag, value: rawValue },
      );
    }
    return parsed;
  };

  for (let i = 0; i < list.length; i++) {
    const a = list[i];
    if (a === '--min-agreement-score') out.min_agreement_score = toNumber(a, list[++i]);
    else if (a === '--max-contested') out.max_contested = toNumber(a, list[++i]);
    else if (a === '--file') out.file = list[++i];
  }
  return out;
}
49
+
50
/**
 * `parse-spawn` verb: parses a single per-spawn researcher output file.
 *
 * @param {string[]} args - Arguments after the verb; must include `--file <path>`.
 * @param {object} ctx - Unused by this verb.
 * @returns {*} Whatever `reconciler.parseSpawnOutput` returns for the file.
 * @throws {NubosPilotError} `researcher-reconcile-missing-file` when `--file` is absent.
 */
function _verbParseSpawn(args, ctx) {
  const { file } = _parseFlags([...args]);
  if (file) {
    return reconciler.parseSpawnOutput(file);
  }
  throw new NubosPilotError('researcher-reconcile-missing-file', 'parse-spawn requires --file <path>', {});
}
57
+
58
/**
 * `prepare` verb: builds the reconciler input for a milestone.
 *
 * @param {string[]} args - `[milestone, ...flags]`; the optional flags
 *   `--min-agreement-score` / `--max-contested` are forwarded only when given.
 * @param {{cwd?: string}} ctx - `cwd` defaults to `process.cwd()`.
 * @returns {*} Result of `reconciler.prepareReconcilerInput`.
 */
function _verbPrepare(args, ctx) {
  const cwd = ctx.cwd || process.cwd();
  const [rawMilestone, ...flagArgs] = args;
  const milestone = _validateMilestoneArg(rawMilestone);
  const { min_agreement_score: minScore, max_contested: maxContested } = _parseFlags(flagArgs);

  // Only explicitly supplied thresholds are forwarded to the library.
  const opts = {};
  if (minScore != null) opts.min_agreement_score = minScore;
  if (maxContested != null) opts.max_contested = maxContested;

  return reconciler.prepareReconcilerInput(milestone, cwd, opts);
}
67
+
68
/**
 * `gate` verb: evaluates the disagreement gate from the final RESEARCH.md
 * frontmatter of a milestone.
 *
 * @param {string[]} args - `[milestone, ...flags]`; optional threshold flags
 *   `--min-agreement-score` / `--max-contested`.
 * @param {{cwd?: string}} ctx - `cwd` defaults to `process.cwd()`.
 * @returns {*} Result of `reconciler.gateFromFinalFrontmatter`.
 * @throws {NubosPilotError} `researcher-reconcile-no-final` when the final
 *   research file does not exist.
 */
function _verbGate(args, ctx) {
  const cwd = ctx.cwd || process.cwd();
  const [rawMilestone, ...flagArgs] = args;
  const milestone = _validateMilestoneArg(rawMilestone);
  const parsed = _parseFlags(flagArgs);

  // Forward only the thresholds the caller actually supplied.
  const thresholds = {};
  for (const key of ['min_agreement_score', 'max_contested']) {
    if (parsed[key] != null) thresholds[key] = parsed[key];
  }

  const finalPath = reconciler.finalResearchPath(milestone, cwd);
  if (!fs.existsSync(finalPath)) {
    throw new NubosPilotError(
      'researcher-reconcile-no-final',
      'final RESEARCH.md not found at ' + finalPath,
      { milestone, path: finalPath },
    );
  }

  const contents = fs.readFileSync(finalPath, 'utf-8');
  return reconciler.gateFromFinalFrontmatter(contents, thresholds);
}
87
+
88
/**
 * Entry point of the `researcher-reconcile` subcommand.
 *
 * Dispatches on the first argument: `parse-spawn`, `prepare` (alias
 * `prepare-input`) or `gate`. The verb's result is emitted through `_emit`
 * and also returned.
 *
 * @param {string[]} args - Verb followed by its arguments; non-arrays are treated as empty.
 * @param {{stdout?: {write: function(string): *}, cwd?: string}} [ctx] -
 *   `stdout` defaults to `process.stdout`, `cwd` to `process.cwd()`.
 * @returns {*} The payload produced by the selected verb.
 * @throws {NubosPilotError} `researcher-reconcile-unknown-verb` for any other verb.
 */
function run(args, ctx) {
  const { stdout = null, cwd: ctxCwd = null } = ctx || {};
  const out = stdout || process.stdout;
  const cwd = ctxCwd || process.cwd();
  const argv = Array.isArray(args) ? args : [];
  const [verb, ...rest] = argv;

  // A Map (not a plain object) so verbs like "constructor" stay unknown.
  const handlers = new Map([
    ['parse-spawn', _verbParseSpawn],
    ['prepare', _verbPrepare],
    ['prepare-input', _verbPrepare],
    ['gate', _verbGate],
  ]);

  const handler = handlers.get(verb);
  if (!handler) {
    throw new NubosPilotError(
      'researcher-reconcile-unknown-verb',
      'researcher-reconcile: unknown verb: ' + String(verb),
      { verb, allowed: ['parse-spawn', 'prepare', 'gate'] },
    );
  }

  const payload = handler(rest, { cwd });
  _emit(payload, out, cwd);
  return payload;
}
117
+
118
+ module.exports = { run };
@@ -224,6 +224,7 @@ const NP_AGENTS = [
224
224
  { file: 'np-executor', expected_tier: 'sonnet' },
225
225
  { file: 'np-verifier', expected_tier: 'sonnet' },
226
226
  { file: 'np-researcher', expected_tier: 'sonnet' },
227
+ { file: 'np-researcher-reconciler', expected_tier: 'sonnet' },
227
228
  { file: 'np-codebase-documenter', expected_tier: 'sonnet' },
228
229
  { file: 'np-architect', expected_tier: 'sonnet' },
229
230
  { file: 'np-build-fixer', expected_tier: 'sonnet' },