npm - ultimate-pi - Versions diffs - 0.16.0 → 0.17.0 - Mend

ultimate-pi 0.16.0 → 0.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (42) hide show

package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/plan-packet.yaml ADDED Viewed

@@ -0,0 +1,196 @@
+schema_version: "1.0.0"
+contract_version: "1.1.0"
+plan_id: plan-smoke-fixture-001
+task_id: task-smoke-001
+scope: Smoke fixture for plan-phase harness validation with execution_plan and debate artifacts.
+assumptions:
+  - Fixture only; no live agent run
+risk_level: med
+acceptance_checks:
+  - id: AC-1
+    description: DAG validation passes
+  - id: AC-2
+    description: Consolidated debate round recorded (fast profile)
+  - id: AC-3
+    description: Stack brief present in research-brief
+  - id: AC-4
+    description: Sprint contract complete
+  - id: AC-5
+    description: plan-review.md renders
+rollback_plan:
+  revert_commit_ready: true
+  rollback_artifacts:
+    revert_command: git revert HEAD
+    revert_branch: main
+    patch_bundle: .pi/harness/runs/smoke-fixture/patch.bundle
+execution_plan:
+  schema_version: "1.0.0"
+  phases:
+    - phase_id: P1
+      name: Foundation
+      objective: Establish baseline and verify harness wiring
+      entry_criteria:
+        - Fixture loaded
+      exit_criteria:
+        - AC-1 satisfied
+      milestone: M1-baseline
+      work_item_ids: [WI-1, WI-2, WI-3]
+    - phase_id: P2
+      name: Build
+      objective: Implement core changes
+      entry_criteria:
+        - M1-baseline complete
+      exit_criteria:
+        - AC-2 satisfied
+      milestone: M2-build
+      work_item_ids: [WI-4, WI-5, WI-6]
+    - phase_id: P3
+      name: Verify
+      objective: Quality gate and documentation
+      entry_criteria:
+        - M2-build complete
+      exit_criteria:
+        - AC-5 satisfied
+      milestone: M3-ship
+      work_item_ids: [WI-7, WI-8]
+  work_items:
+    - work_item_id: WI-1
+      phase_id: P1
+      title: Load fixture packet
+      description: Read plan-packet.yaml from fixture directory
+      depends_on: []
+      files:
+        - .pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/plan-packet.yaml
+      parallel_safe: true
+      done_criteria:
+        type: manual
+        spec: Fixture packet readable
+      acceptance_check_ids: [AC-1]
+    - work_item_id: WI-2
+      phase_id: P1
+      title: Run DAG validator
+      description: Execute validate-plan-dag.mjs
+      depends_on: [WI-1]
+      files:
+        - .pi/scripts/validate-plan-dag.mjs
+      parallel_safe: false
+      done_criteria:
+        type: command
+        spec: node .pi/scripts/validate-plan-dag.mjs --packet plan-packet.yaml
+      acceptance_check_ids: [AC-1]
+    - work_item_id: WI-3
+      phase_id: P1
+      title: Lint harness-yaml
+      description: Ensure YAML helpers parse fixture
+      depends_on: [WI-1]
+      files:
+        - .pi/lib/harness-yaml.ts
+      parallel_safe: true
+      done_criteria:
+        type: lint
+        spec: npm test
+      acceptance_check_ids: [AC-1]
+    - work_item_id: WI-4
+      phase_id: P2
+      title: Debate round 1-2 artifacts
+      description: Validate review-round YAML
+      depends_on: [WI-2]
+      files:
+        - .pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r1.yaml
+      parallel_safe: false
+      done_criteria:
+        type: artifact
+        spec: artifacts/review-round-r1.yaml exists
+      acceptance_check_ids: [AC-2]
+    - work_item_id: WI-5
+      phase_id: P2
+      title: Debate round 3-4 artifacts
+      description: Validate final review round
+      depends_on: [WI-4]
+      files:
+        - .pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r4.yaml
+      parallel_safe: false
+      done_criteria:
+        type: artifact
+        spec: artifacts/review-round-r4.yaml exists
+      acceptance_check_ids: [AC-2]
+    - work_item_id: WI-6
+      phase_id: P2
+      title: Stack research merge
+      description: research-brief includes stack section
+      depends_on: [WI-2]
+      files: []
+      non_code: true
+      parallel_safe: true
+      done_criteria:
+        type: manual
+        spec: research-brief.yaml contains stack key
+      acceptance_check_ids: [AC-3]
+    - work_item_id: WI-7
+      phase_id: P3
+      title: Sprint contract audit
+      description: R4 sprint audit artifact
+      depends_on: [WI-5]
+      files:
+        - .pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/sprint-audit-r4.yaml
+      parallel_safe: false
+      done_criteria:
+        type: artifact
+        spec: sprint-audit-r4.yaml present
+      acceptance_check_ids: [AC-4]
+    - work_item_id: WI-8
+      phase_id: P3
+      title: Render plan-review
+      description: Human-readable plan review markdown
+      depends_on: [WI-7]
+      files:
+        - .pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/plan-review.md
+      parallel_safe: false
+      done_criteria:
+        type: manual
+        spec: plan-review.md non-empty
+      acceptance_check_ids: [AC-5]
+  sprint_contract:
+    in_scope:
+      - Fixture validation only
+    out_of_scope:
+      - Production deploy
+    definition_of_done: All smoke checks green
+    assumptions:
+      - CI environment has node
+    external_dependencies: []
+  wbs_dictionary:
+    - work_item_id: WI-1
+      deliverable: Fixture packet loaded
+      owner_role: executor
+      inputs: []
+      outputs: [parsed packet]
+  risk_register:
+    - risk_id: R1
+      description: DAG validator false negative
+      likelihood: low
+      impact: high
+      mitigation: Unit tests on validate-plan-dag.mjs
+      linked_work_item_ids: [WI-2]
+    - risk_id: R2
+      description: Debate cap misconfiguration
+      likelihood: med
+      impact: med
+      mitigation: debate-orchestrator plan profile tests
+      linked_work_item_ids: [WI-4]
+    - risk_id: R3
+      description: YAML parse drift
+      likelihood: low
+      impact: med
+      mitigation: harness-yaml strict parse
+      linked_work_item_ids: [WI-3]
+  schedule_metadata:
+    critical_path_work_item_ids: [WI-1, WI-2, WI-4, WI-5, WI-7, WI-8]
+    parallel_groups:
+      - [WI-1, WI-3]
+    schedule_baseline_note: Fixture topological order; no calendar dates
+  dag_validation:
+    status: pass
+    topological_order: [WI-1, WI-2, WI-3, WI-4, WI-5, WI-6, WI-7, WI-8]
+    cycles: []
+    conflicts: []

package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/plan-review.md ADDED Viewed

@@ -0,0 +1,14 @@
+# Plan review (fixture)
+plan_id: plan-smoke-fixture-001
+## Execution plan
+Phases: P1 Foundation → P2 Build → P3 Verify
+Critical path: WI-1 → WI-2 → WI-4 → WI-5 → WI-7 → WI-8
+## Debate
+- Consolidated round (all: spec, quality): review_gate_ready

package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/research-brief.yaml ADDED Viewed

@@ -0,0 +1,62 @@
+decomposition:
+  schema_version: "1.0.0"
+  problem_restatement: Fast-profile smoke for single consolidated debate round
+hypothesis:
+  schema_version: "1.0.0"
+  primary:
+    claim: Fast debate covers spec and quality in one consolidated round
+    mechanism: Eligibility profile fast with min_focus_rounds 1
+    prediction: planDebateOutcomeComplete passes with one consolidated round
+    experiment: Run smoke-harness-plan.mjs --fixture minimal-med-fast
+implementation:
+  schema_version: "1.0.0"
+  problem_framing: Med-risk fixture for consolidated debate
+  sub_problems: [spec coverage, quality coverage]
+  internal_references:
+    - path: test/plan-debate-eligibility.test.mjs
+      relevance: Eligibility unit tests
+      reuse_signal: high
+  external_references: []
+  solution_patterns:
+    - name: light profile gate
+      provenance: ADR-0036
+      fit: Reduces debate cost on trivial tasks
+      tradeoffs:
+        pros: [Fewer rounds]
+        cons: []
+      risks: []
+  similar_implementations:
+    - name: minimal-med four-focus fixture
+      what_it_solves: Full debate coverage
+      gap_vs_us: Fast covers two focuses in one consolidated round
+  recommended_approach:
+    summary: Two review rounds with spec then quality
+    recommended_approach_confidence: high
+    confidence_rationale: Deterministic fixture aligned with eligibility rules
+    evidence_refs:
+      - .pi/extensions/lib/plan-debate-eligibility.ts
+      - test/plan-debate-eligibility.test.mjs
+  anti_patterns: []
+  open_questions: []
+stack:
+  schema_version: "1.0.0"
+  problem_framing: Node harness tooling
+  constraints: []
+  options:
+    - name: extend current stack
+      category: brownfield
+      fit_summary: Use existing ultimate-pi harness
+      tradeoffs:
+        pros: [No new deps]
+        cons: []
+      risks: []
+      evidence_refs: []
+      recommendation_rank: 1
+  recommended_primary: extend current stack
+  rationale: Fixture validates in-repo harness
+eval:
+  schema_version: "1.0.0"
+  revision_recommended: false
+  relevance:
+    passes: true
+    rationale: Hypothesis matches fast smoke task

package/.pi/harness/evals/smoke/smoke-harness-plan.mjs CHANGED Viewed

@@ -1,7 +1,7 @@
 #!/usr/bin/env node
 /**
  * smoke-harness-plan — fixture validation for plan-phase pipeline (CI).
- * Usage: node .pi/harness/evals/smoke/smoke-harness-plan.mjs --fixture [minimal-med|minimal-low-light]
+ * Usage: node .pi/harness/evals/smoke/smoke-harness-plan.mjs --fixture [minimal-med|minimal-low-light|minimal-med-fast]
  */
 import { access, readFile } from "node:fs/promises";
@@ -26,16 +26,23 @@ async function scanFocusCoverage(fixtureRoot, requiredFocus) {
 	let last_round_index = 0;
 	const { readdir } = await import("node:fs/promises");
 	const files = (await readdir(art)).filter((f) =>
-		/^review-round-r\d+\.yaml$/i.test(f),
+		/^review-round(?:-r\d+|-consolidated)\.yaml$/i.test(f),
 	);
 	for (const name of files.sort()) {
-		const m = /^review-round-r(\d+)\.yaml$/i.exec(name);
+		const consolidated = /^review-round-consolidated\.yaml$/i.test(name);
+		const m = consolidated
+			? ["review-round-consolidated.yaml", "1"]
+			: /^review-round-r(\d+)\.yaml$/i.exec(name);
 		if (!m) continue;
-		const roundIndex = Number(m[1]);
+		const roundIndex = consolidated ? 1 : Number(m[1]);
 		if (roundIndex > last_round_index) last_round_index = roundIndex;
 		const draft = parseYaml(await readFile(join(art, name), "utf-8"));
 		const focus = String(draft.debate_round_focus ?? "").trim();
-		if (requiredFocus.includes(focus)) covered.add(focus);
+		if (focus === "all") {
+			for (const f of requiredFocus) covered.add(f);
+		} else if (requiredFocus.includes(focus)) {
+			covered.add(focus);
+		}
 		if (roundIndex === last_round_index) {
 			last_review_gate_ready = draft.review_gate_ready === true;
 		}
@@ -110,22 +117,33 @@ async function runFixture(name) {
 	ok("research-brief.yaml structure");
 	const isLight = name === "minimal-low-light";
-	const requiredFocus = isLight ? ["spec", "quality"] : ["spec", "wbs", "schedule", "quality"];
-	const debateRounds = isLight
-		? ["review-round-r1.yaml", "review-round-r2.yaml"]
-		: [
-				"review-round-r1.yaml",
-				"review-round-r2.yaml",
-				"review-round-r3.yaml",
-				"review-round-r4.yaml",
-			];
+	const isFast = name === "minimal-med-fast";
+	const requiredFocus =
+		isLight || isFast
+			? ["spec", "quality"]
+			: ["spec", "wbs", "schedule", "quality"];
+	const debateRounds = isFast
+		? ["review-round-consolidated.yaml"]
+		: isLight
+			? ["review-round-r1.yaml", "review-round-r2.yaml"]
+			: [
+					"review-round-r1.yaml",
+					"review-round-r2.yaml",
+					"review-round-r3.yaml",
+					"review-round-r4.yaml",
+				];
 	const seenFocus = new Set();
 	for (const fileName of debateRounds) {
 		const p = join(fixtureRoot, "artifacts", fileName);
 		await access(p, constants.R_OK);
 		const draft = parseYaml(await readFile(p, "utf-8"));
 		if (!draft.schema_version) fail(`${fileName} missing schema_version`);
-		if (draft.debate_round_focus) seenFocus.add(draft.debate_round_focus);
+		const f = String(draft.debate_round_focus ?? "").trim();
+		if (f === "all") {
+			for (const req of requiredFocus) seenFocus.add(req);
+		} else if (f) {
+			seenFocus.add(f);
+		}
 	}
 	for (const focus of requiredFocus) {
 		if (!seenFocus.has(focus)) {
@@ -135,7 +153,7 @@ async function runFixture(name) {
 	ok(`debate round YAML artifacts (${requiredFocus.length} focuses)`);
 	const coverage = await scanFocusCoverage(fixtureRoot, requiredFocus);
-	const minRounds = isLight ? 2 : 4;
+	const minRounds = isFast ? 1 : isLight ? 2 : 4;
 	if (!planOutcomeComplete(coverage, requiredFocus, minRounds)) {
 		fail("debate outcome incomplete for fixture coverage");
 	}
@@ -144,6 +162,9 @@ async function runFixture(name) {
 	if (isLight && packet.risk_level !== "low") {
 		fail("minimal-low-light fixture must use risk_level low");
 	}
+	if (isFast && packet.risk_level !== "med") {
+		fail("minimal-med-fast fixture must use risk_level med");
+	}
 	console.log(`smoke-harness-plan: all ${name} fixture checks passed`);
 }
@@ -161,7 +182,9 @@ async function main() {
 		);
 		return;
 	}
-	fail("Usage: smoke-harness-plan.mjs --fixture [minimal-med|minimal-low-light] | --live");
+	fail(
+		"Usage: smoke-harness-plan.mjs --fixture [minimal-med|minimal-low-light|minimal-med-fast] | --live",
+	);
 }
 main().catch((err) => {

package/.pi/harness/specs/plan-review-round-draft.schema.json CHANGED Viewed

@@ -16,7 +16,7 @@
 		"round_index": { "type": "integer", "minimum": 1, "maximum": 12 },
 		"debate_round_focus": {
 			"type": "string",
-			"enum": ["spec", "wbs", "schedule", "quality"]
+			"enum": ["spec", "wbs", "schedule", "quality", "all"]
 		},
 		"round_summary": { "type": "string", "minLength": 1 },
 		"validation_summary": { "type": "string" },

package/.pi/model-router.example.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
 	"defaultProfile": "auto",
 	"debug": false,
-	"classifierModel": "opencode-go/qwen3.6-plus",
+	"classifierModel": "openai/gpt-5.4-nano",
 	"phaseBias": 0.5,
 	"maxSessionBudget": 1.0,
 	"largeContextThreshold": 100000,
@@ -16,12 +16,21 @@
 	"profiles": {
 		"auto": {
 			"high": {
-				"model": "opencode-go/deepseek-v4-pro",
+				"model": "openai/gpt-5.5",
 				"thinking": "high",
-				"fallbacks": ["opencode-go/qwen3.6-plus"]
+				"fallbacks": ["openai/gpt-5.4-nano"]
+			},
+			"medium": { "model": "openai/gpt-5.5", "thinking": "medium" },
+			"low": { "model": "openai/gpt-5.5", "thinking": "low" }
+		},
+		"opencode-go": {
+			"high": {
+				"model": "opencode-go/qwen3.6-plus",
+				"thinking": "high",
+				"fallbacks": ["opencode-go/deepseek-v4-flash"]
 			},
 			"medium": { "model": "opencode-go/qwen3.6-plus", "thinking": "medium" },
-			"low": { "model": "opencode-go/deepseek-v4-flash", "thinking": "low" }
+			"low": { "model": "opencode-go/qwen3.6-plus", "thinking": "low" }
 		}
 	}
 }

package/.pi/prompts/harness-plan.md CHANGED Viewed

@@ -138,11 +138,16 @@ harness_debate_open({ debate_profile, required_focuses })
 Profiles:
-| Profile | Focuses required | min_focus_rounds |
-|---------|------------------|------------------|
-| full | spec, wbs, schedule, quality | 4 |
-| standard | all four | 4 |
-| light | spec, quality only | 2 |
+| Profile | Review gate | Focuses required | min_focus_rounds |
+|---------|-------------|------------------|------------------|
+| full | threaded (4 rounds) | spec, wbs, schedule, quality | 4 |
+| standard | threaded (4 rounds) | spec, wbs, schedule, quality | 4 |
+| light | threaded (2 rounds) | spec, quality only | 2 |
+| fast | **consolidated** (1 round) | spec, quality | 1 |
+Med/low non-fork plans with clear stack and no implementation `open_questions` default to **fast** (consolidated). Escalate to threaded rounds only when integrator sets `review_gate_ready: false` or records blockers.
+`--quick`: skip scout-semantic; cap web research (≤2 searches, ≤3 fetches); prefer **fast** eligibility when DAG passes; use consolidated Review Gate when profile is fast.
 ## Phase 5 — Review Gate debate (profile-aware, pi-messenger, even with `--quick`)
@@ -153,13 +158,26 @@ Profiles:
 ### Focus coverage (required before consensus)
-Each required focus must appear in a submitted `review-round-rN.yaml` (`debate_round_focus`). Monotonic `round_index` (cap from profile). Consensus only when:
+Each required focus must appear in submitted review artifacts (`review-round-rN.yaml` or `review-round-consolidated.yaml` with `debate_round_focus: all`). Monotonic `round_index` (cap from profile). Consensus only when:
 - all **required** focuses covered, **and**
 - last round `review_gate_ready: true`, **and**
 - `validate-plan-dag.mjs` still passes (re-run after patches).
-### Per-round state machine
+### Consolidated state machine (`review_gate_mode: consolidated`, profile fast)
+```
+round_index := 1
+debate_round_focus := all
+spawn hypothesis-validator (blind)
+WHILE NOT ready_for_integrator (harness_debate_round_status round_index=1):
+  follow next_tool (validation-turn, adversary-brief, sprint-audit in parallel-friendly order; one subagent per batch)
+spawn review-integrator → write artifacts/review-round-consolidated.yaml → harness_debate_submit_round
+IF review_gate_ready false OR blockers: escalate — threaded round per missing focus (spec/wbs/schedule/quality)
+harness_debate_focus_coverage → harness_debate_consensus
+```
+### Threaded state machine (standard/full/light)
 ```
 round_index := next uncovered required focus

package/.pi/prompts/harness-setup.md CHANGED Viewed

@@ -327,7 +327,7 @@ sentrux plugin add-standard 2>/dev/null || echo "Plugins already installed or fa
 ## Step 3 — Pi Extension Packages
-Bundled extensions load from the installed `ultimate-pi` package. **Per-turn model routing** comes from a **vendored** fork of [`yeliu84/pi-model-router`](https://github.com/yeliu84/pi-model-router) in `vendor/pi-model-router/`, wired through [`.pi/extensions/pi-model-router-harness.ts`](.pi/extensions/pi-model-router-harness.ts). The harness **gates** activation on `.pi/model-router.json` (Step **3.5** below) so `router/auto` and built-in tiers such as `openai/gpt-5.4-pro` cannot load prematurely. Attribution: see [THIRD_PARTY_NOTICES.md](THIRD_PARTY_NOTICES.md) and `vendor/pi-model-router/UPSTREAM_PIN.md`. Maintainer refresh: `npm run vendor:sync-router`.
+Bundled extensions load from the installed `ultimate-pi` package. **Session-locked model routing** comes from a **vendored** fork of [`yeliu84/pi-model-router`](https://github.com/yeliu84/pi-model-router) in `vendor/pi-model-router/`, wired through [`.pi/extensions/pi-model-router-harness.ts`](.pi/extensions/pi-model-router-harness.ts). The router picks **one concrete model** when the session starts (from the first user prompt + system prompt complexity), then changes **thinking level only** each turn. The harness **gates** activation on `.pi/model-router.json` (Step **3.5** below) so `router/auto` cannot load prematurely. Attribution: see [THIRD_PARTY_NOTICES.md](THIRD_PARTY_NOTICES.md) and `vendor/pi-model-router/UPSTREAM_PIN.md`. Maintainer refresh: `npm run vendor:sync-router`.
 Optionally install the companion lockfile used in development:
@@ -381,9 +381,9 @@ If generation prints "No authenticated Pi providers": warn in report — user sh
 Do NOT block setup. If no config is written, `harness-sync-model-router.mjs` clears a premature `defaultProvider: "router"` in `.pi/settings.json`.
-**Router onboarding** — The vendored extension starts only after `.pi/model-router.json` appears. Running the script above prepares that file plus optional Pi defaults (**`router` / `auto`**) via `harness-sync-model-router.mjs` when `defaultProvider` was unset—then **`/reload`**.
+**Router onboarding** — The vendored extension starts only after `.pi/model-router.json` appears. Running the script above prepares that file plus optional Pi defaults (**`router` / `auto`**, or the profile named by `defaultProfile`) via `harness-sync-model-router.mjs` when `defaultProvider` was unset—then **`/reload`**. Generated profiles use **one model SKU per profile**; high/medium/low tiers differ in **thinking** only. Subagents resolve their subprocess model from the **agent system prompt** complexity (same lock rules).
-Manual override: **`/router profile auto`** anytime after reload if they changed defaults.
+Manual override: **`/router profile auto`** or **`/router profile opencode-go`** anytime after reload if the user changed defaults.
 ## Step 3.6 — Harness agents (package-resolved)
@@ -677,7 +677,7 @@ Output summary table:
 | sentrux | ✓/✗ | CLI + plugins; rules via Step 4.2 bootstrap |
 | Sentrux rules.toml | ✓/✗ | `.sentrux/rules.toml` synced from manifest |
 | pi extensions | ✓/✗ | 4 packages |
-| model router | ✓/✗ | Package + config verified, activation via `/router profile auto` |
+| model router | ✓/✗ | Package + config verified, activation via `/router profile auto` (or `opencode-go`) |
 | `.env` | ✓/✗/ask | Created / keys appended / user declined |
 | .gitignore | ✓/✗ | entries added (incl. `.env`) |