npm - ultimate-pi - Versions diffs - 0.17.0 → 0.18.0 - Mend

ultimate-pi 0.17.0 → 0.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (110) hide show

package/.agents/skills/harness-context/SKILL.md +13 -6
package/.agents/skills/harness-debate-plan/SKILL.md +37 -20
package/.agents/skills/harness-eval/SKILL.md +6 -21
package/.agents/skills/harness-governor/SKILL.md +4 -3
package/.agents/skills/harness-orchestration/SKILL.md +39 -51
package/.agents/skills/harness-plan/SKILL.md +23 -12
package/.agents/skills/harness-review/SKILL.md +52 -0
package/.agents/skills/harness-sentrux-setup/SKILL.md +13 -1
package/.agents/skills/harness-steer/SKILL.md +14 -0
package/.pi/agents/harness/adversary.md +3 -10
package/.pi/agents/harness/evaluator.md +3 -12
package/.pi/agents/harness/executor.md +12 -14
package/.pi/agents/harness/planning/decompose.md +7 -4
package/.pi/agents/harness/planning/hypothesis-validator.md +2 -0
package/.pi/agents/harness/planning/hypothesis.md +3 -1
package/.pi/agents/harness/planning/plan-adversary.md +2 -0
package/.pi/agents/harness/planning/plan-evaluator.md +2 -0
package/.pi/agents/harness/planning/plan-synthesizer.md +25 -0
package/.pi/agents/harness/planning/planning-context.md +48 -0
package/.pi/agents/harness/planning/review-integrator.md +2 -0
package/.pi/agents/harness/planning/scout-graphify.md +3 -1
package/.pi/agents/harness/planning/scout-semantic.md +3 -1
package/.pi/agents/harness/planning/scout-structure.md +3 -1
package/.pi/agents/harness/planning/sprint-contract-auditor.md +2 -0
package/.pi/agents/harness/sentrux-steward.md +51 -0
package/.pi/extensions/00-posthog-network-bootstrap.ts +11 -0
package/.pi/extensions/harness-live-widget.ts +27 -1
package/.pi/extensions/harness-plan-approval.ts +62 -56
package/.pi/extensions/harness-run-context.ts +541 -84
package/.pi/extensions/harness-subagent-submit.ts +43 -10
package/.pi/extensions/lib/harness-artifact-gate.ts +182 -0
package/.pi/extensions/lib/harness-posthog.ts +9 -5
package/.pi/extensions/lib/harness-spawn-topology.ts +188 -0
package/.pi/extensions/lib/harness-subagent-auth.ts +1 -0
package/.pi/extensions/lib/harness-subagent-policy.ts +23 -19
package/.pi/extensions/lib/harness-subagent-precheck.ts +35 -9
package/.pi/extensions/lib/harness-subagent-submit-pipeline.ts +66 -2
package/.pi/extensions/lib/harness-subagent-submit-registry.ts +21 -3
package/.pi/extensions/lib/harness-subagents-bridge.ts +7 -29
package/.pi/extensions/lib/harness-subprocess-bootstrap.ts +73 -0
package/.pi/extensions/lib/plan-approval/create-plan.ts +2 -3
package/.pi/extensions/lib/plan-approval/resolve-disk.ts +102 -0
package/.pi/extensions/lib/plan-approval/schema.ts +22 -8
package/.pi/extensions/lib/plan-approval/types.ts +1 -1
package/.pi/extensions/lib/plan-approval/validate.ts +2 -2
package/.pi/extensions/lib/plan-approval-readiness.ts +241 -0
package/.pi/extensions/lib/plan-debate-eligibility.ts +12 -5
package/.pi/extensions/lib/plan-debate-gate.ts +22 -1
package/.pi/extensions/lib/plan-debate-lanes.ts +32 -2
package/.pi/extensions/lib/plan-review-gate.ts +8 -0
package/.pi/extensions/lib/posthog-client.ts +76 -0
package/.pi/extensions/policy-gate.ts +24 -19
package/.pi/harness/agents.manifest.json +24 -16
package/.pi/harness/corpus/cron.example +8 -0
package/.pi/harness/corpus/graphify-kb-updater.config.json +159 -0
package/.pi/harness/corpus/systemd/graphify-kb-updater.env.template +4 -0
package/.pi/harness/corpus/systemd/graphify-kb-updater.service +17 -0
package/.pi/harness/corpus/systemd/graphify-kb-updater.timer +11 -0
package/.pi/harness/docs/adrs/0001-harness-constitution.md +2 -1
package/.pi/harness/docs/adrs/0006-sentrux-dual-layer.md +7 -6
package/.pi/harness/docs/adrs/0009-sentrux-rules-lifecycle.md +6 -1
package/.pi/harness/docs/adrs/0031-harness-run-context.md +1 -1
package/.pi/harness/docs/adrs/0032-harness-command-orchestration.md +7 -0
package/.pi/harness/docs/adrs/0034-darwin-plan-research-pipeline.md +3 -3
package/.pi/harness/docs/adrs/0036-implementation-research-and-selective-debate.md +8 -5
package/.pi/harness/docs/adrs/0039-harness-post-run-review-gate.md +47 -0
package/.pi/harness/docs/adrs/0040-practice-grounded-orchestration.md +40 -0
package/.pi/harness/docs/adrs/0041-intelligent-planning-reconnaissance.md +39 -0
package/.pi/harness/docs/adrs/0042-agent-native-orchestration.md +35 -0
package/.pi/harness/docs/adrs/0043-path-first-harness-tools.md +38 -0
package/.pi/harness/docs/adrs/0044-harness-steer-loop.md +36 -0
package/.pi/harness/docs/adrs/README.md +10 -0
package/.pi/harness/docs/graphify-kb-updater-runbook.md +157 -0
package/.pi/harness/docs/practice-map.md +110 -0
package/.pi/harness/env.harness.template +5 -3
package/.pi/harness/evals/smoke/sentrux-stub.json +1 -1
package/.pi/harness/evals/smoke/smoke-harness-plan.mjs +5 -2
package/.pi/harness/specs/README.md +1 -1
package/.pi/harness/specs/harness-run-context.schema.json +11 -0
package/.pi/harness/specs/harness-spawn-context.schema.json +14 -0
package/.pi/harness/specs/plan-execution-plan.schema.json +39 -1
package/.pi/harness/specs/plan-packet.schema.json +4 -0
package/.pi/harness/specs/plan-phase-status.schema.json +17 -0
package/.pi/harness/specs/plan-phase-waiver.schema.json +25 -0
package/.pi/harness/specs/plan-planning-context.schema.json +50 -0
package/.pi/harness/specs/repair-brief.schema.json +45 -0
package/.pi/harness/specs/review-outcome.schema.json +46 -0
package/.pi/harness/specs/sentrux-manifest-proposal.schema.json +80 -0
package/.pi/harness/specs/sentrux-signal.schema.json +43 -0
package/.pi/harness/specs/steer-state.schema.json +20 -0
package/.pi/lib/harness-context-mode-policy.ts +256 -0
package/.pi/lib/harness-repair-brief.ts +145 -0
package/.pi/lib/harness-run-context.ts +591 -32
package/.pi/lib/harness-ui-state.ts +87 -9
package/.pi/prompts/harness-auto.md +9 -9
package/.pi/prompts/harness-critic.md +3 -30
package/.pi/prompts/harness-eval.md +4 -37
package/.pi/prompts/harness-plan.md +118 -54
package/.pi/prompts/harness-review.md +150 -15
package/.pi/prompts/harness-run.md +62 -10
package/.pi/prompts/harness-sentrux-steward.md +55 -0
package/.pi/prompts/harness-steer.md +30 -0
package/.pi/scripts/graphify-kb-updater.mjs +358 -0
package/.pi/scripts/harness-verify.mjs +22 -6
package/.pi/scripts/harness-web-policy-guard.mjs +68 -0
package/.pi/scripts/validate-plan-dag.mjs +3 -3
package/AGENTS.md +1 -0
package/CHANGELOG.md +11 -0
package/package.json +5 -4
package/.pi/prompts/git-sync.md +0 -124

package/.pi/harness/docs/graphify-kb-updater-runbook.md ADDED Viewed

@@ -0,0 +1,157 @@
+# Graphify KB updater runbook
+## Purpose and scope
+`graphify-kb-updater` keeps the local Graphify source corpus current for agentic engineering, context engineering, harness engineering, AI coding harnesses, research papers/feeds, articles/blogs, local books/transcripts, YouTube candidates, and competitor intelligence.
+The approved operating model is **hybrid allowlist auto-promotion with conservative staging**:
+- Daily local automation may auto-promote only explicitly approved allowlisted public sources with complete provenance and rights/access metadata.
+- Books, transcripts, YouTube/video material, paid/copyrighted/mirrored material, unclear-license content, and unknown open-web sources remain staged until manually approved.
+- Competitor monitoring is a curated taxonomy/watchlist/reporting signal, not an exhaustive crawler.
+- Pi-agent-open integration is intentionally limited/deferred: opening Pi should do at most a low-latency, no-network stale check. It must not perform synchronous web discovery, promotion, or Graphify mutation.
+## Governance and approval boundaries
+Required rights/access fields for every promotion:
+- `license`
+- `access`
+- `approved_by`
+- `approved_at`
+Allowlist auto-promotion requires all of the following:
+1. `.pi/harness/corpus/graphify-kb-updater.config.json` has `auto_promote_allowlist: true`.
+2. The candidate domain is present in `allowlist` with `approved: true`.
+3. The candidate itself has `approved: true`.
+4. `rights_access` is complete.
+5. The candidate is not a risky source class that requires manual review.
+Risky source classes (`book`, `transcript`, `youtube`) always require explicit approval and complete rights/access metadata. Raw HTTP shell paths are forbidden; keep discovery/fetch through approved harness web/API abstractions and verify with `.pi/scripts/harness-web-policy-guard.mjs`.
+## Manual commands
+Dry-run, no mutation of `raw/`, state, or `graphify-out/`:
+```bash
+node .pi/scripts/graphify-kb-updater.mjs --dry-run --pilot-report
+```
+Apply approved/promotable candidates and refresh Graphify only when promoted files changed:
+```bash
+node .pi/scripts/graphify-kb-updater.mjs --apply --refresh-graph --pilot-report
+```
+Apply without graph mutation:
+```bash
+node .pi/scripts/graphify-kb-updater.mjs --apply --skip-graph --pilot-report
+```
+Validate scheduler templates:
+```bash
+node .pi/scripts/graphify-kb-updater.mjs --scheduler-smoke
+```
+Run web-policy guard:
+```bash
+node .pi/scripts/harness-web-policy-guard.mjs
+```
+## Approval workflow
+1. Review dry-run JSON: candidate count, source counts, competitor labels, duplicate/skipped/blocked counts, stale warnings, planned promotions, and graph action.
+2. For a candidate, add it to `.pi/harness/corpus/graphify-kb-updater.config.json` `review_queue` with:
+   - `kind` (`article`, `paper`, `book`, `transcript`, or `youtube`)
+   - `title`
+   - `url` or `path`
+   - `approved: true`
+   - `rights_access` object with all required fields
+   - optional `competitor_labels` or provenance notes.
+3. For local files, you may place `<file>.rights.json` beside the source, but risky classes still require explicit approval before promotion.
+4. Run `--apply --refresh-graph`.
+5. Promoted sources land under `raw/graphify-kb-updates/<kind>/` with `.provenance.json` sidecars.
+## Daily scheduler setup
+Systemd user timer is the primary path and runs daily at 08:30 with randomized delay:
+```bash
+mkdir -p ~/.config/ultimate-pi ~/.config/systemd/user ~/.local/state/ultimate-pi
+cp .pi/harness/corpus/systemd/graphify-kb-updater.env.template ~/.config/ultimate-pi/graphify-kb-updater.env
+# edit UP_ROOT in the env file
+cp .pi/harness/corpus/systemd/graphify-kb-updater.service ~/.config/systemd/user/
+cp .pi/harness/corpus/systemd/graphify-kb-updater.timer ~/.config/systemd/user/
+systemctl --user daemon-reload
+systemctl --user enable --now graphify-kb-updater.timer
+systemctl --user list-timers graphify-kb-updater.timer
+```
+The service uses `flock` as a non-overlap lock, `timeout 45m`, explicit env, and append-only logs.
+Cron fallback also runs daily at 08:30: run `crontab -e`, paste the line from `.pi/harness/corpus/cron.example`, and edit `UP_ROOT` in it.
+## Reports, logs, and fields
+Apply runs write:
+- Registry: `.pi/harness/corpus/graphify-kb-updater-state/registry.json`
+- Per-run logs: `.pi/harness/corpus/graphify-kb-updater-state/logs/`
+- Scheduler logs: `~/.local/state/ultimate-pi/graphify-kb-updater.log` and `.err`
+Each run reports:
+- `last_run_at`
+- `candidate_count`, `promoted_count`, `blocked_count`, `skipped_count`, `duplicate_skips`, `failure_count`
+- `counts.by_kind`, `counts.by_source_type`, `counts.by_competitor_label`, `counts.allowlisted`
+- `stale_warnings`
+- `changed_existing_count` for same URL/path content changes
+- `graph.action`, `graph.exit_status`, and Graphify report path when refreshed
+- optional pilot metrics: `frontier_recall_proxy`, `promoted_precision_proxy`, `duplicate_noise_rate`, `graphify_success`
+Review these fields before enabling unattended mode and after every config change.
+## Troubleshooting
+- `missing_rights_access_approval`: add complete rights/access metadata.
+- `manual_approval_required`: set `approved: true` after source and rights review.
+- `duplicate_unchanged`: candidate was already promoted and content hash is unchanged.
+- `changed_existing_count > 0`: a stable URL/path changed content; review before relying on previous conclusions.
+- Graphify skipped: no promoted changes, `--skip-graph`, or no `--refresh-graph`.
+- Graphify failed: inspect `graph.stderr`, run `graphify update .` manually, and keep the scheduler disabled until fixed.
+- Scheduler did not run: check `systemctl --user status graphify-kb-updater.timer`, the env file path, and scheduler logs.
+- Overlap: the lock file at `%t/graphify-kb-updater.lock` (`%t` is the systemd runtime-directory specifier, i.e. `$XDG_RUNTIME_DIR` for user units) or `/tmp/graphify-kb-updater.lock` prevents concurrent runs.
+## Disable
+```bash
+systemctl --user disable --now graphify-kb-updater.timer
+systemctl --user reset-failed graphify-kb-updater.service
+```
+Remove any cron line copied from `.pi/harness/corpus/cron.example`.
+## Rollback
+1. Disable systemd timer and remove cron line.
+2. Use registry/log promoted paths to remove or quarantine promoted files under `raw/graphify-kb-updates/`.
+3. Restore `.pi/harness/corpus/graphify-kb-updater-state/registry.json` from backup, or mark candidates rejected/quarantined.
+4. Revert implementation files if needed:
+```bash
+git checkout -- .pi/scripts/graphify-kb-updater.mjs .pi/harness/corpus/graphify-kb-updater.config.json .pi/harness/corpus/systemd/graphify-kb-updater.timer .pi/harness/corpus/cron.example test/graphify-kb-updater.test.mjs .pi/harness/docs/graphify-kb-updater-runbook.md
+```
+5. Regenerate Graphify from valid sources:
+```bash
+graphify update .
+```
+## Pilot gate before unattended mode
+Run at least one dry-run and one supervised apply. Record frontier recall proxy, promoted precision proxy, duplicate/noise rate, skipped reasons, stale warnings, and Graphify success from `--pilot-report`. Enable the timer only if promoted precision is acceptable and graph refresh succeeds.

package/.pi/harness/docs/practice-map.md ADDED Viewed

@@ -0,0 +1,110 @@
+# Harness practice map
+Source of truth linking harness phases to proven practices (graphify corpus), agents/scripts, spawn topology, and **agent translation** (ADR 0042). Orchestrators and agents should cite this doc when unsure why a lane exists.
+See also: [ADRs](adrs/README.md), [ADR 0040](adrs/0040-practice-grounded-orchestration.md), [ADR 0041](adrs/0041-intelligent-planning-reconnaissance.md), [ADR 0042](adrs/0042-agent-native-orchestration.md), [ADR 0043](adrs/0043-path-first-harness-tools.md), [ADR 0044](adrs/0044-harness-steer-loop.md), [`raw/modules/structured-planning.md`](../../../raw/modules/structured-planning.md).
+## Agent translation (human practice → agent design)
+| Human practice | Agent translation |
+|----------------|-------------------|
+| Meeting / chair | Parent as **scheduler + gate checker** only |
+| Fagan inspection rounds | **Schema-bound probes** + merge (`parallel_probes` profile) |
+| Two-pizza cap per batch | **Token/spawn budget** per phase (`harness-spawn-budget.ts`) |
+| RACI roles | **Disjoint prompt contexts**, not serial speakers |
+| WBS decomposition | **Lake-first `execution_plan`** (few outcomes, bundled context) |
+| Sprint / story points | **`executor_strategy` + lake `done_criteria`** |
+| Critical path | **`critical_path_lake_ids`** |
+| Replan on every failure | **Steer loop** (`implementation_gap`) vs **plan revise** (`plan_gap`) |
+| Tool payloads in chat | **Path-first** approve/submit/merge (ADR 0043) |
+## Team management rules (all `/harness-*` orchestrators)
+1. **Parallelism law** — Parallel `subagent` `tasks` only when outputs are independent inputs to a later merge (implementation ∥ stack research; inspector ∥ adversary in `parallel_probes`). Never parallelize decompose ∥ hypothesis.
+2. **Two-pizza cap per batch** — Max 2 research lanes, max 1 optional `planning-context` subagent, max 1 executor, max 1 debate lane agent per `subagent` call (plan-verify may use 2 probes + integrator in separate batches).
+3. **No redundant thinkers** — If artifact X exists, downstream agents read it; they do not re-derive (e.g. decompose after `planning-context.yaml`).
+4. **Sequential dependency chain** — planning context → problem framing / decompose → hypothesis → research → synthesis/author → DAG → plan-verify → approve → execute → review → (steer)* → policy.
+5. **Plan-verify (agent-native)** — For `fast`/`standard`, parallel probes then integrator; parent is chair, not participant. Threaded debate remains for `full` until parity. See the *Plan-verify profiles* table below for the team shape of each profile (including `light`).
+6. **Tool intelligence** — Parent chooses graphify, sg, ccc; subprocesses optional. **Path-first:** disk is source of truth; tool args are pointers (ADR 0043).
+## `/harness-plan` — Planning Process Group
+| Phase | Practice | Agent translation | Actor | Spawn |
+|-------|----------|---------------------|-------|-------|
+| 0 | Tooling / fast feedback | Pre-index once | Parent + `ccc` | Automatic |
+| 1 | Reconnaissance before WBS | **ContextPack** on disk | Parent tools or optional `planning-context` | No default subprocess |
+| 2a | Problem framing / lakes | Lake outcomes, not ticket tree | `decompose` or synthesizer section | Sequential after context gate |
+| 2b | Hypothesis-driven approach | Falsifiable claim grounded in framing | `hypothesis` or synthesizer | After `artifacts/decomposition.yaml` |
+| 3.5 | Spike / external research | Paths in research brief | Researchers optional | Artifacts required |
+| 4 | Fork resolution (batched) | One `ask_user` gate | Parent | After 3.5 |
+| 4b | Lake-first execution plan | `executor_strategy`, context bundles | `plan-synthesizer` (low/med) or `execution-plan-author` (high) | Single agent |
+| 4c | Deterministic quality gate | Script, not LLM | `validate-plan-dag.mjs` | Parent; hard stop |
+| 4d | Tailor process to risk | Probe depth, not meeting count | `harness_plan_debate_eligibility` | Pre plan-verify |
+| 4e | Architectural intent | Fitness-function spec | `harness/sentrux-steward` optional | When structural risk |
+| 5 | Plan-verify (Review Gate) | Parallel probes + integrator | Debate cast / probes | `parallel_probes` or threaded |
+| 6 | Baseline + approve | Path-only `approve_plan` | Parent | `approve_plan`, `create_plan` |
+### Review Gate — debate RACI (threaded / full profile)
+| Agent | Inspection role | Practice | When |
+|-------|-----------------|----------|------|
+| `hypothesis-validator` | Blind verifier | Independent verification (ADR 0034) | Round 1 / fast path |
+| `plan-evaluator` | Inspector | Neutral checklist | Every required focus |
+| `plan-adversary` | Red team | Adversarial review | Every required focus |
+| `sprint-contract-auditor` | DoD auditor | Sprint contract | `quality` focus |
+| `review-integrator` | Recorder | Single round artifact | End of round |
+| Parent | Chair | Gates only | Always |
+### Plan-verify profiles
+| Profile | When | Team shape |
+|---------|------|------------|
+| `full` | High risk, material fork | Threaded: all four focuses |
+| `standard` | Default med | `parallel_probes`: inspector ∥ adversary → integrator |
+| `light` | Low risk | Threaded: `spec` + `quality` |
+| `fast` | Med/low, clear stack | Consolidated verify + blind hypothesis-validator |
+## `/harness-run` — Executing Process Group
+| Step | Practice | Agent translation | Actor |
+|------|----------|-------------------|-------|
+| Gate | Change control | `plan_ready` required | Parent |
+| Pre-work | Fitness baseline | `sentrux gate --save` | Parent |
+| Work | Single implementer | `executor_strategy` | `harness/executor` |
+| Post-work | Observation | `sentrux check` / signal artifact | Parent |
+| Handoff | Generator–evaluator | `submit_executor_handoff` | Executor |
+| Next | Always verify | **`/harness-review`** (not replan on blocked) | Parent routing |
+## `/harness-review` — Monitoring and Controlling
+| Phase | Practice | Agent translation | Actor |
+|-------|----------|-------------------|-------|
+| 1 | Automated QC + fitness | Deterministic first | Parent scripts |
+| 2 | Measure vs plan | Benchmark on disk | `evaluator` benchmark |
+| 3 | Policy audit | Verdict (no fail-fast skip) | `evaluator` verdict |
+| 4 | Red team | Tiered: full review on attempt 1, lite review on steer attempts 2+ | `adversary` |
+| 5 | Outcome + repair brief | Machine routing | Parent + `review-outcome.yaml`, `repair-brief.yaml` |
+| 6 | Steer gate | One `ask_user` | harness-decisions |
+| 7 | Steer / revise | `implementation_gap` → `/harness-steer`; `plan_gap` → plan revise | ADR 0044 |
+`--quick` = deterministic + benchmark + verdict (no adversary). Steer attempts 2+ default to lite review unless `block_merge`.
+## `/harness-steer` — Repair sub-cycle (ADR 0044)
+| Step | Practice | Actor |
+|------|----------|-------|
+| 0 | Read review + repair briefs | Parent |
+| 1 | Policy phase → `execute` | Parent |
+| 2 | Repair scope | `harness/executor` `mode: repair` |
+| 3 | Re-verify | `/harness-review` |
+## Anti-patterns
+- **Do not** spawn `decompose` and `hypothesis` in the same parallel `tasks` batch.
+- **Do not** run `graphify query` in `decompose` when planning-context coverage is ok (ADR 0041).
+- **Do not** parallelize threaded debate lanes in one batch (except `parallel_probes` inspector ∥ adversary per ADR 0042).
+- **Do not** let executor or parent self-certify.
+- **Do not** stop review on benchmark fail — complete verdict and route via steer (ADR 0044).
+- **Do not** tell user to run `/harness-plan "<new task>"` on test failure — use `/harness-steer` with `repair-brief.yaml`.
+- **Do not** re-`approve_plan` every steer attempt — only when packet changes.
+- **Do not** embed full plan packets in `approve_plan` / `submit_*` tool args (ADR 0043).

package/.pi/harness/env.harness.template CHANGED Viewed

@@ -21,6 +21,8 @@ HARNESS_WEB_SEARCH_ENGINE=ddg_html
 # --- PostHog (optional) ---
 # Project key — required for harness_* telemetry when HARNESS_TELEMETRY_ENABLED=true
+# WSL2: ultimate-pi loads 00-posthog-network-bootstrap.ts (IPv4 fetch for *.posthog.com).
+# If flush still fails, set POSTHOG_ENABLED=false or fix outbound HTTPS to PostHog.
 # POSTHOG_API_KEY=
 # POSTHOG_HOST=https://us.i.posthog.com
 # POSTHOG_ENABLED=true
@@ -39,6 +41,6 @@ HARNESS_WEB_SEARCH_ENGINE=ddg_html
 # --- Wiki / Obsidian vault (optional) ---
 VAULT_WIKI_PATH=vault/wiki
-# --- Sentrux gate (optional) ---
-# Require Sentrux stub for harness-verify (see .pi/scripts/harness-verify.mjs)
-# HARNESS_SENTRUX_REQUIRED=true
+# --- Sentrux fitness functions ---
+# Require sentrux check + run signal (or CI stub) in harness-verify
+HARNESS_SENTRUX_REQUIRED=true

package/.pi/harness/evals/smoke/sentrux-stub.json CHANGED Viewed

@@ -2,5 +2,5 @@
 	"schema_version": "1.0.0",
 	"signal_type": "stub",
 	"score": 0.5,
-	"note": "Placeholder until Sentrux MCP is wired. Satisfies HARNESS_SENTRUX_REQUIRED gate in harness:verify."
+	"note": "Fallback when HARNESS_RUN_DIR/artifacts/sentrux-signal.yaml is absent. Prefer run signal from /harness-run (ADR 0006)."
 }

package/.pi/harness/evals/smoke/smoke-harness-plan.mjs CHANGED Viewed

@@ -26,13 +26,16 @@ async function scanFocusCoverage(fixtureRoot, requiredFocus) {
 	let last_round_index = 0;
 	const { readdir } = await import("node:fs/promises");
 	const files = (await readdir(art)).filter((f) =>
-		/^review-round(?:-r\d+|-consolidated)\.yaml$/i.test(f),
+		/^review-round(?:-r\d+|-consolidated|-parallel-probes)\.yaml$/i.test(f),
 	);
 	for (const name of files.sort()) {
 		const consolidated = /^review-round-consolidated\.yaml$/i.test(name);
+		const parallelProbes = /^review-round-parallel-probes\.yaml$/i.test(name);
 		const m = consolidated
 			? ["review-round-consolidated.yaml", "1"]
-			: /^review-round-r(\d+)\.yaml$/i.exec(name);
+			: parallelProbes
+				? ["review-round-parallel-probes.yaml", "1"]
+				: /^review-round-r(\d+)\.yaml$/i.exec(name);
 		if (!m) continue;
 		const roundIndex = consolidated ? 1 : Number(m[1]);
 		if (roundIndex > last_round_index) last_round_index = roundIndex;

package/.pi/harness/specs/README.md CHANGED Viewed

@@ -13,7 +13,7 @@ This directory is the canonical contract surface for Phase 1 harness artifacts.
 These schemas define the minimum machine-readable contracts for:
-- planning (`PlanPacket`, `PlanDecompositionBrief`, `PlanHypothesisBrief`, `PlanHypothesisEval`, `PlanAdversaryBrief`)
+- planning (`PlanPacket`, `PlanPlanningContext`, `PlanDecompositionBrief`, `PlanHypothesisBrief`, `PlanHypothesisEval`, `PlanAdversaryBrief`, legacy `PlanScoutFindings`)
 - execution telemetry (`RunTrace`, `HarnessRunRecord`)
 - PostHog harness events (`HarnessPostHogEvent`)
 - observation bus (`HarnessObservation`)

package/.pi/harness/specs/harness-run-context.schema.json CHANGED Viewed

@@ -75,6 +75,17 @@
 		},
 		"turn_override_run_id": {
 			"type": ["string", "null"]
+		},
+		"steer_approved": {
+			"type": "boolean"
+		},
+		"steer_attempt": {
+			"type": "integer",
+			"minimum": 0
+		},
+		"steer_max_attempts": {
+			"type": "integer",
+			"minimum": 1
 		}
 	}
 }

package/.pi/harness/specs/harness-spawn-context.schema.json CHANGED Viewed

@@ -23,6 +23,7 @@
 				"revise",
 				"plan_review",
 				"execute",
+				"repair",
 				"benchmark",
 				"verdict",
 				"adversary",
@@ -61,6 +62,19 @@
 		"handoff_summary": {
 			"type": "string",
 			"description": "Prior phase bullet summary for chained spawns (harness-auto)"
+		},
+		"critical_path_work_item_ids": {
+			"type": "array",
+			"items": { "type": "string", "minLength": 1 },
+			"description": "Limiting-step work item ids from execution_plan.schedule_metadata (execute phase)"
+		},
+		"repair_brief_path": {
+			"type": "string",
+			"description": "Path to repair-brief.yaml (steer / repair mode)"
+		},
+		"executor_strategy": {
+			"type": "string",
+			"enum": ["single_pass", "per_lake", "per_work_item"]
 		}
 	}
 }

package/.pi/harness/specs/plan-execution-plan.schema.json CHANGED Viewed

@@ -36,9 +36,41 @@
 			"items": { "$ref": "#/$defs/risk" }
 		},
 		"schedule_metadata": { "$ref": "#/$defs/schedule_metadata" },
-		"dag_validation": { "$ref": "#/$defs/dag_validation" }
+		"dag_validation": { "$ref": "#/$defs/dag_validation" },
+		"lakes": {
+			"type": "array",
+			"items": { "$ref": "#/$defs/lake" }
+		},
+		"executor_strategy": {
+			"type": "string",
+			"enum": ["single_pass", "per_lake", "per_work_item"]
+		},
+		"critical_path_lake_ids": {
+			"type": "array",
+			"items": { "type": "string", "minLength": 1 }
+		}
 	},
 	"$defs": {
+		"lake": {
+			"type": "object",
+			"additionalProperties": false,
+			"required": ["lake_id", "title", "outcome", "done_criteria"],
+			"properties": {
+				"lake_id": { "type": "string", "minLength": 1 },
+				"title": { "type": "string", "minLength": 1 },
+				"outcome": { "type": "string", "minLength": 1 },
+				"done_criteria": { "type": "string", "minLength": 1 },
+				"context_bundle_path": { "type": "string", "minLength": 1 },
+				"files": {
+					"type": "array",
+					"items": { "type": "string", "minLength": 1 }
+				},
+				"out_of_scope": {
+					"type": "array",
+					"items": { "type": "string", "minLength": 1 }
+				}
+			}
+		},
 		"phase": {
 			"type": "object",
 			"additionalProperties": false,
@@ -107,6 +139,12 @@
 					"type": "array",
 					"minItems": 1,
 					"items": { "type": "string", "minLength": 1 }
+				},
+				"lake_id": { "type": "string", "minLength": 1 },
+				"context_bundle_path": { "type": "string", "minLength": 1 },
+				"context_refs": {
+					"type": "array",
+					"items": { "type": "string", "minLength": 1 }
 				}
 			}
 		},

package/.pi/harness/specs/plan-packet.schema.json CHANGED Viewed

@@ -94,6 +94,10 @@
 		},
 		"execution_plan": {
 			"$ref": "plan-execution-plan.schema.json"
+		},
+		"executor_strategy": {
+			"type": "string",
+			"enum": ["single_pass", "per_lake", "per_work_item"]
 		}
 	}
 }

package/.pi/harness/specs/plan-phase-status.schema.json ADDED Viewed

@@ -0,0 +1,17 @@
+{
+	"$schema": "https://json-schema.org/draft/2020-12/schema",
+	"$id": "https://ultimate-pi.local/.pi/harness/specs/plan-phase-status.schema.json",
+	"title": "PlanPhaseStatus",
+	"description": "Orchestrator-recorded plan phase outcome before baseline approval.",
+	"type": "object",
+	"additionalProperties": false,
+	"required": ["schema_version", "plan_status"],
+	"properties": {
+		"schema_version": { "type": "string", "const": "1.0.0" },
+		"plan_status": {
+			"type": "string",
+			"enum": ["ready", "partial", "needs_clarification"]
+		},
+		"notes": { "type": "string" }
+	}
+}

package/.pi/harness/specs/plan-phase-waiver.schema.json ADDED Viewed

@@ -0,0 +1,25 @@
+{
+	"$schema": "https://json-schema.org/draft/2020-12/schema",
+	"$id": "https://ultimate-pi.local/.pi/harness/specs/plan-phase-waiver.schema.json",
+	"title": "PlanPhaseWaiver",
+	"description": "Explicit human waiver for partial scouts or plan-phase blockers before approve_plan.",
+	"type": "object",
+	"additionalProperties": false,
+	"required": ["schema_version", "waived"],
+	"properties": {
+		"schema_version": { "type": "string", "const": "1.0.0" },
+		"waived": {
+			"type": "array",
+			"minItems": 1,
+			"items": {
+				"type": "object",
+				"additionalProperties": false,
+				"required": ["reason", "rationale"],
+				"properties": {
+					"reason": { "type": "string", "minLength": 1 },
+					"rationale": { "type": "string", "minLength": 1 }
+				}
+			}
+		}
+	}
+}

package/.pi/harness/specs/plan-planning-context.schema.json ADDED Viewed

@@ -0,0 +1,50 @@
+{
+	"$schema": "https://json-schema.org/draft/2020-12/schema",
+	"$id": "https://ultimate-pi.local/.pi/harness/specs/plan-planning-context.schema.json",
+	"title": "PlanPlanningContext",
+	"type": "object",
+	"additionalProperties": true,
+	"required": ["schema_version", "status", "summary", "coverage"],
+	"properties": {
+		"schema_version": { "type": "string", "const": "1.0.0" },
+		"status": {
+			"type": "string",
+			"enum": ["ok", "partial", "failed", "error"]
+		},
+		"task_ref": { "type": "string" },
+		"summary": { "type": "string", "minLength": 1 },
+		"coverage": {
+			"type": "object",
+			"required": ["architecture", "structure"],
+			"properties": {
+				"architecture": { "$ref": "#/$defs/coverageLane" },
+				"structure": { "$ref": "#/$defs/coverageLane" },
+				"semantic": { "$ref": "#/$defs/coverageLane" }
+			},
+			"additionalProperties": true
+		},
+		"findings": { "type": "array" },
+		"key_paths": { "type": "array", "items": { "type": "string" } },
+		"evidence_refs": { "type": "array" },
+		"open_questions": { "type": "array" }
+	},
+	"$defs": {
+		"coverageLane": {
+			"type": "object",
+			"required": ["status"],
+			"properties": {
+				"status": {
+					"type": "string",
+					"enum": ["ok", "partial", "skipped", "failed", "error"]
+				},
+				"tools_used": {
+					"type": "array",
+					"items": { "type": "string" }
+				},
+				"summary": { "type": "string" },
+				"key_paths": { "type": "array", "items": { "type": "string" } }
+			},
+			"additionalProperties": true
+		}
+	}
+}

package/.pi/harness/specs/repair-brief.schema.json ADDED Viewed

@@ -0,0 +1,45 @@
+{
+	"$schema": "https://json-schema.org/draft/2020-12/schema",
+	"$id": "https://ultimate-pi.local/.pi/harness/specs/repair-brief.schema.json",
+	"title": "RepairBrief",
+	"type": "object",
+	"additionalProperties": false,
+	"required": [
+		"schema_version",
+		"run_id",
+		"steer_attempt",
+		"remediation_class",
+		"source_artifacts",
+		"fix_directives"
+	],
+	"properties": {
+		"schema_version": { "type": "string", "const": "1.0.0" },
+		"run_id": { "type": "string", "minLength": 1 },
+		"steer_attempt": { "type": "integer", "minimum": 1 },
+		"remediation_class": {
+			"type": "string",
+			"enum": ["implementation_gap", "plan_gap", "rollback", "inconclusive"]
+		},
+		"source_artifacts": {
+			"type": "object",
+			"additionalProperties": { "type": "string" }
+		},
+		"failed_acceptance_check_ids": {
+			"type": "array",
+			"items": { "type": "string", "minLength": 1 }
+		},
+		"priority_lake_ids": {
+			"type": "array",
+			"items": { "type": "string", "minLength": 1 }
+		},
+		"fix_directives": {
+			"type": "array",
+			"minItems": 1,
+			"items": { "type": "string", "minLength": 1 }
+		},
+		"constraints": {
+			"type": "array",
+			"items": { "type": "string", "minLength": 1 }
+		}
+	}
+}

package/.pi/harness/specs/review-outcome.schema.json ADDED Viewed

@@ -0,0 +1,46 @@
+{
+	"$schema": "https://json-schema.org/draft/2020-12/schema",
+	"$id": "https://ultimate-pi.local/.pi/harness/specs/review-outcome.schema.json",
+	"title": "ReviewOutcome",
+	"type": "object",
+	"additionalProperties": false,
+	"required": [
+		"schema_version",
+		"run_id",
+		"status",
+		"remediation_class",
+		"recommended_next"
+	],
+	"properties": {
+		"schema_version": { "type": "string", "const": "1.0.0" },
+		"run_id": { "type": "string", "minLength": 1 },
+		"status": {
+			"type": "string",
+			"enum": ["pass", "fail", "inconclusive"]
+		},
+		"remediation_class": {
+			"type": "string",
+			"enum": [
+				"pass",
+				"implementation_gap",
+				"plan_gap",
+				"rollback",
+				"inconclusive"
+			]
+		},
+		"recommended_next": { "type": "string", "minLength": 1 },
+		"failed_acceptance_check_ids": {
+			"type": "array",
+			"items": { "type": "string", "minLength": 1 }
+		},
+		"steer_attempt": { "type": "integer", "minimum": 0 },
+		"review_tier": {
+			"type": "string",
+			"enum": ["full", "lite"]
+		},
+		"source_artifacts": {
+			"type": "object",
+			"additionalProperties": { "type": "string" }
+		}
+	}
+}