npm - ultimate-pi - Versions diffs - 0.16.0 → 0.18.0 - Mend

ultimate-pi 0.16.0 → 0.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (137) hide show

package/.pi/harness/agents.manifest.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
 	"schema_version": "1.0.0",
 	"package": "ultimate-pi",
-	"package_version": "0.15.0",
-	"generated_at": "2026-05-19T12:56:13.369Z",
+	"package_version": "0.17.0",
+	"generated_at": "2026-05-23T10:14:51.637Z",
 	"agents": {
 		"pi-pi/agent-expert": {
 			"path": ".pi/agents/pi-pi/agent-expert.md",
@@ -46,15 +46,15 @@
 		},
 		"harness/adversary": {
 			"path": ".pi/agents/harness/adversary.md",
-			"sha256": "560c7571ab91478bde1271e9ae6c3a112c3e1d28e1a261c5450fd1d00f9f89af"
+			"sha256": "697ee7c784e8eb30ce96f4f16e9bb5f9cdcaae76a4a7083ace2fe4272e6d732f"
 		},
 		"harness/evaluator": {
 			"path": ".pi/agents/harness/evaluator.md",
-			"sha256": "a4667d3efb305ba2fe79118e3d7d2b0de5e0369637af040d1238161d75cd28ac"
+			"sha256": "587ae14d6e91fd8af2b2842f568b9a1fa0b1d84fa6e18b4bc21c0ba2a9e62218"
 		},
 		"harness/executor": {
 			"path": ".pi/agents/harness/executor.md",
-			"sha256": "6baffcc3d89954494ce3ae439175686a39928b6a543a0a451da27475094b1712"
+			"sha256": "e222a5c54c74329cdcfa92918d9191fa603d8945b81ca94484db258cda012783"
 		},
 		"harness/incident-recorder": {
 			"path": ".pi/agents/harness/incident-recorder.md",
@@ -68,6 +68,10 @@
 			"path": ".pi/agents/harness/sentrux-bootstrap.md",
 			"sha256": "3a0b43b94386a7c541b8a806a37524a5e53f1c8049270db7a420680df5799eeb"
 		},
+		"harness/sentrux-steward": {
+			"path": ".pi/agents/harness/sentrux-steward.md",
+			"sha256": "0e63175d817adc0d65876f5c24fb54e4882081caf939ff9c658afee51fc6889c"
+		},
 		"harness/tie-breaker": {
 			"path": ".pi/agents/harness/tie-breaker.md",
 			"sha256": "1c54c1c3274291dea1ea8826563a7ad4fe1d9c4302984e907bfcd22cfc4f5eba"
@@ -78,7 +82,7 @@
 		},
 		"harness/planning/decompose": {
 			"path": ".pi/agents/harness/planning/decompose.md",
-			"sha256": "0919dafa1d1cd008d513c28524c1e7218867586a138982dccf01db5270c42c73"
+			"sha256": "c9dd890d45cf4548e28d03aedb86d5fc4ed81022e920ad0005faf404994c6e96"
 		},
 		"harness/planning/execution-plan-author": {
 			"path": ".pi/agents/harness/planning/execution-plan-author.md",
@@ -86,43 +90,47 @@
 		},
 		"harness/planning/hypothesis-validator": {
 			"path": ".pi/agents/harness/planning/hypothesis-validator.md",
-			"sha256": "36f0baa7796229f21bd02faf5e70402c7bf054289eab557a25bfbe3cb7781de7"
+			"sha256": "20411e5d734b14b05ae11153133089e044f46784e5b4741712f608665bbf4376"
 		},
 		"harness/planning/hypothesis": {
 			"path": ".pi/agents/harness/planning/hypothesis.md",
-			"sha256": "e83d5c4faaee8d32af4a5f22c9917b70a173f3e22d7c0f182b361706f2309171"
+			"sha256": "bbb91ac0de39c9de4bf388f0cf926151b6b6a7771d2a0d01d1009a1860daef77"
 		},
 		"harness/planning/implementation-researcher": {
 			"path": ".pi/agents/harness/planning/implementation-researcher.md",
-			"sha256": "653f320b5d51bb331774246687f24a75347b406bba4e6dfd2968d6e5d4cc8bb3"
+			"sha256": "d1bbaaf1e67ad98350319f973062f01a25ca70874c99cb335c99bec866da1f6d"
 		},
 		"harness/planning/plan-adversary": {
 			"path": ".pi/agents/harness/planning/plan-adversary.md",
-			"sha256": "3241d7ec939dc29e0af64690b99e9f74b209f40b0daa4a2a1f9ff86f99f94a8d"
+			"sha256": "d9a953c0f8f900dc9a95816ada401955dafade7bf5907406cbe3bf3ba760c469"
 		},
 		"harness/planning/plan-evaluator": {
 			"path": ".pi/agents/harness/planning/plan-evaluator.md",
-			"sha256": "71660ab58bfcfdfae56c873140d4ea5946ae30cd5719c96afeabfd02b1d1f81d"
+			"sha256": "825f296c487d6aeacad5d320e155a3f23d0db6dea822fccc99a1305941a43da2"
+		},
+		"harness/planning/planning-context": {
+			"path": ".pi/agents/harness/planning/planning-context.md",
+			"sha256": "96a51d1f2daafc9eaa8869a06ede9d04fc9e19076d58a81041e346e4c81c8b08"
 		},
 		"harness/planning/review-integrator": {
 			"path": ".pi/agents/harness/planning/review-integrator.md",
-			"sha256": "cf3f0dbe81274ec9ef0ff2e0c170e8dc929b20be65492d0ee9a80d985acf6d71"
+			"sha256": "bba385463ca8833654cd0dc80f666344332293fe86d7420d2c36755a3f9e743a"
 		},
 		"harness/planning/scout-graphify": {
 			"path": ".pi/agents/harness/planning/scout-graphify.md",
-			"sha256": "6e2bda8ad38311810c9916d9dab311873bc776e4b8832bb0e574136e45e1255e"
+			"sha256": "edc117245476859d3bea93d6e1247cf9f580719bb3aabb91d885cc196c102f68"
 		},
 		"harness/planning/scout-semantic": {
 			"path": ".pi/agents/harness/planning/scout-semantic.md",
-			"sha256": "416e518d8204a55b26dc53da1f750865c6f09ee2c7f343b41e7c08da3230c089"
+			"sha256": "060ad9251068c68cc20418a45a5a5747b708895b946c8153d9e5034b28c59ad5"
 		},
 		"harness/planning/scout-structure": {
 			"path": ".pi/agents/harness/planning/scout-structure.md",
-			"sha256": "76c42a15cc74cf1de2cf861cb0146c865c205f69cce7b9605d41893b19600029"
+			"sha256": "111d055b82f0e1dde4cddc61d53474d8ad650dba2fd988061fd40fa638ed8bc7"
 		},
 		"harness/planning/sprint-contract-auditor": {
 			"path": ".pi/agents/harness/planning/sprint-contract-auditor.md",
-			"sha256": "12cb5e6b53dcc19ace62e8e4c152d96440717df53a182e76216dd2327410df4d"
+			"sha256": "2321298529f70d03798d23346231c4c43ad4b7490a43f291430ca65b3ef93757"
 		},
 		"harness/planning/stack-researcher": {
 			"path": ".pi/agents/harness/planning/stack-researcher.md",

package/.pi/harness/corpus/cron.example ADDED Viewed

@@ -0,0 +1,8 @@
+# Cron alternative (systemd timer is the tested path). Bounded, locked, explicit env, no overlap.
+# Edit UP_ROOT before installing with `crontab -e`.
+SHELL=/bin/sh
+PATH=/usr/local/bin:/usr/bin:/bin
+UP_ROOT=/home/USER/ai-projects/ultimate-pi
+HARNESS_GRAPHIFY_KB_LOG=/home/USER/.local/state/ultimate-pi/graphify-kb-updater.log
+30 8 * * * cd "$UP_ROOT" && /usr/bin/flock -n /tmp/graphify-kb-updater.lock /usr/bin/timeout 45m /usr/bin/env node .pi/scripts/graphify-kb-updater.mjs --apply --refresh-graph --pilot-report --max-promotions 25 >> "$HARNESS_GRAPHIFY_KB_LOG" 2>&1

package/.pi/harness/corpus/graphify-kb-updater.config.json ADDED Viewed

@@ -0,0 +1,159 @@
+{
+	"schema_version": "1.1.0",
+	"policy": "hybrid-allowlist-auto-promotion-with-conservative-staging",
+	"auto_promote_allowlist": true,
+	"source_taxonomy": {
+		"article": {
+			"category": "public_article_or_engineering_blog",
+			"risk_class": "low_to_medium",
+			"default_policy": "allowlist_auto_promote_when_approved"
+		},
+		"paper": {
+			"category": "research_paper_or_abstract_feed",
+			"risk_class": "medium",
+			"default_policy": "stage_until_rights_review"
+		},
+		"book": {
+			"category": "book_or_longform_local_file",
+			"risk_class": "high",
+			"default_policy": "manual_approval_required"
+		},
+		"transcript": {
+			"category": "youtube_or_audio_transcript",
+			"risk_class": "high",
+			"default_policy": "manual_approval_required"
+		},
+		"youtube": {
+			"category": "youtube_candidate_or_video_reference",
+			"risk_class": "high",
+			"default_policy": "stage_metadata_only_until_approved"
+		}
+	},
+	"competitor_taxonomy": {
+		"ai_coding_agents": {
+			"description": "Coding-agent products, CLIs, IDE agents, and model-native coding surfaces.",
+			"keywords": [
+				"claude code",
+				"cursor",
+				"codex",
+				"aider",
+				"copilot",
+				"windsurf",
+				"zed",
+				"replit",
+				"devin"
+			]
+		},
+		"agentic_harnesses": {
+			"description": "Harnesses, orchestration frameworks, eval loops, task runners, and review gates.",
+			"keywords": [
+				"harness",
+				"orchestration",
+				"agent bus",
+				"eval",
+				"review gate",
+				"multi-agent",
+				"workflow"
+			]
+		},
+		"context_engineering": {
+			"description": "Context retrieval, compaction, memory, skills, MCP, and codebase indexing.",
+			"keywords": [
+				"context engineering",
+				"mcp",
+				"memory",
+				"retrieval",
+				"compaction",
+				"skills",
+				"knowledge graph"
+			]
+		}
+	},
+	"allowlist": [
+		{
+			"domain": "openai.com",
+			"approved": true,
+			"approved_by": "repo-policy",
+			"approved_at": "2026-05-23",
+			"allowed_source_classes": ["article"]
+		},
+		{
+			"domain": "anthropic.com",
+			"approved": true,
+			"approved_by": "repo-policy",
+			"approved_at": "2026-05-23",
+			"allowed_source_classes": ["article"]
+		},
+		{
+			"domain": "github.blog",
+			"approved": true,
+			"approved_by": "repo-policy",
+			"approved_at": "2026-05-23",
+			"allowed_source_classes": ["article"]
+		},
+		{
+			"domain": "martinfowler.com",
+			"approved": true,
+			"approved_by": "repo-policy",
+			"approved_at": "2026-05-23",
+			"allowed_source_classes": ["article"]
+		},
+		{
+			"domain": "addyosmani.com",
+			"approved": true,
+			"approved_by": "repo-policy",
+			"approved_at": "2026-05-23",
+			"allowed_source_classes": ["article"]
+		},
+		{
+			"domain": "arxiv.org",
+			"approved": false,
+			"approved_by": "manual-review-required",
+			"approved_at": "manual-review-required",
+			"allowed_source_classes": ["paper"]
+		}
+	],
+	"article_queries": [
+		"agentic engineering harness engineering AI coding agents",
+		"AI coding harness evaluation orchestration context engineering"
+	],
+	"paper_feeds": [
+		{
+			"title": "arXiv software engineering agents search feed",
+			"url": "https://arxiv.org/search/cs?query=agentic+software+engineering&searchtype=all",
+			"rights_access": {
+				"license": "source-specific",
+				"access": "public abstract/feed only; paper text requires review",
+				"approved_by": "manual-review-required",
+				"approved_at": "manual-review-required"
+			},
+			"provenance": {
+				"origin": "curated_search_feed",
+				"locator": "https://arxiv.org/search/cs?query=agentic+software+engineering&searchtype=all",
+				"notes": "Feed metadata only; paper body requires approval."
+			}
+		}
+	],
+	"local_books": [
+		{
+			"path": "data/books",
+			"max_files": 75
+		}
+	],
+	"local_transcripts": [
+		{
+			"path": "data/youtube-transcripts",
+			"max_files": 100
+		}
+	],
+	"youtube_candidates": [
+		{
+			"title": "Review queue placeholder for agentic engineering YouTube talks",
+			"url": "https://www.youtube.com/results?search_query=agentic+engineering+harness+engineering",
+			"rights_access": null,
+			"approved": false,
+			"competitor_labels": ["agentic_harnesses"]
+		}
+	],
+	"review_queue": []
+}

package/.pi/harness/corpus/systemd/graphify-kb-updater.env.template ADDED Viewed

@@ -0,0 +1,4 @@
+# Copy to ~/.config/ultimate-pi/graphify-kb-updater.env and edit paths.
+UP_ROOT=/home/USER/ai-projects/ultimate-pi
+NODE_ENV=production
+GRAPHIFY_KB_ARGS=--apply --refresh-graph --pilot-report --max-promotions 25

package/.pi/harness/corpus/systemd/graphify-kb-updater.service ADDED Viewed

@@ -0,0 +1,17 @@
+[Unit]
+Description=Ultimate Pi Graphify knowledge-base updater
+Documentation=file:%h/ai-projects/ultimate-pi/.pi/harness/docs/graphify-kb-updater-runbook.md
+After=network-online.target
+Wants=network-online.target
+[Service]
+Type=oneshot
+EnvironmentFile=%h/.config/ultimate-pi/graphify-kb-updater.env
+WorkingDirectory=%h/ai-projects/ultimate-pi
+ExecStart=/usr/bin/flock -n %t/graphify-kb-updater.lock /usr/bin/timeout 45m /usr/bin/env node .pi/scripts/graphify-kb-updater.mjs $GRAPHIFY_KB_ARGS
+StandardOutput=append:%h/.local/state/ultimate-pi/graphify-kb-updater.log
+StandardError=append:%h/.local/state/ultimate-pi/graphify-kb-updater.err
+TimeoutStartSec=50m
+Nice=10
+IOSchedulingClass=best-effort
+IOSchedulingPriority=7

package/.pi/harness/corpus/systemd/graphify-kb-updater.timer ADDED Viewed

@@ -0,0 +1,11 @@
+[Unit]
+Description=Run Ultimate Pi Graphify knowledge-base updater daily on a bounded schedule
+[Timer]
+OnCalendar=*-*-* 08:30:00
+RandomizedDelaySec=30m
+Persistent=true
+Unit=graphify-kb-updater.service
+[Install]
+WantedBy=timers.target

package/.pi/harness/docs/adrs/0001-harness-constitution.md CHANGED Viewed

@@ -13,7 +13,8 @@ ultimate-pi needs a stable governance model for agentic runs: plan-before-mutate
 2. Phases are `plan → execute → evaluate → adversary → merge` with policy-gate as the source of truth.
 3. Local JSONL under `.pi/harness/runs/` is the **source of truth** for run history; PostHog is for team dashboards.
 4. Context for harness paths uses **context-mode only** — never lean-ctx in harness skills or extensions.
-5. `@posthog/pi` remains the LLM analytics layer; harness domain events use `harness-telemetry.ts`.
+5. Context-mode execute tools (`ctx_execute`, `ctx_batch_execute`, `ctx_execute_file`) are subject to the same phase matrix as `bash`/`write` via policy-gate.
+6. `@posthog/pi` remains the LLM analytics layer; harness domain events use `harness-telemetry.ts`.
 ## Consequences

package/.pi/harness/docs/adrs/0006-sentrux-dual-layer.md CHANGED Viewed

@@ -5,15 +5,16 @@
 ## Context
-Evaluator trust requires both programmatic gates (policy, budget, integrity) and external observation signals (Sentrux MCP).
+Evaluator trust requires both programmatic gates (policy, budget, integrity) and **measured structural actuals** from the Sentrux CLI (Pi sessions use CLI only — no Sentrux MCP in harness).
 ## Decision
 1. **Rules file:** `.sentrux/rules.toml` synced from manifest — see [ADR 0009](0009-sentrux-rules-lifecycle.md).
-2. **CLI gate:** `node "$UP_PKG/.pi/scripts/harness-verify.mjs"` fails if `HARNESS_SENTRUX_REQUIRED=true` and no `harness-sentrux-signal` stub/file exists for the run (placeholder until MCP wired). Resolve `$UP_PKG` via [.pi/scripts/README.md](../../../scripts/README.md).
-3. **MCP layer (Q2+):** Evaluator sessions must record at least one Sentrux observation before `harness_eval_verdict` promotion when Sentrux is enabled.
-4. Observations flow through `observation-bus.ts` as `HarnessObservation` envelopes.
-5. PostHog event: `harness_sentrux_signal` with `signal_type` and `score` only — no secrets.
+2. **Run observation:** `/harness-run` writes `artifacts/sentrux-signal.yaml` and appends session custom entry `harness-sentrux-signal` after `sentrux check` + `sentrux gate` (baseline from `sentrux gate --save` before execute).
+3. **Verify gate:** `harness-verify.mjs` with `HARNESS_SENTRUX_REQUIRED=true` prefers `$HARNESS_RUN_DIR/artifacts/sentrux-signal.yaml`; falls back to `.pi/harness/evals/smoke/sentrux-stub.json` only when no run signal exists (CI smoke / pre-run verify).
+4. **Evaluator:** `harness/evaluator` in `benchmark` mode reads `sentrux-signal.yaml` and `benchmark-log.yaml` — metrics are inputs, not executor optimization targets.
+5. Observations flow through `observation-bus.ts` as `HarnessObservation` envelopes when wired.
+6. PostHog event: `harness_sentrux_signal` with `signal_type` and `score` only — no secrets.
 ## Consequences
@@ -23,7 +24,7 @@ Evaluator trust requires both programmatic gates (policy, budget, integrity) and
 ### Negative
-- Full MCP integration remains follow-up when Sentrux server is available.
+- Teams must run `/harness-run` (or write `sentrux-signal.yaml`) before promotion verify when stub fallback is insufficient.
 ## References

package/.pi/harness/docs/adrs/0009-sentrux-rules-lifecycle.md CHANGED Viewed

@@ -20,7 +20,10 @@ Sentrux enforces architecture via [`.sentrux/rules.toml`](https://sentrux.dev/do
    - On `agent_end` when harness phase is `plan` or `merge`
    - `node "$UP_PKG/.pi/scripts/harness-verify.mjs"` fails if manifest hash ≠ last sync (`--check`)
 7. **Custom rules:** TOML outside the managed block is preserved on sync.
-8. **Skill:** `harness-sentrux-setup` documents bootstrap vs `--force`.
+8. **Skill:** `harness-sentrux-setup` documents bootstrap vs steward vs sync vs observation.
+9. **Intent evolution:** `harness/sentrux-steward` proposes JSON Merge Patches via `submit_sentrux_manifest_proposal` → `artifacts/sentrux-manifest-proposal.yaml`, with graphify-first evidence (`graphify-out/GRAPH_REPORT.md`, `graphify query` / `path` / `explain`). Chair applies manifest edits; never silent auto-merge.
+10. **Material changes:** `add_layer`, `add_boundary`, `split_layer` require `adr_required` + `ask_user` when `human_required`. `tune_constraint` may proceed with sentrux/graphify evidence only when chair agrees.
+11. **Observation vs intent:** `/harness-run` + `/harness-review` run CLI fitness functions; observation failures → replan/fix. Manifest changes → steward + ADR, not directory-tree guessing.
 ## Consequences
@@ -36,6 +39,8 @@ Sentrux enforces architecture via [`.sentrux/rules.toml`](https://sentrux.dev/do
 ## References
 - ADR 0006 (Sentrux dual layer)
+- `.pi/agents/harness/sentrux-steward.md`, `.pi/prompts/harness-sentrux-steward.md`
+- `.pi/harness/specs/sentrux-manifest-proposal.schema.json`, `sentrux-signal.schema.json`
 - `.pi/scripts/harness-sentrux-bootstrap.mjs`
 - `.pi/scripts/sentrux-rules-sync.mjs`
 - `.agents/skills/harness-sentrux-setup/SKILL.md`

package/.pi/harness/docs/adrs/0031-harness-run-context.md CHANGED Viewed

@@ -17,7 +17,7 @@ Manual harness steps required copying `run_id` and `plan-packet.json` paths betw
 4. **Hook order:** `harness-run-context` `before_agent_start` allocates/reuses `run_id` before `trace-recorder` `agent_start`. Trace writes phase files `trace-<phase>.json` plus rollup `trace.json`.
 5. PostHog `harness_run_started` at most once per logical `run_id`.
 6. Short commands: `/harness-run`, `/harness-eval`, etc. without args; recovery via `/harness-run-status`, `/harness-use-run`.
-7. After execute, handoff recommends **`/harness-eval`** in the same session; review commands spawn isolated subagents (see ADR 0032). `active-run.json` still supports cross-session recovery when Pi was closed mid-run.
+7. After execute, handoff recommends **`/harness-eval`** in the same session; review commands spawn isolated subagents (see ADR 0032). `active-run.json` still supports cross-session recovery when Pi was closed mid-run. On a **new Pi session**, if disk has a non-stale active run but this session has no `harness-run-context` entry yet, show a one-time resume message and live-widget hint pointing at **`/harness-use-run <run-id>`** (no silent auto-bind).
 8. `hasApprovedPlanSignal` uses user-visible prompt only; execute requires `plan_ready` from disk validation **and** recorded `ask_user` approval (or `harness-plan-approval` entry).
 9. **Plan-phase writes:** policy-gate allows `write`/`edit` only on canonical `.pi/harness/runs/<run_id>/plan-packet.json` after approval; all other paths stay blocked until execute phase.
 10. **Approval-before-persist:** agents present the full plan, call `ask_user` (Approve / Request changes / Cancel), then write the packet. `--quick` narrows planning only — it does not skip approval.

package/.pi/harness/docs/adrs/0032-harness-command-orchestration.md CHANGED Viewed

@@ -28,9 +28,16 @@ Harness slash prompts duplicated logic already defined in `harness/*` agents. Th
 - Orchestrator must parse subagent JSON reliably and pass complete spawn context.
 - Scope enforcement remains prompt-driven for executor until optional path allowlist.
+## Amendment (2026-05-23)
+- **`/harness-review`** is the master **post-run** orchestrator (benchmark + verdict + adversary). See ADR 0039.
+- **`/harness-eval`** and **`/harness-critic`** are thin deprecated aliases; do not implement separate pipelines.
+- Post-run artifacts use **`submit_*`** + **`harness_artifact_ready`** per ADR 0037; parent does not parse subprocess JSON into `artifacts/eval-verdict.yaml`.
 ## References
 - `.pi/prompts/harness-*.md`
+- ADR 0039 — post-run review gate
 - `.pi/agents/harness/*.md`
 - `vendor/pi-subagents/src/subagents.ts`, `.pi/extensions/lib/harness-subagents-bridge.ts`
 - `.pi/extensions/lib/harness-subagent-policy.ts`

package/.pi/harness/docs/adrs/0034-darwin-plan-research-pipeline.md CHANGED Viewed

@@ -9,13 +9,13 @@
 ## Decision
-1. **Always-on research chain** after parallel scouts:
+1. **Always-on research chain** after planning context (ADR 0041; **sequential** — WBS before approach):
    - `harness/planning/decompose` — DeepMind-style problem decomposition (`PlanDecompositionBrief`)
-   - `harness/planning/hypothesis` — DARWIN hypothesis generation (`PlanHypothesisBrief`)
+   - `harness/planning/hypothesis` — DARWIN hypothesis generation (`PlanHypothesisBrief`); spawned only after `artifacts/decomposition.yaml` exists
 2. **Parent maps hypothesis → PlanPacket** — `plan-packet.schema.json` unchanged; execution gating stable.
 3. **Review Gate (ADR 0035):** outcome-based debate with `hypothesis-validator` on R1 (blind — task + hypothesis only). Retired `hypothesis-eval` as a separate pre-approval agent.
 4. **`approve_plan` optional `research_brief`** — rendered in `plan-review.md`; not written to `plan-packet.json`.
-5. **`--quick`** still skips semantic scout only; never skips decompose/hypothesis.
+5. **`--quick`** still skips semantic coverage in planning context only; never skips decompose/hypothesis.
 ## Consequences

package/.pi/harness/docs/adrs/0036-implementation-research-and-selective-debate.md CHANGED Viewed

@@ -13,14 +13,14 @@ ADR 0034–0035 established Darwin research and outcome-based Review Gate debate
 ## Decision
-1. **Phase 3.5** — After decompose/hypothesis, parent spawns in parallel:
-   - `harness/planning/implementation-researcher` → `PlanImplementationResearchBrief` → `artifacts/implementation-research.yaml`
-   - `harness/planning/stack-researcher` → `PlanStackBrief` → `artifacts/stack.yaml`
+1. **Phase 3.5** — After decompose/hypothesis, parent produces (subprocess optional):
+   - `artifacts/implementation-research.yaml` (`PlanImplementationResearchBrief`) — inline and/or `implementation-researcher`
+   - `artifacts/stack.yaml` (`PlanStackBrief`) — inline and/or `stack-researcher`
 2. Research stays **outside** debate; debate agents cite artifacts, no web tools.
-3. **Phase 4d** — `harness_plan_debate_eligibility` (pre-debate only) selects `full | standard | light` and `required_focuses`; persisted on messenger + bus at `harness_debate_open`.
+3. **Phase 4d** — `harness_plan_debate_eligibility` (pre-debate only) selects `full | standard | light | fast` and `required_focuses`; persisted on messenger + bus at `harness_debate_open`.
 4. **Light profile** — `spec` + `quality` only, `min_focus_rounds=2`, reduced global cap; gate uses stored `required_focuses` (not hardcoded four).
 5. **Sprint auditor** — shared `lanesForRound(roundIndex, focus)` spawns sprint lane when `focus === quality` OR `roundIndex >= 4`.
-6. **`--quick`** still skips semantic scout only; never skips Phase 3.5 or debate.
+6. **`--quick`** still skips semantic coverage in planning context only; never skips Phase 3.5 artifacts (med/high risk) or debate.
 ## Profiles
@@ -29,6 +29,9 @@ ADR 0034–0035 established Darwin research and outcome-based Review Gate debate
 | full | high risk, material fork, open implementation questions, DAG manual patch, many tensions | all four | 4 |
 | standard | default (ambiguous → standard) | all four | 4 |
 | light | low risk, no fork, high-confidence implementation + clear stack primary | spec, quality | 2 |
+| fast | med/low, clear stack, no open questions | spec, quality | 1 (consolidated `review_gate_mode`) |
+See [practice-map.md](../practice-map.md) and [ADR 0040](0040-practice-grounded-orchestration.md).
 ## Consequences

package/.pi/harness/docs/adrs/0039-harness-post-run-review-gate.md ADDED Viewed

@@ -0,0 +1,47 @@
+# ADR 0039: Post-run review gate (`/harness-review`)
+- **Status:** Accepted
+- **Date:** 2026-05-23
+## Context
+Post-run flow split across `/harness-eval`, a thin `/harness-review` (verdict-only), and `/harness-critic`. Cross-session resume left `owner_pi_session_id` on the plan session, blocking parent orchestration. Status routing used session handoff strings instead of canonical `artifacts/eval-verdict.yaml`. Prompts still instructed parent JSON parsing and `write` to eval artifacts (ADR 0037 violation).
+## Decision
+1. **`/harness-review`** is the **master post-run orchestrator** (plan-grade): deterministic gates → benchmark evaluator → policy verdict → adversary (parallel with verdict when precheck allows) → optional tie-breaker → **`artifacts/review-outcome.yaml`**. Always complete review before replan; blocked execute routes here, not `/harness-plan`. `--quick` skips adversary and tie-breaker. Steer attempts 2+ may use **lite** review (benchmark + verdict; skip adversary unless prior `block_merge`).
+2. **`/harness-eval`** and **`/harness-critic`** are **deprecated aliases** that forward to `/harness-review` in the same turn.
+3. **Ownership:** `/harness-use-run --claim` and auto-claim on post-run commands (unless `--readonly`) set `owner_pi_session_id` and `pi_session_id` to the current Pi session.
+4. **Disk truth:** `resolveCompletionStatuses` reads `artifacts/eval-verdict.yaml` and `artifacts/adversary-report.yaml` for `nextStepAfterOutcome` and widget next steps. Persisted `next_recommended_command` on `run-context.yaml` wins when set.
+5. **Artifacts:** Evaluator uses `submit_eval_verdict`; adversary uses `submit_adversary_report`. Parent gates with `harness_artifact_ready` only. Parent may write `artifacts/benchmark-log.yaml` via `write_harness_yaml`; parent must not write eval/adversary verdict YAML.
+6. **Rollback:** `submit_executor_handoff` mirrors `rollback_refs` to `artifacts/executor-rollback.yaml` (no `artifacts/*.json`).
+## Phases (orchestrator)
+| Phase | Actor | Output |
+|-------|--------|--------|
+| 0 | Parent | Parse args; claim run; require execute complete |
+| 1 | Parent | `harness-verify.mjs`; optional `benchmark-log.yaml` |
+| 2 | `harness/evaluator` benchmark | `eval-verdict.yaml` |
+| 2b | Parent | Record benchmark fail in review-outcome; continue to verdict unless harness-verify hard-stops |
+| 3 | `harness/evaluator` verdict | `eval-verdict.yaml` (policy) |
+| 4 | `harness/adversary` | `adversary-report.yaml` |
+| 5 | `harness/tie-breaker` | conditional |
+## Consequences
+### Positive
+- One command after `/harness-run`; same-session and cross-session resume with `--claim`.
+- Widget and run context align with on-disk verdicts.
+### Negative
+- Full post-run pipeline latency is sequential in one command (acceptable vs broken multi-session flow).
+## References
+- ADR 0032 (amended), ADR 0037
+- `.pi/prompts/harness-review.md`
+- `.pi/lib/harness-run-context.ts` (`claimRunOwnership`, `resolveCompletionStatuses`)
+- `.agents/skills/harness-review/SKILL.md`

package/.pi/harness/docs/adrs/0040-practice-grounded-orchestration.md ADDED Viewed

@@ -0,0 +1,40 @@
+# ADR 0040: Practice-grounded orchestration and team topology
+- **Status:** Accepted
+- **Date:** 2026-05-23
+## Context
+Harness commands (`/harness-plan`, `/harness-run`, `/harness-review`) already followed structured planning, generator–evaluator separation, and outcome-based debate (ADRs 0032–0039). The graphify corpus (PMBOK process groups, Team Topologies, Code Complete inspection, harness engineering, Lean spikes) was not surfaced in prompts—orchestrators could spawn redundant parallel thinkers (e.g. decompose ∥ hypothesis) and debate lanes without clear RACI.
+## Decision
+1. **Practice map** — [practice-map.md](../practice-map.md) is the source of truth: phase → practice → agent/script → spawn topology, debate RACI, anti-patterns.
+2. **Planning sequence** — After planning context (ADR 0041), **decompose then hypothesis** (sequential invariant). Hypothesis requires `artifacts/decomposition.yaml` (amends ADR 0034). For `low`/`med` risk, a single **plan-synthesizer** spawn may produce decomposition, hypothesis, and `execution_plan` in one pass, but those artifacts must still land on disk before blind validation (ADR 0042)—sequential **invariant**, not necessarily three parent spawn batches.
+3. **Reconnaissance dedup** — `decompose` must not run `graphify query` when `artifacts/planning-context.yaml` has `coverage.architecture.status: ok` (legacy: `scout-graphify.yaml` with `status: ok`).
+4. **Team topology rules** — Documented in practice-map and orchestration skills:
+   - Parallel only for independent merges (implementation ∥ stack; optional legacy scouts ≤3).
+   - Max 2 research lanes, 1 optional `planning-context` subagent, 1 executor, 1 debate agent per `subagent` batch.
+   - Debate: parent is chair; one agent per batch; Fagan-style roles (inspector, red team, DoD auditor, blind verifier, recorder).
+5. **Command prompts** — Name the proven practice per phase; link practice-map.
+6. **Profiles** — `fast` consolidated Review Gate documented alongside `light` threaded gate (ADR 0036 amended).
+## Consequences
+### Positive
+- Every harness phase traceable to corpus-backed practice.
+- Fewer detached hypotheses and duplicate graphify work (strengthened by ADR 0041 planning-context artifact).
+- Clearer debate roster; smaller teams on low-risk plans via `fast`/`light`.
+### Negative
+- Slightly longer plan phase wall-clock (sequential decompose → hypothesis).
+- More documentation for agents to reference.
+## References
+- [practice-map.md](../practice-map.md)
+- ADR 0034, ADR 0036, ADR 0039
+- `.pi/prompts/harness-plan.md`, `.pi/prompts/harness-run.md`, `.pi/prompts/harness-review.md`
+- `graphify-out/GRAPH_REPORT.md` — Planning / Executing / Monitoring communities, Team Topologies, Harness Engineering

package/.pi/harness/docs/adrs/0041-intelligent-planning-reconnaissance.md ADDED Viewed

@@ -0,0 +1,39 @@
+# ADR 0041: Intelligent planning reconnaissance (tools over tool-scouts)
+- **Status:** Accepted
+- **Date:** 2026-05-23
+## Context
+ADR 0033 and 0040 mandated three parallel planning scouts (`scout-graphify`, `scout-structure`, `scout-semantic`), each bound to one tool family. That enforced coverage but constrained orchestrator intelligence: the parent always paid for three subprocesses even when one tool pass or a short graphify query sufficed.
+The graphify corpus (Superpowers: *Rigid Where It Matters, Flexible Where It Doesn't*; context engineering: *Context > Model Intelligence*) supports hard gates on **artifacts and phase order**, not on **how many subprocesses** gather context.
+## Decision
+1. **Phase 1 default** — Parent compiles `artifacts/planning-context.yaml` using repo tools (`graphify`, `sg`, `ccc`, reads) per task need. No mandatory scout subprocess batch.
+2. **Artifact contract** — `plan-planning-context.schema.json` requires `coverage.architecture` and `coverage.structure` at `ok` or `partial`; `coverage.semantic` may be `skipped` when `--quick`.
+3. **Optional subprocess** — At most one `harness/planning/planning-context` subagent when isolation warrants; `submit_planning_context` writes the canonical artifact.
+4. **Legacy compat (one release)** — `scout-*.yaml` trio still satisfies approval readiness with deprecation warning; `decompose` dedup reads `planning-context` first.
+5. **Phase 3.5** — Requires `implementation-research.yaml` and `stack.yaml` for med/high risk; subprocess researchers optional (parent may spike inline).
+6. **Spawn topology** — Remove default parallel scout batch rules; keep the decompose → hypothesis and debate sequential laws.
+## Consequences
+### Positive
+- Orchestrator chooses tools and depth by task; fewer ceremonial subprocesses.
+- Single shared artifact reduces merge friction and redundant graphify in decompose.
+- Hard gates (DAG, debate, approval) unchanged.
+### Negative
+- Parent context window bears more reconnaissance load unless `planning-context` subagent is used.
+- Legacy scout agents remain on disk until removal after deprecation window.
+## References
+- [practice-map.md](../practice-map.md)
+- ADR 0033, ADR 0040
+- `.pi/prompts/harness-plan.md`
+- `plan-planning-context.schema.json`

package/.pi/harness/docs/adrs/0042-agent-native-orchestration.md ADDED Viewed

@@ -0,0 +1,35 @@
+# ADR 0042: Agent-native orchestration
+- **Status:** Accepted
+- **Date:** 2026-05-23
+## Context
+Harness commands inherited human PM rituals: serial debate “meetings,” ticket-granularity WBS, and tool calls that re-embed full plan packets in model context. Agents optimize for context window, spawn cost, and verifiable artifacts—not calendar boundaries or social coordination (see [practice-map.md](../practice-map.md)).
+## Decision
+1. **Agent translation column** — practice-map documents human practice → agent equivalent (scheduler + gates, lake-first plans, path-first tools, steer loop).
+2. **Boiling lakes** — Fewer `work_items` with richer specs and `context_bundle_path`; `executor_strategy` on PlanPacket (`single_pass` | `per_lake` | `per_work_item`).
+3. **Plan-verify probes** — For `fast`/`standard` profiles, parallel inspector + adversary probes replace serial “one role per batch” debate where gate supports `parallel_probes` (ADR 0036 extended).
+4. **Plan synthesizer** — For `low`/`med` risk, one `harness/planning/plan-synthesizer` pass may replace separate author spawn; **decomposition + hypothesis artifacts still required** on disk for blind validation (ADR 0040 invariant).
+5. **Path-first tools** — See ADR 0043; disk is source of truth for approval and submit pipelines.
+6. **Steer loop** — See ADR 0044; always complete post-run review; repair vs plan revise routing.
+## Consequences
+### Positive
+- Lower plan/review wall-clock and token use.
+- Plans sized for agent throughput, not sprint ticket count.
+### Negative
+- More ADRs and schema fields for agents to learn.
+- Migration period: optional fat tool args remain for one release.
+## References
+- [practice-map.md](../practice-map.md)
+- ADR 0040, 0041, 0043, 0044
+- `.cursor/plans/agent-native_harness_workflows_1d353489.plan.md` (design source)