ultimate-pi 0.17.0 → 0.18.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137) hide show
  1. package/.agents/skills/harness-context/SKILL.md +13 -6
  2. package/.agents/skills/harness-debate-plan/SKILL.md +37 -20
  3. package/.agents/skills/harness-decisions/SKILL.md +1 -1
  4. package/.agents/skills/harness-eval/SKILL.md +6 -21
  5. package/.agents/skills/harness-governor/SKILL.md +4 -3
  6. package/.agents/skills/harness-orchestration/SKILL.md +41 -53
  7. package/.agents/skills/harness-plan/SKILL.md +23 -12
  8. package/.agents/skills/harness-review/SKILL.md +52 -0
  9. package/.agents/skills/harness-sentrux-setup/SKILL.md +16 -3
  10. package/.agents/skills/harness-steer/SKILL.md +14 -0
  11. package/.agents/skills/sentrux/SKILL.md +9 -9
  12. package/.pi/agents/harness/planning/decompose.md +7 -4
  13. package/.pi/agents/harness/planning/hypothesis-validator.md +2 -0
  14. package/.pi/agents/harness/planning/hypothesis.md +3 -1
  15. package/.pi/agents/harness/planning/plan-adversary.md +2 -0
  16. package/.pi/agents/harness/planning/plan-evaluator.md +2 -0
  17. package/.pi/agents/harness/planning/plan-synthesizer.md +25 -0
  18. package/.pi/agents/harness/planning/planning-context.md +48 -0
  19. package/.pi/agents/harness/planning/review-integrator.md +2 -0
  20. package/.pi/agents/harness/planning/sprint-contract-auditor.md +2 -0
  21. package/.pi/agents/harness/{adversary.md → reviewing/adversary.md} +3 -10
  22. package/.pi/agents/harness/{evaluator.md → reviewing/evaluator.md} +3 -12
  23. package/.pi/agents/harness/running/executor.md +45 -0
  24. package/.pi/agents/harness/sentrux-steward.md +51 -0
  25. package/.pi/extensions/00-harness-project-control.ts +133 -0
  26. package/.pi/extensions/00-posthog-network-bootstrap.ts +11 -0
  27. package/.pi/extensions/budget-guard.ts +2 -0
  28. package/.pi/extensions/debate-orchestrator.ts +2 -0
  29. package/.pi/extensions/harness-ask-user.ts +2 -2
  30. package/.pi/extensions/harness-debate-tools.ts +2 -2
  31. package/.pi/extensions/harness-live-widget.ts +60 -3
  32. package/.pi/extensions/harness-plan-approval.ts +64 -58
  33. package/.pi/extensions/harness-run-context.ts +715 -90
  34. package/.pi/extensions/harness-subagent-submit.ts +46 -12
  35. package/.pi/extensions/harness-subagents.ts +2 -2
  36. package/.pi/extensions/harness-telemetry.ts +2 -0
  37. package/.pi/extensions/harness-web-tools.ts +2 -2
  38. package/.pi/extensions/lib/extension-load-guard.ts +10 -0
  39. package/.pi/extensions/lib/harness-artifact-gate.ts +172 -0
  40. package/.pi/extensions/lib/harness-posthog.ts +9 -5
  41. package/.pi/extensions/lib/harness-spawn-topology.ts +165 -0
  42. package/.pi/extensions/lib/harness-subagent-auth.ts +1 -2
  43. package/.pi/extensions/lib/harness-subagent-policy.ts +28 -24
  44. package/.pi/extensions/lib/harness-subagent-precheck.ts +36 -10
  45. package/.pi/extensions/lib/harness-subagent-submit-pipeline.ts +66 -2
  46. package/.pi/extensions/lib/harness-subagent-submit-registry.ts +22 -22
  47. package/.pi/extensions/lib/harness-subagents-bridge.ts +7 -29
  48. package/.pi/extensions/lib/harness-subprocess-bootstrap.ts +73 -0
  49. package/.pi/extensions/lib/plan-approval/create-plan.ts +2 -3
  50. package/.pi/extensions/lib/plan-approval/resolve-disk.ts +102 -0
  51. package/.pi/extensions/lib/plan-approval/schema.ts +22 -8
  52. package/.pi/extensions/lib/plan-approval/types.ts +1 -1
  53. package/.pi/extensions/lib/plan-approval/validate.ts +2 -2
  54. package/.pi/extensions/lib/plan-approval-readiness.ts +192 -0
  55. package/.pi/extensions/lib/plan-debate-eligibility.ts +12 -5
  56. package/.pi/extensions/lib/plan-debate-gate.ts +22 -1
  57. package/.pi/extensions/lib/plan-debate-lanes.ts +32 -2
  58. package/.pi/extensions/lib/plan-review-gate.ts +8 -0
  59. package/.pi/extensions/lib/posthog-client.ts +76 -0
  60. package/.pi/extensions/lib/spawn-policy.ts +3 -3
  61. package/.pi/extensions/observation-bus.ts +2 -0
  62. package/.pi/extensions/policy-gate.ts +26 -19
  63. package/.pi/extensions/review-integrity.ts +91 -10
  64. package/.pi/extensions/sentrux-rules-sync.ts +2 -0
  65. package/.pi/extensions/test-diff-integrity.ts +1 -0
  66. package/.pi/extensions/trace-recorder.ts +2 -0
  67. package/.pi/harness/agents.manifest.json +37 -37
  68. package/.pi/harness/corpus/cron.example +8 -0
  69. package/.pi/harness/corpus/graphify-kb-updater.config.json +214 -0
  70. package/.pi/harness/corpus/systemd/graphify-kb-updater.env.template +4 -0
  71. package/.pi/harness/corpus/systemd/graphify-kb-updater.service +17 -0
  72. package/.pi/harness/corpus/systemd/graphify-kb-updater.timer +11 -0
  73. package/.pi/harness/docs/adrs/0001-harness-constitution.md +2 -1
  74. package/.pi/harness/docs/adrs/0006-sentrux-dual-layer.md +8 -6
  75. package/.pi/harness/docs/adrs/0009-sentrux-rules-lifecycle.md +6 -1
  76. package/.pi/harness/docs/adrs/0031-harness-run-context.md +1 -1
  77. package/.pi/harness/docs/adrs/0032-harness-command-orchestration.md +7 -0
  78. package/.pi/harness/docs/adrs/0034-darwin-plan-research-pipeline.md +3 -3
  79. package/.pi/harness/docs/adrs/0036-implementation-research-and-selective-debate.md +8 -5
  80. package/.pi/harness/docs/adrs/0039-harness-post-run-review-gate.md +47 -0
  81. package/.pi/harness/docs/adrs/0040-practice-grounded-orchestration.md +40 -0
  82. package/.pi/harness/docs/adrs/0041-intelligent-planning-reconnaissance.md +39 -0
  83. package/.pi/harness/docs/adrs/0042-agent-native-orchestration.md +35 -0
  84. package/.pi/harness/docs/adrs/0043-path-first-harness-tools.md +38 -0
  85. package/.pi/harness/docs/adrs/0044-harness-steer-loop.md +37 -0
  86. package/.pi/harness/docs/adrs/0045-phase-scoped-agent-directories.md +33 -0
  87. package/.pi/harness/docs/adrs/README.md +11 -0
  88. package/.pi/harness/docs/graphify-kb-updater-runbook.md +163 -0
  89. package/.pi/harness/docs/practice-map.md +110 -0
  90. package/.pi/harness/env.harness.template +5 -3
  91. package/.pi/harness/evals/smoke/sentrux-stub.json +1 -1
  92. package/.pi/harness/evals/smoke/smoke-harness-plan.mjs +5 -2
  93. package/.pi/harness/specs/README.md +1 -1
  94. package/.pi/harness/specs/harness-run-context.schema.json +11 -0
  95. package/.pi/harness/specs/harness-spawn-context.schema.json +15 -1
  96. package/.pi/harness/specs/plan-execution-plan.schema.json +39 -1
  97. package/.pi/harness/specs/plan-packet.schema.json +4 -0
  98. package/.pi/harness/specs/plan-phase-status.schema.json +17 -0
  99. package/.pi/harness/specs/plan-phase-waiver.schema.json +25 -0
  100. package/.pi/harness/specs/plan-planning-context.schema.json +50 -0
  101. package/.pi/harness/specs/repair-brief.schema.json +45 -0
  102. package/.pi/harness/specs/review-outcome.schema.json +46 -0
  103. package/.pi/harness/specs/sentrux-manifest-proposal.schema.json +80 -0
  104. package/.pi/harness/specs/sentrux-signal.schema.json +43 -0
  105. package/.pi/harness/specs/steer-state.schema.json +20 -0
  106. package/.pi/lib/harness-context-mode-policy.ts +256 -0
  107. package/.pi/lib/harness-project-config.ts +91 -0
  108. package/.pi/lib/harness-repair-brief.ts +145 -0
  109. package/.pi/lib/harness-run-context.ts +591 -32
  110. package/.pi/lib/harness-ui-state.ts +114 -21
  111. package/.pi/prompts/harness-auto.md +10 -10
  112. package/.pi/prompts/harness-critic.md +3 -30
  113. package/.pi/prompts/harness-eval.md +4 -37
  114. package/.pi/prompts/harness-plan.md +116 -54
  115. package/.pi/prompts/harness-review.md +150 -15
  116. package/.pi/prompts/harness-run.md +62 -10
  117. package/.pi/prompts/harness-sentrux-steward.md +55 -0
  118. package/.pi/prompts/harness-setup.md +5 -4
  119. package/.pi/prompts/harness-steer.md +30 -0
  120. package/.pi/scripts/README.md +1 -0
  121. package/.pi/scripts/graphify-kb-updater.mjs +398 -0
  122. package/.pi/scripts/harness-agents-manifest.mjs +1 -1
  123. package/.pi/scripts/harness-project-toggle.mjs +129 -0
  124. package/.pi/scripts/harness-sentrux-cli.mjs +142 -0
  125. package/.pi/scripts/harness-verify.mjs +22 -6
  126. package/.pi/scripts/harness-web-policy-guard.mjs +68 -0
  127. package/.pi/scripts/validate-plan-dag.mjs +3 -3
  128. package/AGENTS.md +1 -0
  129. package/CHANGELOG.md +23 -0
  130. package/README.md +94 -58
  131. package/package.json +5 -4
  132. package/.pi/agents/harness/executor.md +0 -47
  133. package/.pi/agents/harness/planning/scout-graphify.md +0 -37
  134. package/.pi/agents/harness/planning/scout-semantic.md +0 -39
  135. package/.pi/agents/harness/planning/scout-structure.md +0 -35
  136. package/.pi/prompts/git-sync.md +0 -124
  137. /package/.pi/agents/harness/{tie-breaker.md → reviewing/tie-breaker.md} +0 -0
@@ -17,7 +17,7 @@ Manual harness steps required copying `run_id` and `plan-packet.json` paths betw
17
17
  4. **Hook order:** `harness-run-context` `before_agent_start` allocates/reuses `run_id` before `trace-recorder` `agent_start`. Trace writes phase files `trace-<phase>.json` plus rollup `trace.json`.
18
18
  5. PostHog `harness_run_started` at most once per logical `run_id`.
19
19
  6. Short commands: `/harness-run`, `/harness-eval`, etc. without args; recovery via `/harness-run-status`, `/harness-use-run`.
20
- 7. After execute, handoff recommends **`/harness-eval`** in the same session; review commands spawn isolated subagents (see ADR 0032). `active-run.json` still supports cross-session recovery when Pi was closed mid-run.
20
+ 7. After execute, handoff recommends **`/harness-eval`** in the same session; review commands spawn isolated subagents (see ADR 0032). `active-run.json` still supports cross-session recovery when Pi was closed mid-run. On a **new Pi session**, if disk has a non-stale active run but this session has no `harness-run-context` entry yet, show a one-time resume message and live-widget hint pointing at **`/harness-use-run <run-id>`** (no silent auto-bind).
21
21
  8. `hasApprovedPlanSignal` uses user-visible prompt only; execute requires `plan_ready` from disk validation **and** recorded `ask_user` approval (or `harness-plan-approval` entry).
22
22
  9. **Plan-phase writes:** policy-gate allows `write`/`edit` only on canonical `.pi/harness/runs/<run_id>/plan-packet.json` after approval; all other paths stay blocked until execute phase.
23
23
  10. **Approval-before-persist:** agents present the full plan, call `ask_user` (Approve / Request changes / Cancel), then write the packet. `--quick` narrows planning only — it does not skip approval.
@@ -28,9 +28,16 @@ Harness slash prompts duplicated logic already defined in `harness/*` agents. Th
28
28
  - Orchestrator must parse subagent JSON reliably and pass complete spawn context.
29
29
  - Scope enforcement remains prompt-driven for executor until optional path allowlist.
30
30
 
31
+ ## Amendment (2026-05-23)
32
+
33
+ - **`/harness-review`** is the master **post-run** orchestrator (benchmark + verdict + adversary). See ADR 0039.
34
+ - **`/harness-eval`** and **`/harness-critic`** are thin deprecated aliases; do not implement separate pipelines.
35
+ - Post-run artifacts use **`submit_*`** + **`harness_artifact_ready`** per ADR 0037; parent does not parse subprocess JSON into `artifacts/eval-verdict.yaml`.
36
+
31
37
  ## References
32
38
 
33
39
  - `.pi/prompts/harness-*.md`
40
+ - ADR 0039 — post-run review gate
34
41
  - `.pi/agents/harness/*.md`
35
42
  - `vendor/pi-subagents/src/subagents.ts`, `.pi/extensions/lib/harness-subagents-bridge.ts`
36
43
  - `.pi/extensions/lib/harness-subagent-policy.ts`
@@ -9,13 +9,13 @@
9
9
 
10
10
  ## Decision
11
11
 
12
- 1. **Always-on research chain** after parallel scouts:
12
+ 1. **Always-on research chain** after planning context (ADR 0041; **sequential** — WBS before approach):
13
13
  - `harness/planning/decompose` — DeepMind-style problem decomposition (`PlanDecompositionBrief`)
14
- - `harness/planning/hypothesis` — DARWIN hypothesis generation (`PlanHypothesisBrief`)
14
+ - `harness/planning/hypothesis` — DARWIN hypothesis generation (`PlanHypothesisBrief`); spawned only after `artifacts/decomposition.yaml` exists
15
15
  2. **Parent maps hypothesis → PlanPacket** — `plan-packet.schema.json` unchanged; execution gating stable.
16
16
  3. **Review Gate (ADR 0035):** outcome-based debate with `hypothesis-validator` on R1 (blind — task + hypothesis only). Retired `hypothesis-eval` as a separate pre-approval agent.
17
17
  4. **`approve_plan` optional `research_brief`** — rendered in `plan-review.md`; not written to `plan-packet.json`.
18
- 5. **`--quick`** still skips semantic scout only; never skips decompose/hypothesis.
18
+ 5. **`--quick`** still skips semantic coverage in planning context only; never skips decompose/hypothesis.
19
19
 
20
20
  ## Consequences
21
21
 
@@ -13,14 +13,14 @@ ADR 0034–0035 established Darwin research and outcome-based Review Gate debate
13
13
 
14
14
  ## Decision
15
15
 
16
- 1. **Phase 3.5** — After decompose/hypothesis, parent spawns in parallel:
17
- - `harness/planning/implementation-researcher` `PlanImplementationResearchBrief` `artifacts/implementation-research.yaml`
18
- - `harness/planning/stack-researcher` `PlanStackBrief` `artifacts/stack.yaml`
16
+ 1. **Phase 3.5** — After decompose/hypothesis, parent produces (subprocess optional):
17
+ - `artifacts/implementation-research.yaml` (`PlanImplementationResearchBrief`) inline and/or `implementation-researcher`
18
+ - `artifacts/stack.yaml` (`PlanStackBrief`) inline and/or `stack-researcher`
19
19
  2. Research stays **outside** debate; debate agents cite artifacts, no web tools.
20
- 3. **Phase 4d** — `harness_plan_debate_eligibility` (pre-debate only) selects `full | standard | light` and `required_focuses`; persisted on messenger + bus at `harness_debate_open`.
20
+ 3. **Phase 4d** — `harness_plan_debate_eligibility` (pre-debate only) selects `full | standard | light | fast` and `required_focuses`; persisted on messenger + bus at `harness_debate_open`.
21
21
  4. **Light profile** — `spec` + `quality` only, `min_focus_rounds=2`, reduced global cap; gate uses stored `required_focuses` (not hardcoded four).
22
22
  5. **Sprint auditor** — shared `lanesForRound(roundIndex, focus)` spawns sprint lane when `focus === quality` OR `roundIndex >= 4`.
23
- 6. **`--quick`** still skips semantic scout only; never skips Phase 3.5 or debate.
23
+ 6. **`--quick`** still skips semantic coverage in planning context only; never skips Phase 3.5 artifacts (med/high risk) or debate.
24
24
 
25
25
  ## Profiles
26
26
 
@@ -29,6 +29,9 @@ ADR 0034–0035 established Darwin research and outcome-based Review Gate debate
29
29
  | full | high risk, material fork, open implementation questions, DAG manual patch, many tensions | all four | 4 |
30
30
  | standard | default (ambiguous → standard) | all four | 4 |
31
31
  | light | low risk, no fork, high-confidence implementation + clear stack primary | spec, quality | 2 |
32
+ | fast | med/low, clear stack, no open questions | spec, quality | 1 (consolidated `review_gate_mode`) |
33
+
34
+ See [practice-map.md](../practice-map.md) and [ADR 0040](0040-practice-grounded-orchestration.md).
32
35
 
33
36
  ## Consequences
34
37
 
@@ -0,0 +1,47 @@
1
+ # ADR 0039: Post-run review gate (`/harness-review`)
2
+
3
+ - **Status:** Accepted
4
+ - **Date:** 2026-05-23
5
+
6
+ ## Context
7
+
8
+ Post-run flow was split across `/harness-eval`, a thin `/harness-review` (verdict-only), and `/harness-critic`. Cross-session resume left `owner_pi_session_id` on the plan session, blocking parent orchestration. Status routing used session handoff strings instead of canonical `artifacts/eval-verdict.yaml`. Prompts still instructed parent JSON parsing and `write` to eval artifacts (ADR 0037 violation).
9
+
10
+ ## Decision
11
+
12
+ 1. **`/harness-review`** is the **master post-run orchestrator** (plan-grade): deterministic gates → benchmark evaluator → policy verdict → adversary (parallel with verdict when precheck allows) → optional tie-breaker → **`artifacts/review-outcome.yaml`**. Always complete review before replan; blocked execute routes here, not `/harness-plan`. `--quick` skips adversary and tie-breaker. Steer attempts 2+ may use **lite** review (benchmark + verdict; skip adversary unless prior `block_merge`).
13
+ 2. **`/harness-eval`** and **`/harness-critic`** are **deprecated aliases** that forward to `/harness-review` in the same turn.
14
+ 3. **Ownership:** `/harness-use-run --claim` and auto-claim on post-run commands (unless `--readonly`) set `owner_pi_session_id` and `pi_session_id` to the current Pi session.
15
+ 4. **Disk truth:** `resolveCompletionStatuses` reads `artifacts/eval-verdict.yaml` and `artifacts/adversary-report.yaml` for `nextStepAfterOutcome` and widget next steps. Persisted `next_recommended_command` on `run-context.yaml` wins when set.
16
+ 5. **Artifacts:** Evaluator uses `submit_eval_verdict`; adversary uses `submit_adversary_report`. Parent gates with `harness_artifact_ready` only. Parent may write `artifacts/benchmark-log.yaml` via `write_harness_yaml`; parent must not write eval/adversary verdict YAML.
17
+ 6. **Rollback:** `submit_executor_handoff` mirrors `rollback_refs` to `artifacts/executor-rollback.yaml` (no `artifacts/*.json`).
18
+
19
+ ## Phases (orchestrator)
20
+
21
+ | Phase | Actor | Output |
22
+ |-------|--------|--------|
23
+ | 0 | Parent | Parse args; claim run; require execute complete |
24
+ | 1 | Parent | `harness-verify.mjs`; optional `benchmark-log.yaml` |
25
+ | 2 | `harness/evaluator` benchmark | `eval-verdict.yaml` |
26
+ | 2b | Parent | Record benchmark fail in review-outcome; continue to verdict unless harness-verify hard-stops |
27
+ | 3 | `harness/evaluator` verdict | `eval-verdict.yaml` (policy) |
28
+ | 4 | `harness/adversary` | `adversary-report.yaml` |
29
+ | 5 | `harness/tie-breaker` | conditional |
30
+
31
+ ## Consequences
32
+
33
+ ### Positive
34
+
35
+ - One command after `/harness-run`; same-session and cross-session resume with `--claim`.
36
+ - Widget and run context align with on-disk verdicts.
37
+
38
+ ### Negative
39
+
40
+ - Full post-run pipeline latency is sequential in one command (acceptable vs broken multi-session flow).
41
+
42
+ ## References
43
+
44
+ - ADR 0032 (amended), ADR 0037
45
+ - `.pi/prompts/harness-review.md`
46
+ - `.pi/lib/harness-run-context.ts` (`claimRunOwnership`, `resolveCompletionStatuses`)
47
+ - `.agents/skills/harness-review/SKILL.md`
@@ -0,0 +1,40 @@
1
+ # ADR 0040: Practice-grounded orchestration and team topology
2
+
3
+ - **Status:** Accepted
4
+ - **Date:** 2026-05-23
5
+
6
+ ## Context
7
+
8
+ Harness commands (`/harness-plan`, `/harness-run`, `/harness-review`) already followed structured planning, generator–evaluator separation, and outcome-based debate (ADRs 0032–0039). The graphify corpus (PMBOK process groups, Team Topologies, Code Complete inspection, harness engineering, Lean spikes) was not surfaced in prompts—orchestrators could spawn redundant parallel thinkers (e.g. decompose ∥ hypothesis) and debate lanes without clear RACI.
9
+
10
+ ## Decision
11
+
12
+ 1. **Practice map** — [practice-map.md](../practice-map.md) is the source of truth: phase → practice → agent/script → spawn topology, debate RACI, anti-patterns.
13
+ 2. **Planning sequence** — After planning context (ADR 0041), **decompose then hypothesis** (sequential invariant). Hypothesis requires `artifacts/decomposition.yaml` (amends ADR 0034). For `low`/`med` risk, a single **plan-synthesizer** spawn may produce decomposition, hypothesis, and `execution_plan` in one pass, but those artifacts must still land on disk before blind validation (ADR 0042)—sequential **invariant**, not necessarily three parent spawn batches.
14
+ 3. **Reconnaissance dedup** — `decompose` must not run `graphify query` when `artifacts/planning-context.yaml` has `coverage.architecture.status: ok` (legacy: `scout-graphify.yaml` with `status: ok`).
15
+ 4. **Team topology rules** — Documented in practice-map and orchestration skills:
16
+ - Parallel only for independent merges (implementation ∥ stack; optional legacy scouts ≤3).
17
+ - Max 2 research lanes, 1 optional `planning-context` subagent, 1 executor, 1 debate agent per `subagent` batch.
18
+ - Debate: parent is chair; one agent per batch; Fagan-style roles (inspector, red team, DoD auditor, blind verifier, recorder).
19
+ 5. **Command prompts** — Name the proven practice per phase; link practice-map.
20
+ 6. **Profiles** — `fast` consolidated Review Gate documented alongside `light` threaded gate (ADR 0036 amended).
21
+
22
+ ## Consequences
23
+
24
+ ### Positive
25
+
26
+ - Every harness phase traceable to corpus-backed practice.
27
+ - Fewer detached hypotheses and duplicate graphify work (strengthened by ADR 0041 planning-context artifact).
28
+ - Clearer debate roster; smaller teams on low-risk plans via `fast`/`light`.
29
+
30
+ ### Negative
31
+
32
+ - Slightly longer plan phase wall-clock (sequential decompose → hypothesis).
33
+ - More documentation for agents to reference.
34
+
35
+ ## References
36
+
37
+ - [practice-map.md](../practice-map.md)
38
+ - ADR 0034, ADR 0036, ADR 0039
39
+ - `.pi/prompts/harness-plan.md`, `.pi/prompts/harness-run.md`, `.pi/prompts/harness-review.md`
40
+ - `graphify-out/GRAPH_REPORT.md` — Planning / Executing / Monitoring communities, Team Topologies, Harness Engineering
@@ -0,0 +1,39 @@
1
+ # ADR 0041: Intelligent planning reconnaissance (tools over tool-scouts)
2
+
3
+ - **Status:** Accepted
4
+ - **Date:** 2026-05-23
5
+
6
+ ## Context
7
+
8
+ ADR 0033 and 0040 mandated three parallel planning scouts (`scout-graphify`, `scout-structure`, `scout-semantic`), each bound to one tool family. That enforced coverage but constrained orchestrator intelligence: the parent always paid for three subprocesses even when one tool pass or a short graphify query sufficed.
9
+
10
+ The graphify corpus (Superpowers: *Rigid Where It Matters, Flexible Where It Doesn't*; context engineering: *Context > Model Intelligence*) supports hard gates on **artifacts and phase order**, not on **how many subprocesses** gather context.
11
+
12
+ ## Decision
13
+
14
+ 1. **Phase 1 default** — Parent compiles `artifacts/planning-context.yaml` using repo tools (`graphify`, `sg`, `ccc`, reads) per task need. No mandatory scout subprocess batch.
15
+ 2. **Artifact contract** — `plan-planning-context.schema.json` requires `coverage.architecture` and `coverage.structure` at `ok` or `partial`; `coverage.semantic` may be `skipped` when `--quick`.
16
+ 3. **Optional subprocess** — At most one `harness/planning/planning-context` subagent when isolation warrants; `submit_planning_context` writes the canonical artifact.
17
+ 4. **Legacy compat (one release)** — `scout-*.yaml` trio still satisfies approval readiness with deprecation warning; `decompose` dedup reads `planning-context` first.
18
+ 5. **Phase 3.5** — Requires `implementation-research.yaml` and `stack.yaml` for med/high risk; subprocess researchers optional (parent may spike inline).
19
+ 6. **Spawn topology** — Remove default parallel scout batch rules; keep the sequential decompose → hypothesis and debate laws.
20
+
21
+ ## Consequences
22
+
23
+ ### Positive
24
+
25
+ - Orchestrator chooses tools and depth by task; fewer ceremonial subprocesses.
26
+ - Single shared artifact reduces merge friction and redundant graphify in decompose.
27
+ - Hard gates (DAG, debate, approval) unchanged.
28
+
29
+ ### Negative
30
+
31
+ - Parent context window bears more reconnaissance load unless `planning-context` subagent is used.
32
+ - Legacy scout agents remain on disk until removal after deprecation window.
33
+
34
+ ## References
35
+
36
+ - [practice-map.md](../practice-map.md)
37
+ - ADR 0033, ADR 0040
38
+ - `.pi/prompts/harness-plan.md`
39
+ - `plan-planning-context.schema.json`
@@ -0,0 +1,35 @@
1
+ # ADR 0042: Agent-native orchestration
2
+
3
+ - **Status:** Accepted
4
+ - **Date:** 2026-05-23
5
+
6
+ ## Context
7
+
8
+ Harness commands inherited human PM rituals: serial debate “meetings,” ticket-granularity WBS, and tool calls that re-embed full plan packets in model context. Agents optimize for context window, spawn cost, and verifiable artifacts—not calendar boundaries or social coordination (see [practice-map.md](../practice-map.md)).
9
+
10
+ ## Decision
11
+
12
+ 1. **Agent translation column** — practice-map documents human practice → agent equivalent (scheduler + gates, lake-first plans, path-first tools, steer loop).
13
+ 2. **Boiling lakes** — Fewer `work_items` with richer specs and `context_bundle_path`; `executor_strategy` on PlanPacket (`single_pass` | `per_lake` | `per_work_item`).
14
+ 3. **Plan-verify probes** — For `fast`/`standard` profiles, parallel inspector + adversary probes replace serial “one role per batch” debate where gate supports `parallel_probes` (ADR 0036 extended).
15
+ 4. **Plan synthesizer** — For `low`/`med` risk, one `harness/planning/plan-synthesizer` pass may replace the separate authoring spawns (decompose, hypothesis, `execution_plan`); **decomposition + hypothesis artifacts still required** on disk for blind validation (ADR 0040 invariant).
16
+ 5. **Path-first tools** — See ADR 0043; disk is source of truth for approval and submit pipelines.
17
+ 6. **Steer loop** — See ADR 0044; always complete post-run review; repair vs plan revise routing.
18
+
19
+ ## Consequences
20
+
21
+ ### Positive
22
+
23
+ - Lower plan/review wall-clock and token use.
24
+ - Plans sized for agent throughput, not sprint ticket count.
25
+
26
+ ### Negative
27
+
28
+ - More ADRs and schema fields for agents to learn.
29
+ - Migration period: optional fat tool args remain for one release.
30
+
31
+ ## References
32
+
33
+ - [practice-map.md](../practice-map.md)
34
+ - ADR 0040, 0041, 0043, 0044
35
+ - `.cursor/plans/agent-native_harness_workflows_1d353489.plan.md` (design source)
@@ -0,0 +1,38 @@
1
+ # ADR 0043: Path-first harness tool contracts
2
+
3
+ - **Status:** Accepted
4
+ - **Date:** 2026-05-23
5
+
6
+ ## Context
7
+
8
+ `approve_plan`, `create_plan`, and `submit_*` often pass full YAML/JSON documents in tool arguments when the same bytes already exist under `.pi/harness/runs/<run_id>/`. That duplicates tokens and trains models to carry large structs in chat history.
9
+
10
+ ## Decision
11
+
12
+ 1. **`approve_plan`** — Primary API: `approve_plan({ human_summary?: string })`. Extension loads `plan_packet` from `runCtx.plan_packet_path` and `research-brief.yaml` from the run dir. The optional `plan_packet` / `research_brief` arguments are deprecated and remain accepted for one release.
13
+ 2. **`create_plan`** — Primary API: `create_plan()` or `create_plan({ plan_packet_path?: string })`. Verifies approval marker and optional content hash from approve time.
14
+ 3. **`submit_*`** — Accept `source_path` under the active run; read, validate, promote to canonical path. `document` remains optional (deprecated).
15
+ 4. **`merge_harness_yaml`** — Parent merges patches from artifact paths without pasting bodies into tool args.
16
+ 5. **Tool results** — Return `{ path, sha256, status }` (and ids where relevant), not full documents.
17
+
18
+ ## Safety
19
+
20
+ - Draft/canonical packet must exist on disk before approve.
21
+ - Re-`approve_plan` required when `execution_plan` or `acceptance_checks` change after a `plan_gap` revise (hash gate).
22
+
23
+ ## Consequences
24
+
25
+ ### Positive
26
+
27
+ - Approval turns stay small in session history.
28
+ - Subagents write once to disk; submit is O(path) tokens.
29
+
30
+ ### Negative
31
+
32
+ - Agents must write drafts before approve/submit (explicit discipline).
33
+
34
+ ## References
35
+
36
+ - `.pi/extensions/harness-plan-approval.ts`
37
+ - `.pi/extensions/lib/harness-subagent-submit-pipeline.ts`
38
+ - ADR 0042, 0044
@@ -0,0 +1,37 @@
1
+ # ADR 0044: Harness steer loop (post-run repair)
2
+
3
+ - **Status:** Accepted
4
+ - **Date:** 2026-05-23
5
+
6
+ ## Context
7
+
8
+ After `/harness-run`, failed benchmarks or blocked execution previously routed users to `/harness-plan "<new task>"` even when the approved plan was still valid—high friction and duplicate planning context.
9
+
10
+ ## Decision
11
+
12
+ 1. **Always review** — `/harness-run` ends with `next_command: /harness-review` (including `blocked` / partial work). Remove benchmark fail-fast skip of verdict/adversary (ADR 0039 amended).
13
+ 2. **Review artifacts** — Parent writes `artifacts/review-outcome.yaml` and `artifacts/repair-brief.yaml` (path pointers, not pasted bodies).
14
+ 3. **Remediation routing** — `review-outcome.remediation_class`: `implementation_gap` → `/harness-steer`; `plan_gap` → `/harness-plan` revise with `repair_brief_path`; `pass` → policy status. **Review outcome wins** over executor `scope_drift` when they disagree; tie → `plan_gap`.
15
+ 4. **Plan-gap revise reset** — When review returns `plan_gap` and the next `/harness-plan` runs in revise mode, archive stale plan-phase debate state and generated planning artifacts under `artifacts/revisions/<timestamp>/` before the planner starts. Preserve review repair artifacts in place so the new planning round starts clean while retaining audit history.
16
+ 5. **`/harness-steer`** — Thin orchestrator: read briefs, set policy **phase `execute`**, spawn `harness/executor` with `mode: repair`, then `/harness-review` again.
17
+ 6. **Caps** — `HARNESS_STEER_MAX_ATTEMPTS` (default 3). **Tiered review:** full review on initial run + steer 1; steers 2+ use lite (benchmark + verdict) unless prior `block_merge` or user forces full.
18
+ 7. **Sentrux** — Refresh baseline or compare new violations only after steer mutations (avoid false degraded on every attempt).
19
+ 8. **Evaluate-phase writes** — Orchestrator may write review/steer YAML under run `artifacts/` in `evaluate`/`adversary` phase (allowlisted files).
20
+
21
+ ## Consequences
22
+
23
+ ### Positive
24
+
25
+ - One `approve_plan`; many repair cycles without re-typing tasks.
26
+ - `harness-auto` can loop until pass or cap.
27
+
28
+ ### Negative
29
+
30
+ - Higher review cost on failed runs (mitigated by tiered adversary).
31
+
32
+ ## References
33
+
34
+ - `.pi/prompts/harness-steer.md`
35
+ - `.pi/harness/specs/review-outcome.schema.json`, `repair-brief.schema.json`
36
+ - `nextStepAfterOutcome` in `.pi/lib/harness-run-context.ts`
37
+ - ADR 0039 (amended), 0043
@@ -0,0 +1,33 @@
1
+ # ADR 0045: Phase-scoped harness agent directories
2
+
3
+ - **Status:** Accepted
4
+ - **Date:** 2026-05-24
5
+
6
+ ## Context
7
+
8
+ Harness prompts had accumulated mixed agent ids such as `harness/executor`, `harness/evaluator`, and legacy planning `scout-*` agents. The current orchestration model is phase-scoped:
9
+
10
+ - planning context is parent-led or handled by `harness/planning/planning-context`
11
+ - execution is a single running agent
12
+ - post-run review is handled by reviewing agents
13
+
14
+ Flat run/review agent ids made prompt intent less obvious and left legacy planning scout agents discoverable even after ADR 0041 moved reconnaissance to parent tool use plus `planning-context.yaml`.
15
+
16
+ ## Decision
17
+
18
+ Use phase-scoped agent directories and ids for run/review orchestration:
19
+
20
+ - `.pi/agents/harness/running/executor.md` → `harness/running/executor`
21
+ - `.pi/agents/harness/reviewing/evaluator.md` → `harness/reviewing/evaluator`
22
+ - `.pi/agents/harness/reviewing/adversary.md` → `harness/reviewing/adversary`
23
+ - `.pi/agents/harness/reviewing/tie-breaker.md` → `harness/reviewing/tie-breaker`
24
+
25
+ Remove the legacy planning `scout-graphify`, `scout-structure`, and `scout-semantic` agents. Planning reconnaissance is represented by `artifacts/planning-context.yaml` only.
26
+
27
+ ## Consequences
28
+
29
+ - `/harness-run` must spawn only `harness/running/executor`.
30
+ - `/harness-review` must spawn only agents under `harness/reviewing/`.
31
+ - Submit-tool allowlists, precheck/topology policy, review-integrity policy, tests, and `agents.manifest.json` track the new ids.
32
+ - When post-run review records `next_recommended_command: "/harness-plan (mode: revise)"`, review-integrity treats `harness/planning/*` subagents as a phase handoff, not a review-isolation violation.
33
+ - Old scout YAML artifacts no longer satisfy plan approval readiness; `artifacts/planning-context.yaml` is required unless explicitly waived.
@@ -24,6 +24,17 @@ Team-shared ADRs for the ultimate-pi harness live under `.pi/harness/docs/adrs/`
24
24
  | [0036](0036-implementation-research-and-selective-debate.md) | Implementation research and selective debate | Accepted |
25
25
  | [0037](0037-subagent-submit-tools.md) | Subagent submit tools (subprocess extension) | Accepted |
26
26
  | [0038](0038-budget-telemetry-only.md) | Budget caps telemetry-only by default | Accepted |
27
+ | [0039](0039-harness-post-run-review-gate.md) | `/harness-review` master post-run gate | Accepted |
28
+ | [0040](0040-practice-grounded-orchestration.md) | Practice-grounded orchestration & team topology | Accepted |
29
+ | [0041](0041-intelligent-planning-reconnaissance.md) | Intelligent planning reconnaissance (tools over tool-scouts) | Accepted |
30
+ | [0042](0042-agent-native-orchestration.md) | Agent-native orchestration (lakes, plan-verify probes, synthesizer) | Accepted |
31
+ | [0043](0043-path-first-harness-tools.md) | Path-first harness tool contracts | Accepted |
32
+ | [0044](0044-harness-steer-loop.md) | Post-run steer loop (repair vs plan revise) | Accepted |
33
+ | [0045](0045-phase-scoped-agent-directories.md) | Phase-scoped harness agent directories | Accepted |
34
+
35
+ ## Practice map
36
+
37
+ Phase-to-practice mapping for slash commands: [practice-map.md](../practice-map.md).
27
38
 
28
39
  ## Template
29
40
 
@@ -0,0 +1,163 @@
1
+ # Graphify KB updater runbook
2
+
3
+ ## Purpose and scope
4
+
5
+ `graphify-kb-updater` keeps the local Graphify source corpus current for agentic engineering, context engineering, harness engineering, AI coding harnesses, research papers/feeds, articles/blogs, local books/transcripts, YouTube candidates, and competitor intelligence.
6
+
7
+ The approved operating model is **hybrid allowlist auto-promotion with conservative staging**:
8
+
9
+ - Daily local automation may auto-promote only explicitly approved allowlisted public sources (`article`, `repo`, or `release`) with complete provenance and rights/access metadata.
10
+ - Repository and release candidates are metadata-specific source classes; they do not inherit generic article behavior and must be authorized by `allowed_source_classes` on the allowlist entry.
11
+ - Books, transcripts, YouTube/video material, paid/copyrighted/mirrored material, unclear-license content, and unknown open-web sources remain staged until manually approved.
12
+ - Competitor monitoring is a curated taxonomy/watchlist/reporting signal, not an exhaustive crawler.
13
+ - Pi-agent-open integration is intentionally limited/deferred: opening Pi should do at most a low-latency, no-network stale check. It must not perform synchronous web discovery, promotion, or Graphify mutation.
14
+
15
+ ## Governance and approval boundaries
16
+
17
+ Required rights/access fields for every promotion:
18
+
19
+ - `license`
20
+ - `access`
21
+ - `approved_by`
22
+ - `approved_at`
23
+
24
+ Allowlist auto-promotion requires all of the following:
25
+
26
+ 1. `.pi/harness/corpus/graphify-kb-updater.config.json` has `auto_promote_allowlist: true`.
27
+ 2. The candidate domain is present in `allowlist` with `approved: true`.
28
+ 3. If the allowlist entry has `allowed_source_classes`, it includes the candidate `kind` (`article`, `repo`, or `release`).
29
+ 4. The candidate itself has `approved: true`.
30
+ 5. `provenance.origin` and `provenance.locator` are complete.
31
+ 6. `rights_access` is complete.
32
+ 7. The candidate is not a risky source class that requires manual review.
33
+
34
+ Risky source classes (`book`, `transcript`, `youtube`) always require explicit approval and complete rights/access metadata. Raw HTTP shell paths are forbidden; keep discovery/fetch through approved harness web/API abstractions and verify with `.pi/scripts/harness-web-policy-guard.mjs`.
35
+
36
+ ## Manual commands
37
+
38
+ Dry-run, no mutation of `raw/`, state, or `graphify-out/`:
39
+
40
+ ```bash
41
+ node .pi/scripts/graphify-kb-updater.mjs --dry-run --pilot-report
42
+ ```
43
+
44
+ Apply approved/promotable candidates and refresh Graphify only when promoted files changed:
45
+
46
+ ```bash
47
+ node .pi/scripts/graphify-kb-updater.mjs --apply --refresh-graph --pilot-report
48
+ ```
49
+
50
+ Apply without graph mutation:
51
+
52
+ ```bash
53
+ node .pi/scripts/graphify-kb-updater.mjs --apply --skip-graph --pilot-report
54
+ ```
55
+
56
+ Validate scheduler templates:
57
+
58
+ ```bash
59
+ node .pi/scripts/graphify-kb-updater.mjs --scheduler-smoke
60
+ ```
61
+
62
+ Run web-policy guard:
63
+
64
+ ```bash
65
+ node .pi/scripts/harness-web-policy-guard.mjs
66
+ ```
67
+
68
+ ## Approval workflow
69
+
70
+ 1. Review dry-run JSON: candidate count, source counts, competitor labels, duplicate/skipped/blocked counts, stale warnings, planned promotions, and graph action.
71
+ 2. For a candidate, add it to `.pi/harness/corpus/graphify-kb-updater.config.json` `review_queue` with:
72
+ - `kind` (`article`, `repo`, `release`, `paper`, `book`, `transcript`, or `youtube`)
73
+ - `title`
74
+ - `url` or `path`
75
+ - `approved: true`
76
+ - `rights_access` object with all required fields
77
+ - optional `competitor_labels` or provenance notes.
78
+ - for `repo`/`release` auto-promotion, additionally ensure the matching allowlist entry has `allowed_source_classes` that includes `repo` or `release`.
79
+ 3. For local files, you may place `<file>.rights.json` beside the source, but risky classes still require explicit approval before promotion.
80
+ 4. Run `node .pi/scripts/graphify-kb-updater.mjs --apply --refresh-graph`.
81
+ 5. Promoted sources land under `raw/graphify-kb-updates/<kind>/` with `.provenance.json` sidecars.
82
+
83
+ ## Daily scheduler setup
84
+
85
+ A systemd user timer is the primary path; it runs daily at 08:30 with a randomized delay:
86
+
87
+ ```bash
88
+ mkdir -p ~/.config/ultimate-pi ~/.config/systemd/user ~/.local/state/ultimate-pi
89
+ cp .pi/harness/corpus/systemd/graphify-kb-updater.env.template ~/.config/ultimate-pi/graphify-kb-updater.env
90
+ # edit UP_ROOT in the env file
91
+ cp .pi/harness/corpus/systemd/graphify-kb-updater.service ~/.config/systemd/user/
92
+ cp .pi/harness/corpus/systemd/graphify-kb-updater.timer ~/.config/systemd/user/
93
+ systemctl --user daemon-reload
94
+ systemctl --user enable --now graphify-kb-updater.timer
95
+ systemctl --user list-timers graphify-kb-updater.timer
96
+ ```
97
+
98
+ The service uses `flock` as a non-overlap lock, `timeout 45m`, explicit env, and append-only logs.
99
+
100
+ The cron fallback also runs daily at 08:30: copy the line from `.pi/harness/corpus/cron.example` into your crontab with `crontab -e`, then edit `UP_ROOT` in that line.
101
+
102
+ ## Reports, logs, and fields
103
+
104
+ Apply runs write:
105
+
106
+ - Registry: `.pi/harness/corpus/graphify-kb-updater-state/registry.json`
107
+ - Per-run logs: `.pi/harness/corpus/graphify-kb-updater-state/logs/`
108
+ - Scheduler logs: `~/.local/state/ultimate-pi/graphify-kb-updater.log` and `.err`
109
+
110
+ Each run reports:
111
+
112
+ - `last_run_at`
113
+ - `candidate_count`, `promoted_count`, `blocked_count`, `skipped_count`, `duplicate_skips`, `failure_count`
114
+ - `counts.by_kind`, `counts.by_source_type`, `counts.by_competitor_label`, `counts.allowlisted`
115
+ - `staged_count`, `review_queue_count`, and `review_queue` items with reason codes and next actions
116
+ - `stale_warnings`
117
+ - `changed_existing_count` for same URL/path content changes
118
+ - `graph.action`, `graph.exit_status`, and Graphify report path when refreshed
119
+ - optional pilot metrics: `frontier_recall_proxy`, `promoted_precision_proxy`, `duplicate_noise_rate`, `graphify_success`
120
+
121
+ Review these fields before enabling unattended mode and after every config change.
122
+
123
+ ## Troubleshooting
124
+
125
+ - `missing_complete_provenance`: add `provenance.origin` and `provenance.locator`.
126
+ - `missing_rights_access_approval`: add complete rights/access metadata.
127
+ - `manual_approval_required`: set `approved: true` after source and rights review.
128
+ - `duplicate_unchanged`: candidate was already promoted and content hash is unchanged.
129
+ - `changed_existing_count > 0`: a stable URL/path changed content; review before relying on previous conclusions.
130
+ - Graphify skipped: the run had no promoted changes, was invoked with `--skip-graph`, or was invoked without `--refresh-graph`.
131
+ - Graphify failed: inspect `graph.stderr`, run `graphify update .` manually, and keep the scheduler disabled until fixed.
132
+ - Scheduler did not run: check `systemctl --user status graphify-kb-updater.timer`, the env file path, and scheduler logs.
133
+ - Overlap: the lock file at `%t/graphify-kb-updater.lock` (`%t` is the systemd runtime-directory specifier) or `/tmp/graphify-kb-updater.lock` prevents concurrent runs.
134
+
135
+ ## Disable
136
+
137
+ ```bash
138
+ systemctl --user disable --now graphify-kb-updater.timer
139
+ systemctl --user reset-failed graphify-kb-updater.service
140
+ ```
141
+
142
+ Remove any cron line copied from `.pi/harness/corpus/cron.example`.
143
+
144
+ ## Rollback
145
+
146
+ 1. Disable the systemd timer and remove the cron line.
147
+ 2. Use registry/log promoted paths to remove or quarantine promoted files under `raw/graphify-kb-updates/`.
148
+ 3. Restore `.pi/harness/corpus/graphify-kb-updater-state/registry.json` from backup, or mark candidates rejected/quarantined.
149
+ 4. Revert implementation files if needed:
150
+
151
+ ```bash
152
+ git checkout -- .pi/scripts/graphify-kb-updater.mjs .pi/harness/corpus/graphify-kb-updater.config.json .pi/harness/corpus/systemd/graphify-kb-updater.timer .pi/harness/corpus/cron.example test/graphify-kb-updater.test.mjs .pi/harness/docs/graphify-kb-updater-runbook.md
153
+ ```
154
+
155
+ 5. Regenerate Graphify from valid sources:
156
+
157
+ ```bash
158
+ graphify update .
159
+ ```
160
+
161
+ ## Pilot gate before unattended mode
162
+
163
+ Run at least one dry-run and one supervised apply. Record frontier recall proxy, promoted precision proxy, duplicate/noise rate, skipped reasons, stale warnings, and Graphify success from `--pilot-report`. Enable the timer only if promoted precision is acceptable and graph refresh succeeds.