ultimate-pi 0.16.0 → 0.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137) hide show
  1. package/.agents/skills/harness-context/SKILL.md +13 -6
  2. package/.agents/skills/harness-debate-plan/SKILL.md +37 -20
  3. package/.agents/skills/harness-eval/SKILL.md +6 -21
  4. package/.agents/skills/harness-governor/SKILL.md +4 -3
  5. package/.agents/skills/harness-orchestration/SKILL.md +39 -51
  6. package/.agents/skills/harness-plan/SKILL.md +23 -12
  7. package/.agents/skills/harness-review/SKILL.md +52 -0
  8. package/.agents/skills/harness-sentrux-setup/SKILL.md +13 -1
  9. package/.agents/skills/harness-steer/SKILL.md +14 -0
  10. package/.pi/agents/harness/adversary.md +3 -10
  11. package/.pi/agents/harness/evaluator.md +3 -12
  12. package/.pi/agents/harness/executor.md +12 -14
  13. package/.pi/agents/harness/planning/decompose.md +7 -4
  14. package/.pi/agents/harness/planning/hypothesis-validator.md +2 -0
  15. package/.pi/agents/harness/planning/hypothesis.md +4 -2
  16. package/.pi/agents/harness/planning/implementation-researcher.md +1 -1
  17. package/.pi/agents/harness/planning/plan-adversary.md +2 -0
  18. package/.pi/agents/harness/planning/plan-evaluator.md +2 -0
  19. package/.pi/agents/harness/planning/plan-synthesizer.md +25 -0
  20. package/.pi/agents/harness/planning/planning-context.md +48 -0
  21. package/.pi/agents/harness/planning/review-integrator.md +2 -0
  22. package/.pi/agents/harness/planning/scout-graphify.md +3 -1
  23. package/.pi/agents/harness/planning/scout-semantic.md +3 -1
  24. package/.pi/agents/harness/planning/scout-structure.md +3 -1
  25. package/.pi/agents/harness/planning/sprint-contract-auditor.md +2 -0
  26. package/.pi/agents/harness/sentrux-steward.md +51 -0
  27. package/.pi/extensions/00-posthog-network-bootstrap.ts +11 -0
  28. package/.pi/extensions/harness-debate-tools.ts +12 -3
  29. package/.pi/extensions/harness-live-widget.ts +27 -1
  30. package/.pi/extensions/harness-plan-approval.ts +62 -56
  31. package/.pi/extensions/harness-run-context.ts +553 -84
  32. package/.pi/extensions/harness-subagent-submit.ts +43 -33
  33. package/.pi/extensions/harness-telemetry.ts +29 -4
  34. package/.pi/extensions/lib/debate-bus-core.ts +15 -9
  35. package/.pi/extensions/lib/harness-artifact-gate.ts +182 -0
  36. package/.pi/extensions/lib/harness-posthog.ts +9 -5
  37. package/.pi/extensions/lib/harness-spawn-topology.ts +188 -0
  38. package/.pi/extensions/lib/harness-subagent-auth.ts +105 -19
  39. package/.pi/extensions/lib/harness-subagent-policy.ts +37 -19
  40. package/.pi/extensions/lib/harness-subagent-precheck.ts +35 -9
  41. package/.pi/extensions/lib/harness-subagent-submit-pipeline.ts +66 -2
  42. package/.pi/extensions/lib/harness-subagent-submit-registry.ts +21 -3
  43. package/.pi/extensions/lib/harness-subagents-bridge.ts +91 -28
  44. package/.pi/extensions/lib/harness-subprocess-bootstrap.ts +73 -0
  45. package/.pi/extensions/lib/plan-approval/create-plan.ts +2 -3
  46. package/.pi/extensions/lib/plan-approval/resolve-disk.ts +102 -0
  47. package/.pi/extensions/lib/plan-approval/schema.ts +22 -8
  48. package/.pi/extensions/lib/plan-approval/types.ts +1 -1
  49. package/.pi/extensions/lib/plan-approval/validate.ts +2 -2
  50. package/.pi/extensions/lib/plan-approval-readiness.ts +241 -0
  51. package/.pi/extensions/lib/plan-debate-eligibility.ts +67 -7
  52. package/.pi/extensions/lib/plan-debate-focus.ts +21 -9
  53. package/.pi/extensions/lib/plan-debate-gate.ts +101 -17
  54. package/.pi/extensions/lib/plan-debate-lanes.ts +57 -3
  55. package/.pi/extensions/lib/plan-debate-round-status.ts +18 -7
  56. package/.pi/extensions/lib/plan-messenger.ts +4 -0
  57. package/.pi/extensions/lib/plan-review-gate.ts +59 -0
  58. package/.pi/extensions/lib/posthog-client.ts +76 -0
  59. package/.pi/extensions/policy-gate.ts +24 -19
  60. package/.pi/extensions/trace-recorder.ts +1 -0
  61. package/.pi/harness/agents.manifest.json +24 -16
  62. package/.pi/harness/corpus/cron.example +8 -0
  63. package/.pi/harness/corpus/graphify-kb-updater.config.json +159 -0
  64. package/.pi/harness/corpus/systemd/graphify-kb-updater.env.template +4 -0
  65. package/.pi/harness/corpus/systemd/graphify-kb-updater.service +17 -0
  66. package/.pi/harness/corpus/systemd/graphify-kb-updater.timer +11 -0
  67. package/.pi/harness/docs/adrs/0001-harness-constitution.md +2 -1
  68. package/.pi/harness/docs/adrs/0006-sentrux-dual-layer.md +7 -6
  69. package/.pi/harness/docs/adrs/0009-sentrux-rules-lifecycle.md +6 -1
  70. package/.pi/harness/docs/adrs/0031-harness-run-context.md +1 -1
  71. package/.pi/harness/docs/adrs/0032-harness-command-orchestration.md +7 -0
  72. package/.pi/harness/docs/adrs/0034-darwin-plan-research-pipeline.md +3 -3
  73. package/.pi/harness/docs/adrs/0036-implementation-research-and-selective-debate.md +8 -5
  74. package/.pi/harness/docs/adrs/0039-harness-post-run-review-gate.md +47 -0
  75. package/.pi/harness/docs/adrs/0040-practice-grounded-orchestration.md +40 -0
  76. package/.pi/harness/docs/adrs/0041-intelligent-planning-reconnaissance.md +39 -0
  77. package/.pi/harness/docs/adrs/0042-agent-native-orchestration.md +35 -0
  78. package/.pi/harness/docs/adrs/0043-path-first-harness-tools.md +38 -0
  79. package/.pi/harness/docs/adrs/0044-harness-steer-loop.md +36 -0
  80. package/.pi/harness/docs/adrs/README.md +10 -0
  81. package/.pi/harness/docs/graphify-kb-updater-runbook.md +157 -0
  82. package/.pi/harness/docs/practice-map.md +110 -0
  83. package/.pi/harness/env.harness.template +5 -3
  84. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/artifacts/implementation-research.yaml +28 -0
  85. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/artifacts/review-round-consolidated.yaml +25 -0
  86. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/plan-packet.yaml +196 -0
  87. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/plan-review.md +14 -0
  88. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/research-brief.yaml +62 -0
  89. package/.pi/harness/evals/smoke/sentrux-stub.json +1 -1
  90. package/.pi/harness/evals/smoke/smoke-harness-plan.mjs +43 -17
  91. package/.pi/harness/specs/README.md +1 -1
  92. package/.pi/harness/specs/harness-run-context.schema.json +11 -0
  93. package/.pi/harness/specs/harness-spawn-context.schema.json +14 -0
  94. package/.pi/harness/specs/plan-execution-plan.schema.json +39 -1
  95. package/.pi/harness/specs/plan-packet.schema.json +4 -0
  96. package/.pi/harness/specs/plan-phase-status.schema.json +17 -0
  97. package/.pi/harness/specs/plan-phase-waiver.schema.json +25 -0
  98. package/.pi/harness/specs/plan-planning-context.schema.json +50 -0
  99. package/.pi/harness/specs/plan-review-round-draft.schema.json +1 -1
  100. package/.pi/harness/specs/repair-brief.schema.json +45 -0
  101. package/.pi/harness/specs/review-outcome.schema.json +46 -0
  102. package/.pi/harness/specs/sentrux-manifest-proposal.schema.json +80 -0
  103. package/.pi/harness/specs/sentrux-signal.schema.json +43 -0
  104. package/.pi/harness/specs/steer-state.schema.json +20 -0
  105. package/.pi/lib/harness-context-mode-policy.ts +256 -0
  106. package/.pi/lib/harness-repair-brief.ts +145 -0
  107. package/.pi/lib/harness-run-context.ts +591 -32
  108. package/.pi/lib/harness-ui-state.ts +87 -9
  109. package/.pi/model-router.example.json +13 -4
  110. package/.pi/prompts/harness-auto.md +9 -9
  111. package/.pi/prompts/harness-critic.md +3 -30
  112. package/.pi/prompts/harness-eval.md +4 -37
  113. package/.pi/prompts/harness-plan.md +139 -57
  114. package/.pi/prompts/harness-review.md +150 -15
  115. package/.pi/prompts/harness-run.md +62 -10
  116. package/.pi/prompts/harness-sentrux-steward.md +55 -0
  117. package/.pi/prompts/harness-setup.md +4 -4
  118. package/.pi/prompts/harness-steer.md +30 -0
  119. package/.pi/scripts/graphify-kb-updater.mjs +358 -0
  120. package/.pi/scripts/harness-generate-model-router.mjs +118 -36
  121. package/.pi/scripts/harness-model-router-routing.test.mjs +97 -0
  122. package/.pi/scripts/harness-sync-model-router.mjs +15 -2
  123. package/.pi/scripts/harness-verify.mjs +51 -6
  124. package/.pi/scripts/harness-web-policy-guard.mjs +68 -0
  125. package/.pi/scripts/validate-plan-dag.mjs +3 -3
  126. package/AGENTS.md +1 -0
  127. package/CHANGELOG.md +22 -0
  128. package/package.json +5 -4
  129. package/vendor/pi-model-router/UPSTREAM_PIN.md +3 -1
  130. package/vendor/pi-model-router/extensions/commands.ts +4 -4
  131. package/vendor/pi-model-router/extensions/index.ts +21 -0
  132. package/vendor/pi-model-router/extensions/provider.ts +130 -79
  133. package/vendor/pi-model-router/extensions/routing.ts +148 -0
  134. package/vendor/pi-model-router/extensions/state.ts +3 -0
  135. package/vendor/pi-model-router/extensions/types.ts +9 -0
  136. package/vendor/pi-model-router/extensions/ui.ts +16 -2
  137. package/.pi/prompts/git-sync.md +0 -124
@@ -0,0 +1,38 @@
1
+ # ADR 0043: Path-first harness tool contracts
2
+
3
+ - **Status:** Accepted
4
+ - **Date:** 2026-05-23
5
+
6
+ ## Context
7
+
8
+ `approve_plan`, `create_plan`, and `submit_*` often pass full YAML/JSON documents in tool arguments when the same bytes already exist under `.pi/harness/runs/<run_id>/`. That duplicates tokens and trains models to carry large structs in chat history.
9
+
10
+ ## Decision
11
+
12
+ 1. **`approve_plan`** — Primary API: `approve_plan({ human_summary?: string })`. The extension loads `plan_packet` from `runCtx.plan_packet_path` and `research-brief.yaml` from the run dir. The optional `plan_packet` / `research_brief` arguments are deprecated but remain accepted for one release.
13
+ 2. **`create_plan`** — Primary API: `create_plan()` or `create_plan({ plan_packet_path?: string })`. Verifies approval marker and optional content hash from approve time.
14
+ 3. **`submit_*`** — Accept `source_path` under the active run; read, validate, promote to canonical path. `document` remains optional (deprecated).
15
+ 4. **`merge_harness_yaml`** — Parent merges patches from artifact paths without pasting bodies into tool args.
16
+ 5. **Tool results** — Return `{ path, sha256, status }` (and ids where relevant), not full documents.
17
+
18
+ ## Safety
19
+
20
+ - Draft/canonical packet must exist on disk before approve.
21
+ - Re-`approve_plan` required when `execution_plan` or `acceptance_checks` change after a `plan_gap` revise (hash gate).
22
+
23
+ ## Consequences
24
+
25
+ ### Positive
26
+
27
+ - Approval turns stay small in session history.
28
+ - Subagents write once to disk; submit is O(path) tokens.
29
+
30
+ ### Negative
31
+
32
+ - Agents must write drafts before approve/submit (explicit discipline).
33
+
34
+ ## References
35
+
36
+ - `.pi/extensions/harness-plan-approval.ts`
37
+ - `.pi/extensions/lib/harness-subagent-submit-pipeline.ts`
38
+ - ADR 0042, 0044
@@ -0,0 +1,36 @@
1
+ # ADR 0044: Harness steer loop (post-run repair)
2
+
3
+ - **Status:** Accepted
4
+ - **Date:** 2026-05-23
5
+
6
+ ## Context
7
+
8
+ After `/harness-run`, failed benchmarks or blocked execution previously routed users to `/harness-plan "<new task>"` even when the approved plan was still valid—high friction and duplicate planning context.
9
+
10
+ ## Decision
11
+
12
+ 1. **Always review** — `/harness-run` ends with `next_command: /harness-review` (including `blocked` / partial work). Remove the benchmark fail-fast path that skipped verdict/adversary (ADR 0039 amended).
13
+ 2. **Review artifacts** — Parent writes `artifacts/review-outcome.yaml` and `artifacts/repair-brief.yaml` (path pointers, not pasted bodies).
14
+ 3. **Remediation routing** — `review-outcome.remediation_class`: `implementation_gap` → `/harness-steer`; `plan_gap` → `/harness-plan` revise with `repair_brief_path`; `pass` → policy status. **Review outcome wins** over executor `scope_drift` when they disagree; tie → `plan_gap`.
15
+ 4. **`/harness-steer`** — Thin orchestrator: read briefs, set policy **phase `execute`**, spawn `harness/executor` with `mode: repair`, then `/harness-review` again.
16
+ 5. **Caps** — `HARNESS_STEER_MAX_ATTEMPTS` (default 3). **Tiered review:** full review on initial run + steer 1; steers 2+ use lite (benchmark + verdict) unless prior `block_merge` or user forces full.
17
+ 6. **Sentrux** — Refresh baseline or compare new violations only after steer mutations (avoid false degraded on every attempt).
18
+ 7. **Evaluate-phase writes** — Orchestrator may write review/steer YAML under run `artifacts/` in `evaluate`/`adversary` phase (allowlisted files).
19
+
20
+ ## Consequences
21
+
22
+ ### Positive
23
+
24
+ - One `approve_plan`; many repair cycles without re-typing tasks.
25
+ - `harness-auto` can loop until pass or cap.
26
+
27
+ ### Negative
28
+
29
+ - Higher review cost on failed runs (mitigated by tiered adversary).
30
+
31
+ ## References
32
+
33
+ - `.pi/prompts/harness-steer.md`
34
+ - `.pi/harness/specs/review-outcome.schema.json`, `repair-brief.schema.json`
35
+ - `nextStepAfterOutcome` in `.pi/lib/harness-run-context.ts`
36
+ - ADR 0039 (amended), 0043
@@ -24,6 +24,16 @@ Team-shared ADRs for the ultimate-pi harness live under `.pi/harness/docs/adrs/`
24
24
  | [0036](0036-implementation-research-and-selective-debate.md) | Implementation research and selective debate | Accepted |
25
25
  | [0037](0037-subagent-submit-tools.md) | Subagent submit tools (subprocess extension) | Accepted |
26
26
  | [0038](0038-budget-telemetry-only.md) | Budget caps telemetry-only by default | Accepted |
27
+ | [0039](0039-harness-post-run-review-gate.md) | `/harness-review` master post-run gate | Accepted |
28
+ | [0040](0040-practice-grounded-orchestration.md) | Practice-grounded orchestration & team topology | Accepted |
29
+ | [0041](0041-intelligent-planning-reconnaissance.md) | Intelligent planning reconnaissance (tools over tool-scouts) | Accepted |
30
+ | [0042](0042-agent-native-orchestration.md) | Agent-native orchestration (lakes, plan-verify probes, synthesizer) | Accepted |
31
+ | [0043](0043-path-first-harness-tools.md) | Path-first harness tool contracts | Accepted |
32
+ | [0044](0044-harness-steer-loop.md) | Post-run steer loop (repair vs plan revise) | Accepted |
33
+
34
+ ## Practice map
35
+
36
+ Phase-to-practice mapping for slash commands: [practice-map.md](../practice-map.md).
27
37
 
28
38
  ## Template
29
39
 
@@ -0,0 +1,157 @@
1
+ # Graphify KB updater runbook
2
+
3
+ ## Purpose and scope
4
+
5
+ `graphify-kb-updater` keeps the local Graphify source corpus current for agentic engineering, context engineering, harness engineering, AI coding harnesses, research papers/feeds, articles/blogs, local books/transcripts, YouTube candidates, and competitor intelligence.
6
+
7
+ The approved operating model is **hybrid allowlist auto-promotion with conservative staging**:
8
+
9
+ - Daily local automation may auto-promote only explicitly approved allowlisted public sources with complete provenance and rights/access metadata.
10
+ - Books, transcripts, YouTube/video material, paid/copyrighted/mirrored material, unclear-license content, and unknown open-web sources remain staged until manually approved.
11
+ - Competitor monitoring is a curated taxonomy/watchlist/reporting signal, not an exhaustive crawler.
12
+ - Pi-agent-open integration is intentionally limited/deferred: opening Pi should do at most a low-latency, no-network stale check. It must not perform synchronous web discovery, promotion, or Graphify mutation.
13
+
14
+ ## Governance and approval boundaries
15
+
16
+ Required rights/access fields for every promotion:
17
+
18
+ - `license`
19
+ - `access`
20
+ - `approved_by`
21
+ - `approved_at`
22
+
23
+ Allowlist auto-promotion requires all of the following:
24
+
25
+ 1. `.pi/harness/corpus/graphify-kb-updater.config.json` has `auto_promote_allowlist: true`.
26
+ 2. The candidate domain is present in `allowlist` with `approved: true`.
27
+ 3. The candidate itself has `approved: true`.
28
+ 4. `rights_access` is complete.
29
+ 5. The candidate is not a risky source class that requires manual review.
30
+
31
+ Risky source classes (`book`, `transcript`, `youtube`) always require explicit approval and complete rights/access metadata. Raw HTTP shell paths are forbidden; keep discovery/fetch through approved harness web/API abstractions and verify with `.pi/scripts/harness-web-policy-guard.mjs`.
32
+
33
+ ## Manual commands
34
+
35
+ Dry-run, no mutation of `raw/`, state, or `graphify-out/`:
36
+
37
+ ```bash
38
+ node .pi/scripts/graphify-kb-updater.mjs --dry-run --pilot-report
39
+ ```
40
+
41
+ Apply approved/promotable candidates and refresh Graphify only when promoted files changed:
42
+
43
+ ```bash
44
+ node .pi/scripts/graphify-kb-updater.mjs --apply --refresh-graph --pilot-report
45
+ ```
46
+
47
+ Apply without graph mutation:
48
+
49
+ ```bash
50
+ node .pi/scripts/graphify-kb-updater.mjs --apply --skip-graph --pilot-report
51
+ ```
52
+
53
+ Validate scheduler templates:
54
+
55
+ ```bash
56
+ node .pi/scripts/graphify-kb-updater.mjs --scheduler-smoke
57
+ ```
58
+
59
+ Run web-policy guard:
60
+
61
+ ```bash
62
+ node .pi/scripts/harness-web-policy-guard.mjs
63
+ ```
64
+
65
+ ## Approval workflow
66
+
67
+ 1. Review dry-run JSON: candidate count, source counts, competitor labels, duplicate/skipped/blocked counts, stale warnings, planned promotions, and graph action.
68
+ 2. For a candidate, add it to `.pi/harness/corpus/graphify-kb-updater.config.json` `review_queue` with:
69
+ - `kind` (`article`, `paper`, `book`, `transcript`, or `youtube`)
70
+ - `title`
71
+ - `url` or `path`
72
+ - `approved: true`
73
+ - `rights_access` object with all required fields
74
+ - optional `competitor_labels` or provenance notes.
75
+ 3. For local files, you may place `<file>.rights.json` beside the source, but risky classes still require explicit approval before promotion.
76
+ 4. Run `--apply --refresh-graph`.
77
+ 5. Promoted sources land under `raw/graphify-kb-updates/<kind>/` with `.provenance.json` sidecars.
78
+
79
+ ## Daily scheduler setup
80
+
81
+ Systemd user timer is the primary path and runs daily at 08:30 with randomized delay:
82
+
83
+ ```bash
84
+ mkdir -p ~/.config/ultimate-pi ~/.config/systemd/user ~/.local/state/ultimate-pi
85
+ cp .pi/harness/corpus/systemd/graphify-kb-updater.env.template ~/.config/ultimate-pi/graphify-kb-updater.env
86
+ # edit UP_ROOT in the env file
87
+ cp .pi/harness/corpus/systemd/graphify-kb-updater.service ~/.config/systemd/user/
88
+ cp .pi/harness/corpus/systemd/graphify-kb-updater.timer ~/.config/systemd/user/
89
+ systemctl --user daemon-reload
90
+ systemctl --user enable --now graphify-kb-updater.timer
91
+ systemctl --user list-timers graphify-kb-updater.timer
92
+ ```
93
+
94
+ The service uses `flock` as a non-overlap lock, `timeout 45m`, explicit env, and append-only logs.
95
+
96
+ Cron fallback runs daily at 08:30: copy the line from `.pi/harness/corpus/cron.example` into your crontab with `crontab -e`, editing `UP_ROOT` to match your checkout.
97
+
98
+ ## Reports, logs, and fields
99
+
100
+ Apply runs write:
101
+
102
+ - Registry: `.pi/harness/corpus/graphify-kb-updater-state/registry.json`
103
+ - Per-run logs: `.pi/harness/corpus/graphify-kb-updater-state/logs/`
104
+ - Scheduler logs: `~/.local/state/ultimate-pi/graphify-kb-updater.log` and `.err`
105
+
106
+ Each run reports:
107
+
108
+ - `last_run_at`
109
+ - `candidate_count`, `promoted_count`, `blocked_count`, `skipped_count`, `duplicate_skips`, `failure_count`
110
+ - `counts.by_kind`, `counts.by_source_type`, `counts.by_competitor_label`, `counts.allowlisted`
111
+ - `stale_warnings`
112
+ - `changed_existing_count` for same URL/path content changes
113
+ - `graph.action`, `graph.exit_status`, and Graphify report path when refreshed
114
+ - optional pilot metrics: `frontier_recall_proxy`, `promoted_precision_proxy`, `duplicate_noise_rate`, `graphify_success`
115
+
116
+ Review these fields before enabling unattended mode and after every config change.
117
+
118
+ ## Troubleshooting
119
+
120
+ - `missing_rights_access_approval`: add complete rights/access metadata.
121
+ - `manual_approval_required`: set `approved: true` after source and rights review.
122
+ - `duplicate_unchanged`: candidate was already promoted and content hash is unchanged.
123
+ - `changed_existing_count > 0`: a stable URL/path changed content; review before relying on previous conclusions.
124
+ - Graphify skipped: no promoted changes, `--skip-graph`, or no `--refresh-graph`.
125
+ - Graphify failed: inspect `graph.stderr`, run `graphify update .` manually, and keep the scheduler disabled until fixed.
126
+ - Scheduler did not run: check `systemctl --user status graphify-kb-updater.timer`, the env file path, and scheduler logs.
127
+ - Overlap: lock path `%t/graphify-kb-updater.lock` or `/tmp/graphify-kb-updater.lock` prevents concurrent runs.
128
+
129
+ ## Disable
130
+
131
+ ```bash
132
+ systemctl --user disable --now graphify-kb-updater.timer
133
+ systemctl --user reset-failed graphify-kb-updater.service
134
+ ```
135
+
136
+ Remove any cron line copied from `.pi/harness/corpus/cron.example`.
137
+
138
+ ## Rollback
139
+
140
+ 1. Disable systemd timer and remove cron line.
141
+ 2. Use registry/log promoted paths to remove or quarantine promoted files under `raw/graphify-kb-updates/`.
142
+ 3. Restore `.pi/harness/corpus/graphify-kb-updater-state/registry.json` from backup, or mark candidates rejected/quarantined.
143
+ 4. Revert implementation files if needed:
144
+
145
+ ```bash
146
+ git checkout -- .pi/scripts/graphify-kb-updater.mjs .pi/harness/corpus/graphify-kb-updater.config.json .pi/harness/corpus/systemd/graphify-kb-updater.timer .pi/harness/corpus/cron.example test/graphify-kb-updater.test.mjs .pi/harness/docs/graphify-kb-updater-runbook.md
147
+ ```
148
+
149
+ 5. Regenerate Graphify from valid sources:
150
+
151
+ ```bash
152
+ graphify update .
153
+ ```
154
+
155
+ ## Pilot gate before unattended mode
156
+
157
+ Run at least one dry-run and one supervised apply. Record frontier recall proxy, promoted precision proxy, duplicate/noise rate, skipped reasons, stale warnings, and Graphify success from `--pilot-report`. Enable the timer only if promoted precision is acceptable and graph refresh succeeds.
@@ -0,0 +1,110 @@
1
+ # Harness practice map
2
+
3
+ Source of truth linking harness phases to proven practices (graphify corpus), agents/scripts, spawn topology, and **agent translation** (ADR 0042). Orchestrators and agents should cite this doc when unsure why a lane exists.
4
+
5
+ See also: [ADRs](adrs/README.md), [ADR 0040](adrs/0040-practice-grounded-orchestration.md), [ADR 0041](adrs/0041-intelligent-planning-reconnaissance.md), [ADR 0042](adrs/0042-agent-native-orchestration.md), [ADR 0043](adrs/0043-path-first-harness-tools.md), [ADR 0044](adrs/0044-harness-steer-loop.md), [`raw/modules/structured-planning.md`](../../../raw/modules/structured-planning.md).
6
+
7
+ ## Agent translation (human practice → agent design)
8
+
9
+ | Human practice | Agent translation |
10
+ |----------------|-------------------|
11
+ | Meeting / chair | Parent as **scheduler + gate checker** only |
12
+ | Fagan inspection rounds | **Schema-bound probes** + merge (`parallel_probes` profile) |
13
+ | Two-pizza cap per batch | **Token/spawn budget** per phase (`harness-spawn-budget.ts`) |
14
+ | RACI roles | **Disjoint prompt contexts**, not serial speakers |
15
+ | WBS decomposition | **Lake-first `execution_plan`** (few outcomes, bundled context) |
16
+ | Sprint / story points | **`executor_strategy` + lake `done_criteria`** |
17
+ | Critical path | **`critical_path_lake_ids`** |
18
+ | Replan on every failure | **Steer loop** (`implementation_gap`) vs **plan revise** (`plan_gap`) |
19
+ | Tool payloads in chat | **Path-first** approve/submit/merge (ADR 0043) |
20
+
21
+ ## Team management rules (all `/harness-*` orchestrators)
22
+
23
+ 1. **Parallelism law** — Parallel `subagent` `tasks` only when outputs are independent inputs to a later merge (implementation ∥ stack research; inspector ∥ adversary in `parallel_probes`). Never parallelize decompose ∥ hypothesis.
24
+ 2. **Two-pizza cap per batch** — Max 2 research lanes, max 1 optional `planning-context` subagent, max 1 executor, max 1 debate lane agent per `subagent` call (plan-verify may use 2 probes + integrator in separate batches).
25
+ 3. **No redundant thinkers** — If artifact X exists, downstream agents read it; they do not re-derive (e.g. decompose after `planning-context.yaml`).
26
+ 4. **Sequential dependency chain** — planning context → problem framing / decompose → hypothesis → research → synthesis/author → DAG → plan-verify → approve → execute → review → (steer)* → policy.
27
+ 5. **Plan-verify (agent-native)** — For `fast`/`standard`, parallel probes then integrator; parent is chair, not participant. Threaded debate remains for `full` until parity.
28
+ 6. **Tool intelligence** — Parent chooses graphify, sg, ccc; subprocesses optional. **Path-first:** disk is source of truth; tool args are pointers (ADR 0043).
29
+
30
+ ## `/harness-plan` — Planning Process Group
31
+
32
+ | Phase | Practice | Agent translation | Actor | Spawn |
33
+ |-------|----------|---------------------|-------|-------|
34
+ | 0 | Tooling / fast feedback | Pre-index once | Parent + `ccc` | Automatic |
35
+ | 1 | Reconnaissance before WBS | **ContextPack** on disk | Parent tools or optional `planning-context` | No default subprocess |
36
+ | 2a | Problem framing / lakes | Lake outcomes, not ticket tree | `decompose` or synthesizer section | Sequential after context gate |
37
+ | 2b | Hypothesis-driven approach | Falsifiable claim grounded in framing | `hypothesis` or synthesizer | After `artifacts/decomposition.yaml` |
38
+ | 3.5 | Spike / external research | Paths in research brief | Researchers optional | Artifacts required |
39
+ | 4 | Fork resolution (batched) | One `ask_user` gate | Parent | After 3.5 |
40
+ | 4b | Lake-first execution plan | `executor_strategy`, context bundles | `plan-synthesizer` (low/med) or `execution-plan-author` (high) | Single agent |
41
+ | 4c | Deterministic quality gate | Script, not LLM | `validate-plan-dag.mjs` | Parent; hard stop |
42
+ | 4d | Tailor process to risk | Probe depth, not meeting count | `harness_plan_debate_eligibility` | Pre plan-verify |
43
+ | 4e | Architectural intent | Fitness-function spec | `harness/sentrux-steward` optional | When structural risk |
44
+ | 5 | Plan-verify (Review Gate) | Parallel probes + integrator | Debate cast / probes | `parallel_probes` or threaded |
45
+ | 6 | Baseline + approve | Path-only `approve_plan` | Parent | `approve_plan`, `create_plan` |
46
+
47
+ ### Review Gate — debate RACI (threaded / full profile)
48
+
49
+ | Agent | Inspection role | Practice | When |
50
+ |-------|-----------------|----------|------|
51
+ | `hypothesis-validator` | Blind verifier | Independent verification (ADR 0034) | Round 1 / fast path |
52
+ | `plan-evaluator` | Inspector | Neutral checklist | Every required focus |
53
+ | `plan-adversary` | Red team | Adversarial review | Every required focus |
54
+ | `sprint-contract-auditor` | DoD auditor | Sprint contract | `quality` focus |
55
+ | `review-integrator` | Recorder | Single round artifact | End of round |
56
+ | Parent | Chair | Gates only | Always |
57
+
58
+ ### Plan-verify profiles
59
+
60
+ | Profile | When | Team shape |
61
+ |---------|------|------------|
62
+ | `full` | High risk, material fork | Threaded: all four focuses |
63
+ | `standard` | Default med | `parallel_probes`: inspector ∥ adversary → integrator |
64
+ | `light` | Low risk | Threaded: `spec` + `quality` |
65
+ | `fast` | Med/low, clear stack | Consolidated verify + blind hypothesis-validator |
66
+
67
+ ## `/harness-run` — Executing Process Group
68
+
69
+ | Step | Practice | Agent translation | Actor |
70
+ |------|----------|-------------------|-------|
71
+ | Gate | Change control | `plan_ready` required | Parent |
72
+ | Pre-work | Fitness baseline | `sentrux gate --save` | Parent |
73
+ | Work | Single implementer | `executor_strategy` | `harness/executor` |
74
+ | Post-work | Observation | `sentrux check` / signal artifact | Parent |
75
+ | Handoff | Generator–evaluator | `submit_executor_handoff` | Executor |
76
+ | Next | Always verify | **`/harness-review`** (not replan on blocked) | Parent routing |
77
+
78
+ ## `/harness-review` — Monitoring and Controlling
79
+
80
+ | Phase | Practice | Agent translation | Actor |
81
+ |-------|----------|-------------------|-------|
82
+ | 1 | Automated QC + fitness | Deterministic first | Parent scripts |
83
+ | 2 | Measure vs plan | Benchmark on disk | `evaluator` benchmark |
84
+ | 3 | Policy audit | Verdict (no fail-fast skip) | `evaluator` verdict |
85
+ | 4 | Red team | Tiered: full on initial run and steer 1; lite on steers 2+ | `adversary` |
86
+ | 5 | Outcome + repair brief | Machine routing | Parent + `review-outcome.yaml`, `repair-brief.yaml` |
87
+ | 6 | Steer gate | One `ask_user` | harness-decisions |
88
+ | 7 | Steer / revise | `implementation_gap` → `/harness-steer`; `plan_gap` → plan revise | ADR 0044 |
89
+
90
+ `--quick` = deterministic + benchmark + verdict (no adversary). Steer attempts 2+ default to lite review unless `block_merge`.
91
+
92
+ ## `/harness-steer` — Repair sub-cycle (ADR 0044)
93
+
94
+ | Step | Practice | Actor |
95
+ |------|----------|-------|
96
+ | 0 | Read review + repair briefs | Parent |
97
+ | 1 | Policy phase → `execute` | Parent |
98
+ | 2 | Repair scope | `harness/executor` `mode: repair` |
99
+ | 3 | Re-verify | `/harness-review` |
100
+
101
+ ## Anti-patterns
102
+
103
+ - **Do not** spawn `decompose` and `hypothesis` in the same parallel `tasks` batch.
104
+ - **Do not** run `graphify query` in `decompose` when planning-context coverage is ok (ADR 0041).
105
+ - **Do not** parallelize threaded debate lanes in one batch (except `parallel_probes` inspector ∥ adversary per ADR 0042).
106
+ - **Do not** let executor or parent self-certify.
107
+ - **Do not** stop review on benchmark fail — complete verdict and route via steer (ADR 0044).
108
+ - **Do not** tell user to run `/harness-plan "<new task>"` on test failure — use `/harness-steer` with `repair-brief.yaml`.
109
+ - **Do not** re-`approve_plan` every steer attempt — only when packet changes.
110
+ - **Do not** embed full plan packets in `approve_plan` / `submit_*` tool args (ADR 0043).
@@ -21,6 +21,8 @@ HARNESS_WEB_SEARCH_ENGINE=ddg_html
21
21
 
22
22
  # --- PostHog (optional) ---
23
23
  # Project key — required for harness_* telemetry when HARNESS_TELEMETRY_ENABLED=true
24
+ # WSL2: ultimate-pi loads 00-posthog-network-bootstrap.ts (IPv4 fetch for *.posthog.com).
25
+ # If flush still fails, set POSTHOG_ENABLED=false or fix outbound HTTPS to PostHog.
24
26
  # POSTHOG_API_KEY=
25
27
  # POSTHOG_HOST=https://us.i.posthog.com
26
28
  # POSTHOG_ENABLED=true
@@ -39,6 +41,6 @@ HARNESS_WEB_SEARCH_ENGINE=ddg_html
39
41
  # --- Wiki / Obsidian vault (optional) ---
40
42
  VAULT_WIKI_PATH=vault/wiki
41
43
 
42
- # --- Sentrux gate (optional) ---
43
- # Require Sentrux stub for harness-verify (see .pi/scripts/harness-verify.mjs)
44
- # HARNESS_SENTRUX_REQUIRED=true
44
+ # --- Sentrux fitness functions ---
45
+ # Require sentrux check + run signal (or CI stub) in harness-verify
46
+ HARNESS_SENTRUX_REQUIRED=true
@@ -0,0 +1,28 @@
1
+ schema_version: "1.0.0"
2
+ problem_framing: Validate harness plan-phase with fixture-driven smoke
3
+ sub_problems:
4
+ - DAG validation
5
+ - Debate gate coverage
6
+ internal_references:
7
+ - path: .pi/harness/evals/smoke/smoke-harness-plan.mjs
8
+ relevance: Existing smoke pattern
9
+ reuse_signal: high
10
+ external_references: []
11
+ solution_patterns:
12
+ - name: fixture-driven gate
13
+ provenance: in-repo smoke
14
+ fit: Validates plan pipeline without live agents
15
+ tradeoffs:
16
+ pros: [Deterministic CI]
17
+ cons: []
18
+ risks: []
19
+ similar_implementations: []
20
+ recommended_approach:
21
+ summary: Extend minimal-med fixture with implementation artifact
22
+ recommended_approach_confidence: high
23
+ confidence_rationale: Reuses established smoke-harness-plan pattern
24
+ evidence_refs:
25
+ - .pi/harness/evals/smoke/smoke-harness-plan.mjs
26
+ - .pi/scripts/validate-plan-dag.mjs
27
+ anti_patterns: []
28
+ open_questions: []
@@ -0,0 +1,25 @@
1
+ schema_version: "1.0.0"
2
+ round_index: 1
3
+ debate_round_focus: all
4
+ round_summary: Consolidated review gate for fast profile fixture
5
+ validation_summary: Spec and quality checks pass in one round
6
+ adversary_summary: No blockers
7
+ disputes: []
8
+ recommended_packet_patches: []
9
+ review_gate_ready: true
10
+ participants:
11
+ - PlanEvaluatorAgent
12
+ - PlanAdversaryAgent
13
+ - SprintContractAuditorAgent
14
+ - ReviewIntegratorAgent
15
+ claims:
16
+ - consolidated review gate ready
17
+ rebuttals: []
18
+ evidence_refs: []
19
+ token_usage:
20
+ per_agent:
21
+ PlanEvaluatorAgent: 120
22
+ PlanAdversaryAgent: 100
23
+ SprintContractAuditorAgent: 80
24
+ round_total: 300
25
+ consensus_delta: 0.1
@@ -0,0 +1,196 @@
1
+ schema_version: "1.0.0"
2
+ contract_version: "1.1.0"
3
+ plan_id: plan-smoke-fixture-001
4
+ task_id: task-smoke-001
5
+ scope: Smoke fixture for plan-phase harness validation with execution_plan and debate artifacts.
6
+ assumptions:
7
+ - Fixture only; no live agent run
8
+ risk_level: med
9
+ acceptance_checks:
10
+ - id: AC-1
11
+ description: DAG validation passes
12
+ - id: AC-2
13
+ description: Consolidated debate round recorded (fast profile)
14
+ - id: AC-3
15
+ description: Stack brief present in research-brief
16
+ - id: AC-4
17
+ description: Sprint contract complete
18
+ - id: AC-5
19
+ description: plan-review.md renders
20
+ rollback_plan:
21
+ revert_commit_ready: true
22
+ rollback_artifacts:
23
+ revert_command: git revert HEAD
24
+ revert_branch: main
25
+ patch_bundle: .pi/harness/runs/smoke-fixture/patch.bundle
26
+ execution_plan:
27
+ schema_version: "1.0.0"
28
+ phases:
29
+ - phase_id: P1
30
+ name: Foundation
31
+ objective: Establish baseline and verify harness wiring
32
+ entry_criteria:
33
+ - Fixture loaded
34
+ exit_criteria:
35
+ - AC-1 satisfied
36
+ milestone: M1-baseline
37
+ work_item_ids: [WI-1, WI-2, WI-3]
38
+ - phase_id: P2
39
+ name: Build
40
+ objective: Implement core changes
41
+ entry_criteria:
42
+ - M1-baseline complete
43
+ exit_criteria:
44
+ - AC-2 satisfied
45
+ milestone: M2-build
46
+ work_item_ids: [WI-4, WI-5, WI-6]
47
+ - phase_id: P3
48
+ name: Verify
49
+ objective: Quality gate and documentation
50
+ entry_criteria:
51
+ - M2-build complete
52
+ exit_criteria:
53
+ - AC-5 satisfied
54
+ milestone: M3-ship
55
+ work_item_ids: [WI-7, WI-8]
56
+ work_items:
57
+ - work_item_id: WI-1
58
+ phase_id: P1
59
+ title: Load fixture packet
60
+ description: Read plan-packet.yaml from fixture directory
61
+ depends_on: []
62
+ files:
63
+ - .pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/plan-packet.yaml
64
+ parallel_safe: true
65
+ done_criteria:
66
+ type: manual
67
+ spec: Fixture packet readable
68
+ acceptance_check_ids: [AC-1]
69
+ - work_item_id: WI-2
70
+ phase_id: P1
71
+ title: Run DAG validator
72
+ description: Execute validate-plan-dag.mjs
73
+ depends_on: [WI-1]
74
+ files:
75
+ - .pi/scripts/validate-plan-dag.mjs
76
+ parallel_safe: false
77
+ done_criteria:
78
+ type: command
79
+ spec: node .pi/scripts/validate-plan-dag.mjs --packet plan-packet.yaml
80
+ acceptance_check_ids: [AC-1]
81
+ - work_item_id: WI-3
82
+ phase_id: P1
83
+ title: Lint harness-yaml
84
+ description: Ensure YAML helpers parse fixture
85
+ depends_on: [WI-1]
86
+ files:
87
+ - .pi/lib/harness-yaml.ts
88
+ parallel_safe: true
89
+ done_criteria:
90
+ type: lint
91
+ spec: npm test
92
+ acceptance_check_ids: [AC-1]
93
+ - work_item_id: WI-4
94
+ phase_id: P2
95
+ title: Debate round 1-2 artifacts
96
+ description: Validate review-round YAML
97
+ depends_on: [WI-2]
98
+ files:
99
+ - .pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r1.yaml
100
+ parallel_safe: false
101
+ done_criteria:
102
+ type: artifact
103
+ spec: artifacts/review-round-r1.yaml exists
104
+ acceptance_check_ids: [AC-2]
105
+ - work_item_id: WI-5
106
+ phase_id: P2
107
+ title: Debate round 3-4 artifacts
108
+ description: Validate final review round
109
+ depends_on: [WI-4]
110
+ files:
111
+ - .pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r4.yaml
112
+ parallel_safe: false
113
+ done_criteria:
114
+ type: artifact
115
+ spec: artifacts/review-round-r4.yaml exists
116
+ acceptance_check_ids: [AC-2]
117
+ - work_item_id: WI-6
118
+ phase_id: P2
119
+ title: Stack research merge
120
+ description: research-brief includes stack section
121
+ depends_on: [WI-2]
122
+ files: []
123
+ non_code: true
124
+ parallel_safe: true
125
+ done_criteria:
126
+ type: manual
127
+ spec: research-brief.yaml contains stack key
128
+ acceptance_check_ids: [AC-3]
129
+ - work_item_id: WI-7
130
+ phase_id: P3
131
+ title: Sprint contract audit
132
+ description: R4 sprint audit artifact
133
+ depends_on: [WI-5]
134
+ files:
135
+ - .pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/sprint-audit-r4.yaml
136
+ parallel_safe: false
137
+ done_criteria:
138
+ type: artifact
139
+ spec: sprint-audit-r4.yaml present
140
+ acceptance_check_ids: [AC-4]
141
+ - work_item_id: WI-8
142
+ phase_id: P3
143
+ title: Render plan-review
144
+ description: Human-readable plan review markdown
145
+ depends_on: [WI-7]
146
+ files:
147
+ - .pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/plan-review.md
148
+ parallel_safe: false
149
+ done_criteria:
150
+ type: manual
151
+ spec: plan-review.md non-empty
152
+ acceptance_check_ids: [AC-5]
153
+ sprint_contract:
154
+ in_scope:
155
+ - Fixture validation only
156
+ out_of_scope:
157
+ - Production deploy
158
+ definition_of_done: All smoke checks green
159
+ assumptions:
160
+ - CI environment has node
161
+ external_dependencies: []
162
+ wbs_dictionary:
163
+ - work_item_id: WI-1
164
+ deliverable: Fixture packet loaded
165
+ owner_role: executor
166
+ inputs: []
167
+ outputs: [parsed packet]
168
+ risk_register:
169
+ - risk_id: R1
170
+ description: DAG validator false negative
171
+ likelihood: low
172
+ impact: high
173
+ mitigation: Unit tests on validate-plan-dag.mjs
174
+ linked_work_item_ids: [WI-2]
175
+ - risk_id: R2
176
+ description: Debate cap misconfiguration
177
+ likelihood: med
178
+ impact: med
179
+ mitigation: debate-orchestrator plan profile tests
180
+ linked_work_item_ids: [WI-4]
181
+ - risk_id: R3
182
+ description: YAML parse drift
183
+ likelihood: low
184
+ impact: med
185
+ mitigation: harness-yaml strict parse
186
+ linked_work_item_ids: [WI-3]
187
+ schedule_metadata:
188
+ critical_path_work_item_ids: [WI-1, WI-2, WI-4, WI-5, WI-7, WI-8]
189
+ parallel_groups:
190
+ - [WI-1, WI-3]
191
+ schedule_baseline_note: Fixture topological order; no calendar dates
192
+ dag_validation:
193
+ status: pass
194
+ topological_order: [WI-1, WI-2, WI-3, WI-4, WI-5, WI-6, WI-7, WI-8]
195
+ cycles: []
196
+ conflicts: []
@@ -0,0 +1,14 @@
1
+ # Plan review (fixture)
2
+
3
+ plan_id: plan-smoke-fixture-001
4
+
5
+ ## Execution plan
6
+
7
+ Phases: P1 Foundation → P2 Build → P3 Verify
8
+
9
+ Critical path: WI-1 → WI-2 → WI-4 → WI-5 → WI-7 → WI-8
10
+
11
+ ## Debate
12
+
13
+ - Round 1 (spec): review_gate_ready
14
+ - Round 4 (quality): review_gate_ready