ultimate-pi 0.16.0 → 0.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agents/skills/harness-context/SKILL.md +13 -6
- package/.agents/skills/harness-debate-plan/SKILL.md +37 -20
- package/.agents/skills/harness-eval/SKILL.md +6 -21
- package/.agents/skills/harness-governor/SKILL.md +4 -3
- package/.agents/skills/harness-orchestration/SKILL.md +39 -51
- package/.agents/skills/harness-plan/SKILL.md +23 -12
- package/.agents/skills/harness-review/SKILL.md +52 -0
- package/.agents/skills/harness-sentrux-setup/SKILL.md +13 -1
- package/.agents/skills/harness-steer/SKILL.md +14 -0
- package/.pi/agents/harness/adversary.md +3 -10
- package/.pi/agents/harness/evaluator.md +3 -12
- package/.pi/agents/harness/executor.md +12 -14
- package/.pi/agents/harness/planning/decompose.md +7 -4
- package/.pi/agents/harness/planning/hypothesis-validator.md +2 -0
- package/.pi/agents/harness/planning/hypothesis.md +4 -2
- package/.pi/agents/harness/planning/implementation-researcher.md +1 -1
- package/.pi/agents/harness/planning/plan-adversary.md +2 -0
- package/.pi/agents/harness/planning/plan-evaluator.md +2 -0
- package/.pi/agents/harness/planning/plan-synthesizer.md +25 -0
- package/.pi/agents/harness/planning/planning-context.md +48 -0
- package/.pi/agents/harness/planning/review-integrator.md +2 -0
- package/.pi/agents/harness/planning/scout-graphify.md +3 -1
- package/.pi/agents/harness/planning/scout-semantic.md +3 -1
- package/.pi/agents/harness/planning/scout-structure.md +3 -1
- package/.pi/agents/harness/planning/sprint-contract-auditor.md +2 -0
- package/.pi/agents/harness/sentrux-steward.md +51 -0
- package/.pi/extensions/00-posthog-network-bootstrap.ts +11 -0
- package/.pi/extensions/harness-debate-tools.ts +12 -3
- package/.pi/extensions/harness-live-widget.ts +27 -1
- package/.pi/extensions/harness-plan-approval.ts +62 -56
- package/.pi/extensions/harness-run-context.ts +553 -84
- package/.pi/extensions/harness-subagent-submit.ts +43 -33
- package/.pi/extensions/harness-telemetry.ts +29 -4
- package/.pi/extensions/lib/debate-bus-core.ts +15 -9
- package/.pi/extensions/lib/harness-artifact-gate.ts +182 -0
- package/.pi/extensions/lib/harness-posthog.ts +9 -5
- package/.pi/extensions/lib/harness-spawn-topology.ts +188 -0
- package/.pi/extensions/lib/harness-subagent-auth.ts +105 -19
- package/.pi/extensions/lib/harness-subagent-policy.ts +37 -19
- package/.pi/extensions/lib/harness-subagent-precheck.ts +35 -9
- package/.pi/extensions/lib/harness-subagent-submit-pipeline.ts +66 -2
- package/.pi/extensions/lib/harness-subagent-submit-registry.ts +21 -3
- package/.pi/extensions/lib/harness-subagents-bridge.ts +91 -28
- package/.pi/extensions/lib/harness-subprocess-bootstrap.ts +73 -0
- package/.pi/extensions/lib/plan-approval/create-plan.ts +2 -3
- package/.pi/extensions/lib/plan-approval/resolve-disk.ts +102 -0
- package/.pi/extensions/lib/plan-approval/schema.ts +22 -8
- package/.pi/extensions/lib/plan-approval/types.ts +1 -1
- package/.pi/extensions/lib/plan-approval/validate.ts +2 -2
- package/.pi/extensions/lib/plan-approval-readiness.ts +241 -0
- package/.pi/extensions/lib/plan-debate-eligibility.ts +67 -7
- package/.pi/extensions/lib/plan-debate-focus.ts +21 -9
- package/.pi/extensions/lib/plan-debate-gate.ts +101 -17
- package/.pi/extensions/lib/plan-debate-lanes.ts +57 -3
- package/.pi/extensions/lib/plan-debate-round-status.ts +18 -7
- package/.pi/extensions/lib/plan-messenger.ts +4 -0
- package/.pi/extensions/lib/plan-review-gate.ts +59 -0
- package/.pi/extensions/lib/posthog-client.ts +76 -0
- package/.pi/extensions/policy-gate.ts +24 -19
- package/.pi/extensions/trace-recorder.ts +1 -0
- package/.pi/harness/agents.manifest.json +24 -16
- package/.pi/harness/corpus/cron.example +8 -0
- package/.pi/harness/corpus/graphify-kb-updater.config.json +159 -0
- package/.pi/harness/corpus/systemd/graphify-kb-updater.env.template +4 -0
- package/.pi/harness/corpus/systemd/graphify-kb-updater.service +17 -0
- package/.pi/harness/corpus/systemd/graphify-kb-updater.timer +11 -0
- package/.pi/harness/docs/adrs/0001-harness-constitution.md +2 -1
- package/.pi/harness/docs/adrs/0006-sentrux-dual-layer.md +7 -6
- package/.pi/harness/docs/adrs/0009-sentrux-rules-lifecycle.md +6 -1
- package/.pi/harness/docs/adrs/0031-harness-run-context.md +1 -1
- package/.pi/harness/docs/adrs/0032-harness-command-orchestration.md +7 -0
- package/.pi/harness/docs/adrs/0034-darwin-plan-research-pipeline.md +3 -3
- package/.pi/harness/docs/adrs/0036-implementation-research-and-selective-debate.md +8 -5
- package/.pi/harness/docs/adrs/0039-harness-post-run-review-gate.md +47 -0
- package/.pi/harness/docs/adrs/0040-practice-grounded-orchestration.md +40 -0
- package/.pi/harness/docs/adrs/0041-intelligent-planning-reconnaissance.md +39 -0
- package/.pi/harness/docs/adrs/0042-agent-native-orchestration.md +35 -0
- package/.pi/harness/docs/adrs/0043-path-first-harness-tools.md +38 -0
- package/.pi/harness/docs/adrs/0044-harness-steer-loop.md +36 -0
- package/.pi/harness/docs/adrs/README.md +10 -0
- package/.pi/harness/docs/graphify-kb-updater-runbook.md +157 -0
- package/.pi/harness/docs/practice-map.md +110 -0
- package/.pi/harness/env.harness.template +5 -3
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/artifacts/implementation-research.yaml +28 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/artifacts/review-round-consolidated.yaml +25 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/plan-packet.yaml +196 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/plan-review.md +14 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/research-brief.yaml +62 -0
- package/.pi/harness/evals/smoke/sentrux-stub.json +1 -1
- package/.pi/harness/evals/smoke/smoke-harness-plan.mjs +43 -17
- package/.pi/harness/specs/README.md +1 -1
- package/.pi/harness/specs/harness-run-context.schema.json +11 -0
- package/.pi/harness/specs/harness-spawn-context.schema.json +14 -0
- package/.pi/harness/specs/plan-execution-plan.schema.json +39 -1
- package/.pi/harness/specs/plan-packet.schema.json +4 -0
- package/.pi/harness/specs/plan-phase-status.schema.json +17 -0
- package/.pi/harness/specs/plan-phase-waiver.schema.json +25 -0
- package/.pi/harness/specs/plan-planning-context.schema.json +50 -0
- package/.pi/harness/specs/plan-review-round-draft.schema.json +1 -1
- package/.pi/harness/specs/repair-brief.schema.json +45 -0
- package/.pi/harness/specs/review-outcome.schema.json +46 -0
- package/.pi/harness/specs/sentrux-manifest-proposal.schema.json +80 -0
- package/.pi/harness/specs/sentrux-signal.schema.json +43 -0
- package/.pi/harness/specs/steer-state.schema.json +20 -0
- package/.pi/lib/harness-context-mode-policy.ts +256 -0
- package/.pi/lib/harness-repair-brief.ts +145 -0
- package/.pi/lib/harness-run-context.ts +591 -32
- package/.pi/lib/harness-ui-state.ts +87 -9
- package/.pi/model-router.example.json +13 -4
- package/.pi/prompts/harness-auto.md +9 -9
- package/.pi/prompts/harness-critic.md +3 -30
- package/.pi/prompts/harness-eval.md +4 -37
- package/.pi/prompts/harness-plan.md +139 -57
- package/.pi/prompts/harness-review.md +150 -15
- package/.pi/prompts/harness-run.md +62 -10
- package/.pi/prompts/harness-sentrux-steward.md +55 -0
- package/.pi/prompts/harness-setup.md +4 -4
- package/.pi/prompts/harness-steer.md +30 -0
- package/.pi/scripts/graphify-kb-updater.mjs +358 -0
- package/.pi/scripts/harness-generate-model-router.mjs +118 -36
- package/.pi/scripts/harness-model-router-routing.test.mjs +97 -0
- package/.pi/scripts/harness-sync-model-router.mjs +15 -2
- package/.pi/scripts/harness-verify.mjs +51 -6
- package/.pi/scripts/harness-web-policy-guard.mjs +68 -0
- package/.pi/scripts/validate-plan-dag.mjs +3 -3
- package/AGENTS.md +1 -0
- package/CHANGELOG.md +22 -0
- package/package.json +5 -4
- package/vendor/pi-model-router/UPSTREAM_PIN.md +3 -1
- package/vendor/pi-model-router/extensions/commands.ts +4 -4
- package/vendor/pi-model-router/extensions/index.ts +21 -0
- package/vendor/pi-model-router/extensions/provider.ts +130 -79
- package/vendor/pi-model-router/extensions/routing.ts +148 -0
- package/vendor/pi-model-router/extensions/state.ts +3 -0
- package/vendor/pi-model-router/extensions/types.ts +9 -0
- package/vendor/pi-model-router/extensions/ui.ts +16 -2
- package/.pi/prompts/git-sync.md +0 -124
|
@@ -15,13 +15,19 @@ description: Compile task-specific harness context using context-mode and graphi
|
|
|
15
15
|
- Use the **context-mode** npm package / pi integration for compression.
|
|
16
16
|
- **Do not** use lean-ctx (`ctx_read`, `ctx_search`, etc.) on harness paths — locked by Phase 2 plan.
|
|
17
17
|
|
|
18
|
-
##
|
|
18
|
+
## Tool menu (pick what the task needs)
|
|
19
19
|
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
20
|
+
Use these in rough priority order — not every tool on every task:
|
|
21
|
+
|
|
22
|
+
| Need | Tool |
|
|
23
|
+
|------|------|
|
|
24
|
+
| Architecture, god nodes, cross-file relationships | `graphify-out/GRAPH_REPORT.md`, `graphify query`, `graphify explain`, `graphify path` |
|
|
25
|
+
| Structural code patterns | `sg -p '…'` (ast-grep) |
|
|
26
|
+
| Semantic implementation search | `ccc search` (harness pre-indexes before subprocess spawns) |
|
|
27
|
+
| File detail | context-mode maps/signatures, then targeted reads |
|
|
28
|
+
| Harness governance | `.pi/harness/docs/adrs/README.md` |
|
|
29
|
+
|
|
30
|
+
For `/harness-plan` Phase 1, parent compiles findings into `artifacts/planning-context.yaml` — see **harness-plan** skill.
|
|
25
31
|
|
|
26
32
|
## Outputs
|
|
27
33
|
|
|
@@ -34,3 +40,4 @@ Compact context block:
|
|
|
34
40
|
## Rules
|
|
35
41
|
|
|
36
42
|
- `./raw/` is graphify source storage; run `graphify update .` after significant harness code changes.
|
|
43
|
+
- Subprocesses are optional; prefer parent tool use when reconnaissance fits the parent context window.
|
|
@@ -5,7 +5,32 @@ description: Plan-phase Review Gate debate — pi-messenger threads, lane YAML,
|
|
|
5
5
|
|
|
6
6
|
# harness-debate-plan
|
|
7
7
|
|
|
8
|
-
|
|
8
|
+
**Practice map:** `.pi/harness/docs/practice-map.md` (Review Gate RACI).
|
|
9
|
+
|
|
10
|
+
Use when running **Phase 5** of `/harness-plan` — **Fagan-style structured inspection** per focus (`spec` | `wbs` | `schedule` | `quality`). Parent is **chair**; within-round dialogue proceeds claims → rebuttals → clarifications → counters → integrate.
|
|
11
|
+
|
|
12
|
+
## Inspection roles
|
|
13
|
+
|
|
14
|
+
| Agent | Role |
|
|
15
|
+
|-------|------|
|
|
16
|
+
| `hypothesis-validator` | Blind verifier (R1 only) |
|
|
17
|
+
| `plan-evaluator` | Inspector (checklist) |
|
|
18
|
+
| `plan-adversary` | Red team |
|
|
19
|
+
| `sprint-contract-auditor` | DoD auditor (`quality` or round ≥4) |
|
|
20
|
+
| `review-integrator` | Recorder / integration PM |
|
|
21
|
+
|
|
22
|
+
Do **not** add agents for `fast` profile — reduce focuses/rounds only.
|
|
23
|
+
|
|
24
|
+
## Debate profiles (team size)
|
|
25
|
+
|
|
26
|
+
| Profile | Mode | Focuses | When |
|
|
27
|
+
|---------|------|---------|------|
|
|
28
|
+
| `full` | threaded | all four | High risk, fork, open questions |
|
|
29
|
+
| `standard` | threaded | all four | Default med risk |
|
|
30
|
+
| `light` | threaded | spec, quality | Low risk, high-confidence research |
|
|
31
|
+
| `fast` | **consolidated** | spec, quality (one round) | Clear stack, no open questions; escalate to threaded on blockers |
|
|
32
|
+
|
|
33
|
+
Eligibility: `harness_plan_debate_eligibility` then `harness_debate_open({ debate_profile, required_focuses })`.
|
|
9
34
|
|
|
10
35
|
## Open
|
|
11
36
|
|
|
@@ -16,30 +41,22 @@ harness_debate_open({})
|
|
|
16
41
|
- Debate id is always `plan-<run_id>` (tool normalizes wrong ids).
|
|
17
42
|
- Creates `.pi/harness/runs/<run_id>/debate-messenger/`.
|
|
18
43
|
|
|
19
|
-
Budget profile
|
|
44
|
+
Budget caps vary by profile (see `plan-debate-eligibility.ts`); standard plan profile uses `min_focus_rounds=4`, `debate_global_cap=80000`.
|
|
20
45
|
|
|
21
|
-
|
|
22
|
-
|-------|-------|
|
|
23
|
-
| min_focus_rounds | 4 |
|
|
24
|
-
| max_rounds | 12 |
|
|
25
|
-
| max_exchanges_per_round | 3 |
|
|
26
|
-
| round_token_cap | 8000 |
|
|
27
|
-
| debate_global_cap | 80000 |
|
|
46
|
+
## Focus coverage
|
|
28
47
|
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
Call `harness_debate_focus_coverage` until all of `spec | wbs | schedule | quality` appear in submitted `review-round-r*.yaml` and last `review_gate_ready: true`.
|
|
48
|
+
Call `harness_debate_focus_coverage` until all **required** focuses (from eligibility) appear in submitted review rounds and last `review_gate_ready: true`.
|
|
32
49
|
|
|
33
50
|
## Per-round spawn order (sequential only — no parallel debate subagents)
|
|
34
51
|
|
|
35
|
-
1. R1: `hypothesis-validator` (blind) before
|
|
36
|
-
2. `plan-evaluator` → lane + messenger `claim`.
|
|
37
|
-
3. `harness_messenger_read_round` → `plan-adversary` → `rebuttal`.
|
|
38
|
-
4. Ping-pong while `unresolved_claim_ids` and `exchange_count <
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
52
|
+
1. R1: `hypothesis-validator` (blind verifier) before inspector.
|
|
53
|
+
2. `plan-evaluator` (inspector) → lane + messenger `claim`.
|
|
54
|
+
3. `harness_messenger_read_round` → `plan-adversary` (red team) → `rebuttal`.
|
|
55
|
+
4. Ping-pong while `unresolved_claim_ids` and `exchange_count < max` for profile.
|
|
56
|
+
5. `sprint-contract-auditor` (DoD) when focus is `quality` or round ≥ 4.
|
|
57
|
+
6. `review-integrator` (recorder) → `harness_debate_submit_round`.
|
|
58
|
+
|
|
59
|
+
**One subagent per `subagent` call** — never batch debate lanes.
|
|
43
60
|
|
|
44
61
|
Lane YAML + messenger messages **auto-apply** on subagent complete (`harness-debate-next-step`). Fallback: `harness_debate_apply_lane`.
|
|
45
62
|
|
|
@@ -1,27 +1,12 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: harness-eval
|
|
3
|
-
description:
|
|
3
|
+
description: >-
|
|
4
|
+
Deprecated — use harness-review skill and /harness-review for the full post-run
|
|
5
|
+
gate. This file remains as a pointer for older prompts.
|
|
4
6
|
---
|
|
5
7
|
|
|
6
|
-
# harness-eval
|
|
8
|
+
# harness-eval (deprecated)
|
|
7
9
|
|
|
8
|
-
|
|
10
|
+
Use **`harness-review`** skill and **`/harness-review`** instead.
|
|
9
11
|
|
|
10
|
-
|
|
11
|
-
- Before merge / release readiness
|
|
12
|
-
|
|
13
|
-
## Workflow (orchestrator)
|
|
14
|
-
|
|
15
|
-
1. Parent may run deterministic scripts (`harness-verify`, project tests).
|
|
16
|
-
2. Spawn `harness/evaluator` with `mode: benchmark` and artifact paths in `HarnessSpawnContext`.
|
|
17
|
-
3. Parse JSON from `get_subagent_result`; parent writes run artifacts.
|
|
18
|
-
|
|
19
|
-
## Rules
|
|
20
|
-
|
|
21
|
-
- No new Pi session — subagent isolation via `Agent` spawn (ADR 0032).
|
|
22
|
-
- Do not edit `plan-packet.json` in eval phase.
|
|
23
|
-
- `/harness-review` uses same agent with `mode: verdict` for policy EvalVerdict.
|
|
24
|
-
|
|
25
|
-
## Verdict values
|
|
26
|
-
|
|
27
|
-
`pass`, `conditional_pass`, `fail`, `human_required` (parent handles `ask_user`).
|
|
12
|
+
The master command runs benchmark + policy verdict (+ adversary unless `--quick`) with `submit_eval_verdict` / `submit_adversary_report` and parent `harness_artifact_ready` gates (ADR 0037, ADR 0039).
|
|
@@ -15,8 +15,9 @@ description: Enforce harness governance phases, policy gates, budgets, and promo
|
|
|
15
15
|
|
|
16
16
|
1. Read current phase from `/harness-policy-status` or session `harness-policy-state`.
|
|
17
17
|
2. Check ADRs: constitution (0001), eval promotion (0003), Sentrux (0006), drift (0007), rules lifecycle (0009).
|
|
18
|
-
3. For promotion: require eval pass, no abort lock, debate consensus if escalated, Sentrux when `HARNESS_SENTRUX_REQUIRED=true
|
|
19
|
-
4.
|
|
18
|
+
3. For promotion: require eval pass, no abort lock, debate consensus if escalated, Sentrux when `HARNESS_SENTRUX_REQUIRED=true` (`artifacts/sentrux-signal.yaml` from `/harness-run`, not executor self-report).
|
|
19
|
+
4. **Intent vs observation:** Manifest/layer/boundary changes → `/harness-sentrux-steward` proposal + chair approval + ADR when material, then `sentrux-rules-sync --force`. `sentrux check`/`gate` degradation after execute → replan or fix code — do not tune manifest on a single noisy gate.
|
|
20
|
+
5. After approved manifest edits: `node "$UP_PKG/.pi/scripts/harness-sentrux-bootstrap.mjs" --force` or `/harness-sentrux-sync`; emit `harness-architecture-changed` for the extension.
|
|
20
21
|
6. Run `node "$UP_PKG/.pi/scripts/harness-verify.mjs"` before claiming release readiness.
|
|
21
22
|
|
|
22
23
|
## Spec Distiller integration
|
|
@@ -31,7 +32,7 @@ When refining plans from noisy requirements:
|
|
|
31
32
|
## Budgets (ADR 0038)
|
|
32
33
|
|
|
33
34
|
- Default: **`HARNESS_BUDGET_ENFORCE` off** — token/debate caps are telemetry-only (`harness-budget-telemetry`, `harness-budget-soft-limit`). They do **not** block phases or debate lanes.
|
|
34
|
-
- Do **not** skip
|
|
35
|
+
- Do **not** skip reconnaissance artifacts (`planning-context.yaml`), debate rounds, or `approve_plan` because of soft budget hints in the widget.
|
|
35
36
|
- Re-enable hard caps only with `HARNESS_BUDGET_ENFORCE=1` and `HARNESS_BUDGET_HARD_STOP` / `HARNESS_DEBATE_HARD_STOP`.
|
|
36
37
|
|
|
37
38
|
## Subagent artifacts (ADR 0037)
|
|
@@ -3,94 +3,82 @@ name: harness-orchestration
|
|
|
3
3
|
description: >-
|
|
4
4
|
Orchestrate ultimate-pi harness phases with the native `subagent` tool
|
|
5
5
|
(isolated `pi --mode json` subprocesses). Use for plan/execute/evaluate
|
|
6
|
-
pipelines, L4 verification,
|
|
6
|
+
pipelines, L4 verification, optional planning-context, and debate prep.
|
|
7
7
|
---
|
|
8
8
|
|
|
9
9
|
# Harness orchestration
|
|
10
10
|
|
|
11
|
+
**Practice map:** `.pi/harness/docs/practice-map.md` · **ADR 0040** · **ADR 0041**.
|
|
12
|
+
|
|
13
|
+
## Team management rules
|
|
14
|
+
|
|
15
|
+
1. **Parallelism law** — Parallel `tasks` only when outputs are independent inputs to a later merge (implementation ∥ stack). Never parallelize debate lanes or decompose ∥ hypothesis.
|
|
16
|
+
2. **Two-pizza cap per batch** — Max 2 research lanes, 1 optional `planning-context` subagent, 1 executor, 1 debate agent per `subagent` call.
|
|
17
|
+
3. **No redundant thinkers** — Downstream agents read artifacts; do not re-derive.
|
|
18
|
+
4. **Sequential dependency chain** — planning context → decompose → hypothesis → research → author → DAG → debate → approve → execute → **/harness-review** → optional **/harness-steer** loop (ADR 0044).
|
|
19
|
+
5. **Path-first parent tools** — `approve_plan`, `create_plan`, `submit_*` via `source_path`, `merge_harness_yaml`, `harness_synthesize_repair_brief`.
|
|
20
|
+
6. **Debate = meeting** — Parent is chair; parallel_probes allows evaluator ∥ adversary per batch.
|
|
21
|
+
7. **Tool intelligence** — Parent uses graphify, sg, ccc, and reads by task need; subprocesses optional.
|
|
22
|
+
|
|
11
23
|
## Slash commands = orchestrators
|
|
12
24
|
|
|
13
25
|
`/harness-*` prompts parse args, call `subagent`, run `ask_user`, write policy-gated artifacts. Phase logic lives in `.pi/agents/harness/*.md` and `.pi/agents/harness/planning/*.md`.
|
|
14
26
|
|
|
15
27
|
Every spawn includes **HarnessSpawnContext** JSON in the task text (subprocess agents do not get `[HarnessActivePlan]` injection). Use `agentScope: "both"` so package agents under `$UP_PKG/.pi/agents/**` resolve.
|
|
16
28
|
|
|
17
|
-
Harness subprocesses load **`harness-subagent-submit`** (`PI_HARNESS_SUBPROCESS=1`, `HARNESS_RUN_ID`, `HARNESS_RUN_DIR`). Agents must call their scoped **`submit_*`** tool before exit; parent gates use **`harness_artifact_ready
|
|
18
|
-
|
|
19
|
-
## Subprocess telemetry
|
|
20
|
-
|
|
21
|
-
Harness bridge emits `harness_subagent_spawned` / `harness_subagent_completed` (replaces in-process setup/blackboard events).
|
|
22
|
-
|
|
23
|
-
```sql
|
|
24
|
-
SELECT
|
|
25
|
-
properties.agent as agent,
|
|
26
|
-
count() as n,
|
|
27
|
-
round(avg(toFloat(properties.duration_ms)), 0) as avg_ms
|
|
28
|
-
FROM events
|
|
29
|
-
WHERE event = 'harness_subagent_completed'
|
|
30
|
-
AND timestamp >= now() - INTERVAL 7 DAY
|
|
31
|
-
GROUP BY agent
|
|
32
|
-
ORDER BY avg_ms DESC
|
|
33
|
-
LIMIT 30
|
|
34
|
-
```
|
|
29
|
+
Harness subprocesses load **`harness-subagent-submit`** (`PI_HARNESS_SUBPROCESS=1`, `HARNESS_RUN_ID`, `HARNESS_RUN_DIR`). Agents must call their scoped **`submit_*`** tool before exit; parent gates use **`harness_artifact_ready`**.
|
|
35
30
|
|
|
36
31
|
## Latency rules
|
|
37
32
|
|
|
38
|
-
1. **Parallel `tasks`** —
|
|
39
|
-
2. **
|
|
40
|
-
3. **Compact handoffs** — read
|
|
41
|
-
4. **No spawn cap** —
|
|
33
|
+
1. **Parallel `tasks`** — Phase 3.5 research only (when using subprocesses).
|
|
34
|
+
2. **Sequential** — decompose, hypothesis, debate lanes, review evaluator passes.
|
|
35
|
+
3. **Compact handoffs** — read artifact paths; never paste full subprocess logs into next spawn.
|
|
36
|
+
4. **No spawn cap** — do not pass `timeoutMs` unless the user requests a cap.
|
|
42
37
|
|
|
43
38
|
## Command → agent
|
|
44
39
|
|
|
45
40
|
| Command | `agent` |
|
|
46
41
|
|---------|---------|
|
|
47
|
-
| `/harness-plan` | Parent:
|
|
48
|
-
| `/harness-run` | `harness/executor` |
|
|
49
|
-
| `/harness-
|
|
50
|
-
| `/harness-
|
|
51
|
-
| `/harness-critic` |
|
|
52
|
-
| `/harness-
|
|
53
|
-
| `/harness-incident` | `harness/incident-recorder` |
|
|
54
|
-
| `/harness-router-tune` | `harness/meta-optimizer` (optional) |
|
|
55
|
-
| `/harness-auto` | plan per `/harness-plan`; `--quick` skips adversary + tie-breaker |
|
|
42
|
+
| `/harness-plan` | Parent: planning context (tools) → decompose → hypothesis → Phase 3.5 artifacts → PlanPacket → eligibility + Review Gate → `approve_plan` + `create_plan` |
|
|
43
|
+
| `/harness-run` | `harness/executor` (single worker) |
|
|
44
|
+
| `/harness-review` | Parent verify → `evaluator` benchmark → `evaluator` verdict → `adversary` → optional `tie-breaker` (ADR 0039) |
|
|
45
|
+
| `/harness-eval` | **Deprecated** → `/harness-review` |
|
|
46
|
+
| `/harness-critic` | **Deprecated** → `/harness-review` |
|
|
47
|
+
| `/harness-auto` | plan per `/harness-plan`; `--quick` skips adversary + tie-breaker in review |
|
|
56
48
|
|
|
57
49
|
## Review isolation
|
|
58
50
|
|
|
59
|
-
Spawn `harness/evaluator` / `harness/adversary` via `subagent` in the **same** parent session. `review-integrity` allows `subagent` when `agent` is in the review set
|
|
51
|
+
Spawn `harness/evaluator` / `harness/adversary` via `subagent` in the **same** parent session. `review-integrity` allows `subagent` when `agent` is in the review set.
|
|
60
52
|
|
|
61
53
|
## ask_user policy
|
|
62
54
|
|
|
63
55
|
| Role | `ask_user` |
|
|
64
56
|
|------|------------|
|
|
65
57
|
| Parent orchestrator | Yes (plan clarification, `approve_plan`, router tune) |
|
|
66
|
-
| `harness/planning/*` | No —
|
|
58
|
+
| `harness/planning/*` | No — `human_required` in output if stuck |
|
|
67
59
|
| `harness/evaluator`, `harness/adversary`, `harness/tie-breaker` | `human_required` in subprocess JSON |
|
|
68
60
|
| `harness/executor` | No — parent handles governance |
|
|
69
61
|
|
|
70
62
|
## Spawn pattern (`/harness-plan`)
|
|
71
63
|
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
{ "agent": "harness/planning/scout-structure", "task": "…" },
|
|
78
|
-
{ "agent": "harness/planning/scout-semantic", "task": "…" }
|
|
79
|
-
]
|
|
80
|
-
}
|
|
81
|
-
```
|
|
64
|
+
**Phase 1 — planning context (parent default):**
|
|
65
|
+
|
|
66
|
+
- Use `graphify query`, `sg -p`, `ccc search`, and reads as needed.
|
|
67
|
+
- Write `artifacts/planning-context.yaml` via `write_harness_yaml`.
|
|
68
|
+
- Optional: single `planning-context` subagent when isolation helps.
|
|
82
69
|
|
|
83
|
-
|
|
70
|
+
**Phase 2 — sequential:**
|
|
84
71
|
|
|
85
|
-
|
|
72
|
+
```
|
|
73
|
+
subagent decompose → gate decomposition.yaml
|
|
74
|
+
subagent hypothesis → gate hypothesis.yaml
|
|
75
|
+
```
|
|
86
76
|
|
|
87
|
-
|
|
77
|
+
**Phase 3.5 — research artifacts required:** parent inline and/or parallel `implementation-researcher` + `stack-researcher` (≤2).
|
|
88
78
|
|
|
89
|
-
-
|
|
90
|
-
- `approve_plan`, `create_plan` — parent orchestrator only
|
|
91
|
-
- Subprocess agents cannot nest `subagent` (`subagent` stripped from child `--tools`)
|
|
79
|
+
Then execution-plan-author, DAG gate, debate eligibility, sequential debate rounds, `approve_plan` + `create_plan`.
|
|
92
80
|
|
|
93
81
|
## References
|
|
94
82
|
|
|
95
|
-
- ADR 0032, ADR 0033, `.pi/harness/specs/harness-spawn-context.schema.json`
|
|
83
|
+
- ADR 0032, ADR 0033, ADR 0040, ADR 0041, `.pi/harness/specs/harness-spawn-context.schema.json`
|
|
96
84
|
- `node "$UP_PKG/.pi/scripts/harness-agents-manifest.mjs" --check`
|
|
@@ -1,33 +1,44 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: harness-plan
|
|
3
|
-
description:
|
|
3
|
+
description: Agent-native harness plans — lakes/context bundles, planning context, parallel_probes debate profile, plan-synthesizer on low/med risk, path-first approve_plan/create_plan, then DAG + debate.
|
|
4
4
|
---
|
|
5
5
|
|
|
6
6
|
# harness-plan
|
|
7
7
|
|
|
8
|
+
**Practice map:** `.pi/harness/docs/practice-map.md` · **ADR 0040** · **ADR 0042** · **ADR 0043**.
|
|
9
|
+
|
|
8
10
|
## When to use
|
|
9
11
|
|
|
10
12
|
- `/harness-plan`, harness-auto plan phase, drift replan, policy-gate without approved plan
|
|
11
13
|
|
|
14
|
+
## Team topology (spawn laws)
|
|
15
|
+
|
|
16
|
+
1. **Parallelism law** — Parallel `tasks` only for independent lanes (implementation ∥ stack ≤2). Never parallelize debate or decompose ∥ hypothesis.
|
|
17
|
+
2. **Two-pizza cap** — Max 1 debate agent and 1 optional planning-context subagent per `subagent` call.
|
|
18
|
+
3. **No redundant thinkers** — Read upstream YAML; do not re-run graphify in decompose when `planning-context` architecture coverage is ok.
|
|
19
|
+
4. **Sequential chain** — planning context → decompose → hypothesis → research → author → DAG → debate → approve.
|
|
20
|
+
5. **Tool intelligence** — Parent picks graphify, sg, ccc by task; no mandatory tool-tied scout subprocesses.
|
|
21
|
+
|
|
12
22
|
## Workflow (parent orchestrator)
|
|
13
23
|
|
|
14
|
-
1.
|
|
15
|
-
2.
|
|
16
|
-
3. **
|
|
17
|
-
4.
|
|
18
|
-
5. `
|
|
19
|
-
6.
|
|
20
|
-
7. **`
|
|
21
|
-
8. **`
|
|
24
|
+
1. **Phase 1:** Compile `artifacts/planning-context.yaml` with tools (default) or optional `planning-context` subagent.
|
|
25
|
+
2. **Sequential** decompose → gate `artifacts/decomposition.yaml`.
|
|
26
|
+
3. **Sequential** hypothesis (requires decomposition).
|
|
27
|
+
4. **Phase 3.5:** `implementation-research.yaml` + `stack.yaml` (parent inline and/or parallel researchers).
|
|
28
|
+
5. Draft `PlanPacket` shell; `ask_user` on material fork **after** Phase 3.5.
|
|
29
|
+
6. `execution-plan-author` → merge `execution_plan`.
|
|
30
|
+
7. **`validate-plan-dag.mjs`** (must pass).
|
|
31
|
+
8. **`harness_plan_debate_eligibility`** — `parallel_probes` spawns plan-evaluator ∥ plan-adversary, then integrator round.
|
|
32
|
+
9. **`approve_plan({ human_summary? })`** / **`create_plan()`** — packet from `plan_packet_path` on disk (path-first).
|
|
22
33
|
|
|
23
|
-
`--quick` skips semantic
|
|
34
|
+
`--quick` skips only semantic coverage in planning context and the post-run adversary — it does **not** skip adequate reconnaissance, implementation/stack artifacts (med/high risk), or plan debate.
|
|
24
35
|
|
|
25
36
|
## Rules
|
|
26
37
|
|
|
27
|
-
- On-disk plan artifacts are **YAML** (`plan-packet.yaml`, `research-brief.yaml`).
|
|
38
|
+
- On-disk plan artifacts are **YAML** (`plan-packet.yaml`, `research-brief.yaml`, `planning-context.yaml`).
|
|
28
39
|
- Subagents read-only; parent writes run artifacts and calls `approve_plan` / `create_plan`.
|
|
29
40
|
- context-mode only on harness paths.
|
|
30
|
-
- Phase 3.5 required
|
|
41
|
+
- Phase 3.5 artifacts required for med/high risk unless documented waiver.
|
|
31
42
|
|
|
32
43
|
## Output
|
|
33
44
|
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: harness-review
|
|
3
|
+
description: >-
|
|
4
|
+
Post-run verification gate (/harness-review): harness-verify, Sentrux fitness
|
|
5
|
+
functions, benchmark + verdict evaluator, adversary, optional tie-breaker.
|
|
6
|
+
Subagents use submit_*; parent uses harness_artifact_ready. Use after
|
|
7
|
+
/harness-run; claim cross-session runs with /harness-use-run --claim.
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
# harness-review
|
|
11
|
+
|
|
12
|
+
**Practice map:** `.pi/harness/docs/practice-map.md` (Monitoring and Controlling: measure → judge → red team).
|
|
13
|
+
|
|
14
|
+
## When to use
|
|
15
|
+
|
|
16
|
+
- After `/harness-run` completes (same session preferred)
|
|
17
|
+
- Resuming with `/harness-use-run <run-id> --claim` then `/harness-review`
|
|
18
|
+
- Instead of separate `/harness-eval`, `/harness-critic` (aliases forward here)
|
|
19
|
+
|
|
20
|
+
## Orchestration summary
|
|
21
|
+
|
|
22
|
+
| Phase | Practice | Actor | Artifact |
|
|
23
|
+
|-------|----------|-------|----------|
|
|
24
|
+
| 1 | Automated QC + Sentrux fitness functions | Parent | `harness-verify.mjs`, `sentrux gate .`, `benchmark-log.yaml`, `sentrux-signal.yaml` |
|
|
25
|
+
| 2 | Measure actuals (EVM) | `harness/evaluator` benchmark | `eval-verdict.yaml` |
|
|
26
|
+
| 2b | Controlling | Parent | Write `review-outcome.yaml`; route via `remediation_class` (not fail-fast abort) |
|
|
27
|
+
| 6 | Outcome | Parent | `review-outcome.yaml` → `/harness-steer` or replan |
|
|
28
|
+
| 3 | Policy audit | `harness/evaluator` verdict | same YAML |
|
|
29
|
+
| 4 | Red team | `harness/adversary` | `adversary-report.yaml` |
|
|
30
|
+
| 5 | Arbitration | `harness/tie-breaker` | only if block + conditional_pass |
|
|
31
|
+
|
|
32
|
+
## Phase 1 — Sentrux (structural actuals)
|
|
33
|
+
|
|
34
|
+
When `HARNESS_SENTRUX_REQUIRED=true` (default in `.env.example`):
|
|
35
|
+
|
|
36
|
+
1. `node "$UP_PKG/.pi/scripts/harness-verify.mjs"` — rules drift + `sentrux check` when CLI installed.
|
|
37
|
+
2. `sentrux gate .` — compare to baseline saved during `/harness-run`.
|
|
38
|
+
3. Write `artifacts/sentrux-signal.yaml` and append session entry `harness-sentrux-signal` (observation bus / PostHog).
|
|
39
|
+
4. Optional `artifacts/benchmark-log.yaml` fields: `sentrux_check`, `sentrux_gate`, `harness_verify`.
|
|
40
|
+
|
|
41
|
+
Pass `sentrux-signal.yaml` path to evaluator `mode: benchmark` spawn context. Evaluator treats metrics as measured facts, not goals for the executor.
|
|
42
|
+
|
|
43
|
+
## Rules
|
|
44
|
+
|
|
45
|
+
- Parent never writes eval/adversary YAML — subprocess `submit_*` only (ADR 0037).
|
|
46
|
+
- Auto-claim run ownership unless `--readonly`.
|
|
47
|
+
- Disk verdict drives `next_recommended_command` (`resolveCompletionStatuses`).
|
|
48
|
+
|
|
49
|
+
## Aliases
|
|
50
|
+
|
|
51
|
+
- `/harness-eval` → use `/harness-review`
|
|
52
|
+
- `/harness-critic` → use `/harness-review` (or `--quick` to skip adversary)
|
|
@@ -11,6 +11,17 @@ description: Bootstrap Sentrux architectural rules for harness projects — seed
|
|
|
11
11
|
- Target repo has no `.sentrux/rules.toml` or `harness-verify` reports rules out of date
|
|
12
12
|
- User edited `.pi/harness/sentrux/architecture.manifest.json` (layers, boundaries, constraints)
|
|
13
13
|
|
|
14
|
+
## Roles (do not conflate)
|
|
15
|
+
|
|
16
|
+
| Role | Agent / command | Layer |
|
|
17
|
+
|------|-----------------|-------|
|
|
18
|
+
| **Bootstrap** | `harness/sentrux-bootstrap`, `harness-sentrux-bootstrap.mjs` | Greenfield seed + first sync |
|
|
19
|
+
| **Steward** | `harness/sentrux-steward`, `/harness-sentrux-steward` | Proposes manifest changes (`artifacts/sentrux-manifest-proposal.yaml`); chair applies |
|
|
20
|
+
| **Sync** | `sentrux-rules-sync.mjs`, `/harness-sentrux-sync` | Regenerates `rules.toml` from manifest after intent change |
|
|
21
|
+
| **Observation** | `/harness-run`, `/harness-review` | `sentrux gate --save` / `check` / `gate` → `artifacts/sentrux-signal.yaml` |
|
|
22
|
+
|
|
23
|
+
Never auto-sync manifest from directory trees. Material manifest edits need steward evidence + chair approval (ADR 0009).
|
|
24
|
+
|
|
14
25
|
## Canonical layout
|
|
15
26
|
|
|
16
27
|
| Path | Role |
|
|
@@ -53,4 +64,5 @@ Do **not** copy ultimate-pi's layer paths blindly into unrelated layouts — edi
|
|
|
53
64
|
|
|
54
65
|
- ADR 0009 — `.pi/harness/docs/adrs/0009-sentrux-rules-lifecycle.md`
|
|
55
66
|
- Scripts — `.pi/scripts/sentrux-rules-sync.mjs`, `harness-sentrux-bootstrap.mjs`
|
|
56
|
-
-
|
|
67
|
+
- Agents — `harness/sentrux-bootstrap` (setup), `harness/sentrux-steward` (intent proposals)
|
|
68
|
+
- Specs — `sentrux-manifest-proposal.schema.json`, `sentrux-signal.schema.json`
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: harness-steer
|
|
3
|
+
description: Post-review repair loop via harness-steer and executor repair mode (ADR 0044).
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# harness-steer
|
|
7
|
+
|
|
8
|
+
Use after `/harness-review` when `artifacts/review-outcome.yaml` has `remediation_class: implementation_gap`.
|
|
9
|
+
|
|
10
|
+
1. Read `repair-brief.yaml` and `plan_packet_path` (paths only).
|
|
11
|
+
2. Set the policy phase to `execute`; spawn `harness/executor` with `mode: repair`.
|
|
12
|
+
3. Always follow with `/harness-review`.
|
|
13
|
+
|
|
14
|
+
See `.pi/prompts/harness-steer.md` and `.pi/harness/docs/adrs/0044-harness-steer-loop.md`.
|
|
@@ -30,13 +30,6 @@ Pressure-test the candidate with adversarial reasoning and reproducible attacks.
|
|
|
30
30
|
|
|
31
31
|
## Output
|
|
32
32
|
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
"adversary_report": { },
|
|
37
|
-
"human_summary": "…",
|
|
38
|
-
"recommendation": "proceed"
|
|
39
|
-
}
|
|
40
|
-
```
|
|
41
|
-
|
|
42
|
-
Use `recommendation`: `proceed`, `conditional_pass`, or `block`.
|
|
33
|
+
Call **`submit_adversary_report`** before exit (writes `artifacts/adversary-report.yaml`). Do not emit prose-only JSON for the parent to copy onto disk.
|
|
34
|
+
|
|
35
|
+
Use `recommendation`: `proceed`, `conditional_pass`, or `block`. Set `block_merge: true` when merge must halt.
|
|
@@ -17,7 +17,7 @@ Independently validate execution outcomes and emit structured verdicts. Spawn co
|
|
|
17
17
|
|
|
18
18
|
1. Read `HarnessSpawnContext` and artifact paths (`plan_packet_path`, `run_dir`, trace refs).
|
|
19
19
|
2. Reconstruct validation scope from the plan and on-disk run artifacts.
|
|
20
|
-
3. For `benchmark` mode: run or summarize deterministic checks (project tests, harness-verify if instructed in spawn prompt);
|
|
20
|
+
3. For `benchmark` mode: run or summarize deterministic checks (project tests, harness-verify if instructed in spawn prompt); read `artifacts/sentrux-signal.yaml` and `artifacts/benchmark-log.yaml` when present — cite `check_pass`, `gate_status`, and `quality_signal_summary` as measured structural actuals (do not treat as optimization targets for the executor).
|
|
21
21
|
4. For `verdict` mode: emit `EvalVerdict` matching `.pi/harness/specs/eval-verdict.schema.json`.
|
|
22
22
|
5. Recommend only: `proceed_to_adversary`, `replan`, or `rollback`.
|
|
23
23
|
6. Set `human_required` in structured output when blocked; never call `ask_user`.
|
|
@@ -31,15 +31,6 @@ Independently validate execution outcomes and emit structured verdicts. Spawn co
|
|
|
31
31
|
|
|
32
32
|
## Output
|
|
33
33
|
|
|
34
|
-
|
|
34
|
+
Call **`submit_eval_verdict`** before exit with a document matching `eval-verdict.schema.json` (writes `artifacts/eval-verdict.yaml` under the run dir). Do not ask the parent to parse JSON or write verdict files.
|
|
35
35
|
|
|
36
|
-
|
|
37
|
-
{
|
|
38
|
-
"eval_status": "pass",
|
|
39
|
-
"eval_verdict": { },
|
|
40
|
-
"human_summary": "…",
|
|
41
|
-
"recommended_action": "proceed_to_adversary"
|
|
42
|
-
}
|
|
43
|
-
```
|
|
44
|
-
|
|
45
|
-
Use `eval_status`: `pass`, `conditional_pass`, or `fail`.
|
|
36
|
+
Use `status`: `pass`, `conditional_pass`, or `fail`. Use `recommended_action`: `proceed_to_adversary`, `replan`, or `rollback`.
|
|
@@ -13,12 +13,17 @@ You are the Harness Executor.
|
|
|
13
13
|
|
|
14
14
|
Implement the approved plan with surgical diffs and strict scope control. The parent orchestrator spawned you with a `HarnessSpawnContext` appendix — use `plan_packet_path`, `run_dir`, and acceptance checks from that JSON.
|
|
15
15
|
|
|
16
|
+
## Repair mode (`mode: repair`)
|
|
17
|
+
|
|
18
|
+
When spawn context sets `mode: repair`, read `repair_brief_path` (typically `artifacts/repair-brief.yaml`). Fix only what the brief lists — failed acceptance checks, `fix_directives`, and `priority_lake_ids`. Do **not** widen scope beyond `plan_packet_path`. Set `repair_attempt` in handoff metadata when the schema allows.
|
|
19
|
+
|
|
16
20
|
## Process
|
|
17
21
|
|
|
18
|
-
1. Read the approved `PlanPacket` at `plan_packet_path` from spawn context; extract allowed scope before any mutation.
|
|
19
|
-
2.
|
|
22
|
+
1. Read the approved `PlanPacket` at `plan_packet_path` from spawn context; extract allowed scope before any mutation. Approval is recorded in `run-context.yaml` (`plan_ready: true`) and subprocess policy bootstrap — not as a field inside `plan-packet.yaml`.
|
|
23
|
+
2. When spawn context lists `critical_path_work_item_ids` (from `schedule_metadata`), implement those work items before non-critical items when practical (limiting-step / Grove).
|
|
24
|
+
3. Implement only approved scope with minimal, reversible diffs.
|
|
20
25
|
3. Run focused validations mapped to `acceptance_checks`.
|
|
21
|
-
4. Prepare rollback
|
|
26
|
+
4. Prepare rollback metadata in `rollback_refs` (revert command, revert branch, patch bundle path under the run directory). **`submit_executor_handoff`** writes `handoff/executor-summary.yaml` and mirrors `rollback_refs` to `artifacts/executor-rollback.yaml` (YAML only — no `artifacts/*.json`).
|
|
22
27
|
5. For plan-level ambiguity (wrong scope, missing acceptance), stop and return structured `scope_drift` — do not widen scope.
|
|
23
28
|
6. Do not self-certify final quality; hand off evidence paths for evaluator/adversary.
|
|
24
29
|
|
|
@@ -32,16 +37,9 @@ Implement the approved plan with surgical diffs and strict scope control. The pa
|
|
|
32
37
|
|
|
33
38
|
## Output
|
|
34
39
|
|
|
35
|
-
|
|
40
|
+
Call **`submit_executor_handoff`** with a document matching `harness-executor-handoff.schema.json` before exit:
|
|
36
41
|
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
"execution_status": "completed",
|
|
40
|
-
"files_changed": [],
|
|
41
|
-
"validation_summary": "…",
|
|
42
|
-
"rollback_refs": {},
|
|
43
|
-
"handoff_ready": { "evaluator": true, "adversary": true }
|
|
44
|
-
}
|
|
45
|
-
```
|
|
42
|
+
- `execution_status`: `completed`, `blocked`, or `scope_drift`
|
|
43
|
+
- `files_changed`, `validation_summary`, `rollback_refs`, `handoff_ready`
|
|
46
44
|
|
|
47
|
-
|
|
45
|
+
Do not write `artifacts/executor-rollback.json` — rollback is emitted as YAML by the submit pipeline.
|
|
@@ -7,7 +7,9 @@ thinking: medium
|
|
|
7
7
|
max_turns: 12
|
|
8
8
|
---
|
|
9
9
|
|
|
10
|
-
You are the **Harness
|
|
10
|
+
You are the **Harness problem-framing agent (Phase 2a — lakes / scope)**.
|
|
11
|
+
|
|
12
|
+
**Inspection role:** Outcome author (lake-sized units, not ticket WBS). See `.pi/harness/docs/practice-map.md` and ADR 0042.
|
|
11
13
|
|
|
12
14
|
## Mission
|
|
13
15
|
|
|
@@ -19,9 +21,10 @@ Read `HarnessSpawnContext` and the merged **scout lane JSON** in the spawn promp
|
|
|
19
21
|
|
|
20
22
|
## Process
|
|
21
23
|
|
|
22
|
-
1.
|
|
23
|
-
2.
|
|
24
|
-
3.
|
|
24
|
+
1. Read Phase 1 reconnaissance from spawn context paths — prefer `artifacts/planning-context.yaml`; legacy `artifacts/scout-*.yaml` lanes are accepted when present.
|
|
25
|
+
2. Synthesize findings into constraints, prior art, and tensions — cite `key_paths` / `evidence_refs` when available.
|
|
26
|
+
3. **Graphify dedup:** If `planning-context.yaml` has `coverage.architecture.status` of `ok`, or legacy `scout-graphify.yaml` has `status: ok`, do **not** run `graphify query` / `graphify explain` / `graphify path`. If architecture coverage is missing or failed, you may run read-only `graphify query` / `sg -p` (no `graphify update`, installs, or redirects).
|
|
27
|
+
4. Do not read `.pi/harness/specs/*.schema.json` from disk.
|
|
25
28
|
|
|
26
29
|
## Phase 1 — DeepMind-style decomposition
|
|
27
30
|
|
|
@@ -7,6 +7,8 @@ thinking: medium
|
|
|
7
7
|
max_turns: 10
|
|
8
8
|
---
|
|
9
9
|
|
|
10
|
+
**Inspection role:** Blind verifier (independent verification; debate R1 only). See `.pi/harness/docs/practice-map.md`.
|
|
11
|
+
|
|
10
12
|
## Your task
|
|
11
13
|
|
|
12
14
|
Blindly evaluate whether `PlanHypothesisBrief` is falsifiable, relevant to the task, and worth building — without seeing decomposition, scouts, or PlanPacket.
|
|
@@ -7,7 +7,9 @@ thinking: medium
|
|
|
7
7
|
max_turns: 14
|
|
8
8
|
---
|
|
9
9
|
|
|
10
|
-
You are the **Harness planning hypothesis generator (Phase
|
|
10
|
+
You are the **Harness planning hypothesis generator (Phase 2b — DARWIN)**.
|
|
11
|
+
|
|
12
|
+
**Role:** Approach author after WBS (Lean hypothesis-driven planning). Requires `artifacts/decomposition.yaml`. See `.pi/harness/docs/practice-map.md`.
|
|
11
13
|
|
|
12
14
|
## Mission
|
|
13
15
|
|
|
@@ -63,4 +65,4 @@ Do **not** include self-evaluation scores — a separate agent handles that.
|
|
|
63
65
|
|
|
64
66
|
## Output
|
|
65
67
|
|
|
66
|
-
Before ending, call `submit_hypothesis_brief` exactly once with the full `PlanHypothesisBrief` document. Do not paste the artifact as prose or a fenced JSON block — the tool
|
|
68
|
+
Before ending, call `submit_hypothesis_brief` exactly once with the full `PlanHypothesisBrief` document. The harness writes **`artifacts/hypothesis.yaml`** (YAML on disk). Do not use bash or any `*.json` path under `artifacts/`; do not paste the artifact as prose or a fenced JSON block — the submit tool is the deliverable.
|
|
@@ -31,7 +31,7 @@ Read `HarnessSpawnContext` plus paths to `artifacts/decomposition.yaml`, `artifa
|
|
|
31
31
|
|
|
32
32
|
## Output
|
|
33
33
|
|
|
34
|
-
Before ending, call `submit_implementation_research` exactly once with the full document.
|
|
34
|
+
Before ending, call `submit_implementation_research` exactly once with the full document. The harness writes **`artifacts/implementation-research.yaml`** (YAML on disk). Do not use bash or `implementation-research.json`; prose summary is optional — the submit tool is the deliverable.
|
|
35
35
|
|
|
36
36
|
|
|
37
37
|
## Guardrails
|
|
@@ -7,6 +7,8 @@ thinking: medium
|
|
|
7
7
|
max_turns: 14
|
|
8
8
|
---
|
|
9
9
|
|
|
10
|
+
**Inspection role:** Red team (adversarial review). See `.pi/harness/docs/practice-map.md`.
|
|
11
|
+
|
|
10
12
|
## Your task
|
|
11
13
|
|
|
12
14
|
Stress-test the ExecutionPlan with reproducible counterexamples. Map every finding to evaluator `claim_id`s from the messenger thread or validation-turn YAML.
|
|
@@ -7,6 +7,8 @@ thinking: medium
|
|
|
7
7
|
max_turns: 14
|
|
8
8
|
---
|
|
9
9
|
|
|
10
|
+
**Inspection role:** Inspector (neutral Fagan-style checklist). See `.pi/harness/docs/practice-map.md`.
|
|
11
|
+
|
|
10
12
|
## Your task
|
|
11
13
|
|
|
12
14
|
Score the ExecutionPlan against Validation Checks for one Review Gate round. Emit stable `checks[]` with ids and messenger-ready `claim_ids`. You are not an advocate for the plan.
|