ultimate-pi 0.13.1 → 0.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. package/.agents/skills/harness-debate-plan/SKILL.md +42 -22
  2. package/.agents/skills/harness-orchestration/SKILL.md +3 -3
  3. package/.agents/skills/harness-plan/SKILL.md +10 -8
  4. package/.pi/agents/harness/planning/decompose.md +4 -2
  5. package/.pi/agents/harness/planning/execution-plan-author.md +25 -14
  6. package/.pi/agents/harness/planning/hypothesis-validator.md +21 -5
  7. package/.pi/agents/harness/planning/implementation-researcher.md +42 -0
  8. package/.pi/agents/harness/planning/plan-adversary.md +20 -4
  9. package/.pi/agents/harness/planning/plan-evaluator.md +28 -5
  10. package/.pi/agents/harness/planning/review-integrator.md +25 -9
  11. package/.pi/agents/harness/planning/scout-graphify.md +1 -1
  12. package/.pi/agents/harness/planning/sprint-contract-auditor.md +19 -4
  13. package/.pi/agents/harness/planning/stack-researcher.md +19 -10
  14. package/.pi/extensions/debate-orchestrator.ts +39 -435
  15. package/.pi/extensions/harness-debate-tools.ts +741 -0
  16. package/.pi/extensions/harness-live-widget.ts +39 -159
  17. package/.pi/extensions/harness-plan-approval.ts +88 -22
  18. package/.pi/extensions/harness-run-context.ts +18 -0
  19. package/.pi/extensions/lib/debate-bus-core.ts +488 -0
  20. package/.pi/extensions/lib/debate-bus-state.ts +64 -0
  21. package/.pi/extensions/lib/harness-spawn-budget.ts +5 -25
  22. package/.pi/extensions/lib/plan-approval/dialog.ts +33 -272
  23. package/.pi/extensions/lib/plan-approval/format-plan.ts +12 -85
  24. package/.pi/extensions/lib/plan-approval/plan-review.ts +62 -6
  25. package/.pi/extensions/lib/plan-approval/render.ts +6 -0
  26. package/.pi/extensions/lib/plan-approval/types.ts +1 -0
  27. package/.pi/extensions/lib/plan-approval/validate.ts +1 -1
  28. package/.pi/extensions/lib/plan-debate-eligibility.ts +214 -0
  29. package/.pi/extensions/lib/plan-debate-envelope.ts +2 -0
  30. package/.pi/extensions/lib/plan-debate-focus.ts +151 -0
  31. package/.pi/extensions/lib/plan-debate-gate.ts +198 -0
  32. package/.pi/extensions/lib/plan-debate-id.ts +39 -0
  33. package/.pi/extensions/lib/plan-debate-lane.ts +220 -0
  34. package/.pi/extensions/lib/plan-debate-lanes.ts +44 -0
  35. package/.pi/extensions/lib/plan-debate-round-status.ts +137 -0
  36. package/.pi/extensions/lib/plan-debate-write-guard.ts +20 -0
  37. package/.pi/extensions/lib/plan-messenger.ts +352 -0
  38. package/.pi/extensions/lib/plan-review-integrator-rules.ts +119 -0
  39. package/.pi/extensions/lib/plan-scope-guard.ts +89 -0
  40. package/.pi/extensions/policy-gate.ts +1 -1
  41. package/.pi/harness/README.md +1 -1
  42. package/.pi/harness/agents.manifest.json +16 -12
  43. package/.pi/harness/docs/adrs/0034-darwin-plan-research-pipeline.md +1 -3
  44. package/.pi/harness/docs/adrs/0035-plan-phase-review-gate.md +13 -5
  45. package/.pi/harness/docs/adrs/0036-implementation-research-and-selective-debate.md +51 -0
  46. package/.pi/harness/docs/adrs/README.md +2 -0
  47. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/artifacts/implementation-research.yaml +28 -0
  48. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/artifacts/review-round-r1.yaml +24 -0
  49. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/artifacts/review-round-r2.yaml +25 -0
  50. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/plan-packet.yaml +196 -0
  51. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/plan-review.md +14 -0
  52. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/research-brief.yaml +62 -0
  53. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/implementation-research.yaml +28 -0
  54. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r2.yaml +24 -0
  55. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r3.yaml +24 -0
  56. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/research-brief.yaml +29 -0
  57. package/.pi/harness/evals/smoke/smoke-harness-plan.mjs +97 -16
  58. package/.pi/harness/specs/plan-implementation-research-brief.schema.json +128 -0
  59. package/.pi/harness/specs/plan-review-round-draft.schema.json +1 -1
  60. package/.pi/harness/specs/round-result.schema.json +15 -2
  61. package/.pi/lib/harness-ui-state.ts +92 -0
  62. package/.pi/prompts/harness-plan.md +90 -30
  63. package/.pi/prompts/planning-rubrics.md +31 -0
  64. package/CHANGELOG.md +23 -0
  65. package/package.json +3 -3
  66. package/.pi/extensions/lib/plan-approval/fallback.ts +0 -50
@@ -2,26 +2,34 @@
2
2
 
3
3
  ## Status
4
4
 
5
- Accepted (2026-05-18)
5
+ Accepted (2026-05-18); amended 2026-05-19 (outcome-based debate + ping-pong dialogue)
6
6
 
7
7
  ## Context
8
8
 
9
9
  `/harness-plan` produced thin PlanPackets (scope + bullets). Post-execute adversarial review (`/harness-critic`) ran too late. Graphify corpus (Structured Planning, ADR-020, Generator–Evaluator) defines WBS, validation, and review gate before baseline.
10
10
 
11
+ Early implementation treated debate as a fixed four-round checklist with a single evaluator→adversary exchange per round, which ended debate on round count rather than on focus coverage and quality.
12
+
11
13
  ## Decision
12
14
 
13
15
  1. **PlanPacket 1.1.0** — required `execution_plan` (phases, work_items, sprint_contract, dag_validation).
14
16
  2. **YAML on disk** — `plan-packet.yaml`, `research-brief.yaml`, `run-context.yaml`, `artifacts/*.yaml`. JSON Schema unchanged; instances validated after YAML parse.
15
17
  3. **Review Gate agents** — `stack-researcher`, `execution-plan-author`, debate: `hypothesis-validator`, `plan-evaluator`, `plan-adversary`, `sprint-contract-auditor`, `review-integrator`.
16
- 4. **Debate bus** — `debate_id=plan-<run_id>`, plan budget profile (4 rounds, 12k cap), plan-phase consensus prerequisites.
17
- 5. **No legacy JSON** plan paths; no pre-debate standalone `hypothesis-eval`.
18
+ 4. **Debate bus** — `debate_id=plan-<run_id>`, plan budget profile:
19
+ - `min_focus_rounds=4`, `max_rounds=12`, `max_exchanges_per_round=3`
20
+ - `round_token_cap=8000`, `debate_global_cap=80000`
21
+ 5. **Outcome-based completion** — consensus `adversarial_debate_completed` is reached only when all focuses `spec|wbs|schedule|quality` are covered in submitted review rounds, the last round has `review_gate_ready: true`, and parent DAG validation passes (not on `round_count >= 4` alone).
22
+ 6. **Within-round dialogue** — pi-messenger kinds: `claim`, `rebuttal`, `clarification`, `counter`; parent orchestrates ping-pong via `harness_debate_round_status` / `harness_debate_advance_thread` before integrator.
23
+ 7. **Sequential debate spawns** — parent must not parallelize debate lane subagents in one batch.
24
+ 8. **No legacy JSON** plan paths; no pre-debate standalone `hypothesis-eval`.
18
25
 
19
26
  ## Consequences
20
27
 
21
- - Positive: PM-grade plans, deterministic DAG gate, blind hypothesis eval in debate R1.
22
- - Negative: Higher spawn/token cost; `harness-verify` and smoke fixtures must use `.yaml`.
28
+ - Positive: PM-grade plans, deterministic DAG gate, blind hypothesis eval in debate R1, richer evaluator↔adversary threads, extendable round index for partial re-debate.
29
+ - Negative: Higher token cost (80k debate cap vs 12k); parent orchestration more stateful; smoke fixtures must include four `debate_round_focus` values.
23
30
 
24
31
  ## References
25
32
 
26
33
  - [ADR-0033](0033-parent-orchestrated-planning.md), [ADR-0034](0034-darwin-plan-research-pipeline.md)
27
34
  - `raw/decisions/adr-020.md`, `raw/modules/structured-planning.md`
35
+ - `.pi/prompts/planning-rubrics.md`, `.pi/prompts/harness-plan.md` Phase 5
@@ -0,0 +1,51 @@
1
+ # ADR 0036: Implementation research and selective debate
2
+
3
+ - **Status:** Accepted
4
+ - **Date:** 2026-05-19
5
+
6
+ ## Context
7
+
8
+ ADR 0034–0035 established Darwin research and outcome-based Review Gate debate. Gaps remained:
9
+
10
+ - No dedicated pass for external solution patterns vs in-repo stack selection.
11
+ - Debate always required all four focuses with `min_focus_rounds=4`, even for low-risk tasks.
12
+ - The sprint-contract-auditor spawn in code did not match the prompt (quality focus).
13
+
14
+ ## Decision
15
+
16
+ 1. **Phase 3.5** — After decompose/hypothesis, parent spawns in parallel:
17
+ - `harness/planning/implementation-researcher` → `PlanImplementationResearchBrief` → `artifacts/implementation-research.yaml`
18
+ - `harness/planning/stack-researcher` → `PlanStackBrief` → `artifacts/stack.yaml`
19
+ 2. Research stays **outside** debate; debate agents cite artifacts, no web tools.
20
+ 3. **Phase 4d** — `harness_plan_debate_eligibility` (pre-debate only) selects `full | standard | light` and `required_focuses`; persisted on messenger + bus at `harness_debate_open`.
21
+ 4. **Light profile** — `spec` + `quality` only, `min_focus_rounds=2`, reduced global cap; gate uses stored `required_focuses` (not hardcoded four).
22
+ 5. **Sprint auditor** — shared `lanesForRound(roundIndex, focus)` spawns sprint lane when `focus === quality` OR `roundIndex >= 4`.
23
+ 6. **`--quick`** still skips semantic scout only; never skips Phase 3.5 or debate.
24
+
25
+ ## Profiles
26
+
27
+ | Profile | When | Focuses | min_focus_rounds |
28
+ |---------|------|---------|-------------------|
29
+ | full | high risk, material fork, open implementation questions, DAG manual patch, many tensions | all four | 4 |
30
+ | standard | default (ambiguous → standard) | all four | 4 |
31
+ | light | low risk, no fork, high-confidence implementation + clear stack primary | spec, quality | 2 |
32
+
33
+ ## Consequences
34
+
35
+ ### Positive
36
+
37
+ - Better plans on hard tasks (external patterns before WBS).
38
+ - Cheaper low-risk plans (light debate).
39
+ - Deterministic eligibility and gate alignment.
40
+
41
+ ### Negative
42
+
43
+ - Extra subagent per plan (implementation-researcher).
44
+ - Parents must run eligibility before `harness_debate_open`.
45
+
46
+ ## References
47
+
48
+ - `.pi/prompts/harness-plan.md`
49
+ - `.pi/harness/specs/plan-implementation-research-brief.schema.json`
50
+ - `.pi/extensions/lib/plan-debate-eligibility.ts`
51
+ - ADR 0034, ADR 0035
@@ -20,6 +20,8 @@ Team-shared ADRs for the ultimate-pi harness live under `.pi/harness/docs/adrs/`
20
20
  | [0032](0032-harness-command-orchestration.md) | Harness commands as agent orchestrators | Accepted |
21
21
  | [0033](0033-parent-orchestrated-planning.md) | Parent-orchestrated harness planning | Accepted |
22
22
  | [0034](0034-darwin-plan-research-pipeline.md) | Darwin plan research pipeline | Accepted |
23
+ | [0035](0035-plan-phase-review-gate.md) | Plan-phase Review Gate | Accepted |
24
+ | [0036](0036-implementation-research-and-selective-debate.md) | Implementation research and selective debate | Accepted |
23
25
 
24
26
  ## Template
25
27
 
@@ -0,0 +1,28 @@
1
+ schema_version: "1.0.0"
2
+ problem_framing: Validate harness plan-phase with fixture-driven smoke
3
+ sub_problems:
4
+ - DAG validation
5
+ - Debate gate coverage
6
+ internal_references:
7
+ - path: .pi/harness/evals/smoke/smoke-harness-plan.mjs
8
+ relevance: Existing smoke pattern
9
+ reuse_signal: high
10
+ external_references: []
11
+ solution_patterns:
12
+ - name: fixture-driven gate
13
+ provenance: in-repo smoke
14
+ fit: Validates plan pipeline without live agents
15
+ tradeoffs:
16
+ pros: [Deterministic CI]
17
+ cons: []
18
+ risks: []
19
+ similar_implementations: []
20
+ recommended_approach:
21
+ summary: Extend minimal-med fixture with implementation artifact
22
+ recommended_approach_confidence: high
23
+ confidence_rationale: Reuses established smoke-harness-plan pattern
24
+ evidence_refs:
25
+ - .pi/harness/evals/smoke/smoke-harness-plan.mjs
26
+ - .pi/scripts/validate-plan-dag.mjs
27
+ anti_patterns: []
28
+ open_questions: []
@@ -0,0 +1,24 @@
1
+ schema_version: "1.0.0"
2
+ round_index: 1
3
+ debate_round_focus: spec
4
+ round_summary: Spec round for light profile fixture
5
+ validation_summary: Spec checks pass
6
+ adversary_summary: No blocking findings
7
+ disputes: []
8
+ recommended_packet_patches: []
9
+ review_gate_ready: false
10
+ participants:
11
+ - PlanEvaluatorAgent
12
+ - PlanAdversaryAgent
13
+ - HypothesisValidatorAgent
14
+ - ReviewIntegratorAgent
15
+ claims:
16
+ - spec validation complete
17
+ rebuttals: []
18
+ evidence_refs: []
19
+ token_usage:
20
+ per_agent:
21
+ PlanEvaluatorAgent: 80
22
+ PlanAdversaryAgent: 80
23
+ round_total: 160
24
+ consensus_delta: 0.1
@@ -0,0 +1,25 @@
1
+ schema_version: "1.0.0"
2
+ round_index: 2
3
+ debate_round_focus: quality
4
+ round_summary: Quality round for light profile fixture
5
+ validation_summary: Quality and sprint contract pass
6
+ adversary_summary: No gaps
7
+ disputes: []
8
+ recommended_packet_patches: []
9
+ review_gate_ready: true
10
+ participants:
11
+ - PlanEvaluatorAgent
12
+ - PlanAdversaryAgent
13
+ - SprintContractAuditorAgent
14
+ - ReviewIntegratorAgent
15
+ claims:
16
+ - review gate ready
17
+ rebuttals: []
18
+ evidence_refs: []
19
+ token_usage:
20
+ per_agent:
21
+ PlanEvaluatorAgent: 100
22
+ PlanAdversaryAgent: 90
23
+ SprintContractAuditorAgent: 70
24
+ round_total: 260
25
+ consensus_delta: 0.12
@@ -0,0 +1,196 @@
1
+ schema_version: "1.0.0"
2
+ contract_version: "1.1.0"
3
+ plan_id: plan-smoke-fixture-001
4
+ task_id: task-smoke-001
5
+ scope: Smoke fixture for plan-phase harness validation with execution_plan and debate artifacts.
6
+ assumptions:
7
+ - Fixture only; no live agent run
8
+ risk_level: low
9
+ acceptance_checks:
10
+ - id: AC-1
11
+ description: DAG validation passes
12
+ - id: AC-2
13
+ description: Two debate rounds recorded (light profile)
14
+ - id: AC-3
15
+ description: Stack brief present in research-brief
16
+ - id: AC-4
17
+ description: Sprint contract complete
18
+ - id: AC-5
19
+ description: plan-review.md renders
20
+ rollback_plan:
21
+ revert_commit_ready: true
22
+ rollback_artifacts:
23
+ revert_command: git revert HEAD
24
+ revert_branch: main
25
+ patch_bundle: .pi/harness/runs/smoke-fixture/patch.bundle
26
+ execution_plan:
27
+ schema_version: "1.0.0"
28
+ phases:
29
+ - phase_id: P1
30
+ name: Foundation
31
+ objective: Establish baseline and verify harness wiring
32
+ entry_criteria:
33
+ - Fixture loaded
34
+ exit_criteria:
35
+ - AC-1 satisfied
36
+ milestone: M1-baseline
37
+ work_item_ids: [WI-1, WI-2, WI-3]
38
+ - phase_id: P2
39
+ name: Build
40
+ objective: Implement core changes
41
+ entry_criteria:
42
+ - M1-baseline complete
43
+ exit_criteria:
44
+ - AC-2 satisfied
45
+ milestone: M2-build
46
+ work_item_ids: [WI-4, WI-5, WI-6]
47
+ - phase_id: P3
48
+ name: Verify
49
+ objective: Quality gate and documentation
50
+ entry_criteria:
51
+ - M2-build complete
52
+ exit_criteria:
53
+ - AC-5 satisfied
54
+ milestone: M3-ship
55
+ work_item_ids: [WI-7, WI-8]
56
+ work_items:
57
+ - work_item_id: WI-1
58
+ phase_id: P1
59
+ title: Load fixture packet
60
+ description: Read plan-packet.yaml from fixture directory
61
+ depends_on: []
62
+ files:
63
+ - .pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/plan-packet.yaml
64
+ parallel_safe: true
65
+ done_criteria:
66
+ type: manual
67
+ spec: Fixture packet readable
68
+ acceptance_check_ids: [AC-1]
69
+ - work_item_id: WI-2
70
+ phase_id: P1
71
+ title: Run DAG validator
72
+ description: Execute validate-plan-dag.mjs
73
+ depends_on: [WI-1]
74
+ files:
75
+ - .pi/scripts/validate-plan-dag.mjs
76
+ parallel_safe: false
77
+ done_criteria:
78
+ type: command
79
+ spec: node .pi/scripts/validate-plan-dag.mjs --packet plan-packet.yaml
80
+ acceptance_check_ids: [AC-1]
81
+ - work_item_id: WI-3
82
+ phase_id: P1
83
+ title: Lint harness-yaml
84
+ description: Ensure YAML helpers parse fixture
85
+ depends_on: [WI-1]
86
+ files:
87
+ - .pi/lib/harness-yaml.ts
88
+ parallel_safe: true
89
+ done_criteria:
90
+ type: lint
91
+ spec: npm test
92
+ acceptance_check_ids: [AC-1]
93
+ - work_item_id: WI-4
94
+ phase_id: P2
95
+ title: Debate round 1-2 artifacts
96
+ description: Validate review-round YAML
97
+ depends_on: [WI-2]
98
+ files:
99
+ - .pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r1.yaml
100
+ parallel_safe: false
101
+ done_criteria:
102
+ type: artifact
103
+ spec: artifacts/review-round-r1.yaml exists
104
+ acceptance_check_ids: [AC-2]
105
+ - work_item_id: WI-5
106
+ phase_id: P2
107
+ title: Debate round 3-4 artifacts
108
+ description: Validate final review round
109
+ depends_on: [WI-4]
110
+ files:
111
+ - .pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r4.yaml
112
+ parallel_safe: false
113
+ done_criteria:
114
+ type: artifact
115
+ spec: artifacts/review-round-r4.yaml exists
116
+ acceptance_check_ids: [AC-2]
117
+ - work_item_id: WI-6
118
+ phase_id: P2
119
+ title: Stack research merge
120
+ description: research-brief includes stack section
121
+ depends_on: [WI-2]
122
+ files: []
123
+ non_code: true
124
+ parallel_safe: true
125
+ done_criteria:
126
+ type: manual
127
+ spec: research-brief.yaml contains stack key
128
+ acceptance_check_ids: [AC-3]
129
+ - work_item_id: WI-7
130
+ phase_id: P3
131
+ title: Sprint contract audit
132
+ description: R4 sprint audit artifact
133
+ depends_on: [WI-5]
134
+ files:
135
+ - .pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/sprint-audit-r4.yaml
136
+ parallel_safe: false
137
+ done_criteria:
138
+ type: artifact
139
+ spec: sprint-audit-r4.yaml present
140
+ acceptance_check_ids: [AC-4]
141
+ - work_item_id: WI-8
142
+ phase_id: P3
143
+ title: Render plan-review
144
+ description: Human-readable plan review markdown
145
+ depends_on: [WI-7]
146
+ files:
147
+ - .pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/plan-review.md
148
+ parallel_safe: false
149
+ done_criteria:
150
+ type: manual
151
+ spec: plan-review.md non-empty
152
+ acceptance_check_ids: [AC-5]
153
+ sprint_contract:
154
+ in_scope:
155
+ - Fixture validation only
156
+ out_of_scope:
157
+ - Production deploy
158
+ definition_of_done: All smoke checks green
159
+ assumptions:
160
+ - CI environment has node
161
+ external_dependencies: []
162
+ wbs_dictionary:
163
+ - work_item_id: WI-1
164
+ deliverable: Fixture packet loaded
165
+ owner_role: executor
166
+ inputs: []
167
+ outputs: [parsed packet]
168
+ risk_register:
169
+ - risk_id: R1
170
+ description: DAG validator false negative
171
+ likelihood: low
172
+ impact: high
173
+ mitigation: Unit tests on validate-plan-dag.mjs
174
+ linked_work_item_ids: [WI-2]
175
+ - risk_id: R2
176
+ description: Debate cap misconfiguration
177
+ likelihood: med
178
+ impact: med
179
+ mitigation: debate-orchestrator plan profile tests
180
+ linked_work_item_ids: [WI-4]
181
+ - risk_id: R3
182
+ description: YAML parse drift
183
+ likelihood: low
184
+ impact: med
185
+ mitigation: harness-yaml strict parse
186
+ linked_work_item_ids: [WI-3]
187
+ schedule_metadata:
188
+ critical_path_work_item_ids: [WI-1, WI-2, WI-4, WI-5, WI-7, WI-8]
189
+ parallel_groups:
190
+ - [WI-1, WI-3]
191
+ schedule_baseline_note: Fixture topological order; no calendar dates
192
+ dag_validation:
193
+ status: pass
194
+ topological_order: [WI-1, WI-2, WI-3, WI-4, WI-5, WI-6, WI-7, WI-8]
195
+ cycles: []
196
+ conflicts: []
@@ -0,0 +1,14 @@
1
+ # Plan review (fixture)
2
+
3
+ plan_id: plan-smoke-fixture-001
4
+
5
+ ## Execution plan
6
+
7
+ Phases: P1 Foundation → P2 Build → P3 Verify
8
+
9
+ Critical path: WI-1 → WI-2 → WI-4 → WI-5 → WI-7 → WI-8
10
+
11
+ ## Debate
12
+
13
+ - Round 1 (spec): review_gate_ready
14
+ - Round 4 (quality): review_gate_ready
@@ -0,0 +1,62 @@
1
+ decomposition:
2
+ schema_version: "1.0.0"
3
+ problem_restatement: Light-profile smoke for two-focus debate
4
+ hypothesis:
5
+ schema_version: "1.0.0"
6
+ primary:
7
+ claim: Light debate covers spec and quality only
8
+ mechanism: Eligibility profile light with min_focus_rounds 2
9
+ prediction: planDebateOutcomeComplete passes with two rounds
10
+ experiment: Run smoke-harness-plan.mjs --fixture minimal-low-light
11
+ implementation:
12
+ schema_version: "1.0.0"
13
+ problem_framing: Low-risk fixture for selective debate
14
+ sub_problems: [spec coverage, quality coverage]
15
+ internal_references:
16
+ - path: test/plan-debate-eligibility.test.mjs
17
+ relevance: Eligibility unit tests
18
+ reuse_signal: high
19
+ external_references: []
20
+ solution_patterns:
21
+ - name: light profile gate
22
+ provenance: ADR-0036
23
+ fit: Reduces debate cost on trivial tasks
24
+ tradeoffs:
25
+ pros: [Fewer rounds]
26
+ cons: []
27
+ risks: []
28
+ similar_implementations:
29
+ - name: minimal-med four-focus fixture
30
+ what_it_solves: Full debate coverage
31
+ gap_vs_us: Light uses two focuses only
32
+ recommended_approach:
33
+ summary: Two review rounds with spec then quality
34
+ recommended_approach_confidence: high
35
+ confidence_rationale: Deterministic fixture aligned with eligibility rules
36
+ evidence_refs:
37
+ - .pi/extensions/lib/plan-debate-eligibility.ts
38
+ - test/plan-debate-eligibility.test.mjs
39
+ anti_patterns: []
40
+ open_questions: []
41
+ stack:
42
+ schema_version: "1.0.0"
43
+ problem_framing: Node harness tooling
44
+ constraints: []
45
+ options:
46
+ - name: extend current stack
47
+ category: brownfield
48
+ fit_summary: Use existing ultimate-pi harness
49
+ tradeoffs:
50
+ pros: [No new deps]
51
+ cons: []
52
+ risks: []
53
+ evidence_refs: []
54
+ recommendation_rank: 1
55
+ recommended_primary: extend current stack
56
+ rationale: Fixture validates in-repo harness
57
+ eval:
58
+ schema_version: "1.0.0"
59
+ revision_recommended: false
60
+ relevance:
61
+ passes: true
62
+ rationale: Hypothesis matches light smoke task
@@ -0,0 +1,28 @@
1
+ schema_version: "1.0.0"
2
+ problem_framing: Validate harness plan-phase with fixture-driven smoke
3
+ sub_problems:
4
+ - DAG validation
5
+ - Debate gate coverage
6
+ internal_references:
7
+ - path: .pi/harness/evals/smoke/smoke-harness-plan.mjs
8
+ relevance: Existing smoke pattern
9
+ reuse_signal: high
10
+ external_references: []
11
+ solution_patterns:
12
+ - name: fixture-driven gate
13
+ provenance: in-repo smoke
14
+ fit: Validates plan pipeline without live agents
15
+ tradeoffs:
16
+ pros: [Deterministic CI]
17
+ cons: []
18
+ risks: []
19
+ similar_implementations: []
20
+ recommended_approach:
21
+ summary: Extend minimal-med fixture with implementation artifact
22
+ recommended_approach_confidence: high
23
+ confidence_rationale: Reuses established smoke-harness-plan pattern
24
+ evidence_refs:
25
+ - .pi/harness/evals/smoke/smoke-harness-plan.mjs
26
+ - .pi/scripts/validate-plan-dag.mjs
27
+ anti_patterns: []
28
+ open_questions: []
@@ -0,0 +1,24 @@
1
+ schema_version: "1.0.0"
2
+ round_index: 2
3
+ debate_round_focus: wbs
4
+ round_summary: WBS round passed for fixture
5
+ validation_summary: Work breakdown structure validated
6
+ adversary_summary: No blocking findings
7
+ disputes: []
8
+ recommended_packet_patches: []
9
+ review_gate_ready: true
10
+ participants:
11
+ - PlanEvaluatorAgent
12
+ - PlanAdversaryAgent
13
+ - ReviewIntegratorAgent
14
+ claims:
15
+ - wbs validation complete
16
+ rebuttals: []
17
+ evidence_refs: []
18
+ token_usage:
19
+ per_agent:
20
+ PlanEvaluatorAgent: 100
21
+ PlanAdversaryAgent: 100
22
+ ReviewIntegratorAgent: 50
23
+ round_total: 250
24
+ consensus_delta: 0.1
@@ -0,0 +1,24 @@
1
+ schema_version: "1.0.0"
2
+ round_index: 3
3
+ debate_round_focus: schedule
4
+ round_summary: Schedule round passed for fixture
5
+ validation_summary: Critical path and dependencies validated
6
+ adversary_summary: No schedule risks unmitigated
7
+ disputes: []
8
+ recommended_packet_patches: []
9
+ review_gate_ready: true
10
+ participants:
11
+ - PlanEvaluatorAgent
12
+ - PlanAdversaryAgent
13
+ - ReviewIntegratorAgent
14
+ claims:
15
+ - schedule validation complete
16
+ rebuttals: []
17
+ evidence_refs: []
18
+ token_usage:
19
+ per_agent:
20
+ PlanEvaluatorAgent: 100
21
+ PlanAdversaryAgent: 100
22
+ ReviewIntegratorAgent: 50
23
+ round_total: 250
24
+ consensus_delta: 0.1
@@ -8,6 +8,35 @@ hypothesis:
8
8
  mechanism: Static artifacts plus validate-plan-dag.mjs
9
9
  prediction: CI passes without live agents
10
10
  experiment: Run smoke-harness-plan.mjs --fixture
11
+ implementation:
12
+ schema_version: "1.0.0"
13
+ problem_framing: Validate harness plan-phase with fixture-driven smoke
14
+ sub_problems:
15
+ - DAG validation
16
+ - Debate gate coverage
17
+ internal_references:
18
+ - path: .pi/harness/evals/smoke/smoke-harness-plan.mjs
19
+ relevance: Existing smoke pattern
20
+ reuse_signal: high
21
+ external_references: []
22
+ solution_patterns:
23
+ - name: fixture-driven gate
24
+ provenance: in-repo smoke
25
+ fit: Validates plan pipeline without live agents
26
+ tradeoffs:
27
+ pros: [Deterministic CI]
28
+ cons: []
29
+ risks: []
30
+ similar_implementations: []
31
+ recommended_approach:
32
+ summary: Extend minimal-med fixture with implementation artifact
33
+ recommended_approach_confidence: high
34
+ confidence_rationale: Reuses established smoke-harness-plan pattern
35
+ evidence_refs:
36
+ - .pi/harness/evals/smoke/smoke-harness-plan.mjs
37
+ - .pi/scripts/validate-plan-dag.mjs
38
+ anti_patterns: []
39
+ open_questions: []
11
40
  stack:
12
41
  schema_version: "1.0.0"
13
42
  problem_framing: Node harness tooling