ultimate-pi 0.14.0 → 0.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agents/skills/harness-debate-plan/SKILL.md +41 -61
- package/.agents/skills/harness-governor/SKILL.md +11 -0
- package/.agents/skills/harness-orchestration/SKILL.md +5 -3
- package/.agents/skills/harness-plan/SKILL.md +11 -9
- package/.pi/agents/harness/adversary.md +1 -1
- package/.pi/agents/harness/evaluator.md +1 -1
- package/.pi/agents/harness/executor.md +1 -1
- package/.pi/agents/harness/incident-recorder.md +1 -1
- package/.pi/agents/harness/meta-optimizer.md +1 -1
- package/.pi/agents/harness/planning/decompose.md +8 -35
- package/.pi/agents/harness/planning/execution-plan-author.md +27 -15
- package/.pi/agents/harness/planning/hypothesis-validator.md +23 -6
- package/.pi/agents/harness/planning/hypothesis.md +4 -27
- package/.pi/agents/harness/planning/implementation-researcher.md +43 -0
- package/.pi/agents/harness/planning/plan-adversary.md +20 -5
- package/.pi/agents/harness/planning/plan-evaluator.md +28 -6
- package/.pi/agents/harness/planning/review-integrator.md +23 -10
- package/.pi/agents/harness/planning/scout-graphify.md +4 -23
- package/.pi/agents/harness/planning/scout-semantic.md +3 -18
- package/.pi/agents/harness/planning/scout-structure.md +3 -18
- package/.pi/agents/harness/planning/sprint-contract-auditor.md +22 -6
- package/.pi/agents/harness/planning/stack-researcher.md +21 -11
- package/.pi/agents/harness/tie-breaker.md +1 -1
- package/.pi/agents/harness/trace-librarian.md +1 -1
- package/.pi/extensions/budget-guard.ts +33 -19
- package/.pi/extensions/harness-debate-tools.ts +280 -19
- package/.pi/extensions/harness-live-widget.ts +39 -159
- package/.pi/extensions/harness-plan-approval.ts +47 -5
- package/.pi/extensions/harness-run-context.ts +96 -2
- package/.pi/extensions/harness-subagent-submit.ts +195 -0
- package/.pi/extensions/lib/debate-bus-core.ts +108 -17
- package/.pi/extensions/lib/debate-bus-state.ts +6 -0
- package/.pi/extensions/lib/harness-subagent-policy.ts +45 -0
- package/.pi/extensions/lib/harness-subagent-submit-pipeline.ts +82 -0
- package/.pi/extensions/lib/harness-subagent-submit-registry.ts +172 -0
- package/.pi/extensions/lib/harness-subagents-bridge.ts +42 -0
- package/.pi/extensions/lib/plan-approval/plan-review.ts +56 -0
- package/.pi/extensions/lib/plan-approval/types.ts +1 -0
- package/.pi/extensions/lib/plan-debate-eligibility.ts +214 -0
- package/.pi/extensions/lib/plan-debate-focus.ts +151 -0
- package/.pi/extensions/lib/plan-debate-gate.ts +88 -34
- package/.pi/extensions/lib/plan-debate-lane.ts +15 -0
- package/.pi/extensions/lib/plan-debate-lanes.ts +44 -0
- package/.pi/extensions/lib/plan-debate-round-status.ts +63 -20
- package/.pi/extensions/lib/plan-messenger.ts +93 -17
- package/.pi/extensions/policy-gate.ts +1 -1
- package/.pi/harness/README.md +1 -1
- package/.pi/harness/agents.manifest.json +25 -21
- package/.pi/harness/docs/adrs/0034-darwin-plan-research-pipeline.md +1 -3
- package/.pi/harness/docs/adrs/0035-plan-phase-review-gate.md +13 -5
- package/.pi/harness/docs/adrs/0036-implementation-research-and-selective-debate.md +51 -0
- package/.pi/harness/docs/adrs/0037-subagent-submit-tools.md +31 -0
- package/.pi/harness/docs/adrs/0038-budget-telemetry-only.md +23 -0
- package/.pi/harness/docs/adrs/README.md +4 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/artifacts/implementation-research.yaml +28 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/artifacts/review-round-r1.yaml +24 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/artifacts/review-round-r2.yaml +25 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/plan-packet.yaml +196 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/plan-review.md +14 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/research-brief.yaml +62 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/implementation-research.yaml +28 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r2.yaml +24 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r3.yaml +24 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/research-brief.yaml +29 -0
- package/.pi/harness/evals/smoke/smoke-harness-plan.mjs +97 -16
- package/.pi/harness/specs/harness-executor-handoff.schema.json +19 -0
- package/.pi/harness/specs/harness-human-required.schema.json +16 -0
- package/.pi/harness/specs/plan-implementation-research-brief.schema.json +128 -0
- package/.pi/harness/specs/plan-review-round-draft.schema.json +1 -1
- package/.pi/harness/specs/plan-scout-findings.schema.json +19 -0
- package/.pi/harness/specs/round-result.schema.json +15 -2
- package/.pi/lib/harness-agent-output.ts +45 -0
- package/.pi/lib/harness-budget-enforce.ts +18 -0
- package/.pi/lib/harness-schema-validate.ts +89 -0
- package/.pi/lib/harness-spawn-parse.ts +86 -0
- package/.pi/lib/harness-subagent-submit-path.ts +41 -0
- package/.pi/lib/harness-ui-state.ts +107 -2
- package/.pi/prompts/harness-auto.md +2 -2
- package/.pi/prompts/harness-plan.md +94 -42
- package/.pi/prompts/harness-run.md +2 -2
- package/.pi/prompts/planning-rubrics.md +31 -0
- package/.pi/scripts/harness-verify.mjs +2 -0
- package/.pi/scripts/harness_web/__pycache__/__init__.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/config.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/output.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/scrape.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/search.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/search_ddg.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/search_searxng.cpython-314.pyc +0 -0
- package/CHANGELOG.md +21 -0
- package/package.json +4 -2
- package/vendor/pi-subagents/src/subagents.ts +29 -3
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
schema_version: "1.0.0"
|
|
2
|
+
problem_framing: Validate harness plan-phase with fixture-driven smoke
|
|
3
|
+
sub_problems:
|
|
4
|
+
- DAG validation
|
|
5
|
+
- Debate gate coverage
|
|
6
|
+
internal_references:
|
|
7
|
+
- path: .pi/harness/evals/smoke/smoke-harness-plan.mjs
|
|
8
|
+
relevance: Existing smoke pattern
|
|
9
|
+
reuse_signal: high
|
|
10
|
+
external_references: []
|
|
11
|
+
solution_patterns:
|
|
12
|
+
- name: fixture-driven gate
|
|
13
|
+
provenance: in-repo smoke
|
|
14
|
+
fit: Validates plan pipeline without live agents
|
|
15
|
+
tradeoffs:
|
|
16
|
+
pros: [Deterministic CI]
|
|
17
|
+
cons: []
|
|
18
|
+
risks: []
|
|
19
|
+
similar_implementations: []
|
|
20
|
+
recommended_approach:
|
|
21
|
+
summary: Extend minimal-med fixture with implementation artifact
|
|
22
|
+
recommended_approach_confidence: high
|
|
23
|
+
confidence_rationale: Reuses established smoke-harness-plan pattern
|
|
24
|
+
evidence_refs:
|
|
25
|
+
- .pi/harness/evals/smoke/smoke-harness-plan.mjs
|
|
26
|
+
- .pi/scripts/validate-plan-dag.mjs
|
|
27
|
+
anti_patterns: []
|
|
28
|
+
open_questions: []
|
package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/artifacts/review-round-r1.yaml
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
schema_version: "1.0.0"
|
|
2
|
+
round_index: 1
|
|
3
|
+
debate_round_focus: spec
|
|
4
|
+
round_summary: Spec round for light profile fixture
|
|
5
|
+
validation_summary: Spec checks pass
|
|
6
|
+
adversary_summary: No blocking findings
|
|
7
|
+
disputes: []
|
|
8
|
+
recommended_packet_patches: []
|
|
9
|
+
review_gate_ready: false
|
|
10
|
+
participants:
|
|
11
|
+
- PlanEvaluatorAgent
|
|
12
|
+
- PlanAdversaryAgent
|
|
13
|
+
- HypothesisValidatorAgent
|
|
14
|
+
- ReviewIntegratorAgent
|
|
15
|
+
claims:
|
|
16
|
+
- spec validation complete
|
|
17
|
+
rebuttals: []
|
|
18
|
+
evidence_refs: []
|
|
19
|
+
token_usage:
|
|
20
|
+
per_agent:
|
|
21
|
+
PlanEvaluatorAgent: 80
|
|
22
|
+
PlanAdversaryAgent: 80
|
|
23
|
+
round_total: 160
|
|
24
|
+
consensus_delta: 0.1
|
package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/artifacts/review-round-r2.yaml
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
schema_version: "1.0.0"
|
|
2
|
+
round_index: 2
|
|
3
|
+
debate_round_focus: quality
|
|
4
|
+
round_summary: Quality round for light profile fixture
|
|
5
|
+
validation_summary: Quality and sprint contract pass
|
|
6
|
+
adversary_summary: No gaps
|
|
7
|
+
disputes: []
|
|
8
|
+
recommended_packet_patches: []
|
|
9
|
+
review_gate_ready: true
|
|
10
|
+
participants:
|
|
11
|
+
- PlanEvaluatorAgent
|
|
12
|
+
- PlanAdversaryAgent
|
|
13
|
+
- SprintContractAuditorAgent
|
|
14
|
+
- ReviewIntegratorAgent
|
|
15
|
+
claims:
|
|
16
|
+
- review gate ready
|
|
17
|
+
rebuttals: []
|
|
18
|
+
evidence_refs: []
|
|
19
|
+
token_usage:
|
|
20
|
+
per_agent:
|
|
21
|
+
PlanEvaluatorAgent: 100
|
|
22
|
+
PlanAdversaryAgent: 90
|
|
23
|
+
SprintContractAuditorAgent: 70
|
|
24
|
+
round_total: 260
|
|
25
|
+
consensus_delta: 0.12
|
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
schema_version: "1.0.0"
|
|
2
|
+
contract_version: "1.1.0"
|
|
3
|
+
plan_id: plan-smoke-fixture-001
|
|
4
|
+
task_id: task-smoke-001
|
|
5
|
+
scope: Smoke fixture for plan-phase harness validation with execution_plan and debate artifacts.
|
|
6
|
+
assumptions:
|
|
7
|
+
- Fixture only; no live agent run
|
|
8
|
+
risk_level: low
|
|
9
|
+
acceptance_checks:
|
|
10
|
+
- id: AC-1
|
|
11
|
+
description: DAG validation passes
|
|
12
|
+
- id: AC-2
|
|
13
|
+
description: Two debate rounds recorded (light profile)
|
|
14
|
+
- id: AC-3
|
|
15
|
+
description: Stack brief present in research-brief
|
|
16
|
+
- id: AC-4
|
|
17
|
+
description: Sprint contract complete
|
|
18
|
+
- id: AC-5
|
|
19
|
+
description: plan-review.md renders
|
|
20
|
+
rollback_plan:
|
|
21
|
+
revert_commit_ready: true
|
|
22
|
+
rollback_artifacts:
|
|
23
|
+
revert_command: git revert HEAD
|
|
24
|
+
revert_branch: main
|
|
25
|
+
patch_bundle: .pi/harness/runs/smoke-fixture/patch.bundle
|
|
26
|
+
execution_plan:
|
|
27
|
+
schema_version: "1.0.0"
|
|
28
|
+
phases:
|
|
29
|
+
- phase_id: P1
|
|
30
|
+
name: Foundation
|
|
31
|
+
objective: Establish baseline and verify harness wiring
|
|
32
|
+
entry_criteria:
|
|
33
|
+
- Fixture loaded
|
|
34
|
+
exit_criteria:
|
|
35
|
+
- AC-1 satisfied
|
|
36
|
+
milestone: M1-baseline
|
|
37
|
+
work_item_ids: [WI-1, WI-2, WI-3]
|
|
38
|
+
- phase_id: P2
|
|
39
|
+
name: Build
|
|
40
|
+
objective: Implement core changes
|
|
41
|
+
entry_criteria:
|
|
42
|
+
- M1-baseline complete
|
|
43
|
+
exit_criteria:
|
|
44
|
+
- AC-2 satisfied
|
|
45
|
+
milestone: M2-build
|
|
46
|
+
work_item_ids: [WI-4, WI-5, WI-6]
|
|
47
|
+
- phase_id: P3
|
|
48
|
+
name: Verify
|
|
49
|
+
objective: Quality gate and documentation
|
|
50
|
+
entry_criteria:
|
|
51
|
+
- M2-build complete
|
|
52
|
+
exit_criteria:
|
|
53
|
+
- AC-5 satisfied
|
|
54
|
+
milestone: M3-ship
|
|
55
|
+
work_item_ids: [WI-7, WI-8]
|
|
56
|
+
work_items:
|
|
57
|
+
- work_item_id: WI-1
|
|
58
|
+
phase_id: P1
|
|
59
|
+
title: Load fixture packet
|
|
60
|
+
description: Read plan-packet.yaml from fixture directory
|
|
61
|
+
depends_on: []
|
|
62
|
+
files:
|
|
63
|
+
- .pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/plan-packet.yaml
|
|
64
|
+
parallel_safe: true
|
|
65
|
+
done_criteria:
|
|
66
|
+
type: manual
|
|
67
|
+
spec: Fixture packet readable
|
|
68
|
+
acceptance_check_ids: [AC-1]
|
|
69
|
+
- work_item_id: WI-2
|
|
70
|
+
phase_id: P1
|
|
71
|
+
title: Run DAG validator
|
|
72
|
+
description: Execute validate-plan-dag.mjs
|
|
73
|
+
depends_on: [WI-1]
|
|
74
|
+
files:
|
|
75
|
+
- .pi/scripts/validate-plan-dag.mjs
|
|
76
|
+
parallel_safe: false
|
|
77
|
+
done_criteria:
|
|
78
|
+
type: command
|
|
79
|
+
spec: node .pi/scripts/validate-plan-dag.mjs --packet plan-packet.yaml
|
|
80
|
+
acceptance_check_ids: [AC-1]
|
|
81
|
+
- work_item_id: WI-3
|
|
82
|
+
phase_id: P1
|
|
83
|
+
title: Lint harness-yaml
|
|
84
|
+
description: Ensure YAML helpers parse fixture
|
|
85
|
+
depends_on: [WI-1]
|
|
86
|
+
files:
|
|
87
|
+
- .pi/lib/harness-yaml.ts
|
|
88
|
+
parallel_safe: true
|
|
89
|
+
done_criteria:
|
|
90
|
+
type: lint
|
|
91
|
+
spec: npm test
|
|
92
|
+
acceptance_check_ids: [AC-1]
|
|
93
|
+
- work_item_id: WI-4
|
|
94
|
+
phase_id: P2
|
|
95
|
+
title: Debate round 1-2 artifacts
|
|
96
|
+
description: Validate review-round YAML
|
|
97
|
+
depends_on: [WI-2]
|
|
98
|
+
files:
|
|
99
|
+
- .pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r1.yaml
|
|
100
|
+
parallel_safe: false
|
|
101
|
+
done_criteria:
|
|
102
|
+
type: artifact
|
|
103
|
+
spec: artifacts/review-round-r1.yaml exists
|
|
104
|
+
acceptance_check_ids: [AC-2]
|
|
105
|
+
- work_item_id: WI-5
|
|
106
|
+
phase_id: P2
|
|
107
|
+
title: Debate round 3-4 artifacts
|
|
108
|
+
description: Validate final review round
|
|
109
|
+
depends_on: [WI-4]
|
|
110
|
+
files:
|
|
111
|
+
- .pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r4.yaml
|
|
112
|
+
parallel_safe: false
|
|
113
|
+
done_criteria:
|
|
114
|
+
type: artifact
|
|
115
|
+
spec: artifacts/review-round-r4.yaml exists
|
|
116
|
+
acceptance_check_ids: [AC-2]
|
|
117
|
+
- work_item_id: WI-6
|
|
118
|
+
phase_id: P2
|
|
119
|
+
title: Stack research merge
|
|
120
|
+
description: research-brief includes stack section
|
|
121
|
+
depends_on: [WI-2]
|
|
122
|
+
files: []
|
|
123
|
+
non_code: true
|
|
124
|
+
parallel_safe: true
|
|
125
|
+
done_criteria:
|
|
126
|
+
type: manual
|
|
127
|
+
spec: research-brief.yaml contains stack key
|
|
128
|
+
acceptance_check_ids: [AC-3]
|
|
129
|
+
- work_item_id: WI-7
|
|
130
|
+
phase_id: P3
|
|
131
|
+
title: Sprint contract audit
|
|
132
|
+
description: R4 sprint audit artifact
|
|
133
|
+
depends_on: [WI-5]
|
|
134
|
+
files:
|
|
135
|
+
- .pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/sprint-audit-r4.yaml
|
|
136
|
+
parallel_safe: false
|
|
137
|
+
done_criteria:
|
|
138
|
+
type: artifact
|
|
139
|
+
spec: sprint-audit-r4.yaml present
|
|
140
|
+
acceptance_check_ids: [AC-4]
|
|
141
|
+
- work_item_id: WI-8
|
|
142
|
+
phase_id: P3
|
|
143
|
+
title: Render plan-review
|
|
144
|
+
description: Human-readable plan review markdown
|
|
145
|
+
depends_on: [WI-7]
|
|
146
|
+
files:
|
|
147
|
+
- .pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/plan-review.md
|
|
148
|
+
parallel_safe: false
|
|
149
|
+
done_criteria:
|
|
150
|
+
type: manual
|
|
151
|
+
spec: plan-review.md non-empty
|
|
152
|
+
acceptance_check_ids: [AC-5]
|
|
153
|
+
sprint_contract:
|
|
154
|
+
in_scope:
|
|
155
|
+
- Fixture validation only
|
|
156
|
+
out_of_scope:
|
|
157
|
+
- Production deploy
|
|
158
|
+
definition_of_done: All smoke checks green
|
|
159
|
+
assumptions:
|
|
160
|
+
- CI environment has node
|
|
161
|
+
external_dependencies: []
|
|
162
|
+
wbs_dictionary:
|
|
163
|
+
- work_item_id: WI-1
|
|
164
|
+
deliverable: Fixture packet loaded
|
|
165
|
+
owner_role: executor
|
|
166
|
+
inputs: []
|
|
167
|
+
outputs: [parsed packet]
|
|
168
|
+
risk_register:
|
|
169
|
+
- risk_id: R1
|
|
170
|
+
description: DAG validator false negative
|
|
171
|
+
likelihood: low
|
|
172
|
+
impact: high
|
|
173
|
+
mitigation: Unit tests on validate-plan-dag.mjs
|
|
174
|
+
linked_work_item_ids: [WI-2]
|
|
175
|
+
- risk_id: R2
|
|
176
|
+
description: Debate cap misconfiguration
|
|
177
|
+
likelihood: med
|
|
178
|
+
impact: med
|
|
179
|
+
mitigation: debate-orchestrator plan profile tests
|
|
180
|
+
linked_work_item_ids: [WI-4]
|
|
181
|
+
- risk_id: R3
|
|
182
|
+
description: YAML parse drift
|
|
183
|
+
likelihood: low
|
|
184
|
+
impact: med
|
|
185
|
+
mitigation: harness-yaml strict parse
|
|
186
|
+
linked_work_item_ids: [WI-3]
|
|
187
|
+
schedule_metadata:
|
|
188
|
+
critical_path_work_item_ids: [WI-1, WI-2, WI-4, WI-5, WI-7, WI-8]
|
|
189
|
+
parallel_groups:
|
|
190
|
+
- [WI-1, WI-3]
|
|
191
|
+
schedule_baseline_note: Fixture topological order; no calendar dates
|
|
192
|
+
dag_validation:
|
|
193
|
+
status: pass
|
|
194
|
+
topological_order: [WI-1, WI-2, WI-3, WI-4, WI-5, WI-6, WI-7, WI-8]
|
|
195
|
+
cycles: []
|
|
196
|
+
conflicts: []
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
# Plan review (fixture)
|
|
2
|
+
|
|
3
|
+
plan_id: plan-smoke-fixture-001
|
|
4
|
+
|
|
5
|
+
## Execution plan
|
|
6
|
+
|
|
7
|
+
Phases: P1 Foundation → P2 Build → P3 Verify
|
|
8
|
+
|
|
9
|
+
Critical path: WI-1 → WI-2 → WI-4 → WI-5 → WI-7 → WI-8
|
|
10
|
+
|
|
11
|
+
## Debate
|
|
12
|
+
|
|
13
|
+
- Round 1 (spec): review_gate_ready
|
|
14
|
+
- Round 2 (quality): review_gate_ready
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
decomposition:
|
|
2
|
+
schema_version: "1.0.0"
|
|
3
|
+
problem_restatement: Light-profile smoke for two-focus debate
|
|
4
|
+
hypothesis:
|
|
5
|
+
schema_version: "1.0.0"
|
|
6
|
+
primary:
|
|
7
|
+
claim: Light debate covers spec and quality only
|
|
8
|
+
mechanism: Eligibility profile light with min_focus_rounds 2
|
|
9
|
+
prediction: planDebateOutcomeComplete passes with two rounds
|
|
10
|
+
experiment: Run smoke-harness-plan.mjs --fixture minimal-low-light
|
|
11
|
+
implementation:
|
|
12
|
+
schema_version: "1.0.0"
|
|
13
|
+
problem_framing: Low-risk fixture for selective debate
|
|
14
|
+
sub_problems: [spec coverage, quality coverage]
|
|
15
|
+
internal_references:
|
|
16
|
+
- path: test/plan-debate-eligibility.test.mjs
|
|
17
|
+
relevance: Eligibility unit tests
|
|
18
|
+
reuse_signal: high
|
|
19
|
+
external_references: []
|
|
20
|
+
solution_patterns:
|
|
21
|
+
- name: light profile gate
|
|
22
|
+
provenance: ADR-0036
|
|
23
|
+
fit: Reduces debate cost on trivial tasks
|
|
24
|
+
tradeoffs:
|
|
25
|
+
pros: [Fewer rounds]
|
|
26
|
+
cons: []
|
|
27
|
+
risks: []
|
|
28
|
+
similar_implementations:
|
|
29
|
+
- name: minimal-med four-focus fixture
|
|
30
|
+
what_it_solves: Full debate coverage
|
|
31
|
+
gap_vs_us: Light uses two focuses only
|
|
32
|
+
recommended_approach:
|
|
33
|
+
summary: Two review rounds with spec then quality
|
|
34
|
+
recommended_approach_confidence: high
|
|
35
|
+
confidence_rationale: Deterministic fixture aligned with eligibility rules
|
|
36
|
+
evidence_refs:
|
|
37
|
+
- .pi/extensions/lib/plan-debate-eligibility.ts
|
|
38
|
+
- test/plan-debate-eligibility.test.mjs
|
|
39
|
+
anti_patterns: []
|
|
40
|
+
open_questions: []
|
|
41
|
+
stack:
|
|
42
|
+
schema_version: "1.0.0"
|
|
43
|
+
problem_framing: Node harness tooling
|
|
44
|
+
constraints: []
|
|
45
|
+
options:
|
|
46
|
+
- name: extend current stack
|
|
47
|
+
category: brownfield
|
|
48
|
+
fit_summary: Use existing ultimate-pi harness
|
|
49
|
+
tradeoffs:
|
|
50
|
+
pros: [No new deps]
|
|
51
|
+
cons: []
|
|
52
|
+
risks: []
|
|
53
|
+
evidence_refs: []
|
|
54
|
+
recommendation_rank: 1
|
|
55
|
+
recommended_primary: extend current stack
|
|
56
|
+
rationale: Fixture validates in-repo harness
|
|
57
|
+
eval:
|
|
58
|
+
schema_version: "1.0.0"
|
|
59
|
+
revision_recommended: false
|
|
60
|
+
relevance:
|
|
61
|
+
passes: true
|
|
62
|
+
rationale: Hypothesis matches light smoke task
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
schema_version: "1.0.0"
|
|
2
|
+
problem_framing: Validate harness plan-phase with fixture-driven smoke
|
|
3
|
+
sub_problems:
|
|
4
|
+
- DAG validation
|
|
5
|
+
- Debate gate coverage
|
|
6
|
+
internal_references:
|
|
7
|
+
- path: .pi/harness/evals/smoke/smoke-harness-plan.mjs
|
|
8
|
+
relevance: Existing smoke pattern
|
|
9
|
+
reuse_signal: high
|
|
10
|
+
external_references: []
|
|
11
|
+
solution_patterns:
|
|
12
|
+
- name: fixture-driven gate
|
|
13
|
+
provenance: in-repo smoke
|
|
14
|
+
fit: Validates plan pipeline without live agents
|
|
15
|
+
tradeoffs:
|
|
16
|
+
pros: [Deterministic CI]
|
|
17
|
+
cons: []
|
|
18
|
+
risks: []
|
|
19
|
+
similar_implementations: []
|
|
20
|
+
recommended_approach:
|
|
21
|
+
summary: Extend minimal-med fixture with implementation artifact
|
|
22
|
+
recommended_approach_confidence: high
|
|
23
|
+
confidence_rationale: Reuses established smoke-harness-plan pattern
|
|
24
|
+
evidence_refs:
|
|
25
|
+
- .pi/harness/evals/smoke/smoke-harness-plan.mjs
|
|
26
|
+
- .pi/scripts/validate-plan-dag.mjs
|
|
27
|
+
anti_patterns: []
|
|
28
|
+
open_questions: []
|
package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r2.yaml
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
schema_version: "1.0.0"
|
|
2
|
+
round_index: 2
|
|
3
|
+
debate_round_focus: wbs
|
|
4
|
+
round_summary: WBS round passed for fixture
|
|
5
|
+
validation_summary: Work breakdown structure validated
|
|
6
|
+
adversary_summary: No blocking findings
|
|
7
|
+
disputes: []
|
|
8
|
+
recommended_packet_patches: []
|
|
9
|
+
review_gate_ready: true
|
|
10
|
+
participants:
|
|
11
|
+
- PlanEvaluatorAgent
|
|
12
|
+
- PlanAdversaryAgent
|
|
13
|
+
- ReviewIntegratorAgent
|
|
14
|
+
claims:
|
|
15
|
+
- wbs validation complete
|
|
16
|
+
rebuttals: []
|
|
17
|
+
evidence_refs: []
|
|
18
|
+
token_usage:
|
|
19
|
+
per_agent:
|
|
20
|
+
PlanEvaluatorAgent: 100
|
|
21
|
+
PlanAdversaryAgent: 100
|
|
22
|
+
ReviewIntegratorAgent: 50
|
|
23
|
+
round_total: 250
|
|
24
|
+
consensus_delta: 0.1
|
package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r3.yaml
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
schema_version: "1.0.0"
|
|
2
|
+
round_index: 3
|
|
3
|
+
debate_round_focus: schedule
|
|
4
|
+
round_summary: Schedule round passed for fixture
|
|
5
|
+
validation_summary: Critical path and dependencies validated
|
|
6
|
+
adversary_summary: No schedule risks unmitigated
|
|
7
|
+
disputes: []
|
|
8
|
+
recommended_packet_patches: []
|
|
9
|
+
review_gate_ready: true
|
|
10
|
+
participants:
|
|
11
|
+
- PlanEvaluatorAgent
|
|
12
|
+
- PlanAdversaryAgent
|
|
13
|
+
- ReviewIntegratorAgent
|
|
14
|
+
claims:
|
|
15
|
+
- schedule validation complete
|
|
16
|
+
rebuttals: []
|
|
17
|
+
evidence_refs: []
|
|
18
|
+
token_usage:
|
|
19
|
+
per_agent:
|
|
20
|
+
PlanEvaluatorAgent: 100
|
|
21
|
+
PlanAdversaryAgent: 100
|
|
22
|
+
ReviewIntegratorAgent: 50
|
|
23
|
+
round_total: 250
|
|
24
|
+
consensus_delta: 0.1
|
|
@@ -8,6 +8,35 @@ hypothesis:
|
|
|
8
8
|
mechanism: Static artifacts plus validate-plan-dag.mjs
|
|
9
9
|
prediction: CI passes without live agents
|
|
10
10
|
experiment: Run smoke-harness-plan.mjs --fixture
|
|
11
|
+
implementation:
|
|
12
|
+
schema_version: "1.0.0"
|
|
13
|
+
problem_framing: Validate harness plan-phase with fixture-driven smoke
|
|
14
|
+
sub_problems:
|
|
15
|
+
- DAG validation
|
|
16
|
+
- Debate gate coverage
|
|
17
|
+
internal_references:
|
|
18
|
+
- path: .pi/harness/evals/smoke/smoke-harness-plan.mjs
|
|
19
|
+
relevance: Existing smoke pattern
|
|
20
|
+
reuse_signal: high
|
|
21
|
+
external_references: []
|
|
22
|
+
solution_patterns:
|
|
23
|
+
- name: fixture-driven gate
|
|
24
|
+
provenance: in-repo smoke
|
|
25
|
+
fit: Validates plan pipeline without live agents
|
|
26
|
+
tradeoffs:
|
|
27
|
+
pros: [Deterministic CI]
|
|
28
|
+
cons: []
|
|
29
|
+
risks: []
|
|
30
|
+
similar_implementations: []
|
|
31
|
+
recommended_approach:
|
|
32
|
+
summary: Extend minimal-med fixture with implementation artifact
|
|
33
|
+
recommended_approach_confidence: high
|
|
34
|
+
confidence_rationale: Reuses established smoke-harness-plan pattern
|
|
35
|
+
evidence_refs:
|
|
36
|
+
- .pi/harness/evals/smoke/smoke-harness-plan.mjs
|
|
37
|
+
- .pi/scripts/validate-plan-dag.mjs
|
|
38
|
+
anti_patterns: []
|
|
39
|
+
open_questions: []
|
|
11
40
|
stack:
|
|
12
41
|
schema_version: "1.0.0"
|
|
13
42
|
problem_framing: Node harness tooling
|
|
@@ -1,16 +1,54 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
/**
|
|
3
3
|
* smoke-harness-plan — fixture validation for plan-phase pipeline (CI).
|
|
4
|
-
* Usage: node .pi/harness/evals/smoke/smoke-harness-plan.mjs --fixture
|
|
4
|
+
* Usage: node .pi/harness/evals/smoke/smoke-harness-plan.mjs --fixture [minimal-med|minimal-low-light]
|
|
5
5
|
*/
|
|
6
6
|
|
|
7
|
-
import { access,
|
|
7
|
+
import { access, readFile } from "node:fs/promises";
|
|
8
8
|
import { constants } from "node:fs";
|
|
9
|
-
import { dirname, join
|
|
9
|
+
import { dirname, join } from "node:path";
|
|
10
10
|
import { fileURLToPath } from "node:url";
|
|
11
11
|
import { parse as parseYaml } from "yaml";
|
|
12
12
|
import { validateExecutionPlan } from "../../../scripts/validate-plan-dag.mjs";
|
|
13
13
|
|
|
14
|
+
/**
 * Decide whether a scanned debate outcome satisfies the fixture profile.
 *
 * @param {{ missing: string[], last_review_gate_ready: boolean, last_round_index: number }} coverage
 *   Coverage summary; only `missing`, `last_review_gate_ready` and
 *   `last_round_index` are read.
 * @param {string[]} requiredFocus - Accepted for call-site symmetry; not read here.
 * @param {number} minRounds - Lowest acceptable value for `coverage.last_round_index`.
 * @returns {boolean} `true` only when no focus is missing, the last round is
 *   gate-ready, and the last round index reaches `minRounds`.
 */
function planOutcomeComplete(coverage, requiredFocus, minRounds) {
  // Any uncovered focus fails the outcome outright.
  if (coverage.missing.length !== 0) {
    return false;
  }
  // The flag must be the boolean `true`; truthy stand-ins do not count.
  if (coverage.last_review_gate_ready !== true) {
    return false;
  }
  return coverage.last_round_index >= minRounds;
}
|
|
21
|
+
|
|
22
|
+
/**
 * Scan `<fixtureRoot>/artifacts` for `review-round-r<N>.yaml` files and
 * summarise which required debate focuses they cover.
 *
 * @param {string} fixtureRoot - Fixture directory that contains `artifacts/`.
 * @param {string[]} requiredFocus - Focus names that must appear as
 *   `debate_round_focus` in at least one round file.
 * @returns {Promise<{ covered: string[], missing: string[], last_review_gate_ready: boolean, last_round_index: number }>}
 *   `covered` / `missing` partition `requiredFocus` (in its original order);
 *   `last_round_index` is the highest `<N>` found (0 when there are no round
 *   files); `last_review_gate_ready` reflects that highest round's
 *   `review_gate_ready === true`.
 */
async function scanFocusCoverage(fixtureRoot, requiredFocus) {
  const { readdir } = await import("node:fs/promises");
  const artifactsDir = join(fixtureRoot, "artifacts");
  const roundFilePattern = /^review-round-r(\d+)\.yaml$/i;

  const seenFocus = new Set();
  let last_round_index = 0;
  let last_review_gate_ready = false;

  const entries = await readdir(artifactsDir);
  const roundFiles = entries.filter((entry) => roundFilePattern.test(entry)).sort();

  for (const fileName of roundFiles) {
    const match = roundFilePattern.exec(fileName);
    if (match === null) {
      continue;
    }
    const roundIndex = Number(match[1]);
    last_round_index = Math.max(last_round_index, roundIndex);

    const text = await readFile(join(artifactsDir, fileName), "utf-8");
    const draft = parseYaml(text);

    const focus = String(draft.debate_round_focus ?? "").trim();
    if (requiredFocus.includes(focus)) {
      seenFocus.add(focus);
    }

    // Only a file carrying the highest index seen so far may set the gate flag,
    // so the result does not depend on the lexicographic sort order (r10 < r2).
    if (roundIndex === last_round_index) {
      last_review_gate_ready = draft.review_gate_ready === true;
    }
  }

  const covered = [];
  const missing = [];
  for (const focus of requiredFocus) {
    if (seenFocus.has(focus)) {
      covered.push(focus);
    } else {
      missing.push(focus);
    }
  }

  return {
    covered,
    missing,
    last_review_gate_ready,
    last_round_index,
  };
}
|
|
51
|
+
|
|
14
52
|
const ROOT = join(dirname(fileURLToPath(import.meta.url)), "..", "..", "..", "..");
|
|
15
53
|
const FIXTURE_DIR = join(dirname(fileURLToPath(import.meta.url)), "fixtures", "plan-phase");
|
|
16
54
|
|
|
@@ -23,8 +61,16 @@ function ok(msg) {
|
|
|
23
61
|
console.log(` ✓ ${msg}`);
|
|
24
62
|
}
|
|
25
63
|
|
|
26
|
-
|
|
27
|
-
const
|
|
64
|
+
/**
 * Resolve the fixture name that follows the `--fixture` flag.
 *
 * @param {string[]} args - Command-line arguments (without node/script path).
 * @returns {string} The argument right after `--fixture`, or `"minimal-med"`
 *   when the flag is absent, has no following value, or the following value
 *   begins with `-` (i.e. looks like another flag).
 */
function fixtureNameFromArgs(args) {
  const flagPosition = args.indexOf("--fixture");
  const candidate = flagPosition === -1 ? undefined : args[flagPosition + 1];
  if (candidate && !candidate.startsWith("-")) {
    return candidate;
  }
  return "minimal-med";
}
|
|
71
|
+
|
|
72
|
+
async function runFixture(name) {
|
|
73
|
+
const fixtureRoot = join(FIXTURE_DIR, name);
|
|
28
74
|
try {
|
|
29
75
|
await access(fixtureRoot, constants.R_OK);
|
|
30
76
|
} catch {
|
|
@@ -49,29 +95,64 @@ async function runFixture() {
|
|
|
49
95
|
await access(reviewPath, constants.R_OK);
|
|
50
96
|
ok("plan-review.md present");
|
|
51
97
|
|
|
52
|
-
const
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
await access(p, constants.R_OK);
|
|
56
|
-
const draft = parseYaml(await readFile(p, "utf-8"));
|
|
57
|
-
if (!draft.schema_version) fail(`${name} missing schema_version`);
|
|
58
|
-
}
|
|
59
|
-
ok("debate round YAML artifacts present");
|
|
98
|
+
const implPath = join(fixtureRoot, "artifacts", "implementation-research.yaml");
|
|
99
|
+
await access(implPath, constants.R_OK);
|
|
100
|
+
ok("implementation-research.yaml present");
|
|
60
101
|
|
|
61
102
|
const researchPath = join(fixtureRoot, "research-brief.yaml");
|
|
62
103
|
const research = parseYaml(await readFile(researchPath, "utf-8"));
|
|
63
104
|
if (!research.decomposition || !research.hypothesis) {
|
|
64
105
|
fail("research-brief.yaml missing decomposition/hypothesis");
|
|
65
106
|
}
|
|
107
|
+
if (!research.implementation) {
|
|
108
|
+
fail("research-brief.yaml missing implementation section");
|
|
109
|
+
}
|
|
66
110
|
ok("research-brief.yaml structure");
|
|
67
111
|
|
|
68
|
-
|
|
112
|
+
const isLight = name === "minimal-low-light";
|
|
113
|
+
const requiredFocus = isLight ? ["spec", "quality"] : ["spec", "wbs", "schedule", "quality"];
|
|
114
|
+
const debateRounds = isLight
|
|
115
|
+
? ["review-round-r1.yaml", "review-round-r2.yaml"]
|
|
116
|
+
: [
|
|
117
|
+
"review-round-r1.yaml",
|
|
118
|
+
"review-round-r2.yaml",
|
|
119
|
+
"review-round-r3.yaml",
|
|
120
|
+
"review-round-r4.yaml",
|
|
121
|
+
];
|
|
122
|
+
const seenFocus = new Set();
|
|
123
|
+
for (const fileName of debateRounds) {
|
|
124
|
+
const p = join(fixtureRoot, "artifacts", fileName);
|
|
125
|
+
await access(p, constants.R_OK);
|
|
126
|
+
const draft = parseYaml(await readFile(p, "utf-8"));
|
|
127
|
+
if (!draft.schema_version) fail(`${fileName} missing schema_version`);
|
|
128
|
+
if (draft.debate_round_focus) seenFocus.add(draft.debate_round_focus);
|
|
129
|
+
}
|
|
130
|
+
for (const focus of requiredFocus) {
|
|
131
|
+
if (!seenFocus.has(focus)) {
|
|
132
|
+
fail(`fixture missing debate_round_focus: ${focus}`);
|
|
133
|
+
}
|
|
134
|
+
}
|
|
135
|
+
ok(`debate round YAML artifacts (${requiredFocus.length} focuses)`);
|
|
136
|
+
|
|
137
|
+
const coverage = await scanFocusCoverage(fixtureRoot, requiredFocus);
|
|
138
|
+
const minRounds = isLight ? 2 : 4;
|
|
139
|
+
if (!planOutcomeComplete(coverage, requiredFocus, minRounds)) {
|
|
140
|
+
fail("debate outcome incomplete for fixture coverage");
|
|
141
|
+
}
|
|
142
|
+
ok("debate outcome complete for fixture profile");
|
|
143
|
+
|
|
144
|
+
if (isLight && packet.risk_level !== "low") {
|
|
145
|
+
fail("minimal-low-light fixture must use risk_level low");
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
console.log(`smoke-harness-plan: all ${name} fixture checks passed`);
|
|
69
149
|
}
|
|
70
150
|
|
|
71
151
|
async function main() {
|
|
72
152
|
const args = process.argv.slice(2);
|
|
73
153
|
if (args.includes("--fixture")) {
|
|
74
|
-
|
|
154
|
+
const name = fixtureNameFromArgs(args);
|
|
155
|
+
await runFixture(name);
|
|
75
156
|
return;
|
|
76
157
|
}
|
|
77
158
|
if (args.includes("--live")) {
|
|
@@ -80,7 +161,7 @@ async function main() {
|
|
|
80
161
|
);
|
|
81
162
|
return;
|
|
82
163
|
}
|
|
83
|
-
fail("Usage: smoke-harness-plan.mjs --fixture | --live");
|
|
164
|
+
fail("Usage: smoke-harness-plan.mjs --fixture [minimal-med|minimal-low-light] | --live");
|
|
84
165
|
}
|
|
85
166
|
|
|
86
167
|
main().catch((err) => {
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://ultimate-pi.local/.pi/harness/specs/harness-executor-handoff.schema.json",
|
|
4
|
+
"title": "HarnessExecutorHandoff",
|
|
5
|
+
"type": "object",
|
|
6
|
+
"additionalProperties": true,
|
|
7
|
+
"required": ["schema_version", "execution_status"],
|
|
8
|
+
"properties": {
|
|
9
|
+
"schema_version": { "type": "string", "const": "1.0.0" },
|
|
10
|
+
"execution_status": {
|
|
11
|
+
"type": "string",
|
|
12
|
+
"enum": ["completed", "blocked", "scope_drift"]
|
|
13
|
+
},
|
|
14
|
+
"files_changed": { "type": "array" },
|
|
15
|
+
"validation_summary": { "type": "string" },
|
|
16
|
+
"rollback_refs": { "type": "object" },
|
|
17
|
+
"handoff_ready": { "type": "object" }
|
|
18
|
+
}
|
|
19
|
+
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://ultimate-pi.local/.pi/harness/specs/harness-human-required.schema.json",
|
|
4
|
+
"title": "HarnessHumanRequired",
|
|
5
|
+
"type": "object",
|
|
6
|
+
"additionalProperties": false,
|
|
7
|
+
"required": ["schema_version", "reason"],
|
|
8
|
+
"properties": {
|
|
9
|
+
"schema_version": { "type": "string", "const": "1.0.0" },
|
|
10
|
+
"reason": { "type": "string", "minLength": 1 },
|
|
11
|
+
"questions": {
|
|
12
|
+
"type": "array",
|
|
13
|
+
"items": { "type": "string" }
|
|
14
|
+
}
|
|
15
|
+
}
|
|
16
|
+
}
|