ultimate-pi 0.11.0 → 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agents/skills/ck-search/SKILL.md +11 -87
- package/.agents/skills/cocoindex-search/SKILL.md +35 -0
- package/.agents/skills/harness-debate-plan/SKILL.md +44 -0
- package/.agents/skills/harness-decisions/SKILL.md +1 -1
- package/.agents/skills/harness-orchestration/SKILL.md +54 -28
- package/.agents/skills/harness-plan/SKILL.md +15 -20
- package/.pi/PACKAGING.md +1 -0
- package/.pi/SYSTEM.md +21 -20
- package/.pi/agents/harness/adversary.md +0 -1
- package/.pi/agents/harness/evaluator.md +0 -1
- package/.pi/agents/harness/executor.md +1 -2
- package/.pi/agents/harness/incident-recorder.md +0 -1
- package/.pi/agents/harness/meta-optimizer.md +0 -1
- package/.pi/agents/harness/planning/decompose.md +3 -4
- package/.pi/agents/harness/planning/execution-plan-author.md +30 -0
- package/.pi/agents/harness/planning/hypothesis-validator.md +23 -0
- package/.pi/agents/harness/planning/hypothesis.md +3 -4
- package/.pi/agents/harness/planning/plan-adversary.md +10 -42
- package/.pi/agents/harness/planning/plan-evaluator.md +18 -0
- package/.pi/agents/harness/planning/review-integrator.md +23 -0
- package/.pi/agents/harness/planning/scout-graphify.md +13 -5
- package/.pi/agents/harness/planning/scout-semantic.md +23 -11
- package/.pi/agents/harness/planning/scout-structure.md +12 -6
- package/.pi/agents/harness/planning/sprint-contract-auditor.md +18 -0
- package/.pi/agents/harness/planning/stack-researcher.md +24 -0
- package/.pi/agents/harness/tie-breaker.md +0 -1
- package/.pi/agents/harness/trace-librarian.md +0 -1
- package/.pi/extensions/debate-orchestrator.ts +90 -53
- package/.pi/extensions/harness-plan-approval.ts +2 -2
- package/.pi/extensions/harness-run-context.ts +150 -5
- package/.pi/extensions/harness-subagents.ts +17 -6
- package/.pi/extensions/lib/harness-cocoindex-refresh.ts +49 -0
- package/.pi/extensions/lib/harness-posthog.ts +6 -1
- package/.pi/extensions/lib/harness-spawn-budget.ts +75 -0
- package/.pi/extensions/lib/harness-subagent-auth.ts +123 -0
- package/.pi/extensions/lib/{harness-subagents/harness-subagent-policy.ts → harness-subagent-policy.ts} +8 -7
- package/.pi/extensions/lib/harness-subagent-precheck.ts +95 -0
- package/.pi/extensions/lib/harness-subagents-bridge.ts +122 -0
- package/.pi/extensions/lib/plan-approval/create-plan.ts +4 -7
- package/.pi/extensions/lib/plan-approval/plan-review.ts +1 -1
- package/.pi/extensions/lib/plan-approval/types.ts +7 -1
- package/.pi/extensions/lib/plan-debate-envelope.ts +84 -0
- package/.pi/extensions/lib/{harness-subagents/spawn-policy.ts → spawn-policy.ts} +1 -0
- package/.pi/extensions/policy-gate.ts +1 -1
- package/.pi/extensions/review-integrity.ts +48 -29
- package/.pi/harness/agents.manifest.json +37 -25
- package/.pi/harness/docs/adrs/0032-harness-command-orchestration.md +4 -3
- package/.pi/harness/docs/adrs/0033-parent-orchestrated-planning.md +2 -2
- package/.pi/harness/docs/adrs/0035-plan-phase-review-gate.md +27 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r1.yaml +25 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r4.yaml +26 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/sprint-audit-r4.yaml +5 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/plan-packet.yaml +196 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/plan-review.md +14 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/research-brief.yaml +32 -0
- package/.pi/harness/evals/smoke/run-context.fixture.json +1 -1
- package/.pi/harness/evals/smoke/smoke-harness-plan.mjs +88 -0
- package/.pi/harness/specs/harness-posthog-event.schema.json +6 -1
- package/.pi/harness/specs/plan-execution-plan-brief.schema.json +13 -0
- package/.pi/harness/specs/plan-execution-plan.schema.json +255 -0
- package/.pi/harness/specs/plan-packet.schema.json +14 -5
- package/.pi/harness/specs/plan-review-round-draft.schema.json +68 -0
- package/.pi/harness/specs/plan-sprint-audit-turn.schema.json +29 -0
- package/.pi/harness/specs/plan-stack-brief.schema.json +65 -0
- package/.pi/harness/specs/plan-validation-turn.schema.json +42 -0
- package/.pi/harness/specs/round-result.schema.json +16 -9
- package/.pi/lib/debate-orchestrator-types.ts +38 -0
- package/.pi/lib/harness-agent-discovery.mjs +81 -0
- package/.pi/lib/harness-run-context.ts +64 -38
- package/.pi/lib/harness-yaml.mjs +73 -0
- package/.pi/lib/harness-yaml.ts +90 -0
- package/.pi/prompts/harness-auto.md +13 -11
- package/.pi/prompts/harness-critic.md +2 -2
- package/.pi/prompts/harness-eval.md +3 -3
- package/.pi/prompts/harness-incident.md +2 -2
- package/.pi/prompts/harness-plan.md +83 -92
- package/.pi/prompts/harness-review.md +2 -2
- package/.pi/prompts/harness-router-tune.md +1 -1
- package/.pi/prompts/harness-run.md +2 -2
- package/.pi/prompts/harness-setup.md +30 -17
- package/.pi/prompts/harness-trace.md +2 -2
- package/.pi/scripts/README.md +1 -0
- package/.pi/scripts/harness-agents-manifest.mjs +1 -1
- package/.pi/scripts/harness-cli-verify.sh +24 -14
- package/.pi/scripts/harness-cocoindex-bootstrap.sh +182 -0
- package/.pi/scripts/harness-verify.mjs +38 -19
- package/.pi/scripts/validate-plan-dag.mjs +258 -0
- package/.pi/scripts/vendor-sync-pi-subagents.sh +19 -0
- package/.pi/skills/ast-grep/SKILL.md +2 -2
- package/.pi/skills/ccc/SKILL.md +142 -0
- package/.pi/skills/ccc/references/management.md +110 -0
- package/CHANGELOG.md +22 -0
- package/THIRD_PARTY_NOTICES.md +15 -0
- package/biome.json +2 -2
- package/package.json +7 -4
- package/vendor/pi-subagents/LICENSE +21 -0
- package/vendor/pi-subagents/UPSTREAM_PIN.md +11 -0
- package/vendor/pi-subagents/src/agents.ts +357 -0
- package/vendor/pi-subagents/src/subagents.ts +1463 -0
- package/.pi/agents/harness/planner.md +0 -13
- package/.pi/agents/harness/planning/hypothesis-eval.md +0 -59
- package/.pi/agents/harness/planning/planner.md +0 -20
- package/.pi/extensions/lib/harness-subagents/agent-loader.ts +0 -126
- package/.pi/extensions/lib/harness-subagents/agent-manifest.ts +0 -119
- package/.pi/extensions/lib/harness-subagents/agent-parser.ts +0 -87
- package/.pi/extensions/lib/harness-subagents/blackboard-tool.ts +0 -118
- package/.pi/extensions/lib/harness-subagents/blackboard.ts +0 -175
- package/.pi/extensions/lib/harness-subagents/parent-ask-user-bridge.ts +0 -10
- package/.pi/extensions/lib/harness-subagents/parent-harness-ui-bridge.ts +0 -137
- package/.pi/extensions/lib/harness-subagents/parent-harness-ui-hooks.ts +0 -77
- package/.pi/extensions/lib/harness-subagents/types-blackboard.ts +0 -27
- package/.pi/extensions/lib/harness-subagents/vendored/agent-manager.ts +0 -558
- package/.pi/extensions/lib/harness-subagents/vendored/agent-runner.ts +0 -666
- package/.pi/extensions/lib/harness-subagents/vendored/agent-types.ts +0 -175
- package/.pi/extensions/lib/harness-subagents/vendored/context.ts +0 -59
- package/.pi/extensions/lib/harness-subagents/vendored/cross-extension-rpc.ts +0 -134
- package/.pi/extensions/lib/harness-subagents/vendored/custom-agents.ts +0 -5
- package/.pi/extensions/lib/harness-subagents/vendored/default-agents.ts +0 -123
- package/.pi/extensions/lib/harness-subagents/vendored/env.ts +0 -43
- package/.pi/extensions/lib/harness-subagents/vendored/group-join.ts +0 -144
- package/.pi/extensions/lib/harness-subagents/vendored/index.ts +0 -2460
- package/.pi/extensions/lib/harness-subagents/vendored/invocation-config.ts +0 -52
- package/.pi/extensions/lib/harness-subagents/vendored/memory.ts +0 -182
- package/.pi/extensions/lib/harness-subagents/vendored/model-resolver.ts +0 -92
- package/.pi/extensions/lib/harness-subagents/vendored/output-file.ts +0 -115
- package/.pi/extensions/lib/harness-subagents/vendored/prompts.ts +0 -103
- package/.pi/extensions/lib/harness-subagents/vendored/schedule-store.ts +0 -177
- package/.pi/extensions/lib/harness-subagents/vendored/schedule.ts +0 -416
- package/.pi/extensions/lib/harness-subagents/vendored/settings.ts +0 -210
- package/.pi/extensions/lib/harness-subagents/vendored/skill-loader.ts +0 -108
- package/.pi/extensions/lib/harness-subagents/vendored/types.ts +0 -187
- package/.pi/extensions/lib/harness-subagents/vendored/ui/agent-widget.ts +0 -639
- package/.pi/extensions/lib/harness-subagents/vendored/ui/conversation-viewer.ts +0 -324
- package/.pi/extensions/lib/harness-subagents/vendored/ui/schedule-menu.ts +0 -110
- package/.pi/extensions/lib/harness-subagents/vendored/usage.ts +0 -71
- package/.pi/extensions/lib/harness-subagents/vendored/worktree.ts +0 -195
- /package/.pi/extensions/{00-ultimate-pi-system-prompt.ts → custom-system-prompt.ts} +0 -0
package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r4.yaml
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
schema_version: "1.0.0"
|
|
2
|
+
round_index: 4
|
|
3
|
+
debate_round_focus: quality
|
|
4
|
+
round_summary: Quality and sprint contract round passed
|
|
5
|
+
validation_summary: Sprint contract complete
|
|
6
|
+
adversary_summary: No gaps
|
|
7
|
+
disputes: []
|
|
8
|
+
recommended_packet_patches: []
|
|
9
|
+
review_gate_ready: true
|
|
10
|
+
participants:
|
|
11
|
+
- PlanEvaluatorAgent
|
|
12
|
+
- PlanAdversaryAgent
|
|
13
|
+
- SprintContractAuditorAgent
|
|
14
|
+
- ReviewIntegratorAgent
|
|
15
|
+
claims:
|
|
16
|
+
- review gate ready
|
|
17
|
+
rebuttals: []
|
|
18
|
+
evidence_refs: []
|
|
19
|
+
token_usage:
|
|
20
|
+
per_agent:
|
|
21
|
+
PlanEvaluatorAgent: 120
|
|
22
|
+
PlanAdversaryAgent: 110
|
|
23
|
+
SprintContractAuditorAgent: 90
|
|
24
|
+
ReviewIntegratorAgent: 60
|
|
25
|
+
round_total: 380
|
|
26
|
+
consensus_delta: 0.15
|
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
schema_version: "1.0.0"
|
|
2
|
+
contract_version: "1.1.0"
|
|
3
|
+
plan_id: plan-smoke-fixture-001
|
|
4
|
+
task_id: task-smoke-001
|
|
5
|
+
scope: Smoke fixture for plan-phase harness validation with execution_plan and debate artifacts.
|
|
6
|
+
assumptions:
|
|
7
|
+
- Fixture only; no live agent run
|
|
8
|
+
risk_level: med
|
|
9
|
+
acceptance_checks:
|
|
10
|
+
- id: AC-1
|
|
11
|
+
description: DAG validation passes
|
|
12
|
+
- id: AC-2
|
|
13
|
+
description: Four debate rounds recorded
|
|
14
|
+
- id: AC-3
|
|
15
|
+
description: Stack brief present in research-brief
|
|
16
|
+
- id: AC-4
|
|
17
|
+
description: Sprint contract complete
|
|
18
|
+
- id: AC-5
|
|
19
|
+
description: plan-review.md renders
|
|
20
|
+
rollback_plan:
|
|
21
|
+
revert_commit_ready: true
|
|
22
|
+
rollback_artifacts:
|
|
23
|
+
revert_command: git revert HEAD
|
|
24
|
+
revert_branch: main
|
|
25
|
+
patch_bundle: .pi/harness/runs/smoke-fixture/patch.bundle
|
|
26
|
+
execution_plan:
|
|
27
|
+
schema_version: "1.0.0"
|
|
28
|
+
phases:
|
|
29
|
+
- phase_id: P1
|
|
30
|
+
name: Foundation
|
|
31
|
+
objective: Establish baseline and verify harness wiring
|
|
32
|
+
entry_criteria:
|
|
33
|
+
- Fixture loaded
|
|
34
|
+
exit_criteria:
|
|
35
|
+
- AC-1 satisfied
|
|
36
|
+
milestone: M1-baseline
|
|
37
|
+
work_item_ids: [WI-1, WI-2, WI-3]
|
|
38
|
+
- phase_id: P2
|
|
39
|
+
name: Build
|
|
40
|
+
objective: Implement core changes
|
|
41
|
+
entry_criteria:
|
|
42
|
+
- M1-baseline complete
|
|
43
|
+
exit_criteria:
|
|
44
|
+
- AC-2 satisfied
|
|
45
|
+
milestone: M2-build
|
|
46
|
+
work_item_ids: [WI-4, WI-5, WI-6]
|
|
47
|
+
- phase_id: P3
|
|
48
|
+
name: Verify
|
|
49
|
+
objective: Quality gate and documentation
|
|
50
|
+
entry_criteria:
|
|
51
|
+
- M2-build complete
|
|
52
|
+
exit_criteria:
|
|
53
|
+
- AC-5 satisfied
|
|
54
|
+
milestone: M3-ship
|
|
55
|
+
work_item_ids: [WI-7, WI-8]
|
|
56
|
+
work_items:
|
|
57
|
+
- work_item_id: WI-1
|
|
58
|
+
phase_id: P1
|
|
59
|
+
title: Load fixture packet
|
|
60
|
+
description: Read plan-packet.yaml from fixture directory
|
|
61
|
+
depends_on: []
|
|
62
|
+
files:
|
|
63
|
+
- .pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/plan-packet.yaml
|
|
64
|
+
parallel_safe: true
|
|
65
|
+
done_criteria:
|
|
66
|
+
type: manual
|
|
67
|
+
spec: Fixture packet readable
|
|
68
|
+
acceptance_check_ids: [AC-1]
|
|
69
|
+
- work_item_id: WI-2
|
|
70
|
+
phase_id: P1
|
|
71
|
+
title: Run DAG validator
|
|
72
|
+
description: Execute validate-plan-dag.mjs
|
|
73
|
+
depends_on: [WI-1]
|
|
74
|
+
files:
|
|
75
|
+
- .pi/scripts/validate-plan-dag.mjs
|
|
76
|
+
parallel_safe: false
|
|
77
|
+
done_criteria:
|
|
78
|
+
type: command
|
|
79
|
+
spec: node .pi/scripts/validate-plan-dag.mjs --packet plan-packet.yaml
|
|
80
|
+
acceptance_check_ids: [AC-1]
|
|
81
|
+
- work_item_id: WI-3
|
|
82
|
+
phase_id: P1
|
|
83
|
+
title: Lint harness-yaml
|
|
84
|
+
description: Ensure YAML helpers parse fixture
|
|
85
|
+
depends_on: [WI-1]
|
|
86
|
+
files:
|
|
87
|
+
- .pi/lib/harness-yaml.ts
|
|
88
|
+
parallel_safe: true
|
|
89
|
+
done_criteria:
|
|
90
|
+
type: lint
|
|
91
|
+
spec: npm test
|
|
92
|
+
acceptance_check_ids: [AC-1]
|
|
93
|
+
- work_item_id: WI-4
|
|
94
|
+
phase_id: P2
|
|
95
|
+
title: Debate round 1-2 artifacts
|
|
96
|
+
description: Validate review-round YAML
|
|
97
|
+
depends_on: [WI-2]
|
|
98
|
+
files:
|
|
99
|
+
- .pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r1.yaml
|
|
100
|
+
parallel_safe: false
|
|
101
|
+
done_criteria:
|
|
102
|
+
type: artifact
|
|
103
|
+
spec: artifacts/review-round-r1.yaml exists
|
|
104
|
+
acceptance_check_ids: [AC-2]
|
|
105
|
+
- work_item_id: WI-5
|
|
106
|
+
phase_id: P2
|
|
107
|
+
title: Debate round 3-4 artifacts
|
|
108
|
+
description: Validate final review round
|
|
109
|
+
depends_on: [WI-4]
|
|
110
|
+
files:
|
|
111
|
+
- .pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r4.yaml
|
|
112
|
+
parallel_safe: false
|
|
113
|
+
done_criteria:
|
|
114
|
+
type: artifact
|
|
115
|
+
spec: artifacts/review-round-r4.yaml exists
|
|
116
|
+
acceptance_check_ids: [AC-2]
|
|
117
|
+
- work_item_id: WI-6
|
|
118
|
+
phase_id: P2
|
|
119
|
+
title: Stack research merge
|
|
120
|
+
description: research-brief includes stack section
|
|
121
|
+
depends_on: [WI-2]
|
|
122
|
+
files: []
|
|
123
|
+
non_code: true
|
|
124
|
+
parallel_safe: true
|
|
125
|
+
done_criteria:
|
|
126
|
+
type: manual
|
|
127
|
+
spec: research-brief.yaml contains stack key
|
|
128
|
+
acceptance_check_ids: [AC-3]
|
|
129
|
+
- work_item_id: WI-7
|
|
130
|
+
phase_id: P3
|
|
131
|
+
title: Sprint contract audit
|
|
132
|
+
description: R4 sprint audit artifact
|
|
133
|
+
depends_on: [WI-5]
|
|
134
|
+
files:
|
|
135
|
+
- .pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/sprint-audit-r4.yaml
|
|
136
|
+
parallel_safe: false
|
|
137
|
+
done_criteria:
|
|
138
|
+
type: artifact
|
|
139
|
+
spec: sprint-audit-r4.yaml present
|
|
140
|
+
acceptance_check_ids: [AC-4]
|
|
141
|
+
- work_item_id: WI-8
|
|
142
|
+
phase_id: P3
|
|
143
|
+
title: Render plan-review
|
|
144
|
+
description: Human-readable plan review markdown
|
|
145
|
+
depends_on: [WI-7]
|
|
146
|
+
files:
|
|
147
|
+
- .pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/plan-review.md
|
|
148
|
+
parallel_safe: false
|
|
149
|
+
done_criteria:
|
|
150
|
+
type: manual
|
|
151
|
+
spec: plan-review.md non-empty
|
|
152
|
+
acceptance_check_ids: [AC-5]
|
|
153
|
+
sprint_contract:
|
|
154
|
+
in_scope:
|
|
155
|
+
- Fixture validation only
|
|
156
|
+
out_of_scope:
|
|
157
|
+
- Production deploy
|
|
158
|
+
definition_of_done: All smoke checks green
|
|
159
|
+
assumptions:
|
|
160
|
+
- CI environment has node
|
|
161
|
+
external_dependencies: []
|
|
162
|
+
wbs_dictionary:
|
|
163
|
+
- work_item_id: WI-1
|
|
164
|
+
deliverable: Fixture packet loaded
|
|
165
|
+
owner_role: executor
|
|
166
|
+
inputs: []
|
|
167
|
+
outputs: [parsed packet]
|
|
168
|
+
risk_register:
|
|
169
|
+
- risk_id: R1
|
|
170
|
+
description: DAG validator false negative
|
|
171
|
+
likelihood: low
|
|
172
|
+
impact: high
|
|
173
|
+
mitigation: Unit tests on validate-plan-dag.mjs
|
|
174
|
+
linked_work_item_ids: [WI-2]
|
|
175
|
+
- risk_id: R2
|
|
176
|
+
description: Debate cap misconfiguration
|
|
177
|
+
likelihood: med
|
|
178
|
+
impact: med
|
|
179
|
+
mitigation: debate-orchestrator plan profile tests
|
|
180
|
+
linked_work_item_ids: [WI-4]
|
|
181
|
+
- risk_id: R3
|
|
182
|
+
description: YAML parse drift
|
|
183
|
+
likelihood: low
|
|
184
|
+
impact: med
|
|
185
|
+
mitigation: harness-yaml strict parse
|
|
186
|
+
linked_work_item_ids: [WI-3]
|
|
187
|
+
schedule_metadata:
|
|
188
|
+
critical_path_work_item_ids: [WI-1, WI-2, WI-4, WI-5, WI-7, WI-8]
|
|
189
|
+
parallel_groups:
|
|
190
|
+
- [WI-1, WI-3]
|
|
191
|
+
schedule_baseline_note: Fixture topological order; no calendar dates
|
|
192
|
+
dag_validation:
|
|
193
|
+
status: pass
|
|
194
|
+
topological_order: [WI-1, WI-2, WI-3, WI-4, WI-5, WI-6, WI-7, WI-8]
|
|
195
|
+
cycles: []
|
|
196
|
+
conflicts: []
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
# Plan review (fixture)
|
|
2
|
+
|
|
3
|
+
plan_id: plan-smoke-fixture-001
|
|
4
|
+
|
|
5
|
+
## Execution plan
|
|
6
|
+
|
|
7
|
+
Phases: P1 Foundation → P2 Build → P3 Verify
|
|
8
|
+
|
|
9
|
+
Critical path: WI-1 → WI-2 → WI-4 → WI-5 → WI-7 → WI-8
|
|
10
|
+
|
|
11
|
+
## Debate
|
|
12
|
+
|
|
13
|
+
- Round 1 (spec): review_gate_ready
|
|
14
|
+
- Round 4 (quality): review_gate_ready
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
decomposition:
|
|
2
|
+
schema_version: "1.0.0"
|
|
3
|
+
problem_restatement: Validate plan-phase YAML and debate pipeline
|
|
4
|
+
hypothesis:
|
|
5
|
+
schema_version: "1.0.0"
|
|
6
|
+
primary:
|
|
7
|
+
claim: Fixture-driven smoke covers DAG and debate
|
|
8
|
+
mechanism: Static artifacts plus validate-plan-dag.mjs
|
|
9
|
+
prediction: CI passes without live agents
|
|
10
|
+
experiment: Run smoke-harness-plan.mjs --fixture
|
|
11
|
+
stack:
|
|
12
|
+
schema_version: "1.0.0"
|
|
13
|
+
problem_framing: Node harness tooling
|
|
14
|
+
constraints: []
|
|
15
|
+
options:
|
|
16
|
+
- name: extend current stack
|
|
17
|
+
category: brownfield
|
|
18
|
+
fit_summary: Use existing ultimate-pi harness
|
|
19
|
+
tradeoffs:
|
|
20
|
+
pros: [No new deps]
|
|
21
|
+
cons: []
|
|
22
|
+
risks: []
|
|
23
|
+
evidence_refs: []
|
|
24
|
+
recommendation_rank: 1
|
|
25
|
+
recommended_primary: extend current stack
|
|
26
|
+
rationale: Fixture validates in-repo harness
|
|
27
|
+
eval:
|
|
28
|
+
schema_version: "1.0.0"
|
|
29
|
+
revision_recommended: false
|
|
30
|
+
relevance:
|
|
31
|
+
passes: true
|
|
32
|
+
rationale: Hypothesis matches smoke task
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"project_root": "/tmp/ultimate-pi-smoke",
|
|
6
6
|
"phase": "plan",
|
|
7
7
|
"plan_id": null,
|
|
8
|
-
"plan_packet_path": "/tmp/ultimate-pi-smoke/.pi/harness/runs/smoke-session-1/plan-packet.
|
|
8
|
+
"plan_packet_path": "/tmp/ultimate-pi-smoke/.pi/harness/runs/smoke-session-1/plan-packet.yaml",
|
|
9
9
|
"plan_ready": false,
|
|
10
10
|
"task_summary": "smoke task",
|
|
11
11
|
"status": "active",
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* smoke-harness-plan — fixture validation for plan-phase pipeline (CI).
|
|
4
|
+
* Usage: node .pi/harness/evals/smoke/smoke-harness-plan.mjs --fixture
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import { access, cp, mkdir, readFile, rm } from "node:fs/promises";
|
|
8
|
+
import { constants } from "node:fs";
|
|
9
|
+
import { dirname, join, resolve } from "node:path";
|
|
10
|
+
import { fileURLToPath } from "node:url";
|
|
11
|
+
import { parse as parseYaml } from "yaml";
|
|
12
|
+
import { validateExecutionPlan } from "../../../scripts/validate-plan-dag.mjs";
|
|
13
|
+
|
|
14
|
+
const ROOT = join(dirname(fileURLToPath(import.meta.url)), "..", "..", "..", "..");
|
|
15
|
+
const FIXTURE_DIR = join(dirname(fileURLToPath(import.meta.url)), "fixtures", "plan-phase");
|
|
16
|
+
|
|
17
|
+
function fail(msg) {
|
|
18
|
+
console.error(`smoke-harness-plan: FAIL: ${msg}`);
|
|
19
|
+
process.exit(1);
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
function ok(msg) {
|
|
23
|
+
console.log(` ✓ ${msg}`);
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
async function runFixture() {
|
|
27
|
+
const fixtureRoot = join(FIXTURE_DIR, "minimal-med");
|
|
28
|
+
try {
|
|
29
|
+
await access(fixtureRoot, constants.R_OK);
|
|
30
|
+
} catch {
|
|
31
|
+
fail(`missing fixture ${fixtureRoot}`);
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
const packetPath = join(fixtureRoot, "plan-packet.yaml");
|
|
35
|
+
const raw = await readFile(packetPath, "utf-8");
|
|
36
|
+
const packet = parseYaml(raw);
|
|
37
|
+
if (packet.contract_version !== "1.1.0") {
|
|
38
|
+
fail("fixture contract_version must be 1.1.0");
|
|
39
|
+
}
|
|
40
|
+
if (!packet.execution_plan) fail("fixture missing execution_plan");
|
|
41
|
+
|
|
42
|
+
const { status, errors } = validateExecutionPlan(packet, fixtureRoot);
|
|
43
|
+
if (status !== "pass") {
|
|
44
|
+
fail(`DAG validation failed: ${errors.join("; ")}`);
|
|
45
|
+
}
|
|
46
|
+
ok("fixture plan-packet.yaml DAG pass");
|
|
47
|
+
|
|
48
|
+
const reviewPath = join(fixtureRoot, "plan-review.md");
|
|
49
|
+
await access(reviewPath, constants.R_OK);
|
|
50
|
+
ok("plan-review.md present");
|
|
51
|
+
|
|
52
|
+
const debateRounds = ["review-round-r1.yaml", "review-round-r4.yaml"];
|
|
53
|
+
for (const name of debateRounds) {
|
|
54
|
+
const p = join(fixtureRoot, "artifacts", name);
|
|
55
|
+
await access(p, constants.R_OK);
|
|
56
|
+
const draft = parseYaml(await readFile(p, "utf-8"));
|
|
57
|
+
if (!draft.schema_version) fail(`${name} missing schema_version`);
|
|
58
|
+
}
|
|
59
|
+
ok("debate round YAML artifacts present");
|
|
60
|
+
|
|
61
|
+
const researchPath = join(fixtureRoot, "research-brief.yaml");
|
|
62
|
+
const research = parseYaml(await readFile(researchPath, "utf-8"));
|
|
63
|
+
if (!research.decomposition || !research.hypothesis) {
|
|
64
|
+
fail("research-brief.yaml missing decomposition/hypothesis");
|
|
65
|
+
}
|
|
66
|
+
ok("research-brief.yaml structure");
|
|
67
|
+
|
|
68
|
+
console.log("smoke-harness-plan: all fixture checks passed");
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
async function main() {
|
|
72
|
+
const args = process.argv.slice(2);
|
|
73
|
+
if (args.includes("--fixture")) {
|
|
74
|
+
await runFixture();
|
|
75
|
+
return;
|
|
76
|
+
}
|
|
77
|
+
if (args.includes("--live")) {
|
|
78
|
+
console.log(
|
|
79
|
+
"smoke-harness-plan: --live requires manual /harness-plan run; skipping in CI",
|
|
80
|
+
);
|
|
81
|
+
return;
|
|
82
|
+
}
|
|
83
|
+
fail("Usage: smoke-harness-plan.mjs --fixture | --live");
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
main().catch((err) => {
|
|
87
|
+
fail(err instanceof Error ? err.message : String(err));
|
|
88
|
+
});
|
|
@@ -24,7 +24,12 @@
|
|
|
24
24
|
"harness_drift_report",
|
|
25
25
|
"harness_eval_verdict",
|
|
26
26
|
"harness_sentrux_signal",
|
|
27
|
-
"harness_observation"
|
|
27
|
+
"harness_observation",
|
|
28
|
+
"harness_subagent_spawned",
|
|
29
|
+
"harness_subagent_completed",
|
|
30
|
+
"harness_subagent_result_wait",
|
|
31
|
+
"harness_subagent_setup",
|
|
32
|
+
"harness_blackboard_op"
|
|
28
33
|
]
|
|
29
34
|
},
|
|
30
35
|
"distinct_id": {
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://ultimate-pi.local/.pi/harness/specs/plan-execution-plan-brief.schema.json",
|
|
4
|
+
"title": "PlanExecutionPlanBrief",
|
|
5
|
+
"type": "object",
|
|
6
|
+
"additionalProperties": false,
|
|
7
|
+
"required": ["schema_version", "execution_plan"],
|
|
8
|
+
"properties": {
|
|
9
|
+
"schema_version": { "type": "string", "const": "1.0.0" },
|
|
10
|
+
"execution_plan": { "$ref": "plan-execution-plan.schema.json" },
|
|
11
|
+
"human_summary": { "type": "string" }
|
|
12
|
+
}
|
|
13
|
+
}
|
|
@@ -0,0 +1,255 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://ultimate-pi.local/.pi/harness/specs/plan-execution-plan.schema.json",
|
|
4
|
+
"title": "ExecutionPlan",
|
|
5
|
+
"type": "object",
|
|
6
|
+
"additionalProperties": false,
|
|
7
|
+
"required": [
|
|
8
|
+
"schema_version",
|
|
9
|
+
"phases",
|
|
10
|
+
"work_items",
|
|
11
|
+
"sprint_contract",
|
|
12
|
+
"wbs_dictionary",
|
|
13
|
+
"risk_register",
|
|
14
|
+
"schedule_metadata",
|
|
15
|
+
"dag_validation"
|
|
16
|
+
],
|
|
17
|
+
"properties": {
|
|
18
|
+
"schema_version": { "type": "string", "const": "1.0.0" },
|
|
19
|
+
"phases": {
|
|
20
|
+
"type": "array",
|
|
21
|
+
"minItems": 1,
|
|
22
|
+
"items": { "$ref": "#/$defs/phase" }
|
|
23
|
+
},
|
|
24
|
+
"work_items": {
|
|
25
|
+
"type": "array",
|
|
26
|
+
"minItems": 1,
|
|
27
|
+
"items": { "$ref": "#/$defs/work_item" }
|
|
28
|
+
},
|
|
29
|
+
"sprint_contract": { "$ref": "#/$defs/sprint_contract" },
|
|
30
|
+
"wbs_dictionary": {
|
|
31
|
+
"type": "array",
|
|
32
|
+
"items": { "$ref": "#/$defs/wbs_entry" }
|
|
33
|
+
},
|
|
34
|
+
"risk_register": {
|
|
35
|
+
"type": "array",
|
|
36
|
+
"items": { "$ref": "#/$defs/risk" }
|
|
37
|
+
},
|
|
38
|
+
"schedule_metadata": { "$ref": "#/$defs/schedule_metadata" },
|
|
39
|
+
"dag_validation": { "$ref": "#/$defs/dag_validation" }
|
|
40
|
+
},
|
|
41
|
+
"$defs": {
|
|
42
|
+
"phase": {
|
|
43
|
+
"type": "object",
|
|
44
|
+
"additionalProperties": false,
|
|
45
|
+
"required": [
|
|
46
|
+
"phase_id",
|
|
47
|
+
"name",
|
|
48
|
+
"objective",
|
|
49
|
+
"entry_criteria",
|
|
50
|
+
"exit_criteria",
|
|
51
|
+
"milestone",
|
|
52
|
+
"work_item_ids"
|
|
53
|
+
],
|
|
54
|
+
"properties": {
|
|
55
|
+
"phase_id": { "type": "string", "minLength": 1 },
|
|
56
|
+
"name": { "type": "string", "minLength": 1 },
|
|
57
|
+
"objective": { "type": "string", "minLength": 1 },
|
|
58
|
+
"entry_criteria": {
|
|
59
|
+
"type": "array",
|
|
60
|
+
"minItems": 1,
|
|
61
|
+
"items": { "type": "string", "minLength": 1 }
|
|
62
|
+
},
|
|
63
|
+
"exit_criteria": {
|
|
64
|
+
"type": "array",
|
|
65
|
+
"minItems": 1,
|
|
66
|
+
"items": { "type": "string", "minLength": 1 }
|
|
67
|
+
},
|
|
68
|
+
"milestone": { "type": "string", "minLength": 1 },
|
|
69
|
+
"work_item_ids": {
|
|
70
|
+
"type": "array",
|
|
71
|
+
"minItems": 1,
|
|
72
|
+
"items": { "type": "string", "minLength": 1 }
|
|
73
|
+
}
|
|
74
|
+
}
|
|
75
|
+
},
|
|
76
|
+
"work_item": {
|
|
77
|
+
"type": "object",
|
|
78
|
+
"additionalProperties": false,
|
|
79
|
+
"required": [
|
|
80
|
+
"work_item_id",
|
|
81
|
+
"phase_id",
|
|
82
|
+
"title",
|
|
83
|
+
"description",
|
|
84
|
+
"depends_on",
|
|
85
|
+
"files",
|
|
86
|
+
"parallel_safe",
|
|
87
|
+
"done_criteria",
|
|
88
|
+
"acceptance_check_ids"
|
|
89
|
+
],
|
|
90
|
+
"properties": {
|
|
91
|
+
"work_item_id": { "type": "string", "minLength": 1 },
|
|
92
|
+
"phase_id": { "type": "string", "minLength": 1 },
|
|
93
|
+
"title": { "type": "string", "minLength": 1 },
|
|
94
|
+
"description": { "type": "string", "minLength": 1 },
|
|
95
|
+
"depends_on": {
|
|
96
|
+
"type": "array",
|
|
97
|
+
"items": { "type": "string", "minLength": 1 }
|
|
98
|
+
},
|
|
99
|
+
"files": {
|
|
100
|
+
"type": "array",
|
|
101
|
+
"items": { "type": "string", "minLength": 1 }
|
|
102
|
+
},
|
|
103
|
+
"parallel_safe": { "type": "boolean" },
|
|
104
|
+
"non_code": { "type": "boolean" },
|
|
105
|
+
"done_criteria": { "$ref": "#/$defs/done_criteria" },
|
|
106
|
+
"acceptance_check_ids": {
|
|
107
|
+
"type": "array",
|
|
108
|
+
"minItems": 1,
|
|
109
|
+
"items": { "type": "string", "minLength": 1 }
|
|
110
|
+
}
|
|
111
|
+
}
|
|
112
|
+
},
|
|
113
|
+
"done_criteria": {
|
|
114
|
+
"type": "object",
|
|
115
|
+
"additionalProperties": false,
|
|
116
|
+
"required": ["type", "spec"],
|
|
117
|
+
"properties": {
|
|
118
|
+
"type": {
|
|
119
|
+
"type": "string",
|
|
120
|
+
"enum": ["command", "test", "lint", "manual", "artifact"]
|
|
121
|
+
},
|
|
122
|
+
"spec": { "type": "string", "minLength": 1 }
|
|
123
|
+
}
|
|
124
|
+
},
|
|
125
|
+
"sprint_contract": {
|
|
126
|
+
"type": "object",
|
|
127
|
+
"additionalProperties": false,
|
|
128
|
+
"required": [
|
|
129
|
+
"in_scope",
|
|
130
|
+
"out_of_scope",
|
|
131
|
+
"definition_of_done",
|
|
132
|
+
"assumptions",
|
|
133
|
+
"external_dependencies"
|
|
134
|
+
],
|
|
135
|
+
"properties": {
|
|
136
|
+
"in_scope": {
|
|
137
|
+
"type": "array",
|
|
138
|
+
"items": { "type": "string", "minLength": 1 }
|
|
139
|
+
},
|
|
140
|
+
"out_of_scope": {
|
|
141
|
+
"type": "array",
|
|
142
|
+
"items": { "type": "string", "minLength": 1 }
|
|
143
|
+
},
|
|
144
|
+
"definition_of_done": { "type": "string", "minLength": 1 },
|
|
145
|
+
"assumptions": {
|
|
146
|
+
"type": "array",
|
|
147
|
+
"items": { "type": "string", "minLength": 1 }
|
|
148
|
+
},
|
|
149
|
+
"external_dependencies": {
|
|
150
|
+
"type": "array",
|
|
151
|
+
"items": { "type": "string", "minLength": 1 }
|
|
152
|
+
}
|
|
153
|
+
}
|
|
154
|
+
},
|
|
155
|
+
"wbs_entry": {
|
|
156
|
+
"type": "object",
|
|
157
|
+
"additionalProperties": false,
|
|
158
|
+
"required": [
|
|
159
|
+
"work_item_id",
|
|
160
|
+
"deliverable",
|
|
161
|
+
"owner_role",
|
|
162
|
+
"inputs",
|
|
163
|
+
"outputs"
|
|
164
|
+
],
|
|
165
|
+
"properties": {
|
|
166
|
+
"work_item_id": { "type": "string", "minLength": 1 },
|
|
167
|
+
"deliverable": { "type": "string", "minLength": 1 },
|
|
168
|
+
"owner_role": {
|
|
169
|
+
"type": "string",
|
|
170
|
+
"enum": ["executor", "human", "research"]
|
|
171
|
+
},
|
|
172
|
+
"inputs": {
|
|
173
|
+
"type": "array",
|
|
174
|
+
"items": { "type": "string", "minLength": 1 }
|
|
175
|
+
},
|
|
176
|
+
"outputs": {
|
|
177
|
+
"type": "array",
|
|
178
|
+
"items": { "type": "string", "minLength": 1 }
|
|
179
|
+
}
|
|
180
|
+
}
|
|
181
|
+
},
|
|
182
|
+
"risk": {
|
|
183
|
+
"type": "object",
|
|
184
|
+
"additionalProperties": false,
|
|
185
|
+
"required": [
|
|
186
|
+
"risk_id",
|
|
187
|
+
"description",
|
|
188
|
+
"likelihood",
|
|
189
|
+
"impact",
|
|
190
|
+
"mitigation",
|
|
191
|
+
"linked_work_item_ids"
|
|
192
|
+
],
|
|
193
|
+
"properties": {
|
|
194
|
+
"risk_id": { "type": "string", "minLength": 1 },
|
|
195
|
+
"description": { "type": "string", "minLength": 1 },
|
|
196
|
+
"likelihood": {
|
|
197
|
+
"type": "string",
|
|
198
|
+
"enum": ["low", "med", "high"]
|
|
199
|
+
},
|
|
200
|
+
"impact": {
|
|
201
|
+
"type": "string",
|
|
202
|
+
"enum": ["low", "med", "high"]
|
|
203
|
+
},
|
|
204
|
+
"mitigation": { "type": "string", "minLength": 1 },
|
|
205
|
+
"linked_work_item_ids": {
|
|
206
|
+
"type": "array",
|
|
207
|
+
"items": { "type": "string", "minLength": 1 }
|
|
208
|
+
}
|
|
209
|
+
}
|
|
210
|
+
},
|
|
211
|
+
"schedule_metadata": {
|
|
212
|
+
"type": "object",
|
|
213
|
+
"additionalProperties": false,
|
|
214
|
+
"required": [
|
|
215
|
+
"critical_path_work_item_ids",
|
|
216
|
+
"parallel_groups",
|
|
217
|
+
"schedule_baseline_note"
|
|
218
|
+
],
|
|
219
|
+
"properties": {
|
|
220
|
+
"critical_path_work_item_ids": {
|
|
221
|
+
"type": "array",
|
|
222
|
+
"items": { "type": "string", "minLength": 1 }
|
|
223
|
+
},
|
|
224
|
+
"parallel_groups": {
|
|
225
|
+
"type": "array",
|
|
226
|
+
"items": {
|
|
227
|
+
"type": "array",
|
|
228
|
+
"items": { "type": "string", "minLength": 1 }
|
|
229
|
+
}
|
|
230
|
+
},
|
|
231
|
+
"schedule_baseline_note": { "type": "string", "minLength": 1 }
|
|
232
|
+
}
|
|
233
|
+
},
|
|
234
|
+
"dag_validation": {
|
|
235
|
+
"type": "object",
|
|
236
|
+
"additionalProperties": false,
|
|
237
|
+
"required": ["status", "topological_order", "cycles", "conflicts"],
|
|
238
|
+
"properties": {
|
|
239
|
+
"status": { "type": "string", "enum": ["pass", "fail"] },
|
|
240
|
+
"topological_order": {
|
|
241
|
+
"type": "array",
|
|
242
|
+
"items": { "type": "string", "minLength": 1 }
|
|
243
|
+
},
|
|
244
|
+
"cycles": {
|
|
245
|
+
"type": "array",
|
|
246
|
+
"items": { "type": "array", "items": { "type": "string" } }
|
|
247
|
+
},
|
|
248
|
+
"conflicts": {
|
|
249
|
+
"type": "array",
|
|
250
|
+
"items": { "type": "string", "minLength": 1 }
|
|
251
|
+
}
|
|
252
|
+
}
|
|
253
|
+
}
|
|
254
|
+
}
|
|
255
|
+
}
|