ultimate-pi 0.10.1 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (135) hide show
  1. package/.agents/skills/harness-debate-plan/SKILL.md +44 -0
  2. package/.agents/skills/harness-decisions/SKILL.md +3 -3
  3. package/.agents/skills/harness-orchestration/SKILL.md +59 -25
  4. package/.agents/skills/harness-plan/SKILL.md +16 -15
  5. package/.pi/agents/harness/adversary.md +0 -1
  6. package/.pi/agents/harness/evaluator.md +0 -1
  7. package/.pi/agents/harness/executor.md +1 -2
  8. package/.pi/agents/harness/incident-recorder.md +0 -1
  9. package/.pi/agents/harness/meta-optimizer.md +0 -1
  10. package/.pi/agents/harness/planning/decompose.md +83 -0
  11. package/.pi/agents/harness/planning/execution-plan-author.md +30 -0
  12. package/.pi/agents/harness/planning/hypothesis-validator.md +23 -0
  13. package/.pi/agents/harness/planning/hypothesis.md +89 -0
  14. package/.pi/agents/harness/planning/plan-adversary.md +18 -0
  15. package/.pi/agents/harness/planning/plan-evaluator.md +18 -0
  16. package/.pi/agents/harness/planning/review-integrator.md +23 -0
  17. package/.pi/agents/harness/planning/scout-graphify.md +54 -0
  18. package/.pi/agents/harness/planning/scout-semantic.md +47 -0
  19. package/.pi/agents/harness/planning/scout-structure.md +50 -0
  20. package/.pi/agents/harness/planning/sprint-contract-auditor.md +18 -0
  21. package/.pi/agents/harness/planning/stack-researcher.md +24 -0
  22. package/.pi/agents/harness/tie-breaker.md +0 -1
  23. package/.pi/agents/harness/trace-librarian.md +0 -1
  24. package/.pi/extensions/debate-orchestrator.ts +90 -53
  25. package/.pi/extensions/harness-ask-user.ts +5 -0
  26. package/.pi/extensions/harness-plan-approval.ts +137 -3
  27. package/.pi/extensions/harness-run-context.ts +146 -6
  28. package/.pi/extensions/harness-subagents.ts +10 -5
  29. package/.pi/extensions/harness-web-tools.ts +2 -0
  30. package/.pi/extensions/lib/extension-load-guard.ts +39 -0
  31. package/.pi/extensions/lib/harness-posthog.ts +6 -1
  32. package/.pi/extensions/lib/harness-spawn-budget.ts +75 -0
  33. package/.pi/extensions/lib/harness-subagent-auth.ts +123 -0
  34. package/.pi/extensions/lib/{harness-subagents/harness-subagent-policy.ts → harness-subagent-policy.ts} +34 -9
  35. package/.pi/extensions/lib/harness-subagent-precheck.ts +95 -0
  36. package/.pi/extensions/lib/harness-subagents-bridge.ts +176 -0
  37. package/.pi/extensions/lib/plan-approval/create-plan.ts +9 -7
  38. package/.pi/extensions/lib/plan-approval/plan-review.ts +393 -0
  39. package/.pi/extensions/lib/plan-approval/schema.ts +16 -1
  40. package/.pi/extensions/lib/plan-approval/types.ts +16 -0
  41. package/.pi/extensions/lib/plan-approval/validate.ts +2 -0
  42. package/.pi/extensions/lib/plan-debate-envelope.ts +84 -0
  43. package/.pi/extensions/lib/{harness-subagents/spawn-policy.ts → spawn-policy.ts} +2 -5
  44. package/.pi/extensions/policy-gate.ts +1 -1
  45. package/.pi/extensions/review-integrity.ts +48 -29
  46. package/.pi/extensions/ultimate-pi-vcc.ts +5 -0
  47. package/.pi/harness/agents.manifest.json +126 -82
  48. package/.pi/harness/docs/adrs/0032-harness-command-orchestration.md +7 -6
  49. package/.pi/harness/docs/adrs/0033-parent-orchestrated-planning.md +34 -0
  50. package/.pi/harness/docs/adrs/0034-darwin-plan-research-pipeline.md +41 -0
  51. package/.pi/harness/docs/adrs/0035-plan-phase-review-gate.md +27 -0
  52. package/.pi/harness/docs/adrs/README.md +2 -0
  53. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r1.yaml +25 -0
  54. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r4.yaml +26 -0
  55. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/sprint-audit-r4.yaml +5 -0
  56. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/plan-packet.yaml +196 -0
  57. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/plan-review.md +14 -0
  58. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/research-brief.yaml +32 -0
  59. package/.pi/harness/evals/smoke/run-context.fixture.json +1 -1
  60. package/.pi/harness/evals/smoke/smoke-harness-plan.mjs +88 -0
  61. package/.pi/harness/specs/README.md +1 -1
  62. package/.pi/harness/specs/harness-posthog-event.schema.json +6 -1
  63. package/.pi/harness/specs/harness-spawn-context.schema.json +2 -1
  64. package/.pi/harness/specs/plan-adversary-brief.schema.json +45 -0
  65. package/.pi/harness/specs/plan-decomposition-brief.schema.json +108 -0
  66. package/.pi/harness/specs/plan-execution-plan-brief.schema.json +13 -0
  67. package/.pi/harness/specs/plan-execution-plan.schema.json +255 -0
  68. package/.pi/harness/specs/plan-hypothesis-brief.schema.json +96 -0
  69. package/.pi/harness/specs/plan-hypothesis-eval.schema.json +61 -0
  70. package/.pi/harness/specs/plan-packet.schema.json +14 -5
  71. package/.pi/harness/specs/plan-review-round-draft.schema.json +68 -0
  72. package/.pi/harness/specs/plan-sprint-audit-turn.schema.json +29 -0
  73. package/.pi/harness/specs/plan-stack-brief.schema.json +65 -0
  74. package/.pi/harness/specs/plan-validation-turn.schema.json +42 -0
  75. package/.pi/harness/specs/round-result.schema.json +16 -9
  76. package/.pi/lib/debate-orchestrator-types.ts +38 -0
  77. package/.pi/lib/harness-agent-discovery.mjs +81 -0
  78. package/.pi/lib/harness-run-context.ts +76 -38
  79. package/.pi/lib/harness-yaml.mjs +73 -0
  80. package/.pi/lib/harness-yaml.ts +90 -0
  81. package/.pi/prompts/harness-auto.md +13 -11
  82. package/.pi/prompts/harness-critic.md +2 -2
  83. package/.pi/prompts/harness-eval.md +3 -3
  84. package/.pi/prompts/harness-incident.md +2 -2
  85. package/.pi/prompts/harness-plan.md +106 -37
  86. package/.pi/prompts/harness-review.md +2 -2
  87. package/.pi/prompts/harness-router-tune.md +1 -1
  88. package/.pi/prompts/harness-run.md +2 -2
  89. package/.pi/prompts/harness-setup.md +15 -6
  90. package/.pi/prompts/harness-trace.md +2 -2
  91. package/.pi/scripts/harness-agents-manifest.mjs +1 -1
  92. package/.pi/scripts/harness-resolve-up-pkg.mjs +13 -0
  93. package/.pi/scripts/harness-verify.mjs +28 -19
  94. package/.pi/scripts/validate-plan-dag.mjs +258 -0
  95. package/.pi/scripts/vendor-sync-pi-subagents.sh +19 -0
  96. package/CHANGELOG.md +24 -0
  97. package/THIRD_PARTY_NOTICES.md +8 -0
  98. package/biome.json +4 -1
  99. package/package.json +6 -4
  100. package/.pi/agents/harness/planner.md +0 -54
  101. package/.pi/extensions/lib/harness-subagents/agent-loader.ts +0 -126
  102. package/.pi/extensions/lib/harness-subagents/agent-manifest.ts +0 -119
  103. package/.pi/extensions/lib/harness-subagents/agent-parser.ts +0 -87
  104. package/.pi/extensions/lib/harness-subagents/blackboard-tool.ts +0 -118
  105. package/.pi/extensions/lib/harness-subagents/blackboard.ts +0 -175
  106. package/.pi/extensions/lib/harness-subagents/parent-ask-user-bridge.ts +0 -10
  107. package/.pi/extensions/lib/harness-subagents/parent-harness-ui-bridge.ts +0 -310
  108. package/.pi/extensions/lib/harness-subagents/parent-harness-ui-hooks.ts +0 -59
  109. package/.pi/extensions/lib/harness-subagents/types-blackboard.ts +0 -27
  110. package/.pi/extensions/lib/harness-subagents/vendored/agent-manager.ts +0 -558
  111. package/.pi/extensions/lib/harness-subagents/vendored/agent-runner.ts +0 -684
  112. package/.pi/extensions/lib/harness-subagents/vendored/agent-types.ts +0 -175
  113. package/.pi/extensions/lib/harness-subagents/vendored/context.ts +0 -59
  114. package/.pi/extensions/lib/harness-subagents/vendored/cross-extension-rpc.ts +0 -134
  115. package/.pi/extensions/lib/harness-subagents/vendored/custom-agents.ts +0 -5
  116. package/.pi/extensions/lib/harness-subagents/vendored/default-agents.ts +0 -123
  117. package/.pi/extensions/lib/harness-subagents/vendored/env.ts +0 -43
  118. package/.pi/extensions/lib/harness-subagents/vendored/group-join.ts +0 -144
  119. package/.pi/extensions/lib/harness-subagents/vendored/index.ts +0 -2494
  120. package/.pi/extensions/lib/harness-subagents/vendored/invocation-config.ts +0 -52
  121. package/.pi/extensions/lib/harness-subagents/vendored/memory.ts +0 -182
  122. package/.pi/extensions/lib/harness-subagents/vendored/model-resolver.ts +0 -92
  123. package/.pi/extensions/lib/harness-subagents/vendored/output-file.ts +0 -115
  124. package/.pi/extensions/lib/harness-subagents/vendored/prompts.ts +0 -103
  125. package/.pi/extensions/lib/harness-subagents/vendored/schedule-store.ts +0 -177
  126. package/.pi/extensions/lib/harness-subagents/vendored/schedule.ts +0 -416
  127. package/.pi/extensions/lib/harness-subagents/vendored/settings.ts +0 -210
  128. package/.pi/extensions/lib/harness-subagents/vendored/skill-loader.ts +0 -108
  129. package/.pi/extensions/lib/harness-subagents/vendored/types.ts +0 -187
  130. package/.pi/extensions/lib/harness-subagents/vendored/ui/agent-widget.ts +0 -639
  131. package/.pi/extensions/lib/harness-subagents/vendored/ui/conversation-viewer.ts +0 -324
  132. package/.pi/extensions/lib/harness-subagents/vendored/ui/schedule-menu.ts +0 -110
  133. package/.pi/extensions/lib/harness-subagents/vendored/usage.ts +0 -71
  134. package/.pi/extensions/lib/harness-subagents/vendored/worktree.ts +0 -195
  135. package/.pi/extensions/{00-ultimate-pi-system-prompt.ts → custom-system-prompt.ts} +0 -0
@@ -0,0 +1,41 @@
1
+ # ADR 0034: Darwin plan research pipeline
2
+
3
+ - **Status:** Accepted
4
+ - **Date:** 2026-05-17
5
+
6
+ ## Context
7
+
8
+ `/harness-plan` (ADR 0033) runs parent-orchestrated scouts and a single adversary before approval. Users need vague tasks transformed into rigorous, falsifiable hypotheses before execution plans are approved — not only codebase maps and scope bullets.
9
+
10
+ ## Decision
11
+
12
+ 1. **Always-on research chain** after parallel scouts:
13
+ - `harness/planning/decompose` — DeepMind-style problem decomposition (`PlanDecompositionBrief`)
14
+ - `harness/planning/hypothesis` — DARWIN hypothesis generation (`PlanHypothesisBrief`)
15
+ 2. **Parent maps hypothesis → PlanPacket** — `plan-packet.schema.json` unchanged; execution gating stable.
16
+ 3. **Parallel pre-approval reviews:**
17
+ - `harness/planning/plan-adversary` — execution risk on PlanPacket
18
+ - `harness/planning/hypothesis-eval` — blind self-eval (task + hypothesis only)
19
+ 4. **`approve_plan` optional `research_brief`** — rendered in `plan-review.md`; not written to `plan-packet.json`.
20
+ 5. **`--quick`** still skips semantic scout only; never skips decompose/hypothesis.
21
+
22
+ ## Consequences
23
+
24
+ ### Positive
25
+
26
+ - Plans grounded in explicit tensions, falsifiable claims, and experiments.
27
+ - Self-eval isolated from decomposition (reduces grade inflation).
28
+ - Editor review shows full research narrative plus PlanPacket.
29
+
30
+ ### Negative
31
+
32
+ - More subagent spawns per plan (scouts + decompose + hypothesis + 2 reviews; optional hypothesis revision).
33
+ - Longer plan phase latency and token cost.
34
+
35
+ ## References
36
+
37
+ - `.pi/prompts/harness-plan.md`
38
+ - `.pi/harness/specs/plan-decomposition-brief.schema.json`
39
+ - `.pi/harness/specs/plan-hypothesis-brief.schema.json`
40
+ - `.pi/harness/specs/plan-hypothesis-eval.schema.json`
41
+ - ADR 0033
@@ -0,0 +1,27 @@
1
+ # ADR-0035: Plan-phase Review Gate and YAML artifacts
2
+
3
+ ## Status
4
+
5
+ Accepted (2026-05-18)
6
+
7
+ ## Context
8
+
9
+ `/harness-plan` produced thin PlanPackets (scope + bullets). Post-execute adversarial review (`/harness-critic`) ran too late. The Graphify corpus (Structured Planning, ADR-020, Generator–Evaluator) defines a work breakdown structure (WBS), validation, and a review gate before baseline.
10
+
11
+ ## Decision
12
+
13
+ 1. **PlanPacket 1.1.0** — required `execution_plan` (phases, work_items, sprint_contract, dag_validation).
14
+ 2. **YAML on disk** — `plan-packet.yaml`, `research-brief.yaml`, `run-context.yaml`, `artifacts/*.yaml`. Validation format remains JSON Schema; instances are validated after YAML parsing.
15
+ 3. **Review Gate agents** — `stack-researcher`, `execution-plan-author`, debate: `hypothesis-validator`, `plan-evaluator`, `plan-adversary`, `sprint-contract-auditor`, `review-integrator`.
16
+ 4. **Debate bus** — `debate_id=plan-<run_id>`, plan budget profile (4 rounds, 12k cap), plan-phase consensus prerequisites.
17
+ 5. **No legacy JSON** plan paths; no pre-debate standalone `hypothesis-eval`.
18
+
19
+ ## Consequences
20
+
21
+ - Positive: PM-grade plans, deterministic DAG gate, blind hypothesis eval in debate R1.
22
+ - Negative: Higher spawn/token cost; `harness-verify` and smoke fixtures must use `.yaml`.
23
+
24
+ ## References
25
+
26
+ - [ADR-0033](0033-parent-orchestrated-planning.md), [ADR-0034](0034-darwin-plan-research-pipeline.md)
27
+ - `raw/decisions/adr-020.md`, `raw/modules/structured-planning.md`
@@ -18,6 +18,8 @@ Team-shared ADRs for the ultimate-pi harness live under `.pi/harness/docs/adrs/`
18
18
  | [0030](0030-inhouse-vcc-compaction.md) | In-house VCC compaction (vendored pi-vcc) | Accepted |
19
19
  | [0031](0031-harness-run-context.md) | Harness active run context | Accepted |
20
20
  | [0032](0032-harness-command-orchestration.md) | Harness commands as agent orchestrators | Accepted |
21
+ | [0033](0033-parent-orchestrated-planning.md) | Parent-orchestrated harness planning | Accepted |
22
+ | [0034](0034-darwin-plan-research-pipeline.md) | Darwin plan research pipeline | Accepted |
21
23
 
22
24
  ## Template
23
25
 
@@ -0,0 +1,25 @@
1
+ schema_version: "1.0.0"
2
+ round_index: 1
3
+ debate_round_focus: spec
4
+ round_summary: Spec round passed for fixture
5
+ validation_summary: All spec checks pass
6
+ adversary_summary: No blocking adversarial findings
7
+ disputes: []
8
+ recommended_packet_patches: []
9
+ review_gate_ready: true
10
+ participants:
11
+ - PlanEvaluatorAgent
12
+ - PlanAdversaryAgent
13
+ - HypothesisValidatorAgent
14
+ - ReviewIntegratorAgent
15
+ claims:
16
+ - spec validation complete
17
+ rebuttals: []
18
+ evidence_refs: []
19
+ token_usage:
20
+ per_agent:
21
+ PlanEvaluatorAgent: 100
22
+ PlanAdversaryAgent: 100
23
+ ReviewIntegratorAgent: 50
24
+ round_total: 250
25
+ consensus_delta: 0.1
@@ -0,0 +1,26 @@
1
+ schema_version: "1.0.0"
2
+ round_index: 4
3
+ debate_round_focus: quality
4
+ round_summary: Quality and sprint contract round passed
5
+ validation_summary: Sprint contract complete
6
+ adversary_summary: No gaps
7
+ disputes: []
8
+ recommended_packet_patches: []
9
+ review_gate_ready: true
10
+ participants:
11
+ - PlanEvaluatorAgent
12
+ - PlanAdversaryAgent
13
+ - SprintContractAuditorAgent
14
+ - ReviewIntegratorAgent
15
+ claims:
16
+ - review gate ready
17
+ rebuttals: []
18
+ evidence_refs: []
19
+ token_usage:
20
+ per_agent:
21
+ PlanEvaluatorAgent: 120
22
+ PlanAdversaryAgent: 110
23
+ SprintContractAuditorAgent: 90
24
+ ReviewIntegratorAgent: 60
25
+ round_total: 380
26
+ consensus_delta: 0.15
@@ -0,0 +1,5 @@
1
+ schema_version: "1.0.0"
2
+ round_index: 4
3
+ gaps: []
4
+ recommendation: proceed
5
+ human_summary: Sprint contract satisfies ADR-020 for fixture
@@ -0,0 +1,196 @@
1
+ schema_version: "1.0.0"
2
+ contract_version: "1.1.0"
3
+ plan_id: plan-smoke-fixture-001
4
+ task_id: task-smoke-001
5
+ scope: Smoke fixture for plan-phase harness validation with execution_plan and debate artifacts.
6
+ assumptions:
7
+ - Fixture only; no live agent run
8
+ risk_level: med
9
+ acceptance_checks:
10
+ - id: AC-1
11
+ description: DAG validation passes
12
+ - id: AC-2
13
+ description: Four debate rounds recorded
14
+ - id: AC-3
15
+ description: Stack brief present in research-brief
16
+ - id: AC-4
17
+ description: Sprint contract complete
18
+ - id: AC-5
19
+ description: plan-review.md renders
20
+ rollback_plan:
21
+ revert_commit_ready: true
22
+ rollback_artifacts:
23
+ revert_command: git revert HEAD
24
+ revert_branch: main
25
+ patch_bundle: .pi/harness/runs/smoke-fixture/patch.bundle
26
+ execution_plan:
27
+ schema_version: "1.0.0"
28
+ phases:
29
+ - phase_id: P1
30
+ name: Foundation
31
+ objective: Establish baseline and verify harness wiring
32
+ entry_criteria:
33
+ - Fixture loaded
34
+ exit_criteria:
35
+ - AC-1 satisfied
36
+ milestone: M1-baseline
37
+ work_item_ids: [WI-1, WI-2, WI-3]
38
+ - phase_id: P2
39
+ name: Build
40
+ objective: Implement core changes
41
+ entry_criteria:
42
+ - M1-baseline complete
43
+ exit_criteria:
44
+ - AC-2 satisfied
45
+ milestone: M2-build
46
+ work_item_ids: [WI-4, WI-5, WI-6]
47
+ - phase_id: P3
48
+ name: Verify
49
+ objective: Quality gate and documentation
50
+ entry_criteria:
51
+ - M2-build complete
52
+ exit_criteria:
53
+ - AC-5 satisfied
54
+ milestone: M3-ship
55
+ work_item_ids: [WI-7, WI-8]
56
+ work_items:
57
+ - work_item_id: WI-1
58
+ phase_id: P1
59
+ title: Load fixture packet
60
+ description: Read plan-packet.yaml from fixture directory
61
+ depends_on: []
62
+ files:
63
+ - .pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/plan-packet.yaml
64
+ parallel_safe: true
65
+ done_criteria:
66
+ type: manual
67
+ spec: Fixture packet readable
68
+ acceptance_check_ids: [AC-1]
69
+ - work_item_id: WI-2
70
+ phase_id: P1
71
+ title: Run DAG validator
72
+ description: Execute validate-plan-dag.mjs
73
+ depends_on: [WI-1]
74
+ files:
75
+ - .pi/scripts/validate-plan-dag.mjs
76
+ parallel_safe: false
77
+ done_criteria:
78
+ type: command
79
+ spec: node .pi/scripts/validate-plan-dag.mjs --packet plan-packet.yaml
80
+ acceptance_check_ids: [AC-1]
81
+ - work_item_id: WI-3
82
+ phase_id: P1
83
+ title: Lint harness-yaml
84
+ description: Ensure YAML helpers parse fixture
85
+ depends_on: [WI-1]
86
+ files:
87
+ - .pi/lib/harness-yaml.ts
88
+ parallel_safe: true
89
+ done_criteria:
90
+ type: lint
91
+ spec: npm test
92
+ acceptance_check_ids: [AC-1]
93
+ - work_item_id: WI-4
94
+ phase_id: P2
95
+ title: Debate round 1-2 artifacts
96
+ description: Validate review-round YAML
97
+ depends_on: [WI-2]
98
+ files:
99
+ - .pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r1.yaml
100
+ parallel_safe: false
101
+ done_criteria:
102
+ type: artifact
103
+ spec: artifacts/review-round-r1.yaml exists
104
+ acceptance_check_ids: [AC-2]
105
+ - work_item_id: WI-5
106
+ phase_id: P2
107
+ title: Debate round 3-4 artifacts
108
+ description: Validate final review round
109
+ depends_on: [WI-4]
110
+ files:
111
+ - .pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r4.yaml
112
+ parallel_safe: false
113
+ done_criteria:
114
+ type: artifact
115
+ spec: artifacts/review-round-r4.yaml exists
116
+ acceptance_check_ids: [AC-2]
117
+ - work_item_id: WI-6
118
+ phase_id: P2
119
+ title: Stack research merge
120
+ description: research-brief includes stack section
121
+ depends_on: [WI-2]
122
+ files: []
123
+ non_code: true
124
+ parallel_safe: true
125
+ done_criteria:
126
+ type: manual
127
+ spec: research-brief.yaml contains stack key
128
+ acceptance_check_ids: [AC-3]
129
+ - work_item_id: WI-7
130
+ phase_id: P3
131
+ title: Sprint contract audit
132
+ description: R4 sprint audit artifact
133
+ depends_on: [WI-5]
134
+ files:
135
+ - .pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/sprint-audit-r4.yaml
136
+ parallel_safe: false
137
+ done_criteria:
138
+ type: artifact
139
+ spec: sprint-audit-r4.yaml present
140
+ acceptance_check_ids: [AC-4]
141
+ - work_item_id: WI-8
142
+ phase_id: P3
143
+ title: Render plan-review
144
+ description: Human-readable plan review markdown
145
+ depends_on: [WI-7]
146
+ files:
147
+ - .pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/plan-review.md
148
+ parallel_safe: false
149
+ done_criteria:
150
+ type: manual
151
+ spec: plan-review.md non-empty
152
+ acceptance_check_ids: [AC-5]
153
+ sprint_contract:
154
+ in_scope:
155
+ - Fixture validation only
156
+ out_of_scope:
157
+ - Production deploy
158
+ definition_of_done: All smoke checks green
159
+ assumptions:
160
+ - CI environment has node
161
+ external_dependencies: []
162
+ wbs_dictionary:
163
+ - work_item_id: WI-1
164
+ deliverable: Fixture packet loaded
165
+ owner_role: executor
166
+ inputs: []
167
+ outputs: [parsed packet]
168
+ risk_register:
169
+ - risk_id: R1
170
+ description: DAG validator false negative
171
+ likelihood: low
172
+ impact: high
173
+ mitigation: Unit tests on validate-plan-dag.mjs
174
+ linked_work_item_ids: [WI-2]
175
+ - risk_id: R2
176
+ description: Debate cap misconfiguration
177
+ likelihood: med
178
+ impact: med
179
+ mitigation: debate-orchestrator plan profile tests
180
+ linked_work_item_ids: [WI-4]
181
+ - risk_id: R3
182
+ description: YAML parse drift
183
+ likelihood: low
184
+ impact: med
185
+ mitigation: harness-yaml strict parse
186
+ linked_work_item_ids: [WI-3]
187
+ schedule_metadata:
188
+ critical_path_work_item_ids: [WI-1, WI-2, WI-4, WI-5, WI-7, WI-8]
189
+ parallel_groups:
190
+ - [WI-1, WI-3]
191
+ schedule_baseline_note: Fixture topological order; no calendar dates
192
+ dag_validation:
193
+ status: pass
194
+ topological_order: [WI-1, WI-2, WI-3, WI-4, WI-5, WI-6, WI-7, WI-8]
195
+ cycles: []
196
+ conflicts: []
@@ -0,0 +1,14 @@
1
+ # Plan review (fixture)
2
+
3
+ plan_id: plan-smoke-fixture-001
4
+
5
+ ## Execution plan
6
+
7
+ Phases: P1 Foundation → P2 Build → P3 Verify
8
+
9
+ Critical path: WI-1 → WI-2 → WI-4 → WI-5 → WI-7 → WI-8
10
+
11
+ ## Debate
12
+
13
+ - Round 1 (spec): review_gate_ready
14
+ - Round 4 (quality): review_gate_ready
@@ -0,0 +1,32 @@
1
+ decomposition:
2
+ schema_version: "1.0.0"
3
+ problem_restatement: Validate plan-phase YAML and debate pipeline
4
+ hypothesis:
5
+ schema_version: "1.0.0"
6
+ primary:
7
+ claim: Fixture-driven smoke covers DAG and debate
8
+ mechanism: Static artifacts plus validate-plan-dag.mjs
9
+ prediction: CI passes without live agents
10
+ experiment: Run smoke-harness-plan.mjs --fixture
11
+ stack:
12
+ schema_version: "1.0.0"
13
+ problem_framing: Node harness tooling
14
+ constraints: []
15
+ options:
16
+ - name: extend current stack
17
+ category: brownfield
18
+ fit_summary: Use existing ultimate-pi harness
19
+ tradeoffs:
20
+ pros: [No new deps]
21
+ cons: []
22
+ risks: []
23
+ evidence_refs: []
24
+ recommendation_rank: 1
25
+ recommended_primary: extend current stack
26
+ rationale: Fixture validates in-repo harness
27
+ eval:
28
+ schema_version: "1.0.0"
29
+ revision_recommended: false
30
+ relevance:
31
+ passes: true
32
+ rationale: Hypothesis matches smoke task
@@ -5,7 +5,7 @@
5
5
  "project_root": "/tmp/ultimate-pi-smoke",
6
6
  "phase": "plan",
7
7
  "plan_id": null,
8
- "plan_packet_path": "/tmp/ultimate-pi-smoke/.pi/harness/runs/smoke-session-1/plan-packet.json",
8
+ "plan_packet_path": "/tmp/ultimate-pi-smoke/.pi/harness/runs/smoke-session-1/plan-packet.yaml",
9
9
  "plan_ready": false,
10
10
  "task_summary": "smoke task",
11
11
  "status": "active",
@@ -0,0 +1,88 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * smoke-harness-plan — fixture validation for plan-phase pipeline (CI).
4
+ * Usage: node .pi/harness/evals/smoke/smoke-harness-plan.mjs --fixture | --live
5
+ */
6
+
7
+ import { access, cp, mkdir, readFile, rm } from "node:fs/promises";
8
+ import { constants } from "node:fs";
9
+ import { dirname, join, resolve } from "node:path";
10
+ import { fileURLToPath } from "node:url";
11
+ import { parse as parseYaml } from "yaml";
12
+ import { validateExecutionPlan } from "../../../scripts/validate-plan-dag.mjs";
13
+
14
+ const ROOT = join(dirname(fileURLToPath(import.meta.url)), "..", "..", "..", "..");
15
+ const FIXTURE_DIR = join(dirname(fileURLToPath(import.meta.url)), "fixtures", "plan-phase");
16
+
17
+ function fail(msg) {
18
+ console.error(`smoke-harness-plan: FAIL: ${msg}`);
19
+ process.exit(1);
20
+ }
21
+
22
+ function ok(msg) {
23
+ console.log(` ✓ ${msg}`);
24
+ }
25
+
26
+ async function runFixture() {
27
+ const fixtureRoot = join(FIXTURE_DIR, "minimal-med");
28
+ try {
29
+ await access(fixtureRoot, constants.R_OK);
30
+ } catch {
31
+ fail(`missing fixture ${fixtureRoot}`);
32
+ }
33
+
34
+ const packetPath = join(fixtureRoot, "plan-packet.yaml");
35
+ const raw = await readFile(packetPath, "utf-8");
36
+ const packet = parseYaml(raw);
37
+ if (packet.contract_version !== "1.1.0") {
38
+ fail("fixture contract_version must be 1.1.0");
39
+ }
40
+ if (!packet.execution_plan) fail("fixture missing execution_plan");
41
+
42
+ const { status, errors } = validateExecutionPlan(packet, fixtureRoot);
43
+ if (status !== "pass") {
44
+ fail(`DAG validation failed: ${errors.join("; ")}`);
45
+ }
46
+ ok("fixture plan-packet.yaml DAG pass");
47
+
48
+ const reviewPath = join(fixtureRoot, "plan-review.md");
49
+ await access(reviewPath, constants.R_OK);
50
+ ok("plan-review.md present");
51
+
52
+ const debateRounds = ["review-round-r1.yaml", "review-round-r4.yaml"];
53
+ for (const name of debateRounds) {
54
+ const p = join(fixtureRoot, "artifacts", name);
55
+ await access(p, constants.R_OK);
56
+ const draft = parseYaml(await readFile(p, "utf-8"));
57
+ if (!draft.schema_version) fail(`${name} missing schema_version`);
58
+ }
59
+ ok("debate round YAML artifacts present");
60
+
61
+ const researchPath = join(fixtureRoot, "research-brief.yaml");
62
+ const research = parseYaml(await readFile(researchPath, "utf-8"));
63
+ if (!research.decomposition || !research.hypothesis) {
64
+ fail("research-brief.yaml missing decomposition/hypothesis");
65
+ }
66
+ ok("research-brief.yaml structure");
67
+
68
+ console.log("smoke-harness-plan: all fixture checks passed");
69
+ }
70
+
71
+ async function main() {
72
+ const args = process.argv.slice(2);
73
+ if (args.includes("--fixture")) {
74
+ await runFixture();
75
+ return;
76
+ }
77
+ if (args.includes("--live")) {
78
+ console.log(
79
+ "smoke-harness-plan: --live requires manual /harness-plan run; skipping in CI",
80
+ );
81
+ return;
82
+ }
83
+ fail("Usage: smoke-harness-plan.mjs --fixture | --live");
84
+ }
85
+
86
+ main().catch((err) => {
87
+ fail(err instanceof Error ? err.message : String(err));
88
+ });
@@ -13,7 +13,7 @@ This directory is the canonical contract surface for Phase 1 harness artifacts.
13
13
 
14
14
  These schemas define the minimum machine-readable contracts for:
15
15
 
16
- - planning (`PlanPacket`)
16
+ - planning (`PlanPacket`, `PlanDecompositionBrief`, `PlanHypothesisBrief`, `PlanHypothesisEval`, `PlanAdversaryBrief`)
17
17
  - execution telemetry (`RunTrace`, `HarnessRunRecord`)
18
18
  - PostHog harness events (`HarnessPostHogEvent`)
19
19
  - observation bus (`HarnessObservation`)
@@ -24,7 +24,12 @@
24
24
  "harness_drift_report",
25
25
  "harness_eval_verdict",
26
26
  "harness_sentrux_signal",
27
- "harness_observation"
27
+ "harness_observation",
28
+ "harness_subagent_spawned",
29
+ "harness_subagent_completed",
30
+ "harness_subagent_result_wait",
31
+ "harness_subagent_setup",
32
+ "harness_blackboard_op"
28
33
  ]
29
34
  },
30
35
  "distinct_id": {
@@ -14,13 +14,14 @@
14
14
  "agent": {
15
15
  "type": "string",
16
16
  "minLength": 1,
17
- "description": "Target subagent id, e.g. harness/planner"
17
+ "description": "Target subagent id, e.g. harness/planning/scout-graphify"
18
18
  },
19
19
  "mode": {
20
20
  "type": "string",
21
21
  "enum": [
22
22
  "create",
23
23
  "revise",
24
+ "plan_review",
24
25
  "execute",
25
26
  "benchmark",
26
27
  "verdict",
@@ -0,0 +1,45 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://ultimate-pi.local/.pi/harness/specs/plan-adversary-brief.schema.json",
4
+ "title": "PlanAdversaryBrief",
5
+ "description": "Pre-approval adversarial review of a draft PlanPacket (plan phase only).",
6
+ "type": "object",
7
+ "additionalProperties": false,
8
+ "required": [
9
+ "schema_version",
10
+ "edge_cases",
11
+ "failure_modes",
12
+ "acceptance_gaps",
13
+ "mitigations",
14
+ "recommendation"
15
+ ],
16
+ "properties": {
17
+ "schema_version": {
18
+ "type": "string",
19
+ "const": "1.0.0"
20
+ },
21
+ "edge_cases": {
22
+ "type": "array",
23
+ "items": { "type": "string", "minLength": 1 }
24
+ },
25
+ "failure_modes": {
26
+ "type": "array",
27
+ "items": { "type": "string", "minLength": 1 }
28
+ },
29
+ "acceptance_gaps": {
30
+ "type": "array",
31
+ "items": { "type": "string", "minLength": 1 }
32
+ },
33
+ "mitigations": {
34
+ "type": "array",
35
+ "items": { "type": "string", "minLength": 1 }
36
+ },
37
+ "recommendation": {
38
+ "type": "string",
39
+ "enum": ["proceed", "revise"]
40
+ },
41
+ "human_summary": {
42
+ "type": "string"
43
+ }
44
+ }
45
+ }
@@ -0,0 +1,108 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://ultimate-pi.local/.pi/harness/specs/plan-decomposition-brief.schema.json",
4
+ "title": "PlanDecompositionBrief",
5
+ "description": "DeepMind-style problem decomposition (harness plan phase 1).",
6
+ "type": "object",
7
+ "additionalProperties": false,
8
+ "required": [
9
+ "schema_version",
10
+ "problem_restatement",
11
+ "problem_types",
12
+ "scope",
13
+ "hard_constraints",
14
+ "soft_constraints",
15
+ "success_metrics",
16
+ "prior_art",
17
+ "tensions",
18
+ "core_tension"
19
+ ],
20
+ "properties": {
21
+ "schema_version": {
22
+ "type": "string",
23
+ "const": "1.0.0"
24
+ },
25
+ "problem_restatement": {
26
+ "type": "string",
27
+ "minLength": 1,
28
+ "description": "Precise restatement; what solving this looks like."
29
+ },
30
+ "problem_types": {
31
+ "type": "array",
32
+ "minItems": 1,
33
+ "items": {
34
+ "type": "string",
35
+ "enum": [
36
+ "optimization",
37
+ "discovery",
38
+ "explanation",
39
+ "design",
40
+ "selection"
41
+ ]
42
+ }
43
+ },
44
+ "scope": {
45
+ "type": "object",
46
+ "additionalProperties": false,
47
+ "required": ["narrowed_focus", "excluded"],
48
+ "properties": {
49
+ "narrowed_focus": {
50
+ "type": "string",
51
+ "minLength": 1
52
+ },
53
+ "excluded": {
54
+ "type": "array",
55
+ "items": { "type": "string", "minLength": 1 }
56
+ }
57
+ }
58
+ },
59
+ "hard_constraints": {
60
+ "type": "array",
61
+ "items": { "type": "string", "minLength": 1 }
62
+ },
63
+ "soft_constraints": {
64
+ "type": "array",
65
+ "items": { "type": "string", "minLength": 1 }
66
+ },
67
+ "success_metrics": {
68
+ "type": "array",
69
+ "minItems": 1,
70
+ "items": { "type": "string", "minLength": 1 }
71
+ },
72
+ "prior_art": {
73
+ "type": "object",
74
+ "additionalProperties": false,
75
+ "required": ["best_approach", "gap", "dead_ends"],
76
+ "properties": {
77
+ "best_approach": { "type": "string", "minLength": 1 },
78
+ "gap": { "type": "string", "minLength": 1 },
79
+ "dead_ends": {
80
+ "type": "array",
81
+ "items": { "type": "string", "minLength": 1 }
82
+ }
83
+ }
84
+ },
85
+ "tensions": {
86
+ "type": "array",
87
+ "minItems": 1,
88
+ "items": {
89
+ "type": "object",
90
+ "additionalProperties": false,
91
+ "required": ["claim_a", "claim_b", "why_matters"],
92
+ "properties": {
93
+ "claim_a": { "type": "string", "minLength": 1 },
94
+ "claim_b": { "type": "string", "minLength": 1 },
95
+ "why_matters": { "type": "string", "minLength": 1 }
96
+ }
97
+ }
98
+ },
99
+ "core_tension": {
100
+ "type": "string",
101
+ "minLength": 1,
102
+ "description": "One paragraph summarizing the tension that feeds hypothesis generation."
103
+ },
104
+ "human_summary": {
105
+ "type": "string"
106
+ }
107
+ }
108
+ }