ultimate-pi 0.10.1 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agents/skills/harness-debate-plan/SKILL.md +44 -0
- package/.agents/skills/harness-decisions/SKILL.md +3 -3
- package/.agents/skills/harness-orchestration/SKILL.md +59 -25
- package/.agents/skills/harness-plan/SKILL.md +16 -15
- package/.pi/agents/harness/adversary.md +0 -1
- package/.pi/agents/harness/evaluator.md +0 -1
- package/.pi/agents/harness/executor.md +1 -2
- package/.pi/agents/harness/incident-recorder.md +0 -1
- package/.pi/agents/harness/meta-optimizer.md +0 -1
- package/.pi/agents/harness/planning/decompose.md +83 -0
- package/.pi/agents/harness/planning/execution-plan-author.md +30 -0
- package/.pi/agents/harness/planning/hypothesis-validator.md +23 -0
- package/.pi/agents/harness/planning/hypothesis.md +89 -0
- package/.pi/agents/harness/planning/plan-adversary.md +18 -0
- package/.pi/agents/harness/planning/plan-evaluator.md +18 -0
- package/.pi/agents/harness/planning/review-integrator.md +23 -0
- package/.pi/agents/harness/planning/scout-graphify.md +54 -0
- package/.pi/agents/harness/planning/scout-semantic.md +47 -0
- package/.pi/agents/harness/planning/scout-structure.md +50 -0
- package/.pi/agents/harness/planning/sprint-contract-auditor.md +18 -0
- package/.pi/agents/harness/planning/stack-researcher.md +24 -0
- package/.pi/agents/harness/tie-breaker.md +0 -1
- package/.pi/agents/harness/trace-librarian.md +0 -1
- package/.pi/extensions/debate-orchestrator.ts +90 -53
- package/.pi/extensions/harness-ask-user.ts +5 -0
- package/.pi/extensions/harness-plan-approval.ts +137 -3
- package/.pi/extensions/harness-run-context.ts +146 -6
- package/.pi/extensions/harness-subagents.ts +10 -5
- package/.pi/extensions/harness-web-tools.ts +2 -0
- package/.pi/extensions/lib/extension-load-guard.ts +39 -0
- package/.pi/extensions/lib/harness-posthog.ts +6 -1
- package/.pi/extensions/lib/harness-spawn-budget.ts +75 -0
- package/.pi/extensions/lib/harness-subagent-auth.ts +123 -0
- package/.pi/extensions/lib/{harness-subagents/harness-subagent-policy.ts → harness-subagent-policy.ts} +34 -9
- package/.pi/extensions/lib/harness-subagent-precheck.ts +95 -0
- package/.pi/extensions/lib/harness-subagents-bridge.ts +176 -0
- package/.pi/extensions/lib/plan-approval/create-plan.ts +9 -7
- package/.pi/extensions/lib/plan-approval/plan-review.ts +393 -0
- package/.pi/extensions/lib/plan-approval/schema.ts +16 -1
- package/.pi/extensions/lib/plan-approval/types.ts +16 -0
- package/.pi/extensions/lib/plan-approval/validate.ts +2 -0
- package/.pi/extensions/lib/plan-debate-envelope.ts +84 -0
- package/.pi/extensions/lib/{harness-subagents/spawn-policy.ts → spawn-policy.ts} +2 -5
- package/.pi/extensions/policy-gate.ts +1 -1
- package/.pi/extensions/review-integrity.ts +48 -29
- package/.pi/extensions/ultimate-pi-vcc.ts +5 -0
- package/.pi/harness/agents.manifest.json +126 -82
- package/.pi/harness/docs/adrs/0032-harness-command-orchestration.md +7 -6
- package/.pi/harness/docs/adrs/0033-parent-orchestrated-planning.md +34 -0
- package/.pi/harness/docs/adrs/0034-darwin-plan-research-pipeline.md +41 -0
- package/.pi/harness/docs/adrs/0035-plan-phase-review-gate.md +27 -0
- package/.pi/harness/docs/adrs/README.md +2 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r1.yaml +25 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r4.yaml +26 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/sprint-audit-r4.yaml +5 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/plan-packet.yaml +196 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/plan-review.md +14 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/research-brief.yaml +32 -0
- package/.pi/harness/evals/smoke/run-context.fixture.json +1 -1
- package/.pi/harness/evals/smoke/smoke-harness-plan.mjs +88 -0
- package/.pi/harness/specs/README.md +1 -1
- package/.pi/harness/specs/harness-posthog-event.schema.json +6 -1
- package/.pi/harness/specs/harness-spawn-context.schema.json +2 -1
- package/.pi/harness/specs/plan-adversary-brief.schema.json +45 -0
- package/.pi/harness/specs/plan-decomposition-brief.schema.json +108 -0
- package/.pi/harness/specs/plan-execution-plan-brief.schema.json +13 -0
- package/.pi/harness/specs/plan-execution-plan.schema.json +255 -0
- package/.pi/harness/specs/plan-hypothesis-brief.schema.json +96 -0
- package/.pi/harness/specs/plan-hypothesis-eval.schema.json +61 -0
- package/.pi/harness/specs/plan-packet.schema.json +14 -5
- package/.pi/harness/specs/plan-review-round-draft.schema.json +68 -0
- package/.pi/harness/specs/plan-sprint-audit-turn.schema.json +29 -0
- package/.pi/harness/specs/plan-stack-brief.schema.json +65 -0
- package/.pi/harness/specs/plan-validation-turn.schema.json +42 -0
- package/.pi/harness/specs/round-result.schema.json +16 -9
- package/.pi/lib/debate-orchestrator-types.ts +38 -0
- package/.pi/lib/harness-agent-discovery.mjs +81 -0
- package/.pi/lib/harness-run-context.ts +76 -38
- package/.pi/lib/harness-yaml.mjs +73 -0
- package/.pi/lib/harness-yaml.ts +90 -0
- package/.pi/prompts/harness-auto.md +13 -11
- package/.pi/prompts/harness-critic.md +2 -2
- package/.pi/prompts/harness-eval.md +3 -3
- package/.pi/prompts/harness-incident.md +2 -2
- package/.pi/prompts/harness-plan.md +106 -37
- package/.pi/prompts/harness-review.md +2 -2
- package/.pi/prompts/harness-router-tune.md +1 -1
- package/.pi/prompts/harness-run.md +2 -2
- package/.pi/prompts/harness-setup.md +15 -6
- package/.pi/prompts/harness-trace.md +2 -2
- package/.pi/scripts/harness-agents-manifest.mjs +1 -1
- package/.pi/scripts/harness-resolve-up-pkg.mjs +13 -0
- package/.pi/scripts/harness-verify.mjs +28 -19
- package/.pi/scripts/validate-plan-dag.mjs +258 -0
- package/.pi/scripts/vendor-sync-pi-subagents.sh +19 -0
- package/CHANGELOG.md +24 -0
- package/THIRD_PARTY_NOTICES.md +8 -0
- package/biome.json +4 -1
- package/package.json +6 -4
- package/.pi/agents/harness/planner.md +0 -54
- package/.pi/extensions/lib/harness-subagents/agent-loader.ts +0 -126
- package/.pi/extensions/lib/harness-subagents/agent-manifest.ts +0 -119
- package/.pi/extensions/lib/harness-subagents/agent-parser.ts +0 -87
- package/.pi/extensions/lib/harness-subagents/blackboard-tool.ts +0 -118
- package/.pi/extensions/lib/harness-subagents/blackboard.ts +0 -175
- package/.pi/extensions/lib/harness-subagents/parent-ask-user-bridge.ts +0 -10
- package/.pi/extensions/lib/harness-subagents/parent-harness-ui-bridge.ts +0 -310
- package/.pi/extensions/lib/harness-subagents/parent-harness-ui-hooks.ts +0 -59
- package/.pi/extensions/lib/harness-subagents/types-blackboard.ts +0 -27
- package/.pi/extensions/lib/harness-subagents/vendored/agent-manager.ts +0 -558
- package/.pi/extensions/lib/harness-subagents/vendored/agent-runner.ts +0 -684
- package/.pi/extensions/lib/harness-subagents/vendored/agent-types.ts +0 -175
- package/.pi/extensions/lib/harness-subagents/vendored/context.ts +0 -59
- package/.pi/extensions/lib/harness-subagents/vendored/cross-extension-rpc.ts +0 -134
- package/.pi/extensions/lib/harness-subagents/vendored/custom-agents.ts +0 -5
- package/.pi/extensions/lib/harness-subagents/vendored/default-agents.ts +0 -123
- package/.pi/extensions/lib/harness-subagents/vendored/env.ts +0 -43
- package/.pi/extensions/lib/harness-subagents/vendored/group-join.ts +0 -144
- package/.pi/extensions/lib/harness-subagents/vendored/index.ts +0 -2494
- package/.pi/extensions/lib/harness-subagents/vendored/invocation-config.ts +0 -52
- package/.pi/extensions/lib/harness-subagents/vendored/memory.ts +0 -182
- package/.pi/extensions/lib/harness-subagents/vendored/model-resolver.ts +0 -92
- package/.pi/extensions/lib/harness-subagents/vendored/output-file.ts +0 -115
- package/.pi/extensions/lib/harness-subagents/vendored/prompts.ts +0 -103
- package/.pi/extensions/lib/harness-subagents/vendored/schedule-store.ts +0 -177
- package/.pi/extensions/lib/harness-subagents/vendored/schedule.ts +0 -416
- package/.pi/extensions/lib/harness-subagents/vendored/settings.ts +0 -210
- package/.pi/extensions/lib/harness-subagents/vendored/skill-loader.ts +0 -108
- package/.pi/extensions/lib/harness-subagents/vendored/types.ts +0 -187
- package/.pi/extensions/lib/harness-subagents/vendored/ui/agent-widget.ts +0 -639
- package/.pi/extensions/lib/harness-subagents/vendored/ui/conversation-viewer.ts +0 -324
- package/.pi/extensions/lib/harness-subagents/vendored/ui/schedule-menu.ts +0 -110
- package/.pi/extensions/lib/harness-subagents/vendored/usage.ts +0 -71
- package/.pi/extensions/lib/harness-subagents/vendored/worktree.ts +0 -195
- package/.pi/extensions/{00-ultimate-pi-system-prompt.ts → custom-system-prompt.ts} +0 -0
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# ADR 0034: Darwin plan research pipeline
|
|
2
|
+
|
|
3
|
+
- **Status:** Accepted
|
|
4
|
+
- **Date:** 2026-05-17
|
|
5
|
+
|
|
6
|
+
## Context
|
|
7
|
+
|
|
8
|
+
`/harness-plan` (ADR 0033) runs parent-orchestrated scouts and a single adversary before approval. Users need vague tasks transformed into rigorous, falsifiable hypotheses before execution plans are approved — not only codebase maps and scope bullets.
|
|
9
|
+
|
|
10
|
+
## Decision
|
|
11
|
+
|
|
12
|
+
1. **Always-on research chain** after parallel scouts:
|
|
13
|
+
- `harness/planning/decompose` — DeepMind-style problem decomposition (`PlanDecompositionBrief`)
|
|
14
|
+
- `harness/planning/hypothesis` — DARWIN hypothesis generation (`PlanHypothesisBrief`)
|
|
15
|
+
2. **Parent maps hypothesis → PlanPacket** — `plan-packet.schema.json` unchanged; execution gating stable.
|
|
16
|
+
3. **Parallel pre-approval reviews:**
|
|
17
|
+
- `harness/planning/plan-adversary` — execution risk on PlanPacket
|
|
18
|
+
- `harness/planning/hypothesis-eval` — blind self-eval (task + hypothesis only)
|
|
19
|
+
4. **`approve_plan` optional `research_brief`** — rendered in `plan-review.md`; not written to `plan-packet.json`.
|
|
20
|
+
5. **`--quick`** still skips semantic scout only; never skips decompose/hypothesis.
|
|
21
|
+
|
|
22
|
+
## Consequences
|
|
23
|
+
|
|
24
|
+
### Positive
|
|
25
|
+
|
|
26
|
+
- Plans grounded in explicit tensions, falsifiable claims, and experiments.
|
|
27
|
+
- Self-eval isolated from decomposition (reduces grade inflation).
|
|
28
|
+
- Editor review shows full research narrative plus PlanPacket.
|
|
29
|
+
|
|
30
|
+
### Negative
|
|
31
|
+
|
|
32
|
+
- More subagent spawns per plan (scouts + decompose + hypothesis + 2 reviews; optional hypothesis revision).
|
|
33
|
+
- Longer plan phase latency and token cost.
|
|
34
|
+
|
|
35
|
+
## References
|
|
36
|
+
|
|
37
|
+
- `.pi/prompts/harness-plan.md`
|
|
38
|
+
- `.pi/harness/specs/plan-decomposition-brief.schema.json`
|
|
39
|
+
- `.pi/harness/specs/plan-hypothesis-brief.schema.json`
|
|
40
|
+
- `.pi/harness/specs/plan-hypothesis-eval.schema.json`
|
|
41
|
+
- ADR 0033
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# ADR-0035: Plan-phase Review Gate and YAML artifacts
|
|
2
|
+
|
|
3
|
+
## Status
|
|
4
|
+
|
|
5
|
+
Accepted (2026-05-18)
|
|
6
|
+
|
|
7
|
+
## Context
|
|
8
|
+
|
|
9
|
+
`/harness-plan` produced thin PlanPackets (scope + bullets). Post-execute adversarial review (`/harness-critic`) ran too late. Graphify corpus (Structured Planning, ADR-020, Generator–Evaluator) defines WBS, validation, and review gate before baseline.
|
|
10
|
+
|
|
11
|
+
## Decision
|
|
12
|
+
|
|
13
|
+
1. **PlanPacket 1.1.0** — required `execution_plan` (phases, work_items, sprint_contract, dag_validation).
|
|
14
|
+
2. **YAML on disk** — `plan-packet.yaml`, `research-brief.yaml`, `run-context.yaml`, `artifacts/*.yaml`. JSON Schema unchanged; instances validated after YAML parse.
|
|
15
|
+
3. **Review Gate agents** — `stack-researcher`, `execution-plan-author`, debate: `hypothesis-validator`, `plan-evaluator`, `plan-adversary`, `sprint-contract-auditor`, `review-integrator`.
|
|
16
|
+
4. **Debate bus** — `debate_id=plan-<run_id>`, plan budget profile (4 rounds, 12k cap), plan-phase consensus prerequisites.
|
|
17
|
+
5. **No legacy JSON** plan paths; no pre-debate standalone `hypothesis-eval`.
|
|
18
|
+
|
|
19
|
+
## Consequences
|
|
20
|
+
|
|
21
|
+
- Positive: PM-grade plans, deterministic DAG gate, blind hypothesis eval in debate R1.
|
|
22
|
+
- Negative: Higher spawn/token cost; `harness-verify` and smoke fixtures must use `.yaml`.
|
|
23
|
+
|
|
24
|
+
## References
|
|
25
|
+
|
|
26
|
+
- [ADR-0033](0033-parent-orchestrated-planning.md), [ADR-0034](0034-darwin-plan-research-pipeline.md)
|
|
27
|
+
- `raw/decisions/adr-020.md`, `raw/modules/structured-planning.md`
|
|
@@ -18,6 +18,8 @@ Team-shared ADRs for the ultimate-pi harness live under `.pi/harness/docs/adrs/`
|
|
|
18
18
|
| [0030](0030-inhouse-vcc-compaction.md) | In-house VCC compaction (vendored pi-vcc) | Accepted |
|
|
19
19
|
| [0031](0031-harness-run-context.md) | Harness active run context | Accepted |
|
|
20
20
|
| [0032](0032-harness-command-orchestration.md) | Harness commands as agent orchestrators | Accepted |
|
|
21
|
+
| [0033](0033-parent-orchestrated-planning.md) | Parent-orchestrated harness planning | Accepted |
|
|
22
|
+
| [0034](0034-darwin-plan-research-pipeline.md) | Darwin plan research pipeline | Accepted |
|
|
21
23
|
|
|
22
24
|
## Template
|
|
23
25
|
|
package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r1.yaml
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
schema_version: "1.0.0"
|
|
2
|
+
round_index: 1
|
|
3
|
+
debate_round_focus: spec
|
|
4
|
+
round_summary: Spec round passed for fixture
|
|
5
|
+
validation_summary: All spec checks pass
|
|
6
|
+
adversary_summary: No blocking adversarial findings
|
|
7
|
+
disputes: []
|
|
8
|
+
recommended_packet_patches: []
|
|
9
|
+
review_gate_ready: true
|
|
10
|
+
participants:
|
|
11
|
+
- PlanEvaluatorAgent
|
|
12
|
+
- PlanAdversaryAgent
|
|
13
|
+
- HypothesisValidatorAgent
|
|
14
|
+
- ReviewIntegratorAgent
|
|
15
|
+
claims:
|
|
16
|
+
- spec validation complete
|
|
17
|
+
rebuttals: []
|
|
18
|
+
evidence_refs: []
|
|
19
|
+
token_usage:
|
|
20
|
+
per_agent:
|
|
21
|
+
PlanEvaluatorAgent: 100
|
|
22
|
+
PlanAdversaryAgent: 100
|
|
23
|
+
ReviewIntegratorAgent: 50
|
|
24
|
+
round_total: 250
|
|
25
|
+
consensus_delta: 0.1
|
package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r4.yaml
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
schema_version: "1.0.0"
|
|
2
|
+
round_index: 4
|
|
3
|
+
debate_round_focus: quality
|
|
4
|
+
round_summary: Quality and sprint contract round passed
|
|
5
|
+
validation_summary: Sprint contract complete
|
|
6
|
+
adversary_summary: No gaps
|
|
7
|
+
disputes: []
|
|
8
|
+
recommended_packet_patches: []
|
|
9
|
+
review_gate_ready: true
|
|
10
|
+
participants:
|
|
11
|
+
- PlanEvaluatorAgent
|
|
12
|
+
- PlanAdversaryAgent
|
|
13
|
+
- SprintContractAuditorAgent
|
|
14
|
+
- ReviewIntegratorAgent
|
|
15
|
+
claims:
|
|
16
|
+
- review gate ready
|
|
17
|
+
rebuttals: []
|
|
18
|
+
evidence_refs: []
|
|
19
|
+
token_usage:
|
|
20
|
+
per_agent:
|
|
21
|
+
PlanEvaluatorAgent: 120
|
|
22
|
+
PlanAdversaryAgent: 110
|
|
23
|
+
SprintContractAuditorAgent: 90
|
|
24
|
+
ReviewIntegratorAgent: 60
|
|
25
|
+
round_total: 380
|
|
26
|
+
consensus_delta: 0.15
|
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
schema_version: "1.0.0"
|
|
2
|
+
contract_version: "1.1.0"
|
|
3
|
+
plan_id: plan-smoke-fixture-001
|
|
4
|
+
task_id: task-smoke-001
|
|
5
|
+
scope: Smoke fixture for plan-phase harness validation with execution_plan and debate artifacts.
|
|
6
|
+
assumptions:
|
|
7
|
+
- Fixture only; no live agent run
|
|
8
|
+
risk_level: med
|
|
9
|
+
acceptance_checks:
|
|
10
|
+
- id: AC-1
|
|
11
|
+
description: DAG validation passes
|
|
12
|
+
- id: AC-2
|
|
13
|
+
description: Four debate rounds recorded
|
|
14
|
+
- id: AC-3
|
|
15
|
+
description: Stack brief present in research-brief
|
|
16
|
+
- id: AC-4
|
|
17
|
+
description: Sprint contract complete
|
|
18
|
+
- id: AC-5
|
|
19
|
+
description: plan-review.md renders
|
|
20
|
+
rollback_plan:
|
|
21
|
+
revert_commit_ready: true
|
|
22
|
+
rollback_artifacts:
|
|
23
|
+
revert_command: git revert HEAD
|
|
24
|
+
revert_branch: main
|
|
25
|
+
patch_bundle: .pi/harness/runs/smoke-fixture/patch.bundle
|
|
26
|
+
execution_plan:
|
|
27
|
+
schema_version: "1.0.0"
|
|
28
|
+
phases:
|
|
29
|
+
- phase_id: P1
|
|
30
|
+
name: Foundation
|
|
31
|
+
objective: Establish baseline and verify harness wiring
|
|
32
|
+
entry_criteria:
|
|
33
|
+
- Fixture loaded
|
|
34
|
+
exit_criteria:
|
|
35
|
+
- AC-1 satisfied
|
|
36
|
+
milestone: M1-baseline
|
|
37
|
+
work_item_ids: [WI-1, WI-2, WI-3]
|
|
38
|
+
- phase_id: P2
|
|
39
|
+
name: Build
|
|
40
|
+
objective: Implement core changes
|
|
41
|
+
entry_criteria:
|
|
42
|
+
- M1-baseline complete
|
|
43
|
+
exit_criteria:
|
|
44
|
+
- AC-2 satisfied
|
|
45
|
+
milestone: M2-build
|
|
46
|
+
work_item_ids: [WI-4, WI-5, WI-6]
|
|
47
|
+
- phase_id: P3
|
|
48
|
+
name: Verify
|
|
49
|
+
objective: Quality gate and documentation
|
|
50
|
+
entry_criteria:
|
|
51
|
+
- M2-build complete
|
|
52
|
+
exit_criteria:
|
|
53
|
+
- AC-5 satisfied
|
|
54
|
+
milestone: M3-ship
|
|
55
|
+
work_item_ids: [WI-7, WI-8]
|
|
56
|
+
work_items:
|
|
57
|
+
- work_item_id: WI-1
|
|
58
|
+
phase_id: P1
|
|
59
|
+
title: Load fixture packet
|
|
60
|
+
description: Read plan-packet.yaml from fixture directory
|
|
61
|
+
depends_on: []
|
|
62
|
+
files:
|
|
63
|
+
- .pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/plan-packet.yaml
|
|
64
|
+
parallel_safe: true
|
|
65
|
+
done_criteria:
|
|
66
|
+
type: manual
|
|
67
|
+
spec: Fixture packet readable
|
|
68
|
+
acceptance_check_ids: [AC-1]
|
|
69
|
+
- work_item_id: WI-2
|
|
70
|
+
phase_id: P1
|
|
71
|
+
title: Run DAG validator
|
|
72
|
+
description: Execute validate-plan-dag.mjs
|
|
73
|
+
depends_on: [WI-1]
|
|
74
|
+
files:
|
|
75
|
+
- .pi/scripts/validate-plan-dag.mjs
|
|
76
|
+
parallel_safe: false
|
|
77
|
+
done_criteria:
|
|
78
|
+
type: command
|
|
79
|
+
spec: node .pi/scripts/validate-plan-dag.mjs --packet plan-packet.yaml
|
|
80
|
+
acceptance_check_ids: [AC-1]
|
|
81
|
+
- work_item_id: WI-3
|
|
82
|
+
phase_id: P1
|
|
83
|
+
title: Lint harness-yaml
|
|
84
|
+
description: Ensure YAML helpers parse fixture
|
|
85
|
+
depends_on: [WI-1]
|
|
86
|
+
files:
|
|
87
|
+
- .pi/lib/harness-yaml.ts
|
|
88
|
+
parallel_safe: true
|
|
89
|
+
done_criteria:
|
|
90
|
+
type: lint
|
|
91
|
+
spec: npm test
|
|
92
|
+
acceptance_check_ids: [AC-1]
|
|
93
|
+
- work_item_id: WI-4
|
|
94
|
+
phase_id: P2
|
|
95
|
+
title: Debate round 1-2 artifacts
|
|
96
|
+
description: Validate review-round YAML
|
|
97
|
+
depends_on: [WI-2]
|
|
98
|
+
files:
|
|
99
|
+
- .pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r1.yaml
|
|
100
|
+
parallel_safe: false
|
|
101
|
+
done_criteria:
|
|
102
|
+
type: artifact
|
|
103
|
+
spec: artifacts/review-round-r1.yaml exists
|
|
104
|
+
acceptance_check_ids: [AC-2]
|
|
105
|
+
- work_item_id: WI-5
|
|
106
|
+
phase_id: P2
|
|
107
|
+
title: Debate round 3-4 artifacts
|
|
108
|
+
description: Validate final review round
|
|
109
|
+
depends_on: [WI-4]
|
|
110
|
+
files:
|
|
111
|
+
- .pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r4.yaml
|
|
112
|
+
parallel_safe: false
|
|
113
|
+
done_criteria:
|
|
114
|
+
type: artifact
|
|
115
|
+
spec: artifacts/review-round-r4.yaml exists
|
|
116
|
+
acceptance_check_ids: [AC-2]
|
|
117
|
+
- work_item_id: WI-6
|
|
118
|
+
phase_id: P2
|
|
119
|
+
title: Stack research merge
|
|
120
|
+
description: research-brief includes stack section
|
|
121
|
+
depends_on: [WI-2]
|
|
122
|
+
files: []
|
|
123
|
+
non_code: true
|
|
124
|
+
parallel_safe: true
|
|
125
|
+
done_criteria:
|
|
126
|
+
type: manual
|
|
127
|
+
spec: research-brief.yaml contains stack key
|
|
128
|
+
acceptance_check_ids: [AC-3]
|
|
129
|
+
- work_item_id: WI-7
|
|
130
|
+
phase_id: P3
|
|
131
|
+
title: Sprint contract audit
|
|
132
|
+
description: R4 sprint audit artifact
|
|
133
|
+
depends_on: [WI-5]
|
|
134
|
+
files:
|
|
135
|
+
- .pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/sprint-audit-r4.yaml
|
|
136
|
+
parallel_safe: false
|
|
137
|
+
done_criteria:
|
|
138
|
+
type: artifact
|
|
139
|
+
spec: sprint-audit-r4.yaml present
|
|
140
|
+
acceptance_check_ids: [AC-4]
|
|
141
|
+
- work_item_id: WI-8
|
|
142
|
+
phase_id: P3
|
|
143
|
+
title: Render plan-review
|
|
144
|
+
description: Human-readable plan review markdown
|
|
145
|
+
depends_on: [WI-7]
|
|
146
|
+
files:
|
|
147
|
+
- .pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/plan-review.md
|
|
148
|
+
parallel_safe: false
|
|
149
|
+
done_criteria:
|
|
150
|
+
type: manual
|
|
151
|
+
spec: plan-review.md non-empty
|
|
152
|
+
acceptance_check_ids: [AC-5]
|
|
153
|
+
sprint_contract:
|
|
154
|
+
in_scope:
|
|
155
|
+
- Fixture validation only
|
|
156
|
+
out_of_scope:
|
|
157
|
+
- Production deploy
|
|
158
|
+
definition_of_done: All smoke checks green
|
|
159
|
+
assumptions:
|
|
160
|
+
- CI environment has node
|
|
161
|
+
external_dependencies: []
|
|
162
|
+
wbs_dictionary:
|
|
163
|
+
- work_item_id: WI-1
|
|
164
|
+
deliverable: Fixture packet loaded
|
|
165
|
+
owner_role: executor
|
|
166
|
+
inputs: []
|
|
167
|
+
outputs: [parsed packet]
|
|
168
|
+
risk_register:
|
|
169
|
+
- risk_id: R1
|
|
170
|
+
description: DAG validator false negative
|
|
171
|
+
likelihood: low
|
|
172
|
+
impact: high
|
|
173
|
+
mitigation: Unit tests on validate-plan-dag.mjs
|
|
174
|
+
linked_work_item_ids: [WI-2]
|
|
175
|
+
- risk_id: R2
|
|
176
|
+
description: Debate cap misconfiguration
|
|
177
|
+
likelihood: med
|
|
178
|
+
impact: med
|
|
179
|
+
mitigation: debate-orchestrator plan profile tests
|
|
180
|
+
linked_work_item_ids: [WI-4]
|
|
181
|
+
- risk_id: R3
|
|
182
|
+
description: YAML parse drift
|
|
183
|
+
likelihood: low
|
|
184
|
+
impact: med
|
|
185
|
+
mitigation: harness-yaml strict parse
|
|
186
|
+
linked_work_item_ids: [WI-3]
|
|
187
|
+
schedule_metadata:
|
|
188
|
+
critical_path_work_item_ids: [WI-1, WI-2, WI-4, WI-5, WI-7, WI-8]
|
|
189
|
+
parallel_groups:
|
|
190
|
+
- [WI-1, WI-3]
|
|
191
|
+
schedule_baseline_note: Fixture topological order; no calendar dates
|
|
192
|
+
dag_validation:
|
|
193
|
+
status: pass
|
|
194
|
+
topological_order: [WI-1, WI-2, WI-3, WI-4, WI-5, WI-6, WI-7, WI-8]
|
|
195
|
+
cycles: []
|
|
196
|
+
conflicts: []
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
# Plan review (fixture)
|
|
2
|
+
|
|
3
|
+
plan_id: plan-smoke-fixture-001
|
|
4
|
+
|
|
5
|
+
## Execution plan
|
|
6
|
+
|
|
7
|
+
Phases: P1 Foundation → P2 Build → P3 Verify
|
|
8
|
+
|
|
9
|
+
Critical path: WI-1 → WI-2 → WI-4 → WI-5 → WI-7 → WI-8
|
|
10
|
+
|
|
11
|
+
## Debate
|
|
12
|
+
|
|
13
|
+
- Round 1 (spec): review_gate_ready
|
|
14
|
+
- Round 4 (quality): review_gate_ready
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
decomposition:
|
|
2
|
+
schema_version: "1.0.0"
|
|
3
|
+
problem_restatement: Validate plan-phase YAML and debate pipeline
|
|
4
|
+
hypothesis:
|
|
5
|
+
schema_version: "1.0.0"
|
|
6
|
+
primary:
|
|
7
|
+
claim: Fixture-driven smoke covers DAG and debate
|
|
8
|
+
mechanism: Static artifacts plus validate-plan-dag.mjs
|
|
9
|
+
prediction: CI passes without live agents
|
|
10
|
+
experiment: Run smoke-harness-plan.mjs --fixture
|
|
11
|
+
stack:
|
|
12
|
+
schema_version: "1.0.0"
|
|
13
|
+
problem_framing: Node harness tooling
|
|
14
|
+
constraints: []
|
|
15
|
+
options:
|
|
16
|
+
- name: extend current stack
|
|
17
|
+
category: brownfield
|
|
18
|
+
fit_summary: Use existing ultimate-pi harness
|
|
19
|
+
tradeoffs:
|
|
20
|
+
pros: [No new deps]
|
|
21
|
+
cons: []
|
|
22
|
+
risks: []
|
|
23
|
+
evidence_refs: []
|
|
24
|
+
recommendation_rank: 1
|
|
25
|
+
recommended_primary: extend current stack
|
|
26
|
+
rationale: Fixture validates in-repo harness
|
|
27
|
+
eval:
|
|
28
|
+
schema_version: "1.0.0"
|
|
29
|
+
revision_recommended: false
|
|
30
|
+
relevance:
|
|
31
|
+
passes: true
|
|
32
|
+
rationale: Hypothesis matches smoke task
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"project_root": "/tmp/ultimate-pi-smoke",
|
|
6
6
|
"phase": "plan",
|
|
7
7
|
"plan_id": null,
|
|
8
|
-
"plan_packet_path": "/tmp/ultimate-pi-smoke/.pi/harness/runs/smoke-session-1/plan-packet.json",
|
|
8
|
+
"plan_packet_path": "/tmp/ultimate-pi-smoke/.pi/harness/runs/smoke-session-1/plan-packet.yaml",
|
|
9
9
|
"plan_ready": false,
|
|
10
10
|
"task_summary": "smoke task",
|
|
11
11
|
"status": "active",
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* smoke-harness-plan — fixture validation for plan-phase pipeline (CI).
|
|
4
|
+
* Usage: node .pi/harness/evals/smoke/smoke-harness-plan.mjs --fixture
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import { access, cp, mkdir, readFile, rm } from "node:fs/promises";
|
|
8
|
+
import { constants } from "node:fs";
|
|
9
|
+
import { dirname, join, resolve } from "node:path";
|
|
10
|
+
import { fileURLToPath } from "node:url";
|
|
11
|
+
import { parse as parseYaml } from "yaml";
|
|
12
|
+
import { validateExecutionPlan } from "../../../scripts/validate-plan-dag.mjs";
|
|
13
|
+
|
|
14
|
+
const ROOT = join(dirname(fileURLToPath(import.meta.url)), "..", "..", "..", "..");
|
|
15
|
+
const FIXTURE_DIR = join(dirname(fileURLToPath(import.meta.url)), "fixtures", "plan-phase");
|
|
16
|
+
|
|
17
|
+
function fail(msg) {
|
|
18
|
+
console.error(`smoke-harness-plan: FAIL: ${msg}`);
|
|
19
|
+
process.exit(1);
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
function ok(msg) {
|
|
23
|
+
console.log(` ✓ ${msg}`);
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
async function runFixture() {
|
|
27
|
+
const fixtureRoot = join(FIXTURE_DIR, "minimal-med");
|
|
28
|
+
try {
|
|
29
|
+
await access(fixtureRoot, constants.R_OK);
|
|
30
|
+
} catch {
|
|
31
|
+
fail(`missing fixture ${fixtureRoot}`);
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
const packetPath = join(fixtureRoot, "plan-packet.yaml");
|
|
35
|
+
const raw = await readFile(packetPath, "utf-8");
|
|
36
|
+
const packet = parseYaml(raw);
|
|
37
|
+
if (packet.contract_version !== "1.1.0") {
|
|
38
|
+
fail("fixture contract_version must be 1.1.0");
|
|
39
|
+
}
|
|
40
|
+
if (!packet.execution_plan) fail("fixture missing execution_plan");
|
|
41
|
+
|
|
42
|
+
const { status, errors } = validateExecutionPlan(packet, fixtureRoot);
|
|
43
|
+
if (status !== "pass") {
|
|
44
|
+
fail(`DAG validation failed: ${errors.join("; ")}`);
|
|
45
|
+
}
|
|
46
|
+
ok("fixture plan-packet.yaml DAG pass");
|
|
47
|
+
|
|
48
|
+
const reviewPath = join(fixtureRoot, "plan-review.md");
|
|
49
|
+
await access(reviewPath, constants.R_OK);
|
|
50
|
+
ok("plan-review.md present");
|
|
51
|
+
|
|
52
|
+
const debateRounds = ["review-round-r1.yaml", "review-round-r4.yaml"];
|
|
53
|
+
for (const name of debateRounds) {
|
|
54
|
+
const p = join(fixtureRoot, "artifacts", name);
|
|
55
|
+
await access(p, constants.R_OK);
|
|
56
|
+
const draft = parseYaml(await readFile(p, "utf-8"));
|
|
57
|
+
if (!draft.schema_version) fail(`${name} missing schema_version`);
|
|
58
|
+
}
|
|
59
|
+
ok("debate round YAML artifacts present");
|
|
60
|
+
|
|
61
|
+
const researchPath = join(fixtureRoot, "research-brief.yaml");
|
|
62
|
+
const research = parseYaml(await readFile(researchPath, "utf-8"));
|
|
63
|
+
if (!research.decomposition || !research.hypothesis) {
|
|
64
|
+
fail("research-brief.yaml missing decomposition/hypothesis");
|
|
65
|
+
}
|
|
66
|
+
ok("research-brief.yaml structure");
|
|
67
|
+
|
|
68
|
+
console.log("smoke-harness-plan: all fixture checks passed");
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
async function main() {
|
|
72
|
+
const args = process.argv.slice(2);
|
|
73
|
+
if (args.includes("--fixture")) {
|
|
74
|
+
await runFixture();
|
|
75
|
+
return;
|
|
76
|
+
}
|
|
77
|
+
if (args.includes("--live")) {
|
|
78
|
+
console.log(
|
|
79
|
+
"smoke-harness-plan: --live requires manual /harness-plan run; skipping in CI",
|
|
80
|
+
);
|
|
81
|
+
return;
|
|
82
|
+
}
|
|
83
|
+
fail("Usage: smoke-harness-plan.mjs --fixture | --live");
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
main().catch((err) => {
|
|
87
|
+
fail(err instanceof Error ? err.message : String(err));
|
|
88
|
+
});
|
|
@@ -13,7 +13,7 @@ This directory is the canonical contract surface for Phase 1 harness artifacts.
|
|
|
13
13
|
|
|
14
14
|
These schemas define the minimum machine-readable contracts for:
|
|
15
15
|
|
|
16
|
-
- planning (`PlanPacket`)
|
|
16
|
+
- planning (`PlanPacket`, `PlanDecompositionBrief`, `PlanHypothesisBrief`, `PlanHypothesisEval`, `PlanAdversaryBrief`)
|
|
17
17
|
- execution telemetry (`RunTrace`, `HarnessRunRecord`)
|
|
18
18
|
- PostHog harness events (`HarnessPostHogEvent`)
|
|
19
19
|
- observation bus (`HarnessObservation`)
|
|
@@ -24,7 +24,12 @@
|
|
|
24
24
|
"harness_drift_report",
|
|
25
25
|
"harness_eval_verdict",
|
|
26
26
|
"harness_sentrux_signal",
|
|
27
|
-
"harness_observation"
|
|
27
|
+
"harness_observation",
|
|
28
|
+
"harness_subagent_spawned",
|
|
29
|
+
"harness_subagent_completed",
|
|
30
|
+
"harness_subagent_result_wait",
|
|
31
|
+
"harness_subagent_setup",
|
|
32
|
+
"harness_blackboard_op"
|
|
28
33
|
]
|
|
29
34
|
},
|
|
30
35
|
"distinct_id": {
|
|
@@ -14,13 +14,14 @@
|
|
|
14
14
|
"agent": {
|
|
15
15
|
"type": "string",
|
|
16
16
|
"minLength": 1,
|
|
17
|
-
"description": "Target subagent id, e.g. harness/planner"
|
|
17
|
+
"description": "Target subagent id, e.g. harness/planning/scout-graphify"
|
|
18
18
|
},
|
|
19
19
|
"mode": {
|
|
20
20
|
"type": "string",
|
|
21
21
|
"enum": [
|
|
22
22
|
"create",
|
|
23
23
|
"revise",
|
|
24
|
+
"plan_review",
|
|
24
25
|
"execute",
|
|
25
26
|
"benchmark",
|
|
26
27
|
"verdict",
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://ultimate-pi.local/.pi/harness/specs/plan-adversary-brief.schema.json",
|
|
4
|
+
"title": "PlanAdversaryBrief",
|
|
5
|
+
"description": "Pre-approval adversarial review of a draft PlanPacket (plan phase only).",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"additionalProperties": false,
|
|
8
|
+
"required": [
|
|
9
|
+
"schema_version",
|
|
10
|
+
"edge_cases",
|
|
11
|
+
"failure_modes",
|
|
12
|
+
"acceptance_gaps",
|
|
13
|
+
"mitigations",
|
|
14
|
+
"recommendation"
|
|
15
|
+
],
|
|
16
|
+
"properties": {
|
|
17
|
+
"schema_version": {
|
|
18
|
+
"type": "string",
|
|
19
|
+
"const": "1.0.0"
|
|
20
|
+
},
|
|
21
|
+
"edge_cases": {
|
|
22
|
+
"type": "array",
|
|
23
|
+
"items": { "type": "string", "minLength": 1 }
|
|
24
|
+
},
|
|
25
|
+
"failure_modes": {
|
|
26
|
+
"type": "array",
|
|
27
|
+
"items": { "type": "string", "minLength": 1 }
|
|
28
|
+
},
|
|
29
|
+
"acceptance_gaps": {
|
|
30
|
+
"type": "array",
|
|
31
|
+
"items": { "type": "string", "minLength": 1 }
|
|
32
|
+
},
|
|
33
|
+
"mitigations": {
|
|
34
|
+
"type": "array",
|
|
35
|
+
"items": { "type": "string", "minLength": 1 }
|
|
36
|
+
},
|
|
37
|
+
"recommendation": {
|
|
38
|
+
"type": "string",
|
|
39
|
+
"enum": ["proceed", "revise"]
|
|
40
|
+
},
|
|
41
|
+
"human_summary": {
|
|
42
|
+
"type": "string"
|
|
43
|
+
}
|
|
44
|
+
}
|
|
45
|
+
}
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://ultimate-pi.local/.pi/harness/specs/plan-decomposition-brief.schema.json",
|
|
4
|
+
"title": "PlanDecompositionBrief",
|
|
5
|
+
"description": "DeepMind-style problem decomposition (harness plan phase 1).",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"additionalProperties": false,
|
|
8
|
+
"required": [
|
|
9
|
+
"schema_version",
|
|
10
|
+
"problem_restatement",
|
|
11
|
+
"problem_types",
|
|
12
|
+
"scope",
|
|
13
|
+
"hard_constraints",
|
|
14
|
+
"soft_constraints",
|
|
15
|
+
"success_metrics",
|
|
16
|
+
"prior_art",
|
|
17
|
+
"tensions",
|
|
18
|
+
"core_tension"
|
|
19
|
+
],
|
|
20
|
+
"properties": {
|
|
21
|
+
"schema_version": {
|
|
22
|
+
"type": "string",
|
|
23
|
+
"const": "1.0.0"
|
|
24
|
+
},
|
|
25
|
+
"problem_restatement": {
|
|
26
|
+
"type": "string",
|
|
27
|
+
"minLength": 1,
|
|
28
|
+
"description": "Precise restatement; what solving this looks like."
|
|
29
|
+
},
|
|
30
|
+
"problem_types": {
|
|
31
|
+
"type": "array",
|
|
32
|
+
"minItems": 1,
|
|
33
|
+
"items": {
|
|
34
|
+
"type": "string",
|
|
35
|
+
"enum": [
|
|
36
|
+
"optimization",
|
|
37
|
+
"discovery",
|
|
38
|
+
"explanation",
|
|
39
|
+
"design",
|
|
40
|
+
"selection"
|
|
41
|
+
]
|
|
42
|
+
}
|
|
43
|
+
},
|
|
44
|
+
"scope": {
|
|
45
|
+
"type": "object",
|
|
46
|
+
"additionalProperties": false,
|
|
47
|
+
"required": ["narrowed_focus", "excluded"],
|
|
48
|
+
"properties": {
|
|
49
|
+
"narrowed_focus": {
|
|
50
|
+
"type": "string",
|
|
51
|
+
"minLength": 1
|
|
52
|
+
},
|
|
53
|
+
"excluded": {
|
|
54
|
+
"type": "array",
|
|
55
|
+
"items": { "type": "string", "minLength": 1 }
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
},
|
|
59
|
+
"hard_constraints": {
|
|
60
|
+
"type": "array",
|
|
61
|
+
"items": { "type": "string", "minLength": 1 }
|
|
62
|
+
},
|
|
63
|
+
"soft_constraints": {
|
|
64
|
+
"type": "array",
|
|
65
|
+
"items": { "type": "string", "minLength": 1 }
|
|
66
|
+
},
|
|
67
|
+
"success_metrics": {
|
|
68
|
+
"type": "array",
|
|
69
|
+
"minItems": 1,
|
|
70
|
+
"items": { "type": "string", "minLength": 1 }
|
|
71
|
+
},
|
|
72
|
+
"prior_art": {
|
|
73
|
+
"type": "object",
|
|
74
|
+
"additionalProperties": false,
|
|
75
|
+
"required": ["best_approach", "gap", "dead_ends"],
|
|
76
|
+
"properties": {
|
|
77
|
+
"best_approach": { "type": "string", "minLength": 1 },
|
|
78
|
+
"gap": { "type": "string", "minLength": 1 },
|
|
79
|
+
"dead_ends": {
|
|
80
|
+
"type": "array",
|
|
81
|
+
"items": { "type": "string", "minLength": 1 }
|
|
82
|
+
}
|
|
83
|
+
}
|
|
84
|
+
},
|
|
85
|
+
"tensions": {
|
|
86
|
+
"type": "array",
|
|
87
|
+
"minItems": 1,
|
|
88
|
+
"items": {
|
|
89
|
+
"type": "object",
|
|
90
|
+
"additionalProperties": false,
|
|
91
|
+
"required": ["claim_a", "claim_b", "why_matters"],
|
|
92
|
+
"properties": {
|
|
93
|
+
"claim_a": { "type": "string", "minLength": 1 },
|
|
94
|
+
"claim_b": { "type": "string", "minLength": 1 },
|
|
95
|
+
"why_matters": { "type": "string", "minLength": 1 }
|
|
96
|
+
}
|
|
97
|
+
}
|
|
98
|
+
},
|
|
99
|
+
"core_tension": {
|
|
100
|
+
"type": "string",
|
|
101
|
+
"minLength": 1,
|
|
102
|
+
"description": "One paragraph summarizing the tension that feeds hypothesis generation."
|
|
103
|
+
},
|
|
104
|
+
"human_summary": {
|
|
105
|
+
"type": "string"
|
|
106
|
+
}
|
|
107
|
+
}
|
|
108
|
+
}
|