ultimate-pi 0.16.0 → 0.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.pi/agents/harness/planning/hypothesis.md +1 -1
- package/.pi/agents/harness/planning/implementation-researcher.md +1 -1
- package/.pi/extensions/harness-debate-tools.ts +12 -3
- package/.pi/extensions/harness-run-context.ts +12 -0
- package/.pi/extensions/harness-subagent-submit.ts +2 -25
- package/.pi/extensions/harness-telemetry.ts +29 -4
- package/.pi/extensions/lib/debate-bus-core.ts +15 -9
- package/.pi/extensions/lib/harness-subagent-auth.ts +104 -19
- package/.pi/extensions/lib/harness-subagent-policy.ts +14 -0
- package/.pi/extensions/lib/harness-subagents-bridge.ts +85 -0
- package/.pi/extensions/lib/plan-debate-eligibility.ts +61 -8
- package/.pi/extensions/lib/plan-debate-focus.ts +21 -9
- package/.pi/extensions/lib/plan-debate-gate.ts +80 -17
- package/.pi/extensions/lib/plan-debate-lanes.ts +27 -3
- package/.pi/extensions/lib/plan-debate-round-status.ts +18 -7
- package/.pi/extensions/lib/plan-messenger.ts +4 -0
- package/.pi/extensions/lib/plan-review-gate.ts +51 -0
- package/.pi/extensions/trace-recorder.ts +1 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/artifacts/implementation-research.yaml +28 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/artifacts/review-round-consolidated.yaml +25 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/plan-packet.yaml +196 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/plan-review.md +14 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/research-brief.yaml +62 -0
- package/.pi/harness/evals/smoke/smoke-harness-plan.mjs +40 -17
- package/.pi/harness/specs/plan-review-round-draft.schema.json +1 -1
- package/.pi/model-router.example.json +13 -4
- package/.pi/prompts/harness-plan.md +25 -7
- package/.pi/prompts/harness-setup.md +4 -4
- package/.pi/scripts/harness-generate-model-router.mjs +118 -36
- package/.pi/scripts/harness-model-router-routing.test.mjs +97 -0
- package/.pi/scripts/harness-sync-model-router.mjs +15 -2
- package/.pi/scripts/harness-verify.mjs +29 -0
- package/CHANGELOG.md +11 -0
- package/package.json +1 -1
- package/vendor/pi-model-router/UPSTREAM_PIN.md +3 -1
- package/vendor/pi-model-router/extensions/commands.ts +4 -4
- package/vendor/pi-model-router/extensions/index.ts +21 -0
- package/vendor/pi-model-router/extensions/provider.ts +130 -79
- package/vendor/pi-model-router/extensions/routing.ts +148 -0
- package/vendor/pi-model-router/extensions/state.ts +3 -0
- package/vendor/pi-model-router/extensions/types.ts +9 -0
- package/vendor/pi-model-router/extensions/ui.ts +16 -2
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
schema_version: "1.0.0"
|
|
2
|
+
contract_version: "1.1.0"
|
|
3
|
+
plan_id: plan-smoke-fixture-001
|
|
4
|
+
task_id: task-smoke-001
|
|
5
|
+
scope: Smoke fixture for plan-phase harness validation with execution_plan and debate artifacts.
|
|
6
|
+
assumptions:
|
|
7
|
+
- Fixture only; no live agent run
|
|
8
|
+
risk_level: med
|
|
9
|
+
acceptance_checks:
|
|
10
|
+
- id: AC-1
|
|
11
|
+
description: DAG validation passes
|
|
12
|
+
- id: AC-2
|
|
13
|
+
description: Consolidated debate round recorded (fast profile)
|
|
14
|
+
- id: AC-3
|
|
15
|
+
description: Stack brief present in research-brief
|
|
16
|
+
- id: AC-4
|
|
17
|
+
description: Sprint contract complete
|
|
18
|
+
- id: AC-5
|
|
19
|
+
description: plan-review.md renders
|
|
20
|
+
rollback_plan:
|
|
21
|
+
revert_commit_ready: true
|
|
22
|
+
rollback_artifacts:
|
|
23
|
+
revert_command: git revert HEAD
|
|
24
|
+
revert_branch: main
|
|
25
|
+
patch_bundle: .pi/harness/runs/smoke-fixture/patch.bundle
|
|
26
|
+
execution_plan:
|
|
27
|
+
schema_version: "1.0.0"
|
|
28
|
+
phases:
|
|
29
|
+
- phase_id: P1
|
|
30
|
+
name: Foundation
|
|
31
|
+
objective: Establish baseline and verify harness wiring
|
|
32
|
+
entry_criteria:
|
|
33
|
+
- Fixture loaded
|
|
34
|
+
exit_criteria:
|
|
35
|
+
- AC-1 satisfied
|
|
36
|
+
milestone: M1-baseline
|
|
37
|
+
work_item_ids: [WI-1, WI-2, WI-3]
|
|
38
|
+
- phase_id: P2
|
|
39
|
+
name: Build
|
|
40
|
+
objective: Implement core changes
|
|
41
|
+
entry_criteria:
|
|
42
|
+
- M1-baseline complete
|
|
43
|
+
exit_criteria:
|
|
44
|
+
- AC-2 satisfied
|
|
45
|
+
milestone: M2-build
|
|
46
|
+
work_item_ids: [WI-4, WI-5, WI-6]
|
|
47
|
+
- phase_id: P3
|
|
48
|
+
name: Verify
|
|
49
|
+
objective: Quality gate and documentation
|
|
50
|
+
entry_criteria:
|
|
51
|
+
- M2-build complete
|
|
52
|
+
exit_criteria:
|
|
53
|
+
- AC-5 satisfied
|
|
54
|
+
milestone: M3-ship
|
|
55
|
+
work_item_ids: [WI-7, WI-8]
|
|
56
|
+
work_items:
|
|
57
|
+
- work_item_id: WI-1
|
|
58
|
+
phase_id: P1
|
|
59
|
+
title: Load fixture packet
|
|
60
|
+
description: Read plan-packet.yaml from fixture directory
|
|
61
|
+
depends_on: []
|
|
62
|
+
files:
|
|
63
|
+
- .pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/plan-packet.yaml
|
|
64
|
+
parallel_safe: true
|
|
65
|
+
done_criteria:
|
|
66
|
+
type: manual
|
|
67
|
+
spec: Fixture packet readable
|
|
68
|
+
acceptance_check_ids: [AC-1]
|
|
69
|
+
- work_item_id: WI-2
|
|
70
|
+
phase_id: P1
|
|
71
|
+
title: Run DAG validator
|
|
72
|
+
description: Execute validate-plan-dag.mjs
|
|
73
|
+
depends_on: [WI-1]
|
|
74
|
+
files:
|
|
75
|
+
- .pi/scripts/validate-plan-dag.mjs
|
|
76
|
+
parallel_safe: false
|
|
77
|
+
done_criteria:
|
|
78
|
+
type: command
|
|
79
|
+
spec: node .pi/scripts/validate-plan-dag.mjs --packet plan-packet.yaml
|
|
80
|
+
acceptance_check_ids: [AC-1]
|
|
81
|
+
- work_item_id: WI-3
|
|
82
|
+
phase_id: P1
|
|
83
|
+
title: Lint harness-yaml
|
|
84
|
+
description: Ensure YAML helpers parse fixture
|
|
85
|
+
depends_on: [WI-1]
|
|
86
|
+
files:
|
|
87
|
+
- .pi/lib/harness-yaml.ts
|
|
88
|
+
parallel_safe: true
|
|
89
|
+
done_criteria:
|
|
90
|
+
type: lint
|
|
91
|
+
spec: npm test
|
|
92
|
+
acceptance_check_ids: [AC-1]
|
|
93
|
+
- work_item_id: WI-4
|
|
94
|
+
phase_id: P2
|
|
95
|
+
title: Debate round 1-2 artifacts
|
|
96
|
+
description: Validate review-round YAML
|
|
97
|
+
depends_on: [WI-2]
|
|
98
|
+
files:
|
|
99
|
+
- .pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r1.yaml
|
|
100
|
+
parallel_safe: false
|
|
101
|
+
done_criteria:
|
|
102
|
+
type: artifact
|
|
103
|
+
spec: artifacts/review-round-r1.yaml exists
|
|
104
|
+
acceptance_check_ids: [AC-2]
|
|
105
|
+
- work_item_id: WI-5
|
|
106
|
+
phase_id: P2
|
|
107
|
+
title: Debate round 3-4 artifacts
|
|
108
|
+
description: Validate final review round
|
|
109
|
+
depends_on: [WI-4]
|
|
110
|
+
files:
|
|
111
|
+
- .pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r4.yaml
|
|
112
|
+
parallel_safe: false
|
|
113
|
+
done_criteria:
|
|
114
|
+
type: artifact
|
|
115
|
+
spec: artifacts/review-round-r4.yaml exists
|
|
116
|
+
acceptance_check_ids: [AC-2]
|
|
117
|
+
- work_item_id: WI-6
|
|
118
|
+
phase_id: P2
|
|
119
|
+
title: Stack research merge
|
|
120
|
+
description: research-brief includes stack section
|
|
121
|
+
depends_on: [WI-2]
|
|
122
|
+
files: []
|
|
123
|
+
non_code: true
|
|
124
|
+
parallel_safe: true
|
|
125
|
+
done_criteria:
|
|
126
|
+
type: manual
|
|
127
|
+
spec: research-brief.yaml contains stack key
|
|
128
|
+
acceptance_check_ids: [AC-3]
|
|
129
|
+
- work_item_id: WI-7
|
|
130
|
+
phase_id: P3
|
|
131
|
+
title: Sprint contract audit
|
|
132
|
+
description: R4 sprint audit artifact
|
|
133
|
+
depends_on: [WI-5]
|
|
134
|
+
files:
|
|
135
|
+
- .pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/sprint-audit-r4.yaml
|
|
136
|
+
parallel_safe: false
|
|
137
|
+
done_criteria:
|
|
138
|
+
type: artifact
|
|
139
|
+
spec: sprint-audit-r4.yaml present
|
|
140
|
+
acceptance_check_ids: [AC-4]
|
|
141
|
+
- work_item_id: WI-8
|
|
142
|
+
phase_id: P3
|
|
143
|
+
title: Render plan-review
|
|
144
|
+
description: Human-readable plan review markdown
|
|
145
|
+
depends_on: [WI-7]
|
|
146
|
+
files:
|
|
147
|
+
- .pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/plan-review.md
|
|
148
|
+
parallel_safe: false
|
|
149
|
+
done_criteria:
|
|
150
|
+
type: manual
|
|
151
|
+
spec: plan-review.md non-empty
|
|
152
|
+
acceptance_check_ids: [AC-5]
|
|
153
|
+
sprint_contract:
|
|
154
|
+
in_scope:
|
|
155
|
+
- Fixture validation only
|
|
156
|
+
out_of_scope:
|
|
157
|
+
- Production deploy
|
|
158
|
+
definition_of_done: All smoke checks green
|
|
159
|
+
assumptions:
|
|
160
|
+
- CI environment has node
|
|
161
|
+
external_dependencies: []
|
|
162
|
+
wbs_dictionary:
|
|
163
|
+
- work_item_id: WI-1
|
|
164
|
+
deliverable: Fixture packet loaded
|
|
165
|
+
owner_role: executor
|
|
166
|
+
inputs: []
|
|
167
|
+
outputs: [parsed packet]
|
|
168
|
+
risk_register:
|
|
169
|
+
- risk_id: R1
|
|
170
|
+
description: DAG validator false negative
|
|
171
|
+
likelihood: low
|
|
172
|
+
impact: high
|
|
173
|
+
mitigation: Unit tests on validate-plan-dag.mjs
|
|
174
|
+
linked_work_item_ids: [WI-2]
|
|
175
|
+
- risk_id: R2
|
|
176
|
+
description: Debate cap misconfiguration
|
|
177
|
+
likelihood: med
|
|
178
|
+
impact: med
|
|
179
|
+
mitigation: debate-orchestrator plan profile tests
|
|
180
|
+
linked_work_item_ids: [WI-4]
|
|
181
|
+
- risk_id: R3
|
|
182
|
+
description: YAML parse drift
|
|
183
|
+
likelihood: low
|
|
184
|
+
impact: med
|
|
185
|
+
mitigation: harness-yaml strict parse
|
|
186
|
+
linked_work_item_ids: [WI-3]
|
|
187
|
+
schedule_metadata:
|
|
188
|
+
critical_path_work_item_ids: [WI-1, WI-2, WI-4, WI-5, WI-7, WI-8]
|
|
189
|
+
parallel_groups:
|
|
190
|
+
- [WI-1, WI-3]
|
|
191
|
+
schedule_baseline_note: Fixture topological order; no calendar dates
|
|
192
|
+
dag_validation:
|
|
193
|
+
status: pass
|
|
194
|
+
topological_order: [WI-1, WI-2, WI-3, WI-4, WI-5, WI-6, WI-7, WI-8]
|
|
195
|
+
cycles: []
|
|
196
|
+
conflicts: []
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
# Plan review (fixture)
|
|
2
|
+
|
|
3
|
+
plan_id: plan-smoke-fixture-001
|
|
4
|
+
|
|
5
|
+
## Execution plan
|
|
6
|
+
|
|
7
|
+
Phases: P1 Foundation → P2 Build → P3 Verify
|
|
8
|
+
|
|
9
|
+
Critical path: WI-1 → WI-2 → WI-4 → WI-5 → WI-7 → WI-8
|
|
10
|
+
|
|
11
|
+
## Debate
|
|
12
|
+
|
|
13
|
+
- Round 1 (spec): review_gate_ready
|
|
14
|
+
- Round 4 (quality): review_gate_ready
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
decomposition:
|
|
2
|
+
schema_version: "1.0.0"
|
|
3
|
+
problem_restatement: Light-profile smoke for two-focus debate
|
|
4
|
+
hypothesis:
|
|
5
|
+
schema_version: "1.0.0"
|
|
6
|
+
primary:
|
|
7
|
+
claim: Light debate covers spec and quality only
|
|
8
|
+
mechanism: Eligibility profile light with min_focus_rounds 2
|
|
9
|
+
prediction: planDebateOutcomeComplete passes with two rounds
|
|
10
|
+
experiment: Run smoke-harness-plan.mjs --fixture minimal-low-light
|
|
11
|
+
implementation:
|
|
12
|
+
schema_version: "1.0.0"
|
|
13
|
+
problem_framing: Low-risk fixture for selective debate
|
|
14
|
+
sub_problems: [spec coverage, quality coverage]
|
|
15
|
+
internal_references:
|
|
16
|
+
- path: test/plan-debate-eligibility.test.mjs
|
|
17
|
+
relevance: Eligibility unit tests
|
|
18
|
+
reuse_signal: high
|
|
19
|
+
external_references: []
|
|
20
|
+
solution_patterns:
|
|
21
|
+
- name: light profile gate
|
|
22
|
+
provenance: ADR-0036
|
|
23
|
+
fit: Reduces debate cost on trivial tasks
|
|
24
|
+
tradeoffs:
|
|
25
|
+
pros: [Fewer rounds]
|
|
26
|
+
cons: []
|
|
27
|
+
risks: []
|
|
28
|
+
similar_implementations:
|
|
29
|
+
- name: minimal-med four-focus fixture
|
|
30
|
+
what_it_solves: Full debate coverage
|
|
31
|
+
gap_vs_us: Light uses two focuses only
|
|
32
|
+
recommended_approach:
|
|
33
|
+
summary: Two review rounds with spec then quality
|
|
34
|
+
recommended_approach_confidence: high
|
|
35
|
+
confidence_rationale: Deterministic fixture aligned with eligibility rules
|
|
36
|
+
evidence_refs:
|
|
37
|
+
- .pi/extensions/lib/plan-debate-eligibility.ts
|
|
38
|
+
- test/plan-debate-eligibility.test.mjs
|
|
39
|
+
anti_patterns: []
|
|
40
|
+
open_questions: []
|
|
41
|
+
stack:
|
|
42
|
+
schema_version: "1.0.0"
|
|
43
|
+
problem_framing: Node harness tooling
|
|
44
|
+
constraints: []
|
|
45
|
+
options:
|
|
46
|
+
- name: extend current stack
|
|
47
|
+
category: brownfield
|
|
48
|
+
fit_summary: Use existing ultimate-pi harness
|
|
49
|
+
tradeoffs:
|
|
50
|
+
pros: [No new deps]
|
|
51
|
+
cons: []
|
|
52
|
+
risks: []
|
|
53
|
+
evidence_refs: []
|
|
54
|
+
recommendation_rank: 1
|
|
55
|
+
recommended_primary: extend current stack
|
|
56
|
+
rationale: Fixture validates in-repo harness
|
|
57
|
+
eval:
|
|
58
|
+
schema_version: "1.0.0"
|
|
59
|
+
revision_recommended: false
|
|
60
|
+
relevance:
|
|
61
|
+
passes: true
|
|
62
|
+
rationale: Hypothesis matches light smoke task
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
/**
|
|
3
3
|
* smoke-harness-plan — fixture validation for plan-phase pipeline (CI).
|
|
4
|
-
* Usage: node .pi/harness/evals/smoke/smoke-harness-plan.mjs --fixture [minimal-med|minimal-low-light]
|
|
4
|
+
* Usage: node .pi/harness/evals/smoke/smoke-harness-plan.mjs --fixture [minimal-med|minimal-low-light|minimal-med-fast]
|
|
5
5
|
*/
|
|
6
6
|
|
|
7
7
|
import { access, readFile } from "node:fs/promises";
|
|
@@ -26,16 +26,23 @@ async function scanFocusCoverage(fixtureRoot, requiredFocus) {
|
|
|
26
26
|
let last_round_index = 0;
|
|
27
27
|
const { readdir } = await import("node:fs/promises");
|
|
28
28
|
const files = (await readdir(art)).filter((f) =>
|
|
29
|
-
/^review-round
|
|
29
|
+
/^review-round(?:-r\d+|-consolidated)\.yaml$/i.test(f),
|
|
30
30
|
);
|
|
31
31
|
for (const name of files.sort()) {
|
|
32
|
-
const
|
|
32
|
+
const consolidated = /^review-round-consolidated\.yaml$/i.test(name);
|
|
33
|
+
const m = consolidated
|
|
34
|
+
? ["review-round-consolidated.yaml", "1"]
|
|
35
|
+
: /^review-round-r(\d+)\.yaml$/i.exec(name);
|
|
33
36
|
if (!m) continue;
|
|
34
|
-
const roundIndex = Number(m[1]);
|
|
37
|
+
const roundIndex = consolidated ? 1 : Number(m[1]);
|
|
35
38
|
if (roundIndex > last_round_index) last_round_index = roundIndex;
|
|
36
39
|
const draft = parseYaml(await readFile(join(art, name), "utf-8"));
|
|
37
40
|
const focus = String(draft.debate_round_focus ?? "").trim();
|
|
38
|
-
if (
|
|
41
|
+
if (focus === "all") {
|
|
42
|
+
for (const f of requiredFocus) covered.add(f);
|
|
43
|
+
} else if (requiredFocus.includes(focus)) {
|
|
44
|
+
covered.add(focus);
|
|
45
|
+
}
|
|
39
46
|
if (roundIndex === last_round_index) {
|
|
40
47
|
last_review_gate_ready = draft.review_gate_ready === true;
|
|
41
48
|
}
|
|
@@ -110,22 +117,33 @@ async function runFixture(name) {
|
|
|
110
117
|
ok("research-brief.yaml structure");
|
|
111
118
|
|
|
112
119
|
const isLight = name === "minimal-low-light";
|
|
113
|
-
const
|
|
114
|
-
const
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
]
|
|
120
|
+
const isFast = name === "minimal-med-fast";
|
|
121
|
+
const requiredFocus =
|
|
122
|
+
isLight || isFast
|
|
123
|
+
? ["spec", "quality"]
|
|
124
|
+
: ["spec", "wbs", "schedule", "quality"];
|
|
125
|
+
const debateRounds = isFast
|
|
126
|
+
? ["review-round-consolidated.yaml"]
|
|
127
|
+
: isLight
|
|
128
|
+
? ["review-round-r1.yaml", "review-round-r2.yaml"]
|
|
129
|
+
: [
|
|
130
|
+
"review-round-r1.yaml",
|
|
131
|
+
"review-round-r2.yaml",
|
|
132
|
+
"review-round-r3.yaml",
|
|
133
|
+
"review-round-r4.yaml",
|
|
134
|
+
];
|
|
122
135
|
const seenFocus = new Set();
|
|
123
136
|
for (const fileName of debateRounds) {
|
|
124
137
|
const p = join(fixtureRoot, "artifacts", fileName);
|
|
125
138
|
await access(p, constants.R_OK);
|
|
126
139
|
const draft = parseYaml(await readFile(p, "utf-8"));
|
|
127
140
|
if (!draft.schema_version) fail(`${fileName} missing schema_version`);
|
|
128
|
-
|
|
141
|
+
const f = String(draft.debate_round_focus ?? "").trim();
|
|
142
|
+
if (f === "all") {
|
|
143
|
+
for (const req of requiredFocus) seenFocus.add(req);
|
|
144
|
+
} else if (f) {
|
|
145
|
+
seenFocus.add(f);
|
|
146
|
+
}
|
|
129
147
|
}
|
|
130
148
|
for (const focus of requiredFocus) {
|
|
131
149
|
if (!seenFocus.has(focus)) {
|
|
@@ -135,7 +153,7 @@ async function runFixture(name) {
|
|
|
135
153
|
ok(`debate round YAML artifacts (${requiredFocus.length} focuses)`);
|
|
136
154
|
|
|
137
155
|
const coverage = await scanFocusCoverage(fixtureRoot, requiredFocus);
|
|
138
|
-
const minRounds = isLight ? 2 : 4;
|
|
156
|
+
const minRounds = isFast ? 1 : isLight ? 2 : 4;
|
|
139
157
|
if (!planOutcomeComplete(coverage, requiredFocus, minRounds)) {
|
|
140
158
|
fail("debate outcome incomplete for fixture coverage");
|
|
141
159
|
}
|
|
@@ -144,6 +162,9 @@ async function runFixture(name) {
|
|
|
144
162
|
if (isLight && packet.risk_level !== "low") {
|
|
145
163
|
fail("minimal-low-light fixture must use risk_level low");
|
|
146
164
|
}
|
|
165
|
+
if (isFast && packet.risk_level !== "med") {
|
|
166
|
+
fail("minimal-med-fast fixture must use risk_level med");
|
|
167
|
+
}
|
|
147
168
|
|
|
148
169
|
console.log(`smoke-harness-plan: all ${name} fixture checks passed`);
|
|
149
170
|
}
|
|
@@ -161,7 +182,9 @@ async function main() {
|
|
|
161
182
|
);
|
|
162
183
|
return;
|
|
163
184
|
}
|
|
164
|
-
fail(
|
|
185
|
+
fail(
|
|
186
|
+
"Usage: smoke-harness-plan.mjs --fixture [minimal-med|minimal-low-light|minimal-med-fast] | --live",
|
|
187
|
+
);
|
|
165
188
|
}
|
|
166
189
|
|
|
167
190
|
main().catch((err) => {
|
|
@@ -16,7 +16,7 @@
|
|
|
16
16
|
"round_index": { "type": "integer", "minimum": 1, "maximum": 12 },
|
|
17
17
|
"debate_round_focus": {
|
|
18
18
|
"type": "string",
|
|
19
|
-
"enum": ["spec", "wbs", "schedule", "quality"]
|
|
19
|
+
"enum": ["spec", "wbs", "schedule", "quality", "all"]
|
|
20
20
|
},
|
|
21
21
|
"round_summary": { "type": "string", "minLength": 1 },
|
|
22
22
|
"validation_summary": { "type": "string" },
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"defaultProfile": "auto",
|
|
3
3
|
"debug": false,
|
|
4
|
-
"classifierModel": "
|
|
4
|
+
"classifierModel": "openai/gpt-5.4-nano",
|
|
5
5
|
"phaseBias": 0.5,
|
|
6
6
|
"maxSessionBudget": 1.0,
|
|
7
7
|
"largeContextThreshold": 100000,
|
|
@@ -16,12 +16,21 @@
|
|
|
16
16
|
"profiles": {
|
|
17
17
|
"auto": {
|
|
18
18
|
"high": {
|
|
19
|
-
"model": "
|
|
19
|
+
"model": "openai/gpt-5.5",
|
|
20
20
|
"thinking": "high",
|
|
21
|
-
"fallbacks": ["
|
|
21
|
+
"fallbacks": ["openai/gpt-5.4-nano"]
|
|
22
|
+
},
|
|
23
|
+
"medium": { "model": "openai/gpt-5.5", "thinking": "medium" },
|
|
24
|
+
"low": { "model": "openai/gpt-5.5", "thinking": "low" }
|
|
25
|
+
},
|
|
26
|
+
"opencode-go": {
|
|
27
|
+
"high": {
|
|
28
|
+
"model": "opencode-go/qwen3.6-plus",
|
|
29
|
+
"thinking": "high",
|
|
30
|
+
"fallbacks": ["opencode-go/deepseek-v4-flash"]
|
|
22
31
|
},
|
|
23
32
|
"medium": { "model": "opencode-go/qwen3.6-plus", "thinking": "medium" },
|
|
24
|
-
"low": { "model": "opencode-go/
|
|
33
|
+
"low": { "model": "opencode-go/qwen3.6-plus", "thinking": "low" }
|
|
25
34
|
}
|
|
26
35
|
}
|
|
27
36
|
}
|
|
@@ -138,11 +138,16 @@ harness_debate_open({ debate_profile, required_focuses })
|
|
|
138
138
|
|
|
139
139
|
Profiles:
|
|
140
140
|
|
|
141
|
-
| Profile | Focuses required | min_focus_rounds |
|
|
142
|
-
|
|
143
|
-
| full | spec, wbs, schedule, quality | 4 |
|
|
144
|
-
| standard | all four | 4 |
|
|
145
|
-
| light | spec, quality only | 2 |
|
|
141
|
+
| Profile | Review gate | Focuses required | min_focus_rounds |
|
|
142
|
+
|---------|-------------|------------------|------------------|
|
|
143
|
+
| full | threaded (4 rounds) | spec, wbs, schedule, quality | 4 |
|
|
144
|
+
| standard | threaded (4 rounds) | all four | 4 |
|
|
145
|
+
| light | threaded (2 rounds) | spec, quality only | 2 |
|
|
146
|
+
| fast | **consolidated** (1 round) | spec, quality | 1 |
|
|
147
|
+
|
|
148
|
+
Med/low non-fork plans with a clear stack and no implementation `open_questions` default to **fast** (consolidated). Escalate to threaded rounds only when the integrator sets `review_gate_ready: false` or records blockers.
|
|
149
|
+
|
|
150
|
+
`--quick`: skip scout-semantic; cap web research (≤2 searches, ≤3 fetches); prefer **fast** eligibility when the DAG passes; use the consolidated Review Gate when the profile is fast.
|
|
146
151
|
|
|
147
152
|
## Phase 5 — Review Gate debate (profile-aware, pi-messenger, even with `--quick`)
|
|
148
153
|
|
|
@@ -153,13 +158,26 @@ Profiles:
|
|
|
153
158
|
|
|
154
159
|
### Focus coverage (required before consensus)
|
|
155
160
|
|
|
156
|
-
Each required focus must appear in
|
|
161
|
+
Each required focus must appear in the submitted review artifacts (`review-round-rN.yaml`, or `review-round-consolidated.yaml` with `debate_round_focus: all`, which counts as covering every required focus). `round_index` must increase monotonically (capped by the profile). Consensus is reached only when:
|
|
157
162
|
|
|
158
163
|
- all **required** focuses covered, **and**
|
|
159
164
|
- last round `review_gate_ready: true`, **and**
|
|
160
165
|
- `validate-plan-dag.mjs` still passes (re-run after patches).
|
|
161
166
|
|
|
162
|
-
###
|
|
167
|
+
### Consolidated state machine (`review_gate_mode: consolidated`, profile fast)
|
|
168
|
+
|
|
169
|
+
```
|
|
170
|
+
round_index := 1
|
|
171
|
+
debate_round_focus := all
|
|
172
|
+
spawn hypothesis-validator (blind)
|
|
173
|
+
WHILE NOT ready_for_integrator (harness_debate_round_status round_index=1):
|
|
174
|
+
follow next_tool (validation-turn, adversary-brief, sprint-audit in parallel-friendly order; one subagent per batch)
|
|
175
|
+
spawn review-integrator → write artifacts/review-round-consolidated.yaml → harness_debate_submit_round
|
|
176
|
+
IF review_gate_ready false OR blockers: escalate — threaded round per missing focus (spec/wbs/schedule/quality)
|
|
177
|
+
harness_debate_focus_coverage → harness_debate_consensus
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
### Threaded state machine (standard/full/light)
|
|
163
181
|
|
|
164
182
|
```
|
|
165
183
|
round_index := next uncovered required focus
|
|
@@ -327,7 +327,7 @@ sentrux plugin add-standard 2>/dev/null || echo "Plugins already installed or fa
|
|
|
327
327
|
|
|
328
328
|
## Step 3 — Pi Extension Packages
|
|
329
329
|
|
|
330
|
-
Bundled extensions load from the installed `ultimate-pi` package. **
|
|
330
|
+
Bundled extensions load from the installed `ultimate-pi` package. **Session-locked model routing** comes from a **vendored** fork of [`yeliu84/pi-model-router`](https://github.com/yeliu84/pi-model-router) in `vendor/pi-model-router/`, wired through [`.pi/extensions/pi-model-router-harness.ts`](.pi/extensions/pi-model-router-harness.ts). The router picks **one concrete model** when the session starts (from the first user prompt + system prompt complexity), then changes **thinking level only** each turn. The harness **gates** activation on `.pi/model-router.json` (Step **3.5** below) so `router/auto` cannot load prematurely. Attribution: see [THIRD_PARTY_NOTICES.md](THIRD_PARTY_NOTICES.md) and `vendor/pi-model-router/UPSTREAM_PIN.md`. Maintainer refresh: `npm run vendor:sync-router`.
|
|
331
331
|
|
|
332
332
|
Optionally install the companion lockfile used in development:
|
|
333
333
|
|
|
@@ -381,9 +381,9 @@ If generation prints "No authenticated Pi providers": warn in report — user sh
|
|
|
381
381
|
|
|
382
382
|
Do NOT block setup. If no config is written, `harness-sync-model-router.mjs` clears a premature `defaultProvider: "router"` in `.pi/settings.json`.
|
|
383
383
|
|
|
384
|
-
**Router onboarding** — The vendored extension starts only after `.pi/model-router.json` appears. Running the script above prepares that file plus optional Pi defaults (**`router` / `auto
|
|
384
|
+
**Router onboarding** — The vendored extension starts only after `.pi/model-router.json` appears. Running the script above prepares that file and, when `defaultProvider` was unset, optional Pi defaults (**`router` / `auto`**, or whatever `defaultProfile` is) via `harness-sync-model-router.mjs`; then run **`/reload`**. Generated profiles use **one model SKU per profile**; the high/medium/low tiers differ in **thinking** only. Subagents resolve their subprocess model from the complexity of the **agent system prompt** (same lock rules).
|
|
385
385
|
|
|
386
|
-
Manual override: **`/router profile auto`** anytime after reload if they changed defaults.
|
|
386
|
+
Manual override: run **`/router profile auto`** or **`/router profile opencode-go`** at any time after reload if the user has changed the defaults.
|
|
387
387
|
|
|
388
388
|
## Step 3.6 — Harness agents (package-resolved)
|
|
389
389
|
|
|
@@ -677,7 +677,7 @@ Output summary table:
|
|
|
677
677
|
| sentrux | ✓/✗ | CLI + plugins; rules via Step 4.2 bootstrap |
|
|
678
678
|
| Sentrux rules.toml | ✓/✗ | `.sentrux/rules.toml` synced from manifest |
|
|
679
679
|
| pi extensions | ✓/✗ | 4 packages |
|
|
680
|
-
| model router | ✓/✗ | Package + config verified, activation via `/router profile auto` |
|
|
680
|
+
| model router | ✓/✗ | Package + config verified, activation via `/router profile auto` (or `opencode-go`) |
|
|
681
681
|
| `.env` | ✓/✗/ask | Created / keys appended / user declined |
|
|
682
682
|
|
|
683
683
|
| .gitignore | ✓/✗ | entries added (incl. `.env`) |
|