ultimate-pi 0.16.0 → 0.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.pi/agents/harness/planning/hypothesis.md +1 -1
- package/.pi/agents/harness/planning/implementation-researcher.md +1 -1
- package/.pi/extensions/harness-debate-tools.ts +12 -3
- package/.pi/extensions/harness-run-context.ts +12 -0
- package/.pi/extensions/harness-subagent-submit.ts +2 -25
- package/.pi/extensions/harness-telemetry.ts +29 -4
- package/.pi/extensions/lib/debate-bus-core.ts +15 -9
- package/.pi/extensions/lib/harness-subagent-auth.ts +104 -19
- package/.pi/extensions/lib/harness-subagent-policy.ts +14 -0
- package/.pi/extensions/lib/harness-subagents-bridge.ts +85 -0
- package/.pi/extensions/lib/plan-debate-eligibility.ts +61 -8
- package/.pi/extensions/lib/plan-debate-focus.ts +21 -9
- package/.pi/extensions/lib/plan-debate-gate.ts +80 -17
- package/.pi/extensions/lib/plan-debate-lanes.ts +27 -3
- package/.pi/extensions/lib/plan-debate-round-status.ts +18 -7
- package/.pi/extensions/lib/plan-messenger.ts +4 -0
- package/.pi/extensions/lib/plan-review-gate.ts +51 -0
- package/.pi/extensions/trace-recorder.ts +1 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/artifacts/implementation-research.yaml +28 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/artifacts/review-round-consolidated.yaml +25 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/plan-packet.yaml +196 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/plan-review.md +14 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/research-brief.yaml +62 -0
- package/.pi/harness/evals/smoke/smoke-harness-plan.mjs +40 -17
- package/.pi/harness/specs/plan-review-round-draft.schema.json +1 -1
- package/.pi/model-router.example.json +13 -4
- package/.pi/prompts/harness-plan.md +25 -7
- package/.pi/prompts/harness-setup.md +4 -4
- package/.pi/scripts/harness-generate-model-router.mjs +118 -36
- package/.pi/scripts/harness-model-router-routing.test.mjs +97 -0
- package/.pi/scripts/harness-sync-model-router.mjs +15 -2
- package/.pi/scripts/harness-verify.mjs +29 -0
- package/CHANGELOG.md +11 -0
- package/package.json +1 -1
- package/vendor/pi-model-router/UPSTREAM_PIN.md +3 -1
- package/vendor/pi-model-router/extensions/commands.ts +4 -4
- package/vendor/pi-model-router/extensions/index.ts +21 -0
- package/vendor/pi-model-router/extensions/provider.ts +130 -79
- package/vendor/pi-model-router/extensions/routing.ts +148 -0
- package/vendor/pi-model-router/extensions/state.ts +3 -0
- package/vendor/pi-model-router/extensions/types.ts +9 -0
- package/vendor/pi-model-router/extensions/ui.ts +16 -2
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
|
|
5
5
|
import { PLAN_FOCUS_AREAS, type PlanDebateFocus } from "./plan-debate-focus.js";
|
|
6
6
|
|
|
7
|
-
export type DebateProfile = "full" | "standard" | "light";
|
|
7
|
+
export type DebateProfile = "full" | "standard" | "light" | "fast";
|
|
8
8
|
|
|
9
9
|
export interface DebateEligibilityInput {
|
|
10
10
|
risk_level?: string;
|
|
@@ -26,6 +26,7 @@ export interface DebateEligibilityResult {
|
|
|
26
26
|
debate_global_cap: number;
|
|
27
27
|
human_required: boolean;
|
|
28
28
|
rationale: string[];
|
|
29
|
+
review_gate_strategy: PlanReviewGateStrategy;
|
|
29
30
|
}
|
|
30
31
|
|
|
31
32
|
const LIGHT_FOCUS: PlanDebateFocus[] = ["spec", "quality"];
|
|
@@ -75,7 +76,7 @@ function confidenceAllowsLight(brief: Record<string, unknown> | null): boolean {
|
|
|
75
76
|
if (!rationale || refs.length < 2) return false;
|
|
76
77
|
if (implementationOpenQuestions(brief).length > 0) return false;
|
|
77
78
|
const patterns = Array.isArray(brief?.solution_patterns)
|
|
78
|
-
? (brief
|
|
79
|
+
? (brief?.solution_patterns as unknown[])
|
|
79
80
|
: [];
|
|
80
81
|
for (const p of patterns) {
|
|
81
82
|
const pat = asRecord(p);
|
|
@@ -85,7 +86,7 @@ function confidenceAllowsLight(brief: Record<string, unknown> | null): boolean {
|
|
|
85
86
|
}
|
|
86
87
|
}
|
|
87
88
|
const similar = Array.isArray(brief?.similar_implementations)
|
|
88
|
-
? (brief
|
|
89
|
+
? (brief?.similar_implementations as unknown[])
|
|
89
90
|
: [];
|
|
90
91
|
if (similar.length === 0) return false;
|
|
91
92
|
return true;
|
|
@@ -116,17 +117,46 @@ export const PLAN_BUDGET_LIGHT = {
|
|
|
116
117
|
debate_global_cap: 40000,
|
|
117
118
|
} as const;
|
|
118
119
|
|
|
120
|
+
export const PLAN_BUDGET_FAST = {
|
|
121
|
+
min_focus_rounds: 1,
|
|
122
|
+
max_rounds: 2,
|
|
123
|
+
max_exchanges_per_round: 1,
|
|
124
|
+
round_token_cap: 3500,
|
|
125
|
+
debate_global_cap: 20000,
|
|
126
|
+
} as const;
|
|
127
|
+
|
|
128
|
+
export interface PlanReviewGateStrategy {
|
|
129
|
+
mode: "consolidated" | "threaded";
|
|
130
|
+
profile: DebateProfile;
|
|
131
|
+
required_focuses: PlanDebateFocus[];
|
|
132
|
+
min_focus_rounds: number;
|
|
133
|
+
max_rounds: number;
|
|
134
|
+
max_exchanges_per_round: number;
|
|
135
|
+
round_token_cap: number;
|
|
136
|
+
debate_global_cap: number;
|
|
137
|
+
rationale: string[];
|
|
138
|
+
}
|
|
139
|
+
|
|
119
140
|
function capsForProfile(
|
|
120
141
|
profile: DebateProfile,
|
|
121
142
|
): Omit<
|
|
122
143
|
DebateEligibilityResult,
|
|
123
|
-
|
|
144
|
+
| "profile"
|
|
145
|
+
| "required_focuses"
|
|
146
|
+
| "human_required"
|
|
147
|
+
| "rationale"
|
|
148
|
+
| "review_gate_strategy"
|
|
124
149
|
> {
|
|
125
150
|
if (profile === "light") {
|
|
126
151
|
return {
|
|
127
152
|
...PLAN_BUDGET_LIGHT,
|
|
128
153
|
};
|
|
129
154
|
}
|
|
155
|
+
if (profile === "fast") {
|
|
156
|
+
return {
|
|
157
|
+
...PLAN_BUDGET_FAST,
|
|
158
|
+
};
|
|
159
|
+
}
|
|
130
160
|
return {
|
|
131
161
|
...PLAN_BUDGET_STANDARD,
|
|
132
162
|
};
|
|
@@ -161,7 +191,7 @@ export function harnessPlanDebateEligibility(
|
|
|
161
191
|
|
|
162
192
|
const conflictingPatterns =
|
|
163
193
|
Array.isArray(impl?.solution_patterns) &&
|
|
164
|
-
(impl
|
|
194
|
+
(impl?.solution_patterns as unknown[]).length >= 2 &&
|
|
165
195
|
openQs.length > 0;
|
|
166
196
|
if (conflictingPatterns) {
|
|
167
197
|
human_required = true;
|
|
@@ -182,6 +212,18 @@ export function harnessPlanDebateEligibility(
|
|
|
182
212
|
rationale.push(
|
|
183
213
|
"full: high risk, material fork, open questions, DAG patch, or tensions",
|
|
184
214
|
);
|
|
215
|
+
} else if (
|
|
216
|
+
risk === "med" &&
|
|
217
|
+
!materialFork &&
|
|
218
|
+
!dagPatched &&
|
|
219
|
+
input.dag_pass !== false &&
|
|
220
|
+
openQs.length === 0 &&
|
|
221
|
+
stackHasClearPrimary(stack)
|
|
222
|
+
) {
|
|
223
|
+
profile = "fast";
|
|
224
|
+
rationale.push(
|
|
225
|
+
"fast: medium risk with clear stack and no open questions; use consolidated review with escalation on blockers",
|
|
226
|
+
);
|
|
185
227
|
} else if (
|
|
186
228
|
risk === "low" &&
|
|
187
229
|
!materialFork &&
|
|
@@ -190,9 +232,9 @@ export function harnessPlanDebateEligibility(
|
|
|
190
232
|
confidenceAllowsLight(impl) &&
|
|
191
233
|
stackHasClearPrimary(stack)
|
|
192
234
|
) {
|
|
193
|
-
profile = "
|
|
235
|
+
profile = "fast";
|
|
194
236
|
rationale.push(
|
|
195
|
-
"
|
|
237
|
+
"fast: low risk, clear stack, high-confidence implementation approach",
|
|
196
238
|
);
|
|
197
239
|
} else if (risk === "med") {
|
|
198
240
|
profile = "standard";
|
|
@@ -200,7 +242,7 @@ export function harnessPlanDebateEligibility(
|
|
|
200
242
|
}
|
|
201
243
|
|
|
202
244
|
const required_focuses: PlanDebateFocus[] =
|
|
203
|
-
profile === "
|
|
245
|
+
profile === "fast" ? [...LIGHT_FOCUS] : [...PLAN_FOCUS_AREAS];
|
|
204
246
|
|
|
205
247
|
const caps = capsForProfile(profile);
|
|
206
248
|
|
|
@@ -210,5 +252,16 @@ export function harnessPlanDebateEligibility(
|
|
|
210
252
|
...caps,
|
|
211
253
|
human_required,
|
|
212
254
|
rationale,
|
|
255
|
+
review_gate_strategy: {
|
|
256
|
+
mode: profile === "fast" ? "consolidated" : "threaded",
|
|
257
|
+
profile,
|
|
258
|
+
required_focuses: [...required_focuses],
|
|
259
|
+
min_focus_rounds: caps.min_focus_rounds,
|
|
260
|
+
max_rounds: caps.max_rounds,
|
|
261
|
+
max_exchanges_per_round: caps.max_exchanges_per_round,
|
|
262
|
+
round_token_cap: caps.round_token_cap,
|
|
263
|
+
debate_global_cap: caps.debate_global_cap,
|
|
264
|
+
rationale: [...rationale],
|
|
265
|
+
},
|
|
213
266
|
};
|
|
214
267
|
}
|
|
@@ -9,12 +9,13 @@ import { parse as parseYaml } from "yaml";
|
|
|
9
9
|
|
|
10
10
|
export const PLAN_FOCUS_AREAS = ["spec", "wbs", "schedule", "quality"] as const;
|
|
11
11
|
export type PlanDebateFocus = (typeof PLAN_FOCUS_AREAS)[number];
|
|
12
|
+
export type PlanDebateRoundFocus = PlanDebateFocus | "all";
|
|
12
13
|
|
|
13
14
|
export interface PlanFocusCoverage {
|
|
14
15
|
covered: PlanDebateFocus[];
|
|
15
16
|
missing: PlanDebateFocus[];
|
|
16
17
|
rounds_by_focus: Partial<Record<PlanDebateFocus, number>>;
|
|
17
|
-
focus_by_round: Partial<Record<number,
|
|
18
|
+
focus_by_round: Partial<Record<number, PlanDebateRoundFocus>>;
|
|
18
19
|
last_review_gate_ready: boolean;
|
|
19
20
|
last_round_index: number;
|
|
20
21
|
}
|
|
@@ -34,8 +35,9 @@ async function fileExists(path: string): Promise<boolean> {
|
|
|
34
35
|
|
|
35
36
|
function focusFromDraft(
|
|
36
37
|
draft: Record<string, unknown>,
|
|
37
|
-
):
|
|
38
|
+
): PlanDebateRoundFocus | null {
|
|
38
39
|
const focus = String(draft.debate_round_focus ?? "").trim();
|
|
40
|
+
if (focus === "all") return "all";
|
|
39
41
|
if ((PLAN_FOCUS_AREAS as readonly string[]).includes(focus)) {
|
|
40
42
|
return focus as PlanDebateFocus;
|
|
41
43
|
}
|
|
@@ -56,14 +58,14 @@ export async function getPlanFocusCoverage(
|
|
|
56
58
|
const artifactsDir = join(runDir, "artifacts");
|
|
57
59
|
const covered = new Set<PlanDebateFocus>();
|
|
58
60
|
const rounds_by_focus: Partial<Record<PlanDebateFocus, number>> = {};
|
|
59
|
-
const focus_by_round: Partial<Record<number,
|
|
61
|
+
const focus_by_round: Partial<Record<number, PlanDebateRoundFocus>> = {};
|
|
60
62
|
let last_review_gate_ready = false;
|
|
61
63
|
let last_round_index = 0;
|
|
62
64
|
|
|
63
65
|
let files: string[] = [];
|
|
64
66
|
try {
|
|
65
67
|
files = (await readdir(artifactsDir)).filter((f) =>
|
|
66
|
-
/^review-round
|
|
68
|
+
/^review-round(?:-r\d+|-consolidated)\.yaml$/i.test(f),
|
|
67
69
|
);
|
|
68
70
|
} catch {
|
|
69
71
|
return {
|
|
@@ -77,9 +79,12 @@ export async function getPlanFocusCoverage(
|
|
|
77
79
|
}
|
|
78
80
|
|
|
79
81
|
for (const name of files.sort()) {
|
|
80
|
-
const
|
|
82
|
+
const consolidated = /^review-round-consolidated\.yaml$/i.test(name);
|
|
83
|
+
const m = consolidated
|
|
84
|
+
? ["review-round-consolidated.yaml", "1"]
|
|
85
|
+
: /^review-round-r(\d+)\.yaml$/i.exec(name);
|
|
81
86
|
if (!m) continue;
|
|
82
|
-
const roundIndex = Number(m[1]);
|
|
87
|
+
const roundIndex = consolidated ? 1 : Number(m[1]);
|
|
83
88
|
if (roundIndex > last_round_index) last_round_index = roundIndex;
|
|
84
89
|
const raw = await readFile(join(artifactsDir, name), "utf-8");
|
|
85
90
|
let draft: Record<string, unknown>;
|
|
@@ -90,8 +95,15 @@ export async function getPlanFocusCoverage(
|
|
|
90
95
|
}
|
|
91
96
|
const focus = focusFromDraft(draft);
|
|
92
97
|
if (focus) {
|
|
93
|
-
|
|
94
|
-
|
|
98
|
+
if (focus === "all") {
|
|
99
|
+
for (const requiredFocus of required) {
|
|
100
|
+
covered.add(requiredFocus);
|
|
101
|
+
rounds_by_focus[requiredFocus] = roundIndex;
|
|
102
|
+
}
|
|
103
|
+
} else {
|
|
104
|
+
covered.add(focus);
|
|
105
|
+
rounds_by_focus[focus] = roundIndex;
|
|
106
|
+
}
|
|
95
107
|
focus_by_round[roundIndex] = focus;
|
|
96
108
|
}
|
|
97
109
|
if (roundIndex === last_round_index) {
|
|
@@ -138,7 +150,7 @@ export function planDebateOutcomeComplete(
|
|
|
138
150
|
export async function readDebateRoundFocus(
|
|
139
151
|
runDir: string,
|
|
140
152
|
roundIndex: number,
|
|
141
|
-
): Promise<
|
|
153
|
+
): Promise<PlanDebateRoundFocus | null> {
|
|
142
154
|
const path = join(runDir, "artifacts", `review-round-r${roundIndex}.yaml`);
|
|
143
155
|
if (!(await fileExists(path))) return null;
|
|
144
156
|
try {
|
|
@@ -7,18 +7,27 @@ import { access, readFile } from "node:fs/promises";
|
|
|
7
7
|
import { join } from "node:path";
|
|
8
8
|
import { isHarnessBudgetEnforceOn } from "../../lib/harness-budget-enforce.js";
|
|
9
9
|
import { capsForDebate } from "./debate-bus-core.js";
|
|
10
|
+
import type { DebateEligibilityResult } from "./plan-debate-eligibility.js";
|
|
10
11
|
import {
|
|
11
12
|
getPlanFocusCoverage,
|
|
12
13
|
type PlanDebateFocus,
|
|
13
14
|
planDebateOutcomeComplete,
|
|
14
15
|
} from "./plan-debate-focus.js";
|
|
15
16
|
import { planDebateIdForRun } from "./plan-debate-id.js";
|
|
16
|
-
import {
|
|
17
|
+
import {
|
|
18
|
+
laneArtifactPathsForConsolidatedRound,
|
|
19
|
+
laneArtifactPathsForRound,
|
|
20
|
+
} from "./plan-debate-lanes.js";
|
|
17
21
|
import {
|
|
18
22
|
getMessengerRoundState,
|
|
19
23
|
loadMessengerState,
|
|
20
24
|
messengerRoundDebateReady,
|
|
21
25
|
} from "./plan-messenger.js";
|
|
26
|
+
import {
|
|
27
|
+
CONSOLIDATED_REVIEW_ARTIFACT,
|
|
28
|
+
isConsolidatedReviewStrategy,
|
|
29
|
+
planReviewGateStrategyFromEligibility,
|
|
30
|
+
} from "./plan-review-gate.js";
|
|
22
31
|
|
|
23
32
|
async function fileExists(path: string): Promise<boolean> {
|
|
24
33
|
try {
|
|
@@ -64,6 +73,7 @@ export interface PlanDebateGateResult {
|
|
|
64
73
|
export async function validatePlanDebateGate(
|
|
65
74
|
projectRoot: string,
|
|
66
75
|
runId: string,
|
|
76
|
+
eligibility?: DebateEligibilityResult,
|
|
67
77
|
): Promise<PlanDebateGateResult> {
|
|
68
78
|
const errors: string[] = [];
|
|
69
79
|
const warnings: string[] = [];
|
|
@@ -77,6 +87,33 @@ export async function validatePlanDebateGate(
|
|
|
77
87
|
? messenger.required_focuses
|
|
78
88
|
: (["spec", "wbs", "schedule", "quality"] as const);
|
|
79
89
|
const caps = capsForDebate(debateId, debateProfile);
|
|
90
|
+
const reviewStrategy =
|
|
91
|
+
eligibility != null
|
|
92
|
+
? planReviewGateStrategyFromEligibility(eligibility)
|
|
93
|
+
: messenger?.review_gate_mode === "consolidated"
|
|
94
|
+
? {
|
|
95
|
+
mode: "consolidated" as const,
|
|
96
|
+
profile: debateProfile as DebateEligibilityResult["profile"],
|
|
97
|
+
required_focuses: [...requiredFocuses],
|
|
98
|
+
min_focus_rounds: caps.min_focus_rounds,
|
|
99
|
+
max_rounds: caps.max_rounds,
|
|
100
|
+
max_exchanges_per_round: caps.max_exchanges_per_round,
|
|
101
|
+
round_token_cap: caps.round_token_cap,
|
|
102
|
+
debate_global_cap: caps.debate_global_cap,
|
|
103
|
+
rationale: ["messenger review_gate_mode=consolidated"],
|
|
104
|
+
}
|
|
105
|
+
: {
|
|
106
|
+
mode: "threaded" as const,
|
|
107
|
+
profile: debateProfile as DebateEligibilityResult["profile"],
|
|
108
|
+
required_focuses: [...requiredFocuses],
|
|
109
|
+
min_focus_rounds: caps.min_focus_rounds,
|
|
110
|
+
max_rounds: caps.max_rounds,
|
|
111
|
+
max_exchanges_per_round: caps.max_exchanges_per_round,
|
|
112
|
+
round_token_cap: caps.round_token_cap,
|
|
113
|
+
debate_global_cap: caps.debate_global_cap,
|
|
114
|
+
rationale: [],
|
|
115
|
+
};
|
|
116
|
+
const consolidated = isConsolidatedReviewStrategy(reviewStrategy);
|
|
80
117
|
const coverage = await getPlanFocusCoverage(runDir, { requiredFocuses });
|
|
81
118
|
const dialogueOpts = {
|
|
82
119
|
max_exchanges_per_round: caps.max_exchanges_per_round,
|
|
@@ -89,31 +126,55 @@ export async function validatePlanDebateGate(
|
|
|
89
126
|
errors.push("last submitted review round has review_gate_ready !== true");
|
|
90
127
|
}
|
|
91
128
|
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
)
|
|
98
|
-
];
|
|
99
|
-
for (const r of roundIndices) {
|
|
100
|
-
const focus = coverage.focus_by_round[r] ?? null;
|
|
101
|
-
for (const rel of laneArtifactPathsForRound(r, focus)) {
|
|
129
|
+
if (consolidated) {
|
|
130
|
+
const absConsolidated = join(runDir, CONSOLIDATED_REVIEW_ARTIFACT);
|
|
131
|
+
if (!(await fileExists(absConsolidated))) {
|
|
132
|
+
errors.push(`missing ${CONSOLIDATED_REVIEW_ARTIFACT}`);
|
|
133
|
+
}
|
|
134
|
+
for (const rel of laneArtifactPathsForConsolidatedRound()) {
|
|
102
135
|
const abs = join(runDir, rel);
|
|
103
136
|
if (!(await fileExists(abs))) {
|
|
104
137
|
errors.push(`missing ${rel}`);
|
|
105
138
|
}
|
|
106
139
|
}
|
|
107
|
-
const roundState = await getMessengerRoundState(runDir,
|
|
108
|
-
const requireSprint = focus === "quality" || r >= 4;
|
|
140
|
+
const roundState = await getMessengerRoundState(runDir, 1);
|
|
109
141
|
const messengerCheck = messengerRoundDebateReady(
|
|
110
142
|
roundState,
|
|
111
|
-
|
|
143
|
+
true,
|
|
112
144
|
dialogueOpts,
|
|
113
145
|
);
|
|
114
146
|
if (!messengerCheck.ok) {
|
|
115
147
|
for (const e of messengerCheck.errors) {
|
|
116
|
-
errors.push(`round
|
|
148
|
+
errors.push(`consolidated round messenger: ${e}`);
|
|
149
|
+
}
|
|
150
|
+
}
|
|
151
|
+
} else {
|
|
152
|
+
const roundIndices = [
|
|
153
|
+
...new Set(
|
|
154
|
+
Object.values(coverage.rounds_by_focus).filter(
|
|
155
|
+
(v): v is number => typeof v === "number",
|
|
156
|
+
),
|
|
157
|
+
),
|
|
158
|
+
];
|
|
159
|
+
for (const r of roundIndices) {
|
|
160
|
+
const focus = coverage.focus_by_round[r] ?? null;
|
|
161
|
+
for (const rel of laneArtifactPathsForRound(r, focus)) {
|
|
162
|
+
const abs = join(runDir, rel);
|
|
163
|
+
if (!(await fileExists(abs))) {
|
|
164
|
+
errors.push(`missing ${rel}`);
|
|
165
|
+
}
|
|
166
|
+
}
|
|
167
|
+
const roundState = await getMessengerRoundState(runDir, r);
|
|
168
|
+
const requireSprint = focus === "quality" || r >= 4;
|
|
169
|
+
const messengerCheck = messengerRoundDebateReady(
|
|
170
|
+
roundState,
|
|
171
|
+
requireSprint,
|
|
172
|
+
dialogueOpts,
|
|
173
|
+
);
|
|
174
|
+
if (!messengerCheck.ok) {
|
|
175
|
+
for (const e of messengerCheck.errors) {
|
|
176
|
+
errors.push(`round ${r} messenger: ${e}`);
|
|
177
|
+
}
|
|
117
178
|
}
|
|
118
179
|
}
|
|
119
180
|
}
|
|
@@ -203,7 +264,9 @@ export async function validatePlanDebateGate(
|
|
|
203
264
|
}
|
|
204
265
|
|
|
205
266
|
export function isReviewRoundArtifactPath(relPath: string): boolean {
|
|
206
|
-
|
|
207
|
-
|
|
267
|
+
const norm = relPath.replace(/\\/g, "/");
|
|
268
|
+
return (
|
|
269
|
+
/^artifacts\/review-round-r\d+\.yaml$/i.test(norm) ||
|
|
270
|
+
norm === CONSOLIDATED_REVIEW_ARTIFACT
|
|
208
271
|
);
|
|
209
272
|
}
|
|
@@ -2,13 +2,13 @@
|
|
|
2
2
|
* Shared Review Gate lane list for a round (gate + round-status).
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
|
-
import type {
|
|
5
|
+
import type { PlanDebateRoundFocus } from "./plan-debate-focus.js";
|
|
6
6
|
import type { DebateLaneKind } from "./plan-debate-lane.js";
|
|
7
7
|
|
|
8
8
|
/** Lanes required before review-integrator for this round. */
|
|
9
9
|
export function lanesForRound(
|
|
10
10
|
roundIndex: number,
|
|
11
|
-
debateRoundFocus?:
|
|
11
|
+
debateRoundFocus?: PlanDebateRoundFocus | null,
|
|
12
12
|
): DebateLaneKind[] {
|
|
13
13
|
const lanes: DebateLaneKind[] = ["validation-turn", "adversary-brief"];
|
|
14
14
|
if (roundIndex === 1) {
|
|
@@ -23,7 +23,7 @@ export function lanesForRound(
|
|
|
23
23
|
/** Relative artifact paths for lane YAML + review-round. */
|
|
24
24
|
export function laneArtifactPathsForRound(
|
|
25
25
|
roundIndex: number,
|
|
26
|
-
debateRoundFocus?:
|
|
26
|
+
debateRoundFocus?: PlanDebateRoundFocus | null,
|
|
27
27
|
): string[] {
|
|
28
28
|
const paths = lanesForRound(roundIndex, debateRoundFocus).map((lane) => {
|
|
29
29
|
switch (lane) {
|
|
@@ -42,3 +42,27 @@ export function laneArtifactPathsForRound(
|
|
|
42
42
|
paths.push(`artifacts/review-round-r${roundIndex}.yaml`);
|
|
43
43
|
return paths;
|
|
44
44
|
}
|
|
45
|
+
|
|
46
|
+
/** Lanes for consolidated Review Gate (single round, parallel-friendly). */
|
|
47
|
+
export function lanesForConsolidatedRound(): DebateLaneKind[] {
|
|
48
|
+
return ["validation-turn", "adversary-brief", "sprint-audit"];
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
export function laneArtifactPathsForConsolidatedRound(): string[] {
|
|
52
|
+
const roundIndex = 1;
|
|
53
|
+
return [
|
|
54
|
+
...lanesForConsolidatedRound().map((lane) => {
|
|
55
|
+
switch (lane) {
|
|
56
|
+
case "validation-turn":
|
|
57
|
+
return `artifacts/validation-turn-r${roundIndex}.yaml`;
|
|
58
|
+
case "adversary-brief":
|
|
59
|
+
return `artifacts/adversary-brief-r${roundIndex}.yaml`;
|
|
60
|
+
case "sprint-audit":
|
|
61
|
+
return `artifacts/sprint-audit-r${roundIndex}.yaml`;
|
|
62
|
+
default:
|
|
63
|
+
return `artifacts/${lane}-r${roundIndex}.yaml`;
|
|
64
|
+
}
|
|
65
|
+
}),
|
|
66
|
+
"artifacts/review-round-consolidated.yaml",
|
|
67
|
+
];
|
|
68
|
+
}
|
|
@@ -7,12 +7,15 @@ import { access } from "node:fs/promises";
|
|
|
7
7
|
import { join } from "node:path";
|
|
8
8
|
import { capsForDebate } from "./debate-bus-core.js";
|
|
9
9
|
import {
|
|
10
|
-
type
|
|
10
|
+
type PlanDebateRoundFocus,
|
|
11
11
|
readDebateRoundFocus,
|
|
12
12
|
} from "./plan-debate-focus.js";
|
|
13
13
|
import { planDebateIdForRun } from "./plan-debate-id.js";
|
|
14
14
|
import { laneArtifactPath } from "./plan-debate-lane.js";
|
|
15
|
-
import {
|
|
15
|
+
import {
|
|
16
|
+
lanesForConsolidatedRound,
|
|
17
|
+
lanesForRound,
|
|
18
|
+
} from "./plan-debate-lanes.js";
|
|
16
19
|
import {
|
|
17
20
|
getMessengerRoundState,
|
|
18
21
|
loadMessengerState,
|
|
@@ -40,26 +43,32 @@ export interface RoundStatusResult {
|
|
|
40
43
|
dialogue: { ok: boolean; errors: string[] };
|
|
41
44
|
unresolved_claim_ids: string[];
|
|
42
45
|
exchange_count: number;
|
|
43
|
-
debate_round_focus?:
|
|
46
|
+
debate_round_focus?: PlanDebateRoundFocus | null;
|
|
44
47
|
}
|
|
45
48
|
|
|
46
49
|
export async function getPlanDebateRoundStatus(
|
|
47
50
|
runDir: string,
|
|
48
51
|
roundIndex: number,
|
|
49
52
|
runId?: string,
|
|
50
|
-
opts?: { debate_round_focus?:
|
|
53
|
+
opts?: { debate_round_focus?: PlanDebateRoundFocus },
|
|
51
54
|
): Promise<RoundStatusResult> {
|
|
55
|
+
const messengerState = await loadMessengerState(runDir);
|
|
56
|
+
const consolidated =
|
|
57
|
+
messengerState?.review_gate_mode === "consolidated" && roundIndex === 1;
|
|
52
58
|
const focus =
|
|
53
59
|
opts?.debate_round_focus ??
|
|
60
|
+
(consolidated ? ("all" as PlanDebateRoundFocus) : null) ??
|
|
54
61
|
(await readDebateRoundFocus(runDir, roundIndex));
|
|
55
62
|
const missing: string[] = [];
|
|
56
|
-
|
|
63
|
+
const laneList = consolidated
|
|
64
|
+
? lanesForConsolidatedRound()
|
|
65
|
+
: lanesForRound(roundIndex, focus);
|
|
66
|
+
for (const lane of laneList) {
|
|
57
67
|
const rel = laneArtifactPath(lane, roundIndex);
|
|
58
68
|
if (!(await exists(join(runDir, rel)))) {
|
|
59
69
|
missing.push(rel);
|
|
60
70
|
}
|
|
61
71
|
}
|
|
62
|
-
const messengerState = await loadMessengerState(runDir);
|
|
63
72
|
const profile = messengerState?.debate_profile;
|
|
64
73
|
const caps = capsForDebate(
|
|
65
74
|
runId ? planDebateIdForRun(runId) : `plan-${runId ?? "unknown"}`,
|
|
@@ -73,7 +82,9 @@ export async function getPlanDebateRoundStatus(
|
|
|
73
82
|
if (!dialogue.ok) {
|
|
74
83
|
missing.push(...dialogue.errors.map((e) => `messenger: ${e}`));
|
|
75
84
|
}
|
|
76
|
-
const reviewRound =
|
|
85
|
+
const reviewRound = consolidated
|
|
86
|
+
? "artifacts/review-round-consolidated.yaml"
|
|
87
|
+
: `artifacts/review-round-r${roundIndex}.yaml`;
|
|
77
88
|
const reviewRoundOnDisk = await exists(join(runDir, reviewRound));
|
|
78
89
|
|
|
79
90
|
let next_tool: string | undefined;
|
|
@@ -63,6 +63,8 @@ export interface MessengerState {
|
|
|
63
63
|
rounds: Record<string, MessengerRoundState>;
|
|
64
64
|
debate_profile?: DebateProfile;
|
|
65
65
|
required_focuses?: PlanDebateFocus[];
|
|
66
|
+
/** consolidated = single Review Gate round; threaded = per-focus rounds */
|
|
67
|
+
review_gate_mode?: "consolidated" | "threaded";
|
|
66
68
|
}
|
|
67
69
|
|
|
68
70
|
function messengerRoot(runDir: string): string {
|
|
@@ -84,6 +86,7 @@ export async function initPlanMessenger(
|
|
|
84
86
|
debateId: string;
|
|
85
87
|
debate_profile?: DebateProfile;
|
|
86
88
|
required_focuses?: PlanDebateFocus[];
|
|
89
|
+
review_gate_mode?: "consolidated" | "threaded";
|
|
87
90
|
},
|
|
88
91
|
): Promise<string> {
|
|
89
92
|
const root = messengerRoot(runDir);
|
|
@@ -97,6 +100,7 @@ export async function initPlanMessenger(
|
|
|
97
100
|
rounds: {},
|
|
98
101
|
debate_profile: opts.debate_profile,
|
|
99
102
|
required_focuses: opts.required_focuses,
|
|
103
|
+
review_gate_mode: opts.review_gate_mode,
|
|
100
104
|
};
|
|
101
105
|
await writeFile(
|
|
102
106
|
join(root, "state.json"),
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Consolidated vs threaded Review Gate strategy for plan-phase debate.
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import type {
|
|
6
|
+
DebateEligibilityResult,
|
|
7
|
+
PlanReviewGateStrategy,
|
|
8
|
+
} from "./plan-debate-eligibility.js";
|
|
9
|
+
import type { PlanDebateFocus } from "./plan-debate-focus.js";
|
|
10
|
+
|
|
11
|
+
export type { PlanReviewGateStrategy };
|
|
12
|
+
|
|
13
|
+
export const CONSOLIDATED_REVIEW_ROUND = 1;
|
|
14
|
+
export const CONSOLIDATED_REVIEW_ARTIFACT =
|
|
15
|
+
"artifacts/review-round-consolidated.yaml";
|
|
16
|
+
|
|
17
|
+
export function planReviewGateStrategyFromEligibility(
|
|
18
|
+
eligibility: DebateEligibilityResult,
|
|
19
|
+
): PlanReviewGateStrategy {
|
|
20
|
+
return (
|
|
21
|
+
eligibility.review_gate_strategy ?? {
|
|
22
|
+
mode: eligibility.profile === "fast" ? "consolidated" : "threaded",
|
|
23
|
+
profile: eligibility.profile,
|
|
24
|
+
required_focuses: [...eligibility.required_focuses],
|
|
25
|
+
min_focus_rounds: eligibility.min_focus_rounds,
|
|
26
|
+
max_rounds: eligibility.max_rounds,
|
|
27
|
+
max_exchanges_per_round: eligibility.max_exchanges_per_round,
|
|
28
|
+
round_token_cap: eligibility.round_token_cap,
|
|
29
|
+
debate_global_cap: eligibility.debate_global_cap,
|
|
30
|
+
rationale: [...eligibility.rationale],
|
|
31
|
+
}
|
|
32
|
+
);
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
export function isConsolidatedReviewStrategy(
|
|
36
|
+
strategy: PlanReviewGateStrategy,
|
|
37
|
+
): boolean {
|
|
38
|
+
return strategy.mode === "consolidated";
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
/** Focus areas covered in a single consolidated review round (spec + quality gate). */
|
|
42
|
+
export const CONSOLIDATED_REVIEW_FOCUS_AREAS: readonly PlanDebateFocus[] = [
|
|
43
|
+
"spec",
|
|
44
|
+
"quality",
|
|
45
|
+
];
|
|
46
|
+
|
|
47
|
+
export function consolidatedReviewFocusesSatisfied(
|
|
48
|
+
covered: readonly string[],
|
|
49
|
+
): boolean {
|
|
50
|
+
return CONSOLIDATED_REVIEW_FOCUS_AREAS.every((f) => covered.includes(f));
|
|
51
|
+
}
|
|
@@ -235,6 +235,7 @@ export default function traceRecorder(pi: ExtensionAPI) {
|
|
|
235
235
|
if (shouldEmitStarted) {
|
|
236
236
|
captureHarnessEvent(sessionId, "harness_run_started", {
|
|
237
237
|
harness_run_id: runId,
|
|
238
|
+
run_id: runId,
|
|
238
239
|
harness_plan_id: activeRun.planId,
|
|
239
240
|
harness_phase: activeRun.phase,
|
|
240
241
|
pi_session_id: sessionId,
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
schema_version: "1.0.0"
|
|
2
|
+
problem_framing: Validate harness plan-phase with fixture-driven smoke
|
|
3
|
+
sub_problems:
|
|
4
|
+
- DAG validation
|
|
5
|
+
- Debate gate coverage
|
|
6
|
+
internal_references:
|
|
7
|
+
- path: .pi/harness/evals/smoke/smoke-harness-plan.mjs
|
|
8
|
+
relevance: Existing smoke pattern
|
|
9
|
+
reuse_signal: high
|
|
10
|
+
external_references: []
|
|
11
|
+
solution_patterns:
|
|
12
|
+
- name: fixture-driven gate
|
|
13
|
+
provenance: in-repo smoke
|
|
14
|
+
fit: Validates plan pipeline without live agents
|
|
15
|
+
tradeoffs:
|
|
16
|
+
pros: [Deterministic CI]
|
|
17
|
+
cons: []
|
|
18
|
+
risks: []
|
|
19
|
+
similar_implementations: []
|
|
20
|
+
recommended_approach:
|
|
21
|
+
summary: Extend minimal-med fixture with implementation artifact
|
|
22
|
+
recommended_approach_confidence: high
|
|
23
|
+
confidence_rationale: Reuses established smoke-harness-plan pattern
|
|
24
|
+
evidence_refs:
|
|
25
|
+
- .pi/harness/evals/smoke/smoke-harness-plan.mjs
|
|
26
|
+
- .pi/scripts/validate-plan-dag.mjs
|
|
27
|
+
anti_patterns: []
|
|
28
|
+
open_questions: []
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
schema_version: "1.0.0"
|
|
2
|
+
round_index: 1
|
|
3
|
+
debate_round_focus: all
|
|
4
|
+
round_summary: Consolidated review gate for fast profile fixture
|
|
5
|
+
validation_summary: Spec and quality checks pass in one round
|
|
6
|
+
adversary_summary: No blockers
|
|
7
|
+
disputes: []
|
|
8
|
+
recommended_packet_patches: []
|
|
9
|
+
review_gate_ready: true
|
|
10
|
+
participants:
|
|
11
|
+
- PlanEvaluatorAgent
|
|
12
|
+
- PlanAdversaryAgent
|
|
13
|
+
- SprintContractAuditorAgent
|
|
14
|
+
- ReviewIntegratorAgent
|
|
15
|
+
claims:
|
|
16
|
+
- consolidated review gate ready
|
|
17
|
+
rebuttals: []
|
|
18
|
+
evidence_refs: []
|
|
19
|
+
token_usage:
|
|
20
|
+
per_agent:
|
|
21
|
+
PlanEvaluatorAgent: 120
|
|
22
|
+
PlanAdversaryAgent: 100
|
|
23
|
+
SprintContractAuditorAgent: 80
|
|
24
|
+
round_total: 300
|
|
25
|
+
consensus_delta: 0.1
|