ultimate-pi 0.14.0 → 0.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agents/skills/harness-debate-plan/SKILL.md +41 -61
- package/.agents/skills/harness-governor/SKILL.md +11 -0
- package/.agents/skills/harness-orchestration/SKILL.md +5 -3
- package/.agents/skills/harness-plan/SKILL.md +11 -9
- package/.pi/agents/harness/adversary.md +1 -1
- package/.pi/agents/harness/evaluator.md +1 -1
- package/.pi/agents/harness/executor.md +1 -1
- package/.pi/agents/harness/incident-recorder.md +1 -1
- package/.pi/agents/harness/meta-optimizer.md +1 -1
- package/.pi/agents/harness/planning/decompose.md +8 -35
- package/.pi/agents/harness/planning/execution-plan-author.md +27 -15
- package/.pi/agents/harness/planning/hypothesis-validator.md +23 -6
- package/.pi/agents/harness/planning/hypothesis.md +4 -27
- package/.pi/agents/harness/planning/implementation-researcher.md +43 -0
- package/.pi/agents/harness/planning/plan-adversary.md +20 -5
- package/.pi/agents/harness/planning/plan-evaluator.md +28 -6
- package/.pi/agents/harness/planning/review-integrator.md +23 -10
- package/.pi/agents/harness/planning/scout-graphify.md +4 -23
- package/.pi/agents/harness/planning/scout-semantic.md +3 -18
- package/.pi/agents/harness/planning/scout-structure.md +3 -18
- package/.pi/agents/harness/planning/sprint-contract-auditor.md +22 -6
- package/.pi/agents/harness/planning/stack-researcher.md +21 -11
- package/.pi/agents/harness/tie-breaker.md +1 -1
- package/.pi/agents/harness/trace-librarian.md +1 -1
- package/.pi/extensions/budget-guard.ts +33 -19
- package/.pi/extensions/harness-debate-tools.ts +280 -19
- package/.pi/extensions/harness-live-widget.ts +39 -159
- package/.pi/extensions/harness-plan-approval.ts +47 -5
- package/.pi/extensions/harness-run-context.ts +96 -2
- package/.pi/extensions/harness-subagent-submit.ts +195 -0
- package/.pi/extensions/lib/debate-bus-core.ts +108 -17
- package/.pi/extensions/lib/debate-bus-state.ts +6 -0
- package/.pi/extensions/lib/harness-subagent-policy.ts +45 -0
- package/.pi/extensions/lib/harness-subagent-submit-pipeline.ts +82 -0
- package/.pi/extensions/lib/harness-subagent-submit-registry.ts +172 -0
- package/.pi/extensions/lib/harness-subagents-bridge.ts +42 -0
- package/.pi/extensions/lib/plan-approval/plan-review.ts +56 -0
- package/.pi/extensions/lib/plan-approval/types.ts +1 -0
- package/.pi/extensions/lib/plan-debate-eligibility.ts +214 -0
- package/.pi/extensions/lib/plan-debate-focus.ts +151 -0
- package/.pi/extensions/lib/plan-debate-gate.ts +88 -34
- package/.pi/extensions/lib/plan-debate-lane.ts +15 -0
- package/.pi/extensions/lib/plan-debate-lanes.ts +44 -0
- package/.pi/extensions/lib/plan-debate-round-status.ts +63 -20
- package/.pi/extensions/lib/plan-messenger.ts +93 -17
- package/.pi/extensions/policy-gate.ts +1 -1
- package/.pi/harness/README.md +1 -1
- package/.pi/harness/agents.manifest.json +25 -21
- package/.pi/harness/docs/adrs/0034-darwin-plan-research-pipeline.md +1 -3
- package/.pi/harness/docs/adrs/0035-plan-phase-review-gate.md +13 -5
- package/.pi/harness/docs/adrs/0036-implementation-research-and-selective-debate.md +51 -0
- package/.pi/harness/docs/adrs/0037-subagent-submit-tools.md +31 -0
- package/.pi/harness/docs/adrs/0038-budget-telemetry-only.md +23 -0
- package/.pi/harness/docs/adrs/README.md +4 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/artifacts/implementation-research.yaml +28 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/artifacts/review-round-r1.yaml +24 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/artifacts/review-round-r2.yaml +25 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/plan-packet.yaml +196 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/plan-review.md +14 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/research-brief.yaml +62 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/implementation-research.yaml +28 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r2.yaml +24 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r3.yaml +24 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/research-brief.yaml +29 -0
- package/.pi/harness/evals/smoke/smoke-harness-plan.mjs +97 -16
- package/.pi/harness/specs/harness-executor-handoff.schema.json +19 -0
- package/.pi/harness/specs/harness-human-required.schema.json +16 -0
- package/.pi/harness/specs/plan-implementation-research-brief.schema.json +128 -0
- package/.pi/harness/specs/plan-review-round-draft.schema.json +1 -1
- package/.pi/harness/specs/plan-scout-findings.schema.json +19 -0
- package/.pi/harness/specs/round-result.schema.json +15 -2
- package/.pi/lib/harness-agent-output.ts +45 -0
- package/.pi/lib/harness-budget-enforce.ts +18 -0
- package/.pi/lib/harness-schema-validate.ts +89 -0
- package/.pi/lib/harness-spawn-parse.ts +86 -0
- package/.pi/lib/harness-subagent-submit-path.ts +41 -0
- package/.pi/lib/harness-ui-state.ts +107 -2
- package/.pi/prompts/harness-auto.md +2 -2
- package/.pi/prompts/harness-plan.md +94 -42
- package/.pi/prompts/harness-run.md +2 -2
- package/.pi/prompts/planning-rubrics.md +31 -0
- package/.pi/scripts/harness-verify.mjs +2 -0
- package/.pi/scripts/harness_web/__pycache__/__init__.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/config.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/output.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/scrape.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/search.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/search_ddg.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/search_searxng.cpython-314.pyc +0 -0
- package/CHANGELOG.md +21 -0
- package/package.json +4 -2
- package/vendor/pi-subagents/src/subagents.ts +29 -3
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pre-debate profile selection (full | standard | light).
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { PLAN_FOCUS_AREAS, type PlanDebateFocus } from "./plan-debate-focus.js";
|
|
6
|
+
|
|
7
|
+
/** Debate depth chosen before the plan debate starts. */
export type DebateProfile = "full" | "standard" | "light";
|
|
8
|
+
|
|
9
|
+
/**
 * Pre-debate signals consumed by `harnessPlanDebateEligibility`.
 * Every field is optional; absent values fall back to the conservative path.
 */
export interface DebateEligibilityInput {
  /** Compared case-insensitively against "low" | "med" | "high"; treated as "med" when absent. */
  risk_level?: string;
  /** Only a literal `true` counts as a material fork. */
  material_fork?: boolean;
  /** Only an explicit `false` is treated as a DAG validation failure. */
  dag_pass?: boolean;
  /** Only a literal `true` counts as a manually patched DAG. */
  dag_manually_patched?: boolean;
  /** Implementation research brief; `open_questions`, `solution_patterns`, `similar_implementations` and `recommended_approach` are read from it. */
  implementation_brief?: Record<string, unknown> | null;
  /** Stack research brief; `recommended_primary` is read from it. */
  stack_brief?: Record<string, unknown> | null;
  /** Decomposition output; the length of its `tensions` array is read. */
  decomposition?: Record<string, unknown> | null;
}
|
|
18
|
+
|
|
19
|
+
/** Outcome of pre-debate profile selection. */
export interface DebateEligibilityResult {
  /** Selected debate profile. */
  profile: DebateProfile;
  /** Focus areas the debate must cover for this profile. */
  required_focuses: PlanDebateFocus[];
  /** Budget values below are copied from the budget table chosen for the profile. */
  min_focus_rounds: number;
  max_rounds: number;
  max_exchanges_per_round: number;
  round_token_cap: number;
  debate_global_cap: number;
  /** True when the selection logic flagged the plan for human attention. */
  human_required: boolean;
  /** Human-readable reasons recorded while selecting the profile, in evaluation order. */
  rationale: string[];
}
|
|
30
|
+
|
|
31
|
+
/** Focus areas required when the "light" profile is selected. */
const LIGHT_FOCUS: PlanDebateFocus[] = ["spec", "quality"];
|
|
32
|
+
|
|
33
|
+
/**
 * Narrow an arbitrary value to a plain key/value record.
 *
 * @returns the same object when `value` is a non-null, non-array object;
 *          otherwise `null`.
 */
function asRecord(value: unknown): Record<string, unknown> | null {
  if (value === null || typeof value !== "object") {
    return null;
  }
  if (Array.isArray(value)) {
    return null;
  }
  return value as Record<string, unknown>;
}
|
|
38
|
+
|
|
39
|
+
/**
 * Extract the non-empty strings from an arbitrary value.
 *
 * @returns trimmed string entries in their original order; non-array input,
 *          non-string entries and whitespace-only strings yield nothing.
 */
function strList(value: unknown): string[] {
  if (!Array.isArray(value)) {
    return [];
  }
  const collected: string[] = [];
  for (const entry of value) {
    if (typeof entry !== "string") continue;
    const trimmed = entry.trim();
    if (trimmed.length > 0) {
      collected.push(trimmed);
    }
  }
  return collected;
}
|
|
45
|
+
|
|
46
|
+
/**
 * Read the `open_questions` entries of an implementation brief.
 *
 * @returns the non-empty string entries, or an empty list when there is no brief.
 */
function implementationOpenQuestions(
  brief: Record<string, unknown> | null,
): string[] {
  return brief ? strList(brief.open_questions) : [];
}
|
|
52
|
+
|
|
53
|
+
/**
 * Read the `recommended_approach` object of an implementation brief.
 *
 * @returns the nested record, or `null` when the brief is absent or the field
 *          is not a plain object.
 */
function recommendedApproach(
  brief: Record<string, unknown> | null,
): Record<string, unknown> | null {
  const candidate = brief == null ? undefined : brief.recommended_approach;
  return asRecord(candidate);
}
|
|
58
|
+
|
|
59
|
+
/**
 * Whether the stack brief names a primary recommendation.
 *
 * @returns true only when `recommended_primary` is a string with
 *          non-whitespace content.
 */
function stackHasClearPrimary(stack: Record<string, unknown> | null): boolean {
  const primary = stack ? stack.recommended_primary : undefined;
  if (typeof primary !== "string") {
    return false;
  }
  return primary.trim() !== "";
}
|
|
64
|
+
|
|
65
|
+
/**
 * Decide whether the implementation brief is strong enough for the light profile.
 *
 * All of the following must hold:
 * - `recommended_approach` is an object whose `recommended_approach_confidence`
 *   lower-cases to "high";
 * - it has a non-empty `confidence_rationale` and at least two `evidence_refs`;
 * - the brief has no `open_questions`;
 * - no entry of `solution_patterns` lists a risk mentioning
 *   "unmitigated", "critical" or "blocker" (case-insensitive);
 * - `similar_implementations` is a non-empty array.
 */
function confidenceAllowsLight(brief: Record<string, unknown> | null): boolean {
  const approach = recommendedApproach(brief);
  if (approach === null) return false;

  const confidence = String(
    approach.recommended_approach_confidence ?? "",
  ).toLowerCase();
  if (confidence !== "high") return false;

  const rationaleText =
    typeof approach.confidence_rationale === "string"
      ? approach.confidence_rationale.trim()
      : "";
  if (rationaleText.length === 0) return false;
  if (strList(approach.evidence_refs).length < 2) return false;

  if (implementationOpenQuestions(brief).length > 0) return false;

  const severeRisk = /unmitigated|critical|blocker/i;
  const rawPatterns: unknown = brief?.solution_patterns;
  const patterns: unknown[] = Array.isArray(rawPatterns) ? rawPatterns : [];
  const hasSevereRisk = patterns.some((entry) => {
    const pattern = asRecord(entry);
    if (pattern === null) return false;
    return strList(pattern.risks).some((risk) => severeRisk.test(risk));
  });
  if (hasSevereRisk) return false;

  // At least one similar implementation must be listed.
  const rawSimilar: unknown = brief?.similar_implementations;
  return Array.isArray(rawSimilar) && rawSimilar.length > 0;
}
|
|
93
|
+
|
|
94
|
+
/**
 * Count the entries of `decomposition.tensions`.
 *
 * @returns the array length, or 0 when the decomposition is absent or
 *          `tensions` is not an array.
 */
function decompositionTensionCount(
  decomposition: Record<string, unknown> | null,
): number {
  const tensions: unknown = decomposition ? decomposition.tensions : undefined;
  if (!Array.isArray(tensions)) {
    return 0;
  }
  return tensions.length;
}
|
|
102
|
+
|
|
103
|
+
/** Budget values returned by `capsForProfile` for every profile other than "light". */
export const PLAN_BUDGET_STANDARD = {
  min_focus_rounds: 4,
  max_rounds: 12,
  max_exchanges_per_round: 3,
  round_token_cap: 8000,
  debate_global_cap: 80000,
} as const;
|
|
110
|
+
|
|
111
|
+
/** Budget values returned by `capsForProfile` for the "light" profile. */
export const PLAN_BUDGET_LIGHT = {
  min_focus_rounds: 2,
  max_rounds: 8,
  max_exchanges_per_round: 3,
  round_token_cap: 6000,
  debate_global_cap: 40000,
} as const;
|
|
118
|
+
|
|
119
|
+
/**
 * Look up the budget values for a profile.
 *
 * "light" gets a copy of `PLAN_BUDGET_LIGHT`; "standard" and "full" both get a
 * copy of `PLAN_BUDGET_STANDARD`.
 */
function capsForProfile(
  profile: DebateProfile,
): Omit<
  DebateEligibilityResult,
  "profile" | "required_focuses" | "human_required" | "rationale"
> {
  // A fresh object is returned each time so callers never share the constant.
  return profile === "light"
    ? { ...PLAN_BUDGET_LIGHT }
    : { ...PLAN_BUDGET_STANDARD };
}
|
|
134
|
+
|
|
135
|
+
/**
 * Select debate profile from pre-debate signals only (no R1 hypothesis output).
 *
 * Selection order:
 * 1. "full" when risk is high, there is a material fork, the implementation
 *    brief has open questions, the DAG was manually patched, or the
 *    decomposition lists three or more tensions.
 * 2. "light" when risk is low, none of fork / DAG patch / DAG failure apply,
 *    the implementation brief passes `confidenceAllowsLight`, and the stack
 *    brief names a primary recommendation.
 * 3. "standard" otherwise.
 *
 * `human_required` is set when the brief lists two or more solution patterns
 * and also has open questions.
 *
 * @param input pre-debate signals; missing fields take the conservative path.
 * @returns the profile, its required focuses and budget values, the
 *          human-required flag, and the rationale lines in evaluation order.
 */
export function harnessPlanDebateEligibility(
  input: DebateEligibilityInput,
): DebateEligibilityResult {
  const notes: string[] = [];
  const riskLevel = String(input.risk_level ?? "med").toLowerCase();
  const implBrief = input.implementation_brief ?? null;
  const stackBrief = input.stack_brief ?? null;
  const openQuestionCount = implementationOpenQuestions(implBrief).length;
  const hasOpenQuestions = openQuestionCount > 0;
  const hasMaterialFork = input.material_fork === true;
  const dagWasPatched = input.dag_manually_patched === true;
  const dagFailed = input.dag_pass === false;

  if (dagFailed) {
    notes.push("DAG validation failed — use standard profile until fixed");
  }

  if (hasOpenQuestions) {
    notes.push(
      `implementation open_questions (${openQuestionCount}) — not eligible for light`,
    );
  }

  const rawPatterns: unknown = implBrief?.solution_patterns;
  const human_required =
    Array.isArray(rawPatterns) && rawPatterns.length >= 2 && hasOpenQuestions;
  if (human_required) {
    notes.push("conflicting external patterns with open questions");
  }

  // The default note is always recorded, even when a later branch overrides it.
  notes.push("default profile: standard (fail-safe)");
  let profile: DebateProfile = "standard";

  const needsFull =
    riskLevel === "high" ||
    hasMaterialFork ||
    hasOpenQuestions ||
    dagWasPatched ||
    decompositionTensionCount(input.decomposition ?? null) >= 3;

  if (needsFull) {
    profile = "full";
    notes.push(
      "full: high risk, material fork, open questions, DAG patch, or tensions",
    );
  } else if (
    riskLevel === "low" &&
    !(hasMaterialFork || dagWasPatched || dagFailed) &&
    confidenceAllowsLight(implBrief) &&
    stackHasClearPrimary(stackBrief)
  ) {
    profile = "light";
    notes.push(
      "light: low risk, clear stack, high-confidence implementation approach",
    );
  } else if (riskLevel === "med") {
    profile = "standard";
    notes.push("standard: med risk default");
  }

  const required_focuses: PlanDebateFocus[] =
    profile === "light" ? [...LIGHT_FOCUS] : [...PLAN_FOCUS_AREAS];

  return {
    profile,
    required_focuses,
    ...capsForProfile(profile),
    human_required,
    rationale: notes,
  };
}
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Plan-phase Review Gate focus coverage (spec | wbs | schedule | quality).
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { constants } from "node:fs";
|
|
6
|
+
import { access, readdir, readFile } from "node:fs/promises";
|
|
7
|
+
import { join } from "node:path";
|
|
8
|
+
import { parse as parseYaml } from "yaml";
|
|
9
|
+
|
|
10
|
+
/** All plan-phase Review Gate focus areas; used as the default required set. */
export const PLAN_FOCUS_AREAS = ["spec", "wbs", "schedule", "quality"] as const;
|
|
11
|
+
/** Union of the focus names listed in `PLAN_FOCUS_AREAS`. */
export type PlanDebateFocus = (typeof PLAN_FOCUS_AREAS)[number];
|
|
12
|
+
|
|
13
|
+
/** Summary of the review-round artifacts found for a run. */
export interface PlanFocusCoverage {
  /** Required focuses that appear in at least one parsed review round. */
  covered: PlanDebateFocus[];
  /** Required focuses that appear in no parsed review round. */
  missing: PlanDebateFocus[];
  /** Round index recorded for each focus seen while scanning. */
  rounds_by_focus: Partial<Record<PlanDebateFocus, number>>;
  /** Focus recorded for each round index seen while scanning. */
  focus_by_round: Partial<Record<number, PlanDebateFocus>>;
  /** `review_gate_ready === true` on the highest-numbered round. */
  last_review_gate_ready: boolean;
  /** Highest round index found in the artifact file names; 0 when none. */
  last_round_index: number;
}
|
|
21
|
+
|
|
22
|
+
/** Options for `getPlanFocusCoverage`. */
export interface PlanFocusCoverageOptions {
  /** Focuses to report on; an absent or empty list means all of `PLAN_FOCUS_AREAS`. */
  requiredFocuses?: readonly PlanDebateFocus[];
}
|
|
25
|
+
|
|
26
|
+
/**
 * Check whether `path` can be read by the current process.
 *
 * @returns true when `access(path, R_OK)` succeeds, false on any failure.
 */
async function fileExists(path: string): Promise<boolean> {
  let readable = true;
  try {
    await access(path, constants.R_OK);
  } catch {
    readable = false;
  }
  return readable;
}
|
|
34
|
+
|
|
35
|
+
/**
 * Read the `debate_round_focus` of a review-round draft.
 *
 * @returns the trimmed value when it is one of `PLAN_FOCUS_AREAS`, else `null`.
 */
function focusFromDraft(
  draft: Record<string, unknown>,
): PlanDebateFocus | null {
  const candidate = String(draft.debate_round_focus ?? "").trim();
  const known = PLAN_FOCUS_AREAS.find((area) => area === candidate);
  return known ?? null;
}
|
|
44
|
+
|
|
45
|
+
/**
 * Scan submitted review-round artifacts for focus coverage and last gate flag.
 *
 * Reads every `review-round-r<N>.yaml` under `<runDir>/artifacts`, in ascending
 * numeric round order, and records which focus each round declared.
 *
 * The scan fails closed:
 * - a missing or unreadable artifacts directory reports every required focus
 *   as missing;
 * - a round file that cannot be read, cannot be parsed, or does not parse to a
 *   mapping contributes no focus, and if it is the highest-numbered round the
 *   gate flag is reported as not ready rather than inherited from an earlier
 *   round.
 *
 * @param runDir run directory containing the `artifacts` folder.
 * @param opts optional required-focus override; empty or absent means all of
 *             `PLAN_FOCUS_AREAS`.
 * @returns coverage summary for the required focuses.
 */
export async function getPlanFocusCoverage(
  runDir: string,
  opts?: PlanFocusCoverageOptions,
): Promise<PlanFocusCoverage> {
  const required =
    opts?.requiredFocuses && opts.requiredFocuses.length > 0
      ? opts.requiredFocuses
      : PLAN_FOCUS_AREAS;
  const artifactsDir = join(runDir, "artifacts");
  const roundFilePattern = /^review-round-r(\d+)\.yaml$/i;

  let names: string[];
  try {
    names = await readdir(artifactsDir);
  } catch {
    return {
      covered: [],
      missing: [...required],
      rounds_by_focus: {},
      focus_by_round: {},
      last_review_gate_ready: false,
      last_round_index: 0,
    };
  }

  // Order by round number, not by file name: a lexicographic sort puts r10
  // before r2 and would let an older round overwrite a newer one per focus.
  const rounds: { name: string; roundIndex: number }[] = [];
  for (const name of names) {
    const m = roundFilePattern.exec(name);
    if (m) rounds.push({ name, roundIndex: Number(m[1]) });
  }
  rounds.sort(
    (a, b) =>
      a.roundIndex - b.roundIndex ||
      (a.name < b.name ? -1 : a.name > b.name ? 1 : 0),
  );

  const covered = new Set<PlanDebateFocus>();
  const rounds_by_focus: Partial<Record<PlanDebateFocus, number>> = {};
  const focus_by_round: Partial<Record<number, PlanDebateFocus>> = {};
  let last_review_gate_ready = false;
  let last_round_index = 0;

  for (const { name, roundIndex } of rounds) {
    if (roundIndex > last_round_index) last_round_index = roundIndex;
    const isLatestRound = roundIndex === last_round_index;

    let draft: Record<string, unknown> | null = null;
    try {
      const raw = await readFile(join(artifactsDir, name), "utf-8");
      const parsed: unknown = parseYaml(raw);
      // Empty files parse to null and scalars to primitives; only a mapping
      // can carry the fields read below.
      if (
        parsed !== null &&
        typeof parsed === "object" &&
        !Array.isArray(parsed)
      ) {
        draft = parsed as Record<string, unknown>;
      }
    } catch {
      draft = null;
    }

    if (draft === null) {
      // An unusable latest round must not inherit an earlier round's flag.
      if (isLatestRound) last_review_gate_ready = false;
      continue;
    }

    const focus = focusFromDraft(draft);
    if (focus) {
      covered.add(focus);
      rounds_by_focus[focus] = roundIndex;
      focus_by_round[roundIndex] = focus;
    }
    if (isLatestRound) {
      last_review_gate_ready = draft.review_gate_ready === true;
    }
  }

  return {
    covered: required.filter((f) => covered.has(f)),
    missing: required.filter((f) => !covered.has(f)),
    rounds_by_focus,
    focus_by_round,
    last_review_gate_ready,
    last_round_index,
  };
}
|
|
114
|
+
|
|
115
|
+
/** Options for `planDebateOutcomeComplete`. */
export interface PlanDebateOutcomeOptions {
  /** Focuses that must be covered; an absent or empty list means all of `PLAN_FOCUS_AREAS`. */
  requiredFocuses?: readonly PlanDebateFocus[];
  /** Minimum value for `last_round_index`; defaults to the number of required focuses. */
  minRoundIndex?: number;
}
|
|
119
|
+
|
|
120
|
+
/**
 * Decide whether the debate has reached a complete outcome.
 *
 * Complete means: every required focus is in `coverage.covered`, the last
 * review round is gate-ready, and the last round index is at least
 * `minRoundIndex` (default: the number of required focuses).
 */
export function planDebateOutcomeComplete(
  coverage: PlanFocusCoverage,
  opts?: PlanDebateOutcomeOptions,
): boolean {
  const requested = opts?.requiredFocuses;
  const required =
    requested !== undefined && requested.length > 0
      ? requested
      : PLAN_FOCUS_AREAS;
  const roundFloor = opts?.minRoundIndex ?? required.length;

  const allCovered = required.every((focus) =>
    coverage.covered.includes(focus),
  );
  if (!allCovered) return false;
  if (coverage.last_review_gate_ready !== true) return false;
  return coverage.last_round_index >= roundFloor;
}
|
|
136
|
+
|
|
137
|
+
/**
 * Read debate_round_focus from an existing review-round artifact.
 *
 * @param runDir run directory containing the `artifacts` folder.
 * @param roundIndex round number used in the `review-round-r<N>.yaml` name.
 * @returns the declared focus, or `null` when the file is not readable, does
 *          not parse, or names no known focus.
 */
export async function readDebateRoundFocus(
  runDir: string,
  roundIndex: number,
): Promise<PlanDebateFocus | null> {
  const artifactPath = join(
    runDir,
    "artifacts",
    `review-round-r${roundIndex}.yaml`,
  );
  const readable = await fileExists(artifactPath);
  if (!readable) {
    return null;
  }
  try {
    const text = await readFile(artifactPath, "utf-8");
    return focusFromDraft(parseYaml(text) as Record<string, unknown>);
  } catch {
    // Read and parse failures are reported as "no focus".
    return null;
  }
}
|
|
@@ -5,32 +5,21 @@
|
|
|
5
5
|
import { constants } from "node:fs";
|
|
6
6
|
import { access, readFile } from "node:fs/promises";
|
|
7
7
|
import { join } from "node:path";
|
|
8
|
+
import { isHarnessBudgetEnforceOn } from "../../lib/harness-budget-enforce.js";
|
|
9
|
+
import { capsForDebate } from "./debate-bus-core.js";
|
|
10
|
+
import {
|
|
11
|
+
getPlanFocusCoverage,
|
|
12
|
+
type PlanDebateFocus,
|
|
13
|
+
planDebateOutcomeComplete,
|
|
14
|
+
} from "./plan-debate-focus.js";
|
|
8
15
|
import { planDebateIdForRun } from "./plan-debate-id.js";
|
|
16
|
+
import { laneArtifactPathsForRound } from "./plan-debate-lanes.js";
|
|
9
17
|
import {
|
|
10
18
|
getMessengerRoundState,
|
|
11
19
|
loadMessengerState,
|
|
12
20
|
messengerRoundDebateReady,
|
|
13
21
|
} from "./plan-messenger.js";
|
|
14
22
|
|
|
15
|
-
const PLAN_ROUNDS = 4;
|
|
16
|
-
const FOCUS_BY_ROUND = ["spec", "wbs", "schedule", "quality"] as const;
|
|
17
|
-
|
|
18
|
-
function laneFilesForRound(roundIndex: number): string[] {
|
|
19
|
-
const n = roundIndex;
|
|
20
|
-
const lanes = [
|
|
21
|
-
`artifacts/validation-turn-r${n}.yaml`,
|
|
22
|
-
`artifacts/adversary-brief-r${n}.yaml`,
|
|
23
|
-
];
|
|
24
|
-
if (n === 1) {
|
|
25
|
-
lanes.unshift(`artifacts/hypothesis-validation-r${n}.yaml`);
|
|
26
|
-
}
|
|
27
|
-
if (n === 4) {
|
|
28
|
-
lanes.push(`artifacts/sprint-audit-r${n}.yaml`);
|
|
29
|
-
}
|
|
30
|
-
lanes.push(`artifacts/review-round-r${n}.yaml`);
|
|
31
|
-
return lanes;
|
|
32
|
-
}
|
|
33
|
-
|
|
34
23
|
async function fileExists(path: string): Promise<boolean> {
|
|
35
24
|
try {
|
|
36
25
|
await access(path, constants.R_OK);
|
|
@@ -64,6 +53,12 @@ export interface PlanDebateGateResult {
|
|
|
64
53
|
errors: string[];
|
|
65
54
|
warnings: string[];
|
|
66
55
|
debateId: string;
|
|
56
|
+
focus_coverage?: {
|
|
57
|
+
covered: string[];
|
|
58
|
+
missing: string[];
|
|
59
|
+
last_review_gate_ready: boolean;
|
|
60
|
+
};
|
|
61
|
+
debate_profile?: string;
|
|
67
62
|
}
|
|
68
63
|
|
|
69
64
|
export async function validatePlanDebateGate(
|
|
@@ -75,16 +70,47 @@ export async function validatePlanDebateGate(
|
|
|
75
70
|
const debateId = planDebateIdForRun(runId);
|
|
76
71
|
const runDir = join(projectRoot, ".pi", "harness", "runs", runId);
|
|
77
72
|
const debatesDir = join(projectRoot, ".pi", "harness", "debates");
|
|
73
|
+
const messenger = await loadMessengerState(runDir);
|
|
74
|
+
const debateProfile = messenger?.debate_profile ?? "standard";
|
|
75
|
+
const requiredFocuses: readonly PlanDebateFocus[] =
|
|
76
|
+
messenger?.required_focuses && messenger.required_focuses.length > 0
|
|
77
|
+
? messenger.required_focuses
|
|
78
|
+
: (["spec", "wbs", "schedule", "quality"] as const);
|
|
79
|
+
const caps = capsForDebate(debateId, debateProfile);
|
|
80
|
+
const coverage = await getPlanFocusCoverage(runDir, { requiredFocuses });
|
|
81
|
+
const dialogueOpts = {
|
|
82
|
+
max_exchanges_per_round: caps.max_exchanges_per_round,
|
|
83
|
+
};
|
|
78
84
|
|
|
79
|
-
for (
|
|
80
|
-
|
|
85
|
+
for (const focus of coverage.missing) {
|
|
86
|
+
errors.push(`focus not covered in submitted rounds: ${focus}`);
|
|
87
|
+
}
|
|
88
|
+
if (!coverage.last_review_gate_ready) {
|
|
89
|
+
errors.push("last submitted review round has review_gate_ready !== true");
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
const roundIndices = [
|
|
93
|
+
...new Set(
|
|
94
|
+
Object.values(coverage.rounds_by_focus).filter(
|
|
95
|
+
(v): v is number => typeof v === "number",
|
|
96
|
+
),
|
|
97
|
+
),
|
|
98
|
+
];
|
|
99
|
+
for (const r of roundIndices) {
|
|
100
|
+
const focus = coverage.focus_by_round[r] ?? null;
|
|
101
|
+
for (const rel of laneArtifactPathsForRound(r, focus)) {
|
|
81
102
|
const abs = join(runDir, rel);
|
|
82
103
|
if (!(await fileExists(abs))) {
|
|
83
104
|
errors.push(`missing ${rel}`);
|
|
84
105
|
}
|
|
85
106
|
}
|
|
86
107
|
const roundState = await getMessengerRoundState(runDir, r);
|
|
87
|
-
const
|
|
108
|
+
const requireSprint = focus === "quality" || r >= 4;
|
|
109
|
+
const messengerCheck = messengerRoundDebateReady(
|
|
110
|
+
roundState,
|
|
111
|
+
requireSprint,
|
|
112
|
+
dialogueOpts,
|
|
113
|
+
);
|
|
88
114
|
if (!messengerCheck.ok) {
|
|
89
115
|
for (const e of messengerCheck.errors) {
|
|
90
116
|
errors.push(`round ${r} messenger: ${e}`);
|
|
@@ -92,7 +118,22 @@ export async function validatePlanDebateGate(
|
|
|
92
118
|
}
|
|
93
119
|
}
|
|
94
120
|
|
|
95
|
-
|
|
121
|
+
if (
|
|
122
|
+
isHarnessBudgetEnforceOn() &&
|
|
123
|
+
coverage.last_round_index > caps.max_rounds
|
|
124
|
+
) {
|
|
125
|
+
errors.push(
|
|
126
|
+
`round_count ${coverage.last_round_index} exceeds max_rounds ${caps.max_rounds}`,
|
|
127
|
+
);
|
|
128
|
+
} else if (
|
|
129
|
+
!isHarnessBudgetEnforceOn() &&
|
|
130
|
+
coverage.last_round_index > caps.max_rounds
|
|
131
|
+
) {
|
|
132
|
+
warnings.push(
|
|
133
|
+
`round_count ${coverage.last_round_index} exceeds advisory max_rounds ${caps.max_rounds} (budget enforce off)`,
|
|
134
|
+
);
|
|
135
|
+
}
|
|
136
|
+
|
|
96
137
|
if (!messenger) {
|
|
97
138
|
errors.push(
|
|
98
139
|
"debate-messenger/state.json missing — call harness_debate_open",
|
|
@@ -103,9 +144,10 @@ export async function validatePlanDebateGate(
|
|
|
103
144
|
|
|
104
145
|
const jsonlPath = join(debatesDir, `${debateId}.jsonl`);
|
|
105
146
|
const { rounds, hasConsensus } = await countJsonlKinds(jsonlPath);
|
|
106
|
-
|
|
147
|
+
const minRounds = caps.min_focus_rounds;
|
|
148
|
+
if (rounds < minRounds) {
|
|
107
149
|
errors.push(
|
|
108
|
-
`${debateId}.jsonl has ${rounds}/${
|
|
150
|
+
`${debateId}.jsonl has ${rounds}/${minRounds} minimum round events — use harness_debate_submit_round per focus`,
|
|
109
151
|
);
|
|
110
152
|
}
|
|
111
153
|
if (!hasConsensus) {
|
|
@@ -114,6 +156,17 @@ export async function validatePlanDebateGate(
|
|
|
114
156
|
);
|
|
115
157
|
}
|
|
116
158
|
|
|
159
|
+
if (
|
|
160
|
+
!planDebateOutcomeComplete(coverage, {
|
|
161
|
+
requiredFocuses,
|
|
162
|
+
minRoundIndex: caps.min_focus_rounds,
|
|
163
|
+
})
|
|
164
|
+
) {
|
|
165
|
+
errors.push(
|
|
166
|
+
`debate outcome incomplete: required focuses [${requiredFocuses.join(", ")}] with last review_gate_ready true (profile=${debateProfile})`,
|
|
167
|
+
);
|
|
168
|
+
}
|
|
169
|
+
|
|
117
170
|
const consensusPath = join(debatesDir, `${debateId}.consensus.json`);
|
|
118
171
|
if (!(await fileExists(consensusPath))) {
|
|
119
172
|
errors.push(`missing ${debateId}.consensus.json`);
|
|
@@ -129,15 +182,10 @@ export async function validatePlanDebateGate(
|
|
|
129
182
|
}
|
|
130
183
|
}
|
|
131
184
|
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
const raw = await readFile(reviewPath, "utf-8");
|
|
137
|
-
if (!raw.includes(focus)) {
|
|
138
|
-
warnings.push(`review-round-r${r + 1} may not match focus ${focus}`);
|
|
139
|
-
}
|
|
140
|
-
}
|
|
185
|
+
if (rounds > caps.max_rounds) {
|
|
186
|
+
warnings.push(
|
|
187
|
+
`bus round count ${rounds} exceeds soft max_rounds ${caps.max_rounds}`,
|
|
188
|
+
);
|
|
141
189
|
}
|
|
142
190
|
|
|
143
191
|
return {
|
|
@@ -145,6 +193,12 @@ export async function validatePlanDebateGate(
|
|
|
145
193
|
errors,
|
|
146
194
|
warnings,
|
|
147
195
|
debateId,
|
|
196
|
+
focus_coverage: {
|
|
197
|
+
covered: coverage.covered,
|
|
198
|
+
missing: coverage.missing,
|
|
199
|
+
last_review_gate_ready: coverage.last_review_gate_ready,
|
|
200
|
+
},
|
|
201
|
+
debate_profile: debateProfile,
|
|
148
202
|
};
|
|
149
203
|
}
|
|
150
204
|
|
|
@@ -45,6 +45,21 @@ export function laneArtifactPath(
|
|
|
45
45
|
}
|
|
46
46
|
}
|
|
47
47
|
|
|
48
|
+
/**
 * Apply messenger side effects when artifact YAML was already written via submit tool.
 *
 * Serialises `doc` with `JSON.stringify` and forwards it to `applyDebateLane`
 * as the lane content, together with the run directory, lane and round index.
 */
export async function applyDebateLaneFromDoc(opts: {
  runDir: string;
  lane: DebateLaneKind;
  doc: Record<string, unknown>;
  roundIndex?: number;
}): Promise<ApplyDebateLaneResult> {
  const { runDir, lane, doc, roundIndex } = opts;
  return applyDebateLane({
    runDir,
    lane,
    content: JSON.stringify(doc),
    roundIndex,
  });
}
|
|
62
|
+
|
|
48
63
|
export function extractClaimIds(doc: Record<string, unknown>): string[] {
|
|
49
64
|
const explicit = doc.messenger_claim_ids;
|
|
50
65
|
if (Array.isArray(explicit)) {
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared Review Gate lane list for a round (gate + round-status).
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import type { PlanDebateFocus } from "./plan-debate-focus.js";
|
|
6
|
+
import type { DebateLaneKind } from "./plan-debate-lane.js";
|
|
7
|
+
|
|
8
|
+
/**
 * Lanes required before review-integrator for this round.
 *
 * Every round needs "validation-turn" and "adversary-brief". Round 1 is
 * preceded by "hypothesis-validation". "sprint-audit" is appended from round 4
 * onward, or whenever the round's focus is "quality".
 */
export function lanesForRound(
  roundIndex: number,
  debateRoundFocus?: PlanDebateFocus | null,
): DebateLaneKind[] {
  const lanes: DebateLaneKind[] = [];
  if (roundIndex === 1) {
    lanes.push("hypothesis-validation");
  }
  lanes.push("validation-turn", "adversary-brief");
  const needsSprintAudit = debateRoundFocus === "quality" || roundIndex >= 4;
  if (needsSprintAudit) {
    lanes.push("sprint-audit");
  }
  return lanes;
}
|
|
22
|
+
|
|
23
|
+
/**
 * Relative artifact paths for lane YAML + review-round.
 *
 * Each lane from `lanesForRound` maps to `artifacts/<lane>-r<round>.yaml`; the
 * round's `review-round` file is always listed last.
 */
export function laneArtifactPathsForRound(
  roundIndex: number,
  debateRoundFocus?: PlanDebateFocus | null,
): string[] {
  const lanePaths = lanesForRound(roundIndex, debateRoundFocus).map(
    (lane) => `artifacts/${lane}-r${roundIndex}.yaml`,
  );
  return [...lanePaths, `artifacts/review-round-r${roundIndex}.yaml`];
}
|