ultimate-pi 0.14.0 → 0.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agents/skills/harness-debate-plan/SKILL.md +41 -61
- package/.agents/skills/harness-orchestration/SKILL.md +2 -2
- package/.agents/skills/harness-plan/SKILL.md +10 -8
- package/.pi/agents/harness/planning/decompose.md +4 -2
- package/.pi/agents/harness/planning/execution-plan-author.md +25 -14
- package/.pi/agents/harness/planning/hypothesis-validator.md +21 -5
- package/.pi/agents/harness/planning/implementation-researcher.md +42 -0
- package/.pi/agents/harness/planning/plan-adversary.md +19 -3
- package/.pi/agents/harness/planning/plan-evaluator.md +26 -5
- package/.pi/agents/harness/planning/review-integrator.md +23 -9
- package/.pi/agents/harness/planning/scout-graphify.md +1 -1
- package/.pi/agents/harness/planning/sprint-contract-auditor.md +19 -4
- package/.pi/agents/harness/planning/stack-researcher.md +19 -10
- package/.pi/extensions/harness-debate-tools.ts +238 -16
- package/.pi/extensions/harness-live-widget.ts +39 -159
- package/.pi/extensions/harness-plan-approval.ts +47 -5
- package/.pi/extensions/lib/debate-bus-core.ts +69 -15
- package/.pi/extensions/lib/debate-bus-state.ts +6 -0
- package/.pi/extensions/lib/plan-approval/plan-review.ts +56 -0
- package/.pi/extensions/lib/plan-approval/types.ts +1 -0
- package/.pi/extensions/lib/plan-debate-eligibility.ts +214 -0
- package/.pi/extensions/lib/plan-debate-focus.ts +151 -0
- package/.pi/extensions/lib/plan-debate-gate.ts +77 -34
- package/.pi/extensions/lib/plan-debate-lanes.ts +44 -0
- package/.pi/extensions/lib/plan-debate-round-status.ts +63 -20
- package/.pi/extensions/lib/plan-messenger.ts +93 -17
- package/.pi/extensions/policy-gate.ts +1 -1
- package/.pi/harness/README.md +1 -1
- package/.pi/harness/agents.manifest.json +15 -11
- package/.pi/harness/docs/adrs/0034-darwin-plan-research-pipeline.md +1 -3
- package/.pi/harness/docs/adrs/0035-plan-phase-review-gate.md +13 -5
- package/.pi/harness/docs/adrs/0036-implementation-research-and-selective-debate.md +51 -0
- package/.pi/harness/docs/adrs/README.md +2 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/artifacts/implementation-research.yaml +28 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/artifacts/review-round-r1.yaml +24 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/artifacts/review-round-r2.yaml +25 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/plan-packet.yaml +196 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/plan-review.md +14 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/research-brief.yaml +62 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/implementation-research.yaml +28 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r2.yaml +24 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r3.yaml +24 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/research-brief.yaml +29 -0
- package/.pi/harness/evals/smoke/smoke-harness-plan.mjs +97 -16
- package/.pi/harness/specs/plan-implementation-research-brief.schema.json +128 -0
- package/.pi/harness/specs/plan-review-round-draft.schema.json +1 -1
- package/.pi/harness/specs/round-result.schema.json +15 -2
- package/.pi/lib/harness-ui-state.ts +92 -0
- package/.pi/prompts/harness-plan.md +87 -37
- package/.pi/prompts/planning-rubrics.md +31 -0
- package/CHANGELOG.md +11 -0
- package/package.json +2 -2
|
@@ -5,32 +5,20 @@
|
|
|
5
5
|
import { constants } from "node:fs";
|
|
6
6
|
import { access, readFile } from "node:fs/promises";
|
|
7
7
|
import { join } from "node:path";
|
|
8
|
+
import { capsForDebate } from "./debate-bus-core.js";
|
|
9
|
+
import {
|
|
10
|
+
getPlanFocusCoverage,
|
|
11
|
+
type PlanDebateFocus,
|
|
12
|
+
planDebateOutcomeComplete,
|
|
13
|
+
} from "./plan-debate-focus.js";
|
|
8
14
|
import { planDebateIdForRun } from "./plan-debate-id.js";
|
|
15
|
+
import { laneArtifactPathsForRound } from "./plan-debate-lanes.js";
|
|
9
16
|
import {
|
|
10
17
|
getMessengerRoundState,
|
|
11
18
|
loadMessengerState,
|
|
12
19
|
messengerRoundDebateReady,
|
|
13
20
|
} from "./plan-messenger.js";
|
|
14
21
|
|
|
15
|
-
const PLAN_ROUNDS = 4;
|
|
16
|
-
const FOCUS_BY_ROUND = ["spec", "wbs", "schedule", "quality"] as const;
|
|
17
|
-
|
|
18
|
-
function laneFilesForRound(roundIndex: number): string[] {
|
|
19
|
-
const n = roundIndex;
|
|
20
|
-
const lanes = [
|
|
21
|
-
`artifacts/validation-turn-r${n}.yaml`,
|
|
22
|
-
`artifacts/adversary-brief-r${n}.yaml`,
|
|
23
|
-
];
|
|
24
|
-
if (n === 1) {
|
|
25
|
-
lanes.unshift(`artifacts/hypothesis-validation-r${n}.yaml`);
|
|
26
|
-
}
|
|
27
|
-
if (n === 4) {
|
|
28
|
-
lanes.push(`artifacts/sprint-audit-r${n}.yaml`);
|
|
29
|
-
}
|
|
30
|
-
lanes.push(`artifacts/review-round-r${n}.yaml`);
|
|
31
|
-
return lanes;
|
|
32
|
-
}
|
|
33
|
-
|
|
34
22
|
async function fileExists(path: string): Promise<boolean> {
|
|
35
23
|
try {
|
|
36
24
|
await access(path, constants.R_OK);
|
|
@@ -64,6 +52,12 @@ export interface PlanDebateGateResult {
|
|
|
64
52
|
errors: string[];
|
|
65
53
|
warnings: string[];
|
|
66
54
|
debateId: string;
|
|
55
|
+
focus_coverage?: {
|
|
56
|
+
covered: string[];
|
|
57
|
+
missing: string[];
|
|
58
|
+
last_review_gate_ready: boolean;
|
|
59
|
+
};
|
|
60
|
+
debate_profile?: string;
|
|
67
61
|
}
|
|
68
62
|
|
|
69
63
|
export async function validatePlanDebateGate(
|
|
@@ -75,16 +69,47 @@ export async function validatePlanDebateGate(
|
|
|
75
69
|
const debateId = planDebateIdForRun(runId);
|
|
76
70
|
const runDir = join(projectRoot, ".pi", "harness", "runs", runId);
|
|
77
71
|
const debatesDir = join(projectRoot, ".pi", "harness", "debates");
|
|
72
|
+
const messenger = await loadMessengerState(runDir);
|
|
73
|
+
const debateProfile = messenger?.debate_profile ?? "standard";
|
|
74
|
+
const requiredFocuses: readonly PlanDebateFocus[] =
|
|
75
|
+
messenger?.required_focuses && messenger.required_focuses.length > 0
|
|
76
|
+
? messenger.required_focuses
|
|
77
|
+
: (["spec", "wbs", "schedule", "quality"] as const);
|
|
78
|
+
const caps = capsForDebate(debateId, debateProfile);
|
|
79
|
+
const coverage = await getPlanFocusCoverage(runDir, { requiredFocuses });
|
|
80
|
+
const dialogueOpts = {
|
|
81
|
+
max_exchanges_per_round: caps.max_exchanges_per_round,
|
|
82
|
+
};
|
|
78
83
|
|
|
79
|
-
for (
|
|
80
|
-
|
|
84
|
+
for (const focus of coverage.missing) {
|
|
85
|
+
errors.push(`focus not covered in submitted rounds: ${focus}`);
|
|
86
|
+
}
|
|
87
|
+
if (!coverage.last_review_gate_ready) {
|
|
88
|
+
errors.push("last submitted review round has review_gate_ready !== true");
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
const roundIndices = [
|
|
92
|
+
...new Set(
|
|
93
|
+
Object.values(coverage.rounds_by_focus).filter(
|
|
94
|
+
(v): v is number => typeof v === "number",
|
|
95
|
+
),
|
|
96
|
+
),
|
|
97
|
+
];
|
|
98
|
+
for (const r of roundIndices) {
|
|
99
|
+
const focus = coverage.focus_by_round[r] ?? null;
|
|
100
|
+
for (const rel of laneArtifactPathsForRound(r, focus)) {
|
|
81
101
|
const abs = join(runDir, rel);
|
|
82
102
|
if (!(await fileExists(abs))) {
|
|
83
103
|
errors.push(`missing ${rel}`);
|
|
84
104
|
}
|
|
85
105
|
}
|
|
86
106
|
const roundState = await getMessengerRoundState(runDir, r);
|
|
87
|
-
const
|
|
107
|
+
const requireSprint = focus === "quality" || r >= 4;
|
|
108
|
+
const messengerCheck = messengerRoundDebateReady(
|
|
109
|
+
roundState,
|
|
110
|
+
requireSprint,
|
|
111
|
+
dialogueOpts,
|
|
112
|
+
);
|
|
88
113
|
if (!messengerCheck.ok) {
|
|
89
114
|
for (const e of messengerCheck.errors) {
|
|
90
115
|
errors.push(`round ${r} messenger: ${e}`);
|
|
@@ -92,7 +117,12 @@ export async function validatePlanDebateGate(
|
|
|
92
117
|
}
|
|
93
118
|
}
|
|
94
119
|
|
|
95
|
-
|
|
120
|
+
if (coverage.last_round_index > caps.max_rounds) {
|
|
121
|
+
errors.push(
|
|
122
|
+
`round_count ${coverage.last_round_index} exceeds max_rounds ${caps.max_rounds}`,
|
|
123
|
+
);
|
|
124
|
+
}
|
|
125
|
+
|
|
96
126
|
if (!messenger) {
|
|
97
127
|
errors.push(
|
|
98
128
|
"debate-messenger/state.json missing — call harness_debate_open",
|
|
@@ -103,9 +133,10 @@ export async function validatePlanDebateGate(
|
|
|
103
133
|
|
|
104
134
|
const jsonlPath = join(debatesDir, `${debateId}.jsonl`);
|
|
105
135
|
const { rounds, hasConsensus } = await countJsonlKinds(jsonlPath);
|
|
106
|
-
|
|
136
|
+
const minRounds = caps.min_focus_rounds;
|
|
137
|
+
if (rounds < minRounds) {
|
|
107
138
|
errors.push(
|
|
108
|
-
`${debateId}.jsonl has ${rounds}/${
|
|
139
|
+
`${debateId}.jsonl has ${rounds}/${minRounds} minimum round events — use harness_debate_submit_round per focus`,
|
|
109
140
|
);
|
|
110
141
|
}
|
|
111
142
|
if (!hasConsensus) {
|
|
@@ -114,6 +145,17 @@ export async function validatePlanDebateGate(
|
|
|
114
145
|
);
|
|
115
146
|
}
|
|
116
147
|
|
|
148
|
+
if (
|
|
149
|
+
!planDebateOutcomeComplete(coverage, {
|
|
150
|
+
requiredFocuses,
|
|
151
|
+
minRoundIndex: caps.min_focus_rounds,
|
|
152
|
+
})
|
|
153
|
+
) {
|
|
154
|
+
errors.push(
|
|
155
|
+
`debate outcome incomplete: required focuses [${requiredFocuses.join(", ")}] with last review_gate_ready true (profile=${debateProfile})`,
|
|
156
|
+
);
|
|
157
|
+
}
|
|
158
|
+
|
|
117
159
|
const consensusPath = join(debatesDir, `${debateId}.consensus.json`);
|
|
118
160
|
if (!(await fileExists(consensusPath))) {
|
|
119
161
|
errors.push(`missing ${debateId}.consensus.json`);
|
|
@@ -129,15 +171,10 @@ export async function validatePlanDebateGate(
|
|
|
129
171
|
}
|
|
130
172
|
}
|
|
131
173
|
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
const raw = await readFile(reviewPath, "utf-8");
|
|
137
|
-
if (!raw.includes(focus)) {
|
|
138
|
-
warnings.push(`review-round-r${r + 1} may not match focus ${focus}`);
|
|
139
|
-
}
|
|
140
|
-
}
|
|
174
|
+
if (rounds > caps.max_rounds) {
|
|
175
|
+
warnings.push(
|
|
176
|
+
`bus round count ${rounds} exceeds soft max_rounds ${caps.max_rounds}`,
|
|
177
|
+
);
|
|
141
178
|
}
|
|
142
179
|
|
|
143
180
|
return {
|
|
@@ -145,6 +182,12 @@ export async function validatePlanDebateGate(
|
|
|
145
182
|
errors,
|
|
146
183
|
warnings,
|
|
147
184
|
debateId,
|
|
185
|
+
focus_coverage: {
|
|
186
|
+
covered: coverage.covered,
|
|
187
|
+
missing: coverage.missing,
|
|
188
|
+
last_review_gate_ready: coverage.last_review_gate_ready,
|
|
189
|
+
},
|
|
190
|
+
debate_profile: debateProfile,
|
|
148
191
|
};
|
|
149
192
|
}
|
|
150
193
|
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared Review Gate lane list for a round (gate + round-status).
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import type { PlanDebateFocus } from "./plan-debate-focus.js";
|
|
6
|
+
import type { DebateLaneKind } from "./plan-debate-lane.js";
|
|
7
|
+
|
|
8
|
+
/** Lanes required before review-integrator for this round. */
|
|
9
|
+
export function lanesForRound(
|
|
10
|
+
roundIndex: number,
|
|
11
|
+
debateRoundFocus?: PlanDebateFocus | null,
|
|
12
|
+
): DebateLaneKind[] {
|
|
13
|
+
const lanes: DebateLaneKind[] = ["validation-turn", "adversary-brief"];
|
|
14
|
+
if (roundIndex === 1) {
|
|
15
|
+
lanes.unshift("hypothesis-validation");
|
|
16
|
+
}
|
|
17
|
+
if (roundIndex >= 4 || debateRoundFocus === "quality") {
|
|
18
|
+
lanes.push("sprint-audit");
|
|
19
|
+
}
|
|
20
|
+
return lanes;
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
/** Relative artifact paths for lane YAML + review-round. */
|
|
24
|
+
export function laneArtifactPathsForRound(
|
|
25
|
+
roundIndex: number,
|
|
26
|
+
debateRoundFocus?: PlanDebateFocus | null,
|
|
27
|
+
): string[] {
|
|
28
|
+
const paths = lanesForRound(roundIndex, debateRoundFocus).map((lane) => {
|
|
29
|
+
switch (lane) {
|
|
30
|
+
case "hypothesis-validation":
|
|
31
|
+
return `artifacts/hypothesis-validation-r${roundIndex}.yaml`;
|
|
32
|
+
case "validation-turn":
|
|
33
|
+
return `artifacts/validation-turn-r${roundIndex}.yaml`;
|
|
34
|
+
case "adversary-brief":
|
|
35
|
+
return `artifacts/adversary-brief-r${roundIndex}.yaml`;
|
|
36
|
+
case "sprint-audit":
|
|
37
|
+
return `artifacts/sprint-audit-r${roundIndex}.yaml`;
|
|
38
|
+
default:
|
|
39
|
+
return `artifacts/${lane}-r${roundIndex}.yaml`;
|
|
40
|
+
}
|
|
41
|
+
});
|
|
42
|
+
paths.push(`artifacts/review-round-r${roundIndex}.yaml`);
|
|
43
|
+
return paths;
|
|
44
|
+
}
|
|
@@ -5,10 +5,18 @@
|
|
|
5
5
|
import { constants } from "node:fs";
|
|
6
6
|
import { access } from "node:fs/promises";
|
|
7
7
|
import { join } from "node:path";
|
|
8
|
-
import {
|
|
8
|
+
import { capsForDebate } from "./debate-bus-core.js";
|
|
9
|
+
import {
|
|
10
|
+
type PlanDebateFocus,
|
|
11
|
+
readDebateRoundFocus,
|
|
12
|
+
} from "./plan-debate-focus.js";
|
|
13
|
+
import { planDebateIdForRun } from "./plan-debate-id.js";
|
|
14
|
+
import { laneArtifactPath } from "./plan-debate-lane.js";
|
|
15
|
+
import { lanesForRound } from "./plan-debate-lanes.js";
|
|
9
16
|
import {
|
|
10
17
|
getMessengerRoundState,
|
|
11
|
-
|
|
18
|
+
loadMessengerState,
|
|
19
|
+
messengerRoundDialogueReady,
|
|
12
20
|
} from "./plan-messenger.js";
|
|
13
21
|
|
|
14
22
|
async function exists(path: string): Promise<boolean> {
|
|
@@ -20,39 +28,50 @@ async function exists(path: string): Promise<boolean> {
|
|
|
20
28
|
}
|
|
21
29
|
}
|
|
22
30
|
|
|
23
|
-
function lanesForRound(roundIndex: number): DebateLaneKind[] {
|
|
24
|
-
const lanes: DebateLaneKind[] = ["validation-turn", "adversary-brief"];
|
|
25
|
-
if (roundIndex === 1) lanes.unshift("hypothesis-validation");
|
|
26
|
-
if (roundIndex === 4) lanes.push("sprint-audit");
|
|
27
|
-
return lanes;
|
|
28
|
-
}
|
|
29
|
-
|
|
30
31
|
export interface RoundStatusResult {
|
|
31
32
|
round_index: number;
|
|
32
|
-
/** Lane YAML + messenger
|
|
33
|
+
/** Lane YAML + messenger dialogue complete; spawn integrator next. */
|
|
33
34
|
ready_for_integrator: boolean;
|
|
34
35
|
/** review-round-rN.yaml on disk (call harness_debate_submit_round if bus not updated). */
|
|
35
36
|
review_round_on_disk: boolean;
|
|
36
37
|
missing: string[];
|
|
37
38
|
next_tool?: string;
|
|
38
39
|
messenger: { ok: boolean; errors: string[] };
|
|
40
|
+
dialogue: { ok: boolean; errors: string[] };
|
|
41
|
+
unresolved_claim_ids: string[];
|
|
42
|
+
exchange_count: number;
|
|
43
|
+
debate_round_focus?: PlanDebateFocus | null;
|
|
39
44
|
}
|
|
40
45
|
|
|
41
46
|
export async function getPlanDebateRoundStatus(
|
|
42
47
|
runDir: string,
|
|
43
48
|
roundIndex: number,
|
|
49
|
+
runId?: string,
|
|
50
|
+
opts?: { debate_round_focus?: PlanDebateFocus },
|
|
44
51
|
): Promise<RoundStatusResult> {
|
|
52
|
+
const focus =
|
|
53
|
+
opts?.debate_round_focus ??
|
|
54
|
+
(await readDebateRoundFocus(runDir, roundIndex));
|
|
45
55
|
const missing: string[] = [];
|
|
46
|
-
for (const lane of lanesForRound(roundIndex)) {
|
|
56
|
+
for (const lane of lanesForRound(roundIndex, focus)) {
|
|
47
57
|
const rel = laneArtifactPath(lane, roundIndex);
|
|
48
58
|
if (!(await exists(join(runDir, rel)))) {
|
|
49
59
|
missing.push(rel);
|
|
50
60
|
}
|
|
51
61
|
}
|
|
62
|
+
const messengerState = await loadMessengerState(runDir);
|
|
63
|
+
const profile = messengerState?.debate_profile;
|
|
64
|
+
const caps = capsForDebate(
|
|
65
|
+
runId ? planDebateIdForRun(runId) : `plan-${runId ?? "unknown"}`,
|
|
66
|
+
profile,
|
|
67
|
+
);
|
|
52
68
|
const roundState = await getMessengerRoundState(runDir, roundIndex);
|
|
53
|
-
const
|
|
54
|
-
|
|
55
|
-
|
|
69
|
+
const dialogueOpts = {
|
|
70
|
+
max_exchanges_per_round: caps.max_exchanges_per_round,
|
|
71
|
+
};
|
|
72
|
+
const dialogue = messengerRoundDialogueReady(roundState, dialogueOpts);
|
|
73
|
+
if (!dialogue.ok) {
|
|
74
|
+
missing.push(...dialogue.errors.map((e) => `messenger: ${e}`));
|
|
56
75
|
}
|
|
57
76
|
const reviewRound = `artifacts/review-round-r${roundIndex}.yaml`;
|
|
58
77
|
const reviewRoundOnDisk = await exists(join(runDir, reviewRound));
|
|
@@ -62,14 +81,35 @@ export async function getPlanDebateRoundStatus(
|
|
|
62
81
|
next_tool = "subagent harness/planning/hypothesis-validator";
|
|
63
82
|
} else if (missing.some((m) => m.includes("validation-turn"))) {
|
|
64
83
|
next_tool = "subagent harness/planning/plan-evaluator";
|
|
84
|
+
} else if (
|
|
85
|
+
missing.some((m) => m.includes("adversary-brief")) &&
|
|
86
|
+
!roundState?.evaluator_posted
|
|
87
|
+
) {
|
|
88
|
+
next_tool = "subagent harness/planning/plan-evaluator";
|
|
65
89
|
} else if (missing.some((m) => m.includes("adversary-brief"))) {
|
|
66
90
|
next_tool =
|
|
67
91
|
"harness_messenger_read_round then subagent harness/planning/plan-adversary";
|
|
68
92
|
} else if (missing.some((m) => m.includes("sprint-audit"))) {
|
|
69
93
|
next_tool = "subagent harness/planning/sprint-contract-auditor";
|
|
70
|
-
} else if (
|
|
94
|
+
} else if (
|
|
95
|
+
roundState &&
|
|
96
|
+
roundState.evaluator_posted &&
|
|
97
|
+
!roundState.adversary_posted
|
|
98
|
+
) {
|
|
99
|
+
next_tool =
|
|
100
|
+
"harness_messenger_read_round then subagent harness/planning/plan-adversary";
|
|
101
|
+
} else if (
|
|
102
|
+
roundState &&
|
|
103
|
+
roundState.unresolved_claim_ids.length > 0 &&
|
|
104
|
+
roundState.exchange_count < caps.max_exchanges_per_round
|
|
105
|
+
) {
|
|
106
|
+
const spawnEvaluator = roundState.exchange_count % 2 === 1;
|
|
107
|
+
next_tool = spawnEvaluator
|
|
108
|
+
? "harness_debate_advance_thread → harness_messenger_read_round → subagent harness/planning/plan-evaluator (clarification; address unresolved claim_ids)"
|
|
109
|
+
: "harness_debate_advance_thread → harness_messenger_read_round → subagent harness/planning/plan-adversary (counter or concede)";
|
|
110
|
+
} else if (!dialogue.ok) {
|
|
71
111
|
next_tool =
|
|
72
|
-
"harness_debate_apply_lane (evaluator/adversary)
|
|
112
|
+
"harness_debate_advance_thread or harness_debate_apply_lane (evaluator/adversary)";
|
|
73
113
|
} else if (!reviewRoundOnDisk) {
|
|
74
114
|
next_tool =
|
|
75
115
|
"subagent harness/planning/review-integrator then harness_debate_submit_round";
|
|
@@ -78,10 +118,9 @@ export async function getPlanDebateRoundStatus(
|
|
|
78
118
|
"harness_debate_submit_round with integrator draft from review-round file";
|
|
79
119
|
}
|
|
80
120
|
|
|
121
|
+
const laneMissing = missing.filter((m) => !m.startsWith("messenger"));
|
|
81
122
|
const readyForIntegrator =
|
|
82
|
-
|
|
83
|
-
missing.filter((m) => !m.startsWith("messenger")).length === 0 &&
|
|
84
|
-
!reviewRoundOnDisk;
|
|
123
|
+
dialogue.ok && laneMissing.length === 0 && !reviewRoundOnDisk;
|
|
85
124
|
|
|
86
125
|
return {
|
|
87
126
|
round_index: roundIndex,
|
|
@@ -89,6 +128,10 @@ export async function getPlanDebateRoundStatus(
|
|
|
89
128
|
review_round_on_disk: reviewRoundOnDisk,
|
|
90
129
|
missing,
|
|
91
130
|
next_tool,
|
|
92
|
-
messenger,
|
|
131
|
+
messenger: dialogue,
|
|
132
|
+
dialogue,
|
|
133
|
+
unresolved_claim_ids: roundState?.unresolved_claim_ids ?? [],
|
|
134
|
+
exchange_count: roundState?.exchange_count ?? 0,
|
|
135
|
+
debate_round_focus: focus,
|
|
93
136
|
};
|
|
94
137
|
}
|
|
@@ -17,11 +17,15 @@ import {
|
|
|
17
17
|
} from "node:fs/promises";
|
|
18
18
|
import { join } from "node:path";
|
|
19
19
|
import type { DebateParticipant } from "../../lib/debate-orchestrator-types.js";
|
|
20
|
+
import type { DebateProfile } from "./plan-debate-eligibility.js";
|
|
21
|
+
import type { PlanDebateFocus } from "./plan-debate-focus.js";
|
|
20
22
|
|
|
21
23
|
export type MessengerMessageKind =
|
|
22
24
|
| "system"
|
|
23
25
|
| "claim"
|
|
24
26
|
| "rebuttal"
|
|
27
|
+
| "clarification"
|
|
28
|
+
| "counter"
|
|
25
29
|
| "integrate"
|
|
26
30
|
| "audit";
|
|
27
31
|
|
|
@@ -47,6 +51,8 @@ export interface MessengerRoundState {
|
|
|
47
51
|
integrator_posted: boolean;
|
|
48
52
|
claim_count: number;
|
|
49
53
|
rebuttal_count: number;
|
|
54
|
+
exchange_count: number;
|
|
55
|
+
unresolved_claim_ids: string[];
|
|
50
56
|
}
|
|
51
57
|
|
|
52
58
|
export interface MessengerState {
|
|
@@ -55,6 +61,8 @@ export interface MessengerState {
|
|
|
55
61
|
debate_id: string;
|
|
56
62
|
opened_at: string;
|
|
57
63
|
rounds: Record<string, MessengerRoundState>;
|
|
64
|
+
debate_profile?: DebateProfile;
|
|
65
|
+
required_focuses?: PlanDebateFocus[];
|
|
58
66
|
}
|
|
59
67
|
|
|
60
68
|
function messengerRoot(runDir: string): string {
|
|
@@ -71,7 +79,12 @@ function roundKey(roundIndex: number): string {
|
|
|
71
79
|
|
|
72
80
|
export async function initPlanMessenger(
|
|
73
81
|
runDir: string,
|
|
74
|
-
opts: {
|
|
82
|
+
opts: {
|
|
83
|
+
runId: string;
|
|
84
|
+
debateId: string;
|
|
85
|
+
debate_profile?: DebateProfile;
|
|
86
|
+
required_focuses?: PlanDebateFocus[];
|
|
87
|
+
},
|
|
75
88
|
): Promise<string> {
|
|
76
89
|
const root = messengerRoot(runDir);
|
|
77
90
|
await mkdir(join(root, "inbox"), { recursive: true });
|
|
@@ -82,6 +95,8 @@ export async function initPlanMessenger(
|
|
|
82
95
|
debate_id: opts.debateId,
|
|
83
96
|
opened_at: nowIso(),
|
|
84
97
|
rounds: {},
|
|
98
|
+
debate_profile: opts.debate_profile,
|
|
99
|
+
required_focuses: opts.required_focuses,
|
|
85
100
|
};
|
|
86
101
|
await writeFile(
|
|
87
102
|
join(root, "state.json"),
|
|
@@ -122,9 +137,51 @@ function defaultRoundState(roundIndex: number): MessengerRoundState {
|
|
|
122
137
|
integrator_posted: false,
|
|
123
138
|
claim_count: 0,
|
|
124
139
|
rebuttal_count: 0,
|
|
140
|
+
exchange_count: 0,
|
|
141
|
+
unresolved_claim_ids: [],
|
|
125
142
|
};
|
|
126
143
|
}
|
|
127
144
|
|
|
145
|
+
/** Recompute exchange + unresolved claim ids from a round transcript. */
|
|
146
|
+
export function syncRoundStateFromTranscript(
|
|
147
|
+
round: MessengerRoundState,
|
|
148
|
+
messages: MessengerMessage[],
|
|
149
|
+
): MessengerRoundState {
|
|
150
|
+
const claimed = new Set<string>();
|
|
151
|
+
const resolved = new Set<string>();
|
|
152
|
+
let exchange_count = 0;
|
|
153
|
+
|
|
154
|
+
for (const m of messages) {
|
|
155
|
+
if (m.from === "PlanEvaluatorAgent" && m.kind === "claim") {
|
|
156
|
+
round.evaluator_posted = true;
|
|
157
|
+
round.claim_count += m.claim_ids.length || 1;
|
|
158
|
+
for (const id of m.claim_ids) claimed.add(id);
|
|
159
|
+
}
|
|
160
|
+
if (m.from === "PlanAdversaryAgent" && m.kind === "rebuttal") {
|
|
161
|
+
round.adversary_posted = true;
|
|
162
|
+
round.rebuttal_count += m.in_reply_to.length || 1;
|
|
163
|
+
exchange_count += 1;
|
|
164
|
+
}
|
|
165
|
+
if (m.from === "PlanEvaluatorAgent" && m.kind === "clarification") {
|
|
166
|
+
exchange_count += 1;
|
|
167
|
+
for (const id of m.claim_ids) resolved.add(id);
|
|
168
|
+
for (const id of m.in_reply_to) resolved.add(id);
|
|
169
|
+
}
|
|
170
|
+
if (m.from === "PlanAdversaryAgent" && m.kind === "counter") {
|
|
171
|
+
exchange_count += 1;
|
|
172
|
+
for (const id of m.claim_ids) resolved.add(id);
|
|
173
|
+
for (const id of m.in_reply_to) resolved.add(id);
|
|
174
|
+
}
|
|
175
|
+
if (m.from === "ReviewIntegratorAgent" && m.kind === "integrate") {
|
|
176
|
+
round.integrator_posted = true;
|
|
177
|
+
}
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
round.exchange_count = exchange_count;
|
|
181
|
+
round.unresolved_claim_ids = [...claimed].filter((id) => !resolved.has(id));
|
|
182
|
+
return round;
|
|
183
|
+
}
|
|
184
|
+
|
|
128
185
|
export async function postMessengerMessage(
|
|
129
186
|
runDir: string,
|
|
130
187
|
msg: Omit<MessengerMessage, "schema_version" | "id" | "ts"> & {
|
|
@@ -172,19 +229,10 @@ export async function postMessengerMessage(
|
|
|
172
229
|
rounds: {},
|
|
173
230
|
};
|
|
174
231
|
const key = roundKey(full.round_index);
|
|
232
|
+
const messages = await readRoundTranscript(runDir, full.round_index);
|
|
233
|
+
messages.push(full);
|
|
175
234
|
const round = state.rounds[key] ?? defaultRoundState(full.round_index);
|
|
176
|
-
|
|
177
|
-
round.evaluator_posted = true;
|
|
178
|
-
round.claim_count += full.claim_ids.length || 1;
|
|
179
|
-
}
|
|
180
|
-
if (full.from === "PlanAdversaryAgent" && full.kind === "rebuttal") {
|
|
181
|
-
round.adversary_posted = true;
|
|
182
|
-
round.rebuttal_count += full.in_reply_to.length || 1;
|
|
183
|
-
}
|
|
184
|
-
if (full.from === "ReviewIntegratorAgent" && full.kind === "integrate") {
|
|
185
|
-
round.integrator_posted = true;
|
|
186
|
-
}
|
|
187
|
-
state.rounds[key] = round;
|
|
235
|
+
state.rounds[key] = syncRoundStateFromTranscript(round, messages);
|
|
188
236
|
await saveMessengerState(runDir, state);
|
|
189
237
|
return full;
|
|
190
238
|
}
|
|
@@ -233,13 +281,22 @@ export async function getMessengerRoundState(
|
|
|
233
281
|
): Promise<MessengerRoundState | null> {
|
|
234
282
|
const state = await loadMessengerState(runDir);
|
|
235
283
|
if (!state) return null;
|
|
236
|
-
|
|
284
|
+
const round = state.rounds[roundKey(roundIndex)];
|
|
285
|
+
if (!round) return null;
|
|
286
|
+
const transcript = await readRoundTranscript(runDir, roundIndex);
|
|
287
|
+
return syncRoundStateFromTranscript({ ...round }, transcript);
|
|
237
288
|
}
|
|
238
289
|
|
|
239
|
-
export
|
|
290
|
+
export interface MessengerDialogueOptions {
|
|
291
|
+
max_exchanges_per_round?: number;
|
|
292
|
+
}
|
|
293
|
+
|
|
294
|
+
/** Evaluator + adversary dialogue settled; safe to spawn integrator. */
|
|
295
|
+
export function messengerRoundDialogueReady(
|
|
240
296
|
round: MessengerRoundState | null,
|
|
241
|
-
|
|
297
|
+
opts: MessengerDialogueOptions = {},
|
|
242
298
|
): { ok: boolean; errors: string[] } {
|
|
299
|
+
const maxExchanges = opts.max_exchanges_per_round ?? 3;
|
|
243
300
|
const errors: string[] = [];
|
|
244
301
|
if (!round) {
|
|
245
302
|
errors.push("no messenger activity for this round");
|
|
@@ -257,7 +314,26 @@ export function messengerRoundDebateReady(
|
|
|
257
314
|
if (round.rebuttal_count < 1) {
|
|
258
315
|
errors.push("adversary must rebut at least one claim (in_reply_to)");
|
|
259
316
|
}
|
|
260
|
-
|
|
317
|
+
const dialogueSettled =
|
|
318
|
+
round.unresolved_claim_ids.length === 0 ||
|
|
319
|
+
round.exchange_count >= maxExchanges;
|
|
320
|
+
if (!dialogueSettled) {
|
|
321
|
+
errors.push(
|
|
322
|
+
`unresolved claims remain (${round.unresolved_claim_ids.join(", ")}) and exchange_count ${round.exchange_count} < ${maxExchanges}`,
|
|
323
|
+
);
|
|
324
|
+
}
|
|
325
|
+
return { ok: errors.length === 0, errors };
|
|
326
|
+
}
|
|
327
|
+
|
|
328
|
+
/** Full round ready for harness_debate_submit_round (includes integrator). */
|
|
329
|
+
export function messengerRoundDebateReady(
|
|
330
|
+
round: MessengerRoundState | null,
|
|
331
|
+
_requireSprintAudit: boolean,
|
|
332
|
+
opts: MessengerDialogueOptions = {},
|
|
333
|
+
): { ok: boolean; errors: string[] } {
|
|
334
|
+
const dialogue = messengerRoundDialogueReady(round, opts);
|
|
335
|
+
const errors = [...dialogue.errors];
|
|
336
|
+
if (!round?.integrator_posted) {
|
|
261
337
|
errors.push(
|
|
262
338
|
"ReviewIntegratorAgent must post integrate message before bus submit",
|
|
263
339
|
);
|
|
@@ -243,7 +243,7 @@ export default function policyGate(pi: ExtensionAPI) {
|
|
|
243
243
|
|
|
244
244
|
const planPhaseHint =
|
|
245
245
|
state.phase === "plan"
|
|
246
|
-
? "\nPlan phase: scouts → decompose → hypothesis → stack-researcher → execution-plan-author → validate-plan-dag →
|
|
246
|
+
? "\nPlan phase: scouts → decompose → hypothesis → implementation-researcher + stack-researcher → execution-plan-author → validate-plan-dag → debate eligibility + Review Gate → approve_plan → create_plan (YAML plan-packet.yaml). Post-execute: /harness-critic."
|
|
247
247
|
: "";
|
|
248
248
|
|
|
249
249
|
return {
|
package/.pi/harness/README.md
CHANGED
|
@@ -30,7 +30,7 @@ under `.pi/extensions/` and auto-loaded through the package `pi.extensions`
|
|
|
30
30
|
manifest (`package.json`).
|
|
31
31
|
|
|
32
32
|
- `harness-run-context.ts` - active run + plan injection; short commands without run/plan args
|
|
33
|
-
- `harness-live-widget.ts` - footer status (phase
|
|
33
|
+
- `harness-live-widget.ts` - footer status (current/next phase + plain-language status hint; no run id in UI)
|
|
34
34
|
- `policy-gate.ts` - phase state machine + plan-before-mutate enforcement
|
|
35
35
|
- `budget-guard.ts` - hard-stop token budget checks + budget exhausted artifacts
|
|
36
36
|
- `trace-recorder.ts` - append-only run traces + HarnessRunRecord + compact index
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
{
|
|
2
2
|
"schema_version": "1.0.0",
|
|
3
3
|
"package": "ultimate-pi",
|
|
4
|
-
"package_version": "0.
|
|
5
|
-
"generated_at": "2026-05-
|
|
4
|
+
"package_version": "0.14.0",
|
|
5
|
+
"generated_at": "2026-05-19T10:53:28.359Z",
|
|
6
6
|
"agents": {
|
|
7
7
|
"pi-pi/agent-expert": {
|
|
8
8
|
"path": ".pi/agents/pi-pi/agent-expert.md",
|
|
@@ -78,35 +78,39 @@
|
|
|
78
78
|
},
|
|
79
79
|
"harness/planning/decompose": {
|
|
80
80
|
"path": ".pi/agents/harness/planning/decompose.md",
|
|
81
|
-
"sha256": "
|
|
81
|
+
"sha256": "5c3b983772d013741d50f39945bc77f178aa338aecab56b93c09216d72192c69"
|
|
82
82
|
},
|
|
83
83
|
"harness/planning/execution-plan-author": {
|
|
84
84
|
"path": ".pi/agents/harness/planning/execution-plan-author.md",
|
|
85
|
-
"sha256": "
|
|
85
|
+
"sha256": "16f8800c50bcaf1b82ed9138889c8a0e538ee6a139aeae129ccd20cec2ec25f7"
|
|
86
86
|
},
|
|
87
87
|
"harness/planning/hypothesis-validator": {
|
|
88
88
|
"path": ".pi/agents/harness/planning/hypothesis-validator.md",
|
|
89
|
-
"sha256": "
|
|
89
|
+
"sha256": "9e68ec5d6aef96a3666c30227c3cbddf1aaed1182fdc94dbbd21ad3d48315ff2"
|
|
90
90
|
},
|
|
91
91
|
"harness/planning/hypothesis": {
|
|
92
92
|
"path": ".pi/agents/harness/planning/hypothesis.md",
|
|
93
93
|
"sha256": "b20c527d15c2243cd5d3a8f16cea6d44bdfd16e01915d42f3b830bf9938e5f8b"
|
|
94
94
|
},
|
|
95
|
+
"harness/planning/implementation-researcher": {
|
|
96
|
+
"path": ".pi/agents/harness/planning/implementation-researcher.md",
|
|
97
|
+
"sha256": "dbd1c4fc74d538b110d406febfd4603eebea77d82e8b367df4596ac7ff6e54cc"
|
|
98
|
+
},
|
|
95
99
|
"harness/planning/plan-adversary": {
|
|
96
100
|
"path": ".pi/agents/harness/planning/plan-adversary.md",
|
|
97
|
-
"sha256": "
|
|
101
|
+
"sha256": "7c14eaab65f356003ee2ff380f5d4e620170b5126daa67c3d226b12342f47bd2"
|
|
98
102
|
},
|
|
99
103
|
"harness/planning/plan-evaluator": {
|
|
100
104
|
"path": ".pi/agents/harness/planning/plan-evaluator.md",
|
|
101
|
-
"sha256": "
|
|
105
|
+
"sha256": "846575abe9df3e7e5be812c0c474989c1a9de8074a7884d77b9d3dd423643480"
|
|
102
106
|
},
|
|
103
107
|
"harness/planning/review-integrator": {
|
|
104
108
|
"path": ".pi/agents/harness/planning/review-integrator.md",
|
|
105
|
-
"sha256": "
|
|
109
|
+
"sha256": "bed43f3f049c279ac50a24bcffac1bbe46a8605d89c9cc6d0c3c6a87d488b1b8"
|
|
106
110
|
},
|
|
107
111
|
"harness/planning/scout-graphify": {
|
|
108
112
|
"path": ".pi/agents/harness/planning/scout-graphify.md",
|
|
109
|
-
"sha256": "
|
|
113
|
+
"sha256": "7f385d5bda2fe04b9da52cb4cb9247324efd345579b483d3ad55a6abefad50d5"
|
|
110
114
|
},
|
|
111
115
|
"harness/planning/scout-semantic": {
|
|
112
116
|
"path": ".pi/agents/harness/planning/scout-semantic.md",
|
|
@@ -118,11 +122,11 @@
|
|
|
118
122
|
},
|
|
119
123
|
"harness/planning/sprint-contract-auditor": {
|
|
120
124
|
"path": ".pi/agents/harness/planning/sprint-contract-auditor.md",
|
|
121
|
-
"sha256": "
|
|
125
|
+
"sha256": "d915274dc9b5addae5499bc2390b348eddeb8f133b526a816e23d0d19a2618bf"
|
|
122
126
|
},
|
|
123
127
|
"harness/planning/stack-researcher": {
|
|
124
128
|
"path": ".pi/agents/harness/planning/stack-researcher.md",
|
|
125
|
-
"sha256": "
|
|
129
|
+
"sha256": "fa228920abe2b66d4d8921c4a5d85593e3019a24bbe9ae512ed9149f235e3536"
|
|
126
130
|
}
|
|
127
131
|
}
|
|
128
132
|
}
|
|
@@ -13,9 +13,7 @@
|
|
|
13
13
|
- `harness/planning/decompose` — DeepMind-style problem decomposition (`PlanDecompositionBrief`)
|
|
14
14
|
- `harness/planning/hypothesis` — DARWIN hypothesis generation (`PlanHypothesisBrief`)
|
|
15
15
|
2. **Parent maps hypothesis → PlanPacket** — `plan-packet.schema.json` unchanged; execution gating stable.
|
|
16
|
-
3. **
|
|
17
|
-
- `harness/planning/plan-adversary` — execution risk on PlanPacket
|
|
18
|
-
- `harness/planning/hypothesis-eval` — blind self-eval (task + hypothesis only)
|
|
16
|
+
3. **Review Gate (ADR 0035):** outcome-based debate with `hypothesis-validator` on R1 (blind — task + hypothesis only). Retired `hypothesis-eval` as a separate pre-approval agent.
|
|
19
17
|
4. **`approve_plan` optional `research_brief`** — rendered in `plan-review.md`; not written to `plan-packet.json`.
|
|
20
18
|
5. **`--quick`** still skips semantic scout only; never skips decompose/hypothesis.
|
|
21
19
|
|