ultimate-pi 0.14.0 → 0.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. package/.agents/skills/harness-debate-plan/SKILL.md +41 -61
  2. package/.agents/skills/harness-orchestration/SKILL.md +2 -2
  3. package/.agents/skills/harness-plan/SKILL.md +10 -8
  4. package/.pi/agents/harness/planning/decompose.md +4 -2
  5. package/.pi/agents/harness/planning/execution-plan-author.md +25 -14
  6. package/.pi/agents/harness/planning/hypothesis-validator.md +21 -5
  7. package/.pi/agents/harness/planning/implementation-researcher.md +42 -0
  8. package/.pi/agents/harness/planning/plan-adversary.md +19 -3
  9. package/.pi/agents/harness/planning/plan-evaluator.md +26 -5
  10. package/.pi/agents/harness/planning/review-integrator.md +23 -9
  11. package/.pi/agents/harness/planning/scout-graphify.md +1 -1
  12. package/.pi/agents/harness/planning/sprint-contract-auditor.md +19 -4
  13. package/.pi/agents/harness/planning/stack-researcher.md +19 -10
  14. package/.pi/extensions/harness-debate-tools.ts +238 -16
  15. package/.pi/extensions/harness-live-widget.ts +39 -159
  16. package/.pi/extensions/harness-plan-approval.ts +47 -5
  17. package/.pi/extensions/lib/debate-bus-core.ts +69 -15
  18. package/.pi/extensions/lib/debate-bus-state.ts +6 -0
  19. package/.pi/extensions/lib/plan-approval/plan-review.ts +56 -0
  20. package/.pi/extensions/lib/plan-approval/types.ts +1 -0
  21. package/.pi/extensions/lib/plan-debate-eligibility.ts +214 -0
  22. package/.pi/extensions/lib/plan-debate-focus.ts +151 -0
  23. package/.pi/extensions/lib/plan-debate-gate.ts +77 -34
  24. package/.pi/extensions/lib/plan-debate-lanes.ts +44 -0
  25. package/.pi/extensions/lib/plan-debate-round-status.ts +63 -20
  26. package/.pi/extensions/lib/plan-messenger.ts +93 -17
  27. package/.pi/extensions/policy-gate.ts +1 -1
  28. package/.pi/harness/README.md +1 -1
  29. package/.pi/harness/agents.manifest.json +15 -11
  30. package/.pi/harness/docs/adrs/0034-darwin-plan-research-pipeline.md +1 -3
  31. package/.pi/harness/docs/adrs/0035-plan-phase-review-gate.md +13 -5
  32. package/.pi/harness/docs/adrs/0036-implementation-research-and-selective-debate.md +51 -0
  33. package/.pi/harness/docs/adrs/README.md +2 -0
  34. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/artifacts/implementation-research.yaml +28 -0
  35. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/artifacts/review-round-r1.yaml +24 -0
  36. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/artifacts/review-round-r2.yaml +25 -0
  37. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/plan-packet.yaml +196 -0
  38. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/plan-review.md +14 -0
  39. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/research-brief.yaml +62 -0
  40. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/implementation-research.yaml +28 -0
  41. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r2.yaml +24 -0
  42. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r3.yaml +24 -0
  43. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/research-brief.yaml +29 -0
  44. package/.pi/harness/evals/smoke/smoke-harness-plan.mjs +97 -16
  45. package/.pi/harness/specs/plan-implementation-research-brief.schema.json +128 -0
  46. package/.pi/harness/specs/plan-review-round-draft.schema.json +1 -1
  47. package/.pi/harness/specs/round-result.schema.json +15 -2
  48. package/.pi/lib/harness-ui-state.ts +92 -0
  49. package/.pi/prompts/harness-plan.md +87 -37
  50. package/.pi/prompts/planning-rubrics.md +31 -0
  51. package/CHANGELOG.md +11 -0
  52. package/package.json +2 -2
@@ -5,32 +5,20 @@
5
5
  import { constants } from "node:fs";
6
6
  import { access, readFile } from "node:fs/promises";
7
7
  import { join } from "node:path";
8
+ import { capsForDebate } from "./debate-bus-core.js";
9
+ import {
10
+ getPlanFocusCoverage,
11
+ type PlanDebateFocus,
12
+ planDebateOutcomeComplete,
13
+ } from "./plan-debate-focus.js";
8
14
  import { planDebateIdForRun } from "./plan-debate-id.js";
15
+ import { laneArtifactPathsForRound } from "./plan-debate-lanes.js";
9
16
  import {
10
17
  getMessengerRoundState,
11
18
  loadMessengerState,
12
19
  messengerRoundDebateReady,
13
20
  } from "./plan-messenger.js";
14
21
 
15
- const PLAN_ROUNDS = 4;
16
- const FOCUS_BY_ROUND = ["spec", "wbs", "schedule", "quality"] as const;
17
-
18
- function laneFilesForRound(roundIndex: number): string[] {
19
- const n = roundIndex;
20
- const lanes = [
21
- `artifacts/validation-turn-r${n}.yaml`,
22
- `artifacts/adversary-brief-r${n}.yaml`,
23
- ];
24
- if (n === 1) {
25
- lanes.unshift(`artifacts/hypothesis-validation-r${n}.yaml`);
26
- }
27
- if (n === 4) {
28
- lanes.push(`artifacts/sprint-audit-r${n}.yaml`);
29
- }
30
- lanes.push(`artifacts/review-round-r${n}.yaml`);
31
- return lanes;
32
- }
33
-
34
22
  async function fileExists(path: string): Promise<boolean> {
35
23
  try {
36
24
  await access(path, constants.R_OK);
@@ -64,6 +52,12 @@ export interface PlanDebateGateResult {
64
52
  errors: string[];
65
53
  warnings: string[];
66
54
  debateId: string;
55
+ focus_coverage?: {
56
+ covered: string[];
57
+ missing: string[];
58
+ last_review_gate_ready: boolean;
59
+ };
60
+ debate_profile?: string;
67
61
  }
68
62
 
69
63
  export async function validatePlanDebateGate(
@@ -75,16 +69,47 @@ export async function validatePlanDebateGate(
75
69
  const debateId = planDebateIdForRun(runId);
76
70
  const runDir = join(projectRoot, ".pi", "harness", "runs", runId);
77
71
  const debatesDir = join(projectRoot, ".pi", "harness", "debates");
72
+ const messenger = await loadMessengerState(runDir);
73
+ const debateProfile = messenger?.debate_profile ?? "standard";
74
+ const requiredFocuses: readonly PlanDebateFocus[] =
75
+ messenger?.required_focuses && messenger.required_focuses.length > 0
76
+ ? messenger.required_focuses
77
+ : (["spec", "wbs", "schedule", "quality"] as const);
78
+ const caps = capsForDebate(debateId, debateProfile);
79
+ const coverage = await getPlanFocusCoverage(runDir, { requiredFocuses });
80
+ const dialogueOpts = {
81
+ max_exchanges_per_round: caps.max_exchanges_per_round,
82
+ };
78
83
 
79
- for (let r = 1; r <= PLAN_ROUNDS; r++) {
80
- for (const rel of laneFilesForRound(r)) {
84
+ for (const focus of coverage.missing) {
85
+ errors.push(`focus not covered in submitted rounds: ${focus}`);
86
+ }
87
+ if (!coverage.last_review_gate_ready) {
88
+ errors.push("last submitted review round has review_gate_ready !== true");
89
+ }
90
+
91
+ const roundIndices = [
92
+ ...new Set(
93
+ Object.values(coverage.rounds_by_focus).filter(
94
+ (v): v is number => typeof v === "number",
95
+ ),
96
+ ),
97
+ ];
98
+ for (const r of roundIndices) {
99
+ const focus = coverage.focus_by_round[r] ?? null;
100
+ for (const rel of laneArtifactPathsForRound(r, focus)) {
81
101
  const abs = join(runDir, rel);
82
102
  if (!(await fileExists(abs))) {
83
103
  errors.push(`missing ${rel}`);
84
104
  }
85
105
  }
86
106
  const roundState = await getMessengerRoundState(runDir, r);
87
- const messengerCheck = messengerRoundDebateReady(roundState, r === 4);
107
+ const requireSprint = focus === "quality" || r >= 4;
108
+ const messengerCheck = messengerRoundDebateReady(
109
+ roundState,
110
+ requireSprint,
111
+ dialogueOpts,
112
+ );
88
113
  if (!messengerCheck.ok) {
89
114
  for (const e of messengerCheck.errors) {
90
115
  errors.push(`round ${r} messenger: ${e}`);
@@ -92,7 +117,12 @@ export async function validatePlanDebateGate(
92
117
  }
93
118
  }
94
119
 
95
- const messenger = await loadMessengerState(runDir);
120
+ if (coverage.last_round_index > caps.max_rounds) {
121
+ errors.push(
122
+ `round_count ${coverage.last_round_index} exceeds max_rounds ${caps.max_rounds}`,
123
+ );
124
+ }
125
+
96
126
  if (!messenger) {
97
127
  errors.push(
98
128
  "debate-messenger/state.json missing — call harness_debate_open",
@@ -103,9 +133,10 @@ export async function validatePlanDebateGate(
103
133
 
104
134
  const jsonlPath = join(debatesDir, `${debateId}.jsonl`);
105
135
  const { rounds, hasConsensus } = await countJsonlKinds(jsonlPath);
106
- if (rounds < PLAN_ROUNDS) {
136
+ const minRounds = caps.min_focus_rounds;
137
+ if (rounds < minRounds) {
107
138
  errors.push(
108
- `${debateId}.jsonl has ${rounds}/${PLAN_ROUNDS} round events — use harness_debate_submit_round each round`,
139
+ `${debateId}.jsonl has ${rounds}/${minRounds} minimum round events — use harness_debate_submit_round per focus`,
109
140
  );
110
141
  }
111
142
  if (!hasConsensus) {
@@ -114,6 +145,17 @@ export async function validatePlanDebateGate(
114
145
  );
115
146
  }
116
147
 
148
+ if (
149
+ !planDebateOutcomeComplete(coverage, {
150
+ requiredFocuses,
151
+ minRoundIndex: caps.min_focus_rounds,
152
+ })
153
+ ) {
154
+ errors.push(
155
+ `debate outcome incomplete: required focuses [${requiredFocuses.join(", ")}] with last review_gate_ready true (profile=${debateProfile})`,
156
+ );
157
+ }
158
+
117
159
  const consensusPath = join(debatesDir, `${debateId}.consensus.json`);
118
160
  if (!(await fileExists(consensusPath))) {
119
161
  errors.push(`missing ${debateId}.consensus.json`);
@@ -129,15 +171,10 @@ export async function validatePlanDebateGate(
129
171
  }
130
172
  }
131
173
 
132
- for (let r = 0; r < FOCUS_BY_ROUND.length; r++) {
133
- const focus = FOCUS_BY_ROUND[r];
134
- const reviewPath = join(runDir, `artifacts/review-round-r${r + 1}.yaml`);
135
- if (await fileExists(reviewPath)) {
136
- const raw = await readFile(reviewPath, "utf-8");
137
- if (!raw.includes(focus)) {
138
- warnings.push(`review-round-r${r + 1} may not match focus ${focus}`);
139
- }
140
- }
174
+ if (rounds > caps.max_rounds) {
175
+ warnings.push(
176
+ `bus round count ${rounds} exceeds soft max_rounds ${caps.max_rounds}`,
177
+ );
141
178
  }
142
179
 
143
180
  return {
@@ -145,6 +182,12 @@ export async function validatePlanDebateGate(
145
182
  errors,
146
183
  warnings,
147
184
  debateId,
185
+ focus_coverage: {
186
+ covered: coverage.covered,
187
+ missing: coverage.missing,
188
+ last_review_gate_ready: coverage.last_review_gate_ready,
189
+ },
190
+ debate_profile: debateProfile,
148
191
  };
149
192
  }
150
193
 
@@ -0,0 +1,44 @@
1
+ /**
2
+ * Shared Review Gate lane list for a round (gate + round-status).
3
+ */
4
+
5
+ import type { PlanDebateFocus } from "./plan-debate-focus.js";
6
+ import type { DebateLaneKind } from "./plan-debate-lane.js";
7
+
8
+ /** Lanes required before review-integrator for this round. */
9
+ export function lanesForRound(
10
+ roundIndex: number,
11
+ debateRoundFocus?: PlanDebateFocus | null,
12
+ ): DebateLaneKind[] {
13
+ const lanes: DebateLaneKind[] = ["validation-turn", "adversary-brief"];
14
+ if (roundIndex === 1) {
15
+ lanes.unshift("hypothesis-validation");
16
+ }
17
+ if (roundIndex >= 4 || debateRoundFocus === "quality") {
18
+ lanes.push("sprint-audit");
19
+ }
20
+ return lanes;
21
+ }
22
+
23
+ /** Relative artifact paths for lane YAML + review-round. */
24
+ export function laneArtifactPathsForRound(
25
+ roundIndex: number,
26
+ debateRoundFocus?: PlanDebateFocus | null,
27
+ ): string[] {
28
+ const paths = lanesForRound(roundIndex, debateRoundFocus).map((lane) => {
29
+ switch (lane) {
30
+ case "hypothesis-validation":
31
+ return `artifacts/hypothesis-validation-r${roundIndex}.yaml`;
32
+ case "validation-turn":
33
+ return `artifacts/validation-turn-r${roundIndex}.yaml`;
34
+ case "adversary-brief":
35
+ return `artifacts/adversary-brief-r${roundIndex}.yaml`;
36
+ case "sprint-audit":
37
+ return `artifacts/sprint-audit-r${roundIndex}.yaml`;
38
+ default:
39
+ return `artifacts/${lane}-r${roundIndex}.yaml`;
40
+ }
41
+ });
42
+ paths.push(`artifacts/review-round-r${roundIndex}.yaml`);
43
+ return paths;
44
+ }
@@ -5,10 +5,18 @@
5
5
  import { constants } from "node:fs";
6
6
  import { access } from "node:fs/promises";
7
7
  import { join } from "node:path";
8
- import { type DebateLaneKind, laneArtifactPath } from "./plan-debate-lane.js";
8
+ import { capsForDebate } from "./debate-bus-core.js";
9
+ import {
10
+ type PlanDebateFocus,
11
+ readDebateRoundFocus,
12
+ } from "./plan-debate-focus.js";
13
+ import { planDebateIdForRun } from "./plan-debate-id.js";
14
+ import { laneArtifactPath } from "./plan-debate-lane.js";
15
+ import { lanesForRound } from "./plan-debate-lanes.js";
9
16
  import {
10
17
  getMessengerRoundState,
11
- messengerRoundDebateReady,
18
+ loadMessengerState,
19
+ messengerRoundDialogueReady,
12
20
  } from "./plan-messenger.js";
13
21
 
14
22
  async function exists(path: string): Promise<boolean> {
@@ -20,39 +28,50 @@ async function exists(path: string): Promise<boolean> {
20
28
  }
21
29
  }
22
30
 
23
- function lanesForRound(roundIndex: number): DebateLaneKind[] {
24
- const lanes: DebateLaneKind[] = ["validation-turn", "adversary-brief"];
25
- if (roundIndex === 1) lanes.unshift("hypothesis-validation");
26
- if (roundIndex === 4) lanes.push("sprint-audit");
27
- return lanes;
28
- }
29
-
30
31
  export interface RoundStatusResult {
31
32
  round_index: number;
32
- /** Lane YAML + messenger thread complete; spawn integrator next. */
33
+ /** Lane YAML + messenger dialogue complete; spawn integrator next. */
33
34
  ready_for_integrator: boolean;
34
35
  /** review-round-rN.yaml on disk (call harness_debate_submit_round if bus not updated). */
35
36
  review_round_on_disk: boolean;
36
37
  missing: string[];
37
38
  next_tool?: string;
38
39
  messenger: { ok: boolean; errors: string[] };
40
+ dialogue: { ok: boolean; errors: string[] };
41
+ unresolved_claim_ids: string[];
42
+ exchange_count: number;
43
+ debate_round_focus?: PlanDebateFocus | null;
39
44
  }
40
45
 
41
46
  export async function getPlanDebateRoundStatus(
42
47
  runDir: string,
43
48
  roundIndex: number,
49
+ runId?: string,
50
+ opts?: { debate_round_focus?: PlanDebateFocus },
44
51
  ): Promise<RoundStatusResult> {
52
+ const focus =
53
+ opts?.debate_round_focus ??
54
+ (await readDebateRoundFocus(runDir, roundIndex));
45
55
  const missing: string[] = [];
46
- for (const lane of lanesForRound(roundIndex)) {
56
+ for (const lane of lanesForRound(roundIndex, focus)) {
47
57
  const rel = laneArtifactPath(lane, roundIndex);
48
58
  if (!(await exists(join(runDir, rel)))) {
49
59
  missing.push(rel);
50
60
  }
51
61
  }
62
+ const messengerState = await loadMessengerState(runDir);
63
+ const profile = messengerState?.debate_profile;
64
+ const caps = capsForDebate(
65
+ runId ? planDebateIdForRun(runId) : `plan-${runId ?? "unknown"}`,
66
+ profile,
67
+ );
52
68
  const roundState = await getMessengerRoundState(runDir, roundIndex);
53
- const messenger = messengerRoundDebateReady(roundState, roundIndex === 4);
54
- if (!messenger.ok) {
55
- missing.push(...messenger.errors.map((e) => `messenger: ${e}`));
69
+ const dialogueOpts = {
70
+ max_exchanges_per_round: caps.max_exchanges_per_round,
71
+ };
72
+ const dialogue = messengerRoundDialogueReady(roundState, dialogueOpts);
73
+ if (!dialogue.ok) {
74
+ missing.push(...dialogue.errors.map((e) => `messenger: ${e}`));
56
75
  }
57
76
  const reviewRound = `artifacts/review-round-r${roundIndex}.yaml`;
58
77
  const reviewRoundOnDisk = await exists(join(runDir, reviewRound));
@@ -62,14 +81,35 @@ export async function getPlanDebateRoundStatus(
62
81
  next_tool = "subagent harness/planning/hypothesis-validator";
63
82
  } else if (missing.some((m) => m.includes("validation-turn"))) {
64
83
  next_tool = "subagent harness/planning/plan-evaluator";
84
+ } else if (
85
+ missing.some((m) => m.includes("adversary-brief")) &&
86
+ !roundState?.evaluator_posted
87
+ ) {
88
+ next_tool = "subagent harness/planning/plan-evaluator";
65
89
  } else if (missing.some((m) => m.includes("adversary-brief"))) {
66
90
  next_tool =
67
91
  "harness_messenger_read_round then subagent harness/planning/plan-adversary";
68
92
  } else if (missing.some((m) => m.includes("sprint-audit"))) {
69
93
  next_tool = "subagent harness/planning/sprint-contract-auditor";
70
- } else if (!messenger.ok) {
94
+ } else if (
95
+ roundState &&
96
+ roundState.evaluator_posted &&
97
+ !roundState.adversary_posted
98
+ ) {
99
+ next_tool =
100
+ "harness_messenger_read_round then subagent harness/planning/plan-adversary";
101
+ } else if (
102
+ roundState &&
103
+ roundState.unresolved_claim_ids.length > 0 &&
104
+ roundState.exchange_count < caps.max_exchanges_per_round
105
+ ) {
106
+ const spawnEvaluator = roundState.exchange_count % 2 === 1;
107
+ next_tool = spawnEvaluator
108
+ ? "harness_debate_advance_thread → harness_messenger_read_round → subagent harness/planning/plan-evaluator (clarification; address unresolved claim_ids)"
109
+ : "harness_debate_advance_thread → harness_messenger_read_round → subagent harness/planning/plan-adversary (counter or concede)";
110
+ } else if (!dialogue.ok) {
71
111
  next_tool =
72
- "harness_debate_apply_lane (evaluator/adversary) or re-spawn lane agent";
112
+ "harness_debate_advance_thread or harness_debate_apply_lane (evaluator/adversary)";
73
113
  } else if (!reviewRoundOnDisk) {
74
114
  next_tool =
75
115
  "subagent harness/planning/review-integrator then harness_debate_submit_round";
@@ -78,10 +118,9 @@ export async function getPlanDebateRoundStatus(
78
118
  "harness_debate_submit_round with integrator draft from review-round file";
79
119
  }
80
120
 
121
+ const laneMissing = missing.filter((m) => !m.startsWith("messenger"));
81
122
  const readyForIntegrator =
82
- messenger.ok &&
83
- missing.filter((m) => !m.startsWith("messenger")).length === 0 &&
84
- !reviewRoundOnDisk;
123
+ dialogue.ok && laneMissing.length === 0 && !reviewRoundOnDisk;
85
124
 
86
125
  return {
87
126
  round_index: roundIndex,
@@ -89,6 +128,10 @@ export async function getPlanDebateRoundStatus(
89
128
  review_round_on_disk: reviewRoundOnDisk,
90
129
  missing,
91
130
  next_tool,
92
- messenger,
131
+ messenger: dialogue,
132
+ dialogue,
133
+ unresolved_claim_ids: roundState?.unresolved_claim_ids ?? [],
134
+ exchange_count: roundState?.exchange_count ?? 0,
135
+ debate_round_focus: focus,
93
136
  };
94
137
  }
@@ -17,11 +17,15 @@ import {
17
17
  } from "node:fs/promises";
18
18
  import { join } from "node:path";
19
19
  import type { DebateParticipant } from "../../lib/debate-orchestrator-types.js";
20
+ import type { DebateProfile } from "./plan-debate-eligibility.js";
21
+ import type { PlanDebateFocus } from "./plan-debate-focus.js";
20
22
 
21
23
  export type MessengerMessageKind =
22
24
  | "system"
23
25
  | "claim"
24
26
  | "rebuttal"
27
+ | "clarification"
28
+ | "counter"
25
29
  | "integrate"
26
30
  | "audit";
27
31
 
@@ -47,6 +51,8 @@ export interface MessengerRoundState {
47
51
  integrator_posted: boolean;
48
52
  claim_count: number;
49
53
  rebuttal_count: number;
54
+ exchange_count: number;
55
+ unresolved_claim_ids: string[];
50
56
  }
51
57
 
52
58
  export interface MessengerState {
@@ -55,6 +61,8 @@ export interface MessengerState {
55
61
  debate_id: string;
56
62
  opened_at: string;
57
63
  rounds: Record<string, MessengerRoundState>;
64
+ debate_profile?: DebateProfile;
65
+ required_focuses?: PlanDebateFocus[];
58
66
  }
59
67
 
60
68
  function messengerRoot(runDir: string): string {
@@ -71,7 +79,12 @@ function roundKey(roundIndex: number): string {
71
79
 
72
80
  export async function initPlanMessenger(
73
81
  runDir: string,
74
- opts: { runId: string; debateId: string },
82
+ opts: {
83
+ runId: string;
84
+ debateId: string;
85
+ debate_profile?: DebateProfile;
86
+ required_focuses?: PlanDebateFocus[];
87
+ },
75
88
  ): Promise<string> {
76
89
  const root = messengerRoot(runDir);
77
90
  await mkdir(join(root, "inbox"), { recursive: true });
@@ -82,6 +95,8 @@ export async function initPlanMessenger(
82
95
  debate_id: opts.debateId,
83
96
  opened_at: nowIso(),
84
97
  rounds: {},
98
+ debate_profile: opts.debate_profile,
99
+ required_focuses: opts.required_focuses,
85
100
  };
86
101
  await writeFile(
87
102
  join(root, "state.json"),
@@ -122,9 +137,51 @@ function defaultRoundState(roundIndex: number): MessengerRoundState {
122
137
  integrator_posted: false,
123
138
  claim_count: 0,
124
139
  rebuttal_count: 0,
140
+ exchange_count: 0,
141
+ unresolved_claim_ids: [],
125
142
  };
126
143
  }
127
144
 
145
+ /** Recompute exchange + unresolved claim ids from a round transcript. */
146
+ export function syncRoundStateFromTranscript(
147
+ round: MessengerRoundState,
148
+ messages: MessengerMessage[],
149
+ ): MessengerRoundState {
150
+ const claimed = new Set<string>();
151
+ const resolved = new Set<string>();
152
+ let exchange_count = 0;
153
+
154
+ for (const m of messages) {
155
+ if (m.from === "PlanEvaluatorAgent" && m.kind === "claim") {
156
+ round.evaluator_posted = true;
157
+ round.claim_count += m.claim_ids.length || 1;
158
+ for (const id of m.claim_ids) claimed.add(id);
159
+ }
160
+ if (m.from === "PlanAdversaryAgent" && m.kind === "rebuttal") {
161
+ round.adversary_posted = true;
162
+ round.rebuttal_count += m.in_reply_to.length || 1;
163
+ exchange_count += 1;
164
+ }
165
+ if (m.from === "PlanEvaluatorAgent" && m.kind === "clarification") {
166
+ exchange_count += 1;
167
+ for (const id of m.claim_ids) resolved.add(id);
168
+ for (const id of m.in_reply_to) resolved.add(id);
169
+ }
170
+ if (m.from === "PlanAdversaryAgent" && m.kind === "counter") {
171
+ exchange_count += 1;
172
+ for (const id of m.claim_ids) resolved.add(id);
173
+ for (const id of m.in_reply_to) resolved.add(id);
174
+ }
175
+ if (m.from === "ReviewIntegratorAgent" && m.kind === "integrate") {
176
+ round.integrator_posted = true;
177
+ }
178
+ }
179
+
180
+ round.exchange_count = exchange_count;
181
+ round.unresolved_claim_ids = [...claimed].filter((id) => !resolved.has(id));
182
+ return round;
183
+ }
184
+
128
185
  export async function postMessengerMessage(
129
186
  runDir: string,
130
187
  msg: Omit<MessengerMessage, "schema_version" | "id" | "ts"> & {
@@ -172,19 +229,10 @@ export async function postMessengerMessage(
172
229
  rounds: {},
173
230
  };
174
231
  const key = roundKey(full.round_index);
232
+ const messages = await readRoundTranscript(runDir, full.round_index);
233
+ messages.push(full);
175
234
  const round = state.rounds[key] ?? defaultRoundState(full.round_index);
176
- if (full.from === "PlanEvaluatorAgent" && full.kind === "claim") {
177
- round.evaluator_posted = true;
178
- round.claim_count += full.claim_ids.length || 1;
179
- }
180
- if (full.from === "PlanAdversaryAgent" && full.kind === "rebuttal") {
181
- round.adversary_posted = true;
182
- round.rebuttal_count += full.in_reply_to.length || 1;
183
- }
184
- if (full.from === "ReviewIntegratorAgent" && full.kind === "integrate") {
185
- round.integrator_posted = true;
186
- }
187
- state.rounds[key] = round;
235
+ state.rounds[key] = syncRoundStateFromTranscript(round, messages);
188
236
  await saveMessengerState(runDir, state);
189
237
  return full;
190
238
  }
@@ -233,13 +281,22 @@ export async function getMessengerRoundState(
233
281
  ): Promise<MessengerRoundState | null> {
234
282
  const state = await loadMessengerState(runDir);
235
283
  if (!state) return null;
236
- return state.rounds[roundKey(roundIndex)] ?? null;
284
+ const round = state.rounds[roundKey(roundIndex)];
285
+ if (!round) return null;
286
+ const transcript = await readRoundTranscript(runDir, roundIndex);
287
+ return syncRoundStateFromTranscript({ ...round }, transcript);
237
288
  }
238
289
 
239
- export function messengerRoundDebateReady(
290
+ export interface MessengerDialogueOptions {
291
+ max_exchanges_per_round?: number;
292
+ }
293
+
294
+ /** Evaluator + adversary dialogue settled; safe to spawn integrator. */
295
+ export function messengerRoundDialogueReady(
240
296
  round: MessengerRoundState | null,
241
- _requireSprintAudit: boolean,
297
+ opts: MessengerDialogueOptions = {},
242
298
  ): { ok: boolean; errors: string[] } {
299
+ const maxExchanges = opts.max_exchanges_per_round ?? 3;
243
300
  const errors: string[] = [];
244
301
  if (!round) {
245
302
  errors.push("no messenger activity for this round");
@@ -257,7 +314,26 @@ export function messengerRoundDebateReady(
257
314
  if (round.rebuttal_count < 1) {
258
315
  errors.push("adversary must rebut at least one claim (in_reply_to)");
259
316
  }
260
- if (!round.integrator_posted) {
317
+ const dialogueSettled =
318
+ round.unresolved_claim_ids.length === 0 ||
319
+ round.exchange_count >= maxExchanges;
320
+ if (!dialogueSettled) {
321
+ errors.push(
322
+ `unresolved claims remain (${round.unresolved_claim_ids.join(", ")}) and exchange_count ${round.exchange_count} < ${maxExchanges}`,
323
+ );
324
+ }
325
+ return { ok: errors.length === 0, errors };
326
+ }
327
+
328
+ /** Full round ready for harness_debate_submit_round (includes integrator). */
329
+ export function messengerRoundDebateReady(
330
+ round: MessengerRoundState | null,
331
+ _requireSprintAudit: boolean,
332
+ opts: MessengerDialogueOptions = {},
333
+ ): { ok: boolean; errors: string[] } {
334
+ const dialogue = messengerRoundDialogueReady(round, opts);
335
+ const errors = [...dialogue.errors];
336
+ if (!round?.integrator_posted) {
261
337
  errors.push(
262
338
  "ReviewIntegratorAgent must post integrate message before bus submit",
263
339
  );
@@ -243,7 +243,7 @@ export default function policyGate(pi: ExtensionAPI) {
243
243
 
244
244
  const planPhaseHint =
245
245
  state.phase === "plan"
246
- ? "\nPlan phase: scouts → decompose → hypothesis → stack-researcher → execution-plan-author → validate-plan-dag → 4-round plan debate → approve_plan → create_plan (YAML plan-packet.yaml). Post-execute: /harness-critic."
246
+ ? "\nPlan phase: scouts → decompose → hypothesis → implementation-researcher + stack-researcher → execution-plan-author → validate-plan-dag → debate eligibility + Review Gate → approve_plan → create_plan (YAML plan-packet.yaml). Post-execute: /harness-critic."
247
247
  : "";
248
248
 
249
249
  return {
@@ -30,7 +30,7 @@ under `.pi/extensions/` and auto-loaded through the package `pi.extensions`
30
30
  manifest (`package.json`).
31
31
 
32
32
  - `harness-run-context.ts` - active run + plan injection; short commands without run/plan args
33
- - `harness-live-widget.ts` - footer status (phase, plan ready, next command; no run id in UI)
33
+ - `harness-live-widget.ts` - footer status (current/next phase + plain-language status hint; no run id in UI)
34
34
  - `policy-gate.ts` - phase state machine + plan-before-mutate enforcement
35
35
  - `budget-guard.ts` - hard-stop token budget checks + budget exhausted artifacts
36
36
  - `trace-recorder.ts` - append-only run traces + HarnessRunRecord + compact index
@@ -1,8 +1,8 @@
1
1
  {
2
2
  "schema_version": "1.0.0",
3
3
  "package": "ultimate-pi",
4
- "package_version": "0.13.1",
5
- "generated_at": "2026-05-18T17:22:10.311Z",
4
+ "package_version": "0.14.0",
5
+ "generated_at": "2026-05-19T10:53:28.359Z",
6
6
  "agents": {
7
7
  "pi-pi/agent-expert": {
8
8
  "path": ".pi/agents/pi-pi/agent-expert.md",
@@ -78,35 +78,39 @@
78
78
  },
79
79
  "harness/planning/decompose": {
80
80
  "path": ".pi/agents/harness/planning/decompose.md",
81
- "sha256": "1b3f85d956d2e203ec87045a731c47f8b40f75b63fce8916fda91cefc39244a8"
81
+ "sha256": "5c3b983772d013741d50f39945bc77f178aa338aecab56b93c09216d72192c69"
82
82
  },
83
83
  "harness/planning/execution-plan-author": {
84
84
  "path": ".pi/agents/harness/planning/execution-plan-author.md",
85
- "sha256": "a69fb2e8bda9336e71ce9536071f9c8a2f4abd9d9d88930c6a8be29bdc9c5f62"
85
+ "sha256": "16f8800c50bcaf1b82ed9138889c8a0e538ee6a139aeae129ccd20cec2ec25f7"
86
86
  },
87
87
  "harness/planning/hypothesis-validator": {
88
88
  "path": ".pi/agents/harness/planning/hypothesis-validator.md",
89
- "sha256": "f75312439c441ccee72692d41f44b6e733df08e06c89e930740fc256bed3ba02"
89
+ "sha256": "9e68ec5d6aef96a3666c30227c3cbddf1aaed1182fdc94dbbd21ad3d48315ff2"
90
90
  },
91
91
  "harness/planning/hypothesis": {
92
92
  "path": ".pi/agents/harness/planning/hypothesis.md",
93
93
  "sha256": "b20c527d15c2243cd5d3a8f16cea6d44bdfd16e01915d42f3b830bf9938e5f8b"
94
94
  },
95
+ "harness/planning/implementation-researcher": {
96
+ "path": ".pi/agents/harness/planning/implementation-researcher.md",
97
+ "sha256": "dbd1c4fc74d538b110d406febfd4603eebea77d82e8b367df4596ac7ff6e54cc"
98
+ },
95
99
  "harness/planning/plan-adversary": {
96
100
  "path": ".pi/agents/harness/planning/plan-adversary.md",
97
- "sha256": "84c7fa63d38c39e32000c90093688a45bc2b96a2c6209037342222eae0c854f9"
101
+ "sha256": "7c14eaab65f356003ee2ff380f5d4e620170b5126daa67c3d226b12342f47bd2"
98
102
  },
99
103
  "harness/planning/plan-evaluator": {
100
104
  "path": ".pi/agents/harness/planning/plan-evaluator.md",
101
- "sha256": "580d8c7a31f7a6ecd9e627460459d600650580b5df63d129278beefd3f3e347c"
105
+ "sha256": "846575abe9df3e7e5be812c0c474989c1a9de8074a7884d77b9d3dd423643480"
102
106
  },
103
107
  "harness/planning/review-integrator": {
104
108
  "path": ".pi/agents/harness/planning/review-integrator.md",
105
- "sha256": "cd1e5d10f0cb8b7a4197d2e92489023c285e90e250f1badc371470165aeb8cfd"
109
+ "sha256": "bed43f3f049c279ac50a24bcffac1bbe46a8605d89c9cc6d0c3c6a87d488b1b8"
106
110
  },
107
111
  "harness/planning/scout-graphify": {
108
112
  "path": ".pi/agents/harness/planning/scout-graphify.md",
109
- "sha256": "8a5ff68306a5eedf1a62067ac8812eac4ac1fe2016cba63337ef4e90b5136e00"
113
+ "sha256": "7f385d5bda2fe04b9da52cb4cb9247324efd345579b483d3ad55a6abefad50d5"
110
114
  },
111
115
  "harness/planning/scout-semantic": {
112
116
  "path": ".pi/agents/harness/planning/scout-semantic.md",
@@ -118,11 +122,11 @@
118
122
  },
119
123
  "harness/planning/sprint-contract-auditor": {
120
124
  "path": ".pi/agents/harness/planning/sprint-contract-auditor.md",
121
- "sha256": "f613a4fa937d76936fa01155d4e7956a81878f300100f99f6a78915b0af6f7c7"
125
+ "sha256": "d915274dc9b5addae5499bc2390b348eddeb8f133b526a816e23d0d19a2618bf"
122
126
  },
123
127
  "harness/planning/stack-researcher": {
124
128
  "path": ".pi/agents/harness/planning/stack-researcher.md",
125
- "sha256": "90e2ff1348f54bebc8c0392407bf1bb4d794c942fd8d6f342d80b191c945b34e"
129
+ "sha256": "fa228920abe2b66d4d8921c4a5d85593e3019a24bbe9ae512ed9149f235e3536"
126
130
  }
127
131
  }
128
132
  }
@@ -13,9 +13,7 @@
13
13
  - `harness/planning/decompose` — DeepMind-style problem decomposition (`PlanDecompositionBrief`)
14
14
  - `harness/planning/hypothesis` — DARWIN hypothesis generation (`PlanHypothesisBrief`)
15
15
  2. **Parent maps hypothesis → PlanPacket** — `plan-packet.schema.json` unchanged; execution gating stable.
16
- 3. **Parallel pre-approval reviews:**
17
- - `harness/planning/plan-adversary` — execution risk on PlanPacket
18
- - `harness/planning/hypothesis-eval` — blind self-eval (task + hypothesis only)
16
+ 3. **Review Gate (ADR 0035):** outcome-based debate with `hypothesis-validator` on R1 (blind — task + hypothesis only). Retired `hypothesis-eval` as a separate pre-approval agent.
19
17
  4. **`approve_plan` optional `research_brief`** — rendered in `plan-review.md`; not written to `plan-packet.json`.
20
18
  5. **`--quick`** still skips semantic scout only; never skips decompose/hypothesis.
21
19