ultimate-pi 0.13.1 → 0.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agents/skills/harness-debate-plan/SKILL.md +42 -22
- package/.agents/skills/harness-orchestration/SKILL.md +3 -3
- package/.agents/skills/harness-plan/SKILL.md +10 -8
- package/.pi/agents/harness/planning/decompose.md +4 -2
- package/.pi/agents/harness/planning/execution-plan-author.md +25 -14
- package/.pi/agents/harness/planning/hypothesis-validator.md +21 -5
- package/.pi/agents/harness/planning/implementation-researcher.md +42 -0
- package/.pi/agents/harness/planning/plan-adversary.md +20 -4
- package/.pi/agents/harness/planning/plan-evaluator.md +28 -5
- package/.pi/agents/harness/planning/review-integrator.md +25 -9
- package/.pi/agents/harness/planning/scout-graphify.md +1 -1
- package/.pi/agents/harness/planning/sprint-contract-auditor.md +19 -4
- package/.pi/agents/harness/planning/stack-researcher.md +19 -10
- package/.pi/extensions/debate-orchestrator.ts +39 -435
- package/.pi/extensions/harness-debate-tools.ts +741 -0
- package/.pi/extensions/harness-live-widget.ts +39 -159
- package/.pi/extensions/harness-plan-approval.ts +88 -22
- package/.pi/extensions/harness-run-context.ts +18 -0
- package/.pi/extensions/lib/debate-bus-core.ts +488 -0
- package/.pi/extensions/lib/debate-bus-state.ts +64 -0
- package/.pi/extensions/lib/harness-spawn-budget.ts +5 -25
- package/.pi/extensions/lib/plan-approval/dialog.ts +33 -272
- package/.pi/extensions/lib/plan-approval/format-plan.ts +12 -85
- package/.pi/extensions/lib/plan-approval/plan-review.ts +62 -6
- package/.pi/extensions/lib/plan-approval/render.ts +6 -0
- package/.pi/extensions/lib/plan-approval/types.ts +1 -0
- package/.pi/extensions/lib/plan-approval/validate.ts +1 -1
- package/.pi/extensions/lib/plan-debate-eligibility.ts +214 -0
- package/.pi/extensions/lib/plan-debate-envelope.ts +2 -0
- package/.pi/extensions/lib/plan-debate-focus.ts +151 -0
- package/.pi/extensions/lib/plan-debate-gate.ts +198 -0
- package/.pi/extensions/lib/plan-debate-id.ts +39 -0
- package/.pi/extensions/lib/plan-debate-lane.ts +220 -0
- package/.pi/extensions/lib/plan-debate-lanes.ts +44 -0
- package/.pi/extensions/lib/plan-debate-round-status.ts +137 -0
- package/.pi/extensions/lib/plan-debate-write-guard.ts +20 -0
- package/.pi/extensions/lib/plan-messenger.ts +352 -0
- package/.pi/extensions/lib/plan-review-integrator-rules.ts +119 -0
- package/.pi/extensions/lib/plan-scope-guard.ts +89 -0
- package/.pi/extensions/policy-gate.ts +1 -1
- package/.pi/harness/README.md +1 -1
- package/.pi/harness/agents.manifest.json +16 -12
- package/.pi/harness/docs/adrs/0034-darwin-plan-research-pipeline.md +1 -3
- package/.pi/harness/docs/adrs/0035-plan-phase-review-gate.md +13 -5
- package/.pi/harness/docs/adrs/0036-implementation-research-and-selective-debate.md +51 -0
- package/.pi/harness/docs/adrs/README.md +2 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/artifacts/implementation-research.yaml +28 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/artifacts/review-round-r1.yaml +24 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/artifacts/review-round-r2.yaml +25 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/plan-packet.yaml +196 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/plan-review.md +14 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/research-brief.yaml +62 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/implementation-research.yaml +28 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r2.yaml +24 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r3.yaml +24 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/research-brief.yaml +29 -0
- package/.pi/harness/evals/smoke/smoke-harness-plan.mjs +97 -16
- package/.pi/harness/specs/plan-implementation-research-brief.schema.json +128 -0
- package/.pi/harness/specs/plan-review-round-draft.schema.json +1 -1
- package/.pi/harness/specs/round-result.schema.json +15 -2
- package/.pi/lib/harness-ui-state.ts +92 -0
- package/.pi/prompts/harness-plan.md +90 -30
- package/.pi/prompts/planning-rubrics.md +31 -0
- package/CHANGELOG.md +23 -0
- package/package.json +3 -3
- package/.pi/extensions/lib/plan-approval/fallback.ts +0 -50
|
@@ -0,0 +1,741 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* P0–P3 plan debate tools — bus + pi-messenger transport.
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { mkdir, readFile } from "node:fs/promises";
|
|
6
|
+
import { dirname, join } from "node:path";
|
|
7
|
+
import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
|
|
8
|
+
import { Type } from "@sinclair/typebox";
|
|
9
|
+
import { parse as parseYaml } from "yaml";
|
|
10
|
+
import type { DebateParticipant } from "../lib/debate-orchestrator-types.js";
|
|
11
|
+
import {
|
|
12
|
+
getLatestRunContext,
|
|
13
|
+
getRunIdFromSession,
|
|
14
|
+
} from "../lib/harness-run-context.js";
|
|
15
|
+
import { writeYamlFile } from "../lib/harness-yaml.js";
|
|
16
|
+
import {
|
|
17
|
+
acceptDebateRound,
|
|
18
|
+
capsForDebate,
|
|
19
|
+
finalizeDebateConsensus,
|
|
20
|
+
openDebateBus,
|
|
21
|
+
} from "./lib/debate-bus-core.js";
|
|
22
|
+
import { getDebateState } from "./lib/debate-bus-state.js";
|
|
23
|
+
import { claimExtensionLoad } from "./lib/extension-load-guard.js";
|
|
24
|
+
import { captureHarnessEvent } from "./lib/harness-posthog.js";
|
|
25
|
+
import {
|
|
26
|
+
type DebateEligibilityInput,
|
|
27
|
+
harnessPlanDebateEligibility,
|
|
28
|
+
} from "./lib/plan-debate-eligibility.js";
|
|
29
|
+
import {
|
|
30
|
+
buildPlanReviewRoundEnvelope,
|
|
31
|
+
type PlanReviewRoundDraft,
|
|
32
|
+
} from "./lib/plan-debate-envelope.js";
|
|
33
|
+
import {
|
|
34
|
+
getPlanFocusCoverage,
|
|
35
|
+
planDebateOutcomeComplete,
|
|
36
|
+
} from "./lib/plan-debate-focus.js";
|
|
37
|
+
import {
|
|
38
|
+
normalizePlanDebateId,
|
|
39
|
+
planDebateIdForRun,
|
|
40
|
+
} from "./lib/plan-debate-id.js";
|
|
41
|
+
import {
|
|
42
|
+
applyDebateLane,
|
|
43
|
+
type DebateLaneKind,
|
|
44
|
+
debateLaneForAgent,
|
|
45
|
+
formatApplyLaneMessage,
|
|
46
|
+
} from "./lib/plan-debate-lane.js";
|
|
47
|
+
import { getPlanDebateRoundStatus } from "./lib/plan-debate-round-status.js";
|
|
48
|
+
import { withReviewRoundYamlWrite } from "./lib/plan-debate-write-guard.js";
|
|
49
|
+
import {
|
|
50
|
+
formatTranscriptForSpawn,
|
|
51
|
+
getMessengerRoundState,
|
|
52
|
+
initPlanMessenger,
|
|
53
|
+
loadMessengerState,
|
|
54
|
+
messengerRoundDebateReady,
|
|
55
|
+
postMessengerMessage,
|
|
56
|
+
readRoundTranscript,
|
|
57
|
+
} from "./lib/plan-messenger.js";
|
|
58
|
+
import {
|
|
59
|
+
loadValidationTurnYaml,
|
|
60
|
+
validateIntegratorDraft,
|
|
61
|
+
} from "./lib/plan-review-integrator-rules.js";
|
|
62
|
+
import { assessPlanScopeDrift } from "./lib/plan-scope-guard.js";
|
|
63
|
+
|
|
64
|
+
// @ts-expect-error pi extensions run as ESM
|
|
65
|
+
const MODULE_URL = import.meta.url;
|
|
66
|
+
|
|
67
|
+
/**
 * Resolve the harness run id for the current session.
 *
 * Asks `getRunIdFromSession` with the session's entries and session id, and
 * falls back to the session id itself when that lookup yields null/undefined.
 *
 * @param ctx - Context exposing a `sessionManager` with `getEntries()` and `getSessionId()`.
 * @returns The run id from the session, or the session id as a fallback.
 */
function getRunId(ctx: {
  sessionManager: { getEntries(): unknown[]; getSessionId(): string };
}): string {
  const { sessionManager } = ctx;
  const sessionEntries = sessionManager.getEntries();
  const currentSessionId = sessionManager.getSessionId();
  const recordedRunId = getRunIdFromSession(sessionEntries, currentSessionId);
  return recordedRunId ?? sessionManager.getSessionId();
}
|
|
77
|
+
|
|
78
|
+
/**
 * Build the per-run harness directory path: `<projectRoot>/.pi/harness/runs/<runId>`.
 *
 * @param projectRoot - Root directory of the project.
 * @param runId - Harness run identifier used as the final path segment.
 * @returns The joined path (no filesystem access is performed).
 */
function runDir(projectRoot: string, runId: string): string {
  const segments = [".pi", "harness", "runs", runId];
  return join(projectRoot, ...segments);
}
|
|
81
|
+
|
|
82
|
+
/**
 * Create the hook object handed to the debate bus functions.
 *
 * The only hook is `appendEntry`, which forwards its arguments unchanged to
 * `pi.appendEntry` and returns whatever that call returns.
 *
 * @param pi - Extension API whose `appendEntry` receives the forwarded calls.
 * @returns An object with a single `appendEntry(customType, data)` function.
 */
function debateHooks(pi: ExtensionAPI) {
  const appendEntry = (customType: string, data: unknown) =>
    pi.appendEntry(customType, data);
  return { appendEntry };
}
|
|
88
|
+
|
|
89
|
+
/**
 * Emit a `harness_debate_round` telemetry event for the given session.
 *
 * @param sessionId - Session id passed as the first argument to `captureHarnessEvent`.
 * @param props - Event properties forwarded unchanged.
 */
function telemetryRound(
  sessionId: string,
  props: Record<string, unknown>,
): void {
  const eventName = "harness_debate_round";
  captureHarnessEvent(sessionId, eventName, props);
}
|
|
95
|
+
|
|
96
|
+
/**
 * Extract the per-agent result list from a subagent tool result's `details`.
 *
 * `details` arrives as `unknown`, so the shape is checked at runtime instead of
 * being trusted: anything other than an array under `results` yields an empty
 * list (a string would otherwise be iterated character by character), and
 * entries that are not objects are dropped so callers can read `.agent` and
 * `.finalOutput` without a null dereference.
 *
 * @param details - The `details` payload of a tool result; may be null/undefined.
 * @returns The object entries of `details.results`, or `[]` when absent or malformed.
 */
function subagentResults(
  details: unknown,
): Array<{ agent: string; finalOutput?: string }> {
  const candidate = (details as { results?: unknown } | null | undefined)
    ?.results;
  if (!Array.isArray(candidate)) return [];
  return candidate.filter(
    (entry) => entry !== null && typeof entry === "object",
  ) as Array<{ agent: string; finalOutput?: string }>;
}
|
|
104
|
+
|
|
105
|
+
export default function harnessDebateTools(pi: ExtensionAPI) {
|
|
106
|
+
if (!claimExtensionLoad("harness-debate-tools", MODULE_URL)) return;
|
|
107
|
+
|
|
108
|
+
// After a successful `subagent` tool call, apply each recognised debate lane
// output to the run directory and tell the parent which tool to call next.
pi.on("tool_result", async (event, ctx) => {
  const isSuccessfulSubagent = !event.isError && event.toolName === "subagent";
  if (!isSuccessfulSubagent) return;

  const runId = getRunId(ctx);
  const rd = runDir(process.cwd(), runId);

  // Only act when the latest recorded run context belongs to this run.
  const latestCtx = getLatestRunContext(ctx.sessionManager.getEntries());
  const activeRunId = latestCtx?.run_id;
  if (!activeRunId || activeRunId !== runId) return;

  const appliedMessages: string[] = [];
  let latestRound = 1;
  for (const agentResult of subagentResults(event.details)) {
    const lane = debateLaneForAgent(agentResult.agent ?? "");
    if (!lane) continue;
    // Agents with no (or whitespace-only) final output are skipped.
    if (!agentResult.finalOutput?.trim()) continue;

    const outcome = await applyDebateLane({
      runDir: rd,
      lane,
      content: agentResult.finalOutput,
    });
    if (outcome.round_index) {
      latestRound = outcome.round_index;
    }
    pi.appendEntry("harness-debate-lane-applied", {
      agent: agentResult.agent,
      ...outcome,
    });
    appliedMessages.push(formatApplyLaneMessage(outcome));
  }
  if (appliedMessages.length === 0) return;

  // Status is computed for the last round index reported by an applied lane
  // (1 when none reported one).
  const status = await getPlanDebateRoundStatus(rd, latestRound, runId);
  let nextStepLine = "Check harness_debate_round_status for this round.";
  if (status.next_tool) {
    nextStepLine = `**Required next tool (do not stop with prose only):** ${status.next_tool}`;
  }
  const messageLines = [
    "**Debate lane auto-applied from subagent output**",
    ...appliedMessages,
    "",
    nextStepLine,
  ];
  pi.sendMessage({
    customType: "harness-debate-next-step",
    content: messageLines.join("\n"),
    display: true,
    details: { applied: appliedMessages, status },
  });
});
|
|
151
|
+
|
|
152
|
+
// Tool: choose the debate profile from caller-supplied risk flags plus the
// implementation, stack and decomposition YAML briefs found under the run dir.
pi.registerTool({
  name: "harness_plan_debate_eligibility",
  label: "Plan Debate Eligibility",
  description:
    "Pre-debate profile selection (full|standard|light). Call after DAG pass, before harness_debate_open. Uses risk, fork, implementation/stack briefs — not R1 hypothesis output.",
  parameters: Type.Object({
    risk_level: Type.Optional(
      Type.String({ description: "low | med | high" }),
    ),
    material_fork: Type.Optional(Type.Boolean()),
    dag_pass: Type.Optional(Type.Boolean()),
    dag_manually_patched: Type.Optional(Type.Boolean()),
    implementation_brief_path: Type.Optional(
      Type.String({
        description:
          "Default: artifacts/implementation-research.yaml under run dir",
      }),
    ),
    stack_brief_path: Type.Optional(Type.String()),
    decomposition_path: Type.Optional(Type.String()),
  }),
  async execute(_id, params, _signal, _onUpdate, ctx) {
    const runId = getRunId(ctx);
    const rd = runDir(process.cwd(), runId);
    const p = params as {
      risk_level?: string;
      material_fork?: boolean;
      dag_pass?: boolean;
      dag_manually_patched?: boolean;
      implementation_brief_path?: string;
      stack_brief_path?: string;
      decomposition_path?: string;
    };

    /**
     * Read a YAML file relative to the run dir. Best-effort: a missing,
     * unreadable or unparsable file yields null. A document whose top level is
     * not a mapping (scalar, array, empty) also yields null, so the value
     * never claims to be a Record when it is not one.
     *
     * NOTE(review): `rel` comes from tool parameters and is joined unchecked,
     * so `..` segments can resolve outside the run dir — consider confining it.
     */
    const loadYaml = async (
      rel: string,
    ): Promise<Record<string, unknown> | null> => {
      try {
        const raw = await readFile(join(rd, rel), "utf-8");
        const parsed: unknown = parseYaml(raw);
        const isMapping =
          typeof parsed === "object" &&
          parsed !== null &&
          !Array.isArray(parsed);
        return isMapping ? (parsed as Record<string, unknown>) : null;
      } catch {
        return null;
      }
    };

    // The three briefs are independent files, so read them concurrently.
    const [implementation_brief, stack_brief, decomposition] =
      await Promise.all([
        loadYaml(
          p.implementation_brief_path ??
            "artifacts/implementation-research.yaml",
        ),
        loadYaml(p.stack_brief_path ?? "artifacts/stack.yaml"),
        loadYaml(p.decomposition_path ?? "artifacts/decomposition.yaml"),
      ]);

    const input: DebateEligibilityInput = {
      risk_level: p.risk_level,
      material_fork: p.material_fork,
      dag_pass: p.dag_pass,
      dag_manually_patched: p.dag_manually_patched,
      implementation_brief,
      stack_brief,
      decomposition,
    };
    const result = harnessPlanDebateEligibility(input);
    const lines = [
      `profile: ${result.profile}`,
      `required_focuses: ${result.required_focuses.join(", ")}`,
      `min_focus_rounds: ${result.min_focus_rounds}`,
      `debate_global_cap: ${result.debate_global_cap}`,
      `human_required: ${result.human_required}`,
      ...result.rationale.map((r) => `- ${r}`),
    ];
    return {
      content: [{ type: "text", text: lines.join("\n") }],
      details: result,
    };
  },
});
|
|
226
|
+
|
|
227
|
+
// Tool: open the plan debate bus for this run and initialise the messenger
// state under the run directory, then report what was opened.
pi.registerTool({
  name: "harness_debate_open",
  label: "Open Plan Debate",
  description:
    "Open plan-phase debate bus (plan-<run_id>) and initialize pi-messenger inboxes/threads. Call once after harness_plan_debate_eligibility.",
  parameters: Type.Object({
    debate_id: Type.Optional(
      Type.String({ description: "Optional; normalized to plan-<run_id>" }),
    ),
    debate_profile: Type.Optional(
      Type.String({ description: "full | standard | light" }),
    ),
    required_focuses: Type.Optional(
      Type.Array(
        Type.String({ description: "spec | wbs | schedule | quality" }),
      ),
    ),
  }),
  async execute(_id, params, _signal, _onUpdate, ctx) {
    const runId = getRunId(ctx);
    const projectRoot = process.cwd();
    const p = params as {
      debate_id?: string;
      debate_profile?: string;
      required_focuses?: string[];
    };

    const requestedId = String(p.debate_id ?? "");
    const { debateId, corrected, warning } = normalizePlanDebateId(
      requestedId,
      runId,
    );

    // Any value other than the three known profiles falls back to "standard".
    let profile: "full" | "standard" | "light" = "standard";
    if (p.debate_profile === "full" || p.debate_profile === "light") {
      profile = p.debate_profile;
    }

    // Unknown focus names are silently dropped.
    const knownFocuses = ["spec", "wbs", "schedule", "quality"];
    const required_focuses = (p.required_focuses ?? []).filter((f) =>
      knownFocuses.includes(f),
    ) as Array<"spec" | "wbs" | "schedule" | "quality">;

    const opened = await openDebateBus(runId, debateId, debateHooks(pi), {
      debate_profile: profile,
      required_focuses:
        required_focuses.length > 0 ? required_focuses : undefined,
    });
    await initPlanMessenger(runDir(projectRoot, runId), {
      runId,
      debateId,
      debate_profile: profile,
      required_focuses: opened.required_focuses,
    });

    const sessionId = ctx.sessionManager.getSessionId();
    telemetryRound(sessionId, {
      run_id: runId,
      debate_id: debateId,
      event: "open",
      debate_phase: "plan",
      corrected_id: corrected,
    });

    // Prefer the caller's focuses, then whatever the bus reports, then the default text.
    let focusLine = "Required focuses: (default all four)";
    if (required_focuses.length) {
      focusLine = `Required focuses: ${required_focuses.join(", ")}`;
    } else if (opened.required_focuses?.length) {
      focusLine = `Required focuses: ${opened.required_focuses.join(", ")}`;
    }

    const lines = [
      `Plan debate opened: ${debateId}`,
      `Profile: ${profile}`,
      focusLine,
      `Messenger: debate-messenger/ (inbox + threads/round-N/transcript.jsonl)`,
    ];
    if (warning) {
      lines.push(`Note: ${warning}`);
    }
    return {
      content: [{ type: "text", text: lines.join("\n") }],
      details: { run_id: runId, debate_id: debateId, state: opened },
    };
  },
});
|
|
303
|
+
|
|
304
|
+
// Tool: post one message for a debate round via postMessengerMessage and
// echo back the stored message's kind, sender, round and id.
pi.registerTool({
  name: "harness_messenger_post",
  label: "Post Debate Messenger Message",
  description:
    "Post a claim/rebuttal/integrate message to the round thread and agent inbox (pi-messenger style). Evaluator posts claims first; adversary rebuts with in_reply_to claim ids.",
  parameters: Type.Object({
    round_index: Type.Number({ description: "1–12 (monotonic per run)" }),
    from: Type.String({
      description:
        "PlanEvaluatorAgent | PlanAdversaryAgent | ReviewIntegratorAgent | HypothesisValidatorAgent | SprintContractAuditorAgent",
    }),
    kind: Type.String({
      description:
        "claim | rebuttal | clarification | counter | integrate | audit | system",
    }),
    body: Type.String(),
    to: Type.Optional(Type.Array(Type.String())),
    in_reply_to: Type.Optional(Type.Array(Type.String())),
    claim_ids: Type.Optional(Type.Array(Type.String())),
    evidence_refs: Type.Optional(Type.Array(Type.String())),
    artifact_path: Type.Optional(Type.String()),
  }),
  async execute(_id, params, _signal, _onUpdate, ctx) {
    type MessageKind =
      | "claim"
      | "rebuttal"
      | "clarification"
      | "counter"
      | "integrate"
      | "audit"
      | "system";
    const runId = getRunId(ctx);
    const p = params as {
      round_index: number;
      from: DebateParticipant;
      kind: MessageKind;
      body: string;
      to?: Array<DebateParticipant | "broadcast">;
      in_reply_to?: string[];
      claim_ids?: string[];
      evidence_refs?: string[];
      artifact_path?: string;
    };

    // Omitted recipients default to a broadcast; omitted lists default to empty.
    const recipients = p.to ?? ["broadcast"];
    const targetDir = runDir(process.cwd(), runId);
    const msg = await postMessengerMessage(targetDir, {
      from: p.from,
      kind: p.kind,
      round_index: p.round_index,
      to: recipients,
      body: p.body,
      in_reply_to: p.in_reply_to ?? [],
      claim_ids: p.claim_ids ?? [],
      evidence_refs: p.evidence_refs ?? [],
      artifact_path: p.artifact_path,
    });

    const summary = `Posted ${msg.kind} from ${msg.from} (round ${msg.round_index}, id ${msg.id})`;
    return {
      content: [{ type: "text", text: summary }],
      details: { message: msg },
    };
  },
});
|
|
368
|
+
|
|
369
|
+
// Tool: read one round's messenger transcript and return it formatted by
// formatTranscriptForSpawn, with the message count in `details`.
pi.registerTool({
  name: "harness_messenger_read_round",
  label: "Read Debate Round Transcript",
  description:
    "Return formatted messenger transcript for spawning adversary or integrator with full thread context.",
  parameters: Type.Object({
    round_index: Type.Number(),
  }),
  async execute(_id, params, _signal, _onUpdate, ctx) {
    const runId = getRunId(ctx);
    const { round_index } = params as { round_index: number };
    const roundIndex = Number(round_index);
    const transcriptDir = runDir(process.cwd(), runId);
    const messages = await readRoundTranscript(transcriptDir, roundIndex);
    const text = formatTranscriptForSpawn(messages);
    const details = {
      round_index: roundIndex,
      message_count: messages.length,
    };
    return {
      content: [{ type: "text", text }],
      details,
    };
  },
});
|
|
393
|
+
|
|
394
|
+
// Tool: submit one review round. Steps, in order: post the integrator message,
// run the messenger gate, run the integrator rules, write
// artifacts/review-round-rN.yaml, then hand the round envelope to the bus.
pi.registerTool({
  name: "harness_debate_submit_round",
  label: "Submit Plan Review Round",
  description:
    "Validate lane YAML + messenger thread, write review-round-rN.yaml, emit bus round envelope. Parent must not write review-round files directly.",
  parameters: Type.Object({
    round_index: Type.Number({ description: "1–12 (monotonic per run)" }),
    integrator_draft: Type.Record(Type.String(), Type.Unknown(), {
      description: "ReviewIntegrator YAML object (review-round-rN fields)",
    }),
  }),
  async execute(_id, params, _signal, _onUpdate, ctx) {
    // Shared shape for the three early error returns below.
    const fail = (text: string, details: Record<string, unknown>) => ({
      content: [{ type: "text" as const, text }],
      details,
      isError: true,
    });

    const runId = getRunId(ctx);
    const projectRoot = process.cwd();
    const p = params as {
      round_index: number;
      integrator_draft: Record<string, unknown>;
    };
    const roundIndex = Number(p.round_index);

    // The draft object from params is updated in place (round index, schema
    // version and, later, review_gate_ready) before it is written to disk.
    const draft = p.integrator_draft as unknown as PlanReviewRoundDraft;
    draft.round_index = roundIndex;
    if (!draft.schema_version) {
      draft.schema_version = "1.0.0";
    }

    const debateId = planDebateIdForRun(runId);
    const rd = runDir(projectRoot, runId);
    const relPath = `artifacts/review-round-r${roundIndex}.yaml`;

    // The integrator message is posted before any validation runs.
    // NOTE(review): a failed submission that is retried will post it again —
    // confirm whether the messenger gate depends on this ordering.
    const summary = draft.round_summary;
    const integratorBody =
      typeof summary === "string" && summary
        ? summary
        : "Review integrator synthesis for this round.";
    await postMessengerMessage(rd, {
      from: "ReviewIntegratorAgent",
      kind: "integrate",
      round_index: roundIndex,
      to: ["broadcast"],
      body: integratorBody,
      in_reply_to: [],
      claim_ids: [],
      evidence_refs: [relPath],
    });

    // Gate 1: messenger thread state for this round.
    // NOTE(review): caps are looked up without a debate profile here, unlike
    // harness_debate_focus_coverage — confirm that is intended.
    const caps = capsForDebate(debateId);
    const roundState = await getMessengerRoundState(rd, roundIndex);
    const isRoundFourOrLater = roundIndex >= 4;
    const mCheck = messengerRoundDebateReady(roundState, isRoundFourOrLater, {
      max_exchanges_per_round: caps.max_exchanges_per_round,
    });
    if (!mCheck.ok) {
      return fail(`Messenger gate failed:\n- ${mCheck.errors.join("\n- ")}`, {
        errors: mCheck.errors,
      });
    }

    // Gate 2: integrator draft rules, checked against the round's validation turn.
    const validationTurn = await loadValidationTurnYaml(rd, roundIndex);
    const integratorValidation = validateIntegratorDraft(
      draft as unknown as Record<string, unknown>,
      { validationTurn },
    );
    if (!integratorValidation.ok) {
      return fail(
        `Integrator rules failed:\n- ${integratorValidation.errors.join("\n- ")}`,
        { errors: integratorValidation.errors },
      );
    }
    // The validator's verdict replaces whatever review_gate_ready the draft carried.
    draft.review_gate_ready = integratorValidation.review_gate_ready;

    // Persist the round artifact inside the write guard.
    const absPath = join(rd, relPath);
    await withReviewRoundYamlWrite(async () => {
      await mkdir(dirname(absPath), { recursive: true });
      await writeYamlFile(absPath, draft);
    });

    // (Re)open the bus when no state is loaded for this debate, then submit.
    const envelope = buildPlanReviewRoundEnvelope(draft, {
      runId,
      debateId,
    });
    const busState = getDebateState();
    const busMatchesDebate = Boolean(busState) && busState.debate_id === debateId;
    if (!busMatchesDebate) {
      await openDebateBus(runId, debateId, debateHooks(pi));
    }
    const result = await acceptDebateRound(envelope, debateHooks(pi));
    if (!result.ok) {
      return fail(`Bus round rejected: ${result.reason ?? "unknown"}`, {
        envelope,
      });
    }

    const sessionId = ctx.sessionManager.getSessionId();
    telemetryRound(sessionId, {
      run_id: runId,
      debate_id: debateId,
      round_index: roundIndex,
      review_gate_ready: draft.review_gate_ready,
      messenger_messages: roundState?.claim_count,
    });

    const successText = `Round ${roundIndex} submitted to ${debateId} (review_gate_ready=${draft.review_gate_ready})`;
    return {
      content: [{ type: "text", text: successText }],
      details: {
        path: relPath,
        envelope,
        review_gate_ready: draft.review_gate_ready,
        warnings: integratorValidation.warnings,
      },
    };
  },
});
|
|
522
|
+
|
|
523
|
+
// Tool: finalize the debate consensus with a rationale (a default sentence is
// used when none is given) and report the resulting policy decision.
pi.registerTool({
  name: "harness_debate_consensus",
  label: "Finalize Plan Debate Consensus",
  description:
    "After all focus areas covered (spec|wbs|schedule|quality) and last review_gate_ready true, emit consensus packet to .pi/harness/debates/plan-<run_id>.consensus.json",
  parameters: Type.Object({
    rationale: Type.Optional(Type.String()),
  }),
  async execute(_id, params, _signal, _onUpdate, ctx) {
    const runId = getRunId(ctx);

    // A blank or whitespace-only rationale falls back to the default text.
    const supplied = String(
      (params as { rationale?: string }).rationale ?? "",
    ).trim();
    const rationale = supplied
      ? supplied
      : "Plan Review Gate consensus after focus coverage and messenger-backed rounds.";

    const decision = await finalizeDebateConsensus(rationale, debateHooks(pi));
    const debateId = planDebateIdForRun(runId);

    const sessionId = ctx.sessionManager.getSessionId();
    captureHarnessEvent(sessionId, "harness_debate_consensus", {
      run_id: runId,
      debate_id: debateId,
      policy_decision: decision,
    });

    const text = `Consensus: ${decision ?? "unknown"} (${debateId})`;
    return {
      content: [{ type: "text", text }],
      details: { policy_decision: decision, debate_id: debateId },
    };
  },
});
|
|
561
|
+
|
|
562
|
+
// Tool: manual counterpart of the tool_result auto-apply — passes lane content
// to applyDebateLane and reports its outcome (isError mirrors `!result.ok`).
pi.registerTool({
  name: "harness_debate_apply_lane",
  label: "Apply Debate Lane YAML + Messenger",
  description:
    "Parse subagent lane output, write artifacts/*-rN.yaml, and post evaluator claims / adversary rebuttals to messenger. Prefer letting subagent tool_result auto-apply; use this if auto-apply missed fenced YAML.",
  parameters: Type.Object({
    lane: Type.String({
      description:
        "hypothesis-validation | validation-turn | adversary-brief | sprint-audit",
    }),
    content: Type.String({ description: "Fenced YAML/JSON from subagent" }),
    round_index: Type.Optional(Type.Number()),
  }),
  async execute(_id, params, _signal, _onUpdate, ctx) {
    const runId = getRunId(ctx);
    const p = params as {
      lane: DebateLaneKind;
      content: string;
      round_index?: number;
    };
    const targetDir = runDir(process.cwd(), runId);
    const result = await applyDebateLane({
      runDir: targetDir,
      lane: p.lane,
      content: p.content,
      roundIndex: p.round_index,
    });
    const text = formatApplyLaneMessage(result);
    return {
      content: [{ type: "text", text }],
      details: result,
      isError: !result.ok,
    };
  },
});
|
|
595
|
+
|
|
596
|
+
// Tool: report a round's status — readiness for the integrator, any missing
// items, and the suggested next tool — as returned by getPlanDebateRoundStatus.
pi.registerTool({
  name: "harness_debate_round_status",
  label: "Plan Debate Round Status",
  description:
    "List missing lane artifacts and messenger steps for a Review Gate round. Call when resuming after a stop.",
  parameters: Type.Object({
    round_index: Type.Number({ description: "1–12 (monotonic per run)" }),
    debate_round_focus: Type.Optional(
      Type.String({ description: "spec | wbs | schedule | quality" }),
    ),
  }),
  async execute(_id, params, _signal, _onUpdate, ctx) {
    const runId = getRunId(ctx);
    const p = params as {
      round_index: number;
      debate_round_focus?: string;
    };
    const roundIndex = Number(p.round_index);

    // Only the four known focus names are forwarded; anything else is ignored.
    let focus: "spec" | "wbs" | "schedule" | "quality" | undefined;
    switch (p.debate_round_focus) {
      case "spec":
      case "wbs":
      case "schedule":
      case "quality":
        focus = p.debate_round_focus;
        break;
      default:
        focus = undefined;
    }

    const status = await getPlanDebateRoundStatus(
      runDir(process.cwd(), runId),
      roundIndex,
      runId,
      focus ? { debate_round_focus: focus } : undefined,
    );

    const lines = [
      `Round ${roundIndex}: ready_for_integrator=${status.ready_for_integrator}`,
    ];
    if (status.missing.length) {
      lines.push(`Missing:\n- ${status.missing.join("\n- ")}`);
    } else {
      lines.push("Lane + messenger prerequisites satisfied.");
    }
    if (status.next_tool) {
      lines.push(`Next: ${status.next_tool}`);
    }
    return {
      content: [{ type: "text", text: lines.join("\n\n") }],
      details: status,
    };
  },
});
|
|
640
|
+
|
|
641
|
+
// Tool: summarise focus coverage for the run — required vs covered focuses,
// the last round's gate flag, outcome completeness and the round budget.
pi.registerTool({
  name: "harness_debate_focus_coverage",
  label: "Plan Debate Focus Coverage",
  description:
    "Return which Review Gate focuses (spec|wbs|schedule|quality) are covered by submitted review-round artifacts and whether debate outcome is complete.",
  parameters: Type.Object({}),
  async execute(_id, _params, _signal, _onUpdate, ctx) {
    const runId = getRunId(ctx);
    const rd = runDir(process.cwd(), runId);

    // Profile and required focuses come from the stored messenger state, if any.
    const messenger = await loadMessengerState(rd);
    const requiredFocuses = messenger?.required_focuses;
    const profile = messenger?.debate_profile;

    const coverage = await getPlanFocusCoverage(rd, { requiredFocuses });
    const caps = capsForDebate(planDebateIdForRun(runId), profile);
    const complete = planDebateOutcomeComplete(coverage, {
      requiredFocuses,
      minRoundIndex: caps.min_focus_rounds,
    });

    const requiredList = requiredFocuses ?? [
      "spec",
      "wbs",
      "schedule",
      "quality",
    ];
    let missingLine = "All required focuses covered.";
    if (coverage.missing.length) {
      missingLine = `Missing: ${coverage.missing.join(", ")}`;
    }
    const lines = [
      `Profile: ${profile ?? "standard"}`,
      `Required: ${requiredList.join(", ")}`,
      `Covered: ${coverage.covered.join(", ") || "(none)"}`,
      missingLine,
      `Last round: ${coverage.last_round_index}, review_gate_ready=${coverage.last_review_gate_ready}`,
      `Outcome complete: ${complete}`,
      `Budget: min_focus_rounds=${caps.min_focus_rounds}, max_rounds=${caps.max_rounds}, max_exchanges_per_round=${caps.max_exchanges_per_round}`,
    ];
    return {
      content: [{ type: "text", text: lines.join("\n") }],
      details: {
        coverage,
        caps,
        complete,
        profile,
      },
    };
  },
});
|
|
683
|
+
|
|
684
|
+
// Tool: report a round's exchange count, unresolved claim ids and the next
// suggested tool, all taken from getPlanDebateRoundStatus.
pi.registerTool({
  name: "harness_debate_advance_thread",
  label: "Advance Plan Debate Thread",
  description:
    "Ping-pong helper: read round transcript and return next spawn (evaluator clarification vs adversary counter) based on unresolved claim_ids and exchange_count.",
  parameters: Type.Object({
    round_index: Type.Number(),
  }),
  async execute(_id, params, _signal, _onUpdate, ctx) {
    const runId = getRunId(ctx);
    const { round_index } = params as { round_index: number };
    const roundIndex = Number(round_index);
    const status = await getPlanDebateRoundStatus(
      runDir(process.cwd(), runId),
      roundIndex,
      runId,
    );

    let claimsLine = "No unresolved claims.";
    if (status.unresolved_claim_ids.length) {
      claimsLine = `Unresolved claims: ${status.unresolved_claim_ids.join(", ")}`;
    }
    let nextLine = "Dialogue complete — spawn review-integrator.";
    if (status.next_tool) {
      nextLine = `Next: ${status.next_tool}`;
    }
    const text = [
      `Round ${roundIndex}: exchange_count=${status.exchange_count}`,
      claimsLine,
      nextLine,
    ].join("\n");
    return {
      content: [{ type: "text", text }],
      details: status,
    };
  },
});
|
|
717
|
+
|
|
718
|
+
// Tool: run assessPlanScopeDrift on the task summary and decomposition text
// and print its summary, drift flag and overlap score (three decimals).
pi.registerTool({
  name: "harness_plan_scope_check",
  label: "Plan Scope Drift Check",
  description:
    "P2 guard: compare task_summary with decomposition text; returns material_drift when plan narrows to infra-only work.",
  parameters: Type.Object({
    task_summary: Type.String(),
    decomposition_text: Type.String(),
  }),
  async execute(_id, params) {
    const { task_summary, decomposition_text } = params as {
      task_summary: string;
      decomposition_text: string;
    };
    const result = assessPlanScopeDrift(task_summary, decomposition_text);
    const overlap = result.overlap_score.toFixed(3);
    const text = `${result.summary}\nmaterial_drift=${result.material_drift} overlap=${overlap}`;
    return {
      content: [{ type: "text", text }],
      details: result,
    };
  },
});
|
|
741
|
+
}
|