ultimate-pi 0.14.0 → 0.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agents/skills/harness-debate-plan/SKILL.md +41 -61
- package/.agents/skills/harness-orchestration/SKILL.md +2 -2
- package/.agents/skills/harness-plan/SKILL.md +10 -8
- package/.pi/agents/harness/planning/decompose.md +4 -2
- package/.pi/agents/harness/planning/execution-plan-author.md +25 -14
- package/.pi/agents/harness/planning/hypothesis-validator.md +21 -5
- package/.pi/agents/harness/planning/implementation-researcher.md +42 -0
- package/.pi/agents/harness/planning/plan-adversary.md +19 -3
- package/.pi/agents/harness/planning/plan-evaluator.md +26 -5
- package/.pi/agents/harness/planning/review-integrator.md +23 -9
- package/.pi/agents/harness/planning/scout-graphify.md +1 -1
- package/.pi/agents/harness/planning/sprint-contract-auditor.md +19 -4
- package/.pi/agents/harness/planning/stack-researcher.md +19 -10
- package/.pi/extensions/harness-debate-tools.ts +238 -16
- package/.pi/extensions/harness-live-widget.ts +39 -159
- package/.pi/extensions/harness-plan-approval.ts +47 -5
- package/.pi/extensions/lib/debate-bus-core.ts +69 -15
- package/.pi/extensions/lib/debate-bus-state.ts +6 -0
- package/.pi/extensions/lib/plan-approval/plan-review.ts +56 -0
- package/.pi/extensions/lib/plan-approval/types.ts +1 -0
- package/.pi/extensions/lib/plan-debate-eligibility.ts +214 -0
- package/.pi/extensions/lib/plan-debate-focus.ts +151 -0
- package/.pi/extensions/lib/plan-debate-gate.ts +77 -34
- package/.pi/extensions/lib/plan-debate-lanes.ts +44 -0
- package/.pi/extensions/lib/plan-debate-round-status.ts +63 -20
- package/.pi/extensions/lib/plan-messenger.ts +93 -17
- package/.pi/extensions/policy-gate.ts +1 -1
- package/.pi/harness/README.md +1 -1
- package/.pi/harness/agents.manifest.json +15 -11
- package/.pi/harness/docs/adrs/0034-darwin-plan-research-pipeline.md +1 -3
- package/.pi/harness/docs/adrs/0035-plan-phase-review-gate.md +13 -5
- package/.pi/harness/docs/adrs/0036-implementation-research-and-selective-debate.md +51 -0
- package/.pi/harness/docs/adrs/README.md +2 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/artifacts/implementation-research.yaml +28 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/artifacts/review-round-r1.yaml +24 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/artifacts/review-round-r2.yaml +25 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/plan-packet.yaml +196 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/plan-review.md +14 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/research-brief.yaml +62 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/implementation-research.yaml +28 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r2.yaml +24 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r3.yaml +24 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/research-brief.yaml +29 -0
- package/.pi/harness/evals/smoke/smoke-harness-plan.mjs +97 -16
- package/.pi/harness/specs/plan-implementation-research-brief.schema.json +128 -0
- package/.pi/harness/specs/plan-review-round-draft.schema.json +1 -1
- package/.pi/harness/specs/round-result.schema.json +15 -2
- package/.pi/lib/harness-ui-state.ts +92 -0
- package/.pi/prompts/harness-plan.md +87 -37
- package/.pi/prompts/planning-rubrics.md +31 -0
- package/CHANGELOG.md +11 -0
- package/package.json +2 -2
|
@@ -2,10 +2,11 @@
|
|
|
2
2
|
* P0–P3 plan debate tools — bus + pi-messenger transport.
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
|
-
import { mkdir } from "node:fs/promises";
|
|
5
|
+
import { mkdir, readFile } from "node:fs/promises";
|
|
6
6
|
import { dirname, join } from "node:path";
|
|
7
7
|
import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
|
|
8
8
|
import { Type } from "@sinclair/typebox";
|
|
9
|
+
import { parse as parseYaml } from "yaml";
|
|
9
10
|
import type { DebateParticipant } from "../lib/debate-orchestrator-types.js";
|
|
10
11
|
import {
|
|
11
12
|
getLatestRunContext,
|
|
@@ -14,16 +15,25 @@ import {
|
|
|
14
15
|
import { writeYamlFile } from "../lib/harness-yaml.js";
|
|
15
16
|
import {
|
|
16
17
|
acceptDebateRound,
|
|
18
|
+
capsForDebate,
|
|
17
19
|
finalizeDebateConsensus,
|
|
18
20
|
openDebateBus,
|
|
19
21
|
} from "./lib/debate-bus-core.js";
|
|
20
22
|
import { getDebateState } from "./lib/debate-bus-state.js";
|
|
21
23
|
import { claimExtensionLoad } from "./lib/extension-load-guard.js";
|
|
22
24
|
import { captureHarnessEvent } from "./lib/harness-posthog.js";
|
|
25
|
+
import {
|
|
26
|
+
type DebateEligibilityInput,
|
|
27
|
+
harnessPlanDebateEligibility,
|
|
28
|
+
} from "./lib/plan-debate-eligibility.js";
|
|
23
29
|
import {
|
|
24
30
|
buildPlanReviewRoundEnvelope,
|
|
25
31
|
type PlanReviewRoundDraft,
|
|
26
32
|
} from "./lib/plan-debate-envelope.js";
|
|
33
|
+
import {
|
|
34
|
+
getPlanFocusCoverage,
|
|
35
|
+
planDebateOutcomeComplete,
|
|
36
|
+
} from "./lib/plan-debate-focus.js";
|
|
27
37
|
import {
|
|
28
38
|
normalizePlanDebateId,
|
|
29
39
|
planDebateIdForRun,
|
|
@@ -40,6 +50,7 @@ import {
|
|
|
40
50
|
formatTranscriptForSpawn,
|
|
41
51
|
getMessengerRoundState,
|
|
42
52
|
initPlanMessenger,
|
|
53
|
+
loadMessengerState,
|
|
43
54
|
messengerRoundDebateReady,
|
|
44
55
|
postMessengerMessage,
|
|
45
56
|
readRoundTranscript,
|
|
@@ -122,7 +133,7 @@ export default function harnessDebateTools(pi: ExtensionAPI) {
|
|
|
122
133
|
}
|
|
123
134
|
if (applied.length === 0) return;
|
|
124
135
|
|
|
125
|
-
const status = await getPlanDebateRoundStatus(rd, lastRound);
|
|
136
|
+
const status = await getPlanDebateRoundStatus(rd, lastRound, runId);
|
|
126
137
|
pi.sendMessage({
|
|
127
138
|
customType: "harness-debate-next-step",
|
|
128
139
|
content: [
|
|
@@ -138,28 +149,131 @@ export default function harnessDebateTools(pi: ExtensionAPI) {
|
|
|
138
149
|
});
|
|
139
150
|
});
|
|
140
151
|
|
|
152
|
+
pi.registerTool({
|
|
153
|
+
name: "harness_plan_debate_eligibility",
|
|
154
|
+
label: "Plan Debate Eligibility",
|
|
155
|
+
description:
|
|
156
|
+
"Pre-debate profile selection (full|standard|light). Call after DAG pass, before harness_debate_open. Uses risk, fork, implementation/stack briefs — not R1 hypothesis output.",
|
|
157
|
+
parameters: Type.Object({
|
|
158
|
+
risk_level: Type.Optional(
|
|
159
|
+
Type.String({ description: "low | med | high" }),
|
|
160
|
+
),
|
|
161
|
+
material_fork: Type.Optional(Type.Boolean()),
|
|
162
|
+
dag_pass: Type.Optional(Type.Boolean()),
|
|
163
|
+
dag_manually_patched: Type.Optional(Type.Boolean()),
|
|
164
|
+
implementation_brief_path: Type.Optional(
|
|
165
|
+
Type.String({
|
|
166
|
+
description:
|
|
167
|
+
"Default: artifacts/implementation-research.yaml under run dir",
|
|
168
|
+
}),
|
|
169
|
+
),
|
|
170
|
+
stack_brief_path: Type.Optional(Type.String()),
|
|
171
|
+
decomposition_path: Type.Optional(Type.String()),
|
|
172
|
+
}),
|
|
173
|
+
async execute(_id, params, _signal, _onUpdate, ctx) {
|
|
174
|
+
const runId = getRunId(ctx);
|
|
175
|
+
const rd = runDir(process.cwd(), runId);
|
|
176
|
+
const p = params as {
|
|
177
|
+
risk_level?: string;
|
|
178
|
+
material_fork?: boolean;
|
|
179
|
+
dag_pass?: boolean;
|
|
180
|
+
dag_manually_patched?: boolean;
|
|
181
|
+
implementation_brief_path?: string;
|
|
182
|
+
stack_brief_path?: string;
|
|
183
|
+
decomposition_path?: string;
|
|
184
|
+
};
|
|
185
|
+
async function loadYaml(
|
|
186
|
+
rel: string,
|
|
187
|
+
): Promise<Record<string, unknown> | null> {
|
|
188
|
+
try {
|
|
189
|
+
const raw = await readFile(join(rd, rel), "utf-8");
|
|
190
|
+
return parseYaml(raw) as Record<string, unknown>;
|
|
191
|
+
} catch {
|
|
192
|
+
return null;
|
|
193
|
+
}
|
|
194
|
+
}
|
|
195
|
+
const input: DebateEligibilityInput = {
|
|
196
|
+
risk_level: p.risk_level,
|
|
197
|
+
material_fork: p.material_fork,
|
|
198
|
+
dag_pass: p.dag_pass,
|
|
199
|
+
dag_manually_patched: p.dag_manually_patched,
|
|
200
|
+
implementation_brief: await loadYaml(
|
|
201
|
+
p.implementation_brief_path ??
|
|
202
|
+
"artifacts/implementation-research.yaml",
|
|
203
|
+
),
|
|
204
|
+
stack_brief: await loadYaml(
|
|
205
|
+
p.stack_brief_path ?? "artifacts/stack.yaml",
|
|
206
|
+
),
|
|
207
|
+
decomposition: await loadYaml(
|
|
208
|
+
p.decomposition_path ?? "artifacts/decomposition.yaml",
|
|
209
|
+
),
|
|
210
|
+
};
|
|
211
|
+
const result = harnessPlanDebateEligibility(input);
|
|
212
|
+
const lines = [
|
|
213
|
+
`profile: ${result.profile}`,
|
|
214
|
+
`required_focuses: ${result.required_focuses.join(", ")}`,
|
|
215
|
+
`min_focus_rounds: ${result.min_focus_rounds}`,
|
|
216
|
+
`debate_global_cap: ${result.debate_global_cap}`,
|
|
217
|
+
`human_required: ${result.human_required}`,
|
|
218
|
+
...result.rationale.map((r) => `- ${r}`),
|
|
219
|
+
];
|
|
220
|
+
return {
|
|
221
|
+
content: [{ type: "text", text: lines.join("\n") }],
|
|
222
|
+
details: result,
|
|
223
|
+
};
|
|
224
|
+
},
|
|
225
|
+
});
|
|
226
|
+
|
|
141
227
|
pi.registerTool({
|
|
142
228
|
name: "harness_debate_open",
|
|
143
229
|
label: "Open Plan Debate",
|
|
144
230
|
description:
|
|
145
|
-
"Open plan-phase debate bus (plan-<run_id>) and initialize pi-messenger inboxes/threads. Call once
|
|
231
|
+
"Open plan-phase debate bus (plan-<run_id>) and initialize pi-messenger inboxes/threads. Call once after harness_plan_debate_eligibility.",
|
|
146
232
|
parameters: Type.Object({
|
|
147
233
|
debate_id: Type.Optional(
|
|
148
234
|
Type.String({ description: "Optional; normalized to plan-<run_id>" }),
|
|
149
235
|
),
|
|
236
|
+
debate_profile: Type.Optional(
|
|
237
|
+
Type.String({ description: "full | standard | light" }),
|
|
238
|
+
),
|
|
239
|
+
required_focuses: Type.Optional(
|
|
240
|
+
Type.Array(
|
|
241
|
+
Type.String({ description: "spec | wbs | schedule | quality" }),
|
|
242
|
+
),
|
|
243
|
+
),
|
|
150
244
|
}),
|
|
151
245
|
async execute(_id, params, _signal, _onUpdate, ctx) {
|
|
152
246
|
const runId = getRunId(ctx);
|
|
153
247
|
const projectRoot = process.cwd();
|
|
154
|
-
const
|
|
248
|
+
const p = params as {
|
|
249
|
+
debate_id?: string;
|
|
250
|
+
debate_profile?: string;
|
|
251
|
+
required_focuses?: string[];
|
|
252
|
+
};
|
|
253
|
+
const raw = String(p.debate_id ?? "");
|
|
155
254
|
const { debateId, corrected, warning } = normalizePlanDebateId(
|
|
156
255
|
raw,
|
|
157
256
|
runId,
|
|
158
257
|
);
|
|
159
|
-
const
|
|
258
|
+
const profile =
|
|
259
|
+
p.debate_profile === "full" ||
|
|
260
|
+
p.debate_profile === "standard" ||
|
|
261
|
+
p.debate_profile === "light"
|
|
262
|
+
? p.debate_profile
|
|
263
|
+
: "standard";
|
|
264
|
+
const required_focuses = (p.required_focuses ?? []).filter((f) =>
|
|
265
|
+
["spec", "wbs", "schedule", "quality"].includes(f),
|
|
266
|
+
) as Array<"spec" | "wbs" | "schedule" | "quality">;
|
|
267
|
+
const opened = await openDebateBus(runId, debateId, debateHooks(pi), {
|
|
268
|
+
debate_profile: profile,
|
|
269
|
+
required_focuses:
|
|
270
|
+
required_focuses.length > 0 ? required_focuses : undefined,
|
|
271
|
+
});
|
|
160
272
|
await initPlanMessenger(runDir(projectRoot, runId), {
|
|
161
273
|
runId,
|
|
162
274
|
debateId,
|
|
275
|
+
debate_profile: profile,
|
|
276
|
+
required_focuses: opened.required_focuses,
|
|
163
277
|
});
|
|
164
278
|
const sessionId = ctx.sessionManager.getSessionId();
|
|
165
279
|
captureHarnessEvent(sessionId, "harness_debate_round", {
|
|
@@ -171,6 +285,12 @@ export default function harnessDebateTools(pi: ExtensionAPI) {
|
|
|
171
285
|
});
|
|
172
286
|
const lines = [
|
|
173
287
|
`Plan debate opened: ${debateId}`,
|
|
288
|
+
`Profile: ${profile}`,
|
|
289
|
+
required_focuses.length
|
|
290
|
+
? `Required focuses: ${required_focuses.join(", ")}`
|
|
291
|
+
: opened.required_focuses?.length
|
|
292
|
+
? `Required focuses: ${opened.required_focuses.join(", ")}`
|
|
293
|
+
: "Required focuses: (default all four)",
|
|
174
294
|
`Messenger: debate-messenger/ (inbox + threads/round-N/transcript.jsonl)`,
|
|
175
295
|
];
|
|
176
296
|
if (warning) lines.push(`Note: ${warning}`);
|
|
@@ -187,13 +307,14 @@ export default function harnessDebateTools(pi: ExtensionAPI) {
|
|
|
187
307
|
description:
|
|
188
308
|
"Post a claim/rebuttal/integrate message to the round thread and agent inbox (pi-messenger style). Evaluator posts claims first; adversary rebuts with in_reply_to claim ids.",
|
|
189
309
|
parameters: Type.Object({
|
|
190
|
-
round_index: Type.Number({ description: "1–
|
|
310
|
+
round_index: Type.Number({ description: "1–12 (monotonic per run)" }),
|
|
191
311
|
from: Type.String({
|
|
192
312
|
description:
|
|
193
313
|
"PlanEvaluatorAgent | PlanAdversaryAgent | ReviewIntegratorAgent | HypothesisValidatorAgent | SprintContractAuditorAgent",
|
|
194
314
|
}),
|
|
195
315
|
kind: Type.String({
|
|
196
|
-
description:
|
|
316
|
+
description:
|
|
317
|
+
"claim | rebuttal | clarification | counter | integrate | audit | system",
|
|
197
318
|
}),
|
|
198
319
|
body: Type.String(),
|
|
199
320
|
to: Type.Optional(Type.Array(Type.String())),
|
|
@@ -207,7 +328,14 @@ export default function harnessDebateTools(pi: ExtensionAPI) {
|
|
|
207
328
|
const p = params as {
|
|
208
329
|
round_index: number;
|
|
209
330
|
from: DebateParticipant;
|
|
210
|
-
kind:
|
|
331
|
+
kind:
|
|
332
|
+
| "claim"
|
|
333
|
+
| "rebuttal"
|
|
334
|
+
| "clarification"
|
|
335
|
+
| "counter"
|
|
336
|
+
| "integrate"
|
|
337
|
+
| "audit"
|
|
338
|
+
| "system";
|
|
211
339
|
body: string;
|
|
212
340
|
to?: Array<DebateParticipant | "broadcast">;
|
|
213
341
|
in_reply_to?: string[];
|
|
@@ -269,7 +397,7 @@ export default function harnessDebateTools(pi: ExtensionAPI) {
|
|
|
269
397
|
description:
|
|
270
398
|
"Validate lane YAML + messenger thread, write review-round-rN.yaml, emit bus round envelope. Parent must not write review-round files directly.",
|
|
271
399
|
parameters: Type.Object({
|
|
272
|
-
round_index: Type.Number({ description: "1–
|
|
400
|
+
round_index: Type.Number({ description: "1–12 (monotonic per run)" }),
|
|
273
401
|
integrator_draft: Type.Record(Type.String(), Type.Unknown(), {
|
|
274
402
|
description: "ReviewIntegrator YAML object (review-round-rN fields)",
|
|
275
403
|
}),
|
|
@@ -300,8 +428,11 @@ export default function harnessDebateTools(pi: ExtensionAPI) {
|
|
|
300
428
|
evidence_refs: [`artifacts/review-round-r${roundIndex}.yaml`],
|
|
301
429
|
});
|
|
302
430
|
|
|
431
|
+
const caps = capsForDebate(debateId);
|
|
303
432
|
const roundState = await getMessengerRoundState(rd, roundIndex);
|
|
304
|
-
const mCheck = messengerRoundDebateReady(roundState, roundIndex
|
|
433
|
+
const mCheck = messengerRoundDebateReady(roundState, roundIndex >= 4, {
|
|
434
|
+
max_exchanges_per_round: caps.max_exchanges_per_round,
|
|
435
|
+
});
|
|
305
436
|
if (!mCheck.ok) {
|
|
306
437
|
return {
|
|
307
438
|
content: [
|
|
@@ -393,7 +524,7 @@ export default function harnessDebateTools(pi: ExtensionAPI) {
|
|
|
393
524
|
name: "harness_debate_consensus",
|
|
394
525
|
label: "Finalize Plan Debate Consensus",
|
|
395
526
|
description:
|
|
396
|
-
"After
|
|
527
|
+
"After all focus areas covered (spec|wbs|schedule|quality) and last review_gate_ready true, emit consensus packet to .pi/harness/debates/plan-<run_id>.consensus.json",
|
|
397
528
|
parameters: Type.Object({
|
|
398
529
|
rationale: Type.Optional(Type.String()),
|
|
399
530
|
}),
|
|
@@ -401,7 +532,7 @@ export default function harnessDebateTools(pi: ExtensionAPI) {
|
|
|
401
532
|
const runId = getRunId(ctx);
|
|
402
533
|
const rationale =
|
|
403
534
|
String((params as { rationale?: string }).rationale ?? "").trim() ||
|
|
404
|
-
"Plan Review Gate consensus after
|
|
535
|
+
"Plan Review Gate consensus after focus coverage and messenger-backed rounds.";
|
|
405
536
|
const decision = await finalizeDebateConsensus(
|
|
406
537
|
rationale,
|
|
407
538
|
debateHooks(pi),
|
|
@@ -468,16 +599,30 @@ export default function harnessDebateTools(pi: ExtensionAPI) {
|
|
|
468
599
|
description:
|
|
469
600
|
"List missing lane artifacts and messenger steps for a Review Gate round. Call when resuming after a stop.",
|
|
470
601
|
parameters: Type.Object({
|
|
471
|
-
round_index: Type.Number({ description: "1–
|
|
602
|
+
round_index: Type.Number({ description: "1–12 (monotonic per run)" }),
|
|
603
|
+
debate_round_focus: Type.Optional(
|
|
604
|
+
Type.String({ description: "spec | wbs | schedule | quality" }),
|
|
605
|
+
),
|
|
472
606
|
}),
|
|
473
607
|
async execute(_id, params, _signal, _onUpdate, ctx) {
|
|
474
608
|
const runId = getRunId(ctx);
|
|
475
|
-
const
|
|
476
|
-
|
|
477
|
-
|
|
609
|
+
const p = params as {
|
|
610
|
+
round_index: number;
|
|
611
|
+
debate_round_focus?: string;
|
|
612
|
+
};
|
|
613
|
+
const roundIndex = Number(p.round_index);
|
|
614
|
+
const focus =
|
|
615
|
+
p.debate_round_focus === "spec" ||
|
|
616
|
+
p.debate_round_focus === "wbs" ||
|
|
617
|
+
p.debate_round_focus === "schedule" ||
|
|
618
|
+
p.debate_round_focus === "quality"
|
|
619
|
+
? p.debate_round_focus
|
|
620
|
+
: undefined;
|
|
478
621
|
const status = await getPlanDebateRoundStatus(
|
|
479
622
|
runDir(process.cwd(), runId),
|
|
480
623
|
roundIndex,
|
|
624
|
+
runId,
|
|
625
|
+
focus ? { debate_round_focus: focus } : undefined,
|
|
481
626
|
);
|
|
482
627
|
const lines = [
|
|
483
628
|
`Round ${roundIndex}: ready_for_integrator=${status.ready_for_integrator}`,
|
|
@@ -493,6 +638,83 @@ export default function harnessDebateTools(pi: ExtensionAPI) {
|
|
|
493
638
|
},
|
|
494
639
|
});
|
|
495
640
|
|
|
641
|
+
pi.registerTool({
|
|
642
|
+
name: "harness_debate_focus_coverage",
|
|
643
|
+
label: "Plan Debate Focus Coverage",
|
|
644
|
+
description:
|
|
645
|
+
"Return which Review Gate focuses (spec|wbs|schedule|quality) are covered by submitted review-round artifacts and whether debate outcome is complete.",
|
|
646
|
+
parameters: Type.Object({}),
|
|
647
|
+
async execute(_id, _params, _signal, _onUpdate, ctx) {
|
|
648
|
+
const runId = getRunId(ctx);
|
|
649
|
+
const rd = runDir(process.cwd(), runId);
|
|
650
|
+
const messenger = await loadMessengerState(rd);
|
|
651
|
+
const requiredFocuses = messenger?.required_focuses;
|
|
652
|
+
const coverage = await getPlanFocusCoverage(rd, { requiredFocuses });
|
|
653
|
+
const caps = capsForDebate(
|
|
654
|
+
planDebateIdForRun(runId),
|
|
655
|
+
messenger?.debate_profile,
|
|
656
|
+
);
|
|
657
|
+
const complete = planDebateOutcomeComplete(coverage, {
|
|
658
|
+
requiredFocuses,
|
|
659
|
+
minRoundIndex: caps.min_focus_rounds,
|
|
660
|
+
});
|
|
661
|
+
const lines = [
|
|
662
|
+
`Profile: ${messenger?.debate_profile ?? "standard"}`,
|
|
663
|
+
`Required: ${(requiredFocuses ?? ["spec", "wbs", "schedule", "quality"]).join(", ")}`,
|
|
664
|
+
`Covered: ${coverage.covered.join(", ") || "(none)"}`,
|
|
665
|
+
coverage.missing.length
|
|
666
|
+
? `Missing: ${coverage.missing.join(", ")}`
|
|
667
|
+
: "All required focuses covered.",
|
|
668
|
+
`Last round: ${coverage.last_round_index}, review_gate_ready=${coverage.last_review_gate_ready}`,
|
|
669
|
+
`Outcome complete: ${complete}`,
|
|
670
|
+
`Budget: min_focus_rounds=${caps.min_focus_rounds}, max_rounds=${caps.max_rounds}, max_exchanges_per_round=${caps.max_exchanges_per_round}`,
|
|
671
|
+
];
|
|
672
|
+
return {
|
|
673
|
+
content: [{ type: "text", text: lines.join("\n") }],
|
|
674
|
+
details: {
|
|
675
|
+
coverage,
|
|
676
|
+
caps,
|
|
677
|
+
complete,
|
|
678
|
+
profile: messenger?.debate_profile,
|
|
679
|
+
},
|
|
680
|
+
};
|
|
681
|
+
},
|
|
682
|
+
});
|
|
683
|
+
|
|
684
|
+
pi.registerTool({
|
|
685
|
+
name: "harness_debate_advance_thread",
|
|
686
|
+
label: "Advance Plan Debate Thread",
|
|
687
|
+
description:
|
|
688
|
+
"Ping-pong helper: read round transcript and return next spawn (evaluator clarification vs adversary counter) based on unresolved claim_ids and exchange_count.",
|
|
689
|
+
parameters: Type.Object({
|
|
690
|
+
round_index: Type.Number(),
|
|
691
|
+
}),
|
|
692
|
+
async execute(_id, params, _signal, _onUpdate, ctx) {
|
|
693
|
+
const runId = getRunId(ctx);
|
|
694
|
+
const roundIndex = Number(
|
|
695
|
+
(params as { round_index: number }).round_index,
|
|
696
|
+
);
|
|
697
|
+
const status = await getPlanDebateRoundStatus(
|
|
698
|
+
runDir(process.cwd(), runId),
|
|
699
|
+
roundIndex,
|
|
700
|
+
runId,
|
|
701
|
+
);
|
|
702
|
+
const text = [
|
|
703
|
+
`Round ${roundIndex}: exchange_count=${status.exchange_count}`,
|
|
704
|
+
status.unresolved_claim_ids.length
|
|
705
|
+
? `Unresolved claims: ${status.unresolved_claim_ids.join(", ")}`
|
|
706
|
+
: "No unresolved claims.",
|
|
707
|
+
status.next_tool
|
|
708
|
+
? `Next: ${status.next_tool}`
|
|
709
|
+
: "Dialogue complete — spawn review-integrator.",
|
|
710
|
+
].join("\n");
|
|
711
|
+
return {
|
|
712
|
+
content: [{ type: "text", text }],
|
|
713
|
+
details: status,
|
|
714
|
+
};
|
|
715
|
+
},
|
|
716
|
+
});
|
|
717
|
+
|
|
496
718
|
pi.registerTool({
|
|
497
719
|
name: "harness_plan_scope_check",
|
|
498
720
|
label: "Plan Scope Drift Check",
|
|
@@ -3,12 +3,14 @@ import type {
|
|
|
3
3
|
ExtensionContext,
|
|
4
4
|
} from "@earendil-works/pi-coding-agent";
|
|
5
5
|
import {
|
|
6
|
+
deriveHarnessStatusHint,
|
|
7
|
+
formatHarnessPhaseLabel,
|
|
8
|
+
type HarnessStatusSeverity,
|
|
6
9
|
type HarnessUiState,
|
|
7
10
|
HarnessUiStateStore,
|
|
11
|
+
nextHarnessPhase,
|
|
8
12
|
} from "../lib/harness-ui-state";
|
|
9
13
|
|
|
10
|
-
type Severity = "accent" | "warning" | "error";
|
|
11
|
-
|
|
12
14
|
type TuiLike = { requestRender(): void };
|
|
13
15
|
type ThemeLike = {
|
|
14
16
|
fg(
|
|
@@ -164,31 +166,25 @@ function composeZones(left: string, right: string, width: number): string {
|
|
|
164
166
|
return fitToWidth(`${leftFit}${" ".repeat(minGap)}${rightFit}`, width);
|
|
165
167
|
}
|
|
166
168
|
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
169
|
+
function themeSeverityColor(
|
|
170
|
+
severity: HarnessStatusSeverity,
|
|
171
|
+
): "accent" | "warning" | "error" | "success" | "muted" {
|
|
172
|
+
return severity;
|
|
173
|
+
}
|
|
171
174
|
|
|
172
175
|
class HarnessWidgetComponent {
|
|
173
176
|
private widthCache?: number;
|
|
174
177
|
private linesCache?: string[];
|
|
175
178
|
private state: HarnessUiState;
|
|
176
|
-
private inFlight: InFlightState;
|
|
177
179
|
private themeRef: ThemeLike;
|
|
178
180
|
|
|
179
|
-
constructor(
|
|
180
|
-
state: HarnessUiState,
|
|
181
|
-
inFlight: InFlightState,
|
|
182
|
-
theme: ThemeLike,
|
|
183
|
-
) {
|
|
181
|
+
constructor(state: HarnessUiState, theme: ThemeLike) {
|
|
184
182
|
this.state = state;
|
|
185
|
-
this.inFlight = inFlight;
|
|
186
183
|
this.themeRef = theme;
|
|
187
184
|
}
|
|
188
185
|
|
|
189
|
-
public setData(state: HarnessUiState
|
|
186
|
+
public setData(state: HarnessUiState): void {
|
|
190
187
|
this.state = state;
|
|
191
|
-
this.inFlight = inFlight;
|
|
192
188
|
this.invalidate();
|
|
193
189
|
}
|
|
194
190
|
|
|
@@ -201,109 +197,23 @@ class HarnessWidgetComponent {
|
|
|
201
197
|
if (this.linesCache && this.widthCache === width) return this.linesCache;
|
|
202
198
|
const theme = this.themeRef;
|
|
203
199
|
const rowWidth = Math.max(1, width - TERMINAL_WIDTH_SAFETY_MARGIN);
|
|
204
|
-
const showDebateRow =
|
|
205
|
-
this.state.phase === "adversary" || this.state.phase === "merge";
|
|
206
|
-
|
|
207
|
-
const substateColor: Severity =
|
|
208
|
-
this.state.flowSubstate === "blocked"
|
|
209
|
-
? "error"
|
|
210
|
-
: this.state.flowSubstate === "severity-policy" ||
|
|
211
|
-
this.state.flowSubstate === "human-required"
|
|
212
|
-
? "warning"
|
|
213
|
-
: "accent";
|
|
214
|
-
const policyColor =
|
|
215
|
-
this.state.policyDecision === "pass"
|
|
216
|
-
? "success"
|
|
217
|
-
: this.state.policyDecision === "conditional_pass"
|
|
218
|
-
? "warning"
|
|
219
|
-
: this.state.policyDecision === "block" ||
|
|
220
|
-
this.state.policyDecision === "human_required"
|
|
221
|
-
? "error"
|
|
222
|
-
: "muted";
|
|
223
|
-
|
|
224
|
-
const policyDisplay = this.state.policyDecision ?? "pending";
|
|
225
|
-
|
|
226
|
-
const phaseToken = `${theme.fg("dim", "phase:")}${theme.fg("accent", this.state.phase)}`;
|
|
227
|
-
const flowToken = `${theme.fg("dim", "flow:")}${theme.fg(substateColor, this.state.flowSubstate)}`;
|
|
228
|
-
const policyToken = `${theme.fg("dim", "policy:")}${theme.fg(policyColor, policyDisplay)}`;
|
|
229
|
-
const row1 = composeZones(
|
|
230
|
-
`${theme.bold("Harness")} ${phaseToken} ${flowToken}`,
|
|
231
|
-
policyToken,
|
|
232
|
-
rowWidth,
|
|
233
|
-
);
|
|
234
200
|
|
|
235
|
-
const
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
const
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
? "down"
|
|
252
|
-
: "flat";
|
|
253
|
-
const trendColor =
|
|
254
|
-
consensusTrend === "up"
|
|
255
|
-
? "success"
|
|
256
|
-
: consensusTrend === "down"
|
|
257
|
-
? "warning"
|
|
258
|
-
: "muted";
|
|
259
|
-
|
|
260
|
-
const sev = this.state.severity;
|
|
261
|
-
const severityCompact =
|
|
262
|
-
sev.correctness == null &&
|
|
263
|
-
sev.security == null &&
|
|
264
|
-
sev.architecture == null &&
|
|
265
|
-
sev.testIntegrity == null
|
|
266
|
-
? theme.fg("muted", "sev:n/a")
|
|
267
|
-
: `${theme.fg("dim", "sev")} ${theme.fg("accent", `c:${sev.correctness ?? "-"}`)} ${theme.fg("accent", `s:${sev.security ?? "-"}`)} ${theme.fg("accent", `a:${sev.architecture ?? "-"}`)} ${theme.fg("accent", `t:${sev.testIntegrity ?? "-"}`)}`;
|
|
268
|
-
|
|
269
|
-
const planFlag = this.state.planApproved
|
|
270
|
-
? `${theme.fg("dim", "📋 Plan:")}${theme.fg("success", "OK")}`
|
|
271
|
-
: `${theme.fg("dim", "📋 Plan:")}${theme.fg("error", "NO")}`;
|
|
272
|
-
const reviewFlag = this.state.reviewIsolationOk
|
|
273
|
-
? `${theme.fg("dim", "🧪 Review:")}${theme.fg("success", "OK")}`
|
|
274
|
-
: `${theme.fg("dim", "🧪 Review:")}${theme.fg("warning", "ISO")}`;
|
|
275
|
-
const budgetFlag = this.state.budgetExhausted
|
|
276
|
-
? `${theme.fg("dim", "💰 Budget:")}${theme.fg("error", "HIT")}`
|
|
277
|
-
: `${theme.fg("dim", "💰 Budget:")}${theme.fg("success", "OK")}`;
|
|
278
|
-
const testsFlag =
|
|
279
|
-
this.state.testIntegritySeverity === "high"
|
|
280
|
-
? `${theme.fg("dim", "🛡 Tests:")}${theme.fg("error", "HIGH")}`
|
|
281
|
-
: this.state.testIntegritySeverity === "medium"
|
|
282
|
-
? `${theme.fg("dim", "🛡 Tests:")}${theme.fg("warning", "MED")}`
|
|
283
|
-
: `${theme.fg("dim", "🛡 Tests:")}${theme.fg("success", "OK")}`;
|
|
284
|
-
|
|
285
|
-
const toolDisplay = this.inFlight.lastToolName
|
|
286
|
-
? `${this.inFlight.toolCount}:${this.inFlight.lastToolName}`
|
|
287
|
-
: String(this.inFlight.toolCount);
|
|
288
|
-
const nextDisplay =
|
|
289
|
-
this.state.nextRecommendedCommand != null
|
|
290
|
-
? this.state.nextRecommendedCommand.length > 36
|
|
291
|
-
? `${this.state.nextRecommendedCommand.slice(0, 33)}...`
|
|
292
|
-
: this.state.nextRecommendedCommand
|
|
293
|
-
: null;
|
|
294
|
-
const row3Left = `${planFlag} ${reviewFlag} ${budgetFlag} ${testsFlag}`;
|
|
295
|
-
const row3Right = nextDisplay
|
|
296
|
-
? `${theme.fg("dim", "inFlight:")}${theme.fg("accent", toolDisplay)} ${theme.fg("dim", "next:")}${theme.fg("accent", nextDisplay)}`
|
|
297
|
-
: `${theme.fg("dim", "inFlight:")}${theme.fg("accent", toolDisplay)}`;
|
|
298
|
-
const row3 = composeZones(row3Left, row3Right, rowWidth);
|
|
299
|
-
|
|
300
|
-
const lines: string[] = [truncateToWidth(row1, rowWidth)];
|
|
301
|
-
if (showDebateRow) {
|
|
302
|
-
const debateLeft = `${theme.fg("dim", "Debate")} ${theme.fg("accent", `rounds:${debateProgress}`)} ${theme.fg("dim", "trend:")}${theme.fg(trendColor, consensusTrend)} ${theme.fg("dim", "budget:")}${theme.fg("accent", budgetDisplay)}`;
|
|
303
|
-
const row2 = composeZones(debateLeft, severityCompact, rowWidth);
|
|
304
|
-
lines.push(truncateToWidth(row2, rowWidth));
|
|
305
|
-
}
|
|
306
|
-
lines.push(truncateToWidth(row3, rowWidth));
|
|
201
|
+
const currentLabel = formatHarnessPhaseLabel(this.state.phase);
|
|
202
|
+
const nextPhase = nextHarnessPhase(this.state.phase);
|
|
203
|
+
const nowToken = `${theme.fg("dim", "now:")}${theme.fg("accent", currentLabel)}`;
|
|
204
|
+
const phaseToken =
|
|
205
|
+
nextPhase != null
|
|
206
|
+
? `${nowToken} ${theme.fg("dim", "→")} ${theme.fg("accent", formatHarnessPhaseLabel(nextPhase))}`
|
|
207
|
+
: nowToken;
|
|
208
|
+
|
|
209
|
+
const status = deriveHarnessStatusHint(this.state);
|
|
210
|
+
const statusColor = themeSeverityColor(status.severity);
|
|
211
|
+
const statusToken = theme.fg(statusColor, status.text);
|
|
212
|
+
|
|
213
|
+
const left = `${theme.bold("Harness")} ${phaseToken}`;
|
|
214
|
+
const row = composeZones(left, statusToken, rowWidth);
|
|
215
|
+
|
|
216
|
+
const lines = [truncateToWidth(row, rowWidth)];
|
|
307
217
|
this.widthCache = width;
|
|
308
218
|
this.linesCache = lines;
|
|
309
219
|
return lines;
|
|
@@ -316,14 +226,16 @@ class HarnessWidgetComponent {
|
|
|
316
226
|
}
|
|
317
227
|
|
|
318
228
|
function statusToken(state: HarnessUiState): string {
|
|
319
|
-
const
|
|
320
|
-
|
|
229
|
+
const current = formatHarnessPhaseLabel(state.phase);
|
|
230
|
+
const next = nextHarnessPhase(state.phase);
|
|
231
|
+
const phasePart =
|
|
232
|
+
next != null ? `${current}→${formatHarnessPhaseLabel(next)}` : current;
|
|
233
|
+
const hint = deriveHarnessStatusHint(state).text;
|
|
234
|
+
return `h:${phasePart}|${hint}`;
|
|
321
235
|
}
|
|
322
236
|
|
|
323
237
|
export default function harnessLiveWidget(pi: ExtensionAPI) {
|
|
324
238
|
const stateStore = new HarnessUiStateStore();
|
|
325
|
-
const inFlightCalls = new Set<string>();
|
|
326
|
-
let lastToolName: string | null = null;
|
|
327
239
|
let widgetMounted = false;
|
|
328
240
|
let tuiHandle: TuiLike | null = null;
|
|
329
241
|
let component: HarnessWidgetComponent | null = null;
|
|
@@ -334,19 +246,14 @@ export default function harnessLiveWidget(pi: ExtensionAPI) {
|
|
|
334
246
|
function mountHarnessWidget(ctx: ExtensionContext): void {
|
|
335
247
|
if (!ctx.hasUI) return;
|
|
336
248
|
const state = stateStore.refresh(ctx);
|
|
337
|
-
|
|
338
|
-
lastRenderHash = computeRenderHash(state, inFlight);
|
|
249
|
+
lastRenderHash = computeRenderHash(state);
|
|
339
250
|
|
|
340
251
|
ctx.ui.setWidget(
|
|
341
252
|
"harness-live",
|
|
342
253
|
(tui, theme) => {
|
|
343
254
|
widgetMounted = true;
|
|
344
255
|
tuiHandle = tui;
|
|
345
|
-
component = new HarnessWidgetComponent(
|
|
346
|
-
stateStore.snapshot(),
|
|
347
|
-
inFlight,
|
|
348
|
-
theme,
|
|
349
|
-
);
|
|
256
|
+
component = new HarnessWidgetComponent(stateStore.snapshot(), theme);
|
|
350
257
|
return {
|
|
351
258
|
render(width: number): string[] {
|
|
352
259
|
component?.setTheme(theme);
|
|
@@ -388,26 +295,15 @@ export default function harnessLiveWidget(pi: ExtensionAPI) {
|
|
|
388
295
|
ctx.ui.setStatus("harness-mode", undefined);
|
|
389
296
|
}
|
|
390
297
|
|
|
391
|
-
function computeRenderHash(
|
|
392
|
-
state: HarnessUiState,
|
|
393
|
-
inFlight: InFlightState,
|
|
394
|
-
): string {
|
|
298
|
+
function computeRenderHash(state: HarnessUiState): string {
|
|
395
299
|
return JSON.stringify({
|
|
396
300
|
phase: state.phase,
|
|
397
|
-
flowSubstate: state.flowSubstate,
|
|
398
301
|
planApproved: state.planApproved,
|
|
399
|
-
reviewIsolationOk: state.reviewIsolationOk,
|
|
400
302
|
budgetExhausted: state.budgetExhausted,
|
|
401
303
|
testIntegritySeverity: state.testIntegritySeverity,
|
|
402
|
-
debateRound: state.debateRound,
|
|
403
|
-
debateMaxRounds: state.debateMaxRounds,
|
|
404
|
-
debateBudgetUsed: state.debateBudgetUsed,
|
|
405
|
-
debateBudgetCap: state.debateBudgetCap,
|
|
406
304
|
policyDecision: state.policyDecision,
|
|
407
|
-
|
|
408
|
-
severity: state.severity,
|
|
305
|
+
flowSubstate: state.flowSubstate,
|
|
409
306
|
nextRecommendedCommand: state.nextRecommendedCommand,
|
|
410
|
-
inFlight,
|
|
411
307
|
});
|
|
412
308
|
}
|
|
413
309
|
|
|
@@ -417,15 +313,11 @@ export default function harnessLiveWidget(pi: ExtensionAPI) {
|
|
|
417
313
|
queueMicrotask(() => {
|
|
418
314
|
refreshQueued = false;
|
|
419
315
|
const state = stateStore.refresh(ctx);
|
|
420
|
-
const
|
|
421
|
-
toolCount: inFlightCalls.size,
|
|
422
|
-
lastToolName,
|
|
423
|
-
};
|
|
424
|
-
const hash = computeRenderHash(state, inFlight);
|
|
316
|
+
const hash = computeRenderHash(state);
|
|
425
317
|
updateStatusFallback(ctx, state);
|
|
426
318
|
if (hash === lastRenderHash) return;
|
|
427
319
|
lastRenderHash = hash;
|
|
428
|
-
if (component) component.setData(state
|
|
320
|
+
if (component) component.setData(state);
|
|
429
321
|
tuiHandle?.requestRender();
|
|
430
322
|
});
|
|
431
323
|
}
|
|
@@ -450,16 +342,4 @@ export default function harnessLiveWidget(pi: ExtensionAPI) {
|
|
|
450
342
|
pi.on("agent_end", (_event, ctx) => {
|
|
451
343
|
scheduleRefresh(ctx);
|
|
452
344
|
});
|
|
453
|
-
|
|
454
|
-
pi.on("tool_execution_start", (event, ctx) => {
|
|
455
|
-
inFlightCalls.add(event.toolCallId);
|
|
456
|
-
lastToolName = event.toolName;
|
|
457
|
-
scheduleRefresh(ctx);
|
|
458
|
-
});
|
|
459
|
-
|
|
460
|
-
pi.on("tool_result", (event, ctx) => {
|
|
461
|
-
inFlightCalls.delete(event.toolCallId);
|
|
462
|
-
if (inFlightCalls.size === 0) lastToolName = null;
|
|
463
|
-
scheduleRefresh(ctx);
|
|
464
|
-
});
|
|
465
345
|
}
|