ultimate-pi 0.13.1 → 0.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. package/.agents/skills/harness-debate-plan/SKILL.md +42 -22
  2. package/.agents/skills/harness-orchestration/SKILL.md +3 -3
  3. package/.agents/skills/harness-plan/SKILL.md +10 -8
  4. package/.pi/agents/harness/planning/decompose.md +4 -2
  5. package/.pi/agents/harness/planning/execution-plan-author.md +25 -14
  6. package/.pi/agents/harness/planning/hypothesis-validator.md +21 -5
  7. package/.pi/agents/harness/planning/implementation-researcher.md +42 -0
  8. package/.pi/agents/harness/planning/plan-adversary.md +20 -4
  9. package/.pi/agents/harness/planning/plan-evaluator.md +28 -5
  10. package/.pi/agents/harness/planning/review-integrator.md +25 -9
  11. package/.pi/agents/harness/planning/scout-graphify.md +1 -1
  12. package/.pi/agents/harness/planning/sprint-contract-auditor.md +19 -4
  13. package/.pi/agents/harness/planning/stack-researcher.md +19 -10
  14. package/.pi/extensions/debate-orchestrator.ts +39 -435
  15. package/.pi/extensions/harness-debate-tools.ts +741 -0
  16. package/.pi/extensions/harness-live-widget.ts +39 -159
  17. package/.pi/extensions/harness-plan-approval.ts +88 -22
  18. package/.pi/extensions/harness-run-context.ts +18 -0
  19. package/.pi/extensions/lib/debate-bus-core.ts +488 -0
  20. package/.pi/extensions/lib/debate-bus-state.ts +64 -0
  21. package/.pi/extensions/lib/harness-spawn-budget.ts +5 -25
  22. package/.pi/extensions/lib/plan-approval/dialog.ts +33 -272
  23. package/.pi/extensions/lib/plan-approval/format-plan.ts +12 -85
  24. package/.pi/extensions/lib/plan-approval/plan-review.ts +62 -6
  25. package/.pi/extensions/lib/plan-approval/render.ts +6 -0
  26. package/.pi/extensions/lib/plan-approval/types.ts +1 -0
  27. package/.pi/extensions/lib/plan-approval/validate.ts +1 -1
  28. package/.pi/extensions/lib/plan-debate-eligibility.ts +214 -0
  29. package/.pi/extensions/lib/plan-debate-envelope.ts +2 -0
  30. package/.pi/extensions/lib/plan-debate-focus.ts +151 -0
  31. package/.pi/extensions/lib/plan-debate-gate.ts +198 -0
  32. package/.pi/extensions/lib/plan-debate-id.ts +39 -0
  33. package/.pi/extensions/lib/plan-debate-lane.ts +220 -0
  34. package/.pi/extensions/lib/plan-debate-lanes.ts +44 -0
  35. package/.pi/extensions/lib/plan-debate-round-status.ts +137 -0
  36. package/.pi/extensions/lib/plan-debate-write-guard.ts +20 -0
  37. package/.pi/extensions/lib/plan-messenger.ts +352 -0
  38. package/.pi/extensions/lib/plan-review-integrator-rules.ts +119 -0
  39. package/.pi/extensions/lib/plan-scope-guard.ts +89 -0
  40. package/.pi/extensions/policy-gate.ts +1 -1
  41. package/.pi/harness/README.md +1 -1
  42. package/.pi/harness/agents.manifest.json +16 -12
  43. package/.pi/harness/docs/adrs/0034-darwin-plan-research-pipeline.md +1 -3
  44. package/.pi/harness/docs/adrs/0035-plan-phase-review-gate.md +13 -5
  45. package/.pi/harness/docs/adrs/0036-implementation-research-and-selective-debate.md +51 -0
  46. package/.pi/harness/docs/adrs/README.md +2 -0
  47. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/artifacts/implementation-research.yaml +28 -0
  48. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/artifacts/review-round-r1.yaml +24 -0
  49. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/artifacts/review-round-r2.yaml +25 -0
  50. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/plan-packet.yaml +196 -0
  51. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/plan-review.md +14 -0
  52. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/research-brief.yaml +62 -0
  53. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/implementation-research.yaml +28 -0
  54. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r2.yaml +24 -0
  55. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r3.yaml +24 -0
  56. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/research-brief.yaml +29 -0
  57. package/.pi/harness/evals/smoke/smoke-harness-plan.mjs +97 -16
  58. package/.pi/harness/specs/plan-implementation-research-brief.schema.json +128 -0
  59. package/.pi/harness/specs/plan-review-round-draft.schema.json +1 -1
  60. package/.pi/harness/specs/round-result.schema.json +15 -2
  61. package/.pi/lib/harness-ui-state.ts +92 -0
  62. package/.pi/prompts/harness-plan.md +90 -30
  63. package/.pi/prompts/planning-rubrics.md +31 -0
  64. package/CHANGELOG.md +23 -0
  65. package/package.json +3 -3
  66. package/.pi/extensions/lib/plan-approval/fallback.ts +0 -50
@@ -0,0 +1,220 @@
1
+ /**
2
+ * Persist plan debate lane YAML + pi-messenger side effects from subagent output.
3
+ */
4
+
5
+ import { constants } from "node:fs";
6
+ import { access, mkdir } from "node:fs/promises";
7
+ import { dirname, join } from "node:path";
8
+ import {
9
+ parseStructuredDocument,
10
+ writeYamlFile,
11
+ } from "../../lib/harness-yaml.js";
12
+ import { postMessengerMessage } from "./plan-messenger.js";
13
+
14
/** Identifiers of the plan-debate lanes whose subagent output is persisted as YAML. */
export type DebateLaneKind =
  | "hypothesis-validation"
  | "validation-turn"
  | "adversary-brief"
  | "sprint-audit";

/** Agent id (relative to `.pi/agents/`) mapped to the debate lane its output feeds. */
const AGENT_LANE: Record<string, DebateLaneKind> = {
  "harness/planning/hypothesis-validator": "hypothesis-validation",
  "harness/planning/plan-evaluator": "validation-turn",
  "harness/planning/plan-adversary": "adversary-brief",
  "harness/planning/sprint-contract-auditor": "sprint-audit",
};

/**
 * Resolve the debate lane fed by a given agent.
 *
 * @param agent Agent id; an optional leading `.pi/agents/` prefix and any
 *   surrounding whitespace are ignored.
 * @returns The lane for that agent, or `null` when the agent is not one of the
 *   debate-lane agents.
 */
export function debateLaneForAgent(agent: string): DebateLaneKind | null {
  // Trim first: the prefix pattern is anchored with `^`, so leading whitespace
  // would otherwise stop the prefix from being stripped. Trim again afterwards
  // to keep accepting whitespace between the prefix and the id.
  const normalized = agent
    .trim()
    .replace(/^\.pi\/agents\//, "")
    .trim();
  // Own-property check: a bare index lookup on a plain object also finds
  // inherited members such as "constructor" or "toString" and would return
  // them instead of null.
  if (!Object.prototype.hasOwnProperty.call(AGENT_LANE, normalized)) {
    return null;
  }
  return AGENT_LANE[normalized] ?? null;
}
31
+
32
/**
 * Build the run-relative path of the YAML artifact for one lane in one round.
 *
 * @param lane Debate lane the artifact belongs to.
 * @param roundIndex Round number; interpolated verbatim into the file name.
 * @returns A path of the form `artifacts/<lane>-r<roundIndex>.yaml`.
 */
export function laneArtifactPath(
  lane: DebateLaneKind,
  roundIndex: number,
): string {
  // File-name stem per lane, kept as an explicit table so that adding a lane
  // to DebateLaneKind without a stem is a compile error.
  const stemByLane: Record<DebateLaneKind, string> = {
    "hypothesis-validation": "hypothesis-validation",
    "validation-turn": "validation-turn",
    "adversary-brief": "adversary-brief",
    "sprint-audit": "sprint-audit",
  };
  const stem = stemByLane[lane];
  return `artifacts/${stem}-r${roundIndex}.yaml`;
}
47
+
48
/**
 * Collect the claim ids carried by a lane document.
 *
 * An array-valued `messenger_claim_ids` takes precedence (even when it is
 * empty); otherwise the ids are read from `checks[].id`. Anything that is not
 * a non-empty string is dropped.
 *
 * @param doc Parsed lane document.
 * @returns The claim ids in document order; empty when none are present.
 */
export function extractClaimIds(doc: Record<string, unknown>): string[] {
  const isNonEmptyString = (value: unknown): value is string =>
    typeof value === "string" && value.length > 0;

  const explicit = doc.messenger_claim_ids;
  if (Array.isArray(explicit)) {
    return explicit.filter(isNonEmptyString);
  }

  const checks = doc.checks;
  if (!Array.isArray(checks)) return [];

  return checks
    .map((check: unknown) =>
      // Entries come from subagent-authored YAML: a null or scalar entry must
      // be skipped rather than dereferenced, which would throw a TypeError.
      typeof check === "object" && check !== null
        ? (check as { id?: unknown }).id
        : undefined,
    )
    .filter(isNonEmptyString);
}
61
+
62
/**
 * Report whether `path` can be read by the current process.
 *
 * @param path File-system path to probe.
 * @returns `true` when the read-access check succeeds, `false` on any failure.
 */
async function fileExists(path: string): Promise<boolean> {
  // Any rejection (missing file, permission denied, ...) maps to false.
  return access(path, constants.R_OK).then(
    () => true,
    () => false,
  );
}
70
+
71
/** Outcome of persisting one debate lane's output. */
export interface ApplyDebateLaneResult {
  /** `true` when no errors were recorded. */
  ok: boolean;
  /** Lane the output was applied to. */
  lane: DebateLaneKind;
  /** Round the artifact was filed under. */
  round_index: number;
  /** Run-relative path of the lane artifact; empty when nothing was written. */
  artifact_path: string;
  /** Whether a messenger message was posted for this lane. */
  messenger_posted: boolean;
  /** Human-readable problems; empty on success. */
  errors: string[];
  /** Suggested follow-up action for the orchestrator, when one applies. */
  next_step?: string;
}
80
+
81
/** True when `value` is a non-null, non-array object, i.e. a YAML/JSON mapping. */
function isLaneMapping(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}

/** Trimmed text of `value` when it is a non-blank string, otherwise `undefined`. */
function nonBlankText(value: unknown): string | undefined {
  if (typeof value !== "string") return undefined;
  const trimmed = value.trim();
  return trimmed.length > 0 ? trimmed : undefined;
}

/**
 * Persist one debate lane's subagent output as YAML under the run directory
 * and, for the evaluator and adversary lanes, post the matching messenger
 * message.
 *
 * Steps:
 * 1. Parse `content` via `parseStructuredDocument`. A parse failure, or a
 *    document that is not a mapping, returns `ok: false` with an empty
 *    `artifact_path` and nothing is written.
 * 2. Write the document to `laneArtifactPath(lane, roundIndex)` inside `runDir`.
 * 3. `validation-turn`: post a `claim` message carrying the extracted claim ids.
 *    `adversary-brief`: post a `rebuttal` replying to the claim ids found in
 *    the same round's validation-turn artifact.
 *
 * The artifact is written before the messenger preconditions are checked, so a
 * result with `ok: false` and a non-empty `artifact_path` means the YAML is on
 * disk but no messenger message was posted.
 *
 * @param opts.runDir Run directory that artifact paths are resolved against.
 * @param opts.lane Lane the content belongs to.
 * @param opts.content Raw structured text produced by the subagent.
 * @param opts.roundIndex Round to use when the document carries no usable
 *   `round_index`; defaults to 1.
 * @returns What was written and posted, plus any errors and a suggested next step.
 */
export async function applyDebateLane(opts: {
  runDir: string;
  lane: DebateLaneKind;
  content: string;
  roundIndex?: number;
}): Promise<ApplyDebateLaneResult> {
  // Result shape for failures that happen before anything touches the disk.
  const rejected = (message: string): ApplyDebateLaneResult => ({
    ok: false,
    lane: opts.lane,
    round_index: opts.roundIndex ?? 0,
    artifact_path: "",
    messenger_posted: false,
    errors: [message],
  });

  let parsed: unknown;
  try {
    parsed = parseStructuredDocument(opts.content, opts.lane);
  } catch (err) {
    return rejected(err instanceof Error ? err.message : String(err));
  }
  // Everything below reads named fields off the document; a null, scalar or
  // list result would either crash or persist a useless artifact.
  if (!isLaneMapping(parsed)) {
    return rejected(
      `${opts.lane} output must be a mapping (object) at the top level`,
    );
  }
  const doc = parsed;

  // The round number ends up in a file name, so only accept a whole,
  // non-negative number from the document; otherwise use the caller's value.
  const declaredRound = doc.round_index;
  const roundIndex =
    typeof declaredRound === "number" &&
    Number.isInteger(declaredRound) &&
    declaredRound >= 0
      ? declaredRound
      : (opts.roundIndex ?? 1);

  const relPath = laneArtifactPath(opts.lane, roundIndex);
  const absPath = join(opts.runDir, relPath);
  await mkdir(dirname(absPath), { recursive: true });
  await writeYamlFile(absPath, doc);

  const errors: string[] = [];
  let messengerPosted = false;
  let nextStep: string | undefined;

  if (opts.lane === "validation-turn") {
    const claimIds = extractClaimIds(doc);
    if (claimIds.length === 0) {
      errors.push(
        "validation-turn has no claim ids (checks[].id or messenger_claim_ids)",
      );
    } else {
      const body =
        nonBlankText(doc.human_summary) ??
        claimIds.map((id) => `Check ${id}`).join("; ");
      await postMessengerMessage(opts.runDir, {
        from: "PlanEvaluatorAgent",
        kind: "claim",
        round_index: roundIndex,
        to: ["broadcast"],
        body,
        claim_ids: claimIds,
        in_reply_to: [],
        evidence_refs: [relPath],
        artifact_path: relPath,
      });
      messengerPosted = true;
      nextStep = `Spawn plan-adversary with harness_messenger_read_round({ round_index: ${roundIndex} }) transcript, then harness_debate_apply_lane for adversary output.`;
    }
  }

  if (opts.lane === "adversary-brief") {
    const turnRelPath = laneArtifactPath("validation-turn", roundIndex);
    const turnPath = join(opts.runDir, turnRelPath);
    const errorsBefore = errors.length;
    let inReplyTo: string[] = [];
    if (await fileExists(turnPath)) {
      // A corrupt or unexpectedly shaped evaluator artifact is reported through
      // `errors` instead of rejecting the whole call.
      try {
        const { readFile } = await import("node:fs/promises");
        const { parse: parseYaml } = await import("yaml");
        const turn: unknown = parseYaml(await readFile(turnPath, "utf-8"));
        if (isLaneMapping(turn)) {
          inReplyTo = extractClaimIds(turn);
        } else {
          errors.push(`${turnRelPath} is not a YAML mapping`);
        }
      } catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        errors.push(`could not read ${turnRelPath}: ${reason}`);
      }
    }
    if (inReplyTo.length === 0) {
      // Only add the generic hint when no more specific error explains it.
      if (errors.length === errorsBefore) {
        errors.push(
          "no claim ids to rebut — validation-turn-rN must exist before adversary",
        );
      }
    } else {
      // Only a textual first failure mode is usable as a message body; a
      // structured entry would stringify to "[object Object]".
      const firstFailureMode = Array.isArray(doc.failure_modes)
        ? (doc.failure_modes as unknown[])[0]
        : undefined;
      const body =
        nonBlankText(doc.human_summary) ??
        nonBlankText(firstFailureMode) ??
        "Adversary rebuttal for evaluator claims.";
      await postMessengerMessage(opts.runDir, {
        from: "PlanAdversaryAgent",
        kind: "rebuttal",
        round_index: roundIndex,
        to: ["broadcast"],
        body,
        claim_ids: [],
        in_reply_to: inReplyTo,
        evidence_refs: [relPath],
        artifact_path: relPath,
      });
      messengerPosted = true;
      nextStep = `Spawn review-integrator with harness_messenger_read_round({ round_index: ${roundIndex} }) + lane artifacts, then harness_debate_submit_round.`;
    }
  }

  return {
    ok: errors.length === 0,
    lane: opts.lane,
    round_index: roundIndex,
    artifact_path: relPath,
    messenger_posted: messengerPosted,
    errors,
    next_step: nextStep,
  };
}
195
+
196
/**
 * Render an apply-lane result as a short multi-line status message.
 *
 * @param result Outcome returned by the lane apply step.
 * @returns On failure, a header followed by one `- ` bullet per error. On
 *   success, the written path, whether the messenger was updated, and the
 *   suggested next step when there is one.
 */
export function formatApplyLaneMessage(result: ApplyDebateLaneResult): string {
  if (!result.ok) {
    const bullets = result.errors.join("\n- ");
    return `Lane ${result.lane} failed:\n- ${bullets}`;
  }

  const messengerLine = result.messenger_posted
    ? "messenger updated"
    : "no messenger post for this lane";
  let message = `Wrote ${result.artifact_path}\n${messengerLine}`;
  if (result.next_step) {
    message += `\nNext: ${result.next_step}`;
  }
  return message;
}
209
+
210
/**
 * Debate lanes paired with the agent that produces each one, in list order:
 * hypothesis validation, then the evaluator turn, then the adversary brief.
 *
 * NOTE(review): the sprint-audit lane is not listed here — presumably because
 * it is only required for some rounds; confirm against the callers.
 */
export const DEBATE_LANE_AGENT_ORDER: { lane: DebateLaneKind; agent: string }[] =
  [
    {
      lane: "hypothesis-validation",
      agent: "harness/planning/hypothesis-validator",
    },
    {
      lane: "validation-turn",
      agent: "harness/planning/plan-evaluator",
    },
    {
      lane: "adversary-brief",
      agent: "harness/planning/plan-adversary",
    },
  ];
@@ -0,0 +1,44 @@
1
+ /**
2
+ * Shared Review Gate lane list for a round (gate + round-status).
3
+ */
4
+
5
+ import type { PlanDebateFocus } from "./plan-debate-focus.js";
6
+ import type { DebateLaneKind } from "./plan-debate-lane.js";
7
+
8
/**
 * Lanes that must be complete before the review-integrator runs for a round.
 *
 * The evaluator turn and adversary brief are always required. Round 1 is
 * preceded by hypothesis validation, and a sprint audit is appended from
 * round 4 onwards or whenever the round's focus is `"quality"`.
 *
 * @param roundIndex Round number.
 * @param debateRoundFocus Optional focus of the round.
 * @returns A fresh array of lanes in execution order.
 */
export function lanesForRound(
  roundIndex: number,
  debateRoundFocus?: PlanDebateFocus | null,
): DebateLaneKind[] {
  const isFirstRound = roundIndex === 1;
  const needsSprintAudit = roundIndex >= 4 || debateRoundFocus === "quality";
  return [
    ...(isFirstRound ? (["hypothesis-validation"] as const) : []),
    "validation-turn",
    "adversary-brief",
    ...(needsSprintAudit ? (["sprint-audit"] as const) : []),
  ];
}
22
+
23
/**
 * Run-relative artifact paths expected for a round: one YAML file per required
 * lane, followed by the round's review-round file.
 *
 * @param roundIndex Round number; interpolated verbatim into each file name.
 * @param debateRoundFocus Optional focus of the round, forwarded to `lanesForRound`.
 * @returns Paths of the form `artifacts/<name>-r<roundIndex>.yaml`.
 */
export function laneArtifactPathsForRound(
  roundIndex: number,
  debateRoundFocus?: PlanDebateFocus | null,
): string[] {
  // Every lane uses the same `artifacts/<lane>-r<N>.yaml` layout, so the lane
  // name can be interpolated directly.
  const roundSuffix = `-r${roundIndex}.yaml`;
  const artifactPaths: string[] = [];
  for (const lane of lanesForRound(roundIndex, debateRoundFocus)) {
    artifactPaths.push(`artifacts/${lane}${roundSuffix}`);
  }
  artifactPaths.push(`artifacts/review-round${roundSuffix}`);
  return artifactPaths;
}
@@ -0,0 +1,137 @@
1
+ /**
2
+ * Round-level debate readiness for parent orchestration.
3
+ */
4
+
5
+ import { constants } from "node:fs";
6
+ import { access } from "node:fs/promises";
7
+ import { join } from "node:path";
8
+ import { capsForDebate } from "./debate-bus-core.js";
9
+ import {
10
+ type PlanDebateFocus,
11
+ readDebateRoundFocus,
12
+ } from "./plan-debate-focus.js";
13
+ import { planDebateIdForRun } from "./plan-debate-id.js";
14
+ import { laneArtifactPath } from "./plan-debate-lane.js";
15
+ import { lanesForRound } from "./plan-debate-lanes.js";
16
+ import {
17
+ getMessengerRoundState,
18
+ loadMessengerState,
19
+ messengerRoundDialogueReady,
20
+ } from "./plan-messenger.js";
21
+
22
/**
 * Report whether `path` can be read by the current process.
 *
 * @param path File-system path to probe.
 * @returns `true` when the read-access check succeeds, `false` on any failure.
 */
async function exists(path: string): Promise<boolean> {
  let readable = true;
  try {
    await access(path, constants.R_OK);
  } catch {
    // Missing file, permission denied, etc. all count as "not there".
    readable = false;
  }
  return readable;
}
30
+
31
/** Readiness snapshot of one debate round, for the parent orchestrator. */
export interface RoundStatusResult {
  /** Round this status describes. */
  round_index: number;
  /** Lane YAML + messenger dialogue complete; spawn integrator next. */
  ready_for_integrator: boolean;
  /** review-round-rN.yaml on disk (call harness_debate_submit_round if bus not updated). */
  review_round_on_disk: boolean;
  /** Outstanding items: lane artifact paths and `messenger: `-prefixed dialogue errors. */
  missing: string[];
  /** Suggested next tool or subagent invocation. */
  next_tool?: string;
  /** Messenger dialogue readiness check outcome. */
  messenger: { ok: boolean; errors: string[] };
  /** Dialogue readiness check outcome. */
  dialogue: { ok: boolean; errors: string[] };
  /** Claim ids still unresolved in the round's messenger state. */
  unresolved_claim_ids: string[];
  /** Exchange count recorded in the round's messenger state. */
  exchange_count: number;
  /** Focus applied when choosing the round's required lanes. */
  debate_round_focus?: PlanDebateFocus | null;
}
45
+
46
/**
 * Work out how far a debate round has progressed and what to run next.
 *
 * Checks which required lane artifacts exist under `runDir`, evaluates the
 * messenger dialogue for the round, and derives a `next_tool` hint.
 *
 * @param runDir Run directory that artifact paths are resolved against.
 * @param roundIndex Round to inspect.
 * @param runId Optional run id used to derive the debate id for cap lookup.
 * @param opts.debate_round_focus Focus override; when absent the focus is read
 *   via `readDebateRoundFocus`.
 * @returns Missing items, dialogue state and the suggested next step.
 */
export async function getPlanDebateRoundStatus(
  runDir: string,
  roundIndex: number,
  runId?: string,
  opts?: { debate_round_focus?: PlanDebateFocus },
): Promise<RoundStatusResult> {
  const focus =
    opts?.debate_round_focus ??
    (await readDebateRoundFocus(runDir, roundIndex));

  // The existence probes are independent read-only checks, so run them
  // together; Promise.all keeps the lane order.
  const laneChecks = await Promise.all(
    lanesForRound(roundIndex, focus).map(async (lane) => {
      const rel = laneArtifactPath(lane, roundIndex);
      return { lane, rel, present: await exists(join(runDir, rel)) };
    }),
  );
  const absentLanes = laneChecks.filter((check) => !check.present);
  // Route on lanes that are actually absent. Substring-matching `missing`
  // would also match messenger errors that merely mention a lane name.
  const absentLaneNames = new Set<string>(
    absentLanes.map((check) => check.lane),
  );
  const missing: string[] = absentLanes.map((check) => check.rel);

  const messengerState = await loadMessengerState(runDir);
  const profile = messengerState?.debate_profile;
  // An absent or empty run id falls back to one explicit placeholder id.
  const debateId = runId ? planDebateIdForRun(runId) : "plan-unknown";
  const caps = capsForDebate(debateId, profile);
  const roundState = await getMessengerRoundState(runDir, roundIndex);
  const dialogue = messengerRoundDialogueReady(roundState, {
    max_exchanges_per_round: caps.max_exchanges_per_round,
  });
  if (!dialogue.ok) {
    missing.push(...dialogue.errors.map((e) => `messenger: ${e}`));
  }
  const reviewRound = `artifacts/review-round-r${roundIndex}.yaml`;
  const reviewRoundOnDisk = await exists(join(runDir, reviewRound));

  // Ordered from the earliest outstanding step to the latest.
  let next_tool: string | undefined;
  if (absentLaneNames.has("hypothesis-validation")) {
    next_tool = "subagent harness/planning/hypothesis-validator";
  } else if (absentLaneNames.has("validation-turn")) {
    next_tool = "subagent harness/planning/plan-evaluator";
  } else if (
    absentLaneNames.has("adversary-brief") &&
    !roundState?.evaluator_posted
  ) {
    // The adversary needs the evaluator's claims on the messenger first.
    next_tool = "subagent harness/planning/plan-evaluator";
  } else if (absentLaneNames.has("adversary-brief")) {
    next_tool =
      "harness_messenger_read_round then subagent harness/planning/plan-adversary";
  } else if (absentLaneNames.has("sprint-audit")) {
    next_tool = "subagent harness/planning/sprint-contract-auditor";
  } else if (
    roundState &&
    roundState.evaluator_posted &&
    !roundState.adversary_posted
  ) {
    next_tool =
      "harness_messenger_read_round then subagent harness/planning/plan-adversary";
  } else if (
    roundState &&
    roundState.unresolved_claim_ids.length > 0 &&
    roundState.exchange_count < caps.max_exchanges_per_round
  ) {
    // Odd exchange counts hand the turn to the evaluator, even ones to the adversary.
    const spawnEvaluator = roundState.exchange_count % 2 === 1;
    next_tool = spawnEvaluator
      ? "harness_debate_advance_thread → harness_messenger_read_round → subagent harness/planning/plan-evaluator (clarification; address unresolved claim_ids)"
      : "harness_debate_advance_thread → harness_messenger_read_round → subagent harness/planning/plan-adversary (counter or concede)";
  } else if (!dialogue.ok) {
    next_tool =
      "harness_debate_advance_thread or harness_debate_apply_lane (evaluator/adversary)";
  } else if (!reviewRoundOnDisk) {
    next_tool =
      "subagent harness/planning/review-integrator then harness_debate_submit_round";
  } else {
    next_tool =
      "harness_debate_submit_round with integrator draft from review-round file";
  }

  const readyForIntegrator =
    dialogue.ok && absentLanes.length === 0 && !reviewRoundOnDisk;

  return {
    round_index: roundIndex,
    ready_for_integrator: readyForIntegrator,
    review_round_on_disk: reviewRoundOnDisk,
    missing,
    next_tool,
    // Both fields carry the same dialogue readiness result.
    messenger: dialogue,
    dialogue,
    unresolved_claim_ids: roundState?.unresolved_claim_ids ?? [],
    exchange_count: roundState?.exchange_count ?? 0,
    debate_round_focus: focus,
  };
}
@@ -0,0 +1,20 @@
1
+ /**
2
+ * P0 — only harness_debate_submit_round may write review-round-r*.yaml via write_harness_yaml.
3
+ */
4
+
5
/**
 * Number of `withReviewRoundYamlWrite` calls currently in flight.
 *
 * NOTE(review): this counter is module-global, so while any wrapped call is
 * pending the guard reports "allowed" to every caller, not only to code
 * running inside that call.
 */
let activeReviewRoundWrites = 0;

/** Whether a review-round YAML write is currently permitted. */
export function isReviewRoundYamlWriteAllowed(): boolean {
  return activeReviewRoundWrites >= 1;
}

/**
 * Run `fn` with review-round YAML writes permitted.
 *
 * The permission is released when `fn` settles, whether it resolves or throws.
 *
 * @param fn Async work that performs the write.
 * @returns Whatever `fn` resolves to; rejections from `fn` propagate unchanged.
 */
export async function withReviewRoundYamlWrite<T>(
  fn: () => Promise<T>,
): Promise<T> {
  activeReviewRoundWrites++;
  try {
    const outcome = await fn();
    return outcome;
  } finally {
    activeReviewRoundWrites--;
  }
}