ultimate-pi 0.13.1 → 0.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. package/.agents/skills/harness-debate-plan/SKILL.md +42 -22
  2. package/.agents/skills/harness-orchestration/SKILL.md +3 -3
  3. package/.agents/skills/harness-plan/SKILL.md +10 -8
  4. package/.pi/agents/harness/planning/decompose.md +4 -2
  5. package/.pi/agents/harness/planning/execution-plan-author.md +25 -14
  6. package/.pi/agents/harness/planning/hypothesis-validator.md +21 -5
  7. package/.pi/agents/harness/planning/implementation-researcher.md +42 -0
  8. package/.pi/agents/harness/planning/plan-adversary.md +20 -4
  9. package/.pi/agents/harness/planning/plan-evaluator.md +28 -5
  10. package/.pi/agents/harness/planning/review-integrator.md +25 -9
  11. package/.pi/agents/harness/planning/scout-graphify.md +1 -1
  12. package/.pi/agents/harness/planning/sprint-contract-auditor.md +19 -4
  13. package/.pi/agents/harness/planning/stack-researcher.md +19 -10
  14. package/.pi/extensions/debate-orchestrator.ts +39 -435
  15. package/.pi/extensions/harness-debate-tools.ts +741 -0
  16. package/.pi/extensions/harness-live-widget.ts +39 -159
  17. package/.pi/extensions/harness-plan-approval.ts +88 -22
  18. package/.pi/extensions/harness-run-context.ts +18 -0
  19. package/.pi/extensions/lib/debate-bus-core.ts +488 -0
  20. package/.pi/extensions/lib/debate-bus-state.ts +64 -0
  21. package/.pi/extensions/lib/harness-spawn-budget.ts +5 -25
  22. package/.pi/extensions/lib/plan-approval/dialog.ts +33 -272
  23. package/.pi/extensions/lib/plan-approval/format-plan.ts +12 -85
  24. package/.pi/extensions/lib/plan-approval/plan-review.ts +62 -6
  25. package/.pi/extensions/lib/plan-approval/render.ts +6 -0
  26. package/.pi/extensions/lib/plan-approval/types.ts +1 -0
  27. package/.pi/extensions/lib/plan-approval/validate.ts +1 -1
  28. package/.pi/extensions/lib/plan-debate-eligibility.ts +214 -0
  29. package/.pi/extensions/lib/plan-debate-envelope.ts +2 -0
  30. package/.pi/extensions/lib/plan-debate-focus.ts +151 -0
  31. package/.pi/extensions/lib/plan-debate-gate.ts +198 -0
  32. package/.pi/extensions/lib/plan-debate-id.ts +39 -0
  33. package/.pi/extensions/lib/plan-debate-lane.ts +220 -0
  34. package/.pi/extensions/lib/plan-debate-lanes.ts +44 -0
  35. package/.pi/extensions/lib/plan-debate-round-status.ts +137 -0
  36. package/.pi/extensions/lib/plan-debate-write-guard.ts +20 -0
  37. package/.pi/extensions/lib/plan-messenger.ts +352 -0
  38. package/.pi/extensions/lib/plan-review-integrator-rules.ts +119 -0
  39. package/.pi/extensions/lib/plan-scope-guard.ts +89 -0
  40. package/.pi/extensions/policy-gate.ts +1 -1
  41. package/.pi/harness/README.md +1 -1
  42. package/.pi/harness/agents.manifest.json +16 -12
  43. package/.pi/harness/docs/adrs/0034-darwin-plan-research-pipeline.md +1 -3
  44. package/.pi/harness/docs/adrs/0035-plan-phase-review-gate.md +13 -5
  45. package/.pi/harness/docs/adrs/0036-implementation-research-and-selective-debate.md +51 -0
  46. package/.pi/harness/docs/adrs/README.md +2 -0
  47. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/artifacts/implementation-research.yaml +28 -0
  48. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/artifacts/review-round-r1.yaml +24 -0
  49. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/artifacts/review-round-r2.yaml +25 -0
  50. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/plan-packet.yaml +196 -0
  51. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/plan-review.md +14 -0
  52. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/research-brief.yaml +62 -0
  53. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/implementation-research.yaml +28 -0
  54. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r2.yaml +24 -0
  55. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r3.yaml +24 -0
  56. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/research-brief.yaml +29 -0
  57. package/.pi/harness/evals/smoke/smoke-harness-plan.mjs +97 -16
  58. package/.pi/harness/specs/plan-implementation-research-brief.schema.json +128 -0
  59. package/.pi/harness/specs/plan-review-round-draft.schema.json +1 -1
  60. package/.pi/harness/specs/round-result.schema.json +15 -2
  61. package/.pi/lib/harness-ui-state.ts +92 -0
  62. package/.pi/prompts/harness-plan.md +90 -30
  63. package/.pi/prompts/planning-rubrics.md +31 -0
  64. package/CHANGELOG.md +23 -0
  65. package/package.json +3 -3
  66. package/.pi/extensions/lib/plan-approval/fallback.ts +0 -50
@@ -0,0 +1,352 @@
1
+ /**
2
+ * pi-messenger-style plan debate transport — per-agent inboxes + round threads.
3
+ *
4
+ * Layout under `.pi/harness/runs/<run_id>/debate-messenger/`:
5
+ * inbox/<AgentLabel>/<seq>-<kind>.json
6
+ * threads/round-<N>/transcript.jsonl
7
+ * state.json
8
+ */
9
+
10
+ import { randomUUID } from "node:crypto";
11
+ import {
12
+ appendFile,
13
+ mkdir,
14
+ readdir,
15
+ readFile,
16
+ writeFile,
17
+ } from "node:fs/promises";
18
+ import { join } from "node:path";
19
+ import type { DebateParticipant } from "../../lib/debate-orchestrator-types.js";
20
+ import type { DebateProfile } from "./plan-debate-eligibility.js";
21
+ import type { PlanDebateFocus } from "./plan-debate-focus.js";
22
+
23
+ export type MessengerMessageKind =
24
+ | "system"
25
+ | "claim"
26
+ | "rebuttal"
27
+ | "clarification"
28
+ | "counter"
29
+ | "integrate"
30
+ | "audit";
31
+
32
+ export interface MessengerMessage {
33
+ schema_version: "1.0.0";
34
+ id: string;
35
+ ts: string;
36
+ from: DebateParticipant | "system";
37
+ to: Array<DebateParticipant | "broadcast">;
38
+ kind: MessengerMessageKind;
39
+ round_index: number;
40
+ in_reply_to: string[];
41
+ body: string;
42
+ claim_ids: string[];
43
+ evidence_refs: string[];
44
+ artifact_path?: string;
45
+ }
46
+
47
+ export interface MessengerRoundState {
48
+ round_index: number;
49
+ evaluator_posted: boolean;
50
+ adversary_posted: boolean;
51
+ integrator_posted: boolean;
52
+ claim_count: number;
53
+ rebuttal_count: number;
54
+ exchange_count: number;
55
+ unresolved_claim_ids: string[];
56
+ }
57
+
58
+ export interface MessengerState {
59
+ schema_version: "1.0.0";
60
+ run_id: string;
61
+ debate_id: string;
62
+ opened_at: string;
63
+ rounds: Record<string, MessengerRoundState>;
64
+ debate_profile?: DebateProfile;
65
+ required_focuses?: PlanDebateFocus[];
66
+ }
67
+
68
+ function messengerRoot(runDir: string): string {
69
+ return join(runDir, "debate-messenger");
70
+ }
71
+
72
+ function nowIso(): string {
73
+ return new Date().toISOString();
74
+ }
75
+
76
+ function roundKey(roundIndex: number): string {
77
+ return String(roundIndex);
78
+ }
79
+
80
/**
 * Create the messenger directory layout for a run and write a fresh state file.
 *
 * Ensures `inbox/` and `threads/` exist under the run's `debate-messenger`
 * directory, then writes `state.json` with an empty `rounds` map.
 *
 * @param runDir - Run directory that hosts the `debate-messenger` folder.
 * @param opts - Run/debate identifiers plus the optional profile and focuses
 *   that are copied verbatim into the state file.
 * @returns The messenger root directory path.
 */
export async function initPlanMessenger(
  runDir: string,
  opts: {
    runId: string;
    debateId: string;
    debate_profile?: DebateProfile;
    required_focuses?: PlanDebateFocus[];
  },
): Promise<string> {
  const root = messengerRoot(runDir);
  for (const sub of ["inbox", "threads"]) {
    await mkdir(join(root, sub), { recursive: true });
  }

  const { runId, debateId, debate_profile, required_focuses } = opts;
  const initial: MessengerState = {
    schema_version: "1.0.0",
    run_id: runId,
    debate_id: debateId,
    opened_at: nowIso(),
    rounds: {},
    debate_profile,
    required_focuses,
  };

  const serialized = `${JSON.stringify(initial, null, 2)}\n`;
  await writeFile(join(root, "state.json"), serialized, "utf-8");
  return root;
}
108
+
109
/**
 * Load the messenger `state.json` for a run.
 *
 * Best-effort: a missing, unreadable, or unparsable file yields `null`.
 * The parsed value is also shape-checked, because a file that contains valid
 * JSON but not an object (or an object without a `rounds` map) would otherwise
 * crash callers that index `state.rounds[...]`.
 *
 * @param runDir - Run directory that hosts the `debate-messenger` folder.
 * @returns The stored state, or `null` when no usable state exists.
 */
export async function loadMessengerState(
  runDir: string,
): Promise<MessengerState | null> {
  const path = join(messengerRoot(runDir), "state.json");

  let parsed: unknown;
  try {
    parsed = JSON.parse(await readFile(path, "utf-8"));
  } catch {
    return null;
  }

  if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
    return null;
  }

  const state = parsed as MessengerState;
  const rounds: unknown = state.rounds;
  if (typeof rounds !== "object" || rounds === null || Array.isArray(rounds)) {
    // Keep the identifying fields but repair the map callers rely on.
    state.rounds = {};
  }
  return state;
}
120
+
121
/**
 * Persist the messenger state as pretty-printed JSON (trailing newline) to
 * `state.json` under the run's messenger directory, replacing any prior file.
 *
 * @param runDir - Run directory that hosts the `debate-messenger` folder.
 * @param state - State object to serialize.
 */
async function saveMessengerState(
  runDir: string,
  state: MessengerState,
): Promise<void> {
  const target = join(messengerRoot(runDir), "state.json");
  const payload = `${JSON.stringify(state, null, 2)}\n`;
  await writeFile(target, payload, "utf-8");
}
131
+
132
+ function defaultRoundState(roundIndex: number): MessengerRoundState {
133
+ return {
134
+ round_index: roundIndex,
135
+ evaluator_posted: false,
136
+ adversary_posted: false,
137
+ integrator_posted: false,
138
+ claim_count: 0,
139
+ rebuttal_count: 0,
140
+ exchange_count: 0,
141
+ unresolved_claim_ids: [],
142
+ };
143
+ }
144
+
145
/**
 * Recompute a round's counters and unresolved claim ids from its transcript.
 *
 * The transcript is treated as the source of truth: `claim_count`,
 * `rebuttal_count`, `exchange_count` and `unresolved_claim_ids` are rebuilt
 * from scratch on every call, so replaying the same transcript is idempotent
 * (previously the claim/rebuttal counters were added onto the persisted values
 * and grew on every call). Messages that share an `id` are counted once.
 *
 * The `*_posted` flags are only ever switched on here; a flag that is already
 * `true` on `round` stays `true`.
 *
 * @param round - Round state to update; mutated in place.
 * @param messages - Messages of the round, in transcript order.
 * @returns The same `round` object, updated.
 */
export function syncRoundStateFromTranscript(
  round: MessengerRoundState,
  messages: MessengerMessage[],
): MessengerRoundState {
  const claimed = new Set<string>();
  const resolved = new Set<string>();
  const seenIds = new Set<string>();
  let claimCount = 0;
  let rebuttalCount = 0;
  let exchangeCount = 0;

  for (const m of messages) {
    // Skip replays of a message that was already counted.
    if (typeof m.id === "string" && m.id !== "") {
      if (seenIds.has(m.id)) continue;
      seenIds.add(m.id);
    }

    // Transcript lines come from disk; tolerate missing arrays.
    const claimIds = m.claim_ids ?? [];
    const replyTo = m.in_reply_to ?? [];

    if (m.from === "PlanEvaluatorAgent" && m.kind === "claim") {
      round.evaluator_posted = true;
      // A claim message without explicit ids still counts as one claim.
      claimCount += claimIds.length || 1;
      for (const id of claimIds) claimed.add(id);
    }
    if (m.from === "PlanAdversaryAgent" && m.kind === "rebuttal") {
      round.adversary_posted = true;
      rebuttalCount += replyTo.length || 1;
      exchangeCount += 1;
    }
    if (
      (m.from === "PlanEvaluatorAgent" && m.kind === "clarification") ||
      (m.from === "PlanAdversaryAgent" && m.kind === "counter")
    ) {
      exchangeCount += 1;
      for (const id of claimIds) resolved.add(id);
      for (const id of replyTo) resolved.add(id);
    }
    if (m.from === "ReviewIntegratorAgent" && m.kind === "integrate") {
      round.integrator_posted = true;
    }
  }

  round.claim_count = claimCount;
  round.rebuttal_count = rebuttalCount;
  round.exchange_count = exchangeCount;
  round.unresolved_claim_ids = [...claimed].filter((id) => !resolved.has(id));
  return round;
}
184
+
185
/**
 * Post one message to the debate messenger.
 *
 * Steps, in order:
 *  1. Fill in `schema_version`, `id` (random UUID unless supplied) and `ts`.
 *  2. Write the message as a JSON file under `inbox/<from>/`.
 *  3. Append it as one JSONL line to `threads/round-<N>/transcript.jsonl`.
 *  4. Re-derive the round state from the transcript and save `state.json`.
 *
 * @param runDir - Run directory that hosts the `debate-messenger` folder.
 * @param msg - Message fields; `id` and `ts` are optional overrides.
 * @returns The fully populated message that was written.
 */
export async function postMessengerMessage(
  runDir: string,
  msg: Omit<MessengerMessage, "schema_version" | "id" | "ts"> & {
    id?: string;
    ts?: string;
  },
): Promise<MessengerMessage> {
  const root = messengerRoot(runDir);
  const full: MessengerMessage = {
    schema_version: "1.0.0",
    id: msg.id ?? randomUUID(),
    ts: msg.ts ?? nowIso(),
    from: msg.from,
    to: msg.to,
    kind: msg.kind,
    round_index: msg.round_index,
    in_reply_to: msg.in_reply_to ?? [],
    body: msg.body,
    claim_ids: msg.claim_ids ?? [],
    evidence_refs: msg.evidence_refs ?? [],
    artifact_path: msg.artifact_path,
  };

  const inboxDir = join(root, "inbox", full.from);
  await mkdir(inboxDir, { recursive: true });
  const inboxName = `${full.round_index.toString().padStart(2, "0")}-${full.kind}-${full.id.slice(0, 8)}.json`;
  await writeFile(
    join(inboxDir, inboxName),
    `${JSON.stringify(full, null, 2)}\n`,
    "utf-8",
  );

  const threadDir = join(root, "threads", `round-${full.round_index}`);
  await mkdir(threadDir, { recursive: true });
  await appendFile(
    join(threadDir, "transcript.jsonl"),
    `${JSON.stringify(full)}\n`,
    "utf-8",
  );

  // No state file yet (init was skipped or the file is unusable): start blank.
  const state: MessengerState = (await loadMessengerState(runDir)) ?? {
    schema_version: "1.0.0",
    run_id: "",
    debate_id: "",
    opened_at: nowIso(),
    rounds: {},
  };
  const key = roundKey(full.round_index);
  const messages = await readRoundTranscript(runDir, full.round_index);
  // The transcript was appended above, so the re-read normally already holds
  // this message. Only add it when the read came back without it, otherwise
  // the newest message would be counted twice.
  if (!messages.some((m) => m.id === full.id)) {
    messages.push(full);
  }
  const round = state.rounds[key] ?? defaultRoundState(full.round_index);
  state.rounds[key] = syncRoundStateFromTranscript(round, messages);
  await saveMessengerState(runDir, state);
  return full;
}
239
+
240
/**
 * Read every message of a round from `threads/round-<N>/transcript.jsonl`.
 *
 * Best-effort: a missing or unreadable file yields an empty list. Lines are
 * parsed one by one, so a single malformed line (for example a partially
 * written append) is skipped instead of discarding the whole transcript.
 * Blank lines and lines that do not parse to an object are ignored.
 *
 * @param runDir - Run directory that hosts the `debate-messenger` folder.
 * @param roundIndex - Round whose thread is read.
 * @returns Messages in file order.
 */
export async function readRoundTranscript(
  runDir: string,
  roundIndex: number,
): Promise<MessengerMessage[]> {
  const path = join(
    messengerRoot(runDir),
    "threads",
    `round-${roundIndex}`,
    "transcript.jsonl",
  );

  let raw: string;
  try {
    raw = await readFile(path, "utf-8");
  } catch {
    return [];
  }

  const messages: MessengerMessage[] = [];
  for (const line of raw.split("\n")) {
    if (!line.trim()) continue;
    try {
      const parsed: unknown = JSON.parse(line);
      if (typeof parsed === "object" && parsed !== null) {
        messages.push(parsed as MessengerMessage);
      }
    } catch {
      // Malformed line: skip it and keep the rest of the thread.
    }
  }
  return messages;
}
260
+
261
/**
 * Render a round transcript as plain text for a spawned agent's prompt.
 *
 * Each message becomes `[from/kind [claim ids] (re: reply ids)] body`, and
 * messages are separated by a blank line. Output longer than `maxChars` is cut
 * from the end and a truncation marker is appended. The cut never lands in the
 * middle of a UTF-16 surrogate pair, so the result stays well-formed text.
 *
 * @param messages - Messages to render, in order.
 * @param maxChars - Maximum number of UTF-16 code units kept before the marker.
 * @returns The rendered text, or a placeholder when there is nothing to show.
 */
export function formatTranscriptForSpawn(
  messages: MessengerMessage[],
  maxChars = 12000,
): string {
  const lines = messages.map((m) => {
    const reply =
      m.in_reply_to.length > 0 ? ` (re: ${m.in_reply_to.join(", ")})` : "";
    const claims = m.claim_ids.length > 0 ? ` [${m.claim_ids.join(", ")}]` : "";
    return `[${m.from}/${m.kind}${claims}${reply}] ${m.body}`;
  });

  let text = lines.join("\n\n");
  if (text.length > maxChars) {
    let cut = maxChars;
    if (cut > 0) {
      // A high surrogate as the last kept unit means the pair would be split.
      const last = text.charCodeAt(cut - 1);
      if (last >= 0xd800 && last <= 0xdbff) cut -= 1;
    }
    text = `${text.slice(0, cut)}\n\n…(transcript truncated)`;
  }
  return text || "(empty thread — post evaluator claims before adversary)";
}
277
+
278
/**
 * Return the state of one round, refreshed against its transcript.
 *
 * Yields `null` when there is no state file or the state has no entry for the
 * round. Otherwise a shallow copy of the stored entry is synced with the
 * round transcript and returned; nothing is written back to disk.
 *
 * @param runDir - Run directory that hosts the `debate-messenger` folder.
 * @param roundIndex - Round to look up.
 */
export async function getMessengerRoundState(
  runDir: string,
  roundIndex: number,
): Promise<MessengerRoundState | null> {
  const stored = await loadMessengerState(runDir);
  const persisted = stored ? stored.rounds[roundKey(roundIndex)] : undefined;
  if (!persisted) {
    return null;
  }
  const messages = await readRoundTranscript(runDir, roundIndex);
  const snapshot: MessengerRoundState = { ...persisted };
  return syncRoundStateFromTranscript(snapshot, messages);
}
289
+
290
+ export interface MessengerDialogueOptions {
291
+ max_exchanges_per_round?: number;
292
+ }
293
+
294
/**
 * Decide whether the evaluator/adversary dialogue of a round has settled, i.e.
 * whether it is safe to spawn the integrator.
 *
 * The dialogue counts as settled when no claim is left unresolved or the
 * exchange limit (`max_exchanges_per_round`, default 3) has been reached.
 *
 * @param round - Round state, or `null` when the round has no activity.
 * @param opts - Optional exchange limit override.
 * @returns `ok` plus every unmet requirement as a human-readable error.
 */
export function messengerRoundDialogueReady(
  round: MessengerRoundState | null,
  opts: MessengerDialogueOptions = {},
): { ok: boolean; errors: string[] } {
  if (!round) {
    return { ok: false, errors: ["no messenger activity for this round"] };
  }

  const limit = opts.max_exchanges_per_round ?? 3;
  const settled =
    round.unresolved_claim_ids.length === 0 || round.exchange_count >= limit;

  // [requirement violated?, message] pairs, in reporting order.
  const findings: Array<[boolean, string]> = [
    [
      !round.evaluator_posted,
      "PlanEvaluatorAgent has not posted claims to the thread",
    ],
    [
      !round.adversary_posted,
      "PlanAdversaryAgent has not posted rebuttals to the thread",
    ],
    [round.claim_count < 1, "round thread has no claim_ids"],
    [
      round.rebuttal_count < 1,
      "adversary must rebut at least one claim (in_reply_to)",
    ],
    [
      !settled,
      `unresolved claims remain (${round.unresolved_claim_ids.join(", ")}) and exchange_count ${round.exchange_count} < ${limit}`,
    ],
  ];

  const errors: string[] = [];
  for (const [violated, message] of findings) {
    if (violated) errors.push(message);
  }
  return { ok: errors.length === 0, errors };
}
327
+
328
/**
 * Decide whether a whole round is ready for harness_debate_submit_round: the
 * dialogue requirements must hold and the integrator must have posted.
 *
 * @param round - Round state, or `null` when the round has no activity.
 * @param _requireSprintAudit - Accepted for interface compatibility; not read here.
 * @param opts - Optional exchange limit override, forwarded to the dialogue check.
 * @returns `ok` plus the dialogue errors followed by the integrator error, if any.
 */
export function messengerRoundDebateReady(
  round: MessengerRoundState | null,
  _requireSprintAudit: boolean,
  opts: MessengerDialogueOptions = {},
): { ok: boolean; errors: string[] } {
  const errors = messengerRoundDialogueReady(round, opts).errors.slice();
  const integratorPosted = round?.integrator_posted;
  if (!integratorPosted) {
    errors.push(
      "ReviewIntegratorAgent must post integrate message before bus submit",
    );
  }
  const ok = errors.length === 0;
  return { ok, errors };
}
343
+
344
/**
 * List the names of the sub-directories of the messenger `inbox/` folder.
 *
 * @param runDir - Run directory that hosts the `debate-messenger` folder.
 * @returns Directory names in the order `readdir` reports them, or an empty
 *   list when the inbox cannot be read.
 */
export async function listInboxAgents(runDir: string): Promise<string[]> {
  const inboxPath = join(messengerRoot(runDir), "inbox");
  try {
    const names: string[] = [];
    for (const entry of await readdir(inboxPath, { withFileTypes: true })) {
      if (entry.isDirectory()) names.push(entry.name);
    }
    return names;
  } catch {
    return [];
  }
}
@@ -0,0 +1,119 @@
1
+ /**
2
+ * P1 — integrator draft rules (disputes required when checks fail).
3
+ */
4
+
5
+ import { readFile } from "node:fs/promises";
6
+ import { join } from "node:path";
7
+ import { parse as parseYaml } from "yaml";
8
+
9
+ export interface IntegratorValidationResult {
10
+ ok: boolean;
11
+ review_gate_ready: boolean;
12
+ errors: string[];
13
+ warnings: string[];
14
+ }
15
+
16
/**
 * Report whether a review draft records any failed check.
 *
 * A failure is any of:
 *  - a `plan_evaluation` block (every key except `source`) with `passes === false`;
 *  - `hypothesis_validation.revision_recommended === true`;
 *  - `hypothesis_validation.relevance.passes === false`.
 *
 * `issues` listed on a passing block are treated as warnings and do not count.
 * The hypothesis checks run even when the draft has no `plan_evaluation`
 * section (previously a missing section short-circuited the whole function).
 *
 * @param doc - Parsed draft document.
 */
function hasFailedChecks(doc: Record<string, unknown>): boolean {
  const isRecord = (v: unknown): v is Record<string, unknown> =>
    typeof v === "object" && v !== null;

  const pe = doc.plan_evaluation;
  if (isRecord(pe)) {
    for (const [key, val] of Object.entries(pe)) {
      if (key === "source") continue;
      if (isRecord(val) && val.passes === false) return true;
    }
  }

  const hv = doc.hypothesis_validation;
  if (!isRecord(hv)) return false;
  if (hv.revision_recommended === true) return true;
  return isRecord(hv.relevance) && hv.relevance.passes === false;
}
38
+
39
/**
 * True when `adversary_brief.severity`, compared case-insensitively, is
 * "high" or "critical". A missing brief or severity counts as not high.
 *
 * @param doc - Parsed draft document.
 */
function adversarySeverityHigh(doc: Record<string, unknown>): boolean {
  const brief = doc.adversary_brief as Record<string, unknown> | undefined;
  const level = String(brief?.severity ?? "").toLowerCase();
  return ["high", "critical"].includes(level);
}
44
+
45
/**
 * Validate a review-integrator draft against the "disputes required when
 * checks fail" rules.
 *
 * Rules:
 *  - If the draft or the optional validation turn reports a failed check,
 *    `disputes` must be non-empty (error). Disputes without any
 *    `review_integrator_summary.debate_health.unresolved_tensions` only warn.
 *  - High/critical adversary severity also requires at least one dispute.
 *  - A draft that claims `review_gate_ready: true` while checks failed and no
 *    dispute is recorded gets an explicit error. This is judged against the
 *    value the draft claimed; the original compared the already-cleared flag,
 *    so the message could never be emitted.
 *
 * @param draft - Parsed integrator draft.
 * @param opts - Optional `validationTurn` document with `overall_ready` and `checks`.
 * @returns `ok` (no errors), the effective `review_gate_ready`, errors and warnings.
 */
export function validateIntegratorDraft(
  draft: Record<string, unknown>,
  opts?: { validationTurn?: Record<string, unknown> | null },
): IntegratorValidationResult {
  const errors: string[] = [];
  const warnings: string[] = [];

  const disputes = Array.isArray(draft.disputes)
    ? (draft.disputes as unknown[]).filter(Boolean)
    : [];
  const debateHealth = (
    draft.review_integrator_summary as Record<string, unknown> | undefined
  )?.debate_health as Record<string, unknown> | undefined;
  const tensions: unknown = debateHealth?.unresolved_tensions;
  const unresolvedTensions = Array.isArray(tensions) ? tensions : [];

  // Remember what the draft claimed; the effective flag may be cleared below.
  const claimedReady = draft.review_gate_ready === true;
  let review_gate_ready = claimedReady;

  const failedInDraft = hasFailedChecks(draft);
  const turn = opts?.validationTurn;
  const turnChecks: unknown = turn?.checks;
  const failedInTurn =
    turn?.overall_ready === false ||
    (Array.isArray(turnChecks) &&
      turnChecks.some(
        (c: { status?: string } | null | undefined) => c?.status === "fail",
      ));
  const checksFailed = failedInDraft || failedInTurn;

  if (checksFailed) {
    if (disputes.length === 0) {
      errors.push(
        "evaluator reported failed/warn checks but disputes[] is empty — document tension",
      );
      review_gate_ready = false;
    }
    if (unresolvedTensions.length === 0 && disputes.length > 0) {
      warnings.push(
        "disputes present but unresolved_tensions empty — prefer listing open items",
      );
    }
  }

  if (adversarySeverityHigh(draft) && disputes.length === 0) {
    errors.push("adversary severity is high but no disputes recorded");
    review_gate_ready = false;
  }

  if (claimedReady && checksFailed && disputes.length === 0) {
    errors.push(
      "review_gate_ready cannot be true without disputes when checks fail",
    );
    review_gate_ready = false;
  }

  return {
    ok: errors.length === 0,
    review_gate_ready,
    errors,
    warnings,
  };
}
107
+
108
/**
 * Load `artifacts/validation-turn-r<roundIndex>.yaml` from a run directory.
 *
 * Best-effort: a missing, unreadable, or unparsable file yields `null`.
 * A document that parses to something other than a mapping (an empty file,
 * a scalar, a list) also yields `null`, so callers never receive a value that
 * only pretends to be a record.
 *
 * @param runDir - Run directory that contains the `artifacts` folder.
 * @param roundIndex - Round number used in the file name.
 */
export async function loadValidationTurnYaml(
  runDir: string,
  roundIndex: number,
): Promise<Record<string, unknown> | null> {
  const path = join(runDir, "artifacts", `validation-turn-r${roundIndex}.yaml`);
  try {
    const raw = await readFile(path, "utf-8");
    const parsed: unknown = parseYaml(raw);
    if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
      return null;
    }
    return parsed as Record<string, unknown>;
  } catch {
    return null;
  }
}
@@ -0,0 +1,89 @@
1
+ /**
2
+ * P2 — detect material scope drift between task_summary and decomposition.
3
+ */
4
+
5
+ export interface ScopeGuardResult {
6
+ material_drift: boolean;
7
+ overlap_score: number;
8
+ summary: string;
9
+ suggested_ask_user: boolean;
10
+ }
11
+
12
+ const PRODUCT_OS_MARKERS = [
13
+ "product os",
14
+ "product operating system",
15
+ "knowledge base",
16
+ "multi-source",
17
+ "synthesiz",
18
+ "papers",
19
+ "youtube",
20
+ "transcripts",
21
+ "news",
22
+ "books",
23
+ "decisions",
24
+ ];
25
+
26
+ const INFRA_MARKERS = [
27
+ "cron",
28
+ "systemd",
29
+ "graphify add",
30
+ "graphify update",
31
+ "ingest",
32
+ "lockfile",
33
+ "feeds.yaml",
34
+ "polling",
35
+ "timer",
36
+ ];
37
+
38
/**
 * Split text into a set of distinct lower-cased word tokens. A token starts
 * with a letter and continues with at least two letters, digits, `_` or `-`.
 *
 * @param text - Free-form text.
 */
function tokenize(text: string): Set<string> {
  const found = text.toLowerCase().match(/[a-z][a-z0-9_-]{2,}/g);
  return new Set(found ?? []);
}
43
+
44
/**
 * Count how many of the given markers occur as substrings of the lower-cased
 * text. Each marker counts at most once, however often it appears.
 *
 * @param text - Text to scan.
 * @param markers - Marker strings; compared as given against the lower-cased text.
 */
function markerHits(text: string, markers: string[]): number {
  const haystack = text.toLowerCase();
  let hits = 0;
  for (const marker of markers) {
    if (haystack.includes(marker)) hits += 1;
  }
  return hits;
}
48
+
49
/**
 * Heuristically detect material scope drift between the task summary and the
 * decomposition text.
 *
 * Drift is only considered when the task shows product intent (at least two
 * product-OS markers). It is then flagged when either:
 *  - the decomposition hits three or more infrastructure markers while hitting
 *    fewer product markers than the task ("narrowed to infra"), or
 *  - fewer than 8% of the task's tokens appear in the decomposition.
 *
 * @param taskSummary - The user's task summary.
 * @param decompositionText - Text of the plan decomposition.
 * @returns Drift flag, the token overlap ratio (1 when the task has no
 *   tokens), a human-readable summary, and whether to ask the user.
 */
export function assessPlanScopeDrift(
  taskSummary: string,
  decompositionText: string,
): ScopeGuardResult {
  const taskTokens = tokenize(taskSummary);
  const decompTokens = tokenize(decompositionText);
  let overlap = 0;
  for (const t of taskTokens) {
    if (decompTokens.has(t)) overlap += 1;
  }
  // With no task tokens there is nothing to miss: treat as full overlap.
  const overlapScore = taskTokens.size === 0 ? 1 : overlap / taskTokens.size;

  const taskProduct = markerHits(taskSummary, PRODUCT_OS_MARKERS);
  const decompProduct = markerHits(decompositionText, PRODUCT_OS_MARKERS);
  const decompInfra = markerHits(decompositionText, INFRA_MARKERS);

  const productIntent = taskProduct >= 2;
  const narrowedToInfra =
    productIntent && decompInfra >= 3 && decompProduct < taskProduct;
  const lowOverlap = productIntent && overlapScore < 0.08;

  const material_drift = narrowedToInfra || lowOverlap;
  let summary: string;
  if (narrowedToInfra) {
    summary =
      "Decomposition reads as infrastructure/cron/graphify-ingest while the task asked for a broader product-OS knowledge base — confirm scope with the user.";
  } else if (lowOverlap) {
    summary =
      "Decomposition shares almost no vocabulary with the task summary — verify the plan targets the right problem.";
  } else {
    summary = "Scope alignment looks acceptable.";
  }

  return {
    material_drift,
    overlap_score: overlapScore,
    summary,
    suggested_ask_user: material_drift,
  };
}
@@ -243,7 +243,7 @@ export default function policyGate(pi: ExtensionAPI) {
243
243
 
244
244
  const planPhaseHint =
245
245
  state.phase === "plan"
246
- ? "\nPlan phase: scouts → decompose → hypothesis → stack-researcher → execution-plan-author → validate-plan-dag → 4-round plan debate → approve_plan → create_plan (YAML plan-packet.yaml). Post-execute: /harness-critic."
246
+ ? "\nPlan phase: scouts → decompose → hypothesis → implementation-researcher + stack-researcher → execution-plan-author → validate-plan-dag → debate eligibility + Review Gate → approve_plan → create_plan (YAML plan-packet.yaml). Post-execute: /harness-critic."
247
247
  : "";
248
248
 
249
249
  return {
@@ -30,7 +30,7 @@ under `.pi/extensions/` and auto-loaded through the package `pi.extensions`
30
30
  manifest (`package.json`).
31
31
 
32
32
  - `harness-run-context.ts` - active run + plan injection; short commands without run/plan args
33
- - `harness-live-widget.ts` - footer status (phase, plan ready, next command; no run id in UI)
33
+ - `harness-live-widget.ts` - footer status (current/next phase + plain-language status hint; no run id in UI)
34
34
  - `policy-gate.ts` - phase state machine + plan-before-mutate enforcement
35
35
  - `budget-guard.ts` - hard-stop token budget checks + budget exhausted artifacts
36
36
  - `trace-recorder.ts` - append-only run traces + HarnessRunRecord + compact index
@@ -1,8 +1,8 @@
1
1
  {
2
2
  "schema_version": "1.0.0",
3
3
  "package": "ultimate-pi",
4
- "package_version": "0.11.0",
5
- "generated_at": "2026-05-18T09:43:44.563Z",
4
+ "package_version": "0.14.0",
5
+ "generated_at": "2026-05-19T10:53:28.359Z",
6
6
  "agents": {
7
7
  "pi-pi/agent-expert": {
8
8
  "path": ".pi/agents/pi-pi/agent-expert.md",
@@ -78,39 +78,43 @@
78
78
  },
79
79
  "harness/planning/decompose": {
80
80
  "path": ".pi/agents/harness/planning/decompose.md",
81
- "sha256": "1b3f85d956d2e203ec87045a731c47f8b40f75b63fce8916fda91cefc39244a8"
81
+ "sha256": "5c3b983772d013741d50f39945bc77f178aa338aecab56b93c09216d72192c69"
82
82
  },
83
83
  "harness/planning/execution-plan-author": {
84
84
  "path": ".pi/agents/harness/planning/execution-plan-author.md",
85
- "sha256": "a69fb2e8bda9336e71ce9536071f9c8a2f4abd9d9d88930c6a8be29bdc9c5f62"
85
+ "sha256": "16f8800c50bcaf1b82ed9138889c8a0e538ee6a139aeae129ccd20cec2ec25f7"
86
86
  },
87
87
  "harness/planning/hypothesis-validator": {
88
88
  "path": ".pi/agents/harness/planning/hypothesis-validator.md",
89
- "sha256": "f75312439c441ccee72692d41f44b6e733df08e06c89e930740fc256bed3ba02"
89
+ "sha256": "9e68ec5d6aef96a3666c30227c3cbddf1aaed1182fdc94dbbd21ad3d48315ff2"
90
90
  },
91
91
  "harness/planning/hypothesis": {
92
92
  "path": ".pi/agents/harness/planning/hypothesis.md",
93
93
  "sha256": "b20c527d15c2243cd5d3a8f16cea6d44bdfd16e01915d42f3b830bf9938e5f8b"
94
94
  },
95
+ "harness/planning/implementation-researcher": {
96
+ "path": ".pi/agents/harness/planning/implementation-researcher.md",
97
+ "sha256": "dbd1c4fc74d538b110d406febfd4603eebea77d82e8b367df4596ac7ff6e54cc"
98
+ },
95
99
  "harness/planning/plan-adversary": {
96
100
  "path": ".pi/agents/harness/planning/plan-adversary.md",
97
- "sha256": "685926c638ae1377361d7cafda5e400be19cb3880510d8f6d389a5876647575f"
101
+ "sha256": "7c14eaab65f356003ee2ff380f5d4e620170b5126daa67c3d226b12342f47bd2"
98
102
  },
99
103
  "harness/planning/plan-evaluator": {
100
104
  "path": ".pi/agents/harness/planning/plan-evaluator.md",
101
- "sha256": "44fd52389d7e43dd5093653cba9694900561318ee5f00e3bc05c3ecef5d43621"
105
+ "sha256": "846575abe9df3e7e5be812c0c474989c1a9de8074a7884d77b9d3dd423643480"
102
106
  },
103
107
  "harness/planning/review-integrator": {
104
108
  "path": ".pi/agents/harness/planning/review-integrator.md",
105
- "sha256": "d0e8214539d0a78b9e5add70e61dd4e4de36def64172cda18d9b70727e7600ca"
109
+ "sha256": "bed43f3f049c279ac50a24bcffac1bbe46a8605d89c9cc6d0c3c6a87d488b1b8"
106
110
  },
107
111
  "harness/planning/scout-graphify": {
108
112
  "path": ".pi/agents/harness/planning/scout-graphify.md",
109
- "sha256": "b59916a26afccfe105e29c0bd8637ac54275e8afef1c6cc88a58bd05b0325473"
113
+ "sha256": "7f385d5bda2fe04b9da52cb4cb9247324efd345579b483d3ad55a6abefad50d5"
110
114
  },
111
115
  "harness/planning/scout-semantic": {
112
116
  "path": ".pi/agents/harness/planning/scout-semantic.md",
113
- "sha256": "47b7ea3e65b20a65e6d0ff11b6d5daff59b47a9ed618b8a3b6282f2eb0460572"
117
+ "sha256": "36bd424ebd422bda82bd447b22f591f99f32ec897ea43f385586119da5c26caa"
114
118
  },
115
119
  "harness/planning/scout-structure": {
116
120
  "path": ".pi/agents/harness/planning/scout-structure.md",
@@ -118,11 +122,11 @@
118
122
  },
119
123
  "harness/planning/sprint-contract-auditor": {
120
124
  "path": ".pi/agents/harness/planning/sprint-contract-auditor.md",
121
- "sha256": "f613a4fa937d76936fa01155d4e7956a81878f300100f99f6a78915b0af6f7c7"
125
+ "sha256": "d915274dc9b5addae5499bc2390b348eddeb8f133b526a816e23d0d19a2618bf"
122
126
  },
123
127
  "harness/planning/stack-researcher": {
124
128
  "path": ".pi/agents/harness/planning/stack-researcher.md",
125
- "sha256": "90e2ff1348f54bebc8c0392407bf1bb4d794c942fd8d6f342d80b191c945b34e"
129
+ "sha256": "fa228920abe2b66d4d8921c4a5d85593e3019a24bbe9ae512ed9149f235e3536"
126
130
  }
127
131
  }
128
132
  }
@@ -13,9 +13,7 @@
13
13
  - `harness/planning/decompose` — DeepMind-style problem decomposition (`PlanDecompositionBrief`)
14
14
  - `harness/planning/hypothesis` — DARWIN hypothesis generation (`PlanHypothesisBrief`)
15
15
  2. **Parent maps hypothesis → PlanPacket** — `plan-packet.schema.json` unchanged; execution gating stable.
16
- 3. **Parallel pre-approval reviews:**
17
- - `harness/planning/plan-adversary` — execution risk on PlanPacket
18
- - `harness/planning/hypothesis-eval` — blind self-eval (task + hypothesis only)
16
+ 3. **Review Gate (ADR 0035):** outcome-based debate with `hypothesis-validator` on R1 (blind — task + hypothesis only). Retired `hypothesis-eval` as a separate pre-approval agent.
19
17
  4. **`approve_plan` optional `research_brief`** — rendered in `plan-review.md`; not written to `plan-packet.json`.
20
18
  5. **`--quick`** still skips semantic scout only; never skips decompose/hypothesis.
21
19