ultimate-pi 0.13.1 → 0.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. package/.agents/skills/harness-debate-plan/SKILL.md +61 -21
  2. package/.agents/skills/harness-orchestration/SKILL.md +1 -1
  3. package/.pi/agents/harness/planning/plan-adversary.md +2 -2
  4. package/.pi/agents/harness/planning/plan-evaluator.md +3 -1
  5. package/.pi/agents/harness/planning/review-integrator.md +4 -2
  6. package/.pi/extensions/debate-orchestrator.ts +39 -435
  7. package/.pi/extensions/harness-debate-tools.ts +519 -0
  8. package/.pi/extensions/harness-plan-approval.ts +41 -17
  9. package/.pi/extensions/harness-run-context.ts +18 -0
  10. package/.pi/extensions/lib/debate-bus-core.ts +434 -0
  11. package/.pi/extensions/lib/debate-bus-state.ts +58 -0
  12. package/.pi/extensions/lib/harness-spawn-budget.ts +5 -25
  13. package/.pi/extensions/lib/plan-approval/dialog.ts +33 -272
  14. package/.pi/extensions/lib/plan-approval/format-plan.ts +12 -85
  15. package/.pi/extensions/lib/plan-approval/plan-review.ts +6 -6
  16. package/.pi/extensions/lib/plan-approval/render.ts +6 -0
  17. package/.pi/extensions/lib/plan-approval/validate.ts +1 -1
  18. package/.pi/extensions/lib/plan-debate-envelope.ts +2 -0
  19. package/.pi/extensions/lib/plan-debate-gate.ts +155 -0
  20. package/.pi/extensions/lib/plan-debate-id.ts +39 -0
  21. package/.pi/extensions/lib/plan-debate-lane.ts +220 -0
  22. package/.pi/extensions/lib/plan-debate-round-status.ts +94 -0
  23. package/.pi/extensions/lib/plan-debate-write-guard.ts +20 -0
  24. package/.pi/extensions/lib/plan-messenger.ts +276 -0
  25. package/.pi/extensions/lib/plan-review-integrator-rules.ts +119 -0
  26. package/.pi/extensions/lib/plan-scope-guard.ts +89 -0
  27. package/.pi/harness/agents.manifest.json +7 -7
  28. package/.pi/prompts/harness-plan.md +22 -12
  29. package/CHANGELOG.md +12 -0
  30. package/package.json +3 -3
  31. package/.pi/extensions/lib/plan-approval/fallback.ts +0 -50
@@ -1,44 +1,84 @@
1
1
  ---
2
2
  name: harness-debate-plan
3
- description: Plan-phase Review Gate debate — assemble rounds, token caps, bus envelopes for parent orchestrator.
3
+ description: Plan-phase Review Gate debate — pi-messenger threads, lane YAML, bus tools for parent orchestrator.
4
4
  ---
5
5
 
6
6
  # harness-debate-plan
7
7
 
8
- Use when running **Phase 5** of `/harness-plan` — four Review Gate rounds on the plan debate bus.
8
+ Use when running **Phase 5** of `/harness-plan` — four Review Gate rounds with **pi-messenger-style** turn-taking (claims → rebuttals → integrate), then bus submission.
9
9
 
10
10
  ## Open
11
11
 
12
12
  ```
13
- /harness-debate-open plan-<run_id>
13
+ harness_debate_open({})
14
14
  ```
15
15
 
16
+ - Debate id is always `plan-<run_id>` (tool normalizes wrong ids).
17
+ - Creates `.pi/harness/runs/<run_id>/debate-messenger/` (`inbox/<Agent>/`, `threads/round-N/transcript.jsonl`).
18
+
16
19
  Budget profile **plan**: `max_rounds=4`, `round_token_cap=2000`, `debate_global_cap=12000`.
17
20
 
18
- ## Per-round spawn order
21
+ ## Per-round spawn order (P1 sequential lanes)
22
+
23
+ 1. Round-specific lane spawns (write lane YAML with `write_harness_yaml`)
24
+ 2. `plan-evaluator` → lane artifact + `harness_messenger_post` (claims)
25
+ 3. `harness_messenger_read_round` → spawn `plan-adversary` with transcript
26
+ 4. `plan-adversary` → lane artifact + `harness_messenger_post` (rebuttals with `in_reply_to`)
27
+ 5. R1: `hypothesis-validator` first (blind — no decomposition/PlanPacket in prompt)
28
+ 6. R4: `sprint-contract-auditor` required before integrator
29
+ 7. `review-integrator` → integrator draft + `harness_messenger_post` (`integrate`)
30
+ 8. `harness_debate_submit_round({ round_index, integrator_draft })` — **only** path for `review-round-r{N}.yaml`
31
+
32
+ | Round | Extra lane artifacts |
33
+ |-------|----------------------|
34
+ | 1 | `hypothesis-validation-r1.yaml` |
35
+ | 4 | `sprint-audit-r4.yaml` (required) |
19
36
 
20
- 1. Round-specific extras (R1: `hypothesis-validator` first, blind)
21
- 2. `plan-evaluator`
22
- 3. `plan-adversary`
23
- 4. R4: `sprint-contract-auditor` (required)
24
- 5. `review-integrator`
37
+ ## Lane artifacts (auto-applied on subagent complete)
25
38
 
26
- ## Artifacts (YAML)
39
+ When a debate lane subagent finishes, the harness **automatically** writes lane YAML and posts messenger messages (evaluator claims, adversary rebuttals). Look for `harness-debate-next-step` in the transcript.
27
40
 
28
- | Agent | Output path |
29
- |-------|-------------|
30
- | hypothesis-validator | `artifacts/hypothesis-validation-r{N}.yaml` |
31
- | plan-evaluator | `artifacts/validation-turn-r{N}.yaml` |
32
- | plan-adversary | `artifacts/adversary-brief-r{N}.yaml` |
33
- | sprint-contract-auditor | `artifacts/sprint-audit-r{N}.yaml` |
34
- | review-integrator | `artifacts/review-round-r{N}.yaml` |
41
+ | Agent | Output path | Messenger |
42
+ |-------|-------------|-----------|
43
+ | hypothesis-validator | `artifacts/hypothesis-validation-r{N}.yaml` | — |
44
+ | plan-evaluator | `artifacts/validation-turn-r{N}.yaml` | `claim` |
45
+ | plan-adversary | `artifacts/adversary-brief-r{N}.yaml` | `rebuttal` |
46
+ | sprint-contract-auditor | `artifacts/sprint-audit-r{N}.yaml` (R4) | optional |
47
+ | review-integrator | *(integrator draft → `harness_debate_submit_round` only)* | `integrate` (on submit) |
35
48
 
36
- ## Bus envelope
49
+ Fallback: `harness_debate_apply_lane({ lane, content, round_index? })` if auto-apply missed fenced YAML.
37
50
 
38
- Load `review-round-r{N}.yaml`, validate, then `buildPlanReviewRoundEnvelope` (`.pi/extensions/lib/plan-debate-envelope.ts`) `/harness-debate-round '<json>'`.
51
+ Resume after stop: `harness_debate_round_status({ round_index: N })` then run the listed `next_tool`.
39
52
 
40
- Plan participants only. `StackResearchAgent` uses `artifacts/stack.yaml` claims — no spawn.
53
+ ## Messenger tools
54
+
55
+ ```typescript
56
+ harness_messenger_post({
57
+ round_index: 1,
58
+ from: "PlanEvaluatorAgent",
59
+ kind: "claim",
60
+ body: "...",
61
+ claim_ids: ["c1", "c2"],
62
+ to: ["broadcast"],
63
+ })
64
+ harness_messenger_post({
65
+ round_index: 1,
66
+ from: "PlanAdversaryAgent",
67
+ kind: "rebuttal",
68
+ in_reply_to: ["c1"],
69
+ body: "...",
70
+ })
71
+ harness_messenger_read_round({ round_index: 1 }) // for next spawn prompt
72
+ ```
73
+
74
+ ## Integrator + bus
75
+
76
+ `harness_debate_submit_round` validates messenger thread + integrator rules (`review_gate_ready` false when checks fail without `disputes[]`), writes `review-round-r{N}.yaml`, emits bus `kind: round`.
77
+
78
+ `StackResearchAgent` uses `artifacts/stack.yaml` claims — no spawn.
41
79
 
42
80
  ## Close
43
81
 
44
- After round 4: `/harness-debate-consensus`. Do not `approve_plan` on `policy_decision: block`.
82
+ After round 4: `harness_debate_consensus`. `approve_plan` is **hard-gated** on lane files, messenger, 4 bus rounds, and consensus not `block`.
83
+
84
+ Do not `approve_plan` on `policy_decision: block`. On `human_required` → `ask_user` first.
@@ -36,7 +36,7 @@ LIMIT 30
36
36
  1. **Parallel `tasks`** — one `subagent({ tasks: [...] })` for scouts, decompose+hypothesis, or review fan-in; subprocesses run in parallel upstream.
37
37
  2. **Blocking calls** — each `subagent` returns when the subprocess exits; no `get_subagent_result` polling.
38
38
  3. **Compact handoffs** — pass scout/decompose JSON only; never paste full subprocess message logs into the next spawn.
39
- 4. **Spawn caps** — bridge enforces **8** active + **12** total harness spawns per session. Do **not** pass `timeoutMs` unless the user wants a cap — subprocesses wait for natural exit (`PI_SUBAGENT_TIMEOUT_MS` optional env backstop only).
39
+ 4. **No spawn cap** — harness subagent spawns are unlimited per session (active count is telemetry only). Do **not** pass `timeoutMs` unless the user wants a cap — subprocesses wait for natural exit (`PI_SUBAGENT_TIMEOUT_MS` optional env backstop only).
40
40
 
41
41
  ## Command → agent
42
42
 
@@ -9,10 +9,10 @@ max_turns: 12
9
9
 
10
10
  You are **plan-adversary** — break the plan with reproducible counterexamples.
11
11
 
12
- Engage failed/warn checks from the same round's `plan-evaluator` first, then independent attacks. Cite `work_item_id` / `phase_id`.
12
+ Engage failed/warn checks from the same round's `plan-evaluator` first (parent provides evaluator YAML + messenger **claims**). Rebut specific `claim_ids` from the thread — parent posts your `rebuttal` with `in_reply_to`.
13
13
 
14
14
  ## Output
15
15
 
16
16
  Valid **YAML only** — `PlanAdversaryBrief` (`.pi/harness/specs/plan-adversary-brief.schema.json`).
17
17
 
18
- Bus label: `PlanAdversarysubagent`.
18
+ Bus label: `PlanAdversaryAgent`.
@@ -15,4 +15,6 @@ Parent passes `debate_round_focus`: `spec` | `wbs` | `schedule` | `quality`.
15
15
 
16
16
  Valid **YAML only** — `PlanValidationTurn` (`.pi/harness/specs/plan-validation-turn.schema.json`). Fail if `dag_validation.status === "fail"`.
17
17
 
18
- Bus label: `PlanEvaluatorsubagent`.
18
+ Include `claim_ids[]` in your summary for parent to post as messenger **claims** before spawning adversary.
19
+
20
+ Bus label: `PlanEvaluatorAgent`.
@@ -18,6 +18,8 @@ Valid **YAML only** — `PlanReviewRoundDraft` (`.pi/harness/specs/plan-review-r
18
18
  - `review_gate_ready` boolean
19
19
  - `participants`, `claims`, `rebuttals`, `evidence_refs`, `token_usage`, `severity_scores`
20
20
 
21
- Parent runs `buildPlanReviewRoundEnvelope` `/harness-debate-round`.
21
+ Parent passes `harness_messenger_read_round` transcript + lane YAML. After your YAML draft, parent calls `harness_messenger_post` (`kind: integrate`) then `harness_debate_submit_round` — you do not write `review-round-r*.yaml`.
22
22
 
23
- Bus label: `ReviewIntegratorsubagent`.
23
+ Set `review_gate_ready: false` when evaluator checks fail unless `disputes[]` documents open tension.
24
+
25
+ Bus label: `ReviewIntegratorAgent`.
@@ -1,103 +1,24 @@
1
1
  /**
2
2
  * debate-orchestrator — headless debate bus (pi-messenger-inspired semantics).
3
3
  *
4
- * No additional UI surface:
5
- * - transport is extension custom entries + debate artifacts on disk
6
- * - command interface is machine-friendly (`/harness-debate-*`)
7
- *
8
- * Protocol envelope:
9
- * {
10
- * protocol: "pi-debate-bus/v1",
11
- * kind: "open" | "round" | "consensus" | "budget_exhausted",
12
- * correlation: { run_id, debate_id, round_index?, sender },
13
- * payload: { ... }
14
- * }
4
+ * Commands mirror harness_debate_* tools; shared state lives in debate-bus-core.
15
5
  */
16
6
 
17
- import { appendFile, mkdir, writeFile } from "node:fs/promises";
18
7
  import { join } from "node:path";
19
8
  import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
20
- import {
21
- type DebateParticipant,
22
- debatePhaseFromId,
23
- isPlanDebateId,
24
- PLAN_DEBATE_PARTICIPANTS,
25
- POST_EXECUTE_DEBATE_PARTICIPANTS,
26
- } from "../lib/debate-orchestrator-types.js";
27
9
  import { getRunIdFromSession } from "../lib/harness-run-context.js";
28
-
29
- type PolicyDecision = "pass" | "conditional_pass" | "block" | "human_required";
30
- type DebatePhase = "plan" | "post_execute";
31
-
32
- interface RoundPayload {
33
- participants: DebateParticipant[];
34
- claims: string[];
35
- rebuttals: string[];
36
- evidence_refs: string[];
37
- token_usage: {
38
- per_agent: Record<string, number>;
39
- round_total: number;
40
- };
41
- consensus_delta: number;
42
- severity_scores?: {
43
- correctness: number;
44
- security: number;
45
- architecture: number;
46
- test_integrity: number;
47
- };
48
- }
49
-
50
- interface DebateState {
51
- run_id: string;
52
- debate_id: string;
53
- debate_phase: DebatePhase;
54
- round_count: number;
55
- budget_used: number;
56
- max_rounds: number;
57
- round_token_cap: number;
58
- debate_global_cap: number;
59
- last_review_gate_ready?: boolean;
60
- }
61
-
62
- interface BusEnvelope<T = unknown> {
63
- protocol: "pi-debate-bus/v1";
64
- kind: "open" | "round" | "consensus" | "budget_exhausted";
65
- correlation: {
66
- run_id: string;
67
- debate_id: string;
68
- round_index?: number;
69
- sender: DebateParticipant | "system";
70
- };
71
- payload: T;
72
- }
73
-
74
- const DEBATES_DIR = join(process.cwd(), ".pi", "harness", "debates");
75
- const WEIGHTS = {
76
- claim_quality: 0.2,
77
- reproducibility: 0.4,
78
- agreement: 0.4,
79
- };
80
- const THRESHOLDS = {
81
- correctness: 0.7,
82
- security: 0.7,
83
- architecture: 0.8,
84
- test_integrity: 0.8,
85
- };
86
- const HARD_STOP_DEBATE_CAPS = process.env.HARNESS_DEBATE_HARD_STOP === "true";
87
-
88
- function nowIso(): string {
89
- return new Date().toISOString();
90
- }
91
-
92
- function toSafeFloat(value: unknown): number {
93
- const n = Number(value);
94
- if (Number.isNaN(n) || !Number.isFinite(n)) return 0;
95
- return Math.max(0, Math.min(1, n));
96
- }
97
-
98
- async function ensureDebatesDir(): Promise<void> {
99
- await mkdir(DEBATES_DIR, { recursive: true });
100
- }
10
+ import {
11
+ acceptDebateRound,
12
+ finalizeDebateConsensus,
13
+ openDebateBus,
14
+ parseRoundEnvelope,
15
+ } from "./lib/debate-bus-core.js";
16
+ import {
17
+ getDebateState,
18
+ restoreDebateStateFromEntry,
19
+ } from "./lib/debate-bus-state.js";
20
+ import { normalizePlanDebateId } from "./lib/plan-debate-id.js";
21
+ import { initPlanMessenger } from "./lib/plan-messenger.js";
101
22
 
102
23
  function getRunId(ctx: {
103
24
  sessionManager: { getEntries(): unknown[]; getSessionId(): string };
@@ -110,339 +31,11 @@ function getRunId(ctx: {
110
31
  );
111
32
  }
112
33
 
113
- const PLAN_BUDGET = {
114
- max_rounds: 4,
115
- round_token_cap: 2000,
116
- debate_global_cap: 12000,
117
- } as const;
118
-
119
- const AGGRESSIVE_BUDGET = {
120
- max_rounds: 6,
121
- round_token_cap: 2500,
122
- debate_global_cap: 35000,
123
- } as const;
124
-
125
- function capsForDebate(debateId: string): {
126
- name: "plan" | "aggressive";
127
- max_rounds: number;
128
- round_token_cap: number;
129
- debate_global_cap: number;
130
- } {
131
- if (isPlanDebateId(debateId)) {
132
- return { name: "plan", ...PLAN_BUDGET };
133
- }
134
- return { name: "aggressive", ...AGGRESSIVE_BUDGET };
135
- }
136
-
137
- function participantAllowed(participant: string, phase: DebatePhase): boolean {
138
- if (phase === "plan") {
139
- return (PLAN_DEBATE_PARTICIPANTS as readonly string[]).includes(
140
- participant,
141
- );
142
- }
143
- return (POST_EXECUTE_DEBATE_PARTICIPANTS as readonly string[]).includes(
144
- participant,
145
- );
146
- }
147
-
148
- async function writeDebateEvent(
149
- debateId: string,
150
- event: unknown,
151
- ): Promise<void> {
152
- await ensureDebatesDir();
153
- const path = join(DEBATES_DIR, `${debateId}.jsonl`);
154
- await appendFile(path, `${JSON.stringify(event)}\n`, "utf-8");
155
- }
156
-
157
- function defaultSeverity(): {
158
- correctness: number;
159
- security: number;
160
- architecture: number;
161
- test_integrity: number;
162
- } {
163
- return { correctness: 0, security: 0, architecture: 0, test_integrity: 0 };
164
- }
165
-
166
- function decidePolicy(
167
- severity: ReturnType<typeof defaultSeverity>,
168
- minEvidenceConfidence: number,
169
- ): PolicyDecision {
170
- if (
171
- severity.security >= THRESHOLDS.security ||
172
- severity.correctness >= THRESHOLDS.correctness ||
173
- severity.architecture >= THRESHOLDS.architecture ||
174
- severity.test_integrity >= THRESHOLDS.test_integrity
175
- ) {
176
- return "block";
177
- }
178
- if (minEvidenceConfidence < 0.55) return "human_required";
179
- if (minEvidenceConfidence < 0.75) return "conditional_pass";
180
- return "pass";
181
- }
182
-
183
- function parseEnvelope(raw: string): BusEnvelope<RoundPayload> | null {
184
- try {
185
- const parsed = JSON.parse(raw) as BusEnvelope<RoundPayload>;
186
- if (parsed?.protocol !== "pi-debate-bus/v1") return null;
187
- if (parsed?.kind !== "round") return null;
188
- return parsed;
189
- } catch {
190
- return null;
191
- }
192
- }
193
-
194
34
  export default function debateOrchestrator(pi: ExtensionAPI) {
195
- let state: DebateState | null = null;
196
- let lastSeverity = defaultSeverity();
197
-
198
- async function openDebate(runId: string, debateId: string): Promise<void> {
199
- const caps = capsForDebate(debateId);
200
- const debate_phase = debatePhaseFromId(debateId);
201
- state = {
202
- run_id: runId,
203
- debate_id: debateId,
204
- debate_phase,
205
- round_count: 0,
206
- budget_used: 0,
207
- max_rounds: caps.max_rounds,
208
- round_token_cap: caps.round_token_cap,
209
- debate_global_cap: caps.debate_global_cap,
210
- last_review_gate_ready: false,
211
- };
212
- pi.appendEntry("harness-debate-state", state);
213
- const envelope: BusEnvelope = {
214
- protocol: "pi-debate-bus/v1",
215
- kind: "open",
216
- correlation: {
217
- run_id: runId,
218
- debate_id: debateId,
219
- sender: "system",
220
- },
221
- payload: {
222
- opened_at: nowIso(),
223
- debate_phase,
224
- budget_profile: caps.name,
225
- },
226
- };
227
- pi.appendEntry("harness-debate-envelope", envelope);
228
- await writeDebateEvent(debateId, envelope);
229
- }
230
-
231
- async function emitBudgetExhausted(reason: string): Promise<void> {
232
- if (!state) return;
233
- const envelope: BusEnvelope = {
234
- protocol: "pi-debate-bus/v1",
235
- kind: "budget_exhausted",
236
- correlation: {
237
- run_id: state.run_id,
238
- debate_id: state.debate_id,
239
- round_index: state.round_count,
240
- sender: "system",
241
- },
242
- payload: {
243
- schema_version: "1.0.0",
244
- contract_version: "1.0.0",
245
- event_type: "budget_exhausted",
246
- run_id: state.run_id,
247
- debate_id: state.debate_id,
248
- round_count: state.round_count,
249
- budget_used: state.budget_used,
250
- exhaustion_reason: reason,
251
- caps: {
252
- max_rounds: state.max_rounds,
253
- round_token_cap: state.round_token_cap,
254
- debate_global_cap: state.debate_global_cap,
255
- },
256
- minimum_evidence_confidence: 0.6,
257
- default_policy_outcome: "block",
258
- human_override_allowed: true,
259
- },
260
- };
261
- pi.appendEntry("harness-debate-envelope", envelope);
262
- pi.appendEntry("harness-budget-exhausted", envelope.payload);
263
- await writeDebateEvent(state.debate_id, envelope);
264
- }
265
-
266
- async function acceptRound(envelope: BusEnvelope<RoundPayload>): Promise<{
267
- ok: boolean;
268
- reason?: string;
269
- }> {
270
- if (!state) return { ok: false, reason: "no active debate" };
271
- if (state.debate_id !== envelope.correlation.debate_id) {
272
- return { ok: false, reason: "debate id mismatch" };
273
- }
274
-
275
- for (const p of envelope.payload.participants ?? []) {
276
- if (!participantAllowed(p, state.debate_phase)) {
277
- return {
278
- ok: false,
279
- reason: `participant ${p} invalid for debate_phase=${state.debate_phase}`,
280
- };
281
- }
282
- }
283
-
284
- const nextRound = state.round_count + 1;
285
- if (nextRound > state.max_rounds) {
286
- await emitBudgetExhausted("max_rounds_reached");
287
- if (HARD_STOP_DEBATE_CAPS) {
288
- return { ok: false, reason: "max rounds reached" };
289
- }
290
- }
291
-
292
- const perAgent = envelope.payload.token_usage?.per_agent ?? {};
293
- for (const [agent, tokens] of Object.entries(perAgent)) {
294
- if (Number(tokens) > state.round_token_cap) {
295
- await emitBudgetExhausted("round_token_cap_exceeded");
296
- if (HARD_STOP_DEBATE_CAPS) {
297
- return { ok: false, reason: `round cap exceeded by ${agent}` };
298
- }
299
- }
300
- }
301
-
302
- const roundTotal = Number(envelope.payload.token_usage?.round_total ?? 0);
303
- if (state.budget_used + roundTotal > state.debate_global_cap) {
304
- await emitBudgetExhausted("debate_global_cap_exceeded");
305
- if (HARD_STOP_DEBATE_CAPS) {
306
- return { ok: false, reason: "global cap exceeded" };
307
- }
308
- }
309
-
310
- state.round_count = nextRound;
311
- state.budget_used += roundTotal;
312
- pi.appendEntry("harness-debate-state", state);
313
-
314
- if (envelope.payload.severity_scores) {
315
- lastSeverity = {
316
- correctness: toSafeFloat(envelope.payload.severity_scores.correctness),
317
- security: toSafeFloat(envelope.payload.severity_scores.security),
318
- architecture: toSafeFloat(
319
- envelope.payload.severity_scores.architecture,
320
- ),
321
- test_integrity: toSafeFloat(
322
- envelope.payload.severity_scores.test_integrity,
323
- ),
324
- };
325
- }
326
-
327
- const profileName =
328
- state.debate_phase === "plan"
329
- ? ("plan" as const)
330
- : ("aggressive" as const);
331
-
332
- const roundRecord = {
333
- schema_version: "1.0.0",
334
- contract_version: "1.0.0",
335
- run_id: state.run_id,
336
- debate_id: state.debate_id,
337
- round_index: state.round_count,
338
- participants: envelope.payload.participants,
339
- claims: envelope.payload.claims,
340
- rebuttals: envelope.payload.rebuttals,
341
- evidence_refs: envelope.payload.evidence_refs,
342
- token_usage: envelope.payload.token_usage,
343
- budget_profile: {
344
- name: profileName,
345
- max_rounds: state.max_rounds,
346
- round_token_cap: state.round_token_cap,
347
- debate_global_cap: state.debate_global_cap,
348
- },
349
- consensus_delta: Number(envelope.payload.consensus_delta ?? 0),
350
- };
351
- pi.appendEntry("harness-round-result", roundRecord);
352
- pi.appendEntry("harness-debate-envelope", envelope);
353
- await writeDebateEvent(state.debate_id, envelope);
354
- return { ok: true };
355
- }
356
-
357
- async function finalizeConsensus(
358
- rationale: string,
359
- ): Promise<PolicyDecision | null> {
360
- if (!state) return null;
361
- const evidenceScore = Math.max(
362
- 0,
363
- Math.min(
364
- 1,
365
- lastSeverity.correctness * WEIGHTS.claim_quality +
366
- (1 - Math.max(lastSeverity.security, lastSeverity.test_integrity)) *
367
- WEIGHTS.reproducibility +
368
- Math.max(
369
- 0,
370
- 1 - Math.abs(lastSeverity.architecture - lastSeverity.correctness),
371
- ) *
372
- WEIGHTS.agreement,
373
- ),
374
- );
375
- const decision = decidePolicy(lastSeverity, evidenceScore);
376
- const planPhase = state.debate_phase === "plan";
377
- const evaluatorPassed = planPhase
378
- ? Boolean(state.last_review_gate_ready)
379
- : true;
380
- const debateComplete = planPhase
381
- ? state.round_count >= state.max_rounds
382
- : state.round_count > 0;
383
-
384
- const consensus = {
385
- schema_version: "1.0.0",
386
- contract_version: "1.0.0",
387
- run_id: state.run_id,
388
- debate_id: state.debate_id,
389
- debate_phase: state.debate_phase,
390
- round_count: state.round_count,
391
- budget_used: state.budget_used,
392
- severity_scores: lastSeverity,
393
- severity_thresholds: {
394
- correctness_block_at: THRESHOLDS.correctness,
395
- security_block_at: THRESHOLDS.security,
396
- architecture_block_at: THRESHOLDS.architecture,
397
- test_integrity_block_at: THRESHOLDS.test_integrity,
398
- },
399
- confidence_weights: WEIGHTS,
400
- evidence_refs: [],
401
- strict_gate_prerequisites: planPhase
402
- ? {
403
- plan_gate_passed: false,
404
- execution_completed: false,
405
- evaluator_passed: evaluatorPassed,
406
- adversarial_debate_completed: debateComplete,
407
- severity_policy_ok: decision !== "block",
408
- benchmark_delta_checks_passed: false,
409
- rollback_artifacts_generated: false,
410
- }
411
- : {
412
- plan_gate_passed: true,
413
- execution_completed: true,
414
- evaluator_passed: true,
415
- adversarial_debate_completed: debateComplete,
416
- severity_policy_ok: decision !== "block",
417
- benchmark_delta_checks_passed: false,
418
- rollback_artifacts_generated: false,
419
- },
420
- policy_decision: decision,
421
- rationale,
422
- };
423
-
424
- const envelope: BusEnvelope = {
425
- protocol: "pi-debate-bus/v1",
426
- kind: "consensus",
427
- correlation: {
428
- run_id: state.run_id,
429
- debate_id: state.debate_id,
430
- round_index: state.round_count,
431
- sender: "system",
432
- },
433
- payload: consensus,
434
- };
435
-
436
- await writeFile(
437
- join(DEBATES_DIR, `${state.debate_id}.consensus.json`),
438
- `${JSON.stringify(consensus, null, 2)}\n`,
439
- "utf-8",
440
- );
441
- pi.appendEntry("harness-consensus-packet", consensus);
442
- pi.appendEntry("harness-debate-envelope", envelope);
443
- await writeDebateEvent(state.debate_id, envelope);
444
- return decision;
445
- }
35
+ const hooks = {
36
+ appendEntry: (customType: string, data: unknown) =>
37
+ pi.appendEntry(customType, data),
38
+ };
446
39
 
447
40
  pi.on("session_start", async (_event, ctx) => {
448
41
  const entries = ctx.sessionManager.getEntries();
@@ -452,7 +45,7 @@ export default function debateOrchestrator(pi: ExtensionAPI) {
452
45
  entry.type === "custom" &&
453
46
  entry.customType === "harness-debate-state"
454
47
  ) {
455
- state = entry.data as DebateState;
48
+ restoreDebateStateFromEntry(entry.data);
456
49
  break;
457
50
  }
458
51
  }
@@ -461,13 +54,21 @@ export default function debateOrchestrator(pi: ExtensionAPI) {
461
54
  pi.registerCommand("harness-debate-open", {
462
55
  description: "Open a headless debate session",
463
56
  handler: async (args, ctx) => {
57
+ const runId = getRunId(ctx);
464
58
  const trimmed = args.trim();
465
- let debateId = trimmed;
466
- if (!debateId) debateId = `debate-${Date.now()}`;
467
- await openDebate(getRunId(ctx), debateId);
59
+ const { debateId, warning } = normalizePlanDebateId(trimmed, runId);
60
+ await openDebateBus(runId, debateId, hooks);
61
+ if (debateId.startsWith("plan-")) {
62
+ await initPlanMessenger(
63
+ join(process.cwd(), ".pi", "harness", "runs", runId),
64
+ { runId, debateId },
65
+ );
66
+ }
468
67
  pi.sendMessage({
469
68
  customType: "harness-debate-opened",
470
- content: `Debate opened: ${debateId}`,
69
+ content: warning
70
+ ? `Debate opened: ${debateId} (${warning})`
71
+ : `Debate opened: ${debateId}`,
471
72
  display: false,
472
73
  });
473
74
  },
@@ -476,10 +77,12 @@ export default function debateOrchestrator(pi: ExtensionAPI) {
476
77
  pi.registerCommand("harness-debate-round", {
477
78
  description: "Submit a debate round envelope JSON",
478
79
  handler: async (args, ctx) => {
479
- if (!state) {
480
- await openDebate(getRunId(ctx), `debate-${Date.now()}`);
80
+ if (!getDebateState()) {
81
+ const runId = getRunId(ctx);
82
+ const { debateId } = normalizePlanDebateId("", runId);
83
+ await openDebateBus(runId, debateId, hooks);
481
84
  }
482
- const envelope = parseEnvelope(args.trim());
85
+ const envelope = parseRoundEnvelope(args.trim());
483
86
  if (!envelope) {
484
87
  pi.sendMessage({
485
88
  customType: "harness-debate-round-error",
@@ -489,7 +92,7 @@ export default function debateOrchestrator(pi: ExtensionAPI) {
489
92
  });
490
93
  return;
491
94
  }
492
- const result = await acceptRound(envelope);
95
+ const result = await acceptDebateRound(envelope, hooks);
493
96
  if (!result.ok) {
494
97
  pi.sendMessage({
495
98
  customType: "harness-debate-round-rejected",
@@ -503,7 +106,7 @@ export default function debateOrchestrator(pi: ExtensionAPI) {
503
106
  pi.registerCommand("harness-debate-consensus", {
504
107
  description: "Finalize debate and emit consensus packet",
505
108
  handler: async (args) => {
506
- if (!state) {
109
+ if (!getDebateState()) {
507
110
  pi.sendMessage({
508
111
  customType: "harness-debate-consensus-error",
509
112
  content: "No active debate to finalize.",
@@ -511,8 +114,9 @@ export default function debateOrchestrator(pi: ExtensionAPI) {
511
114
  });
512
115
  return;
513
116
  }
514
- const decision = await finalizeConsensus(
117
+ const decision = await finalizeDebateConsensus(
515
118
  args.trim() || "Consensus generated by debate-orchestrator.",
119
+ hooks,
516
120
  );
517
121
  pi.sendMessage({
518
122
  customType: "harness-debate-consensus",