ultimate-pi 0.13.1 → 0.14.0
This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
- package/.agents/skills/harness-debate-plan/SKILL.md +61 -21
- package/.agents/skills/harness-orchestration/SKILL.md +1 -1
- package/.pi/agents/harness/planning/plan-adversary.md +2 -2
- package/.pi/agents/harness/planning/plan-evaluator.md +3 -1
- package/.pi/agents/harness/planning/review-integrator.md +4 -2
- package/.pi/extensions/debate-orchestrator.ts +39 -435
- package/.pi/extensions/harness-debate-tools.ts +519 -0
- package/.pi/extensions/harness-plan-approval.ts +41 -17
- package/.pi/extensions/harness-run-context.ts +18 -0
- package/.pi/extensions/lib/debate-bus-core.ts +434 -0
- package/.pi/extensions/lib/debate-bus-state.ts +58 -0
- package/.pi/extensions/lib/harness-spawn-budget.ts +5 -25
- package/.pi/extensions/lib/plan-approval/dialog.ts +33 -272
- package/.pi/extensions/lib/plan-approval/format-plan.ts +12 -85
- package/.pi/extensions/lib/plan-approval/plan-review.ts +6 -6
- package/.pi/extensions/lib/plan-approval/render.ts +6 -0
- package/.pi/extensions/lib/plan-approval/validate.ts +1 -1
- package/.pi/extensions/lib/plan-debate-envelope.ts +2 -0
- package/.pi/extensions/lib/plan-debate-gate.ts +155 -0
- package/.pi/extensions/lib/plan-debate-id.ts +39 -0
- package/.pi/extensions/lib/plan-debate-lane.ts +220 -0
- package/.pi/extensions/lib/plan-debate-round-status.ts +94 -0
- package/.pi/extensions/lib/plan-debate-write-guard.ts +20 -0
- package/.pi/extensions/lib/plan-messenger.ts +276 -0
- package/.pi/extensions/lib/plan-review-integrator-rules.ts +119 -0
- package/.pi/extensions/lib/plan-scope-guard.ts +89 -0
- package/.pi/harness/agents.manifest.json +7 -7
- package/.pi/prompts/harness-plan.md +22 -12
- package/CHANGELOG.md +12 -0
- package/package.json +3 -3
- package/.pi/extensions/lib/plan-approval/fallback.ts +0 -50
|
@@ -1,44 +1,84 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: harness-debate-plan
|
|
3
|
-
description: Plan-phase Review Gate debate —
|
|
3
|
+
description: Plan-phase Review Gate debate — pi-messenger threads, lane YAML, bus tools for parent orchestrator.
|
|
4
4
|
---
|
|
5
5
|
|
|
6
6
|
# harness-debate-plan
|
|
7
7
|
|
|
8
|
-
Use when running **Phase 5** of `/harness-plan` — four Review Gate rounds
|
|
8
|
+
Use when running **Phase 5** of `/harness-plan` — four Review Gate rounds with **pi-messenger-style** turn-taking (claims → rebuttals → integrate), then bus submission.
|
|
9
9
|
|
|
10
10
|
## Open
|
|
11
11
|
|
|
12
12
|
```
|
|
13
|
-
|
|
13
|
+
harness_debate_open({})
|
|
14
14
|
```
|
|
15
15
|
|
|
16
|
+
- Debate id is always `plan-<run_id>` (tool normalizes wrong ids).
|
|
17
|
+
- Creates `.pi/harness/runs/<run_id>/debate-messenger/` (`inbox/<Agent>/`, `threads/round-N/transcript.jsonl`).
|
|
18
|
+
|
|
16
19
|
Budget profile **plan**: `max_rounds=4`, `round_token_cap=2000`, `debate_global_cap=12000`.
|
|
17
20
|
|
|
18
|
-
## Per-round spawn order
|
|
21
|
+
## Per-round spawn order (P1 sequential lanes)
|
|
22
|
+
|
|
23
|
+
1. Round-specific lane spawns (write lane YAML with `write_harness_yaml`)
|
|
24
|
+
2. `plan-evaluator` → lane artifact + `harness_messenger_post` (claims)
|
|
25
|
+
3. `harness_messenger_read_round` → spawn `plan-adversary` with transcript
|
|
26
|
+
4. `plan-adversary` → lane artifact + `harness_messenger_post` (rebuttals with `in_reply_to`)
|
|
27
|
+
5. R1 only: `hypothesis-validator` runs first (blind — its prompt contains no decomposition or PlanPacket)
|
|
28
|
+
6. R4: `sprint-contract-auditor` required before integrator
|
|
29
|
+
7. `review-integrator` → integrator draft + `harness_messenger_post` (`integrate`)
|
|
30
|
+
8. `harness_debate_submit_round({ round_index, integrator_draft })` — **only** path for `review-round-r{N}.yaml`
|
|
31
|
+
|
|
32
|
+
| Round | Extra lane artifacts |
|
|
33
|
+
|-------|----------------------|
|
|
34
|
+
| 1 | `hypothesis-validation-r1.yaml` |
|
|
35
|
+
| 4 | `sprint-audit-r4.yaml` (required) |
|
|
19
36
|
|
|
20
|
-
|
|
21
|
-
2. `plan-evaluator`
|
|
22
|
-
3. `plan-adversary`
|
|
23
|
-
4. R4: `sprint-contract-auditor` (required)
|
|
24
|
-
5. `review-integrator`
|
|
37
|
+
## Lane artifacts (auto-applied on subagent complete)
|
|
25
38
|
|
|
26
|
-
|
|
39
|
+
When a debate lane subagent finishes, the harness **automatically** writes lane YAML and posts messenger messages (evaluator claims, adversary rebuttals). Look for `harness-debate-next-step` in the transcript.
|
|
27
40
|
|
|
28
|
-
| Agent | Output path |
|
|
29
|
-
|
|
30
|
-
| hypothesis-validator | `artifacts/hypothesis-validation-r{N}.yaml` |
|
|
31
|
-
| plan-evaluator | `artifacts/validation-turn-r{N}.yaml` |
|
|
32
|
-
| plan-adversary | `artifacts/adversary-brief-r{N}.yaml` |
|
|
33
|
-
| sprint-contract-auditor | `artifacts/sprint-audit-r{N}.yaml` |
|
|
34
|
-
| review-integrator | `
|
|
41
|
+
| Agent | Output path | Messenger |
|
|
42
|
+
|-------|-------------|-----------|
|
|
43
|
+
| hypothesis-validator | `artifacts/hypothesis-validation-r{N}.yaml` | — |
|
|
44
|
+
| plan-evaluator | `artifacts/validation-turn-r{N}.yaml` | `claim` |
|
|
45
|
+
| plan-adversary | `artifacts/adversary-brief-r{N}.yaml` | `rebuttal` |
|
|
46
|
+
| sprint-contract-auditor | `artifacts/sprint-audit-r{N}.yaml` (R4) | optional |
|
|
47
|
+
| review-integrator | *(integrator draft → `harness_debate_submit_round` only)* | `integrate` (on submit) |
|
|
35
48
|
|
|
36
|
-
|
|
49
|
+
Fallback: `harness_debate_apply_lane({ lane, content, round_index? })` if auto-apply missed fenced YAML.
|
|
37
50
|
|
|
38
|
-
|
|
51
|
+
To resume after a stop, call `harness_debate_round_status({ round_index: N })`, then run the `next_tool` it lists.
|
|
39
52
|
|
|
40
|
-
|
|
53
|
+
## Messenger tools
|
|
54
|
+
|
|
55
|
+
```typescript
|
|
56
|
+
harness_messenger_post({
|
|
57
|
+
round_index: 1,
|
|
58
|
+
from: "PlanEvaluatorAgent",
|
|
59
|
+
kind: "claim",
|
|
60
|
+
body: "...",
|
|
61
|
+
claim_ids: ["c1", "c2"],
|
|
62
|
+
to: ["broadcast"],
|
|
63
|
+
})
|
|
64
|
+
harness_messenger_post({
|
|
65
|
+
round_index: 1,
|
|
66
|
+
from: "PlanAdversaryAgent",
|
|
67
|
+
kind: "rebuttal",
|
|
68
|
+
in_reply_to: ["c1"],
|
|
69
|
+
body: "...",
|
|
70
|
+
})
|
|
71
|
+
harness_messenger_read_round({ round_index: 1 }) // for next spawn prompt
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
## Integrator + bus
|
|
75
|
+
|
|
76
|
+
`harness_debate_submit_round` validates messenger thread + integrator rules (`review_gate_ready` false when checks fail without `disputes[]`), writes `review-round-r{N}.yaml`, emits bus `kind: round`.
|
|
77
|
+
|
|
78
|
+
`StackResearchAgent` uses `artifacts/stack.yaml` claims — no spawn.
|
|
41
79
|
|
|
42
80
|
## Close
|
|
43
81
|
|
|
44
|
-
After round 4:
|
|
82
|
+
After round 4, call `harness_debate_consensus`. `approve_plan` is **hard-gated**: it requires the lane files, the messenger thread, 4 bus rounds, and a consensus that is not `block`.
|
|
83
|
+
|
|
84
|
+
Do not call `approve_plan` when `policy_decision` is `block`. When it is `human_required`, call `ask_user` first.
|
|
@@ -36,7 +36,7 @@ LIMIT 30
|
|
|
36
36
|
1. **Parallel `tasks`** — one `subagent({ tasks: [...] })` for scouts, decompose+hypothesis, or review fan-in; subprocesses run in parallel upstream.
|
|
37
37
|
2. **Blocking calls** — each `subagent` returns when the subprocess exits; no `get_subagent_result` polling.
|
|
38
38
|
3. **Compact handoffs** — pass scout/decompose JSON only; never paste full subprocess message logs into the next spawn.
|
|
39
|
-
4. **
|
|
39
|
+
4. **No spawn cap** — harness subagent spawns are unlimited per session (active count is telemetry only). Do **not** pass `timeoutMs` unless the user wants a cap — subprocesses wait for natural exit (`PI_SUBAGENT_TIMEOUT_MS` optional env backstop only).
|
|
40
40
|
|
|
41
41
|
## Command → agent
|
|
42
42
|
|
|
@@ -9,10 +9,10 @@ max_turns: 12
|
|
|
9
9
|
|
|
10
10
|
You are **plan-adversary** — break the plan with reproducible counterexamples.
|
|
11
11
|
|
|
12
|
-
Engage failed/warn checks from the same round's `plan-evaluator` first
|
|
12
|
+
Engage failed/warn checks from the same round's `plan-evaluator` first (parent provides evaluator YAML + messenger **claims**). Rebut specific `claim_ids` from the thread — parent posts your `rebuttal` with `in_reply_to`.
|
|
13
13
|
|
|
14
14
|
## Output
|
|
15
15
|
|
|
16
16
|
Valid **YAML only** — `PlanAdversaryBrief` (`.pi/harness/specs/plan-adversary-brief.schema.json`).
|
|
17
17
|
|
|
18
|
-
Bus label: `
|
|
18
|
+
Bus label: `PlanAdversaryAgent`.
|
|
@@ -15,4 +15,6 @@ Parent passes `debate_round_focus`: `spec` | `wbs` | `schedule` | `quality`.
|
|
|
15
15
|
|
|
16
16
|
Valid **YAML only** — `PlanValidationTurn` (`.pi/harness/specs/plan-validation-turn.schema.json`). Fail if `dag_validation.status === "fail"`.
|
|
17
17
|
|
|
18
|
-
|
|
18
|
+
Include `claim_ids[]` in your summary for parent to post as messenger **claims** before spawning adversary.
|
|
19
|
+
|
|
20
|
+
Bus label: `PlanEvaluatorAgent`.
|
|
@@ -18,6 +18,8 @@ Valid **YAML only** — `PlanReviewRoundDraft` (`.pi/harness/specs/plan-review-r
|
|
|
18
18
|
- `review_gate_ready` boolean
|
|
19
19
|
- `participants`, `claims`, `rebuttals`, `evidence_refs`, `token_usage`, `severity_scores`
|
|
20
20
|
|
|
21
|
-
Parent
|
|
21
|
+
Parent passes `harness_messenger_read_round` transcript + lane YAML. After your YAML draft, parent calls `harness_messenger_post` (`kind: integrate`) then `harness_debate_submit_round` — you do not write `review-round-r*.yaml`.
|
|
22
22
|
|
|
23
|
-
|
|
23
|
+
Set `review_gate_ready: false` when evaluator checks fail unless `disputes[]` documents open tension.
|
|
24
|
+
|
|
25
|
+
Bus label: `ReviewIntegratorAgent`.
|
|
@@ -1,103 +1,24 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* debate-orchestrator — headless debate bus (pi-messenger-inspired semantics).
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
* - transport is extension custom entries + debate artifacts on disk
|
|
6
|
-
* - command interface is machine-friendly (`/harness-debate-*`)
|
|
7
|
-
*
|
|
8
|
-
* Protocol envelope:
|
|
9
|
-
* {
|
|
10
|
-
* protocol: "pi-debate-bus/v1",
|
|
11
|
-
* kind: "open" | "round" | "consensus" | "budget_exhausted",
|
|
12
|
-
* correlation: { run_id, debate_id, round_index?, sender },
|
|
13
|
-
* payload: { ... }
|
|
14
|
-
* }
|
|
4
|
+
* Commands mirror harness_debate_* tools; shared state lives in debate-bus-core.
|
|
15
5
|
*/
|
|
16
6
|
|
|
17
|
-
import { appendFile, mkdir, writeFile } from "node:fs/promises";
|
|
18
7
|
import { join } from "node:path";
|
|
19
8
|
import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
|
|
20
|
-
import {
|
|
21
|
-
type DebateParticipant,
|
|
22
|
-
debatePhaseFromId,
|
|
23
|
-
isPlanDebateId,
|
|
24
|
-
PLAN_DEBATE_PARTICIPANTS,
|
|
25
|
-
POST_EXECUTE_DEBATE_PARTICIPANTS,
|
|
26
|
-
} from "../lib/debate-orchestrator-types.js";
|
|
27
9
|
import { getRunIdFromSession } from "../lib/harness-run-context.js";
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
};
|
|
41
|
-
consensus_delta: number;
|
|
42
|
-
severity_scores?: {
|
|
43
|
-
correctness: number;
|
|
44
|
-
security: number;
|
|
45
|
-
architecture: number;
|
|
46
|
-
test_integrity: number;
|
|
47
|
-
};
|
|
48
|
-
}
|
|
49
|
-
|
|
50
|
-
interface DebateState {
|
|
51
|
-
run_id: string;
|
|
52
|
-
debate_id: string;
|
|
53
|
-
debate_phase: DebatePhase;
|
|
54
|
-
round_count: number;
|
|
55
|
-
budget_used: number;
|
|
56
|
-
max_rounds: number;
|
|
57
|
-
round_token_cap: number;
|
|
58
|
-
debate_global_cap: number;
|
|
59
|
-
last_review_gate_ready?: boolean;
|
|
60
|
-
}
|
|
61
|
-
|
|
62
|
-
interface BusEnvelope<T = unknown> {
|
|
63
|
-
protocol: "pi-debate-bus/v1";
|
|
64
|
-
kind: "open" | "round" | "consensus" | "budget_exhausted";
|
|
65
|
-
correlation: {
|
|
66
|
-
run_id: string;
|
|
67
|
-
debate_id: string;
|
|
68
|
-
round_index?: number;
|
|
69
|
-
sender: DebateParticipant | "system";
|
|
70
|
-
};
|
|
71
|
-
payload: T;
|
|
72
|
-
}
|
|
73
|
-
|
|
74
|
-
const DEBATES_DIR = join(process.cwd(), ".pi", "harness", "debates");
|
|
75
|
-
const WEIGHTS = {
|
|
76
|
-
claim_quality: 0.2,
|
|
77
|
-
reproducibility: 0.4,
|
|
78
|
-
agreement: 0.4,
|
|
79
|
-
};
|
|
80
|
-
const THRESHOLDS = {
|
|
81
|
-
correctness: 0.7,
|
|
82
|
-
security: 0.7,
|
|
83
|
-
architecture: 0.8,
|
|
84
|
-
test_integrity: 0.8,
|
|
85
|
-
};
|
|
86
|
-
const HARD_STOP_DEBATE_CAPS = process.env.HARNESS_DEBATE_HARD_STOP === "true";
|
|
87
|
-
|
|
88
|
-
function nowIso(): string {
|
|
89
|
-
return new Date().toISOString();
|
|
90
|
-
}
|
|
91
|
-
|
|
92
|
-
function toSafeFloat(value: unknown): number {
|
|
93
|
-
const n = Number(value);
|
|
94
|
-
if (Number.isNaN(n) || !Number.isFinite(n)) return 0;
|
|
95
|
-
return Math.max(0, Math.min(1, n));
|
|
96
|
-
}
|
|
97
|
-
|
|
98
|
-
async function ensureDebatesDir(): Promise<void> {
|
|
99
|
-
await mkdir(DEBATES_DIR, { recursive: true });
|
|
100
|
-
}
|
|
10
|
+
import {
|
|
11
|
+
acceptDebateRound,
|
|
12
|
+
finalizeDebateConsensus,
|
|
13
|
+
openDebateBus,
|
|
14
|
+
parseRoundEnvelope,
|
|
15
|
+
} from "./lib/debate-bus-core.js";
|
|
16
|
+
import {
|
|
17
|
+
getDebateState,
|
|
18
|
+
restoreDebateStateFromEntry,
|
|
19
|
+
} from "./lib/debate-bus-state.js";
|
|
20
|
+
import { normalizePlanDebateId } from "./lib/plan-debate-id.js";
|
|
21
|
+
import { initPlanMessenger } from "./lib/plan-messenger.js";
|
|
101
22
|
|
|
102
23
|
function getRunId(ctx: {
|
|
103
24
|
sessionManager: { getEntries(): unknown[]; getSessionId(): string };
|
|
@@ -110,339 +31,11 @@ function getRunId(ctx: {
|
|
|
110
31
|
);
|
|
111
32
|
}
|
|
112
33
|
|
|
113
|
-
const PLAN_BUDGET = {
|
|
114
|
-
max_rounds: 4,
|
|
115
|
-
round_token_cap: 2000,
|
|
116
|
-
debate_global_cap: 12000,
|
|
117
|
-
} as const;
|
|
118
|
-
|
|
119
|
-
const AGGRESSIVE_BUDGET = {
|
|
120
|
-
max_rounds: 6,
|
|
121
|
-
round_token_cap: 2500,
|
|
122
|
-
debate_global_cap: 35000,
|
|
123
|
-
} as const;
|
|
124
|
-
|
|
125
|
-
function capsForDebate(debateId: string): {
|
|
126
|
-
name: "plan" | "aggressive";
|
|
127
|
-
max_rounds: number;
|
|
128
|
-
round_token_cap: number;
|
|
129
|
-
debate_global_cap: number;
|
|
130
|
-
} {
|
|
131
|
-
if (isPlanDebateId(debateId)) {
|
|
132
|
-
return { name: "plan", ...PLAN_BUDGET };
|
|
133
|
-
}
|
|
134
|
-
return { name: "aggressive", ...AGGRESSIVE_BUDGET };
|
|
135
|
-
}
|
|
136
|
-
|
|
137
|
-
function participantAllowed(participant: string, phase: DebatePhase): boolean {
|
|
138
|
-
if (phase === "plan") {
|
|
139
|
-
return (PLAN_DEBATE_PARTICIPANTS as readonly string[]).includes(
|
|
140
|
-
participant,
|
|
141
|
-
);
|
|
142
|
-
}
|
|
143
|
-
return (POST_EXECUTE_DEBATE_PARTICIPANTS as readonly string[]).includes(
|
|
144
|
-
participant,
|
|
145
|
-
);
|
|
146
|
-
}
|
|
147
|
-
|
|
148
|
-
async function writeDebateEvent(
|
|
149
|
-
debateId: string,
|
|
150
|
-
event: unknown,
|
|
151
|
-
): Promise<void> {
|
|
152
|
-
await ensureDebatesDir();
|
|
153
|
-
const path = join(DEBATES_DIR, `${debateId}.jsonl`);
|
|
154
|
-
await appendFile(path, `${JSON.stringify(event)}\n`, "utf-8");
|
|
155
|
-
}
|
|
156
|
-
|
|
157
|
-
function defaultSeverity(): {
|
|
158
|
-
correctness: number;
|
|
159
|
-
security: number;
|
|
160
|
-
architecture: number;
|
|
161
|
-
test_integrity: number;
|
|
162
|
-
} {
|
|
163
|
-
return { correctness: 0, security: 0, architecture: 0, test_integrity: 0 };
|
|
164
|
-
}
|
|
165
|
-
|
|
166
|
-
function decidePolicy(
|
|
167
|
-
severity: ReturnType<typeof defaultSeverity>,
|
|
168
|
-
minEvidenceConfidence: number,
|
|
169
|
-
): PolicyDecision {
|
|
170
|
-
if (
|
|
171
|
-
severity.security >= THRESHOLDS.security ||
|
|
172
|
-
severity.correctness >= THRESHOLDS.correctness ||
|
|
173
|
-
severity.architecture >= THRESHOLDS.architecture ||
|
|
174
|
-
severity.test_integrity >= THRESHOLDS.test_integrity
|
|
175
|
-
) {
|
|
176
|
-
return "block";
|
|
177
|
-
}
|
|
178
|
-
if (minEvidenceConfidence < 0.55) return "human_required";
|
|
179
|
-
if (minEvidenceConfidence < 0.75) return "conditional_pass";
|
|
180
|
-
return "pass";
|
|
181
|
-
}
|
|
182
|
-
|
|
183
|
-
function parseEnvelope(raw: string): BusEnvelope<RoundPayload> | null {
|
|
184
|
-
try {
|
|
185
|
-
const parsed = JSON.parse(raw) as BusEnvelope<RoundPayload>;
|
|
186
|
-
if (parsed?.protocol !== "pi-debate-bus/v1") return null;
|
|
187
|
-
if (parsed?.kind !== "round") return null;
|
|
188
|
-
return parsed;
|
|
189
|
-
} catch {
|
|
190
|
-
return null;
|
|
191
|
-
}
|
|
192
|
-
}
|
|
193
|
-
|
|
194
34
|
export default function debateOrchestrator(pi: ExtensionAPI) {
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
const caps = capsForDebate(debateId);
|
|
200
|
-
const debate_phase = debatePhaseFromId(debateId);
|
|
201
|
-
state = {
|
|
202
|
-
run_id: runId,
|
|
203
|
-
debate_id: debateId,
|
|
204
|
-
debate_phase,
|
|
205
|
-
round_count: 0,
|
|
206
|
-
budget_used: 0,
|
|
207
|
-
max_rounds: caps.max_rounds,
|
|
208
|
-
round_token_cap: caps.round_token_cap,
|
|
209
|
-
debate_global_cap: caps.debate_global_cap,
|
|
210
|
-
last_review_gate_ready: false,
|
|
211
|
-
};
|
|
212
|
-
pi.appendEntry("harness-debate-state", state);
|
|
213
|
-
const envelope: BusEnvelope = {
|
|
214
|
-
protocol: "pi-debate-bus/v1",
|
|
215
|
-
kind: "open",
|
|
216
|
-
correlation: {
|
|
217
|
-
run_id: runId,
|
|
218
|
-
debate_id: debateId,
|
|
219
|
-
sender: "system",
|
|
220
|
-
},
|
|
221
|
-
payload: {
|
|
222
|
-
opened_at: nowIso(),
|
|
223
|
-
debate_phase,
|
|
224
|
-
budget_profile: caps.name,
|
|
225
|
-
},
|
|
226
|
-
};
|
|
227
|
-
pi.appendEntry("harness-debate-envelope", envelope);
|
|
228
|
-
await writeDebateEvent(debateId, envelope);
|
|
229
|
-
}
|
|
230
|
-
|
|
231
|
-
async function emitBudgetExhausted(reason: string): Promise<void> {
|
|
232
|
-
if (!state) return;
|
|
233
|
-
const envelope: BusEnvelope = {
|
|
234
|
-
protocol: "pi-debate-bus/v1",
|
|
235
|
-
kind: "budget_exhausted",
|
|
236
|
-
correlation: {
|
|
237
|
-
run_id: state.run_id,
|
|
238
|
-
debate_id: state.debate_id,
|
|
239
|
-
round_index: state.round_count,
|
|
240
|
-
sender: "system",
|
|
241
|
-
},
|
|
242
|
-
payload: {
|
|
243
|
-
schema_version: "1.0.0",
|
|
244
|
-
contract_version: "1.0.0",
|
|
245
|
-
event_type: "budget_exhausted",
|
|
246
|
-
run_id: state.run_id,
|
|
247
|
-
debate_id: state.debate_id,
|
|
248
|
-
round_count: state.round_count,
|
|
249
|
-
budget_used: state.budget_used,
|
|
250
|
-
exhaustion_reason: reason,
|
|
251
|
-
caps: {
|
|
252
|
-
max_rounds: state.max_rounds,
|
|
253
|
-
round_token_cap: state.round_token_cap,
|
|
254
|
-
debate_global_cap: state.debate_global_cap,
|
|
255
|
-
},
|
|
256
|
-
minimum_evidence_confidence: 0.6,
|
|
257
|
-
default_policy_outcome: "block",
|
|
258
|
-
human_override_allowed: true,
|
|
259
|
-
},
|
|
260
|
-
};
|
|
261
|
-
pi.appendEntry("harness-debate-envelope", envelope);
|
|
262
|
-
pi.appendEntry("harness-budget-exhausted", envelope.payload);
|
|
263
|
-
await writeDebateEvent(state.debate_id, envelope);
|
|
264
|
-
}
|
|
265
|
-
|
|
266
|
-
async function acceptRound(envelope: BusEnvelope<RoundPayload>): Promise<{
|
|
267
|
-
ok: boolean;
|
|
268
|
-
reason?: string;
|
|
269
|
-
}> {
|
|
270
|
-
if (!state) return { ok: false, reason: "no active debate" };
|
|
271
|
-
if (state.debate_id !== envelope.correlation.debate_id) {
|
|
272
|
-
return { ok: false, reason: "debate id mismatch" };
|
|
273
|
-
}
|
|
274
|
-
|
|
275
|
-
for (const p of envelope.payload.participants ?? []) {
|
|
276
|
-
if (!participantAllowed(p, state.debate_phase)) {
|
|
277
|
-
return {
|
|
278
|
-
ok: false,
|
|
279
|
-
reason: `participant ${p} invalid for debate_phase=${state.debate_phase}`,
|
|
280
|
-
};
|
|
281
|
-
}
|
|
282
|
-
}
|
|
283
|
-
|
|
284
|
-
const nextRound = state.round_count + 1;
|
|
285
|
-
if (nextRound > state.max_rounds) {
|
|
286
|
-
await emitBudgetExhausted("max_rounds_reached");
|
|
287
|
-
if (HARD_STOP_DEBATE_CAPS) {
|
|
288
|
-
return { ok: false, reason: "max rounds reached" };
|
|
289
|
-
}
|
|
290
|
-
}
|
|
291
|
-
|
|
292
|
-
const perAgent = envelope.payload.token_usage?.per_agent ?? {};
|
|
293
|
-
for (const [agent, tokens] of Object.entries(perAgent)) {
|
|
294
|
-
if (Number(tokens) > state.round_token_cap) {
|
|
295
|
-
await emitBudgetExhausted("round_token_cap_exceeded");
|
|
296
|
-
if (HARD_STOP_DEBATE_CAPS) {
|
|
297
|
-
return { ok: false, reason: `round cap exceeded by ${agent}` };
|
|
298
|
-
}
|
|
299
|
-
}
|
|
300
|
-
}
|
|
301
|
-
|
|
302
|
-
const roundTotal = Number(envelope.payload.token_usage?.round_total ?? 0);
|
|
303
|
-
if (state.budget_used + roundTotal > state.debate_global_cap) {
|
|
304
|
-
await emitBudgetExhausted("debate_global_cap_exceeded");
|
|
305
|
-
if (HARD_STOP_DEBATE_CAPS) {
|
|
306
|
-
return { ok: false, reason: "global cap exceeded" };
|
|
307
|
-
}
|
|
308
|
-
}
|
|
309
|
-
|
|
310
|
-
state.round_count = nextRound;
|
|
311
|
-
state.budget_used += roundTotal;
|
|
312
|
-
pi.appendEntry("harness-debate-state", state);
|
|
313
|
-
|
|
314
|
-
if (envelope.payload.severity_scores) {
|
|
315
|
-
lastSeverity = {
|
|
316
|
-
correctness: toSafeFloat(envelope.payload.severity_scores.correctness),
|
|
317
|
-
security: toSafeFloat(envelope.payload.severity_scores.security),
|
|
318
|
-
architecture: toSafeFloat(
|
|
319
|
-
envelope.payload.severity_scores.architecture,
|
|
320
|
-
),
|
|
321
|
-
test_integrity: toSafeFloat(
|
|
322
|
-
envelope.payload.severity_scores.test_integrity,
|
|
323
|
-
),
|
|
324
|
-
};
|
|
325
|
-
}
|
|
326
|
-
|
|
327
|
-
const profileName =
|
|
328
|
-
state.debate_phase === "plan"
|
|
329
|
-
? ("plan" as const)
|
|
330
|
-
: ("aggressive" as const);
|
|
331
|
-
|
|
332
|
-
const roundRecord = {
|
|
333
|
-
schema_version: "1.0.0",
|
|
334
|
-
contract_version: "1.0.0",
|
|
335
|
-
run_id: state.run_id,
|
|
336
|
-
debate_id: state.debate_id,
|
|
337
|
-
round_index: state.round_count,
|
|
338
|
-
participants: envelope.payload.participants,
|
|
339
|
-
claims: envelope.payload.claims,
|
|
340
|
-
rebuttals: envelope.payload.rebuttals,
|
|
341
|
-
evidence_refs: envelope.payload.evidence_refs,
|
|
342
|
-
token_usage: envelope.payload.token_usage,
|
|
343
|
-
budget_profile: {
|
|
344
|
-
name: profileName,
|
|
345
|
-
max_rounds: state.max_rounds,
|
|
346
|
-
round_token_cap: state.round_token_cap,
|
|
347
|
-
debate_global_cap: state.debate_global_cap,
|
|
348
|
-
},
|
|
349
|
-
consensus_delta: Number(envelope.payload.consensus_delta ?? 0),
|
|
350
|
-
};
|
|
351
|
-
pi.appendEntry("harness-round-result", roundRecord);
|
|
352
|
-
pi.appendEntry("harness-debate-envelope", envelope);
|
|
353
|
-
await writeDebateEvent(state.debate_id, envelope);
|
|
354
|
-
return { ok: true };
|
|
355
|
-
}
|
|
356
|
-
|
|
357
|
-
async function finalizeConsensus(
|
|
358
|
-
rationale: string,
|
|
359
|
-
): Promise<PolicyDecision | null> {
|
|
360
|
-
if (!state) return null;
|
|
361
|
-
const evidenceScore = Math.max(
|
|
362
|
-
0,
|
|
363
|
-
Math.min(
|
|
364
|
-
1,
|
|
365
|
-
lastSeverity.correctness * WEIGHTS.claim_quality +
|
|
366
|
-
(1 - Math.max(lastSeverity.security, lastSeverity.test_integrity)) *
|
|
367
|
-
WEIGHTS.reproducibility +
|
|
368
|
-
Math.max(
|
|
369
|
-
0,
|
|
370
|
-
1 - Math.abs(lastSeverity.architecture - lastSeverity.correctness),
|
|
371
|
-
) *
|
|
372
|
-
WEIGHTS.agreement,
|
|
373
|
-
),
|
|
374
|
-
);
|
|
375
|
-
const decision = decidePolicy(lastSeverity, evidenceScore);
|
|
376
|
-
const planPhase = state.debate_phase === "plan";
|
|
377
|
-
const evaluatorPassed = planPhase
|
|
378
|
-
? Boolean(state.last_review_gate_ready)
|
|
379
|
-
: true;
|
|
380
|
-
const debateComplete = planPhase
|
|
381
|
-
? state.round_count >= state.max_rounds
|
|
382
|
-
: state.round_count > 0;
|
|
383
|
-
|
|
384
|
-
const consensus = {
|
|
385
|
-
schema_version: "1.0.0",
|
|
386
|
-
contract_version: "1.0.0",
|
|
387
|
-
run_id: state.run_id,
|
|
388
|
-
debate_id: state.debate_id,
|
|
389
|
-
debate_phase: state.debate_phase,
|
|
390
|
-
round_count: state.round_count,
|
|
391
|
-
budget_used: state.budget_used,
|
|
392
|
-
severity_scores: lastSeverity,
|
|
393
|
-
severity_thresholds: {
|
|
394
|
-
correctness_block_at: THRESHOLDS.correctness,
|
|
395
|
-
security_block_at: THRESHOLDS.security,
|
|
396
|
-
architecture_block_at: THRESHOLDS.architecture,
|
|
397
|
-
test_integrity_block_at: THRESHOLDS.test_integrity,
|
|
398
|
-
},
|
|
399
|
-
confidence_weights: WEIGHTS,
|
|
400
|
-
evidence_refs: [],
|
|
401
|
-
strict_gate_prerequisites: planPhase
|
|
402
|
-
? {
|
|
403
|
-
plan_gate_passed: false,
|
|
404
|
-
execution_completed: false,
|
|
405
|
-
evaluator_passed: evaluatorPassed,
|
|
406
|
-
adversarial_debate_completed: debateComplete,
|
|
407
|
-
severity_policy_ok: decision !== "block",
|
|
408
|
-
benchmark_delta_checks_passed: false,
|
|
409
|
-
rollback_artifacts_generated: false,
|
|
410
|
-
}
|
|
411
|
-
: {
|
|
412
|
-
plan_gate_passed: true,
|
|
413
|
-
execution_completed: true,
|
|
414
|
-
evaluator_passed: true,
|
|
415
|
-
adversarial_debate_completed: debateComplete,
|
|
416
|
-
severity_policy_ok: decision !== "block",
|
|
417
|
-
benchmark_delta_checks_passed: false,
|
|
418
|
-
rollback_artifacts_generated: false,
|
|
419
|
-
},
|
|
420
|
-
policy_decision: decision,
|
|
421
|
-
rationale,
|
|
422
|
-
};
|
|
423
|
-
|
|
424
|
-
const envelope: BusEnvelope = {
|
|
425
|
-
protocol: "pi-debate-bus/v1",
|
|
426
|
-
kind: "consensus",
|
|
427
|
-
correlation: {
|
|
428
|
-
run_id: state.run_id,
|
|
429
|
-
debate_id: state.debate_id,
|
|
430
|
-
round_index: state.round_count,
|
|
431
|
-
sender: "system",
|
|
432
|
-
},
|
|
433
|
-
payload: consensus,
|
|
434
|
-
};
|
|
435
|
-
|
|
436
|
-
await writeFile(
|
|
437
|
-
join(DEBATES_DIR, `${state.debate_id}.consensus.json`),
|
|
438
|
-
`${JSON.stringify(consensus, null, 2)}\n`,
|
|
439
|
-
"utf-8",
|
|
440
|
-
);
|
|
441
|
-
pi.appendEntry("harness-consensus-packet", consensus);
|
|
442
|
-
pi.appendEntry("harness-debate-envelope", envelope);
|
|
443
|
-
await writeDebateEvent(state.debate_id, envelope);
|
|
444
|
-
return decision;
|
|
445
|
-
}
|
|
35
|
+
const hooks = {
|
|
36
|
+
appendEntry: (customType: string, data: unknown) =>
|
|
37
|
+
pi.appendEntry(customType, data),
|
|
38
|
+
};
|
|
446
39
|
|
|
447
40
|
pi.on("session_start", async (_event, ctx) => {
|
|
448
41
|
const entries = ctx.sessionManager.getEntries();
|
|
@@ -452,7 +45,7 @@ export default function debateOrchestrator(pi: ExtensionAPI) {
|
|
|
452
45
|
entry.type === "custom" &&
|
|
453
46
|
entry.customType === "harness-debate-state"
|
|
454
47
|
) {
|
|
455
|
-
|
|
48
|
+
restoreDebateStateFromEntry(entry.data);
|
|
456
49
|
break;
|
|
457
50
|
}
|
|
458
51
|
}
|
|
@@ -461,13 +54,21 @@ export default function debateOrchestrator(pi: ExtensionAPI) {
|
|
|
461
54
|
pi.registerCommand("harness-debate-open", {
|
|
462
55
|
description: "Open a headless debate session",
|
|
463
56
|
handler: async (args, ctx) => {
|
|
57
|
+
const runId = getRunId(ctx);
|
|
464
58
|
const trimmed = args.trim();
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
59
|
+
const { debateId, warning } = normalizePlanDebateId(trimmed, runId);
|
|
60
|
+
await openDebateBus(runId, debateId, hooks);
|
|
61
|
+
if (debateId.startsWith("plan-")) {
|
|
62
|
+
await initPlanMessenger(
|
|
63
|
+
join(process.cwd(), ".pi", "harness", "runs", runId),
|
|
64
|
+
{ runId, debateId },
|
|
65
|
+
);
|
|
66
|
+
}
|
|
468
67
|
pi.sendMessage({
|
|
469
68
|
customType: "harness-debate-opened",
|
|
470
|
-
content:
|
|
69
|
+
content: warning
|
|
70
|
+
? `Debate opened: ${debateId} (${warning})`
|
|
71
|
+
: `Debate opened: ${debateId}`,
|
|
471
72
|
display: false,
|
|
472
73
|
});
|
|
473
74
|
},
|
|
@@ -476,10 +77,12 @@ export default function debateOrchestrator(pi: ExtensionAPI) {
|
|
|
476
77
|
pi.registerCommand("harness-debate-round", {
|
|
477
78
|
description: "Submit a debate round envelope JSON",
|
|
478
79
|
handler: async (args, ctx) => {
|
|
479
|
-
if (!
|
|
480
|
-
|
|
80
|
+
if (!getDebateState()) {
|
|
81
|
+
const runId = getRunId(ctx);
|
|
82
|
+
const { debateId } = normalizePlanDebateId("", runId);
|
|
83
|
+
await openDebateBus(runId, debateId, hooks);
|
|
481
84
|
}
|
|
482
|
-
const envelope =
|
|
85
|
+
const envelope = parseRoundEnvelope(args.trim());
|
|
483
86
|
if (!envelope) {
|
|
484
87
|
pi.sendMessage({
|
|
485
88
|
customType: "harness-debate-round-error",
|
|
@@ -489,7 +92,7 @@ export default function debateOrchestrator(pi: ExtensionAPI) {
|
|
|
489
92
|
});
|
|
490
93
|
return;
|
|
491
94
|
}
|
|
492
|
-
const result = await
|
|
95
|
+
const result = await acceptDebateRound(envelope, hooks);
|
|
493
96
|
if (!result.ok) {
|
|
494
97
|
pi.sendMessage({
|
|
495
98
|
customType: "harness-debate-round-rejected",
|
|
@@ -503,7 +106,7 @@ export default function debateOrchestrator(pi: ExtensionAPI) {
|
|
|
503
106
|
pi.registerCommand("harness-debate-consensus", {
|
|
504
107
|
description: "Finalize debate and emit consensus packet",
|
|
505
108
|
handler: async (args) => {
|
|
506
|
-
if (!
|
|
109
|
+
if (!getDebateState()) {
|
|
507
110
|
pi.sendMessage({
|
|
508
111
|
customType: "harness-debate-consensus-error",
|
|
509
112
|
content: "No active debate to finalize.",
|
|
@@ -511,8 +114,9 @@ export default function debateOrchestrator(pi: ExtensionAPI) {
|
|
|
511
114
|
});
|
|
512
115
|
return;
|
|
513
116
|
}
|
|
514
|
-
const decision = await
|
|
117
|
+
const decision = await finalizeDebateConsensus(
|
|
515
118
|
args.trim() || "Consensus generated by debate-orchestrator.",
|
|
119
|
+
hooks,
|
|
516
120
|
);
|
|
517
121
|
pi.sendMessage({
|
|
518
122
|
customType: "harness-debate-consensus",
|