ultimate-pi 0.13.0 → 0.14.0

This diff shows the changes between two publicly released versions of the package, as they appear in the supported public registry they were published to. It is provided for informational purposes only.
Files changed (39)
  1. package/{.pi → .agents}/skills/ccc/SKILL.md +1 -7
  2. package/.agents/skills/ccc/references/settings.md +126 -0
  3. package/.agents/skills/harness-debate-plan/SKILL.md +61 -21
  4. package/.agents/skills/harness-orchestration/SKILL.md +1 -1
  5. package/.pi/agents/harness/planning/plan-adversary.md +2 -2
  6. package/.pi/agents/harness/planning/plan-evaluator.md +3 -1
  7. package/.pi/agents/harness/planning/review-integrator.md +4 -2
  8. package/.pi/extensions/debate-orchestrator.ts +39 -435
  9. package/.pi/extensions/harness-debate-tools.ts +519 -0
  10. package/.pi/extensions/harness-plan-approval.ts +41 -17
  11. package/.pi/extensions/harness-run-context.ts +18 -0
  12. package/.pi/extensions/lib/debate-bus-core.ts +434 -0
  13. package/.pi/extensions/lib/debate-bus-state.ts +58 -0
  14. package/.pi/extensions/lib/harness-spawn-budget.ts +5 -25
  15. package/.pi/extensions/lib/plan-approval/dialog.ts +33 -272
  16. package/.pi/extensions/lib/plan-approval/format-plan.ts +12 -85
  17. package/.pi/extensions/lib/plan-approval/plan-review.ts +6 -6
  18. package/.pi/extensions/lib/plan-approval/render.ts +6 -0
  19. package/.pi/extensions/lib/plan-approval/validate.ts +1 -1
  20. package/.pi/extensions/lib/plan-debate-envelope.ts +2 -0
  21. package/.pi/extensions/lib/plan-debate-gate.ts +155 -0
  22. package/.pi/extensions/lib/plan-debate-id.ts +39 -0
  23. package/.pi/extensions/lib/plan-debate-lane.ts +220 -0
  24. package/.pi/extensions/lib/plan-debate-round-status.ts +94 -0
  25. package/.pi/extensions/lib/plan-debate-write-guard.ts +20 -0
  26. package/.pi/extensions/lib/plan-messenger.ts +276 -0
  27. package/.pi/extensions/lib/plan-review-integrator-rules.ts +119 -0
  28. package/.pi/extensions/lib/plan-scope-guard.ts +89 -0
  29. package/.pi/harness/agents.manifest.json +7 -7
  30. package/.pi/prompts/harness-plan.md +22 -12
  31. package/CHANGELOG.md +18 -0
  32. package/THIRD_PARTY_NOTICES.md +1 -1
  33. package/package.json +3 -3
  34. package/.agents/skills/ck-search/SKILL.md +0 -23
  35. package/.agents/skills/cocoindex-search/SKILL.md +0 -35
  36. package/.agents/skills/obsidian-bases/SKILL.md +0 -299
  37. package/.agents/skills/obsidian-markdown/SKILL.md +0 -237
  38. package/.pi/extensions/lib/plan-approval/fallback.ts +0 -50
  39. package/{.pi → .agents}/skills/ccc/references/management.md +0 -0
@@ -1,103 +1,24 @@
1
1
  /**
2
2
  * debate-orchestrator — headless debate bus (pi-messenger-inspired semantics).
3
3
  *
4
- * No additional UI surface:
5
- * - transport is extension custom entries + debate artifacts on disk
6
- * - command interface is machine-friendly (`/harness-debate-*`)
7
- *
8
- * Protocol envelope:
9
- * {
10
- * protocol: "pi-debate-bus/v1",
11
- * kind: "open" | "round" | "consensus" | "budget_exhausted",
12
- * correlation: { run_id, debate_id, round_index?, sender },
13
- * payload: { ... }
14
- * }
4
+ * Commands mirror harness_debate_* tools; shared state lives in debate-bus-core.
15
5
  */
16
6
 
17
- import { appendFile, mkdir, writeFile } from "node:fs/promises";
18
7
  import { join } from "node:path";
19
8
  import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
20
- import {
21
- type DebateParticipant,
22
- debatePhaseFromId,
23
- isPlanDebateId,
24
- PLAN_DEBATE_PARTICIPANTS,
25
- POST_EXECUTE_DEBATE_PARTICIPANTS,
26
- } from "../lib/debate-orchestrator-types.js";
27
9
  import { getRunIdFromSession } from "../lib/harness-run-context.js";
28
-
29
- type PolicyDecision = "pass" | "conditional_pass" | "block" | "human_required";
30
- type DebatePhase = "plan" | "post_execute";
31
-
32
- interface RoundPayload {
33
- participants: DebateParticipant[];
34
- claims: string[];
35
- rebuttals: string[];
36
- evidence_refs: string[];
37
- token_usage: {
38
- per_agent: Record<string, number>;
39
- round_total: number;
40
- };
41
- consensus_delta: number;
42
- severity_scores?: {
43
- correctness: number;
44
- security: number;
45
- architecture: number;
46
- test_integrity: number;
47
- };
48
- }
49
-
50
- interface DebateState {
51
- run_id: string;
52
- debate_id: string;
53
- debate_phase: DebatePhase;
54
- round_count: number;
55
- budget_used: number;
56
- max_rounds: number;
57
- round_token_cap: number;
58
- debate_global_cap: number;
59
- last_review_gate_ready?: boolean;
60
- }
61
-
62
- interface BusEnvelope<T = unknown> {
63
- protocol: "pi-debate-bus/v1";
64
- kind: "open" | "round" | "consensus" | "budget_exhausted";
65
- correlation: {
66
- run_id: string;
67
- debate_id: string;
68
- round_index?: number;
69
- sender: DebateParticipant | "system";
70
- };
71
- payload: T;
72
- }
73
-
74
- const DEBATES_DIR = join(process.cwd(), ".pi", "harness", "debates");
75
- const WEIGHTS = {
76
- claim_quality: 0.2,
77
- reproducibility: 0.4,
78
- agreement: 0.4,
79
- };
80
- const THRESHOLDS = {
81
- correctness: 0.7,
82
- security: 0.7,
83
- architecture: 0.8,
84
- test_integrity: 0.8,
85
- };
86
- const HARD_STOP_DEBATE_CAPS = process.env.HARNESS_DEBATE_HARD_STOP === "true";
87
-
88
- function nowIso(): string {
89
- return new Date().toISOString();
90
- }
91
-
92
- function toSafeFloat(value: unknown): number {
93
- const n = Number(value);
94
- if (Number.isNaN(n) || !Number.isFinite(n)) return 0;
95
- return Math.max(0, Math.min(1, n));
96
- }
97
-
98
- async function ensureDebatesDir(): Promise<void> {
99
- await mkdir(DEBATES_DIR, { recursive: true });
100
- }
10
+ import {
11
+ acceptDebateRound,
12
+ finalizeDebateConsensus,
13
+ openDebateBus,
14
+ parseRoundEnvelope,
15
+ } from "./lib/debate-bus-core.js";
16
+ import {
17
+ getDebateState,
18
+ restoreDebateStateFromEntry,
19
+ } from "./lib/debate-bus-state.js";
20
+ import { normalizePlanDebateId } from "./lib/plan-debate-id.js";
21
+ import { initPlanMessenger } from "./lib/plan-messenger.js";
101
22
 
102
23
  function getRunId(ctx: {
103
24
  sessionManager: { getEntries(): unknown[]; getSessionId(): string };
@@ -110,339 +31,11 @@ function getRunId(ctx: {
110
31
  );
111
32
  }
112
33
 
113
- const PLAN_BUDGET = {
114
- max_rounds: 4,
115
- round_token_cap: 2000,
116
- debate_global_cap: 12000,
117
- } as const;
118
-
119
- const AGGRESSIVE_BUDGET = {
120
- max_rounds: 6,
121
- round_token_cap: 2500,
122
- debate_global_cap: 35000,
123
- } as const;
124
-
125
- function capsForDebate(debateId: string): {
126
- name: "plan" | "aggressive";
127
- max_rounds: number;
128
- round_token_cap: number;
129
- debate_global_cap: number;
130
- } {
131
- if (isPlanDebateId(debateId)) {
132
- return { name: "plan", ...PLAN_BUDGET };
133
- }
134
- return { name: "aggressive", ...AGGRESSIVE_BUDGET };
135
- }
136
-
137
- function participantAllowed(participant: string, phase: DebatePhase): boolean {
138
- if (phase === "plan") {
139
- return (PLAN_DEBATE_PARTICIPANTS as readonly string[]).includes(
140
- participant,
141
- );
142
- }
143
- return (POST_EXECUTE_DEBATE_PARTICIPANTS as readonly string[]).includes(
144
- participant,
145
- );
146
- }
147
-
148
- async function writeDebateEvent(
149
- debateId: string,
150
- event: unknown,
151
- ): Promise<void> {
152
- await ensureDebatesDir();
153
- const path = join(DEBATES_DIR, `${debateId}.jsonl`);
154
- await appendFile(path, `${JSON.stringify(event)}\n`, "utf-8");
155
- }
156
-
157
- function defaultSeverity(): {
158
- correctness: number;
159
- security: number;
160
- architecture: number;
161
- test_integrity: number;
162
- } {
163
- return { correctness: 0, security: 0, architecture: 0, test_integrity: 0 };
164
- }
165
-
166
- function decidePolicy(
167
- severity: ReturnType<typeof defaultSeverity>,
168
- minEvidenceConfidence: number,
169
- ): PolicyDecision {
170
- if (
171
- severity.security >= THRESHOLDS.security ||
172
- severity.correctness >= THRESHOLDS.correctness ||
173
- severity.architecture >= THRESHOLDS.architecture ||
174
- severity.test_integrity >= THRESHOLDS.test_integrity
175
- ) {
176
- return "block";
177
- }
178
- if (minEvidenceConfidence < 0.55) return "human_required";
179
- if (minEvidenceConfidence < 0.75) return "conditional_pass";
180
- return "pass";
181
- }
182
-
183
- function parseEnvelope(raw: string): BusEnvelope<RoundPayload> | null {
184
- try {
185
- const parsed = JSON.parse(raw) as BusEnvelope<RoundPayload>;
186
- if (parsed?.protocol !== "pi-debate-bus/v1") return null;
187
- if (parsed?.kind !== "round") return null;
188
- return parsed;
189
- } catch {
190
- return null;
191
- }
192
- }
193
-
194
34
  export default function debateOrchestrator(pi: ExtensionAPI) {
195
- let state: DebateState | null = null;
196
- let lastSeverity = defaultSeverity();
197
-
198
- async function openDebate(runId: string, debateId: string): Promise<void> {
199
- const caps = capsForDebate(debateId);
200
- const debate_phase = debatePhaseFromId(debateId);
201
- state = {
202
- run_id: runId,
203
- debate_id: debateId,
204
- debate_phase,
205
- round_count: 0,
206
- budget_used: 0,
207
- max_rounds: caps.max_rounds,
208
- round_token_cap: caps.round_token_cap,
209
- debate_global_cap: caps.debate_global_cap,
210
- last_review_gate_ready: false,
211
- };
212
- pi.appendEntry("harness-debate-state", state);
213
- const envelope: BusEnvelope = {
214
- protocol: "pi-debate-bus/v1",
215
- kind: "open",
216
- correlation: {
217
- run_id: runId,
218
- debate_id: debateId,
219
- sender: "system",
220
- },
221
- payload: {
222
- opened_at: nowIso(),
223
- debate_phase,
224
- budget_profile: caps.name,
225
- },
226
- };
227
- pi.appendEntry("harness-debate-envelope", envelope);
228
- await writeDebateEvent(debateId, envelope);
229
- }
230
-
231
- async function emitBudgetExhausted(reason: string): Promise<void> {
232
- if (!state) return;
233
- const envelope: BusEnvelope = {
234
- protocol: "pi-debate-bus/v1",
235
- kind: "budget_exhausted",
236
- correlation: {
237
- run_id: state.run_id,
238
- debate_id: state.debate_id,
239
- round_index: state.round_count,
240
- sender: "system",
241
- },
242
- payload: {
243
- schema_version: "1.0.0",
244
- contract_version: "1.0.0",
245
- event_type: "budget_exhausted",
246
- run_id: state.run_id,
247
- debate_id: state.debate_id,
248
- round_count: state.round_count,
249
- budget_used: state.budget_used,
250
- exhaustion_reason: reason,
251
- caps: {
252
- max_rounds: state.max_rounds,
253
- round_token_cap: state.round_token_cap,
254
- debate_global_cap: state.debate_global_cap,
255
- },
256
- minimum_evidence_confidence: 0.6,
257
- default_policy_outcome: "block",
258
- human_override_allowed: true,
259
- },
260
- };
261
- pi.appendEntry("harness-debate-envelope", envelope);
262
- pi.appendEntry("harness-budget-exhausted", envelope.payload);
263
- await writeDebateEvent(state.debate_id, envelope);
264
- }
265
-
266
- async function acceptRound(envelope: BusEnvelope<RoundPayload>): Promise<{
267
- ok: boolean;
268
- reason?: string;
269
- }> {
270
- if (!state) return { ok: false, reason: "no active debate" };
271
- if (state.debate_id !== envelope.correlation.debate_id) {
272
- return { ok: false, reason: "debate id mismatch" };
273
- }
274
-
275
- for (const p of envelope.payload.participants ?? []) {
276
- if (!participantAllowed(p, state.debate_phase)) {
277
- return {
278
- ok: false,
279
- reason: `participant ${p} invalid for debate_phase=${state.debate_phase}`,
280
- };
281
- }
282
- }
283
-
284
- const nextRound = state.round_count + 1;
285
- if (nextRound > state.max_rounds) {
286
- await emitBudgetExhausted("max_rounds_reached");
287
- if (HARD_STOP_DEBATE_CAPS) {
288
- return { ok: false, reason: "max rounds reached" };
289
- }
290
- }
291
-
292
- const perAgent = envelope.payload.token_usage?.per_agent ?? {};
293
- for (const [agent, tokens] of Object.entries(perAgent)) {
294
- if (Number(tokens) > state.round_token_cap) {
295
- await emitBudgetExhausted("round_token_cap_exceeded");
296
- if (HARD_STOP_DEBATE_CAPS) {
297
- return { ok: false, reason: `round cap exceeded by ${agent}` };
298
- }
299
- }
300
- }
301
-
302
- const roundTotal = Number(envelope.payload.token_usage?.round_total ?? 0);
303
- if (state.budget_used + roundTotal > state.debate_global_cap) {
304
- await emitBudgetExhausted("debate_global_cap_exceeded");
305
- if (HARD_STOP_DEBATE_CAPS) {
306
- return { ok: false, reason: "global cap exceeded" };
307
- }
308
- }
309
-
310
- state.round_count = nextRound;
311
- state.budget_used += roundTotal;
312
- pi.appendEntry("harness-debate-state", state);
313
-
314
- if (envelope.payload.severity_scores) {
315
- lastSeverity = {
316
- correctness: toSafeFloat(envelope.payload.severity_scores.correctness),
317
- security: toSafeFloat(envelope.payload.severity_scores.security),
318
- architecture: toSafeFloat(
319
- envelope.payload.severity_scores.architecture,
320
- ),
321
- test_integrity: toSafeFloat(
322
- envelope.payload.severity_scores.test_integrity,
323
- ),
324
- };
325
- }
326
-
327
- const profileName =
328
- state.debate_phase === "plan"
329
- ? ("plan" as const)
330
- : ("aggressive" as const);
331
-
332
- const roundRecord = {
333
- schema_version: "1.0.0",
334
- contract_version: "1.0.0",
335
- run_id: state.run_id,
336
- debate_id: state.debate_id,
337
- round_index: state.round_count,
338
- participants: envelope.payload.participants,
339
- claims: envelope.payload.claims,
340
- rebuttals: envelope.payload.rebuttals,
341
- evidence_refs: envelope.payload.evidence_refs,
342
- token_usage: envelope.payload.token_usage,
343
- budget_profile: {
344
- name: profileName,
345
- max_rounds: state.max_rounds,
346
- round_token_cap: state.round_token_cap,
347
- debate_global_cap: state.debate_global_cap,
348
- },
349
- consensus_delta: Number(envelope.payload.consensus_delta ?? 0),
350
- };
351
- pi.appendEntry("harness-round-result", roundRecord);
352
- pi.appendEntry("harness-debate-envelope", envelope);
353
- await writeDebateEvent(state.debate_id, envelope);
354
- return { ok: true };
355
- }
356
-
357
- async function finalizeConsensus(
358
- rationale: string,
359
- ): Promise<PolicyDecision | null> {
360
- if (!state) return null;
361
- const evidenceScore = Math.max(
362
- 0,
363
- Math.min(
364
- 1,
365
- lastSeverity.correctness * WEIGHTS.claim_quality +
366
- (1 - Math.max(lastSeverity.security, lastSeverity.test_integrity)) *
367
- WEIGHTS.reproducibility +
368
- Math.max(
369
- 0,
370
- 1 - Math.abs(lastSeverity.architecture - lastSeverity.correctness),
371
- ) *
372
- WEIGHTS.agreement,
373
- ),
374
- );
375
- const decision = decidePolicy(lastSeverity, evidenceScore);
376
- const planPhase = state.debate_phase === "plan";
377
- const evaluatorPassed = planPhase
378
- ? Boolean(state.last_review_gate_ready)
379
- : true;
380
- const debateComplete = planPhase
381
- ? state.round_count >= state.max_rounds
382
- : state.round_count > 0;
383
-
384
- const consensus = {
385
- schema_version: "1.0.0",
386
- contract_version: "1.0.0",
387
- run_id: state.run_id,
388
- debate_id: state.debate_id,
389
- debate_phase: state.debate_phase,
390
- round_count: state.round_count,
391
- budget_used: state.budget_used,
392
- severity_scores: lastSeverity,
393
- severity_thresholds: {
394
- correctness_block_at: THRESHOLDS.correctness,
395
- security_block_at: THRESHOLDS.security,
396
- architecture_block_at: THRESHOLDS.architecture,
397
- test_integrity_block_at: THRESHOLDS.test_integrity,
398
- },
399
- confidence_weights: WEIGHTS,
400
- evidence_refs: [],
401
- strict_gate_prerequisites: planPhase
402
- ? {
403
- plan_gate_passed: false,
404
- execution_completed: false,
405
- evaluator_passed: evaluatorPassed,
406
- adversarial_debate_completed: debateComplete,
407
- severity_policy_ok: decision !== "block",
408
- benchmark_delta_checks_passed: false,
409
- rollback_artifacts_generated: false,
410
- }
411
- : {
412
- plan_gate_passed: true,
413
- execution_completed: true,
414
- evaluator_passed: true,
415
- adversarial_debate_completed: debateComplete,
416
- severity_policy_ok: decision !== "block",
417
- benchmark_delta_checks_passed: false,
418
- rollback_artifacts_generated: false,
419
- },
420
- policy_decision: decision,
421
- rationale,
422
- };
423
-
424
- const envelope: BusEnvelope = {
425
- protocol: "pi-debate-bus/v1",
426
- kind: "consensus",
427
- correlation: {
428
- run_id: state.run_id,
429
- debate_id: state.debate_id,
430
- round_index: state.round_count,
431
- sender: "system",
432
- },
433
- payload: consensus,
434
- };
435
-
436
- await writeFile(
437
- join(DEBATES_DIR, `${state.debate_id}.consensus.json`),
438
- `${JSON.stringify(consensus, null, 2)}\n`,
439
- "utf-8",
440
- );
441
- pi.appendEntry("harness-consensus-packet", consensus);
442
- pi.appendEntry("harness-debate-envelope", envelope);
443
- await writeDebateEvent(state.debate_id, envelope);
444
- return decision;
445
- }
35
+ const hooks = {
36
+ appendEntry: (customType: string, data: unknown) =>
37
+ pi.appendEntry(customType, data),
38
+ };
446
39
 
447
40
  pi.on("session_start", async (_event, ctx) => {
448
41
  const entries = ctx.sessionManager.getEntries();
@@ -452,7 +45,7 @@ export default function debateOrchestrator(pi: ExtensionAPI) {
452
45
  entry.type === "custom" &&
453
46
  entry.customType === "harness-debate-state"
454
47
  ) {
455
- state = entry.data as DebateState;
48
+ restoreDebateStateFromEntry(entry.data);
456
49
  break;
457
50
  }
458
51
  }
@@ -461,13 +54,21 @@ export default function debateOrchestrator(pi: ExtensionAPI) {
461
54
  pi.registerCommand("harness-debate-open", {
462
55
  description: "Open a headless debate session",
463
56
  handler: async (args, ctx) => {
57
+ const runId = getRunId(ctx);
464
58
  const trimmed = args.trim();
465
- let debateId = trimmed;
466
- if (!debateId) debateId = `debate-${Date.now()}`;
467
- await openDebate(getRunId(ctx), debateId);
59
+ const { debateId, warning } = normalizePlanDebateId(trimmed, runId);
60
+ await openDebateBus(runId, debateId, hooks);
61
+ if (debateId.startsWith("plan-")) {
62
+ await initPlanMessenger(
63
+ join(process.cwd(), ".pi", "harness", "runs", runId),
64
+ { runId, debateId },
65
+ );
66
+ }
468
67
  pi.sendMessage({
469
68
  customType: "harness-debate-opened",
470
- content: `Debate opened: ${debateId}`,
69
+ content: warning
70
+ ? `Debate opened: ${debateId} (${warning})`
71
+ : `Debate opened: ${debateId}`,
471
72
  display: false,
472
73
  });
473
74
  },
@@ -476,10 +77,12 @@ export default function debateOrchestrator(pi: ExtensionAPI) {
476
77
  pi.registerCommand("harness-debate-round", {
477
78
  description: "Submit a debate round envelope JSON",
478
79
  handler: async (args, ctx) => {
479
- if (!state) {
480
- await openDebate(getRunId(ctx), `debate-${Date.now()}`);
80
+ if (!getDebateState()) {
81
+ const runId = getRunId(ctx);
82
+ const { debateId } = normalizePlanDebateId("", runId);
83
+ await openDebateBus(runId, debateId, hooks);
481
84
  }
482
- const envelope = parseEnvelope(args.trim());
85
+ const envelope = parseRoundEnvelope(args.trim());
483
86
  if (!envelope) {
484
87
  pi.sendMessage({
485
88
  customType: "harness-debate-round-error",
@@ -489,7 +92,7 @@ export default function debateOrchestrator(pi: ExtensionAPI) {
489
92
  });
490
93
  return;
491
94
  }
492
- const result = await acceptRound(envelope);
95
+ const result = await acceptDebateRound(envelope, hooks);
493
96
  if (!result.ok) {
494
97
  pi.sendMessage({
495
98
  customType: "harness-debate-round-rejected",
@@ -503,7 +106,7 @@ export default function debateOrchestrator(pi: ExtensionAPI) {
503
106
  pi.registerCommand("harness-debate-consensus", {
504
107
  description: "Finalize debate and emit consensus packet",
505
108
  handler: async (args) => {
506
- if (!state) {
109
+ if (!getDebateState()) {
507
110
  pi.sendMessage({
508
111
  customType: "harness-debate-consensus-error",
509
112
  content: "No active debate to finalize.",
@@ -511,8 +114,9 @@ export default function debateOrchestrator(pi: ExtensionAPI) {
511
114
  });
512
115
  return;
513
116
  }
514
- const decision = await finalizeConsensus(
117
+ const decision = await finalizeDebateConsensus(
515
118
  args.trim() || "Consensus generated by debate-orchestrator.",
119
+ hooks,
516
120
  );
517
121
  pi.sendMessage({
518
122
  customType: "harness-debate-consensus",