ultimate-pi 0.13.0 → 0.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. package/{.pi → .agents}/skills/ccc/SKILL.md +1 -7
  2. package/.agents/skills/ccc/references/settings.md +126 -0
  3. package/.agents/skills/harness-debate-plan/SKILL.md +61 -21
  4. package/.agents/skills/harness-orchestration/SKILL.md +1 -1
  5. package/.pi/agents/harness/planning/plan-adversary.md +2 -2
  6. package/.pi/agents/harness/planning/plan-evaluator.md +3 -1
  7. package/.pi/agents/harness/planning/review-integrator.md +4 -2
  8. package/.pi/extensions/debate-orchestrator.ts +39 -435
  9. package/.pi/extensions/harness-debate-tools.ts +519 -0
  10. package/.pi/extensions/harness-plan-approval.ts +41 -17
  11. package/.pi/extensions/harness-run-context.ts +18 -0
  12. package/.pi/extensions/lib/debate-bus-core.ts +434 -0
  13. package/.pi/extensions/lib/debate-bus-state.ts +58 -0
  14. package/.pi/extensions/lib/harness-spawn-budget.ts +5 -25
  15. package/.pi/extensions/lib/plan-approval/dialog.ts +33 -272
  16. package/.pi/extensions/lib/plan-approval/format-plan.ts +12 -85
  17. package/.pi/extensions/lib/plan-approval/plan-review.ts +6 -6
  18. package/.pi/extensions/lib/plan-approval/render.ts +6 -0
  19. package/.pi/extensions/lib/plan-approval/validate.ts +1 -1
  20. package/.pi/extensions/lib/plan-debate-envelope.ts +2 -0
  21. package/.pi/extensions/lib/plan-debate-gate.ts +155 -0
  22. package/.pi/extensions/lib/plan-debate-id.ts +39 -0
  23. package/.pi/extensions/lib/plan-debate-lane.ts +220 -0
  24. package/.pi/extensions/lib/plan-debate-round-status.ts +94 -0
  25. package/.pi/extensions/lib/plan-debate-write-guard.ts +20 -0
  26. package/.pi/extensions/lib/plan-messenger.ts +276 -0
  27. package/.pi/extensions/lib/plan-review-integrator-rules.ts +119 -0
  28. package/.pi/extensions/lib/plan-scope-guard.ts +89 -0
  29. package/.pi/harness/agents.manifest.json +7 -7
  30. package/.pi/prompts/harness-plan.md +22 -12
  31. package/CHANGELOG.md +18 -0
  32. package/THIRD_PARTY_NOTICES.md +1 -1
  33. package/package.json +3 -3
  34. package/.agents/skills/ck-search/SKILL.md +0 -23
  35. package/.agents/skills/cocoindex-search/SKILL.md +0 -35
  36. package/.agents/skills/obsidian-bases/SKILL.md +0 -299
  37. package/.agents/skills/obsidian-markdown/SKILL.md +0 -237
  38. package/.pi/extensions/lib/plan-approval/fallback.ts +0 -50
  39. package/{.pi → .agents}/skills/ccc/references/management.md +0 -0
@@ -0,0 +1,434 @@
1
+ /**
2
+ * Headless pi-debate-bus/v1 transport (shared by commands + harness debate tools).
3
+ */
4
+
5
+ import { appendFile, mkdir, writeFile } from "node:fs/promises";
6
+ import { join } from "node:path";
7
+ import {
8
+ type DebateParticipant,
9
+ debatePhaseFromId,
10
+ isPlanDebateId,
11
+ PLAN_DEBATE_PARTICIPANTS,
12
+ POST_EXECUTE_DEBATE_PARTICIPANTS,
13
+ } from "../../lib/debate-orchestrator-types.js";
14
+ import {
15
+ type DebateState,
16
+ getDebateState,
17
+ getLastSeverity,
18
+ type SeverityScores,
19
+ setDebateState,
20
+ setLastSeverity,
21
+ } from "./debate-bus-state.js";
22
+
23
/**
 * Outcome produced by the severity/evidence policy when a debate is
 * finalized.
 */
export type PolicyDecision = "pass" | "conditional_pass" | "block" | "human_required";
28
+
29
/** Token accounting reported with a single debate round. */
interface RoundTokenUsage {
  /** Tokens per agent; each value is compared against the round token cap. */
  per_agent: Record<string, number>;
  /** Total for the round; added to the debate's running budget. */
  round_total: number;
}

/** Payload carried by a `kind: "round"` bus envelope. */
export interface RoundPayload {
  /** Senders taking part in the round; each is checked against the debate phase. */
  participants: DebateParticipant[];
  claims: string[];
  rebuttals: string[];
  evidence_refs: string[];
  token_usage: RoundTokenUsage;
  consensus_delta: number;
  /** When present, replaces the last recorded severity scores (clamped to 0..1). */
  severity_scores?: SeverityScores;
  /** When a boolean, overwrites the debate state's `last_review_gate_ready`. */
  review_gate_ready?: boolean;
}
42
+
43
/** Routing metadata attached to every bus envelope. */
interface BusCorrelation {
  run_id: string;
  debate_id: string;
  /** Round the envelope refers to; omitted on the `open` envelope. */
  round_index?: number;
  sender: DebateParticipant | "system";
}

/**
 * Wire format of one pi-debate-bus/v1 message.
 *
 * @typeParam T - Shape of the payload for the given `kind`.
 */
export interface BusEnvelope<T = unknown> {
  protocol: "pi-debate-bus/v1";
  kind: "open" | "round" | "consensus" | "budget_exhausted";
  correlation: BusCorrelation;
  payload: T;
}
54
+
55
/**
 * Directory receiving per-debate `.jsonl` event logs and consensus files.
 * Resolved once, from the working directory at module load.
 */
const DEBATES_DIR = join(process.cwd(), ".pi", "harness", "debates");

/** Weights combined into the evidence score when a debate is finalized. */
const WEIGHTS = {
  claim_quality: 0.2,
  reproducibility: 0.4,
  agreement: 0.4,
};

/** Severity levels at or above which the policy decision becomes "block". */
const THRESHOLDS = {
  correctness: 0.7,
  security: 0.7,
  architecture: 0.8,
  test_integrity: 0.8,
};

/**
 * When the HARNESS_DEBATE_HARD_STOP environment variable is exactly "true",
 * a round that breaches a cap is rejected; otherwise the breach is only
 * recorded as a budget_exhausted event and the round is still accepted.
 */
const HARD_STOP_DEBATE_CAPS = process.env.HARNESS_DEBATE_HARD_STOP === "true";

/** Budget applied to debates whose id is recognised as a plan debate. */
const PLAN_BUDGET = {
  max_rounds: 4,
  round_token_cap: 2000,
  debate_global_cap: 12000,
} as const;

/** Budget applied to every other debate. */
const AGGRESSIVE_BUDGET = {
  max_rounds: 6,
  round_token_cap: 2500,
  debate_global_cap: 35000,
} as const;
80
+
81
/** Returns the current instant as an ISO-8601 UTC timestamp string. */
function nowIso(): string {
  const stamp = new Date();
  return stamp.toISOString();
}
84
+
85
/**
 * Coerces an arbitrary value to a number clamped to the range [0, 1].
 *
 * @param value - Anything `Number()` accepts.
 * @returns 0 for NaN, infinite, or non-positive results; 1 for results
 *   above 1; otherwise the coerced number itself.
 */
function toSafeFloat(value: unknown): number {
  const numeric = Number(value);
  // NaN and both infinities are non-finite and collapse to 0.
  if (!Number.isFinite(numeric)) return 0;
  // `<=` rather than `<` so that -0 is normalised to +0 as well.
  if (numeric <= 0) return 0;
  return numeric > 1 ? 1 : numeric;
}
90
+
91
/**
 * Selects the budget profile for a debate.
 *
 * @param debateId - Debate identifier; ids recognised by `isPlanDebateId`
 *   get the plan budget, everything else the aggressive budget.
 * @returns The profile name together with its three caps.
 */
export function capsForDebate(debateId: string): {
  name: "plan" | "aggressive";
  max_rounds: number;
  round_token_cap: number;
  debate_global_cap: number;
} {
  return isPlanDebateId(debateId)
    ? { name: "plan", ...PLAN_BUDGET }
    : { name: "aggressive", ...AGGRESSIVE_BUDGET };
}
102
+
103
/**
 * Reports whether a participant may speak in the given debate phase.
 *
 * @param participant - Participant name taken from a round payload.
 * @param phase - Phase of the active debate; any value other than "plan"
 *   is checked against the post-execute roster.
 */
function participantAllowed(
  participant: string,
  phase: DebateState["debate_phase"],
): boolean {
  const roster = (
    phase === "plan"
      ? PLAN_DEBATE_PARTICIPANTS
      : POST_EXECUTE_DEBATE_PARTICIPANTS
  ) as readonly string[];
  return roster.includes(participant);
}
116
+
117
/** Creates the debates directory, and any missing parents, if absent. */
async function ensureDebatesDir(): Promise<void> {
  const options = { recursive: true } as const;
  await mkdir(DEBATES_DIR, options);
}
120
+
121
/**
 * Appends one event as a JSON line to the debate's `.jsonl` log.
 *
 * @param debateId - Debate whose log file receives the event.
 * @param event - Value serialised with `JSON.stringify`.
 */
export async function writeDebateEvent(
  debateId: string,
  event: unknown,
): Promise<void> {
  await ensureDebatesDir();
  // NOTE(review): debateId is interpolated into the file name unescaped —
  // confirm callers only pass ids that cannot contain path separators.
  const logFile = join(DEBATES_DIR, `${debateId}.jsonl`);
  const line = `${JSON.stringify(event)}\n`;
  await appendFile(logFile, line, "utf-8");
}
129
+
130
/**
 * Maps severity scores and an evidence confidence to a policy decision.
 *
 * @param severity - Latest severity scores for the debate.
 * @param minEvidenceConfidence - Evidence score in the range 0..1.
 * @returns "block" when any severity reaches its threshold; otherwise
 *   "human_required" below 0.55, "conditional_pass" below 0.75, else "pass".
 */
function decidePolicy(
  severity: SeverityScores,
  minEvidenceConfidence: number,
): PolicyDecision {
  const dimensions = Object.keys(THRESHOLDS) as Array<keyof typeof THRESHOLDS>;
  const anyBlocking = dimensions.some(
    (dimension) => severity[dimension] >= THRESHOLDS[dimension],
  );
  if (anyBlocking) {
    return "block";
  }

  if (minEvidenceConfidence < 0.55) {
    return "human_required";
  }
  return minEvidenceConfidence < 0.75 ? "conditional_pass" : "pass";
}
146
+
147
/** True for non-null, non-array objects (the only shapes an envelope part may take). */
function isEnvelopeObject(value: unknown): boolean {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}

/**
 * Parses a raw JSON string into a round envelope.
 *
 * @param raw - JSON text expected to hold a pi-debate-bus/v1 envelope.
 * @returns The envelope when the text is valid JSON, declares protocol
 *   "pi-debate-bus/v1" and kind "round", and has object-valued
 *   `correlation` and `payload`; otherwise null.
 */
export function parseRoundEnvelope(
  raw: string,
): BusEnvelope<RoundPayload> | null {
  let candidate: unknown;
  try {
    candidate = JSON.parse(raw);
  } catch {
    return null;
  }
  if (!isEnvelopeObject(candidate)) return null;

  const envelope = candidate as Partial<BusEnvelope<RoundPayload>>;
  if (envelope.protocol !== "pi-debate-bus/v1") return null;
  if (envelope.kind !== "round") return null;
  // Consumers dereference correlation.debate_id and payload.participants
  // directly, so an envelope missing either object is rejected here rather
  // than surfacing later as a TypeError.
  if (!isEnvelopeObject(envelope.correlation)) return null;
  if (!isEnvelopeObject(envelope.payload)) return null;
  return envelope as BusEnvelope<RoundPayload>;
}
159
+
160
/** Callbacks the bus uses to report state and envelopes to its host. */
export interface DebateBusHooks {
  /**
   * Records one entry.
   *
   * @param customType - Entry tag such as "harness-debate-state" or
   *   "harness-debate-envelope".
   * @param data - Value associated with the entry.
   */
  appendEntry: (customType: string, data: unknown) => void;
}
163
+
164
/**
 * Starts a new debate: installs fresh state, zeroes the last severity
 * scores, and records an `open` envelope through the hooks and the
 * debate's event log.
 *
 * @param runId - Run the debate belongs to.
 * @param debateId - Debate identifier; also selects budget and phase.
 * @param hooks - Sink for state and envelope entries.
 * @returns The newly installed debate state.
 */
export async function openDebateBus(
  runId: string,
  debateId: string,
  hooks: DebateBusHooks,
): Promise<DebateState> {
  const budget = capsForDebate(debateId);
  const phase = debatePhaseFromId(debateId);

  const opened: DebateState = {
    run_id: runId,
    debate_id: debateId,
    debate_phase: phase,
    round_count: 0,
    budget_used: 0,
    max_rounds: budget.max_rounds,
    round_token_cap: budget.round_token_cap,
    debate_global_cap: budget.debate_global_cap,
    last_review_gate_ready: false,
  };
  setDebateState(opened);
  setLastSeverity({ correctness: 0, security: 0, architecture: 0, test_integrity: 0 });
  hooks.appendEntry("harness-debate-state", opened);

  const openEvent: BusEnvelope = {
    protocol: "pi-debate-bus/v1",
    kind: "open",
    correlation: { run_id: runId, debate_id: debateId, sender: "system" },
    payload: {
      opened_at: nowIso(),
      debate_phase: phase,
      budget_profile: budget.name,
    },
  };
  hooks.appendEntry("harness-debate-envelope", openEvent);
  await writeDebateEvent(debateId, openEvent);

  return opened;
}
208
+
209
/**
 * Records a `budget_exhausted` envelope for the given debate state, both
 * through the hooks and in the debate's event log.
 *
 * @param state - Debate state at the moment the cap was hit.
 * @param reason - Machine-readable exhaustion reason.
 * @param hooks - Sink for the envelope and its payload.
 */
async function emitBudgetExhausted(
  state: DebateState,
  reason: string,
  hooks: DebateBusHooks,
): Promise<void> {
  const { run_id, debate_id, round_count, budget_used } = state;

  const exhausted = {
    schema_version: "1.0.0",
    contract_version: "1.0.0",
    event_type: "budget_exhausted",
    run_id,
    debate_id,
    round_count,
    budget_used,
    exhaustion_reason: reason,
    caps: {
      max_rounds: state.max_rounds,
      round_token_cap: state.round_token_cap,
      debate_global_cap: state.debate_global_cap,
    },
    minimum_evidence_confidence: 0.6,
    default_policy_outcome: "block",
    human_override_allowed: true,
  };

  const notice: BusEnvelope = {
    protocol: "pi-debate-bus/v1",
    kind: "budget_exhausted",
    correlation: {
      run_id,
      debate_id,
      round_index: round_count,
      sender: "system",
    },
    payload: exhausted,
  };

  hooks.appendEntry("harness-debate-envelope", notice);
  hooks.appendEntry("harness-budget-exhausted", notice.payload);
  await writeDebateEvent(debate_id, notice);
}
246
+
247
/**
 * Validates a round envelope against the active debate and, when accepted,
 * advances the debate state and records the round.
 *
 * Checks, in order: an active debate exists, the debate id matches, every
 * participant is allowed for the phase, then the three budget caps. A cap
 * breach always emits a `budget_exhausted` event; the round is rejected
 * only when hard-stop mode is enabled.
 *
 * @param envelope - Parsed round envelope.
 * @param hooks - Sink for state, round-result and envelope entries.
 * @returns `{ ok: true, state }` on acceptance, otherwise `{ ok: false,
 *   reason }`.
 */
export async function acceptDebateRound(
  envelope: BusEnvelope<RoundPayload>,
  hooks: DebateBusHooks,
): Promise<{ ok: boolean; reason?: string; state?: DebateState }> {
  const state = getDebateState();
  if (!state) return { ok: false, reason: "no active debate" };
  if (state.debate_id !== envelope.correlation.debate_id) {
    return { ok: false, reason: "debate id mismatch" };
  }

  const { payload } = envelope;

  for (const p of payload.participants ?? []) {
    if (!participantAllowed(p, state.debate_phase)) {
      return {
        ok: false,
        reason: `participant ${p} invalid for debate_phase=${state.debate_phase}`,
      };
    }
  }

  const nextRound = state.round_count + 1;
  if (nextRound > state.max_rounds) {
    await emitBudgetExhausted(state, "max_rounds_reached", hooks);
    if (HARD_STOP_DEBATE_CAPS) {
      return { ok: false, reason: "max rounds reached" };
    }
  }

  const perAgent = payload.token_usage?.per_agent ?? {};
  for (const [agent, tokens] of Object.entries(perAgent)) {
    if (Number(tokens) > state.round_token_cap) {
      await emitBudgetExhausted(state, "round_token_cap_exceeded", hooks);
      if (HARD_STOP_DEBATE_CAPS) {
        return { ok: false, reason: `round cap exceeded by ${agent}` };
      }
    }
  }

  // A non-numeric total would turn budget_used into NaN, after which every
  // later cap comparison is false; a negative total would refund budget.
  // Both are counted as zero spend instead.
  const reportedTotal = Number(payload.token_usage?.round_total ?? 0);
  const roundTotal =
    Number.isFinite(reportedTotal) && reportedTotal > 0 ? reportedTotal : 0;
  if (state.budget_used + roundTotal > state.debate_global_cap) {
    await emitBudgetExhausted(state, "debate_global_cap_exceeded", hooks);
    if (HARD_STOP_DEBATE_CAPS) {
      return { ok: false, reason: "global cap exceeded" };
    }
  }

  // Publish a fresh object rather than mutating the live one, so entries
  // already handed to appendEntry are not rewritten by later rounds.
  const advanced: DebateState = {
    ...state,
    round_count: nextRound,
    budget_used: state.budget_used + roundTotal,
  };
  if (typeof payload.review_gate_ready === "boolean") {
    advanced.last_review_gate_ready = payload.review_gate_ready;
  }
  setDebateState(advanced);
  hooks.appendEntry("harness-debate-state", advanced);

  const scores = payload.severity_scores;
  if (scores) {
    setLastSeverity({
      correctness: toSafeFloat(scores.correctness),
      security: toSafeFloat(scores.security),
      architecture: toSafeFloat(scores.architecture),
      test_integrity: toSafeFloat(scores.test_integrity),
    });
  }

  const profileName =
    advanced.debate_phase === "plan"
      ? ("plan" as const)
      : ("aggressive" as const);

  const roundRecord = {
    schema_version: "1.0.0",
    contract_version: "1.0.0",
    run_id: advanced.run_id,
    debate_id: advanced.debate_id,
    round_index: advanced.round_count,
    participants: payload.participants,
    claims: payload.claims,
    rebuttals: payload.rebuttals,
    evidence_refs: payload.evidence_refs,
    token_usage: payload.token_usage,
    budget_profile: {
      name: profileName,
      max_rounds: advanced.max_rounds,
      round_token_cap: advanced.round_token_cap,
      debate_global_cap: advanced.debate_global_cap,
    },
    consensus_delta: Number(payload.consensus_delta ?? 0),
    review_gate_ready: payload.review_gate_ready,
  };
  hooks.appendEntry("harness-round-result", roundRecord);
  hooks.appendEntry("harness-debate-envelope", envelope);
  await writeDebateEvent(advanced.debate_id, envelope);
  return { ok: true, state: advanced };
}
342
+
343
/**
 * Computes the policy decision for the active debate and publishes the
 * consensus packet: a `<debate_id>.consensus.json` file, hook entries, and
 * a `consensus` envelope in the debate's event log.
 *
 * @param rationale - Free-text justification stored in the packet.
 * @param hooks - Sink for the consensus packet and envelope.
 * @returns The policy decision, or null when no debate is active.
 */
export async function finalizeDebateConsensus(
  rationale: string,
  hooks: DebateBusHooks,
): Promise<PolicyDecision | null> {
  const state = getDebateState();
  if (!state) return null;
  const lastSeverity = getLastSeverity();

  // NOTE(review): a higher correctness severity raises the claim-quality
  // term below — confirm that is the intended direction.
  const claimTerm = lastSeverity.correctness * WEIGHTS.claim_quality;
  const reproducibilityTerm =
    (1 - Math.max(lastSeverity.security, lastSeverity.test_integrity)) *
    WEIGHTS.reproducibility;
  const agreementTerm =
    Math.max(
      0,
      1 - Math.abs(lastSeverity.architecture - lastSeverity.correctness),
    ) * WEIGHTS.agreement;
  const evidenceScore = Math.max(
    0,
    Math.min(1, claimTerm + reproducibilityTerm + agreementTerm),
  );

  const decision = decidePolicy(lastSeverity, evidenceScore);
  const planPhase = state.debate_phase === "plan";
  const evaluatorPassed = planPhase
    ? Boolean(state.last_review_gate_ready)
    : true;
  const debateComplete = planPhase
    ? state.round_count >= state.max_rounds
    : state.round_count > 0;

  const consensus = {
    schema_version: "1.0.0",
    contract_version: "1.0.0",
    run_id: state.run_id,
    debate_id: state.debate_id,
    debate_phase: state.debate_phase,
    round_count: state.round_count,
    budget_used: state.budget_used,
    severity_scores: lastSeverity,
    severity_thresholds: {
      correctness_block_at: THRESHOLDS.correctness,
      security_block_at: THRESHOLDS.security,
      architecture_block_at: THRESHOLDS.architecture,
      test_integrity_block_at: THRESHOLDS.test_integrity,
    },
    confidence_weights: WEIGHTS,
    evidence_refs: [],
    // Plan-phase debates run before execution, so the plan gate and
    // execution flags are false there and true for every other phase.
    strict_gate_prerequisites: {
      plan_gate_passed: !planPhase,
      execution_completed: !planPhase,
      evaluator_passed: evaluatorPassed,
      adversarial_debate_completed: debateComplete,
      severity_policy_ok: decision !== "block",
      benchmark_delta_checks_passed: false,
      rollback_artifacts_generated: false,
    },
    policy_decision: decision,
    rationale,
  };

  const envelope: BusEnvelope = {
    protocol: "pi-debate-bus/v1",
    kind: "consensus",
    correlation: {
      run_id: state.run_id,
      debate_id: state.debate_id,
      round_index: state.round_count,
      sender: "system",
    },
    payload: consensus,
  };

  // The state may have been restored rather than opened in this working
  // directory, so the debates directory is not guaranteed to exist yet.
  await ensureDebatesDir();
  await writeFile(
    join(DEBATES_DIR, `${state.debate_id}.consensus.json`),
    `${JSON.stringify(consensus, null, 2)}\n`,
    "utf-8",
  );
  hooks.appendEntry("harness-consensus-packet", consensus);
  hooks.appendEntry("harness-debate-envelope", envelope);
  await writeDebateEvent(state.debate_id, envelope);
  return decision;
}
@@ -0,0 +1,58 @@
1
+ /**
2
+ * Shared in-process debate bus state (one session per Pi process).
3
+ */
4
+
5
+ import type { DebateParticipant } from "../../lib/debate-orchestrator-types.js";
6
+
7
/** Stage of the harness a debate belongs to. */
export type DebatePhase = "plan" | "post_execute";

/** Mutable bookkeeping for the single debate active in this process. */
export interface DebateState {
  run_id: string;
  debate_id: string;
  debate_phase: DebatePhase;
  /** Number of rounds accepted so far. */
  round_count: number;
  /** Sum of the round token totals accepted so far. */
  budget_used: number;
  /** Cap on the number of rounds. */
  max_rounds: number;
  /** Cap on a single agent's tokens within one round. */
  round_token_cap: number;
  /** Cap on `budget_used` across the whole debate. */
  debate_global_cap: number;
  /** Most recent `review_gate_ready` flag reported by a round, if any. */
  last_review_gate_ready?: boolean;
}
20
+
21
/** Severity reported per review dimension. */
export interface SeverityScores {
  correctness: number;
  security: number;
  architecture: number;
  test_integrity: number;
}
27
+
28
/** State of the active debate, or null when none has been opened or restored. */
let state: DebateState | null = null;

/** Severity scores from the most recent round that reported any. */
let lastSeverity: SeverityScores = {
  correctness: 0,
  security: 0,
  architecture: 0,
  test_integrity: 0,
};

/** Returns the active debate state, or null when there is none. */
export function getDebateState(): DebateState | null {
  return state;
}

/** Replaces the active debate state; pass null to clear it. */
export function setDebateState(next: DebateState | null): void {
  state = next;
}

/** Returns the most recently stored severity scores. */
export function getLastSeverity(): SeverityScores {
  return lastSeverity;
}

/** Replaces the stored severity scores. */
export function setLastSeverity(next: SeverityScores): void {
  lastSeverity = next;
}

/** Numeric `DebateState` fields that budget arithmetic relies on. */
const DEBATE_STATE_NUMERIC_FIELDS = [
  "round_count",
  "budget_used",
  "max_rounds",
  "round_token_cap",
  "debate_global_cap",
] as const;

/** Checks that a persisted entry has the fields and types of a `DebateState`. */
function isDebateStateEntry(data: unknown): data is DebateState {
  if (typeof data !== "object" || data === null || Array.isArray(data)) {
    return false;
  }
  const entry = data as Record<string, unknown>;
  if (typeof entry.run_id !== "string") return false;
  if (typeof entry.debate_id !== "string") return false;
  if (entry.debate_phase !== "plan" && entry.debate_phase !== "post_execute") {
    return false;
  }
  const gate = entry.last_review_gate_ready;
  if (gate !== undefined && typeof gate !== "boolean") return false;
  return DEBATE_STATE_NUMERIC_FIELDS.every(
    (field) => typeof entry[field] === "number" && Number.isFinite(entry[field]),
  );
}

/**
 * Restores the active debate state from a previously recorded entry.
 *
 * Entries that do not look like a `DebateState` (wrong type, missing ids,
 * unknown phase, non-finite counters) are ignored and the current state is
 * left untouched. A copy is stored, so later changes to the state do not
 * write through to the entry object.
 *
 * @param data - Value previously recorded as a debate state entry.
 */
export function restoreDebateStateFromEntry(data: unknown): void {
  if (isDebateStateEntry(data)) {
    state = { ...data };
  }
}
57
+
58
+ export type { DebateParticipant };
@@ -1,10 +1,8 @@
1
1
  /**
2
- * Harness subagent spawn caps (subprocess model).
2
+ * Harness subagent spawn accounting (subprocess model).
3
+ * No session caps — parallel batches are limited only by host resources.
3
4
  */
4
5
 
5
- export const HARNESS_MAX_ACTIVE_SUBAGENTS = 8;
6
- export const HARNESS_MAX_SUBAGENT_SPAWNS_PER_SESSION = 12;
7
-
8
6
  export function isHarnessAgentType(type: string): boolean {
9
7
  return type.startsWith("harness/");
10
8
  }
@@ -33,29 +31,11 @@ export function countHarnessAgentsInRequest(params: {
33
31
  return { harnessCount: harness.length, agents: harness };
34
32
  }
35
33
 
34
+ /** Always allows spawn; state is tracked for telemetry only. */
36
35
  export function checkHarnessSpawnBudget(
37
- state: SpawnBudgetState,
38
- incomingHarnessTasks: number,
36
+ _state: SpawnBudgetState,
37
+ _incomingHarnessTasks: number,
39
38
  ): { ok: boolean; message?: string } {
40
- if (state.active + incomingHarnessTasks > HARNESS_MAX_ACTIVE_SUBAGENTS) {
41
- return {
42
- ok: false,
43
- message:
44
- `Harness subagent limit reached (${state.active} active + ${incomingHarnessTasks} requested > ${HARNESS_MAX_ACTIVE_SUBAGENTS}). ` +
45
- `Wait for in-flight subagent calls to finish before spawning more.`,
46
- };
47
- }
48
- if (
49
- state.totalHarnessSpawns + incomingHarnessTasks >
50
- HARNESS_MAX_SUBAGENT_SPAWNS_PER_SESSION
51
- ) {
52
- return {
53
- ok: false,
54
- message:
55
- `Harness subagent spawn cap reached (${state.totalHarnessSpawns + incomingHarnessTasks}/${HARNESS_MAX_SUBAGENT_SPAWNS_PER_SESSION} this session). ` +
56
- `Finish the current harness phase or start a new session.`,
57
- };
58
- }
59
39
  return { ok: true };
60
40
  }
61
41