ultimate-pi 0.7.0 → 0.9.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (115)
  1. package/.agents/skills/harness-decisions/SKILL.md +20 -1
  2. package/.agents/skills/harness-eval/SKILL.md +11 -13
  3. package/.agents/skills/harness-orchestration/SKILL.md +36 -30
  4. package/.agents/skills/harness-plan/SKILL.md +13 -18
  5. package/.pi/PACKAGING.md +1 -1
  6. package/.pi/agents/harness/adversary.md +20 -12
  7. package/.pi/agents/harness/evaluator.md +25 -14
  8. package/.pi/agents/harness/executor.md +27 -16
  9. package/.pi/agents/harness/incident-recorder.md +37 -0
  10. package/.pi/agents/harness/meta-optimizer.md +18 -15
  11. package/.pi/agents/harness/planner.md +26 -30
  12. package/.pi/agents/harness/tie-breaker.md +4 -2
  13. package/.pi/agents/harness/trace-librarian.md +18 -11
  14. package/.pi/agents/pi-pi/ext-expert.md +1 -1
  15. package/.pi/agents/pi-pi/keybinding-expert.md +1 -1
  16. package/.pi/agents/pi-pi/tui-expert.md +3 -3
  17. package/.pi/extensions/00-ultimate-pi-system-prompt.ts +2 -2
  18. package/.pi/extensions/budget-guard.ts +47 -18
  19. package/.pi/extensions/custom-footer.ts +8 -3
  20. package/.pi/extensions/custom-header.ts +2 -2
  21. package/.pi/extensions/debate-orchestrator.ts +1 -1
  22. package/.pi/extensions/dotenv-loader.ts +1 -1
  23. package/.pi/extensions/drift-monitor.ts +1 -1
  24. package/.pi/extensions/harness-ask-user.ts +1 -1
  25. package/.pi/extensions/harness-live-widget.ts +1 -1
  26. package/.pi/extensions/harness-run-context.ts +197 -33
  27. package/.pi/extensions/harness-telemetry.ts +1 -1
  28. package/.pi/extensions/harness-web-guard.ts +1 -1
  29. package/.pi/extensions/harness-web-tools.ts +1 -1
  30. package/.pi/extensions/lib/ask-user/dialog.ts +2 -2
  31. package/.pi/extensions/lib/ask-user/fallback.ts +1 -1
  32. package/.pi/extensions/lib/ask-user/render.ts +3 -3
  33. package/.pi/extensions/lib/harness-subagents/agent-loader.ts +1 -1
  34. package/.pi/extensions/lib/harness-subagents/agent-parser.ts +1 -1
  35. package/.pi/extensions/lib/harness-subagents/blackboard-tool.ts +1 -1
  36. package/.pi/extensions/lib/harness-subagents/harness-subagent-policy.ts +134 -0
  37. package/.pi/extensions/lib/harness-subagents/parent-ask-user-bridge.ts +89 -0
  38. package/.pi/extensions/lib/harness-subagents/spawn-policy.ts +20 -2
  39. package/.pi/extensions/lib/harness-subagents/vendored/agent-manager.ts +3 -2
  40. package/.pi/extensions/lib/harness-subagents/vendored/agent-runner.ts +44 -24
  41. package/.pi/extensions/lib/harness-subagents/vendored/context.ts +1 -1
  42. package/.pi/extensions/lib/harness-subagents/vendored/env.ts +1 -1
  43. package/.pi/extensions/lib/harness-subagents/vendored/index.ts +23 -2
  44. package/.pi/extensions/lib/harness-subagents/vendored/output-file.ts +1 -1
  45. package/.pi/extensions/lib/harness-subagents/vendored/schedule.ts +1 -1
  46. package/.pi/extensions/lib/harness-subagents/vendored/settings.ts +1 -1
  47. package/.pi/extensions/lib/harness-subagents/vendored/skill-loader.ts +1 -1
  48. package/.pi/extensions/lib/harness-subagents/vendored/types.ts +2 -2
  49. package/.pi/extensions/lib/harness-subagents/vendored/ui/agent-widget.ts +1 -1
  50. package/.pi/extensions/lib/harness-subagents/vendored/ui/conversation-viewer.ts +2 -2
  51. package/.pi/extensions/lib/harness-subagents/vendored/ui/schedule-menu.ts +1 -1
  52. package/.pi/extensions/observation-bus.ts +1 -1
  53. package/.pi/extensions/pi-model-router-harness.ts +1 -1
  54. package/.pi/extensions/policy-gate.ts +90 -20
  55. package/.pi/extensions/provider-payload-sanitize.ts +1 -1
  56. package/.pi/extensions/review-integrity.ts +76 -22
  57. package/.pi/extensions/sentrux-rules-sync.ts +1 -1
  58. package/.pi/extensions/soundboard.ts +1 -1
  59. package/.pi/extensions/test-diff-integrity.ts +1 -1
  60. package/.pi/extensions/trace-recorder.ts +1 -1
  61. package/.pi/extensions/ultimate-pi-vcc.ts +1 -1
  62. package/.pi/harness/agents.manifest.json +82 -78
  63. package/.pi/harness/docs/adrs/0031-harness-run-context.md +6 -3
  64. package/.pi/harness/docs/adrs/0032-harness-command-orchestration.md +37 -0
  65. package/.pi/harness/docs/adrs/README.md +1 -0
  66. package/.pi/harness/specs/budget-exhausted-event.schema.json +3 -1
  67. package/.pi/harness/specs/harness-spawn-context.schema.json +65 -0
  68. package/.pi/harness/specs/harness-turn.schema.json +18 -0
  69. package/.pi/lib/harness-agent-output.ts +41 -0
  70. package/.pi/lib/harness-run-context.ts +516 -37
  71. package/.pi/lib/harness-ui-state.ts +1 -1
  72. package/.pi/prompts/harness-auto.md +36 -61
  73. package/.pi/prompts/harness-critic.md +15 -28
  74. package/.pi/prompts/harness-eval.md +19 -27
  75. package/.pi/prompts/harness-incident.md +15 -34
  76. package/.pi/prompts/harness-plan.md +28 -49
  77. package/.pi/prompts/harness-review.md +16 -30
  78. package/.pi/prompts/harness-router-tune.md +16 -38
  79. package/.pi/prompts/harness-run.md +21 -38
  80. package/.pi/prompts/harness-setup.md +2 -0
  81. package/.pi/prompts/harness-trace.md +13 -30
  82. package/.pi/scripts/harness-generate-model-router.mjs +16 -13
  83. package/.pi/scripts/harness-verify.mjs +17 -0
  84. package/.pi/scripts/vendor-sync-pi-model-router.sh +10 -10
  85. package/CHANGELOG.md +25 -1
  86. package/README.md +4 -5
  87. package/THIRD_PARTY_NOTICES.md +1 -1
  88. package/package.json +13 -8
  89. package/vendor/pi-model-router/UPSTREAM_PIN.md +1 -1
  90. package/vendor/pi-model-router/extensions/commands.ts +2 -2
  91. package/vendor/pi-model-router/extensions/config.ts +2 -2
  92. package/vendor/pi-model-router/extensions/index.ts +1 -1
  93. package/vendor/pi-model-router/extensions/provider.ts +2 -2
  94. package/vendor/pi-model-router/extensions/routing.ts +2 -2
  95. package/vendor/pi-model-router/extensions/types.ts +1 -1
  96. package/vendor/pi-model-router/extensions/ui.ts +1 -1
  97. package/vendor/pi-model-router/package.json +4 -4
  98. package/vendor/pi-vcc/index.ts +1 -1
  99. package/vendor/pi-vcc/package.json +1 -1
  100. package/vendor/pi-vcc/src/commands/pi-vcc.ts +1 -1
  101. package/vendor/pi-vcc/src/commands/vcc-recall.ts +1 -1
  102. package/vendor/pi-vcc/src/core/content.ts +1 -1
  103. package/vendor/pi-vcc/src/core/load-messages.ts +1 -1
  104. package/vendor/pi-vcc/src/core/normalize.ts +1 -1
  105. package/vendor/pi-vcc/src/core/render-entries.ts +1 -1
  106. package/vendor/pi-vcc/src/core/report.ts +1 -1
  107. package/vendor/pi-vcc/src/core/search-entries.ts +1 -1
  108. package/vendor/pi-vcc/src/core/summarize.ts +1 -1
  109. package/vendor/pi-vcc/src/hooks/before-compact.ts +2 -2
  110. package/vendor/pi-vcc/src/tools/recall.ts +1 -1
  111. package/vendor/pi-vcc/src/types.ts +1 -1
  112. package/vendor/pi-vcc/tests/fixtures.ts +1 -1
  113. package/vendor/pi-vcc/tests/render-entries.test.ts +1 -1
  114. package/vendor/pi-vcc/tests/search-entries.test.ts +1 -1
  115. package/vendor/pi-vcc/tests/support/load-session.ts +2 -2
@@ -8,17 +8,24 @@
8
8
  * - command surface via pi.registerCommand()
9
9
  */
10
10
 
11
- import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
11
+ import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
12
12
  import {
13
+ extractWritePathFromToolInput,
13
14
  getLatestRunContext,
14
15
  getPolicyTransitionBlock,
15
16
  hasApprovedPlanSignalFromUserPrompt,
16
17
  hasHarnessAbortSignal,
17
- inferHarnessPhaseFromPrompt,
18
+ inferHarnessPhase,
19
+ isHarnessAutoSession,
18
20
  isHarnessBootstrapPrompt,
21
+ isPlanPhaseAllowedMutation,
22
+ isPlanPhaseScopedWrite,
23
+ normalizeHarnessPath,
24
+ readPlanPacketFromPath,
19
25
  saveProjectActiveRun,
20
26
  saveRunContextToDisk,
21
27
  userVisiblePromptSlice,
28
+ validatePlanPacket,
22
29
  } from "../lib/harness-run-context.js";
23
30
 
24
31
  type HarnessPhase = "plan" | "execute" | "evaluate" | "adversary" | "merge";
@@ -70,8 +77,8 @@ function nowIso(): string {
70
77
 
71
78
  function defaultState(): PolicyState {
72
79
  return {
73
- phase: "execute",
74
- approvedPlan: true,
80
+ phase: "plan",
81
+ approvedPlan: false,
75
82
  planId: null,
76
83
  budgetBypass: false,
77
84
  aborted: false,
@@ -134,6 +141,11 @@ function getLatestPolicyStateFull(ctx: {
134
141
  export default function policyGate(pi: ExtensionAPI) {
135
142
  let state = defaultState();
136
143
 
144
+ const appendPolicyState = (next: PolicyState): void => {
145
+ state = next;
146
+ pi.appendEntry("harness-policy-state", state);
147
+ };
148
+
137
149
  pi.on("session_start", async (_event, ctx) => {
138
150
  state = getLatestPolicyStateFull(ctx);
139
151
  });
@@ -141,6 +153,7 @@ export default function policyGate(pi: ExtensionAPI) {
141
153
  pi.on("before_agent_start", async (event, ctx) => {
142
154
  const userPrompt = userVisiblePromptSlice(event.prompt);
143
155
  const entries = ctx.sessionManager.getEntries();
156
+ state = getLatestPolicyStateFull(ctx);
144
157
  const bootstrapPrompt = isHarnessBootstrapPrompt(userPrompt);
145
158
  const abortSignal = hasHarnessAbortSignal(userPrompt);
146
159
 
@@ -185,7 +198,7 @@ export default function policyGate(pi: ExtensionAPI) {
185
198
  };
186
199
  }
187
200
 
188
- const nextPhase = inferHarnessPhaseFromPrompt(userPrompt);
201
+ const nextPhase = inferHarnessPhase(entries, userPrompt);
189
202
  const planSignal = hasApprovedPlanSignal(userPrompt, entries);
190
203
 
191
204
  const transitionBlock = getPolicyTransitionBlock(userPrompt, entries);
@@ -228,26 +241,41 @@ export default function policyGate(pi: ExtensionAPI) {
228
241
  state.updatedAt = nowIso();
229
242
  pi.appendEntry("harness-policy-state", state);
230
243
 
244
+ const planPhaseHint =
245
+ state.phase === "plan"
246
+ ? "\nPlan phase: present the full PlanPacket in chat, call ask_user (Approve / Request changes / Cancel), then write only the canonical plan-packet.json after Approve."
247
+ : "";
248
+
231
249
  return {
232
- systemPrompt: `${event.systemPrompt}\n\n[PolicyGate]\nPhase=${state.phase}; ApprovedPlan=${state.approvedPlan}; PlanId=${state.planId ?? "none"}; Aborted=${state.aborted}.`,
250
+ systemPrompt: `${event.systemPrompt}\n\n[PolicyGate]\nPhase=${state.phase}; ApprovedPlan=${state.approvedPlan}; PlanId=${state.planId ?? "none"}; Aborted=${state.aborted}.${planPhaseHint}`,
233
251
  };
234
252
  });
235
253
 
236
- pi.on("tool_call", async (event) => {
237
- if (state.aborted && MUTATING_TOOLS.has(event.toolName)) {
238
- return {
239
- block: true,
240
- reason:
241
- "policy-gate: mutating tool blocked because harness-abort lock is active. Attach a new approved plan first.",
242
- };
243
- }
254
+ pi.on("tool_call", async (event, ctx) => {
255
+ state = getLatestPolicyStateFull(ctx);
256
+ const entries = ctx.sessionManager.getEntries();
257
+ const projectRoot = process.cwd();
258
+ const sessionId = ctx.sessionManager.getSessionId();
259
+ const runCtx = getLatestRunContext(entries);
260
+
244
261
  if (MUTATING_TOOLS.has(event.toolName)) {
245
- if (state.phase !== "execute") {
246
- return {
247
- block: true,
248
- reason: `policy-gate: ${event.toolName} blocked in phase '${state.phase}'. Allowed only in execute phase.`,
249
- };
262
+ const decision = await isPlanPhaseAllowedMutation(
263
+ event.toolName,
264
+ event.input as Record<string, unknown>,
265
+ state.phase,
266
+ runCtx,
267
+ projectRoot,
268
+ {
269
+ aborted: state.aborted,
270
+ entries,
271
+ ownerSessionId: runCtx?.owner_pi_session_id,
272
+ currentSessionId: sessionId,
273
+ },
274
+ );
275
+ if (!decision.allowed) {
276
+ return { block: true, reason: decision.reason };
250
277
  }
278
+ return undefined;
251
279
  }
252
280
 
253
281
  if (event.toolName === "bash") {
@@ -260,7 +288,7 @@ export default function policyGate(pi: ExtensionAPI) {
260
288
  "policy-gate: mutating bash command blocked because harness-abort lock is active. Attach a new approved plan first.",
261
289
  };
262
290
  }
263
- if (state.phase !== "execute") {
291
+ if (state.phase !== "execute" && state.phase !== "merge") {
264
292
  return {
265
293
  block: true,
266
294
  reason: `policy-gate: mutating bash command blocked in phase '${state.phase}'.`,
@@ -271,6 +299,48 @@ export default function policyGate(pi: ExtensionAPI) {
271
299
  return undefined;
272
300
  });
273
301
 
302
+ pi.on("tool_result", async (event, ctx) => {
303
+ if (event.isError) return;
304
+ if (event.toolName !== "write" && event.toolName !== "edit") return;
305
+
306
+ const entries = ctx.sessionManager.getEntries();
307
+ state = getLatestPolicyStateFull(ctx);
308
+ const projectRoot = process.cwd();
309
+ const runCtx = getLatestRunContext(entries);
310
+ if (!runCtx) return;
311
+
312
+ const target = extractWritePathFromToolInput(
313
+ event.input as Record<string, unknown>,
314
+ );
315
+ if (!target) return;
316
+ const scoped = await isPlanPhaseScopedWrite(target, runCtx, projectRoot);
317
+ if (!scoped) return;
318
+
319
+ const planPath = normalizeHarnessPath(target, projectRoot);
320
+ const packet = await readPlanPacketFromPath(planPath);
321
+ const validation = validatePlanPacket(packet);
322
+ if (!validation.valid || !packet?.plan_id) return;
323
+
324
+ if (isHarnessAutoSession(entries)) {
325
+ state.phase = "execute";
326
+ state.approvedPlan = true;
327
+ state.planId = packet.plan_id;
328
+ state.aborted = false;
329
+ state.abortReason = null;
330
+ state.abortedAt = null;
331
+ state.updatedAt = nowIso();
332
+ appendPolicyState(state);
333
+
334
+ runCtx.plan_ready = true;
335
+ runCtx.plan_id = packet.plan_id;
336
+ runCtx.phase = "execute";
337
+ runCtx.updated_at = nowIso();
338
+ pi.appendEntry("harness-run-context", runCtx);
339
+ void saveRunContextToDisk(runCtx);
340
+ void saveProjectActiveRun(runCtx);
341
+ }
342
+ });
343
+
274
344
  pi.registerCommand("harness-abort", {
275
345
  description: "Safely abort current harness run and reset to plan phase",
276
346
  handler: async (args, ctx) => {
@@ -9,7 +9,7 @@
9
9
  import type {
10
10
  BeforeProviderRequestEvent,
11
11
  ExtensionAPI,
12
- } from "@mariozechner/pi-coding-agent";
12
+ } from "@earendil-works/pi-coding-agent";
13
13
 
14
14
  const CHAT_MESSAGE_EXTRA_KEYS = [
15
15
  "reasoning",
@@ -1,19 +1,34 @@
1
1
  /**
2
2
  * review-integrity — enforce evaluator/adversary isolation from executor session.
3
3
  *
4
- * If review phases (`evaluate`/`adversary`) run in the same session as execution,
5
- * tool calls are blocked until the review is isolated (fork/switch session).
4
+ * Parent orchestrators spawn review agents in isolated subagent sessions.
5
+ * Direct review tools in the executor session are blocked; Agent/get_subagent_result
6
+ * for harness review agents remain allowed.
6
7
  */
7
8
 
8
9
  import { appendFile, mkdir } from "node:fs/promises";
9
10
  import { join } from "node:path";
10
- import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
11
+ import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
11
12
 
12
13
  type HarnessPhase = "plan" | "execute" | "evaluate" | "adversary" | "merge";
13
14
 
14
15
  const INCIDENTS_DIR = join(process.cwd(), ".pi", "harness", "incidents");
15
16
  const INCIDENT_FILE = join(INCIDENTS_DIR, "review-integrity.jsonl");
16
17
 
18
+ const ORCHESTRATION_TOOLS = new Set([
19
+ "Agent",
20
+ "get_subagent_result",
21
+ "steer_subagent",
22
+ ]);
23
+
24
+ const REVIEW_SUBAGENT_TYPES = new Set([
25
+ "harness/evaluator",
26
+ "harness/adversary",
27
+ "harness/tie-breaker",
28
+ ]);
29
+
30
+ const EXECUTOR_SUBAGENT_TYPE = "harness/executor";
31
+
17
32
  interface IsolationState {
18
33
  executorSessionId: string | null;
19
34
  violationActive: boolean;
@@ -89,6 +104,17 @@ function restoreState(ctx: {
89
104
  };
90
105
  }
91
106
 
107
+ function subagentTypeFromInput(
108
+ input: Record<string, unknown> | undefined,
109
+ ): string {
110
+ if (!input) return "";
111
+ const direct = input.subagent_type;
112
+ if (typeof direct === "string") return direct;
113
+ const nested = input as { subagentType?: string };
114
+ if (typeof nested.subagentType === "string") return nested.subagentType;
115
+ return "";
116
+ }
117
+
92
118
  async function appendIncident(payload: Record<string, unknown>): Promise<void> {
93
119
  await mkdir(INCIDENTS_DIR, { recursive: true });
94
120
  await appendFile(
@@ -105,6 +131,10 @@ export default function reviewIntegrity(pi: ExtensionAPI) {
105
131
  updatedAt: nowIso(),
106
132
  };
107
133
 
134
+ const persist = (): void => {
135
+ pi.appendEntry("harness-review-integrity", state);
136
+ };
137
+
108
138
  pi.on("session_start", async (_event, ctx) => {
109
139
  state = restoreState(ctx);
110
140
  });
@@ -115,7 +145,7 @@ export default function reviewIntegrity(pi: ExtensionAPI) {
115
145
  state.executorSessionId = ctx.sessionManager.getSessionId();
116
146
  state.violationActive = false;
117
147
  state.updatedAt = nowIso();
118
- pi.appendEntry("harness-review-integrity", state);
148
+ persist();
119
149
  });
120
150
 
121
151
  pi.on("before_agent_start", async (_event, ctx) => {
@@ -125,7 +155,7 @@ export default function reviewIntegrity(pi: ExtensionAPI) {
125
155
  if (!inReview) {
126
156
  state.violationActive = false;
127
157
  state.updatedAt = nowIso();
128
- pi.appendEntry("harness-review-integrity", state);
158
+ persist();
129
159
  return undefined;
130
160
  }
131
161
 
@@ -135,42 +165,66 @@ export default function reviewIntegrity(pi: ExtensionAPI) {
135
165
  ) {
136
166
  state.violationActive = false;
137
167
  state.updatedAt = nowIso();
138
- pi.appendEntry("harness-review-integrity", state);
168
+ persist();
139
169
  return undefined;
140
170
  }
141
171
 
142
172
  state.violationActive = true;
143
173
  state.updatedAt = nowIso();
144
- pi.appendEntry("harness-review-integrity", state);
145
-
146
- await appendIncident({
147
- type: "review_integrity_violation",
148
- session_id: currentSessionId,
149
- phase,
150
- reason:
151
- "evaluator/adversary session is not isolated from executor session",
152
- mitigation:
153
- "fork or switch to a clean review session before running review tools",
154
- });
174
+ persist();
155
175
 
156
176
  return {
157
177
  message: {
158
- customType: "harness-review-integrity-block",
178
+ customType: "harness-review-integrity-hint",
159
179
  display: true,
160
180
  content: [
161
- "Review integrity violation: evaluator/adversary is sharing executor session context.",
162
- "Fork/switch session, then rerun review to maintain independent evaluation guarantees.",
181
+ "Review phase in executor session: spawn harness/evaluator or harness/adversary via Agent (isolated subagent context).",
182
+ "Do not run review checks directly in this session — use get_subagent_result after spawn.",
163
183
  ].join("\n"),
164
184
  },
165
185
  };
166
186
  });
167
187
 
168
- pi.on("tool_call", async (_event) => {
188
+ pi.on("tool_call", async (event, ctx) => {
189
+ if (event.toolName === "Agent") {
190
+ const subagentType = subagentTypeFromInput(
191
+ event.input as Record<string, unknown> | undefined,
192
+ );
193
+ if (subagentType === EXECUTOR_SUBAGENT_TYPE) {
194
+ state.executorSessionId = ctx.sessionManager.getSessionId();
195
+ state.violationActive = false;
196
+ state.updatedAt = nowIso();
197
+ persist();
198
+ return undefined;
199
+ }
200
+ if (REVIEW_SUBAGENT_TYPES.has(subagentType)) {
201
+ state.violationActive = false;
202
+ state.updatedAt = nowIso();
203
+ persist();
204
+ return undefined;
205
+ }
206
+ }
207
+
169
208
  if (!state.violationActive) return undefined;
209
+
210
+ if (ORCHESTRATION_TOOLS.has(event.toolName)) {
211
+ return undefined;
212
+ }
213
+
214
+ await appendIncident({
215
+ type: "review_integrity_violation",
216
+ session_id: ctx.sessionManager.getSessionId(),
217
+ tool: event.toolName,
218
+ reason:
219
+ "direct tool use in review phase while sharing executor session context",
220
+ mitigation:
221
+ "spawn harness/evaluator or harness/adversary via Agent instead",
222
+ });
223
+
170
224
  return {
171
225
  block: true,
172
226
  reason:
173
- "review-integrity: tool call blocked because review session is not isolated from executor context.",
227
+ "review-integrity: tool blocked in review phase spawn an isolated review subagent via Agent.",
174
228
  };
175
229
  });
176
230
 
@@ -3,7 +3,7 @@
3
3
  */
4
4
 
5
5
  import { spawn } from "node:child_process";
6
- import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
6
+ import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
7
7
  import { resolveHarnessScript } from "./lib/harness-paths.js";
8
8
 
9
9
  function resolveSyncScript(): string {
@@ -31,7 +31,7 @@ import {
31
31
  import type {
32
32
  ExtensionAPI,
33
33
  ExtensionCommandContext,
34
- } from "@mariozechner/pi-coding-agent";
34
+ } from "@earendil-works/pi-coding-agent";
35
35
 
36
36
  // ── Constants ──────────────────────────────────────────────────────
37
37
 
@@ -12,7 +12,7 @@
12
12
 
13
13
  import { appendFile, mkdir } from "node:fs/promises";
14
14
  import { join } from "node:path";
15
- import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
15
+ import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
16
16
 
17
17
  const INCIDENTS_DIR = join(process.cwd(), ".pi", "harness", "incidents");
18
18
  const INCIDENT_FILE = join(INCIDENTS_DIR, "test-diff-integrity.jsonl");
@@ -9,7 +9,7 @@
9
9
 
10
10
  import { appendFile, mkdir, readFile, writeFile } from "node:fs/promises";
11
11
  import { join } from "node:path";
12
- import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
12
+ import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
13
13
  import {
14
14
  getLatestRunContext,
15
15
  getRunIdFromSession,
@@ -9,7 +9,7 @@
9
9
  * - `HARNESS_VCC_DEBUG` — set `true` to write `/tmp/pi-vcc-debug.json` on compaction
10
10
  */
11
11
 
12
- import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
12
+ import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
13
13
  import registerVcc from "../../vendor/pi-vcc/index.js";
14
14
 
15
15
  export default function ultimatePiVcc(pi: ExtensionAPI): void {
@@ -1,80 +1,84 @@
1
1
  {
2
- "schema_version": "1.0.0",
3
- "package": "ultimate-pi",
4
- "package_version": "0.6.1",
5
- "generated_at": "2026-05-17T06:10:49.269Z",
6
- "agents": {
7
- "pi-pi/agent-expert": {
8
- "path": ".pi/agents/pi-pi/agent-expert.md",
9
- "sha256": "86561eb092b92fa43f221bfc6305de8d5afe10d43c5f577b9bf15a71bda051c6"
10
- },
11
- "pi-pi/cli-expert": {
12
- "path": ".pi/agents/pi-pi/cli-expert.md",
13
- "sha256": "6ecdbc37c60f1da7c68d504187454c95197dd19bd7091d97d136c6fb9e866838"
14
- },
15
- "pi-pi/config-expert": {
16
- "path": ".pi/agents/pi-pi/config-expert.md",
17
- "sha256": "c23b9b70a4c326cca2d139f18de48f8db2e0e28fa5ceb8205600f25430822a40"
18
- },
19
- "pi-pi/ext-expert": {
20
- "path": ".pi/agents/pi-pi/ext-expert.md",
21
- "sha256": "ae71ccf598217c830ac99e3150365a075bde931778d8bc6305d0270ac959a8a9"
22
- },
23
- "pi-pi/keybinding-expert": {
24
- "path": ".pi/agents/pi-pi/keybinding-expert.md",
25
- "sha256": "d8c3d645a507b453d9b338f7a5435232a23e4a00cb7981e3b81a4ce2a0df6bba"
26
- },
27
- "pi-pi/pi-orchestrator": {
28
- "path": ".pi/agents/pi-pi/pi-orchestrator.md",
29
- "sha256": "1323fc262112030320bdd7d7866fd69e1b861377bb30fd3553a5e3a4398f9090"
30
- },
31
- "pi-pi/prompt-expert": {
32
- "path": ".pi/agents/pi-pi/prompt-expert.md",
33
- "sha256": "0d9f5e3e8ab162149cac8f08e159a7b2150a11f53ebe7021e72f8277fa004d0c"
34
- },
35
- "pi-pi/skill-expert": {
36
- "path": ".pi/agents/pi-pi/skill-expert.md",
37
- "sha256": "91732a2479097a2645b6af56171d5cdc1ed67a0896ca54bc4badba250341698f"
38
- },
39
- "pi-pi/theme-expert": {
40
- "path": ".pi/agents/pi-pi/theme-expert.md",
41
- "sha256": "3d256049203356d87c36eac2232c0ad6261fec8af02eb15d3144b18a400ed64d"
42
- },
43
- "pi-pi/tui-expert": {
44
- "path": ".pi/agents/pi-pi/tui-expert.md",
45
- "sha256": "cd6aaf6ca28e6e65a72ef1dc3c96e3315669ec0f859c148f0f15d0ee74a263d7"
46
- },
47
- "harness/adversary": {
48
- "path": ".pi/agents/harness/adversary.md",
49
- "sha256": "17c29621d99dd46cf6ea6566845b502c5e825338b61f073a28cb53f297b2f208"
50
- },
51
- "harness/evaluator": {
52
- "path": ".pi/agents/harness/evaluator.md",
53
- "sha256": "71cfb070ec3bbec9d764052ba03fee8692f2db385ad19f15c2f3a157422c1c2b"
54
- },
55
- "harness/executor": {
56
- "path": ".pi/agents/harness/executor.md",
57
- "sha256": "02cd2f438d97abb9f35ca6cdab51b3733fc6a9076427cf9c2c93fd4593052539"
58
- },
59
- "harness/meta-optimizer": {
60
- "path": ".pi/agents/harness/meta-optimizer.md",
61
- "sha256": "faf81af85d273796f059395287058d2f16fe70903c70b8c6ac9b3c5f12fca69c"
62
- },
63
- "harness/planner": {
64
- "path": ".pi/agents/harness/planner.md",
65
- "sha256": "58d9c983bddddd37070f33742409f16474bf7c54bdccb8808fca4926f4adfdad"
66
- },
67
- "harness/sentrux-bootstrap": {
68
- "path": ".pi/agents/harness/sentrux-bootstrap.md",
69
- "sha256": "3a0b43b94386a7c541b8a806a37524a5e53f1c8049270db7a420680df5799eeb"
70
- },
71
- "harness/tie-breaker": {
72
- "path": ".pi/agents/harness/tie-breaker.md",
73
- "sha256": "c8fd02b3423760d9503e20ea27d3a90aa11326fc38c98f1e7253e67676b15e42"
74
- },
75
- "harness/trace-librarian": {
76
- "path": ".pi/agents/harness/trace-librarian.md",
77
- "sha256": "f44439758317b5fbae66b760bb6a1b6df19787d344bf7038712368e8fe0594ee"
78
- }
79
- }
2
+ "schema_version": "1.0.0",
3
+ "package": "ultimate-pi",
4
+ "package_version": "0.8.0",
5
+ "generated_at": "2026-05-17T09:55:45.240Z",
6
+ "agents": {
7
+ "pi-pi/agent-expert": {
8
+ "path": ".pi/agents/pi-pi/agent-expert.md",
9
+ "sha256": "86561eb092b92fa43f221bfc6305de8d5afe10d43c5f577b9bf15a71bda051c6"
10
+ },
11
+ "pi-pi/cli-expert": {
12
+ "path": ".pi/agents/pi-pi/cli-expert.md",
13
+ "sha256": "6ecdbc37c60f1da7c68d504187454c95197dd19bd7091d97d136c6fb9e866838"
14
+ },
15
+ "pi-pi/config-expert": {
16
+ "path": ".pi/agents/pi-pi/config-expert.md",
17
+ "sha256": "c23b9b70a4c326cca2d139f18de48f8db2e0e28fa5ceb8205600f25430822a40"
18
+ },
19
+ "pi-pi/ext-expert": {
20
+ "path": ".pi/agents/pi-pi/ext-expert.md",
21
+ "sha256": "47c64a9e2cffe00a50cac5541b0edc89fe8bcbd66ec3bb302eecc10d405a977f"
22
+ },
23
+ "pi-pi/keybinding-expert": {
24
+ "path": ".pi/agents/pi-pi/keybinding-expert.md",
25
+ "sha256": "bb83f5fd2178075c8374ef28c360339f7de9faeedf811670b43a536c5f65c58e"
26
+ },
27
+ "pi-pi/pi-orchestrator": {
28
+ "path": ".pi/agents/pi-pi/pi-orchestrator.md",
29
+ "sha256": "1323fc262112030320bdd7d7866fd69e1b861377bb30fd3553a5e3a4398f9090"
30
+ },
31
+ "pi-pi/prompt-expert": {
32
+ "path": ".pi/agents/pi-pi/prompt-expert.md",
33
+ "sha256": "0d9f5e3e8ab162149cac8f08e159a7b2150a11f53ebe7021e72f8277fa004d0c"
34
+ },
35
+ "pi-pi/skill-expert": {
36
+ "path": ".pi/agents/pi-pi/skill-expert.md",
37
+ "sha256": "91732a2479097a2645b6af56171d5cdc1ed67a0896ca54bc4badba250341698f"
38
+ },
39
+ "pi-pi/theme-expert": {
40
+ "path": ".pi/agents/pi-pi/theme-expert.md",
41
+ "sha256": "3d256049203356d87c36eac2232c0ad6261fec8af02eb15d3144b18a400ed64d"
42
+ },
43
+ "pi-pi/tui-expert": {
44
+ "path": ".pi/agents/pi-pi/tui-expert.md",
45
+ "sha256": "a619b2ee3d3d94fe599abb61db0904f90d30335ec426851c3f1efdf2e5ce5390"
46
+ },
47
+ "harness/adversary": {
48
+ "path": ".pi/agents/harness/adversary.md",
49
+ "sha256": "b965f90610ca942d08b656f1aee839266d08a92beb174b8761dd5e840694a899"
50
+ },
51
+ "harness/evaluator": {
52
+ "path": ".pi/agents/harness/evaluator.md",
53
+ "sha256": "6c0de777a10de26dba4a6feb5641495fa5c2d31072a8b0e597a5ecc9921f129f"
54
+ },
55
+ "harness/executor": {
56
+ "path": ".pi/agents/harness/executor.md",
57
+ "sha256": "5af3ec2be4d64a738834e36d480a36c2bee4359e8cd5a2e1aac49be4cff79589"
58
+ },
59
+ "harness/incident-recorder": {
60
+ "path": ".pi/agents/harness/incident-recorder.md",
61
+ "sha256": "2de405f77b62dde38f331665bff220a3ef131c3c1cd42eebee364000fc83352b"
62
+ },
63
+ "harness/meta-optimizer": {
64
+ "path": ".pi/agents/harness/meta-optimizer.md",
65
+ "sha256": "ef2fb950e18e3a6439e91a68f764fc7ec922cd2d6b35de8f656f376854974d04"
66
+ },
67
+ "harness/planner": {
68
+ "path": ".pi/agents/harness/planner.md",
69
+ "sha256": "eb0459a1fcb018e4ca8d4339141e294828fa7014879d9a64258ae01abc13d3ad"
70
+ },
71
+ "harness/sentrux-bootstrap": {
72
+ "path": ".pi/agents/harness/sentrux-bootstrap.md",
73
+ "sha256": "3a0b43b94386a7c541b8a806a37524a5e53f1c8049270db7a420680df5799eeb"
74
+ },
75
+ "harness/tie-breaker": {
76
+ "path": ".pi/agents/harness/tie-breaker.md",
77
+ "sha256": "651f50b9e2c7903c542700e94908b1fcd026ebed12aa1f1d6ec481df3567e34f"
78
+ },
79
+ "harness/trace-librarian": {
80
+ "path": ".pi/agents/harness/trace-librarian.md",
81
+ "sha256": "d63fe08a2ea0466c0fd89fff4da03ac1d9d3580c306381cee251c89d4e8fdb97"
82
+ }
83
+ }
80
84
  }
@@ -9,7 +9,7 @@ Manual harness steps required copying `run_id` and `plan-packet.json` paths betw
9
9
 
10
10
  ## Decision
11
11
 
12
- 1. Add `.pi/lib/harness-run-context.ts` and `harness-run-context.ts` extension as the single source of truth for active runs.
12
+ 1. Add `.pi/lib/harness-run-context.ts` and `harness-run-context.ts` extension as the single source of truth for active runs. **Harness command routing:** `pi.on("input")` appends `harness-turn` for raw `/harness-*` (before template expansion); `before_agent_start` bootstraps from that entry, not expanded prompt headers.
13
13
  2. Persist mirrors:
14
14
  - `.pi/harness/runs/<run_id>/run-context.json`
15
15
  - `.pi/harness/active-run.json` (cross-session pointer for forked eval)
@@ -17,8 +17,11 @@ Manual harness steps required copying `run_id` and `plan-packet.json` paths betw
17
17
  4. **Hook order:** `harness-run-context` `before_agent_start` allocates/reuses `run_id` before `trace-recorder` `agent_start`. Trace writes phase files `trace-<phase>.json` plus rollup `trace.json`.
18
18
  5. PostHog `harness_run_started` at most once per logical `run_id`.
19
19
  6. Short commands: `/harness-run`, `/harness-eval`, etc. without args; recovery via `/harness-run-status`, `/harness-use-run`.
20
- 7. Review isolation unchanged: after execute, handoff says **new Pi session `/harness-eval`**; project `active-run.json` binds forked sessions.
21
- 8. `hasApprovedPlanSignal` uses user-visible prompt only; execute requires `plan_ready` from disk validation.
20
+ 7. After execute, handoff recommends **`/harness-eval`** in the same session; review commands spawn isolated subagents (see ADR 0032). `active-run.json` still supports cross-session recovery when Pi was closed mid-run.
21
+ 8. `hasApprovedPlanSignal` uses user-visible prompt only; execute requires `plan_ready` from disk validation **and** recorded `ask_user` approval (or `harness-plan-approval` entry).
22
+ 9. **Plan-phase writes:** policy-gate allows `write`/`edit` only on canonical `.pi/harness/runs/<run_id>/plan-packet.json` after approval; all other paths stay blocked until execute phase.
23
+ 10. **Approval-before-persist:** agents present the full plan, call `ask_user` (Approve / Request changes / Cancel), then write the packet. `--quick` narrows planning only — it does not skip approval.
24
+ 11. **`/harness-auto`:** after an approved plan-packet write, policy phase promotes to `execute` in the same agent turn so implementation can proceed without a separate `/harness-run` message.
22
25
 
23
26
  ## Consequences
24
27
 
@@ -0,0 +1,37 @@
1
+ # ADR 0032: Harness slash commands as agent orchestrators
2
+
3
+ - **Status:** Accepted
4
+ - **Date:** 2026-05-17
5
+
6
+ ## Context
7
+
8
+ Harness slash prompts duplicated logic already defined in `harness/*` agents. Commands did not invoke the `Agent` tool. Review docs told users to fork a new Pi session even though subagents already provide isolated context.
9
+
10
+ ## Decision
11
+
12
+ 1. **Slash commands** (prompt templates) are orchestrators: spawn `harness/*` agents once, perform policy-gated writes, emit handoff blocks. Command identity is captured on Pi **`input`** as `harness-turn` (raw `/harness-*`), not from expanded prompt markdown.
13
+ 2. **Agents** perform multi-turn reads and emit structured JSON drafts. **Planner** runs clarification and plan approval via `ask_user` (parent UI bridge); planner does not write `plan-packet.json`.
14
+ 3. **HarnessSpawnContext** is injected in `[HarnessRunContext]`; orchestrator copies it into spawn prompts. Subagents do not receive `[HarnessActivePlan]` injection.
15
+ 4. **Review isolation** uses `Agent` spawn with `inherit_context: false`. `review-integrity` allows `Agent` / `get_subagent_result` for evaluator/adversary/tie-breaker.
16
+ 5. **Subagent policy** blocks mutating tools for read-only phase agents; `ask_user` allowed for planner/evaluator/adversary/tie-breaker only.
17
+ 6. **Parent** does not duplicate planner `ask_user` or re-spawn for clarification. `get_subagent_result` syncs `harness-plan-approval` from subagent sessions.
18
+
19
+ ## Consequences
20
+
21
+ ### Positive
22
+
23
+ - Single source of truth for phase logic in agent files; prompts stay thin.
24
+ - L4 review isolation without manual session management.
25
+
26
+ ### Negative
27
+
28
+ - Orchestrator must parse subagent JSON reliably and pass complete spawn context.
29
+ - Scope enforcement remains prompt-driven for executor until optional path allowlist.
30
+
31
+ ## References
32
+
33
+ - `.pi/prompts/harness-*.md`
34
+ - `.pi/agents/harness/*.md`
35
+ - `.pi/extensions/lib/harness-subagents/harness-subagent-policy.ts`
36
+ - `.pi/extensions/review-integrity.ts`
37
+ - `.pi/lib/harness-agent-output.ts`
@@ -17,6 +17,7 @@ Team-shared ADRs for the ultimate-pi harness live under `.pi/harness/docs/adrs/`
17
17
  | [0009](0009-sentrux-rules-lifecycle.md) | Sentrux rules.toml lifecycle | Accepted |
18
18
  | [0030](0030-inhouse-vcc-compaction.md) | In-house VCC compaction (vendored pi-vcc) | Accepted |
19
19
  | [0031](0031-harness-run-context.md) | Harness active run context | Accepted |
20
+ | [0032](0032-harness-command-orchestration.md) | Harness commands as agent orchestrators | Accepted |
20
21
 
21
22
  ## Template
22
23
 
@@ -54,7 +54,9 @@
54
54
  "enum": [
55
55
  "max_rounds_reached",
56
56
  "round_token_cap_exceeded",
57
- "debate_global_cap_exceeded"
57
+ "debate_global_cap_exceeded",
58
+ "phase_cap_exceeded",
59
+ "global_cap_exceeded"
58
60
  ]
59
61
  },
60
62
  "caps": {