ultimate-pi 0.7.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111)
  1. package/.agents/skills/harness-decisions/SKILL.md +20 -1
  2. package/.agents/skills/harness-eval/SKILL.md +11 -13
  3. package/.agents/skills/harness-orchestration/SKILL.md +36 -30
  4. package/.agents/skills/harness-plan/SKILL.md +13 -18
  5. package/.pi/PACKAGING.md +1 -1
  6. package/.pi/agents/harness/adversary.md +20 -12
  7. package/.pi/agents/harness/evaluator.md +25 -14
  8. package/.pi/agents/harness/executor.md +27 -16
  9. package/.pi/agents/harness/incident-recorder.md +37 -0
  10. package/.pi/agents/harness/meta-optimizer.md +18 -15
  11. package/.pi/agents/harness/planner.md +27 -30
  12. package/.pi/agents/harness/tie-breaker.md +4 -2
  13. package/.pi/agents/harness/trace-librarian.md +18 -11
  14. package/.pi/agents/pi-pi/ext-expert.md +1 -1
  15. package/.pi/agents/pi-pi/keybinding-expert.md +1 -1
  16. package/.pi/agents/pi-pi/tui-expert.md +3 -3
  17. package/.pi/extensions/00-ultimate-pi-system-prompt.ts +2 -2
  18. package/.pi/extensions/budget-guard.ts +1 -1
  19. package/.pi/extensions/custom-footer.ts +8 -3
  20. package/.pi/extensions/custom-header.ts +2 -2
  21. package/.pi/extensions/debate-orchestrator.ts +1 -1
  22. package/.pi/extensions/dotenv-loader.ts +1 -1
  23. package/.pi/extensions/drift-monitor.ts +1 -1
  24. package/.pi/extensions/harness-ask-user.ts +1 -1
  25. package/.pi/extensions/harness-live-widget.ts +1 -1
  26. package/.pi/extensions/harness-run-context.ts +52 -10
  27. package/.pi/extensions/harness-telemetry.ts +1 -1
  28. package/.pi/extensions/harness-web-guard.ts +1 -1
  29. package/.pi/extensions/harness-web-tools.ts +1 -1
  30. package/.pi/extensions/lib/ask-user/dialog.ts +2 -2
  31. package/.pi/extensions/lib/ask-user/fallback.ts +1 -1
  32. package/.pi/extensions/lib/ask-user/render.ts +3 -3
  33. package/.pi/extensions/lib/harness-subagents/agent-loader.ts +1 -1
  34. package/.pi/extensions/lib/harness-subagents/agent-parser.ts +1 -1
  35. package/.pi/extensions/lib/harness-subagents/blackboard-tool.ts +1 -1
  36. package/.pi/extensions/lib/harness-subagents/harness-subagent-policy.ts +134 -0
  37. package/.pi/extensions/lib/harness-subagents/vendored/agent-manager.ts +2 -2
  38. package/.pi/extensions/lib/harness-subagents/vendored/agent-runner.ts +9 -5
  39. package/.pi/extensions/lib/harness-subagents/vendored/context.ts +1 -1
  40. package/.pi/extensions/lib/harness-subagents/vendored/env.ts +1 -1
  41. package/.pi/extensions/lib/harness-subagents/vendored/index.ts +2 -2
  42. package/.pi/extensions/lib/harness-subagents/vendored/output-file.ts +1 -1
  43. package/.pi/extensions/lib/harness-subagents/vendored/schedule.ts +1 -1
  44. package/.pi/extensions/lib/harness-subagents/vendored/settings.ts +1 -1
  45. package/.pi/extensions/lib/harness-subagents/vendored/skill-loader.ts +1 -1
  46. package/.pi/extensions/lib/harness-subagents/vendored/types.ts +2 -2
  47. package/.pi/extensions/lib/harness-subagents/vendored/ui/agent-widget.ts +1 -1
  48. package/.pi/extensions/lib/harness-subagents/vendored/ui/conversation-viewer.ts +2 -2
  49. package/.pi/extensions/lib/harness-subagents/vendored/ui/schedule-menu.ts +1 -1
  50. package/.pi/extensions/observation-bus.ts +1 -1
  51. package/.pi/extensions/pi-model-router-harness.ts +1 -1
  52. package/.pi/extensions/policy-gate.ts +86 -16
  53. package/.pi/extensions/provider-payload-sanitize.ts +1 -1
  54. package/.pi/extensions/review-integrity.ts +76 -22
  55. package/.pi/extensions/sentrux-rules-sync.ts +1 -1
  56. package/.pi/extensions/soundboard.ts +1 -1
  57. package/.pi/extensions/test-diff-integrity.ts +1 -1
  58. package/.pi/extensions/trace-recorder.ts +1 -1
  59. package/.pi/extensions/ultimate-pi-vcc.ts +1 -1
  60. package/.pi/harness/agents.manifest.json +16 -12
  61. package/.pi/harness/docs/adrs/0031-harness-run-context.md +5 -2
  62. package/.pi/harness/docs/adrs/0032-harness-command-orchestration.md +37 -0
  63. package/.pi/harness/docs/adrs/README.md +1 -0
  64. package/.pi/harness/specs/harness-spawn-context.schema.json +65 -0
  65. package/.pi/lib/harness-agent-output.ts +41 -0
  66. package/.pi/lib/harness-run-context.ts +352 -7
  67. package/.pi/lib/harness-ui-state.ts +1 -1
  68. package/.pi/prompts/harness-auto.md +36 -61
  69. package/.pi/prompts/harness-critic.md +15 -28
  70. package/.pi/prompts/harness-eval.md +19 -27
  71. package/.pi/prompts/harness-incident.md +15 -34
  72. package/.pi/prompts/harness-plan.md +31 -50
  73. package/.pi/prompts/harness-review.md +16 -30
  74. package/.pi/prompts/harness-router-tune.md +16 -38
  75. package/.pi/prompts/harness-run.md +21 -38
  76. package/.pi/prompts/harness-setup.md +2 -0
  77. package/.pi/prompts/harness-trace.md +13 -30
  78. package/.pi/scripts/harness-generate-model-router.mjs +16 -13
  79. package/.pi/scripts/harness-verify.mjs +16 -0
  80. package/.pi/scripts/vendor-sync-pi-model-router.sh +10 -10
  81. package/CHANGELOG.md +19 -1
  82. package/README.md +4 -5
  83. package/THIRD_PARTY_NOTICES.md +1 -1
  84. package/package.json +13 -8
  85. package/vendor/pi-model-router/UPSTREAM_PIN.md +1 -1
  86. package/vendor/pi-model-router/extensions/commands.ts +2 -2
  87. package/vendor/pi-model-router/extensions/config.ts +2 -2
  88. package/vendor/pi-model-router/extensions/index.ts +1 -1
  89. package/vendor/pi-model-router/extensions/provider.ts +2 -2
  90. package/vendor/pi-model-router/extensions/routing.ts +2 -2
  91. package/vendor/pi-model-router/extensions/types.ts +1 -1
  92. package/vendor/pi-model-router/extensions/ui.ts +1 -1
  93. package/vendor/pi-model-router/package.json +4 -4
  94. package/vendor/pi-vcc/index.ts +1 -1
  95. package/vendor/pi-vcc/package.json +1 -1
  96. package/vendor/pi-vcc/src/commands/pi-vcc.ts +1 -1
  97. package/vendor/pi-vcc/src/commands/vcc-recall.ts +1 -1
  98. package/vendor/pi-vcc/src/core/content.ts +1 -1
  99. package/vendor/pi-vcc/src/core/load-messages.ts +1 -1
  100. package/vendor/pi-vcc/src/core/normalize.ts +1 -1
  101. package/vendor/pi-vcc/src/core/render-entries.ts +1 -1
  102. package/vendor/pi-vcc/src/core/report.ts +1 -1
  103. package/vendor/pi-vcc/src/core/search-entries.ts +1 -1
  104. package/vendor/pi-vcc/src/core/summarize.ts +1 -1
  105. package/vendor/pi-vcc/src/hooks/before-compact.ts +2 -2
  106. package/vendor/pi-vcc/src/tools/recall.ts +1 -1
  107. package/vendor/pi-vcc/src/types.ts +1 -1
  108. package/vendor/pi-vcc/tests/fixtures.ts +1 -1
  109. package/vendor/pi-vcc/tests/render-entries.test.ts +1 -1
  110. package/vendor/pi-vcc/tests/search-entries.test.ts +1 -1
  111. package/vendor/pi-vcc/tests/support/load-session.ts +2 -2
@@ -5,7 +5,7 @@
5
5
  * Uses the callback form of setWidget for themed rendering.
6
6
  */
7
7
 
8
- import { truncateToWidth } from "@mariozechner/pi-tui";
8
+ import { truncateToWidth } from "@earendil-works/pi-tui";
9
9
  import type { AgentManager } from "../agent-manager.js";
10
10
  import { getConfig } from "../agent-types.js";
11
11
  import type { AgentInvocation, SubagentType } from "../types.js";
@@ -5,7 +5,7 @@
5
5
  * Subscribes to session events for real-time streaming updates.
6
6
  */
7
7
 
8
- import type { AgentSession } from "@mariozechner/pi-coding-agent";
8
+ import type { AgentSession } from "@earendil-works/pi-coding-agent";
9
9
  import {
10
10
  type Component,
11
11
  matchesKey,
@@ -13,7 +13,7 @@ import {
13
13
  truncateToWidth,
14
14
  visibleWidth,
15
15
  wrapTextWithAnsi,
16
- } from "@mariozechner/pi-tui";
16
+ } from "@earendil-works/pi-tui";
17
17
  import { extractText } from "../context.js";
18
18
  import type { AgentRecord } from "../types.js";
19
19
  import { getLifetimeTotal, getSessionContextPercent } from "../usage.js";
@@ -8,7 +8,7 @@
8
8
  * if real demand emerges.
9
9
  */
10
10
 
11
- import type { ExtensionCommandContext } from "@mariozechner/pi-coding-agent";
11
+ import type { ExtensionCommandContext } from "@earendil-works/pi-coding-agent";
12
12
  import type { SubagentScheduler } from "../schedule.js";
13
13
  import type { ScheduledSubagent } from "../types.js";
14
14
 
@@ -6,7 +6,7 @@
6
6
  */
7
7
 
8
8
  import { randomUUID } from "node:crypto";
9
- import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
9
+ import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
10
10
  import { getRunIdFromSession } from "../lib/harness-run-context.js";
11
11
 
12
12
  type HarnessPhase = "plan" | "execute" | "evaluate" | "adversary" | "merge";
@@ -6,7 +6,7 @@
6
6
 
7
7
  import { existsSync, readFileSync } from "node:fs";
8
8
  import { join } from "node:path";
9
- import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
9
+ import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
10
10
  import vendorModelRouter from "../../vendor/pi-model-router/extensions/index.js";
11
11
 
12
12
  function isHarnessRouterReady(cwd: string): boolean {
@@ -8,17 +8,24 @@
8
8
  * - command surface via pi.registerCommand()
9
9
  */
10
10
 
11
- import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
11
+ import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
12
12
  import {
13
+ extractWritePathFromToolInput,
13
14
  getLatestRunContext,
14
15
  getPolicyTransitionBlock,
15
16
  hasApprovedPlanSignalFromUserPrompt,
16
17
  hasHarnessAbortSignal,
17
18
  inferHarnessPhaseFromPrompt,
19
+ isHarnessAutoSession,
18
20
  isHarnessBootstrapPrompt,
21
+ isPlanPhaseAllowedMutation,
22
+ isPlanPhaseScopedWrite,
23
+ normalizeHarnessPath,
24
+ readPlanPacketFromPath,
19
25
  saveProjectActiveRun,
20
26
  saveRunContextToDisk,
21
27
  userVisiblePromptSlice,
28
+ validatePlanPacket,
22
29
  } from "../lib/harness-run-context.js";
23
30
 
24
31
  type HarnessPhase = "plan" | "execute" | "evaluate" | "adversary" | "merge";
@@ -134,6 +141,11 @@ function getLatestPolicyStateFull(ctx: {
134
141
  export default function policyGate(pi: ExtensionAPI) {
135
142
  let state = defaultState();
136
143
 
144
+ const appendPolicyState = (next: PolicyState): void => {
145
+ state = next;
146
+ pi.appendEntry("harness-policy-state", state);
147
+ };
148
+
137
149
  pi.on("session_start", async (_event, ctx) => {
138
150
  state = getLatestPolicyStateFull(ctx);
139
151
  });
@@ -141,6 +153,7 @@ export default function policyGate(pi: ExtensionAPI) {
141
153
  pi.on("before_agent_start", async (event, ctx) => {
142
154
  const userPrompt = userVisiblePromptSlice(event.prompt);
143
155
  const entries = ctx.sessionManager.getEntries();
156
+ state = getLatestPolicyStateFull(ctx);
144
157
  const bootstrapPrompt = isHarnessBootstrapPrompt(userPrompt);
145
158
  const abortSignal = hasHarnessAbortSignal(userPrompt);
146
159
 
@@ -228,26 +241,41 @@ export default function policyGate(pi: ExtensionAPI) {
228
241
  state.updatedAt = nowIso();
229
242
  pi.appendEntry("harness-policy-state", state);
230
243
 
244
+ const planPhaseHint =
245
+ state.phase === "plan"
246
+ ? "\nPlan phase: present the full PlanPacket in chat, call ask_user (Approve / Request changes / Cancel), then write only the canonical plan-packet.json after Approve."
247
+ : "";
248
+
231
249
  return {
232
- systemPrompt: `${event.systemPrompt}\n\n[PolicyGate]\nPhase=${state.phase}; ApprovedPlan=${state.approvedPlan}; PlanId=${state.planId ?? "none"}; Aborted=${state.aborted}.`,
250
+ systemPrompt: `${event.systemPrompt}\n\n[PolicyGate]\nPhase=${state.phase}; ApprovedPlan=${state.approvedPlan}; PlanId=${state.planId ?? "none"}; Aborted=${state.aborted}.${planPhaseHint}`,
233
251
  };
234
252
  });
235
253
 
236
- pi.on("tool_call", async (event) => {
237
- if (state.aborted && MUTATING_TOOLS.has(event.toolName)) {
238
- return {
239
- block: true,
240
- reason:
241
- "policy-gate: mutating tool blocked because harness-abort lock is active. Attach a new approved plan first.",
242
- };
243
- }
254
+ pi.on("tool_call", async (event, ctx) => {
255
+ state = getLatestPolicyStateFull(ctx);
256
+ const entries = ctx.sessionManager.getEntries();
257
+ const projectRoot = process.cwd();
258
+ const sessionId = ctx.sessionManager.getSessionId();
259
+ const runCtx = getLatestRunContext(entries);
260
+
244
261
  if (MUTATING_TOOLS.has(event.toolName)) {
245
- if (state.phase !== "execute") {
246
- return {
247
- block: true,
248
- reason: `policy-gate: ${event.toolName} blocked in phase '${state.phase}'. Allowed only in execute phase.`,
249
- };
262
+ const decision = await isPlanPhaseAllowedMutation(
263
+ event.toolName,
264
+ event.input as Record<string, unknown>,
265
+ state.phase,
266
+ runCtx,
267
+ projectRoot,
268
+ {
269
+ aborted: state.aborted,
270
+ entries,
271
+ ownerSessionId: runCtx?.owner_pi_session_id,
272
+ currentSessionId: sessionId,
273
+ },
274
+ );
275
+ if (!decision.allowed) {
276
+ return { block: true, reason: decision.reason };
250
277
  }
278
+ return undefined;
251
279
  }
252
280
 
253
281
  if (event.toolName === "bash") {
@@ -260,7 +288,7 @@ export default function policyGate(pi: ExtensionAPI) {
260
288
  "policy-gate: mutating bash command blocked because harness-abort lock is active. Attach a new approved plan first.",
261
289
  };
262
290
  }
263
- if (state.phase !== "execute") {
291
+ if (state.phase !== "execute" && state.phase !== "merge") {
264
292
  return {
265
293
  block: true,
266
294
  reason: `policy-gate: mutating bash command blocked in phase '${state.phase}'.`,
@@ -271,6 +299,48 @@ export default function policyGate(pi: ExtensionAPI) {
271
299
  return undefined;
272
300
  });
273
301
 
302
+ pi.on("tool_result", async (event, ctx) => {
303
+ if (event.isError) return;
304
+ if (event.toolName !== "write" && event.toolName !== "edit") return;
305
+
306
+ const entries = ctx.sessionManager.getEntries();
307
+ state = getLatestPolicyStateFull(ctx);
308
+ const projectRoot = process.cwd();
309
+ const runCtx = getLatestRunContext(entries);
310
+ if (!runCtx) return;
311
+
312
+ const target = extractWritePathFromToolInput(
313
+ event.input as Record<string, unknown>,
314
+ );
315
+ if (!target) return;
316
+ const scoped = await isPlanPhaseScopedWrite(target, runCtx, projectRoot);
317
+ if (!scoped) return;
318
+
319
+ const planPath = normalizeHarnessPath(target, projectRoot);
320
+ const packet = await readPlanPacketFromPath(planPath);
321
+ const validation = validatePlanPacket(packet);
322
+ if (!validation.valid || !packet?.plan_id) return;
323
+
324
+ if (isHarnessAutoSession(entries)) {
325
+ state.phase = "execute";
326
+ state.approvedPlan = true;
327
+ state.planId = packet.plan_id;
328
+ state.aborted = false;
329
+ state.abortReason = null;
330
+ state.abortedAt = null;
331
+ state.updatedAt = nowIso();
332
+ appendPolicyState(state);
333
+
334
+ runCtx.plan_ready = true;
335
+ runCtx.plan_id = packet.plan_id;
336
+ runCtx.phase = "execute";
337
+ runCtx.updated_at = nowIso();
338
+ pi.appendEntry("harness-run-context", runCtx);
339
+ void saveRunContextToDisk(runCtx);
340
+ void saveProjectActiveRun(runCtx);
341
+ }
342
+ });
343
+
274
344
  pi.registerCommand("harness-abort", {
275
345
  description: "Safely abort current harness run and reset to plan phase",
276
346
  handler: async (args, ctx) => {
@@ -9,7 +9,7 @@
9
9
  import type {
10
10
  BeforeProviderRequestEvent,
11
11
  ExtensionAPI,
12
- } from "@mariozechner/pi-coding-agent";
12
+ } from "@earendil-works/pi-coding-agent";
13
13
 
14
14
  const CHAT_MESSAGE_EXTRA_KEYS = [
15
15
  "reasoning",
@@ -1,19 +1,34 @@
1
1
  /**
2
2
  * review-integrity — enforce evaluator/adversary isolation from executor session.
3
3
  *
4
- * If review phases (`evaluate`/`adversary`) run in the same session as execution,
5
- * tool calls are blocked until the review is isolated (fork/switch session).
4
+ * Parent orchestrators spawn review agents in isolated subagent sessions.
5
+ * Direct review tools in the executor session are blocked; Agent/get_subagent_result
6
+ * for harness review agents remain allowed.
6
7
  */
7
8
 
8
9
  import { appendFile, mkdir } from "node:fs/promises";
9
10
  import { join } from "node:path";
10
- import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
11
+ import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
11
12
 
12
13
  type HarnessPhase = "plan" | "execute" | "evaluate" | "adversary" | "merge";
13
14
 
14
15
  const INCIDENTS_DIR = join(process.cwd(), ".pi", "harness", "incidents");
15
16
  const INCIDENT_FILE = join(INCIDENTS_DIR, "review-integrity.jsonl");
16
17
 
18
+ const ORCHESTRATION_TOOLS = new Set([
19
+ "Agent",
20
+ "get_subagent_result",
21
+ "steer_subagent",
22
+ ]);
23
+
24
+ const REVIEW_SUBAGENT_TYPES = new Set([
25
+ "harness/evaluator",
26
+ "harness/adversary",
27
+ "harness/tie-breaker",
28
+ ]);
29
+
30
+ const EXECUTOR_SUBAGENT_TYPE = "harness/executor";
31
+
17
32
  interface IsolationState {
18
33
  executorSessionId: string | null;
19
34
  violationActive: boolean;
@@ -89,6 +104,17 @@ function restoreState(ctx: {
89
104
  };
90
105
  }
91
106
 
107
+ function subagentTypeFromInput(
108
+ input: Record<string, unknown> | undefined,
109
+ ): string {
110
+ if (!input) return "";
111
+ const direct = input.subagent_type;
112
+ if (typeof direct === "string") return direct;
113
+ const nested = input as { subagentType?: string };
114
+ if (typeof nested.subagentType === "string") return nested.subagentType;
115
+ return "";
116
+ }
117
+
92
118
  async function appendIncident(payload: Record<string, unknown>): Promise<void> {
93
119
  await mkdir(INCIDENTS_DIR, { recursive: true });
94
120
  await appendFile(
@@ -105,6 +131,10 @@ export default function reviewIntegrity(pi: ExtensionAPI) {
105
131
  updatedAt: nowIso(),
106
132
  };
107
133
 
134
+ const persist = (): void => {
135
+ pi.appendEntry("harness-review-integrity", state);
136
+ };
137
+
108
138
  pi.on("session_start", async (_event, ctx) => {
109
139
  state = restoreState(ctx);
110
140
  });
@@ -115,7 +145,7 @@ export default function reviewIntegrity(pi: ExtensionAPI) {
115
145
  state.executorSessionId = ctx.sessionManager.getSessionId();
116
146
  state.violationActive = false;
117
147
  state.updatedAt = nowIso();
118
- pi.appendEntry("harness-review-integrity", state);
148
+ persist();
119
149
  });
120
150
 
121
151
  pi.on("before_agent_start", async (_event, ctx) => {
@@ -125,7 +155,7 @@ export default function reviewIntegrity(pi: ExtensionAPI) {
125
155
  if (!inReview) {
126
156
  state.violationActive = false;
127
157
  state.updatedAt = nowIso();
128
- pi.appendEntry("harness-review-integrity", state);
158
+ persist();
129
159
  return undefined;
130
160
  }
131
161
 
@@ -135,42 +165,66 @@ export default function reviewIntegrity(pi: ExtensionAPI) {
135
165
  ) {
136
166
  state.violationActive = false;
137
167
  state.updatedAt = nowIso();
138
- pi.appendEntry("harness-review-integrity", state);
168
+ persist();
139
169
  return undefined;
140
170
  }
141
171
 
142
172
  state.violationActive = true;
143
173
  state.updatedAt = nowIso();
144
- pi.appendEntry("harness-review-integrity", state);
145
-
146
- await appendIncident({
147
- type: "review_integrity_violation",
148
- session_id: currentSessionId,
149
- phase,
150
- reason:
151
- "evaluator/adversary session is not isolated from executor session",
152
- mitigation:
153
- "fork or switch to a clean review session before running review tools",
154
- });
174
+ persist();
155
175
 
156
176
  return {
157
177
  message: {
158
- customType: "harness-review-integrity-block",
178
+ customType: "harness-review-integrity-hint",
159
179
  display: true,
160
180
  content: [
161
- "Review integrity violation: evaluator/adversary is sharing executor session context.",
162
- "Fork/switch session, then rerun review to maintain independent evaluation guarantees.",
181
+ "Review phase in executor session: spawn harness/evaluator or harness/adversary via Agent (isolated subagent context).",
182
+ "Do not run review checks directly in this session — use get_subagent_result after spawn.",
163
183
  ].join("\n"),
164
184
  },
165
185
  };
166
186
  });
167
187
 
168
- pi.on("tool_call", async (_event) => {
188
+ pi.on("tool_call", async (event, ctx) => {
189
+ if (event.toolName === "Agent") {
190
+ const subagentType = subagentTypeFromInput(
191
+ event.input as Record<string, unknown> | undefined,
192
+ );
193
+ if (subagentType === EXECUTOR_SUBAGENT_TYPE) {
194
+ state.executorSessionId = ctx.sessionManager.getSessionId();
195
+ state.violationActive = false;
196
+ state.updatedAt = nowIso();
197
+ persist();
198
+ return undefined;
199
+ }
200
+ if (REVIEW_SUBAGENT_TYPES.has(subagentType)) {
201
+ state.violationActive = false;
202
+ state.updatedAt = nowIso();
203
+ persist();
204
+ return undefined;
205
+ }
206
+ }
207
+
169
208
  if (!state.violationActive) return undefined;
209
+
210
+ if (ORCHESTRATION_TOOLS.has(event.toolName)) {
211
+ return undefined;
212
+ }
213
+
214
+ await appendIncident({
215
+ type: "review_integrity_violation",
216
+ session_id: ctx.sessionManager.getSessionId(),
217
+ tool: event.toolName,
218
+ reason:
219
+ "direct tool use in review phase while sharing executor session context",
220
+ mitigation:
221
+ "spawn harness/evaluator or harness/adversary via Agent instead",
222
+ });
223
+
170
224
  return {
171
225
  block: true,
172
226
  reason:
173
- "review-integrity: tool call blocked because review session is not isolated from executor context.",
227
+ "review-integrity: tool blocked in review phase spawn an isolated review subagent via Agent.",
174
228
  };
175
229
  });
176
230
 
@@ -3,7 +3,7 @@
3
3
  */
4
4
 
5
5
  import { spawn } from "node:child_process";
6
- import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
6
+ import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
7
7
  import { resolveHarnessScript } from "./lib/harness-paths.js";
8
8
 
9
9
  function resolveSyncScript(): string {
@@ -31,7 +31,7 @@ import {
31
31
  import type {
32
32
  ExtensionAPI,
33
33
  ExtensionCommandContext,
34
- } from "@mariozechner/pi-coding-agent";
34
+ } from "@earendil-works/pi-coding-agent";
35
35
 
36
36
  // ── Constants ──────────────────────────────────────────────────────
37
37
 
@@ -12,7 +12,7 @@
12
12
 
13
13
  import { appendFile, mkdir } from "node:fs/promises";
14
14
  import { join } from "node:path";
15
- import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
15
+ import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
16
16
 
17
17
  const INCIDENTS_DIR = join(process.cwd(), ".pi", "harness", "incidents");
18
18
  const INCIDENT_FILE = join(INCIDENTS_DIR, "test-diff-integrity.jsonl");
@@ -9,7 +9,7 @@
9
9
 
10
10
  import { appendFile, mkdir, readFile, writeFile } from "node:fs/promises";
11
11
  import { join } from "node:path";
12
- import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
12
+ import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
13
13
  import {
14
14
  getLatestRunContext,
15
15
  getRunIdFromSession,
@@ -9,7 +9,7 @@
9
9
  * - `HARNESS_VCC_DEBUG` — set `true` to write `/tmp/pi-vcc-debug.json` on compaction
10
10
  */
11
11
 
12
- import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
12
+ import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
13
13
  import registerVcc from "../../vendor/pi-vcc/index.js";
14
14
 
15
15
  export default function ultimatePiVcc(pi: ExtensionAPI): void {
@@ -1,8 +1,8 @@
1
1
  {
2
2
  "schema_version": "1.0.0",
3
3
  "package": "ultimate-pi",
4
- "package_version": "0.6.1",
5
- "generated_at": "2026-05-17T06:10:49.269Z",
4
+ "package_version": "0.7.0",
5
+ "generated_at": "2026-05-17T08:24:57.263Z",
6
6
  "agents": {
7
7
  "pi-pi/agent-expert": {
8
8
  "path": ".pi/agents/pi-pi/agent-expert.md",
@@ -18,11 +18,11 @@
18
18
  },
19
19
  "pi-pi/ext-expert": {
20
20
  "path": ".pi/agents/pi-pi/ext-expert.md",
21
- "sha256": "ae71ccf598217c830ac99e3150365a075bde931778d8bc6305d0270ac959a8a9"
21
+ "sha256": "47c64a9e2cffe00a50cac5541b0edc89fe8bcbd66ec3bb302eecc10d405a977f"
22
22
  },
23
23
  "pi-pi/keybinding-expert": {
24
24
  "path": ".pi/agents/pi-pi/keybinding-expert.md",
25
- "sha256": "d8c3d645a507b453d9b338f7a5435232a23e4a00cb7981e3b81a4ce2a0df6bba"
25
+ "sha256": "bb83f5fd2178075c8374ef28c360339f7de9faeedf811670b43a536c5f65c58e"
26
26
  },
27
27
  "pi-pi/pi-orchestrator": {
28
28
  "path": ".pi/agents/pi-pi/pi-orchestrator.md",
@@ -42,27 +42,31 @@
42
42
  },
43
43
  "pi-pi/tui-expert": {
44
44
  "path": ".pi/agents/pi-pi/tui-expert.md",
45
- "sha256": "cd6aaf6ca28e6e65a72ef1dc3c96e3315669ec0f859c148f0f15d0ee74a263d7"
45
+ "sha256": "a619b2ee3d3d94fe599abb61db0904f90d30335ec426851c3f1efdf2e5ce5390"
46
46
  },
47
47
  "harness/adversary": {
48
48
  "path": ".pi/agents/harness/adversary.md",
49
- "sha256": "17c29621d99dd46cf6ea6566845b502c5e825338b61f073a28cb53f297b2f208"
49
+ "sha256": "b965f90610ca942d08b656f1aee839266d08a92beb174b8761dd5e840694a899"
50
50
  },
51
51
  "harness/evaluator": {
52
52
  "path": ".pi/agents/harness/evaluator.md",
53
- "sha256": "71cfb070ec3bbec9d764052ba03fee8692f2db385ad19f15c2f3a157422c1c2b"
53
+ "sha256": "6c0de777a10de26dba4a6feb5641495fa5c2d31072a8b0e597a5ecc9921f129f"
54
54
  },
55
55
  "harness/executor": {
56
56
  "path": ".pi/agents/harness/executor.md",
57
- "sha256": "02cd2f438d97abb9f35ca6cdab51b3733fc6a9076427cf9c2c93fd4593052539"
57
+ "sha256": "5af3ec2be4d64a738834e36d480a36c2bee4359e8cd5a2e1aac49be4cff79589"
58
+ },
59
+ "harness/incident-recorder": {
60
+ "path": ".pi/agents/harness/incident-recorder.md",
61
+ "sha256": "2de405f77b62dde38f331665bff220a3ef131c3c1cd42eebee364000fc83352b"
58
62
  },
59
63
  "harness/meta-optimizer": {
60
64
  "path": ".pi/agents/harness/meta-optimizer.md",
61
- "sha256": "faf81af85d273796f059395287058d2f16fe70903c70b8c6ac9b3c5f12fca69c"
65
+ "sha256": "ef2fb950e18e3a6439e91a68f764fc7ec922cd2d6b35de8f656f376854974d04"
62
66
  },
63
67
  "harness/planner": {
64
68
  "path": ".pi/agents/harness/planner.md",
65
- "sha256": "58d9c983bddddd37070f33742409f16474bf7c54bdccb8808fca4926f4adfdad"
69
+ "sha256": "3052e0b4ca504c7aa025b1926228e34adfe7f1c42d66a24db43fc6a1abb9968d"
66
70
  },
67
71
  "harness/sentrux-bootstrap": {
68
72
  "path": ".pi/agents/harness/sentrux-bootstrap.md",
@@ -70,11 +74,11 @@
70
74
  },
71
75
  "harness/tie-breaker": {
72
76
  "path": ".pi/agents/harness/tie-breaker.md",
73
- "sha256": "c8fd02b3423760d9503e20ea27d3a90aa11326fc38c98f1e7253e67676b15e42"
77
+ "sha256": "651f50b9e2c7903c542700e94908b1fcd026ebed12aa1f1d6ec481df3567e34f"
74
78
  },
75
79
  "harness/trace-librarian": {
76
80
  "path": ".pi/agents/harness/trace-librarian.md",
77
- "sha256": "f44439758317b5fbae66b760bb6a1b6df19787d344bf7038712368e8fe0594ee"
81
+ "sha256": "d63fe08a2ea0466c0fd89fff4da03ac1d9d3580c306381cee251c89d4e8fdb97"
78
82
  }
79
83
  }
80
84
  }
@@ -17,8 +17,11 @@ Manual harness steps required copying `run_id` and `plan-packet.json` paths betw
17
17
  4. **Hook order:** `harness-run-context` `before_agent_start` allocates/reuses `run_id` before `trace-recorder` `agent_start`. Trace writes phase files `trace-<phase>.json` plus rollup `trace.json`.
18
18
  5. PostHog `harness_run_started` at most once per logical `run_id`.
19
19
  6. Short commands: `/harness-run`, `/harness-eval`, etc. without args; recovery via `/harness-run-status`, `/harness-use-run`.
20
- 7. Review isolation unchanged: after execute, handoff says **new Pi session `/harness-eval`**; project `active-run.json` binds forked sessions.
21
- 8. `hasApprovedPlanSignal` uses user-visible prompt only; execute requires `plan_ready` from disk validation.
20
+ 7. After execute, handoff recommends **`/harness-eval`** in the same session; review commands spawn isolated subagents (see ADR 0032). `active-run.json` still supports cross-session recovery when Pi was closed mid-run.
21
+ 8. `hasApprovedPlanSignal` uses user-visible prompt only; execute requires `plan_ready` from disk validation **and** recorded `ask_user` approval (or `harness-plan-approval` entry).
22
+ 9. **Plan-phase writes:** policy-gate allows `write`/`edit` only on canonical `.pi/harness/runs/<run_id>/plan-packet.json` after approval; all other paths stay blocked until execute phase.
23
+ 10. **Approval-before-persist:** agents present the full plan, call `ask_user` (Approve / Request changes / Cancel), then write the packet. `--quick` narrows planning only — it does not skip approval.
24
+ 11. **`/harness-auto`:** after an approved plan-packet write, policy phase promotes to `execute` in the same agent turn so implementation can proceed without a separate `/harness-run` message.
22
25
 
23
26
  ## Consequences
24
27
 
@@ -0,0 +1,37 @@
1
+ # ADR 0032: Harness slash commands as agent orchestrators
2
+
3
+ - **Status:** Accepted
4
+ - **Date:** 2026-05-17
5
+
6
+ ## Context
7
+
8
+ Harness slash prompts duplicated logic already defined in `harness/*` agents. Commands did not invoke the `Agent` tool. Review docs told users to fork a new Pi session even though subagents already provide isolated context.
9
+
10
+ ## Decision
11
+
12
+ 1. **Slash commands** parse args, spawn the matching `harness/*` agent, run all `ask_user` gates, perform policy-gated writes, and emit handoff blocks.
13
+ 2. **Agents** perform multi-turn reads and emit structured JSON drafts; they do not approve plans or write canonical run artifacts (except executor mutations in scope).
14
+ 3. **HarnessSpawnContext** JSON (`.pi/harness/specs/harness-spawn-context.schema.json`) is required in every spawn prompt because subagents do not receive `[HarnessActivePlan]` injection.
15
+ 4. **Review isolation** uses `Agent` spawn with `inherit_context: false`, not session fork. `review-integrity` allows `Agent` / `get_subagent_result` for `harness/evaluator`, `harness/adversary`, and `harness/tie-breaker`.
16
+ 5. **Subagent policy** (`harness-subagent-policy.ts`) blocks mutating tools for planner/evaluator/adversary and related read-only agents; executor keeps write tools and `extensions: true`.
17
+ 6. **Planner** has `disallowed_tools: ask_user`; clarification options return in JSON for the parent orchestrator.
18
+
19
+ ## Consequences
20
+
21
+ ### Positive
22
+
23
+ - Single source of truth for phase logic in agent files; prompts stay thin.
24
+ - L4 review isolation without manual session management.
25
+
26
+ ### Negative
27
+
28
+ - Orchestrator must parse subagent JSON reliably and pass complete spawn context.
29
+ - Scope enforcement remains prompt-driven for executor until optional path allowlist.
30
+
31
+ ## References
32
+
33
+ - `.pi/prompts/harness-*.md`
34
+ - `.pi/agents/harness/*.md`
35
+ - `.pi/extensions/lib/harness-subagents/harness-subagent-policy.ts`
36
+ - `.pi/extensions/review-integrity.ts`
37
+ - `.pi/lib/harness-agent-output.ts`
@@ -17,6 +17,7 @@ Team-shared ADRs for the ultimate-pi harness live under `.pi/harness/docs/adrs/`
17
17
  | [0009](0009-sentrux-rules-lifecycle.md) | Sentrux rules.toml lifecycle | Accepted |
18
18
  | [0030](0030-inhouse-vcc-compaction.md) | In-house VCC compaction (vendored pi-vcc) | Accepted |
19
19
  | [0031](0031-harness-run-context.md) | Harness active run context | Accepted |
20
+ | [0032](0032-harness-command-orchestration.md) | Harness commands as agent orchestrators | Accepted |
20
21
 
21
22
  ## Template
22
23
 
@@ -0,0 +1,65 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://ultimate-pi.local/.pi/harness/specs/harness-spawn-context.schema.json",
4
+ "title": "HarnessSpawnContext",
5
+ "description": "Structured context passed from harness slash-command orchestrators to harness/* subagents.",
6
+ "type": "object",
7
+ "additionalProperties": false,
8
+ "required": ["schema_version", "agent", "mode"],
9
+ "properties": {
10
+ "schema_version": {
11
+ "type": "string",
12
+ "const": "1.0.0"
13
+ },
14
+ "agent": {
15
+ "type": "string",
16
+ "minLength": 1,
17
+ "description": "Target subagent id, e.g. harness/planner"
18
+ },
19
+ "mode": {
20
+ "type": "string",
21
+ "enum": [
22
+ "create",
23
+ "revise",
24
+ "execute",
25
+ "benchmark",
26
+ "verdict",
27
+ "adversary",
28
+ "trace",
29
+ "incident",
30
+ "tune"
31
+ ]
32
+ },
33
+ "run_id": {
34
+ "type": "string"
35
+ },
36
+ "plan_packet_path": {
37
+ "type": "string"
38
+ },
39
+ "run_dir": {
40
+ "type": "string"
41
+ },
42
+ "task_summary": {
43
+ "type": "string"
44
+ },
45
+ "risk_level": {
46
+ "type": "string",
47
+ "enum": ["low", "med", "high"]
48
+ },
49
+ "quick": {
50
+ "type": "boolean"
51
+ },
52
+ "acceptance_checks": {
53
+ "type": "array",
54
+ "items": { "type": "string" }
55
+ },
56
+ "artifact_paths": {
57
+ "type": "object",
58
+ "additionalProperties": { "type": "string" }
59
+ },
60
+ "handoff_summary": {
61
+ "type": "string",
62
+ "description": "Prior phase bullet summary for chained spawns (harness-auto)"
63
+ }
64
+ }
65
+ }