ultimate-pi 0.6.1 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117)
  1. package/.agents/skills/harness-decisions/SKILL.md +20 -1
  2. package/.agents/skills/harness-eval/SKILL.md +11 -13
  3. package/.agents/skills/harness-orchestration/SKILL.md +36 -30
  4. package/.agents/skills/harness-plan/SKILL.md +13 -14
  5. package/.agents/skills/harness-sentrux-setup/SKILL.md +3 -4
  6. package/.pi/PACKAGING.md +1 -1
  7. package/.pi/agents/harness/adversary.md +20 -12
  8. package/.pi/agents/harness/evaluator.md +25 -14
  9. package/.pi/agents/harness/executor.md +27 -16
  10. package/.pi/agents/harness/incident-recorder.md +37 -0
  11. package/.pi/agents/harness/meta-optimizer.md +18 -15
  12. package/.pi/agents/harness/planner.md +27 -30
  13. package/.pi/agents/harness/tie-breaker.md +4 -2
  14. package/.pi/agents/harness/trace-librarian.md +18 -11
  15. package/.pi/agents/pi-pi/ext-expert.md +1 -1
  16. package/.pi/agents/pi-pi/keybinding-expert.md +1 -1
  17. package/.pi/agents/pi-pi/tui-expert.md +3 -3
  18. package/.pi/extensions/00-ultimate-pi-system-prompt.ts +194 -0
  19. package/.pi/extensions/budget-guard.ts +11 -3
  20. package/.pi/extensions/custom-footer.ts +8 -3
  21. package/.pi/extensions/custom-header.ts +2 -2
  22. package/.pi/extensions/debate-orchestrator.ts +11 -3
  23. package/.pi/extensions/dotenv-loader.ts +1 -1
  24. package/.pi/extensions/drift-monitor.ts +1 -1
  25. package/.pi/extensions/harness-ask-user.ts +1 -1
  26. package/.pi/extensions/harness-live-widget.ts +11 -4
  27. package/.pi/extensions/harness-run-context.ts +745 -0
  28. package/.pi/extensions/harness-telemetry.ts +1 -1
  29. package/.pi/extensions/harness-web-guard.ts +1 -1
  30. package/.pi/extensions/harness-web-tools.ts +1 -1
  31. package/.pi/extensions/lib/ask-user/dialog.ts +2 -2
  32. package/.pi/extensions/lib/ask-user/fallback.ts +1 -1
  33. package/.pi/extensions/lib/ask-user/render.ts +3 -3
  34. package/.pi/extensions/lib/harness-subagents/agent-loader.ts +1 -1
  35. package/.pi/extensions/lib/harness-subagents/agent-parser.ts +1 -1
  36. package/.pi/extensions/lib/harness-subagents/blackboard-tool.ts +1 -1
  37. package/.pi/extensions/lib/harness-subagents/harness-subagent-policy.ts +134 -0
  38. package/.pi/extensions/lib/harness-subagents/vendored/agent-manager.ts +2 -2
  39. package/.pi/extensions/lib/harness-subagents/vendored/agent-runner.ts +9 -5
  40. package/.pi/extensions/lib/harness-subagents/vendored/context.ts +1 -1
  41. package/.pi/extensions/lib/harness-subagents/vendored/env.ts +1 -1
  42. package/.pi/extensions/lib/harness-subagents/vendored/index.ts +2 -2
  43. package/.pi/extensions/lib/harness-subagents/vendored/output-file.ts +1 -1
  44. package/.pi/extensions/lib/harness-subagents/vendored/schedule.ts +1 -1
  45. package/.pi/extensions/lib/harness-subagents/vendored/settings.ts +1 -1
  46. package/.pi/extensions/lib/harness-subagents/vendored/skill-loader.ts +1 -1
  47. package/.pi/extensions/lib/harness-subagents/vendored/types.ts +2 -2
  48. package/.pi/extensions/lib/harness-subagents/vendored/ui/agent-widget.ts +1 -1
  49. package/.pi/extensions/lib/harness-subagents/vendored/ui/conversation-viewer.ts +2 -2
  50. package/.pi/extensions/lib/harness-subagents/vendored/ui/schedule-menu.ts +1 -1
  51. package/.pi/extensions/observation-bus.ts +8 -10
  52. package/.pi/extensions/pi-model-router-harness.ts +1 -1
  53. package/.pi/extensions/policy-gate.ts +136 -84
  54. package/.pi/extensions/provider-payload-sanitize.ts +1 -1
  55. package/.pi/extensions/review-integrity.ts +76 -22
  56. package/.pi/extensions/sentrux-rules-sync.ts +1 -1
  57. package/.pi/extensions/soundboard.ts +1 -1
  58. package/.pi/extensions/test-diff-integrity.ts +1 -1
  59. package/.pi/extensions/trace-recorder.ts +81 -21
  60. package/.pi/extensions/ultimate-pi-vcc.ts +1 -1
  61. package/.pi/harness/README.md +2 -0
  62. package/.pi/harness/agents.manifest.json +17 -13
  63. package/.pi/harness/docs/adrs/0009-sentrux-rules-lifecycle.md +1 -1
  64. package/.pi/harness/docs/adrs/0031-harness-run-context.md +41 -0
  65. package/.pi/harness/docs/adrs/0032-harness-command-orchestration.md +37 -0
  66. package/.pi/harness/docs/adrs/README.md +2 -0
  67. package/.pi/harness/evals/smoke/run-context.fixture.json +17 -0
  68. package/.pi/harness/specs/harness-run-context.schema.json +80 -0
  69. package/.pi/harness/specs/harness-spawn-context.schema.json +65 -0
  70. package/.pi/lib/harness-agent-output.ts +41 -0
  71. package/.pi/lib/harness-run-context.ts +1139 -0
  72. package/.pi/lib/harness-ui-state.ts +12 -1
  73. package/.pi/prompts/harness-abort.md +9 -6
  74. package/.pi/prompts/harness-auto.md +36 -61
  75. package/.pi/prompts/harness-critic.md +17 -32
  76. package/.pi/prompts/harness-eval.md +22 -30
  77. package/.pi/prompts/harness-incident.md +17 -34
  78. package/.pi/prompts/harness-plan.md +32 -36
  79. package/.pi/prompts/harness-review.md +18 -33
  80. package/.pi/prompts/harness-router-tune.md +16 -38
  81. package/.pi/prompts/harness-run.md +23 -40
  82. package/.pi/prompts/harness-setup.md +7 -27
  83. package/.pi/prompts/harness-trace.md +15 -34
  84. package/.pi/scripts/harness-generate-model-router.mjs +16 -13
  85. package/.pi/scripts/harness-verify.mjs +34 -0
  86. package/.pi/scripts/vendor-sync-pi-model-router.sh +10 -10
  87. package/CHANGELOG.md +34 -1
  88. package/README.md +31 -15
  89. package/THIRD_PARTY_NOTICES.md +1 -1
  90. package/package.json +14 -9
  91. package/vendor/pi-model-router/UPSTREAM_PIN.md +1 -1
  92. package/vendor/pi-model-router/extensions/commands.ts +2 -2
  93. package/vendor/pi-model-router/extensions/config.ts +2 -2
  94. package/vendor/pi-model-router/extensions/index.ts +1 -1
  95. package/vendor/pi-model-router/extensions/provider.ts +2 -2
  96. package/vendor/pi-model-router/extensions/routing.ts +2 -2
  97. package/vendor/pi-model-router/extensions/types.ts +1 -1
  98. package/vendor/pi-model-router/extensions/ui.ts +1 -1
  99. package/vendor/pi-model-router/package.json +4 -4
  100. package/vendor/pi-vcc/index.ts +1 -1
  101. package/vendor/pi-vcc/package.json +1 -1
  102. package/vendor/pi-vcc/src/commands/pi-vcc.ts +1 -1
  103. package/vendor/pi-vcc/src/commands/vcc-recall.ts +1 -1
  104. package/vendor/pi-vcc/src/core/content.ts +1 -1
  105. package/vendor/pi-vcc/src/core/load-messages.ts +1 -1
  106. package/vendor/pi-vcc/src/core/normalize.ts +1 -1
  107. package/vendor/pi-vcc/src/core/render-entries.ts +1 -1
  108. package/vendor/pi-vcc/src/core/report.ts +1 -1
  109. package/vendor/pi-vcc/src/core/search-entries.ts +1 -1
  110. package/vendor/pi-vcc/src/core/summarize.ts +1 -1
  111. package/vendor/pi-vcc/src/hooks/before-compact.ts +2 -2
  112. package/vendor/pi-vcc/src/tools/recall.ts +1 -1
  113. package/vendor/pi-vcc/src/types.ts +1 -1
  114. package/vendor/pi-vcc/tests/fixtures.ts +1 -1
  115. package/vendor/pi-vcc/tests/render-entries.test.ts +1 -1
  116. package/vendor/pi-vcc/tests/search-entries.test.ts +1 -1
  117. package/vendor/pi-vcc/tests/support/load-session.ts +2 -2
@@ -8,7 +8,25 @@
8
8
  * - command surface via pi.registerCommand()
9
9
  */
10
10
 
11
- import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
11
+ import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
12
+ import {
13
+ extractWritePathFromToolInput,
14
+ getLatestRunContext,
15
+ getPolicyTransitionBlock,
16
+ hasApprovedPlanSignalFromUserPrompt,
17
+ hasHarnessAbortSignal,
18
+ inferHarnessPhaseFromPrompt,
19
+ isHarnessAutoSession,
20
+ isHarnessBootstrapPrompt,
21
+ isPlanPhaseAllowedMutation,
22
+ isPlanPhaseScopedWrite,
23
+ normalizeHarnessPath,
24
+ readPlanPacketFromPath,
25
+ saveProjectActiveRun,
26
+ saveRunContextToDisk,
27
+ userVisiblePromptSlice,
28
+ validatePlanPacket,
29
+ } from "../lib/harness-run-context.js";
12
30
 
13
31
  type HarnessPhase = "plan" | "execute" | "evaluate" | "adversary" | "merge";
14
32
 
@@ -70,65 +88,17 @@ function defaultState(): PolicyState {
70
88
  };
71
89
  }
72
90
 
73
- function isBootstrapPrompt(prompt: string): boolean {
74
- const p = prompt.toLowerCase();
75
- return (
76
- p.includes("/harness-setup") ||
77
- p.includes("harness-setup") ||
78
- p.includes("full harness bootstrap")
79
- );
80
- }
81
-
82
- function inferPhase(prompt: string, _current: HarnessPhase): HarnessPhase {
83
- const p = prompt.toLowerCase();
84
- if (
85
- p.includes("/harness-plan") ||
86
- p.includes("harness-plan") ||
87
- p.includes("/harness-auto") ||
88
- p.includes("harness-auto")
89
- )
90
- return "plan";
91
- if (p.includes("/harness-run") || p.includes("harness-run")) return "execute";
92
- if (p.includes("/harness-eval") || p.includes("harness-eval"))
93
- return "evaluate";
94
- if (p.includes("/harness-review") || p.includes("harness-review"))
95
- return "evaluate";
96
- if (p.includes("/harness-critic") || p.includes("harness-critic"))
97
- return "adversary";
98
- if (p.includes("adversary")) return "adversary";
99
- if (p.includes("merge gate") || p.includes("policy decision")) return "merge";
100
- return "execute";
101
- }
102
-
103
- function hasApprovedPlanSignal(prompt: string): boolean {
104
- const p = prompt.toLowerCase();
105
- return (
106
- p.includes("planpacket") ||
107
- p.includes("--plan") ||
108
- p.includes("approved plan") ||
109
- p.includes("plan_id")
110
- );
111
- }
112
-
113
- function hasAbortSignal(prompt: string): boolean {
114
- const p = prompt.toLowerCase();
115
- return p.includes("/harness-abort") || p.includes("harness-abort");
116
- }
117
-
118
- function isValidTransition(from: HarnessPhase, to: HarnessPhase): boolean {
119
- if (from === to) return true;
120
- if (to === "plan") return true;
121
- if (to === "execute") return true;
122
- const fromIndex = PHASE_ORDER.indexOf(from);
123
- const toIndex = PHASE_ORDER.indexOf(to);
124
- return toIndex === fromIndex + 1;
91
+ function hasApprovedPlanSignal(prompt: string, entries: unknown[]): boolean {
92
+ const runCtx = getLatestRunContext(entries);
93
+ if (runCtx?.plan_ready) return true;
94
+ return hasApprovedPlanSignalFromUserPrompt(prompt);
125
95
  }
126
96
 
127
97
  function isMutatingBash(command: string): boolean {
128
98
  return BASH_MUTATION_PATTERNS.some((pattern) => pattern.test(command));
129
99
  }
130
100
 
131
- function getLatestPolicyState(ctx: {
101
+ function getLatestPolicyStateFull(ctx: {
132
102
  sessionManager: { getEntries(): unknown[] };
133
103
  }): PolicyState {
134
104
  const entries = ctx.sessionManager.getEntries() as SessionEntryLike[];
@@ -171,13 +141,21 @@ function getLatestPolicyState(ctx: {
171
141
  export default function policyGate(pi: ExtensionAPI) {
172
142
  let state = defaultState();
173
143
 
144
+ const appendPolicyState = (next: PolicyState): void => {
145
+ state = next;
146
+ pi.appendEntry("harness-policy-state", state);
147
+ };
148
+
174
149
  pi.on("session_start", async (_event, ctx) => {
175
- state = getLatestPolicyState(ctx);
150
+ state = getLatestPolicyStateFull(ctx);
176
151
  });
177
152
 
178
- pi.on("before_agent_start", async (event) => {
179
- const bootstrapPrompt = isBootstrapPrompt(event.prompt);
180
- const abortSignal = hasAbortSignal(event.prompt);
153
+ pi.on("before_agent_start", async (event, ctx) => {
154
+ const userPrompt = userVisiblePromptSlice(event.prompt);
155
+ const entries = ctx.sessionManager.getEntries();
156
+ state = getLatestPolicyStateFull(ctx);
157
+ const bootstrapPrompt = isHarnessBootstrapPrompt(userPrompt);
158
+ const abortSignal = hasHarnessAbortSignal(userPrompt);
181
159
 
182
160
  // /harness-setup instructions mention `harness-plan` (e.g. gh label text). That
183
161
  // substring must not force inferPhase() to "plan" or bootstrap stays blocked.
@@ -220,18 +198,17 @@ export default function policyGate(pi: ExtensionAPI) {
220
198
  };
221
199
  }
222
200
 
223
- const nextPhase = inferPhase(event.prompt, state.phase);
224
- const planSignal = hasApprovedPlanSignal(event.prompt);
201
+ const nextPhase = inferHarnessPhaseFromPrompt(userPrompt);
202
+ const planSignal = hasApprovedPlanSignal(userPrompt, entries);
225
203
 
226
- if (!isValidTransition(state.phase, nextPhase)) {
204
+ const transitionBlock = getPolicyTransitionBlock(userPrompt, entries);
205
+ if (transitionBlock.blocked) {
227
206
  return {
228
207
  message: {
229
208
  customType: "harness-policy-violation",
230
209
  display: true,
231
- content: [
232
- `Policy gate blocked invalid phase transition: ${state.phase} -> ${nextPhase}.`,
233
- "Run /harness-plan first or continue in the current phase.",
234
- ].join("\n"),
210
+ content:
211
+ transitionBlock.message ?? "Policy gate blocked this command.",
235
212
  },
236
213
  };
237
214
  }
@@ -242,13 +219,16 @@ export default function policyGate(pi: ExtensionAPI) {
242
219
  }
243
220
 
244
221
  if (nextPhase === "execute" && !state.approvedPlan && !planSignal) {
245
- // Softened enforcement: flow mode defaults to execute without hard plan requirement.
246
- state.approvedPlan = true;
222
+ const runCtx = getLatestRunContext(entries);
223
+ if (runCtx?.plan_ready) {
224
+ state.approvedPlan = true;
225
+ state.planId = runCtx.plan_id ?? state.planId;
226
+ }
247
227
  }
248
228
 
249
229
  if (planSignal) {
250
230
  state.approvedPlan = true;
251
- const planMatch = event.prompt.match(
231
+ const planMatch = userPrompt.match(
252
232
  /plan[_-]?id["'\s:=]+([A-Za-z0-9._:-]+)/i,
253
233
  );
254
234
  state.planId = planMatch?.[1] ?? state.planId;
@@ -261,26 +241,41 @@ export default function policyGate(pi: ExtensionAPI) {
261
241
  state.updatedAt = nowIso();
262
242
  pi.appendEntry("harness-policy-state", state);
263
243
 
244
+ const planPhaseHint =
245
+ state.phase === "plan"
246
+ ? "\nPlan phase: present the full PlanPacket in chat, call ask_user (Approve / Request changes / Cancel), then write only the canonical plan-packet.json after Approve."
247
+ : "";
248
+
264
249
  return {
265
- systemPrompt: `${event.systemPrompt}\n\n[PolicyGate]\nPhase=${state.phase}; ApprovedPlan=${state.approvedPlan}; PlanId=${state.planId ?? "none"}; Aborted=${state.aborted}.`,
250
+ systemPrompt: `${event.systemPrompt}\n\n[PolicyGate]\nPhase=${state.phase}; ApprovedPlan=${state.approvedPlan}; PlanId=${state.planId ?? "none"}; Aborted=${state.aborted}.${planPhaseHint}`,
266
251
  };
267
252
  });
268
253
 
269
- pi.on("tool_call", async (event) => {
270
- if (state.aborted && MUTATING_TOOLS.has(event.toolName)) {
271
- return {
272
- block: true,
273
- reason:
274
- "policy-gate: mutating tool blocked because harness-abort lock is active. Attach a new approved plan first.",
275
- };
276
- }
254
+ pi.on("tool_call", async (event, ctx) => {
255
+ state = getLatestPolicyStateFull(ctx);
256
+ const entries = ctx.sessionManager.getEntries();
257
+ const projectRoot = process.cwd();
258
+ const sessionId = ctx.sessionManager.getSessionId();
259
+ const runCtx = getLatestRunContext(entries);
260
+
277
261
  if (MUTATING_TOOLS.has(event.toolName)) {
278
- if (state.phase !== "execute") {
279
- return {
280
- block: true,
281
- reason: `policy-gate: ${event.toolName} blocked in phase '${state.phase}'. Allowed only in execute phase.`,
282
- };
262
+ const decision = await isPlanPhaseAllowedMutation(
263
+ event.toolName,
264
+ event.input as Record<string, unknown>,
265
+ state.phase,
266
+ runCtx,
267
+ projectRoot,
268
+ {
269
+ aborted: state.aborted,
270
+ entries,
271
+ ownerSessionId: runCtx?.owner_pi_session_id,
272
+ currentSessionId: sessionId,
273
+ },
274
+ );
275
+ if (!decision.allowed) {
276
+ return { block: true, reason: decision.reason };
283
277
  }
278
+ return undefined;
284
279
  }
285
280
 
286
281
  if (event.toolName === "bash") {
@@ -293,7 +288,7 @@ export default function policyGate(pi: ExtensionAPI) {
293
288
  "policy-gate: mutating bash command blocked because harness-abort lock is active. Attach a new approved plan first.",
294
289
  };
295
290
  }
296
- if (state.phase !== "execute") {
291
+ if (state.phase !== "execute" && state.phase !== "merge") {
297
292
  return {
298
293
  block: true,
299
294
  reason: `policy-gate: mutating bash command blocked in phase '${state.phase}'.`,
@@ -304,6 +299,48 @@ export default function policyGate(pi: ExtensionAPI) {
304
299
  return undefined;
305
300
  });
306
301
 
302
+ pi.on("tool_result", async (event, ctx) => {
303
+ if (event.isError) return;
304
+ if (event.toolName !== "write" && event.toolName !== "edit") return;
305
+
306
+ const entries = ctx.sessionManager.getEntries();
307
+ state = getLatestPolicyStateFull(ctx);
308
+ const projectRoot = process.cwd();
309
+ const runCtx = getLatestRunContext(entries);
310
+ if (!runCtx) return;
311
+
312
+ const target = extractWritePathFromToolInput(
313
+ event.input as Record<string, unknown>,
314
+ );
315
+ if (!target) return;
316
+ const scoped = await isPlanPhaseScopedWrite(target, runCtx, projectRoot);
317
+ if (!scoped) return;
318
+
319
+ const planPath = normalizeHarnessPath(target, projectRoot);
320
+ const packet = await readPlanPacketFromPath(planPath);
321
+ const validation = validatePlanPacket(packet);
322
+ if (!validation.valid || !packet?.plan_id) return;
323
+
324
+ if (isHarnessAutoSession(entries)) {
325
+ state.phase = "execute";
326
+ state.approvedPlan = true;
327
+ state.planId = packet.plan_id;
328
+ state.aborted = false;
329
+ state.abortReason = null;
330
+ state.abortedAt = null;
331
+ state.updatedAt = nowIso();
332
+ appendPolicyState(state);
333
+
334
+ runCtx.plan_ready = true;
335
+ runCtx.plan_id = packet.plan_id;
336
+ runCtx.phase = "execute";
337
+ runCtx.updated_at = nowIso();
338
+ pi.appendEntry("harness-run-context", runCtx);
339
+ void saveRunContextToDisk(runCtx);
340
+ void saveProjectActiveRun(runCtx);
341
+ }
342
+ });
343
+
307
344
  pi.registerCommand("harness-abort", {
308
345
  description: "Safely abort current harness run and reset to plan phase",
309
346
  handler: async (args, ctx) => {
@@ -318,6 +355,21 @@ export default function policyGate(pi: ExtensionAPI) {
318
355
  state.updatedAt = state.abortedAt;
319
356
  pi.appendEntry("harness-policy-state", state);
320
357
 
358
+ const runCtx = getLatestRunContext(ctx.sessionManager.getEntries());
359
+ if (runCtx) {
360
+ runCtx.status = "aborted";
361
+ runCtx.plan_ready = false;
362
+ runCtx.last_outcome = "aborted";
363
+ runCtx.last_completed_step = "abort";
364
+ runCtx.next_recommended_command = runCtx.task_summary
365
+ ? `/harness-plan "${runCtx.task_summary}"`
366
+ : '/harness-plan "<task>"';
367
+ runCtx.updated_at = state.abortedAt ?? nowIso();
368
+ pi.appendEntry("harness-run-context", runCtx);
369
+ void saveRunContextToDisk(runCtx);
370
+ void saveProjectActiveRun(runCtx);
371
+ }
372
+
321
373
  const lines = [
322
374
  "Harness run aborted safely.",
323
375
  " phase: plan",
@@ -342,7 +394,7 @@ export default function policyGate(pi: ExtensionAPI) {
342
394
  pi.registerCommand("harness-policy-status", {
343
395
  description: "Show current harness policy gate state",
344
396
  handler: async (_args, ctx) => {
345
- const latest = getLatestPolicyState(ctx);
397
+ const latest = getLatestPolicyStateFull(ctx);
346
398
  const lines = [
347
399
  "Harness policy gate:",
348
400
  ` phase: ${latest.phase}`,
@@ -9,7 +9,7 @@
9
9
  import type {
10
10
  BeforeProviderRequestEvent,
11
11
  ExtensionAPI,
12
- } from "@mariozechner/pi-coding-agent";
12
+ } from "@earendil-works/pi-coding-agent";
13
13
 
14
14
  const CHAT_MESSAGE_EXTRA_KEYS = [
15
15
  "reasoning",
@@ -1,19 +1,34 @@
1
1
  /**
2
2
  * review-integrity — enforce evaluator/adversary isolation from executor session.
3
3
  *
4
- * If review phases (`evaluate`/`adversary`) run in the same session as execution,
5
- * tool calls are blocked until the review is isolated (fork/switch session).
4
+ * Parent orchestrators spawn review agents in isolated subagent sessions.
5
+ * Direct review tools in the executor session are blocked; Agent/get_subagent_result
6
+ * for harness review agents remain allowed.
6
7
  */
7
8
 
8
9
  import { appendFile, mkdir } from "node:fs/promises";
9
10
  import { join } from "node:path";
10
- import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
11
+ import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
11
12
 
12
13
  type HarnessPhase = "plan" | "execute" | "evaluate" | "adversary" | "merge";
13
14
 
14
15
  const INCIDENTS_DIR = join(process.cwd(), ".pi", "harness", "incidents");
15
16
  const INCIDENT_FILE = join(INCIDENTS_DIR, "review-integrity.jsonl");
16
17
 
18
+ const ORCHESTRATION_TOOLS = new Set([
19
+ "Agent",
20
+ "get_subagent_result",
21
+ "steer_subagent",
22
+ ]);
23
+
24
+ const REVIEW_SUBAGENT_TYPES = new Set([
25
+ "harness/evaluator",
26
+ "harness/adversary",
27
+ "harness/tie-breaker",
28
+ ]);
29
+
30
+ const EXECUTOR_SUBAGENT_TYPE = "harness/executor";
31
+
17
32
  interface IsolationState {
18
33
  executorSessionId: string | null;
19
34
  violationActive: boolean;
@@ -89,6 +104,17 @@ function restoreState(ctx: {
89
104
  };
90
105
  }
91
106
 
107
+ function subagentTypeFromInput(
108
+ input: Record<string, unknown> | undefined,
109
+ ): string {
110
+ if (!input) return "";
111
+ const direct = input.subagent_type;
112
+ if (typeof direct === "string") return direct;
113
+ const nested = input as { subagentType?: string };
114
+ if (typeof nested.subagentType === "string") return nested.subagentType;
115
+ return "";
116
+ }
117
+
92
118
  async function appendIncident(payload: Record<string, unknown>): Promise<void> {
93
119
  await mkdir(INCIDENTS_DIR, { recursive: true });
94
120
  await appendFile(
@@ -105,6 +131,10 @@ export default function reviewIntegrity(pi: ExtensionAPI) {
105
131
  updatedAt: nowIso(),
106
132
  };
107
133
 
134
+ const persist = (): void => {
135
+ pi.appendEntry("harness-review-integrity", state);
136
+ };
137
+
108
138
  pi.on("session_start", async (_event, ctx) => {
109
139
  state = restoreState(ctx);
110
140
  });
@@ -115,7 +145,7 @@ export default function reviewIntegrity(pi: ExtensionAPI) {
115
145
  state.executorSessionId = ctx.sessionManager.getSessionId();
116
146
  state.violationActive = false;
117
147
  state.updatedAt = nowIso();
118
- pi.appendEntry("harness-review-integrity", state);
148
+ persist();
119
149
  });
120
150
 
121
151
  pi.on("before_agent_start", async (_event, ctx) => {
@@ -125,7 +155,7 @@ export default function reviewIntegrity(pi: ExtensionAPI) {
125
155
  if (!inReview) {
126
156
  state.violationActive = false;
127
157
  state.updatedAt = nowIso();
128
- pi.appendEntry("harness-review-integrity", state);
158
+ persist();
129
159
  return undefined;
130
160
  }
131
161
 
@@ -135,42 +165,66 @@ export default function reviewIntegrity(pi: ExtensionAPI) {
135
165
  ) {
136
166
  state.violationActive = false;
137
167
  state.updatedAt = nowIso();
138
- pi.appendEntry("harness-review-integrity", state);
168
+ persist();
139
169
  return undefined;
140
170
  }
141
171
 
142
172
  state.violationActive = true;
143
173
  state.updatedAt = nowIso();
144
- pi.appendEntry("harness-review-integrity", state);
145
-
146
- await appendIncident({
147
- type: "review_integrity_violation",
148
- session_id: currentSessionId,
149
- phase,
150
- reason:
151
- "evaluator/adversary session is not isolated from executor session",
152
- mitigation:
153
- "fork or switch to a clean review session before running review tools",
154
- });
174
+ persist();
155
175
 
156
176
  return {
157
177
  message: {
158
- customType: "harness-review-integrity-block",
178
+ customType: "harness-review-integrity-hint",
159
179
  display: true,
160
180
  content: [
161
- "Review integrity violation: evaluator/adversary is sharing executor session context.",
162
- "Fork/switch session, then rerun review to maintain independent evaluation guarantees.",
181
+ "Review phase in executor session: spawn harness/evaluator or harness/adversary via Agent (isolated subagent context).",
182
+ "Do not run review checks directly in this session — use get_subagent_result after spawn.",
163
183
  ].join("\n"),
164
184
  },
165
185
  };
166
186
  });
167
187
 
168
- pi.on("tool_call", async (_event) => {
188
+ pi.on("tool_call", async (event, ctx) => {
189
+ if (event.toolName === "Agent") {
190
+ const subagentType = subagentTypeFromInput(
191
+ event.input as Record<string, unknown> | undefined,
192
+ );
193
+ if (subagentType === EXECUTOR_SUBAGENT_TYPE) {
194
+ state.executorSessionId = ctx.sessionManager.getSessionId();
195
+ state.violationActive = false;
196
+ state.updatedAt = nowIso();
197
+ persist();
198
+ return undefined;
199
+ }
200
+ if (REVIEW_SUBAGENT_TYPES.has(subagentType)) {
201
+ state.violationActive = false;
202
+ state.updatedAt = nowIso();
203
+ persist();
204
+ return undefined;
205
+ }
206
+ }
207
+
169
208
  if (!state.violationActive) return undefined;
209
+
210
+ if (ORCHESTRATION_TOOLS.has(event.toolName)) {
211
+ return undefined;
212
+ }
213
+
214
+ await appendIncident({
215
+ type: "review_integrity_violation",
216
+ session_id: ctx.sessionManager.getSessionId(),
217
+ tool: event.toolName,
218
+ reason:
219
+ "direct tool use in review phase while sharing executor session context",
220
+ mitigation:
221
+ "spawn harness/evaluator or harness/adversary via Agent instead",
222
+ });
223
+
170
224
  return {
171
225
  block: true,
172
226
  reason:
173
- "review-integrity: tool call blocked because review session is not isolated from executor context.",
227
+ "review-integrity: tool blocked in review phase spawn an isolated review subagent via Agent.",
174
228
  };
175
229
  });
176
230
 
@@ -3,7 +3,7 @@
3
3
  */
4
4
 
5
5
  import { spawn } from "node:child_process";
6
- import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
6
+ import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
7
7
  import { resolveHarnessScript } from "./lib/harness-paths.js";
8
8
 
9
9
  function resolveSyncScript(): string {
@@ -31,7 +31,7 @@ import {
31
31
  import type {
32
32
  ExtensionAPI,
33
33
  ExtensionCommandContext,
34
- } from "@mariozechner/pi-coding-agent";
34
+ } from "@earendil-works/pi-coding-agent";
35
35
 
36
36
  // ── Constants ──────────────────────────────────────────────────────
37
37
 
@@ -12,7 +12,7 @@
12
12
 
13
13
  import { appendFile, mkdir } from "node:fs/promises";
14
14
  import { join } from "node:path";
15
- import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
15
+ import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
16
16
 
17
17
  const INCIDENTS_DIR = join(process.cwd(), ".pi", "harness", "incidents");
18
18
  const INCIDENT_FILE = join(INCIDENTS_DIR, "test-diff-integrity.jsonl");
@@ -9,11 +9,18 @@
9
9
 
10
10
  import { appendFile, mkdir, readFile, writeFile } from "node:fs/promises";
11
11
  import { join } from "node:path";
12
- import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
12
+ import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
13
+ import {
14
+ getLatestRunContext,
15
+ getRunIdFromSession,
16
+ type HarnessPhase,
17
+ isHarnessSlashCommand,
18
+ loadRunContextFromDisk,
19
+ phaseTraceFileName,
20
+ saveRunContextToDisk,
21
+ } from "../lib/harness-run-context.js";
13
22
  import { captureHarnessEvent } from "./lib/harness-posthog.js";
14
23
 
15
- type HarnessPhase = "plan" | "execute" | "evaluate" | "adversary" | "merge";
16
-
17
24
  interface ToolSpan {
18
25
  tool_call_id: string;
19
26
  tool_name: string;
@@ -52,10 +59,6 @@ function nowIso(): string {
52
59
  return new Date().toISOString();
53
60
  }
54
61
 
55
- function makeRunId(sessionId: string): string {
56
- return `${sessionId}-${Date.now()}`;
57
- }
58
-
59
62
  function parsePhase(ctx: {
60
63
  sessionManager: { getEntries(): unknown[] };
61
64
  }): HarnessPhase {
@@ -165,8 +168,22 @@ async function readRunTraceSchemaVersion(): Promise<string> {
165
168
  }
166
169
  }
167
170
 
171
+ function resolveRunIdForAgentStart(
172
+ ctx: { sessionManager: { getEntries(): unknown[]; getSessionId(): string } },
173
+ prompt: string,
174
+ ): string {
175
+ const entries = ctx.sessionManager.getEntries();
176
+ const sessionId = ctx.sessionManager.getSessionId();
177
+ const fromSession = getRunIdFromSession(entries, sessionId);
178
+ if (fromSession && isHarnessSlashCommand(prompt)) return fromSession;
179
+ const runCtx = getLatestRunContext(entries);
180
+ if (runCtx && isHarnessSlashCommand(prompt)) return runCtx.run_id;
181
+ return `${sessionId}-${Date.now()}`;
182
+ }
183
+
168
184
  export default function traceRecorder(pi: ExtensionAPI) {
169
185
  let activeRun: ActiveRun | null = null;
186
+ let lastUserPrompt = "";
170
187
 
171
188
  async function writeEvent(
172
189
  runId: string,
@@ -180,14 +197,25 @@ export default function traceRecorder(pi: ExtensionAPI) {
180
197
  );
181
198
  }
182
199
 
200
+ pi.on("before_agent_start", async (event) => {
201
+ lastUserPrompt = event.prompt;
202
+ });
203
+
183
204
  pi.on("agent_start", async (_event, ctx) => {
205
+ if (!isHarnessSlashCommand(lastUserPrompt)) {
206
+ activeRun = null;
207
+ return;
208
+ }
209
+
184
210
  const sessionId = ctx.sessionManager.getSessionId();
185
- const runId = makeRunId(sessionId);
211
+ const entries = ctx.sessionManager.getEntries();
212
+ const runId = resolveRunIdForAgentStart(ctx, lastUserPrompt);
186
213
  const startedAt = nowIso();
214
+ const phase = parsePhase(ctx);
187
215
  activeRun = {
188
216
  runId,
189
217
  planId: parsePlanId(ctx),
190
- phase: parsePhase(ctx),
218
+ phase,
191
219
  startedAt,
192
220
  toolSpans: new Map(),
193
221
  artifactRefs: new Set(),
@@ -198,15 +226,29 @@ export default function traceRecorder(pi: ExtensionAPI) {
198
226
  phase: activeRun.phase,
199
227
  started_at: startedAt,
200
228
  });
201
- captureHarnessEvent(sessionId, "harness_run_started", {
202
- harness_run_id: runId,
203
- harness_plan_id: activeRun.planId,
204
- harness_phase: activeRun.phase,
205
- pi_session_id: sessionId,
206
- model: ctx.model?.id ?? "unknown",
207
- thinking_level:
208
- pi.getThinkingLevel() === "minimal" ? "off" : pi.getThinkingLevel(),
209
- });
229
+
230
+ const runCtx = getLatestRunContext(entries);
231
+ const projectRoot = process.cwd();
232
+ const diskCtx =
233
+ runCtx ?? (await loadRunContextFromDisk(runId, projectRoot));
234
+ const shouldEmitStarted = !diskCtx?.harness_run_started_emitted;
235
+ if (shouldEmitStarted) {
236
+ captureHarnessEvent(sessionId, "harness_run_started", {
237
+ harness_run_id: runId,
238
+ harness_plan_id: activeRun.planId,
239
+ harness_phase: activeRun.phase,
240
+ pi_session_id: sessionId,
241
+ model: ctx.model?.id ?? "unknown",
242
+ thinking_level:
243
+ pi.getThinkingLevel() === "minimal" ? "off" : pi.getThinkingLevel(),
244
+ });
245
+ if (diskCtx) {
246
+ diskCtx.harness_run_started_emitted = true;
247
+ await saveRunContextToDisk(diskCtx);
248
+ pi.appendEntry("harness-run-context", diskCtx);
249
+ }
250
+ }
251
+
210
252
  await writeEvent(runId, {
211
253
  type: "run_start",
212
254
  run_id: runId,
@@ -282,6 +324,12 @@ export default function traceRecorder(pi: ExtensionAPI) {
282
324
  cost: usage,
283
325
  };
284
326
 
327
+ const phaseFile = phaseTraceFileName(activeRun.phase);
328
+ await writeFile(
329
+ join(runDir, phaseFile),
330
+ `${JSON.stringify(summary, null, 2)}\n`,
331
+ "utf-8",
332
+ );
285
333
  await writeFile(
286
334
  join(runDir, "trace.json"),
287
335
  `${JSON.stringify(summary, null, 2)}\n`,
@@ -313,7 +361,7 @@ export default function traceRecorder(pi: ExtensionAPI) {
313
361
  });
314
362
 
315
363
  pi.registerCommand("harness-trace-last", {
316
- description: "Show last recorded run trace id",
364
+ description: "Show last harness trace phase summary (no run id)",
317
365
  handler: async (_args, ctx) => {
318
366
  const entries = ctx.sessionManager.getEntries();
319
367
  for (let i = entries.length - 1; i >= 0; i--) {
@@ -322,8 +370,20 @@ export default function traceRecorder(pi: ExtensionAPI) {
322
370
  entry.type === "custom" &&
323
371
  entry.customType === "harness-run-trace"
324
372
  ) {
325
- const data = entry.data as { run_id?: string } | undefined;
326
- const msg = `Last run trace: ${data?.run_id ?? "(unknown)"}`;
373
+ const data = entry.data as
374
+ | {
375
+ phase?: string;
376
+ tool_span_count?: number;
377
+ }
378
+ | undefined;
379
+ const handoff = getLatestRunContext(entries);
380
+ const next =
381
+ handoff?.next_recommended_command ?? "/harness-run-status";
382
+ const msg = [
383
+ `Last harness trace: phase ${data?.phase ?? "unknown"}`,
384
+ `tool spans: ${data?.tool_span_count ?? 0}`,
385
+ `Next: ${next}`,
386
+ ].join("\n");
327
387
  if (ctx.hasUI) {
328
388
  ctx.ui.notify(msg, "info");
329
389
  } else {