ultimate-pi 0.6.1 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agents/skills/harness-decisions/SKILL.md +20 -1
- package/.agents/skills/harness-eval/SKILL.md +11 -13
- package/.agents/skills/harness-orchestration/SKILL.md +36 -30
- package/.agents/skills/harness-plan/SKILL.md +13 -14
- package/.agents/skills/harness-sentrux-setup/SKILL.md +3 -4
- package/.pi/PACKAGING.md +1 -1
- package/.pi/agents/harness/adversary.md +20 -12
- package/.pi/agents/harness/evaluator.md +25 -14
- package/.pi/agents/harness/executor.md +27 -16
- package/.pi/agents/harness/incident-recorder.md +37 -0
- package/.pi/agents/harness/meta-optimizer.md +18 -15
- package/.pi/agents/harness/planner.md +27 -30
- package/.pi/agents/harness/tie-breaker.md +4 -2
- package/.pi/agents/harness/trace-librarian.md +18 -11
- package/.pi/agents/pi-pi/ext-expert.md +1 -1
- package/.pi/agents/pi-pi/keybinding-expert.md +1 -1
- package/.pi/agents/pi-pi/tui-expert.md +3 -3
- package/.pi/extensions/00-ultimate-pi-system-prompt.ts +194 -0
- package/.pi/extensions/budget-guard.ts +11 -3
- package/.pi/extensions/custom-footer.ts +8 -3
- package/.pi/extensions/custom-header.ts +2 -2
- package/.pi/extensions/debate-orchestrator.ts +11 -3
- package/.pi/extensions/dotenv-loader.ts +1 -1
- package/.pi/extensions/drift-monitor.ts +1 -1
- package/.pi/extensions/harness-ask-user.ts +1 -1
- package/.pi/extensions/harness-live-widget.ts +11 -4
- package/.pi/extensions/harness-run-context.ts +745 -0
- package/.pi/extensions/harness-telemetry.ts +1 -1
- package/.pi/extensions/harness-web-guard.ts +1 -1
- package/.pi/extensions/harness-web-tools.ts +1 -1
- package/.pi/extensions/lib/ask-user/dialog.ts +2 -2
- package/.pi/extensions/lib/ask-user/fallback.ts +1 -1
- package/.pi/extensions/lib/ask-user/render.ts +3 -3
- package/.pi/extensions/lib/harness-subagents/agent-loader.ts +1 -1
- package/.pi/extensions/lib/harness-subagents/agent-parser.ts +1 -1
- package/.pi/extensions/lib/harness-subagents/blackboard-tool.ts +1 -1
- package/.pi/extensions/lib/harness-subagents/harness-subagent-policy.ts +134 -0
- package/.pi/extensions/lib/harness-subagents/vendored/agent-manager.ts +2 -2
- package/.pi/extensions/lib/harness-subagents/vendored/agent-runner.ts +9 -5
- package/.pi/extensions/lib/harness-subagents/vendored/context.ts +1 -1
- package/.pi/extensions/lib/harness-subagents/vendored/env.ts +1 -1
- package/.pi/extensions/lib/harness-subagents/vendored/index.ts +2 -2
- package/.pi/extensions/lib/harness-subagents/vendored/output-file.ts +1 -1
- package/.pi/extensions/lib/harness-subagents/vendored/schedule.ts +1 -1
- package/.pi/extensions/lib/harness-subagents/vendored/settings.ts +1 -1
- package/.pi/extensions/lib/harness-subagents/vendored/skill-loader.ts +1 -1
- package/.pi/extensions/lib/harness-subagents/vendored/types.ts +2 -2
- package/.pi/extensions/lib/harness-subagents/vendored/ui/agent-widget.ts +1 -1
- package/.pi/extensions/lib/harness-subagents/vendored/ui/conversation-viewer.ts +2 -2
- package/.pi/extensions/lib/harness-subagents/vendored/ui/schedule-menu.ts +1 -1
- package/.pi/extensions/observation-bus.ts +8 -10
- package/.pi/extensions/pi-model-router-harness.ts +1 -1
- package/.pi/extensions/policy-gate.ts +136 -84
- package/.pi/extensions/provider-payload-sanitize.ts +1 -1
- package/.pi/extensions/review-integrity.ts +76 -22
- package/.pi/extensions/sentrux-rules-sync.ts +1 -1
- package/.pi/extensions/soundboard.ts +1 -1
- package/.pi/extensions/test-diff-integrity.ts +1 -1
- package/.pi/extensions/trace-recorder.ts +81 -21
- package/.pi/extensions/ultimate-pi-vcc.ts +1 -1
- package/.pi/harness/README.md +2 -0
- package/.pi/harness/agents.manifest.json +17 -13
- package/.pi/harness/docs/adrs/0009-sentrux-rules-lifecycle.md +1 -1
- package/.pi/harness/docs/adrs/0031-harness-run-context.md +41 -0
- package/.pi/harness/docs/adrs/0032-harness-command-orchestration.md +37 -0
- package/.pi/harness/docs/adrs/README.md +2 -0
- package/.pi/harness/evals/smoke/run-context.fixture.json +17 -0
- package/.pi/harness/specs/harness-run-context.schema.json +80 -0
- package/.pi/harness/specs/harness-spawn-context.schema.json +65 -0
- package/.pi/lib/harness-agent-output.ts +41 -0
- package/.pi/lib/harness-run-context.ts +1139 -0
- package/.pi/lib/harness-ui-state.ts +12 -1
- package/.pi/prompts/harness-abort.md +9 -6
- package/.pi/prompts/harness-auto.md +36 -61
- package/.pi/prompts/harness-critic.md +17 -32
- package/.pi/prompts/harness-eval.md +22 -30
- package/.pi/prompts/harness-incident.md +17 -34
- package/.pi/prompts/harness-plan.md +32 -36
- package/.pi/prompts/harness-review.md +18 -33
- package/.pi/prompts/harness-router-tune.md +16 -38
- package/.pi/prompts/harness-run.md +23 -40
- package/.pi/prompts/harness-setup.md +7 -27
- package/.pi/prompts/harness-trace.md +15 -34
- package/.pi/scripts/harness-generate-model-router.mjs +16 -13
- package/.pi/scripts/harness-verify.mjs +34 -0
- package/.pi/scripts/vendor-sync-pi-model-router.sh +10 -10
- package/CHANGELOG.md +34 -1
- package/README.md +31 -15
- package/THIRD_PARTY_NOTICES.md +1 -1
- package/package.json +14 -9
- package/vendor/pi-model-router/UPSTREAM_PIN.md +1 -1
- package/vendor/pi-model-router/extensions/commands.ts +2 -2
- package/vendor/pi-model-router/extensions/config.ts +2 -2
- package/vendor/pi-model-router/extensions/index.ts +1 -1
- package/vendor/pi-model-router/extensions/provider.ts +2 -2
- package/vendor/pi-model-router/extensions/routing.ts +2 -2
- package/vendor/pi-model-router/extensions/types.ts +1 -1
- package/vendor/pi-model-router/extensions/ui.ts +1 -1
- package/vendor/pi-model-router/package.json +4 -4
- package/vendor/pi-vcc/index.ts +1 -1
- package/vendor/pi-vcc/package.json +1 -1
- package/vendor/pi-vcc/src/commands/pi-vcc.ts +1 -1
- package/vendor/pi-vcc/src/commands/vcc-recall.ts +1 -1
- package/vendor/pi-vcc/src/core/content.ts +1 -1
- package/vendor/pi-vcc/src/core/load-messages.ts +1 -1
- package/vendor/pi-vcc/src/core/normalize.ts +1 -1
- package/vendor/pi-vcc/src/core/render-entries.ts +1 -1
- package/vendor/pi-vcc/src/core/report.ts +1 -1
- package/vendor/pi-vcc/src/core/search-entries.ts +1 -1
- package/vendor/pi-vcc/src/core/summarize.ts +1 -1
- package/vendor/pi-vcc/src/hooks/before-compact.ts +2 -2
- package/vendor/pi-vcc/src/tools/recall.ts +1 -1
- package/vendor/pi-vcc/src/types.ts +1 -1
- package/vendor/pi-vcc/tests/fixtures.ts +1 -1
- package/vendor/pi-vcc/tests/render-entries.test.ts +1 -1
- package/vendor/pi-vcc/tests/search-entries.test.ts +1 -1
- package/vendor/pi-vcc/tests/support/load-session.ts +2 -2
|
@@ -8,7 +8,25 @@
|
|
|
8
8
|
* - command surface via pi.registerCommand()
|
|
9
9
|
*/
|
|
10
10
|
|
|
11
|
-
import type { ExtensionAPI } from "@
|
|
11
|
+
import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
|
|
12
|
+
import {
|
|
13
|
+
extractWritePathFromToolInput,
|
|
14
|
+
getLatestRunContext,
|
|
15
|
+
getPolicyTransitionBlock,
|
|
16
|
+
hasApprovedPlanSignalFromUserPrompt,
|
|
17
|
+
hasHarnessAbortSignal,
|
|
18
|
+
inferHarnessPhaseFromPrompt,
|
|
19
|
+
isHarnessAutoSession,
|
|
20
|
+
isHarnessBootstrapPrompt,
|
|
21
|
+
isPlanPhaseAllowedMutation,
|
|
22
|
+
isPlanPhaseScopedWrite,
|
|
23
|
+
normalizeHarnessPath,
|
|
24
|
+
readPlanPacketFromPath,
|
|
25
|
+
saveProjectActiveRun,
|
|
26
|
+
saveRunContextToDisk,
|
|
27
|
+
userVisiblePromptSlice,
|
|
28
|
+
validatePlanPacket,
|
|
29
|
+
} from "../lib/harness-run-context.js";
|
|
12
30
|
|
|
13
31
|
type HarnessPhase = "plan" | "execute" | "evaluate" | "adversary" | "merge";
|
|
14
32
|
|
|
@@ -70,65 +88,17 @@ function defaultState(): PolicyState {
|
|
|
70
88
|
};
|
|
71
89
|
}
|
|
72
90
|
|
|
73
|
-
function
|
|
74
|
-
const
|
|
75
|
-
return
|
|
76
|
-
|
|
77
|
-
p.includes("harness-setup") ||
|
|
78
|
-
p.includes("full harness bootstrap")
|
|
79
|
-
);
|
|
80
|
-
}
|
|
81
|
-
|
|
82
|
-
function inferPhase(prompt: string, _current: HarnessPhase): HarnessPhase {
|
|
83
|
-
const p = prompt.toLowerCase();
|
|
84
|
-
if (
|
|
85
|
-
p.includes("/harness-plan") ||
|
|
86
|
-
p.includes("harness-plan") ||
|
|
87
|
-
p.includes("/harness-auto") ||
|
|
88
|
-
p.includes("harness-auto")
|
|
89
|
-
)
|
|
90
|
-
return "plan";
|
|
91
|
-
if (p.includes("/harness-run") || p.includes("harness-run")) return "execute";
|
|
92
|
-
if (p.includes("/harness-eval") || p.includes("harness-eval"))
|
|
93
|
-
return "evaluate";
|
|
94
|
-
if (p.includes("/harness-review") || p.includes("harness-review"))
|
|
95
|
-
return "evaluate";
|
|
96
|
-
if (p.includes("/harness-critic") || p.includes("harness-critic"))
|
|
97
|
-
return "adversary";
|
|
98
|
-
if (p.includes("adversary")) return "adversary";
|
|
99
|
-
if (p.includes("merge gate") || p.includes("policy decision")) return "merge";
|
|
100
|
-
return "execute";
|
|
101
|
-
}
|
|
102
|
-
|
|
103
|
-
function hasApprovedPlanSignal(prompt: string): boolean {
|
|
104
|
-
const p = prompt.toLowerCase();
|
|
105
|
-
return (
|
|
106
|
-
p.includes("planpacket") ||
|
|
107
|
-
p.includes("--plan") ||
|
|
108
|
-
p.includes("approved plan") ||
|
|
109
|
-
p.includes("plan_id")
|
|
110
|
-
);
|
|
111
|
-
}
|
|
112
|
-
|
|
113
|
-
function hasAbortSignal(prompt: string): boolean {
|
|
114
|
-
const p = prompt.toLowerCase();
|
|
115
|
-
return p.includes("/harness-abort") || p.includes("harness-abort");
|
|
116
|
-
}
|
|
117
|
-
|
|
118
|
-
function isValidTransition(from: HarnessPhase, to: HarnessPhase): boolean {
|
|
119
|
-
if (from === to) return true;
|
|
120
|
-
if (to === "plan") return true;
|
|
121
|
-
if (to === "execute") return true;
|
|
122
|
-
const fromIndex = PHASE_ORDER.indexOf(from);
|
|
123
|
-
const toIndex = PHASE_ORDER.indexOf(to);
|
|
124
|
-
return toIndex === fromIndex + 1;
|
|
91
|
+
function hasApprovedPlanSignal(prompt: string, entries: unknown[]): boolean {
|
|
92
|
+
const runCtx = getLatestRunContext(entries);
|
|
93
|
+
if (runCtx?.plan_ready) return true;
|
|
94
|
+
return hasApprovedPlanSignalFromUserPrompt(prompt);
|
|
125
95
|
}
|
|
126
96
|
|
|
127
97
|
function isMutatingBash(command: string): boolean {
|
|
128
98
|
return BASH_MUTATION_PATTERNS.some((pattern) => pattern.test(command));
|
|
129
99
|
}
|
|
130
100
|
|
|
131
|
-
function
|
|
101
|
+
function getLatestPolicyStateFull(ctx: {
|
|
132
102
|
sessionManager: { getEntries(): unknown[] };
|
|
133
103
|
}): PolicyState {
|
|
134
104
|
const entries = ctx.sessionManager.getEntries() as SessionEntryLike[];
|
|
@@ -171,13 +141,21 @@ function getLatestPolicyState(ctx: {
|
|
|
171
141
|
export default function policyGate(pi: ExtensionAPI) {
|
|
172
142
|
let state = defaultState();
|
|
173
143
|
|
|
144
|
+
const appendPolicyState = (next: PolicyState): void => {
|
|
145
|
+
state = next;
|
|
146
|
+
pi.appendEntry("harness-policy-state", state);
|
|
147
|
+
};
|
|
148
|
+
|
|
174
149
|
pi.on("session_start", async (_event, ctx) => {
|
|
175
|
-
state =
|
|
150
|
+
state = getLatestPolicyStateFull(ctx);
|
|
176
151
|
});
|
|
177
152
|
|
|
178
|
-
pi.on("before_agent_start", async (event) => {
|
|
179
|
-
const
|
|
180
|
-
const
|
|
153
|
+
pi.on("before_agent_start", async (event, ctx) => {
|
|
154
|
+
const userPrompt = userVisiblePromptSlice(event.prompt);
|
|
155
|
+
const entries = ctx.sessionManager.getEntries();
|
|
156
|
+
state = getLatestPolicyStateFull(ctx);
|
|
157
|
+
const bootstrapPrompt = isHarnessBootstrapPrompt(userPrompt);
|
|
158
|
+
const abortSignal = hasHarnessAbortSignal(userPrompt);
|
|
181
159
|
|
|
182
160
|
// /harness-setup instructions mention `harness-plan` (e.g. gh label text). That
|
|
183
161
|
// substring must not force inferPhase() to "plan" or bootstrap stays blocked.
|
|
@@ -220,18 +198,17 @@ export default function policyGate(pi: ExtensionAPI) {
|
|
|
220
198
|
};
|
|
221
199
|
}
|
|
222
200
|
|
|
223
|
-
const nextPhase =
|
|
224
|
-
const planSignal = hasApprovedPlanSignal(
|
|
201
|
+
const nextPhase = inferHarnessPhaseFromPrompt(userPrompt);
|
|
202
|
+
const planSignal = hasApprovedPlanSignal(userPrompt, entries);
|
|
225
203
|
|
|
226
|
-
|
|
204
|
+
const transitionBlock = getPolicyTransitionBlock(userPrompt, entries);
|
|
205
|
+
if (transitionBlock.blocked) {
|
|
227
206
|
return {
|
|
228
207
|
message: {
|
|
229
208
|
customType: "harness-policy-violation",
|
|
230
209
|
display: true,
|
|
231
|
-
content:
|
|
232
|
-
|
|
233
|
-
"Run /harness-plan first or continue in the current phase.",
|
|
234
|
-
].join("\n"),
|
|
210
|
+
content:
|
|
211
|
+
transitionBlock.message ?? "Policy gate blocked this command.",
|
|
235
212
|
},
|
|
236
213
|
};
|
|
237
214
|
}
|
|
@@ -242,13 +219,16 @@ export default function policyGate(pi: ExtensionAPI) {
|
|
|
242
219
|
}
|
|
243
220
|
|
|
244
221
|
if (nextPhase === "execute" && !state.approvedPlan && !planSignal) {
|
|
245
|
-
|
|
246
|
-
|
|
222
|
+
const runCtx = getLatestRunContext(entries);
|
|
223
|
+
if (runCtx?.plan_ready) {
|
|
224
|
+
state.approvedPlan = true;
|
|
225
|
+
state.planId = runCtx.plan_id ?? state.planId;
|
|
226
|
+
}
|
|
247
227
|
}
|
|
248
228
|
|
|
249
229
|
if (planSignal) {
|
|
250
230
|
state.approvedPlan = true;
|
|
251
|
-
const planMatch =
|
|
231
|
+
const planMatch = userPrompt.match(
|
|
252
232
|
/plan[_-]?id["'\s:=]+([A-Za-z0-9._:-]+)/i,
|
|
253
233
|
);
|
|
254
234
|
state.planId = planMatch?.[1] ?? state.planId;
|
|
@@ -261,26 +241,41 @@ export default function policyGate(pi: ExtensionAPI) {
|
|
|
261
241
|
state.updatedAt = nowIso();
|
|
262
242
|
pi.appendEntry("harness-policy-state", state);
|
|
263
243
|
|
|
244
|
+
const planPhaseHint =
|
|
245
|
+
state.phase === "plan"
|
|
246
|
+
? "\nPlan phase: present the full PlanPacket in chat, call ask_user (Approve / Request changes / Cancel), then write only the canonical plan-packet.json after Approve."
|
|
247
|
+
: "";
|
|
248
|
+
|
|
264
249
|
return {
|
|
265
|
-
systemPrompt: `${event.systemPrompt}\n\n[PolicyGate]\nPhase=${state.phase}; ApprovedPlan=${state.approvedPlan}; PlanId=${state.planId ?? "none"}; Aborted=${state.aborted}
|
|
250
|
+
systemPrompt: `${event.systemPrompt}\n\n[PolicyGate]\nPhase=${state.phase}; ApprovedPlan=${state.approvedPlan}; PlanId=${state.planId ?? "none"}; Aborted=${state.aborted}.${planPhaseHint}`,
|
|
266
251
|
};
|
|
267
252
|
});
|
|
268
253
|
|
|
269
|
-
pi.on("tool_call", async (event) => {
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
}
|
|
254
|
+
pi.on("tool_call", async (event, ctx) => {
|
|
255
|
+
state = getLatestPolicyStateFull(ctx);
|
|
256
|
+
const entries = ctx.sessionManager.getEntries();
|
|
257
|
+
const projectRoot = process.cwd();
|
|
258
|
+
const sessionId = ctx.sessionManager.getSessionId();
|
|
259
|
+
const runCtx = getLatestRunContext(entries);
|
|
260
|
+
|
|
277
261
|
if (MUTATING_TOOLS.has(event.toolName)) {
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
262
|
+
const decision = await isPlanPhaseAllowedMutation(
|
|
263
|
+
event.toolName,
|
|
264
|
+
event.input as Record<string, unknown>,
|
|
265
|
+
state.phase,
|
|
266
|
+
runCtx,
|
|
267
|
+
projectRoot,
|
|
268
|
+
{
|
|
269
|
+
aborted: state.aborted,
|
|
270
|
+
entries,
|
|
271
|
+
ownerSessionId: runCtx?.owner_pi_session_id,
|
|
272
|
+
currentSessionId: sessionId,
|
|
273
|
+
},
|
|
274
|
+
);
|
|
275
|
+
if (!decision.allowed) {
|
|
276
|
+
return { block: true, reason: decision.reason };
|
|
283
277
|
}
|
|
278
|
+
return undefined;
|
|
284
279
|
}
|
|
285
280
|
|
|
286
281
|
if (event.toolName === "bash") {
|
|
@@ -293,7 +288,7 @@ export default function policyGate(pi: ExtensionAPI) {
|
|
|
293
288
|
"policy-gate: mutating bash command blocked because harness-abort lock is active. Attach a new approved plan first.",
|
|
294
289
|
};
|
|
295
290
|
}
|
|
296
|
-
if (state.phase !== "execute") {
|
|
291
|
+
if (state.phase !== "execute" && state.phase !== "merge") {
|
|
297
292
|
return {
|
|
298
293
|
block: true,
|
|
299
294
|
reason: `policy-gate: mutating bash command blocked in phase '${state.phase}'.`,
|
|
@@ -304,6 +299,48 @@ export default function policyGate(pi: ExtensionAPI) {
|
|
|
304
299
|
return undefined;
|
|
305
300
|
});
|
|
306
301
|
|
|
302
|
+
pi.on("tool_result", async (event, ctx) => {
|
|
303
|
+
if (event.isError) return;
|
|
304
|
+
if (event.toolName !== "write" && event.toolName !== "edit") return;
|
|
305
|
+
|
|
306
|
+
const entries = ctx.sessionManager.getEntries();
|
|
307
|
+
state = getLatestPolicyStateFull(ctx);
|
|
308
|
+
const projectRoot = process.cwd();
|
|
309
|
+
const runCtx = getLatestRunContext(entries);
|
|
310
|
+
if (!runCtx) return;
|
|
311
|
+
|
|
312
|
+
const target = extractWritePathFromToolInput(
|
|
313
|
+
event.input as Record<string, unknown>,
|
|
314
|
+
);
|
|
315
|
+
if (!target) return;
|
|
316
|
+
const scoped = await isPlanPhaseScopedWrite(target, runCtx, projectRoot);
|
|
317
|
+
if (!scoped) return;
|
|
318
|
+
|
|
319
|
+
const planPath = normalizeHarnessPath(target, projectRoot);
|
|
320
|
+
const packet = await readPlanPacketFromPath(planPath);
|
|
321
|
+
const validation = validatePlanPacket(packet);
|
|
322
|
+
if (!validation.valid || !packet?.plan_id) return;
|
|
323
|
+
|
|
324
|
+
if (isHarnessAutoSession(entries)) {
|
|
325
|
+
state.phase = "execute";
|
|
326
|
+
state.approvedPlan = true;
|
|
327
|
+
state.planId = packet.plan_id;
|
|
328
|
+
state.aborted = false;
|
|
329
|
+
state.abortReason = null;
|
|
330
|
+
state.abortedAt = null;
|
|
331
|
+
state.updatedAt = nowIso();
|
|
332
|
+
appendPolicyState(state);
|
|
333
|
+
|
|
334
|
+
runCtx.plan_ready = true;
|
|
335
|
+
runCtx.plan_id = packet.plan_id;
|
|
336
|
+
runCtx.phase = "execute";
|
|
337
|
+
runCtx.updated_at = nowIso();
|
|
338
|
+
pi.appendEntry("harness-run-context", runCtx);
|
|
339
|
+
void saveRunContextToDisk(runCtx);
|
|
340
|
+
void saveProjectActiveRun(runCtx);
|
|
341
|
+
}
|
|
342
|
+
});
|
|
343
|
+
|
|
307
344
|
pi.registerCommand("harness-abort", {
|
|
308
345
|
description: "Safely abort current harness run and reset to plan phase",
|
|
309
346
|
handler: async (args, ctx) => {
|
|
@@ -318,6 +355,21 @@ export default function policyGate(pi: ExtensionAPI) {
|
|
|
318
355
|
state.updatedAt = state.abortedAt;
|
|
319
356
|
pi.appendEntry("harness-policy-state", state);
|
|
320
357
|
|
|
358
|
+
const runCtx = getLatestRunContext(ctx.sessionManager.getEntries());
|
|
359
|
+
if (runCtx) {
|
|
360
|
+
runCtx.status = "aborted";
|
|
361
|
+
runCtx.plan_ready = false;
|
|
362
|
+
runCtx.last_outcome = "aborted";
|
|
363
|
+
runCtx.last_completed_step = "abort";
|
|
364
|
+
runCtx.next_recommended_command = runCtx.task_summary
|
|
365
|
+
? `/harness-plan "${runCtx.task_summary}"`
|
|
366
|
+
: '/harness-plan "<task>"';
|
|
367
|
+
runCtx.updated_at = state.abortedAt ?? nowIso();
|
|
368
|
+
pi.appendEntry("harness-run-context", runCtx);
|
|
369
|
+
void saveRunContextToDisk(runCtx);
|
|
370
|
+
void saveProjectActiveRun(runCtx);
|
|
371
|
+
}
|
|
372
|
+
|
|
321
373
|
const lines = [
|
|
322
374
|
"Harness run aborted safely.",
|
|
323
375
|
" phase: plan",
|
|
@@ -342,7 +394,7 @@ export default function policyGate(pi: ExtensionAPI) {
|
|
|
342
394
|
pi.registerCommand("harness-policy-status", {
|
|
343
395
|
description: "Show current harness policy gate state",
|
|
344
396
|
handler: async (_args, ctx) => {
|
|
345
|
-
const latest =
|
|
397
|
+
const latest = getLatestPolicyStateFull(ctx);
|
|
346
398
|
const lines = [
|
|
347
399
|
"Harness policy gate:",
|
|
348
400
|
` phase: ${latest.phase}`,
|
|
@@ -1,19 +1,34 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* review-integrity — enforce evaluator/adversary isolation from executor session.
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
4
|
+
* Parent orchestrators spawn review agents in isolated subagent sessions.
|
|
5
|
+
* Direct review tools in the executor session are blocked; Agent/get_subagent_result
|
|
6
|
+
* for harness review agents remain allowed.
|
|
6
7
|
*/
|
|
7
8
|
|
|
8
9
|
import { appendFile, mkdir } from "node:fs/promises";
|
|
9
10
|
import { join } from "node:path";
|
|
10
|
-
import type { ExtensionAPI } from "@
|
|
11
|
+
import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
|
|
11
12
|
|
|
12
13
|
type HarnessPhase = "plan" | "execute" | "evaluate" | "adversary" | "merge";
|
|
13
14
|
|
|
14
15
|
const INCIDENTS_DIR = join(process.cwd(), ".pi", "harness", "incidents");
|
|
15
16
|
const INCIDENT_FILE = join(INCIDENTS_DIR, "review-integrity.jsonl");
|
|
16
17
|
|
|
18
|
+
const ORCHESTRATION_TOOLS = new Set([
|
|
19
|
+
"Agent",
|
|
20
|
+
"get_subagent_result",
|
|
21
|
+
"steer_subagent",
|
|
22
|
+
]);
|
|
23
|
+
|
|
24
|
+
const REVIEW_SUBAGENT_TYPES = new Set([
|
|
25
|
+
"harness/evaluator",
|
|
26
|
+
"harness/adversary",
|
|
27
|
+
"harness/tie-breaker",
|
|
28
|
+
]);
|
|
29
|
+
|
|
30
|
+
const EXECUTOR_SUBAGENT_TYPE = "harness/executor";
|
|
31
|
+
|
|
17
32
|
interface IsolationState {
|
|
18
33
|
executorSessionId: string | null;
|
|
19
34
|
violationActive: boolean;
|
|
@@ -89,6 +104,17 @@ function restoreState(ctx: {
|
|
|
89
104
|
};
|
|
90
105
|
}
|
|
91
106
|
|
|
107
|
+
function subagentTypeFromInput(
|
|
108
|
+
input: Record<string, unknown> | undefined,
|
|
109
|
+
): string {
|
|
110
|
+
if (!input) return "";
|
|
111
|
+
const direct = input.subagent_type;
|
|
112
|
+
if (typeof direct === "string") return direct;
|
|
113
|
+
const nested = input as { subagentType?: string };
|
|
114
|
+
if (typeof nested.subagentType === "string") return nested.subagentType;
|
|
115
|
+
return "";
|
|
116
|
+
}
|
|
117
|
+
|
|
92
118
|
async function appendIncident(payload: Record<string, unknown>): Promise<void> {
|
|
93
119
|
await mkdir(INCIDENTS_DIR, { recursive: true });
|
|
94
120
|
await appendFile(
|
|
@@ -105,6 +131,10 @@ export default function reviewIntegrity(pi: ExtensionAPI) {
|
|
|
105
131
|
updatedAt: nowIso(),
|
|
106
132
|
};
|
|
107
133
|
|
|
134
|
+
const persist = (): void => {
|
|
135
|
+
pi.appendEntry("harness-review-integrity", state);
|
|
136
|
+
};
|
|
137
|
+
|
|
108
138
|
pi.on("session_start", async (_event, ctx) => {
|
|
109
139
|
state = restoreState(ctx);
|
|
110
140
|
});
|
|
@@ -115,7 +145,7 @@ export default function reviewIntegrity(pi: ExtensionAPI) {
|
|
|
115
145
|
state.executorSessionId = ctx.sessionManager.getSessionId();
|
|
116
146
|
state.violationActive = false;
|
|
117
147
|
state.updatedAt = nowIso();
|
|
118
|
-
|
|
148
|
+
persist();
|
|
119
149
|
});
|
|
120
150
|
|
|
121
151
|
pi.on("before_agent_start", async (_event, ctx) => {
|
|
@@ -125,7 +155,7 @@ export default function reviewIntegrity(pi: ExtensionAPI) {
|
|
|
125
155
|
if (!inReview) {
|
|
126
156
|
state.violationActive = false;
|
|
127
157
|
state.updatedAt = nowIso();
|
|
128
|
-
|
|
158
|
+
persist();
|
|
129
159
|
return undefined;
|
|
130
160
|
}
|
|
131
161
|
|
|
@@ -135,42 +165,66 @@ export default function reviewIntegrity(pi: ExtensionAPI) {
|
|
|
135
165
|
) {
|
|
136
166
|
state.violationActive = false;
|
|
137
167
|
state.updatedAt = nowIso();
|
|
138
|
-
|
|
168
|
+
persist();
|
|
139
169
|
return undefined;
|
|
140
170
|
}
|
|
141
171
|
|
|
142
172
|
state.violationActive = true;
|
|
143
173
|
state.updatedAt = nowIso();
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
await appendIncident({
|
|
147
|
-
type: "review_integrity_violation",
|
|
148
|
-
session_id: currentSessionId,
|
|
149
|
-
phase,
|
|
150
|
-
reason:
|
|
151
|
-
"evaluator/adversary session is not isolated from executor session",
|
|
152
|
-
mitigation:
|
|
153
|
-
"fork or switch to a clean review session before running review tools",
|
|
154
|
-
});
|
|
174
|
+
persist();
|
|
155
175
|
|
|
156
176
|
return {
|
|
157
177
|
message: {
|
|
158
|
-
customType: "harness-review-integrity-
|
|
178
|
+
customType: "harness-review-integrity-hint",
|
|
159
179
|
display: true,
|
|
160
180
|
content: [
|
|
161
|
-
"Review
|
|
162
|
-
"
|
|
181
|
+
"Review phase in executor session: spawn harness/evaluator or harness/adversary via Agent (isolated subagent context).",
|
|
182
|
+
"Do not run review checks directly in this session — use get_subagent_result after spawn.",
|
|
163
183
|
].join("\n"),
|
|
164
184
|
},
|
|
165
185
|
};
|
|
166
186
|
});
|
|
167
187
|
|
|
168
|
-
pi.on("tool_call", async (
|
|
188
|
+
pi.on("tool_call", async (event, ctx) => {
|
|
189
|
+
if (event.toolName === "Agent") {
|
|
190
|
+
const subagentType = subagentTypeFromInput(
|
|
191
|
+
event.input as Record<string, unknown> | undefined,
|
|
192
|
+
);
|
|
193
|
+
if (subagentType === EXECUTOR_SUBAGENT_TYPE) {
|
|
194
|
+
state.executorSessionId = ctx.sessionManager.getSessionId();
|
|
195
|
+
state.violationActive = false;
|
|
196
|
+
state.updatedAt = nowIso();
|
|
197
|
+
persist();
|
|
198
|
+
return undefined;
|
|
199
|
+
}
|
|
200
|
+
if (REVIEW_SUBAGENT_TYPES.has(subagentType)) {
|
|
201
|
+
state.violationActive = false;
|
|
202
|
+
state.updatedAt = nowIso();
|
|
203
|
+
persist();
|
|
204
|
+
return undefined;
|
|
205
|
+
}
|
|
206
|
+
}
|
|
207
|
+
|
|
169
208
|
if (!state.violationActive) return undefined;
|
|
209
|
+
|
|
210
|
+
if (ORCHESTRATION_TOOLS.has(event.toolName)) {
|
|
211
|
+
return undefined;
|
|
212
|
+
}
|
|
213
|
+
|
|
214
|
+
await appendIncident({
|
|
215
|
+
type: "review_integrity_violation",
|
|
216
|
+
session_id: ctx.sessionManager.getSessionId(),
|
|
217
|
+
tool: event.toolName,
|
|
218
|
+
reason:
|
|
219
|
+
"direct tool use in review phase while sharing executor session context",
|
|
220
|
+
mitigation:
|
|
221
|
+
"spawn harness/evaluator or harness/adversary via Agent instead",
|
|
222
|
+
});
|
|
223
|
+
|
|
170
224
|
return {
|
|
171
225
|
block: true,
|
|
172
226
|
reason:
|
|
173
|
-
"review-integrity: tool
|
|
227
|
+
"review-integrity: tool blocked in review phase — spawn an isolated review subagent via Agent.",
|
|
174
228
|
};
|
|
175
229
|
});
|
|
176
230
|
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
5
|
import { spawn } from "node:child_process";
|
|
6
|
-
import type { ExtensionAPI } from "@
|
|
6
|
+
import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
|
|
7
7
|
import { resolveHarnessScript } from "./lib/harness-paths.js";
|
|
8
8
|
|
|
9
9
|
function resolveSyncScript(): string {
|
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
|
|
13
13
|
import { appendFile, mkdir } from "node:fs/promises";
|
|
14
14
|
import { join } from "node:path";
|
|
15
|
-
import type { ExtensionAPI } from "@
|
|
15
|
+
import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
|
|
16
16
|
|
|
17
17
|
const INCIDENTS_DIR = join(process.cwd(), ".pi", "harness", "incidents");
|
|
18
18
|
const INCIDENT_FILE = join(INCIDENTS_DIR, "test-diff-integrity.jsonl");
|
|
@@ -9,11 +9,18 @@
|
|
|
9
9
|
|
|
10
10
|
import { appendFile, mkdir, readFile, writeFile } from "node:fs/promises";
|
|
11
11
|
import { join } from "node:path";
|
|
12
|
-
import type { ExtensionAPI } from "@
|
|
12
|
+
import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
|
|
13
|
+
import {
|
|
14
|
+
getLatestRunContext,
|
|
15
|
+
getRunIdFromSession,
|
|
16
|
+
type HarnessPhase,
|
|
17
|
+
isHarnessSlashCommand,
|
|
18
|
+
loadRunContextFromDisk,
|
|
19
|
+
phaseTraceFileName,
|
|
20
|
+
saveRunContextToDisk,
|
|
21
|
+
} from "../lib/harness-run-context.js";
|
|
13
22
|
import { captureHarnessEvent } from "./lib/harness-posthog.js";
|
|
14
23
|
|
|
15
|
-
type HarnessPhase = "plan" | "execute" | "evaluate" | "adversary" | "merge";
|
|
16
|
-
|
|
17
24
|
interface ToolSpan {
|
|
18
25
|
tool_call_id: string;
|
|
19
26
|
tool_name: string;
|
|
@@ -52,10 +59,6 @@ function nowIso(): string {
|
|
|
52
59
|
return new Date().toISOString();
|
|
53
60
|
}
|
|
54
61
|
|
|
55
|
-
function makeRunId(sessionId: string): string {
|
|
56
|
-
return `${sessionId}-${Date.now()}`;
|
|
57
|
-
}
|
|
58
|
-
|
|
59
62
|
function parsePhase(ctx: {
|
|
60
63
|
sessionManager: { getEntries(): unknown[] };
|
|
61
64
|
}): HarnessPhase {
|
|
@@ -165,8 +168,22 @@ async function readRunTraceSchemaVersion(): Promise<string> {
|
|
|
165
168
|
}
|
|
166
169
|
}
|
|
167
170
|
|
|
171
|
+
function resolveRunIdForAgentStart(
|
|
172
|
+
ctx: { sessionManager: { getEntries(): unknown[]; getSessionId(): string } },
|
|
173
|
+
prompt: string,
|
|
174
|
+
): string {
|
|
175
|
+
const entries = ctx.sessionManager.getEntries();
|
|
176
|
+
const sessionId = ctx.sessionManager.getSessionId();
|
|
177
|
+
const fromSession = getRunIdFromSession(entries, sessionId);
|
|
178
|
+
if (fromSession && isHarnessSlashCommand(prompt)) return fromSession;
|
|
179
|
+
const runCtx = getLatestRunContext(entries);
|
|
180
|
+
if (runCtx && isHarnessSlashCommand(prompt)) return runCtx.run_id;
|
|
181
|
+
return `${sessionId}-${Date.now()}`;
|
|
182
|
+
}
|
|
183
|
+
|
|
168
184
|
export default function traceRecorder(pi: ExtensionAPI) {
|
|
169
185
|
let activeRun: ActiveRun | null = null;
|
|
186
|
+
let lastUserPrompt = "";
|
|
170
187
|
|
|
171
188
|
async function writeEvent(
|
|
172
189
|
runId: string,
|
|
@@ -180,14 +197,25 @@ export default function traceRecorder(pi: ExtensionAPI) {
|
|
|
180
197
|
);
|
|
181
198
|
}
|
|
182
199
|
|
|
200
|
+
pi.on("before_agent_start", async (event) => {
|
|
201
|
+
lastUserPrompt = event.prompt;
|
|
202
|
+
});
|
|
203
|
+
|
|
183
204
|
pi.on("agent_start", async (_event, ctx) => {
|
|
205
|
+
if (!isHarnessSlashCommand(lastUserPrompt)) {
|
|
206
|
+
activeRun = null;
|
|
207
|
+
return;
|
|
208
|
+
}
|
|
209
|
+
|
|
184
210
|
const sessionId = ctx.sessionManager.getSessionId();
|
|
185
|
-
const
|
|
211
|
+
const entries = ctx.sessionManager.getEntries();
|
|
212
|
+
const runId = resolveRunIdForAgentStart(ctx, lastUserPrompt);
|
|
186
213
|
const startedAt = nowIso();
|
|
214
|
+
const phase = parsePhase(ctx);
|
|
187
215
|
activeRun = {
|
|
188
216
|
runId,
|
|
189
217
|
planId: parsePlanId(ctx),
|
|
190
|
-
phase
|
|
218
|
+
phase,
|
|
191
219
|
startedAt,
|
|
192
220
|
toolSpans: new Map(),
|
|
193
221
|
artifactRefs: new Set(),
|
|
@@ -198,15 +226,29 @@ export default function traceRecorder(pi: ExtensionAPI) {
|
|
|
198
226
|
phase: activeRun.phase,
|
|
199
227
|
started_at: startedAt,
|
|
200
228
|
});
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
229
|
+
|
|
230
|
+
const runCtx = getLatestRunContext(entries);
|
|
231
|
+
const projectRoot = process.cwd();
|
|
232
|
+
const diskCtx =
|
|
233
|
+
runCtx ?? (await loadRunContextFromDisk(runId, projectRoot));
|
|
234
|
+
const shouldEmitStarted = !diskCtx?.harness_run_started_emitted;
|
|
235
|
+
if (shouldEmitStarted) {
|
|
236
|
+
captureHarnessEvent(sessionId, "harness_run_started", {
|
|
237
|
+
harness_run_id: runId,
|
|
238
|
+
harness_plan_id: activeRun.planId,
|
|
239
|
+
harness_phase: activeRun.phase,
|
|
240
|
+
pi_session_id: sessionId,
|
|
241
|
+
model: ctx.model?.id ?? "unknown",
|
|
242
|
+
thinking_level:
|
|
243
|
+
pi.getThinkingLevel() === "minimal" ? "off" : pi.getThinkingLevel(),
|
|
244
|
+
});
|
|
245
|
+
if (diskCtx) {
|
|
246
|
+
diskCtx.harness_run_started_emitted = true;
|
|
247
|
+
await saveRunContextToDisk(diskCtx);
|
|
248
|
+
pi.appendEntry("harness-run-context", diskCtx);
|
|
249
|
+
}
|
|
250
|
+
}
|
|
251
|
+
|
|
210
252
|
await writeEvent(runId, {
|
|
211
253
|
type: "run_start",
|
|
212
254
|
run_id: runId,
|
|
@@ -282,6 +324,12 @@ export default function traceRecorder(pi: ExtensionAPI) {
|
|
|
282
324
|
cost: usage,
|
|
283
325
|
};
|
|
284
326
|
|
|
327
|
+
const phaseFile = phaseTraceFileName(activeRun.phase);
|
|
328
|
+
await writeFile(
|
|
329
|
+
join(runDir, phaseFile),
|
|
330
|
+
`${JSON.stringify(summary, null, 2)}\n`,
|
|
331
|
+
"utf-8",
|
|
332
|
+
);
|
|
285
333
|
await writeFile(
|
|
286
334
|
join(runDir, "trace.json"),
|
|
287
335
|
`${JSON.stringify(summary, null, 2)}\n`,
|
|
@@ -313,7 +361,7 @@ export default function traceRecorder(pi: ExtensionAPI) {
|
|
|
313
361
|
});
|
|
314
362
|
|
|
315
363
|
pi.registerCommand("harness-trace-last", {
|
|
316
|
-
description: "Show last
|
|
364
|
+
description: "Show last harness trace phase summary (no run id)",
|
|
317
365
|
handler: async (_args, ctx) => {
|
|
318
366
|
const entries = ctx.sessionManager.getEntries();
|
|
319
367
|
for (let i = entries.length - 1; i >= 0; i--) {
|
|
@@ -322,8 +370,20 @@ export default function traceRecorder(pi: ExtensionAPI) {
|
|
|
322
370
|
entry.type === "custom" &&
|
|
323
371
|
entry.customType === "harness-run-trace"
|
|
324
372
|
) {
|
|
325
|
-
const data = entry.data as
|
|
326
|
-
|
|
373
|
+
const data = entry.data as
|
|
374
|
+
| {
|
|
375
|
+
phase?: string;
|
|
376
|
+
tool_span_count?: number;
|
|
377
|
+
}
|
|
378
|
+
| undefined;
|
|
379
|
+
const handoff = getLatestRunContext(entries);
|
|
380
|
+
const next =
|
|
381
|
+
handoff?.next_recommended_command ?? "/harness-run-status";
|
|
382
|
+
const msg = [
|
|
383
|
+
`Last harness trace: phase ${data?.phase ?? "unknown"}`,
|
|
384
|
+
`tool spans: ${data?.tool_span_count ?? 0}`,
|
|
385
|
+
`Next: ${next}`,
|
|
386
|
+
].join("\n");
|
|
327
387
|
if (ctx.hasUI) {
|
|
328
388
|
ctx.ui.notify(msg, "info");
|
|
329
389
|
} else {
|