ultimate-pi 0.7.0 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agents/skills/harness-decisions/SKILL.md +20 -1
- package/.agents/skills/harness-eval/SKILL.md +11 -13
- package/.agents/skills/harness-orchestration/SKILL.md +36 -30
- package/.agents/skills/harness-plan/SKILL.md +13 -18
- package/.pi/PACKAGING.md +1 -1
- package/.pi/agents/harness/adversary.md +20 -12
- package/.pi/agents/harness/evaluator.md +25 -14
- package/.pi/agents/harness/executor.md +27 -16
- package/.pi/agents/harness/incident-recorder.md +37 -0
- package/.pi/agents/harness/meta-optimizer.md +18 -15
- package/.pi/agents/harness/planner.md +27 -30
- package/.pi/agents/harness/tie-breaker.md +4 -2
- package/.pi/agents/harness/trace-librarian.md +18 -11
- package/.pi/agents/pi-pi/ext-expert.md +1 -1
- package/.pi/agents/pi-pi/keybinding-expert.md +1 -1
- package/.pi/agents/pi-pi/tui-expert.md +3 -3
- package/.pi/extensions/00-ultimate-pi-system-prompt.ts +2 -2
- package/.pi/extensions/budget-guard.ts +1 -1
- package/.pi/extensions/custom-footer.ts +8 -3
- package/.pi/extensions/custom-header.ts +2 -2
- package/.pi/extensions/debate-orchestrator.ts +1 -1
- package/.pi/extensions/dotenv-loader.ts +1 -1
- package/.pi/extensions/drift-monitor.ts +1 -1
- package/.pi/extensions/harness-ask-user.ts +1 -1
- package/.pi/extensions/harness-live-widget.ts +1 -1
- package/.pi/extensions/harness-run-context.ts +52 -10
- package/.pi/extensions/harness-telemetry.ts +1 -1
- package/.pi/extensions/harness-web-guard.ts +1 -1
- package/.pi/extensions/harness-web-tools.ts +1 -1
- package/.pi/extensions/lib/ask-user/dialog.ts +2 -2
- package/.pi/extensions/lib/ask-user/fallback.ts +1 -1
- package/.pi/extensions/lib/ask-user/render.ts +3 -3
- package/.pi/extensions/lib/harness-subagents/agent-loader.ts +1 -1
- package/.pi/extensions/lib/harness-subagents/agent-parser.ts +1 -1
- package/.pi/extensions/lib/harness-subagents/blackboard-tool.ts +1 -1
- package/.pi/extensions/lib/harness-subagents/harness-subagent-policy.ts +134 -0
- package/.pi/extensions/lib/harness-subagents/vendored/agent-manager.ts +2 -2
- package/.pi/extensions/lib/harness-subagents/vendored/agent-runner.ts +9 -5
- package/.pi/extensions/lib/harness-subagents/vendored/context.ts +1 -1
- package/.pi/extensions/lib/harness-subagents/vendored/env.ts +1 -1
- package/.pi/extensions/lib/harness-subagents/vendored/index.ts +2 -2
- package/.pi/extensions/lib/harness-subagents/vendored/output-file.ts +1 -1
- package/.pi/extensions/lib/harness-subagents/vendored/schedule.ts +1 -1
- package/.pi/extensions/lib/harness-subagents/vendored/settings.ts +1 -1
- package/.pi/extensions/lib/harness-subagents/vendored/skill-loader.ts +1 -1
- package/.pi/extensions/lib/harness-subagents/vendored/types.ts +2 -2
- package/.pi/extensions/lib/harness-subagents/vendored/ui/agent-widget.ts +1 -1
- package/.pi/extensions/lib/harness-subagents/vendored/ui/conversation-viewer.ts +2 -2
- package/.pi/extensions/lib/harness-subagents/vendored/ui/schedule-menu.ts +1 -1
- package/.pi/extensions/observation-bus.ts +1 -1
- package/.pi/extensions/pi-model-router-harness.ts +1 -1
- package/.pi/extensions/policy-gate.ts +86 -16
- package/.pi/extensions/provider-payload-sanitize.ts +1 -1
- package/.pi/extensions/review-integrity.ts +76 -22
- package/.pi/extensions/sentrux-rules-sync.ts +1 -1
- package/.pi/extensions/soundboard.ts +1 -1
- package/.pi/extensions/test-diff-integrity.ts +1 -1
- package/.pi/extensions/trace-recorder.ts +1 -1
- package/.pi/extensions/ultimate-pi-vcc.ts +1 -1
- package/.pi/harness/agents.manifest.json +16 -12
- package/.pi/harness/docs/adrs/0031-harness-run-context.md +5 -2
- package/.pi/harness/docs/adrs/0032-harness-command-orchestration.md +37 -0
- package/.pi/harness/docs/adrs/README.md +1 -0
- package/.pi/harness/specs/harness-spawn-context.schema.json +65 -0
- package/.pi/lib/harness-agent-output.ts +41 -0
- package/.pi/lib/harness-run-context.ts +352 -7
- package/.pi/lib/harness-ui-state.ts +1 -1
- package/.pi/prompts/harness-auto.md +36 -61
- package/.pi/prompts/harness-critic.md +15 -28
- package/.pi/prompts/harness-eval.md +19 -27
- package/.pi/prompts/harness-incident.md +15 -34
- package/.pi/prompts/harness-plan.md +31 -50
- package/.pi/prompts/harness-review.md +16 -30
- package/.pi/prompts/harness-router-tune.md +16 -38
- package/.pi/prompts/harness-run.md +21 -38
- package/.pi/prompts/harness-setup.md +2 -0
- package/.pi/prompts/harness-trace.md +13 -30
- package/.pi/scripts/harness-generate-model-router.mjs +16 -13
- package/.pi/scripts/harness-verify.mjs +16 -0
- package/.pi/scripts/vendor-sync-pi-model-router.sh +10 -10
- package/CHANGELOG.md +19 -1
- package/README.md +4 -5
- package/THIRD_PARTY_NOTICES.md +1 -1
- package/package.json +13 -8
- package/vendor/pi-model-router/UPSTREAM_PIN.md +1 -1
- package/vendor/pi-model-router/extensions/commands.ts +2 -2
- package/vendor/pi-model-router/extensions/config.ts +2 -2
- package/vendor/pi-model-router/extensions/index.ts +1 -1
- package/vendor/pi-model-router/extensions/provider.ts +2 -2
- package/vendor/pi-model-router/extensions/routing.ts +2 -2
- package/vendor/pi-model-router/extensions/types.ts +1 -1
- package/vendor/pi-model-router/extensions/ui.ts +1 -1
- package/vendor/pi-model-router/package.json +4 -4
- package/vendor/pi-vcc/index.ts +1 -1
- package/vendor/pi-vcc/package.json +1 -1
- package/vendor/pi-vcc/src/commands/pi-vcc.ts +1 -1
- package/vendor/pi-vcc/src/commands/vcc-recall.ts +1 -1
- package/vendor/pi-vcc/src/core/content.ts +1 -1
- package/vendor/pi-vcc/src/core/load-messages.ts +1 -1
- package/vendor/pi-vcc/src/core/normalize.ts +1 -1
- package/vendor/pi-vcc/src/core/render-entries.ts +1 -1
- package/vendor/pi-vcc/src/core/report.ts +1 -1
- package/vendor/pi-vcc/src/core/search-entries.ts +1 -1
- package/vendor/pi-vcc/src/core/summarize.ts +1 -1
- package/vendor/pi-vcc/src/hooks/before-compact.ts +2 -2
- package/vendor/pi-vcc/src/tools/recall.ts +1 -1
- package/vendor/pi-vcc/src/types.ts +1 -1
- package/vendor/pi-vcc/tests/fixtures.ts +1 -1
- package/vendor/pi-vcc/tests/render-entries.test.ts +1 -1
- package/vendor/pi-vcc/tests/search-entries.test.ts +1 -1
- package/vendor/pi-vcc/tests/support/load-session.ts +2 -2
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
* Uses the callback form of setWidget for themed rendering.
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
|
-
import { truncateToWidth } from "@
|
|
8
|
+
import { truncateToWidth } from "@earendil-works/pi-tui";
|
|
9
9
|
import type { AgentManager } from "../agent-manager.js";
|
|
10
10
|
import { getConfig } from "../agent-types.js";
|
|
11
11
|
import type { AgentInvocation, SubagentType } from "../types.js";
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
* Subscribes to session events for real-time streaming updates.
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
|
-
import type { AgentSession } from "@
|
|
8
|
+
import type { AgentSession } from "@earendil-works/pi-coding-agent";
|
|
9
9
|
import {
|
|
10
10
|
type Component,
|
|
11
11
|
matchesKey,
|
|
@@ -13,7 +13,7 @@ import {
|
|
|
13
13
|
truncateToWidth,
|
|
14
14
|
visibleWidth,
|
|
15
15
|
wrapTextWithAnsi,
|
|
16
|
-
} from "@
|
|
16
|
+
} from "@earendil-works/pi-tui";
|
|
17
17
|
import { extractText } from "../context.js";
|
|
18
18
|
import type { AgentRecord } from "../types.js";
|
|
19
19
|
import { getLifetimeTotal, getSessionContextPercent } from "../usage.js";
|
|
@@ -8,7 +8,7 @@
|
|
|
8
8
|
* if real demand emerges.
|
|
9
9
|
*/
|
|
10
10
|
|
|
11
|
-
import type { ExtensionCommandContext } from "@
|
|
11
|
+
import type { ExtensionCommandContext } from "@earendil-works/pi-coding-agent";
|
|
12
12
|
import type { SubagentScheduler } from "../schedule.js";
|
|
13
13
|
import type { ScheduledSubagent } from "../types.js";
|
|
14
14
|
|
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
8
|
import { randomUUID } from "node:crypto";
|
|
9
|
-
import type { ExtensionAPI } from "@
|
|
9
|
+
import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
|
|
10
10
|
import { getRunIdFromSession } from "../lib/harness-run-context.js";
|
|
11
11
|
|
|
12
12
|
type HarnessPhase = "plan" | "execute" | "evaluate" | "adversary" | "merge";
|
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
|
|
7
7
|
import { existsSync, readFileSync } from "node:fs";
|
|
8
8
|
import { join } from "node:path";
|
|
9
|
-
import type { ExtensionAPI } from "@
|
|
9
|
+
import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
|
|
10
10
|
import vendorModelRouter from "../../vendor/pi-model-router/extensions/index.js";
|
|
11
11
|
|
|
12
12
|
function isHarnessRouterReady(cwd: string): boolean {
|
|
@@ -8,17 +8,24 @@
|
|
|
8
8
|
* - command surface via pi.registerCommand()
|
|
9
9
|
*/
|
|
10
10
|
|
|
11
|
-
import type { ExtensionAPI } from "@
|
|
11
|
+
import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
|
|
12
12
|
import {
|
|
13
|
+
extractWritePathFromToolInput,
|
|
13
14
|
getLatestRunContext,
|
|
14
15
|
getPolicyTransitionBlock,
|
|
15
16
|
hasApprovedPlanSignalFromUserPrompt,
|
|
16
17
|
hasHarnessAbortSignal,
|
|
17
18
|
inferHarnessPhaseFromPrompt,
|
|
19
|
+
isHarnessAutoSession,
|
|
18
20
|
isHarnessBootstrapPrompt,
|
|
21
|
+
isPlanPhaseAllowedMutation,
|
|
22
|
+
isPlanPhaseScopedWrite,
|
|
23
|
+
normalizeHarnessPath,
|
|
24
|
+
readPlanPacketFromPath,
|
|
19
25
|
saveProjectActiveRun,
|
|
20
26
|
saveRunContextToDisk,
|
|
21
27
|
userVisiblePromptSlice,
|
|
28
|
+
validatePlanPacket,
|
|
22
29
|
} from "../lib/harness-run-context.js";
|
|
23
30
|
|
|
24
31
|
type HarnessPhase = "plan" | "execute" | "evaluate" | "adversary" | "merge";
|
|
@@ -134,6 +141,11 @@ function getLatestPolicyStateFull(ctx: {
|
|
|
134
141
|
export default function policyGate(pi: ExtensionAPI) {
|
|
135
142
|
let state = defaultState();
|
|
136
143
|
|
|
144
|
+
const appendPolicyState = (next: PolicyState): void => {
|
|
145
|
+
state = next;
|
|
146
|
+
pi.appendEntry("harness-policy-state", state);
|
|
147
|
+
};
|
|
148
|
+
|
|
137
149
|
pi.on("session_start", async (_event, ctx) => {
|
|
138
150
|
state = getLatestPolicyStateFull(ctx);
|
|
139
151
|
});
|
|
@@ -141,6 +153,7 @@ export default function policyGate(pi: ExtensionAPI) {
|
|
|
141
153
|
pi.on("before_agent_start", async (event, ctx) => {
|
|
142
154
|
const userPrompt = userVisiblePromptSlice(event.prompt);
|
|
143
155
|
const entries = ctx.sessionManager.getEntries();
|
|
156
|
+
state = getLatestPolicyStateFull(ctx);
|
|
144
157
|
const bootstrapPrompt = isHarnessBootstrapPrompt(userPrompt);
|
|
145
158
|
const abortSignal = hasHarnessAbortSignal(userPrompt);
|
|
146
159
|
|
|
@@ -228,26 +241,41 @@ export default function policyGate(pi: ExtensionAPI) {
|
|
|
228
241
|
state.updatedAt = nowIso();
|
|
229
242
|
pi.appendEntry("harness-policy-state", state);
|
|
230
243
|
|
|
244
|
+
const planPhaseHint =
|
|
245
|
+
state.phase === "plan"
|
|
246
|
+
? "\nPlan phase: present the full PlanPacket in chat, call ask_user (Approve / Request changes / Cancel), then write only the canonical plan-packet.json after Approve."
|
|
247
|
+
: "";
|
|
248
|
+
|
|
231
249
|
return {
|
|
232
|
-
systemPrompt: `${event.systemPrompt}\n\n[PolicyGate]\nPhase=${state.phase}; ApprovedPlan=${state.approvedPlan}; PlanId=${state.planId ?? "none"}; Aborted=${state.aborted}
|
|
250
|
+
systemPrompt: `${event.systemPrompt}\n\n[PolicyGate]\nPhase=${state.phase}; ApprovedPlan=${state.approvedPlan}; PlanId=${state.planId ?? "none"}; Aborted=${state.aborted}.${planPhaseHint}`,
|
|
233
251
|
};
|
|
234
252
|
});
|
|
235
253
|
|
|
236
|
-
pi.on("tool_call", async (event) => {
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
}
|
|
254
|
+
pi.on("tool_call", async (event, ctx) => {
|
|
255
|
+
state = getLatestPolicyStateFull(ctx);
|
|
256
|
+
const entries = ctx.sessionManager.getEntries();
|
|
257
|
+
const projectRoot = process.cwd();
|
|
258
|
+
const sessionId = ctx.sessionManager.getSessionId();
|
|
259
|
+
const runCtx = getLatestRunContext(entries);
|
|
260
|
+
|
|
244
261
|
if (MUTATING_TOOLS.has(event.toolName)) {
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
262
|
+
const decision = await isPlanPhaseAllowedMutation(
|
|
263
|
+
event.toolName,
|
|
264
|
+
event.input as Record<string, unknown>,
|
|
265
|
+
state.phase,
|
|
266
|
+
runCtx,
|
|
267
|
+
projectRoot,
|
|
268
|
+
{
|
|
269
|
+
aborted: state.aborted,
|
|
270
|
+
entries,
|
|
271
|
+
ownerSessionId: runCtx?.owner_pi_session_id,
|
|
272
|
+
currentSessionId: sessionId,
|
|
273
|
+
},
|
|
274
|
+
);
|
|
275
|
+
if (!decision.allowed) {
|
|
276
|
+
return { block: true, reason: decision.reason };
|
|
250
277
|
}
|
|
278
|
+
return undefined;
|
|
251
279
|
}
|
|
252
280
|
|
|
253
281
|
if (event.toolName === "bash") {
|
|
@@ -260,7 +288,7 @@ export default function policyGate(pi: ExtensionAPI) {
|
|
|
260
288
|
"policy-gate: mutating bash command blocked because harness-abort lock is active. Attach a new approved plan first.",
|
|
261
289
|
};
|
|
262
290
|
}
|
|
263
|
-
if (state.phase !== "execute") {
|
|
291
|
+
if (state.phase !== "execute" && state.phase !== "merge") {
|
|
264
292
|
return {
|
|
265
293
|
block: true,
|
|
266
294
|
reason: `policy-gate: mutating bash command blocked in phase '${state.phase}'.`,
|
|
@@ -271,6 +299,48 @@ export default function policyGate(pi: ExtensionAPI) {
|
|
|
271
299
|
return undefined;
|
|
272
300
|
});
|
|
273
301
|
|
|
302
|
+
pi.on("tool_result", async (event, ctx) => {
|
|
303
|
+
if (event.isError) return;
|
|
304
|
+
if (event.toolName !== "write" && event.toolName !== "edit") return;
|
|
305
|
+
|
|
306
|
+
const entries = ctx.sessionManager.getEntries();
|
|
307
|
+
state = getLatestPolicyStateFull(ctx);
|
|
308
|
+
const projectRoot = process.cwd();
|
|
309
|
+
const runCtx = getLatestRunContext(entries);
|
|
310
|
+
if (!runCtx) return;
|
|
311
|
+
|
|
312
|
+
const target = extractWritePathFromToolInput(
|
|
313
|
+
event.input as Record<string, unknown>,
|
|
314
|
+
);
|
|
315
|
+
if (!target) return;
|
|
316
|
+
const scoped = await isPlanPhaseScopedWrite(target, runCtx, projectRoot);
|
|
317
|
+
if (!scoped) return;
|
|
318
|
+
|
|
319
|
+
const planPath = normalizeHarnessPath(target, projectRoot);
|
|
320
|
+
const packet = await readPlanPacketFromPath(planPath);
|
|
321
|
+
const validation = validatePlanPacket(packet);
|
|
322
|
+
if (!validation.valid || !packet?.plan_id) return;
|
|
323
|
+
|
|
324
|
+
if (isHarnessAutoSession(entries)) {
|
|
325
|
+
state.phase = "execute";
|
|
326
|
+
state.approvedPlan = true;
|
|
327
|
+
state.planId = packet.plan_id;
|
|
328
|
+
state.aborted = false;
|
|
329
|
+
state.abortReason = null;
|
|
330
|
+
state.abortedAt = null;
|
|
331
|
+
state.updatedAt = nowIso();
|
|
332
|
+
appendPolicyState(state);
|
|
333
|
+
|
|
334
|
+
runCtx.plan_ready = true;
|
|
335
|
+
runCtx.plan_id = packet.plan_id;
|
|
336
|
+
runCtx.phase = "execute";
|
|
337
|
+
runCtx.updated_at = nowIso();
|
|
338
|
+
pi.appendEntry("harness-run-context", runCtx);
|
|
339
|
+
void saveRunContextToDisk(runCtx);
|
|
340
|
+
void saveProjectActiveRun(runCtx);
|
|
341
|
+
}
|
|
342
|
+
});
|
|
343
|
+
|
|
274
344
|
pi.registerCommand("harness-abort", {
|
|
275
345
|
description: "Safely abort current harness run and reset to plan phase",
|
|
276
346
|
handler: async (args, ctx) => {
|
|
@@ -1,19 +1,34 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* review-integrity — enforce evaluator/adversary isolation from executor session.
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
4
|
+
* Parent orchestrators spawn review agents in isolated subagent sessions.
|
|
5
|
+
* Direct review tools in the executor session are blocked; Agent/get_subagent_result
|
|
6
|
+
* for harness review agents remain allowed.
|
|
6
7
|
*/
|
|
7
8
|
|
|
8
9
|
import { appendFile, mkdir } from "node:fs/promises";
|
|
9
10
|
import { join } from "node:path";
|
|
10
|
-
import type { ExtensionAPI } from "@
|
|
11
|
+
import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
|
|
11
12
|
|
|
12
13
|
type HarnessPhase = "plan" | "execute" | "evaluate" | "adversary" | "merge";
|
|
13
14
|
|
|
14
15
|
const INCIDENTS_DIR = join(process.cwd(), ".pi", "harness", "incidents");
|
|
15
16
|
const INCIDENT_FILE = join(INCIDENTS_DIR, "review-integrity.jsonl");
|
|
16
17
|
|
|
18
|
+
const ORCHESTRATION_TOOLS = new Set([
|
|
19
|
+
"Agent",
|
|
20
|
+
"get_subagent_result",
|
|
21
|
+
"steer_subagent",
|
|
22
|
+
]);
|
|
23
|
+
|
|
24
|
+
const REVIEW_SUBAGENT_TYPES = new Set([
|
|
25
|
+
"harness/evaluator",
|
|
26
|
+
"harness/adversary",
|
|
27
|
+
"harness/tie-breaker",
|
|
28
|
+
]);
|
|
29
|
+
|
|
30
|
+
const EXECUTOR_SUBAGENT_TYPE = "harness/executor";
|
|
31
|
+
|
|
17
32
|
interface IsolationState {
|
|
18
33
|
executorSessionId: string | null;
|
|
19
34
|
violationActive: boolean;
|
|
@@ -89,6 +104,17 @@ function restoreState(ctx: {
|
|
|
89
104
|
};
|
|
90
105
|
}
|
|
91
106
|
|
|
107
|
+
function subagentTypeFromInput(
|
|
108
|
+
input: Record<string, unknown> | undefined,
|
|
109
|
+
): string {
|
|
110
|
+
if (!input) return "";
|
|
111
|
+
const direct = input.subagent_type;
|
|
112
|
+
if (typeof direct === "string") return direct;
|
|
113
|
+
const nested = input as { subagentType?: string };
|
|
114
|
+
if (typeof nested.subagentType === "string") return nested.subagentType;
|
|
115
|
+
return "";
|
|
116
|
+
}
|
|
117
|
+
|
|
92
118
|
async function appendIncident(payload: Record<string, unknown>): Promise<void> {
|
|
93
119
|
await mkdir(INCIDENTS_DIR, { recursive: true });
|
|
94
120
|
await appendFile(
|
|
@@ -105,6 +131,10 @@ export default function reviewIntegrity(pi: ExtensionAPI) {
|
|
|
105
131
|
updatedAt: nowIso(),
|
|
106
132
|
};
|
|
107
133
|
|
|
134
|
+
const persist = (): void => {
|
|
135
|
+
pi.appendEntry("harness-review-integrity", state);
|
|
136
|
+
};
|
|
137
|
+
|
|
108
138
|
pi.on("session_start", async (_event, ctx) => {
|
|
109
139
|
state = restoreState(ctx);
|
|
110
140
|
});
|
|
@@ -115,7 +145,7 @@ export default function reviewIntegrity(pi: ExtensionAPI) {
|
|
|
115
145
|
state.executorSessionId = ctx.sessionManager.getSessionId();
|
|
116
146
|
state.violationActive = false;
|
|
117
147
|
state.updatedAt = nowIso();
|
|
118
|
-
|
|
148
|
+
persist();
|
|
119
149
|
});
|
|
120
150
|
|
|
121
151
|
pi.on("before_agent_start", async (_event, ctx) => {
|
|
@@ -125,7 +155,7 @@ export default function reviewIntegrity(pi: ExtensionAPI) {
|
|
|
125
155
|
if (!inReview) {
|
|
126
156
|
state.violationActive = false;
|
|
127
157
|
state.updatedAt = nowIso();
|
|
128
|
-
|
|
158
|
+
persist();
|
|
129
159
|
return undefined;
|
|
130
160
|
}
|
|
131
161
|
|
|
@@ -135,42 +165,66 @@ export default function reviewIntegrity(pi: ExtensionAPI) {
|
|
|
135
165
|
) {
|
|
136
166
|
state.violationActive = false;
|
|
137
167
|
state.updatedAt = nowIso();
|
|
138
|
-
|
|
168
|
+
persist();
|
|
139
169
|
return undefined;
|
|
140
170
|
}
|
|
141
171
|
|
|
142
172
|
state.violationActive = true;
|
|
143
173
|
state.updatedAt = nowIso();
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
await appendIncident({
|
|
147
|
-
type: "review_integrity_violation",
|
|
148
|
-
session_id: currentSessionId,
|
|
149
|
-
phase,
|
|
150
|
-
reason:
|
|
151
|
-
"evaluator/adversary session is not isolated from executor session",
|
|
152
|
-
mitigation:
|
|
153
|
-
"fork or switch to a clean review session before running review tools",
|
|
154
|
-
});
|
|
174
|
+
persist();
|
|
155
175
|
|
|
156
176
|
return {
|
|
157
177
|
message: {
|
|
158
|
-
customType: "harness-review-integrity-
|
|
178
|
+
customType: "harness-review-integrity-hint",
|
|
159
179
|
display: true,
|
|
160
180
|
content: [
|
|
161
|
-
"Review
|
|
162
|
-
"
|
|
181
|
+
"Review phase in executor session: spawn harness/evaluator or harness/adversary via Agent (isolated subagent context).",
|
|
182
|
+
"Do not run review checks directly in this session — use get_subagent_result after spawn.",
|
|
163
183
|
].join("\n"),
|
|
164
184
|
},
|
|
165
185
|
};
|
|
166
186
|
});
|
|
167
187
|
|
|
168
|
-
pi.on("tool_call", async (
|
|
188
|
+
pi.on("tool_call", async (event, ctx) => {
|
|
189
|
+
if (event.toolName === "Agent") {
|
|
190
|
+
const subagentType = subagentTypeFromInput(
|
|
191
|
+
event.input as Record<string, unknown> | undefined,
|
|
192
|
+
);
|
|
193
|
+
if (subagentType === EXECUTOR_SUBAGENT_TYPE) {
|
|
194
|
+
state.executorSessionId = ctx.sessionManager.getSessionId();
|
|
195
|
+
state.violationActive = false;
|
|
196
|
+
state.updatedAt = nowIso();
|
|
197
|
+
persist();
|
|
198
|
+
return undefined;
|
|
199
|
+
}
|
|
200
|
+
if (REVIEW_SUBAGENT_TYPES.has(subagentType)) {
|
|
201
|
+
state.violationActive = false;
|
|
202
|
+
state.updatedAt = nowIso();
|
|
203
|
+
persist();
|
|
204
|
+
return undefined;
|
|
205
|
+
}
|
|
206
|
+
}
|
|
207
|
+
|
|
169
208
|
if (!state.violationActive) return undefined;
|
|
209
|
+
|
|
210
|
+
if (ORCHESTRATION_TOOLS.has(event.toolName)) {
|
|
211
|
+
return undefined;
|
|
212
|
+
}
|
|
213
|
+
|
|
214
|
+
await appendIncident({
|
|
215
|
+
type: "review_integrity_violation",
|
|
216
|
+
session_id: ctx.sessionManager.getSessionId(),
|
|
217
|
+
tool: event.toolName,
|
|
218
|
+
reason:
|
|
219
|
+
"direct tool use in review phase while sharing executor session context",
|
|
220
|
+
mitigation:
|
|
221
|
+
"spawn harness/evaluator or harness/adversary via Agent instead",
|
|
222
|
+
});
|
|
223
|
+
|
|
170
224
|
return {
|
|
171
225
|
block: true,
|
|
172
226
|
reason:
|
|
173
|
-
"review-integrity: tool
|
|
227
|
+
"review-integrity: tool blocked in review phase — spawn an isolated review subagent via Agent.",
|
|
174
228
|
};
|
|
175
229
|
});
|
|
176
230
|
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
5
|
import { spawn } from "node:child_process";
|
|
6
|
-
import type { ExtensionAPI } from "@
|
|
6
|
+
import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
|
|
7
7
|
import { resolveHarnessScript } from "./lib/harness-paths.js";
|
|
8
8
|
|
|
9
9
|
function resolveSyncScript(): string {
|
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
|
|
13
13
|
import { appendFile, mkdir } from "node:fs/promises";
|
|
14
14
|
import { join } from "node:path";
|
|
15
|
-
import type { ExtensionAPI } from "@
|
|
15
|
+
import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
|
|
16
16
|
|
|
17
17
|
const INCIDENTS_DIR = join(process.cwd(), ".pi", "harness", "incidents");
|
|
18
18
|
const INCIDENT_FILE = join(INCIDENTS_DIR, "test-diff-integrity.jsonl");
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
|
|
10
10
|
import { appendFile, mkdir, readFile, writeFile } from "node:fs/promises";
|
|
11
11
|
import { join } from "node:path";
|
|
12
|
-
import type { ExtensionAPI } from "@
|
|
12
|
+
import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
|
|
13
13
|
import {
|
|
14
14
|
getLatestRunContext,
|
|
15
15
|
getRunIdFromSession,
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
* - `HARNESS_VCC_DEBUG` — set `true` to write `/tmp/pi-vcc-debug.json` on compaction
|
|
10
10
|
*/
|
|
11
11
|
|
|
12
|
-
import type { ExtensionAPI } from "@
|
|
12
|
+
import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
|
|
13
13
|
import registerVcc from "../../vendor/pi-vcc/index.js";
|
|
14
14
|
|
|
15
15
|
export default function ultimatePiVcc(pi: ExtensionAPI): void {
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
{
|
|
2
2
|
"schema_version": "1.0.0",
|
|
3
3
|
"package": "ultimate-pi",
|
|
4
|
-
"package_version": "0.
|
|
5
|
-
"generated_at": "2026-05-
|
|
4
|
+
"package_version": "0.7.0",
|
|
5
|
+
"generated_at": "2026-05-17T08:24:57.263Z",
|
|
6
6
|
"agents": {
|
|
7
7
|
"pi-pi/agent-expert": {
|
|
8
8
|
"path": ".pi/agents/pi-pi/agent-expert.md",
|
|
@@ -18,11 +18,11 @@
|
|
|
18
18
|
},
|
|
19
19
|
"pi-pi/ext-expert": {
|
|
20
20
|
"path": ".pi/agents/pi-pi/ext-expert.md",
|
|
21
|
-
"sha256": "
|
|
21
|
+
"sha256": "47c64a9e2cffe00a50cac5541b0edc89fe8bcbd66ec3bb302eecc10d405a977f"
|
|
22
22
|
},
|
|
23
23
|
"pi-pi/keybinding-expert": {
|
|
24
24
|
"path": ".pi/agents/pi-pi/keybinding-expert.md",
|
|
25
|
-
"sha256": "
|
|
25
|
+
"sha256": "bb83f5fd2178075c8374ef28c360339f7de9faeedf811670b43a536c5f65c58e"
|
|
26
26
|
},
|
|
27
27
|
"pi-pi/pi-orchestrator": {
|
|
28
28
|
"path": ".pi/agents/pi-pi/pi-orchestrator.md",
|
|
@@ -42,27 +42,31 @@
|
|
|
42
42
|
},
|
|
43
43
|
"pi-pi/tui-expert": {
|
|
44
44
|
"path": ".pi/agents/pi-pi/tui-expert.md",
|
|
45
|
-
"sha256": "
|
|
45
|
+
"sha256": "a619b2ee3d3d94fe599abb61db0904f90d30335ec426851c3f1efdf2e5ce5390"
|
|
46
46
|
},
|
|
47
47
|
"harness/adversary": {
|
|
48
48
|
"path": ".pi/agents/harness/adversary.md",
|
|
49
|
-
"sha256": "
|
|
49
|
+
"sha256": "b965f90610ca942d08b656f1aee839266d08a92beb174b8761dd5e840694a899"
|
|
50
50
|
},
|
|
51
51
|
"harness/evaluator": {
|
|
52
52
|
"path": ".pi/agents/harness/evaluator.md",
|
|
53
|
-
"sha256": "
|
|
53
|
+
"sha256": "6c0de777a10de26dba4a6feb5641495fa5c2d31072a8b0e597a5ecc9921f129f"
|
|
54
54
|
},
|
|
55
55
|
"harness/executor": {
|
|
56
56
|
"path": ".pi/agents/harness/executor.md",
|
|
57
|
-
"sha256": "
|
|
57
|
+
"sha256": "5af3ec2be4d64a738834e36d480a36c2bee4359e8cd5a2e1aac49be4cff79589"
|
|
58
|
+
},
|
|
59
|
+
"harness/incident-recorder": {
|
|
60
|
+
"path": ".pi/agents/harness/incident-recorder.md",
|
|
61
|
+
"sha256": "2de405f77b62dde38f331665bff220a3ef131c3c1cd42eebee364000fc83352b"
|
|
58
62
|
},
|
|
59
63
|
"harness/meta-optimizer": {
|
|
60
64
|
"path": ".pi/agents/harness/meta-optimizer.md",
|
|
61
|
-
"sha256": "
|
|
65
|
+
"sha256": "ef2fb950e18e3a6439e91a68f764fc7ec922cd2d6b35de8f656f376854974d04"
|
|
62
66
|
},
|
|
63
67
|
"harness/planner": {
|
|
64
68
|
"path": ".pi/agents/harness/planner.md",
|
|
65
|
-
"sha256": "
|
|
69
|
+
"sha256": "3052e0b4ca504c7aa025b1926228e34adfe7f1c42d66a24db43fc6a1abb9968d"
|
|
66
70
|
},
|
|
67
71
|
"harness/sentrux-bootstrap": {
|
|
68
72
|
"path": ".pi/agents/harness/sentrux-bootstrap.md",
|
|
@@ -70,11 +74,11 @@
|
|
|
70
74
|
},
|
|
71
75
|
"harness/tie-breaker": {
|
|
72
76
|
"path": ".pi/agents/harness/tie-breaker.md",
|
|
73
|
-
"sha256": "
|
|
77
|
+
"sha256": "651f50b9e2c7903c542700e94908b1fcd026ebed12aa1f1d6ec481df3567e34f"
|
|
74
78
|
},
|
|
75
79
|
"harness/trace-librarian": {
|
|
76
80
|
"path": ".pi/agents/harness/trace-librarian.md",
|
|
77
|
-
"sha256": "
|
|
81
|
+
"sha256": "d63fe08a2ea0466c0fd89fff4da03ac1d9d3580c306381cee251c89d4e8fdb97"
|
|
78
82
|
}
|
|
79
83
|
}
|
|
80
84
|
}
|
|
@@ -17,8 +17,11 @@ Manual harness steps required copying `run_id` and `plan-packet.json` paths betw
|
|
|
17
17
|
4. **Hook order:** `harness-run-context` `before_agent_start` allocates/reuses `run_id` before `trace-recorder` `agent_start`. Trace writes phase files `trace-<phase>.json` plus rollup `trace.json`.
|
|
18
18
|
5. PostHog `harness_run_started` at most once per logical `run_id`.
|
|
19
19
|
6. Short commands: `/harness-run`, `/harness-eval`, etc. without args; recovery via `/harness-run-status`, `/harness-use-run`.
|
|
20
|
-
7.
|
|
21
|
-
8. `hasApprovedPlanSignal` uses user-visible prompt only; execute requires `plan_ready` from disk validation.
|
|
20
|
+
7. After execute, handoff recommends **`/harness-eval`** in the same session; review commands spawn isolated subagents (see ADR 0032). `active-run.json` still supports cross-session recovery when Pi was closed mid-run.
|
|
21
|
+
8. `hasApprovedPlanSignal` uses user-visible prompt only; execute requires `plan_ready` from disk validation **and** recorded `ask_user` approval (or `harness-plan-approval` entry).
|
|
22
|
+
9. **Plan-phase writes:** policy-gate allows `write`/`edit` only on canonical `.pi/harness/runs/<run_id>/plan-packet.json` after approval; all other paths stay blocked until execute phase.
|
|
23
|
+
10. **Approval-before-persist:** agents present the full plan, call `ask_user` (Approve / Request changes / Cancel), then write the packet. `--quick` narrows planning only — it does not skip approval.
|
|
24
|
+
11. **`/harness-auto`:** after an approved plan-packet write, policy phase promotes to `execute` in the same agent turn so implementation can proceed without a separate `/harness-run` message.
|
|
22
25
|
|
|
23
26
|
## Consequences
|
|
24
27
|
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# ADR 0032: Harness slash commands as agent orchestrators
|
|
2
|
+
|
|
3
|
+
- **Status:** Accepted
|
|
4
|
+
- **Date:** 2026-05-17
|
|
5
|
+
|
|
6
|
+
## Context
|
|
7
|
+
|
|
8
|
+
Harness slash prompts duplicated logic already defined in `harness/*` agents. Commands did not invoke the `Agent` tool. Review docs told users to fork a new Pi session even though subagents already provide isolated context.
|
|
9
|
+
|
|
10
|
+
## Decision
|
|
11
|
+
|
|
12
|
+
1. **Slash commands** parse args, spawn the matching `harness/*` agent, run all `ask_user` gates, perform policy-gated writes, and emit handoff blocks.
|
|
13
|
+
2. **Agents** perform multi-turn reads and emit structured JSON drafts; they do not approve plans or write canonical run artifacts (except executor mutations in scope).
|
|
14
|
+
3. **HarnessSpawnContext** JSON (`.pi/harness/specs/harness-spawn-context.schema.json`) is required in every spawn prompt because subagents do not receive `[HarnessActivePlan]` injection.
|
|
15
|
+
4. **Review isolation** uses `Agent` spawn with `inherit_context: false`, not session fork. `review-integrity` allows `Agent` / `get_subagent_result` for `harness/evaluator`, `harness/adversary`, and `harness/tie-breaker`.
|
|
16
|
+
5. **Subagent policy** (`harness-subagent-policy.ts`) blocks mutating tools for planner/evaluator/adversary and related read-only agents; executor keeps write tools and `extensions: true`.
|
|
17
|
+
6. **Planner** has `disallowed_tools: ask_user`; clarification options return in JSON for the parent orchestrator.
|
|
18
|
+
|
|
19
|
+
## Consequences
|
|
20
|
+
|
|
21
|
+
### Positive
|
|
22
|
+
|
|
23
|
+
- Single source of truth for phase logic in agent files; prompts stay thin.
|
|
24
|
+
- L4 review isolation without manual session management.
|
|
25
|
+
|
|
26
|
+
### Negative
|
|
27
|
+
|
|
28
|
+
- Orchestrator must parse subagent JSON reliably and pass complete spawn context.
|
|
29
|
+
- Scope enforcement remains prompt-driven for executor until optional path allowlist.
|
|
30
|
+
|
|
31
|
+
## References
|
|
32
|
+
|
|
33
|
+
- `.pi/prompts/harness-*.md`
|
|
34
|
+
- `.pi/agents/harness/*.md`
|
|
35
|
+
- `.pi/extensions/lib/harness-subagents/harness-subagent-policy.ts`
|
|
36
|
+
- `.pi/extensions/review-integrity.ts`
|
|
37
|
+
- `.pi/lib/harness-agent-output.ts`
|
|
@@ -17,6 +17,7 @@ Team-shared ADRs for the ultimate-pi harness live under `.pi/harness/docs/adrs/`
|
|
|
17
17
|
| [0009](0009-sentrux-rules-lifecycle.md) | Sentrux rules.toml lifecycle | Accepted |
|
|
18
18
|
| [0030](0030-inhouse-vcc-compaction.md) | In-house VCC compaction (vendored pi-vcc) | Accepted |
|
|
19
19
|
| [0031](0031-harness-run-context.md) | Harness active run context | Accepted |
|
|
20
|
+
| [0032](0032-harness-command-orchestration.md) | Harness commands as agent orchestrators | Accepted |
|
|
20
21
|
|
|
21
22
|
## Template
|
|
22
23
|
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://ultimate-pi.local/.pi/harness/specs/harness-spawn-context.schema.json",
|
|
4
|
+
"title": "HarnessSpawnContext",
|
|
5
|
+
"description": "Structured context passed from harness slash-command orchestrators to harness/* subagents.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"additionalProperties": false,
|
|
8
|
+
"required": ["schema_version", "agent", "mode"],
|
|
9
|
+
"properties": {
|
|
10
|
+
"schema_version": {
|
|
11
|
+
"type": "string",
|
|
12
|
+
"const": "1.0.0"
|
|
13
|
+
},
|
|
14
|
+
"agent": {
|
|
15
|
+
"type": "string",
|
|
16
|
+
"minLength": 1,
|
|
17
|
+
"description": "Target subagent id, e.g. harness/planner"
|
|
18
|
+
},
|
|
19
|
+
"mode": {
|
|
20
|
+
"type": "string",
|
|
21
|
+
"enum": [
|
|
22
|
+
"create",
|
|
23
|
+
"revise",
|
|
24
|
+
"execute",
|
|
25
|
+
"benchmark",
|
|
26
|
+
"verdict",
|
|
27
|
+
"adversary",
|
|
28
|
+
"trace",
|
|
29
|
+
"incident",
|
|
30
|
+
"tune"
|
|
31
|
+
]
|
|
32
|
+
},
|
|
33
|
+
"run_id": {
|
|
34
|
+
"type": "string"
|
|
35
|
+
},
|
|
36
|
+
"plan_packet_path": {
|
|
37
|
+
"type": "string"
|
|
38
|
+
},
|
|
39
|
+
"run_dir": {
|
|
40
|
+
"type": "string"
|
|
41
|
+
},
|
|
42
|
+
"task_summary": {
|
|
43
|
+
"type": "string"
|
|
44
|
+
},
|
|
45
|
+
"risk_level": {
|
|
46
|
+
"type": "string",
|
|
47
|
+
"enum": ["low", "med", "high"]
|
|
48
|
+
},
|
|
49
|
+
"quick": {
|
|
50
|
+
"type": "boolean"
|
|
51
|
+
},
|
|
52
|
+
"acceptance_checks": {
|
|
53
|
+
"type": "array",
|
|
54
|
+
"items": { "type": "string" }
|
|
55
|
+
},
|
|
56
|
+
"artifact_paths": {
|
|
57
|
+
"type": "object",
|
|
58
|
+
"additionalProperties": { "type": "string" }
|
|
59
|
+
},
|
|
60
|
+
"handoff_summary": {
|
|
61
|
+
"type": "string",
|
|
62
|
+
"description": "Prior phase bullet summary for chained spawns (harness-auto)"
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
}
|