ultimate-pi 0.6.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agents/skills/harness-plan/SKILL.md +9 -5
- package/.agents/skills/harness-sentrux-setup/SKILL.md +3 -4
- package/.pi/extensions/00-ultimate-pi-system-prompt.ts +194 -0
- package/.pi/extensions/budget-guard.ts +10 -2
- package/.pi/extensions/debate-orchestrator.ts +10 -2
- package/.pi/extensions/harness-live-widget.ts +10 -3
- package/.pi/extensions/harness-run-context.ts +703 -0
- package/.pi/extensions/observation-bus.ts +7 -9
- package/.pi/extensions/policy-gate.ts +50 -68
- package/.pi/extensions/trace-recorder.ts +80 -20
- package/.pi/harness/README.md +2 -0
- package/.pi/harness/agents.manifest.json +3 -3
- package/.pi/harness/docs/adrs/0009-sentrux-rules-lifecycle.md +1 -1
- package/.pi/harness/docs/adrs/0031-harness-run-context.md +38 -0
- package/.pi/harness/docs/adrs/README.md +1 -0
- package/.pi/harness/env.harness.template +24 -10
- package/.pi/harness/evals/smoke/run-context.fixture.json +17 -0
- package/.pi/harness/specs/harness-run-context.schema.json +80 -0
- package/.pi/lib/harness-run-context.ts +794 -0
- package/.pi/lib/harness-ui-state.ts +11 -0
- package/.pi/prompts/harness-abort.md +9 -6
- package/.pi/prompts/harness-auto.md +3 -3
- package/.pi/prompts/harness-critic.md +3 -5
- package/.pi/prompts/harness-eval.md +16 -16
- package/.pi/prompts/harness-incident.md +7 -5
- package/.pi/prompts/harness-plan.md +18 -3
- package/.pi/prompts/harness-review.md +4 -5
- package/.pi/prompts/harness-router-tune.md +1 -1
- package/.pi/prompts/harness-run.md +11 -11
- package/.pi/prompts/harness-setup.md +5 -27
- package/.pi/prompts/harness-trace.md +3 -5
- package/.pi/scripts/harness-searxng-bootstrap.mjs +92 -7
- package/.pi/scripts/harness-verify.mjs +18 -0
- package/CHANGELOG.md +22 -0
- package/README.md +31 -14
- package/package.json +2 -2
|
@@ -7,6 +7,7 @@
|
|
|
7
7
|
|
|
8
8
|
import { randomUUID } from "node:crypto";
|
|
9
9
|
import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
|
|
10
|
+
import { getRunIdFromSession } from "../lib/harness-run-context.js";
|
|
10
11
|
|
|
11
12
|
type HarnessPhase = "plan" | "execute" | "evaluate" | "adversary" | "merge";
|
|
12
13
|
type ObservationKind =
|
|
@@ -77,15 +78,12 @@ function nowIso(): string {
|
|
|
77
78
|
function getRunId(ctx: {
|
|
78
79
|
sessionManager: { getEntries(): unknown[]; getSessionId(): string };
|
|
79
80
|
}): string {
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
if (typeof runId === "string" && runId.length > 0) return runId;
|
|
87
|
-
}
|
|
88
|
-
return ctx.sessionManager.getSessionId();
|
|
81
|
+
return (
|
|
82
|
+
getRunIdFromSession(
|
|
83
|
+
ctx.sessionManager.getEntries(),
|
|
84
|
+
ctx.sessionManager.getSessionId(),
|
|
85
|
+
) ?? ctx.sessionManager.getSessionId()
|
|
86
|
+
);
|
|
89
87
|
}
|
|
90
88
|
|
|
91
89
|
export default function observationBus(pi: ExtensionAPI) {
|
|
@@ -9,6 +9,17 @@
|
|
|
9
9
|
*/
|
|
10
10
|
|
|
11
11
|
import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
|
|
12
|
+
import {
|
|
13
|
+
getLatestRunContext,
|
|
14
|
+
getPolicyTransitionBlock,
|
|
15
|
+
hasApprovedPlanSignalFromUserPrompt,
|
|
16
|
+
hasHarnessAbortSignal,
|
|
17
|
+
inferHarnessPhaseFromPrompt,
|
|
18
|
+
isHarnessBootstrapPrompt,
|
|
19
|
+
saveProjectActiveRun,
|
|
20
|
+
saveRunContextToDisk,
|
|
21
|
+
userVisiblePromptSlice,
|
|
22
|
+
} from "../lib/harness-run-context.js";
|
|
12
23
|
|
|
13
24
|
type HarnessPhase = "plan" | "execute" | "evaluate" | "adversary" | "merge";
|
|
14
25
|
|
|
@@ -70,65 +81,17 @@ function defaultState(): PolicyState {
|
|
|
70
81
|
};
|
|
71
82
|
}
|
|
72
83
|
|
|
73
|
-
function
|
|
74
|
-
const
|
|
75
|
-
return
|
|
76
|
-
|
|
77
|
-
p.includes("harness-setup") ||
|
|
78
|
-
p.includes("full harness bootstrap")
|
|
79
|
-
);
|
|
80
|
-
}
|
|
81
|
-
|
|
82
|
-
function inferPhase(prompt: string, _current: HarnessPhase): HarnessPhase {
|
|
83
|
-
const p = prompt.toLowerCase();
|
|
84
|
-
if (
|
|
85
|
-
p.includes("/harness-plan") ||
|
|
86
|
-
p.includes("harness-plan") ||
|
|
87
|
-
p.includes("/harness-auto") ||
|
|
88
|
-
p.includes("harness-auto")
|
|
89
|
-
)
|
|
90
|
-
return "plan";
|
|
91
|
-
if (p.includes("/harness-run") || p.includes("harness-run")) return "execute";
|
|
92
|
-
if (p.includes("/harness-eval") || p.includes("harness-eval"))
|
|
93
|
-
return "evaluate";
|
|
94
|
-
if (p.includes("/harness-review") || p.includes("harness-review"))
|
|
95
|
-
return "evaluate";
|
|
96
|
-
if (p.includes("/harness-critic") || p.includes("harness-critic"))
|
|
97
|
-
return "adversary";
|
|
98
|
-
if (p.includes("adversary")) return "adversary";
|
|
99
|
-
if (p.includes("merge gate") || p.includes("policy decision")) return "merge";
|
|
100
|
-
return "execute";
|
|
101
|
-
}
|
|
102
|
-
|
|
103
|
-
function hasApprovedPlanSignal(prompt: string): boolean {
|
|
104
|
-
const p = prompt.toLowerCase();
|
|
105
|
-
return (
|
|
106
|
-
p.includes("planpacket") ||
|
|
107
|
-
p.includes("--plan") ||
|
|
108
|
-
p.includes("approved plan") ||
|
|
109
|
-
p.includes("plan_id")
|
|
110
|
-
);
|
|
111
|
-
}
|
|
112
|
-
|
|
113
|
-
function hasAbortSignal(prompt: string): boolean {
|
|
114
|
-
const p = prompt.toLowerCase();
|
|
115
|
-
return p.includes("/harness-abort") || p.includes("harness-abort");
|
|
116
|
-
}
|
|
117
|
-
|
|
118
|
-
function isValidTransition(from: HarnessPhase, to: HarnessPhase): boolean {
|
|
119
|
-
if (from === to) return true;
|
|
120
|
-
if (to === "plan") return true;
|
|
121
|
-
if (to === "execute") return true;
|
|
122
|
-
const fromIndex = PHASE_ORDER.indexOf(from);
|
|
123
|
-
const toIndex = PHASE_ORDER.indexOf(to);
|
|
124
|
-
return toIndex === fromIndex + 1;
|
|
84
|
+
function hasApprovedPlanSignal(prompt: string, entries: unknown[]): boolean {
|
|
85
|
+
const runCtx = getLatestRunContext(entries);
|
|
86
|
+
if (runCtx?.plan_ready) return true;
|
|
87
|
+
return hasApprovedPlanSignalFromUserPrompt(prompt);
|
|
125
88
|
}
|
|
126
89
|
|
|
127
90
|
function isMutatingBash(command: string): boolean {
|
|
128
91
|
return BASH_MUTATION_PATTERNS.some((pattern) => pattern.test(command));
|
|
129
92
|
}
|
|
130
93
|
|
|
131
|
-
function
|
|
94
|
+
function getLatestPolicyStateFull(ctx: {
|
|
132
95
|
sessionManager: { getEntries(): unknown[] };
|
|
133
96
|
}): PolicyState {
|
|
134
97
|
const entries = ctx.sessionManager.getEntries() as SessionEntryLike[];
|
|
@@ -172,12 +135,14 @@ export default function policyGate(pi: ExtensionAPI) {
|
|
|
172
135
|
let state = defaultState();
|
|
173
136
|
|
|
174
137
|
pi.on("session_start", async (_event, ctx) => {
|
|
175
|
-
state =
|
|
138
|
+
state = getLatestPolicyStateFull(ctx);
|
|
176
139
|
});
|
|
177
140
|
|
|
178
|
-
pi.on("before_agent_start", async (event) => {
|
|
179
|
-
const
|
|
180
|
-
const
|
|
141
|
+
pi.on("before_agent_start", async (event, ctx) => {
|
|
142
|
+
const userPrompt = userVisiblePromptSlice(event.prompt);
|
|
143
|
+
const entries = ctx.sessionManager.getEntries();
|
|
144
|
+
const bootstrapPrompt = isHarnessBootstrapPrompt(userPrompt);
|
|
145
|
+
const abortSignal = hasHarnessAbortSignal(userPrompt);
|
|
181
146
|
|
|
182
147
|
// /harness-setup instructions mention `harness-plan` (e.g. gh label text). That
|
|
183
148
|
// substring must not force inferHarnessPhaseFromPrompt() to "plan" or bootstrap stays blocked.
|
|
@@ -220,18 +185,17 @@ export default function policyGate(pi: ExtensionAPI) {
|
|
|
220
185
|
};
|
|
221
186
|
}
|
|
222
187
|
|
|
223
|
-
const nextPhase =
|
|
224
|
-
const planSignal = hasApprovedPlanSignal(
|
|
188
|
+
const nextPhase = inferHarnessPhaseFromPrompt(userPrompt);
|
|
189
|
+
const planSignal = hasApprovedPlanSignal(userPrompt, entries);
|
|
225
190
|
|
|
226
|
-
|
|
191
|
+
const transitionBlock = getPolicyTransitionBlock(userPrompt, entries);
|
|
192
|
+
if (transitionBlock.blocked) {
|
|
227
193
|
return {
|
|
228
194
|
message: {
|
|
229
195
|
customType: "harness-policy-violation",
|
|
230
196
|
display: true,
|
|
231
|
-
content:
|
|
232
|
-
|
|
233
|
-
"Run /harness-plan first or continue in the current phase.",
|
|
234
|
-
].join("\n"),
|
|
197
|
+
content:
|
|
198
|
+
transitionBlock.message ?? "Policy gate blocked this command.",
|
|
235
199
|
},
|
|
236
200
|
};
|
|
237
201
|
}
|
|
@@ -242,13 +206,16 @@ export default function policyGate(pi: ExtensionAPI) {
|
|
|
242
206
|
}
|
|
243
207
|
|
|
244
208
|
if (nextPhase === "execute" && !state.approvedPlan && !planSignal) {
|
|
245
|
-
|
|
246
|
-
|
|
209
|
+
const runCtx = getLatestRunContext(entries);
|
|
210
|
+
if (runCtx?.plan_ready) {
|
|
211
|
+
state.approvedPlan = true;
|
|
212
|
+
state.planId = runCtx.plan_id ?? state.planId;
|
|
213
|
+
}
|
|
247
214
|
}
|
|
248
215
|
|
|
249
216
|
if (planSignal) {
|
|
250
217
|
state.approvedPlan = true;
|
|
251
|
-
const planMatch =
|
|
218
|
+
const planMatch = userPrompt.match(
|
|
252
219
|
/plan[_-]?id["'\s:=]+([A-Za-z0-9._:-]+)/i,
|
|
253
220
|
);
|
|
254
221
|
state.planId = planMatch?.[1] ?? state.planId;
|
|
@@ -318,6 +285,21 @@ export default function policyGate(pi: ExtensionAPI) {
|
|
|
318
285
|
state.updatedAt = state.abortedAt;
|
|
319
286
|
pi.appendEntry("harness-policy-state", state);
|
|
320
287
|
|
|
288
|
+
const runCtx = getLatestRunContext(ctx.sessionManager.getEntries());
|
|
289
|
+
if (runCtx) {
|
|
290
|
+
runCtx.status = "aborted";
|
|
291
|
+
runCtx.plan_ready = false;
|
|
292
|
+
runCtx.last_outcome = "aborted";
|
|
293
|
+
runCtx.last_completed_step = "abort";
|
|
294
|
+
runCtx.next_recommended_command = runCtx.task_summary
|
|
295
|
+
? `/harness-plan "${runCtx.task_summary}"`
|
|
296
|
+
: '/harness-plan "<task>"';
|
|
297
|
+
runCtx.updated_at = state.abortedAt ?? nowIso();
|
|
298
|
+
pi.appendEntry("harness-run-context", runCtx);
|
|
299
|
+
void saveRunContextToDisk(runCtx);
|
|
300
|
+
void saveProjectActiveRun(runCtx);
|
|
301
|
+
}
|
|
302
|
+
|
|
321
303
|
const lines = [
|
|
322
304
|
"Harness run aborted safely.",
|
|
323
305
|
" phase: plan",
|
|
@@ -342,7 +324,7 @@ export default function policyGate(pi: ExtensionAPI) {
|
|
|
342
324
|
pi.registerCommand("harness-policy-status", {
|
|
343
325
|
description: "Show current harness policy gate state",
|
|
344
326
|
handler: async (_args, ctx) => {
|
|
345
|
-
const latest =
|
|
327
|
+
const latest = getLatestPolicyStateFull(ctx);
|
|
346
328
|
const lines = [
|
|
347
329
|
"Harness policy gate:",
|
|
348
330
|
` phase: ${latest.phase}`,
|
|
@@ -10,10 +10,17 @@
|
|
|
10
10
|
import { appendFile, mkdir, readFile, writeFile } from "node:fs/promises";
|
|
11
11
|
import { join } from "node:path";
|
|
12
12
|
import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
|
|
13
|
+
import {
|
|
14
|
+
getLatestRunContext,
|
|
15
|
+
getRunIdFromSession,
|
|
16
|
+
type HarnessPhase,
|
|
17
|
+
isHarnessSlashCommand,
|
|
18
|
+
loadRunContextFromDisk,
|
|
19
|
+
phaseTraceFileName,
|
|
20
|
+
saveRunContextToDisk,
|
|
21
|
+
} from "../lib/harness-run-context.js";
|
|
13
22
|
import { captureHarnessEvent } from "./lib/harness-posthog.js";
|
|
14
23
|
|
|
15
|
-
type HarnessPhase = "plan" | "execute" | "evaluate" | "adversary" | "merge";
|
|
16
|
-
|
|
17
24
|
interface ToolSpan {
|
|
18
25
|
tool_call_id: string;
|
|
19
26
|
tool_name: string;
|
|
@@ -52,10 +59,6 @@ function nowIso(): string {
|
|
|
52
59
|
return new Date().toISOString();
|
|
53
60
|
}
|
|
54
61
|
|
|
55
|
-
function makeRunId(sessionId: string): string {
|
|
56
|
-
return `${sessionId}-${Date.now()}`;
|
|
57
|
-
}
|
|
58
|
-
|
|
59
62
|
function parsePhase(ctx: {
|
|
60
63
|
sessionManager: { getEntries(): unknown[] };
|
|
61
64
|
}): HarnessPhase {
|
|
@@ -165,8 +168,22 @@ async function readRunTraceSchemaVersion(): Promise<string> {
|
|
|
165
168
|
}
|
|
166
169
|
}
|
|
167
170
|
|
|
171
|
+
function resolveRunIdForAgentStart(
|
|
172
|
+
ctx: { sessionManager: { getEntries(): unknown[]; getSessionId(): string } },
|
|
173
|
+
prompt: string,
|
|
174
|
+
): string {
|
|
175
|
+
const entries = ctx.sessionManager.getEntries();
|
|
176
|
+
const sessionId = ctx.sessionManager.getSessionId();
|
|
177
|
+
const fromSession = getRunIdFromSession(entries, sessionId);
|
|
178
|
+
if (fromSession && isHarnessSlashCommand(prompt)) return fromSession;
|
|
179
|
+
const runCtx = getLatestRunContext(entries);
|
|
180
|
+
if (runCtx && isHarnessSlashCommand(prompt)) return runCtx.run_id;
|
|
181
|
+
return `${sessionId}-${Date.now()}`;
|
|
182
|
+
}
|
|
183
|
+
|
|
168
184
|
export default function traceRecorder(pi: ExtensionAPI) {
|
|
169
185
|
let activeRun: ActiveRun | null = null;
|
|
186
|
+
let lastUserPrompt = "";
|
|
170
187
|
|
|
171
188
|
async function writeEvent(
|
|
172
189
|
runId: string,
|
|
@@ -180,14 +197,25 @@ export default function traceRecorder(pi: ExtensionAPI) {
|
|
|
180
197
|
);
|
|
181
198
|
}
|
|
182
199
|
|
|
200
|
+
pi.on("before_agent_start", async (event) => {
|
|
201
|
+
lastUserPrompt = event.prompt;
|
|
202
|
+
});
|
|
203
|
+
|
|
183
204
|
pi.on("agent_start", async (_event, ctx) => {
|
|
205
|
+
if (!isHarnessSlashCommand(lastUserPrompt)) {
|
|
206
|
+
activeRun = null;
|
|
207
|
+
return;
|
|
208
|
+
}
|
|
209
|
+
|
|
184
210
|
const sessionId = ctx.sessionManager.getSessionId();
|
|
185
|
-
const
|
|
211
|
+
const entries = ctx.sessionManager.getEntries();
|
|
212
|
+
const runId = resolveRunIdForAgentStart(ctx, lastUserPrompt);
|
|
186
213
|
const startedAt = nowIso();
|
|
214
|
+
const phase = parsePhase(ctx);
|
|
187
215
|
activeRun = {
|
|
188
216
|
runId,
|
|
189
217
|
planId: parsePlanId(ctx),
|
|
190
|
-
phase
|
|
218
|
+
phase,
|
|
191
219
|
startedAt,
|
|
192
220
|
toolSpans: new Map(),
|
|
193
221
|
artifactRefs: new Set(),
|
|
@@ -198,15 +226,29 @@ export default function traceRecorder(pi: ExtensionAPI) {
|
|
|
198
226
|
phase: activeRun.phase,
|
|
199
227
|
started_at: startedAt,
|
|
200
228
|
});
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
229
|
+
|
|
230
|
+
const runCtx = getLatestRunContext(entries);
|
|
231
|
+
const projectRoot = process.cwd();
|
|
232
|
+
const diskCtx =
|
|
233
|
+
runCtx ?? (await loadRunContextFromDisk(runId, projectRoot));
|
|
234
|
+
const shouldEmitStarted = !diskCtx?.harness_run_started_emitted;
|
|
235
|
+
if (shouldEmitStarted) {
|
|
236
|
+
captureHarnessEvent(sessionId, "harness_run_started", {
|
|
237
|
+
harness_run_id: runId,
|
|
238
|
+
harness_plan_id: activeRun.planId,
|
|
239
|
+
harness_phase: activeRun.phase,
|
|
240
|
+
pi_session_id: sessionId,
|
|
241
|
+
model: ctx.model?.id ?? "unknown",
|
|
242
|
+
thinking_level:
|
|
243
|
+
pi.getThinkingLevel() === "minimal" ? "off" : pi.getThinkingLevel(),
|
|
244
|
+
});
|
|
245
|
+
if (diskCtx) {
|
|
246
|
+
diskCtx.harness_run_started_emitted = true;
|
|
247
|
+
await saveRunContextToDisk(diskCtx);
|
|
248
|
+
pi.appendEntry("harness-run-context", diskCtx);
|
|
249
|
+
}
|
|
250
|
+
}
|
|
251
|
+
|
|
210
252
|
await writeEvent(runId, {
|
|
211
253
|
type: "run_start",
|
|
212
254
|
run_id: runId,
|
|
@@ -282,6 +324,12 @@ export default function traceRecorder(pi: ExtensionAPI) {
|
|
|
282
324
|
cost: usage,
|
|
283
325
|
};
|
|
284
326
|
|
|
327
|
+
const phaseFile = phaseTraceFileName(activeRun.phase);
|
|
328
|
+
await writeFile(
|
|
329
|
+
join(runDir, phaseFile),
|
|
330
|
+
`${JSON.stringify(summary, null, 2)}\n`,
|
|
331
|
+
"utf-8",
|
|
332
|
+
);
|
|
285
333
|
await writeFile(
|
|
286
334
|
join(runDir, "trace.json"),
|
|
287
335
|
`${JSON.stringify(summary, null, 2)}\n`,
|
|
@@ -313,7 +361,7 @@ export default function traceRecorder(pi: ExtensionAPI) {
|
|
|
313
361
|
});
|
|
314
362
|
|
|
315
363
|
pi.registerCommand("harness-trace-last", {
|
|
316
|
-
description: "Show last
|
|
364
|
+
description: "Show last harness trace phase summary (no run id)",
|
|
317
365
|
handler: async (_args, ctx) => {
|
|
318
366
|
const entries = ctx.sessionManager.getEntries();
|
|
319
367
|
for (let i = entries.length - 1; i >= 0; i--) {
|
|
@@ -322,8 +370,20 @@ export default function traceRecorder(pi: ExtensionAPI) {
|
|
|
322
370
|
entry.type === "custom" &&
|
|
323
371
|
entry.customType === "harness-run-trace"
|
|
324
372
|
) {
|
|
325
|
-
const data = entry.data as
|
|
326
|
-
|
|
373
|
+
const data = entry.data as
|
|
374
|
+
| {
|
|
375
|
+
phase?: string;
|
|
376
|
+
tool_span_count?: number;
|
|
377
|
+
}
|
|
378
|
+
| undefined;
|
|
379
|
+
const handoff = getLatestRunContext(entries);
|
|
380
|
+
const next =
|
|
381
|
+
handoff?.next_recommended_command ?? "/harness-run-status";
|
|
382
|
+
const msg = [
|
|
383
|
+
`Last harness trace: phase ${data?.phase ?? "unknown"}`,
|
|
384
|
+
`tool spans: ${data?.tool_span_count ?? 0}`,
|
|
385
|
+
`Next: ${next}`,
|
|
386
|
+
].join("\n");
|
|
327
387
|
if (ctx.hasUI) {
|
|
328
388
|
ctx.ui.notify(msg, "info");
|
|
329
389
|
} else {
|
package/.pi/harness/README.md
CHANGED
|
@@ -29,6 +29,8 @@ Governance/runtime enforcement for this harness is implemented as Pi extensions
|
|
|
29
29
|
under `.pi/extensions/` and auto-loaded through the package `pi.extensions`
|
|
30
30
|
manifest (`package.json`).
|
|
31
31
|
|
|
32
|
+
- `harness-run-context.ts` - active run + plan injection; short commands without run/plan args
|
|
33
|
+
- `harness-live-widget.ts` - footer status (phase, plan ready, next command; no run id in UI)
|
|
32
34
|
- `policy-gate.ts` - phase state machine + plan-before-mutate enforcement
|
|
33
35
|
- `budget-guard.ts` - hard-stop token budget checks + budget exhausted artifacts
|
|
34
36
|
- `trace-recorder.ts` - append-only run traces + HarnessRunRecord + compact index
|
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
{
|
|
2
2
|
"schema_version": "1.0.0",
|
|
3
3
|
"package": "ultimate-pi",
|
|
4
|
-
"package_version": "0.
|
|
5
|
-
"generated_at": "2026-05-
|
|
4
|
+
"package_version": "0.6.1",
|
|
5
|
+
"generated_at": "2026-05-17T06:10:49.269Z",
|
|
6
6
|
"agents": {
|
|
7
7
|
"pi-pi/agent-expert": {
|
|
8
8
|
"path": ".pi/agents/pi-pi/agent-expert.md",
|
|
9
|
-
"sha256": "
|
|
9
|
+
"sha256": "86561eb092b92fa43f221bfc6305de8d5afe10d43c5f577b9bf15a71bda051c6"
|
|
10
10
|
},
|
|
11
11
|
"pi-pi/cli-expert": {
|
|
12
12
|
"path": ".pi/agents/pi-pi/cli-expert.md",
|
|
@@ -15,7 +15,7 @@ Sentrux enforces architecture via [`.sentrux/rules.toml`](https://sentrux.dev/do
|
|
|
15
15
|
4. **Re-sync command:** `node "$UP_PKG/.pi/scripts/sentrux-rules-sync.mjs" --force` or `harness-sentrux-bootstrap.mjs --force` (resolve `$UP_PKG` via [.pi/scripts/README.md](../../../scripts/README.md)).
|
|
16
16
|
5. **Pi command:** `/harness-sentrux-sync` via `sentrux-rules-sync.ts` extension.
|
|
17
17
|
6. **When to sync:**
|
|
18
|
-
- `/harness-setup` Step 4.
|
|
18
|
+
- `/harness-setup` Step 4.2 (after sentrux CLI install in Step 2.8)
|
|
19
19
|
- After editing `architecture.manifest.json`
|
|
20
20
|
- On `agent_end` when harness phase is `plan` or `merge`
|
|
21
21
|
- `node "$UP_PKG/.pi/scripts/harness-verify.mjs"` fails if manifest hash ≠ last sync (`--check`)
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# ADR 0031: Harness active run context
|
|
2
|
+
|
|
3
|
+
- **Status:** Accepted
|
|
4
|
+
- **Date:** 2026-05-17
|
|
5
|
+
|
|
6
|
+
## Context
|
|
7
|
+
|
|
8
|
+
Manual harness steps required copying `run_id` and `plan-packet.json` paths between commands. `trace-recorder` minted a new `run_id` on every `agent_start`, splitting artifacts across phases. The live widget exposed raw trace ids.
|
|
9
|
+
|
|
10
|
+
## Decision
|
|
11
|
+
|
|
12
|
+
1. Add the `.pi/lib/harness-run-context.ts` library and the `.pi/extensions/harness-run-context.ts` extension as the single source of truth for active runs.
|
|
13
|
+
2. Persist mirrors:
|
|
14
|
+
- `.pi/harness/runs/<run_id>/run-context.json`
|
|
15
|
+
- `.pi/harness/active-run.json` (cross-session pointer for forked eval)
|
|
16
|
+
3. Canonical plan path: `.pi/harness/runs/<run_id>/plan-packet.json` — injected via `[HarnessActivePlan]`; no `--plan` on the happy path.
|
|
17
|
+
4. **Hook order:** the `harness-run-context` `before_agent_start` hook allocates/reuses `run_id` before the `trace-recorder` `agent_start` hook runs. `trace-recorder` writes per-phase files `trace-<phase>.json` plus the rollup `trace.json`.
|
|
18
|
+
5. PostHog `harness_run_started` is emitted at most once per logical `run_id`.
|
|
19
|
+
6. Short commands: `/harness-run`, `/harness-eval`, etc. without args; recovery via `/harness-run-status`, `/harness-use-run`.
|
|
20
|
+
7. Review isolation unchanged: after execute, handoff says **new Pi session → `/harness-eval`**; project `active-run.json` binds forked sessions.
|
|
21
|
+
8. `hasApprovedPlanSignal` inspects only the user-visible slice of the prompt (not injected context); execute requires `plan_ready` from disk validation.
|
|
22
|
+
|
|
23
|
+
## Consequences
|
|
24
|
+
|
|
25
|
+
### Positive
|
|
26
|
+
|
|
27
|
+
- One logical run per manual/auto pipeline; forensics and telemetry align.
|
|
28
|
+
- Users never copy run ids or plan paths in normal workflows.
|
|
29
|
+
|
|
30
|
+
### Negative
|
|
31
|
+
|
|
32
|
+
- Extension ordering and disk reconciliation must stay correct when adding new harness commands.
|
|
33
|
+
|
|
34
|
+
## References
|
|
35
|
+
|
|
36
|
+
- `.pi/lib/harness-run-context.ts`
|
|
37
|
+
- `.pi/extensions/harness-run-context.ts`
|
|
38
|
+
- `.pi/harness/specs/harness-run-context.schema.json`
|
|
@@ -16,6 +16,7 @@ Team-shared ADRs for the ultimate-pi harness live under `.pi/harness/docs/adrs/`
|
|
|
16
16
|
| [0008](0008-harness-posthog-telemetry.md) | Harness PostHog telemetry | Accepted |
|
|
17
17
|
| [0009](0009-sentrux-rules-lifecycle.md) | Sentrux rules.toml lifecycle | Accepted |
|
|
18
18
|
| [0030](0030-inhouse-vcc-compaction.md) | In-house VCC compaction (vendored pi-vcc) | Accepted |
|
|
19
|
+
| [0031](0031-harness-run-context.md) | Harness active run context | Accepted |
|
|
19
20
|
|
|
20
21
|
## Template
|
|
21
22
|
|
|
@@ -1,30 +1,44 @@
|
|
|
1
1
|
# ultimate-pi harness — local secrets and paths (gitignored .env)
|
|
2
|
-
#
|
|
2
|
+
# Created by /harness-setup (harness-sync-env.mjs). Re-run setup to append newly introduced keys only.
|
|
3
3
|
|
|
4
|
-
# Telemetry
|
|
4
|
+
# --- Telemetry ---
|
|
5
|
+
# Harness domain events (harness-telemetry.ts); set false to disable harness_* only
|
|
5
6
|
HARNESS_TELEMETRY_ENABLED=true
|
|
6
7
|
|
|
7
|
-
# harness-web (Scrapling scrape + pluggable search)
|
|
8
|
+
# --- harness-web (Scrapling scrape + pluggable search) ---
|
|
8
9
|
HARNESS_WEB_FETCH_MODE=stealth
|
|
9
10
|
HARNESS_WEB_SEARCH_ENGINE=ddg_html
|
|
10
|
-
#
|
|
11
|
+
# When HARNESS_WEB_SEARCH_ENGINE=searxng (bootstrap via harness-searxng-bootstrap.mjs):
|
|
11
12
|
# HARNESS_WEB_SEARXNG_URL=http://127.0.0.1:8080
|
|
12
13
|
# HARNESS_WEB_PROXY=
|
|
13
14
|
# HARNESS_WEB_RATE_LIMIT_MS=2000
|
|
14
15
|
# HARNESS_WEB_TIMEOUT_MS=30000
|
|
15
16
|
|
|
16
|
-
#
|
|
17
|
+
# --- VCC compaction (env-only; no JSON config files) ---
|
|
18
|
+
# Default: VCC handles /compact and auto-compaction. Set false for Pi LLM compaction:
|
|
19
|
+
# HARNESS_VCC_COMPACTION=false
|
|
20
|
+
# HARNESS_VCC_DEBUG=true
|
|
21
|
+
|
|
22
|
+
# --- PostHog (optional) ---
|
|
23
|
+
# Project key — required for harness_* telemetry when HARNESS_TELEMETRY_ENABLED=true
|
|
17
24
|
# POSTHOG_API_KEY=
|
|
25
|
+
# POSTHOG_HOST=https://us.i.posthog.com
|
|
26
|
+
# POSTHOG_ENABLED=true
|
|
18
27
|
# POSTHOG_PROJECT_NAME=ultimate-pi
|
|
19
28
|
# POSTHOG_PRIVACY_MODE=false
|
|
29
|
+
# Personal API key — PostHog MCP / posthog-analyst skill only
|
|
30
|
+
# POSTHOG_PERSONAL_API_KEY=
|
|
31
|
+
# POSTHOG_MCP_FEATURES=llm_analytics
|
|
20
32
|
|
|
21
|
-
# Graphify semantic extract (optional; `graphify update .` needs no key)
|
|
33
|
+
# --- Graphify semantic extract (optional; `graphify update .` needs no key) ---
|
|
22
34
|
# GEMINI_API_KEY=
|
|
23
35
|
# GOOGLE_API_KEY=
|
|
24
36
|
# OPENAI_API_KEY=
|
|
37
|
+
# OPENAI_API_BASE=
|
|
25
38
|
|
|
26
|
-
#
|
|
27
|
-
|
|
39
|
+
# --- Wiki / Obsidian vault (optional) ---
|
|
40
|
+
VAULT_WIKI_PATH=vault/wiki
|
|
28
41
|
|
|
29
|
-
#
|
|
30
|
-
#
|
|
42
|
+
# --- Sentrux gate (optional) ---
|
|
43
|
+
# Require Sentrux stub for harness-verify (see .pi/scripts/harness-verify.mjs)
|
|
44
|
+
# HARNESS_SENTRUX_REQUIRED=true
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
{
|
|
2
|
+
"schema_version": "1.0.0",
|
|
3
|
+
"run_id": "smoke-session-1",
|
|
4
|
+
"pi_session_id": "smoke-session",
|
|
5
|
+
"project_root": "/tmp/ultimate-pi-smoke",
|
|
6
|
+
"phase": "plan",
|
|
7
|
+
"plan_id": null,
|
|
8
|
+
"plan_packet_path": "/tmp/ultimate-pi-smoke/.pi/harness/runs/smoke-session-1/plan-packet.json",
|
|
9
|
+
"plan_ready": false,
|
|
10
|
+
"task_summary": "smoke task",
|
|
11
|
+
"status": "active",
|
|
12
|
+
"last_completed_step": null,
|
|
13
|
+
"last_outcome": null,
|
|
14
|
+
"next_recommended_command": "/harness-plan \"smoke task\"",
|
|
15
|
+
"owner_pi_session_id": "smoke-session",
|
|
16
|
+
"updated_at": "2026-05-17T00:00:00.000Z"
|
|
17
|
+
}
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://ultimate-pi.local/.pi/harness/specs/harness-run-context.schema.json",
|
|
4
|
+
"title": "HarnessRunContext",
|
|
5
|
+
"description": "Session- and disk-backed active harness run pointer.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"additionalProperties": false,
|
|
8
|
+
"required": [
|
|
9
|
+
"schema_version",
|
|
10
|
+
"run_id",
|
|
11
|
+
"pi_session_id",
|
|
12
|
+
"project_root",
|
|
13
|
+
"phase",
|
|
14
|
+
"plan_ready",
|
|
15
|
+
"status",
|
|
16
|
+
"owner_pi_session_id",
|
|
17
|
+
"updated_at"
|
|
18
|
+
],
|
|
19
|
+
"properties": {
|
|
20
|
+
"schema_version": {
|
|
21
|
+
"type": "string",
|
|
22
|
+
"const": "1.0.0"
|
|
23
|
+
},
|
|
24
|
+
"run_id": {
|
|
25
|
+
"type": "string",
|
|
26
|
+
"minLength": 1
|
|
27
|
+
},
|
|
28
|
+
"pi_session_id": {
|
|
29
|
+
"type": "string",
|
|
30
|
+
"minLength": 1
|
|
31
|
+
},
|
|
32
|
+
"project_root": {
|
|
33
|
+
"type": "string",
|
|
34
|
+
"minLength": 1
|
|
35
|
+
},
|
|
36
|
+
"phase": {
|
|
37
|
+
"type": "string",
|
|
38
|
+
"enum": ["plan", "execute", "evaluate", "adversary", "merge"]
|
|
39
|
+
},
|
|
40
|
+
"plan_id": {
|
|
41
|
+
"type": ["string", "null"]
|
|
42
|
+
},
|
|
43
|
+
"plan_packet_path": {
|
|
44
|
+
"type": ["string", "null"]
|
|
45
|
+
},
|
|
46
|
+
"plan_ready": {
|
|
47
|
+
"type": "boolean"
|
|
48
|
+
},
|
|
49
|
+
"task_summary": {
|
|
50
|
+
"type": ["string", "null"]
|
|
51
|
+
},
|
|
52
|
+
"status": {
|
|
53
|
+
"type": "string",
|
|
54
|
+
"enum": ["active", "aborted", "completed"]
|
|
55
|
+
},
|
|
56
|
+
"last_completed_step": {
|
|
57
|
+
"type": ["string", "null"]
|
|
58
|
+
},
|
|
59
|
+
"last_outcome": {
|
|
60
|
+
"type": ["string", "null"]
|
|
61
|
+
},
|
|
62
|
+
"next_recommended_command": {
|
|
63
|
+
"type": ["string", "null"]
|
|
64
|
+
},
|
|
65
|
+
"owner_pi_session_id": {
|
|
66
|
+
"type": "string",
|
|
67
|
+
"minLength": 1
|
|
68
|
+
},
|
|
69
|
+
"updated_at": {
|
|
70
|
+
"type": "string",
|
|
71
|
+
"format": "date-time"
|
|
72
|
+
},
|
|
73
|
+
"harness_run_started_emitted": {
|
|
74
|
+
"type": "boolean"
|
|
75
|
+
},
|
|
76
|
+
"turn_override_run_id": {
|
|
77
|
+
"type": ["string", "null"]
|
|
78
|
+
}
|
|
79
|
+
}
|
|
80
|
+
}
|