ultimate-pi 0.6.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. package/.agents/skills/harness-plan/SKILL.md +9 -5
  2. package/.agents/skills/harness-sentrux-setup/SKILL.md +3 -4
  3. package/.pi/extensions/00-ultimate-pi-system-prompt.ts +194 -0
  4. package/.pi/extensions/budget-guard.ts +10 -2
  5. package/.pi/extensions/debate-orchestrator.ts +10 -2
  6. package/.pi/extensions/harness-live-widget.ts +10 -3
  7. package/.pi/extensions/harness-run-context.ts +703 -0
  8. package/.pi/extensions/observation-bus.ts +7 -9
  9. package/.pi/extensions/policy-gate.ts +50 -68
  10. package/.pi/extensions/trace-recorder.ts +80 -20
  11. package/.pi/harness/README.md +2 -0
  12. package/.pi/harness/agents.manifest.json +3 -3
  13. package/.pi/harness/docs/adrs/0009-sentrux-rules-lifecycle.md +1 -1
  14. package/.pi/harness/docs/adrs/0031-harness-run-context.md +38 -0
  15. package/.pi/harness/docs/adrs/README.md +1 -0
  16. package/.pi/harness/env.harness.template +24 -10
  17. package/.pi/harness/evals/smoke/run-context.fixture.json +17 -0
  18. package/.pi/harness/specs/harness-run-context.schema.json +80 -0
  19. package/.pi/lib/harness-run-context.ts +794 -0
  20. package/.pi/lib/harness-ui-state.ts +11 -0
  21. package/.pi/prompts/harness-abort.md +9 -6
  22. package/.pi/prompts/harness-auto.md +3 -3
  23. package/.pi/prompts/harness-critic.md +3 -5
  24. package/.pi/prompts/harness-eval.md +16 -16
  25. package/.pi/prompts/harness-incident.md +7 -5
  26. package/.pi/prompts/harness-plan.md +18 -3
  27. package/.pi/prompts/harness-review.md +4 -5
  28. package/.pi/prompts/harness-router-tune.md +1 -1
  29. package/.pi/prompts/harness-run.md +11 -11
  30. package/.pi/prompts/harness-setup.md +5 -27
  31. package/.pi/prompts/harness-trace.md +3 -5
  32. package/.pi/scripts/harness-searxng-bootstrap.mjs +92 -7
  33. package/.pi/scripts/harness-verify.mjs +18 -0
  34. package/CHANGELOG.md +22 -0
  35. package/README.md +31 -14
  36. package/package.json +2 -2
@@ -0,0 +1,703 @@
1
+ /**
2
+ * harness-run-context — session-scoped active run + plan injection.
3
+ *
4
+ * Hook order: runs before trace-recorder (alphabetically h < t). Allocates run_id
5
+ * in before_agent_start so trace-recorder reuses it on agent_start.
6
+ */
7
+
8
+ import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
9
+ import {
10
+ canonicalPlanPath,
11
+ createFreshRunContext,
12
+ driftGateActive,
13
+ extractCompletionStatuses,
14
+ formatActivePlanBlock,
15
+ formatPlanContextBlock,
16
+ getLatestPolicyPhase,
17
+ getLatestRunContext,
18
+ getPolicyTransitionBlock,
19
+ type HarnessRunContext,
20
+ hasHarnessAbortSignal,
21
+ isAmendPlanAllowed,
22
+ isHarnessBootstrapPrompt,
23
+ isHarnessSlashCommand,
24
+ isNewTaskPlanBlocked,
25
+ isStaleActiveRunPointer,
26
+ loadProjectActiveRun,
27
+ loadRunContextFromDisk,
28
+ nextStepAfterOutcome,
29
+ type PlanPacketSummary,
30
+ parseHarnessSlashCommand,
31
+ planPacketSummary,
32
+ readPlanPacketFromPath,
33
+ resolveArgsForCommand,
34
+ saveProjectActiveRun,
35
+ saveRunContextToDisk,
36
+ shouldReuseHarnessRunId,
37
+ userVisiblePromptSlice,
38
+ validatePlanOverridePath,
39
+ validatePlanPacket,
40
+ } from "../lib/harness-run-context.js";
41
+
42
/**
 * Minimal structural view of a session-log entry.
 *
 * Every field is optional because entries arrive as `unknown[]` and are only
 * cast to this shape; callers must compare `type` / `customType` before
 * trusting `data`.
 */
type SessionEntryLike = Partial<{
  /** Entry discriminator; this file only matches on `"custom"`. */
  type: string;
  /** Sub-kind of a custom entry, e.g. `"harness-policy-state"`. */
  customType: string;
  /** Opaque payload stored with the entry; narrow or cast before use. */
  data: unknown;
}>;
47
+
48
/**
 * Reads the session entries from a handler context.
 *
 * @param ctx - Any object exposing `sessionManager.getEntries()`.
 * @returns Whatever the session manager returns, untouched.
 */
function getEntries(ctx: {
  sessionManager: { getEntries(): unknown[] };
}): unknown[] {
  const { sessionManager } = ctx;
  const entries = sessionManager.getEntries();
  return entries;
}
53
+
54
/**
 * Records the run context in the session log and mirrors it to disk.
 *
 * The session entry is appended synchronously. The two disk writes are
 * started but not awaited, so callers are never blocked on file I/O.
 *
 * @param pi - Extension API used to append the `harness-run-context` entry.
 * @param ctx - Run context to persist.
 */
function persistContext(pi: ExtensionAPI, ctx: HarnessRunContext): void {
  pi.appendEntry("harness-run-context", ctx);

  // The writes stay fire-and-forget, but a floating promise that rejects
  // surfaces as an unhandled rejection. Catch and report instead so a failed
  // write cannot take the host process down.
  const reportFailure =
    (target: string) =>
    (err: unknown): void => {
      const reason = err instanceof Error ? err.message : String(err);
      console.error(
        `harness-run-context: failed to persist ${target} for run ${ctx.run_id}: ${reason}`,
      );
    };

  // Promise.resolve() keeps this safe even if a save helper ever returns a
  // plain value rather than a promise.
  void Promise.resolve(saveRunContextToDisk(ctx)).catch(
    reportFailure("run context"),
  );
  void Promise.resolve(saveProjectActiveRun(ctx)).catch(
    reportFailure("active-run pointer"),
  );
}
59
+
60
/**
 * Derives a short task description from a user prompt.
 *
 * The first double-quoted, non-empty span anywhere in the prompt wins.
 * Otherwise the arguments of a parsed harness slash command are used,
 * truncated to 200 characters.
 *
 * @param prompt - Raw user prompt text.
 * @returns The task summary, or `null` when neither source yields text.
 */
function extractTaskSummary(prompt: string): string | null {
  const quotedText = /"([^"]+)"/.exec(prompt)?.[1];
  if (quotedText) {
    return quotedText;
  }

  const slashArgs = parseHarnessSlashCommand(prompt)?.args;
  return slashArgs ? slashArgs.slice(0, 200) : null;
}
67
+
68
/**
 * Appends a fresh policy-state entry and a reset drift-state entry for a plan.
 *
 * The new policy state starts from the most recent `harness-policy-state`
 * entry (if any) so unrelated keys carry over. The fields set here always
 * override: the plan is marked approved, and the abort and budget-bypass
 * flags are cleared.
 *
 * @param pi - Extension API used to append both entries.
 * @param entries - Session entries, searched newest-first for prior policy state.
 * @param planId - Plan identifier recorded in both entries.
 * @param phase - Phase written into the policy state.
 */
function syncPolicyFromPlan(
  pi: ExtensionAPI,
  entries: unknown[],
  planId: string,
  phase: HarnessRunContext["phase"],
): void {
  const overrides: Record<string, unknown> = {
    phase,
    approvedPlan: true,
    planId,
    budgetBypass: false,
    aborted: false,
    abortReason: null,
    abortedAt: null,
    updatedAt: new Date().toISOString(),
  };

  // Walk backwards so the newest policy entry is the one that gets merged.
  let policyState = overrides;
  let cursor = entries.length - 1;
  while (cursor >= 0) {
    const entry = entries[cursor] as SessionEntryLike;
    if (entry.type === "custom" && entry.customType === "harness-policy-state") {
      policyState = {
        ...(entry.data as Record<string, unknown>),
        ...overrides,
      };
      break;
    }
    cursor -= 1;
  }

  pi.appendEntry("harness-policy-state", policyState);

  const driftReset = {
    baseline_plan_id: planId,
    drift_score: 0,
    last_report_at: null,
    user_acknowledged: false,
  };
  pi.appendEntry("harness-drift-state", driftReset);
}
99
+
100
/**
 * Resolves the run context a session should resume.
 *
 * Lookup order:
 * 1. the latest run-context entry found in `entries`;
 * 2. the context stored for the run named by the project active-run pointer,
 *    unless the pointer is missing or stale;
 * 3. a context synthesized from the pointer's own fields when that run has no
 *    stored context.
 *
 * @param sessionId - Current Pi session id, recorded on a synthesized context.
 * @param projectRoot - Project root used for pointer and run lookups.
 * @param entries - Session entries to search first.
 * @returns The resolved context, or `null` when no source yields one.
 */
async function hydrateFromDisk(
  sessionId: string,
  projectRoot: string,
  entries: unknown[],
): Promise<HarnessRunContext | null> {
  const sessionCtx = getLatestRunContext(entries);
  if (sessionCtx) {
    return sessionCtx;
  }

  const activeRun = await loadProjectActiveRun(projectRoot);
  if (!activeRun) {
    return null;
  }
  if (isStaleActiveRunPointer(activeRun, projectRoot)) {
    return null;
  }

  const stored = await loadRunContextFromDisk(activeRun.run_id, projectRoot);
  if (stored) {
    return stored;
  }

  // Pointer exists but the run has no stored context: rebuild a minimal
  // active context from what the pointer itself records.
  const synthesized: HarnessRunContext = {
    schema_version: "1.0.0",
    run_id: activeRun.run_id,
    pi_session_id: sessionId,
    project_root: projectRoot,
    phase: activeRun.phase,
    plan_id: activeRun.plan_id,
    plan_packet_path: canonicalPlanPath(activeRun.run_id, projectRoot),
    plan_ready: activeRun.plan_ready,
    task_summary: null,
    status: "active",
    last_completed_step: null,
    last_outcome: null,
    next_recommended_command: null,
    owner_pi_session_id: activeRun.owner_pi_session_id,
    updated_at: activeRun.updated_at,
  };
  return synthesized;
}
132
+
133
/**
 * Tells whether the next plain-language prompt should be treated as an answer
 * to outstanding plan clarification questions.
 *
 * @param ctx - Current run context, or `null` when there is none.
 * @returns `true` only for an active run whose last outcome is
 *   `"needs_clarification"`.
 */
function needsClarificationFollowUp(ctx: HarnessRunContext | null): boolean {
  if (!ctx) {
    return false;
  }
  const isActive = ctx.status === "active";
  return isActive && ctx.last_outcome === "needs_clarification";
}
136
+
137
/** Custom message type for every refusal / usage message this extension emits. */
const BLOCK_MESSAGE_TYPE = "harness-run-context-block";
/** Custom type shared by the handoff session entry and the "Next: …" message. */
const HANDOFF_MESSAGE_TYPE = "harness-step-handoff";
/** Custom message type for status output in sessions without a UI. */
const STATUS_MESSAGE_TYPE = "harness-run-status";

/** Commands checked against a stale project active-run pointer. */
const CROSS_SESSION_COMMANDS: ReadonlySet<string> = new Set([
  "harness-eval",
  "harness-review",
  "harness-critic",
  "harness-trace",
  "harness-incident",
]);

/** Severity levels passed to `ctx.ui.notify` in this file. */
type NotifyLevel = "info" | "warning" | "error";

/** The slice of a handler context needed to surface a message to the user. */
interface UiContextLike {
  hasUI: boolean;
  ui: { notify(message: string, level: NotifyLevel): void };
}

/** Builds the handler result that shows `content` to the user as a block message. */
function blockMessage(content: string): {
  message: { customType: string; display: boolean; content: string };
} {
  return {
    message: { customType: BLOCK_MESSAGE_TYPE, display: true, content },
  };
}

/** Marks a run as aborted by the user and recommends re-planning the same task. */
function markAborted(run: HarnessRunContext): void {
  run.status = "aborted";
  run.plan_ready = false;
  run.last_outcome = "aborted";
  run.last_completed_step = "abort";
  run.next_recommended_command = run.task_summary
    ? `/harness-plan "${run.task_summary}"`
    : '/harness-plan "<task>"';
}

/** Marks a run as given up in favour of a new one (outcome `"abandoned"`). */
function markAbandoned(run: HarnessRunContext): void {
  run.status = "aborted";
  run.plan_ready = false;
  run.last_outcome = "abandoned";
}

/**
 * Returns the text of the most recent user message in `entries`.
 *
 * Message content may be a plain string or an array of parts; for arrays the
 * string `text` fields are joined with newlines.
 * NOTE(review): assumes text parts carry a string `text` field — confirm
 * against the host's message schema.
 *
 * @returns The prompt text, or `""` when there is no user message or it has
 *   no textual content.
 */
function lastUserPromptText(entries: unknown[]): string {
  for (let i = entries.length - 1; i >= 0; i--) {
    const entry = entries[i] as
      | { type?: string; message?: { role?: string; content?: unknown } }
      | null
      | undefined;
    if (entry?.type !== "message" || entry.message?.role !== "user") continue;

    const content = entry.message.content;
    if (typeof content === "string") return content;
    if (!Array.isArray(content)) return "";
    return content
      .map((part: unknown): string => {
        if (typeof part === "string") return part;
        const text = (part as { text?: unknown } | null | undefined)?.text;
        return typeof text === "string" ? text : "";
      })
      .filter((text) => text.length > 0)
      .join("\n");
  }
  return "";
}

/**
 * harness-run-context extension entry point.
 *
 * Keeps one in-memory active run per extension instance and wires it to the
 * host:
 * - `session_start` hydrates the run from session entries or disk;
 * - `before_agent_start` routes harness slash commands, allocates or reuses a
 *   run, and injects plan context into the system prompt (or returns a block
 *   message);
 * - `agent_end` records the step outcome and the next recommended command;
 * - `tool_call` blocks `write`/`edit` calls that target `plan-packet.json` in
 *   the evaluate/adversary phases;
 * - the `/harness-run-status`, `/harness-new-run` and `/harness-use-run`
 *   commands are registered.
 *
 * @param pi - Extension API supplied by the host.
 */
export default function harnessRunContext(pi: ExtensionAPI) {
  let activeCtx: HarnessRunContext | null = null;

  /** Shows `text` via the UI when there is one, otherwise as a displayed message. */
  const notifyOrSend = (
    ctx: UiContextLike,
    text: string,
    level: NotifyLevel,
    customType: string,
  ): void => {
    if (ctx.hasUI) ctx.ui.notify(text, level);
    else pi.sendMessage({ customType, content: text, display: true });
  };

  pi.on("session_start", async (_event, ctx) => {
    const sessionId = ctx.sessionManager.getSessionId();
    const projectRoot = process.cwd();
    const entries = getEntries(ctx);
    activeCtx = await hydrateFromDisk(sessionId, projectRoot, entries);
  });

  pi.on("before_agent_start", async (event, ctx) => {
    const sessionId = ctx.sessionManager.getSessionId();
    const projectRoot = process.cwd();
    const entries = getEntries(ctx);
    const userPrompt = userVisiblePromptSlice(event.prompt);
    const parsed = parseHarnessSlashCommand(userPrompt);
    const harnessTurn =
      isHarnessSlashCommand(userPrompt) ||
      needsClarificationFollowUp(activeCtx);

    // Abort is detected by substring, not by the parsed command, and runs
    // before the harness-turn gate. "harness-abort" already covers
    // "/harness-abort", so one check is enough.
    // NOTE(review): presumably this is so expanded prompt templates are caught
    // too — confirm the broad match is intended.
    if (userPrompt.toLowerCase().includes("harness-abort")) {
      if (!activeCtx) {
        activeCtx = await hydrateFromDisk(sessionId, projectRoot, entries);
      }
      if (activeCtx) {
        markAborted(activeCtx);
        persistContext(pi, activeCtx);
      }
    }

    if (!harnessTurn) {
      return undefined;
    }

    if (!activeCtx) {
      activeCtx = await hydrateFromDisk(sessionId, projectRoot, entries);
    }

    // Captured before any run switch below; applied to the selected run later.
    const policyPhase =
      getLatestPolicyPhase(entries) ?? activeCtx?.phase ?? "plan";
    const driftActive = driftGateActive(entries);

    // Plain-language follow-up after needs_clarification
    if (!parsed && activeCtx && needsClarificationFollowUp(activeCtx)) {
      activeCtx.phase = "plan";
      const packetPath = activeCtx.plan_packet_path;
      const packet = packetPath
        ? await readPlanPacketFromPath(packetPath)
        : null;
      const summary =
        packet && packetPath
          ? planPacketSummary(packet, packetPath, "needs_clarification")
          : null;
      syncPolicyFromPlan(
        pi,
        entries,
        activeCtx.plan_id ?? "plan-pending",
        "plan",
      );
      persistContext(pi, activeCtx);
      return {
        systemPrompt: `${event.systemPrompt}\n\n${formatPlanContextBlock(activeCtx)}\n\n${formatActivePlanBlock(activeCtx, "revise", summary)}\n\nReply with clarification answers; the harness will treat this as plan amend.`,
      };
    }

    if (!parsed) return undefined;

    const { command, args } = parsed;

    if (
      !isHarnessBootstrapPrompt(userPrompt) &&
      !hasHarnessAbortSignal(userPrompt)
    ) {
      const policyBlock = getPolicyTransitionBlock(userPrompt, entries);
      if (policyBlock.blocked) {
        return blockMessage(
          policyBlock.message ?? "Harness command blocked by policy phase.",
        );
      }
    }

    if (command === "harness-new-run") {
      if (activeCtx?.status === "active") {
        markAbandoned(activeCtx);
        persistContext(pi, activeCtx);
      }
      activeCtx = createFreshRunContext(
        sessionId,
        projectRoot,
        extractTaskSummary(userPrompt),
      );
      persistContext(pi, activeCtx);
      return {
        systemPrompt: `${event.systemPrompt}\n\n${formatPlanContextBlock(activeCtx)}\n\n${formatActivePlanBlock(activeCtx, "create")}`,
      };
    }

    if (command === "harness-use-run") {
      const runId = args.trim().split(/\s+/)[0];
      if (!runId) {
        return blockMessage("Usage: /harness-use-run <run-id>");
      }
      const disk = await loadRunContextFromDisk(runId, projectRoot);
      if (!disk) {
        return blockMessage(
          `No run directory for ${runId}. Check .pi/harness/runs/.`,
        );
      }
      activeCtx = {
        ...disk,
        pi_session_id: sessionId,
        turn_override_run_id: runId,
      };
      if (activeCtx.owner_pi_session_id !== sessionId) {
        activeCtx.next_recommended_command =
          "Read-only: owner session holds this run. Use /harness-new-run to take over.";
      }
      persistContext(pi, activeCtx);
      return {
        systemPrompt: `${event.systemPrompt}\n\n${formatPlanContextBlock(activeCtx)}`,
      };
    }

    if (command === "harness-run-status") {
      return undefined;
    }

    if (
      command === "harness-plan" &&
      activeCtx &&
      isNewTaskPlanBlocked(activeCtx, userPrompt) &&
      !isAmendPlanAllowed(activeCtx, userPrompt, driftActive)
    ) {
      return blockMessage(
        "Active harness run in progress. Use /harness-abort or /harness-new-run before starting a new task plan.",
      );
    }

    const resolved = resolveArgsForCommand(command, args, activeCtx);
    if (resolved.overrideRun && resolved.runId) {
      const disk = await loadRunContextFromDisk(resolved.runId, projectRoot);
      if (disk) activeCtx = { ...disk, turn_override_run_id: resolved.runId };
    }

    if (
      command === "harness-plan" ||
      command === "harness-auto" ||
      (!activeCtx && command !== "harness-abort")
    ) {
      if (
        !activeCtx ||
        !shouldReuseHarnessRunId(userPrompt, activeCtx, command)
      ) {
        activeCtx = createFreshRunContext(
          sessionId,
          projectRoot,
          extractTaskSummary(userPrompt),
        );
      }
      if (activeCtx.status === "aborted") {
        activeCtx.plan_ready = false;
      }
      activeCtx.phase = "plan";
      activeCtx.status = "active";
      if (command === "harness-plan") {
        const task = extractTaskSummary(userPrompt);
        if (task) activeCtx.task_summary = task;
      }
    } else if (
      activeCtx &&
      shouldReuseHarnessRunId(userPrompt, activeCtx, command)
    ) {
      activeCtx.turn_override_run_id = resolved.overrideRun
        ? resolved.runId
        : null;
    } else if (!activeCtx) {
      // NOTE(review): this branch is only reachable when command is
      // "harness-abort" — every other command with no active run is taken by
      // the first branch above — so the cross-session check below can never
      // match. Confirm the intended routing before relying on it.
      const pointer = await loadProjectActiveRun(projectRoot);
      if (pointer) {
        if (isStaleActiveRunPointer(pointer, projectRoot)) {
          if (CROSS_SESSION_COMMANDS.has(command)) {
            return blockMessage(
              'Project active-run pointer is stale or from another workspace. Run /harness-plan "<task>" or /harness-use-run <run-id> for recovery.',
            );
          }
        } else {
          const disk = await loadRunContextFromDisk(
            pointer.run_id,
            projectRoot,
          );
          if (disk) activeCtx = disk;
        }
      }
    }

    if (!activeCtx) {
      return blockMessage(
        'No active harness run. Run /harness-plan "<task>" first, or /harness-use-run <run-id> for recovery.',
      );
    }

    activeCtx.phase = policyPhase;
    activeCtx.updated_at = new Date().toISOString();
    activeCtx.pi_session_id = sessionId;

    if (resolved.planPath && resolved.runId) {
      const check = validatePlanOverridePath(
        resolved.planPath,
        resolved.runId,
        projectRoot,
      );
      if (!check.ok) {
        return blockMessage(check.reason ?? "Invalid --plan override");
      }
      activeCtx.plan_packet_path = resolved.planPath;
    }

    if (command === "harness-run" && !activeCtx.plan_ready) {
      return blockMessage("Plan not ready. Run /harness-plan first.");
    }

    if (
      command === "harness-run" &&
      activeCtx.plan_ready &&
      activeCtx.last_completed_step === "execute" &&
      activeCtx.last_outcome === "completed"
    ) {
      // Advisory only: the run still proceeds.
      const warn =
        "Plan already executed in this run. Prefer a new Pi session → /harness-eval, or /harness-abort to replan.";
      if (ctx.hasUI) ctx.ui.notify(warn, "warning");
    }

    let planSummary: PlanPacketSummary | null = null;
    if (activeCtx.plan_packet_path) {
      const packet = await readPlanPacketFromPath(activeCtx.plan_packet_path);
      if (packet) {
        planSummary = planPacketSummary(
          packet,
          activeCtx.plan_packet_path,
          activeCtx.plan_ready ? "ready" : "draft",
        );
        activeCtx.plan_id = packet.plan_id ?? activeCtx.plan_id;
      }
    }

    let activePlanBlock = "";
    if (command === "harness-plan" || command === "harness-auto") {
      const mode =
        activeCtx.plan_ready || activeCtx.status === "aborted"
          ? "revise"
          : "create";
      activePlanBlock = formatActivePlanBlock(activeCtx, mode, planSummary);
    } else if (command === "harness-run") {
      activePlanBlock = formatActivePlanBlock(
        activeCtx,
        "execute",
        planSummary,
      );
    } else if (
      command === "harness-eval" ||
      command === "harness-review" ||
      command === "harness-critic"
    ) {
      activePlanBlock = formatActivePlanBlock(activeCtx, "read", planSummary);
    }

    persistContext(pi, activeCtx);

    return {
      systemPrompt: `${event.systemPrompt}\n\n${formatPlanContextBlock(activeCtx)}${activePlanBlock ? `\n\n${activePlanBlock}` : ""}`,
    };
  });

  pi.on("agent_end", async (_event, ctx) => {
    const entries = getEntries(ctx);
    if (!activeCtx) {
      activeCtx = getLatestRunContext(entries);
    }
    if (!activeCtx) return;

    // Handles both string and array-of-parts message content, so the command
    // that started the turn is recognised either way.
    const lastPrompt = lastUserPromptText(entries);
    const parsed = parseHarnessSlashCommand(userVisiblePromptSlice(lastPrompt));
    if (!parsed && !needsClarificationFollowUp(activeCtx)) return;

    activeCtx.phase = getLatestPolicyPhase(entries) ?? activeCtx.phase;

    if (parsed?.command === "harness-abort") {
      markAborted(activeCtx);
      persistContext(pi, activeCtx);
      notifyOrSend(
        ctx,
        `Harness aborted. Next: ${activeCtx.next_recommended_command}`,
        "warning",
        HANDOFF_MESSAGE_TYPE,
      );
      return;
    }

    let planReady = activeCtx.plan_ready;
    if (
      (parsed?.command === "harness-plan" ||
        parsed?.command === "harness-auto") &&
      activeCtx.plan_packet_path
    ) {
      // Re-read the packet after the planning turn; readiness follows
      // validation of what is on disk.
      const packet = await readPlanPacketFromPath(activeCtx.plan_packet_path);
      const validation = validatePlanPacket(packet);
      planReady = validation.valid;
      if (planReady && packet?.plan_id) {
        activeCtx.plan_id = packet.plan_id;
        syncPolicyFromPlan(pi, entries, packet.plan_id, "plan");
        const summary = planPacketSummary(packet, activeCtx.plan_packet_path);
        pi.appendEntry("harness-plan-packet", summary);
        activeCtx.last_completed_step = "plan";
        activeCtx.last_outcome = summary.plan_status;
      } else {
        activeCtx.last_outcome = "needs_clarification";
        activeCtx.last_completed_step = "plan";
      }
    }

    activeCtx.plan_ready = planReady;

    const statuses = extractCompletionStatuses(entries);
    if (parsed?.command === "harness-run") {
      activeCtx.last_completed_step = "execute";
      activeCtx.last_outcome =
        statuses.executionStatus ?? activeCtx.last_outcome ?? "completed";
    }
    if (parsed?.command === "harness-eval") {
      activeCtx.last_completed_step = "evaluate";
      activeCtx.last_outcome = statuses.evalStatus ?? activeCtx.last_outcome;
    }

    const next = nextStepAfterOutcome({
      phase: activeCtx.phase,
      planStatus: statuses.planStatus ?? activeCtx.last_outcome,
      executionStatus: statuses.executionStatus,
      evalStatus: statuses.evalStatus,
      aborted: activeCtx.status === "aborted",
    });
    activeCtx.next_recommended_command = next;
    activeCtx.updated_at = new Date().toISOString();

    persistContext(pi, activeCtx);

    pi.appendEntry(HANDOFF_MESSAGE_TYPE, {
      next_command: next,
      plan_status: statuses.planStatus,
      execution_status: statuses.executionStatus,
      eval_status: statuses.evalStatus,
      phase: activeCtx.phase,
    });

    // Clarification follow-ups (no parsed command) update state silently.
    if (next && parsed) {
      notifyOrSend(ctx, `Next: ${next}`, "info", HANDOFF_MESSAGE_TYPE);
    }
  });

  pi.on("tool_call", async (event) => {
    if (!activeCtx?.plan_packet_path) return undefined;
    const phase = activeCtx.phase;
    if (phase !== "evaluate" && phase !== "adversary") return undefined;
    if (event.toolName !== "write" && event.toolName !== "edit") {
      return undefined;
    }
    const input = event.input as { path?: string; filePath?: string };
    const target = String(input.path ?? input.filePath ?? "");
    if (target.includes("plan-packet.json")) {
      return {
        block: true,
        reason:
          "harness-run-context: plan-packet.json is read-only in evaluate/adversary phases.",
      };
    }
    return undefined;
  });

  pi.registerCommand("harness-run-status", {
    description:
      "Show harness phase, plan readiness, and next command (no run id)",
    handler: async (_args, ctx) => {
      const sessionId = ctx.sessionManager.getSessionId();
      const projectRoot = process.cwd();
      const entries = getEntries(ctx);
      let ctxState = getLatestRunContext(entries) ?? activeCtx;
      if (!ctxState) {
        ctxState = await hydrateFromDisk(sessionId, projectRoot, entries);
      }
      if (!ctxState) {
        notifyOrSend(
          ctx,
          'No active harness run. Start with /harness-plan "<task>".',
          "warning",
          STATUS_MESSAGE_TYPE,
        );
        return;
      }
      // Newest plan-packet summary recorded in this session, if any.
      let summary: PlanPacketSummary | null = null;
      for (let i = entries.length - 1; i >= 0; i--) {
        const entry = entries[i] as SessionEntryLike;
        if (
          entry.type !== "custom" ||
          entry.customType !== "harness-plan-packet"
        )
          continue;
        summary = entry.data as PlanPacketSummary;
        break;
      }
      const lines = [
        "Harness run status:",
        ` phase: ${ctxState.phase}`,
        ` status: ${ctxState.status}`,
        ` plan_ready: ${ctxState.plan_ready}`,
        ` plan_id: ${ctxState.plan_id ?? "(none)"}`,
        summary
          ? ` scope: ${summary.scope_one_liner}`
          : " scope: (no plan summary yet)",
        ` last_step: ${ctxState.last_completed_step ?? "(none)"}`,
        ` last_outcome: ${ctxState.last_outcome ?? "(none)"}`,
        ` next: ${ctxState.next_recommended_command ?? "/harness-run-status"}`,
      ];
      notifyOrSend(ctx, lines.join("\n"), "info", STATUS_MESSAGE_TYPE);
    },
  });

  pi.registerCommand("harness-new-run", {
    description: "Abandon current active run and start a fresh harness run",
    handler: async (args, ctx) => {
      const sessionId = ctx.sessionManager.getSessionId();
      const projectRoot = process.cwd();
      if (activeCtx?.status === "active") {
        // Records the "abandoned" outcome, matching the prompt-routed
        // /harness-new-run path in before_agent_start.
        markAbandoned(activeCtx);
        persistContext(pi, activeCtx);
      }
      activeCtx = createFreshRunContext(
        sessionId,
        projectRoot,
        args.trim() || null,
      );
      persistContext(pi, activeCtx);
      notifyOrSend(
        ctx,
        'New harness run allocated. Next: /harness-plan "<your task>"',
        "info",
        HANDOFF_MESSAGE_TYPE,
      );
    },
  });

  pi.registerCommand("harness-use-run", {
    description: "Point this session at an existing run directory (recovery)",
    handler: async (args, ctx) => {
      const runId = args.trim().split(/\s+/)[0];
      if (!runId) {
        notifyOrSend(
          ctx,
          "Usage: /harness-use-run <run-id>",
          "warning",
          BLOCK_MESSAGE_TYPE,
        );
        return;
      }
      const projectRoot = process.cwd();
      const disk = await loadRunContextFromDisk(runId, projectRoot);
      if (!disk) {
        notifyOrSend(
          ctx,
          `Run not found: ${runId}`,
          "error",
          BLOCK_MESSAGE_TYPE,
        );
        return;
      }
      activeCtx = {
        ...disk,
        pi_session_id: ctx.sessionManager.getSessionId(),
      };
      persistContext(pi, activeCtx);
      notifyOrSend(
        ctx,
        `Session bound to run ${runId}. See /harness-run-status.`,
        "info",
        STATUS_MESSAGE_TYPE,
      );
    },
  });
}