ultimate-pi 0.6.1 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. package/.agents/skills/harness-plan/SKILL.md +9 -5
  2. package/.agents/skills/harness-sentrux-setup/SKILL.md +3 -4
  3. package/.pi/extensions/00-ultimate-pi-system-prompt.ts +194 -0
  4. package/.pi/extensions/budget-guard.ts +10 -2
  5. package/.pi/extensions/debate-orchestrator.ts +10 -2
  6. package/.pi/extensions/harness-live-widget.ts +10 -3
  7. package/.pi/extensions/harness-run-context.ts +703 -0
  8. package/.pi/extensions/observation-bus.ts +7 -9
  9. package/.pi/extensions/policy-gate.ts +50 -68
  10. package/.pi/extensions/trace-recorder.ts +80 -20
  11. package/.pi/harness/README.md +2 -0
  12. package/.pi/harness/agents.manifest.json +3 -3
  13. package/.pi/harness/docs/adrs/0009-sentrux-rules-lifecycle.md +1 -1
  14. package/.pi/harness/docs/adrs/0031-harness-run-context.md +38 -0
  15. package/.pi/harness/docs/adrs/README.md +1 -0
  16. package/.pi/harness/evals/smoke/run-context.fixture.json +17 -0
  17. package/.pi/harness/specs/harness-run-context.schema.json +80 -0
  18. package/.pi/lib/harness-run-context.ts +794 -0
  19. package/.pi/lib/harness-ui-state.ts +11 -0
  20. package/.pi/prompts/harness-abort.md +9 -6
  21. package/.pi/prompts/harness-auto.md +3 -3
  22. package/.pi/prompts/harness-critic.md +3 -5
  23. package/.pi/prompts/harness-eval.md +16 -16
  24. package/.pi/prompts/harness-incident.md +7 -5
  25. package/.pi/prompts/harness-plan.md +18 -3
  26. package/.pi/prompts/harness-review.md +4 -5
  27. package/.pi/prompts/harness-router-tune.md +1 -1
  28. package/.pi/prompts/harness-run.md +11 -11
  29. package/.pi/prompts/harness-setup.md +5 -27
  30. package/.pi/prompts/harness-trace.md +3 -5
  31. package/.pi/scripts/harness-verify.mjs +18 -0
  32. package/CHANGELOG.md +15 -0
  33. package/README.md +31 -14
  34. package/package.json +2 -2
@@ -0,0 +1,794 @@
1
+ /**
2
+ * harness-run-context — shared types and helpers for active harness runs.
3
+ *
4
+ * Session entry `harness-run-context` is the live source of truth; disk mirrors:
5
+ * - `.pi/harness/runs/<run_id>/run-context.json`
6
+ * - `.pi/harness/active-run.json` (cross-session pointer)
7
+ */
8
+
9
+ import { mkdir, readFile, writeFile } from "node:fs/promises";
10
+ import { isAbsolute, join, relative, resolve } from "node:path";
11
+
12
/** Phase a harness run can be in. */
export type HarnessPhase = "plan" | "execute" | "evaluate" | "adversary" | "merge";
18
+
19
/** Status values recorded on a harness run context. */
export type HarnessRunStatus =
  | "active"
  | "aborted"
  | "completed";
20
+
21
/**
 * State of one harness run. The session entry `harness-run-context` carries
 * the live copy; it is mirrored to `runs/<run_id>/run-context.json` on disk.
 */
export interface HarnessRunContext {
  /** Fixed schema version of this record. */
  schema_version: "1.0.0";

  // --- identity ---
  run_id: string;
  pi_session_id: string;
  project_root: string;

  // --- plan tracking ---
  phase: HarnessPhase;
  plan_id: string | null;
  /** Location of the PlanPacket JSON; fresh runs point at the canonical per-run path. */
  plan_packet_path: string | null;
  plan_ready: boolean;
  task_summary: string | null;

  // --- progress ---
  status: HarnessRunStatus;
  last_completed_step: string | null;
  last_outcome: string | null;
  next_recommended_command: string | null;

  // --- ownership / bookkeeping ---
  /** Fresh runs set this to the creating session id. */
  owner_pi_session_id: string;
  /** ISO-8601 timestamp string. */
  updated_at: string;
  /** NOTE(review): presumably marks that a run-started event was already emitted — confirm with the emitter. */
  harness_run_started_emitted?: boolean;
  /** When set, run-id lookup prefers this id over `run_id`. */
  turn_override_run_id?: string | null;
}
40
+
41
/**
 * Cross-session pointer to the project's active run, stored at
 * `.pi/harness/active-run.json`. Holds a subset of the run context fields.
 */
export interface ProjectActiveRunPointer {
  /** Fixed schema version of this record. */
  schema_version: "1.0.0";
  run_id: string;
  project_root: string;
  owner_pi_session_id: string;
  phase: HarnessPhase;
  plan_id: string | null;
  plan_ready: boolean;
  /** ISO-8601 timestamp; used to decide whether the pointer is stale. */
  updated_at: string;
}
51
+
52
/** Compact description of a PlanPacket, suitable for prompt injection. */
export interface PlanPacketSummary {
  plan_id: string;
  plan_packet_path: string;
  /** Scope text, truncated to a single short line. */
  scope_one_liner: string;
  /** Number of entries in the packet's `acceptance_checks` array. */
  acceptance_check_count: number;
  plan_status: string;
}
59
+
60
/**
 * Loosely typed PlanPacket as parsed from JSON. Every field is optional
 * because the content is untrusted until it has been validated.
 */
export interface PlanPacketLike {
  // versioning
  schema_version?: string;
  contract_version?: string;

  // identity
  plan_id?: string;
  task_id?: string;

  // content
  scope?: string;
  acceptance_checks?: unknown[];
  risk_level?: string;
  assumptions?: unknown[];
  rollback_plan?: unknown;
}
71
+
72
/** Minimal shape of a session entry as inspected by this module. */
interface SessionEntryLike {
  /** Entries of interest here have `type === "custom"`. */
  type?: string;
  /** Discriminator for custom entries, e.g. `harness-run-context`. */
  customType?: string;
  /** Payload; its shape depends on `customType`. */
  data?: unknown;
}
77
+
78
/** Schema version stamped on run contexts and active-run pointers. */
const SCHEMA_VERSION: "1.0.0" = "1.0.0";
79
+
80
/** Slash-command names (without the leading `/`) recognised as harness commands. */
const HARNESS_COMMANDS = new Set<string>([
  // core workflow
  "harness-plan",
  "harness-run",
  "harness-eval",
  "harness-review",
  "harness-critic",
  "harness-trace",
  "harness-incident",
  "harness-abort",
  "harness-auto",
  // run management
  "harness-new-run",
  "harness-run-status",
  "harness-use-run",
  // drift handling
  "harness-drift-replan",
  "harness-drift-proceed",
  // status / tuning
  "harness-policy-status",
  "harness-trace-last",
  "harness-router-tune",
  "harness-budget-status",
]);
100
+
101
/**
 * Directory that holds one sub-directory per harness run.
 *
 * @param projectRoot Root directory of the project.
 * @returns `<projectRoot>/.pi/harness/runs`.
 */
export function harnessRunsRoot(projectRoot: string): string {
  const segments = [".pi", "harness", "runs"];
  return join(projectRoot, ...segments);
}
104
+
105
/**
 * Location of the cross-session active-run pointer file.
 *
 * @param projectRoot Root directory of the project.
 * @returns `<projectRoot>/.pi/harness/active-run.json`.
 */
export function activeRunPointerPath(projectRoot: string): string {
  const harnessDir = join(projectRoot, ".pi", "harness");
  return join(harnessDir, "active-run.json");
}
108
+
109
/**
 * Path of the on-disk mirror of a run's context.
 *
 * @param runId Identifier of the run.
 * @param projectRoot Root directory of the project.
 * @returns `<runs root>/<runId>/run-context.json`.
 */
export function runContextDiskPath(runId: string, projectRoot: string): string {
  const runDir = join(harnessRunsRoot(projectRoot), runId);
  return join(runDir, "run-context.json");
}
112
+
113
/**
 * Canonical location of a run's PlanPacket file.
 *
 * @param runId Identifier of the run.
 * @param projectRoot Root directory of the project.
 * @returns `<runs root>/<runId>/plan-packet.json`.
 */
export function canonicalPlanPath(runId: string, projectRoot: string): string {
  const runDir = join(harnessRunsRoot(projectRoot), runId);
  return join(runDir, "plan-packet.json");
}
116
+
117
/**
 * Builds a new run id from the session id and the current epoch time.
 *
 * @param sessionId Session id used as the prefix.
 * @returns `<sessionId>-<milliseconds since epoch>`.
 */
export function allocateRunId(sessionId: string): string {
  const stampMs = Date.now();
  return sessionId + "-" + String(stampMs);
}
120
+
121
/**
 * Current time as an ISO-8601 string.
 *
 * @returns The result of `Date.prototype.toISOString()` for "now".
 */
export function nowIso(): string {
  const current = new Date();
  return current.toISOString();
}
124
+
125
/**
 * Tells whether a prompt begins (after trimming) with a known harness slash command.
 *
 * @param prompt Raw prompt text.
 * @returns `true` when the leading `/harness-…` token is in `HARNESS_COMMANDS`.
 */
export function isHarnessSlashCommand(prompt: string): boolean {
  // The pattern already requires the "/harness-" prefix.
  const commandName = /^\/(harness-[a-z0-9-]+)/.exec(prompt.trim())?.[1];
  return commandName !== undefined && HARNESS_COMMANDS.has(commandName);
}
132
+
133
/**
 * Splits a harness slash-command prompt into command name and argument text.
 *
 * @param prompt Raw prompt text; surrounding whitespace is ignored.
 * @returns The command (without `/`) and trimmed arguments, or `null` when the
 *   prompt is not a known harness command followed by whitespace or end of input.
 */
export function parseHarnessSlashCommand(
  prompt: string,
): { command: string; args: string } | null {
  const parsed = /^\/(harness-[a-z0-9-]+)(?:\s+([\s\S]*))?$/.exec(prompt.trim());
  if (parsed === null) return null;

  const [, command, rest = ""] = parsed;
  return HARNESS_COMMANDS.has(command) ? { command, args: rest.trim() } : null;
}
143
+
144
/**
 * Returns the part of a prompt that the user typed, dropping everything from
 * the first injected block marker onwards. Used for policy signals so that
 * injected blocks cannot trigger them.
 *
 * @param prompt Full prompt, possibly with injected blocks appended.
 * @returns The trimmed text preceding any injected block.
 */
export function userVisiblePromptSlice(prompt: string): string {
  const injectedMarkers = [
    "\n\n[HarnessRunContext]",
    "\n\n[HarnessActivePlan]",
    "\n\n[PolicyGate]",
  ];
  // Cut at each marker in turn, always searching the already-shortened text.
  const visible = injectedMarkers.reduce((text, marker) => {
    const cutAt = text.indexOf(marker);
    return cutAt === -1 ? text : text.slice(0, cutAt);
  }, prompt);
  return visible.trim();
}
158
+
159
/**
 * Detects an "approved plan" signal in the user-visible part of a prompt.
 *
 * @param prompt Full prompt; injected blocks are ignored.
 * @returns `true` when the user text mentions `planpacket`, `approved plan`,
 *   or a `plan_id=` / `plan_id:` assignment (case-insensitive).
 */
export function hasApprovedPlanSignalFromUserPrompt(prompt: string): boolean {
  const userText = userVisiblePromptSlice(prompt).toLowerCase();
  const mentionsPlan = ["planpacket", "approved plan"].some((phrase) =>
    userText.includes(phrase),
  );
  return mentionsPlan || /\bplan_id\s*[=:]/i.test(userText);
}
167
+
168
/**
 * Detects a drift-replan request in the user-visible part of a prompt.
 *
 * @param prompt Full prompt; injected blocks are ignored.
 * @returns `true` when the user text contains any of the drift-replan markers
 *   (case-insensitive).
 */
export function isDriftReplanPrompt(prompt: string): boolean {
  const userText = userVisiblePromptSlice(prompt).toLowerCase();
  const needles = ["harness-drift-replan", "/harness-drift-replan", "drift-replan"];
  return needles.some((needle) => userText.includes(needle));
}
176
+
177
/**
 * Finds the most recent usable `harness-run-context` session entry.
 *
 * Entries are scanned newest-first. An entry is usable when it is a custom
 * entry of type `harness-run-context` whose data has a truthy `run_id` and
 * `project_root`; anything else is skipped.
 *
 * @param entries Session entries of unknown shape; null/undefined elements are skipped.
 * @returns The normalized run context, or `null` when none is found.
 */
export function getLatestRunContext(
  entries: unknown[],
): HarnessRunContext | null {
  for (let i = entries.length - 1; i >= 0; i--) {
    // Elements are `unknown`, so guard against null/undefined before reading fields.
    const entry = entries[i] as SessionEntryLike | null | undefined;
    if (entry?.type !== "custom" || entry.customType !== "harness-run-context")
      continue;
    const ctx = entry.data as Partial<HarnessRunContext> | null | undefined;
    if (ctx?.run_id && ctx.project_root) {
      return normalizeRunContext(ctx);
    }
  }
  return null;
}
191
+
192
/**
 * Fills in defaults so a partially populated context becomes a full one.
 *
 * Callers must ensure `run_id` and `project_root` are present; they are
 * copied through without a check. Missing session ids fall back to the run id.
 *
 * @param partial Possibly incomplete context (e.g. parsed from JSON).
 * @returns A context with every required field set.
 */
function normalizeRunContext(
  partial: Partial<HarnessRunContext>,
): HarnessRunContext {
  const runId = partial.run_id!;
  const sessionId = partial.pi_session_id ?? runId;
  const ownerId = partial.owner_pi_session_id ?? sessionId;

  return {
    schema_version: SCHEMA_VERSION,
    run_id: runId,
    pi_session_id: sessionId,
    project_root: partial.project_root!,
    phase: partial.phase ?? "plan",
    plan_id: partial.plan_id ?? null,
    plan_packet_path: partial.plan_packet_path ?? null,
    plan_ready: Boolean(partial.plan_ready),
    task_summary: partial.task_summary ?? null,
    status: partial.status ?? "active",
    last_completed_step: partial.last_completed_step ?? null,
    last_outcome: partial.last_outcome ?? null,
    next_recommended_command: partial.next_recommended_command ?? null,
    owner_pi_session_id: ownerId,
    // Only stamp a fresh time when the source had none.
    updated_at: partial.updated_at ?? nowIso(),
    harness_run_started_emitted: partial.harness_run_started_emitted,
    turn_override_run_id: partial.turn_override_run_id ?? null,
  };
}
216
+
217
/**
 * Creates the context for a brand-new run in the `plan` phase.
 *
 * A new run id is allocated, the plan path is set to the canonical location
 * for that run, and the creating session becomes the owner.
 *
 * @param sessionId Id of the session creating the run.
 * @param projectRoot Root directory of the project.
 * @param taskSummary Optional short description of the task.
 * @returns An active context with no plan yet.
 */
export function createFreshRunContext(
  sessionId: string,
  projectRoot: string,
  taskSummary: string | null = null,
): HarnessRunContext {
  const freshRunId = allocateRunId(sessionId);
  const createdAt = nowIso();

  const context: HarnessRunContext = {
    schema_version: SCHEMA_VERSION,
    run_id: freshRunId,
    pi_session_id: sessionId,
    project_root: projectRoot,
    phase: "plan",
    plan_id: null,
    plan_packet_path: canonicalPlanPath(freshRunId, projectRoot),
    plan_ready: false,
    task_summary: taskSummary,
    status: "active",
    last_completed_step: null,
    last_outcome: null,
    next_recommended_command: null,
    owner_pi_session_id: sessionId,
    updated_at: createdAt,
    harness_run_started_emitted: false,
    turn_override_run_id: null,
  };
  return context;
}
244
+
245
/**
 * Loads a run's context from its on-disk mirror.
 *
 * The parsed JSON must be an object carrying non-empty string `run_id` and
 * `project_root`, the same minimum that session entries must meet. Without
 * this check a file containing `{}` would produce a context whose ids are
 * `undefined`.
 *
 * @param runId Identifier of the run to load.
 * @param projectRoot Root directory of the project.
 * @returns The normalized context, or `null` when the file is missing,
 *   unreadable, not valid JSON, or lacks the required fields.
 */
export async function loadRunContextFromDisk(
  runId: string,
  projectRoot: string,
): Promise<HarnessRunContext | null> {
  let parsed: unknown;
  try {
    const raw = await readFile(runContextDiskPath(runId, projectRoot), "utf-8");
    parsed = JSON.parse(raw);
  } catch {
    // Best-effort load: a missing or corrupt mirror is treated as "no context".
    return null;
  }
  if (typeof parsed !== "object" || parsed === null) return null;

  const candidate = parsed as Partial<HarnessRunContext>;
  const hasRequiredIds =
    typeof candidate.run_id === "string" &&
    candidate.run_id.length > 0 &&
    typeof candidate.project_root === "string" &&
    candidate.project_root.length > 0;
  return hasRequiredIds ? normalizeRunContext(candidate) : null;
}
256
+
257
/**
 * Writes a run context to `runs/<run_id>/run-context.json`, creating the run
 * directory when needed. The file is pretty-printed JSON with a trailing newline.
 *
 * @param ctx Context to persist.
 */
export async function saveRunContextToDisk(
  ctx: HarnessRunContext,
): Promise<void> {
  const { run_id: runId, project_root: root } = ctx;
  const runDir = join(harnessRunsRoot(root), runId);
  await mkdir(runDir, { recursive: true });

  const serialized = JSON.stringify(ctx, null, 2) + "\n";
  await writeFile(runContextDiskPath(runId, root), serialized, "utf-8");
}
268
+
269
/**
 * Reads the project's active-run pointer from `.pi/harness/active-run.json`.
 *
 * The parsed JSON must be an object with non-empty string `run_id` and
 * `project_root`. Staleness checks pass `project_root` to `path.resolve`,
 * which throws on non-string input, so malformed pointers are rejected here.
 *
 * @param projectRoot Root directory of the project.
 * @returns The pointer, or `null` when the file is missing, unreadable,
 *   not valid JSON, or lacks the required fields.
 */
export async function loadProjectActiveRun(
  projectRoot: string,
): Promise<ProjectActiveRunPointer | null> {
  let parsed: unknown;
  try {
    const raw = await readFile(activeRunPointerPath(projectRoot), "utf-8");
    parsed = JSON.parse(raw);
  } catch {
    // Best-effort load: a missing or corrupt pointer means "no active run".
    return null;
  }
  if (typeof parsed !== "object" || parsed === null) return null;

  const candidate = parsed as Partial<ProjectActiveRunPointer>;
  const hasRequiredIds =
    typeof candidate.run_id === "string" &&
    candidate.run_id.length > 0 &&
    typeof candidate.project_root === "string" &&
    candidate.project_root.length > 0;
  return hasRequiredIds ? (candidate as ProjectActiveRunPointer) : null;
}
279
+
280
/**
 * Persists the cross-session active-run pointer derived from a run context.
 *
 * Only the pointer subset of the context is written, as pretty-printed JSON
 * with a trailing newline. The `.pi/harness` directory is created when needed.
 *
 * @param ctx Context of the run that should become the project's active run.
 */
export async function saveProjectActiveRun(
  ctx: HarnessRunContext,
): Promise<void> {
  const root = ctx.project_root;
  const pointer: ProjectActiveRunPointer = {
    schema_version: SCHEMA_VERSION,
    run_id: ctx.run_id,
    project_root: root,
    owner_pi_session_id: ctx.owner_pi_session_id,
    phase: ctx.phase,
    plan_id: ctx.plan_id,
    plan_ready: ctx.plan_ready,
    updated_at: ctx.updated_at,
  };

  const harnessDir = join(root, ".pi", "harness");
  await mkdir(harnessDir, { recursive: true });

  const serialized = JSON.stringify(pointer, null, 2) + "\n";
  await writeFile(activeRunPointerPath(root), serialized, "utf-8");
}
302
+
303
/**
 * Time-to-live, in hours, for the active-run pointer.
 *
 * Read from `HARNESS_ACTIVE_RUN_TTL_HOURS`; anything that is not a finite
 * positive number falls back to 72.
 *
 * @returns The configured TTL in hours, or 72.
 */
export function activeRunTtlHours(): number {
  const configured = Number(process.env.HARNESS_ACTIVE_RUN_TTL_HOURS ?? "72");
  if (!Number.isFinite(configured) || configured <= 0) {
    return 72;
  }
  return configured;
}
307
+
308
/**
 * Decides whether an active-run pointer should be ignored.
 *
 * A pointer is stale when it belongs to a different project root, when its
 * `updated_at` cannot be parsed, or when it is older than the configured TTL.
 *
 * @param pointer Pointer read from disk.
 * @param currentProjectRoot Root of the project currently in use.
 * @returns `true` when the pointer is stale.
 */
export function isStaleActiveRunPointer(
  pointer: ProjectActiveRunPointer,
  currentProjectRoot: string,
): boolean {
  const sameProject =
    resolve(pointer.project_root) === resolve(currentProjectRoot);
  if (!sameProject) return true;

  const ageMs = Date.now() - Date.parse(pointer.updated_at);
  // An unparsable timestamp yields NaN; treat that as stale.
  if (!Number.isFinite(ageMs)) return true;

  const ttlMs = activeRunTtlHours() * 60 * 60 * 1000;
  return ageMs > ttlMs;
}
319
+
320
/**
 * Reads and JSON-parses a PlanPacket file. The result is not validated.
 *
 * @param planPath Path of the plan file.
 * @returns The parsed content, or `null` when reading or parsing fails.
 */
export async function readPlanPacketFromPath(
  planPath: string,
): Promise<PlanPacketLike | null> {
  try {
    const text = await readFile(planPath, "utf-8");
    const packet = JSON.parse(text) as PlanPacketLike;
    return packet;
  } catch {
    // Missing or malformed plan files are reported as "no packet".
    return null;
  }
}
330
+
331
/**
 * Checks that a PlanPacket carries the fields the harness requires.
 *
 * @param packet Parsed packet, or `null` when the plan file could not be read.
 * @returns `valid` plus the list of problems, in a fixed order.
 */
export function validatePlanPacket(packet: PlanPacketLike | null): {
  valid: boolean;
  errors: string[];
} {
  if (packet === null || packet === undefined) {
    return { valid: false, errors: ["plan file missing or unreadable"] };
  }

  const isNonEmptyString = (value: unknown): boolean =>
    typeof value === "string" && value.length > 0;
  const checks = packet.acceptance_checks;

  // Each rule pairs a "failed" flag with the message to report.
  const rules: Array<[failed: boolean, message: string]> = [
    [packet.schema_version !== "1.0.0", "schema_version must be 1.0.0"],
    [packet.contract_version !== "1.0.0", "contract_version must be 1.0.0"],
    [!isNonEmptyString(packet.plan_id), "plan_id required"],
    [!isNonEmptyString(packet.task_id), "task_id required"],
    [!isNonEmptyString(packet.scope), "scope required"],
    [!Array.isArray(checks) || checks.length < 1, "acceptance_checks required"],
    [!packet.risk_level, "risk_level required"],
    [!packet.rollback_plan, "rollback_plan required"],
  ];

  const errors = rules.filter(([failed]) => failed).map(([, message]) => message);
  return { valid: errors.length === 0, errors };
}
357
+
358
/**
 * Condenses a PlanPacket into the few fields injected into prompts.
 *
 * Scope text longer than 120 characters is cut to 117 characters plus `...`.
 *
 * @param packet Parsed PlanPacket.
 * @param planPath Path the packet was loaded from.
 * @param planStatus Status label to report; defaults to `"ready"`.
 * @returns The summary record.
 */
export function planPacketSummary(
  packet: PlanPacketLike,
  planPath: string,
  planStatus = "ready",
): PlanPacketSummary {
  const fullScope = typeof packet.scope === "string" ? packet.scope : "(no scope)";

  let scopeLine = fullScope;
  if (fullScope.length > 120) {
    scopeLine = `${fullScope.slice(0, 117)}...`;
  }

  const checks = packet.acceptance_checks;
  const checkCount = Array.isArray(checks) ? checks.length : 0;

  return {
    plan_id: String(packet.plan_id ?? "unknown"),
    plan_packet_path: planPath,
    scope_one_liner: scopeLine,
    acceptance_check_count: checkCount,
    plan_status: planStatus,
  };
}
375
+
376
/**
 * Renders the `[HarnessRunContext]` block injected into prompts.
 *
 * The block is a header line followed by `key=value` lines; null values are
 * rendered as `none`, and `plan_packet_path` is appended only when set.
 *
 * @param ctx Run context to render.
 * @returns The block text, lines joined with `\n`.
 */
export function formatPlanContextBlock(ctx: HarnessRunContext): string {
  const orNone = (value: string | null): string => value ?? "none";

  let block = "[HarnessRunContext]";
  block += `\nrun_id=${ctx.run_id}`;
  block += `\nphase=${ctx.phase}`;
  block += `\nstatus=${ctx.status}`;
  block += `\nplan_ready=${ctx.plan_ready}`;
  block += `\nplan_id=${orNone(ctx.plan_id)}`;
  block += `\nlast_completed_step=${orNone(ctx.last_completed_step)}`;
  block += `\nlast_outcome=${orNone(ctx.last_outcome)}`;
  block += `\nnext_recommended_command=${orNone(ctx.next_recommended_command)}`;
  if (ctx.plan_packet_path) {
    block += `\nplan_packet_path=${ctx.plan_packet_path}`;
  }
  return block;
}
393
+
394
/**
 * Renders the `[HarnessActivePlan]` block injected into prompts.
 *
 * The block starts with an instruction chosen by `mode`, followed by the plan
 * path and task summary when set, and the plan summary fields when given.
 *
 * @param ctx Run context the plan belongs to.
 * @param mode How the agent should treat the plan on this turn.
 * @param summary Optional summary of the PlanPacket on disk.
 * @returns The block text, lines joined with `\n`.
 */
export function formatActivePlanBlock(
  ctx: HarnessRunContext,
  mode: "create" | "revise" | "execute" | "read",
  summary?: PlanPacketSummary | null,
): string {
  const out: string[] = ["[HarnessActivePlan]"];

  switch (mode) {
    case "create":
      out.push(
        "No prior PlanPacket on disk. Create PlanPacket at the canonical path below.",
      );
      break;
    case "revise":
      out.push(
        "Read the current PlanPacket from disk first, then revise per the user task.",
      );
      if (ctx.status === "aborted") {
        out.push(
          "Prior run was aborted; treat this as replan/amend of prior scope.",
        );
      }
      break;
    case "execute":
      out.push(
        "Load PlanPacket from plan_packet_path and execute it. Do not parse --plan from user input on the happy path.",
      );
      break;
    default:
      // "read" and any unexpected mode: the plan must not be modified.
      out.push(
        "Plan is read-only in this phase. Do not edit plan-packet.json.",
      );
  }

  if (ctx.plan_packet_path) out.push(`plan_packet_path=${ctx.plan_packet_path}`);
  if (ctx.task_summary) out.push(`task_summary=${ctx.task_summary}`);
  if (summary) {
    out.push(
      `plan_id=${summary.plan_id}`,
      `scope=${summary.scope_one_liner}`,
      `acceptance_checks=${summary.acceptance_check_count}`,
      `plan_status=${summary.plan_status}`,
    );
  }
  return out.join("\n");
}
434
+
435
/**
 * Extracts the value following a command-line style flag in an argument string.
 *
 * The value is either a double-quoted string (quotes removed) or the next run
 * of non-whitespace characters. The flag must start the string or follow
 * whitespace, and is matched literally: regex metacharacters in `flag` are
 * escaped, so `--a.b` does not match `--aXb`, and `--run` is not found inside
 * `--no--run`.
 *
 * @param args Argument text, e.g. `--run abc --plan "my plan.json"`.
 * @param flag Flag to look for, including leading dashes (e.g. `--run`).
 * @returns The flag's value, or `null` when the flag is absent or has no value.
 */
export function parseArgFlag(args: string, flag: string): string | null {
  // Escape regex metacharacters so the flag is treated as literal text.
  const literalFlag = flag.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const pattern = new RegExp(
    `(?:^|\\s)${literalFlag}\\s+(?:"([^"]+)"|(\\S+))`,
  );
  const found = pattern.exec(args);
  if (!found) return null;
  // Group 1 is the quoted form, group 2 the bare token.
  return found[1] ?? found[2] ?? null;
}
441
+
442
/**
 * Works out which run and plan file a command targets.
 *
 * Defaults come from the current context. `--run <id>` and `--plan <path>`
 * override them. For `harness-use-run`, the first positional token is the run
 * id, but only when no `--run` was given and the token is not itself a flag.
 * Without that rule `/harness-use-run --run X` would set the run id to the
 * literal text `--run`.
 *
 * @param command Harness command name without the leading `/`.
 * @param args Argument text following the command.
 * @param ctx Current run context, if any.
 * @returns The resolved run id and plan path, plus whether the run was
 *   explicitly overridden.
 */
export function resolveArgsForCommand(
  command: string,
  args: string,
  ctx: HarnessRunContext | null,
): { runId: string | null; planPath: string | null; overrideRun: boolean } {
  let runId = ctx?.run_id ?? null;
  let planPath = ctx?.plan_packet_path ?? null;
  let overrideRun = false;

  const explicitRun = parseArgFlag(args, "--run");
  if (explicitRun) {
    runId = explicitRun;
    overrideRun = true;
  }
  const explicitPlan = parseArgFlag(args, "--plan");
  if (explicitPlan) {
    planPath = explicitPlan;
  }

  if (command === "harness-use-run" && !explicitRun) {
    const firstToken = args.trim().split(/\s+/)[0];
    // Ignore an empty argument string and leading flags such as `--plan`.
    if (firstToken && !firstToken.startsWith("--")) {
      runId = firstToken;
      overrideRun = true;
    }
  }

  return { runId, planPath, overrideRun };
}
468
+
469
/**
 * Checks that an explicit `--plan` path lies inside the given run's directory.
 *
 * The plan path is resolved (relative paths against the process working
 * directory) and compared with `runs/<runId>/`. It is rejected when the
 * relative path climbs out via a leading `..` segment or is absolute (e.g. a
 * different drive on Windows). Only a whole `..` segment counts, so names that
 * merely start with two dots, such as `..plan.json`, are accepted.
 *
 * @param planPath Path supplied by the user.
 * @param runId Run the plan must belong to.
 * @param projectRoot Root directory of the project.
 * @returns `{ ok: true }`, or `{ ok: false, reason }` with a user-facing message.
 */
export function validatePlanOverridePath(
  planPath: string,
  runId: string,
  projectRoot: string,
): { ok: boolean; reason?: string } {
  const absPlan = resolve(planPath);
  const runsDir = resolve(harnessRunsRoot(projectRoot), runId);
  const rel = relative(runsDir, absPlan);
  // Look at the first path segment only; accept either separator style.
  const escapesRunDir = rel.split(/[\\/]/)[0] === "..";
  if (escapesRunDir || isAbsolute(rel)) {
    return {
      ok: false,
      reason: `--plan must be under runs/${runId}/ or use /harness-use-run to switch runs`,
    };
  }
  return { ok: true };
}
485
+
486
/**
 * Determines the run id that applies to the current session.
 *
 * Order of preference:
 * 1. `turn_override_run_id` of the latest run context, when set;
 * 2. `run_id` of the latest run context, when it is active or aborted;
 * 3. `run_id` of the most recent `harness-trace-state` entry.
 *
 * @param entries Session entries of unknown shape; null/undefined elements are skipped.
 * @param sessionId Currently unused; kept so existing callers keep working.
 * @returns The run id, or `null` when none can be determined.
 */
export function getRunIdFromSession(
  entries: unknown[],
  sessionId: string,
): string | null {
  const ctx = getLatestRunContext(entries);
  if (ctx?.turn_override_run_id) return ctx.turn_override_run_id;
  if (ctx?.status === "active" || ctx?.status === "aborted") return ctx.run_id;
  for (let i = entries.length - 1; i >= 0; i--) {
    // Elements are `unknown`, so guard against null/undefined before reading fields.
    const entry = entries[i] as SessionEntryLike | null | undefined;
    if (entry?.type !== "custom" || entry.customType !== "harness-trace-state")
      continue;
    const runId = (entry.data as { run_id?: string } | null | undefined)?.run_id;
    if (typeof runId === "string" && runId.length > 0) return runId;
  }
  return null;
}
502
+
503
/**
 * Decides whether a harness command should continue the existing run rather
 * than start a new one.
 *
 * Reuse is never chosen without a command, for non-harness prompts, for
 * `harness-new-run`, or when there is no context. `harness-plan` and
 * `harness-auto` reuse only active or aborted runs; every other command
 * reuses any context that has a run id.
 *
 * @param prompt Raw prompt text.
 * @param ctx Latest run context, if any.
 * @param command Parsed command name, if any.
 * @returns `true` when the existing run id should be reused.
 */
export function shouldReuseHarnessRunId(
  prompt: string,
  ctx: HarnessRunContext | null,
  command: string | null,
): boolean {
  const isHarnessTurn = Boolean(command) && isHarnessSlashCommand(prompt);
  if (!isHarnessTurn || command === "harness-new-run" || !ctx) {
    return false;
  }

  const planningCommand = command === "harness-plan" || command === "harness-auto";
  if (planningCommand) {
    return ctx.status === "active" || ctx.status === "aborted";
  }
  return ctx.status === "active" || Boolean(ctx.run_id);
}
517
+
518
/** Harness phases in the order a run progresses through them. */
const HARNESS_PHASE_ORDER: HarnessPhase[] = ["plan", "execute", "evaluate", "adversary", "merge"];
525
+
526
/** Policy-gate state as recorded in `harness-policy-state` session entries. */
export interface HarnessPolicyState {
  /** Phase the policy gate last recorded. */
  phase: HarnessPhase;
  /** Whether a plan has been approved for execution. */
  approvedPlan: boolean;
  planId: string | null;
  aborted: boolean;
}
532
+
533
/**
 * Guesses which harness phase a prompt is asking for.
 *
 * The prompt is lower-cased and searched for substrings, rule by rule in the
 * order listed; the first rule with a match decides the phase. Prompts that
 * match nothing are treated as `execute`.
 *
 * @param prompt Raw prompt text.
 * @returns The inferred phase.
 */
export function inferHarnessPhaseFromPrompt(prompt: string): HarnessPhase {
  const text = prompt.toLowerCase();

  // Rule order matters: earlier rules win.
  const rules: Array<{ needles: string[]; phase: HarnessPhase }> = [
    {
      needles: ["/harness-plan", "harness-plan", "/harness-auto", "harness-auto"],
      phase: "plan",
    },
    { needles: ["/harness-run", "harness-run"], phase: "execute" },
    { needles: ["/harness-eval", "harness-eval"], phase: "evaluate" },
    { needles: ["/harness-review", "harness-review"], phase: "evaluate" },
    { needles: ["/harness-critic", "harness-critic"], phase: "adversary" },
    { needles: ["adversary"], phase: "adversary" },
    { needles: ["merge gate", "policy decision"], phase: "merge" },
  ];

  const matched = rules.find((rule) =>
    rule.needles.some((needle) => text.includes(needle)),
  );
  return matched ? matched.phase : "execute";
}
557
+
558
/**
 * Tells whether moving from one phase to another is permitted.
 *
 * Staying in place and returning to `plan` or `execute` are always allowed;
 * otherwise only a step to the immediately following phase is valid.
 *
 * @param from Current phase.
 * @param to Requested phase.
 * @returns `true` when the transition is allowed.
 */
export function isValidHarnessPhaseTransition(
  from: HarnessPhase,
  to: HarnessPhase,
): boolean {
  const alwaysAllowed = from === to || to === "plan" || to === "execute";
  if (alwaysAllowed) return true;

  const step =
    HARNESS_PHASE_ORDER.indexOf(to) - HARNESS_PHASE_ORDER.indexOf(from);
  return step === 1;
}
569
+
570
/**
 * Finds the most recent valid `harness-policy-state` session entry.
 *
 * Entries are scanned newest-first; an entry counts when its data carries a
 * `phase` that is one of the known harness phases. Other fields are coerced
 * (`approvedPlan` and `aborted` to booleans, non-string `planId` to `null`).
 *
 * @param entries Session entries of unknown shape; null/undefined elements are skipped.
 * @returns The latest policy state. With no valid entry, a permissive default
 *   is returned: phase `execute`, `approvedPlan: true`, not aborted.
 */
export function getLatestPolicyState(entries: unknown[]): HarnessPolicyState {
  const fallback: HarnessPolicyState = {
    phase: "execute",
    approvedPlan: true,
    planId: null,
    aborted: false,
  };
  for (let i = entries.length - 1; i >= 0; i--) {
    // Elements are `unknown`, so guard against null/undefined before reading fields.
    const entry = entries[i] as SessionEntryLike | null | undefined;
    if (
      entry?.type !== "custom" ||
      entry.customType !== "harness-policy-state"
    ) {
      continue;
    }
    const candidate = entry.data as
      | Partial<HarnessPolicyState>
      | null
      | undefined;
    if (
      candidate &&
      typeof candidate.phase === "string" &&
      HARNESS_PHASE_ORDER.includes(candidate.phase as HarnessPhase)
    ) {
      return {
        phase: candidate.phase as HarnessPhase,
        approvedPlan: Boolean(candidate.approvedPlan),
        planId: typeof candidate.planId === "string" ? candidate.planId : null,
        aborted: Boolean(candidate.aborted),
      };
    }
  }
  return fallback;
}
601
+
602
/**
 * Detects prompts that ask for harness setup/bootstrap.
 *
 * @param prompt Raw prompt text; matching is case-insensitive.
 * @returns `true` when the prompt mentions harness setup or a full bootstrap.
 */
export function isHarnessBootstrapPrompt(prompt: string): boolean {
  const text = prompt.toLowerCase();
  const signals = ["/harness-setup", "harness-setup", "full harness bootstrap"];
  return signals.some((signal) => text.includes(signal));
}
610
+
611
/**
 * Detects a harness abort request anywhere in the prompt.
 *
 * @param prompt Raw prompt text; matching is case-insensitive.
 * @returns `true` when the prompt mentions `harness-abort`.
 */
export function hasHarnessAbortSignal(prompt: string): boolean {
  const text = prompt.toLowerCase();
  return ["/harness-abort", "harness-abort"].some((signal) =>
    text.includes(signal),
  );
}
615
+
616
/**
 * Mirrors the policy-gate phase checks so that run-context does not inject
 * its blocks on turns the gate would block.
 *
 * Bootstrap and abort prompts are never blocked. Otherwise the turn is blocked
 * when the inferred phase is not a valid transition from the recorded phase,
 * or when it targets `execute` without any sign of an approved plan (policy
 * state, `plan_ready` on the run context, or a signal in the user's text).
 *
 * @param userPrompt Prompt for the current turn.
 * @param entries Session entries.
 * @returns `blocked` and, when blocked, the message to show.
 */
export function getPolicyTransitionBlock(
  userPrompt: string,
  entries: unknown[],
): { blocked: boolean; message?: string } {
  const exempt =
    isHarnessBootstrapPrompt(userPrompt) || hasHarnessAbortSignal(userPrompt);
  if (exempt) return { blocked: false };

  const { phase: currentPhase, approvedPlan } = getLatestPolicyState(entries);
  const requestedPhase = inferHarnessPhaseFromPrompt(userPrompt);

  if (!isValidHarnessPhaseTransition(currentPhase, requestedPhase)) {
    const headline = `Policy gate blocked invalid phase transition: ${currentPhase} -> ${requestedPhase}.`;
    const hint = "Run /harness-plan first or continue in the current phase.";
    return { blocked: true, message: `${headline}\n${hint}` };
  }

  if (requestedPhase === "execute" && !approvedPlan) {
    const planReady = Boolean(getLatestRunContext(entries)?.plan_ready);
    const userSignalsPlan = hasApprovedPlanSignalFromUserPrompt(userPrompt);
    if (!planReady && !userSignalsPlan) {
      return {
        blocked: true,
        message:
          "Policy gate blocked execute: no approved plan in active run context. Run /harness-plan first.",
      };
    }
  }

  return { blocked: false };
}
653
+
654
/**
 * Tells whether the current plan may be amended on this turn.
 *
 * Amending is allowed when there is no run context, the run was aborted, the
 * last outcome asked for clarification, the prompt is a drift-replan request,
 * or the drift gate is active.
 *
 * @param ctx Latest run context, if any.
 * @param prompt Prompt for the current turn.
 * @param driftGateActive Whether an unacknowledged drift report is pending.
 * @returns `true` when amending the plan is permitted.
 */
export function isAmendPlanAllowed(
  ctx: HarnessRunContext | null,
  prompt: string,
  driftGateActive: boolean,
): boolean {
  const noLiveRun = !ctx || ctx.status === "aborted";
  return (
    noLiveRun ||
    ctx.last_outcome === "needs_clarification" ||
    isDriftReplanPrompt(prompt) ||
    driftGateActive
  );
}
665
+
666
/**
 * Decides whether a `/harness-plan` request must be blocked because it would
 * start a different task while a run is still active.
 *
 * Nothing is blocked when the run is not active, when amending is allowed, or
 * when the prompt is not a `/harness-plan` command. Otherwise the first
 * double-quoted string in the prompt is taken as the new task: the request is
 * blocked when no quoted task or no prior task summary exists, and when the
 * two differ (compared trimmed and case-insensitively).
 *
 * @param ctx Current run context.
 * @param prompt Prompt for the current turn.
 * @returns `true` when the plan request should be blocked.
 */
export function isNewTaskPlanBlocked(
  ctx: HarnessRunContext,
  prompt: string,
): boolean {
  const runIsActive = ctx.status === "active";
  if (!runIsActive || isAmendPlanAllowed(ctx, prompt, false)) return false;

  const parsed = parseHarnessSlashCommand(prompt);
  if (parsed?.command !== "harness-plan") return false;

  const quoted = /"([^"]+)"/.exec(prompt);
  if (quoted === null || !ctx.task_summary) return true;

  const requestedTask = quoted[1].trim().toLowerCase();
  const currentTask = ctx.task_summary.trim().toLowerCase();
  if (requestedTask === currentTask) return false;
  // Differing tasks block only when both are non-blank after trimming.
  return requestedTask.length > 0 && currentTask.length > 0;
}
681
+
682
/**
 * Recommends the next command (or user action) after a step finished.
 *
 * Abort and "needs clarification" take precedence over the phase. Status
 * strings are compared case-insensitively. Combinations without a specific
 * rule yield `/harness-run-status`.
 *
 * @param input Phase plus whichever status fields are known.
 * @returns Text describing the recommended next step.
 */
export function nextStepAfterOutcome(input: {
  phase: HarnessPhase;
  planStatus?: string | null;
  executionStatus?: string | null;
  evalStatus?: string | null;
  policyDecision?: string | null;
  aborted?: boolean;
}): string {
  const lower = (value?: string | null): string => (value ?? "").toLowerCase();

  if (input.aborted) return '/harness-plan "<task>"';

  const planStatus = lower(input.planStatus);
  if (planStatus === "needs_clarification") {
    return "Reply with answers or run /harness-plan with updates";
  }

  switch (input.phase) {
    case "plan":
      if (planStatus === "ready") return "/harness-run";
      break;
    case "execute": {
      const execStatus = lower(input.executionStatus);
      if (execStatus === "blocked" || execStatus === "scope_drift") {
        return "/harness-plan or /harness-abort";
      }
      if (execStatus === "completed") {
        return "New Pi session → /harness-eval";
      }
      break;
    }
    case "evaluate":
      return lower(input.evalStatus) === "fail"
        ? "/harness-plan or /harness-incident"
        : "/harness-review";
    case "adversary":
    case "merge":
      return "/harness-policy-status";
  }
  return "/harness-run-status";
}
716
+
717
/**
 * Collects the most recent plan, execution and evaluation statuses from the
 * session.
 *
 * Entries are scanned newest-first. `harness-plan-packet` entries can supply
 * the plan status; `harness-step-handoff` entries can supply all three. For
 * each status the newest non-empty string wins. Scanning stops as soon as all
 * three are known.
 *
 * @param entries Session entries of unknown shape; null/undefined elements are skipped.
 * @returns The three statuses, each `null` when not found.
 */
export function extractCompletionStatuses(entries: unknown[]): {
  planStatus: string | null;
  executionStatus: string | null;
  evalStatus: string | null;
} {
  let planStatus: string | null = null;
  let executionStatus: string | null = null;
  let evalStatus: string | null = null;

  for (let i = entries.length - 1; i >= 0; i--) {
    // Nothing older can change the result once every status is set.
    if (planStatus && executionStatus && evalStatus) break;

    // Elements are `unknown`, so guard against null/undefined before reading fields.
    const entry = entries[i] as SessionEntryLike | null | undefined;
    if (entry?.type !== "custom") continue;

    if (entry.customType === "harness-plan-packet") {
      const d = entry.data as { plan_status?: string } | null | undefined;
      if (!planStatus && typeof d?.plan_status === "string") {
        planStatus = d.plan_status;
      }
    }
    if (entry.customType === "harness-step-handoff") {
      const d = entry.data as
        | {
            plan_status?: string;
            execution_status?: string;
            eval_status?: string;
          }
        | null
        | undefined;
      if (!planStatus && typeof d?.plan_status === "string")
        planStatus = d.plan_status;
      if (!executionStatus && typeof d?.execution_status === "string")
        executionStatus = d.execution_status;
      if (!evalStatus && typeof d?.eval_status === "string")
        evalStatus = d.eval_status;
    }
  }
  return { planStatus, executionStatus, evalStatus };
}
751
+
752
/**
 * Returns the phase recorded by the most recent `harness-policy-state` entry
 * whose `phase` is a known harness phase.
 *
 * @param entries Session entries of unknown shape; null/undefined elements are skipped.
 * @returns The phase, or `null` when no such entry exists.
 */
export function getLatestPolicyPhase(entries: unknown[]): HarnessPhase | null {
  for (let i = entries.length - 1; i >= 0; i--) {
    // Elements are `unknown`, so guard against null/undefined before reading fields.
    const entry = entries[i] as SessionEntryLike | null | undefined;
    if (entry?.type !== "custom" || entry.customType !== "harness-policy-state")
      continue;
    const phase = (entry.data as { phase?: string } | null | undefined)?.phase;
    if (
      phase === "plan" ||
      phase === "execute" ||
      phase === "evaluate" ||
      phase === "adversary" ||
      phase === "merge"
    ) {
      return phase;
    }
  }
  return null;
}
770
+
771
/**
 * Tells whether any drift report in the session is at or above the drift
 * threshold and has not been acknowledged by the user.
 *
 * The threshold comes from `HARNESS_DRIFT_THRESHOLD` and is read once per
 * call. An unset, blank or non-numeric value falls back to 0.65, so a typo in
 * the variable cannot silently disable the gate (NaN) or trip it on every
 * report (an empty string parsing as 0).
 *
 * @param entries Session entries of unknown shape; null/undefined elements are skipped.
 * @returns `true` when an unacknowledged report meets the threshold.
 */
export function driftGateActive(entries: unknown[]): boolean {
  const DEFAULT_THRESHOLD = 0.65;
  const rawThreshold = process.env.HARNESS_DRIFT_THRESHOLD?.trim();
  const parsedThreshold = rawThreshold ? Number(rawThreshold) : Number.NaN;
  const threshold = Number.isFinite(parsedThreshold)
    ? parsedThreshold
    : DEFAULT_THRESHOLD;

  for (let i = entries.length - 1; i >= 0; i--) {
    // Elements are `unknown`, so guard against null/undefined before reading fields.
    const entry = entries[i] as SessionEntryLike | null | undefined;
    if (entry?.type !== "custom" || entry.customType !== "harness-drift-report")
      continue;
    const report = entry.data as
      | { drift_score?: number; user_acknowledged?: boolean }
      | null
      | undefined;
    // A missing score counts as 0; a non-numeric score becomes NaN and never passes.
    const score = Number(report?.drift_score ?? 0);
    if (score >= threshold && !report?.user_acknowledged) {
      return true;
    }
  }
  return false;
}
791
+
792
/**
 * File name used for the trace of a given phase.
 *
 * @param phase Harness phase.
 * @returns `trace-<phase>.json`.
 */
export function phaseTraceFileName(phase: HarnessPhase): string {
  return ["trace-", phase, ".json"].join("");
}