ultimate-pi 0.22.1 → 0.22.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. package/.pi/extensions/agt-kill-switch.ts +7 -1
  2. package/.pi/extensions/harness-plan-approval.ts +9 -1
  3. package/.pi/extensions/harness-run-context.ts +529 -84
  4. package/.pi/extensions/policy-gate.ts +15 -2
  5. package/.pi/harness/agents.manifest.json +3 -3
  6. package/.pi/harness/agents.policy.yaml +82 -3
  7. package/.pi/harness/specs/plan-task-clarification.schema.json +10 -1
  8. package/.pi/lib/agents-policy.mjs +42 -1
  9. package/.pi/lib/agt/build-evaluation-context.ts +3 -1
  10. package/.pi/lib/agt/kill-switch-state.ts +14 -0
  11. package/.pi/lib/agt/legacy-evaluate.ts +3 -1
  12. package/.pi/lib/ask-user/index.ts +2 -0
  13. package/.pi/lib/ask-user/merge-task-clarification.ts +5 -0
  14. package/.pi/lib/ask-user/policy.ts +23 -0
  15. package/.pi/lib/ask-user/presenters/glimpse.ts +8 -1
  16. package/.pi/lib/ask-user/presenters/headless.ts +15 -0
  17. package/.pi/lib/ask-user/presenters/select.ts +11 -2
  18. package/.pi/lib/ask-user/validate-core.mjs +16 -0
  19. package/.pi/lib/harness-artifact-gate.ts +75 -5
  20. package/.pi/lib/harness-repair-brief.ts +30 -4
  21. package/.pi/lib/harness-run-context.ts +804 -17
  22. package/.pi/lib/harness-schema-validate.ts +147 -38
  23. package/.pi/lib/harness-spawn-policy.ts +9 -0
  24. package/.pi/lib/harness-spawn-topology.ts +109 -7
  25. package/.pi/lib/harness-subagent-precheck.ts +21 -0
  26. package/.pi/lib/harness-subagent-submit-pipeline.ts +95 -21
  27. package/.pi/lib/harness-subagent-submit-register.ts +6 -1
  28. package/.pi/lib/harness-subagents-bridge.ts +3 -0
  29. package/.pi/lib/harness-yaml.ts +11 -3
  30. package/.pi/lib/plan-approval/create-plan.ts +2 -6
  31. package/.pi/lib/plan-debate-gate.ts +87 -0
  32. package/.pi/lib/plan-debate-lane.ts +8 -2
  33. package/.pi/lib/plan-human-gates.ts +322 -0
  34. package/.pi/prompts/harness-clear.md +25 -0
  35. package/.pi/prompts/harness-plan.md +4 -0
  36. package/.pi/scripts/generate-agents-policy-yaml.mjs +73 -7
  37. package/.pi/scripts/harness-reconcile-run-context.mjs +62 -0
  38. package/.pi/scripts/harness-schema-compile-verify.mjs +29 -0
  39. package/.pi/scripts/harness-verify.mjs +27 -0
  40. package/CHANGELOG.md +6 -0
  41. package/README.md +4 -0
  42. package/package.json +1 -1
@@ -10,6 +10,7 @@ import {
10
10
  readdir,
11
11
  readFile,
12
12
  rename,
13
+ rm,
13
14
  stat,
14
15
  writeFile,
15
16
  } from "node:fs/promises";
@@ -17,14 +18,25 @@ import { basename, dirname, join } from "node:path";
17
18
  import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
18
19
  import { Type } from "@sinclair/typebox";
19
20
  import { allowsAgentTool } from "../lib/agents-policy.mjs";
21
+ import {
22
+ disarmHarnessKillSwitch,
23
+ resetHarnessPolicyDenyCount,
24
+ } from "../lib/agt/kill-switch-state.js";
25
+ import { runAskUser } from "../lib/ask-user/index.js";
20
26
  import { claimHarnessGovernanceLoad } from "../lib/extension-load-guard.js";
21
27
  import { getHarnessPackageRoot } from "../lib/harness-paths.js";
22
28
  import {
29
+ blockingHarnessAutoCommandReason,
30
+ blockingReviewCommandReason,
31
+ blockingRunCommandReason,
32
+ blockingSteerCommandReason,
33
+ buildHarnessClearManifest,
23
34
  canonicalPlanPath,
24
35
  claimRunOwnership,
25
36
  createFreshRunContext,
26
37
  criticalPathWorkItemIdsFromPlanPacket,
27
38
  driftGateActive,
39
+ ensureReviewOutcomeFromEval,
28
40
  evaluateCrossSessionResume,
29
41
  extractWritePathFromToolInput,
30
42
  formatActivePlanBlock,
@@ -36,6 +48,7 @@ import {
36
48
  getPolicyTransitionBlock,
37
49
  type HarnessRunContext,
38
50
  type HarnessTurnEntry,
51
+ harnessAutoTasksDiffer,
39
52
  hasHarnessAbortSignal,
40
53
  hasPlanUserApproval,
41
54
  inferHarnessPhase,
@@ -51,6 +64,7 @@ import {
51
64
  normalizeHarnessPath,
52
65
  nowIso,
53
66
  type PlanPacketSummary,
67
+ parseArgFlag,
54
68
  parseHarnessSlashInput,
55
69
  parseHarnessUseRunArgs,
56
70
  parsePlanApprovalFromMessage,
@@ -58,14 +72,23 @@ import {
58
72
  readExecutorHandoffFromRun,
59
73
  readPlanPacketFromPath,
60
74
  readReviewOutcomeFromRun,
75
+ reconcileReviewRouting,
76
+ reconcileStaleExecuteCompletion,
77
+ relPathUnderActiveRun,
78
+ resetRunContextForHarnessAuto,
61
79
  resolveArgsForCommand,
62
80
  resolveCompletionStatuses,
81
+ resolveHarnessRunPostAgentState,
82
+ resolveHarnessRunWriteTarget,
83
+ resolveRemediationClassForRun,
63
84
  saveProjectActiveRun,
64
85
  saveRunContextToDisk,
65
86
  sessionHasResumePromptForRun,
66
87
  shouldAutoClaimHarnessRun,
67
88
  shouldReuseHarnessRunId,
68
89
  steerMaxAttemptsFromEnv,
90
+ syncPlanLastOutcomeFromTaskClarification,
91
+ syncPlanReadyFromDisk,
69
92
  userVisiblePromptSlice,
70
93
  validatePlanOverridePath,
71
94
  validatePlanPacket,
@@ -80,6 +103,11 @@ import {
80
103
  } from "../lib/harness-yaml.js";
81
104
  import { isReviewRoundArtifactPath } from "../lib/plan-debate-gate.js";
82
105
  import { isReviewRoundYamlWriteAllowed } from "../lib/plan-debate-write-guard.js";
106
+ import {
107
+ formatPlanHumanGateBlock,
108
+ resolvePlanHumanGateStatus,
109
+ validateTaskClarificationHumanGate,
110
+ } from "../lib/plan-human-gates.js";
83
111
  import {
84
112
  assertTaskClarificationReadyForPlanWrite,
85
113
  readTaskClarificationDoc,
@@ -102,8 +130,20 @@ function getEntries(ctx: {
102
130
 
103
131
  function persistContext(pi: ExtensionAPI, ctx: HarnessRunContext): void {
104
132
  pi.appendEntry("harness-run-context", ctx);
105
- void saveRunContextToDisk(ctx);
106
- void saveProjectActiveRun(ctx);
133
+ void saveRunContextToDisk(ctx).catch((err) => {
134
+ pi.appendEntry("harness-run-context-disk-error", {
135
+ run_id: ctx.run_id,
136
+ error: err instanceof Error ? err.message : String(err),
137
+ recorded_at: nowIso(),
138
+ });
139
+ });
140
+ void saveProjectActiveRun(ctx).catch((err) => {
141
+ pi.appendEntry("harness-run-context-disk-error", {
142
+ run_id: ctx.run_id,
143
+ error: err instanceof Error ? err.message : String(err),
144
+ recorded_at: nowIso(),
145
+ });
146
+ });
107
147
  pi.events.emit("harness-run-context:updated", { run_id: ctx.run_id });
108
148
  }
109
149
 
@@ -215,7 +255,8 @@ export async function archivePlanRevisionArtifacts(input: {
215
255
  return { archiveDir, moved };
216
256
  }
217
257
 
218
- function shouldArchiveForPlanRevise(input: {
258
+ /** Exported for tests — avoid archiving on every /harness-plan continue. */
259
+ export function shouldArchiveForPlanRevise(input: {
219
260
  command: string;
220
261
  mode: "create" | "revise" | null;
221
262
  runCtx: HarnessRunContext;
@@ -226,15 +267,20 @@ function shouldArchiveForPlanRevise(input: {
226
267
  return false;
227
268
  }
228
269
  if (input.mode !== "revise") return false;
229
- const next = (input.runCtx.next_recommended_command ?? "").toLowerCase();
230
270
  const prompt = input.userPrompt.toLowerCase();
231
- return (
232
- input.reviewOutcome?.remediation_class === "plan_gap" ||
233
- next.includes("/harness-plan") ||
234
- next.includes("revise") ||
271
+ const explicitRevise =
235
272
  prompt.includes("--mode revise") ||
236
273
  prompt.includes("--mode=revise") ||
237
- prompt.includes("mode: revise")
274
+ prompt.includes("mode: revise") ||
275
+ /\b(revise\s+(the\s+)?plan|reset\s+plan|start\s+over\s+on\s+the\s+plan)\b/.test(
276
+ prompt,
277
+ );
278
+ if (explicitRevise) return true;
279
+ if (input.reviewOutcome?.remediation_class !== "plan_gap") return false;
280
+ return (
281
+ prompt.includes("plan_gap") ||
282
+ prompt.includes("remediation_class") ||
283
+ /\brevise\s+per\s+review\b/.test(prompt)
238
284
  );
239
285
  }
240
286
 
@@ -341,13 +387,22 @@ async function hydrateFromDisk(
341
387
  entries: unknown[],
342
388
  ): Promise<HarnessRunContext | null> {
343
389
  const fromSession = getLatestRunContext(entries);
344
- if (fromSession) return fromSession;
390
+ if (fromSession) {
391
+ return reconcileStaleExecuteCompletion(projectRoot, fromSession, entries);
392
+ }
345
393
 
346
394
  const pointer = await loadProjectActiveRun(projectRoot);
347
395
  if (!pointer || isStaleActiveRunPointer(pointer, projectRoot)) return null;
348
396
 
349
397
  const disk = await loadRunContextFromDisk(pointer.run_id, projectRoot);
350
- if (disk) return disk;
398
+ if (disk) {
399
+ const clar = await syncPlanLastOutcomeFromTaskClarification(
400
+ projectRoot,
401
+ disk,
402
+ );
403
+ const planSynced = await syncPlanReadyFromDisk(projectRoot, clar, entries);
404
+ return reconcileStaleExecuteCompletion(projectRoot, planSynced, entries);
405
+ }
351
406
 
352
407
  return {
353
408
  schema_version: "1.0.0",
@@ -476,10 +531,13 @@ function startFreshPlanAttempt(input: {
476
531
  activeCtx: HarnessRunContext;
477
532
  command: string;
478
533
  turn: HarnessTurnEntry | null;
534
+ sessionId: string;
479
535
  }): void {
480
536
  input.activeCtx.plan_ready = false;
481
537
  input.activeCtx.phase = "plan";
482
538
  input.activeCtx.status = "active";
539
+ disarmHarnessKillSwitch(input.sessionId);
540
+ resetHarnessPolicyDenyCount(input.sessionId);
483
541
  input.pi.appendEntry("harness-plan-attempt", {
484
542
  run_id: input.activeCtx.run_id,
485
543
  command: input.command,
@@ -584,6 +642,159 @@ type ActiveContextAccess = {
584
642
  set(ctx: HarnessRunContext | null): void;
585
643
  };
586
644
 
645
+ const HARNESS_CLEAR_CONFIRM_OPTION = "Delete historical runs";
646
+
647
+ function isHarnessClearConfirmed(response: unknown): boolean {
648
+ if (!response || typeof response !== "object") return false;
649
+ const payload = response as {
650
+ kind?: string;
651
+ selections?: unknown;
652
+ };
653
+ if (payload.kind !== "selection" || !Array.isArray(payload.selections)) {
654
+ return false;
655
+ }
656
+ return (
657
+ payload.selections.length === 1 &&
658
+ payload.selections[0] === HARNESS_CLEAR_CONFIRM_OPTION
659
+ );
660
+ }
661
+
662
+ function registerHarnessClearCommand(
663
+ pi: ExtensionAPI,
664
+ active: ActiveContextAccess,
665
+ ): void {
666
+ pi.registerCommand("harness-clear", {
667
+ description:
668
+ "Delete historical harness runs under .pi/harness/runs while preserving the active run",
669
+ handler: async (_args, ctx) => {
670
+ const entries = getEntries(ctx);
671
+ const projectRoot = process.cwd();
672
+ const latest = active.get() ?? getLatestRunContext(entries);
673
+ const pointer = await loadProjectActiveRun(projectRoot);
674
+ const protectedRunIds = new Set<string>();
675
+ if (latest?.run_id) protectedRunIds.add(latest.run_id);
676
+ if (pointer?.run_id) protectedRunIds.add(pointer.run_id);
677
+ const manifest = await buildHarnessClearManifest(
678
+ projectRoot,
679
+ protectedRunIds,
680
+ );
681
+ if (manifest.candidates.length === 0) {
682
+ const message = [
683
+ "/harness-clear: no historical run directories eligible for deletion.",
684
+ ` protected: ${manifest.protected_run_ids.join(", ") || "(none)"}`,
685
+ ` skipped: ${manifest.skipped.length}`,
686
+ ].join("\n");
687
+ if (ctx.hasUI) ctx.ui.notify(message, "info");
688
+ else
689
+ pi.sendMessage({
690
+ customType: "harness-clear-result",
691
+ content: message,
692
+ display: true,
693
+ });
694
+ pi.appendEntry("harness-clear-result", {
695
+ approved: false,
696
+ deleted: 0,
697
+ protected: manifest.protected_run_ids,
698
+ skipped: manifest.skipped,
699
+ recorded_at: nowIso(),
700
+ });
701
+ return;
702
+ }
703
+ const ask = await runAskUser(
704
+ {
705
+ question: `Delete ${manifest.candidates.length} historical harness run directories?`,
706
+ context: [
707
+ "Scope: .pi/harness/runs/<run_id> only (historical runs).",
708
+ `Preserved active run ids: ${manifest.protected_run_ids.join(", ") || "(none)"}`,
709
+ `Candidates: ${manifest.candidates.map((item) => item.run_id).join(", ")}`,
710
+ ].join("\n"),
711
+ options: [HARNESS_CLEAR_CONFIRM_OPTION, "Cancel"],
712
+ allowSkip: true,
713
+ },
714
+ { ui: ctx.ui, hasUI: ctx.hasUI },
715
+ );
716
+ if ("error" in ask) {
717
+ const message = [
718
+ "/harness-clear: confirmation unavailable; no files deleted (fail-closed).",
719
+ ` reason: ${ask.error}`,
720
+ ].join("\n");
721
+ if (ctx.hasUI) ctx.ui.notify(message, "warning");
722
+ else
723
+ pi.sendMessage({
724
+ customType: "harness-clear-result",
725
+ content: message,
726
+ display: true,
727
+ });
728
+ pi.appendEntry("harness-clear-result", {
729
+ approved: false,
730
+ deleted: 0,
731
+ protected: manifest.protected_run_ids,
732
+ skipped: manifest.skipped,
733
+ ask_error: ask.error,
734
+ recorded_at: nowIso(),
735
+ });
736
+ return;
737
+ }
738
+ const confirmed =
739
+ !ask.details.cancelled && isHarnessClearConfirmed(ask.details.response);
740
+ if (!confirmed) {
741
+ const message = [
742
+ "/harness-clear: cancelled; no files deleted.",
743
+ ` candidates: ${manifest.candidates.length}`,
744
+ ].join("\n");
745
+ if (ctx.hasUI) ctx.ui.notify(message, "info");
746
+ else
747
+ pi.sendMessage({
748
+ customType: "harness-clear-result",
749
+ content: message,
750
+ display: true,
751
+ });
752
+ pi.appendEntry("harness-clear-result", {
753
+ approved: false,
754
+ deleted: 0,
755
+ protected: manifest.protected_run_ids,
756
+ skipped: manifest.skipped,
757
+ recorded_at: nowIso(),
758
+ });
759
+ return;
760
+ }
761
+ let deleted = 0;
762
+ const failed: Array<{ run_id: string; reason: string }> = [];
763
+ for (const candidate of manifest.candidates) {
764
+ try {
765
+ await rm(candidate.canonical_path, { recursive: true, force: true });
766
+ deleted += 1;
767
+ } catch (err) {
768
+ failed.push({
769
+ run_id: candidate.run_id,
770
+ reason: err instanceof Error ? err.message : String(err),
771
+ });
772
+ }
773
+ }
774
+ const message = [
775
+ "/harness-clear complete.",
776
+ ` deleted: ${deleted}`,
777
+ ` protected: ${manifest.protected_run_ids.length}`,
778
+ ` skipped: ${manifest.skipped.length + failed.length}`,
779
+ ].join("\n");
780
+ if (ctx.hasUI) ctx.ui.notify(message, "info");
781
+ else
782
+ pi.sendMessage({
783
+ customType: "harness-clear-result",
784
+ content: message,
785
+ display: true,
786
+ });
787
+ pi.appendEntry("harness-clear-result", {
788
+ approved: true,
789
+ deleted,
790
+ protected: manifest.protected_run_ids,
791
+ skipped: [...manifest.skipped, ...failed],
792
+ recorded_at: nowIso(),
793
+ });
794
+ },
795
+ });
796
+ }
797
+
587
798
  function registerHarnessRunStatusCommand(
588
799
  pi: ExtensionAPI,
589
800
  active: ActiveContextAccess,
@@ -926,6 +1137,13 @@ async function archivePlanRevisionIfNeeded(input: {
926
1137
  reason: "review_plan_gap_revise",
927
1138
  });
928
1139
  if (reset.moved.length === 0) return;
1140
+ input.activeCtx.plan_ready = false;
1141
+ const synced = await syncPlanLastOutcomeFromTaskClarification(
1142
+ input.projectRoot,
1143
+ input.activeCtx,
1144
+ );
1145
+ Object.assign(input.activeCtx, synced);
1146
+ persistContext(input.pi, input.activeCtx);
929
1147
  input.pi.appendEntry("harness-plan-revision-reset", {
930
1148
  run_id: input.activeCtx.run_id,
931
1149
  archive_dir: reset.archiveDir,
@@ -989,18 +1207,27 @@ async function updatePlanReadinessAfterAgent(input: {
989
1207
  )
990
1208
  return;
991
1209
  if (!input.activeCtx.plan_packet_path) return;
992
- const packet = await readPlanPacketFromPath(input.activeCtx.plan_packet_path);
993
- const validation = validatePlanPacket(packet);
994
- const approved = hasPlanUserApproval(input.entries, {
995
- sincePlanCommand: true,
996
- planId: packet?.plan_id ?? null,
997
- });
998
- input.activeCtx.plan_ready = validation.valid && approved;
999
- if (validation.valid && !approved) {
1000
- input.activeCtx.last_outcome = "needs_clarification";
1001
- input.activeCtx.last_completed_step = "plan";
1210
+ const beforeReady = input.activeCtx.plan_ready;
1211
+ const synced = await syncPlanReadyFromDisk(
1212
+ process.cwd(),
1213
+ input.activeCtx,
1214
+ input.entries,
1215
+ );
1216
+ Object.assign(input.activeCtx, synced);
1217
+ if (!beforeReady && synced.plan_ready && synced.plan_packet_path) {
1218
+ const packet = await readPlanPacketFromPath(synced.plan_packet_path);
1219
+ if (packet?.plan_id) {
1220
+ syncPolicyFromPlan(input.pi, input.entries, packet.plan_id, "plan", true);
1221
+ const summary = planPacketSummary(packet, synced.plan_packet_path);
1222
+ input.pi.appendEntry("harness-plan-packet", summary);
1223
+ }
1224
+ } else if (
1225
+ synced.plan_packet_path &&
1226
+ !synced.plan_ready &&
1227
+ synced.last_outcome === "pending_approval"
1228
+ ) {
1002
1229
  const msg =
1003
- "Plan file exists but user approval was not recorded. Planner must call approve_plan (or bridged ask_user Approve) before writing plan-packet.yaml.";
1230
+ "A draft plan-packet.yaml is on disk, but user approval was not recorded. Complete Review Gate (debate rounds + harness_debate_consensus), then call approve_plan; use create_plan only after Approve.";
1004
1231
  if (input.ctx.hasUI) input.ctx.ui.notify(msg, "warning");
1005
1232
  else
1006
1233
  input.pi.sendMessage({
@@ -1008,17 +1235,8 @@ async function updatePlanReadinessAfterAgent(input: {
1008
1235
  content: msg,
1009
1236
  display: true,
1010
1237
  });
1011
- } else if (input.activeCtx.plan_ready && packet?.plan_id) {
1012
- input.activeCtx.plan_id = packet.plan_id;
1013
- syncPolicyFromPlan(input.pi, input.entries, packet.plan_id, "plan", true);
1014
- const summary = planPacketSummary(packet, input.activeCtx.plan_packet_path);
1015
- input.pi.appendEntry("harness-plan-packet", summary);
1016
- input.activeCtx.last_completed_step = "plan";
1017
- input.activeCtx.last_outcome = summary.plan_status;
1018
- } else if (!validation.valid) {
1019
- input.activeCtx.last_outcome = "needs_clarification";
1020
- input.activeCtx.last_completed_step = "plan";
1021
1238
  }
1239
+ persistContext(input.pi, input.activeCtx);
1022
1240
  }
1023
1241
 
1024
1242
  function registerPlanApprovalCapture(
@@ -1029,15 +1247,46 @@ function registerPlanApprovalCapture(
1029
1247
  if (event.isError) return;
1030
1248
  if (event.toolName !== "ask_user" && event.toolName !== "approve_plan")
1031
1249
  return;
1250
+ const entries = getEntries(ctx);
1251
+ const runCtx = getLatestRunContext(entries) ?? active.get();
1252
+ if (!runCtx) return;
1253
+ if (event.toolName === "ask_user") {
1254
+ const details = event.details as { cancelled?: boolean; input?: unknown };
1255
+ if (details?.cancelled) {
1256
+ const synced = await syncPlanLastOutcomeFromTaskClarification(
1257
+ process.cwd(),
1258
+ runCtx,
1259
+ );
1260
+ Object.assign(runCtx, synced);
1261
+ persistContext(pi, runCtx);
1262
+ } else if (
1263
+ !isPlanApprovalAskUser(
1264
+ (details?.input ?? {}) as {
1265
+ question?: string;
1266
+ options?: unknown[];
1267
+ questions?: unknown[];
1268
+ },
1269
+ )
1270
+ ) {
1271
+ pi.appendEntry("harness-task-clarification-engagement", {
1272
+ run_id: runCtx.run_id,
1273
+ recorded_at: nowIso(),
1274
+ source: "ask_user",
1275
+ });
1276
+ const synced = await syncPlanLastOutcomeFromTaskClarification(
1277
+ process.cwd(),
1278
+ runCtx,
1279
+ );
1280
+ Object.assign(runCtx, synced);
1281
+ persistContext(pi, runCtx);
1282
+ }
1283
+ }
1032
1284
  const approval = parsePlanApprovalFromMessage({
1033
1285
  toolName: event.toolName,
1034
1286
  details: event.details,
1035
1287
  content: event.content,
1036
1288
  });
1037
1289
  if (!approval) return;
1038
- const entries = getEntries(ctx);
1039
- const runCtx = getLatestRunContext(entries) ?? active.get();
1040
- if (!runCtx) return;
1041
1290
  pi.appendEntry("harness-plan-approval", {
1042
1291
  plan_id: approval.plan_id ?? runCtx.plan_id,
1043
1292
  approved_at: approval.approved_at,
@@ -1165,18 +1414,41 @@ async function resolveCommandRunContext(input: {
1165
1414
  input.command === "harness-auto" ||
1166
1415
  (!activeCtx && input.command !== "harness-abort")
1167
1416
  ) {
1417
+ const task = extractTaskSummary(input.args, input.userPrompt);
1168
1418
  if (
1169
- !activeCtx ||
1170
- !shouldReuseHarnessRunId(input.userPrompt, activeCtx, input.command)
1419
+ input.command === "harness-auto" &&
1420
+ activeCtx &&
1421
+ task &&
1422
+ harnessAutoTasksDiffer(activeCtx, task)
1171
1423
  ) {
1424
+ activeCtx.status = "aborted";
1425
+ activeCtx.plan_ready = false;
1426
+ activeCtx.last_outcome = "abandoned";
1427
+ activeCtx.last_completed_step = "abort";
1428
+ persistContext(input.pi, activeCtx);
1429
+ activeCtx = null;
1430
+ }
1431
+ const reuseRun =
1432
+ activeCtx &&
1433
+ shouldReuseHarnessRunId(input.userPrompt, activeCtx, input.command);
1434
+ if (!activeCtx || !reuseRun) {
1435
+ if (activeCtx?.status === "active") {
1436
+ activeCtx.status = "aborted";
1437
+ activeCtx.plan_ready = false;
1438
+ activeCtx.last_outcome = "abandoned";
1439
+ activeCtx.last_completed_step = "abort";
1440
+ persistContext(input.pi, activeCtx);
1441
+ }
1172
1442
  activeCtx = createFreshRunContext(
1173
1443
  input.sessionId,
1174
1444
  input.projectRoot,
1175
- extractTaskSummary(input.args, input.userPrompt),
1445
+ task,
1176
1446
  );
1447
+ } else if (input.command === "harness-auto") {
1448
+ activeCtx = resetRunContextForHarnessAuto(activeCtx);
1449
+ if (task) activeCtx.task_summary = task;
1177
1450
  }
1178
1451
  if (input.command === "harness-plan") {
1179
- const task = extractTaskSummary(input.args, input.userPrompt);
1180
1452
  if (task) activeCtx.task_summary = task;
1181
1453
  }
1182
1454
  startFreshPlanAttempt({
@@ -1184,6 +1456,7 @@ async function resolveCommandRunContext(input: {
1184
1456
  activeCtx,
1185
1457
  command: input.command,
1186
1458
  turn: input.turn,
1459
+ sessionId: input.sessionId,
1187
1460
  });
1188
1461
  } else if (
1189
1462
  activeCtx &&
@@ -1297,7 +1570,7 @@ async function handlePreResolvedHarnessCommand(args: {
1297
1570
  handled: true,
1298
1571
  };
1299
1572
  }
1300
- if (command === "harness-run-status") {
1573
+ if (command === "harness-run-status" || command === "harness-clear") {
1301
1574
  return { activeCtx, response: undefined, handled: true };
1302
1575
  }
1303
1576
  if (
@@ -1317,21 +1590,6 @@ async function handlePreResolvedHarnessCommand(args: {
1317
1590
  return { activeCtx, response: null, handled: false };
1318
1591
  }
1319
1592
 
1320
- function blockingRunCommandReason(
1321
- command: string,
1322
- activeCtx: HarnessRunContext,
1323
- ): string | null {
1324
- if (command !== "harness-run") return null;
1325
- if (!activeCtx.plan_ready) return "Plan not ready. Run /harness-plan first.";
1326
- if (
1327
- activeCtx.last_completed_step === "execute" &&
1328
- activeCtx.last_outcome === "completed"
1329
- ) {
1330
- return "Execute already completed for this run. Next: /harness-review (same session), or /harness-abort to replan.";
1331
- }
1332
- return null;
1333
- }
1334
-
1335
1593
  async function handleBeforeAgentStart(input: {
1336
1594
  pi: ExtensionAPI;
1337
1595
  event: any;
@@ -1371,12 +1629,21 @@ async function handleBeforeAgentStart(input: {
1371
1629
  "plan";
1372
1630
  const driftActive = driftGateActive(entries);
1373
1631
  if (!parsed && needsClarificationFollowUp(activeCtx) && activeCtx) {
1374
- return maybeHandleClarificationFollowUp({
1375
- pi: input.pi,
1632
+ const synced = await syncPlanLastOutcomeFromTaskClarification(
1633
+ projectRoot,
1376
1634
  activeCtx,
1377
- entries,
1378
- systemPrompt: input.event.systemPrompt,
1379
- });
1635
+ );
1636
+ if (synced.last_outcome !== "needs_clarification") {
1637
+ input.active.set(synced);
1638
+ persistContext(input.pi, synced);
1639
+ } else {
1640
+ return maybeHandleClarificationFollowUp({
1641
+ pi: input.pi,
1642
+ activeCtx,
1643
+ entries,
1644
+ systemPrompt: input.event.systemPrompt,
1645
+ });
1646
+ }
1380
1647
  }
1381
1648
  if (!parsed) return undefined;
1382
1649
  const { command, args } = parsed;
@@ -1433,8 +1700,40 @@ async function handleBeforeAgentStart(input: {
1433
1700
  return blockRunContextMessage(check.reason ?? "Invalid --plan override");
1434
1701
  activeCtx.plan_packet_path = resolved.planPath;
1435
1702
  }
1436
- const runBlockReason = blockingRunCommandReason(command, activeCtx);
1703
+ let planSynced = await reconcileStaleExecuteCompletion(
1704
+ projectRoot,
1705
+ activeCtx,
1706
+ entries,
1707
+ );
1708
+ planSynced = await reconcileReviewRouting(projectRoot, planSynced);
1709
+ Object.assign(activeCtx, planSynced);
1710
+ persistContext(input.pi, activeCtx);
1711
+ const autoBlockReason = await blockingHarnessAutoCommandReason(
1712
+ command,
1713
+ activeCtx,
1714
+ args,
1715
+ userPrompt,
1716
+ );
1717
+ if (autoBlockReason) return blockRunContextMessage(autoBlockReason);
1718
+ const runBlockReason = await blockingRunCommandReason(
1719
+ command,
1720
+ activeCtx,
1721
+ projectRoot,
1722
+ entries,
1723
+ );
1437
1724
  if (runBlockReason) return blockRunContextMessage(runBlockReason);
1725
+ const reviewBlockReason = await blockingReviewCommandReason(
1726
+ command,
1727
+ activeCtx,
1728
+ projectRoot,
1729
+ );
1730
+ if (reviewBlockReason) return blockRunContextMessage(reviewBlockReason);
1731
+ const steerBlockReason = await blockingSteerCommandReason(
1732
+ command,
1733
+ activeCtx,
1734
+ projectRoot,
1735
+ );
1736
+ if (steerBlockReason) return blockRunContextMessage(steerBlockReason);
1438
1737
  const { planSummary, planPacketForSpawn } =
1439
1738
  await readPlanSpawnState(activeCtx);
1440
1739
  const { activePlanBlock, planMode, contextSpawnOpts } =
@@ -1452,10 +1751,34 @@ async function handleBeforeAgentStart(input: {
1452
1751
  projectRoot,
1453
1752
  userPrompt,
1454
1753
  });
1754
+ const syncedCtx = await syncPlanLastOutcomeFromTaskClarification(
1755
+ projectRoot,
1756
+ activeCtx,
1757
+ );
1758
+ Object.assign(activeCtx, syncedCtx);
1455
1759
  input.active.set(activeCtx);
1456
1760
  persistContext(input.pi, activeCtx);
1761
+ if (command === "harness-plan" || command === "harness-auto") {
1762
+ syncPolicyFromRunContext(input.pi, entries, activeCtx);
1763
+ }
1764
+ let gateBlock = "";
1765
+ if (command === "harness-plan" || command === "harness-auto") {
1766
+ const quick = parseArgFlag(args, "--quick") != null;
1767
+ const gateStatus = await resolvePlanHumanGateStatus(
1768
+ projectRoot,
1769
+ activeCtx.run_id,
1770
+ entries,
1771
+ {
1772
+ quick,
1773
+ taskSummary: activeCtx.task_summary ?? undefined,
1774
+ lastOutcome: activeCtx.last_outcome ?? undefined,
1775
+ },
1776
+ );
1777
+ gateBlock = formatPlanHumanGateBlock(gateStatus);
1778
+ }
1779
+ const gateSuffix = gateBlock ? `\n\n${gateBlock}` : "";
1457
1780
  return {
1458
- systemPrompt: `${input.event.systemPrompt}\n\n${formatPlanContextBlock(activeCtx, contextSpawnOpts)}${activePlanBlock ? `\n\n${activePlanBlock}` : ""}`,
1781
+ systemPrompt: `${input.event.systemPrompt}\n\n${formatPlanContextBlock(activeCtx, contextSpawnOpts)}${activePlanBlock ? `\n\n${activePlanBlock}` : ""}${gateSuffix}`,
1459
1782
  };
1460
1783
  }
1461
1784
 
@@ -1468,6 +1791,13 @@ async function handleAgentEnd(input: {
1468
1791
  const entries = getEntries(input.ctx);
1469
1792
  const activeCtx = input.active.get() ?? getLatestRunContext(entries);
1470
1793
  if (!activeCtx) return;
1794
+ let reconciledOnEnd = await reconcileStaleExecuteCompletion(
1795
+ projectRoot,
1796
+ activeCtx,
1797
+ entries,
1798
+ );
1799
+ reconciledOnEnd = await reconcileReviewRouting(projectRoot, reconciledOnEnd);
1800
+ Object.assign(activeCtx, reconciledOnEnd);
1471
1801
  input.active.set(activeCtx);
1472
1802
  const parsed = latestParsedHarnessCommand(entries);
1473
1803
  if (!parsed && !needsClarificationFollowUp(activeCtx)) return;
@@ -1482,13 +1812,23 @@ async function handleAgentEnd(input: {
1482
1812
  parsed,
1483
1813
  activeCtx,
1484
1814
  });
1815
+ if (
1816
+ parsed?.command === "harness-plan" ||
1817
+ parsed?.command === "harness-auto"
1818
+ ) {
1819
+ const synced = await syncPlanLastOutcomeFromTaskClarification(
1820
+ projectRoot,
1821
+ activeCtx,
1822
+ );
1823
+ Object.assign(activeCtx, synced);
1824
+ persistContext(input.pi, activeCtx);
1825
+ }
1485
1826
  const statuses = await resolveCompletionStatuses(
1486
1827
  entries,
1487
1828
  activeCtx.run_id,
1488
1829
  projectRoot,
1489
1830
  );
1490
1831
  if (parsed?.command === "harness-run") {
1491
- activeCtx.last_completed_step = "execute";
1492
1832
  let execStatus = statuses.executionStatus;
1493
1833
  if (!execStatus) {
1494
1834
  const handoff = await readExecutorHandoffFromRun(
@@ -1497,8 +1837,11 @@ async function handleAgentEnd(input: {
1497
1837
  );
1498
1838
  execStatus = handoff?.execution_status ?? null;
1499
1839
  }
1500
- activeCtx.last_outcome = execStatus ?? "completed";
1501
- activeCtx.phase = "evaluate";
1840
+ const runPost = resolveHarnessRunPostAgentState(
1841
+ execStatus,
1842
+ activeCtx.plan_ready,
1843
+ );
1844
+ Object.assign(activeCtx, runPost);
1502
1845
  }
1503
1846
  if (parsed?.command === "harness-steer") {
1504
1847
  activeCtx.last_completed_step = "steer";
@@ -1521,7 +1864,14 @@ async function handleAgentEnd(input: {
1521
1864
  activeCtx.last_completed_step = "adversary";
1522
1865
  } else if (statuses.evalStatus) activeCtx.phase = "evaluate";
1523
1866
  }
1524
- const reviewOutcome = await readReviewOutcomeFromRun(
1867
+ if (
1868
+ ["harness-eval", "harness-review", "harness-critic"].includes(
1869
+ parsed?.command ?? "",
1870
+ )
1871
+ ) {
1872
+ await ensureReviewOutcomeFromEval(activeCtx.run_id, projectRoot);
1873
+ }
1874
+ const remediationClass = await resolveRemediationClassForRun(
1525
1875
  activeCtx.run_id,
1526
1876
  projectRoot,
1527
1877
  );
@@ -1537,7 +1887,7 @@ async function handleAgentEnd(input: {
1537
1887
  evalStatus: statuses.evalStatus,
1538
1888
  adversaryComplete: statuses.adversaryComplete,
1539
1889
  aborted: activeCtx.status === "aborted",
1540
- remediationClass: reviewOutcome?.remediation_class ?? null,
1890
+ remediationClass,
1541
1891
  steerAttempt: activeCtx.steer_attempt ?? 0,
1542
1892
  steerMaxAttempts: activeCtx.steer_max_attempts ?? steerMaxAttemptsFromEnv(),
1543
1893
  reviewComplete,
@@ -1590,7 +1940,7 @@ function registerHarnessRunContextTool1(
1590
1940
  parameters: Type.Object({
1591
1941
  path: Type.String({
1592
1942
  description:
1593
- "Path under the active run, e.g. artifacts/decomposition.yaml or research-brief.yaml",
1943
+ "Run-relative path (preferred): artifacts/decomposition.yaml, research-brief.yaml, plan-packet.yaml. The active run id is applied automatically — do not prefix with .pi/harness/runs/.",
1594
1944
  }),
1595
1945
  content: Type.String({
1596
1946
  description:
@@ -1640,21 +1990,32 @@ function registerHarnessRunContextTool1(
1640
1990
  };
1641
1991
  }
1642
1992
  const projectRoot = process.cwd();
1643
- const absPath = normalizeHarnessPath(pathArg, projectRoot);
1644
- const scoped = await isPlanPhaseScopedWrite(absPath, runCtx, projectRoot);
1993
+ const resolved = resolveHarnessRunWriteTarget(
1994
+ pathArg,
1995
+ runCtx,
1996
+ projectRoot,
1997
+ );
1998
+ const absPath =
1999
+ resolved?.absPath ?? normalizeHarnessPath(pathArg, projectRoot);
2000
+ const scoped =
2001
+ resolved != null ||
2002
+ (await isPlanPhaseScopedWrite(absPath, runCtx, projectRoot));
1645
2003
  if (!scoped) {
1646
2004
  return {
1647
2005
  content: [
1648
2006
  {
1649
2007
  type: "text",
1650
- text: `Path not allowed: ${pathArg}. Must be under .pi/harness/runs/${runCtx.run_id}/ (artifacts/*.yaml, research-brief.yaml, etc.).`,
2008
+ text: `Path not allowed: ${pathArg}. Use a run-relative path like artifacts/decomposition.yaml or research-brief.yaml (active run ${runCtx.run_id} is applied automatically). Full paths under .pi/harness/runs/${runCtx.run_id}/ are also accepted.`,
1651
2009
  },
1652
2010
  ],
1653
- details: { path: pathArg },
2011
+ details: { path: pathArg, run_id: runCtx.run_id },
1654
2012
  isError: true,
1655
2013
  };
1656
2014
  }
1657
- const relForGate = pathArg.replace(/\\/g, "/");
2015
+ const relForGate =
2016
+ resolved?.relUnderRun ??
2017
+ (await relPathUnderActiveRun(absPath, runCtx, projectRoot)) ??
2018
+ pathArg.replace(/\\/g, "/");
1658
2019
  const subagentOnly = new Set([
1659
2020
  "artifacts/eval-verdict.yaml",
1660
2021
  "artifacts/adversary-report.yaml",
@@ -1721,12 +2082,67 @@ function registerHarnessRunContextTool1(
1721
2082
  doc = parseStructuredDocument(content, pathArg);
1722
2083
  } catch (err) {
1723
2084
  const msg = err instanceof Error ? err.message : String(err);
2085
+ const hint =
2086
+ msg.includes("not valid YAML") || msg.includes("JSON parse")
2087
+ ? " Pass a fenced ```yaml block, raw YAML object, or JSON object — not prose or a partial fragment."
2088
+ : "";
1724
2089
  return {
1725
- content: [{ type: "text", text: msg }],
1726
- details: { path: pathArg },
2090
+ content: [
2091
+ {
2092
+ type: "text",
2093
+ text: `${relForGate}: ${msg}${hint}`,
2094
+ },
2095
+ ],
2096
+ details: { path: relForGate, run_id: runCtx.run_id },
1727
2097
  isError: true,
1728
2098
  };
1729
2099
  }
2100
+ const docRecord = doc as Record<string, unknown>;
2101
+ if (relForGate === TASK_CLARIFICATION_ARTIFACT) {
2102
+ const humanGate = validateTaskClarificationHumanGate(
2103
+ entries,
2104
+ docRecord,
2105
+ {
2106
+ quick:
2107
+ parseArgFlag(
2108
+ getLatestHarnessTurn(entries)?.args ?? "",
2109
+ "--quick",
2110
+ ) != null,
2111
+ taskSummary: runCtx.task_summary ?? undefined,
2112
+ allowFollowUpMessage: runCtx.last_outcome === "needs_clarification",
2113
+ },
2114
+ );
2115
+ if (!humanGate.ok) {
2116
+ return {
2117
+ content: [
2118
+ {
2119
+ type: "text",
2120
+ text: humanGate.errors.join("\n"),
2121
+ },
2122
+ ],
2123
+ details: { path: pathArg },
2124
+ isError: true,
2125
+ };
2126
+ }
2127
+ }
2128
+ if (relForGate === "artifacts/plan-phase-status.yaml") {
2129
+ const planStatus = String(docRecord.plan_status ?? "").toLowerCase();
2130
+ if (
2131
+ planStatus === "ready" &&
2132
+ !hasPlanUserApproval(entries, { sincePlanCommand: true })
2133
+ ) {
2134
+ return {
2135
+ content: [
2136
+ {
2137
+ type: "text",
2138
+ text: "Blocked: plan_status ready requires approve_plan (then create_plan) before marking the plan phase complete.",
2139
+ },
2140
+ ],
2141
+ details: { path: pathArg },
2142
+ isError: true,
2143
+ };
2144
+ }
2145
+ }
1730
2146
  await mkdir(dirname(absPath), { recursive: true });
1731
2147
  await writeYamlFile(absPath, doc);
1732
2148
  if (relForGate === TASK_CLARIFICATION_ARTIFACT) {
@@ -1743,10 +2159,10 @@ function registerHarnessRunContextTool1(
1743
2159
  content: [
1744
2160
  {
1745
2161
  type: "text",
1746
- text: `Wrote ${pathArg} as canonical YAML.`,
2162
+ text: `Wrote ${relForGate} as canonical YAML.`,
1747
2163
  },
1748
2164
  ],
1749
- details: { path: absPath },
2165
+ details: { path: absPath, rel: relForGate, run_id: runCtx.run_id },
1750
2166
  };
1751
2167
  },
1752
2168
  });
@@ -1812,17 +2228,25 @@ function registerHarnessRunContextTool2(
1812
2228
  };
1813
2229
  }
1814
2230
  const projectRoot = process.cwd();
1815
- const absPath = normalizeHarnessPath(pathArg, projectRoot);
1816
- const scoped = await isPlanPhaseScopedWrite(absPath, runCtx, projectRoot);
2231
+ const resolved = resolveHarnessRunWriteTarget(
2232
+ pathArg,
2233
+ runCtx,
2234
+ projectRoot,
2235
+ );
2236
+ const absPath =
2237
+ resolved?.absPath ?? normalizeHarnessPath(pathArg, projectRoot);
2238
+ const scoped =
2239
+ resolved != null ||
2240
+ (await isPlanPhaseScopedWrite(absPath, runCtx, projectRoot));
1817
2241
  if (!scoped) {
1818
2242
  return {
1819
2243
  content: [
1820
2244
  {
1821
2245
  type: "text",
1822
- text: `Path not allowed: ${pathArg}.`,
2246
+ text: `Path not allowed: ${pathArg}. Use run-relative paths like artifacts/decomposition.yaml (active run ${runCtx.run_id}).`,
1823
2247
  },
1824
2248
  ],
1825
- details: { path: pathArg },
2249
+ details: { path: pathArg, run_id: runCtx.run_id },
1826
2250
  isError: true,
1827
2251
  };
1828
2252
  }
@@ -1833,7 +2257,10 @@ function registerHarnessRunContextTool2(
1833
2257
  "runs",
1834
2258
  runCtx.run_id,
1835
2259
  );
1836
- const relMerge = pathArg.replace(/\\/g, "/");
2260
+ const relMerge =
2261
+ resolved?.relUnderRun ??
2262
+ (await relPathUnderActiveRun(absPath, runCtx, projectRoot)) ??
2263
+ pathArg.replace(/\\/g, "/");
1837
2264
  const clarMerge = await assertTaskClarificationReadyForPlanWrite(
1838
2265
  runRoot,
1839
2266
  relMerge,
@@ -2044,7 +2471,18 @@ function registerHarnessRunContextTool4(
2044
2471
  const { validateHarnessArtifactPaths } = await import(
2045
2472
  "../lib/harness-artifact-gate.js"
2046
2473
  );
2047
- const gate = await validateHarnessArtifactPaths(runRoot, paths, specsDir);
2474
+ const turn = getLatestHarnessTurn(entries);
2475
+ const gate = await validateHarnessArtifactPaths(
2476
+ runRoot,
2477
+ paths,
2478
+ specsDir,
2479
+ {
2480
+ entries,
2481
+ quick: turn ? parseArgFlag(turn.args, "--quick") != null : false,
2482
+ taskSummary: runCtx.task_summary ?? undefined,
2483
+ lastOutcome: runCtx.last_outcome ?? undefined,
2484
+ },
2485
+ );
2048
2486
  if (
2049
2487
  gate.ok &&
2050
2488
  paths.some((p) => p.replace(/\\/g, "/") === TASK_CLARIFICATION_ARTIFACT)
@@ -2053,8 +2491,13 @@ function registerHarnessRunContextTool4(
2053
2491
  const clarified = String(clarDoc?.clarified_task ?? "").trim();
2054
2492
  if (clarified && runCtx.task_summary !== clarified) {
2055
2493
  runCtx.task_summary = clarified;
2056
- persistContext(pi, runCtx);
2057
2494
  }
2495
+ const synced = await syncPlanLastOutcomeFromTaskClarification(
2496
+ projectRoot,
2497
+ runCtx,
2498
+ );
2499
+ Object.assign(runCtx, synced);
2500
+ persistContext(pi, runCtx);
2058
2501
  }
2059
2502
  const text = gate.ok
2060
2503
  ? `All ${gate.present.length} artifact(s) present and valid.`
@@ -2138,6 +2581,8 @@ export default function harnessRunContext(pi: ExtensionAPI) {
2138
2581
  registerPlanApprovalCapture(pi, activeAccess);
2139
2582
  registerHarnessToolCallGuards(pi, activeAccess);
2140
2583
  registerHarnessRunStatusCommand(pi, activeAccess);
2584
+
2585
+ registerHarnessClearCommand(pi, activeAccess);
2141
2586
  registerHarnessNewRunCommand(pi, activeAccess);
2142
2587
 
2143
2588
  registerHarnessPlanCommitCommand(pi, activeAccess);