ultimate-pi 0.23.0 → 0.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. package/.pi/extensions/agt-prompt-guard.ts +20 -6
  2. package/.pi/extensions/harness-ask-user.ts +14 -5
  3. package/.pi/extensions/harness-auto-compact.ts +94 -0
  4. package/.pi/extensions/harness-debate-tools.ts +59 -4
  5. package/.pi/extensions/harness-live-widget.ts +25 -0
  6. package/.pi/extensions/harness-plan-approval.ts +65 -15
  7. package/.pi/extensions/harness-plan-orchestration.ts +140 -0
  8. package/.pi/extensions/harness-run-context.ts +501 -48
  9. package/.pi/extensions/harness-telemetry.ts +1 -0
  10. package/.pi/extensions/harness-web-tools.ts +1 -0
  11. package/.pi/extensions/policy-gate.ts +9 -0
  12. package/.pi/extensions/trace-recorder.ts +1 -0
  13. package/.pi/harness/agents.manifest.json +1 -1
  14. package/.pi/harness/docs/adrs/0056-agent-native-speed-wiring.md +26 -0
  15. package/.pi/harness/env.harness.template +14 -0
  16. package/.pi/harness/specs/harness-posthog-event.schema.json +2 -0
  17. package/.pi/harness/specs/sentrux-signal.schema.json +1 -1
  18. package/.pi/lib/harness-auto-approve.ts +140 -0
  19. package/.pi/lib/harness-auto-compact-policy.ts +85 -0
  20. package/.pi/lib/harness-cocoindex-refresh.ts +82 -2
  21. package/.pi/lib/harness-phase-telemetry.ts +81 -0
  22. package/.pi/lib/harness-phase-worker.ts +23 -0
  23. package/.pi/lib/harness-plan-fsm.ts +162 -0
  24. package/.pi/lib/harness-plan-route.ts +134 -0
  25. package/.pi/lib/harness-posthog.ts +6 -1
  26. package/.pi/lib/harness-remediation.ts +79 -0
  27. package/.pi/lib/harness-repair-brief.ts +2 -2
  28. package/.pi/lib/harness-review-parallel.ts +18 -0
  29. package/.pi/lib/harness-run-context.ts +119 -72
  30. package/.pi/lib/harness-spawn-budget.ts +32 -4
  31. package/.pi/lib/harness-spawn-stall-detector.ts +106 -0
  32. package/.pi/lib/harness-spawn-topology.ts +50 -1
  33. package/.pi/lib/harness-subagent-precheck.ts +41 -0
  34. package/.pi/lib/harness-subagent-progress.ts +119 -0
  35. package/.pi/lib/harness-subagent-timeout.ts +81 -0
  36. package/.pi/lib/harness-subagents-bridge.ts +94 -8
  37. package/.pi/lib/harness-ui-state.ts +5 -0
  38. package/.pi/lib/harness-vcc-settings.ts +36 -0
  39. package/.pi/lib/plan-approval-readiness.ts +9 -5
  40. package/.pi/lib/plan-debate-eligibility-snapshot.ts +90 -0
  41. package/.pi/lib/plan-debate-eligibility.ts +16 -9
  42. package/.pi/lib/plan-debate-focus.ts +23 -11
  43. package/.pi/lib/plan-debate-gate.ts +94 -31
  44. package/.pi/lib/plan-debate-round-status.ts +23 -8
  45. package/.pi/lib/plan-debate-wall-clock.ts +57 -0
  46. package/.pi/lib/plan-headless-ux.ts +598 -0
  47. package/.pi/lib/plan-human-gates.ts +24 -85
  48. package/.pi/lib/plan-messenger.ts +3 -3
  49. package/.pi/lib/plan-review-gate.ts +56 -0
  50. package/.pi/prompts/harness-abort.md +1 -0
  51. package/.pi/prompts/harness-auto.md +1 -1
  52. package/.pi/prompts/harness-clear.md +6 -6
  53. package/.pi/prompts/harness-plan.md +15 -2
  54. package/.pi/prompts/harness-review.md +26 -12
  55. package/.pi/scripts/harness-e2e-workflow.mjs +94 -0
  56. package/.pi/scripts/harness-project-toggle.mjs +1 -1
  57. package/.pi/scripts/harness-sentrux-cli.mjs +26 -1
  58. package/.pi/scripts/harness-sentrux-report.mjs +41 -6
  59. package/CHANGELOG.md +16 -0
  60. package/README.md +2 -2
  61. package/package.json +1 -1
  62. package/vendor/pi-subagents/src/subagents.ts +41 -10
@@ -23,8 +23,14 @@ import {
23
23
  resetHarnessPolicyDenyCount,
24
24
  } from "../lib/agt/kill-switch-state.js";
25
25
  import { runAskUser } from "../lib/ask-user/index.js";
26
+ import { isHarnessNonInteractive } from "../lib/ask-user/policy.js";
26
27
  import { claimHarnessGovernanceLoad } from "../lib/extension-load-guard.js";
27
28
  import { getHarnessPackageRoot } from "../lib/harness-paths.js";
29
+ import {
30
+ buildPhaseCompletedPayload,
31
+ phaseTerminalArtifact,
32
+ } from "../lib/harness-phase-telemetry.js";
33
+ import { captureHarnessEvent } from "../lib/harness-posthog.js";
28
34
  import {
29
35
  blockingHarnessAutoCommandReason,
30
36
  blockingReviewCommandReason,
@@ -35,10 +41,12 @@ import {
35
41
  claimRunOwnership,
36
42
  createFreshRunContext,
37
43
  criticalPathWorkItemIdsFromPlanPacket,
44
+ deleteProjectActiveRun,
38
45
  driftGateActive,
39
46
  ensureReviewOutcomeFromEval,
40
47
  evaluateCrossSessionResume,
41
48
  extractWritePathFromToolInput,
49
+ findActiveRunOwnershipConflict,
42
50
  formatActivePlanBlock,
43
51
  formatCrossSessionResumeMessage,
44
52
  formatPlanContextBlock,
@@ -51,6 +59,7 @@ import {
51
59
  harnessAutoTasksDiffer,
52
60
  hasHarnessAbortSignal,
53
61
  hasPlanUserApproval,
62
+ indexOfLastPlanCommand,
54
63
  inferHarnessPhase,
55
64
  isAmendPlanAllowed,
56
65
  isHarnessBootstrapPrompt,
@@ -75,6 +84,7 @@ import {
75
84
  reconcileReviewRouting,
76
85
  reconcileStaleExecuteCompletion,
77
86
  refreshRunContextProgress,
87
+ releaseForeignQaRunOwnership,
78
88
  relPathUnderActiveRun,
79
89
  resetRunContextForHarnessAuto,
80
90
  resolveArgsForCommand,
@@ -104,6 +114,14 @@ import {
104
114
  } from "../lib/harness-yaml.js";
105
115
  import { isReviewRoundArtifactPath } from "../lib/plan-debate-gate.js";
106
116
  import { isReviewRoundYamlWriteAllowed } from "../lib/plan-debate-write-guard.js";
117
+ import {
118
+ endHeadlessHarnessPrintSession,
119
+ maybeForceHeadlessPlanProgress,
120
+ maybeHeadlessQaAutoExecuteSmoke,
121
+ seedHeadlessTaskClarificationIfNeeded,
122
+ shouldEndHeadlessHarnessPrintSession,
123
+ tryHeadlessAutoPlanFinalize,
124
+ } from "../lib/plan-headless-ux.js";
107
125
  import {
108
126
  formatPlanHumanGateBlock,
109
127
  resolvePlanHumanGateStatus,
@@ -115,6 +133,7 @@ import {
115
133
  TASK_CLARIFICATION_ARTIFACT,
116
134
  } from "../lib/plan-task-clarification.js";
117
135
 
136
+ // @ts-expect-error pi extensions run as ESM
118
137
  const MODULE_URL = import.meta.url;
119
138
 
120
139
  interface SessionEntryLike {
@@ -148,6 +167,15 @@ function persistContext(pi: ExtensionAPI, ctx: HarnessRunContext): void {
148
167
  pi.events.emit("harness-run-context:updated", { run_id: ctx.run_id });
149
168
  }
150
169
 
170
+ function notifyHarnessHandoff(
171
+ ctx: { hasUI: boolean; ui: { notify(message: string, type?: string): void } },
172
+ message: string,
173
+ level: "info" | "warning" = "info",
174
+ ): void {
175
+ if (ctx.hasUI) ctx.ui.notify(message, level);
176
+ // Headless (-p/json): appendEntry records handoff; never inject user-visible messages.
177
+ }
178
+
151
179
  const PLAN_REVISION_ARTIFACT_FILES = new Set([
152
180
  "planning-context.yaml",
153
181
  "decomposition.yaml",
@@ -474,7 +502,7 @@ async function applyAbortSignal(input: {
474
502
  entries: unknown[];
475
503
  userPrompt: string;
476
504
  }): Promise<HarnessRunContext | null> {
477
- if (!input.userPrompt.toLowerCase().includes("harness-abort")) {
505
+ if (!hasHarnessAbortSignal(input.userPrompt)) {
478
506
  return input.activeCtx;
479
507
  }
480
508
  const nextCtx =
@@ -492,6 +520,43 @@ async function applyAbortSignal(input: {
492
520
  return nextCtx;
493
521
  }
494
522
 
523
+ function appendAbortPolicyState(
524
+ pi: ExtensionAPI,
525
+ reason: string,
526
+ abortedAt: string,
527
+ ): void {
528
+ pi.appendEntry("harness-policy-state", {
529
+ phase: "plan",
530
+ approvedPlan: false,
531
+ planId: null,
532
+ budgetBypass: false,
533
+ aborted: true,
534
+ abortReason: reason,
535
+ abortedAt,
536
+ updatedAt: abortedAt,
537
+ });
538
+ }
539
+
540
+ function abortActiveRunContext(input: {
541
+ pi: ExtensionAPI;
542
+ activeCtx: HarnessRunContext;
543
+ reason: string;
544
+ }): HarnessRunContext {
545
+ const abortedAt = nowIso();
546
+ input.activeCtx.phase = "plan";
547
+ input.activeCtx.status = "aborted";
548
+ input.activeCtx.plan_ready = false;
549
+ input.activeCtx.last_outcome = "aborted";
550
+ input.activeCtx.last_completed_step = "abort";
551
+ input.activeCtx.next_recommended_command = input.activeCtx.task_summary
552
+ ? `/harness-plan "${input.activeCtx.task_summary}"`
553
+ : '/harness-plan "<task>"';
554
+ input.activeCtx.updated_at = abortedAt;
555
+ appendAbortPolicyState(input.pi, input.reason, abortedAt);
556
+ persistContext(input.pi, input.activeCtx);
557
+ return input.activeCtx;
558
+ }
559
+
495
560
  async function maybeHandleClarificationFollowUp(input: {
496
561
  pi: ExtensionAPI;
497
562
  activeCtx: HarnessRunContext;
@@ -552,7 +617,7 @@ function contextPrompt(systemPrompt: string, activeCtx: HarnessRunContext) {
552
617
  };
553
618
  }
554
619
 
555
- function createNewRunContextForCommand(input: {
620
+ async function createNewRunContextForCommand(input: {
556
621
  pi: ExtensionAPI;
557
622
  activeCtx: HarnessRunContext | null;
558
623
  sessionId: string;
@@ -561,6 +626,18 @@ function createNewRunContextForCommand(input: {
561
626
  userPrompt: string;
562
627
  systemPrompt: string;
563
628
  }) {
629
+ const ownershipConflict = await findActiveRunOwnershipConflict(
630
+ input.projectRoot,
631
+ input.sessionId,
632
+ );
633
+ if (ownershipConflict) {
634
+ return {
635
+ activeCtx: input.activeCtx,
636
+ response: blockRunContextMessage(
637
+ `Another Pi session (${ownershipConflict.ownerPiSessionId}) owns active run ${ownershipConflict.runId}. Finish or abort that run before /harness-new-run.`,
638
+ ),
639
+ };
640
+ }
564
641
  if (input.activeCtx?.status === "active") {
565
642
  input.activeCtx.status = "aborted";
566
643
  input.activeCtx.plan_ready = false;
@@ -643,7 +720,7 @@ type ActiveContextAccess = {
643
720
  set(ctx: HarnessRunContext | null): void;
644
721
  };
645
722
 
646
- const HARNESS_CLEAR_CONFIRM_OPTION = "Delete historical runs";
723
+ const HARNESS_CLEAR_CONFIRM_OPTION = "Delete all harness runs";
647
724
 
648
725
  function isHarnessClearConfirmed(response: unknown): boolean {
649
726
  if (!response || typeof response !== "object") return false;
@@ -666,23 +743,23 @@ function registerHarnessClearCommand(
666
743
  ): void {
667
744
  pi.registerCommand("harness-clear", {
668
745
  description:
669
- "Delete historical harness runs under .pi/harness/runs while preserving the active run",
746
+ "Delete all harness runs under .pi/harness/runs, including the active run",
670
747
  handler: async (_args, ctx) => {
671
748
  const entries = getEntries(ctx);
672
749
  const projectRoot = process.cwd();
673
750
  const latest = active.get() ?? getLatestRunContext(entries);
674
751
  const pointer = await loadProjectActiveRun(projectRoot);
675
- const protectedRunIds = new Set<string>();
676
- if (latest?.run_id) protectedRunIds.add(latest.run_id);
677
- if (pointer?.run_id) protectedRunIds.add(pointer.run_id);
678
- const manifest = await buildHarnessClearManifest(
679
- projectRoot,
680
- protectedRunIds,
681
- );
682
- if (manifest.candidates.length === 0) {
752
+ const activeRunIds = [
753
+ ...new Set(
754
+ [latest?.run_id, pointer?.run_id].filter(Boolean) as string[],
755
+ ),
756
+ ].sort();
757
+ const manifest = await buildHarnessClearManifest(projectRoot);
758
+ const hasTargets =
759
+ manifest.candidates.length > 0 || activeRunIds.length > 0;
760
+ if (!hasTargets) {
683
761
  const message = [
684
- "/harness-clear: no historical run directories eligible for deletion.",
685
- ` protected: ${manifest.protected_run_ids.join(", ") || "(none)"}`,
762
+ "/harness-clear: no harness runs found.",
686
763
  ` skipped: ${manifest.skipped.length}`,
687
764
  ].join("\n");
688
765
  if (ctx.hasUI) ctx.ui.notify(message, "info");
@@ -694,8 +771,10 @@ function registerHarnessClearCommand(
694
771
  });
695
772
  pi.appendEntry("harness-clear-result", {
696
773
  approved: false,
774
+ cleared_all: false,
697
775
  deleted: 0,
698
- protected: manifest.protected_run_ids,
776
+ active_cleared: false,
777
+ active_run_ids: activeRunIds,
699
778
  skipped: manifest.skipped,
700
779
  recorded_at: nowIso(),
701
780
  });
@@ -703,11 +782,12 @@ function registerHarnessClearCommand(
703
782
  }
704
783
  const ask = await runAskUser(
705
784
  {
706
- question: `Delete ${manifest.candidates.length} historical harness run directories?`,
785
+ question: `Delete all ${manifest.candidates.length} harness run directories, including the current run?`,
707
786
  context: [
708
- "Scope: .pi/harness/runs/<run_id> only (historical runs).",
709
- `Preserved active run ids: ${manifest.protected_run_ids.join(", ") || "(none)"}`,
710
- `Candidates: ${manifest.candidates.map((item) => item.run_id).join(", ")}`,
787
+ "Scope: .pi/harness/runs/<run_id> directories plus .pi/harness/active-run.json.",
788
+ "The in-session active run context will also be cleared.",
789
+ `Active run ids: ${activeRunIds.join(", ") || "(none)"}`,
790
+ `Candidates: ${manifest.candidates.map((item) => item.run_id).join(", ") || "(none)"}`,
711
791
  ].join("\n"),
712
792
  options: [HARNESS_CLEAR_CONFIRM_OPTION, "Cancel"],
713
793
  allowSkip: true,
@@ -728,8 +808,10 @@ function registerHarnessClearCommand(
728
808
  });
729
809
  pi.appendEntry("harness-clear-result", {
730
810
  approved: false,
811
+ cleared_all: false,
731
812
  deleted: 0,
732
- protected: manifest.protected_run_ids,
813
+ active_cleared: false,
814
+ active_run_ids: activeRunIds,
733
815
  skipped: manifest.skipped,
734
816
  ask_error: ask.error,
735
817
  recorded_at: nowIso(),
@@ -752,8 +834,10 @@ function registerHarnessClearCommand(
752
834
  });
753
835
  pi.appendEntry("harness-clear-result", {
754
836
  approved: false,
837
+ cleared_all: false,
755
838
  deleted: 0,
756
- protected: manifest.protected_run_ids,
839
+ active_cleared: false,
840
+ active_run_ids: activeRunIds,
757
841
  skipped: manifest.skipped,
758
842
  recorded_at: nowIso(),
759
843
  });
@@ -772,10 +856,13 @@ function registerHarnessClearCommand(
772
856
  });
773
857
  }
774
858
  }
859
+ const activePointerDeleted = await deleteProjectActiveRun(projectRoot);
860
+ active.set(null);
775
861
  const message = [
776
862
  "/harness-clear complete.",
777
863
  ` deleted: ${deleted}`,
778
- ` protected: ${manifest.protected_run_ids.length}`,
864
+ ` active_cleared: true`,
865
+ ` active_pointer_deleted: ${activePointerDeleted}`,
779
866
  ` skipped: ${manifest.skipped.length + failed.length}`,
780
867
  ].join("\n");
781
868
  if (ctx.hasUI) ctx.ui.notify(message, "info");
@@ -787,11 +874,18 @@ function registerHarnessClearCommand(
787
874
  });
788
875
  pi.appendEntry("harness-clear-result", {
789
876
  approved: true,
877
+ cleared_all: failed.length === 0,
790
878
  deleted,
791
- protected: manifest.protected_run_ids,
879
+ active_cleared: true,
880
+ active_pointer_deleted: activePointerDeleted,
881
+ active_run_ids: activeRunIds,
792
882
  skipped: [...manifest.skipped, ...failed],
793
883
  recorded_at: nowIso(),
794
884
  });
885
+ pi.events.emit("harness-runs-cleared", {
886
+ deleted,
887
+ projectRoot,
888
+ });
795
889
  },
796
890
  });
797
891
  }
@@ -1193,13 +1287,7 @@ function handleAgentEndAbort(input: {
1193
1287
  : '/harness-plan "<task>"';
1194
1288
  persistContext(input.pi, input.activeCtx);
1195
1289
  const msg = `Harness aborted. Next: ${input.activeCtx.next_recommended_command}`;
1196
- if (input.ctx.hasUI) input.ctx.ui.notify(msg, "warning");
1197
- else
1198
- input.pi.sendMessage({
1199
- customType: "harness-step-handoff",
1200
- content: msg,
1201
- display: true,
1202
- });
1290
+ notifyHarnessHandoff(input.ctx, msg, "warning");
1203
1291
  }
1204
1292
 
1205
1293
  async function updatePlanReadinessAfterAgent(input: {
@@ -1236,13 +1324,7 @@ async function updatePlanReadinessAfterAgent(input: {
1236
1324
  ) {
1237
1325
  const msg =
1238
1326
  "A draft plan-packet.yaml is on disk, but user approval was not recorded. Complete Review Gate (debate rounds + harness_debate_consensus), then call approve_plan; use create_plan only after Approve.";
1239
- if (input.ctx.hasUI) input.ctx.ui.notify(msg, "warning");
1240
- else
1241
- input.pi.sendMessage({
1242
- customType: "harness-plan-packet",
1243
- content: msg,
1244
- display: true,
1245
- });
1327
+ notifyHarnessHandoff(input.ctx, msg, "warning");
1246
1328
  }
1247
1329
  persistContext(input.pi, input.activeCtx);
1248
1330
  }
@@ -1320,6 +1402,16 @@ function registerPlanApprovalCapture(
1320
1402
  });
1321
1403
  }
1322
1404
 
1405
+ function registerHeadlessPlanProgressWatcher(
1406
+ pi: ExtensionAPI,
1407
+ active: ActiveContextAccess,
1408
+ ): void {
1409
+ pi.on("tool_result", async (event, ctx) => {
1410
+ if (event.isError) return;
1411
+ await handlePlanToolResultForHeadlessProgress({ pi, ctx, active });
1412
+ });
1413
+ }
1414
+
1323
1415
  function registerExecutorHandoffReconcile(
1324
1416
  pi: ExtensionAPI,
1325
1417
  active: ActiveContextAccess,
@@ -1483,10 +1575,46 @@ async function resolveCommandRunContext(input: {
1483
1575
  persistContext(input.pi, activeCtx);
1484
1576
  activeCtx = null;
1485
1577
  }
1578
+ if (
1579
+ activeCtx &&
1580
+ (input.command === "harness-plan" || input.command === "harness-auto") &&
1581
+ activeCtx.owner_pi_session_id !== input.sessionId
1582
+ ) {
1583
+ const foreignRunConflict = await findActiveRunOwnershipConflict(
1584
+ input.projectRoot,
1585
+ input.sessionId,
1586
+ );
1587
+ if (foreignRunConflict) {
1588
+ return {
1589
+ activeCtx,
1590
+ resolved,
1591
+ response: blockRunContextMessage(
1592
+ `Another Pi session (${foreignRunConflict.ownerPiSessionId}) owns active run ${foreignRunConflict.runId}. Finish or abort that run before starting a new plan.`,
1593
+ ),
1594
+ };
1595
+ }
1596
+ activeCtx = null;
1597
+ }
1486
1598
  const reuseRun =
1487
1599
  activeCtx &&
1488
1600
  shouldReuseHarnessRunId(input.userPrompt, activeCtx, input.command);
1489
1601
  if (!activeCtx || !reuseRun) {
1602
+ if (process.env.HARNESS_QA_SMOKE === "1") {
1603
+ await releaseForeignQaRunOwnership(input.projectRoot, input.sessionId);
1604
+ }
1605
+ const ownershipConflict = await findActiveRunOwnershipConflict(
1606
+ input.projectRoot,
1607
+ input.sessionId,
1608
+ );
1609
+ if (ownershipConflict) {
1610
+ return {
1611
+ activeCtx,
1612
+ resolved,
1613
+ response: blockRunContextMessage(
1614
+ `Another Pi session (${ownershipConflict.ownerPiSessionId}) owns active run ${ownershipConflict.runId}. Finish or abort that run before starting a new plan.`,
1615
+ ),
1616
+ };
1617
+ }
1490
1618
  if (activeCtx?.status === "active") {
1491
1619
  activeCtx.status = "aborted";
1492
1620
  activeCtx.plan_ready = false;
@@ -1502,6 +1630,11 @@ async function resolveCommandRunContext(input: {
1502
1630
  } else if (input.command === "harness-auto") {
1503
1631
  activeCtx = resetRunContextForHarnessAuto(activeCtx);
1504
1632
  if (task) activeCtx.task_summary = task;
1633
+ } else if (
1634
+ input.command === "harness-plan" &&
1635
+ activeCtx.status === "aborted"
1636
+ ) {
1637
+ activeCtx = resetRunContextForHarnessAuto(activeCtx);
1505
1638
  }
1506
1639
  if (input.command === "harness-plan") {
1507
1640
  if (task) activeCtx.task_summary = task;
@@ -1594,8 +1727,44 @@ async function handlePreResolvedHarnessCommand(args: {
1594
1727
  };
1595
1728
  }
1596
1729
  }
1730
+ if (command === "harness-abort") {
1731
+ if (!activeCtx) {
1732
+ if (process.env.HARNESS_QA_SMOKE === "1") {
1733
+ const released = await releaseForeignQaRunOwnership(
1734
+ projectRoot,
1735
+ sessionId,
1736
+ );
1737
+ if (released) {
1738
+ return {
1739
+ activeCtx: null,
1740
+ response: blockRunContextMessage(
1741
+ 'Stale QA harness run released from disk. Next: /harness-plan "<task>"',
1742
+ ),
1743
+ handled: true,
1744
+ };
1745
+ }
1746
+ }
1747
+ return {
1748
+ activeCtx,
1749
+ response: blockRunContextMessage(
1750
+ 'No active harness run to abort. Next: /harness-plan "<task>"',
1751
+ ),
1752
+ handled: true,
1753
+ };
1754
+ }
1755
+ const reason = parsedArgs.trim() || "manual abort";
1756
+ const aborted = abortActiveRunContext({ pi, activeCtx, reason });
1757
+ return {
1758
+ activeCtx: aborted,
1759
+ response: blockRunContextMessage(
1760
+ `Harness aborted. Mutating tools are blocked until a new approved plan is attached. Next: ${aborted.next_recommended_command}`,
1761
+ ),
1762
+ handled: true,
1763
+ };
1764
+ }
1765
+
1597
1766
  if (command === "harness-new-run") {
1598
- const next = createNewRunContextForCommand({
1767
+ const next = await createNewRunContextForCommand({
1599
1768
  pi,
1600
1769
  activeCtx,
1601
1770
  sessionId,
@@ -1702,6 +1871,8 @@ async function handleBeforeAgentStart(input: {
1702
1871
  }
1703
1872
  if (!parsed) return undefined;
1704
1873
  const { command, args } = parsed;
1874
+ const planQuick = parseArgFlag(args, "--quick") != null;
1875
+ const planRisk = parseArgFlag(args, "--risk") ?? "med";
1705
1876
  const preResolved = await handlePreResolvedHarnessCommand({
1706
1877
  pi: input.pi,
1707
1878
  activeCtx,
@@ -1736,6 +1907,19 @@ async function handleBeforeAgentStart(input: {
1736
1907
  return blockRunContextMessage(
1737
1908
  'No active harness run. Run /harness-plan "<task>" first, or /harness-use-run <run-id> for recovery.',
1738
1909
  );
1910
+ if (
1911
+ isHarnessNonInteractive() &&
1912
+ (await shouldEndHeadlessHarnessPrintSession({
1913
+ command,
1914
+ runCtx: activeCtx,
1915
+ projectRoot,
1916
+ }))
1917
+ ) {
1918
+ endHeadlessHarnessPrintSession(input.ctx);
1919
+ return {
1920
+ systemPrompt: `${input.event.systemPrompt}\n\n[Harness] Headless session complete; ending.`,
1921
+ };
1922
+ }
1739
1923
  activeCtx.phase = policyPhase;
1740
1924
  activeCtx.updated_at = new Date().toISOString();
1741
1925
  activeCtx.pi_session_id = sessionId;
@@ -1811,6 +1995,26 @@ async function handleBeforeAgentStart(input: {
1811
1995
  activeCtx,
1812
1996
  );
1813
1997
  Object.assign(activeCtx, syncedCtx);
1998
+ if (command === "harness-plan" || command === "harness-auto") {
1999
+ const runDir = join(
2000
+ projectRoot,
2001
+ ".pi",
2002
+ "harness",
2003
+ "runs",
2004
+ activeCtx.run_id,
2005
+ );
2006
+ await seedHeadlessTaskClarificationIfNeeded({
2007
+ runDir,
2008
+ taskSummary: activeCtx.task_summary ?? "",
2009
+ riskLevel: planRisk,
2010
+ quick: planQuick,
2011
+ });
2012
+ const resynced = await syncPlanLastOutcomeFromTaskClarification(
2013
+ projectRoot,
2014
+ activeCtx,
2015
+ );
2016
+ Object.assign(activeCtx, resynced);
2017
+ }
1814
2018
  input.active.set(activeCtx);
1815
2019
  persistContext(input.pi, activeCtx);
1816
2020
  if (command === "harness-plan" || command === "harness-auto") {
@@ -1818,13 +2022,12 @@ async function handleBeforeAgentStart(input: {
1818
2022
  }
1819
2023
  let gateBlock = "";
1820
2024
  if (command === "harness-plan" || command === "harness-auto") {
1821
- const quick = parseArgFlag(args, "--quick") != null;
1822
2025
  const gateStatus = await resolvePlanHumanGateStatus(
1823
2026
  projectRoot,
1824
2027
  activeCtx.run_id,
1825
2028
  entries,
1826
2029
  {
1827
- quick,
2030
+ quick: planQuick,
1828
2031
  taskSummary: activeCtx.task_summary ?? undefined,
1829
2032
  lastOutcome: activeCtx.last_outcome ?? undefined,
1830
2033
  },
@@ -1837,6 +2040,138 @@ async function handleBeforeAgentStart(input: {
1837
2040
  };
1838
2041
  }
1839
2042
 
2043
+ async function applyHeadlessPlanFinalizeAndQaSmoke(input: {
2044
+ pi: ExtensionAPI;
2045
+ ctx: any;
2046
+ active: ActiveContextAccess;
2047
+ command: string;
2048
+ args: string;
2049
+ activeCtx: HarnessRunContext;
2050
+ entries: unknown[];
2051
+ }): Promise<void> {
2052
+ const projectRoot = process.cwd();
2053
+ const planQuick = parseArgFlag(input.args, "--quick") != null;
2054
+ const planRisk = parseArgFlag(input.args, "--risk") ?? "med";
2055
+ const outcome = await tryHeadlessAutoPlanFinalize({
2056
+ projectRoot,
2057
+ runCtx: input.activeCtx,
2058
+ taskSummary: input.activeCtx.task_summary ?? "",
2059
+ entries: input.entries,
2060
+ riskLevel: planRisk,
2061
+ quick: planQuick,
2062
+ deps: {
2063
+ appendEntry: (type, data) => input.pi.appendEntry(type, data),
2064
+ getEntries: () => getEntries(input.ctx),
2065
+ getSubagentEntries: () => getEntries(input.ctx),
2066
+ onPlanCommitted: (updated, packet, planPath) => {
2067
+ input.pi.appendEntry("harness-run-context", updated);
2068
+ input.pi.appendEntry(
2069
+ "harness-plan-packet",
2070
+ planPacketSummary(packet, planPath, "ready"),
2071
+ );
2072
+ },
2073
+ },
2074
+ });
2075
+ if (
2076
+ outcome.progress.seeded_clarification ||
2077
+ outcome.progress.seeded_planning_context ||
2078
+ outcome.progress.patched_review_gate ||
2079
+ outcome.progress.wrote_consensus_bypass
2080
+ ) {
2081
+ input.pi.appendEntry("harness-headless-plan-progress", {
2082
+ run_id: input.activeCtx.run_id,
2083
+ ...outcome.progress,
2084
+ recorded_at: nowIso(),
2085
+ });
2086
+ }
2087
+ if (outcome.finalized) {
2088
+ const synced = await syncPlanReadyFromDisk(
2089
+ projectRoot,
2090
+ input.activeCtx,
2091
+ input.entries,
2092
+ );
2093
+ Object.assign(input.activeCtx, synced);
2094
+ persistContext(input.pi, input.activeCtx);
2095
+ input.active.set(input.activeCtx);
2096
+ input.pi.appendEntry("harness-headless-plan-finalized", {
2097
+ run_id: input.activeCtx.run_id,
2098
+ source: "headless_auto",
2099
+ recorded_at: nowIso(),
2100
+ });
2101
+ input.activeCtx.next_recommended_command = "/harness-run";
2102
+ persistContext(input.pi, input.activeCtx);
2103
+ if (input.command === "harness-auto") {
2104
+ await maybeHeadlessQaAutoExecuteSmoke({
2105
+ projectRoot,
2106
+ runCtx: input.activeCtx,
2107
+ command: input.command,
2108
+ });
2109
+ persistContext(input.pi, input.activeCtx);
2110
+ }
2111
+ if (
2112
+ await shouldEndHeadlessHarnessPrintSession({
2113
+ command: input.command,
2114
+ runCtx: input.activeCtx,
2115
+ projectRoot,
2116
+ })
2117
+ ) {
2118
+ endHeadlessHarnessPrintSession(input.ctx);
2119
+ }
2120
+ } else if (outcome.reason && outcome.progress.force_reason) {
2121
+ input.pi.appendEntry("harness-headless-plan-progress", {
2122
+ run_id: input.activeCtx.run_id,
2123
+ finalize_blocked: outcome.reason,
2124
+ recorded_at: nowIso(),
2125
+ });
2126
+ }
2127
+ }
2128
+
2129
+ async function handleHeadlessPlanProgressCheck(input: {
2130
+ pi: ExtensionAPI;
2131
+ ctx: any;
2132
+ active: ActiveContextAccess;
2133
+ }): Promise<void> {
2134
+ const entries = getEntries(input.ctx);
2135
+ const turn = getLatestHarnessTurn(entries);
2136
+ if (
2137
+ !turn ||
2138
+ (turn.command !== "harness-plan" && turn.command !== "harness-auto")
2139
+ ) {
2140
+ return;
2141
+ }
2142
+ const activeCtx = input.active.get() ?? getLatestRunContext(entries);
2143
+ if (!activeCtx?.run_id || activeCtx.plan_ready) return;
2144
+ await applyHeadlessPlanFinalizeAndQaSmoke({
2145
+ pi: input.pi,
2146
+ ctx: input.ctx,
2147
+ active: input.active,
2148
+ command: turn.command,
2149
+ args: turn.args,
2150
+ activeCtx,
2151
+ entries,
2152
+ });
2153
+ }
2154
+
2155
+ async function handleTurnStart(input: {
2156
+ pi: ExtensionAPI;
2157
+ ctx: any;
2158
+ active: ActiveContextAccess;
2159
+ }): Promise<void> {
2160
+ await handleHeadlessPlanProgressCheck(input);
2161
+ }
2162
+
2163
+ async function handlePlanToolResultForHeadlessProgress(input: {
2164
+ pi: ExtensionAPI;
2165
+ ctx: any;
2166
+ active: ActiveContextAccess;
2167
+ }): Promise<void> {
2168
+ const entries = getEntries(input.ctx);
2169
+ const since = Math.max(0, indexOfLastPlanCommand(entries));
2170
+ const sinceEntries = entries.length - since;
2171
+ if (sinceEntries > 0 && sinceEntries % 12 !== 0) return;
2172
+ await handleHeadlessPlanProgressCheck(input);
2173
+ }
2174
+
1840
2175
  async function handleAgentEnd(input: {
1841
2176
  pi: ExtensionAPI;
1842
2177
  ctx: any;
@@ -1871,6 +2206,29 @@ async function handleAgentEnd(input: {
1871
2206
  parsed?.command === "harness-plan" ||
1872
2207
  parsed?.command === "harness-auto"
1873
2208
  ) {
2209
+ const planArgs = parsed.args ?? "";
2210
+ const quick = parseArgFlag(planArgs, "--quick") != null;
2211
+ const risk = parseArgFlag(planArgs, "--risk") ?? "med";
2212
+ const forced = await maybeForceHeadlessPlanProgress({
2213
+ projectRoot,
2214
+ runId: activeCtx.run_id,
2215
+ taskSummary: activeCtx.task_summary ?? "",
2216
+ entries,
2217
+ riskLevel: risk,
2218
+ quick,
2219
+ });
2220
+ if (
2221
+ forced.seeded_clarification ||
2222
+ forced.seeded_planning_context ||
2223
+ forced.patched_review_gate ||
2224
+ forced.wrote_consensus_bypass
2225
+ ) {
2226
+ input.pi.appendEntry("harness-headless-plan-progress", {
2227
+ run_id: activeCtx.run_id,
2228
+ ...forced,
2229
+ recorded_at: nowIso(),
2230
+ });
2231
+ }
1874
2232
  const synced = await syncPlanLastOutcomeFromTaskClarification(
1875
2233
  projectRoot,
1876
2234
  activeCtx,
@@ -1878,6 +2236,41 @@ async function handleAgentEnd(input: {
1878
2236
  Object.assign(activeCtx, synced);
1879
2237
  persistContext(input.pi, activeCtx);
1880
2238
  }
2239
+ if (
2240
+ parsed?.command === "harness-plan" ||
2241
+ parsed?.command === "harness-auto"
2242
+ ) {
2243
+ if (!activeCtx.plan_ready) {
2244
+ await applyHeadlessPlanFinalizeAndQaSmoke({
2245
+ pi: input.pi,
2246
+ ctx: input.ctx,
2247
+ active: input.active,
2248
+ command: parsed.command,
2249
+ args: parsed.args ?? "",
2250
+ activeCtx,
2251
+ entries,
2252
+ });
2253
+ } else if (
2254
+ parsed.command === "harness-auto" &&
2255
+ process.env.HARNESS_QA_SMOKE === "1"
2256
+ ) {
2257
+ await maybeHeadlessQaAutoExecuteSmoke({
2258
+ projectRoot,
2259
+ runCtx: activeCtx,
2260
+ command: parsed.command,
2261
+ });
2262
+ persistContext(input.pi, activeCtx);
2263
+ if (
2264
+ await shouldEndHeadlessHarnessPrintSession({
2265
+ command: parsed.command,
2266
+ runCtx: activeCtx,
2267
+ projectRoot,
2268
+ })
2269
+ ) {
2270
+ endHeadlessHarnessPrintSession(input.ctx);
2271
+ }
2272
+ }
2273
+ }
1881
2274
  const statuses = await resolveCompletionStatuses(
1882
2275
  entries,
1883
2276
  activeCtx.run_id,
@@ -1964,14 +2357,17 @@ async function handleAgentEnd(input: {
1964
2357
  phase: activeCtx.phase,
1965
2358
  });
1966
2359
  if (next && parsed) {
1967
- const notify = `Next: ${next}`;
1968
- if (input.ctx.hasUI) input.ctx.ui.notify(notify, "info");
1969
- else
1970
- input.pi.sendMessage({
1971
- customType: "harness-step-handoff",
1972
- content: notify,
1973
- display: true,
1974
- });
2360
+ notifyHarnessHandoff(input.ctx, `Next: ${next}`);
2361
+ }
2362
+ if (
2363
+ parsed &&
2364
+ (await shouldEndHeadlessHarnessPrintSession({
2365
+ command: parsed.command,
2366
+ runCtx: activeCtx,
2367
+ projectRoot,
2368
+ }))
2369
+ ) {
2370
+ endHeadlessHarnessPrintSession(input.ctx);
1975
2371
  }
1976
2372
  }
1977
2373
 
@@ -2554,6 +2950,44 @@ function registerHarnessRunContextTool4(
2554
2950
  Object.assign(runCtx, synced);
2555
2951
  persistContext(pi, runCtx);
2556
2952
  }
2953
+ if (gate.ok) {
2954
+ const sessionId = ctx.sessionManager.getSessionId();
2955
+ const completedPhases = new Set<string>();
2956
+ for (const rawPath of paths) {
2957
+ const norm = rawPath.replace(/\\/g, "/");
2958
+ const phase = phaseTerminalArtifact(norm);
2959
+ if (!phase || completedPhases.has(phase)) continue;
2960
+ const payload = buildPhaseCompletedPayload(runCtx.run_id, phase);
2961
+ if (payload) {
2962
+ completedPhases.add(phase);
2963
+ captureHarnessEvent(sessionId, "harness_phase_completed", {
2964
+ ...payload,
2965
+ harness_plan_id: runCtx.plan_id ?? "plan-unknown",
2966
+ pi_session_id: sessionId,
2967
+ });
2968
+ pi.appendEntry("harness-phase-completed", payload);
2969
+ }
2970
+ }
2971
+ if (
2972
+ paths.some(
2973
+ (p) => p.replace(/\\/g, "/") === "artifacts/review-outcome.yaml",
2974
+ )
2975
+ ) {
2976
+ captureHarnessEvent(sessionId, "harness_run_completed", {
2977
+ harness_run_id: runCtx.run_id,
2978
+ run_id: runCtx.run_id,
2979
+ harness_plan_id: runCtx.plan_id ?? "plan-unknown",
2980
+ harness_phase: "evaluate",
2981
+ pi_session_id: sessionId,
2982
+ source: "review-outcome_gate",
2983
+ duration_ms: 0,
2984
+ tool_span_count: 0,
2985
+ input_tokens: 0,
2986
+ output_tokens: 0,
2987
+ });
2988
+ }
2989
+ }
2990
+
2557
2991
  const text = gate.ok
2558
2992
  ? `All ${gate.present.length} artifact(s) present and valid.`
2559
2993
  : [
@@ -2599,6 +3033,20 @@ export default function harnessRunContext(pi: ExtensionAPI) {
2599
3033
  },
2600
3034
  };
2601
3035
 
3036
+ pi.events.on("harness-run-aborted", (payload: unknown) => {
3037
+ const reason =
3038
+ typeof (payload as { reason?: unknown })?.reason === "string"
3039
+ ? (payload as { reason: string }).reason || "manual abort"
3040
+ : "manual abort";
3041
+ if (activeCtx) {
3042
+ abortActiveRunContext({ pi, activeCtx, reason });
3043
+ }
3044
+ });
3045
+
3046
+ pi.events.on("harness-runs-cleared", () => {
3047
+ activeCtx = null;
3048
+ });
3049
+
2602
3050
  pi.on("session_start", async (_event, ctx) => {
2603
3051
  const entries = getEntries(ctx);
2604
3052
  activeCtx = hydrateFromSession(entries);
@@ -2629,11 +3077,16 @@ export default function harnessRunContext(pi: ExtensionAPI) {
2629
3077
  handleBeforeAgentStart({ pi, event, ctx, active: activeAccess }),
2630
3078
  );
2631
3079
 
3080
+ pi.on("turn_start", async (_event, ctx) => {
3081
+ await handleTurnStart({ pi, ctx, active: activeAccess });
3082
+ });
3083
+
2632
3084
  pi.on("agent_end", async (_event, ctx) => {
2633
3085
  await handleAgentEnd({ pi, ctx, active: activeAccess });
2634
3086
  });
2635
3087
 
2636
3088
  registerPlanApprovalCapture(pi, activeAccess);
3089
+ registerHeadlessPlanProgressWatcher(pi, activeAccess);
2637
3090
  registerExecutorHandoffReconcile(pi, activeAccess);
2638
3091
  registerHarnessToolCallGuards(pi, activeAccess);
2639
3092
  registerHarnessRunStatusCommand(pi, activeAccess);