ultimate-pi 0.17.0 → 0.18.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137) hide show
  1. package/.agents/skills/harness-context/SKILL.md +13 -6
  2. package/.agents/skills/harness-debate-plan/SKILL.md +37 -20
  3. package/.agents/skills/harness-decisions/SKILL.md +1 -1
  4. package/.agents/skills/harness-eval/SKILL.md +6 -21
  5. package/.agents/skills/harness-governor/SKILL.md +4 -3
  6. package/.agents/skills/harness-orchestration/SKILL.md +41 -53
  7. package/.agents/skills/harness-plan/SKILL.md +23 -12
  8. package/.agents/skills/harness-review/SKILL.md +52 -0
  9. package/.agents/skills/harness-sentrux-setup/SKILL.md +16 -3
  10. package/.agents/skills/harness-steer/SKILL.md +14 -0
  11. package/.agents/skills/sentrux/SKILL.md +9 -9
  12. package/.pi/agents/harness/planning/decompose.md +7 -4
  13. package/.pi/agents/harness/planning/hypothesis-validator.md +2 -0
  14. package/.pi/agents/harness/planning/hypothesis.md +3 -1
  15. package/.pi/agents/harness/planning/plan-adversary.md +2 -0
  16. package/.pi/agents/harness/planning/plan-evaluator.md +2 -0
  17. package/.pi/agents/harness/planning/plan-synthesizer.md +25 -0
  18. package/.pi/agents/harness/planning/planning-context.md +48 -0
  19. package/.pi/agents/harness/planning/review-integrator.md +2 -0
  20. package/.pi/agents/harness/planning/sprint-contract-auditor.md +2 -0
  21. package/.pi/agents/harness/{adversary.md → reviewing/adversary.md} +3 -10
  22. package/.pi/agents/harness/{evaluator.md → reviewing/evaluator.md} +3 -12
  23. package/.pi/agents/harness/running/executor.md +45 -0
  24. package/.pi/agents/harness/sentrux-steward.md +51 -0
  25. package/.pi/extensions/00-harness-project-control.ts +133 -0
  26. package/.pi/extensions/00-posthog-network-bootstrap.ts +11 -0
  27. package/.pi/extensions/budget-guard.ts +2 -0
  28. package/.pi/extensions/debate-orchestrator.ts +2 -0
  29. package/.pi/extensions/harness-ask-user.ts +2 -2
  30. package/.pi/extensions/harness-debate-tools.ts +2 -2
  31. package/.pi/extensions/harness-live-widget.ts +60 -3
  32. package/.pi/extensions/harness-plan-approval.ts +64 -58
  33. package/.pi/extensions/harness-run-context.ts +715 -90
  34. package/.pi/extensions/harness-subagent-submit.ts +46 -12
  35. package/.pi/extensions/harness-subagents.ts +2 -2
  36. package/.pi/extensions/harness-telemetry.ts +2 -0
  37. package/.pi/extensions/harness-web-tools.ts +2 -2
  38. package/.pi/extensions/lib/extension-load-guard.ts +10 -0
  39. package/.pi/extensions/lib/harness-artifact-gate.ts +172 -0
  40. package/.pi/extensions/lib/harness-posthog.ts +9 -5
  41. package/.pi/extensions/lib/harness-spawn-topology.ts +165 -0
  42. package/.pi/extensions/lib/harness-subagent-auth.ts +1 -2
  43. package/.pi/extensions/lib/harness-subagent-policy.ts +28 -24
  44. package/.pi/extensions/lib/harness-subagent-precheck.ts +36 -10
  45. package/.pi/extensions/lib/harness-subagent-submit-pipeline.ts +66 -2
  46. package/.pi/extensions/lib/harness-subagent-submit-registry.ts +22 -22
  47. package/.pi/extensions/lib/harness-subagents-bridge.ts +7 -29
  48. package/.pi/extensions/lib/harness-subprocess-bootstrap.ts +73 -0
  49. package/.pi/extensions/lib/plan-approval/create-plan.ts +2 -3
  50. package/.pi/extensions/lib/plan-approval/resolve-disk.ts +102 -0
  51. package/.pi/extensions/lib/plan-approval/schema.ts +22 -8
  52. package/.pi/extensions/lib/plan-approval/types.ts +1 -1
  53. package/.pi/extensions/lib/plan-approval/validate.ts +2 -2
  54. package/.pi/extensions/lib/plan-approval-readiness.ts +192 -0
  55. package/.pi/extensions/lib/plan-debate-eligibility.ts +12 -5
  56. package/.pi/extensions/lib/plan-debate-gate.ts +22 -1
  57. package/.pi/extensions/lib/plan-debate-lanes.ts +32 -2
  58. package/.pi/extensions/lib/plan-review-gate.ts +8 -0
  59. package/.pi/extensions/lib/posthog-client.ts +76 -0
  60. package/.pi/extensions/lib/spawn-policy.ts +3 -3
  61. package/.pi/extensions/observation-bus.ts +2 -0
  62. package/.pi/extensions/policy-gate.ts +26 -19
  63. package/.pi/extensions/review-integrity.ts +91 -10
  64. package/.pi/extensions/sentrux-rules-sync.ts +2 -0
  65. package/.pi/extensions/test-diff-integrity.ts +1 -0
  66. package/.pi/extensions/trace-recorder.ts +2 -0
  67. package/.pi/harness/agents.manifest.json +37 -37
  68. package/.pi/harness/corpus/cron.example +8 -0
  69. package/.pi/harness/corpus/graphify-kb-updater.config.json +214 -0
  70. package/.pi/harness/corpus/systemd/graphify-kb-updater.env.template +4 -0
  71. package/.pi/harness/corpus/systemd/graphify-kb-updater.service +17 -0
  72. package/.pi/harness/corpus/systemd/graphify-kb-updater.timer +11 -0
  73. package/.pi/harness/docs/adrs/0001-harness-constitution.md +2 -1
  74. package/.pi/harness/docs/adrs/0006-sentrux-dual-layer.md +8 -6
  75. package/.pi/harness/docs/adrs/0009-sentrux-rules-lifecycle.md +6 -1
  76. package/.pi/harness/docs/adrs/0031-harness-run-context.md +1 -1
  77. package/.pi/harness/docs/adrs/0032-harness-command-orchestration.md +7 -0
  78. package/.pi/harness/docs/adrs/0034-darwin-plan-research-pipeline.md +3 -3
  79. package/.pi/harness/docs/adrs/0036-implementation-research-and-selective-debate.md +8 -5
  80. package/.pi/harness/docs/adrs/0039-harness-post-run-review-gate.md +47 -0
  81. package/.pi/harness/docs/adrs/0040-practice-grounded-orchestration.md +40 -0
  82. package/.pi/harness/docs/adrs/0041-intelligent-planning-reconnaissance.md +39 -0
  83. package/.pi/harness/docs/adrs/0042-agent-native-orchestration.md +35 -0
  84. package/.pi/harness/docs/adrs/0043-path-first-harness-tools.md +38 -0
  85. package/.pi/harness/docs/adrs/0044-harness-steer-loop.md +37 -0
  86. package/.pi/harness/docs/adrs/0045-phase-scoped-agent-directories.md +33 -0
  87. package/.pi/harness/docs/adrs/README.md +11 -0
  88. package/.pi/harness/docs/graphify-kb-updater-runbook.md +163 -0
  89. package/.pi/harness/docs/practice-map.md +110 -0
  90. package/.pi/harness/env.harness.template +5 -3
  91. package/.pi/harness/evals/smoke/sentrux-stub.json +1 -1
  92. package/.pi/harness/evals/smoke/smoke-harness-plan.mjs +5 -2
  93. package/.pi/harness/specs/README.md +1 -1
  94. package/.pi/harness/specs/harness-run-context.schema.json +11 -0
  95. package/.pi/harness/specs/harness-spawn-context.schema.json +15 -1
  96. package/.pi/harness/specs/plan-execution-plan.schema.json +39 -1
  97. package/.pi/harness/specs/plan-packet.schema.json +4 -0
  98. package/.pi/harness/specs/plan-phase-status.schema.json +17 -0
  99. package/.pi/harness/specs/plan-phase-waiver.schema.json +25 -0
  100. package/.pi/harness/specs/plan-planning-context.schema.json +50 -0
  101. package/.pi/harness/specs/repair-brief.schema.json +45 -0
  102. package/.pi/harness/specs/review-outcome.schema.json +46 -0
  103. package/.pi/harness/specs/sentrux-manifest-proposal.schema.json +80 -0
  104. package/.pi/harness/specs/sentrux-signal.schema.json +43 -0
  105. package/.pi/harness/specs/steer-state.schema.json +20 -0
  106. package/.pi/lib/harness-context-mode-policy.ts +256 -0
  107. package/.pi/lib/harness-project-config.ts +91 -0
  108. package/.pi/lib/harness-repair-brief.ts +145 -0
  109. package/.pi/lib/harness-run-context.ts +591 -32
  110. package/.pi/lib/harness-ui-state.ts +114 -21
  111. package/.pi/prompts/harness-auto.md +10 -10
  112. package/.pi/prompts/harness-critic.md +3 -30
  113. package/.pi/prompts/harness-eval.md +4 -37
  114. package/.pi/prompts/harness-plan.md +116 -54
  115. package/.pi/prompts/harness-review.md +150 -15
  116. package/.pi/prompts/harness-run.md +62 -10
  117. package/.pi/prompts/harness-sentrux-steward.md +55 -0
  118. package/.pi/prompts/harness-setup.md +5 -4
  119. package/.pi/prompts/harness-steer.md +30 -0
  120. package/.pi/scripts/README.md +1 -0
  121. package/.pi/scripts/graphify-kb-updater.mjs +398 -0
  122. package/.pi/scripts/harness-agents-manifest.mjs +1 -1
  123. package/.pi/scripts/harness-project-toggle.mjs +129 -0
  124. package/.pi/scripts/harness-sentrux-cli.mjs +142 -0
  125. package/.pi/scripts/harness-verify.mjs +22 -6
  126. package/.pi/scripts/harness-web-policy-guard.mjs +68 -0
  127. package/.pi/scripts/validate-plan-dag.mjs +3 -3
  128. package/AGENTS.md +1 -0
  129. package/CHANGELOG.md +23 -0
  130. package/README.md +94 -58
  131. package/package.json +5 -4
  132. package/.pi/agents/harness/executor.md +0 -47
  133. package/.pi/agents/harness/planning/scout-graphify.md +0 -37
  134. package/.pi/agents/harness/planning/scout-semantic.md +0 -39
  135. package/.pi/agents/harness/planning/scout-structure.md +0 -35
  136. package/.pi/prompts/git-sync.md +0 -124
  137. /package/.pi/agents/harness/{tie-breaker.md → reviewing/tie-breaker.md} +0 -0
@@ -37,6 +37,10 @@ export interface HarnessRunContext {
37
37
  updated_at: string;
38
38
  harness_run_started_emitted?: boolean;
39
39
  turn_override_run_id?: string | null;
40
+ /** Persisted steer gate approval (cross-session via run-context.yaml). */
41
+ steer_approved?: boolean;
42
+ steer_attempt?: number;
43
+ steer_max_attempts?: number;
40
44
  }
41
45
 
42
46
  export interface ProjectActiveRunPointer {
@@ -84,6 +88,7 @@ const HARNESS_COMMANDS = new Set([
84
88
  "harness-run",
85
89
  "harness-eval",
86
90
  "harness-review",
91
+ "harness-steer",
87
92
  "harness-critic",
88
93
  "harness-trace",
89
94
  "harness-incident",
@@ -147,6 +152,24 @@ const PLAN_RUN_SCOPED_ROOT_FILES = new Set([
147
152
  PLAN_REVIEW_BASENAME,
148
153
  ]);
149
154
 
/**
 * Basenames of parent-orchestrator artifacts that remain writable during the
 * evaluate and adversary phases (ADR 0044).
 */
export const EVALUATE_PHASE_ORCHESTRATOR_ARTIFACTS = new Set([
  "benchmark-log.yaml",
  "review-outcome.yaml",
  "repair-brief.yaml",
  "steer-state.yaml",
  "eval-benchmark.yaml",
]);
163
+
/** Steer-attempt cap used when HARNESS_STEER_MAX_ATTEMPTS is unset or invalid. */
export const DEFAULT_STEER_MAX_ATTEMPTS = 3;

/**
 * Resolve the steer-attempt cap from the HARNESS_STEER_MAX_ATTEMPTS env var.
 *
 * @returns The configured positive integer, or DEFAULT_STEER_MAX_ATTEMPTS when
 *   the variable is unset, blank, non-numeric, or not a positive integer.
 */
export function steerMaxAttemptsFromEnv(): number {
  const raw = process.env.HARNESS_STEER_MAX_ATTEMPTS?.trim();
  if (!raw) return DEFAULT_STEER_MAX_ATTEMPTS;
  // parseInt alone would silently accept "5abc" (5) or "2.9" (2); demand digits only.
  if (!/^\+?\d+$/.test(raw)) return DEFAULT_STEER_MAX_ATTEMPTS;
  const parsed = Number.parseInt(raw, 10);
  return Number.isSafeInteger(parsed) && parsed > 0
    ? parsed
    : DEFAULT_STEER_MAX_ATTEMPTS;
}
172
+
150
173
  const MUTATING_FILE_TOOLS = new Set(["write", "edit"]);
151
174
 
152
175
  const PLAN_APPROVE_OPTION =
@@ -179,6 +202,7 @@ export const HARNESS_COMMAND_PHASE: Record<string, HarnessPhase> = {
179
202
  "harness-run": "execute",
180
203
  "harness-eval": "evaluate",
181
204
  "harness-review": "evaluate",
205
+ "harness-steer": "execute",
182
206
  "harness-critic": "adversary",
183
207
  "harness-trace": "evaluate",
184
208
  "harness-incident": "evaluate",
@@ -241,9 +265,49 @@ export function isPlanRunScopedRelativePath(rel: string): boolean {
241
265
  const file = parts[2];
242
266
  return file.endsWith(".yaml") || file.endsWith(".yml");
243
267
  }
268
+ if (
269
+ parts.length === 4 &&
270
+ parts[1] === "artifacts" &&
271
+ parts[2] === "context-bundles" &&
272
+ (parts[3].endsWith(".yaml") || parts[3].endsWith(".yml"))
273
+ ) {
274
+ return true;
275
+ }
244
276
  return false;
245
277
  }
246
278
 
/**
 * True when `rel` has the shape `<run-id>/artifacts/<basename>` and the
 * basename is listed in EVALUATE_PHASE_ORCHESTRATOR_ARTIFACTS.
 *
 * @param rel Path relative to the harness runs root; `/` and `\` both count
 *   as separators.
 * @returns false for absolute paths, paths starting with "..", paths with an
 *   empty or "." run segment, and anything that is not exactly three segments.
 */
export function isEvaluatePhaseOrchestratorArtifact(rel: string): boolean {
  if (rel.startsWith("..") || isAbsolute(rel)) return false;
  const segments = rel.split(/[/\\]/);
  if (segments.length !== 3) return false;
  const [runSegment = "", dir = "", basename = ""] = segments;
  // A leading separator or "./" prefix yields an empty or "." run segment; reject it.
  if (runSegment === "" || runSegment === ".") return false;
  if (dir !== "artifacts") return false;
  return EVALUATE_PHASE_ORCHESTRATOR_ARTIFACTS.has(basename);
}
285
+
/**
 * Resolve `absPath` to a path relative to the harness runs root, but only when
 * it is a plan-run scoped path belonging to the given run.
 *
 * @param absPath Target path; normalized against `projectRoot` first.
 * @param runCtx Run context whose `run_id` must be the first path segment.
 * @param projectRoot Project root used to locate the runs root.
 * @returns The relative path, or null when the path is not plan-run scoped or
 *   belongs to a different run.
 */
async function planRunScopedRelative(
  absPath: string,
  runCtx: HarnessRunContext,
  projectRoot: string,
): Promise<string | null> {
  // If realpath rejects (for example the target does not exist yet), fall back
  // to the unresolved path so the comparison can still be made.
  const realOrSelf = async (p: string): Promise<string> => {
    try {
      return await realpath(p);
    } catch {
      return p;
    }
  };

  const target = await realOrSelf(normalizeHarnessPath(absPath, projectRoot));
  const runsRoot = await realOrSelf(resolve(harnessRunsRoot(projectRoot)));

  const rel = relative(runsRoot, target);
  if (!isPlanRunScopedRelativePath(rel)) return null;

  const [runSegment] = rel.split(/[/\\]/);
  if (runSegment !== runCtx.run_id) return null;
  return rel;
}
310
+
247
311
  /** True when absPath is a writable plan-run artifact for the active run. */
248
312
  export async function isPlanPhaseScopedWrite(
249
313
  absPath: string,
@@ -502,7 +566,8 @@ export async function isPlanPhaseAllowedMutation(
502
566
  if (
503
567
  runCtx?.owner_pi_session_id &&
504
568
  opts.currentSessionId &&
505
- runCtx.owner_pi_session_id !== opts.currentSessionId
569
+ runCtx.owner_pi_session_id !== opts.currentSessionId &&
570
+ !isHarnessSubprocess()
506
571
  ) {
507
572
  return {
508
573
  allowed: false,
@@ -540,10 +605,16 @@ export async function isPlanPhaseAllowedMutation(
540
605
  if (phase === "execute" || phase === "merge") {
541
606
  return { allowed: true, isScopedPlanWrite: true };
542
607
  }
608
+ if (phase === "evaluate" || phase === "adversary") {
609
+ const rel = await planRunScopedRelative(target, runCtx, projectRoot);
610
+ if (rel && isEvaluatePhaseOrchestratorArtifact(rel)) {
611
+ return { allowed: true, isScopedPlanWrite: true };
612
+ }
613
+ }
543
614
  return {
544
615
  allowed: false,
545
616
  isScopedPlanWrite: true,
546
- reason: `harness-run-context: plan-packet.yaml is read-only in phase '${phase}'.`,
617
+ reason: `harness-run-context: plan-run artifact is read-only in phase '${phase}'.`,
547
618
  };
548
619
  }
549
620
 
@@ -844,6 +915,97 @@ export function isStaleActiveRunPointer(
844
915
  return ageMs > activeRunTtlHours() * 60 * 60 * 1000;
845
916
  }
846
917
 
/** Data needed to offer resuming an on-disk harness run from a new Pi session. */
export interface CrossSessionResumeInfo {
  /** Id of the run found on disk. */
  runId: string;
  /** Slash command the user should run to bind this session to the run. */
  resumeCommand: string;
  /** Phase recorded in the on-disk run context. */
  phase: HarnessPhase;
  /** Whether the on-disk run context marks the plan as ready. */
  planReady: boolean;
  /** Suggested follow-up command once the run is bound, if any. */
  nextAfterResume: string | null;
  /** Task summary from the on-disk run context, if any. */
  taskSummary: string | null;
}
926
+
/**
 * True when this session already showed the cross-session resume prompt for `runId`.
 *
 * Scans every session entry for a custom entry of type
 * "harness-session-resume-prompt" whose `data.run_id` equals `runId`. A prompt
 * recorded for a different run does not end the search, and entries that are
 * not objects are ignored.
 *
 * @param entries Session entries (untyped).
 * @param runId Run id to look for.
 */
export function sessionHasResumePromptForRun(
  entries: unknown[],
  runId: string,
): boolean {
  return entries.some((raw) => {
    if (typeof raw !== "object" || raw === null) return false;
    const entry = raw as {
      type?: unknown;
      customType?: unknown;
      data?: unknown;
    };
    if (entry.type !== "custom") return false;
    if (entry.customType !== "harness-session-resume-prompt") return false;
    const data = entry.data as { run_id?: unknown } | null | undefined;
    return data?.run_id === runId;
  });
}
941
+
/**
 * Render the human-readable notice offering to resume an on-disk harness run.
 *
 * The task summary is collapsed onto a single line (so it cannot break the
 * key/value layout) and truncated to 80 characters with a trailing "...".
 *
 * @param info Resume details to render.
 * @returns A newline-joined message.
 */
export function formatCrossSessionResumeMessage(
  info: CrossSessionResumeInfo,
): string {
  const lines = [
    "Previous Pi session left an active harness run on disk.",
    ` run_id: ${info.runId}`,
    ` phase: ${info.phase}`,
    info.planReady ? " plan: approved" : " plan: not ready",
  ];

  const oneLine = info.taskSummary?.replace(/\s+/g, " ").trim() ?? "";
  if (oneLine) {
    const summary = oneLine.length > 80 ? `${oneLine.slice(0, 77)}...` : oneLine;
    lines.push(` task: ${summary}`);
  }

  lines.push("", `Resume this session with: ${info.resumeCommand}`);
  if (info.nextAfterResume) {
    lines.push(`After binding, next step: ${info.nextAfterResume}`);
  }
  return lines.join("\n");
}
964
+
/**
 * Build the resume offer for the run referenced by the project's active-run pointer.
 *
 * @param projectRoot Project root used to locate run data on disk.
 * @param pointer Active-run pointer to evaluate.
 * @returns Resume details, or null when the pointer is stale, the run context
 *   cannot be loaded from disk, or the run status is "completed".
 */
export async function resolveCrossSessionResumeInfo(
  projectRoot: string,
  pointer: ProjectActiveRunPointer,
): Promise<CrossSessionResumeInfo | null> {
  if (isStaleActiveRunPointer(pointer, projectRoot)) return null;

  const runId = pointer.run_id;
  const disk = await loadRunContextFromDisk(runId, projectRoot);
  if (!disk || disk.status === "completed") return null;

  // No session entries are passed here, so statuses are resolved with an
  // empty entry list plus whatever the on-disk artifacts provide.
  const statuses = await resolveCompletionStatuses([], runId, projectRoot);
  const nextAfterResume = nextStepAfterOutcome({
    phase: disk.phase,
    planStatus: disk.plan_ready ? "ready" : null,
    lastCompletedStep: disk.last_completed_step,
    lastOutcome: disk.last_outcome,
    executionStatus: statuses.executionStatus,
    evalStatus: statuses.evalStatus,
    adversaryComplete: statuses.adversaryComplete,
    aborted: disk.status === "aborted",
  });

  return {
    runId,
    resumeCommand: `/harness-use-run ${runId} --claim`,
    phase: disk.phase,
    planReady: disk.plan_ready,
    nextAfterResume,
    taskSummary: disk.task_summary,
  };
}
997
+
/**
 * Offer resume when disk has an active run but this Pi session is not bound yet.
 *
 * @param projectRoot Project root used to load the active-run pointer.
 * @param entries Entries of the current session.
 * @returns Resume details, or null when the session already has a run context,
 *   no active-run pointer exists, or the pointer does not qualify for resume.
 */
export async function evaluateCrossSessionResume(
  projectRoot: string,
  entries: unknown[],
): Promise<CrossSessionResumeInfo | null> {
  // A session that already carries a run context is bound; nothing to offer.
  if (getLatestRunContext(entries)) return null;

  const pointer = await loadProjectActiveRun(projectRoot);
  return pointer ? resolveCrossSessionResumeInfo(projectRoot, pointer) : null;
}
1008
+
847
1009
  export async function readPlanPacketFromPath(
848
1010
  planPath: string,
849
1011
  ): Promise<PlanPacketLike | null> {
@@ -906,31 +1068,78 @@ export function planPacketSummary(
906
1068
  };
907
1069
  }
908
1070
 
/**
 * Extract `execution_plan.schedule_metadata.critical_path_work_item_ids` from a
 * plan packet.
 *
 * Only string and finite-number entries are kept (stringified and trimmed);
 * other values such as null or objects are dropped rather than turned into
 * "null" / "[object Object]" ids.
 *
 * @param packet Plan packet, or null/undefined.
 * @returns The non-empty ids in their original order, or undefined when the
 *   field is missing, malformed, or yields no usable id.
 */
export function criticalPathWorkItemIdsFromPlanPacket(
  packet: PlanPacketLike | null | undefined,
): string[] | undefined {
  const plan: unknown = packet?.execution_plan;
  if (!plan || typeof plan !== "object") return undefined;

  const meta = (plan as Record<string, unknown>).schedule_metadata;
  if (!meta || typeof meta !== "object") return undefined;

  const ids = (meta as Record<string, unknown>).critical_path_work_item_ids;
  if (!Array.isArray(ids)) return undefined;

  const out: string[] = [];
  for (const id of ids as unknown[]) {
    const usable =
      typeof id === "string" || (typeof id === "number" && Number.isFinite(id));
    if (!usable) continue;
    const text = String(id).trim();
    if (text.length > 0) out.push(text);
  }
  return out.length > 0 ? out : undefined;
}
1085
+
/**
 * Serialize the HarnessSpawnContext JSON snippet for a run.
 *
 * `mode` defaults to "revise" when the plan is ready or the run is aborted,
 * otherwise "create". `risk_level` defaults to "med" and `quick` to false.
 * `critical_path_work_item_ids` and `repair_brief_path` are emitted only when
 * provided and non-empty.
 *
 * @param ctx Run context supplying run id, plan packet path and task summary.
 * @param opts Optional overrides and extra fields.
 * @returns Pretty-printed JSON (2-space indent).
 */
export function buildHarnessSpawnContextSnippet(
  ctx: HarnessRunContext,
  opts?: {
    mode?:
      | "create"
      | "revise"
      | "execute"
      | "repair"
      | "benchmark"
      | "verdict"
      | "adversary";
    risk_level?: string;
    quick?: boolean;
    critical_path_work_item_ids?: string[];
    repair_brief_path?: string;
  },
): string {
  const inferredMode =
    ctx.plan_ready || ctx.status === "aborted" ? "revise" : "create";

  const snippet: Record<string, unknown> = {
    schema_version: "1.0.0",
    run_id: ctx.run_id,
    plan_packet_path: ctx.plan_packet_path,
    task_summary: ctx.task_summary,
    mode: opts?.mode ?? inferredMode,
    risk_level: opts?.risk_level ?? "med",
    quick: opts?.quick ?? false,
  };

  const criticalPath = opts?.critical_path_work_item_ids;
  if (criticalPath && criticalPath.length > 0) {
    snippet.critical_path_work_item_ids = criticalPath;
  }
  if (opts?.repair_brief_path) {
    snippet.repair_brief_path = opts.repair_brief_path;
  }
  return JSON.stringify(snippet, null, 2);
}
930
1126
 
931
1127
  export function formatPlanContextBlock(
932
1128
  ctx: HarnessRunContext,
933
- opts?: { mode?: "create" | "revise"; risk_level?: string; quick?: boolean },
1129
+ opts?: {
1130
+ mode?:
1131
+ | "create"
1132
+ | "revise"
1133
+ | "execute"
1134
+ | "repair"
1135
+ | "benchmark"
1136
+ | "verdict"
1137
+ | "adversary";
1138
+ risk_level?: string;
1139
+ quick?: boolean;
1140
+ critical_path_work_item_ids?: string[];
1141
+ repair_brief_path?: string;
1142
+ },
934
1143
  ): string {
935
1144
  const lines = [
936
1145
  "[HarnessRunContext]",
@@ -952,6 +1161,14 @@ export function formatPlanContextBlock(
952
1161
  if (ctx.task_summary) {
953
1162
  lines.push(`task_summary=${ctx.task_summary}`);
954
1163
  }
1164
+ if (
1165
+ opts?.critical_path_work_item_ids &&
1166
+ opts.critical_path_work_item_ids.length > 0
1167
+ ) {
1168
+ lines.push(
1169
+ `critical_path_work_item_ids=${opts.critical_path_work_item_ids.join(",")}`,
1170
+ );
1171
+ }
955
1172
  lines.push(
956
1173
  `HarnessSpawnContext=${buildHarnessSpawnContextSnippet(ctx, opts)}`,
957
1174
  );
@@ -1006,6 +1223,160 @@ export function parseArgFlag(args: string, flag: string): string | null {
1006
1223
  return m[2] ?? m[3] ?? null;
1007
1224
  }
1008
1225
 
/**
 * True when `flag` appears in `args` as a whole whitespace-delimited token.
 *
 * `flag` is matched literally: regular-expression metacharacters in it are
 * escaped before the pattern is built.
 *
 * @param args Raw slash-command argument string.
 * @param flag Flag text to look for, e.g. "--readonly".
 */
export function hasHarnessArgFlag(args: string, flag: string): boolean {
  const literal = flag.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  return new RegExp(`(?:^|\\s)${literal}(?:\\s|$)`).test(args.trim());
}
1229
+
/**
 * Split slash-command args into flags and positional tokens (run-id, task text, etc.).
 *
 * Tokens are separated by whitespace. Any token starting with "--" is
 * collected as a flag; all others are kept, in order, as positionals.
 *
 * @param args Raw argument string.
 * @returns The set of flag tokens and the ordered positional tokens.
 */
export function parseHarnessArgTokens(args: string): {
  flags: Set<string>;
  positional: string[];
} {
  const flags = new Set<string>();
  const positional: string[] = [];
  for (const token of args.trim().split(/\s+/)) {
    // Splitting an empty string yields a single "" token; skip it.
    if (token === "") continue;
    if (token.startsWith("--")) flags.add(token);
    else positional.push(token);
  }
  return { flags, positional };
}
1247
+
/** Parsed arguments of the /harness-use-run command. */
export interface HarnessUseRunArgs {
  /** First positional token, or null when none was given. */
  runId: string | null;
  /** True when the --claim flag is present. */
  claim: boolean;
  /** True when the --readonly flag is present. */
  readonly: boolean;
}

/**
 * Parse the argument string of /harness-use-run.
 *
 * @param args Raw argument string.
 * @returns The run id (first positional token) and the --claim / --readonly flags.
 */
export function parseHarnessUseRunArgs(args: string): HarnessUseRunArgs {
  const tokens = parseHarnessArgTokens(args);
  const [firstPositional] = tokens.positional;
  return {
    runId: firstPositional ?? null,
    claim: tokens.flags.has("--claim"),
    readonly: tokens.flags.has("--readonly"),
  };
}
1262
+
/** Post-run orchestration commands that may take ownership of a resumed run. */
export const HARNESS_POST_RUN_CLAIM_COMMANDS = new Set([
  "harness-review",
  "harness-steer",
  "harness-eval",
  "harness-critic",
]);

/**
 * Decide whether invoking `command` should automatically claim run ownership.
 *
 * @param command Command name without the leading slash.
 * @param args Raw argument string of the invocation.
 * @returns true for commands in HARNESS_POST_RUN_CLAIM_COMMANDS, unless the
 *   args carry the --readonly flag.
 */
export function shouldAutoClaimHarnessRun(
  command: string,
  args: string,
): boolean {
  const readOnlyRequested = hasHarnessArgFlag(args, "--readonly");
  return !readOnlyRequested && HARNESS_POST_RUN_CLAIM_COMMANDS.has(command);
}
1278
+
/**
 * Return a copy of `ctx` bound to and owned by `sessionId`.
 *
 * Sets both `pi_session_id` and `owner_pi_session_id` to `sessionId` and
 * refreshes `updated_at`; the input context is not mutated.
 *
 * @param ctx Run context to claim.
 * @param sessionId Id of the claiming Pi session.
 */
export function claimRunOwnership(
  ctx: HarnessRunContext,
  sessionId: string,
): HarnessRunContext {
  const claimed: HarnessRunContext = {
    ...ctx,
    pi_session_id: sessionId,
    owner_pi_session_id: sessionId,
    updated_at: nowIso(),
  };
  return claimed;
}
1290
+
/** Fields read from a run's `artifacts/eval-verdict.yaml`. */
export interface EvalVerdictDisk {
  status?: string;
  recommended_action?: string;
}

/** Fields read from a run's `artifacts/adversary-report.yaml`. */
export interface AdversaryReportDisk {
  block_merge?: boolean;
  severity?: string;
}
1300
+
/**
 * Read `<runs-root>/<runId>/artifacts/eval-verdict.yaml`.
 *
 * Best-effort: any failure while locating or reading the file yields null, as
 * does a document that is not a plain object.
 *
 * @param runId Run whose verdict should be read.
 * @param projectRoot Project root used to locate the runs root.
 */
export async function readEvalVerdictFromRun(
  runId: string,
  projectRoot: string,
): Promise<EvalVerdictDisk | null> {
  try {
    const verdictPath = join(
      harnessRunsRoot(projectRoot),
      runId,
      "artifacts",
      "eval-verdict.yaml",
    );
    const doc: unknown = await readYamlFile(verdictPath, "eval-verdict");
    // Guard the cast: empty or scalar YAML is not a verdict.
    const isRecord =
      typeof doc === "object" && doc !== null && !Array.isArray(doc);
    return isRecord ? (doc as EvalVerdictDisk) : null;
  } catch {
    return null;
  }
}
1317
+
/**
 * Read `<runs-root>/<runId>/artifacts/adversary-report.yaml`.
 *
 * Best-effort: any failure while locating or reading the file yields null, as
 * does a document that is not a plain object.
 *
 * @param runId Run whose adversary report should be read.
 * @param projectRoot Project root used to locate the runs root.
 */
export async function readAdversaryReportFromRun(
  runId: string,
  projectRoot: string,
): Promise<AdversaryReportDisk | null> {
  try {
    const reportPath = join(
      harnessRunsRoot(projectRoot),
      runId,
      "artifacts",
      "adversary-report.yaml",
    );
    const doc: unknown = await readYamlFile(reportPath, "adversary-report");
    // Guard the cast: empty or scalar YAML is not a report.
    const isRecord =
      typeof doc === "object" && doc !== null && !Array.isArray(doc);
    return isRecord ? (doc as AdversaryReportDisk) : null;
  } catch {
    return null;
  }
}
1337
+
/** Per-step completion statuses for a run. */
export interface CompletionStatuses {
  planStatus: string | null;
  executionStatus: string | null;
  evalStatus: string | null;
  /** True when an adversary report exists on disk for the run. */
  adversaryComplete: boolean;
}
1344
+
/**
 * Session handoff entries overlaid with canonical on-disk post-run artifacts.
 *
 * Precedence:
 * - `evalStatus`: the on-disk eval verdict status wins over the session value.
 * - `executionStatus`: the session value wins; the executor handoff on disk
 *   only fills it in when the session has none.
 * - `adversaryComplete`: true when an adversary report can be read from disk.
 *
 * @param entries Session entries to extract statuses from.
 * @param runId Run to inspect on disk; when null only session values are
 *   returned and `adversaryComplete` is false.
 * @param projectRoot Project root used to locate run artifacts.
 */
export async function resolveCompletionStatuses(
  entries: unknown[],
  runId: string | null,
  projectRoot: string,
): Promise<CompletionStatuses> {
  const session = extractCompletionStatuses(entries);
  if (!runId) {
    return { ...session, adversaryComplete: false };
  }

  // The three artifact reads are independent of each other; run them together.
  const [verdict, handoff, adversary] = await Promise.all([
    readEvalVerdictFromRun(runId, projectRoot),
    readExecutorHandoffFromRun(runId, projectRoot),
    readAdversaryReportFromRun(runId, projectRoot),
  ]);

  const evalStatus = verdict?.status ? verdict.status : session.evalStatus;
  const executionStatus =
    !session.executionStatus && handoff?.execution_status
      ? handoff.execution_status
      : session.executionStatus;

  return {
    planStatus: session.planStatus,
    executionStatus,
    evalStatus,
    adversaryComplete: adversary != null,
  };
}
1379
+
1009
1380
  export function resolveArgsForCommand(
1010
1381
  command: string,
1011
1382
  args: string,
@@ -1026,8 +1397,11 @@ export function resolveArgsForCommand(
1026
1397
  }
1027
1398
 
1028
1399
  if (command === "harness-use-run" && args.trim()) {
1029
- runId = args.trim().split(/\s+/)[0] ?? runId;
1030
- overrideRun = true;
1400
+ const parsed = parseHarnessUseRunArgs(args);
1401
+ if (parsed.runId) {
1402
+ runId = parsed.runId;
1403
+ overrideRun = true;
1404
+ }
1031
1405
  }
1032
1406
 
1033
1407
  return { runId, planPath, overrideRun };
@@ -1129,7 +1503,9 @@ export function inferHarnessPhaseFromPrompt(prompt: string): HarnessPhase {
1129
1503
  if (p.startsWith("/harness-plan") || p.startsWith("/harness-auto")) {
1130
1504
  return "plan";
1131
1505
  }
1132
- if (p.startsWith("/harness-run")) return "execute";
1506
+ if (p.startsWith("/harness-run") || p.startsWith("/harness-steer")) {
1507
+ return "execute";
1508
+ }
1133
1509
  if (p.startsWith("/harness-eval") || p.startsWith("/harness-review")) {
1134
1510
  return "evaluate";
1135
1511
  }
@@ -1262,13 +1638,51 @@ export function isNewTaskPlanBlocked(
1262
1638
  return newTask.length > 0 && prior.length > 0;
1263
1639
  }
1264
1640
 
/** Known remediation classes carried by a review outcome. */
export type RemediationClass =
  | "pass"
  | "implementation_gap"
  | "plan_gap"
  | "rollback"
  | "inconclusive";

/** Fields read from a run's `artifacts/review-outcome.yaml`. */
export interface ReviewOutcomeLike {
  schema_version?: string;
  status?: string;
  /** A known RemediationClass, or any other string found on disk. */
  remediation_class?: RemediationClass | string;
  recommended_next?: string;
}
1654
+
/**
 * Read `<runs-root>/<runId>/artifacts/review-outcome.yaml`.
 *
 * Best-effort: any failure while locating or reading the file yields null, as
 * does a document that is not a plain object.
 *
 * @param runId Run whose review outcome should be read.
 * @param projectRoot Project root used to locate the runs root.
 */
export async function readReviewOutcomeFromRun(
  runId: string,
  projectRoot: string,
): Promise<ReviewOutcomeLike | null> {
  try {
    const outcomePath = join(
      harnessRunsRoot(projectRoot),
      runId,
      "artifacts",
      "review-outcome.yaml",
    );
    const doc: unknown = await readYamlFile(outcomePath, "review-outcome");
    // Guard the cast: empty or scalar YAML is not a review outcome.
    const isRecord =
      typeof doc === "object" && doc !== null && !Array.isArray(doc);
    return isRecord ? (doc as ReviewOutcomeLike) : null;
  } catch {
    return null;
  }
}
1671
+
1265
1672
  export function nextStepAfterOutcome(input: {
1266
1673
  phase: HarnessPhase;
1267
1674
  planStatus?: string | null;
1268
1675
  executionStatus?: string | null;
1269
1676
  evalStatus?: string | null;
1677
+ lastCompletedStep?: string | null;
1678
+ lastOutcome?: string | null;
1270
1679
  policyDecision?: string | null;
1271
1680
  aborted?: boolean;
1681
+ adversaryComplete?: boolean;
1682
+ remediationClass?: string | null;
1683
+ steerAttempt?: number;
1684
+ steerMaxAttempts?: number;
1685
+ reviewComplete?: boolean;
1272
1686
  }): string {
1273
1687
  if (input.aborted) {
1274
1688
  return '/harness-plan "<task>"';
@@ -1277,26 +1691,103 @@ export function nextStepAfterOutcome(input: {
1277
1691
  if (plan === "needs_clarification") {
1278
1692
  return "Reply with answers or run /harness-plan with updates";
1279
1693
  }
1280
- if (input.phase === "plan" && plan === "ready") return "/harness-run";
1281
- if (input.phase === "execute") {
1282
- const exec = (input.executionStatus ?? "").toLowerCase();
1283
- if (exec === "blocked" || exec === "scope_drift") {
1284
- return "/harness-plan or /harness-abort";
1694
+
1695
+ const lastStep = (input.lastCompletedStep ?? "").toLowerCase();
1696
+ const exec = (input.executionStatus ?? "").toLowerCase();
1697
+ const lastOutcome = (input.lastOutcome ?? "").toLowerCase();
1698
+ const evalSt = (input.evalStatus ?? "").toLowerCase();
1699
+ const remediation = (input.remediationClass ?? "").toLowerCase();
1700
+ const steerAttempt = input.steerAttempt ?? 0;
1701
+ const steerMax = input.steerMaxAttempts ?? steerMaxAttemptsFromEnv();
1702
+
1703
+ const executionResolved = exec || (lastStep === "execute" ? lastOutcome : "");
1704
+
1705
+ const executeFinished =
1706
+ executionResolved === "completed" ||
1707
+ (lastStep === "execute" && input.phase === "evaluate") ||
1708
+ lastStep === "steer";
1709
+
1710
+ if (
1711
+ (executionResolved === "blocked" || executionResolved === "scope_drift") &&
1712
+ !input.reviewComplete &&
1713
+ lastStep !== "review"
1714
+ ) {
1715
+ return "/harness-review";
1716
+ }
1717
+
1718
+ if (input.phase === "plan" && plan === "ready") {
1719
+ return "/harness-run";
1720
+ }
1721
+
1722
+ if (executeFinished && !input.reviewComplete && lastStep !== "review") {
1723
+ return "/harness-review";
1724
+ }
1725
+
1726
+ if (input.phase === "execute" && lastStep === "steer") {
1727
+ return "/harness-review";
1728
+ }
1729
+
1730
+ if (input.phase === "execute" && !executeFinished) {
1731
+ return "/harness-run-status";
1732
+ }
1733
+
1734
+ if (input.phase === "evaluate" || input.phase === "adversary") {
1735
+ if (remediation === "pass" || evalSt === "pass") {
1736
+ if (input.adversaryComplete) return "/harness-policy-status";
1737
+ return "/harness-review";
1285
1738
  }
1286
- if (exec === "completed") {
1287
- return "/harness-eval";
1739
+ if (remediation === "rollback") {
1740
+ return "/harness-incident";
1288
1741
  }
1289
- }
1290
- if (input.phase === "evaluate") {
1291
- const ev = (input.evalStatus ?? "").toLowerCase();
1292
- if (ev === "fail") return "/harness-plan or /harness-incident";
1742
+ if (remediation === "plan_gap") {
1743
+ return "/harness-plan (mode: revise)";
1744
+ }
1745
+ if (
1746
+ remediation === "implementation_gap" ||
1747
+ (remediation === "inconclusive" && evalSt === "fail")
1748
+ ) {
1749
+ if (steerAttempt < steerMax) {
1750
+ return "/harness-steer";
1751
+ }
1752
+ return "/harness-plan (mode: revise) or /harness-abort";
1753
+ }
1754
+ if (evalSt === "fail") {
1755
+ if (steerAttempt < steerMax) return "/harness-steer";
1756
+ return "/harness-plan (mode: revise) or /harness-incident";
1757
+ }
1758
+ if (input.adversaryComplete) return "/harness-policy-status";
1293
1759
  return "/harness-review";
1294
1760
  }
1295
- if (input.phase === "adversary") return "/harness-policy-status";
1761
+
1296
1762
  if (input.phase === "merge") return "/harness-policy-status";
1297
1763
  return "/harness-run-status";
1298
1764
  }
1299
1765
 
/**
 * Read executor handoff artifact written by harness/running/executor submit pipeline.
 *
 * Reads `<runs-root>/<runId>/handoff/executor-summary.yaml`. Best-effort: any
 * failure while locating or reading the file yields null, as does a document
 * that is not a plain object.
 *
 * @param runId Run whose executor handoff should be read.
 * @param projectRoot Project root used to locate the runs root.
 */
export async function readExecutorHandoffFromRun(
  runId: string,
  projectRoot: string,
): Promise<{
  execution_status?: string;
  next_command?: string;
} | null> {
  try {
    const handoffPath = join(
      harnessRunsRoot(projectRoot),
      runId,
      "handoff",
      "executor-summary.yaml",
    );
    const doc: unknown = await readYamlFile(handoffPath, "executor-handoff");
    // Guard the cast: empty or scalar YAML is not a handoff document.
    const isRecord =
      typeof doc === "object" && doc !== null && !Array.isArray(doc);
    return isRecord
      ? (doc as { execution_status?: string; next_command?: string })
      : null;
  } catch {
    return null;
  }
}
1790
+
1300
1791
  export function extractCompletionStatuses(entries: unknown[]): {
1301
1792
  planStatus: string | null;
1302
1793
  executionStatus: string | null;
@@ -1398,3 +1889,71 @@ export function extractPlanApprovalsFromEntries(
1398
1889
  }
1399
1890
  return out;
1400
1891
  }
1892
+
/**
 * True inside `pi --mode json` harness subagent subprocesses.
 *
 * Detection is based solely on the PI_HARNESS_SUBPROCESS env var being exactly "1".
 */
export function isHarnessSubprocess(): boolean {
  const marker = process.env.PI_HARNESS_SUBPROCESS;
  return marker === "1";
}
1897
+
/**
 * Run id from the HARNESS_RUN_ID env var.
 *
 * @returns The trimmed value, or null when the variable is unset or blank.
 */
export function harnessSubprocessRunId(): string | null {
  const trimmed = process.env.HARNESS_RUN_ID?.trim();
  return trimmed ? trimmed : null;
}
1902
+
/**
 * Load approved run context for a harness subagent subprocess (env + disk).
 *
 * Resolution order:
 * 1. Returns null unless this process is a harness subprocess with a run id in
 *    the environment.
 * 2. Returns the run context stored on disk for that run id when one exists.
 * 3. Otherwise synthesizes a minimal context from the project active-run
 *    pointer, but only when the pointer refers to the same run id.
 *
 * @param projectRoot Project root used to locate run data.
 */
export async function loadRunContextForSubprocess(
  projectRoot: string,
): Promise<HarnessRunContext | null> {
  if (!isHarnessSubprocess()) return null;
  const runId = harnessSubprocessRunId();
  if (!runId) return null;

  const fromDisk = await loadRunContextFromDisk(runId, projectRoot);
  if (fromDisk) return fromDisk;

  const pointer = await loadProjectActiveRun(projectRoot);
  if (!pointer || pointer.run_id !== runId) return null;

  // Fallback context: fields the pointer does not carry are left empty/null.
  return {
    schema_version: "1.0.0",
    run_id: pointer.run_id,
    pi_session_id: "",
    project_root: projectRoot,
    phase: pointer.phase,
    plan_id: pointer.plan_id,
    plan_packet_path: canonicalPlanPath(pointer.run_id, projectRoot),
    plan_ready: pointer.plan_ready,
    task_summary: null,
    status: "active",
    last_completed_step: null,
    last_outcome: null,
    next_recommended_command: null,
    owner_pi_session_id: pointer.owner_pi_session_id,
    updated_at: pointer.updated_at,
  };
}
1935
+
/** Phase flags handed to the policy gate for a subprocess. */
export interface HarnessPolicyBootstrap {
  phase: HarnessPhase;
  approvedPlan: boolean;
  planId: string | null;
}

/**
 * Map disk run context + subprocess agent id to policy-gate phase flags.
 *
 * The phase defaults to the run context's phase and is overridden from the
 * HARNESS_AGENT_ID env var. Agents under "harness/planning/" always map to
 * "plan" and are checked first, so planning agents whose names contain
 * "evaluator" or "adversary" (e.g. plan-evaluator, plan-adversary) are not
 * misclassified. Otherwise ids containing "executor" map to "execute",
 * "evaluator" to "evaluate", and "adversary" or "tie-breaker" to "adversary".
 *
 * @param runCtx Run context loaded for the subprocess.
 */
export function policyBootstrapFromRunContext(
  runCtx: HarnessRunContext,
): HarnessPolicyBootstrap {
  const agentId = process.env.HARNESS_AGENT_ID?.trim() ?? "";

  let phase = runCtx.phase;
  if (agentId.startsWith("harness/planning/")) {
    phase = "plan";
  } else if (agentId.includes("executor")) {
    phase = "execute";
  } else if (agentId.includes("evaluator")) {
    phase = "evaluate";
  } else if (agentId.includes("adversary") || agentId.includes("tie-breaker")) {
    phase = "adversary";
  }

  return {
    phase,
    approvedPlan: runCtx.plan_ready,
    planId: runCtx.plan_id,
  };
}