ultimate-pi 0.22.0 → 0.22.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. package/.agents/skills/harness-context/SKILL.md +3 -3
  2. package/.agents/skills/harness-debate-plan/SKILL.md +2 -2
  3. package/.agents/skills/harness-decisions/SKILL.md +2 -2
  4. package/.agents/skills/harness-eval/SKILL.md +1 -1
  5. package/.agents/skills/harness-git-commit/SKILL.md +1 -1
  6. package/.agents/skills/harness-governor/SKILL.md +5 -5
  7. package/.agents/skills/harness-ls-lint-setup/SKILL.md +2 -2
  8. package/.agents/skills/harness-orchestration/SKILL.md +4 -4
  9. package/.agents/skills/harness-plan/SKILL.md +2 -2
  10. package/.agents/skills/harness-review/SKILL.md +2 -2
  11. package/.agents/skills/harness-sentrux-repair/SKILL.md +1 -1
  12. package/.agents/skills/harness-sentrux-setup/SKILL.md +2 -2
  13. package/.agents/skills/harness-spec/SKILL.md +1 -1
  14. package/.agents/skills/harness-steer/SKILL.md +2 -2
  15. package/.agents/skills/posthog-analyst/SKILL.md +1 -1
  16. package/.agents/skills/sentrux/SKILL.md +4 -4
  17. package/.agents/skills/web-retrieval/SKILL.md +1 -1
  18. package/.pi/agents/harness/ls-lint-steward.md +3 -3
  19. package/.pi/agents/harness/planning/decompose.md +1 -1
  20. package/.pi/agents/harness/planning/execution-plan-author.md +1 -1
  21. package/.pi/agents/harness/planning/hypothesis-validator.md +1 -1
  22. package/.pi/agents/harness/planning/hypothesis.md +1 -1
  23. package/.pi/agents/harness/planning/plan-adversary.md +1 -1
  24. package/.pi/agents/harness/planning/plan-evaluator.md +2 -2
  25. package/.pi/agents/harness/planning/plan-synthesizer.md +2 -2
  26. package/.pi/agents/harness/planning/review-integrator.md +1 -1
  27. package/.pi/agents/harness/planning/sprint-contract-auditor.md +5 -5
  28. package/.pi/agents/harness/running/executor.md +1 -1
  29. package/.pi/agents/harness/sentrux-repair-advisor.md +1 -1
  30. package/.pi/agents/harness/sentrux-steward.md +2 -2
  31. package/.pi/extensions/agt-kill-switch.ts +7 -1
  32. package/.pi/extensions/harness-plan-approval.ts +9 -1
  33. package/.pi/extensions/harness-run-context.ts +529 -84
  34. package/.pi/extensions/policy-gate.ts +15 -2
  35. package/.pi/harness/agents.manifest.json +16 -16
  36. package/.pi/harness/agents.policy.yaml +82 -3
  37. package/.pi/harness/specs/plan-task-clarification.schema.json +10 -1
  38. package/.pi/lib/agents-policy.mjs +42 -1
  39. package/.pi/lib/agt/build-evaluation-context.ts +3 -1
  40. package/.pi/lib/agt/kill-switch-state.ts +14 -0
  41. package/.pi/lib/agt/legacy-evaluate.ts +3 -1
  42. package/.pi/lib/ask-user/index.ts +2 -0
  43. package/.pi/lib/ask-user/merge-task-clarification.ts +5 -0
  44. package/.pi/lib/ask-user/policy.ts +23 -0
  45. package/.pi/lib/ask-user/presenters/glimpse.ts +8 -1
  46. package/.pi/lib/ask-user/presenters/headless.ts +15 -0
  47. package/.pi/lib/ask-user/presenters/select.ts +11 -2
  48. package/.pi/lib/ask-user/validate-core.mjs +16 -0
  49. package/.pi/lib/harness-artifact-gate.ts +75 -5
  50. package/.pi/lib/harness-repair-brief.ts +30 -4
  51. package/.pi/lib/harness-run-context.ts +804 -17
  52. package/.pi/lib/harness-schema-validate.ts +147 -38
  53. package/.pi/lib/harness-spawn-policy.ts +9 -0
  54. package/.pi/lib/harness-spawn-topology.ts +109 -7
  55. package/.pi/lib/harness-subagent-precheck.ts +21 -0
  56. package/.pi/lib/harness-subagent-submit-pipeline.ts +95 -21
  57. package/.pi/lib/harness-subagent-submit-register.ts +6 -1
  58. package/.pi/lib/harness-subagents-bridge.ts +3 -0
  59. package/.pi/lib/harness-yaml.ts +11 -3
  60. package/.pi/lib/plan-approval/create-plan.ts +2 -6
  61. package/.pi/lib/plan-debate-gate.ts +87 -0
  62. package/.pi/lib/plan-debate-lane.ts +8 -2
  63. package/.pi/lib/plan-human-gates.ts +322 -0
  64. package/.pi/prompts/harness-clear.md +25 -0
  65. package/.pi/prompts/harness-plan.md +11 -7
  66. package/.pi/prompts/harness-review.md +5 -5
  67. package/.pi/prompts/harness-run.md +2 -2
  68. package/.pi/prompts/harness-sentrux-steward.md +2 -2
  69. package/.pi/prompts/harness-setup.md +3 -3
  70. package/.pi/prompts/harness-steer.md +5 -5
  71. package/.pi/scripts/generate-agents-policy-yaml.mjs +73 -7
  72. package/.pi/scripts/harness-reconcile-run-context.mjs +62 -0
  73. package/.pi/scripts/harness-schema-compile-verify.mjs +29 -0
  74. package/.pi/scripts/harness-verify.mjs +100 -0
  75. package/AGENTS.md +1 -0
  76. package/CHANGELOG.md +13 -0
  77. package/README.md +4 -0
  78. package/package.json +9 -6
@@ -6,7 +6,14 @@
6
6
  * - `.pi/harness/active-run.json` (cross-session pointer)
7
7
  */
8
8
 
9
- import { mkdir, readFile, realpath, writeFile } from "node:fs/promises";
9
+ import {
10
+ mkdir,
11
+ readdir,
12
+ readFile,
13
+ realpath,
14
+ stat,
15
+ writeFile,
16
+ } from "node:fs/promises";
10
17
  import { isAbsolute, join, relative, resolve } from "node:path";
11
18
  import {
12
19
  isPlanApprovalAskUser,
@@ -14,6 +21,7 @@ import {
14
21
  PLAN_CANCEL_OPTION,
15
22
  } from "./ask-user/policy.js";
16
23
  import { readYamlFile, writeYamlFile } from "./harness-yaml.js";
24
+ import { readTaskClarificationDoc } from "./plan-task-clarification.js";
17
25
 
18
26
  export { isPlanApprovalAskUser } from "./ask-user/policy.js";
19
27
 
@@ -82,6 +90,19 @@ export interface PlanPacketLike {
82
90
  execution_plan?: unknown;
83
91
  }
84
92
 
93
+ export interface HarnessClearManifestItem {
94
+ run_id: string;
95
+ absolute_path: string;
96
+ canonical_path: string;
97
+ }
98
+
99
+ export interface HarnessClearManifest {
100
+ runs_root: string;
101
+ protected_run_ids: string[];
102
+ candidates: ReadonlyArray<HarnessClearManifestItem>;
103
+ skipped: ReadonlyArray<{ run_id: string; reason: string }>;
104
+ }
105
+
85
106
  interface SessionEntryLike {
86
107
  type?: string;
87
108
  customType?: string;
@@ -109,12 +130,107 @@ const HARNESS_COMMANDS = new Set([
109
130
  "harness-policy-status",
110
131
  "harness-trace-last",
111
132
  "harness-budget-status",
133
+ "harness-clear",
112
134
  ]);
113
135
 
114
136
  export function harnessRunsRoot(projectRoot: string): string {
115
137
  return join(projectRoot, ".pi", "harness", "runs");
116
138
  }
117
139
 
140
+ export async function buildHarnessClearManifest(
141
+ projectRoot: string,
142
+ protectedRunIds: Iterable<string> = [],
143
+ ): Promise<HarnessClearManifest> {
144
+ const runsRoot = resolve(harnessRunsRoot(projectRoot));
145
+ const protectedSet = new Set(
146
+ [...protectedRunIds]
147
+ .filter(
148
+ (id): id is string => typeof id === "string" && id.trim().length > 0,
149
+ )
150
+ .map((id) => id.trim()),
151
+ );
152
+ const protectedIds = [...protectedSet].sort();
153
+ let runsReal = runsRoot;
154
+ try {
155
+ runsReal = await realpath(runsRoot);
156
+ } catch {
157
+ return {
158
+ runs_root: runsRoot,
159
+ protected_run_ids: protectedIds,
160
+ candidates: Object.freeze([]),
161
+ skipped: Object.freeze([]),
162
+ };
163
+ }
164
+ let entries: Array<{
165
+ name: string;
166
+ isDirectory(): boolean;
167
+ isSymbolicLink(): boolean;
168
+ }>;
169
+ try {
170
+ entries = await readdir(runsRoot, {
171
+ withFileTypes: true,
172
+ encoding: "utf8",
173
+ });
174
+ } catch {
175
+ return {
176
+ runs_root: runsReal,
177
+ protected_run_ids: protectedIds,
178
+ candidates: Object.freeze([]),
179
+ skipped: Object.freeze([]),
180
+ };
181
+ }
182
+ const candidates: HarnessClearManifestItem[] = [];
183
+ const skipped: Array<{ run_id: string; reason: string }> = [];
184
+ for (const entry of entries) {
185
+ if (!entry.isDirectory() && !entry.isSymbolicLink()) continue;
186
+ const runId = entry.name;
187
+ if (protectedSet.has(runId)) {
188
+ skipped.push({ run_id: runId, reason: "protected" });
189
+ continue;
190
+ }
191
+ const absPath = join(runsRoot, runId);
192
+ let canonicalPath: string;
193
+ try {
194
+ canonicalPath = await realpath(absPath);
195
+ } catch {
196
+ skipped.push({ run_id: runId, reason: "unresolvable" });
197
+ continue;
198
+ }
199
+ const rel = relative(runsReal, canonicalPath);
200
+ if (!rel || rel.startsWith("..") || isAbsolute(rel)) {
201
+ skipped.push({ run_id: runId, reason: "out_of_root" });
202
+ continue;
203
+ }
204
+ if (rel !== runId) {
205
+ skipped.push({ run_id: runId, reason: "non_canonical_child" });
206
+ continue;
207
+ }
208
+ try {
209
+ const info = await stat(canonicalPath);
210
+ if (!info.isDirectory()) {
211
+ skipped.push({ run_id: runId, reason: "not_directory" });
212
+ continue;
213
+ }
214
+ } catch {
215
+ skipped.push({ run_id: runId, reason: "missing" });
216
+ continue;
217
+ }
218
+ candidates.push({
219
+ run_id: runId,
220
+ absolute_path: absPath,
221
+ canonical_path: canonicalPath,
222
+ });
223
+ }
224
+ candidates.sort((a, b) => a.run_id.localeCompare(b.run_id));
225
+ skipped.sort((a, b) => a.run_id.localeCompare(b.run_id));
226
+ return {
227
+ runs_root: runsReal,
228
+ protected_run_ids: protectedIds,
229
+ candidates: Object.freeze(candidates.map((item) => Object.freeze(item))),
230
+ skipped: Object.freeze(skipped.map((item) => Object.freeze(item))),
231
+ };
232
+ }
233
+
118
234
  export function activeRunPointerPath(projectRoot: string): string {
119
235
  return join(projectRoot, ".pi", "harness", "active-run.json");
120
236
  }
@@ -158,15 +274,29 @@ const PLAN_RUN_SCOPED_ROOT_FILES = new Set([
158
274
  PLAN_REVIEW_BASENAME,
159
275
  ]);
160
276
 
161
- /** Parent orchestrator artifacts writable during evaluate/adversary (ADR 0044). */
277
+ /**
278
+ * Parent orchestrator artifacts writable during evaluate/adversary (ADR 0044).
279
+ * Keep in sync with harness-review.md / harness-steer.md parent write_harness_yaml paths.
280
+ */
162
281
  export const EVALUATE_PHASE_ORCHESTRATOR_ARTIFACTS = new Set([
163
282
  "benchmark-log.yaml",
164
283
  "review-outcome.yaml",
165
284
  "repair-brief.yaml",
166
285
  "steer-state.yaml",
167
286
  "eval-benchmark.yaml",
287
+ "sentrux-signal.yaml",
288
+ "ls-lint-signal.yaml",
289
+ "sentrux-repair-plan.yaml",
168
290
  ]);
169
291
 
292
+ /** Run-relative path like `artifacts/benchmark-log.yaml` (no run_id prefix). */
293
+ export function isEvaluatePhaseOrchestratorArtifactRel(rel: string): boolean {
294
+ const norm = rel.replace(/\\/g, "/");
295
+ const parts = norm.split("/");
296
+ if (parts.length !== 2 || parts[0] !== "artifacts") return false;
297
+ return EVALUATE_PHASE_ORCHESTRATOR_ARTIFACTS.has(parts[1]);
298
+ }
299
+
170
300
  export const DEFAULT_STEER_MAX_ATTEMPTS = 3;
171
301
 
172
302
  export function steerMaxAttemptsFromEnv(): number {
@@ -215,6 +345,7 @@ export const HARNESS_COMMAND_PHASE: Record<string, HarnessPhase> = {
215
345
  "harness-use-run": "plan",
216
346
  "harness-policy-status": "merge",
217
347
  "harness-budget-status": "plan",
348
+ "harness-clear": "plan",
218
349
  "harness-setup": "execute",
219
350
  };
220
351
 
@@ -235,6 +366,66 @@ export function normalizeHarnessPath(
235
366
  return resolve(projectRoot, trimmed);
236
367
  }
237
368
 
369
+ /** Run-scoped artifact path without `.pi/harness/runs/<run_id>/` prefix (agent-friendly). */
370
+ export function isBareHarnessRunArtifactPath(rel: string): boolean {
371
+ const norm = rel.replace(/\\/g, "/").replace(/^\.\//, "");
372
+ if (!norm || norm.startsWith("..") || isAbsolute(norm)) return false;
373
+ if (norm.startsWith(".pi/harness/runs/")) return false;
374
+ const parts = norm.split("/");
375
+ if (parts.length === 1 && PLAN_RUN_SCOPED_ROOT_FILES.has(parts[0])) {
376
+ return true;
377
+ }
378
+ if (parts.length === 2 && parts[0] === "artifacts") {
379
+ const file = parts[1];
380
+ return file.endsWith(".yaml") || file.endsWith(".yml");
381
+ }
382
+ if (
383
+ parts.length === 3 &&
384
+ parts[0] === "artifacts" &&
385
+ parts[1] === "context-bundles"
386
+ ) {
387
+ const file = parts[2];
388
+ return file.endsWith(".yaml") || file.endsWith(".yml");
389
+ }
390
+ return false;
391
+ }
392
+
393
+ /**
394
+ * Resolve a harness write path to an absolute file and run-relative gate path.
395
+ * Accepts `artifacts/foo.yaml`, `research-brief.yaml`, full `.pi/harness/runs/<id>/…`, or `<id>/artifacts/…`.
396
+ */
397
+ export function resolveHarnessRunWriteTarget(
398
+ pathArg: string,
399
+ runCtx: HarnessRunContext,
400
+ projectRoot: string,
401
+ ): { absPath: string; relUnderRun: string } | null {
402
+ const trimmed = pathArg.trim().replace(/\\/g, "/");
403
+ if (!trimmed || !runCtx.run_id) return null;
404
+
405
+ const runPrefix = `.pi/harness/runs/${runCtx.run_id}/`;
406
+ let relUnderRun: string | null = null;
407
+
408
+ if (trimmed.startsWith(runPrefix)) {
409
+ relUnderRun = trimmed.slice(runPrefix.length);
410
+ } else if (trimmed.startsWith(`${runCtx.run_id}/`)) {
411
+ relUnderRun = trimmed.slice(`${runCtx.run_id}/`.length);
412
+ } else if (isBareHarnessRunArtifactPath(trimmed)) {
413
+ relUnderRun = trimmed.replace(/^\.\//, "");
414
+ }
415
+
416
+ if (!relUnderRun) return null;
417
+
418
+ const scopedCheck = `${runCtx.run_id}/${relUnderRun}`;
419
+ if (!isPlanRunScopedRelativePath(scopedCheck)) return null;
420
+
421
+ const absPath = join(
422
+ harnessRunsRoot(projectRoot),
423
+ runCtx.run_id,
424
+ relUnderRun,
425
+ );
426
+ return { absPath, relUnderRun };
427
+ }
428
+
238
429
  export function isCanonicalPlanPacketPath(
239
430
  absPath: string,
240
431
  projectRoot: string,
@@ -276,6 +467,7 @@ export function isPlanRunScopedRelativePath(rel: string): boolean {
276
467
  return false;
277
468
  }
278
469
 
470
+ /** Scoped path under `.pi/harness/runs/<run_id>/` (includes run_id prefix). */
279
471
  export function isEvaluatePhaseOrchestratorArtifact(rel: string): boolean {
280
472
  if (rel.startsWith("..") || isAbsolute(rel)) return false;
281
473
  const parts = rel.split(/[/\\]/);
@@ -283,6 +475,27 @@ export function isEvaluatePhaseOrchestratorArtifact(rel: string): boolean {
283
475
  return EVALUATE_PHASE_ORCHESTRATOR_ARTIFACTS.has(parts[2]);
284
476
  }
285
477
 
478
+ /** Strip `<run_id>/` from a path relative to `.pi/harness/runs/`. */
479
+ export function stripRunIdFromHarnessScopedRelative(
480
+ rel: string,
481
+ runId: string,
482
+ ): string {
483
+ const norm = rel.replace(/\\/g, "/");
484
+ const prefix = `${runId}/`;
485
+ return norm.startsWith(prefix) ? norm.slice(prefix.length) : norm;
486
+ }
487
+
488
+ /** Path under the run directory (e.g. `artifacts/foo.yaml`), for gates and artifact keys. */
489
+ export async function relPathUnderActiveRun(
490
+ absPath: string,
491
+ runCtx: HarnessRunContext,
492
+ projectRoot: string,
493
+ ): Promise<string | null> {
494
+ const rel = await planRunScopedRelative(absPath, runCtx, projectRoot);
495
+ if (!rel) return null;
496
+ return stripRunIdFromHarnessScopedRelative(rel, runCtx.run_id);
497
+ }
498
+
286
499
  async function planRunScopedRelative(
287
500
  absPath: string,
288
501
  runCtx: HarnessRunContext,
@@ -508,9 +721,6 @@ export function hasPlanUserApproval(
508
721
  entries: unknown[],
509
722
  opts?: { planId?: string | null; sincePlanCommand?: boolean },
510
723
  ): boolean {
511
- if (process.env.HARNESS_PLAN_NONINTERACTIVE === "1") {
512
- return true;
513
- }
514
724
  const since = opts?.sincePlanCommand
515
725
  ? Math.max(0, indexOfLastPlanCommand(entries))
516
726
  : 0;
@@ -529,6 +739,10 @@ export function isHarnessAutoSession(entries: unknown[]): boolean {
529
739
  const entry = entries[i] as SessionEntryLike & {
530
740
  message?: { role?: string; content?: string };
531
741
  };
742
+ if (entry.type === "custom" && entry.customType === "harness-turn") {
743
+ const cmd = (entry.data as { command?: string })?.command;
744
+ if (cmd === "harness-auto") return true;
745
+ }
532
746
  if (entry.type !== "message" || entry.message?.role !== "user") continue;
533
747
  const text =
534
748
  typeof entry.message.content === "string"
@@ -554,13 +768,7 @@ export async function isPlanPhaseAllowedMutation(
554
768
  },
555
769
  ): Promise<PlanPhaseMutationDecision> {
556
770
  if (!MUTATING_FILE_TOOLS.has(toolName)) {
557
- if (phase === "execute" || phase === "merge") {
558
- return { allowed: true };
559
- }
560
- return {
561
- allowed: false,
562
- reason: `policy-gate: ${toolName} blocked in phase '${phase}'.`,
563
- };
771
+ return { allowed: true };
564
772
  }
565
773
 
566
774
  if (
@@ -607,7 +815,13 @@ export async function isPlanPhaseAllowedMutation(
607
815
  }
608
816
  if (phase === "evaluate" || phase === "adversary") {
609
817
  const rel = await planRunScopedRelative(target, runCtx, projectRoot);
610
- if (rel && isEvaluatePhaseOrchestratorArtifact(rel)) {
818
+ const relForGate = rel
819
+ ? stripRunIdFromHarnessScopedRelative(rel, runCtx.run_id)
820
+ : null;
821
+ if (
822
+ (rel && isEvaluatePhaseOrchestratorArtifact(rel)) ||
823
+ (relForGate && isEvaluatePhaseOrchestratorArtifactRel(relForGate))
824
+ ) {
611
825
  return { allowed: true, isScopedPlanWrite: true };
612
826
  }
613
827
  }
@@ -995,6 +1209,137 @@ export async function readPlanPacketFromPath(
995
1209
  }
996
1210
  }
997
1211
 
1212
+ /**
1213
+ * When plan-packet.yaml is missing (revision reset or pre-packet phase), derive
1214
+ * last_outcome from task-clarification instead of treating the run as invalid.
1215
+ */
1216
+ const PLAN_REVIEW_COMMITTED_RE = /\*\*Status:\*\*\s*committed/i;
1217
+
1218
+ /** True when plan-review.md on disk shows a committed plan (post create_plan). */
1219
+ export async function isPlanCommittedOnDisk(
1220
+ projectRoot: string,
1221
+ runId: string,
1222
+ ): Promise<boolean> {
1223
+ try {
1224
+ const raw = await readFile(
1225
+ canonicalPlanReviewPath(runId, projectRoot),
1226
+ "utf-8",
1227
+ );
1228
+ return PLAN_REVIEW_COMMITTED_RE.test(raw);
1229
+ } catch {
1230
+ return false;
1231
+ }
1232
+ }
1233
+
1234
+ /**
1235
+ * Align plan_ready / last_outcome with on-disk plan packet + plan-review.md
1236
+ * (survives -p sessions where approve_plan is not in the transcript).
1237
+ */
1238
+ export async function syncPlanReadyFromDisk(
1239
+ projectRoot: string,
1240
+ ctx: HarnessRunContext,
1241
+ entries?: unknown[],
1242
+ ): Promise<HarnessRunContext> {
1243
+ const planPath =
1244
+ ctx.plan_packet_path ?? canonicalPlanPath(ctx.run_id, projectRoot);
1245
+ const packet = await readPlanPacketFromPath(planPath);
1246
+ if (!packet) {
1247
+ return syncPlanLastOutcomeFromTaskClarification(projectRoot, ctx);
1248
+ }
1249
+ const validation = validatePlanPacket(packet);
1250
+ if (!validation.valid) {
1251
+ const synced = await syncPlanLastOutcomeFromTaskClarification(projectRoot, {
1252
+ ...ctx,
1253
+ plan_packet_path: planPath,
1254
+ });
1255
+ return {
1256
+ ...synced,
1257
+ plan_ready: false,
1258
+ last_outcome: "needs_clarification",
1259
+ };
1260
+ }
1261
+
1262
+ const committed = await isPlanCommittedOnDisk(projectRoot, ctx.run_id);
1263
+ const approved =
1264
+ committed ||
1265
+ (entries
1266
+ ? hasPlanUserApproval(entries, {
1267
+ sincePlanCommand: true,
1268
+ planId: packet.plan_id ?? null,
1269
+ })
1270
+ : false);
1271
+
1272
+ const updated: HarnessRunContext = {
1273
+ ...ctx,
1274
+ plan_packet_path: planPath,
1275
+ plan_id: packet.plan_id ?? ctx.plan_id,
1276
+ updated_at: nowIso(),
1277
+ };
1278
+
1279
+ if (approved) {
1280
+ updated.plan_ready = true;
1281
+ const preservePostPlanProgress =
1282
+ ctx.last_completed_step === "execute" ||
1283
+ ctx.last_completed_step === "steer" ||
1284
+ ctx.last_completed_step === "review" ||
1285
+ ctx.last_completed_step === "adversary";
1286
+ if (!preservePostPlanProgress) {
1287
+ updated.last_completed_step = "plan";
1288
+ updated.last_outcome = "ready";
1289
+ updated.next_recommended_command = "/harness-run";
1290
+ if (
1291
+ updated.phase !== "execute" &&
1292
+ updated.phase !== "evaluate" &&
1293
+ updated.phase !== "adversary"
1294
+ ) {
1295
+ updated.phase = "plan";
1296
+ }
1297
+ }
1298
+ return updated;
1299
+ }
1300
+
1301
+ updated.plan_ready = false;
1302
+ if (updated.last_outcome !== "needs_clarification") {
1303
+ updated.last_outcome = "pending_approval";
1304
+ }
1305
+ updated.next_recommended_command = nextStepAfterOutcome({
1306
+ phase: updated.phase,
1307
+ planStatus: null,
1308
+ lastOutcome: updated.last_outcome,
1309
+ lastCompletedStep: updated.last_completed_step,
1310
+ });
1311
+ return updated;
1312
+ }
1313
+
1314
+ export async function syncPlanLastOutcomeFromTaskClarification(
1315
+ projectRoot: string,
1316
+ ctx: HarnessRunContext,
1317
+ ): Promise<HarnessRunContext> {
1318
+ const runDir = join(harnessRunsRoot(projectRoot), ctx.run_id);
1319
+ const doc = await readTaskClarificationDoc(runDir);
1320
+ if (!doc) return ctx;
1321
+ const status = String(doc.status ?? "").toLowerCase();
1322
+ const updated: HarnessRunContext = { ...ctx, updated_at: nowIso() };
1323
+ if (status === "ready") {
1324
+ if (updated.last_outcome === "needs_clarification") {
1325
+ updated.last_outcome = null;
1326
+ }
1327
+ } else if (
1328
+ status === "needs_clarification" ||
1329
+ status === "needs_user" ||
1330
+ status === "draft"
1331
+ ) {
1332
+ updated.last_outcome = "needs_clarification";
1333
+ }
1334
+ updated.next_recommended_command = nextStepAfterOutcome({
1335
+ phase: updated.phase,
1336
+ planStatus: status === "ready" ? null : status,
1337
+ lastOutcome: updated.last_outcome,
1338
+ lastCompletedStep: updated.last_completed_step,
1339
+ });
1340
+ return updated;
1341
+ }
1342
+
998
1343
  export function validatePlanPacket(packet: PlanPacketLike | null): {
999
1344
  valid: boolean;
1000
1345
  errors: string[];
@@ -1361,7 +1706,8 @@ export function resolveArgsForCommand(
1361
1706
  ctx: HarnessRunContext | null,
1362
1707
  ): { runId: string | null; planPath: string | null; overrideRun: boolean } {
1363
1708
  let runId = ctx?.run_id ?? null;
1364
- let planPath = ctx?.plan_packet_path ?? null;
1709
+ /** Only honor explicit `--plan`; never inherit stale session plan paths onto fresh runs. */
1710
+ let planPath: string | null = null;
1365
1711
  let overrideRun = false;
1366
1712
 
1367
1713
  const explicitRun = parseArgFlag(args, "--run");
@@ -1417,6 +1763,45 @@ export function getRunIdFromSession(
1417
1763
  return null;
1418
1764
  }
1419
1765
 
1766
+ export function harnessAutoTasksDiffer(
1767
+ ctx: HarnessRunContext,
1768
+ newTask: string,
1769
+ ): boolean {
1770
+ const prior = (ctx.task_summary ?? "").trim().toLowerCase();
1771
+ const next = newTask.trim().toLowerCase();
1772
+ return prior.length > 0 && next.length > 0 && prior !== next;
1773
+ }
1774
+
1775
+ /** Full auto pipeline needs a clean run once execute/review has started. */
1776
+ export function shouldReuseHarnessRunIdForAuto(
1777
+ ctx: HarnessRunContext,
1778
+ ): boolean {
1779
+ if (ctx.status === "aborted") return true;
1780
+ const step = ctx.last_completed_step;
1781
+ if (!step || step === "plan") return true;
1782
+ return false;
1783
+ }
1784
+
1785
+ /** Reset in-run state when restarting /harness-auto on the same run directory. */
1786
+ export function resetRunContextForHarnessAuto(
1787
+ ctx: HarnessRunContext,
1788
+ ): HarnessRunContext {
1789
+ return {
1790
+ ...ctx,
1791
+ phase: "plan",
1792
+ plan_ready: false,
1793
+ plan_id: null,
1794
+ plan_packet_path: canonicalPlanPath(ctx.run_id, ctx.project_root),
1795
+ status: "active",
1796
+ last_completed_step: null,
1797
+ last_outcome: null,
1798
+ next_recommended_command: null,
1799
+ steer_attempt: 0,
1800
+ steer_approved: false,
1801
+ updated_at: nowIso(),
1802
+ };
1803
+ }
1804
+
1420
1805
  export function shouldReuseHarnessRunId(
1421
1806
  prompt: string,
1422
1807
  ctx: HarnessRunContext | null,
@@ -1425,7 +1810,13 @@ export function shouldReuseHarnessRunId(
1425
1810
  if (!command) return false;
1426
1811
  if (command === "harness-new-run") return false;
1427
1812
  if (!ctx) return false;
1428
- if (command === "harness-plan" || command === "harness-auto") {
1813
+ if (command === "harness-auto") {
1814
+ return (
1815
+ (ctx.status === "active" || ctx.status === "aborted") &&
1816
+ shouldReuseHarnessRunIdForAuto(ctx)
1817
+ );
1818
+ }
1819
+ if (command === "harness-plan") {
1429
1820
  return ctx.status === "active" || ctx.status === "aborted";
1430
1821
  }
1431
1822
  if (ctx.status === "active") return true;
@@ -1647,6 +2038,179 @@ export async function readReviewOutcomeFromRun(
1647
2038
  }
1648
2039
  }
1649
2040
 
2041
+ /** Infer remediation when parent skipped Phase 6 but eval-verdict exists on disk. */
2042
+ export function remediationClassFromEvalVerdict(
2043
+ verdict: EvalVerdictDisk | null,
2044
+ ): RemediationClass | null {
2045
+ if (!verdict) return null;
2046
+ const status = (verdict.status ?? "").toLowerCase();
2047
+ if (status === "pass") return "pass";
2048
+ const action = (verdict.recommended_action ?? "").toLowerCase();
2049
+ if (
2050
+ action === "replan" ||
2051
+ action.includes("revise") ||
2052
+ action.includes("plan")
2053
+ ) {
2054
+ return "plan_gap";
2055
+ }
2056
+ if (action === "rollback" || action.includes("rollback")) {
2057
+ return "rollback";
2058
+ }
2059
+ if (
2060
+ action === "steer" ||
2061
+ action === "repair" ||
2062
+ action.includes("implement")
2063
+ ) {
2064
+ return "implementation_gap";
2065
+ }
2066
+ const failed = (verdict as EvalVerdictDisk & { failed_checks?: string[] })
2067
+ .failed_checks;
2068
+ const joined = Array.isArray(failed) ? failed.join(" ").toLowerCase() : "";
2069
+ if (
2070
+ joined.includes("scope_minimization") ||
2071
+ joined.includes("scope_drift") ||
2072
+ joined.includes("replan")
2073
+ ) {
2074
+ return "plan_gap";
2075
+ }
2076
+ if (status === "fail") return "inconclusive";
2077
+ return null;
2078
+ }
2079
+
2080
+ export function recommendedNextForRemediation(
2081
+ remediation: RemediationClass,
2082
+ ): string {
2083
+ switch (remediation) {
2084
+ case "pass":
2085
+ return "/harness-policy-status";
2086
+ case "implementation_gap":
2087
+ return "/harness-steer";
2088
+ case "plan_gap":
2089
+ return "/harness-plan (mode: revise)";
2090
+ case "rollback":
2091
+ return "/harness-incident";
2092
+ default:
2093
+ return "/harness-review";
2094
+ }
2095
+ }
2096
+
2097
+ export async function resolveRemediationClassForRun(
2098
+ runId: string,
2099
+ projectRoot: string,
2100
+ ): Promise<RemediationClass | null> {
2101
+ const review = await readReviewOutcomeFromRun(runId, projectRoot);
2102
+ if (review?.remediation_class) {
2103
+ return review.remediation_class as RemediationClass;
2104
+ }
2105
+ const evalV = await readEvalVerdictFromRun(runId, projectRoot);
2106
+ return remediationClassFromEvalVerdict(evalV);
2107
+ }
2108
+
2109
+ export async function ensureReviewOutcomeFromEval(
2110
+ runId: string,
2111
+ projectRoot: string,
2112
+ ): Promise<ReviewOutcomeLike | null> {
2113
+ const existing = await readReviewOutcomeFromRun(runId, projectRoot);
2114
+ if (existing?.remediation_class) return existing;
2115
+
2116
+ const evalV = await readEvalVerdictFromRun(runId, projectRoot);
2117
+ if (!evalV?.status) return null;
2118
+
2119
+ const remediation = remediationClassFromEvalVerdict(evalV) ?? "inconclusive";
2120
+ const evalStatus = (evalV.status ?? "").toLowerCase();
2121
+ const status =
2122
+ evalStatus === "pass"
2123
+ ? "pass"
2124
+ : evalStatus === "fail"
2125
+ ? "fail"
2126
+ : "inconclusive";
2127
+
2128
+ const outcome: ReviewOutcomeLike & {
2129
+ run_id: string;
2130
+ recommended_next: string;
2131
+ source_artifacts: Record<string, string>;
2132
+ review_tier: string;
2133
+ } = {
2134
+ schema_version: "1.0.0",
2135
+ run_id: runId,
2136
+ status,
2137
+ remediation_class: remediation,
2138
+ recommended_next: recommendedNextForRemediation(remediation),
2139
+ source_artifacts: { "eval-verdict": "artifacts/eval-verdict.yaml" },
2140
+ review_tier: "synthesized",
2141
+ };
2142
+
2143
+ const outPath = join(
2144
+ harnessRunsRoot(projectRoot),
2145
+ runId,
2146
+ "artifacts",
2147
+ "review-outcome.yaml",
2148
+ );
2149
+ await writeYamlFile(outPath, outcome);
2150
+
2151
+ const { ensureRepairBriefOnDisk } = await import("./harness-repair-brief.js");
2152
+ await ensureRepairBriefOnDisk({
2153
+ runId,
2154
+ projectRoot,
2155
+ steerAttempt: 0,
2156
+ });
2157
+
2158
+ return outcome;
2159
+ }
2160
+
2161
+ /** Align next_recommended_command with on-disk review/eval routing after /harness-review. */
2162
+ export async function reconcileReviewRouting(
2163
+ projectRoot: string,
2164
+ ctx: HarnessRunContext,
2165
+ ): Promise<HarnessRunContext> {
2166
+ const evalV = await readEvalVerdictFromRun(ctx.run_id, projectRoot);
2167
+ const reviewStep =
2168
+ ctx.last_completed_step === "review" ||
2169
+ ctx.last_completed_step === "adversary" ||
2170
+ Boolean(evalV?.status);
2171
+ if (!reviewStep) return ctx;
2172
+
2173
+ let working = { ...ctx };
2174
+ if (
2175
+ evalV?.status &&
2176
+ working.last_completed_step === "execute" &&
2177
+ String(working.last_outcome ?? "").toLowerCase() === "completed"
2178
+ ) {
2179
+ working = {
2180
+ ...working,
2181
+ last_completed_step: "review",
2182
+ last_outcome: evalV.status,
2183
+ phase: "evaluate",
2184
+ };
2185
+ }
2186
+
2187
+ await ensureReviewOutcomeFromEval(working.run_id, projectRoot);
2188
+
2189
+ const remediation = await resolveRemediationClassForRun(
2190
+ working.run_id,
2191
+ projectRoot,
2192
+ );
2193
+ if (!remediation) return working;
2194
+
2195
+ const next = nextStepAfterOutcome({
2196
+ phase: working.phase,
2197
+ lastCompletedStep: working.last_completed_step,
2198
+ lastOutcome: working.last_outcome,
2199
+ evalStatus: working.last_outcome,
2200
+ remediationClass: remediation,
2201
+ steerAttempt: working.steer_attempt ?? 0,
2202
+ steerMaxAttempts: working.steer_max_attempts ?? steerMaxAttemptsFromEnv(),
2203
+ reviewComplete: true,
2204
+ aborted: working.status === "aborted",
2205
+ });
2206
+
2207
+ return {
2208
+ ...working,
2209
+ next_recommended_command: next,
2210
+ updated_at: nowIso(),
2211
+ };
2212
+ }
2213
+
1650
2214
  function nextStepForEvaluateLikePhase(input: {
1651
2215
  adversaryComplete?: boolean;
1652
2216
  remediation: string;
@@ -1668,7 +2232,16 @@ function nextStepForEvaluateLikePhase(input: {
1668
2232
  return "/harness-plan (mode: revise) or /harness-abort";
1669
2233
  }
1670
2234
  if (input.evalStatus === "fail") {
1671
- if (input.steerAttempt < input.steerMax) return "/harness-steer";
2235
+ if (input.remediation === "plan_gap") {
2236
+ return "/harness-plan (mode: revise)";
2237
+ }
2238
+ if (
2239
+ input.remediation === "implementation_gap" ||
2240
+ input.remediation === "inconclusive"
2241
+ ) {
2242
+ if (input.steerAttempt < input.steerMax) return "/harness-steer";
2243
+ return "/harness-plan (mode: revise) or /harness-abort";
2244
+ }
1672
2245
  return "/harness-plan (mode: revise) or /harness-incident";
1673
2246
  }
1674
2247
  if (input.adversaryComplete) return "/harness-policy-status";
@@ -1698,9 +2271,13 @@ export function nextStepAfterOutcome(input: {
1698
2271
  return "Reply with answers or run /harness-plan with updates";
1699
2272
  }
1700
2273
 
2274
+ const lastOutcome = (input.lastOutcome ?? "").toLowerCase();
2275
+ if (input.phase === "plan" && lastOutcome === "pending_approval") {
2276
+ return "Continue /harness-plan: finish Review Gate (harness_debate_round_status → debate lanes → harness_debate_consensus), then approve_plan";
2277
+ }
2278
+
1701
2279
  const lastStep = (input.lastCompletedStep ?? "").toLowerCase();
1702
2280
  const exec = (input.executionStatus ?? "").toLowerCase();
1703
- const lastOutcome = (input.lastOutcome ?? "").toLowerCase();
1704
2281
  const evalSt = (input.evalStatus ?? "").toLowerCase();
1705
2282
  const remediation = (input.remediationClass ?? "").toLowerCase();
1706
2283
  const steerAttempt = input.steerAttempt ?? 0;
@@ -1752,6 +2329,216 @@ export function nextStepAfterOutcome(input: {
1752
2329
  }
1753
2330
 
1754
2331
  /** Read executor handoff artifact written by harness/running/executor submit pipeline. */
2332
/**
 * Derive the run-context fields to record after a `/harness-run` agent turn.
 *
 * The run is only advanced past the plan step when an executor status is
 * supplied; a missing (null or empty) status keeps the run in the plan phase.
 *
 * @param execStatus - Executor status, or null when none is available.
 * @param planReady - Whether the plan is considered ready; only consulted when
 *   there is no executor status.
 * @returns The step/outcome/phase/next-command fields to merge into the context.
 */
export function resolveHarnessRunPostAgentState(
  execStatus: string | null,
  planReady: boolean,
): Pick<
  HarnessRunContext,
  "last_completed_step" | "last_outcome" | "phase" | "next_recommended_command"
> {
  if (execStatus) {
    // The status is compared case-insensitively but stored exactly as given.
    if (execStatus.toLowerCase() === "completed") {
      return {
        last_completed_step: "execute",
        last_outcome: execStatus,
        phase: "evaluate",
        next_recommended_command: "/harness-review",
      };
    }
    return {
      last_completed_step: "execute",
      last_outcome: execStatus,
      phase: "execute",
      next_recommended_command: "/harness-run",
    };
  }

  // No executor status: stay on the plan step and point back at /harness-run.
  return {
    last_completed_step: "plan",
    last_outcome: planReady ? "ready" : null,
    phase: "plan",
    next_recommended_command: "/harness-run",
  };
}
2357
+
2358
/**
 * Report whether the run context already records the executor status.
 *
 * True only when the last completed step is "execute" and the recorded
 * outcome equals `executionStatus`, compared case-insensitively.
 *
 * @param ctx - Run context whose recorded step and outcome are inspected.
 * @param executionStatus - Executor status to compare against.
 */
function executeCompletionMatchesHandoff(
  ctx: HarnessRunContext,
  executionStatus: string,
): boolean {
  if (ctx.last_completed_step !== "execute") return false;
  const expected = executionStatus.toLowerCase();
  // A missing outcome is treated as the empty string.
  const recorded = String(ctx.last_outcome ?? "").toLowerCase();
  return recorded === expected;
}
2368
+
2369
/**
 * Reconcile the run context with the plan-ready state and executor handoff.
 *
 * Steps, in order:
 * 1. Refresh the context through `syncPlanReadyFromDisk`.
 * 2. If the context claims execute "completed" but the handoff carries no
 *    execution status, fall back to the no-executor-status state.
 * 3. If the handoff carries a status that the context does not reflect, and
 *    the run has not already moved on to review/adversary/steer, adopt the
 *    state derived from the handoff status.
 *
 * @param projectRoot - Project root passed through to the disk helpers.
 * @param ctx - Run context to reconcile.
 * @param entries - Optional entries forwarded to `syncPlanReadyFromDisk`.
 * @returns The reconciled context (a new object whenever fields are overridden).
 */
export async function reconcileStaleExecuteCompletion(
  projectRoot: string,
  ctx: HarnessRunContext,
  entries?: unknown[],
): Promise<HarnessRunContext> {
  const current = await syncPlanReadyFromDisk(projectRoot, ctx, entries);
  const handoff = await readExecutorHandoffFromRun(current.run_id, projectRoot);
  const handoffStatus = handoff?.execution_status;

  if (!handoffStatus) {
    const claimsCompleted =
      current.last_completed_step === "execute" &&
      String(current.last_outcome ?? "").toLowerCase() === "completed";
    if (!claimsCompleted) return current;
    // Completion is recorded without any executor status to back it: reset.
    return {
      ...current,
      ...resolveHarnessRunPostAgentState(null, current.plan_ready),
    };
  }

  // Later steps already ran; do not pull the run back to the execute step.
  const step = current.last_completed_step;
  if (step === "review" || step === "adversary" || step === "steer") {
    return current;
  }

  if (executeCompletionMatchesHandoff(current, handoffStatus)) return current;

  return {
    ...current,
    ...resolveHarnessRunPostAgentState(handoffStatus, current.plan_ready),
  };
}
2409
+
2410
/**
 * Decide whether a `/harness-auto` invocation must be refused.
 *
 * @param command - Command name; anything other than "harness-auto" is never blocked here.
 * @param activeCtx - Currently active run context, or null when there is none.
 * @param args - Raw command arguments, searched for the task text.
 * @param userPrompt - User prompt used as a fallback source for the task text.
 * @returns A message explaining the refusal, or null when the command may proceed.
 */
export async function blockingHarnessAutoCommandReason(
  command: string,
  activeCtx: HarnessRunContext | null,
  args: string,
  userPrompt: string,
): Promise<string | null> {
  if (command !== "harness-auto") return null;

  const requestedTask = extractTaskSummaryFromHarnessInput(args, userPrompt);
  if (!requestedTask) {
    return 'Usage: /harness-auto "<task>" [--quick] [--risk low|med|high]';
  }

  // Only an active run that has an owner session and a task summary can conflict.
  if (!activeCtx || activeCtx.status !== "active") return null;
  if (!activeCtx.owner_pi_session_id || !activeCtx.task_summary) return null;

  return harnessAutoTasksDiffer(activeCtx, requestedTask)
    ? "Active harness run is for a different task. Run /harness-abort or /harness-new-run before /harness-auto with a new task."
    : null;
}
2431
+
2432
/**
 * Pull the task text out of harness command input.
 *
 * Sources are tried in this order:
 * 1. the first double-quoted, non-empty segment of `args`;
 * 2. the trimmed `args` itself, cut to 200 characters, unless it is empty or
 *    starts with `--`;
 * 3. the first double-quoted, non-empty segment of `prompt`.
 *
 * @param args - Raw command arguments.
 * @param prompt - Optional prompt text used as a fallback.
 * @returns The task text, or null when none of the sources yields one.
 */
function extractTaskSummaryFromHarnessInput(
  args: string,
  prompt?: string,
): string | null {
  const quotedSegment = /"([^"]+)"/;

  const quotedArg = quotedSegment.exec(args)?.[1];
  if (quotedArg) return quotedArg;

  const bareArgs = args.trim();
  if (bareArgs !== "" && !bareArgs.startsWith("--")) {
    return bareArgs.slice(0, 200);
  }

  if (!prompt) return null;
  const quotedPrompt = quotedSegment.exec(prompt)?.[1];
  return quotedPrompt ? quotedPrompt : null;
}
2447
+
2448
/**
 * Decide whether a `/harness-run` invocation must be refused.
 *
 * The command is refused when the plan is not ready, or when execution is
 * already recorded as completed either in the run context or in the executor
 * handoff. Both sources are compared case-insensitively, so a handoff status
 * such as "Completed" blocks a re-run just like "completed".
 *
 * @param command - Command name; anything other than "harness-run" is never blocked here.
 * @param activeCtx - Active run context.
 * @param projectRoot - Project root used to locate the executor handoff.
 * @param entries - Optional entries; when `isHarnessAutoSession` accepts them
 *   the command is never blocked.
 * @returns A message explaining the refusal, or null when the command may proceed.
 */
export async function blockingRunCommandReason(
  command: string,
  activeCtx: HarnessRunContext,
  projectRoot: string,
  entries?: unknown[],
): Promise<string | null> {
  if (command !== "harness-run") return null;
  if (entries && isHarnessAutoSession(entries)) return null;
  if (!activeCtx.plan_ready) return "Plan not ready. Run /harness-plan first.";

  const handoff = await readExecutorHandoffFromRun(
    activeCtx.run_id,
    projectRoot,
  );

  const contextSaysCompleted =
    activeCtx.last_completed_step === "execute" &&
    String(activeCtx.last_outcome ?? "").toLowerCase() === "completed";
  // Normalize once; a second case-sensitive comparison previously let
  // mixed-case handoff statuses slip through unblocked.
  const handoffSaysCompleted =
    handoff?.execution_status?.toLowerCase() === "completed";

  if (contextSaysCompleted || handoffSaysCompleted) {
    return "Execute already completed for this run. Next: /harness-review (same session), or /harness-abort to replan.";
  }
  return null;
}
2471
+
2472
/**
 * Decide whether a review-type command must be refused because execute has
 * not finished.
 *
 * Applies to "harness-review", "harness-eval" and "harness-critic"; any other
 * command is never blocked here. The command may proceed when the executor
 * handoff carries any execution status, or when the context records the
 * execute step with outcome "completed", "scope_drift" or "blocked"
 * (case-insensitive).
 *
 * @param command - Command name.
 * @param activeCtx - Active run context.
 * @param projectRoot - Project root used to locate the executor handoff.
 * @returns A message explaining the refusal, or null when the command may proceed.
 */
export async function blockingReviewCommandReason(
  command: string,
  activeCtx: HarnessRunContext,
  projectRoot: string,
): Promise<string | null> {
  switch (command) {
    case "harness-review":
    case "harness-eval":
    case "harness-critic":
      break;
    default:
      return null;
  }

  const handoff = await readExecutorHandoffFromRun(
    activeCtx.run_id,
    projectRoot,
  );
  // Any handoff status at all counts as execute having started.
  if (handoff?.execution_status) return null;

  if (activeCtx.last_completed_step === "execute") {
    const recordedOutcome = String(activeCtx.last_outcome ?? "").toLowerCase();
    const reviewableOutcomes = ["completed", "scope_drift", "blocked"];
    if (reviewableOutcomes.includes(recordedOutcome)) return null;
  }

  return "Execute not finished. Run /harness-run first.";
}
2496
+
2497
/**
 * Decide whether a `/harness-steer` invocation must be refused.
 *
 * Checks, in order:
 * 1. a remediation class or an eval-verdict status must be resolvable;
 * 2. the remediation class must be "implementation_gap";
 * 3. `artifacts/repair-brief.yaml` must be readable for the run;
 * 4. the steer attempt count must be below the cap.
 *
 * NOTE(review): `nextStepForEvaluateLikePhase` in this file also recommends
 * /harness-steer for the "inconclusive" remediation, which this gate rejects —
 * confirm which of the two is intended.
 *
 * @param command - Command name; anything other than "harness-steer" is never blocked here.
 * @param activeCtx - Active run context (run id and steer counters are read).
 * @param projectRoot - Project root used to locate run artifacts.
 * @returns A message explaining the refusal, or null when the command may proceed.
 */
export async function blockingSteerCommandReason(
  command: string,
  activeCtx: HarnessRunContext,
  projectRoot: string,
): Promise<string | null> {
  if (command !== "harness-steer") return null;

  const runId = activeCtx.run_id;
  await ensureReviewOutcomeFromEval(runId, projectRoot);
  const remediation = await resolveRemediationClassForRun(runId, projectRoot);
  const verdict = await readEvalVerdictFromRun(runId, projectRoot);

  if (!remediation && !verdict?.status) {
    return "Run /harness-review first (no eval-verdict or review-outcome on disk).";
  }

  if (remediation !== "implementation_gap") {
    let next = "/harness-plan (mode: revise)";
    if (remediation != null) {
      next = recommendedNextForRemediation(remediation);
    }
    return `Steer applies only for implementation_gap (resolved: ${remediation ?? "unknown"}). Next: ${next}`;
  }

  const repairBriefPath = join(
    harnessRunsRoot(projectRoot),
    runId,
    "artifacts",
    "repair-brief.yaml",
  );
  try {
    await readYamlFile(repairBriefPath, "repair-brief");
  } catch {
    // Any read failure is reported to the user as a missing repair brief.
    return "Run /harness-review first (artifacts/repair-brief.yaml missing).";
  }

  const cap = activeCtx.steer_max_attempts ?? steerMaxAttemptsFromEnv();
  const used = activeCtx.steer_attempt ?? 0;
  return used >= cap
    ? `Steer attempt cap reached (${cap}). Use /harness-plan (mode: revise) or /harness-abort.`
    : null;
}
2541
+
1755
2542
  export async function readExecutorHandoffFromRun(
1756
2543
  runId: string,
1757
2544
  projectRoot: string,