ultimate-pi 0.22.0 → 0.22.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agents/skills/harness-context/SKILL.md +3 -3
- package/.agents/skills/harness-debate-plan/SKILL.md +2 -2
- package/.agents/skills/harness-decisions/SKILL.md +2 -2
- package/.agents/skills/harness-eval/SKILL.md +1 -1
- package/.agents/skills/harness-git-commit/SKILL.md +1 -1
- package/.agents/skills/harness-governor/SKILL.md +5 -5
- package/.agents/skills/harness-ls-lint-setup/SKILL.md +2 -2
- package/.agents/skills/harness-orchestration/SKILL.md +4 -4
- package/.agents/skills/harness-plan/SKILL.md +2 -2
- package/.agents/skills/harness-review/SKILL.md +2 -2
- package/.agents/skills/harness-sentrux-repair/SKILL.md +1 -1
- package/.agents/skills/harness-sentrux-setup/SKILL.md +2 -2
- package/.agents/skills/harness-spec/SKILL.md +1 -1
- package/.agents/skills/harness-steer/SKILL.md +2 -2
- package/.agents/skills/posthog-analyst/SKILL.md +1 -1
- package/.agents/skills/sentrux/SKILL.md +4 -4
- package/.agents/skills/web-retrieval/SKILL.md +1 -1
- package/.pi/agents/harness/ls-lint-steward.md +3 -3
- package/.pi/agents/harness/planning/decompose.md +1 -1
- package/.pi/agents/harness/planning/execution-plan-author.md +1 -1
- package/.pi/agents/harness/planning/hypothesis-validator.md +1 -1
- package/.pi/agents/harness/planning/hypothesis.md +1 -1
- package/.pi/agents/harness/planning/plan-adversary.md +1 -1
- package/.pi/agents/harness/planning/plan-evaluator.md +2 -2
- package/.pi/agents/harness/planning/plan-synthesizer.md +2 -2
- package/.pi/agents/harness/planning/review-integrator.md +1 -1
- package/.pi/agents/harness/planning/sprint-contract-auditor.md +5 -5
- package/.pi/agents/harness/running/executor.md +1 -1
- package/.pi/agents/harness/sentrux-repair-advisor.md +1 -1
- package/.pi/agents/harness/sentrux-steward.md +2 -2
- package/.pi/extensions/agt-kill-switch.ts +7 -1
- package/.pi/extensions/harness-plan-approval.ts +9 -1
- package/.pi/extensions/harness-run-context.ts +529 -84
- package/.pi/extensions/policy-gate.ts +15 -2
- package/.pi/harness/agents.manifest.json +16 -16
- package/.pi/harness/agents.policy.yaml +82 -3
- package/.pi/harness/specs/plan-task-clarification.schema.json +10 -1
- package/.pi/lib/agents-policy.mjs +42 -1
- package/.pi/lib/agt/build-evaluation-context.ts +3 -1
- package/.pi/lib/agt/kill-switch-state.ts +14 -0
- package/.pi/lib/agt/legacy-evaluate.ts +3 -1
- package/.pi/lib/ask-user/index.ts +2 -0
- package/.pi/lib/ask-user/merge-task-clarification.ts +5 -0
- package/.pi/lib/ask-user/policy.ts +23 -0
- package/.pi/lib/ask-user/presenters/glimpse.ts +8 -1
- package/.pi/lib/ask-user/presenters/headless.ts +15 -0
- package/.pi/lib/ask-user/presenters/select.ts +11 -2
- package/.pi/lib/ask-user/validate-core.mjs +16 -0
- package/.pi/lib/harness-artifact-gate.ts +75 -5
- package/.pi/lib/harness-repair-brief.ts +30 -4
- package/.pi/lib/harness-run-context.ts +804 -17
- package/.pi/lib/harness-schema-validate.ts +147 -38
- package/.pi/lib/harness-spawn-policy.ts +9 -0
- package/.pi/lib/harness-spawn-topology.ts +109 -7
- package/.pi/lib/harness-subagent-precheck.ts +21 -0
- package/.pi/lib/harness-subagent-submit-pipeline.ts +95 -21
- package/.pi/lib/harness-subagent-submit-register.ts +6 -1
- package/.pi/lib/harness-subagents-bridge.ts +3 -0
- package/.pi/lib/harness-yaml.ts +11 -3
- package/.pi/lib/plan-approval/create-plan.ts +2 -6
- package/.pi/lib/plan-debate-gate.ts +87 -0
- package/.pi/lib/plan-debate-lane.ts +8 -2
- package/.pi/lib/plan-human-gates.ts +322 -0
- package/.pi/prompts/harness-clear.md +25 -0
- package/.pi/prompts/harness-plan.md +11 -7
- package/.pi/prompts/harness-review.md +5 -5
- package/.pi/prompts/harness-run.md +2 -2
- package/.pi/prompts/harness-sentrux-steward.md +2 -2
- package/.pi/prompts/harness-setup.md +3 -3
- package/.pi/prompts/harness-steer.md +5 -5
- package/.pi/scripts/generate-agents-policy-yaml.mjs +73 -7
- package/.pi/scripts/harness-reconcile-run-context.mjs +62 -0
- package/.pi/scripts/harness-schema-compile-verify.mjs +29 -0
- package/.pi/scripts/harness-verify.mjs +100 -0
- package/AGENTS.md +1 -0
- package/CHANGELOG.md +13 -0
- package/README.md +4 -0
- package/package.json +9 -6
|
@@ -6,7 +6,14 @@
|
|
|
6
6
|
* - `.pi/harness/active-run.json` (cross-session pointer)
|
|
7
7
|
*/
|
|
8
8
|
|
|
9
|
-
import {
|
|
9
|
+
import {
|
|
10
|
+
mkdir,
|
|
11
|
+
readdir,
|
|
12
|
+
readFile,
|
|
13
|
+
realpath,
|
|
14
|
+
stat,
|
|
15
|
+
writeFile,
|
|
16
|
+
} from "node:fs/promises";
|
|
10
17
|
import { isAbsolute, join, relative, resolve } from "node:path";
|
|
11
18
|
import {
|
|
12
19
|
isPlanApprovalAskUser,
|
|
@@ -14,6 +21,7 @@ import {
|
|
|
14
21
|
PLAN_CANCEL_OPTION,
|
|
15
22
|
} from "./ask-user/policy.js";
|
|
16
23
|
import { readYamlFile, writeYamlFile } from "./harness-yaml.js";
|
|
24
|
+
import { readTaskClarificationDoc } from "./plan-task-clarification.js";
|
|
17
25
|
|
|
18
26
|
export { isPlanApprovalAskUser } from "./ask-user/policy.js";
|
|
19
27
|
|
|
@@ -82,6 +90,19 @@ export interface PlanPacketLike {
|
|
|
82
90
|
execution_plan?: unknown;
|
|
83
91
|
}
|
|
84
92
|
|
|
93
|
+
/**
 * One run directory that `buildHarnessClearManifest` accepted as a clear
 * candidate.
 */
export interface HarnessClearManifestItem {
  /** Directory entry name under the runs root. */
  run_id: string;
  /** Runs root joined with `run_id`, before symlink resolution. */
  absolute_path: string;
  /** `realpath` of `absolute_path`. */
  canonical_path: string;
}
|
|
98
|
+
|
|
99
|
+
/**
 * Result of scanning the harness runs directory for clearable runs.
 * Produced by `buildHarnessClearManifest`.
 */
export interface HarnessClearManifest {
  /** Runs directory that was scanned (resolved, and canonical when `realpath` succeeded). */
  runs_root: string;
  /** Trimmed, de-duplicated, sorted run ids the caller asked to protect. */
  protected_run_ids: string[];
  /** Run directories that passed every check, sorted by `run_id`. */
  candidates: readonly HarnessClearManifestItem[];
  /** Entries that were not accepted, each with a short machine-readable reason. */
  skipped: readonly { run_id: string; reason: string }[];
}
|
|
105
|
+
|
|
85
106
|
interface SessionEntryLike {
|
|
86
107
|
type?: string;
|
|
87
108
|
customType?: string;
|
|
@@ -109,12 +130,107 @@ const HARNESS_COMMANDS = new Set([
|
|
|
109
130
|
"harness-policy-status",
|
|
110
131
|
"harness-trace-last",
|
|
111
132
|
"harness-budget-status",
|
|
133
|
+
"harness-clear",
|
|
112
134
|
]);
|
|
113
135
|
|
|
114
136
|
/** Path of the directory that holds every harness run: `<projectRoot>/.pi/harness/runs`. */
export function harnessRunsRoot(projectRoot: string): string {
  const segments = [".pi", "harness", "runs"];
  return join(projectRoot, ...segments);
}
|
|
117
139
|
|
|
140
|
+
/**
 * Scan `<projectRoot>/.pi/harness/runs` and classify every directory (or
 * symlink) entry as a clear candidate or a skipped entry. This function only
 * reads the filesystem (`realpath`, `readdir`, `stat`); it removes nothing.
 *
 * Skip reasons:
 * - `protected`            – the entry name is in `protectedRunIds`
 * - `unresolvable`         – `realpath` failed for the entry
 * - `out_of_root`          – the canonical path is the runs root itself or lies outside it
 * - `non_canonical_child`  – the canonical path is inside the root but is not
 *                            the direct child named after the entry (e.g. a symlink alias)
 * - `not_directory`        – the canonical path is not a directory
 * - `missing`              – `stat` failed for the canonical path
 *
 * @param projectRoot     Project root containing `.pi/harness/runs`.
 * @param protectedRunIds Run ids that must never become candidates; blank
 *                        and non-string values are ignored, others are trimmed.
 * @returns A manifest whose `candidates` and `skipped` arrays (and their
 *          items) are frozen and sorted by `run_id`. When the runs root
 *          cannot be resolved or listed, both arrays are empty.
 */
export async function buildHarnessClearManifest(
  projectRoot: string,
  protectedRunIds: Iterable<string> = [],
): Promise<HarnessClearManifest> {
  const runsRoot = resolve(harnessRunsRoot(projectRoot));
  const protectedSet = new Set(
    [...protectedRunIds]
      .filter(
        (id): id is string => typeof id === "string" && id.trim().length > 0,
      )
      .map((id) => id.trim()),
  );
  const protectedIds = [...protectedSet].sort();

  let runsReal = runsRoot;
  try {
    runsReal = await realpath(runsRoot);
  } catch {
    // Runs root does not exist (or cannot be resolved): nothing to clear.
    return {
      runs_root: runsRoot,
      protected_run_ids: protectedIds,
      candidates: Object.freeze([]),
      skipped: Object.freeze([]),
    };
  }

  let entries: Array<{
    name: string;
    isDirectory(): boolean;
    isSymbolicLink(): boolean;
  }>;
  try {
    entries = await readdir(runsRoot, {
      withFileTypes: true,
      encoding: "utf8",
    });
  } catch {
    return {
      runs_root: runsReal,
      protected_run_ids: protectedIds,
      candidates: Object.freeze([]),
      skipped: Object.freeze([]),
    };
  }

  const candidates: HarnessClearManifestItem[] = [];
  const skipped: Array<{ run_id: string; reason: string }> = [];
  for (const entry of entries) {
    // Plain files are ignored silently; symlinks are examined so they can be
    // reported in `skipped` rather than followed.
    if (!entry.isDirectory() && !entry.isSymbolicLink()) continue;
    const runId = entry.name;
    if (protectedSet.has(runId)) {
      skipped.push({ run_id: runId, reason: "protected" });
      continue;
    }
    const absPath = join(runsRoot, runId);
    let canonicalPath: string;
    try {
      canonicalPath = await realpath(absPath);
    } catch {
      skipped.push({ run_id: runId, reason: "unresolvable" });
      continue;
    }
    const rel = relative(runsReal, canonicalPath);
    // Compare the first path SEGMENT with "..": a plain prefix test would
    // misreport a real child directory named e.g. "..backup" as out_of_root.
    const escapesRoot = rel.split(/[\\/]/)[0] === "..";
    if (!rel || escapesRoot || isAbsolute(rel)) {
      skipped.push({ run_id: runId, reason: "out_of_root" });
      continue;
    }
    if (rel !== runId) {
      skipped.push({ run_id: runId, reason: "non_canonical_child" });
      continue;
    }
    try {
      const info = await stat(canonicalPath);
      if (!info.isDirectory()) {
        skipped.push({ run_id: runId, reason: "not_directory" });
        continue;
      }
    } catch {
      skipped.push({ run_id: runId, reason: "missing" });
      continue;
    }
    candidates.push({
      run_id: runId,
      absolute_path: absPath,
      canonical_path: canonicalPath,
    });
  }

  candidates.sort((a, b) => a.run_id.localeCompare(b.run_id));
  skipped.sort((a, b) => a.run_id.localeCompare(b.run_id));
  return {
    runs_root: runsReal,
    protected_run_ids: protectedIds,
    candidates: Object.freeze(candidates.map((item) => Object.freeze(item))),
    skipped: Object.freeze(skipped.map((item) => Object.freeze(item))),
  };
}
|
|
233
|
+
|
|
118
234
|
/** Path of the active-run pointer file: `<projectRoot>/.pi/harness/active-run.json`. */
export function activeRunPointerPath(projectRoot: string): string {
  const harnessDir = join(projectRoot, ".pi", "harness");
  return join(harnessDir, "active-run.json");
}
|
|
@@ -158,15 +274,29 @@ const PLAN_RUN_SCOPED_ROOT_FILES = new Set([
|
|
|
158
274
|
PLAN_REVIEW_BASENAME,
|
|
159
275
|
]);
|
|
160
276
|
|
|
161
|
-
/**
|
|
277
|
+
/**
 * File names (under a run's `artifacts/` directory) that the parent
 * orchestrator may write during the evaluate/adversary phases (ADR 0044).
 * Keep in sync with the parent write_harness_yaml paths in
 * harness-review.md / harness-steer.md.
 */
export const EVALUATE_PHASE_ORCHESTRATOR_ARTIFACTS = new Set<string>([
  // Review / steer bookkeeping
  "benchmark-log.yaml",
  "review-outcome.yaml",
  "repair-brief.yaml",
  "steer-state.yaml",
  "eval-benchmark.yaml",
  // Structural / lint signals and repair planning
  "sentrux-signal.yaml",
  "ls-lint-signal.yaml",
  "sentrux-repair-plan.yaml",
]);
|
|
169
291
|
|
|
292
|
+
/**
 * True when `rel` is a run-relative path of the exact form
 * `artifacts/<file>` (no run_id prefix) and `<file>` is listed in
 * `EVALUATE_PHASE_ORCHESTRATOR_ARTIFACTS`. Backslashes are treated as
 * path separators.
 */
export function isEvaluatePhaseOrchestratorArtifactRel(rel: string): boolean {
  const segments = rel.replace(/\\/g, "/").split("/");
  if (segments.length !== 2) return false;
  const [dir, file] = segments;
  return dir === "artifacts" && EVALUATE_PHASE_ORCHESTRATOR_ARTIFACTS.has(file);
}
|
|
299
|
+
|
|
170
300
|
export const DEFAULT_STEER_MAX_ATTEMPTS = 3;
|
|
171
301
|
|
|
172
302
|
export function steerMaxAttemptsFromEnv(): number {
|
|
@@ -215,6 +345,7 @@ export const HARNESS_COMMAND_PHASE: Record<string, HarnessPhase> = {
|
|
|
215
345
|
"harness-use-run": "plan",
|
|
216
346
|
"harness-policy-status": "merge",
|
|
217
347
|
"harness-budget-status": "plan",
|
|
348
|
+
"harness-clear": "plan",
|
|
218
349
|
"harness-setup": "execute",
|
|
219
350
|
};
|
|
220
351
|
|
|
@@ -235,6 +366,66 @@ export function normalizeHarnessPath(
|
|
|
235
366
|
return resolve(projectRoot, trimmed);
|
|
236
367
|
}
|
|
237
368
|
|
|
369
|
+
/**
 * True when `rel` is a run-scoped artifact path written WITHOUT the
 * `.pi/harness/runs/<run_id>/` prefix (the agent-friendly short form).
 *
 * Accepted shapes, after converting backslashes and dropping one leading `./`:
 * - a single file name listed in `PLAN_RUN_SCOPED_ROOT_FILES`
 * - `artifacts/<name>.yaml|.yml`
 * - `artifacts/context-bundles/<name>.yaml|.yml`
 *
 * Empty, absolute, `..`-prefixed and fully-prefixed paths are rejected.
 */
export function isBareHarnessRunArtifactPath(rel: string): boolean {
  const cleaned = rel.replace(/\\/g, "/").replace(/^\.\//, "");
  if (cleaned === "" || cleaned.startsWith("..") || isAbsolute(cleaned)) {
    return false;
  }
  if (cleaned.startsWith(".pi/harness/runs/")) return false;

  const isYaml = (name: string): boolean =>
    name.endsWith(".yaml") || name.endsWith(".yml");

  const segments = cleaned.split("/");
  switch (segments.length) {
    case 1:
      return PLAN_RUN_SCOPED_ROOT_FILES.has(segments[0]);
    case 2:
      return segments[0] === "artifacts" && isYaml(segments[1]);
    case 3:
      return (
        segments[0] === "artifacts" &&
        segments[1] === "context-bundles" &&
        isYaml(segments[2])
      );
    default:
      return false;
  }
}
|
|
392
|
+
|
|
393
|
+
/**
 * Resolve a harness write path to an absolute file plus its run-relative
 * gate path.
 *
 * Accepted input forms (backslashes are normalized, leading `./` is ignored):
 * - `artifacts/foo.yaml` or `research-brief.yaml` (bare, run-relative)
 * - `.pi/harness/runs/<run_id>/…` (full project-relative)
 * - `<run_id>/artifacts/…` (relative to the runs root)
 *
 * @param pathArg     Path as supplied by the caller/agent.
 * @param runCtx      Active run context; only `run_id` is read here.
 * @param projectRoot Project root used to build the absolute path.
 * @returns `{ absPath, relUnderRun }`, or `null` when the path is empty, the
 *          run has no id, the form is not recognised, the path contains a
 *          `..` segment, or `isPlanRunScopedRelativePath` rejects it.
 */
export function resolveHarnessRunWriteTarget(
  pathArg: string,
  runCtx: HarnessRunContext,
  projectRoot: string,
): { absPath: string; relUnderRun: string } | null {
  // Strip leading "./" up front so every accepted form tolerates it, not
  // only the bare form (previously "./.pi/harness/runs/<id>/…" was rejected).
  const trimmed = pathArg
    .trim()
    .replace(/\\/g, "/")
    .replace(/^(?:\.\/)+/, "");
  if (!trimmed || !runCtx.run_id) return null;

  const runPrefix = `.pi/harness/runs/${runCtx.run_id}/`;
  const idPrefix = `${runCtx.run_id}/`;
  let relUnderRun: string | null = null;

  if (trimmed.startsWith(runPrefix)) {
    relUnderRun = trimmed.slice(runPrefix.length);
  } else if (trimmed.startsWith(idPrefix)) {
    relUnderRun = trimmed.slice(idPrefix.length);
  } else if (isBareHarnessRunArtifactPath(trimmed)) {
    relUnderRun = trimmed;
  }

  if (!relUnderRun) return null;

  // Defensive: never let a ".." segment reach join(), whatever the scoped
  // check below decides. NOTE(review): isPlanRunScopedRelativePath presumably
  // rejects these already — confirm; this guard makes it explicit.
  if (relUnderRun.split("/").includes("..")) return null;

  const scopedCheck = `${runCtx.run_id}/${relUnderRun}`;
  if (!isPlanRunScopedRelativePath(scopedCheck)) return null;

  const absPath = join(
    harnessRunsRoot(projectRoot),
    runCtx.run_id,
    relUnderRun,
  );
  return { absPath, relUnderRun };
}
|
|
428
|
+
|
|
238
429
|
export function isCanonicalPlanPacketPath(
|
|
239
430
|
absPath: string,
|
|
240
431
|
projectRoot: string,
|
|
@@ -276,6 +467,7 @@ export function isPlanRunScopedRelativePath(rel: string): boolean {
|
|
|
276
467
|
return false;
|
|
277
468
|
}
|
|
278
469
|
|
|
470
|
+
/** Scoped path under `.pi/harness/runs/<run_id>/` (includes run_id prefix). */
|
|
279
471
|
export function isEvaluatePhaseOrchestratorArtifact(rel: string): boolean {
|
|
280
472
|
if (rel.startsWith("..") || isAbsolute(rel)) return false;
|
|
281
473
|
const parts = rel.split(/[/\\]/);
|
|
@@ -283,6 +475,27 @@ export function isEvaluatePhaseOrchestratorArtifact(rel: string): boolean {
|
|
|
283
475
|
return EVALUATE_PHASE_ORCHESTRATOR_ARTIFACTS.has(parts[2]);
|
|
284
476
|
}
|
|
285
477
|
|
|
478
|
+
/**
 * Strip the leading `<run_id>/` from a path that is relative to
 * `.pi/harness/runs/`. Backslashes are converted to forward slashes first.
 *
 * @param rel   Path relative to the runs root, e.g. `run-1/artifacts/a.yaml`.
 * @param runId Run id whose prefix should be removed.
 * @returns The normalized path without the `<run_id>/` prefix, or the
 *          normalized path unchanged when it does not start with that prefix
 *          or when `runId` is empty.
 */
export function stripRunIdFromHarnessScopedRelative(
  rel: string,
  runId: string,
): string {
  const norm = rel.replace(/\\/g, "/");
  // An empty run id would make the prefix "/" and strip a leading slash from
  // unrelated paths; treat it as "nothing to strip".
  if (!runId) return norm;
  const prefix = `${runId}/`;
  return norm.startsWith(prefix) ? norm.slice(prefix.length) : norm;
}
|
|
487
|
+
|
|
488
|
+
/**
 * Path of `absPath` relative to the active run directory (for example
 * `artifacts/foo.yaml`), used for gates and artifact keys.
 *
 * Delegates to `planRunScopedRelative` and then removes the `<run_id>/`
 * prefix. Resolves to `null` when `planRunScopedRelative` yields nothing.
 */
export async function relPathUnderActiveRun(
  absPath: string,
  runCtx: HarnessRunContext,
  projectRoot: string,
): Promise<string | null> {
  const scoped = await planRunScopedRelative(absPath, runCtx, projectRoot);
  return scoped
    ? stripRunIdFromHarnessScopedRelative(scoped, runCtx.run_id)
    : null;
}
|
|
498
|
+
|
|
286
499
|
async function planRunScopedRelative(
|
|
287
500
|
absPath: string,
|
|
288
501
|
runCtx: HarnessRunContext,
|
|
@@ -508,9 +721,6 @@ export function hasPlanUserApproval(
|
|
|
508
721
|
entries: unknown[],
|
|
509
722
|
opts?: { planId?: string | null; sincePlanCommand?: boolean },
|
|
510
723
|
): boolean {
|
|
511
|
-
if (process.env.HARNESS_PLAN_NONINTERACTIVE === "1") {
|
|
512
|
-
return true;
|
|
513
|
-
}
|
|
514
724
|
const since = opts?.sincePlanCommand
|
|
515
725
|
? Math.max(0, indexOfLastPlanCommand(entries))
|
|
516
726
|
: 0;
|
|
@@ -529,6 +739,10 @@ export function isHarnessAutoSession(entries: unknown[]): boolean {
|
|
|
529
739
|
const entry = entries[i] as SessionEntryLike & {
|
|
530
740
|
message?: { role?: string; content?: string };
|
|
531
741
|
};
|
|
742
|
+
if (entry.type === "custom" && entry.customType === "harness-turn") {
|
|
743
|
+
const cmd = (entry.data as { command?: string })?.command;
|
|
744
|
+
if (cmd === "harness-auto") return true;
|
|
745
|
+
}
|
|
532
746
|
if (entry.type !== "message" || entry.message?.role !== "user") continue;
|
|
533
747
|
const text =
|
|
534
748
|
typeof entry.message.content === "string"
|
|
@@ -554,13 +768,7 @@ export async function isPlanPhaseAllowedMutation(
|
|
|
554
768
|
},
|
|
555
769
|
): Promise<PlanPhaseMutationDecision> {
|
|
556
770
|
if (!MUTATING_FILE_TOOLS.has(toolName)) {
|
|
557
|
-
|
|
558
|
-
return { allowed: true };
|
|
559
|
-
}
|
|
560
|
-
return {
|
|
561
|
-
allowed: false,
|
|
562
|
-
reason: `policy-gate: ${toolName} blocked in phase '${phase}'.`,
|
|
563
|
-
};
|
|
771
|
+
return { allowed: true };
|
|
564
772
|
}
|
|
565
773
|
|
|
566
774
|
if (
|
|
@@ -607,7 +815,13 @@ export async function isPlanPhaseAllowedMutation(
|
|
|
607
815
|
}
|
|
608
816
|
if (phase === "evaluate" || phase === "adversary") {
|
|
609
817
|
const rel = await planRunScopedRelative(target, runCtx, projectRoot);
|
|
610
|
-
|
|
818
|
+
const relForGate = rel
|
|
819
|
+
? stripRunIdFromHarnessScopedRelative(rel, runCtx.run_id)
|
|
820
|
+
: null;
|
|
821
|
+
if (
|
|
822
|
+
(rel && isEvaluatePhaseOrchestratorArtifact(rel)) ||
|
|
823
|
+
(relForGate && isEvaluatePhaseOrchestratorArtifactRel(relForGate))
|
|
824
|
+
) {
|
|
611
825
|
return { allowed: true, isScopedPlanWrite: true };
|
|
612
826
|
}
|
|
613
827
|
}
|
|
@@ -995,6 +1209,137 @@ export async function readPlanPacketFromPath(
|
|
|
995
1209
|
}
|
|
996
1210
|
}
|
|
997
1211
|
|
|
1212
|
+
/**
 * Matches the markdown status line of a committed plan, i.e. the literal
 * `**Status:**` followed by optional whitespace and `committed`
 * (case-insensitive). Used by `isPlanCommittedOnDisk` against plan-review.md.
 *
 * NOTE: the fallback for a missing plan-packet.yaml (revision reset or
 * pre-packet phase) — deriving last_outcome from task-clarification instead
 * of treating the run as invalid — is implemented by
 * `syncPlanLastOutcomeFromTaskClarification`, not by this pattern.
 */
const PLAN_REVIEW_COMMITTED_RE = /\*\*Status:\*\*\s*committed/i;
|
|
1217
|
+
|
|
1218
|
+
/**
 * Reports whether the run's plan-review.md on disk carries a
 * `**Status:** committed` line (the state after create_plan).
 *
 * Any failure while locating or reading the file is treated as
 * "not committed" and resolves to `false`.
 */
export async function isPlanCommittedOnDisk(
  projectRoot: string,
  runId: string,
): Promise<boolean> {
  let reviewText: string;
  try {
    reviewText = await readFile(
      canonicalPlanReviewPath(runId, projectRoot),
      "utf-8",
    );
  } catch {
    return false;
  }
  return PLAN_REVIEW_COMMITTED_RE.test(reviewText);
}
|
|
1233
|
+
|
|
1234
|
+
/**
 * Align `plan_ready` / `last_outcome` with the on-disk plan packet and
 * plan-review.md, so the state survives `-p` sessions where approve_plan is
 * not present in the transcript.
 *
 * Flow:
 * 1. No readable packet → defer to `syncPlanLastOutcomeFromTaskClarification`.
 * 2. Packet fails `validatePlanPacket` → task-clarification sync, then force
 *    `plan_ready: false` and `last_outcome: "needs_clarification"`.
 * 3. Packet valid and approved (plan-review.md committed, or an approval is
 *    found in `entries`) → `plan_ready: true`; step/outcome/next command are
 *    reset to the "plan ready" state unless the run already progressed to
 *    execute / steer / review / adversary.
 * 4. Packet valid but not approved → `plan_ready: false`, outcome
 *    `pending_approval` (unless already `needs_clarification`).
 *
 * @param projectRoot Project root.
 * @param ctx         Current run context (not mutated).
 * @param entries     Optional session entries scanned for a user approval.
 */
export async function syncPlanReadyFromDisk(
  projectRoot: string,
  ctx: HarnessRunContext,
  entries?: unknown[],
): Promise<HarnessRunContext> {
  const packetPath =
    ctx.plan_packet_path ?? canonicalPlanPath(ctx.run_id, projectRoot);
  const packet = await readPlanPacketFromPath(packetPath);
  if (!packet) {
    return syncPlanLastOutcomeFromTaskClarification(projectRoot, ctx);
  }

  if (!validatePlanPacket(packet).valid) {
    const fromClarification = await syncPlanLastOutcomeFromTaskClarification(
      projectRoot,
      { ...ctx, plan_packet_path: packetPath },
    );
    // NOTE(review): next_recommended_command is whatever the clarification
    // sync computed (or left untouched) BEFORE last_outcome is forced below —
    // confirm that a possibly stale command is intended here.
    return {
      ...fromClarification,
      plan_ready: false,
      last_outcome: "needs_clarification",
    };
  }

  let approved = await isPlanCommittedOnDisk(projectRoot, ctx.run_id);
  if (!approved && entries) {
    approved = hasPlanUserApproval(entries, {
      sincePlanCommand: true,
      planId: packet.plan_id ?? null,
    });
  }

  const next: HarnessRunContext = {
    ...ctx,
    plan_packet_path: packetPath,
    plan_id: packet.plan_id ?? ctx.plan_id,
    updated_at: nowIso(),
  };

  if (!approved) {
    next.plan_ready = false;
    if (next.last_outcome !== "needs_clarification") {
      next.last_outcome = "pending_approval";
    }
    next.next_recommended_command = nextStepAfterOutcome({
      phase: next.phase,
      planStatus: null,
      lastOutcome: next.last_outcome,
      lastCompletedStep: next.last_completed_step,
    });
    return next;
  }

  next.plan_ready = true;
  // Steps that mean the run has already moved past planning; their progress
  // must not be rewound to "plan".
  const postPlanSteps = new Set<unknown>([
    "execute",
    "steer",
    "review",
    "adversary",
  ]);
  if (postPlanSteps.has(ctx.last_completed_step)) {
    return next;
  }

  next.last_completed_step = "plan";
  next.last_outcome = "ready";
  next.next_recommended_command = "/harness-run";
  const laterPhases = new Set<unknown>(["execute", "evaluate", "adversary"]);
  if (!laterPhases.has(next.phase)) {
    next.phase = "plan";
  }
  return next;
}
|
|
1313
|
+
|
|
1314
|
+
/**
 * Derive `last_outcome` and `next_recommended_command` from the run's
 * task-clarification document. `syncPlanReadyFromDisk` uses this when
 * plan-packet.yaml is missing or invalid, so such a run is not simply
 * treated as invalid.
 *
 * - No clarification document → `ctx` is returned unchanged.
 * - status `ready` → a stale `needs_clarification` outcome is cleared to `null`.
 * - status `needs_clarification` / `needs_user` / `draft` → outcome becomes
 *   `needs_clarification`.
 * - any other status → outcome is left as it was.
 *
 * The status is compared case-insensitively.
 */
export async function syncPlanLastOutcomeFromTaskClarification(
  projectRoot: string,
  ctx: HarnessRunContext,
): Promise<HarnessRunContext> {
  const clarification = await readTaskClarificationDoc(
    join(harnessRunsRoot(projectRoot), ctx.run_id),
  );
  if (!clarification) return ctx;

  const status = String(clarification.status ?? "").toLowerCase();
  const isReady = status === "ready";
  const result: HarnessRunContext = { ...ctx, updated_at: nowIso() };

  switch (status) {
    case "ready":
      if (result.last_outcome === "needs_clarification") {
        result.last_outcome = null;
      }
      break;
    case "needs_clarification":
    case "needs_user":
    case "draft":
      result.last_outcome = "needs_clarification";
      break;
    default:
      break;
  }

  result.next_recommended_command = nextStepAfterOutcome({
    phase: result.phase,
    planStatus: isReady ? null : status,
    lastOutcome: result.last_outcome,
    lastCompletedStep: result.last_completed_step,
  });
  return result;
}
|
|
1342
|
+
|
|
998
1343
|
export function validatePlanPacket(packet: PlanPacketLike | null): {
|
|
999
1344
|
valid: boolean;
|
|
1000
1345
|
errors: string[];
|
|
@@ -1361,7 +1706,8 @@ export function resolveArgsForCommand(
|
|
|
1361
1706
|
ctx: HarnessRunContext | null,
|
|
1362
1707
|
): { runId: string | null; planPath: string | null; overrideRun: boolean } {
|
|
1363
1708
|
let runId = ctx?.run_id ?? null;
|
|
1364
|
-
|
|
1709
|
+
/** Only honor explicit `--plan`; never inherit stale session plan paths onto fresh runs. */
|
|
1710
|
+
let planPath: string | null = null;
|
|
1365
1711
|
let overrideRun = false;
|
|
1366
1712
|
|
|
1367
1713
|
const explicitRun = parseArgFlag(args, "--run");
|
|
@@ -1417,6 +1763,45 @@ export function getRunIdFromSession(
|
|
|
1417
1763
|
return null;
|
|
1418
1764
|
}
|
|
1419
1765
|
|
|
1766
|
+
/**
 * True when the run already has a task summary and `newTask` names a
 * different task. Both sides are compared after trimming and lower-casing;
 * if either side is empty the tasks are NOT considered different.
 */
export function harnessAutoTasksDiffer(
  ctx: HarnessRunContext,
  newTask: string,
): boolean {
  const normalize = (text: string): string => text.trim().toLowerCase();
  const previous = normalize(ctx.task_summary ?? "");
  const incoming = normalize(newTask);
  if (previous.length === 0 || incoming.length === 0) return false;
  return previous !== incoming;
}
|
|
1774
|
+
|
|
1775
|
+
/**
 * Whether /harness-auto may keep using the current run directory.
 * The full auto pipeline needs a clean run once execute/review has started,
 * so reuse is allowed only when the run is aborted, or when no step — or
 * only the `plan` step — has completed.
 */
export function shouldReuseHarnessRunIdForAuto(
  ctx: HarnessRunContext,
): boolean {
  const completed = ctx.last_completed_step;
  return ctx.status === "aborted" || !completed || completed === "plan";
}
|
|
1784
|
+
|
|
1785
|
+
/**
 * Build a fresh copy of `ctx` for restarting /harness-auto on the same run
 * directory: phase returns to `plan`, plan readiness / id / step / outcome /
 * recommendation and steer counters are cleared, status becomes `active`,
 * and the plan packet path points at the canonical location for this run.
 * All other fields of `ctx` are carried over; `ctx` itself is not mutated.
 */
export function resetRunContextForHarnessAuto(
  ctx: HarnessRunContext,
): HarnessRunContext {
  const cleared: Partial<HarnessRunContext> = {
    phase: "plan",
    plan_ready: false,
    plan_id: null,
    plan_packet_path: canonicalPlanPath(ctx.run_id, ctx.project_root),
    status: "active",
    last_completed_step: null,
    last_outcome: null,
    next_recommended_command: null,
    steer_attempt: 0,
    steer_approved: false,
    updated_at: nowIso(),
  };
  return { ...ctx, ...cleared };
}
|
|
1804
|
+
|
|
1420
1805
|
export function shouldReuseHarnessRunId(
|
|
1421
1806
|
prompt: string,
|
|
1422
1807
|
ctx: HarnessRunContext | null,
|
|
@@ -1425,7 +1810,13 @@ export function shouldReuseHarnessRunId(
|
|
|
1425
1810
|
if (!command) return false;
|
|
1426
1811
|
if (command === "harness-new-run") return false;
|
|
1427
1812
|
if (!ctx) return false;
|
|
1428
|
-
if (command === "harness-
|
|
1813
|
+
if (command === "harness-auto") {
|
|
1814
|
+
return (
|
|
1815
|
+
(ctx.status === "active" || ctx.status === "aborted") &&
|
|
1816
|
+
shouldReuseHarnessRunIdForAuto(ctx)
|
|
1817
|
+
);
|
|
1818
|
+
}
|
|
1819
|
+
if (command === "harness-plan") {
|
|
1429
1820
|
return ctx.status === "active" || ctx.status === "aborted";
|
|
1430
1821
|
}
|
|
1431
1822
|
if (ctx.status === "active") return true;
|
|
@@ -1647,6 +2038,179 @@ export async function readReviewOutcomeFromRun(
|
|
|
1647
2038
|
}
|
|
1648
2039
|
}
|
|
1649
2040
|
|
|
2041
|
+
/**
 * Infer a remediation class from an on-disk eval verdict, for the case where
 * the parent skipped Phase 6 but eval-verdict exists.
 *
 * Precedence (first match wins, all comparisons case-insensitive):
 * 1. `status` is `pass`                                      → `pass`
 * 2. `recommended_action` contains `plan` or `revise`         → `plan_gap`
 *    (this covers `replan`)
 * 3. `recommended_action` contains `rollback`                 → `rollback`
 * 4. `recommended_action` is `steer` / `repair`, or contains
 *    `implement`                                              → `implementation_gap`
 * 5. `failed_checks` mention `scope_minimization`,
 *    `scope_drift` or `replan`                                → `plan_gap`
 * 6. `status` is `fail`                                       → `inconclusive`
 * 7. otherwise                                                → `null`
 *
 * The verdict is loaded from YAML, so its fields are coerced with `String()`
 * before lower-casing; a non-string `status` or `recommended_action` no
 * longer throws.
 *
 * @param verdict Parsed eval verdict, or `null` when none exists.
 * @returns The inferred class, or `null` when nothing can be inferred.
 */
export function remediationClassFromEvalVerdict(
  verdict: EvalVerdictDisk | null,
): RemediationClass | null {
  if (!verdict) return null;

  const status = String(verdict.status ?? "").toLowerCase();
  if (status === "pass") return "pass";

  const action = String(verdict.recommended_action ?? "").toLowerCase();
  if (action.includes("plan") || action.includes("revise")) {
    return "plan_gap";
  }
  if (action.includes("rollback")) {
    return "rollback";
  }
  if (
    action === "steer" ||
    action === "repair" ||
    action.includes("implement")
  ) {
    return "implementation_gap";
  }

  // failed_checks is not part of the declared verdict type, so it is read
  // through a widened view and used only when it is actually an array.
  const failed = (verdict as EvalVerdictDisk & { failed_checks?: string[] })
    .failed_checks;
  const joined = Array.isArray(failed) ? failed.join(" ").toLowerCase() : "";
  const planSignals = ["scope_minimization", "scope_drift", "replan"];
  if (planSignals.some((signal) => joined.includes(signal))) {
    return "plan_gap";
  }

  return status === "fail" ? "inconclusive" : null;
}
|
|
2079
|
+
|
|
2080
|
+
/**
 * Map a remediation class to the command recommended next.
 * Any class without an explicit entry falls back to `/harness-review`.
 */
export function recommendedNextForRemediation(
  remediation: RemediationClass,
): string {
  // A Map (rather than a plain object) keeps the fallback intact for any
  // unexpected runtime value, including names found on Object.prototype.
  const commandByClass = new Map<string, string>([
    ["pass", "/harness-policy-status"],
    ["implementation_gap", "/harness-steer"],
    ["plan_gap", "/harness-plan (mode: revise)"],
    ["rollback", "/harness-incident"],
  ]);
  return commandByClass.get(remediation) ?? "/harness-review";
}
|
|
2096
|
+
|
|
2097
|
+
/**
 * Resolve the remediation class for a run. A `remediation_class` recorded in
 * the run's review outcome wins; otherwise the class is inferred from the
 * run's eval verdict via `remediationClassFromEvalVerdict`.
 *
 * @returns The remediation class, or `null` when neither source yields one.
 */
export async function resolveRemediationClassForRun(
  runId: string,
  projectRoot: string,
): Promise<RemediationClass | null> {
  const recorded = (await readReviewOutcomeFromRun(runId, projectRoot))
    ?.remediation_class;
  if (recorded) {
    return recorded as RemediationClass;
  }
  const verdict = await readEvalVerdictFromRun(runId, projectRoot);
  return remediationClassFromEvalVerdict(verdict);
}
|
|
2108
|
+
|
|
2109
|
+
/**
 * Make sure the run has a review outcome carrying a remediation class.
 *
 * - If the existing review outcome already has `remediation_class`, it is
 *   returned untouched.
 * - Otherwise, when an eval verdict with a `status` exists, a synthesized
 *   outcome (`review_tier: "synthesized"`) is written to
 *   `<runs>/<runId>/artifacts/review-outcome.yaml`, and
 *   `ensureRepairBriefOnDisk` is invoked with `steerAttempt: 0`.
 * - With no usable eval verdict the function resolves to `null` and writes
 *   nothing.
 *
 * NOTE(review): an existing review-outcome.yaml that lacks
 * `remediation_class` is replaced by the synthesized document — confirm that
 * discarding its other fields is intended.
 */
export async function ensureReviewOutcomeFromEval(
  runId: string,
  projectRoot: string,
): Promise<ReviewOutcomeLike | null> {
  const current = await readReviewOutcomeFromRun(runId, projectRoot);
  if (current?.remediation_class) return current;

  const verdict = await readEvalVerdictFromRun(runId, projectRoot);
  if (!verdict?.status) return null;

  const remediation =
    remediationClassFromEvalVerdict(verdict) ?? "inconclusive";

  // Collapse the verdict status to the three values a review outcome uses.
  const verdictStatus = (verdict.status ?? "").toLowerCase();
  let status: "pass" | "fail" | "inconclusive" = "inconclusive";
  if (verdictStatus === "pass") {
    status = "pass";
  } else if (verdictStatus === "fail") {
    status = "fail";
  }

  const outcome: ReviewOutcomeLike & {
    run_id: string;
    recommended_next: string;
    source_artifacts: Record<string, string>;
    review_tier: string;
  } = {
    schema_version: "1.0.0",
    run_id: runId,
    status,
    remediation_class: remediation,
    recommended_next: recommendedNextForRemediation(remediation),
    source_artifacts: { "eval-verdict": "artifacts/eval-verdict.yaml" },
    review_tier: "synthesized",
  };

  const artifactsDir = join(harnessRunsRoot(projectRoot), runId, "artifacts");
  await writeYamlFile(join(artifactsDir, "review-outcome.yaml"), outcome);

  // Loaded lazily with a dynamic import, as in the original.
  const repairBrief = await import("./harness-repair-brief.js");
  await repairBrief.ensureRepairBriefOnDisk({
    runId,
    projectRoot,
    steerAttempt: 0,
  });

  return outcome;
}
|
|
2160
|
+
|
|
2161
|
+
/**
 * Align `next_recommended_command` with the on-disk review/eval routing
 * after /harness-review.
 *
 * - Returns `ctx` unchanged unless the last completed step is `review` or
 *   `adversary`, or an eval verdict with a status exists on disk.
 * - When a verdict exists but the context still says
 *   `execute` / `completed`, the working copy is advanced to step `review`,
 *   phase `evaluate`, with the verdict status as `last_outcome`.
 * - Ensures a review outcome exists (`ensureReviewOutcomeFromEval`), resolves
 *   the remediation class, and recomputes the recommended next command.
 *   Without a remediation class the working copy is returned as-is.
 */
export async function reconcileReviewRouting(
  projectRoot: string,
  ctx: HarnessRunContext,
): Promise<HarnessRunContext> {
  const verdict = await readEvalVerdictFromRun(ctx.run_id, projectRoot);

  const inReviewFlow =
    ctx.last_completed_step === "review" ||
    ctx.last_completed_step === "adversary" ||
    Boolean(verdict?.status);
  if (!inReviewFlow) return ctx;

  // The verdict on disk is newer than the context: promote execute → review.
  const working: HarnessRunContext =
    verdict?.status &&
    ctx.last_completed_step === "execute" &&
    String(ctx.last_outcome ?? "").toLowerCase() === "completed"
      ? {
          ...ctx,
          last_completed_step: "review",
          last_outcome: verdict.status,
          phase: "evaluate",
        }
      : { ...ctx };

  await ensureReviewOutcomeFromEval(working.run_id, projectRoot);

  const remediation = await resolveRemediationClassForRun(
    working.run_id,
    projectRoot,
  );
  if (!remediation) return working;

  // NOTE(review): evalStatus is fed from last_outcome, not from the verdict
  // read above — confirm this is intended when the two can differ.
  const recommended = nextStepAfterOutcome({
    phase: working.phase,
    lastCompletedStep: working.last_completed_step,
    lastOutcome: working.last_outcome,
    evalStatus: working.last_outcome,
    remediationClass: remediation,
    steerAttempt: working.steer_attempt ?? 0,
    steerMaxAttempts: working.steer_max_attempts ?? steerMaxAttemptsFromEnv(),
    reviewComplete: true,
    aborted: working.status === "aborted",
  });

  return {
    ...working,
    next_recommended_command: recommended,
    updated_at: nowIso(),
  };
}
|
|
2213
|
+
|
|
1650
2214
|
function nextStepForEvaluateLikePhase(input: {
|
|
1651
2215
|
adversaryComplete?: boolean;
|
|
1652
2216
|
remediation: string;
|
|
@@ -1668,7 +2232,16 @@ function nextStepForEvaluateLikePhase(input: {
|
|
|
1668
2232
|
return "/harness-plan (mode: revise) or /harness-abort";
|
|
1669
2233
|
}
|
|
1670
2234
|
if (input.evalStatus === "fail") {
|
|
1671
|
-
if (input.
|
|
2235
|
+
if (input.remediation === "plan_gap") {
|
|
2236
|
+
return "/harness-plan (mode: revise)";
|
|
2237
|
+
}
|
|
2238
|
+
if (
|
|
2239
|
+
input.remediation === "implementation_gap" ||
|
|
2240
|
+
input.remediation === "inconclusive"
|
|
2241
|
+
) {
|
|
2242
|
+
if (input.steerAttempt < input.steerMax) return "/harness-steer";
|
|
2243
|
+
return "/harness-plan (mode: revise) or /harness-abort";
|
|
2244
|
+
}
|
|
1672
2245
|
return "/harness-plan (mode: revise) or /harness-incident";
|
|
1673
2246
|
}
|
|
1674
2247
|
if (input.adversaryComplete) return "/harness-policy-status";
|
|
@@ -1698,9 +2271,13 @@ export function nextStepAfterOutcome(input: {
|
|
|
1698
2271
|
return "Reply with answers or run /harness-plan with updates";
|
|
1699
2272
|
}
|
|
1700
2273
|
|
|
2274
|
+
const lastOutcome = (input.lastOutcome ?? "").toLowerCase();
|
|
2275
|
+
if (input.phase === "plan" && lastOutcome === "pending_approval") {
|
|
2276
|
+
return "Continue /harness-plan: finish Review Gate (harness_debate_round_status → debate lanes → harness_debate_consensus), then approve_plan";
|
|
2277
|
+
}
|
|
2278
|
+
|
|
1701
2279
|
const lastStep = (input.lastCompletedStep ?? "").toLowerCase();
|
|
1702
2280
|
const exec = (input.executionStatus ?? "").toLowerCase();
|
|
1703
|
-
const lastOutcome = (input.lastOutcome ?? "").toLowerCase();
|
|
1704
2281
|
const evalSt = (input.evalStatus ?? "").toLowerCase();
|
|
1705
2282
|
const remediation = (input.remediationClass ?? "").toLowerCase();
|
|
1706
2283
|
const steerAttempt = input.steerAttempt ?? 0;
|
|
@@ -1752,6 +2329,216 @@ export function nextStepAfterOutcome(input: {
|
|
|
1752
2329
|
}
|
|
1753
2330
|
|
|
1754
2331
|
/** Read executor handoff artifact written by harness/running/executor submit pipeline. */
|
|
2332
|
+
/**
 * Derive the run-context fields to record after a /harness-run agent turn.
 * Execution is only treated as done when an executor status is supplied.
 *
 * @param execStatus Executor status, or null/empty when none is available.
 * @param planReady Whether the plan is ready; only used when there is no
 *   executor status (yields `last_outcome: "ready"` vs `null`).
 * @returns The step/outcome/phase/next-command fields to merge into the context.
 */
export function resolveHarnessRunPostAgentState(
  execStatus: string | null,
  planReady: boolean,
): Pick<
  HarnessRunContext,
  "last_completed_step" | "last_outcome" | "phase" | "next_recommended_command"
> {
  if (execStatus) {
    // The comparison is case-insensitive, but the stored outcome keeps the
    // executor's original spelling.
    if (execStatus.toLowerCase() === "completed") {
      return {
        last_completed_step: "execute",
        last_outcome: execStatus,
        phase: "evaluate",
        next_recommended_command: "/harness-review",
      };
    }
    return {
      last_completed_step: "execute",
      last_outcome: execStatus,
      phase: "execute",
      next_recommended_command: "/harness-run",
    };
  }

  // No executor evidence: stay in the plan phase and point back at /harness-run.
  return {
    last_completed_step: "plan",
    last_outcome: planReady ? "ready" : null,
    phase: "plan",
    next_recommended_command: "/harness-run",
  };
}
|
|
2357
|
+
|
|
2358
|
+
/**
 * Report whether the run context already reflects the executor handoff status.
 *
 * True only when the context's last completed step is "execute" and its
 * `last_outcome` equals `executionStatus`, compared case-insensitively.
 * A missing `last_outcome` is treated as the empty string.
 *
 * @param ctx Run context to inspect.
 * @param executionStatus Status reported by the executor handoff.
 */
function executeCompletionMatchesHandoff(
  ctx: HarnessRunContext,
  executionStatus: string,
): boolean {
  if (ctx.last_completed_step !== "execute") return false;
  // One normalized comparison covers "completed" and every other status alike.
  const recordedOutcome = String(ctx.last_outcome ?? "").toLowerCase();
  return recordedOutcome === executionStatus.toLowerCase();
}
|
|
2368
|
+
|
|
2369
|
+
/**
 * Reconcile the run context with plan readiness and the executor handoff.
 *
 * After syncing `plan_ready` via `syncPlanReadyFromDisk`:
 * - context says "execute / completed" but the handoff has no status: the
 *   execute fields are reset to the no-executor-evidence state;
 * - handoff has a status, the run has not moved on to review/adversary/steer,
 *   and the context does not already match it: the context is overwritten with
 *   the state derived from the handoff status;
 * - otherwise the synced context is returned unchanged.
 *
 * @param projectRoot Project root used to locate the run's artifacts.
 * @param ctx Current run context.
 * @param entries Optional session entries forwarded to `syncPlanReadyFromDisk`.
 */
export async function reconcileStaleExecuteCompletion(
  projectRoot: string,
  ctx: HarnessRunContext,
  entries?: unknown[],
): Promise<HarnessRunContext> {
  const synced = await syncPlanReadyFromDisk(projectRoot, ctx, entries);

  const claimsCompleted =
    synced.last_completed_step === "execute" &&
    String(synced.last_outcome ?? "").toLowerCase() === "completed";

  const handoff = await readExecutorHandoffFromRun(synced.run_id, projectRoot);
  const handoffStatus = handoff?.execution_status;

  if (!handoffStatus) {
    if (!claimsCompleted) return synced;
    // "completed" without a handoff status is not trusted: roll back.
    return {
      ...synced,
      ...resolveHarnessRunPostAgentState(null, synced.plan_ready),
    };
  }

  // Later steps already ran; leave their progress alone.
  if (
    synced.last_completed_step === "review" ||
    synced.last_completed_step === "adversary" ||
    synced.last_completed_step === "steer"
  ) {
    return synced;
  }

  if (executeCompletionMatchesHandoff(synced, handoffStatus)) return synced;

  return {
    ...synced,
    ...resolveHarnessRunPostAgentState(handoffStatus, synced.plan_ready),
  };
}
|
|
2409
|
+
|
|
2410
|
+
/**
 * Decide whether a /harness-auto invocation must be refused.
 *
 * @param command Command name; anything other than "harness-auto" is ignored.
 * @param activeCtx Currently active run context, if any.
 * @param args Raw command arguments.
 * @param userPrompt Raw user prompt, used as a fallback source for the task.
 * @returns A usage message when no task can be extracted, a refusal when an
 *   active, owned run with a task summary is for a different task, else null.
 */
export async function blockingHarnessAutoCommandReason(
  command: string,
  activeCtx: HarnessRunContext | null,
  args: string,
  userPrompt: string,
): Promise<string | null> {
  if (command !== "harness-auto") return null;

  const requestedTask = extractTaskSummaryFromHarnessInput(args, userPrompt);
  if (!requestedTask) {
    return 'Usage: /harness-auto "<task>" [--quick] [--risk low|med|high]';
  }

  // Only an active run that has an owner session and a recorded task can conflict.
  if (activeCtx?.status !== "active") return null;
  if (!activeCtx.owner_pi_session_id || !activeCtx.task_summary) return null;
  if (!harnessAutoTasksDiffer(activeCtx, requestedTask)) return null;

  return "Active harness run is for a different task. Run /harness-abort or /harness-new-run before /harness-auto with a new task.";
}
|
|
2431
|
+
|
|
2432
|
+
/**
 * Pull a task summary out of harness command input.
 *
 * Sources are tried in order:
 * 1. the first double-quoted segment of `args`;
 * 2. the trimmed `args`, capped at 200 characters, unless empty or starting
 *    with `--`;
 * 3. the first double-quoted segment of `prompt`.
 *
 * @param args Raw command arguments.
 * @param prompt Optional raw user prompt.
 * @returns The extracted task text, or null when none of the sources match.
 */
function extractTaskSummaryFromHarnessInput(
  args: string,
  prompt?: string,
): string | null {
  const firstQuoted = /"([^"]+)"/;

  const quotedInArgs = firstQuoted.exec(args)?.[1];
  if (quotedInArgs) return quotedInArgs;

  const bareArgs = args.trim();
  if (bareArgs !== "" && !bareArgs.startsWith("--")) {
    return bareArgs.slice(0, 200);
  }

  const quotedInPrompt = prompt ? firstQuoted.exec(prompt)?.[1] : undefined;
  return quotedInPrompt || null;
}
|
|
2447
|
+
|
|
2448
|
+
/**
 * Decide whether a /harness-run invocation must be refused.
 *
 * @param command Command name; anything other than "harness-run" is ignored.
 * @param activeCtx Active run context.
 * @param projectRoot Project root used to locate the executor handoff.
 * @param entries Optional session entries; the gate is skipped when
 *   `isHarnessAutoSession(entries)` is true.
 * @returns A refusal message when the plan is not ready or execute has already
 *   completed (per the context or the executor handoff), otherwise null.
 */
export async function blockingRunCommandReason(
  command: string,
  activeCtx: HarnessRunContext,
  projectRoot: string,
  entries?: unknown[],
): Promise<string | null> {
  if (command !== "harness-run") return null;
  if (entries && isHarnessAutoSession(entries)) return null;
  if (!activeCtx.plan_ready) return "Plan not ready. Run /harness-plan first.";

  const alreadyCompleted =
    "Execute already completed for this run. Next: /harness-review (same session), or /harness-abort to replan.";

  const contextSaysCompleted =
    activeCtx.last_completed_step === "execute" &&
    String(activeCtx.last_outcome ?? "").toLowerCase() === "completed";
  // The context alone is conclusive, so the handoff read is skipped.
  if (contextSaysCompleted) return alreadyCompleted;

  const handoff = await readExecutorHandoffFromRun(
    activeCtx.run_id,
    projectRoot,
  );
  // Case-insensitive, matching how execution status is compared elsewhere in
  // this file, so "Completed" blocks a re-run just like "completed".
  const handoffSaysCompleted =
    handoff?.execution_status?.toLowerCase() === "completed";
  return handoffSaysCompleted ? alreadyCompleted : null;
}
|
|
2471
|
+
|
|
2472
|
+
/**
 * Decide whether a review-type command must be refused because execute has
 * not produced anything to review yet.
 *
 * Applies to "harness-review", "harness-eval" and "harness-critic". The command
 * is allowed when the executor handoff carries any execution status, or when
 * the context's last completed step is "execute" with outcome "completed",
 * "scope_drift" or "blocked" (case-insensitive).
 *
 * @param command Command name being gated.
 * @param activeCtx Active run context.
 * @param projectRoot Project root used to locate the executor handoff.
 * @returns A refusal message, or null when the command may proceed.
 */
export async function blockingReviewCommandReason(
  command: string,
  activeCtx: HarnessRunContext,
  projectRoot: string,
): Promise<string | null> {
  const reviewCommands = ["harness-review", "harness-eval", "harness-critic"];
  if (!reviewCommands.includes(command)) return null;

  const handoff = await readExecutorHandoffFromRun(
    activeCtx.run_id,
    projectRoot,
  );
  // Any recorded handoff status is enough to allow the review.
  if (handoff?.execution_status) return null;

  const notFinished = "Execute not finished. Run /harness-run first.";
  if (activeCtx.last_completed_step !== "execute") return notFinished;

  switch (String(activeCtx.last_outcome ?? "").toLowerCase()) {
    case "completed":
    case "scope_drift":
    case "blocked":
      return null;
    default:
      return notFinished;
  }
}
|
|
2496
|
+
|
|
2497
|
+
/**
 * Decide whether a /harness-steer invocation must be refused.
 *
 * Checks, in order: a remediation class or eval verdict status exists; the
 * remediation class is exactly "implementation_gap"; `artifacts/repair-brief.yaml`
 * can be read; the steer attempt count is below the cap.
 *
 * NOTE(review): the visible `nextStepForEvaluateLikePhase` branch can return
 * "/harness-steer" for an "inconclusive" remediation, while this gate admits
 * only "implementation_gap" — confirm that the mismatch is intended.
 *
 * @param command Command name; anything other than "harness-steer" is ignored.
 * @param activeCtx Active run context.
 * @param projectRoot Project root used to locate the run's artifacts.
 * @returns A refusal message, or null when steering may proceed.
 */
export async function blockingSteerCommandReason(
  command: string,
  activeCtx: HarnessRunContext,
  projectRoot: string,
): Promise<string | null> {
  if (command !== "harness-steer") return null;

  const runId = activeCtx.run_id;

  // Make sure review-outcome.yaml exists before resolving the remediation class.
  await ensureReviewOutcomeFromEval(runId, projectRoot);

  const remediation = await resolveRemediationClassForRun(runId, projectRoot);
  const verdict = await readEvalVerdictFromRun(runId, projectRoot);

  if (!remediation && !verdict?.status) {
    return "Run /harness-review first (no eval-verdict or review-outcome on disk).";
  }

  if (remediation !== "implementation_gap") {
    let next: string;
    if (remediation != null) {
      next = recommendedNextForRemediation(remediation);
    } else {
      next = "/harness-plan (mode: revise)";
    }
    return `Steer applies only for implementation_gap (resolved: ${remediation ?? "unknown"}). Next: ${next}`;
  }

  const repairBriefPath = join(
    harnessRunsRoot(projectRoot),
    runId,
    "artifacts",
    "repair-brief.yaml",
  );
  try {
    await readYamlFile(repairBriefPath, "repair-brief");
  } catch {
    // Any read failure is reported as a missing brief.
    return "Run /harness-review first (artifacts/repair-brief.yaml missing).";
  }

  const max = activeCtx.steer_max_attempts ?? steerMaxAttemptsFromEnv();
  const attemptsUsed = activeCtx.steer_attempt ?? 0;
  return attemptsUsed >= max
    ? `Steer attempt cap reached (${max}). Use /harness-plan (mode: revise) or /harness-abort.`
    : null;
}
|
|
2541
|
+
|
|
1755
2542
|
export async function readExecutorHandoffFromRun(
|
|
1756
2543
|
runId: string,
|
|
1757
2544
|
projectRoot: string,
|