ultimate-pi 0.17.0 → 0.18.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agents/skills/harness-context/SKILL.md +13 -6
- package/.agents/skills/harness-debate-plan/SKILL.md +37 -20
- package/.agents/skills/harness-decisions/SKILL.md +1 -1
- package/.agents/skills/harness-eval/SKILL.md +6 -21
- package/.agents/skills/harness-governor/SKILL.md +4 -3
- package/.agents/skills/harness-orchestration/SKILL.md +41 -53
- package/.agents/skills/harness-plan/SKILL.md +23 -12
- package/.agents/skills/harness-review/SKILL.md +52 -0
- package/.agents/skills/harness-sentrux-setup/SKILL.md +16 -3
- package/.agents/skills/harness-steer/SKILL.md +14 -0
- package/.agents/skills/sentrux/SKILL.md +9 -9
- package/.pi/agents/harness/planning/decompose.md +7 -4
- package/.pi/agents/harness/planning/hypothesis-validator.md +2 -0
- package/.pi/agents/harness/planning/hypothesis.md +3 -1
- package/.pi/agents/harness/planning/plan-adversary.md +2 -0
- package/.pi/agents/harness/planning/plan-evaluator.md +2 -0
- package/.pi/agents/harness/planning/plan-synthesizer.md +25 -0
- package/.pi/agents/harness/planning/planning-context.md +48 -0
- package/.pi/agents/harness/planning/review-integrator.md +2 -0
- package/.pi/agents/harness/planning/sprint-contract-auditor.md +2 -0
- package/.pi/agents/harness/{adversary.md → reviewing/adversary.md} +3 -10
- package/.pi/agents/harness/{evaluator.md → reviewing/evaluator.md} +3 -12
- package/.pi/agents/harness/running/executor.md +45 -0
- package/.pi/agents/harness/sentrux-steward.md +51 -0
- package/.pi/extensions/00-harness-project-control.ts +133 -0
- package/.pi/extensions/00-posthog-network-bootstrap.ts +11 -0
- package/.pi/extensions/budget-guard.ts +2 -0
- package/.pi/extensions/debate-orchestrator.ts +2 -0
- package/.pi/extensions/harness-ask-user.ts +2 -2
- package/.pi/extensions/harness-debate-tools.ts +2 -2
- package/.pi/extensions/harness-live-widget.ts +60 -3
- package/.pi/extensions/harness-plan-approval.ts +64 -58
- package/.pi/extensions/harness-run-context.ts +715 -90
- package/.pi/extensions/harness-subagent-submit.ts +46 -12
- package/.pi/extensions/harness-subagents.ts +2 -2
- package/.pi/extensions/harness-telemetry.ts +2 -0
- package/.pi/extensions/harness-web-tools.ts +2 -2
- package/.pi/extensions/lib/extension-load-guard.ts +10 -0
- package/.pi/extensions/lib/harness-artifact-gate.ts +172 -0
- package/.pi/extensions/lib/harness-posthog.ts +9 -5
- package/.pi/extensions/lib/harness-spawn-topology.ts +165 -0
- package/.pi/extensions/lib/harness-subagent-auth.ts +1 -2
- package/.pi/extensions/lib/harness-subagent-policy.ts +28 -24
- package/.pi/extensions/lib/harness-subagent-precheck.ts +36 -10
- package/.pi/extensions/lib/harness-subagent-submit-pipeline.ts +66 -2
- package/.pi/extensions/lib/harness-subagent-submit-registry.ts +22 -22
- package/.pi/extensions/lib/harness-subagents-bridge.ts +7 -29
- package/.pi/extensions/lib/harness-subprocess-bootstrap.ts +73 -0
- package/.pi/extensions/lib/plan-approval/create-plan.ts +2 -3
- package/.pi/extensions/lib/plan-approval/resolve-disk.ts +102 -0
- package/.pi/extensions/lib/plan-approval/schema.ts +22 -8
- package/.pi/extensions/lib/plan-approval/types.ts +1 -1
- package/.pi/extensions/lib/plan-approval/validate.ts +2 -2
- package/.pi/extensions/lib/plan-approval-readiness.ts +192 -0
- package/.pi/extensions/lib/plan-debate-eligibility.ts +12 -5
- package/.pi/extensions/lib/plan-debate-gate.ts +22 -1
- package/.pi/extensions/lib/plan-debate-lanes.ts +32 -2
- package/.pi/extensions/lib/plan-review-gate.ts +8 -0
- package/.pi/extensions/lib/posthog-client.ts +76 -0
- package/.pi/extensions/lib/spawn-policy.ts +3 -3
- package/.pi/extensions/observation-bus.ts +2 -0
- package/.pi/extensions/policy-gate.ts +26 -19
- package/.pi/extensions/review-integrity.ts +91 -10
- package/.pi/extensions/sentrux-rules-sync.ts +2 -0
- package/.pi/extensions/test-diff-integrity.ts +1 -0
- package/.pi/extensions/trace-recorder.ts +2 -0
- package/.pi/harness/agents.manifest.json +37 -37
- package/.pi/harness/corpus/cron.example +8 -0
- package/.pi/harness/corpus/graphify-kb-updater.config.json +214 -0
- package/.pi/harness/corpus/systemd/graphify-kb-updater.env.template +4 -0
- package/.pi/harness/corpus/systemd/graphify-kb-updater.service +17 -0
- package/.pi/harness/corpus/systemd/graphify-kb-updater.timer +11 -0
- package/.pi/harness/docs/adrs/0001-harness-constitution.md +2 -1
- package/.pi/harness/docs/adrs/0006-sentrux-dual-layer.md +8 -6
- package/.pi/harness/docs/adrs/0009-sentrux-rules-lifecycle.md +6 -1
- package/.pi/harness/docs/adrs/0031-harness-run-context.md +1 -1
- package/.pi/harness/docs/adrs/0032-harness-command-orchestration.md +7 -0
- package/.pi/harness/docs/adrs/0034-darwin-plan-research-pipeline.md +3 -3
- package/.pi/harness/docs/adrs/0036-implementation-research-and-selective-debate.md +8 -5
- package/.pi/harness/docs/adrs/0039-harness-post-run-review-gate.md +47 -0
- package/.pi/harness/docs/adrs/0040-practice-grounded-orchestration.md +40 -0
- package/.pi/harness/docs/adrs/0041-intelligent-planning-reconnaissance.md +39 -0
- package/.pi/harness/docs/adrs/0042-agent-native-orchestration.md +35 -0
- package/.pi/harness/docs/adrs/0043-path-first-harness-tools.md +38 -0
- package/.pi/harness/docs/adrs/0044-harness-steer-loop.md +37 -0
- package/.pi/harness/docs/adrs/0045-phase-scoped-agent-directories.md +33 -0
- package/.pi/harness/docs/adrs/README.md +11 -0
- package/.pi/harness/docs/graphify-kb-updater-runbook.md +163 -0
- package/.pi/harness/docs/practice-map.md +110 -0
- package/.pi/harness/env.harness.template +5 -3
- package/.pi/harness/evals/smoke/sentrux-stub.json +1 -1
- package/.pi/harness/evals/smoke/smoke-harness-plan.mjs +5 -2
- package/.pi/harness/specs/README.md +1 -1
- package/.pi/harness/specs/harness-run-context.schema.json +11 -0
- package/.pi/harness/specs/harness-spawn-context.schema.json +15 -1
- package/.pi/harness/specs/plan-execution-plan.schema.json +39 -1
- package/.pi/harness/specs/plan-packet.schema.json +4 -0
- package/.pi/harness/specs/plan-phase-status.schema.json +17 -0
- package/.pi/harness/specs/plan-phase-waiver.schema.json +25 -0
- package/.pi/harness/specs/plan-planning-context.schema.json +50 -0
- package/.pi/harness/specs/repair-brief.schema.json +45 -0
- package/.pi/harness/specs/review-outcome.schema.json +46 -0
- package/.pi/harness/specs/sentrux-manifest-proposal.schema.json +80 -0
- package/.pi/harness/specs/sentrux-signal.schema.json +43 -0
- package/.pi/harness/specs/steer-state.schema.json +20 -0
- package/.pi/lib/harness-context-mode-policy.ts +256 -0
- package/.pi/lib/harness-project-config.ts +91 -0
- package/.pi/lib/harness-repair-brief.ts +145 -0
- package/.pi/lib/harness-run-context.ts +591 -32
- package/.pi/lib/harness-ui-state.ts +114 -21
- package/.pi/prompts/harness-auto.md +10 -10
- package/.pi/prompts/harness-critic.md +3 -30
- package/.pi/prompts/harness-eval.md +4 -37
- package/.pi/prompts/harness-plan.md +116 -54
- package/.pi/prompts/harness-review.md +150 -15
- package/.pi/prompts/harness-run.md +62 -10
- package/.pi/prompts/harness-sentrux-steward.md +55 -0
- package/.pi/prompts/harness-setup.md +5 -4
- package/.pi/prompts/harness-steer.md +30 -0
- package/.pi/scripts/README.md +1 -0
- package/.pi/scripts/graphify-kb-updater.mjs +398 -0
- package/.pi/scripts/harness-agents-manifest.mjs +1 -1
- package/.pi/scripts/harness-project-toggle.mjs +129 -0
- package/.pi/scripts/harness-sentrux-cli.mjs +142 -0
- package/.pi/scripts/harness-verify.mjs +22 -6
- package/.pi/scripts/harness-web-policy-guard.mjs +68 -0
- package/.pi/scripts/validate-plan-dag.mjs +3 -3
- package/AGENTS.md +1 -0
- package/CHANGELOG.md +23 -0
- package/README.md +94 -58
- package/package.json +5 -4
- package/.pi/agents/harness/executor.md +0 -47
- package/.pi/agents/harness/planning/scout-graphify.md +0 -37
- package/.pi/agents/harness/planning/scout-semantic.md +0 -39
- package/.pi/agents/harness/planning/scout-structure.md +0 -35
- package/.pi/prompts/git-sync.md +0 -124
- /package/.pi/agents/harness/{tie-breaker.md → reviewing/tie-breaker.md} +0 -0
|
@@ -5,18 +5,27 @@
|
|
|
5
5
|
* in before_agent_start so trace-recorder reuses it on agent_start.
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
|
-
import {
|
|
9
|
-
|
|
10
|
-
|
|
8
|
+
import {
|
|
9
|
+
mkdir,
|
|
10
|
+
readdir,
|
|
11
|
+
readFile,
|
|
12
|
+
rename,
|
|
13
|
+
stat,
|
|
14
|
+
writeFile,
|
|
15
|
+
} from "node:fs/promises";
|
|
16
|
+
import { basename, dirname, join } from "node:path";
|
|
11
17
|
import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
|
|
12
18
|
import { Type } from "@sinclair/typebox";
|
|
13
19
|
import {
|
|
14
20
|
canonicalPlanPath,
|
|
21
|
+
claimRunOwnership,
|
|
15
22
|
createFreshRunContext,
|
|
23
|
+
criticalPathWorkItemIdsFromPlanPacket,
|
|
16
24
|
driftGateActive,
|
|
17
|
-
|
|
25
|
+
evaluateCrossSessionResume,
|
|
18
26
|
extractWritePathFromToolInput,
|
|
19
27
|
formatActivePlanBlock,
|
|
28
|
+
formatCrossSessionResumeMessage,
|
|
20
29
|
formatPlanContextBlock,
|
|
21
30
|
getLatestHarnessTurn,
|
|
22
31
|
getLatestPolicyPhase,
|
|
@@ -40,13 +49,20 @@ import {
|
|
|
40
49
|
nowIso,
|
|
41
50
|
type PlanPacketSummary,
|
|
42
51
|
parseHarnessSlashInput,
|
|
52
|
+
parseHarnessUseRunArgs,
|
|
43
53
|
parsePlanApprovalFromMessage,
|
|
44
54
|
planPacketSummary,
|
|
55
|
+
readExecutorHandoffFromRun,
|
|
45
56
|
readPlanPacketFromPath,
|
|
57
|
+
readReviewOutcomeFromRun,
|
|
46
58
|
resolveArgsForCommand,
|
|
59
|
+
resolveCompletionStatuses,
|
|
47
60
|
saveProjectActiveRun,
|
|
48
61
|
saveRunContextToDisk,
|
|
62
|
+
sessionHasResumePromptForRun,
|
|
63
|
+
shouldAutoClaimHarnessRun,
|
|
49
64
|
shouldReuseHarnessRunId,
|
|
65
|
+
steerMaxAttemptsFromEnv,
|
|
50
66
|
userVisiblePromptSlice,
|
|
51
67
|
validatePlanOverridePath,
|
|
52
68
|
validatePlanPacket,
|
|
@@ -56,11 +72,12 @@ import {
|
|
|
56
72
|
parseStructuredDocument,
|
|
57
73
|
writeYamlFile,
|
|
58
74
|
} from "../lib/harness-yaml.js";
|
|
59
|
-
import {
|
|
75
|
+
import { claimHarnessGovernanceLoad } from "./lib/extension-load-guard.js";
|
|
60
76
|
import {
|
|
61
77
|
evaluateHarnessSubagentToolCall,
|
|
62
78
|
isSubmitToolName,
|
|
63
79
|
} from "./lib/harness-subagent-policy.js";
|
|
80
|
+
import { bootstrapHarnessSubprocessFromEnv } from "./lib/harness-subprocess-bootstrap.js";
|
|
64
81
|
import { isReviewRoundArtifactPath } from "./lib/plan-debate-gate.js";
|
|
65
82
|
import { isReviewRoundYamlWriteAllowed } from "./lib/plan-debate-write-guard.js";
|
|
66
83
|
|
|
@@ -83,6 +100,151 @@ function persistContext(pi: ExtensionAPI, ctx: HarnessRunContext): void {
|
|
|
83
100
|
pi.appendEntry("harness-run-context", ctx);
|
|
84
101
|
void saveRunContextToDisk(ctx);
|
|
85
102
|
void saveProjectActiveRun(ctx);
|
|
103
|
+
pi.events.emit("harness-run-context:updated", { run_id: ctx.run_id });
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
/**
 * Exact file names (inside a run's `artifacts/` directory) that are matched
 * by `isPlanRevisionArtifactFile` and therefore archived on a plan revision.
 */
const PLAN_REVISION_ARTIFACT_FILES = new Set([
	"planning-context.yaml",
	"decomposition.yaml",
	"hypothesis.yaml",
	"implementation-research.yaml",
	"stack.yaml",
	"execution-plan-draft.yaml",
	"plan-phase-status.yaml",
	"plan-phase-waiver.yaml",
	"sentrux-manifest-proposal.yaml",
]);

/**
 * File-name prefixes for artifacts whose names carry a suffix after the
 * trailing `-r` (presumably a round number — confirm against the writers).
 * Any file whose name starts with one of these is archived as well.
 */
const PLAN_REVISION_ARTIFACT_PREFIXES = [
	"hypothesis-validation-r",
	"review-round-r",
	"plan-evaluator-r",
	"plan-adversary-r",
	"sprint-contract-audit-r",
	"adversary-brief-r",
] as const;
|
|
126
|
+
|
|
127
|
+
/**
 * Moves `from` to `to` when `from` exists, creating the destination's parent
 * directory first.
 *
 * The destination directory is only created after the source has been seen,
 * so probing for a missing source leaves no empty directories behind.
 *
 * @param from - Path of the file or directory to move.
 * @param to - Destination path; its parent directory is created recursively.
 * @returns `true` when the entry was renamed, `false` when there was nothing
 *   to move (including when the source disappears before the rename).
 * @throws Any `mkdir` failure, or any `rename` failure other than `ENOENT`.
 */
async function moveIfExists(from: string, to: string): Promise<boolean> {
	try {
		await stat(from);
	} catch {
		// Best-effort probe: any stat failure is reported as "nothing to move".
		return false;
	}
	await mkdir(dirname(to), { recursive: true });
	try {
		await rename(from, to);
	} catch (err) {
		// The source can vanish between the stat() probe and rename(); the
		// destination directory was just created, so ENOENT here means the
		// source is gone. Report that the same way as a missing source.
		if ((err as { code?: string } | null)?.code === "ENOENT") return false;
		throw err;
	}
	return true;
}
|
|
137
|
+
|
|
138
|
+
/**
 * Tells whether an `artifacts/` file name is one of the plan-revision
 * artifacts.
 *
 * @param name - Bare file name (no directory component).
 * @returns `true` for an exact match in `PLAN_REVISION_ARTIFACT_FILES`, for
 *   `review-round-consolidated.yaml`, or for any name starting with one of
 *   `PLAN_REVISION_ARTIFACT_PREFIXES`; `false` otherwise.
 */
function isPlanRevisionArtifactFile(name: string): boolean {
	// The consolidated review file does not start with "review-round-r", so
	// the prefix list cannot catch it; it is matched by name.
	const isExactMatch =
		PLAN_REVISION_ARTIFACT_FILES.has(name) ||
		name === "review-round-consolidated.yaml";
	if (isExactMatch) return true;

	for (const prefix of PLAN_REVISION_ARTIFACT_PREFIXES) {
		if (name.startsWith(prefix)) return true;
	}
	return false;
}
|
|
145
|
+
|
|
146
|
+
/**
 * Moves the planning artifacts of a run into a per-revision archive directory
 * at `<run>/artifacts/revisions/<revisionId>/`.
 *
 * Archived, when present: `plan-packet.yaml`, `plan-review.md`,
 * `research-brief.yaml` and `debate-messenger` from the run directory, every
 * entry in `artifacts/` accepted by `isPlanRevisionArtifactFile`, and the
 * project-level debate log `.pi/harness/debates/plan-<runId>.jsonl`.
 * When anything was moved, a `revision-reset.json` manifest is written into
 * the archive directory.
 *
 * @param input.projectRoot - Project root containing `.pi/harness`.
 * @param input.runId - Run whose artifacts are archived.
 * @param input.reason - Free-form reason recorded in the manifest.
 * @param input.recordedAt - Timestamp used for the manifest and the archive
 *   directory name; defaults to `nowIso()`.
 * @returns The archive directory path and the list of moved entries. Entries
 *   are relative to the run directory, except the debate log, which is
 *   relative to the project root.
 * @throws Any failure from moving an entry or from writing the manifest.
 */
export async function archivePlanRevisionArtifacts(input: {
	projectRoot: string;
	runId: string;
	reason: string;
	recordedAt?: string;
}): Promise<{ archiveDir: string; moved: string[] }> {
	const recordedAt = input.recordedAt ?? nowIso();
	// ":" and "." become "-" so the timestamp can serve as a directory name.
	const revisionId = recordedAt.replace(/[:.]/g, "-");
	const runDir = join(input.projectRoot, ".pi", "harness", "runs", input.runId);
	const artifactsDir = join(runDir, "artifacts");
	const archiveDir = join(artifactsDir, "revisions", revisionId);
	const moved: string[] = [];

	// Moves one run-relative entry into the archive, keeping its relative path.
	async function archiveRel(rel: string): Promise<void> {
		const ok = await moveIfExists(join(runDir, rel), join(archiveDir, rel));
		if (ok) moved.push(rel);
	}

	await archiveRel("plan-packet.yaml");
	await archiveRel("plan-review.md");
	await archiveRel("research-brief.yaml");
	await archiveRel("debate-messenger");

	// Only the directory listing is best-effort. Failures while moving an
	// artifact must surface like the moves above instead of being mistaken
	// for "no artifacts directory" and silently aborting the loop.
	let artifactNames: string[] = [];
	try {
		artifactNames = await readdir(artifactsDir);
	} catch {
		// No artifacts yet.
	}
	for (const name of artifactNames) {
		if (!isPlanRevisionArtifactFile(name)) continue;
		await archiveRel(join("artifacts", name));
	}

	const debateRel = join(
		".pi",
		"harness",
		"debates",
		`plan-${input.runId}.jsonl`,
	);
	const debateArchived = await moveIfExists(
		join(input.projectRoot, debateRel),
		join(archiveDir, "debates", basename(debateRel)),
	);
	// NOTE: unlike the other entries, this path is relative to the project root.
	if (debateArchived) moved.push(debateRel);

	if (moved.length > 0) {
		await mkdir(archiveDir, { recursive: true });
		await writeFile(
			join(archiveDir, "revision-reset.json"),
			`${JSON.stringify(
				{
					schema_version: "1.0.0",
					run_id: input.runId,
					reason: input.reason,
					recorded_at: recordedAt,
					moved,
				},
				null,
				2,
			)}\n`,
			"utf-8",
		);
	}

	return { archiveDir, moved };
}
|
|
212
|
+
|
|
213
|
+
/**
 * Decides whether existing plan artifacts should be archived before a
 * planning command runs in revise mode.
 *
 * @param input.command - Harness command name (without the leading slash).
 * @param input.mode - Planning mode resolved for this turn.
 * @param input.runCtx - Active run context; only `next_recommended_command`
 *   is read.
 * @param input.reviewOutcome - Review outcome loaded for the run, if any.
 * @param input.userPrompt - Prompt text, scanned for an explicit revise flag.
 * @returns `false` unless the command is `harness-plan` or `harness-auto` and
 *   the mode is `"revise"`. Otherwise `true` when the review outcome's
 *   remediation class is `plan_gap`, the recommended next command mentions
 *   `/harness-plan` or `revise`, or the prompt contains `--mode revise`,
 *   `--mode=revise` or `mode: revise` (case-insensitive).
 */
function shouldArchiveForPlanRevise(input: {
	command: string;
	mode: "create" | "revise" | null;
	runCtx: HarnessRunContext;
	reviewOutcome: Awaited<ReturnType<typeof readReviewOutcomeFromRun>>;
	userPrompt: string;
}): boolean {
	const isPlanningCommand =
		input.command === "harness-plan" || input.command === "harness-auto";
	if (!isPlanningCommand) return false;
	if (input.mode !== "revise") return false;

	const recommended = (
		input.runCtx.next_recommended_command ?? ""
	).toLowerCase();
	const loweredPrompt = input.userPrompt.toLowerCase();

	if (input.reviewOutcome?.remediation_class === "plan_gap") return true;
	if (recommended.includes("/harness-plan")) return true;
	if (recommended.includes("revise")) return true;

	const reviseMarkers = ["--mode revise", "--mode=revise", "mode: revise"];
	return reviseMarkers.some((marker) => loweredPrompt.includes(marker));
}
|
|
235
|
+
|
|
236
|
+
/**
 * Forwards a run context's plan id, phase and plan-ready flag to
 * `syncPolicyFromPlan`.
 *
 * @param pi - Extension API handle passed through unchanged.
 * @param entries - Session entries passed through unchanged.
 * @param runCtx - Run context supplying the values; a missing `plan_id`
 *   is replaced by `"plan-unknown"`.
 */
function syncPolicyFromRunContext(
	pi: ExtensionAPI,
	entries: unknown[],
	runCtx: HarnessRunContext,
): void {
	const planId = runCtx.plan_id ?? "plan-unknown";
	const phase = runCtx.phase;
	const planReady = runCtx.plan_ready;
	syncPolicyFromPlan(pi, entries, planId, phase, planReady);
}
|
|
87
249
|
|
|
88
250
|
function extractTaskSummary(args: string, prompt?: string): string | null {
|
|
@@ -164,6 +326,10 @@ function syncPolicyFromPlan(
|
|
|
164
326
|
});
|
|
165
327
|
}
|
|
166
328
|
|
|
329
|
+
/**
 * Restores the run context from the current session's entries.
 *
 * @param entries - Session entries to search.
 * @returns Whatever `getLatestRunContext` yields for these entries
 *   (a run context or `null`).
 */
function hydrateFromSession(entries: unknown[]): HarnessRunContext | null {
	const latest = getLatestRunContext(entries);
	return latest;
}
|
|
332
|
+
|
|
167
333
|
async function hydrateFromDisk(
|
|
168
334
|
sessionId: string,
|
|
169
335
|
projectRoot: string,
|
|
@@ -201,15 +367,54 @@ function needsClarificationFollowUp(ctx: HarnessRunContext | null): boolean {
|
|
|
201
367
|
return ctx?.status === "active" && ctx.last_outcome === "needs_clarification";
|
|
202
368
|
}
|
|
203
369
|
|
|
370
|
+
/**
 * Offers to resume a harness run found on disk, at most once per run within
 * the current session.
 *
 * Does nothing when `evaluateCrossSessionResume` returns nothing for the
 * working directory, or when `sessionHasResumePromptForRun` reports the
 * session already holds a prompt for that run. Otherwise it records a
 * `harness-session-resume-prompt` entry, sends the formatted message,
 * shows a UI notification when a UI is available, and emits
 * `harness-cross-session-resume`.
 *
 * @param pi - Extension API used to append the entry, send the message and
 *   emit the event.
 * @param ctx - Session context providing the entries and the optional UI.
 */
async function offerCrossSessionResume(
	pi: ExtensionAPI,
	ctx: {
		hasUI: boolean;
		sessionManager: { getEntries(): unknown[] };
		ui: {
			notify(message: string, type?: "info" | "warning" | "error"): void;
		};
	},
): Promise<void> {
	const projectRoot = process.cwd();
	const entries = getEntries(ctx);
	const info = await evaluateCrossSessionResume(projectRoot, entries);
	if (!info) return;
	if (sessionHasResumePromptForRun(entries, info.runId)) return;

	const content = formatCrossSessionResumeMessage(info);

	// Record the prompt first so later checks in this session see it.
	pi.appendEntry("harness-session-resume-prompt", {
		run_id: info.runId,
		resume_command: info.resumeCommand,
		shown_at: nowIso(),
	});
	pi.sendMessage({
		customType: "harness-session-resume-prompt",
		content,
		display: true,
	});

	if (ctx.hasUI) {
		const notice = `Harness run on disk. Resume with ${info.resumeCommand}`;
		ctx.ui.notify(notice, "info");
	}

	const resumeEvent = {
		run_id: info.runId,
		resume_command: info.resumeCommand,
	};
	pi.events.emit("harness-cross-session-resume", resumeEvent);
}
|
|
407
|
+
|
|
204
408
|
export default function harnessRunContext(pi: ExtensionAPI) {
|
|
205
|
-
if (!
|
|
409
|
+
if (!claimHarnessGovernanceLoad("harness-run-context", MODULE_URL)) return;
|
|
206
410
|
let activeCtx: HarnessRunContext | null = null;
|
|
207
411
|
|
|
208
412
|
pi.on("session_start", async (_event, ctx) => {
|
|
209
|
-
const sessionId = ctx.sessionManager.getSessionId();
|
|
210
|
-
const projectRoot = process.cwd();
|
|
211
413
|
const entries = getEntries(ctx);
|
|
212
|
-
activeCtx =
|
|
414
|
+
activeCtx = hydrateFromSession(entries);
|
|
415
|
+
const booted = await bootstrapHarnessSubprocessFromEnv(pi, ctx);
|
|
416
|
+
if (booted) activeCtx = booted;
|
|
417
|
+
if (!booted) await offerCrossSessionResume(pi, ctx);
|
|
213
418
|
});
|
|
214
419
|
|
|
215
420
|
pi.on("input", async (event) => {
|
|
@@ -338,36 +543,57 @@ export default function harnessRunContext(pi: ExtensionAPI) {
|
|
|
338
543
|
}
|
|
339
544
|
|
|
340
545
|
if (command === "harness-use-run") {
|
|
341
|
-
const
|
|
342
|
-
if (!runId) {
|
|
546
|
+
const parsed = parseHarnessUseRunArgs(args);
|
|
547
|
+
if (!parsed.runId) {
|
|
343
548
|
return {
|
|
344
549
|
message: {
|
|
345
550
|
customType: "harness-run-context-block",
|
|
346
551
|
display: true,
|
|
347
|
-
content: "Usage: /harness-use-run <run-id>",
|
|
552
|
+
content: "Usage: /harness-use-run <run-id> [--claim] [--readonly]",
|
|
348
553
|
},
|
|
349
554
|
};
|
|
350
555
|
}
|
|
351
|
-
const disk = await loadRunContextFromDisk(runId, projectRoot);
|
|
556
|
+
const disk = await loadRunContextFromDisk(parsed.runId, projectRoot);
|
|
352
557
|
if (!disk) {
|
|
353
558
|
return {
|
|
354
559
|
message: {
|
|
355
560
|
customType: "harness-run-context-block",
|
|
356
561
|
display: true,
|
|
357
|
-
content: `No run directory for ${runId}. Check .pi/harness/runs/.`,
|
|
562
|
+
content: `No run directory for ${parsed.runId}. Check .pi/harness/runs/.`,
|
|
358
563
|
},
|
|
359
564
|
};
|
|
360
565
|
}
|
|
361
566
|
activeCtx = {
|
|
362
567
|
...disk,
|
|
363
568
|
pi_session_id: sessionId,
|
|
364
|
-
turn_override_run_id: runId,
|
|
569
|
+
turn_override_run_id: parsed.runId,
|
|
365
570
|
};
|
|
366
|
-
if (
|
|
571
|
+
if (parsed.claim) {
|
|
572
|
+
activeCtx = claimRunOwnership(activeCtx, sessionId);
|
|
573
|
+
}
|
|
574
|
+
const statuses = await resolveCompletionStatuses(
|
|
575
|
+
getEntries(ctx),
|
|
576
|
+
activeCtx.run_id,
|
|
577
|
+
projectRoot,
|
|
578
|
+
);
|
|
579
|
+
if (activeCtx.owner_pi_session_id !== sessionId && !parsed.claim) {
|
|
367
580
|
activeCtx.next_recommended_command =
|
|
368
|
-
"Read-only:
|
|
581
|
+
"Read-only: use /harness-use-run <run-id> --claim to take ownership, or /harness-new-run.";
|
|
582
|
+
} else {
|
|
583
|
+
activeCtx.next_recommended_command = nextStepAfterOutcome({
|
|
584
|
+
phase: activeCtx.phase,
|
|
585
|
+
planStatus: activeCtx.plan_ready ? "ready" : null,
|
|
586
|
+
lastCompletedStep: activeCtx.last_completed_step,
|
|
587
|
+
lastOutcome: activeCtx.last_outcome,
|
|
588
|
+
executionStatus: statuses.executionStatus,
|
|
589
|
+
evalStatus: statuses.evalStatus,
|
|
590
|
+
adversaryComplete: statuses.adversaryComplete,
|
|
591
|
+
aborted: activeCtx.status === "aborted",
|
|
592
|
+
});
|
|
369
593
|
}
|
|
594
|
+
activeCtx.updated_at = nowIso();
|
|
370
595
|
persistContext(pi, activeCtx);
|
|
596
|
+
syncPolicyFromRunContext(pi, getEntries(ctx), activeCtx);
|
|
371
597
|
return {
|
|
372
598
|
systemPrompt: `${event.systemPrompt}\n\n${formatPlanContextBlock(activeCtx)}`,
|
|
373
599
|
};
|
|
@@ -445,6 +671,7 @@ export default function harnessRunContext(pi: ExtensionAPI) {
|
|
|
445
671
|
const crossSessionCmd = new Set([
|
|
446
672
|
"harness-eval",
|
|
447
673
|
"harness-review",
|
|
674
|
+
"harness-steer",
|
|
448
675
|
"harness-critic",
|
|
449
676
|
"harness-trace",
|
|
450
677
|
"harness-incident",
|
|
@@ -484,6 +711,13 @@ export default function harnessRunContext(pi: ExtensionAPI) {
|
|
|
484
711
|
activeCtx.updated_at = new Date().toISOString();
|
|
485
712
|
activeCtx.pi_session_id = sessionId;
|
|
486
713
|
|
|
714
|
+
if (
|
|
715
|
+
shouldAutoClaimHarnessRun(command, args) &&
|
|
716
|
+
activeCtx.owner_pi_session_id !== sessionId
|
|
717
|
+
) {
|
|
718
|
+
activeCtx = claimRunOwnership(activeCtx, sessionId);
|
|
719
|
+
}
|
|
720
|
+
|
|
487
721
|
if (resolved.planPath && resolved.runId) {
|
|
488
722
|
const check = validatePlanOverridePath(
|
|
489
723
|
resolved.planPath,
|
|
@@ -518,37 +752,71 @@ export default function harnessRunContext(pi: ExtensionAPI) {
|
|
|
518
752
|
activeCtx.last_completed_step === "execute" &&
|
|
519
753
|
activeCtx.last_outcome === "completed"
|
|
520
754
|
) {
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
755
|
+
return {
|
|
756
|
+
message: {
|
|
757
|
+
customType: "harness-run-context-block",
|
|
758
|
+
display: true,
|
|
759
|
+
content:
|
|
760
|
+
"Execute already completed for this run. Next: /harness-review (same session), or /harness-abort to replan.",
|
|
761
|
+
},
|
|
762
|
+
};
|
|
524
763
|
}
|
|
525
764
|
|
|
526
765
|
let planSummary: PlanPacketSummary | null = null;
|
|
766
|
+
let planPacketForSpawn: Awaited<ReturnType<typeof readPlanPacketFromPath>> =
|
|
767
|
+
null;
|
|
527
768
|
if (activeCtx.plan_packet_path) {
|
|
528
|
-
|
|
529
|
-
|
|
769
|
+
planPacketForSpawn = await readPlanPacketFromPath(
|
|
770
|
+
activeCtx.plan_packet_path,
|
|
771
|
+
);
|
|
772
|
+
if (planPacketForSpawn) {
|
|
530
773
|
planSummary = planPacketSummary(
|
|
531
|
-
|
|
774
|
+
planPacketForSpawn,
|
|
532
775
|
activeCtx.plan_packet_path,
|
|
533
776
|
activeCtx.plan_ready ? "ready" : "draft",
|
|
534
777
|
);
|
|
535
|
-
activeCtx.plan_id =
|
|
778
|
+
activeCtx.plan_id = planPacketForSpawn.plan_id ?? activeCtx.plan_id;
|
|
536
779
|
}
|
|
537
780
|
}
|
|
538
781
|
|
|
782
|
+
let contextSpawnOpts:
|
|
783
|
+
| Parameters<typeof formatPlanContextBlock>[1]
|
|
784
|
+
| undefined;
|
|
785
|
+
if (command === "harness-run" && planPacketForSpawn) {
|
|
786
|
+
const criticalIds =
|
|
787
|
+
criticalPathWorkItemIdsFromPlanPacket(planPacketForSpawn);
|
|
788
|
+
contextSpawnOpts = {
|
|
789
|
+
mode: "execute",
|
|
790
|
+
critical_path_work_item_ids: criticalIds,
|
|
791
|
+
};
|
|
792
|
+
}
|
|
793
|
+
|
|
539
794
|
let activePlanBlock = "";
|
|
795
|
+
let planMode: "create" | "revise" | null = null;
|
|
540
796
|
if (command === "harness-plan" || command === "harness-auto") {
|
|
541
|
-
|
|
542
|
-
activeCtx.
|
|
797
|
+
planMode =
|
|
798
|
+
activeCtx.plan_id ||
|
|
799
|
+
activeCtx.plan_packet_path ||
|
|
800
|
+
activeCtx.status === "aborted"
|
|
543
801
|
? "revise"
|
|
544
802
|
: "create";
|
|
545
|
-
activePlanBlock = formatActivePlanBlock(activeCtx,
|
|
803
|
+
activePlanBlock = formatActivePlanBlock(activeCtx, planMode, planSummary);
|
|
546
804
|
} else if (command === "harness-run") {
|
|
547
805
|
activePlanBlock = formatActivePlanBlock(
|
|
548
806
|
activeCtx,
|
|
549
807
|
"execute",
|
|
550
808
|
planSummary,
|
|
551
809
|
);
|
|
810
|
+
} else if (command === "harness-steer") {
|
|
811
|
+
activePlanBlock = formatActivePlanBlock(
|
|
812
|
+
activeCtx,
|
|
813
|
+
"execute",
|
|
814
|
+
planSummary,
|
|
815
|
+
);
|
|
816
|
+
contextSpawnOpts = {
|
|
817
|
+
mode: "repair",
|
|
818
|
+
repair_brief_path: "artifacts/repair-brief.yaml",
|
|
819
|
+
};
|
|
552
820
|
} else if (
|
|
553
821
|
command === "harness-eval" ||
|
|
554
822
|
command === "harness-review" ||
|
|
@@ -557,14 +825,46 @@ export default function harnessRunContext(pi: ExtensionAPI) {
|
|
|
557
825
|
activePlanBlock = formatActivePlanBlock(activeCtx, "read", planSummary);
|
|
558
826
|
}
|
|
559
827
|
|
|
828
|
+
if (command === "harness-plan" || command === "harness-auto") {
|
|
829
|
+
const reviewOutcome = await readReviewOutcomeFromRun(
|
|
830
|
+
activeCtx.run_id,
|
|
831
|
+
projectRoot,
|
|
832
|
+
);
|
|
833
|
+
if (
|
|
834
|
+
shouldArchiveForPlanRevise({
|
|
835
|
+
command,
|
|
836
|
+
mode: planMode,
|
|
837
|
+
runCtx: activeCtx,
|
|
838
|
+
reviewOutcome,
|
|
839
|
+
userPrompt,
|
|
840
|
+
})
|
|
841
|
+
) {
|
|
842
|
+
const reset = await archivePlanRevisionArtifacts({
|
|
843
|
+
projectRoot,
|
|
844
|
+
runId: activeCtx.run_id,
|
|
845
|
+
reason: "review_plan_gap_revise",
|
|
846
|
+
});
|
|
847
|
+
if (reset.moved.length > 0) {
|
|
848
|
+
pi.appendEntry("harness-plan-revision-reset", {
|
|
849
|
+
run_id: activeCtx.run_id,
|
|
850
|
+
archive_dir: reset.archiveDir,
|
|
851
|
+
moved: reset.moved,
|
|
852
|
+
reason: "review_plan_gap_revise",
|
|
853
|
+
recorded_at: nowIso(),
|
|
854
|
+
});
|
|
855
|
+
}
|
|
856
|
+
}
|
|
857
|
+
}
|
|
858
|
+
|
|
560
859
|
persistContext(pi, activeCtx);
|
|
561
860
|
|
|
562
861
|
return {
|
|
563
|
-
systemPrompt: `${event.systemPrompt}\n\n${formatPlanContextBlock(activeCtx)}${activePlanBlock ? `\n\n${activePlanBlock}` : ""}`,
|
|
862
|
+
systemPrompt: `${event.systemPrompt}\n\n${formatPlanContextBlock(activeCtx, contextSpawnOpts)}${activePlanBlock ? `\n\n${activePlanBlock}` : ""}`,
|
|
564
863
|
};
|
|
565
864
|
});
|
|
566
865
|
|
|
567
866
|
pi.on("agent_end", async (_event, ctx) => {
|
|
867
|
+
const projectRoot = process.cwd();
|
|
568
868
|
const entries = getEntries(ctx);
|
|
569
869
|
if (!activeCtx) {
|
|
570
870
|
activeCtx = getLatestRunContext(entries);
|
|
@@ -591,9 +891,6 @@ export default function harnessRunContext(pi: ExtensionAPI) {
|
|
|
591
891
|
: parseHarnessSlashInput(userVisiblePromptSlice(lastPrompt));
|
|
592
892
|
if (!parsed && !needsClarificationFollowUp(activeCtx)) return;
|
|
593
893
|
|
|
594
|
-
const policyPhase = getLatestPolicyPhase(entries) ?? activeCtx.phase;
|
|
595
|
-
activeCtx.phase = policyPhase;
|
|
596
|
-
|
|
597
894
|
if (parsed?.command === "harness-abort") {
|
|
598
895
|
activeCtx.status = "aborted";
|
|
599
896
|
activeCtx.plan_ready = false;
|
|
@@ -654,27 +951,82 @@ export default function harnessRunContext(pi: ExtensionAPI) {
|
|
|
654
951
|
|
|
655
952
|
activeCtx.plan_ready = planReady;
|
|
656
953
|
|
|
657
|
-
const statuses =
|
|
954
|
+
const statuses = await resolveCompletionStatuses(
|
|
955
|
+
entries,
|
|
956
|
+
activeCtx.run_id,
|
|
957
|
+
projectRoot,
|
|
958
|
+
);
|
|
658
959
|
if (parsed?.command === "harness-run") {
|
|
659
960
|
activeCtx.last_completed_step = "execute";
|
|
660
|
-
|
|
661
|
-
|
|
961
|
+
let execStatus = statuses.executionStatus;
|
|
962
|
+
if (!execStatus) {
|
|
963
|
+
const handoff = await readExecutorHandoffFromRun(
|
|
964
|
+
activeCtx.run_id,
|
|
965
|
+
projectRoot,
|
|
966
|
+
);
|
|
967
|
+
execStatus = handoff?.execution_status ?? null;
|
|
968
|
+
}
|
|
969
|
+
activeCtx.last_outcome = execStatus ?? "completed";
|
|
970
|
+
activeCtx.phase = "evaluate";
|
|
662
971
|
}
|
|
663
|
-
if (parsed?.command === "harness-
|
|
664
|
-
activeCtx.last_completed_step = "
|
|
665
|
-
activeCtx.
|
|
972
|
+
if (parsed?.command === "harness-steer") {
|
|
973
|
+
activeCtx.last_completed_step = "steer";
|
|
974
|
+
activeCtx.steer_attempt = (activeCtx.steer_attempt ?? 0) + 1;
|
|
975
|
+
activeCtx.steer_max_attempts =
|
|
976
|
+
activeCtx.steer_max_attempts ?? steerMaxAttemptsFromEnv();
|
|
977
|
+
activeCtx.phase = "execute";
|
|
978
|
+
syncPolicyFromRunContext(pi, getEntries(ctx), activeCtx);
|
|
979
|
+
}
|
|
980
|
+
if (
|
|
981
|
+
parsed?.command === "harness-eval" ||
|
|
982
|
+
parsed?.command === "harness-review" ||
|
|
983
|
+
parsed?.command === "harness-critic"
|
|
984
|
+
) {
|
|
985
|
+
activeCtx.last_completed_step =
|
|
986
|
+
parsed.command === "harness-critic" ? "adversary" : "review";
|
|
987
|
+
if (statuses.evalStatus) {
|
|
988
|
+
activeCtx.last_outcome = statuses.evalStatus;
|
|
989
|
+
}
|
|
990
|
+
if (statuses.adversaryComplete) {
|
|
991
|
+
activeCtx.phase = "adversary";
|
|
992
|
+
activeCtx.last_completed_step = "adversary";
|
|
993
|
+
} else if (statuses.evalStatus) {
|
|
994
|
+
activeCtx.phase = "evaluate";
|
|
995
|
+
}
|
|
666
996
|
}
|
|
667
997
|
|
|
998
|
+
const reviewOutcome = await readReviewOutcomeFromRun(
|
|
999
|
+
activeCtx.run_id,
|
|
1000
|
+
projectRoot,
|
|
1001
|
+
);
|
|
1002
|
+
const reviewComplete =
|
|
1003
|
+
activeCtx.last_completed_step === "review" ||
|
|
1004
|
+
activeCtx.last_completed_step === "adversary";
|
|
668
1005
|
const next = nextStepAfterOutcome({
|
|
669
1006
|
phase: activeCtx.phase,
|
|
670
|
-
planStatus: statuses.planStatus
|
|
1007
|
+
planStatus: statuses.planStatus,
|
|
1008
|
+
lastCompletedStep: activeCtx.last_completed_step,
|
|
1009
|
+
lastOutcome: activeCtx.last_outcome,
|
|
671
1010
|
executionStatus: statuses.executionStatus,
|
|
672
1011
|
evalStatus: statuses.evalStatus,
|
|
1012
|
+
adversaryComplete: statuses.adversaryComplete,
|
|
673
1013
|
aborted: activeCtx.status === "aborted",
|
|
1014
|
+
remediationClass: reviewOutcome?.remediation_class ?? null,
|
|
1015
|
+
steerAttempt: activeCtx.steer_attempt ?? 0,
|
|
1016
|
+
steerMaxAttempts:
|
|
1017
|
+
activeCtx.steer_max_attempts ?? steerMaxAttemptsFromEnv(),
|
|
1018
|
+
reviewComplete,
|
|
674
1019
|
});
|
|
675
1020
|
activeCtx.next_recommended_command = next;
|
|
676
1021
|
activeCtx.updated_at = new Date().toISOString();
|
|
677
1022
|
|
|
1023
|
+
if (
|
|
1024
|
+
parsed?.command === "harness-run" &&
|
|
1025
|
+
activeCtx.last_outcome === "completed"
|
|
1026
|
+
) {
|
|
1027
|
+
syncPolicyFromRunContext(pi, getEntries(ctx), activeCtx);
|
|
1028
|
+
}
|
|
1029
|
+
|
|
678
1030
|
persistContext(pi, activeCtx);
|
|
679
1031
|
|
|
680
1032
|
pi.appendEntry("harness-step-handoff", {
|
|
@@ -719,26 +1071,6 @@ export default function harnessRunContext(pi: ExtensionAPI) {
|
|
|
719
1071
|
});
|
|
720
1072
|
|
|
721
1073
|
pi.on("tool_call", async (event, ctx) => {
|
|
722
|
-
// #region agent log
|
|
723
|
-
fetch("http://127.0.0.1:7928/ingest/a5d40896-34cb-4f12-97db-df7ada0b22f0", {
|
|
724
|
-
method: "POST",
|
|
725
|
-
headers: {
|
|
726
|
-
"Content-Type": "application/json",
|
|
727
|
-
"X-Debug-Session-Id": "2ca12b",
|
|
728
|
-
},
|
|
729
|
-
body: JSON.stringify({
|
|
730
|
-
sessionId: "2ca12b",
|
|
731
|
-
location: "harness-run-context.ts:tool_call",
|
|
732
|
-
message: "submit policy hook",
|
|
733
|
-
data: {
|
|
734
|
-
toolName: event.toolName,
|
|
735
|
-
typeofIsSubmitToolName: typeof isSubmitToolName,
|
|
736
|
-
},
|
|
737
|
-
timestamp: Date.now(),
|
|
738
|
-
hypothesisId: "H1",
|
|
739
|
-
}),
|
|
740
|
-
}).catch(() => {});
|
|
741
|
-
// #endregion
|
|
742
1074
|
if (isSubmitToolName(event.toolName)) {
|
|
743
1075
|
const decision = evaluateHarnessSubagentToolCall(
|
|
744
1076
|
event.toolName,
|
|
@@ -997,6 +1329,19 @@ export default function harnessRunContext(pi: ExtensionAPI) {
|
|
|
997
1329
|
}
|
|
998
1330
|
const pathArg = String((params as { path?: string }).path ?? "").trim();
|
|
999
1331
|
const content = String((params as { content?: string }).content ?? "");
|
|
1332
|
+
const HARNESS_YAML_INLINE_MAX = 32 * 1024;
|
|
1333
|
+
if (content.length > HARNESS_YAML_INLINE_MAX) {
|
|
1334
|
+
return {
|
|
1335
|
+
content: [
|
|
1336
|
+
{
|
|
1337
|
+
type: "text",
|
|
1338
|
+
text: `Content exceeds ${HARNESS_YAML_INLINE_MAX} bytes. Subagent must submit_* to disk, then use merge_harness_yaml with source_path or a small patch.`,
|
|
1339
|
+
},
|
|
1340
|
+
],
|
|
1341
|
+
details: { path: pathArg, bytes: content.length },
|
|
1342
|
+
isError: true,
|
|
1343
|
+
};
|
|
1344
|
+
}
|
|
1000
1345
|
if (!pathArg || !content.trim()) {
|
|
1001
1346
|
return {
|
|
1002
1347
|
content: [
|
|
@@ -1025,6 +1370,22 @@ export default function harnessRunContext(pi: ExtensionAPI) {
|
|
|
1025
1370
|
};
|
|
1026
1371
|
}
|
|
1027
1372
|
const relForGate = pathArg.replace(/\\/g, "/");
|
|
1373
|
+
const subagentOnly = new Set([
|
|
1374
|
+
"artifacts/eval-verdict.yaml",
|
|
1375
|
+
"artifacts/adversary-report.yaml",
|
|
1376
|
+
]);
|
|
1377
|
+
if (subagentOnly.has(relForGate)) {
|
|
1378
|
+
return {
|
|
1379
|
+
content: [
|
|
1380
|
+
{
|
|
1381
|
+
type: "text",
|
|
1382
|
+
text: `Path not allowed: ${pathArg}. Post-run verdicts must be written via submit_* in harness/reviewing/evaluator or harness/reviewing/adversary subagents; parent gates with harness_artifact_ready only.`,
|
|
1383
|
+
},
|
|
1384
|
+
],
|
|
1385
|
+
details: { path: pathArg },
|
|
1386
|
+
isError: true,
|
|
1387
|
+
};
|
|
1388
|
+
}
|
|
1028
1389
|
if (/\.json$/i.test(relForGate) && relForGate.startsWith("artifacts/")) {
|
|
1029
1390
|
return {
|
|
1030
1391
|
content: [
|
|
@@ -1078,18 +1439,34 @@ export default function harnessRunContext(pi: ExtensionAPI) {
|
|
|
1078
1439
|
});
|
|
1079
1440
|
|
|
1080
1441
|
pi.registerTool({
|
|
1081
|
-
name: "
|
|
1082
|
-
label: "Harness
|
|
1442
|
+
name: "merge_harness_yaml",
|
|
1443
|
+
label: "Merge Harness YAML",
|
|
1083
1444
|
description:
|
|
1084
|
-
"
|
|
1445
|
+
"Shallow-merge a patch or another run artifact into an existing harness YAML file (path-first).",
|
|
1446
|
+
promptSnippet:
|
|
1447
|
+
"Merge artifact paths without pasting large bodies into tool args.",
|
|
1448
|
+
promptGuidelines: [
|
|
1449
|
+
"Prefer source_path pointing at artifacts/*.yaml from subagent submit_*.",
|
|
1450
|
+
"Use patch for small top-level keys only.",
|
|
1451
|
+
],
|
|
1085
1452
|
parameters: Type.Object({
|
|
1086
|
-
|
|
1087
|
-
minItems: 1,
|
|
1453
|
+
path: Type.String({
|
|
1088
1454
|
description:
|
|
1089
|
-
"
|
|
1455
|
+
"Target path under the active run, e.g. research-brief.yaml",
|
|
1090
1456
|
}),
|
|
1457
|
+
patch: Type.Optional(
|
|
1458
|
+
Type.String({
|
|
1459
|
+
description: "Small YAML/JSON object merged into the target",
|
|
1460
|
+
}),
|
|
1461
|
+
),
|
|
1462
|
+
source_path: Type.Optional(
|
|
1463
|
+
Type.String({
|
|
1464
|
+
description:
|
|
1465
|
+
"Relative path under the run to merge into target (e.g. artifacts/implementation-research.yaml)",
|
|
1466
|
+
}),
|
|
1467
|
+
),
|
|
1091
1468
|
}),
|
|
1092
|
-
async execute(
|
|
1469
|
+
async execute(_toolCallId, params, _signal, _onUpdate, ctx) {
|
|
1093
1470
|
const entries = getEntries(ctx);
|
|
1094
1471
|
const runCtx = getLatestRunContext(entries) ?? activeCtx;
|
|
1095
1472
|
if (!runCtx?.run_id) {
|
|
@@ -1099,8 +1476,38 @@ export default function harnessRunContext(pi: ExtensionAPI) {
|
|
|
1099
1476
|
isError: true,
|
|
1100
1477
|
};
|
|
1101
1478
|
}
|
|
1102
|
-
const
|
|
1479
|
+
const pathArg = String((params as { path?: string }).path ?? "").trim();
|
|
1480
|
+
const patchRaw = String((params as { patch?: string }).patch ?? "");
|
|
1481
|
+
const sourcePath = String(
|
|
1482
|
+
(params as { source_path?: string }).source_path ?? "",
|
|
1483
|
+
).trim();
|
|
1484
|
+
if (!pathArg || (!patchRaw.trim() && !sourcePath)) {
|
|
1485
|
+
return {
|
|
1486
|
+
content: [
|
|
1487
|
+
{
|
|
1488
|
+
type: "text",
|
|
1489
|
+
text: "merge_harness_yaml requires path and patch or source_path.",
|
|
1490
|
+
},
|
|
1491
|
+
],
|
|
1492
|
+
details: {},
|
|
1493
|
+
isError: true,
|
|
1494
|
+
};
|
|
1495
|
+
}
|
|
1103
1496
|
const projectRoot = process.cwd();
|
|
1497
|
+
const absPath = normalizeHarnessPath(pathArg, projectRoot);
|
|
1498
|
+
const scoped = await isPlanPhaseScopedWrite(absPath, runCtx, projectRoot);
|
|
1499
|
+
if (!scoped) {
|
|
1500
|
+
return {
|
|
1501
|
+
content: [
|
|
1502
|
+
{
|
|
1503
|
+
type: "text",
|
|
1504
|
+
text: `Path not allowed: ${pathArg}.`,
|
|
1505
|
+
},
|
|
1506
|
+
],
|
|
1507
|
+
details: { path: pathArg },
|
|
1508
|
+
isError: true,
|
|
1509
|
+
};
|
|
1510
|
+
}
|
|
1104
1511
|
const runRoot = join(
|
|
1105
1512
|
projectRoot,
|
|
1106
1513
|
".pi",
|
|
@@ -1108,59 +1515,277 @@ export default function harnessRunContext(pi: ExtensionAPI) {
|
|
|
1108
1515
|
"runs",
|
|
1109
1516
|
runCtx.run_id,
|
|
1110
1517
|
);
|
|
1111
|
-
|
|
1112
|
-
|
|
1113
|
-
|
|
1114
|
-
const
|
|
1115
|
-
|
|
1518
|
+
let existing: Record<string, unknown> = {};
|
|
1519
|
+
try {
|
|
1520
|
+
const { readYamlFile } = await import("../lib/harness-yaml.js");
|
|
1521
|
+
const cur = await readYamlFile(absPath, pathArg);
|
|
1522
|
+
if (cur && typeof cur === "object" && !Array.isArray(cur)) {
|
|
1523
|
+
existing = cur as Record<string, unknown>;
|
|
1524
|
+
}
|
|
1525
|
+
} catch {
|
|
1526
|
+
existing = {};
|
|
1527
|
+
}
|
|
1528
|
+
let patchDoc: Record<string, unknown>;
|
|
1529
|
+
if (sourcePath) {
|
|
1530
|
+
const srcRel = sourcePath.replace(/\\/g, "/").replace(/^\.\//, "");
|
|
1531
|
+
const srcAbs = srcRel.startsWith(".pi/")
|
|
1532
|
+
? normalizeHarnessPath(srcRel, projectRoot)
|
|
1533
|
+
: join(runRoot, srcRel);
|
|
1116
1534
|
try {
|
|
1117
|
-
|
|
1118
|
-
|
|
1119
|
-
|
|
1120
|
-
|
|
1535
|
+
patchDoc = parseStructuredDocument(
|
|
1536
|
+
await readFile(srcAbs, "utf-8"),
|
|
1537
|
+
sourcePath,
|
|
1538
|
+
) as Record<string, unknown>;
|
|
1539
|
+
} catch (err) {
|
|
1540
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
1541
|
+
return {
|
|
1542
|
+
content: [{ type: "text", text: msg }],
|
|
1543
|
+
details: { source_path: sourcePath },
|
|
1544
|
+
isError: true,
|
|
1545
|
+
};
|
|
1121
1546
|
}
|
|
1547
|
+
} else {
|
|
1548
|
+
try {
|
|
1549
|
+
patchDoc = parseStructuredDocument(patchRaw, pathArg) as Record<
|
|
1550
|
+
string,
|
|
1551
|
+
unknown
|
|
1552
|
+
>;
|
|
1553
|
+
} catch (err) {
|
|
1554
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
1555
|
+
return {
|
|
1556
|
+
content: [{ type: "text", text: msg }],
|
|
1557
|
+
details: { path: pathArg },
|
|
1558
|
+
isError: true,
|
|
1559
|
+
};
|
|
1560
|
+
}
|
|
1561
|
+
}
|
|
1562
|
+
const merged = { ...existing, ...patchDoc };
|
|
1563
|
+
await mkdir(dirname(absPath), { recursive: true });
|
|
1564
|
+
await writeYamlFile(absPath, merged);
|
|
1565
|
+
return {
|
|
1566
|
+
content: [
|
|
1567
|
+
{
|
|
1568
|
+
type: "text",
|
|
1569
|
+
text: `Merged into ${pathArg} as canonical YAML.`,
|
|
1570
|
+
},
|
|
1571
|
+
],
|
|
1572
|
+
details: { path: absPath },
|
|
1573
|
+
};
|
|
1574
|
+
},
|
|
1575
|
+
});
|
|
1576
|
+
|
|
1577
|
+
pi.registerTool({
|
|
1578
|
+
name: "harness_synthesize_repair_brief",
|
|
1579
|
+
label: "Synthesize Repair Brief",
|
|
1580
|
+
description:
|
|
1581
|
+
"Build artifacts/repair-brief.yaml from review-outcome, eval-verdict, and adversary paths (no large inline bodies).",
|
|
1582
|
+
promptSnippet:
|
|
1583
|
+
"After /harness-review when remediation_class is implementation_gap.",
|
|
1584
|
+
promptGuidelines: [
|
|
1585
|
+
"Pass artifact paths only; tool reads YAML from disk.",
|
|
1586
|
+
"Default output: artifacts/repair-brief.yaml with steer_attempt from run context + 1.",
|
|
1587
|
+
],
|
|
1588
|
+
parameters: Type.Object({
|
|
1589
|
+
review_outcome_path: Type.Optional(Type.String()),
|
|
1590
|
+
eval_verdict_path: Type.Optional(Type.String()),
|
|
1591
|
+
adversary_report_path: Type.Optional(Type.String()),
|
|
1592
|
+
plan_packet_path: Type.Optional(Type.String()),
|
|
1593
|
+
output_path: Type.Optional(
|
|
1594
|
+
Type.String({
|
|
1595
|
+
description: "Default artifacts/repair-brief.yaml",
|
|
1596
|
+
}),
|
|
1597
|
+
),
|
|
1598
|
+
}),
|
|
1599
|
+
async execute(_toolCallId, params, _signal, _onUpdate, ctx) {
|
|
1600
|
+
const entries = getEntries(ctx);
|
|
1601
|
+
const runCtx = getLatestRunContext(entries) ?? activeCtx;
|
|
1602
|
+
if (!runCtx?.run_id) {
|
|
1603
|
+
return {
|
|
1604
|
+
content: [{ type: "text", text: "No active harness run." }],
|
|
1605
|
+
details: {},
|
|
1606
|
+
isError: true,
|
|
1607
|
+
};
|
|
1608
|
+
}
|
|
1609
|
+
const projectRoot = process.cwd();
|
|
1610
|
+
const steerAttempt = (runCtx.steer_attempt ?? 0) + 1;
|
|
1611
|
+
const { synthesizeRepairBrief } = await import(
|
|
1612
|
+
"../lib/harness-repair-brief.js"
|
|
1613
|
+
);
|
|
1614
|
+
const brief = await synthesizeRepairBrief({
|
|
1615
|
+
runId: runCtx.run_id,
|
|
1616
|
+
projectRoot,
|
|
1617
|
+
steerAttempt,
|
|
1618
|
+
reviewOutcomePath: (params as { review_outcome_path?: string })
|
|
1619
|
+
.review_outcome_path,
|
|
1620
|
+
evalVerdictPath: (params as { eval_verdict_path?: string })
|
|
1621
|
+
.eval_verdict_path,
|
|
1622
|
+
adversaryReportPath: (params as { adversary_report_path?: string })
|
|
1623
|
+
.adversary_report_path,
|
|
1624
|
+
planPacketPath:
|
|
1625
|
+
(params as { plan_packet_path?: string }).plan_packet_path ??
|
|
1626
|
+
runCtx.plan_packet_path ??
|
|
1627
|
+
"plan-packet.yaml",
|
|
1628
|
+
});
|
|
1629
|
+
const outputPath =
|
|
1630
|
+
String((params as { output_path?: string }).output_path ?? "").trim() ||
|
|
1631
|
+
"artifacts/repair-brief.yaml";
|
|
1632
|
+
const absOut = normalizeHarnessPath(
|
|
1633
|
+
outputPath.startsWith(runCtx.run_id)
|
|
1634
|
+
? outputPath
|
|
1635
|
+
: join(
|
|
1636
|
+
projectRoot,
|
|
1637
|
+
".pi",
|
|
1638
|
+
"harness",
|
|
1639
|
+
"runs",
|
|
1640
|
+
runCtx.run_id,
|
|
1641
|
+
outputPath,
|
|
1642
|
+
),
|
|
1643
|
+
projectRoot,
|
|
1644
|
+
);
|
|
1645
|
+
const scoped = await isPlanPhaseScopedWrite(absOut, runCtx, projectRoot);
|
|
1646
|
+
if (!scoped) {
|
|
1647
|
+
return {
|
|
1648
|
+
content: [
|
|
1649
|
+
{
|
|
1650
|
+
type: "text",
|
|
1651
|
+
text: `Output path not allowed: ${outputPath}`,
|
|
1652
|
+
},
|
|
1653
|
+
],
|
|
1654
|
+
details: {},
|
|
1655
|
+
isError: true,
|
|
1656
|
+
};
|
|
1122
1657
|
}
|
|
1123
|
-
|
|
1658
|
+
await mkdir(dirname(absOut), { recursive: true });
|
|
1659
|
+
await writeYamlFile(absOut, brief);
|
|
1124
1660
|
return {
|
|
1125
1661
|
content: [
|
|
1126
1662
|
{
|
|
1127
1663
|
type: "text",
|
|
1128
|
-
text:
|
|
1129
|
-
? `All ${present.length} artifact(s) present.`
|
|
1130
|
-
: `Missing: ${missing.join(", ")}`,
|
|
1664
|
+
text: `Wrote ${outputPath} (steer_attempt=${steerAttempt}).`,
|
|
1131
1665
|
},
|
|
1132
1666
|
],
|
|
1133
|
-
details: {
|
|
1134
|
-
|
|
1667
|
+
details: { path: absOut, steer_attempt: steerAttempt },
|
|
1668
|
+
};
|
|
1669
|
+
},
|
|
1670
|
+
});
|
|
1671
|
+
|
|
1672
|
+
pi.registerTool({
|
|
1673
|
+
name: "harness_artifact_ready",
|
|
1674
|
+
label: "Harness Artifact Ready",
|
|
1675
|
+
description:
|
|
1676
|
+
"Check harness artifact paths exist and pass minimal schema/content gates under the active run.",
|
|
1677
|
+
parameters: Type.Object({
|
|
1678
|
+
paths: Type.Array(Type.String(), {
|
|
1679
|
+
minItems: 1,
|
|
1680
|
+
description:
|
|
1681
|
+
"Relative paths under the run dir, e.g. artifacts/decomposition.yaml",
|
|
1682
|
+
}),
|
|
1683
|
+
}),
|
|
1684
|
+
async execute(_id, params, _signal, _onUpdate, ctx) {
|
|
1685
|
+
const entries = getEntries(ctx);
|
|
1686
|
+
const runCtx = getLatestRunContext(entries) ?? activeCtx;
|
|
1687
|
+
if (!runCtx?.run_id) {
|
|
1688
|
+
return {
|
|
1689
|
+
content: [{ type: "text", text: "No active harness run." }],
|
|
1690
|
+
details: {},
|
|
1691
|
+
isError: true,
|
|
1692
|
+
};
|
|
1693
|
+
}
|
|
1694
|
+
const paths = (params as { paths?: string[] }).paths ?? [];
|
|
1695
|
+
const projectRoot = process.cwd();
|
|
1696
|
+
const runRoot = join(
|
|
1697
|
+
projectRoot,
|
|
1698
|
+
".pi",
|
|
1699
|
+
"harness",
|
|
1700
|
+
"runs",
|
|
1701
|
+
runCtx.run_id,
|
|
1702
|
+
);
|
|
1703
|
+
const specsDir = join(projectRoot, ".pi", "harness", "specs");
|
|
1704
|
+
const { validateHarnessArtifactPaths } = await import(
|
|
1705
|
+
"./lib/harness-artifact-gate.js"
|
|
1706
|
+
);
|
|
1707
|
+
const gate = await validateHarnessArtifactPaths(runRoot, paths, specsDir);
|
|
1708
|
+
const text = gate.ok
|
|
1709
|
+
? `All ${gate.present.length} artifact(s) present and valid.`
|
|
1710
|
+
: [
|
|
1711
|
+
gate.missing.length > 0
|
|
1712
|
+
? `Missing: ${gate.missing.join(", ")}`
|
|
1713
|
+
: null,
|
|
1714
|
+
gate.errors.length > 0 ? gate.errors.join("\n") : null,
|
|
1715
|
+
]
|
|
1716
|
+
.filter(Boolean)
|
|
1717
|
+
.join("\n");
|
|
1718
|
+
return {
|
|
1719
|
+
content: [{ type: "text", text }],
|
|
1720
|
+
details: {
|
|
1721
|
+
ok: gate.ok,
|
|
1722
|
+
present: gate.present,
|
|
1723
|
+
missing: gate.missing,
|
|
1724
|
+
errors: gate.errors,
|
|
1725
|
+
run_id: runCtx.run_id,
|
|
1726
|
+
},
|
|
1727
|
+
isError: !gate.ok,
|
|
1135
1728
|
};
|
|
1136
1729
|
},
|
|
1137
1730
|
});
|
|
1138
1731
|
|
|
1139
1732
|
pi.registerCommand("harness-use-run", {
|
|
1140
|
-
description:
|
|
1733
|
+
description:
|
|
1734
|
+
"Point this session at an existing run directory (recovery; --claim for write ownership)",
|
|
1141
1735
|
handler: async (args, ctx) => {
|
|
1142
|
-
const
|
|
1143
|
-
if (!runId) {
|
|
1736
|
+
const parsed = parseHarnessUseRunArgs(args);
|
|
1737
|
+
if (!parsed.runId) {
|
|
1144
1738
|
if (ctx.hasUI)
|
|
1145
|
-
ctx.ui.notify(
|
|
1739
|
+
ctx.ui.notify(
|
|
1740
|
+
"Usage: /harness-use-run <run-id> [--claim] [--readonly]",
|
|
1741
|
+
"warning",
|
|
1742
|
+
);
|
|
1146
1743
|
return;
|
|
1147
1744
|
}
|
|
1148
1745
|
const projectRoot = process.cwd();
|
|
1149
|
-
const
|
|
1746
|
+
const sessionId = ctx.sessionManager.getSessionId();
|
|
1747
|
+
const disk = await loadRunContextFromDisk(parsed.runId, projectRoot);
|
|
1150
1748
|
if (!disk) {
|
|
1151
|
-
if (ctx.hasUI) ctx.ui.notify(`Run not found: ${runId}`, "error");
|
|
1749
|
+
if (ctx.hasUI) ctx.ui.notify(`Run not found: ${parsed.runId}`, "error");
|
|
1152
1750
|
return;
|
|
1153
1751
|
}
|
|
1154
1752
|
activeCtx = {
|
|
1155
1753
|
...disk,
|
|
1156
|
-
pi_session_id:
|
|
1754
|
+
pi_session_id: sessionId,
|
|
1157
1755
|
};
|
|
1756
|
+
if (parsed.claim) {
|
|
1757
|
+
activeCtx = claimRunOwnership(activeCtx, sessionId);
|
|
1758
|
+
}
|
|
1759
|
+
const statuses = await resolveCompletionStatuses(
|
|
1760
|
+
getEntries(ctx),
|
|
1761
|
+
activeCtx.run_id,
|
|
1762
|
+
projectRoot,
|
|
1763
|
+
);
|
|
1764
|
+
if (activeCtx.owner_pi_session_id !== sessionId && !parsed.claim) {
|
|
1765
|
+
activeCtx.next_recommended_command =
|
|
1766
|
+
"Read-only: use /harness-use-run <run-id> --claim to take ownership.";
|
|
1767
|
+
} else {
|
|
1768
|
+
activeCtx.next_recommended_command = nextStepAfterOutcome({
|
|
1769
|
+
phase: activeCtx.phase,
|
|
1770
|
+
planStatus: activeCtx.plan_ready ? "ready" : null,
|
|
1771
|
+
lastCompletedStep: activeCtx.last_completed_step,
|
|
1772
|
+
lastOutcome: activeCtx.last_outcome,
|
|
1773
|
+
executionStatus: statuses.executionStatus,
|
|
1774
|
+
evalStatus: statuses.evalStatus,
|
|
1775
|
+
adversaryComplete: statuses.adversaryComplete,
|
|
1776
|
+
aborted: activeCtx.status === "aborted",
|
|
1777
|
+
});
|
|
1778
|
+
}
|
|
1779
|
+
activeCtx.updated_at = nowIso();
|
|
1158
1780
|
persistContext(pi, activeCtx);
|
|
1159
|
-
|
|
1781
|
+
syncPolicyFromRunContext(pi, getEntries(ctx), activeCtx);
|
|
1782
|
+
if (ctx.hasUI) {
|
|
1783
|
+
const mode = parsed.claim ? "claimed" : "bound (read-only)";
|
|
1160
1784
|
ctx.ui.notify(
|
|
1161
|
-
`Session
|
|
1785
|
+
`Session ${mode} to run ${parsed.runId}. See /harness-run-status.`,
|
|
1162
1786
|
"info",
|
|
1163
1787
|
);
|
|
1788
|
+
}
|
|
1164
1789
|
},
|
|
1165
1790
|
});
|
|
1166
1791
|
}
|