ultimate-pi 0.14.0 → 0.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agents/skills/harness-debate-plan/SKILL.md +41 -61
- package/.agents/skills/harness-governor/SKILL.md +11 -0
- package/.agents/skills/harness-orchestration/SKILL.md +5 -3
- package/.agents/skills/harness-plan/SKILL.md +11 -9
- package/.pi/agents/harness/adversary.md +1 -1
- package/.pi/agents/harness/evaluator.md +1 -1
- package/.pi/agents/harness/executor.md +1 -1
- package/.pi/agents/harness/incident-recorder.md +1 -1
- package/.pi/agents/harness/meta-optimizer.md +1 -1
- package/.pi/agents/harness/planning/decompose.md +8 -35
- package/.pi/agents/harness/planning/execution-plan-author.md +27 -15
- package/.pi/agents/harness/planning/hypothesis-validator.md +23 -6
- package/.pi/agents/harness/planning/hypothesis.md +4 -27
- package/.pi/agents/harness/planning/implementation-researcher.md +43 -0
- package/.pi/agents/harness/planning/plan-adversary.md +20 -5
- package/.pi/agents/harness/planning/plan-evaluator.md +28 -6
- package/.pi/agents/harness/planning/review-integrator.md +23 -10
- package/.pi/agents/harness/planning/scout-graphify.md +4 -23
- package/.pi/agents/harness/planning/scout-semantic.md +3 -18
- package/.pi/agents/harness/planning/scout-structure.md +3 -18
- package/.pi/agents/harness/planning/sprint-contract-auditor.md +22 -6
- package/.pi/agents/harness/planning/stack-researcher.md +21 -11
- package/.pi/agents/harness/tie-breaker.md +1 -1
- package/.pi/agents/harness/trace-librarian.md +1 -1
- package/.pi/extensions/budget-guard.ts +33 -19
- package/.pi/extensions/harness-debate-tools.ts +280 -19
- package/.pi/extensions/harness-live-widget.ts +39 -159
- package/.pi/extensions/harness-plan-approval.ts +47 -5
- package/.pi/extensions/harness-run-context.ts +96 -2
- package/.pi/extensions/harness-subagent-submit.ts +195 -0
- package/.pi/extensions/lib/debate-bus-core.ts +108 -17
- package/.pi/extensions/lib/debate-bus-state.ts +6 -0
- package/.pi/extensions/lib/harness-subagent-policy.ts +45 -0
- package/.pi/extensions/lib/harness-subagent-submit-pipeline.ts +82 -0
- package/.pi/extensions/lib/harness-subagent-submit-registry.ts +172 -0
- package/.pi/extensions/lib/harness-subagents-bridge.ts +42 -0
- package/.pi/extensions/lib/plan-approval/plan-review.ts +56 -0
- package/.pi/extensions/lib/plan-approval/types.ts +1 -0
- package/.pi/extensions/lib/plan-debate-eligibility.ts +214 -0
- package/.pi/extensions/lib/plan-debate-focus.ts +151 -0
- package/.pi/extensions/lib/plan-debate-gate.ts +88 -34
- package/.pi/extensions/lib/plan-debate-lane.ts +15 -0
- package/.pi/extensions/lib/plan-debate-lanes.ts +44 -0
- package/.pi/extensions/lib/plan-debate-round-status.ts +63 -20
- package/.pi/extensions/lib/plan-messenger.ts +93 -17
- package/.pi/extensions/policy-gate.ts +1 -1
- package/.pi/harness/README.md +1 -1
- package/.pi/harness/agents.manifest.json +25 -21
- package/.pi/harness/docs/adrs/0034-darwin-plan-research-pipeline.md +1 -3
- package/.pi/harness/docs/adrs/0035-plan-phase-review-gate.md +13 -5
- package/.pi/harness/docs/adrs/0036-implementation-research-and-selective-debate.md +51 -0
- package/.pi/harness/docs/adrs/0037-subagent-submit-tools.md +31 -0
- package/.pi/harness/docs/adrs/0038-budget-telemetry-only.md +23 -0
- package/.pi/harness/docs/adrs/README.md +4 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/artifacts/implementation-research.yaml +28 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/artifacts/review-round-r1.yaml +24 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/artifacts/review-round-r2.yaml +25 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/plan-packet.yaml +196 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/plan-review.md +14 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/research-brief.yaml +62 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/implementation-research.yaml +28 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r2.yaml +24 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r3.yaml +24 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/research-brief.yaml +29 -0
- package/.pi/harness/evals/smoke/smoke-harness-plan.mjs +97 -16
- package/.pi/harness/specs/harness-executor-handoff.schema.json +19 -0
- package/.pi/harness/specs/harness-human-required.schema.json +16 -0
- package/.pi/harness/specs/plan-implementation-research-brief.schema.json +128 -0
- package/.pi/harness/specs/plan-review-round-draft.schema.json +1 -1
- package/.pi/harness/specs/plan-scout-findings.schema.json +19 -0
- package/.pi/harness/specs/round-result.schema.json +15 -2
- package/.pi/lib/harness-agent-output.ts +45 -0
- package/.pi/lib/harness-budget-enforce.ts +18 -0
- package/.pi/lib/harness-schema-validate.ts +89 -0
- package/.pi/lib/harness-spawn-parse.ts +86 -0
- package/.pi/lib/harness-subagent-submit-path.ts +41 -0
- package/.pi/lib/harness-ui-state.ts +107 -2
- package/.pi/prompts/harness-auto.md +2 -2
- package/.pi/prompts/harness-plan.md +94 -42
- package/.pi/prompts/harness-run.md +2 -2
- package/.pi/prompts/planning-rubrics.md +31 -0
- package/.pi/scripts/harness-verify.mjs +2 -0
- package/.pi/scripts/harness_web/__pycache__/__init__.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/config.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/output.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/scrape.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/search.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/search_ddg.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/search_searxng.cpython-314.pyc +0 -0
- package/CHANGELOG.md +21 -0
- package/package.json +4 -2
- package/vendor/pi-subagents/src/subagents.ts +29 -3
|
@@ -3,12 +3,14 @@ import type {
|
|
|
3
3
|
ExtensionContext,
|
|
4
4
|
} from "@earendil-works/pi-coding-agent";
|
|
5
5
|
import {
|
|
6
|
+
deriveHarnessStatusHint,
|
|
7
|
+
formatHarnessPhaseLabel,
|
|
8
|
+
type HarnessStatusSeverity,
|
|
6
9
|
type HarnessUiState,
|
|
7
10
|
HarnessUiStateStore,
|
|
11
|
+
nextHarnessPhase,
|
|
8
12
|
} from "../lib/harness-ui-state";
|
|
9
13
|
|
|
10
|
-
type Severity = "accent" | "warning" | "error";
|
|
11
|
-
|
|
12
14
|
type TuiLike = { requestRender(): void };
|
|
13
15
|
type ThemeLike = {
|
|
14
16
|
fg(
|
|
@@ -164,31 +166,25 @@ function composeZones(left: string, right: string, width: number): string {
|
|
|
164
166
|
return fitToWidth(`${leftFit}${" ".repeat(minGap)}${rightFit}`, width);
|
|
165
167
|
}
|
|
166
168
|
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
169
|
+
function themeSeverityColor(
|
|
170
|
+
severity: HarnessStatusSeverity,
|
|
171
|
+
): "accent" | "warning" | "error" | "success" | "muted" {
|
|
172
|
+
return severity;
|
|
173
|
+
}
|
|
171
174
|
|
|
172
175
|
class HarnessWidgetComponent {
|
|
173
176
|
private widthCache?: number;
|
|
174
177
|
private linesCache?: string[];
|
|
175
178
|
private state: HarnessUiState;
|
|
176
|
-
private inFlight: InFlightState;
|
|
177
179
|
private themeRef: ThemeLike;
|
|
178
180
|
|
|
179
|
-
constructor(
|
|
180
|
-
state: HarnessUiState,
|
|
181
|
-
inFlight: InFlightState,
|
|
182
|
-
theme: ThemeLike,
|
|
183
|
-
) {
|
|
181
|
+
constructor(state: HarnessUiState, theme: ThemeLike) {
|
|
184
182
|
this.state = state;
|
|
185
|
-
this.inFlight = inFlight;
|
|
186
183
|
this.themeRef = theme;
|
|
187
184
|
}
|
|
188
185
|
|
|
189
|
-
public setData(state: HarnessUiState, inFlight: InFlightState): void {
|
|
186
|
+
public setData(state: HarnessUiState): void {
|
|
190
187
|
this.state = state;
|
|
191
|
-
this.inFlight = inFlight;
|
|
192
188
|
this.invalidate();
|
|
193
189
|
}
|
|
194
190
|
|
|
@@ -201,109 +197,23 @@ class HarnessWidgetComponent {
|
|
|
201
197
|
if (this.linesCache && this.widthCache === width) return this.linesCache;
|
|
202
198
|
const theme = this.themeRef;
|
|
203
199
|
const rowWidth = Math.max(1, width - TERMINAL_WIDTH_SAFETY_MARGIN);
|
|
204
|
-
const showDebateRow =
|
|
205
|
-
this.state.phase === "adversary" || this.state.phase === "merge";
|
|
206
|
-
|
|
207
|
-
const substateColor: Severity =
|
|
208
|
-
this.state.flowSubstate === "blocked"
|
|
209
|
-
? "error"
|
|
210
|
-
: this.state.flowSubstate === "severity-policy" ||
|
|
211
|
-
this.state.flowSubstate === "human-required"
|
|
212
|
-
? "warning"
|
|
213
|
-
: "accent";
|
|
214
|
-
const policyColor =
|
|
215
|
-
this.state.policyDecision === "pass"
|
|
216
|
-
? "success"
|
|
217
|
-
: this.state.policyDecision === "conditional_pass"
|
|
218
|
-
? "warning"
|
|
219
|
-
: this.state.policyDecision === "block" ||
|
|
220
|
-
this.state.policyDecision === "human_required"
|
|
221
|
-
? "error"
|
|
222
|
-
: "muted";
|
|
223
|
-
|
|
224
|
-
const policyDisplay = this.state.policyDecision ?? "pending";
|
|
225
|
-
|
|
226
|
-
const phaseToken = `${theme.fg("dim", "phase:")}${theme.fg("accent", this.state.phase)}`;
|
|
227
|
-
const flowToken = `${theme.fg("dim", "flow:")}${theme.fg(substateColor, this.state.flowSubstate)}`;
|
|
228
|
-
const policyToken = `${theme.fg("dim", "policy:")}${theme.fg(policyColor, policyDisplay)}`;
|
|
229
|
-
const row1 = composeZones(
|
|
230
|
-
`${theme.bold("Harness")} ${phaseToken} ${flowToken}`,
|
|
231
|
-
policyToken,
|
|
232
|
-
rowWidth,
|
|
233
|
-
);
|
|
234
200
|
|
|
235
|
-
const
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
const
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
? "down"
|
|
252
|
-
: "flat";
|
|
253
|
-
const trendColor =
|
|
254
|
-
consensusTrend === "up"
|
|
255
|
-
? "success"
|
|
256
|
-
: consensusTrend === "down"
|
|
257
|
-
? "warning"
|
|
258
|
-
: "muted";
|
|
259
|
-
|
|
260
|
-
const sev = this.state.severity;
|
|
261
|
-
const severityCompact =
|
|
262
|
-
sev.correctness == null &&
|
|
263
|
-
sev.security == null &&
|
|
264
|
-
sev.architecture == null &&
|
|
265
|
-
sev.testIntegrity == null
|
|
266
|
-
? theme.fg("muted", "sev:n/a")
|
|
267
|
-
: `${theme.fg("dim", "sev")} ${theme.fg("accent", `c:${sev.correctness ?? "-"}`)} ${theme.fg("accent", `s:${sev.security ?? "-"}`)} ${theme.fg("accent", `a:${sev.architecture ?? "-"}`)} ${theme.fg("accent", `t:${sev.testIntegrity ?? "-"}`)}`;
|
|
268
|
-
|
|
269
|
-
const planFlag = this.state.planApproved
|
|
270
|
-
? `${theme.fg("dim", "📋 Plan:")}${theme.fg("success", "OK")}`
|
|
271
|
-
: `${theme.fg("dim", "📋 Plan:")}${theme.fg("error", "NO")}`;
|
|
272
|
-
const reviewFlag = this.state.reviewIsolationOk
|
|
273
|
-
? `${theme.fg("dim", "🧪 Review:")}${theme.fg("success", "OK")}`
|
|
274
|
-
: `${theme.fg("dim", "🧪 Review:")}${theme.fg("warning", "ISO")}`;
|
|
275
|
-
const budgetFlag = this.state.budgetExhausted
|
|
276
|
-
? `${theme.fg("dim", "💰 Budget:")}${theme.fg("error", "HIT")}`
|
|
277
|
-
: `${theme.fg("dim", "💰 Budget:")}${theme.fg("success", "OK")}`;
|
|
278
|
-
const testsFlag =
|
|
279
|
-
this.state.testIntegritySeverity === "high"
|
|
280
|
-
? `${theme.fg("dim", "🛡 Tests:")}${theme.fg("error", "HIGH")}`
|
|
281
|
-
: this.state.testIntegritySeverity === "medium"
|
|
282
|
-
? `${theme.fg("dim", "🛡 Tests:")}${theme.fg("warning", "MED")}`
|
|
283
|
-
: `${theme.fg("dim", "🛡 Tests:")}${theme.fg("success", "OK")}`;
|
|
284
|
-
|
|
285
|
-
const toolDisplay = this.inFlight.lastToolName
|
|
286
|
-
? `${this.inFlight.toolCount}:${this.inFlight.lastToolName}`
|
|
287
|
-
: String(this.inFlight.toolCount);
|
|
288
|
-
const nextDisplay =
|
|
289
|
-
this.state.nextRecommendedCommand != null
|
|
290
|
-
? this.state.nextRecommendedCommand.length > 36
|
|
291
|
-
? `${this.state.nextRecommendedCommand.slice(0, 33)}...`
|
|
292
|
-
: this.state.nextRecommendedCommand
|
|
293
|
-
: null;
|
|
294
|
-
const row3Left = `${planFlag} ${reviewFlag} ${budgetFlag} ${testsFlag}`;
|
|
295
|
-
const row3Right = nextDisplay
|
|
296
|
-
? `${theme.fg("dim", "inFlight:")}${theme.fg("accent", toolDisplay)} ${theme.fg("dim", "next:")}${theme.fg("accent", nextDisplay)}`
|
|
297
|
-
: `${theme.fg("dim", "inFlight:")}${theme.fg("accent", toolDisplay)}`;
|
|
298
|
-
const row3 = composeZones(row3Left, row3Right, rowWidth);
|
|
299
|
-
|
|
300
|
-
const lines: string[] = [truncateToWidth(row1, rowWidth)];
|
|
301
|
-
if (showDebateRow) {
|
|
302
|
-
const debateLeft = `${theme.fg("dim", "Debate")} ${theme.fg("accent", `rounds:${debateProgress}`)} ${theme.fg("dim", "trend:")}${theme.fg(trendColor, consensusTrend)} ${theme.fg("dim", "budget:")}${theme.fg("accent", budgetDisplay)}`;
|
|
303
|
-
const row2 = composeZones(debateLeft, severityCompact, rowWidth);
|
|
304
|
-
lines.push(truncateToWidth(row2, rowWidth));
|
|
305
|
-
}
|
|
306
|
-
lines.push(truncateToWidth(row3, rowWidth));
|
|
201
|
+
const currentLabel = formatHarnessPhaseLabel(this.state.phase);
|
|
202
|
+
const nextPhase = nextHarnessPhase(this.state.phase);
|
|
203
|
+
const nowToken = `${theme.fg("dim", "now:")}${theme.fg("accent", currentLabel)}`;
|
|
204
|
+
const phaseToken =
|
|
205
|
+
nextPhase != null
|
|
206
|
+
? `${nowToken} ${theme.fg("dim", "→")} ${theme.fg("accent", formatHarnessPhaseLabel(nextPhase))}`
|
|
207
|
+
: nowToken;
|
|
208
|
+
|
|
209
|
+
const status = deriveHarnessStatusHint(this.state);
|
|
210
|
+
const statusColor = themeSeverityColor(status.severity);
|
|
211
|
+
const statusToken = theme.fg(statusColor, status.text);
|
|
212
|
+
|
|
213
|
+
const left = `${theme.bold("Harness")} ${phaseToken}`;
|
|
214
|
+
const row = composeZones(left, statusToken, rowWidth);
|
|
215
|
+
|
|
216
|
+
const lines = [truncateToWidth(row, rowWidth)];
|
|
307
217
|
this.widthCache = width;
|
|
308
218
|
this.linesCache = lines;
|
|
309
219
|
return lines;
|
|
@@ -316,14 +226,16 @@ class HarnessWidgetComponent {
|
|
|
316
226
|
}
|
|
317
227
|
|
|
318
228
|
function statusToken(state: HarnessUiState): string {
|
|
319
|
-
const
|
|
320
|
-
|
|
229
|
+
const current = formatHarnessPhaseLabel(state.phase);
|
|
230
|
+
const next = nextHarnessPhase(state.phase);
|
|
231
|
+
const phasePart =
|
|
232
|
+
next != null ? `${current}→${formatHarnessPhaseLabel(next)}` : current;
|
|
233
|
+
const hint = deriveHarnessStatusHint(state).text;
|
|
234
|
+
return `h:${phasePart}|${hint}`;
|
|
321
235
|
}
|
|
322
236
|
|
|
323
237
|
export default function harnessLiveWidget(pi: ExtensionAPI) {
|
|
324
238
|
const stateStore = new HarnessUiStateStore();
|
|
325
|
-
const inFlightCalls = new Set<string>();
|
|
326
|
-
let lastToolName: string | null = null;
|
|
327
239
|
let widgetMounted = false;
|
|
328
240
|
let tuiHandle: TuiLike | null = null;
|
|
329
241
|
let component: HarnessWidgetComponent | null = null;
|
|
@@ -334,19 +246,14 @@ export default function harnessLiveWidget(pi: ExtensionAPI) {
|
|
|
334
246
|
function mountHarnessWidget(ctx: ExtensionContext): void {
|
|
335
247
|
if (!ctx.hasUI) return;
|
|
336
248
|
const state = stateStore.refresh(ctx);
|
|
337
|
-
|
|
338
|
-
lastRenderHash = computeRenderHash(state, inFlight);
|
|
249
|
+
lastRenderHash = computeRenderHash(state);
|
|
339
250
|
|
|
340
251
|
ctx.ui.setWidget(
|
|
341
252
|
"harness-live",
|
|
342
253
|
(tui, theme) => {
|
|
343
254
|
widgetMounted = true;
|
|
344
255
|
tuiHandle = tui;
|
|
345
|
-
component = new HarnessWidgetComponent(
|
|
346
|
-
stateStore.snapshot(),
|
|
347
|
-
inFlight,
|
|
348
|
-
theme,
|
|
349
|
-
);
|
|
256
|
+
component = new HarnessWidgetComponent(stateStore.snapshot(), theme);
|
|
350
257
|
return {
|
|
351
258
|
render(width: number): string[] {
|
|
352
259
|
component?.setTheme(theme);
|
|
@@ -388,26 +295,15 @@ export default function harnessLiveWidget(pi: ExtensionAPI) {
|
|
|
388
295
|
ctx.ui.setStatus("harness-mode", undefined);
|
|
389
296
|
}
|
|
390
297
|
|
|
391
|
-
function computeRenderHash(
|
|
392
|
-
state: HarnessUiState,
|
|
393
|
-
inFlight: InFlightState,
|
|
394
|
-
): string {
|
|
298
|
+
function computeRenderHash(state: HarnessUiState): string {
|
|
395
299
|
return JSON.stringify({
|
|
396
300
|
phase: state.phase,
|
|
397
|
-
flowSubstate: state.flowSubstate,
|
|
398
301
|
planApproved: state.planApproved,
|
|
399
|
-
reviewIsolationOk: state.reviewIsolationOk,
|
|
400
302
|
budgetExhausted: state.budgetExhausted,
|
|
401
303
|
testIntegritySeverity: state.testIntegritySeverity,
|
|
402
|
-
debateRound: state.debateRound,
|
|
403
|
-
debateMaxRounds: state.debateMaxRounds,
|
|
404
|
-
debateBudgetUsed: state.debateBudgetUsed,
|
|
405
|
-
debateBudgetCap: state.debateBudgetCap,
|
|
406
304
|
policyDecision: state.policyDecision,
|
|
407
|
-
|
|
408
|
-
severity: state.severity,
|
|
305
|
+
flowSubstate: state.flowSubstate,
|
|
409
306
|
nextRecommendedCommand: state.nextRecommendedCommand,
|
|
410
|
-
inFlight,
|
|
411
307
|
});
|
|
412
308
|
}
|
|
413
309
|
|
|
@@ -417,15 +313,11 @@ export default function harnessLiveWidget(pi: ExtensionAPI) {
|
|
|
417
313
|
queueMicrotask(() => {
|
|
418
314
|
refreshQueued = false;
|
|
419
315
|
const state = stateStore.refresh(ctx);
|
|
420
|
-
const
|
|
421
|
-
toolCount: inFlightCalls.size,
|
|
422
|
-
lastToolName,
|
|
423
|
-
};
|
|
424
|
-
const hash = computeRenderHash(state, inFlight);
|
|
316
|
+
const hash = computeRenderHash(state);
|
|
425
317
|
updateStatusFallback(ctx, state);
|
|
426
318
|
if (hash === lastRenderHash) return;
|
|
427
319
|
lastRenderHash = hash;
|
|
428
|
-
if (component) component.setData(state, inFlight);
|
|
320
|
+
if (component) component.setData(state);
|
|
429
321
|
tuiHandle?.requestRender();
|
|
430
322
|
});
|
|
431
323
|
}
|
|
@@ -450,16 +342,4 @@ export default function harnessLiveWidget(pi: ExtensionAPI) {
|
|
|
450
342
|
pi.on("agent_end", (_event, ctx) => {
|
|
451
343
|
scheduleRefresh(ctx);
|
|
452
344
|
});
|
|
453
|
-
|
|
454
|
-
pi.on("tool_execution_start", (event, ctx) => {
|
|
455
|
-
inFlightCalls.add(event.toolCallId);
|
|
456
|
-
lastToolName = event.toolName;
|
|
457
|
-
scheduleRefresh(ctx);
|
|
458
|
-
});
|
|
459
|
-
|
|
460
|
-
pi.on("tool_result", (event, ctx) => {
|
|
461
|
-
inFlightCalls.delete(event.toolCallId);
|
|
462
|
-
if (inFlightCalls.size === 0) lastToolName = null;
|
|
463
|
-
scheduleRefresh(ctx);
|
|
464
|
-
});
|
|
465
345
|
}
|
|
@@ -2,6 +2,9 @@
|
|
|
2
2
|
* harness-plan-approval — PlanPacket approval UI and transcript renderer for parent sessions.
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
|
+
import { constants } from "node:fs";
|
|
6
|
+
import { access } from "node:fs/promises";
|
|
7
|
+
import { join } from "node:path";
|
|
5
8
|
import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
|
|
6
9
|
import { Text } from "@earendil-works/pi-tui";
|
|
7
10
|
import { Type } from "@sinclair/typebox";
|
|
@@ -146,6 +149,43 @@ export default function harnessPlanApproval(pi: ExtensionAPI) {
|
|
|
146
149
|
`Plan ${planId} — pending your approval`;
|
|
147
150
|
const runCtx = getLatestRunContext(entries);
|
|
148
151
|
const projectRoot = process.cwd();
|
|
152
|
+
const implWarnings: string[] = [];
|
|
153
|
+
if (runCtx?.run_id) {
|
|
154
|
+
const implPath = join(
|
|
155
|
+
projectRoot,
|
|
156
|
+
".pi",
|
|
157
|
+
"harness",
|
|
158
|
+
"runs",
|
|
159
|
+
runCtx.run_id,
|
|
160
|
+
"artifacts",
|
|
161
|
+
"implementation-research.yaml",
|
|
162
|
+
);
|
|
163
|
+
let implExists = false;
|
|
164
|
+
try {
|
|
165
|
+
await access(implPath, constants.R_OK);
|
|
166
|
+
implExists = true;
|
|
167
|
+
} catch {
|
|
168
|
+
implExists = false;
|
|
169
|
+
}
|
|
170
|
+
const risk = String(
|
|
171
|
+
validated.plan_packet.risk_level ?? "med",
|
|
172
|
+
).toLowerCase();
|
|
173
|
+
if (!implExists) {
|
|
174
|
+
const msg =
|
|
175
|
+
"approve_plan: missing artifacts/implementation-research.yaml (Phase 3.5 required)";
|
|
176
|
+
if (risk === "high") {
|
|
177
|
+
return {
|
|
178
|
+
content: [{ type: "text", text: msg }],
|
|
179
|
+
details: {
|
|
180
|
+
plan_packet: validated.plan_packet,
|
|
181
|
+
cancelled: true,
|
|
182
|
+
},
|
|
183
|
+
isError: true,
|
|
184
|
+
};
|
|
185
|
+
}
|
|
186
|
+
implWarnings.push(msg);
|
|
187
|
+
}
|
|
188
|
+
}
|
|
149
189
|
if (runCtx?.run_id) {
|
|
150
190
|
const gate = await validatePlanDebateGate(projectRoot, runCtx.run_id);
|
|
151
191
|
if (!gate.ok) {
|
|
@@ -237,13 +277,15 @@ export default function harnessPlanApproval(pi: ExtensionAPI) {
|
|
|
237
277
|
);
|
|
238
278
|
}
|
|
239
279
|
|
|
240
|
-
const text =
|
|
241
|
-
outcome.response,
|
|
242
|
-
|
|
243
|
-
|
|
280
|
+
const text = [
|
|
281
|
+
formatApprovePlanResultText(outcome.response, outcome.cancelled),
|
|
282
|
+
...implWarnings,
|
|
283
|
+
]
|
|
284
|
+
.filter(Boolean)
|
|
285
|
+
.join("\n\n");
|
|
244
286
|
return {
|
|
245
287
|
content: [{ type: "text", text }],
|
|
246
|
-
details,
|
|
288
|
+
details: { ...details, implementation_warnings: implWarnings },
|
|
247
289
|
};
|
|
248
290
|
},
|
|
249
291
|
|
|
@@ -5,8 +5,9 @@
|
|
|
5
5
|
* in before_agent_start so trace-recorder reuses it on agent_start.
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
|
-
import {
|
|
9
|
-
import {
|
|
8
|
+
import { constants } from "node:fs";
|
|
9
|
+
import { access, mkdir, readFile, writeFile } from "node:fs/promises";
|
|
10
|
+
import { dirname, join } from "node:path";
|
|
10
11
|
import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
|
|
11
12
|
import { Type } from "@sinclair/typebox";
|
|
12
13
|
import {
|
|
@@ -56,6 +57,10 @@ import {
|
|
|
56
57
|
writeYamlFile,
|
|
57
58
|
} from "../lib/harness-yaml.js";
|
|
58
59
|
import { claimExtensionLoad } from "./lib/extension-load-guard.js";
|
|
60
|
+
import {
|
|
61
|
+
evaluateHarnessSubagentToolCall,
|
|
62
|
+
isSubmitToolName,
|
|
63
|
+
} from "./lib/harness-subagent-policy.js";
|
|
59
64
|
import { isReviewRoundArtifactPath } from "./lib/plan-debate-gate.js";
|
|
60
65
|
import { isReviewRoundYamlWriteAllowed } from "./lib/plan-debate-write-guard.js";
|
|
61
66
|
|
|
@@ -714,6 +719,36 @@ export default function harnessRunContext(pi: ExtensionAPI) {
|
|
|
714
719
|
});
|
|
715
720
|
|
|
716
721
|
pi.on("tool_call", async (event, ctx) => {
|
|
722
|
+
// #region agent log
|
|
723
|
+
fetch("http://127.0.0.1:7928/ingest/a5d40896-34cb-4f12-97db-df7ada0b22f0", {
|
|
724
|
+
method: "POST",
|
|
725
|
+
headers: {
|
|
726
|
+
"Content-Type": "application/json",
|
|
727
|
+
"X-Debug-Session-Id": "2ca12b",
|
|
728
|
+
},
|
|
729
|
+
body: JSON.stringify({
|
|
730
|
+
sessionId: "2ca12b",
|
|
731
|
+
location: "harness-run-context.ts:tool_call",
|
|
732
|
+
message: "submit policy hook",
|
|
733
|
+
data: {
|
|
734
|
+
toolName: event.toolName,
|
|
735
|
+
typeofIsSubmitToolName: typeof isSubmitToolName,
|
|
736
|
+
},
|
|
737
|
+
timestamp: Date.now(),
|
|
738
|
+
hypothesisId: "H1",
|
|
739
|
+
}),
|
|
740
|
+
}).catch(() => {});
|
|
741
|
+
// #endregion
|
|
742
|
+
if (isSubmitToolName(event.toolName)) {
|
|
743
|
+
const decision = evaluateHarnessSubagentToolCall(
|
|
744
|
+
event.toolName,
|
|
745
|
+
event.input as Record<string, unknown>,
|
|
746
|
+
"parent-orchestrator",
|
|
747
|
+
);
|
|
748
|
+
if (decision.action === "block") {
|
|
749
|
+
return { block: true, reason: decision.reason };
|
|
750
|
+
}
|
|
751
|
+
}
|
|
717
752
|
if (event.toolName === "write") {
|
|
718
753
|
const entries = getEntries(ctx);
|
|
719
754
|
const runCtx = getLatestRunContext(entries) ?? activeCtx;
|
|
@@ -1030,6 +1065,65 @@ export default function harnessRunContext(pi: ExtensionAPI) {
|
|
|
1030
1065
|
},
|
|
1031
1066
|
});
|
|
1032
1067
|
|
|
1068
|
+
pi.registerTool({
|
|
1069
|
+
name: "harness_artifact_ready",
|
|
1070
|
+
label: "Harness Artifact Ready",
|
|
1071
|
+
description:
|
|
1072
|
+
"Check that harness artifact paths exist under the active run (no JSON parsing).",
|
|
1073
|
+
parameters: Type.Object({
|
|
1074
|
+
paths: Type.Array(Type.String(), {
|
|
1075
|
+
minItems: 1,
|
|
1076
|
+
description:
|
|
1077
|
+
"Relative paths under the run dir, e.g. artifacts/decomposition.yaml",
|
|
1078
|
+
}),
|
|
1079
|
+
}),
|
|
1080
|
+
async execute(_id, params, _signal, _onUpdate, ctx) {
|
|
1081
|
+
const entries = getEntries(ctx);
|
|
1082
|
+
const runCtx = getLatestRunContext(entries) ?? activeCtx;
|
|
1083
|
+
if (!runCtx?.run_id) {
|
|
1084
|
+
return {
|
|
1085
|
+
content: [{ type: "text", text: "No active harness run." }],
|
|
1086
|
+
details: {},
|
|
1087
|
+
isError: true,
|
|
1088
|
+
};
|
|
1089
|
+
}
|
|
1090
|
+
const paths = (params as { paths?: string[] }).paths ?? [];
|
|
1091
|
+
const projectRoot = process.cwd();
|
|
1092
|
+
const runRoot = join(
|
|
1093
|
+
projectRoot,
|
|
1094
|
+
".pi",
|
|
1095
|
+
"harness",
|
|
1096
|
+
"runs",
|
|
1097
|
+
runCtx.run_id,
|
|
1098
|
+
);
|
|
1099
|
+
const missing: string[] = [];
|
|
1100
|
+
const present: string[] = [];
|
|
1101
|
+
for (const rel of paths) {
|
|
1102
|
+
const normalized = rel.replace(/\\/g, "/");
|
|
1103
|
+
const abs = join(runRoot, normalized);
|
|
1104
|
+
try {
|
|
1105
|
+
await access(abs, constants.R_OK);
|
|
1106
|
+
present.push(normalized);
|
|
1107
|
+
} catch {
|
|
1108
|
+
missing.push(normalized);
|
|
1109
|
+
}
|
|
1110
|
+
}
|
|
1111
|
+
const ok = missing.length === 0;
|
|
1112
|
+
return {
|
|
1113
|
+
content: [
|
|
1114
|
+
{
|
|
1115
|
+
type: "text",
|
|
1116
|
+
text: ok
|
|
1117
|
+
? `All ${present.length} artifact(s) present.`
|
|
1118
|
+
: `Missing: ${missing.join(", ")}`,
|
|
1119
|
+
},
|
|
1120
|
+
],
|
|
1121
|
+
details: { ok, present, missing, run_id: runCtx.run_id },
|
|
1122
|
+
isError: !ok,
|
|
1123
|
+
};
|
|
1124
|
+
},
|
|
1125
|
+
});
|
|
1126
|
+
|
|
1033
1127
|
pi.registerCommand("harness-use-run", {
|
|
1034
1128
|
description: "Point this session at an existing run directory (recovery)",
|
|
1035
1129
|
handler: async (args, ctx) => {
|
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Subprocess-only harness submit tools — validate + write artifacts under run_dir.
|
|
3
|
+
* Loaded via `pi --no-extensions -e harness-subagent-submit.ts` for harness agents.
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { join } from "node:path";
|
|
7
|
+
import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
|
|
8
|
+
import { Type } from "@sinclair/typebox";
|
|
9
|
+
import { claimExtensionLoad } from "./lib/extension-load-guard.js";
|
|
10
|
+
import { getHarnessPackageRoot } from "./lib/harness-paths.js";
|
|
11
|
+
import { evaluateHarnessSubagentToolCall } from "./lib/harness-subagent-policy.js";
|
|
12
|
+
import { executeSubmitPipeline } from "./lib/harness-subagent-submit-pipeline.js";
|
|
13
|
+
import { SUBMIT_TOOL_SPECS } from "./lib/harness-subagent-submit-registry.js";
|
|
14
|
+
|
|
15
|
+
// @ts-expect-error pi extensions run as ESM
|
|
16
|
+
const MODULE_URL = import.meta.url;
|
|
17
|
+
|
|
18
|
+
const DocumentSchema = Type.Object(
|
|
19
|
+
{
|
|
20
|
+
document: Type.Record(Type.String(), Type.Unknown(), {
|
|
21
|
+
description: "Full artifact document matching the harness JSON schema",
|
|
22
|
+
}),
|
|
23
|
+
},
|
|
24
|
+
{ additionalProperties: false },
|
|
25
|
+
);
|
|
26
|
+
|
|
27
|
+
function resolveRunContext(): {
|
|
28
|
+
projectRoot: string;
|
|
29
|
+
specsDir: string;
|
|
30
|
+
runId: string;
|
|
31
|
+
runDirEnv?: string;
|
|
32
|
+
agentId: string;
|
|
33
|
+
} {
|
|
34
|
+
const projectRoot = process.env.HARNESS_PKG_ROOT ?? process.cwd();
|
|
35
|
+
const specsDir = join(projectRoot, ".pi", "harness", "specs");
|
|
36
|
+
const runId = process.env.HARNESS_RUN_ID?.trim() ?? "";
|
|
37
|
+
const runDirEnv = process.env.HARNESS_RUN_DIR?.trim();
|
|
38
|
+
const agentId = process.env.HARNESS_AGENT_ID?.trim() ?? "";
|
|
39
|
+
return { projectRoot, specsDir, runId, runDirEnv, agentId };
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
function isSubprocessHarness(): boolean {
|
|
43
|
+
return (
|
|
44
|
+
process.env.PI_HARNESS_SUBPROCESS === "1" &&
|
|
45
|
+
Boolean(process.env.HARNESS_RUN_ID?.trim())
|
|
46
|
+
);
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
export default function harnessSubagentSubmit(pi: ExtensionAPI) {
|
|
50
|
+
if (!claimExtensionLoad("harness-subagent-submit", MODULE_URL)) return;
|
|
51
|
+
// Option A: only load submit tools in subprocess (`-e` bundle), not parent discovery.
|
|
52
|
+
if (process.env.PI_HARNESS_SUBPROCESS !== "1") {
|
|
53
|
+
return;
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
const _packageRoot = getHarnessPackageRoot(MODULE_URL);
|
|
57
|
+
|
|
58
|
+
pi.on("tool_call", async (event) => {
|
|
59
|
+
if (!event.toolName.startsWith("submit_")) return undefined;
|
|
60
|
+
const subprocessOk = isSubprocessHarness();
|
|
61
|
+
// #region agent log
|
|
62
|
+
fetch("http://127.0.0.1:7928/ingest/a5d40896-34cb-4f12-97db-df7ada0b22f0", {
|
|
63
|
+
method: "POST",
|
|
64
|
+
headers: {
|
|
65
|
+
"Content-Type": "application/json",
|
|
66
|
+
"X-Debug-Session-Id": "2ca12b",
|
|
67
|
+
},
|
|
68
|
+
body: JSON.stringify({
|
|
69
|
+
sessionId: "2ca12b",
|
|
70
|
+
hypothesisId: "H2",
|
|
71
|
+
location: "harness-subagent-submit.ts:tool_call",
|
|
72
|
+
message: "submit tool_call gate",
|
|
73
|
+
data: {
|
|
74
|
+
toolName: event.toolName,
|
|
75
|
+
PI_HARNESS_SUBPROCESS: process.env.PI_HARNESS_SUBPROCESS,
|
|
76
|
+
HARNESS_RUN_ID: process.env.HARNESS_RUN_ID ?? null,
|
|
77
|
+
HARNESS_RUN_DIR: process.env.HARNESS_RUN_DIR ?? null,
|
|
78
|
+
HARNESS_AGENT_ID: process.env.HARNESS_AGENT_ID ?? null,
|
|
79
|
+
subprocessOk,
|
|
80
|
+
},
|
|
81
|
+
timestamp: Date.now(),
|
|
82
|
+
}),
|
|
83
|
+
}).catch(() => {});
|
|
84
|
+
// #endregion
|
|
85
|
+
if (!subprocessOk) {
|
|
86
|
+
return {
|
|
87
|
+
block: true,
|
|
88
|
+
reason:
|
|
89
|
+
"harness-subagent-submit: submit_* tools are only available in harness subagent subprocesses.",
|
|
90
|
+
};
|
|
91
|
+
}
|
|
92
|
+
const { agentId } = resolveRunContext();
|
|
93
|
+
if (!agentId) {
|
|
94
|
+
return {
|
|
95
|
+
block: true,
|
|
96
|
+
reason:
|
|
97
|
+
"harness-subagent-submit: HARNESS_AGENT_ID is required for submit tools.",
|
|
98
|
+
};
|
|
99
|
+
}
|
|
100
|
+
const decision = evaluateHarnessSubagentToolCall(
|
|
101
|
+
event.toolName,
|
|
102
|
+
event.input as Record<string, unknown>,
|
|
103
|
+
agentId,
|
|
104
|
+
);
|
|
105
|
+
if (decision.action === "block") {
|
|
106
|
+
return { block: true, reason: decision.reason };
|
|
107
|
+
}
|
|
108
|
+
return undefined;
|
|
109
|
+
});
|
|
110
|
+
|
|
111
|
+
for (const spec of SUBMIT_TOOL_SPECS) {
|
|
112
|
+
pi.registerTool({
|
|
113
|
+
name: spec.toolName,
|
|
114
|
+
label: spec.toolName.replace(/^submit_/, "Submit "),
|
|
115
|
+
description: `Terminal harness artifact submit for ${spec.agents.join(", ")}. Call once with the full schema document before ending the turn.`,
|
|
116
|
+
parameters: DocumentSchema,
|
|
117
|
+
async execute(_id, params, _signal, _onUpdate, _ctx) {
|
|
118
|
+
if (!isSubprocessHarness()) {
|
|
119
|
+
return {
|
|
120
|
+
content: [
|
|
121
|
+
{
|
|
122
|
+
type: "text",
|
|
123
|
+
text: "submit tools require PI_HARNESS_SUBPROCESS and HARNESS_RUN_ID",
|
|
124
|
+
},
|
|
125
|
+
],
|
|
126
|
+
details: {},
|
|
127
|
+
isError: true,
|
|
128
|
+
};
|
|
129
|
+
}
|
|
130
|
+
const { projectRoot, specsDir, runId, runDirEnv, agentId } =
|
|
131
|
+
resolveRunContext();
|
|
132
|
+
if (!spec.agents.includes(agentId)) {
|
|
133
|
+
return {
|
|
134
|
+
content: [
|
|
135
|
+
{
|
|
136
|
+
type: "text",
|
|
137
|
+
text: `${spec.toolName} is not allowed for agent ${agentId}`,
|
|
138
|
+
},
|
|
139
|
+
],
|
|
140
|
+
details: { agentId, tool: spec.toolName },
|
|
141
|
+
isError: true,
|
|
142
|
+
};
|
|
143
|
+
}
|
|
144
|
+
const document = (params as { document?: Record<string, unknown> })
|
|
145
|
+
.document;
|
|
146
|
+
if (!document || typeof document !== "object") {
|
|
147
|
+
return {
|
|
148
|
+
content: [{ type: "text", text: "document object is required" }],
|
|
149
|
+
details: {},
|
|
150
|
+
isError: true,
|
|
151
|
+
};
|
|
152
|
+
}
|
|
153
|
+
const result = await executeSubmitPipeline({
|
|
154
|
+
projectRoot,
|
|
155
|
+
specsDir,
|
|
156
|
+
spec,
|
|
157
|
+
agentId,
|
|
158
|
+
document,
|
|
159
|
+
runId,
|
|
160
|
+
runDirEnv,
|
|
161
|
+
});
|
|
162
|
+
if (!result.ok) {
|
|
163
|
+
return {
|
|
164
|
+
content: [
|
|
165
|
+
{
|
|
166
|
+
type: "text",
|
|
167
|
+
text: `Validation failed:\n${(result.validation_errors ?? []).join("\n")}`,
|
|
168
|
+
},
|
|
169
|
+
],
|
|
170
|
+
isError: true,
|
|
171
|
+
details: result,
|
|
172
|
+
};
|
|
173
|
+
}
|
|
174
|
+
const lines = [`ok: wrote ${result.artifact_path}`];
|
|
175
|
+
if (result.lane_result?.messenger_posted) {
|
|
176
|
+
lines.push("messenger updated");
|
|
177
|
+
}
|
|
178
|
+
if (result.human_required) {
|
|
179
|
+
lines.push("human_required: parent must call ask_user");
|
|
180
|
+
}
|
|
181
|
+
return {
|
|
182
|
+
content: [{ type: "text", text: lines.join("\n") }],
|
|
183
|
+
details: result as unknown,
|
|
184
|
+
};
|
|
185
|
+
},
|
|
186
|
+
});
|
|
187
|
+
}
|
|
188
|
+
}
|
|
189
|
+
|
|
190
|
+
/** Absolute path to the subprocess submit extension (Option A). */
|
|
191
|
+
export function harnessSubagentSubmitExtensionPath(
|
|
192
|
+
packageRoot: string,
|
|
193
|
+
): string {
|
|
194
|
+
return join(packageRoot, ".pi", "extensions", "harness-subagent-submit.ts");
|
|
195
|
+
}
|