ultimate-pi 0.14.0 → 0.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92) hide show
  1. package/.agents/skills/harness-debate-plan/SKILL.md +41 -61
  2. package/.agents/skills/harness-governor/SKILL.md +11 -0
  3. package/.agents/skills/harness-orchestration/SKILL.md +5 -3
  4. package/.agents/skills/harness-plan/SKILL.md +11 -9
  5. package/.pi/agents/harness/adversary.md +1 -1
  6. package/.pi/agents/harness/evaluator.md +1 -1
  7. package/.pi/agents/harness/executor.md +1 -1
  8. package/.pi/agents/harness/incident-recorder.md +1 -1
  9. package/.pi/agents/harness/meta-optimizer.md +1 -1
  10. package/.pi/agents/harness/planning/decompose.md +8 -35
  11. package/.pi/agents/harness/planning/execution-plan-author.md +27 -15
  12. package/.pi/agents/harness/planning/hypothesis-validator.md +23 -6
  13. package/.pi/agents/harness/planning/hypothesis.md +4 -27
  14. package/.pi/agents/harness/planning/implementation-researcher.md +43 -0
  15. package/.pi/agents/harness/planning/plan-adversary.md +20 -5
  16. package/.pi/agents/harness/planning/plan-evaluator.md +28 -6
  17. package/.pi/agents/harness/planning/review-integrator.md +23 -10
  18. package/.pi/agents/harness/planning/scout-graphify.md +4 -23
  19. package/.pi/agents/harness/planning/scout-semantic.md +3 -18
  20. package/.pi/agents/harness/planning/scout-structure.md +3 -18
  21. package/.pi/agents/harness/planning/sprint-contract-auditor.md +22 -6
  22. package/.pi/agents/harness/planning/stack-researcher.md +21 -11
  23. package/.pi/agents/harness/tie-breaker.md +1 -1
  24. package/.pi/agents/harness/trace-librarian.md +1 -1
  25. package/.pi/extensions/budget-guard.ts +33 -19
  26. package/.pi/extensions/harness-debate-tools.ts +280 -19
  27. package/.pi/extensions/harness-live-widget.ts +39 -159
  28. package/.pi/extensions/harness-plan-approval.ts +47 -5
  29. package/.pi/extensions/harness-run-context.ts +96 -2
  30. package/.pi/extensions/harness-subagent-submit.ts +195 -0
  31. package/.pi/extensions/lib/debate-bus-core.ts +108 -17
  32. package/.pi/extensions/lib/debate-bus-state.ts +6 -0
  33. package/.pi/extensions/lib/harness-subagent-policy.ts +45 -0
  34. package/.pi/extensions/lib/harness-subagent-submit-pipeline.ts +82 -0
  35. package/.pi/extensions/lib/harness-subagent-submit-registry.ts +172 -0
  36. package/.pi/extensions/lib/harness-subagents-bridge.ts +42 -0
  37. package/.pi/extensions/lib/plan-approval/plan-review.ts +56 -0
  38. package/.pi/extensions/lib/plan-approval/types.ts +1 -0
  39. package/.pi/extensions/lib/plan-debate-eligibility.ts +214 -0
  40. package/.pi/extensions/lib/plan-debate-focus.ts +151 -0
  41. package/.pi/extensions/lib/plan-debate-gate.ts +88 -34
  42. package/.pi/extensions/lib/plan-debate-lane.ts +15 -0
  43. package/.pi/extensions/lib/plan-debate-lanes.ts +44 -0
  44. package/.pi/extensions/lib/plan-debate-round-status.ts +63 -20
  45. package/.pi/extensions/lib/plan-messenger.ts +93 -17
  46. package/.pi/extensions/policy-gate.ts +1 -1
  47. package/.pi/harness/README.md +1 -1
  48. package/.pi/harness/agents.manifest.json +25 -21
  49. package/.pi/harness/docs/adrs/0034-darwin-plan-research-pipeline.md +1 -3
  50. package/.pi/harness/docs/adrs/0035-plan-phase-review-gate.md +13 -5
  51. package/.pi/harness/docs/adrs/0036-implementation-research-and-selective-debate.md +51 -0
  52. package/.pi/harness/docs/adrs/0037-subagent-submit-tools.md +31 -0
  53. package/.pi/harness/docs/adrs/0038-budget-telemetry-only.md +23 -0
  54. package/.pi/harness/docs/adrs/README.md +4 -0
  55. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/artifacts/implementation-research.yaml +28 -0
  56. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/artifacts/review-round-r1.yaml +24 -0
  57. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/artifacts/review-round-r2.yaml +25 -0
  58. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/plan-packet.yaml +196 -0
  59. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/plan-review.md +14 -0
  60. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/research-brief.yaml +62 -0
  61. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/implementation-research.yaml +28 -0
  62. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r2.yaml +24 -0
  63. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r3.yaml +24 -0
  64. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/research-brief.yaml +29 -0
  65. package/.pi/harness/evals/smoke/smoke-harness-plan.mjs +97 -16
  66. package/.pi/harness/specs/harness-executor-handoff.schema.json +19 -0
  67. package/.pi/harness/specs/harness-human-required.schema.json +16 -0
  68. package/.pi/harness/specs/plan-implementation-research-brief.schema.json +128 -0
  69. package/.pi/harness/specs/plan-review-round-draft.schema.json +1 -1
  70. package/.pi/harness/specs/plan-scout-findings.schema.json +19 -0
  71. package/.pi/harness/specs/round-result.schema.json +15 -2
  72. package/.pi/lib/harness-agent-output.ts +45 -0
  73. package/.pi/lib/harness-budget-enforce.ts +18 -0
  74. package/.pi/lib/harness-schema-validate.ts +89 -0
  75. package/.pi/lib/harness-spawn-parse.ts +86 -0
  76. package/.pi/lib/harness-subagent-submit-path.ts +41 -0
  77. package/.pi/lib/harness-ui-state.ts +107 -2
  78. package/.pi/prompts/harness-auto.md +2 -2
  79. package/.pi/prompts/harness-plan.md +94 -42
  80. package/.pi/prompts/harness-run.md +2 -2
  81. package/.pi/prompts/planning-rubrics.md +31 -0
  82. package/.pi/scripts/harness-verify.mjs +2 -0
  83. package/.pi/scripts/harness_web/__pycache__/__init__.cpython-314.pyc +0 -0
  84. package/.pi/scripts/harness_web/__pycache__/config.cpython-314.pyc +0 -0
  85. package/.pi/scripts/harness_web/__pycache__/output.cpython-314.pyc +0 -0
  86. package/.pi/scripts/harness_web/__pycache__/scrape.cpython-314.pyc +0 -0
  87. package/.pi/scripts/harness_web/__pycache__/search.cpython-314.pyc +0 -0
  88. package/.pi/scripts/harness_web/__pycache__/search_ddg.cpython-314.pyc +0 -0
  89. package/.pi/scripts/harness_web/__pycache__/search_searxng.cpython-314.pyc +0 -0
  90. package/CHANGELOG.md +21 -0
  91. package/package.json +4 -2
  92. package/vendor/pi-subagents/src/subagents.ts +29 -3
@@ -3,12 +3,14 @@ import type {
3
3
  ExtensionContext,
4
4
  } from "@earendil-works/pi-coding-agent";
5
5
  import {
6
+ deriveHarnessStatusHint,
7
+ formatHarnessPhaseLabel,
8
+ type HarnessStatusSeverity,
6
9
  type HarnessUiState,
7
10
  HarnessUiStateStore,
11
+ nextHarnessPhase,
8
12
  } from "../lib/harness-ui-state";
9
13
 
10
- type Severity = "accent" | "warning" | "error";
11
-
12
14
  type TuiLike = { requestRender(): void };
13
15
  type ThemeLike = {
14
16
  fg(
@@ -164,31 +166,25 @@ function composeZones(left: string, right: string, width: number): string {
164
166
  return fitToWidth(`${leftFit}${" ".repeat(minGap)}${rightFit}`, width);
165
167
  }
166
168
 
167
- type InFlightState = {
168
- toolCount: number;
169
- lastToolName: string | null;
170
- };
169
+ function themeSeverityColor(
170
+ severity: HarnessStatusSeverity,
171
+ ): "accent" | "warning" | "error" | "success" | "muted" {
172
+ return severity;
173
+ }
171
174
 
172
175
  class HarnessWidgetComponent {
173
176
  private widthCache?: number;
174
177
  private linesCache?: string[];
175
178
  private state: HarnessUiState;
176
- private inFlight: InFlightState;
177
179
  private themeRef: ThemeLike;
178
180
 
179
- constructor(
180
- state: HarnessUiState,
181
- inFlight: InFlightState,
182
- theme: ThemeLike,
183
- ) {
181
+ constructor(state: HarnessUiState, theme: ThemeLike) {
184
182
  this.state = state;
185
- this.inFlight = inFlight;
186
183
  this.themeRef = theme;
187
184
  }
188
185
 
189
- public setData(state: HarnessUiState, inFlight: InFlightState): void {
186
+ public setData(state: HarnessUiState): void {
190
187
  this.state = state;
191
- this.inFlight = inFlight;
192
188
  this.invalidate();
193
189
  }
194
190
 
@@ -201,109 +197,23 @@ class HarnessWidgetComponent {
201
197
  if (this.linesCache && this.widthCache === width) return this.linesCache;
202
198
  const theme = this.themeRef;
203
199
  const rowWidth = Math.max(1, width - TERMINAL_WIDTH_SAFETY_MARGIN);
204
- const showDebateRow =
205
- this.state.phase === "adversary" || this.state.phase === "merge";
206
-
207
- const substateColor: Severity =
208
- this.state.flowSubstate === "blocked"
209
- ? "error"
210
- : this.state.flowSubstate === "severity-policy" ||
211
- this.state.flowSubstate === "human-required"
212
- ? "warning"
213
- : "accent";
214
- const policyColor =
215
- this.state.policyDecision === "pass"
216
- ? "success"
217
- : this.state.policyDecision === "conditional_pass"
218
- ? "warning"
219
- : this.state.policyDecision === "block" ||
220
- this.state.policyDecision === "human_required"
221
- ? "error"
222
- : "muted";
223
-
224
- const policyDisplay = this.state.policyDecision ?? "pending";
225
-
226
- const phaseToken = `${theme.fg("dim", "phase:")}${theme.fg("accent", this.state.phase)}`;
227
- const flowToken = `${theme.fg("dim", "flow:")}${theme.fg(substateColor, this.state.flowSubstate)}`;
228
- const policyToken = `${theme.fg("dim", "policy:")}${theme.fg(policyColor, policyDisplay)}`;
229
- const row1 = composeZones(
230
- `${theme.bold("Harness")} ${phaseToken} ${flowToken}`,
231
- policyToken,
232
- rowWidth,
233
- );
234
200
 
235
- const debateProgress =
236
- this.state.debateMaxRounds != null
237
- ? `${this.state.debateRound}/${this.state.debateMaxRounds}`
238
- : String(this.state.debateRound);
239
- const budgetDisplay =
240
- this.state.debateBudgetUsed != null && this.state.debateBudgetCap != null
241
- ? `${this.state.debateBudgetUsed}/${this.state.debateBudgetCap}`
242
- : this.state.debateBudgetUsed != null
243
- ? String(this.state.debateBudgetUsed)
244
- : "n/a";
245
- const consensusTrend =
246
- this.state.consensusDelta == null
247
- ? "flat"
248
- : this.state.consensusDelta > 0
249
- ? "up"
250
- : this.state.consensusDelta < 0
251
- ? "down"
252
- : "flat";
253
- const trendColor =
254
- consensusTrend === "up"
255
- ? "success"
256
- : consensusTrend === "down"
257
- ? "warning"
258
- : "muted";
259
-
260
- const sev = this.state.severity;
261
- const severityCompact =
262
- sev.correctness == null &&
263
- sev.security == null &&
264
- sev.architecture == null &&
265
- sev.testIntegrity == null
266
- ? theme.fg("muted", "sev:n/a")
267
- : `${theme.fg("dim", "sev")} ${theme.fg("accent", `c:${sev.correctness ?? "-"}`)} ${theme.fg("accent", `s:${sev.security ?? "-"}`)} ${theme.fg("accent", `a:${sev.architecture ?? "-"}`)} ${theme.fg("accent", `t:${sev.testIntegrity ?? "-"}`)}`;
268
-
269
- const planFlag = this.state.planApproved
270
- ? `${theme.fg("dim", "📋 Plan:")}${theme.fg("success", "OK")}`
271
- : `${theme.fg("dim", "📋 Plan:")}${theme.fg("error", "NO")}`;
272
- const reviewFlag = this.state.reviewIsolationOk
273
- ? `${theme.fg("dim", "🧪 Review:")}${theme.fg("success", "OK")}`
274
- : `${theme.fg("dim", "🧪 Review:")}${theme.fg("warning", "ISO")}`;
275
- const budgetFlag = this.state.budgetExhausted
276
- ? `${theme.fg("dim", "💰 Budget:")}${theme.fg("error", "HIT")}`
277
- : `${theme.fg("dim", "💰 Budget:")}${theme.fg("success", "OK")}`;
278
- const testsFlag =
279
- this.state.testIntegritySeverity === "high"
280
- ? `${theme.fg("dim", "🛡 Tests:")}${theme.fg("error", "HIGH")}`
281
- : this.state.testIntegritySeverity === "medium"
282
- ? `${theme.fg("dim", "🛡 Tests:")}${theme.fg("warning", "MED")}`
283
- : `${theme.fg("dim", "🛡 Tests:")}${theme.fg("success", "OK")}`;
284
-
285
- const toolDisplay = this.inFlight.lastToolName
286
- ? `${this.inFlight.toolCount}:${this.inFlight.lastToolName}`
287
- : String(this.inFlight.toolCount);
288
- const nextDisplay =
289
- this.state.nextRecommendedCommand != null
290
- ? this.state.nextRecommendedCommand.length > 36
291
- ? `${this.state.nextRecommendedCommand.slice(0, 33)}...`
292
- : this.state.nextRecommendedCommand
293
- : null;
294
- const row3Left = `${planFlag} ${reviewFlag} ${budgetFlag} ${testsFlag}`;
295
- const row3Right = nextDisplay
296
- ? `${theme.fg("dim", "inFlight:")}${theme.fg("accent", toolDisplay)} ${theme.fg("dim", "next:")}${theme.fg("accent", nextDisplay)}`
297
- : `${theme.fg("dim", "inFlight:")}${theme.fg("accent", toolDisplay)}`;
298
- const row3 = composeZones(row3Left, row3Right, rowWidth);
299
-
300
- const lines: string[] = [truncateToWidth(row1, rowWidth)];
301
- if (showDebateRow) {
302
- const debateLeft = `${theme.fg("dim", "Debate")} ${theme.fg("accent", `rounds:${debateProgress}`)} ${theme.fg("dim", "trend:")}${theme.fg(trendColor, consensusTrend)} ${theme.fg("dim", "budget:")}${theme.fg("accent", budgetDisplay)}`;
303
- const row2 = composeZones(debateLeft, severityCompact, rowWidth);
304
- lines.push(truncateToWidth(row2, rowWidth));
305
- }
306
- lines.push(truncateToWidth(row3, rowWidth));
201
+ const currentLabel = formatHarnessPhaseLabel(this.state.phase);
202
+ const nextPhase = nextHarnessPhase(this.state.phase);
203
+ const nowToken = `${theme.fg("dim", "now:")}${theme.fg("accent", currentLabel)}`;
204
+ const phaseToken =
205
+ nextPhase != null
206
+ ? `${nowToken} ${theme.fg("dim", "→")} ${theme.fg("accent", formatHarnessPhaseLabel(nextPhase))}`
207
+ : nowToken;
208
+
209
+ const status = deriveHarnessStatusHint(this.state);
210
+ const statusColor = themeSeverityColor(status.severity);
211
+ const statusToken = theme.fg(statusColor, status.text);
212
+
213
+ const left = `${theme.bold("Harness")} ${phaseToken}`;
214
+ const row = composeZones(left, statusToken, rowWidth);
215
+
216
+ const lines = [truncateToWidth(row, rowWidth)];
307
217
  this.widthCache = width;
308
218
  this.linesCache = lines;
309
219
  return lines;
@@ -316,14 +226,16 @@ class HarnessWidgetComponent {
316
226
  }
317
227
 
318
228
  function statusToken(state: HarnessUiState): string {
319
- const decision = state.policyDecision ?? "pending";
320
- return `h:${state.phase}/${state.flowSubstate}/${decision}`;
229
+ const current = formatHarnessPhaseLabel(state.phase);
230
+ const next = nextHarnessPhase(state.phase);
231
+ const phasePart =
232
+ next != null ? `${current}→${formatHarnessPhaseLabel(next)}` : current;
233
+ const hint = deriveHarnessStatusHint(state).text;
234
+ return `h:${phasePart}|${hint}`;
321
235
  }
322
236
 
323
237
  export default function harnessLiveWidget(pi: ExtensionAPI) {
324
238
  const stateStore = new HarnessUiStateStore();
325
- const inFlightCalls = new Set<string>();
326
- let lastToolName: string | null = null;
327
239
  let widgetMounted = false;
328
240
  let tuiHandle: TuiLike | null = null;
329
241
  let component: HarnessWidgetComponent | null = null;
@@ -334,19 +246,14 @@ export default function harnessLiveWidget(pi: ExtensionAPI) {
334
246
  function mountHarnessWidget(ctx: ExtensionContext): void {
335
247
  if (!ctx.hasUI) return;
336
248
  const state = stateStore.refresh(ctx);
337
- const inFlight: InFlightState = { toolCount: 0, lastToolName: null };
338
- lastRenderHash = computeRenderHash(state, inFlight);
249
+ lastRenderHash = computeRenderHash(state);
339
250
 
340
251
  ctx.ui.setWidget(
341
252
  "harness-live",
342
253
  (tui, theme) => {
343
254
  widgetMounted = true;
344
255
  tuiHandle = tui;
345
- component = new HarnessWidgetComponent(
346
- stateStore.snapshot(),
347
- inFlight,
348
- theme,
349
- );
256
+ component = new HarnessWidgetComponent(stateStore.snapshot(), theme);
350
257
  return {
351
258
  render(width: number): string[] {
352
259
  component?.setTheme(theme);
@@ -388,26 +295,15 @@ export default function harnessLiveWidget(pi: ExtensionAPI) {
388
295
  ctx.ui.setStatus("harness-mode", undefined);
389
296
  }
390
297
 
391
- function computeRenderHash(
392
- state: HarnessUiState,
393
- inFlight: InFlightState,
394
- ): string {
298
+ function computeRenderHash(state: HarnessUiState): string {
395
299
  return JSON.stringify({
396
300
  phase: state.phase,
397
- flowSubstate: state.flowSubstate,
398
301
  planApproved: state.planApproved,
399
- reviewIsolationOk: state.reviewIsolationOk,
400
302
  budgetExhausted: state.budgetExhausted,
401
303
  testIntegritySeverity: state.testIntegritySeverity,
402
- debateRound: state.debateRound,
403
- debateMaxRounds: state.debateMaxRounds,
404
- debateBudgetUsed: state.debateBudgetUsed,
405
- debateBudgetCap: state.debateBudgetCap,
406
304
  policyDecision: state.policyDecision,
407
- consensusDelta: state.consensusDelta,
408
- severity: state.severity,
305
+ flowSubstate: state.flowSubstate,
409
306
  nextRecommendedCommand: state.nextRecommendedCommand,
410
- inFlight,
411
307
  });
412
308
  }
413
309
 
@@ -417,15 +313,11 @@ export default function harnessLiveWidget(pi: ExtensionAPI) {
417
313
  queueMicrotask(() => {
418
314
  refreshQueued = false;
419
315
  const state = stateStore.refresh(ctx);
420
- const inFlight: InFlightState = {
421
- toolCount: inFlightCalls.size,
422
- lastToolName,
423
- };
424
- const hash = computeRenderHash(state, inFlight);
316
+ const hash = computeRenderHash(state);
425
317
  updateStatusFallback(ctx, state);
426
318
  if (hash === lastRenderHash) return;
427
319
  lastRenderHash = hash;
428
- if (component) component.setData(state, inFlight);
320
+ if (component) component.setData(state);
429
321
  tuiHandle?.requestRender();
430
322
  });
431
323
  }
@@ -450,16 +342,4 @@ export default function harnessLiveWidget(pi: ExtensionAPI) {
450
342
  pi.on("agent_end", (_event, ctx) => {
451
343
  scheduleRefresh(ctx);
452
344
  });
453
-
454
- pi.on("tool_execution_start", (event, ctx) => {
455
- inFlightCalls.add(event.toolCallId);
456
- lastToolName = event.toolName;
457
- scheduleRefresh(ctx);
458
- });
459
-
460
- pi.on("tool_result", (event, ctx) => {
461
- inFlightCalls.delete(event.toolCallId);
462
- if (inFlightCalls.size === 0) lastToolName = null;
463
- scheduleRefresh(ctx);
464
- });
465
345
  }
@@ -2,6 +2,9 @@
2
2
  * harness-plan-approval — PlanPacket approval UI and transcript renderer for parent sessions.
3
3
  */
4
4
 
5
+ import { constants } from "node:fs";
6
+ import { access } from "node:fs/promises";
7
+ import { join } from "node:path";
5
8
  import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
6
9
  import { Text } from "@earendil-works/pi-tui";
7
10
  import { Type } from "@sinclair/typebox";
@@ -146,6 +149,43 @@ export default function harnessPlanApproval(pi: ExtensionAPI) {
146
149
  `Plan ${planId} — pending your approval`;
147
150
  const runCtx = getLatestRunContext(entries);
148
151
  const projectRoot = process.cwd();
152
+ const implWarnings: string[] = [];
153
+ if (runCtx?.run_id) {
154
+ const implPath = join(
155
+ projectRoot,
156
+ ".pi",
157
+ "harness",
158
+ "runs",
159
+ runCtx.run_id,
160
+ "artifacts",
161
+ "implementation-research.yaml",
162
+ );
163
+ let implExists = false;
164
+ try {
165
+ await access(implPath, constants.R_OK);
166
+ implExists = true;
167
+ } catch {
168
+ implExists = false;
169
+ }
170
+ const risk = String(
171
+ validated.plan_packet.risk_level ?? "med",
172
+ ).toLowerCase();
173
+ if (!implExists) {
174
+ const msg =
175
+ "approve_plan: missing artifacts/implementation-research.yaml (Phase 3.5 required)";
176
+ if (risk === "high") {
177
+ return {
178
+ content: [{ type: "text", text: msg }],
179
+ details: {
180
+ plan_packet: validated.plan_packet,
181
+ cancelled: true,
182
+ },
183
+ isError: true,
184
+ };
185
+ }
186
+ implWarnings.push(msg);
187
+ }
188
+ }
149
189
  if (runCtx?.run_id) {
150
190
  const gate = await validatePlanDebateGate(projectRoot, runCtx.run_id);
151
191
  if (!gate.ok) {
@@ -237,13 +277,15 @@ export default function harnessPlanApproval(pi: ExtensionAPI) {
237
277
  );
238
278
  }
239
279
 
240
- const text = formatApprovePlanResultText(
241
- outcome.response,
242
- outcome.cancelled,
243
- );
280
+ const text = [
281
+ formatApprovePlanResultText(outcome.response, outcome.cancelled),
282
+ ...implWarnings,
283
+ ]
284
+ .filter(Boolean)
285
+ .join("\n\n");
244
286
  return {
245
287
  content: [{ type: "text", text }],
246
- details,
288
+ details: { ...details, implementation_warnings: implWarnings },
247
289
  };
248
290
  },
249
291
 
@@ -5,8 +5,9 @@
5
5
  * in before_agent_start so trace-recorder reuses it on agent_start.
6
6
  */
7
7
 
8
- import { mkdir, readFile, writeFile } from "node:fs/promises";
9
- import { dirname } from "node:path";
8
+ import { constants } from "node:fs";
9
+ import { access, mkdir, readFile, writeFile } from "node:fs/promises";
10
+ import { dirname, join } from "node:path";
10
11
  import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
11
12
  import { Type } from "@sinclair/typebox";
12
13
  import {
@@ -56,6 +57,10 @@ import {
56
57
  writeYamlFile,
57
58
  } from "../lib/harness-yaml.js";
58
59
  import { claimExtensionLoad } from "./lib/extension-load-guard.js";
60
+ import {
61
+ evaluateHarnessSubagentToolCall,
62
+ isSubmitToolName,
63
+ } from "./lib/harness-subagent-policy.js";
59
64
  import { isReviewRoundArtifactPath } from "./lib/plan-debate-gate.js";
60
65
  import { isReviewRoundYamlWriteAllowed } from "./lib/plan-debate-write-guard.js";
61
66
 
@@ -714,6 +719,36 @@ export default function harnessRunContext(pi: ExtensionAPI) {
714
719
  });
715
720
 
716
721
  pi.on("tool_call", async (event, ctx) => {
722
+ // #region agent log
723
+ fetch("http://127.0.0.1:7928/ingest/a5d40896-34cb-4f12-97db-df7ada0b22f0", {
724
+ method: "POST",
725
+ headers: {
726
+ "Content-Type": "application/json",
727
+ "X-Debug-Session-Id": "2ca12b",
728
+ },
729
+ body: JSON.stringify({
730
+ sessionId: "2ca12b",
731
+ location: "harness-run-context.ts:tool_call",
732
+ message: "submit policy hook",
733
+ data: {
734
+ toolName: event.toolName,
735
+ typeofIsSubmitToolName: typeof isSubmitToolName,
736
+ },
737
+ timestamp: Date.now(),
738
+ hypothesisId: "H1",
739
+ }),
740
+ }).catch(() => {});
741
+ // #endregion
742
+ if (isSubmitToolName(event.toolName)) {
743
+ const decision = evaluateHarnessSubagentToolCall(
744
+ event.toolName,
745
+ event.input as Record<string, unknown>,
746
+ "parent-orchestrator",
747
+ );
748
+ if (decision.action === "block") {
749
+ return { block: true, reason: decision.reason };
750
+ }
751
+ }
717
752
  if (event.toolName === "write") {
718
753
  const entries = getEntries(ctx);
719
754
  const runCtx = getLatestRunContext(entries) ?? activeCtx;
@@ -1030,6 +1065,65 @@ export default function harnessRunContext(pi: ExtensionAPI) {
1030
1065
  },
1031
1066
  });
1032
1067
 
1068
+ pi.registerTool({
1069
+ name: "harness_artifact_ready",
1070
+ label: "Harness Artifact Ready",
1071
+ description:
1072
+ "Check that harness artifact paths exist under the active run (no JSON parsing).",
1073
+ parameters: Type.Object({
1074
+ paths: Type.Array(Type.String(), {
1075
+ minItems: 1,
1076
+ description:
1077
+ "Relative paths under the run dir, e.g. artifacts/decomposition.yaml",
1078
+ }),
1079
+ }),
1080
+ async execute(_id, params, _signal, _onUpdate, ctx) {
1081
+ const entries = getEntries(ctx);
1082
+ const runCtx = getLatestRunContext(entries) ?? activeCtx;
1083
+ if (!runCtx?.run_id) {
1084
+ return {
1085
+ content: [{ type: "text", text: "No active harness run." }],
1086
+ details: {},
1087
+ isError: true,
1088
+ };
1089
+ }
1090
+ const paths = (params as { paths?: string[] }).paths ?? [];
1091
+ const projectRoot = process.cwd();
1092
+ const runRoot = join(
1093
+ projectRoot,
1094
+ ".pi",
1095
+ "harness",
1096
+ "runs",
1097
+ runCtx.run_id,
1098
+ );
1099
+ const missing: string[] = [];
1100
+ const present: string[] = [];
1101
+ for (const rel of paths) {
1102
+ const normalized = rel.replace(/\\/g, "/");
1103
+ const abs = join(runRoot, normalized);
1104
+ try {
1105
+ await access(abs, constants.R_OK);
1106
+ present.push(normalized);
1107
+ } catch {
1108
+ missing.push(normalized);
1109
+ }
1110
+ }
1111
+ const ok = missing.length === 0;
1112
+ return {
1113
+ content: [
1114
+ {
1115
+ type: "text",
1116
+ text: ok
1117
+ ? `All ${present.length} artifact(s) present.`
1118
+ : `Missing: ${missing.join(", ")}`,
1119
+ },
1120
+ ],
1121
+ details: { ok, present, missing, run_id: runCtx.run_id },
1122
+ isError: !ok,
1123
+ };
1124
+ },
1125
+ });
1126
+
1033
1127
  pi.registerCommand("harness-use-run", {
1034
1128
  description: "Point this session at an existing run directory (recovery)",
1035
1129
  handler: async (args, ctx) => {
@@ -0,0 +1,195 @@
1
+ /**
2
+ * Subprocess-only harness submit tools — validate + write artifacts under run_dir.
3
+ * Loaded via `pi --no-extensions -e harness-subagent-submit.ts` for harness agents.
4
+ */
5
+
6
+ import { join } from "node:path";
7
+ import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
8
+ import { Type } from "@sinclair/typebox";
9
+ import { claimExtensionLoad } from "./lib/extension-load-guard.js";
10
+ import { getHarnessPackageRoot } from "./lib/harness-paths.js";
11
+ import { evaluateHarnessSubagentToolCall } from "./lib/harness-subagent-policy.js";
12
+ import { executeSubmitPipeline } from "./lib/harness-subagent-submit-pipeline.js";
13
+ import { SUBMIT_TOOL_SPECS } from "./lib/harness-subagent-submit-registry.js";
14
+
15
+ // @ts-expect-error pi extensions run as ESM
16
+ const MODULE_URL = import.meta.url;
17
+
18
+ const DocumentSchema = Type.Object(
19
+ {
20
+ document: Type.Record(Type.String(), Type.Unknown(), {
21
+ description: "Full artifact document matching the harness JSON schema",
22
+ }),
23
+ },
24
+ { additionalProperties: false },
25
+ );
26
+
27
+ function resolveRunContext(): {
28
+ projectRoot: string;
29
+ specsDir: string;
30
+ runId: string;
31
+ runDirEnv?: string;
32
+ agentId: string;
33
+ } {
34
+ const projectRoot = process.env.HARNESS_PKG_ROOT ?? process.cwd();
35
+ const specsDir = join(projectRoot, ".pi", "harness", "specs");
36
+ const runId = process.env.HARNESS_RUN_ID?.trim() ?? "";
37
+ const runDirEnv = process.env.HARNESS_RUN_DIR?.trim();
38
+ const agentId = process.env.HARNESS_AGENT_ID?.trim() ?? "";
39
+ return { projectRoot, specsDir, runId, runDirEnv, agentId };
40
+ }
41
+
42
+ function isSubprocessHarness(): boolean {
43
+ return (
44
+ process.env.PI_HARNESS_SUBPROCESS === "1" &&
45
+ Boolean(process.env.HARNESS_RUN_ID?.trim())
46
+ );
47
+ }
48
+
49
+ export default function harnessSubagentSubmit(pi: ExtensionAPI) {
50
+ if (!claimExtensionLoad("harness-subagent-submit", MODULE_URL)) return;
51
+ // Option A: only load submit tools in subprocess (`-e` bundle), not parent discovery.
52
+ if (process.env.PI_HARNESS_SUBPROCESS !== "1") {
53
+ return;
54
+ }
55
+
56
+ const _packageRoot = getHarnessPackageRoot(MODULE_URL);
57
+
58
+ pi.on("tool_call", async (event) => {
59
+ if (!event.toolName.startsWith("submit_")) return undefined;
60
+ const subprocessOk = isSubprocessHarness();
61
+ // #region agent log
62
+ fetch("http://127.0.0.1:7928/ingest/a5d40896-34cb-4f12-97db-df7ada0b22f0", {
63
+ method: "POST",
64
+ headers: {
65
+ "Content-Type": "application/json",
66
+ "X-Debug-Session-Id": "2ca12b",
67
+ },
68
+ body: JSON.stringify({
69
+ sessionId: "2ca12b",
70
+ hypothesisId: "H2",
71
+ location: "harness-subagent-submit.ts:tool_call",
72
+ message: "submit tool_call gate",
73
+ data: {
74
+ toolName: event.toolName,
75
+ PI_HARNESS_SUBPROCESS: process.env.PI_HARNESS_SUBPROCESS,
76
+ HARNESS_RUN_ID: process.env.HARNESS_RUN_ID ?? null,
77
+ HARNESS_RUN_DIR: process.env.HARNESS_RUN_DIR ?? null,
78
+ HARNESS_AGENT_ID: process.env.HARNESS_AGENT_ID ?? null,
79
+ subprocessOk,
80
+ },
81
+ timestamp: Date.now(),
82
+ }),
83
+ }).catch(() => {});
84
+ // #endregion
85
+ if (!subprocessOk) {
86
+ return {
87
+ block: true,
88
+ reason:
89
+ "harness-subagent-submit: submit_* tools are only available in harness subagent subprocesses.",
90
+ };
91
+ }
92
+ const { agentId } = resolveRunContext();
93
+ if (!agentId) {
94
+ return {
95
+ block: true,
96
+ reason:
97
+ "harness-subagent-submit: HARNESS_AGENT_ID is required for submit tools.",
98
+ };
99
+ }
100
+ const decision = evaluateHarnessSubagentToolCall(
101
+ event.toolName,
102
+ event.input as Record<string, unknown>,
103
+ agentId,
104
+ );
105
+ if (decision.action === "block") {
106
+ return { block: true, reason: decision.reason };
107
+ }
108
+ return undefined;
109
+ });
110
+
111
+ for (const spec of SUBMIT_TOOL_SPECS) {
112
+ pi.registerTool({
113
+ name: spec.toolName,
114
+ label: spec.toolName.replace(/^submit_/, "Submit "),
115
+ description: `Terminal harness artifact submit for ${spec.agents.join(", ")}. Call once with the full schema document before ending the turn.`,
116
+ parameters: DocumentSchema,
117
+ async execute(_id, params, _signal, _onUpdate, _ctx) {
118
+ if (!isSubprocessHarness()) {
119
+ return {
120
+ content: [
121
+ {
122
+ type: "text",
123
+ text: "submit tools require PI_HARNESS_SUBPROCESS and HARNESS_RUN_ID",
124
+ },
125
+ ],
126
+ details: {},
127
+ isError: true,
128
+ };
129
+ }
130
+ const { projectRoot, specsDir, runId, runDirEnv, agentId } =
131
+ resolveRunContext();
132
+ if (!spec.agents.includes(agentId)) {
133
+ return {
134
+ content: [
135
+ {
136
+ type: "text",
137
+ text: `${spec.toolName} is not allowed for agent ${agentId}`,
138
+ },
139
+ ],
140
+ details: { agentId, tool: spec.toolName },
141
+ isError: true,
142
+ };
143
+ }
144
+ const document = (params as { document?: Record<string, unknown> })
145
+ .document;
146
+ if (!document || typeof document !== "object") {
147
+ return {
148
+ content: [{ type: "text", text: "document object is required" }],
149
+ details: {},
150
+ isError: true,
151
+ };
152
+ }
153
+ const result = await executeSubmitPipeline({
154
+ projectRoot,
155
+ specsDir,
156
+ spec,
157
+ agentId,
158
+ document,
159
+ runId,
160
+ runDirEnv,
161
+ });
162
+ if (!result.ok) {
163
+ return {
164
+ content: [
165
+ {
166
+ type: "text",
167
+ text: `Validation failed:\n${(result.validation_errors ?? []).join("\n")}`,
168
+ },
169
+ ],
170
+ isError: true,
171
+ details: result,
172
+ };
173
+ }
174
+ const lines = [`ok: wrote ${result.artifact_path}`];
175
+ if (result.lane_result?.messenger_posted) {
176
+ lines.push("messenger updated");
177
+ }
178
+ if (result.human_required) {
179
+ lines.push("human_required: parent must call ask_user");
180
+ }
181
+ return {
182
+ content: [{ type: "text", text: lines.join("\n") }],
183
+ details: result as unknown,
184
+ };
185
+ },
186
+ });
187
+ }
188
+ }
189
+
190
+ /** Absolute path to the subprocess submit extension (Option A). */
191
+ export function harnessSubagentSubmitExtensionPath(
192
+ packageRoot: string,
193
+ ): string {
194
+ return join(packageRoot, ".pi", "extensions", "harness-subagent-submit.ts");
195
+ }