ultimate-pi 0.8.0 → 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agents/skills/harness-plan/SKILL.md +6 -6
- package/.pi/agents/harness/planner.md +9 -10
- package/.pi/extensions/budget-guard.ts +46 -17
- package/.pi/extensions/harness-run-context.ts +150 -28
- package/.pi/extensions/lib/harness-subagents/harness-subagent-policy.ts +1 -1
- package/.pi/extensions/lib/harness-subagents/parent-ask-user-bridge.ts +89 -0
- package/.pi/extensions/lib/harness-subagents/spawn-policy.ts +20 -2
- package/.pi/extensions/lib/harness-subagents/vendored/agent-manager.ts +1 -0
- package/.pi/extensions/lib/harness-subagents/vendored/agent-runner.ts +40 -24
- package/.pi/extensions/lib/harness-subagents/vendored/index.ts +21 -0
- package/.pi/extensions/policy-gate.ts +4 -4
- package/.pi/harness/agents.manifest.json +82 -82
- package/.pi/harness/docs/adrs/0031-harness-run-context.md +1 -1
- package/.pi/harness/docs/adrs/0032-harness-command-orchestration.md +6 -6
- package/.pi/harness/specs/budget-exhausted-event.schema.json +3 -1
- package/.pi/harness/specs/harness-turn.schema.json +18 -0
- package/.pi/lib/harness-run-context.ts +166 -32
- package/.pi/prompts/harness-plan.md +12 -14
- package/.pi/scripts/harness-verify.mjs +29 -1
- package/CHANGELOG.md +12 -0
- package/package.json +2 -2
|
@@ -14,15 +14,15 @@ description: Produce PlanPacket-aligned harness plans before execute phase. Use
|
|
|
14
14
|
|
|
15
15
|
## Workflow (orchestrator)
|
|
16
16
|
|
|
17
|
-
1.
|
|
18
|
-
2.
|
|
19
|
-
3.
|
|
20
|
-
4.
|
|
21
|
-
5. **Only after
|
|
17
|
+
1. Use `HarnessSpawnContext` from injected `[HarnessRunContext]` — do not read spec files from disk.
|
|
18
|
+
2. Spawn `harness/planner` **once** with that JSON in the prompt (`inherit_context: false`).
|
|
19
|
+
3. Parse planner JSON from `get_subagent_result` (`status`, `plan_packet`, `clarification`).
|
|
20
|
+
4. Do **not** call `ask_user` from the parent session or re-spawn for clarification — the planner uses `ask_user` in the subagent.
|
|
21
|
+
5. **Only after** subagent approval is synced — write the canonical plan packet to `plan_packet_path`.
|
|
22
22
|
|
|
23
23
|
## Rules
|
|
24
24
|
|
|
25
|
-
-
|
|
25
|
+
- `harness/planner` owns the clarification and approval `ask_user` calls (bridged to the parent UI).
|
|
26
26
|
- Never plan or mutate source inline in the slash-command session.
|
|
27
27
|
- context-mode only on harness paths; never lean-ctx.
|
|
28
28
|
|
|
@@ -1,8 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
description: Harness planner that compiles strict PlanPacket contracts before execution.
|
|
3
|
-
tools: read, grep, find, ls
|
|
3
|
+
tools: read, grep, find, ls, ask_user
|
|
4
4
|
extensions: false
|
|
5
|
-
disallowed_tools: ask_user
|
|
6
5
|
thinking: medium
|
|
7
6
|
max_turns: 20
|
|
8
7
|
inherit_context: false
|
|
@@ -12,7 +11,7 @@ You are the Harness Planner.
|
|
|
12
11
|
|
|
13
12
|
## Mission
|
|
14
13
|
|
|
15
|
-
Compile a strict, machine-readable `PlanPacket` draft
|
|
14
|
+
Compile a strict, machine-readable `PlanPacket` draft. Run clarification and final approval via `ask_user` in this session (parent UI). You do **not** write `plan-packet.json` — the orchestrator writes the canonical file after you return `status: ready` and the user has approved.
|
|
16
15
|
|
|
17
16
|
## Spawn context
|
|
18
17
|
|
|
@@ -20,20 +19,20 @@ Read the `HarnessSpawnContext` JSON in the spawn prompt (`schema_version`, `mode
|
|
|
20
19
|
|
|
21
20
|
## Process
|
|
22
21
|
|
|
23
|
-
1.
|
|
22
|
+
1. Use graphify context (`graphify-out/GRAPH_REPORT.md` or wiki) before claiming architecture — do not read harness spec JSON files from disk.
|
|
24
23
|
2. Parse task scope, constraints, and acceptance intent from spawn context.
|
|
25
|
-
3. **Greenfield** (`mode: create`) vs **revise** (`mode: revise`) — when revising, read the existing packet at `plan_packet_path` and amend
|
|
26
|
-
4. `--quick` / `quick: true` narrows breadth
|
|
24
|
+
3. **Greenfield** (`mode: create`) vs **revise** (`mode: revise`) — when revising, read the existing packet at `plan_packet_path` if present and amend.
|
|
25
|
+
4. `--quick` / `quick: true` narrows breadth, never safety or rollback requirements.
|
|
27
26
|
5. Build a complete `PlanPacket`: `plan_id`, `task_id`, `scope`, `assumptions`, `risk_level`, `acceptance_checks`, `rollback_plan` with `revert_command`, `revert_branch`, `patch_bundle`, `revert_commit_ready: true`.
|
|
28
27
|
6. Escalate `risk_level` to `high` for blast radius, uncertainty, or policy-sensitive surfaces.
|
|
29
|
-
7. If scope
|
|
28
|
+
7. If scope is ambiguous, call `ask_user` with structured options — do not return `needs_clarification` without trying `ask_user` first when options are clear.
|
|
29
|
+
8. Before returning `ready`, present the full plan in chat and call `ask_user` with **Approve** / **Request changes** / **Cancel**. On Request changes, revise and ask again in this session.
|
|
30
30
|
|
|
31
31
|
## Guardrails
|
|
32
32
|
|
|
33
|
-
- Do not mutate files (read-only tools
|
|
33
|
+
- Do not mutate project files (read-only tools except `ask_user`).
|
|
34
34
|
- Never speculate about code you have not read.
|
|
35
35
|
- Do not execute or widen implementation scope.
|
|
36
|
-
- Project overrides must not set `inherit_context: true` for `harness/*`.
|
|
37
36
|
|
|
38
37
|
## Output (required JSON block)
|
|
39
38
|
|
|
@@ -48,4 +47,4 @@ End with a single fenced `json` block the parent can parse:
|
|
|
48
47
|
}
|
|
49
48
|
```
|
|
50
49
|
|
|
51
|
-
Use `"status": "needs_clarification"`
|
|
50
|
+
Use `"status": "needs_clarification"` only when still blocked after `ask_user` or when the user cancelled; include `clarification` when the parent must intervene without a live subagent.
|
|
@@ -23,7 +23,9 @@ interface BudgetExhaustedEvent {
|
|
|
23
23
|
exhaustion_reason:
|
|
24
24
|
| "max_rounds_reached"
|
|
25
25
|
| "round_token_cap_exceeded"
|
|
26
|
-
| "debate_global_cap_exceeded"
|
|
26
|
+
| "debate_global_cap_exceeded"
|
|
27
|
+
| "phase_cap_exceeded"
|
|
28
|
+
| "global_cap_exceeded";
|
|
27
29
|
caps: {
|
|
28
30
|
max_rounds: number;
|
|
29
31
|
round_token_cap: number;
|
|
@@ -52,7 +54,7 @@ const DEFAULT_GLOBAL_CAP = Number(
|
|
|
52
54
|
);
|
|
53
55
|
const HARD_STOP_BUDGETS = process.env.HARNESS_BUDGET_HARD_STOP === "true";
|
|
54
56
|
const DEFAULT_PHASE_CAPS: Record<HarnessPhase, number> = {
|
|
55
|
-
plan: Number(process.env.HARNESS_BUDGET_PLAN_TOKENS ?? "
|
|
57
|
+
plan: Number(process.env.HARNESS_BUDGET_PLAN_TOKENS ?? "80000"),
|
|
56
58
|
execute: Number(process.env.HARNESS_BUDGET_EXECUTE_TOKENS ?? "80000"),
|
|
57
59
|
evaluate: Number(process.env.HARNESS_BUDGET_EVALUATE_TOKENS ?? "25000"),
|
|
58
60
|
adversary: Number(process.env.HARNESS_BUDGET_ADVERSARY_TOKENS ?? "35000"),
|
|
@@ -191,6 +193,8 @@ async function emitBudgetEvent(
|
|
|
191
193
|
pi.appendEntry("harness-budget-exhausted", event);
|
|
192
194
|
}
|
|
193
195
|
|
|
196
|
+
const debouncedSoftLimit = new Map<string, boolean>();
|
|
197
|
+
|
|
194
198
|
export default function budgetGuard(pi: ExtensionAPI) {
|
|
195
199
|
pi.on("tool_call", async (_event, ctx) => {
|
|
196
200
|
const policy = getPolicyContext(ctx);
|
|
@@ -202,35 +206,60 @@ export default function budgetGuard(pi: ExtensionAPI) {
|
|
|
202
206
|
const globalCap = DEFAULT_GLOBAL_CAP;
|
|
203
207
|
const phaseCap = DEFAULT_PHASE_CAPS[phase];
|
|
204
208
|
const caps = await readDebateCapsFromSchema();
|
|
209
|
+
const runId = getRunId(ctx);
|
|
210
|
+
|
|
211
|
+
const phaseExceeded = phaseUsed >= phaseCap;
|
|
212
|
+
const globalExceeded = usage.totalTokens >= globalCap;
|
|
213
|
+
if (!phaseExceeded && !globalExceeded) return undefined;
|
|
205
214
|
|
|
206
|
-
|
|
215
|
+
const exhaustionReason = phaseExceeded
|
|
216
|
+
? "phase_cap_exceeded"
|
|
217
|
+
: "global_cap_exceeded";
|
|
218
|
+
const debateCaps =
|
|
219
|
+
phase === "adversary" || phase === "evaluate"
|
|
220
|
+
? caps
|
|
221
|
+
: {
|
|
222
|
+
max_rounds: 0,
|
|
223
|
+
round_token_cap: phaseCap,
|
|
224
|
+
debate_global_cap: globalCap,
|
|
225
|
+
};
|
|
207
226
|
|
|
208
227
|
const exhausted: BudgetExhaustedEvent = {
|
|
209
228
|
schema_version: "1.0.0",
|
|
210
229
|
contract_version: "1.0.0",
|
|
211
230
|
event_type: "budget_exhausted",
|
|
212
|
-
run_id:
|
|
231
|
+
run_id: runId,
|
|
213
232
|
debate_id: `${phase}-budget-guard`,
|
|
214
233
|
round_count: 1,
|
|
215
|
-
budget_used:
|
|
216
|
-
exhaustion_reason:
|
|
217
|
-
caps,
|
|
234
|
+
budget_used: phaseExceeded ? phaseUsed : usage.totalTokens,
|
|
235
|
+
exhaustion_reason: exhaustionReason,
|
|
236
|
+
caps: debateCaps,
|
|
218
237
|
minimum_evidence_confidence: 0.6,
|
|
219
238
|
default_policy_outcome: "block",
|
|
220
239
|
human_override_allowed: true,
|
|
221
240
|
};
|
|
222
241
|
|
|
223
|
-
|
|
242
|
+
const debounceKey = `${runId}:${phase}:${exhaustionReason}`;
|
|
243
|
+
if (!debouncedSoftLimit.has(debounceKey)) {
|
|
244
|
+
debouncedSoftLimit.set(debounceKey, true);
|
|
245
|
+
await emitBudgetEvent(pi, exhausted);
|
|
246
|
+
}
|
|
247
|
+
|
|
224
248
|
if (!HARD_STOP_BUDGETS) {
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
249
|
+
const softKey = `${debounceKey}:soft`;
|
|
250
|
+
if (!debouncedSoftLimit.has(softKey)) {
|
|
251
|
+
debouncedSoftLimit.set(softKey, true);
|
|
252
|
+
pi.appendEntry("harness-budget-soft-limit", {
|
|
253
|
+
run_id: exhausted.run_id,
|
|
254
|
+
phase,
|
|
255
|
+
phaseUsed,
|
|
256
|
+
phaseCap,
|
|
257
|
+
totalUsed: usage.totalTokens,
|
|
258
|
+
totalCap: globalCap,
|
|
259
|
+
exhaustion_reason: exhaustionReason,
|
|
260
|
+
timestamp: nowIso(),
|
|
261
|
+
});
|
|
262
|
+
}
|
|
234
263
|
return undefined;
|
|
235
264
|
}
|
|
236
265
|
return {
|
|
@@ -5,6 +5,7 @@
|
|
|
5
5
|
* in before_agent_start so trace-recorder reuses it on agent_start.
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
|
+
import { readFile, writeFile } from "node:fs/promises";
|
|
8
9
|
import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
|
|
9
10
|
import {
|
|
10
11
|
canonicalPlanPath,
|
|
@@ -13,15 +14,17 @@ import {
|
|
|
13
14
|
extractCompletionStatuses,
|
|
14
15
|
formatActivePlanBlock,
|
|
15
16
|
formatPlanContextBlock,
|
|
17
|
+
getLatestHarnessTurn,
|
|
16
18
|
getLatestPolicyPhase,
|
|
17
19
|
getLatestRunContext,
|
|
18
20
|
getPolicyTransitionBlock,
|
|
19
21
|
type HarnessRunContext,
|
|
22
|
+
type HarnessTurnEntry,
|
|
20
23
|
hasHarnessAbortSignal,
|
|
21
24
|
hasPlanUserApproval,
|
|
25
|
+
inferHarnessPhase,
|
|
22
26
|
isAmendPlanAllowed,
|
|
23
27
|
isHarnessBootstrapPrompt,
|
|
24
|
-
isHarnessSlashCommand,
|
|
25
28
|
isNewTaskPlanBlocked,
|
|
26
29
|
isStaleActiveRunPointer,
|
|
27
30
|
loadProjectActiveRun,
|
|
@@ -30,7 +33,7 @@ import {
|
|
|
30
33
|
nowIso,
|
|
31
34
|
type PlanPacketSummary,
|
|
32
35
|
parseAskUserApprovalFromMessage,
|
|
33
|
-
|
|
36
|
+
parseHarnessSlashInput,
|
|
34
37
|
planPacketSummary,
|
|
35
38
|
readPlanPacketFromPath,
|
|
36
39
|
resolveArgsForCommand,
|
|
@@ -60,14 +63,26 @@ function persistContext(pi: ExtensionAPI, ctx: HarnessRunContext): void {
|
|
|
60
63
|
void saveProjectActiveRun(ctx);
|
|
61
64
|
}
|
|
62
65
|
|
|
63
|
-
function extractTaskSummary(
|
|
64
|
-
const
|
|
65
|
-
if (
|
|
66
|
-
|
|
67
|
-
if (
|
|
66
|
+
function extractTaskSummary(args: string, prompt?: string): string | null {
|
|
67
|
+
const fromArgs = args.match(/"([^"]+)"/);
|
|
68
|
+
if (fromArgs?.[1]) return fromArgs[1];
|
|
69
|
+
if (args.trim()) return args.trim().slice(0, 200);
|
|
70
|
+
if (prompt) {
|
|
71
|
+
const quoted = prompt.match(/"([^"]+)"/);
|
|
72
|
+
if (quoted?.[1]) return quoted[1];
|
|
73
|
+
}
|
|
68
74
|
return null;
|
|
69
75
|
}
|
|
70
76
|
|
|
77
|
+
function appendHarnessTurn(pi: ExtensionAPI, turn: HarnessTurnEntry): void {
|
|
78
|
+
pi.appendEntry("harness-turn", turn);
|
|
79
|
+
pi.appendEntry("harness-plan-attempt", {
|
|
80
|
+
run_id: null,
|
|
81
|
+
command: turn.command,
|
|
82
|
+
started_at: turn.invoked_at,
|
|
83
|
+
});
|
|
84
|
+
}
|
|
85
|
+
|
|
71
86
|
function syncPolicyFromPlan(
|
|
72
87
|
pi: ExtensionAPI,
|
|
73
88
|
entries: unknown[],
|
|
@@ -148,15 +163,35 @@ export default function harnessRunContext(pi: ExtensionAPI) {
|
|
|
148
163
|
activeCtx = await hydrateFromDisk(sessionId, projectRoot, entries);
|
|
149
164
|
});
|
|
150
165
|
|
|
166
|
+
pi.on("input", async (event) => {
|
|
167
|
+
if (event.source === "extension") {
|
|
168
|
+
return { action: "continue" as const };
|
|
169
|
+
}
|
|
170
|
+
const parsed = parseHarnessSlashInput(event.text);
|
|
171
|
+
if (!parsed) {
|
|
172
|
+
return { action: "continue" as const };
|
|
173
|
+
}
|
|
174
|
+
appendHarnessTurn(pi, {
|
|
175
|
+
schema_version: "1.0.0",
|
|
176
|
+
command: parsed.command,
|
|
177
|
+
args: parsed.args,
|
|
178
|
+
source: "slash",
|
|
179
|
+
invoked_at: nowIso(),
|
|
180
|
+
});
|
|
181
|
+
return { action: "continue" as const };
|
|
182
|
+
});
|
|
183
|
+
|
|
151
184
|
pi.on("before_agent_start", async (event, ctx) => {
|
|
152
185
|
const sessionId = ctx.sessionManager.getSessionId();
|
|
153
186
|
const projectRoot = process.cwd();
|
|
154
187
|
const entries = getEntries(ctx);
|
|
155
188
|
const userPrompt = userVisiblePromptSlice(event.prompt);
|
|
156
|
-
const
|
|
189
|
+
const turn = getLatestHarnessTurn(entries);
|
|
190
|
+
const parsed = turn
|
|
191
|
+
? { command: turn.command, args: turn.args }
|
|
192
|
+
: parseHarnessSlashInput(userPrompt);
|
|
157
193
|
const harnessTurn =
|
|
158
|
-
|
|
159
|
-
needsClarificationFollowUp(activeCtx);
|
|
194
|
+
Boolean(turn) || Boolean(parsed) || needsClarificationFollowUp(activeCtx);
|
|
160
195
|
|
|
161
196
|
if (
|
|
162
197
|
userPrompt.toLowerCase().includes("/harness-abort") ||
|
|
@@ -186,7 +221,10 @@ export default function harnessRunContext(pi: ExtensionAPI) {
|
|
|
186
221
|
}
|
|
187
222
|
|
|
188
223
|
const policyPhase =
|
|
189
|
-
|
|
224
|
+
inferHarnessPhase(entries, userPrompt) ??
|
|
225
|
+
getLatestPolicyPhase(entries) ??
|
|
226
|
+
activeCtx?.phase ??
|
|
227
|
+
"plan";
|
|
190
228
|
const driftActive = driftGateActive(entries);
|
|
191
229
|
|
|
192
230
|
// Plain-language follow-up after needs_clarification
|
|
@@ -196,13 +234,11 @@ export default function harnessRunContext(pi: ExtensionAPI) {
|
|
|
196
234
|
const packet = activeCtx.plan_packet_path
|
|
197
235
|
? await readPlanPacketFromPath(activeCtx.plan_packet_path)
|
|
198
236
|
: null;
|
|
199
|
-
const
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
)
|
|
205
|
-
: null;
|
|
237
|
+
const planPath = activeCtx.plan_packet_path;
|
|
238
|
+
const summary =
|
|
239
|
+
packet && planPath
|
|
240
|
+
? planPacketSummary(packet, planPath, "needs_clarification")
|
|
241
|
+
: null;
|
|
206
242
|
syncPolicyFromPlan(
|
|
207
243
|
pi,
|
|
208
244
|
entries,
|
|
@@ -244,7 +280,7 @@ export default function harnessRunContext(pi: ExtensionAPI) {
|
|
|
244
280
|
activeCtx.last_outcome = "abandoned";
|
|
245
281
|
persistContext(pi, activeCtx);
|
|
246
282
|
}
|
|
247
|
-
const task = extractTaskSummary(userPrompt);
|
|
283
|
+
const task = extractTaskSummary(args, userPrompt);
|
|
248
284
|
activeCtx = createFreshRunContext(sessionId, projectRoot, task);
|
|
249
285
|
persistContext(pi, activeCtx);
|
|
250
286
|
return {
|
|
@@ -323,21 +359,29 @@ export default function harnessRunContext(pi: ExtensionAPI) {
|
|
|
323
359
|
!activeCtx ||
|
|
324
360
|
!shouldReuseHarnessRunId(userPrompt, activeCtx, command)
|
|
325
361
|
) {
|
|
326
|
-
const task = extractTaskSummary(userPrompt);
|
|
362
|
+
const task = extractTaskSummary(args, userPrompt);
|
|
327
363
|
activeCtx = createFreshRunContext(sessionId, projectRoot, task);
|
|
328
364
|
}
|
|
329
365
|
activeCtx.plan_ready = false;
|
|
330
366
|
activeCtx.phase = "plan";
|
|
331
367
|
activeCtx.status = "active";
|
|
332
368
|
if (command === "harness-plan") {
|
|
333
|
-
const task = extractTaskSummary(userPrompt);
|
|
369
|
+
const task = extractTaskSummary(args, userPrompt);
|
|
334
370
|
if (task) activeCtx.task_summary = task;
|
|
335
371
|
}
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
372
|
+
if (turn) {
|
|
373
|
+
pi.appendEntry("harness-plan-attempt", {
|
|
374
|
+
run_id: activeCtx.run_id,
|
|
375
|
+
command,
|
|
376
|
+
started_at: turn.invoked_at,
|
|
377
|
+
});
|
|
378
|
+
} else {
|
|
379
|
+
pi.appendEntry("harness-plan-attempt", {
|
|
380
|
+
run_id: activeCtx.run_id,
|
|
381
|
+
command,
|
|
382
|
+
started_at: nowIso(),
|
|
383
|
+
});
|
|
384
|
+
}
|
|
341
385
|
} else if (
|
|
342
386
|
activeCtx &&
|
|
343
387
|
shouldReuseHarnessRunId(userPrompt, activeCtx, command)
|
|
@@ -473,7 +517,6 @@ export default function harnessRunContext(pi: ExtensionAPI) {
|
|
|
473
517
|
|
|
474
518
|
pi.on("agent_end", async (_event, ctx) => {
|
|
475
519
|
const entries = getEntries(ctx);
|
|
476
|
-
const sessionId = ctx.sessionManager.getSessionId();
|
|
477
520
|
if (!activeCtx) {
|
|
478
521
|
activeCtx = getLatestRunContext(entries);
|
|
479
522
|
}
|
|
@@ -493,7 +536,10 @@ export default function harnessRunContext(pi: ExtensionAPI) {
|
|
|
493
536
|
? lastUser.message.content
|
|
494
537
|
: "";
|
|
495
538
|
}
|
|
496
|
-
const
|
|
539
|
+
const lastTurn = getLatestHarnessTurn(entries);
|
|
540
|
+
const parsed = lastTurn
|
|
541
|
+
? { command: lastTurn.command, args: lastTurn.args }
|
|
542
|
+
: parseHarnessSlashInput(userVisiblePromptSlice(lastPrompt));
|
|
497
543
|
if (!parsed && !needsClarificationFollowUp(activeCtx)) return;
|
|
498
544
|
|
|
499
545
|
const policyPhase = getLatestPolicyPhase(entries) ?? activeCtx.phase;
|
|
@@ -715,6 +761,82 @@ export default function harnessRunContext(pi: ExtensionAPI) {
|
|
|
715
761
|
},
|
|
716
762
|
});
|
|
717
763
|
|
|
764
|
+
pi.registerCommand("harness-plan-commit", {
|
|
765
|
+
description:
|
|
766
|
+
"Write approved plan-packet.json to the active run (requires harness-plan-approval)",
|
|
767
|
+
handler: async (args, ctx) => {
|
|
768
|
+
const projectRoot = process.cwd();
|
|
769
|
+
const entries = getEntries(ctx);
|
|
770
|
+
let runCtx = getLatestRunContext(entries) ?? activeCtx;
|
|
771
|
+
if (!runCtx) {
|
|
772
|
+
runCtx = await hydrateFromDisk(
|
|
773
|
+
ctx.sessionManager.getSessionId(),
|
|
774
|
+
projectRoot,
|
|
775
|
+
entries,
|
|
776
|
+
);
|
|
777
|
+
}
|
|
778
|
+
if (!runCtx?.plan_packet_path) {
|
|
779
|
+
const msg = "No active harness run. Run /harness-plan first.";
|
|
780
|
+
if (ctx.hasUI) ctx.ui.notify(msg, "warning");
|
|
781
|
+
return;
|
|
782
|
+
}
|
|
783
|
+
if (
|
|
784
|
+
!hasPlanUserApproval(entries, {
|
|
785
|
+
sincePlanCommand: true,
|
|
786
|
+
planId: runCtx.plan_id,
|
|
787
|
+
})
|
|
788
|
+
) {
|
|
789
|
+
const msg =
|
|
790
|
+
"Plan commit blocked: no user approval recorded. Approve via ask_user in the planner subagent first.";
|
|
791
|
+
if (ctx.hasUI) ctx.ui.notify(msg, "warning");
|
|
792
|
+
return;
|
|
793
|
+
}
|
|
794
|
+
const pathArg = args.trim();
|
|
795
|
+
let packetPath = runCtx.plan_packet_path;
|
|
796
|
+
if (pathArg) {
|
|
797
|
+
packetPath = pathArg;
|
|
798
|
+
}
|
|
799
|
+
const packet = await readPlanPacketFromPath(packetPath);
|
|
800
|
+
const validation = validatePlanPacket(packet);
|
|
801
|
+
if (!validation.valid || !packet) {
|
|
802
|
+
const msg = !packet
|
|
803
|
+
? "Plan packet file missing or unreadable."
|
|
804
|
+
: `Invalid plan packet: ${validation.errors.join("; ")}`;
|
|
805
|
+
if (ctx.hasUI) ctx.ui.notify(msg, "error");
|
|
806
|
+
return;
|
|
807
|
+
}
|
|
808
|
+
const target = runCtx.plan_packet_path;
|
|
809
|
+
if (!target) {
|
|
810
|
+
if (ctx.hasUI) ctx.ui.notify("No plan_packet_path on active run.", "error");
|
|
811
|
+
return;
|
|
812
|
+
}
|
|
813
|
+
if (pathArg && pathArg !== target) {
|
|
814
|
+
const raw = await readFile(pathArg, "utf-8");
|
|
815
|
+
await writeFile(target, raw, "utf-8");
|
|
816
|
+
}
|
|
817
|
+
runCtx.plan_id = packet.plan_id ?? runCtx.plan_id;
|
|
818
|
+
runCtx.plan_ready = true;
|
|
819
|
+
runCtx.phase = "plan";
|
|
820
|
+
runCtx.last_completed_step = "plan";
|
|
821
|
+
runCtx.last_outcome = "ready";
|
|
822
|
+
runCtx.next_recommended_command = "/harness-run";
|
|
823
|
+
runCtx.updated_at = nowIso();
|
|
824
|
+
activeCtx = runCtx;
|
|
825
|
+
persistContext(pi, runCtx);
|
|
826
|
+
syncPolicyFromPlan(
|
|
827
|
+
pi,
|
|
828
|
+
entries,
|
|
829
|
+
runCtx.plan_id ?? packet.plan_id ?? "plan-pending",
|
|
830
|
+
"plan",
|
|
831
|
+
true,
|
|
832
|
+
);
|
|
833
|
+
const summary = planPacketSummary(packet, target, "ready");
|
|
834
|
+
pi.appendEntry("harness-plan-packet", summary);
|
|
835
|
+
const msg = `Plan committed: ${target}`;
|
|
836
|
+
if (ctx.hasUI) ctx.ui.notify(msg, "info");
|
|
837
|
+
},
|
|
838
|
+
});
|
|
839
|
+
|
|
718
840
|
pi.registerCommand("harness-use-run", {
|
|
719
841
|
description: "Point this session at an existing run directory (recovery)",
|
|
720
842
|
handler: async (args, ctx) => {
|
|
@@ -82,7 +82,7 @@ export function evaluateHarnessSubagentToolCall(
|
|
|
82
82
|
input: Record<string, unknown> | undefined,
|
|
83
83
|
agentType: string,
|
|
84
84
|
): ToolCallDecision {
|
|
85
|
-
const base = evaluateSubagentToolCall(toolName);
|
|
85
|
+
const base = evaluateSubagentToolCall(toolName, agentType);
|
|
86
86
|
if (base.action === "block") {
|
|
87
87
|
return base;
|
|
88
88
|
}
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Registers ask_user in subagent sessions, delegating UI to the parent harness session.
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import type {
|
|
6
|
+
ExtensionAPI,
|
|
7
|
+
ExtensionContext,
|
|
8
|
+
} from "@earendil-works/pi-coding-agent";
|
|
9
|
+
import { runAskDialog } from "../ask-user/dialog.js";
|
|
10
|
+
import { runAskFallback } from "../ask-user/fallback.js";
|
|
11
|
+
import { renderAskCall, renderAskResult } from "../ask-user/render.js";
|
|
12
|
+
import {
|
|
13
|
+
AskUserParamsSchema,
|
|
14
|
+
PROMPT_GUIDELINES,
|
|
15
|
+
PROMPT_SNIPPET,
|
|
16
|
+
} from "../ask-user/schema.js";
|
|
17
|
+
import type { AskUserParams, DialogResult } from "../ask-user/types.js";
|
|
18
|
+
import {
|
|
19
|
+
formatResultText,
|
|
20
|
+
toToolDetails,
|
|
21
|
+
validateAskParams,
|
|
22
|
+
} from "../ask-user/validate.js";
|
|
23
|
+
|
|
24
|
+
const ASK_USER_AGENT_TYPES = new Set([
|
|
25
|
+
"harness/planner",
|
|
26
|
+
"harness/evaluator",
|
|
27
|
+
"harness/adversary",
|
|
28
|
+
"harness/tie-breaker",
|
|
29
|
+
]);
|
|
30
|
+
|
|
31
|
+
export function agentTypeAllowsParentAskUser(agentType: string): boolean {
|
|
32
|
+
return ASK_USER_AGENT_TYPES.has(agentType);
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
export function createParentAskUserBridgeFactory(
|
|
36
|
+
parentCtx: ExtensionContext,
|
|
37
|
+
agentType: string,
|
|
38
|
+
): ((pi: ExtensionAPI) => void) | null {
|
|
39
|
+
if (!agentTypeAllowsParentAskUser(agentType)) {
|
|
40
|
+
return null;
|
|
41
|
+
}
|
|
42
|
+
return (pi: ExtensionAPI) => {
|
|
43
|
+
pi.registerTool({
|
|
44
|
+
name: "ask_user",
|
|
45
|
+
label: "Ask User",
|
|
46
|
+
description:
|
|
47
|
+
"Ask the user a structured question (parent session UI). Use for clarification and plan approval.",
|
|
48
|
+
promptSnippet: PROMPT_SNIPPET,
|
|
49
|
+
promptGuidelines: PROMPT_GUIDELINES,
|
|
50
|
+
parameters: AskUserParamsSchema,
|
|
51
|
+
async execute(_toolCallId, params, _signal, _onUpdate) {
|
|
52
|
+
const validated = validateAskParams(params as AskUserParams);
|
|
53
|
+
if (typeof validated === "string") {
|
|
54
|
+
return {
|
|
55
|
+
content: [{ type: "text", text: validated }],
|
|
56
|
+
details: {
|
|
57
|
+
question: params.question ?? "",
|
|
58
|
+
options: [],
|
|
59
|
+
response: null,
|
|
60
|
+
cancelled: true,
|
|
61
|
+
},
|
|
62
|
+
};
|
|
63
|
+
}
|
|
64
|
+
let outcome: DialogResult;
|
|
65
|
+
if (parentCtx.hasUI) {
|
|
66
|
+
outcome = await runAskDialog(parentCtx.ui, validated);
|
|
67
|
+
} else {
|
|
68
|
+
outcome = await runAskFallback(parentCtx.ui, validated);
|
|
69
|
+
}
|
|
70
|
+
const details = toToolDetails(
|
|
71
|
+
validated,
|
|
72
|
+
outcome.response,
|
|
73
|
+
outcome.cancelled,
|
|
74
|
+
);
|
|
75
|
+
const text = formatResultText(outcome.response, outcome.cancelled);
|
|
76
|
+
return {
|
|
77
|
+
content: [{ type: "text", text }],
|
|
78
|
+
details,
|
|
79
|
+
};
|
|
80
|
+
},
|
|
81
|
+
renderCall(args, theme) {
|
|
82
|
+
return renderAskCall(args, theme);
|
|
83
|
+
},
|
|
84
|
+
renderResult(result, options, theme) {
|
|
85
|
+
return renderAskResult(result, options, theme);
|
|
86
|
+
},
|
|
87
|
+
});
|
|
88
|
+
};
|
|
89
|
+
}
|
|
@@ -7,7 +7,13 @@ export const SUBAGENT_BLOCKED_TOOLS = new Set([
|
|
|
7
7
|
"get_subagent_result",
|
|
8
8
|
"steer_subagent",
|
|
9
9
|
"blackboard",
|
|
10
|
-
|
|
10
|
+
]);
|
|
11
|
+
|
|
12
|
+
const ASK_USER_ALLOWED_AGENT_TYPES = new Set([
|
|
13
|
+
"harness/planner",
|
|
14
|
+
"harness/evaluator",
|
|
15
|
+
"harness/adversary",
|
|
16
|
+
"harness/tie-breaker",
|
|
11
17
|
]);
|
|
12
18
|
|
|
13
19
|
export interface ToolCallDecision {
|
|
@@ -16,12 +22,24 @@ export interface ToolCallDecision {
|
|
|
16
22
|
newArgs?: Record<string, unknown>;
|
|
17
23
|
}
|
|
18
24
|
|
|
19
|
-
export function evaluateSubagentToolCall(
|
|
25
|
+
export function evaluateSubagentToolCall(
|
|
26
|
+
toolName: string,
|
|
27
|
+
agentType?: string,
|
|
28
|
+
): ToolCallDecision {
|
|
20
29
|
if (SUBAGENT_BLOCKED_TOOLS.has(toolName)) {
|
|
21
30
|
return {
|
|
22
31
|
action: "block",
|
|
23
32
|
reason: `Tool "${toolName}" is not available in subagent sessions (single spawn depth).`,
|
|
24
33
|
};
|
|
25
34
|
}
|
|
35
|
+
if (toolName === "ask_user") {
|
|
36
|
+
if (agentType && ASK_USER_ALLOWED_AGENT_TYPES.has(agentType)) {
|
|
37
|
+
return { action: "allow" };
|
|
38
|
+
}
|
|
39
|
+
return {
|
|
40
|
+
action: "block",
|
|
41
|
+
reason: `Tool "ask_user" is not available for ${agentType ?? "this agent"} (orchestrator-only).`,
|
|
42
|
+
};
|
|
43
|
+
}
|
|
26
44
|
return { action: "allow" };
|
|
27
45
|
}
|
|
@@ -252,6 +252,7 @@ export class AgentManager {
|
|
|
252
252
|
options.onSessionCreated?.(session);
|
|
253
253
|
},
|
|
254
254
|
systemPromptAppendix: options.systemPromptAppendix,
|
|
255
|
+
parentExtensionContext: ctx,
|
|
255
256
|
})
|
|
256
257
|
.then(({ responseText, session, aborted, steered }) => {
|
|
257
258
|
// Don't overwrite status if externally stopped via abort()
|