ultimate-pi 0.8.0 → 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agents/skills/harness-plan/SKILL.md +6 -6
- package/.pi/agents/harness/planner.md +9 -10
- package/.pi/extensions/budget-guard.ts +46 -17
- package/.pi/extensions/harness-run-context.ts +150 -28
- package/.pi/extensions/lib/harness-subagents/harness-subagent-policy.ts +1 -1
- package/.pi/extensions/lib/harness-subagents/parent-ask-user-bridge.ts +89 -0
- package/.pi/extensions/lib/harness-subagents/spawn-policy.ts +20 -2
- package/.pi/extensions/lib/harness-subagents/vendored/agent-manager.ts +1 -0
- package/.pi/extensions/lib/harness-subagents/vendored/agent-runner.ts +40 -24
- package/.pi/extensions/lib/harness-subagents/vendored/index.ts +21 -0
- package/.pi/extensions/policy-gate.ts +4 -4
- package/.pi/harness/agents.manifest.json +82 -82
- package/.pi/harness/docs/adrs/0031-harness-run-context.md +1 -1
- package/.pi/harness/docs/adrs/0032-harness-command-orchestration.md +6 -6
- package/.pi/harness/specs/budget-exhausted-event.schema.json +3 -1
- package/.pi/harness/specs/harness-turn.schema.json +18 -0
- package/.pi/lib/harness-run-context.ts +166 -32
- package/.pi/prompts/harness-plan.md +12 -14
- package/.pi/scripts/harness-verify.mjs +29 -1
- package/CHANGELOG.md +12 -0
- package/package.json +2 -2
|
@@ -18,6 +18,7 @@ import {
|
|
|
18
18
|
SettingsManager,
|
|
19
19
|
} from "@earendil-works/pi-coding-agent";
|
|
20
20
|
import { evaluateHarnessSubagentToolCall } from "../harness-subagent-policy.js";
|
|
21
|
+
import { createParentAskUserBridgeFactory } from "../parent-ask-user-bridge.js";
|
|
21
22
|
import {
|
|
22
23
|
getAgentConfig,
|
|
23
24
|
getConfig,
|
|
@@ -39,7 +40,6 @@ const EXCLUDED_TOOL_NAMES = [
|
|
|
39
40
|
"get_subagent_result",
|
|
40
41
|
"steer_subagent",
|
|
41
42
|
"blackboard",
|
|
42
|
-
"ask_user",
|
|
43
43
|
];
|
|
44
44
|
|
|
45
45
|
/** Default max turns. undefined = unlimited (no turn limit). */
|
|
@@ -152,6 +152,8 @@ export interface RunOptions {
|
|
|
152
152
|
}) => void;
|
|
153
153
|
/** Blackboard or other spawn context appended to the subagent system prompt. */
|
|
154
154
|
systemPromptAppendix?: string;
|
|
155
|
+
/** Parent session context — used to bridge ask_user UI into subagents. */
|
|
156
|
+
parentExtensionContext?: ExtensionContext;
|
|
155
157
|
}
|
|
156
158
|
|
|
157
159
|
export interface RunResult {
|
|
@@ -328,26 +330,31 @@ export async function runAgent(
|
|
|
328
330
|
? `${systemPrompt}\n\n---\n\n## Spawn context\n\n${appendix}`
|
|
329
331
|
: systemPrompt;
|
|
330
332
|
|
|
331
|
-
const extensionFactories: Array<(pi: ExtensionAPI) => void> = [
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
}
|
|
350
|
-
|
|
333
|
+
const extensionFactories: Array<(pi: ExtensionAPI) => void> = [];
|
|
334
|
+
const askUserBridge = options.parentExtensionContext
|
|
335
|
+
? createParentAskUserBridgeFactory(options.parentExtensionContext, type)
|
|
336
|
+
: null;
|
|
337
|
+
if (askUserBridge) {
|
|
338
|
+
extensionFactories.push(askUserBridge);
|
|
339
|
+
}
|
|
340
|
+
extensionFactories.push((pi) => {
|
|
341
|
+
pi.on("tool_call", (event) => {
|
|
342
|
+
const decision = evaluateHarnessSubagentToolCall(
|
|
343
|
+
event.toolName,
|
|
344
|
+
event.input as Record<string, unknown> | undefined,
|
|
345
|
+
type,
|
|
346
|
+
);
|
|
347
|
+
if (decision.action === "block") {
|
|
348
|
+
return { block: true, reason: decision.reason };
|
|
349
|
+
}
|
|
350
|
+
return undefined;
|
|
351
|
+
});
|
|
352
|
+
pi.on("before_agent_start", (event: { systemPrompt?: string }) => {
|
|
353
|
+
const base =
|
|
354
|
+
typeof event.systemPrompt === "string" ? event.systemPrompt : "";
|
|
355
|
+
return { systemPrompt: base };
|
|
356
|
+
});
|
|
357
|
+
});
|
|
351
358
|
|
|
352
359
|
const loader = new DefaultResourceLoader({
|
|
353
360
|
cwd: effectiveCwd,
|
|
@@ -403,6 +410,7 @@ export async function runAgent(
|
|
|
403
410
|
const filterTools = (names: string[]) =>
|
|
404
411
|
names.filter((t) => {
|
|
405
412
|
if (EXCLUDED_TOOL_NAMES.includes(t)) return false;
|
|
413
|
+
if (t === "ask_user" && askUserBridge) return true;
|
|
406
414
|
if (disallowedSet?.has(t)) return false;
|
|
407
415
|
if (builtinToolNameSet.has(t)) return true;
|
|
408
416
|
if (extensions === false) return false;
|
|
@@ -416,9 +424,11 @@ export async function runAgent(
|
|
|
416
424
|
if (activeTools.length > 0) {
|
|
417
425
|
session.setActiveToolsByName(activeTools);
|
|
418
426
|
} else {
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
427
|
+
const fallback = toolNames.filter((t) => {
|
|
428
|
+
if (t === "ask_user" && askUserBridge) return true;
|
|
429
|
+
return !disallowedSet?.has(t);
|
|
430
|
+
});
|
|
431
|
+
session.setActiveToolsByName(fallback);
|
|
422
432
|
}
|
|
423
433
|
|
|
424
434
|
// Bind extensions so that session_start fires and extensions can initialize
|
|
@@ -434,6 +444,12 @@ export async function runAgent(
|
|
|
434
444
|
},
|
|
435
445
|
});
|
|
436
446
|
|
|
447
|
+
if (askUserBridge) {
|
|
448
|
+
const withAsk = new Set(session.getActiveToolNames());
|
|
449
|
+
withAsk.add("ask_user");
|
|
450
|
+
session.setActiveToolsByName([...withAsk]);
|
|
451
|
+
}
|
|
452
|
+
|
|
437
453
|
options.onSessionCreated?.(session);
|
|
438
454
|
|
|
439
455
|
// Track turns for graceful max_turns enforcement
|
|
@@ -17,6 +17,10 @@ import {
|
|
|
17
17
|
} from "@earendil-works/pi-coding-agent";
|
|
18
18
|
import { Text } from "@earendil-works/pi-tui";
|
|
19
19
|
import { Type } from "@sinclair/typebox";
|
|
20
|
+
import {
|
|
21
|
+
extractPlanApprovalsFromEntries,
|
|
22
|
+
getLatestRunContext,
|
|
23
|
+
} from "../../../../lib/harness-run-context.js";
|
|
20
24
|
import { getDriftReport } from "../agent-manifest.js";
|
|
21
25
|
import { Blackboard } from "../blackboard.js";
|
|
22
26
|
import {
|
|
@@ -1599,6 +1603,23 @@ Guidelines:
|
|
|
1599
1603
|
cancelNudge(params.agent_id);
|
|
1600
1604
|
}
|
|
1601
1605
|
|
|
1606
|
+
if (record.session && record.status !== "running") {
|
|
1607
|
+
const parentEntries = _ctx.sessionManager.getEntries();
|
|
1608
|
+
const runCtx = getLatestRunContext(parentEntries);
|
|
1609
|
+
if (runCtx) {
|
|
1610
|
+
const subEntries = record.session.sessionManager.getEntries();
|
|
1611
|
+
for (const approval of extractPlanApprovalsFromEntries(
|
|
1612
|
+
subEntries,
|
|
1613
|
+
)) {
|
|
1614
|
+
pi.appendEntry("harness-plan-approval", {
|
|
1615
|
+
plan_id: approval.plan_id ?? runCtx.plan_id,
|
|
1616
|
+
approved_at: approval.approved_at,
|
|
1617
|
+
source: "ask_user",
|
|
1618
|
+
});
|
|
1619
|
+
}
|
|
1620
|
+
}
|
|
1621
|
+
}
|
|
1622
|
+
|
|
1602
1623
|
// Verbose: include full conversation
|
|
1603
1624
|
if (params.verbose && record.session) {
|
|
1604
1625
|
const conversation = getAgentConversation(record.session);
|
|
@@ -15,7 +15,7 @@ import {
|
|
|
15
15
|
getPolicyTransitionBlock,
|
|
16
16
|
hasApprovedPlanSignalFromUserPrompt,
|
|
17
17
|
hasHarnessAbortSignal,
|
|
18
|
-
|
|
18
|
+
inferHarnessPhase,
|
|
19
19
|
isHarnessAutoSession,
|
|
20
20
|
isHarnessBootstrapPrompt,
|
|
21
21
|
isPlanPhaseAllowedMutation,
|
|
@@ -77,8 +77,8 @@ function nowIso(): string {
|
|
|
77
77
|
|
|
78
78
|
function defaultState(): PolicyState {
|
|
79
79
|
return {
|
|
80
|
-
phase: "
|
|
81
|
-
approvedPlan:
|
|
80
|
+
phase: "plan",
|
|
81
|
+
approvedPlan: false,
|
|
82
82
|
planId: null,
|
|
83
83
|
budgetBypass: false,
|
|
84
84
|
aborted: false,
|
|
@@ -198,7 +198,7 @@ export default function policyGate(pi: ExtensionAPI) {
|
|
|
198
198
|
};
|
|
199
199
|
}
|
|
200
200
|
|
|
201
|
-
const nextPhase =
|
|
201
|
+
const nextPhase = inferHarnessPhase(entries, userPrompt);
|
|
202
202
|
const planSignal = hasApprovedPlanSignal(userPrompt, entries);
|
|
203
203
|
|
|
204
204
|
const transitionBlock = getPolicyTransitionBlock(userPrompt, entries);
|
|
@@ -1,84 +1,84 @@
|
|
|
1
1
|
{
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
2
|
+
"schema_version": "1.0.0",
|
|
3
|
+
"package": "ultimate-pi",
|
|
4
|
+
"package_version": "0.9.0",
|
|
5
|
+
"generated_at": "2026-05-17T10:06:28.388Z",
|
|
6
|
+
"agents": {
|
|
7
|
+
"pi-pi/agent-expert": {
|
|
8
|
+
"path": ".pi/agents/pi-pi/agent-expert.md",
|
|
9
|
+
"sha256": "86561eb092b92fa43f221bfc6305de8d5afe10d43c5f577b9bf15a71bda051c6"
|
|
10
|
+
},
|
|
11
|
+
"pi-pi/cli-expert": {
|
|
12
|
+
"path": ".pi/agents/pi-pi/cli-expert.md",
|
|
13
|
+
"sha256": "6ecdbc37c60f1da7c68d504187454c95197dd19bd7091d97d136c6fb9e866838"
|
|
14
|
+
},
|
|
15
|
+
"pi-pi/config-expert": {
|
|
16
|
+
"path": ".pi/agents/pi-pi/config-expert.md",
|
|
17
|
+
"sha256": "c23b9b70a4c326cca2d139f18de48f8db2e0e28fa5ceb8205600f25430822a40"
|
|
18
|
+
},
|
|
19
|
+
"pi-pi/ext-expert": {
|
|
20
|
+
"path": ".pi/agents/pi-pi/ext-expert.md",
|
|
21
|
+
"sha256": "47c64a9e2cffe00a50cac5541b0edc89fe8bcbd66ec3bb302eecc10d405a977f"
|
|
22
|
+
},
|
|
23
|
+
"pi-pi/keybinding-expert": {
|
|
24
|
+
"path": ".pi/agents/pi-pi/keybinding-expert.md",
|
|
25
|
+
"sha256": "bb83f5fd2178075c8374ef28c360339f7de9faeedf811670b43a536c5f65c58e"
|
|
26
|
+
},
|
|
27
|
+
"pi-pi/pi-orchestrator": {
|
|
28
|
+
"path": ".pi/agents/pi-pi/pi-orchestrator.md",
|
|
29
|
+
"sha256": "1323fc262112030320bdd7d7866fd69e1b861377bb30fd3553a5e3a4398f9090"
|
|
30
|
+
},
|
|
31
|
+
"pi-pi/prompt-expert": {
|
|
32
|
+
"path": ".pi/agents/pi-pi/prompt-expert.md",
|
|
33
|
+
"sha256": "0d9f5e3e8ab162149cac8f08e159a7b2150a11f53ebe7021e72f8277fa004d0c"
|
|
34
|
+
},
|
|
35
|
+
"pi-pi/skill-expert": {
|
|
36
|
+
"path": ".pi/agents/pi-pi/skill-expert.md",
|
|
37
|
+
"sha256": "91732a2479097a2645b6af56171d5cdc1ed67a0896ca54bc4badba250341698f"
|
|
38
|
+
},
|
|
39
|
+
"pi-pi/theme-expert": {
|
|
40
|
+
"path": ".pi/agents/pi-pi/theme-expert.md",
|
|
41
|
+
"sha256": "3d256049203356d87c36eac2232c0ad6261fec8af02eb15d3144b18a400ed64d"
|
|
42
|
+
},
|
|
43
|
+
"pi-pi/tui-expert": {
|
|
44
|
+
"path": ".pi/agents/pi-pi/tui-expert.md",
|
|
45
|
+
"sha256": "a619b2ee3d3d94fe599abb61db0904f90d30335ec426851c3f1efdf2e5ce5390"
|
|
46
|
+
},
|
|
47
|
+
"harness/adversary": {
|
|
48
|
+
"path": ".pi/agents/harness/adversary.md",
|
|
49
|
+
"sha256": "b965f90610ca942d08b656f1aee839266d08a92beb174b8761dd5e840694a899"
|
|
50
|
+
},
|
|
51
|
+
"harness/evaluator": {
|
|
52
|
+
"path": ".pi/agents/harness/evaluator.md",
|
|
53
|
+
"sha256": "6c0de777a10de26dba4a6feb5641495fa5c2d31072a8b0e597a5ecc9921f129f"
|
|
54
|
+
},
|
|
55
|
+
"harness/executor": {
|
|
56
|
+
"path": ".pi/agents/harness/executor.md",
|
|
57
|
+
"sha256": "5af3ec2be4d64a738834e36d480a36c2bee4359e8cd5a2e1aac49be4cff79589"
|
|
58
|
+
},
|
|
59
|
+
"harness/incident-recorder": {
|
|
60
|
+
"path": ".pi/agents/harness/incident-recorder.md",
|
|
61
|
+
"sha256": "2de405f77b62dde38f331665bff220a3ef131c3c1cd42eebee364000fc83352b"
|
|
62
|
+
},
|
|
63
|
+
"harness/meta-optimizer": {
|
|
64
|
+
"path": ".pi/agents/harness/meta-optimizer.md",
|
|
65
|
+
"sha256": "ef2fb950e18e3a6439e91a68f764fc7ec922cd2d6b35de8f656f376854974d04"
|
|
66
|
+
},
|
|
67
|
+
"harness/planner": {
|
|
68
|
+
"path": ".pi/agents/harness/planner.md",
|
|
69
|
+
"sha256": "eb0459a1fcb018e4ca8d4339141e294828fa7014879d9a64258ae01abc13d3ad"
|
|
70
|
+
},
|
|
71
|
+
"harness/sentrux-bootstrap": {
|
|
72
|
+
"path": ".pi/agents/harness/sentrux-bootstrap.md",
|
|
73
|
+
"sha256": "3a0b43b94386a7c541b8a806a37524a5e53f1c8049270db7a420680df5799eeb"
|
|
74
|
+
},
|
|
75
|
+
"harness/tie-breaker": {
|
|
76
|
+
"path": ".pi/agents/harness/tie-breaker.md",
|
|
77
|
+
"sha256": "651f50b9e2c7903c542700e94908b1fcd026ebed12aa1f1d6ec481df3567e34f"
|
|
78
|
+
},
|
|
79
|
+
"harness/trace-librarian": {
|
|
80
|
+
"path": ".pi/agents/harness/trace-librarian.md",
|
|
81
|
+
"sha256": "d63fe08a2ea0466c0fd89fff4da03ac1d9d3580c306381cee251c89d4e8fdb97"
|
|
82
|
+
}
|
|
83
|
+
}
|
|
84
84
|
}
|
|
@@ -9,7 +9,7 @@ Manual harness steps required copying `run_id` and `plan-packet.json` paths betw
|
|
|
9
9
|
|
|
10
10
|
## Decision
|
|
11
11
|
|
|
12
|
-
1. Add `.pi/lib/harness-run-context.ts` and `harness-run-context.ts` extension as the single source of truth for active runs.
|
|
12
|
+
1. Add `.pi/lib/harness-run-context.ts` and `harness-run-context.ts` extension as the single source of truth for active runs. **Harness command routing:** `pi.on("input")` appends `harness-turn` for raw `/harness-*` (before template expansion); `before_agent_start` bootstraps from that entry, not expanded prompt headers.
|
|
13
13
|
2. Persist mirrors:
|
|
14
14
|
- `.pi/harness/runs/<run_id>/run-context.json`
|
|
15
15
|
- `.pi/harness/active-run.json` (cross-session pointer for forked eval)
|
|
@@ -9,12 +9,12 @@ Harness slash prompts duplicated logic already defined in `harness/*` agents. Co
|
|
|
9
9
|
|
|
10
10
|
## Decision
|
|
11
11
|
|
|
12
|
-
1. **Slash commands**
|
|
13
|
-
2. **Agents** perform multi-turn reads and emit structured JSON drafts
|
|
14
|
-
3. **HarnessSpawnContext**
|
|
15
|
-
4. **Review isolation** uses `Agent` spawn with `inherit_context: false
|
|
16
|
-
5. **Subagent policy**
|
|
17
|
-
6. **
|
|
12
|
+
1. **Slash commands** (prompt templates) are orchestrators: spawn `harness/*` agents once, perform policy-gated writes, emit handoff blocks. Command identity is captured on Pi **`input`** as `harness-turn` (raw `/harness-*`), not from expanded prompt markdown.
|
|
13
|
+
2. **Agents** perform multi-turn reads and emit structured JSON drafts. **Planner** runs clarification and plan approval via `ask_user` (parent UI bridge); planner does not write `plan-packet.json`.
|
|
14
|
+
3. **HarnessSpawnContext** is injected in `[HarnessRunContext]`; orchestrator copies it into spawn prompts. Subagents do not receive `[HarnessActivePlan]` injection.
|
|
15
|
+
4. **Review isolation** uses `Agent` spawn with `inherit_context: false`. `review-integrity` allows `Agent` / `get_subagent_result` for evaluator/adversary/tie-breaker.
|
|
16
|
+
5. **Subagent policy** blocks mutating tools for read-only phase agents; `ask_user` allowed for planner/evaluator/adversary/tie-breaker only.
|
|
17
|
+
6. **Parent** does not duplicate planner `ask_user` or re-spawn for clarification. `get_subagent_result` syncs `harness-plan-approval` from subagent sessions.
|
|
18
18
|
|
|
19
19
|
## Consequences
|
|
20
20
|
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "http://json-schema.org/draft-07/schema#",
|
|
3
|
+
"title": "HarnessTurn",
|
|
4
|
+
"description": "Recorded on Pi input event when user invokes a /harness-* prompt template (raw slash, before expansion).",
|
|
5
|
+
"type": "object",
|
|
6
|
+
"required": ["schema_version", "command", "args", "source", "invoked_at"],
|
|
7
|
+
"additionalProperties": false,
|
|
8
|
+
"properties": {
|
|
9
|
+
"schema_version": { "const": "1.0.0" },
|
|
10
|
+
"command": {
|
|
11
|
+
"type": "string",
|
|
12
|
+
"pattern": "^harness-[a-z0-9-]+$"
|
|
13
|
+
},
|
|
14
|
+
"args": { "type": "string" },
|
|
15
|
+
"source": { "const": "slash" },
|
|
16
|
+
"invoked_at": { "type": "string", "format": "date-time" }
|
|
17
|
+
}
|
|
18
|
+
}
|