ultimate-pi 0.10.1 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agents/skills/harness-debate-plan/SKILL.md +44 -0
- package/.agents/skills/harness-decisions/SKILL.md +3 -3
- package/.agents/skills/harness-orchestration/SKILL.md +59 -25
- package/.agents/skills/harness-plan/SKILL.md +16 -15
- package/.pi/agents/harness/adversary.md +0 -1
- package/.pi/agents/harness/evaluator.md +0 -1
- package/.pi/agents/harness/executor.md +1 -2
- package/.pi/agents/harness/incident-recorder.md +0 -1
- package/.pi/agents/harness/meta-optimizer.md +0 -1
- package/.pi/agents/harness/planning/decompose.md +83 -0
- package/.pi/agents/harness/planning/execution-plan-author.md +30 -0
- package/.pi/agents/harness/planning/hypothesis-validator.md +23 -0
- package/.pi/agents/harness/planning/hypothesis.md +89 -0
- package/.pi/agents/harness/planning/plan-adversary.md +18 -0
- package/.pi/agents/harness/planning/plan-evaluator.md +18 -0
- package/.pi/agents/harness/planning/review-integrator.md +23 -0
- package/.pi/agents/harness/planning/scout-graphify.md +54 -0
- package/.pi/agents/harness/planning/scout-semantic.md +47 -0
- package/.pi/agents/harness/planning/scout-structure.md +50 -0
- package/.pi/agents/harness/planning/sprint-contract-auditor.md +18 -0
- package/.pi/agents/harness/planning/stack-researcher.md +24 -0
- package/.pi/agents/harness/tie-breaker.md +0 -1
- package/.pi/agents/harness/trace-librarian.md +0 -1
- package/.pi/extensions/debate-orchestrator.ts +90 -53
- package/.pi/extensions/harness-ask-user.ts +5 -0
- package/.pi/extensions/harness-plan-approval.ts +137 -3
- package/.pi/extensions/harness-run-context.ts +146 -6
- package/.pi/extensions/harness-subagents.ts +10 -5
- package/.pi/extensions/harness-web-tools.ts +2 -0
- package/.pi/extensions/lib/extension-load-guard.ts +39 -0
- package/.pi/extensions/lib/harness-posthog.ts +6 -1
- package/.pi/extensions/lib/harness-spawn-budget.ts +75 -0
- package/.pi/extensions/lib/harness-subagent-auth.ts +123 -0
- package/.pi/extensions/lib/{harness-subagents/harness-subagent-policy.ts → harness-subagent-policy.ts} +34 -9
- package/.pi/extensions/lib/harness-subagent-precheck.ts +95 -0
- package/.pi/extensions/lib/harness-subagents-bridge.ts +176 -0
- package/.pi/extensions/lib/plan-approval/create-plan.ts +9 -7
- package/.pi/extensions/lib/plan-approval/plan-review.ts +393 -0
- package/.pi/extensions/lib/plan-approval/schema.ts +16 -1
- package/.pi/extensions/lib/plan-approval/types.ts +16 -0
- package/.pi/extensions/lib/plan-approval/validate.ts +2 -0
- package/.pi/extensions/lib/plan-debate-envelope.ts +84 -0
- package/.pi/extensions/lib/{harness-subagents/spawn-policy.ts → spawn-policy.ts} +2 -5
- package/.pi/extensions/policy-gate.ts +1 -1
- package/.pi/extensions/review-integrity.ts +48 -29
- package/.pi/extensions/ultimate-pi-vcc.ts +5 -0
- package/.pi/harness/agents.manifest.json +126 -82
- package/.pi/harness/docs/adrs/0032-harness-command-orchestration.md +7 -6
- package/.pi/harness/docs/adrs/0033-parent-orchestrated-planning.md +34 -0
- package/.pi/harness/docs/adrs/0034-darwin-plan-research-pipeline.md +41 -0
- package/.pi/harness/docs/adrs/0035-plan-phase-review-gate.md +27 -0
- package/.pi/harness/docs/adrs/README.md +2 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r1.yaml +25 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r4.yaml +26 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/sprint-audit-r4.yaml +5 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/plan-packet.yaml +196 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/plan-review.md +14 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/research-brief.yaml +32 -0
- package/.pi/harness/evals/smoke/run-context.fixture.json +1 -1
- package/.pi/harness/evals/smoke/smoke-harness-plan.mjs +88 -0
- package/.pi/harness/specs/README.md +1 -1
- package/.pi/harness/specs/harness-posthog-event.schema.json +6 -1
- package/.pi/harness/specs/harness-spawn-context.schema.json +2 -1
- package/.pi/harness/specs/plan-adversary-brief.schema.json +45 -0
- package/.pi/harness/specs/plan-decomposition-brief.schema.json +108 -0
- package/.pi/harness/specs/plan-execution-plan-brief.schema.json +13 -0
- package/.pi/harness/specs/plan-execution-plan.schema.json +255 -0
- package/.pi/harness/specs/plan-hypothesis-brief.schema.json +96 -0
- package/.pi/harness/specs/plan-hypothesis-eval.schema.json +61 -0
- package/.pi/harness/specs/plan-packet.schema.json +14 -5
- package/.pi/harness/specs/plan-review-round-draft.schema.json +68 -0
- package/.pi/harness/specs/plan-sprint-audit-turn.schema.json +29 -0
- package/.pi/harness/specs/plan-stack-brief.schema.json +65 -0
- package/.pi/harness/specs/plan-validation-turn.schema.json +42 -0
- package/.pi/harness/specs/round-result.schema.json +16 -9
- package/.pi/lib/debate-orchestrator-types.ts +38 -0
- package/.pi/lib/harness-agent-discovery.mjs +81 -0
- package/.pi/lib/harness-run-context.ts +76 -38
- package/.pi/lib/harness-yaml.mjs +73 -0
- package/.pi/lib/harness-yaml.ts +90 -0
- package/.pi/prompts/harness-auto.md +13 -11
- package/.pi/prompts/harness-critic.md +2 -2
- package/.pi/prompts/harness-eval.md +3 -3
- package/.pi/prompts/harness-incident.md +2 -2
- package/.pi/prompts/harness-plan.md +106 -37
- package/.pi/prompts/harness-review.md +2 -2
- package/.pi/prompts/harness-router-tune.md +1 -1
- package/.pi/prompts/harness-run.md +2 -2
- package/.pi/prompts/harness-setup.md +15 -6
- package/.pi/prompts/harness-trace.md +2 -2
- package/.pi/scripts/harness-agents-manifest.mjs +1 -1
- package/.pi/scripts/harness-resolve-up-pkg.mjs +13 -0
- package/.pi/scripts/harness-verify.mjs +28 -19
- package/.pi/scripts/validate-plan-dag.mjs +258 -0
- package/.pi/scripts/vendor-sync-pi-subagents.sh +19 -0
- package/CHANGELOG.md +24 -0
- package/THIRD_PARTY_NOTICES.md +8 -0
- package/biome.json +4 -1
- package/package.json +6 -4
- package/.pi/agents/harness/planner.md +0 -54
- package/.pi/extensions/lib/harness-subagents/agent-loader.ts +0 -126
- package/.pi/extensions/lib/harness-subagents/agent-manifest.ts +0 -119
- package/.pi/extensions/lib/harness-subagents/agent-parser.ts +0 -87
- package/.pi/extensions/lib/harness-subagents/blackboard-tool.ts +0 -118
- package/.pi/extensions/lib/harness-subagents/blackboard.ts +0 -175
- package/.pi/extensions/lib/harness-subagents/parent-ask-user-bridge.ts +0 -10
- package/.pi/extensions/lib/harness-subagents/parent-harness-ui-bridge.ts +0 -310
- package/.pi/extensions/lib/harness-subagents/parent-harness-ui-hooks.ts +0 -59
- package/.pi/extensions/lib/harness-subagents/types-blackboard.ts +0 -27
- package/.pi/extensions/lib/harness-subagents/vendored/agent-manager.ts +0 -558
- package/.pi/extensions/lib/harness-subagents/vendored/agent-runner.ts +0 -684
- package/.pi/extensions/lib/harness-subagents/vendored/agent-types.ts +0 -175
- package/.pi/extensions/lib/harness-subagents/vendored/context.ts +0 -59
- package/.pi/extensions/lib/harness-subagents/vendored/cross-extension-rpc.ts +0 -134
- package/.pi/extensions/lib/harness-subagents/vendored/custom-agents.ts +0 -5
- package/.pi/extensions/lib/harness-subagents/vendored/default-agents.ts +0 -123
- package/.pi/extensions/lib/harness-subagents/vendored/env.ts +0 -43
- package/.pi/extensions/lib/harness-subagents/vendored/group-join.ts +0 -144
- package/.pi/extensions/lib/harness-subagents/vendored/index.ts +0 -2494
- package/.pi/extensions/lib/harness-subagents/vendored/invocation-config.ts +0 -52
- package/.pi/extensions/lib/harness-subagents/vendored/memory.ts +0 -182
- package/.pi/extensions/lib/harness-subagents/vendored/model-resolver.ts +0 -92
- package/.pi/extensions/lib/harness-subagents/vendored/output-file.ts +0 -115
- package/.pi/extensions/lib/harness-subagents/vendored/prompts.ts +0 -103
- package/.pi/extensions/lib/harness-subagents/vendored/schedule-store.ts +0 -177
- package/.pi/extensions/lib/harness-subagents/vendored/schedule.ts +0 -416
- package/.pi/extensions/lib/harness-subagents/vendored/settings.ts +0 -210
- package/.pi/extensions/lib/harness-subagents/vendored/skill-loader.ts +0 -108
- package/.pi/extensions/lib/harness-subagents/vendored/types.ts +0 -187
- package/.pi/extensions/lib/harness-subagents/vendored/ui/agent-widget.ts +0 -639
- package/.pi/extensions/lib/harness-subagents/vendored/ui/conversation-viewer.ts +0 -324
- package/.pi/extensions/lib/harness-subagents/vendored/ui/schedule-menu.ts +0 -110
- package/.pi/extensions/lib/harness-subagents/vendored/usage.ts +0 -71
- package/.pi/extensions/lib/harness-subagents/vendored/worktree.ts +0 -195
- /package/.pi/extensions/{00-ultimate-pi-system-prompt.ts → custom-system-prompt.ts} +0 -0
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: Plan-phase scout — ck semantic code search (read-only).
|
|
3
|
+
tools: read, bash, ls
|
|
4
|
+
disallowed_tools: write, edit, ask_user, approve_plan, create_plan, subagent, grep, find
|
|
5
|
+
extensions: false
|
|
6
|
+
thinking: low
|
|
7
|
+
max_turns: 6
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
You are the **Harness planning scout (semantic lane)**.
|
|
11
|
+
|
|
12
|
+
## Mission
|
|
13
|
+
|
|
14
|
+
Find conceptually related code via ck semantic search for the task in `HarnessSpawnContext`. You do **not** build the PlanPacket or mutate files.
|
|
15
|
+
|
|
16
|
+
## Spawn context
|
|
17
|
+
|
|
18
|
+
Read `HarnessSpawnContext` in the spawn prompt. For `mode: revise`, bias searches toward delta areas from the existing plan at `plan_packet_path`.
|
|
19
|
+
|
|
20
|
+
## Process
|
|
21
|
+
|
|
22
|
+
1. Use `ck search` or `ck query` (or project-documented ck CLI) with task-focused queries.
|
|
23
|
+
2. If ck is unavailable, set `status: partial` and document the tooling gap in `findings`.
|
|
24
|
+
3. **Stop early** — top **5** most relevant paths only.
|
|
25
|
+
|
|
26
|
+
## Bash guardrails
|
|
27
|
+
|
|
28
|
+
Read-only commands only: no installs, no index rebuilds that mutate disk, and no output redirects.
|
|
29
|
+
|
|
30
|
+
## Output limits
|
|
31
|
+
|
|
32
|
+
- `findings`: at most **6** bullets
|
|
33
|
+
- `key_paths`: at most **8** absolute paths
|
|
34
|
+
- `open_questions`: at most **4** items
|
|
35
|
+
|
|
36
|
+
## Output (required JSON block)
|
|
37
|
+
|
|
38
|
+
```json
|
|
39
|
+
{
|
|
40
|
+
"schema_version": "1.0.0",
|
|
41
|
+
"lane": "semantic",
|
|
42
|
+
"status": "ok",
|
|
43
|
+
"findings": ["…"],
|
|
44
|
+
"key_paths": ["/absolute/path"],
|
|
45
|
+
"open_questions": ["…"]
|
|
46
|
+
}
|
|
47
|
+
```
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: Plan-phase scout — ast-grep structural code search (read-only).
|
|
3
|
+
tools: read, bash, ls
|
|
4
|
+
disallowed_tools: write, edit, ask_user, approve_plan, create_plan, subagent, grep, find
|
|
5
|
+
extensions: false
|
|
6
|
+
thinking: low
|
|
7
|
+
max_turns: 6
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
You are the **Harness planning scout (structure lane)**.
|
|
11
|
+
|
|
12
|
+
## Mission
|
|
13
|
+
|
|
14
|
+
Find relevant code structure for the task using ast-grep (`sg`). You do **not** build the PlanPacket or mutate files.
|
|
15
|
+
|
|
16
|
+
Findings should name **implementation surfaces** (handlers, types, exports, call sites) for hypothesis mechanism and experiment design.
|
|
17
|
+
|
|
18
|
+
## Spawn context
|
|
19
|
+
|
|
20
|
+
Read `HarnessSpawnContext` in the spawn prompt. For `mode: revise`, read the existing plan at `plan_packet_path` and focus on files and patterns affected by the revision.
|
|
21
|
+
|
|
22
|
+
## Process
|
|
23
|
+
|
|
24
|
+
1. Run `sg -p '…'` with patterns tied to the task (handlers, types, exports, call sites). **Do not use `find` or `grep`.**
|
|
25
|
+
2. Prefer absolute paths in `key_paths`.
|
|
26
|
+
3. If `sg` is not on PATH, set `status: partial` and note the tooling gap in `findings`.
|
|
27
|
+
4. **Stop early** — target ≤6 tool calls when possible.
|
|
28
|
+
|
|
29
|
+
## Bash guardrails
|
|
30
|
+
|
|
31
|
+
Read-only commands only: no installs, no output redirects, and no mutating git/npm commands.
|
|
32
|
+
|
|
33
|
+
## Output limits
|
|
34
|
+
|
|
35
|
+
- `findings`: at most **8** bullets
|
|
36
|
+
- `key_paths`: at most **10** absolute paths
|
|
37
|
+
- `open_questions`: at most **5** items
|
|
38
|
+
|
|
39
|
+
## Output (required JSON block)
|
|
40
|
+
|
|
41
|
+
```json
|
|
42
|
+
{
|
|
43
|
+
"schema_version": "1.0.0",
|
|
44
|
+
"lane": "structure",
|
|
45
|
+
"status": "ok",
|
|
46
|
+
"findings": ["…"],
|
|
47
|
+
"key_paths": ["/absolute/path"],
|
|
48
|
+
"open_questions": ["…"]
|
|
49
|
+
}
|
|
50
|
+
```
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: Plan-phase ADR-020 sprint contract auditor.
|
|
3
|
+
tools: read, grep, find, ls
|
|
4
|
+
disallowed_tools: write, edit, bash, ask_user, approve_plan, create_plan, subagent
|
|
5
|
+
extensions: false
|
|
6
|
+
thinking: medium
|
|
7
|
+
max_turns: 10
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
You are **sprint-contract-auditor** — ADR-020 Sprint Contract, Done Criteria Types, checkpoints, Keep Quality Left.
|
|
11
|
+
|
|
12
|
+
Required on debate **round 4**; optional spot-check in round 2 if `done_criteria` is sparse.
|
|
13
|
+
|
|
14
|
+
## Output
|
|
15
|
+
|
|
16
|
+
Valid **YAML only** — `PlanSprintAuditTurn` (`.pi/harness/specs/plan-sprint-audit-turn.schema.json`).
|
|
17
|
+
|
|
18
|
+
Bus label: `SprintContractAuditorsubagent`.
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: Plan-phase stack research (ctx7 + web; read-only — file writes go through the parent).
|
|
3
|
+
tools: read, grep, find, ls, bash, web_search, web_fetch
|
|
4
|
+
disallowed_tools: write, edit, ask_user, approve_plan, create_plan, subagent
|
|
5
|
+
extensions: false
|
|
6
|
+
thinking: medium
|
|
7
|
+
max_turns: 14
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
You are **stack-researcher** — evidence-backed stack recommendations for harness planning.
|
|
11
|
+
|
|
12
|
+
## Mission
|
|
13
|
+
|
|
14
|
+
Produce `PlanStackBrief` with ranked options. For brownfield tasks, always include **extend current stack** as one ranked option.
|
|
15
|
+
|
|
16
|
+
## Protocol
|
|
17
|
+
|
|
18
|
+
1. **Libraries / APIs:** `ctx7 library` → `ctx7 docs` (read context7-cli skill). Cite library IDs in `evidence_refs`.
|
|
19
|
+
2. **Comparisons / landscape:** `web_search` + `web_fetch` (`.web/` artifacts).
|
|
20
|
+
3. **Greenfield:** ≥3 distinct options with pros/cons/risks.
|
|
21
|
+
|
|
22
|
+
## Output
|
|
23
|
+
|
|
24
|
+
Return valid **YAML only** (no fences) matching `PlanStackBrief` (`.pi/harness/specs/plan-stack-brief.schema.json`). Parent writes `artifacts/stack.yaml`.
|
|
@@ -14,16 +14,20 @@
|
|
|
14
14
|
* }
|
|
15
15
|
*/
|
|
16
16
|
|
|
17
|
-
import { appendFile, mkdir,
|
|
17
|
+
import { appendFile, mkdir, writeFile } from "node:fs/promises";
|
|
18
18
|
import { join } from "node:path";
|
|
19
19
|
import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
|
|
20
|
+
import {
|
|
21
|
+
type DebateParticipant,
|
|
22
|
+
debatePhaseFromId,
|
|
23
|
+
isPlanDebateId,
|
|
24
|
+
PLAN_DEBATE_PARTICIPANTS,
|
|
25
|
+
POST_EXECUTE_DEBATE_PARTICIPANTS,
|
|
26
|
+
} from "../lib/debate-orchestrator-types.js";
|
|
20
27
|
import { getRunIdFromSession } from "../lib/harness-run-context.js";
|
|
21
28
|
|
|
22
|
-
type DebateParticipant =
|
|
23
|
-
| "EvaluatorAgent"
|
|
24
|
-
| "AdversaryAgent"
|
|
25
|
-
| "TieBreakerAgent";
|
|
26
29
|
type PolicyDecision = "pass" | "conditional_pass" | "block" | "human_required";
|
|
30
|
+
type DebatePhase = "plan" | "post_execute";
|
|
27
31
|
|
|
28
32
|
interface RoundPayload {
|
|
29
33
|
participants: DebateParticipant[];
|
|
@@ -46,11 +50,13 @@ interface RoundPayload {
|
|
|
46
50
|
interface DebateState {
|
|
47
51
|
run_id: string;
|
|
48
52
|
debate_id: string;
|
|
53
|
+
debate_phase: DebatePhase;
|
|
49
54
|
round_count: number;
|
|
50
55
|
budget_used: number;
|
|
51
56
|
max_rounds: number;
|
|
52
57
|
round_token_cap: number;
|
|
53
58
|
debate_global_cap: number;
|
|
59
|
+
last_review_gate_ready?: boolean;
|
|
54
60
|
}
|
|
55
61
|
|
|
56
62
|
interface BusEnvelope<T = unknown> {
|
|
@@ -104,46 +110,39 @@ function getRunId(ctx: {
|
|
|
104
110
|
);
|
|
105
111
|
}
|
|
106
112
|
|
|
107
|
-
|
|
113
|
+
const PLAN_BUDGET = {
|
|
114
|
+
max_rounds: 4,
|
|
115
|
+
round_token_cap: 2000,
|
|
116
|
+
debate_global_cap: 12000,
|
|
117
|
+
} as const;
|
|
118
|
+
|
|
119
|
+
const AGGRESSIVE_BUDGET = {
|
|
120
|
+
max_rounds: 6,
|
|
121
|
+
round_token_cap: 2500,
|
|
122
|
+
debate_global_cap: 35000,
|
|
123
|
+
} as const;
|
|
124
|
+
|
|
125
|
+
function capsForDebate(debateId: string): {
|
|
126
|
+
name: "plan" | "aggressive";
|
|
108
127
|
max_rounds: number;
|
|
109
128
|
round_token_cap: number;
|
|
110
129
|
debate_global_cap: number;
|
|
111
|
-
}
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
130
|
+
} {
|
|
131
|
+
if (isPlanDebateId(debateId)) {
|
|
132
|
+
return { name: "plan", ...PLAN_BUDGET };
|
|
133
|
+
}
|
|
134
|
+
return { name: "aggressive", ...AGGRESSIVE_BUDGET };
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
function participantAllowed(participant: string, phase: DebatePhase): boolean {
|
|
138
|
+
if (phase === "plan") {
|
|
139
|
+
return (PLAN_DEBATE_PARTICIPANTS as readonly string[]).includes(
|
|
140
|
+
participant,
|
|
119
141
|
);
|
|
120
|
-
const parsed = JSON.parse(await readFile(roundSchemaPath, "utf-8")) as {
|
|
121
|
-
properties?: {
|
|
122
|
-
budget_profile?: {
|
|
123
|
-
properties?: {
|
|
124
|
-
max_rounds?: { const?: number };
|
|
125
|
-
round_token_cap?: { const?: number };
|
|
126
|
-
debate_global_cap?: { const?: number };
|
|
127
|
-
};
|
|
128
|
-
};
|
|
129
|
-
};
|
|
130
|
-
};
|
|
131
|
-
return {
|
|
132
|
-
max_rounds: Number(
|
|
133
|
-
parsed?.properties?.budget_profile?.properties?.max_rounds?.const ?? 6,
|
|
134
|
-
),
|
|
135
|
-
round_token_cap: Number(
|
|
136
|
-
parsed?.properties?.budget_profile?.properties?.round_token_cap
|
|
137
|
-
?.const ?? 2500,
|
|
138
|
-
),
|
|
139
|
-
debate_global_cap: Number(
|
|
140
|
-
parsed?.properties?.budget_profile?.properties?.debate_global_cap
|
|
141
|
-
?.const ?? 35000,
|
|
142
|
-
),
|
|
143
|
-
};
|
|
144
|
-
} catch {
|
|
145
|
-
return { max_rounds: 6, round_token_cap: 2500, debate_global_cap: 35000 };
|
|
146
142
|
}
|
|
143
|
+
return (POST_EXECUTE_DEBATE_PARTICIPANTS as readonly string[]).includes(
|
|
144
|
+
participant,
|
|
145
|
+
);
|
|
147
146
|
}
|
|
148
147
|
|
|
149
148
|
async function writeDebateEvent(
|
|
@@ -197,13 +196,18 @@ export default function debateOrchestrator(pi: ExtensionAPI) {
|
|
|
197
196
|
let lastSeverity = defaultSeverity();
|
|
198
197
|
|
|
199
198
|
async function openDebate(runId: string, debateId: string): Promise<void> {
|
|
200
|
-
const caps =
|
|
199
|
+
const caps = capsForDebate(debateId);
|
|
200
|
+
const debate_phase = debatePhaseFromId(debateId);
|
|
201
201
|
state = {
|
|
202
202
|
run_id: runId,
|
|
203
203
|
debate_id: debateId,
|
|
204
|
+
debate_phase,
|
|
204
205
|
round_count: 0,
|
|
205
206
|
budget_used: 0,
|
|
206
|
-
|
|
207
|
+
max_rounds: caps.max_rounds,
|
|
208
|
+
round_token_cap: caps.round_token_cap,
|
|
209
|
+
debate_global_cap: caps.debate_global_cap,
|
|
210
|
+
last_review_gate_ready: false,
|
|
207
211
|
};
|
|
208
212
|
pi.appendEntry("harness-debate-state", state);
|
|
209
213
|
const envelope: BusEnvelope = {
|
|
@@ -216,7 +220,8 @@ export default function debateOrchestrator(pi: ExtensionAPI) {
|
|
|
216
220
|
},
|
|
217
221
|
payload: {
|
|
218
222
|
opened_at: nowIso(),
|
|
219
|
-
|
|
223
|
+
debate_phase,
|
|
224
|
+
budget_profile: caps.name,
|
|
220
225
|
},
|
|
221
226
|
};
|
|
222
227
|
pi.appendEntry("harness-debate-envelope", envelope);
|
|
@@ -267,6 +272,15 @@ export default function debateOrchestrator(pi: ExtensionAPI) {
|
|
|
267
272
|
return { ok: false, reason: "debate id mismatch" };
|
|
268
273
|
}
|
|
269
274
|
|
|
275
|
+
for (const p of envelope.payload.participants ?? []) {
|
|
276
|
+
if (!participantAllowed(p, state.debate_phase)) {
|
|
277
|
+
return {
|
|
278
|
+
ok: false,
|
|
279
|
+
reason: `participant ${p} invalid for debate_phase=${state.debate_phase}`,
|
|
280
|
+
};
|
|
281
|
+
}
|
|
282
|
+
}
|
|
283
|
+
|
|
270
284
|
const nextRound = state.round_count + 1;
|
|
271
285
|
if (nextRound > state.max_rounds) {
|
|
272
286
|
await emitBudgetExhausted("max_rounds_reached");
|
|
@@ -310,6 +324,11 @@ export default function debateOrchestrator(pi: ExtensionAPI) {
|
|
|
310
324
|
};
|
|
311
325
|
}
|
|
312
326
|
|
|
327
|
+
const profileName =
|
|
328
|
+
state.debate_phase === "plan"
|
|
329
|
+
? ("plan" as const)
|
|
330
|
+
: ("aggressive" as const);
|
|
331
|
+
|
|
313
332
|
const roundRecord = {
|
|
314
333
|
schema_version: "1.0.0",
|
|
315
334
|
contract_version: "1.0.0",
|
|
@@ -322,7 +341,7 @@ export default function debateOrchestrator(pi: ExtensionAPI) {
|
|
|
322
341
|
evidence_refs: envelope.payload.evidence_refs,
|
|
323
342
|
token_usage: envelope.payload.token_usage,
|
|
324
343
|
budget_profile: {
|
|
325
|
-
name:
|
|
344
|
+
name: profileName,
|
|
326
345
|
max_rounds: state.max_rounds,
|
|
327
346
|
round_token_cap: state.round_token_cap,
|
|
328
347
|
debate_global_cap: state.debate_global_cap,
|
|
@@ -354,12 +373,20 @@ export default function debateOrchestrator(pi: ExtensionAPI) {
|
|
|
354
373
|
),
|
|
355
374
|
);
|
|
356
375
|
const decision = decidePolicy(lastSeverity, evidenceScore);
|
|
376
|
+
const planPhase = state.debate_phase === "plan";
|
|
377
|
+
const evaluatorPassed = planPhase
|
|
378
|
+
? Boolean(state.last_review_gate_ready)
|
|
379
|
+
: true;
|
|
380
|
+
const debateComplete = planPhase
|
|
381
|
+
? state.round_count >= state.max_rounds
|
|
382
|
+
: state.round_count > 0;
|
|
357
383
|
|
|
358
384
|
const consensus = {
|
|
359
385
|
schema_version: "1.0.0",
|
|
360
386
|
contract_version: "1.0.0",
|
|
361
387
|
run_id: state.run_id,
|
|
362
388
|
debate_id: state.debate_id,
|
|
389
|
+
debate_phase: state.debate_phase,
|
|
363
390
|
round_count: state.round_count,
|
|
364
391
|
budget_used: state.budget_used,
|
|
365
392
|
severity_scores: lastSeverity,
|
|
@@ -371,15 +398,25 @@ export default function debateOrchestrator(pi: ExtensionAPI) {
|
|
|
371
398
|
},
|
|
372
399
|
confidence_weights: WEIGHTS,
|
|
373
400
|
evidence_refs: [],
|
|
374
|
-
strict_gate_prerequisites:
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
401
|
+
strict_gate_prerequisites: planPhase
|
|
402
|
+
? {
|
|
403
|
+
plan_gate_passed: false,
|
|
404
|
+
execution_completed: false,
|
|
405
|
+
evaluator_passed: evaluatorPassed,
|
|
406
|
+
adversarial_debate_completed: debateComplete,
|
|
407
|
+
severity_policy_ok: decision !== "block",
|
|
408
|
+
benchmark_delta_checks_passed: false,
|
|
409
|
+
rollback_artifacts_generated: false,
|
|
410
|
+
}
|
|
411
|
+
: {
|
|
412
|
+
plan_gate_passed: true,
|
|
413
|
+
execution_completed: true,
|
|
414
|
+
evaluator_passed: true,
|
|
415
|
+
adversarial_debate_completed: debateComplete,
|
|
416
|
+
severity_policy_ok: decision !== "block",
|
|
417
|
+
benchmark_delta_checks_passed: false,
|
|
418
|
+
rollback_artifacts_generated: false,
|
|
419
|
+
},
|
|
383
420
|
policy_decision: decision,
|
|
384
421
|
rationale,
|
|
385
422
|
};
|
|
@@ -18,8 +18,13 @@ import {
|
|
|
18
18
|
toToolDetails,
|
|
19
19
|
validateAskParams,
|
|
20
20
|
} from "./lib/ask-user/validate.js";
|
|
21
|
+
import { claimExtensionLoad } from "./lib/extension-load-guard.js";
|
|
22
|
+
|
|
23
|
+
// @ts-expect-error pi extensions run as ESM
|
|
24
|
+
const MODULE_URL = import.meta.url;
|
|
21
25
|
|
|
22
26
|
export default function harnessAskUser(pi: ExtensionAPI) {
|
|
27
|
+
if (!claimExtensionLoad("harness-ask-user", MODULE_URL)) return;
|
|
23
28
|
pi.registerTool({
|
|
24
29
|
name: "ask_user",
|
|
25
30
|
label: "Ask User",
|
|
@@ -4,14 +4,25 @@
|
|
|
4
4
|
|
|
5
5
|
import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
|
|
6
6
|
import { Text } from "@earendil-works/pi-tui";
|
|
7
|
+
import { Type } from "@sinclair/typebox";
|
|
8
|
+
import type { PlanPacketLike } from "../lib/harness-run-context.js";
|
|
7
9
|
import {
|
|
8
10
|
appendPlanApprovalIfNew,
|
|
9
11
|
getLatestRunContext,
|
|
10
12
|
hasPlanUserApproval,
|
|
11
13
|
parsePlanApprovalFromMessage,
|
|
14
|
+
planPacketSummary,
|
|
12
15
|
} from "../lib/harness-run-context.js";
|
|
16
|
+
import { claimExtensionLoad } from "./lib/extension-load-guard.js";
|
|
17
|
+
import {
|
|
18
|
+
CREATE_PLAN_GUIDELINES,
|
|
19
|
+
CREATE_PLAN_SNIPPET,
|
|
20
|
+
executeCreatePlan,
|
|
21
|
+
formatCreatePlanResultText,
|
|
22
|
+
} from "./lib/plan-approval/create-plan.js";
|
|
13
23
|
import { runPlanApprovalDialog } from "./lib/plan-approval/dialog.js";
|
|
14
24
|
import { runPlanApprovalFallback } from "./lib/plan-approval/fallback.js";
|
|
25
|
+
import { writePlanReviewMarkdown } from "./lib/plan-approval/plan-review.js";
|
|
15
26
|
import {
|
|
16
27
|
renderApprovePlanCall,
|
|
17
28
|
renderApprovePlanResult,
|
|
@@ -32,7 +43,21 @@ import {
|
|
|
32
43
|
validateApprovePlanParams,
|
|
33
44
|
} from "./lib/plan-approval/validate.js";
|
|
34
45
|
|
|
46
|
+
// @ts-expect-error pi extensions run as ESM
|
|
47
|
+
const MODULE_URL = import.meta.url;
|
|
48
|
+
|
|
49
|
+
const CreatePlanParamsSchema = Type.Object({
|
|
50
|
+
plan_packet: Type.Object(
|
|
51
|
+
{},
|
|
52
|
+
{
|
|
53
|
+
description:
|
|
54
|
+
"Approved PlanPacket to persist (same object as approve_plan).",
|
|
55
|
+
},
|
|
56
|
+
),
|
|
57
|
+
});
|
|
58
|
+
|
|
35
59
|
export default function harnessPlanApproval(pi: ExtensionAPI) {
|
|
60
|
+
if (!claimExtensionLoad("harness-plan-approval", MODULE_URL)) return;
|
|
36
61
|
pi.registerMessageRenderer(
|
|
37
62
|
"harness-plan-draft",
|
|
38
63
|
(message, _options, theme) => {
|
|
@@ -61,7 +86,7 @@ export default function harnessPlanApproval(pi: ExtensionAPI) {
|
|
|
61
86
|
name: "approve_plan",
|
|
62
87
|
label: "Approve Plan",
|
|
63
88
|
description:
|
|
64
|
-
"Present a PlanPacket for user approval with a scrollable plan view.
|
|
89
|
+
"Present a PlanPacket for user approval with a scrollable plan view. Parent /harness-plan orchestrator calls this after decomposition, hypothesis, and parallel reviews.",
|
|
65
90
|
promptSnippet: PROMPT_SNIPPET,
|
|
66
91
|
promptGuidelines: PROMPT_GUIDELINES,
|
|
67
92
|
parameters: ApprovePlanParamsSchema,
|
|
@@ -92,7 +117,7 @@ export default function harnessPlanApproval(pi: ExtensionAPI) {
|
|
|
92
117
|
content: [
|
|
93
118
|
{
|
|
94
119
|
type: "text",
|
|
95
|
-
text: `Plan ${planId} already approved in this harness run
|
|
120
|
+
text: `Plan ${planId} already approved in this harness run. Proceed with /harness-run.`,
|
|
96
121
|
},
|
|
97
122
|
],
|
|
98
123
|
details: {
|
|
@@ -111,14 +136,32 @@ export default function harnessPlanApproval(pi: ExtensionAPI) {
|
|
|
111
136
|
const summary =
|
|
112
137
|
validated.human_summary?.trim() ||
|
|
113
138
|
`Plan ${planId} — pending your approval`;
|
|
139
|
+
const runCtx = getLatestRunContext(entries);
|
|
140
|
+
const projectRoot = process.cwd();
|
|
141
|
+
const reviewPath = await writePlanReviewMarkdown(
|
|
142
|
+
projectRoot,
|
|
143
|
+
runCtx,
|
|
144
|
+
validated.plan_packet,
|
|
145
|
+
{
|
|
146
|
+
human_summary: validated.human_summary,
|
|
147
|
+
research_brief: validated.research_brief,
|
|
148
|
+
status: "draft",
|
|
149
|
+
},
|
|
150
|
+
);
|
|
151
|
+
const draftContent =
|
|
152
|
+
reviewPath != null
|
|
153
|
+
? `${summary}\nEditor review: ${reviewPath}`
|
|
154
|
+
: summary;
|
|
114
155
|
pi.sendMessage({
|
|
115
156
|
customType: "harness-plan-draft",
|
|
116
|
-
content:
|
|
157
|
+
content: draftContent,
|
|
117
158
|
display: true,
|
|
118
159
|
details: {
|
|
119
160
|
schema_version: "1.0.0",
|
|
120
161
|
plan_packet: validated.plan_packet,
|
|
121
162
|
human_summary: validated.human_summary ?? null,
|
|
163
|
+
research_brief: validated.research_brief ?? null,
|
|
164
|
+
plan_review_path: reviewPath,
|
|
122
165
|
shown_at: new Date().toISOString(),
|
|
123
166
|
},
|
|
124
167
|
});
|
|
@@ -153,6 +196,23 @@ export default function harnessPlanApproval(pi: ExtensionAPI) {
|
|
|
153
196
|
);
|
|
154
197
|
}
|
|
155
198
|
|
|
199
|
+
const approved =
|
|
200
|
+
!outcome.cancelled &&
|
|
201
|
+
outcome.response?.kind === "selection" &&
|
|
202
|
+
/^approve/i.test(outcome.response.selections[0] ?? "");
|
|
203
|
+
if (approved && runCtx) {
|
|
204
|
+
await writePlanReviewMarkdown(
|
|
205
|
+
projectRoot,
|
|
206
|
+
runCtx,
|
|
207
|
+
validated.plan_packet,
|
|
208
|
+
{
|
|
209
|
+
human_summary: validated.human_summary,
|
|
210
|
+
research_brief: validated.research_brief,
|
|
211
|
+
status: "approved",
|
|
212
|
+
},
|
|
213
|
+
);
|
|
214
|
+
}
|
|
215
|
+
|
|
156
216
|
const text = formatApprovePlanResultText(
|
|
157
217
|
outcome.response,
|
|
158
218
|
outcome.cancelled,
|
|
@@ -171,4 +231,78 @@ export default function harnessPlanApproval(pi: ExtensionAPI) {
|
|
|
171
231
|
return renderApprovePlanResult(result, options, theme);
|
|
172
232
|
},
|
|
173
233
|
});
|
|
234
|
+
|
|
235
|
+
pi.registerTool({
|
|
236
|
+
name: "create_plan",
|
|
237
|
+
label: "Create Plan",
|
|
238
|
+
description:
|
|
239
|
+
"Write the approved PlanPacket to plan-packet.yaml for this harness run. Call only after approve_plan (Approve). Do not use write/edit.",
|
|
240
|
+
promptSnippet: CREATE_PLAN_SNIPPET,
|
|
241
|
+
promptGuidelines: CREATE_PLAN_GUIDELINES,
|
|
242
|
+
parameters: CreatePlanParamsSchema,
|
|
243
|
+
|
|
244
|
+
async execute(_toolCallId, params, _signal, _onUpdate, ctx) {
|
|
245
|
+
const validated = validateApprovePlanParams(params as ApprovePlanParams);
|
|
246
|
+
if (typeof validated === "string") {
|
|
247
|
+
return {
|
|
248
|
+
content: [{ type: "text", text: validated }],
|
|
249
|
+
details: { error: validated },
|
|
250
|
+
isError: true,
|
|
251
|
+
};
|
|
252
|
+
}
|
|
253
|
+
|
|
254
|
+
const entries = ctx.sessionManager.getEntries();
|
|
255
|
+
const runCtx = getLatestRunContext(entries);
|
|
256
|
+
const projectRoot = process.cwd();
|
|
257
|
+
const result = await executeCreatePlan(validated.plan_packet, {
|
|
258
|
+
projectRoot,
|
|
259
|
+
getParentEntries: () => entries,
|
|
260
|
+
getSubagentEntries: () => entries,
|
|
261
|
+
getParentRunContext: () => runCtx,
|
|
262
|
+
onCommitted: (updated, packet, planPath) => {
|
|
263
|
+
pi.appendEntry("harness-run-context", updated);
|
|
264
|
+
pi.appendEntry(
|
|
265
|
+
"harness-plan-packet",
|
|
266
|
+
planPacketSummary(packet, planPath, "ready"),
|
|
267
|
+
);
|
|
268
|
+
},
|
|
269
|
+
});
|
|
270
|
+
|
|
271
|
+
const text = formatCreatePlanResultText(result);
|
|
272
|
+
return {
|
|
273
|
+
content: [{ type: "text", text }],
|
|
274
|
+
details: result.ok
|
|
275
|
+
? { plan_path: result.planPath, plan_id: result.planId }
|
|
276
|
+
: { error: result.error },
|
|
277
|
+
isError: !result.ok,
|
|
278
|
+
};
|
|
279
|
+
},
|
|
280
|
+
|
|
281
|
+
renderCall(args, theme) {
|
|
282
|
+
const packet = (args as { plan_packet?: PlanPacketLike }).plan_packet;
|
|
283
|
+
const id = packet?.plan_id ?? "?";
|
|
284
|
+
return new Text(theme.fg("accent", `create_plan: ${id}`), 0, 0);
|
|
285
|
+
},
|
|
286
|
+
|
|
287
|
+
renderResult(result, _options, theme) {
|
|
288
|
+
const details = result.details as
|
|
289
|
+
| { plan_path?: string; error?: string }
|
|
290
|
+
| undefined;
|
|
291
|
+
if (details?.error) {
|
|
292
|
+
return new Text(
|
|
293
|
+
theme.fg("error", details.error ?? "create_plan failed"),
|
|
294
|
+
0,
|
|
295
|
+
0,
|
|
296
|
+
);
|
|
297
|
+
}
|
|
298
|
+
return new Text(
|
|
299
|
+
theme.fg(
|
|
300
|
+
"success",
|
|
301
|
+
`Wrote ${details?.plan_path ?? "plan-packet.yaml"}`,
|
|
302
|
+
),
|
|
303
|
+
0,
|
|
304
|
+
0,
|
|
305
|
+
);
|
|
306
|
+
},
|
|
307
|
+
});
|
|
174
308
|
}
|