ultimate-pi 0.10.1 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agents/skills/harness-debate-plan/SKILL.md +44 -0
- package/.agents/skills/harness-decisions/SKILL.md +3 -3
- package/.agents/skills/harness-orchestration/SKILL.md +59 -25
- package/.agents/skills/harness-plan/SKILL.md +16 -15
- package/.pi/agents/harness/adversary.md +0 -1
- package/.pi/agents/harness/evaluator.md +0 -1
- package/.pi/agents/harness/executor.md +1 -2
- package/.pi/agents/harness/incident-recorder.md +0 -1
- package/.pi/agents/harness/meta-optimizer.md +0 -1
- package/.pi/agents/harness/planning/decompose.md +83 -0
- package/.pi/agents/harness/planning/execution-plan-author.md +30 -0
- package/.pi/agents/harness/planning/hypothesis-validator.md +23 -0
- package/.pi/agents/harness/planning/hypothesis.md +89 -0
- package/.pi/agents/harness/planning/plan-adversary.md +18 -0
- package/.pi/agents/harness/planning/plan-evaluator.md +18 -0
- package/.pi/agents/harness/planning/review-integrator.md +23 -0
- package/.pi/agents/harness/planning/scout-graphify.md +54 -0
- package/.pi/agents/harness/planning/scout-semantic.md +47 -0
- package/.pi/agents/harness/planning/scout-structure.md +50 -0
- package/.pi/agents/harness/planning/sprint-contract-auditor.md +18 -0
- package/.pi/agents/harness/planning/stack-researcher.md +24 -0
- package/.pi/agents/harness/tie-breaker.md +0 -1
- package/.pi/agents/harness/trace-librarian.md +0 -1
- package/.pi/extensions/debate-orchestrator.ts +90 -53
- package/.pi/extensions/harness-ask-user.ts +5 -0
- package/.pi/extensions/harness-plan-approval.ts +137 -3
- package/.pi/extensions/harness-run-context.ts +146 -6
- package/.pi/extensions/harness-subagents.ts +10 -5
- package/.pi/extensions/harness-web-tools.ts +2 -0
- package/.pi/extensions/lib/extension-load-guard.ts +39 -0
- package/.pi/extensions/lib/harness-posthog.ts +6 -1
- package/.pi/extensions/lib/harness-spawn-budget.ts +75 -0
- package/.pi/extensions/lib/harness-subagent-auth.ts +123 -0
- package/.pi/extensions/lib/{harness-subagents/harness-subagent-policy.ts → harness-subagent-policy.ts} +34 -9
- package/.pi/extensions/lib/harness-subagent-precheck.ts +95 -0
- package/.pi/extensions/lib/harness-subagents-bridge.ts +176 -0
- package/.pi/extensions/lib/plan-approval/create-plan.ts +9 -7
- package/.pi/extensions/lib/plan-approval/plan-review.ts +393 -0
- package/.pi/extensions/lib/plan-approval/schema.ts +16 -1
- package/.pi/extensions/lib/plan-approval/types.ts +16 -0
- package/.pi/extensions/lib/plan-approval/validate.ts +2 -0
- package/.pi/extensions/lib/plan-debate-envelope.ts +84 -0
- package/.pi/extensions/lib/{harness-subagents/spawn-policy.ts → spawn-policy.ts} +2 -5
- package/.pi/extensions/policy-gate.ts +1 -1
- package/.pi/extensions/review-integrity.ts +48 -29
- package/.pi/extensions/ultimate-pi-vcc.ts +5 -0
- package/.pi/harness/agents.manifest.json +126 -82
- package/.pi/harness/docs/adrs/0032-harness-command-orchestration.md +7 -6
- package/.pi/harness/docs/adrs/0033-parent-orchestrated-planning.md +34 -0
- package/.pi/harness/docs/adrs/0034-darwin-plan-research-pipeline.md +41 -0
- package/.pi/harness/docs/adrs/0035-plan-phase-review-gate.md +27 -0
- package/.pi/harness/docs/adrs/README.md +2 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r1.yaml +25 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r4.yaml +26 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/sprint-audit-r4.yaml +5 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/plan-packet.yaml +196 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/plan-review.md +14 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/research-brief.yaml +32 -0
- package/.pi/harness/evals/smoke/run-context.fixture.json +1 -1
- package/.pi/harness/evals/smoke/smoke-harness-plan.mjs +88 -0
- package/.pi/harness/specs/README.md +1 -1
- package/.pi/harness/specs/harness-posthog-event.schema.json +6 -1
- package/.pi/harness/specs/harness-spawn-context.schema.json +2 -1
- package/.pi/harness/specs/plan-adversary-brief.schema.json +45 -0
- package/.pi/harness/specs/plan-decomposition-brief.schema.json +108 -0
- package/.pi/harness/specs/plan-execution-plan-brief.schema.json +13 -0
- package/.pi/harness/specs/plan-execution-plan.schema.json +255 -0
- package/.pi/harness/specs/plan-hypothesis-brief.schema.json +96 -0
- package/.pi/harness/specs/plan-hypothesis-eval.schema.json +61 -0
- package/.pi/harness/specs/plan-packet.schema.json +14 -5
- package/.pi/harness/specs/plan-review-round-draft.schema.json +68 -0
- package/.pi/harness/specs/plan-sprint-audit-turn.schema.json +29 -0
- package/.pi/harness/specs/plan-stack-brief.schema.json +65 -0
- package/.pi/harness/specs/plan-validation-turn.schema.json +42 -0
- package/.pi/harness/specs/round-result.schema.json +16 -9
- package/.pi/lib/debate-orchestrator-types.ts +38 -0
- package/.pi/lib/harness-agent-discovery.mjs +81 -0
- package/.pi/lib/harness-run-context.ts +76 -38
- package/.pi/lib/harness-yaml.mjs +73 -0
- package/.pi/lib/harness-yaml.ts +90 -0
- package/.pi/prompts/harness-auto.md +13 -11
- package/.pi/prompts/harness-critic.md +2 -2
- package/.pi/prompts/harness-eval.md +3 -3
- package/.pi/prompts/harness-incident.md +2 -2
- package/.pi/prompts/harness-plan.md +106 -37
- package/.pi/prompts/harness-review.md +2 -2
- package/.pi/prompts/harness-router-tune.md +1 -1
- package/.pi/prompts/harness-run.md +2 -2
- package/.pi/prompts/harness-setup.md +15 -6
- package/.pi/prompts/harness-trace.md +2 -2
- package/.pi/scripts/harness-agents-manifest.mjs +1 -1
- package/.pi/scripts/harness-resolve-up-pkg.mjs +13 -0
- package/.pi/scripts/harness-verify.mjs +28 -19
- package/.pi/scripts/validate-plan-dag.mjs +258 -0
- package/.pi/scripts/vendor-sync-pi-subagents.sh +19 -0
- package/CHANGELOG.md +24 -0
- package/THIRD_PARTY_NOTICES.md +8 -0
- package/biome.json +4 -1
- package/package.json +6 -4
- package/.pi/agents/harness/planner.md +0 -54
- package/.pi/extensions/lib/harness-subagents/agent-loader.ts +0 -126
- package/.pi/extensions/lib/harness-subagents/agent-manifest.ts +0 -119
- package/.pi/extensions/lib/harness-subagents/agent-parser.ts +0 -87
- package/.pi/extensions/lib/harness-subagents/blackboard-tool.ts +0 -118
- package/.pi/extensions/lib/harness-subagents/blackboard.ts +0 -175
- package/.pi/extensions/lib/harness-subagents/parent-ask-user-bridge.ts +0 -10
- package/.pi/extensions/lib/harness-subagents/parent-harness-ui-bridge.ts +0 -310
- package/.pi/extensions/lib/harness-subagents/parent-harness-ui-hooks.ts +0 -59
- package/.pi/extensions/lib/harness-subagents/types-blackboard.ts +0 -27
- package/.pi/extensions/lib/harness-subagents/vendored/agent-manager.ts +0 -558
- package/.pi/extensions/lib/harness-subagents/vendored/agent-runner.ts +0 -684
- package/.pi/extensions/lib/harness-subagents/vendored/agent-types.ts +0 -175
- package/.pi/extensions/lib/harness-subagents/vendored/context.ts +0 -59
- package/.pi/extensions/lib/harness-subagents/vendored/cross-extension-rpc.ts +0 -134
- package/.pi/extensions/lib/harness-subagents/vendored/custom-agents.ts +0 -5
- package/.pi/extensions/lib/harness-subagents/vendored/default-agents.ts +0 -123
- package/.pi/extensions/lib/harness-subagents/vendored/env.ts +0 -43
- package/.pi/extensions/lib/harness-subagents/vendored/group-join.ts +0 -144
- package/.pi/extensions/lib/harness-subagents/vendored/index.ts +0 -2494
- package/.pi/extensions/lib/harness-subagents/vendored/invocation-config.ts +0 -52
- package/.pi/extensions/lib/harness-subagents/vendored/memory.ts +0 -182
- package/.pi/extensions/lib/harness-subagents/vendored/model-resolver.ts +0 -92
- package/.pi/extensions/lib/harness-subagents/vendored/output-file.ts +0 -115
- package/.pi/extensions/lib/harness-subagents/vendored/prompts.ts +0 -103
- package/.pi/extensions/lib/harness-subagents/vendored/schedule-store.ts +0 -177
- package/.pi/extensions/lib/harness-subagents/vendored/schedule.ts +0 -416
- package/.pi/extensions/lib/harness-subagents/vendored/settings.ts +0 -210
- package/.pi/extensions/lib/harness-subagents/vendored/skill-loader.ts +0 -108
- package/.pi/extensions/lib/harness-subagents/vendored/types.ts +0 -187
- package/.pi/extensions/lib/harness-subagents/vendored/ui/agent-widget.ts +0 -639
- package/.pi/extensions/lib/harness-subagents/vendored/ui/conversation-viewer.ts +0 -324
- package/.pi/extensions/lib/harness-subagents/vendored/ui/schedule-menu.ts +0 -110
- package/.pi/extensions/lib/harness-subagents/vendored/usage.ts +0 -71
- package/.pi/extensions/lib/harness-subagents/vendored/worktree.ts +0 -195
- /package/.pi/extensions/{00-ultimate-pi-system-prompt.ts → custom-system-prompt.ts} +0 -0
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pre-spawn validation for harness subagent tool calls.
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import {
|
|
6
|
+
type AgentConfig,
|
|
7
|
+
agentAllowsMutatingTools,
|
|
8
|
+
} from "../../../vendor/pi-subagents/src/agents.js";
|
|
9
|
+
import type { HarnessPhase } from "../../lib/harness-run-context.js";
|
|
10
|
+
import { inferHarnessPhase } from "../../lib/harness-run-context.js";
|
|
11
|
+
import { classifyHarnessAgent } from "./harness-subagent-policy.js";
|
|
12
|
+
|
|
13
|
+
/** Minimal view of one entry in a spawn request's `tasks` or `chain` list. */
export interface SubagentTaskRef {
	/** Name of the agent this entry asks to spawn. */
	agent: string;
}
|
|
16
|
+
|
|
17
|
+
/** Outcome of a pre-spawn validation. */
export interface PrecheckResult {
	/** `true` when the spawn request may proceed. */
	ok: boolean;
	/** Explanation of the rejection; set by the failing checks in this file. */
	message?: string;
}
|
|
21
|
+
|
|
22
|
+
/**
 * List every agent name referenced by a spawn request.
 *
 * Order is: the single `agent` (when non-empty), each `tasks` entry, each
 * `chain` entry, then the `aggregator`. Duplicates are kept.
 */
function collectAgents(params: {
	agent?: string;
	tasks?: SubagentTaskRef[];
	chain?: SubagentTaskRef[];
	aggregator?: { agent: string };
}): string[] {
	const { agent, tasks, chain, aggregator } = params;
	const single = agent ? [agent] : [];
	const parallel = tasks ? tasks.map((task) => task.agent) : [];
	const chained = chain ? chain.map((step) => step.agent) : [];
	const aggregating = aggregator ? [aggregator.agent] : [];
	return [...single, ...parallel, ...chained, ...aggregating];
}
|
|
35
|
+
|
|
36
|
+
/**
 * Look up an agent config by exact name.
 *
 * @returns The first config whose `name` equals `name`, or `undefined`.
 */
function resolveAgent(
	agents: AgentConfig[],
	name: string,
): AgentConfig | undefined {
	for (const candidate of agents) {
		if (candidate.name === name) return candidate;
	}
	return undefined;
}
|
|
42
|
+
|
|
43
|
+
/**
 * Validate a subagent spawn request against the current harness phase.
 *
 * Rejects when:
 * - the phase is `"plan"` and any requested agent is mutating, or
 * - more than one parallel task is requested and more than one requested
 *   agent is mutating.
 *
 * An agent is treated as mutating when its resolved config passes
 * `agentAllowsMutatingTools`; names without a config fall back to a
 * `"harness/executor"` prefix check.
 *
 * @param params - Spawn request (single agent, parallel tasks, chain, aggregator).
 * @param agents - Known agent configs used to resolve names.
 * @param phase - Current harness phase.
 * @returns `{ ok: true }` or `{ ok: false, message }`.
 */
export function precheckHarnessSubagentSpawn(
	params: {
		agent?: string;
		tasks?: SubagentTaskRef[];
		chain?: SubagentTaskRef[];
		aggregator?: { agent: string };
	},
	agents: AgentConfig[],
	phase: HarnessPhase,
): PrecheckResult {
	const requested = collectAgents(params);

	const mutating = requested.filter((agentName) => {
		const config = resolveAgent(agents, agentName);
		if (config) return agentAllowsMutatingTools(config);
		// Unknown agent: only executor-prefixed names are assumed to mutate.
		return agentName.startsWith("harness/executor");
	});

	if (mutating.length > 0 && phase === "plan") {
		const message =
			`Plan phase: cannot spawn mutating subagents (${mutating.join(", ")}). ` +
			`Use read-only harness/planning/* agents until execute phase.`;
		return { ok: false, message };
	}

	const parallelTaskCount = params.tasks?.length ?? 0;
	if (parallelTaskCount > 1 && mutating.length > 1) {
		const message =
			"Parallel subagent tasks cannot include multiple mutating agents (file race risk). " +
			"Run one executor at a time.";
		return { ok: false, message };
	}

	// Planner-kind agents outside the plan phase are classified but not
	// restricted: the branch below is intentionally empty.
	for (const agentName of requested) {
		if (!agentName.startsWith("harness/")) continue;
		const kind = classifyHarnessAgent(agentName);
		if (kind === "planner" && phase !== "plan") {
			// allowed — planning agents can run in plan only ideally
		}
	}

	return { ok: true };
}
|
|
89
|
+
|
|
90
|
+
/**
 * Infer the harness phase for a precheck by delegating to `inferHarnessPhase`.
 *
 * @param entries - Session entries, passed through untyped.
 * @param prompt - Optional prompt text forwarded unchanged.
 */
export function inferPhaseForPrecheck(
	entries: unknown[],
	prompt?: string,
): HarnessPhase {
	// The cast only bridges this module's loose entry type to the callee's.
	const phase = inferHarnessPhase(entries as never, prompt);
	return phase;
}
|
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ultimate-pi harness wrapper around vendored pi-subagents.
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import type {
|
|
6
|
+
ExtensionAPI,
|
|
7
|
+
ExtensionContext,
|
|
8
|
+
} from "@earendil-works/pi-coding-agent";
|
|
9
|
+
import type { AgentConfig } from "../../../vendor/pi-subagents/src/agents.js";
|
|
10
|
+
import {
|
|
11
|
+
createSubagentsExtension,
|
|
12
|
+
type HarnessSubagentsOptions,
|
|
13
|
+
type SpawnAuthForward,
|
|
14
|
+
} from "../../../vendor/pi-subagents/src/subagents.js";
|
|
15
|
+
import { captureHarnessEvent } from "./harness-posthog.js";
|
|
16
|
+
import {
|
|
17
|
+
checkHarnessSpawnBudget,
|
|
18
|
+
countHarnessAgentsInRequest,
|
|
19
|
+
createSpawnBudgetState,
|
|
20
|
+
recordSpawnEnd,
|
|
21
|
+
recordSpawnStart,
|
|
22
|
+
} from "./harness-spawn-budget.js";
|
|
23
|
+
import {
|
|
24
|
+
isUsableApiKey,
|
|
25
|
+
resolveConcreteSubagentModel,
|
|
26
|
+
} from "./harness-subagent-auth.js";
|
|
27
|
+
import {
|
|
28
|
+
inferPhaseForPrecheck,
|
|
29
|
+
precheckHarnessSubagentSpawn,
|
|
30
|
+
} from "./harness-subagent-precheck.js";
|
|
31
|
+
|
|
32
|
+
// Module-level spawn budget state shared by the beforeExecute/onSpawnStart/onSpawnEnd hooks below.
const spawnBudget = createSpawnBudgetState();
|
|
33
|
+
// Session id used for captured harness events; refreshed in beforeExecute, "harness" until then.
let lastSessionId = "harness";
|
|
34
|
+
|
|
35
|
+
/**
 * Produce a log-safe rendering of an API key.
 *
 * @returns `undefined` for a missing/empty key, `"***"` for keys of 12
 *   characters or fewer, otherwise the first 7 and last 4 characters joined
 *   by an ellipsis.
 */
function maskApiKey(key: string | undefined): string | undefined {
	if (!key) return undefined;
	const total = key.length;
	if (total > 12) {
		const head = key.substring(0, 7);
		const tail = key.substring(total - 4);
		return `${head}…${tail}`;
	}
	return "***";
}
|
|
40
|
+
|
|
41
|
+
// #region agent log
|
|
42
|
+
/**
 * Fire-and-forget debug logger: POSTs one JSON record to a local ingest
 * endpoint.
 *
 * This is diagnostics only and must never affect the caller, so every
 * failure mode is swallowed: a missing global `fetch`, a payload that
 * `JSON.stringify` cannot serialize (circular references, BigInt), a
 * synchronous throw from `fetch`, and an asynchronous rejection.
 *
 * NOTE(review): the hard-coded localhost URL and session id look like
 * leftover debugging instrumentation — confirm whether this should ship.
 *
 * @param hypothesisId - Short tag identifying which debugging hypothesis the record belongs to.
 * @param location - Source location label for the record.
 * @param message - Human-readable message.
 * @param data - Extra structured fields to include in the record.
 */
function agentDebugLog(
	hypothesisId: string,
	location: string,
	message: string,
	data: Record<string, unknown>,
): void {
	try {
		// Runtimes without a global fetch would otherwise throw synchronously here.
		if (typeof fetch !== "function") return;
		const body = JSON.stringify({
			sessionId: "e762d5",
			hypothesisId,
			location,
			message,
			data,
			timestamp: Date.now(),
		});
		void fetch(
			"http://127.0.0.1:7928/ingest/a5d40896-34cb-4f12-97db-df7ada0b22f0",
			{
				method: "POST",
				headers: {
					"Content-Type": "application/json",
					"X-Debug-Session-Id": "e762d5",
				},
				body,
			},
		).catch(() => {});
	} catch {
		/* debug logging is best-effort */
	}
}
|
|
64
|
+
// #endregion
|
|
65
|
+
|
|
66
|
+
/**
 * Resolve the provider, model reference and API key to forward to a
 * spawned subagent.
 *
 * Resolves a concrete model from the parent model and the agent config,
 * then asks the model registry for that provider's API key. Both outcomes
 * are reported through `agentDebugLog` (the key only in masked form).
 *
 * @returns The auth to forward, or `undefined` when no concrete model can
 *   be resolved or the provider's key is not usable.
 */
async function resolveHarnessSpawnAuth(
	ctx: ExtensionContext,
	agent: AgentConfig,
): Promise<SpawnAuthForward | undefined> {
	const logLocation = "harness-subagents-bridge.ts:resolveHarnessSpawnAuth";
	const parentModel = ctx.model
		? { provider: ctx.model.provider, id: ctx.model.id }
		: undefined;
	// "provider/id" label used only in debug records.
	const parentModelLabel = parentModel
		? `${parentModel.provider}/${parentModel.id}`
		: undefined;

	const concrete = resolveConcreteSubagentModel(ctx.cwd, parentModel, agent);
	if (!concrete) {
		// #region agent log
		agentDebugLog("D", logLocation, "no concrete model", {
			agent: agent.name,
			agentModel: agent.model,
			parentModel: parentModelLabel,
		});
		// #endregion
		return undefined;
	}

	const { provider, modelRef } = concrete;
	const apiKey = await ctx.modelRegistry.getApiKeyForProvider(provider);
	// #region agent log
	agentDebugLog("F", logLocation, "concrete subprocess auth", {
		agent: agent.name,
		parentModel: parentModelLabel,
		concreteModel: modelRef,
		routerProfile: concrete.routerProfile,
		routerTier: concrete.routerTier,
		apiKey: maskApiKey(apiKey),
		usable: isUsableApiKey(apiKey),
	});
	// #endregion

	if (!isUsableApiKey(apiKey)) return undefined;
	return { provider, modelRef, apiKey };
}
|
|
119
|
+
|
|
120
|
+
/**
 * Build the harness flavour of the vendored subagents extension.
 *
 * Wires harness-specific hooks into `createSubagentsExtension`:
 * - `beforeExecute` enforces the spawn budget and the phase precheck for
 *   requests that include harness agents;
 * - `onSpawnStart` / `onSpawnEnd` keep the shared spawn budget in sync;
 * - `onSpawnStart` / `onCompleted` capture harness events.
 *
 * @param packageRoot - Forwarded to the vendored extension as `packageRoot`.
 * @returns A function that registers the extension on a given `ExtensionAPI`.
 */
export function createHarnessSubagentsExtension(
	packageRoot: string,
): (pi: ExtensionAPI) => void {
	const options: HarnessSubagentsOptions = {
		packageRoot,
		defaultAgentScope: "both",
		defaultConfirmProjectAgents: false,
		truncateDetails: true,
		resolveSpawnAuth: resolveHarnessSpawnAuth,
		beforeExecute: async (params, agents, ctx) => {
			// Remember the session so later event captures can be attributed to it.
			lastSessionId = ctx.sessionManager.getSessionId();

			const counted = countHarnessAgentsInRequest(
				params as Parameters<typeof countHarnessAgentsInRequest>[0],
			);
			const harnessCount = counted.harnessCount;
			// Requests without harness agents bypass budget and phase checks.
			if (!(harnessCount > 0)) return { ok: true };

			const budget = checkHarnessSpawnBudget(spawnBudget, harnessCount);
			if (!budget.ok) return { ok: false, message: budget.message };

			const phase = inferPhaseForPrecheck(ctx.sessionManager.getEntries());
			const pre = precheckHarnessSubagentSpawn(
				params as Parameters<typeof precheckHarnessSubagentSpawn>[0],
				agents,
				phase,
			);
			if (!pre.ok) return { ok: false, message: pre.message };

			return { ok: true };
		},
		onSpawnStart: (harnessCount) => {
			if (harnessCount <= 0) return;
			recordSpawnStart(spawnBudget, harnessCount);
			const properties = {
				active_after: spawnBudget.active,
				spawn_count: harnessCount,
			};
			captureHarnessEvent(lastSessionId, "harness_subagent_spawned", properties);
		},
		onSpawnEnd: (harnessCount) => {
			if (harnessCount <= 0) return;
			recordSpawnEnd(spawnBudget, harnessCount);
		},
		onCompleted: ({ agents, mode, durationMs }) => {
			const agentCount = agents.length;
			if (agentCount === 0) return;
			captureHarnessEvent(lastSessionId, "harness_subagent_completed", {
				mode,
				duration_ms: durationMs,
				agent_count: agentCount,
			});
		},
	};

	return function registerHarnessSubagents(pi: ExtensionAPI): void {
		createSubagentsExtension(pi, options);
	};
}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { mkdir
|
|
1
|
+
import { mkdir } from "node:fs/promises";
|
|
2
2
|
import { dirname, resolve } from "node:path";
|
|
3
3
|
import {
|
|
4
4
|
canonicalPlanPath,
|
|
@@ -9,6 +9,8 @@ import {
|
|
|
9
9
|
saveRunContextToDisk,
|
|
10
10
|
validatePlanPacket,
|
|
11
11
|
} from "../../../lib/harness-run-context.js";
|
|
12
|
+
import { writeYamlFile } from "../../../lib/harness-yaml.js";
|
|
13
|
+
import { writePlanReviewMarkdown } from "./plan-review.js";
|
|
12
14
|
|
|
13
15
|
export const CREATE_PLAN_SNIPPET =
|
|
14
16
|
"create_plan({ plan_packet: { ...approved PlanPacket } })";
|
|
@@ -16,7 +18,7 @@ export const CREATE_PLAN_SNIPPET =
|
|
|
16
18
|
export const CREATE_PLAN_GUIDELINES = [
|
|
17
19
|
"Call create_plan only after the user approves via approve_plan (Approve selection).",
|
|
18
20
|
"Pass the same plan_packet you showed in approve_plan — path is resolved automatically.",
|
|
19
|
-
"Never use write or edit for plan-packet.
|
|
21
|
+
"Never use write or edit for plan-packet.yaml; create_plan is the only allowed plan write.",
|
|
20
22
|
];
|
|
21
23
|
|
|
22
24
|
export interface CreatePlanDeps {
|
|
@@ -88,11 +90,7 @@ export async function executeCreatePlan(
|
|
|
88
90
|
|
|
89
91
|
try {
|
|
90
92
|
await mkdir(dirname(planPath), { recursive: true });
|
|
91
|
-
await
|
|
92
|
-
planPath,
|
|
93
|
-
`${JSON.stringify(planPacket, null, 2)}\n`,
|
|
94
|
-
"utf-8",
|
|
95
|
-
);
|
|
93
|
+
await writeYamlFile(planPath, planPacket);
|
|
96
94
|
} catch (err) {
|
|
97
95
|
const msg = err instanceof Error ? err.message : String(err);
|
|
98
96
|
return { ok: false, error: `create_plan: write failed — ${msg}` };
|
|
@@ -116,6 +114,10 @@ export async function executeCreatePlan(
|
|
|
116
114
|
/* disk mirror best-effort */
|
|
117
115
|
}
|
|
118
116
|
|
|
117
|
+
await writePlanReviewMarkdown(deps.projectRoot, updated, planPacket, {
|
|
118
|
+
status: "committed",
|
|
119
|
+
});
|
|
120
|
+
|
|
119
121
|
deps.onCommitted(updated, planPacket, planPath);
|
|
120
122
|
|
|
121
123
|
return {
|
|
@@ -0,0 +1,393 @@
|
|
|
1
|
+
import { mkdir, writeFile } from "node:fs/promises";
|
|
2
|
+
import { dirname, resolve } from "node:path";
|
|
3
|
+
import {
|
|
4
|
+
canonicalPlanPath,
|
|
5
|
+
canonicalPlanReviewPath,
|
|
6
|
+
type HarnessRunContext,
|
|
7
|
+
type PlanPacketLike,
|
|
8
|
+
} from "../../../lib/harness-run-context.js";
|
|
9
|
+
import { formatPlanPacketLines } from "./format-plan.js";
|
|
10
|
+
import type { PlanResearchBrief } from "./types.js";
|
|
11
|
+
|
|
12
|
+
export {
|
|
13
|
+
canonicalPlanReviewPath,
|
|
14
|
+
PLAN_REVIEW_BASENAME,
|
|
15
|
+
} from "../../../lib/harness-run-context.js";
|
|
16
|
+
|
|
17
|
+
/** Lifecycle state shown in plan-review.md; selects the closing instructions rendered by `formatPlanPacketMarkdown`. */
export type PlanReviewStatus = "draft" | "approved" | "committed";
|
|
18
|
+
|
|
19
|
+
/**
 * Narrow an unknown value to a plain key/value record.
 *
 * @returns `value` itself when it is a non-null, non-array object; otherwise `null`.
 */
function asRecord(value: unknown): Record<string, unknown> | null {
	if (value === null || typeof value !== "object") return null;
	if (Array.isArray(value)) return null;
	return value as Record<string, unknown>;
}
|
|
24
|
+
|
|
25
|
+
/**
 * Read an unknown value as trimmed, non-empty text.
 *
 * @returns The trimmed string, or `null` for non-strings and blank strings.
 */
function str(value: unknown): string | null {
	if (typeof value !== "string") return null;
	const trimmed = value.trim();
	return trimmed.length > 0 ? trimmed : null;
}
|
|
28
|
+
|
|
29
|
+
/**
 * Read an unknown value as a list of trimmed, non-empty strings.
 *
 * Non-array input yields an empty list; non-string and blank entries are dropped.
 */
function strList(value: unknown): string[] {
	const kept: string[] = [];
	if (!Array.isArray(value)) return kept;
	for (const item of value) {
		if (typeof item !== "string") continue;
		const trimmed = item.trim();
		if (trimmed) kept.push(trimmed);
	}
	return kept;
}
|
|
35
|
+
|
|
36
|
+
/**
 * Render Darwin research sections for plan-review.md.
 *
 * Emits up to three sections, each only when the matching part of the brief
 * is a plain object:
 * - "Phase 1 — Problem decomposition" from `research.decomposition`
 * - "Phase 2 — DARWIN hypothesis" from `research.hypothesis`
 * - "Self-evaluation" (a Markdown table) from `research.eval`
 *
 * Fields that are missing, of the wrong type, or blank are skipped.
 * Rationale text placed in table cells has `|` escaped and line breaks
 * flattened so free-form text cannot break the table layout.
 *
 * @param research - Research brief to render; `null`/`undefined` yields "".
 * @returns Markdown ending in a newline, or "" when nothing was rendered.
 */
export function formatResearchBriefMarkdown(
	research: PlanResearchBrief | null | undefined,
): string {
	if (!research) return "";

	// A table row is a single line split on unescaped pipes, so cell text
	// must not contain raw line breaks or bare `|` characters.
	const tableCell = (text: string): string =>
		text.replace(/\s*[\r\n]+\s*/g, " ").replace(/\|/g, "\\|");

	const lines: string[] = [];
	const decomp = asRecord(research.decomposition);
	const hyp = asRecord(research.hypothesis);
	const evalBrief = asRecord(research.eval);

	if (decomp) {
		lines.push("## Phase 1 — Problem decomposition");
		lines.push("");
		const restate = str(decomp.problem_restatement);
		if (restate) {
			lines.push("**What is being asked?**");
			lines.push("");
			lines.push(restate);
			lines.push("");
		}
		const types = strList(decomp.problem_types);
		if (types.length) {
			lines.push(`**Problem type(s):** ${types.join(", ")}`);
			lines.push("");
		}
		const scope = asRecord(decomp.scope);
		if (scope) {
			const focus = str(scope.narrowed_focus);
			if (focus) {
				lines.push("**Scope:**");
				lines.push("");
				lines.push(focus);
				lines.push("");
			}
			const excluded = strList(scope.excluded);
			if (excluded.length) {
				lines.push("**Excluded:**");
				for (const item of excluded) lines.push(`- ${item}`);
				lines.push("");
			}
		}
		// Bullet lists that share one layout: heading, items, blank line.
		for (const [label, key] of [
			["Hard constraints", "hard_constraints"],
			["Soft constraints", "soft_constraints"],
			["Success metrics", "success_metrics"],
		] as const) {
			const items = strList(decomp[key]);
			if (items.length) {
				lines.push(`**${label}:**`);
				for (const item of items) lines.push(`- ${item}`);
				lines.push("");
			}
		}
		const prior = asRecord(decomp.prior_art);
		if (prior) {
			lines.push("**Prior art:**");
			lines.push("");
			const best = str(prior.best_approach);
			const gap = str(prior.gap);
			if (best) lines.push(`- Best approach: ${best}`);
			if (gap) lines.push(`- Gap: ${gap}`);
			for (const dead of strList(prior.dead_ends)) {
				lines.push(`- Dead end: ${dead}`);
			}
			lines.push("");
		}
		const core = str(decomp.core_tension);
		if (core) {
			lines.push("**Core tension:**");
			lines.push("");
			lines.push(core);
			lines.push("");
		}
	}

	if (hyp) {
		lines.push("## Phase 2 — DARWIN hypothesis");
		lines.push("");
		const primary = asRecord(hyp.primary);
		if (primary) {
			for (const [label, key] of [
				["Claim", "claim"],
				["Mechanism", "mechanism"],
				["Prediction", "prediction"],
				["Experiment", "experiment"],
				["Resolves tension", "tension_resolution"],
			] as const) {
				const text = str(primary[key]);
				if (text) {
					lines.push(`**${label}:** ${text}`);
					lines.push("");
				}
			}
		}
		const fork = asRecord(hyp.dialectical_fork);
		if (fork) {
			const forkText = str(fork.fork);
			if (forkText) {
				lines.push(`**Dialectical fork:** ${forkText}`);
				lines.push("");
			}
			const pathA = str(fork.path_a);
			const pathB = str(fork.path_b);
			if (pathA) lines.push(`- **Path A:** ${pathA}`);
			if (pathB) lines.push(`- **Path B:** ${pathB}`);
			lines.push("");
		}
		const alts = Array.isArray(hyp.alternatives) ? hyp.alternatives : [];
		if (alts.length) {
			lines.push("**Alternatives:**");
			for (const alt of alts) {
				const rec = asRecord(alt);
				if (!rec) continue;
				const claim = str(rec.claim);
				const bet = str(rec.key_bet);
				if (claim) lines.push(`- ${claim}${bet ? ` (bet: ${bet})` : ""}`);
			}
			lines.push("");
		}
		const steps = strList(hyp.recommended_next_steps);
		if (steps.length) {
			lines.push("**Recommended next steps:**");
			// Every item uses "1."; Markdown renderers number ordered lists themselves.
			for (const step of steps) lines.push(`1. ${step}`);
			lines.push("");
		}
	}

	if (evalBrief) {
		lines.push("## Self-evaluation");
		lines.push("");
		lines.push("| Dimension | Score | Rationale |");
		lines.push("|-----------|-------|-----------|");
		const dims = asRecord(evalBrief.dimensions);
		if (dims) {
			for (const name of [
				"novelty",
				"coherence",
				"testability",
				"impact",
			] as const) {
				const dim = asRecord(dims[name]);
				if (!dim) continue;
				const score = typeof dim.score === "number" ? String(dim.score) : "?";
				const rationale = str(dim.rationale) ?? "";
				lines.push(`| ${name} | ${score}/100 | ${tableCell(rationale)} |`);
			}
		}
		const rel = asRecord(evalBrief.relevance);
		if (rel) {
			const passes = rel.passes === true ? "✓" : "✗";
			const rationale = str(rel.rationale) ?? "";
			lines.push(`| Relevance | ${passes} | ${tableCell(rationale)} |`);
		}
		lines.push("");
		const summary = str(evalBrief.human_summary);
		if (summary) {
			lines.push(summary);
			lines.push("");
		}
	}

	return lines.length ? `${lines.join("\n")}\n` : "";
}
|
|
199
|
+
|
|
200
|
+
/**
 * Render the full plan-review.md document for a plan packet.
 *
 * Layout: a header with status and ids, an optional "Summary" section, the
 * research sections from `formatResearchBriefMarkdown`, the packet itself
 * in a fenced text block, and closing instructions chosen by status.
 *
 * The packet-path bullet uses a format-neutral label because the path may
 * point at `plan-packet.yaml` (the file `create_plan` writes), not JSON.
 *
 * @param packet - Plan packet to render.
 * @param opts - Optional summary, status (default `"draft"`), packet path and research brief.
 * @returns Markdown ending in a newline.
 */
export function formatPlanPacketMarkdown(
	packet: PlanPacketLike,
	opts?: {
		human_summary?: string | null;
		status?: PlanReviewStatus;
		plan_packet_path?: string | null;
		research_brief?: PlanResearchBrief | null;
	},
): string {
	const lines: string[] = [];
	const status = opts?.status ?? "draft";
	lines.push("# Harness plan");
	lines.push("");
	lines.push(`- **Status:** ${status}`);
	lines.push(`- **plan_id:** ${packet.plan_id ?? "?"}`);
	lines.push(`- **task_id:** ${packet.task_id ?? "?"}`);
	lines.push(
		// A missing or non-string risk level is shown as "med".
		`- **risk_level:** ${typeof packet.risk_level === "string" ? packet.risk_level : "med"}`,
	);
	if (opts?.plan_packet_path) {
		lines.push(`- **canonical plan packet:** \`${opts.plan_packet_path}\``);
	}
	lines.push("");
	if (opts?.human_summary?.trim()) {
		lines.push("## Summary");
		lines.push("");
		lines.push(opts.human_summary.trim());
		lines.push("");
	}
	const researchMd = formatResearchBriefMarkdown(opts?.research_brief);
	if (researchMd) {
		// The research block carries its own trailing newline; normalise to one blank line.
		lines.push(researchMd.trimEnd());
		lines.push("");
	}
	lines.push("## Plan packet");
	lines.push("");
	lines.push("```text");
	for (const line of formatPlanPacketLines(packet, 100)) {
		lines.push(line);
	}
	lines.push("```");
	lines.push("");
	if (status === "draft") {
		lines.push(
			"Review this file in your editor, then return to the harness TUI to **Approve**, **Request changes**, or **Cancel**.",
		);
	} else if (status === "approved") {
		lines.push(
			"Approved in the harness TUI. Waiting for `create_plan` to write `plan-packet.yaml`, or run `/harness-plan-commit` if that step failed.",
		);
	} else {
		lines.push(
			"Plan committed. Next: `/harness-run` to execute (do not pass `--plan` on the happy path).",
		);
	}
	lines.push("");
	return `${lines.join("\n")}\n`;
}
|
|
258
|
+
|
|
259
|
+
/**
 * Write plan-review.md for the current run.
 *
 * The write is best-effort: directory creation or file write failures are
 * swallowed and reported as `null`.
 *
 * @param projectRoot - Project root used to derive the canonical paths.
 * @param runCtx - Current run context; without a `run_id` nothing is written.
 * @param packet - Plan packet to render.
 * @param opts - Optional summary, status (default `"draft"`) and research brief.
 * @returns The path written, or `null` when there is no run id or the write failed.
 */
export async function writePlanReviewMarkdown(
	projectRoot: string,
	runCtx: HarnessRunContext | null,
	packet: PlanPacketLike,
	opts?: {
		human_summary?: string | null;
		status?: PlanReviewStatus;
		research_brief?: PlanResearchBrief | null;
	},
): Promise<string | null> {
	if (!runCtx) return null;
	const runId = runCtx.run_id;
	if (!runId) return null;

	const reviewPath = canonicalPlanReviewPath(runId, projectRoot);
	// Prefer the path recorded on the run; fall back to the canonical location.
	const recordedPacketPath = runCtx.plan_packet_path;
	const planPacketPath =
		recordedPacketPath ?? canonicalPlanPath(runId, projectRoot);

	const markdown = formatPlanPacketMarkdown(packet, {
		human_summary: opts?.human_summary,
		status: opts?.status ?? "draft",
		plan_packet_path: planPacketPath,
		research_brief: opts?.research_brief,
	});

	try {
		const reviewDir = dirname(reviewPath);
		await mkdir(reviewDir, { recursive: true });
		await writeFile(reviewPath, markdown, "utf-8");
	} catch {
		return null;
	}
	return reviewPath;
}
|
|
288
|
+
|
|
289
|
+
/**
 * Loose structural view of a session entry, covering only the fields this
 * file reads when searching for plan packets. Every field is optional because
 * entries arrive as `unknown` and are not validated.
 */
interface SessionEntryLike {
  /** Entry discriminator; this file checks for `"custom"` and `"message"`. */
  type?: string;
  /** Sub-kind of a custom entry; this file checks for `"harness-plan-draft"`. */
  customType?: string;
  /** Payload of a custom entry; narrowed by the reader. */
  data?: unknown;
  /** Present on message entries. */
  message?: {
    /** This file checks for `"toolResult"` and `"assistant"`. */
    role?: string;
    /** Tool that produced a tool result; this file checks for `"approve_plan"`. */
    toolName?: string;
    /** Tool-result payload; narrowed by the reader. */
    details?: unknown;
    /** Content blocks; only blocks with `type === "text"` are read. */
    content?: Array<{ type?: string; text?: string }>;
  };
}
|
|
300
|
+
|
|
301
|
+
/**
 * Finds the most recent plan packet recorded in a list of session entries.
 *
 * Two passes are made, each walking from the newest entry to the oldest:
 *  1. `harness-plan-draft` custom entries and `approve_plan` tool results;
 *  2. fenced `json` code blocks in assistant text whose parsed value has a
 *     `plan_packet` key. This pass only runs when pass 1 finds nothing.
 *
 * A candidate is accepted only if it is an object with a truthy `plan_id` or
 * `scope`. Entries that are not objects, drafts without a payload, and
 * malformed JSON are skipped rather than throwing. Within one assistant
 * message, later blocks and later fences take precedence, in keeping with the
 * "latest" contract.
 *
 * @param entries - Raw session entries; their shape is not trusted.
 * @returns The accepted packet with its summary (`null` when absent), or
 *   `null` when no entry yields a usable packet.
 */
export function extractLatestPlanPacketFromEntries(
  entries: unknown[],
): { packet: PlanPacketLike; human_summary?: string | null } | null {
  type Extracted = { packet: PlanPacketLike; human_summary?: string | null };
  type PacketCarrier =
    | { plan_packet?: PlanPacketLike; human_summary?: string | null }
    | null
    | undefined;

  // Returns a result instead of mutating an outer variable so that
  // control-flow typing stays accurate at the call sites.
  const accept = (carrier: PacketCarrier): Extracted | null => {
    const packet = carrier?.plan_packet;
    if (!packet || typeof packet !== "object") return null;
    if (!packet.plan_id && !packet.scope) return null;
    return { packet, human_summary: carrier?.human_summary ?? null };
  };

  const asEntry = (raw: unknown): SessionEntryLike | null =>
    raw !== null && typeof raw === "object" ? (raw as SessionEntryLike) : null;

  // Pass 1: structured sources (drafts and approve_plan results), newest first.
  for (let i = entries.length - 1; i >= 0; i--) {
    const entry = asEntry(entries[i]);
    if (!entry) continue;

    if (entry.type === "custom" && entry.customType === "harness-plan-draft") {
      const hit = accept(entry.data as PacketCarrier);
      if (hit) return hit;
    }

    const message = entry.type === "message" ? entry.message : undefined;
    if (message?.role === "toolResult" && message.toolName === "approve_plan") {
      const hit = accept(message.details as PacketCarrier);
      if (hit) return hit;
    }
  }

  // Pass 2: fall back to JSON fences embedded in assistant text.
  const jsonFence = /```json\s*([\s\S]*?)```/gi;
  for (let i = entries.length - 1; i >= 0; i--) {
    const entry = asEntry(entries[i]);
    if (entry?.type !== "message" || entry.message?.role !== "assistant") {
      continue;
    }
    const blocks = Array.isArray(entry.message.content)
      ? entry.message.content
      : [];
    for (let b = blocks.length - 1; b >= 0; b--) {
      const block = blocks[b];
      if (block?.type !== "text" || typeof block.text !== "string") continue;
      // Inspect every fence (not just the first), most recent first.
      const fences = [...block.text.matchAll(jsonFence)].reverse();
      for (const fence of fences) {
        try {
          const hit = accept(JSON.parse(fence[1] ?? "") as PacketCarrier);
          if (hit) return hit;
        } catch {
          /* ignore malformed assistant JSON */
        }
      }
    }
  }

  return null;
}
|
|
369
|
+
|
|
370
|
+
/**
 * Writes the newest plan packet found in the session entries to the run's
 * review markdown file, marked as a draft.
 *
 * @param projectRoot - Forwarded to `writePlanReviewMarkdown`.
 * @param runCtx - Current run context, forwarded unchanged.
 * @param entries - Session entries searched for a plan packet.
 * @param _opts - Accepted but not read by this function.
 * @returns The review file path, or `null` when no packet is found or the
 *   write does not happen.
 */
export async function syncPlannerPlanReviewToDisk(
  projectRoot: string,
  runCtx: HarnessRunContext | null,
  entries: unknown[],
  _opts?: { agentStatus?: string },
): Promise<string | null> {
  const latest = extractLatestPlanPacketFromEntries(entries);
  if (latest === null) {
    return null;
  }
  const { packet, human_summary } = latest;
  return writePlanReviewMarkdown(projectRoot, runCtx, packet, {
    human_summary,
    status: "draft",
  });
}
|
|
383
|
+
|
|
384
|
+
/**
 * Builds the user-facing hint that points at the plan review file.
 *
 * @param reviewPath - Path of the written review file, or `null` (or an empty
 *   string) when no draft was captured.
 * @returns A two-line message containing the absolute review path, or a
 *   fallback message when there is no path.
 */
export function formatPlanReviewUserHint(reviewPath: string | null): string {
  if (reviewPath) {
    const absolutePath = resolve(reviewPath);
    const hintLines = [
      `Full plan for editor review: ${absolutePath}`,
      "Open this markdown file in VS Code (or your editor), read the scope and acceptance checks, then return to the harness TUI to Approve / Request changes / Cancel.",
    ];
    return hintLines.join("\n");
  }
  return "No plan draft was captured yet. If the planner is still clarifying, answer in the subagent or re-run /harness-plan.";
}
|