ultimate-pi 0.16.0 → 0.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.pi/agents/harness/planning/hypothesis.md +1 -1
- package/.pi/agents/harness/planning/implementation-researcher.md +1 -1
- package/.pi/extensions/harness-debate-tools.ts +12 -3
- package/.pi/extensions/harness-run-context.ts +12 -0
- package/.pi/extensions/harness-subagent-submit.ts +2 -25
- package/.pi/extensions/harness-telemetry.ts +29 -4
- package/.pi/extensions/lib/debate-bus-core.ts +15 -9
- package/.pi/extensions/lib/harness-subagent-auth.ts +104 -19
- package/.pi/extensions/lib/harness-subagent-policy.ts +14 -0
- package/.pi/extensions/lib/harness-subagents-bridge.ts +85 -0
- package/.pi/extensions/lib/plan-debate-eligibility.ts +61 -8
- package/.pi/extensions/lib/plan-debate-focus.ts +21 -9
- package/.pi/extensions/lib/plan-debate-gate.ts +80 -17
- package/.pi/extensions/lib/plan-debate-lanes.ts +27 -3
- package/.pi/extensions/lib/plan-debate-round-status.ts +18 -7
- package/.pi/extensions/lib/plan-messenger.ts +4 -0
- package/.pi/extensions/lib/plan-review-gate.ts +51 -0
- package/.pi/extensions/trace-recorder.ts +1 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/artifacts/implementation-research.yaml +28 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/artifacts/review-round-consolidated.yaml +25 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/plan-packet.yaml +196 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/plan-review.md +14 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/research-brief.yaml +62 -0
- package/.pi/harness/evals/smoke/smoke-harness-plan.mjs +40 -17
- package/.pi/harness/specs/plan-review-round-draft.schema.json +1 -1
- package/.pi/model-router.example.json +13 -4
- package/.pi/prompts/harness-plan.md +25 -7
- package/.pi/prompts/harness-setup.md +4 -4
- package/.pi/scripts/harness-generate-model-router.mjs +118 -36
- package/.pi/scripts/harness-model-router-routing.test.mjs +97 -0
- package/.pi/scripts/harness-sync-model-router.mjs +15 -2
- package/.pi/scripts/harness-verify.mjs +29 -0
- package/CHANGELOG.md +11 -0
- package/package.json +1 -1
- package/vendor/pi-model-router/UPSTREAM_PIN.md +3 -1
- package/vendor/pi-model-router/extensions/commands.ts +4 -4
- package/vendor/pi-model-router/extensions/index.ts +21 -0
- package/vendor/pi-model-router/extensions/provider.ts +130 -79
- package/vendor/pi-model-router/extensions/routing.ts +148 -0
- package/vendor/pi-model-router/extensions/state.ts +3 -0
- package/vendor/pi-model-router/extensions/types.ts +9 -0
- package/vendor/pi-model-router/extensions/ui.ts +16 -2
|
@@ -63,4 +63,4 @@ Do **not** include self-evaluation scores — a separate agent handles that.
|
|
|
63
63
|
|
|
64
64
|
## Output
|
|
65
65
|
|
|
66
|
-
Before ending, call `submit_hypothesis_brief` exactly once with the full `PlanHypothesisBrief` document. Do not paste the artifact as prose or a fenced JSON block — the tool
|
|
66
|
+
Before ending, call `submit_hypothesis_brief` exactly once with the full `PlanHypothesisBrief` document. The harness writes **`artifacts/hypothesis.yaml`** (YAML on disk). Do not use bash or any `*.json` path under `artifacts/`; do not paste the artifact as prose or a fenced JSON block — the submit tool is the deliverable.
|
|
@@ -31,7 +31,7 @@ Read `HarnessSpawnContext` plus paths to `artifacts/decomposition.yaml`, `artifa
|
|
|
31
31
|
|
|
32
32
|
## Output
|
|
33
33
|
|
|
34
|
-
Before ending, call `submit_implementation_research` exactly once with the full document.
|
|
34
|
+
Before ending, call `submit_implementation_research` exactly once with the full document. The harness writes **`artifacts/implementation-research.yaml`** (YAML on disk). Do not use bash or `implementation-research.json`; prose summary is optional — the submit tool is the deliverable.
|
|
35
35
|
|
|
36
36
|
|
|
37
37
|
## Guardrails
|
|
@@ -192,7 +192,7 @@ export default function harnessDebateTools(pi: ExtensionAPI) {
|
|
|
192
192
|
name: "harness_plan_debate_eligibility",
|
|
193
193
|
label: "Plan Debate Eligibility",
|
|
194
194
|
description:
|
|
195
|
-
"Pre-debate profile selection (full|standard|light). Call after DAG pass, before harness_debate_open. Uses risk, fork, implementation/stack briefs — not R1 hypothesis output.",
|
|
195
|
+
"Pre-debate profile selection (full|standard|light|fast). Call after DAG pass, before harness_debate_open. Uses risk, fork, implementation/stack briefs — not R1 hypothesis output.",
|
|
196
196
|
parameters: Type.Object({
|
|
197
197
|
risk_level: Type.Optional(
|
|
198
198
|
Type.String({ description: "low | med | high" }),
|
|
@@ -250,6 +250,7 @@ export default function harnessDebateTools(pi: ExtensionAPI) {
|
|
|
250
250
|
const result = harnessPlanDebateEligibility(input);
|
|
251
251
|
const lines = [
|
|
252
252
|
`profile: ${result.profile}`,
|
|
253
|
+
`review_gate_mode: ${result.review_gate_strategy.mode}`,
|
|
253
254
|
`required_focuses: ${result.required_focuses.join(", ")}`,
|
|
254
255
|
`min_focus_rounds: ${result.min_focus_rounds}`,
|
|
255
256
|
`debate_global_cap: ${result.debate_global_cap}`,
|
|
@@ -273,7 +274,7 @@ export default function harnessDebateTools(pi: ExtensionAPI) {
|
|
|
273
274
|
Type.String({ description: "Optional; normalized to plan-<run_id>" }),
|
|
274
275
|
),
|
|
275
276
|
debate_profile: Type.Optional(
|
|
276
|
-
Type.String({ description: "full | standard | light" }),
|
|
277
|
+
Type.String({ description: "full | standard | light | fast" }),
|
|
277
278
|
),
|
|
278
279
|
required_focuses: Type.Optional(
|
|
279
280
|
Type.Array(
|
|
@@ -297,7 +298,8 @@ export default function harnessDebateTools(pi: ExtensionAPI) {
|
|
|
297
298
|
const profile =
|
|
298
299
|
p.debate_profile === "full" ||
|
|
299
300
|
p.debate_profile === "standard" ||
|
|
300
|
-
p.debate_profile === "light"
|
|
301
|
+
p.debate_profile === "light" ||
|
|
302
|
+
p.debate_profile === "fast"
|
|
301
303
|
? p.debate_profile
|
|
302
304
|
: "standard";
|
|
303
305
|
const required_focuses = (p.required_focuses ?? []).filter((f) =>
|
|
@@ -308,11 +310,14 @@ export default function harnessDebateTools(pi: ExtensionAPI) {
|
|
|
308
310
|
required_focuses:
|
|
309
311
|
required_focuses.length > 0 ? required_focuses : undefined,
|
|
310
312
|
});
|
|
313
|
+
const review_gate_mode =
|
|
314
|
+
profile === "fast" ? ("consolidated" as const) : ("threaded" as const);
|
|
311
315
|
await initPlanMessenger(runDir(projectRoot, runId), {
|
|
312
316
|
runId,
|
|
313
317
|
debateId,
|
|
314
318
|
debate_profile: profile,
|
|
315
319
|
required_focuses: opened.required_focuses,
|
|
320
|
+
review_gate_mode,
|
|
316
321
|
});
|
|
317
322
|
const sessionId = ctx.sessionManager.getSessionId();
|
|
318
323
|
captureHarnessEvent(sessionId, "harness_debate_round", {
|
|
@@ -325,11 +330,15 @@ export default function harnessDebateTools(pi: ExtensionAPI) {
|
|
|
325
330
|
const lines = [
|
|
326
331
|
`Plan debate opened: ${debateId}`,
|
|
327
332
|
`Profile: ${profile}`,
|
|
333
|
+
`Review gate mode: ${review_gate_mode}`,
|
|
328
334
|
required_focuses.length
|
|
329
335
|
? `Required focuses: ${required_focuses.join(", ")}`
|
|
330
336
|
: opened.required_focuses?.length
|
|
331
337
|
? `Required focuses: ${opened.required_focuses.join(", ")}`
|
|
332
338
|
: "Required focuses: (default all four)",
|
|
339
|
+
review_gate_mode === "consolidated"
|
|
340
|
+
? "Consolidated path: one review round (artifacts/review-round-consolidated.yaml); escalate to threaded rounds only on blockers."
|
|
341
|
+
: "Threaded path: one review round per focus (spec → wbs → schedule → quality).",
|
|
333
342
|
`Messenger: debate-messenger/ (inbox + threads/round-N/transcript.jsonl)`,
|
|
334
343
|
];
|
|
335
344
|
if (warning) lines.push(`Note: ${warning}`);
|
|
@@ -1025,6 +1025,18 @@ export default function harnessRunContext(pi: ExtensionAPI) {
|
|
|
1025
1025
|
};
|
|
1026
1026
|
}
|
|
1027
1027
|
const relForGate = pathArg.replace(/\\/g, "/");
|
|
1028
|
+
if (/\.json$/i.test(relForGate) && relForGate.startsWith("artifacts/")) {
|
|
1029
|
+
return {
|
|
1030
|
+
content: [
|
|
1031
|
+
{
|
|
1032
|
+
type: "text",
|
|
1033
|
+
text: `Path not allowed: ${pathArg}. Plan artifacts under artifacts/ must be .yaml (use submit_* from subagents or write_harness_yaml with YAML content).`,
|
|
1034
|
+
},
|
|
1035
|
+
],
|
|
1036
|
+
details: { path: pathArg },
|
|
1037
|
+
isError: true,
|
|
1038
|
+
};
|
|
1039
|
+
}
|
|
1028
1040
|
if (
|
|
1029
1041
|
isReviewRoundArtifactPath(relForGate) &&
|
|
1030
1042
|
!isReviewRoundYamlWriteAllowed()
|
|
@@ -18,7 +18,8 @@ const MODULE_URL = import.meta.url;
|
|
|
18
18
|
const DocumentSchema = Type.Object(
|
|
19
19
|
{
|
|
20
20
|
document: Type.Record(Type.String(), Type.Unknown(), {
|
|
21
|
-
description:
|
|
21
|
+
description:
|
|
22
|
+
"Plan artifact fields (validated via plan-*.schema.json, persisted as canonical YAML on disk)",
|
|
22
23
|
}),
|
|
23
24
|
},
|
|
24
25
|
{ additionalProperties: false },
|
|
@@ -58,30 +59,6 @@ export default function harnessSubagentSubmit(pi: ExtensionAPI) {
|
|
|
58
59
|
pi.on("tool_call", async (event) => {
|
|
59
60
|
if (!event.toolName.startsWith("submit_")) return undefined;
|
|
60
61
|
const subprocessOk = isSubprocessHarness();
|
|
61
|
-
// #region agent log
|
|
62
|
-
fetch("http://127.0.0.1:7928/ingest/a5d40896-34cb-4f12-97db-df7ada0b22f0", {
|
|
63
|
-
method: "POST",
|
|
64
|
-
headers: {
|
|
65
|
-
"Content-Type": "application/json",
|
|
66
|
-
"X-Debug-Session-Id": "2ca12b",
|
|
67
|
-
},
|
|
68
|
-
body: JSON.stringify({
|
|
69
|
-
sessionId: "2ca12b",
|
|
70
|
-
hypothesisId: "H2",
|
|
71
|
-
location: "harness-subagent-submit.ts:tool_call",
|
|
72
|
-
message: "submit tool_call gate",
|
|
73
|
-
data: {
|
|
74
|
-
toolName: event.toolName,
|
|
75
|
-
PI_HARNESS_SUBPROCESS: process.env.PI_HARNESS_SUBPROCESS,
|
|
76
|
-
HARNESS_RUN_ID: process.env.HARNESS_RUN_ID ?? null,
|
|
77
|
-
HARNESS_RUN_DIR: process.env.HARNESS_RUN_DIR ?? null,
|
|
78
|
-
HARNESS_AGENT_ID: process.env.HARNESS_AGENT_ID ?? null,
|
|
79
|
-
subprocessOk,
|
|
80
|
-
},
|
|
81
|
-
timestamp: Date.now(),
|
|
82
|
-
}),
|
|
83
|
-
}).catch(() => {});
|
|
84
|
-
// #endregion
|
|
85
62
|
if (!subprocessOk) {
|
|
86
63
|
return {
|
|
87
64
|
block: true,
|
|
@@ -127,6 +127,7 @@ function propsFromRun(
|
|
|
127
127
|
): Record<string, unknown> {
|
|
128
128
|
return {
|
|
129
129
|
harness_run_id: runId,
|
|
130
|
+
run_id: runId,
|
|
130
131
|
harness_plan_id: planId,
|
|
131
132
|
harness_phase: phase,
|
|
132
133
|
pi_session_id: distinctId,
|
|
@@ -134,6 +135,28 @@ function propsFromRun(
|
|
|
134
135
|
};
|
|
135
136
|
}
|
|
136
137
|
|
|
138
|
+
function normalizedRunId(
|
|
139
|
+
data: Record<string, unknown>,
|
|
140
|
+
trace: TraceState | null,
|
|
141
|
+
distinctId: string,
|
|
142
|
+
): string {
|
|
143
|
+
const fromData = [
|
|
144
|
+
data.harness_run_id,
|
|
145
|
+
data.run_id,
|
|
146
|
+
data.runId,
|
|
147
|
+
data.debate_id,
|
|
148
|
+
];
|
|
149
|
+
for (const candidate of fromData) {
|
|
150
|
+
if (typeof candidate === "string" && candidate.trim().length > 0) {
|
|
151
|
+
return candidate;
|
|
152
|
+
}
|
|
153
|
+
}
|
|
154
|
+
if (typeof trace?.run_id === "string" && trace.run_id.length > 0) {
|
|
155
|
+
return trace.run_id;
|
|
156
|
+
}
|
|
157
|
+
return distinctId;
|
|
158
|
+
}
|
|
159
|
+
|
|
137
160
|
function mapCustomEntry(
|
|
138
161
|
customType: string,
|
|
139
162
|
data: Record<string, unknown>,
|
|
@@ -144,11 +167,9 @@ function mapCustomEntry(
|
|
|
144
167
|
event: HarnessPostHogEventName;
|
|
145
168
|
properties: Record<string, unknown>;
|
|
146
169
|
} | null {
|
|
147
|
-
const runId =
|
|
148
|
-
(typeof data.run_id === "string" && data.run_id) ||
|
|
149
|
-
trace?.run_id ||
|
|
150
|
-
distinctId;
|
|
170
|
+
const runId = normalizedRunId(data, trace, distinctId);
|
|
151
171
|
const planId =
|
|
172
|
+
(typeof data.harness_plan_id === "string" && data.harness_plan_id) ||
|
|
152
173
|
(typeof data.plan_id === "string" && data.plan_id) ||
|
|
153
174
|
policy?.planId ||
|
|
154
175
|
trace?.plan_id ||
|
|
@@ -185,6 +206,7 @@ function mapCustomEntry(
|
|
|
185
206
|
event: "harness_debate_consensus",
|
|
186
207
|
properties: {
|
|
187
208
|
...base,
|
|
209
|
+
debate_id: String(data.debate_id ?? runId),
|
|
188
210
|
consensus_id:
|
|
189
211
|
typeof data.debate_id === "string" ? data.debate_id : runId,
|
|
190
212
|
outcome: String(kind),
|
|
@@ -195,6 +217,8 @@ function mapCustomEntry(
|
|
|
195
217
|
event: "harness_debate_round",
|
|
196
218
|
properties: {
|
|
197
219
|
...base,
|
|
220
|
+
debate_id: String(data.debate_id ?? runId),
|
|
221
|
+
round_index: Number(data.round_index ?? data.round ?? 0),
|
|
198
222
|
round: Number(data.round_index ?? data.round ?? 0),
|
|
199
223
|
outcome: String(kind ?? "round"),
|
|
200
224
|
},
|
|
@@ -206,6 +230,7 @@ function mapCustomEntry(
|
|
|
206
230
|
event: "harness_debate_consensus",
|
|
207
231
|
properties: {
|
|
208
232
|
...base,
|
|
233
|
+
debate_id: String(data.debate_id ?? runId),
|
|
209
234
|
consensus_id:
|
|
210
235
|
typeof data.consensus_id === "string"
|
|
211
236
|
? data.consensus_id
|
|
@@ -25,6 +25,7 @@ import {
|
|
|
25
25
|
} from "./debate-bus-state.js";
|
|
26
26
|
import {
|
|
27
27
|
type DebateProfile,
|
|
28
|
+
PLAN_BUDGET_FAST,
|
|
28
29
|
PLAN_BUDGET_LIGHT,
|
|
29
30
|
PLAN_BUDGET_STANDARD,
|
|
30
31
|
} from "./plan-debate-eligibility.js";
|
|
@@ -113,15 +114,20 @@ export function capsForDebate(
|
|
|
113
114
|
} {
|
|
114
115
|
if (isPlanDebateId(debateId)) {
|
|
115
116
|
const active = profile ?? getDebateState()?.debate_profile ?? "standard";
|
|
116
|
-
const budget =
|
|
117
|
+
const budget =
|
|
118
|
+
active === "light"
|
|
119
|
+
? PLAN_BUDGET_LIGHT
|
|
120
|
+
: active === "fast"
|
|
121
|
+
? PLAN_BUDGET_FAST
|
|
122
|
+
: PLAN_BUDGET;
|
|
117
123
|
const caps = { name: "plan" as const, ...budget };
|
|
118
124
|
if (!isHarnessBudgetEnforceOn()) {
|
|
119
125
|
return {
|
|
120
126
|
...caps,
|
|
121
|
-
max_rounds:
|
|
122
|
-
max_exchanges_per_round:
|
|
123
|
-
round_token_cap: caps.round_token_cap *
|
|
124
|
-
debate_global_cap: caps.debate_global_cap *
|
|
127
|
+
max_rounds: caps.max_rounds,
|
|
128
|
+
max_exchanges_per_round: Math.max(caps.max_exchanges_per_round, 2),
|
|
129
|
+
round_token_cap: caps.round_token_cap * 2,
|
|
130
|
+
debate_global_cap: caps.debate_global_cap * 2,
|
|
125
131
|
};
|
|
126
132
|
}
|
|
127
133
|
return caps;
|
|
@@ -135,10 +141,10 @@ export function capsForDebate(
|
|
|
135
141
|
if (!isHarnessBudgetEnforceOn()) {
|
|
136
142
|
return {
|
|
137
143
|
...caps,
|
|
138
|
-
max_rounds:
|
|
139
|
-
max_exchanges_per_round:
|
|
140
|
-
round_token_cap: caps.round_token_cap *
|
|
141
|
-
debate_global_cap: caps.debate_global_cap *
|
|
144
|
+
max_rounds: caps.max_rounds,
|
|
145
|
+
max_exchanges_per_round: Math.max(caps.max_exchanges_per_round, 2),
|
|
146
|
+
round_token_cap: caps.round_token_cap * 2,
|
|
147
|
+
debate_global_cap: caps.debate_global_cap * 2,
|
|
142
148
|
};
|
|
143
149
|
}
|
|
144
150
|
return caps;
|
|
@@ -1,23 +1,32 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Resolve concrete LLM credentials for harness subagent subprocesses.
|
|
3
3
|
*
|
|
4
|
-
* Parent sessions often use `router
|
|
4
|
+
* Parent sessions often use `router/<profile>` (pi-model-router). Subagents run with
|
|
5
5
|
* `--no-extensions`, so they cannot use the logical router provider — they need
|
|
6
6
|
* a real provider/model plus that provider's API key.
|
|
7
|
+
*
|
|
8
|
+
* Session-locked routing: subprocess model is chosen once from agent system prompt
|
|
9
|
+
* complexity (same analysis as parent session lock), not from per-turn parent tier.
|
|
7
10
|
*/
|
|
8
11
|
|
|
9
12
|
import { existsSync, readFileSync } from "node:fs";
|
|
10
13
|
import { join } from "node:path";
|
|
14
|
+
import { resolveTierFromPrompt } from "../../../vendor/pi-model-router/extensions/routing.js";
|
|
15
|
+
import type {
|
|
16
|
+
RouterProfile,
|
|
17
|
+
RouterTier,
|
|
18
|
+
RoutingRule,
|
|
19
|
+
} from "../../../vendor/pi-model-router/extensions/types.js";
|
|
11
20
|
import type { AgentConfig } from "../../../vendor/pi-subagents/src/agents.js";
|
|
12
21
|
|
|
13
22
|
const ROUTER_SENTINEL_KEY = "pi-model-router";
|
|
14
23
|
const SENTINEL_API_KEYS = new Set([ROUTER_SENTINEL_KEY, "<authenticated>"]);
|
|
15
24
|
|
|
16
|
-
type RouterTier = "high" | "medium" | "low";
|
|
17
|
-
|
|
18
25
|
interface ModelRouterJson {
|
|
19
26
|
defaultProfile?: string;
|
|
20
|
-
|
|
27
|
+
phaseBias?: number;
|
|
28
|
+
rules?: RoutingRule[];
|
|
29
|
+
profiles?: Record<string, RouterProfile>;
|
|
21
30
|
}
|
|
22
31
|
|
|
23
32
|
export function isUsableApiKey(key: string | undefined): key is string {
|
|
@@ -35,7 +44,33 @@ export function parseModelRef(
|
|
|
35
44
|
return { provider, modelId };
|
|
36
45
|
}
|
|
37
46
|
|
|
38
|
-
|
|
47
|
+
/** Planning subagents that should prefer low/medium router tier for latency. */
|
|
48
|
+
const ROUTINE_PLANNING_AGENT_PATHS = new Set([
|
|
49
|
+
"harness/planning/plan-evaluator",
|
|
50
|
+
"harness/planning/plan-adversary",
|
|
51
|
+
"harness/planning/review-integrator",
|
|
52
|
+
"harness/planning/hypothesis-validator",
|
|
53
|
+
"harness/planning/sprint-contract-auditor",
|
|
54
|
+
"harness/planning/scout-structure",
|
|
55
|
+
"harness/planning/scout-semantic",
|
|
56
|
+
"harness/planning/decompose",
|
|
57
|
+
"harness/planning/hypothesis",
|
|
58
|
+
"harness/planning/stack-research",
|
|
59
|
+
"harness/planning/plan-validator",
|
|
60
|
+
]);
|
|
61
|
+
|
|
62
|
+
export function isRoutinePlanningAgent(agentName: string): boolean {
|
|
63
|
+
return ROUTINE_PLANNING_AGENT_PATHS.has(agentName);
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
export function thinkingToRouterTier(
|
|
67
|
+
thinking?: string,
|
|
68
|
+
agentName?: string,
|
|
69
|
+
): RouterTier {
|
|
70
|
+
if (agentName && isRoutinePlanningAgent(agentName)) {
|
|
71
|
+
if (thinking === "high" || thinking === "xhigh") return "medium";
|
|
72
|
+
return "low";
|
|
73
|
+
}
|
|
39
74
|
if (thinking === "high" || thinking === "xhigh") return "high";
|
|
40
75
|
if (thinking === "off" || thinking === "minimal" || thinking === "low") {
|
|
41
76
|
return "low";
|
|
@@ -43,6 +78,64 @@ export function thinkingToRouterTier(thinking?: string): RouterTier {
|
|
|
43
78
|
return "medium";
|
|
44
79
|
}
|
|
45
80
|
|
|
81
|
+
function loadModelRouterConfig(cwd: string): ModelRouterJson | undefined {
|
|
82
|
+
const path = join(cwd, ".pi", "model-router.json");
|
|
83
|
+
if (!existsSync(path)) return undefined;
|
|
84
|
+
try {
|
|
85
|
+
return JSON.parse(readFileSync(path, "utf8")) as ModelRouterJson;
|
|
86
|
+
} catch {
|
|
87
|
+
return undefined;
|
|
88
|
+
}
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
function resolveRouterProfileEntry(
|
|
92
|
+
config: ModelRouterJson,
|
|
93
|
+
profileId: string,
|
|
94
|
+
): { profileId: string; profile: RouterProfile } | undefined {
|
|
95
|
+
const profiles = config.profiles;
|
|
96
|
+
if (!profiles) return undefined;
|
|
97
|
+
const candidates = [
|
|
98
|
+
profileId,
|
|
99
|
+
config.defaultProfile ?? "auto",
|
|
100
|
+
"auto",
|
|
101
|
+
"opencode-go",
|
|
102
|
+
];
|
|
103
|
+
const seen = new Set<string>();
|
|
104
|
+
for (const id of candidates) {
|
|
105
|
+
if (!id || seen.has(id)) continue;
|
|
106
|
+
seen.add(id);
|
|
107
|
+
const profile = profiles[id];
|
|
108
|
+
if (profile?.high?.model && profile.medium?.model && profile.low?.model) {
|
|
109
|
+
return { profileId: id, profile };
|
|
110
|
+
}
|
|
111
|
+
}
|
|
112
|
+
return undefined;
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
/** Tier from agent system prompt (+ optional task line) for session model lock. */
|
|
116
|
+
export function resolveSubagentRouterTier(
|
|
117
|
+
cwd: string,
|
|
118
|
+
profileId: string,
|
|
119
|
+
agent: AgentConfig,
|
|
120
|
+
taskSnippet?: string,
|
|
121
|
+
): RouterTier {
|
|
122
|
+
const config = loadModelRouterConfig(cwd);
|
|
123
|
+
if (config) {
|
|
124
|
+
const entry = resolveRouterProfileEntry(config, profileId);
|
|
125
|
+
if (entry) {
|
|
126
|
+
return resolveTierFromPrompt(
|
|
127
|
+
agent.systemPrompt ?? "",
|
|
128
|
+
taskSnippet?.trim() ?? "",
|
|
129
|
+
entry.profileId,
|
|
130
|
+
entry.profile,
|
|
131
|
+
config.rules,
|
|
132
|
+
config.phaseBias ?? 0.5,
|
|
133
|
+
);
|
|
134
|
+
}
|
|
135
|
+
}
|
|
136
|
+
return thinkingToRouterTier(agent.thinking, agent.name);
|
|
137
|
+
}
|
|
138
|
+
|
|
46
139
|
/** Map router profile tier → concrete `provider/model` from `.pi/model-router.json`. */
|
|
47
140
|
export function resolveRouterConcreteModelRef(
|
|
48
141
|
cwd: string,
|
|
@@ -51,19 +144,10 @@ export function resolveRouterConcreteModelRef(
|
|
|
51
144
|
): string | undefined {
|
|
52
145
|
const path = join(cwd, ".pi", "model-router.json");
|
|
53
146
|
if (!existsSync(path)) return undefined;
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
return undefined;
|
|
59
|
-
}
|
|
60
|
-
const profiles = raw.profiles;
|
|
61
|
-
if (!profiles) return undefined;
|
|
62
|
-
const profile =
|
|
63
|
-
profiles[profileId] ??
|
|
64
|
-
profiles[raw.defaultProfile ?? "auto"] ??
|
|
65
|
-
profiles.auto;
|
|
66
|
-
const model = profile?.[tier]?.model;
|
|
147
|
+
const raw = loadModelRouterConfig(cwd);
|
|
148
|
+
if (!raw) return undefined;
|
|
149
|
+
const entry = resolveRouterProfileEntry(raw, profileId);
|
|
150
|
+
const model = entry?.profile[tier]?.model;
|
|
67
151
|
return typeof model === "string" && model.includes("/") ? model : undefined;
|
|
68
152
|
}
|
|
69
153
|
|
|
@@ -83,6 +167,7 @@ export function resolveConcreteSubagentModel(
|
|
|
83
167
|
cwd: string,
|
|
84
168
|
parentModel: { provider: string; id: string } | undefined,
|
|
85
169
|
agent: AgentConfig,
|
|
170
|
+
taskSnippet?: string,
|
|
86
171
|
): ConcreteSubagentModel | undefined {
|
|
87
172
|
if (agent.model && !agent.model.startsWith("router/")) {
|
|
88
173
|
const parsed = parseModelRef(agent.model);
|
|
@@ -109,7 +194,7 @@ export function resolveConcreteSubagentModel(
|
|
|
109
194
|
agentIsRouter && agent.model
|
|
110
195
|
? agent.model.slice("router/".length)
|
|
111
196
|
: (parentModel?.id ?? "auto");
|
|
112
|
-
const tier =
|
|
197
|
+
const tier = resolveSubagentRouterTier(cwd, profileId, agent, taskSnippet);
|
|
113
198
|
const concrete = resolveRouterConcreteModelRef(cwd, profileId, tier);
|
|
114
199
|
if (!concrete) return undefined;
|
|
115
200
|
const parsed = parseModelRef(concrete);
|
|
@@ -24,6 +24,9 @@ export type HarnessAgentKind =
|
|
|
24
24
|
|
|
25
25
|
const MUTATING_TOOLS = new Set(["write", "edit"]);
|
|
26
26
|
|
|
27
|
+
/** Planning agents must use submit_* → canonical artifacts/*.yaml, not JSON dumps. */
|
|
28
|
+
const PLANNING_ARTIFACT_JSON_WRITE = /artifacts\/[^\s'"`;]+\.json\b/i;
|
|
29
|
+
|
|
27
30
|
const PLANNING_BASH_DENY_PATTERNS = [
|
|
28
31
|
/\bgraphify\s+update\b/i,
|
|
29
32
|
/\bgraphify\s+extract\b/i,
|
|
@@ -174,6 +177,17 @@ export function evaluateHarnessSubagentToolCall(
|
|
|
174
177
|
|
|
175
178
|
if (toolName === "bash") {
|
|
176
179
|
const command = String(input?.command ?? "");
|
|
180
|
+
if (
|
|
181
|
+
kind === "planner" &&
|
|
182
|
+
command &&
|
|
183
|
+
PLANNING_ARTIFACT_JSON_WRITE.test(command)
|
|
184
|
+
) {
|
|
185
|
+
return {
|
|
186
|
+
action: "block",
|
|
187
|
+
reason:
|
|
188
|
+
"harness-subagent-policy: artifacts must be YAML only — use submit_* (e.g. submit_hypothesis_brief → artifacts/hypothesis.yaml), not bash writes to .json.",
|
|
189
|
+
};
|
|
190
|
+
}
|
|
177
191
|
if (command && isMutatingBash(command)) {
|
|
178
192
|
return {
|
|
179
193
|
action: "block",
|
|
@@ -13,6 +13,11 @@ import {
|
|
|
13
13
|
type HarnessSubagentsOptions,
|
|
14
14
|
type SpawnAuthForward,
|
|
15
15
|
} from "../../../vendor/pi-subagents/src/subagents.js";
|
|
16
|
+
import {
|
|
17
|
+
getLatestRunContext,
|
|
18
|
+
getRunIdFromSession,
|
|
19
|
+
type HarnessPhase,
|
|
20
|
+
} from "../../lib/harness-run-context.js";
|
|
16
21
|
import { parseSpawnContextFromTask } from "../../lib/harness-spawn-parse.js";
|
|
17
22
|
import { harnessSubagentSubmitExtensionPath } from "../harness-subagent-submit.js";
|
|
18
23
|
import { refreshHarnessCocoindexIndex } from "./harness-cocoindex-refresh.js";
|
|
@@ -35,6 +40,51 @@ import {
|
|
|
35
40
|
|
|
36
41
|
const spawnBudget = createSpawnBudgetState();
|
|
37
42
|
let lastSessionId = "harness";
|
|
43
|
+
let spawnGroupCounter = 0;
|
|
44
|
+
type PendingSpawnTelemetry = {
|
|
45
|
+
harness_run_id: string;
|
|
46
|
+
run_id: string;
|
|
47
|
+
harness_plan_id: string;
|
|
48
|
+
harness_phase: HarnessPhase;
|
|
49
|
+
agent_ids: string[];
|
|
50
|
+
spawn_group_id: string;
|
|
51
|
+
};
|
|
52
|
+
let pendingSpawnTelemetry: PendingSpawnTelemetry | null = null;
|
|
53
|
+
|
|
54
|
+
function collectHarnessAgentIds(params: Record<string, unknown>): string[] {
|
|
55
|
+
const out = new Set<string>();
|
|
56
|
+
const maybe = params as {
|
|
57
|
+
agent?: string;
|
|
58
|
+
chain?: Array<{ agent?: string }>;
|
|
59
|
+
tasks?: Array<{ agent?: string }>;
|
|
60
|
+
aggregator?: { agent?: string };
|
|
61
|
+
};
|
|
62
|
+
if (typeof maybe.agent === "string" && maybe.agent.startsWith("harness/")) {
|
|
63
|
+
out.add(maybe.agent);
|
|
64
|
+
}
|
|
65
|
+
for (const item of maybe.chain ?? []) {
|
|
66
|
+
if (typeof item?.agent === "string" && item.agent.startsWith("harness/")) {
|
|
67
|
+
out.add(item.agent);
|
|
68
|
+
}
|
|
69
|
+
}
|
|
70
|
+
for (const item of maybe.tasks ?? []) {
|
|
71
|
+
if (typeof item?.agent === "string" && item.agent.startsWith("harness/")) {
|
|
72
|
+
out.add(item.agent);
|
|
73
|
+
}
|
|
74
|
+
}
|
|
75
|
+
if (
|
|
76
|
+
typeof maybe.aggregator?.agent === "string" &&
|
|
77
|
+
maybe.aggregator.agent.startsWith("harness/")
|
|
78
|
+
) {
|
|
79
|
+
out.add(maybe.aggregator.agent);
|
|
80
|
+
}
|
|
81
|
+
return Array.from(out.values()).sort();
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
function nextSpawnGroupId(sessionId: string): string {
|
|
85
|
+
spawnGroupCounter += 1;
|
|
86
|
+
return `${sessionId}-${Date.now()}-${spawnGroupCounter}`;
|
|
87
|
+
}
|
|
38
88
|
|
|
39
89
|
async function resolveHarnessSpawnAuth(
|
|
40
90
|
ctx: ExtensionContext,
|
|
@@ -111,11 +161,13 @@ export function createHarnessSubagentsExtension(
|
|
|
111
161
|
const { harnessCount } = countHarnessAgentsInRequest(
|
|
112
162
|
params as Parameters<typeof countHarnessAgentsInRequest>[0],
|
|
113
163
|
);
|
|
164
|
+
pendingSpawnTelemetry = null;
|
|
114
165
|
if (harnessCount > 0) {
|
|
115
166
|
const budget = checkHarnessSpawnBudget(spawnBudget, harnessCount);
|
|
116
167
|
if (!budget.ok) {
|
|
117
168
|
return { ok: false, message: budget.message };
|
|
118
169
|
}
|
|
170
|
+
const entries = ctx.sessionManager.getEntries();
|
|
119
171
|
const phase = inferPhaseForPrecheck(ctx.sessionManager.getEntries());
|
|
120
172
|
const pre = precheckHarnessSubagentSpawn(
|
|
121
173
|
params as Parameters<typeof precheckHarnessSubagentSpawn>[0],
|
|
@@ -133,6 +185,19 @@ export function createHarnessSubagentsExtension(
|
|
|
133
185
|
return { ok: false, message: refreshMsg };
|
|
134
186
|
}
|
|
135
187
|
}
|
|
188
|
+
const runCtx = getLatestRunContext(entries);
|
|
189
|
+
const runId =
|
|
190
|
+
runCtx?.run_id ??
|
|
191
|
+
getRunIdFromSession(entries, lastSessionId) ??
|
|
192
|
+
lastSessionId;
|
|
193
|
+
pendingSpawnTelemetry = {
|
|
194
|
+
harness_run_id: runId,
|
|
195
|
+
run_id: runId,
|
|
196
|
+
harness_plan_id: runCtx?.plan_id ?? "plan-unknown",
|
|
197
|
+
harness_phase: phase,
|
|
198
|
+
agent_ids: collectHarnessAgentIds(params as Record<string, unknown>),
|
|
199
|
+
spawn_group_id: nextSpawnGroupId(lastSessionId),
|
|
200
|
+
};
|
|
136
201
|
}
|
|
137
202
|
return { ok: true };
|
|
138
203
|
},
|
|
@@ -142,6 +207,16 @@ export function createHarnessSubagentsExtension(
|
|
|
142
207
|
captureHarnessEvent(lastSessionId, "harness_subagent_spawned", {
|
|
143
208
|
active_after: spawnBudget.active,
|
|
144
209
|
spawn_count: harnessCount,
|
|
210
|
+
harness_run_id: pendingSpawnTelemetry?.harness_run_id ?? lastSessionId,
|
|
211
|
+
run_id: pendingSpawnTelemetry?.run_id ?? lastSessionId,
|
|
212
|
+
harness_plan_id:
|
|
213
|
+
pendingSpawnTelemetry?.harness_plan_id ?? "plan-unknown",
|
|
214
|
+
harness_phase: pendingSpawnTelemetry?.harness_phase ?? "plan",
|
|
215
|
+
agent_ids: pendingSpawnTelemetry?.agent_ids ?? [],
|
|
216
|
+
agent_count: pendingSpawnTelemetry?.agent_ids.length ?? harnessCount,
|
|
217
|
+
spawn_group_id:
|
|
218
|
+
pendingSpawnTelemetry?.spawn_group_id ??
|
|
219
|
+
nextSpawnGroupId(lastSessionId),
|
|
145
220
|
});
|
|
146
221
|
},
|
|
147
222
|
onSpawnEnd: (harnessCount) => {
|
|
@@ -154,7 +229,17 @@ export function createHarnessSubagentsExtension(
|
|
|
154
229
|
mode,
|
|
155
230
|
duration_ms: durationMs,
|
|
156
231
|
agent_count: agents.length,
|
|
232
|
+
agent_ids: agents,
|
|
233
|
+
harness_run_id: pendingSpawnTelemetry?.harness_run_id ?? lastSessionId,
|
|
234
|
+
run_id: pendingSpawnTelemetry?.run_id ?? lastSessionId,
|
|
235
|
+
harness_plan_id:
|
|
236
|
+
pendingSpawnTelemetry?.harness_plan_id ?? "plan-unknown",
|
|
237
|
+
harness_phase: pendingSpawnTelemetry?.harness_phase ?? "plan",
|
|
238
|
+
spawn_group_id:
|
|
239
|
+
pendingSpawnTelemetry?.spawn_group_id ??
|
|
240
|
+
nextSpawnGroupId(lastSessionId),
|
|
157
241
|
});
|
|
242
|
+
pendingSpawnTelemetry = null;
|
|
158
243
|
},
|
|
159
244
|
};
|
|
160
245
|
|