ultimate-pi 0.16.0 → 0.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. package/.pi/agents/harness/planning/hypothesis.md +1 -1
  2. package/.pi/agents/harness/planning/implementation-researcher.md +1 -1
  3. package/.pi/extensions/harness-debate-tools.ts +12 -3
  4. package/.pi/extensions/harness-run-context.ts +12 -0
  5. package/.pi/extensions/harness-subagent-submit.ts +2 -25
  6. package/.pi/extensions/harness-telemetry.ts +29 -4
  7. package/.pi/extensions/lib/debate-bus-core.ts +15 -9
  8. package/.pi/extensions/lib/harness-subagent-auth.ts +104 -19
  9. package/.pi/extensions/lib/harness-subagent-policy.ts +14 -0
  10. package/.pi/extensions/lib/harness-subagents-bridge.ts +85 -0
  11. package/.pi/extensions/lib/plan-debate-eligibility.ts +61 -8
  12. package/.pi/extensions/lib/plan-debate-focus.ts +21 -9
  13. package/.pi/extensions/lib/plan-debate-gate.ts +80 -17
  14. package/.pi/extensions/lib/plan-debate-lanes.ts +27 -3
  15. package/.pi/extensions/lib/plan-debate-round-status.ts +18 -7
  16. package/.pi/extensions/lib/plan-messenger.ts +4 -0
  17. package/.pi/extensions/lib/plan-review-gate.ts +51 -0
  18. package/.pi/extensions/trace-recorder.ts +1 -0
  19. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/artifacts/implementation-research.yaml +28 -0
  20. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/artifacts/review-round-consolidated.yaml +25 -0
  21. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/plan-packet.yaml +196 -0
  22. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/plan-review.md +14 -0
  23. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/research-brief.yaml +62 -0
  24. package/.pi/harness/evals/smoke/smoke-harness-plan.mjs +40 -17
  25. package/.pi/harness/specs/plan-review-round-draft.schema.json +1 -1
  26. package/.pi/model-router.example.json +13 -4
  27. package/.pi/prompts/harness-plan.md +25 -7
  28. package/.pi/prompts/harness-setup.md +4 -4
  29. package/.pi/scripts/harness-generate-model-router.mjs +118 -36
  30. package/.pi/scripts/harness-model-router-routing.test.mjs +97 -0
  31. package/.pi/scripts/harness-sync-model-router.mjs +15 -2
  32. package/.pi/scripts/harness-verify.mjs +29 -0
  33. package/CHANGELOG.md +11 -0
  34. package/package.json +1 -1
  35. package/vendor/pi-model-router/UPSTREAM_PIN.md +3 -1
  36. package/vendor/pi-model-router/extensions/commands.ts +4 -4
  37. package/vendor/pi-model-router/extensions/index.ts +21 -0
  38. package/vendor/pi-model-router/extensions/provider.ts +130 -79
  39. package/vendor/pi-model-router/extensions/routing.ts +148 -0
  40. package/vendor/pi-model-router/extensions/state.ts +3 -0
  41. package/vendor/pi-model-router/extensions/types.ts +9 -0
  42. package/vendor/pi-model-router/extensions/ui.ts +16 -2
@@ -63,4 +63,4 @@ Do **not** include self-evaluation scores — a separate agent handles that.
63
63
 
64
64
  ## Output
65
65
 
66
- Before ending, call `submit_hypothesis_brief` exactly once with the full `PlanHypothesisBrief` document. Do not paste the artifact as prose or a fenced JSON block — the tool write is the deliverable.
66
+ Before ending, call `submit_hypothesis_brief` exactly once with the full `PlanHypothesisBrief` document. The harness writes **`artifacts/hypothesis.yaml`** (YAML on disk). Do not use bash or any `*.json` path under `artifacts/`; do not paste the artifact as prose or a fenced JSON block — the submit tool is the deliverable.
@@ -31,7 +31,7 @@ Read `HarnessSpawnContext` plus paths to `artifacts/decomposition.yaml`, `artifa
31
31
 
32
32
  ## Output
33
33
 
34
- Before ending, call `submit_implementation_research` exactly once with the full document. Prose summary is optional; the artifact is the tool call.
34
+ Before ending, call `submit_implementation_research` exactly once with the full document. The harness writes **`artifacts/implementation-research.yaml`** (YAML on disk). Do not use bash or `implementation-research.json`; prose summary is optional; the submit tool is the deliverable.
35
35
 
36
36
 
37
37
  ## Guardrails
@@ -192,7 +192,7 @@ export default function harnessDebateTools(pi: ExtensionAPI) {
192
192
  name: "harness_plan_debate_eligibility",
193
193
  label: "Plan Debate Eligibility",
194
194
  description:
195
- "Pre-debate profile selection (full|standard|light). Call after DAG pass, before harness_debate_open. Uses risk, fork, implementation/stack briefs — not R1 hypothesis output.",
195
+ "Pre-debate profile selection (full|standard|light|fast). Call after DAG pass, before harness_debate_open. Uses risk, fork, implementation/stack briefs — not R1 hypothesis output.",
196
196
  parameters: Type.Object({
197
197
  risk_level: Type.Optional(
198
198
  Type.String({ description: "low | med | high" }),
@@ -250,6 +250,7 @@ export default function harnessDebateTools(pi: ExtensionAPI) {
250
250
  const result = harnessPlanDebateEligibility(input);
251
251
  const lines = [
252
252
  `profile: ${result.profile}`,
253
+ `review_gate_mode: ${result.review_gate_strategy.mode}`,
253
254
  `required_focuses: ${result.required_focuses.join(", ")}`,
254
255
  `min_focus_rounds: ${result.min_focus_rounds}`,
255
256
  `debate_global_cap: ${result.debate_global_cap}`,
@@ -273,7 +274,7 @@ export default function harnessDebateTools(pi: ExtensionAPI) {
273
274
  Type.String({ description: "Optional; normalized to plan-<run_id>" }),
274
275
  ),
275
276
  debate_profile: Type.Optional(
276
- Type.String({ description: "full | standard | light" }),
277
+ Type.String({ description: "full | standard | light | fast" }),
277
278
  ),
278
279
  required_focuses: Type.Optional(
279
280
  Type.Array(
@@ -297,7 +298,8 @@ export default function harnessDebateTools(pi: ExtensionAPI) {
297
298
  const profile =
298
299
  p.debate_profile === "full" ||
299
300
  p.debate_profile === "standard" ||
300
- p.debate_profile === "light"
301
+ p.debate_profile === "light" ||
302
+ p.debate_profile === "fast"
301
303
  ? p.debate_profile
302
304
  : "standard";
303
305
  const required_focuses = (p.required_focuses ?? []).filter((f) =>
@@ -308,11 +310,14 @@ export default function harnessDebateTools(pi: ExtensionAPI) {
308
310
  required_focuses:
309
311
  required_focuses.length > 0 ? required_focuses : undefined,
310
312
  });
313
+ const review_gate_mode =
314
+ profile === "fast" ? ("consolidated" as const) : ("threaded" as const);
311
315
  await initPlanMessenger(runDir(projectRoot, runId), {
312
316
  runId,
313
317
  debateId,
314
318
  debate_profile: profile,
315
319
  required_focuses: opened.required_focuses,
320
+ review_gate_mode,
316
321
  });
317
322
  const sessionId = ctx.sessionManager.getSessionId();
318
323
  captureHarnessEvent(sessionId, "harness_debate_round", {
@@ -325,11 +330,15 @@ export default function harnessDebateTools(pi: ExtensionAPI) {
325
330
  const lines = [
326
331
  `Plan debate opened: ${debateId}`,
327
332
  `Profile: ${profile}`,
333
+ `Review gate mode: ${review_gate_mode}`,
328
334
  required_focuses.length
329
335
  ? `Required focuses: ${required_focuses.join(", ")}`
330
336
  : opened.required_focuses?.length
331
337
  ? `Required focuses: ${opened.required_focuses.join(", ")}`
332
338
  : "Required focuses: (default all four)",
339
+ review_gate_mode === "consolidated"
340
+ ? "Consolidated path: one review round (artifacts/review-round-consolidated.yaml); escalate to threaded rounds only on blockers."
341
+ : "Threaded path: one review round per focus (spec → wbs → schedule → quality).",
333
342
  `Messenger: debate-messenger/ (inbox + threads/round-N/transcript.jsonl)`,
334
343
  ];
335
344
  if (warning) lines.push(`Note: ${warning}`);
@@ -1025,6 +1025,18 @@ export default function harnessRunContext(pi: ExtensionAPI) {
1025
1025
  };
1026
1026
  }
1027
1027
  const relForGate = pathArg.replace(/\\/g, "/");
1028
+ if (/\.json$/i.test(relForGate) && relForGate.startsWith("artifacts/")) {
1029
+ return {
1030
+ content: [
1031
+ {
1032
+ type: "text",
1033
+ text: `Path not allowed: ${pathArg}. Plan artifacts under artifacts/ must be .yaml (use submit_* from subagents or write_harness_yaml with YAML content).`,
1034
+ },
1035
+ ],
1036
+ details: { path: pathArg },
1037
+ isError: true,
1038
+ };
1039
+ }
1028
1040
  if (
1029
1041
  isReviewRoundArtifactPath(relForGate) &&
1030
1042
  !isReviewRoundYamlWriteAllowed()
@@ -18,7 +18,8 @@ const MODULE_URL = import.meta.url;
18
18
  const DocumentSchema = Type.Object(
19
19
  {
20
20
  document: Type.Record(Type.String(), Type.Unknown(), {
21
- description: "Full artifact document matching the harness JSON schema",
21
+ description:
22
+ "Plan artifact fields (validated via plan-*.schema.json, persisted as canonical YAML on disk)",
22
23
  }),
23
24
  },
24
25
  { additionalProperties: false },
@@ -58,30 +59,6 @@ export default function harnessSubagentSubmit(pi: ExtensionAPI) {
58
59
  pi.on("tool_call", async (event) => {
59
60
  if (!event.toolName.startsWith("submit_")) return undefined;
60
61
  const subprocessOk = isSubprocessHarness();
61
- // #region agent log
62
- fetch("http://127.0.0.1:7928/ingest/a5d40896-34cb-4f12-97db-df7ada0b22f0", {
63
- method: "POST",
64
- headers: {
65
- "Content-Type": "application/json",
66
- "X-Debug-Session-Id": "2ca12b",
67
- },
68
- body: JSON.stringify({
69
- sessionId: "2ca12b",
70
- hypothesisId: "H2",
71
- location: "harness-subagent-submit.ts:tool_call",
72
- message: "submit tool_call gate",
73
- data: {
74
- toolName: event.toolName,
75
- PI_HARNESS_SUBPROCESS: process.env.PI_HARNESS_SUBPROCESS,
76
- HARNESS_RUN_ID: process.env.HARNESS_RUN_ID ?? null,
77
- HARNESS_RUN_DIR: process.env.HARNESS_RUN_DIR ?? null,
78
- HARNESS_AGENT_ID: process.env.HARNESS_AGENT_ID ?? null,
79
- subprocessOk,
80
- },
81
- timestamp: Date.now(),
82
- }),
83
- }).catch(() => {});
84
- // #endregion
85
62
  if (!subprocessOk) {
86
63
  return {
87
64
  block: true,
@@ -127,6 +127,7 @@ function propsFromRun(
127
127
  ): Record<string, unknown> {
128
128
  return {
129
129
  harness_run_id: runId,
130
+ run_id: runId,
130
131
  harness_plan_id: planId,
131
132
  harness_phase: phase,
132
133
  pi_session_id: distinctId,
@@ -134,6 +135,28 @@ function propsFromRun(
134
135
  };
135
136
  }
136
137
 
138
+ function normalizedRunId(
139
+ data: Record<string, unknown>,
140
+ trace: TraceState | null,
141
+ distinctId: string,
142
+ ): string {
143
+ const fromData = [
144
+ data.harness_run_id,
145
+ data.run_id,
146
+ data.runId,
147
+ data.debate_id,
148
+ ];
149
+ for (const candidate of fromData) {
150
+ if (typeof candidate === "string" && candidate.trim().length > 0) {
151
+ return candidate;
152
+ }
153
+ }
154
+ if (typeof trace?.run_id === "string" && trace.run_id.length > 0) {
155
+ return trace.run_id;
156
+ }
157
+ return distinctId;
158
+ }
159
+
137
160
  function mapCustomEntry(
138
161
  customType: string,
139
162
  data: Record<string, unknown>,
@@ -144,11 +167,9 @@ function mapCustomEntry(
144
167
  event: HarnessPostHogEventName;
145
168
  properties: Record<string, unknown>;
146
169
  } | null {
147
- const runId =
148
- (typeof data.run_id === "string" && data.run_id) ||
149
- trace?.run_id ||
150
- distinctId;
170
+ const runId = normalizedRunId(data, trace, distinctId);
151
171
  const planId =
172
+ (typeof data.harness_plan_id === "string" && data.harness_plan_id) ||
152
173
  (typeof data.plan_id === "string" && data.plan_id) ||
153
174
  policy?.planId ||
154
175
  trace?.plan_id ||
@@ -185,6 +206,7 @@ function mapCustomEntry(
185
206
  event: "harness_debate_consensus",
186
207
  properties: {
187
208
  ...base,
209
+ debate_id: String(data.debate_id ?? runId),
188
210
  consensus_id:
189
211
  typeof data.debate_id === "string" ? data.debate_id : runId,
190
212
  outcome: String(kind),
@@ -195,6 +217,8 @@ function mapCustomEntry(
195
217
  event: "harness_debate_round",
196
218
  properties: {
197
219
  ...base,
220
+ debate_id: String(data.debate_id ?? runId),
221
+ round_index: Number(data.round_index ?? data.round ?? 0),
198
222
  round: Number(data.round_index ?? data.round ?? 0),
199
223
  outcome: String(kind ?? "round"),
200
224
  },
@@ -206,6 +230,7 @@ function mapCustomEntry(
206
230
  event: "harness_debate_consensus",
207
231
  properties: {
208
232
  ...base,
233
+ debate_id: String(data.debate_id ?? runId),
209
234
  consensus_id:
210
235
  typeof data.consensus_id === "string"
211
236
  ? data.consensus_id
@@ -25,6 +25,7 @@ import {
25
25
  } from "./debate-bus-state.js";
26
26
  import {
27
27
  type DebateProfile,
28
+ PLAN_BUDGET_FAST,
28
29
  PLAN_BUDGET_LIGHT,
29
30
  PLAN_BUDGET_STANDARD,
30
31
  } from "./plan-debate-eligibility.js";
@@ -113,15 +114,20 @@ export function capsForDebate(
113
114
  } {
114
115
  if (isPlanDebateId(debateId)) {
115
116
  const active = profile ?? getDebateState()?.debate_profile ?? "standard";
116
- const budget = active === "light" ? PLAN_BUDGET_LIGHT : PLAN_BUDGET;
117
+ const budget =
118
+ active === "light"
119
+ ? PLAN_BUDGET_LIGHT
120
+ : active === "fast"
121
+ ? PLAN_BUDGET_FAST
122
+ : PLAN_BUDGET;
117
123
  const caps = { name: "plan" as const, ...budget };
118
124
  if (!isHarnessBudgetEnforceOn()) {
119
125
  return {
120
126
  ...caps,
121
- max_rounds: 999,
122
- max_exchanges_per_round: 99,
123
- round_token_cap: caps.round_token_cap * 100,
124
- debate_global_cap: caps.debate_global_cap * 100,
127
+ max_rounds: caps.max_rounds,
128
+ max_exchanges_per_round: Math.max(caps.max_exchanges_per_round, 2),
129
+ round_token_cap: caps.round_token_cap * 2,
130
+ debate_global_cap: caps.debate_global_cap * 2,
125
131
  };
126
132
  }
127
133
  return caps;
@@ -135,10 +141,10 @@ export function capsForDebate(
135
141
  if (!isHarnessBudgetEnforceOn()) {
136
142
  return {
137
143
  ...caps,
138
- max_rounds: 999,
139
- max_exchanges_per_round: 99,
140
- round_token_cap: caps.round_token_cap * 100,
141
- debate_global_cap: caps.debate_global_cap * 100,
144
+ max_rounds: caps.max_rounds,
145
+ max_exchanges_per_round: Math.max(caps.max_exchanges_per_round, 2),
146
+ round_token_cap: caps.round_token_cap * 2,
147
+ debate_global_cap: caps.debate_global_cap * 2,
142
148
  };
143
149
  }
144
150
  return caps;
@@ -1,23 +1,32 @@
1
1
  /**
2
2
  * Resolve concrete LLM credentials for harness subagent subprocesses.
3
3
  *
4
- * Parent sessions often use `router/auto` (pi-model-router). Subagents run with
4
+ * Parent sessions often use `router/<profile>` (pi-model-router). Subagents run with
5
5
  * `--no-extensions`, so they cannot use the logical router provider — they need
6
6
  * a real provider/model plus that provider's API key.
7
+ *
8
+ * Session-locked routing: subprocess model is chosen once from agent system prompt
9
+ * complexity (same analysis as parent session lock), not from per-turn parent tier.
7
10
  */
8
11
 
9
12
  import { existsSync, readFileSync } from "node:fs";
10
13
  import { join } from "node:path";
14
+ import { resolveTierFromPrompt } from "../../../vendor/pi-model-router/extensions/routing.js";
15
+ import type {
16
+ RouterProfile,
17
+ RouterTier,
18
+ RoutingRule,
19
+ } from "../../../vendor/pi-model-router/extensions/types.js";
11
20
  import type { AgentConfig } from "../../../vendor/pi-subagents/src/agents.js";
12
21
 
13
22
  const ROUTER_SENTINEL_KEY = "pi-model-router";
14
23
  const SENTINEL_API_KEYS = new Set([ROUTER_SENTINEL_KEY, "<authenticated>"]);
15
24
 
16
- type RouterTier = "high" | "medium" | "low";
17
-
18
25
  interface ModelRouterJson {
19
26
  defaultProfile?: string;
20
- profiles?: Record<string, Partial<Record<RouterTier, { model?: string }>>>;
27
+ phaseBias?: number;
28
+ rules?: RoutingRule[];
29
+ profiles?: Record<string, RouterProfile>;
21
30
  }
22
31
 
23
32
  export function isUsableApiKey(key: string | undefined): key is string {
@@ -35,7 +44,33 @@ export function parseModelRef(
35
44
  return { provider, modelId };
36
45
  }
37
46
 
38
- export function thinkingToRouterTier(thinking?: string): RouterTier {
47
+ /** Planning subagents that should prefer low/medium router tier for latency. */
48
+ const ROUTINE_PLANNING_AGENT_PATHS = new Set([
49
+ "harness/planning/plan-evaluator",
50
+ "harness/planning/plan-adversary",
51
+ "harness/planning/review-integrator",
52
+ "harness/planning/hypothesis-validator",
53
+ "harness/planning/sprint-contract-auditor",
54
+ "harness/planning/scout-structure",
55
+ "harness/planning/scout-semantic",
56
+ "harness/planning/decompose",
57
+ "harness/planning/hypothesis",
58
+ "harness/planning/stack-research",
59
+ "harness/planning/plan-validator",
60
+ ]);
61
+
62
+ export function isRoutinePlanningAgent(agentName: string): boolean {
63
+ return ROUTINE_PLANNING_AGENT_PATHS.has(agentName);
64
+ }
65
+
66
+ export function thinkingToRouterTier(
67
+ thinking?: string,
68
+ agentName?: string,
69
+ ): RouterTier {
70
+ if (agentName && isRoutinePlanningAgent(agentName)) {
71
+ if (thinking === "high" || thinking === "xhigh") return "medium";
72
+ return "low";
73
+ }
39
74
  if (thinking === "high" || thinking === "xhigh") return "high";
40
75
  if (thinking === "off" || thinking === "minimal" || thinking === "low") {
41
76
  return "low";
@@ -43,6 +78,64 @@ export function thinkingToRouterTier(thinking?: string): RouterTier {
43
78
  return "medium";
44
79
  }
45
80
 
81
+ function loadModelRouterConfig(cwd: string): ModelRouterJson | undefined {
82
+ const path = join(cwd, ".pi", "model-router.json");
83
+ if (!existsSync(path)) return undefined;
84
+ try {
85
+ return JSON.parse(readFileSync(path, "utf8")) as ModelRouterJson;
86
+ } catch {
87
+ return undefined;
88
+ }
89
+ }
90
+
91
+ function resolveRouterProfileEntry(
92
+ config: ModelRouterJson,
93
+ profileId: string,
94
+ ): { profileId: string; profile: RouterProfile } | undefined {
95
+ const profiles = config.profiles;
96
+ if (!profiles) return undefined;
97
+ const candidates = [
98
+ profileId,
99
+ config.defaultProfile ?? "auto",
100
+ "auto",
101
+ "opencode-go",
102
+ ];
103
+ const seen = new Set<string>();
104
+ for (const id of candidates) {
105
+ if (!id || seen.has(id)) continue;
106
+ seen.add(id);
107
+ const profile = profiles[id];
108
+ if (profile?.high?.model && profile.medium?.model && profile.low?.model) {
109
+ return { profileId: id, profile };
110
+ }
111
+ }
112
+ return undefined;
113
+ }
114
+
115
+ /** Tier from agent system prompt (+ optional task line) for session model lock. */
116
+ export function resolveSubagentRouterTier(
117
+ cwd: string,
118
+ profileId: string,
119
+ agent: AgentConfig,
120
+ taskSnippet?: string,
121
+ ): RouterTier {
122
+ const config = loadModelRouterConfig(cwd);
123
+ if (config) {
124
+ const entry = resolveRouterProfileEntry(config, profileId);
125
+ if (entry) {
126
+ return resolveTierFromPrompt(
127
+ agent.systemPrompt ?? "",
128
+ taskSnippet?.trim() ?? "",
129
+ entry.profileId,
130
+ entry.profile,
131
+ config.rules,
132
+ config.phaseBias ?? 0.5,
133
+ );
134
+ }
135
+ }
136
+ return thinkingToRouterTier(agent.thinking, agent.name);
137
+ }
138
+
46
139
  /** Map router profile tier → concrete `provider/model` from `.pi/model-router.json`. */
47
140
  export function resolveRouterConcreteModelRef(
48
141
  cwd: string,
@@ -51,19 +144,10 @@ export function resolveRouterConcreteModelRef(
51
144
  ): string | undefined {
52
145
  const path = join(cwd, ".pi", "model-router.json");
53
146
  if (!existsSync(path)) return undefined;
54
- let raw: ModelRouterJson;
55
- try {
56
- raw = JSON.parse(readFileSync(path, "utf8")) as ModelRouterJson;
57
- } catch {
58
- return undefined;
59
- }
60
- const profiles = raw.profiles;
61
- if (!profiles) return undefined;
62
- const profile =
63
- profiles[profileId] ??
64
- profiles[raw.defaultProfile ?? "auto"] ??
65
- profiles.auto;
66
- const model = profile?.[tier]?.model;
147
+ const raw = loadModelRouterConfig(cwd);
148
+ if (!raw) return undefined;
149
+ const entry = resolveRouterProfileEntry(raw, profileId);
150
+ const model = entry?.profile[tier]?.model;
67
151
  return typeof model === "string" && model.includes("/") ? model : undefined;
68
152
  }
69
153
 
@@ -83,6 +167,7 @@ export function resolveConcreteSubagentModel(
83
167
  cwd: string,
84
168
  parentModel: { provider: string; id: string } | undefined,
85
169
  agent: AgentConfig,
170
+ taskSnippet?: string,
86
171
  ): ConcreteSubagentModel | undefined {
87
172
  if (agent.model && !agent.model.startsWith("router/")) {
88
173
  const parsed = parseModelRef(agent.model);
@@ -109,7 +194,7 @@ export function resolveConcreteSubagentModel(
109
194
  agentIsRouter && agent.model
110
195
  ? agent.model.slice("router/".length)
111
196
  : (parentModel?.id ?? "auto");
112
- const tier = thinkingToRouterTier(agent.thinking);
197
+ const tier = resolveSubagentRouterTier(cwd, profileId, agent, taskSnippet);
113
198
  const concrete = resolveRouterConcreteModelRef(cwd, profileId, tier);
114
199
  if (!concrete) return undefined;
115
200
  const parsed = parseModelRef(concrete);
@@ -24,6 +24,9 @@ export type HarnessAgentKind =
24
24
 
25
25
  const MUTATING_TOOLS = new Set(["write", "edit"]);
26
26
 
27
+ /** Planning agents must use submit_* → canonical artifacts/*.yaml, not JSON dumps. */
28
+ const PLANNING_ARTIFACT_JSON_WRITE = /artifacts\/[^\s'"`;]+\.json\b/i;
29
+
27
30
  const PLANNING_BASH_DENY_PATTERNS = [
28
31
  /\bgraphify\s+update\b/i,
29
32
  /\bgraphify\s+extract\b/i,
@@ -174,6 +177,17 @@ export function evaluateHarnessSubagentToolCall(
174
177
 
175
178
  if (toolName === "bash") {
176
179
  const command = String(input?.command ?? "");
180
+ if (
181
+ kind === "planner" &&
182
+ command &&
183
+ PLANNING_ARTIFACT_JSON_WRITE.test(command)
184
+ ) {
185
+ return {
186
+ action: "block",
187
+ reason:
188
+ "harness-subagent-policy: artifacts must be YAML only — use submit_* (e.g. submit_hypothesis_brief → artifacts/hypothesis.yaml), not bash writes to .json.",
189
+ };
190
+ }
177
191
  if (command && isMutatingBash(command)) {
178
192
  return {
179
193
  action: "block",
@@ -13,6 +13,11 @@ import {
13
13
  type HarnessSubagentsOptions,
14
14
  type SpawnAuthForward,
15
15
  } from "../../../vendor/pi-subagents/src/subagents.js";
16
+ import {
17
+ getLatestRunContext,
18
+ getRunIdFromSession,
19
+ type HarnessPhase,
20
+ } from "../../lib/harness-run-context.js";
16
21
  import { parseSpawnContextFromTask } from "../../lib/harness-spawn-parse.js";
17
22
  import { harnessSubagentSubmitExtensionPath } from "../harness-subagent-submit.js";
18
23
  import { refreshHarnessCocoindexIndex } from "./harness-cocoindex-refresh.js";
@@ -35,6 +40,51 @@ import {
35
40
 
36
41
  const spawnBudget = createSpawnBudgetState();
37
42
  let lastSessionId = "harness";
43
+ let spawnGroupCounter = 0;
44
+ type PendingSpawnTelemetry = {
45
+ harness_run_id: string;
46
+ run_id: string;
47
+ harness_plan_id: string;
48
+ harness_phase: HarnessPhase;
49
+ agent_ids: string[];
50
+ spawn_group_id: string;
51
+ };
52
+ let pendingSpawnTelemetry: PendingSpawnTelemetry | null = null;
53
+
54
+ function collectHarnessAgentIds(params: Record<string, unknown>): string[] {
55
+ const out = new Set<string>();
56
+ const maybe = params as {
57
+ agent?: string;
58
+ chain?: Array<{ agent?: string }>;
59
+ tasks?: Array<{ agent?: string }>;
60
+ aggregator?: { agent?: string };
61
+ };
62
+ if (typeof maybe.agent === "string" && maybe.agent.startsWith("harness/")) {
63
+ out.add(maybe.agent);
64
+ }
65
+ for (const item of maybe.chain ?? []) {
66
+ if (typeof item?.agent === "string" && item.agent.startsWith("harness/")) {
67
+ out.add(item.agent);
68
+ }
69
+ }
70
+ for (const item of maybe.tasks ?? []) {
71
+ if (typeof item?.agent === "string" && item.agent.startsWith("harness/")) {
72
+ out.add(item.agent);
73
+ }
74
+ }
75
+ if (
76
+ typeof maybe.aggregator?.agent === "string" &&
77
+ maybe.aggregator.agent.startsWith("harness/")
78
+ ) {
79
+ out.add(maybe.aggregator.agent);
80
+ }
81
+ return Array.from(out.values()).sort();
82
+ }
83
+
84
+ function nextSpawnGroupId(sessionId: string): string {
85
+ spawnGroupCounter += 1;
86
+ return `${sessionId}-${Date.now()}-${spawnGroupCounter}`;
87
+ }
38
88
 
39
89
  async function resolveHarnessSpawnAuth(
40
90
  ctx: ExtensionContext,
@@ -111,11 +161,13 @@ export function createHarnessSubagentsExtension(
111
161
  const { harnessCount } = countHarnessAgentsInRequest(
112
162
  params as Parameters<typeof countHarnessAgentsInRequest>[0],
113
163
  );
164
+ pendingSpawnTelemetry = null;
114
165
  if (harnessCount > 0) {
115
166
  const budget = checkHarnessSpawnBudget(spawnBudget, harnessCount);
116
167
  if (!budget.ok) {
117
168
  return { ok: false, message: budget.message };
118
169
  }
170
+ const entries = ctx.sessionManager.getEntries();
119
171
  const phase = inferPhaseForPrecheck(ctx.sessionManager.getEntries());
120
172
  const pre = precheckHarnessSubagentSpawn(
121
173
  params as Parameters<typeof precheckHarnessSubagentSpawn>[0],
@@ -133,6 +185,19 @@ export function createHarnessSubagentsExtension(
133
185
  return { ok: false, message: refreshMsg };
134
186
  }
135
187
  }
188
+ const runCtx = getLatestRunContext(entries);
189
+ const runId =
190
+ runCtx?.run_id ??
191
+ getRunIdFromSession(entries, lastSessionId) ??
192
+ lastSessionId;
193
+ pendingSpawnTelemetry = {
194
+ harness_run_id: runId,
195
+ run_id: runId,
196
+ harness_plan_id: runCtx?.plan_id ?? "plan-unknown",
197
+ harness_phase: phase,
198
+ agent_ids: collectHarnessAgentIds(params as Record<string, unknown>),
199
+ spawn_group_id: nextSpawnGroupId(lastSessionId),
200
+ };
136
201
  }
137
202
  return { ok: true };
138
203
  },
@@ -142,6 +207,16 @@ export function createHarnessSubagentsExtension(
142
207
  captureHarnessEvent(lastSessionId, "harness_subagent_spawned", {
143
208
  active_after: spawnBudget.active,
144
209
  spawn_count: harnessCount,
210
+ harness_run_id: pendingSpawnTelemetry?.harness_run_id ?? lastSessionId,
211
+ run_id: pendingSpawnTelemetry?.run_id ?? lastSessionId,
212
+ harness_plan_id:
213
+ pendingSpawnTelemetry?.harness_plan_id ?? "plan-unknown",
214
+ harness_phase: pendingSpawnTelemetry?.harness_phase ?? "plan",
215
+ agent_ids: pendingSpawnTelemetry?.agent_ids ?? [],
216
+ agent_count: pendingSpawnTelemetry?.agent_ids.length ?? harnessCount,
217
+ spawn_group_id:
218
+ pendingSpawnTelemetry?.spawn_group_id ??
219
+ nextSpawnGroupId(lastSessionId),
145
220
  });
146
221
  },
147
222
  onSpawnEnd: (harnessCount) => {
@@ -154,7 +229,17 @@ export function createHarnessSubagentsExtension(
154
229
  mode,
155
230
  duration_ms: durationMs,
156
231
  agent_count: agents.length,
232
+ agent_ids: agents,
233
+ harness_run_id: pendingSpawnTelemetry?.harness_run_id ?? lastSessionId,
234
+ run_id: pendingSpawnTelemetry?.run_id ?? lastSessionId,
235
+ harness_plan_id:
236
+ pendingSpawnTelemetry?.harness_plan_id ?? "plan-unknown",
237
+ harness_phase: pendingSpawnTelemetry?.harness_phase ?? "plan",
238
+ spawn_group_id:
239
+ pendingSpawnTelemetry?.spawn_group_id ??
240
+ nextSpawnGroupId(lastSessionId),
157
241
  });
242
+ pendingSpawnTelemetry = null;
158
243
  },
159
244
  };
160
245