ultimate-pi 0.15.0 → 0.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. package/.agents/skills/harness-governor/SKILL.md +11 -0
  2. package/.agents/skills/harness-orchestration/SKILL.md +3 -1
  3. package/.agents/skills/harness-plan/SKILL.md +5 -5
  4. package/.pi/agents/harness/adversary.md +1 -1
  5. package/.pi/agents/harness/evaluator.md +1 -1
  6. package/.pi/agents/harness/executor.md +1 -1
  7. package/.pi/agents/harness/incident-recorder.md +1 -1
  8. package/.pi/agents/harness/meta-optimizer.md +1 -1
  9. package/.pi/agents/harness/planning/decompose.md +4 -33
  10. package/.pi/agents/harness/planning/execution-plan-author.md +3 -2
  11. package/.pi/agents/harness/planning/hypothesis-validator.md +3 -2
  12. package/.pi/agents/harness/planning/hypothesis.md +4 -27
  13. package/.pi/agents/harness/planning/implementation-researcher.md +3 -2
  14. package/.pi/agents/harness/planning/plan-adversary.md +2 -3
  15. package/.pi/agents/harness/planning/plan-evaluator.md +3 -2
  16. package/.pi/agents/harness/planning/review-integrator.md +2 -3
  17. package/.pi/agents/harness/planning/scout-graphify.md +3 -22
  18. package/.pi/agents/harness/planning/scout-semantic.md +3 -18
  19. package/.pi/agents/harness/planning/scout-structure.md +3 -18
  20. package/.pi/agents/harness/planning/sprint-contract-auditor.md +3 -2
  21. package/.pi/agents/harness/planning/stack-researcher.md +3 -2
  22. package/.pi/agents/harness/tie-breaker.md +1 -1
  23. package/.pi/agents/harness/trace-librarian.md +1 -1
  24. package/.pi/extensions/budget-guard.ts +33 -19
  25. package/.pi/extensions/harness-debate-tools.ts +54 -6
  26. package/.pi/extensions/harness-run-context.ts +108 -2
  27. package/.pi/extensions/harness-subagent-submit.ts +172 -0
  28. package/.pi/extensions/harness-telemetry.ts +29 -4
  29. package/.pi/extensions/lib/debate-bus-core.ts +49 -6
  30. package/.pi/extensions/lib/harness-subagent-auth.ts +104 -19
  31. package/.pi/extensions/lib/harness-subagent-policy.ts +59 -0
  32. package/.pi/extensions/lib/harness-subagent-submit-pipeline.ts +82 -0
  33. package/.pi/extensions/lib/harness-subagent-submit-registry.ts +172 -0
  34. package/.pi/extensions/lib/harness-subagents-bridge.ts +127 -0
  35. package/.pi/extensions/lib/plan-debate-eligibility.ts +61 -8
  36. package/.pi/extensions/lib/plan-debate-focus.ts +21 -9
  37. package/.pi/extensions/lib/plan-debate-gate.ts +92 -18
  38. package/.pi/extensions/lib/plan-debate-lane.ts +15 -0
  39. package/.pi/extensions/lib/plan-debate-lanes.ts +27 -3
  40. package/.pi/extensions/lib/plan-debate-round-status.ts +18 -7
  41. package/.pi/extensions/lib/plan-messenger.ts +4 -0
  42. package/.pi/extensions/lib/plan-review-gate.ts +51 -0
  43. package/.pi/extensions/trace-recorder.ts +1 -0
  44. package/.pi/harness/agents.manifest.json +22 -22
  45. package/.pi/harness/docs/adrs/0037-subagent-submit-tools.md +31 -0
  46. package/.pi/harness/docs/adrs/0038-budget-telemetry-only.md +23 -0
  47. package/.pi/harness/docs/adrs/README.md +2 -0
  48. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/artifacts/implementation-research.yaml +28 -0
  49. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/artifacts/review-round-consolidated.yaml +25 -0
  50. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/plan-packet.yaml +196 -0
  51. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/plan-review.md +14 -0
  52. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/research-brief.yaml +62 -0
  53. package/.pi/harness/evals/smoke/smoke-harness-plan.mjs +40 -17
  54. package/.pi/harness/specs/harness-executor-handoff.schema.json +19 -0
  55. package/.pi/harness/specs/harness-human-required.schema.json +16 -0
  56. package/.pi/harness/specs/plan-review-round-draft.schema.json +1 -1
  57. package/.pi/harness/specs/plan-scout-findings.schema.json +19 -0
  58. package/.pi/lib/harness-agent-output.ts +45 -0
  59. package/.pi/lib/harness-budget-enforce.ts +18 -0
  60. package/.pi/lib/harness-schema-validate.ts +89 -0
  61. package/.pi/lib/harness-spawn-parse.ts +86 -0
  62. package/.pi/lib/harness-subagent-submit-path.ts +41 -0
  63. package/.pi/lib/harness-ui-state.ts +15 -2
  64. package/.pi/model-router.example.json +13 -4
  65. package/.pi/prompts/harness-auto.md +2 -2
  66. package/.pi/prompts/harness-plan.md +34 -14
  67. package/.pi/prompts/harness-run.md +2 -2
  68. package/.pi/prompts/harness-setup.md +4 -4
  69. package/.pi/scripts/harness-generate-model-router.mjs +118 -36
  70. package/.pi/scripts/harness-model-router-routing.test.mjs +97 -0
  71. package/.pi/scripts/harness-sync-model-router.mjs +15 -2
  72. package/.pi/scripts/harness-verify.mjs +31 -0
  73. package/.pi/scripts/harness_web/__pycache__/__init__.cpython-314.pyc +0 -0
  74. package/.pi/scripts/harness_web/__pycache__/config.cpython-314.pyc +0 -0
  75. package/.pi/scripts/harness_web/__pycache__/output.cpython-314.pyc +0 -0
  76. package/.pi/scripts/harness_web/__pycache__/scrape.cpython-314.pyc +0 -0
  77. package/.pi/scripts/harness_web/__pycache__/search.cpython-314.pyc +0 -0
  78. package/.pi/scripts/harness_web/__pycache__/search_ddg.cpython-314.pyc +0 -0
  79. package/.pi/scripts/harness_web/__pycache__/search_searxng.cpython-314.pyc +0 -0
  80. package/CHANGELOG.md +21 -0
  81. package/package.json +4 -2
  82. package/vendor/pi-model-router/UPSTREAM_PIN.md +3 -1
  83. package/vendor/pi-model-router/extensions/commands.ts +4 -4
  84. package/vendor/pi-model-router/extensions/index.ts +21 -0
  85. package/vendor/pi-model-router/extensions/provider.ts +130 -79
  86. package/vendor/pi-model-router/extensions/routing.ts +148 -0
  87. package/vendor/pi-model-router/extensions/state.ts +3 -0
  88. package/vendor/pi-model-router/extensions/types.ts +9 -0
  89. package/vendor/pi-model-router/extensions/ui.ts +16 -2
  90. package/vendor/pi-subagents/src/subagents.ts +29 -3
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env node
2
2
  /**
3
3
  * smoke-harness-plan — fixture validation for plan-phase pipeline (CI).
4
- * Usage: node .pi/harness/evals/smoke/smoke-harness-plan.mjs --fixture [minimal-med|minimal-low-light]
4
+ * Usage: node .pi/harness/evals/smoke/smoke-harness-plan.mjs --fixture [minimal-med|minimal-low-light|minimal-med-fast]
5
5
  */
6
6
 
7
7
  import { access, readFile } from "node:fs/promises";
@@ -26,16 +26,23 @@ async function scanFocusCoverage(fixtureRoot, requiredFocus) {
26
26
  let last_round_index = 0;
27
27
  const { readdir } = await import("node:fs/promises");
28
28
  const files = (await readdir(art)).filter((f) =>
29
- /^review-round-r\d+\.yaml$/i.test(f),
29
+ /^review-round(?:-r\d+|-consolidated)\.yaml$/i.test(f),
30
30
  );
31
31
  for (const name of files.sort()) {
32
- const m = /^review-round-r(\d+)\.yaml$/i.exec(name);
32
+ const consolidated = /^review-round-consolidated\.yaml$/i.test(name);
33
+ const m = consolidated
34
+ ? ["review-round-consolidated.yaml", "1"]
35
+ : /^review-round-r(\d+)\.yaml$/i.exec(name);
33
36
  if (!m) continue;
34
- const roundIndex = Number(m[1]);
37
+ const roundIndex = consolidated ? 1 : Number(m[1]);
35
38
  if (roundIndex > last_round_index) last_round_index = roundIndex;
36
39
  const draft = parseYaml(await readFile(join(art, name), "utf-8"));
37
40
  const focus = String(draft.debate_round_focus ?? "").trim();
38
- if (requiredFocus.includes(focus)) covered.add(focus);
41
+ if (focus === "all") {
42
+ for (const f of requiredFocus) covered.add(f);
43
+ } else if (requiredFocus.includes(focus)) {
44
+ covered.add(focus);
45
+ }
39
46
  if (roundIndex === last_round_index) {
40
47
  last_review_gate_ready = draft.review_gate_ready === true;
41
48
  }
@@ -110,22 +117,33 @@ async function runFixture(name) {
110
117
  ok("research-brief.yaml structure");
111
118
 
112
119
  const isLight = name === "minimal-low-light";
113
- const requiredFocus = isLight ? ["spec", "quality"] : ["spec", "wbs", "schedule", "quality"];
114
- const debateRounds = isLight
115
- ? ["review-round-r1.yaml", "review-round-r2.yaml"]
116
- : [
117
- "review-round-r1.yaml",
118
- "review-round-r2.yaml",
119
- "review-round-r3.yaml",
120
- "review-round-r4.yaml",
121
- ];
120
+ const isFast = name === "minimal-med-fast";
121
+ const requiredFocus =
122
+ isLight || isFast
123
+ ? ["spec", "quality"]
124
+ : ["spec", "wbs", "schedule", "quality"];
125
+ const debateRounds = isFast
126
+ ? ["review-round-consolidated.yaml"]
127
+ : isLight
128
+ ? ["review-round-r1.yaml", "review-round-r2.yaml"]
129
+ : [
130
+ "review-round-r1.yaml",
131
+ "review-round-r2.yaml",
132
+ "review-round-r3.yaml",
133
+ "review-round-r4.yaml",
134
+ ];
122
135
  const seenFocus = new Set();
123
136
  for (const fileName of debateRounds) {
124
137
  const p = join(fixtureRoot, "artifacts", fileName);
125
138
  await access(p, constants.R_OK);
126
139
  const draft = parseYaml(await readFile(p, "utf-8"));
127
140
  if (!draft.schema_version) fail(`${fileName} missing schema_version`);
128
- if (draft.debate_round_focus) seenFocus.add(draft.debate_round_focus);
141
+ const f = String(draft.debate_round_focus ?? "").trim();
142
+ if (f === "all") {
143
+ for (const req of requiredFocus) seenFocus.add(req);
144
+ } else if (f) {
145
+ seenFocus.add(f);
146
+ }
129
147
  }
130
148
  for (const focus of requiredFocus) {
131
149
  if (!seenFocus.has(focus)) {
@@ -135,7 +153,7 @@ async function runFixture(name) {
135
153
  ok(`debate round YAML artifacts (${requiredFocus.length} focuses)`);
136
154
 
137
155
  const coverage = await scanFocusCoverage(fixtureRoot, requiredFocus);
138
- const minRounds = isLight ? 2 : 4;
156
+ const minRounds = isFast ? 1 : isLight ? 2 : 4;
139
157
  if (!planOutcomeComplete(coverage, requiredFocus, minRounds)) {
140
158
  fail("debate outcome incomplete for fixture coverage");
141
159
  }
@@ -144,6 +162,9 @@ async function runFixture(name) {
144
162
  if (isLight && packet.risk_level !== "low") {
145
163
  fail("minimal-low-light fixture must use risk_level low");
146
164
  }
165
+ if (isFast && packet.risk_level !== "med") {
166
+ fail("minimal-med-fast fixture must use risk_level med");
167
+ }
147
168
 
148
169
  console.log(`smoke-harness-plan: all ${name} fixture checks passed`);
149
170
  }
@@ -161,7 +182,9 @@ async function main() {
161
182
  );
162
183
  return;
163
184
  }
164
- fail("Usage: smoke-harness-plan.mjs --fixture [minimal-med|minimal-low-light] | --live");
185
+ fail(
186
+ "Usage: smoke-harness-plan.mjs --fixture [minimal-med|minimal-low-light|minimal-med-fast] | --live",
187
+ );
165
188
  }
166
189
 
167
190
  main().catch((err) => {
@@ -0,0 +1,19 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://ultimate-pi.local/.pi/harness/specs/harness-executor-handoff.schema.json",
4
+ "title": "HarnessExecutorHandoff",
5
+ "type": "object",
6
+ "additionalProperties": true,
7
+ "required": ["schema_version", "execution_status"],
8
+ "properties": {
9
+ "schema_version": { "type": "string", "const": "1.0.0" },
10
+ "execution_status": {
11
+ "type": "string",
12
+ "enum": ["completed", "blocked", "scope_drift"]
13
+ },
14
+ "files_changed": { "type": "array" },
15
+ "validation_summary": { "type": "string" },
16
+ "rollback_refs": { "type": "object" },
17
+ "handoff_ready": { "type": "object" }
18
+ }
19
+ }
@@ -0,0 +1,16 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://ultimate-pi.local/.pi/harness/specs/harness-human-required.schema.json",
4
+ "title": "HarnessHumanRequired",
5
+ "type": "object",
6
+ "additionalProperties": false,
7
+ "required": ["schema_version", "reason"],
8
+ "properties": {
9
+ "schema_version": { "type": "string", "const": "1.0.0" },
10
+ "reason": { "type": "string", "minLength": 1 },
11
+ "questions": {
12
+ "type": "array",
13
+ "items": { "type": "string" }
14
+ }
15
+ }
16
+ }
@@ -16,7 +16,7 @@
16
16
  "round_index": { "type": "integer", "minimum": 1, "maximum": 12 },
17
17
  "debate_round_focus": {
18
18
  "type": "string",
19
- "enum": ["spec", "wbs", "schedule", "quality"]
19
+ "enum": ["spec", "wbs", "schedule", "quality", "all"]
20
20
  },
21
21
  "round_summary": { "type": "string", "minLength": 1 },
22
22
  "validation_summary": { "type": "string" },
@@ -0,0 +1,19 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://ultimate-pi.local/.pi/harness/specs/plan-scout-findings.schema.json",
4
+ "title": "PlanScoutFindings",
5
+ "type": "object",
6
+ "additionalProperties": true,
7
+ "required": ["schema_version", "lane", "summary"],
8
+ "properties": {
9
+ "schema_version": { "type": "string", "const": "1.0.0" },
10
+ "lane": {
11
+ "type": "string",
12
+ "enum": ["graphify", "structure", "semantic"]
13
+ },
14
+ "scout_lane": { "type": "string" },
15
+ "summary": { "type": "string", "minLength": 1 },
16
+ "key_paths": { "type": "array", "items": { "type": "string" } },
17
+ "findings": { "type": "array" }
18
+ }
19
+ }
@@ -21,6 +21,51 @@ export function extractJsonBlock(text: string): string | null {
21
21
  return null;
22
22
  }
23
23
 
24
+ export interface ToolCallPartLike {
25
+ type?: string;
26
+ name?: string;
27
+ arguments?: Record<string, unknown>;
28
+ }
29
+
30
+ export interface MessageLike {
31
+ role?: string;
32
+ content?: ToolCallPartLike[] | unknown;
33
+ }
34
+
35
+ /** Last matching submit_* tool call in subprocess messages (chain-safe). */
36
+ export function extractLastSubmitCall(
37
+ messages: MessageLike[],
38
+ toolNames: string | string[],
39
+ ): { toolName: string; document: Record<string, unknown> } | null {
40
+ const allowed = new Set(
41
+ (Array.isArray(toolNames) ? toolNames : [toolNames]).map((n) => n.trim()),
42
+ );
43
+ let last: { toolName: string; document: Record<string, unknown> } | null =
44
+ null;
45
+ for (const msg of messages) {
46
+ if (msg.role !== "assistant" || !Array.isArray(msg.content)) continue;
47
+ for (const part of msg.content) {
48
+ if (part.type !== "toolCall" || !part.name) continue;
49
+ if (!allowed.has(part.name)) continue;
50
+ const doc = part.arguments?.document;
51
+ if (doc && typeof doc === "object" && !Array.isArray(doc)) {
52
+ last = {
53
+ toolName: part.name,
54
+ document: doc as Record<string, unknown>,
55
+ };
56
+ }
57
+ }
58
+ }
59
+ return last;
60
+ }
61
+
62
+ export function extractLastSubmitCallForAgent(
63
+ messages: MessageLike[],
64
+ agentToolNames: readonly string[],
65
+ ): { toolName: string; document: Record<string, unknown> } | null {
66
+ return extractLastSubmitCall(messages, [...agentToolNames]);
67
+ }
68
+
24
69
  export function parseHarnessAgentJson<T extends Record<string, unknown>>(
25
70
  text: string,
26
71
  ): { ok: true; value: T } | { ok: false; error: string } {
@@ -0,0 +1,18 @@
1
+ /**
2
+ * Central switch for harness token/debate budget enforcement.
3
+ * Default: telemetry-only (HARNESS_BUDGET_ENFORCE off).
4
+ */
5
+
6
+ export function isHarnessBudgetEnforceOn(): boolean {
7
+ const raw = (process.env.HARNESS_BUDGET_ENFORCE ?? "off").toLowerCase();
8
+ return raw === "1" || raw === "true" || raw === "on";
9
+ }
10
+
11
+ /** When false, soft-limit and debate telemetry must not block UI or gates. */
12
+ export function shouldEmitBlockingBudgetExhausted(): boolean {
13
+ if (!isHarnessBudgetEnforceOn()) return false;
14
+ return (
15
+ process.env.HARNESS_BUDGET_HARD_STOP === "true" ||
16
+ process.env.HARNESS_DEBATE_HARD_STOP === "true"
17
+ );
18
+ }
@@ -0,0 +1,89 @@
1
+ /**
2
+ * JSON Schema validation for harness submit tools (Ajv draft 2020-12, offline).
3
+ */
4
+
5
+ import { appendFile, readFile } from "node:fs/promises";
6
+ import { join } from "node:path";
7
+ import Ajv2020 from "ajv/dist/2020";
8
+ import addFormats from "ajv-formats";
9
+
10
+ type ValidateFn = (data: unknown) => boolean;
11
+
12
+ const compileCache = new Map<string, ValidateFn>();
13
+ const DEBUG_LOG_PATH =
14
+ "/home/aryaniyaps/ai-projects/ultimate-pi/.cursor/debug-2ca12b.log";
15
+
16
+ let ajvSingleton: InstanceType<typeof Ajv2020> | null = null;
17
+
18
+ function getAjv(): InstanceType<typeof Ajv2020> {
19
+ if (!ajvSingleton) {
20
+ ajvSingleton = new Ajv2020({
21
+ allErrors: true,
22
+ strict: false,
23
+ validateSchema: false,
24
+ });
25
+ addFormats(ajvSingleton);
26
+ }
27
+ return ajvSingleton;
28
+ }
29
+
30
+ async function debugLog(
31
+ hypothesisId: string,
32
+ message: string,
33
+ data: Record<string, unknown>,
34
+ ): Promise<void> {
35
+ // #region agent log
36
+ try {
37
+ await appendFile(
38
+ DEBUG_LOG_PATH,
39
+ `${JSON.stringify({
40
+ sessionId: "2ca12b",
41
+ hypothesisId,
42
+ location: "harness-schema-validate.ts",
43
+ message,
44
+ data,
45
+ timestamp: Date.now(),
46
+ })}\n`,
47
+ );
48
+ } catch {
49
+ /* ignore */
50
+ }
51
+ // #endregion
52
+ }
53
+
54
+ export async function validateAgainstHarnessSchema(
55
+ specsDir: string,
56
+ schemaFile: string,
57
+ document: unknown,
58
+ ): Promise<{ ok: true } | { ok: false; errors: string[] }> {
59
+ const cacheKey = `${specsDir}:${schemaFile}`;
60
+ let validate = compileCache.get(cacheKey);
61
+ if (!validate) {
62
+ const schemaPath = join(specsDir, schemaFile);
63
+ const raw = await readFile(schemaPath, "utf-8");
64
+ const schema = JSON.parse(raw) as Record<string, unknown>;
65
+ try {
66
+ const ajv = getAjv();
67
+ const compiled = ajv.compile(schema);
68
+ validate = compiled;
69
+ compileCache.set(cacheKey, compiled);
70
+ await debugLog("H3", "schema compile ok", { schemaFile });
71
+ } catch (err) {
72
+ const msg = err instanceof Error ? err.message : String(err);
73
+ await debugLog("H3", "schema compile failed", { schemaFile, error: msg });
74
+ return { ok: false, errors: [`schema compile failed: ${msg}`] };
75
+ }
76
+ }
77
+ const ok = validate(document);
78
+ if (ok) return { ok: true };
79
+ const errors = (
80
+ (
81
+ validate as {
82
+ errors?: Array<{ instancePath?: string; message?: string }>;
83
+ }
84
+ ).errors ?? []
85
+ ).map((e: { instancePath?: string; message?: string }) =>
86
+ `${e.instancePath || "/"} ${e.message ?? "invalid"}`.trim(),
87
+ );
88
+ return { ok: false, errors };
89
+ }
@@ -0,0 +1,86 @@
1
+ /**
2
+ * Parse HarnessSpawnContext embedded in subagent task strings.
3
+ */
4
+
5
+ const SPAWN_CTX_EQ_RE = /HarnessSpawnContext\s*=\s*(\{[\s\S]*?\})(?:\s|$|\.)/;
6
+
7
+ export interface ParsedSpawnContext {
8
+ run_id?: string;
9
+ run_dir?: string;
10
+ agent?: string;
11
+ plan_packet_path?: string;
12
+ }
13
+
14
+ function extractBalancedJsonObject(s: string, start: number): string | null {
15
+ if (s[start] !== "{") return null;
16
+ let depth = 0;
17
+ let inString = false;
18
+ let escaped = false;
19
+ for (let i = start; i < s.length; i++) {
20
+ const ch = s[i];
21
+ if (inString) {
22
+ if (escaped) escaped = false;
23
+ else if (ch === "\\") escaped = true;
24
+ else if (ch === '"') inString = false;
25
+ continue;
26
+ }
27
+ if (ch === '"') {
28
+ inString = true;
29
+ continue;
30
+ }
31
+ if (ch === "{") depth++;
32
+ else if (ch === "}") {
33
+ depth--;
34
+ if (depth === 0) return s.slice(start, i + 1);
35
+ }
36
+ }
37
+ return null;
38
+ }
39
+
40
+ function normalizeSpawnContext(parsed: unknown): ParsedSpawnContext | null {
41
+ if (!parsed || typeof parsed !== "object") return null;
42
+ const o = parsed as Record<string, unknown>;
43
+ const run_id = typeof o.run_id === "string" ? o.run_id : undefined;
44
+ const run_dir = typeof o.run_dir === "string" ? o.run_dir : undefined;
45
+ const agent = typeof o.agent === "string" ? o.agent : undefined;
46
+ const plan_packet_path =
47
+ typeof o.plan_packet_path === "string" ? o.plan_packet_path : undefined;
48
+ if (!run_id && !run_dir) return null;
49
+ return { run_id, run_dir, agent, plan_packet_path };
50
+ }
51
+
52
+ export function parseSpawnContextFromTask(
53
+ task: string,
54
+ ): ParsedSpawnContext | null {
55
+ const eqMatch = SPAWN_CTX_EQ_RE.exec(task);
56
+ if (eqMatch?.[1]) {
57
+ try {
58
+ return normalizeSpawnContext(JSON.parse(eqMatch[1]));
59
+ } catch {
60
+ // fall through to JSON-object forms
61
+ }
62
+ }
63
+
64
+ const firstBrace = task.indexOf("{");
65
+ if (firstBrace >= 0) {
66
+ const blob = extractBalancedJsonObject(task, firstBrace);
67
+ if (blob) {
68
+ try {
69
+ const outer = JSON.parse(blob) as Record<string, unknown>;
70
+ if (
71
+ outer.HarnessSpawnContext &&
72
+ typeof outer.HarnessSpawnContext === "object"
73
+ ) {
74
+ return normalizeSpawnContext(outer.HarnessSpawnContext);
75
+ }
76
+ if (typeof outer.run_id === "string") {
77
+ return normalizeSpawnContext(outer);
78
+ }
79
+ } catch {
80
+ // ignore
81
+ }
82
+ }
83
+ }
84
+
85
+ return null;
86
+ }
@@ -0,0 +1,41 @@
1
+ /**
2
+ * Resolve and guard harness run directories for subagent submit tools.
3
+ */
4
+
5
+ import { realpath } from "node:fs/promises";
6
+ import { join, resolve } from "node:path";
7
+
8
+ export function harnessRunsRoot(projectRoot: string): string {
9
+ return join(projectRoot, ".pi", "harness", "runs");
10
+ }
11
+
12
+ export async function resolveGuardedRunDir(opts: {
13
+ projectRoot: string;
14
+ runId: string;
15
+ runDirEnv?: string;
16
+ }): Promise<{ ok: true; runDir: string } | { ok: false; error: string }> {
17
+ const { projectRoot, runId } = opts;
18
+ if (!runId.trim()) {
19
+ return { ok: false, error: "run_id is required" };
20
+ }
21
+ const expected = join(harnessRunsRoot(projectRoot), runId);
22
+ let candidate = opts.runDirEnv?.trim()
23
+ ? resolve(projectRoot, opts.runDirEnv)
24
+ : expected;
25
+ try {
26
+ candidate = await realpath(candidate);
27
+ const expectedReal = await realpath(expected);
28
+ if (
29
+ candidate !== expectedReal &&
30
+ !candidate.startsWith(`${expectedReal}/`)
31
+ ) {
32
+ return {
33
+ ok: false,
34
+ error: `run_dir must stay under ${expectedReal}`,
35
+ };
36
+ }
37
+ return { ok: true, runDir: candidate };
38
+ } catch {
39
+ return { ok: false, error: `run directory not found for run_id=${runId}` };
40
+ }
41
+ }
@@ -1,4 +1,5 @@
1
1
  import type { ExtensionContext } from "@earendil-works/pi-coding-agent";
2
+ import { shouldEmitBlockingBudgetExhausted } from "./harness-budget-enforce.js";
2
3
 
3
4
  export type HarnessPhase =
4
5
  | "plan"
@@ -133,6 +134,9 @@ const RELEVANT_CUSTOM_TYPES = new Set([
133
134
  "harness-consensus-packet",
134
135
  "harness-round-result",
135
136
  "harness-budget-exhausted",
137
+ "harness-budget-soft-limit",
138
+ "harness-budget-telemetry",
139
+ "harness-debate-budget-telemetry",
136
140
  "harness-review-integrity",
137
141
  "harness-test-integrity-flag",
138
142
  "harness-run-trace",
@@ -189,7 +193,7 @@ function deriveFlowSubstate(state: HarnessUiState): HarnessFlowSubstate {
189
193
  return "idle";
190
194
  }
191
195
 
192
- function createStateFromEntries(entries: unknown[]): HarnessUiState {
196
+ export function createStateFromEntries(entries: unknown[]): HarnessUiState {
193
197
  const latest = pickLatestCustomEntries(entries);
194
198
  const state: HarnessUiState = {
195
199
  ...DEFAULT_STATE,
@@ -212,7 +216,7 @@ function createStateFromEntries(entries: unknown[]): HarnessUiState {
212
216
  const budget = latest.get("harness-budget-exhausted") as
213
217
  | BudgetExhaustedLike
214
218
  | undefined;
215
- if (budget) {
219
+ if (budget && shouldEmitBlockingBudgetExhausted()) {
216
220
  state.budgetExhausted = true;
217
221
  state.budgetReason =
218
222
  typeof budget.exhaustion_reason === "string"
@@ -223,6 +227,15 @@ function createStateFromEntries(entries: unknown[]): HarnessUiState {
223
227
  const cap = asNumber(budget.caps?.debate_global_cap);
224
228
  if (cap != null) state.debateBudgetCap = cap;
225
229
  }
230
+ const telemetry = latest.get("harness-budget-telemetry") as
231
+ | BudgetExhaustedLike
232
+ | undefined;
233
+ if (telemetry && !state.budgetExhausted) {
234
+ const budgetUsed = asNumber(telemetry.budget_used);
235
+ if (budgetUsed != null) state.debateBudgetUsed = budgetUsed;
236
+ const cap = asNumber(telemetry.caps?.debate_global_cap);
237
+ if (cap != null) state.debateBudgetCap = cap;
238
+ }
226
239
 
227
240
  const testIntegrity = latest.get("harness-test-integrity-flag") as
228
241
  | TestIntegrityLike
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "defaultProfile": "auto",
3
3
  "debug": false,
4
- "classifierModel": "opencode-go/qwen3.6-plus",
4
+ "classifierModel": "openai/gpt-5.4-nano",
5
5
  "phaseBias": 0.5,
6
6
  "maxSessionBudget": 1.0,
7
7
  "largeContextThreshold": 100000,
@@ -16,12 +16,21 @@
16
16
  "profiles": {
17
17
  "auto": {
18
18
  "high": {
19
- "model": "opencode-go/deepseek-v4-pro",
19
+ "model": "openai/gpt-5.5",
20
20
  "thinking": "high",
21
- "fallbacks": ["opencode-go/qwen3.6-plus"]
21
+ "fallbacks": ["openai/gpt-5.4-nano"]
22
+ },
23
+ "medium": { "model": "openai/gpt-5.5", "thinking": "medium" },
24
+ "low": { "model": "openai/gpt-5.5", "thinking": "low" }
25
+ },
26
+ "opencode-go": {
27
+ "high": {
28
+ "model": "opencode-go/qwen3.6-plus",
29
+ "thinking": "high",
30
+ "fallbacks": ["opencode-go/deepseek-v4-flash"]
22
31
  },
23
32
  "medium": { "model": "opencode-go/qwen3.6-plus", "thinking": "medium" },
24
- "low": { "model": "opencode-go/deepseek-v4-flash", "thinking": "low" }
33
+ "low": { "model": "opencode-go/qwen3.6-plus", "thinking": "low" }
25
34
  }
26
35
  }
27
36
  }
@@ -1,6 +1,6 @@
1
1
  ---
2
2
  description: Full strict harness pipeline with locked governance decisions.
3
- argument-hint: "\"<task>\" [--quick] [--risk low|med|high] [--budget <amount>]"
3
+ argument-hint: "\"<task>\" [--quick] [--risk low|med|high]"
4
4
  ---
5
5
 
6
6
  # harness-auto
@@ -10,7 +10,7 @@ Pipeline orchestrator — one session, sequential phase handoffs. Invoke **harne
10
10
  ## Step 0 — Parse arguments
11
11
 
12
12
  - required task (quoted or first token)
13
- - optional: `--quick`, `--risk`, `--budget`
13
+ - optional: `--quick`, `--risk` (`--budget` reserved/no-op)
14
14
 
15
15
  If task missing:
16
16