ultimate-pi 0.15.0 → 0.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. package/.agents/skills/harness-governor/SKILL.md +11 -0
  2. package/.agents/skills/harness-orchestration/SKILL.md +3 -1
  3. package/.agents/skills/harness-plan/SKILL.md +5 -5
  4. package/.pi/agents/harness/adversary.md +1 -1
  5. package/.pi/agents/harness/evaluator.md +1 -1
  6. package/.pi/agents/harness/executor.md +1 -1
  7. package/.pi/agents/harness/incident-recorder.md +1 -1
  8. package/.pi/agents/harness/meta-optimizer.md +1 -1
  9. package/.pi/agents/harness/planning/decompose.md +4 -33
  10. package/.pi/agents/harness/planning/execution-plan-author.md +3 -2
  11. package/.pi/agents/harness/planning/hypothesis-validator.md +3 -2
  12. package/.pi/agents/harness/planning/hypothesis.md +4 -27
  13. package/.pi/agents/harness/planning/implementation-researcher.md +3 -2
  14. package/.pi/agents/harness/planning/plan-adversary.md +2 -3
  15. package/.pi/agents/harness/planning/plan-evaluator.md +3 -2
  16. package/.pi/agents/harness/planning/review-integrator.md +2 -3
  17. package/.pi/agents/harness/planning/scout-graphify.md +3 -22
  18. package/.pi/agents/harness/planning/scout-semantic.md +3 -18
  19. package/.pi/agents/harness/planning/scout-structure.md +3 -18
  20. package/.pi/agents/harness/planning/sprint-contract-auditor.md +3 -2
  21. package/.pi/agents/harness/planning/stack-researcher.md +3 -2
  22. package/.pi/agents/harness/tie-breaker.md +1 -1
  23. package/.pi/agents/harness/trace-librarian.md +1 -1
  24. package/.pi/extensions/budget-guard.ts +33 -19
  25. package/.pi/extensions/harness-debate-tools.ts +54 -6
  26. package/.pi/extensions/harness-run-context.ts +108 -2
  27. package/.pi/extensions/harness-subagent-submit.ts +172 -0
  28. package/.pi/extensions/harness-telemetry.ts +29 -4
  29. package/.pi/extensions/lib/debate-bus-core.ts +49 -6
  30. package/.pi/extensions/lib/harness-subagent-auth.ts +104 -19
  31. package/.pi/extensions/lib/harness-subagent-policy.ts +59 -0
  32. package/.pi/extensions/lib/harness-subagent-submit-pipeline.ts +82 -0
  33. package/.pi/extensions/lib/harness-subagent-submit-registry.ts +172 -0
  34. package/.pi/extensions/lib/harness-subagents-bridge.ts +127 -0
  35. package/.pi/extensions/lib/plan-debate-eligibility.ts +61 -8
  36. package/.pi/extensions/lib/plan-debate-focus.ts +21 -9
  37. package/.pi/extensions/lib/plan-debate-gate.ts +92 -18
  38. package/.pi/extensions/lib/plan-debate-lane.ts +15 -0
  39. package/.pi/extensions/lib/plan-debate-lanes.ts +27 -3
  40. package/.pi/extensions/lib/plan-debate-round-status.ts +18 -7
  41. package/.pi/extensions/lib/plan-messenger.ts +4 -0
  42. package/.pi/extensions/lib/plan-review-gate.ts +51 -0
  43. package/.pi/extensions/trace-recorder.ts +1 -0
  44. package/.pi/harness/agents.manifest.json +22 -22
  45. package/.pi/harness/docs/adrs/0037-subagent-submit-tools.md +31 -0
  46. package/.pi/harness/docs/adrs/0038-budget-telemetry-only.md +23 -0
  47. package/.pi/harness/docs/adrs/README.md +2 -0
  48. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/artifacts/implementation-research.yaml +28 -0
  49. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/artifacts/review-round-consolidated.yaml +25 -0
  50. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/plan-packet.yaml +196 -0
  51. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/plan-review.md +14 -0
  52. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/research-brief.yaml +62 -0
  53. package/.pi/harness/evals/smoke/smoke-harness-plan.mjs +40 -17
  54. package/.pi/harness/specs/harness-executor-handoff.schema.json +19 -0
  55. package/.pi/harness/specs/harness-human-required.schema.json +16 -0
  56. package/.pi/harness/specs/plan-review-round-draft.schema.json +1 -1
  57. package/.pi/harness/specs/plan-scout-findings.schema.json +19 -0
  58. package/.pi/lib/harness-agent-output.ts +45 -0
  59. package/.pi/lib/harness-budget-enforce.ts +18 -0
  60. package/.pi/lib/harness-schema-validate.ts +89 -0
  61. package/.pi/lib/harness-spawn-parse.ts +86 -0
  62. package/.pi/lib/harness-subagent-submit-path.ts +41 -0
  63. package/.pi/lib/harness-ui-state.ts +15 -2
  64. package/.pi/model-router.example.json +13 -4
  65. package/.pi/prompts/harness-auto.md +2 -2
  66. package/.pi/prompts/harness-plan.md +34 -14
  67. package/.pi/prompts/harness-run.md +2 -2
  68. package/.pi/prompts/harness-setup.md +4 -4
  69. package/.pi/scripts/harness-generate-model-router.mjs +118 -36
  70. package/.pi/scripts/harness-model-router-routing.test.mjs +97 -0
  71. package/.pi/scripts/harness-sync-model-router.mjs +15 -2
  72. package/.pi/scripts/harness-verify.mjs +31 -0
  73. package/.pi/scripts/harness_web/__pycache__/__init__.cpython-314.pyc +0 -0
  74. package/.pi/scripts/harness_web/__pycache__/config.cpython-314.pyc +0 -0
  75. package/.pi/scripts/harness_web/__pycache__/output.cpython-314.pyc +0 -0
  76. package/.pi/scripts/harness_web/__pycache__/scrape.cpython-314.pyc +0 -0
  77. package/.pi/scripts/harness_web/__pycache__/search.cpython-314.pyc +0 -0
  78. package/.pi/scripts/harness_web/__pycache__/search_ddg.cpython-314.pyc +0 -0
  79. package/.pi/scripts/harness_web/__pycache__/search_searxng.cpython-314.pyc +0 -0
  80. package/CHANGELOG.md +21 -0
  81. package/package.json +4 -2
  82. package/vendor/pi-model-router/UPSTREAM_PIN.md +3 -1
  83. package/vendor/pi-model-router/extensions/commands.ts +4 -4
  84. package/vendor/pi-model-router/extensions/index.ts +21 -0
  85. package/vendor/pi-model-router/extensions/provider.ts +130 -79
  86. package/vendor/pi-model-router/extensions/routing.ts +148 -0
  87. package/vendor/pi-model-router/extensions/state.ts +3 -0
  88. package/vendor/pi-model-router/extensions/types.ts +9 -0
  89. package/vendor/pi-model-router/extensions/ui.ts +16 -2
  90. package/vendor/pi-subagents/src/subagents.ts +29 -3
@@ -7,12 +7,15 @@ import { access } from "node:fs/promises";
7
7
  import { join } from "node:path";
8
8
  import { capsForDebate } from "./debate-bus-core.js";
9
9
  import {
10
- type PlanDebateFocus,
10
+ type PlanDebateRoundFocus,
11
11
  readDebateRoundFocus,
12
12
  } from "./plan-debate-focus.js";
13
13
  import { planDebateIdForRun } from "./plan-debate-id.js";
14
14
  import { laneArtifactPath } from "./plan-debate-lane.js";
15
- import { lanesForRound } from "./plan-debate-lanes.js";
15
+ import {
16
+ lanesForConsolidatedRound,
17
+ lanesForRound,
18
+ } from "./plan-debate-lanes.js";
16
19
  import {
17
20
  getMessengerRoundState,
18
21
  loadMessengerState,
@@ -40,26 +43,32 @@ export interface RoundStatusResult {
40
43
  dialogue: { ok: boolean; errors: string[] };
41
44
  unresolved_claim_ids: string[];
42
45
  exchange_count: number;
43
- debate_round_focus?: PlanDebateFocus | null;
46
+ debate_round_focus?: PlanDebateRoundFocus | null;
44
47
  }
45
48
 
46
49
  export async function getPlanDebateRoundStatus(
47
50
  runDir: string,
48
51
  roundIndex: number,
49
52
  runId?: string,
50
- opts?: { debate_round_focus?: PlanDebateFocus },
53
+ opts?: { debate_round_focus?: PlanDebateRoundFocus },
51
54
  ): Promise<RoundStatusResult> {
55
+ const messengerState = await loadMessengerState(runDir);
56
+ const consolidated =
57
+ messengerState?.review_gate_mode === "consolidated" && roundIndex === 1;
52
58
  const focus =
53
59
  opts?.debate_round_focus ??
60
+ (consolidated ? ("all" as PlanDebateRoundFocus) : null) ??
54
61
  (await readDebateRoundFocus(runDir, roundIndex));
55
62
  const missing: string[] = [];
56
- for (const lane of lanesForRound(roundIndex, focus)) {
63
+ const laneList = consolidated
64
+ ? lanesForConsolidatedRound()
65
+ : lanesForRound(roundIndex, focus);
66
+ for (const lane of laneList) {
57
67
  const rel = laneArtifactPath(lane, roundIndex);
58
68
  if (!(await exists(join(runDir, rel)))) {
59
69
  missing.push(rel);
60
70
  }
61
71
  }
62
- const messengerState = await loadMessengerState(runDir);
63
72
  const profile = messengerState?.debate_profile;
64
73
  const caps = capsForDebate(
65
74
  runId ? planDebateIdForRun(runId) : `plan-${runId ?? "unknown"}`,
@@ -73,7 +82,9 @@ export async function getPlanDebateRoundStatus(
73
82
  if (!dialogue.ok) {
74
83
  missing.push(...dialogue.errors.map((e) => `messenger: ${e}`));
75
84
  }
76
- const reviewRound = `artifacts/review-round-r${roundIndex}.yaml`;
85
+ const reviewRound = consolidated
86
+ ? "artifacts/review-round-consolidated.yaml"
87
+ : `artifacts/review-round-r${roundIndex}.yaml`;
77
88
  const reviewRoundOnDisk = await exists(join(runDir, reviewRound));
78
89
 
79
90
  let next_tool: string | undefined;
@@ -63,6 +63,8 @@ export interface MessengerState {
63
63
  rounds: Record<string, MessengerRoundState>;
64
64
  debate_profile?: DebateProfile;
65
65
  required_focuses?: PlanDebateFocus[];
66
+ /** consolidated = single Review Gate round; threaded = per-focus rounds */
67
+ review_gate_mode?: "consolidated" | "threaded";
66
68
  }
67
69
 
68
70
  function messengerRoot(runDir: string): string {
@@ -84,6 +86,7 @@ export async function initPlanMessenger(
84
86
  debateId: string;
85
87
  debate_profile?: DebateProfile;
86
88
  required_focuses?: PlanDebateFocus[];
89
+ review_gate_mode?: "consolidated" | "threaded";
87
90
  },
88
91
  ): Promise<string> {
89
92
  const root = messengerRoot(runDir);
@@ -97,6 +100,7 @@ export async function initPlanMessenger(
97
100
  rounds: {},
98
101
  debate_profile: opts.debate_profile,
99
102
  required_focuses: opts.required_focuses,
103
+ review_gate_mode: opts.review_gate_mode,
100
104
  };
101
105
  await writeFile(
102
106
  join(root, "state.json"),
@@ -0,0 +1,51 @@
1
+ /**
2
+ * Consolidated vs threaded Review Gate strategy for plan-phase debate.
3
+ */
4
+
5
+ import type {
6
+ DebateEligibilityResult,
7
+ PlanReviewGateStrategy,
8
+ } from "./plan-debate-eligibility.js";
9
+ import type { PlanDebateFocus } from "./plan-debate-focus.js";
10
+
11
+ export type { PlanReviewGateStrategy };
12
+
13
+ export const CONSOLIDATED_REVIEW_ROUND = 1;
14
+ export const CONSOLIDATED_REVIEW_ARTIFACT =
15
+ "artifacts/review-round-consolidated.yaml";
16
+
17
+ export function planReviewGateStrategyFromEligibility(
18
+ eligibility: DebateEligibilityResult,
19
+ ): PlanReviewGateStrategy {
20
+ return (
21
+ eligibility.review_gate_strategy ?? {
22
+ mode: eligibility.profile === "fast" ? "consolidated" : "threaded",
23
+ profile: eligibility.profile,
24
+ required_focuses: [...eligibility.required_focuses],
25
+ min_focus_rounds: eligibility.min_focus_rounds,
26
+ max_rounds: eligibility.max_rounds,
27
+ max_exchanges_per_round: eligibility.max_exchanges_per_round,
28
+ round_token_cap: eligibility.round_token_cap,
29
+ debate_global_cap: eligibility.debate_global_cap,
30
+ rationale: [...eligibility.rationale],
31
+ }
32
+ );
33
+ }
34
+
35
+ export function isConsolidatedReviewStrategy(
36
+ strategy: PlanReviewGateStrategy,
37
+ ): boolean {
38
+ return strategy.mode === "consolidated";
39
+ }
40
+
41
+ /** Focus areas covered in a single consolidated review round (spec + quality gate). */
42
+ export const CONSOLIDATED_REVIEW_FOCUS_AREAS: readonly PlanDebateFocus[] = [
43
+ "spec",
44
+ "quality",
45
+ ];
46
+
47
+ export function consolidatedReviewFocusesSatisfied(
48
+ covered: readonly string[],
49
+ ): boolean {
50
+ return CONSOLIDATED_REVIEW_FOCUS_AREAS.every((f) => covered.includes(f));
51
+ }
@@ -235,6 +235,7 @@ export default function traceRecorder(pi: ExtensionAPI) {
235
235
  if (shouldEmitStarted) {
236
236
  captureHarnessEvent(sessionId, "harness_run_started", {
237
237
  harness_run_id: runId,
238
+ run_id: runId,
238
239
  harness_plan_id: activeRun.planId,
239
240
  harness_phase: activeRun.phase,
240
241
  pi_session_id: sessionId,
@@ -1,8 +1,8 @@
1
1
  {
2
2
  "schema_version": "1.0.0",
3
3
  "package": "ultimate-pi",
4
- "package_version": "0.14.0",
5
- "generated_at": "2026-05-19T10:53:28.359Z",
4
+ "package_version": "0.15.0",
5
+ "generated_at": "2026-05-19T12:56:13.369Z",
6
6
  "agents": {
7
7
  "pi-pi/agent-expert": {
8
8
  "path": ".pi/agents/pi-pi/agent-expert.md",
@@ -46,23 +46,23 @@
46
46
  },
47
47
  "harness/adversary": {
48
48
  "path": ".pi/agents/harness/adversary.md",
49
- "sha256": "dd2ef87139cb175d795f4d7bde78dca1a181d2e42c3c3bd0d48832cf5069aa29"
49
+ "sha256": "560c7571ab91478bde1271e9ae6c3a112c3e1d28e1a261c5450fd1d00f9f89af"
50
50
  },
51
51
  "harness/evaluator": {
52
52
  "path": ".pi/agents/harness/evaluator.md",
53
- "sha256": "2b8039fd79f9177fdafd5319a53a96812719d4f1f68e2de70632030142649cfe"
53
+ "sha256": "a4667d3efb305ba2fe79118e3d7d2b0de5e0369637af040d1238161d75cd28ac"
54
54
  },
55
55
  "harness/executor": {
56
56
  "path": ".pi/agents/harness/executor.md",
57
- "sha256": "b549e9fc802ba23857a1bc6b2ff36f3c169e708fe5ec13857b3bcfe841384f1f"
57
+ "sha256": "6baffcc3d89954494ce3ae439175686a39928b6a543a0a451da27475094b1712"
58
58
  },
59
59
  "harness/incident-recorder": {
60
60
  "path": ".pi/agents/harness/incident-recorder.md",
61
- "sha256": "d7577c911a9e6c9607eb64f76337aab85c4eb9a92e7cd917eb8d989ef3cd1de5"
61
+ "sha256": "d42fa45de1a2fe3842d075c6f319315266588942e314f1b650caabac39bdc29a"
62
62
  },
63
63
  "harness/meta-optimizer": {
64
64
  "path": ".pi/agents/harness/meta-optimizer.md",
65
- "sha256": "a4eed88084c7cfb5ace3edc72b72d7ead4134b3eae0d444b391decfe2640a632"
65
+ "sha256": "cbaab35367126796b7136389a02ab41b4fd1fe7098cf83be562d7b7493ccc297"
66
66
  },
67
67
  "harness/sentrux-bootstrap": {
68
68
  "path": ".pi/agents/harness/sentrux-bootstrap.md",
@@ -70,63 +70,63 @@
70
70
  },
71
71
  "harness/tie-breaker": {
72
72
  "path": ".pi/agents/harness/tie-breaker.md",
73
- "sha256": "68f02b86e95927f06d7f963e1f61f193159bbef1ba4558d90c84d5457d62b3f7"
73
+ "sha256": "1c54c1c3274291dea1ea8826563a7ad4fe1d9c4302984e907bfcd22cfc4f5eba"
74
74
  },
75
75
  "harness/trace-librarian": {
76
76
  "path": ".pi/agents/harness/trace-librarian.md",
77
- "sha256": "03b499a948b8467f1cfe2b4e63190feb7b8b9d96461055638e774253b9b6b2d4"
77
+ "sha256": "336b3f3f6141cef8750ab18d29bbe454caf26973830a86afe099d9e4ad8b0abe"
78
78
  },
79
79
  "harness/planning/decompose": {
80
80
  "path": ".pi/agents/harness/planning/decompose.md",
81
- "sha256": "5c3b983772d013741d50f39945bc77f178aa338aecab56b93c09216d72192c69"
81
+ "sha256": "0919dafa1d1cd008d513c28524c1e7218867586a138982dccf01db5270c42c73"
82
82
  },
83
83
  "harness/planning/execution-plan-author": {
84
84
  "path": ".pi/agents/harness/planning/execution-plan-author.md",
85
- "sha256": "16f8800c50bcaf1b82ed9138889c8a0e538ee6a139aeae129ccd20cec2ec25f7"
85
+ "sha256": "55ece0f1ee14abd17fe7b3e478b548240f637eacbfc2a34758e98d3878dc82fd"
86
86
  },
87
87
  "harness/planning/hypothesis-validator": {
88
88
  "path": ".pi/agents/harness/planning/hypothesis-validator.md",
89
- "sha256": "9e68ec5d6aef96a3666c30227c3cbddf1aaed1182fdc94dbbd21ad3d48315ff2"
89
+ "sha256": "36f0baa7796229f21bd02faf5e70402c7bf054289eab557a25bfbe3cb7781de7"
90
90
  },
91
91
  "harness/planning/hypothesis": {
92
92
  "path": ".pi/agents/harness/planning/hypothesis.md",
93
- "sha256": "b20c527d15c2243cd5d3a8f16cea6d44bdfd16e01915d42f3b830bf9938e5f8b"
93
+ "sha256": "e83d5c4faaee8d32af4a5f22c9917b70a173f3e22d7c0f182b361706f2309171"
94
94
  },
95
95
  "harness/planning/implementation-researcher": {
96
96
  "path": ".pi/agents/harness/planning/implementation-researcher.md",
97
- "sha256": "dbd1c4fc74d538b110d406febfd4603eebea77d82e8b367df4596ac7ff6e54cc"
97
+ "sha256": "653f320b5d51bb331774246687f24a75347b406bba4e6dfd2968d6e5d4cc8bb3"
98
98
  },
99
99
  "harness/planning/plan-adversary": {
100
100
  "path": ".pi/agents/harness/planning/plan-adversary.md",
101
- "sha256": "7c14eaab65f356003ee2ff380f5d4e620170b5126daa67c3d226b12342f47bd2"
101
+ "sha256": "3241d7ec939dc29e0af64690b99e9f74b209f40b0daa4a2a1f9ff86f99f94a8d"
102
102
  },
103
103
  "harness/planning/plan-evaluator": {
104
104
  "path": ".pi/agents/harness/planning/plan-evaluator.md",
105
- "sha256": "846575abe9df3e7e5be812c0c474989c1a9de8074a7884d77b9d3dd423643480"
105
+ "sha256": "71660ab58bfcfdfae56c873140d4ea5946ae30cd5719c96afeabfd02b1d1f81d"
106
106
  },
107
107
  "harness/planning/review-integrator": {
108
108
  "path": ".pi/agents/harness/planning/review-integrator.md",
109
- "sha256": "bed43f3f049c279ac50a24bcffac1bbe46a8605d89c9cc6d0c3c6a87d488b1b8"
109
+ "sha256": "cf3f0dbe81274ec9ef0ff2e0c170e8dc929b20be65492d0ee9a80d985acf6d71"
110
110
  },
111
111
  "harness/planning/scout-graphify": {
112
112
  "path": ".pi/agents/harness/planning/scout-graphify.md",
113
- "sha256": "7f385d5bda2fe04b9da52cb4cb9247324efd345579b483d3ad55a6abefad50d5"
113
+ "sha256": "6e2bda8ad38311810c9916d9dab311873bc776e4b8832bb0e574136e45e1255e"
114
114
  },
115
115
  "harness/planning/scout-semantic": {
116
116
  "path": ".pi/agents/harness/planning/scout-semantic.md",
117
- "sha256": "36bd424ebd422bda82bd447b22f591f99f32ec897ea43f385586119da5c26caa"
117
+ "sha256": "416e518d8204a55b26dc53da1f750865c6f09ee2c7f343b41e7c08da3230c089"
118
118
  },
119
119
  "harness/planning/scout-structure": {
120
120
  "path": ".pi/agents/harness/planning/scout-structure.md",
121
- "sha256": "e67b7cd75519e5ae36e1bb5f49ca158888c28d365465863aee50a9b2e8e5b7d7"
121
+ "sha256": "76c42a15cc74cf1de2cf861cb0146c865c205f69cce7b9605d41893b19600029"
122
122
  },
123
123
  "harness/planning/sprint-contract-auditor": {
124
124
  "path": ".pi/agents/harness/planning/sprint-contract-auditor.md",
125
- "sha256": "d915274dc9b5addae5499bc2390b348eddeb8f133b526a816e23d0d19a2618bf"
125
+ "sha256": "12cb5e6b53dcc19ace62e8e4c152d96440717df53a182e76216dd2327410df4d"
126
126
  },
127
127
  "harness/planning/stack-researcher": {
128
128
  "path": ".pi/agents/harness/planning/stack-researcher.md",
129
- "sha256": "fa228920abe2b66d4d8921c4a5d85593e3019a24bbe9ae512ed9149f235e3536"
129
+ "sha256": "ce546ef3aca19da7f334f07cef8f510b79068bffeb7f276c428f3e6236bbe96b"
130
130
  }
131
131
  }
132
132
  }
@@ -0,0 +1,31 @@
1
+ # ADR 0037: Subagent submit tools (replace JSON prose contracts)
2
+
3
+ **Status:** Accepted
4
+ **Date:** 2026-05-19
5
+
6
+ ## Context
7
+
8
+ Harness plan/execute agents used fenced JSON in `finalOutput`, requiring the parent orchestrator to parse prose and call `write_harness_yaml`. This was fragile (truncated parallel summaries, invalid JSON, double-hop writes).
9
+
10
+ Planning agents set `extensions: false` and subprocess spawn used `--no-extensions`, so harness tools were unavailable in children.
11
+
12
+ ## Decision
13
+
14
+ 1. **Option A — subprocess-only extension bundle:** vendored spawn passes `--no-extensions -e .pi/extensions/harness-subagent-submit.ts` for `harness/*` agents with `extensions: false`.
15
+ 2. **Scoped `submit_*` tools** per agent, validated against `.pi/harness/specs/*.schema.json` (Ajv) and written deterministically under `HARNESS_RUN_DIR`.
16
+ 3. **Parent gates** via `harness_artifact_ready` (file existence) instead of parsing subprocess JSON.
17
+ 4. **Debate lanes:** the `tool_result` hook prefers the last `submit_*` call found in `details.results[].messages` and skips the `finalOutput` auto-apply when a submit call is present (`HARNESS_SUBMIT_TOOLS`, on by default).
18
+ 5. **Parent** blocks all `submit_*`; keeps `write_harness_yaml` for merges and debate round submission only.
19
+
20
+ ## Consequences
21
+
22
+ - Agent frontmatter lists one terminal `submit_*` tool per role.
23
+ - `HarnessSpawnContext` must include `run_id` / `run_dir`; bridge sets `HARNESS_RUN_ID`, `HARNESS_RUN_DIR`, `HARNESS_AGENT_ID` on spawn.
24
+ - `parseHarnessAgentJson` is retained for migration and tests; the hot path uses tool arguments instead.
25
+ - See ADR 0038 for budget telemetry-only default.
26
+
27
+ ## References
28
+
29
+ - `.pi/extensions/harness-subagent-submit.ts`
30
+ - `.pi/extensions/lib/harness-subagent-submit-registry.ts`
31
+ - `.pi/harness/specs/plan-scout-findings.schema.json`
@@ -0,0 +1,23 @@
1
+ # ADR 0038: Budget enforcement telemetry-only (default)
2
+
3
+ **Status:** Accepted
4
+ **Date:** 2026-05-19
5
+
6
+ ## Context
7
+
8
+ Token and debate caps emitted `harness-budget-exhausted`, which set `budgetExhausted` in the live widget and blocked flows even when `HARNESS_BUDGET_HARD_STOP` was false. `max_rounds` and messenger exchange limits in `validatePlanDebateGate` also hard-failed approval.
9
+
10
+ ## Decision
11
+
12
+ - **`HARNESS_BUDGET_ENFORCE` default `off`:** phase/debate caps log `harness-budget-soft-limit` and `harness-budget-telemetry` only; `harness-budget-exhausted` is emitted only when enforce is on **and** hard-stop flags are set.
13
+ - **UI:** `budgetExhausted` / blocked substate only when blocking exhaustion events qualify.
14
+ - **Debate:** `capsForDebate` uses sentinel caps when enforce is off; `max_rounds` gate errors become warnings.
15
+ - **CLI:** `--budget` on harness prompts is reserved/no-op until a real budget story ships.
16
+
17
+ Re-enable: `HARNESS_BUDGET_ENFORCE=1` plus `HARNESS_BUDGET_HARD_STOP` / `HARNESS_DEBATE_HARD_STOP` as needed.
18
+
19
+ ## Consequences
20
+
21
+ - Long debates and large plans are not blocked by soft token telemetry.
22
+ - Quality gates (`min_focus_rounds`, required focuses, `review_gate_ready`) remain enforced.
23
+ - PostHog dashboards should prefer `harness_budget_telemetry` over the exhausted event until enforcement is re-enabled.
@@ -22,6 +22,8 @@ Team-shared ADRs for the ultimate-pi harness live under `.pi/harness/docs/adrs/`
22
22
  | [0034](0034-darwin-plan-research-pipeline.md) | Darwin plan research pipeline | Accepted |
23
23
  | [0035](0035-plan-phase-review-gate.md) | Plan-phase Review Gate | Accepted |
24
24
  | [0036](0036-implementation-research-and-selective-debate.md) | Implementation research and selective debate | Accepted |
25
+ | [0037](0037-subagent-submit-tools.md) | Subagent submit tools (subprocess extension) | Accepted |
26
+ | [0038](0038-budget-telemetry-only.md) | Budget caps telemetry-only by default | Accepted |
25
27
 
26
28
  ## Template
27
29
 
@@ -0,0 +1,28 @@
1
+ schema_version: "1.0.0"
2
+ problem_framing: Validate harness plan-phase with fixture-driven smoke
3
+ sub_problems:
4
+ - DAG validation
5
+ - Debate gate coverage
6
+ internal_references:
7
+ - path: .pi/harness/evals/smoke/smoke-harness-plan.mjs
8
+ relevance: Existing smoke pattern
9
+ reuse_signal: high
10
+ external_references: []
11
+ solution_patterns:
12
+ - name: fixture-driven gate
13
+ provenance: in-repo smoke
14
+ fit: Validates plan pipeline without live agents
15
+ tradeoffs:
16
+ pros: [Deterministic CI]
17
+ cons: []
18
+ risks: []
19
+ similar_implementations: []
20
+ recommended_approach:
21
+ summary: Extend minimal-med fixture with implementation artifact
22
+ recommended_approach_confidence: high
23
+ confidence_rationale: Reuses established smoke-harness-plan pattern
24
+ evidence_refs:
25
+ - .pi/harness/evals/smoke/smoke-harness-plan.mjs
26
+ - .pi/scripts/validate-plan-dag.mjs
27
+ anti_patterns: []
28
+ open_questions: []
@@ -0,0 +1,25 @@
1
+ schema_version: "1.0.0"
2
+ round_index: 1
3
+ debate_round_focus: all
4
+ round_summary: Consolidated review gate for fast profile fixture
5
+ validation_summary: Spec and quality checks pass in one round
6
+ adversary_summary: No blockers
7
+ disputes: []
8
+ recommended_packet_patches: []
9
+ review_gate_ready: true
10
+ participants:
11
+ - PlanEvaluatorAgent
12
+ - PlanAdversaryAgent
13
+ - SprintContractAuditorAgent
14
+ - ReviewIntegratorAgent
15
+ claims:
16
+ - consolidated review gate ready
17
+ rebuttals: []
18
+ evidence_refs: []
19
+ token_usage:
20
+ per_agent:
21
+ PlanEvaluatorAgent: 120
22
+ PlanAdversaryAgent: 100
23
+ SprintContractAuditorAgent: 80
24
+ round_total: 300
25
+ consensus_delta: 0.1
@@ -0,0 +1,196 @@
1
+ schema_version: "1.0.0"
2
+ contract_version: "1.1.0"
3
+ plan_id: plan-smoke-fixture-001
4
+ task_id: task-smoke-001
5
+ scope: Smoke fixture for plan-phase harness validation with execution_plan and debate artifacts.
6
+ assumptions:
7
+ - Fixture only; no live agent run
8
+ risk_level: med
9
+ acceptance_checks:
10
+ - id: AC-1
11
+ description: DAG validation passes
12
+ - id: AC-2
13
+ description: Consolidated debate round recorded (fast profile)
14
+ - id: AC-3
15
+ description: Stack brief present in research-brief
16
+ - id: AC-4
17
+ description: Sprint contract complete
18
+ - id: AC-5
19
+ description: plan-review.md renders
20
+ rollback_plan:
21
+ revert_commit_ready: true
22
+ rollback_artifacts:
23
+ revert_command: git revert HEAD
24
+ revert_branch: main
25
+ patch_bundle: .pi/harness/runs/smoke-fixture/patch.bundle
26
+ execution_plan:
27
+ schema_version: "1.0.0"
28
+ phases:
29
+ - phase_id: P1
30
+ name: Foundation
31
+ objective: Establish baseline and verify harness wiring
32
+ entry_criteria:
33
+ - Fixture loaded
34
+ exit_criteria:
35
+ - AC-1 satisfied
36
+ milestone: M1-baseline
37
+ work_item_ids: [WI-1, WI-2, WI-3]
38
+ - phase_id: P2
39
+ name: Build
40
+ objective: Implement core changes
41
+ entry_criteria:
42
+ - M1-baseline complete
43
+ exit_criteria:
44
+ - AC-2 satisfied
45
+ milestone: M2-build
46
+ work_item_ids: [WI-4, WI-5, WI-6]
47
+ - phase_id: P3
48
+ name: Verify
49
+ objective: Quality gate and documentation
50
+ entry_criteria:
51
+ - M2-build complete
52
+ exit_criteria:
53
+ - AC-5 satisfied
54
+ milestone: M3-ship
55
+ work_item_ids: [WI-7, WI-8]
56
+ work_items:
57
+ - work_item_id: WI-1
58
+ phase_id: P1
59
+ title: Load fixture packet
60
+ description: Read plan-packet.yaml from fixture directory
61
+ depends_on: []
62
+ files:
63
+ - .pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/plan-packet.yaml
64
+ parallel_safe: true
65
+ done_criteria:
66
+ type: manual
67
+ spec: Fixture packet readable
68
+ acceptance_check_ids: [AC-1]
69
+ - work_item_id: WI-2
70
+ phase_id: P1
71
+ title: Run DAG validator
72
+ description: Execute validate-plan-dag.mjs
73
+ depends_on: [WI-1]
74
+ files:
75
+ - .pi/scripts/validate-plan-dag.mjs
76
+ parallel_safe: false
77
+ done_criteria:
78
+ type: command
79
+ spec: node .pi/scripts/validate-plan-dag.mjs --packet plan-packet.yaml
80
+ acceptance_check_ids: [AC-1]
81
+ - work_item_id: WI-3
82
+ phase_id: P1
83
+ title: Lint harness-yaml
84
+ description: Ensure YAML helpers parse fixture
85
+ depends_on: [WI-1]
86
+ files:
87
+ - .pi/lib/harness-yaml.ts
88
+ parallel_safe: true
89
+ done_criteria:
90
+ type: lint
91
+ spec: npm test
92
+ acceptance_check_ids: [AC-1]
93
+ - work_item_id: WI-4
94
+ phase_id: P2
95
+ title: Debate round 1-2 artifacts
96
+ description: Validate review-round YAML
97
+ depends_on: [WI-2]
98
+ files:
99
+ - .pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r1.yaml
100
+ parallel_safe: false
101
+ done_criteria:
102
+ type: artifact
103
+ spec: artifacts/review-round-r1.yaml exists
104
+ acceptance_check_ids: [AC-2]
105
+ - work_item_id: WI-5
106
+ phase_id: P2
107
+ title: Debate round 3-4 artifacts
108
+ description: Validate final review round
109
+ depends_on: [WI-4]
110
+ files:
111
+ - .pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r4.yaml
112
+ parallel_safe: false
113
+ done_criteria:
114
+ type: artifact
115
+ spec: artifacts/review-round-r4.yaml exists
116
+ acceptance_check_ids: [AC-2]
117
+ - work_item_id: WI-6
118
+ phase_id: P2
119
+ title: Stack research merge
120
+ description: research-brief includes stack section
121
+ depends_on: [WI-2]
122
+ files: []
123
+ non_code: true
124
+ parallel_safe: true
125
+ done_criteria:
126
+ type: manual
127
+ spec: research-brief.yaml contains stack key
128
+ acceptance_check_ids: [AC-3]
129
+ - work_item_id: WI-7
130
+ phase_id: P3
131
+ title: Sprint contract audit
132
+ description: R4 sprint audit artifact
133
+ depends_on: [WI-5]
134
+ files:
135
+ - .pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/sprint-audit-r4.yaml
136
+ parallel_safe: false
137
+ done_criteria:
138
+ type: artifact
139
+ spec: sprint-audit-r4.yaml present
140
+ acceptance_check_ids: [AC-4]
141
+ - work_item_id: WI-8
142
+ phase_id: P3
143
+ title: Render plan-review
144
+ description: Human-readable plan review markdown
145
+ depends_on: [WI-7]
146
+ files:
147
+ - .pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/plan-review.md
148
+ parallel_safe: false
149
+ done_criteria:
150
+ type: manual
151
+ spec: plan-review.md non-empty
152
+ acceptance_check_ids: [AC-5]
153
+ sprint_contract:
154
+ in_scope:
155
+ - Fixture validation only
156
+ out_of_scope:
157
+ - Production deploy
158
+ definition_of_done: All smoke checks green
159
+ assumptions:
160
+ - CI environment has node
161
+ external_dependencies: []
162
+ wbs_dictionary:
163
+ - work_item_id: WI-1
164
+ deliverable: Fixture packet loaded
165
+ owner_role: executor
166
+ inputs: []
167
+ outputs: [parsed packet]
168
+ risk_register:
169
+ - risk_id: R1
170
+ description: DAG validator false negative
171
+ likelihood: low
172
+ impact: high
173
+ mitigation: Unit tests on validate-plan-dag.mjs
174
+ linked_work_item_ids: [WI-2]
175
+ - risk_id: R2
176
+ description: Debate cap misconfiguration
177
+ likelihood: med
178
+ impact: med
179
+ mitigation: debate-orchestrator plan profile tests
180
+ linked_work_item_ids: [WI-4]
181
+ - risk_id: R3
182
+ description: YAML parse drift
183
+ likelihood: low
184
+ impact: med
185
+ mitigation: harness-yaml strict parse
186
+ linked_work_item_ids: [WI-3]
187
+ schedule_metadata:
188
+ critical_path_work_item_ids: [WI-1, WI-2, WI-4, WI-5, WI-7, WI-8]
189
+ parallel_groups:
190
+ - [WI-1, WI-3]
191
+ schedule_baseline_note: Fixture topological order; no calendar dates
192
+ dag_validation:
193
+ status: pass
194
+ topological_order: [WI-1, WI-2, WI-3, WI-4, WI-5, WI-6, WI-7, WI-8]
195
+ cycles: []
196
+ conflicts: []
@@ -0,0 +1,14 @@
1
+ # Plan review (fixture)
2
+
3
+ plan_id: plan-smoke-fixture-001
4
+
5
+ ## Execution plan
6
+
7
+ Phases: P1 Foundation → P2 Build → P3 Verify
8
+
9
+ Critical path: WI-1 → WI-2 → WI-4 → WI-5 → WI-7 → WI-8
10
+
11
+ ## Debate
12
+
13
+ - Round 1 (spec): review_gate_ready
14
+ - Round 4 (quality): review_gate_ready
@@ -0,0 +1,62 @@
1
+ decomposition:
2
+ schema_version: "1.0.0"
3
+ problem_restatement: Light-profile smoke for two-focus debate
4
+ hypothesis:
5
+ schema_version: "1.0.0"
6
+ primary:
7
+ claim: Light debate covers spec and quality only
8
+ mechanism: Eligibility profile light with min_focus_rounds 2
9
+ prediction: planDebateOutcomeComplete passes with two rounds
10
+ experiment: Run smoke-harness-plan.mjs --fixture minimal-low-light
11
+ implementation:
12
+ schema_version: "1.0.0"
13
+ problem_framing: Low-risk fixture for selective debate
14
+ sub_problems: [spec coverage, quality coverage]
15
+ internal_references:
16
+ - path: test/plan-debate-eligibility.test.mjs
17
+ relevance: Eligibility unit tests
18
+ reuse_signal: high
19
+ external_references: []
20
+ solution_patterns:
21
+ - name: light profile gate
22
+ provenance: ADR-0036
23
+ fit: Reduces debate cost on trivial tasks
24
+ tradeoffs:
25
+ pros: [Fewer rounds]
26
+ cons: []
27
+ risks: []
28
+ similar_implementations:
29
+ - name: minimal-med four-focus fixture
30
+ what_it_solves: Full debate coverage
31
+ gap_vs_us: Light uses two focuses only
32
+ recommended_approach:
33
+ summary: Two review rounds with spec then quality
34
+ recommended_approach_confidence: high
35
+ confidence_rationale: Deterministic fixture aligned with eligibility rules
36
+ evidence_refs:
37
+ - .pi/extensions/lib/plan-debate-eligibility.ts
38
+ - test/plan-debate-eligibility.test.mjs
39
+ anti_patterns: []
40
+ open_questions: []
41
+ stack:
42
+ schema_version: "1.0.0"
43
+ problem_framing: Node harness tooling
44
+ constraints: []
45
+ options:
46
+ - name: extend current stack
47
+ category: brownfield
48
+ fit_summary: Use existing ultimate-pi harness
49
+ tradeoffs:
50
+ pros: [No new deps]
51
+ cons: []
52
+ risks: []
53
+ evidence_refs: []
54
+ recommendation_rank: 1
55
+ recommended_primary: extend current stack
56
+ rationale: Fixture validates in-repo harness
57
+ eval:
58
+ schema_version: "1.0.0"
59
+ revision_recommended: false
60
+ relevance:
61
+ passes: true
62
+ rationale: Hypothesis matches light smoke task