ultimate-pi 0.11.0 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122)
  1. package/.agents/skills/harness-debate-plan/SKILL.md +44 -0
  2. package/.agents/skills/harness-decisions/SKILL.md +1 -1
  3. package/.agents/skills/harness-orchestration/SKILL.md +54 -28
  4. package/.agents/skills/harness-plan/SKILL.md +15 -20
  5. package/.pi/agents/harness/adversary.md +0 -1
  6. package/.pi/agents/harness/evaluator.md +0 -1
  7. package/.pi/agents/harness/executor.md +1 -2
  8. package/.pi/agents/harness/incident-recorder.md +0 -1
  9. package/.pi/agents/harness/meta-optimizer.md +0 -1
  10. package/.pi/agents/harness/planning/decompose.md +3 -4
  11. package/.pi/agents/harness/planning/execution-plan-author.md +30 -0
  12. package/.pi/agents/harness/planning/hypothesis-validator.md +23 -0
  13. package/.pi/agents/harness/planning/hypothesis.md +3 -4
  14. package/.pi/agents/harness/planning/plan-adversary.md +10 -42
  15. package/.pi/agents/harness/planning/plan-evaluator.md +18 -0
  16. package/.pi/agents/harness/planning/review-integrator.md +23 -0
  17. package/.pi/agents/harness/planning/scout-graphify.md +11 -5
  18. package/.pi/agents/harness/planning/scout-semantic.md +11 -6
  19. package/.pi/agents/harness/planning/scout-structure.md +12 -6
  20. package/.pi/agents/harness/planning/sprint-contract-auditor.md +18 -0
  21. package/.pi/agents/harness/planning/stack-researcher.md +24 -0
  22. package/.pi/agents/harness/tie-breaker.md +0 -1
  23. package/.pi/agents/harness/trace-librarian.md +0 -1
  24. package/.pi/extensions/debate-orchestrator.ts +90 -53
  25. package/.pi/extensions/harness-plan-approval.ts +2 -2
  26. package/.pi/extensions/harness-run-context.ts +145 -5
  27. package/.pi/extensions/harness-subagents.ts +2 -2
  28. package/.pi/extensions/lib/harness-posthog.ts +6 -1
  29. package/.pi/extensions/lib/harness-spawn-budget.ts +75 -0
  30. package/.pi/extensions/lib/harness-subagent-auth.ts +123 -0
  31. package/.pi/extensions/lib/{harness-subagents/harness-subagent-policy.ts → harness-subagent-policy.ts} +3 -6
  32. package/.pi/extensions/lib/harness-subagent-precheck.ts +95 -0
  33. package/.pi/extensions/lib/harness-subagents-bridge.ts +176 -0
  34. package/.pi/extensions/lib/plan-approval/create-plan.ts +4 -7
  35. package/.pi/extensions/lib/plan-approval/plan-review.ts +1 -1
  36. package/.pi/extensions/lib/plan-approval/types.ts +7 -1
  37. package/.pi/extensions/lib/plan-debate-envelope.ts +84 -0
  38. package/.pi/extensions/lib/{harness-subagents/spawn-policy.ts → spawn-policy.ts} +1 -0
  39. package/.pi/extensions/policy-gate.ts +1 -1
  40. package/.pi/extensions/review-integrity.ts +48 -29
  41. package/.pi/harness/agents.manifest.json +37 -25
  42. package/.pi/harness/docs/adrs/0032-harness-command-orchestration.md +4 -3
  43. package/.pi/harness/docs/adrs/0033-parent-orchestrated-planning.md +1 -1
  44. package/.pi/harness/docs/adrs/0035-plan-phase-review-gate.md +27 -0
  45. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r1.yaml +25 -0
  46. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r4.yaml +26 -0
  47. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/sprint-audit-r4.yaml +5 -0
  48. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/plan-packet.yaml +196 -0
  49. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/plan-review.md +14 -0
  50. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/research-brief.yaml +32 -0
  51. package/.pi/harness/evals/smoke/run-context.fixture.json +1 -1
  52. package/.pi/harness/evals/smoke/smoke-harness-plan.mjs +88 -0
  53. package/.pi/harness/specs/harness-posthog-event.schema.json +6 -1
  54. package/.pi/harness/specs/plan-execution-plan-brief.schema.json +13 -0
  55. package/.pi/harness/specs/plan-execution-plan.schema.json +255 -0
  56. package/.pi/harness/specs/plan-packet.schema.json +14 -5
  57. package/.pi/harness/specs/plan-review-round-draft.schema.json +68 -0
  58. package/.pi/harness/specs/plan-sprint-audit-turn.schema.json +29 -0
  59. package/.pi/harness/specs/plan-stack-brief.schema.json +65 -0
  60. package/.pi/harness/specs/plan-validation-turn.schema.json +42 -0
  61. package/.pi/harness/specs/round-result.schema.json +16 -9
  62. package/.pi/lib/debate-orchestrator-types.ts +38 -0
  63. package/.pi/lib/harness-agent-discovery.mjs +81 -0
  64. package/.pi/lib/harness-run-context.ts +64 -38
  65. package/.pi/lib/harness-yaml.mjs +73 -0
  66. package/.pi/lib/harness-yaml.ts +90 -0
  67. package/.pi/prompts/harness-auto.md +13 -11
  68. package/.pi/prompts/harness-critic.md +2 -2
  69. package/.pi/prompts/harness-eval.md +3 -3
  70. package/.pi/prompts/harness-incident.md +2 -2
  71. package/.pi/prompts/harness-plan.md +79 -93
  72. package/.pi/prompts/harness-review.md +2 -2
  73. package/.pi/prompts/harness-router-tune.md +1 -1
  74. package/.pi/prompts/harness-run.md +2 -2
  75. package/.pi/prompts/harness-setup.md +15 -6
  76. package/.pi/prompts/harness-trace.md +2 -2
  77. package/.pi/scripts/harness-agents-manifest.mjs +1 -1
  78. package/.pi/scripts/harness-verify.mjs +28 -19
  79. package/.pi/scripts/validate-plan-dag.mjs +258 -0
  80. package/.pi/scripts/vendor-sync-pi-subagents.sh +19 -0
  81. package/CHANGELOG.md +12 -0
  82. package/THIRD_PARTY_NOTICES.md +8 -0
  83. package/biome.json +2 -2
  84. package/package.json +6 -4
  85. package/.pi/agents/harness/planner.md +0 -13
  86. package/.pi/agents/harness/planning/hypothesis-eval.md +0 -59
  87. package/.pi/agents/harness/planning/planner.md +0 -20
  88. package/.pi/extensions/lib/harness-subagents/agent-loader.ts +0 -126
  89. package/.pi/extensions/lib/harness-subagents/agent-manifest.ts +0 -119
  90. package/.pi/extensions/lib/harness-subagents/agent-parser.ts +0 -87
  91. package/.pi/extensions/lib/harness-subagents/blackboard-tool.ts +0 -118
  92. package/.pi/extensions/lib/harness-subagents/blackboard.ts +0 -175
  93. package/.pi/extensions/lib/harness-subagents/parent-ask-user-bridge.ts +0 -10
  94. package/.pi/extensions/lib/harness-subagents/parent-harness-ui-bridge.ts +0 -137
  95. package/.pi/extensions/lib/harness-subagents/parent-harness-ui-hooks.ts +0 -77
  96. package/.pi/extensions/lib/harness-subagents/types-blackboard.ts +0 -27
  97. package/.pi/extensions/lib/harness-subagents/vendored/agent-manager.ts +0 -558
  98. package/.pi/extensions/lib/harness-subagents/vendored/agent-runner.ts +0 -666
  99. package/.pi/extensions/lib/harness-subagents/vendored/agent-types.ts +0 -175
  100. package/.pi/extensions/lib/harness-subagents/vendored/context.ts +0 -59
  101. package/.pi/extensions/lib/harness-subagents/vendored/cross-extension-rpc.ts +0 -134
  102. package/.pi/extensions/lib/harness-subagents/vendored/custom-agents.ts +0 -5
  103. package/.pi/extensions/lib/harness-subagents/vendored/default-agents.ts +0 -123
  104. package/.pi/extensions/lib/harness-subagents/vendored/env.ts +0 -43
  105. package/.pi/extensions/lib/harness-subagents/vendored/group-join.ts +0 -144
  106. package/.pi/extensions/lib/harness-subagents/vendored/index.ts +0 -2460
  107. package/.pi/extensions/lib/harness-subagents/vendored/invocation-config.ts +0 -52
  108. package/.pi/extensions/lib/harness-subagents/vendored/memory.ts +0 -182
  109. package/.pi/extensions/lib/harness-subagents/vendored/model-resolver.ts +0 -92
  110. package/.pi/extensions/lib/harness-subagents/vendored/output-file.ts +0 -115
  111. package/.pi/extensions/lib/harness-subagents/vendored/prompts.ts +0 -103
  112. package/.pi/extensions/lib/harness-subagents/vendored/schedule-store.ts +0 -177
  113. package/.pi/extensions/lib/harness-subagents/vendored/schedule.ts +0 -416
  114. package/.pi/extensions/lib/harness-subagents/vendored/settings.ts +0 -210
  115. package/.pi/extensions/lib/harness-subagents/vendored/skill-loader.ts +0 -108
  116. package/.pi/extensions/lib/harness-subagents/vendored/types.ts +0 -187
  117. package/.pi/extensions/lib/harness-subagents/vendored/ui/agent-widget.ts +0 -639
  118. package/.pi/extensions/lib/harness-subagents/vendored/ui/conversation-viewer.ts +0 -324
  119. package/.pi/extensions/lib/harness-subagents/vendored/ui/schedule-menu.ts +0 -110
  120. package/.pi/extensions/lib/harness-subagents/vendored/usage.ts +0 -71
  121. package/.pi/extensions/lib/harness-subagents/vendored/worktree.ts +0 -195
  122. package/.pi/extensions/{00-ultimate-pi-system-prompt.ts → custom-system-prompt.ts} +0 -0
@@ -1,9 +1,8 @@
1
1
  /**
2
2
  * review-integrity — enforce evaluator/adversary isolation from executor session.
3
3
  *
4
- * Parent orchestrators spawn review agents in isolated subagent sessions.
5
- * Direct review tools in the executor session are blocked; Agent/get_subagent_result
6
- * for harness review agents remain allowed.
4
+ * Parent orchestrators spawn review agents in isolated subprocesses via `subagent`.
5
+ * Direct review tools in the executor session are blocked.
7
6
  */
8
7
 
9
8
  import { appendFile, mkdir } from "node:fs/promises";
@@ -15,12 +14,6 @@ type HarnessPhase = "plan" | "execute" | "evaluate" | "adversary" | "merge";
15
14
  const INCIDENTS_DIR = join(process.cwd(), ".pi", "harness", "incidents");
16
15
  const INCIDENT_FILE = join(INCIDENTS_DIR, "review-integrity.jsonl");
17
16
 
18
- const ORCHESTRATION_TOOLS = new Set([
19
- "Agent",
20
- "get_subagent_result",
21
- "steer_subagent",
22
- ]);
23
-
24
17
  const REVIEW_SUBAGENT_TYPES = new Set([
25
18
  "harness/evaluator",
26
19
  "harness/adversary",
@@ -104,15 +97,45 @@ function restoreState(ctx: {
104
97
  };
105
98
  }
106
99
 
107
- function subagentTypeFromInput(
100
+ function agentsFromSubagentInput(
108
101
  input: Record<string, unknown> | undefined,
109
- ): string {
110
- if (!input) return "";
111
- const direct = input.subagent_type;
112
- if (typeof direct === "string") return direct;
113
- const nested = input as { subagentType?: string };
114
- if (typeof nested.subagentType === "string") return nested.subagentType;
115
- return "";
102
+ ): string[] {
103
+ if (!input) return [];
104
+ const names: string[] = [];
105
+ if (typeof input.agent === "string") names.push(input.agent);
106
+ const tasks = input.tasks;
107
+ if (Array.isArray(tasks)) {
108
+ for (const t of tasks) {
109
+ if (
110
+ t &&
111
+ typeof t === "object" &&
112
+ typeof (t as { agent?: string }).agent === "string"
113
+ ) {
114
+ names.push((t as { agent: string }).agent);
115
+ }
116
+ }
117
+ }
118
+ const chain = input.chain;
119
+ if (Array.isArray(chain)) {
120
+ for (const c of chain) {
121
+ if (
122
+ c &&
123
+ typeof c === "object" &&
124
+ typeof (c as { agent?: string }).agent === "string"
125
+ ) {
126
+ names.push((c as { agent: string }).agent);
127
+ }
128
+ }
129
+ }
130
+ const agg = input.aggregator;
131
+ if (
132
+ agg &&
133
+ typeof agg === "object" &&
134
+ typeof (agg as { agent?: string }).agent === "string"
135
+ ) {
136
+ names.push((agg as { agent: string }).agent);
137
+ }
138
+ return names;
116
139
  }
117
140
 
118
141
  async function appendIncident(payload: Record<string, unknown>): Promise<void> {
@@ -178,26 +201,26 @@ export default function reviewIntegrity(pi: ExtensionAPI) {
178
201
  customType: "harness-review-integrity-hint",
179
202
  display: true,
180
203
  content: [
181
- "Review phase in executor session: spawn harness/evaluator or harness/adversary via Agent (isolated subagent context).",
182
- "Do not run review checks directly in this session — use get_subagent_result after spawn.",
204
+ "Review phase in executor session: spawn harness/evaluator or harness/adversary via subagent (isolated subprocess).",
205
+ "Do not run review checks directly in this session.",
183
206
  ].join("\n"),
184
207
  },
185
208
  };
186
209
  });
187
210
 
188
211
  pi.on("tool_call", async (event, ctx) => {
189
- if (event.toolName === "Agent") {
190
- const subagentType = subagentTypeFromInput(
212
+ if (event.toolName === "subagent") {
213
+ const agents = agentsFromSubagentInput(
191
214
  event.input as Record<string, unknown> | undefined,
192
215
  );
193
- if (subagentType === EXECUTOR_SUBAGENT_TYPE) {
216
+ if (agents.includes(EXECUTOR_SUBAGENT_TYPE)) {
194
217
  state.executorSessionId = ctx.sessionManager.getSessionId();
195
218
  state.violationActive = false;
196
219
  state.updatedAt = nowIso();
197
220
  persist();
198
221
  return undefined;
199
222
  }
200
- if (REVIEW_SUBAGENT_TYPES.has(subagentType)) {
223
+ if (agents.some((a) => REVIEW_SUBAGENT_TYPES.has(a))) {
201
224
  state.violationActive = false;
202
225
  state.updatedAt = nowIso();
203
226
  persist();
@@ -207,10 +230,6 @@ export default function reviewIntegrity(pi: ExtensionAPI) {
207
230
 
208
231
  if (!state.violationActive) return undefined;
209
232
 
210
- if (ORCHESTRATION_TOOLS.has(event.toolName)) {
211
- return undefined;
212
- }
213
-
214
233
  await appendIncident({
215
234
  type: "review_integrity_violation",
216
235
  session_id: ctx.sessionManager.getSessionId(),
@@ -218,13 +237,13 @@ export default function reviewIntegrity(pi: ExtensionAPI) {
218
237
  reason:
219
238
  "direct tool use in review phase while sharing executor session context",
220
239
  mitigation:
221
- "spawn harness/evaluator or harness/adversary via Agent instead",
240
+ "spawn harness/evaluator or harness/adversary via subagent instead",
222
241
  });
223
242
 
224
243
  return {
225
244
  block: true,
226
245
  reason:
227
- "review-integrity: tool blocked in review phase — spawn an isolated review subagent via Agent.",
246
+ "review-integrity: tool blocked in review phase — spawn an isolated review subagent via subagent.",
228
247
  };
229
248
  });
230
249
 
@@ -1,8 +1,8 @@
1
1
  {
2
2
  "schema_version": "1.0.0",
3
3
  "package": "ultimate-pi",
4
- "package_version": "0.10.1",
5
- "generated_at": "2026-05-17T17:48:22.826Z",
4
+ "package_version": "0.11.0",
5
+ "generated_at": "2026-05-18T09:43:44.563Z",
6
6
  "agents": {
7
7
  "pi-pi/agent-expert": {
8
8
  "path": ".pi/agents/pi-pi/agent-expert.md",
@@ -46,27 +46,23 @@
46
46
  },
47
47
  "harness/adversary": {
48
48
  "path": ".pi/agents/harness/adversary.md",
49
- "sha256": "b965f90610ca942d08b656f1aee839266d08a92beb174b8761dd5e840694a899"
49
+ "sha256": "dd2ef87139cb175d795f4d7bde78dca1a181d2e42c3c3bd0d48832cf5069aa29"
50
50
  },
51
51
  "harness/evaluator": {
52
52
  "path": ".pi/agents/harness/evaluator.md",
53
- "sha256": "6c0de777a10de26dba4a6feb5641495fa5c2d31072a8b0e597a5ecc9921f129f"
53
+ "sha256": "2b8039fd79f9177fdafd5319a53a96812719d4f1f68e2de70632030142649cfe"
54
54
  },
55
55
  "harness/executor": {
56
56
  "path": ".pi/agents/harness/executor.md",
57
- "sha256": "5af3ec2be4d64a738834e36d480a36c2bee4359e8cd5a2e1aac49be4cff79589"
57
+ "sha256": "b549e9fc802ba23857a1bc6b2ff36f3c169e708fe5ec13857b3bcfe841384f1f"
58
58
  },
59
59
  "harness/incident-recorder": {
60
60
  "path": ".pi/agents/harness/incident-recorder.md",
61
- "sha256": "2de405f77b62dde38f331665bff220a3ef131c3c1cd42eebee364000fc83352b"
61
+ "sha256": "d7577c911a9e6c9607eb64f76337aab85c4eb9a92e7cd917eb8d989ef3cd1de5"
62
62
  },
63
63
  "harness/meta-optimizer": {
64
64
  "path": ".pi/agents/harness/meta-optimizer.md",
65
- "sha256": "ef2fb950e18e3a6439e91a68f764fc7ec922cd2d6b35de8f656f376854974d04"
66
- },
67
- "harness/planner": {
68
- "path": ".pi/agents/harness/planner.md",
69
- "sha256": "648b9e4c56a6c0f983ae990238952579bb4745af81eb6c79add5325ea4929c91"
65
+ "sha256": "a4eed88084c7cfb5ace3edc72b72d7ead4134b3eae0d444b391decfe2640a632"
70
66
  },
71
67
  "harness/sentrux-bootstrap": {
72
68
  "path": ".pi/agents/harness/sentrux-bootstrap.md",
@@ -74,43 +70,59 @@
74
70
  },
75
71
  "harness/tie-breaker": {
76
72
  "path": ".pi/agents/harness/tie-breaker.md",
77
- "sha256": "651f50b9e2c7903c542700e94908b1fcd026ebed12aa1f1d6ec481df3567e34f"
73
+ "sha256": "68f02b86e95927f06d7f963e1f61f193159bbef1ba4558d90c84d5457d62b3f7"
78
74
  },
79
75
  "harness/trace-librarian": {
80
76
  "path": ".pi/agents/harness/trace-librarian.md",
81
- "sha256": "d63fe08a2ea0466c0fd89fff4da03ac1d9d3580c306381cee251c89d4e8fdb97"
77
+ "sha256": "03b499a948b8467f1cfe2b4e63190feb7b8b9d96461055638e774253b9b6b2d4"
82
78
  },
83
79
  "harness/planning/decompose": {
84
80
  "path": ".pi/agents/harness/planning/decompose.md",
85
- "sha256": "a4f33869759ebdc049e77b344ae050be5ede08ea9a92216b8599cc7d2f14c052"
81
+ "sha256": "1b3f85d956d2e203ec87045a731c47f8b40f75b63fce8916fda91cefc39244a8"
82
+ },
83
+ "harness/planning/execution-plan-author": {
84
+ "path": ".pi/agents/harness/planning/execution-plan-author.md",
85
+ "sha256": "a69fb2e8bda9336e71ce9536071f9c8a2f4abd9d9d88930c6a8be29bdc9c5f62"
86
86
  },
87
- "harness/planning/hypothesis-eval": {
88
- "path": ".pi/agents/harness/planning/hypothesis-eval.md",
89
- "sha256": "7a05e2f746bf79f20096cbfb12aaee31a0717e660680b44cb285ea967b3141e5"
87
+ "harness/planning/hypothesis-validator": {
88
+ "path": ".pi/agents/harness/planning/hypothesis-validator.md",
89
+ "sha256": "f75312439c441ccee72692d41f44b6e733df08e06c89e930740fc256bed3ba02"
90
90
  },
91
91
  "harness/planning/hypothesis": {
92
92
  "path": ".pi/agents/harness/planning/hypothesis.md",
93
- "sha256": "7f2af6dda328d6cc1279dbff20a46b1d93aacfda9d57857cc6117685f8a585dd"
93
+ "sha256": "b20c527d15c2243cd5d3a8f16cea6d44bdfd16e01915d42f3b830bf9938e5f8b"
94
94
  },
95
95
  "harness/planning/plan-adversary": {
96
96
  "path": ".pi/agents/harness/planning/plan-adversary.md",
97
- "sha256": "4beceb8c4181f82b7eb006d87392c0adb4c7ce41992193790888e7298c1b7594"
97
+ "sha256": "685926c638ae1377361d7cafda5e400be19cb3880510d8f6d389a5876647575f"
98
+ },
99
+ "harness/planning/plan-evaluator": {
100
+ "path": ".pi/agents/harness/planning/plan-evaluator.md",
101
+ "sha256": "44fd52389d7e43dd5093653cba9694900561318ee5f00e3bc05c3ecef5d43621"
98
102
  },
99
- "harness/planning/planner": {
100
- "path": ".pi/agents/harness/planning/planner.md",
101
- "sha256": "570c501c976e26d79a36814787eb03fab6aa97f79cc895af319dc717648a2a65"
103
+ "harness/planning/review-integrator": {
104
+ "path": ".pi/agents/harness/planning/review-integrator.md",
105
+ "sha256": "d0e8214539d0a78b9e5add70e61dd4e4de36def64172cda18d9b70727e7600ca"
102
106
  },
103
107
  "harness/planning/scout-graphify": {
104
108
  "path": ".pi/agents/harness/planning/scout-graphify.md",
105
- "sha256": "76a66a3dc8bce60a91ed30ffdc683fb1eab0692006b0ee80fbdc67b11b374b61"
109
+ "sha256": "b59916a26afccfe105e29c0bd8637ac54275e8afef1c6cc88a58bd05b0325473"
106
110
  },
107
111
  "harness/planning/scout-semantic": {
108
112
  "path": ".pi/agents/harness/planning/scout-semantic.md",
109
- "sha256": "99aedca25fd81000d3bb532e0191ce9e1a87b84ab4039f089734f3a0d24ba44b"
113
+ "sha256": "47b7ea3e65b20a65e6d0ff11b6d5daff59b47a9ed618b8a3b6282f2eb0460572"
110
114
  },
111
115
  "harness/planning/scout-structure": {
112
116
  "path": ".pi/agents/harness/planning/scout-structure.md",
113
- "sha256": "83fd09e5eccd77b27d9de464d7e32536d9a762469e021b86b0ca665942bb40af"
117
+ "sha256": "e67b7cd75519e5ae36e1bb5f49ca158888c28d365465863aee50a9b2e8e5b7d7"
118
+ },
119
+ "harness/planning/sprint-contract-auditor": {
120
+ "path": ".pi/agents/harness/planning/sprint-contract-auditor.md",
121
+ "sha256": "f613a4fa937d76936fa01155d4e7956a81878f300100f99f6a78915b0af6f7c7"
122
+ },
123
+ "harness/planning/stack-researcher": {
124
+ "path": ".pi/agents/harness/planning/stack-researcher.md",
125
+ "sha256": "90e2ff1348f54bebc8c0392407bf1bb4d794c942fd8d6f342d80b191c945b34e"
114
126
  }
115
127
  }
116
128
  }
@@ -5,14 +5,14 @@
5
5
 
6
6
  ## Context
7
7
 
8
- Harness slash prompts duplicated logic already defined in `harness/*` agents. Commands did not invoke the `Agent` tool. Review docs told users to fork a new Pi session even though subagents already provide isolated context.
8
+ Harness slash prompts duplicated logic already defined in `harness/*` agents. The in-process `Agent` / `createAgentSession` stack was heavy and unstable. Review docs told users to fork a new Pi session even though subprocess subagents already provide isolation.
9
9
 
10
10
  ## Decision
11
11
 
12
12
  1. **Slash commands** (prompt templates) are orchestrators: spawn `harness/*` agents once, perform policy-gated writes, emit handoff blocks. Command identity is captured on Pi **`input`** as `harness-turn` (raw `/harness-*`), not from expanded prompt markdown.
13
13
  2. **Agents** perform multi-turn reads and emit structured JSON drafts. **Planning** (`harness/planning/*`) scouts and plan-adversary are read-only; parent orchestrator runs `ask_user`, `approve_plan`, and `create_plan` (see ADR 0033).
14
14
  3. **HarnessSpawnContext** is injected in `[HarnessRunContext]`; orchestrator copies it into spawn prompts. Subagents do not receive `[HarnessActivePlan]` injection.
15
- 4. **Review isolation** uses `Agent` spawn with `inherit_context: false`. `review-integrity` allows `Agent` / `get_subagent_result` for evaluator/adversary/tie-breaker.
15
+ 4. **Review isolation** uses native `subagent` (vendored pi-subagents: isolated `pi --mode json` subprocess). `review-integrity` allows `subagent` when `agent` is evaluator/adversary/tie-breaker; bridge blocks plan-phase mutating spawns and nested `subagent` in children.
16
16
  5. **Subagent policy** blocks mutating tools for read-only phase agents; `ask_user` bridged for evaluator/adversary/tie-breaker only (not planning scouts).
17
17
  6. **Parent** owns plan-phase `ask_user`, `approve_plan`, and `create_plan` per ADR 0033.
18
18
 
@@ -32,6 +32,7 @@ Harness slash prompts duplicated logic already defined in `harness/*` agents. Co
32
32
 
33
33
  - `.pi/prompts/harness-*.md`
34
34
  - `.pi/agents/harness/*.md`
35
- - `.pi/extensions/lib/harness-subagents/harness-subagent-policy.ts`
35
+ - `vendor/pi-subagents/src/subagents.ts`, `.pi/extensions/lib/harness-subagents-bridge.ts`
36
+ - `.pi/extensions/lib/harness-subagent-policy.ts`
36
37
  - `.pi/extensions/review-integrity.ts`
37
38
  - `.pi/lib/harness-agent-output.ts`
@@ -5,7 +5,7 @@
5
5
 
6
6
  ## Context
7
7
 
8
- `/harness-plan` delegated the full plan lifecycle to a single `harness/planner` subagent. Plans and approval UI were largely invisible in the parent transcript until `get_subagent_result`, and the orchestrator could not call `ask_user` / `approve_plan` / `create_plan` directly.
8
+ `/harness-plan` previously delegated the full plan lifecycle to a single `harness/planner` subagent. Plans and approval UI were largely invisible in the parent transcript until subprocess completion, and the orchestrator could not call `ask_user` / `approve_plan` / `create_plan` directly.
9
9
 
10
10
  ## Decision
11
11
 
@@ -0,0 +1,27 @@
1
+ # ADR-0035: Plan-phase Review Gate and YAML artifacts
2
+
3
+ ## Status
4
+
5
+ Accepted (2026-05-18)
6
+
7
+ ## Context
8
+
9
+ `/harness-plan` produced thin PlanPackets (scope + bullets). Post-execute adversarial review (`/harness-critic`) ran too late. Graphify corpus (Structured Planning, ADR-020, Generator–Evaluator) defines WBS, validation, and review gate before baseline.
10
+
11
+ ## Decision
12
+
13
+ 1. **PlanPacket 1.1.0** — required `execution_plan` (phases, work_items, sprint_contract, dag_validation).
14
+ 2. **YAML on disk** — `plan-packet.yaml`, `research-brief.yaml`, `run-context.yaml`, `artifacts/*.yaml`. JSON Schema unchanged; instances validated after YAML parse.
15
+ 3. **Review Gate agents** — `stack-researcher`, `execution-plan-author`, debate: `hypothesis-validator`, `plan-evaluator`, `plan-adversary`, `sprint-contract-auditor`, `review-integrator`.
16
+ 4. **Debate bus** — `debate_id=plan-<run_id>`, plan budget profile (4 rounds, 12k cap), plan-phase consensus prerequisites.
17
+ 5. **No legacy JSON** plan paths; no pre-debate standalone `hypothesis-eval`.
18
+
19
+ ## Consequences
20
+
21
+ - Positive: PM-grade plans, deterministic DAG gate, blind hypothesis eval in debate R1.
22
+ - Negative: Higher spawn/token cost; `harness-verify` and smoke fixtures must use `.yaml`.
23
+
24
+ ## References
25
+
26
+ - [ADR-0033](0033-parent-orchestrated-planning.md), [ADR-0034](0034-darwin-plan-research-pipeline.md)
27
+ - `raw/decisions/adr-020.md`, `raw/modules/structured-planning.md`
@@ -0,0 +1,25 @@
1
+ schema_version: "1.0.0"
2
+ round_index: 1
3
+ debate_round_focus: spec
4
+ round_summary: Spec round passed for fixture
5
+ validation_summary: All spec checks pass
6
+ adversary_summary: No blocking adversarial findings
7
+ disputes: []
8
+ recommended_packet_patches: []
9
+ review_gate_ready: true
10
+ participants:
11
+ - PlanEvaluatorAgent
12
+ - PlanAdversaryAgent
13
+ - HypothesisValidatorAgent
14
+ - ReviewIntegratorAgent
15
+ claims:
16
+ - spec validation complete
17
+ rebuttals: []
18
+ evidence_refs: []
19
+ token_usage:
20
+ per_agent:
21
+ PlanEvaluatorAgent: 100
22
+ PlanAdversaryAgent: 100
23
+ ReviewIntegratorAgent: 50
24
+ round_total: 250
25
+ consensus_delta: 0.1
@@ -0,0 +1,26 @@
1
+ schema_version: "1.0.0"
2
+ round_index: 4
3
+ debate_round_focus: quality
4
+ round_summary: Quality and sprint contract round passed
5
+ validation_summary: Sprint contract complete
6
+ adversary_summary: No gaps
7
+ disputes: []
8
+ recommended_packet_patches: []
9
+ review_gate_ready: true
10
+ participants:
11
+ - PlanEvaluatorAgent
12
+ - PlanAdversaryAgent
13
+ - SprintContractAuditorAgent
14
+ - ReviewIntegratorAgent
15
+ claims:
16
+ - review gate ready
17
+ rebuttals: []
18
+ evidence_refs: []
19
+ token_usage:
20
+ per_agent:
21
+ PlanEvaluatorAgent: 120
22
+ PlanAdversaryAgent: 110
23
+ SprintContractAuditorAgent: 90
24
+ ReviewIntegratorAgent: 60
25
+ round_total: 380
26
+ consensus_delta: 0.15
@@ -0,0 +1,5 @@
1
+ schema_version: "1.0.0"
2
+ round_index: 4
3
+ gaps: []
4
+ recommendation: proceed
5
+ human_summary: Sprint contract satisfies ADR-020 for fixture
@@ -0,0 +1,196 @@
1
+ schema_version: "1.0.0"
2
+ contract_version: "1.1.0"
3
+ plan_id: plan-smoke-fixture-001
4
+ task_id: task-smoke-001
5
+ scope: Smoke fixture for plan-phase harness validation with execution_plan and debate artifacts.
6
+ assumptions:
7
+ - Fixture only; no live agent run
8
+ risk_level: med
9
+ acceptance_checks:
10
+ - id: AC-1
11
+ description: DAG validation passes
12
+ - id: AC-2
13
+ description: Four debate rounds recorded
14
+ - id: AC-3
15
+ description: Stack brief present in research-brief
16
+ - id: AC-4
17
+ description: Sprint contract complete
18
+ - id: AC-5
19
+ description: plan-review.md renders
20
+ rollback_plan:
21
+ revert_commit_ready: true
22
+ rollback_artifacts:
23
+ revert_command: git revert HEAD
24
+ revert_branch: main
25
+ patch_bundle: .pi/harness/runs/smoke-fixture/patch.bundle
26
+ execution_plan:
27
+ schema_version: "1.0.0"
28
+ phases:
29
+ - phase_id: P1
30
+ name: Foundation
31
+ objective: Establish baseline and verify harness wiring
32
+ entry_criteria:
33
+ - Fixture loaded
34
+ exit_criteria:
35
+ - AC-1 satisfied
36
+ milestone: M1-baseline
37
+ work_item_ids: [WI-1, WI-2, WI-3]
38
+ - phase_id: P2
39
+ name: Build
40
+ objective: Implement core changes
41
+ entry_criteria:
42
+ - M1-baseline complete
43
+ exit_criteria:
44
+ - AC-2 satisfied
45
+ milestone: M2-build
46
+ work_item_ids: [WI-4, WI-5, WI-6]
47
+ - phase_id: P3
48
+ name: Verify
49
+ objective: Quality gate and documentation
50
+ entry_criteria:
51
+ - M2-build complete
52
+ exit_criteria:
53
+ - AC-5 satisfied
54
+ milestone: M3-ship
55
+ work_item_ids: [WI-7, WI-8]
56
+ work_items:
57
+ - work_item_id: WI-1
58
+ phase_id: P1
59
+ title: Load fixture packet
60
+ description: Read plan-packet.yaml from fixture directory
61
+ depends_on: []
62
+ files:
63
+ - .pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/plan-packet.yaml
64
+ parallel_safe: true
65
+ done_criteria:
66
+ type: manual
67
+ spec: Fixture packet readable
68
+ acceptance_check_ids: [AC-1]
69
+ - work_item_id: WI-2
70
+ phase_id: P1
71
+ title: Run DAG validator
72
+ description: Execute validate-plan-dag.mjs
73
+ depends_on: [WI-1]
74
+ files:
75
+ - .pi/scripts/validate-plan-dag.mjs
76
+ parallel_safe: false
77
+ done_criteria:
78
+ type: command
79
+ spec: node .pi/scripts/validate-plan-dag.mjs --packet plan-packet.yaml
80
+ acceptance_check_ids: [AC-1]
81
+ - work_item_id: WI-3
82
+ phase_id: P1
83
+ title: Lint harness-yaml
84
+ description: Ensure YAML helpers parse fixture
85
+ depends_on: [WI-1]
86
+ files:
87
+ - .pi/lib/harness-yaml.ts
88
+ parallel_safe: true
89
+ done_criteria:
90
+ type: lint
91
+ spec: npm test
92
+ acceptance_check_ids: [AC-1]
93
+ - work_item_id: WI-4
94
+ phase_id: P2
95
+ title: Debate round 1-2 artifacts
96
+ description: Validate review-round YAML
97
+ depends_on: [WI-2]
98
+ files:
99
+ - .pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r1.yaml
100
+ parallel_safe: false
101
+ done_criteria:
102
+ type: artifact
103
+ spec: artifacts/review-round-r1.yaml exists
104
+ acceptance_check_ids: [AC-2]
105
+ - work_item_id: WI-5
106
+ phase_id: P2
107
+ title: Debate round 3-4 artifacts
108
+ description: Validate final review round
109
+ depends_on: [WI-4]
110
+ files:
111
+ - .pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r4.yaml
112
+ parallel_safe: false
113
+ done_criteria:
114
+ type: artifact
115
+ spec: artifacts/review-round-r4.yaml exists
116
+ acceptance_check_ids: [AC-2]
117
+ - work_item_id: WI-6
118
+ phase_id: P2
119
+ title: Stack research merge
120
+ description: research-brief includes stack section
121
+ depends_on: [WI-2]
122
+ files: []
123
+ non_code: true
124
+ parallel_safe: true
125
+ done_criteria:
126
+ type: manual
127
+ spec: research-brief.yaml contains stack key
128
+ acceptance_check_ids: [AC-3]
129
+ - work_item_id: WI-7
130
+ phase_id: P3
131
+ title: Sprint contract audit
132
+ description: R4 sprint audit artifact
133
+ depends_on: [WI-5]
134
+ files:
135
+ - .pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/sprint-audit-r4.yaml
136
+ parallel_safe: false
137
+ done_criteria:
138
+ type: artifact
139
+ spec: sprint-audit-r4.yaml present
140
+ acceptance_check_ids: [AC-4]
141
+ - work_item_id: WI-8
142
+ phase_id: P3
143
+ title: Render plan-review
144
+ description: Human-readable plan review markdown
145
+ depends_on: [WI-7]
146
+ files:
147
+ - .pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/plan-review.md
148
+ parallel_safe: false
149
+ done_criteria:
150
+ type: manual
151
+ spec: plan-review.md non-empty
152
+ acceptance_check_ids: [AC-5]
153
+ sprint_contract:
154
+ in_scope:
155
+ - Fixture validation only
156
+ out_of_scope:
157
+ - Production deploy
158
+ definition_of_done: All smoke checks green
159
+ assumptions:
160
+ - CI environment has node
161
+ external_dependencies: []
162
+ wbs_dictionary:
163
+ - work_item_id: WI-1
164
+ deliverable: Fixture packet loaded
165
+ owner_role: executor
166
+ inputs: []
167
+ outputs: [parsed packet]
168
+ risk_register:
169
+ - risk_id: R1
170
+ description: DAG validator false negative
171
+ likelihood: low
172
+ impact: high
173
+ mitigation: Unit tests on validate-plan-dag.mjs
174
+ linked_work_item_ids: [WI-2]
175
+ - risk_id: R2
176
+ description: Debate cap misconfiguration
177
+ likelihood: med
178
+ impact: med
179
+ mitigation: debate-orchestrator plan profile tests
180
+ linked_work_item_ids: [WI-4]
181
+ - risk_id: R3
182
+ description: YAML parse drift
183
+ likelihood: low
184
+ impact: med
185
+ mitigation: harness-yaml strict parse
186
+ linked_work_item_ids: [WI-3]
187
+ schedule_metadata:
188
+ critical_path_work_item_ids: [WI-1, WI-2, WI-4, WI-5, WI-7, WI-8]
189
+ parallel_groups:
190
+ - [WI-1, WI-3]
191
+ schedule_baseline_note: Fixture topological order; no calendar dates
192
+ dag_validation:
193
+ status: pass
194
+ topological_order: [WI-1, WI-2, WI-3, WI-4, WI-5, WI-6, WI-7, WI-8]
195
+ cycles: []
196
+ conflicts: []
@@ -0,0 +1,14 @@
1
+ # Plan review (fixture)
2
+
3
+ plan_id: plan-smoke-fixture-001
4
+
5
+ ## Execution plan
6
+
7
+ Phases: P1 Foundation → P2 Build → P3 Verify
8
+
9
+ Critical path: WI-1 → WI-2 → WI-4 → WI-5 → WI-7 → WI-8
10
+
11
+ ## Debate
12
+
13
+ - Round 1 (spec): review_gate_ready
14
+ - Round 4 (quality): review_gate_ready
@@ -0,0 +1,32 @@
1
+ decomposition:
2
+ schema_version: "1.0.0"
3
+ problem_restatement: Validate plan-phase YAML and debate pipeline
4
+ hypothesis:
5
+ schema_version: "1.0.0"
6
+ primary:
7
+ claim: Fixture-driven smoke covers DAG and debate
8
+ mechanism: Static artifacts plus validate-plan-dag.mjs
9
+ prediction: CI passes without live agents
10
+ experiment: Run smoke-harness-plan.mjs --fixture
11
+ stack:
12
+ schema_version: "1.0.0"
13
+ problem_framing: Node harness tooling
14
+ constraints: []
15
+ options:
16
+ - name: extend current stack
17
+ category: brownfield
18
+ fit_summary: Use existing ultimate-pi harness
19
+ tradeoffs:
20
+ pros: [No new deps]
21
+ cons: []
22
+ risks: []
23
+ evidence_refs: []
24
+ recommendation_rank: 1
25
+ recommended_primary: extend current stack
26
+ rationale: Fixture validates in-repo harness
27
+ eval:
28
+ schema_version: "1.0.0"
29
+ revision_recommended: false
30
+ relevance:
31
+ passes: true
32
+ rationale: Hypothesis matches smoke task
@@ -5,7 +5,7 @@
5
5
  "project_root": "/tmp/ultimate-pi-smoke",
6
6
  "phase": "plan",
7
7
  "plan_id": null,
8
- "plan_packet_path": "/tmp/ultimate-pi-smoke/.pi/harness/runs/smoke-session-1/plan-packet.json",
8
+ "plan_packet_path": "/tmp/ultimate-pi-smoke/.pi/harness/runs/smoke-session-1/plan-packet.yaml",
9
9
  "plan_ready": false,
10
10
  "task_summary": "smoke task",
11
11
  "status": "active",