ultimate-pi 0.14.0 → 0.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agents/skills/harness-debate-plan/SKILL.md +41 -61
- package/.agents/skills/harness-governor/SKILL.md +11 -0
- package/.agents/skills/harness-orchestration/SKILL.md +5 -3
- package/.agents/skills/harness-plan/SKILL.md +11 -9
- package/.pi/agents/harness/adversary.md +1 -1
- package/.pi/agents/harness/evaluator.md +1 -1
- package/.pi/agents/harness/executor.md +1 -1
- package/.pi/agents/harness/incident-recorder.md +1 -1
- package/.pi/agents/harness/meta-optimizer.md +1 -1
- package/.pi/agents/harness/planning/decompose.md +8 -35
- package/.pi/agents/harness/planning/execution-plan-author.md +27 -15
- package/.pi/agents/harness/planning/hypothesis-validator.md +23 -6
- package/.pi/agents/harness/planning/hypothesis.md +4 -27
- package/.pi/agents/harness/planning/implementation-researcher.md +43 -0
- package/.pi/agents/harness/planning/plan-adversary.md +20 -5
- package/.pi/agents/harness/planning/plan-evaluator.md +28 -6
- package/.pi/agents/harness/planning/review-integrator.md +23 -10
- package/.pi/agents/harness/planning/scout-graphify.md +4 -23
- package/.pi/agents/harness/planning/scout-semantic.md +3 -18
- package/.pi/agents/harness/planning/scout-structure.md +3 -18
- package/.pi/agents/harness/planning/sprint-contract-auditor.md +22 -6
- package/.pi/agents/harness/planning/stack-researcher.md +21 -11
- package/.pi/agents/harness/tie-breaker.md +1 -1
- package/.pi/agents/harness/trace-librarian.md +1 -1
- package/.pi/extensions/budget-guard.ts +33 -19
- package/.pi/extensions/harness-debate-tools.ts +280 -19
- package/.pi/extensions/harness-live-widget.ts +39 -159
- package/.pi/extensions/harness-plan-approval.ts +47 -5
- package/.pi/extensions/harness-run-context.ts +96 -2
- package/.pi/extensions/harness-subagent-submit.ts +195 -0
- package/.pi/extensions/lib/debate-bus-core.ts +108 -17
- package/.pi/extensions/lib/debate-bus-state.ts +6 -0
- package/.pi/extensions/lib/harness-subagent-policy.ts +45 -0
- package/.pi/extensions/lib/harness-subagent-submit-pipeline.ts +82 -0
- package/.pi/extensions/lib/harness-subagent-submit-registry.ts +172 -0
- package/.pi/extensions/lib/harness-subagents-bridge.ts +42 -0
- package/.pi/extensions/lib/plan-approval/plan-review.ts +56 -0
- package/.pi/extensions/lib/plan-approval/types.ts +1 -0
- package/.pi/extensions/lib/plan-debate-eligibility.ts +214 -0
- package/.pi/extensions/lib/plan-debate-focus.ts +151 -0
- package/.pi/extensions/lib/plan-debate-gate.ts +88 -34
- package/.pi/extensions/lib/plan-debate-lane.ts +15 -0
- package/.pi/extensions/lib/plan-debate-lanes.ts +44 -0
- package/.pi/extensions/lib/plan-debate-round-status.ts +63 -20
- package/.pi/extensions/lib/plan-messenger.ts +93 -17
- package/.pi/extensions/policy-gate.ts +1 -1
- package/.pi/harness/README.md +1 -1
- package/.pi/harness/agents.manifest.json +25 -21
- package/.pi/harness/docs/adrs/0034-darwin-plan-research-pipeline.md +1 -3
- package/.pi/harness/docs/adrs/0035-plan-phase-review-gate.md +13 -5
- package/.pi/harness/docs/adrs/0036-implementation-research-and-selective-debate.md +51 -0
- package/.pi/harness/docs/adrs/0037-subagent-submit-tools.md +31 -0
- package/.pi/harness/docs/adrs/0038-budget-telemetry-only.md +23 -0
- package/.pi/harness/docs/adrs/README.md +4 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/artifacts/implementation-research.yaml +28 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/artifacts/review-round-r1.yaml +24 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/artifacts/review-round-r2.yaml +25 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/plan-packet.yaml +196 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/plan-review.md +14 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/research-brief.yaml +62 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/implementation-research.yaml +28 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r2.yaml +24 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r3.yaml +24 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/research-brief.yaml +29 -0
- package/.pi/harness/evals/smoke/smoke-harness-plan.mjs +97 -16
- package/.pi/harness/specs/harness-executor-handoff.schema.json +19 -0
- package/.pi/harness/specs/harness-human-required.schema.json +16 -0
- package/.pi/harness/specs/plan-implementation-research-brief.schema.json +128 -0
- package/.pi/harness/specs/plan-review-round-draft.schema.json +1 -1
- package/.pi/harness/specs/plan-scout-findings.schema.json +19 -0
- package/.pi/harness/specs/round-result.schema.json +15 -2
- package/.pi/lib/harness-agent-output.ts +45 -0
- package/.pi/lib/harness-budget-enforce.ts +18 -0
- package/.pi/lib/harness-schema-validate.ts +89 -0
- package/.pi/lib/harness-spawn-parse.ts +86 -0
- package/.pi/lib/harness-subagent-submit-path.ts +41 -0
- package/.pi/lib/harness-ui-state.ts +107 -2
- package/.pi/prompts/harness-auto.md +2 -2
- package/.pi/prompts/harness-plan.md +94 -42
- package/.pi/prompts/harness-run.md +2 -2
- package/.pi/prompts/planning-rubrics.md +31 -0
- package/.pi/scripts/harness-verify.mjs +2 -0
- package/.pi/scripts/harness_web/__pycache__/__init__.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/config.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/output.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/scrape.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/search.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/search_ddg.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/search_searxng.cpython-314.pyc +0 -0
- package/CHANGELOG.md +21 -0
- package/package.json +4 -2
- package/vendor/pi-subagents/src/subagents.ts +29 -3
|
@@ -11,6 +11,10 @@ import {
|
|
|
11
11
|
PLAN_DEBATE_PARTICIPANTS,
|
|
12
12
|
POST_EXECUTE_DEBATE_PARTICIPANTS,
|
|
13
13
|
} from "../../lib/debate-orchestrator-types.js";
|
|
14
|
+
import {
|
|
15
|
+
isHarnessBudgetEnforceOn,
|
|
16
|
+
shouldEmitBlockingBudgetExhausted,
|
|
17
|
+
} from "../../lib/harness-budget-enforce.js";
|
|
14
18
|
import {
|
|
15
19
|
type DebateState,
|
|
16
20
|
getDebateState,
|
|
@@ -19,6 +23,17 @@ import {
|
|
|
19
23
|
setDebateState,
|
|
20
24
|
setLastSeverity,
|
|
21
25
|
} from "./debate-bus-state.js";
|
|
26
|
+
import {
|
|
27
|
+
type DebateProfile,
|
|
28
|
+
PLAN_BUDGET_LIGHT,
|
|
29
|
+
PLAN_BUDGET_STANDARD,
|
|
30
|
+
} from "./plan-debate-eligibility.js";
|
|
31
|
+
import {
|
|
32
|
+
getPlanFocusCoverage,
|
|
33
|
+
PLAN_FOCUS_AREAS,
|
|
34
|
+
type PlanDebateFocus,
|
|
35
|
+
planDebateOutcomeComplete,
|
|
36
|
+
} from "./plan-debate-focus.js";
|
|
22
37
|
|
|
23
38
|
export type PolicyDecision =
|
|
24
39
|
| "pass"
|
|
@@ -64,13 +79,10 @@ const THRESHOLDS = {
|
|
|
64
79
|
architecture: 0.8,
|
|
65
80
|
test_integrity: 0.8,
|
|
66
81
|
};
|
|
67
|
-
const HARD_STOP_DEBATE_CAPS =
|
|
82
|
+
const HARD_STOP_DEBATE_CAPS =
|
|
83
|
+
process.env.HARNESS_DEBATE_HARD_STOP === "true" && isHarnessBudgetEnforceOn();
|
|
68
84
|
|
|
69
|
-
const PLAN_BUDGET =
|
|
70
|
-
max_rounds: 4,
|
|
71
|
-
round_token_cap: 2000,
|
|
72
|
-
debate_global_cap: 12000,
|
|
73
|
-
} as const;
|
|
85
|
+
const PLAN_BUDGET = PLAN_BUDGET_STANDARD;
|
|
74
86
|
|
|
75
87
|
const AGGRESSIVE_BUDGET = {
|
|
76
88
|
max_rounds: 6,
|
|
@@ -88,16 +100,48 @@ function toSafeFloat(value: unknown): number {
|
|
|
88
100
|
return Math.max(0, Math.min(1, n));
|
|
89
101
|
}
|
|
90
102
|
|
|
91
|
-
export function capsForDebate(
|
|
103
|
+
export function capsForDebate(
|
|
104
|
+
debateId: string,
|
|
105
|
+
profile?: DebateProfile,
|
|
106
|
+
): {
|
|
92
107
|
name: "plan" | "aggressive";
|
|
108
|
+
min_focus_rounds: number;
|
|
93
109
|
max_rounds: number;
|
|
110
|
+
max_exchanges_per_round: number;
|
|
94
111
|
round_token_cap: number;
|
|
95
112
|
debate_global_cap: number;
|
|
96
113
|
} {
|
|
97
114
|
if (isPlanDebateId(debateId)) {
|
|
98
|
-
|
|
115
|
+
const active = profile ?? getDebateState()?.debate_profile ?? "standard";
|
|
116
|
+
const budget = active === "light" ? PLAN_BUDGET_LIGHT : PLAN_BUDGET;
|
|
117
|
+
const caps = { name: "plan" as const, ...budget };
|
|
118
|
+
if (!isHarnessBudgetEnforceOn()) {
|
|
119
|
+
return {
|
|
120
|
+
...caps,
|
|
121
|
+
max_rounds: 999,
|
|
122
|
+
max_exchanges_per_round: 99,
|
|
123
|
+
round_token_cap: caps.round_token_cap * 100,
|
|
124
|
+
debate_global_cap: caps.debate_global_cap * 100,
|
|
125
|
+
};
|
|
126
|
+
}
|
|
127
|
+
return caps;
|
|
99
128
|
}
|
|
100
|
-
|
|
129
|
+
const caps = {
|
|
130
|
+
name: "aggressive" as const,
|
|
131
|
+
min_focus_rounds: 1,
|
|
132
|
+
max_exchanges_per_round: 1,
|
|
133
|
+
...AGGRESSIVE_BUDGET,
|
|
134
|
+
};
|
|
135
|
+
if (!isHarnessBudgetEnforceOn()) {
|
|
136
|
+
return {
|
|
137
|
+
...caps,
|
|
138
|
+
max_rounds: 999,
|
|
139
|
+
max_exchanges_per_round: 99,
|
|
140
|
+
round_token_cap: caps.round_token_cap * 100,
|
|
141
|
+
debate_global_cap: caps.debate_global_cap * 100,
|
|
142
|
+
};
|
|
143
|
+
}
|
|
144
|
+
return caps;
|
|
101
145
|
}
|
|
102
146
|
|
|
103
147
|
function participantAllowed(
|
|
@@ -161,23 +205,40 @@ export interface DebateBusHooks {
|
|
|
161
205
|
appendEntry: (customType: string, data: unknown) => void;
|
|
162
206
|
}
|
|
163
207
|
|
|
208
|
+
export interface OpenDebateBusOptions {
|
|
209
|
+
debate_profile?: DebateProfile;
|
|
210
|
+
required_focuses?: DebateState["required_focuses"];
|
|
211
|
+
}
|
|
212
|
+
|
|
164
213
|
export async function openDebateBus(
|
|
165
214
|
runId: string,
|
|
166
215
|
debateId: string,
|
|
167
216
|
hooks: DebateBusHooks,
|
|
217
|
+
opts?: OpenDebateBusOptions,
|
|
168
218
|
): Promise<DebateState> {
|
|
169
|
-
const
|
|
219
|
+
const profile = opts?.debate_profile ?? "standard";
|
|
220
|
+
const caps = capsForDebate(debateId, profile);
|
|
170
221
|
const debate_phase = debatePhaseFromId(debateId);
|
|
222
|
+
const defaultFocuses: PlanDebateFocus[] =
|
|
223
|
+
profile === "light" ? ["spec", "quality"] : [...PLAN_FOCUS_AREAS];
|
|
224
|
+
const required_focuses =
|
|
225
|
+
opts?.required_focuses && opts.required_focuses.length > 0
|
|
226
|
+
? opts.required_focuses
|
|
227
|
+
: defaultFocuses;
|
|
171
228
|
const next: DebateState = {
|
|
172
229
|
run_id: runId,
|
|
173
230
|
debate_id: debateId,
|
|
174
231
|
debate_phase,
|
|
175
232
|
round_count: 0,
|
|
176
233
|
budget_used: 0,
|
|
234
|
+
min_focus_rounds: caps.min_focus_rounds,
|
|
177
235
|
max_rounds: caps.max_rounds,
|
|
236
|
+
max_exchanges_per_round: caps.max_exchanges_per_round,
|
|
178
237
|
round_token_cap: caps.round_token_cap,
|
|
179
238
|
debate_global_cap: caps.debate_global_cap,
|
|
180
239
|
last_review_gate_ready: false,
|
|
240
|
+
debate_profile: profile,
|
|
241
|
+
required_focuses,
|
|
181
242
|
};
|
|
182
243
|
setDebateState(next);
|
|
183
244
|
setLastSeverity({
|
|
@@ -199,6 +260,8 @@ export async function openDebateBus(
|
|
|
199
260
|
opened_at: nowIso(),
|
|
200
261
|
debate_phase,
|
|
201
262
|
budget_profile: caps.name,
|
|
263
|
+
debate_profile: profile,
|
|
264
|
+
required_focuses,
|
|
202
265
|
},
|
|
203
266
|
};
|
|
204
267
|
hooks.appendEntry("harness-debate-envelope", envelope);
|
|
@@ -230,7 +293,9 @@ async function emitBudgetExhausted(
|
|
|
230
293
|
budget_used: state.budget_used,
|
|
231
294
|
exhaustion_reason: reason,
|
|
232
295
|
caps: {
|
|
296
|
+
min_focus_rounds: state.min_focus_rounds,
|
|
233
297
|
max_rounds: state.max_rounds,
|
|
298
|
+
max_exchanges_per_round: state.max_exchanges_per_round,
|
|
234
299
|
round_token_cap: state.round_token_cap,
|
|
235
300
|
debate_global_cap: state.debate_global_cap,
|
|
236
301
|
},
|
|
@@ -240,7 +305,19 @@ async function emitBudgetExhausted(
|
|
|
240
305
|
},
|
|
241
306
|
};
|
|
242
307
|
hooks.appendEntry("harness-debate-envelope", envelope);
|
|
243
|
-
|
|
308
|
+
if (shouldEmitBlockingBudgetExhausted()) {
|
|
309
|
+
hooks.appendEntry("harness-budget-exhausted", envelope.payload);
|
|
310
|
+
} else {
|
|
311
|
+
const telemetryPayload = {
|
|
312
|
+
...(envelope.payload as Record<string, unknown>),
|
|
313
|
+
telemetry_only: true,
|
|
314
|
+
};
|
|
315
|
+
hooks.appendEntry("harness-debate-budget-telemetry", telemetryPayload);
|
|
316
|
+
hooks.appendEntry("harness-budget-telemetry", {
|
|
317
|
+
...telemetryPayload,
|
|
318
|
+
source: "debate-bus",
|
|
319
|
+
});
|
|
320
|
+
}
|
|
244
321
|
await writeDebateEvent(state.debate_id, envelope);
|
|
245
322
|
}
|
|
246
323
|
|
|
@@ -327,7 +404,9 @@ export async function acceptDebateRound(
|
|
|
327
404
|
token_usage: envelope.payload.token_usage,
|
|
328
405
|
budget_profile: {
|
|
329
406
|
name: profileName,
|
|
407
|
+
min_focus_rounds: state.min_focus_rounds,
|
|
330
408
|
max_rounds: state.max_rounds,
|
|
409
|
+
max_exchanges_per_round: state.max_exchanges_per_round,
|
|
331
410
|
round_token_cap: state.round_token_cap,
|
|
332
411
|
debate_global_cap: state.debate_global_cap,
|
|
333
412
|
},
|
|
@@ -363,12 +442,24 @@ export async function finalizeDebateConsensus(
|
|
|
363
442
|
);
|
|
364
443
|
const decision = decidePolicy(lastSeverity, evidenceScore);
|
|
365
444
|
const planPhase = state.debate_phase === "plan";
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
445
|
+
let evaluatorPassed = true;
|
|
446
|
+
let debateComplete = state.round_count > 0;
|
|
447
|
+
if (planPhase) {
|
|
448
|
+
const runDir = join(process.cwd(), ".pi", "harness", "runs", state.run_id);
|
|
449
|
+
const requiredFocuses =
|
|
450
|
+
state.required_focuses && state.required_focuses.length > 0
|
|
451
|
+
? state.required_focuses
|
|
452
|
+
: undefined;
|
|
453
|
+
const coverage = await getPlanFocusCoverage(runDir, {
|
|
454
|
+
requiredFocuses,
|
|
455
|
+
});
|
|
456
|
+
evaluatorPassed =
|
|
457
|
+
coverage.last_review_gate_ready || Boolean(state.last_review_gate_ready);
|
|
458
|
+
debateComplete = planDebateOutcomeComplete(coverage, {
|
|
459
|
+
requiredFocuses,
|
|
460
|
+
minRoundIndex: state.min_focus_rounds,
|
|
461
|
+
});
|
|
462
|
+
}
|
|
372
463
|
|
|
373
464
|
const consensus = {
|
|
374
465
|
schema_version: "1.0.0",
|
|
@@ -3,6 +3,8 @@
|
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
5
|
import type { DebateParticipant } from "../../lib/debate-orchestrator-types.js";
|
|
6
|
+
import type { DebateProfile } from "./plan-debate-eligibility.js";
|
|
7
|
+
import type { PlanDebateFocus } from "./plan-debate-focus.js";
|
|
6
8
|
|
|
7
9
|
export type DebatePhase = "plan" | "post_execute";
|
|
8
10
|
|
|
@@ -12,10 +14,14 @@ export interface DebateState {
|
|
|
12
14
|
debate_phase: DebatePhase;
|
|
13
15
|
round_count: number;
|
|
14
16
|
budget_used: number;
|
|
17
|
+
min_focus_rounds: number;
|
|
15
18
|
max_rounds: number;
|
|
19
|
+
max_exchanges_per_round: number;
|
|
16
20
|
round_token_cap: number;
|
|
17
21
|
debate_global_cap: number;
|
|
18
22
|
last_review_gate_ready?: boolean;
|
|
23
|
+
debate_profile?: DebateProfile;
|
|
24
|
+
required_focuses?: PlanDebateFocus[];
|
|
19
25
|
}
|
|
20
26
|
|
|
21
27
|
export interface SeverityScores {
|
|
@@ -2,6 +2,10 @@
|
|
|
2
2
|
* Per-agent tool policy for harness/* subagents (defense in depth with frontmatter).
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
|
+
import {
|
|
6
|
+
isSubmitToolName,
|
|
7
|
+
SUBMIT_TOOLS_BY_AGENT,
|
|
8
|
+
} from "./harness-subagent-submit-registry.js";
|
|
5
9
|
import {
|
|
6
10
|
evaluateSubagentToolCall,
|
|
7
11
|
type ToolCallDecision,
|
|
@@ -107,6 +111,45 @@ export function evaluateHarnessSubagentToolCall(
|
|
|
107
111
|
}
|
|
108
112
|
|
|
109
113
|
if (!isHarnessPackageAgent(agentType)) {
|
|
114
|
+
if (
|
|
115
|
+
isSubmitToolName(toolName) &&
|
|
116
|
+
process.env.PI_HARNESS_SUBPROCESS !== "1"
|
|
117
|
+
) {
|
|
118
|
+
return {
|
|
119
|
+
action: "block",
|
|
120
|
+
reason:
|
|
121
|
+
"harness-subagent-policy: submit_* tools are subprocess-only; parent orchestrator must use harness_artifact_ready and write_harness_yaml for merges.",
|
|
122
|
+
};
|
|
123
|
+
}
|
|
124
|
+
return { action: "allow" };
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
if (isSubmitToolName(toolName)) {
|
|
128
|
+
if (process.env.PI_HARNESS_SUBPROCESS !== "1") {
|
|
129
|
+
return {
|
|
130
|
+
action: "block",
|
|
131
|
+
reason:
|
|
132
|
+
"harness-subagent-policy: submit_* tools are not available in the parent harness session.",
|
|
133
|
+
};
|
|
134
|
+
}
|
|
135
|
+
if (toolName === "submit_human_required") {
|
|
136
|
+
const kind = classifyHarnessAgent(agentType);
|
|
137
|
+
if (kind === "executor") {
|
|
138
|
+
return {
|
|
139
|
+
action: "block",
|
|
140
|
+
reason:
|
|
141
|
+
"submit_human_required is not available for harness/executor.",
|
|
142
|
+
};
|
|
143
|
+
}
|
|
144
|
+
return { action: "allow" };
|
|
145
|
+
}
|
|
146
|
+
const allowed = SUBMIT_TOOLS_BY_AGENT[agentType];
|
|
147
|
+
if (!allowed?.has(toolName)) {
|
|
148
|
+
return {
|
|
149
|
+
action: "block",
|
|
150
|
+
reason: `harness-subagent-policy: ${toolName} is not allowed for ${agentType}.`,
|
|
151
|
+
};
|
|
152
|
+
}
|
|
110
153
|
return { action: "allow" };
|
|
111
154
|
}
|
|
112
155
|
|
|
@@ -153,6 +196,8 @@ export function evaluateHarnessSubagentToolCall(
|
|
|
153
196
|
return { action: "allow" };
|
|
154
197
|
}
|
|
155
198
|
|
|
199
|
+
export { isSubmitToolName } from "./harness-subagent-submit-registry.js";
|
|
200
|
+
|
|
156
201
|
export function harnessSubagentPhaseHint(agentType: string): string | null {
|
|
157
202
|
if (isHarnessPlanningAgent(agentType)) {
|
|
158
203
|
return "plan";
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared write pipeline for harness subagent submit tools.
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { mkdir } from "node:fs/promises";
|
|
6
|
+
import { dirname, join } from "node:path";
|
|
7
|
+
import { validateAgainstHarnessSchema } from "../../lib/harness-schema-validate.js";
|
|
8
|
+
import { resolveGuardedRunDir } from "../../lib/harness-subagent-submit-path.js";
|
|
9
|
+
import { writeYamlFile } from "../../lib/harness-yaml.js";
|
|
10
|
+
import {
|
|
11
|
+
resolveArtifactRelPath,
|
|
12
|
+
type SubmitToolSpec,
|
|
13
|
+
} from "./harness-subagent-submit-registry.js";
|
|
14
|
+
import {
|
|
15
|
+
type ApplyDebateLaneResult,
|
|
16
|
+
applyDebateLaneFromDoc,
|
|
17
|
+
} from "./plan-debate-lane.js";
|
|
18
|
+
|
|
19
|
+
export interface SubmitPipelineResult {
|
|
20
|
+
ok: boolean;
|
|
21
|
+
artifact_path?: string;
|
|
22
|
+
validation_errors?: string[];
|
|
23
|
+
lane_result?: ApplyDebateLaneResult;
|
|
24
|
+
human_required?: boolean;
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
export async function executeSubmitPipeline(opts: {
|
|
28
|
+
projectRoot: string;
|
|
29
|
+
specsDir: string;
|
|
30
|
+
spec: SubmitToolSpec;
|
|
31
|
+
agentId: string;
|
|
32
|
+
document: Record<string, unknown>;
|
|
33
|
+
runId: string;
|
|
34
|
+
runDirEnv?: string;
|
|
35
|
+
}): Promise<SubmitPipelineResult> {
|
|
36
|
+
const runResolved = await resolveGuardedRunDir({
|
|
37
|
+
projectRoot: opts.projectRoot,
|
|
38
|
+
runId: opts.runId,
|
|
39
|
+
runDirEnv: opts.runDirEnv,
|
|
40
|
+
});
|
|
41
|
+
if (!runResolved.ok) {
|
|
42
|
+
return { ok: false, validation_errors: [runResolved.error] };
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
const validation = await validateAgainstHarnessSchema(
|
|
46
|
+
opts.specsDir,
|
|
47
|
+
opts.spec.schemaFile,
|
|
48
|
+
opts.document,
|
|
49
|
+
);
|
|
50
|
+
if (!validation.ok) {
|
|
51
|
+
return { ok: false, validation_errors: validation.errors };
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
const relPath = resolveArtifactRelPath(opts.spec, opts.document);
|
|
55
|
+
const absPath = join(runResolved.runDir, relPath);
|
|
56
|
+
await mkdir(dirname(absPath), { recursive: true });
|
|
57
|
+
await writeYamlFile(absPath, opts.document);
|
|
58
|
+
|
|
59
|
+
let laneResult: ApplyDebateLaneResult | undefined;
|
|
60
|
+
if (opts.spec.debateLane) {
|
|
61
|
+
laneResult = await applyDebateLaneFromDoc({
|
|
62
|
+
runDir: runResolved.runDir,
|
|
63
|
+
lane: opts.spec.debateLane,
|
|
64
|
+
doc: opts.document,
|
|
65
|
+
});
|
|
66
|
+
if (!laneResult.ok) {
|
|
67
|
+
return {
|
|
68
|
+
ok: false,
|
|
69
|
+
artifact_path: relPath,
|
|
70
|
+
validation_errors: laneResult.errors,
|
|
71
|
+
lane_result: laneResult,
|
|
72
|
+
};
|
|
73
|
+
}
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
return {
|
|
77
|
+
ok: true,
|
|
78
|
+
artifact_path: relPath,
|
|
79
|
+
lane_result: laneResult,
|
|
80
|
+
human_required: opts.spec.humanRequired === true,
|
|
81
|
+
};
|
|
82
|
+
}
|
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Registry: submit tool name → agent allowlist, schema, artifact path.
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import type { DebateLaneKind } from "./plan-debate-lane.js";
|
|
6
|
+
|
|
7
|
+
export interface SubmitToolSpec {
|
|
8
|
+
toolName: string;
|
|
9
|
+
agents: readonly string[];
|
|
10
|
+
schemaFile: string;
|
|
11
|
+
artifactPath: string | ((doc: Record<string, unknown>) => string);
|
|
12
|
+
debateLane?: DebateLaneKind;
|
|
13
|
+
humanRequired?: boolean;
|
|
14
|
+
}
|
|
15
|
+
|
|
16
|
+
function roundPath(prefix: string, doc: Record<string, unknown>): string {
|
|
17
|
+
const r =
|
|
18
|
+
typeof doc.round_index === "number"
|
|
19
|
+
? doc.round_index
|
|
20
|
+
: Number(doc.round_index ?? 1);
|
|
21
|
+
return `artifacts/${prefix}-r${r}.yaml`;
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
export const SUBMIT_TOOL_SPECS: readonly SubmitToolSpec[] = [
|
|
25
|
+
{
|
|
26
|
+
toolName: "submit_scout_findings",
|
|
27
|
+
agents: [
|
|
28
|
+
"harness/planning/scout-graphify",
|
|
29
|
+
"harness/planning/scout-structure",
|
|
30
|
+
"harness/planning/scout-semantic",
|
|
31
|
+
],
|
|
32
|
+
schemaFile: "plan-scout-findings.schema.json",
|
|
33
|
+
artifactPath: (doc) => {
|
|
34
|
+
const lane =
|
|
35
|
+
typeof doc.lane === "string"
|
|
36
|
+
? doc.lane
|
|
37
|
+
: typeof doc.scout_lane === "string"
|
|
38
|
+
? doc.scout_lane
|
|
39
|
+
: "graphify";
|
|
40
|
+
return `artifacts/scout-${lane}.yaml`;
|
|
41
|
+
},
|
|
42
|
+
},
|
|
43
|
+
{
|
|
44
|
+
toolName: "submit_decomposition_brief",
|
|
45
|
+
agents: ["harness/planning/decompose"],
|
|
46
|
+
schemaFile: "plan-decomposition-brief.schema.json",
|
|
47
|
+
artifactPath: "artifacts/decomposition.yaml",
|
|
48
|
+
},
|
|
49
|
+
{
|
|
50
|
+
toolName: "submit_hypothesis_brief",
|
|
51
|
+
agents: ["harness/planning/hypothesis"],
|
|
52
|
+
schemaFile: "plan-hypothesis-brief.schema.json",
|
|
53
|
+
artifactPath: "artifacts/hypothesis.yaml",
|
|
54
|
+
},
|
|
55
|
+
{
|
|
56
|
+
toolName: "submit_implementation_research",
|
|
57
|
+
agents: ["harness/planning/implementation-researcher"],
|
|
58
|
+
schemaFile: "plan-implementation-research-brief.schema.json",
|
|
59
|
+
artifactPath: "artifacts/implementation-research.yaml",
|
|
60
|
+
},
|
|
61
|
+
{
|
|
62
|
+
toolName: "submit_stack_brief",
|
|
63
|
+
agents: ["harness/planning/stack-researcher"],
|
|
64
|
+
schemaFile: "plan-stack-brief.schema.json",
|
|
65
|
+
artifactPath: "artifacts/stack.yaml",
|
|
66
|
+
},
|
|
67
|
+
{
|
|
68
|
+
toolName: "submit_execution_plan_brief",
|
|
69
|
+
agents: ["harness/planning/execution-plan-author"],
|
|
70
|
+
schemaFile: "plan-execution-plan-brief.schema.json",
|
|
71
|
+
artifactPath: "artifacts/execution-plan-draft.yaml",
|
|
72
|
+
},
|
|
73
|
+
{
|
|
74
|
+
toolName: "submit_hypothesis_validation",
|
|
75
|
+
agents: ["harness/planning/hypothesis-validator"],
|
|
76
|
+
schemaFile: "plan-hypothesis-eval.schema.json",
|
|
77
|
+
artifactPath: (doc) => roundPath("hypothesis-validation", doc),
|
|
78
|
+
debateLane: "hypothesis-validation",
|
|
79
|
+
},
|
|
80
|
+
{
|
|
81
|
+
toolName: "submit_validation_turn",
|
|
82
|
+
agents: ["harness/planning/plan-evaluator"],
|
|
83
|
+
schemaFile: "plan-validation-turn.schema.json",
|
|
84
|
+
artifactPath: (doc) => roundPath("validation-turn", doc),
|
|
85
|
+
debateLane: "validation-turn",
|
|
86
|
+
},
|
|
87
|
+
{
|
|
88
|
+
toolName: "submit_adversary_brief",
|
|
89
|
+
agents: ["harness/planning/plan-adversary"],
|
|
90
|
+
schemaFile: "plan-adversary-brief.schema.json",
|
|
91
|
+
artifactPath: (doc) => roundPath("adversary-brief", doc),
|
|
92
|
+
debateLane: "adversary-brief",
|
|
93
|
+
},
|
|
94
|
+
{
|
|
95
|
+
toolName: "submit_sprint_audit",
|
|
96
|
+
agents: ["harness/planning/sprint-contract-auditor"],
|
|
97
|
+
schemaFile: "plan-sprint-audit-turn.schema.json",
|
|
98
|
+
artifactPath: (doc) => roundPath("sprint-audit", doc),
|
|
99
|
+
debateLane: "sprint-audit",
|
|
100
|
+
},
|
|
101
|
+
{
|
|
102
|
+
toolName: "submit_review_round_draft",
|
|
103
|
+
agents: ["harness/planning/review-integrator"],
|
|
104
|
+
schemaFile: "plan-review-round-draft.schema.json",
|
|
105
|
+
artifactPath: (doc) => roundPath("review-round-draft", doc),
|
|
106
|
+
},
|
|
107
|
+
{
|
|
108
|
+
toolName: "submit_executor_handoff",
|
|
109
|
+
agents: ["harness/executor"],
|
|
110
|
+
schemaFile: "harness-executor-handoff.schema.json",
|
|
111
|
+
artifactPath: "handoff/executor-summary.yaml",
|
|
112
|
+
},
|
|
113
|
+
{
|
|
114
|
+
toolName: "submit_eval_verdict",
|
|
115
|
+
agents: ["harness/evaluator"],
|
|
116
|
+
schemaFile: "eval-verdict.schema.json",
|
|
117
|
+
artifactPath: "artifacts/eval-verdict.yaml",
|
|
118
|
+
},
|
|
119
|
+
{
|
|
120
|
+
toolName: "submit_adversary_report",
|
|
121
|
+
agents: ["harness/adversary"],
|
|
122
|
+
schemaFile: "adversary-report.schema.json",
|
|
123
|
+
artifactPath: "artifacts/adversary-report.yaml",
|
|
124
|
+
},
|
|
125
|
+
{
|
|
126
|
+
toolName: "submit_human_required",
|
|
127
|
+
agents: ["harness/planning/decompose", "harness/planning/hypothesis"],
|
|
128
|
+
schemaFile: "harness-human-required.schema.json",
|
|
129
|
+
artifactPath: "artifacts/human-required.yaml",
|
|
130
|
+
humanRequired: true,
|
|
131
|
+
},
|
|
132
|
+
] as const;
|
|
133
|
+
|
|
134
|
+
export const SUBMIT_TOOLS_BY_AGENT: Readonly<
|
|
135
|
+
Record<string, ReadonlySet<string>>
|
|
136
|
+
> = (() => {
|
|
137
|
+
const map = new Map<string, Set<string>>();
|
|
138
|
+
for (const spec of SUBMIT_TOOL_SPECS) {
|
|
139
|
+
for (const agent of spec.agents) {
|
|
140
|
+
if (!map.has(agent)) map.set(agent, new Set());
|
|
141
|
+
map.get(agent)?.add(spec.toolName);
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
return Object.fromEntries(map.entries());
|
|
145
|
+
})();
|
|
146
|
+
|
|
147
|
+
export function specForSubmitTool(
|
|
148
|
+
toolName: string,
|
|
149
|
+
): SubmitToolSpec | undefined {
|
|
150
|
+
return SUBMIT_TOOL_SPECS.find((s) => s.toolName === toolName);
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
export function resolveArtifactRelPath(
|
|
154
|
+
spec: SubmitToolSpec,
|
|
155
|
+
doc: Record<string, unknown>,
|
|
156
|
+
): string {
|
|
157
|
+
if (typeof spec.artifactPath === "function") {
|
|
158
|
+
return spec.artifactPath(doc);
|
|
159
|
+
}
|
|
160
|
+
return spec.artifactPath;
|
|
161
|
+
}
|
|
162
|
+
|
|
163
|
+
export function isSubmitToolName(toolName: string): boolean {
|
|
164
|
+
return toolName.startsWith("submit_");
|
|
165
|
+
}
|
|
166
|
+
|
|
167
|
+
export const DEBATE_AGENT_SUBMIT_TOOL: Readonly<Record<string, string>> = {
|
|
168
|
+
"harness/planning/hypothesis-validator": "submit_hypothesis_validation",
|
|
169
|
+
"harness/planning/plan-evaluator": "submit_validation_turn",
|
|
170
|
+
"harness/planning/plan-adversary": "submit_adversary_brief",
|
|
171
|
+
"harness/planning/sprint-contract-auditor": "submit_sprint_audit",
|
|
172
|
+
};
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
* ultimate-pi harness wrapper around vendored pi-subagents.
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
|
+
import { join } from "node:path";
|
|
5
6
|
import type {
|
|
6
7
|
ExtensionAPI,
|
|
7
8
|
ExtensionContext,
|
|
@@ -12,6 +13,8 @@ import {
|
|
|
12
13
|
type HarnessSubagentsOptions,
|
|
13
14
|
type SpawnAuthForward,
|
|
14
15
|
} from "../../../vendor/pi-subagents/src/subagents.js";
|
|
16
|
+
import { parseSpawnContextFromTask } from "../../lib/harness-spawn-parse.js";
|
|
17
|
+
import { harnessSubagentSubmitExtensionPath } from "../harness-subagent-submit.js";
|
|
15
18
|
import { refreshHarnessCocoindexIndex } from "./harness-cocoindex-refresh.js";
|
|
16
19
|
import { captureHarnessEvent } from "./harness-posthog.js";
|
|
17
20
|
import {
|
|
@@ -58,8 +61,47 @@ async function resolveHarnessSpawnAuth(
|
|
|
58
61
|
export function createHarnessSubagentsExtension(
|
|
59
62
|
packageRoot: string,
|
|
60
63
|
): (pi: ExtensionAPI) => void {
|
|
64
|
+
const submitExtPath = harnessSubagentSubmitExtensionPath(packageRoot);
|
|
61
65
|
const options: HarnessSubagentsOptions = {
|
|
62
66
|
packageRoot,
|
|
67
|
+
harnessSubprocessExtensionPath: submitExtPath,
|
|
68
|
+
resolveSubprocessEnv: (task, agent) => {
|
|
69
|
+
if (!agent.name.startsWith("harness/")) return undefined;
|
|
70
|
+
const ctx = parseSpawnContextFromTask(task);
|
|
71
|
+
// #region agent log
|
|
72
|
+
fetch(
|
|
73
|
+
"http://127.0.0.1:7928/ingest/a5d40896-34cb-4f12-97db-df7ada0b22f0",
|
|
74
|
+
{
|
|
75
|
+
method: "POST",
|
|
76
|
+
headers: {
|
|
77
|
+
"Content-Type": "application/json",
|
|
78
|
+
"X-Debug-Session-Id": "2ca12b",
|
|
79
|
+
},
|
|
80
|
+
body: JSON.stringify({
|
|
81
|
+
sessionId: "2ca12b",
|
|
82
|
+
hypothesisId: "H1",
|
|
83
|
+
location: "harness-subagents-bridge.ts:resolveSubprocessEnv",
|
|
84
|
+
message: "parsed spawn context for subprocess env",
|
|
85
|
+
data: {
|
|
86
|
+
agent: agent.name,
|
|
87
|
+
hasCtx: Boolean(ctx?.run_id),
|
|
88
|
+
run_id: ctx?.run_id ?? null,
|
|
89
|
+
run_dir: ctx?.run_dir ?? null,
|
|
90
|
+
taskPrefix: task.slice(0, 160),
|
|
91
|
+
},
|
|
92
|
+
timestamp: Date.now(),
|
|
93
|
+
}),
|
|
94
|
+
},
|
|
95
|
+
).catch(() => {});
|
|
96
|
+
// #endregion
|
|
97
|
+
if (!ctx?.run_id) return undefined;
|
|
98
|
+
return {
|
|
99
|
+
HARNESS_RUN_ID: ctx.run_id,
|
|
100
|
+
HARNESS_RUN_DIR:
|
|
101
|
+
ctx.run_dir ??
|
|
102
|
+
join(packageRoot, ".pi", "harness", "runs", ctx.run_id),
|
|
103
|
+
};
|
|
104
|
+
},
|
|
63
105
|
defaultAgentScope: "both",
|
|
64
106
|
defaultConfirmProjectAgents: false,
|
|
65
107
|
truncateDetails: true,
|
|
@@ -160,6 +160,62 @@ export function formatResearchBriefMarkdown(
|
|
|
160
160
|
}
|
|
161
161
|
}
|
|
162
162
|
|
|
163
|
+
const impl = asRecord(research.implementation);
|
|
164
|
+
if (impl) {
|
|
165
|
+
lines.push("## Phase 3.5 — Implementation research");
|
|
166
|
+
lines.push("");
|
|
167
|
+
const framing = str(impl.problem_framing);
|
|
168
|
+
if (framing) {
|
|
169
|
+
lines.push("**Problem framing:**");
|
|
170
|
+
lines.push("");
|
|
171
|
+
lines.push(framing);
|
|
172
|
+
lines.push("");
|
|
173
|
+
}
|
|
174
|
+
const rec = asRecord(impl.recommended_approach);
|
|
175
|
+
if (rec) {
|
|
176
|
+
const summary = str(rec.summary);
|
|
177
|
+
const conf = str(rec.recommended_approach_confidence);
|
|
178
|
+
if (summary) {
|
|
179
|
+
lines.push(
|
|
180
|
+
`**Recommended approach**${conf ? ` (${conf} confidence)` : ""}:`,
|
|
181
|
+
);
|
|
182
|
+
lines.push("");
|
|
183
|
+
lines.push(summary);
|
|
184
|
+
lines.push("");
|
|
185
|
+
}
|
|
186
|
+
const rationale = str(rec.confidence_rationale);
|
|
187
|
+
if (rationale) {
|
|
188
|
+
lines.push(`*Rationale:* ${rationale}`);
|
|
189
|
+
lines.push("");
|
|
190
|
+
}
|
|
191
|
+
}
|
|
192
|
+
const patterns = Array.isArray(impl.solution_patterns)
|
|
193
|
+
? impl.solution_patterns
|
|
194
|
+
: [];
|
|
195
|
+
if (patterns.length) {
|
|
196
|
+
lines.push("**Solution patterns:**");
|
|
197
|
+
for (const p of patterns) {
|
|
198
|
+
const pat = asRecord(p);
|
|
199
|
+
const name = pat ? str(pat.name) : null;
|
|
200
|
+
const fit = pat ? str(pat.fit) : null;
|
|
201
|
+
if (name) lines.push(`- **${name}**${fit ? `: ${fit}` : ""}`);
|
|
202
|
+
}
|
|
203
|
+
lines.push("");
|
|
204
|
+
}
|
|
205
|
+
const openQs = strList(impl.open_questions);
|
|
206
|
+
if (openQs.length) {
|
|
207
|
+
lines.push("**Open questions:**");
|
|
208
|
+
for (const q of openQs) lines.push(`- ${q}`);
|
|
209
|
+
lines.push("");
|
|
210
|
+
}
|
|
211
|
+
const anti = strList(impl.anti_patterns);
|
|
212
|
+
if (anti.length) {
|
|
213
|
+
lines.push("**Anti-patterns:**");
|
|
214
|
+
for (const a of anti) lines.push(`- ${a}`);
|
|
215
|
+
lines.push("");
|
|
216
|
+
}
|
|
217
|
+
}
|
|
218
|
+
|
|
163
219
|
if (evalBrief) {
|
|
164
220
|
lines.push("## Self-evaluation");
|
|
165
221
|
lines.push("");
|
|
@@ -13,6 +13,7 @@ export interface PlanResearchBrief {
|
|
|
13
13
|
hypothesis?: Record<string, unknown> | null;
|
|
14
14
|
eval?: Record<string, unknown> | null;
|
|
15
15
|
stack?: Record<string, unknown> | null;
|
|
16
|
+
implementation?: Record<string, unknown> | null;
|
|
16
17
|
debate?: {
|
|
17
18
|
rounds?: Record<string, unknown>[];
|
|
18
19
|
hypothesis_validations?: Record<string, unknown>[];
|