ultimate-pi 0.15.0 → 0.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agents/skills/harness-governor/SKILL.md +11 -0
- package/.agents/skills/harness-orchestration/SKILL.md +3 -1
- package/.agents/skills/harness-plan/SKILL.md +5 -5
- package/.pi/agents/harness/adversary.md +1 -1
- package/.pi/agents/harness/evaluator.md +1 -1
- package/.pi/agents/harness/executor.md +1 -1
- package/.pi/agents/harness/incident-recorder.md +1 -1
- package/.pi/agents/harness/meta-optimizer.md +1 -1
- package/.pi/agents/harness/planning/decompose.md +4 -33
- package/.pi/agents/harness/planning/execution-plan-author.md +3 -2
- package/.pi/agents/harness/planning/hypothesis-validator.md +3 -2
- package/.pi/agents/harness/planning/hypothesis.md +4 -27
- package/.pi/agents/harness/planning/implementation-researcher.md +3 -2
- package/.pi/agents/harness/planning/plan-adversary.md +2 -3
- package/.pi/agents/harness/planning/plan-evaluator.md +3 -2
- package/.pi/agents/harness/planning/review-integrator.md +2 -3
- package/.pi/agents/harness/planning/scout-graphify.md +3 -22
- package/.pi/agents/harness/planning/scout-semantic.md +3 -18
- package/.pi/agents/harness/planning/scout-structure.md +3 -18
- package/.pi/agents/harness/planning/sprint-contract-auditor.md +3 -2
- package/.pi/agents/harness/planning/stack-researcher.md +3 -2
- package/.pi/agents/harness/tie-breaker.md +1 -1
- package/.pi/agents/harness/trace-librarian.md +1 -1
- package/.pi/extensions/budget-guard.ts +33 -19
- package/.pi/extensions/harness-debate-tools.ts +54 -6
- package/.pi/extensions/harness-run-context.ts +108 -2
- package/.pi/extensions/harness-subagent-submit.ts +172 -0
- package/.pi/extensions/harness-telemetry.ts +29 -4
- package/.pi/extensions/lib/debate-bus-core.ts +49 -6
- package/.pi/extensions/lib/harness-subagent-auth.ts +104 -19
- package/.pi/extensions/lib/harness-subagent-policy.ts +59 -0
- package/.pi/extensions/lib/harness-subagent-submit-pipeline.ts +82 -0
- package/.pi/extensions/lib/harness-subagent-submit-registry.ts +172 -0
- package/.pi/extensions/lib/harness-subagents-bridge.ts +127 -0
- package/.pi/extensions/lib/plan-debate-eligibility.ts +61 -8
- package/.pi/extensions/lib/plan-debate-focus.ts +21 -9
- package/.pi/extensions/lib/plan-debate-gate.ts +92 -18
- package/.pi/extensions/lib/plan-debate-lane.ts +15 -0
- package/.pi/extensions/lib/plan-debate-lanes.ts +27 -3
- package/.pi/extensions/lib/plan-debate-round-status.ts +18 -7
- package/.pi/extensions/lib/plan-messenger.ts +4 -0
- package/.pi/extensions/lib/plan-review-gate.ts +51 -0
- package/.pi/extensions/trace-recorder.ts +1 -0
- package/.pi/harness/agents.manifest.json +22 -22
- package/.pi/harness/docs/adrs/0037-subagent-submit-tools.md +31 -0
- package/.pi/harness/docs/adrs/0038-budget-telemetry-only.md +23 -0
- package/.pi/harness/docs/adrs/README.md +2 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/artifacts/implementation-research.yaml +28 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/artifacts/review-round-consolidated.yaml +25 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/plan-packet.yaml +196 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/plan-review.md +14 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/research-brief.yaml +62 -0
- package/.pi/harness/evals/smoke/smoke-harness-plan.mjs +40 -17
- package/.pi/harness/specs/harness-executor-handoff.schema.json +19 -0
- package/.pi/harness/specs/harness-human-required.schema.json +16 -0
- package/.pi/harness/specs/plan-review-round-draft.schema.json +1 -1
- package/.pi/harness/specs/plan-scout-findings.schema.json +19 -0
- package/.pi/lib/harness-agent-output.ts +45 -0
- package/.pi/lib/harness-budget-enforce.ts +18 -0
- package/.pi/lib/harness-schema-validate.ts +89 -0
- package/.pi/lib/harness-spawn-parse.ts +86 -0
- package/.pi/lib/harness-subagent-submit-path.ts +41 -0
- package/.pi/lib/harness-ui-state.ts +15 -2
- package/.pi/model-router.example.json +13 -4
- package/.pi/prompts/harness-auto.md +2 -2
- package/.pi/prompts/harness-plan.md +34 -14
- package/.pi/prompts/harness-run.md +2 -2
- package/.pi/prompts/harness-setup.md +4 -4
- package/.pi/scripts/harness-generate-model-router.mjs +118 -36
- package/.pi/scripts/harness-model-router-routing.test.mjs +97 -0
- package/.pi/scripts/harness-sync-model-router.mjs +15 -2
- package/.pi/scripts/harness-verify.mjs +31 -0
- package/.pi/scripts/harness_web/__pycache__/__init__.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/config.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/output.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/scrape.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/search.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/search_ddg.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/search_searxng.cpython-314.pyc +0 -0
- package/CHANGELOG.md +21 -0
- package/package.json +4 -2
- package/vendor/pi-model-router/UPSTREAM_PIN.md +3 -1
- package/vendor/pi-model-router/extensions/commands.ts +4 -4
- package/vendor/pi-model-router/extensions/index.ts +21 -0
- package/vendor/pi-model-router/extensions/provider.ts +130 -79
- package/vendor/pi-model-router/extensions/routing.ts +148 -0
- package/vendor/pi-model-router/extensions/state.ts +3 -0
- package/vendor/pi-model-router/extensions/types.ts +9 -0
- package/vendor/pi-model-router/extensions/ui.ts +16 -2
- package/vendor/pi-subagents/src/subagents.ts +29 -3
|
@@ -7,12 +7,15 @@ import { access } from "node:fs/promises";
|
|
|
7
7
|
import { join } from "node:path";
|
|
8
8
|
import { capsForDebate } from "./debate-bus-core.js";
|
|
9
9
|
import {
|
|
10
|
-
type
|
|
10
|
+
type PlanDebateRoundFocus,
|
|
11
11
|
readDebateRoundFocus,
|
|
12
12
|
} from "./plan-debate-focus.js";
|
|
13
13
|
import { planDebateIdForRun } from "./plan-debate-id.js";
|
|
14
14
|
import { laneArtifactPath } from "./plan-debate-lane.js";
|
|
15
|
-
import {
|
|
15
|
+
import {
|
|
16
|
+
lanesForConsolidatedRound,
|
|
17
|
+
lanesForRound,
|
|
18
|
+
} from "./plan-debate-lanes.js";
|
|
16
19
|
import {
|
|
17
20
|
getMessengerRoundState,
|
|
18
21
|
loadMessengerState,
|
|
@@ -40,26 +43,32 @@ export interface RoundStatusResult {
|
|
|
40
43
|
dialogue: { ok: boolean; errors: string[] };
|
|
41
44
|
unresolved_claim_ids: string[];
|
|
42
45
|
exchange_count: number;
|
|
43
|
-
debate_round_focus?:
|
|
46
|
+
debate_round_focus?: PlanDebateRoundFocus | null;
|
|
44
47
|
}
|
|
45
48
|
|
|
46
49
|
export async function getPlanDebateRoundStatus(
|
|
47
50
|
runDir: string,
|
|
48
51
|
roundIndex: number,
|
|
49
52
|
runId?: string,
|
|
50
|
-
opts?: { debate_round_focus?:
|
|
53
|
+
opts?: { debate_round_focus?: PlanDebateRoundFocus },
|
|
51
54
|
): Promise<RoundStatusResult> {
|
|
55
|
+
const messengerState = await loadMessengerState(runDir);
|
|
56
|
+
const consolidated =
|
|
57
|
+
messengerState?.review_gate_mode === "consolidated" && roundIndex === 1;
|
|
52
58
|
const focus =
|
|
53
59
|
opts?.debate_round_focus ??
|
|
60
|
+
(consolidated ? ("all" as PlanDebateRoundFocus) : null) ??
|
|
54
61
|
(await readDebateRoundFocus(runDir, roundIndex));
|
|
55
62
|
const missing: string[] = [];
|
|
56
|
-
|
|
63
|
+
const laneList = consolidated
|
|
64
|
+
? lanesForConsolidatedRound()
|
|
65
|
+
: lanesForRound(roundIndex, focus);
|
|
66
|
+
for (const lane of laneList) {
|
|
57
67
|
const rel = laneArtifactPath(lane, roundIndex);
|
|
58
68
|
if (!(await exists(join(runDir, rel)))) {
|
|
59
69
|
missing.push(rel);
|
|
60
70
|
}
|
|
61
71
|
}
|
|
62
|
-
const messengerState = await loadMessengerState(runDir);
|
|
63
72
|
const profile = messengerState?.debate_profile;
|
|
64
73
|
const caps = capsForDebate(
|
|
65
74
|
runId ? planDebateIdForRun(runId) : `plan-${runId ?? "unknown"}`,
|
|
@@ -73,7 +82,9 @@ export async function getPlanDebateRoundStatus(
|
|
|
73
82
|
if (!dialogue.ok) {
|
|
74
83
|
missing.push(...dialogue.errors.map((e) => `messenger: ${e}`));
|
|
75
84
|
}
|
|
76
|
-
const reviewRound =
|
|
85
|
+
const reviewRound = consolidated
|
|
86
|
+
? "artifacts/review-round-consolidated.yaml"
|
|
87
|
+
: `artifacts/review-round-r${roundIndex}.yaml`;
|
|
77
88
|
const reviewRoundOnDisk = await exists(join(runDir, reviewRound));
|
|
78
89
|
|
|
79
90
|
let next_tool: string | undefined;
|
|
@@ -63,6 +63,8 @@ export interface MessengerState {
|
|
|
63
63
|
rounds: Record<string, MessengerRoundState>;
|
|
64
64
|
debate_profile?: DebateProfile;
|
|
65
65
|
required_focuses?: PlanDebateFocus[];
|
|
66
|
+
/** consolidated = single Review Gate round; threaded = per-focus rounds */
|
|
67
|
+
review_gate_mode?: "consolidated" | "threaded";
|
|
66
68
|
}
|
|
67
69
|
|
|
68
70
|
function messengerRoot(runDir: string): string {
|
|
@@ -84,6 +86,7 @@ export async function initPlanMessenger(
|
|
|
84
86
|
debateId: string;
|
|
85
87
|
debate_profile?: DebateProfile;
|
|
86
88
|
required_focuses?: PlanDebateFocus[];
|
|
89
|
+
review_gate_mode?: "consolidated" | "threaded";
|
|
87
90
|
},
|
|
88
91
|
): Promise<string> {
|
|
89
92
|
const root = messengerRoot(runDir);
|
|
@@ -97,6 +100,7 @@ export async function initPlanMessenger(
|
|
|
97
100
|
rounds: {},
|
|
98
101
|
debate_profile: opts.debate_profile,
|
|
99
102
|
required_focuses: opts.required_focuses,
|
|
103
|
+
review_gate_mode: opts.review_gate_mode,
|
|
100
104
|
};
|
|
101
105
|
await writeFile(
|
|
102
106
|
join(root, "state.json"),
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Consolidated vs threaded Review Gate strategy for plan-phase debate.
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import type {
|
|
6
|
+
DebateEligibilityResult,
|
|
7
|
+
PlanReviewGateStrategy,
|
|
8
|
+
} from "./plan-debate-eligibility.js";
|
|
9
|
+
import type { PlanDebateFocus } from "./plan-debate-focus.js";
|
|
10
|
+
|
|
11
|
+
export type { PlanReviewGateStrategy };
|
|
12
|
+
|
|
13
|
+
export const CONSOLIDATED_REVIEW_ROUND = 1;
|
|
14
|
+
export const CONSOLIDATED_REVIEW_ARTIFACT =
|
|
15
|
+
"artifacts/review-round-consolidated.yaml";
|
|
16
|
+
|
|
17
|
+
export function planReviewGateStrategyFromEligibility(
|
|
18
|
+
eligibility: DebateEligibilityResult,
|
|
19
|
+
): PlanReviewGateStrategy {
|
|
20
|
+
return (
|
|
21
|
+
eligibility.review_gate_strategy ?? {
|
|
22
|
+
mode: eligibility.profile === "fast" ? "consolidated" : "threaded",
|
|
23
|
+
profile: eligibility.profile,
|
|
24
|
+
required_focuses: [...eligibility.required_focuses],
|
|
25
|
+
min_focus_rounds: eligibility.min_focus_rounds,
|
|
26
|
+
max_rounds: eligibility.max_rounds,
|
|
27
|
+
max_exchanges_per_round: eligibility.max_exchanges_per_round,
|
|
28
|
+
round_token_cap: eligibility.round_token_cap,
|
|
29
|
+
debate_global_cap: eligibility.debate_global_cap,
|
|
30
|
+
rationale: [...eligibility.rationale],
|
|
31
|
+
}
|
|
32
|
+
);
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
export function isConsolidatedReviewStrategy(
|
|
36
|
+
strategy: PlanReviewGateStrategy,
|
|
37
|
+
): boolean {
|
|
38
|
+
return strategy.mode === "consolidated";
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
/** Focus areas covered in a single consolidated review round (spec + quality gate). */
|
|
42
|
+
export const CONSOLIDATED_REVIEW_FOCUS_AREAS: readonly PlanDebateFocus[] = [
|
|
43
|
+
"spec",
|
|
44
|
+
"quality",
|
|
45
|
+
];
|
|
46
|
+
|
|
47
|
+
export function consolidatedReviewFocusesSatisfied(
|
|
48
|
+
covered: readonly string[],
|
|
49
|
+
): boolean {
|
|
50
|
+
return CONSOLIDATED_REVIEW_FOCUS_AREAS.every((f) => covered.includes(f));
|
|
51
|
+
}
|
|
@@ -235,6 +235,7 @@ export default function traceRecorder(pi: ExtensionAPI) {
|
|
|
235
235
|
if (shouldEmitStarted) {
|
|
236
236
|
captureHarnessEvent(sessionId, "harness_run_started", {
|
|
237
237
|
harness_run_id: runId,
|
|
238
|
+
run_id: runId,
|
|
238
239
|
harness_plan_id: activeRun.planId,
|
|
239
240
|
harness_phase: activeRun.phase,
|
|
240
241
|
pi_session_id: sessionId,
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
{
|
|
2
2
|
"schema_version": "1.0.0",
|
|
3
3
|
"package": "ultimate-pi",
|
|
4
|
-
"package_version": "0.
|
|
5
|
-
"generated_at": "2026-05-
|
|
4
|
+
"package_version": "0.15.0",
|
|
5
|
+
"generated_at": "2026-05-19T12:56:13.369Z",
|
|
6
6
|
"agents": {
|
|
7
7
|
"pi-pi/agent-expert": {
|
|
8
8
|
"path": ".pi/agents/pi-pi/agent-expert.md",
|
|
@@ -46,23 +46,23 @@
|
|
|
46
46
|
},
|
|
47
47
|
"harness/adversary": {
|
|
48
48
|
"path": ".pi/agents/harness/adversary.md",
|
|
49
|
-
"sha256": "
|
|
49
|
+
"sha256": "560c7571ab91478bde1271e9ae6c3a112c3e1d28e1a261c5450fd1d00f9f89af"
|
|
50
50
|
},
|
|
51
51
|
"harness/evaluator": {
|
|
52
52
|
"path": ".pi/agents/harness/evaluator.md",
|
|
53
|
-
"sha256": "
|
|
53
|
+
"sha256": "a4667d3efb305ba2fe79118e3d7d2b0de5e0369637af040d1238161d75cd28ac"
|
|
54
54
|
},
|
|
55
55
|
"harness/executor": {
|
|
56
56
|
"path": ".pi/agents/harness/executor.md",
|
|
57
|
-
"sha256": "
|
|
57
|
+
"sha256": "6baffcc3d89954494ce3ae439175686a39928b6a543a0a451da27475094b1712"
|
|
58
58
|
},
|
|
59
59
|
"harness/incident-recorder": {
|
|
60
60
|
"path": ".pi/agents/harness/incident-recorder.md",
|
|
61
|
-
"sha256": "
|
|
61
|
+
"sha256": "d42fa45de1a2fe3842d075c6f319315266588942e314f1b650caabac39bdc29a"
|
|
62
62
|
},
|
|
63
63
|
"harness/meta-optimizer": {
|
|
64
64
|
"path": ".pi/agents/harness/meta-optimizer.md",
|
|
65
|
-
"sha256": "
|
|
65
|
+
"sha256": "cbaab35367126796b7136389a02ab41b4fd1fe7098cf83be562d7b7493ccc297"
|
|
66
66
|
},
|
|
67
67
|
"harness/sentrux-bootstrap": {
|
|
68
68
|
"path": ".pi/agents/harness/sentrux-bootstrap.md",
|
|
@@ -70,63 +70,63 @@
|
|
|
70
70
|
},
|
|
71
71
|
"harness/tie-breaker": {
|
|
72
72
|
"path": ".pi/agents/harness/tie-breaker.md",
|
|
73
|
-
"sha256": "
|
|
73
|
+
"sha256": "1c54c1c3274291dea1ea8826563a7ad4fe1d9c4302984e907bfcd22cfc4f5eba"
|
|
74
74
|
},
|
|
75
75
|
"harness/trace-librarian": {
|
|
76
76
|
"path": ".pi/agents/harness/trace-librarian.md",
|
|
77
|
-
"sha256": "
|
|
77
|
+
"sha256": "336b3f3f6141cef8750ab18d29bbe454caf26973830a86afe099d9e4ad8b0abe"
|
|
78
78
|
},
|
|
79
79
|
"harness/planning/decompose": {
|
|
80
80
|
"path": ".pi/agents/harness/planning/decompose.md",
|
|
81
|
-
"sha256": "
|
|
81
|
+
"sha256": "0919dafa1d1cd008d513c28524c1e7218867586a138982dccf01db5270c42c73"
|
|
82
82
|
},
|
|
83
83
|
"harness/planning/execution-plan-author": {
|
|
84
84
|
"path": ".pi/agents/harness/planning/execution-plan-author.md",
|
|
85
|
-
"sha256": "
|
|
85
|
+
"sha256": "55ece0f1ee14abd17fe7b3e478b548240f637eacbfc2a34758e98d3878dc82fd"
|
|
86
86
|
},
|
|
87
87
|
"harness/planning/hypothesis-validator": {
|
|
88
88
|
"path": ".pi/agents/harness/planning/hypothesis-validator.md",
|
|
89
|
-
"sha256": "
|
|
89
|
+
"sha256": "36f0baa7796229f21bd02faf5e70402c7bf054289eab557a25bfbe3cb7781de7"
|
|
90
90
|
},
|
|
91
91
|
"harness/planning/hypothesis": {
|
|
92
92
|
"path": ".pi/agents/harness/planning/hypothesis.md",
|
|
93
|
-
"sha256": "
|
|
93
|
+
"sha256": "e83d5c4faaee8d32af4a5f22c9917b70a173f3e22d7c0f182b361706f2309171"
|
|
94
94
|
},
|
|
95
95
|
"harness/planning/implementation-researcher": {
|
|
96
96
|
"path": ".pi/agents/harness/planning/implementation-researcher.md",
|
|
97
|
-
"sha256": "
|
|
97
|
+
"sha256": "653f320b5d51bb331774246687f24a75347b406bba4e6dfd2968d6e5d4cc8bb3"
|
|
98
98
|
},
|
|
99
99
|
"harness/planning/plan-adversary": {
|
|
100
100
|
"path": ".pi/agents/harness/planning/plan-adversary.md",
|
|
101
|
-
"sha256": "
|
|
101
|
+
"sha256": "3241d7ec939dc29e0af64690b99e9f74b209f40b0daa4a2a1f9ff86f99f94a8d"
|
|
102
102
|
},
|
|
103
103
|
"harness/planning/plan-evaluator": {
|
|
104
104
|
"path": ".pi/agents/harness/planning/plan-evaluator.md",
|
|
105
|
-
"sha256": "
|
|
105
|
+
"sha256": "71660ab58bfcfdfae56c873140d4ea5946ae30cd5719c96afeabfd02b1d1f81d"
|
|
106
106
|
},
|
|
107
107
|
"harness/planning/review-integrator": {
|
|
108
108
|
"path": ".pi/agents/harness/planning/review-integrator.md",
|
|
109
|
-
"sha256": "
|
|
109
|
+
"sha256": "cf3f0dbe81274ec9ef0ff2e0c170e8dc929b20be65492d0ee9a80d985acf6d71"
|
|
110
110
|
},
|
|
111
111
|
"harness/planning/scout-graphify": {
|
|
112
112
|
"path": ".pi/agents/harness/planning/scout-graphify.md",
|
|
113
|
-
"sha256": "
|
|
113
|
+
"sha256": "6e2bda8ad38311810c9916d9dab311873bc776e4b8832bb0e574136e45e1255e"
|
|
114
114
|
},
|
|
115
115
|
"harness/planning/scout-semantic": {
|
|
116
116
|
"path": ".pi/agents/harness/planning/scout-semantic.md",
|
|
117
|
-
"sha256": "
|
|
117
|
+
"sha256": "416e518d8204a55b26dc53da1f750865c6f09ee2c7f343b41e7c08da3230c089"
|
|
118
118
|
},
|
|
119
119
|
"harness/planning/scout-structure": {
|
|
120
120
|
"path": ".pi/agents/harness/planning/scout-structure.md",
|
|
121
|
-
"sha256": "
|
|
121
|
+
"sha256": "76c42a15cc74cf1de2cf861cb0146c865c205f69cce7b9605d41893b19600029"
|
|
122
122
|
},
|
|
123
123
|
"harness/planning/sprint-contract-auditor": {
|
|
124
124
|
"path": ".pi/agents/harness/planning/sprint-contract-auditor.md",
|
|
125
|
-
"sha256": "
|
|
125
|
+
"sha256": "12cb5e6b53dcc19ace62e8e4c152d96440717df53a182e76216dd2327410df4d"
|
|
126
126
|
},
|
|
127
127
|
"harness/planning/stack-researcher": {
|
|
128
128
|
"path": ".pi/agents/harness/planning/stack-researcher.md",
|
|
129
|
-
"sha256": "
|
|
129
|
+
"sha256": "ce546ef3aca19da7f334f07cef8f510b79068bffeb7f276c428f3e6236bbe96b"
|
|
130
130
|
}
|
|
131
131
|
}
|
|
132
132
|
}
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# ADR 0037: Subagent submit tools (replace JSON prose contracts)
|
|
2
|
+
|
|
3
|
+
**Status:** Accepted
|
|
4
|
+
**Date:** 2026-05-19
|
|
5
|
+
|
|
6
|
+
## Context
|
|
7
|
+
|
|
8
|
+
Harness plan/execute agents used fenced JSON in `finalOutput`, requiring the parent orchestrator to parse prose and call `write_harness_yaml`. This was fragile (truncated parallel summaries, invalid JSON, double-hop writes).
|
|
9
|
+
|
|
10
|
+
Planning agents set `extensions: false` and subprocess spawn used `--no-extensions`, so harness tools were unavailable in children.
|
|
11
|
+
|
|
12
|
+
## Decision
|
|
13
|
+
|
|
14
|
+
1. **Option A — subprocess-only extension bundle:** vendored spawn passes `--no-extensions -e .pi/extensions/harness-subagent-submit.ts` for `harness/*` agents with `extensions: false`.
|
|
15
|
+
2. **Scoped `submit_*` tools** per agent, validated against `.pi/harness/specs/*.schema.json` (Ajv) and written deterministically under `HARNESS_RUN_DIR`.
|
|
16
|
+
3. **Parent gates** via `harness_artifact_ready` (file existence) instead of parsing subprocess JSON.
|
|
17
|
+
4. **Debate lanes:** `tool_result` hook prefers last `submit_*` in `details.results[].messages`; skips `finalOutput` auto-apply when submit present (`HARNESS_SUBMIT_TOOLS` default on).
|
|
18
|
+
5. **Parent** blocks all `submit_*`; keeps `write_harness_yaml` for merges and debate round submission only.
|
|
19
|
+
|
|
20
|
+
## Consequences
|
|
21
|
+
|
|
22
|
+
- Agent frontmatter lists one terminal `submit_*` tool per role.
|
|
23
|
+
- `HarnessSpawnContext` must include `run_id` / `run_dir`; bridge sets `HARNESS_RUN_ID`, `HARNESS_RUN_DIR`, `HARNESS_AGENT_ID` on spawn.
|
|
24
|
+
- `parseHarnessAgentJson` retained for migration/tests; hot path is tool args.
|
|
25
|
+
- See ADR 0038 for budget telemetry-only default.
|
|
26
|
+
|
|
27
|
+
## References
|
|
28
|
+
|
|
29
|
+
- `.pi/extensions/harness-subagent-submit.ts`
|
|
30
|
+
- `.pi/extensions/lib/harness-subagent-submit-registry.ts`
|
|
31
|
+
- `.pi/harness/specs/plan-scout-findings.schema.json`
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# ADR 0038: Budget enforcement telemetry-only (default)
|
|
2
|
+
|
|
3
|
+
**Status:** Accepted
|
|
4
|
+
**Date:** 2026-05-19
|
|
5
|
+
|
|
6
|
+
## Context
|
|
7
|
+
|
|
8
|
+
Token and debate caps emitted `harness-budget-exhausted`, which set `budgetExhausted` in the live widget and blocked flows even when `HARNESS_BUDGET_HARD_STOP` was false. `max_rounds` and messenger exchange limits in `validatePlanDebateGate` also hard-failed approval.
|
|
9
|
+
|
|
10
|
+
## Decision
|
|
11
|
+
|
|
12
|
+
- **`HARNESS_BUDGET_ENFORCE` default `off`:** phase/debate caps log `harness-budget-soft-limit` and `harness-budget-telemetry` only; `harness-budget-exhausted` is emitted only when enforce is on **and** hard-stop flags are set.
|
|
13
|
+
- **UI:** `budgetExhausted` / blocked substate only when blocking exhaustion events qualify.
|
|
14
|
+
- **Debate:** `capsForDebate` uses sentinel caps when enforce is off; `max_rounds` gate errors become warnings.
|
|
15
|
+
- **CLI:** `--budget` on harness prompts is reserved/no-op until a real budget story ships.
|
|
16
|
+
|
|
17
|
+
Re-enable: `HARNESS_BUDGET_ENFORCE=1` plus `HARNESS_BUDGET_HARD_STOP` / `HARNESS_DEBATE_HARD_STOP` as needed.
|
|
18
|
+
|
|
19
|
+
## Consequences
|
|
20
|
+
|
|
21
|
+
- Long debates and large plans are not blocked by soft token telemetry.
|
|
22
|
+
- Quality gates (`min_focus_rounds`, required focuses, `review_gate_ready`) remain enforced.
|
|
23
|
+
- PostHog should prefer `harness_budget_telemetry` over exhausted for dashboards until enforce returns.
|
|
@@ -22,6 +22,8 @@ Team-shared ADRs for the ultimate-pi harness live under `.pi/harness/docs/adrs/`
|
|
|
22
22
|
| [0034](0034-darwin-plan-research-pipeline.md) | Darwin plan research pipeline | Accepted |
|
|
23
23
|
| [0035](0035-plan-phase-review-gate.md) | Plan-phase Review Gate | Accepted |
|
|
24
24
|
| [0036](0036-implementation-research-and-selective-debate.md) | Implementation research and selective debate | Accepted |
|
|
25
|
+
| [0037](0037-subagent-submit-tools.md) | Subagent submit tools (subprocess extension) | Accepted |
|
|
26
|
+
| [0038](0038-budget-telemetry-only.md) | Budget caps telemetry-only by default | Accepted |
|
|
25
27
|
|
|
26
28
|
## Template
|
|
27
29
|
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
schema_version: "1.0.0"
|
|
2
|
+
problem_framing: Validate harness plan-phase with fixture-driven smoke
|
|
3
|
+
sub_problems:
|
|
4
|
+
- DAG validation
|
|
5
|
+
- Debate gate coverage
|
|
6
|
+
internal_references:
|
|
7
|
+
- path: .pi/harness/evals/smoke/smoke-harness-plan.mjs
|
|
8
|
+
relevance: Existing smoke pattern
|
|
9
|
+
reuse_signal: high
|
|
10
|
+
external_references: []
|
|
11
|
+
solution_patterns:
|
|
12
|
+
- name: fixture-driven gate
|
|
13
|
+
provenance: in-repo smoke
|
|
14
|
+
fit: Validates plan pipeline without live agents
|
|
15
|
+
tradeoffs:
|
|
16
|
+
pros: [Deterministic CI]
|
|
17
|
+
cons: []
|
|
18
|
+
risks: []
|
|
19
|
+
similar_implementations: []
|
|
20
|
+
recommended_approach:
|
|
21
|
+
summary: Extend minimal-med fixture with implementation artifact
|
|
22
|
+
recommended_approach_confidence: high
|
|
23
|
+
confidence_rationale: Reuses established smoke-harness-plan pattern
|
|
24
|
+
evidence_refs:
|
|
25
|
+
- .pi/harness/evals/smoke/smoke-harness-plan.mjs
|
|
26
|
+
- .pi/scripts/validate-plan-dag.mjs
|
|
27
|
+
anti_patterns: []
|
|
28
|
+
open_questions: []
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
schema_version: "1.0.0"
|
|
2
|
+
round_index: 1
|
|
3
|
+
debate_round_focus: all
|
|
4
|
+
round_summary: Consolidated review gate for fast profile fixture
|
|
5
|
+
validation_summary: Spec and quality checks pass in one round
|
|
6
|
+
adversary_summary: No blockers
|
|
7
|
+
disputes: []
|
|
8
|
+
recommended_packet_patches: []
|
|
9
|
+
review_gate_ready: true
|
|
10
|
+
participants:
|
|
11
|
+
- PlanEvaluatorAgent
|
|
12
|
+
- PlanAdversaryAgent
|
|
13
|
+
- SprintContractAuditorAgent
|
|
14
|
+
- ReviewIntegratorAgent
|
|
15
|
+
claims:
|
|
16
|
+
- consolidated review gate ready
|
|
17
|
+
rebuttals: []
|
|
18
|
+
evidence_refs: []
|
|
19
|
+
token_usage:
|
|
20
|
+
per_agent:
|
|
21
|
+
PlanEvaluatorAgent: 120
|
|
22
|
+
PlanAdversaryAgent: 100
|
|
23
|
+
SprintContractAuditorAgent: 80
|
|
24
|
+
round_total: 300
|
|
25
|
+
consensus_delta: 0.1
|
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
schema_version: "1.0.0"
|
|
2
|
+
contract_version: "1.1.0"
|
|
3
|
+
plan_id: plan-smoke-fixture-001
|
|
4
|
+
task_id: task-smoke-001
|
|
5
|
+
scope: Smoke fixture for plan-phase harness validation with execution_plan and debate artifacts.
|
|
6
|
+
assumptions:
|
|
7
|
+
- Fixture only; no live agent run
|
|
8
|
+
risk_level: med
|
|
9
|
+
acceptance_checks:
|
|
10
|
+
- id: AC-1
|
|
11
|
+
description: DAG validation passes
|
|
12
|
+
- id: AC-2
|
|
13
|
+
description: Consolidated debate round recorded (fast profile)
|
|
14
|
+
- id: AC-3
|
|
15
|
+
description: Stack brief present in research-brief
|
|
16
|
+
- id: AC-4
|
|
17
|
+
description: Sprint contract complete
|
|
18
|
+
- id: AC-5
|
|
19
|
+
description: plan-review.md renders
|
|
20
|
+
rollback_plan:
|
|
21
|
+
revert_commit_ready: true
|
|
22
|
+
rollback_artifacts:
|
|
23
|
+
revert_command: git revert HEAD
|
|
24
|
+
revert_branch: main
|
|
25
|
+
patch_bundle: .pi/harness/runs/smoke-fixture/patch.bundle
|
|
26
|
+
execution_plan:
|
|
27
|
+
schema_version: "1.0.0"
|
|
28
|
+
phases:
|
|
29
|
+
- phase_id: P1
|
|
30
|
+
name: Foundation
|
|
31
|
+
objective: Establish baseline and verify harness wiring
|
|
32
|
+
entry_criteria:
|
|
33
|
+
- Fixture loaded
|
|
34
|
+
exit_criteria:
|
|
35
|
+
- AC-1 satisfied
|
|
36
|
+
milestone: M1-baseline
|
|
37
|
+
work_item_ids: [WI-1, WI-2, WI-3]
|
|
38
|
+
- phase_id: P2
|
|
39
|
+
name: Build
|
|
40
|
+
objective: Implement core changes
|
|
41
|
+
entry_criteria:
|
|
42
|
+
- M1-baseline complete
|
|
43
|
+
exit_criteria:
|
|
44
|
+
- AC-2 satisfied
|
|
45
|
+
milestone: M2-build
|
|
46
|
+
work_item_ids: [WI-4, WI-5, WI-6]
|
|
47
|
+
- phase_id: P3
|
|
48
|
+
name: Verify
|
|
49
|
+
objective: Quality gate and documentation
|
|
50
|
+
entry_criteria:
|
|
51
|
+
- M2-build complete
|
|
52
|
+
exit_criteria:
|
|
53
|
+
- AC-5 satisfied
|
|
54
|
+
milestone: M3-ship
|
|
55
|
+
work_item_ids: [WI-7, WI-8]
|
|
56
|
+
work_items:
|
|
57
|
+
- work_item_id: WI-1
|
|
58
|
+
phase_id: P1
|
|
59
|
+
title: Load fixture packet
|
|
60
|
+
description: Read plan-packet.yaml from fixture directory
|
|
61
|
+
depends_on: []
|
|
62
|
+
files:
|
|
63
|
+
- .pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/plan-packet.yaml
|
|
64
|
+
parallel_safe: true
|
|
65
|
+
done_criteria:
|
|
66
|
+
type: manual
|
|
67
|
+
spec: Fixture packet readable
|
|
68
|
+
acceptance_check_ids: [AC-1]
|
|
69
|
+
- work_item_id: WI-2
|
|
70
|
+
phase_id: P1
|
|
71
|
+
title: Run DAG validator
|
|
72
|
+
description: Execute validate-plan-dag.mjs
|
|
73
|
+
depends_on: [WI-1]
|
|
74
|
+
files:
|
|
75
|
+
- .pi/scripts/validate-plan-dag.mjs
|
|
76
|
+
parallel_safe: false
|
|
77
|
+
done_criteria:
|
|
78
|
+
type: command
|
|
79
|
+
spec: node .pi/scripts/validate-plan-dag.mjs --packet plan-packet.yaml
|
|
80
|
+
acceptance_check_ids: [AC-1]
|
|
81
|
+
- work_item_id: WI-3
|
|
82
|
+
phase_id: P1
|
|
83
|
+
title: Lint harness-yaml
|
|
84
|
+
description: Ensure YAML helpers parse fixture
|
|
85
|
+
depends_on: [WI-1]
|
|
86
|
+
files:
|
|
87
|
+
- .pi/lib/harness-yaml.ts
|
|
88
|
+
parallel_safe: true
|
|
89
|
+
done_criteria:
|
|
90
|
+
type: lint
|
|
91
|
+
spec: npm test
|
|
92
|
+
acceptance_check_ids: [AC-1]
|
|
93
|
+
- work_item_id: WI-4
|
|
94
|
+
phase_id: P2
|
|
95
|
+
title: Debate round 1-2 artifacts
|
|
96
|
+
description: Validate review-round YAML
|
|
97
|
+
depends_on: [WI-2]
|
|
98
|
+
files:
|
|
99
|
+
- .pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r1.yaml
|
|
100
|
+
parallel_safe: false
|
|
101
|
+
done_criteria:
|
|
102
|
+
type: artifact
|
|
103
|
+
spec: artifacts/review-round-r1.yaml exists
|
|
104
|
+
acceptance_check_ids: [AC-2]
|
|
105
|
+
- work_item_id: WI-5
|
|
106
|
+
phase_id: P2
|
|
107
|
+
title: Debate round 3-4 artifacts
|
|
108
|
+
description: Validate final review round
|
|
109
|
+
depends_on: [WI-4]
|
|
110
|
+
files:
|
|
111
|
+
- .pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r4.yaml
|
|
112
|
+
parallel_safe: false
|
|
113
|
+
done_criteria:
|
|
114
|
+
type: artifact
|
|
115
|
+
spec: artifacts/review-round-r4.yaml exists
|
|
116
|
+
acceptance_check_ids: [AC-2]
|
|
117
|
+
- work_item_id: WI-6
|
|
118
|
+
phase_id: P2
|
|
119
|
+
title: Stack research merge
|
|
120
|
+
description: research-brief includes stack section
|
|
121
|
+
depends_on: [WI-2]
|
|
122
|
+
files: []
|
|
123
|
+
non_code: true
|
|
124
|
+
parallel_safe: true
|
|
125
|
+
done_criteria:
|
|
126
|
+
type: manual
|
|
127
|
+
spec: research-brief.yaml contains stack key
|
|
128
|
+
acceptance_check_ids: [AC-3]
|
|
129
|
+
- work_item_id: WI-7
|
|
130
|
+
phase_id: P3
|
|
131
|
+
title: Sprint contract audit
|
|
132
|
+
description: R4 sprint audit artifact
|
|
133
|
+
depends_on: [WI-5]
|
|
134
|
+
files:
|
|
135
|
+
- .pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/sprint-audit-r4.yaml
|
|
136
|
+
parallel_safe: false
|
|
137
|
+
done_criteria:
|
|
138
|
+
type: artifact
|
|
139
|
+
spec: sprint-audit-r4.yaml present
|
|
140
|
+
acceptance_check_ids: [AC-4]
|
|
141
|
+
- work_item_id: WI-8
|
|
142
|
+
phase_id: P3
|
|
143
|
+
title: Render plan-review
|
|
144
|
+
description: Human-readable plan review markdown
|
|
145
|
+
depends_on: [WI-7]
|
|
146
|
+
files:
|
|
147
|
+
- .pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/plan-review.md
|
|
148
|
+
parallel_safe: false
|
|
149
|
+
done_criteria:
|
|
150
|
+
type: manual
|
|
151
|
+
spec: plan-review.md non-empty
|
|
152
|
+
acceptance_check_ids: [AC-5]
|
|
153
|
+
sprint_contract:
|
|
154
|
+
in_scope:
|
|
155
|
+
- Fixture validation only
|
|
156
|
+
out_of_scope:
|
|
157
|
+
- Production deploy
|
|
158
|
+
definition_of_done: All smoke checks green
|
|
159
|
+
assumptions:
|
|
160
|
+
- CI environment has node
|
|
161
|
+
external_dependencies: []
|
|
162
|
+
wbs_dictionary:
|
|
163
|
+
- work_item_id: WI-1
|
|
164
|
+
deliverable: Fixture packet loaded
|
|
165
|
+
owner_role: executor
|
|
166
|
+
inputs: []
|
|
167
|
+
outputs: [parsed packet]
|
|
168
|
+
risk_register:
|
|
169
|
+
- risk_id: R1
|
|
170
|
+
description: DAG validator false negative
|
|
171
|
+
likelihood: low
|
|
172
|
+
impact: high
|
|
173
|
+
mitigation: Unit tests on validate-plan-dag.mjs
|
|
174
|
+
linked_work_item_ids: [WI-2]
|
|
175
|
+
- risk_id: R2
|
|
176
|
+
description: Debate cap misconfiguration
|
|
177
|
+
likelihood: med
|
|
178
|
+
impact: med
|
|
179
|
+
mitigation: debate-orchestrator plan profile tests
|
|
180
|
+
linked_work_item_ids: [WI-4]
|
|
181
|
+
- risk_id: R3
|
|
182
|
+
description: YAML parse drift
|
|
183
|
+
likelihood: low
|
|
184
|
+
impact: med
|
|
185
|
+
mitigation: harness-yaml strict parse
|
|
186
|
+
linked_work_item_ids: [WI-3]
|
|
187
|
+
schedule_metadata:
|
|
188
|
+
critical_path_work_item_ids: [WI-1, WI-2, WI-4, WI-5, WI-7, WI-8]
|
|
189
|
+
parallel_groups:
|
|
190
|
+
- [WI-1, WI-3]
|
|
191
|
+
schedule_baseline_note: Fixture topological order; no calendar dates
|
|
192
|
+
dag_validation:
|
|
193
|
+
status: pass
|
|
194
|
+
topological_order: [WI-1, WI-2, WI-3, WI-4, WI-5, WI-6, WI-7, WI-8]
|
|
195
|
+
cycles: []
|
|
196
|
+
conflicts: []
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
# Plan review (fixture)
|
|
2
|
+
|
|
3
|
+
plan_id: plan-smoke-fixture-001
|
|
4
|
+
|
|
5
|
+
## Execution plan
|
|
6
|
+
|
|
7
|
+
Phases: P1 Foundation → P2 Build → P3 Verify
|
|
8
|
+
|
|
9
|
+
Critical path: WI-1 → WI-2 → WI-4 → WI-5 → WI-7 → WI-8
|
|
10
|
+
|
|
11
|
+
## Debate
|
|
12
|
+
|
|
13
|
+
- Round 1 (spec): review_gate_ready
|
|
14
|
+
- Round 4 (quality): review_gate_ready
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
decomposition:
|
|
2
|
+
schema_version: "1.0.0"
|
|
3
|
+
problem_restatement: Light-profile smoke for two-focus debate
|
|
4
|
+
hypothesis:
|
|
5
|
+
schema_version: "1.0.0"
|
|
6
|
+
primary:
|
|
7
|
+
claim: Light debate covers spec and quality only
|
|
8
|
+
mechanism: Eligibility profile light with min_focus_rounds 2
|
|
9
|
+
prediction: planDebateOutcomeComplete passes with two rounds
|
|
10
|
+
experiment: Run smoke-harness-plan.mjs --fixture minimal-low-light
|
|
11
|
+
implementation:
|
|
12
|
+
schema_version: "1.0.0"
|
|
13
|
+
problem_framing: Low-risk fixture for selective debate
|
|
14
|
+
sub_problems: [spec coverage, quality coverage]
|
|
15
|
+
internal_references:
|
|
16
|
+
- path: test/plan-debate-eligibility.test.mjs
|
|
17
|
+
relevance: Eligibility unit tests
|
|
18
|
+
reuse_signal: high
|
|
19
|
+
external_references: []
|
|
20
|
+
solution_patterns:
|
|
21
|
+
- name: light profile gate
|
|
22
|
+
provenance: ADR-0036
|
|
23
|
+
fit: Reduces debate cost on trivial tasks
|
|
24
|
+
tradeoffs:
|
|
25
|
+
pros: [Fewer rounds]
|
|
26
|
+
cons: []
|
|
27
|
+
risks: []
|
|
28
|
+
similar_implementations:
|
|
29
|
+
- name: minimal-med four-focus fixture
|
|
30
|
+
what_it_solves: Full debate coverage
|
|
31
|
+
gap_vs_us: Light uses two focuses only
|
|
32
|
+
recommended_approach:
|
|
33
|
+
summary: Two review rounds with spec then quality
|
|
34
|
+
recommended_approach_confidence: high
|
|
35
|
+
confidence_rationale: Deterministic fixture aligned with eligibility rules
|
|
36
|
+
evidence_refs:
|
|
37
|
+
- .pi/extensions/lib/plan-debate-eligibility.ts
|
|
38
|
+
- test/plan-debate-eligibility.test.mjs
|
|
39
|
+
anti_patterns: []
|
|
40
|
+
open_questions: []
|
|
41
|
+
stack:
|
|
42
|
+
schema_version: "1.0.0"
|
|
43
|
+
problem_framing: Node harness tooling
|
|
44
|
+
constraints: []
|
|
45
|
+
options:
|
|
46
|
+
- name: extend current stack
|
|
47
|
+
category: brownfield
|
|
48
|
+
fit_summary: Use existing ultimate-pi harness
|
|
49
|
+
tradeoffs:
|
|
50
|
+
pros: [No new deps]
|
|
51
|
+
cons: []
|
|
52
|
+
risks: []
|
|
53
|
+
evidence_refs: []
|
|
54
|
+
recommendation_rank: 1
|
|
55
|
+
recommended_primary: extend current stack
|
|
56
|
+
rationale: Fixture validates in-repo harness
|
|
57
|
+
eval:
|
|
58
|
+
schema_version: "1.0.0"
|
|
59
|
+
revision_recommended: false
|
|
60
|
+
relevance:
|
|
61
|
+
passes: true
|
|
62
|
+
rationale: Hypothesis matches light smoke task
|