ultimate-pi 0.15.0 → 0.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agents/skills/harness-governor/SKILL.md +11 -0
- package/.agents/skills/harness-orchestration/SKILL.md +3 -1
- package/.agents/skills/harness-plan/SKILL.md +5 -5
- package/.pi/agents/harness/adversary.md +1 -1
- package/.pi/agents/harness/evaluator.md +1 -1
- package/.pi/agents/harness/executor.md +1 -1
- package/.pi/agents/harness/incident-recorder.md +1 -1
- package/.pi/agents/harness/meta-optimizer.md +1 -1
- package/.pi/agents/harness/planning/decompose.md +4 -33
- package/.pi/agents/harness/planning/execution-plan-author.md +3 -2
- package/.pi/agents/harness/planning/hypothesis-validator.md +3 -2
- package/.pi/agents/harness/planning/hypothesis.md +4 -27
- package/.pi/agents/harness/planning/implementation-researcher.md +3 -2
- package/.pi/agents/harness/planning/plan-adversary.md +2 -3
- package/.pi/agents/harness/planning/plan-evaluator.md +3 -2
- package/.pi/agents/harness/planning/review-integrator.md +2 -3
- package/.pi/agents/harness/planning/scout-graphify.md +3 -22
- package/.pi/agents/harness/planning/scout-semantic.md +3 -18
- package/.pi/agents/harness/planning/scout-structure.md +3 -18
- package/.pi/agents/harness/planning/sprint-contract-auditor.md +3 -2
- package/.pi/agents/harness/planning/stack-researcher.md +3 -2
- package/.pi/agents/harness/tie-breaker.md +1 -1
- package/.pi/agents/harness/trace-librarian.md +1 -1
- package/.pi/extensions/budget-guard.ts +33 -19
- package/.pi/extensions/harness-debate-tools.ts +54 -6
- package/.pi/extensions/harness-run-context.ts +108 -2
- package/.pi/extensions/harness-subagent-submit.ts +172 -0
- package/.pi/extensions/harness-telemetry.ts +29 -4
- package/.pi/extensions/lib/debate-bus-core.ts +49 -6
- package/.pi/extensions/lib/harness-subagent-auth.ts +104 -19
- package/.pi/extensions/lib/harness-subagent-policy.ts +59 -0
- package/.pi/extensions/lib/harness-subagent-submit-pipeline.ts +82 -0
- package/.pi/extensions/lib/harness-subagent-submit-registry.ts +172 -0
- package/.pi/extensions/lib/harness-subagents-bridge.ts +127 -0
- package/.pi/extensions/lib/plan-debate-eligibility.ts +61 -8
- package/.pi/extensions/lib/plan-debate-focus.ts +21 -9
- package/.pi/extensions/lib/plan-debate-gate.ts +92 -18
- package/.pi/extensions/lib/plan-debate-lane.ts +15 -0
- package/.pi/extensions/lib/plan-debate-lanes.ts +27 -3
- package/.pi/extensions/lib/plan-debate-round-status.ts +18 -7
- package/.pi/extensions/lib/plan-messenger.ts +4 -0
- package/.pi/extensions/lib/plan-review-gate.ts +51 -0
- package/.pi/extensions/trace-recorder.ts +1 -0
- package/.pi/harness/agents.manifest.json +22 -22
- package/.pi/harness/docs/adrs/0037-subagent-submit-tools.md +31 -0
- package/.pi/harness/docs/adrs/0038-budget-telemetry-only.md +23 -0
- package/.pi/harness/docs/adrs/README.md +2 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/artifacts/implementation-research.yaml +28 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/artifacts/review-round-consolidated.yaml +25 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/plan-packet.yaml +196 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/plan-review.md +14 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/research-brief.yaml +62 -0
- package/.pi/harness/evals/smoke/smoke-harness-plan.mjs +40 -17
- package/.pi/harness/specs/harness-executor-handoff.schema.json +19 -0
- package/.pi/harness/specs/harness-human-required.schema.json +16 -0
- package/.pi/harness/specs/plan-review-round-draft.schema.json +1 -1
- package/.pi/harness/specs/plan-scout-findings.schema.json +19 -0
- package/.pi/lib/harness-agent-output.ts +45 -0
- package/.pi/lib/harness-budget-enforce.ts +18 -0
- package/.pi/lib/harness-schema-validate.ts +89 -0
- package/.pi/lib/harness-spawn-parse.ts +86 -0
- package/.pi/lib/harness-subagent-submit-path.ts +41 -0
- package/.pi/lib/harness-ui-state.ts +15 -2
- package/.pi/model-router.example.json +13 -4
- package/.pi/prompts/harness-auto.md +2 -2
- package/.pi/prompts/harness-plan.md +34 -14
- package/.pi/prompts/harness-run.md +2 -2
- package/.pi/prompts/harness-setup.md +4 -4
- package/.pi/scripts/harness-generate-model-router.mjs +118 -36
- package/.pi/scripts/harness-model-router-routing.test.mjs +97 -0
- package/.pi/scripts/harness-sync-model-router.mjs +15 -2
- package/.pi/scripts/harness-verify.mjs +31 -0
- package/.pi/scripts/harness_web/__pycache__/__init__.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/config.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/output.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/scrape.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/search.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/search_ddg.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/search_searxng.cpython-314.pyc +0 -0
- package/CHANGELOG.md +21 -0
- package/package.json +4 -2
- package/vendor/pi-model-router/UPSTREAM_PIN.md +3 -1
- package/vendor/pi-model-router/extensions/commands.ts +4 -4
- package/vendor/pi-model-router/extensions/index.ts +21 -0
- package/vendor/pi-model-router/extensions/provider.ts +130 -79
- package/vendor/pi-model-router/extensions/routing.ts +148 -0
- package/vendor/pi-model-router/extensions/state.ts +3 -0
- package/vendor/pi-model-router/extensions/types.ts +9 -0
- package/vendor/pi-model-router/extensions/ui.ts +16 -2
- package/vendor/pi-subagents/src/subagents.ts +29 -3
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
/**
|
|
3
3
|
* smoke-harness-plan — fixture validation for plan-phase pipeline (CI).
|
|
4
|
-
* Usage: node .pi/harness/evals/smoke/smoke-harness-plan.mjs --fixture [minimal-med|minimal-low-light]
|
|
4
|
+
* Usage: node .pi/harness/evals/smoke/smoke-harness-plan.mjs --fixture [minimal-med|minimal-low-light|minimal-med-fast]
|
|
5
5
|
*/
|
|
6
6
|
|
|
7
7
|
import { access, readFile } from "node:fs/promises";
|
|
@@ -26,16 +26,23 @@ async function scanFocusCoverage(fixtureRoot, requiredFocus) {
|
|
|
26
26
|
let last_round_index = 0;
|
|
27
27
|
const { readdir } = await import("node:fs/promises");
|
|
28
28
|
const files = (await readdir(art)).filter((f) =>
|
|
29
|
-
/^review-round
|
|
29
|
+
/^review-round(?:-r\d+|-consolidated)\.yaml$/i.test(f),
|
|
30
30
|
);
|
|
31
31
|
for (const name of files.sort()) {
|
|
32
|
-
const
|
|
32
|
+
const consolidated = /^review-round-consolidated\.yaml$/i.test(name);
|
|
33
|
+
const m = consolidated
|
|
34
|
+
? ["review-round-consolidated.yaml", "1"]
|
|
35
|
+
: /^review-round-r(\d+)\.yaml$/i.exec(name);
|
|
33
36
|
if (!m) continue;
|
|
34
|
-
const roundIndex = Number(m[1]);
|
|
37
|
+
const roundIndex = consolidated ? 1 : Number(m[1]);
|
|
35
38
|
if (roundIndex > last_round_index) last_round_index = roundIndex;
|
|
36
39
|
const draft = parseYaml(await readFile(join(art, name), "utf-8"));
|
|
37
40
|
const focus = String(draft.debate_round_focus ?? "").trim();
|
|
38
|
-
if (
|
|
41
|
+
if (focus === "all") {
|
|
42
|
+
for (const f of requiredFocus) covered.add(f);
|
|
43
|
+
} else if (requiredFocus.includes(focus)) {
|
|
44
|
+
covered.add(focus);
|
|
45
|
+
}
|
|
39
46
|
if (roundIndex === last_round_index) {
|
|
40
47
|
last_review_gate_ready = draft.review_gate_ready === true;
|
|
41
48
|
}
|
|
@@ -110,22 +117,33 @@ async function runFixture(name) {
|
|
|
110
117
|
ok("research-brief.yaml structure");
|
|
111
118
|
|
|
112
119
|
const isLight = name === "minimal-low-light";
|
|
113
|
-
const
|
|
114
|
-
const
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
]
|
|
120
|
+
const isFast = name === "minimal-med-fast";
|
|
121
|
+
const requiredFocus =
|
|
122
|
+
isLight || isFast
|
|
123
|
+
? ["spec", "quality"]
|
|
124
|
+
: ["spec", "wbs", "schedule", "quality"];
|
|
125
|
+
const debateRounds = isFast
|
|
126
|
+
? ["review-round-consolidated.yaml"]
|
|
127
|
+
: isLight
|
|
128
|
+
? ["review-round-r1.yaml", "review-round-r2.yaml"]
|
|
129
|
+
: [
|
|
130
|
+
"review-round-r1.yaml",
|
|
131
|
+
"review-round-r2.yaml",
|
|
132
|
+
"review-round-r3.yaml",
|
|
133
|
+
"review-round-r4.yaml",
|
|
134
|
+
];
|
|
122
135
|
const seenFocus = new Set();
|
|
123
136
|
for (const fileName of debateRounds) {
|
|
124
137
|
const p = join(fixtureRoot, "artifacts", fileName);
|
|
125
138
|
await access(p, constants.R_OK);
|
|
126
139
|
const draft = parseYaml(await readFile(p, "utf-8"));
|
|
127
140
|
if (!draft.schema_version) fail(`${fileName} missing schema_version`);
|
|
128
|
-
|
|
141
|
+
const f = String(draft.debate_round_focus ?? "").trim();
|
|
142
|
+
if (f === "all") {
|
|
143
|
+
for (const req of requiredFocus) seenFocus.add(req);
|
|
144
|
+
} else if (f) {
|
|
145
|
+
seenFocus.add(f);
|
|
146
|
+
}
|
|
129
147
|
}
|
|
130
148
|
for (const focus of requiredFocus) {
|
|
131
149
|
if (!seenFocus.has(focus)) {
|
|
@@ -135,7 +153,7 @@ async function runFixture(name) {
|
|
|
135
153
|
ok(`debate round YAML artifacts (${requiredFocus.length} focuses)`);
|
|
136
154
|
|
|
137
155
|
const coverage = await scanFocusCoverage(fixtureRoot, requiredFocus);
|
|
138
|
-
const minRounds = isLight ? 2 : 4;
|
|
156
|
+
const minRounds = isFast ? 1 : isLight ? 2 : 4;
|
|
139
157
|
if (!planOutcomeComplete(coverage, requiredFocus, minRounds)) {
|
|
140
158
|
fail("debate outcome incomplete for fixture coverage");
|
|
141
159
|
}
|
|
@@ -144,6 +162,9 @@ async function runFixture(name) {
|
|
|
144
162
|
if (isLight && packet.risk_level !== "low") {
|
|
145
163
|
fail("minimal-low-light fixture must use risk_level low");
|
|
146
164
|
}
|
|
165
|
+
if (isFast && packet.risk_level !== "med") {
|
|
166
|
+
fail("minimal-med-fast fixture must use risk_level med");
|
|
167
|
+
}
|
|
147
168
|
|
|
148
169
|
console.log(`smoke-harness-plan: all ${name} fixture checks passed`);
|
|
149
170
|
}
|
|
@@ -161,7 +182,9 @@ async function main() {
|
|
|
161
182
|
);
|
|
162
183
|
return;
|
|
163
184
|
}
|
|
164
|
-
fail(
|
|
185
|
+
fail(
|
|
186
|
+
"Usage: smoke-harness-plan.mjs --fixture [minimal-med|minimal-low-light|minimal-med-fast] | --live",
|
|
187
|
+
);
|
|
165
188
|
}
|
|
166
189
|
|
|
167
190
|
main().catch((err) => {
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://ultimate-pi.local/.pi/harness/specs/harness-executor-handoff.schema.json",
|
|
4
|
+
"title": "HarnessExecutorHandoff",
|
|
5
|
+
"type": "object",
|
|
6
|
+
"additionalProperties": true,
|
|
7
|
+
"required": ["schema_version", "execution_status"],
|
|
8
|
+
"properties": {
|
|
9
|
+
"schema_version": { "type": "string", "const": "1.0.0" },
|
|
10
|
+
"execution_status": {
|
|
11
|
+
"type": "string",
|
|
12
|
+
"enum": ["completed", "blocked", "scope_drift"]
|
|
13
|
+
},
|
|
14
|
+
"files_changed": { "type": "array" },
|
|
15
|
+
"validation_summary": { "type": "string" },
|
|
16
|
+
"rollback_refs": { "type": "object" },
|
|
17
|
+
"handoff_ready": { "type": "object" }
|
|
18
|
+
}
|
|
19
|
+
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://ultimate-pi.local/.pi/harness/specs/harness-human-required.schema.json",
|
|
4
|
+
"title": "HarnessHumanRequired",
|
|
5
|
+
"type": "object",
|
|
6
|
+
"additionalProperties": false,
|
|
7
|
+
"required": ["schema_version", "reason"],
|
|
8
|
+
"properties": {
|
|
9
|
+
"schema_version": { "type": "string", "const": "1.0.0" },
|
|
10
|
+
"reason": { "type": "string", "minLength": 1 },
|
|
11
|
+
"questions": {
|
|
12
|
+
"type": "array",
|
|
13
|
+
"items": { "type": "string" }
|
|
14
|
+
}
|
|
15
|
+
}
|
|
16
|
+
}
|
|
@@ -16,7 +16,7 @@
|
|
|
16
16
|
"round_index": { "type": "integer", "minimum": 1, "maximum": 12 },
|
|
17
17
|
"debate_round_focus": {
|
|
18
18
|
"type": "string",
|
|
19
|
-
"enum": ["spec", "wbs", "schedule", "quality"]
|
|
19
|
+
"enum": ["spec", "wbs", "schedule", "quality", "all"]
|
|
20
20
|
},
|
|
21
21
|
"round_summary": { "type": "string", "minLength": 1 },
|
|
22
22
|
"validation_summary": { "type": "string" },
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://ultimate-pi.local/.pi/harness/specs/plan-scout-findings.schema.json",
|
|
4
|
+
"title": "PlanScoutFindings",
|
|
5
|
+
"type": "object",
|
|
6
|
+
"additionalProperties": true,
|
|
7
|
+
"required": ["schema_version", "lane", "summary"],
|
|
8
|
+
"properties": {
|
|
9
|
+
"schema_version": { "type": "string", "const": "1.0.0" },
|
|
10
|
+
"lane": {
|
|
11
|
+
"type": "string",
|
|
12
|
+
"enum": ["graphify", "structure", "semantic"]
|
|
13
|
+
},
|
|
14
|
+
"scout_lane": { "type": "string" },
|
|
15
|
+
"summary": { "type": "string", "minLength": 1 },
|
|
16
|
+
"key_paths": { "type": "array", "items": { "type": "string" } },
|
|
17
|
+
"findings": { "type": "array" }
|
|
18
|
+
}
|
|
19
|
+
}
|
|
@@ -21,6 +21,51 @@ export function extractJsonBlock(text: string): string | null {
|
|
|
21
21
|
return null;
|
|
22
22
|
}
|
|
23
23
|
|
|
24
|
+
/** Minimal shape of one assistant content part; only `toolCall` parts are inspected. */
export interface ToolCallPartLike {
	type?: string;
	name?: string;
	arguments?: Record<string, unknown>;
}

/** Minimal shape of a transcript message; `content` is scanned only when it is an array. */
export interface MessageLike {
	role?: string;
	content?: ToolCallPartLike[] | unknown;
}

/**
 * Last matching submit_* tool call in subprocess messages (chain-safe).
 *
 * Walks every message in order and remembers the most recent assistant
 * `toolCall` part whose name is in `toolNames` and whose
 * `arguments.document` is a plain (non-array) object.
 *
 * @param messages  Transcript messages; malformed entries are skipped.
 * @param toolNames One tool name or a list of names (each is trimmed).
 * @returns The tool name and submitted document of the last match, or
 *          `null` when no message contains a usable call.
 */
export function extractLastSubmitCall(
	messages: MessageLike[],
	toolNames: string | string[],
): { toolName: string; document: Record<string, unknown> } | null {
	const allowed = new Set(
		(Array.isArray(toolNames) ? toolNames : [toolNames]).map((n) => n.trim()),
	);
	let last: { toolName: string; document: Record<string, unknown> } | null =
		null;
	for (const msg of messages) {
		// Transcripts come from a subprocess: tolerate null/odd entries.
		if (msg?.role !== "assistant" || !Array.isArray(msg.content)) continue;
		for (const part of msg.content as unknown[]) {
			if (!part || typeof part !== "object") continue;
			const { type, name, arguments: args } = part as ToolCallPartLike;
			if (type !== "toolCall" || typeof name !== "string" || !name) continue;
			if (!allowed.has(name)) continue;
			const doc = args?.document;
			// Only plain objects count as a submitted document.
			if (doc && typeof doc === "object" && !Array.isArray(doc)) {
				last = {
					toolName: name,
					document: doc as Record<string, unknown>,
				};
			}
		}
	}
	return last;
}
|
|
61
|
+
|
|
62
|
+
/**
 * Variant of `extractLastSubmitCall` that takes a readonly list of tool names.
 *
 * Hands a mutable copy of `agentToolNames` to `extractLastSubmitCall` and
 * returns its result unchanged.
 */
export function extractLastSubmitCallForAgent(
	messages: MessageLike[],
	agentToolNames: readonly string[],
): { toolName: string; document: Record<string, unknown> } | null {
	const toolNames = Array.from(agentToolNames);
	return extractLastSubmitCall(messages, toolNames);
}
|
|
68
|
+
|
|
24
69
|
export function parseHarnessAgentJson<T extends Record<string, unknown>>(
|
|
25
70
|
text: string,
|
|
26
71
|
): { ok: true; value: T } | { ok: false; error: string } {
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Central switch for harness token/debate budget enforcement.
|
|
3
|
+
* Default: telemetry-only (HARNESS_BUDGET_ENFORCE off).
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
/**
 * Reports whether budget enforcement is switched on.
 *
 * Reads `HARNESS_BUDGET_ENFORCE` (default `"off"`). The value is trimmed and
 * lower-cased, so `"ON"`, `" true "` or a value with a trailing newline from
 * an env file are all recognised.
 *
 * @returns `true` only for `1`, `true` or `on`; anything else is off.
 */
export function isHarnessBudgetEnforceOn(): boolean {
	const raw = (process.env.HARNESS_BUDGET_ENFORCE ?? "off")
		.trim()
		.toLowerCase();
	return raw === "1" || raw === "true" || raw === "on";
}
|
|
10
|
+
|
|
11
|
+
/**
 * Decides whether a budget-exhausted signal may block UI or gates.
 *
 * Returns `true` only when enforcement is on (`isHarnessBudgetEnforceOn`) and
 * at least one of `HARNESS_BUDGET_HARD_STOP` / `HARNESS_DEBATE_HARD_STOP` is
 * exactly the string `"true"`. When it returns `false`, soft-limit and debate
 * telemetry must not block anything.
 */
export function shouldEmitBlockingBudgetExhausted(): boolean {
	const { HARNESS_BUDGET_HARD_STOP, HARNESS_DEBATE_HARD_STOP } = process.env;
	const hardStopRequested =
		HARNESS_BUDGET_HARD_STOP === "true" || HARNESS_DEBATE_HARD_STOP === "true";
	return isHarnessBudgetEnforceOn() && hardStopRequested;
}
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* JSON Schema validation for harness submit tools (Ajv draft 2020-12, offline).
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { appendFile, readFile } from "node:fs/promises";
|
|
6
|
+
import { join } from "node:path";
|
|
7
|
+
import Ajv2020 from "ajv/dist/2020";
|
|
8
|
+
import addFormats from "ajv-formats";
|
|
9
|
+
|
|
10
|
+
type ValidateFn = (data: unknown) => boolean;
|
|
11
|
+
|
|
12
|
+
const compileCache = new Map<string, ValidateFn>();
|
|
13
|
+
// Destination of the optional JSONL debug trace appended by debugLog.
// Opt-in via HARNESS_SCHEMA_DEBUG_LOG: a published module must not write to a
// path on one developer's machine. When unset the path is empty, so the
// append cannot succeed and its failure is ignored by debugLog's catch.
const DEBUG_LOG_PATH = (process.env.HARNESS_SCHEMA_DEBUG_LOG ?? "").trim();
|
|
15
|
+
|
|
16
|
+
// Validator instance shared by every schema compilation in this module;
// stays null until getAjv is first called.
let ajvSingleton: InstanceType<typeof Ajv2020> | null = null;

/**
 * Returns the shared Ajv (draft 2020-12) instance, creating it on first use.
 *
 * The instance is built with `allErrors: true`, `strict: false` and
 * `validateSchema: false`, then passed to `addFormats`.
 */
function getAjv(): InstanceType<typeof Ajv2020> {
	if (ajvSingleton) return ajvSingleton;
	const instance = new Ajv2020({
		allErrors: true,
		strict: false,
		validateSchema: false,
	});
	ajvSingleton = instance;
	addFormats(instance);
	return instance;
}
|
|
29
|
+
|
|
30
|
+
/**
 * Best-effort debug trace: appends one JSON line to DEBUG_LOG_PATH.
 *
 * Never throws. Does nothing when no log path is configured, and any
 * serialisation or filesystem failure is deliberately ignored so that
 * diagnostics cannot break schema validation.
 *
 * @param hypothesisId Free-form tag stored with the record.
 * @param message      Short description of the event.
 * @param data         Extra structured fields stored with the record.
 */
async function debugLog(
	hypothesisId: string,
	message: string,
	data: Record<string, unknown>,
): Promise<void> {
	// No destination configured: skip the filesystem call entirely.
	if (!DEBUG_LOG_PATH) return;
	// #region agent log
	try {
		await appendFile(
			DEBUG_LOG_PATH,
			`${JSON.stringify({
				sessionId: "2ca12b",
				hypothesisId,
				location: "harness-schema-validate.ts",
				message,
				data,
				timestamp: Date.now(),
			})}\n`,
		);
	} catch {
		/* ignore: logging is diagnostic only */
	}
	// #endregion
}
|
|
53
|
+
|
|
54
|
+
/**
 * Validates `document` against a JSON Schema file from the harness specs dir.
 *
 * Compiled validators are cached per `specsDir:schemaFile`, so each schema is
 * read and compiled at most once per process. Every failure mode is reported
 * through the result object: an unreadable or malformed schema file, a schema
 * that fails to compile, and a document that fails validation.
 *
 * @param specsDir   Directory that contains the schema files.
 * @param schemaFile Schema file name, joined onto `specsDir`.
 * @param document   Value to validate.
 * @returns `{ ok: true }` on success, otherwise `{ ok: false, errors }` where
 *          each error is `"<instancePath or /> <message>"`.
 */
export async function validateAgainstHarnessSchema(
	specsDir: string,
	schemaFile: string,
	document: unknown,
): Promise<{ ok: true } | { ok: false; errors: string[] }> {
	const cacheKey = `${specsDir}:${schemaFile}`;
	let validate = compileCache.get(cacheKey);
	if (!validate) {
		let schema: Record<string, unknown>;
		try {
			const schemaPath = join(specsDir, schemaFile);
			const raw = await readFile(schemaPath, "utf-8");
			schema = JSON.parse(raw) as Record<string, unknown>;
		} catch (err) {
			// Missing file or invalid JSON: report it like any other failure
			// instead of rejecting the returned promise.
			const msg = err instanceof Error ? err.message : String(err);
			return { ok: false, errors: [`schema load failed: ${msg}`] };
		}
		try {
			const ajv = getAjv();
			const compiled = ajv.compile(schema);
			validate = compiled;
			compileCache.set(cacheKey, compiled);
			await debugLog("H3", "schema compile ok", { schemaFile });
		} catch (err) {
			const msg = err instanceof Error ? err.message : String(err);
			await debugLog("H3", "schema compile failed", { schemaFile, error: msg });
			return { ok: false, errors: [`schema compile failed: ${msg}`] };
		}
	}
	const ok = validate(document);
	if (ok) return { ok: true };
	// The validator function carries the errors of its most recent call.
	const errors = (
		(
			validate as {
				errors?: Array<{ instancePath?: string; message?: string }>;
			}
		).errors ?? []
	).map((e: { instancePath?: string; message?: string }) =>
		`${e.instancePath || "/"} ${e.message ?? "invalid"}`.trim(),
	);
	return { ok: false, errors };
}
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Parse HarnessSpawnContext embedded in subagent task strings.
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
const SPAWN_CTX_EQ_RE = /HarnessSpawnContext\s*=\s*(\{[\s\S]*?\})(?:\s|$|\.)/;
|
|
6
|
+
|
|
7
|
+
export interface ParsedSpawnContext {
|
|
8
|
+
run_id?: string;
|
|
9
|
+
run_dir?: string;
|
|
10
|
+
agent?: string;
|
|
11
|
+
plan_packet_path?: string;
|
|
12
|
+
}
|
|
13
|
+
|
|
14
|
+
/**
 * Returns the brace-balanced `{...}` substring of `s` that begins at `start`.
 *
 * Braces inside double-quoted strings are ignored, and a backslash inside a
 * string escapes the following character. Returns `null` when `s[start]` is
 * not `{` or when the braces never balance before the end of `s`.
 */
function extractBalancedJsonObject(s: string, start: number): string | null {
	if (s[start] !== "{") return null;
	let open = 0;
	let quoted = false;
	let skipNext = false;
	let pos = start;
	while (pos < s.length) {
		const c = s[pos];
		if (quoted) {
			// Inside a string only the closing quote and escapes matter.
			if (skipNext) skipNext = false;
			else if (c === "\\") skipNext = true;
			else if (c === '"') quoted = false;
		} else if (c === '"') {
			quoted = true;
		} else if (c === "{") {
			open += 1;
		} else if (c === "}") {
			open -= 1;
			if (open === 0) return s.slice(start, pos + 1);
		}
		pos += 1;
	}
	return null;
}
|
|
39
|
+
|
|
40
|
+
/**
 * Reduces an arbitrary parsed value to a `ParsedSpawnContext`.
 *
 * Keeps `run_id`, `run_dir`, `agent` and `plan_packet_path` only when they
 * are strings (otherwise the field is `undefined`). Returns `null` when the
 * input is not an object or when both `run_id` and `run_dir` are missing or
 * empty.
 */
function normalizeSpawnContext(parsed: unknown): ParsedSpawnContext | null {
	if (!parsed || typeof parsed !== "object") return null;
	const source = parsed as Record<string, unknown>;
	const stringField = (key: string): string | undefined => {
		const value = source[key];
		return typeof value === "string" ? value : undefined;
	};
	const context: ParsedSpawnContext = {
		run_id: stringField("run_id"),
		run_dir: stringField("run_dir"),
		agent: stringField("agent"),
		plan_packet_path: stringField("plan_packet_path"),
	};
	return context.run_id || context.run_dir ? context : null;
}
|
|
51
|
+
|
|
52
|
+
/**
 * Extracts a HarnessSpawnContext from a subagent task string.
 *
 * Forms tried, in order:
 *  1. `HarnessSpawnContext = {...}` matched by SPAWN_CTX_EQ_RE.
 *  2. The same assignment, re-read with brace balancing. The regex match is
 *     non-greedy and stops at the first `}` followed by whitespace, a period
 *     or the end of the string, which truncates objects with nested braces.
 *  3. The first `{...}` object in the task, either wrapping a
 *     `HarnessSpawnContext` member or carrying a string `run_id` itself.
 *
 * @param task Raw task text handed to the subagent.
 * @returns The normalized context, or `null` when none can be parsed.
 */
export function parseSpawnContextFromTask(
	task: string,
): ParsedSpawnContext | null {
	const eqMatch = SPAWN_CTX_EQ_RE.exec(task);
	if (eqMatch?.[1]) {
		try {
			return normalizeSpawnContext(JSON.parse(eqMatch[1]));
		} catch {
			// fall through to the balanced / JSON-object forms
		}
	}

	const assignment = /HarnessSpawnContext\s*=\s*\{/.exec(task);
	if (assignment) {
		// The opening brace is the last character of the match.
		const braceAt = assignment.index + assignment[0].length - 1;
		const assigned = extractBalancedJsonObject(task, braceAt);
		if (assigned) {
			try {
				const context = normalizeSpawnContext(JSON.parse(assigned));
				if (context) return context;
			} catch {
				// not valid JSON: try the first-object form below
			}
		}
	}

	const firstBrace = task.indexOf("{");
	if (firstBrace >= 0) {
		const blob = extractBalancedJsonObject(task, firstBrace);
		if (blob) {
			try {
				const outer = JSON.parse(blob) as Record<string, unknown>;
				if (
					outer.HarnessSpawnContext &&
					typeof outer.HarnessSpawnContext === "object"
				) {
					return normalizeSpawnContext(outer.HarnessSpawnContext);
				}
				if (typeof outer.run_id === "string") {
					return normalizeSpawnContext(outer);
				}
			} catch {
				// ignore: the first object is not JSON
			}
		}
	}

	return null;
}
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Resolve and guard harness run directories for subagent submit tools.
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { realpath } from "node:fs/promises";
import { join, resolve, sep } from "node:path";
|
|
7
|
+
|
|
8
|
+
/** Returns `<projectRoot>/.pi/harness/runs`, the parent of all run directories. */
export function harnessRunsRoot(projectRoot: string): string {
	return join(projectRoot, ".pi", "harness", "runs");
}

/** True when `child` lies strictly below `parent` (platform separator aware). */
function isPathInside(parent: string, child: string): boolean {
	const prefix = parent.endsWith(sep) ? parent : `${parent}${sep}`;
	return child.startsWith(prefix);
}

/**
 * Resolves the directory a subagent may write to for a given run.
 *
 * The run directory is `<runs root>/<runId>`. `runId` must name a location
 * strictly inside the runs root, so values such as `..` or `../../x` are
 * rejected before the filesystem is touched. An optional `runDirEnv`
 * (resolved against `projectRoot`) is accepted only when its real path is
 * the run directory itself or something below it.
 *
 * @param opts.projectRoot Project root that contains `.pi/harness/runs`.
 * @param opts.runId       Run identifier; must not be blank.
 * @param opts.runDirEnv   Optional override path; surrounding whitespace is ignored.
 * @returns `{ ok: true, runDir }` with the real path, or `{ ok: false, error }`
 *          when the id is blank, escapes the runs root, the override leaves
 *          the run directory, or a path does not exist.
 */
export async function resolveGuardedRunDir(opts: {
	projectRoot: string;
	runId: string;
	runDirEnv?: string;
}): Promise<{ ok: true; runDir: string } | { ok: false; error: string }> {
	const { projectRoot, runId } = opts;
	if (!runId.trim()) {
		return { ok: false, error: "run_id is required" };
	}
	const runsRoot = harnessRunsRoot(projectRoot);
	const expected = join(runsRoot, runId);
	// join() normalizes "..", so a lexical check catches traversal while
	// still allowing a run directory that is itself a symlink.
	if (!isPathInside(runsRoot, expected)) {
		return { ok: false, error: `run_id must name a directory inside ${runsRoot}` };
	}
	const override = opts.runDirEnv?.trim();
	const requested = override ? resolve(projectRoot, override) : expected;
	try {
		const candidate = await realpath(requested);
		const expectedReal = await realpath(expected);
		if (candidate !== expectedReal && !isPathInside(expectedReal, candidate)) {
			return {
				ok: false,
				error: `run_dir must stay under ${expectedReal}`,
			};
		}
		return { ok: true, runDir: candidate };
	} catch {
		// realpath rejects when either path does not exist.
		return { ok: false, error: `run directory not found for run_id=${runId}` };
	}
}
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import type { ExtensionContext } from "@earendil-works/pi-coding-agent";
|
|
2
|
+
import { shouldEmitBlockingBudgetExhausted } from "./harness-budget-enforce.js";
|
|
2
3
|
|
|
3
4
|
export type HarnessPhase =
|
|
4
5
|
| "plan"
|
|
@@ -133,6 +134,9 @@ const RELEVANT_CUSTOM_TYPES = new Set([
|
|
|
133
134
|
"harness-consensus-packet",
|
|
134
135
|
"harness-round-result",
|
|
135
136
|
"harness-budget-exhausted",
|
|
137
|
+
"harness-budget-soft-limit",
|
|
138
|
+
"harness-budget-telemetry",
|
|
139
|
+
"harness-debate-budget-telemetry",
|
|
136
140
|
"harness-review-integrity",
|
|
137
141
|
"harness-test-integrity-flag",
|
|
138
142
|
"harness-run-trace",
|
|
@@ -189,7 +193,7 @@ function deriveFlowSubstate(state: HarnessUiState): HarnessFlowSubstate {
|
|
|
189
193
|
return "idle";
|
|
190
194
|
}
|
|
191
195
|
|
|
192
|
-
function createStateFromEntries(entries: unknown[]): HarnessUiState {
|
|
196
|
+
export function createStateFromEntries(entries: unknown[]): HarnessUiState {
|
|
193
197
|
const latest = pickLatestCustomEntries(entries);
|
|
194
198
|
const state: HarnessUiState = {
|
|
195
199
|
...DEFAULT_STATE,
|
|
@@ -212,7 +216,7 @@ function createStateFromEntries(entries: unknown[]): HarnessUiState {
|
|
|
212
216
|
const budget = latest.get("harness-budget-exhausted") as
|
|
213
217
|
| BudgetExhaustedLike
|
|
214
218
|
| undefined;
|
|
215
|
-
if (budget) {
|
|
219
|
+
if (budget && shouldEmitBlockingBudgetExhausted()) {
|
|
216
220
|
state.budgetExhausted = true;
|
|
217
221
|
state.budgetReason =
|
|
218
222
|
typeof budget.exhaustion_reason === "string"
|
|
@@ -223,6 +227,15 @@ function createStateFromEntries(entries: unknown[]): HarnessUiState {
|
|
|
223
227
|
const cap = asNumber(budget.caps?.debate_global_cap);
|
|
224
228
|
if (cap != null) state.debateBudgetCap = cap;
|
|
225
229
|
}
|
|
230
|
+
const telemetry = latest.get("harness-budget-telemetry") as
|
|
231
|
+
| BudgetExhaustedLike
|
|
232
|
+
| undefined;
|
|
233
|
+
if (telemetry && !state.budgetExhausted) {
|
|
234
|
+
const budgetUsed = asNumber(telemetry.budget_used);
|
|
235
|
+
if (budgetUsed != null) state.debateBudgetUsed = budgetUsed;
|
|
236
|
+
const cap = asNumber(telemetry.caps?.debate_global_cap);
|
|
237
|
+
if (cap != null) state.debateBudgetCap = cap;
|
|
238
|
+
}
|
|
226
239
|
|
|
227
240
|
const testIntegrity = latest.get("harness-test-integrity-flag") as
|
|
228
241
|
| TestIntegrityLike
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"defaultProfile": "auto",
|
|
3
3
|
"debug": false,
|
|
4
|
-
"classifierModel": "
|
|
4
|
+
"classifierModel": "openai/gpt-5.4-nano",
|
|
5
5
|
"phaseBias": 0.5,
|
|
6
6
|
"maxSessionBudget": 1.0,
|
|
7
7
|
"largeContextThreshold": 100000,
|
|
@@ -16,12 +16,21 @@
|
|
|
16
16
|
"profiles": {
|
|
17
17
|
"auto": {
|
|
18
18
|
"high": {
|
|
19
|
-
"model": "
|
|
19
|
+
"model": "openai/gpt-5.5",
|
|
20
20
|
"thinking": "high",
|
|
21
|
-
"fallbacks": ["
|
|
21
|
+
"fallbacks": ["openai/gpt-5.4-nano"]
|
|
22
|
+
},
|
|
23
|
+
"medium": { "model": "openai/gpt-5.5", "thinking": "medium" },
|
|
24
|
+
"low": { "model": "openai/gpt-5.5", "thinking": "low" }
|
|
25
|
+
},
|
|
26
|
+
"opencode-go": {
|
|
27
|
+
"high": {
|
|
28
|
+
"model": "opencode-go/qwen3.6-plus",
|
|
29
|
+
"thinking": "high",
|
|
30
|
+
"fallbacks": ["opencode-go/deepseek-v4-flash"]
|
|
22
31
|
},
|
|
23
32
|
"medium": { "model": "opencode-go/qwen3.6-plus", "thinking": "medium" },
|
|
24
|
-
"low": { "model": "opencode-go/
|
|
33
|
+
"low": { "model": "opencode-go/qwen3.6-plus", "thinking": "low" }
|
|
25
34
|
}
|
|
26
35
|
}
|
|
27
36
|
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
description: Full strict harness pipeline with locked governance decisions.
|
|
3
|
-
argument-hint: "\"<task>\" [--quick] [--risk low|med|high]
|
|
3
|
+
argument-hint: "\"<task>\" [--quick] [--risk low|med|high]"
|
|
4
4
|
---
|
|
5
5
|
|
|
6
6
|
# harness-auto
|
|
@@ -10,7 +10,7 @@ Pipeline orchestrator — one session, sequential phase handoffs. Invoke **harne
|
|
|
10
10
|
## Step 0 — Parse arguments
|
|
11
11
|
|
|
12
12
|
- required task (quoted or first token)
|
|
13
|
-
- optional: `--quick`, `--risk
|
|
13
|
+
- optional: `--quick`, `--risk` (`--budget` reserved/no-op)
|
|
14
14
|
|
|
15
15
|
If task missing:
|
|
16
16
|
|