ultimate-pi 0.15.0 → 0.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agents/skills/harness-governor/SKILL.md +11 -0
- package/.agents/skills/harness-orchestration/SKILL.md +3 -1
- package/.agents/skills/harness-plan/SKILL.md +5 -5
- package/.pi/agents/harness/adversary.md +1 -1
- package/.pi/agents/harness/evaluator.md +1 -1
- package/.pi/agents/harness/executor.md +1 -1
- package/.pi/agents/harness/incident-recorder.md +1 -1
- package/.pi/agents/harness/meta-optimizer.md +1 -1
- package/.pi/agents/harness/planning/decompose.md +4 -33
- package/.pi/agents/harness/planning/execution-plan-author.md +3 -2
- package/.pi/agents/harness/planning/hypothesis-validator.md +3 -2
- package/.pi/agents/harness/planning/hypothesis.md +4 -27
- package/.pi/agents/harness/planning/implementation-researcher.md +3 -2
- package/.pi/agents/harness/planning/plan-adversary.md +2 -3
- package/.pi/agents/harness/planning/plan-evaluator.md +3 -2
- package/.pi/agents/harness/planning/review-integrator.md +2 -3
- package/.pi/agents/harness/planning/scout-graphify.md +3 -22
- package/.pi/agents/harness/planning/scout-semantic.md +3 -18
- package/.pi/agents/harness/planning/scout-structure.md +3 -18
- package/.pi/agents/harness/planning/sprint-contract-auditor.md +3 -2
- package/.pi/agents/harness/planning/stack-researcher.md +3 -2
- package/.pi/agents/harness/tie-breaker.md +1 -1
- package/.pi/agents/harness/trace-librarian.md +1 -1
- package/.pi/extensions/budget-guard.ts +33 -19
- package/.pi/extensions/harness-debate-tools.ts +42 -3
- package/.pi/extensions/harness-run-context.ts +96 -2
- package/.pi/extensions/harness-subagent-submit.ts +195 -0
- package/.pi/extensions/lib/debate-bus-core.ts +42 -5
- package/.pi/extensions/lib/harness-subagent-policy.ts +45 -0
- package/.pi/extensions/lib/harness-subagent-submit-pipeline.ts +82 -0
- package/.pi/extensions/lib/harness-subagent-submit-registry.ts +172 -0
- package/.pi/extensions/lib/harness-subagents-bridge.ts +42 -0
- package/.pi/extensions/lib/plan-debate-gate.ts +12 -1
- package/.pi/extensions/lib/plan-debate-lane.ts +15 -0
- package/.pi/harness/agents.manifest.json +22 -22
- package/.pi/harness/docs/adrs/0037-subagent-submit-tools.md +31 -0
- package/.pi/harness/docs/adrs/0038-budget-telemetry-only.md +23 -0
- package/.pi/harness/docs/adrs/README.md +2 -0
- package/.pi/harness/specs/harness-executor-handoff.schema.json +19 -0
- package/.pi/harness/specs/harness-human-required.schema.json +16 -0
- package/.pi/harness/specs/plan-scout-findings.schema.json +19 -0
- package/.pi/lib/harness-agent-output.ts +45 -0
- package/.pi/lib/harness-budget-enforce.ts +18 -0
- package/.pi/lib/harness-schema-validate.ts +89 -0
- package/.pi/lib/harness-spawn-parse.ts +86 -0
- package/.pi/lib/harness-subagent-submit-path.ts +41 -0
- package/.pi/lib/harness-ui-state.ts +15 -2
- package/.pi/prompts/harness-auto.md +2 -2
- package/.pi/prompts/harness-plan.md +9 -7
- package/.pi/prompts/harness-run.md +2 -2
- package/.pi/scripts/harness-verify.mjs +2 -0
- package/.pi/scripts/harness_web/__pycache__/__init__.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/config.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/output.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/scrape.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/search.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/search_ddg.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/search_searxng.cpython-314.pyc +0 -0
- package/CHANGELOG.md +10 -0
- package/package.json +4 -2
- package/vendor/pi-subagents/src/subagents.ts +29 -3
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* JSON Schema validation for harness submit tools (Ajv draft 2020-12, offline).
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { appendFile, readFile } from "node:fs/promises";
|
|
6
|
+
import { join } from "node:path";
|
|
7
|
+
import Ajv2020 from "ajv/dist/2020";
|
|
8
|
+
import addFormats from "ajv-formats";
|
|
9
|
+
|
|
10
|
+
/** Minimal call signature of a compiled schema validator: returns true when `data` passes. */
type ValidateFn = (data: unknown) => boolean;
|
|
11
|
+
|
|
12
|
+
/** Compiled validators, keyed per schema location, so an already-compiled schema is not read again. */
const compileCache = new Map<string, ValidateFn>();
|
|
13
|
+
/**
 * Name of the environment variable that opts in to debug logging.
 * When it holds a non-blank file path, `debugLog` appends JSON lines to that file;
 * when it is unset or blank, `debugLog` does nothing.
 *
 * This replaces a hard-coded absolute path on the original author's machine, which
 * must not ship in a published package.
 */
const DEBUG_LOG_ENV = "HARNESS_SCHEMA_DEBUG_LOG";

/** Lazily created Ajv instance shared by every schema compiled in this module. */
let ajvSingleton: InstanceType<typeof Ajv2020> | null = null;

/**
 * Return the shared Ajv (draft 2020-12) instance, creating it on first use.
 *
 * Options: `allErrors` collects every violation instead of stopping at the first;
 * `strict: false` and `validateSchema: false` turn off Ajv's strict-mode and
 * meta-schema checks for the harness schemas.
 */
function getAjv(): InstanceType<typeof Ajv2020> {
	if (ajvSingleton) return ajvSingleton;
	const instance = new Ajv2020({
		allErrors: true,
		strict: false,
		validateSchema: false,
	});
	// Publish the instance before registering formats, matching the original ordering.
	ajvSingleton = instance;
	addFormats(instance);
	return instance;
}

/**
 * Best-effort debug logger: appends one JSON line per call to the file named by
 * the `HARNESS_SCHEMA_DEBUG_LOG` environment variable.
 *
 * @param hypothesisId - Tag copied into the log entry as `hypothesisId`.
 * @param message - Human-readable event description.
 * @param data - Extra structured fields stored under `data`.
 * @returns A promise that always resolves; write failures are deliberately ignored
 *   so that logging can never break schema validation.
 */
async function debugLog(
	hypothesisId: string,
	message: string,
	data: Record<string, unknown>,
): Promise<void> {
	// The path is read on every call so the switch can be flipped without reloading the module.
	const target = process.env[DEBUG_LOG_ENV]?.trim();
	if (!target) return;
	// #region agent log
	try {
		await appendFile(
			target,
			`${JSON.stringify({
				sessionId: "2ca12b",
				hypothesisId,
				location: "harness-schema-validate.ts",
				message,
				data,
				timestamp: Date.now(),
			})}\n`,
		);
	} catch {
		/* ignore: debug logging is best-effort */
	}
	// #endregion
}
|
|
53
|
+
|
|
54
|
+
/**
 * Validate `document` against a harness JSON Schema file.
 *
 * The compiled validator is cached per `specsDir`/`schemaFile` pair, so the schema
 * file is only read and compiled when no cached validator exists.
 *
 * @param specsDir - Directory that contains the schema files.
 * @param schemaFile - File name of the schema, joined onto `specsDir`.
 * @param document - Value to validate.
 * @returns `{ ok: true }` when the document is valid. Otherwise `{ ok: false, errors }`,
 *   where `errors` holds either one `"<instancePath> <message>"` line per violation,
 *   or a single `schema load failed: …` / `schema compile failed: …` entry when the
 *   schema itself could not be prepared.
 */
export async function validateAgainstHarnessSchema(
	specsDir: string,
	schemaFile: string,
	document: unknown,
): Promise<{ ok: true } | { ok: false; errors: string[] }> {
	const cacheKey = `${specsDir}:${schemaFile}`;
	let validate = compileCache.get(cacheKey);

	if (!validate) {
		// Read/parse failures are reported through the result, like compile failures,
		// instead of escaping as exceptions.
		let schema: Record<string, unknown>;
		try {
			const raw = await readFile(join(specsDir, schemaFile), "utf-8");
			schema = JSON.parse(raw) as Record<string, unknown>;
		} catch (err) {
			const msg = err instanceof Error ? err.message : String(err);
			await debugLog("H3", "schema load failed", { schemaFile, error: msg });
			return { ok: false, errors: [`schema load failed: ${msg}`] };
		}

		// Another call may have compiled this schema while we awaited the read.
		// Compiling it a second time would make Ajv reject a duplicate `$id`.
		validate = compileCache.get(cacheKey);
		if (!validate) {
			try {
				const compiled = getAjv().compile(schema);
				compileCache.set(cacheKey, compiled);
				validate = compiled;
				await debugLog("H3", "schema compile ok", { schemaFile });
			} catch (err) {
				const msg = err instanceof Error ? err.message : String(err);
				await debugLog("H3", "schema compile failed", { schemaFile, error: msg });
				return { ok: false, errors: [`schema compile failed: ${msg}`] };
			}
		}
	}

	if (validate(document)) return { ok: true };

	// Ajv stores the violations of the most recent call on the validator function itself;
	// read them synchronously, before any other validation can overwrite them.
	const issues =
		(
			validate as {
				errors?: Array<{ instancePath?: string; message?: string }> | null;
			}
		).errors ?? [];
	const errors = issues.map((issue) =>
		`${issue.instancePath || "/"} ${issue.message ?? "invalid"}`.trim(),
	);
	return { ok: false, errors };
}
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Parse HarnessSpawnContext embedded in subagent task strings.
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
/**
 * Matches the inline `HarnessSpawnContext = {...}` form. Group 1 lazily captures from the
 * opening brace up to the first closing brace that is followed by whitespace, end of input,
 * or a period, so the capture is not guaranteed to be balanced JSON.
 */
const SPAWN_CTX_EQ_RE = /HarnessSpawnContext\s*=\s*(\{[\s\S]*?\})(?:\s|$|\.)/;
|
|
6
|
+
|
|
7
|
+
/**
 * Spawn-context fields recovered from a subagent task string.
 * Every field is optional at the type level; the parser in this module only produces a
 * value when `run_id` or `run_dir` is a non-empty string.
 */
export interface ParsedSpawnContext {
|
|
8
|
+
run_id?: string;
|
|
9
|
+
run_dir?: string;
|
|
10
|
+
agent?: string;
|
|
11
|
+
plan_packet_path?: string;
|
|
12
|
+
}
|
|
13
|
+
|
|
14
|
+
/**
 * Slice out the brace-balanced object literal that begins at `s[start]`.
 *
 * Braces inside double-quoted strings are ignored, and a backslash inside a string
 * skips the character that follows it.
 *
 * @param s - Text to scan.
 * @param start - Index that must hold the opening `{`.
 * @returns The substring from `start` through the matching `}`, or `null` when
 *   `s[start]` is not `{` or the object never closes.
 */
function extractBalancedJsonObject(s: string, start: number): string | null {
	if (s[start] !== "{") return null;

	let open = 0;
	let quoted = false;
	let skipNext = false;
	let pos = start;

	while (pos < s.length) {
		const c = s[pos];
		if (quoted) {
			// Inside a string only escapes and the closing quote matter.
			if (skipNext) {
				skipNext = false;
			} else if (c === "\\") {
				skipNext = true;
			} else if (c === '"') {
				quoted = false;
			}
		} else if (c === '"') {
			quoted = true;
		} else if (c === "{") {
			open += 1;
		} else if (c === "}") {
			open -= 1;
			if (open === 0) return s.slice(start, pos + 1);
		}
		pos += 1;
	}

	return null;
}
|
|
39
|
+
|
|
40
|
+
/**
 * Reduce an arbitrary parsed value to the known spawn-context fields.
 *
 * Only string-valued fields are kept; anything else becomes `undefined`.
 *
 * @param parsed - Result of `JSON.parse` (or any other value).
 * @returns The four known fields, or `null` when `parsed` is not an object or
 *   neither `run_id` nor `run_dir` is a non-empty string.
 */
function normalizeSpawnContext(parsed: unknown): ParsedSpawnContext | null {
	if (typeof parsed !== "object" || !parsed) return null;

	const record = parsed as Record<string, unknown>;
	const stringField = (key: string): string | undefined => {
		const value = record[key];
		return typeof value === "string" ? value : undefined;
	};

	const context: ParsedSpawnContext = {
		run_id: stringField("run_id"),
		run_dir: stringField("run_dir"),
		agent: stringField("agent"),
		plan_packet_path: stringField("plan_packet_path"),
	};

	// A context is only usable when it identifies the run in some way.
	return context.run_id || context.run_dir ? context : null;
}
|
|
51
|
+
|
|
52
|
+
/**
 * Locates the opening brace of a spawn context introduced either as
 * `HarnessSpawnContext={` or as the JSON key `"HarnessSpawnContext":{`.
 */
const SPAWN_CTX_ANCHOR_RE = /"?HarnessSpawnContext"?\s*[=:]\s*\{/g;

/**
 * Extract the harness spawn context embedded in a subagent task string.
 *
 * Strategies are tried in order and the first usable context wins:
 * 1. The `HarnessSpawnContext={...}` regex (`SPAWN_CTX_EQ_RE`).
 * 2. A brace-balanced scan anchored at every `HarnessSpawnContext=` /
 *    `"HarnessSpawnContext":` marker. This covers contexts followed by punctuation
 *    such as `;` `,` `)` and tasks that contain unrelated braces before the context,
 *    both of which the regex and the first-brace fallback miss.
 * 3. The first `{...}` object in the task, accepted when it has a `HarnessSpawnContext`
 *    object member or a string `run_id`.
 *
 * @param task - Raw task text handed to the subagent.
 * @returns The normalized context, or `null` when none can be recovered.
 */
export function parseSpawnContextFromTask(
	task: string,
): ParsedSpawnContext | null {
	// 1. Fast path: inline `=` form captured by the regex.
	const eqMatch = SPAWN_CTX_EQ_RE.exec(task);
	if (eqMatch?.[1]) {
		try {
			return normalizeSpawnContext(JSON.parse(eqMatch[1]));
		} catch {
			// The lazy capture was not valid JSON; fall through to the balanced scans.
		}
	}

	// 2. Anchored scan: start at the brace after each marker and take the balanced object.
	for (const marker of task.matchAll(SPAWN_CTX_ANCHOR_RE)) {
		const bracePos = (marker.index ?? 0) + marker[0].length - 1;
		const candidate = extractBalancedJsonObject(task, bracePos);
		if (!candidate) continue;
		try {
			const context = normalizeSpawnContext(JSON.parse(candidate));
			if (context) return context;
		} catch {
			// Not JSON (for example a `{…}` placeholder in prose); try the next marker.
		}
	}

	// 3. Fallback: treat the first object in the task as the context or its wrapper.
	const firstBrace = task.indexOf("{");
	if (firstBrace >= 0) {
		const blob = extractBalancedJsonObject(task, firstBrace);
		if (blob) {
			try {
				const outer = JSON.parse(blob) as Record<string, unknown>;
				if (
					outer.HarnessSpawnContext &&
					typeof outer.HarnessSpawnContext === "object"
				) {
					return normalizeSpawnContext(outer.HarnessSpawnContext);
				}
				if (typeof outer.run_id === "string") {
					return normalizeSpawnContext(outer);
				}
			} catch {
				// The first object is not JSON; there is nothing left to try.
			}
		}
	}

	return null;
}
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Resolve and guard harness run directories for subagent submit tools.
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { realpath } from "node:fs/promises";
|
|
6
|
+
import { join, resolve, sep } from "node:path";
|
|
7
|
+
|
|
8
|
+
/**
 * Directory that holds every harness run of a project: `<projectRoot>/.pi/harness/runs`.
 */
export function harnessRunsRoot(projectRoot: string): string {
	return join(projectRoot, ".pi", "harness", "runs");
}

/**
 * True when `child` lies strictly below `parent`. Both paths must already be
 * absolute and normalized; the comparison uses the platform path separator.
 */
function isPathInside(parent: string, child: string): boolean {
	const prefix = parent.endsWith(sep) ? parent : `${parent}${sep}`;
	return child.startsWith(prefix);
}

/**
 * Resolve the directory of a harness run and verify it cannot escape its sandbox.
 *
 * Checks, in order:
 * 1. `runId` is not blank.
 * 2. `<runs root>/<runId>` stays lexically inside the runs root, so a `runId`
 *    such as `../../x` is rejected instead of silently redefining the sandbox.
 * 3. The candidate directory (`runDirEnv` resolved against `projectRoot`, or the
 *    expected run directory when `runDirEnv` is blank) exists and, after symlink
 *    resolution, is the run directory itself or a descendant of it.
 *
 * @param opts.projectRoot - Project root that contains `.pi/harness/runs`.
 * @param opts.runId - Run identifier; names the directory below the runs root.
 * @param opts.runDirEnv - Optional run directory override, absolute or relative to
 *   `projectRoot`. Surrounding whitespace is ignored.
 * @returns `{ ok: true, runDir }` with the real path of the accepted directory, or
 *   `{ ok: false, error }` describing which check failed.
 */
export async function resolveGuardedRunDir(opts: {
	projectRoot: string;
	runId: string;
	runDirEnv?: string;
}): Promise<{ ok: true; runDir: string } | { ok: false; error: string }> {
	const { projectRoot, runId } = opts;
	if (!runId.trim()) {
		return { ok: false, error: "run_id is required" };
	}

	const runsRoot = resolve(harnessRunsRoot(projectRoot));
	const expected = resolve(join(runsRoot, runId));
	// The later containment check is relative to `expected`, so `expected` itself
	// must be pinned inside the runs root first.
	if (!isPathInside(runsRoot, expected)) {
		return { ok: false, error: `run_id must resolve inside ${runsRoot}` };
	}

	const requested = opts.runDirEnv?.trim();
	const unresolved = requested ? resolve(projectRoot, requested) : expected;

	try {
		// realpath rejects when a path does not exist; that is reported as "not found".
		const candidate = await realpath(unresolved);
		const expectedReal = await realpath(expected);
		if (candidate !== expectedReal && !isPathInside(expectedReal, candidate)) {
			return {
				ok: false,
				error: `run_dir must stay under ${expectedReal}`,
			};
		}
		return { ok: true, runDir: candidate };
	} catch {
		return { ok: false, error: `run directory not found for run_id=${runId}` };
	}
}
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import type { ExtensionContext } from "@earendil-works/pi-coding-agent";
|
|
2
|
+
import { shouldEmitBlockingBudgetExhausted } from "./harness-budget-enforce.js";
|
|
2
3
|
|
|
3
4
|
export type HarnessPhase =
|
|
4
5
|
| "plan"
|
|
@@ -133,6 +134,9 @@ const RELEVANT_CUSTOM_TYPES = new Set([
|
|
|
133
134
|
"harness-consensus-packet",
|
|
134
135
|
"harness-round-result",
|
|
135
136
|
"harness-budget-exhausted",
|
|
137
|
+
"harness-budget-soft-limit",
|
|
138
|
+
"harness-budget-telemetry",
|
|
139
|
+
"harness-debate-budget-telemetry",
|
|
136
140
|
"harness-review-integrity",
|
|
137
141
|
"harness-test-integrity-flag",
|
|
138
142
|
"harness-run-trace",
|
|
@@ -189,7 +193,7 @@ function deriveFlowSubstate(state: HarnessUiState): HarnessFlowSubstate {
|
|
|
189
193
|
return "idle";
|
|
190
194
|
}
|
|
191
195
|
|
|
192
|
-
function createStateFromEntries(entries: unknown[]): HarnessUiState {
|
|
196
|
+
export function createStateFromEntries(entries: unknown[]): HarnessUiState {
|
|
193
197
|
const latest = pickLatestCustomEntries(entries);
|
|
194
198
|
const state: HarnessUiState = {
|
|
195
199
|
...DEFAULT_STATE,
|
|
@@ -212,7 +216,7 @@ function createStateFromEntries(entries: unknown[]): HarnessUiState {
|
|
|
212
216
|
const budget = latest.get("harness-budget-exhausted") as
|
|
213
217
|
| BudgetExhaustedLike
|
|
214
218
|
| undefined;
|
|
215
|
-
if (budget) {
|
|
219
|
+
if (budget && shouldEmitBlockingBudgetExhausted()) {
|
|
216
220
|
state.budgetExhausted = true;
|
|
217
221
|
state.budgetReason =
|
|
218
222
|
typeof budget.exhaustion_reason === "string"
|
|
@@ -223,6 +227,15 @@ function createStateFromEntries(entries: unknown[]): HarnessUiState {
|
|
|
223
227
|
const cap = asNumber(budget.caps?.debate_global_cap);
|
|
224
228
|
if (cap != null) state.debateBudgetCap = cap;
|
|
225
229
|
}
|
|
230
|
+
const telemetry = latest.get("harness-budget-telemetry") as
|
|
231
|
+
| BudgetExhaustedLike
|
|
232
|
+
| undefined;
|
|
233
|
+
if (telemetry && !state.budgetExhausted) {
|
|
234
|
+
const budgetUsed = asNumber(telemetry.budget_used);
|
|
235
|
+
if (budgetUsed != null) state.debateBudgetUsed = budgetUsed;
|
|
236
|
+
const cap = asNumber(telemetry.caps?.debate_global_cap);
|
|
237
|
+
if (cap != null) state.debateBudgetCap = cap;
|
|
238
|
+
}
|
|
226
239
|
|
|
227
240
|
const testIntegrity = latest.get("harness-test-integrity-flag") as
|
|
228
241
|
| TestIntegrityLike
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
description: Full strict harness pipeline with locked governance decisions.
|
|
3
|
-
argument-hint: "\"<task>\" [--quick] [--risk low|med|high]
|
|
3
|
+
argument-hint: "\"<task>\" [--quick] [--risk low|med|high]"
|
|
4
4
|
---
|
|
5
5
|
|
|
6
6
|
# harness-auto
|
|
@@ -10,7 +10,7 @@ Pipeline orchestrator — one session, sequential phase handoffs. Invoke **harne
|
|
|
10
10
|
## Step 0 — Parse arguments
|
|
11
11
|
|
|
12
12
|
- required task (quoted or first token)
|
|
13
|
-
- optional: `--quick`, `--risk
|
|
13
|
+
- optional: `--quick`, `--risk` (`--budget` reserved/no-op)
|
|
14
14
|
|
|
15
15
|
If task missing:
|
|
16
16
|
|
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
---
|
|
2
2
|
description: PM-grade harness plan — scouts, implementation research, ExecutionPlan, DAG validation, selective Review Gate debate, approval.
|
|
3
|
-
argument-hint: "\"<task>\" [--risk low|med|high] [--
|
|
3
|
+
argument-hint: "\"<task>\" [--risk low|med|high] [--quick]"
|
|
4
4
|
---
|
|
5
5
|
|
|
6
6
|
# harness-plan
|
|
7
7
|
|
|
8
8
|
You are the **planning PM** for this harness run. Produce an execution baseline (`plan-packet.yaml` + `plan-review.md`), not strategy theater. Parent owns `ask_user`, `approve_plan`, `create_plan`, debate bus commands, and YAML writes under `.pi/harness/runs/<run_id>/`.
|
|
9
9
|
|
|
10
|
-
|
|
10
|
+
Subagents persist artifacts via scoped **`submit_*`** tools (deterministic YAML under the run dir). Parent uses **`harness_artifact_ready`** to gate phases (no JSON parsing). Parent merges still use **`write_harness_yaml`** for `research-brief.yaml`, `plan-packet.yaml` shell, and integrator patches only.
|
|
11
11
|
|
|
12
12
|
## Allowed subagents
|
|
13
13
|
|
|
@@ -33,12 +33,12 @@ Read **harness-debate-plan** skill before Review Gate rounds.
|
|
|
33
33
|
2. Each `subagent` call blocks until subprocesses finish — batch parallel scouts in one `tasks` array.
|
|
34
34
|
3. Do **not** set `timeoutMs` unless the user explicitly requests a cap — subagents run until natural completion (optional backstop: `PI_SUBAGENT_TIMEOUT_MS`).
|
|
35
35
|
4. No harness subagent spawn cap — run the full scout + research + debate pipeline without skipping lanes for budget.
|
|
36
|
-
5. Compact task text: embed `HarnessSpawnContext` JSON
|
|
36
|
+
5. Compact task text: embed spawn context + lane instructions. Prefer `HarnessSpawnContext={"run_id":"…","plan_packet_path":"…",…}` or a JSON object with `"HarnessSpawnContext":{…}` — both parse; `run_id` is required so subprocess submit tools get `HARNESS_RUN_ID`.
|
|
37
37
|
|
|
38
38
|
## Step 0 — Parse `$ARGUMENTS`
|
|
39
39
|
|
|
40
40
|
- task (required)
|
|
41
|
-
- `--risk low|med|high`, `--
|
|
41
|
+
- `--risk low|med|high`, `--quick` (`--budget` is reserved/no-op; token budgets are telemetry-only unless `HARNESS_BUDGET_ENFORCE=1`)
|
|
42
42
|
|
|
43
43
|
`--quick` skips **scout-semantic** and post-run adversary only — **never** skip graphify, structure, decompose, hypothesis, **Phase 3.5 implementation research**, stack research, execution plan, DAG validation, or **Review Gate debate**.
|
|
44
44
|
|
|
@@ -64,9 +64,11 @@ Do **not** run `ccc index` or `ccc search --refresh`. The harness runs increment
|
|
|
64
64
|
|
|
65
65
|
Add `harness/planning/scout-semantic` to `tasks` unless `--quick`. Require graphify + structure success. Semantic lane uses `ccc search` only (see `scout-semantic` agent).
|
|
66
66
|
|
|
67
|
+
After scouts: `harness_artifact_ready({ paths: ["artifacts/scout-graphify.yaml", "artifacts/scout-structure.yaml", ...] })`.
|
|
68
|
+
|
|
67
69
|
## Phase 2 & 3 — Decompose + hypothesis (parallel)
|
|
68
70
|
|
|
69
|
-
One `subagent` call with `tasks` for `harness/planning/decompose` and `harness/planning/hypothesis
|
|
71
|
+
One `subagent` call with `tasks` for `harness/planning/decompose` and `harness/planning/hypothesis` (include scout YAML paths in task text). Gate with `harness_artifact_ready` on `artifacts/decomposition.yaml` and `artifacts/hypothesis.yaml`.
|
|
70
72
|
|
|
71
73
|
Decompose **prior_art** is **internal only** (from scouts). External prior art arrives in Phase 3.5.
|
|
72
74
|
|
|
@@ -84,8 +86,8 @@ Decompose **prior_art** is **internal only** (from scouts). External prior art a
|
|
|
84
86
|
}
|
|
85
87
|
```
|
|
86
88
|
|
|
87
|
-
- `
|
|
88
|
-
- Merge both into `research-brief.yaml` (`implementation:` + `stack:`)
|
|
89
|
+
- Subagents write via `submit_implementation_research` / `submit_stack_brief`; gate with `harness_artifact_ready` on both paths.
|
|
90
|
+
- Merge both into `research-brief.yaml` (`implementation:` + `stack:`) via parent `write_harness_yaml`.
|
|
89
91
|
- **Partial failure:** if one lane fails, re-spawn that lane once; if still failing set `plan_status: partial` and `human_required` via `ask_user`. Do not proceed to Phase 4b without both artifacts or explicit human waiver.
|
|
90
92
|
- **Web dedup:** implementation owns patterns/repos; stack owns libraries/versions — no overlapping queries.
|
|
91
93
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
description: Execute only against an approved PlanPacket with strict phase gates.
|
|
3
|
-
argument-hint: "
|
|
3
|
+
argument-hint: ""
|
|
4
4
|
---
|
|
5
5
|
|
|
6
6
|
# harness-run
|
|
@@ -9,7 +9,7 @@ Orchestrator only — spawn `harness/executor`. Do **not** implement inline.
|
|
|
9
9
|
|
|
10
10
|
## Step 0 — Parse arguments
|
|
11
11
|
|
|
12
|
-
-
|
|
12
|
+
- `--budget` is reserved/no-op (telemetry-only budgets by default)
|
|
13
13
|
- Do **not** use `--plan` on happy path — load from `[HarnessActivePlan]` / `plan_packet_path`.
|
|
14
14
|
|
|
15
15
|
If plan not ready:
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
package/CHANGELOG.md
CHANGED
|
@@ -4,6 +4,16 @@ All notable changes to this project are documented in this file.
|
|
|
4
4
|
|
|
5
5
|
## [Unreleased]
|
|
6
6
|
|
|
7
|
+
## [v0.16.0] — 2026-05-19
|
|
8
|
+
|
|
9
|
+
### ✨ Features
|
|
10
|
+
|
|
11
|
+
- add submit pipeline and planning/debate updates
|
|
12
|
+
|
|
13
|
+
### 🔧 Chores
|
|
14
|
+
|
|
15
|
+
- refresh graph artifacts after harness updates
|
|
16
|
+
|
|
7
17
|
## [v0.15.0] — 2026-05-19
|
|
8
18
|
|
|
9
19
|
### ✨ Features
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "ultimate-pi",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.16.0",
|
|
4
4
|
"description": "Ultimate AI coding harness for pi.dev — extensible skills, Obsidian wiki knowledge layer, compressed context, deterministic output",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"pi-package",
|
|
@@ -84,7 +84,7 @@
|
|
|
84
84
|
"format": "biome format --write",
|
|
85
85
|
"format:check": "biome format",
|
|
86
86
|
"prepare": "lefthook install",
|
|
87
|
-
"test": "node --test test/harness-verify.test.mjs test/harness-ask-user.test.mjs test/harness-subagents-loader.test.mjs test/harness-subagent-precheck.test.mjs test/sentrux-rules-sync.test.mjs test/harness-budget-guard.test.mjs && node .pi/harness/evals/smoke/smoke-harness-plan.mjs --fixture && npx -y tsx --test test/harness-vcc-settings.test.ts test/harness-live-widget-status.test.ts test/harness-plan-phase-policy.test.mjs test/harness-subagent-policy.test.mjs test/harness-spawn-budget.test.mjs test/harness-turn-routing.test.mjs test/plan-approval-format.test.mjs test/plan-approval-dialog.test.mjs test/plan-approval-sync.test.mjs test/plan-create-plan.test.mjs test/plan-review-format.test.mjs test/debate-plan-phase.test.mjs test/plan-debate-eligibility.test.mjs test/plan-messenger-gate.test.mjs test/plan-debate-lane-apply.test.mjs",
|
|
87
|
+
"test": "node --test test/harness-verify.test.mjs test/harness-ask-user.test.mjs test/harness-subagents-loader.test.mjs test/harness-subagent-precheck.test.mjs test/sentrux-rules-sync.test.mjs test/harness-budget-guard.test.mjs && node .pi/harness/evals/smoke/smoke-harness-plan.mjs --fixture && npx -y tsx --test test/harness-vcc-settings.test.ts test/harness-live-widget-status.test.ts test/harness-plan-phase-policy.test.mjs test/harness-subagent-policy.test.mjs test/harness-spawn-budget.test.mjs test/harness-spawn-parse.test.mjs test/harness-schema-validate.test.mjs test/harness-turn-routing.test.mjs test/harness-budget-enforce.test.mjs test/harness-submit-policy.test.mjs test/plan-approval-format.test.mjs test/plan-approval-dialog.test.mjs test/plan-approval-sync.test.mjs test/plan-create-plan.test.mjs test/plan-review-format.test.mjs test/debate-plan-phase.test.mjs test/plan-debate-eligibility.test.mjs test/plan-messenger-gate.test.mjs test/plan-debate-lane-apply.test.mjs",
|
|
88
88
|
"test:vcc": "npx -y tsx --test vendor/pi-vcc/tests/*.test.ts",
|
|
89
89
|
"harness:sentrux-bootstrap": "node .pi/scripts/harness-sentrux-bootstrap.mjs",
|
|
90
90
|
"harness:sentrux-sync": "node .pi/scripts/sentrux-rules-sync.mjs --force",
|
|
@@ -103,6 +103,8 @@
|
|
|
103
103
|
},
|
|
104
104
|
"dependencies": {
|
|
105
105
|
"@posthog/pi": "latest",
|
|
106
|
+
"ajv": "^8.17.1",
|
|
107
|
+
"ajv-formats": "^3.0.1",
|
|
106
108
|
"croner": "^9.0.0",
|
|
107
109
|
"jimp": "^1.6.1",
|
|
108
110
|
"nanoid": "^5.1.5",
|
|
@@ -42,6 +42,13 @@ export interface SpawnAuthForward {
|
|
|
42
42
|
|
|
43
43
|
export interface HarnessSubagentsOptions {
|
|
44
44
|
packageRoot?: string;
|
|
45
|
+
/** Absolute path to harness-subagent-submit.ts for subprocess-only extension loading (Option A). */
|
|
46
|
+
harnessSubprocessExtensionPath?: string;
|
|
47
|
+
/** Extra env vars per subprocess (e.g. HARNESS_RUN_ID, HARNESS_RUN_DIR). */
|
|
48
|
+
resolveSubprocessEnv?: (
|
|
49
|
+
task: string,
|
|
50
|
+
agent: AgentConfig,
|
|
51
|
+
) => Record<string, string> | undefined;
|
|
45
52
|
defaultAgentScope?: AgentScope;
|
|
46
53
|
defaultConfirmProjectAgents?: boolean;
|
|
47
54
|
beforeExecute?: (
|
|
@@ -388,8 +395,11 @@ function terminateProcess(proc: ReturnType<typeof spawn>) {
|
|
|
388
395
|
|
|
389
396
|
type OnUpdateCallback = (partial: AgentToolResult<SubagentDetails>) => void;
|
|
390
397
|
|
|
391
|
-
function buildSpawnEnv(
|
|
392
|
-
|
|
398
|
+
function buildSpawnEnv(
|
|
399
|
+
packageRoot?: string,
|
|
400
|
+
extra?: Record<string, string>,
|
|
401
|
+
): NodeJS.ProcessEnv {
|
|
402
|
+
const env = { ...process.env, ...extra };
|
|
393
403
|
env.PI_HARNESS_SUBPROCESS = "1";
|
|
394
404
|
if (packageRoot) {
|
|
395
405
|
env.UP_PKG = packageRoot;
|
|
@@ -411,6 +421,7 @@ async function runSingleAgent(
|
|
|
411
421
|
makeDetails: (results: SingleResult[]) => SubagentDetails,
|
|
412
422
|
packageRoot?: string,
|
|
413
423
|
spawnAuth?: SpawnAuthForward,
|
|
424
|
+
subagentsOptions?: HarnessSubagentsOptions,
|
|
414
425
|
): Promise<SingleResult> {
|
|
415
426
|
const agent = agents.find((a) => a.name === agentName);
|
|
416
427
|
|
|
@@ -434,8 +445,15 @@ async function runSingleAgent(
|
|
|
434
445
|
else if (spawnAuth) args.push("--model", spawnAuth.modelRef);
|
|
435
446
|
if (spawnAuth?.apiKey) args.push("--api-key", spawnAuth.apiKey);
|
|
436
447
|
if (agent.thinking) args.push("--thinking", agent.thinking);
|
|
448
|
+
const harnessExt =
|
|
449
|
+
agent.extensionsOff &&
|
|
450
|
+
agent.name.startsWith("harness/") &&
|
|
451
|
+
subagentsOptions?.harnessSubprocessExtensionPath;
|
|
437
452
|
if (agent.extensionsOff) {
|
|
438
453
|
args.push("--no-extensions");
|
|
454
|
+
if (harnessExt) {
|
|
455
|
+
args.push("-e", harnessExt);
|
|
456
|
+
}
|
|
439
457
|
if (agent.skillsOff) args.push("--no-skills");
|
|
440
458
|
}
|
|
441
459
|
if (agent.tools && agent.tools.length > 0) {
|
|
@@ -443,7 +461,11 @@ async function runSingleAgent(
|
|
|
443
461
|
} else if (agent.extensionsOff) {
|
|
444
462
|
args.push("--no-tools");
|
|
445
463
|
}
|
|
446
|
-
const
|
|
464
|
+
const extraEnv = subagentsOptions?.resolveSubprocessEnv?.(task, agent);
|
|
465
|
+
const spawnEnv = buildSpawnEnv(packageRoot, {
|
|
466
|
+
...extraEnv,
|
|
467
|
+
HARNESS_AGENT_ID: agent.name,
|
|
468
|
+
});
|
|
447
469
|
|
|
448
470
|
let tmpPromptDir: string | null = null;
|
|
449
471
|
let tmpPromptPath: string | null = null;
|
|
@@ -856,6 +878,7 @@ export function createSubagentsExtension(
|
|
|
856
878
|
makeDetails("chain"),
|
|
857
879
|
packageRoot,
|
|
858
880
|
await resolveSpawnAuth(step.agent),
|
|
881
|
+
options,
|
|
859
882
|
);
|
|
860
883
|
results.push(result);
|
|
861
884
|
|
|
@@ -950,6 +973,7 @@ export function createSubagentsExtension(
|
|
|
950
973
|
makeDetails("parallel"),
|
|
951
974
|
packageRoot,
|
|
952
975
|
await resolveSpawnAuth(t.agent),
|
|
976
|
+
options,
|
|
953
977
|
);
|
|
954
978
|
allResults[index] = result;
|
|
955
979
|
doneCount += 1;
|
|
@@ -987,6 +1011,7 @@ export function createSubagentsExtension(
|
|
|
987
1011
|
makeDetails("parallel"),
|
|
988
1012
|
packageRoot,
|
|
989
1013
|
await resolveSpawnAuth(aggregator.agent),
|
|
1014
|
+
options,
|
|
990
1015
|
);
|
|
991
1016
|
}
|
|
992
1017
|
|
|
@@ -1038,6 +1063,7 @@ export function createSubagentsExtension(
|
|
|
1038
1063
|
makeDetails("single"),
|
|
1039
1064
|
packageRoot,
|
|
1040
1065
|
await resolveSpawnAuth(params.agent),
|
|
1066
|
+
options,
|
|
1041
1067
|
);
|
|
1042
1068
|
const isError = result.exitCode !== 0 || result.stopReason === "error" || result.stopReason === "aborted";
|
|
1043
1069
|
if (isError) {
|