ultimate-pi 0.15.0 → 0.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. package/.agents/skills/harness-governor/SKILL.md +11 -0
  2. package/.agents/skills/harness-orchestration/SKILL.md +3 -1
  3. package/.agents/skills/harness-plan/SKILL.md +5 -5
  4. package/.pi/agents/harness/adversary.md +1 -1
  5. package/.pi/agents/harness/evaluator.md +1 -1
  6. package/.pi/agents/harness/executor.md +1 -1
  7. package/.pi/agents/harness/incident-recorder.md +1 -1
  8. package/.pi/agents/harness/meta-optimizer.md +1 -1
  9. package/.pi/agents/harness/planning/decompose.md +4 -33
  10. package/.pi/agents/harness/planning/execution-plan-author.md +3 -2
  11. package/.pi/agents/harness/planning/hypothesis-validator.md +3 -2
  12. package/.pi/agents/harness/planning/hypothesis.md +4 -27
  13. package/.pi/agents/harness/planning/implementation-researcher.md +3 -2
  14. package/.pi/agents/harness/planning/plan-adversary.md +2 -3
  15. package/.pi/agents/harness/planning/plan-evaluator.md +3 -2
  16. package/.pi/agents/harness/planning/review-integrator.md +2 -3
  17. package/.pi/agents/harness/planning/scout-graphify.md +3 -22
  18. package/.pi/agents/harness/planning/scout-semantic.md +3 -18
  19. package/.pi/agents/harness/planning/scout-structure.md +3 -18
  20. package/.pi/agents/harness/planning/sprint-contract-auditor.md +3 -2
  21. package/.pi/agents/harness/planning/stack-researcher.md +3 -2
  22. package/.pi/agents/harness/tie-breaker.md +1 -1
  23. package/.pi/agents/harness/trace-librarian.md +1 -1
  24. package/.pi/extensions/budget-guard.ts +33 -19
  25. package/.pi/extensions/harness-debate-tools.ts +42 -3
  26. package/.pi/extensions/harness-run-context.ts +96 -2
  27. package/.pi/extensions/harness-subagent-submit.ts +195 -0
  28. package/.pi/extensions/lib/debate-bus-core.ts +42 -5
  29. package/.pi/extensions/lib/harness-subagent-policy.ts +45 -0
  30. package/.pi/extensions/lib/harness-subagent-submit-pipeline.ts +82 -0
  31. package/.pi/extensions/lib/harness-subagent-submit-registry.ts +172 -0
  32. package/.pi/extensions/lib/harness-subagents-bridge.ts +42 -0
  33. package/.pi/extensions/lib/plan-debate-gate.ts +12 -1
  34. package/.pi/extensions/lib/plan-debate-lane.ts +15 -0
  35. package/.pi/harness/agents.manifest.json +22 -22
  36. package/.pi/harness/docs/adrs/0037-subagent-submit-tools.md +31 -0
  37. package/.pi/harness/docs/adrs/0038-budget-telemetry-only.md +23 -0
  38. package/.pi/harness/docs/adrs/README.md +2 -0
  39. package/.pi/harness/specs/harness-executor-handoff.schema.json +19 -0
  40. package/.pi/harness/specs/harness-human-required.schema.json +16 -0
  41. package/.pi/harness/specs/plan-scout-findings.schema.json +19 -0
  42. package/.pi/lib/harness-agent-output.ts +45 -0
  43. package/.pi/lib/harness-budget-enforce.ts +18 -0
  44. package/.pi/lib/harness-schema-validate.ts +89 -0
  45. package/.pi/lib/harness-spawn-parse.ts +86 -0
  46. package/.pi/lib/harness-subagent-submit-path.ts +41 -0
  47. package/.pi/lib/harness-ui-state.ts +15 -2
  48. package/.pi/prompts/harness-auto.md +2 -2
  49. package/.pi/prompts/harness-plan.md +9 -7
  50. package/.pi/prompts/harness-run.md +2 -2
  51. package/.pi/scripts/harness-verify.mjs +2 -0
  52. package/.pi/scripts/harness_web/__pycache__/__init__.cpython-314.pyc +0 -0
  53. package/.pi/scripts/harness_web/__pycache__/config.cpython-314.pyc +0 -0
  54. package/.pi/scripts/harness_web/__pycache__/output.cpython-314.pyc +0 -0
  55. package/.pi/scripts/harness_web/__pycache__/scrape.cpython-314.pyc +0 -0
  56. package/.pi/scripts/harness_web/__pycache__/search.cpython-314.pyc +0 -0
  57. package/.pi/scripts/harness_web/__pycache__/search_ddg.cpython-314.pyc +0 -0
  58. package/.pi/scripts/harness_web/__pycache__/search_searxng.cpython-314.pyc +0 -0
  59. package/CHANGELOG.md +10 -0
  60. package/package.json +4 -2
  61. package/vendor/pi-subagents/src/subagents.ts +29 -3
@@ -0,0 +1,89 @@
1
+ /**
2
+ * JSON Schema validation for harness submit tools (Ajv draft 2020-12, offline).
3
+ */
4
+
5
+ import { appendFile, readFile } from "node:fs/promises";
6
+ import { join } from "node:path";
7
+ import Ajv2020 from "ajv/dist/2020";
8
+ import addFormats from "ajv-formats";
9
+
10
/** Shape of a compiled schema validator: returns true when `data` conforms. */
type ValidateFn = (data: unknown) => boolean;

/**
 * Compiled validators, keyed by `${specsDir}:${schemaFile}`, so each schema
 * file is read and compiled at most once per process.
 */
const compileCache: Map<string, ValidateFn> = new Map();
13
/**
 * Destination file for the optional debug trace.
 *
 * Taken from the HARNESS_DEBUG_LOG_PATH environment variable instead of a
 * developer-specific absolute path, so the published package never tries to
 * write into somebody's home directory. When the variable is unset the path is
 * empty; appending to an empty path fails and that failure is ignored by the
 * best-effort logger, so tracing is effectively off by default.
 */
const DEBUG_LOG_PATH = process.env.HARNESS_DEBUG_LOG_PATH ?? "";
15
+
16
/** Lazily created Ajv instance shared by every schema compiled in this module. */
let ajvSingleton: InstanceType<typeof Ajv2020> | null = null;

/**
 * Return the shared Ajv (draft 2020-12) instance, creating it on first use.
 *
 * The instance is configured with `allErrors: true`, `strict: false` and
 * `validateSchema: false`, and has the `ajv-formats` plugin registered.
 */
function getAjv(): InstanceType<typeof Ajv2020> {
  if (ajvSingleton !== null) return ajvSingleton;

  const created = new Ajv2020({
    strict: false,
    validateSchema: false,
    allErrors: true,
  });
  // Publish the instance before registering formats, matching the original order.
  ajvSingleton = created;
  addFormats(created);
  return created;
}
29
+
30
/**
 * Append one JSON line describing a debug event to `DEBUG_LOG_PATH`.
 *
 * Logging is strictly best-effort: serialization or filesystem errors are
 * swallowed so that tracing can never break schema validation.
 *
 * @param hypothesisId - Tag stored in the record's `hypothesisId` field.
 * @param message - Short human-readable event description.
 * @param data - Extra structured payload stored under `data`.
 */
async function debugLog(
  hypothesisId: string,
  message: string,
  data: Record<string, unknown>,
): Promise<void> {
  // #region agent log
  const record = {
    sessionId: "2ca12b",
    hypothesisId,
    location: "harness-schema-validate.ts",
    message,
    data,
    timestamp: Date.now(),
  };
  try {
    // Serialization stays inside the try so unserializable payloads are ignored too.
    const line = JSON.stringify(record) + "\n";
    await appendFile(DEBUG_LOG_PATH, line);
  } catch {
    /* ignore */
  }
  // #endregion
}
53
+
54
/**
 * Validate `document` against a JSON Schema file located in `specsDir`.
 *
 * Compiled validators are cached per `${specsDir}:${schemaFile}`. Every failure
 * mode is reported through the result object rather than by rejecting:
 * - the schema file cannot be read or is not valid JSON -> `schema load failed: ...`
 * - the schema cannot be compiled -> `schema compile failed: ...`
 * - the document does not conform -> one `"<instancePath> <message>"` entry per error
 *
 * @param specsDir - Directory containing the schema files.
 * @param schemaFile - File name of the schema inside `specsDir`.
 * @param document - Value to validate.
 * @returns `{ ok: true }` on success, otherwise `{ ok: false, errors }`.
 */
export async function validateAgainstHarnessSchema(
  specsDir: string,
  schemaFile: string,
  document: unknown,
): Promise<{ ok: true } | { ok: false; errors: string[] }> {
  const cacheKey = `${specsDir}:${schemaFile}`;
  let validate = compileCache.get(cacheKey);
  if (!validate) {
    let schema: Record<string, unknown>;
    try {
      const raw = await readFile(join(specsDir, schemaFile), "utf-8");
      schema = JSON.parse(raw) as Record<string, unknown>;
    } catch (err) {
      // Missing or malformed schema files are reported like compile failures
      // instead of escaping as a rejected promise.
      const msg = err instanceof Error ? err.message : String(err);
      await debugLog("H3", "schema load failed", { schemaFile, error: msg });
      return { ok: false, errors: [`schema load failed: ${msg}`] };
    }
    // Another call may have compiled this schema while the file was being read;
    // reuse it rather than compiling the same schema a second time.
    validate = compileCache.get(cacheKey);
    if (!validate) {
      try {
        const compiled = getAjv().compile(schema);
        compileCache.set(cacheKey, compiled);
        validate = compiled;
        await debugLog("H3", "schema compile ok", { schemaFile });
      } catch (err) {
        const msg = err instanceof Error ? err.message : String(err);
        await debugLog("H3", "schema compile failed", { schemaFile, error: msg });
        return { ok: false, errors: [`schema compile failed: ${msg}`] };
      }
    }
  }
  const ok = validate(document);
  if (ok) return { ok: true };
  // The validator exposes the errors of its most recent call on `.errors`;
  // read them synchronously, before any other validation can overwrite them.
  const errors = (
    (
      validate as {
        errors?: Array<{ instancePath?: string; message?: string }> | null;
      }
    ).errors ?? []
  ).map((e: { instancePath?: string; message?: string }) =>
    `${e.instancePath || "/"} ${e.message ?? "invalid"}`.trim(),
  );
  return { ok: false, errors };
}
@@ -0,0 +1,86 @@
1
+ /**
2
+ * Parse HarnessSpawnContext embedded in subagent task strings.
3
+ */
4
+
5
/**
 * Matches the assignment form `HarnessSpawnContext = {...}` inside task text.
 * Capture group 1 is the shortest `{...}` span that is followed by whitespace,
 * end of input, or a period.
 */
const SPAWN_CTX_EQ_RE = /HarnessSpawnContext\s*=\s*(\{[\s\S]*?\})(?:\s|$|\.)/;
6
+
7
/**
 * Spawn context fields recovered from a subagent task string.
 * Each field is present only when the embedded JSON carried it as a string.
 */
export interface ParsedSpawnContext {
  /** Identifier of the harness run. */
  run_id?: string;
  /** Run directory as written in the task text. */
  run_dir?: string;
  /** Agent name as written in the task text. */
  agent?: string;
  /** Plan packet path as written in the task text. */
  plan_packet_path?: string;
}
13
+
14
/**
 * Return the brace-balanced `{...}` substring of `s` that begins at `start`.
 *
 * Braces inside double-quoted strings are ignored, and backslash escapes
 * inside strings are honoured so an escaped quote does not end the string.
 *
 * @param s - Text to scan.
 * @param start - Index that must hold the opening `{`.
 * @returns The balanced substring, or `null` when `s[start]` is not `{` or the
 *   object is never closed.
 */
function extractBalancedJsonObject(s: string, start: number): string | null {
  if (s[start] !== "{") return null;

  let openBraces = 0;
  let insideString = false;
  let skipNext = false;
  let pos = start;

  while (pos < s.length) {
    const c = s[pos];
    pos += 1;

    if (insideString) {
      if (skipNext) {
        skipNext = false;
      } else if (c === "\\") {
        skipNext = true;
      } else if (c === '"') {
        insideString = false;
      }
      continue;
    }

    switch (c) {
      case '"':
        insideString = true;
        break;
      case "{":
        openBraces += 1;
        break;
      case "}":
        openBraces -= 1;
        // `pos` already points one past the closing brace.
        if (openBraces === 0) return s.slice(start, pos);
        break;
    }
  }
  return null;
}
39
+
40
/**
 * Reduce an arbitrary parsed value to a `ParsedSpawnContext`.
 *
 * Only string-valued `run_id`, `run_dir`, `agent` and `plan_packet_path` are
 * kept; anything else becomes `undefined`.
 *
 * @returns The normalized context, or `null` when `parsed` is not an object or
 *   carries neither a non-empty `run_id` nor a non-empty `run_dir`.
 */
function normalizeSpawnContext(parsed: unknown): ParsedSpawnContext | null {
  if (typeof parsed !== "object" || parsed === null) return null;

  const fields = parsed as Record<string, unknown>;
  const asString = (value: unknown): string | undefined =>
    typeof value === "string" ? value : undefined;

  const ctx: ParsedSpawnContext = {
    run_id: asString(fields.run_id),
    run_dir: asString(fields.run_dir),
    agent: asString(fields.agent),
    plan_packet_path: asString(fields.plan_packet_path),
  };
  return ctx.run_id || ctx.run_dir ? ctx : null;
}
51
+
52
/**
 * Extract the harness spawn context embedded in a subagent task string.
 *
 * Accepted forms, tried in order:
 * 1. `HarnessSpawnContext={...}` matched by `SPAWN_CTX_EQ_RE` (JSON followed by
 *    whitespace, end of text, or a period).
 * 2. The same marker with the JSON delimited by brace balancing, so the
 *    context is still found when it is followed by other punctuation such as
 *    a backtick, comma, or closing parenthesis.
 * 3. Any JSON object in the text that either has a `HarnessSpawnContext`
 *    object property or itself has a string `run_id`. Objects are scanned left
 *    to right, so braces earlier in the text do not hide a later context.
 *
 * @param task - Raw task text handed to the subagent.
 * @returns The normalized context, or `null` when none can be recovered.
 */
export function parseSpawnContextFromTask(
  task: string,
): ParsedSpawnContext | null {
  const eqMatch = SPAWN_CTX_EQ_RE.exec(task);
  if (eqMatch?.[1]) {
    try {
      return normalizeSpawnContext(JSON.parse(eqMatch[1]));
    } catch {
      // capture was not valid JSON; retry with brace balancing below
    }
  }

  // Anchored retry: delimit the object after the marker by balancing braces
  // instead of relying on the character that happens to follow it.
  const marker = /HarnessSpawnContext\s*=\s*\{/.exec(task);
  if (marker) {
    const braceAt = marker.index + marker[0].length - 1;
    const anchored = extractBalancedJsonObject(task, braceAt);
    if (anchored) {
      try {
        const ctx = normalizeSpawnContext(JSON.parse(anchored));
        if (ctx) return ctx;
      } catch {
        // fall through to JSON-object forms
      }
    }
  }

  // JSON-object forms: examine every candidate object, not only the first brace.
  let from = task.indexOf("{");
  while (from >= 0) {
    let next = from + 1;
    const blob = extractBalancedJsonObject(task, from);
    if (blob) {
      try {
        const outer = JSON.parse(blob) as Record<string, unknown>;
        let ctx: ParsedSpawnContext | null = null;
        if (
          outer.HarnessSpawnContext &&
          typeof outer.HarnessSpawnContext === "object"
        ) {
          ctx = normalizeSpawnContext(outer.HarnessSpawnContext);
        } else if (typeof outer.run_id === "string") {
          ctx = normalizeSpawnContext(outer);
        }
        if (ctx) return ctx;
        // Valid JSON without a context: continue after it rather than inside it.
        next = from + blob.length;
      } catch {
        // not JSON (e.g. a `{placeholder}`); try the next brace
      }
    }
    from = task.indexOf("{", next);
  }

  return null;
}
@@ -0,0 +1,41 @@
1
+ /**
2
+ * Resolve and guard harness run directories for subagent submit tools.
3
+ */
4
+
5
+ import { realpath } from "node:fs/promises";
6
+ import { join, resolve } from "node:path";
7
+
8
/**
 * Directory that holds all harness run directories for a project:
 * `<projectRoot>/.pi/harness/runs`.
 */
export function harnessRunsRoot(projectRoot: string): string {
  const segments = [".pi", "harness", "runs"] as const;
  return join(projectRoot, ...segments);
}
11
+
12
/**
 * True when `child` equals `parent` or lies somewhere below it.
 * Both arguments must be absolute, normalized paths. Walking up with
 * `resolve(.., "..")` keeps the check independent of the path separator.
 */
function isSameOrNestedPath(parent: string, child: string): boolean {
  let cursor = child;
  for (;;) {
    if (cursor === parent) return true;
    const up = resolve(cursor, "..");
    if (up === cursor) return false; // reached the filesystem root
    cursor = up;
  }
}

/**
 * Resolve the directory a subagent may write to for a given run and verify it
 * stays inside that run's directory.
 *
 * The run directory is `<projectRoot>/.pi/harness/runs/<runId>`. `runId` must
 * resolve to a location strictly below the runs root, so values such as `..`
 * cannot move the trust anchor elsewhere. When `runDirEnv` is given it is
 * resolved against `projectRoot` and must be the run directory itself or a path
 * below it, compared after symlink resolution.
 *
 * @param opts.projectRoot - Project root containing `.pi/harness/runs`.
 * @param opts.runId - Run identifier; must not be blank.
 * @param opts.runDirEnv - Optional run directory override (absolute, or
 *   relative to `projectRoot`).
 * @returns `{ ok: true, runDir }` with the real path, or `{ ok: false, error }`
 *   when the id is blank or escapes the runs root, the directory does not
 *   exist, or the override leaves the run directory.
 */
export async function resolveGuardedRunDir(opts: {
  projectRoot: string;
  runId: string;
  runDirEnv?: string;
}): Promise<{ ok: true; runDir: string } | { ok: false; error: string }> {
  const { projectRoot, runId } = opts;
  if (!runId.trim()) {
    return { ok: false, error: "run_id is required" };
  }
  const runsRoot = resolve(harnessRunsRoot(projectRoot));
  const expected = join(harnessRunsRoot(projectRoot), runId);
  const expectedAbs = resolve(expected);
  // Lexical guard: reject ids that resolve to the runs root itself or outside it.
  if (expectedAbs === runsRoot || !isSameOrNestedPath(runsRoot, expectedAbs)) {
    return {
      ok: false,
      error: `run_id must name a directory under ${runsRoot}`,
    };
  }
  let candidate = opts.runDirEnv?.trim()
    ? resolve(projectRoot, opts.runDirEnv)
    : expected;
  try {
    candidate = await realpath(candidate);
    const expectedReal = await realpath(expected);
    if (!isSameOrNestedPath(expectedReal, candidate)) {
      return {
        ok: false,
        error: `run_dir must stay under ${expectedReal}`,
      };
    }
    return { ok: true, runDir: candidate };
  } catch {
    return { ok: false, error: `run directory not found for run_id=${runId}` };
  }
}
@@ -1,4 +1,5 @@
1
1
  import type { ExtensionContext } from "@earendil-works/pi-coding-agent";
2
+ import { shouldEmitBlockingBudgetExhausted } from "./harness-budget-enforce.js";
2
3
 
3
4
  export type HarnessPhase =
4
5
  | "plan"
@@ -133,6 +134,9 @@ const RELEVANT_CUSTOM_TYPES = new Set([
133
134
  "harness-consensus-packet",
134
135
  "harness-round-result",
135
136
  "harness-budget-exhausted",
137
+ "harness-budget-soft-limit",
138
+ "harness-budget-telemetry",
139
+ "harness-debate-budget-telemetry",
136
140
  "harness-review-integrity",
137
141
  "harness-test-integrity-flag",
138
142
  "harness-run-trace",
@@ -189,7 +193,7 @@ function deriveFlowSubstate(state: HarnessUiState): HarnessFlowSubstate {
189
193
  return "idle";
190
194
  }
191
195
 
192
- function createStateFromEntries(entries: unknown[]): HarnessUiState {
196
+ export function createStateFromEntries(entries: unknown[]): HarnessUiState {
193
197
  const latest = pickLatestCustomEntries(entries);
194
198
  const state: HarnessUiState = {
195
199
  ...DEFAULT_STATE,
@@ -212,7 +216,7 @@ function createStateFromEntries(entries: unknown[]): HarnessUiState {
212
216
  const budget = latest.get("harness-budget-exhausted") as
213
217
  | BudgetExhaustedLike
214
218
  | undefined;
215
- if (budget) {
219
+ if (budget && shouldEmitBlockingBudgetExhausted()) {
216
220
  state.budgetExhausted = true;
217
221
  state.budgetReason =
218
222
  typeof budget.exhaustion_reason === "string"
@@ -223,6 +227,15 @@ function createStateFromEntries(entries: unknown[]): HarnessUiState {
223
227
  const cap = asNumber(budget.caps?.debate_global_cap);
224
228
  if (cap != null) state.debateBudgetCap = cap;
225
229
  }
230
+ const telemetry = latest.get("harness-budget-telemetry") as
231
+ | BudgetExhaustedLike
232
+ | undefined;
233
+ if (telemetry && !state.budgetExhausted) {
234
+ const budgetUsed = asNumber(telemetry.budget_used);
235
+ if (budgetUsed != null) state.debateBudgetUsed = budgetUsed;
236
+ const cap = asNumber(telemetry.caps?.debate_global_cap);
237
+ if (cap != null) state.debateBudgetCap = cap;
238
+ }
226
239
 
227
240
  const testIntegrity = latest.get("harness-test-integrity-flag") as
228
241
  | TestIntegrityLike
@@ -1,6 +1,6 @@
1
1
  ---
2
2
  description: Full strict harness pipeline with locked governance decisions.
3
- argument-hint: "\"<task>\" [--quick] [--risk low|med|high] [--budget <amount>]"
3
+ argument-hint: "\"<task>\" [--quick] [--risk low|med|high]"
4
4
  ---
5
5
 
6
6
  # harness-auto
@@ -10,7 +10,7 @@ Pipeline orchestrator — one session, sequential phase handoffs. Invoke **harne
10
10
  ## Step 0 — Parse arguments
11
11
 
12
12
  - required task (quoted or first token)
13
- - optional: `--quick`, `--risk`, `--budget`
13
+ - optional: `--quick`, `--risk` (`--budget` reserved/no-op)
14
14
 
15
15
  If task missing:
16
16
 
@@ -1,13 +1,13 @@
1
1
  ---
2
2
  description: PM-grade harness plan — scouts, implementation research, ExecutionPlan, DAG validation, selective Review Gate debate, approval.
3
- argument-hint: "\"<task>\" [--risk low|med|high] [--budget <amount>] [--quick]"
3
+ argument-hint: "\"<task>\" [--risk low|med|high] [--quick]"
4
4
  ---
5
5
 
6
6
  # harness-plan
7
7
 
8
8
  You are the **planning PM** for this harness run. Produce an execution baseline (`plan-packet.yaml` + `plan-review.md`), not strategy theater. Parent owns `ask_user`, `approve_plan`, `create_plan`, debate bus commands, and YAML writes under `.pi/harness/runs/<run_id>/`.
9
9
 
10
- Never `write`/`edit` the final canonical packet except via **`write_harness_yaml`** for run artifacts and **`create_plan`** after approval. Do not paste JSON into `.yaml` files subagents emit JSON; you convert via `write_harness_yaml`.
10
+ Subagents persist artifacts via scoped **`submit_*`** tools (deterministic YAML under the run dir). Parent uses **`harness_artifact_ready`** to gate phases (no JSON parsing). Parent merges still use **`write_harness_yaml`** for `research-brief.yaml`, `plan-packet.yaml` shell, and integrator patches only.
11
11
 
12
12
  ## Allowed subagents
13
13
 
@@ -33,12 +33,12 @@ Read **harness-debate-plan** skill before Review Gate rounds.
33
33
  2. Each `subagent` call blocks until subprocesses finish — batch parallel scouts in one `tasks` array.
34
34
  3. Do **not** set `timeoutMs` unless the user explicitly requests a cap — subagents run until natural completion (optional backstop: `PI_SUBAGENT_TIMEOUT_MS`).
35
35
  4. No harness subagent spawn cap — run the full scout + research + debate pipeline without skipping lanes for budget.
36
- 5. Compact task text: embed `HarnessSpawnContext` JSON + lane-specific instructions only.
36
+ 5. Compact task text: embed spawn context + lane instructions. Prefer `HarnessSpawnContext={"run_id":"…","plan_packet_path":"…",…}` or a JSON object with `"HarnessSpawnContext":{…}` — both parse; `run_id` is required so subprocess submit tools get `HARNESS_RUN_ID`.
37
37
 
38
38
  ## Step 0 — Parse `$ARGUMENTS`
39
39
 
40
40
  - task (required)
41
- - `--risk low|med|high`, `--budget`, `--quick`
41
+ - `--risk low|med|high`, `--quick` (`--budget` is reserved/no-op; token budgets are telemetry-only unless `HARNESS_BUDGET_ENFORCE=1`)
42
42
 
43
43
  `--quick` skips **scout-semantic** and post-run adversary only — **never** skip graphify, structure, decompose, hypothesis, **Phase 3.5 implementation research**, stack research, execution plan, DAG validation, or **Review Gate debate**.
44
44
 
@@ -64,9 +64,11 @@ Do **not** run `ccc index` or `ccc search --refresh`. The harness runs increment
64
64
 
65
65
  Add `harness/planning/scout-semantic` to `tasks` unless `--quick`. Require graphify + structure success. Semantic lane uses `ccc search` only (see `scout-semantic` agent).
66
66
 
67
+ After scouts: `harness_artifact_ready({ paths: ["artifacts/scout-graphify.yaml", "artifacts/scout-structure.yaml", ...] })`.
68
+
67
69
  ## Phase 2 & 3 — Decompose + hypothesis (parallel)
68
70
 
69
- One `subagent` call with `tasks` for `harness/planning/decompose` and `harness/planning/hypothesis`. Parse `PlanDecompositionBrief` and `PlanHypothesisBrief` from outputs. Persist with `write_harness_yaml` `artifacts/decomposition.yaml` and `artifacts/hypothesis.yaml`.
71
+ One `subagent` call with `tasks` for `harness/planning/decompose` and `harness/planning/hypothesis` (include scout YAML paths in task text). Gate with `harness_artifact_ready` on `artifacts/decomposition.yaml` and `artifacts/hypothesis.yaml`.
70
72
 
71
73
  Decompose **prior_art** is **internal only** (from scouts). External prior art arrives in Phase 3.5.
72
74
 
@@ -84,8 +86,8 @@ Decompose **prior_art** is **internal only** (from scouts). External prior art a
84
86
  }
85
87
  ```
86
88
 
87
- - `write_harness_yaml` `artifacts/implementation-research.yaml` and `artifacts/stack.yaml`.
88
- - Merge both into `research-brief.yaml` (`implementation:` + `stack:`).
89
+ - Subagents write via `submit_implementation_research` / `submit_stack_brief`; gate with `harness_artifact_ready` on both paths.
90
+ - Merge both into `research-brief.yaml` (`implementation:` + `stack:`) via parent `write_harness_yaml`.
89
91
  - **Partial failure:** if one lane fails, re-spawn that lane once; if still failing set `plan_status: partial` and `human_required` via `ask_user`. Do not proceed to Phase 4b without both artifacts or explicit human waiver.
90
92
  - **Web dedup:** implementation owns patterns/repos; stack owns libraries/versions — no overlapping queries.
91
93
 
@@ -1,6 +1,6 @@
1
1
  ---
2
2
  description: Execute only against an approved PlanPacket with strict phase gates.
3
- argument-hint: "[--budget <amount>]"
3
+ argument-hint: ""
4
4
  ---
5
5
 
6
6
  # harness-run
@@ -9,7 +9,7 @@ Orchestrator only — spawn `harness/executor`. Do **not** implement inline.
9
9
 
10
10
  ## Step 0 — Parse arguments
11
11
 
12
- - optional: `--budget <amount>`
12
+ - `--budget` is reserved/no-op (telemetry-only budgets by default)
13
13
  - Do **not** use `--plan` on happy path — load from `[HarnessActivePlan]` / `plan_packet_path`.
14
14
 
15
15
  If plan not ready:
@@ -37,6 +37,8 @@ const REQUIRED_ADRS = [
37
37
  "0009-sentrux-rules-lifecycle.md",
38
38
  "0031-harness-run-context.md",
39
39
  "0032-harness-command-orchestration.md",
40
+ "0037-subagent-submit-tools.md",
41
+ "0038-budget-telemetry-only.md",
40
42
  ];
41
43
 
42
44
  const REQUIRED_EXTENSIONS = [
package/CHANGELOG.md CHANGED
@@ -4,6 +4,16 @@ All notable changes to this project are documented in this file.
4
4
 
5
5
  ## [Unreleased]
6
6
 
7
+ ## [v0.16.0] — 2026-05-19
8
+
9
+ ### ✨ Features
10
+
11
+ - add submit pipeline and planning/debate updates
12
+
13
+ ### 🔧 Chores
14
+
15
+ - refresh graph artifacts after harness updates
16
+
7
17
  ## [v0.15.0] — 2026-05-19
8
18
 
9
19
  ### ✨ Features
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ultimate-pi",
3
- "version": "0.15.0",
3
+ "version": "0.16.0",
4
4
  "description": "Ultimate AI coding harness for pi.dev — extensible skills, Obsidian wiki knowledge layer, compressed context, deterministic output",
5
5
  "keywords": [
6
6
  "pi-package",
@@ -84,7 +84,7 @@
84
84
  "format": "biome format --write",
85
85
  "format:check": "biome format",
86
86
  "prepare": "lefthook install",
87
- "test": "node --test test/harness-verify.test.mjs test/harness-ask-user.test.mjs test/harness-subagents-loader.test.mjs test/harness-subagent-precheck.test.mjs test/sentrux-rules-sync.test.mjs test/harness-budget-guard.test.mjs && node .pi/harness/evals/smoke/smoke-harness-plan.mjs --fixture && npx -y tsx --test test/harness-vcc-settings.test.ts test/harness-live-widget-status.test.ts test/harness-plan-phase-policy.test.mjs test/harness-subagent-policy.test.mjs test/harness-spawn-budget.test.mjs test/harness-turn-routing.test.mjs test/plan-approval-format.test.mjs test/plan-approval-dialog.test.mjs test/plan-approval-sync.test.mjs test/plan-create-plan.test.mjs test/plan-review-format.test.mjs test/debate-plan-phase.test.mjs test/plan-debate-eligibility.test.mjs test/plan-messenger-gate.test.mjs test/plan-debate-lane-apply.test.mjs",
87
+ "test": "node --test test/harness-verify.test.mjs test/harness-ask-user.test.mjs test/harness-subagents-loader.test.mjs test/harness-subagent-precheck.test.mjs test/sentrux-rules-sync.test.mjs test/harness-budget-guard.test.mjs && node .pi/harness/evals/smoke/smoke-harness-plan.mjs --fixture && npx -y tsx --test test/harness-vcc-settings.test.ts test/harness-live-widget-status.test.ts test/harness-plan-phase-policy.test.mjs test/harness-subagent-policy.test.mjs test/harness-spawn-budget.test.mjs test/harness-spawn-parse.test.mjs test/harness-schema-validate.test.mjs test/harness-turn-routing.test.mjs test/harness-budget-enforce.test.mjs test/harness-submit-policy.test.mjs test/plan-approval-format.test.mjs test/plan-approval-dialog.test.mjs test/plan-approval-sync.test.mjs test/plan-create-plan.test.mjs test/plan-review-format.test.mjs test/debate-plan-phase.test.mjs test/plan-debate-eligibility.test.mjs test/plan-messenger-gate.test.mjs test/plan-debate-lane-apply.test.mjs",
88
88
  "test:vcc": "npx -y tsx --test vendor/pi-vcc/tests/*.test.ts",
89
89
  "harness:sentrux-bootstrap": "node .pi/scripts/harness-sentrux-bootstrap.mjs",
90
90
  "harness:sentrux-sync": "node .pi/scripts/sentrux-rules-sync.mjs --force",
@@ -103,6 +103,8 @@
103
103
  },
104
104
  "dependencies": {
105
105
  "@posthog/pi": "latest",
106
+ "ajv": "^8.17.1",
107
+ "ajv-formats": "^3.0.1",
106
108
  "croner": "^9.0.0",
107
109
  "jimp": "^1.6.1",
108
110
  "nanoid": "^5.1.5",
@@ -42,6 +42,13 @@ export interface SpawnAuthForward {
42
42
 
43
43
  export interface HarnessSubagentsOptions {
44
44
  packageRoot?: string;
45
+ /** Absolute path to harness-subagent-submit.ts for subprocess-only extension loading (Option A). */
46
+ harnessSubprocessExtensionPath?: string;
47
+ /** Extra env vars per subprocess (e.g. HARNESS_RUN_ID, HARNESS_RUN_DIR). */
48
+ resolveSubprocessEnv?: (
49
+ task: string,
50
+ agent: AgentConfig,
51
+ ) => Record<string, string> | undefined;
45
52
  defaultAgentScope?: AgentScope;
46
53
  defaultConfirmProjectAgents?: boolean;
47
54
  beforeExecute?: (
@@ -388,8 +395,11 @@ function terminateProcess(proc: ReturnType<typeof spawn>) {
388
395
 
389
396
  type OnUpdateCallback = (partial: AgentToolResult<SubagentDetails>) => void;
390
397
 
391
- function buildSpawnEnv(packageRoot?: string): NodeJS.ProcessEnv {
392
- const env = { ...process.env };
398
+ function buildSpawnEnv(
399
+ packageRoot?: string,
400
+ extra?: Record<string, string>,
401
+ ): NodeJS.ProcessEnv {
402
+ const env = { ...process.env, ...extra };
393
403
  env.PI_HARNESS_SUBPROCESS = "1";
394
404
  if (packageRoot) {
395
405
  env.UP_PKG = packageRoot;
@@ -411,6 +421,7 @@ async function runSingleAgent(
411
421
  makeDetails: (results: SingleResult[]) => SubagentDetails,
412
422
  packageRoot?: string,
413
423
  spawnAuth?: SpawnAuthForward,
424
+ subagentsOptions?: HarnessSubagentsOptions,
414
425
  ): Promise<SingleResult> {
415
426
  const agent = agents.find((a) => a.name === agentName);
416
427
 
@@ -434,8 +445,15 @@ async function runSingleAgent(
434
445
  else if (spawnAuth) args.push("--model", spawnAuth.modelRef);
435
446
  if (spawnAuth?.apiKey) args.push("--api-key", spawnAuth.apiKey);
436
447
  if (agent.thinking) args.push("--thinking", agent.thinking);
448
+ const harnessExt =
449
+ agent.extensionsOff &&
450
+ agent.name.startsWith("harness/") &&
451
+ subagentsOptions?.harnessSubprocessExtensionPath;
437
452
  if (agent.extensionsOff) {
438
453
  args.push("--no-extensions");
454
+ if (harnessExt) {
455
+ args.push("-e", harnessExt);
456
+ }
439
457
  if (agent.skillsOff) args.push("--no-skills");
440
458
  }
441
459
  if (agent.tools && agent.tools.length > 0) {
@@ -443,7 +461,11 @@ async function runSingleAgent(
443
461
  } else if (agent.extensionsOff) {
444
462
  args.push("--no-tools");
445
463
  }
446
- const spawnEnv = buildSpawnEnv(packageRoot);
464
+ const extraEnv = subagentsOptions?.resolveSubprocessEnv?.(task, agent);
465
+ const spawnEnv = buildSpawnEnv(packageRoot, {
466
+ ...extraEnv,
467
+ HARNESS_AGENT_ID: agent.name,
468
+ });
447
469
 
448
470
  let tmpPromptDir: string | null = null;
449
471
  let tmpPromptPath: string | null = null;
@@ -856,6 +878,7 @@ export function createSubagentsExtension(
856
878
  makeDetails("chain"),
857
879
  packageRoot,
858
880
  await resolveSpawnAuth(step.agent),
881
+ options,
859
882
  );
860
883
  results.push(result);
861
884
 
@@ -950,6 +973,7 @@ export function createSubagentsExtension(
950
973
  makeDetails("parallel"),
951
974
  packageRoot,
952
975
  await resolveSpawnAuth(t.agent),
976
+ options,
953
977
  );
954
978
  allResults[index] = result;
955
979
  doneCount += 1;
@@ -987,6 +1011,7 @@ export function createSubagentsExtension(
987
1011
  makeDetails("parallel"),
988
1012
  packageRoot,
989
1013
  await resolveSpawnAuth(aggregator.agent),
1014
+ options,
990
1015
  );
991
1016
  }
992
1017
 
@@ -1038,6 +1063,7 @@ export function createSubagentsExtension(
1038
1063
  makeDetails("single"),
1039
1064
  packageRoot,
1040
1065
  await resolveSpawnAuth(params.agent),
1066
+ options,
1041
1067
  );
1042
1068
  const isError = result.exitCode !== 0 || result.stopReason === "error" || result.stopReason === "aborted";
1043
1069
  if (isError) {