npm - ultimate-pi - Versions diffs - 0.10.1 → 0.12.0 - Mend

ultimate-pi 0.10.1 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (135) hide show

package/.pi/extensions/harness-run-context.ts CHANGED Viewed

@@ -5,13 +5,16 @@
  * in before_agent_start so trace-recorder reuses it on agent_start.
  */
-import { readFile, writeFile } from "node:fs/promises";
+import { mkdir, readFile, writeFile } from "node:fs/promises";
+import { dirname } from "node:path";
 import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
+import { Type } from "@sinclair/typebox";
 import {
 	canonicalPlanPath,
 	createFreshRunContext,
 	driftGateActive,
 	extractCompletionStatuses,
+	extractWritePathFromToolInput,
 	formatActivePlanBlock,
 	formatPlanContextBlock,
 	getLatestHarnessTurn,
@@ -27,10 +30,12 @@ import {
 	isHarnessBootstrapPrompt,
 	isNewTaskPlanBlocked,
 	isPlanApprovalAskUser,
+	isPlanPhaseScopedWrite,
 	isStaleActiveRunPointer,
 	loadProjectActiveRun,
 	loadRunContextFromDisk,
 	nextStepAfterOutcome,
+	normalizeHarnessPath,
 	nowIso,
 	type PlanPacketSummary,
 	parseHarnessSlashInput,
@@ -45,6 +50,11 @@ import {
 	validatePlanOverridePath,
 	validatePlanPacket,
 } from "../lib/harness-run-context.js";
+import {
+	normalizeHarnessYamlContent,
+	parseStructuredDocument,
+	writeYamlFile,
+} from "../lib/harness-yaml.js";
 interface SessionEntryLike {
 	type?: string;
@@ -84,6 +94,32 @@ function appendHarnessTurn(pi: ExtensionAPI, turn: HarnessTurnEntry): void {
 	});
 }
+/**
+ * Rewrites the `content` of a plan-phase-scoped `write` call targeting a
+ * `.yaml`/`.yml` path with the output of `normalizeHarnessYamlContent`.
+ *
+ * Returns `undefined` (call proceeds) when the tool is not `write`, the path is
+ * not YAML, the path is not plan-phase scoped, the content is not a string, or
+ * normalization succeeded. Returns a block result carrying the normalizer's
+ * error message when normalization throws.
+ *
+ * Note: on success `event.input.content` is mutated in place.
+ */
+async function coerceScopedHarnessYamlWrite(
+	event: { toolName: string; input: Record<string, unknown> },
+	runCtx: HarnessRunContext,
+	projectRoot: string,
+): Promise<{ block: true; reason: string } | undefined> {
+	if (event.toolName !== "write") return undefined;
+	const writePath = extractWritePathFromToolInput(event.input);
+	const looksLikeYaml =
+		writePath.endsWith(".yaml") || writePath.endsWith(".yml");
+	if (!looksLikeYaml) return undefined;
+	// Scope is resolved before the content is inspected, as in the original flow.
+	const inScope = await isPlanPhaseScopedWrite(writePath, runCtx, projectRoot);
+	if (!inScope) return undefined;
+	const { content: rawContent } = event.input;
+	if (typeof rawContent !== "string") return undefined;
+	try {
+		event.input.content = normalizeHarnessYamlContent(rawContent, writePath);
+		return undefined;
+	} catch (failure) {
+		const detail =
+			failure instanceof Error ? failure.message : String(failure);
+		const reason = `harness-run-context: ${writePath} must be canonical YAML, not embedded JSON. Use write_harness_yaml with the subagent JSON/YAML block, or paste valid YAML. (${detail})`;
+		return { block: true, reason };
+	}
+}
 function syncPolicyFromPlan(
 	pi: ExtensionAPI,
 	entries: unknown[],
@@ -583,7 +619,7 @@ export default function harnessRunContext(pi: ExtensionAPI) {
 				activeCtx.last_outcome = "needs_clarification";
 				activeCtx.last_completed_step = "plan";
 				const msg =
-					"Plan file exists but user approval was not recorded. Planner must call approve_plan (or bridged ask_user Approve) before writing plan-packet.json.";
+					"Plan file exists but user approval was not recorded. Planner must call approve_plan (or bridged ask_user Approve) before writing plan-packet.yaml.";
 				if (ctx.hasUI) ctx.ui.notify(msg, "warning");
 				else
 					pi.sendMessage({
@@ -671,6 +707,18 @@ export default function harnessRunContext(pi: ExtensionAPI) {
 	});
 	pi.on("tool_call", async (event, ctx) => {
+		if (event.toolName === "write") {
+			const entries = getEntries(ctx);
+			const runCtx = getLatestRunContext(entries) ?? activeCtx;
+			if (runCtx) {
+				const blocked = await coerceScopedHarnessYamlWrite(
+					event,
+					runCtx,
+					process.cwd(),
+				);
+				if (blocked) return blocked;
+			}
+		}
 		if (activeCtx?.plan_packet_path) {
 			const entries = getEntries(ctx);
 			if (hasPlanUserApproval(entries, { sincePlanCommand: true })) {
@@ -707,11 +755,11 @@ export default function harnessRunContext(pi: ExtensionAPI) {
 				(event.input as { filePath?: string }).filePath ??
 				"",
 		);
-		if (target.includes("plan-packet.json")) {
+		if (target.includes("plan-packet.yaml")) {
 			return {
 				block: true,
 				reason:
-					"harness-run-context: plan-packet.json is read-only in evaluate/adversary phases.",
+					"harness-run-context: plan-packet.yaml is read-only in evaluate/adversary phases.",
 			};
 		}
 		return undefined;
@@ -792,7 +840,7 @@ export default function harnessRunContext(pi: ExtensionAPI) {
 	pi.registerCommand("harness-plan-commit", {
 		description:
-			"Write approved plan-packet.json to the active run (requires harness-plan-approval)",
+			"Write approved plan-packet.yaml to the active run (requires harness-plan-approval)",
 		handler: async (args, ctx) => {
 			const projectRoot = process.cwd();
 			const entries = getEntries(ctx);
@@ -816,7 +864,7 @@ export default function harnessRunContext(pi: ExtensionAPI) {
 				})
 			) {
 				const msg =
-					"Plan commit blocked: no user approval recorded. Approve via ask_user in the planner subagent first.";
+					"Plan commit blocked: no user approval recorded. Approve via approve_plan in this session first.";
 				if (ctx.hasUI) ctx.ui.notify(msg, "warning");
 				return;
 			}
@@ -867,6 +915,98 @@ export default function harnessRunContext(pi: ExtensionAPI) {
 		},
 	});
+	pi.registerTool({
+		name: "write_harness_yaml",
+		label: "Write Harness YAML",
+		description:
+			"Write a plan-phase harness artifact as canonical YAML (parses subagent JSON or YAML, never embeds JSON in .yaml files).",
+		promptSnippet:
+			"Persist plan artifacts (decomposition, hypothesis, stack, review rounds) as real YAML.",
+		promptGuidelines: [
+			"Use write_harness_yaml for all artifacts/*.yaml and research-brief.yaml updates during /harness-plan.",
+			"Pass the subagent fenced json or yaml block as content; the tool converts to YAML on disk.",
+			"Do not use write with stringified JSON for .yaml paths.",
+			"plan-packet.yaml after approval: prefer create_plan; write_harness_yaml is for drafts and side artifacts only.",
+		],
+		parameters: Type.Object({
+			path: Type.String({
+				description:
+					"Path under the active run, e.g. artifacts/decomposition.yaml or research-brief.yaml",
+			}),
+			content: Type.String({
+				description:
+					"YAML or JSON document (fenced or raw) matching the artifact schema",
+			}),
+		}),
+		async execute(_toolCallId, params, _signal, _onUpdate, ctx) {
+			const entries = getEntries(ctx);
+			const runCtx = getLatestRunContext(entries) ?? activeCtx;
+			if (!runCtx?.run_id) {
+				return {
+					content: [
+						{
+							type: "text",
+							text: 'No active harness run. Run /harness-plan "<task>" first.',
+						},
+					],
+					details: {},
+					isError: true,
+				};
+			}
+			const pathArg = String((params as { path?: string }).path ?? "").trim();
+			const content = String((params as { content?: string }).content ?? "");
+			if (!pathArg || !content.trim()) {
+				return {
+					content: [
+						{
+							type: "text",
+							text: "write_harness_yaml requires path and content.",
+						},
+					],
+					details: {},
+					isError: true,
+				};
+			}
+			const projectRoot = process.cwd();
+			const absPath = normalizeHarnessPath(pathArg, projectRoot);
+			const scoped = await isPlanPhaseScopedWrite(absPath, runCtx, projectRoot);
+			if (!scoped) {
+				return {
+					content: [
+						{
+							type: "text",
+							text: `Path not allowed: ${pathArg}. Must be under .pi/harness/runs/${runCtx.run_id}/ (artifacts/*.yaml, research-brief.yaml, etc.).`,
+						},
+					],
+					details: { path: pathArg },
+					isError: true,
+				};
+			}
+			let doc: unknown;
+			try {
+				doc = parseStructuredDocument(content, pathArg);
+			} catch (err) {
+				const msg = err instanceof Error ? err.message : String(err);
+				return {
+					content: [{ type: "text", text: msg }],
+					details: { path: pathArg },
+					isError: true,
+				};
+			}
+			await mkdir(dirname(absPath), { recursive: true });
+			await writeYamlFile(absPath, doc);
+			return {
+				content: [
+					{
+						type: "text",
+						text: `Wrote ${pathArg} as canonical YAML.`,
+					},
+				],
+				details: { path: absPath },
+			};
+		},
+	});
 	pi.registerCommand("harness-use-run", {
 		description: "Point this session at an existing run directory (recovery)",
 		handler: async (args, ctx) => {

package/.pi/extensions/harness-subagents.ts CHANGED Viewed

@@ -1,9 +1,14 @@
 /**
- * harness-subagents — package-resolved agents, blackboard, observation-bus handoffs.
+ * harness-subagents — vendored pi-subagents with ultimate-pi discovery and policy gates.
  */
+import { claimExtensionLoad } from "./lib/extension-load-guard.js";
 import { getHarnessPackageRoot } from "./lib/harness-paths.js";
-import { createHarnessSubagentsExtension } from "./lib/harness-subagents/vendored/index.js";
+import { createHarnessSubagentsExtension } from "./lib/harness-subagents-bridge.js";
+// @ts-expect-error pi extensions run as ESM
+const MODULE_URL = import.meta.url;
-export default createHarnessSubagentsExtension(
-	getHarnessPackageRoot(import.meta.url),
-);
+export default claimExtensionLoad("harness-subagents", MODULE_URL)
+	? createHarnessSubagentsExtension(getHarnessPackageRoot(MODULE_URL))
+	: () => {};

package/.pi/extensions/harness-web-tools.ts CHANGED Viewed

@@ -4,6 +4,7 @@
 import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
 import { Type } from "@sinclair/typebox";
+import { claimExtensionLoad } from "./lib/extension-load-guard.js";
 import {
 	harnessWebContextLine,
 	readTextExcerpt,
@@ -97,6 +98,7 @@ function sessionCwd(ctx: { cwd?: string }): string {
 }
 export default function harnessWebTools(pi: ExtensionAPI) {
+	if (!claimExtensionLoad("harness-web-tools", MODULE_URL)) return;
 	pi.on("before_agent_start", async (event) => {
 		return {
 			systemPrompt: `${event.systemPrompt}\n\n${harnessWebContextLine()}`,

package/.pi/extensions/lib/extension-load-guard.ts ADDED Viewed

@@ -0,0 +1,39 @@
+import { readFileSync } from "node:fs";
+import { join } from "node:path";
+import { fileURLToPath } from "node:url";
+/** Registry-wide symbol so every loaded copy of this module resolves the same slot on `globalThis`. */
+const LOAD_GUARD_KEY = Symbol.for("ultimate-pi.extension-load-guard");
+/** Set of extension keys that have already been claimed. */
+type LoadGuardRegistry = Set<string>;
+/**
+ * Returns the registry stored on `globalThis` under `LOAD_GUARD_KEY`,
+ * creating and storing an empty set on first use.
+ */
+function getRegistry(): LoadGuardRegistry {
+	const holder = globalThis as typeof globalThis & {
+		[LOAD_GUARD_KEY]?: LoadGuardRegistry;
+	};
+	const existing = holder[LOAD_GUARD_KEY];
+	if (existing) return existing;
+	const created: LoadGuardRegistry = new Set<string>();
+	holder[LOAD_GUARD_KEY] = created;
+	return created;
+}
+/**
+ * Reports whether `package.json` in the current working directory declares
+ * the name `ultimate-pi`. Any read or parse failure counts as "no".
+ */
+function isSourceRepo(): boolean {
+	try {
+		const manifestPath = join(process.cwd(), "package.json");
+		const manifestText = readFileSync(manifestPath, "utf8");
+		const manifest = JSON.parse(manifestText) as { name?: string };
+		return manifest.name === "ultimate-pi";
+	} catch {
+		return false;
+	}
+}
+/**
+ * Claims the right to load the extension identified by `key`.
+ *
+ * @param key - Extension identifier recorded in the shared registry.
+ * @param moduleUrl - `file:` URL of the calling module (e.g. `import.meta.url`).
+ * @returns `true` when the caller should register the extension; `false` when
+ *   the module lives under `node_modules/ultimate-pi/` while the working
+ *   directory is the ultimate-pi source repo, or when `key` was already claimed.
+ */
+export function claimExtensionLoad(key: string, moduleUrl: string): boolean {
+	const registry = getRegistry();
+	// fileURLToPath returns backslash-separated paths on Windows; normalize so
+	// the node_modules check below matches on every platform.
+	const modulePath = fileURLToPath(moduleUrl).replace(/\\/g, "/");
+	// Checked before the registry lookup so a refused installed copy never
+	// consumes the key.
+	if (modulePath.includes("/node_modules/ultimate-pi/") && isSourceRepo()) {
+		return false;
+	}
+	if (registry.has(key)) return false;
+	registry.add(key);
+	return true;
+}

package/.pi/extensions/lib/harness-posthog.ts CHANGED Viewed

@@ -22,7 +22,12 @@ export type HarnessPostHogEventName =
 	| "harness_drift_report"
 	| "harness_eval_verdict"
 	| "harness_sentrux_signal"
-	| "harness_observation";
+	| "harness_observation"
+	| "harness_subagent_spawned"
+	| "harness_subagent_completed"
+	| "harness_subagent_result_wait"
+	| "harness_subagent_setup"
+	| "harness_blackboard_op";
 const SCHEMA_VERSION = "1.0.0";

package/.pi/extensions/lib/harness-spawn-budget.ts ADDED Viewed

@@ -0,0 +1,75 @@
+/**
+ * Harness subagent spawn caps (subprocess model).
+ */
+/** Ceiling that `checkHarnessSpawnBudget` applies to `active` plus the incoming request. */
+export const HARNESS_MAX_ACTIVE_SUBAGENTS = 8;
+/** Ceiling that `checkHarnessSpawnBudget` applies to `totalHarnessSpawns` plus the incoming request. */
+export const HARNESS_MAX_SUBAGENT_SPAWNS_PER_SESSION = 12;
+/** True when the agent type string begins with the `harness/` prefix. */
+export function isHarnessAgentType(type: string): boolean {
+	const prefix = "harness/";
+	return type.slice(0, prefix.length) === prefix;
+}
+/** Mutable counters read by `checkHarnessSpawnBudget` and updated by `recordSpawnStart` / `recordSpawnEnd`. */
+export interface SpawnBudgetState {
+	/** Harness subagents currently counted as in flight (raised on start, lowered on end). */
+	active: number;
+	/** Running total of harness spawns; only ever increased by `recordSpawnStart`. */
+	totalHarnessSpawns: number;
+}
+/** Builds a new budget state with both counters at zero. */
+export function createSpawnBudgetState(): SpawnBudgetState {
+	const fresh: SpawnBudgetState = { active: 0, totalHarnessSpawns: 0 };
+	return fresh;
+}
+/**
+ * Gathers every agent name in a spawn request (single `agent`, `tasks`,
+ * `chain`, `aggregator`, in that order) and keeps only the harness ones.
+ *
+ * @returns The harness agent names and how many there are.
+ */
+export function countHarnessAgentsInRequest(params: {
+	agent?: string;
+	tasks?: { agent: string }[];
+	chain?: { agent: string }[];
+	aggregator?: { agent: string };
+}): { harnessCount: number; agents: string[] } {
+	const requested: string[] = [];
+	const appendAll = (entries?: { agent: string }[]): void => {
+		if (!entries) return;
+		for (const entry of entries) requested.push(entry.agent);
+	};
+	if (params.agent) requested.push(params.agent);
+	appendAll(params.tasks);
+	appendAll(params.chain);
+	if (params.aggregator) requested.push(params.aggregator.agent);
+	const harnessOnly = requested.filter(isHarnessAgentType);
+	return { harnessCount: harnessOnly.length, agents: harnessOnly };
+}
+/**
+ * Decides whether `incomingHarnessTasks` more harness subagents may start.
+ *
+ * The concurrent limit is evaluated first, then the cumulative spawn cap.
+ * Does not modify `state`.
+ *
+ * @returns `{ ok: true }` when both limits hold, otherwise `{ ok: false }`
+ *   with a message naming the limit that was exceeded.
+ */
+export function checkHarnessSpawnBudget(
+	state: SpawnBudgetState,
+	incomingHarnessTasks: number,
+): { ok: boolean; message?: string } {
+	const projectedActive = state.active + incomingHarnessTasks;
+	if (projectedActive > HARNESS_MAX_ACTIVE_SUBAGENTS) {
+		const message =
+			`Harness subagent limit reached (${state.active} active + ${incomingHarnessTasks} requested > ${HARNESS_MAX_ACTIVE_SUBAGENTS}). ` +
+			`Wait for in-flight subagent calls to finish before spawning more.`;
+		return { ok: false, message };
+	}
+	const projectedTotal = state.totalHarnessSpawns + incomingHarnessTasks;
+	if (projectedTotal > HARNESS_MAX_SUBAGENT_SPAWNS_PER_SESSION) {
+		const message =
+			`Harness subagent spawn cap reached (${projectedTotal}/${HARNESS_MAX_SUBAGENT_SPAWNS_PER_SESSION} this session). ` +
+			`Finish the current harness phase or start a new session.`;
+		return { ok: false, message };
+	}
+	return { ok: true };
+}
+/** Adds `harnessCount` to both the in-flight counter and the cumulative spawn total. */
+export function recordSpawnStart(
+	state: SpawnBudgetState,
+	harnessCount: number,
+): void {
+	const { active, totalHarnessSpawns } = state;
+	state.active = active + harnessCount;
+	state.totalHarnessSpawns = totalHarnessSpawns + harnessCount;
+}
+/**
+ * Lowers the in-flight counter by `harnessCount`, never below zero.
+ * The cumulative spawn total is left untouched.
+ */
+export function recordSpawnEnd(
+	state: SpawnBudgetState,
+	harnessCount: number,
+): void {
+	const remaining = state.active - harnessCount;
+	state.active = Math.max(remaining, 0);
+}

package/.pi/extensions/lib/harness-subagent-auth.ts ADDED Viewed

@@ -0,0 +1,123 @@
+/**
+ * Resolve concrete LLM credentials for harness subagent subprocesses.
+ *
+ * Parent sessions often use `router/auto` (pi-model-router). Subagents run with
+ * `--no-extensions`, so they cannot use the logical router provider — they need
+ * a real provider/model plus that provider's API key.
+ */
+import { existsSync, readFileSync } from "node:fs";
+import { join } from "node:path";
+import type { AgentConfig } from "../../../vendor/pi-subagents/src/agents.js";
+// One of the key strings that `isUsableApiKey` rejects (presumably the
+// placeholder the router provider reports instead of a real key — confirm).
+const ROUTER_SENTINEL_KEY = "pi-model-router";
+// Key strings that `isUsableApiKey` treats as not usable.
+const SENTINEL_API_KEYS = new Set([ROUTER_SENTINEL_KEY, "<authenticated>"]);
+// Tier names used to index a profile in `.pi/model-router.json`.
+type RouterTier = "high" | "medium" | "low";
+// The subset of `.pi/model-router.json` that this module reads.
+interface ModelRouterJson {
+	// Profile name tried when the requested profile id is not present.
+	defaultProfile?: string;
+	// Profile name → tier → `{ model }` entry.
+	profiles?: Record<string, Partial<Record<RouterTier, { model?: string }>>>;
+}
+/** Type guard: true for a non-empty key that is not one of the sentinel values. */
+export function isUsableApiKey(key: string | undefined): key is string {
+	if (!key) return false;
+	return !SENTINEL_API_KEYS.has(key);
+}
+/**
+ * Splits a `provider/model` reference at its first slash.
+ *
+ * @returns The trimmed provider and model id, or `null` when there is no
+ *   slash, the slash is the first character, or either side is empty after
+ *   trimming. Any further slashes stay in `modelId`.
+ */
+export function parseModelRef(
+	ref: string,
+): { provider: string; modelId: string } | null {
+	const separatorAt = ref.indexOf("/");
+	if (separatorAt <= 0) return null;
+	const provider = ref.substring(0, separatorAt).trim();
+	const modelId = ref.substring(separatorAt + 1).trim();
+	return provider && modelId ? { provider, modelId } : null;
+}
+/**
+ * Maps a thinking level to a router tier: `high`/`xhigh` → `high`,
+ * `off`/`minimal`/`low` → `low`, anything else (including undefined) → `medium`.
+ */
+export function thinkingToRouterTier(thinking?: string): RouterTier {
+	switch (thinking) {
+		case "high":
+		case "xhigh":
+			return "high";
+		case "off":
+		case "minimal":
+		case "low":
+			return "low";
+		default:
+			return "medium";
+	}
+}
+/**
+ * Map router profile tier → concrete `provider/model` from `.pi/model-router.json`.
+ *
+ * Profile lookup order: `profileId`, then the file's `defaultProfile`
+ * (or `"auto"` when unset), then `profiles.auto`.
+ *
+ * @param cwd - Directory containing the `.pi` folder.
+ * @param profileId - Router profile name to look up first.
+ * @param tier - Tier entry to read from the selected profile.
+ * @returns The configured model string when it contains a `/`; `undefined`
+ *   when the file is missing, unreadable, not a JSON object, or has no
+ *   matching entry.
+ */
+export function resolveRouterConcreteModelRef(
+	cwd: string,
+	profileId: string,
+	tier: RouterTier,
+): string | undefined {
+	const path = join(cwd, ".pi", "model-router.json");
+	if (!existsSync(path)) return undefined;
+	let raw: unknown;
+	try {
+		raw = JSON.parse(readFileSync(path, "utf8"));
+	} catch {
+		return undefined;
+	}
+	// JSON.parse succeeds on `null` and bare primitives; without this guard a
+	// file containing `null` would throw on the property access below.
+	if (raw === null || typeof raw !== "object") return undefined;
+	const { profiles, defaultProfile } = raw as ModelRouterJson;
+	if (!profiles) return undefined;
+	const profile =
+		profiles[profileId] ?? profiles[defaultProfile ?? "auto"] ?? profiles.auto;
+	const model = profile?.[tier]?.model;
+	return typeof model === "string" && model.includes("/") ? model : undefined;
+}
+/** Result of `resolveConcreteSubagentModel`: a model reference plus its parsed parts. */
+export interface ConcreteSubagentModel {
+	/** Full model reference string (`provider/modelId` form). */
+	modelRef: string;
+	/** Provider part of the reference. */
+	provider: string;
+	/** Model id part of the reference. */
+	modelId: string;
+	/** Router profile id used for the lookup; set only when resolved through the router config. */
+	routerProfile?: string;
+	/** Router tier used for the lookup; set only when resolved through the router config. */
+	routerTier?: RouterTier;
+}
+/**
+ * Pick the subprocess model ref before resolving API keys.
+ * Never returns `router/*` — always a concrete provider.
+ *
+ * Resolution order:
+ * 1. The agent's own model, when it is not a `router/` ref and parses.
+ * 2. The parent's model, when neither parent nor agent uses the router.
+ * 3. The router config entry for the profile (agent's `router/<id>`, else the
+ *    parent model id, else `"auto"`) and the tier derived from `agent.thinking`.
+ *
+ * @returns The concrete model, or `undefined` when none can be determined.
+ */
+export function resolveConcreteSubagentModel(
+	cwd: string,
+	parentModel: { provider: string; id: string } | undefined,
+	agent: AgentConfig,
+): ConcreteSubagentModel | undefined {
+	const requested = agent.model;
+	const agentIsRouter = Boolean(requested?.startsWith("router/"));
+	if (requested && !agentIsRouter) {
+		const direct = parseModelRef(requested);
+		if (direct) {
+			return {
+				modelRef: requested,
+				provider: direct.provider,
+				modelId: direct.modelId,
+			};
+		}
+		// An unparseable non-router ref falls through to the parent/router paths.
+	}
+	const parentIsRouter = parentModel?.provider === "router";
+	if (!(parentIsRouter || agentIsRouter)) {
+		if (!parentModel) return undefined;
+		const { provider, id } = parentModel;
+		return { modelRef: `${provider}/${id}`, provider, modelId: id };
+	}
+	let profileId: string;
+	if (agentIsRouter && requested) {
+		profileId = requested.slice("router/".length);
+	} else {
+		profileId = parentModel?.id ?? "auto";
+	}
+	const tier = thinkingToRouterTier(agent.thinking);
+	const concrete = resolveRouterConcreteModelRef(cwd, profileId, tier);
+	if (!concrete) return undefined;
+	const resolved = parseModelRef(concrete);
+	if (!resolved || resolved.provider === "router") return undefined;
+	return {
+		modelRef: concrete,
+		provider: resolved.provider,
+		modelId: resolved.modelId,
+		routerProfile: profileId,
+		routerTier: tier,
+	};
+}

package/.pi/extensions/lib/{harness-subagents/harness-subagent-policy.ts → harness-subagent-policy.ts} RENAMED Viewed

@@ -20,6 +20,15 @@ export type HarnessAgentKind =
 const MUTATING_TOOLS = new Set(["write", "edit"]);
+// Bash command patterns (case-insensitive, word-bounded) that
+// evaluateHarnessSubagentToolCall blocks for planning agents: graphify
+// update/extract/install and pip / uv tool / npm installs.
+const PLANNING_BASH_DENY_PATTERNS = [
+	/\bgraphify\s+update\b/i,
+	/\bgraphify\s+extract\b/i,
+	/\bgraphify\s+install\b/i,
+	/\bpip\s+install\b/i,
+	/\buv\s+tool\s+install\b/i,
+	/\bnpm\s+install\b/i,
+];
 const BASH_MUTATION_PATTERNS = [
 	/\brm\s+-/i,
 	/\bmv\s+/i,
@@ -45,11 +54,17 @@ const READ_ONLY_KINDS = new Set<HarnessAgentKind>([
 	"meta",
 ]);
+/**
+ * True when the agent type, with one optional leading `harness/` removed,
+ * begins with `planning/`.
+ */
+export function isHarnessPlanningAgent(agentType: string): boolean {
+	const harnessPrefix = "harness/";
+	const bareId = agentType.startsWith(harnessPrefix)
+		? agentType.slice(harnessPrefix.length)
+		: agentType;
+	return bareId.startsWith("planning/");
+}
 export function classifyHarnessAgent(agentType: string): HarnessAgentKind {
 	const id = agentType.replace(/^harness\//, "");
+	if (id.startsWith("planning/")) {
+		return "planner";
+	}
 	switch (id) {
-		case "planner":
-			return "planner";
 		case "executor":
 			return "executor";
 		case "evaluator":
@@ -96,20 +111,17 @@ export function evaluateHarnessSubagentToolCall(
 		return { action: "allow" };
 	}
-	if (toolName === "create_plan") {
-		if (kind === "planner") {
-			return { action: "allow" };
-		}
+	if (toolName === "create_plan" || toolName === "approve_plan") {
 		return {
 			action: "block",
-			reason: `harness-subagent-policy: create_plan is only for harness/planner.`,
+			reason: `harness-subagent-policy: ${toolName} is parent-orchestrator only (not available in subagents).`,
 		};
 	}
 	if (MUTATING_TOOLS.has(toolName)) {
 		return {
 			action: "block",
-			reason: `harness-subagent-policy: ${toolName} blocked for harness/${kind} (read-only phase agent). Use create_plan after approve_plan instead of write/edit.`,
+			reason: `harness-subagent-policy: ${toolName} blocked for harness/${kind} (read-only phase agent).`,
 		};
 	}
@@ -121,13 +133,26 @@ export function evaluateHarnessSubagentToolCall(
 				reason: `harness-subagent-policy: mutating bash blocked for harness/${kind}.`,
 			};
 		}
+		if (
+			command &&
+			isHarnessPlanningAgent(agentType) &&
+			PLANNING_BASH_DENY_PATTERNS.some((p) => p.test(command))
+		) {
+			return {
+				action: "block",
+				reason:
+					"harness-subagent-policy: planning scouts may use read-only graphify/sg/ck commands only.",
+			};
+		}
 	}
 	return { action: "allow" };
 }
-/** Policy phase hint seeded into subagent system prompt appendix when extensions load policy-gate. */
 export function harnessSubagentPhaseHint(agentType: string): string | null {
+	if (isHarnessPlanningAgent(agentType)) {
+		return "plan";
+	}
 	const kind = classifyHarnessAgent(agentType);
 	switch (kind) {
 		case "planner":