npm - ultimate-pi - Versions diffs - 0.15.0 → 0.17.0 - Mend

ultimate-pi 0.15.0 → 0.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (90)

package/.pi/harness/evals/smoke/smoke-harness-plan.mjs CHANGED

@@ -1,7 +1,7 @@
 #!/usr/bin/env node
 /**
  * smoke-harness-plan — fixture validation for plan-phase pipeline (CI).
- * Usage: node .pi/harness/evals/smoke/smoke-harness-plan.mjs --fixture [minimal-med|minimal-low-light]
+ * Usage: node .pi/harness/evals/smoke/smoke-harness-plan.mjs --fixture [minimal-med|minimal-low-light|minimal-med-fast]
  */
 import { access, readFile } from "node:fs/promises";
@@ -26,16 +26,23 @@ async function scanFocusCoverage(fixtureRoot, requiredFocus) {
 	let last_round_index = 0;
 	const { readdir } = await import("node:fs/promises");
 	const files = (await readdir(art)).filter((f) =>
-		/^review-round-r\d+\.yaml$/i.test(f),
+		/^review-round(?:-r\d+|-consolidated)\.yaml$/i.test(f),
 	);
 	for (const name of files.sort()) {
-		const m = /^review-round-r(\d+)\.yaml$/i.exec(name);
+		const consolidated = /^review-round-consolidated\.yaml$/i.test(name);
+		const m = consolidated
+			? ["review-round-consolidated.yaml", "1"]
+			: /^review-round-r(\d+)\.yaml$/i.exec(name);
 		if (!m) continue;
-		const roundIndex = Number(m[1]);
+		const roundIndex = consolidated ? 1 : Number(m[1]);
 		if (roundIndex > last_round_index) last_round_index = roundIndex;
 		const draft = parseYaml(await readFile(join(art, name), "utf-8"));
 		const focus = String(draft.debate_round_focus ?? "").trim();
-		if (requiredFocus.includes(focus)) covered.add(focus);
+		if (focus === "all") {
+			for (const f of requiredFocus) covered.add(f);
+		} else if (requiredFocus.includes(focus)) {
+			covered.add(focus);
+		}
 		if (roundIndex === last_round_index) {
 			last_review_gate_ready = draft.review_gate_ready === true;
 		}
@@ -110,22 +117,33 @@ async function runFixture(name) {
 	ok("research-brief.yaml structure");
 	const isLight = name === "minimal-low-light";
-	const requiredFocus = isLight ? ["spec", "quality"] : ["spec", "wbs", "schedule", "quality"];
-	const debateRounds = isLight
-		? ["review-round-r1.yaml", "review-round-r2.yaml"]
-		: [
-				"review-round-r1.yaml",
-				"review-round-r2.yaml",
-				"review-round-r3.yaml",
-				"review-round-r4.yaml",
-			];
+	const isFast = name === "minimal-med-fast";
+	const requiredFocus =
+		isLight || isFast
+			? ["spec", "quality"]
+			: ["spec", "wbs", "schedule", "quality"];
+	const debateRounds = isFast
+		? ["review-round-consolidated.yaml"]
+		: isLight
+			? ["review-round-r1.yaml", "review-round-r2.yaml"]
+			: [
+					"review-round-r1.yaml",
+					"review-round-r2.yaml",
+					"review-round-r3.yaml",
+					"review-round-r4.yaml",
+				];
 	const seenFocus = new Set();
 	for (const fileName of debateRounds) {
 		const p = join(fixtureRoot, "artifacts", fileName);
 		await access(p, constants.R_OK);
 		const draft = parseYaml(await readFile(p, "utf-8"));
 		if (!draft.schema_version) fail(`${fileName} missing schema_version`);
-		if (draft.debate_round_focus) seenFocus.add(draft.debate_round_focus);
+		const f = String(draft.debate_round_focus ?? "").trim();
+		if (f === "all") {
+			for (const req of requiredFocus) seenFocus.add(req);
+		} else if (f) {
+			seenFocus.add(f);
+		}
 	}
 	for (const focus of requiredFocus) {
 		if (!seenFocus.has(focus)) {
@@ -135,7 +153,7 @@ async function runFixture(name) {
 	ok(`debate round YAML artifacts (${requiredFocus.length} focuses)`);
 	const coverage = await scanFocusCoverage(fixtureRoot, requiredFocus);
-	const minRounds = isLight ? 2 : 4;
+	const minRounds = isFast ? 1 : isLight ? 2 : 4;
 	if (!planOutcomeComplete(coverage, requiredFocus, minRounds)) {
 		fail("debate outcome incomplete for fixture coverage");
 	}
@@ -144,6 +162,9 @@ async function runFixture(name) {
 	if (isLight && packet.risk_level !== "low") {
 		fail("minimal-low-light fixture must use risk_level low");
 	}
+	if (isFast && packet.risk_level !== "med") {
+		fail("minimal-med-fast fixture must use risk_level med");
+	}
 	console.log(`smoke-harness-plan: all ${name} fixture checks passed`);
 }
@@ -161,7 +182,9 @@ async function main() {
 		);
 		return;
 	}
-	fail("Usage: smoke-harness-plan.mjs --fixture [minimal-med|minimal-low-light] | --live");
+	fail(
+		"Usage: smoke-harness-plan.mjs --fixture [minimal-med|minimal-low-light|minimal-med-fast] | --live",
+	);
 }
 main().catch((err) => {

package/.pi/harness/specs/harness-executor-handoff.schema.json ADDED

@@ -0,0 +1,19 @@
+{
+	"$schema": "https://json-schema.org/draft/2020-12/schema",
+	"$id": "https://ultimate-pi.local/.pi/harness/specs/harness-executor-handoff.schema.json",
+	"title": "HarnessExecutorHandoff",
+	"type": "object",
+	"additionalProperties": true,
+	"required": ["schema_version", "execution_status"],
+	"properties": {
+		"schema_version": { "type": "string", "const": "1.0.0" },
+		"execution_status": {
+			"type": "string",
+			"enum": ["completed", "blocked", "scope_drift"]
+		},
+		"files_changed": { "type": "array" },
+		"validation_summary": { "type": "string" },
+		"rollback_refs": { "type": "object" },
+		"handoff_ready": { "type": "object" }
+	}
+}

package/.pi/harness/specs/harness-human-required.schema.json ADDED

@@ -0,0 +1,16 @@
+{
+	"$schema": "https://json-schema.org/draft/2020-12/schema",
+	"$id": "https://ultimate-pi.local/.pi/harness/specs/harness-human-required.schema.json",
+	"title": "HarnessHumanRequired",
+	"type": "object",
+	"additionalProperties": false,
+	"required": ["schema_version", "reason"],
+	"properties": {
+		"schema_version": { "type": "string", "const": "1.0.0" },
+		"reason": { "type": "string", "minLength": 1 },
+		"questions": {
+			"type": "array",
+			"items": { "type": "string" }
+		}
+	}
+}

package/.pi/harness/specs/plan-review-round-draft.schema.json CHANGED

@@ -16,7 +16,7 @@
 		"round_index": { "type": "integer", "minimum": 1, "maximum": 12 },
 		"debate_round_focus": {
 			"type": "string",
-			"enum": ["spec", "wbs", "schedule", "quality"]
+			"enum": ["spec", "wbs", "schedule", "quality", "all"]
 		},
 		"round_summary": { "type": "string", "minLength": 1 },
 		"validation_summary": { "type": "string" },

package/.pi/harness/specs/plan-scout-findings.schema.json ADDED

@@ -0,0 +1,19 @@
+{
+	"$schema": "https://json-schema.org/draft/2020-12/schema",
+	"$id": "https://ultimate-pi.local/.pi/harness/specs/plan-scout-findings.schema.json",
+	"title": "PlanScoutFindings",
+	"type": "object",
+	"additionalProperties": true,
+	"required": ["schema_version", "lane", "summary"],
+	"properties": {
+		"schema_version": { "type": "string", "const": "1.0.0" },
+		"lane": {
+			"type": "string",
+			"enum": ["graphify", "structure", "semantic"]
+		},
+		"scout_lane": { "type": "string" },
+		"summary": { "type": "string", "minLength": 1 },
+		"key_paths": { "type": "array", "items": { "type": "string" } },
+		"findings": { "type": "array" }
+	}
+}

package/.pi/lib/harness-agent-output.ts CHANGED

@@ -21,6 +21,51 @@ export function extractJsonBlock(text: string): string | null {
 	return null;
 }
+export interface ToolCallPartLike {
+	type?: string;
+	name?: string;
+	arguments?: Record<string, unknown>;
+}
+export interface MessageLike {
+	role?: string;
+	content?: ToolCallPartLike[] | unknown;
+}
+/** Last matching submit_* tool call in subprocess messages (chain-safe). */
+export function extractLastSubmitCall(
+	messages: MessageLike[],
+	toolNames: string | string[],
+): { toolName: string; document: Record<string, unknown> } | null {
+	const allowed = new Set(
+		(Array.isArray(toolNames) ? toolNames : [toolNames]).map((n) => n.trim()),
+	);
+	let last: { toolName: string; document: Record<string, unknown> } | null =
+		null;
+	for (const msg of messages) {
+		if (msg.role !== "assistant" || !Array.isArray(msg.content)) continue;
+		for (const part of msg.content) {
+			if (part.type !== "toolCall" || !part.name) continue;
+			if (!allowed.has(part.name)) continue;
+			const doc = part.arguments?.document;
+			if (doc && typeof doc === "object" && !Array.isArray(doc)) {
+				last = {
+					toolName: part.name,
+					document: doc as Record<string, unknown>,
+				};
+			}
+		}
+	}
+	return last;
+}
+export function extractLastSubmitCallForAgent(
+	messages: MessageLike[],
+	agentToolNames: readonly string[],
+): { toolName: string; document: Record<string, unknown> } | null {
+	return extractLastSubmitCall(messages, [...agentToolNames]);
+}
 export function parseHarnessAgentJson<T extends Record<string, unknown>>(
 	text: string,
 ): { ok: true; value: T } | { ok: false; error: string } {

package/.pi/lib/harness-budget-enforce.ts ADDED

@@ -0,0 +1,18 @@
+/**
+ * Central switch for harness token/debate budget enforcement.
+ * Default: telemetry-only (HARNESS_BUDGET_ENFORCE off).
+ */
+export function isHarnessBudgetEnforceOn(): boolean {
+	const raw = (process.env.HARNESS_BUDGET_ENFORCE ?? "off").toLowerCase();
+	return raw === "1" || raw === "true" || raw === "on";
+}
+/** When false, soft-limit and debate telemetry must not block UI or gates. */
+export function shouldEmitBlockingBudgetExhausted(): boolean {
+	if (!isHarnessBudgetEnforceOn()) return false;
+	return (
+		process.env.HARNESS_BUDGET_HARD_STOP === "true" ||
+		process.env.HARNESS_DEBATE_HARD_STOP === "true"
+	);
+}

package/.pi/lib/harness-schema-validate.ts ADDED

@@ -0,0 +1,89 @@
+/**
+ * JSON Schema validation for harness submit tools (Ajv draft 2020-12, offline).
+ */
+import { appendFile, readFile } from "node:fs/promises";
+import { join } from "node:path";
+import Ajv2020 from "ajv/dist/2020";
+import addFormats from "ajv-formats";
+type ValidateFn = (data: unknown) => boolean;
+const compileCache = new Map<string, ValidateFn>();
+const DEBUG_LOG_PATH =
+	"/home/aryaniyaps/ai-projects/ultimate-pi/.cursor/debug-2ca12b.log";
+let ajvSingleton: InstanceType<typeof Ajv2020> | null = null;
+function getAjv(): InstanceType<typeof Ajv2020> {
+	if (!ajvSingleton) {
+		ajvSingleton = new Ajv2020({
+			allErrors: true,
+			strict: false,
+			validateSchema: false,
+		});
+		addFormats(ajvSingleton);
+	}
+	return ajvSingleton;
+}
+async function debugLog(
+	hypothesisId: string,
+	message: string,
+	data: Record<string, unknown>,
+): Promise<void> {
+	// #region agent log
+	try {
+		await appendFile(
+			DEBUG_LOG_PATH,
+			`${JSON.stringify({
+				sessionId: "2ca12b",
+				hypothesisId,
+				location: "harness-schema-validate.ts",
+				message,
+				data,
+				timestamp: Date.now(),
+			})}\n`,
+		);
+	} catch {
+		/* ignore */
+	}
+	// #endregion
+}
+export async function validateAgainstHarnessSchema(
+	specsDir: string,
+	schemaFile: string,
+	document: unknown,
+): Promise<{ ok: true } | { ok: false; errors: string[] }> {
+	const cacheKey = `${specsDir}:${schemaFile}`;
+	let validate = compileCache.get(cacheKey);
+	if (!validate) {
+		const schemaPath = join(specsDir, schemaFile);
+		const raw = await readFile(schemaPath, "utf-8");
+		const schema = JSON.parse(raw) as Record<string, unknown>;
+		try {
+			const ajv = getAjv();
+			const compiled = ajv.compile(schema);
+			validate = compiled;
+			compileCache.set(cacheKey, compiled);
+			await debugLog("H3", "schema compile ok", { schemaFile });
+		} catch (err) {
+			const msg = err instanceof Error ? err.message : String(err);
+			await debugLog("H3", "schema compile failed", { schemaFile, error: msg });
+			return { ok: false, errors: [`schema compile failed: ${msg}`] };
+		}
+	}
+	const ok = validate(document);
+	if (ok) return { ok: true };
+	const errors = (
+		(
+			validate as {
+				errors?: Array<{ instancePath?: string; message?: string }>;
+			}
+		).errors ?? []
+	).map((e: { instancePath?: string; message?: string }) =>
+		`${e.instancePath || "/"} ${e.message ?? "invalid"}`.trim(),
+	);
+	return { ok: false, errors };
+}

package/.pi/lib/harness-spawn-parse.ts ADDED

@@ -0,0 +1,86 @@
+/**
+ * Parse HarnessSpawnContext embedded in subagent task strings.
+ */
+const SPAWN_CTX_EQ_RE = /HarnessSpawnContext\s*=\s*(\{[\s\S]*?\})(?:\s|$|\.)/;
+export interface ParsedSpawnContext {
+	run_id?: string;
+	run_dir?: string;
+	agent?: string;
+	plan_packet_path?: string;
+}
+function extractBalancedJsonObject(s: string, start: number): string | null {
+	if (s[start] !== "{") return null;
+	let depth = 0;
+	let inString = false;
+	let escaped = false;
+	for (let i = start; i < s.length; i++) {
+		const ch = s[i];
+		if (inString) {
+			if (escaped) escaped = false;
+			else if (ch === "\\") escaped = true;
+			else if (ch === '"') inString = false;
+			continue;
+		}
+		if (ch === '"') {
+			inString = true;
+			continue;
+		}
+		if (ch === "{") depth++;
+		else if (ch === "}") {
+			depth--;
+			if (depth === 0) return s.slice(start, i + 1);
+		}
+	}
+	return null;
+}
+function normalizeSpawnContext(parsed: unknown): ParsedSpawnContext | null {
+	if (!parsed || typeof parsed !== "object") return null;
+	const o = parsed as Record<string, unknown>;
+	const run_id = typeof o.run_id === "string" ? o.run_id : undefined;
+	const run_dir = typeof o.run_dir === "string" ? o.run_dir : undefined;
+	const agent = typeof o.agent === "string" ? o.agent : undefined;
+	const plan_packet_path =
+		typeof o.plan_packet_path === "string" ? o.plan_packet_path : undefined;
+	if (!run_id && !run_dir) return null;
+	return { run_id, run_dir, agent, plan_packet_path };
+}
+export function parseSpawnContextFromTask(
+	task: string,
+): ParsedSpawnContext | null {
+	const eqMatch = SPAWN_CTX_EQ_RE.exec(task);
+	if (eqMatch?.[1]) {
+		try {
+			return normalizeSpawnContext(JSON.parse(eqMatch[1]));
+		} catch {
+			// fall through to JSON-object forms
+		}
+	}
+	const firstBrace = task.indexOf("{");
+	if (firstBrace >= 0) {
+		const blob = extractBalancedJsonObject(task, firstBrace);
+		if (blob) {
+			try {
+				const outer = JSON.parse(blob) as Record<string, unknown>;
+				if (
+					outer.HarnessSpawnContext &&
+					typeof outer.HarnessSpawnContext === "object"
+				) {
+					return normalizeSpawnContext(outer.HarnessSpawnContext);
+				}
+				if (typeof outer.run_id === "string") {
+					return normalizeSpawnContext(outer);
+				}
+			} catch {
+				// ignore
+			}
+		}
+	}
+	return null;
+}

package/.pi/lib/harness-subagent-submit-path.ts ADDED

@@ -0,0 +1,41 @@
+/**
+ * Resolve and guard harness run directories for subagent submit tools.
+ */
+import { realpath } from "node:fs/promises";
+import { join, resolve } from "node:path";
+export function harnessRunsRoot(projectRoot: string): string {
+	return join(projectRoot, ".pi", "harness", "runs");
+}
+export async function resolveGuardedRunDir(opts: {
+	projectRoot: string;
+	runId: string;
+	runDirEnv?: string;
+}): Promise<{ ok: true; runDir: string } | { ok: false; error: string }> {
+	const { projectRoot, runId } = opts;
+	if (!runId.trim()) {
+		return { ok: false, error: "run_id is required" };
+	}
+	const expected = join(harnessRunsRoot(projectRoot), runId);
+	let candidate = opts.runDirEnv?.trim()
+		? resolve(projectRoot, opts.runDirEnv)
+		: expected;
+	try {
+		candidate = await realpath(candidate);
+		const expectedReal = await realpath(expected);
+		if (
+			candidate !== expectedReal &&
+			!candidate.startsWith(`${expectedReal}/`)
+		) {
+			return {
+				ok: false,
+				error: `run_dir must stay under ${expectedReal}`,
+			};
+		}
+		return { ok: true, runDir: candidate };
+	} catch {
+		return { ok: false, error: `run directory not found for run_id=${runId}` };
+	}
+}

package/.pi/lib/harness-ui-state.ts CHANGED

@@ -1,4 +1,5 @@
 import type { ExtensionContext } from "@earendil-works/pi-coding-agent";
+import { shouldEmitBlockingBudgetExhausted } from "./harness-budget-enforce.js";
 export type HarnessPhase =
 	| "plan"
@@ -133,6 +134,9 @@ const RELEVANT_CUSTOM_TYPES = new Set([
 	"harness-consensus-packet",
 	"harness-round-result",
 	"harness-budget-exhausted",
+	"harness-budget-soft-limit",
+	"harness-budget-telemetry",
+	"harness-debate-budget-telemetry",
 	"harness-review-integrity",
 	"harness-test-integrity-flag",
 	"harness-run-trace",
@@ -189,7 +193,7 @@ function deriveFlowSubstate(state: HarnessUiState): HarnessFlowSubstate {
 	return "idle";
 }
-function createStateFromEntries(entries: unknown[]): HarnessUiState {
+export function createStateFromEntries(entries: unknown[]): HarnessUiState {
 	const latest = pickLatestCustomEntries(entries);
 	const state: HarnessUiState = {
 		...DEFAULT_STATE,
@@ -212,7 +216,7 @@ function createStateFromEntries(entries: unknown[]): HarnessUiState {
 	const budget = latest.get("harness-budget-exhausted") as
 		| BudgetExhaustedLike
 		| undefined;
-	if (budget) {
+	if (budget && shouldEmitBlockingBudgetExhausted()) {
 		state.budgetExhausted = true;
 		state.budgetReason =
 			typeof budget.exhaustion_reason === "string"
@@ -223,6 +227,15 @@ function createStateFromEntries(entries: unknown[]): HarnessUiState {
 		const cap = asNumber(budget.caps?.debate_global_cap);
 		if (cap != null) state.debateBudgetCap = cap;
 	}
+	const telemetry = latest.get("harness-budget-telemetry") as
+		| BudgetExhaustedLike
+		| undefined;
+	if (telemetry && !state.budgetExhausted) {
+		const budgetUsed = asNumber(telemetry.budget_used);
+		if (budgetUsed != null) state.debateBudgetUsed = budgetUsed;
+		const cap = asNumber(telemetry.caps?.debate_global_cap);
+		if (cap != null) state.debateBudgetCap = cap;
+	}
 	const testIntegrity = latest.get("harness-test-integrity-flag") as
 		| TestIntegrityLike

package/.pi/model-router.example.json CHANGED

@@ -1,7 +1,7 @@
 {
 	"defaultProfile": "auto",
 	"debug": false,
-	"classifierModel": "opencode-go/qwen3.6-plus",
+	"classifierModel": "openai/gpt-5.4-nano",
 	"phaseBias": 0.5,
 	"maxSessionBudget": 1.0,
 	"largeContextThreshold": 100000,
@@ -16,12 +16,21 @@
 	"profiles": {
 		"auto": {
 			"high": {
-				"model": "opencode-go/deepseek-v4-pro",
+				"model": "openai/gpt-5.5",
 				"thinking": "high",
-				"fallbacks": ["opencode-go/qwen3.6-plus"]
+				"fallbacks": ["openai/gpt-5.4-nano"]
+			},
+			"medium": { "model": "openai/gpt-5.5", "thinking": "medium" },
+			"low": { "model": "openai/gpt-5.5", "thinking": "low" }
+		},
+		"opencode-go": {
+			"high": {
+				"model": "opencode-go/qwen3.6-plus",
+				"thinking": "high",
+				"fallbacks": ["opencode-go/deepseek-v4-flash"]
 			},
 			"medium": { "model": "opencode-go/qwen3.6-plus", "thinking": "medium" },
-			"low": { "model": "opencode-go/deepseek-v4-flash", "thinking": "low" }
+			"low": { "model": "opencode-go/qwen3.6-plus", "thinking": "low" }
 		}
 	}
 }

package/.pi/prompts/harness-auto.md CHANGED

@@ -1,6 +1,6 @@
 ---
 description: Full strict harness pipeline with locked governance decisions.
-argument-hint: "\"<task>\" [--quick] [--risk low|med|high] [--budget <amount>]"
+argument-hint: "\"<task>\" [--quick] [--risk low|med|high]"
 ---
 # harness-auto
@@ -10,7 +10,7 @@ Pipeline orchestrator — one session, sequential phase handoffs. Invoke **harne
 ## Step 0 — Parse arguments
 - required task (quoted or first token)
-- optional: `--quick`, `--risk`, `--budget`
+- optional: `--quick`, `--risk` (`--budget` reserved/no-op)
 If task missing: