npm - ultimate-pi - Versions diffs - 0.1.7 → 0.2.2 - Mend

ultimate-pi 0.1.7 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (524) hide show

package/.pi/extensions/test-diff-integrity.ts ADDED Viewed

@@ -0,0 +1,240 @@
+/**
+ * test-diff-integrity — detect suspicious test edits.
+ *
+ * Flags:
+ * - assertion removal patterns
+ * - skip inflation (`it.skip`, `describe.skip`, `xit`, `xdescribe`)
+ * - disabled/no-test bypass flags in bash
+ *
+ * On detection, this extension emits escalation records so adversarial review
+ * becomes mandatory in downstream policy gates.
+ */
+import { appendFile, mkdir } from "node:fs/promises";
+import { join } from "node:path";
+import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
/** Directory, resolved against the current working directory, that holds harness incident logs. */
const INCIDENTS_DIR = join(process.cwd(), ".pi", "harness", "incidents");

/** JSON Lines file inside `INCIDENTS_DIR` that receives one line per integrity flag. */
const INCIDENT_FILE = join(INCIDENTS_DIR, "test-diff-integrity.jsonl");
/**
 * One suspicious test-related tool call, as written to the incident log and
 * attached to the session as a `harness-test-integrity-flag` entry.
 */
interface IntegrityFlag {
	/** ISO-8601 timestamp taken when the flag was created. */
	timestamp: string;
	/** Name of the tool whose call was flagged (`bash`, `edit` or `write`). */
	tool_name: string;
	/** Path targeted by an edit/write call; omitted for bash commands. */
	file_path?: string;
	/** How serious the finding is. */
	severity: "medium" | "high";
	/** Human-readable explanations, one per triggered heuristic. */
	reasons: string[];
	/** Whether downstream policy must run an adversarial review. */
	force_adversary: boolean;
}
/**
 * Loose view of the input payload of an `edit` or `write` tool call.
 * Every field is `unknown` because the payload is not validated here; the
 * consumer coerces values with `String(...)` before use.
 */
interface EditWriteInputLike {
	/** Target file of the edit/write. */
	filePath?: unknown;
	/** Text being replaced (edit calls). */
	oldString?: unknown;
	/** Replacement text (edit calls). */
	newString?: unknown;
	/** Full file content; used as the new text when `newString` is empty. */
	content?: unknown;
}
/**
 * Returns the current time formatted with `Date.prototype.toISOString`.
 *
 * @returns An ISO-8601 timestamp string.
 */
function nowIso(): string {
	const now = new Date();
	return now.toISOString();
}
/**
 * Heuristically decides whether a path points at a test file.
 *
 * A path qualifies when any of its directory segments is `test`, `tests` or
 * `__tests__`, or when its file name ends in `.test.<ext>` / `.spec.<ext>`
 * for a JavaScript/TypeScript extension (`js`, `jsx`, `mjs`, `cjs`, `ts`,
 * `tsx`, `mts`, `cts`). Matching is case-insensitive and accepts both `/`
 * and `\` separators, so relative paths such as `tests/a.ts` and Windows
 * paths are recognised as well.
 *
 * @param path - File path taken from a tool call.
 * @returns `true` when the path looks like it belongs to a test suite.
 */
function looksLikeTestPath(path: string): boolean {
	// Normalise case and separators so one set of checks covers every platform.
	const segments = path.toLowerCase().replace(/\\/g, "/").split("/");
	// The last segment is the file name; everything before it is a directory.
	const fileName = segments.pop() ?? "";
	const inTestDir = segments.some(
		(segment) =>
			segment === "test" || segment === "tests" || segment === "__tests__",
	);
	return inTestDir || /\.(?:test|spec)\.(?:[cm]?[jt]s|[jt]sx)$/.test(fileName);
}
/**
 * Counts assertion-like tokens in a piece of test source.
 *
 * Three token families are counted and summed: `expect(` calls, the words
 * `assert` / `assertion`, and matcher prefixes `toBe…`, `toEqual…`,
 * `toStrict…`. The matcher pattern is case-insensitive because these
 * matchers are camelCase (`toBe`, `toEqual`, `toStrictEqual`); a lowercase-only
 * pattern never matches them. A single statement such as `expect(a).toBe(1)`
 * therefore contributes two tokens. The number is only meaningful when
 * compared between two versions of the same text.
 *
 * @param content - Source text to scan.
 * @returns Total number of matched tokens (0 for empty input).
 */
function countAssertions(content: string): number {
	const patterns = [
		/\bexpect\s*\(/g,
		/\bassert(?:ion)?\b/g,
		/\bto(?:be|equal|strict)/gi,
	];
	return patterns.reduce(
		(total, pattern) => total + (content.match(pattern) ?? []).length,
		0,
	);
}
/**
 * Counts markers that disable tests in a piece of test source.
 *
 * Recognised forms:
 * - `it.skip`, `test.skip`, `describe.skip`, including the `.skip.each(...)`
 *   and tagged-template variants, not just a direct `.skip(` call;
 * - the `x`-prefixed aliases `xit(`, `xtest(` and `xdescribe(`.
 *
 * @param content - Source text to scan.
 * @returns Number of skip markers found (0 for empty input).
 */
function countSkipTokens(content: string): number {
	const patterns = [
		// `\b` after `skip` keeps `.skip(`, `.skip.each` and `.skip\`` in,
		// while leaving longer identifiers such as `.skipIf` out.
		/\b(?:it|test|describe)\.skip\b/g,
		/\bx(?:it|test|describe)\s*\(/g,
	];
	return patterns.reduce(
		(total, pattern) => total + (content.match(pattern) ?? []).length,
		0,
	);
}
/**
 * Compares the old and new text of a test-file edit and reports suspicious
 * changes.
 *
 * Checks, in order:
 * 1. fewer assertion tokens than before (severity `high`);
 * 2. more skip markers than before (severity `medium` unless already set);
 * 3. a "pass with no tests" token in the new text (severity `medium` unless
 *    already set).
 *
 * @param oldText - Text being replaced; empty when unknown.
 * @param newText - Replacement text.
 * @returns The triggered reasons and the highest severity reached, or
 *   `severity: null` with no reasons when nothing was detected.
 */
function inspectEditLikeChange(
	oldText: string,
	newText: string,
): { reasons: string[]; severity: "medium" | "high" | null } {
	const reasons: string[] = [];
	let severity: "medium" | "high" | null = null;

	const oldAssertions = countAssertions(oldText);
	const newAssertions = countAssertions(newText);
	if (oldAssertions > 0 && newAssertions < oldAssertions) {
		reasons.push(`assertions decreased (${oldAssertions} -> ${newAssertions})`);
		severity = "high";
	}

	const oldSkips = countSkipTokens(oldText);
	const newSkips = countSkipTokens(newText);
	if (newSkips > oldSkips) {
		reasons.push(`skip markers increased (${oldSkips} -> ${newSkips})`);
		severity ??= "medium";
	}

	// A `\b` in front of `--` only matches when a word character precedes the
	// dashes, so the CLI flag is matched without a leading word boundary.
	// Otherwise ` --pass-with-no-tests` would never be detected.
	if (/\bpassWithNoTests\b|--pass-with-no-tests\b/i.test(newText)) {
		reasons.push("contains no-tests bypass token");
		severity ??= "medium";
	}

	return { reasons, severity };
}
/**
 * Scans a shell command for ways of making a test run pass vacuously.
 *
 * The command is lower-cased before matching. Checks, in order:
 * 1. `--passWithNoTests` / `--pass-with-no-tests` (severity `high`);
 * 2. an `|| true` fallback, with any amount of whitespace (including none)
 *    between `||` and `true` (severity `medium` unless already set);
 * 3. the phrases "skipping tests" or "disable tests" (severity `medium`
 *    unless already set).
 *
 * @param command - Raw shell command line.
 * @returns The triggered reasons and the highest severity reached, or
 *   `severity: null` with no reasons when nothing was detected.
 */
function inspectBashCommand(command: string): {
	reasons: string[];
	severity: "medium" | "high" | null;
} {
	const reasons: string[] = [];
	let severity: "medium" | "high" | null = null;
	const c = command.toLowerCase();

	if (c.includes("--passwithnotests") || c.includes("--pass-with-no-tests")) {
		reasons.push("test command uses --passWithNoTests");
		severity = "high";
	}
	// The shell does not require a space after `||`, so `||true` must be caught too.
	if (/\|\|\s*true/.test(c)) {
		reasons.push("test command includes '|| true' bypass");
		severity ??= "medium";
	}
	if (/\b(skipping tests|disable tests)\b/.test(c)) {
		reasons.push("test disablement phrase detected");
		severity ??= "medium";
	}

	return { reasons, severity };
}
/**
 * Appends one flag to the incident log as a single JSON line, creating the
 * incidents directory first when it does not exist yet.
 *
 * @param flag - Flag to persist.
 * @returns A promise that settles once the line has been appended; it rejects
 *   if the directory cannot be created or the file cannot be written.
 */
async function recordIntegrityFlag(flag: IntegrityFlag): Promise<void> {
	await mkdir(INCIDENTS_DIR, { recursive: true });
	const line = `${JSON.stringify(flag)}\n`;
	await appendFile(INCIDENT_FILE, line, "utf-8");
}
/**
 * Pi extension entry point for test-diff integrity checks.
 *
 * Registers a `tool_call` listener that inspects `bash` commands and
 * `edit`/`write` calls aimed at test-looking paths. Every suspicious call is
 * appended to the incident log and attached to the session as a
 * `harness-test-integrity-flag` entry plus a `harness-policy-escalation`
 * entry. High-severity edits are additionally blocked when the environment
 * variable `HARNESS_TEST_INTEGRITY_BLOCK` equals `"true"`.
 *
 * Also registers the `harness-test-integrity-last` command, which reports the
 * most recent flag of the current session, or says that there is none. It
 * reports through the UI when one is available and as a session message
 * otherwise.
 *
 * @param pi - Extension API handle supplied by the host.
 */
export default function testDiffIntegrity(pi: ExtensionAPI) {
	/**
	 * Persists a flag, then appends the flag and its escalation record to the
	 * session. `extra` carries the fields that differ between call sites.
	 */
	async function escalate(
		flag: IntegrityFlag,
		extra: { risk_level: string; file_path?: string },
	): Promise<void> {
		await recordIntegrityFlag(flag);
		pi.appendEntry("harness-test-integrity-flag", flag);
		pi.appendEntry("harness-policy-escalation", {
			timestamp: nowIso(),
			reason: "test_diff_integrity",
			force_adversary: true,
			...extra,
		});
	}

	pi.on("tool_call", async (event) => {
		if (event.toolName === "bash") {
			const command = String(event.input.command ?? "");
			const { reasons, severity } = inspectBashCommand(command);
			if (!severity) return undefined;
			// NOTE(review): bash findings always escalate with risk_level "high",
			// even when `severity` is "medium" — confirm this is intentional.
			await escalate(
				{
					timestamp: nowIso(),
					tool_name: "bash",
					severity,
					reasons,
					force_adversary: true,
				},
				{ risk_level: "high" },
			);
			// Bash commands are recorded but never blocked here.
			return undefined;
		}

		if (event.toolName !== "edit" && event.toolName !== "write")
			return undefined;

		// NOTE(review): assumes the host's edit/write tools name their inputs
		// `filePath` / `oldString` / `newString` / `content` — verify against the
		// tool schemas, otherwise no edit will ever be inspected.
		const input = event.input as EditWriteInputLike;
		const filePath = String(input.filePath ?? "");
		if (!filePath || !looksLikeTestPath(filePath)) return undefined;

		const oldText = String(input.oldString ?? "");
		// Edit calls carry `newString`; fall back to `content` when it is empty.
		const newText =
			String(input.newString ?? "") || String(input.content ?? "");
		const { reasons, severity } = inspectEditLikeChange(oldText, newText);
		if (!severity) return undefined;

		await escalate(
			{
				timestamp: nowIso(),
				tool_name: event.toolName,
				file_path: filePath,
				severity,
				reasons,
				force_adversary: true,
			},
			{
				risk_level: severity === "high" ? "high" : "med",
				file_path: filePath,
			},
		);

		// Blocking is opt-in and limited to high-severity findings.
		if (
			severity === "high" &&
			process.env.HARNESS_TEST_INTEGRITY_BLOCK === "true"
		) {
			return {
				block: true,
				reason: `test-diff-integrity: blocked suspicious test edit (${reasons.join("; ")})`,
			};
		}
		return undefined;
	});

	pi.registerCommand("harness-test-integrity-last", {
		description: "Show latest test-diff-integrity escalation",
		handler: async (_args, ctx) => {
			// Report through the UI when there is one, otherwise as a session
			// message, so headless runs also get an answer.
			const report = (msg: string, level: "info" | "warning"): void => {
				if (ctx.hasUI) {
					ctx.ui.notify(msg, level);
					return;
				}
				pi.sendMessage({
					customType: "harness-test-integrity-last",
					content: msg,
					display: true,
				});
			};

			// Walk backwards so the first match is the most recent flag.
			const entries = ctx.sessionManager.getEntries();
			for (let i = entries.length - 1; i >= 0; i--) {
				const entry = entries[i];
				if (
					entry.type !== "custom" ||
					entry.customType !== "harness-test-integrity-flag"
				) {
					continue;
				}
				const data = entry.data as IntegrityFlag;
				const msg = [
					"Latest test integrity flag:",
					`  severity: ${data.severity}`,
					`  tool: ${data.tool_name}`,
					`  file: ${data.file_path ?? "(n/a)"}`,
					`  reasons: ${data.reasons.join("; ")}`,
				].join("\n");
				report(msg, data.severity === "high" ? "warning" : "info");
				return;
			}
			report("No test integrity flags in this session.", "info");
		},
	});
}

package/.pi/extensions/trace-recorder.ts ADDED Viewed

@@ -0,0 +1,315 @@
+/**
+ * trace-recorder — append-only run tracing with correlation ids.
+ *
+ * Writes:
+ * - `.pi/harness/runs/<run_id>/events.jsonl` (full payload refs/events)
+ * - `.pi/harness/runs/<run_id>/trace.json` (RunTrace-like summary)
+ * - `.pi/harness/runs/index.jsonl` (compact trace index)
+ */
+import { appendFile, mkdir, readFile, writeFile } from "node:fs/promises";
+import { join } from "node:path";
+import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
/** Phases a harness run can be in, as read from `harness-policy-state` session entries. */
type HarnessPhase =
	| "plan"
	| "execute"
	| "evaluate"
	| "adversary"
	| "merge";
/** Timing record for a single tool invocation inside a run. */
interface ToolSpan {
	/** Identifier of the tool call; also the key under which the span is stored. */
	tool_call_id: string;
	/** Name of the invoked tool. */
	tool_name: string;
	/** ISO-8601 timestamp taken when execution started. */
	started_at: string;
	/** ISO-8601 timestamp; set at start and overwritten when the tool result arrives. */
	ended_at: string;
}
/** In-memory state of the run currently being traced. */
interface ActiveRun {
	/** Run identifier; also the name of the run's directory. */
	runId: string;
	/** Plan identifier read from the session when the run started. */
	planId: string;
	/** Phase read from the session; refreshed when the run ends. */
	phase: HarnessPhase;
	/** Tool spans keyed by tool call id. */
	toolSpans: Map<string, ToolSpan>;
	/** De-duplicated file paths referenced by tool results. */
	artifactRefs: Set<string>;
}
/**
 * Minimal, all-optional view of a session entry, covering only the fields
 * this module reads.
 */
interface SessionEntryLike {
	/** Entry kind; this module looks at `"custom"` and `"message"`. */
	type?: string;
	/** Sub-type of a custom entry, e.g. `"harness-policy-state"`. */
	customType?: string;
	/** Payload of a custom entry. */
	data?: { phase?: HarnessPhase; planId?: string };
	/** Payload of a message entry, including token usage. */
	message?: {
		role?: string;
		usage?: { input?: number; output?: number };
	};
}
/** Minimal view of a tool event: just the parts that may carry file paths. */
interface ToolEventLike {
	/** Tool input parameters, keyed by name. */
	input?: Record<string, unknown>;
	/** Tool-specific result details; only inspected when it is an object. */
	details?: unknown;
}
/** Root directory, resolved against the current working directory, that holds one sub-directory per run. */
const RUNS_ROOT = join(process.cwd(), ".pi", "harness", "runs");

/** JSON Lines index inside `RUNS_ROOT` that receives one line per finished run. */
const INDEX_PATH = join(RUNS_ROOT, "index.jsonl");
/**
 * Returns the current time formatted with `Date.prototype.toISOString`.
 *
 * @returns An ISO-8601 timestamp string.
 */
function nowIso(): string {
	const now = new Date();
	return now.toISOString();
}
/**
 * Builds a run identifier from the session id and the current epoch time in
 * milliseconds, joined by a dash.
 *
 * @param sessionId - Identifier of the owning session.
 * @returns `<sessionId>-<Date.now()>`.
 */
function makeRunId(sessionId: string): string {
	const startedAtMs = Date.now();
	return `${sessionId}-${startedAtMs}`;
}
/**
 * Finds the current harness phase from the session history.
 *
 * Scans entries from newest to oldest and returns the phase of the first
 * `harness-policy-state` custom entry whose `data.phase` is a known phase.
 * Entries with a missing or unrecognised phase are skipped.
 *
 * @param ctx - Context exposing the session entries.
 * @returns The most recent valid phase, or `"plan"` when there is none.
 */
function parsePhase(ctx: {
	sessionManager: { getEntries(): unknown[] };
}): HarnessPhase {
	const knownPhases: readonly string[] = [
		"plan",
		"execute",
		"evaluate",
		"adversary",
		"merge",
	];
	const history = ctx.sessionManager.getEntries() as SessionEntryLike[];
	let index = history.length;
	while (index-- > 0) {
		const entry = history[index];
		const isPolicyState =
			entry.type === "custom" && entry.customType === "harness-policy-state";
		if (!isPolicyState) continue;
		const candidate = entry.data?.phase;
		if (candidate !== undefined && knownPhases.includes(candidate)) {
			return candidate;
		}
	}
	return "plan";
}
/**
 * Finds the current plan id from the session history.
 *
 * Scans entries from newest to oldest and returns the `data.planId` of the
 * first `harness-policy-state` custom entry that carries a non-empty string.
 *
 * @param ctx - Context exposing the session entries.
 * @returns The most recent plan id, or `"plan-unknown"` when there is none.
 */
function parsePlanId(ctx: {
	sessionManager: { getEntries(): unknown[] };
}): string {
	const history = ctx.sessionManager.getEntries() as SessionEntryLike[];
	let index = history.length;
	while (index-- > 0) {
		const entry = history[index];
		const isPolicyState =
			entry.type === "custom" && entry.customType === "harness-policy-state";
		if (!isPolicyState) continue;
		const candidate = entry.data?.planId;
		if (typeof candidate === "string" && candidate !== "") return candidate;
	}
	return "plan-unknown";
}
/**
 * Adds up the token usage of every assistant message in the session.
 *
 * Only entries of type `"message"` whose message role is `"assistant"` are
 * counted; missing usage numbers count as zero.
 *
 * @param ctx - Context exposing the session entries.
 * @returns Input tokens, output tokens and their sum.
 */
function usageTotals(ctx: { sessionManager: { getEntries(): unknown[] } }): {
	input_tokens: number;
	output_tokens: number;
	total_tokens: number;
} {
	const history = ctx.sessionManager.getEntries() as SessionEntryLike[];
	let inputTokens = 0;
	let outputTokens = 0;
	for (const { type, message } of history) {
		if (type === "message" && message?.role === "assistant") {
			inputTokens += Number(message.usage?.input ?? 0);
			outputTokens += Number(message.usage?.output ?? 0);
		}
	}
	return {
		input_tokens: inputTokens,
		output_tokens: outputTokens,
		total_tokens: inputTokens + outputTokens,
	};
}
/**
 * Collects file paths mentioned by a tool event.
 *
 * Looks at `input.filePath`, `input.path`, `input.targetPath`, `details.path`
 * and `details.targetPath`, in that order, and keeps every value that is a
 * non-empty string. `details` is only consulted when it is a non-null object.
 * Duplicates are kept.
 *
 * @param event - Tool event to inspect.
 * @returns The path-like strings found, possibly empty.
 */
function pullArtifactRefs(event: ToolEventLike): string[] {
	const input = event.input ?? {};
	const details: Record<string, unknown> =
		typeof event.details === "object" && event.details !== null
			? (event.details as Record<string, unknown>)
			: {};
	const candidates: unknown[] = [
		input.filePath,
		input.path,
		input.targetPath,
		details.path,
		details.targetPath,
	];
	return candidates.filter(
		(candidate): candidate is string =>
			typeof candidate === "string" && candidate.length > 0,
	);
}
/**
 * Makes sure the directory of a run exists, creating missing parents.
 *
 * @param runId - Identifier of the run; used as the directory name.
 * @returns The path of the run directory.
 */
async function ensureRunDir(runId: string): Promise<string> {
	const target = join(RUNS_ROOT, runId);
	await mkdir(target, { recursive: true });
	return target;
}
/**
 * Reads the schema version pinned by `.pi/harness/specs/run-trace.schema.json`
 * (looked up under the current working directory).
 *
 * The version is taken from `properties.schema_version.const`. Any failure —
 * missing file, unreadable file, invalid JSON — is treated as "no schema".
 *
 * @returns The pinned version, or `"1.0.0"` when it is absent or unreadable.
 */
async function readRunTraceSchemaVersion(): Promise<string> {
	const fallback = "1.0.0";
	try {
		const schemaPath = join(
			process.cwd(),
			".pi",
			"harness",
			"specs",
			"run-trace.schema.json",
		);
		const raw = await readFile(schemaPath, "utf-8");
		const schema = JSON.parse(raw) as {
			properties?: { schema_version?: { const?: string } };
		};
		return String(schema?.properties?.schema_version?.const ?? fallback);
	} catch {
		return fallback;
	}
}
/**
 * Pi extension entry point for run tracing.
 *
 * Keeps one in-memory `ActiveRun` between `agent_start` and `agent_end`:
 * - `agent_start` creates the run, appends a `harness-trace-state` entry and
 *   writes a `run_start` event;
 * - `tool_execution_start` / `tool_result` maintain tool spans, collect
 *   artifact paths and write `tool_start` / `tool_result` events;
 * - `agent_end` writes `trace.json`, appends a line to the run index, appends
 *   a `harness-run-trace` entry, writes a `run_end` event and clears the run.
 *
 * Also registers the `harness-trace-last` command, which reports the id of the
 * last recorded trace, or says that there is none. It reports through the UI
 * when one is available and as a session message otherwise.
 *
 * @param pi - Extension API handle supplied by the host.
 */
export default function traceRecorder(pi: ExtensionAPI) {
	let activeRun: ActiveRun | null = null;

	/** Appends one timestamped JSON line to the run's `events.jsonl`. */
	async function writeEvent(
		runId: string,
		payload: Record<string, unknown>,
	): Promise<void> {
		const runDir = await ensureRunDir(runId);
		await appendFile(
			join(runDir, "events.jsonl"),
			`${JSON.stringify({ timestamp: nowIso(), ...payload })}\n`,
			"utf-8",
		);
	}

	pi.on("agent_start", async (_event, ctx) => {
		const runId = makeRunId(ctx.sessionManager.getSessionId());
		const run: ActiveRun = {
			runId,
			planId: parsePlanId(ctx),
			phase: parsePhase(ctx),
			toolSpans: new Map(),
			artifactRefs: new Set(),
		};
		activeRun = run;
		pi.appendEntry("harness-trace-state", {
			run_id: runId,
			plan_id: run.planId,
			phase: run.phase,
			started_at: nowIso(),
		});
		await writeEvent(runId, {
			type: "run_start",
			run_id: runId,
			plan_id: run.planId,
			phase: run.phase,
		});
	});

	pi.on("tool_execution_start", async (event) => {
		// Snapshot the run so later awaits cannot observe a replaced or cleared one.
		const run = activeRun;
		if (!run) return;
		// One clock read, so a span that never completes has start === end.
		const startedAt = nowIso();
		run.toolSpans.set(event.toolCallId, {
			tool_call_id: event.toolCallId,
			tool_name: event.toolName,
			started_at: startedAt,
			ended_at: startedAt,
		});
		await writeEvent(run.runId, {
			type: "tool_start",
			run_id: run.runId,
			tool_call_id: event.toolCallId,
			tool_name: event.toolName,
		});
	});

	pi.on("tool_result", async (event) => {
		const run = activeRun;
		if (!run) return;
		const span = run.toolSpans.get(event.toolCallId);
		if (span) {
			span.ended_at = nowIso();
		}
		for (const ref of pullArtifactRefs(event)) run.artifactRefs.add(ref);
		await writeEvent(run.runId, {
			type: "tool_result",
			run_id: run.runId,
			tool_call_id: event.toolCallId,
			tool_name: event.toolName,
			is_error: event.isError,
		});
	});

	pi.on("agent_end", async (_event, ctx) => {
		const run = activeRun;
		if (!run) return;
		// The phase may have advanced during the run; record the latest one.
		run.phase = parsePhase(ctx);
		const schemaVersion = await readRunTraceSchemaVersion();
		const usage = usageTotals(ctx);
		const runDir = await ensureRunDir(run.runId);
		const toolSpans = Array.from(run.toolSpans.values());
		const thinkingLevel = pi.getThinkingLevel();
		const summary = {
			schema_version: schemaVersion,
			contract_version: "1.0.0",
			run_id: run.runId,
			plan_id: run.planId,
			agent_id: ctx.sessionManager.getSessionId(),
			phase: run.phase,
			model: ctx.model?.id ?? "unknown",
			// "minimal" is reported as "off"; every other level is passed through.
			thinking_level: thinkingLevel === "minimal" ? "off" : thinkingLevel,
			tool_spans: toolSpans,
			artifact_refs: Array.from(run.artifactRefs.values()),
			cost: usage,
		};
		const traceFile = join(runDir, "trace.json");
		await writeFile(traceFile, `${JSON.stringify(summary, null, 2)}\n`, "utf-8");
		await appendFile(
			INDEX_PATH,
			`${JSON.stringify({
				timestamp: nowIso(),
				run_id: run.runId,
				plan_id: run.planId,
				phase: run.phase,
				trace_file: traceFile,
			})}\n`,
			"utf-8",
		);
		pi.appendEntry("harness-run-trace", summary);
		await writeEvent(run.runId, {
			type: "run_end",
			run_id: run.runId,
			phase: run.phase,
			tool_span_count: toolSpans.length,
			artifact_ref_count: run.artifactRefs.size,
		});
		activeRun = null;
	});

	pi.registerCommand("harness-trace-last", {
		description: "Show last recorded run trace id",
		handler: async (_args, ctx) => {
			// Report through the UI when there is one, otherwise as a session
			// message, so headless runs also get an answer.
			const report = (msg: string, level: "info" | "warning"): void => {
				if (ctx.hasUI) {
					ctx.ui.notify(msg, level);
					return;
				}
				pi.sendMessage({
					customType: "harness-trace-last",
					content: msg,
					display: true,
				});
			};

			// Walk backwards so the first match is the most recent trace.
			const entries = ctx.sessionManager.getEntries();
			for (let i = entries.length - 1; i >= 0; i--) {
				const entry = entries[i];
				if (
					entry.type === "custom" &&
					entry.customType === "harness-run-trace"
				) {
					const data = entry.data as { run_id?: string } | undefined;
					report(`Last run trace: ${data?.run_id ?? "(unknown)"}`, "info");
					return;
				}
			}
			report("No harness trace recorded yet.", "warning");
		},
	});
}

package/.pi/harness/README.md ADDED Viewed

@@ -0,0 +1,23 @@
+# Harness Scaffolding
+Phase 1 scaffold for the Pi harness runtime surfaces.
+- `specs/` - machine-readable schema contracts and contract notes.
+- `runs/` - per-run metadata and trace indexes.
+- `incidents/` - incident, override, and rollback trail records.
+- `debates/` - debate round artifacts and consensus packets.
+This scaffold is intentionally minimal and safe to adopt incrementally.
+## Governance Extensions
+Governance/runtime enforcement for this harness is implemented as Pi extensions
+under `.pi/extensions/` and auto-loaded through the package `pi.extensions`
+manifest (`package.json`).
+- `policy-gate.ts` - phase state machine + plan-before-mutate enforcement
+- `budget-guard.ts` - hard-stop token budget checks + budget exhausted artifacts
+- `trace-recorder.ts` - append-only run traces + compact index files
+- `review-integrity.ts` - executor/reviewer session-isolation enforcement
+- `test-diff-integrity.ts` - suspicious test-diff detection + adversary escalation
+- `debate-orchestrator.ts` - headless debate bus + consensus packet emission

package/.pi/harness/router/README.md ADDED Viewed

@@ -0,0 +1,35 @@
+# Router Tuning Flow
+Router tuning is intentionally split into two steps:
+1. **Propose** (`propose-router-tuning.mjs`)
+2. **Approve + apply** (`apply-router-proposal.mjs`)
+Blind writes to `.pi/model-router.json` are prohibited by design.
+## Proposal
+```bash
+node .pi/harness/router/propose-router-tuning.mjs \
+  --evidence /path/to/evidence.json \
+  --candidate /path/to/candidate-router.json \
+  --proposal-out .pi/harness/router/proposals/proposal-001.json
+```
+## Apply (requires explicit human approval + justification)
+```bash
+node .pi/harness/router/apply-router-proposal.mjs \
+  --proposal .pi/harness/router/proposals/proposal-001.json \
+  --approve-by "human.name" \
+  --justification "why this is safe" \
+  --write
+```
+## Safety checks
+- Evidence threshold must pass (`sample_count >= min_sample_count`)
+- Regression guard must pass
+- Base router hash in proposal must match current `.pi/model-router.json`
+- Apply requires explicit approver and justification
+- Current router file is backed up before write