npm - @oh-my-pi/pi-coding-agent - Versions diffs - 15.10.3 → 15.10.5 - Mend

@oh-my-pi/pi-coding-agent 15.10.3 → 15.10.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (161) hide show

package/CHANGELOG.md +72 -0
package/dist/types/capability/rule-buckets.d.ts +1 -1
package/dist/types/capability/rule.d.ts +6 -1
package/dist/types/cli/update-cli.d.ts +11 -1
package/dist/types/config/model-registry.d.ts +18 -1
package/dist/types/discovery/at-imports.d.ts +15 -0
package/dist/types/edit/diff.d.ts +3 -2
package/dist/types/eval/__tests__/helpers-local-roots.test.d.ts +1 -0
package/dist/types/eval/__tests__/js-context-manager.test.d.ts +1 -0
package/dist/types/eval/backend.d.ts +7 -0
package/dist/types/eval/bridge-timeout.d.ts +1 -1
package/dist/types/eval/{llm-bridge.d.ts → completion-bridge.d.ts} +8 -8
package/dist/types/eval/idle-timeout.d.ts +1 -1
package/dist/types/eval/js/context-manager.d.ts +1 -0
package/dist/types/eval/js/executor.d.ts +2 -0
package/dist/types/eval/js/index.d.ts +1 -1
package/dist/types/eval/js/shared/helpers.d.ts +6 -0
package/dist/types/eval/js/shared/runtime.d.ts +5 -0
package/dist/types/eval/js/worker-protocol.d.ts +6 -0
package/dist/types/eval/py/executor.d.ts +7 -0
package/dist/types/eval/py/index.d.ts +1 -1
package/dist/types/export/ttsr.d.ts +14 -0
package/dist/types/extensibility/extensions/types.d.ts +8 -1
package/dist/types/extensibility/legacy-pi-ai-shim.d.ts +1 -1
package/dist/types/internal-urls/local-protocol.d.ts +10 -0
package/dist/types/mcp/oauth-flow.d.ts +2 -2
package/dist/types/modes/components/custom-editor.d.ts +3 -0
package/dist/types/modes/components/{status-line.d.ts → status-line/component.d.ts} +2 -32
package/dist/types/modes/components/status-line/index.d.ts +1 -0
package/dist/types/modes/components/status-line/types.d.ts +31 -2
package/dist/types/modes/image-references.d.ts +8 -3
package/dist/types/modes/interactive-mode.d.ts +1 -1
package/dist/types/modes/theme/theme.d.ts +2 -1
package/dist/types/modes/types.d.ts +2 -1
package/dist/types/modes/utils/ui-helpers.d.ts +2 -2
package/dist/types/session/agent-session.d.ts +0 -2
package/dist/types/tools/ask.d.ts +1 -0
package/dist/types/tools/browser/tab-worker.d.ts +15 -0
package/dist/types/tools/index.d.ts +17 -0
package/dist/types/tools/render-utils.d.ts +1 -1
package/dist/types/tools/tool-timeouts.d.ts +1 -1
package/dist/types/utils/block-context.d.ts +35 -0
package/dist/types/utils/image-loading.d.ts +12 -0
package/package.json +29 -9
package/src/capability/rule-buckets.ts +4 -2
package/src/capability/rule.ts +10 -1
package/src/cli/auth-broker-cli.ts +6 -7
package/src/cli/auth-gateway-cli.ts +1 -1
package/src/cli/list-models.ts +5 -0
package/src/cli/update-cli.ts +138 -16
package/src/config/model-registry.ts +81 -2
package/src/debug/index.ts +4 -8
package/src/discovery/at-imports.ts +273 -0
package/src/discovery/builtin-rules/index.ts +4 -0
package/src/discovery/builtin-rules/ts-no-test-timers.md +55 -0
package/src/discovery/builtin-rules/ts-redundant-clear-guard.md +75 -0
package/src/discovery/helpers.ts +2 -1
package/src/edit/diff.ts +114 -4
package/src/edit/hashline/diff.ts +1 -1
package/src/edit/hashline/execute.ts +1 -1
package/src/edit/modes/patch.ts +6 -2
package/src/edit/modes/replace.ts +1 -1
package/src/edit/renderer.ts +12 -2
package/src/eval/__tests__/agent-bridge.test.ts +13 -0
package/src/eval/__tests__/{llm-bridge.test.ts → completion-bridge.test.ts} +60 -54
package/src/eval/__tests__/helpers-local-roots.test.ts +58 -0
package/src/eval/__tests__/js-context-manager.test.ts +241 -0
package/src/eval/agent-bridge.ts +6 -1
package/src/eval/backend.ts +15 -0
package/src/eval/bridge-timeout.ts +1 -1
package/src/eval/{llm-bridge.ts → completion-bridge.ts} +30 -27
package/src/eval/idle-timeout.ts +1 -1
package/src/eval/js/context-manager.ts +70 -8
package/src/eval/js/executor.ts +3 -0
package/src/eval/js/index.ts +7 -1
package/src/eval/js/shared/helpers.ts +53 -6
package/src/eval/js/shared/prelude.txt +4 -4
package/src/eval/js/shared/runtime.ts +8 -0
package/src/eval/js/tool-bridge.ts +3 -3
package/src/eval/js/worker-core.ts +1 -0
package/src/eval/js/worker-entry.ts +6 -0
package/src/eval/js/worker-protocol.ts +6 -0
package/src/eval/py/executor.ts +12 -0
package/src/eval/py/index.ts +7 -1
package/src/eval/py/prelude.py +46 -7
package/src/eval/py/runner.py +1 -0
package/src/exa/render.ts +1 -1
package/src/export/ttsr.ts +122 -1
package/src/extensibility/extensions/types.ts +8 -1
package/src/extensibility/legacy-pi-ai-shim.ts +1 -1
package/src/extensibility/plugins/doctor.ts +1 -1
package/src/extensibility/plugins/legacy-pi-compat.ts +6 -5
package/src/goals/tools/goal-tool.ts +1 -1
package/src/internal-urls/docs-index.generated.ts +8 -6
package/src/internal-urls/local-protocol.ts +13 -0
package/src/lsp/render.ts +8 -6
package/src/mcp/oauth-flow.ts +3 -3
package/src/mcp/render.ts +7 -1
package/src/modes/components/custom-editor.ts +12 -6
package/src/modes/components/login-dialog.ts +1 -1
package/src/modes/components/oauth-selector.ts +4 -4
package/src/modes/components/read-tool-group.ts +10 -3
package/src/modes/components/{status-line.ts → status-line/component.ts} +18 -40
package/src/modes/components/status-line/index.ts +1 -0
package/src/modes/components/status-line/types.ts +23 -8
package/src/modes/components/tips.txt +1 -1
package/src/modes/components/tool-execution.ts +1 -1
package/src/modes/components/transcript-container.ts +17 -10
package/src/modes/components/user-message.ts +6 -3
package/src/modes/components/welcome.ts +1 -1
package/src/modes/controllers/extension-ui-controller.ts +143 -127
package/src/modes/controllers/input-controller.ts +36 -10
package/src/modes/controllers/mcp-command-controller.ts +28 -12
package/src/modes/controllers/selector-controller.ts +4 -11
package/src/modes/controllers/ssh-command-controller.ts +2 -2
package/src/modes/image-references.ts +13 -7
package/src/modes/interactive-mode.ts +2 -2
package/src/modes/rpc/rpc-mode.ts +1 -1
package/src/modes/setup-wizard/scenes/sign-in.ts +3 -11
package/src/modes/theme/theme.ts +95 -1
package/src/modes/types.ts +2 -1
package/src/modes/utils/ui-helpers.ts +14 -5
package/src/prompts/system/tiny-title-system.md +1 -1
package/src/prompts/system/title-system.md +16 -3
package/src/prompts/system/workflow-notice.md +1 -1
package/src/prompts/tools/bash.md +1 -1
package/src/prompts/tools/eval.md +6 -6
package/src/sdk.ts +31 -14
package/src/session/agent-session.ts +213 -155
package/src/session/session-manager.ts +1 -1
package/src/slash-commands/builtin-registry.ts +1 -1
package/src/system-prompt.ts +15 -9
package/src/task/render.ts +20 -8
package/src/tools/ask.ts +14 -5
package/src/tools/bash-interactive.ts +1 -1
package/src/tools/bash.ts +14 -2
package/src/tools/browser/render.ts +5 -2
package/src/tools/browser/tab-worker.ts +211 -91
package/src/tools/debug.ts +5 -2
package/src/tools/eval-render.ts +8 -5
package/src/tools/eval.ts +2 -2
package/src/tools/gh-renderer.ts +29 -15
package/src/tools/index.ts +32 -0
package/src/tools/inspect-image-renderer.ts +12 -5
package/src/tools/job.ts +9 -6
package/src/tools/memory-render.ts +19 -5
package/src/tools/read.ts +165 -18
package/src/tools/render-utils.ts +3 -1
package/src/tools/resolve.ts +1 -1
package/src/tools/review.ts +1 -1
package/src/tools/ssh.ts +4 -1
package/src/tools/todo.ts +8 -1
package/src/tools/tool-timeouts.ts +1 -1
package/src/tools/write.ts +1 -1
package/src/tui/code-cell.ts +1 -1
package/src/utils/block-context.ts +312 -0
package/src/utils/image-loading.ts +31 -1
package/src/utils/title-generator.ts +2 -2
package/src/web/search/providers/codex.ts +1 -1
package/src/web/search/render.ts +14 -6
/package/dist/types/eval/__tests__/{llm-bridge.test.d.ts → completion-bridge.test.d.ts} +0 -0

package/src/edit/diff.ts CHANGED Viewed

@@ -6,6 +6,7 @@
  */
 import * as Diff from "diff";
 import { resolveToCwd } from "../tools/path-utils";
+import { type BlockContextSource, findBlockContextLines } from "../utils/block-context";
 import { DEFAULT_FUZZY_THRESHOLD, EditMatchError, findMatch } from "./modes/replace";
 import { adjustIndentation, normalizeToLF, stripBom } from "./normalize";
 import { readEditFileText } from "./read-file";
@@ -54,11 +55,109 @@ function formatNumberedDiffLine(prefix: "+" | "-" | " ", lineNum: number, conten
 	return `${prefix}${lineNum}|${content}`;
 }
+type DiffSource = "old" | "new";
+interface ParsedNumberedDiffRow { // One numbered diff row split into its parts, as returned by parseNumberedDiffRow.
+	prefix: "+" | "-" | " "; // leading marker character of the row
+	lineNumber: number; // decimal number found between the marker and the pipe separator
+	content: string; // everything after the pipe separator
+	source: DiffSource; // which side lineNumber is counted against
+}
+function parseNumberedDiffRow(row: string): ParsedNumberedDiffRow | undefined { // Split a marker + number + pipe + content row into its parts; undefined when the row has any other shape.
+	const match = /^([+\- ])(\d+)\|(.*)$/s.exec(row); // groups: marker, line number, content; the s flag lets the content group span newlines
+	if (!match) return undefined;
+	const prefix = match[1] as "+" | "-" | " ";
+	const lineNumber = Number.parseInt(match[2], 10);
+	if (!Number.isFinite(lineNumber)) return undefined; // NOTE(review): presumably guards digit runs too long to yield a finite number; confirm
+	return {
+		prefix,
+		lineNumber,
+		content: match[3] ?? "",
+		source: prefix === "+" ? "new" : "old", // only plus rows map to the new side; minus and space rows map to the old side
+	};
+}
+function isDiffChangeRow(row: string | undefined): boolean { // True only for a defined row that starts with a plus or minus marker.
+	return row !== undefined && (row.startsWith("+") || row.startsWith("-"));
+}
+function adjustedContextInsertIndex(rows: readonly string[], index: number): number { // Keep an insertion point from landing inside a contiguous run of change rows.
+	let start = index;
+	while (start > 0 && isDiffChangeRow(rows[start - 1])) start--; // walk back to the first change row of the run touching index
+	let end = index;
+	while (end < rows.length && isDiffChangeRow(rows[end])) end++; // walk forward to the first row after that run
+	return index > start && index < end ? end : index; // strictly inside the run: move past its end; otherwise keep index as given
+}
+function insertBracketContextRows( // Splice extra context rows for one side into rows, in place, adding ellipsis rows where line numbers jump.
+	rows: string[], // diff rows to mutate
+	source: DiffSource, // side whose line numbers the contextLines keys refer to
+	contextLines: ReadonlyMap<number, string>, // line number mapped to the text to show for that line
+	seenRows: Set<string>, // rows already present; updated here so a row is never inserted twice
+): void {
+	const context = [...contextLines].sort(([left], [right]) => left - right); // handle entries in ascending line-number order
+	for (const [lineNumber, text] of context) {
+		const row = formatNumberedDiffLine(" ", lineNumber, text); // extra context is always emitted with the space marker
+		if (seenRows.has(row)) continue;
+		let insertIndex = rows.length; // default: append when no same-side row at or after lineNumber exists
+		let previousSourceLine: number | undefined;
+		let nextSourceLine: number | undefined;
+		for (let i = 0; i < rows.length; i++) {
+			const parsed = parseNumberedDiffRow(rows[i]);
+			if (!parsed || parsed.source !== source) continue; // skip unparsable rows and rows numbered against the other side
+			if (parsed.lineNumber < lineNumber) {
+				previousSourceLine = parsed.lineNumber; // last same-side line seen before the target so far
+				continue;
+			}
+			nextSourceLine = parsed.lineNumber;
+			insertIndex = i; // first same-side row whose number is not below the target
+			break;
+		}
+		const chunk: string[] = [];
+		if (previousSourceLine !== undefined && lineNumber > previousSourceLine + 1) chunk.push("..."); // gap before the inserted line
+		chunk.push(row);
+		if (nextSourceLine !== undefined && nextSourceLine > lineNumber + 1) chunk.push("..."); // gap after the inserted line
+		const adjustedIndex = adjustedContextInsertIndex(rows, insertIndex); // avoid splitting a run of change rows
+		rows.splice(adjustedIndex, 0, ...chunk);
+		for (const inserted of chunk) seenRows.add(inserted); // records the ellipsis rows as well as the context row
+	}
+}
+function addMatchingBracketContextRows( // Extend rows in place with block-context rows computed for the old side, then for the new side.
+	rows: string[], // numbered diff rows built so far; mutated
+	oldLines: readonly string[], // old content as individual lines
+	newLines: readonly string[], // new content as individual lines
+	source: BlockContextSource, // forwarded unchanged to findBlockContextLines
+): void {
+	const oldVisible: number[] = [];
+	const newVisible: number[] = [];
+	const seenRows = new Set(rows); // snapshot of existing rows so duplicates are not inserted
+	for (const row of rows) {
+		const parsed = parseNumberedDiffRow(row);
+		if (!parsed) continue; // rows without the numbered shape contribute no visible line
+		if (parsed.source === "old") oldVisible.push(parsed.lineNumber);
+		else newVisible.push(parsed.lineNumber);
+	}
+	insertBracketContextRows(rows, "old", findBlockContextLines(oldLines, oldVisible, source), seenRows); // NOTE(review): findBlockContextLines is defined outside this view; presumably maps line numbers to enclosing-block lines; confirm
+	insertBracketContextRows(rows, "new", findBlockContextLines(newLines, newVisible, source), seenRows); // runs second, so it sees rows and seenRows as updated by the old-side pass
+}
 /**
  * Generate a unified diff string with line numbers and context.
  * Returns both the diff string and the first changed line number (in the new file).
  */
-export function generateDiffString(oldContent: string, newContent: string, contextLines = 2): DiffResult {
+export function generateDiffString(
+	oldContent: string,
+	newContent: string,
+	contextLines = 2,
+	source: BlockContextSource = {},
+): DiffResult {
 	const parts = Diff.diffLines(oldContent, newContent);
 	const output: string[] = [];
@@ -133,8 +232,10 @@ export function generateDiffString(oldContent: string, newContent: string, conte
 					newLineNum++;
 				}
+				// Mid-skip placeholder is omitted too: the jump between the trailing
+				// number of the leading context and the leading number of the
+				// trailing context conveys the gap, just like leading/trailing skips.
 				if (middleSkip > 0) {
-					output.push(formatNumberedDiffLine(" ", oldLineNum, "..."));
 					oldLineNum += middleSkip;
 					newLineNum += middleSkip;
 					for (const line of linesToShow.slice(firstChunkLength)) {
@@ -160,6 +261,8 @@ export function generateDiffString(oldContent: string, newContent: string, conte
 		}
 	}
+	addMatchingBracketContextRows(output, oldContent.split("\n"), newContent.split("\n"), source);
 	return { diff: output.join("\n"), firstChangedLine };
 }
@@ -187,7 +290,12 @@ export interface ReplaceResult {
  * Generate a unified diff string without file headers.
  * Returns both the diff string and the first changed line number (in the new file).
  */
-export function generateUnifiedDiffString(oldContent: string, newContent: string, contextLines = 3): DiffResult {
+export function generateUnifiedDiffString(
+	oldContent: string,
+	newContent: string,
+	contextLines = 3,
+	source: BlockContextSource = {},
+): DiffResult {
 	const patch = Diff.structuredPatch("", "", oldContent, newContent, "", "", { context: contextLines });
 	const output: string[] = [];
 	let firstChangedLine: number | undefined;
@@ -218,6 +326,8 @@ export function generateUnifiedDiffString(oldContent: string, newContent: string
 		}
 	}
+	addMatchingBracketContextRows(output, oldContent.split("\n"), newContent.split("\n"), source);
 	return { diff: output.join("\n"), firstChangedLine };
 }
@@ -805,7 +915,7 @@ export async function computeEditDiff(
 			};
 		}
-		return generateDiffString(normalizedContent, result.content);
+		return generateDiffString(normalizedContent, result.content, undefined, { path });
 	} catch (err) {
 		return { error: err instanceof Error ? err.message : String(err) };
 	}

package/src/edit/hashline/diff.ts CHANGED Viewed

@@ -230,7 +230,7 @@ export async function computeHashlineSectionDiff(
 		if (options.streaming) return buildStreamingSectionDiff(section, normalized);
 		const result = applyPreviewEdits({ section, absolutePath, normalized, snapshots, options });
 		if (normalized === result.text) return { error: `No changes would be made to ${section.path}.` };
-		return generateDiffString(normalized, result.text);
+		return generateDiffString(normalized, result.text, undefined, { path: section.path });
 	} catch (err) {
 		return { error: err instanceof Error ? err.message : String(err) };
 	}

package/src/edit/hashline/execute.ts CHANGED Viewed

@@ -97,7 +97,7 @@ function renderSection(result: PatchSectionResult, diagnostics: FileDiagnosticsR
 		};
 	}
-	const diff = generateDiffString(result.before, result.after);
+	const diff = generateDiffString(result.before, result.after, undefined, { path: result.path });
 	const preview = buildCompactDiffPreview(diff.diff);
 	const meta = outputMeta()
 		.diagnostics(diagnostics?.summary ?? "", diagnostics?.messages ?? [])

package/src/edit/modes/patch.ts CHANGED Viewed

@@ -1571,7 +1571,9 @@ export async function computePatchDiff(
 		if (!normalizedOld && !normalizedNew) {
 			return { diff: "", firstChangedLine: undefined };
 		}
-		return generateUnifiedDiffString(normalizedOld, normalizedNew);
+		return generateUnifiedDiffString(normalizedOld, normalizedNew, undefined, {
+			path: result.change.newPath ?? result.change.path,
+		});
 	} catch (err) {
 		return { error: err instanceof Error ? err.message : String(err) };
 	}
@@ -1785,7 +1787,9 @@ export async function executePatchSingle(
 	if (result.change.type === "update" && result.change.oldContent && result.change.newContent) {
 		const normalizedOld = normalizeToLF(stripBom(result.change.oldContent).text);
 		const normalizedNew = normalizeToLF(stripBom(result.change.newContent).text);
-		diffResult = generateUnifiedDiffString(normalizedOld, normalizedNew);
+		diffResult = generateUnifiedDiffString(normalizedOld, normalizedNew, undefined, {
+			path: result.change.newPath ?? result.change.path,
+		});
 	}
 	let resultText: string;

package/src/edit/modes/replace.ts CHANGED Viewed

@@ -1078,7 +1078,7 @@ export async function executeReplaceSingle(
 	);
 	invalidateFsScanAfterWrite(absolutePath);
-	const diffResult = generateDiffString(normalizedContent, result.content);
+	const diffResult = generateDiffString(normalizedContent, result.content, undefined, { path });
 	const resultText =
 		result.count > 1
 			? `Successfully replaced ${result.count} occurrences in ${path}.`

package/src/edit/renderer.ts CHANGED Viewed

@@ -260,6 +260,7 @@ function renderEditHeader(
 	uiTheme: Theme,
 	options: {
 		icon: "pending" | "success" | "error";
+		iconOverride?: string;
 		spinnerFrame?: number;
 		op?: Operation;
 		rawPath: string;
@@ -279,8 +280,16 @@ function renderEditHeader(
 	const formatted = formatEditDescription(options.rawPath, uiTheme, descriptionOptions);
 	const suffix = `${options.statsSuffix ?? ""}${options.extraSuffix ?? ""}`;
 	const buildHeader = (description: string): string =>
-		renderStatusLine({ icon: options.icon, spinnerFrame: options.spinnerFrame, title, description }, uiTheme) +
-		suffix;
+		renderStatusLine(
+			{
+				icon: options.icon,
+				iconOverride: options.iconOverride,
+				spinnerFrame: options.spinnerFrame,
+				title,
+				description,
+			},
+			uiTheme,
+		) + suffix;
 	const header = buildHeader(formatted.description);
 	const overflow = visibleWidth(header) - editHeaderLabelBudget(width, uiTheme);
@@ -633,6 +642,7 @@ function renderSingleFileResult(
 		const statsSuffix = headerDiff ? formatDiffStatsSuffix(headerDiff, uiTheme) : "";
 		const header = renderEditHeader(width, uiTheme, {
 			icon: isError ? "error" : "success",
+			iconOverride: !isError && !options.isPartial ? uiTheme.styledSymbol("tool.edit", "accent") : undefined,
 			op,
 			rawPath,
 			rename,

package/src/eval/__tests__/agent-bridge.test.ts CHANGED Viewed

@@ -205,6 +205,19 @@ describe("runEvalAgent", () => {
 		expect(secondOptions.outputSchema).toBeUndefined();
 	});
+	it("forces LSP off for bridge subagents even when task.enableLsp is on", async () => {
+		mockAgents();
+		const runSpy = vi.spyOn(taskExecutor, "runSubprocess").mockImplementation(async options => singleResult(options));
+		// makeSession() defaults to enableLsp: true and task.enableLsp: true.
+		const session = makeSession();
+		await runEvalAgent({ prompt: "hello" }, { session });
+		const options = runSpy.mock.calls[0]?.[0];
+		if (!options) throw new Error("runSubprocess was not called");
+		expect(options.enableLsp).toBe(false);
+	});
 	it("maps successful and failed subagent results", async () => {
 		mockAgents();
 		const runSpy = vi.spyOn(taskExecutor, "runSubprocess");

package/src/eval/__tests__/{llm-bridge.test.ts → completion-bridge.test.ts} RENAMED Viewed

@@ -10,10 +10,10 @@ import { Settings } from "../../config/settings";
 import type { ToolSession } from "../../tools";
 import { ToolError } from "../../tools/tool-errors";
 import { EVAL_TIMEOUT_PAUSE_OP, EVAL_TIMEOUT_RESUME_OP } from "../bridge-timeout";
+import { runEvalCompletion } from "../completion-bridge";
 import { IdleTimeout } from "../idle-timeout";
 import { disposeAllVmContexts } from "../js/context-manager";
 import { executeJs } from "../js/executor";
-import { runEvalLlm } from "../llm-bridge";
 import { disposeAllKernelSessions, type PythonResult } from "../py/executor";
 function makeModel(provider: string, id: string, extra: Partial<Model<Api>> = {}): Model<Api> {
@@ -98,16 +98,19 @@ function assistant(opts: {
 	};
 }
-async function runPythonLlmInSubprocess(options: { structured: boolean; tempDir: TempDir }): Promise<PythonResult> {
+async function runPythonCompletionInSubprocess(options: {
+	structured: boolean;
+	tempDir: TempDir;
+}): Promise<PythonResult> {
 	const repoRoot = path.resolve(import.meta.dir, "../../../..");
-	const scriptPath = path.join(options.tempDir.path(), "run-python-llm.ts");
-	const resultPath = path.join(options.tempDir.path(), "python-llm-result.json");
+	const scriptPath = path.join(options.tempDir.path(), "run-python-completion.ts");
+	const resultPath = path.join(options.tempDir.path(), "python-completion-result.json");
 	const aiPath = path.resolve(import.meta.dir, "../../../../ai/src/index.ts");
 	const executorPath = path.resolve(import.meta.dir, "../py/executor.ts");
 	const settingsPath = path.resolve(import.meta.dir, "../../config/settings.ts");
 	const code = options.structured
-		? 'import json\nprint(json.dumps(llm("hi", schema={"type": "object"})))'
-		: 'print(llm("hi", model="smol"))';
+		? 'import json\nprint(json.dumps(completion("hi", schema={"type": "object"})))'
+		: 'print(completion("hi", model="smol"))';
 	const responseContent = options.structured
 		? '[{ type: "toolCall", id: "tc-1", name: "respond", arguments: { ok: true } }]'
 		: '[{ type: "text", text: "hello from python" }]';
@@ -153,7 +156,7 @@ vi.spyOn(ai, "completeSimple").mockResolvedValue({
 });
 const result = await executePython(${JSON.stringify(code)}, {
 	cwd: ${JSON.stringify(options.tempDir.path())},
-	sessionId: ${JSON.stringify(`py-llm:${options.structured ? "struct" : "plain"}`)},
+	sessionId: ${JSON.stringify(`py-completion:${options.structured ? "struct" : "plain"}`)},
 	sessionFile: ${JSON.stringify(path.join(options.tempDir.path(), "session.jsonl"))},
 	toolSession: session,
 	kernelMode: "per-call",
@@ -165,11 +168,12 @@ process.exit(0);
 	const child = await $`bun ${scriptPath}`.cwd(repoRoot).quiet().nothrow();
 	const stdout = child.stdout.toString();
 	const stderr = child.stderr.toString();
-	if (child.exitCode !== 0) throw new Error(stderr || stdout || `Python llm subprocess exited with ${child.exitCode}`);
+	if (child.exitCode !== 0)
+		throw new Error(stderr || stdout || `Python completion subprocess exited with ${child.exitCode}`);
 	return (await Bun.file(resultPath).json()) as PythonResult;
 }
-describe("runEvalLlm", () => {
+describe("runEvalCompletion", () => {
 	afterEach(() => {
 		vi.restoreAllMocks();
 	});
@@ -178,9 +182,9 @@ describe("runEvalLlm", () => {
 		const spy = vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "ok" }));
 		const session = makeSession();
-		await runEvalLlm({ prompt: "q", model: "smol" }, { session });
-		await runEvalLlm({ prompt: "q", model: "default" }, { session });
-		await runEvalLlm({ prompt: "q", model: "slow" }, { session });
+		await runEvalCompletion({ prompt: "q", model: "smol" }, { session });
+		await runEvalCompletion({ prompt: "q", model: "default" }, { session });
+		await runEvalCompletion({ prompt: "q", model: "slow" }, { session });
 		const resolved = spy.mock.calls.map(call => {
 			const model = call[0] as Model<Api>;
@@ -193,7 +197,7 @@ describe("runEvalLlm", () => {
 		const spy = vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "ok" }));
 		const session = makeSession({ available: [SMOL, DEFAULT, SLOW], activeModel: "p/slow" });
-		await runEvalLlm({ prompt: "q", model: "default" }, { session });
+		await runEvalCompletion({ prompt: "q", model: "default" }, { session });
 		const model = spy.mock.calls[0]?.[0] as Model<Api>;
 		expect(`${model.provider}/${model.id}`).toBe("p/slow");
@@ -201,7 +205,7 @@ describe("runEvalLlm", () => {
 	it("returns the completion text in plain mode", async () => {
 		vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "the answer" }));
-		const result = await runEvalLlm({ prompt: "q", model: "smol" }, { session: makeSession() });
+		const result = await runEvalCompletion({ prompt: "q", model: "smol" }, { session: makeSession() });
 		expect(result.text).toBe("the answer");
 		expect(result.details).toEqual({ model: "p/smol", tier: "smol", structured: false });
 	});
@@ -209,10 +213,10 @@ describe("runEvalLlm", () => {
 	it("supplies a non-empty systemPrompt when system is omitted (codex 'Instructions are required' guard)", async () => {
 		// The openai-codex Responses transformer drops `instructions` when no
 		// system prompt is provided, and the remote endpoint then 400s with
-		// "Instructions are required". runEvalLlm must always carry a non-empty
-		// systemPrompt so `llm("…")` without a `system` argument works.
+		// "Instructions are required". runEvalCompletion must always carry a non-empty
+		// systemPrompt so `completion("…")` without a `system` argument works.
 		const spy = vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "ok" }));
-		await runEvalLlm({ prompt: "q", model: "smol" }, { session: makeSession() });
+		await runEvalCompletion({ prompt: "q", model: "smol" }, { session: makeSession() });
 		const ctx = spy.mock.calls[0]?.[1] as { systemPrompt?: string[] };
 		expect(ctx.systemPrompt).toBeDefined();
 		expect(ctx.systemPrompt?.length).toBeGreaterThan(0);
@@ -221,7 +225,7 @@ describe("runEvalLlm", () => {
 	it("honors an explicit system prompt instead of overriding it", async () => {
 		const spy = vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "ok" }));
-		await runEvalLlm({ prompt: "q", model: "smol", system: "Be terse." }, { session: makeSession() });
+		await runEvalCompletion({ prompt: "q", model: "smol", system: "Be terse." }, { session: makeSession() });
 		const ctx = spy.mock.calls[0]?.[1] as { systemPrompt?: string[] };
 		expect(ctx.systemPrompt).toEqual(["Be terse."]);
 	});
@@ -230,7 +234,7 @@ describe("runEvalLlm", () => {
 		const spy = vi
 			.spyOn(ai, "completeSimple")
 			.mockResolvedValue(assistant({ toolCall: { name: "respond", arguments: { answer: 42 } } }));
-		const result = await runEvalLlm(
+		const result = await runEvalCompletion(
 			{ prompt: "q", model: "smol", schema: { type: "object", properties: { answer: { type: "number" } } } },
 			{ session: makeSession() },
 		);
@@ -246,7 +250,7 @@ describe("runEvalLlm", () => {
 	it("falls back to JSON embedded in text when the model skips the respond tool", async () => {
 		vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: 'here: {"answer": 7}' }));
-		const result = await runEvalLlm(
+		const result = await runEvalCompletion(
 			{ prompt: "q", model: "smol", schema: { type: "object" } },
 			{ session: makeSession() },
 		);
@@ -257,8 +261,8 @@ describe("runEvalLlm", () => {
 		const spy = vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "ok" }));
 		const session = makeSession({ available: [SMOL, DEFAULT, REASONING_SLOW] });
-		await runEvalLlm({ prompt: "q", model: "smol" }, { session });
-		await runEvalLlm({ prompt: "q", model: "slow" }, { session });
+		await runEvalCompletion({ prompt: "q", model: "smol" }, { session });
+		await runEvalCompletion({ prompt: "q", model: "slow" }, { session });
 		const smolOpts = spy.mock.calls[0]?.[2] as { reasoning?: unknown };
 		const slowOpts = spy.mock.calls[1]?.[2] as { reasoning?: unknown };
@@ -269,47 +273,49 @@ describe("runEvalLlm", () => {
 	it("does not request reasoning for the slow tier on a non-reasoning model", async () => {
 		const spy = vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "ok" }));
 		// SLOW is reasoning:false — must not trip requireSupportedEffort downstream.
-		const result = await runEvalLlm({ prompt: "q", model: "slow" }, { session: makeSession() });
+		const result = await runEvalCompletion({ prompt: "q", model: "slow" }, { session: makeSession() });
 		expect(result.text).toBe("ok");
 		const opts = spy.mock.calls[0]?.[2] as { reasoning?: unknown };
 		expect(opts.reasoning).toBeUndefined();
 	});
 	it("throws ToolError on invalid arguments", async () => {
-		await expect(runEvalLlm({ prompt: "" }, { session: makeSession() })).rejects.toBeInstanceOf(ToolError);
-		await expect(runEvalLlm({ prompt: "q", model: "huge" }, { session: makeSession() })).rejects.toBeInstanceOf(
-			ToolError,
-		);
+		await expect(runEvalCompletion({ prompt: "" }, { session: makeSession() })).rejects.toBeInstanceOf(ToolError);
+		await expect(
+			runEvalCompletion({ prompt: "q", model: "huge" }, { session: makeSession() }),
+		).rejects.toBeInstanceOf(ToolError);
 	});
 	it("throws ToolError when no model resolves for the tier", async () => {
 		const session = makeSession({ available: [DEFAULT], roles: { smol: "missing/model" } });
-		await expect(runEvalLlm({ prompt: "q", model: "smol" }, { session })).rejects.toBeInstanceOf(ToolError);
+		await expect(runEvalCompletion({ prompt: "q", model: "smol" }, { session })).rejects.toBeInstanceOf(ToolError);
 	});
 	it("throws ToolError when the resolved model has no API key", async () => {
 		const session = makeSession({ apiKey: null });
-		await expect(runEvalLlm({ prompt: "q", model: "smol" }, { session })).rejects.toBeInstanceOf(ToolError);
+		await expect(runEvalCompletion({ prompt: "q", model: "smol" }, { session })).rejects.toBeInstanceOf(ToolError);
 	});
 	it("maps error and aborted stop reasons to ToolError", async () => {
 		vi.spyOn(ai, "completeSimple").mockResolvedValueOnce(assistant({ stopReason: "error", errorMessage: "boom" }));
-		await expect(runEvalLlm({ prompt: "q", model: "smol" }, { session: makeSession() })).rejects.toThrow("boom");
+		await expect(runEvalCompletion({ prompt: "q", model: "smol" }, { session: makeSession() })).rejects.toThrow(
+			"boom",
+		);
 		vi.spyOn(ai, "completeSimple").mockResolvedValueOnce(assistant({ stopReason: "aborted" }));
-		await expect(runEvalLlm({ prompt: "q", model: "smol" }, { session: makeSession() })).rejects.toBeInstanceOf(
-			ToolError,
-		);
+		await expect(
+			runEvalCompletion({ prompt: "q", model: "smol" }, { session: makeSession() }),
+		).rejects.toBeInstanceOf(ToolError);
 	});
 	it("throws ToolError when plain mode produces no text", async () => {
 		vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "" }));
-		await expect(runEvalLlm({ prompt: "q", model: "smol" }, { session: makeSession() })).rejects.toBeInstanceOf(
-			ToolError,
-		);
+		await expect(
+			runEvalCompletion({ prompt: "q", model: "smol" }, { session: makeSession() }),
+		).rejects.toBeInstanceOf(ToolError);
 	});
-	it("pauses the idle watchdog while a slow llm() request is in flight", async () => {
+	it("pauses the idle watchdog while a slow completion() request is in flight", async () => {
 		// A oneshot completion emits no status until it returns; delegated model
 		// time must be invisible to the eval timeout budget.
 		vi.spyOn(ai, "completeSimple").mockImplementation(async () => {
@@ -319,7 +325,7 @@ describe("runEvalLlm", () => {
 		const ops: string[] = [];
 		using idle = new IdleTimeout(60);
-		const result = await runEvalLlm(
+		const result = await runEvalCompletion(
 			{ prompt: "q", model: "smol" },
 			{
 				session: makeSession(),
@@ -333,12 +339,12 @@ describe("runEvalLlm", () => {
 		);
 		expect(result.text).toBe("the answer");
-		expect(ops).toEqual([EVAL_TIMEOUT_PAUSE_OP, EVAL_TIMEOUT_RESUME_OP, "llm"]);
+		expect(ops).toEqual([EVAL_TIMEOUT_PAUSE_OP, EVAL_TIMEOUT_RESUME_OP, "completion"]);
 		expect(idle.signal.aborted).toBe(false);
 	});
 });
-describe("llm() through eval runtimes", () => {
+describe("completion() through eval runtimes", () => {
 	afterEach(() => {
 		vi.restoreAllMocks();
 	});
@@ -348,13 +354,13 @@ describe("llm() through eval runtimes", () => {
 		await disposeAllKernelSessions();
 	});
-	it("exposes llm() in the JavaScript runtime", async () => {
-		using tempDir = TempDir.createSync("@omp-eval-llm-js-");
+	it("exposes completion() in the JavaScript runtime", async () => {
+		using tempDir = TempDir.createSync("@omp-eval-completion-js-");
 		const sessionFile = path.join(tempDir.path(), "session.jsonl");
-		const sessionId = `js-llm:${crypto.randomUUID()}`;
+		const sessionId = `js-completion:${crypto.randomUUID()}`;
 		vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "hello from smol" }));
-		const result = await executeJs('return await llm("hi", { model: "smol" });', {
+		const result = await executeJs('return await completion("hi", { model: "smol" });', {
 			cwd: tempDir.path(),
 			sessionId,
 			session: makeSession(),
@@ -365,16 +371,16 @@ describe("llm() through eval runtimes", () => {
 		expect(result.output.trim()).toBe("hello from smol");
 	});
-	it("parses structured llm() output in the JavaScript runtime", async () => {
-		using tempDir = TempDir.createSync("@omp-eval-llm-js-struct-");
+	it("parses structured completion() output in the JavaScript runtime", async () => {
+		using tempDir = TempDir.createSync("@omp-eval-completion-js-struct-");
 		const sessionFile = path.join(tempDir.path(), "session.jsonl");
-		const sessionId = `js-llm-struct:${crypto.randomUUID()}`;
+		const sessionId = `js-completion-struct:${crypto.randomUUID()}`;
 		vi.spyOn(ai, "completeSimple").mockResolvedValue(
 			assistant({ toolCall: { name: "respond", arguments: { ok: true, n: 3 } } }),
 		);
 		const result = await executeJs(
-			'const r = await llm("hi", { schema: { type: "object" } }); return JSON.stringify(r);',
+			'const r = await completion("hi", { schema: { type: "object" } }); return JSON.stringify(r);',
 			{ cwd: tempDir.path(), sessionId, session: makeSession(), sessionFile },
 		);
@@ -382,10 +388,10 @@ describe("llm() through eval runtimes", () => {
 		expect(JSON.parse(result.output.trim())).toEqual({ ok: true, n: 3 });
 	});
-	it("exposes llm() in the Python runtime", async () => {
-		const tempDir = TempDir.createSync("@omp-eval-llm-py-");
+	it("exposes completion() in the Python runtime", async () => {
+		const tempDir = TempDir.createSync("@omp-eval-completion-py-");
 		try {
-			const result = await runPythonLlmInSubprocess({ structured: false, tempDir });
+			const result = await runPythonCompletionInSubprocess({ structured: false, tempDir });
 			expect(result.exitCode).toBe(0);
 			expect(result.output.trim()).toBe("hello from python");
 		} finally {
@@ -393,10 +399,10 @@ describe("llm() through eval runtimes", () => {
 		}
 	});
-	it("parses structured llm() output in the Python runtime", async () => {
-		const tempDir = TempDir.createSync("@omp-eval-llm-py-struct-");
+	it("parses structured completion() output in the Python runtime", async () => {
+		const tempDir = TempDir.createSync("@omp-eval-completion-py-struct-");
 		try {
-			const result = await runPythonLlmInSubprocess({ structured: true, tempDir });
+			const result = await runPythonCompletionInSubprocess({ structured: true, tempDir });
 			expect(result.exitCode).toBe(0);
 			expect(JSON.parse(result.output.trim())).toEqual({ ok: true });
 		} finally {

package/src/eval/__tests__/helpers-local-roots.test.ts ADDED Viewed

@@ -0,0 +1,58 @@
+import { describe, expect, it } from "bun:test";
+import * as fs from "node:fs";
+import * as path from "node:path";
+import { TempDir } from "@oh-my-pi/pi-utils";
+import { createHelpers, type HelperContext } from "../js/shared/helpers";
+/**
+ * The eval helpers (`read`/`write`/`append`) must substitute injected on-disk
+ * roots for internal-URL schemes. Without it, `write("local://x.md")` hits a
+ * stdlib `path.resolve` that collapses `local://` to `local:/`, creating a junk
+ * `local:` directory under the cwd instead of landing where `read local://x.md`
+ * resolves. These lock the substitution contract and its guards.
+ */
+/**
+ * Assembles the `HelperContext` fixture handed to `createHelpers` in these tests.
+ *
+ * @param cwd - Directory reported by the context's `cwd()` accessor.
+ * @param roots - Scheme-to-directory map reported by `localRoots()`.
+ * @returns A context with an empty `env` map and a no-op `emitStatus`.
+ */
+function makeCtx(cwd: string, roots: Record<string, string>): HelperContext {
+	const context: HelperContext = {
+		cwd() {
+			return cwd;
+		},
+		env: new Map(),
+		localRoots() {
+			return roots;
+		},
+		// Status updates are irrelevant to path resolution, so they are discarded.
+		emitStatus() {},
+	};
+	return context;
+}
+/**
+ * Contract tests for path resolution in the eval JS helpers: writes/reads/appends
+ * through an injected `local://` root, the guards that reject unsafe or
+ * unsupported URLs, and plain filesystem paths resolved against the cwd.
+ */
+describe("eval js helpers internal-url resolution", () => {
+	it("writes, reads, and appends local:// under the injected root", async () => {
+		using tmp = TempDir.createSync("@eval-helpers-local-");
+		const root = path.join(tmp.path(), "local");
+		const helpers = createHelpers(makeCtx(tmp.path(), { local: root }));
+		// The value returned by writeFile is the on-disk path; it must sit under the injected root.
+		const written = await helpers.writeFile("local://notes/merge-map.md", "hello");
+		expect(written).toBe(path.join(root, "notes", "merge-map.md"));
+		expect(await Bun.file(written).text()).toBe("hello");
+		expect(await helpers.read("local://notes/merge-map.md")).toBe("hello");
+		await helpers.append("local://notes/merge-map.md", " world");
+		expect(await helpers.read("local://notes/merge-map.md")).toBe("hello world");
+		// Regression: no literal `local:` directory created under the cwd.
+		// The directory probe uses `fs.existsSync` because `Bun.file(...).exists()`
+		// only answers for files and would not notice a stray directory.
+		expect(fs.existsSync(path.join(tmp.path(), "local:"))).toBe(false);
+		expect(await Bun.file(path.join(tmp.path(), "local:", "notes", "merge-map.md")).exists()).toBe(false);
+	});
+	it("rejects traversal and schemes without an injected root", async () => {
+		using tmp = TempDir.createSync("@eval-helpers-guard-");
+		const helpers = createHelpers(makeCtx(tmp.path(), { local: path.join(tmp.path(), "local") }));
+		// `..` must not climb out of the injected root.
+		await expect(helpers.writeFile("local://../escape.md", "x")).rejects.toThrow(/traversal|escapes/i);
+		// Only `local` has a root here, so every other scheme is refused for both writes and reads.
+		await expect(helpers.writeFile("memory://x.md", "x")).rejects.toThrow(/not supported/i);
+		await expect(helpers.read("https://example.com/page")).rejects.toThrow(/not supported/i);
+	});
+	it("leaves plain relative and absolute paths resolving against the cwd", async () => {
+		using tmp = TempDir.createSync("@eval-helpers-plain-");
+		const helpers = createHelpers(makeCtx(tmp.path(), {}));
+		// NOTE(review): only a relative path is exercised below; the absolute-path
+		// half of this test's title has no assertion yet.
+		const rel = await helpers.writeFile("foo/bar.txt", "bar");
+		expect(rel).toBe(path.join(tmp.path(), "foo", "bar.txt"));
+		expect(await helpers.read("foo/bar.txt")).toBe("bar");
+	});
+});