npm - @oh-my-pi/pi-coding-agent - Versions diffs - 15.10.3 → 15.10.4 - Mend

@oh-my-pi/pi-coding-agent 15.10.3 → 15.10.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28)

package/CHANGELOG.md +20 -0
package/dist/types/eval/__tests__/js-context-manager.test.d.ts +1 -0
package/dist/types/eval/bridge-timeout.d.ts +1 -1
package/dist/types/eval/{llm-bridge.d.ts → completion-bridge.d.ts} +8 -8
package/dist/types/eval/idle-timeout.d.ts +1 -1
package/package.json +9 -9
package/src/eval/__tests__/agent-bridge.test.ts +13 -0
package/src/eval/__tests__/{llm-bridge.test.ts → completion-bridge.test.ts} +60 -54
package/src/eval/__tests__/js-context-manager.test.ts +241 -0
package/src/eval/agent-bridge.ts +6 -1
package/src/eval/bridge-timeout.ts +1 -1
package/src/eval/{llm-bridge.ts → completion-bridge.ts} +30 -27
package/src/eval/idle-timeout.ts +1 -1
package/src/eval/js/context-manager.ts +66 -6
package/src/eval/js/shared/prelude.txt +4 -4
package/src/eval/js/tool-bridge.ts +3 -3
package/src/eval/js/worker-entry.ts +6 -0
package/src/eval/py/prelude.py +3 -3
package/src/internal-urls/docs-index.generated.ts +4 -3
package/src/modes/components/tips.txt +1 -1
package/src/prompts/system/tiny-title-system.md +1 -1
package/src/prompts/system/title-system.md +16 -3
package/src/prompts/system/workflow-notice.md +1 -1
package/src/prompts/tools/eval.md +3 -3
package/src/tools/eval-render.ts +2 -2
package/src/tools/eval.ts +1 -1
package/src/utils/title-generator.ts +2 -2
package/dist/types/eval/__tests__/{llm-bridge.test.d.ts → completion-bridge.test.d.ts} +0 -0

package/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,26 @@
 ## [Unreleased]
+## [15.10.4] - 2026-06-08
+### Added
+- macOS release binaries are now signed with a Developer ID Application identity (hardened runtime + secure timestamp + JIT/library-validation entitlements) and notarized in CI when the `APPLE_*` signing secrets are configured; releases auto-fall back to ad-hoc signing until then. This makes the shipped binaries Gatekeeper-acceptable, unblocking an official Homebrew submission ([#776](https://github.com/can1357/oh-my-pi/issues/776)). See `docs/macos-signing-notarization.md`.
+- Added a Homebrew install path: `brew install can1357/tap/omp`. The [can1357/homebrew-tap](https://github.com/can1357/homebrew-tap) formula installs the prebuilt release binary, and a `release_brew` CI job regenerates it (version + per-asset sha256) from each published release via `scripts/ci-update-brew-formula.ts` ([#776](https://github.com/can1357/oh-my-pi/issues/776)).
+### Changed
+- Adjusted `completion()` model resolution so the `default` tier now prefers the session’s active model and falls back to the configured default role when needed
+- Rewrote the session auto-title prompt (`prompts/system/title-system.md`) and the `set_title` tool description to ask for a concise, sentence-case title (3-7 words) that captures the session's topic/goal, with good/bad examples and explicit guidance to treat the first message as data (no following embedded links/instructions, no refusals, describe URL/reference asks). The local on-device title prompt (`tiny-title-system.md`) was aligned to the same 3-7 word, sentence-case convention. The deterministic greeting/low-signal filter and the `none` deferral sentinel are unchanged.
+- Renamed the eval oneshot helper from `llm()` to `completion()` in both JavaScript and Python preludes, including status events, prompt docs, and runtime tests.
+### Fixed
+- Fixed `completion()` to always send a non-empty default system prompt when `system` is omitted so providers that require instructions no longer reject requests
+- Fixed structured `completion()` mode to return parsed JSON from plain text output when the model skips the forced `respond` tool call
+- Fixed slow-tier `completion()` reasoning requests to avoid unsupported effort settings by only enabling reasoning on reasoning-capable models and capping effort to supported levels
+- Fixed JS eval worker reset/dispose to close workers gracefully before forced termination, avoiding Bun 1.3.14 N-API teardown crashes with native modules such as `canvas`.
 ## [15.10.3] - 2026-06-08
 ### Added

package/dist/types/eval/__tests__/js-context-manager.test.d.ts ADDED Viewed

	@@ -0,0 +1 @@
1	+ export {};

package/dist/types/eval/bridge-timeout.d.ts CHANGED Viewed

@@ -2,7 +2,7 @@
  * Timeout suspension for in-flight host-side eval bridge calls.
  *
  * The eval watchdog caps a cell's `timeout` as a budget on the cell runtime's
- * own work. Host-side `agent()` / `parallel()` / `llm()` bridge calls hand
+ * own work. Host-side `agent()` / `parallel()` / `completion()` bridge calls hand
  * control to the outer TypeScript process, where the Python kernel or JS VM is
  * only waiting for a result. While that delegated work is in flight, the cell
  * timeout must be ignored completely; once the bridge returns and the runtime is

package/dist/types/eval/{llm-bridge.d.ts → completion-bridge.d.ts} RENAMED Viewed

@@ -1,25 +1,25 @@
 import type { ToolSession } from "../tools";
 import type { JsStatusEvent } from "./js/shared/types";
-/** Synthetic bridge name reserved for the `llm()` helper across both runtimes. */
-export declare const EVAL_LLM_BRIDGE_NAME = "__llm__";
-type LlmTier = "smol" | "default" | "slow";
-export interface EvalLlmBridgeOptions {
+/** Synthetic bridge name reserved for the `completion()` helper across both runtimes. */
+export declare const EVAL_COMPLETION_BRIDGE_NAME = "__completion__";
+type CompletionTier = "smol" | "default" | "slow";
+export interface EvalCompletionBridgeOptions {
     session: ToolSession;
     signal?: AbortSignal;
     emitStatus?: (event: JsStatusEvent) => void;
 }
-export interface EvalLlmResult {
+export interface EvalCompletionResult {
     text: string;
     details: {
         model: string;
-        tier: LlmTier;
+        tier: CompletionTier;
         structured: boolean;
     };
 }
 /**
- * Run a single stateless completion on behalf of an eval cell's `llm()` call.
+ * Run a single stateless completion on behalf of an eval cell's `completion()` call.
  * Returns a `{ text, details }` value shaped like a {@link callSessionTool}
  * result so the existing bridge transport carries it to either runtime.
  */
-export declare function runEvalLlm(args: unknown, options: EvalLlmBridgeOptions): Promise<EvalLlmResult>;
+export declare function runEvalCompletion(args: unknown, options: EvalCompletionBridgeOptions): Promise<EvalCompletionResult>;
 export {};

package/dist/types/eval/idle-timeout.d.ts CHANGED Viewed

@@ -3,7 +3,7 @@
  *
  * A cell's `timeout` bounds time while the Python kernel or JS VM is in control.
  * Host-side bridge calls can {@link pause} the watchdog so delegated
- * `agent()`/`parallel()`/`llm()` work is ignored completely, then {@link resume}
+ * `agent()`/`parallel()`/`completion()` work is ignored completely, then {@link resume}
  * starts a fresh timeout window once the runtime gets control back.
  *
  * The active timer self-reschedules instead of being torn down on every

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
 	"type": "module",
 	"name": "@oh-my-pi/pi-coding-agent",
-	"version": "15.10.3",
+	"version": "15.10.4",
 	"description": "Coding agent CLI with read, bash, edit, write tools and session management",
 	"homepage": "https://omp.sh",
 	"author": "Can Boluk",
@@ -47,14 +47,14 @@
 		"@agentclientprotocol/sdk": "0.22.1",
 		"@babel/parser": "^7.29.7",
 		"@mozilla/readability": "^0.6.0",
-		"@oh-my-pi/hashline": "15.10.3",
-		"@oh-my-pi/omp-stats": "15.10.3",
-		"@oh-my-pi/pi-agent-core": "15.10.3",
-		"@oh-my-pi/pi-ai": "15.10.3",
-		"@oh-my-pi/pi-mnemopi": "15.10.3",
-		"@oh-my-pi/pi-natives": "15.10.3",
-		"@oh-my-pi/pi-tui": "15.10.3",
-		"@oh-my-pi/pi-utils": "15.10.3",
+		"@oh-my-pi/hashline": "15.10.4",
+		"@oh-my-pi/omp-stats": "15.10.4",
+		"@oh-my-pi/pi-agent-core": "15.10.4",
+		"@oh-my-pi/pi-ai": "15.10.4",
+		"@oh-my-pi/pi-mnemopi": "15.10.4",
+		"@oh-my-pi/pi-natives": "15.10.4",
+		"@oh-my-pi/pi-tui": "15.10.4",
+		"@oh-my-pi/pi-utils": "15.10.4",
 		"@opentelemetry/api": "^1.9.1",
 		"@opentelemetry/context-async-hooks": "^2.7.1",
 		"@opentelemetry/exporter-trace-otlp-proto": "^0.218.0",

package/src/eval/__tests__/agent-bridge.test.ts CHANGED Viewed

@@ -205,6 +205,19 @@ describe("runEvalAgent", () => {
 		expect(secondOptions.outputSchema).toBeUndefined();
 	});
+	it("forces LSP off for bridge subagents even when task.enableLsp is on", async () => {
+		mockAgents();
+		const runSpy = vi.spyOn(taskExecutor, "runSubprocess").mockImplementation(async options => singleResult(options));
+		// makeSession() defaults to enableLsp: true and task.enableLsp: true.
+		const session = makeSession();
+		await runEvalAgent({ prompt: "hello" }, { session });
+		const options = runSpy.mock.calls[0]?.[0];
+		if (!options) throw new Error("runSubprocess was not called");
+		expect(options.enableLsp).toBe(false);
+	});
 	it("maps successful and failed subagent results", async () => {
 		mockAgents();
 		const runSpy = vi.spyOn(taskExecutor, "runSubprocess");

package/src/eval/__tests__/{llm-bridge.test.ts → completion-bridge.test.ts} RENAMED Viewed

@@ -10,10 +10,10 @@ import { Settings } from "../../config/settings";
 import type { ToolSession } from "../../tools";
 import { ToolError } from "../../tools/tool-errors";
 import { EVAL_TIMEOUT_PAUSE_OP, EVAL_TIMEOUT_RESUME_OP } from "../bridge-timeout";
+import { runEvalCompletion } from "../completion-bridge";
 import { IdleTimeout } from "../idle-timeout";
 import { disposeAllVmContexts } from "../js/context-manager";
 import { executeJs } from "../js/executor";
-import { runEvalLlm } from "../llm-bridge";
 import { disposeAllKernelSessions, type PythonResult } from "../py/executor";
 function makeModel(provider: string, id: string, extra: Partial<Model<Api>> = {}): Model<Api> {
@@ -98,16 +98,19 @@ function assistant(opts: {
 	};
 }
-async function runPythonLlmInSubprocess(options: { structured: boolean; tempDir: TempDir }): Promise<PythonResult> {
+async function runPythonCompletionInSubprocess(options: {
+	structured: boolean;
+	tempDir: TempDir;
+}): Promise<PythonResult> {
 	const repoRoot = path.resolve(import.meta.dir, "../../../..");
-	const scriptPath = path.join(options.tempDir.path(), "run-python-llm.ts");
-	const resultPath = path.join(options.tempDir.path(), "python-llm-result.json");
+	const scriptPath = path.join(options.tempDir.path(), "run-python-completion.ts");
+	const resultPath = path.join(options.tempDir.path(), "python-completion-result.json");
 	const aiPath = path.resolve(import.meta.dir, "../../../../ai/src/index.ts");
 	const executorPath = path.resolve(import.meta.dir, "../py/executor.ts");
 	const settingsPath = path.resolve(import.meta.dir, "../../config/settings.ts");
 	const code = options.structured
-		? 'import json\nprint(json.dumps(llm("hi", schema={"type": "object"})))'
-		: 'print(llm("hi", model="smol"))';
+		? 'import json\nprint(json.dumps(completion("hi", schema={"type": "object"})))'
+		: 'print(completion("hi", model="smol"))';
 	const responseContent = options.structured
 		? '[{ type: "toolCall", id: "tc-1", name: "respond", arguments: { ok: true } }]'
 		: '[{ type: "text", text: "hello from python" }]';
@@ -153,7 +156,7 @@ vi.spyOn(ai, "completeSimple").mockResolvedValue({
 });
 const result = await executePython(${JSON.stringify(code)}, {
 	cwd: ${JSON.stringify(options.tempDir.path())},
-	sessionId: ${JSON.stringify(`py-llm:${options.structured ? "struct" : "plain"}`)},
+	sessionId: ${JSON.stringify(`py-completion:${options.structured ? "struct" : "plain"}`)},
 	sessionFile: ${JSON.stringify(path.join(options.tempDir.path(), "session.jsonl"))},
 	toolSession: session,
 	kernelMode: "per-call",
@@ -165,11 +168,12 @@ process.exit(0);
 	const child = await $`bun ${scriptPath}`.cwd(repoRoot).quiet().nothrow();
 	const stdout = child.stdout.toString();
 	const stderr = child.stderr.toString();
-	if (child.exitCode !== 0) throw new Error(stderr || stdout || `Python llm subprocess exited with ${child.exitCode}`);
+	if (child.exitCode !== 0)
+		throw new Error(stderr || stdout || `Python completion subprocess exited with ${child.exitCode}`);
 	return (await Bun.file(resultPath).json()) as PythonResult;
 }
-describe("runEvalLlm", () => {
+describe("runEvalCompletion", () => {
 	afterEach(() => {
 		vi.restoreAllMocks();
 	});
@@ -178,9 +182,9 @@ describe("runEvalLlm", () => {
 		const spy = vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "ok" }));
 		const session = makeSession();
-		await runEvalLlm({ prompt: "q", model: "smol" }, { session });
-		await runEvalLlm({ prompt: "q", model: "default" }, { session });
-		await runEvalLlm({ prompt: "q", model: "slow" }, { session });
+		await runEvalCompletion({ prompt: "q", model: "smol" }, { session });
+		await runEvalCompletion({ prompt: "q", model: "default" }, { session });
+		await runEvalCompletion({ prompt: "q", model: "slow" }, { session });
 		const resolved = spy.mock.calls.map(call => {
 			const model = call[0] as Model<Api>;
@@ -193,7 +197,7 @@ describe("runEvalLlm", () => {
 		const spy = vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "ok" }));
 		const session = makeSession({ available: [SMOL, DEFAULT, SLOW], activeModel: "p/slow" });
-		await runEvalLlm({ prompt: "q", model: "default" }, { session });
+		await runEvalCompletion({ prompt: "q", model: "default" }, { session });
 		const model = spy.mock.calls[0]?.[0] as Model<Api>;
 		expect(`${model.provider}/${model.id}`).toBe("p/slow");
@@ -201,7 +205,7 @@ describe("runEvalLlm", () => {
 	it("returns the completion text in plain mode", async () => {
 		vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "the answer" }));
-		const result = await runEvalLlm({ prompt: "q", model: "smol" }, { session: makeSession() });
+		const result = await runEvalCompletion({ prompt: "q", model: "smol" }, { session: makeSession() });
 		expect(result.text).toBe("the answer");
 		expect(result.details).toEqual({ model: "p/smol", tier: "smol", structured: false });
 	});
@@ -209,10 +213,10 @@ describe("runEvalLlm", () => {
 	it("supplies a non-empty systemPrompt when system is omitted (codex 'Instructions are required' guard)", async () => {
 		// The openai-codex Responses transformer drops `instructions` when no
 		// system prompt is provided, and the remote endpoint then 400s with
-		// "Instructions are required". runEvalLlm must always carry a non-empty
-		// systemPrompt so `llm("…")` without a `system` argument works.
+		// "Instructions are required". runEvalCompletion must always carry a non-empty
+		// systemPrompt so `completion("…")` without a `system` argument works.
 		const spy = vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "ok" }));
-		await runEvalLlm({ prompt: "q", model: "smol" }, { session: makeSession() });
+		await runEvalCompletion({ prompt: "q", model: "smol" }, { session: makeSession() });
 		const ctx = spy.mock.calls[0]?.[1] as { systemPrompt?: string[] };
 		expect(ctx.systemPrompt).toBeDefined();
 		expect(ctx.systemPrompt?.length).toBeGreaterThan(0);
@@ -221,7 +225,7 @@ describe("runEvalLlm", () => {
 	it("honors an explicit system prompt instead of overriding it", async () => {
 		const spy = vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "ok" }));
-		await runEvalLlm({ prompt: "q", model: "smol", system: "Be terse." }, { session: makeSession() });
+		await runEvalCompletion({ prompt: "q", model: "smol", system: "Be terse." }, { session: makeSession() });
 		const ctx = spy.mock.calls[0]?.[1] as { systemPrompt?: string[] };
 		expect(ctx.systemPrompt).toEqual(["Be terse."]);
 	});
@@ -230,7 +234,7 @@ describe("runEvalLlm", () => {
 		const spy = vi
 			.spyOn(ai, "completeSimple")
 			.mockResolvedValue(assistant({ toolCall: { name: "respond", arguments: { answer: 42 } } }));
-		const result = await runEvalLlm(
+		const result = await runEvalCompletion(
 			{ prompt: "q", model: "smol", schema: { type: "object", properties: { answer: { type: "number" } } } },
 			{ session: makeSession() },
 		);
@@ -246,7 +250,7 @@ describe("runEvalLlm", () => {
 	it("falls back to JSON embedded in text when the model skips the respond tool", async () => {
 		vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: 'here: {"answer": 7}' }));
-		const result = await runEvalLlm(
+		const result = await runEvalCompletion(
 			{ prompt: "q", model: "smol", schema: { type: "object" } },
 			{ session: makeSession() },
 		);
@@ -257,8 +261,8 @@ describe("runEvalLlm", () => {
 		const spy = vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "ok" }));
 		const session = makeSession({ available: [SMOL, DEFAULT, REASONING_SLOW] });
-		await runEvalLlm({ prompt: "q", model: "smol" }, { session });
-		await runEvalLlm({ prompt: "q", model: "slow" }, { session });
+		await runEvalCompletion({ prompt: "q", model: "smol" }, { session });
+		await runEvalCompletion({ prompt: "q", model: "slow" }, { session });
 		const smolOpts = spy.mock.calls[0]?.[2] as { reasoning?: unknown };
 		const slowOpts = spy.mock.calls[1]?.[2] as { reasoning?: unknown };
@@ -269,47 +273,49 @@ describe("runEvalLlm", () => {
 	it("does not request reasoning for the slow tier on a non-reasoning model", async () => {
 		const spy = vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "ok" }));
 		// SLOW is reasoning:false — must not trip requireSupportedEffort downstream.
-		const result = await runEvalLlm({ prompt: "q", model: "slow" }, { session: makeSession() });
+		const result = await runEvalCompletion({ prompt: "q", model: "slow" }, { session: makeSession() });
 		expect(result.text).toBe("ok");
 		const opts = spy.mock.calls[0]?.[2] as { reasoning?: unknown };
 		expect(opts.reasoning).toBeUndefined();
 	});
 	it("throws ToolError on invalid arguments", async () => {
-		await expect(runEvalLlm({ prompt: "" }, { session: makeSession() })).rejects.toBeInstanceOf(ToolError);
-		await expect(runEvalLlm({ prompt: "q", model: "huge" }, { session: makeSession() })).rejects.toBeInstanceOf(
-			ToolError,
-		);
+		await expect(runEvalCompletion({ prompt: "" }, { session: makeSession() })).rejects.toBeInstanceOf(ToolError);
+		await expect(
+			runEvalCompletion({ prompt: "q", model: "huge" }, { session: makeSession() }),
+		).rejects.toBeInstanceOf(ToolError);
 	});
 	it("throws ToolError when no model resolves for the tier", async () => {
 		const session = makeSession({ available: [DEFAULT], roles: { smol: "missing/model" } });
-		await expect(runEvalLlm({ prompt: "q", model: "smol" }, { session })).rejects.toBeInstanceOf(ToolError);
+		await expect(runEvalCompletion({ prompt: "q", model: "smol" }, { session })).rejects.toBeInstanceOf(ToolError);
 	});
 	it("throws ToolError when the resolved model has no API key", async () => {
 		const session = makeSession({ apiKey: null });
-		await expect(runEvalLlm({ prompt: "q", model: "smol" }, { session })).rejects.toBeInstanceOf(ToolError);
+		await expect(runEvalCompletion({ prompt: "q", model: "smol" }, { session })).rejects.toBeInstanceOf(ToolError);
 	});
 	it("maps error and aborted stop reasons to ToolError", async () => {
 		vi.spyOn(ai, "completeSimple").mockResolvedValueOnce(assistant({ stopReason: "error", errorMessage: "boom" }));
-		await expect(runEvalLlm({ prompt: "q", model: "smol" }, { session: makeSession() })).rejects.toThrow("boom");
+		await expect(runEvalCompletion({ prompt: "q", model: "smol" }, { session: makeSession() })).rejects.toThrow(
+			"boom",
+		);
 		vi.spyOn(ai, "completeSimple").mockResolvedValueOnce(assistant({ stopReason: "aborted" }));
-		await expect(runEvalLlm({ prompt: "q", model: "smol" }, { session: makeSession() })).rejects.toBeInstanceOf(
-			ToolError,
-		);
+		await expect(
+			runEvalCompletion({ prompt: "q", model: "smol" }, { session: makeSession() }),
+		).rejects.toBeInstanceOf(ToolError);
 	});
 	it("throws ToolError when plain mode produces no text", async () => {
 		vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "" }));
-		await expect(runEvalLlm({ prompt: "q", model: "smol" }, { session: makeSession() })).rejects.toBeInstanceOf(
-			ToolError,
-		);
+		await expect(
+			runEvalCompletion({ prompt: "q", model: "smol" }, { session: makeSession() }),
+		).rejects.toBeInstanceOf(ToolError);
 	});
-	it("pauses the idle watchdog while a slow llm() request is in flight", async () => {
+	it("pauses the idle watchdog while a slow completion() request is in flight", async () => {
 		// A oneshot completion emits no status until it returns; delegated model
 		// time must be invisible to the eval timeout budget.
 		vi.spyOn(ai, "completeSimple").mockImplementation(async () => {
@@ -319,7 +325,7 @@ describe("runEvalLlm", () => {
 		const ops: string[] = [];
 		using idle = new IdleTimeout(60);
-		const result = await runEvalLlm(
+		const result = await runEvalCompletion(
 			{ prompt: "q", model: "smol" },
 			{
 				session: makeSession(),
@@ -333,12 +339,12 @@ describe("runEvalLlm", () => {
 		);
 		expect(result.text).toBe("the answer");
-		expect(ops).toEqual([EVAL_TIMEOUT_PAUSE_OP, EVAL_TIMEOUT_RESUME_OP, "llm"]);
+		expect(ops).toEqual([EVAL_TIMEOUT_PAUSE_OP, EVAL_TIMEOUT_RESUME_OP, "completion"]);
 		expect(idle.signal.aborted).toBe(false);
 	});
 });
-describe("llm() through eval runtimes", () => {
+describe("completion() through eval runtimes", () => {
 	afterEach(() => {
 		vi.restoreAllMocks();
 	});
@@ -348,13 +354,13 @@ describe("llm() through eval runtimes", () => {
 		await disposeAllKernelSessions();
 	});
-	it("exposes llm() in the JavaScript runtime", async () => {
-		using tempDir = TempDir.createSync("@omp-eval-llm-js-");
+	it("exposes completion() in the JavaScript runtime", async () => {
+		using tempDir = TempDir.createSync("@omp-eval-completion-js-");
 		const sessionFile = path.join(tempDir.path(), "session.jsonl");
-		const sessionId = `js-llm:${crypto.randomUUID()}`;
+		const sessionId = `js-completion:${crypto.randomUUID()}`;
 		vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "hello from smol" }));
-		const result = await executeJs('return await llm("hi", { model: "smol" });', {
+		const result = await executeJs('return await completion("hi", { model: "smol" });', {
 			cwd: tempDir.path(),
 			sessionId,
 			session: makeSession(),
@@ -365,16 +371,16 @@ describe("llm() through eval runtimes", () => {
 		expect(result.output.trim()).toBe("hello from smol");
 	});
-	it("parses structured llm() output in the JavaScript runtime", async () => {
-		using tempDir = TempDir.createSync("@omp-eval-llm-js-struct-");
+	it("parses structured completion() output in the JavaScript runtime", async () => {
+		using tempDir = TempDir.createSync("@omp-eval-completion-js-struct-");
 		const sessionFile = path.join(tempDir.path(), "session.jsonl");
-		const sessionId = `js-llm-struct:${crypto.randomUUID()}`;
+		const sessionId = `js-completion-struct:${crypto.randomUUID()}`;
 		vi.spyOn(ai, "completeSimple").mockResolvedValue(
 			assistant({ toolCall: { name: "respond", arguments: { ok: true, n: 3 } } }),
 		);
 		const result = await executeJs(
-			'const r = await llm("hi", { schema: { type: "object" } }); return JSON.stringify(r);',
+			'const r = await completion("hi", { schema: { type: "object" } }); return JSON.stringify(r);',
 			{ cwd: tempDir.path(), sessionId, session: makeSession(), sessionFile },
 		);
@@ -382,10 +388,10 @@ describe("llm() through eval runtimes", () => {
 		expect(JSON.parse(result.output.trim())).toEqual({ ok: true, n: 3 });
 	});
-	it("exposes llm() in the Python runtime", async () => {
-		const tempDir = TempDir.createSync("@omp-eval-llm-py-");
+	it("exposes completion() in the Python runtime", async () => {
+		const tempDir = TempDir.createSync("@omp-eval-completion-py-");
 		try {
-			const result = await runPythonLlmInSubprocess({ structured: false, tempDir });
+			const result = await runPythonCompletionInSubprocess({ structured: false, tempDir });
 			expect(result.exitCode).toBe(0);
 			expect(result.output.trim()).toBe("hello from python");
 		} finally {
@@ -393,10 +399,10 @@ describe("llm() through eval runtimes", () => {
 		}
 	});
-	it("parses structured llm() output in the Python runtime", async () => {
-		const tempDir = TempDir.createSync("@omp-eval-llm-py-struct-");
+	it("parses structured completion() output in the Python runtime", async () => {
+		const tempDir = TempDir.createSync("@omp-eval-completion-py-struct-");
 		try {
-			const result = await runPythonLlmInSubprocess({ structured: true, tempDir });
+			const result = await runPythonCompletionInSubprocess({ structured: true, tempDir });
 			expect(result.exitCode).toBe(0);
 			expect(JSON.parse(result.output.trim())).toEqual({ ok: true });
 		} finally {