@oh-my-pi/pi-coding-agent 15.10.3 → 15.10.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,26 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ## [15.10.4] - 2026-06-08
6
+
7
+ ### Added
8
+
9
+ - macOS release binaries are now signed with a Developer ID Application identity (hardened runtime + secure timestamp + JIT/library-validation entitlements) and notarized in CI when the `APPLE_*` signing secrets are configured; releases auto-fall back to ad-hoc signing until then. This makes the shipped binaries Gatekeeper-acceptable, unblocking an official Homebrew submission ([#776](https://github.com/can1357/oh-my-pi/issues/776)). See `docs/macos-signing-notarization.md`.
10
+ - Added a Homebrew install path: `brew install can1357/tap/omp`. The [can1357/homebrew-tap](https://github.com/can1357/homebrew-tap) formula installs the prebuilt release binary, and a `release_brew` CI job regenerates it (version + per-asset sha256) from each published release via `scripts/ci-update-brew-formula.ts` ([#776](https://github.com/can1357/oh-my-pi/issues/776)).
11
+
12
+ ### Changed
13
+
14
+ - Adjusted `completion()` model resolution so the `default` tier now prefers the session’s active model and falls back to the configured default role when needed.
15
+ - Rewrote the session auto-title prompt (`prompts/system/title-system.md`) and the `set_title` tool description to ask for a concise, sentence-case title (3-7 words) that captures the session's topic/goal, with good/bad examples and explicit guidance to treat the first message as data (do not follow embedded links/instructions, do not refuse, describe URL/reference asks). The local on-device title prompt (`tiny-title-system.md`) was aligned to the same 3-7 word, sentence-case convention. The deterministic greeting/low-signal filter and the `none` deferral sentinel are unchanged.
16
+ - Renamed the eval oneshot helper from `llm()` to `completion()` in both JavaScript and Python preludes, including status events, prompt docs, and runtime tests.
17
+
18
+ ### Fixed
19
+
20
+ - Fixed `completion()` to always send a non-empty default system prompt when `system` is omitted so providers that require instructions no longer reject requests.
21
+ - Fixed structured `completion()` mode to return parsed JSON from plain text output when the model skips the forced `respond` tool call.
22
+ - Fixed slow-tier `completion()` reasoning requests to avoid unsupported effort settings by enabling reasoning only on reasoning-capable models and capping effort at supported levels.
23
+ - Fixed JS eval worker reset/dispose to close workers gracefully before forced termination, avoiding Bun 1.3.14 N-API teardown crashes with native modules such as `canvas`.
24
+
5
25
  ## [15.10.3] - 2026-06-08
6
26
 
7
27
  ### Added
@@ -2,7 +2,7 @@
2
2
  * Timeout suspension for in-flight host-side eval bridge calls.
3
3
  *
4
4
  * The eval watchdog caps a cell's `timeout` as a budget on the cell runtime's
5
- * own work. Host-side `agent()` / `parallel()` / `llm()` bridge calls hand
5
+ * own work. Host-side `agent()` / `parallel()` / `completion()` bridge calls hand
6
6
  * control to the outer TypeScript process, where the Python kernel or JS VM is
7
7
  * only waiting for a result. While that delegated work is in flight, the cell
8
8
  * timeout must be ignored completely; once the bridge returns and the runtime is
@@ -1,25 +1,25 @@
1
1
  import type { ToolSession } from "../tools";
2
2
  import type { JsStatusEvent } from "./js/shared/types";
3
- /** Synthetic bridge name reserved for the `llm()` helper across both runtimes. */
4
- export declare const EVAL_LLM_BRIDGE_NAME = "__llm__";
5
- type LlmTier = "smol" | "default" | "slow";
6
- export interface EvalLlmBridgeOptions {
3
+ /** Synthetic bridge name reserved for the `completion()` helper across both runtimes. */
4
+ export declare const EVAL_COMPLETION_BRIDGE_NAME = "__completion__";
5
+ type CompletionTier = "smol" | "default" | "slow";
6
+ export interface EvalCompletionBridgeOptions {
7
7
  session: ToolSession;
8
8
  signal?: AbortSignal;
9
9
  emitStatus?: (event: JsStatusEvent) => void;
10
10
  }
11
- export interface EvalLlmResult {
11
+ export interface EvalCompletionResult {
12
12
  text: string;
13
13
  details: {
14
14
  model: string;
15
- tier: LlmTier;
15
+ tier: CompletionTier;
16
16
  structured: boolean;
17
17
  };
18
18
  }
19
19
  /**
20
- * Run a single stateless completion on behalf of an eval cell's `llm()` call.
20
+ * Run a single stateless completion on behalf of an eval cell's `completion()` call.
21
21
  * Returns a `{ text, details }` value shaped like a {@link callSessionTool}
22
22
  * result so the existing bridge transport carries it to either runtime.
23
23
  */
24
- export declare function runEvalLlm(args: unknown, options: EvalLlmBridgeOptions): Promise<EvalLlmResult>;
24
+ export declare function runEvalCompletion(args: unknown, options: EvalCompletionBridgeOptions): Promise<EvalCompletionResult>;
25
25
  export {};
@@ -3,7 +3,7 @@
3
3
  *
4
4
  * A cell's `timeout` bounds time while the Python kernel or JS VM is in control.
5
5
  * Host-side bridge calls can {@link pause} the watchdog so delegated
6
- * `agent()`/`parallel()`/`llm()` work is ignored completely, then {@link resume}
6
+ * `agent()`/`parallel()`/`completion()` work is ignored completely, then {@link resume}
7
7
  * starts a fresh timeout window once the runtime gets control back.
8
8
  *
9
9
  * The active timer self-reschedules instead of being torn down on every
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "type": "module",
3
3
  "name": "@oh-my-pi/pi-coding-agent",
4
- "version": "15.10.3",
4
+ "version": "15.10.4",
5
5
  "description": "Coding agent CLI with read, bash, edit, write tools and session management",
6
6
  "homepage": "https://omp.sh",
7
7
  "author": "Can Boluk",
@@ -47,14 +47,14 @@
47
47
  "@agentclientprotocol/sdk": "0.22.1",
48
48
  "@babel/parser": "^7.29.7",
49
49
  "@mozilla/readability": "^0.6.0",
50
- "@oh-my-pi/hashline": "15.10.3",
51
- "@oh-my-pi/omp-stats": "15.10.3",
52
- "@oh-my-pi/pi-agent-core": "15.10.3",
53
- "@oh-my-pi/pi-ai": "15.10.3",
54
- "@oh-my-pi/pi-mnemopi": "15.10.3",
55
- "@oh-my-pi/pi-natives": "15.10.3",
56
- "@oh-my-pi/pi-tui": "15.10.3",
57
- "@oh-my-pi/pi-utils": "15.10.3",
50
+ "@oh-my-pi/hashline": "15.10.4",
51
+ "@oh-my-pi/omp-stats": "15.10.4",
52
+ "@oh-my-pi/pi-agent-core": "15.10.4",
53
+ "@oh-my-pi/pi-ai": "15.10.4",
54
+ "@oh-my-pi/pi-mnemopi": "15.10.4",
55
+ "@oh-my-pi/pi-natives": "15.10.4",
56
+ "@oh-my-pi/pi-tui": "15.10.4",
57
+ "@oh-my-pi/pi-utils": "15.10.4",
58
58
  "@opentelemetry/api": "^1.9.1",
59
59
  "@opentelemetry/context-async-hooks": "^2.7.1",
60
60
  "@opentelemetry/exporter-trace-otlp-proto": "^0.218.0",
@@ -205,6 +205,19 @@ describe("runEvalAgent", () => {
205
205
  expect(secondOptions.outputSchema).toBeUndefined();
206
206
  });
207
207
 
208
+ it("forces LSP off for bridge subagents even when task.enableLsp is on", async () => {
209
+ mockAgents();
210
+ const runSpy = vi.spyOn(taskExecutor, "runSubprocess").mockImplementation(async options => singleResult(options));
211
+ // makeSession() defaults to enableLsp: true and task.enableLsp: true.
212
+ const session = makeSession();
213
+
214
+ await runEvalAgent({ prompt: "hello" }, { session });
215
+
216
+ const options = runSpy.mock.calls[0]?.[0];
217
+ if (!options) throw new Error("runSubprocess was not called");
218
+ expect(options.enableLsp).toBe(false);
219
+ });
220
+
208
221
  it("maps successful and failed subagent results", async () => {
209
222
  mockAgents();
210
223
  const runSpy = vi.spyOn(taskExecutor, "runSubprocess");
@@ -10,10 +10,10 @@ import { Settings } from "../../config/settings";
10
10
  import type { ToolSession } from "../../tools";
11
11
  import { ToolError } from "../../tools/tool-errors";
12
12
  import { EVAL_TIMEOUT_PAUSE_OP, EVAL_TIMEOUT_RESUME_OP } from "../bridge-timeout";
13
+ import { runEvalCompletion } from "../completion-bridge";
13
14
  import { IdleTimeout } from "../idle-timeout";
14
15
  import { disposeAllVmContexts } from "../js/context-manager";
15
16
  import { executeJs } from "../js/executor";
16
- import { runEvalLlm } from "../llm-bridge";
17
17
  import { disposeAllKernelSessions, type PythonResult } from "../py/executor";
18
18
 
19
19
  function makeModel(provider: string, id: string, extra: Partial<Model<Api>> = {}): Model<Api> {
@@ -98,16 +98,19 @@ function assistant(opts: {
98
98
  };
99
99
  }
100
100
 
101
- async function runPythonLlmInSubprocess(options: { structured: boolean; tempDir: TempDir }): Promise<PythonResult> {
101
+ async function runPythonCompletionInSubprocess(options: {
102
+ structured: boolean;
103
+ tempDir: TempDir;
104
+ }): Promise<PythonResult> {
102
105
  const repoRoot = path.resolve(import.meta.dir, "../../../..");
103
- const scriptPath = path.join(options.tempDir.path(), "run-python-llm.ts");
104
- const resultPath = path.join(options.tempDir.path(), "python-llm-result.json");
106
+ const scriptPath = path.join(options.tempDir.path(), "run-python-completion.ts");
107
+ const resultPath = path.join(options.tempDir.path(), "python-completion-result.json");
105
108
  const aiPath = path.resolve(import.meta.dir, "../../../../ai/src/index.ts");
106
109
  const executorPath = path.resolve(import.meta.dir, "../py/executor.ts");
107
110
  const settingsPath = path.resolve(import.meta.dir, "../../config/settings.ts");
108
111
  const code = options.structured
109
- ? 'import json\nprint(json.dumps(llm("hi", schema={"type": "object"})))'
110
- : 'print(llm("hi", model="smol"))';
112
+ ? 'import json\nprint(json.dumps(completion("hi", schema={"type": "object"})))'
113
+ : 'print(completion("hi", model="smol"))';
111
114
  const responseContent = options.structured
112
115
  ? '[{ type: "toolCall", id: "tc-1", name: "respond", arguments: { ok: true } }]'
113
116
  : '[{ type: "text", text: "hello from python" }]';
@@ -153,7 +156,7 @@ vi.spyOn(ai, "completeSimple").mockResolvedValue({
153
156
  });
154
157
  const result = await executePython(${JSON.stringify(code)}, {
155
158
  cwd: ${JSON.stringify(options.tempDir.path())},
156
- sessionId: ${JSON.stringify(`py-llm:${options.structured ? "struct" : "plain"}`)},
159
+ sessionId: ${JSON.stringify(`py-completion:${options.structured ? "struct" : "plain"}`)},
157
160
  sessionFile: ${JSON.stringify(path.join(options.tempDir.path(), "session.jsonl"))},
158
161
  toolSession: session,
159
162
  kernelMode: "per-call",
@@ -165,11 +168,12 @@ process.exit(0);
165
168
  const child = await $`bun ${scriptPath}`.cwd(repoRoot).quiet().nothrow();
166
169
  const stdout = child.stdout.toString();
167
170
  const stderr = child.stderr.toString();
168
- if (child.exitCode !== 0) throw new Error(stderr || stdout || `Python llm subprocess exited with ${child.exitCode}`);
171
+ if (child.exitCode !== 0)
172
+ throw new Error(stderr || stdout || `Python completion subprocess exited with ${child.exitCode}`);
169
173
  return (await Bun.file(resultPath).json()) as PythonResult;
170
174
  }
171
175
 
172
- describe("runEvalLlm", () => {
176
+ describe("runEvalCompletion", () => {
173
177
  afterEach(() => {
174
178
  vi.restoreAllMocks();
175
179
  });
@@ -178,9 +182,9 @@ describe("runEvalLlm", () => {
178
182
  const spy = vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "ok" }));
179
183
  const session = makeSession();
180
184
 
181
- await runEvalLlm({ prompt: "q", model: "smol" }, { session });
182
- await runEvalLlm({ prompt: "q", model: "default" }, { session });
183
- await runEvalLlm({ prompt: "q", model: "slow" }, { session });
185
+ await runEvalCompletion({ prompt: "q", model: "smol" }, { session });
186
+ await runEvalCompletion({ prompt: "q", model: "default" }, { session });
187
+ await runEvalCompletion({ prompt: "q", model: "slow" }, { session });
184
188
 
185
189
  const resolved = spy.mock.calls.map(call => {
186
190
  const model = call[0] as Model<Api>;
@@ -193,7 +197,7 @@ describe("runEvalLlm", () => {
193
197
  const spy = vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "ok" }));
194
198
  const session = makeSession({ available: [SMOL, DEFAULT, SLOW], activeModel: "p/slow" });
195
199
 
196
- await runEvalLlm({ prompt: "q", model: "default" }, { session });
200
+ await runEvalCompletion({ prompt: "q", model: "default" }, { session });
197
201
 
198
202
  const model = spy.mock.calls[0]?.[0] as Model<Api>;
199
203
  expect(`${model.provider}/${model.id}`).toBe("p/slow");
@@ -201,7 +205,7 @@ describe("runEvalLlm", () => {
201
205
 
202
206
  it("returns the completion text in plain mode", async () => {
203
207
  vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "the answer" }));
204
- const result = await runEvalLlm({ prompt: "q", model: "smol" }, { session: makeSession() });
208
+ const result = await runEvalCompletion({ prompt: "q", model: "smol" }, { session: makeSession() });
205
209
  expect(result.text).toBe("the answer");
206
210
  expect(result.details).toEqual({ model: "p/smol", tier: "smol", structured: false });
207
211
  });
@@ -209,10 +213,10 @@ describe("runEvalLlm", () => {
209
213
  it("supplies a non-empty systemPrompt when system is omitted (codex 'Instructions are required' guard)", async () => {
210
214
  // The openai-codex Responses transformer drops `instructions` when no
211
215
  // system prompt is provided, and the remote endpoint then 400s with
212
- // "Instructions are required". runEvalLlm must always carry a non-empty
213
- // systemPrompt so `llm("…")` without a `system` argument works.
216
+ // "Instructions are required". runEvalCompletion must always carry a non-empty
217
+ // systemPrompt so `completion("…")` without a `system` argument works.
214
218
  const spy = vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "ok" }));
215
- await runEvalLlm({ prompt: "q", model: "smol" }, { session: makeSession() });
219
+ await runEvalCompletion({ prompt: "q", model: "smol" }, { session: makeSession() });
216
220
  const ctx = spy.mock.calls[0]?.[1] as { systemPrompt?: string[] };
217
221
  expect(ctx.systemPrompt).toBeDefined();
218
222
  expect(ctx.systemPrompt?.length).toBeGreaterThan(0);
@@ -221,7 +225,7 @@ describe("runEvalLlm", () => {
221
225
 
222
226
  it("honors an explicit system prompt instead of overriding it", async () => {
223
227
  const spy = vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "ok" }));
224
- await runEvalLlm({ prompt: "q", model: "smol", system: "Be terse." }, { session: makeSession() });
228
+ await runEvalCompletion({ prompt: "q", model: "smol", system: "Be terse." }, { session: makeSession() });
225
229
  const ctx = spy.mock.calls[0]?.[1] as { systemPrompt?: string[] };
226
230
  expect(ctx.systemPrompt).toEqual(["Be terse."]);
227
231
  });
@@ -230,7 +234,7 @@ describe("runEvalLlm", () => {
230
234
  const spy = vi
231
235
  .spyOn(ai, "completeSimple")
232
236
  .mockResolvedValue(assistant({ toolCall: { name: "respond", arguments: { answer: 42 } } }));
233
- const result = await runEvalLlm(
237
+ const result = await runEvalCompletion(
234
238
  { prompt: "q", model: "smol", schema: { type: "object", properties: { answer: { type: "number" } } } },
235
239
  { session: makeSession() },
236
240
  );
@@ -246,7 +250,7 @@ describe("runEvalLlm", () => {
246
250
 
247
251
  it("falls back to JSON embedded in text when the model skips the respond tool", async () => {
248
252
  vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: 'here: {"answer": 7}' }));
249
- const result = await runEvalLlm(
253
+ const result = await runEvalCompletion(
250
254
  { prompt: "q", model: "smol", schema: { type: "object" } },
251
255
  { session: makeSession() },
252
256
  );
@@ -257,8 +261,8 @@ describe("runEvalLlm", () => {
257
261
  const spy = vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "ok" }));
258
262
  const session = makeSession({ available: [SMOL, DEFAULT, REASONING_SLOW] });
259
263
 
260
- await runEvalLlm({ prompt: "q", model: "smol" }, { session });
261
- await runEvalLlm({ prompt: "q", model: "slow" }, { session });
264
+ await runEvalCompletion({ prompt: "q", model: "smol" }, { session });
265
+ await runEvalCompletion({ prompt: "q", model: "slow" }, { session });
262
266
 
263
267
  const smolOpts = spy.mock.calls[0]?.[2] as { reasoning?: unknown };
264
268
  const slowOpts = spy.mock.calls[1]?.[2] as { reasoning?: unknown };
@@ -269,47 +273,49 @@ describe("runEvalLlm", () => {
269
273
  it("does not request reasoning for the slow tier on a non-reasoning model", async () => {
270
274
  const spy = vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "ok" }));
271
275
  // SLOW is reasoning:false — must not trip requireSupportedEffort downstream.
272
- const result = await runEvalLlm({ prompt: "q", model: "slow" }, { session: makeSession() });
276
+ const result = await runEvalCompletion({ prompt: "q", model: "slow" }, { session: makeSession() });
273
277
  expect(result.text).toBe("ok");
274
278
  const opts = spy.mock.calls[0]?.[2] as { reasoning?: unknown };
275
279
  expect(opts.reasoning).toBeUndefined();
276
280
  });
277
281
 
278
282
  it("throws ToolError on invalid arguments", async () => {
279
- await expect(runEvalLlm({ prompt: "" }, { session: makeSession() })).rejects.toBeInstanceOf(ToolError);
280
- await expect(runEvalLlm({ prompt: "q", model: "huge" }, { session: makeSession() })).rejects.toBeInstanceOf(
281
- ToolError,
282
- );
283
+ await expect(runEvalCompletion({ prompt: "" }, { session: makeSession() })).rejects.toBeInstanceOf(ToolError);
284
+ await expect(
285
+ runEvalCompletion({ prompt: "q", model: "huge" }, { session: makeSession() }),
286
+ ).rejects.toBeInstanceOf(ToolError);
283
287
  });
284
288
 
285
289
  it("throws ToolError when no model resolves for the tier", async () => {
286
290
  const session = makeSession({ available: [DEFAULT], roles: { smol: "missing/model" } });
287
- await expect(runEvalLlm({ prompt: "q", model: "smol" }, { session })).rejects.toBeInstanceOf(ToolError);
291
+ await expect(runEvalCompletion({ prompt: "q", model: "smol" }, { session })).rejects.toBeInstanceOf(ToolError);
288
292
  });
289
293
 
290
294
  it("throws ToolError when the resolved model has no API key", async () => {
291
295
  const session = makeSession({ apiKey: null });
292
- await expect(runEvalLlm({ prompt: "q", model: "smol" }, { session })).rejects.toBeInstanceOf(ToolError);
296
+ await expect(runEvalCompletion({ prompt: "q", model: "smol" }, { session })).rejects.toBeInstanceOf(ToolError);
293
297
  });
294
298
 
295
299
  it("maps error and aborted stop reasons to ToolError", async () => {
296
300
  vi.spyOn(ai, "completeSimple").mockResolvedValueOnce(assistant({ stopReason: "error", errorMessage: "boom" }));
297
- await expect(runEvalLlm({ prompt: "q", model: "smol" }, { session: makeSession() })).rejects.toThrow("boom");
301
+ await expect(runEvalCompletion({ prompt: "q", model: "smol" }, { session: makeSession() })).rejects.toThrow(
302
+ "boom",
303
+ );
298
304
 
299
305
  vi.spyOn(ai, "completeSimple").mockResolvedValueOnce(assistant({ stopReason: "aborted" }));
300
- await expect(runEvalLlm({ prompt: "q", model: "smol" }, { session: makeSession() })).rejects.toBeInstanceOf(
301
- ToolError,
302
- );
306
+ await expect(
307
+ runEvalCompletion({ prompt: "q", model: "smol" }, { session: makeSession() }),
308
+ ).rejects.toBeInstanceOf(ToolError);
303
309
  });
304
310
 
305
311
  it("throws ToolError when plain mode produces no text", async () => {
306
312
  vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "" }));
307
- await expect(runEvalLlm({ prompt: "q", model: "smol" }, { session: makeSession() })).rejects.toBeInstanceOf(
308
- ToolError,
309
- );
313
+ await expect(
314
+ runEvalCompletion({ prompt: "q", model: "smol" }, { session: makeSession() }),
315
+ ).rejects.toBeInstanceOf(ToolError);
310
316
  });
311
317
 
312
- it("pauses the idle watchdog while a slow llm() request is in flight", async () => {
318
+ it("pauses the idle watchdog while a slow completion() request is in flight", async () => {
313
319
  // A oneshot completion emits no status until it returns; delegated model
314
320
  // time must be invisible to the eval timeout budget.
315
321
  vi.spyOn(ai, "completeSimple").mockImplementation(async () => {
@@ -319,7 +325,7 @@ describe("runEvalLlm", () => {
319
325
 
320
326
  const ops: string[] = [];
321
327
  using idle = new IdleTimeout(60);
322
- const result = await runEvalLlm(
328
+ const result = await runEvalCompletion(
323
329
  { prompt: "q", model: "smol" },
324
330
  {
325
331
  session: makeSession(),
@@ -333,12 +339,12 @@ describe("runEvalLlm", () => {
333
339
  );
334
340
 
335
341
  expect(result.text).toBe("the answer");
336
- expect(ops).toEqual([EVAL_TIMEOUT_PAUSE_OP, EVAL_TIMEOUT_RESUME_OP, "llm"]);
342
+ expect(ops).toEqual([EVAL_TIMEOUT_PAUSE_OP, EVAL_TIMEOUT_RESUME_OP, "completion"]);
337
343
  expect(idle.signal.aborted).toBe(false);
338
344
  });
339
345
  });
340
346
 
341
- describe("llm() through eval runtimes", () => {
347
+ describe("completion() through eval runtimes", () => {
342
348
  afterEach(() => {
343
349
  vi.restoreAllMocks();
344
350
  });
@@ -348,13 +354,13 @@ describe("llm() through eval runtimes", () => {
348
354
  await disposeAllKernelSessions();
349
355
  });
350
356
 
351
- it("exposes llm() in the JavaScript runtime", async () => {
352
- using tempDir = TempDir.createSync("@omp-eval-llm-js-");
357
+ it("exposes completion() in the JavaScript runtime", async () => {
358
+ using tempDir = TempDir.createSync("@omp-eval-completion-js-");
353
359
  const sessionFile = path.join(tempDir.path(), "session.jsonl");
354
- const sessionId = `js-llm:${crypto.randomUUID()}`;
360
+ const sessionId = `js-completion:${crypto.randomUUID()}`;
355
361
  vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "hello from smol" }));
356
362
 
357
- const result = await executeJs('return await llm("hi", { model: "smol" });', {
363
+ const result = await executeJs('return await completion("hi", { model: "smol" });', {
358
364
  cwd: tempDir.path(),
359
365
  sessionId,
360
366
  session: makeSession(),
@@ -365,16 +371,16 @@ describe("llm() through eval runtimes", () => {
365
371
  expect(result.output.trim()).toBe("hello from smol");
366
372
  });
367
373
 
368
- it("parses structured llm() output in the JavaScript runtime", async () => {
369
- using tempDir = TempDir.createSync("@omp-eval-llm-js-struct-");
374
+ it("parses structured completion() output in the JavaScript runtime", async () => {
375
+ using tempDir = TempDir.createSync("@omp-eval-completion-js-struct-");
370
376
  const sessionFile = path.join(tempDir.path(), "session.jsonl");
371
- const sessionId = `js-llm-struct:${crypto.randomUUID()}`;
377
+ const sessionId = `js-completion-struct:${crypto.randomUUID()}`;
372
378
  vi.spyOn(ai, "completeSimple").mockResolvedValue(
373
379
  assistant({ toolCall: { name: "respond", arguments: { ok: true, n: 3 } } }),
374
380
  );
375
381
 
376
382
  const result = await executeJs(
377
- 'const r = await llm("hi", { schema: { type: "object" } }); return JSON.stringify(r);',
383
+ 'const r = await completion("hi", { schema: { type: "object" } }); return JSON.stringify(r);',
378
384
  { cwd: tempDir.path(), sessionId, session: makeSession(), sessionFile },
379
385
  );
380
386
 
@@ -382,10 +388,10 @@ describe("llm() through eval runtimes", () => {
382
388
  expect(JSON.parse(result.output.trim())).toEqual({ ok: true, n: 3 });
383
389
  });
384
390
 
385
- it("exposes llm() in the Python runtime", async () => {
386
- const tempDir = TempDir.createSync("@omp-eval-llm-py-");
391
+ it("exposes completion() in the Python runtime", async () => {
392
+ const tempDir = TempDir.createSync("@omp-eval-completion-py-");
387
393
  try {
388
- const result = await runPythonLlmInSubprocess({ structured: false, tempDir });
394
+ const result = await runPythonCompletionInSubprocess({ structured: false, tempDir });
389
395
  expect(result.exitCode).toBe(0);
390
396
  expect(result.output.trim()).toBe("hello from python");
391
397
  } finally {
@@ -393,10 +399,10 @@ describe("llm() through eval runtimes", () => {
393
399
  }
394
400
  });
395
401
 
396
- it("parses structured llm() output in the Python runtime", async () => {
397
- const tempDir = TempDir.createSync("@omp-eval-llm-py-struct-");
402
+ it("parses structured completion() output in the Python runtime", async () => {
403
+ const tempDir = TempDir.createSync("@omp-eval-completion-py-struct-");
398
404
  try {
399
- const result = await runPythonLlmInSubprocess({ structured: true, tempDir });
405
+ const result = await runPythonCompletionInSubprocess({ structured: true, tempDir });
400
406
  expect(result.exitCode).toBe(0);
401
407
  expect(JSON.parse(result.output.trim())).toEqual({ ok: true });
402
408
  } finally {