@oh-my-pi/pi-coding-agent 15.10.2 → 15.10.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95)
  1. package/CHANGELOG.md +66 -1
  2. package/dist/types/cli/gallery-fixtures/types.d.ts +7 -1
  3. package/dist/types/edit/index.d.ts +0 -1
  4. package/dist/types/eval/__tests__/js-context-manager.test.d.ts +1 -0
  5. package/dist/types/eval/bridge-timeout.d.ts +1 -1
  6. package/dist/types/eval/{llm-bridge.d.ts → completion-bridge.d.ts} +8 -8
  7. package/dist/types/eval/idle-timeout.d.ts +1 -1
  8. package/dist/types/lsp/index.d.ts +0 -5
  9. package/dist/types/main.d.ts +11 -0
  10. package/dist/types/modes/components/assistant-message.d.ts +0 -9
  11. package/dist/types/modes/components/late-diagnostics-message.d.ts +20 -0
  12. package/dist/types/modes/components/read-tool-group.d.ts +6 -0
  13. package/dist/types/modes/components/session-selector.d.ts +16 -7
  14. package/dist/types/modes/components/tool-execution.d.ts +0 -18
  15. package/dist/types/modes/types.d.ts +4 -0
  16. package/dist/types/session/messages.d.ts +11 -8
  17. package/dist/types/session/yield-queue.d.ts +10 -1
  18. package/dist/types/tools/eval-render.d.ts +0 -1
  19. package/dist/types/tools/index.d.ts +31 -0
  20. package/dist/types/tools/path-utils.d.ts +5 -1
  21. package/dist/types/tools/read.d.ts +2 -1
  22. package/dist/types/tools/render-utils.d.ts +3 -1
  23. package/dist/types/tools/renderers.d.ts +0 -15
  24. package/dist/types/tools/write.d.ts +0 -2
  25. package/dist/types/tui/code-cell.d.ts +0 -2
  26. package/dist/types/tui/hyperlink.d.ts +5 -7
  27. package/dist/types/tui/output-block.d.ts +0 -18
  28. package/package.json +9 -9
  29. package/src/cli/gallery-cli.ts +4 -0
  30. package/src/cli/gallery-fixtures/codeintel.ts +0 -1
  31. package/src/cli/gallery-fixtures/fs.ts +68 -1
  32. package/src/cli/gallery-fixtures/types.ts +8 -1
  33. package/src/commit/agentic/agent.ts +1 -0
  34. package/src/edit/hashline/diff.ts +86 -0
  35. package/src/edit/hashline/execute.ts +14 -1
  36. package/src/edit/index.ts +31 -17
  37. package/src/edit/renderer.ts +116 -31
  38. package/src/eval/__tests__/agent-bridge.test.ts +13 -0
  39. package/src/eval/__tests__/{llm-bridge.test.ts → completion-bridge.test.ts} +60 -54
  40. package/src/eval/__tests__/js-context-manager.test.ts +241 -0
  41. package/src/eval/agent-bridge.ts +6 -1
  42. package/src/eval/bridge-timeout.ts +1 -1
  43. package/src/eval/{llm-bridge.ts → completion-bridge.ts} +30 -27
  44. package/src/eval/idle-timeout.ts +1 -1
  45. package/src/eval/js/context-manager.ts +66 -6
  46. package/src/eval/js/shared/prelude.txt +28 -12
  47. package/src/eval/js/tool-bridge.ts +3 -3
  48. package/src/eval/js/worker-entry.ts +6 -0
  49. package/src/eval/py/prelude.py +3 -3
  50. package/src/internal-urls/docs-index.generated.ts +8 -7
  51. package/src/lsp/index.ts +128 -52
  52. package/src/main.ts +54 -14
  53. package/src/modes/components/assistant-message.ts +3 -15
  54. package/src/modes/components/late-diagnostics-message.ts +60 -0
  55. package/src/modes/components/plan-review-overlay.ts +26 -5
  56. package/src/modes/components/read-tool-group.ts +415 -35
  57. package/src/modes/components/session-selector.ts +89 -35
  58. package/src/modes/components/tips.txt +1 -1
  59. package/src/modes/components/tool-execution.ts +7 -49
  60. package/src/modes/components/transcript-container.ts +108 -32
  61. package/src/modes/controllers/event-controller.ts +6 -1
  62. package/src/modes/controllers/input-controller.ts +10 -2
  63. package/src/modes/types.ts +4 -0
  64. package/src/modes/utils/ui-helpers.ts +26 -5
  65. package/src/prompts/system/manual-continue.md +7 -0
  66. package/src/prompts/system/plan-mode-active.md +56 -72
  67. package/src/prompts/system/tiny-title-system.md +1 -1
  68. package/src/prompts/system/title-system.md +16 -3
  69. package/src/prompts/system/workflow-notice.md +1 -1
  70. package/src/prompts/tools/eval.md +6 -4
  71. package/src/prompts/tools/lsp-late-diagnostic.md +8 -0
  72. package/src/sdk.ts +59 -1
  73. package/src/session/agent-session.ts +5 -3
  74. package/src/session/messages.ts +21 -14
  75. package/src/session/session-manager.ts +2 -2
  76. package/src/session/yield-queue.ts +20 -2
  77. package/src/task/executor.ts +1 -0
  78. package/src/tiny/title-client.ts +6 -1
  79. package/src/tools/bash.ts +0 -7
  80. package/src/tools/eval-render.ts +6 -25
  81. package/src/tools/eval.ts +1 -1
  82. package/src/tools/find.ts +148 -106
  83. package/src/tools/index.ts +32 -0
  84. package/src/tools/path-utils.ts +19 -22
  85. package/src/tools/read.ts +16 -8
  86. package/src/tools/render-utils.ts +3 -1
  87. package/src/tools/renderers.ts +0 -15
  88. package/src/tools/ssh.ts +0 -1
  89. package/src/tools/todo.ts +1 -0
  90. package/src/tools/write.ts +3 -12
  91. package/src/tui/code-cell.ts +1 -6
  92. package/src/tui/hyperlink.ts +13 -23
  93. package/src/tui/output-block.ts +2 -97
  94. package/src/utils/title-generator.ts +2 -2
  95. package/dist/types/eval/__tests__/{llm-bridge.test.d.ts → completion-bridge.test.d.ts} +0 -0
@@ -10,10 +10,10 @@ import { Settings } from "../../config/settings";
10
10
  import type { ToolSession } from "../../tools";
11
11
  import { ToolError } from "../../tools/tool-errors";
12
12
  import { EVAL_TIMEOUT_PAUSE_OP, EVAL_TIMEOUT_RESUME_OP } from "../bridge-timeout";
13
+ import { runEvalCompletion } from "../completion-bridge";
13
14
  import { IdleTimeout } from "../idle-timeout";
14
15
  import { disposeAllVmContexts } from "../js/context-manager";
15
16
  import { executeJs } from "../js/executor";
16
- import { runEvalLlm } from "../llm-bridge";
17
17
  import { disposeAllKernelSessions, type PythonResult } from "../py/executor";
18
18
 
19
19
  function makeModel(provider: string, id: string, extra: Partial<Model<Api>> = {}): Model<Api> {
@@ -98,16 +98,19 @@ function assistant(opts: {
98
98
  };
99
99
  }
100
100
 
101
- async function runPythonLlmInSubprocess(options: { structured: boolean; tempDir: TempDir }): Promise<PythonResult> {
101
+ async function runPythonCompletionInSubprocess(options: {
102
+ structured: boolean;
103
+ tempDir: TempDir;
104
+ }): Promise<PythonResult> {
102
105
  const repoRoot = path.resolve(import.meta.dir, "../../../..");
103
- const scriptPath = path.join(options.tempDir.path(), "run-python-llm.ts");
104
- const resultPath = path.join(options.tempDir.path(), "python-llm-result.json");
106
+ const scriptPath = path.join(options.tempDir.path(), "run-python-completion.ts");
107
+ const resultPath = path.join(options.tempDir.path(), "python-completion-result.json");
105
108
  const aiPath = path.resolve(import.meta.dir, "../../../../ai/src/index.ts");
106
109
  const executorPath = path.resolve(import.meta.dir, "../py/executor.ts");
107
110
  const settingsPath = path.resolve(import.meta.dir, "../../config/settings.ts");
108
111
  const code = options.structured
109
- ? 'import json\nprint(json.dumps(llm("hi", schema={"type": "object"})))'
110
- : 'print(llm("hi", model="smol"))';
112
+ ? 'import json\nprint(json.dumps(completion("hi", schema={"type": "object"})))'
113
+ : 'print(completion("hi", model="smol"))';
111
114
  const responseContent = options.structured
112
115
  ? '[{ type: "toolCall", id: "tc-1", name: "respond", arguments: { ok: true } }]'
113
116
  : '[{ type: "text", text: "hello from python" }]';
@@ -153,7 +156,7 @@ vi.spyOn(ai, "completeSimple").mockResolvedValue({
153
156
  });
154
157
  const result = await executePython(${JSON.stringify(code)}, {
155
158
  cwd: ${JSON.stringify(options.tempDir.path())},
156
- sessionId: ${JSON.stringify(`py-llm:${options.structured ? "struct" : "plain"}`)},
159
+ sessionId: ${JSON.stringify(`py-completion:${options.structured ? "struct" : "plain"}`)},
157
160
  sessionFile: ${JSON.stringify(path.join(options.tempDir.path(), "session.jsonl"))},
158
161
  toolSession: session,
159
162
  kernelMode: "per-call",
@@ -165,11 +168,12 @@ process.exit(0);
165
168
  const child = await $`bun ${scriptPath}`.cwd(repoRoot).quiet().nothrow();
166
169
  const stdout = child.stdout.toString();
167
170
  const stderr = child.stderr.toString();
168
- if (child.exitCode !== 0) throw new Error(stderr || stdout || `Python llm subprocess exited with ${child.exitCode}`);
171
+ if (child.exitCode !== 0)
172
+ throw new Error(stderr || stdout || `Python completion subprocess exited with ${child.exitCode}`);
169
173
  return (await Bun.file(resultPath).json()) as PythonResult;
170
174
  }
171
175
 
172
- describe("runEvalLlm", () => {
176
+ describe("runEvalCompletion", () => {
173
177
  afterEach(() => {
174
178
  vi.restoreAllMocks();
175
179
  });
@@ -178,9 +182,9 @@ describe("runEvalLlm", () => {
178
182
  const spy = vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "ok" }));
179
183
  const session = makeSession();
180
184
 
181
- await runEvalLlm({ prompt: "q", model: "smol" }, { session });
182
- await runEvalLlm({ prompt: "q", model: "default" }, { session });
183
- await runEvalLlm({ prompt: "q", model: "slow" }, { session });
185
+ await runEvalCompletion({ prompt: "q", model: "smol" }, { session });
186
+ await runEvalCompletion({ prompt: "q", model: "default" }, { session });
187
+ await runEvalCompletion({ prompt: "q", model: "slow" }, { session });
184
188
 
185
189
  const resolved = spy.mock.calls.map(call => {
186
190
  const model = call[0] as Model<Api>;
@@ -193,7 +197,7 @@ describe("runEvalLlm", () => {
193
197
  const spy = vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "ok" }));
194
198
  const session = makeSession({ available: [SMOL, DEFAULT, SLOW], activeModel: "p/slow" });
195
199
 
196
- await runEvalLlm({ prompt: "q", model: "default" }, { session });
200
+ await runEvalCompletion({ prompt: "q", model: "default" }, { session });
197
201
 
198
202
  const model = spy.mock.calls[0]?.[0] as Model<Api>;
199
203
  expect(`${model.provider}/${model.id}`).toBe("p/slow");
@@ -201,7 +205,7 @@ describe("runEvalLlm", () => {
201
205
 
202
206
  it("returns the completion text in plain mode", async () => {
203
207
  vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "the answer" }));
204
- const result = await runEvalLlm({ prompt: "q", model: "smol" }, { session: makeSession() });
208
+ const result = await runEvalCompletion({ prompt: "q", model: "smol" }, { session: makeSession() });
205
209
  expect(result.text).toBe("the answer");
206
210
  expect(result.details).toEqual({ model: "p/smol", tier: "smol", structured: false });
207
211
  });
@@ -209,10 +213,10 @@ describe("runEvalLlm", () => {
209
213
  it("supplies a non-empty systemPrompt when system is omitted (codex 'Instructions are required' guard)", async () => {
210
214
  // The openai-codex Responses transformer drops `instructions` when no
211
215
  // system prompt is provided, and the remote endpoint then 400s with
212
- // "Instructions are required". runEvalLlm must always carry a non-empty
213
- // systemPrompt so `llm("…")` without a `system` argument works.
216
+ // "Instructions are required". runEvalCompletion must always carry a non-empty
217
+ // systemPrompt so `completion("…")` without a `system` argument works.
214
218
  const spy = vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "ok" }));
215
- await runEvalLlm({ prompt: "q", model: "smol" }, { session: makeSession() });
219
+ await runEvalCompletion({ prompt: "q", model: "smol" }, { session: makeSession() });
216
220
  const ctx = spy.mock.calls[0]?.[1] as { systemPrompt?: string[] };
217
221
  expect(ctx.systemPrompt).toBeDefined();
218
222
  expect(ctx.systemPrompt?.length).toBeGreaterThan(0);
@@ -221,7 +225,7 @@ describe("runEvalLlm", () => {
221
225
 
222
226
  it("honors an explicit system prompt instead of overriding it", async () => {
223
227
  const spy = vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "ok" }));
224
- await runEvalLlm({ prompt: "q", model: "smol", system: "Be terse." }, { session: makeSession() });
228
+ await runEvalCompletion({ prompt: "q", model: "smol", system: "Be terse." }, { session: makeSession() });
225
229
  const ctx = spy.mock.calls[0]?.[1] as { systemPrompt?: string[] };
226
230
  expect(ctx.systemPrompt).toEqual(["Be terse."]);
227
231
  });
@@ -230,7 +234,7 @@ describe("runEvalLlm", () => {
230
234
  const spy = vi
231
235
  .spyOn(ai, "completeSimple")
232
236
  .mockResolvedValue(assistant({ toolCall: { name: "respond", arguments: { answer: 42 } } }));
233
- const result = await runEvalLlm(
237
+ const result = await runEvalCompletion(
234
238
  { prompt: "q", model: "smol", schema: { type: "object", properties: { answer: { type: "number" } } } },
235
239
  { session: makeSession() },
236
240
  );
@@ -246,7 +250,7 @@ describe("runEvalLlm", () => {
246
250
 
247
251
  it("falls back to JSON embedded in text when the model skips the respond tool", async () => {
248
252
  vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: 'here: {"answer": 7}' }));
249
- const result = await runEvalLlm(
253
+ const result = await runEvalCompletion(
250
254
  { prompt: "q", model: "smol", schema: { type: "object" } },
251
255
  { session: makeSession() },
252
256
  );
@@ -257,8 +261,8 @@ describe("runEvalLlm", () => {
257
261
  const spy = vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "ok" }));
258
262
  const session = makeSession({ available: [SMOL, DEFAULT, REASONING_SLOW] });
259
263
 
260
- await runEvalLlm({ prompt: "q", model: "smol" }, { session });
261
- await runEvalLlm({ prompt: "q", model: "slow" }, { session });
264
+ await runEvalCompletion({ prompt: "q", model: "smol" }, { session });
265
+ await runEvalCompletion({ prompt: "q", model: "slow" }, { session });
262
266
 
263
267
  const smolOpts = spy.mock.calls[0]?.[2] as { reasoning?: unknown };
264
268
  const slowOpts = spy.mock.calls[1]?.[2] as { reasoning?: unknown };
@@ -269,47 +273,49 @@ describe("runEvalLlm", () => {
269
273
  it("does not request reasoning for the slow tier on a non-reasoning model", async () => {
270
274
  const spy = vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "ok" }));
271
275
  // SLOW is reasoning:false — must not trip requireSupportedEffort downstream.
272
- const result = await runEvalLlm({ prompt: "q", model: "slow" }, { session: makeSession() });
276
+ const result = await runEvalCompletion({ prompt: "q", model: "slow" }, { session: makeSession() });
273
277
  expect(result.text).toBe("ok");
274
278
  const opts = spy.mock.calls[0]?.[2] as { reasoning?: unknown };
275
279
  expect(opts.reasoning).toBeUndefined();
276
280
  });
277
281
 
278
282
  it("throws ToolError on invalid arguments", async () => {
279
- await expect(runEvalLlm({ prompt: "" }, { session: makeSession() })).rejects.toBeInstanceOf(ToolError);
280
- await expect(runEvalLlm({ prompt: "q", model: "huge" }, { session: makeSession() })).rejects.toBeInstanceOf(
281
- ToolError,
282
- );
283
+ await expect(runEvalCompletion({ prompt: "" }, { session: makeSession() })).rejects.toBeInstanceOf(ToolError);
284
+ await expect(
285
+ runEvalCompletion({ prompt: "q", model: "huge" }, { session: makeSession() }),
286
+ ).rejects.toBeInstanceOf(ToolError);
283
287
  });
284
288
 
285
289
  it("throws ToolError when no model resolves for the tier", async () => {
286
290
  const session = makeSession({ available: [DEFAULT], roles: { smol: "missing/model" } });
287
- await expect(runEvalLlm({ prompt: "q", model: "smol" }, { session })).rejects.toBeInstanceOf(ToolError);
291
+ await expect(runEvalCompletion({ prompt: "q", model: "smol" }, { session })).rejects.toBeInstanceOf(ToolError);
288
292
  });
289
293
 
290
294
  it("throws ToolError when the resolved model has no API key", async () => {
291
295
  const session = makeSession({ apiKey: null });
292
- await expect(runEvalLlm({ prompt: "q", model: "smol" }, { session })).rejects.toBeInstanceOf(ToolError);
296
+ await expect(runEvalCompletion({ prompt: "q", model: "smol" }, { session })).rejects.toBeInstanceOf(ToolError);
293
297
  });
294
298
 
295
299
  it("maps error and aborted stop reasons to ToolError", async () => {
296
300
  vi.spyOn(ai, "completeSimple").mockResolvedValueOnce(assistant({ stopReason: "error", errorMessage: "boom" }));
297
- await expect(runEvalLlm({ prompt: "q", model: "smol" }, { session: makeSession() })).rejects.toThrow("boom");
301
+ await expect(runEvalCompletion({ prompt: "q", model: "smol" }, { session: makeSession() })).rejects.toThrow(
302
+ "boom",
303
+ );
298
304
 
299
305
  vi.spyOn(ai, "completeSimple").mockResolvedValueOnce(assistant({ stopReason: "aborted" }));
300
- await expect(runEvalLlm({ prompt: "q", model: "smol" }, { session: makeSession() })).rejects.toBeInstanceOf(
301
- ToolError,
302
- );
306
+ await expect(
307
+ runEvalCompletion({ prompt: "q", model: "smol" }, { session: makeSession() }),
308
+ ).rejects.toBeInstanceOf(ToolError);
303
309
  });
304
310
 
305
311
  it("throws ToolError when plain mode produces no text", async () => {
306
312
  vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "" }));
307
- await expect(runEvalLlm({ prompt: "q", model: "smol" }, { session: makeSession() })).rejects.toBeInstanceOf(
308
- ToolError,
309
- );
313
+ await expect(
314
+ runEvalCompletion({ prompt: "q", model: "smol" }, { session: makeSession() }),
315
+ ).rejects.toBeInstanceOf(ToolError);
310
316
  });
311
317
 
312
- it("pauses the idle watchdog while a slow llm() request is in flight", async () => {
318
+ it("pauses the idle watchdog while a slow completion() request is in flight", async () => {
313
319
  // A oneshot completion emits no status until it returns; delegated model
314
320
  // time must be invisible to the eval timeout budget.
315
321
  vi.spyOn(ai, "completeSimple").mockImplementation(async () => {
@@ -319,7 +325,7 @@ describe("runEvalLlm", () => {
319
325
 
320
326
  const ops: string[] = [];
321
327
  using idle = new IdleTimeout(60);
322
- const result = await runEvalLlm(
328
+ const result = await runEvalCompletion(
323
329
  { prompt: "q", model: "smol" },
324
330
  {
325
331
  session: makeSession(),
@@ -333,12 +339,12 @@ describe("runEvalLlm", () => {
333
339
  );
334
340
 
335
341
  expect(result.text).toBe("the answer");
336
- expect(ops).toEqual([EVAL_TIMEOUT_PAUSE_OP, EVAL_TIMEOUT_RESUME_OP, "llm"]);
342
+ expect(ops).toEqual([EVAL_TIMEOUT_PAUSE_OP, EVAL_TIMEOUT_RESUME_OP, "completion"]);
337
343
  expect(idle.signal.aborted).toBe(false);
338
344
  });
339
345
  });
340
346
 
341
- describe("llm() through eval runtimes", () => {
347
+ describe("completion() through eval runtimes", () => {
342
348
  afterEach(() => {
343
349
  vi.restoreAllMocks();
344
350
  });
@@ -348,13 +354,13 @@ describe("llm() through eval runtimes", () => {
348
354
  await disposeAllKernelSessions();
349
355
  });
350
356
 
351
- it("exposes llm() in the JavaScript runtime", async () => {
352
- using tempDir = TempDir.createSync("@omp-eval-llm-js-");
357
+ it("exposes completion() in the JavaScript runtime", async () => {
358
+ using tempDir = TempDir.createSync("@omp-eval-completion-js-");
353
359
  const sessionFile = path.join(tempDir.path(), "session.jsonl");
354
- const sessionId = `js-llm:${crypto.randomUUID()}`;
360
+ const sessionId = `js-completion:${crypto.randomUUID()}`;
355
361
  vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "hello from smol" }));
356
362
 
357
- const result = await executeJs('return await llm("hi", { model: "smol" });', {
363
+ const result = await executeJs('return await completion("hi", { model: "smol" });', {
358
364
  cwd: tempDir.path(),
359
365
  sessionId,
360
366
  session: makeSession(),
@@ -365,16 +371,16 @@ describe("llm() through eval runtimes", () => {
365
371
  expect(result.output.trim()).toBe("hello from smol");
366
372
  });
367
373
 
368
- it("parses structured llm() output in the JavaScript runtime", async () => {
369
- using tempDir = TempDir.createSync("@omp-eval-llm-js-struct-");
374
+ it("parses structured completion() output in the JavaScript runtime", async () => {
375
+ using tempDir = TempDir.createSync("@omp-eval-completion-js-struct-");
370
376
  const sessionFile = path.join(tempDir.path(), "session.jsonl");
371
- const sessionId = `js-llm-struct:${crypto.randomUUID()}`;
377
+ const sessionId = `js-completion-struct:${crypto.randomUUID()}`;
372
378
  vi.spyOn(ai, "completeSimple").mockResolvedValue(
373
379
  assistant({ toolCall: { name: "respond", arguments: { ok: true, n: 3 } } }),
374
380
  );
375
381
 
376
382
  const result = await executeJs(
377
- 'const r = await llm("hi", { schema: { type: "object" } }); return JSON.stringify(r);',
383
+ 'const r = await completion("hi", { schema: { type: "object" } }); return JSON.stringify(r);',
378
384
  { cwd: tempDir.path(), sessionId, session: makeSession(), sessionFile },
379
385
  );
380
386
 
@@ -382,10 +388,10 @@ describe("llm() through eval runtimes", () => {
382
388
  expect(JSON.parse(result.output.trim())).toEqual({ ok: true, n: 3 });
383
389
  });
384
390
 
385
- it("exposes llm() in the Python runtime", async () => {
386
- const tempDir = TempDir.createSync("@omp-eval-llm-py-");
391
+ it("exposes completion() in the Python runtime", async () => {
392
+ const tempDir = TempDir.createSync("@omp-eval-completion-py-");
387
393
  try {
388
- const result = await runPythonLlmInSubprocess({ structured: false, tempDir });
394
+ const result = await runPythonCompletionInSubprocess({ structured: false, tempDir });
389
395
  expect(result.exitCode).toBe(0);
390
396
  expect(result.output.trim()).toBe("hello from python");
391
397
  } finally {
@@ -393,10 +399,10 @@ describe("llm() through eval runtimes", () => {
393
399
  }
394
400
  });
395
401
 
396
- it("parses structured llm() output in the Python runtime", async () => {
397
- const tempDir = TempDir.createSync("@omp-eval-llm-py-struct-");
402
+ it("parses structured completion() output in the Python runtime", async () => {
403
+ const tempDir = TempDir.createSync("@omp-eval-completion-py-struct-");
398
404
  try {
399
- const result = await runPythonLlmInSubprocess({ structured: true, tempDir });
405
+ const result = await runPythonCompletionInSubprocess({ structured: true, tempDir });
400
406
  expect(result.exitCode).toBe(0);
401
407
  expect(JSON.parse(result.output.trim())).toEqual({ ok: true });
402
408
  } finally {
@@ -0,0 +1,241 @@
1
+ import { afterEach, describe, expect, it } from "bun:test";
2
+ import { TempDir } from "@oh-my-pi/pi-utils";
3
+ import { Settings } from "../../config/settings";
4
+ import type { ToolSession } from "../../tools";
5
+ import { disposeAllVmContexts } from "../js/context-manager";
6
+ import { executeJs } from "../js/executor";
7
+
8
+ const originalWorker = globalThis.Worker;
9
+
10
+ interface FakeWorkerStats {
11
+ closeRequests: number;
12
+ terminateCalls: number;
13
+ }
14
+
15
+ interface FakeWorkerBehavior {
16
+ exitOnClose: boolean;
17
+ settleRuns: boolean;
18
+ }
19
+
20
+ function makeSession(cwd: string): ToolSession {
21
+ return {
22
+ cwd,
23
+ hasUI: false,
24
+ settings: Settings.isolated({
25
+ "async.enabled": false,
26
+ "task.isolation.mode": "none",
27
+ "task.enableLsp": true,
28
+ }),
29
+ taskDepth: 0,
30
+ enableLsp: true,
31
+ getSessionFile: () => null,
32
+ getSessionSpawns: () => "*",
33
+ getActiveModelString: () => "p/active",
34
+ getModelString: () => "p/fallback",
35
+ getArtifactsDir: () => null,
36
+ getSessionId: () => "test-session",
37
+ getEvalSessionId: () => "test-eval-session",
38
+ };
39
+ }
40
+
41
+ async function withTimeout<T>(promise: Promise<T>, ms: number, label: string): Promise<T> {
42
+ let timeout: NodeJS.Timeout | undefined;
43
+ try {
44
+ return await Promise.race([
45
+ promise,
46
+ new Promise<never>((_, reject) => {
47
+ timeout = setTimeout(() => reject(new Error(`${label} timed out`)), ms);
48
+ }),
49
+ ]);
50
+ } finally {
51
+ if (timeout) clearTimeout(timeout);
52
+ }
53
+ }
54
+
55
+ async function waitForRealWorkerExitAfterClose(cwd: string): Promise<void> {
56
+ const worker = new originalWorker(new URL("../js/worker-entry.ts", import.meta.url).href, { type: "module" });
57
+ const ready = Promise.withResolvers<void>();
58
+ const runComplete = Promise.withResolvers<void>();
59
+ const closedAck = Promise.withResolvers<void>();
60
+ const workerClosed = Promise.withResolvers<void>();
61
+ const runId = `keep-alive:${crypto.randomUUID()}`;
62
+ const snapshot = { cwd, sessionId: `worker-exit:${crypto.randomUUID()}` };
63
+
64
+ worker.addEventListener("message", event => {
65
+ const msg = event.data as { type?: string; runId?: string; ok?: boolean };
66
+ if (msg.type === "ready") ready.resolve();
67
+ else if (msg.type === "result" && msg.runId === runId && msg.ok) runComplete.resolve();
68
+ else if (msg.type === "closed") closedAck.resolve();
69
+ });
70
+ worker.addEventListener("close", () => workerClosed.resolve());
71
+
72
+ try {
73
+ await withTimeout(ready.promise, 1_000, "worker ready");
74
+ worker.postMessage({
75
+ type: "run",
76
+ runId,
77
+ code: "globalThis.__keepAlive = setInterval(() => {}, 1000);\nundefined;",
78
+ filename: "keep-alive.js",
79
+ snapshot,
80
+ });
81
+ await withTimeout(runComplete.promise, 1_000, "worker run");
82
+ worker.postMessage({ type: "close" });
83
+ await withTimeout(closedAck.promise, 1_000, "worker closed ack");
84
+ await withTimeout(workerClosed.promise, 1_000, "worker close event");
85
+ } finally {
86
+ worker.terminate();
87
+ }
88
+ }
89
+
90
+ function installFakeWorker(stats: FakeWorkerStats, behavior: FakeWorkerBehavior): void {
91
+ class FakeWorker {
92
+ #messageListeners = new Set<(event: MessageEvent) => void>();
93
+ #closeListeners = new Set<(event: Event) => void>();
94
+ #readyQueued = false;
95
+ #exited = false;
96
+
97
+ postMessage(message: unknown): void {
98
+ if (!message || typeof message !== "object") return;
99
+ const typed = message as { type?: string; runId?: string };
100
+ if (typed.type === "run" && typed.runId && behavior.settleRuns) {
101
+ queueMicrotask(() => this.#emitMessage({ type: "result", runId: typed.runId, ok: true }));
102
+ return;
103
+ }
104
+ if (typed.type === "close") {
105
+ stats.closeRequests++;
106
+ queueMicrotask(() => {
107
+ this.#emitMessage({ type: "closed" });
108
+ if (behavior.exitOnClose) this.#emitClose();
109
+ });
110
+ }
111
+ }
112
+
113
+ addEventListener(type: string, listener: (event: MessageEvent | Event) => void): void {
114
+ if (type === "close") {
115
+ this.#closeListeners.add(listener as (event: Event) => void);
116
+ return;
117
+ }
118
+ if (type !== "message") return;
119
+ this.#messageListeners.add(listener as (event: MessageEvent) => void);
120
+ if (!this.#readyQueued) {
121
+ this.#readyQueued = true;
122
+ queueMicrotask(() => this.#emitMessage({ type: "ready" }));
123
+ }
124
+ }
125
+
126
+ removeEventListener(type: string, listener: (event: MessageEvent | Event) => void): void {
127
+ if (type === "close") {
128
+ this.#closeListeners.delete(listener as (event: Event) => void);
129
+ return;
130
+ }
131
+ if (type !== "message") return;
132
+ this.#messageListeners.delete(listener as (event: MessageEvent) => void);
133
+ }
134
+
135
+ terminate(): void {
136
+ stats.terminateCalls++;
137
+ this.#emitClose();
138
+ }
139
+
140
+ #emitMessage(data: unknown): void {
141
+ const event = new MessageEvent("message", { data });
142
+ for (const listener of this.#messageListeners) listener(event);
143
+ }
144
+
145
+ #emitClose(): void {
146
+ if (this.#exited) return;
147
+ this.#exited = true;
148
+ const event = new Event("close");
149
+ for (const listener of this.#closeListeners) listener(event);
150
+ }
151
+ }
152
+
153
+ Object.defineProperty(globalThis, "Worker", {
154
+ configurable: true,
155
+ writable: true,
156
+ value: FakeWorker as unknown as typeof Worker,
157
+ });
158
+ }
159
+
160
+ describe("JavaScript eval worker lifecycle", () => {
161
+ afterEach(async () => {
162
+ await disposeAllVmContexts();
163
+ Object.defineProperty(globalThis, "Worker", {
164
+ configurable: true,
165
+ writable: true,
166
+ value: originalWorker,
167
+ });
168
+ });
169
+
170
+ it("exits a real worker on graceful close even with ref'ed user handles", async () => {
171
+ using tempDir = TempDir.createSync("@omp-js-worker-real-close-");
172
+
173
+ await waitForRealWorkerExitAfterClose(tempDir.path());
174
+ });
175
+
176
+ it("waits for the worker to close on reset instead of force-terminating it", async () => {
177
+ using tempDir = TempDir.createSync("@omp-js-worker-close-");
178
+ const stats: FakeWorkerStats = { closeRequests: 0, terminateCalls: 0 };
179
+ installFakeWorker(stats, { exitOnClose: true, settleRuns: true });
180
+
181
+ const session = makeSession(tempDir.path());
182
+ const sessionId = `js-close:${crypto.randomUUID()}`;
183
+
184
+ const first = await executeJs("globalThis.marker = 1;", { cwd: tempDir.path(), sessionId, session });
185
+ expect(first.exitCode).toBe(0);
186
+
187
+ const second = await executeJs("globalThis.marker = 2;", {
188
+ cwd: tempDir.path(),
189
+ sessionId,
190
+ session,
191
+ reset: true,
192
+ });
193
+ expect(second.exitCode).toBe(0);
194
+ expect(stats.closeRequests).toBe(1);
195
+ expect(stats.terminateCalls).toBe(0);
196
+ });
197
+
198
+ it("terminates when close is acknowledged but the worker does not exit", async () => {
199
+ using tempDir = TempDir.createSync("@omp-js-worker-close-hung-");
200
+ const stats: FakeWorkerStats = { closeRequests: 0, terminateCalls: 0 };
201
+ installFakeWorker(stats, { exitOnClose: false, settleRuns: true });
202
+
203
+ const session = makeSession(tempDir.path());
204
+ const sessionId = `js-close-hung:${crypto.randomUUID()}`;
205
+
206
+ const first = await executeJs("globalThis.marker = 1;", { cwd: tempDir.path(), sessionId, session });
207
+ expect(first.exitCode).toBe(0);
208
+
209
+ const second = await executeJs("globalThis.marker = 2;", {
210
+ cwd: tempDir.path(),
211
+ sessionId,
212
+ session,
213
+ reset: true,
214
+ });
215
+ expect(second.exitCode).toBe(0);
216
+ expect(stats.closeRequests).toBe(1);
217
+ expect(stats.terminateCalls).toBe(1);
218
+ });
219
+
220
+ it("force-terminates instead of closing when an in-flight run is aborted", async () => {
221
+ using tempDir = TempDir.createSync("@omp-js-worker-abort-");
222
+ const stats: FakeWorkerStats = { closeRequests: 0, terminateCalls: 0 };
223
+ installFakeWorker(stats, { exitOnClose: true, settleRuns: false });
224
+
225
+ const session = makeSession(tempDir.path());
226
+ const sessionId = `js-abort:${crypto.randomUUID()}`;
227
+ const controller = new AbortController();
228
+ const resultPromise = executeJs("globalThis.neverFinishes = true;", {
229
+ cwd: tempDir.path(),
230
+ sessionId,
231
+ session,
232
+ signal: controller.signal,
233
+ });
234
+ setTimeout(() => controller.abort(new DOMException("Execution aborted", "AbortError")), 0);
235
+
236
+ const result = await resultPromise;
237
+ expect(result.cancelled).toBe(true);
238
+ expect(stats.closeRequests).toBe(0);
239
+ expect(stats.terminateCalls).toBe(1);
240
+ });
241
+ });
@@ -272,7 +272,12 @@ export async function runEvalAgent(args: unknown, options: EvalAgentBridgeOption
272
272
  persistArtifacts: Boolean(sessionFile),
273
273
  artifactsDir,
274
274
  contextFile,
275
- enableLsp: (options.session.enableLsp ?? true) && options.session.settings.get("task.enableLsp"),
275
+ // Eval `agent()` subagents are short-lived programmatic helpers (data
276
+ // collection, structured output, parallel() fan-out). LSP server
277
+ // cold-start costs tens of seconds and is pure overhead here, so it is
278
+ // forced off regardless of the `task.enableLsp` setting — that knob only
279
+ // governs LSP-aware delegation through the `task` tool.
280
+ enableLsp: false,
276
281
  signal: options.signal,
277
282
  eventBus: options.session.eventBus,
278
283
  onProgress: progress => emitProgressStatus(options.emitStatus, progress),
@@ -2,7 +2,7 @@
2
2
  * Timeout suspension for in-flight host-side eval bridge calls.
3
3
  *
4
4
  * The eval watchdog caps a cell's `timeout` as a budget on the cell runtime's
5
- * own work. Host-side `agent()` / `parallel()` / `llm()` bridge calls hand
5
+ * own work. Host-side `agent()` / `parallel()` / `completion()` bridge calls hand
6
6
  * control to the outer TypeScript process, where the Python kernel or JS VM is
7
7
  * only waiting for a result. While that delegated work is in flight, the cell
8
8
  * timeout must be ignored completely; once the bridge returns and the runtime is