@oh-my-pi/pi-coding-agent 15.10.2 → 15.10.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +66 -1
- package/dist/types/cli/gallery-fixtures/types.d.ts +7 -1
- package/dist/types/edit/index.d.ts +0 -1
- package/dist/types/eval/__tests__/js-context-manager.test.d.ts +1 -0
- package/dist/types/eval/bridge-timeout.d.ts +1 -1
- package/dist/types/eval/{llm-bridge.d.ts → completion-bridge.d.ts} +8 -8
- package/dist/types/eval/idle-timeout.d.ts +1 -1
- package/dist/types/lsp/index.d.ts +0 -5
- package/dist/types/main.d.ts +11 -0
- package/dist/types/modes/components/assistant-message.d.ts +0 -9
- package/dist/types/modes/components/late-diagnostics-message.d.ts +20 -0
- package/dist/types/modes/components/read-tool-group.d.ts +6 -0
- package/dist/types/modes/components/session-selector.d.ts +16 -7
- package/dist/types/modes/components/tool-execution.d.ts +0 -18
- package/dist/types/modes/types.d.ts +4 -0
- package/dist/types/session/messages.d.ts +11 -8
- package/dist/types/session/yield-queue.d.ts +10 -1
- package/dist/types/tools/eval-render.d.ts +0 -1
- package/dist/types/tools/index.d.ts +31 -0
- package/dist/types/tools/path-utils.d.ts +5 -1
- package/dist/types/tools/read.d.ts +2 -1
- package/dist/types/tools/render-utils.d.ts +3 -1
- package/dist/types/tools/renderers.d.ts +0 -15
- package/dist/types/tools/write.d.ts +0 -2
- package/dist/types/tui/code-cell.d.ts +0 -2
- package/dist/types/tui/hyperlink.d.ts +5 -7
- package/dist/types/tui/output-block.d.ts +0 -18
- package/package.json +9 -9
- package/src/cli/gallery-cli.ts +4 -0
- package/src/cli/gallery-fixtures/codeintel.ts +0 -1
- package/src/cli/gallery-fixtures/fs.ts +68 -1
- package/src/cli/gallery-fixtures/types.ts +8 -1
- package/src/commit/agentic/agent.ts +1 -0
- package/src/edit/hashline/diff.ts +86 -0
- package/src/edit/hashline/execute.ts +14 -1
- package/src/edit/index.ts +31 -17
- package/src/edit/renderer.ts +116 -31
- package/src/eval/__tests__/agent-bridge.test.ts +13 -0
- package/src/eval/__tests__/{llm-bridge.test.ts → completion-bridge.test.ts} +60 -54
- package/src/eval/__tests__/js-context-manager.test.ts +241 -0
- package/src/eval/agent-bridge.ts +6 -1
- package/src/eval/bridge-timeout.ts +1 -1
- package/src/eval/{llm-bridge.ts → completion-bridge.ts} +30 -27
- package/src/eval/idle-timeout.ts +1 -1
- package/src/eval/js/context-manager.ts +66 -6
- package/src/eval/js/shared/prelude.txt +28 -12
- package/src/eval/js/tool-bridge.ts +3 -3
- package/src/eval/js/worker-entry.ts +6 -0
- package/src/eval/py/prelude.py +3 -3
- package/src/internal-urls/docs-index.generated.ts +8 -7
- package/src/lsp/index.ts +128 -52
- package/src/main.ts +54 -14
- package/src/modes/components/assistant-message.ts +3 -15
- package/src/modes/components/late-diagnostics-message.ts +60 -0
- package/src/modes/components/plan-review-overlay.ts +26 -5
- package/src/modes/components/read-tool-group.ts +415 -35
- package/src/modes/components/session-selector.ts +89 -35
- package/src/modes/components/tips.txt +1 -1
- package/src/modes/components/tool-execution.ts +7 -49
- package/src/modes/components/transcript-container.ts +108 -32
- package/src/modes/controllers/event-controller.ts +6 -1
- package/src/modes/controllers/input-controller.ts +10 -2
- package/src/modes/types.ts +4 -0
- package/src/modes/utils/ui-helpers.ts +26 -5
- package/src/prompts/system/manual-continue.md +7 -0
- package/src/prompts/system/plan-mode-active.md +56 -72
- package/src/prompts/system/tiny-title-system.md +1 -1
- package/src/prompts/system/title-system.md +16 -3
- package/src/prompts/system/workflow-notice.md +1 -1
- package/src/prompts/tools/eval.md +6 -4
- package/src/prompts/tools/lsp-late-diagnostic.md +8 -0
- package/src/sdk.ts +59 -1
- package/src/session/agent-session.ts +5 -3
- package/src/session/messages.ts +21 -14
- package/src/session/session-manager.ts +2 -2
- package/src/session/yield-queue.ts +20 -2
- package/src/task/executor.ts +1 -0
- package/src/tiny/title-client.ts +6 -1
- package/src/tools/bash.ts +0 -7
- package/src/tools/eval-render.ts +6 -25
- package/src/tools/eval.ts +1 -1
- package/src/tools/find.ts +148 -106
- package/src/tools/index.ts +32 -0
- package/src/tools/path-utils.ts +19 -22
- package/src/tools/read.ts +16 -8
- package/src/tools/render-utils.ts +3 -1
- package/src/tools/renderers.ts +0 -15
- package/src/tools/ssh.ts +0 -1
- package/src/tools/todo.ts +1 -0
- package/src/tools/write.ts +3 -12
- package/src/tui/code-cell.ts +1 -6
- package/src/tui/hyperlink.ts +13 -23
- package/src/tui/output-block.ts +2 -97
- package/src/utils/title-generator.ts +2 -2
- /package/dist/types/eval/__tests__/{llm-bridge.test.d.ts → completion-bridge.test.d.ts} +0 -0
|
@@ -10,10 +10,10 @@ import { Settings } from "../../config/settings";
|
|
|
10
10
|
import type { ToolSession } from "../../tools";
|
|
11
11
|
import { ToolError } from "../../tools/tool-errors";
|
|
12
12
|
import { EVAL_TIMEOUT_PAUSE_OP, EVAL_TIMEOUT_RESUME_OP } from "../bridge-timeout";
|
|
13
|
+
import { runEvalCompletion } from "../completion-bridge";
|
|
13
14
|
import { IdleTimeout } from "../idle-timeout";
|
|
14
15
|
import { disposeAllVmContexts } from "../js/context-manager";
|
|
15
16
|
import { executeJs } from "../js/executor";
|
|
16
|
-
import { runEvalLlm } from "../llm-bridge";
|
|
17
17
|
import { disposeAllKernelSessions, type PythonResult } from "../py/executor";
|
|
18
18
|
|
|
19
19
|
function makeModel(provider: string, id: string, extra: Partial<Model<Api>> = {}): Model<Api> {
|
|
@@ -98,16 +98,19 @@ function assistant(opts: {
|
|
|
98
98
|
};
|
|
99
99
|
}
|
|
100
100
|
|
|
101
|
-
async function
|
|
101
|
+
async function runPythonCompletionInSubprocess(options: {
|
|
102
|
+
structured: boolean;
|
|
103
|
+
tempDir: TempDir;
|
|
104
|
+
}): Promise<PythonResult> {
|
|
102
105
|
const repoRoot = path.resolve(import.meta.dir, "../../../..");
|
|
103
|
-
const scriptPath = path.join(options.tempDir.path(), "run-python-
|
|
104
|
-
const resultPath = path.join(options.tempDir.path(), "python-
|
|
106
|
+
const scriptPath = path.join(options.tempDir.path(), "run-python-completion.ts");
|
|
107
|
+
const resultPath = path.join(options.tempDir.path(), "python-completion-result.json");
|
|
105
108
|
const aiPath = path.resolve(import.meta.dir, "../../../../ai/src/index.ts");
|
|
106
109
|
const executorPath = path.resolve(import.meta.dir, "../py/executor.ts");
|
|
107
110
|
const settingsPath = path.resolve(import.meta.dir, "../../config/settings.ts");
|
|
108
111
|
const code = options.structured
|
|
109
|
-
? 'import json\nprint(json.dumps(
|
|
110
|
-
: 'print(
|
|
112
|
+
? 'import json\nprint(json.dumps(completion("hi", schema={"type": "object"})))'
|
|
113
|
+
: 'print(completion("hi", model="smol"))';
|
|
111
114
|
const responseContent = options.structured
|
|
112
115
|
? '[{ type: "toolCall", id: "tc-1", name: "respond", arguments: { ok: true } }]'
|
|
113
116
|
: '[{ type: "text", text: "hello from python" }]';
|
|
@@ -153,7 +156,7 @@ vi.spyOn(ai, "completeSimple").mockResolvedValue({
|
|
|
153
156
|
});
|
|
154
157
|
const result = await executePython(${JSON.stringify(code)}, {
|
|
155
158
|
cwd: ${JSON.stringify(options.tempDir.path())},
|
|
156
|
-
sessionId: ${JSON.stringify(`py-
|
|
159
|
+
sessionId: ${JSON.stringify(`py-completion:${options.structured ? "struct" : "plain"}`)},
|
|
157
160
|
sessionFile: ${JSON.stringify(path.join(options.tempDir.path(), "session.jsonl"))},
|
|
158
161
|
toolSession: session,
|
|
159
162
|
kernelMode: "per-call",
|
|
@@ -165,11 +168,12 @@ process.exit(0);
|
|
|
165
168
|
const child = await $`bun ${scriptPath}`.cwd(repoRoot).quiet().nothrow();
|
|
166
169
|
const stdout = child.stdout.toString();
|
|
167
170
|
const stderr = child.stderr.toString();
|
|
168
|
-
if (child.exitCode !== 0)
|
|
171
|
+
if (child.exitCode !== 0)
|
|
172
|
+
throw new Error(stderr || stdout || `Python completion subprocess exited with ${child.exitCode}`);
|
|
169
173
|
return (await Bun.file(resultPath).json()) as PythonResult;
|
|
170
174
|
}
|
|
171
175
|
|
|
172
|
-
describe("
|
|
176
|
+
describe("runEvalCompletion", () => {
|
|
173
177
|
afterEach(() => {
|
|
174
178
|
vi.restoreAllMocks();
|
|
175
179
|
});
|
|
@@ -178,9 +182,9 @@ describe("runEvalLlm", () => {
|
|
|
178
182
|
const spy = vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "ok" }));
|
|
179
183
|
const session = makeSession();
|
|
180
184
|
|
|
181
|
-
await
|
|
182
|
-
await
|
|
183
|
-
await
|
|
185
|
+
await runEvalCompletion({ prompt: "q", model: "smol" }, { session });
|
|
186
|
+
await runEvalCompletion({ prompt: "q", model: "default" }, { session });
|
|
187
|
+
await runEvalCompletion({ prompt: "q", model: "slow" }, { session });
|
|
184
188
|
|
|
185
189
|
const resolved = spy.mock.calls.map(call => {
|
|
186
190
|
const model = call[0] as Model<Api>;
|
|
@@ -193,7 +197,7 @@ describe("runEvalLlm", () => {
|
|
|
193
197
|
const spy = vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "ok" }));
|
|
194
198
|
const session = makeSession({ available: [SMOL, DEFAULT, SLOW], activeModel: "p/slow" });
|
|
195
199
|
|
|
196
|
-
await
|
|
200
|
+
await runEvalCompletion({ prompt: "q", model: "default" }, { session });
|
|
197
201
|
|
|
198
202
|
const model = spy.mock.calls[0]?.[0] as Model<Api>;
|
|
199
203
|
expect(`${model.provider}/${model.id}`).toBe("p/slow");
|
|
@@ -201,7 +205,7 @@ describe("runEvalLlm", () => {
|
|
|
201
205
|
|
|
202
206
|
it("returns the completion text in plain mode", async () => {
|
|
203
207
|
vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "the answer" }));
|
|
204
|
-
const result = await
|
|
208
|
+
const result = await runEvalCompletion({ prompt: "q", model: "smol" }, { session: makeSession() });
|
|
205
209
|
expect(result.text).toBe("the answer");
|
|
206
210
|
expect(result.details).toEqual({ model: "p/smol", tier: "smol", structured: false });
|
|
207
211
|
});
|
|
@@ -209,10 +213,10 @@ describe("runEvalLlm", () => {
|
|
|
209
213
|
it("supplies a non-empty systemPrompt when system is omitted (codex 'Instructions are required' guard)", async () => {
|
|
210
214
|
// The openai-codex Responses transformer drops `instructions` when no
|
|
211
215
|
// system prompt is provided, and the remote endpoint then 400s with
|
|
212
|
-
// "Instructions are required".
|
|
213
|
-
// systemPrompt so `
|
|
216
|
+
// "Instructions are required". runEvalCompletion must always carry a non-empty
|
|
217
|
+
// systemPrompt so `completion("…")` without a `system` argument works.
|
|
214
218
|
const spy = vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "ok" }));
|
|
215
|
-
await
|
|
219
|
+
await runEvalCompletion({ prompt: "q", model: "smol" }, { session: makeSession() });
|
|
216
220
|
const ctx = spy.mock.calls[0]?.[1] as { systemPrompt?: string[] };
|
|
217
221
|
expect(ctx.systemPrompt).toBeDefined();
|
|
218
222
|
expect(ctx.systemPrompt?.length).toBeGreaterThan(0);
|
|
@@ -221,7 +225,7 @@ describe("runEvalLlm", () => {
|
|
|
221
225
|
|
|
222
226
|
it("honors an explicit system prompt instead of overriding it", async () => {
|
|
223
227
|
const spy = vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "ok" }));
|
|
224
|
-
await
|
|
228
|
+
await runEvalCompletion({ prompt: "q", model: "smol", system: "Be terse." }, { session: makeSession() });
|
|
225
229
|
const ctx = spy.mock.calls[0]?.[1] as { systemPrompt?: string[] };
|
|
226
230
|
expect(ctx.systemPrompt).toEqual(["Be terse."]);
|
|
227
231
|
});
|
|
@@ -230,7 +234,7 @@ describe("runEvalLlm", () => {
|
|
|
230
234
|
const spy = vi
|
|
231
235
|
.spyOn(ai, "completeSimple")
|
|
232
236
|
.mockResolvedValue(assistant({ toolCall: { name: "respond", arguments: { answer: 42 } } }));
|
|
233
|
-
const result = await
|
|
237
|
+
const result = await runEvalCompletion(
|
|
234
238
|
{ prompt: "q", model: "smol", schema: { type: "object", properties: { answer: { type: "number" } } } },
|
|
235
239
|
{ session: makeSession() },
|
|
236
240
|
);
|
|
@@ -246,7 +250,7 @@ describe("runEvalLlm", () => {
|
|
|
246
250
|
|
|
247
251
|
it("falls back to JSON embedded in text when the model skips the respond tool", async () => {
|
|
248
252
|
vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: 'here: {"answer": 7}' }));
|
|
249
|
-
const result = await
|
|
253
|
+
const result = await runEvalCompletion(
|
|
250
254
|
{ prompt: "q", model: "smol", schema: { type: "object" } },
|
|
251
255
|
{ session: makeSession() },
|
|
252
256
|
);
|
|
@@ -257,8 +261,8 @@ describe("runEvalLlm", () => {
|
|
|
257
261
|
const spy = vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "ok" }));
|
|
258
262
|
const session = makeSession({ available: [SMOL, DEFAULT, REASONING_SLOW] });
|
|
259
263
|
|
|
260
|
-
await
|
|
261
|
-
await
|
|
264
|
+
await runEvalCompletion({ prompt: "q", model: "smol" }, { session });
|
|
265
|
+
await runEvalCompletion({ prompt: "q", model: "slow" }, { session });
|
|
262
266
|
|
|
263
267
|
const smolOpts = spy.mock.calls[0]?.[2] as { reasoning?: unknown };
|
|
264
268
|
const slowOpts = spy.mock.calls[1]?.[2] as { reasoning?: unknown };
|
|
@@ -269,47 +273,49 @@ describe("runEvalLlm", () => {
|
|
|
269
273
|
it("does not request reasoning for the slow tier on a non-reasoning model", async () => {
|
|
270
274
|
const spy = vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "ok" }));
|
|
271
275
|
// SLOW is reasoning:false — must not trip requireSupportedEffort downstream.
|
|
272
|
-
const result = await
|
|
276
|
+
const result = await runEvalCompletion({ prompt: "q", model: "slow" }, { session: makeSession() });
|
|
273
277
|
expect(result.text).toBe("ok");
|
|
274
278
|
const opts = spy.mock.calls[0]?.[2] as { reasoning?: unknown };
|
|
275
279
|
expect(opts.reasoning).toBeUndefined();
|
|
276
280
|
});
|
|
277
281
|
|
|
278
282
|
it("throws ToolError on invalid arguments", async () => {
|
|
279
|
-
await expect(
|
|
280
|
-
await expect(
|
|
281
|
-
|
|
282
|
-
);
|
|
283
|
+
await expect(runEvalCompletion({ prompt: "" }, { session: makeSession() })).rejects.toBeInstanceOf(ToolError);
|
|
284
|
+
await expect(
|
|
285
|
+
runEvalCompletion({ prompt: "q", model: "huge" }, { session: makeSession() }),
|
|
286
|
+
).rejects.toBeInstanceOf(ToolError);
|
|
283
287
|
});
|
|
284
288
|
|
|
285
289
|
it("throws ToolError when no model resolves for the tier", async () => {
|
|
286
290
|
const session = makeSession({ available: [DEFAULT], roles: { smol: "missing/model" } });
|
|
287
|
-
await expect(
|
|
291
|
+
await expect(runEvalCompletion({ prompt: "q", model: "smol" }, { session })).rejects.toBeInstanceOf(ToolError);
|
|
288
292
|
});
|
|
289
293
|
|
|
290
294
|
it("throws ToolError when the resolved model has no API key", async () => {
|
|
291
295
|
const session = makeSession({ apiKey: null });
|
|
292
|
-
await expect(
|
|
296
|
+
await expect(runEvalCompletion({ prompt: "q", model: "smol" }, { session })).rejects.toBeInstanceOf(ToolError);
|
|
293
297
|
});
|
|
294
298
|
|
|
295
299
|
it("maps error and aborted stop reasons to ToolError", async () => {
|
|
296
300
|
vi.spyOn(ai, "completeSimple").mockResolvedValueOnce(assistant({ stopReason: "error", errorMessage: "boom" }));
|
|
297
|
-
await expect(
|
|
301
|
+
await expect(runEvalCompletion({ prompt: "q", model: "smol" }, { session: makeSession() })).rejects.toThrow(
|
|
302
|
+
"boom",
|
|
303
|
+
);
|
|
298
304
|
|
|
299
305
|
vi.spyOn(ai, "completeSimple").mockResolvedValueOnce(assistant({ stopReason: "aborted" }));
|
|
300
|
-
await expect(
|
|
301
|
-
|
|
302
|
-
);
|
|
306
|
+
await expect(
|
|
307
|
+
runEvalCompletion({ prompt: "q", model: "smol" }, { session: makeSession() }),
|
|
308
|
+
).rejects.toBeInstanceOf(ToolError);
|
|
303
309
|
});
|
|
304
310
|
|
|
305
311
|
it("throws ToolError when plain mode produces no text", async () => {
|
|
306
312
|
vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "" }));
|
|
307
|
-
await expect(
|
|
308
|
-
|
|
309
|
-
);
|
|
313
|
+
await expect(
|
|
314
|
+
runEvalCompletion({ prompt: "q", model: "smol" }, { session: makeSession() }),
|
|
315
|
+
).rejects.toBeInstanceOf(ToolError);
|
|
310
316
|
});
|
|
311
317
|
|
|
312
|
-
it("pauses the idle watchdog while a slow
|
|
318
|
+
it("pauses the idle watchdog while a slow completion() request is in flight", async () => {
|
|
313
319
|
// A oneshot completion emits no status until it returns; delegated model
|
|
314
320
|
// time must be invisible to the eval timeout budget.
|
|
315
321
|
vi.spyOn(ai, "completeSimple").mockImplementation(async () => {
|
|
@@ -319,7 +325,7 @@ describe("runEvalLlm", () => {
|
|
|
319
325
|
|
|
320
326
|
const ops: string[] = [];
|
|
321
327
|
using idle = new IdleTimeout(60);
|
|
322
|
-
const result = await
|
|
328
|
+
const result = await runEvalCompletion(
|
|
323
329
|
{ prompt: "q", model: "smol" },
|
|
324
330
|
{
|
|
325
331
|
session: makeSession(),
|
|
@@ -333,12 +339,12 @@ describe("runEvalLlm", () => {
|
|
|
333
339
|
);
|
|
334
340
|
|
|
335
341
|
expect(result.text).toBe("the answer");
|
|
336
|
-
expect(ops).toEqual([EVAL_TIMEOUT_PAUSE_OP, EVAL_TIMEOUT_RESUME_OP, "
|
|
342
|
+
expect(ops).toEqual([EVAL_TIMEOUT_PAUSE_OP, EVAL_TIMEOUT_RESUME_OP, "completion"]);
|
|
337
343
|
expect(idle.signal.aborted).toBe(false);
|
|
338
344
|
});
|
|
339
345
|
});
|
|
340
346
|
|
|
341
|
-
describe("
|
|
347
|
+
describe("completion() through eval runtimes", () => {
|
|
342
348
|
afterEach(() => {
|
|
343
349
|
vi.restoreAllMocks();
|
|
344
350
|
});
|
|
@@ -348,13 +354,13 @@ describe("llm() through eval runtimes", () => {
|
|
|
348
354
|
await disposeAllKernelSessions();
|
|
349
355
|
});
|
|
350
356
|
|
|
351
|
-
it("exposes
|
|
352
|
-
using tempDir = TempDir.createSync("@omp-eval-
|
|
357
|
+
it("exposes completion() in the JavaScript runtime", async () => {
|
|
358
|
+
using tempDir = TempDir.createSync("@omp-eval-completion-js-");
|
|
353
359
|
const sessionFile = path.join(tempDir.path(), "session.jsonl");
|
|
354
|
-
const sessionId = `js-
|
|
360
|
+
const sessionId = `js-completion:${crypto.randomUUID()}`;
|
|
355
361
|
vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "hello from smol" }));
|
|
356
362
|
|
|
357
|
-
const result = await executeJs('return await
|
|
363
|
+
const result = await executeJs('return await completion("hi", { model: "smol" });', {
|
|
358
364
|
cwd: tempDir.path(),
|
|
359
365
|
sessionId,
|
|
360
366
|
session: makeSession(),
|
|
@@ -365,16 +371,16 @@ describe("llm() through eval runtimes", () => {
|
|
|
365
371
|
expect(result.output.trim()).toBe("hello from smol");
|
|
366
372
|
});
|
|
367
373
|
|
|
368
|
-
it("parses structured
|
|
369
|
-
using tempDir = TempDir.createSync("@omp-eval-
|
|
374
|
+
it("parses structured completion() output in the JavaScript runtime", async () => {
|
|
375
|
+
using tempDir = TempDir.createSync("@omp-eval-completion-js-struct-");
|
|
370
376
|
const sessionFile = path.join(tempDir.path(), "session.jsonl");
|
|
371
|
-
const sessionId = `js-
|
|
377
|
+
const sessionId = `js-completion-struct:${crypto.randomUUID()}`;
|
|
372
378
|
vi.spyOn(ai, "completeSimple").mockResolvedValue(
|
|
373
379
|
assistant({ toolCall: { name: "respond", arguments: { ok: true, n: 3 } } }),
|
|
374
380
|
);
|
|
375
381
|
|
|
376
382
|
const result = await executeJs(
|
|
377
|
-
'const r = await
|
|
383
|
+
'const r = await completion("hi", { schema: { type: "object" } }); return JSON.stringify(r);',
|
|
378
384
|
{ cwd: tempDir.path(), sessionId, session: makeSession(), sessionFile },
|
|
379
385
|
);
|
|
380
386
|
|
|
@@ -382,10 +388,10 @@ describe("llm() through eval runtimes", () => {
|
|
|
382
388
|
expect(JSON.parse(result.output.trim())).toEqual({ ok: true, n: 3 });
|
|
383
389
|
});
|
|
384
390
|
|
|
385
|
-
it("exposes
|
|
386
|
-
const tempDir = TempDir.createSync("@omp-eval-
|
|
391
|
+
it("exposes completion() in the Python runtime", async () => {
|
|
392
|
+
const tempDir = TempDir.createSync("@omp-eval-completion-py-");
|
|
387
393
|
try {
|
|
388
|
-
const result = await
|
|
394
|
+
const result = await runPythonCompletionInSubprocess({ structured: false, tempDir });
|
|
389
395
|
expect(result.exitCode).toBe(0);
|
|
390
396
|
expect(result.output.trim()).toBe("hello from python");
|
|
391
397
|
} finally {
|
|
@@ -393,10 +399,10 @@ describe("llm() through eval runtimes", () => {
|
|
|
393
399
|
}
|
|
394
400
|
});
|
|
395
401
|
|
|
396
|
-
it("parses structured
|
|
397
|
-
const tempDir = TempDir.createSync("@omp-eval-
|
|
402
|
+
it("parses structured completion() output in the Python runtime", async () => {
|
|
403
|
+
const tempDir = TempDir.createSync("@omp-eval-completion-py-struct-");
|
|
398
404
|
try {
|
|
399
|
-
const result = await
|
|
405
|
+
const result = await runPythonCompletionInSubprocess({ structured: true, tempDir });
|
|
400
406
|
expect(result.exitCode).toBe(0);
|
|
401
407
|
expect(JSON.parse(result.output.trim())).toEqual({ ok: true });
|
|
402
408
|
} finally {
|
|
@@ -0,0 +1,241 @@
|
|
|
1
|
+
import { afterEach, describe, expect, it } from "bun:test";
|
|
2
|
+
import { TempDir } from "@oh-my-pi/pi-utils";
|
|
3
|
+
import { Settings } from "../../config/settings";
|
|
4
|
+
import type { ToolSession } from "../../tools";
|
|
5
|
+
import { disposeAllVmContexts } from "../js/context-manager";
|
|
6
|
+
import { executeJs } from "../js/executor";
|
|
7
|
+
|
|
8
|
+
const originalWorker = globalThis.Worker;
|
|
9
|
+
|
|
10
|
+
interface FakeWorkerStats {
|
|
11
|
+
closeRequests: number;
|
|
12
|
+
terminateCalls: number;
|
|
13
|
+
}
|
|
14
|
+
|
|
15
|
+
interface FakeWorkerBehavior {
|
|
16
|
+
exitOnClose: boolean;
|
|
17
|
+
settleRuns: boolean;
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
function makeSession(cwd: string): ToolSession {
|
|
21
|
+
return {
|
|
22
|
+
cwd,
|
|
23
|
+
hasUI: false,
|
|
24
|
+
settings: Settings.isolated({
|
|
25
|
+
"async.enabled": false,
|
|
26
|
+
"task.isolation.mode": "none",
|
|
27
|
+
"task.enableLsp": true,
|
|
28
|
+
}),
|
|
29
|
+
taskDepth: 0,
|
|
30
|
+
enableLsp: true,
|
|
31
|
+
getSessionFile: () => null,
|
|
32
|
+
getSessionSpawns: () => "*",
|
|
33
|
+
getActiveModelString: () => "p/active",
|
|
34
|
+
getModelString: () => "p/fallback",
|
|
35
|
+
getArtifactsDir: () => null,
|
|
36
|
+
getSessionId: () => "test-session",
|
|
37
|
+
getEvalSessionId: () => "test-eval-session",
|
|
38
|
+
};
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
async function withTimeout<T>(promise: Promise<T>, ms: number, label: string): Promise<T> {
|
|
42
|
+
let timeout: NodeJS.Timeout | undefined;
|
|
43
|
+
try {
|
|
44
|
+
return await Promise.race([
|
|
45
|
+
promise,
|
|
46
|
+
new Promise<never>((_, reject) => {
|
|
47
|
+
timeout = setTimeout(() => reject(new Error(`${label} timed out`)), ms);
|
|
48
|
+
}),
|
|
49
|
+
]);
|
|
50
|
+
} finally {
|
|
51
|
+
if (timeout) clearTimeout(timeout);
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
async function waitForRealWorkerExitAfterClose(cwd: string): Promise<void> {
|
|
56
|
+
const worker = new originalWorker(new URL("../js/worker-entry.ts", import.meta.url).href, { type: "module" });
|
|
57
|
+
const ready = Promise.withResolvers<void>();
|
|
58
|
+
const runComplete = Promise.withResolvers<void>();
|
|
59
|
+
const closedAck = Promise.withResolvers<void>();
|
|
60
|
+
const workerClosed = Promise.withResolvers<void>();
|
|
61
|
+
const runId = `keep-alive:${crypto.randomUUID()}`;
|
|
62
|
+
const snapshot = { cwd, sessionId: `worker-exit:${crypto.randomUUID()}` };
|
|
63
|
+
|
|
64
|
+
worker.addEventListener("message", event => {
|
|
65
|
+
const msg = event.data as { type?: string; runId?: string; ok?: boolean };
|
|
66
|
+
if (msg.type === "ready") ready.resolve();
|
|
67
|
+
else if (msg.type === "result" && msg.runId === runId && msg.ok) runComplete.resolve();
|
|
68
|
+
else if (msg.type === "closed") closedAck.resolve();
|
|
69
|
+
});
|
|
70
|
+
worker.addEventListener("close", () => workerClosed.resolve());
|
|
71
|
+
|
|
72
|
+
try {
|
|
73
|
+
await withTimeout(ready.promise, 1_000, "worker ready");
|
|
74
|
+
worker.postMessage({
|
|
75
|
+
type: "run",
|
|
76
|
+
runId,
|
|
77
|
+
code: "globalThis.__keepAlive = setInterval(() => {}, 1000);\nundefined;",
|
|
78
|
+
filename: "keep-alive.js",
|
|
79
|
+
snapshot,
|
|
80
|
+
});
|
|
81
|
+
await withTimeout(runComplete.promise, 1_000, "worker run");
|
|
82
|
+
worker.postMessage({ type: "close" });
|
|
83
|
+
await withTimeout(closedAck.promise, 1_000, "worker closed ack");
|
|
84
|
+
await withTimeout(workerClosed.promise, 1_000, "worker close event");
|
|
85
|
+
} finally {
|
|
86
|
+
worker.terminate();
|
|
87
|
+
}
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
function installFakeWorker(stats: FakeWorkerStats, behavior: FakeWorkerBehavior): void {
|
|
91
|
+
class FakeWorker {
|
|
92
|
+
#messageListeners = new Set<(event: MessageEvent) => void>();
|
|
93
|
+
#closeListeners = new Set<(event: Event) => void>();
|
|
94
|
+
#readyQueued = false;
|
|
95
|
+
#exited = false;
|
|
96
|
+
|
|
97
|
+
postMessage(message: unknown): void {
|
|
98
|
+
if (!message || typeof message !== "object") return;
|
|
99
|
+
const typed = message as { type?: string; runId?: string };
|
|
100
|
+
if (typed.type === "run" && typed.runId && behavior.settleRuns) {
|
|
101
|
+
queueMicrotask(() => this.#emitMessage({ type: "result", runId: typed.runId, ok: true }));
|
|
102
|
+
return;
|
|
103
|
+
}
|
|
104
|
+
if (typed.type === "close") {
|
|
105
|
+
stats.closeRequests++;
|
|
106
|
+
queueMicrotask(() => {
|
|
107
|
+
this.#emitMessage({ type: "closed" });
|
|
108
|
+
if (behavior.exitOnClose) this.#emitClose();
|
|
109
|
+
});
|
|
110
|
+
}
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
addEventListener(type: string, listener: (event: MessageEvent | Event) => void): void {
|
|
114
|
+
if (type === "close") {
|
|
115
|
+
this.#closeListeners.add(listener as (event: Event) => void);
|
|
116
|
+
return;
|
|
117
|
+
}
|
|
118
|
+
if (type !== "message") return;
|
|
119
|
+
this.#messageListeners.add(listener as (event: MessageEvent) => void);
|
|
120
|
+
if (!this.#readyQueued) {
|
|
121
|
+
this.#readyQueued = true;
|
|
122
|
+
queueMicrotask(() => this.#emitMessage({ type: "ready" }));
|
|
123
|
+
}
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
removeEventListener(type: string, listener: (event: MessageEvent | Event) => void): void {
|
|
127
|
+
if (type === "close") {
|
|
128
|
+
this.#closeListeners.delete(listener as (event: Event) => void);
|
|
129
|
+
return;
|
|
130
|
+
}
|
|
131
|
+
if (type !== "message") return;
|
|
132
|
+
this.#messageListeners.delete(listener as (event: MessageEvent) => void);
|
|
133
|
+
}
|
|
134
|
+
|
|
135
|
+
terminate(): void {
|
|
136
|
+
stats.terminateCalls++;
|
|
137
|
+
this.#emitClose();
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
#emitMessage(data: unknown): void {
|
|
141
|
+
const event = new MessageEvent("message", { data });
|
|
142
|
+
for (const listener of this.#messageListeners) listener(event);
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
#emitClose(): void {
|
|
146
|
+
if (this.#exited) return;
|
|
147
|
+
this.#exited = true;
|
|
148
|
+
const event = new Event("close");
|
|
149
|
+
for (const listener of this.#closeListeners) listener(event);
|
|
150
|
+
}
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
Object.defineProperty(globalThis, "Worker", {
|
|
154
|
+
configurable: true,
|
|
155
|
+
writable: true,
|
|
156
|
+
value: FakeWorker as unknown as typeof Worker,
|
|
157
|
+
});
|
|
158
|
+
}
|
|
159
|
+
|
|
160
|
+
describe("JavaScript eval worker lifecycle", () => {
|
|
161
|
+
afterEach(async () => {
|
|
162
|
+
await disposeAllVmContexts();
|
|
163
|
+
Object.defineProperty(globalThis, "Worker", {
|
|
164
|
+
configurable: true,
|
|
165
|
+
writable: true,
|
|
166
|
+
value: originalWorker,
|
|
167
|
+
});
|
|
168
|
+
});
|
|
169
|
+
|
|
170
|
+
it("exits a real worker on graceful close even with ref'ed user handles", async () => {
|
|
171
|
+
using tempDir = TempDir.createSync("@omp-js-worker-real-close-");
|
|
172
|
+
|
|
173
|
+
await waitForRealWorkerExitAfterClose(tempDir.path());
|
|
174
|
+
});
|
|
175
|
+
|
|
176
|
+
it("waits for the worker to close on reset instead of force-terminating it", async () => {
|
|
177
|
+
using tempDir = TempDir.createSync("@omp-js-worker-close-");
|
|
178
|
+
const stats: FakeWorkerStats = { closeRequests: 0, terminateCalls: 0 };
|
|
179
|
+
installFakeWorker(stats, { exitOnClose: true, settleRuns: true });
|
|
180
|
+
|
|
181
|
+
const session = makeSession(tempDir.path());
|
|
182
|
+
const sessionId = `js-close:${crypto.randomUUID()}`;
|
|
183
|
+
|
|
184
|
+
const first = await executeJs("globalThis.marker = 1;", { cwd: tempDir.path(), sessionId, session });
|
|
185
|
+
expect(first.exitCode).toBe(0);
|
|
186
|
+
|
|
187
|
+
const second = await executeJs("globalThis.marker = 2;", {
|
|
188
|
+
cwd: tempDir.path(),
|
|
189
|
+
sessionId,
|
|
190
|
+
session,
|
|
191
|
+
reset: true,
|
|
192
|
+
});
|
|
193
|
+
expect(second.exitCode).toBe(0);
|
|
194
|
+
expect(stats.closeRequests).toBe(1);
|
|
195
|
+
expect(stats.terminateCalls).toBe(0);
|
|
196
|
+
});
|
|
197
|
+
|
|
198
|
+
it("terminates when close is acknowledged but the worker does not exit", async () => {
|
|
199
|
+
using tempDir = TempDir.createSync("@omp-js-worker-close-hung-");
|
|
200
|
+
const stats: FakeWorkerStats = { closeRequests: 0, terminateCalls: 0 };
|
|
201
|
+
installFakeWorker(stats, { exitOnClose: false, settleRuns: true });
|
|
202
|
+
|
|
203
|
+
const session = makeSession(tempDir.path());
|
|
204
|
+
const sessionId = `js-close-hung:${crypto.randomUUID()}`;
|
|
205
|
+
|
|
206
|
+
const first = await executeJs("globalThis.marker = 1;", { cwd: tempDir.path(), sessionId, session });
|
|
207
|
+
expect(first.exitCode).toBe(0);
|
|
208
|
+
|
|
209
|
+
const second = await executeJs("globalThis.marker = 2;", {
|
|
210
|
+
cwd: tempDir.path(),
|
|
211
|
+
sessionId,
|
|
212
|
+
session,
|
|
213
|
+
reset: true,
|
|
214
|
+
});
|
|
215
|
+
expect(second.exitCode).toBe(0);
|
|
216
|
+
expect(stats.closeRequests).toBe(1);
|
|
217
|
+
expect(stats.terminateCalls).toBe(1);
|
|
218
|
+
});
|
|
219
|
+
|
|
220
|
+
it("force-terminates instead of closing when an in-flight run is aborted", async () => {
|
|
221
|
+
using tempDir = TempDir.createSync("@omp-js-worker-abort-");
|
|
222
|
+
const stats: FakeWorkerStats = { closeRequests: 0, terminateCalls: 0 };
|
|
223
|
+
installFakeWorker(stats, { exitOnClose: true, settleRuns: false });
|
|
224
|
+
|
|
225
|
+
const session = makeSession(tempDir.path());
|
|
226
|
+
const sessionId = `js-abort:${crypto.randomUUID()}`;
|
|
227
|
+
const controller = new AbortController();
|
|
228
|
+
const resultPromise = executeJs("globalThis.neverFinishes = true;", {
|
|
229
|
+
cwd: tempDir.path(),
|
|
230
|
+
sessionId,
|
|
231
|
+
session,
|
|
232
|
+
signal: controller.signal,
|
|
233
|
+
});
|
|
234
|
+
setTimeout(() => controller.abort(new DOMException("Execution aborted", "AbortError")), 0);
|
|
235
|
+
|
|
236
|
+
const result = await resultPromise;
|
|
237
|
+
expect(result.cancelled).toBe(true);
|
|
238
|
+
expect(stats.closeRequests).toBe(0);
|
|
239
|
+
expect(stats.terminateCalls).toBe(1);
|
|
240
|
+
});
|
|
241
|
+
});
|
package/src/eval/agent-bridge.ts
CHANGED
|
@@ -272,7 +272,12 @@ export async function runEvalAgent(args: unknown, options: EvalAgentBridgeOption
|
|
|
272
272
|
persistArtifacts: Boolean(sessionFile),
|
|
273
273
|
artifactsDir,
|
|
274
274
|
contextFile,
|
|
275
|
-
|
|
275
|
+
// Eval `agent()` subagents are short-lived programmatic helpers (data
|
|
276
|
+
// collection, structured output, parallel() fan-out). LSP server
|
|
277
|
+
// cold-start costs tens of seconds and is pure overhead here, so it is
|
|
278
|
+
// forced off regardless of the `task.enableLsp` setting — that knob only
|
|
279
|
+
// governs LSP-aware delegation through the `task` tool.
|
|
280
|
+
enableLsp: false,
|
|
276
281
|
signal: options.signal,
|
|
277
282
|
eventBus: options.session.eventBus,
|
|
278
283
|
onProgress: progress => emitProgressStatus(options.emitStatus, progress),
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* Timeout suspension for in-flight host-side eval bridge calls.
|
|
3
3
|
*
|
|
4
4
|
* The eval watchdog caps a cell's `timeout` as a budget on the cell runtime's
|
|
5
|
-
* own work. Host-side `agent()` / `parallel()` / `
|
|
5
|
+
* own work. Host-side `agent()` / `parallel()` / `completion()` bridge calls hand
|
|
6
6
|
* control to the outer TypeScript process, where the Python kernel or JS VM is
|
|
7
7
|
* only waiting for a result. While that delegated work is in flight, the cell
|
|
8
8
|
* timeout must be ignored completely; once the bridge returns and the runtime is
|