@oh-my-pi/pi-coding-agent 15.10.3 → 15.10.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (161)
  1. package/CHANGELOG.md +72 -0
  2. package/dist/types/capability/rule-buckets.d.ts +1 -1
  3. package/dist/types/capability/rule.d.ts +6 -1
  4. package/dist/types/cli/update-cli.d.ts +11 -1
  5. package/dist/types/config/model-registry.d.ts +18 -1
  6. package/dist/types/discovery/at-imports.d.ts +15 -0
  7. package/dist/types/edit/diff.d.ts +3 -2
  8. package/dist/types/eval/__tests__/helpers-local-roots.test.d.ts +1 -0
  9. package/dist/types/eval/__tests__/js-context-manager.test.d.ts +1 -0
  10. package/dist/types/eval/backend.d.ts +7 -0
  11. package/dist/types/eval/bridge-timeout.d.ts +1 -1
  12. package/dist/types/eval/{llm-bridge.d.ts → completion-bridge.d.ts} +8 -8
  13. package/dist/types/eval/idle-timeout.d.ts +1 -1
  14. package/dist/types/eval/js/context-manager.d.ts +1 -0
  15. package/dist/types/eval/js/executor.d.ts +2 -0
  16. package/dist/types/eval/js/index.d.ts +1 -1
  17. package/dist/types/eval/js/shared/helpers.d.ts +6 -0
  18. package/dist/types/eval/js/shared/runtime.d.ts +5 -0
  19. package/dist/types/eval/js/worker-protocol.d.ts +6 -0
  20. package/dist/types/eval/py/executor.d.ts +7 -0
  21. package/dist/types/eval/py/index.d.ts +1 -1
  22. package/dist/types/export/ttsr.d.ts +14 -0
  23. package/dist/types/extensibility/extensions/types.d.ts +8 -1
  24. package/dist/types/extensibility/legacy-pi-ai-shim.d.ts +1 -1
  25. package/dist/types/internal-urls/local-protocol.d.ts +10 -0
  26. package/dist/types/mcp/oauth-flow.d.ts +2 -2
  27. package/dist/types/modes/components/custom-editor.d.ts +3 -0
  28. package/dist/types/modes/components/{status-line.d.ts → status-line/component.d.ts} +2 -32
  29. package/dist/types/modes/components/status-line/index.d.ts +1 -0
  30. package/dist/types/modes/components/status-line/types.d.ts +31 -2
  31. package/dist/types/modes/image-references.d.ts +8 -3
  32. package/dist/types/modes/interactive-mode.d.ts +1 -1
  33. package/dist/types/modes/theme/theme.d.ts +2 -1
  34. package/dist/types/modes/types.d.ts +2 -1
  35. package/dist/types/modes/utils/ui-helpers.d.ts +2 -2
  36. package/dist/types/session/agent-session.d.ts +0 -2
  37. package/dist/types/tools/ask.d.ts +1 -0
  38. package/dist/types/tools/browser/tab-worker.d.ts +15 -0
  39. package/dist/types/tools/index.d.ts +17 -0
  40. package/dist/types/tools/render-utils.d.ts +1 -1
  41. package/dist/types/tools/tool-timeouts.d.ts +1 -1
  42. package/dist/types/utils/block-context.d.ts +35 -0
  43. package/dist/types/utils/image-loading.d.ts +12 -0
  44. package/package.json +29 -9
  45. package/src/capability/rule-buckets.ts +4 -2
  46. package/src/capability/rule.ts +10 -1
  47. package/src/cli/auth-broker-cli.ts +6 -7
  48. package/src/cli/auth-gateway-cli.ts +1 -1
  49. package/src/cli/list-models.ts +5 -0
  50. package/src/cli/update-cli.ts +138 -16
  51. package/src/config/model-registry.ts +81 -2
  52. package/src/debug/index.ts +4 -8
  53. package/src/discovery/at-imports.ts +273 -0
  54. package/src/discovery/builtin-rules/index.ts +4 -0
  55. package/src/discovery/builtin-rules/ts-no-test-timers.md +55 -0
  56. package/src/discovery/builtin-rules/ts-redundant-clear-guard.md +75 -0
  57. package/src/discovery/helpers.ts +2 -1
  58. package/src/edit/diff.ts +114 -4
  59. package/src/edit/hashline/diff.ts +1 -1
  60. package/src/edit/hashline/execute.ts +1 -1
  61. package/src/edit/modes/patch.ts +6 -2
  62. package/src/edit/modes/replace.ts +1 -1
  63. package/src/edit/renderer.ts +12 -2
  64. package/src/eval/__tests__/agent-bridge.test.ts +13 -0
  65. package/src/eval/__tests__/{llm-bridge.test.ts → completion-bridge.test.ts} +60 -54
  66. package/src/eval/__tests__/helpers-local-roots.test.ts +58 -0
  67. package/src/eval/__tests__/js-context-manager.test.ts +241 -0
  68. package/src/eval/agent-bridge.ts +6 -1
  69. package/src/eval/backend.ts +15 -0
  70. package/src/eval/bridge-timeout.ts +1 -1
  71. package/src/eval/{llm-bridge.ts → completion-bridge.ts} +30 -27
  72. package/src/eval/idle-timeout.ts +1 -1
  73. package/src/eval/js/context-manager.ts +70 -8
  74. package/src/eval/js/executor.ts +3 -0
  75. package/src/eval/js/index.ts +7 -1
  76. package/src/eval/js/shared/helpers.ts +53 -6
  77. package/src/eval/js/shared/prelude.txt +4 -4
  78. package/src/eval/js/shared/runtime.ts +8 -0
  79. package/src/eval/js/tool-bridge.ts +3 -3
  80. package/src/eval/js/worker-core.ts +1 -0
  81. package/src/eval/js/worker-entry.ts +6 -0
  82. package/src/eval/js/worker-protocol.ts +6 -0
  83. package/src/eval/py/executor.ts +12 -0
  84. package/src/eval/py/index.ts +7 -1
  85. package/src/eval/py/prelude.py +46 -7
  86. package/src/eval/py/runner.py +1 -0
  87. package/src/exa/render.ts +1 -1
  88. package/src/export/ttsr.ts +122 -1
  89. package/src/extensibility/extensions/types.ts +8 -1
  90. package/src/extensibility/legacy-pi-ai-shim.ts +1 -1
  91. package/src/extensibility/plugins/doctor.ts +1 -1
  92. package/src/extensibility/plugins/legacy-pi-compat.ts +6 -5
  93. package/src/goals/tools/goal-tool.ts +1 -1
  94. package/src/internal-urls/docs-index.generated.ts +8 -6
  95. package/src/internal-urls/local-protocol.ts +13 -0
  96. package/src/lsp/render.ts +8 -6
  97. package/src/mcp/oauth-flow.ts +3 -3
  98. package/src/mcp/render.ts +7 -1
  99. package/src/modes/components/custom-editor.ts +12 -6
  100. package/src/modes/components/login-dialog.ts +1 -1
  101. package/src/modes/components/oauth-selector.ts +4 -4
  102. package/src/modes/components/read-tool-group.ts +10 -3
  103. package/src/modes/components/{status-line.ts → status-line/component.ts} +18 -40
  104. package/src/modes/components/status-line/index.ts +1 -0
  105. package/src/modes/components/status-line/types.ts +23 -8
  106. package/src/modes/components/tips.txt +1 -1
  107. package/src/modes/components/tool-execution.ts +1 -1
  108. package/src/modes/components/transcript-container.ts +17 -10
  109. package/src/modes/components/user-message.ts +6 -3
  110. package/src/modes/components/welcome.ts +1 -1
  111. package/src/modes/controllers/extension-ui-controller.ts +143 -127
  112. package/src/modes/controllers/input-controller.ts +36 -10
  113. package/src/modes/controllers/mcp-command-controller.ts +28 -12
  114. package/src/modes/controllers/selector-controller.ts +4 -11
  115. package/src/modes/controllers/ssh-command-controller.ts +2 -2
  116. package/src/modes/image-references.ts +13 -7
  117. package/src/modes/interactive-mode.ts +2 -2
  118. package/src/modes/rpc/rpc-mode.ts +1 -1
  119. package/src/modes/setup-wizard/scenes/sign-in.ts +3 -11
  120. package/src/modes/theme/theme.ts +95 -1
  121. package/src/modes/types.ts +2 -1
  122. package/src/modes/utils/ui-helpers.ts +14 -5
  123. package/src/prompts/system/tiny-title-system.md +1 -1
  124. package/src/prompts/system/title-system.md +16 -3
  125. package/src/prompts/system/workflow-notice.md +1 -1
  126. package/src/prompts/tools/bash.md +1 -1
  127. package/src/prompts/tools/eval.md +6 -6
  128. package/src/sdk.ts +31 -14
  129. package/src/session/agent-session.ts +213 -155
  130. package/src/session/session-manager.ts +1 -1
  131. package/src/slash-commands/builtin-registry.ts +1 -1
  132. package/src/system-prompt.ts +15 -9
  133. package/src/task/render.ts +20 -8
  134. package/src/tools/ask.ts +14 -5
  135. package/src/tools/bash-interactive.ts +1 -1
  136. package/src/tools/bash.ts +14 -2
  137. package/src/tools/browser/render.ts +5 -2
  138. package/src/tools/browser/tab-worker.ts +211 -91
  139. package/src/tools/debug.ts +5 -2
  140. package/src/tools/eval-render.ts +8 -5
  141. package/src/tools/eval.ts +2 -2
  142. package/src/tools/gh-renderer.ts +29 -15
  143. package/src/tools/index.ts +32 -0
  144. package/src/tools/inspect-image-renderer.ts +12 -5
  145. package/src/tools/job.ts +9 -6
  146. package/src/tools/memory-render.ts +19 -5
  147. package/src/tools/read.ts +165 -18
  148. package/src/tools/render-utils.ts +3 -1
  149. package/src/tools/resolve.ts +1 -1
  150. package/src/tools/review.ts +1 -1
  151. package/src/tools/ssh.ts +4 -1
  152. package/src/tools/todo.ts +8 -1
  153. package/src/tools/tool-timeouts.ts +1 -1
  154. package/src/tools/write.ts +1 -1
  155. package/src/tui/code-cell.ts +1 -1
  156. package/src/utils/block-context.ts +312 -0
  157. package/src/utils/image-loading.ts +31 -1
  158. package/src/utils/title-generator.ts +2 -2
  159. package/src/web/search/providers/codex.ts +1 -1
  160. package/src/web/search/render.ts +14 -6
  161. package/dist/types/eval/__tests__/{llm-bridge.test.d.ts → completion-bridge.test.d.ts} +0 -0
@@ -0,0 +1,241 @@
1
+ import { afterEach, describe, expect, it } from "bun:test";
2
+ import { TempDir } from "@oh-my-pi/pi-utils";
3
+ import { Settings } from "../../config/settings";
4
+ import type { ToolSession } from "../../tools";
5
+ import { disposeAllVmContexts } from "../js/context-manager";
6
+ import { executeJs } from "../js/executor";
7
+
8
+ const originalWorker = globalThis.Worker;
9
+
10
/** Counters the fake worker bumps so a test can assert how it was shut down. */
type FakeWorkerStats = {
	/** Incremented each time a `{ type: "close" }` message is posted to the fake worker. */
	closeRequests: number;
	/** Incremented each time `terminate()` is invoked on the fake worker. */
	terminateCalls: number;
};

/** Switches that decide how the fake worker reacts to inbound messages. */
type FakeWorkerBehavior = {
	/** When true, the fake worker fires its `close` event right after acknowledging a close request. */
	exitOnClose: boolean;
	/** When true, a `run` message carrying a `runId` is answered with an `ok` result; otherwise it is never answered. */
	settleRuns: boolean;
};
19
+
20
/**
 * Build a minimal, UI-less {@link ToolSession} stub rooted at `cwd`.
 *
 * Settings are isolated per call; every accessor returns a fixed value
 * (no session file, no artifacts dir, constant model strings and ids).
 */
function makeSession(cwd: string): ToolSession {
	const settings = Settings.isolated({
		"async.enabled": false,
		"task.isolation.mode": "none",
		"task.enableLsp": true,
	});

	const stub: ToolSession = {
		cwd,
		hasUI: false,
		settings,
		taskDepth: 0,
		enableLsp: true,
		getSessionFile: () => null,
		getSessionSpawns: () => "*",
		getActiveModelString: () => "p/active",
		getModelString: () => "p/fallback",
		getArtifactsDir: () => null,
		getSessionId: () => "test-session",
		getEvalSessionId: () => "test-eval-session",
	};
	return stub;
}
40
+
41
/**
 * Await `promise`, but give up after `ms` milliseconds.
 *
 * @param promise - The work to wait for.
 * @param ms - Deadline in milliseconds before the wait is abandoned.
 * @param label - Human-readable name used in the timeout error message.
 * @returns The value `promise` resolves with, if it settles before the deadline.
 * @throws Error `"<label> timed out"` when the deadline elapses first; otherwise
 *   whatever `promise` itself rejects with.
 */
async function withTimeout<T>(promise: Promise<T>, ms: number, label: string): Promise<T> {
	// `ReturnType<typeof setTimeout>` keeps the handle type correct under Node,
	// Bun and DOM typings alike, instead of hard-coding `NodeJS.Timeout`.
	let timer: ReturnType<typeof setTimeout> | undefined;
	const deadline = new Promise<never>((_resolve, reject) => {
		timer = setTimeout(() => reject(new Error(`${label} timed out`)), ms);
	});
	try {
		return await Promise.race([promise, deadline]);
	} finally {
		// Always cancel the pending timer so it cannot keep the event loop alive.
		// `clearTimeout(undefined)` is a no-op, so no guard is needed.
		clearTimeout(timer);
	}
}
54
+
55
/**
 * Spawn a real worker from `../js/worker-entry.ts`, run a cell that leaves a
 * `setInterval` handle alive, then post `{ type: "close" }` and wait for both
 * the `closed` acknowledgement message and the worker's `close` event.
 *
 * Each step is bounded by a one-second timeout; the worker is always
 * terminated on the way out, whether or not the steps succeeded.
 */
async function waitForRealWorkerExitAfterClose(cwd: string): Promise<void> {
	const STEP_TIMEOUT_MS = 1_000;

	const entryHref = new URL("../js/worker-entry.ts", import.meta.url).href;
	const worker = new originalWorker(entryHref, { type: "module" });

	const ready = Promise.withResolvers<void>();
	const runComplete = Promise.withResolvers<void>();
	const closedAck = Promise.withResolvers<void>();
	const workerClosed = Promise.withResolvers<void>();

	const runId = `keep-alive:${crypto.randomUUID()}`;
	const snapshot = { cwd, sessionId: `worker-exit:${crypto.randomUUID()}` };

	worker.addEventListener("message", event => {
		const msg = event.data as { type?: string; runId?: string; ok?: boolean };
		switch (msg.type) {
			case "ready":
				ready.resolve();
				break;
			case "result":
				// Only a successful result for our own run counts.
				if (msg.runId === runId && msg.ok) runComplete.resolve();
				break;
			case "closed":
				closedAck.resolve();
				break;
		}
	});
	worker.addEventListener("close", () => workerClosed.resolve());

	try {
		await withTimeout(ready.promise, STEP_TIMEOUT_MS, "worker ready");

		// The interval stays ref'ed after the cell finishes.
		const runRequest = {
			type: "run",
			runId,
			code: "globalThis.__keepAlive = setInterval(() => {}, 1000);\nundefined;",
			filename: "keep-alive.js",
			snapshot,
		};
		worker.postMessage(runRequest);
		await withTimeout(runComplete.promise, STEP_TIMEOUT_MS, "worker run");

		worker.postMessage({ type: "close" });
		await withTimeout(closedAck.promise, STEP_TIMEOUT_MS, "worker closed ack");
		await withTimeout(workerClosed.promise, STEP_TIMEOUT_MS, "worker close event");
	} finally {
		worker.terminate();
	}
}
89
+
90
/**
 * Replace `globalThis.Worker` with an in-process fake.
 *
 * The fake announces `ready` once the first `message` listener is attached,
 * answers `run` messages with a successful result when `behavior.settleRuns`
 * is set, and acknowledges `close` with a `closed` message (followed by a
 * `close` event when `behavior.exitOnClose` is set). Close requests and
 * `terminate()` calls are tallied in `stats`.
 */
function installFakeWorker(stats: FakeWorkerStats, behavior: FakeWorkerBehavior): void {
	type MessageListener = (event: MessageEvent) => void;
	type CloseListener = (event: Event) => void;

	class FakeWorker {
		#onMessage = new Set<MessageListener>();
		#onClose = new Set<CloseListener>();
		#readyAnnounced = false;
		#closed = false;

		postMessage(message: unknown): void {
			if (typeof message !== "object" || message === null) return;
			const request = message as { type?: string; runId?: string };

			switch (request.type) {
				case "run":
					// Runs are only answered when configured to settle; otherwise they hang.
					if (request.runId && behavior.settleRuns) {
						queueMicrotask(() => this.#dispatchMessage({ type: "result", runId: request.runId, ok: true }));
					}
					return;
				case "close":
					stats.closeRequests++;
					queueMicrotask(() => {
						this.#dispatchMessage({ type: "closed" });
						if (behavior.exitOnClose) this.#dispatchClose();
					});
					return;
				default:
					return;
			}
		}

		addEventListener(type: string, listener: (event: MessageEvent | Event) => void): void {
			switch (type) {
				case "close":
					this.#onClose.add(listener as CloseListener);
					break;
				case "message":
					this.#onMessage.add(listener as MessageListener);
					// Announce readiness exactly once, after the first message listener attaches.
					if (this.#readyAnnounced) break;
					this.#readyAnnounced = true;
					queueMicrotask(() => this.#dispatchMessage({ type: "ready" }));
					break;
			}
		}

		removeEventListener(type: string, listener: (event: MessageEvent | Event) => void): void {
			switch (type) {
				case "close":
					this.#onClose.delete(listener as CloseListener);
					break;
				case "message":
					this.#onMessage.delete(listener as MessageListener);
					break;
			}
		}

		terminate(): void {
			stats.terminateCalls++;
			this.#dispatchClose();
		}

		#dispatchMessage(data: unknown): void {
			const event = new MessageEvent("message", { data });
			this.#onMessage.forEach(listener => listener(event));
		}

		#dispatchClose(): void {
			// The close event fires at most once per worker.
			if (this.#closed) return;
			this.#closed = true;
			const event = new Event("close");
			this.#onClose.forEach(listener => listener(event));
		}
	}

	Object.defineProperty(globalThis, "Worker", {
		configurable: true,
		writable: true,
		value: FakeWorker as unknown as typeof Worker,
	});
}
159
+
160
describe("JavaScript eval worker lifecycle", () => {
	/** Zeroed counters for one fake-worker scenario. */
	const freshStats = (): FakeWorkerStats => ({ closeRequests: 0, terminateCalls: 0 });

	/**
	 * Run one cell, then a second cell with `reset: true` in the same eval
	 * session, against a fake worker configured with `behavior`. Both cells
	 * must exit with code 0. Returns the counters the fake worker recorded.
	 */
	async function runThenReset(
		tempPrefix: string,
		sessionPrefix: string,
		behavior: FakeWorkerBehavior,
	): Promise<FakeWorkerStats> {
		using tempDir = TempDir.createSync(tempPrefix);
		const stats = freshStats();
		installFakeWorker(stats, behavior);

		const session = makeSession(tempDir.path());
		const sessionId = `${sessionPrefix}:${crypto.randomUUID()}`;

		const first = await executeJs("globalThis.marker = 1;", { cwd: tempDir.path(), sessionId, session });
		expect(first.exitCode).toBe(0);

		const second = await executeJs("globalThis.marker = 2;", {
			cwd: tempDir.path(),
			sessionId,
			session,
			reset: true,
		});
		expect(second.exitCode).toBe(0);

		return stats;
	}

	afterEach(async () => {
		// Tear down any live contexts first, then put the real Worker back.
		await disposeAllVmContexts();
		Object.defineProperty(globalThis, "Worker", {
			configurable: true,
			writable: true,
			value: originalWorker,
		});
	});

	it("exits a real worker on graceful close even with ref'ed user handles", async () => {
		using tempDir = TempDir.createSync("@omp-js-worker-real-close-");

		await waitForRealWorkerExitAfterClose(tempDir.path());
	});

	it("waits for the worker to close on reset instead of force-terminating it", async () => {
		const stats = await runThenReset("@omp-js-worker-close-", "js-close", {
			exitOnClose: true,
			settleRuns: true,
		});

		expect(stats.closeRequests).toBe(1);
		expect(stats.terminateCalls).toBe(0);
	});

	it("terminates when close is acknowledged but the worker does not exit", async () => {
		const stats = await runThenReset("@omp-js-worker-close-hung-", "js-close-hung", {
			exitOnClose: false,
			settleRuns: true,
		});

		expect(stats.closeRequests).toBe(1);
		expect(stats.terminateCalls).toBe(1);
	});

	it("force-terminates instead of closing when an in-flight run is aborted", async () => {
		using tempDir = TempDir.createSync("@omp-js-worker-abort-");
		const stats = freshStats();
		// Runs never settle here, so the only way out is the abort signal.
		installFakeWorker(stats, { exitOnClose: true, settleRuns: false });

		const session = makeSession(tempDir.path());
		const sessionId = `js-abort:${crypto.randomUUID()}`;
		const controller = new AbortController();

		const pendingResult = executeJs("globalThis.neverFinishes = true;", {
			cwd: tempDir.path(),
			sessionId,
			session,
			signal: controller.signal,
		});
		setTimeout(() => controller.abort(new DOMException("Execution aborted", "AbortError")), 0);

		const result = await pendingResult;
		expect(result.cancelled).toBe(true);
		expect(stats.closeRequests).toBe(0);
		expect(stats.terminateCalls).toBe(1);
	});
});
@@ -272,7 +272,12 @@ export async function runEvalAgent(args: unknown, options: EvalAgentBridgeOption
272
272
  persistArtifacts: Boolean(sessionFile),
273
273
  artifactsDir,
274
274
  contextFile,
275
- enableLsp: (options.session.enableLsp ?? true) && options.session.settings.get("task.enableLsp"),
275
+ // Eval `agent()` subagents are short-lived programmatic helpers (data
276
+ // collection, structured output, parallel() fan-out). LSP server
277
+ // cold-start costs tens of seconds and is pure overhead here, so it is
278
+ // forced off regardless of the `task.enableLsp` setting — that knob only
279
+ // governs LSP-aware delegation through the `task` tool.
280
+ enableLsp: false,
276
281
  signal: options.signal,
277
282
  eventBus: options.session.eventBus,
278
283
  onProgress: progress => emitProgressStatus(options.emitStatus, progress),
@@ -1,3 +1,4 @@
1
+ import { buildEvalUrlRoots, type LocalProtocolOptions } from "../internal-urls";
1
2
  import type { ToolSession } from "../tools";
2
3
  import type { EvalDisplayOutput, EvalLanguage, EvalStatusEvent } from "./types";
3
4
 
@@ -56,3 +57,17 @@ export interface ExecutorBackend {
56
57
  /** Execute one cell. Caller invokes once per cell and aggregates results. */
57
58
  execute(code: string, opts: ExecutorBackendExecOptions): Promise<ExecutorBackendResult>;
58
59
  }
60
+
61
/**
 * Compute the on-disk roots that eval helpers substitute for internal-URL
 * schemes (currently `local://`).
 *
 * When the session carries its own {@link LocalProtocolOptions} — the same
 * mapping `read local://…` uses — those are passed through unchanged, so an
 * eval `write("local://x")` and a later `read local://x` resolve to the same
 * location. Otherwise the options are derived from the session's
 * `getArtifactsDir` / `getSessionId` accessors, each yielding `null` when the
 * accessor is missing or returns nothing.
 */
export function resolveEvalUrlRoots(session: ToolSession): Record<string, string> {
	const sessionOptions = session.localProtocolOptions;
	if (sessionOptions != null) {
		return buildEvalUrlRoots(sessionOptions);
	}

	const derivedOptions: LocalProtocolOptions = {
		getArtifactsDir: () => session.getArtifactsDir?.() ?? null,
		getSessionId: () => session.getSessionId?.() ?? null,
	};
	return buildEvalUrlRoots(derivedOptions);
}
@@ -2,7 +2,7 @@
2
2
  * Timeout suspension for in-flight host-side eval bridge calls.
3
3
  *
4
4
  * The eval watchdog caps a cell's `timeout` as a budget on the cell runtime's
5
- * own work. Host-side `agent()` / `parallel()` / `llm()` bridge calls hand
5
+ * own work. Host-side `agent()` / `parallel()` / `completion()` bridge calls hand
6
6
  * control to the outer TypeScript process, where the Python kernel or JS VM is
7
7
  * only waiting for a result. While that delegated work is in flight, the cell
8
8
  * timeout must be ignored completely; once the bridge returns and the runtime is
@@ -1,11 +1,11 @@
1
1
  /**
2
- * Host-side handler for the eval `llm()` helper.
2
+ * Host-side handler for the eval `completion()` helper.
3
3
  *
4
4
  * Both eval runtimes (JS worker + Python kernel) route helper→host calls
5
5
  * through {@link callSessionTool}. Reserving the synthetic tool name
6
- * {@link EVAL_LLM_BRIDGE_NAME} lets a single host handler serve both
6
+ * {@link EVAL_COMPLETION_BRIDGE_NAME} lets a single host handler serve both
7
7
  * transports without registering an agent-visible tool: cell code calls
8
- * `llm(prompt, opts)`, the prelude forwards `{ prompt, model, system?, schema? }`
8
+ * `completion(prompt, opts)`, the prelude forwards `{ prompt, model, system?, schema? }`
9
9
  * through the bridge, and this module performs one stateless completion.
10
10
  *
11
11
  * The call is oneshot and toolless from the model's perspective — pure text
@@ -27,36 +27,36 @@ import { ToolError } from "../tools/tool-errors";
27
27
  import { withBridgeTimeoutPause } from "./bridge-timeout";
28
28
  import type { JsStatusEvent } from "./js/shared/types";
29
29
 
30
- /** Synthetic bridge name reserved for the `llm()` helper across both runtimes. */
31
- export const EVAL_LLM_BRIDGE_NAME = "__llm__";
30
+ /** Synthetic bridge name reserved for the `completion()` helper across both runtimes. */
31
+ export const EVAL_COMPLETION_BRIDGE_NAME = "__completion__";
32
32
 
33
33
  /** Synthetic tool the model is forced to call when a `schema` is supplied. */
34
34
  const STRUCTURED_TOOL_NAME = "respond";
35
35
 
36
- type LlmTier = "smol" | "default" | "slow";
36
+ type CompletionTier = "smol" | "default" | "slow";
37
37
 
38
- const TIER_TO_PATTERN: Record<LlmTier, string> = {
38
+ const TIER_TO_PATTERN: Record<CompletionTier, string> = {
39
39
  smol: "pi/smol",
40
40
  default: "pi/default",
41
41
  slow: "pi/slow",
42
42
  };
43
43
 
44
- const llmArgsSchema = z.object({
44
+ const completionArgsSchema = z.object({
45
45
  prompt: z.string().min(1, "prompt must be a non-empty string"),
46
46
  model: z.enum(["smol", "default", "slow"]).default("default"),
47
47
  system: z.string().optional(),
48
48
  schema: z.record(z.string(), z.unknown()).optional(),
49
49
  });
50
50
 
51
- export interface EvalLlmBridgeOptions {
51
+ export interface EvalCompletionBridgeOptions {
52
52
  session: ToolSession;
53
53
  signal?: AbortSignal;
54
54
  emitStatus?: (event: JsStatusEvent) => void;
55
55
  }
56
56
 
57
- export interface EvalLlmResult {
57
+ export interface EvalCompletionResult {
58
58
  text: string;
59
- details: { model: string; tier: LlmTier; structured: boolean };
59
+ details: { model: string; tier: CompletionTier; structured: boolean };
60
60
  }
61
61
 
62
62
  /**
@@ -64,7 +64,7 @@ export interface EvalLlmResult {
64
64
  * active model and falls back to the `pi/default` role; `smol`/`slow` resolve
65
65
  * their respective role patterns. Returns `undefined` when nothing matches.
66
66
  */
67
- function resolveTierModel(tier: LlmTier, session: ToolSession): Model<Api> | undefined {
67
+ function resolveTierModel(tier: CompletionTier, session: ToolSession): Model<Api> | undefined {
68
68
  const modelRegistry = session.modelRegistry;
69
69
  if (!modelRegistry) return undefined;
70
70
  const available = modelRegistry.getAvailable();
@@ -90,7 +90,7 @@ function resolveTierModel(tier: LlmTier, session: ToolSession): Model<Api> | und
90
90
  * throwing downstream on models that cannot reason. Clamps to the highest
91
91
  * supported effort so a reasoning model without `high` does not 400.
92
92
  */
93
- function reasoningForTier(tier: LlmTier, model: Model<Api>): Effort | undefined {
93
+ function reasoningForTier(tier: CompletionTier, model: Model<Api>): Effort | undefined {
94
94
  if (tier !== "slow" || !model.reasoning) return undefined;
95
95
  const efforts = getSupportedEfforts(model);
96
96
  if (efforts.length === 0) return undefined;
@@ -98,23 +98,26 @@ function reasoningForTier(tier: LlmTier, model: Model<Api>): Effort | undefined
98
98
  }
99
99
 
100
100
  /**
101
- * Run a single stateless completion on behalf of an eval cell's `llm()` call.
101
+ * Run a single stateless completion on behalf of an eval cell's `completion()` call.
102
102
  * Returns a `{ text, details }` value shaped like a {@link callSessionTool}
103
103
  * result so the existing bridge transport carries it to either runtime.
104
104
  */
105
- export async function runEvalLlm(args: unknown, options: EvalLlmBridgeOptions): Promise<EvalLlmResult> {
106
- const parsed = llmArgsSchema.safeParse(args);
105
+ export async function runEvalCompletion(
106
+ args: unknown,
107
+ options: EvalCompletionBridgeOptions,
108
+ ): Promise<EvalCompletionResult> {
109
+ const parsed = completionArgsSchema.safeParse(args);
107
110
  if (!parsed.success) {
108
111
  const issue = parsed.error.issues[0];
109
112
  const where = issue?.path.length ? `${issue.path.join(".")}: ` : "";
110
- throw new ToolError(`llm() received invalid arguments: ${where}${issue?.message ?? "bad input"}`);
113
+ throw new ToolError(`completion() received invalid arguments: ${where}${issue?.message ?? "bad input"}`);
111
114
  }
112
115
  const { prompt, model: tier, system, schema } = parsed.data;
113
116
 
114
117
  const model = resolveTierModel(tier, options.session);
115
118
  if (!model) {
116
119
  throw new ToolError(
117
- `llm() could not resolve a model for the "${tier}" tier. Configure modelRoles.${tier === "default" ? "default" : tier} or ensure a provider is available.`,
120
+ `completion() could not resolve a model for the "${tier}" tier. Configure modelRoles.${tier === "default" ? "default" : tier} or ensure a provider is available.`,
118
121
  );
119
122
  }
120
123
 
@@ -122,7 +125,7 @@ export async function runEvalLlm(args: unknown, options: EvalLlmBridgeOptions):
122
125
  const apiKey = await registry?.getApiKey(model);
123
126
  if (!registry || !apiKey) {
124
127
  throw new ToolError(
125
- `llm() has no API key for ${formatModelString(model)}. Configure credentials for this provider or choose another tier.`,
128
+ `completion() has no API key for ${formatModelString(model)}. Configure credentials for this provider or choose another tier.`,
126
129
  );
127
130
  }
128
131
 
@@ -141,7 +144,7 @@ export async function runEvalLlm(args: unknown, options: EvalLlmBridgeOptions):
141
144
 
142
145
  // Some providers (notably openai-codex) require a non-empty `instructions`
143
146
  // field on every Responses request and 400 with "Instructions are required"
144
- // when it is missing. Fall back to a minimal default so `llm(prompt)` works
147
+ // when it is missing. Fall back to a minimal default so `completion(prompt)` works
145
148
  // without forcing every caller to pass a `system` prompt.
146
149
  const systemPrompt = system ? [system] : ["You are a helpful assistant."];
147
150
 
@@ -164,15 +167,15 @@ export async function runEvalLlm(args: unknown, options: EvalLlmBridgeOptions):
164
167
  reasoning: reasoningForTier(tier, model),
165
168
  toolChoice: schema ? { type: "tool", name: STRUCTURED_TOOL_NAME } : undefined,
166
169
  },
167
- { telemetry, oneshotKind: "eval_llm" },
170
+ { telemetry, oneshotKind: "eval_completion" },
168
171
  ),
169
172
  );
170
173
 
171
174
  if (response.stopReason === "error") {
172
- throw new ToolError(response.errorMessage ?? "llm() request failed.");
175
+ throw new ToolError(response.errorMessage ?? "completion() request failed.");
173
176
  }
174
177
  if (response.stopReason === "aborted") {
175
- throw new ToolError("llm() request aborted.");
178
+ throw new ToolError("completion() request aborted.");
176
179
  }
177
180
 
178
181
  let resultText: string;
@@ -183,20 +186,20 @@ export async function runEvalLlm(args: unknown, options: EvalLlmBridgeOptions):
183
186
  value = call.arguments;
184
187
  } else {
185
188
  const text = extractTextContent(response);
186
- if (!text) throw new ToolError("llm() returned no structured response.");
189
+ if (!text) throw new ToolError("completion() returned no structured response.");
187
190
  try {
188
191
  value = parseJsonPayload(text);
189
192
  } catch {
190
- throw new ToolError("llm() did not return a structured response matching the schema.");
193
+ throw new ToolError("completion() did not return a structured response matching the schema.");
191
194
  }
192
195
  }
193
196
  resultText = JSON.stringify(value);
194
197
  } else {
195
198
  resultText = extractTextContent(response);
196
- if (!resultText) throw new ToolError("llm() returned no text output.");
199
+ if (!resultText) throw new ToolError("completion() returned no text output.");
197
200
  }
198
201
 
199
- options.emitStatus?.({ op: "llm", model: formatModelString(model), tier, chars: resultText.length });
202
+ options.emitStatus?.({ op: "completion", model: formatModelString(model), tier, chars: resultText.length });
200
203
 
201
204
  return { text: resultText, details: { model: formatModelString(model), tier, structured: Boolean(schema) } };
202
205
  }
@@ -3,7 +3,7 @@
3
3
  *
4
4
  * A cell's `timeout` bounds time while the Python kernel or JS VM is in control.
5
5
  * Host-side bridge calls can {@link pause} the watchdog so delegated
6
- * `agent()`/`parallel()`/`llm()` work is ignored completely, then {@link resume}
6
+ * `agent()`/`parallel()`/`completion()` work is ignored completely, then {@link resume}
7
7
  * starts a fresh timeout window once the runtime gets control back.
8
8
  *
9
9
  * The active timer self-reschedules instead of being torn down on every
@@ -30,6 +30,7 @@ interface WorkerHandle {
30
30
  mode: "worker" | "inline";
31
31
  send(msg: WorkerInbound): void;
32
32
  onMessage(handler: (msg: WorkerOutbound) => void): () => void;
33
+ close(): Promise<boolean>;
33
34
  terminate(): Promise<void>;
34
35
  }
35
36
 
@@ -60,12 +61,14 @@ const resettingSessions = new Map<string, Promise<void>>();
60
61
  // avoiding `vm.runInContext` (see shared/indirect-eval.ts), here surfacing as a
61
62
  // SIGILL/SIGSEGV. Callers that pass a larger per-cell budget still dominate.
62
63
  const WORKER_INIT_TIMEOUT_MS = 15_000;
64
+ const WORKER_CLOSE_TIMEOUT_MS = 1_000;
63
65
 
64
66
  export async function executeInVmContext(options: {
65
67
  sessionKey: string;
66
68
  sessionId: string;
67
69
  cwd: string;
68
70
  session: ToolSession;
71
+ localRoots?: Record<string, string>;
69
72
  reset?: boolean;
70
73
  code: string;
71
74
  filename: string;
@@ -98,7 +101,7 @@ export async function executeInVmContext(options: {
98
101
  }
99
102
  const session = await acquireSession(
100
103
  options.sessionKey,
101
- { cwd: options.cwd, sessionId: options.sessionId },
104
+ { cwd: options.cwd, sessionId: options.sessionId, localRoots: options.localRoots },
102
105
  options.timeoutMs,
103
106
  );
104
107
  return await runOnce(session, options);
@@ -108,7 +111,7 @@ export async function resetVmContext(sessionKey: string): Promise<void> {
108
111
  const session = sessions.get(sessionKey) ?? (await startingSessions.get(sessionKey)?.catch(() => undefined));
109
112
  if (!session) return;
110
113
  sessions.delete(sessionKey);
111
- await killSession(session, new ToolError("JS context reset"));
114
+ await killSession(session, new ToolError("JS context reset"), { force: false });
112
115
  }
113
116
 
114
117
  export async function disposeAllVmContexts(): Promise<void> {
@@ -121,7 +124,7 @@ export async function disposeAllVmContexts(): Promise<void> {
121
124
  if (!all.includes(result.value)) all.push(result.value);
122
125
  }
123
126
  sessions.clear();
124
- await Promise.all(all.map(session => killSession(session, new ToolError("JS context disposed"))));
127
+ await Promise.all(all.map(session => killSession(session, new ToolError("JS context disposed"), { force: false })));
125
128
  }
126
129
 
127
130
  async function runOnce(
@@ -130,6 +133,7 @@ async function runOnce(
130
133
  sessionId: string;
131
134
  cwd: string;
132
135
  session: ToolSession;
136
+ localRoots?: Record<string, string>;
133
137
  code: string;
134
138
  filename: string;
135
139
  runState: VmRunState;
@@ -154,7 +158,7 @@ async function runOnce(
154
158
  // Cancel any in-flight tool calls first.
155
159
  for (const ctrl of pending.toolCalls.values()) ctrl.abort(abortError);
156
160
  // Hard-kill the worker — only way to interrupt synchronous user code.
157
- void killSessionFor(session, abortError);
161
+ void killSessionFor(session, abortError, { force: true });
158
162
  };
159
163
 
160
164
  if (options.runState.signal?.aborted) {
@@ -169,7 +173,7 @@ async function runOnce(
169
173
  runId,
170
174
  code: options.code,
171
175
  filename: options.filename,
172
- snapshot: { cwd: options.cwd, sessionId: options.sessionId },
176
+ snapshot: { cwd: options.cwd, sessionId: options.sessionId, localRoots: options.localRoots },
173
177
  });
174
178
  return await promise;
175
179
  } finally {
@@ -294,14 +298,14 @@ function settlePending(session: JsSession, msg: Extract<WorkerOutbound, { type:
294
298
  pending.reject(errorFromPayload(msg.error));
295
299
  }
296
300
 
297
- async function killSessionFor(session: JsSession, error: Error): Promise<void> {
301
+ async function killSessionFor(session: JsSession, error: Error, options: { force: boolean }): Promise<void> {
298
302
  if (sessions.get(session.sessionKey) === session) {
299
303
  sessions.delete(session.sessionKey);
300
304
  }
301
- await killSession(session, error);
305
+ await killSession(session, error, options);
302
306
  }
303
307
 
304
- async function killSession(session: JsSession, error: Error): Promise<void> {
308
+ async function killSession(session: JsSession, error: Error, options: { force: boolean }): Promise<void> {
305
309
  if (session.state === "dead") return;
306
310
  session.state = "dead";
307
311
  for (const pending of session.pending.values()) {
@@ -311,6 +315,11 @@ async function killSession(session: JsSession, error: Error): Promise<void> {
311
315
  pending.reject(error);
312
316
  }
313
317
  session.pending.clear();
318
+ if (options.force) {
319
+ await session.worker.terminate().catch(() => undefined);
320
+ return;
321
+ }
322
+ if (await session.worker.close().catch(() => false)) return;
314
323
  await session.worker.terminate().catch(() => undefined);
315
324
  }
316
325
 
@@ -398,6 +407,38 @@ function wrapBunWorker(worker: Worker): WorkerHandle {
398
407
  worker.addEventListener("message", wrap);
399
408
  return () => worker.removeEventListener("message", wrap);
400
409
  },
410
+ async close() {
411
+ const { promise: closed, resolve } = Promise.withResolvers<boolean>();
412
+ let settled = false;
413
+ let sawClosedAck = false;
414
+ let sawWorkerExit = false;
415
+ let timeout: NodeJS.Timeout | undefined;
416
+ let unsubscribe = (): void => {};
417
+ const finish = (value: boolean): void => {
418
+ if (settled) return;
419
+ settled = true;
420
+ if (timeout) clearTimeout(timeout);
421
+ unsubscribe();
422
+ worker.removeEventListener("close", onClose);
423
+ resolve(value);
424
+ };
425
+ const finishIfClosed = (): void => {
426
+ if (sawClosedAck && sawWorkerExit) finish(true);
427
+ };
428
+ const onClose = (): void => {
429
+ sawWorkerExit = true;
430
+ finishIfClosed();
431
+ };
432
+ unsubscribe = this.onMessage(msg => {
433
+ if (msg.type !== "closed") return;
434
+ sawClosedAck = true;
435
+ finishIfClosed();
436
+ });
437
+ worker.addEventListener("close", onClose);
438
+ timeout = setTimeout(() => finish(false), WORKER_CLOSE_TIMEOUT_MS);
439
+ worker.postMessage({ type: "close" } satisfies WorkerInbound);
440
+ return await closed;
441
+ },
401
442
  async terminate() {
402
443
  worker.terminate();
403
444
  },
@@ -434,6 +475,27 @@ function spawnInlineWorker(): WorkerHandle {
434
475
  hostListeners.add(handler);
435
476
  return () => hostListeners.delete(handler);
436
477
  },
478
+ async close() {
479
+ const { promise: closed, resolve } = Promise.withResolvers<boolean>();
480
+ let settled = false;
481
+ let timeout: NodeJS.Timeout | undefined;
482
+ let unsubscribe = (): void => {};
483
+ const finish = (value: boolean): void => {
484
+ if (settled) return;
485
+ settled = true;
486
+ if (timeout) clearTimeout(timeout);
487
+ unsubscribe();
488
+ hostListeners.clear();
489
+ workerListeners.clear();
490
+ resolve(value);
491
+ };
492
+ unsubscribe = this.onMessage(msg => {
493
+ if (msg.type === "closed") finish(true);
494
+ });
495
+ this.send({ type: "close" });
496
+ timeout = setTimeout(() => finish(false), WORKER_CLOSE_TIMEOUT_MS);
497
+ return await closed;
498
+ },
437
499
  async terminate() {
438
500
  hostListeners.clear();
439
501
  workerListeners.clear();
@@ -24,6 +24,8 @@ export interface JsExecutorOptions {
24
24
  artifactPath?: string;
25
25
  artifactId?: string;
26
26
  session: ToolSession;
27
+ /** On-disk roots the helpers substitute for internal-URL schemes (e.g. `local://`). */
28
+ localRoots?: Record<string, string>;
27
29
  }
28
30
 
29
31
  export interface JsResult {
@@ -96,6 +98,7 @@ export async function executeJs(code: string, options: JsExecutorOptions): Promi
96
98
  sessionId: options.sessionId,
97
99
  cwd: options.cwd ?? options.session.cwd,
98
100
  session: options.session,
101
+ localRoots: options.localRoots,
99
102
  reset: options.reset,
100
103
  code,
101
104
  filename: `js-cell-${crypto.randomUUID()}.js`,
@@ -1,5 +1,10 @@
1
1
  import type { ToolSession } from "../../tools";
2
- import type { ExecutorBackend, ExecutorBackendExecOptions, ExecutorBackendResult } from "../backend";
2
+ import {
3
+ type ExecutorBackend,
4
+ type ExecutorBackendExecOptions,
5
+ type ExecutorBackendResult,
6
+ resolveEvalUrlRoots,
7
+ } from "../backend";
3
8
  import { executeJs } from "./executor";
4
9
 
5
10
  const JS_SESSION_PREFIX = "js:";
@@ -30,6 +35,7 @@ export default {
30
35
  onChunk: opts.onChunk,
31
36
  onStatus: opts.onStatus,
32
37
  session: opts.session,
38
+ localRoots: resolveEvalUrlRoots(opts.session),
33
39
  });
34
40
  return {
35
41
  output: result.output,