@oh-my-pi/pi-coding-agent 15.3.2 → 15.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (193)
  1. package/CHANGELOG.md +110 -0
  2. package/dist/types/cli/file-processor.d.ts +1 -1
  3. package/dist/types/config/settings-schema.d.ts +45 -3
  4. package/dist/types/config/settings.d.ts +1 -1
  5. package/dist/types/debug/raw-sse.d.ts +2 -0
  6. package/dist/types/edit/file-read-cache.d.ts +15 -4
  7. package/dist/types/edit/index.d.ts +3 -8
  8. package/dist/types/edit/renderer.d.ts +1 -2
  9. package/dist/types/eval/__tests__/shared-executors.test.d.ts +1 -0
  10. package/dist/types/eval/js/shared/local-module-loader.d.ts +16 -0
  11. package/dist/types/eval/js/shared/rewrite-imports.d.ts +4 -0
  12. package/dist/types/eval/js/shared/runtime.d.ts +14 -8
  13. package/dist/types/eval/py/executor.d.ts +1 -2
  14. package/dist/types/eval/py/kernel.d.ts +6 -0
  15. package/dist/types/eval/py/tool-bridge.d.ts +1 -5
  16. package/dist/types/eval/session-id.d.ts +3 -0
  17. package/dist/types/extensibility/extensions/types.d.ts +1 -3
  18. package/dist/types/hashline/anchors.d.ts +15 -9
  19. package/dist/types/hashline/constants.d.ts +0 -2
  20. package/dist/types/hashline/diff.d.ts +1 -2
  21. package/dist/types/hashline/executor.d.ts +52 -0
  22. package/dist/types/hashline/hash.d.ts +44 -93
  23. package/dist/types/hashline/index.d.ts +2 -1
  24. package/dist/types/hashline/input.d.ts +2 -9
  25. package/dist/types/hashline/recovery.d.ts +3 -9
  26. package/dist/types/hashline/tokenizer.d.ts +91 -0
  27. package/dist/types/hashline/types.d.ts +5 -7
  28. package/dist/types/modes/components/extensions/types.d.ts +0 -4
  29. package/dist/types/modes/types.d.ts +1 -0
  30. package/dist/types/modes/utils/ui-helpers.d.ts +1 -0
  31. package/dist/types/sdk.d.ts +2 -0
  32. package/dist/types/session/agent-session.d.ts +11 -15
  33. package/dist/types/session/agent-storage.d.ts +11 -10
  34. package/dist/types/slash-commands/acp-builtins.d.ts +3 -3
  35. package/dist/types/slash-commands/types.d.ts +0 -5
  36. package/dist/types/task/executor.d.ts +2 -0
  37. package/dist/types/tool-discovery/tool-index.d.ts +0 -50
  38. package/dist/types/tools/index.d.ts +2 -8
  39. package/dist/types/tools/match-line-format.d.ts +4 -4
  40. package/dist/types/tools/output-schema-validator.d.ts +64 -0
  41. package/dist/types/tools/review.d.ts +13 -0
  42. package/dist/types/tools/search-tool-bm25.d.ts +1 -1
  43. package/dist/types/tools/search.d.ts +4 -3
  44. package/dist/types/utils/edit-mode.d.ts +1 -1
  45. package/dist/types/web/kagi.d.ts +4 -2
  46. package/dist/types/web/parallel.d.ts +4 -3
  47. package/dist/types/web/scrapers/types.d.ts +2 -1
  48. package/dist/types/web/search/index.d.ts +12 -4
  49. package/dist/types/web/search/provider.d.ts +2 -1
  50. package/dist/types/web/search/providers/anthropic.d.ts +9 -4
  51. package/dist/types/web/search/providers/base.d.ts +34 -2
  52. package/dist/types/web/search/providers/brave.d.ts +8 -1
  53. package/dist/types/web/search/providers/codex.d.ts +13 -9
  54. package/dist/types/web/search/providers/exa.d.ts +10 -1
  55. package/dist/types/web/search/providers/gemini.d.ts +20 -23
  56. package/dist/types/web/search/providers/jina.d.ts +2 -1
  57. package/dist/types/web/search/providers/kagi.d.ts +4 -1
  58. package/dist/types/web/search/providers/kimi.d.ts +10 -1
  59. package/dist/types/web/search/providers/parallel.d.ts +3 -2
  60. package/dist/types/web/search/providers/perplexity.d.ts +5 -2
  61. package/dist/types/web/search/providers/searxng.d.ts +2 -1
  62. package/dist/types/web/search/providers/synthetic.d.ts +5 -8
  63. package/dist/types/web/search/providers/tavily.d.ts +11 -4
  64. package/dist/types/web/search/providers/utils.d.ts +8 -6
  65. package/dist/types/web/search/providers/zai.d.ts +12 -3
  66. package/package.json +7 -7
  67. package/src/cli/file-processor.ts +12 -2
  68. package/src/cli.ts +0 -8
  69. package/src/commands/commit.ts +8 -8
  70. package/src/config/prompt-templates.ts +6 -6
  71. package/src/config/settings-schema.ts +47 -3
  72. package/src/config/settings.ts +5 -5
  73. package/src/debug/raw-sse.ts +68 -3
  74. package/src/edit/file-read-cache.ts +68 -25
  75. package/src/edit/index.ts +6 -37
  76. package/src/edit/renderer.ts +9 -47
  77. package/src/edit/streaming.ts +43 -56
  78. package/src/eval/__tests__/shared-executors.test.ts +520 -0
  79. package/src/eval/js/context-manager.ts +64 -53
  80. package/src/eval/js/shared/local-module-loader.ts +265 -0
  81. package/src/eval/js/shared/prelude.txt +4 -0
  82. package/src/eval/js/shared/rewrite-imports.ts +85 -0
  83. package/src/eval/js/shared/runtime.ts +129 -86
  84. package/src/eval/js/worker-core.ts +23 -38
  85. package/src/eval/py/executor.ts +155 -84
  86. package/src/eval/py/kernel.ts +10 -1
  87. package/src/eval/py/prelude.py +22 -24
  88. package/src/eval/py/runner.py +203 -85
  89. package/src/eval/py/tool-bridge.ts +17 -10
  90. package/src/eval/session-id.ts +8 -0
  91. package/src/exec/bash-executor.ts +27 -16
  92. package/src/extensibility/extensions/runner.ts +0 -1
  93. package/src/extensibility/extensions/types.ts +1 -3
  94. package/src/hashline/anchors.ts +56 -65
  95. package/src/hashline/apply.ts +29 -31
  96. package/src/hashline/constants.ts +0 -3
  97. package/src/hashline/diff-preview.ts +4 -5
  98. package/src/hashline/diff.ts +30 -4
  99. package/src/hashline/execute.ts +91 -26
  100. package/src/hashline/executor.ts +239 -0
  101. package/src/hashline/grammar.lark +12 -10
  102. package/src/hashline/hash.ts +69 -114
  103. package/src/hashline/index.ts +2 -1
  104. package/src/hashline/input.ts +48 -41
  105. package/src/hashline/prefixes.ts +21 -11
  106. package/src/hashline/recovery.ts +63 -71
  107. package/src/hashline/stream.ts +2 -2
  108. package/src/hashline/tokenizer.ts +467 -0
  109. package/src/hashline/types.ts +6 -8
  110. package/src/internal-urls/docs-index.generated.ts +7 -7
  111. package/src/modes/components/extensions/types.ts +0 -5
  112. package/src/modes/components/session-observer-overlay.ts +11 -2
  113. package/src/modes/components/settings-selector.ts +10 -1
  114. package/src/modes/components/tree-selector.ts +10 -2
  115. package/src/modes/controllers/command-controller.ts +1 -3
  116. package/src/modes/controllers/extension-ui-controller.ts +10 -11
  117. package/src/modes/controllers/selector-controller.ts +5 -5
  118. package/src/modes/theme/theme.ts +4 -2
  119. package/src/modes/types.ts +4 -1
  120. package/src/modes/utils/ui-helpers.ts +4 -0
  121. package/src/prompts/agents/explore.md +1 -1
  122. package/src/prompts/tools/ast-edit.md +1 -1
  123. package/src/prompts/tools/ast-grep.md +1 -1
  124. package/src/prompts/tools/eval.md +1 -1
  125. package/src/prompts/tools/hashline.md +73 -94
  126. package/src/prompts/tools/read.md +4 -4
  127. package/src/prompts/tools/search.md +3 -3
  128. package/src/sdk.ts +33 -26
  129. package/src/session/agent-session.ts +59 -66
  130. package/src/session/agent-storage.ts +13 -14
  131. package/src/slash-commands/acp-builtins.ts +3 -3
  132. package/src/slash-commands/types.ts +0 -6
  133. package/src/task/executor.ts +26 -57
  134. package/src/task/index.ts +8 -4
  135. package/src/tool-discovery/tool-index.ts +0 -134
  136. package/src/tools/ast-edit.ts +36 -13
  137. package/src/tools/ast-grep.ts +45 -4
  138. package/src/tools/browser/tab-worker.ts +3 -2
  139. package/src/tools/eval.ts +2 -1
  140. package/src/tools/fetch.ts +23 -14
  141. package/src/tools/index.ts +2 -8
  142. package/src/tools/irc.ts +59 -5
  143. package/src/tools/match-line-format.ts +5 -7
  144. package/src/tools/output-schema-validator.ts +132 -0
  145. package/src/tools/read.ts +142 -31
  146. package/src/tools/review.ts +23 -0
  147. package/src/tools/search-tool-bm25.ts +3 -30
  148. package/src/tools/search.ts +48 -16
  149. package/src/tools/write.ts +3 -3
  150. package/src/tools/yield.ts +32 -41
  151. package/src/utils/edit-mode.ts +1 -2
  152. package/src/utils/file-mentions.ts +2 -2
  153. package/src/web/kagi.ts +15 -6
  154. package/src/web/parallel.ts +9 -6
  155. package/src/web/scrapers/types.ts +7 -1
  156. package/src/web/scrapers/youtube.ts +13 -7
  157. package/src/web/search/index.ts +37 -11
  158. package/src/web/search/provider.ts +5 -3
  159. package/src/web/search/providers/anthropic.ts +30 -21
  160. package/src/web/search/providers/base.ts +35 -2
  161. package/src/web/search/providers/brave.ts +4 -4
  162. package/src/web/search/providers/codex.ts +118 -89
  163. package/src/web/search/providers/exa.ts +3 -2
  164. package/src/web/search/providers/gemini.ts +58 -155
  165. package/src/web/search/providers/jina.ts +4 -4
  166. package/src/web/search/providers/kagi.ts +17 -11
  167. package/src/web/search/providers/kimi.ts +29 -13
  168. package/src/web/search/providers/parallel.ts +171 -23
  169. package/src/web/search/providers/perplexity.ts +38 -37
  170. package/src/web/search/providers/searxng.ts +3 -1
  171. package/src/web/search/providers/synthetic.ts +16 -19
  172. package/src/web/search/providers/tavily.ts +23 -18
  173. package/src/web/search/providers/utils.ts +11 -17
  174. package/src/web/search/providers/zai.ts +16 -8
  175. package/dist/types/hashline/parser.d.ts +0 -7
  176. package/dist/types/mcp/discoverable-tool-metadata.d.ts +0 -7
  177. package/dist/types/tools/vim.d.ts +0 -58
  178. package/dist/types/vim/buffer.d.ts +0 -41
  179. package/dist/types/vim/commands.d.ts +0 -6
  180. package/dist/types/vim/engine.d.ts +0 -47
  181. package/dist/types/vim/parser.d.ts +0 -3
  182. package/dist/types/vim/render.d.ts +0 -25
  183. package/dist/types/vim/types.d.ts +0 -182
  184. package/src/hashline/parser.ts +0 -246
  185. package/src/mcp/discoverable-tool-metadata.ts +0 -24
  186. package/src/prompts/tools/vim.md +0 -98
  187. package/src/tools/vim.ts +0 -949
  188. package/src/vim/buffer.ts +0 -309
  189. package/src/vim/commands.ts +0 -382
  190. package/src/vim/engine.ts +0 -2409
  191. package/src/vim/parser.ts +0 -134
  192. package/src/vim/render.ts +0 -252
  193. package/src/vim/types.ts +0 -197
@@ -0,0 +1,520 @@
1
+ import { afterAll, afterEach, describe, expect, it, vi } from "bun:test";
2
+ import * as fs from "node:fs/promises";
3
+ import * as path from "node:path";
4
+ import type { AssistantMessage } from "@oh-my-pi/pi-ai";
5
+ import { TempDir } from "@oh-my-pi/pi-utils";
6
+ import type { ModelRegistry } from "../../config/model-registry";
7
+ import { Settings } from "../../config/settings";
8
+ import type { LoadExtensionsResult } from "../../extensibility/extensions/types";
9
+ import type { CreateAgentSessionOptions, CreateAgentSessionResult } from "../../sdk";
10
+ import * as sdkModule from "../../sdk";
11
+ import type { AgentSession, AgentSessionEvent, PromptOptions } from "../../session/agent-session";
12
+ import { TaskTool } from "../../task";
13
+ import * as discoveryModule from "../../task/discovery";
14
+ import type { AgentDefinition, TaskParams } from "../../task/types";
15
+ import type { ToolSession } from "../../tools";
16
+ import { EventBus } from "../../utils/event-bus";
17
+ import { disposeAllVmContexts } from "../js/context-manager";
18
+ import { executeJs } from "../js/executor";
19
+ import { disposeAllKernelSessions, executePython } from "../py/executor";
20
+
21
/**
 * Builds a minimal `ToolSession` stand-in for the executor tests.
 *
 * The stub is headless (`hasUI: false`), uses isolated settings with async
 * execution disabled and task isolation set to "none", and carries a model
 * registry whose lookups all come back empty.
 *
 * @param cwd - Working directory reported by the stub.
 * @param sessionFile - Value returned by `getSessionFile()`; may be `null`.
 * @param evalSessionId - When non-empty, exposed through `getEvalSessionId()`;
 *   otherwise the stub has no `getEvalSessionId` function at all.
 * @returns The stub, cast to `ToolSession`.
 */
function createToolSession(cwd: string, sessionFile: string | null, evalSessionId?: string): ToolSession {
  // Registry double: nothing is available and no API key is ever resolved.
  const registryStub = {
    authStorage: undefined,
    refresh: async () => {},
    getAvailable: () => [],
    getApiKey: async () => null,
  } as unknown as ModelRegistry;

  // Only install the accessor when an id was actually supplied.
  let getEvalSessionId: (() => string) | undefined;
  if (evalSessionId) {
    const inheritedId = evalSessionId;
    getEvalSessionId = () => inheritedId;
  }

  const stub = {
    cwd,
    hasUI: false,
    settings: Settings.isolated({
      "async.enabled": false,
      "task.isolation.mode": "none",
    }),
    getSessionFile: () => sessionFile,
    getSessionSpawns: () => "*",
    getEvalSessionId,
    modelRegistry: registryStub,
  };
  return stub as unknown as ToolSession;
}
41
+
42
/**
 * Builds a `ToolSession` stand-in that exposes a single fake `read` tool.
 *
 * @param resultText - Text returned as the sole content item of every `read` call.
 * @param calls - Sink array; the arguments of each `read` invocation are appended to it.
 * @returns An object offering only `getToolByName`, cast to `ToolSession`.
 */
function createBridgeToolSession(resultText: string, calls: unknown[]): ToolSession {
  const recordingRead = {
    name: "read",
    label: "read",
    description: "read",
    parameters: { type: "object" },
    // Record the arguments, then answer with the canned text.
    async execute(_id: string, args: unknown) {
      calls.push(args);
      return { content: [{ type: "text" as const, text: resultText }] };
    },
  };

  const registry = new Map<string, unknown>();
  registry.set("read", recordingRead);

  const bridge = {
    // Unknown tool names resolve to undefined.
    getToolByName: (name: string) => registry.get(name),
  };
  return bridge as unknown as ToolSession;
}
56
+
57
/**
 * Creates a mock assistant message that ended with `stopReason: "stop"`.
 *
 * @param text - Message text; an empty string yields an empty `content` array.
 * @returns An `AssistantMessage` with zeroed usage and cost, stamped with the current time.
 */
function assistantStopMessage(text: string): AssistantMessage {
  const message: AssistantMessage = {
    role: "assistant",
    content: text.length > 0 ? [{ type: "text", text }] : [],
    api: "openai-responses",
    provider: "openai",
    model: "mock",
    // No tokens were consumed and nothing was billed for the mock.
    usage: {
      input: 0,
      output: 0,
      cacheRead: 0,
      cacheWrite: 0,
      totalTokens: 0,
      cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
    },
    stopReason: "stop",
    timestamp: Date.now(),
  };
  return message;
}
76
+
77
/**
 * Builds a fake subagent `AgentSession` whose `prompt()` runs a caller-supplied
 * hook, records a "done" assistant message, and then emits a successful
 * `yield` tool-execution-end event to every subscriber.
 *
 * @param onPrompt - Awaited at the start of each `prompt()` call, before any
 *   message is recorded or event is emitted.
 * @returns The fake session, cast to `AgentSession`.
 */
function createYieldingSubagentSession(onPrompt: () => Promise<void>): AgentSession {
  const subscribers: Array<(event: AgentSessionEvent) => void> = [];
  const state = { messages: [] as AssistantMessage[] };

  // Deliver an event to each current subscriber in subscription order.
  function broadcast(event: AgentSessionEvent): void {
    for (const notify of subscribers) notify(event);
  }

  // Register a listener and hand back its unsubscribe function.
  function subscribe(listener: (event: AgentSessionEvent) => void): () => void {
    subscribers.push(listener);
    return () => {
      const position = subscribers.indexOf(listener);
      if (position !== -1) subscribers.splice(position, 1);
    };
  }

  async function prompt(_text: string, _options?: PromptOptions): Promise<void> {
    await onPrompt();
    state.messages.push(assistantStopMessage("done"));
    broadcast({
      type: "tool_execution_end",
      toolCallId: "yield-call",
      toolName: "yield",
      result: {
        content: [{ type: "text", text: "Result submitted." }],
        details: { status: "success", data: { ok: true } },
      },
      isError: false,
    });
  }

  const fake = {
    state,
    agent: { state: { systemPrompt: ["test"] } },
    model: undefined,
    extensionRunner: undefined,
    sessionManager: {
      appendSessionInit: () => {},
    },
    getActiveToolNames: () => ["eval", "yield"],
    setActiveToolsByName: async () => {},
    subscribe,
    prompt,
    waitForIdle: async () => {},
    // Undefined until the first prompt() has completed.
    getLastAssistantMessage: () => state.messages[state.messages.length - 1],
    abort: async () => {},
    dispose: async () => {},
  };
  return fake as unknown as AgentSession;
}
120
+
121
/** Agent definition returned by the mocked agent discovery in the subagent test. */
const taskAgent: AgentDefinition = {
  name: "task",
  description: "Task agent",
  systemPrompt: "Read eval state and yield.",
  source: "bundled",
  tools: ["eval", "yield"],
};

/** Single-task request handed to `TaskTool.execute` in the subagent test. */
const taskParams: TaskParams = {
  agent: "task",
  tasks: [
    {
      id: "ReadEval",
      description: "Read eval state",
      assignment: "Read parent eval state.",
    },
  ],
};
133
+
134
describe("shared eval executors", () => {
  // Undo every spy installed by a test so mocks never leak into the next case.
  afterEach(() => {
    vi.restoreAllMocks();
  });

  // Dispose the JS contexts first, then the Python kernel sessions.
  afterAll(async () => {
    await disposeAllVmContexts();
    await disposeAllKernelSessions();
  });

  it("shares JavaScript state across executeJs calls with one session id", async () => {
    using scratch = TempDir.createSync("@omp-eval-js-shared-");
    const root = scratch.path();
    const sessionFile = path.join(root, "session.jsonl");
    const sessionId = `js-shared:${crypto.randomUUID()}`;
    const session = createToolSession(root, sessionFile);
    const runJs = (code: string) => executeJs(code, { sessionId, session, sessionFile });

    await runJs("globalThis.x = 41;");
    const readBack = await runJs("return globalThis.x + 1;");

    expect(readBack.exitCode).toBe(0);
    expect(readBack.output.trim()).toBe("42");
  });

  it("shares Python state across executePython calls with one session id", async () => {
    using scratch = TempDir.createSync("@omp-eval-py-shared-");
    const root = scratch.path();
    const sessionFile = path.join(root, "session.jsonl");
    const sessionId = `py-shared:${crypto.randomUUID()}`;
    const runPy = (code: string) => executePython(code, { cwd: root, sessionId, sessionFile });

    await runPy("x = 41");
    const readBack = await runPy("print(x + 1)");

    expect(readBack.exitCode).toBe(0);
    expect(readBack.output.trim()).toBe("42");
  });

  it("deduplicates concurrent first JavaScript session acquisition", async () => {
    using scratch = TempDir.createSync("@omp-eval-js-cold-start-");
    const root = scratch.path();
    const sessionFile = path.join(root, "session.jsonl");
    const sessionId = `js-cold-start:${crypto.randomUUID()}`;
    const session = createToolSession(root, sessionFile);
    const runJs = (code: string) => executeJs(code, { sessionId, session, sessionFile });

    // Both calls start before any session exists for this id; the slow one goes first.
    const slowCell =
      "globalThis.sharedMarker ??= crypto.randomUUID(); await Bun.sleep(50); return globalThis.sharedMarker;";
    const fastCell = "globalThis.sharedMarker ??= crypto.randomUUID(); return globalThis.sharedMarker;";
    const [first, second] = await Promise.all([runJs(slowCell), runJs(fastCell)]);
    const third = await runJs("return globalThis.sharedMarker;");

    expect(first.exitCode).toBe(0);
    expect(second.exitCode).toBe(0);
    expect(third.exitCode).toBe(0);
    expect(first.output.trim()).toBe(second.output.trim());
    expect(third.output.trim()).toBe(first.output.trim());
  });

  it("deduplicates concurrent first Python session acquisition", async () => {
    using scratch = TempDir.createSync("@omp-eval-py-cold-start-");
    const root = scratch.path();
    const sessionFile = path.join(root, "session.jsonl");
    const sessionId = `py-cold-start:${crypto.randomUUID()}`;
    const runPy = (code: string) => executePython(code, { cwd: root, sessionId, sessionFile });

    const slowCell = [
      "import asyncio, uuid",
      'shared_marker = globals().get("shared_marker") or str(uuid.uuid4())',
      'globals()["shared_marker"] = shared_marker',
      "await asyncio.sleep(0.05)",
      "print(shared_marker)",
    ].join("\n");
    const fastCell = [
      "import uuid",
      'shared_marker = globals().get("shared_marker") or str(uuid.uuid4())',
      'globals()["shared_marker"] = shared_marker',
      "print(shared_marker)",
    ].join("\n");

    const [first, second] = await Promise.all([runPy(slowCell), runPy(fastCell)]);
    const third = await runPy("print(shared_marker)");

    expect(first.exitCode).toBe(0);
    expect(second.exitCode).toBe(0);
    expect(third.exitCode).toBe(0);
    expect(first.output.trim()).toBe(second.output.trim());
    expect(third.output.trim()).toBe(first.output.trim());
  });

  it("splits retained Python kernels by cwd for one shared session id", async () => {
    using scratch = TempDir.createSync("@omp-eval-py-cwd-");
    const root = scratch.path();
    const dirA = path.join(root, "a");
    const dirB = path.join(root, "b");
    await fs.mkdir(dirA);
    await fs.mkdir(dirB);
    // Compare against resolved paths, since os.getcwd() may report the real location.
    const realDirA = await fs.realpath(dirA);
    const realDirB = await fs.realpath(dirB);
    const sessionFile = path.join(root, "session.jsonl");
    const sessionId = `py-cwd:${crypto.randomUUID()}`;
    const runPyIn = (cwd: string, code: string) => executePython(code, { cwd, sessionId, sessionFile });

    const first = await runPyIn(dirA, ["import os", 'token = "from-a"', "print(os.getcwd())"].join("\n"));
    const second = await runPyIn(
      dirB,
      ["import os", "print(os.getcwd())", 'print("token" in globals())'].join("\n"),
    );
    const third = await runPyIn(dirA, "print(token)");

    expect(first.exitCode).toBe(0);
    expect(first.output.trim()).toBe(realDirA);
    expect(second.exitCode).toBe(0);
    expect(second.output.trim().split("\n")).toEqual([realDirB, "False"]);
    expect(third.exitCode).toBe(0);
    expect(third.output.trim()).toBe("from-a");
  });

  it("interrupts timed out synchronous Python cells before they mutate shared state", async () => {
    using scratch = TempDir.createSync("@omp-eval-py-sync-timeout-");
    const root = scratch.path();
    const sessionFile = path.join(root, "session.jsonl");
    const sessionId = `py-sync-timeout:${crypto.randomUUID()}`;
    const baseOptions = () => ({ cwd: root, sessionId, sessionFile });

    const sleepingCell = ["import time", "time.sleep(0.2)", "leaked_after_timeout = True"].join("\n");
    const timedOut = await executePython(sleepingCell, { ...baseOptions(), timeoutMs: 20 });
    // Wait past the cell's own 0.2s sleep so a leaked assignment would have landed by now.
    await Bun.sleep(250);
    const probe = await executePython('print("leaked_after_timeout" in globals())', baseOptions());

    expect(timedOut.cancelled).toBe(true);
    expect(probe.exitCode).toBe(0);
    expect(probe.output.trim()).toBe("False");
  });

  it("settles Python cells that raise SystemExit", async () => {
    using scratch = TempDir.createSync("@omp-eval-py-system-exit-");
    const root = scratch.path();
    const sessionFile = path.join(root, "session.jsonl");
    const sessionId = `py-system-exit:${crypto.randomUUID()}`;

    const outcome = await executePython('raise SystemExit("bye")', {
      cwd: root,
      sessionId,
      sessionFile,
      timeoutMs: 500,
    });

    expect(outcome.exitCode).toBe(1);
    expect(outcome.output).toContain("SystemExit");
    expect(outcome.output).toContain("bye");
  });

  it("lets a subagent inherit parent JavaScript and Python eval state", async () => {
    using scratch = TempDir.createSync("@omp-eval-subagent-");
    const root = scratch.path();
    const sessionFile = path.join(root, "session.jsonl");
    const evalSessionId = `session:${sessionFile}:cwd:${root}`;
    const parentSession = createToolSession(root, sessionFile, evalSessionId);
    const observed = { js: "", py: "" };
    let capturedOptions: CreateAgentSessionOptions | undefined;

    // Seed the parent's JS and Python state under ids derived from the eval session id.
    await executeJs('globalThis.parentSecret = "hello-js";', {
      sessionId: `js:${evalSessionId}`,
      session: parentSession,
      sessionFile,
    });
    await executePython('parent_secret = "hello-py"', {
      cwd: root,
      sessionId: `python:${evalSessionId}`,
      sessionFile,
    });

    vi.spyOn(discoveryModule, "discoverAgents").mockResolvedValue({ agents: [taskAgent], projectAgentsDir: null });
    vi.spyOn(sdkModule, "createAgentSession").mockImplementation(async (options = {}) => {
      capturedOptions = options;
      const inherited = options.parentEvalSessionId;
      if (!inherited) throw new Error("Missing parent eval session id");

      // When prompted, the fake subagent reads both secrets back through the inherited id.
      const readParentState = async () => {
        const jsResult = await executeJs("return globalThis.parentSecret;", {
          sessionId: `js:${inherited}`,
          session: parentSession,
          sessionFile,
        });
        const pyResult = await executePython("print(parent_secret)", {
          cwd: root,
          sessionId: `python:${inherited}`,
          sessionFile,
        });
        observed.js = jsResult.output.trim();
        observed.py = pyResult.output.trim();
      };

      return {
        session: createYieldingSubagentSession(readParentState),
        extensionsResult: {} as unknown as LoadExtensionsResult,
        setToolUIContext: () => {},
        eventBus: new EventBus(),
      } satisfies CreateAgentSessionResult;
    });

    const tool = await TaskTool.create(parentSession);
    await tool.execute("tool-call", taskParams);

    expect(capturedOptions?.parentEvalSessionId).toBe(evalSessionId);
    expect(observed.js).toBe("hello-js");
    expect(observed.py).toBe("hello-py");
  });

  it("routes interleaved JavaScript display output to the matching run", async () => {
    using scratch = TempDir.createSync("@omp-eval-js-interleave-");
    const root = scratch.path();
    const sessionFile = path.join(root, "session.jsonl");
    const sessionId = `js-interleave:${crypto.randomUUID()}`;
    const session = createToolSession(root, sessionFile);
    const runJs = (code: string) => executeJs(code, { sessionId, session, sessionFile });

    // Start the slow run, then launch the fast one while the first is still sleeping.
    const slowRun = runJs('await Bun.sleep(80); display({ label: "A" });');
    await Bun.sleep(10);
    const fastRun = runJs('display({ label: "B" });');

    const [firstResult, secondResult] = await Promise.all([slowRun, fastRun]);
    expect(firstResult.exitCode).toBe(0);
    expect(secondResult.exitCode).toBe(0);
    expect(firstResult.displayOutputs).toEqual([{ type: "json", data: { label: "A" } }]);
    expect(secondResult.displayOutputs).toEqual([{ type: "json", data: { label: "B" } }]);
  });

  it("routes interleaved Python display output to the matching run", async () => {
    using scratch = TempDir.createSync("@omp-eval-py-interleave-");
    const root = scratch.path();
    const sessionFile = path.join(root, "session.jsonl");
    const sessionId = `py-interleave:${crypto.randomUUID()}`;
    const runPy = (code: string) => executePython(code, { cwd: root, sessionId, sessionFile });

    // Start the slow run, then launch the fast one while the first is still sleeping.
    const slowRun = runPy(["import asyncio", "await asyncio.sleep(0.08)", 'display({"label": "A"})'].join("\n"));
    await Bun.sleep(10);
    const fastRun = runPy('display({"label": "B"})');

    const [firstResult, secondResult] = await Promise.all([slowRun, fastRun]);
    expect(firstResult.exitCode).toBe(0);
    expect(secondResult.exitCode).toBe(0);
    expect(firstResult.displayOutputs).toEqual([{ type: "json", data: { label: "A" } }]);
    expect(secondResult.displayOutputs).toEqual([{ type: "json", data: { label: "B" } }]);
  });

  it("preserves module-level singleton state across re-imports of an unchanged file", async () => {
    using scratch = TempDir.createSync("@omp-eval-js-mtime-");
    const root = scratch.path();
    const sessionFile = path.join(root, "session.jsonl");
    const sessionId = `js-mtime:${crypto.randomUUID()}`;
    const session = createToolSession(root, sessionFile);
    const modulePath = path.join(root, "singleton.ts");
    const moduleSpec = JSON.stringify(modulePath);
    await Bun.write(
      modulePath,
      "let value = 0;\nexport function set(v) { value = v; }\nexport function get() { return value; }\n",
    );
    const runJs = (code: string) => executeJs(code, { sessionId, session, sessionFile });
    const importAndGet = `const mod = await import(${moduleSpec}); return mod.get();`;

    const initResult = await runJs(`const mod = await import(${moduleSpec}); mod.set(42); return mod.get();`);
    expect(initResult.exitCode).toBe(0);
    expect(initResult.output.trim()).toBe("42");

    // The file has not changed, so importing it again must hand back the same
    // module namespace and the stored value must still be 42. This guards the
    // regression where an unconditional `delete require.cache[target]` reset
    // singletons on every dynamic import.
    const reuseResult = await runJs(importAndGet);
    expect(reuseResult.exitCode).toBe(0);
    expect(reuseResult.output.trim()).toBe("42");

    // Push the mtime 5s into the future to simulate an edit; the next import
    // must drop the cached module and re-evaluate it, so the value returns to
    // its initializer.
    const future = new Date(Date.now() + 5_000);
    await fs.utimes(modulePath, future, future);

    const reloadResult = await runJs(importAndGet);
    expect(reloadResult.exitCode).toBe(0);
    expect(reloadResult.output.trim()).toBe("0");
  });

  it("reloads a local re-export when a transitive dependency changes", async () => {
    using scratch = TempDir.createSync("@omp-eval-js-transitive-");
    const root = scratch.path();
    const sessionFile = path.join(root, "session.jsonl");
    const sessionId = `js-transitive:${crypto.randomUUID()}`;
    const session = createToolSession(root, sessionFile);
    const leafPath = path.join(root, "leaf.ts");
    const entryPath = path.join(root, "entry.ts");
    const entrySpec = JSON.stringify(entryPath);
    await Bun.write(leafPath, "export const value = 1;\n");
    await Bun.write(entryPath, 'export { value } from "./leaf.ts";\n');
    const readValue = () =>
      executeJs(`const mod = await import(${entrySpec}); return mod.value;`, { sessionId, session, sessionFile });

    const initial = await readValue();
    expect(initial.exitCode).toBe(0);
    expect(initial.output.trim()).toBe("1");

    // Only the leaf is rewritten and re-stamped; the entry file itself is untouched.
    await Bun.write(leafPath, "export const value = 2;\n");
    const future = new Date(Date.now() + 5_000);
    await fs.utimes(leafPath, future, future);

    const reloaded = await readValue();
    expect(reloaded.exitCode).toBe(0);
    expect(reloaded.output.trim()).toBe("2");
  });

  it("refreshes the Python tool proxy when bridge env appears after kernel warm-up", async () => {
    using scratch = TempDir.createSync("@omp-eval-py-tool-proxy-");
    const root = scratch.path();
    const sessionFile = path.join(root, "session.jsonl");
    const sessionId = `py-tool-proxy:${crypto.randomUUID()}`;
    const bridgeCalls: unknown[] = [];
    const bridgeSession = createBridgeToolSession("bridge-ok", bridgeCalls);

    // First call runs with no tool session, so the proxy has no bridge to reach.
    const withoutBridge = await executePython(
      'try:\n print(tool.read({"path": "foo.txt"}))\nexcept Exception as exc:\n print(type(exc).__name__)\n print(str(exc))',
      { cwd: root, sessionId, sessionFile },
    );
    // Same session id, now with a bridge-capable tool session attached.
    const withBridge = await executePython('print(tool.read({"path": "foo.txt"}))', {
      cwd: root,
      sessionId,
      sessionFile,
      toolSession: bridgeSession,
    });

    expect(withoutBridge.exitCode).toBe(0);
    expect(withoutBridge.output).toContain("RuntimeError");
    expect(withoutBridge.output).toContain("tool bridge is unavailable");
    expect(withBridge.exitCode).toBe(0);
    expect(withBridge.output.trim()).toBe("bridge-ok");
    expect(bridgeCalls).toEqual([{ path: "foo.txt", _i: "py prelude" }]);
  });
});
@@ -48,10 +48,11 @@ interface JsSession {
48
48
  worker: WorkerHandle;
49
49
  state: "alive" | "dead";
50
50
  pending: Map<string, PendingRun>;
51
- queue: Promise<void>;
52
51
  }
53
52
 
54
53
  const sessions = new Map<string, JsSession>();
54
+ const startingSessions = new Map<string, Promise<JsSession>>();
55
+ const resettingSessions = new Set<string>();
55
56
  const READY_TIMEOUT_MS_DEFAULT = 5_000;
56
57
 
57
58
  export async function executeInVmContext(options: {
@@ -66,45 +67,46 @@ export async function executeInVmContext(options: {
66
67
  runState: VmRunState;
67
68
  }): Promise<{ value: unknown }> {
68
69
  if (options.reset) {
69
- await resetVmContext(options.sessionKey);
70
+ if (resettingSessions.has(options.sessionKey)) {
71
+ throw new ToolError("JS context reset already in progress");
72
+ }
73
+ resettingSessions.add(options.sessionKey);
74
+ try {
75
+ await resetVmContext(options.sessionKey);
76
+ } finally {
77
+ resettingSessions.delete(options.sessionKey);
78
+ }
79
+ } else if (resettingSessions.has(options.sessionKey)) {
80
+ throw new ToolError("JS context reset in progress");
70
81
  }
71
82
  const session = await acquireSession(
72
83
  options.sessionKey,
73
84
  { cwd: options.cwd, sessionId: options.sessionId },
74
85
  options.timeoutMs,
75
86
  );
76
- return await runQueued(session, () => runOnce(session, options));
87
+ return await runOnce(session, options);
77
88
  }
78
89
 
79
90
  export async function resetVmContext(sessionKey: string): Promise<void> {
80
- const session = sessions.get(sessionKey);
91
+ const session = sessions.get(sessionKey) ?? (await startingSessions.get(sessionKey)?.catch(() => undefined));
81
92
  if (!session) return;
82
93
  sessions.delete(sessionKey);
83
94
  await killSession(session, new ToolError("JS context reset"));
84
95
  }
85
96
 
86
97
  export async function disposeAllVmContexts(): Promise<void> {
98
+ const pending = [...startingSessions.values()];
99
+ startingSessions.clear();
100
+ const started = await Promise.allSettled(pending);
87
101
  const all = [...sessions.values()];
102
+ for (const result of started) {
103
+ if (result.status !== "fulfilled") continue;
104
+ if (!all.includes(result.value)) all.push(result.value);
105
+ }
88
106
  sessions.clear();
89
107
  await Promise.all(all.map(session => killSession(session, new ToolError("JS context disposed"))));
90
108
  }
91
109
 
92
- async function runQueued<T>(session: JsSession, work: () => Promise<T>): Promise<T> {
93
- const previous = session.queue;
94
- const { promise, resolve } = Promise.withResolvers<void>();
95
- session.queue = promise;
96
- try {
97
- await previous;
98
- } catch {
99
- // Previous run's failure must not poison this one.
100
- }
101
- try {
102
- return await work();
103
- } finally {
104
- resolve();
105
- }
106
- }
107
-
108
110
  async function runOnce(
109
111
  session: JsSession,
110
112
  options: {
@@ -162,43 +164,52 @@ async function runOnce(
162
164
  async function acquireSession(sessionKey: string, snapshot: SessionSnapshot, timeoutMs?: number): Promise<JsSession> {
163
165
  const existing = sessions.get(sessionKey);
164
166
  if (existing && existing.state === "alive") return existing;
167
+ const starting = startingSessions.get(sessionKey);
168
+ if (starting) return await starting;
165
169
 
166
- const worker = await spawnJsWorker();
167
- const session: JsSession = {
168
- sessionKey,
169
- worker,
170
- state: "alive",
171
- pending: new Map(),
172
- queue: Promise.resolve(),
173
- };
174
- const { promise: readyPromise, resolve: resolveReady, reject: rejectReady } = Promise.withResolvers<void>();
175
- let resolved = false;
176
- const unsubscribe = worker.onMessage(msg => {
177
- if (!resolved && msg.type === "ready") {
178
- resolved = true;
179
- resolveReady();
180
- return;
181
- }
182
- if (!resolved && msg.type === "init-failed") {
183
- resolved = true;
184
- rejectReady(errorFromPayload(msg.error));
185
- return;
170
+ const startup = (async (): Promise<JsSession> => {
171
+ const worker = await spawnJsWorker();
172
+ const session: JsSession = {
173
+ sessionKey,
174
+ worker,
175
+ state: "alive",
176
+ pending: new Map(),
177
+ };
178
+ const { promise: readyPromise, resolve: resolveReady, reject: rejectReady } = Promise.withResolvers<void>();
179
+ let resolved = false;
180
+ const unsubscribe = worker.onMessage(msg => {
181
+ if (!resolved && msg.type === "ready") {
182
+ resolved = true;
183
+ resolveReady();
184
+ return;
185
+ }
186
+ if (!resolved && msg.type === "init-failed") {
187
+ resolved = true;
188
+ rejectReady(errorFromPayload(msg.error));
189
+ return;
190
+ }
191
+ handleSessionMessage(session, msg);
192
+ });
193
+ try {
194
+ // Cold-start can exceed 5s on slow hosts. Let the caller's per-cell timeout dominate so
195
+ // users can grant more headroom when they raise `timeout` on a cell.
196
+ const readyTimeoutMs = Math.max(READY_TIMEOUT_MS_DEFAULT, timeoutMs ?? 0);
197
+ await raceWithTimeout(readyPromise, readyTimeoutMs, "Timed out initializing JS eval worker");
198
+ worker.send({ type: "init", snapshot });
199
+ sessions.set(sessionKey, session);
200
+ return session;
201
+ } catch (error) {
202
+ unsubscribe();
203
+ await worker.terminate().catch(() => undefined);
204
+ throw error;
186
205
  }
187
- handleSessionMessage(session, msg);
188
- });
206
+ })();
207
+ startingSessions.set(sessionKey, startup);
189
208
  try {
190
- // Cold-start can exceed 5s on slow hosts. Let the caller's per-cell timeout dominate so
191
- // users can grant more headroom when they raise `timeout` on a cell.
192
- const readyTimeoutMs = Math.max(READY_TIMEOUT_MS_DEFAULT, timeoutMs ?? 0);
193
- await raceWithTimeout(readyPromise, readyTimeoutMs, "Timed out initializing JS eval worker");
194
- } catch (error) {
195
- unsubscribe();
196
- await worker.terminate().catch(() => undefined);
197
- throw error;
209
+ return await startup;
210
+ } finally {
211
+ if (startingSessions.get(sessionKey) === startup) startingSessions.delete(sessionKey);
198
212
  }
199
- worker.send({ type: "init", snapshot });
200
- sessions.set(sessionKey, session);
201
- return session;
202
213
  }
203
214
 
204
215
  function handleSessionMessage(session: JsSession, msg: WorkerOutbound): void {