@oh-my-pi/pi-coding-agent 15.10.0 → 15.10.2

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
Files changed (238)
  1. package/CHANGELOG.md +142 -1
  2. package/dist/types/cli/dry-balance-cli.d.ts +15 -1
  3. package/dist/types/cli/startup-cwd.d.ts +2 -0
  4. package/dist/types/commands/launch.d.ts +3 -0
  5. package/dist/types/commit/analysis/conventional.d.ts +2 -2
  6. package/dist/types/commit/analysis/summary.d.ts +2 -2
  7. package/dist/types/commit/changelog/generate.d.ts +2 -2
  8. package/dist/types/commit/changelog/index.d.ts +2 -2
  9. package/dist/types/commit/map-reduce/index.d.ts +3 -3
  10. package/dist/types/commit/map-reduce/map-phase.d.ts +2 -2
  11. package/dist/types/commit/map-reduce/reduce-phase.d.ts +2 -2
  12. package/dist/types/commit/model-selection.d.ts +10 -4
  13. package/dist/types/config/api-key-resolver.d.ts +34 -0
  14. package/dist/types/config/keybindings.d.ts +2 -2
  15. package/dist/types/config/model-provider-priority.d.ts +1 -0
  16. package/dist/types/config/model-registry.d.ts +17 -1
  17. package/dist/types/config/model-resolver.d.ts +4 -1
  18. package/dist/types/config/settings-schema.d.ts +9 -0
  19. package/dist/types/config/settings.d.ts +7 -2
  20. package/dist/types/dap/config.d.ts +14 -1
  21. package/dist/types/dap/types.d.ts +10 -0
  22. package/dist/types/debug/report-bundle.d.ts +3 -0
  23. package/dist/types/edit/file-snapshot-store.d.ts +18 -10
  24. package/dist/types/eval/py/__tests__/prelude.test.d.ts +1 -0
  25. package/dist/types/extensibility/extensions/types.d.ts +4 -1
  26. package/dist/types/lsp/client.d.ts +10 -0
  27. package/dist/types/lsp/utils.d.ts +3 -2
  28. package/dist/types/main.d.ts +3 -9
  29. package/dist/types/mcp/tool-bridge.d.ts +2 -0
  30. package/dist/types/modes/components/chat-block.d.ts +64 -0
  31. package/dist/types/modes/components/custom-editor.d.ts +4 -1
  32. package/dist/types/modes/components/overlay-box.d.ts +17 -0
  33. package/dist/types/modes/components/plan-review-overlay.d.ts +59 -0
  34. package/dist/types/modes/components/plan-toc.d.ts +41 -0
  35. package/dist/types/modes/components/read-tool-group.d.ts +2 -0
  36. package/dist/types/modes/components/status-line.d.ts +2 -0
  37. package/dist/types/modes/components/transcript-container.d.ts +11 -0
  38. package/dist/types/modes/controllers/command-controller.d.ts +1 -0
  39. package/dist/types/modes/controllers/event-controller.d.ts +17 -1
  40. package/dist/types/modes/controllers/extension-ui-controller.d.ts +0 -1
  41. package/dist/types/modes/controllers/input-controller.d.ts +1 -1
  42. package/dist/types/modes/controllers/streaming-reveal.d.ts +22 -0
  43. package/dist/types/modes/controllers/tan-command-controller.d.ts +6 -0
  44. package/dist/types/modes/interactive-mode.d.ts +16 -5
  45. package/dist/types/modes/magic-keywords.d.ts +1 -1
  46. package/dist/types/modes/markdown-prose.d.ts +1 -1
  47. package/dist/types/modes/theme/theme.d.ts +1 -1
  48. package/dist/types/modes/types.d.ts +21 -5
  49. package/dist/types/modes/utils/copy-targets.d.ts +21 -1
  50. package/dist/types/modes/workflow.d.ts +3 -3
  51. package/dist/types/plan-mode/approved-plan.d.ts +27 -8
  52. package/dist/types/plan-mode/plan-protection.d.ts +4 -4
  53. package/dist/types/sdk.d.ts +2 -0
  54. package/dist/types/session/agent-session.d.ts +21 -0
  55. package/dist/types/session/auth-storage.d.ts +1 -1
  56. package/dist/types/session/messages.d.ts +12 -0
  57. package/dist/types/session/session-manager.d.ts +8 -3
  58. package/dist/types/slash-commands/types.d.ts +4 -6
  59. package/dist/types/task/executor.d.ts +17 -0
  60. package/dist/types/task/index.d.ts +1 -0
  61. package/dist/types/task/render.d.ts +3 -2
  62. package/dist/types/tools/archive-reader.d.ts +5 -0
  63. package/dist/types/tools/ast-edit.d.ts +3 -0
  64. package/dist/types/tools/ast-grep.d.ts +3 -0
  65. package/dist/types/tools/bash.d.ts +1 -0
  66. package/dist/types/tools/eval.d.ts +8 -0
  67. package/dist/types/tools/find.d.ts +8 -4
  68. package/dist/types/tools/gh-cache-invalidation.d.ts +6 -0
  69. package/dist/types/tools/github-cache.d.ts +12 -0
  70. package/dist/types/tools/grouped-file-output.d.ts +95 -12
  71. package/dist/types/tools/memory-render.d.ts +4 -1
  72. package/dist/types/tools/path-utils.d.ts +8 -0
  73. package/dist/types/tools/plan-mode-guard.d.ts +8 -9
  74. package/dist/types/tools/render-utils.d.ts +5 -9
  75. package/dist/types/tools/search.d.ts +6 -2
  76. package/dist/types/tools/sqlite-reader.d.ts +1 -0
  77. package/dist/types/tools/todo.d.ts +3 -2
  78. package/dist/types/tools/write.d.ts +3 -0
  79. package/dist/types/tools/yield.d.ts +8 -0
  80. package/dist/types/tui/output-block.d.ts +16 -4
  81. package/dist/types/tui/status-line.d.ts +3 -0
  82. package/dist/types/utils/enhanced-paste.d.ts +20 -0
  83. package/dist/types/web/search/providers/kimi.d.ts +1 -1
  84. package/package.json +9 -9
  85. package/src/auto-thinking/classifier.ts +5 -1
  86. package/src/cli/args.ts +3 -1
  87. package/src/cli/dry-balance-cli.ts +54 -21
  88. package/src/cli/gallery-cli.ts +4 -1
  89. package/src/cli/gallery-fixtures/misc.ts +29 -0
  90. package/src/cli/startup-cwd.ts +68 -0
  91. package/src/commands/launch.ts +3 -0
  92. package/src/commit/analysis/conventional.ts +2 -2
  93. package/src/commit/analysis/summary.ts +2 -2
  94. package/src/commit/changelog/generate.ts +2 -2
  95. package/src/commit/changelog/index.ts +2 -2
  96. package/src/commit/map-reduce/index.ts +3 -3
  97. package/src/commit/map-reduce/map-phase.ts +2 -2
  98. package/src/commit/map-reduce/reduce-phase.ts +2 -2
  99. package/src/commit/model-selection.ts +36 -11
  100. package/src/commit/pipeline.ts +4 -4
  101. package/src/config/api-key-resolver.ts +58 -0
  102. package/src/config/model-provider-priority.ts +55 -0
  103. package/src/config/model-registry.ts +29 -24
  104. package/src/config/model-resolver.ts +39 -7
  105. package/src/config/settings-schema.ts +10 -0
  106. package/src/config/settings.ts +106 -43
  107. package/src/dap/config.ts +41 -2
  108. package/src/dap/defaults.json +1 -0
  109. package/src/dap/session.ts +1 -0
  110. package/src/dap/types.ts +10 -0
  111. package/src/debug/index.ts +47 -53
  112. package/src/debug/raw-sse-buffer.ts +7 -4
  113. package/src/debug/report-bundle.ts +9 -0
  114. package/src/edit/file-snapshot-store.ts +33 -1
  115. package/src/edit/hashline/filesystem.ts +2 -1
  116. package/src/edit/renderer.ts +82 -78
  117. package/src/eval/__tests__/llm-bridge.test.ts +110 -31
  118. package/src/eval/js/context-manager.ts +32 -15
  119. package/src/eval/llm-bridge.ts +22 -6
  120. package/src/eval/py/__tests__/prelude.test.ts +19 -0
  121. package/src/eval/py/executor.ts +23 -11
  122. package/src/eval/py/prelude.py +1 -1
  123. package/src/extensibility/extensions/types.ts +10 -1
  124. package/src/goals/tools/goal-tool.ts +36 -26
  125. package/src/internal-urls/docs-index.generated.ts +8 -8
  126. package/src/lsp/client.ts +23 -11
  127. package/src/lsp/config.ts +11 -1
  128. package/src/lsp/index.ts +61 -9
  129. package/src/lsp/utils.ts +3 -2
  130. package/src/main.ts +100 -72
  131. package/src/mcp/tool-bridge.ts +2 -0
  132. package/src/memories/index.ts +14 -7
  133. package/src/mnemopi/backend.ts +5 -1
  134. package/src/modes/acp/acp-agent.ts +33 -26
  135. package/src/modes/components/assistant-message.ts +2 -9
  136. package/src/modes/components/chat-block.ts +111 -0
  137. package/src/modes/components/copy-selector.ts +1 -44
  138. package/src/modes/components/custom-editor.ts +164 -109
  139. package/src/modes/components/custom-message.ts +1 -3
  140. package/src/modes/components/execution-shared.ts +1 -2
  141. package/src/modes/components/hook-message.ts +1 -3
  142. package/src/modes/components/model-selector.ts +59 -13
  143. package/src/modes/components/oauth-selector.ts +33 -7
  144. package/src/modes/components/overlay-box.ts +108 -0
  145. package/src/modes/components/plan-review-overlay.ts +799 -0
  146. package/src/modes/components/plan-toc.ts +138 -0
  147. package/src/modes/components/read-tool-group.ts +20 -4
  148. package/src/modes/components/skill-message.ts +0 -1
  149. package/src/modes/components/status-line.ts +19 -4
  150. package/src/modes/components/tips.txt +2 -1
  151. package/src/modes/components/todo-reminder.ts +0 -2
  152. package/src/modes/components/tool-execution.ts +68 -88
  153. package/src/modes/components/transcript-container.ts +84 -24
  154. package/src/modes/components/user-message.ts +2 -3
  155. package/src/modes/controllers/command-controller-shared.ts +7 -6
  156. package/src/modes/controllers/command-controller.ts +57 -55
  157. package/src/modes/controllers/event-controller.ts +67 -40
  158. package/src/modes/controllers/extension-ui-controller.ts +10 -73
  159. package/src/modes/controllers/input-controller.ts +170 -126
  160. package/src/modes/controllers/mcp-command-controller.ts +69 -60
  161. package/src/modes/controllers/selector-controller.ts +23 -25
  162. package/src/modes/controllers/streaming-reveal.ts +212 -0
  163. package/src/modes/controllers/tan-command-controller.ts +173 -0
  164. package/src/modes/interactive-mode.ts +274 -112
  165. package/src/modes/magic-keywords.ts +1 -1
  166. package/src/modes/markdown-prose.ts +1 -1
  167. package/src/modes/setup-wizard/wizard-overlay.ts +1 -1
  168. package/src/modes/theme/shimmer.ts +20 -9
  169. package/src/modes/theme/theme-schema.json +1 -1
  170. package/src/modes/theme/theme.ts +8 -4
  171. package/src/modes/types.ts +21 -7
  172. package/src/modes/utils/copy-targets.ts +133 -27
  173. package/src/modes/utils/ui-helpers.ts +44 -46
  174. package/src/modes/workflow.ts +10 -10
  175. package/src/plan-mode/approved-plan.ts +66 -43
  176. package/src/plan-mode/plan-protection.ts +4 -4
  177. package/src/prompts/system/background-tan-dispatch.md +8 -0
  178. package/src/prompts/system/plan-mode-active.md +67 -58
  179. package/src/prompts/system/plan-mode-approved.md +1 -1
  180. package/src/prompts/system/workflow-notice.md +1 -1
  181. package/src/prompts/tools/bash.md +9 -0
  182. package/src/prompts/tools/browser.md +1 -1
  183. package/src/prompts/tools/eval.md +2 -1
  184. package/src/prompts/tools/read.md +2 -2
  185. package/src/sdk.ts +37 -46
  186. package/src/session/agent-session.ts +119 -18
  187. package/src/session/auth-storage.ts +2 -0
  188. package/src/session/messages.ts +26 -0
  189. package/src/session/session-manager.ts +109 -28
  190. package/src/slash-commands/builtin-registry.ts +36 -9
  191. package/src/slash-commands/types.ts +4 -6
  192. package/src/task/executor.ts +76 -38
  193. package/src/task/index.ts +4 -0
  194. package/src/task/render.ts +211 -147
  195. package/src/tools/archive-reader.ts +64 -0
  196. package/src/tools/ask.ts +119 -164
  197. package/src/tools/ast-edit.ts +98 -71
  198. package/src/tools/ast-grep.ts +37 -43
  199. package/src/tools/bash.ts +57 -6
  200. package/src/tools/browser/tab-supervisor.ts +13 -1
  201. package/src/tools/browser/tab-worker.ts +33 -4
  202. package/src/tools/debug.ts +20 -8
  203. package/src/tools/eval.ts +13 -2
  204. package/src/tools/fetch.ts +297 -7
  205. package/src/tools/find.ts +51 -30
  206. package/src/tools/gh-cache-invalidation.ts +200 -0
  207. package/src/tools/gh-renderer.ts +81 -42
  208. package/src/tools/github-cache.ts +25 -0
  209. package/src/tools/grouped-file-output.ts +272 -48
  210. package/src/tools/image-gen.ts +150 -103
  211. package/src/tools/inspect-image-renderer.ts +63 -41
  212. package/src/tools/inspect-image.ts +10 -3
  213. package/src/tools/job.ts +3 -4
  214. package/src/tools/memory-render.ts +4 -1
  215. package/src/tools/path-utils.ts +28 -2
  216. package/src/tools/plan-mode-guard.ts +66 -39
  217. package/src/tools/read.ts +48 -28
  218. package/src/tools/render-utils.ts +21 -37
  219. package/src/tools/resolve.ts +14 -0
  220. package/src/tools/search-tool-bm25.ts +36 -23
  221. package/src/tools/search.ts +118 -81
  222. package/src/tools/sqlite-reader.ts +9 -12
  223. package/src/tools/todo.ts +118 -52
  224. package/src/tools/write.ts +83 -64
  225. package/src/tools/yield.ts +10 -1
  226. package/src/tui/output-block.ts +60 -13
  227. package/src/tui/status-line.ts +5 -1
  228. package/src/utils/commit-message-generator.ts +11 -3
  229. package/src/utils/enhanced-paste.ts +230 -0
  230. package/src/utils/title-generator.ts +2 -1
  231. package/src/web/search/providers/anthropic.ts +25 -19
  232. package/src/web/search/providers/codex.ts +37 -8
  233. package/src/web/search/providers/exa.ts +11 -3
  234. package/src/web/search/providers/kimi.ts +28 -17
  235. package/src/web/search/providers/parallel.ts +35 -24
  236. package/src/web/search/providers/synthetic.ts +8 -6
  237. package/src/web/search/providers/tavily.ts +9 -8
  238. package/src/web/search/providers/zai.ts +8 -6
@@ -4,6 +4,7 @@ import type { Api, AssistantMessage, Model } from "@oh-my-pi/pi-ai";
4
4
  import * as ai from "@oh-my-pi/pi-ai";
5
5
  import { Effort } from "@oh-my-pi/pi-ai";
6
6
  import { TempDir } from "@oh-my-pi/pi-utils";
7
+ import { $ } from "bun";
7
8
  import type { ModelRegistry } from "../../config/model-registry";
8
9
  import { Settings } from "../../config/settings";
9
10
  import type { ToolSession } from "../../tools";
@@ -13,7 +14,7 @@ import { IdleTimeout } from "../idle-timeout";
13
14
  import { disposeAllVmContexts } from "../js/context-manager";
14
15
  import { executeJs } from "../js/executor";
15
16
  import { runEvalLlm } from "../llm-bridge";
16
- import { disposeAllKernelSessions, executePython } from "../py/executor";
17
+ import { disposeAllKernelSessions, type PythonResult } from "../py/executor";
17
18
 
18
19
  function makeModel(provider: string, id: string, extra: Partial<Model<Api>> = {}): Model<Api> {
19
20
  return {
@@ -57,6 +58,7 @@ function makeSession(opts: SessionOptions = {}): ToolSession {
57
58
  const modelRegistry = {
58
59
  getAvailable: () => opts.available ?? [SMOL, DEFAULT, SLOW],
59
60
  getApiKey: async () => (opts.apiKey === undefined ? "test-key" : opts.apiKey),
61
+ resolver: () => async () => (opts.apiKey === undefined ? "test-key" : opts.apiKey),
60
62
  } as unknown as ModelRegistry;
61
63
  return {
62
64
  settings,
@@ -96,6 +98,77 @@ function assistant(opts: {
96
98
  };
97
99
  }
98
100
 
101
+ async function runPythonLlmInSubprocess(options: { structured: boolean; tempDir: TempDir }): Promise<PythonResult> {
102
+ const repoRoot = path.resolve(import.meta.dir, "../../../..");
103
+ const scriptPath = path.join(options.tempDir.path(), "run-python-llm.ts");
104
+ const resultPath = path.join(options.tempDir.path(), "python-llm-result.json");
105
+ const aiPath = path.resolve(import.meta.dir, "../../../../ai/src/index.ts");
106
+ const executorPath = path.resolve(import.meta.dir, "../py/executor.ts");
107
+ const settingsPath = path.resolve(import.meta.dir, "../../config/settings.ts");
108
+ const code = options.structured
109
+ ? 'import json\nprint(json.dumps(llm("hi", schema={"type": "object"})))'
110
+ : 'print(llm("hi", model="smol"))';
111
+ const responseContent = options.structured
112
+ ? '[{ type: "toolCall", id: "tc-1", name: "respond", arguments: { ok: true } }]'
113
+ : '[{ type: "text", text: "hello from python" }]';
114
+ await Bun.write(
115
+ scriptPath,
116
+ `
117
+ import { vi } from "bun:test";
118
+ import * as ai from ${JSON.stringify(aiPath)};
119
+ import { executePython } from ${JSON.stringify(executorPath)};
120
+ import { Settings } from ${JSON.stringify(settingsPath)};
121
+
122
+ const SMOL = {
123
+ id: "smol",
124
+ name: "smol",
125
+ api: "openai-responses",
126
+ provider: "p",
127
+ baseUrl: "https://example.test/v1",
128
+ reasoning: false,
129
+ input: ["text"],
130
+ cost: { input: 1, output: 1, cacheRead: 0, cacheWrite: 1 },
131
+ contextWindow: 128000,
132
+ maxTokens: 4096,
133
+ };
134
+ const settings = Settings.isolated({ "async.enabled": false, "task.isolation.mode": "none" });
135
+ settings.setModelRole("smol", "p/smol");
136
+ settings.setModelRole("slow", "p/slow");
137
+ const session = {
138
+ settings,
139
+ modelRegistry: {
140
+ getAvailable: () => [SMOL],
141
+ getApiKey: async () => "test-key",
142
+ resolver: () => async () => "test-key",
143
+ },
144
+ getActiveModelString: () => "p/smol",
145
+ };
146
+ vi.spyOn(ai, "completeSimple").mockResolvedValue({
147
+ role: "assistant",
148
+ api: "openai-responses",
149
+ provider: "p",
150
+ model: "smol",
151
+ stopReason: "stop",
152
+ content: ${responseContent},
153
+ });
154
+ const result = await executePython(${JSON.stringify(code)}, {
155
+ cwd: ${JSON.stringify(options.tempDir.path())},
156
+ sessionId: ${JSON.stringify(`py-llm:${options.structured ? "struct" : "plain"}`)},
157
+ sessionFile: ${JSON.stringify(path.join(options.tempDir.path(), "session.jsonl"))},
158
+ toolSession: session,
159
+ kernelMode: "per-call",
160
+ });
161
+ await Bun.write(${JSON.stringify(resultPath)}, JSON.stringify(result));
162
+ process.exit(0);
163
+ `,
164
+ );
165
+ const child = await $`bun ${scriptPath}`.cwd(repoRoot).quiet().nothrow();
166
+ const stdout = child.stdout.toString();
167
+ const stderr = child.stderr.toString();
168
+ if (child.exitCode !== 0) throw new Error(stderr || stdout || `Python llm subprocess exited with ${child.exitCode}`);
169
+ return (await Bun.file(resultPath).json()) as PythonResult;
170
+ }
171
+
99
172
  describe("runEvalLlm", () => {
100
173
  afterEach(() => {
101
174
  vi.restoreAllMocks();
@@ -133,6 +206,26 @@ describe("runEvalLlm", () => {
133
206
  expect(result.details).toEqual({ model: "p/smol", tier: "smol", structured: false });
134
207
  });
135
208
 
209
+ it("supplies a non-empty systemPrompt when system is omitted (codex 'Instructions are required' guard)", async () => {
210
+ // The openai-codex Responses transformer drops `instructions` when no
211
+ // system prompt is provided, and the remote endpoint then 400s with
212
+ // "Instructions are required". runEvalLlm must always carry a non-empty
213
+ // systemPrompt so `llm("…")` without a `system` argument works.
214
+ const spy = vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "ok" }));
215
+ await runEvalLlm({ prompt: "q", model: "smol" }, { session: makeSession() });
216
+ const ctx = spy.mock.calls[0]?.[1] as { systemPrompt?: string[] };
217
+ expect(ctx.systemPrompt).toBeDefined();
218
+ expect(ctx.systemPrompt?.length).toBeGreaterThan(0);
219
+ expect(ctx.systemPrompt?.[0]).toMatch(/.+/);
220
+ });
221
+
222
+ it("honors an explicit system prompt instead of overriding it", async () => {
223
+ const spy = vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "ok" }));
224
+ await runEvalLlm({ prompt: "q", model: "smol", system: "Be terse." }, { session: makeSession() });
225
+ const ctx = spy.mock.calls[0]?.[1] as { systemPrompt?: string[] };
226
+ expect(ctx.systemPrompt).toEqual(["Be terse."]);
227
+ });
228
+
136
229
  it("forces a respond tool call and returns its arguments in structured mode", async () => {
137
230
  const spy = vi
138
231
  .spyOn(ai, "completeSimple")
@@ -290,38 +383,24 @@ describe("llm() through eval runtimes", () => {
290
383
  });
291
384
 
292
385
  it("exposes llm() in the Python runtime", async () => {
293
- using tempDir = TempDir.createSync("@omp-eval-llm-py-");
294
- const sessionFile = path.join(tempDir.path(), "session.jsonl");
295
- const sessionId = `py-llm:${crypto.randomUUID()}`;
296
- vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "hello from python" }));
297
-
298
- const result = await executePython('print(llm("hi", model="smol"))', {
299
- cwd: tempDir.path(),
300
- sessionId,
301
- sessionFile,
302
- toolSession: makeSession(),
303
- });
304
-
305
- expect(result.exitCode).toBe(0);
306
- expect(result.output.trim()).toBe("hello from python");
386
+ const tempDir = TempDir.createSync("@omp-eval-llm-py-");
387
+ try {
388
+ const result = await runPythonLlmInSubprocess({ structured: false, tempDir });
389
+ expect(result.exitCode).toBe(0);
390
+ expect(result.output.trim()).toBe("hello from python");
391
+ } finally {
392
+ tempDir.removeSync();
393
+ }
307
394
  });
308
395
 
309
396
  it("parses structured llm() output in the Python runtime", async () => {
310
- using tempDir = TempDir.createSync("@omp-eval-llm-py-struct-");
311
- const sessionFile = path.join(tempDir.path(), "session.jsonl");
312
- const sessionId = `py-llm-struct:${crypto.randomUUID()}`;
313
- vi.spyOn(ai, "completeSimple").mockResolvedValue(
314
- assistant({ toolCall: { name: "respond", arguments: { ok: true } } }),
315
- );
316
-
317
- const result = await executePython('import json\nprint(json.dumps(llm("hi", schema={"type": "object"})))', {
318
- cwd: tempDir.path(),
319
- sessionId,
320
- sessionFile,
321
- toolSession: makeSession(),
322
- });
323
-
324
- expect(result.exitCode).toBe(0);
325
- expect(JSON.parse(result.output.trim())).toEqual({ ok: true });
397
+ const tempDir = TempDir.createSync("@omp-eval-llm-py-struct-");
398
+ try {
399
+ const result = await runPythonLlmInSubprocess({ structured: true, tempDir });
400
+ expect(result.exitCode).toBe(0);
401
+ expect(JSON.parse(result.output.trim())).toEqual({ ok: true });
402
+ } finally {
403
+ tempDir.removeSync();
404
+ }
326
405
  });
327
406
  });
@@ -52,8 +52,14 @@ interface JsSession {
52
52
 
53
53
  const sessions = new Map<string, JsSession>();
54
54
  const startingSessions = new Map<string, Promise<JsSession>>();
55
- const resettingSessions = new Set<string>();
56
- const READY_TIMEOUT_MS_DEFAULT = 5_000;
55
+ const resettingSessions = new Map<string, Promise<void>>();
56
+ // Worker startup (module-graph import + WorkerCore construction) is infrastructure
57
+ // cost, not user compute. Floor it independently of Bun's 5s default per-test timeout
58
+ // so a slow cold-start under load isn't aborted mid-init — terminating a still-
59
+ // initializing Bun worker triggers the same kind of terminate-race that motivates
60
+ // avoiding `vm.runInContext` (see shared/indirect-eval.ts), here surfacing as a
61
+ // SIGILL/SIGSEGV. Callers that pass a larger per-cell budget still dominate.
62
+ const WORKER_INIT_TIMEOUT_MS = 15_000;
57
63
 
58
64
  export async function executeInVmContext(options: {
59
65
  sessionKey: string;
@@ -67,17 +73,28 @@ export async function executeInVmContext(options: {
67
73
  runState: VmRunState;
68
74
  }): Promise<{ value: unknown }> {
69
75
  if (options.reset) {
70
- if (resettingSessions.has(options.sessionKey)) {
71
- throw new ToolError("JS context reset already in progress");
72
- }
73
- resettingSessions.add(options.sessionKey);
74
- try {
75
- await resetVmContext(options.sessionKey);
76
- } finally {
77
- resettingSessions.delete(options.sessionKey);
76
+ // Coalesce concurrent resets: an existing in-flight reset already
77
+ // produces a fresh context, so a follow-up `reset: true` cell should
78
+ // just wait for it rather than failing the user-visible call.
79
+ const inFlight = resettingSessions.get(options.sessionKey);
80
+ if (inFlight) await inFlight.catch(() => undefined);
81
+ else {
82
+ const resetPromise = resetVmContext(options.sessionKey);
83
+ resettingSessions.set(
84
+ options.sessionKey,
85
+ resetPromise.then(() => undefined),
86
+ );
87
+ try {
88
+ await resetPromise;
89
+ } finally {
90
+ resettingSessions.delete(options.sessionKey);
91
+ }
78
92
  }
79
- } else if (resettingSessions.has(options.sessionKey)) {
80
- throw new ToolError("JS context reset in progress");
93
+ } else {
94
+ // Internal coordination: wait for any in-flight reset to settle and
95
+ // then run on the freshly-rebuilt context.
96
+ const inFlight = resettingSessions.get(options.sessionKey);
97
+ if (inFlight) await inFlight.catch(() => undefined);
81
98
  }
82
99
  const session = await acquireSession(
83
100
  options.sessionKey,
@@ -191,9 +208,9 @@ async function acquireSession(sessionKey: string, snapshot: SessionSnapshot, tim
191
208
  handleSessionMessage(session, msg);
192
209
  });
193
210
  try {
194
- // Cold-start can exceed 5s on slow hosts. Let the caller's per-cell timeout dominate so
195
- // users can grant more headroom when they raise `timeout` on a cell.
196
- const readyTimeoutMs = Math.max(READY_TIMEOUT_MS_DEFAULT, timeoutMs ?? 0);
211
+ // Init headroom is the fixed infrastructure floor; the caller's per-cell timeout
212
+ // dominates when larger so users can grant more by raising `timeout` on a cell.
213
+ const readyTimeoutMs = Math.max(WORKER_INIT_TIMEOUT_MS, timeoutMs ?? 0);
197
214
  await raceWithTimeout(readyPromise, readyTimeoutMs, "Timed out initializing JS eval worker");
198
215
  worker.send({ type: "init", snapshot });
199
216
  sessions.set(sessionKey, session);
@@ -15,7 +15,13 @@ import { instrumentedCompleteSimple, resolveTelemetry } from "@oh-my-pi/pi-agent
15
15
  import { type Api, Effort, getSupportedEfforts, type Model, type Tool } from "@oh-my-pi/pi-ai";
16
16
  import * as z from "zod/v4";
17
17
  import { extractTextContent, extractToolCall, parseJsonPayload } from "../commit/utils";
18
- import { expandRoleAlias, formatModelString, resolveModelFromString } from "../config/model-resolver";
18
+
19
+ import {
20
+ expandRoleAlias,
21
+ formatModelString,
22
+ getModelMatchPreferences,
23
+ resolveModelFromString,
24
+ } from "../config/model-resolver";
19
25
  import type { ToolSession } from "../tools";
20
26
  import { ToolError } from "../tools/tool-errors";
21
27
  import { withBridgeTimeoutPause } from "./bridge-timeout";
@@ -64,7 +70,7 @@ function resolveTierModel(tier: LlmTier, session: ToolSession): Model<Api> | und
64
70
  const available = modelRegistry.getAvailable();
65
71
  if (available.length === 0) return undefined;
66
72
 
67
- const matchPreferences = { usageOrder: session.settings.getStorage()?.getModelUsageOrder() };
73
+ const matchPreferences = getModelMatchPreferences(session.settings);
68
74
  const resolve = (pattern: string | undefined): Model<Api> | undefined => {
69
75
  if (!pattern) return undefined;
70
76
  const expanded = expandRoleAlias(pattern, session.settings);
@@ -112,8 +118,9 @@ export async function runEvalLlm(args: unknown, options: EvalLlmBridgeOptions):
112
118
  );
113
119
  }
114
120
 
115
- const apiKey = await options.session.modelRegistry?.getApiKey(model);
116
- if (!apiKey) {
121
+ const registry = options.session.modelRegistry;
122
+ const apiKey = await registry?.getApiKey(model);
123
+ if (!registry || !apiKey) {
117
124
  throw new ToolError(
118
125
  `llm() has no API key for ${formatModelString(model)}. Configure credentials for this provider or choose another tier.`,
119
126
  );
@@ -132,18 +139,27 @@ export async function runEvalLlm(args: unknown, options: EvalLlmBridgeOptions):
132
139
 
133
140
  const telemetry = resolveTelemetry(options.session.getTelemetry?.(), options.session.getSessionId?.() ?? undefined);
134
141
 
142
+ // Some providers (notably openai-codex) require a non-empty `instructions`
143
+ // field on every Responses request and 400 with "Instructions are required"
144
+ // when it is missing. Fall back to a minimal default so `llm(prompt)` works
145
+ // without forcing every caller to pass a `system` prompt.
146
+ const systemPrompt = system ? [system] : ["You are a helpful assistant."];
147
+
135
148
  // Suspend eval timeout accounting while the model request owns control. The
136
149
  // timeout clock restarts once the bridge returns to the cell runtime.
137
150
  const response = await withBridgeTimeoutPause(options.emitStatus, () =>
138
151
  instrumentedCompleteSimple(
139
152
  model,
140
153
  {
141
- systemPrompt: system ? [system] : undefined,
154
+ systemPrompt,
142
155
  messages: [{ role: "user", content: [{ type: "text", text: prompt }], timestamp: Date.now() }],
143
156
  tools,
144
157
  },
145
158
  {
146
- apiKey,
159
+ apiKey: registry.resolver(model.provider, {
160
+ sessionId: options.session.getSessionId?.() ?? undefined,
161
+ baseUrl: model.baseUrl,
162
+ }),
147
163
  signal: options.signal,
148
164
  reasoning: reasoningForTier(tier, model),
149
165
  toolChoice: schema ? { type: "tool", name: STRUCTURED_TOOL_NAME } : undefined,
@@ -0,0 +1,19 @@
1
+ import { describe, expect, it } from "bun:test";
2
+ import { PYTHON_PRELUDE } from "../prelude";
3
+
4
+ describe("python prelude", () => {
5
+ it("exposes read(path, offset?, limit?) with positional optional args", () => {
6
+ // The eval docs advertise `read(path, offset?=1, limit?=None)`. A
7
+ // keyword-only signature (`def read(path, *, offset=1, limit=None)`)
8
+ // makes `read("file", 10)` raise `TypeError: read() takes 1 positional
9
+ // argument but 2 were given`, which agents in the wild repeatedly hit.
10
+ // Lock the contract so the helper accepts both positional and keyword
11
+ // forms.
12
+ const match = PYTHON_PRELUDE.match(/def\s+read\(([^)]+)\)/);
13
+ expect(match).not.toBeNull();
14
+ const signature = match?.[1] ?? "";
15
+ expect(signature).not.toContain("*,");
16
+ expect(signature).toContain("offset");
17
+ expect(signature).toContain("limit");
18
+ });
19
+ });
@@ -126,7 +126,7 @@ interface PythonSession {
126
126
 
127
127
  const sessions = new Map<string, PythonSession>();
128
128
  const startingSessions = new Map<string, Promise<PythonSession>>();
129
- const resettingSessions = new Set<string>();
129
+ const resettingSessions = new Map<string, Promise<void>>();
130
130
 
131
131
  function normalizeSessionCwd(cwd: string): string {
132
132
  return path.resolve(cwd);
@@ -611,17 +611,29 @@ async function executeOnSession(code: string, cwd: string, options: PythonExecut
611
611
  options.bridgeSessionId = sessionId;
612
612
  }
613
613
  if (options.reset) {
614
- if (resettingSessions.has(sessionKey)) {
615
- throw new Error("Python kernel reset already in progress");
616
- }
617
- resettingSessions.add(sessionKey);
618
- try {
619
- await resetSession(sessionKey);
620
- } finally {
621
- resettingSessions.delete(sessionKey);
614
+ // Coalesce concurrent resets: if another reset is in flight for this
615
+ // session, await it instead of throwing — the caller's intent ("start
616
+ // from a clean kernel") is satisfied once that reset settles.
617
+ const inFlight = resettingSessions.get(sessionKey);
618
+ if (inFlight) await inFlight.catch(() => undefined);
619
+ else {
620
+ const resetPromise = resetSession(sessionKey);
621
+ resettingSessions.set(
622
+ sessionKey,
623
+ resetPromise.then(() => undefined),
624
+ );
625
+ try {
626
+ await resetPromise;
627
+ } finally {
628
+ resettingSessions.delete(sessionKey);
629
+ }
622
630
  }
623
- } else if (resettingSessions.has(sessionKey)) {
624
- throw new Error("Python kernel reset in progress");
631
+ } else {
632
+ // A reset already in progress is an internal coordination state, not a
633
+ // user-visible failure. Wait for it to clear, then proceed with the
634
+ // requested execution on the freshly-restarted kernel.
635
+ const inFlight = resettingSessions.get(sessionKey);
636
+ if (inFlight) await inFlight.catch(() => undefined);
625
637
  }
626
638
  const session = await acquireSession(sessionKey, sessionId, cwd, options);
627
639
  if (options.signal?.aborted) {
@@ -53,7 +53,7 @@ if "__omp_prelude_loaded__" not in globals():
53
53
  _emit_status("env", key=key, value=val, action="get")
54
54
  return val
55
55
 
56
- def read(path: str | Path, *, offset: int = 1, limit: int | None = None) -> str:
56
+ def read(path: str | Path, offset: int = 1, limit: int | None = None) -> str:
57
57
  """Read file contents. offset/limit are 1-indexed line numbers."""
58
58
  p = Path(path)
59
59
  data = p.read_text(encoding="utf-8")
@@ -7,7 +7,13 @@
7
7
  * - Register commands, keyboard shortcuts, and CLI flags
8
8
  * - Interact with the user via UI primitives
9
9
  */
10
- import type { AgentMessage, AgentToolResult, AgentToolUpdateCallback, ThinkingLevel } from "@oh-my-pi/pi-agent-core";
10
+ import type {
11
+ AgentMessage,
12
+ AgentToolResult,
13
+ AgentToolUpdateCallback,
14
+ ThinkingLevel,
15
+ ToolApproval,
16
+ } from "@oh-my-pi/pi-agent-core";
11
17
  import type { CompactionResult } from "@oh-my-pi/pi-agent-core/compaction";
12
18
  import type {
13
19
  Api,
@@ -392,6 +398,9 @@ export interface ToolDefinition<TParams extends TSchema = TSchema, TDetails = un
392
398
  defaultInactive?: boolean;
393
399
  /** If true, tool may stage deferred changes that require explicit resolve/discard. */
394
400
  deferrable?: boolean;
401
+ /** Tool approval tier. Defaults to `"exec"` when omitted.
402
+ * `"read"`: read-only operations. `"write"`: mutations. `"exec"`: code execution. */
403
+ approval?: ToolApproval;
395
404
  /** MCP server name for discovery/search metadata when this tool fronts an MCP server. */
396
405
  mcpServerName?: string;
397
406
  /** Original MCP tool name for discovery/search metadata. */
@@ -10,7 +10,7 @@ import { formatDuration } from "../../slash-commands/helpers/format";
10
10
  import type { ToolSession } from "../../tools";
11
11
  import { formatErrorDetail, TRUNCATE_LENGTHS } from "../../tools/render-utils";
12
12
  import { ToolError } from "../../tools/tool-errors";
13
- import { renderStatusLine, truncateToWidth } from "../../tui";
13
+ import { framedBlock, renderStatusLine, truncateToWidth } from "../../tui";
14
14
  import { completionBudgetReport, remainingTokens } from "../runtime";
15
15
  import type { Goal, GoalStatus, GoalToolDetails } from "../state";
16
16
 
@@ -173,8 +173,7 @@ export const goalToolRenderer = {
173
173
  if (args.op === "create" && args.token_budget !== undefined) {
174
174
  meta.push(`budget ${formatNumber(args.token_budget)}`);
175
175
  }
176
- const text = renderStatusLine({ icon: "pending", title: "Goal", description, meta }, uiTheme);
177
- return new Text(text, 0, 0);
176
+ return new Text(renderStatusLine({ icon: "pending", title: "Goal", description, meta }, uiTheme), 0, 0);
178
177
  },
179
178
 
180
179
  renderResult(
@@ -190,51 +189,62 @@ export const goalToolRenderer = {
190
189
 
191
190
  if (result.isError) {
192
191
  const header = renderStatusLine({ icon: "error", title: "Goal", description }, uiTheme);
193
- const body = formatErrorDetail(fallbackText || "Goal tool failed", uiTheme);
194
- return new Text([header, body].join("\n"), 0, 0);
192
+ return framedBlock(uiTheme, width => ({
193
+ header,
194
+ sections: [{ lines: formatErrorDetail(fallbackText || "Goal tool failed", uiTheme).split("\n") }],
195
+ state: "error",
196
+ borderColor: "error",
197
+ width,
198
+ }));
195
199
  }
196
200
 
197
201
  const goal = details?.goal ?? null;
198
202
  if (!goal) {
199
- const header = renderStatusLine({ icon: "warning", title: "Goal", description }, uiTheme);
200
- const body = uiTheme.fg("muted", "No active goal.");
201
- return new Text([header, body].join("\n"), 0, 0);
203
+ return new Text(
204
+ renderStatusLine({ icon: "warning", title: "Goal", description, meta: ["no active goal"] }, uiTheme),
205
+ 0,
206
+ 0,
207
+ );
202
208
  }
203
209
 
204
- const lines: string[] = [];
205
- lines.push(
206
- renderStatusLine(
207
- {
208
- icon: "success",
209
- title: "Goal",
210
- description,
211
- badge: { label: goal.status, color: goalBadgeColor(goal.status) },
212
- },
213
- uiTheme,
214
- ),
210
+ const header = renderStatusLine(
211
+ {
212
+ icon: "success",
213
+ title: "Goal",
214
+ description,
215
+ badge: { label: goal.status, color: goalBadgeColor(goal.status) },
216
+ },
217
+ uiTheme,
215
218
  );
216
219
 
220
+ const lines: string[] = [];
217
221
  const objectiveText = truncateToWidth(goal.objective.trim(), TRUNCATE_LENGTHS.LONG);
218
- lines.push(` ${uiTheme.italic(uiTheme.fg("muted", `"${objectiveText}"`))}`);
222
+ lines.push(uiTheme.italic(uiTheme.fg("muted", `"${objectiveText}"`)));
219
223
 
220
224
  const used = formatNumber(goal.tokensUsed);
221
225
  const tokensLine =
222
226
  goal.tokenBudget !== undefined
223
227
  ? `${used} / ${formatNumber(goal.tokenBudget)} tokens (${formatNumber(Math.max(0, goal.tokenBudget - goal.tokensUsed))} left)`
224
228
  : `${used} tokens`;
225
- lines.push(` ${uiTheme.fg("dim", tokensLine)}`);
226
-
229
+ const metaParts = [tokensLine];
227
230
  if (goal.timeUsedSeconds > 0) {
228
- lines.push(` ${uiTheme.fg("dim", `${formatDuration(goal.timeUsedSeconds * 1000)} elapsed`)}`);
231
+ metaParts.push(`${formatDuration(goal.timeUsedSeconds * 1000)} elapsed`);
229
232
  }
233
+ lines.push(uiTheme.fg("dim", metaParts.join(" · ")));
230
234
 
231
235
  const report = details?.completionBudgetReport;
236
+ const sections: Array<{ label?: string; lines: string[] }> = [{ lines }];
232
237
  if (report) {
233
- lines.push("");
234
- lines.push(uiTheme.italic(uiTheme.fg("muted", report)));
238
+ sections.push({ label: "Report", lines: report.split("\n").map(line => uiTheme.fg("muted", line)) });
235
239
  }
236
240
 
237
- return new Text(lines.join("\n"), 0, 0);
241
+ return framedBlock(uiTheme, width => ({
242
+ header,
243
+ sections,
244
+ state: "success",
245
+ borderColor: "borderMuted",
246
+ width,
247
+ }));
238
248
  },
239
249
 
240
250
  mergeCallAndResult: true,