@oh-my-pi/pi-coding-agent 15.5.13 → 15.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (192):
  1. package/CHANGELOG.md +77 -0
  2. package/dist/types/cli/classify-install-target.d.ts +0 -10
  3. package/dist/types/cli/initial-message.d.ts +1 -1
  4. package/dist/types/cli/tiny-models-cli.d.ts +9 -0
  5. package/dist/types/commands/tiny-models.d.ts +22 -0
  6. package/dist/types/commit/analysis/conventional.d.ts +1 -1
  7. package/dist/types/commit/analysis/summary.d.ts +1 -1
  8. package/dist/types/commit/changelog/generate.d.ts +1 -1
  9. package/dist/types/commit/changelog/index.d.ts +2 -2
  10. package/dist/types/commit/map-reduce/map-phase.d.ts +1 -1
  11. package/dist/types/commit/map-reduce/reduce-phase.d.ts +1 -1
  12. package/dist/types/config/model-id-affixes.d.ts +10 -0
  13. package/dist/types/config/model-registry.d.ts +1 -1
  14. package/dist/types/config/models-config-schema.d.ts +2 -0
  15. package/dist/types/config/settings-schema.d.ts +233 -17
  16. package/dist/types/discovery/helpers.d.ts +1 -1
  17. package/dist/types/discovery/substitute-plugin-root.d.ts +0 -4
  18. package/dist/types/eval/__tests__/llm-bridge.test.d.ts +1 -0
  19. package/dist/types/eval/js/shared/rewrite-imports.d.ts +16 -1
  20. package/dist/types/eval/llm-bridge.d.ts +25 -0
  21. package/dist/types/export/html/template.generated.d.ts +1 -1
  22. package/dist/types/extensibility/plugins/legacy-pi-compat.d.ts +15 -0
  23. package/dist/types/internal-urls/agent-protocol.d.ts +2 -1
  24. package/dist/types/internal-urls/artifact-protocol.d.ts +2 -1
  25. package/dist/types/internal-urls/local-protocol.d.ts +2 -1
  26. package/dist/types/internal-urls/memory-protocol.d.ts +2 -1
  27. package/dist/types/internal-urls/omp-protocol.d.ts +2 -1
  28. package/dist/types/internal-urls/router.d.ts +8 -1
  29. package/dist/types/internal-urls/rule-protocol.d.ts +2 -1
  30. package/dist/types/internal-urls/skill-protocol.d.ts +2 -1
  31. package/dist/types/internal-urls/types.d.ts +26 -0
  32. package/dist/types/memory-backend/index.d.ts +1 -0
  33. package/dist/types/memory-backend/resolve.d.ts +2 -1
  34. package/dist/types/memory-backend/types.d.ts +7 -1
  35. package/dist/types/mnemosyne/backend.d.ts +4 -0
  36. package/dist/types/mnemosyne/config.d.ts +29 -0
  37. package/dist/types/mnemosyne/index.d.ts +3 -0
  38. package/dist/types/mnemosyne/state.d.ts +72 -0
  39. package/dist/types/modes/components/custom-editor.d.ts +2 -3
  40. package/dist/types/modes/components/hook-selector.d.ts +27 -0
  41. package/dist/types/modes/components/index.d.ts +1 -0
  42. package/dist/types/modes/components/status-line/context-thresholds.d.ts +6 -0
  43. package/dist/types/modes/components/tiny-title-download-progress.d.ts +11 -0
  44. package/dist/types/modes/components/welcome.d.ts +1 -0
  45. package/dist/types/modes/controllers/extension-ui-controller.d.ts +4 -1
  46. package/dist/types/modes/gradient-highlight.d.ts +23 -0
  47. package/dist/types/modes/interactive-mode.d.ts +4 -2
  48. package/dist/types/modes/internal-url-autocomplete.d.ts +43 -0
  49. package/dist/types/modes/orchestrate.d.ts +10 -0
  50. package/dist/types/modes/theme/defaults/index.d.ts +8406 -8406
  51. package/dist/types/modes/theme/theme.d.ts +2 -1
  52. package/dist/types/modes/ultrathink.d.ts +3 -3
  53. package/dist/types/modes/utils/keybinding-matchers.d.ts +5 -0
  54. package/dist/types/sdk.d.ts +3 -0
  55. package/dist/types/session/agent-session.d.ts +35 -0
  56. package/dist/types/system-prompt.d.ts +2 -0
  57. package/dist/types/task/executor.d.ts +2 -0
  58. package/dist/types/task/render.d.ts +5 -1
  59. package/dist/types/tiny/models.d.ts +185 -0
  60. package/dist/types/tiny/text.d.ts +4 -0
  61. package/dist/types/tiny/title-client.d.ts +24 -0
  62. package/dist/types/tiny/title-protocol.d.ts +74 -0
  63. package/dist/types/tiny/worker.d.ts +2 -0
  64. package/dist/types/tools/bash.d.ts +3 -1
  65. package/dist/types/tools/index.d.ts +7 -4
  66. package/dist/types/tools/memory-edit.d.ts +40 -0
  67. package/dist/types/tools/{hindsight-recall.d.ts → memory-recall.d.ts} +6 -6
  68. package/dist/types/tools/{hindsight-reflect.d.ts → memory-reflect.d.ts} +6 -6
  69. package/dist/types/tools/memory-render.d.ts +60 -0
  70. package/dist/types/tools/{hindsight-retain.d.ts → memory-retain.d.ts} +6 -6
  71. package/dist/types/tools/todo-write.d.ts +8 -0
  72. package/dist/types/tools/tool-result.d.ts +2 -0
  73. package/dist/types/utils/title-generator.d.ts +3 -0
  74. package/package.json +18 -14
  75. package/scripts/build-binary.ts +1 -0
  76. package/src/cli/tiny-models-cli.ts +127 -0
  77. package/src/cli-commands.ts +1 -0
  78. package/src/cli.ts +8 -8
  79. package/src/commands/tiny-models.ts +36 -0
  80. package/src/config/model-equivalence.ts +43 -2
  81. package/src/config/model-id-affixes.ts +64 -0
  82. package/src/config/model-registry.ts +166 -8
  83. package/src/config/models-config-schema.ts +1 -1
  84. package/src/config/settings-schema.ts +206 -14
  85. package/src/edit/hashline/diff.ts +5 -7
  86. package/src/eval/__tests__/llm-bridge.test.ts +297 -0
  87. package/src/eval/__tests__/shared-executors.test.ts +36 -0
  88. package/src/eval/js/shared/local-module-loader.ts +13 -1
  89. package/src/eval/js/shared/prelude.txt +8 -0
  90. package/src/eval/js/shared/rewrite-imports.ts +31 -26
  91. package/src/eval/js/tool-bridge.ts +4 -0
  92. package/src/eval/llm-bridge.ts +181 -0
  93. package/src/eval/py/prelude.py +52 -31
  94. package/src/export/html/template.generated.ts +1 -1
  95. package/src/export/html/template.js +0 -13
  96. package/src/extensibility/plugins/legacy-pi-compat.ts +60 -23
  97. package/src/internal-urls/agent-protocol.ts +18 -1
  98. package/src/internal-urls/artifact-protocol.ts +19 -1
  99. package/src/internal-urls/docs-index.generated.ts +5 -4
  100. package/src/internal-urls/local-protocol.ts +14 -1
  101. package/src/internal-urls/memory-protocol.ts +6 -1
  102. package/src/internal-urls/omp-protocol.ts +5 -1
  103. package/src/internal-urls/router.ts +20 -1
  104. package/src/internal-urls/rule-protocol.ts +8 -1
  105. package/src/internal-urls/skill-protocol.ts +8 -1
  106. package/src/internal-urls/types.ts +27 -0
  107. package/src/lsp/render.ts +1 -1
  108. package/src/main.ts +4 -0
  109. package/src/mcp/oauth-flow.ts +2 -2
  110. package/src/memory-backend/index.ts +1 -0
  111. package/src/memory-backend/resolve.ts +4 -1
  112. package/src/memory-backend/types.ts +8 -1
  113. package/src/mnemosyne/backend.ts +374 -0
  114. package/src/mnemosyne/config.ts +160 -0
  115. package/src/mnemosyne/index.ts +3 -0
  116. package/src/mnemosyne/state.ts +548 -0
  117. package/src/modes/acp/acp-agent.ts +11 -6
  118. package/src/modes/components/agent-dashboard.ts +4 -4
  119. package/src/modes/components/custom-editor.ts +3 -2
  120. package/src/modes/components/diff.ts +2 -2
  121. package/src/modes/components/extensions/extension-list.ts +3 -2
  122. package/src/modes/components/footer.ts +5 -6
  123. package/src/modes/components/history-search.ts +3 -3
  124. package/src/modes/components/hook-selector.ts +94 -8
  125. package/src/modes/components/index.ts +1 -0
  126. package/src/modes/components/mcp-add-wizard.ts +3 -3
  127. package/src/modes/components/model-selector.ts +124 -26
  128. package/src/modes/components/oauth-selector.ts +3 -3
  129. package/src/modes/components/session-observer-overlay.ts +19 -13
  130. package/src/modes/components/session-selector.ts +3 -3
  131. package/src/modes/components/settings-defs.ts +7 -0
  132. package/src/modes/components/status-line/context-thresholds.ts +11 -0
  133. package/src/modes/components/status-line/presets.ts +1 -0
  134. package/src/modes/components/status-line/segments.ts +25 -2
  135. package/src/modes/components/tiny-title-download-progress.ts +90 -0
  136. package/src/modes/components/tips.txt +12 -0
  137. package/src/modes/components/tool-execution.ts +67 -3
  138. package/src/modes/components/tree-selector.ts +3 -3
  139. package/src/modes/components/user-message-selector.ts +3 -3
  140. package/src/modes/components/welcome.ts +55 -1
  141. package/src/modes/controllers/command-controller.ts +16 -1
  142. package/src/modes/controllers/extension-ui-controller.ts +3 -1
  143. package/src/modes/controllers/input-controller.ts +57 -0
  144. package/src/modes/gradient-highlight.ts +70 -0
  145. package/src/modes/interactive-mode.ts +80 -196
  146. package/src/modes/internal-url-autocomplete.ts +143 -0
  147. package/src/modes/orchestrate.ts +36 -0
  148. package/src/modes/prompt-action-autocomplete.ts +12 -0
  149. package/src/modes/theme/theme.ts +7 -0
  150. package/src/modes/ultrathink.ts +9 -53
  151. package/src/modes/utils/keybinding-matchers.ts +11 -0
  152. package/src/prompts/system/memory-consolidation-system.md +8 -0
  153. package/src/prompts/system/memory-extraction-system.md +26 -0
  154. package/src/prompts/{commands/orchestrate.md → system/orchestrate-notice.md} +5 -16
  155. package/src/prompts/system/system-prompt.md +2 -0
  156. package/src/prompts/system/tiny-title-system.md +8 -0
  157. package/src/prompts/tools/eval.md +2 -0
  158. package/src/prompts/tools/memory-edit.md +8 -0
  159. package/src/prompts/tools/task.md +4 -7
  160. package/src/sdk.ts +8 -6
  161. package/src/session/agent-session.ts +147 -44
  162. package/src/session/session-manager.ts +47 -0
  163. package/src/slash-commands/builtin-registry.ts +10 -1
  164. package/src/system-prompt.ts +4 -0
  165. package/src/task/commands.ts +1 -5
  166. package/src/task/executor.ts +8 -0
  167. package/src/task/index.ts +2 -0
  168. package/src/task/render.ts +69 -26
  169. package/src/tiny/models.ts +217 -0
  170. package/src/tiny/text.ts +19 -0
  171. package/src/tiny/title-client.ts +340 -0
  172. package/src/tiny/title-protocol.ts +51 -0
  173. package/src/tiny/worker.ts +523 -0
  174. package/src/tools/bash.ts +58 -16
  175. package/src/tools/browser/tab-worker.ts +1 -1
  176. package/src/tools/eval.ts +24 -48
  177. package/src/tools/index.ts +17 -15
  178. package/src/tools/memory-edit.ts +59 -0
  179. package/src/tools/memory-recall.ts +100 -0
  180. package/src/tools/memory-reflect.ts +88 -0
  181. package/src/tools/memory-render.ts +185 -0
  182. package/src/tools/memory-retain.ts +91 -0
  183. package/src/tools/renderers.ts +4 -2
  184. package/src/tools/todo-write.ts +128 -29
  185. package/src/tools/tool-result.ts +8 -0
  186. package/src/utils/title-generator.ts +115 -13
  187. package/dist/types/tools/calculator.d.ts +0 -77
  188. package/src/prompts/tools/calculator.md +0 -10
  189. package/src/tools/calculator.ts +0 -541
  190. package/src/tools/hindsight-recall.ts +0 -69
  191. package/src/tools/hindsight-reflect.ts +0 -58
  192. package/src/tools/hindsight-retain.ts +0 -57
@@ -0,0 +1,297 @@
1
+ import { afterAll, afterEach, describe, expect, it, vi } from "bun:test";
2
+ import * as path from "node:path";
3
+ import type { Api, AssistantMessage, Model } from "@oh-my-pi/pi-ai";
4
+ import * as ai from "@oh-my-pi/pi-ai";
5
+ import { Effort } from "@oh-my-pi/pi-ai";
6
+ import { TempDir } from "@oh-my-pi/pi-utils";
7
+ import type { ModelRegistry } from "../../config/model-registry";
8
+ import { Settings } from "../../config/settings";
9
+ import type { ToolSession } from "../../tools";
10
+ import { ToolError } from "../../tools/tool-errors";
11
+ import { disposeAllVmContexts } from "../js/context-manager";
12
+ import { executeJs } from "../js/executor";
13
+ import { runEvalLlm } from "../llm-bridge";
14
+ import { disposeAllKernelSessions, executePython } from "../py/executor";
15
+
/**
 * Builds a model fixture for the given provider and id.
 *
 * The defaults describe a non-reasoning, text-only "openai-responses" model; any field in
 * `extra` overrides the matching default.
 */
function makeModel(provider: string, id: string, extra: Partial<Model<Api>> = {}): Model<Api> {
	const defaults = {
		id,
		name: id,
		api: "openai-responses",
		provider,
		baseUrl: "https://example.test/v1",
		reasoning: false,
		input: ["text"],
		cost: { input: 1, output: 1, cacheRead: 0, cacheWrite: 1 },
		contextWindow: 128000,
		maxTokens: 4096,
	};
	// Spread order matters: caller overrides win over the defaults.
	return { ...defaults, ...extra } as Model<Api>;
}
31
+
// Plain fixtures under provider "p"; all three use makeModel's non-reasoning defaults.
const SMOL = makeModel("p", "smol");
const DEFAULT = makeModel("p", "default");
const SLOW = makeModel("p", "slow");

// Shares provider/id with SLOW but is reasoning-capable and uses the "anthropic-messages" API.
const REASONING_SLOW_OVERRIDES: Partial<Model<Api>> = {
	api: "anthropic-messages",
	reasoning: true,
	thinking: { minLevel: Effort.Low, maxLevel: Effort.High, mode: "anthropic-adaptive" },
};
const REASONING_SLOW = makeModel("p", "slow", REASONING_SLOW_OVERRIDES);
40
+
/** Optional overrides accepted by `makeSession`. */
interface SessionOptions {
	/** Models the stub registry reports as available. */
	available?: Array<Model<Api>>;
	/** API key the stub registry resolves to; `null` is passed through as-is. */
	apiKey?: string | null;
	/** Value returned by the session's `getActiveModelString()`. */
	activeModel?: string;
	/** Role-to-model assignments written into the isolated settings. */
	roles?: Partial<Record<"smol" | "default" | "slow", string>>;
}
47
+
/**
 * Builds a stub `ToolSession` exposing only the members set below.
 *
 * - `settings`: isolated settings with async disabled and task isolation set to "none";
 *   every truthy entry of `opts.roles` (default: smol -> "p/smol", slow -> "p/slow") is
 *   registered through `setModelRole`.
 * - `modelRegistry.getAvailable()`: `opts.available`, or the three plain fixtures.
 * - `modelRegistry.getApiKey()`: "test-key" when `opts.apiKey` is left undefined, otherwise
 *   the given value (including `null`).
 * - `getActiveModelString()`: `opts.activeModel`, or "p/default".
 */
function makeSession(opts: SessionOptions = {}): ToolSession {
	const settings = Settings.isolated({ "async.enabled": false, "task.isolation.mode": "none" });
	const roles = opts.roles ?? { smol: "p/smol", slow: "p/slow" };
	// Object.entries visits own enumerable keys only and yields key and value together,
	// so no `keyof` cast is needed to read the value back.
	for (const [role, value] of Object.entries(roles)) {
		if (value) settings.setModelRole(role, value);
	}
	// Only the members stubbed here exist, hence the double cast.
	const modelRegistry = {
		getAvailable: () => opts.available ?? [SMOL, DEFAULT, SLOW],
		getApiKey: async () => (opts.apiKey === undefined ? "test-key" : opts.apiKey),
	} as unknown as ModelRegistry;
	return {
		settings,
		modelRegistry,
		getActiveModelString: () => opts.activeModel ?? "p/default",
	} as unknown as ToolSession;
}
65
+
/**
 * Builds an `AssistantMessage` fixture.
 *
 * `content` gets a text part when `opts.text` is truthy (an empty string adds nothing) and a
 * tool-call part with the fixed id "tc-1" when `opts.toolCall` is given. Usage and cost are
 * all zero, `stopReason` defaults to "stop", and `timestamp` is the current time.
 */
function assistant(opts: {
	text?: string;
	toolCall?: { name: string; arguments: Record<string, unknown> };
	stopReason?: AssistantMessage["stopReason"];
	errorMessage?: string;
}): AssistantMessage {
	const { text, toolCall, errorMessage } = opts;

	const content: AssistantMessage["content"] = [];
	if (text) {
		content.push({ type: "text", text });
	}
	if (toolCall) {
		const { name, arguments: args } = toolCall;
		content.push({ type: "toolCall", id: "tc-1", name, arguments: args });
	}

	const zeroCost = { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 };
	const usage = { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, totalTokens: 0, cost: zeroCost };

	return {
		role: "assistant",
		content,
		api: "openai-responses",
		provider: "p",
		model: "default",
		usage,
		stopReason: opts.stopReason ?? "stop",
		errorMessage,
		timestamp: Date.now(),
	};
}
96
+
// Tests for runEvalLlm. Where a completion is needed, ai.completeSimple is stubbed with vi.spyOn.
97
+ describe("runEvalLlm", () => {
98
+ afterEach(() => {
// Restore every spy so a stub installed by one test cannot leak into the next.
99
+ vi.restoreAllMocks();
100
+ });
101
+
// One call per tier; the first argument passed to completeSimple is read back as the resolved model.
102
+ it("resolves each tier to its expected model", async () => {
103
+ const spy = vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "ok" }));
104
+ const session = makeSession();
105
+
106
+ await runEvalLlm({ prompt: "q", model: "smol" }, { session });
107
+ await runEvalLlm({ prompt: "q", model: "default" }, { session });
108
+ await runEvalLlm({ prompt: "q", model: "slow" }, { session });
109
+
110
+ const resolved = spy.mock.calls.map(call => {
111
+ const model = call[0] as Model<Api>;
112
+ return `${model.provider}/${model.id}`;
113
+ });
114
+ expect(resolved).toEqual(["p/smol", "p/default", "p/slow"]);
115
+ });
116
+
// The session reports "p/slow" as its active model, so the default tier is expected to use it.
117
+ it("prefers the session active model for the default tier, falling back to pi/default", async () => {
118
+ const spy = vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "ok" }));
119
+ const session = makeSession({ available: [SMOL, DEFAULT, SLOW], activeModel: "p/slow" });
120
+
121
+ await runEvalLlm({ prompt: "q", model: "default" }, { session });
122
+
123
+ const model = spy.mock.calls[0]?.[0] as Model<Api>;
124
+ expect(`${model.provider}/${model.id}`).toBe("p/slow");
125
+ });
126
+
// Without a schema the result carries the completion text plus model/tier/structured details.
127
+ it("returns the completion text in plain mode", async () => {
128
+ vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "the answer" }));
129
+ const result = await runEvalLlm({ prompt: "q", model: "smol" }, { session: makeSession() });
130
+ expect(result.text).toBe("the answer");
131
+ expect(result.details).toEqual({ model: "p/smol", tier: "smol", structured: false });
132
+ });
133
+
// With a schema: result.text is the JSON of the tool-call arguments, the second spy argument
// must list a "respond" tool first, and the third must force it through toolChoice.
134
+ it("forces a respond tool call and returns its arguments in structured mode", async () => {
135
+ const spy = vi
136
+ .spyOn(ai, "completeSimple")
137
+ .mockResolvedValue(assistant({ toolCall: { name: "respond", arguments: { answer: 42 } } }));
138
+ const result = await runEvalLlm(
139
+ { prompt: "q", model: "smol", schema: { type: "object", properties: { answer: { type: "number" } } } },
140
+ { session: makeSession() },
141
+ );
142
+
143
+ expect(JSON.parse(result.text)).toEqual({ answer: 42 });
144
+ expect(result.details.structured).toBe(true);
145
+
146
+ const ctx = spy.mock.calls[0]?.[1] as { tools?: Array<{ name: string }> };
147
+ const opts = spy.mock.calls[0]?.[2] as { toolChoice?: unknown };
148
+ expect(ctx.tools?.[0]?.name).toBe("respond");
149
+ expect(opts.toolChoice).toEqual({ type: "tool", name: "respond" });
150
+ });
151
+
// The stub answers with prose that embeds a JSON object and contains no tool call.
152
+ it("falls back to JSON embedded in text when the model skips the respond tool", async () => {
153
+ vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: 'here: {"answer": 7}' }));
154
+ const result = await runEvalLlm(
155
+ { prompt: "q", model: "smol", schema: { type: "object" } },
156
+ { session: makeSession() },
157
+ );
158
+ expect(JSON.parse(result.text)).toEqual({ answer: 7 });
159
+ });
160
+
// REASONING_SLOW replaces SLOW in the available list; calls[0] is the smol call, calls[1] the slow call.
161
+ it("requests reasoning only for the slow tier on a reasoning-capable model", async () => {
162
+ const spy = vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "ok" }));
163
+ const session = makeSession({ available: [SMOL, DEFAULT, REASONING_SLOW] });
164
+
165
+ await runEvalLlm({ prompt: "q", model: "smol" }, { session });
166
+ await runEvalLlm({ prompt: "q", model: "slow" }, { session });
167
+
168
+ const smolOpts = spy.mock.calls[0]?.[2] as { reasoning?: unknown };
169
+ const slowOpts = spy.mock.calls[1]?.[2] as { reasoning?: unknown };
170
+ expect(smolOpts.reasoning).toBeUndefined();
171
+ expect(slowOpts.reasoning).toBe(Effort.High);
172
+ });
173
+
174
+ it("does not request reasoning for the slow tier on a non-reasoning model", async () => {
175
+ const spy = vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "ok" }));
176
+ // SLOW is reasoning:false — must not trip requireSupportedEffort downstream.
177
+ const result = await runEvalLlm({ prompt: "q", model: "slow" }, { session: makeSession() });
178
+ expect(result.text).toBe("ok");
179
+ const opts = spy.mock.calls[0]?.[2] as { reasoning?: unknown };
180
+ expect(opts.reasoning).toBeUndefined();
181
+ });
182
+
// Two invalid inputs: an empty prompt, and a tier name ("huge") outside smol/default/slow.
183
+ it("throws ToolError on invalid arguments", async () => {
184
+ await expect(runEvalLlm({ prompt: "" }, { session: makeSession() })).rejects.toBeInstanceOf(ToolError);
185
+ await expect(runEvalLlm({ prompt: "q", model: "huge" }, { session: makeSession() })).rejects.toBeInstanceOf(
186
+ ToolError,
187
+ );
188
+ });
189
+
// The smol role points at "missing/model", which is not among the available models.
190
+ it("throws ToolError when no model resolves for the tier", async () => {
191
+ const session = makeSession({ available: [DEFAULT], roles: { smol: "missing/model" } });
192
+ await expect(runEvalLlm({ prompt: "q", model: "smol" }, { session })).rejects.toBeInstanceOf(ToolError);
193
+ });
194
+
// apiKey: null makes the stub registry's getApiKey resolve to null instead of "test-key".
195
+ it("throws ToolError when the resolved model has no API key", async () => {
196
+ const session = makeSession({ apiKey: null });
197
+ await expect(runEvalLlm({ prompt: "q", model: "smol" }, { session })).rejects.toBeInstanceOf(ToolError);
198
+ });
199
+
// Each mockResolvedValueOnce stub serves exactly one call: first the "error" reply, then the "aborted" one.
200
+ it("maps error and aborted stop reasons to ToolError", async () => {
201
+ vi.spyOn(ai, "completeSimple").mockResolvedValueOnce(assistant({ stopReason: "error", errorMessage: "boom" }));
202
+ await expect(runEvalLlm({ prompt: "q", model: "smol" }, { session: makeSession() })).rejects.toThrow("boom");
203
+
204
+ vi.spyOn(ai, "completeSimple").mockResolvedValueOnce(assistant({ stopReason: "aborted" }));
205
+ await expect(runEvalLlm({ prompt: "q", model: "smol" }, { session: makeSession() })).rejects.toBeInstanceOf(
206
+ ToolError,
207
+ );
208
+ });
209
+
// assistant({ text: "" }) produces an empty content array, because the helper skips falsy text.
210
+ it("throws ToolError when plain mode produces no text", async () => {
211
+ vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "" }));
212
+ await expect(runEvalLlm({ prompt: "q", model: "smol" }, { session: makeSession() })).rejects.toBeInstanceOf(
213
+ ToolError,
214
+ );
215
+ });
216
+ });
217
+
// Checks that llm() is callable from code run by executeJs and executePython, with
// ai.completeSimple stubbed in every test.
218
+ describe("llm() through eval runtimes", () => {
219
+ afterEach(() => {
220
+ vi.restoreAllMocks();
221
+ });
222
+
// Runs once after the whole suite: calls the JS context and Python kernel dispose helpers.
223
+ afterAll(async () => {
224
+ await disposeAllVmContexts();
225
+ await disposeAllKernelSessions();
226
+ });
227
+
// Each test gets its own temp dir (declared with `using`) and a unique session id.
228
+ it("exposes llm() in the JavaScript runtime", async () => {
229
+ using tempDir = TempDir.createSync("@omp-eval-llm-js-");
230
+ const sessionFile = path.join(tempDir.path(), "session.jsonl");
231
+ const sessionId = `js-llm:${crypto.randomUUID()}`;
232
+ vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "hello from smol" }));
233
+
234
+ const result = await executeJs('return await llm("hi", { model: "smol" });', {
235
+ cwd: tempDir.path(),
236
+ sessionId,
237
+ session: makeSession(),
238
+ sessionFile,
239
+ });
240
+
241
+ expect(result.exitCode).toBe(0);
242
+ expect(result.output.trim()).toBe("hello from smol");
243
+ });
244
+
// The cell re-serializes whatever llm() returned for a schema call, so the assertion can parse it back.
245
+ it("parses structured llm() output in the JavaScript runtime", async () => {
246
+ using tempDir = TempDir.createSync("@omp-eval-llm-js-struct-");
247
+ const sessionFile = path.join(tempDir.path(), "session.jsonl");
248
+ const sessionId = `js-llm-struct:${crypto.randomUUID()}`;
249
+ vi.spyOn(ai, "completeSimple").mockResolvedValue(
250
+ assistant({ toolCall: { name: "respond", arguments: { ok: true, n: 3 } } }),
251
+ );
252
+
253
+ const result = await executeJs(
254
+ 'const r = await llm("hi", { schema: { type: "object" } }); return JSON.stringify(r);',
255
+ { cwd: tempDir.path(), sessionId, session: makeSession(), sessionFile },
256
+ );
257
+
258
+ expect(result.exitCode).toBe(0);
259
+ expect(JSON.parse(result.output.trim())).toEqual({ ok: true, n: 3 });
260
+ });
261
+
// Note the option key: executePython receives the stub session as `toolSession`, executeJs as `session`.
262
+ it("exposes llm() in the Python runtime", async () => {
263
+ using tempDir = TempDir.createSync("@omp-eval-llm-py-");
264
+ const sessionFile = path.join(tempDir.path(), "session.jsonl");
265
+ const sessionId = `py-llm:${crypto.randomUUID()}`;
266
+ vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "hello from python" }));
267
+
268
+ const result = await executePython('print(llm("hi", model="smol"))', {
269
+ cwd: tempDir.path(),
270
+ sessionId,
271
+ sessionFile,
272
+ toolSession: makeSession(),
273
+ });
274
+
275
+ expect(result.exitCode).toBe(0);
276
+ expect(result.output.trim()).toBe("hello from python");
277
+ });
278
+
// Python counterpart of the structured JS test: the cell dumps llm()'s return value as JSON.
279
+ it("parses structured llm() output in the Python runtime", async () => {
280
+ using tempDir = TempDir.createSync("@omp-eval-llm-py-struct-");
281
+ const sessionFile = path.join(tempDir.path(), "session.jsonl");
282
+ const sessionId = `py-llm-struct:${crypto.randomUUID()}`;
283
+ vi.spyOn(ai, "completeSimple").mockResolvedValue(
284
+ assistant({ toolCall: { name: "respond", arguments: { ok: true } } }),
285
+ );
286
+
287
+ const result = await executePython('import json\nprint(json.dumps(llm("hi", schema={"type": "object"})))', {
288
+ cwd: tempDir.path(),
289
+ sessionId,
290
+ sessionFile,
291
+ toolSession: makeSession(),
292
+ });
293
+
294
+ expect(result.exitCode).toBe(0);
295
+ expect(JSON.parse(result.output.trim())).toEqual({ ok: true });
296
+ });
297
+ });
@@ -492,6 +492,42 @@ display({"label": "A"})`,
492
492
  expect(reloaded.output.trim()).toBe("2");
493
493
  });
494
494
 
// Checks type-only imports in two places: the eval cell itself (`import type` from types.ts)
// and a local TypeScript module (entry.ts) that the cell loads with a dynamic import.
495
+ it("loads TypeScript type-only imports in cells and local modules", async () => {
496
+ using tempDir = TempDir.createSync("@omp-eval-js-type-imports-");
497
+ const sessionFile = path.join(tempDir.path(), "session.jsonl");
498
+ const sessionId = `js-type-imports:${crypto.randomUUID()}`;
499
+ const session = createToolSession(tempDir.path(), sessionFile);
500
+ const typesPath = path.join(tempDir.path(), "types.ts");
501
+ const valuesPath = path.join(tempDir.path(), "values.ts");
502
+ const entryPath = path.join(tempDir.path(), "entry.ts");
// JSON.stringify turns the absolute paths into quoted string literals for the cell source.
503
+ const typesSpec = JSON.stringify(typesPath);
504
+ const entrySpec = JSON.stringify(entryPath);
505
+ await Bun.write(typesPath, "export interface TypeOnly { value: number }\n");
506
+ await Bun.write(valuesPath, "export interface InlineOnly { value: number }\nexport const imported = 41;\n");
// entry.ts combines a pure `import type` with an inline `type` specifier next to a value import.
507
+ await Bun.write(
508
+ entryPath,
509
+ [
510
+ 'import type { TypeOnly } from "./types.ts";',
511
+ 'import { type InlineOnly, imported } from "./values.ts";',
512
+ "export const typeOnly = 1;",
513
+ "export const inlineType = imported;",
514
+ "",
515
+ ].join("\n"),
516
+ );
517
+
518
+ const result = await executeJs(
519
+ `import type { TypeOnly } from ${typesSpec};\nconst mod = await import(${entrySpec});\nreturn mod.typeOnly + mod.inlineType;`,
520
+ {
521
+ sessionId,
522
+ session,
523
+ sessionFile,
524
+ },
525
+ );
526
+
// Expected sum: typeOnly (1) + inlineType (41, re-exported from values.ts).
527
+ expect(result.exitCode).toBe(0);
528
+ expect(result.output.trim()).toBe("42");
529
+ });
530
+
495
531
  it("refreshes the Python tool proxy when bridge env appears after kernel warm-up", async () => {
496
532
  using tempDir = TempDir.createSync("@omp-eval-py-tool-proxy-");
497
533
  const sessionFile = path.join(tempDir.path(), "session.jsonl");
@@ -88,7 +88,10 @@ export class LocalModuleLoader {
88
88
 
89
89
  async #buildLocalModule(modulePath: string): Promise<LocalModuleEntry> {
90
90
  const rawSource = fs.readFileSync(modulePath, "utf8");
91
- const stripped = stripTypeScriptSyntax(rawSource);
91
+ const stripped = stripTypeScriptSyntax(rawSource, {
92
+ force: isTypeScriptModulePath(modulePath),
93
+ loader: stripLoaderForPath(modulePath),
94
+ });
92
95
  const moduleDir = path.dirname(modulePath);
93
96
  const localDeps = new Set<string>();
94
97
  for (const specifier of collectModuleSourceSpecifiers(stripped)) {
@@ -251,6 +254,15 @@ function isLocalPathSpecifier(source: string): boolean {
251
254
  );
252
255
  }
253
256
 
/**
 * Reports whether `modulePath` names a TypeScript module, judged by its final extension.
 *
 * Recognised extensions are `.ts`, `.tsx`, `.mts` and `.cts`. Because only the last
 * extension is inspected, declaration files such as `foo.d.ts` also count.
 *
 * @param modulePath - File path of the module.
 * @returns `true` when the extension is one of the TypeScript module extensions.
 */
function isTypeScriptModulePath(modulePath: string): boolean {
	switch (path.extname(modulePath)) {
		case ".ts":
		case ".tsx":
		case ".mts":
		case ".cts":
			return true;
		default:
			return false;
	}
}
261
+
/**
 * Chooses the strip loader for a module path: "tsx" when the file extension is exactly
 * `.tsx`, "ts" for every other path.
 */
function stripLoaderForPath(modulePath: string): "ts" | "tsx" {
	const extension = path.extname(modulePath);
	if (extension === ".tsx") {
		return "tsx";
	}
	return "ts";
}
265
+
254
266
  function isManagedLocalModulePath(target: string): boolean {
255
267
  return (
256
268
  path.isAbsolute(target) &&
@@ -39,6 +39,13 @@ if (!globalThis.__omp_js_prelude_loaded__) {
39
39
  return values.length === 1 ? values[0] : values;
40
40
  };
41
41
 
// llm(prompt, opts): sends the prompt plus normalized options to the "__llm__" bridge entry
// through __omp_call_tool__. An object reply is reduced to its `text` field; any other reply
// is used as-is. When a schema was requested the text is JSON-parsed before being returned.
const llm = async (prompt, opts = {}) => {
	const options = toOptions(opts);
	const reply = await globalThis.__omp_call_tool__("__llm__", { prompt, ...options });
	let text = reply;
	if (reply && typeof reply === "object") {
		text = reply.text;
	}
	if (!options.schema) {
		return text;
	}
	return JSON.parse(text);
};
48
+
42
49
  const display = value => {
43
50
  globalThis.__omp_display__(value);
44
51
  };
@@ -61,6 +68,7 @@ if (!globalThis.__omp_js_prelude_loaded__) {
61
68
  globalThis.print = consoleBridge.log;
62
69
  globalThis.display = display;
63
70
  globalThis.tool = tool;
71
+ globalThis.llm = llm;
64
72
  globalThis.output = output;
65
73
  globalThis.read = read;
66
74
  globalThis.write = write;
@@ -75,6 +75,7 @@ function parseProgram(code: string): { program: { body: ReadonlyArray<BabelProgr
75
75
  allowSuperOutsideMethod: true,
76
76
  allowUndeclaredExports: true,
77
77
  errorRecovery: true,
78
+ plugins: ["typescript"],
78
79
  }) as unknown as { program: { body: ReadonlyArray<BabelProgramNode> } };
79
80
  } catch {
80
81
  return null;
@@ -178,8 +179,7 @@ export function rewriteImports(code: string): string {
178
179
  if (node.type !== "CallExpression") return;
179
180
  const call = node as unknown as { callee?: { type?: string; start?: number; end?: number } };
180
181
  const callee = call.callee;
181
- if (!callee || callee.type !== "Import" || typeof callee.start !== "number" || typeof callee.end !== "number")
182
- return;
182
+ if (callee?.type !== "Import" || typeof callee.start !== "number" || typeof callee.end !== "number") return;
183
183
  edits.push({ start: callee.start, end: callee.end, text: "__omp_import__" });
184
184
  });
185
185
 
@@ -252,12 +252,7 @@ export function rewriteDynamicImports(code: string, callee = "__omp_import__"):
252
252
  if (node.type !== "CallExpression") return;
253
253
  const call = node as unknown as { callee?: { type?: string; start?: number; end?: number } };
254
254
  const callCallee = call.callee;
255
- if (
256
- !callCallee ||
257
- callCallee.type !== "Import" ||
258
- typeof callCallee.start !== "number" ||
259
- typeof callCallee.end !== "number"
260
- ) {
255
+ if (callCallee?.type !== "Import" || typeof callCallee.start !== "number" || typeof callCallee.end !== "number") {
261
256
  return;
262
257
  }
263
258
  edits.push({ start: callCallee.start, end: callCallee.end, text: callee });
@@ -453,38 +448,48 @@ function requiresAsyncWrapper(code: string): boolean {
453
448
  }
454
449
 
type TypeScriptStripLoader = "ts" | "tsx";

// One transpiler per loader, created once at module load and reused for every call.
const TS_TRANSPILER = new Bun.Transpiler({ loader: "ts" });
const TSX_TRANSPILER = new Bun.Transpiler({ loader: "tsx" });

/**
 * Strip TypeScript syntax (type annotations, type-only imports/exports, `interface`, `as`,
 * `satisfies`, generics in call expressions, etc.) before the import/lexical rewriters parse
 * the code. Bun's native transpiler preserves `import`/`export` declarations, so downstream
 * Babel rewrites still control module resolution.
 *
 * Eval cells use a cheap "looks like TS" heuristic to avoid transpiling ordinary JS. Known
 * TypeScript modules pass `force` because a file can contain TS-only module syntax such as
 * `import type` without any value-level type annotations.
 *
 * @param code - Source text to strip.
 * @param options - `force` skips the `LOOKS_LIKE_TS` heuristic and always transpiles;
 *   `loader` set to `"tsx"` selects the TSX transpiler, anything else uses the TS one.
 * @returns The transpiled source, or `code` unchanged when the heuristic does not match or
 *   the transpiler throws.
 */
function stripTypeScript(code: string, options: { force?: boolean; loader?: TypeScriptStripLoader } = {}): string {
	if (!options.force && !LOOKS_LIKE_TS.test(code)) return code;
	try {
		const transpiler = options.loader === "tsx" ? TSX_TRANSPILER : TS_TRANSPILER;
		return transpiler.transformSync(code);
	} catch {
		// Transpiler failed (e.g. unrecoverable syntax). Hand the original source back so the
		// downstream rewriter / VM surfaces the real error to the user.
		return code;
	}
}

/**
 * Exported entry point for stripping TypeScript syntax; forwards `code` and `options`
 * unchanged to `stripTypeScript` and returns its result.
 */
export function stripTypeScriptSyntax(
	code: string,
	options: { force?: boolean; loader?: TypeScriptStripLoader } = {},
): string {
	return stripTypeScript(code, options);
}
478
482
 
// Heuristic: obvious TS-only tokens, including type-only module syntax. Plain JS using `as`
// only inside strings won't match because we require a leading word boundary plus a
// colon/keyword neighbor. The `import`/`export { ... type X ... }` branch scans up to the
// closing brace, so specifier lists that a formatter wrapped across several lines are
// detected as well.
const LOOKS_LIKE_TS =
	/(?:\bimport\s+type\b|\bexport\s+type\b|\b(?:import|export)\s*\{[^}]*\btype\s+\w|\binterface\s+\w|\btype\s+\w+\s*=|\b(?:as|satisfies)\s+(?:[A-Z]|\bconst\b)|:\s*(?:string|number|boolean|any|unknown|void|never|object|[A-Z]\w*)\b|<\s*[A-Z]\w*\s*[,>])/;
483
488
 
484
489
  export function wrapCode(code: string): { source: string; asyncWrapped: boolean; finalExpressionReturned: boolean } {
485
- const stripped = stripTypeScript(code);
486
- const finalExpression = returnFinalExpression(stripped);
487
- const importsRewritten = rewriteImports(finalExpression.source);
490
+ const finalExpression = returnFinalExpression(code);
491
+ const stripped = stripTypeScript(finalExpression.source);
492
+ const importsRewritten = rewriteImports(stripped);
488
493
  const needsAsyncWrapper = requiresAsyncWrapper(importsRewritten);
489
494
  const rewritten = {
490
495
  source: demoteTopLevelLexicals(importsRewritten, { publishGlobals: needsAsyncWrapper }),
@@ -1,6 +1,7 @@
1
1
  import type { AgentTool, AgentToolResult } from "@oh-my-pi/pi-agent-core";
2
2
  import type { ToolSession } from "../../tools";
3
3
  import { ToolError } from "../../tools/tool-errors";
4
+ import { EVAL_LLM_BRIDGE_NAME, runEvalLlm } from "../llm-bridge";
4
5
  import type { JsStatusEvent } from "./shared/types";
5
6
 
6
7
  export type { JsStatusEvent } from "./shared/types";
@@ -101,6 +102,9 @@ function summarizeToolResult(
101
102
  }
102
103
 
103
104
  export async function callSessionTool(name: string, args: unknown, options: ToolBridgeOptions): Promise<ToolValue> {
105
+ if (name === EVAL_LLM_BRIDGE_NAME) {
106
+ return await runEvalLlm(args, options);
107
+ }
104
108
  const tool = getTool(options.session, name);
105
109
  const normalizedArgs = normalizeArgs(args);
106
110
  const toolCallId = `js-${name}-${crypto.randomUUID()}`;