@oh-my-pi/pi-coding-agent 15.5.13 → 15.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +77 -0
- package/dist/types/cli/classify-install-target.d.ts +0 -10
- package/dist/types/cli/initial-message.d.ts +1 -1
- package/dist/types/cli/tiny-models-cli.d.ts +9 -0
- package/dist/types/commands/tiny-models.d.ts +22 -0
- package/dist/types/commit/analysis/conventional.d.ts +1 -1
- package/dist/types/commit/analysis/summary.d.ts +1 -1
- package/dist/types/commit/changelog/generate.d.ts +1 -1
- package/dist/types/commit/changelog/index.d.ts +2 -2
- package/dist/types/commit/map-reduce/map-phase.d.ts +1 -1
- package/dist/types/commit/map-reduce/reduce-phase.d.ts +1 -1
- package/dist/types/config/model-id-affixes.d.ts +10 -0
- package/dist/types/config/model-registry.d.ts +1 -1
- package/dist/types/config/models-config-schema.d.ts +2 -0
- package/dist/types/config/settings-schema.d.ts +233 -17
- package/dist/types/discovery/helpers.d.ts +1 -1
- package/dist/types/discovery/substitute-plugin-root.d.ts +0 -4
- package/dist/types/eval/__tests__/llm-bridge.test.d.ts +1 -0
- package/dist/types/eval/js/shared/rewrite-imports.d.ts +16 -1
- package/dist/types/eval/llm-bridge.d.ts +25 -0
- package/dist/types/export/html/template.generated.d.ts +1 -1
- package/dist/types/extensibility/plugins/legacy-pi-compat.d.ts +15 -0
- package/dist/types/internal-urls/agent-protocol.d.ts +2 -1
- package/dist/types/internal-urls/artifact-protocol.d.ts +2 -1
- package/dist/types/internal-urls/local-protocol.d.ts +2 -1
- package/dist/types/internal-urls/memory-protocol.d.ts +2 -1
- package/dist/types/internal-urls/omp-protocol.d.ts +2 -1
- package/dist/types/internal-urls/router.d.ts +8 -1
- package/dist/types/internal-urls/rule-protocol.d.ts +2 -1
- package/dist/types/internal-urls/skill-protocol.d.ts +2 -1
- package/dist/types/internal-urls/types.d.ts +26 -0
- package/dist/types/memory-backend/index.d.ts +1 -0
- package/dist/types/memory-backend/resolve.d.ts +2 -1
- package/dist/types/memory-backend/types.d.ts +7 -1
- package/dist/types/mnemosyne/backend.d.ts +4 -0
- package/dist/types/mnemosyne/config.d.ts +29 -0
- package/dist/types/mnemosyne/index.d.ts +3 -0
- package/dist/types/mnemosyne/state.d.ts +72 -0
- package/dist/types/modes/components/custom-editor.d.ts +2 -3
- package/dist/types/modes/components/hook-selector.d.ts +27 -0
- package/dist/types/modes/components/index.d.ts +1 -0
- package/dist/types/modes/components/status-line/context-thresholds.d.ts +6 -0
- package/dist/types/modes/components/tiny-title-download-progress.d.ts +11 -0
- package/dist/types/modes/components/welcome.d.ts +1 -0
- package/dist/types/modes/controllers/extension-ui-controller.d.ts +4 -1
- package/dist/types/modes/gradient-highlight.d.ts +23 -0
- package/dist/types/modes/interactive-mode.d.ts +4 -2
- package/dist/types/modes/internal-url-autocomplete.d.ts +43 -0
- package/dist/types/modes/orchestrate.d.ts +10 -0
- package/dist/types/modes/theme/defaults/index.d.ts +8406 -8406
- package/dist/types/modes/theme/theme.d.ts +2 -1
- package/dist/types/modes/ultrathink.d.ts +3 -3
- package/dist/types/modes/utils/keybinding-matchers.d.ts +5 -0
- package/dist/types/sdk.d.ts +3 -0
- package/dist/types/session/agent-session.d.ts +35 -0
- package/dist/types/system-prompt.d.ts +2 -0
- package/dist/types/task/executor.d.ts +2 -0
- package/dist/types/task/render.d.ts +5 -1
- package/dist/types/tiny/models.d.ts +185 -0
- package/dist/types/tiny/text.d.ts +4 -0
- package/dist/types/tiny/title-client.d.ts +24 -0
- package/dist/types/tiny/title-protocol.d.ts +74 -0
- package/dist/types/tiny/worker.d.ts +2 -0
- package/dist/types/tools/bash.d.ts +3 -1
- package/dist/types/tools/index.d.ts +7 -4
- package/dist/types/tools/memory-edit.d.ts +40 -0
- package/dist/types/tools/{hindsight-recall.d.ts → memory-recall.d.ts} +6 -6
- package/dist/types/tools/{hindsight-reflect.d.ts → memory-reflect.d.ts} +6 -6
- package/dist/types/tools/memory-render.d.ts +60 -0
- package/dist/types/tools/{hindsight-retain.d.ts → memory-retain.d.ts} +6 -6
- package/dist/types/tools/todo-write.d.ts +8 -0
- package/dist/types/tools/tool-result.d.ts +2 -0
- package/dist/types/utils/title-generator.d.ts +3 -0
- package/package.json +18 -14
- package/scripts/build-binary.ts +1 -0
- package/src/cli/tiny-models-cli.ts +127 -0
- package/src/cli-commands.ts +1 -0
- package/src/cli.ts +8 -8
- package/src/commands/tiny-models.ts +36 -0
- package/src/config/model-equivalence.ts +43 -2
- package/src/config/model-id-affixes.ts +64 -0
- package/src/config/model-registry.ts +166 -8
- package/src/config/models-config-schema.ts +1 -1
- package/src/config/settings-schema.ts +206 -14
- package/src/edit/hashline/diff.ts +5 -7
- package/src/eval/__tests__/llm-bridge.test.ts +297 -0
- package/src/eval/__tests__/shared-executors.test.ts +36 -0
- package/src/eval/js/shared/local-module-loader.ts +13 -1
- package/src/eval/js/shared/prelude.txt +8 -0
- package/src/eval/js/shared/rewrite-imports.ts +31 -26
- package/src/eval/js/tool-bridge.ts +4 -0
- package/src/eval/llm-bridge.ts +181 -0
- package/src/eval/py/prelude.py +52 -31
- package/src/export/html/template.generated.ts +1 -1
- package/src/export/html/template.js +0 -13
- package/src/extensibility/plugins/legacy-pi-compat.ts +60 -23
- package/src/internal-urls/agent-protocol.ts +18 -1
- package/src/internal-urls/artifact-protocol.ts +19 -1
- package/src/internal-urls/docs-index.generated.ts +5 -4
- package/src/internal-urls/local-protocol.ts +14 -1
- package/src/internal-urls/memory-protocol.ts +6 -1
- package/src/internal-urls/omp-protocol.ts +5 -1
- package/src/internal-urls/router.ts +20 -1
- package/src/internal-urls/rule-protocol.ts +8 -1
- package/src/internal-urls/skill-protocol.ts +8 -1
- package/src/internal-urls/types.ts +27 -0
- package/src/lsp/render.ts +1 -1
- package/src/main.ts +4 -0
- package/src/mcp/oauth-flow.ts +2 -2
- package/src/memory-backend/index.ts +1 -0
- package/src/memory-backend/resolve.ts +4 -1
- package/src/memory-backend/types.ts +8 -1
- package/src/mnemosyne/backend.ts +374 -0
- package/src/mnemosyne/config.ts +160 -0
- package/src/mnemosyne/index.ts +3 -0
- package/src/mnemosyne/state.ts +548 -0
- package/src/modes/acp/acp-agent.ts +11 -6
- package/src/modes/components/agent-dashboard.ts +4 -4
- package/src/modes/components/custom-editor.ts +3 -2
- package/src/modes/components/diff.ts +2 -2
- package/src/modes/components/extensions/extension-list.ts +3 -2
- package/src/modes/components/footer.ts +5 -6
- package/src/modes/components/history-search.ts +3 -3
- package/src/modes/components/hook-selector.ts +94 -8
- package/src/modes/components/index.ts +1 -0
- package/src/modes/components/mcp-add-wizard.ts +3 -3
- package/src/modes/components/model-selector.ts +124 -26
- package/src/modes/components/oauth-selector.ts +3 -3
- package/src/modes/components/session-observer-overlay.ts +19 -13
- package/src/modes/components/session-selector.ts +3 -3
- package/src/modes/components/settings-defs.ts +7 -0
- package/src/modes/components/status-line/context-thresholds.ts +11 -0
- package/src/modes/components/status-line/presets.ts +1 -0
- package/src/modes/components/status-line/segments.ts +25 -2
- package/src/modes/components/tiny-title-download-progress.ts +90 -0
- package/src/modes/components/tips.txt +12 -0
- package/src/modes/components/tool-execution.ts +67 -3
- package/src/modes/components/tree-selector.ts +3 -3
- package/src/modes/components/user-message-selector.ts +3 -3
- package/src/modes/components/welcome.ts +55 -1
- package/src/modes/controllers/command-controller.ts +16 -1
- package/src/modes/controllers/extension-ui-controller.ts +3 -1
- package/src/modes/controllers/input-controller.ts +57 -0
- package/src/modes/gradient-highlight.ts +70 -0
- package/src/modes/interactive-mode.ts +80 -196
- package/src/modes/internal-url-autocomplete.ts +143 -0
- package/src/modes/orchestrate.ts +36 -0
- package/src/modes/prompt-action-autocomplete.ts +12 -0
- package/src/modes/theme/theme.ts +7 -0
- package/src/modes/ultrathink.ts +9 -53
- package/src/modes/utils/keybinding-matchers.ts +11 -0
- package/src/prompts/system/memory-consolidation-system.md +8 -0
- package/src/prompts/system/memory-extraction-system.md +26 -0
- package/src/prompts/{commands/orchestrate.md → system/orchestrate-notice.md} +5 -16
- package/src/prompts/system/system-prompt.md +2 -0
- package/src/prompts/system/tiny-title-system.md +8 -0
- package/src/prompts/tools/eval.md +2 -0
- package/src/prompts/tools/memory-edit.md +8 -0
- package/src/prompts/tools/task.md +4 -7
- package/src/sdk.ts +8 -6
- package/src/session/agent-session.ts +147 -44
- package/src/session/session-manager.ts +47 -0
- package/src/slash-commands/builtin-registry.ts +10 -1
- package/src/system-prompt.ts +4 -0
- package/src/task/commands.ts +1 -5
- package/src/task/executor.ts +8 -0
- package/src/task/index.ts +2 -0
- package/src/task/render.ts +69 -26
- package/src/tiny/models.ts +217 -0
- package/src/tiny/text.ts +19 -0
- package/src/tiny/title-client.ts +340 -0
- package/src/tiny/title-protocol.ts +51 -0
- package/src/tiny/worker.ts +523 -0
- package/src/tools/bash.ts +58 -16
- package/src/tools/browser/tab-worker.ts +1 -1
- package/src/tools/eval.ts +24 -48
- package/src/tools/index.ts +17 -15
- package/src/tools/memory-edit.ts +59 -0
- package/src/tools/memory-recall.ts +100 -0
- package/src/tools/memory-reflect.ts +88 -0
- package/src/tools/memory-render.ts +185 -0
- package/src/tools/memory-retain.ts +91 -0
- package/src/tools/renderers.ts +4 -2
- package/src/tools/todo-write.ts +128 -29
- package/src/tools/tool-result.ts +8 -0
- package/src/utils/title-generator.ts +115 -13
- package/dist/types/tools/calculator.d.ts +0 -77
- package/src/prompts/tools/calculator.md +0 -10
- package/src/tools/calculator.ts +0 -541
- package/src/tools/hindsight-recall.ts +0 -69
- package/src/tools/hindsight-reflect.ts +0 -58
- package/src/tools/hindsight-retain.ts +0 -57
|
@@ -0,0 +1,297 @@
|
|
|
1
|
+
import { afterAll, afterEach, describe, expect, it, vi } from "bun:test";
|
|
2
|
+
import * as path from "node:path";
|
|
3
|
+
import type { Api, AssistantMessage, Model } from "@oh-my-pi/pi-ai";
|
|
4
|
+
import * as ai from "@oh-my-pi/pi-ai";
|
|
5
|
+
import { Effort } from "@oh-my-pi/pi-ai";
|
|
6
|
+
import { TempDir } from "@oh-my-pi/pi-utils";
|
|
7
|
+
import type { ModelRegistry } from "../../config/model-registry";
|
|
8
|
+
import { Settings } from "../../config/settings";
|
|
9
|
+
import type { ToolSession } from "../../tools";
|
|
10
|
+
import { ToolError } from "../../tools/tool-errors";
|
|
11
|
+
import { disposeAllVmContexts } from "../js/context-manager";
|
|
12
|
+
import { executeJs } from "../js/executor";
|
|
13
|
+
import { runEvalLlm } from "../llm-bridge";
|
|
14
|
+
import { disposeAllKernelSessions, executePython } from "../py/executor";
|
|
15
|
+
|
|
16
|
+
function makeModel(provider: string, id: string, extra: Partial<Model<Api>> = {}): Model<Api> {
|
|
17
|
+
return {
|
|
18
|
+
id,
|
|
19
|
+
name: id,
|
|
20
|
+
api: "openai-responses",
|
|
21
|
+
provider,
|
|
22
|
+
baseUrl: "https://example.test/v1",
|
|
23
|
+
reasoning: false,
|
|
24
|
+
input: ["text"],
|
|
25
|
+
cost: { input: 1, output: 1, cacheRead: 0, cacheWrite: 1 },
|
|
26
|
+
contextWindow: 128000,
|
|
27
|
+
maxTokens: 4096,
|
|
28
|
+
...extra,
|
|
29
|
+
} as Model<Api>;
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
const SMOL = makeModel("p", "smol");
|
|
33
|
+
const DEFAULT = makeModel("p", "default");
|
|
34
|
+
const SLOW = makeModel("p", "slow");
|
|
35
|
+
const REASONING_SLOW = makeModel("p", "slow", {
|
|
36
|
+
api: "anthropic-messages",
|
|
37
|
+
reasoning: true,
|
|
38
|
+
thinking: { minLevel: Effort.Low, maxLevel: Effort.High, mode: "anthropic-adaptive" },
|
|
39
|
+
});
|
|
40
|
+
|
|
41
|
+
interface SessionOptions {
|
|
42
|
+
available?: Model<Api>[];
|
|
43
|
+
apiKey?: string | null;
|
|
44
|
+
activeModel?: string;
|
|
45
|
+
roles?: Partial<Record<"smol" | "default" | "slow", string>>;
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
function makeSession(opts: SessionOptions = {}): ToolSession {
|
|
49
|
+
const settings = Settings.isolated({ "async.enabled": false, "task.isolation.mode": "none" });
|
|
50
|
+
const roles = opts.roles ?? { smol: "p/smol", slow: "p/slow" };
|
|
51
|
+
for (const role in roles) {
|
|
52
|
+
const value = roles[role as keyof typeof roles];
|
|
53
|
+
if (value) settings.setModelRole(role, value);
|
|
54
|
+
}
|
|
55
|
+
const modelRegistry = {
|
|
56
|
+
getAvailable: () => opts.available ?? [SMOL, DEFAULT, SLOW],
|
|
57
|
+
getApiKey: async () => (opts.apiKey === undefined ? "test-key" : opts.apiKey),
|
|
58
|
+
} as unknown as ModelRegistry;
|
|
59
|
+
return {
|
|
60
|
+
settings,
|
|
61
|
+
modelRegistry,
|
|
62
|
+
getActiveModelString: () => opts.activeModel ?? "p/default",
|
|
63
|
+
} as unknown as ToolSession;
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
function assistant(opts: {
|
|
67
|
+
text?: string;
|
|
68
|
+
toolCall?: { name: string; arguments: Record<string, unknown> };
|
|
69
|
+
stopReason?: AssistantMessage["stopReason"];
|
|
70
|
+
errorMessage?: string;
|
|
71
|
+
}): AssistantMessage {
|
|
72
|
+
const content: AssistantMessage["content"] = [];
|
|
73
|
+
if (opts.text) content.push({ type: "text", text: opts.text });
|
|
74
|
+
if (opts.toolCall) {
|
|
75
|
+
content.push({ type: "toolCall", id: "tc-1", name: opts.toolCall.name, arguments: opts.toolCall.arguments });
|
|
76
|
+
}
|
|
77
|
+
return {
|
|
78
|
+
role: "assistant",
|
|
79
|
+
content,
|
|
80
|
+
api: "openai-responses",
|
|
81
|
+
provider: "p",
|
|
82
|
+
model: "default",
|
|
83
|
+
usage: {
|
|
84
|
+
input: 0,
|
|
85
|
+
output: 0,
|
|
86
|
+
cacheRead: 0,
|
|
87
|
+
cacheWrite: 0,
|
|
88
|
+
totalTokens: 0,
|
|
89
|
+
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
|
90
|
+
},
|
|
91
|
+
stopReason: opts.stopReason ?? "stop",
|
|
92
|
+
errorMessage: opts.errorMessage,
|
|
93
|
+
timestamp: Date.now(),
|
|
94
|
+
};
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
describe("runEvalLlm", () => {
|
|
98
|
+
afterEach(() => {
|
|
99
|
+
vi.restoreAllMocks();
|
|
100
|
+
});
|
|
101
|
+
|
|
102
|
+
it("resolves each tier to its expected model", async () => {
|
|
103
|
+
const spy = vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "ok" }));
|
|
104
|
+
const session = makeSession();
|
|
105
|
+
|
|
106
|
+
await runEvalLlm({ prompt: "q", model: "smol" }, { session });
|
|
107
|
+
await runEvalLlm({ prompt: "q", model: "default" }, { session });
|
|
108
|
+
await runEvalLlm({ prompt: "q", model: "slow" }, { session });
|
|
109
|
+
|
|
110
|
+
const resolved = spy.mock.calls.map(call => {
|
|
111
|
+
const model = call[0] as Model<Api>;
|
|
112
|
+
return `${model.provider}/${model.id}`;
|
|
113
|
+
});
|
|
114
|
+
expect(resolved).toEqual(["p/smol", "p/default", "p/slow"]);
|
|
115
|
+
});
|
|
116
|
+
|
|
117
|
+
it("prefers the session active model for the default tier, falling back to pi/default", async () => {
|
|
118
|
+
const spy = vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "ok" }));
|
|
119
|
+
const session = makeSession({ available: [SMOL, DEFAULT, SLOW], activeModel: "p/slow" });
|
|
120
|
+
|
|
121
|
+
await runEvalLlm({ prompt: "q", model: "default" }, { session });
|
|
122
|
+
|
|
123
|
+
const model = spy.mock.calls[0]?.[0] as Model<Api>;
|
|
124
|
+
expect(`${model.provider}/${model.id}`).toBe("p/slow");
|
|
125
|
+
});
|
|
126
|
+
|
|
127
|
+
it("returns the completion text in plain mode", async () => {
|
|
128
|
+
vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "the answer" }));
|
|
129
|
+
const result = await runEvalLlm({ prompt: "q", model: "smol" }, { session: makeSession() });
|
|
130
|
+
expect(result.text).toBe("the answer");
|
|
131
|
+
expect(result.details).toEqual({ model: "p/smol", tier: "smol", structured: false });
|
|
132
|
+
});
|
|
133
|
+
|
|
134
|
+
it("forces a respond tool call and returns its arguments in structured mode", async () => {
|
|
135
|
+
const spy = vi
|
|
136
|
+
.spyOn(ai, "completeSimple")
|
|
137
|
+
.mockResolvedValue(assistant({ toolCall: { name: "respond", arguments: { answer: 42 } } }));
|
|
138
|
+
const result = await runEvalLlm(
|
|
139
|
+
{ prompt: "q", model: "smol", schema: { type: "object", properties: { answer: { type: "number" } } } },
|
|
140
|
+
{ session: makeSession() },
|
|
141
|
+
);
|
|
142
|
+
|
|
143
|
+
expect(JSON.parse(result.text)).toEqual({ answer: 42 });
|
|
144
|
+
expect(result.details.structured).toBe(true);
|
|
145
|
+
|
|
146
|
+
const ctx = spy.mock.calls[0]?.[1] as { tools?: Array<{ name: string }> };
|
|
147
|
+
const opts = spy.mock.calls[0]?.[2] as { toolChoice?: unknown };
|
|
148
|
+
expect(ctx.tools?.[0]?.name).toBe("respond");
|
|
149
|
+
expect(opts.toolChoice).toEqual({ type: "tool", name: "respond" });
|
|
150
|
+
});
|
|
151
|
+
|
|
152
|
+
it("falls back to JSON embedded in text when the model skips the respond tool", async () => {
|
|
153
|
+
vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: 'here: {"answer": 7}' }));
|
|
154
|
+
const result = await runEvalLlm(
|
|
155
|
+
{ prompt: "q", model: "smol", schema: { type: "object" } },
|
|
156
|
+
{ session: makeSession() },
|
|
157
|
+
);
|
|
158
|
+
expect(JSON.parse(result.text)).toEqual({ answer: 7 });
|
|
159
|
+
});
|
|
160
|
+
|
|
161
|
+
it("requests reasoning only for the slow tier on a reasoning-capable model", async () => {
|
|
162
|
+
const spy = vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "ok" }));
|
|
163
|
+
const session = makeSession({ available: [SMOL, DEFAULT, REASONING_SLOW] });
|
|
164
|
+
|
|
165
|
+
await runEvalLlm({ prompt: "q", model: "smol" }, { session });
|
|
166
|
+
await runEvalLlm({ prompt: "q", model: "slow" }, { session });
|
|
167
|
+
|
|
168
|
+
const smolOpts = spy.mock.calls[0]?.[2] as { reasoning?: unknown };
|
|
169
|
+
const slowOpts = spy.mock.calls[1]?.[2] as { reasoning?: unknown };
|
|
170
|
+
expect(smolOpts.reasoning).toBeUndefined();
|
|
171
|
+
expect(slowOpts.reasoning).toBe(Effort.High);
|
|
172
|
+
});
|
|
173
|
+
|
|
174
|
+
it("does not request reasoning for the slow tier on a non-reasoning model", async () => {
|
|
175
|
+
const spy = vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "ok" }));
|
|
176
|
+
// SLOW is reasoning:false — must not trip requireSupportedEffort downstream.
|
|
177
|
+
const result = await runEvalLlm({ prompt: "q", model: "slow" }, { session: makeSession() });
|
|
178
|
+
expect(result.text).toBe("ok");
|
|
179
|
+
const opts = spy.mock.calls[0]?.[2] as { reasoning?: unknown };
|
|
180
|
+
expect(opts.reasoning).toBeUndefined();
|
|
181
|
+
});
|
|
182
|
+
|
|
183
|
+
it("throws ToolError on invalid arguments", async () => {
|
|
184
|
+
await expect(runEvalLlm({ prompt: "" }, { session: makeSession() })).rejects.toBeInstanceOf(ToolError);
|
|
185
|
+
await expect(runEvalLlm({ prompt: "q", model: "huge" }, { session: makeSession() })).rejects.toBeInstanceOf(
|
|
186
|
+
ToolError,
|
|
187
|
+
);
|
|
188
|
+
});
|
|
189
|
+
|
|
190
|
+
it("throws ToolError when no model resolves for the tier", async () => {
|
|
191
|
+
const session = makeSession({ available: [DEFAULT], roles: { smol: "missing/model" } });
|
|
192
|
+
await expect(runEvalLlm({ prompt: "q", model: "smol" }, { session })).rejects.toBeInstanceOf(ToolError);
|
|
193
|
+
});
|
|
194
|
+
|
|
195
|
+
it("throws ToolError when the resolved model has no API key", async () => {
|
|
196
|
+
const session = makeSession({ apiKey: null });
|
|
197
|
+
await expect(runEvalLlm({ prompt: "q", model: "smol" }, { session })).rejects.toBeInstanceOf(ToolError);
|
|
198
|
+
});
|
|
199
|
+
|
|
200
|
+
it("maps error and aborted stop reasons to ToolError", async () => {
|
|
201
|
+
vi.spyOn(ai, "completeSimple").mockResolvedValueOnce(assistant({ stopReason: "error", errorMessage: "boom" }));
|
|
202
|
+
await expect(runEvalLlm({ prompt: "q", model: "smol" }, { session: makeSession() })).rejects.toThrow("boom");
|
|
203
|
+
|
|
204
|
+
vi.spyOn(ai, "completeSimple").mockResolvedValueOnce(assistant({ stopReason: "aborted" }));
|
|
205
|
+
await expect(runEvalLlm({ prompt: "q", model: "smol" }, { session: makeSession() })).rejects.toBeInstanceOf(
|
|
206
|
+
ToolError,
|
|
207
|
+
);
|
|
208
|
+
});
|
|
209
|
+
|
|
210
|
+
it("throws ToolError when plain mode produces no text", async () => {
|
|
211
|
+
vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "" }));
|
|
212
|
+
await expect(runEvalLlm({ prompt: "q", model: "smol" }, { session: makeSession() })).rejects.toBeInstanceOf(
|
|
213
|
+
ToolError,
|
|
214
|
+
);
|
|
215
|
+
});
|
|
216
|
+
});
|
|
217
|
+
|
|
218
|
+
describe("llm() through eval runtimes", () => {
|
|
219
|
+
afterEach(() => {
|
|
220
|
+
vi.restoreAllMocks();
|
|
221
|
+
});
|
|
222
|
+
|
|
223
|
+
afterAll(async () => {
|
|
224
|
+
await disposeAllVmContexts();
|
|
225
|
+
await disposeAllKernelSessions();
|
|
226
|
+
});
|
|
227
|
+
|
|
228
|
+
it("exposes llm() in the JavaScript runtime", async () => {
|
|
229
|
+
using tempDir = TempDir.createSync("@omp-eval-llm-js-");
|
|
230
|
+
const sessionFile = path.join(tempDir.path(), "session.jsonl");
|
|
231
|
+
const sessionId = `js-llm:${crypto.randomUUID()}`;
|
|
232
|
+
vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "hello from smol" }));
|
|
233
|
+
|
|
234
|
+
const result = await executeJs('return await llm("hi", { model: "smol" });', {
|
|
235
|
+
cwd: tempDir.path(),
|
|
236
|
+
sessionId,
|
|
237
|
+
session: makeSession(),
|
|
238
|
+
sessionFile,
|
|
239
|
+
});
|
|
240
|
+
|
|
241
|
+
expect(result.exitCode).toBe(0);
|
|
242
|
+
expect(result.output.trim()).toBe("hello from smol");
|
|
243
|
+
});
|
|
244
|
+
|
|
245
|
+
it("parses structured llm() output in the JavaScript runtime", async () => {
|
|
246
|
+
using tempDir = TempDir.createSync("@omp-eval-llm-js-struct-");
|
|
247
|
+
const sessionFile = path.join(tempDir.path(), "session.jsonl");
|
|
248
|
+
const sessionId = `js-llm-struct:${crypto.randomUUID()}`;
|
|
249
|
+
vi.spyOn(ai, "completeSimple").mockResolvedValue(
|
|
250
|
+
assistant({ toolCall: { name: "respond", arguments: { ok: true, n: 3 } } }),
|
|
251
|
+
);
|
|
252
|
+
|
|
253
|
+
const result = await executeJs(
|
|
254
|
+
'const r = await llm("hi", { schema: { type: "object" } }); return JSON.stringify(r);',
|
|
255
|
+
{ cwd: tempDir.path(), sessionId, session: makeSession(), sessionFile },
|
|
256
|
+
);
|
|
257
|
+
|
|
258
|
+
expect(result.exitCode).toBe(0);
|
|
259
|
+
expect(JSON.parse(result.output.trim())).toEqual({ ok: true, n: 3 });
|
|
260
|
+
});
|
|
261
|
+
|
|
262
|
+
it("exposes llm() in the Python runtime", async () => {
|
|
263
|
+
using tempDir = TempDir.createSync("@omp-eval-llm-py-");
|
|
264
|
+
const sessionFile = path.join(tempDir.path(), "session.jsonl");
|
|
265
|
+
const sessionId = `py-llm:${crypto.randomUUID()}`;
|
|
266
|
+
vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "hello from python" }));
|
|
267
|
+
|
|
268
|
+
const result = await executePython('print(llm("hi", model="smol"))', {
|
|
269
|
+
cwd: tempDir.path(),
|
|
270
|
+
sessionId,
|
|
271
|
+
sessionFile,
|
|
272
|
+
toolSession: makeSession(),
|
|
273
|
+
});
|
|
274
|
+
|
|
275
|
+
expect(result.exitCode).toBe(0);
|
|
276
|
+
expect(result.output.trim()).toBe("hello from python");
|
|
277
|
+
});
|
|
278
|
+
|
|
279
|
+
it("parses structured llm() output in the Python runtime", async () => {
|
|
280
|
+
using tempDir = TempDir.createSync("@omp-eval-llm-py-struct-");
|
|
281
|
+
const sessionFile = path.join(tempDir.path(), "session.jsonl");
|
|
282
|
+
const sessionId = `py-llm-struct:${crypto.randomUUID()}`;
|
|
283
|
+
vi.spyOn(ai, "completeSimple").mockResolvedValue(
|
|
284
|
+
assistant({ toolCall: { name: "respond", arguments: { ok: true } } }),
|
|
285
|
+
);
|
|
286
|
+
|
|
287
|
+
const result = await executePython('import json\nprint(json.dumps(llm("hi", schema={"type": "object"})))', {
|
|
288
|
+
cwd: tempDir.path(),
|
|
289
|
+
sessionId,
|
|
290
|
+
sessionFile,
|
|
291
|
+
toolSession: makeSession(),
|
|
292
|
+
});
|
|
293
|
+
|
|
294
|
+
expect(result.exitCode).toBe(0);
|
|
295
|
+
expect(JSON.parse(result.output.trim())).toEqual({ ok: true });
|
|
296
|
+
});
|
|
297
|
+
});
|
|
@@ -492,6 +492,42 @@ display({"label": "A"})`,
|
|
|
492
492
|
expect(reloaded.output.trim()).toBe("2");
|
|
493
493
|
});
|
|
494
494
|
|
|
495
|
+
it("loads TypeScript type-only imports in cells and local modules", async () => {
|
|
496
|
+
using tempDir = TempDir.createSync("@omp-eval-js-type-imports-");
|
|
497
|
+
const sessionFile = path.join(tempDir.path(), "session.jsonl");
|
|
498
|
+
const sessionId = `js-type-imports:${crypto.randomUUID()}`;
|
|
499
|
+
const session = createToolSession(tempDir.path(), sessionFile);
|
|
500
|
+
const typesPath = path.join(tempDir.path(), "types.ts");
|
|
501
|
+
const valuesPath = path.join(tempDir.path(), "values.ts");
|
|
502
|
+
const entryPath = path.join(tempDir.path(), "entry.ts");
|
|
503
|
+
const typesSpec = JSON.stringify(typesPath);
|
|
504
|
+
const entrySpec = JSON.stringify(entryPath);
|
|
505
|
+
await Bun.write(typesPath, "export interface TypeOnly { value: number }\n");
|
|
506
|
+
await Bun.write(valuesPath, "export interface InlineOnly { value: number }\nexport const imported = 41;\n");
|
|
507
|
+
await Bun.write(
|
|
508
|
+
entryPath,
|
|
509
|
+
[
|
|
510
|
+
'import type { TypeOnly } from "./types.ts";',
|
|
511
|
+
'import { type InlineOnly, imported } from "./values.ts";',
|
|
512
|
+
"export const typeOnly = 1;",
|
|
513
|
+
"export const inlineType = imported;",
|
|
514
|
+
"",
|
|
515
|
+
].join("\n"),
|
|
516
|
+
);
|
|
517
|
+
|
|
518
|
+
const result = await executeJs(
|
|
519
|
+
`import type { TypeOnly } from ${typesSpec};\nconst mod = await import(${entrySpec});\nreturn mod.typeOnly + mod.inlineType;`,
|
|
520
|
+
{
|
|
521
|
+
sessionId,
|
|
522
|
+
session,
|
|
523
|
+
sessionFile,
|
|
524
|
+
},
|
|
525
|
+
);
|
|
526
|
+
|
|
527
|
+
expect(result.exitCode).toBe(0);
|
|
528
|
+
expect(result.output.trim()).toBe("42");
|
|
529
|
+
});
|
|
530
|
+
|
|
495
531
|
it("refreshes the Python tool proxy when bridge env appears after kernel warm-up", async () => {
|
|
496
532
|
using tempDir = TempDir.createSync("@omp-eval-py-tool-proxy-");
|
|
497
533
|
const sessionFile = path.join(tempDir.path(), "session.jsonl");
|
|
@@ -88,7 +88,10 @@ export class LocalModuleLoader {
|
|
|
88
88
|
|
|
89
89
|
async #buildLocalModule(modulePath: string): Promise<LocalModuleEntry> {
|
|
90
90
|
const rawSource = fs.readFileSync(modulePath, "utf8");
|
|
91
|
-
const stripped = stripTypeScriptSyntax(rawSource
|
|
91
|
+
const stripped = stripTypeScriptSyntax(rawSource, {
|
|
92
|
+
force: isTypeScriptModulePath(modulePath),
|
|
93
|
+
loader: stripLoaderForPath(modulePath),
|
|
94
|
+
});
|
|
92
95
|
const moduleDir = path.dirname(modulePath);
|
|
93
96
|
const localDeps = new Set<string>();
|
|
94
97
|
for (const specifier of collectModuleSourceSpecifiers(stripped)) {
|
|
@@ -251,6 +254,15 @@ function isLocalPathSpecifier(source: string): boolean {
|
|
|
251
254
|
);
|
|
252
255
|
}
|
|
253
256
|
|
|
257
|
+
function isTypeScriptModulePath(modulePath: string): boolean {
|
|
258
|
+
const ext = path.extname(modulePath);
|
|
259
|
+
return ext === ".ts" || ext === ".tsx" || ext === ".mts";
|
|
260
|
+
}
|
|
261
|
+
|
|
262
|
+
function stripLoaderForPath(modulePath: string): "ts" | "tsx" {
|
|
263
|
+
return path.extname(modulePath) === ".tsx" ? "tsx" : "ts";
|
|
264
|
+
}
|
|
265
|
+
|
|
254
266
|
function isManagedLocalModulePath(target: string): boolean {
|
|
255
267
|
return (
|
|
256
268
|
path.isAbsolute(target) &&
|
|
@@ -39,6 +39,13 @@ if (!globalThis.__omp_js_prelude_loaded__) {
|
|
|
39
39
|
return values.length === 1 ? values[0] : values;
|
|
40
40
|
};
|
|
41
41
|
|
|
42
|
+
const llm = async (prompt, opts = {}) => {
|
|
43
|
+
const o = toOptions(opts);
|
|
44
|
+
const res = await globalThis.__omp_call_tool__("__llm__", { prompt, ...o });
|
|
45
|
+
const text = res && typeof res === "object" ? res.text : res;
|
|
46
|
+
return o.schema ? JSON.parse(text) : text;
|
|
47
|
+
};
|
|
48
|
+
|
|
42
49
|
const display = value => {
|
|
43
50
|
globalThis.__omp_display__(value);
|
|
44
51
|
};
|
|
@@ -61,6 +68,7 @@ if (!globalThis.__omp_js_prelude_loaded__) {
|
|
|
61
68
|
globalThis.print = consoleBridge.log;
|
|
62
69
|
globalThis.display = display;
|
|
63
70
|
globalThis.tool = tool;
|
|
71
|
+
globalThis.llm = llm;
|
|
64
72
|
globalThis.output = output;
|
|
65
73
|
globalThis.read = read;
|
|
66
74
|
globalThis.write = write;
|
|
@@ -75,6 +75,7 @@ function parseProgram(code: string): { program: { body: ReadonlyArray<BabelProgr
|
|
|
75
75
|
allowSuperOutsideMethod: true,
|
|
76
76
|
allowUndeclaredExports: true,
|
|
77
77
|
errorRecovery: true,
|
|
78
|
+
plugins: ["typescript"],
|
|
78
79
|
}) as unknown as { program: { body: ReadonlyArray<BabelProgramNode> } };
|
|
79
80
|
} catch {
|
|
80
81
|
return null;
|
|
@@ -178,8 +179,7 @@ export function rewriteImports(code: string): string {
|
|
|
178
179
|
if (node.type !== "CallExpression") return;
|
|
179
180
|
const call = node as unknown as { callee?: { type?: string; start?: number; end?: number } };
|
|
180
181
|
const callee = call.callee;
|
|
181
|
-
if (
|
|
182
|
-
return;
|
|
182
|
+
if (callee?.type !== "Import" || typeof callee.start !== "number" || typeof callee.end !== "number") return;
|
|
183
183
|
edits.push({ start: callee.start, end: callee.end, text: "__omp_import__" });
|
|
184
184
|
});
|
|
185
185
|
|
|
@@ -252,12 +252,7 @@ export function rewriteDynamicImports(code: string, callee = "__omp_import__"):
|
|
|
252
252
|
if (node.type !== "CallExpression") return;
|
|
253
253
|
const call = node as unknown as { callee?: { type?: string; start?: number; end?: number } };
|
|
254
254
|
const callCallee = call.callee;
|
|
255
|
-
if (
|
|
256
|
-
!callCallee ||
|
|
257
|
-
callCallee.type !== "Import" ||
|
|
258
|
-
typeof callCallee.start !== "number" ||
|
|
259
|
-
typeof callCallee.end !== "number"
|
|
260
|
-
) {
|
|
255
|
+
if (callCallee?.type !== "Import" || typeof callCallee.start !== "number" || typeof callCallee.end !== "number") {
|
|
261
256
|
return;
|
|
262
257
|
}
|
|
263
258
|
edits.push({ start: callCallee.start, end: callCallee.end, text: callee });
|
|
@@ -453,38 +448,48 @@ function requiresAsyncWrapper(code: string): boolean {
|
|
|
453
448
|
}
|
|
454
449
|
|
|
455
450
|
/**
|
|
456
|
-
* Strip TypeScript syntax (type annotations, `interface`, `as`,
|
|
457
|
-
* call expressions, etc.) before the import/lexical rewriters parse
|
|
458
|
-
* native transpiler
|
|
459
|
-
*
|
|
451
|
+
* Strip TypeScript syntax (type annotations, type-only imports/exports, `interface`, `as`,
|
|
452
|
+
* `satisfies`, generics in call expressions, etc.) before the import/lexical rewriters parse
|
|
453
|
+
* the code. Bun's native transpiler preserves `import`/`export` declarations, so downstream
|
|
454
|
+
* Babel rewrites still control module resolution.
|
|
460
455
|
*
|
|
461
|
-
*
|
|
462
|
-
*
|
|
463
|
-
*
|
|
456
|
+
* Eval cells use a cheap "looks like TS" heuristic to avoid transpiling ordinary JS. Known
|
|
457
|
+
* TypeScript modules pass `force` because a file can contain TS-only module syntax such as
|
|
458
|
+
* `import type` without any value-level type annotations.
|
|
464
459
|
*/
|
|
465
|
-
|
|
466
|
-
|
|
460
|
+
type TypeScriptStripLoader = "ts" | "tsx";
|
|
461
|
+
|
|
462
|
+
const TS_TRANSPILER = new Bun.Transpiler({ loader: "ts" });
|
|
463
|
+
const TSX_TRANSPILER = new Bun.Transpiler({ loader: "tsx" });
|
|
464
|
+
|
|
465
|
+
function stripTypeScript(code: string, options: { force?: boolean; loader?: TypeScriptStripLoader } = {}): string {
|
|
466
|
+
if (!options.force && !LOOKS_LIKE_TS.test(code)) return code;
|
|
467
467
|
try {
|
|
468
|
-
|
|
468
|
+
const transpiler = options.loader === "tsx" ? TSX_TRANSPILER : TS_TRANSPILER;
|
|
469
|
+
return transpiler.transformSync(code);
|
|
469
470
|
} catch {
|
|
470
471
|
// Transpiler failed (e.g. unrecoverable syntax). Hand the original source back so the
|
|
471
472
|
// downstream rewriter / VM surfaces the real error to the user.
|
|
472
473
|
return code;
|
|
473
474
|
}
|
|
474
475
|
}
|
|
475
|
-
export function stripTypeScriptSyntax(
|
|
476
|
-
|
|
476
|
+
export function stripTypeScriptSyntax(
|
|
477
|
+
code: string,
|
|
478
|
+
options: { force?: boolean; loader?: TypeScriptStripLoader } = {},
|
|
479
|
+
): string {
|
|
480
|
+
return stripTypeScript(code, options);
|
|
477
481
|
}
|
|
478
482
|
|
|
479
|
-
// Heuristic:
|
|
480
|
-
// won't match because we require a leading word boundary plus a
|
|
483
|
+
// Heuristic: obvious TS-only tokens, including type-only module syntax. Plain JS using `as`
|
|
484
|
+
// only inside strings won't match because we require a leading word boundary plus a
|
|
485
|
+
// colon/keyword neighbor.
|
|
481
486
|
const LOOKS_LIKE_TS =
|
|
482
|
-
/(?:\binterface\s+\w|\btype\s+\w+\s*=|\b(?:as|satisfies)\s+(?:[A-Z]|\bconst\b)|:\s*(?:string|number|boolean|any|unknown|void|never|object|[A-Z]\w*)\b|<\s*[A-Z]\w*\s*[,>])/;
|
|
487
|
+
/(?:\bimport\s+type\b|\bexport\s+type\b|\b(?:import|export)\s*\{[^}\n]*\btype\s+\w|\binterface\s+\w|\btype\s+\w+\s*=|\b(?:as|satisfies)\s+(?:[A-Z]|\bconst\b)|:\s*(?:string|number|boolean|any|unknown|void|never|object|[A-Z]\w*)\b|<\s*[A-Z]\w*\s*[,>])/;
|
|
483
488
|
|
|
484
489
|
export function wrapCode(code: string): { source: string; asyncWrapped: boolean; finalExpressionReturned: boolean } {
|
|
485
|
-
const
|
|
486
|
-
const
|
|
487
|
-
const importsRewritten = rewriteImports(
|
|
490
|
+
const finalExpression = returnFinalExpression(code);
|
|
491
|
+
const stripped = stripTypeScript(finalExpression.source);
|
|
492
|
+
const importsRewritten = rewriteImports(stripped);
|
|
488
493
|
const needsAsyncWrapper = requiresAsyncWrapper(importsRewritten);
|
|
489
494
|
const rewritten = {
|
|
490
495
|
source: demoteTopLevelLexicals(importsRewritten, { publishGlobals: needsAsyncWrapper }),
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import type { AgentTool, AgentToolResult } from "@oh-my-pi/pi-agent-core";
|
|
2
2
|
import type { ToolSession } from "../../tools";
|
|
3
3
|
import { ToolError } from "../../tools/tool-errors";
|
|
4
|
+
import { EVAL_LLM_BRIDGE_NAME, runEvalLlm } from "../llm-bridge";
|
|
4
5
|
import type { JsStatusEvent } from "./shared/types";
|
|
5
6
|
|
|
6
7
|
export type { JsStatusEvent } from "./shared/types";
|
|
@@ -101,6 +102,9 @@ function summarizeToolResult(
|
|
|
101
102
|
}
|
|
102
103
|
|
|
103
104
|
export async function callSessionTool(name: string, args: unknown, options: ToolBridgeOptions): Promise<ToolValue> {
|
|
105
|
+
if (name === EVAL_LLM_BRIDGE_NAME) {
|
|
106
|
+
return await runEvalLlm(args, options);
|
|
107
|
+
}
|
|
104
108
|
const tool = getTool(options.session, name);
|
|
105
109
|
const normalizedArgs = normalizeArgs(args);
|
|
106
110
|
const toolCallId = `js-${name}-${crypto.randomUUID()}`;
|