@oh-my-pi/pi-coding-agent 15.5.13 → 15.5.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +31 -0
- package/dist/types/config/model-registry.d.ts +1 -1
- package/dist/types/config/models-config-schema.d.ts +2 -0
- package/dist/types/config/settings-schema.d.ts +1 -10
- package/dist/types/eval/__tests__/llm-bridge.test.d.ts +1 -0
- package/dist/types/eval/llm-bridge.d.ts +25 -0
- package/dist/types/export/html/template.generated.d.ts +1 -1
- package/dist/types/extensibility/plugins/legacy-pi-compat.d.ts +15 -0
- package/dist/types/modes/theme/theme.d.ts +2 -1
- package/dist/types/session/agent-session.d.ts +2 -0
- package/dist/types/tools/index.d.ts +0 -1
- package/package.json +8 -8
- package/src/config/model-registry.ts +89 -5
- package/src/config/models-config-schema.ts +1 -1
- package/src/config/settings-schema.ts +1 -10
- package/src/eval/__tests__/llm-bridge.test.ts +297 -0
- package/src/eval/js/shared/prelude.txt +8 -0
- package/src/eval/js/tool-bridge.ts +4 -0
- package/src/eval/llm-bridge.ts +181 -0
- package/src/eval/py/prelude.py +52 -31
- package/src/export/html/template.generated.ts +1 -1
- package/src/export/html/template.js +0 -13
- package/src/extensibility/plugins/legacy-pi-compat.ts +60 -23
- package/src/internal-urls/docs-index.generated.ts +3 -4
- package/src/main.ts +4 -0
- package/src/modes/components/model-selector.ts +119 -22
- package/src/modes/components/status-line/presets.ts +1 -0
- package/src/modes/components/status-line/segments.ts +23 -0
- package/src/modes/interactive-mode.ts +22 -87
- package/src/modes/theme/theme.ts +7 -0
- package/src/prompts/tools/eval.md +2 -0
- package/src/session/agent-session.ts +19 -0
- package/src/session/session-manager.ts +47 -0
- package/src/tools/eval.ts +24 -48
- package/src/tools/index.ts +0 -4
- package/src/tools/renderers.ts +0 -2
- package/dist/types/tools/calculator.d.ts +0 -77
- package/src/prompts/tools/calculator.md +0 -10
- package/src/tools/calculator.ts +0 -541
|
@@ -1,3 +1,18 @@
|
|
|
1
|
+
import * as path from "node:path";
|
|
2
|
+
/**
|
|
3
|
+
* Compute the bunfs package root from the compiled binary's `import.meta.dir`
|
|
4
|
+
* (or any stand-in supplied by tests). Bun 1.3 reports the bunfs mount root
|
|
5
|
+
* (`/$bunfs/root` or `<drive>:\~BUN\root`) for imported modules as well as the
|
|
6
|
+
* entrypoint, so the normal path is `<root>/packages`.
|
|
7
|
+
*
|
|
8
|
+
* The suffix branch preserves correctness if a future Bun release switches to
|
|
9
|
+
* module-specific `import.meta.dir` values inside compiled binaries, matching
|
|
10
|
+
* the source layout:
|
|
11
|
+
* `<bunfs>/packages/coding-agent/src/extensibility/plugins`.
|
|
12
|
+
*
|
|
13
|
+
* Exported for tests; production callers use `BUNFS_PACKAGE_ROOT` below.
|
|
14
|
+
*/
|
|
15
|
+
export declare function __computeBunfsPackageRoot(metaDir: string, pathImpl?: typeof path): string;
|
|
1
16
|
export declare function loadLegacyPiModule(resolvedPath: string): Promise<unknown>;
|
|
2
17
|
export declare function installLegacyPiSpecifierShim(): void;
|
|
3
18
|
/** Test seam: clears the memoized canonical specifier resolutions. */
|
|
@@ -6,7 +6,7 @@ export type SymbolPreset = "unicode" | "nerd" | "ascii";
|
|
|
6
6
|
/**
|
|
7
7
|
* All available symbol keys organized by category.
|
|
8
8
|
*/
|
|
9
|
-
export type SymbolKey = "status.success" | "status.error" | "status.warning" | "status.info" | "status.pending" | "status.disabled" | "status.enabled" | "status.running" | "status.shadowed" | "status.aborted" | "nav.cursor" | "nav.selected" | "nav.expand" | "nav.collapse" | "nav.back" | "tree.branch" | "tree.last" | "tree.vertical" | "tree.horizontal" | "tree.hook" | "boxRound.topLeft" | "boxRound.topRight" | "boxRound.bottomLeft" | "boxRound.bottomRight" | "boxRound.horizontal" | "boxRound.vertical" | "boxSharp.topLeft" | "boxSharp.topRight" | "boxSharp.bottomLeft" | "boxSharp.bottomRight" | "boxSharp.horizontal" | "boxSharp.vertical" | "boxSharp.cross" | "boxSharp.teeDown" | "boxSharp.teeUp" | "boxSharp.teeRight" | "boxSharp.teeLeft" | "sep.powerline" | "sep.powerlineThin" | "sep.powerlineLeft" | "sep.powerlineRight" | "sep.powerlineThinLeft" | "sep.powerlineThinRight" | "sep.block" | "sep.space" | "sep.asciiLeft" | "sep.asciiRight" | "sep.dot" | "sep.slash" | "sep.pipe" | "icon.model" | "icon.plan" | "icon.goal" | "icon.pause" | "icon.loop" | "icon.folder" | "icon.scratchFolder" | "icon.file" | "icon.git" | "icon.branch" | "icon.pr" | "icon.tokens" | "icon.context" | "icon.cost" | "icon.time" | "icon.pi" | "icon.agents" | "icon.cache" | "icon.input" | "icon.output" | "icon.host" | "icon.session" | "icon.package" | "icon.warning" | "icon.rewind" | "icon.auto" | "icon.fast" | "icon.extensionSkill" | "icon.extensionTool" | "icon.extensionSlashCommand" | "icon.extensionMcp" | "icon.extensionRule" | "icon.extensionHook" | "icon.extensionPrompt" | "icon.extensionContextFile" | "icon.extensionInstruction" | "icon.mic" | "thinking.minimal" | "thinking.low" | "thinking.medium" | "thinking.high" | "thinking.xhigh" | "checkbox.checked" | "checkbox.unchecked" | "format.bullet" | "format.dash" | "format.bracketLeft" | "format.bracketRight" | "md.quoteBorder" | "md.hrChar" | "md.bullet" | "lang.default" | "lang.typescript" | "lang.javascript" | "lang.python" | "lang.rust" | 
"lang.go" | "lang.java" | "lang.c" | "lang.cpp" | "lang.csharp" | "lang.ruby" | "lang.php" | "lang.swift" | "lang.kotlin" | "lang.shell" | "lang.html" | "lang.css" | "lang.json" | "lang.yaml" | "lang.markdown" | "lang.sql" | "lang.docker" | "lang.lua" | "lang.text" | "lang.env" | "lang.toml" | "lang.xml" | "lang.ini" | "lang.conf" | "lang.log" | "lang.csv" | "lang.tsv" | "lang.image" | "lang.pdf" | "lang.archive" | "lang.binary" | "tab.appearance" | "tab.model" | "tab.interaction" | "tab.context" | "tab.editing" | "tab.tools" | "tab.memory" | "tab.tasks" | "tab.providers";
|
|
9
|
+
export type SymbolKey = "status.success" | "status.error" | "status.warning" | "status.info" | "status.pending" | "status.disabled" | "status.enabled" | "status.running" | "status.shadowed" | "status.aborted" | "nav.cursor" | "nav.selected" | "nav.expand" | "nav.collapse" | "nav.back" | "tree.branch" | "tree.last" | "tree.vertical" | "tree.horizontal" | "tree.hook" | "boxRound.topLeft" | "boxRound.topRight" | "boxRound.bottomLeft" | "boxRound.bottomRight" | "boxRound.horizontal" | "boxRound.vertical" | "boxSharp.topLeft" | "boxSharp.topRight" | "boxSharp.bottomLeft" | "boxSharp.bottomRight" | "boxSharp.horizontal" | "boxSharp.vertical" | "boxSharp.cross" | "boxSharp.teeDown" | "boxSharp.teeUp" | "boxSharp.teeRight" | "boxSharp.teeLeft" | "sep.powerline" | "sep.powerlineThin" | "sep.powerlineLeft" | "sep.powerlineRight" | "sep.powerlineThinLeft" | "sep.powerlineThinRight" | "sep.block" | "sep.space" | "sep.asciiLeft" | "sep.asciiRight" | "sep.dot" | "sep.slash" | "sep.pipe" | "icon.model" | "icon.plan" | "icon.goal" | "icon.pause" | "icon.loop" | "icon.folder" | "icon.scratchFolder" | "icon.file" | "icon.git" | "icon.branch" | "icon.pr" | "icon.tokens" | "icon.context" | "icon.cost" | "icon.time" | "icon.pi" | "icon.agents" | "icon.cache" | "icon.input" | "icon.output" | "icon.host" | "icon.session" | "icon.package" | "icon.warning" | "icon.rewind" | "icon.auto" | "icon.fast" | "icon.extensionSkill" | "icon.extensionTool" | "icon.extensionSlashCommand" | "icon.extensionMcp" | "icon.extensionRule" | "icon.extensionHook" | "icon.extensionPrompt" | "icon.extensionContextFile" | "icon.extensionInstruction" | "icon.mic" | "thinking.minimal" | "thinking.low" | "thinking.medium" | "thinking.high" | "thinking.xhigh" | "checkbox.checked" | "checkbox.unchecked" | "format.bullet" | "format.dash" | "format.bracketLeft" | "format.bracketRight" | "md.quoteBorder" | "md.hrChar" | "md.bullet" | "md.colorSwatch" | "lang.default" | "lang.typescript" | "lang.javascript" | 
"lang.python" | "lang.rust" | "lang.go" | "lang.java" | "lang.c" | "lang.cpp" | "lang.csharp" | "lang.ruby" | "lang.php" | "lang.swift" | "lang.kotlin" | "lang.shell" | "lang.html" | "lang.css" | "lang.json" | "lang.yaml" | "lang.markdown" | "lang.sql" | "lang.docker" | "lang.lua" | "lang.text" | "lang.env" | "lang.toml" | "lang.xml" | "lang.ini" | "lang.conf" | "lang.log" | "lang.csv" | "lang.tsv" | "lang.image" | "lang.pdf" | "lang.archive" | "lang.binary" | "tab.appearance" | "tab.model" | "tab.interaction" | "tab.context" | "tab.editing" | "tab.tools" | "tab.memory" | "tab.tasks" | "tab.providers";
|
|
10
10
|
export type SpinnerType = "status" | "activity";
|
|
11
11
|
export type ThemeColor = "accent" | "border" | "borderAccent" | "borderMuted" | "success" | "error" | "warning" | "muted" | "dim" | "text" | "thinkingText" | "userMessageText" | "customMessageText" | "customMessageLabel" | "toolTitle" | "toolOutput" | "mdHeading" | "mdLink" | "mdLinkUrl" | "mdCode" | "mdCodeBlock" | "mdCodeBlockBorder" | "mdQuote" | "mdQuoteBorder" | "mdHr" | "mdListBullet" | "toolDiffAdded" | "toolDiffRemoved" | "toolDiffContext" | "syntaxComment" | "syntaxKeyword" | "syntaxFunction" | "syntaxVariable" | "syntaxString" | "syntaxNumber" | "syntaxType" | "syntaxOperator" | "syntaxPunctuation" | "thinkingOff" | "thinkingMinimal" | "thinkingLow" | "thinkingMedium" | "thinkingHigh" | "thinkingXhigh" | "bashMode" | "pythonMode" | "statusLineSep" | "statusLineModel" | "statusLinePath" | "statusLineGitClean" | "statusLineGitDirty" | "statusLineContext" | "statusLineSpend" | "statusLineStaged" | "statusLineDirty" | "statusLineUntracked" | "statusLineOutput" | "statusLineCost" | "statusLineSubagents";
|
|
12
12
|
/** Check if a string is a valid ThemeColor value */
|
|
@@ -165,6 +165,7 @@ export declare class Theme {
|
|
|
165
165
|
quoteBorder: string;
|
|
166
166
|
hrChar: string;
|
|
167
167
|
bullet: string;
|
|
168
|
+
colorSwatch: string;
|
|
168
169
|
};
|
|
169
170
|
/**
|
|
170
171
|
* Default spinner frames (status spinner).
|
|
@@ -262,6 +262,8 @@ export interface SessionStats {
|
|
|
262
262
|
premiumRequests: number;
|
|
263
263
|
cost: number;
|
|
264
264
|
}
|
|
265
|
+
export declare const ANTHROPIC_TOOL_CALL_BATCH_CAP = 4;
|
|
266
|
+
export declare function resolveToolCallBatchCapForModel(model: Model | undefined): number | undefined;
|
|
265
267
|
export declare class AgentSession {
|
|
266
268
|
#private;
|
|
267
269
|
readonly agent: Agent;
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"type": "module",
|
|
3
3
|
"name": "@oh-my-pi/pi-coding-agent",
|
|
4
|
-
"version": "15.5.13",
|
|
4
|
+
"version": "15.5.15",
|
|
5
5
|
"description": "Coding agent CLI with read, bash, edit, write tools and session management",
|
|
6
6
|
"homepage": "https://omp.sh",
|
|
7
7
|
"author": "Can Boluk",
|
|
@@ -47,13 +47,13 @@
|
|
|
47
47
|
"@agentclientprotocol/sdk": "0.21.0",
|
|
48
48
|
"@babel/parser": "^7.29.3",
|
|
49
49
|
"@mozilla/readability": "^0.6.0",
|
|
50
|
-
"@oh-my-pi/hashline": "15.5.13",
|
|
51
|
-
"@oh-my-pi/omp-stats": "15.5.13",
|
|
52
|
-
"@oh-my-pi/pi-agent-core": "15.5.13",
|
|
53
|
-
"@oh-my-pi/pi-ai": "15.5.13",
|
|
54
|
-
"@oh-my-pi/pi-natives": "15.5.13",
|
|
55
|
-
"@oh-my-pi/pi-tui": "15.5.13",
|
|
56
|
-
"@oh-my-pi/pi-utils": "15.5.13",
|
|
50
|
+
"@oh-my-pi/hashline": "15.5.15",
|
|
51
|
+
"@oh-my-pi/omp-stats": "15.5.15",
|
|
52
|
+
"@oh-my-pi/pi-agent-core": "15.5.15",
|
|
53
|
+
"@oh-my-pi/pi-ai": "15.5.15",
|
|
54
|
+
"@oh-my-pi/pi-natives": "15.5.15",
|
|
55
|
+
"@oh-my-pi/pi-tui": "15.5.15",
|
|
56
|
+
"@oh-my-pi/pi-utils": "15.5.15",
|
|
57
57
|
"@puppeteer/browsers": "^2.13.0",
|
|
58
58
|
"@types/turndown": "5.0.6",
|
|
59
59
|
"@xterm/headless": "^6.0.0",
|
|
@@ -192,7 +192,7 @@ function validateProviderConfiguration(
|
|
|
192
192
|
}
|
|
193
193
|
}
|
|
194
194
|
|
|
195
|
-
if (mode === "models-config" && config.discovery && !config.api) {
|
|
195
|
+
if (mode === "models-config" && config.discovery && !config.api && config.discovery.type !== "proxy") {
|
|
196
196
|
throw new Error(`Provider ${providerName}: "api" is required when discovery is enabled at provider level.`);
|
|
197
197
|
}
|
|
198
198
|
|
|
@@ -1209,13 +1209,17 @@ export class ModelRegistry {
|
|
|
1209
1209
|
keylessProviders.add(providerName);
|
|
1210
1210
|
}
|
|
1211
1211
|
|
|
1212
|
-
if (providerConfig.discovery && providerConfig.api) {
|
|
1212
|
+
if (providerConfig.discovery && (providerConfig.api || providerConfig.discovery.type === "proxy")) {
|
|
1213
|
+
const disableStrictCompat = providerConfig.disableStrictTools ? { disableStrictTools: true } : undefined;
|
|
1213
1214
|
discoverableProviders.push({
|
|
1214
1215
|
provider: providerName,
|
|
1215
|
-
|
|
1216
|
+
// Proxy discovery derives per-model api from /v1/models's
|
|
1217
|
+
// supported_endpoint_types; the provider-level api is only a
|
|
1218
|
+
// fallback for entries that don't advertise one.
|
|
1219
|
+
api: (providerConfig.api ?? "openai-completions") as Api,
|
|
1216
1220
|
baseUrl: providerConfig.baseUrl,
|
|
1217
1221
|
headers: providerConfig.headers,
|
|
1218
|
-
compat: providerConfig.compat,
|
|
1222
|
+
compat: mergeCompat(providerConfig.compat, disableStrictCompat),
|
|
1219
1223
|
discovery: providerConfig.discovery,
|
|
1220
1224
|
optional: false,
|
|
1221
1225
|
});
|
|
@@ -1385,6 +1389,8 @@ export class ModelRegistry {
|
|
|
1385
1389
|
case "lm-studio":
|
|
1386
1390
|
case "openai-models-list":
|
|
1387
1391
|
return this.#discoverOpenAIModelsList(providerConfig);
|
|
1392
|
+
case "proxy":
|
|
1393
|
+
return this.#discoverProxyModels(providerConfig);
|
|
1388
1394
|
}
|
|
1389
1395
|
}
|
|
1390
1396
|
|
|
@@ -1711,7 +1717,7 @@ export class ModelRegistry {
|
|
|
1711
1717
|
|
|
1712
1718
|
const response = await fetch(modelsUrl, {
|
|
1713
1719
|
headers,
|
|
1714
|
-
signal: AbortSignal.timeout(
|
|
1720
|
+
signal: AbortSignal.timeout(10_000),
|
|
1715
1721
|
});
|
|
1716
1722
|
if (!response.ok) {
|
|
1717
1723
|
throw new Error(`HTTP ${response.status} from ${modelsUrl}`);
|
|
@@ -1746,6 +1752,84 @@ export class ModelRegistry {
|
|
|
1746
1752
|
return this.#applyProviderModelOverrides(providerConfig.provider, discovered);
|
|
1747
1753
|
}
|
|
1748
1754
|
|
|
1755
|
+
/**
|
|
1756
|
+
* Discover models from an Anthropic+OpenAI-compatible reseller proxy that
|
|
1757
|
+
* exposes both `/v1/messages` and `/v1/chat/completions`, advertising each
|
|
1758
|
+
* model's wire capabilities through `supported_endpoint_types` on
|
|
1759
|
+
* `GET /v1/models` (new-api / one-api-style proxies).
|
|
1760
|
+
*
|
|
1761
|
+
* Routing per model:
|
|
1762
|
+
* supported_endpoint_types: ["anthropic", ...] -> api: "anthropic-messages"
|
|
1763
|
+
* supported_endpoint_types: ["openai"] -> api: "openai-completions"
|
|
1764
|
+
* missing / neither -> provider-level api fallback
|
|
1765
|
+
*
|
|
1766
|
+
* Anthropic models share the same baseUrl; the Anthropic SDK strips a
|
|
1767
|
+
* trailing `/v1` itself before appending `/v1/messages`, so the discovery
|
|
1768
|
+
* URL (which ends in `/v1`) round-trips correctly.
|
|
1769
|
+
*/
|
|
1770
|
+
async #discoverProxyModels(providerConfig: DiscoveryProviderConfig): Promise<Model<Api>[]> {
|
|
1771
|
+
const baseUrl = this.#normalizeOpenAIModelsListBaseUrl(providerConfig.baseUrl);
|
|
1772
|
+
const modelsUrl = `${baseUrl}/models`;
|
|
1773
|
+
|
|
1774
|
+
const headers: Record<string, string> = { ...(providerConfig.headers ?? {}) };
|
|
1775
|
+
const apiKey = await this.authStorage.getApiKey(providerConfig.provider);
|
|
1776
|
+
if (apiKey && apiKey !== DEFAULT_LOCAL_TOKEN && apiKey !== kNoAuth) {
|
|
1777
|
+
headers.Authorization = `Bearer ${apiKey}`;
|
|
1778
|
+
}
|
|
1779
|
+
|
|
1780
|
+
const response = await fetch(modelsUrl, {
|
|
1781
|
+
headers,
|
|
1782
|
+
signal: AbortSignal.timeout(10_000),
|
|
1783
|
+
});
|
|
1784
|
+
if (!response.ok) {
|
|
1785
|
+
throw new Error(`HTTP ${response.status} from ${modelsUrl}`);
|
|
1786
|
+
}
|
|
1787
|
+
const payload = (await response.json()) as {
|
|
1788
|
+
data?: Array<{ id?: string; supported_endpoint_types?: string[] }>;
|
|
1789
|
+
};
|
|
1790
|
+
const items = payload.data ?? [];
|
|
1791
|
+
const discovered: Model<Api>[] = [];
|
|
1792
|
+
for (const item of items) {
|
|
1793
|
+
const id = item.id;
|
|
1794
|
+
if (!id) continue;
|
|
1795
|
+
const endpoints = item.supported_endpoint_types ?? [];
|
|
1796
|
+
const api: Api | undefined = endpoints.includes("anthropic")
|
|
1797
|
+
? "anthropic-messages"
|
|
1798
|
+
: endpoints.includes("openai")
|
|
1799
|
+
? "openai-completions"
|
|
1800
|
+
: providerConfig.api;
|
|
1801
|
+
if (!api) continue;
|
|
1802
|
+
const isAnthropic = api === "anthropic-messages";
|
|
1803
|
+
discovered.push(
|
|
1804
|
+
enrichModelThinking({
|
|
1805
|
+
id,
|
|
1806
|
+
name: id,
|
|
1807
|
+
api,
|
|
1808
|
+
provider: providerConfig.provider,
|
|
1809
|
+
baseUrl,
|
|
1810
|
+
reasoning: false,
|
|
1811
|
+
input: ["text"],
|
|
1812
|
+
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
|
1813
|
+
contextWindow: 128000,
|
|
1814
|
+
maxTokens: 8192,
|
|
1815
|
+
headers,
|
|
1816
|
+
// OpenAI-compat fields are no-ops on anthropic models; the
|
|
1817
|
+
// Anthropic SDK ignores them. Provider-level disableStrictTools
|
|
1818
|
+
// flows in via #applyProviderCompat for the third-party-Anthropic
|
|
1819
|
+
// path.
|
|
1820
|
+
compat: isAnthropic
|
|
1821
|
+
? undefined
|
|
1822
|
+
: {
|
|
1823
|
+
supportsStore: false,
|
|
1824
|
+
supportsDeveloperRole: false,
|
|
1825
|
+
supportsReasoningEffort: false,
|
|
1826
|
+
},
|
|
1827
|
+
}),
|
|
1828
|
+
);
|
|
1829
|
+
}
|
|
1830
|
+
return this.#applyProviderModelOverrides(providerConfig.provider, discovered);
|
|
1831
|
+
}
|
|
1832
|
+
|
|
1749
1833
|
#normalizeLlamaCppBaseUrl(baseUrl?: string): string {
|
|
1750
1834
|
const defaultBaseUrl = "http://127.0.0.1:8080";
|
|
1751
1835
|
const raw = baseUrl || defaultBaseUrl;
|
|
@@ -121,7 +121,7 @@ export const ModelOverrideSchema = z.object({
|
|
|
121
121
|
export type ModelOverride = z.infer<typeof ModelOverrideSchema>;
|
|
122
122
|
|
|
123
123
|
export const ProviderDiscoverySchema = z.object({
|
|
124
|
-
type: z.enum(["ollama", "llama.cpp", "lm-studio", "openai-models-list"]),
|
|
124
|
+
type: z.enum(["ollama", "llama.cpp", "lm-studio", "openai-models-list", "proxy"]),
|
|
125
125
|
});
|
|
126
126
|
|
|
127
127
|
export const ProviderAuthSchema = z.enum(["apiKey", "none", "oauth"]);
|
|
@@ -81,6 +81,7 @@ export type StatusLineSegmentId =
|
|
|
81
81
|
| "hostname"
|
|
82
82
|
| "cache_read"
|
|
83
83
|
| "cache_write"
|
|
84
|
+
| "cache_hit"
|
|
84
85
|
| "session_name"
|
|
85
86
|
| "usage";
|
|
86
87
|
|
|
@@ -2016,16 +2017,6 @@ export const SETTINGS_SCHEMA = {
|
|
|
2016
2017
|
},
|
|
2017
2018
|
},
|
|
2018
2019
|
|
|
2019
|
-
"calc.enabled": {
|
|
2020
|
-
type: "boolean",
|
|
2021
|
-
default: false,
|
|
2022
|
-
ui: {
|
|
2023
|
-
tab: "tools",
|
|
2024
|
-
label: "Calculator",
|
|
2025
|
-
description: "Enable the calculator tool for basic calculations",
|
|
2026
|
-
},
|
|
2027
|
-
},
|
|
2028
|
-
|
|
2029
2020
|
"tts.enabled": {
|
|
2030
2021
|
type: "boolean",
|
|
2031
2022
|
default: false,
|
|
@@ -0,0 +1,297 @@
|
|
|
1
|
+
import { afterAll, afterEach, describe, expect, it, vi } from "bun:test";
|
|
2
|
+
import * as path from "node:path";
|
|
3
|
+
import type { Api, AssistantMessage, Model } from "@oh-my-pi/pi-ai";
|
|
4
|
+
import * as ai from "@oh-my-pi/pi-ai";
|
|
5
|
+
import { Effort } from "@oh-my-pi/pi-ai";
|
|
6
|
+
import { TempDir } from "@oh-my-pi/pi-utils";
|
|
7
|
+
import type { ModelRegistry } from "../../config/model-registry";
|
|
8
|
+
import { Settings } from "../../config/settings";
|
|
9
|
+
import type { ToolSession } from "../../tools";
|
|
10
|
+
import { ToolError } from "../../tools/tool-errors";
|
|
11
|
+
import { disposeAllVmContexts } from "../js/context-manager";
|
|
12
|
+
import { executeJs } from "../js/executor";
|
|
13
|
+
import { runEvalLlm } from "../llm-bridge";
|
|
14
|
+
import { disposeAllKernelSessions, executePython } from "../py/executor";
|
|
15
|
+
|
|
16
|
+
function makeModel(provider: string, id: string, extra: Partial<Model<Api>> = {}): Model<Api> {
|
|
17
|
+
return {
|
|
18
|
+
id,
|
|
19
|
+
name: id,
|
|
20
|
+
api: "openai-responses",
|
|
21
|
+
provider,
|
|
22
|
+
baseUrl: "https://example.test/v1",
|
|
23
|
+
reasoning: false,
|
|
24
|
+
input: ["text"],
|
|
25
|
+
cost: { input: 1, output: 1, cacheRead: 0, cacheWrite: 1 },
|
|
26
|
+
contextWindow: 128000,
|
|
27
|
+
maxTokens: 4096,
|
|
28
|
+
...extra,
|
|
29
|
+
} as Model<Api>;
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
const SMOL = makeModel("p", "smol");
|
|
33
|
+
const DEFAULT = makeModel("p", "default");
|
|
34
|
+
const SLOW = makeModel("p", "slow");
|
|
35
|
+
const REASONING_SLOW = makeModel("p", "slow", {
|
|
36
|
+
api: "anthropic-messages",
|
|
37
|
+
reasoning: true,
|
|
38
|
+
thinking: { minLevel: Effort.Low, maxLevel: Effort.High, mode: "anthropic-adaptive" },
|
|
39
|
+
});
|
|
40
|
+
|
|
41
|
+
interface SessionOptions {
|
|
42
|
+
available?: Model<Api>[];
|
|
43
|
+
apiKey?: string | null;
|
|
44
|
+
activeModel?: string;
|
|
45
|
+
roles?: Partial<Record<"smol" | "default" | "slow", string>>;
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
function makeSession(opts: SessionOptions = {}): ToolSession {
|
|
49
|
+
const settings = Settings.isolated({ "async.enabled": false, "task.isolation.mode": "none" });
|
|
50
|
+
const roles = opts.roles ?? { smol: "p/smol", slow: "p/slow" };
|
|
51
|
+
for (const role in roles) {
|
|
52
|
+
const value = roles[role as keyof typeof roles];
|
|
53
|
+
if (value) settings.setModelRole(role, value);
|
|
54
|
+
}
|
|
55
|
+
const modelRegistry = {
|
|
56
|
+
getAvailable: () => opts.available ?? [SMOL, DEFAULT, SLOW],
|
|
57
|
+
getApiKey: async () => (opts.apiKey === undefined ? "test-key" : opts.apiKey),
|
|
58
|
+
} as unknown as ModelRegistry;
|
|
59
|
+
return {
|
|
60
|
+
settings,
|
|
61
|
+
modelRegistry,
|
|
62
|
+
getActiveModelString: () => opts.activeModel ?? "p/default",
|
|
63
|
+
} as unknown as ToolSession;
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
function assistant(opts: {
|
|
67
|
+
text?: string;
|
|
68
|
+
toolCall?: { name: string; arguments: Record<string, unknown> };
|
|
69
|
+
stopReason?: AssistantMessage["stopReason"];
|
|
70
|
+
errorMessage?: string;
|
|
71
|
+
}): AssistantMessage {
|
|
72
|
+
const content: AssistantMessage["content"] = [];
|
|
73
|
+
if (opts.text) content.push({ type: "text", text: opts.text });
|
|
74
|
+
if (opts.toolCall) {
|
|
75
|
+
content.push({ type: "toolCall", id: "tc-1", name: opts.toolCall.name, arguments: opts.toolCall.arguments });
|
|
76
|
+
}
|
|
77
|
+
return {
|
|
78
|
+
role: "assistant",
|
|
79
|
+
content,
|
|
80
|
+
api: "openai-responses",
|
|
81
|
+
provider: "p",
|
|
82
|
+
model: "default",
|
|
83
|
+
usage: {
|
|
84
|
+
input: 0,
|
|
85
|
+
output: 0,
|
|
86
|
+
cacheRead: 0,
|
|
87
|
+
cacheWrite: 0,
|
|
88
|
+
totalTokens: 0,
|
|
89
|
+
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
|
90
|
+
},
|
|
91
|
+
stopReason: opts.stopReason ?? "stop",
|
|
92
|
+
errorMessage: opts.errorMessage,
|
|
93
|
+
timestamp: Date.now(),
|
|
94
|
+
};
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
describe("runEvalLlm", () => {
|
|
98
|
+
afterEach(() => {
|
|
99
|
+
vi.restoreAllMocks();
|
|
100
|
+
});
|
|
101
|
+
|
|
102
|
+
it("resolves each tier to its expected model", async () => {
|
|
103
|
+
const spy = vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "ok" }));
|
|
104
|
+
const session = makeSession();
|
|
105
|
+
|
|
106
|
+
await runEvalLlm({ prompt: "q", model: "smol" }, { session });
|
|
107
|
+
await runEvalLlm({ prompt: "q", model: "default" }, { session });
|
|
108
|
+
await runEvalLlm({ prompt: "q", model: "slow" }, { session });
|
|
109
|
+
|
|
110
|
+
const resolved = spy.mock.calls.map(call => {
|
|
111
|
+
const model = call[0] as Model<Api>;
|
|
112
|
+
return `${model.provider}/${model.id}`;
|
|
113
|
+
});
|
|
114
|
+
expect(resolved).toEqual(["p/smol", "p/default", "p/slow"]);
|
|
115
|
+
});
|
|
116
|
+
|
|
117
|
+
it("prefers the session active model for the default tier, falling back to pi/default", async () => {
|
|
118
|
+
const spy = vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "ok" }));
|
|
119
|
+
const session = makeSession({ available: [SMOL, DEFAULT, SLOW], activeModel: "p/slow" });
|
|
120
|
+
|
|
121
|
+
await runEvalLlm({ prompt: "q", model: "default" }, { session });
|
|
122
|
+
|
|
123
|
+
const model = spy.mock.calls[0]?.[0] as Model<Api>;
|
|
124
|
+
expect(`${model.provider}/${model.id}`).toBe("p/slow");
|
|
125
|
+
});
|
|
126
|
+
|
|
127
|
+
it("returns the completion text in plain mode", async () => {
|
|
128
|
+
vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "the answer" }));
|
|
129
|
+
const result = await runEvalLlm({ prompt: "q", model: "smol" }, { session: makeSession() });
|
|
130
|
+
expect(result.text).toBe("the answer");
|
|
131
|
+
expect(result.details).toEqual({ model: "p/smol", tier: "smol", structured: false });
|
|
132
|
+
});
|
|
133
|
+
|
|
134
|
+
it("forces a respond tool call and returns its arguments in structured mode", async () => {
|
|
135
|
+
const spy = vi
|
|
136
|
+
.spyOn(ai, "completeSimple")
|
|
137
|
+
.mockResolvedValue(assistant({ toolCall: { name: "respond", arguments: { answer: 42 } } }));
|
|
138
|
+
const result = await runEvalLlm(
|
|
139
|
+
{ prompt: "q", model: "smol", schema: { type: "object", properties: { answer: { type: "number" } } } },
|
|
140
|
+
{ session: makeSession() },
|
|
141
|
+
);
|
|
142
|
+
|
|
143
|
+
expect(JSON.parse(result.text)).toEqual({ answer: 42 });
|
|
144
|
+
expect(result.details.structured).toBe(true);
|
|
145
|
+
|
|
146
|
+
const ctx = spy.mock.calls[0]?.[1] as { tools?: Array<{ name: string }> };
|
|
147
|
+
const opts = spy.mock.calls[0]?.[2] as { toolChoice?: unknown };
|
|
148
|
+
expect(ctx.tools?.[0]?.name).toBe("respond");
|
|
149
|
+
expect(opts.toolChoice).toEqual({ type: "tool", name: "respond" });
|
|
150
|
+
});
|
|
151
|
+
|
|
152
|
+
it("falls back to JSON embedded in text when the model skips the respond tool", async () => {
|
|
153
|
+
vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: 'here: {"answer": 7}' }));
|
|
154
|
+
const result = await runEvalLlm(
|
|
155
|
+
{ prompt: "q", model: "smol", schema: { type: "object" } },
|
|
156
|
+
{ session: makeSession() },
|
|
157
|
+
);
|
|
158
|
+
expect(JSON.parse(result.text)).toEqual({ answer: 7 });
|
|
159
|
+
});
|
|
160
|
+
|
|
161
|
+
it("requests reasoning only for the slow tier on a reasoning-capable model", async () => {
|
|
162
|
+
const spy = vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "ok" }));
|
|
163
|
+
const session = makeSession({ available: [SMOL, DEFAULT, REASONING_SLOW] });
|
|
164
|
+
|
|
165
|
+
await runEvalLlm({ prompt: "q", model: "smol" }, { session });
|
|
166
|
+
await runEvalLlm({ prompt: "q", model: "slow" }, { session });
|
|
167
|
+
|
|
168
|
+
const smolOpts = spy.mock.calls[0]?.[2] as { reasoning?: unknown };
|
|
169
|
+
const slowOpts = spy.mock.calls[1]?.[2] as { reasoning?: unknown };
|
|
170
|
+
expect(smolOpts.reasoning).toBeUndefined();
|
|
171
|
+
expect(slowOpts.reasoning).toBe(Effort.High);
|
|
172
|
+
});
|
|
173
|
+
|
|
174
|
+
it("does not request reasoning for the slow tier on a non-reasoning model", async () => {
|
|
175
|
+
const spy = vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "ok" }));
|
|
176
|
+
// SLOW is reasoning:false — must not trip requireSupportedEffort downstream.
|
|
177
|
+
const result = await runEvalLlm({ prompt: "q", model: "slow" }, { session: makeSession() });
|
|
178
|
+
expect(result.text).toBe("ok");
|
|
179
|
+
const opts = spy.mock.calls[0]?.[2] as { reasoning?: unknown };
|
|
180
|
+
expect(opts.reasoning).toBeUndefined();
|
|
181
|
+
});
|
|
182
|
+
|
|
183
|
+
it("throws ToolError on invalid arguments", async () => {
|
|
184
|
+
await expect(runEvalLlm({ prompt: "" }, { session: makeSession() })).rejects.toBeInstanceOf(ToolError);
|
|
185
|
+
await expect(runEvalLlm({ prompt: "q", model: "huge" }, { session: makeSession() })).rejects.toBeInstanceOf(
|
|
186
|
+
ToolError,
|
|
187
|
+
);
|
|
188
|
+
});
|
|
189
|
+
|
|
190
|
+
it("throws ToolError when no model resolves for the tier", async () => {
|
|
191
|
+
const session = makeSession({ available: [DEFAULT], roles: { smol: "missing/model" } });
|
|
192
|
+
await expect(runEvalLlm({ prompt: "q", model: "smol" }, { session })).rejects.toBeInstanceOf(ToolError);
|
|
193
|
+
});
|
|
194
|
+
|
|
195
|
+
it("throws ToolError when the resolved model has no API key", async () => {
|
|
196
|
+
const session = makeSession({ apiKey: null });
|
|
197
|
+
await expect(runEvalLlm({ prompt: "q", model: "smol" }, { session })).rejects.toBeInstanceOf(ToolError);
|
|
198
|
+
});
|
|
199
|
+
|
|
200
|
+
it("maps error and aborted stop reasons to ToolError", async () => {
|
|
201
|
+
vi.spyOn(ai, "completeSimple").mockResolvedValueOnce(assistant({ stopReason: "error", errorMessage: "boom" }));
|
|
202
|
+
await expect(runEvalLlm({ prompt: "q", model: "smol" }, { session: makeSession() })).rejects.toThrow("boom");
|
|
203
|
+
|
|
204
|
+
vi.spyOn(ai, "completeSimple").mockResolvedValueOnce(assistant({ stopReason: "aborted" }));
|
|
205
|
+
await expect(runEvalLlm({ prompt: "q", model: "smol" }, { session: makeSession() })).rejects.toBeInstanceOf(
|
|
206
|
+
ToolError,
|
|
207
|
+
);
|
|
208
|
+
});
|
|
209
|
+
|
|
210
|
+
it("throws ToolError when plain mode produces no text", async () => {
|
|
211
|
+
vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "" }));
|
|
212
|
+
await expect(runEvalLlm({ prompt: "q", model: "smol" }, { session: makeSession() })).rejects.toBeInstanceOf(
|
|
213
|
+
ToolError,
|
|
214
|
+
);
|
|
215
|
+
});
|
|
216
|
+
});
|
|
217
|
+
|
|
218
|
+
describe("llm() through eval runtimes", () => {
|
|
219
|
+
afterEach(() => {
|
|
220
|
+
vi.restoreAllMocks();
|
|
221
|
+
});
|
|
222
|
+
|
|
223
|
+
afterAll(async () => {
|
|
224
|
+
await disposeAllVmContexts();
|
|
225
|
+
await disposeAllKernelSessions();
|
|
226
|
+
});
|
|
227
|
+
|
|
228
|
+
it("exposes llm() in the JavaScript runtime", async () => {
|
|
229
|
+
using tempDir = TempDir.createSync("@omp-eval-llm-js-");
|
|
230
|
+
const sessionFile = path.join(tempDir.path(), "session.jsonl");
|
|
231
|
+
const sessionId = `js-llm:${crypto.randomUUID()}`;
|
|
232
|
+
vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "hello from smol" }));
|
|
233
|
+
|
|
234
|
+
const result = await executeJs('return await llm("hi", { model: "smol" });', {
|
|
235
|
+
cwd: tempDir.path(),
|
|
236
|
+
sessionId,
|
|
237
|
+
session: makeSession(),
|
|
238
|
+
sessionFile,
|
|
239
|
+
});
|
|
240
|
+
|
|
241
|
+
expect(result.exitCode).toBe(0);
|
|
242
|
+
expect(result.output.trim()).toBe("hello from smol");
|
|
243
|
+
});
|
|
244
|
+
|
|
245
|
+
it("parses structured llm() output in the JavaScript runtime", async () => {
|
|
246
|
+
using tempDir = TempDir.createSync("@omp-eval-llm-js-struct-");
|
|
247
|
+
const sessionFile = path.join(tempDir.path(), "session.jsonl");
|
|
248
|
+
const sessionId = `js-llm-struct:${crypto.randomUUID()}`;
|
|
249
|
+
vi.spyOn(ai, "completeSimple").mockResolvedValue(
|
|
250
|
+
assistant({ toolCall: { name: "respond", arguments: { ok: true, n: 3 } } }),
|
|
251
|
+
);
|
|
252
|
+
|
|
253
|
+
const result = await executeJs(
|
|
254
|
+
'const r = await llm("hi", { schema: { type: "object" } }); return JSON.stringify(r);',
|
|
255
|
+
{ cwd: tempDir.path(), sessionId, session: makeSession(), sessionFile },
|
|
256
|
+
);
|
|
257
|
+
|
|
258
|
+
expect(result.exitCode).toBe(0);
|
|
259
|
+
expect(JSON.parse(result.output.trim())).toEqual({ ok: true, n: 3 });
|
|
260
|
+
});
|
|
261
|
+
|
|
262
|
+
it("exposes llm() in the Python runtime", async () => {
|
|
263
|
+
using tempDir = TempDir.createSync("@omp-eval-llm-py-");
|
|
264
|
+
const sessionFile = path.join(tempDir.path(), "session.jsonl");
|
|
265
|
+
const sessionId = `py-llm:${crypto.randomUUID()}`;
|
|
266
|
+
vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "hello from python" }));
|
|
267
|
+
|
|
268
|
+
const result = await executePython('print(llm("hi", model="smol"))', {
|
|
269
|
+
cwd: tempDir.path(),
|
|
270
|
+
sessionId,
|
|
271
|
+
sessionFile,
|
|
272
|
+
toolSession: makeSession(),
|
|
273
|
+
});
|
|
274
|
+
|
|
275
|
+
expect(result.exitCode).toBe(0);
|
|
276
|
+
expect(result.output.trim()).toBe("hello from python");
|
|
277
|
+
});
|
|
278
|
+
|
|
279
|
+
it("parses structured llm() output in the Python runtime", async () => {
|
|
280
|
+
using tempDir = TempDir.createSync("@omp-eval-llm-py-struct-");
|
|
281
|
+
const sessionFile = path.join(tempDir.path(), "session.jsonl");
|
|
282
|
+
const sessionId = `py-llm-struct:${crypto.randomUUID()}`;
|
|
283
|
+
vi.spyOn(ai, "completeSimple").mockResolvedValue(
|
|
284
|
+
assistant({ toolCall: { name: "respond", arguments: { ok: true } } }),
|
|
285
|
+
);
|
|
286
|
+
|
|
287
|
+
const result = await executePython('import json\nprint(json.dumps(llm("hi", schema={"type": "object"})))', {
|
|
288
|
+
cwd: tempDir.path(),
|
|
289
|
+
sessionId,
|
|
290
|
+
sessionFile,
|
|
291
|
+
toolSession: makeSession(),
|
|
292
|
+
});
|
|
293
|
+
|
|
294
|
+
expect(result.exitCode).toBe(0);
|
|
295
|
+
expect(JSON.parse(result.output.trim())).toEqual({ ok: true });
|
|
296
|
+
});
|
|
297
|
+
});
|
|
@@ -39,6 +39,13 @@ if (!globalThis.__omp_js_prelude_loaded__) {
|
|
|
39
39
|
return values.length === 1 ? values[0] : values;
|
|
40
40
|
};
|
|
41
41
|
|
|
42
|
+
const llm = async (prompt, opts = {}) => {
|
|
43
|
+
const o = toOptions(opts);
|
|
44
|
+
const res = await globalThis.__omp_call_tool__("__llm__", { prompt, ...o });
|
|
45
|
+
const text = res && typeof res === "object" ? res.text : res;
|
|
46
|
+
return o.schema ? JSON.parse(text) : text;
|
|
47
|
+
};
|
|
48
|
+
|
|
42
49
|
const display = value => {
|
|
43
50
|
globalThis.__omp_display__(value);
|
|
44
51
|
};
|
|
@@ -61,6 +68,7 @@ if (!globalThis.__omp_js_prelude_loaded__) {
|
|
|
61
68
|
globalThis.print = consoleBridge.log;
|
|
62
69
|
globalThis.display = display;
|
|
63
70
|
globalThis.tool = tool;
|
|
71
|
+
globalThis.llm = llm;
|
|
64
72
|
globalThis.output = output;
|
|
65
73
|
globalThis.read = read;
|
|
66
74
|
globalThis.write = write;
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import type { AgentTool, AgentToolResult } from "@oh-my-pi/pi-agent-core";
|
|
2
2
|
import type { ToolSession } from "../../tools";
|
|
3
3
|
import { ToolError } from "../../tools/tool-errors";
|
|
4
|
+
import { EVAL_LLM_BRIDGE_NAME, runEvalLlm } from "../llm-bridge";
|
|
4
5
|
import type { JsStatusEvent } from "./shared/types";
|
|
5
6
|
|
|
6
7
|
export type { JsStatusEvent } from "./shared/types";
|
|
@@ -101,6 +102,9 @@ function summarizeToolResult(
|
|
|
101
102
|
}
|
|
102
103
|
|
|
103
104
|
export async function callSessionTool(name: string, args: unknown, options: ToolBridgeOptions): Promise<ToolValue> {
|
|
105
|
+
if (name === EVAL_LLM_BRIDGE_NAME) {
|
|
106
|
+
return await runEvalLlm(args, options);
|
|
107
|
+
}
|
|
104
108
|
const tool = getTool(options.session, name);
|
|
105
109
|
const normalizedArgs = normalizeArgs(args);
|
|
106
110
|
const toolCallId = `js-${name}-${crypto.randomUUID()}`;
|