@oh-my-pi/pi-ai 14.7.0 → 14.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md
CHANGED
@@ -2,6 +2,14 @@
 
 ## [Unreleased]
 
+## [14.7.2] - 2026-05-06
+
+### Fixed
+
+- Fixed VLLM model discovery to use `max_model_len` as the context window when the endpoint reports it.
+- Fixed custom Ollama Cloud/local-proxy model aliases (for example `deepseek-v4-pro:cloud`) to inherit bundled cache-pricing metadata when the upstream model is known ([#937](https://github.com/can1357/oh-my-pi/issues/937)).
+- Fixed local Ollama model discovery to apply `/api/show` thinking and vision capabilities in addition to native context windows ([#928](https://github.com/can1357/oh-my-pi/issues/928)).
+
 ## [14.7.0] - 2026-05-04
 ### Breaking Changes
 
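The two Ollama fixes hinge on what a local Ollama server reports from `/api/show`. For anyone verifying the behavior, a standalone probe of that endpoint might look like the sketch below. The host, model name, and fallback handling here are assumptions based on Ollama's documented API, not code from this package:

```ts
// Minimal probe of Ollama's /api/show, reading the two fields the 14.7.2
// fixes consume: `capabilities` and the context length inside `model_info`.
// OLLAMA_HOST and the model name are illustrative assumptions.
const OLLAMA_HOST = "http://localhost:11434";

interface ShowResponse {
  capabilities?: string[]; // e.g. ["completion", "thinking", "vision"]
  model_info?: Record<string, unknown>; // e.g. { "llama.context_length": 131072 }
}

async function probe(model: string): Promise<void> {
  const res = await fetch(`${OLLAMA_HOST}/api/show`, {
    method: "POST",
    headers: { "content-type": "application/json" },
    body: JSON.stringify({ model }),
  });
  if (!res.ok) throw new Error(`show failed: ${res.status}`);
  const payload = (await res.json()) as ShowResponse;

  // The context window hides under an architecture-prefixed key such as
  // "llama.context_length", so match on the suffix rather than the full key.
  const contextWindow = Object.entries(payload.model_info ?? {}).find(
    ([key, value]) => key.endsWith(".context_length") && typeof value === "number",
  )?.[1];

  console.log({
    model,
    contextWindow,
    thinking: payload.capabilities?.includes("thinking") ?? false,
    vision: payload.capabilities?.includes("vision") ?? false,
  });
}

probe("qwen3:8b").catch(console.error); // model name is illustrative
```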
package/package.json
CHANGED
@@ -1,7 +1,7 @@
 {
   "type": "module",
   "name": "@oh-my-pi/pi-ai",
-  "version": "14.7.0",
+  "version": "14.7.2",
   "description": "Unified LLM API with automatic model discovery and provider configuration",
   "homepage": "https://github.com/can1357/oh-my-pi",
   "author": "Can Boluk",
@@ -41,24 +41,24 @@
     "generate-models": "bun scripts/generate-models.ts"
   },
   "dependencies": {
-    "@anthropic-ai/sdk": "^0.…",
-    "@aws-sdk/client-bedrock-runtime": "^3.…",
-    "@aws-sdk/credential-provider-node": "^3.972.…",
+    "@anthropic-ai/sdk": "^0.94.0",
+    "@aws-sdk/client-bedrock-runtime": "^3.1043.0",
+    "@aws-sdk/credential-provider-node": "^3.972.39",
     "@bufbuild/protobuf": "^2.12.0",
-    "@google/genai": "^1.…",
-    "@oh-my-pi/pi-natives": "14.7.…",
-    "@oh-my-pi/pi-utils": "14.7.…",
+    "@google/genai": "^1.52.0",
+    "@oh-my-pi/pi-natives": "14.7.2",
+    "@oh-my-pi/pi-utils": "14.7.2",
     "@sinclair/typebox": "^0.34.49",
     "@smithy/node-http-handler": "^4.6.1",
     "ajv": "^8.20.0",
     "ajv-formats": "^3.0.1",
-    "openai": "^6.…",
+    "openai": "^6.36.0",
     "partial-json": "^0.1.7",
     "proxy-agent": "^8.0.1",
-    "zod": "4.3…"
+    "zod": "4.4.3"
   },
   "devDependencies": {
-    "@types/bun": "^1.3"
+    "@types/bun": "^1.3.13"
   },
   "engines": {
     "bun": ">=1.3.7"

package/src/…
CHANGED
@@ -1,6 +1,7 @@
 import type { ModelManagerOptions } from "../model-manager";
+import { Effort } from "../model-thinking";
 import { getBundledModels } from "../models";
-import type { Api, Model } from "../types";
+import type { Api, Model, ThinkingConfig } from "../types";
 import { isAnthropicOAuthToken, isRecord, toBoolean, toNumber, toPositiveNumber } from "../utils";
 import {
   fetchOpenAICompatibleModels,
@@ -192,7 +193,7 @@ function toOllamaNativeBaseUrl(baseUrl: string): string {
 
 async function fetchOllamaNativeModels(
   baseUrl: string,
-  …
+  resolveMetadata: (modelId: string) => Promise<OllamaResolvedMetadata>,
 ): Promise<Model<"openai-responses">[] | null> {
   const nativeBaseUrl = toOllamaNativeBaseUrl(baseUrl);
   let response: Response;
@@ -213,18 +214,19 @@ async function fetchOllamaNativeModels(
     entries.map(async (entry): Promise<Model<"openai-responses"> | null> => {
       const id = entry.model ?? entry.name;
       if (!id) return null;
-      const …
+      const metadata = await resolveMetadata(id);
       return {
         id,
         name: entry.name ?? id,
         api: "openai-responses",
         provider: "ollama",
         baseUrl,
-        reasoning: false,
-        …
+        reasoning: metadata.reasoning ?? false,
+        thinking: metadata.thinking,
+        input: metadata.input ?? ["text"],
         cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
-        contextWindow,
-        maxTokens,
+        contextWindow: metadata.contextWindow,
+        maxTokens: metadata.maxTokens,
       };
     }),
   );
@@ -241,18 +243,65 @@ const OLLAMA_FALLBACK_CONTEXT_WINDOW = 128_000;
 /** Cap max output tokens at a value that matches OMP's other openai-responses defaults. */
 const OLLAMA_DEFAULT_MAX_TOKENS = 8192;
 
-interface …
+interface OllamaResolvedMetadata {
   contextWindow: number;
   maxTokens: number;
+  capabilities?: string[];
+  reasoning?: boolean;
+  thinking?: ThinkingConfig;
+  input?: ("text" | "image")[];
+}
+
+interface OllamaShowMetadata {
+  contextWindow?: number;
+  maxTokens?: number;
+  capabilities?: string[];
+  reasoning?: boolean;
+  thinking?: ThinkingConfig;
+  input?: ("text" | "image")[];
+}
+
+function getOllamaContextWindow(modelInfo: Record<string, unknown> | undefined): number | undefined {
+  if (!modelInfo) {
+    return undefined;
+  }
+  for (const [key, value] of Object.entries(modelInfo)) {
+    if (typeof value !== "number" || value <= 0) {
+      continue;
+    }
+    if (key.endsWith(".context_length") || key.endsWith(".num_ctx") || key.endsWith(".context_window")) {
+      return value;
+    }
+  }
+}
+
+function getOllamaCapabilities(value: unknown): string[] | undefined {
+  if (!Array.isArray(value)) {
+    return undefined;
+  }
+  return value.filter((item): item is string => typeof item === "string");
+}
+
+function getOllamaThinkingConfig(capabilities: string[] | undefined): ThinkingConfig | undefined {
+  if (!capabilities?.includes("thinking")) {
+    return undefined;
+  }
+  return {
+    mode: "effort",
+    minLevel: Effort.Minimal,
+    maxLevel: Effort.High,
+  };
 }
 
 /**
- * Query Ollama's `/api/show` endpoint for a single model and pull
- * context …
- * …
- * unavailable so callers can layer their own fallback.
+ * Query Ollama's `/api/show` endpoint for a single model and pull native
+ * context and capability metadata from the response. Returns `undefined` when
+ * the endpoint is unavailable so callers can layer their own fallback.
  */
-async function …
+async function fetchOllamaShowMetadata(
+  nativeBaseUrl: string,
+  modelId: string,
+): Promise<OllamaShowMetadata | undefined> {
   try {
     const response = await fetch(`${nativeBaseUrl}/api/show`, {
       method: "POST",
@@ -262,13 +311,21 @@ async function fetchOllamaShowLimits(nativeBaseUrl: string, modelId: string): Pr…
     if (!response.ok) {
       return undefined;
     }
-    const payload = (await response.json()) as { model_info?: Record<string, unknown> };
-    const …
-    …
+    const payload = (await response.json()) as { capabilities?: unknown; model_info?: Record<string, unknown> };
+    const capabilities = getOllamaCapabilities(payload.capabilities);
+    const contextWindow = getOllamaContextWindow(payload.model_info);
+    return {
+      contextWindow,
+      maxTokens: contextWindow ? OLLAMA_DEFAULT_MAX_TOKENS : undefined,
+      capabilities,
+      reasoning: capabilities ? capabilities.includes("thinking") : undefined,
+      thinking: getOllamaThinkingConfig(capabilities),
+      input: capabilities
+        ? capabilities.includes("vision")
+          ? (["text", "image"] as Array<"text" | "image">)
+          : (["text"] as Array<"text">)
+        : undefined,
+    };
   } catch {
     // fall through; caller decides on the fallback
   }
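
To make the new metadata shape concrete, here is an illustrative payload run through the same rules as `fetchOllamaShowMetadata` above. The payload values are invented; the thinking-to-reasoning and vision-to-image mappings follow the hunk:

```ts
// Illustrative only: a hand-written /api/show payload and the mapping rules
// from the diff. "thinking" in capabilities enables reasoning; "vision" adds
// "image" to the supported inputs.
const payload = {
  capabilities: ["completion", "thinking", "vision"],
  model_info: { "qwen3.context_length": 40960 },
};

const capabilities = Array.isArray(payload.capabilities)
  ? payload.capabilities.filter((c): c is string => typeof c === "string")
  : undefined;

const contextWindow = Object.entries(payload.model_info ?? {}).find(
  ([k, v]) => typeof v === "number" && v > 0 && k.endsWith(".context_length"),
)?.[1] as number | undefined;

console.log({
  contextWindow, // 40960
  reasoning: capabilities?.includes("thinking"), // true
  input: capabilities?.includes("vision") ? ["text", "image"] : ["text"], // ["text", "image"]
});
```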
@@ -276,23 +333,27 @@ async function fetchOllamaShowLimits(nativeBaseUrl: string, modelId: string): Pr…
 }
 
 /**
- * Build a resolver that fetches `/api/show` …
- * result in-memory for the lifetime of the manager. Successful lookups are
+ * Build a resolver that fetches `/api/show` metadata per model id and caches
+ * the result in-memory for the lifetime of the manager. Successful lookups are
  * cached so repeated `fetchDynamicModels` calls do not refetch; failed
  * lookups stay uncached so a later refresh can recover.
  */
-function …
-  const cache = new Map<string, Promise<…
+function createOllamaMetadataResolver(nativeBaseUrl: string): (modelId: string) => Promise<OllamaResolvedMetadata> {
+  const cache = new Map<string, Promise<OllamaResolvedMetadata>>();
   return modelId => {
     const cached = cache.get(modelId);
     if (cached) return cached;
     const pending = (async () => {
-      const …
-      if (!…
+      const metadata = await fetchOllamaShowMetadata(nativeBaseUrl, modelId);
+      if (!metadata) {
         cache.delete(modelId);
         return { contextWindow: OLLAMA_FALLBACK_CONTEXT_WINDOW, maxTokens: OLLAMA_DEFAULT_MAX_TOKENS };
       }
-      return …
+      return {
+        ...metadata,
+        contextWindow: metadata.contextWindow ?? OLLAMA_FALLBACK_CONTEXT_WINDOW,
+        maxTokens: metadata.maxTokens ?? OLLAMA_DEFAULT_MAX_TOKENS,
+      };
     })();
     cache.set(modelId, pending);
     void pending.catch(() => cache.delete(modelId));
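
The resolver's cache discipline generalizes to a small pattern worth spelling out: cache the in-flight promise immediately so concurrent callers coalesce on one request, and evict on failure so errors are never cached. A minimal generic sketch (names are illustrative, not package exports):

```ts
// Generic sketch of the caching pattern used by createOllamaMetadataResolver.
// The resolver above additionally evicts when /api/show was unreachable and a
// fallback result was served, so a later refresh can recover real metadata.
function memoizeAsync<T>(load: (key: string) => Promise<T>): (key: string) => Promise<T> {
  const cache = new Map<string, Promise<T>>();
  return key => {
    const hit = cache.get(key);
    if (hit) return hit;
    const pending = load(key);
    cache.set(key, pending); // cache before awaiting: concurrent callers share it
    void pending.catch(() => cache.delete(key)); // a rejection must not poison the cache
    return pending;
  };
}
```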
@@ -702,7 +763,7 @@ export function ollamaModelManagerOptions(config?: OllamaModelManagerConfig): Mo…
   const baseUrl = normalizeOllamaBaseUrl(config?.baseUrl);
   const nativeBaseUrl = toOllamaNativeBaseUrl(baseUrl);
   const references = createBundledReferenceMap<"openai-responses">("ollama" as Parameters<typeof getBundledModels>[0]);
-  const …
+  const resolveMetadata = createOllamaMetadataResolver(nativeBaseUrl);
   return {
     providerId: "ollama",
     fetchDynamicModels: async () => {
@@ -727,13 +788,20 @@ export function ollamaModelManagerOptions(config?: OllamaModelManagerConfig): Mo…
       if (openAiCompatible && openAiCompatible.length > 0) {
         await Promise.all(
           openAiCompatible.map(async model => {
-            const …
-            model.contextWindow = …
+            const metadata = await resolveMetadata(model.id);
+            model.contextWindow = metadata.contextWindow;
+            if (metadata.reasoning !== undefined) {
+              model.reasoning = metadata.reasoning;
+              model.thinking = metadata.thinking;
+            }
+            if (metadata.input) {
+              model.input = metadata.input;
+            }
           }),
         );
         return openAiCompatible;
       }
-      const nativeFallback = await fetchOllamaNativeModels(baseUrl, …
+      const nativeFallback = await fetchOllamaNativeModels(baseUrl, resolveMetadata);
       if (nativeFallback && nativeFallback.length > 0) {
         return nativeFallback;
       }
@@ -1407,8 +1475,11 @@ export function vllmModelManagerOptions(config?: VllmModelManagerConfig): ModelM…
       baseUrl,
       apiKey,
       mapModel: (entry, defaults) => {
-        const …
-        return …
+        const model = mapWithBundledReference(entry, defaults, references.get(defaults.id));
+        return {
+          ...model,
+          contextWindow: toPositiveNumber(entry.max_model_len, model.contextWindow),
+        };
       },
     }),
   };
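
This is the `max_model_len` fix from the changelog: vLLM's `/v1/models` entries report the serving-time context length, and it should win over the bundled default whenever it is a usable number. A self-contained sketch of the rule, with a stand-in for `toPositiveNumber` (assumed contract: return the candidate when it is a positive finite number, else the fallback):

```ts
// Stand-in for the package's toPositiveNumber helper; the real one lives in
// ../utils and its exact signature is assumed from the call site above.
function toPositiveNumberish(value: unknown, fallback: number): number {
  return typeof value === "number" && Number.isFinite(value) && value > 0 ? value : fallback;
}

// The entry literal imitates a vLLM /v1/models item; values are illustrative.
const entry = { id: "Qwen/Qwen2.5-32B-Instruct", max_model_len: 32768 };
const bundledDefault = 128_000; // whatever the bundled reference said

const contextWindow = toPositiveNumberish(entry.max_model_len, bundledDefault);
console.log(contextWindow); // 32768 — the endpoint wins when it reports a length
```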

package/src/…
CHANGED
@@ -119,6 +119,7 @@ export function detectOpenAICompat(model: Model<"openai-completions">, resolvedB…
     reasoningEffortMap,
     supportsUsageInStreaming: !isCerebras,
     disableReasoningOnForcedToolChoice: isKimiModel || isAnthropicModel,
+    disableReasoningOnToolChoice: isDeepseekFamily && Boolean(model.reasoning),
     supportsToolChoice: true,
     maxTokensField: useMaxTokens ? "max_tokens" : "max_completion_tokens",
     requiresToolResultName: isMistral,
@@ -195,6 +196,7 @@ export function resolveOpenAICompat(
       model.compat.requiresAssistantContentForToolCalls ?? detected.requiresAssistantContentForToolCalls,
     disableReasoningOnForcedToolChoice:
       model.compat.disableReasoningOnForcedToolChoice ?? detected.disableReasoningOnForcedToolChoice,
+    disableReasoningOnToolChoice: model.compat.disableReasoningOnToolChoice ?? detected.disableReasoningOnToolChoice,
     openRouterRouting: model.compat.openRouterRouting ?? detected.openRouterRouting,
     vercelGatewayRouting: model.compat.vercelGatewayRouting ?? detected.vercelGatewayRouting,
     supportsStrictMode: model.compat.supportsStrictMode ?? detected.supportsStrictMode,
@@ -996,6 +996,14 @@ function buildParams(
     params.reasoning_effort = mapReasoningEffort(options.reasoning, compat.reasoningEffortMap) as Effort;
   }
 
+  if (compat.disableReasoningOnToolChoice && params.tool_choice !== undefined) {
+    // DeepSeek reasoning models accept tools/tool_choice, but reject that
+    // control field while thinking is enabled. Keep the tool-selection
+    // contract and suppress reasoning for this single request.
+    delete params.reasoning_effort;
+    delete params.reasoning;
+  }
+
   if (compat.disableReasoningOnForcedToolChoice && isForcedToolChoice(params.tool_choice)) {
     // Mirrors anthropic.ts:disableThinkingIfToolChoiceForced — backends like
     // Kimi 400 with `tool_choice 'specified' is incompatible with thinking
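
The effect of the new guard on an outgoing request body, sketched with an illustrative params object:

```ts
// Before/after sketch: with disableReasoningOnToolChoice set, any request that
// carries tool_choice has its reasoning fields stripped while the tool
// contract stays intact. The param values below are illustrative.
const params: Record<string, unknown> = {
  model: "deepseek-reasoner", // illustrative model id
  tool_choice: "auto",
  reasoning_effort: "high",
};

const compat = { disableReasoningOnToolChoice: true };

if (compat.disableReasoningOnToolChoice && params.tool_choice !== undefined) {
  delete params.reasoning_effort;
  delete params.reasoning; // OpenRouter-style reasoning object, if present
}

console.log(params); // { model: "deepseek-reasoner", tool_choice: "auto" }
```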

package/src/…
CHANGED
@@ -13,6 +13,7 @@ import {
   type Context,
   type MessageAttribution,
   type Model,
+  type OpenAICompat,
   type ProviderSessionState,
   type ServiceTier,
   type StreamFunction,
@@ -431,7 +432,9 @@ function buildParams(
 
   if (options?.reasoning || options?.reasoningSummary) {
     params.reasoning = {
-      effort: options?.reasoning || "medium",
+      effort: mapReasoningEffort(options?.reasoning || "medium", model.compat?.reasoningEffortMap) as NonNullable<
+        OpenAIResponsesSamplingParams["reasoning"]
+      >["effort"],
       summary: options?.reasoningSummary || "auto",
     };
   } else if (model.name.startsWith("gpt-5")) {
@@ -451,6 +454,13 @@
   return { conversationMessages, params };
 }
 
+function mapReasoningEffort(
+  effort: NonNullable<OpenAIResponsesOptions["reasoning"]>,
+  reasoningEffortMap: OpenAICompat["reasoningEffortMap"] | undefined,
+): string {
+  return reasoningEffortMap?.[effort] ?? effort;
+}
+
 function isAzureOpenAIBaseUrl(baseUrl: string): boolean {
   return baseUrl.includes(".openai.azure.com") || baseUrl.includes("azure.com/openai");
 }
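
With this helper, a compat-level `reasoningEffortMap` now applies on the Responses API path as well as completions: the configured map renames effort levels before they are sent as `reasoning.effort`, and unmapped levels pass through. An illustrative remap (the map contents are invented):

```ts
// A backend that only understands "low" | "high" can remap the effort levels
// it does not support. Map contents here are examples, not package defaults.
const reasoningEffortMap: Record<string, string> = { minimal: "low", medium: "high" };

function mapEffort(effort: string): string {
  return reasoningEffortMap[effort] ?? effort;
}

console.log(mapEffort("minimal")); // "low"  (remapped)
console.log(mapEffort("high"));    // "high" (passed through unchanged)
```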
package/src/types.ts
CHANGED
@@ -576,6 +576,13 @@ export interface OpenAICompat {
    * enabled` whenever both are present. Default: auto-detected (Kimi).
    */
   disableReasoningOnForcedToolChoice?: boolean;
+  /**
+   * Drop reasoning fields (`reasoning_effort`, OpenRouter `reasoning`) for
+   * any request that sends `tool_choice`. Use for providers/models that accept
+   * tools and `tool_choice`, but reject `tool_choice` while thinking is enabled.
+   * Default: auto-detected (DeepSeek reasoning models).
+   */
+  disableReasoningOnToolChoice?: boolean;
   /** OpenRouter-specific routing preferences. Only used when baseUrl points to OpenRouter. */
   openRouterRouting?: OpenRouterRouting;
   /** Vercel AI Gateway routing preferences. Only used when baseUrl points to Vercel AI Gateway. */
@@ -666,7 +673,7 @@ export interface Model<TApi extends Api = any> {
   /** Canonical thinking capability metadata for this model. */
   thinking?: ThinkingConfig;
   /** Compatibility overrides per API. If not set, auto-detected from baseUrl. */
-  compat?: TApi extends "openai-completions"
+  compat?: TApi extends "openai-completions" | "openai-responses"
     ? OpenAICompat
     : TApi extends "anthropic-messages"
       ? AnthropicCompat
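
The widened conditional type means `openai-responses` models can now carry `OpenAICompat` overrides such as the new flag. A hedged configuration sketch follows; the import path and the exact required field set of `Model` are assumptions inferred from the hunks above, so consult the actual `types.ts` before relying on it:

```ts
import type { Model } from "@oh-my-pi/pi-ai"; // assumes the type is re-exported at the root

// Field values are examples, not recommendations. The field set mirrors the
// model objects constructed in the Ollama discovery hunk above.
const model: Model<"openai-responses"> = {
  id: "my-proxy-model",
  name: "My Proxy Model",
  api: "openai-responses",
  provider: "openai", // illustrative provider id
  baseUrl: "https://proxy.example.com/v1",
  reasoning: true,
  cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
  contextWindow: 128_000,
  maxTokens: 8192,
  compat: {
    // Before 14.7.2 these overrides only typechecked on "openai-completions" models:
    reasoningEffortMap: { minimal: "low" },
    disableReasoningOnToolChoice: true,
  },
};
```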