@oh-my-pi/pi-ai 14.7.1 → 14.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +8 -0
- package/package.json +3 -3
- package/src/provider-models/openai-compat.ts +104 -33
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,14 @@
|
|
|
2
2
|
|
|
3
3
|
## [Unreleased]
|
|
4
4
|
|
|
5
|
+
## [14.7.2] - 2026-05-06
|
|
6
|
+
|
|
7
|
+
### Fixed
|
|
8
|
+
|
|
9
|
+
- Fixed VLLM model discovery to use `max_model_len` as the context window when the endpoint reports it.
|
|
10
|
+
- Fixed custom Ollama Cloud/local-proxy model aliases (for example `deepseek-v4-pro:cloud`) to inherit bundled cache-pricing metadata when the upstream model is known ([#937](https://github.com/can1357/oh-my-pi/issues/937)).
|
|
11
|
+
- Fixed local Ollama model discovery to apply `/api/show` thinking and vision capabilities in addition to native context windows ([#928](https://github.com/can1357/oh-my-pi/issues/928)).
|
|
12
|
+
|
|
5
13
|
## [14.7.0] - 2026-05-04
|
|
6
14
|
### Breaking Changes
|
|
7
15
|
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"type": "module",
|
|
3
3
|
"name": "@oh-my-pi/pi-ai",
|
|
4
|
-
"version": "14.7.1",
|
|
4
|
+
"version": "14.7.2",
|
|
5
5
|
"description": "Unified LLM API with automatic model discovery and provider configuration",
|
|
6
6
|
"homepage": "https://github.com/can1357/oh-my-pi",
|
|
7
7
|
"author": "Can Boluk",
|
|
@@ -46,8 +46,8 @@
|
|
|
46
46
|
"@aws-sdk/credential-provider-node": "^3.972.39",
|
|
47
47
|
"@bufbuild/protobuf": "^2.12.0",
|
|
48
48
|
"@google/genai": "^1.52.0",
|
|
49
|
-
"@oh-my-pi/pi-natives": "14.7.1",
|
|
50
|
-
"@oh-my-pi/pi-utils": "14.7.1",
|
|
49
|
+
"@oh-my-pi/pi-natives": "14.7.2",
|
|
50
|
+
"@oh-my-pi/pi-utils": "14.7.2",
|
|
51
51
|
"@sinclair/typebox": "^0.34.49",
|
|
52
52
|
"@smithy/node-http-handler": "^4.6.1",
|
|
53
53
|
"ajv": "^8.20.0",
|
|
package/src/provider-models/openai-compat.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import type { ModelManagerOptions } from "../model-manager";
|
|
2
|
+
import { Effort } from "../model-thinking";
|
|
2
3
|
import { getBundledModels } from "../models";
|
|
3
|
-
import type { Api, Model } from "../types";
|
|
4
|
+
import type { Api, Model, ThinkingConfig } from "../types";
|
|
4
5
|
import { isAnthropicOAuthToken, isRecord, toBoolean, toNumber, toPositiveNumber } from "../utils";
|
|
5
6
|
import {
|
|
6
7
|
fetchOpenAICompatibleModels,
|
|
@@ -192,7 +193,7 @@ function toOllamaNativeBaseUrl(baseUrl: string): string {
|
|
|
192
193
|
|
|
193
194
|
async function fetchOllamaNativeModels(
|
|
194
195
|
baseUrl: string,
|
|
195
|
-
|
|
196
|
+
resolveMetadata: (modelId: string) => Promise<OllamaResolvedMetadata>,
|
|
196
197
|
): Promise<Model<"openai-responses">[] | null> {
|
|
197
198
|
const nativeBaseUrl = toOllamaNativeBaseUrl(baseUrl);
|
|
198
199
|
let response: Response;
|
|
@@ -213,18 +214,19 @@ async function fetchOllamaNativeModels(
|
|
|
213
214
|
entries.map(async (entry): Promise<Model<"openai-responses"> | null> => {
|
|
214
215
|
const id = entry.model ?? entry.name;
|
|
215
216
|
if (!id) return null;
|
|
216
|
-
const
|
|
217
|
+
const metadata = await resolveMetadata(id);
|
|
217
218
|
return {
|
|
218
219
|
id,
|
|
219
220
|
name: entry.name ?? id,
|
|
220
221
|
api: "openai-responses",
|
|
221
222
|
provider: "ollama",
|
|
222
223
|
baseUrl,
|
|
223
|
-
reasoning: false,
|
|
224
|
-
|
|
224
|
+
reasoning: metadata.reasoning ?? false,
|
|
225
|
+
thinking: metadata.thinking,
|
|
226
|
+
input: metadata.input ?? ["text"],
|
|
225
227
|
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
|
226
|
-
contextWindow,
|
|
227
|
-
maxTokens,
|
|
228
|
+
contextWindow: metadata.contextWindow,
|
|
229
|
+
maxTokens: metadata.maxTokens,
|
|
228
230
|
};
|
|
229
231
|
}),
|
|
230
232
|
);
|
|
@@ -241,18 +243,65 @@ const OLLAMA_FALLBACK_CONTEXT_WINDOW = 128_000;
|
|
|
241
243
|
/** Cap max output tokens at a value that matches OMP's other openai-responses defaults. */
|
|
242
244
|
const OLLAMA_DEFAULT_MAX_TOKENS = 8192;
|
|
243
245
|
|
|
244
|
-
interface
|
|
246
|
+
interface OllamaResolvedMetadata {
|
|
245
247
|
contextWindow: number;
|
|
246
248
|
maxTokens: number;
|
|
249
|
+
capabilities?: string[];
|
|
250
|
+
reasoning?: boolean;
|
|
251
|
+
thinking?: ThinkingConfig;
|
|
252
|
+
input?: ("text" | "image")[];
|
|
253
|
+
}
|
|
254
|
+
|
|
255
|
+
interface OllamaShowMetadata {
|
|
256
|
+
contextWindow?: number;
|
|
257
|
+
maxTokens?: number;
|
|
258
|
+
capabilities?: string[];
|
|
259
|
+
reasoning?: boolean;
|
|
260
|
+
thinking?: ThinkingConfig;
|
|
261
|
+
input?: ("text" | "image")[];
|
|
262
|
+
}
|
|
263
|
+
|
|
264
|
+
function getOllamaContextWindow(modelInfo: Record<string, unknown> | undefined): number | undefined {
|
|
265
|
+
if (!modelInfo) {
|
|
266
|
+
return undefined;
|
|
267
|
+
}
|
|
268
|
+
for (const [key, value] of Object.entries(modelInfo)) {
|
|
269
|
+
if (typeof value !== "number" || value <= 0) {
|
|
270
|
+
continue;
|
|
271
|
+
}
|
|
272
|
+
if (key.endsWith(".context_length") || key.endsWith(".num_ctx") || key.endsWith(".context_window")) {
|
|
273
|
+
return value;
|
|
274
|
+
}
|
|
275
|
+
}
|
|
276
|
+
}
|
|
277
|
+
|
|
278
|
+
function getOllamaCapabilities(value: unknown): string[] | undefined {
|
|
279
|
+
if (!Array.isArray(value)) {
|
|
280
|
+
return undefined;
|
|
281
|
+
}
|
|
282
|
+
return value.filter((item): item is string => typeof item === "string");
|
|
283
|
+
}
|
|
284
|
+
|
|
285
|
+
function getOllamaThinkingConfig(capabilities: string[] | undefined): ThinkingConfig | undefined {
|
|
286
|
+
if (!capabilities?.includes("thinking")) {
|
|
287
|
+
return undefined;
|
|
288
|
+
}
|
|
289
|
+
return {
|
|
290
|
+
mode: "effort",
|
|
291
|
+
minLevel: Effort.Minimal,
|
|
292
|
+
maxLevel: Effort.High,
|
|
293
|
+
};
|
|
247
294
|
}
|
|
248
295
|
|
|
249
296
|
/**
|
|
250
|
-
* Query Ollama's `/api/show` endpoint for a single model and pull
|
|
251
|
-
* context
|
|
252
|
-
*
|
|
253
|
-
* unavailable so callers can layer their own fallback.
|
|
297
|
+
* Query Ollama's `/api/show` endpoint for a single model and pull native
|
|
298
|
+
* context and capability metadata from the response. Returns `undefined` when
|
|
299
|
+
* the endpoint is unavailable so callers can layer their own fallback.
|
|
254
300
|
*/
|
|
255
|
-
async function
|
|
301
|
+
async function fetchOllamaShowMetadata(
|
|
302
|
+
nativeBaseUrl: string,
|
|
303
|
+
modelId: string,
|
|
304
|
+
): Promise<OllamaShowMetadata | undefined> {
|
|
256
305
|
try {
|
|
257
306
|
const response = await fetch(`${nativeBaseUrl}/api/show`, {
|
|
258
307
|
method: "POST",
|
|
@@ -262,13 +311,21 @@ async function fetchOllamaShowLimits(nativeBaseUrl: string, modelId: string): Pr
|
|
|
262
311
|
if (!response.ok) {
|
|
263
312
|
return undefined;
|
|
264
313
|
}
|
|
265
|
-
const payload = (await response.json()) as { model_info?: Record<string, unknown> };
|
|
266
|
-
const
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
314
|
+
const payload = (await response.json()) as { capabilities?: unknown; model_info?: Record<string, unknown> };
|
|
315
|
+
const capabilities = getOllamaCapabilities(payload.capabilities);
|
|
316
|
+
const contextWindow = getOllamaContextWindow(payload.model_info);
|
|
317
|
+
return {
|
|
318
|
+
contextWindow,
|
|
319
|
+
maxTokens: contextWindow ? OLLAMA_DEFAULT_MAX_TOKENS : undefined,
|
|
320
|
+
capabilities,
|
|
321
|
+
reasoning: capabilities ? capabilities.includes("thinking") : undefined,
|
|
322
|
+
thinking: getOllamaThinkingConfig(capabilities),
|
|
323
|
+
input: capabilities
|
|
324
|
+
? capabilities.includes("vision")
|
|
325
|
+
? (["text", "image"] as Array<"text" | "image">)
|
|
326
|
+
: (["text"] as Array<"text">)
|
|
327
|
+
: undefined,
|
|
328
|
+
};
|
|
272
329
|
} catch {
|
|
273
330
|
// fall through; caller decides on the fallback
|
|
274
331
|
}
|
|
@@ -276,23 +333,27 @@ async function fetchOllamaShowLimits(nativeBaseUrl: string, modelId: string): Pr
|
|
|
276
333
|
}
|
|
277
334
|
|
|
278
335
|
/**
|
|
279
|
-
* Build a resolver that fetches `/api/show`
|
|
280
|
-
* result in-memory for the lifetime of the manager. Successful lookups are
|
|
336
|
+
* Build a resolver that fetches `/api/show` metadata per model id and caches
|
|
337
|
+
* the result in-memory for the lifetime of the manager. Successful lookups are
|
|
281
338
|
* cached so repeated `fetchDynamicModels` calls do not refetch; failed
|
|
282
339
|
* lookups stay uncached so a later refresh can recover.
|
|
283
340
|
*/
|
|
284
|
-
function
|
|
285
|
-
const cache = new Map<string, Promise<
|
|
341
|
+
function createOllamaMetadataResolver(nativeBaseUrl: string): (modelId: string) => Promise<OllamaResolvedMetadata> {
|
|
342
|
+
const cache = new Map<string, Promise<OllamaResolvedMetadata>>();
|
|
286
343
|
return modelId => {
|
|
287
344
|
const cached = cache.get(modelId);
|
|
288
345
|
if (cached) return cached;
|
|
289
346
|
const pending = (async () => {
|
|
290
|
-
const
|
|
291
|
-
if (!
|
|
347
|
+
const metadata = await fetchOllamaShowMetadata(nativeBaseUrl, modelId);
|
|
348
|
+
if (!metadata) {
|
|
292
349
|
cache.delete(modelId);
|
|
293
350
|
return { contextWindow: OLLAMA_FALLBACK_CONTEXT_WINDOW, maxTokens: OLLAMA_DEFAULT_MAX_TOKENS };
|
|
294
351
|
}
|
|
295
|
-
return
|
|
352
|
+
return {
|
|
353
|
+
...metadata,
|
|
354
|
+
contextWindow: metadata.contextWindow ?? OLLAMA_FALLBACK_CONTEXT_WINDOW,
|
|
355
|
+
maxTokens: metadata.maxTokens ?? OLLAMA_DEFAULT_MAX_TOKENS,
|
|
356
|
+
};
|
|
296
357
|
})();
|
|
297
358
|
cache.set(modelId, pending);
|
|
298
359
|
void pending.catch(() => cache.delete(modelId));
|
|
@@ -702,7 +763,7 @@ export function ollamaModelManagerOptions(config?: OllamaModelManagerConfig): Mo
|
|
|
702
763
|
const baseUrl = normalizeOllamaBaseUrl(config?.baseUrl);
|
|
703
764
|
const nativeBaseUrl = toOllamaNativeBaseUrl(baseUrl);
|
|
704
765
|
const references = createBundledReferenceMap<"openai-responses">("ollama" as Parameters<typeof getBundledModels>[0]);
|
|
705
|
-
const
|
|
766
|
+
const resolveMetadata = createOllamaMetadataResolver(nativeBaseUrl);
|
|
706
767
|
return {
|
|
707
768
|
providerId: "ollama",
|
|
708
769
|
fetchDynamicModels: async () => {
|
|
@@ -727,13 +788,20 @@ export function ollamaModelManagerOptions(config?: OllamaModelManagerConfig): Mo
|
|
|
727
788
|
if (openAiCompatible && openAiCompatible.length > 0) {
|
|
728
789
|
await Promise.all(
|
|
729
790
|
openAiCompatible.map(async model => {
|
|
730
|
-
const
|
|
731
|
-
model.contextWindow =
|
|
791
|
+
const metadata = await resolveMetadata(model.id);
|
|
792
|
+
model.contextWindow = metadata.contextWindow;
|
|
793
|
+
if (metadata.reasoning !== undefined) {
|
|
794
|
+
model.reasoning = metadata.reasoning;
|
|
795
|
+
model.thinking = metadata.thinking;
|
|
796
|
+
}
|
|
797
|
+
if (metadata.input) {
|
|
798
|
+
model.input = metadata.input;
|
|
799
|
+
}
|
|
732
800
|
}),
|
|
733
801
|
);
|
|
734
802
|
return openAiCompatible;
|
|
735
803
|
}
|
|
736
|
-
const nativeFallback = await fetchOllamaNativeModels(baseUrl,
|
|
804
|
+
const nativeFallback = await fetchOllamaNativeModels(baseUrl, resolveMetadata);
|
|
737
805
|
if (nativeFallback && nativeFallback.length > 0) {
|
|
738
806
|
return nativeFallback;
|
|
739
807
|
}
|
|
@@ -1407,8 +1475,11 @@ export function vllmModelManagerOptions(config?: VllmModelManagerConfig): ModelM
|
|
|
1407
1475
|
baseUrl,
|
|
1408
1476
|
apiKey,
|
|
1409
1477
|
mapModel: (entry, defaults) => {
|
|
1410
|
-
const
|
|
1411
|
-
return
|
|
1478
|
+
const model = mapWithBundledReference(entry, defaults, references.get(defaults.id));
|
|
1479
|
+
return {
|
|
1480
|
+
...model,
|
|
1481
|
+
contextWindow: toPositiveNumber(entry.max_model_len, model.contextWindow),
|
|
1482
|
+
};
|
|
1412
1483
|
},
|
|
1413
1484
|
}),
|
|
1414
1485
|
};
|