@oh-my-pi/pi-ai 14.5.1 → 14.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +18 -0
- package/README.md +31 -10
- package/package.json +4 -4
- package/src/api-registry.ts +1 -0
- package/src/auth-storage.ts +12 -0
- package/src/cli.ts +19 -0
- package/src/index.ts +1 -0
- package/src/models.json +432 -208
- package/src/provider-models/bundled-references.ts +38 -0
- package/src/provider-models/descriptors.ts +14 -0
- package/src/provider-models/index.ts +1 -0
- package/src/provider-models/ollama.ts +149 -0
- package/src/provider-models/openai-compat.ts +95 -44
- package/src/providers/anthropic.ts +1 -5
- package/src/providers/ollama.ts +497 -0
- package/src/providers/openai-completions-compat.ts +6 -1
- package/src/providers/openai-completions.ts +3 -1
- package/src/providers/openai-responses.ts +1 -1
- package/src/providers/register-builtins.ts +20 -0
- package/src/stream.ts +13 -0
- package/src/types.ts +5 -0
- package/src/utils/fireworks-model-id.ts +13 -0
- package/src/utils/oauth/fireworks.ts +15 -0
- package/src/utils/oauth/index.ts +16 -0
- package/src/utils/oauth/ollama-cloud.ts +28 -0
- package/src/utils/oauth/types.ts +2 -0
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,23 @@
|
|
|
2
2
|
|
|
3
3
|
## [Unreleased]
|
|
4
4
|
|
|
5
|
+
## [14.5.3] - 2026-04-27
|
|
6
|
+
### Added
|
|
7
|
+
|
|
8
|
+
- Added `fireworks` as a supported provider with API key login flow and credential storage
|
|
9
|
+
- Added Fireworks model catalog support with `fireworks`-scoped openai-completions models `glm-5`, `glm-5.1`, `kimi-k2.5`, `kimi-k2.6`, and `minimax-m2.7`
|
|
10
|
+
- Added built-in discovery wiring so providers with base URL `api.fireworks.ai` are recognized as OpenAI-compatible and can use streaming token control
|
|
11
|
+
|
|
12
|
+
### Changed
|
|
13
|
+
|
|
14
|
+
- Updated the built-in model catalog to use corrected `contextWindow` and `maxTokens` values for many existing models instead of placeholder limits
|
|
15
|
+
- Updated several model cost entries, including cache-read pricing, to corrected values
|
|
16
|
+
|
|
17
|
+
### Fixed
|
|
18
|
+
|
|
19
|
+
- Fixed Fireworks request formatting by translating between public model IDs and API wire IDs when sending OpenAI-completions requests
|
|
20
|
+
- Fixed OpenAI-compatible model parameter handling for Fireworks by allowing `max_tokens` to be sent during requests
|
|
21
|
+
|
|
5
22
|
## [14.5.1] - 2026-04-26
|
|
6
23
|
|
|
7
24
|
### Fixed
|
|
@@ -95,6 +112,7 @@
|
|
|
95
112
|
- Fixed shell execution failure responses to preserve all result fields when sanitizing, preventing truncated metadata in stream results
|
|
96
113
|
- Fixed context overflow detection to recognize `model_context_window_exceeded` from z.ai / GLM providers, preventing infinite retry loops when context window is exceeded ([#638](https://github.com/can1357/oh-my-pi/issues/638))
|
|
97
114
|
- Fixed strict tool schema enforcement to preserve `additionalProperties: false` and required keys for reused nested object schemas, preventing invalid `todo_write` function schemas in Codex/OpenAI requests
|
|
115
|
+
- Fixed GitHub Copilot reasoning regressions by preserving GPT-5.x / Claude 4.x reasoning controls instead of stripping them from requests ([#773](https://github.com/can1357/oh-my-pi/issues/773))
|
|
98
116
|
|
|
99
117
|
## [14.1.0] - 2026-04-11
|
|
100
118
|
|
package/README.md
CHANGED
|
@@ -72,6 +72,7 @@ Unified LLM API with automatic model discovery, provider configuration, token an
|
|
|
72
72
|
- **Qwen Portal** (supports `QWEN_OAUTH_TOKEN` or `QWEN_PORTAL_API_KEY`)
|
|
73
73
|
- **Cloudflare AI Gateway** (requires `CLOUDFLARE_AI_GATEWAY_API_KEY` and provider-specific gateway base URL)
|
|
74
74
|
- **Ollama** (local OpenAI-compatible runtime; optional `OLLAMA_API_KEY`)
|
|
75
|
+
- **Ollama Cloud** (hosted native Ollama API; requires `OLLAMA_CLOUD_API_KEY`)
|
|
75
76
|
- **llama.cpp** (local OpenAI and Anthropic compatible inference server)
|
|
76
77
|
- **vLLM** (OpenAI-compatible server; `VLLM_API_KEY` for secured deployments)
|
|
77
78
|
- **GitHub Copilot** (requires OAuth, see below)
|
|
@@ -690,13 +691,14 @@ console.log(`Using ${model.name} via ${model.api} API`);
|
|
|
690
691
|
|
|
691
692
|
### Custom Models
|
|
692
693
|
|
|
693
|
-
You can create custom models for local inference servers or custom endpoints
|
|
694
|
-
|
|
694
|
+
You can create custom models for local inference servers or custom endpoints.
|
|
695
|
+
|
|
696
|
+
For local Ollama, `OLLAMA_API_KEY` is optional and mainly needed for authenticated/self-hosted gateways. `ollama` remains the local OpenAI-compatible runtime integration.
|
|
695
697
|
|
|
696
698
|
```typescript
|
|
697
699
|
import { Model, stream } from "@oh-my-pi/pi-ai";
|
|
698
700
|
|
|
699
|
-
// Example: Ollama using OpenAI-compatible API
|
|
701
|
+
// Example: local Ollama using the OpenAI-compatible API
|
|
700
702
|
const ollamaModel: Model<"openai-completions"> = {
|
|
701
703
|
id: "llama-3.1-8b",
|
|
702
704
|
name: "Llama 3.1 8B (Ollama)",
|
|
@@ -710,6 +712,28 @@ const ollamaModel: Model<"openai-completions"> = {
|
|
|
710
712
|
maxTokens: 32000,
|
|
711
713
|
};
|
|
712
714
|
|
|
715
|
+
const localResponse = await stream(ollamaModel, context, {
|
|
716
|
+
apiKey: process.env.OLLAMA_API_KEY, // Optional; local Ollama usually runs without auth
|
|
717
|
+
});
|
|
718
|
+
|
|
719
|
+
// Example: Ollama Cloud using the native /api/chat transport
|
|
720
|
+
const ollamaCloudModel: Model<"ollama-chat"> = {
|
|
721
|
+
id: "gpt-oss:120b",
|
|
722
|
+
name: "GPT OSS 120B (Ollama Cloud)",
|
|
723
|
+
api: "ollama-chat",
|
|
724
|
+
provider: "ollama-cloud",
|
|
725
|
+
baseUrl: "https://ollama.com",
|
|
726
|
+
reasoning: true,
|
|
727
|
+
input: ["text", "image"],
|
|
728
|
+
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
|
729
|
+
contextWindow: 262144,
|
|
730
|
+
maxTokens: 8192,
|
|
731
|
+
};
|
|
732
|
+
|
|
733
|
+
const cloudResponse = await stream(ollamaCloudModel, context, {
|
|
734
|
+
apiKey: process.env.OLLAMA_CLOUD_API_KEY,
|
|
735
|
+
});
|
|
736
|
+
|
|
713
737
|
// Example: LiteLLM proxy with explicit compat settings
|
|
714
738
|
const litellmModel: Model<"openai-completions"> = {
|
|
715
739
|
id: "gpt-4o",
|
|
@@ -744,11 +768,6 @@ const proxyModel: Model<"anthropic-messages"> = {
|
|
|
744
768
|
"X-Custom-Auth": "bearer-token-here",
|
|
745
769
|
},
|
|
746
770
|
};
|
|
747
|
-
|
|
748
|
-
// Use the custom model
|
|
749
|
-
const response = await stream(ollamaModel, context, {
|
|
750
|
-
apiKey: process.env.OLLAMA_API_KEY, // Optional; local Ollama usually runs without auth
|
|
751
|
-
});
|
|
752
771
|
```
|
|
753
772
|
|
|
754
773
|
### OpenAI Compatibility Settings
|
|
@@ -928,6 +947,7 @@ In Node.js environments, you can set environment variables to avoid passing API
|
|
|
928
947
|
| OpenRouter | `OPENROUTER_API_KEY` |
|
|
929
948
|
| LiteLLM | `LITELLM_API_KEY` |
|
|
930
949
|
| Ollama | `OLLAMA_API_KEY` (optional for local deployments) |
|
|
950
|
+
| Ollama Cloud | `OLLAMA_CLOUD_API_KEY` |
|
|
931
951
|
| Qwen Portal | `QWEN_OAUTH_TOKEN` or `QWEN_PORTAL_API_KEY` |
|
|
932
952
|
| zAI | `ZAI_API_KEY` |
|
|
933
953
|
| MiniMax Code | `MINIMAX_CODE_API_KEY` (international) or `MINIMAX_CODE_CN_API_KEY` (China) |
|
|
@@ -957,7 +977,8 @@ Provider endpoint defaults for the current OpenAI-compatible integrations:
|
|
|
957
977
|
- ZenMux (OpenAI): `https://zenmux.ai/api/v1`
|
|
958
978
|
- ZenMux (Anthropic models): `https://zenmux.ai/api/anthropic`
|
|
959
979
|
- vLLM: `http://127.0.0.1:8000/v1`
|
|
960
|
-
- Ollama: local OpenAI-compatible runtime
|
|
980
|
+
- Ollama: local OpenAI-compatible runtime (`http://127.0.0.1:11434/v1`)
|
|
981
|
+
- Ollama Cloud: native Ollama API host (`https://ollama.com/api`, configured here as base URL `https://ollama.com`)
|
|
961
982
|
- LiteLLM: `http://localhost:4000/v1`
|
|
962
983
|
- Cloudflare AI Gateway: `https://gateway.ai.cloudflare.com/v1/<account>/<gateway>/anthropic`
|
|
963
984
|
- Qwen Portal: `https://portal.qwen.ai/v1`
|
|
@@ -1049,7 +1070,7 @@ Credentials are saved to `agent.db` in the agent directory. `/login qianfan` ope
|
|
|
1049
1070
|
|
|
1050
1071
|
`login` supports OAuth providers (Anthropic, OpenAI Codex, GitHub Copilot, Gemini CLI, Antigravity) and API-key onboarding flows.
|
|
1051
1072
|
|
|
1052
|
-
For the current
|
|
1073
|
+
For the current API-key onboarding flows, the library covers Together, Moonshot, Qianfan, NVIDIA, NanoGPT, Hugging Face, Venice, Xiaomi, vLLM, LiteLLM, Cloudflare AI Gateway, Qwen Portal, and Ollama Cloud. Ollama remains the local runtime integration; set `OLLAMA_API_KEY` only when your local or self-hosted deployment enforces bearer auth.
|
|
1053
1074
|
|
|
1054
1075
|
### Programmatic OAuth
|
|
1055
1076
|
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"type": "module",
|
|
3
3
|
"name": "@oh-my-pi/pi-ai",
|
|
4
|
-
"version": "14.5.1",
|
|
4
|
+
"version": "14.5.3",
|
|
5
5
|
"description": "Unified LLM API with automatic model discovery and provider configuration",
|
|
6
6
|
"homepage": "https://github.com/can1357/oh-my-pi",
|
|
7
7
|
"author": "Can Boluk",
|
|
@@ -46,8 +46,8 @@
|
|
|
46
46
|
"@aws-sdk/credential-provider-node": "^3.972.36",
|
|
47
47
|
"@bufbuild/protobuf": "^2.12.0",
|
|
48
48
|
"@google/genai": "^1.50.1",
|
|
49
|
-
"@oh-my-pi/pi-natives": "14.5.1",
|
|
50
|
-
"@oh-my-pi/pi-utils": "14.5.1",
|
|
49
|
+
"@oh-my-pi/pi-natives": "14.5.3",
|
|
50
|
+
"@oh-my-pi/pi-utils": "14.5.3",
|
|
51
51
|
"@sinclair/typebox": "^0.34.49",
|
|
52
52
|
"@smithy/node-http-handler": "^4.6.1",
|
|
53
53
|
"ajv": "^8.20.0",
|
|
@@ -58,7 +58,7 @@
|
|
|
58
58
|
"zod": "4.3.6"
|
|
59
59
|
},
|
|
60
60
|
"devDependencies": {
|
|
61
|
-
"@types/bun": "^1.3
|
|
61
|
+
"@types/bun": "^1.3"
|
|
62
62
|
},
|
|
63
63
|
"engines": {
|
|
64
64
|
"bun": ">=1.3.7"
|
package/src/api-registry.ts
CHANGED
package/src/auth-storage.ts
CHANGED
|
@@ -36,6 +36,7 @@ import { loginAnthropic } from "./utils/oauth/anthropic";
|
|
|
36
36
|
import { loginCerebras } from "./utils/oauth/cerebras";
|
|
37
37
|
import { loginCloudflareAiGateway } from "./utils/oauth/cloudflare-ai-gateway";
|
|
38
38
|
import { loginCursor } from "./utils/oauth/cursor";
|
|
39
|
+
import { loginFireworks } from "./utils/oauth/fireworks";
|
|
39
40
|
import { loginGitHubCopilot } from "./utils/oauth/github-copilot";
|
|
40
41
|
import { loginGitLabDuo } from "./utils/oauth/gitlab-duo";
|
|
41
42
|
import { loginAntigravity } from "./utils/oauth/google-antigravity";
|
|
@@ -51,6 +52,7 @@ import { loginMoonshot } from "./utils/oauth/moonshot";
|
|
|
51
52
|
import { loginNanoGPT } from "./utils/oauth/nanogpt";
|
|
52
53
|
import { loginNvidia } from "./utils/oauth/nvidia";
|
|
53
54
|
import { loginOllama } from "./utils/oauth/ollama";
|
|
55
|
+
import { loginOllamaCloud } from "./utils/oauth/ollama-cloud";
|
|
54
56
|
import { loginOpenAICodex } from "./utils/oauth/openai-codex";
|
|
55
57
|
import { loginOpenCode } from "./utils/oauth/opencode";
|
|
56
58
|
import { loginParallel } from "./utils/oauth/parallel";
|
|
@@ -838,11 +840,21 @@ export class AuthStorage {
|
|
|
838
840
|
await saveApiKeyCredential(apiKey);
|
|
839
841
|
return;
|
|
840
842
|
}
|
|
843
|
+
case "ollama-cloud": {
|
|
844
|
+
const apiKey = await loginOllamaCloud(ctrl);
|
|
845
|
+
await saveApiKeyCredential(apiKey);
|
|
846
|
+
return;
|
|
847
|
+
}
|
|
841
848
|
case "cerebras": {
|
|
842
849
|
const apiKey = await loginCerebras(ctrl);
|
|
843
850
|
await saveApiKeyCredential(apiKey);
|
|
844
851
|
return;
|
|
845
852
|
}
|
|
853
|
+
case "fireworks": {
|
|
854
|
+
const apiKey = await loginFireworks(ctrl);
|
|
855
|
+
await saveApiKeyCredential(apiKey);
|
|
856
|
+
return;
|
|
857
|
+
}
|
|
846
858
|
case "zai": {
|
|
847
859
|
const apiKey = await loginZai(ctrl);
|
|
848
860
|
await saveApiKeyCredential(apiKey);
|
package/src/cli.ts
CHANGED
|
@@ -12,6 +12,7 @@ import { loginKilo } from "./utils/oauth/kilo";
|
|
|
12
12
|
import { loginKimi } from "./utils/oauth/kimi";
|
|
13
13
|
import { loginMiniMaxCode, loginMiniMaxCodeCn } from "./utils/oauth/minimax-code";
|
|
14
14
|
import { loginNanoGPT } from "./utils/oauth/nanogpt";
|
|
15
|
+
import { loginOllamaCloud } from "./utils/oauth/ollama-cloud";
|
|
15
16
|
import { loginOpenAICodex } from "./utils/oauth/openai-codex";
|
|
16
17
|
import { loginParallel } from "./utils/oauth/parallel";
|
|
17
18
|
import { loginTavily } from "./utils/oauth/tavily";
|
|
@@ -271,6 +272,23 @@ async function login(provider: OAuthProvider): Promise<void> {
|
|
|
271
272
|
console.log(`\nAPI key saved to ~/.omp/agent/agent.db`);
|
|
272
273
|
return;
|
|
273
274
|
}
|
|
275
|
+
case "ollama-cloud": {
|
|
276
|
+
const apiKey = await loginOllamaCloud({
|
|
277
|
+
onAuth(info) {
|
|
278
|
+
const { url, instructions } = info;
|
|
279
|
+
console.log(`\nOpen this URL in your browser:\n${url}`);
|
|
280
|
+
if (instructions) console.log(instructions);
|
|
281
|
+
console.log();
|
|
282
|
+
},
|
|
283
|
+
onPrompt(p) {
|
|
284
|
+
return promptFn(`${p.message}${p.placeholder ? ` (${p.placeholder})` : ""}:`);
|
|
285
|
+
},
|
|
286
|
+
});
|
|
287
|
+
storage.saveApiKey(provider, apiKey);
|
|
288
|
+
console.log(`\nAPI key saved to ~/.omp/agent/agent.db`);
|
|
289
|
+
return;
|
|
290
|
+
}
|
|
291
|
+
|
|
274
292
|
case "minimax-code": {
|
|
275
293
|
const apiKey = await loginMiniMaxCode({
|
|
276
294
|
onAuth(info) {
|
|
@@ -347,6 +365,7 @@ Providers:
|
|
|
347
365
|
minimax-code-cn MiniMax Coding Plan (China)
|
|
348
366
|
cursor Cursor (Claude, GPT, etc.)
|
|
349
367
|
zenmux ZenMux
|
|
368
|
+
ollama-cloud Ollama Cloud
|
|
350
369
|
|
|
351
370
|
Examples:
|
|
352
371
|
bunx @oh-my-pi/pi-ai login # interactive provider selection
|
package/src/index.ts
CHANGED
|
@@ -16,6 +16,7 @@ export * from "./providers/google";
|
|
|
16
16
|
export * from "./providers/google-gemini-cli";
|
|
17
17
|
export * from "./providers/google-vertex";
|
|
18
18
|
export * from "./providers/kimi";
|
|
19
|
+
export * from "./providers/ollama";
|
|
19
20
|
export type { OpenAICodexResponsesOptions } from "./providers/openai-codex-responses";
|
|
20
21
|
export * from "./providers/openai-completions";
|
|
21
22
|
export * from "./providers/openai-responses";
|