@oh-my-pi/pi-ai 14.5.0 → 14.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,12 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ## [14.5.1] - 2026-04-26
6
+
7
+ ### Fixed
8
+
9
+ - Fixed NVIDIA NIM DeepSeek-V4 models leaking chat-template tool-call markers (e.g. `<|DSML|tool_calls|>`) into visible response text by stripping the special tokens from streamed `delta.content` ([#798](https://github.com/can1357/oh-my-pi/issues/798))
10
+
5
11
  ## [14.4.0] - 2026-04-26
6
12
 
7
13
  ### Added
@@ -89,6 +95,7 @@
89
95
  - Fixed shell execution failure responses to preserve all result fields when sanitizing, preventing truncated metadata in stream results
90
96
  - Fixed context overflow detection to recognize `model_context_window_exceeded` from z.ai / GLM providers, preventing infinite retry loops when context window is exceeded ([#638](https://github.com/can1357/oh-my-pi/issues/638))
91
97
  - Fixed strict tool schema enforcement to preserve `additionalProperties: false` and required keys for reused nested object schemas, preventing invalid `todo_write` function schemas in Codex/OpenAI requests
98
+ - Fixed GitHub Copilot reasoning regressions by preserving GPT-5.x / Claude 4.x reasoning controls instead of stripping them from requests ([#773](https://github.com/can1357/oh-my-pi/issues/773))
92
99
 
93
100
  ## [14.1.0] - 2026-04-11
94
101
 
package/README.md CHANGED
@@ -72,6 +72,7 @@ Unified LLM API with automatic model discovery, provider configuration, token an
72
72
  - **Qwen Portal** (supports `QWEN_OAUTH_TOKEN` or `QWEN_PORTAL_API_KEY`)
73
73
  - **Cloudflare AI Gateway** (requires `CLOUDFLARE_AI_GATEWAY_API_KEY` and provider-specific gateway base URL)
74
74
  - **Ollama** (local OpenAI-compatible runtime; optional `OLLAMA_API_KEY`)
75
+ - **Ollama Cloud** (hosted native Ollama API; requires `OLLAMA_CLOUD_API_KEY`)
75
76
  - **llama.cpp** (local OpenAI and Anthropic compatible inference server)
76
77
  - **vLLM** (OpenAI-compatible server; `VLLM_API_KEY` for secured deployments)
77
78
  - **GitHub Copilot** (requires OAuth, see below)
@@ -690,13 +691,14 @@ console.log(`Using ${model.name} via ${model.api} API`);
690
691
 
691
692
  ### Custom Models
692
693
 
693
- You can create custom models for local inference servers or custom endpoints:
694
- For Ollama, `OLLAMA_API_KEY` is optional and mainly needed for authenticated/self-hosted gateways.
694
+ You can create custom models for local inference servers or custom endpoints.
695
+
696
+ For local Ollama, `OLLAMA_API_KEY` is optional; it is mainly needed when the server sits behind an authenticated or self-hosted gateway. The `ollama` provider remains the local OpenAI-compatible runtime integration.
695
697
 
696
698
  ```typescript
697
699
  import { Model, stream } from "@oh-my-pi/pi-ai";
698
700
 
699
- // Example: Ollama using OpenAI-compatible API
701
+ // Example: local Ollama using the OpenAI-compatible API
700
702
  const ollamaModel: Model<"openai-completions"> = {
701
703
  id: "llama-3.1-8b",
702
704
  name: "Llama 3.1 8B (Ollama)",
@@ -710,6 +712,28 @@ const ollamaModel: Model<"openai-completions"> = {
710
712
  maxTokens: 32000,
711
713
  };
712
714
 
715
+ const localResponse = await stream(ollamaModel, context, {
716
+ apiKey: process.env.OLLAMA_API_KEY, // Optional; local Ollama usually runs without auth
717
+ });
718
+
719
+ // Example: Ollama Cloud using the native /api/chat transport
720
+ const ollamaCloudModel: Model<"ollama-chat"> = {
721
+ id: "gpt-oss:120b",
722
+ name: "GPT OSS 120B (Ollama Cloud)",
723
+ api: "ollama-chat",
724
+ provider: "ollama-cloud",
725
+ baseUrl: "https://ollama.com",
726
+ reasoning: true,
727
+ input: ["text", "image"],
728
+ cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
729
+ contextWindow: 262144,
730
+ maxTokens: 8192,
731
+ };
732
+
733
+ const cloudResponse = await stream(ollamaCloudModel, context, {
734
+ apiKey: process.env.OLLAMA_CLOUD_API_KEY,
735
+ });
736
+
713
737
  // Example: LiteLLM proxy with explicit compat settings
714
738
  const litellmModel: Model<"openai-completions"> = {
715
739
  id: "gpt-4o",
@@ -744,11 +768,6 @@ const proxyModel: Model<"anthropic-messages"> = {
744
768
  "X-Custom-Auth": "bearer-token-here",
745
769
  },
746
770
  };
747
-
748
- // Use the custom model
749
- const response = await stream(ollamaModel, context, {
750
- apiKey: process.env.OLLAMA_API_KEY, // Optional; local Ollama usually runs without auth
751
- });
752
771
  ```
753
772
 
754
773
  ### OpenAI Compatibility Settings
@@ -928,6 +947,7 @@ In Node.js environments, you can set environment variables to avoid passing API
928
947
  | OpenRouter | `OPENROUTER_API_KEY` |
929
948
  | LiteLLM | `LITELLM_API_KEY` |
930
949
  | Ollama | `OLLAMA_API_KEY` (optional for local deployments) |
950
+ | Ollama Cloud | `OLLAMA_CLOUD_API_KEY` |
931
951
  | Qwen Portal | `QWEN_OAUTH_TOKEN` or `QWEN_PORTAL_API_KEY` |
932
952
  | zAI | `ZAI_API_KEY` |
933
953
  | MiniMax Code | `MINIMAX_CODE_API_KEY` (international) or `MINIMAX_CODE_CN_API_KEY` (China) |
@@ -957,7 +977,8 @@ Provider endpoint defaults for the current OpenAI-compatible integrations:
957
977
  - ZenMux (OpenAI): `https://zenmux.ai/api/v1`
958
978
  - ZenMux (Anthropic models): `https://zenmux.ai/api/anthropic`
959
979
  - vLLM: `http://127.0.0.1:8000/v1`
960
- - Ollama: local OpenAI-compatible runtime
980
+ - Ollama: local OpenAI-compatible runtime (`http://127.0.0.1:11434/v1`)
981
+ - Ollama Cloud: native Ollama API served under `https://ollama.com/api` (configure the base URL as `https://ollama.com`)
961
982
  - LiteLLM: `http://localhost:4000/v1`
962
983
  - Cloudflare AI Gateway: `https://gateway.ai.cloudflare.com/v1/<account>/<gateway>/anthropic`
963
984
  - Qwen Portal: `https://portal.qwen.ai/v1`
@@ -1049,7 +1070,7 @@ Credentials are saved to `agent.db` in the agent directory. `/login qianfan` ope
1049
1070
 
1050
1071
  `login` supports OAuth providers (Anthropic, OpenAI Codex, GitHub Copilot, Gemini CLI, Antigravity) and API-key onboarding flows.
1051
1072
 
1052
- For the current OpenAI-compatible integrations, API-key onboarding covers Together, Moonshot, Qianfan, NVIDIA, NanoGPT, Hugging Face, Venice, Xiaomi, vLLM, LiteLLM, Cloudflare AI Gateway, and Qwen Portal. Ollama is typically local and unauthenticated; set `OLLAMA_API_KEY` only when your Ollama deployment enforces bearer auth.
1073
+ API-key onboarding currently covers Together, Moonshot, Qianfan, NVIDIA, NanoGPT, Hugging Face, Venice, Xiaomi, vLLM, LiteLLM, Cloudflare AI Gateway, Qwen Portal, and Ollama Cloud. Ollama remains the local runtime integration; set `OLLAMA_API_KEY` only when your local or self-hosted deployment enforces bearer auth.
1053
1074
 
1054
1075
  ### Programmatic OAuth
1055
1076
 
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "type": "module",
3
3
  "name": "@oh-my-pi/pi-ai",
4
- "version": "14.5.0",
4
+ "version": "14.5.2",
5
5
  "description": "Unified LLM API with automatic model discovery and provider configuration",
6
6
  "homepage": "https://github.com/can1357/oh-my-pi",
7
7
  "author": "Can Boluk",
@@ -46,8 +46,8 @@
46
46
  "@aws-sdk/credential-provider-node": "^3.972.36",
47
47
  "@bufbuild/protobuf": "^2.12.0",
48
48
  "@google/genai": "^1.50.1",
49
- "@oh-my-pi/pi-natives": "14.5.0",
50
- "@oh-my-pi/pi-utils": "14.5.0",
49
+ "@oh-my-pi/pi-natives": "14.5.2",
50
+ "@oh-my-pi/pi-utils": "14.5.2",
51
51
  "@sinclair/typebox": "^0.34.49",
52
52
  "@smithy/node-http-handler": "^4.6.1",
53
53
  "ajv": "^8.20.0",
@@ -58,7 +58,7 @@
58
58
  "zod": "4.3.6"
59
59
  },
60
60
  "devDependencies": {
61
- "@types/bun": "^1.3.13"
61
+ "@types/bun": "^1.3"
62
62
  },
63
63
  "engines": {
64
64
  "bun": ">=1.3.7"
@@ -24,6 +24,7 @@ const BUILTIN_APIS = new Set<KnownApi>([
24
24
  "google-generative-ai",
25
25
  "google-gemini-cli",
26
26
  "google-vertex",
27
+ "ollama-chat",
27
28
  "cursor-agent",
28
29
  ]);
29
30
 
@@ -51,6 +51,7 @@ import { loginMoonshot } from "./utils/oauth/moonshot";
51
51
  import { loginNanoGPT } from "./utils/oauth/nanogpt";
52
52
  import { loginNvidia } from "./utils/oauth/nvidia";
53
53
  import { loginOllama } from "./utils/oauth/ollama";
54
+ import { loginOllamaCloud } from "./utils/oauth/ollama-cloud";
54
55
  import { loginOpenAICodex } from "./utils/oauth/openai-codex";
55
56
  import { loginOpenCode } from "./utils/oauth/opencode";
56
57
  import { loginParallel } from "./utils/oauth/parallel";
@@ -838,6 +839,11 @@ export class AuthStorage {
838
839
  await saveApiKeyCredential(apiKey);
839
840
  return;
840
841
  }
842
+ case "ollama-cloud": {
843
+ const apiKey = await loginOllamaCloud(ctrl);
844
+ await saveApiKeyCredential(apiKey);
845
+ return;
846
+ }
841
847
  case "cerebras": {
842
848
  const apiKey = await loginCerebras(ctrl);
843
849
  await saveApiKeyCredential(apiKey);
package/src/cli.ts CHANGED
@@ -12,6 +12,7 @@ import { loginKilo } from "./utils/oauth/kilo";
12
12
  import { loginKimi } from "./utils/oauth/kimi";
13
13
  import { loginMiniMaxCode, loginMiniMaxCodeCn } from "./utils/oauth/minimax-code";
14
14
  import { loginNanoGPT } from "./utils/oauth/nanogpt";
15
+ import { loginOllamaCloud } from "./utils/oauth/ollama-cloud";
15
16
  import { loginOpenAICodex } from "./utils/oauth/openai-codex";
16
17
  import { loginParallel } from "./utils/oauth/parallel";
17
18
  import { loginTavily } from "./utils/oauth/tavily";
@@ -271,6 +272,23 @@ async function login(provider: OAuthProvider): Promise<void> {
271
272
  console.log(`\nAPI key saved to ~/.omp/agent/agent.db`);
272
273
  return;
273
274
  }
275
+ case "ollama-cloud": {
276
+ const apiKey = await loginOllamaCloud({
277
+ onAuth(info) {
278
+ const { url, instructions } = info;
279
+ console.log(`\nOpen this URL in your browser:\n${url}`);
280
+ if (instructions) console.log(instructions);
281
+ console.log();
282
+ },
283
+ onPrompt(p) {
284
+ return promptFn(`${p.message}${p.placeholder ? ` (${p.placeholder})` : ""}:`);
285
+ },
286
+ });
287
+ storage.saveApiKey(provider, apiKey);
288
+ console.log(`\nAPI key saved to ~/.omp/agent/agent.db`);
289
+ return;
290
+ }
291
+
274
292
  case "minimax-code": {
275
293
  const apiKey = await loginMiniMaxCode({
276
294
  onAuth(info) {
@@ -347,6 +365,7 @@ Providers:
347
365
  minimax-code-cn MiniMax Coding Plan (China)
348
366
  cursor Cursor (Claude, GPT, etc.)
349
367
  zenmux ZenMux
368
+ ollama-cloud Ollama Cloud
350
369
 
351
370
  Examples:
352
371
  bunx @oh-my-pi/pi-ai login # interactive provider selection
package/src/index.ts CHANGED
@@ -16,6 +16,7 @@ export * from "./providers/google";
16
16
  export * from "./providers/google-gemini-cli";
17
17
  export * from "./providers/google-vertex";
18
18
  export * from "./providers/kimi";
19
+ export * from "./providers/ollama";
19
20
  export type { OpenAICodexResponsesOptions } from "./providers/openai-codex-responses";
20
21
  export * from "./providers/openai-completions";
21
22
  export * from "./providers/openai-responses";
@@ -158,7 +158,7 @@ export function applyGeneratedModelPolicies(models: ApiModel<Api>[]): void {
158
158
  *
159
159
  * When a model's context is exhausted, the agent can promote to a sibling
160
160
  * model with a larger context window on the same provider:
161
- * - `-spark` variants promote to `gpt-5.5`.
161
+ * - `codex-spark` variants promote to `gpt-5.5`.
162
162
  * - `gpt-5.5` (270K input) promotes to `gpt-5.4` (1M input).
163
163
  */
164
164
  export function linkOpenAIPromotionTargets(models: ApiModel<Api>[]): void {
@@ -472,6 +472,9 @@ function inferFallbackEfforts<TApi extends Api>(model: ApiModel<TApi>): readonly
472
472
  if (model.api === "anthropic-messages") {
473
473
  return DEFAULT_REASONING_EFFORTS_WITH_XHIGH;
474
474
  }
475
+ if (model.name.includes("deepseek-v4")) {
476
+ return DEFAULT_REASONING_EFFORTS_WITH_XHIGH;
477
+ }
475
478
  if (model.api === "bedrock-converse-stream") {
476
479
  return DEFAULT_REASONING_EFFORTS;
477
480
  }