@oh-my-pi/pi-ai 14.5.1 → 14.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -95,6 +95,7 @@
95
95
  - Fixed shell execution failure responses to preserve all result fields when sanitizing, preventing truncated metadata in stream results
96
96
  - Fixed context overflow detection to recognize `model_context_window_exceeded` from z.ai / GLM providers, preventing infinite retry loops when context window is exceeded ([#638](https://github.com/can1357/oh-my-pi/issues/638))
97
97
  - Fixed strict tool schema enforcement to preserve `additionalProperties: false` and required keys for reused nested object schemas, preventing invalid `todo_write` function schemas in Codex/OpenAI requests
98
+ - Fixed GitHub Copilot reasoning regressions by preserving GPT-5.x / Claude 4.x reasoning controls instead of stripping them from requests ([#773](https://github.com/can1357/oh-my-pi/issues/773))
98
99
 
99
100
  ## [14.1.0] - 2026-04-11
100
101
 
package/README.md CHANGED
@@ -72,6 +72,7 @@ Unified LLM API with automatic model discovery, provider configuration, token an
72
72
  - **Qwen Portal** (supports `QWEN_OAUTH_TOKEN` or `QWEN_PORTAL_API_KEY`)
73
73
  - **Cloudflare AI Gateway** (requires `CLOUDFLARE_AI_GATEWAY_API_KEY` and provider-specific gateway base URL)
74
74
  - **Ollama** (local OpenAI-compatible runtime; optional `OLLAMA_API_KEY`)
75
+ - **Ollama Cloud** (hosted native Ollama API; requires `OLLAMA_CLOUD_API_KEY`)
75
76
  - **llama.cpp** (local OpenAI and Anthropic compatible inference server)
76
77
  - **vLLM** (OpenAI-compatible server; `VLLM_API_KEY` for secured deployments)
77
78
  - **GitHub Copilot** (requires OAuth, see below)
@@ -690,13 +691,14 @@ console.log(`Using ${model.name} via ${model.api} API`);
690
691
 
691
692
  ### Custom Models
692
693
 
693
- You can create custom models for local inference servers or custom endpoints:
694
- For Ollama, `OLLAMA_API_KEY` is optional and mainly needed for authenticated/self-hosted gateways.
694
+ You can create custom models for local inference servers or custom endpoints.
695
+
696
+ For local Ollama, `OLLAMA_API_KEY` is optional and mainly needed for authenticated/self-hosted gateways. `ollama` remains the local OpenAI-compatible runtime integration.
695
697
 
696
698
  ```typescript
697
699
  import { Model, stream } from "@oh-my-pi/pi-ai";
698
700
 
699
- // Example: Ollama using OpenAI-compatible API
701
+ // Example: local Ollama using the OpenAI-compatible API
700
702
  const ollamaModel: Model<"openai-completions"> = {
701
703
  id: "llama-3.1-8b",
702
704
  name: "Llama 3.1 8B (Ollama)",
@@ -710,6 +712,28 @@ const ollamaModel: Model<"openai-completions"> = {
710
712
  maxTokens: 32000,
711
713
  };
712
714
 
715
+ const localResponse = await stream(ollamaModel, context, {
716
+ apiKey: process.env.OLLAMA_API_KEY, // Optional; local Ollama usually runs without auth
717
+ });
718
+
719
+ // Example: Ollama Cloud using the native /api/chat transport
720
+ const ollamaCloudModel: Model<"ollama-chat"> = {
721
+ id: "gpt-oss:120b",
722
+ name: "GPT OSS 120B (Ollama Cloud)",
723
+ api: "ollama-chat",
724
+ provider: "ollama-cloud",
725
+ baseUrl: "https://ollama.com",
726
+ reasoning: true,
727
+ input: ["text", "image"],
728
+ cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
729
+ contextWindow: 262144,
730
+ maxTokens: 8192,
731
+ };
732
+
733
+ const cloudResponse = await stream(ollamaCloudModel, context, {
734
+ apiKey: process.env.OLLAMA_CLOUD_API_KEY,
735
+ });
736
+
713
737
  // Example: LiteLLM proxy with explicit compat settings
714
738
  const litellmModel: Model<"openai-completions"> = {
715
739
  id: "gpt-4o",
@@ -744,11 +768,6 @@ const proxyModel: Model<"anthropic-messages"> = {
744
768
  "X-Custom-Auth": "bearer-token-here",
745
769
  },
746
770
  };
747
-
748
- // Use the custom model
749
- const response = await stream(ollamaModel, context, {
750
- apiKey: process.env.OLLAMA_API_KEY, // Optional; local Ollama usually runs without auth
751
- });
752
771
  ```
753
772
 
754
773
  ### OpenAI Compatibility Settings
@@ -928,6 +947,7 @@ In Node.js environments, you can set environment variables to avoid passing API
928
947
  | OpenRouter | `OPENROUTER_API_KEY` |
929
948
  | LiteLLM | `LITELLM_API_KEY` |
930
949
  | Ollama | `OLLAMA_API_KEY` (optional for local deployments) |
950
+ | Ollama Cloud | `OLLAMA_CLOUD_API_KEY` |
931
951
  | Qwen Portal | `QWEN_OAUTH_TOKEN` or `QWEN_PORTAL_API_KEY` |
932
952
  | zAI | `ZAI_API_KEY` |
933
953
  | MiniMax Code | `MINIMAX_CODE_API_KEY` (international) or `MINIMAX_CODE_CN_API_KEY` (China) |
@@ -957,7 +977,8 @@ Provider endpoint defaults for the current OpenAI-compatible integrations:
957
977
  - ZenMux (OpenAI): `https://zenmux.ai/api/v1`
958
978
  - ZenMux (Anthropic models): `https://zenmux.ai/api/anthropic`
959
979
  - vLLM: `http://127.0.0.1:8000/v1`
960
- - Ollama: local OpenAI-compatible runtime
980
+ - Ollama: local OpenAI-compatible runtime (`http://127.0.0.1:11434/v1`)
981
+ - Ollama Cloud: native Ollama API host (`https://ollama.com/api`, configured here as base URL `https://ollama.com`)
961
982
  - LiteLLM: `http://localhost:4000/v1`
962
983
  - Cloudflare AI Gateway: `https://gateway.ai.cloudflare.com/v1/<account>/<gateway>/anthropic`
963
984
  - Qwen Portal: `https://portal.qwen.ai/v1`
@@ -1049,7 +1070,7 @@ Credentials are saved to `agent.db` in the agent directory. `/login qianfan` ope
1049
1070
 
1050
1071
  `login` supports OAuth providers (Anthropic, OpenAI Codex, GitHub Copilot, Gemini CLI, Antigravity) and API-key onboarding flows.
1051
1072
 
1052
- For the current OpenAI-compatible integrations, API-key onboarding covers Together, Moonshot, Qianfan, NVIDIA, NanoGPT, Hugging Face, Venice, Xiaomi, vLLM, LiteLLM, Cloudflare AI Gateway, and Qwen Portal. Ollama is typically local and unauthenticated; set `OLLAMA_API_KEY` only when your Ollama deployment enforces bearer auth.
1073
+ For the current API-key onboarding flows, the library covers Together, Moonshot, Qianfan, NVIDIA, NanoGPT, Hugging Face, Venice, Xiaomi, vLLM, LiteLLM, Cloudflare AI Gateway, Qwen Portal, and Ollama Cloud. Ollama remains the local runtime integration; set `OLLAMA_API_KEY` only when your local or self-hosted deployment enforces bearer auth.
1053
1074
 
1054
1075
  ### Programmatic OAuth
1055
1076
 
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "type": "module",
3
3
  "name": "@oh-my-pi/pi-ai",
4
- "version": "14.5.1",
4
+ "version": "14.5.2",
5
5
  "description": "Unified LLM API with automatic model discovery and provider configuration",
6
6
  "homepage": "https://github.com/can1357/oh-my-pi",
7
7
  "author": "Can Boluk",
@@ -46,8 +46,8 @@
46
46
  "@aws-sdk/credential-provider-node": "^3.972.36",
47
47
  "@bufbuild/protobuf": "^2.12.0",
48
48
  "@google/genai": "^1.50.1",
49
- "@oh-my-pi/pi-natives": "14.5.1",
50
- "@oh-my-pi/pi-utils": "14.5.1",
49
+ "@oh-my-pi/pi-natives": "14.5.2",
50
+ "@oh-my-pi/pi-utils": "14.5.2",
51
51
  "@sinclair/typebox": "^0.34.49",
52
52
  "@smithy/node-http-handler": "^4.6.1",
53
53
  "ajv": "^8.20.0",
@@ -58,7 +58,7 @@
58
58
  "zod": "4.3.6"
59
59
  },
60
60
  "devDependencies": {
61
- "@types/bun": "^1.3.13"
61
+ "@types/bun": "^1.3"
62
62
  },
63
63
  "engines": {
64
64
  "bun": ">=1.3.7"
@@ -24,6 +24,7 @@ const BUILTIN_APIS = new Set<KnownApi>([
24
24
  "google-generative-ai",
25
25
  "google-gemini-cli",
26
26
  "google-vertex",
27
+ "ollama-chat",
27
28
  "cursor-agent",
28
29
  ]);
29
30
 
@@ -51,6 +51,7 @@ import { loginMoonshot } from "./utils/oauth/moonshot";
51
51
  import { loginNanoGPT } from "./utils/oauth/nanogpt";
52
52
  import { loginNvidia } from "./utils/oauth/nvidia";
53
53
  import { loginOllama } from "./utils/oauth/ollama";
54
+ import { loginOllamaCloud } from "./utils/oauth/ollama-cloud";
54
55
  import { loginOpenAICodex } from "./utils/oauth/openai-codex";
55
56
  import { loginOpenCode } from "./utils/oauth/opencode";
56
57
  import { loginParallel } from "./utils/oauth/parallel";
@@ -838,6 +839,11 @@ export class AuthStorage {
838
839
  await saveApiKeyCredential(apiKey);
839
840
  return;
840
841
  }
842
+ case "ollama-cloud": {
843
+ const apiKey = await loginOllamaCloud(ctrl);
844
+ await saveApiKeyCredential(apiKey);
845
+ return;
846
+ }
841
847
  case "cerebras": {
842
848
  const apiKey = await loginCerebras(ctrl);
843
849
  await saveApiKeyCredential(apiKey);
package/src/cli.ts CHANGED
@@ -12,6 +12,7 @@ import { loginKilo } from "./utils/oauth/kilo";
12
12
  import { loginKimi } from "./utils/oauth/kimi";
13
13
  import { loginMiniMaxCode, loginMiniMaxCodeCn } from "./utils/oauth/minimax-code";
14
14
  import { loginNanoGPT } from "./utils/oauth/nanogpt";
15
+ import { loginOllamaCloud } from "./utils/oauth/ollama-cloud";
15
16
  import { loginOpenAICodex } from "./utils/oauth/openai-codex";
16
17
  import { loginParallel } from "./utils/oauth/parallel";
17
18
  import { loginTavily } from "./utils/oauth/tavily";
@@ -271,6 +272,23 @@ async function login(provider: OAuthProvider): Promise<void> {
271
272
  console.log(`\nAPI key saved to ~/.omp/agent/agent.db`);
272
273
  return;
273
274
  }
275
+ case "ollama-cloud": {
276
+ const apiKey = await loginOllamaCloud({
277
+ onAuth(info) {
278
+ const { url, instructions } = info;
279
+ console.log(`\nOpen this URL in your browser:\n${url}`);
280
+ if (instructions) console.log(instructions);
281
+ console.log();
282
+ },
283
+ onPrompt(p) {
284
+ return promptFn(`${p.message}${p.placeholder ? ` (${p.placeholder})` : ""}:`);
285
+ },
286
+ });
287
+ storage.saveApiKey(provider, apiKey);
288
+ console.log(`\nAPI key saved to ~/.omp/agent/agent.db`);
289
+ return;
290
+ }
291
+
274
292
  case "minimax-code": {
275
293
  const apiKey = await loginMiniMaxCode({
276
294
  onAuth(info) {
@@ -347,6 +365,7 @@ Providers:
347
365
  minimax-code-cn MiniMax Coding Plan (China)
348
366
  cursor Cursor (Claude, GPT, etc.)
349
367
  zenmux ZenMux
368
+ ollama-cloud Ollama Cloud
350
369
 
351
370
  Examples:
352
371
  bunx @oh-my-pi/pi-ai login # interactive provider selection
package/src/index.ts CHANGED
@@ -16,6 +16,7 @@ export * from "./providers/google";
16
16
  export * from "./providers/google-gemini-cli";
17
17
  export * from "./providers/google-vertex";
18
18
  export * from "./providers/kimi";
19
+ export * from "./providers/ollama";
19
20
  export type { OpenAICodexResponsesOptions } from "./providers/openai-codex-responses";
20
21
  export * from "./providers/openai-completions";
21
22
  export * from "./providers/openai-responses";
package/src/models.json CHANGED
@@ -42445,6 +42445,107 @@
42445
42445
  }
42446
42446
  }
42447
42447
  },
42448
+ "ollama-cloud": {
42449
+ "gemma4:31b": {
42450
+ "id": "gemma4:31b",
42451
+ "name": "Gemma 4",
42452
+ "api": "ollama-chat",
42453
+ "provider": "ollama-cloud",
42454
+ "baseUrl": "https://ollama.com",
42455
+ "reasoning": true,
42456
+ "input": [
42457
+ "text",
42458
+ "image"
42459
+ ],
42460
+ "cost": {
42461
+ "input": 0,
42462
+ "output": 0,
42463
+ "cacheRead": 0,
42464
+ "cacheWrite": 0
42465
+ },
42466
+ "contextWindow": 262144,
42467
+ "maxTokens": 16384,
42468
+ "thinking": {
42469
+ "mode": "effort",
42470
+ "minLevel": "minimal",
42471
+ "maxLevel": "high"
42472
+ }
42473
+ },
42474
+ "gpt-oss:120b": {
42475
+ "id": "gpt-oss:120b",
42476
+ "name": "GPT OSS (120B)",
42477
+ "api": "ollama-chat",
42478
+ "provider": "ollama-cloud",
42479
+ "baseUrl": "https://ollama.com",
42480
+ "reasoning": true,
42481
+ "input": [
42482
+ "text",
42483
+ "image"
42484
+ ],
42485
+ "cost": {
42486
+ "input": 0,
42487
+ "output": 0,
42488
+ "cacheRead": 0,
42489
+ "cacheWrite": 0
42490
+ },
42491
+ "contextWindow": 131072,
42492
+ "maxTokens": 16384,
42493
+ "thinking": {
42494
+ "mode": "effort",
42495
+ "minLevel": "minimal",
42496
+ "maxLevel": "high"
42497
+ }
42498
+ },
42499
+ "gpt-oss:20b": {
42500
+ "id": "gpt-oss:20b",
42501
+ "name": "GPT OSS (20B)",
42502
+ "api": "ollama-chat",
42503
+ "provider": "ollama-cloud",
42504
+ "baseUrl": "https://ollama.com",
42505
+ "reasoning": true,
42506
+ "input": [
42507
+ "text"
42508
+ ],
42509
+ "cost": {
42510
+ "input": 0,
42511
+ "output": 0,
42512
+ "cacheRead": 0,
42513
+ "cacheWrite": 0
42514
+ },
42515
+ "contextWindow": 131072,
42516
+ "maxTokens": 16384,
42517
+ "thinking": {
42518
+ "mode": "effort",
42519
+ "minLevel": "minimal",
42520
+ "maxLevel": "high"
42521
+ }
42522
+ },
42523
+ "qwen3-next:80b": {
42524
+ "id": "qwen3-next:80b",
42525
+ "name": "Qwen 3 Next (80B)",
42526
+ "api": "ollama-chat",
42527
+ "provider": "ollama-cloud",
42528
+ "baseUrl": "https://ollama.com",
42529
+ "reasoning": true,
42530
+ "input": [
42531
+ "text"
42532
+ ],
42533
+ "cost": {
42534
+ "input": 0,
42535
+ "output": 0,
42536
+ "cacheRead": 0,
42537
+ "cacheWrite": 0
42538
+ },
42539
+ "contextWindow": 262144,
42540
+ "maxTokens": 16384,
42541
+ "thinking": {
42542
+ "mode": "effort",
42543
+ "minLevel": "minimal",
42544
+ "maxLevel": "high"
42545
+ }
42546
+ }
42547
+ },
42548
+
42448
42549
  "qianfan": {
42449
42550
  "deepseek-v3.2": {
42450
42551
  "id": "deepseek-v3.2",
@@ -0,0 +1,38 @@
1
+ import { getBundledModels, getBundledProviders } from "../models";
2
+ import type { Api, Model } from "../types";
3
+
4
/**
 * Indexes the bundled models of one provider by model ID.
 *
 * @param provider - Provider key accepted by `getBundledModels`.
 * @returns Map from model ID to the bundled entry, viewed as `Model<TApi>`.
 */
export function createBundledReferenceMap<TApi extends Api>(
	provider: Parameters<typeof getBundledModels>[0],
): Map<string, Model<TApi>> {
	const byId = new Map<string, Model<TApi>>();
	const bundled = getBundledModels(provider);
	for (const entry of bundled) {
		// A later entry with the same ID overwrites the earlier one.
		byId.set(entry.id, entry as Model<TApi>);
	}
	return byId;
}
13
+
14
/**
 * Builds a lookup from model ID to a bundled reference model.
 *
 * The entry in `providerRefs` always wins. When it is missing, the lookup falls back
 * to a cross-provider table built from every bundled provider. Per model ID that table
 * keeps the entry with the largest `contextWindow`, then the largest `maxTokens`, and
 * on a full tie prefers an entry whose provider is `"openai"`.
 *
 * @param providerRefs - Provider-specific references, e.g. from `createBundledReferenceMap`.
 * @returns Resolver returning the matching reference or `undefined`.
 */
export function createReferenceResolver<TApi extends Api>(
	providerRefs: Map<string, Model<TApi>>,
): (modelId: string) => Model<TApi> | undefined {
	// True when `next` should replace `current` in the cross-provider table.
	const supersedes = (next: Model<Api>, current: Model<Api>): boolean => {
		if (next.contextWindow !== current.contextWindow) {
			return next.contextWindow > current.contextWindow;
		}
		if (next.maxTokens !== current.maxTokens) {
			return next.maxTokens > current.maxTokens;
		}
		return current.provider !== "openai" && next.provider === "openai";
	};

	const fallbackRefs = new Map<string, Model<Api>>();
	for (const providerId of getBundledProviders()) {
		for (const bundled of getBundledModels(providerId as Parameters<typeof getBundledModels>[0])) {
			const next = bundled as Model<Api>;
			const current = fallbackRefs.get(next.id);
			if (!current || supersedes(next, current)) {
				fallbackRefs.set(next.id, next);
			}
		}
	}

	return (modelId: string) => {
		const own = providerRefs.get(modelId);
		return own ?? (fallbackRefs.get(modelId) as Model<TApi> | undefined);
	};
}
@@ -7,6 +7,7 @@ import type { ModelManagerOptions } from "../model-manager";
7
7
  import type { Api, KnownProvider } from "../types";
8
8
  import type { OAuthProvider } from "../utils/oauth/types";
9
9
  import { googleModelManagerOptions } from "./google";
10
+ import { ollamaCloudModelManagerOptions } from "./ollama";
10
11
  import {
11
12
  alibabaCodingPlanModelManagerOptions,
12
13
  anthropicModelManagerOptions,
@@ -184,6 +185,12 @@ export const PROVIDER_DESCRIPTORS: readonly ProviderDescriptor[] = [
184
185
  catalog("Ollama", ["OLLAMA_API_KEY"]),
185
186
  { allowUnauthenticated: true },
186
187
  ),
188
+ catalogDescriptor(
189
+ "ollama-cloud",
190
+ "gpt-oss:120b",
191
+ config => ollamaCloudModelManagerOptions(config),
192
+ catalog("Ollama Cloud", ["OLLAMA_CLOUD_API_KEY"], { oauthProvider: "ollama-cloud" }),
193
+ ),
187
194
  catalogDescriptor(
188
195
  "cloudflare-ai-gateway",
189
196
  "claude-sonnet-4-5",
@@ -1,4 +1,5 @@
1
1
  export * from "./descriptors";
2
2
  export * from "./google";
3
+ export * from "./ollama";
3
4
  export * from "./openai-compat";
4
5
  export * from "./special";
@@ -0,0 +1,149 @@
1
+ import type { ModelManagerOptions } from "../model-manager";
2
+ import { Effort } from "../model-thinking";
3
+ import type { ThinkingConfig } from "../types";
4
+ import { createBundledReferenceMap, createReferenceResolver } from "./bundled-references";
5
+
6
/** Optional settings accepted by `ollamaCloudModelManagerOptions`. */
export interface OllamaCloudModelManagerConfig {
	/** API host override; passed through `normalizeOllamaCloudBaseUrl` before use. */
	baseUrl?: string;
	/** Bearer token for the API; without it dynamic discovery returns no models. */
	apiKey?: string;
}
10
+
11
/** One element of the `models` array returned by `GET {baseUrl}/api/tags`. */
interface OllamaTagEntry {
	/** Fallback identifier / display name when `model` is absent. */
	name?: string;
	/** Preferred identifier; used as the model ID when present. */
	model?: string;
}
15
+
16
/** Subset of the `POST {baseUrl}/api/show` response that this module reads. */
interface OllamaShowResponse {
	/** Capability tags; `"thinking"` and `"vision"` are the ones inspected here. */
	capabilities?: Array<string>;
	/** Free-form model metadata; scanned for a context-length entry. */
	model_info?: Record<string, unknown>;
}
20
+
21
/** Drops a single trailing `/` from `value`; any other input is returned unchanged. */
function trimTrailingSlash(value: string): string {
	if (value.endsWith("/")) {
		return value.slice(0, -1);
	}
	return value;
}
24
+
25
/**
 * Normalizes a user-supplied Ollama Cloud base URL to the bare host form that the
 * `/api/...` paths are appended to.
 *
 * - Missing or blank input yields the default `https://ollama.com`.
 * - All trailing slashes are removed, not just one.
 * - A trailing `/api` segment is removed, along with any slashes before it.
 * - If nothing is left after stripping (e.g. `"/"` or `"/api"`), the default is returned
 *   instead of an empty string.
 *
 * @param baseUrl - Optional base URL as configured by the caller.
 * @returns The normalized base URL without a trailing slash or `/api` suffix.
 */
export function normalizeOllamaCloudBaseUrl(baseUrl?: string): string {
	const fallback = "https://ollama.com";
	let normalized = baseUrl?.trim() ?? "";
	const stripTrailingSlashes = (): void => {
		while (normalized.endsWith("/")) {
			normalized = trimTrailingSlash(normalized);
		}
	};
	stripTrailingSlashes();
	if (normalized.endsWith("/api")) {
		normalized = normalized.slice(0, -4);
		// Inputs such as "https://host//api" would otherwise keep a dangling slash.
		stripTrailingSlashes();
	}
	return normalized || fallback;
}
33
+
34
/**
 * Builds the request headers shared by the discovery calls: a JSON `Accept` header
 * and a bearer `Authorization` header carrying `apiKey`.
 */
function createCloudHeaders(apiKey: string): Record<string, string> {
	const headers: Record<string, string> = { Accept: "application/json" };
	headers.Authorization = `Bearer ${apiKey}`;
	return headers;
}
40
+
41
/**
 * Extracts a context-window size from the `model_info` object of an `/api/show` response.
 *
 * Returns the first entry (in object key order) whose key ends in `.context_length`,
 * `.num_ctx` or `.context_window` and whose value is a finite, positive number.
 * Zero, negative, `NaN` and infinite values are skipped so they cannot end up as the
 * model's `contextWindow` (and, through it, as `maxTokens`).
 *
 * @param modelInfo - Raw `model_info` map, or `undefined` when metadata is unavailable.
 * @returns The context window, or `undefined` when no usable entry exists.
 */
function getContextWindow(modelInfo: Record<string, unknown> | undefined): number | undefined {
	if (!modelInfo) {
		return undefined;
	}
	const suffixes = [".context_length", ".num_ctx", ".context_window"];
	for (const [key, value] of Object.entries(modelInfo)) {
		if (typeof value !== "number" || !Number.isFinite(value) || value <= 0) {
			continue;
		}
		if (suffixes.some(suffix => key.endsWith(suffix))) {
			return value;
		}
	}
	// Explicit so every code path returns (keeps `noImplicitReturns` builds clean).
	return undefined;
}
54
+
55
/**
 * Derives the thinking configuration from a model's capability tags.
 *
 * @param capabilities - Capability list from `/api/show`, if any.
 * @returns An effort-mode config spanning `Effort.Minimal`..`Effort.High` when the
 *          list contains `"thinking"`, otherwise `undefined`.
 */
function getThinkingConfig(capabilities: string[] | undefined): ThinkingConfig | undefined {
	const supportsThinking = capabilities?.includes("thinking") ?? false;
	if (supportsThinking) {
		return { mode: "effort", minLevel: Effort.Minimal, maxLevel: Effort.High };
	}
	return undefined;
}
65
+
66
/**
 * Requests per-model metadata via `POST {baseUrl}/api/show`.
 *
 * @param baseUrl - Normalized API host (no trailing slash).
 * @param apiKey - Bearer token for the request.
 * @param model - Model identifier sent as `{ model }` in the JSON body.
 * @returns The parsed response body, or `undefined` when the response status is not OK.
 *          Network and JSON-parse failures reject the returned promise.
 */
async function fetchShowMetadata(
	baseUrl: string,
	apiKey: string,
	model: string,
): Promise<OllamaShowResponse | undefined> {
	const headers = { ...createCloudHeaders(apiKey), "Content-Type": "application/json" };
	const payload = JSON.stringify({ model });
	const response = await fetch(`${baseUrl}/api/show`, { method: "POST", headers, body: payload });
	if (response.ok) {
		return (await response.json()) as OllamaShowResponse;
	}
	return undefined;
}
84
+
85
/**
 * Creates the model-manager options for the `ollama-cloud` provider.
 *
 * Dynamic discovery lists models via `GET {baseUrl}/api/tags` and enriches each entry
 * with `/api/show` metadata. Per model, live capabilities take precedence; when the
 * metadata call fails or returns nothing, values fall back to the bundled reference
 * for that ID and then to fixed defaults (text-only input, 128000 context window,
 * zero cost).
 *
 * @param config - Optional API key and base URL. Without an API key discovery
 *                 resolves to an empty list and no request is made.
 * @returns Options with `providerId` `"ollama-cloud"` and a `fetchDynamicModels`
 *          callback that resolves to models sorted by ID. The callback throws when
 *          `/api/tags` answers with a non-OK status.
 */
export function ollamaCloudModelManagerOptions(
	config?: OllamaCloudModelManagerConfig,
): ModelManagerOptions<"ollama-chat"> {
	type InputKinds = Array<"text" | "image">;

	const apiKey = config?.apiKey;
	const baseUrl = normalizeOllamaCloudBaseUrl(config?.baseUrl);
	const resolveReference = createReferenceResolver(createBundledReferenceMap<"ollama-chat">("ollama-cloud"));

	// Best-effort metadata lookup: any failure is treated as "no metadata".
	const loadMetadata = async (token: string, id: string): Promise<OllamaShowResponse | undefined> => {
		try {
			return await fetchShowMetadata(baseUrl, token, id);
		} catch {
			return undefined;
		}
	};

	// Turns one `/api/tags` entry into a model description, or `undefined` when it has no usable ID.
	const describeEntry = async (token: string, entry: OllamaTagEntry) => {
		const id = entry.model ?? entry.name;
		if (!id) {
			return undefined;
		}
		const reference = resolveReference(id);
		const metadata = await loadMetadata(token, id);
		const capabilities = metadata?.capabilities;
		const contextWindow = getContextWindow(metadata?.model_info) ?? reference?.contextWindow ?? 128000;

		let reasoning: boolean;
		let thinking: ThinkingConfig | undefined;
		let input: InputKinds;
		if (capabilities) {
			reasoning = capabilities.includes("thinking");
			thinking = getThinkingConfig(capabilities);
			input = capabilities.includes("vision") ? ["text", "image"] : ["text"];
		} else {
			reasoning = reference?.reasoning ?? false;
			thinking = reference?.thinking;
			input = (reference?.input as InputKinds | undefined) ?? ["text"];
		}

		// A listing name that differs from the ID is treated as the display name.
		const hasDistinctName = Boolean(entry.name) && entry.name !== id;
		const name = hasDistinctName ? (entry.name as string) : (reference?.name ?? id);

		return {
			id,
			name,
			api: "ollama-chat" as const,
			provider: "ollama-cloud" as const,
			baseUrl,
			reasoning,
			thinking,
			input,
			cost: reference?.cost ?? { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
			contextWindow,
			maxTokens: reference?.maxTokens ?? Math.min(contextWindow, 8192),
		};
	};

	return {
		providerId: "ollama-cloud",
		fetchDynamicModels: async () => {
			if (!apiKey) {
				return [];
			}
			const tagsUrl = `${baseUrl}/api/tags`;
			const response = await fetch(tagsUrl, {
				method: "GET",
				headers: createCloudHeaders(apiKey),
			});
			if (!response.ok) {
				throw new Error(`HTTP ${response.status} from ${baseUrl}/api/tags`);
			}
			const payload = (await response.json()) as { models?: OllamaTagEntry[] };
			const described = await Promise.all((payload.models ?? []).map(entry => describeEntry(apiKey, entry)));
			const present = described.filter(
				(model): model is NonNullable<(typeof described)[number]> => model !== undefined,
			);
			return present.sort((left, right) => left.id.localeCompare(right.id));
		},
	};
}
@@ -1,5 +1,5 @@
1
1
  import type { ModelManagerOptions } from "../model-manager";
2
- import { getBundledModels, getBundledProviders } from "../models";
2
+ import { getBundledModels } from "../models";
3
3
  import type { Api, Model } from "../types";
4
4
  import { isAnthropicOAuthToken, isRecord, toNumber, toPositiveNumber } from "../utils";
5
5
  import {
@@ -8,6 +8,7 @@ import {
8
8
  type OpenAICompatibleModelRecord,
9
9
  } from "../utils/discovery/openai-compatible";
10
10
  import { getGitHubCopilotBaseUrl, OPENCODE_HEADERS, parseGitHubCopilotApiKey } from "../utils/oauth/github-copilot";
11
+ import { createBundledReferenceMap, createReferenceResolver } from "./bundled-references";
11
12
 
12
13
  const MODELS_DEV_URL = "https://models.dev/api.json";
13
14
  const ANTHROPIC_BASE_URL = "https://api.anthropic.com/v1";
@@ -163,48 +164,6 @@ function mapWithBundledReference<TApi extends Api>(
163
164
  };
164
165
  }
165
166
 
166
- function createBundledReferenceMap<TApi extends Api>(
167
- provider: Parameters<typeof getBundledModels>[0],
168
- ): Map<string, Model<TApi>> {
169
- const references = new Map<string, Model<TApi>>();
170
- for (const model of getBundledModels(provider)) {
171
- references.set(model.id, model as Model<TApi>);
172
- }
173
- return references;
174
- }
175
-
176
- /**
177
- * Returns a lookup that resolves a model ID to a bundled reference, preferring
178
- * the provider-specific entry over a cross-provider fallback. The global fallback
179
- * picks the best entry across all providers (largest contextWindow, then maxTokens,
180
- * then canonical OpenAI), but proxy providers (Copilot, nanogpt, etc.) impose their
181
- * own limits that are typically lower than native provider limits, so the
182
- * provider-specific entry must win.
183
- */
184
- function createReferenceResolver<TApi extends Api>(
185
- providerRefs: Map<string, Model<TApi>>,
186
- ): (modelId: string) => Model<TApi> | undefined {
187
- const globalRefs = new Map<string, Model<Api>>();
188
- for (const provider of getBundledProviders()) {
189
- for (const model of getBundledModels(provider as Parameters<typeof getBundledModels>[0])) {
190
- const candidate = model as Model<Api>;
191
- const existing = globalRefs.get(candidate.id);
192
- if (!existing) {
193
- globalRefs.set(candidate.id, candidate);
194
- } else if (candidate.contextWindow !== existing.contextWindow) {
195
- if (candidate.contextWindow > existing.contextWindow) globalRefs.set(candidate.id, candidate);
196
- } else if (candidate.maxTokens !== existing.maxTokens) {
197
- if (candidate.maxTokens > existing.maxTokens) globalRefs.set(candidate.id, candidate);
198
- } else if (existing.provider !== "openai" && candidate.provider === "openai") {
199
- // When limits tie, prefer OpenAI as canonical so generic OpenAI-family
200
- // providers inherit OpenAI pricing/capabilities instead of proxy metadata.
201
- globalRefs.set(candidate.id, candidate);
202
- }
203
- }
204
- }
205
- return (modelId: string) => providerRefs.get(modelId) ?? (globalRefs.get(modelId) as Model<TApi> | undefined);
206
- }
207
-
208
167
  function normalizeAnthropicBaseUrl(baseUrl: string | undefined, fallback: string): string {
209
168
  const value = baseUrl?.trim();
210
169
  if (!value) {
@@ -1519,7 +1519,7 @@ function buildParams(
1519
1519
  );
1520
1520
  }
1521
1521
 
1522
- if (options?.thinkingEnabled && model.reasoning && model.provider !== "github-copilot") {
1522
+ if (options?.thinkingEnabled && model.reasoning) {
1523
1523
  const mode = model.thinking?.mode;
1524
1524
  const requestedEffort = options.reasoning;
1525
1525
  const effort =
@@ -1583,10 +1583,6 @@ function buildParams(
1583
1583
  params.system = systemBlocks;
1584
1584
  }
1585
1585
  disableThinkingIfToolChoiceForced(params);
1586
- if (model.provider === "github-copilot") {
1587
- delete params.thinking;
1588
- delete params.output_config;
1589
- }
1590
1586
  ensureMaxTokensForThinking(params, model);
1591
1587
  applyPromptCaching(params, cacheControl);
1592
1588
  enforceCacheControlLimit(params, 4);
@@ -0,0 +1,497 @@
1
+ import type { TSchema } from "@sinclair/typebox";
2
+ import { getEnvApiKey } from "../stream";
3
+ import type {
4
+ Api,
5
+ AssistantMessage,
6
+ Context,
7
+ DeveloperMessage,
8
+ Message,
9
+ Model,
10
+ StreamFunction,
11
+ StreamOptions,
12
+ Tool,
13
+ ToolChoice,
14
+ ToolResultMessage,
15
+ UserMessage,
16
+ } from "../types";
17
+ import { AssistantMessageEventStream } from "../utils/event-stream";
18
+ import { finalizeErrorMessage, type RawHttpRequestDump } from "../utils/http-inspector";
19
+ import { parseStreamingJson } from "../utils/json-parse";
20
+ import { transformMessages } from "./transform-messages";
21
+
22
/** Stream options specific to the `ollama-chat` transport. */
export interface OllamaChatOptions extends StreamOptions {
	/** Tool-selection policy; translated by `mapToolChoice` into the request's `tool_choice`. */
	toolChoice?: ToolChoice;
	/** Requested reasoning effort; translated by `mapReasoning` into the request's `think` value. */
	reasoning?: "minimal" | "low" | "medium" | "high" | "xhigh";
}
26
+
27
/** Tool definition as placed in the `tools` array of the chat request body. */
type OllamaFunctionTool = {
	type: "function";
	function: {
		/** Tool name, copied from `Tool.name`. */
		name: string;
		/** Tool description, copied from `Tool.description`. */
		description: string;
		/** Parameter schema, copied from `Tool.parameters`. */
		parameters: TSchema;
	};
};
35
+
36
/** Chat message in the shape produced by `convertMessage` for the request's `messages` array. */
type OllamaMessage = {
	role: "system" | "user" | "assistant" | "tool";
	/** Text content; multiple text blocks are joined with newlines. */
	content: string;
	/** Image payloads taken from image blocks; omitted when there are none. */
	images?: Array<string>;
	/** Joined thinking text of an assistant message; omitted when empty. */
	thinking?: string;
	/** Tool calls issued by an assistant message; omitted when empty. */
	tool_calls?: {
		type: "function";
		function: {
			index?: number;
			name: string;
			arguments: Record<string, unknown>;
		};
	}[];
	/** Name of the tool a `tool` message is the result of. */
	tool_name?: string;
};
51
+
52
/**
 * One parsed NDJSON line of the streaming chat response, as yielded by `iterateNdjson`.
 * Every field is optional because the payload is cast from JSON without validation.
 */
type OllamaChatChunk = {
	message?: {
		role?: string;
		content?: string;
		thinking?: string;
		tool_calls?: {
			type?: string;
			function?: {
				index?: number;
				name?: string;
				/** Either an already-parsed object or a JSON string. */
				arguments?: Record<string, unknown> | string;
			};
		}[];
	};
	done?: boolean;
	done_reason?: string;
	// NOTE(review): presumably prompt/output token counts — confirm against the consumer of this type.
	prompt_eval_count?: number;
	eval_count?: number;
};
71
+
72
/**
 * Assistant content block narrowed to the `toolCall` variant, extended with an
 * optional `partialJson` string.
 * NOTE(review): `partialJson` presumably accumulates streamed argument text — confirm at its use site.
 */
type InternalToolCallBlock = {
	type: "toolCall";
	partialJson?: string;
} & AssistantMessage["content"][number];
76
+
77
/**
 * Normalizes a model's base URL to the bare host form that `/api/...` paths are
 * appended to.
 *
 * - Missing or blank input yields the default `https://ollama.com`.
 * - All trailing slashes are removed, not just one.
 * - A trailing `/api` segment is removed, along with any slashes before it.
 * - If nothing is left after stripping (e.g. `"/"` or `"/api"`), the default is returned
 *   instead of an empty string.
 *
 * @param baseUrl - Optional base URL from the model definition.
 * @returns The normalized base URL without a trailing slash or `/api` suffix.
 */
function normalizeBaseUrl(baseUrl?: string): string {
	const fallback = "https://ollama.com";
	const stripTrailingSlashes = (value: string): string => {
		let end = value.length;
		while (end > 0 && value[end - 1] === "/") {
			end -= 1;
		}
		return value.slice(0, end);
	};
	let normalized = stripTrailingSlashes(baseUrl?.trim() ?? "");
	if (normalized.endsWith("/api")) {
		// Strip again so inputs like "https://host//api" do not keep a dangling slash.
		normalized = stripTrailingSlashes(normalized.slice(0, -4));
	}
	return normalized || fallback;
}
85
+
86
/**
 * Collapses the five library effort levels onto the three levels sent as `think`.
 *
 * `minimal`/`low` → `"low"`, `medium` → `"medium"`, `high`/`xhigh` → `"high"`.
 * Anything else (including `undefined`) yields `undefined`, in which case the caller
 * omits `think` from the request.
 */
function mapReasoning(reasoning: OllamaChatOptions["reasoning"]): boolean | "low" | "medium" | "high" | undefined {
	if (reasoning === "minimal" || reasoning === "low") {
		return "low";
	}
	if (reasoning === "medium") {
		return "medium";
	}
	if (reasoning === "high" || reasoning === "xhigh") {
		return "high";
	}
	return undefined;
}
100
+
101
/**
 * Translates the library's tool-choice setting into the request's `tool_choice` value.
 *
 * - unset or `"auto"` → `undefined` (field omitted)
 * - `"none"` → `"none"`
 * - `"required"`, `"any"`, or any object form → `"required"`
 * - anything else → `undefined`
 */
function mapToolChoice(toolChoice: ToolChoice | undefined): "auto" | "none" | "required" | undefined {
	if (!toolChoice) {
		return undefined;
	}
	if (typeof toolChoice === "object") {
		// Object forms are not forwarded verbatim; they only force tool use.
		return "required";
	}
	switch (toolChoice) {
		case "none":
			return "none";
		case "required":
		case "any":
			return "required";
		default:
			return undefined;
	}
}
116
+
117
/**
 * Flattens message content into the `content` / `images` pair used by Ollama messages.
 *
 * A plain string is returned as-is. For block arrays, the `text` of every text block
 * is joined with `"\n"` and the `data` of every image block is collected in order.
 * Blocks missing the expected string field are ignored. `images` is only present when
 * at least one image was collected.
 */
function toPlainContent(content: string | Array<{ type: "text" | "image"; text?: string; data?: string }>): {
	content: string;
	images?: string[];
} {
	if (typeof content === "string") {
		return { content };
	}
	const texts = content.flatMap(block => (block.type === "text" && typeof block.text === "string" ? [block.text] : []));
	const pictures = content.flatMap(block =>
		block.type === "image" && typeof block.data === "string" ? [block.data] : [],
	);
	const flattened: { content: string; images?: string[] } = { content: texts.join("\n") };
	if (pictures.length > 0) {
		flattened.images = pictures;
	}
	return flattened;
}
139
+
140
/**
 * Converts one library message into the Ollama chat message shape.
 *
 * - `user` → role `user`, content flattened by `toPlainContent`
 * - `developer` → role `system`, content flattened by `toPlainContent`
 * - `toolResult` → role `tool` with `tool_name` set from `toolName`
 * - anything else is treated as an assistant message: text blocks and thinking blocks
 *   are each joined with `"\n"`, tool calls become `function` entries; `thinking` and
 *   `tool_calls` are omitted when empty.
 */
function convertMessage(message: Message): OllamaMessage {
	switch (message.role) {
		case "user":
			return { role: "user", ...toPlainContent(message.content as UserMessage["content"]) };
		case "developer":
			return { role: "system", ...toPlainContent(message.content as DeveloperMessage["content"]) };
		case "toolResult":
			return {
				role: "tool",
				tool_name: message.toolName,
				...toPlainContent(message.content as ToolResultMessage["content"]),
			};
		default:
			break;
	}

	const textParts: string[] = [];
	const thoughts: string[] = [];
	const calls: NonNullable<OllamaMessage["tool_calls"]> = [];
	for (const block of message.content) {
		switch (block.type) {
			case "text":
				textParts.push(block.text);
				break;
			case "thinking":
				thoughts.push(block.thinking);
				break;
			case "toolCall":
				calls.push({ type: "function", function: { name: block.name, arguments: block.arguments } });
				break;
			default:
				// Other block kinds have no Ollama counterpart and are dropped.
				break;
		}
	}

	const converted: OllamaMessage = { role: "assistant", content: textParts.join("\n") };
	if (thoughts.length > 0) {
		converted.thinking = thoughts.join("\n");
	}
	if (calls.length > 0) {
		converted.tool_calls = calls;
	}
	return converted;
}
186
+
187
/**
 * Builds the request's `messages` array from a conversation context.
 *
 * A non-empty `systemPrompt` is prepended as a `developer` message (stamped with the
 * current time), the full history is passed through `transformMessages` for the target
 * model, and each resulting message is converted with `convertMessage`.
 */
function convertMessages(model: Model<"ollama-chat">, context: Context): OllamaMessage[] {
	const history: Message[] = context.systemPrompt
		? [{ role: "developer", content: context.systemPrompt, timestamp: Date.now() }, ...context.messages]
		: [...context.messages];
	const prepared = transformMessages(history, model);
	return prepared.map(convertMessage);
}
199
+
200
/**
 * Wraps library tool definitions in Ollama's `function` tool envelope.
 *
 * @returns The converted list, or `undefined` when `tools` is missing or empty so the
 *          caller can omit the field.
 */
function convertTools(tools: Tool[] | undefined): OllamaFunctionTool[] | undefined {
	if (!tools?.length) {
		return undefined;
	}
	const converted: OllamaFunctionTool[] = [];
	for (const tool of tools) {
		const { name, description, parameters } = tool;
		converted.push({ type: "function", function: { name, description, parameters } });
	}
	return converted;
}
213
+
214
/**
 * Assembles the JSON body for a streaming chat request.
 *
 * Always sets `model`, `messages` and `stream: true`. Optional fields are added only
 * when they resolve to a value:
 * - `tools` when the context declares at least one tool,
 * - `think` from `options.reasoning` (via `mapReasoning`),
 * - `tool_choice` from `options.toolChoice` (via `mapToolChoice`),
 * - `options.num_predict` from `options.maxTokens`.
 *
 * @param model - Target model; its `id` is sent as `model`.
 * @param context - Conversation context supplying messages, system prompt and tools.
 * @param options - Per-request options, if any.
 */
function createChatBody(model: Model<"ollama-chat">, context: Context, options: OllamaChatOptions | undefined) {
	const think = mapReasoning(options?.reasoning);
	const toolChoice = mapToolChoice(options?.toolChoice);
	const messages = convertMessages(model, context);
	// Convert once and reuse; the tool list was previously rebuilt twice per request.
	const tools = convertTools(context.tools);
	return {
		model: model.id,
		messages,
		...(tools ? { tools } : {}),
		...(think !== undefined ? { think } : {}),
		...(toolChoice !== undefined ? { tool_choice: toolChoice } : {}),
		...(options?.maxTokens !== undefined ? { options: { num_predict: options.maxTokens } } : {}),
		stream: true,
	};
}
227
+
228
/**
 * Parses a newline-delimited JSON byte stream into chat chunks.
 *
 * Bytes are decoded incrementally, so multi-byte characters and JSON objects may be
 * split across stream chunks. Blank lines are skipped. A final line without a trailing
 * newline is still parsed once the stream ends.
 *
 * The reader lock is always released when iteration finishes. If iteration ends before
 * the stream is drained (the consumer stops early, a read fails, or a line is not valid
 * JSON), the stream is cancelled first so the underlying source can stop producing data.
 *
 * @param stream - Response body to read; it is locked for the duration of the iteration.
 * @throws SyntaxError when a line is not valid JSON; read errors propagate unchanged.
 */
async function* iterateNdjson(stream: ReadableStream<Uint8Array>): AsyncGenerator<OllamaChatChunk> {
	const reader = stream.getReader();
	const decoder = new TextDecoder();
	let buffer = "";
	let drained = false;
	try {
		while (true) {
			const { done, value } = await reader.read();
			if (done) {
				break;
			}
			buffer += decoder.decode(value, { stream: true });
			let newlineIndex = buffer.indexOf("\n");
			while (newlineIndex >= 0) {
				const line = buffer.slice(0, newlineIndex).trim();
				buffer = buffer.slice(newlineIndex + 1);
				if (line) {
					yield JSON.parse(line) as OllamaChatChunk;
				}
				newlineIndex = buffer.indexOf("\n");
			}
		}
		drained = true;
		// Flush any bytes the decoder was still holding for an incomplete character.
		buffer += decoder.decode();
		const tail = buffer.trim();
		if (tail) {
			yield JSON.parse(tail) as OllamaChatChunk;
		}
	} finally {
		if (!drained) {
			try {
				await reader.cancel();
			} catch {
				// The stream already errored; there is nothing left to cancel.
			}
		}
		reader.releaseLock();
	}
}
257
+
258
/**
 * Creates the blank assistant message that streaming fills in.
 *
 * The message starts with no content, zeroed usage and cost counters, a
 * "stop" stop reason, and the current time as its timestamp. Provider and
 * model id are copied from `model`.
 */
function createEmptyOutput(model: Model<"ollama-chat">): AssistantMessage {
	const zeroCost = { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 };
	const zeroUsage = { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, totalTokens: 0, cost: zeroCost };
	const api = "ollama-chat" as Api;
	const { provider, id } = model;
	return {
		role: "assistant",
		content: [],
		api,
		provider,
		model: id,
		usage: zeroUsage,
		stopReason: "stop",
		timestamp: Date.now(),
	};
}
277
+
278
/**
 * Emits `thinking_end` for the block at `index`.
 * Does nothing when that slot is empty or holds a different block type.
 */
function endThinkingBlock(stream: AssistantMessageEventStream, output: AssistantMessage, index: number): void {
	const candidate = output.content[index];
	if (candidate?.type !== "thinking") {
		return;
	}
	stream.push({ type: "thinking_end", contentIndex: index, content: candidate.thinking, partial: output });
}
284
+
285
/**
 * Emits `text_end` for the block at `index`.
 * Does nothing when that slot is empty or holds a different block type.
 */
function endTextBlock(stream: AssistantMessageEventStream, output: AssistantMessage, index: number): void {
	const candidate = output.content[index];
	if (candidate?.type !== "text") {
		return;
	}
	stream.push({ type: "text_end", contentIndex: index, content: candidate.text, partial: output });
}
291
+
292
/**
 * Finalises the tool call block at `index` and emits `toolcall_end`.
 *
 * Any accumulated `partialJson` is parsed into `arguments` one last time, and
 * the scratch field is then removed so it never appears on the finished
 * block. Does nothing when the slot is empty or is not a tool call.
 */
function endToolCallBlock(stream: AssistantMessageEventStream, output: AssistantMessage, index: number): void {
	const block = output.content[index];
	if (block?.type !== "toolCall") {
		return;
	}
	const toolCall = block as InternalToolCallBlock;
	if (toolCall.partialJson) {
		toolCall.arguments = parseStreamingJson<Record<string, unknown>>(toolCall.partialJson);
	}
	// Always strip the scratch field: an empty-string buffer is falsy but must not
	// survive on the block that is handed to consumers.
	delete toolCall.partialJson;
	stream.push({ type: "toolcall_end", contentIndex: index, toolCall, partial: output });
}
304
+
305
/**
 * Translates Ollama's `done_reason` into the message stop reason.
 *
 * - `"length"` maps to `"length"`.
 * - `"tool_calls"` maps to `"toolUse"`.
 * - Any other value (including a missing one) maps to `"toolUse"` when the
 *   message contains at least one tool call block, otherwise `"stop"`.
 *
 * The presence of tool call blocks is checked regardless of the reported
 * reason because the native chat endpoint reports `"stop"` for turns that end
 * in tool calls.
 *
 * @param doneReason - Value of `done_reason` from the final chunk, if any.
 * @param output - Message accumulated so far; only its content blocks are read.
 */
function mapDoneReason(doneReason: string | undefined, output: AssistantMessage): AssistantMessage["stopReason"] {
	if (doneReason === "length") {
		return "length";
	}
	if (doneReason === "tool_calls") {
		return "toolUse";
	}
	if (output.content.some(block => block.type === "toolCall")) {
		return "toolUse";
	}
	return "stop";
}
317
+
318
/**
 * Streams a chat completion from `${baseUrl}/api/chat` and republishes it as
 * assistant message events.
 *
 * The request runs in a detached async task; the returned event stream is
 * handed back immediately and always terminated with either a `done` or an
 * `error` event followed by `end()`.
 *
 * Event protocol:
 * - `start` once the HTTP response has been accepted.
 * - `thinking_*`, `text_*` and `toolcall_*` events as chunks arrive. Only one
 *   thinking or text block is open at a time; switching kinds closes the
 *   previous block first.
 * - Every opened block is closed before `done`, including when the server
 *   closes the connection without sending a final `done: true` record.
 *
 * Failures (missing API key, non-2xx status, empty body, malformed NDJSON,
 * aborts) are reported through the `error` event rather than thrown. The stop
 * reason is `"aborted"` when `options.signal` is aborted, otherwise `"error"`.
 *
 * @param model - Target model; supplies id, provider, base URL and default headers.
 * @param context - Conversation context (system prompt, messages, tools).
 * @param options - Request options (API key, headers, abort signal, payload hook, ...).
 * @returns The event stream that the detached task feeds.
 */
export const streamOllama: StreamFunction<"ollama-chat"> = (
	model: Model<"ollama-chat">,
	context: Context,
	options: OllamaChatOptions,
): AssistantMessageEventStream => {
	const stream = new AssistantMessageEventStream();
	void (async () => {
		const startTime = Date.now();
		let firstTokenTime: number | undefined;
		const output = createEmptyOutput(model);
		let rawRequestDump: RawHttpRequestDump | undefined;
		let activeThinkingIndex: number | undefined;
		let activeTextIndex: number | undefined;
		const activeToolIndices = new Set<number>();
		let sawDone = false;

		// Closes the open thinking block, if any.
		const closeThinking = (): void => {
			if (activeThinkingIndex !== undefined) {
				endThinkingBlock(stream, output, activeThinkingIndex);
				activeThinkingIndex = undefined;
			}
		};
		// Closes the open text block, if any.
		const closeText = (): void => {
			if (activeTextIndex !== undefined) {
				endTextBlock(stream, output, activeTextIndex);
				activeTextIndex = undefined;
			}
		};
		// Finalises every tool call that has been started but not yet ended.
		const closeToolCalls = (): void => {
			for (const index of activeToolIndices) {
				endToolCallBlock(stream, output, index);
			}
			activeToolIndices.clear();
		};

		try {
			const apiKey = options.apiKey || getEnvApiKey(model.provider);
			if (!apiKey) {
				throw new Error(`No API key for provider: ${model.provider}`);
			}
			const url = `${normalizeBaseUrl(model.baseUrl)}/api/chat`;
			let body = createChatBody(model, context, options);
			// Give the caller a chance to inspect or replace the outgoing payload.
			const replacementPayload = await options.onPayload?.(body, model);
			if (replacementPayload !== undefined) {
				body = replacementPayload as typeof body;
			}
			rawRequestDump = {
				provider: model.provider,
				api: model.api,
				model: model.id,
				method: "POST",
				url,
				body,
			};
			const response = await fetch(url, {
				method: "POST",
				headers: {
					...model.headers,
					...options.headers,
					// Listed last so caller-supplied headers cannot override auth or content type.
					Authorization: `Bearer ${apiKey}`,
					"Content-Type": "application/json",
				},
				body: JSON.stringify(body),
				signal: options.signal,
			});
			if (!response.ok) {
				// Surface the server's explanation; reading it is best-effort.
				const detail = (await response.text().catch(() => "")).trim();
				throw new Error(`HTTP ${response.status} from ${url}${detail ? `: ${detail}` : ""}`);
			}
			if (!response.body) {
				throw new Error("Ollama returned an empty response body");
			}
			stream.push({ type: "start", partial: output });

			for await (const chunk of iterateNdjson(response.body)) {
				const thinkingDelta = chunk.message?.thinking;
				if (thinkingDelta) {
					closeText();
					if (activeThinkingIndex === undefined) {
						output.content.push({ type: "thinking", thinking: "" });
						activeThinkingIndex = output.content.length - 1;
						stream.push({ type: "thinking_start", contentIndex: activeThinkingIndex, partial: output });
					}
					const block = output.content[activeThinkingIndex];
					if (block?.type === "thinking") {
						block.thinking += thinkingDelta;
						stream.push({
							type: "thinking_delta",
							contentIndex: activeThinkingIndex,
							delta: thinkingDelta,
							partial: output,
						});
					}
					firstTokenTime ??= Date.now();
				}

				const textDelta = chunk.message?.content;
				if (textDelta) {
					closeThinking();
					if (activeTextIndex === undefined) {
						output.content.push({ type: "text", text: "" });
						activeTextIndex = output.content.length - 1;
						stream.push({ type: "text_start", contentIndex: activeTextIndex, partial: output });
					}
					const block = output.content[activeTextIndex];
					if (block?.type === "text") {
						block.text += textDelta;
						stream.push({
							type: "text_delta",
							contentIndex: activeTextIndex,
							delta: textDelta,
							partial: output,
						});
					}
					firstTokenTime ??= Date.now();
				}

				const calls = chunk.message?.tool_calls;
				if (calls?.length) {
					closeThinking();
					closeText();
					for (const call of calls) {
						const name = call.function?.name ?? "unknown_tool";
						const rawArgs = call.function?.arguments;
						// Arguments may arrive as an object or as a pre-serialised string.
						const partialJson = typeof rawArgs === "string" ? rawArgs : JSON.stringify(rawArgs ?? {});
						const toolCall: InternalToolCallBlock = {
							type: "toolCall",
							// Synthesised id built from the block position and tool name.
							id: `ollama:${output.content.length}:${name}`,
							name,
							arguments: parseStreamingJson<Record<string, unknown>>(partialJson),
							partialJson,
						};
						output.content.push(toolCall);
						const index = output.content.length - 1;
						activeToolIndices.add(index);
						stream.push({ type: "toolcall_start", contentIndex: index, partial: output });
						stream.push({
							type: "toolcall_delta",
							contentIndex: index,
							delta: partialJson,
							partial: output,
						});
						firstTokenTime ??= Date.now();
					}
				}

				if (chunk.done) {
					sawDone = true;
					closeThinking();
					closeText();
					closeToolCalls();
					output.stopReason = mapDoneReason(chunk.done_reason, output);
					output.usage.input = chunk.prompt_eval_count ?? 0;
					output.usage.output = chunk.eval_count ?? 0;
					output.usage.totalTokens = output.usage.input + output.usage.output;
				}
			}

			if (!sawDone) {
				// The connection closed without a final record: still close every open
				// block so consumers receive matching *_end events, and derive the stop
				// reason from what was produced.
				closeThinking();
				closeText();
				closeToolCalls();
				output.stopReason = mapDoneReason(undefined, output);
			}

			output.duration = Date.now() - startTime;
			if (firstTokenTime) {
				output.ttft = firstTokenTime - startTime;
			}
			const doneReason =
				output.stopReason === "length" ? "length" : output.stopReason === "toolUse" ? "toolUse" : "stop";
			stream.push({ type: "done", reason: doneReason, message: output });
			stream.end();
		} catch (error) {
			// Do not expose the streaming scratch buffer on partially built tool calls.
			for (const block of output.content) {
				if (block.type === "toolCall") {
					delete (block as InternalToolCallBlock).partialJson;
				}
			}
			output.stopReason = options.signal?.aborted ? "aborted" : "error";
			output.errorMessage = await finalizeErrorMessage(error, rawRequestDump);
			output.duration = Date.now() - startTime;
			if (firstTokenTime) {
				output.ttft = firstTokenTime - startTime;
			}
			stream.push({ type: "error", reason: output.stopReason, error: output });
			stream.end();
		}
	})();
	return stream;
};
@@ -404,7 +404,7 @@ function buildParams(
404
404
  }
405
405
  }
406
406
 
407
- if (model.reasoning && model.provider !== "github-copilot") {
407
+ if (model.reasoning) {
408
408
  // Always request encrypted reasoning content so reasoning items can be
409
409
  // replayed in multi-turn conversations when store is false (items aren't
410
410
  // persisted server-side, so we must include the full content).
@@ -27,6 +27,7 @@ import type { CursorOptions } from "./cursor";
27
27
  import type { GoogleOptions } from "./google";
28
28
  import type { GoogleGeminiCliOptions } from "./google-gemini-cli";
29
29
  import type { GoogleVertexOptions } from "./google-vertex";
30
+ import type { OllamaChatOptions } from "./ollama";
30
31
  import type { OpenAICodexResponsesOptions } from "./openai-codex-responses";
31
32
  import type { OpenAICompletionsOptions } from "./openai-completions";
32
33
  import type { OpenAIResponsesOptions } from "./openai-responses";
@@ -103,6 +104,14 @@ interface OpenAIResponsesProviderModule {
103
104
  ) => AssistantMessageEventStream;
104
105
  }
105
106
 
107
/**
 * Shape of the dynamically imported `./ollama` module as far as this loader
 * is concerned: only the `streamOllama` export is used.
 */
interface OllamaProviderModule {
	/** Streaming entry point for models that use the "ollama-chat" API. */
	streamOllama: (
		model: Model<"ollama-chat">,
		context: Context,
		options: OllamaChatOptions,
	) => AssistantMessageEventStream;
}
114
+
106
115
  interface CursorProviderModule {
107
116
  streamCursor: (
108
117
  model: Model<"cursor-agent">,
@@ -133,6 +142,7 @@ let googleVertexProviderModulePromise: Promise<LazyProviderModule<"google-vertex
133
142
  let openAICodexResponsesProviderModulePromise: Promise<LazyProviderModule<"openai-codex-responses">> | undefined;
134
143
  let openAICompletionsProviderModulePromise: Promise<LazyProviderModule<"openai-completions">> | undefined;
135
144
  let openAIResponsesProviderModulePromise: Promise<LazyProviderModule<"openai-responses">> | undefined;
145
+ let ollamaProviderModulePromise: Promise<LazyProviderModule<"ollama-chat">> | undefined;
136
146
  let cursorProviderModulePromise: Promise<LazyProviderModule<"cursor-agent">> | undefined;
137
147
  let bedrockProviderModuleOverride: LazyProviderModule<"bedrock-converse-stream"> | undefined;
138
148
  let bedrockProviderModulePromise: Promise<LazyProviderModule<"bedrock-converse-stream">> | undefined;
@@ -290,6 +300,14 @@ function loadOpenAIResponsesProviderModule(): Promise<LazyProviderModule<"openai
290
300
  return openAIResponsesProviderModulePromise;
291
301
  }
292
302
 
303
/**
 * Lazily imports the Ollama provider and exposes it as a lazy provider module.
 * The import promise is cached in `ollamaProviderModulePromise`, so the
 * dynamic import is started at most once while that cache holds a value.
 */
function loadOllamaProviderModule(): Promise<LazyProviderModule<"ollama-chat">> {
	if (!ollamaProviderModulePromise) {
		ollamaProviderModulePromise = import("./ollama").then(loaded => ({
			stream: (loaded as OllamaProviderModule).streamOllama,
		}));
	}
	return ollamaProviderModulePromise;
}
310
+
293
311
  function loadCursorProviderModule(): Promise<LazyProviderModule<"cursor-agent">> {
294
312
  cursorProviderModulePromise ||= import("./cursor").then(module => {
295
313
  const provider = module as CursorProviderModule;
@@ -326,4 +344,6 @@ export const streamOpenAICodexResponses = createLazyStream(loadOpenAICodexRespon
326
344
  export const streamOpenAICompletions = createLazyStream(loadOpenAICompletionsProviderModule);
327
345
  export const streamOpenAIResponses = createLazyStream(loadOpenAIResponsesProviderModule);
328
346
  export const streamCursor = createLazyStream(loadCursorProviderModule);
347
+ export const streamOllama = createLazyStream(loadOllamaProviderModule);
348
+
329
349
  export const streamBedrock = createLazyStream(loadBedrockProviderModule);
package/src/stream.ts CHANGED
@@ -18,6 +18,7 @@ import { type GoogleOptions, streamGoogle } from "./providers/google";
18
18
  import { type GoogleGeminiCliOptions, streamGoogleGeminiCli } from "./providers/google-gemini-cli";
19
19
  import { type GoogleVertexOptions, streamGoogleVertex } from "./providers/google-vertex";
20
20
  import { isKimiModel, streamKimi } from "./providers/kimi";
21
+ import { type OllamaChatOptions, streamOllama } from "./providers/ollama";
21
22
  import { streamOpenAICodexResponses } from "./providers/openai-codex-responses";
22
23
  import { type OpenAICompletionsOptions, streamOpenAICompletions } from "./providers/openai-completions";
23
24
  import { streamOpenAIResponses } from "./providers/openai-responses";
@@ -131,6 +132,7 @@ const serviceProviderMap: Record<string, KeyResolver> = {
131
132
  nanogpt: "NANO_GPT_API_KEY",
132
133
  "lm-studio": "LM_STUDIO_API_KEY",
133
134
  ollama: "OLLAMA_API_KEY",
135
+ "ollama-cloud": "OLLAMA_CLOUD_API_KEY",
134
136
  "llama.cpp": "LLAMA_CPP_API_KEY",
135
137
  qianfan: "QIANFAN_API_KEY",
136
138
  "qwen-portal": () => $pickenv("QWEN_OAUTH_TOKEN", "QWEN_PORTAL_API_KEY"),
@@ -218,6 +220,9 @@ export function stream<TApi extends Api>(
218
220
  providerOptions as GoogleGeminiCliOptions,
219
221
  );
220
222
 
223
+ case "ollama-chat":
224
+ return streamOllama(model as Model<"ollama-chat">, context, providerOptions as OllamaChatOptions);
225
+
221
226
  case "cursor-agent":
222
227
  return streamCursor(model as Model<"cursor-agent">, context, providerOptions as CursorOptions);
223
228
 
@@ -677,6 +682,13 @@ function mapOptionsForApi<TApi extends Api>(
677
682
  });
678
683
  }
679
684
 
685
+ case "ollama-chat":
686
+ return castApi<"ollama-chat">({
687
+ ...base,
688
+ reasoning: resolveOpenAiReasoningEffort(model, options),
689
+ toolChoice: options?.toolChoice,
690
+ });
691
+
680
692
  case "cursor-agent": {
681
693
  const execHandlers = options?.cursorExecHandlers ?? options?.execHandlers;
682
694
  const onToolResult = options?.cursorOnToolResult ?? execHandlers?.onToolResult;
package/src/types.ts CHANGED
@@ -23,6 +23,7 @@ import type {
23
23
  import type { GoogleOptions } from "./providers/google";
24
24
  import type { GoogleGeminiCliOptions } from "./providers/google-gemini-cli";
25
25
  import type { GoogleVertexOptions } from "./providers/google-vertex";
26
+ import type { OllamaChatOptions } from "./providers/ollama";
26
27
  import type { OpenAICodexResponsesOptions } from "./providers/openai-codex-responses";
27
28
  import type { OpenAICompletionsOptions } from "./providers/openai-completions";
28
29
  import type { OpenAIResponsesOptions } from "./providers/openai-responses";
@@ -40,6 +41,7 @@ export type KnownApi =
40
41
  | "google-generative-ai"
41
42
  | "google-gemini-cli"
42
43
  | "google-vertex"
44
+ | "ollama-chat"
43
45
  | "cursor-agent";
44
46
  export type Api = KnownApi | (string & {});
45
47
  export interface ApiOptionsMap {
@@ -52,6 +54,7 @@ export interface ApiOptionsMap {
52
54
  "google-generative-ai": GoogleOptions;
53
55
  "google-gemini-cli": GoogleGeminiCliOptions;
54
56
  "google-vertex": GoogleVertexOptions;
57
+ "ollama-chat": OllamaChatOptions;
55
58
  "cursor-agent": CursorOptions;
56
59
  }
57
60
  // Compile-time exhaustiveness check - this will fail if ApiOptionsMap doesn't have all KnownApi keys
@@ -120,6 +123,7 @@ export type KnownProvider =
120
123
  | "nvidia"
121
124
  | "nanogpt"
122
125
  | "ollama"
126
+ | "ollama-cloud"
123
127
  | "qianfan"
124
128
  | "qwen-portal"
125
129
  | "together"
@@ -91,6 +91,7 @@ export { loginNanoGPT } from "./nanogpt";
91
91
  export { loginNvidia } from "./nvidia";
92
92
  // Ollama (optional API key)
93
93
  export { loginOllama } from "./ollama";
94
+ export { loginOllamaCloud } from "./ollama-cloud";
94
95
  export type { OpenAICodexLoginOptions } from "./openai-codex";
95
96
  // OpenAI Codex (ChatGPT OAuth)
96
97
  export { loginOpenAICodex, refreshOpenAICodexToken } from "./openai-codex";
@@ -200,6 +201,11 @@ const builtInOAuthProviders: OAuthProviderInfo[] = [
200
201
  name: "Ollama (Local OpenAI-compatible)",
201
202
  available: true,
202
203
  },
204
+ {
205
+ id: "ollama-cloud",
206
+ name: "Ollama Cloud",
207
+ available: true,
208
+ },
203
209
  {
204
210
  id: "huggingface",
205
211
  name: "Hugging Face Inference",
@@ -398,6 +404,7 @@ export async function refreshOAuthToken(
398
404
  case "litellm":
399
405
  case "lm-studio":
400
406
  case "ollama":
407
+ case "ollama-cloud":
401
408
  case "xiaomi":
402
409
  case "zai":
403
410
  case "qianfan":
@@ -0,0 +1,28 @@
1
+ import type { OAuthController } from "./types";
2
+
3
/** Page where Ollama Cloud API keys are created. */
const OLLAMA_CLOUD_KEYS_URL = "https://ollama.com/settings/keys";

/**
 * Interactive "login" for Ollama Cloud: points the user at the key settings
 * page and asks them to paste an API key.
 *
 * @param options - Controller providing the prompt callback, an optional auth
 * notification callback, and an optional abort signal.
 * @returns The pasted API key with surrounding whitespace removed.
 * @throws Error when the signal is aborted before or after prompting, when no
 * prompt callback is available, or when the entered key is blank.
 */
export async function loginOllamaCloud(options: OAuthController): Promise<string> {
	const throwIfCancelled = (): void => {
		if (options.signal?.aborted) {
			throw new Error("Login cancelled");
		}
	};

	throwIfCancelled();
	if (!options.onPrompt) {
		throw new Error("Interactive prompt is required for Ollama Cloud login");
	}

	options.onAuth?.({
		url: OLLAMA_CLOUD_KEYS_URL,
		instructions: "Create an Ollama Cloud API key, then paste it here.",
	});
	const pasted = await options.onPrompt({
		message: "Paste your Ollama Cloud API key",
		placeholder: "ollama-cloud-api-key",
	});
	// The user may have cancelled while the prompt was open.
	throwIfCancelled();

	const key = pasted.trim();
	if (key.length === 0) {
		throw new Error("Ollama Cloud API key is required");
	}
	return key;
}
@@ -30,6 +30,7 @@ export type OAuthProvider =
30
30
  | "nvidia"
31
31
  | "nanogpt"
32
32
  | "ollama"
33
+ | "ollama-cloud"
33
34
  | "openai-codex"
34
35
  | "opencode-go"
35
36
  | "opencode-zen"