npm - @oh-my-pi/pi-ai - Versions diffs - 14.5.1 → 14.5.2 - Mend

@oh-my-pi/pi-ai 14.5.1 → 14.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

package/CHANGELOG.md +1 -0
package/README.md +31 -10
package/package.json +4 -4
package/src/api-registry.ts +1 -0
package/src/auth-storage.ts +6 -0
package/src/cli.ts +19 -0
package/src/index.ts +1 -0
package/src/models.json +101 -0
package/src/provider-models/bundled-references.ts +38 -0
package/src/provider-models/descriptors.ts +7 -0
package/src/provider-models/index.ts +1 -0
package/src/provider-models/ollama.ts +149 -0
package/src/provider-models/openai-compat.ts +2 -43
package/src/providers/anthropic.ts +1 -5
package/src/providers/ollama.ts +497 -0
package/src/providers/openai-responses.ts +1 -1
package/src/providers/register-builtins.ts +20 -0
package/src/stream.ts +12 -0
package/src/types.ts +4 -0
package/src/utils/oauth/index.ts +7 -0
package/src/utils/oauth/ollama-cloud.ts +28 -0
package/src/utils/oauth/types.ts +1 -0

package/CHANGELOG.md CHANGED Viewed

@@ -95,6 +95,7 @@
 - Fixed shell execution failure responses to preserve all result fields when sanitizing, preventing truncated metadata in stream results
 - Fixed context overflow detection to recognize `model_context_window_exceeded` from z.ai / GLM providers, preventing infinite retry loops when context window is exceeded ([#638](https://github.com/can1357/oh-my-pi/issues/638))
 - Fixed strict tool schema enforcement to preserve `additionalProperties: false` and required keys for reused nested object schemas, preventing invalid `todo_write` function schemas in Codex/OpenAI requests
+- Fixed GitHub Copilot reasoning regressions by preserving GPT-5.x / Claude 4.x reasoning controls instead of stripping them from requests ([#773](https://github.com/can1357/oh-my-pi/issues/773))
 ## [14.1.0] - 2026-04-11

package/README.md CHANGED Viewed

@@ -72,6 +72,7 @@ Unified LLM API with automatic model discovery, provider configuration, token an
 - **Qwen Portal** (supports `QWEN_OAUTH_TOKEN` or `QWEN_PORTAL_API_KEY`)
 - **Cloudflare AI Gateway** (requires `CLOUDFLARE_AI_GATEWAY_API_KEY` and provider-specific gateway base URL)
 - **Ollama** (local OpenAI-compatible runtime; optional `OLLAMA_API_KEY`)
+- **Ollama Cloud** (hosted native Ollama API; requires `OLLAMA_CLOUD_API_KEY`)
 - **llama.cpp** (local OpenAI and Anthropic compatible inference server)
 - **vLLM** (OpenAI-compatible server; `VLLM_API_KEY` for secured deployments)
 - **GitHub Copilot** (requires OAuth, see below)
@@ -690,13 +691,14 @@ console.log(`Using ${model.name} via ${model.api} API`);
 ### Custom Models
-You can create custom models for local inference servers or custom endpoints:
-For Ollama, `OLLAMA_API_KEY` is optional and mainly needed for authenticated/self-hosted gateways.
+You can create custom models for local inference servers or custom endpoints.
+For local Ollama, `OLLAMA_API_KEY` is optional and mainly needed for authenticated/self-hosted gateways. `ollama` remains the local OpenAI-compatible runtime integration.
 ```typescript
 import { Model, stream } from "@oh-my-pi/pi-ai";
-// Example: Ollama using OpenAI-compatible API
+// Example: local Ollama using the OpenAI-compatible API
 const ollamaModel: Model<"openai-completions"> = {
 	id: "llama-3.1-8b",
 	name: "Llama 3.1 8B (Ollama)",
@@ -710,6 +712,28 @@ const ollamaModel: Model<"openai-completions"> = {
 	maxTokens: 32000,
 };
+const localResponse = await stream(ollamaModel, context, {
+	apiKey: process.env.OLLAMA_API_KEY, // Optional; local Ollama usually runs without auth
+});
+// Example: Ollama Cloud using the native /api/chat transport
+const ollamaCloudModel: Model<"ollama-chat"> = {
+	id: "gpt-oss:120b",
+	name: "GPT OSS 120B (Ollama Cloud)",
+	api: "ollama-chat",
+	provider: "ollama-cloud",
+	baseUrl: "https://ollama.com",
+	reasoning: true,
+	input: ["text", "image"],
+	cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+	contextWindow: 262144,
+	maxTokens: 8192,
+};
+const cloudResponse = await stream(ollamaCloudModel, context, {
+	apiKey: process.env.OLLAMA_CLOUD_API_KEY,
+});
 // Example: LiteLLM proxy with explicit compat settings
 const litellmModel: Model<"openai-completions"> = {
 	id: "gpt-4o",
@@ -744,11 +768,6 @@ const proxyModel: Model<"anthropic-messages"> = {
 		"X-Custom-Auth": "bearer-token-here",
 	},
 };
-// Use the custom model
-const response = await stream(ollamaModel, context, {
-	apiKey: process.env.OLLAMA_API_KEY, // Optional; local Ollama usually runs without auth
-});
 ```
 ### OpenAI Compatibility Settings
@@ -928,6 +947,7 @@ In Node.js environments, you can set environment variables to avoid passing API
 | OpenRouter     | `OPENROUTER_API_KEY`                                                         |
 | LiteLLM        | `LITELLM_API_KEY`                                                            |
 | Ollama         | `OLLAMA_API_KEY` (optional for local deployments)                            |
+| Ollama Cloud   | `OLLAMA_CLOUD_API_KEY`                                                     |
 | Qwen Portal    | `QWEN_OAUTH_TOKEN` or `QWEN_PORTAL_API_KEY`                                  |
 | zAI            | `ZAI_API_KEY`                                                                |
 | MiniMax Code   | `MINIMAX_CODE_API_KEY` (international) or `MINIMAX_CODE_CN_API_KEY` (China) |
@@ -957,7 +977,8 @@ Provider endpoint defaults for the current OpenAI-compatible integrations:
 - ZenMux (OpenAI): `https://zenmux.ai/api/v1`
 - ZenMux (Anthropic models): `https://zenmux.ai/api/anthropic`
 - vLLM: `http://127.0.0.1:8000/v1`
-- Ollama: local OpenAI-compatible runtime
+- Ollama: local OpenAI-compatible runtime (`http://127.0.0.1:11434/v1`)
+- Ollama Cloud: native Ollama API host (`https://ollama.com/api`, configured here as base URL `https://ollama.com`)
 - LiteLLM: `http://localhost:4000/v1`
 - Cloudflare AI Gateway: `https://gateway.ai.cloudflare.com/v1/<account>/<gateway>/anthropic`
 - Qwen Portal: `https://portal.qwen.ai/v1`
@@ -1049,7 +1070,7 @@ Credentials are saved to `agent.db` in the agent directory. `/login qianfan` ope
 `login` supports OAuth providers (Anthropic, OpenAI Codex, GitHub Copilot, Gemini CLI, Antigravity) and API-key onboarding flows.
-For the current OpenAI-compatible integrations, API-key onboarding covers Together, Moonshot, Qianfan, NVIDIA, NanoGPT, Hugging Face, Venice, Xiaomi, vLLM, LiteLLM, Cloudflare AI Gateway, and Qwen Portal. Ollama is typically local and unauthenticated; set `OLLAMA_API_KEY` only when your Ollama deployment enforces bearer auth.
+For the current API-key onboarding flows, the library covers Together, Moonshot, Qianfan, NVIDIA, NanoGPT, Hugging Face, Venice, Xiaomi, vLLM, LiteLLM, Cloudflare AI Gateway, Qwen Portal, and Ollama Cloud. Ollama remains the local runtime integration; set `OLLAMA_API_KEY` only when your local or self-hosted deployment enforces bearer auth.
 ### Programmatic OAuth

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
 	"type": "module",
 	"name": "@oh-my-pi/pi-ai",
-	"version": "14.5.1",
+	"version": "14.5.2",
 	"description": "Unified LLM API with automatic model discovery and provider configuration",
 	"homepage": "https://github.com/can1357/oh-my-pi",
 	"author": "Can Boluk",
@@ -46,8 +46,8 @@
 		"@aws-sdk/credential-provider-node": "^3.972.36",
 		"@bufbuild/protobuf": "^2.12.0",
 		"@google/genai": "^1.50.1",
-		"@oh-my-pi/pi-natives": "14.5.1",
-		"@oh-my-pi/pi-utils": "14.5.1",
+		"@oh-my-pi/pi-natives": "14.5.2",
+		"@oh-my-pi/pi-utils": "14.5.2",
 		"@sinclair/typebox": "^0.34.49",
 		"@smithy/node-http-handler": "^4.6.1",
 		"ajv": "^8.20.0",
@@ -58,7 +58,7 @@
 		"zod": "4.3.6"
 	},
 	"devDependencies": {
-		"@types/bun": "^1.3.13"
+		"@types/bun": "^1.3"
 	},
 	"engines": {
 		"bun": ">=1.3.7"

package/src/api-registry.ts CHANGED Viewed

@@ -24,6 +24,7 @@ const BUILTIN_APIS = new Set<KnownApi>([
 	"google-generative-ai",
 	"google-gemini-cli",
 	"google-vertex",
+	"ollama-chat",
 	"cursor-agent",
 ]);

package/src/auth-storage.ts CHANGED Viewed

@@ -51,6 +51,7 @@ import { loginMoonshot } from "./utils/oauth/moonshot";
 import { loginNanoGPT } from "./utils/oauth/nanogpt";
 import { loginNvidia } from "./utils/oauth/nvidia";
 import { loginOllama } from "./utils/oauth/ollama";
+import { loginOllamaCloud } from "./utils/oauth/ollama-cloud";
 import { loginOpenAICodex } from "./utils/oauth/openai-codex";
 import { loginOpenCode } from "./utils/oauth/opencode";
 import { loginParallel } from "./utils/oauth/parallel";
@@ -838,6 +839,11 @@ export class AuthStorage {
 				await saveApiKeyCredential(apiKey);
 				return;
 			}
+			case "ollama-cloud": {
+				const apiKey = await loginOllamaCloud(ctrl);
+				await saveApiKeyCredential(apiKey);
+				return;
+			}
 			case "cerebras": {
 				const apiKey = await loginCerebras(ctrl);
 				await saveApiKeyCredential(apiKey);

package/src/cli.ts CHANGED Viewed

@@ -12,6 +12,7 @@ import { loginKilo } from "./utils/oauth/kilo";
 import { loginKimi } from "./utils/oauth/kimi";
 import { loginMiniMaxCode, loginMiniMaxCodeCn } from "./utils/oauth/minimax-code";
 import { loginNanoGPT } from "./utils/oauth/nanogpt";
+import { loginOllamaCloud } from "./utils/oauth/ollama-cloud";
 import { loginOpenAICodex } from "./utils/oauth/openai-codex";
 import { loginParallel } from "./utils/oauth/parallel";
 import { loginTavily } from "./utils/oauth/tavily";
@@ -271,6 +272,23 @@ async function login(provider: OAuthProvider): Promise<void> {
 				console.log(`\nAPI key saved to ~/.omp/agent/agent.db`);
 				return;
 			}
+			case "ollama-cloud": {
+				const apiKey = await loginOllamaCloud({
+					onAuth(info) {
+						const { url, instructions } = info;
+						console.log(`\nOpen this URL in your browser:\n${url}`);
+						if (instructions) console.log(instructions);
+						console.log();
+					},
+					onPrompt(p) {
+						return promptFn(`${p.message}${p.placeholder ? ` (${p.placeholder})` : ""}:`);
+					},
+				});
+				storage.saveApiKey(provider, apiKey);
+				console.log(`\nAPI key saved to ~/.omp/agent/agent.db`);
+				return;
+			}
 			case "minimax-code": {
 				const apiKey = await loginMiniMaxCode({
 					onAuth(info) {
@@ -347,6 +365,7 @@ Providers:
   minimax-code-cn   MiniMax Coding Plan (China)
   cursor            Cursor (Claude, GPT, etc.)
   zenmux            ZenMux
+  ollama-cloud      Ollama Cloud
 Examples:
   bunx @oh-my-pi/pi-ai login              # interactive provider selection

package/src/index.ts CHANGED Viewed

@@ -16,6 +16,7 @@ export * from "./providers/google";
 export * from "./providers/google-gemini-cli";
 export * from "./providers/google-vertex";
 export * from "./providers/kimi";
+export * from "./providers/ollama";
 export type { OpenAICodexResponsesOptions } from "./providers/openai-codex-responses";
 export * from "./providers/openai-completions";
 export * from "./providers/openai-responses";

package/src/models.json CHANGED Viewed

@@ -42445,6 +42445,107 @@
 			}
 		}
 	},
+	"ollama-cloud": {
+		"gemma4:31b": {
+			"id": "gemma4:31b",
+			"name": "Gemma 4",
+			"api": "ollama-chat",
+			"provider": "ollama-cloud",
+			"baseUrl": "https://ollama.com",
+			"reasoning": true,
+			"input": [
+				"text",
+				"image"
+			],
+			"cost": {
+				"input": 0,
+				"output": 0,
+				"cacheRead": 0,
+				"cacheWrite": 0
+			},
+			"contextWindow": 262144,
+			"maxTokens": 16384,
+			"thinking": {
+				"mode": "effort",
+				"minLevel": "minimal",
+				"maxLevel": "high"
+			}
+		},
+		"gpt-oss:120b": {
+			"id": "gpt-oss:120b",
+			"name": "GPT OSS (120B)",
+			"api": "ollama-chat",
+			"provider": "ollama-cloud",
+			"baseUrl": "https://ollama.com",
+			"reasoning": true,
+			"input": [
+				"text",
+				"image"
+			],
+			"cost": {
+				"input": 0,
+				"output": 0,
+				"cacheRead": 0,
+				"cacheWrite": 0
+			},
+			"contextWindow": 131072,
+			"maxTokens": 16384,
+			"thinking": {
+				"mode": "effort",
+				"minLevel": "minimal",
+				"maxLevel": "high"
+			}
+		},
+		"gpt-oss:20b": {
+			"id": "gpt-oss:20b",
+			"name": "GPT OSS (20B)",
+			"api": "ollama-chat",
+			"provider": "ollama-cloud",
+			"baseUrl": "https://ollama.com",
+			"reasoning": true,
+			"input": [
+				"text"
+			],
+			"cost": {
+				"input": 0,
+				"output": 0,
+				"cacheRead": 0,
+				"cacheWrite": 0
+			},
+			"contextWindow": 131072,
+			"maxTokens": 16384,
+			"thinking": {
+				"mode": "effort",
+				"minLevel": "minimal",
+				"maxLevel": "high"
+			}
+		},
+		"qwen3-next:80b": {
+			"id": "qwen3-next:80b",
+			"name": "Qwen 3 Next (80B)",
+			"api": "ollama-chat",
+			"provider": "ollama-cloud",
+			"baseUrl": "https://ollama.com",
+			"reasoning": true,
+			"input": [
+				"text"
+			],
+			"cost": {
+				"input": 0,
+				"output": 0,
+				"cacheRead": 0,
+				"cacheWrite": 0
+			},
+			"contextWindow": 262144,
+			"maxTokens": 16384,
+			"thinking": {
+				"mode": "effort",
+				"minLevel": "minimal",
+				"maxLevel": "high"
+			}
+		}
+	},
 	"qianfan": {
 		"deepseek-v3.2": {
 			"id": "deepseek-v3.2",

package/src/provider-models/bundled-references.ts ADDED Viewed

@@ -0,0 +1,38 @@
+import { getBundledModels, getBundledProviders } from "../models";
+import type { Api, Model } from "../types";
+/**
+ * Indexes one provider's bundled models by model ID.
+ *
+ * @param provider - Bundled provider key, forwarded unchanged to `getBundledModels`.
+ * @returns Map from model ID to its bundled definition; when two bundled entries share an ID the later one wins.
+ */
+export function createBundledReferenceMap<TApi extends Api>(
+	provider: Parameters<typeof getBundledModels>[0],
+): Map<string, Model<TApi>> {
+	const pairs = Array.from(
+		getBundledModels(provider),
+		(entry): [string, Model<TApi>] => [entry.id, entry as Model<TApi>],
+	);
+	return new Map<string, Model<TApi>>(pairs);
+}
+/**
+ * Builds a lookup that resolves a model ID to a bundled reference model.
+ *
+ * The provider-specific entry always wins. Otherwise the lookup falls back to the best entry found across
+ * every bundled provider: largest `contextWindow`, then largest `maxTokens`, then the `openai` entry on a tie.
+ *
+ * @param providerRefs - Bundled references of the provider being resolved, keyed by model ID.
+ * @returns Function mapping a model ID to its reference, or `undefined` when no bundled model has that ID.
+ */
+export function createReferenceResolver<TApi extends Api>(
+	providerRefs: Map<string, Model<TApi>>,
+): (modelId: string) => Model<TApi> | undefined {
+	// True when `candidate` should displace `current` as the cross-provider fallback.
+	const outranks = (candidate: Model<Api>, current: Model<Api>): boolean => {
+		if (candidate.contextWindow !== current.contextWindow) {
+			return candidate.contextWindow > current.contextWindow;
+		}
+		if (candidate.maxTokens !== current.maxTokens) {
+			return candidate.maxTokens > current.maxTokens;
+		}
+		// Limits tie: prefer the OpenAI entry as the canonical one.
+		return current.provider !== "openai" && candidate.provider === "openai";
+	};
+	const fallbackRefs = new Map<string, Model<Api>>();
+	for (const providerId of getBundledProviders()) {
+		const bundled = getBundledModels(providerId as Parameters<typeof getBundledModels>[0]);
+		for (const entry of bundled) {
+			const candidate = entry as Model<Api>;
+			const current = fallbackRefs.get(candidate.id);
+			if (!current || outranks(candidate, current)) {
+				fallbackRefs.set(candidate.id, candidate);
+			}
+		}
+	}
+	return (modelId: string) => providerRefs.get(modelId) ?? (fallbackRefs.get(modelId) as Model<TApi> | undefined);
+}

package/src/provider-models/descriptors.ts CHANGED Viewed

@@ -7,6 +7,7 @@ import type { ModelManagerOptions } from "../model-manager";
 import type { Api, KnownProvider } from "../types";
 import type { OAuthProvider } from "../utils/oauth/types";
 import { googleModelManagerOptions } from "./google";
+import { ollamaCloudModelManagerOptions } from "./ollama";
 import {
 	alibabaCodingPlanModelManagerOptions,
 	anthropicModelManagerOptions,
@@ -184,6 +185,12 @@ export const PROVIDER_DESCRIPTORS: readonly ProviderDescriptor[] = [
 		catalog("Ollama", ["OLLAMA_API_KEY"]),
 		{ allowUnauthenticated: true },
 	),
+	catalogDescriptor(
+		"ollama-cloud",
+		"gpt-oss:120b",
+		config => ollamaCloudModelManagerOptions(config),
+		catalog("Ollama Cloud", ["OLLAMA_CLOUD_API_KEY"], { oauthProvider: "ollama-cloud" }),
+	),
 	catalogDescriptor(
 		"cloudflare-ai-gateway",
 		"claude-sonnet-4-5",

package/src/provider-models/index.ts CHANGED Viewed

@@ -1,4 +1,5 @@
 export * from "./descriptors";
 export * from "./google";
+export * from "./ollama";
 export * from "./openai-compat";
 export * from "./special";

package/src/provider-models/ollama.ts ADDED Viewed

@@ -0,0 +1,149 @@
+import type { ModelManagerOptions } from "../model-manager";
+import { Effort } from "../model-thinking";
+import type { ThinkingConfig } from "../types";
+import { createBundledReferenceMap, createReferenceResolver } from "./bundled-references";
+/** Settings accepted by `ollamaCloudModelManagerOptions`. */
+export interface OllamaCloudModelManagerConfig {
+	/** Bearer token sent on discovery requests; when it is missing, dynamic discovery returns no models. */
+	apiKey?: string;
+	/** Host override; passed through `normalizeOllamaCloudBaseUrl`, which defaults to `https://ollama.com`. */
+	baseUrl?: string;
+}
+/** One element of the `models` array read from the `/api/tags` response. */
+type OllamaTagEntry = {
+	/** Used as the display name when it differs from the resolved ID; also the ID fallback when `model` is absent. */
+	name?: string;
+	/** Preferred source of the model ID. */
+	model?: string;
+};
+/** Subset of the `/api/show` response body that model discovery reads. */
+type OllamaShowResponse = {
+	/** Capability tags; discovery checks for `"thinking"` and `"vision"`. */
+	capabilities?: string[];
+	/** Free-form metadata; scanned by `getContextWindow` for a numeric context-length key. */
+	model_info?: Record<string, unknown>;
+};
+/** Drops a single trailing `/` from `value`, if there is one. */
+function trimTrailingSlash(value: string): string {
+	if (!value.endsWith("/")) {
+		return value;
+	}
+	return value.slice(0, value.length - 1);
+}
+/**
+ * Normalizes an Ollama Cloud base URL to the bare host form.
+ *
+ * @param baseUrl - Optional override; surrounding whitespace is ignored.
+ * @returns `https://ollama.com` when the override is missing or blank; otherwise the override with one
+ * trailing `/` and then a trailing `/api` segment removed.
+ */
+export function normalizeOllamaCloudBaseUrl(baseUrl?: string): string {
+	const candidate = baseUrl?.trim();
+	if (candidate === undefined || candidate === "") {
+		return "https://ollama.com";
+	}
+	const apiSuffix = "/api";
+	const withoutSlash = trimTrailingSlash(candidate);
+	if (withoutSlash.endsWith(apiSuffix)) {
+		return withoutSlash.slice(0, withoutSlash.length - apiSuffix.length);
+	}
+	return withoutSlash;
+}
+/** Builds the JSON `Accept` and bearer `Authorization` headers used for Ollama Cloud requests. */
+function createCloudHeaders(apiKey: string): Record<string, string> {
+	const headers: Record<string, string> = { Accept: "application/json" };
+	headers.Authorization = `Bearer ${apiKey}`;
+	return headers;
+}
+/**
+ * Extracts a context-window size from `/api/show` model metadata.
+ *
+ * @param modelInfo - The `model_info` record, if any.
+ * @returns The first numeric value whose key ends with `.context_length`, `.num_ctx` or `.context_window`,
+ * or `undefined` when there is no metadata or no such key.
+ */
+function getContextWindow(modelInfo: Record<string, unknown> | undefined): number | undefined {
+	if (!modelInfo) {
+		return undefined;
+	}
+	const suffixes = [".context_length", ".num_ctx", ".context_window"];
+	const hit = Object.entries(modelInfo).find(
+		([key, value]) => typeof value === "number" && suffixes.some(suffix => key.endsWith(suffix)),
+	);
+	return hit === undefined ? undefined : (hit[1] as number);
+}
+/**
+ * Derives the thinking configuration from a model's capability tags.
+ *
+ * @param capabilities - Capability tags reported for the model, if any.
+ * @returns An effort-mode config spanning `Effort.Minimal` to `Effort.High` when `"thinking"` is listed,
+ * otherwise `undefined`.
+ */
+function getThinkingConfig(capabilities: string[] | undefined): ThinkingConfig | undefined {
+	const supportsThinking = capabilities?.includes("thinking") ?? false;
+	return supportsThinking ? { mode: "effort", minLevel: Effort.Minimal, maxLevel: Effort.High } : undefined;
+}
+/**
+ * Requests metadata for one model from `POST {baseUrl}/api/show`.
+ *
+ * @param baseUrl - Host to query, without the `/api` segment.
+ * @param apiKey - Bearer token for the request.
+ * @param model - Model ID sent in the JSON body.
+ * @returns The parsed response body, or `undefined` when the server answers with a non-OK status.
+ * Network and JSON-parsing failures propagate to the caller.
+ */
+async function fetchShowMetadata(
+	baseUrl: string,
+	apiKey: string,
+	model: string,
+): Promise<OllamaShowResponse | undefined> {
+	const headers = { ...createCloudHeaders(apiKey), "Content-Type": "application/json" };
+	const payload = JSON.stringify({ model });
+	const response = await fetch(`${baseUrl}/api/show`, { method: "POST", headers, body: payload });
+	if (response.ok) {
+		return (await response.json()) as OllamaShowResponse;
+	}
+	return undefined;
+}
+/**
+ * Creates the model-manager options for the `ollama-cloud` provider.
+ *
+ * Discovery lists models from `GET {baseUrl}/api/tags`, then enriches each entry with `/api/show` metadata
+ * and, where metadata is missing, with bundled reference models.
+ *
+ * @param config - Optional API key and base URL override.
+ * @returns Options whose `fetchDynamicModels` resolves to the discovered models sorted by ID. It resolves to
+ * an empty list when no API key is configured and rejects when `/api/tags` answers with a non-OK status.
+ */
+export function ollamaCloudModelManagerOptions(
+	config?: OllamaCloudModelManagerConfig,
+): ModelManagerOptions<"ollama-chat"> {
+	const apiKey = config?.apiKey;
+	const baseUrl = normalizeOllamaCloudBaseUrl(config?.baseUrl);
+	const resolveReference = createReferenceResolver(createBundledReferenceMap<"ollama-chat">("ollama-cloud"));
+	return {
+		providerId: "ollama-cloud",
+		fetchDynamicModels: async () => {
+			// Without a key there is nothing to query; report no dynamic models instead of failing.
+			if (!apiKey) {
+				return [];
+			}
+			const response = await fetch(`${baseUrl}/api/tags`, {
+				method: "GET",
+				headers: createCloudHeaders(apiKey),
+			});
+			if (!response.ok) {
+				throw new Error(`HTTP ${response.status} from ${baseUrl}/api/tags`);
+			}
+			const payload = (await response.json()) as { models?: OllamaTagEntry[] };
+			const entries = payload.models ?? [];
+			// One `/api/show` request per listed model, all issued concurrently.
+			const models = await Promise.all(
+				entries.map(async entry => {
+					const id = entry.model ?? entry.name;
+					// Entries without any identifier are dropped by the filter below.
+					if (!id) {
+						return undefined;
+					}
+					const reference = resolveReference(id);
+					let metadata: OllamaShowResponse | undefined;
+					// Metadata is best-effort: a failed lookup falls back to the bundled reference.
+					try {
+						metadata = await fetchShowMetadata(baseUrl, apiKey, id);
+					} catch {
+						metadata = undefined;
+					}
+					const capabilities = metadata?.capabilities;
+					// Precedence: live metadata, then bundled reference, then a 128000-token default.
+					const contextWindow = getContextWindow(metadata?.model_info) ?? reference?.contextWindow ?? 128000;
+					// When capabilities are reported they are authoritative; the reference is only used without them.
+					const reasoning = capabilities ? capabilities.includes("thinking") : (reference?.reasoning ?? false);
+					const thinking = capabilities ? getThinkingConfig(capabilities) : reference?.thinking;
+					const input = capabilities
+						? capabilities.includes("vision")
+							? (["text", "image"] as Array<"text" | "image">)
+							: (["text"] as Array<"text">)
+						: ((reference?.input as Array<"text" | "image"> | undefined) ?? (["text"] as Array<"text">));
+					// Prefer a listing name that differs from the ID, then the reference name, then the ID itself.
+					const resolvedName = entry.name && entry.name !== id ? entry.name : (reference?.name ?? id);
+					return {
+						id,
+						name: resolvedName,
+						api: "ollama-chat" as const,
+						provider: "ollama-cloud" as const,
+						baseUrl,
+						reasoning,
+						thinking,
+						input,
+						cost: reference?.cost ?? { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+						contextWindow,
+						// Without a reference, cap output at 8192 tokens or the context window, whichever is smaller.
+						maxTokens: reference?.maxTokens ?? Math.min(contextWindow, 8192),
+					};
+				}),
+			);
+			return models
+				.filter((model): model is NonNullable<(typeof models)[number]> => model !== undefined)
+				.sort((left, right) => left.id.localeCompare(right.id));
+		},
+	};
+}

package/src/provider-models/openai-compat.ts CHANGED Viewed

@@ -1,5 +1,5 @@
 import type { ModelManagerOptions } from "../model-manager";
-import { getBundledModels, getBundledProviders } from "../models";
+import { getBundledModels } from "../models";
 import type { Api, Model } from "../types";
 import { isAnthropicOAuthToken, isRecord, toNumber, toPositiveNumber } from "../utils";
 import {
@@ -8,6 +8,7 @@ import {
 	type OpenAICompatibleModelRecord,
 } from "../utils/discovery/openai-compatible";
 import { getGitHubCopilotBaseUrl, OPENCODE_HEADERS, parseGitHubCopilotApiKey } from "../utils/oauth/github-copilot";
+import { createBundledReferenceMap, createReferenceResolver } from "./bundled-references";
 const MODELS_DEV_URL = "https://models.dev/api.json";
 const ANTHROPIC_BASE_URL = "https://api.anthropic.com/v1";
@@ -163,48 +164,6 @@ function mapWithBundledReference<TApi extends Api>(
 	};
 }
-function createBundledReferenceMap<TApi extends Api>(
-	provider: Parameters<typeof getBundledModels>[0],
-): Map<string, Model<TApi>> {
-	const references = new Map<string, Model<TApi>>();
-	for (const model of getBundledModels(provider)) {
-		references.set(model.id, model as Model<TApi>);
-	}
-	return references;
-}
-/**
- * Returns a lookup that resolves a model ID to a bundled reference, preferring
- * the provider-specific entry over a cross-provider fallback. The global fallback
- * picks the best entry across all providers (largest contextWindow, then maxTokens,
- * then canonical OpenAI), but proxy providers (Copilot, nanogpt, etc.) impose their
- * own limits that are typically lower than native provider limits, so the
- * provider-specific entry must win.
- */
-function createReferenceResolver<TApi extends Api>(
-	providerRefs: Map<string, Model<TApi>>,
-): (modelId: string) => Model<TApi> | undefined {
-	const globalRefs = new Map<string, Model<Api>>();
-	for (const provider of getBundledProviders()) {
-		for (const model of getBundledModels(provider as Parameters<typeof getBundledModels>[0])) {
-			const candidate = model as Model<Api>;
-			const existing = globalRefs.get(candidate.id);
-			if (!existing) {
-				globalRefs.set(candidate.id, candidate);
-			} else if (candidate.contextWindow !== existing.contextWindow) {
-				if (candidate.contextWindow > existing.contextWindow) globalRefs.set(candidate.id, candidate);
-			} else if (candidate.maxTokens !== existing.maxTokens) {
-				if (candidate.maxTokens > existing.maxTokens) globalRefs.set(candidate.id, candidate);
-			} else if (existing.provider !== "openai" && candidate.provider === "openai") {
-				// When limits tie, prefer OpenAI as canonical so generic OpenAI-family
-				// providers inherit OpenAI pricing/capabilities instead of proxy metadata.
-				globalRefs.set(candidate.id, candidate);
-			}
-		}
-	}
-	return (modelId: string) => providerRefs.get(modelId) ?? (globalRefs.get(modelId) as Model<TApi> | undefined);
-}
 function normalizeAnthropicBaseUrl(baseUrl: string | undefined, fallback: string): string {
 	const value = baseUrl?.trim();
 	if (!value) {

package/src/providers/anthropic.ts CHANGED Viewed

@@ -1519,7 +1519,7 @@ function buildParams(
 		);
 	}
-	if (options?.thinkingEnabled && model.reasoning && model.provider !== "github-copilot") {
+	if (options?.thinkingEnabled && model.reasoning) {
 		const mode = model.thinking?.mode;
 		const requestedEffort = options.reasoning;
 		const effort =
@@ -1583,10 +1583,6 @@ function buildParams(
 		params.system = systemBlocks;
 	}
 	disableThinkingIfToolChoiceForced(params);
-	if (model.provider === "github-copilot") {
-		delete params.thinking;
-		delete params.output_config;
-	}
 	ensureMaxTokensForThinking(params, model);
 	applyPromptCaching(params, cacheControl);
 	enforceCacheControlLimit(params, 4);

package/src/providers/ollama.ts ADDED Viewed

@@ -0,0 +1,497 @@
+import type { TSchema } from "@sinclair/typebox";
+import { getEnvApiKey } from "../stream";
+import type {
+	Api,
+	AssistantMessage,
+	Context,
+	DeveloperMessage,
+	Message,
+	Model,
+	StreamFunction,
+	StreamOptions,
+	Tool,
+	ToolChoice,
+	ToolResultMessage,
+	UserMessage,
+} from "../types";
+import { AssistantMessageEventStream } from "../utils/event-stream";
+import { finalizeErrorMessage, type RawHttpRequestDump } from "../utils/http-inspector";
+import { parseStreamingJson } from "../utils/json-parse";
+import { transformMessages } from "./transform-messages";
+/** Stream options for the `ollama-chat` API. */
+export interface OllamaChatOptions extends StreamOptions {
+	/** Requested reasoning effort; `mapReasoning` collapses it to the request's `think` value. */
+	reasoning?: "minimal" | "low" | "medium" | "high" | "xhigh";
+	/** Tool selection policy; `mapToolChoice` converts it to the request's `tool_choice` value. */
+	toolChoice?: ToolChoice;
+}
+/** Tool definition in the request body, as produced by `convertTools`. */
+type OllamaFunctionTool = {
+	type: "function";
+	function: {
+		name: string;
+		description: string;
+		/** The tool's parameter schema, passed through unchanged. */
+		parameters: TSchema;
+	};
+};
+/** Chat message in the request body, as produced by `convertMessage`. */
+type OllamaMessage = {
+	role: "system" | "user" | "assistant" | "tool";
+	/** Text parts of the source message joined with newlines. */
+	content: string;
+	/** `data` payloads of image blocks; omitted when the message carries no images. */
+	images?: string[];
+	/** Thinking blocks of an assistant message joined with newlines. */
+	thinking?: string;
+	/** Tool calls issued by an assistant message. */
+	tool_calls?: Array<{
+		type: "function";
+		function: {
+			index?: number;
+			name: string;
+			arguments: Record<string, unknown>;
+		};
+	}>;
+	/** Name of the tool that produced a `tool` message. */
+	tool_name?: string;
+};
+/** One parsed NDJSON line of the streamed chat response, as yielded by `iterateNdjson`. */
+type OllamaChatChunk = {
+	message?: {
+		role?: string;
+		content?: string;
+		thinking?: string;
+		tool_calls?: Array<{
+			type?: string;
+			function?: {
+				index?: number;
+				name?: string;
+				/** Either an already-parsed object or a JSON string. */
+				arguments?: Record<string, unknown> | string;
+			};
+		}>;
+	};
+	done?: boolean;
+	/** Interpreted by `mapDoneReason`, which recognizes `"length"` and `"tool_calls"`. */
+	done_reason?: string;
+	// NOTE(review): presumably prompt/completion token counts — confirm against the Ollama API reference.
+	prompt_eval_count?: number;
+	eval_count?: number;
+};
+/**
+ * Tool-call content block with a scratch `partialJson` field. `endToolCallBlock` parses a non-empty
+ * `partialJson` into `arguments` and removes the field before emitting the block.
+ */
+type InternalToolCallBlock = AssistantMessage["content"][number] & {
+	type: "toolCall";
+	partialJson?: string;
+};
+/**
+ * Normalizes the model's base URL to the bare host form.
+ *
+ * @param baseUrl - Optional base URL; surrounding whitespace is ignored.
+ * @returns `https://ollama.com` when the value is missing or blank; otherwise the value with one trailing
+ * `/` and then a trailing `/api` segment removed.
+ */
+function normalizeBaseUrl(baseUrl?: string): string {
+	const candidate = baseUrl?.trim();
+	if (candidate === undefined || candidate === "") {
+		return "https://ollama.com";
+	}
+	const apiSuffix = "/api";
+	let host = candidate;
+	if (host.endsWith("/")) {
+		host = host.slice(0, host.length - 1);
+	}
+	if (host.endsWith(apiSuffix)) {
+		host = host.slice(0, host.length - apiSuffix.length);
+	}
+	return host;
+}
+/**
+ * Collapses the five-level reasoning option onto the three effort levels sent as `think`.
+ *
+ * @param reasoning - Requested effort, if any.
+ * @returns `"low"` for minimal/low, `"medium"` for medium, `"high"` for high/xhigh, otherwise `undefined`.
+ */
+function mapReasoning(reasoning: OllamaChatOptions["reasoning"]): boolean | "low" | "medium" | "high" | undefined {
+	if (reasoning === "minimal" || reasoning === "low") {
+		return "low";
+	}
+	if (reasoning === "medium") {
+		return "medium";
+	}
+	if (reasoning === "high" || reasoning === "xhigh") {
+		return "high";
+	}
+	return undefined;
+}
+/**
+ * Converts the generic tool-choice option into the `tool_choice` value of the request.
+ *
+ * @param toolChoice - Requested policy, if any.
+ * @returns `undefined` for a missing or `"auto"` choice (the field is then left out of the request),
+ * `"none"` for `"none"`, and `"required"` for `"required"`, `"any"` or any object-form choice.
+ */
+function mapToolChoice(toolChoice: ToolChoice | undefined): "auto" | "none" | "required" | undefined {
+	if (!toolChoice || toolChoice === "auto") {
+		return undefined;
+	}
+	if (toolChoice === "none") {
+		return "none";
+	}
+	// An object-form choice is sent as a plain "required".
+	const forcesTool = toolChoice === "required" || toolChoice === "any" || typeof toolChoice === "object";
+	return forcesTool ? "required" : undefined;
+}
+/**
+ * Flattens message content into the `content` / `images` pair of an Ollama message.
+ *
+ * @param content - Either a plain string or a list of text and image blocks.
+ * @returns The string as-is, or the text blocks joined with newlines plus the image `data` payloads.
+ * `images` is omitted when there are none; blocks missing their string payload are skipped.
+ */
+function toPlainContent(content: string | Array<{ type: "text" | "image"; text?: string; data?: string }>): {
+	content: string;
+	images?: string[];
+} {
+	if (typeof content === "string") {
+		return { content };
+	}
+	const texts = content.flatMap(block => (block.type === "text" && typeof block.text === "string" ? [block.text] : []));
+	const images = content.flatMap(block => (block.type === "image" && typeof block.data === "string" ? [block.data] : []));
+	const flattened: { content: string; images?: string[] } = { content: texts.join("\n") };
+	if (images.length > 0) {
+		flattened.images = images;
+	}
+	return flattened;
+}
+/**
+ * Converts one message into an Ollama chat message.
+ *
+ * User messages keep their role, developer messages become `system`, and tool results become `tool`
+ * messages tagged with the tool name. Any other message is treated as an assistant message: its text and
+ * thinking blocks are each joined with newlines, and its tool-call blocks become `tool_calls`.
+ *
+ * @param message - Message to convert.
+ * @returns The Ollama representation of `message`.
+ */
+function convertMessage(message: Message): OllamaMessage {
+	switch (message.role) {
+		case "user":
+			return { role: "user", ...toPlainContent(message.content as UserMessage["content"]) };
+		case "developer":
+			return { role: "system", ...toPlainContent(message.content as DeveloperMessage["content"]) };
+		case "toolResult":
+			return {
+				role: "tool",
+				tool_name: message.toolName,
+				...toPlainContent(message.content as ToolResultMessage["content"]),
+			};
+	}
+	const textParts: string[] = [];
+	const thoughtParts: string[] = [];
+	const calls: NonNullable<OllamaMessage["tool_calls"]> = [];
+	for (const block of message.content) {
+		switch (block.type) {
+			case "text":
+				textParts.push(block.text);
+				break;
+			case "thinking":
+				thoughtParts.push(block.thinking);
+				break;
+			case "toolCall":
+				calls.push({
+					type: "function",
+					function: { name: block.name, arguments: block.arguments },
+				});
+				break;
+			default:
+				// Other block kinds have no Ollama counterpart and are dropped.
+				break;
+		}
+	}
+	const converted: OllamaMessage = { role: "assistant", content: textParts.join("\n") };
+	if (thoughtParts.length > 0) {
+		converted.thinking = thoughtParts.join("\n");
+	}
+	if (calls.length > 0) {
+		converted.tool_calls = calls;
+	}
+	return converted;
+}
+/**
+ * Builds the `messages` array of the request.
+ *
+ * A non-empty system prompt is prepended as a developer message, the combined list is passed through
+ * `transformMessages`, and each resulting message is converted with `convertMessage`.
+ *
+ * @param model - Target model, forwarded to `transformMessages`.
+ * @param context - Conversation context providing the system prompt and message history.
+ * @returns The converted message list.
+ */
+function convertMessages(model: Model<"ollama-chat">, context: Context): OllamaMessage[] {
+	const systemMessages: Message[] = context.systemPrompt
+		? [{ role: "developer", content: context.systemPrompt, timestamp: Date.now() }]
+		: [];
+	const history: Message[] = [...systemMessages, ...context.messages];
+	return transformMessages(history, model).map(convertMessage);
+}
+/**
+ * Converts tool definitions into Ollama function tools.
+ *
+ * @param tools - Tools available to the model, if any.
+ * @returns One function tool per input tool, or `undefined` when the list is missing or empty.
+ */
+function convertTools(tools: Tool[] | undefined): OllamaFunctionTool[] | undefined {
+	if (!tools?.length) {
+		return undefined;
+	}
+	const converted: OllamaFunctionTool[] = [];
+	for (const { name, description, parameters } of tools) {
+		converted.push({ type: "function", function: { name, description, parameters } });
+	}
+	return converted;
+}
+/**
+ * Assembles the JSON body of a streaming chat request.
+ *
+ * Optional fields are left out entirely rather than sent as `undefined`: `tools` when the context has no
+ * tools, `think` when no reasoning effort maps to a value, `tool_choice` when the choice is automatic, and
+ * `options.num_predict` when no `maxTokens` limit is given.
+ *
+ * @param model - Target model; its `id` is sent as `model`.
+ * @param context - Conversation context providing the messages and tools.
+ * @param options - Optional stream options (`reasoning`, `toolChoice`, `maxTokens`).
+ * @returns The request body, always with `stream: true`.
+ */
+function createChatBody(model: Model<"ollama-chat">, context: Context, options: OllamaChatOptions | undefined) {
+	const think = mapReasoning(options?.reasoning);
+	const toolChoice = mapToolChoice(options?.toolChoice);
+	// Convert the tool list once and reuse it for both the presence check and the payload.
+	const tools = convertTools(context.tools);
+	return {
+		model: model.id,
+		messages: convertMessages(model, context),
+		...(tools ? { tools } : {}),
+		...(think !== undefined ? { think } : {}),
+		...(toolChoice !== undefined ? { tool_choice: toolChoice } : {}),
+		...(options?.maxTokens !== undefined ? { options: { num_predict: options.maxTokens } } : {}),
+		stream: true,
+	};
+}
+/**
+ * Decodes a newline-delimited JSON byte stream into parsed chunks.
+ *
+ * Bytes are decoded incrementally, so a line (or a multi-byte character) may be split across reads. Blank
+ * lines are skipped, and a final line without a trailing newline is still parsed.
+ *
+ * The reader is always cleaned up: when iteration ends for any reason (exhaustion, the consumer stopping
+ * early, or a parse/read failure) the stream is cancelled and the reader lock released, so the underlying
+ * response body is not left locked and half-read.
+ *
+ * @param stream - Response body to read.
+ * @returns An async generator yielding one parsed chunk per non-empty line.
+ * @throws SyntaxError when a line is not valid JSON; read errors from the stream propagate unchanged.
+ */
+async function* iterateNdjson(stream: ReadableStream<Uint8Array>): AsyncGenerator<OllamaChatChunk> {
+	const reader = stream.getReader();
+	const decoder = new TextDecoder();
+	let buffer = "";
+	try {
+		while (true) {
+			const { done, value } = await reader.read();
+			if (done) {
+				break;
+			}
+			buffer += decoder.decode(value, { stream: true });
+			// Emit every complete line currently buffered; keep the unterminated remainder for the next read.
+			let newlineIndex = buffer.indexOf("\n");
+			while (newlineIndex >= 0) {
+				const line = buffer.slice(0, newlineIndex).trim();
+				buffer = buffer.slice(newlineIndex + 1);
+				if (line) {
+					yield JSON.parse(line) as OllamaChatChunk;
+				}
+				newlineIndex = buffer.indexOf("\n");
+			}
+		}
+		// Flush bytes the decoder was still holding for an incomplete character.
+		buffer += decoder.decode();
+		const tail = buffer.trim();
+		if (tail) {
+			yield JSON.parse(tail) as OllamaChatChunk;
+		}
+	} finally {
+		try {
+			// Resolves immediately once the stream has closed; on early exit it aborts the remaining body.
+			await reader.cancel();
+		} catch {
+			// The stream already errored; the original failure is what the caller sees.
+		}
+		reader.releaseLock();
+	}
+}
+/**
+ * Creates the initial assistant message that the stream fills in as chunks arrive.
+ *
+ * @param model - Model the response is attributed to (`provider` and `id` are copied).
+ * @returns An assistant message with no content, all usage and cost counters at zero, stop reason
+ * `"stop"`, and the current time as its timestamp.
+ */
+function createEmptyOutput(model: Model<"ollama-chat">): AssistantMessage {
+	const usage = {
+		input: 0,
+		output: 0,
+		cacheRead: 0,
+		cacheWrite: 0,
+		totalTokens: 0,
+		cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
+	};
+	const output: AssistantMessage = {
+		role: "assistant",
+		content: [],
+		api: "ollama-chat" as Api,
+		provider: model.provider,
+		model: model.id,
+		usage,
+		stopReason: "stop",
+		timestamp: Date.now(),
+	};
+	return output;
+}
+/**
+ * Emits `thinking_end` for the content block at `index`.
+ * Does nothing when that block is missing or is not a thinking block.
+ */
+function endThinkingBlock(stream: AssistantMessageEventStream, output: AssistantMessage, index: number): void {
+	const candidate = output.content[index];
+	if (candidate?.type !== "thinking") {
+		return;
+	}
+	stream.push({ type: "thinking_end", contentIndex: index, content: candidate.thinking, partial: output });
+}
+/**
+ * Emits `text_end` for the block at `index`, carrying its accumulated text.
+ * Does nothing when that slot is empty or holds a different block type.
+ */
+function endTextBlock(stream: AssistantMessageEventStream, output: AssistantMessage, index: number): void {
+	const candidate = output.content[index];
+	if (candidate?.type !== "text") {
+		return;
+	}
+	stream.push({ type: "text_end", contentIndex: index, content: candidate.text, partial: output });
+}
+/**
+ * Finalizes the tool-call block at `index` and emits `toolcall_end` for it.
+ *
+ * When the block still carries non-empty `partialJson`, the arguments are
+ * re-parsed from it and the field is removed before the event is pushed.
+ * Slots that are empty or hold another block type are ignored.
+ */
+function endToolCallBlock(stream: AssistantMessageEventStream, output: AssistantMessage, index: number): void {
+	const candidate = output.content[index];
+	if (candidate?.type === "toolCall") {
+		const finished = candidate as InternalToolCallBlock;
+		const pendingJson = finished.partialJson;
+		if (pendingJson) {
+			finished.arguments = parseStreamingJson<Record<string, unknown>>(pendingJson);
+			delete finished.partialJson;
+		}
+		stream.push({ type: "toolcall_end", contentIndex: index, toolCall: finished, partial: output });
+	}
+}
+/**
+ * Translates the provider's `done_reason` into the message stop reason.
+ *
+ * `"length"` always wins. Otherwise the reply counts as tool use when the
+ * reason is `"tool_calls"` or when the output contains any tool-call block:
+ * Ollama's native chat endpoint reports `done_reason: "stop"` even for replies
+ * that carry tool calls, so the reason string alone is not enough to detect them.
+ *
+ * @param doneReason - `done_reason` from the final chunk, if present.
+ * @param output - Message assembled so far; only its content block types are inspected.
+ * @returns `"length"`, `"toolUse"` or `"stop"`.
+ */
+function mapDoneReason(doneReason: string | undefined, output: AssistantMessage): AssistantMessage["stopReason"] {
+	if (doneReason === "length") {
+		return "length";
+	}
+	if (doneReason === "tool_calls" || output.content.some(block => block.type === "toolCall")) {
+		return "toolUse";
+	}
+	return "stop";
+}
+/**
+ * Streams a chat completion from `${baseUrl}/api/chat` and translates the
+ * newline-delimited JSON response into assistant message events.
+ *
+ * The event stream is returned immediately; the request runs in a detached
+ * async task that pushes `start`, then thinking/text/tool-call
+ * start/delta/end events, and finally either `done` or `error` before ending
+ * the stream. Errors raised inside the task's `try` block are reported through
+ * the `error` event instead of being thrown to the caller.
+ *
+ * @param model - Model to query; supplies `id`, `provider`, `api`, `baseUrl` and `headers`.
+ * @param context - Conversation context used to build the request body.
+ * @param options - Request options (API key, headers, abort signal, `onPayload` hook, ...).
+ * @returns The event stream that receives the events described above.
+ */
+export const streamOllama: StreamFunction<"ollama-chat"> = (
+	model: Model<"ollama-chat">,
+	context: Context,
+	options: OllamaChatOptions,
+): AssistantMessageEventStream => {
+	const stream = new AssistantMessageEventStream();
+	void (async () => {
+		const startTime = Date.now();
+		let firstTokenTime: number | undefined;
+		const output = createEmptyOutput(model);
+		let rawRequestDump: RawHttpRequestDump | undefined;
+		// Indices into output.content of the blocks that are currently open.
+		let activeThinkingIndex: number | undefined;
+		let activeTextIndex: number | undefined;
+		const activeToolIndices = new Set<number>();
+		try {
+			// An explicit option wins; otherwise fall back to the provider's environment key.
+			const apiKey = options.apiKey || getEnvApiKey(model.provider);
+			if (!apiKey) {
+				throw new Error(`No API key for provider: ${model.provider}`);
+			}
+			const baseUrl = normalizeBaseUrl(model.baseUrl);
+			let body = createChatBody(model, context, options);
+			// Let the caller inspect the payload and optionally substitute its own.
+			const replacementPayload = await options.onPayload?.(body, model);
+			if (replacementPayload !== undefined) {
+				body = replacementPayload as typeof body;
+			}
+			// Recorded before the request so the catch block can pass it to finalizeErrorMessage.
+			rawRequestDump = {
+				provider: model.provider,
+				api: model.api,
+				model: model.id,
+				method: "POST",
+				url: `${baseUrl}/api/chat`,
+				body,
+			};
+			const response = await fetch(`${baseUrl}/api/chat`, {
+				method: "POST",
+				headers: {
+					...model.headers,
+					...options.headers,
+					// Listed last so they override same-named entries spread in above.
+					Authorization: `Bearer ${apiKey}`,
+					"Content-Type": "application/json",
+				},
+				body: JSON.stringify(body),
+				signal: options.signal,
+			});
+			if (!response.ok) {
+				throw new Error(`HTTP ${response.status} from ${baseUrl}/api/chat`);
+			}
+			if (!response.body) {
+				throw new Error("Ollama returned an empty response body");
+			}
+			stream.push({ type: "start", partial: output });
+			for await (const chunk of iterateNdjson(response.body)) {
+				// Thinking content: close any open text block, then open or extend a thinking block.
+				if (chunk.message?.thinking) {
+					if (activeTextIndex !== undefined) {
+						endTextBlock(stream, output, activeTextIndex);
+						activeTextIndex = undefined;
+					}
+					if (activeThinkingIndex === undefined) {
+						output.content.push({ type: "thinking", thinking: "" });
+						activeThinkingIndex = output.content.length - 1;
+						stream.push({ type: "thinking_start", contentIndex: activeThinkingIndex, partial: output });
+					}
+					const block = output.content[activeThinkingIndex];
+					if (block?.type === "thinking") {
+						block.thinking += chunk.message.thinking;
+						stream.push({
+							type: "thinking_delta",
+							contentIndex: activeThinkingIndex,
+							delta: chunk.message.thinking,
+							partial: output,
+						});
+					}
+					if (!firstTokenTime) {
+						firstTokenTime = Date.now();
+					}
+				}
+				// Text content: close any open thinking block, then open or extend a text block.
+				if (chunk.message?.content) {
+					if (activeThinkingIndex !== undefined) {
+						endThinkingBlock(stream, output, activeThinkingIndex);
+						activeThinkingIndex = undefined;
+					}
+					if (activeTextIndex === undefined) {
+						output.content.push({ type: "text", text: "" });
+						activeTextIndex = output.content.length - 1;
+						stream.push({ type: "text_start", contentIndex: activeTextIndex, partial: output });
+					}
+					const block = output.content[activeTextIndex];
+					if (block?.type === "text") {
+						block.text += chunk.message.content;
+						stream.push({
+							type: "text_delta",
+							contentIndex: activeTextIndex,
+							delta: chunk.message.content,
+							partial: output,
+						});
+					}
+					if (!firstTokenTime) {
+						firstTokenTime = Date.now();
+					}
+				}
+				// Tool calls: close open thinking/text blocks first. Every call in the chunk gets
+				// its own block; its end event is deferred until the done chunk.
+				if (chunk.message?.tool_calls?.length) {
+					if (activeThinkingIndex !== undefined) {
+						endThinkingBlock(stream, output, activeThinkingIndex);
+						activeThinkingIndex = undefined;
+					}
+					if (activeTextIndex !== undefined) {
+						endTextBlock(stream, output, activeTextIndex);
+						activeTextIndex = undefined;
+					}
+					for (const call of chunk.message.tool_calls) {
+						const name = call.function?.name ?? "unknown_tool";
+						const rawArgs = call.function?.arguments;
+						// Arguments may be a JSON string or an already-parsed value; normalize to a string.
+						const partialJson = typeof rawArgs === "string" ? rawArgs : JSON.stringify(rawArgs ?? {});
+						const toolCall: InternalToolCallBlock = {
+							type: "toolCall",
+							// The id is derived from the block's position in the content list and the tool name.
+							id: `ollama:${output.content.length}:${name}`,
+							name,
+							arguments: parseStreamingJson<Record<string, unknown>>(partialJson),
+							partialJson,
+						};
+						output.content.push(toolCall);
+						const index = output.content.length - 1;
+						activeToolIndices.add(index);
+						stream.push({ type: "toolcall_start", contentIndex: index, partial: output });
+						stream.push({
+							type: "toolcall_delta",
+							contentIndex: index,
+							delta: partialJson,
+							partial: output,
+						});
+						if (!firstTokenTime) {
+							firstTokenTime = Date.now();
+						}
+					}
+				}
+				// Final chunk: close whatever is still open, then record stop reason and token usage.
+				if (chunk.done) {
+					if (activeThinkingIndex !== undefined) {
+						endThinkingBlock(stream, output, activeThinkingIndex);
+						activeThinkingIndex = undefined;
+					}
+					if (activeTextIndex !== undefined) {
+						endTextBlock(stream, output, activeTextIndex);
+						activeTextIndex = undefined;
+					}
+					for (const index of activeToolIndices) {
+						endToolCallBlock(stream, output, index);
+					}
+					activeToolIndices.clear();
+					output.stopReason = mapDoneReason(chunk.done_reason, output);
+					output.usage.input = chunk.prompt_eval_count ?? 0;
+					output.usage.output = chunk.eval_count ?? 0;
+					output.usage.totalTokens = output.usage.input + output.usage.output;
+				}
+			}
+			output.duration = Date.now() - startTime;
+			// ttft is only set when at least one thinking/text/tool-call piece was received.
+			if (firstTokenTime) {
+				output.ttft = firstTokenTime - startTime;
+			}
+			// Collapse stopReason to "length" | "toolUse" | "stop" for the done event.
+			const doneReason =
+				output.stopReason === "length" ? "length" : output.stopReason === "toolUse" ? "toolUse" : "stop";
+			stream.push({ type: "done", reason: doneReason, message: output });
+			stream.end();
+		} catch (error) {
+			// Drop leftover partialJson from tool-call blocks before reporting the failure.
+			for (const block of output.content) {
+				if (block.type === "toolCall") {
+					delete (block as InternalToolCallBlock).partialJson;
+				}
+			}
+			// An aborted signal is reported as "aborted"; every other failure as "error".
+			output.stopReason = options.signal?.aborted ? "aborted" : "error";
+			output.errorMessage = await finalizeErrorMessage(error, rawRequestDump);
+			output.duration = Date.now() - startTime;
+			if (firstTokenTime) {
+				output.ttft = firstTokenTime - startTime;
+			}
+			stream.push({ type: "error", reason: output.stopReason, error: output });
+			stream.end();
+		}
+	})();
+	return stream;
+};

package/src/providers/openai-responses.ts CHANGED Viewed

@@ -404,7 +404,7 @@ function buildParams(
 		}
 	}
-	if (model.reasoning && model.provider !== "github-copilot") {
+	if (model.reasoning) {
 		// Always request encrypted reasoning content so reasoning items can be
 		// replayed in multi-turn conversations when store is false (items aren't
 		// persisted server-side, so we must include the full content).

package/src/providers/register-builtins.ts CHANGED Viewed

@@ -27,6 +27,7 @@ import type { CursorOptions } from "./cursor";
 import type { GoogleOptions } from "./google";
 import type { GoogleGeminiCliOptions } from "./google-gemini-cli";
 import type { GoogleVertexOptions } from "./google-vertex";
+import type { OllamaChatOptions } from "./ollama";
 import type { OpenAICodexResponsesOptions } from "./openai-codex-responses";
 import type { OpenAICompletionsOptions } from "./openai-completions";
 import type { OpenAIResponsesOptions } from "./openai-responses";
@@ -103,6 +104,14 @@ interface OpenAIResponsesProviderModule {
 	) => AssistantMessageEventStream;
 }
+/** Shape of the dynamically imported `./ollama` module: only its `streamOllama` export is used. */
+interface OllamaProviderModule {
+	streamOllama: (
+		model: Model<"ollama-chat">,
+		context: Context,
+		options: OllamaChatOptions,
+	) => AssistantMessageEventStream;
+}
 interface CursorProviderModule {
 	streamCursor: (
 		model: Model<"cursor-agent">,
@@ -133,6 +142,7 @@ let googleVertexProviderModulePromise: Promise<LazyProviderModule<"google-vertex
 let openAICodexResponsesProviderModulePromise: Promise<LazyProviderModule<"openai-codex-responses">> | undefined;
 let openAICompletionsProviderModulePromise: Promise<LazyProviderModule<"openai-completions">> | undefined;
 let openAIResponsesProviderModulePromise: Promise<LazyProviderModule<"openai-responses">> | undefined;
+let ollamaProviderModulePromise: Promise<LazyProviderModule<"ollama-chat">> | undefined;
 let cursorProviderModulePromise: Promise<LazyProviderModule<"cursor-agent">> | undefined;
 let bedrockProviderModuleOverride: LazyProviderModule<"bedrock-converse-stream"> | undefined;
 let bedrockProviderModulePromise: Promise<LazyProviderModule<"bedrock-converse-stream">> | undefined;
@@ -290,6 +300,14 @@ function loadOpenAIResponsesProviderModule(): Promise<LazyProviderModule<"openai
 	return openAIResponsesProviderModulePromise;
 }
+/**
+ * Imports `./ollama` on first use and caches the resulting promise, so later
+ * calls return the same promise. The module's `streamOllama` export is exposed
+ * as `stream`.
+ */
+function loadOllamaProviderModule(): Promise<LazyProviderModule<"ollama-chat">> {
+	if (!ollamaProviderModulePromise) {
+		ollamaProviderModulePromise = import("./ollama").then(loaded => ({
+			stream: (loaded as OllamaProviderModule).streamOllama,
+		}));
+	}
+	return ollamaProviderModulePromise;
+}
 function loadCursorProviderModule(): Promise<LazyProviderModule<"cursor-agent">> {
 	cursorProviderModulePromise ||= import("./cursor").then(module => {
 		const provider = module as CursorProviderModule;
@@ -326,4 +344,6 @@ export const streamOpenAICodexResponses = createLazyStream(loadOpenAICodexRespon
 export const streamOpenAICompletions = createLazyStream(loadOpenAICompletionsProviderModule);
 export const streamOpenAIResponses = createLazyStream(loadOpenAIResponsesProviderModule);
 export const streamCursor = createLazyStream(loadCursorProviderModule);
+/** Ollama chat stream entry point, built by `createLazyStream` from `loadOllamaProviderModule`. */
+export const streamOllama = createLazyStream(loadOllamaProviderModule);
 export const streamBedrock = createLazyStream(loadBedrockProviderModule);

package/src/stream.ts CHANGED Viewed

@@ -18,6 +18,7 @@ import { type GoogleOptions, streamGoogle } from "./providers/google";
 import { type GoogleGeminiCliOptions, streamGoogleGeminiCli } from "./providers/google-gemini-cli";
 import { type GoogleVertexOptions, streamGoogleVertex } from "./providers/google-vertex";
 import { isKimiModel, streamKimi } from "./providers/kimi";
+import { type OllamaChatOptions, streamOllama } from "./providers/ollama";
 import { streamOpenAICodexResponses } from "./providers/openai-codex-responses";
 import { type OpenAICompletionsOptions, streamOpenAICompletions } from "./providers/openai-completions";
 import { streamOpenAIResponses } from "./providers/openai-responses";
@@ -131,6 +132,7 @@ const serviceProviderMap: Record<string, KeyResolver> = {
 	nanogpt: "NANO_GPT_API_KEY",
 	"lm-studio": "LM_STUDIO_API_KEY",
 	ollama: "OLLAMA_API_KEY",
+	"ollama-cloud": "OLLAMA_CLOUD_API_KEY",
 	"llama.cpp": "LLAMA_CPP_API_KEY",
 	qianfan: "QIANFAN_API_KEY",
 	"qwen-portal": () => $pickenv("QWEN_OAUTH_TOKEN", "QWEN_PORTAL_API_KEY"),
@@ -218,6 +220,9 @@ export function stream<TApi extends Api>(
 				providerOptions as GoogleGeminiCliOptions,
 			);
+		case "ollama-chat":
+			return streamOllama(model as Model<"ollama-chat">, context, providerOptions as OllamaChatOptions);
 		case "cursor-agent":
 			return streamCursor(model as Model<"cursor-agent">, context, providerOptions as CursorOptions);
@@ -677,6 +682,13 @@ function mapOptionsForApi<TApi extends Api>(
 			});
 		}
+		case "ollama-chat":
+			return castApi<"ollama-chat">({
+				...base,
+				reasoning: resolveOpenAiReasoningEffort(model, options),
+				toolChoice: options?.toolChoice,
+			});
 		case "cursor-agent": {
 			const execHandlers = options?.cursorExecHandlers ?? options?.execHandlers;
 			const onToolResult = options?.cursorOnToolResult ?? execHandlers?.onToolResult;

package/src/types.ts CHANGED Viewed

@@ -23,6 +23,7 @@ import type {
 import type { GoogleOptions } from "./providers/google";
 import type { GoogleGeminiCliOptions } from "./providers/google-gemini-cli";
 import type { GoogleVertexOptions } from "./providers/google-vertex";
+import type { OllamaChatOptions } from "./providers/ollama";
 import type { OpenAICodexResponsesOptions } from "./providers/openai-codex-responses";
 import type { OpenAICompletionsOptions } from "./providers/openai-completions";
 import type { OpenAIResponsesOptions } from "./providers/openai-responses";
@@ -40,6 +41,7 @@ export type KnownApi =
 	| "google-generative-ai"
 	| "google-gemini-cli"
 	| "google-vertex"
+	| "ollama-chat"
 	| "cursor-agent";
 export type Api = KnownApi | (string & {});
 export interface ApiOptionsMap {
@@ -52,6 +54,7 @@ export interface ApiOptionsMap {
 	"google-generative-ai": GoogleOptions;
 	"google-gemini-cli": GoogleGeminiCliOptions;
 	"google-vertex": GoogleVertexOptions;
+	"ollama-chat": OllamaChatOptions;
 	"cursor-agent": CursorOptions;
 }
 // Compile-time exhaustiveness check - this will fail if ApiOptionsMap doesn't have all KnownApi keys
@@ -120,6 +123,7 @@ export type KnownProvider =
 	| "nvidia"
 	| "nanogpt"
 	| "ollama"
+	| "ollama-cloud"
 	| "qianfan"
 	| "qwen-portal"
 	| "together"

package/src/utils/oauth/index.ts CHANGED Viewed

@@ -91,6 +91,7 @@ export { loginNanoGPT } from "./nanogpt";
 export { loginNvidia } from "./nvidia";
 // Ollama (optional API key)
 export { loginOllama } from "./ollama";
+export { loginOllamaCloud } from "./ollama-cloud";
 export type { OpenAICodexLoginOptions } from "./openai-codex";
 // OpenAI Codex (ChatGPT OAuth)
 export { loginOpenAICodex, refreshOpenAICodexToken } from "./openai-codex";
@@ -200,6 +201,11 @@ const builtInOAuthProviders: OAuthProviderInfo[] = [
 		name: "Ollama (Local OpenAI-compatible)",
 		available: true,
 	},
+	{
+		id: "ollama-cloud",
+		name: "Ollama Cloud",
+		available: true,
+	},
 	{
 		id: "huggingface",
 		name: "Hugging Face Inference",
@@ -398,6 +404,7 @@ export async function refreshOAuthToken(
 		case "litellm":
 		case "lm-studio":
 		case "ollama":
+		case "ollama-cloud":
 		case "xiaomi":
 		case "zai":
 		case "qianfan":

package/src/utils/oauth/ollama-cloud.ts ADDED Viewed

@@ -0,0 +1,28 @@
+import type { OAuthController } from "./types";
+const OLLAMA_CLOUD_KEYS_URL = "https://ollama.com/settings/keys";
+/**
+ * Prompts the user for an Ollama Cloud API key.
+ *
+ * The key-management URL is announced through `onAuth` (when provided) before
+ * the prompt is shown. Cancellation is checked both before starting and after
+ * the prompt resolves.
+ *
+ * @param options - Controller supplying the prompt/auth callbacks and an optional abort signal.
+ * @returns The pasted key with surrounding whitespace removed.
+ * @throws Error when the login is cancelled, no prompt callback exists, or the key is blank.
+ */
+export async function loginOllamaCloud(options: OAuthController): Promise<string> {
+	const throwIfCancelled = (): void => {
+		if (options.signal?.aborted) {
+			throw new Error("Login cancelled");
+		}
+	};
+
+	throwIfCancelled();
+	if (!options.onPrompt) {
+		throw new Error("Interactive prompt is required for Ollama Cloud login");
+	}
+
+	const authInfo = {
+		url: OLLAMA_CLOUD_KEYS_URL,
+		instructions: "Create an Ollama Cloud API key, then paste it here.",
+	};
+	options.onAuth?.(authInfo);
+
+	const pasted = await options.onPrompt({
+		message: "Paste your Ollama Cloud API key",
+		placeholder: "ollama-cloud-api-key",
+	});
+	throwIfCancelled();
+
+	const key = pasted.trim();
+	if (key.length === 0) {
+		throw new Error("Ollama Cloud API key is required");
+	}
+	return key;
+}

package/src/utils/oauth/types.ts CHANGED Viewed

@@ -30,6 +30,7 @@ export type OAuthProvider =
 	| "nvidia"
 	| "nanogpt"
 	| "ollama"
+	| "ollama-cloud"
 	| "openai-codex"
 	| "opencode-go"
 	| "opencode-zen"