@oh-my-pi/pi-ai 15.2.3 → 15.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,23 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ## [15.3.0] - 2026-05-25
6
+
7
+ ### Added
8
+
9
+ - Added DeepSeek to the built-in API-key login provider catalog so `omp login deepseek` stores a reusable `DEEPSEEK_API_KEY` credential for the bundled DeepSeek models.
10
+
11
+ ## [15.2.4] - 2026-05-22
12
+
13
+ ### Fixed
14
+
15
+ - Fixed ChatGPT Plus/Pro (Codex) OAuth login returning `Token exchange failed: 403` on Windows. When port 1455 was in use, the callback server silently fell back to a random port. Because OpenAI's authorization endpoint accepts any localhost redirect URI (loose validation), the browser callback succeeded and showed "Authentication Successful", but the token endpoint then rejected the unregistered port with 403. The `OpenAICodexOAuthFlow` now enforces a fixed `redirectUri` option so a busy port immediately surfaces as "port unavailable" instead of producing a confusing 403 ([#1277](https://github.com/can1357/oh-my-pi/issues/1277)).
16
+ - Improved `exchangeCodeForToken` error diagnostics: the 403 response body (`error` / `error_description` fields) is now included in the thrown message, matching the existing `refreshOpenAICodexToken` behaviour.
17
+
18
+ ### Added
19
+
20
+ - Added `ChatGPT Plus/Pro (Codex, headless/device)` (`openai-codex-device`) as an alternative login method for the Codex provider. Uses OpenAI's device-code flow (`/api/accounts/deviceauth/usercode` → poll `/api/accounts/deviceauth/token`), which avoids a local callback server and port 1455 entirely. Credentials are stored under the existing `openai-codex` provider key so all models and tooling continue to work without reconfiguration ([#1277](https://github.com/can1357/oh-my-pi/issues/1277)).
21
+
5
22
  ## [15.2.2] - 2026-05-22
6
23
 
7
24
  ### Fixed
@@ -57,6 +74,7 @@
57
74
 
58
75
  ### Fixed
59
76
 
77
+ - Fixed OpenCode-Go and OpenCode-Zen chat-completions replay to omit stored reasoning fields on Kimi assistant tool-call messages, avoiding provider 400s for rejected `messages[].reasoning` payloads. ([#1157](https://github.com/can1357/oh-my-pi/issues/1157))
60
78
  - Fixed OpenAI Responses and Codex tool schema normalization to emit `properties: {}` for no-argument object schemas without rewriting literal payloads. ([#1147](https://github.com/can1357/oh-my-pi/issues/1147))
61
79
  - Fixed Anthropic 400 (`unexpected tool_use_id found in tool_result blocks ... Each tool_result block must have a corresponding tool_use block in the previous message`) when handoff/compaction folds an assistant `tool_use` into the handoff summary string but leaves the matching user-side `tool_result` message in the history. `transformMessages` now indexes every `tool_use` id surviving the first pass and drops orphan `tool_result` messages whose originator was compacted away, preserving the text payload as a user-level `<stale-tool-result>` note so the model still sees what the tool returned. The note is emitted with `role: "user"` rather than `role: "developer"` so providers that elevate developer-role messages (Ollama: `developer` → `system`; OpenAI chat-completions reasoning models: `developer` → `developer`) cannot lift stale tool output to an instruction-priority tier above the surrounding user/developer messages.
62
80
  - Fixed streaming authentication retry to trigger when a provider emits a 401 `error` event after a `start` event but before any replay-unsafe content is emitted.
@@ -165,7 +165,7 @@ export interface XiaomiModelManagerConfig {
165
165
  apiKey?: string;
166
166
  baseUrl?: string;
167
167
  }
168
- export declare function xiaomiModelManagerOptions(config?: XiaomiModelManagerConfig): ModelManagerOptions<"anthropic-messages">;
168
+ export declare function xiaomiModelManagerOptions(config?: XiaomiModelManagerConfig): ModelManagerOptions<"openai-completions">;
169
169
  export interface LiteLLMModelManagerConfig {
170
170
  apiKey?: string;
171
171
  baseUrl?: string;
@@ -18,7 +18,7 @@ export declare function isOpenAICompletionsProgressChunk(chunk: unknown): boolea
18
18
  export interface OpenAICompletionsOptions extends StreamOptions {
19
19
  toolChoice?: ToolChoice;
20
20
  reasoning?: "minimal" | "low" | "medium" | "high" | "xhigh";
21
- /** Force-disable reasoning for OpenRouter-format requests (sends `reasoning: { enabled: false }`). */
21
+ /** Force-disable reasoning where supported, or request the lowest effort on generic effort endpoints. */
22
22
  disableReasoning?: boolean;
23
23
  serviceTier?: ServiceTier;
24
24
  }
@@ -242,9 +242,9 @@ export interface SimpleStreamOptions extends StreamOptions {
242
242
  * Force-disable reasoning for the request even when the model supports it.
243
243
  * Takes precedence over `reasoning`. Useful for fast utility calls
244
244
  * (e.g. title generation) where the model would otherwise burn the entire
245
- * output budget on internal thinking. Currently honored by OpenRouter
246
- * (sends `reasoning: { enabled: false }`); other providers already behave
247
- * this way when `reasoning` is undefined.
245
+ * output budget on internal thinking. Provider support is format-specific:
246
+ * some transports can disable reasoning directly, while generic
247
+ * effort-based OpenAI-compatible endpoints use the lowest supported effort.
248
248
  */
249
249
  disableReasoning?: boolean;
250
250
  /**
@@ -0,0 +1 @@
1
+ export declare const loginDeepSeek: (options: import("./types").OAuthController) => Promise<string>;
@@ -8,6 +8,13 @@ export type OpenAICodexLoginOptions = OAuthController & {
8
8
  originator?: string;
9
9
  };
10
10
  export declare function loginOpenAICodex(options: OpenAICodexLoginOptions): Promise<OAuthCredentials>;
11
+ /**
12
+ * Login with OpenAI Codex using the device-code (headless) flow.
13
+ *
14
+ * Avoids a local callback server entirely — useful when port 1455 is unavailable
15
+ * or when the browser callback flow fails with 403 (e.g. network/proxy issues).
16
+ */
17
+ export declare function loginOpenAICodexDevice(ctrl: OAuthController): Promise<OAuthCredentials>;
11
18
  /**
12
19
  * Refresh OpenAI Codex OAuth token
13
20
  */
@@ -7,7 +7,7 @@ export type OAuthCredentials = {
7
7
  email?: string;
8
8
  accountId?: string;
9
9
  };
10
- export type OAuthProvider = "alibaba-coding-plan" | "anthropic" | "cerebras" | "cloudflare-ai-gateway" | "cursor" | "fireworks" | "firepass" | "github-copilot" | "google-gemini-cli" | "google-antigravity" | "gitlab-duo" | "huggingface" | "kimi-code" | "kilo" | "kagi" | "litellm" | "lm-studio" | "minimax-code" | "minimax-code-cn" | "moonshot" | "nvidia" | "nanogpt" | "ollama" | "ollama-cloud" | "openai-codex" | "opencode-go" | "opencode-zen" | "parallel" | "perplexity" | "qianfan" | "qwen-portal" | "synthetic" | "tavily" | "together" | "venice" | "vercel-ai-gateway" | "vllm" | "xiaomi" | "zenmux" | "zai";
10
+ export type OAuthProvider = "alibaba-coding-plan" | "anthropic" | "cerebras" | "cloudflare-ai-gateway" | "cursor" | "deepseek" | "fireworks" | "firepass" | "github-copilot" | "google-gemini-cli" | "google-antigravity" | "gitlab-duo" | "huggingface" | "kimi-code" | "kilo" | "kagi" | "litellm" | "lm-studio" | "minimax-code" | "minimax-code-cn" | "moonshot" | "nvidia" | "nanogpt" | "ollama" | "ollama-cloud" | "openai-codex" | "openai-codex-device" | "opencode-go" | "opencode-zen" | "parallel" | "perplexity" | "qianfan" | "qwen-portal" | "synthetic" | "tavily" | "together" | "venice" | "vercel-ai-gateway" | "vllm" | "xiaomi" | "zenmux" | "zai";
11
11
  export type OAuthProviderId = OAuthProvider | (string & {});
12
12
  export type OAuthPrompt = {
13
13
  message: string;
@@ -1,8 +1,8 @@
1
1
  /**
2
2
  * Xiaomi MiMo login flow.
3
3
  *
4
- * Xiaomi MiMo provides Anthropic-compatible models via
5
- * https://api.xiaomimimo.com/anthropic.
4
+ * Xiaomi MiMo provides OpenAI-compatible models via
5
+ * https://api.xiaomimimo.com/v1.
6
6
  *
7
7
  * This is not OAuth - it's a simple API key flow:
8
8
  * 1. Open browser to Xiaomi MiMo API key console
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "type": "module",
3
3
  "name": "@oh-my-pi/pi-ai",
4
- "version": "15.2.3",
4
+ "version": "15.3.0",
5
5
  "description": "Unified LLM API with automatic model discovery and provider configuration",
6
6
  "homepage": "https://omp.sh",
7
7
  "author": "Can Boluk",
@@ -43,7 +43,7 @@
43
43
  "dependencies": {
44
44
  "@anthropic-ai/sdk": "^0.94.0",
45
45
  "@bufbuild/protobuf": "^2.12.0",
46
- "@oh-my-pi/pi-utils": "15.2.3",
46
+ "@oh-my-pi/pi-utils": "15.3.0",
47
47
  "openai": "^6.36.0",
48
48
  "partial-json": "^0.1.7",
49
49
  "zod": "4.4.3"
@@ -29,6 +29,8 @@ import { kimiUsageProvider } from "./usage/kimi";
29
29
  import { codexRankingStrategy, openaiCodexUsageProvider } from "./usage/openai-codex";
30
30
  import { zaiUsageProvider } from "./usage/zai";
31
31
  import { getOAuthApiKey, getOAuthProvider, refreshOAuthToken } from "./utils/oauth";
32
+ import { loginDeepSeek } from "./utils/oauth/deepseek";
33
+ import { loginOpenAICodexDevice } from "./utils/oauth/openai-codex";
32
34
  import type { OAuthController, OAuthCredentials, OAuthProvider, OAuthProviderId } from "./utils/oauth/types";
33
35
 
34
36
  // ─────────────────────────────────────────────────────────────────────────────
@@ -1298,6 +1300,14 @@ export class AuthStorage {
1298
1300
  });
1299
1301
  break;
1300
1302
  }
1303
+ case "openai-codex-device": {
1304
+ // Device/headless flow — stores credentials under "openai-codex" so the
1305
+ // provider can pick them up without a separate provider configuration.
1306
+ const deviceCredentials = await loginOpenAICodexDevice(ctrl);
1307
+ const newCredential: OAuthCredential = { type: "oauth", ...deviceCredentials };
1308
+ await this.#upsertOAuthCredential("openai-codex", newCredential);
1309
+ return;
1310
+ }
1301
1311
  case "gitlab-duo": {
1302
1312
  const { loginGitLabDuo } = await import("./utils/oauth/gitlab-duo");
1303
1313
  credentials = await loginGitLabDuo({
@@ -1369,6 +1379,11 @@ export class AuthStorage {
1369
1379
  await saveApiKeyCredential(apiKey);
1370
1380
  return;
1371
1381
  }
1382
+ case "deepseek": {
1383
+ const apiKey = await loginDeepSeek(ctrl);
1384
+ await saveApiKeyCredential(apiKey);
1385
+ return;
1386
+ }
1372
1387
  case "fireworks": {
1373
1388
  const { loginFireworks } = await import("./utils/oauth/fireworks");
1374
1389
  const apiKey = await loginFireworks(ctrl);
package/src/cli.ts CHANGED
@@ -109,6 +109,7 @@ Providers:
109
109
  kagi Kagi
110
110
  tavily Tavily
111
111
  zai Z.AI (GLM Coding Plan)
112
+ deepseek DeepSeek
112
113
  nanogpt NanoGPT
113
114
  minimax-code MiniMax Coding Plan (International)
114
115
  minimax-code-cn MiniMax Coding Plan (China)
package/src/models.json CHANGED
@@ -52450,9 +52450,9 @@
52450
52450
  "mimo-v2-flash": {
52451
52451
  "id": "mimo-v2-flash",
52452
52452
  "name": "MiMo-V2-Flash",
52453
- "api": "anthropic-messages",
52453
+ "api": "openai-completions",
52454
52454
  "provider": "xiaomi",
52455
- "baseUrl": "https://api.xiaomimimo.com/anthropic",
52455
+ "baseUrl": "https://api.xiaomimimo.com/v1",
52456
52456
  "reasoning": true,
52457
52457
  "input": [
52458
52458
  "text"
@@ -52474,9 +52474,9 @@
52474
52474
  "mimo-v2-omni": {
52475
52475
  "id": "mimo-v2-omni",
52476
52476
  "name": "MiMo-V2-Omni",
52477
- "api": "anthropic-messages",
52477
+ "api": "openai-completions",
52478
52478
  "provider": "xiaomi",
52479
- "baseUrl": "https://api.xiaomimimo.com/anthropic",
52479
+ "baseUrl": "https://api.xiaomimimo.com/v1",
52480
52480
  "reasoning": true,
52481
52481
  "input": [
52482
52482
  "text",
@@ -52499,9 +52499,9 @@
52499
52499
  "mimo-v2-pro": {
52500
52500
  "id": "mimo-v2-pro",
52501
52501
  "name": "MiMo-V2-Pro",
52502
- "api": "anthropic-messages",
52502
+ "api": "openai-completions",
52503
52503
  "provider": "xiaomi",
52504
- "baseUrl": "https://api.xiaomimimo.com/anthropic",
52504
+ "baseUrl": "https://api.xiaomimimo.com/v1",
52505
52505
  "reasoning": true,
52506
52506
  "input": [
52507
52507
  "text"
@@ -52523,9 +52523,9 @@
52523
52523
  "mimo-v2.5": {
52524
52524
  "id": "mimo-v2.5",
52525
52525
  "name": "MiMo-V2.5",
52526
- "api": "anthropic-messages",
52526
+ "api": "openai-completions",
52527
52527
  "provider": "xiaomi",
52528
- "baseUrl": "https://api.xiaomimimo.com/anthropic",
52528
+ "baseUrl": "https://api.xiaomimimo.com/v1",
52529
52529
  "reasoning": true,
52530
52530
  "input": [
52531
52531
  "text",
@@ -52548,9 +52548,9 @@
52548
52548
  "mimo-v2.5-pro": {
52549
52549
  "id": "mimo-v2.5-pro",
52550
52550
  "name": "MiMo-V2.5-Pro",
52551
- "api": "anthropic-messages",
52551
+ "api": "openai-completions",
52552
52552
  "provider": "xiaomi",
52553
- "baseUrl": "https://api.xiaomimimo.com/anthropic",
52553
+ "baseUrl": "https://api.xiaomimimo.com/v1",
52554
52554
  "reasoning": true,
52555
52555
  "input": [
52556
52556
  "text"
@@ -1408,28 +1408,26 @@ export interface XiaomiModelManagerConfig {
1408
1408
 
1409
1409
  export function xiaomiModelManagerOptions(
1410
1410
  config?: XiaomiModelManagerConfig,
1411
- ): ModelManagerOptions<"anthropic-messages"> {
1411
+ ): ModelManagerOptions<"openai-completions"> {
1412
1412
  const apiKey = config?.apiKey;
1413
1413
  // Xiaomi splits API keys across two backends: standard `sk-` keys hit
1414
- // api.xiaomimimo.com; "token plan" `tp-` keys hit the EU token-plan host.
1415
- // Both expose the same Anthropic-compat layout under /anthropic/v1/*.
1416
- const defaultBaseUrl = apiKey?.startsWith("tp-")
1417
- ? "https://token-plan-ams.xiaomimimo.com/anthropic"
1418
- : "https://api.xiaomimimo.com/anthropic";
1419
- const baseUrl = normalizeAnthropicBaseUrl(config?.baseUrl, defaultBaseUrl);
1420
- // Xiaomi hosts chat completions under /anthropic/* but exposes model
1421
- // discovery at the OpenAI-style /v1/models endpoint on the root host.
1422
- const discoveryRoot = baseUrl.endsWith("/anthropic") ? baseUrl.slice(0, -"/anthropic".length) : baseUrl;
1423
- const discoveryBaseUrl = toAnthropicDiscoveryBaseUrl(discoveryRoot);
1424
- const references = createBundledReferenceMap<"anthropic-messages">("xiaomi");
1414
+ // api.xiaomimimo.com; "token plan" `tp-` keys hit either the SG or EU
1415
+ // token-plan host. Try SGP first; if discovery fails, retry AMS.
1416
+ const TOKEN_PLAN_SGP_BASE_URL = "https://token-plan-sgp.xiaomimimo.com/v1";
1417
+ const TOKEN_PLAN_AMS_BASE_URL = "https://token-plan-ams.xiaomimimo.com/v1";
1418
+ const defaultBaseUrl = apiKey?.startsWith("tp-") ? TOKEN_PLAN_SGP_BASE_URL : "https://api.xiaomimimo.com/v1";
1419
+ // Token-plan keys always use the TP baseUrl; config?.baseUrl (from catalog)
1420
+ // would incorrectly pin to the standard endpoint (api.xiaomimimo.com).
1421
+ const baseUrl = apiKey?.startsWith("tp-") ? defaultBaseUrl : (config?.baseUrl ?? defaultBaseUrl);
1422
+ const references = createBundledReferenceMap<"openai-completions">("xiaomi");
1425
1423
  return {
1426
1424
  providerId: "xiaomi",
1427
1425
  ...(apiKey && {
1428
- fetchDynamicModels: () =>
1429
- fetchOpenAICompatibleModels({
1430
- api: "anthropic-messages",
1426
+ fetchDynamicModels: async () => {
1427
+ const sgpResult = await fetchOpenAICompatibleModels({
1428
+ api: "openai-completions",
1431
1429
  provider: "xiaomi",
1432
- baseUrl: discoveryBaseUrl,
1430
+ baseUrl,
1433
1431
  apiKey,
1434
1432
  filterModel: (_entry, model) => !model.id.includes("-tts"),
1435
1433
  mapModel: (entry, defaults) => {
@@ -1438,10 +1436,29 @@ export function xiaomiModelManagerOptions(
1438
1436
  return {
1439
1437
  ...model,
1440
1438
  name: toModelName(entry.display_name, model.name),
1441
- baseUrl,
1442
1439
  };
1443
1440
  },
1444
- }),
1441
+ });
1442
+ if (sgpResult || !apiKey?.startsWith("tp-")) {
1443
+ return sgpResult;
1444
+ }
1445
+ // Token-plan discovery failed with SGP; retry with AMS
1446
+ return fetchOpenAICompatibleModels({
1447
+ api: "openai-completions",
1448
+ provider: "xiaomi",
1449
+ baseUrl: TOKEN_PLAN_AMS_BASE_URL,
1450
+ apiKey,
1451
+ filterModel: (_entry, model) => !model.id.includes("-tts"),
1452
+ mapModel: (entry, defaults) => {
1453
+ const reference = references.get(defaults.id);
1454
+ const model = mapWithBundledReference(entry, defaults, reference);
1455
+ return {
1456
+ ...model,
1457
+ name: toModelName(entry.display_name, model.name),
1458
+ };
1459
+ },
1460
+ });
1461
+ },
1445
1462
  }),
1446
1463
  };
1447
1464
  }
@@ -171,7 +171,13 @@ export function detectOpenAICompat(model: Model<"openai-completions">, resolvedB
171
171
  high: "high",
172
172
  xhigh: "max",
173
173
  } satisfies Partial<Record<OpenAIReasoningEffort, string>>)
174
- : {};
174
+ : isFireworks
175
+ ? ({
176
+ // Fireworks' OpenAI-compatible endpoint rejects OpenAI's
177
+ // `minimal` literal but accepts `none` for the lowest setting.
178
+ minimal: "none",
179
+ } satisfies Partial<Record<OpenAIReasoningEffort, string>>)
180
+ : {};
175
181
 
176
182
  return {
177
183
  supportsStore: !isNonStandard,
@@ -10,7 +10,7 @@ import type {
10
10
  ChatCompletionToolMessageParam,
11
11
  } from "openai/resources/chat/completions";
12
12
  import packageJson from "../../package.json" with { type: "json" };
13
- import type { Effort } from "../model-thinking";
13
+ import { type Effort, getSupportedEfforts } from "../model-thinking";
14
14
  import { calculateCost } from "../models";
15
15
  import { getEnvApiKey } from "../stream";
16
16
  import {
@@ -219,7 +219,7 @@ export function isOpenAICompletionsProgressChunk(chunk: unknown): boolean {
219
219
  export interface OpenAICompletionsOptions extends StreamOptions {
220
220
  toolChoice?: ToolChoice;
221
221
  reasoning?: "minimal" | "low" | "medium" | "high" | "xhigh";
222
- /** Force-disable reasoning for OpenRouter-format requests (sends `reasoning: { enabled: false }`). */
222
+ /** Force-disable reasoning where supported, or request the lowest effort on generic effort endpoints. */
223
223
  disableReasoning?: boolean;
224
224
  serviceTier?: ServiceTier;
225
225
  }
@@ -1177,6 +1177,21 @@ function buildParams(
1177
1177
  ) {
1178
1178
  // OpenAI-style reasoning_effort
1179
1179
  params.reasoning_effort = mapReasoningEffort(options.reasoning, compat.reasoningEffortMap) as Effort;
1180
+ } else if (
1181
+ supportsReasoningParams &&
1182
+ options?.disableReasoning &&
1183
+ !options?.reasoning &&
1184
+ model.reasoning &&
1185
+ compat.supportsReasoningEffort
1186
+ ) {
1187
+ // Generic OpenAI-compatible effort endpoints do not expose a true off
1188
+ // switch. Use the model's lowest supported effort as the closest
1189
+ // transport-level approximation when callers request disabled reasoning.
1190
+ const minEffort = getSupportedEfforts(model)[0];
1191
+ if (minEffort === undefined) {
1192
+ throw new Error(`Model ${model.provider}/${model.id} has no supported reasoning efforts`);
1193
+ }
1194
+ params.reasoning_effort = mapReasoningEffort(minEffort, compat.reasoningEffortMap) as Effort;
1180
1195
  }
1181
1196
 
1182
1197
  if (compat.disableReasoningOnToolChoice && params.tool_choice !== undefined) {
@@ -1484,7 +1499,7 @@ export function convertMessages(
1484
1499
  } else {
1485
1500
  assistantMsg.content = [{ type: "text", text: thinkingText }];
1486
1501
  }
1487
- } else {
1502
+ } else if (compat.requiresReasoningContentForToolCalls) {
1488
1503
  // Use the signature from the first thinking block if available, but only for
1489
1504
  // recognized OpenAI-compat reasoning field names. Opaque signatures from other
1490
1505
  // providers (Anthropic encrypted, OpenAI Responses JSON) are not valid property names.
@@ -1496,7 +1511,7 @@ export function convertMessages(
1496
1511
  }
1497
1512
  }
1498
1513
 
1499
- if (compat.thinkingFormat === "openai") {
1514
+ if (compat.thinkingFormat === "openai" && compat.requiresReasoningContentForToolCalls) {
1500
1515
  const streamedReasoningField = nonEmptyThinkingBlocks[0]?.thinkingSignature;
1501
1516
  const reasoningField =
1502
1517
  streamedReasoningField === "reasoning_content" ||
@@ -391,10 +391,24 @@ function buildParams(
391
391
  const messages: ResponseInput = [...conversationMessages];
392
392
 
393
393
  const systemPrompts = normalizeSystemPrompts(context.systemPrompt);
394
+ let systemInstructions: string | undefined;
394
395
  if (systemPrompts.length > 0) {
395
- const role: "developer" | "system" =
396
- model.reasoning && supportsDeveloperRole(resolvedBaseUrl ?? model) ? "developer" : "system";
397
- messages.unshift(...systemPrompts.map(systemPrompt => ({ role, content: systemPrompt })));
396
+ const needsDeveloperRole = model.reasoning && supportsDeveloperRole(resolvedBaseUrl ?? model);
397
+ if (needsDeveloperRole) {
398
+ // Reasoning models on known OpenAI-compatible endpoints require the
399
+ // `developer` role. Send all system prompts inline in `input`.
400
+ messages.unshift(
401
+ ...systemPrompts.map(systemPrompt => ({ role: "developer" as const, content: systemPrompt })),
402
+ );
403
+ } else {
404
+ // All other endpoints (including third-party /v1/responses proxies) use
405
+ // the canonical top-level `instructions` field so that proxies that
406
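+ // reject `input[{role:"system"}]` work out of the box. Only the first prompt moves to `instructions`; any additional prompts are still sent inline with the `system` role.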
407
+ systemInstructions = systemPrompts[0];
408
+ if (systemPrompts.length > 1) {
409
+ messages.unshift(...systemPrompts.slice(1).map(p => ({ role: "system" as const, content: p })));
410
+ }
411
+ }
398
412
  }
399
413
 
400
414
  const cacheRetention = resolveCacheRetention(options?.cacheRetention);
@@ -402,6 +416,7 @@ function buildParams(
402
416
  const params: OpenAIResponsesSamplingParams = {
403
417
  model: model.id,
404
418
  input: messages,
419
+ instructions: systemInstructions,
405
420
  stream: true,
406
421
  prompt_cache_key: promptCacheKey,
407
422
  prompt_cache_retention: promptCacheKey ? getPromptCacheRetention(model.baseUrl, cacheRetention) : undefined,
package/src/types.ts CHANGED
@@ -375,9 +375,9 @@ export interface SimpleStreamOptions extends StreamOptions {
375
375
  * Force-disable reasoning for the request even when the model supports it.
376
376
  * Takes precedence over `reasoning`. Useful for fast utility calls
377
377
  * (e.g. title generation) where the model would otherwise burn the entire
378
- * output budget on internal thinking. Currently honored by OpenRouter
379
- * (sends `reasoning: { enabled: false }`); other providers already behave
380
- * this way when `reasoning` is undefined.
378
+ * output budget on internal thinking. Provider support is format-specific:
379
+ * some transports can disable reasoning directly, while generic
380
+ * effort-based OpenAI-compatible endpoints use the lowest supported effort.
381
381
  */
382
382
  disableReasoning?: boolean;
383
383
  /**
@@ -0,0 +1,16 @@
1
+ /** DeepSeek login flow (API key paste against https://api.deepseek.com). */
2
+ import { createApiKeyLogin } from "./api-key-login";
3
+
4
+ export const loginDeepSeek = createApiKeyLogin({
5
+ providerLabel: "DeepSeek",
6
+ authUrl: "https://platform.deepseek.com/api_keys",
7
+ instructions: "Create or copy your API key from the DeepSeek dashboard",
8
+ promptMessage: "Paste your DeepSeek API key",
9
+ placeholder: "sk-...",
10
+ validation: {
11
+ kind: "chat-completions",
12
+ provider: "deepseek",
13
+ baseUrl: "https://api.deepseek.com/v1",
14
+ model: "deepseek-v4-pro",
15
+ },
16
+ });
@@ -25,6 +25,11 @@ const builtInOAuthProviders: OAuthProviderInfo[] = [
25
25
  name: "ChatGPT Plus/Pro (Codex Subscription)",
26
26
  available: true,
27
27
  },
28
+ {
29
+ id: "openai-codex-device",
30
+ name: "ChatGPT Plus/Pro (Codex, headless/device)",
31
+ available: true,
32
+ },
28
33
  {
29
34
  id: "gitlab-duo",
30
35
  name: "GitLab Duo",
@@ -50,6 +55,11 @@ const builtInOAuthProviders: OAuthProviderInfo[] = [
50
55
  name: "Cerebras",
51
56
  available: true,
52
57
  },
58
+ {
59
+ id: "deepseek",
60
+ name: "DeepSeek",
61
+ available: true,
62
+ },
53
63
  {
54
64
  id: "fireworks",
55
65
  name: "Fireworks",
@@ -279,7 +289,8 @@ export async function refreshOAuthToken(
279
289
  newCredentials = await refreshAntigravityToken(credentials.refresh, credentials.projectId);
280
290
  break;
281
291
  }
282
- case "openai-codex": {
292
+ case "openai-codex":
293
+ case "openai-codex-device": {
283
294
  const { refreshOpenAICodexToken } = await import("./openai-codex");
284
295
  newCredentials = await refreshOpenAICodexToken(credentials.refresh);
285
296
  break;
@@ -1,7 +1,7 @@
1
1
  /**
2
- * OpenAI Codex (ChatGPT OAuth) flow
2
+ * OpenAI Codex (ChatGPT OAuth) flow — browser and device-code flows.
3
3
  */
4
- import { OAuthCallbackFlow } from "./callback-server";
4
+ import { OAuthCallbackFlow, type OAuthCallbackFlowOptions } from "./callback-server";
5
5
  import { generatePKCE } from "./pkce";
6
6
  import type { OAuthController, OAuthCredentials } from "./types";
7
7
 
@@ -14,6 +14,14 @@ const SCOPE = "openid profile email offline_access";
14
14
  const JWT_CLAIM_PATH = "https://api.openai.com/auth";
15
15
  const JWT_PROFILE_CLAIM = "https://api.openai.com/profile";
16
16
  const TOKEN_REQUEST_TIMEOUT_MS = 15_000;
17
+ const DEVICE_USERCODE_URL = "https://auth.openai.com/api/accounts/deviceauth/usercode";
18
+ const DEVICE_TOKEN_URL = "https://auth.openai.com/api/accounts/deviceauth/token";
19
+ const DEVICE_REDIRECT_URI = "https://auth.openai.com/deviceauth/callback";
20
+ const DEVICE_AUTH_URL = "https://auth.openai.com/codex/device";
21
+ const DEVICE_POLL_INTERVAL_MS = 5_000;
22
+ const DEVICE_POLL_SAFETY_MARGIN_MS = 3_000;
23
+ /** Upper bound on device-code polls so an authorization that stays pending (repeated 403/404 responses) cannot loop forever; any other error status aborts immediately. */
24
+ const DEVICE_MAX_POLLS = 120;
17
25
 
18
26
  type JwtPayload = {
19
27
  [JWT_CLAIM_PATH]?: {
@@ -59,7 +67,15 @@ class OpenAICodexOAuthFlow extends OAuthCallbackFlow {
59
67
  private readonly pkce: PKCE,
60
68
  private readonly originator: string,
61
69
  ) {
62
- super(ctrl, CALLBACK_PORT, CALLBACK_PATH);
70
+ super(ctrl, {
71
+ preferredPort: CALLBACK_PORT,
72
+ callbackPath: CALLBACK_PATH,
73
+ // Enforce the fixed port: OpenAI only allows http://localhost:1455/auth/callback.
74
+ // Without this, a busy port 1455 falls back to a random port, and the token
75
+ // exchange would fail with 403 because the redirect_uri no longer matches the
76
+ // registered allowlist entry.
77
+ redirectUri: `http://localhost:${CALLBACK_PORT}${CALLBACK_PATH}`,
78
+ } satisfies OAuthCallbackFlowOptions);
63
79
  }
64
80
 
65
81
  async generateAuthUrl(state: string, redirectUri: string): Promise<{ url: string; instructions?: string }> {
@@ -100,7 +116,13 @@ async function exchangeCodeForToken(code: string, verifier: string, redirectUri:
100
116
  });
101
117
 
102
118
  if (!tokenResponse.ok) {
103
- throw new Error(`Token exchange failed: ${tokenResponse.status}`);
119
+ let detail = `${tokenResponse.status}`;
120
+ try {
121
+ const body = (await tokenResponse.json()) as { error?: string; error_description?: string };
122
+ if (body.error)
123
+ detail = `${tokenResponse.status} ${body.error}${body.error_description ? `: ${body.error_description}` : ""}`;
124
+ } catch {}
125
+ throw new Error(`Token exchange failed: ${detail}`);
104
126
  }
105
127
 
106
128
  const tokenData = (await tokenResponse.json()) as {
@@ -143,6 +165,93 @@ export async function loginOpenAICodex(options: OpenAICodexLoginOptions): Promis
143
165
  return flow.login();
144
166
  }
145
167
 
168
+ /**
169
+ * Login with OpenAI Codex using the device-code (headless) flow.
170
+ *
171
+ * Avoids a local callback server entirely — useful when port 1455 is unavailable
172
+ * or when the browser callback flow fails with 403 (e.g. network/proxy issues).
173
+ */
174
+ export async function loginOpenAICodexDevice(ctrl: OAuthController): Promise<OAuthCredentials> {
175
+ ctrl.onProgress?.("Initiating device authorization…");
176
+
177
+ const initResponse = await fetch(DEVICE_USERCODE_URL, {
178
+ method: "POST",
179
+ headers: { "Content-Type": "application/json" },
180
+ body: JSON.stringify({ client_id: CLIENT_ID }),
181
+ signal: AbortSignal.timeout(TOKEN_REQUEST_TIMEOUT_MS),
182
+ });
183
+
184
+ if (!initResponse.ok) {
185
+ throw new Error(`Device authorization initiation failed: ${initResponse.status}`);
186
+ }
187
+
188
+ const initData = (await initResponse.json()) as {
189
+ device_auth_id?: string;
190
+ user_code?: string;
191
+ interval?: string | number;
192
+ };
193
+
194
+ if (!initData.device_auth_id || !initData.user_code) {
195
+ throw new Error("Device authorization response missing required fields");
196
+ }
197
+
198
+ const userCode = initData.user_code;
199
+ const pollIntervalMs =
200
+ (typeof initData.interval === "number"
201
+ ? initData.interval
202
+ : parseInt(String(initData.interval ?? "5"), 10) || 5) *
203
+ 1000 +
204
+ DEVICE_POLL_SAFETY_MARGIN_MS;
205
+
206
+ ctrl.onAuth?.({
207
+ url: DEVICE_AUTH_URL,
208
+ instructions: `Enter code: ${userCode}`,
209
+ });
210
+
211
+ ctrl.onProgress?.(`Waiting for browser authorization (code: ${userCode})…`);
212
+
213
+ for (let poll = 0; poll < DEVICE_MAX_POLLS; poll++) {
214
+ await Bun.sleep(poll === 0 ? Math.min(pollIntervalMs, DEVICE_POLL_INTERVAL_MS) : pollIntervalMs);
215
+
216
+ if (ctrl.signal?.aborted) {
217
+ throw new Error("Device authorization cancelled");
218
+ }
219
+
220
+ const pollResponse = await fetch(DEVICE_TOKEN_URL, {
221
+ method: "POST",
222
+ headers: { "Content-Type": "application/json" },
223
+ body: JSON.stringify({
224
+ device_auth_id: initData.device_auth_id,
225
+ user_code: userCode,
226
+ }),
227
+ signal: AbortSignal.timeout(TOKEN_REQUEST_TIMEOUT_MS),
228
+ });
229
+
230
+ // 403/404 = authorization pending, keep polling
231
+ if (pollResponse.status === 403 || pollResponse.status === 404) {
232
+ continue;
233
+ }
234
+
235
+ if (!pollResponse.ok) {
236
+ throw new Error(`Device token polling failed: ${pollResponse.status}`);
237
+ }
238
+
239
+ const pollData = (await pollResponse.json()) as {
240
+ authorization_code?: string;
241
+ code_verifier?: string;
242
+ };
243
+
244
+ if (!pollData.authorization_code || !pollData.code_verifier) {
245
+ throw new Error("Device token response missing authorization_code or code_verifier");
246
+ }
247
+
248
+ ctrl.onProgress?.("Exchanging authorization code for tokens…");
249
+ return exchangeCodeForToken(pollData.authorization_code, pollData.code_verifier, DEVICE_REDIRECT_URI);
250
+ }
251
+
252
+ throw new Error("Device authorization timed out — user did not complete login in time");
253
+ }
254
+
146
255
  /**
147
256
  * Refresh OpenAI Codex OAuth token
148
257
  */
@@ -14,6 +14,7 @@ export type OAuthProvider =
14
14
  | "cerebras"
15
15
  | "cloudflare-ai-gateway"
16
16
  | "cursor"
17
+ | "deepseek"
17
18
  | "fireworks"
18
19
  | "firepass"
19
20
  | "github-copilot"
@@ -34,6 +35,7 @@ export type OAuthProvider =
34
35
  | "ollama"
35
36
  | "ollama-cloud"
36
37
  | "openai-codex"
38
+ | "openai-codex-device"
37
39
  | "opencode-go"
38
40
  | "opencode-zen"
39
41
  | "parallel"
@@ -1,8 +1,8 @@
1
1
  /**
2
2
  * Xiaomi MiMo login flow.
3
3
  *
4
- * Xiaomi MiMo provides Anthropic-compatible models via
5
- * https://api.xiaomimimo.com/anthropic.
4
+ * Xiaomi MiMo provides OpenAI-compatible models via
5
+ * https://api.xiaomimimo.com/v1.
6
6
  *
7
7
  * This is not OAuth - it's a simple API key flow:
8
8
  * 1. Open browser to Xiaomi MiMo API key console
@@ -15,8 +15,9 @@ import type { OAuthController } from "./types";
15
15
  const PROVIDER_ID = "xiaomi";
16
16
  const PROVIDER_NAME = "Xiaomi MiMo";
17
17
  const STANDARD_AUTH_URL = "https://platform.xiaomimimo.com/#/console/api-keys";
18
- const STANDARD_API_BASE_URL = "https://api.xiaomimimo.com/anthropic";
19
- const TOKEN_PLAN_API_BASE_URL = "https://token-plan-ams.xiaomimimo.com/anthropic";
18
+ const STANDARD_API_BASE_URL = "https://api.xiaomimimo.com/v1";
19
+ const TOKEN_PLAN_SGP_API_BASE_URL = "https://token-plan-sgp.xiaomimimo.com/v1";
20
+ const TOKEN_PLAN_AMS_API_BASE_URL = "https://token-plan-ams.xiaomimimo.com/v1";
20
21
  const TOKEN_PLAN_KEY_PREFIX = "tp-";
21
22
  const STANDARD_VALIDATION_MODEL = "mimo-v2-flash";
22
23
  const TOKEN_PLAN_VALIDATION_MODEL = "mimo-v2.5";
@@ -25,50 +26,81 @@ function isTokenPlanKey(apiKey: string): boolean {
25
26
  return apiKey.startsWith(TOKEN_PLAN_KEY_PREFIX);
26
27
  }
27
28
 
28
- function resolveEndpoint(apiKey: string): { baseUrl: string; model: string } {
29
- if (isTokenPlanKey(apiKey)) {
30
- return { baseUrl: TOKEN_PLAN_API_BASE_URL, model: TOKEN_PLAN_VALIDATION_MODEL };
31
- }
32
- return { baseUrl: STANDARD_API_BASE_URL, model: STANDARD_VALIDATION_MODEL };
33
- }
34
- const ANTHROPIC_VERSION = "2023-06-01";
35
29
  const VALIDATION_TIMEOUT_MS = 15_000;
36
30
 
37
31
  async function validateXiaomiApiKey(apiKey: string, signal?: AbortSignal): Promise<void> {
38
32
  const timeoutSignal = AbortSignal.timeout(VALIDATION_TIMEOUT_MS);
39
33
  const requestSignal = signal ? AbortSignal.any([signal, timeoutSignal]) : timeoutSignal;
40
- const { baseUrl, model } = resolveEndpoint(apiKey);
41
34
 
42
- const response = await fetch(`${baseUrl}/v1/messages`, {
43
- method: "POST",
44
- headers: {
45
- "Content-Type": "application/json",
46
- "x-api-key": apiKey,
47
- "anthropic-version": ANTHROPIC_VERSION,
48
- },
49
- body: JSON.stringify({
50
- model,
51
- max_tokens: 1,
52
- messages: [{ role: "user", content: "ping" }],
53
- }),
54
- signal: requestSignal,
55
- });
35
+ // For token-plan keys try SGP first, then AMS as fallback.
36
+ // Standard sk- keys only hit the one endpoint.
37
+ const endpoints = isTokenPlanKey(apiKey)
38
+ ? [
39
+ { baseUrl: TOKEN_PLAN_SGP_API_BASE_URL, model: TOKEN_PLAN_VALIDATION_MODEL },
40
+ { baseUrl: TOKEN_PLAN_AMS_API_BASE_URL, model: TOKEN_PLAN_VALIDATION_MODEL },
41
+ ]
42
+ : [{ baseUrl: STANDARD_API_BASE_URL, model: STANDARD_VALIDATION_MODEL }];
56
43
 
57
- if (response.ok) {
58
- return;
59
- }
44
+ let lastError: Error | null = null;
60
45
 
61
- let details = "";
62
- try {
63
- details = (await response.text()).trim();
64
- } catch {
65
- // ignore body parse errors, status is enough
66
- }
46
+ for (const ep of endpoints) {
47
+ try {
48
+ const response = await fetch(`${ep.baseUrl}/chat/completions`, {
49
+ method: "POST",
50
+ headers: {
51
+ "Content-Type": "application/json",
52
+ "x-api-key": apiKey,
53
+ },
54
+ body: JSON.stringify({
55
+ model: ep.model,
56
+ max_tokens: 1,
57
+ messages: [{ role: "user", content: "ping" }],
58
+ }),
59
+ signal: requestSignal,
60
+ });
61
+
62
+ if (response.ok) {
63
+ return;
64
+ }
67
65
 
68
- const message = details
69
- ? `${PROVIDER_NAME} API key validation failed (${response.status}): ${details}`
70
- : `${PROVIDER_NAME} API key validation failed (${response.status})`;
71
- throw new Error(message);
66
+ // 401 means this endpoint didn't accept the key; try the next one
67
+ if (response.status === 401) {
68
+ let details = "";
69
+ try {
70
+ details = (await response.text()).trim();
71
+ } catch {
72
+ // ignore body parse errors, status is enough
73
+ }
74
+ lastError = new Error(
75
+ details
76
+ ? `${PROVIDER_NAME} API key validation failed (${response.status}): ${details}`
77
+ : `${PROVIDER_NAME} API key validation failed (${response.status})`,
78
+ );
79
+ continue;
80
+ }
81
+
82
+ // Non-auth errors are real failures
83
+ let details = "";
84
+ try {
85
+ details = (await response.text()).trim();
86
+ } catch {
87
+ // ignore body parse errors, status is enough
88
+ }
89
+ const message = details
90
+ ? `${PROVIDER_NAME} API key validation failed (${response.status}): ${details}`
91
+ : `${PROVIDER_NAME} API key validation failed (${response.status})`;
92
+ throw new Error(message);
93
+ } catch (e) {
94
+ // Only re-throw AbortError when the caller explicitly cancelled.
95
+ // Timeout aborts (from AbortSignal.timeout) should fall through to
96
+ // the next endpoint so SGP→AMS fallback works during regional outages.
97
+ if (e instanceof DOMException && e.name === "AbortError" && signal?.aborted) {
98
+ throw e;
99
+ }
100
+ lastError = e instanceof Error ? e : new Error(String(e));
101
+ }
102
+ }
103
+ throw lastError ?? new Error(`${PROVIDER_NAME} API key validation failed`);
72
104
  }
73
105
 
74
106
  /**