@oh-my-pi/pi-ai 15.4.2 → 15.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,27 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ## [15.5.0] - 2026-05-26
6
+ ### Added
7
+
8
+ - Added `zhipu-coding-plan` provider for Zhipu (智谱) BigModel's domestic coding-plan SKU at `https://open.bigmodel.cn/api/coding/paas/v4`, with dynamic model discovery (`ZHIPU_API_KEY`), zai-format thinking, `reasoning_content` field, and an API-key login flow ([#1340](https://github.com/can1357/oh-my-pi/issues/1340)).
9
+
10
+ ### Removed
11
+
12
+ - Removed the `pi-ai` CLI binary (`packages/ai/src/cli.ts`) and its `bin` entry. Use the in-process equivalent in the omp coding-agent CLI: `omp auth-broker login [provider]`, `omp auth-broker logout [provider]`, and `omp auth-broker list`. The library API (`AuthStorage.login()`, `getOAuthProviders()`, etc.) is unchanged.
13
+
14
+ ### Fixed
15
+
16
+ - Fixed delayed `toolResult` emissions so real tool results are emitted in the correct assistant `toolCall` window after handoff/compaction, preventing out-of-order or orphaned tool results.
17
+ - Fixed delayed `toolResult` handling for aborted calls so a late real result is emitted instead of a synthetic `aborted` result for the same `toolCallId`.
18
+ - Fixed usage polling to disable credentials when OAuth refresh fails definitively (for example, `invalid_grant`) and clear cached last-good usage data so stale reports no longer remain visible.
19
+
20
+ ## [15.4.3] - 2026-05-26
21
+
22
+ ### Fixed
23
+
24
+ - Fixed Google Vertex model discovery to use the project-scoped OpenAI-compatible model list so Vertex Model Garden models such as GLM and Claude are available through ADC auth ([#1412](https://github.com/can1357/oh-my-pi/issues/1412)).
25
+
5
26
  ## [15.4.2] - 2026-05-26
6
27
 
7
28
  ### Fixed
package/README.md CHANGED
@@ -1057,13 +1057,14 @@ Official docs: [Application Default Credentials](https://cloud.google.com/docs/a
1057
1057
 
1058
1058
  ### CLI Login
1059
1059
 
1060
- The quickest way to authenticate:
1060
+ Authenticate via the [`omp`](https://omp.sh) coding-agent CLI, which drives this library's OAuth/API-key flows in-process and persists into `agent.db`:
1061
1061
 
1062
1062
  ```bash
1063
- bunx @oh-my-pi/pi-ai login # interactive provider selection
1064
- bunx @oh-my-pi/pi-ai login anthropic # login to specific provider
1065
- bunx @oh-my-pi/pi-ai login vllm # store vLLM API key (or placeholder for local no-auth)
1066
- bunx @oh-my-pi/pi-ai list # list available providers
1063
+ omp auth-broker login # interactive provider selection
1064
+ omp auth-broker login anthropic # login to a specific provider
1065
+ omp auth-broker login vllm # store vLLM API key (or placeholder for local no-auth)
1066
+ omp auth-broker list # list supported providers
1067
+ omp auth-broker logout # interactive — pick a stored credential to remove
1067
1068
  ```
1068
1069
 
1069
1070
  Credentials are saved to `agent.db` in the agent directory. `/login qianfan` opens the Qianfan console and stores the pasted API key.
@@ -1,4 +1,4 @@
1
- import type { AuthStorage } from "../auth-storage";
1
+ import { type AuthStorage } from "../auth-storage";
2
2
  export interface AuthBrokerRefresherOptions {
3
3
  storage: AuthStorage;
4
4
  /** Refresh credentials expiring within this window. Default 5 min. */
@@ -281,6 +281,7 @@ export type AuthStorageOptions = {
281
281
  */
282
282
  fetchUsageReports?: (signal?: AbortSignal) => Promise<UsageReport[] | null>;
283
283
  };
284
+ export declare function isDefinitiveOAuthFailure(errorMsg: string): boolean;
284
285
  type AuthApiKeyOptions = {
285
286
  baseUrl?: string;
286
287
  modelId?: string;
@@ -1,9 +1,14 @@
1
1
  import type { ModelManagerOptions } from "../model-manager";
2
+ import type { FetchImpl } from "../types";
2
3
  export interface GoogleModelManagerConfig {
3
4
  apiKey?: string;
4
5
  }
5
6
  export interface GoogleVertexModelManagerConfig {
6
7
  apiKey?: string;
8
+ project?: string;
9
+ location?: string;
10
+ signal?: AbortSignal;
11
+ fetch?: FetchImpl;
7
12
  }
8
13
  export interface GoogleAntigravityModelManagerConfig {
9
14
  oauthToken?: string;
@@ -14,6 +19,6 @@ export interface GoogleGeminiCliModelManagerConfig {
14
19
  endpoint?: string;
15
20
  }
16
21
  export declare function googleModelManagerOptions(config?: GoogleModelManagerConfig): ModelManagerOptions<"google-generative-ai">;
17
- export declare function googleVertexModelManagerOptions(_config?: GoogleVertexModelManagerConfig): ModelManagerOptions<"google-vertex">;
22
+ export declare function googleVertexModelManagerOptions(config?: GoogleVertexModelManagerConfig): ModelManagerOptions;
18
23
  export declare function googleAntigravityModelManagerOptions(config?: GoogleAntigravityModelManagerConfig): ModelManagerOptions<"google-gemini-cli">;
19
24
  export declare function googleGeminiCliModelManagerOptions(config?: GoogleGeminiCliModelManagerConfig): ModelManagerOptions<"google-gemini-cli">;
@@ -58,6 +58,11 @@ export interface DeepSeekModelManagerConfig {
58
58
  baseUrl?: string;
59
59
  }
60
60
  export declare function deepseekModelManagerOptions(config?: DeepSeekModelManagerConfig): ModelManagerOptions<"openai-completions">;
61
+ export interface ZhipuCodingPlanModelManagerConfig {
62
+ apiKey?: string;
63
+ baseUrl?: string;
64
+ }
65
+ export declare function zhipuCodingPlanModelManagerOptions(config?: ZhipuCodingPlanModelManagerConfig): ModelManagerOptions<"openai-completions">;
61
66
  export interface FireworksModelManagerConfig {
62
67
  apiKey?: string;
63
68
  baseUrl?: string;
@@ -48,7 +48,7 @@ export interface ThinkingConfig {
48
48
  /** Provider-specific transport used to encode the selected effort. */
49
49
  mode: ThinkingControlMode;
50
50
  }
51
- export type KnownProvider = "alibaba-coding-plan" | "amazon-bedrock" | "anthropic" | "google" | "google-gemini-cli" | "google-antigravity" | "google-vertex" | "openai" | "openai-codex" | "kimi-code" | "minimax-code" | "minimax-code-cn" | "github-copilot" | "fireworks" | "firepass" | "gitlab-duo" | "cursor" | "deepseek" | "xai" | "groq" | "cerebras" | "openrouter" | "kilo" | "vercel-ai-gateway" | "zai" | "mistral" | "minimax" | "opencode-go" | "opencode-zen" | "synthetic" | "cloudflare-ai-gateway" | "huggingface" | "litellm" | "moonshot" | "nvidia" | "nanogpt" | "ollama" | "ollama-cloud" | "qianfan" | "qwen-portal" | "together" | "venice" | "vllm" | "xiaomi" | "zenmux" | "lm-studio";
51
+ export type KnownProvider = "alibaba-coding-plan" | "amazon-bedrock" | "anthropic" | "google" | "google-gemini-cli" | "google-antigravity" | "google-vertex" | "openai" | "openai-codex" | "kimi-code" | "minimax-code" | "minimax-code-cn" | "github-copilot" | "fireworks" | "firepass" | "gitlab-duo" | "cursor" | "deepseek" | "xai" | "groq" | "cerebras" | "openrouter" | "kilo" | "vercel-ai-gateway" | "zai" | "zhipu-coding-plan" | "mistral" | "minimax" | "opencode-go" | "opencode-zen" | "synthetic" | "cloudflare-ai-gateway" | "huggingface" | "litellm" | "moonshot" | "nvidia" | "nanogpt" | "ollama" | "ollama-cloud" | "qianfan" | "qwen-portal" | "together" | "venice" | "vllm" | "xiaomi" | "zenmux" | "lm-studio";
52
52
  export type Provider = KnownProvider | string;
53
53
  import type { Effort } from "./model-thinking";
54
54
  /** Token budgets for each thinking level (token-based providers only) */
@@ -2,3 +2,4 @@ export * from "./antigravity";
2
2
  export * from "./codex";
3
3
  export * from "./gemini";
4
4
  export * from "./openai-compatible";
5
+ export * from "./vertex";
@@ -0,0 +1,25 @@
1
+ import type { FetchImpl, Model } from "../../types";
2
+ /** Configuration for Vertex AI OpenAI-compatible model discovery. */
3
+ export interface VertexDiscoveryOptions {
4
+ /** Google Cloud project ID hosting the Vertex AI endpoint. */
5
+ project: string;
6
+ /** Vertex AI location, for example `global` or `us-central1`. */
7
+ location: string;
8
+ /** Optional requested page size for model listing. */
9
+ pageSize?: number;
10
+ /** Maximum number of pages to request before stopping pagination. */
11
+ maxPages?: number;
12
+ /** Optional abort signal for HTTP requests. */
13
+ signal?: AbortSignal;
14
+ /** Optional fetch implementation override for tests. */
15
+ fetch?: FetchImpl;
16
+ }
17
+ /**
18
+ * Fetches models exposed by Vertex AI's OpenAI-compatible endpoint.
19
+ *
20
+ * Returns `null` on auth, transport, or protocol failures so callers can fall
21
+ * back to cache/static models without surfacing discovery noise at startup.
22
+ */
23
+ export declare function fetchVertexOpenAIModels(options: VertexDiscoveryOptions): Promise<Model<"openai-completions">[] | null>;
24
+ /** Returns the stable Vertex AI OpenAI-compatible endpoint base URL. */
25
+ export declare function buildVertexOpenAIBaseUrl(project: string, location: string): string;
@@ -7,7 +7,7 @@ export type OAuthCredentials = {
7
7
  email?: string;
8
8
  accountId?: string;
9
9
  };
10
- export type OAuthProvider = "alibaba-coding-plan" | "anthropic" | "cerebras" | "cloudflare-ai-gateway" | "cursor" | "deepseek" | "fireworks" | "firepass" | "github-copilot" | "google-gemini-cli" | "google-antigravity" | "gitlab-duo" | "huggingface" | "kimi-code" | "kilo" | "kagi" | "litellm" | "lm-studio" | "minimax-code" | "minimax-code-cn" | "moonshot" | "nvidia" | "nanogpt" | "ollama" | "ollama-cloud" | "openai-codex" | "openai-codex-device" | "opencode-go" | "opencode-zen" | "parallel" | "perplexity" | "qianfan" | "qwen-portal" | "synthetic" | "tavily" | "together" | "venice" | "vercel-ai-gateway" | "vllm" | "xiaomi" | "zenmux" | "zai";
10
+ export type OAuthProvider = "alibaba-coding-plan" | "anthropic" | "cerebras" | "cloudflare-ai-gateway" | "cursor" | "deepseek" | "fireworks" | "firepass" | "github-copilot" | "google-gemini-cli" | "google-antigravity" | "gitlab-duo" | "huggingface" | "kimi-code" | "kilo" | "kagi" | "litellm" | "lm-studio" | "minimax-code" | "minimax-code-cn" | "moonshot" | "nvidia" | "nanogpt" | "ollama" | "ollama-cloud" | "openai-codex" | "openai-codex-device" | "opencode-go" | "opencode-zen" | "parallel" | "perplexity" | "qianfan" | "qwen-portal" | "synthetic" | "tavily" | "together" | "venice" | "vercel-ai-gateway" | "vllm" | "xiaomi" | "zenmux" | "zai" | "zhipu-coding-plan";
11
11
  export type OAuthProviderId = OAuthProvider | (string & {});
12
12
  export type OAuthPrompt = {
13
13
  message: string;
@@ -0,0 +1,18 @@
1
+ /**
2
+ * Zhipu Coding Plan login flow.
3
+ *
4
+ * Zhipu BigModel (智谱) provides an OpenAI-compatible API.
5
+ * API docs: https://docs.bigmodel.cn/cn/guide/develop/openai/introduction
6
+ *
7
+ * Simple API key flow:
8
+ * 1. User gets their API key from https://open.bigmodel.cn
9
+ * 2. User pastes the API key into the CLI
10
+ */
11
+ import type { OAuthController } from "./types";
12
+ /**
13
+ * Login to Zhipu Coding Plan.
14
+ *
15
+ * Opens the browser to the API keys page and prompts the user to paste their API key.
16
+ * Returns the API key directly (not OAuthCredentials - this isn't OAuth).
17
+ */
18
+ export declare function loginZhipuCodingPlan(options: OAuthController): Promise<string>;
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "type": "module",
3
3
  "name": "@oh-my-pi/pi-ai",
4
- "version": "15.4.2",
4
+ "version": "15.5.0",
5
5
  "description": "Unified LLM API with automatic model discovery and provider configuration",
6
6
  "homepage": "https://omp.sh",
7
7
  "author": "Can Boluk",
@@ -28,9 +28,6 @@
28
28
  ],
29
29
  "main": "./src/index.ts",
30
30
  "types": "./dist/types/index.d.ts",
31
- "bin": {
32
- "pi-ai": "./src/cli.ts"
33
- },
34
31
  "scripts": {
35
32
  "check": "biome check . && bun run check:types",
36
33
  "check:types": "tsgo -p tsconfig.json --noEmit",
@@ -43,7 +40,7 @@
43
40
  "dependencies": {
44
41
  "@anthropic-ai/sdk": "^0.94.0",
45
42
  "@bufbuild/protobuf": "^2.12.0",
46
- "@oh-my-pi/pi-utils": "15.4.2",
43
+ "@oh-my-pi/pi-utils": "15.5.0",
47
44
  "openai": "^6.36.0",
48
45
  "partial-json": "^0.1.7",
49
46
  "zod": "4.4.3"
@@ -10,7 +10,7 @@
10
10
  * snapshot pull surfaces a clean delete on the client.
11
11
  */
12
12
  import { logger } from "@oh-my-pi/pi-utils";
13
- import type { AuthStorage } from "../auth-storage";
13
+ import { type AuthStorage, isDefinitiveOAuthFailure } from "../auth-storage";
14
14
  import { DEFAULT_REFRESH_INTERVAL_MS, DEFAULT_REFRESH_SKEW_MS } from "./types";
15
15
 
16
16
  export interface AuthBrokerRefresherOptions {
@@ -23,16 +23,6 @@ export interface AuthBrokerRefresherOptions {
23
23
  now?: () => number;
24
24
  }
25
25
 
26
- const INVALID_GRANT_REGEX = /invalid_grant|invalid_token|revoked|unauthorized|expired.*refresh|refresh.*expired/i;
27
- const TRANSIENT_REGEX = /timeout|network|fetch failed|ECONNREFUSED/i;
28
- const HTTP_401_403_REGEX = /\b(401|403)\b/;
29
-
30
- function isDefinitiveFailure(errorMsg: string): boolean {
31
- if (INVALID_GRANT_REGEX.test(errorMsg)) return true;
32
- if (HTTP_401_403_REGEX.test(errorMsg) && !TRANSIENT_REGEX.test(errorMsg)) return true;
33
- return false;
34
- }
35
-
36
26
  export interface AuthBrokerRefresherSchedule {
37
27
  enabled: boolean;
38
28
  intervalMs: number;
@@ -113,7 +103,7 @@ export class AuthBrokerRefresher {
113
103
  await this.#storage.refreshCredentialById(id);
114
104
  } catch (error) {
115
105
  const errorMsg = String(error);
116
- if (isDefinitiveFailure(errorMsg)) {
106
+ if (isDefinitiveOAuthFailure(errorMsg)) {
117
107
  logger.warn("auth-broker refresh failed definitively; disabling credential", {
118
108
  id,
119
109
  error: errorMsg,
@@ -414,6 +414,29 @@ const OAUTH_REFRESH_SKEW_MS = 60_000;
414
414
  */
415
415
  const MAX_PENDING_DISABLED_EVENTS = 32;
416
416
 
417
+ /**
418
+ * Classify an OAuth refresh error as a definitive credential failure (the
419
+ * refresh token is dead — re-login required) versus a transient blip
420
+ * (network/5xx — retry next sweep).
421
+ *
422
+ * Anchored at module scope so all three refresh sites — in-stream
423
+ * {@link AuthStorage.getApiKey}, the usage probe in
424
+ * {@link AuthStorage.fetchUsageReports}, and the auth-broker background
425
+ * refresher — disable rows on the same criteria. A drifting classifier
426
+ * between sites would let stale last-good usage reports surface indefinitely
427
+ * while streaming requests correctly tear the row down.
428
+ */
429
+ const OAUTH_DEFINITIVE_FAILURE_REGEX =
430
+ /invalid_grant|invalid_token|revoked|unauthorized|expired.*refresh|refresh.*expired/i;
431
+ const OAUTH_TRANSIENT_FAILURE_REGEX = /timeout|network|fetch failed|ECONNREFUSED/i;
432
+ const OAUTH_HTTP_AUTH_REGEX = /\b(401|403)\b/;
433
+
434
+ export function isDefinitiveOAuthFailure(errorMsg: string): boolean {
435
+ if (OAUTH_DEFINITIVE_FAILURE_REGEX.test(errorMsg)) return true;
436
+ if (OAUTH_HTTP_AUTH_REGEX.test(errorMsg) && !OAUTH_TRANSIENT_FAILURE_REGEX.test(errorMsg)) return true;
437
+ return false;
438
+ }
439
+
417
440
  type UsageCacheEntry<T> = {
418
441
  value: T;
419
442
  expiresAt: number;
@@ -1497,6 +1520,12 @@ export class AuthStorage {
1497
1520
  await saveApiKeyCredential(apiKey);
1498
1521
  return;
1499
1522
  }
1523
+ case "zhipu-coding-plan": {
1524
+ const { loginZhipuCodingPlan } = await import("./utils/oauth/zhipu");
1525
+ const apiKey = await loginZhipuCodingPlan(ctrl);
1526
+ await saveApiKeyCredential(apiKey);
1527
+ return;
1528
+ }
1500
1529
  case "qianfan": {
1501
1530
  const { loginQianfan } = await import("./utils/oauth/qianfan");
1502
1531
  const apiKey = await loginQianfan(ctrl);
@@ -1832,9 +1861,50 @@ export class AuthStorage {
1832
1861
  credential: refreshedCredential,
1833
1862
  };
1834
1863
  } catch (error) {
1864
+ const errorMsg = String(error);
1865
+ // Definitive failure (invalid_grant / 401 not from a network blip) means
1866
+ // the refresh token itself is dead — probing with the original credential
1867
+ // will 401, the catch below will return null, and #fetchUsageCached's
1868
+ // last-good fallback will surface yesterday's report indefinitely
1869
+ // (including its already-elapsed `resetsAt`). CAS-disable the row and
1870
+ // clear the cache so the credential drops out of the report instead of
1871
+ // freezing in place until the user notices and re-logs in.
1872
+ if (isDefinitiveOAuthFailure(errorMsg)) {
1873
+ const credentialId = this.#findStoredCredentialIdForUsageCredential(
1874
+ request.provider,
1875
+ request.credential,
1876
+ );
1877
+ if (credentialId !== undefined) {
1878
+ const entries = this.#getStoredCredentials(request.provider);
1879
+ const index = entries.findIndex(entry => entry.id === credentialId);
1880
+ if (index !== -1) {
1881
+ const disabled = this.#tryDisableCredentialAtIfMatches(
1882
+ request.provider,
1883
+ index,
1884
+ refreshableCredential,
1885
+ `oauth refresh failed during usage probe: ${errorMsg}`,
1886
+ );
1887
+ if (disabled) {
1888
+ this.#usageLogger?.warn(
1889
+ "Usage credential refresh failed definitively; credential disabled",
1890
+ { provider: request.provider, credentialId, error: errorMsg },
1891
+ );
1892
+ // Neutralize last-good for this cache key: write a null
1893
+ // entry with an immediately-elapsed expiry so a future
1894
+ // getStale lookup (e.g. on re-login under the same
1895
+ // account identity) can't replay the stale report.
1896
+ this.#usageCache.set(this.#buildUsageReportCacheKey(request), {
1897
+ value: null,
1898
+ expiresAt: 0,
1899
+ });
1900
+ return null;
1901
+ }
1902
+ }
1903
+ }
1904
+ }
1835
1905
  this.#usageLogger?.debug("Usage credential refresh failed, using original credential", {
1836
1906
  provider: request.provider,
1837
- error: String(error),
1907
+ error: errorMsg,
1838
1908
  });
1839
1909
  }
1840
1910
  }
@@ -2877,9 +2947,7 @@ export class AuthStorage {
2877
2947
  const errorMsg = String(error);
2878
2948
  // Only remove credentials for definitive auth failures
2879
2949
  // Keep credentials for transient errors (network, 5xx) and block temporarily
2880
- const isDefinitiveFailure =
2881
- /invalid_grant|invalid_token|revoked|unauthorized|expired.*refresh|refresh.*expired/i.test(errorMsg) ||
2882
- (/\b(401|403)\b/.test(errorMsg) && !/timeout|network|fetch failed|ECONNREFUSED/i.test(errorMsg));
2950
+ const isDefinitiveFailure = isDefinitiveOAuthFailure(errorMsg);
2883
2951
 
2884
2952
  logger.warn("OAuth token refresh failed", {
2885
2953
  provider,