@oh-my-pi/pi-ai 12.14.2 → 12.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@oh-my-pi/pi-ai",
3
- "version": "12.14.2",
3
+ "version": "12.15.0",
4
4
  "description": "Unified LLM API with automatic model discovery and provider configuration",
5
5
  "type": "module",
6
6
  "main": "./src/index.ts",
@@ -63,7 +63,7 @@
63
63
  "@connectrpc/connect-node": "^2.1.1",
64
64
  "@google/genai": "^1.41.0",
65
65
  "@mistralai/mistralai": "^1.14.0",
66
- "@oh-my-pi/pi-utils": "12.14.2",
66
+ "@oh-my-pi/pi-utils": "12.15.0",
67
67
  "@sinclair/typebox": "^0.34.48",
68
68
  "@smithy/node-http-handler": "^4.4.10",
69
69
  "ajv": "^8.18.0",
@@ -629,6 +629,7 @@ function createClient(
629
629
  baseURL: config.baseURL,
630
630
  defaultHeaders: config.defaultHeaders,
631
631
  dangerouslyAllowBrowser: config.dangerouslyAllowBrowser,
632
+ maxRetries: 5,
632
633
  });
633
634
 
634
635
  return { client, isOAuthToken: config.isOAuthToken };
@@ -424,6 +424,7 @@ function createClient(model: Model<"azure-openai-responses">, apiKey: string, op
424
424
  apiKey,
425
425
  apiVersion,
426
426
  dangerouslyAllowBrowser: true,
427
+ maxRetries: 5,
427
428
  defaultHeaders: headers,
428
429
  baseURL: baseUrl,
429
430
  });
@@ -99,6 +99,7 @@ const MAX_RETRIES = 3;
99
99
  const BASE_DELAY_MS = 1000;
100
100
  const MAX_EMPTY_STREAM_RETRIES = 2;
101
101
  const EMPTY_STREAM_BASE_DELAY_MS = 500;
102
+ const RATE_LIMIT_BUDGET_MS = 5 * 60 * 1000;
102
103
  const CLAUDE_THINKING_BETA_HEADER = "interleaved-thinking-2025-05-14";
103
104
 
104
105
  /**
@@ -360,8 +361,9 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
360
361
  let response: Response | undefined;
361
362
  let lastError: Error | undefined;
362
363
  let requestUrl: string | undefined;
364
+ let rateLimitTimeSpent = 0;
363
365
 
364
- for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
366
+ for (let attempt = 0; ; attempt++) {
365
367
  if (options?.signal?.aborted) {
366
368
  throw new Error("Request was aborted");
367
369
  }
@@ -382,13 +384,25 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
382
384
 
383
385
  const errorText = await response.text();
384
386
 
385
- // Check if retryable
386
- if (attempt < MAX_RETRIES && isRetryableError(response.status, errorText)) {
387
- // Use server-provided delay or exponential backoff
387
+ // Handle 429 rate limits with time budget
388
+ if (response.status === 429) {
389
+ const serverDelay = extractRetryDelay(errorText, response);
390
+ if (serverDelay && rateLimitTimeSpent + serverDelay <= RATE_LIMIT_BUDGET_MS) {
391
+ rateLimitTimeSpent += serverDelay;
392
+ await abortableSleep(serverDelay, options?.signal);
393
+ continue;
394
+ }
395
+ // Fallback: use exponential backoff if no server delay, up to MAX_RETRIES
396
+ if (!serverDelay && attempt < MAX_RETRIES) {
397
+ await abortableSleep(BASE_DELAY_MS * 2 ** attempt, options?.signal);
398
+ continue;
399
+ }
400
+ } else if (attempt < MAX_RETRIES && isRetryableError(response.status, errorText)) {
401
+ // Non-429 retryable errors use standard attempt cap
388
402
  const serverDelay = extractRetryDelay(errorText, response);
389
403
  const delayMs = serverDelay ?? BASE_DELAY_MS * 2 ** attempt;
390
404
 
391
- // Check if server delay exceeds max allowed (default: 60s)
405
+ // Check if server delay exceeds max allowed (default: 60s) for non-429 errors
392
406
  const maxDelayMs = options?.maxRetryDelayMs ?? 60000;
393
407
  if (maxDelayMs > 0 && serverDelay && serverDelay > maxDelayMs) {
394
408
  const delaySeconds = Math.ceil(serverDelay / 1000);
@@ -401,7 +415,7 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
401
415
  continue;
402
416
  }
403
417
 
404
- // Not retryable or max retries exceeded
418
+ // Not retryable or budget exceeded
405
419
  throw new Error(`Cloud Code Assist API error (${response.status}): ${extractErrorMessage(errorText)}`);
406
420
  } catch (error) {
407
421
  // Check for abort - fetch throws AbortError, our code throws "Request was aborted"
@@ -47,11 +47,14 @@ export async function parseCodexError(response: Response): Promise<CodexErrorInf
47
47
  const resetsAt = (err as { resets_at?: number }).resets_at ?? primary.resets_at ?? secondary.resets_at;
48
48
  const mins = resetsAt ? Math.max(0, Math.round((resetsAt * 1000 - Date.now()) / 60000)) : undefined;
49
49
 
50
- if (/usage_limit_reached|usage_not_included|rate_limit_exceeded/i.test(code) || response.status === 429) {
50
+ if (/usage_limit_reached|usage_not_included/i.test(code)) {
51
51
  const planType = (err as { plan_type?: string }).plan_type;
52
52
  const plan = planType ? ` (${String(planType).toLowerCase()} plan)` : "";
53
53
  const when = mins !== undefined ? ` Try again in ~${mins} min.` : "";
54
54
  friendlyMessage = `You have hit your ChatGPT usage limit${plan}.${when}`.trim();
55
+ } else if (/rate_limit_exceeded/i.test(code) || response.status === 429) {
56
+ const when = mins !== undefined ? ` Try again in ~${mins} min.` : "";
57
+ friendlyMessage = `ChatGPT rate limit exceeded.${when}`.trim();
55
58
  }
56
59
 
57
60
  const errMessage = (err as { message?: string }).message;
@@ -1377,16 +1377,20 @@ function logCodexDebug(message: string, details?: Record<string, unknown>): void
1377
1377
  console.error(`[codex] ${message}`);
1378
1378
  }
1379
1379
 
1380
- function getRetryDelayMs(response: Response | null, attempt: number, errorBody?: string): number {
1380
+ function getRetryDelayMs(
1381
+ response: Response | null,
1382
+ attempt: number,
1383
+ errorBody?: string,
1384
+ ): { delay: number; serverProvided: boolean } {
1381
1385
  const retryAfter = response?.headers?.get("retry-after") || null;
1382
1386
  if (retryAfter) {
1383
1387
  const seconds = Number(retryAfter);
1384
1388
  if (Number.isFinite(seconds)) {
1385
- return Math.max(0, seconds * 1000);
1389
+ return { delay: Math.max(0, seconds * 1000), serverProvided: true };
1386
1390
  }
1387
1391
  const parsedDate = Date.parse(retryAfter);
1388
1392
  if (!Number.isNaN(parsedDate)) {
1389
- return Math.max(0, parsedDate - Date.now());
1393
+ return { delay: Math.max(0, parsedDate - Date.now()), serverProvided: true };
1390
1394
  }
1391
1395
  }
1392
1396
  // Parse retry delay from error body (e.g., "Please try again in 225ms" or "Please try again in 1.5s")
@@ -1394,28 +1398,41 @@ function getRetryDelayMs(response: Response | null, attempt: number, errorBody?:
1394
1398
  const msMatch = /try again in\s+(\d+(?:\.\d+)?)\s*ms/i.exec(errorBody);
1395
1399
  if (msMatch) {
1396
1400
  const ms = Number(msMatch[1]);
1397
- if (Number.isFinite(ms)) return Math.max(ms, 100);
1401
+ if (Number.isFinite(ms)) return { delay: Math.max(ms, 100), serverProvided: true };
1398
1402
  }
1399
1403
  const sMatch = /try again in\s+(\d+(?:\.\d+)?)\s*s(?:ec)?/i.exec(errorBody);
1400
1404
  if (sMatch) {
1401
1405
  const s = Number(sMatch[1]);
1402
- if (Number.isFinite(s)) return Math.max(s * 1000, 100);
1406
+ if (Number.isFinite(s)) return { delay: Math.max(s * 1000, 100), serverProvided: true };
1403
1407
  }
1404
1408
  }
1405
- return CODEX_RETRY_DELAY_MS * (attempt + 1);
1409
+ return { delay: CODEX_RETRY_DELAY_MS * (attempt + 1), serverProvided: false };
1406
1410
  }
1411
+ /** Max total time to spend retrying 429s with server-provided delays (5 minutes). */
1412
+ const CODEX_RATE_LIMIT_BUDGET_MS = 5 * 60 * 1000;
1413
+
1407
1414
  async function fetchWithRetry(url: string, init: RequestInit, signal?: AbortSignal): Promise<Response> {
1408
1415
  let attempt = 0;
1416
+ let rateLimitTimeSpent = 0;
1409
1417
  while (true) {
1410
1418
  try {
1411
1419
  const response = await fetch(url, { ...init, signal: signal ?? init.signal });
1412
- if (!CODEX_RETRYABLE_STATUS.has(response.status) || attempt >= CODEX_MAX_RETRIES) {
1420
+ if (!CODEX_RETRYABLE_STATUS.has(response.status)) {
1413
1421
  return response;
1414
1422
  }
1415
1423
  if (signal?.aborted) return response;
1416
1424
  // Read error body for retry delay parsing
1417
1425
  const errorBody = await response.text();
1418
- const delay = getRetryDelayMs(response, attempt, errorBody);
1426
+ const { delay, serverProvided } = getRetryDelayMs(response, attempt, errorBody);
1427
+ // For 429s with a server-provided delay, use a time budget instead of attempt count
1428
+ if (response.status === 429 && serverProvided) {
1429
+ if (rateLimitTimeSpent + delay > CODEX_RATE_LIMIT_BUDGET_MS) {
1430
+ return response;
1431
+ }
1432
+ rateLimitTimeSpent += delay;
1433
+ } else if (attempt >= CODEX_MAX_RETRIES) {
1434
+ return response;
1435
+ }
1419
1436
  await abortableSleep(delay, signal);
1420
1437
  } catch (error) {
1421
1438
  if (attempt >= CODEX_MAX_RETRIES || signal?.aborted) {
@@ -480,6 +480,7 @@ async function createClient(
480
480
  apiKey,
481
481
  baseURL: model.baseUrl,
482
482
  dangerouslyAllowBrowser: true,
483
+ maxRetries: 5,
483
484
  defaultHeaders: headers,
484
485
  });
485
486
  }
@@ -397,6 +397,7 @@ function createClient(
397
397
  apiKey,
398
398
  baseURL: model.baseUrl,
399
399
  dangerouslyAllowBrowser: true,
400
+ maxRetries: 5,
400
401
  defaultHeaders: headers,
401
402
  });
402
403
  }
@@ -380,7 +380,7 @@ export async function getOAuthApiKey(
380
380
  if (Date.now() >= creds.expires) {
381
381
  try {
382
382
  creds = await refreshOAuthToken(provider, creds);
383
- } catch {
383
+ } catch (refreshError) {
384
384
  if (provider === "perplexity") {
385
385
  const jwtExpiry = getPerplexityJwtExpiryMs(creds.access);
386
386
  if (jwtExpiry && Date.now() < jwtExpiry) {
@@ -388,7 +388,8 @@ export async function getOAuthApiKey(
388
388
  return { newCredentials: fallbackCredentials, apiKey: fallbackCredentials.access };
389
389
  }
390
390
  }
391
- throw new Error(`Failed to refresh OAuth token for ${provider}`);
391
+ const reason = refreshError instanceof Error ? refreshError.message : String(refreshError);
392
+ throw new Error(`Failed to refresh OAuth token for ${provider}: ${reason}`);
392
393
  }
393
394
  }
394
395
  // For providers that need projectId, return JSON
@@ -147,7 +147,13 @@ export async function refreshOpenAICodexToken(refreshToken: string): Promise<OAu
147
147
  });
148
148
 
149
149
  if (!response.ok) {
150
- throw new Error(`OpenAI Codex token refresh failed: ${response.status}`);
150
+ let detail = `${response.status}`;
151
+ try {
152
+ const body = (await response.json()) as { error?: string; error_description?: string };
153
+ if (body.error)
154
+ detail = `${response.status} ${body.error}${body.error_description ? `: ${body.error_description}` : ""}`;
155
+ } catch {}
156
+ throw new Error(`OpenAI Codex token refresh failed: ${detail}`);
151
157
  }
152
158
 
153
159
  const tokenData = (await response.json()) as {