@oh-my-pi/pi-ai 12.14.2 → 12.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/src/providers/anthropic.ts +1 -0
- package/src/providers/azure-openai-responses.ts +1 -0
- package/src/providers/google-gemini-cli.ts +20 -6
- package/src/providers/openai-codex/response-handler.ts +4 -1
- package/src/providers/openai-codex-responses.ts +25 -8
- package/src/providers/openai-completions.ts +1 -0
- package/src/providers/openai-responses.ts +1 -0
- package/src/utils/oauth/index.ts +3 -2
- package/src/utils/oauth/openai-codex.ts +7 -1
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@oh-my-pi/pi-ai",
|
|
3
|
-
"version": "12.14.2",
|
|
3
|
+
"version": "12.15.0",
|
|
4
4
|
"description": "Unified LLM API with automatic model discovery and provider configuration",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./src/index.ts",
|
|
@@ -63,7 +63,7 @@
|
|
|
63
63
|
"@connectrpc/connect-node": "^2.1.1",
|
|
64
64
|
"@google/genai": "^1.41.0",
|
|
65
65
|
"@mistralai/mistralai": "^1.14.0",
|
|
66
|
-
"@oh-my-pi/pi-utils": "12.
|
|
66
|
+
"@oh-my-pi/pi-utils": "12.15.0",
|
|
67
67
|
"@sinclair/typebox": "^0.34.48",
|
|
68
68
|
"@smithy/node-http-handler": "^4.4.10",
|
|
69
69
|
"ajv": "^8.18.0",
|
|
package/src/providers/google-gemini-cli.ts
CHANGED
|
@@ -99,6 +99,7 @@ const MAX_RETRIES = 3;
|
|
|
99
99
|
const BASE_DELAY_MS = 1000;
|
|
100
100
|
const MAX_EMPTY_STREAM_RETRIES = 2;
|
|
101
101
|
const EMPTY_STREAM_BASE_DELAY_MS = 500;
|
|
102
|
+
const RATE_LIMIT_BUDGET_MS = 5 * 60 * 1000;
|
|
102
103
|
const CLAUDE_THINKING_BETA_HEADER = "interleaved-thinking-2025-05-14";
|
|
103
104
|
|
|
104
105
|
/**
|
|
@@ -360,8 +361,9 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
|
|
|
360
361
|
let response: Response | undefined;
|
|
361
362
|
let lastError: Error | undefined;
|
|
362
363
|
let requestUrl: string | undefined;
|
|
364
|
+
let rateLimitTimeSpent = 0;
|
|
363
365
|
|
|
364
|
-
for (let attempt = 0;
|
|
366
|
+
for (let attempt = 0; ; attempt++) {
|
|
365
367
|
if (options?.signal?.aborted) {
|
|
366
368
|
throw new Error("Request was aborted");
|
|
367
369
|
}
|
|
@@ -382,13 +384,25 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
|
|
|
382
384
|
|
|
383
385
|
const errorText = await response.text();
|
|
384
386
|
|
|
385
|
-
//
|
|
386
|
-
if (
|
|
387
|
-
|
|
387
|
+
// Handle 429 rate limits with time budget
|
|
388
|
+
if (response.status === 429) {
|
|
389
|
+
const serverDelay = extractRetryDelay(errorText, response);
|
|
390
|
+
if (serverDelay && rateLimitTimeSpent + serverDelay <= RATE_LIMIT_BUDGET_MS) {
|
|
391
|
+
rateLimitTimeSpent += serverDelay;
|
|
392
|
+
await abortableSleep(serverDelay, options?.signal);
|
|
393
|
+
continue;
|
|
394
|
+
}
|
|
395
|
+
// Fallback: use exponential backoff if no server delay, up to MAX_RETRIES
|
|
396
|
+
if (!serverDelay && attempt < MAX_RETRIES) {
|
|
397
|
+
await abortableSleep(BASE_DELAY_MS * 2 ** attempt, options?.signal);
|
|
398
|
+
continue;
|
|
399
|
+
}
|
|
400
|
+
} else if (attempt < MAX_RETRIES && isRetryableError(response.status, errorText)) {
|
|
401
|
+
// Non-429 retryable errors use standard attempt cap
|
|
388
402
|
const serverDelay = extractRetryDelay(errorText, response);
|
|
389
403
|
const delayMs = serverDelay ?? BASE_DELAY_MS * 2 ** attempt;
|
|
390
404
|
|
|
391
|
-
// Check if server delay exceeds max allowed (default: 60s)
|
|
405
|
+
// Check if server delay exceeds max allowed (default: 60s) for non-429 errors
|
|
392
406
|
const maxDelayMs = options?.maxRetryDelayMs ?? 60000;
|
|
393
407
|
if (maxDelayMs > 0 && serverDelay && serverDelay > maxDelayMs) {
|
|
394
408
|
const delaySeconds = Math.ceil(serverDelay / 1000);
|
|
@@ -401,7 +415,7 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
|
|
|
401
415
|
continue;
|
|
402
416
|
}
|
|
403
417
|
|
|
404
|
-
// Not retryable or
|
|
418
|
+
// Not retryable or budget exceeded
|
|
405
419
|
throw new Error(`Cloud Code Assist API error (${response.status}): ${extractErrorMessage(errorText)}`);
|
|
406
420
|
} catch (error) {
|
|
407
421
|
// Check for abort - fetch throws AbortError, our code throws "Request was aborted"
|
|
package/src/providers/openai-codex/response-handler.ts
CHANGED
|
@@ -47,11 +47,14 @@ export async function parseCodexError(response: Response): Promise<CodexErrorInf
|
|
|
47
47
|
const resetsAt = (err as { resets_at?: number }).resets_at ?? primary.resets_at ?? secondary.resets_at;
|
|
48
48
|
const mins = resetsAt ? Math.max(0, Math.round((resetsAt * 1000 - Date.now()) / 60000)) : undefined;
|
|
49
49
|
|
|
50
|
-
if (/usage_limit_reached|usage_not_included
|
|
50
|
+
if (/usage_limit_reached|usage_not_included/i.test(code)) {
|
|
51
51
|
const planType = (err as { plan_type?: string }).plan_type;
|
|
52
52
|
const plan = planType ? ` (${String(planType).toLowerCase()} plan)` : "";
|
|
53
53
|
const when = mins !== undefined ? ` Try again in ~${mins} min.` : "";
|
|
54
54
|
friendlyMessage = `You have hit your ChatGPT usage limit${plan}.${when}`.trim();
|
|
55
|
+
} else if (/rate_limit_exceeded/i.test(code) || response.status === 429) {
|
|
56
|
+
const when = mins !== undefined ? ` Try again in ~${mins} min.` : "";
|
|
57
|
+
friendlyMessage = `ChatGPT rate limit exceeded.${when}`.trim();
|
|
55
58
|
}
|
|
56
59
|
|
|
57
60
|
const errMessage = (err as { message?: string }).message;
|
|
package/src/providers/openai-codex-responses.ts
CHANGED
|
@@ -1377,16 +1377,20 @@ function logCodexDebug(message: string, details?: Record<string, unknown>): void
|
|
|
1377
1377
|
console.error(`[codex] ${message}`);
|
|
1378
1378
|
}
|
|
1379
1379
|
|
|
1380
|
-
function getRetryDelayMs(
|
|
1380
|
+
function getRetryDelayMs(
|
|
1381
|
+
response: Response | null,
|
|
1382
|
+
attempt: number,
|
|
1383
|
+
errorBody?: string,
|
|
1384
|
+
): { delay: number; serverProvided: boolean } {
|
|
1381
1385
|
const retryAfter = response?.headers?.get("retry-after") || null;
|
|
1382
1386
|
if (retryAfter) {
|
|
1383
1387
|
const seconds = Number(retryAfter);
|
|
1384
1388
|
if (Number.isFinite(seconds)) {
|
|
1385
|
-
return Math.max(0, seconds * 1000);
|
|
1389
|
+
return { delay: Math.max(0, seconds * 1000), serverProvided: true };
|
|
1386
1390
|
}
|
|
1387
1391
|
const parsedDate = Date.parse(retryAfter);
|
|
1388
1392
|
if (!Number.isNaN(parsedDate)) {
|
|
1389
|
-
return Math.max(0, parsedDate - Date.now());
|
|
1393
|
+
return { delay: Math.max(0, parsedDate - Date.now()), serverProvided: true };
|
|
1390
1394
|
}
|
|
1391
1395
|
}
|
|
1392
1396
|
// Parse retry delay from error body (e.g., "Please try again in 225ms" or "Please try again in 1.5s")
|
|
@@ -1394,28 +1398,41 @@ function getRetryDelayMs(response: Response | null, attempt: number, errorBody?:
|
|
|
1394
1398
|
const msMatch = /try again in\s+(\d+(?:\.\d+)?)\s*ms/i.exec(errorBody);
|
|
1395
1399
|
if (msMatch) {
|
|
1396
1400
|
const ms = Number(msMatch[1]);
|
|
1397
|
-
if (Number.isFinite(ms)) return Math.max(ms, 100);
|
|
1401
|
+
if (Number.isFinite(ms)) return { delay: Math.max(ms, 100), serverProvided: true };
|
|
1398
1402
|
}
|
|
1399
1403
|
const sMatch = /try again in\s+(\d+(?:\.\d+)?)\s*s(?:ec)?/i.exec(errorBody);
|
|
1400
1404
|
if (sMatch) {
|
|
1401
1405
|
const s = Number(sMatch[1]);
|
|
1402
|
-
if (Number.isFinite(s)) return Math.max(s * 1000, 100);
|
|
1406
|
+
if (Number.isFinite(s)) return { delay: Math.max(s * 1000, 100), serverProvided: true };
|
|
1403
1407
|
}
|
|
1404
1408
|
}
|
|
1405
|
-
return CODEX_RETRY_DELAY_MS * (attempt + 1);
|
|
1409
|
+
return { delay: CODEX_RETRY_DELAY_MS * (attempt + 1), serverProvided: false };
|
|
1406
1410
|
}
|
|
1411
|
+
/** Max total time to spend retrying 429s with server-provided delays (5 minutes). */
|
|
1412
|
+
const CODEX_RATE_LIMIT_BUDGET_MS = 5 * 60 * 1000;
|
|
1413
|
+
|
|
1407
1414
|
async function fetchWithRetry(url: string, init: RequestInit, signal?: AbortSignal): Promise<Response> {
|
|
1408
1415
|
let attempt = 0;
|
|
1416
|
+
let rateLimitTimeSpent = 0;
|
|
1409
1417
|
while (true) {
|
|
1410
1418
|
try {
|
|
1411
1419
|
const response = await fetch(url, { ...init, signal: signal ?? init.signal });
|
|
1412
|
-
if (!CODEX_RETRYABLE_STATUS.has(response.status)
|
|
1420
|
+
if (!CODEX_RETRYABLE_STATUS.has(response.status)) {
|
|
1413
1421
|
return response;
|
|
1414
1422
|
}
|
|
1415
1423
|
if (signal?.aborted) return response;
|
|
1416
1424
|
// Read error body for retry delay parsing
|
|
1417
1425
|
const errorBody = await response.text();
|
|
1418
|
-
const delay = getRetryDelayMs(response, attempt, errorBody);
|
|
1426
|
+
const { delay, serverProvided } = getRetryDelayMs(response, attempt, errorBody);
|
|
1427
|
+
// For 429s with a server-provided delay, use a time budget instead of attempt count
|
|
1428
|
+
if (response.status === 429 && serverProvided) {
|
|
1429
|
+
if (rateLimitTimeSpent + delay > CODEX_RATE_LIMIT_BUDGET_MS) {
|
|
1430
|
+
return response;
|
|
1431
|
+
}
|
|
1432
|
+
rateLimitTimeSpent += delay;
|
|
1433
|
+
} else if (attempt >= CODEX_MAX_RETRIES) {
|
|
1434
|
+
return response;
|
|
1435
|
+
}
|
|
1419
1436
|
await abortableSleep(delay, signal);
|
|
1420
1437
|
} catch (error) {
|
|
1421
1438
|
if (attempt >= CODEX_MAX_RETRIES || signal?.aborted) {
|
package/src/utils/oauth/index.ts
CHANGED
|
@@ -380,7 +380,7 @@ export async function getOAuthApiKey(
|
|
|
380
380
|
if (Date.now() >= creds.expires) {
|
|
381
381
|
try {
|
|
382
382
|
creds = await refreshOAuthToken(provider, creds);
|
|
383
|
-
} catch {
|
|
383
|
+
} catch (refreshError) {
|
|
384
384
|
if (provider === "perplexity") {
|
|
385
385
|
const jwtExpiry = getPerplexityJwtExpiryMs(creds.access);
|
|
386
386
|
if (jwtExpiry && Date.now() < jwtExpiry) {
|
|
@@ -388,7 +388,8 @@ export async function getOAuthApiKey(
|
|
|
388
388
|
return { newCredentials: fallbackCredentials, apiKey: fallbackCredentials.access };
|
|
389
389
|
}
|
|
390
390
|
}
|
|
391
|
-
|
|
391
|
+
const reason = refreshError instanceof Error ? refreshError.message : String(refreshError);
|
|
392
|
+
throw new Error(`Failed to refresh OAuth token for ${provider}: ${reason}`);
|
|
392
393
|
}
|
|
393
394
|
}
|
|
394
395
|
// For providers that need projectId, return JSON
|
|
package/src/utils/oauth/openai-codex.ts
CHANGED
|
@@ -147,7 +147,13 @@ export async function refreshOpenAICodexToken(refreshToken: string): Promise<OAu
|
|
|
147
147
|
});
|
|
148
148
|
|
|
149
149
|
if (!response.ok) {
|
|
150
|
-
|
|
150
|
+
let detail = `${response.status}`;
|
|
151
|
+
try {
|
|
152
|
+
const body = (await response.json()) as { error?: string; error_description?: string };
|
|
153
|
+
if (body.error)
|
|
154
|
+
detail = `${response.status} ${body.error}${body.error_description ? `: ${body.error_description}` : ""}`;
|
|
155
|
+
} catch {}
|
|
156
|
+
throw new Error(`OpenAI Codex token refresh failed: ${detail}`);
|
|
151
157
|
}
|
|
152
158
|
|
|
153
159
|
const tokenData = (await response.json()) as {
|