@oh-my-pi/pi-ai 13.5.2 → 13.5.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +5 -0
- package/package.json +2 -2
- package/src/index.ts +1 -0
- package/src/providers/google-gemini-cli.ts +18 -2
- package/src/rate-limit-utils.ts +76 -0
- package/src/utils/validation.ts +57 -3
package/CHANGELOG.md
CHANGED
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"type": "module",
|
|
3
3
|
"name": "@oh-my-pi/pi-ai",
|
|
4
|
-
"version": "13.5.2",
|
|
4
|
+
"version": "13.5.4",
|
|
5
5
|
"description": "Unified LLM API with automatic model discovery and provider configuration",
|
|
6
6
|
"homepage": "https://github.com/can1357/oh-my-pi",
|
|
7
7
|
"author": "Can Boluk",
|
|
@@ -41,7 +41,7 @@
|
|
|
41
41
|
"@aws-sdk/client-bedrock-runtime": "^3.998",
|
|
42
42
|
"@bufbuild/protobuf": "^2.11",
|
|
43
43
|
"@google/genai": "^1.43",
|
|
44
|
-
"@oh-my-pi/pi-utils": "13.5.2",
|
|
44
|
+
"@oh-my-pi/pi-utils": "13.5.4",
|
|
45
45
|
"@sinclair/typebox": "^0.34",
|
|
46
46
|
"@smithy/node-http-handler": "^4.4",
|
|
47
47
|
"ajv": "^8.18",
|
package/src/index.ts
CHANGED
|
@@ -19,6 +19,7 @@ export * from "./providers/kimi";
|
|
|
19
19
|
export * from "./providers/openai-completions";
|
|
20
20
|
export * from "./providers/openai-responses";
|
|
21
21
|
export * from "./providers/synthetic";
|
|
22
|
+
export * from "./rate-limit-utils";
|
|
22
23
|
export * from "./stream";
|
|
23
24
|
export * from "./types";
|
|
24
25
|
export * from "./usage";
|
|
package/src/providers/google-gemini-cli.ts
CHANGED
|
@@ -22,6 +22,7 @@ import { AssistantMessageEventStream } from "../utils/event-stream";
|
|
|
22
22
|
import { appendRawHttpRequestDumpFor400, type RawHttpRequestDump, withHttpStatus } from "../utils/http-inspector";
|
|
23
23
|
import { refreshAntigravityToken } from "../utils/oauth/google-antigravity";
|
|
24
24
|
import { refreshGoogleCloudToken } from "../utils/oauth/google-gemini-cli";
|
|
25
|
+
import { extractHttpStatusFromError } from "../utils/retry";
|
|
25
26
|
import {
|
|
26
27
|
convertMessages,
|
|
27
28
|
convertTools,
|
|
@@ -529,6 +530,12 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
|
|
|
529
530
|
|
|
530
531
|
// Handle 429 rate limits with time budget
|
|
531
532
|
if (response.status === 429) {
|
|
533
|
+
if (/quota|exhausted/i.test(errorText)) {
|
|
534
|
+
throw withHttpStatus(
|
|
535
|
+
new Error(`Cloud Code Assist API error (429): ${extractErrorMessage(errorText)}`),
|
|
536
|
+
429,
|
|
537
|
+
);
|
|
538
|
+
}
|
|
532
539
|
const serverDelay = extractRetryDelay(errorText, response);
|
|
533
540
|
if (serverDelay && rateLimitTimeSpent + serverDelay <= RATE_LIMIT_BUDGET_MS) {
|
|
534
541
|
rateLimitTimeSpent += serverDelay;
|
|
@@ -549,8 +556,11 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
|
|
|
549
556
|
const maxDelayMs = options?.maxRetryDelayMs ?? 60000;
|
|
550
557
|
if (maxDelayMs > 0 && serverDelay && serverDelay > maxDelayMs) {
|
|
551
558
|
const delaySeconds = Math.ceil(serverDelay / 1000);
|
|
552
|
-
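throw new Error(
|
|
553
|
-
`Server requested ${delaySeconds}s retry delay (max: ${Math.ceil(maxDelayMs / 1000)}s). ${extractErrorMessage(errorText)}`,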
|
559
|
+
throw withHttpStatus(
|
|
560
|
+
new Error(
|
|
561
|
+
`Server requested ${delaySeconds}s retry delay (max: ${Math.ceil(maxDelayMs / 1000)}s). ${extractErrorMessage(errorText)}`,
|
|
562
|
+
),
|
|
563
|
+
response.status,
|
|
554
564
|
);
|
|
555
565
|
}
|
|
556
566
|
|
|
@@ -570,6 +580,12 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
|
|
|
570
580
|
throw new Error("Request was aborted");
|
|
571
581
|
}
|
|
572
582
|
}
|
|
583
|
+
|
|
584
|
+
// HTTP responses are handled inside the try block.
|
|
585
|
+
// If we intentionally throw with status metadata, don't convert it into a network retry.
|
|
586
|
+
if (extractHttpStatusFromError(error) !== undefined) {
|
|
587
|
+
throw error;
|
|
588
|
+
}
|
|
573
589
|
// Extract detailed error message from fetch errors (Node includes cause)
|
|
574
590
|
lastError = error instanceof Error ? error : new Error(String(error));
|
|
575
591
|
if (lastError.message === "fetch failed" && lastError.cause instanceof Error) {
|
|
package/src/rate-limit-utils.ts
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Rate limit reason classification and backoff calculation utilities.
|
|
3
|
+
* Ported from opencode-antigravity-auth plugin for consistency.
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
export type RateLimitReason =
|
|
7
|
+
| "QUOTA_EXHAUSTED"
|
|
8
|
+
| "RATE_LIMIT_EXCEEDED"
|
|
9
|
+
| "MODEL_CAPACITY_EXHAUSTED"
|
|
10
|
+
| "SERVER_ERROR"
|
|
11
|
+
| "UNKNOWN";
|
|
12
|
+
|
|
13
|
+
const QUOTA_EXHAUSTED_BACKOFF_MS = 30 * 60 * 1000; // 30 min
|
|
14
|
+
const RATE_LIMIT_EXCEEDED_BACKOFF_MS = 30 * 1000; // 30s
|
|
15
|
+
const MODEL_CAPACITY_BASE_MS = 45 * 1000; // 45s base
|
|
16
|
+
const MODEL_CAPACITY_JITTER_MS = 30 * 1000; // ±15s
|
|
17
|
+
const SERVER_ERROR_BACKOFF_MS = 20 * 1000; // 20s
|
|
18
|
+
|
|
19
|
+
/**
|
|
20
|
+
* Classify a rate-limit error message into a reason category.
|
|
21
|
+
* Priority order: MODEL_CAPACITY > RATE_LIMIT > QUOTA > SERVER_ERROR > UNKNOWN.
|
|
22
|
+
*
|
|
23
|
+
* "resource exhausted" maps to MODEL_CAPACITY (transient, short wait)
|
|
24
|
+
* "quota exceeded" maps to QUOTA_EXHAUSTED (long wait, switch account)
|
|
25
|
+
*/
|
|
26
|
+
export function parseRateLimitReason(errorMessage: string): RateLimitReason {
|
|
27
|
+
const lower = errorMessage.toLowerCase();
|
|
28
|
+
|
|
29
|
+
if (
|
|
30
|
+
lower.includes("capacity") ||
|
|
31
|
+
lower.includes("overloaded") ||
|
|
32
|
+
lower.includes("529") ||
|
|
33
|
+
lower.includes("503") ||
|
|
34
|
+
lower.includes("resource exhausted")
|
|
35
|
+
) {
|
|
36
|
+
return "MODEL_CAPACITY_EXHAUSTED";
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
if (
|
|
40
|
+
lower.includes("per minute") ||
|
|
41
|
+
lower.includes("rate limit") ||
|
|
42
|
+
lower.includes("too many requests") ||
|
|
43
|
+
lower.includes("presque")
|
|
44
|
+
) {
|
|
45
|
+
return "RATE_LIMIT_EXCEEDED";
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
if (lower.includes("exhausted") || lower.includes("quota")) {
|
|
49
|
+
return "QUOTA_EXHAUSTED";
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
if (lower.includes("500") || lower.includes("internal error") || lower.includes("internal server error")) {
|
|
53
|
+
return "SERVER_ERROR";
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
return "UNKNOWN";
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
/**
|
|
60
|
+
* Calculate backoff delay in ms for a given rate limit reason.
|
|
61
|
+
* MODEL_CAPACITY gets jitter to prevent thundering herd.
|
|
62
|
+
*/
|
|
63
|
+
export function calculateRateLimitBackoffMs(reason: RateLimitReason): number {
|
|
64
|
+
switch (reason) {
|
|
65
|
+
case "QUOTA_EXHAUSTED":
|
|
66
|
+
return QUOTA_EXHAUSTED_BACKOFF_MS;
|
|
67
|
+
case "RATE_LIMIT_EXCEEDED":
|
|
68
|
+
return RATE_LIMIT_EXCEEDED_BACKOFF_MS;
|
|
69
|
+
case "MODEL_CAPACITY_EXHAUSTED":
|
|
70
|
+
return MODEL_CAPACITY_BASE_MS + Math.random() * MODEL_CAPACITY_JITTER_MS;
|
|
71
|
+
case "SERVER_ERROR":
|
|
72
|
+
return SERVER_ERROR_BACKOFF_MS;
|
|
73
|
+
default:
|
|
74
|
+
return QUOTA_EXHAUSTED_BACKOFF_MS; // conservative default
|
|
75
|
+
}
|
|
76
|
+
}
|
package/src/utils/validation.ts
CHANGED
|
@@ -89,6 +89,55 @@ function tryParseNumberString(value: string, expectedTypes: string[]): { value:
|
|
|
89
89
|
return { value: parsed, changed: true };
|
|
90
90
|
}
|
|
91
91
|
|
|
92
|
+
function tryParseLeadingJsonContainer(value: string): unknown | undefined {
|
|
93
|
+
const firstChar = value[0];
|
|
94
|
+
const closingChar = firstChar === "{" ? "}" : firstChar === "[" ? "]" : undefined;
|
|
95
|
+
if (!closingChar) return undefined;
|
|
96
|
+
|
|
97
|
+
let depth = 0;
|
|
98
|
+
let inString = false;
|
|
99
|
+
let escaped = false;
|
|
100
|
+
|
|
101
|
+
for (let index = 0; index < value.length; index += 1) {
|
|
102
|
+
const char = value[index];
|
|
103
|
+
|
|
104
|
+
if (inString) {
|
|
105
|
+
if (escaped) {
|
|
106
|
+
escaped = false;
|
|
107
|
+
continue;
|
|
108
|
+
}
|
|
109
|
+
if (char === "\\") {
|
|
110
|
+
escaped = true;
|
|
111
|
+
continue;
|
|
112
|
+
}
|
|
113
|
+
if (char === '"') inString = false;
|
|
114
|
+
continue;
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
if (char === '"') {
|
|
118
|
+
inString = true;
|
|
119
|
+
continue;
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
if (char === firstChar) {
|
|
123
|
+
depth += 1;
|
|
124
|
+
continue;
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
if (char !== closingChar) continue;
|
|
128
|
+
depth -= 1;
|
|
129
|
+
if (depth !== 0) continue;
|
|
130
|
+
|
|
131
|
+
try {
|
|
132
|
+
return JSON.parse(value.slice(0, index + 1)) as unknown;
|
|
133
|
+
} catch {
|
|
134
|
+
return undefined;
|
|
135
|
+
}
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
return undefined;
|
|
139
|
+
}
|
|
140
|
+
|
|
92
141
|
/**
|
|
93
142
|
* Attempts to parse a string as JSON if it looks like a JSON literal and
|
|
94
143
|
* the parsed result matches one of the expected types.
|
|
@@ -112,8 +161,8 @@ function tryParseJsonForTypes(value: string, expectedTypes: string[]): { value:
|
|
|
112
161
|
}
|
|
113
162
|
|
|
114
163
|
// Quick syntactic checks to avoid unnecessary parse attempts
|
|
115
|
-
const looksJsonObject = trimmed.startsWith("{") && trimmed.endsWith("}");
|
|
116
|
-
const looksJsonArray = trimmed.startsWith("[") && trimmed.endsWith("]");
|
|
164
|
+
const looksJsonObject = trimmed.startsWith("{");
|
|
165
|
+
const looksJsonArray = trimmed.startsWith("[");
|
|
117
166
|
const looksJsonLiteral =
|
|
118
167
|
trimmed === "true" || trimmed === "false" || trimmed === "null" || JSON_NUMBER_PATTERN.test(trimmed);
|
|
119
168
|
|
|
@@ -128,7 +177,12 @@ function tryParseJsonForTypes(value: string, expectedTypes: string[]): { value:
|
|
|
128
177
|
return { value: parsed, changed: true };
|
|
129
178
|
}
|
|
130
179
|
} catch {
|
|
131
|
-
|
|
180
|
+
if (looksJsonObject || looksJsonArray) {
|
|
181
|
+
const parsed = tryParseLeadingJsonContainer(trimmed);
|
|
182
|
+
if (parsed !== undefined && matchesExpectedType(parsed, expectedTypes)) {
|
|
183
|
+
return { value: parsed, changed: true };
|
|
184
|
+
}
|
|
185
|
+
}
|
|
132
186
|
return { value, changed: false };
|
|
133
187
|
}
|
|
134
188
|
|