opencode-gemini-auth 1.4.3 → 1.4.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/plugin/notify.test.ts +144 -0
- package/src/plugin/notify.ts +89 -0
- package/src/plugin/request/prepare.ts +42 -0
- package/src/plugin/request.test.ts +30 -0
- package/src/plugin/retry/index.ts +144 -1
- package/src/plugin/retry/quota.ts +29 -10
- package/src/plugin/retry.test.ts +162 -8
- package/src/plugin/types.ts +10 -0
- package/src/plugin.ts +8 -0
package/package.json
CHANGED
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
import { afterEach, beforeEach, describe, expect, it, mock } from "bun:test";
|
|
2
|
+
|
|
3
|
+
import { maybeShowGeminiCapacityToast, notifyInternals } from "./notify";
|
|
4
|
+
import type { PluginClient } from "./types";
|
|
5
|
+
|
|
6
|
+
/**
 * Builds a 429 JSON response whose body is an array-wrapped Google RPC error
 * envelope containing a single ErrorInfo detail with the supplied reason.
 */
function makeQuota429(reason: string): Response {
  const errorInfo = {
    "@type": "type.googleapis.com/google.rpc.ErrorInfo",
    reason,
    domain: "cloudcode-pa.googleapis.com",
  };
  const envelope = {
    error: {
      message: "rate limited",
      details: [errorInfo],
    },
  };
  const serialized = JSON.stringify([envelope]);

  return new Response(serialized, {
    status: 429,
    headers: { "content-type": "application/json" },
  });
}
|
|
28
|
+
|
|
29
|
+
describe("maybeShowGeminiCapacityToast", () => {
  const flagName = "OPENCODE_GEMINI_TEST_TOAST";
  // Snapshot of the env flag so each test can restore the developer's setting.
  const flagBeforeSuite = process.env[flagName];
  const projectId = "project-1";
  const modelId = "gemini-3-flash-preview";

  /** Creates a toast spy plus a minimal plugin client that routes toasts to it. */
  function createToastHarness() {
    const showToast = mock(async (_input: unknown) => true);
    const client = { auth: { set: async () => {} }, tui: { showToast } } as PluginClient;
    return { client, showToast };
  }

  beforeEach(() => {
    // Start every test with empty dedupe state and the test-toast flag unset.
    notifyInternals.resetCooldowns();
    delete process.env[flagName];
  });

  afterEach(() => {
    mock.restore();
    if (flagBeforeSuite !== undefined) {
      process.env[flagName] = flagBeforeSuite;
      return;
    }
    delete process.env[flagName];
  });

  it("shows toast for MODEL_CAPACITY_EXHAUSTED", async () => {
    const { client, showToast } = createToastHarness();
    const capacityResponse = makeQuota429("MODEL_CAPACITY_EXHAUSTED");

    await maybeShowGeminiCapacityToast(client, capacityResponse, projectId, modelId);

    expect(showToast.mock.calls.length).toBe(1);
    const [onlyCall] = showToast.mock.calls;
    expect(onlyCall?.[0]).toEqual({
      body: {
        title: "Gemini Capacity Unavailable",
        message:
          "Google reports temporary server capacity limits for gemini-3-flash-preview. Please retry in a few seconds.",
        variant: "warning",
        duration: 7000,
      },
    });
  });

  it("does not show toast for non-capacity 429 reasons", async () => {
    const { client, showToast } = createToastHarness();
    const rateLimitResponse = makeQuota429("RATE_LIMIT_EXCEEDED");

    await maybeShowGeminiCapacityToast(client, rateLimitResponse, projectId, modelId);

    expect(showToast.mock.calls.length).toBe(0);
  });

  it("dedupes toasts within cooldown window", async () => {
    const { client, showToast } = createToastHarness();

    // Two identical capacity failures back to back: only the first may toast.
    for (let round = 0; round < 2; round += 1) {
      await maybeShowGeminiCapacityToast(
        client,
        makeQuota429("MODEL_CAPACITY_EXHAUSTED"),
        projectId,
        modelId,
      );
    }

    expect(showToast.mock.calls.length).toBe(1);
  });
});
|
|
96
|
+
|
|
97
|
+
describe("maybeShowGeminiTestToast", () => {
  const flagName = "OPENCODE_GEMINI_TEST_TOAST";
  // Snapshot of the env flag so each test can restore the developer's setting.
  const flagBeforeSuite = process.env[flagName];

  /** Creates a toast spy plus a minimal plugin client that routes toasts to it. */
  function createToastHarness() {
    const showToast = mock(async (_input: unknown) => true);
    const client = { auth: { set: async () => {} }, tui: { showToast } } as PluginClient;
    return { client, showToast };
  }

  beforeEach(() => {
    // Clear the "already shown" bookkeeping and make sure the flag starts unset.
    notifyInternals.resetCooldowns();
    delete process.env[flagName];
  });

  afterEach(() => {
    mock.restore();
    if (flagBeforeSuite !== undefined) {
      process.env[flagName] = flagBeforeSuite;
      return;
    }
    delete process.env[flagName];
  });

  it("does not show test toast when flag is not enabled", async () => {
    const { maybeShowGeminiTestToast } = await import("./notify");
    const { client, showToast } = createToastHarness();

    await maybeShowGeminiTestToast(client, "project-1");

    expect(showToast.mock.calls.length).toBe(0);
  });

  it("shows test toast once per project when flag is enabled", async () => {
    process.env[flagName] = "1";
    const { maybeShowGeminiTestToast } = await import("./notify");
    const { client, showToast } = createToastHarness();

    // The second call for the same project must be suppressed.
    await maybeShowGeminiTestToast(client, "project-1");
    await maybeShowGeminiTestToast(client, "project-1");

    expect(showToast.mock.calls.length).toBe(1);
    const [onlyCall] = showToast.mock.calls;
    expect(onlyCall?.[0]).toEqual({
      body: {
        title: "Gemini Toast Test",
        message: "Temporary test toast from opencode-gemini-auth.",
        variant: "info",
        duration: 5000,
      },
    });
  });
});
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
import { classifyQuotaResponse } from "./retry/quota";
|
|
2
|
+
import { isGeminiDebugEnabled, logGeminiDebugMessage } from "./debug";
|
|
3
|
+
import type { PluginClient } from "./types";
|
|
4
|
+
|
|
5
|
+
const MODEL_CAPACITY_TOAST_COOLDOWN_MS = 30_000;
|
|
6
|
+
const modelCapacityToastCooldownByKey = new Map<string, number>();
|
|
7
|
+
const TEST_TOAST_FLAG = "OPENCODE_GEMINI_TEST_TOAST";
|
|
8
|
+
const testToastShownByProject = new Set<string>();
|
|
9
|
+
|
|
10
|
+
/**
 * Emits a user-facing toast for server-side Gemini model capacity exhaustion.
 *
 * We deliberately notify only `MODEL_CAPACITY_EXHAUSTED` (not generic 429s)
 * so we do not mislabel account-level quota limits as backend incidents.
 *
 * Toasts are deduplicated per project/model pair for
 * `MODEL_CAPACITY_TOAST_COOLDOWN_MS`. Showing the toast is best-effort: a
 * rejected `showToast` call is logged (when debug logging is enabled) and
 * swallowed, so a UI failure never turns into a failed Gemini request.
 *
 * @param client - Plugin client; nothing happens when it has no `tui.showToast`.
 * @param response - Upstream response; anything other than status 429 is ignored.
 * @param projectId - Project identifier used in the dedupe key and debug log.
 * @param requestedModel - Model name shown in the message; a generic label is
 *   used when it is omitted.
 */
export async function maybeShowGeminiCapacityToast(
  client: PluginClient,
  response: Response,
  projectId: string,
  requestedModel?: string,
): Promise<void> {
  if (response.status !== 429 || !client.tui?.showToast) {
    return;
  }

  // NOTE(review): the caller forwards `response` afterwards, so this assumes
  // classifyQuotaResponse inspects the body without consuming it — confirm.
  const quotaContext = await classifyQuotaResponse(response);
  if (quotaContext?.reason !== "MODEL_CAPACITY_EXHAUSTED") {
    return;
  }

  const model = requestedModel ?? "the selected model";
  const toastKey = `${projectId}|${model}|MODEL_CAPACITY_EXHAUSTED`;
  const now = Date.now();
  const cooldownUntil = modelCapacityToastCooldownByKey.get(toastKey) ?? 0;
  if (cooldownUntil > now) {
    return;
  }

  // Drop expired entries so the map does not grow with every project/model
  // pair seen over the process lifetime.
  for (const [key, expiresAt] of modelCapacityToastCooldownByKey) {
    if (expiresAt <= now) {
      modelCapacityToastCooldownByKey.delete(key);
    }
  }
  // Record the cooldown before awaiting so concurrent 429s cannot double-toast.
  modelCapacityToastCooldownByKey.set(toastKey, now + MODEL_CAPACITY_TOAST_COOLDOWN_MS);

  try {
    await client.tui.showToast({
      body: {
        title: "Gemini Capacity Unavailable",
        message: `Google reports temporary server capacity limits for ${model}. Please retry in a few seconds.`,
        variant: "warning",
        duration: 7000,
      },
    });
  } catch (error) {
    // The toast is purely informational; never let it break the request path.
    if (isGeminiDebugEnabled()) {
      const reason = error instanceof Error ? error.message : String(error);
      logGeminiDebugMessage(
        `Toast: failed to emit capacity warning for model=${model} project=${projectId}: ${reason}`,
      );
    }
    return;
  }

  if (isGeminiDebugEnabled()) {
    logGeminiDebugMessage(`Toast: emitted capacity warning for model=${model} project=${projectId}`);
  }
}
|
|
52
|
+
|
|
53
|
+
/**
 * Temporary smoke-test toast, enabled only with OPENCODE_GEMINI_TEST_TOAST=1.
 * Emits once per project per process lifetime to avoid toast spam.
 *
 * Showing the toast is best-effort: a rejected `showToast` call is logged
 * (when debug logging is enabled) and swallowed so a diagnostic feature can
 * never abort the caller.
 *
 * @param client - Plugin client; nothing happens when it has no `tui.showToast`.
 * @param projectId - Project identifier; an empty value is tracked as "global".
 */
export async function maybeShowGeminiTestToast(
  client: PluginClient,
  projectId: string,
): Promise<void> {
  if (process.env[TEST_TOAST_FLAG]?.trim() !== "1" || !client.tui?.showToast) {
    return;
  }

  const key = projectId || "global";
  if (testToastShownByProject.has(key)) {
    return;
  }
  // Mark as shown before awaiting so concurrent callers cannot double-toast.
  testToastShownByProject.add(key);

  try {
    await client.tui.showToast({
      body: {
        title: "Gemini Toast Test",
        message: "Temporary test toast from opencode-gemini-auth.",
        variant: "info",
        duration: 5000,
      },
    });
  } catch (error) {
    // A failing smoke-test toast must not propagate into the caller.
    if (isGeminiDebugEnabled()) {
      const reason = error instanceof Error ? error.message : String(error);
      logGeminiDebugMessage(`Toast: failed to emit test toast (project=${key}): ${reason}`);
    }
    return;
  }

  if (isGeminiDebugEnabled()) {
    logGeminiDebugMessage(`Toast: emitted test toast (project=${key})`);
  }
}
|
|
83
|
+
|
|
84
|
+
/** Hooks exposing this module's in-memory toast bookkeeping so it can be reset. */
export const notifyInternals = {
  /** Empties both the capacity-toast cooldown map and the test-toast project set. */
  resetCooldowns(): void {
    for (const store of [modelCapacityToastCooldownByKey, testToastShownByProject]) {
      store.clear();
    }
  },
};
|
|
@@ -116,6 +116,7 @@ function transformRequestBody(
|
|
|
116
116
|
normalizeThinking(requestPayload);
|
|
117
117
|
normalizeSystemInstruction(requestPayload);
|
|
118
118
|
normalizeCachedContent(requestPayload);
|
|
119
|
+
stripThoughtPartsFromHistory(requestPayload);
|
|
119
120
|
|
|
120
121
|
if ("model" in requestPayload) {
|
|
121
122
|
delete requestPayload.model;
|
|
@@ -188,3 +189,44 @@ function normalizeCachedContent(requestPayload: Record<string, unknown>): void {
|
|
|
188
189
|
delete requestPayload.extra_body;
|
|
189
190
|
}
|
|
190
191
|
}
|
|
192
|
+
|
|
193
|
+
/**
 * Rewrites `requestPayload.contents` so that parts flagged `thought: true`
 * are removed from every turn that carries a `parts` array.
 *
 * Turns that are not objects, or that have no `parts` array, are kept as-is.
 * A `role: "model"` turn left with zero parts is dropped entirely. Does
 * nothing when `contents` is not an array.
 */
function stripThoughtPartsFromHistory(requestPayload: Record<string, unknown>): void {
  const history = requestPayload.contents;
  if (!Array.isArray(history)) {
    return;
  }

  // Sentinel meaning "omit this turn"; distinct from any value a turn can hold.
  const DROP = Symbol("drop-turn");

  const isThoughtPart = (part: unknown): boolean =>
    Boolean(part) &&
    typeof part === "object" &&
    (part as Record<string, unknown>).thought === true;

  const sanitizeTurn = (entry: unknown): unknown => {
    if (!entry || typeof entry !== "object") {
      return entry;
    }
    const turn = entry as Record<string, unknown>;
    if (!Array.isArray(turn.parts)) {
      return entry;
    }

    const visibleParts = turn.parts.filter((part: unknown) => !isThoughtPart(part));

    // Drop empty model turns produced by interrupted thought streaming.
    if (turn.role === "model" && visibleParts.length === 0) {
      return DROP;
    }
    return { ...turn, parts: visibleParts };
  };

  const kept: unknown[] = [];
  for (const entry of history) {
    const sanitized = sanitizeTurn(entry);
    if (sanitized !== DROP) {
      kept.push(sanitized);
    }
  }
  requestPayload.contents = kept;
}
|
|
@@ -55,6 +55,36 @@ describe("request helpers", () => {
|
|
|
55
55
|
expect((parsed.request as Record<string, unknown>).system_instruction).toBeUndefined();
|
|
56
56
|
});
|
|
57
57
|
|
|
58
|
+
it("drops thought-only model parts from replayed history", () => {
|
|
59
|
+
const input =
|
|
60
|
+
"https://generativelanguage.googleapis.com/v1beta/models/gemini-3-flash-preview:streamGenerateContent";
|
|
61
|
+
const init: RequestInit = {
|
|
62
|
+
method: "POST",
|
|
63
|
+
headers: {
|
|
64
|
+
"Content-Type": "application/json",
|
|
65
|
+
},
|
|
66
|
+
body: JSON.stringify({
|
|
67
|
+
contents: [
|
|
68
|
+
{ role: "user", parts: [{ text: "give me a joke" }] },
|
|
69
|
+
{
|
|
70
|
+
role: "model",
|
|
71
|
+
parts: [{ text: "internal thought", thought: true }],
|
|
72
|
+
},
|
|
73
|
+
{ role: "user", parts: [{ text: "well?" }] },
|
|
74
|
+
],
|
|
75
|
+
}),
|
|
76
|
+
};
|
|
77
|
+
|
|
78
|
+
const result = prepareGeminiRequest(input, init, "token-123", "project-456");
|
|
79
|
+
const parsed = JSON.parse(result.init.body as string) as Record<string, unknown>;
|
|
80
|
+
const request = parsed.request as Record<string, unknown>;
|
|
81
|
+
const contents = request.contents as Array<Record<string, unknown>>;
|
|
82
|
+
|
|
83
|
+
expect(contents.length).toBe(2);
|
|
84
|
+
expect(contents[0]?.role).toBe("user");
|
|
85
|
+
expect(contents[1]?.role).toBe("user");
|
|
86
|
+
});
|
|
87
|
+
|
|
58
88
|
it("maps traceId to responseId for JSON responses", async () => {
|
|
59
89
|
const response = new Response(
|
|
60
90
|
JSON.stringify({
|
|
@@ -8,6 +8,11 @@ import {
|
|
|
8
8
|
wait,
|
|
9
9
|
} from "./helpers";
|
|
10
10
|
import { classifyQuotaResponse, retryInternals } from "./quota";
|
|
11
|
+
import { isGeminiDebugEnabled, logGeminiDebugMessage } from "../debug";
|
|
12
|
+
|
|
13
|
+
const retryCooldownByKey = new Map<string, number>();
|
|
14
|
+
const RETRY_IN_FLIGHT_LOG_INTERVAL_MS = 5000;
|
|
15
|
+
const MODEL_CAPACITY_COOLDOWN_MS = 8000;
|
|
11
16
|
|
|
12
17
|
/**
|
|
13
18
|
* Sends requests with retry/backoff semantics aligned to Gemini CLI:
|
|
@@ -24,39 +29,74 @@ export async function fetchWithRetry(
|
|
|
24
29
|
}
|
|
25
30
|
|
|
26
31
|
const retryInit = cloneRetryableInit(init);
|
|
32
|
+
const throttleKey = buildRetryThrottleKey(input, retryInit);
|
|
33
|
+
await waitForRetryCooldown(throttleKey, retryInit.signal);
|
|
27
34
|
let attempt = 1;
|
|
35
|
+
const url = readRequestUrl(input);
|
|
28
36
|
|
|
29
37
|
while (attempt <= DEFAULT_MAX_ATTEMPTS) {
|
|
30
38
|
let response: Response;
|
|
39
|
+
const stopInFlightLog = startInFlightLog(attempt, url);
|
|
31
40
|
try {
|
|
41
|
+
debugRetry(
|
|
42
|
+
`attempt ${attempt}/${DEFAULT_MAX_ATTEMPTS} -> ${url}`,
|
|
43
|
+
);
|
|
32
44
|
response = await fetch(input, retryInit);
|
|
33
45
|
} catch (error) {
|
|
46
|
+
stopInFlightLog();
|
|
34
47
|
if (attempt >= DEFAULT_MAX_ATTEMPTS || !isRetryableNetworkError(error)) {
|
|
48
|
+
debugRetry(
|
|
49
|
+
`attempt ${attempt} network error is non-retryable or maxed: ${formatErrorSummary(error)}`,
|
|
50
|
+
);
|
|
35
51
|
throw error;
|
|
36
52
|
}
|
|
37
53
|
if (retryInit.signal?.aborted) {
|
|
54
|
+
debugRetry(`attempt ${attempt} aborted before retry`);
|
|
38
55
|
throw error;
|
|
39
56
|
}
|
|
40
57
|
|
|
41
|
-
|
|
58
|
+
const delayMs = getExponentialDelayWithJitter(attempt);
|
|
59
|
+
debugRetry(
|
|
60
|
+
`attempt ${attempt} network retry scheduled in ${delayMs}ms (${formatErrorSummary(error)})`,
|
|
61
|
+
);
|
|
62
|
+
await wait(delayMs);
|
|
42
63
|
attempt += 1;
|
|
43
64
|
continue;
|
|
44
65
|
}
|
|
66
|
+
stopInFlightLog();
|
|
45
67
|
|
|
46
68
|
if (!isRetryableStatus(response.status)) {
|
|
69
|
+
debugRetry(`attempt ${attempt} success or non-retryable status: ${response.status}`);
|
|
47
70
|
return response;
|
|
48
71
|
}
|
|
49
72
|
|
|
50
73
|
const quotaContext = response.status === 429 ? await classifyQuotaResponse(response) : null;
|
|
51
74
|
if (response.status === 429 && quotaContext?.terminal) {
|
|
75
|
+
if (quotaContext.reason === "MODEL_CAPACITY_EXHAUSTED") {
|
|
76
|
+
const cooldownMs = quotaContext.retryDelayMs ?? MODEL_CAPACITY_COOLDOWN_MS;
|
|
77
|
+
setRetryCooldown(throttleKey, cooldownMs);
|
|
78
|
+
debugRetry(`terminal model capacity; cooldown ${cooldownMs}ms before next request`);
|
|
79
|
+
}
|
|
80
|
+
debugRetry(
|
|
81
|
+
`attempt ${attempt} terminal 429 (${quotaContext.reason ?? "unknown"}), returning without retry`,
|
|
82
|
+
);
|
|
52
83
|
return response;
|
|
53
84
|
}
|
|
54
85
|
|
|
55
86
|
if (attempt >= DEFAULT_MAX_ATTEMPTS || retryInit.signal?.aborted) {
|
|
87
|
+
debugRetry(
|
|
88
|
+
`attempt ${attempt} reached retry boundary (status=${response.status})`,
|
|
89
|
+
);
|
|
56
90
|
return response;
|
|
57
91
|
}
|
|
58
92
|
|
|
59
93
|
const delayMs = await resolveRetryDelayMs(response, attempt, quotaContext?.retryDelayMs);
|
|
94
|
+
debugRetry(
|
|
95
|
+
`attempt ${attempt} retrying status=${response.status} reason=${quotaContext?.reason ?? "n/a"} delay=${delayMs}ms`,
|
|
96
|
+
);
|
|
97
|
+
if (delayMs > 0 && response.status === 429) {
|
|
98
|
+
setRetryCooldown(throttleKey, delayMs);
|
|
99
|
+
}
|
|
60
100
|
if (delayMs > 0) {
|
|
61
101
|
await wait(delayMs);
|
|
62
102
|
}
|
|
@@ -76,4 +116,107 @@ function cloneRetryableInit(init: RequestInit | undefined): RequestInit {
|
|
|
76
116
|
};
|
|
77
117
|
}
|
|
78
118
|
|
|
119
|
+
/**
 * Derives the cooldown key for a request: the request URL joined with the
 * `project` and `model` strings found in a JSON string body, separated by `|`.
 * Missing or non-string fields contribute an empty segment.
 */
function buildRetryThrottleKey(input: RequestInfo, init: RequestInit): string {
  const target = readRequestUrl(input);
  const rawBody = init.body;
  // Only string bodies are inspected; any other body type yields empty segments.
  const payload = typeof rawBody === "string" ? safeParseBody(rawBody) : null;
  const projectSegment = readString(payload?.project) ?? "";
  const modelSegment = readString(payload?.model) ?? "";
  return [target, projectSegment, modelSegment].join("|");
}
|
|
126
|
+
|
|
127
|
+
/**
 * Delays the caller until the cooldown recorded for `key` has elapsed.
 *
 * Returns immediately when no cooldown is stored, when it has already
 * expired (the stale entry is removed), or when `signal` is already aborted.
 *
 * After waiting, the entry is removed only if it still holds the deadline
 * this call waited on. A concurrent request may have extended the cooldown
 * via `setRetryCooldown` in the meantime, and that longer deadline must
 * remain in place for later requests.
 *
 * NOTE(review): the wait itself is not interrupted by an abort that happens
 * mid-wait; whether `wait` can accept a signal is not visible here — confirm.
 *
 * @param key - Throttle key produced by `buildRetryThrottleKey`.
 * @param signal - Optional abort signal; only its current state is checked.
 */
async function waitForRetryCooldown(key: string, signal?: AbortSignal | null): Promise<void> {
  const until = retryCooldownByKey.get(key);
  if (until === undefined) {
    return;
  }

  const remaining = until - Date.now();
  // `!(remaining > 0)` also treats a NaN deadline as expired instead of waiting on it.
  if (!(remaining > 0)) {
    retryCooldownByKey.delete(key);
    return;
  }
  if (signal?.aborted) {
    debugRetry(`cooldown skipped due to abort (key=${shortKey(key)})`);
    return;
  }

  debugRetry(`cooldown wait ${remaining}ms (key=${shortKey(key)})`);
  await wait(remaining);

  // Only clear the deadline we actually waited for; keep any extension that
  // was recorded while this call was sleeping.
  if (retryCooldownByKey.get(key) === until) {
    retryCooldownByKey.delete(key);
  }
}
|
|
147
|
+
|
|
148
|
+
/**
 * Records a cooldown for `key` ending `delayMs` from now. An existing later
 * deadline is kept, so a cooldown can be extended but never shortened.
 */
function setRetryCooldown(key: string, delayMs: number): void {
  const proposedDeadline = Date.now() + delayMs;
  const existingDeadline = retryCooldownByKey.get(key) ?? 0;
  const effectiveDeadline = Math.max(existingDeadline, proposedDeadline);

  retryCooldownByKey.set(key, effectiveDeadline);
  debugRetry(`cooldown set ${delayMs}ms (key=${shortKey(key)})`);
}
|
|
154
|
+
|
|
155
|
+
/**
 * Returns a URL string for a fetch input: strings are returned unchanged,
 * `URL` instances are stringified, and other inputs yield their `url`
 * property, falling back to `toString()` when that property is empty.
 */
function readRequestUrl(input: RequestInfo): string {
  if (typeof input === "string") {
    return input;
  }
  if (input instanceof URL) {
    return input.toString();
  }

  const { url } = input as Request;
  return url ? url : input.toString();
}
|
|
169
|
+
|
|
170
|
+
/**
 * Best-effort JSON parse of a request body.
 *
 * @param body - Raw request body text.
 * @returns The parsed value when it is a plain JSON object; `null` for empty
 *   input, invalid JSON, primitives, `null`, and arrays. Arrays are rejected
 *   because callers read named fields from the result, and an array does not
 *   match the declared `Record<string, unknown>` shape.
 */
function safeParseBody(body: string): Record<string, unknown> | null {
  if (!body) {
    return null;
  }

  let parsed: unknown;
  try {
    parsed = JSON.parse(body);
  } catch {
    // Deliberately best-effort: an unparseable body just means "no fields".
    return null;
  }

  if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
    return null;
  }
  return parsed as Record<string, unknown>;
}
|
|
183
|
+
|
|
184
|
+
/**
 * Returns `value` unchanged when it is a string containing at least one
 * non-whitespace character; otherwise returns `undefined`.
 */
function readString(value: unknown): string | undefined {
  if (typeof value !== "string") {
    return undefined;
  }
  return value.trim().length > 0 ? value : undefined;
}
|
|
187
|
+
|
|
188
|
+
/** Writes a `Retry: `-prefixed debug line, but only when Gemini debug logging is enabled. */
function debugRetry(message: string): void {
  if (isGeminiDebugEnabled()) {
    logGeminiDebugMessage(`Retry: ${message}`);
  }
}
|
|
194
|
+
|
|
195
|
+
/** Produces a one-line description of a thrown value: `message` for Errors, `String(value)` otherwise. */
function formatErrorSummary(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}
|
|
201
|
+
|
|
202
|
+
/** Truncates a throttle key to 120 characters (plus `...`) so log lines stay readable. */
function shortKey(key: string): string {
  const maxLength = 120;
  if (key.length > maxLength) {
    return `${key.slice(0, maxLength)}...`;
  }
  return key;
}
|
|
205
|
+
|
|
206
|
+
/**
 * Starts a periodic "still waiting" debug log for an in-flight attempt.
 *
 * @returns A function that stops the periodic log. When debug logging is
 *   disabled no timer is created and the returned function does nothing.
 */
function startInFlightLog(attempt: number, url: string): () => void {
  if (!isGeminiDebugEnabled()) {
    return () => {};
  }

  const begunAt = Date.now();
  const reportProgress = (): void => {
    const elapsed = Date.now() - begunAt;
    debugRetry(`attempt ${attempt} still waiting for response (${elapsed}ms) -> ${url}`);
  };
  const timer = setInterval(reportProgress, RETRY_IN_FLIGHT_LOG_INTERVAL_MS);

  return () => {
    clearInterval(timer);
  };
}
|
|
221
|
+
|
|
79
222
|
export { retryInternals };
|
|
@@ -23,6 +23,7 @@ interface GoogleRpcRetryInfo {
|
|
|
23
23
|
export interface QuotaContext {
|
|
24
24
|
terminal: boolean;
|
|
25
25
|
retryDelayMs?: number;
|
|
26
|
+
reason?: string;
|
|
26
27
|
}
|
|
27
28
|
|
|
28
29
|
const CLOUDCODE_DOMAINS = new Set([
|
|
@@ -65,10 +66,17 @@ export async function classifyQuotaResponse(response: Response): Promise<QuotaCo
|
|
|
65
66
|
return null;
|
|
66
67
|
}
|
|
67
68
|
if (errorInfo?.reason === "QUOTA_EXHAUSTED") {
|
|
68
|
-
return { terminal: true, retryDelayMs };
|
|
69
|
+
return { terminal: true, retryDelayMs, reason: errorInfo.reason };
|
|
69
70
|
}
|
|
70
71
|
if (errorInfo?.reason === "RATE_LIMIT_EXCEEDED") {
|
|
71
|
-
return { terminal: false, retryDelayMs: retryDelayMs ?? 10_000 };
|
|
72
|
+
return { terminal: false, retryDelayMs: retryDelayMs ?? 10_000, reason: errorInfo.reason };
|
|
73
|
+
}
|
|
74
|
+
if (errorInfo?.reason === "MODEL_CAPACITY_EXHAUSTED") {
|
|
75
|
+
return {
|
|
76
|
+
terminal: retryDelayMs === undefined,
|
|
77
|
+
retryDelayMs,
|
|
78
|
+
reason: errorInfo.reason,
|
|
79
|
+
};
|
|
72
80
|
}
|
|
73
81
|
|
|
74
82
|
const quotaFailure = details.find(
|
|
@@ -83,20 +91,20 @@ export async function classifyQuotaResponse(response: Response): Promise<QuotaCo
|
|
|
83
91
|
.toLowerCase();
|
|
84
92
|
|
|
85
93
|
if (allTexts.includes("perday") || allTexts.includes("daily") || allTexts.includes("per day")) {
|
|
86
|
-
return { terminal: true, retryDelayMs };
|
|
94
|
+
return { terminal: true, retryDelayMs, reason: errorInfo?.reason };
|
|
87
95
|
}
|
|
88
96
|
if (allTexts.includes("perminute") || allTexts.includes("per minute")) {
|
|
89
|
-
return { terminal: false, retryDelayMs: retryDelayMs ?? 60_000 };
|
|
97
|
+
return { terminal: false, retryDelayMs: retryDelayMs ?? 60_000, reason: errorInfo?.reason };
|
|
90
98
|
}
|
|
91
|
-
return { terminal: false, retryDelayMs };
|
|
99
|
+
return { terminal: false, retryDelayMs, reason: errorInfo?.reason };
|
|
92
100
|
}
|
|
93
101
|
|
|
94
102
|
const quotaLimit = errorInfo?.metadata?.quota_limit?.toLowerCase() ?? "";
|
|
95
103
|
if (quotaLimit.includes("perminute") || quotaLimit.includes("per minute")) {
|
|
96
|
-
return { terminal: false, retryDelayMs: retryDelayMs ?? 60_000 };
|
|
104
|
+
return { terminal: false, retryDelayMs: retryDelayMs ?? 60_000, reason: errorInfo?.reason };
|
|
97
105
|
}
|
|
98
106
|
|
|
99
|
-
return { terminal: false, retryDelayMs };
|
|
107
|
+
return { terminal: false, retryDelayMs, reason: errorInfo?.reason };
|
|
100
108
|
}
|
|
101
109
|
|
|
102
110
|
/**
|
|
@@ -191,12 +199,15 @@ async function parseErrorBody(
|
|
|
191
199
|
return null;
|
|
192
200
|
}
|
|
193
201
|
|
|
194
|
-
|
|
202
|
+
const normalized = normalizeErrorEnvelope(parsed);
|
|
203
|
+
if (!normalized || !isObject(normalized.error)) {
|
|
195
204
|
return null;
|
|
196
205
|
}
|
|
206
|
+
|
|
207
|
+
const error = normalized.error as Record<string, unknown>;
|
|
197
208
|
return {
|
|
198
|
-
message: typeof
|
|
199
|
-
details: Array.isArray(
|
|
209
|
+
message: typeof error.message === "string" ? error.message : undefined,
|
|
210
|
+
details: Array.isArray(error.details) ? error.details : undefined,
|
|
200
211
|
};
|
|
201
212
|
}
|
|
202
213
|
|
|
@@ -204,6 +215,14 @@ function isObject(value: unknown): value is Record<string, any> {
|
|
|
204
215
|
return !!value && typeof value === "object";
|
|
205
216
|
}
|
|
206
217
|
|
|
218
|
+
/**
 * Unwraps an error payload that may arrive either as an object or as an array
 * whose first element is that object. Returns `null` when the candidate
 * (the first element for arrays, the value itself otherwise) is not an object.
 */
function normalizeErrorEnvelope(parsed: unknown): Record<string, unknown> | null {
  const candidate: unknown = Array.isArray(parsed) ? parsed[0] : parsed;
  return isObject(candidate) ? candidate : null;
}
|
|
225
|
+
|
|
207
226
|
export const retryInternals = {
|
|
208
227
|
parseRetryDelayValue,
|
|
209
228
|
parseRetryDelayFromMessage,
|
package/src/plugin/retry.test.ts
CHANGED
|
@@ -3,8 +3,13 @@ import { afterEach, beforeEach, describe, expect, it, mock } from "bun:test";
|
|
|
3
3
|
import { fetchWithRetry, retryInternals } from "./retry";
|
|
4
4
|
|
|
5
5
|
const originalSetTimeout = globalThis.setTimeout;
|
|
6
|
+
const scheduledDelays: number[] = [];
|
|
6
7
|
|
|
7
|
-
function makeQuota429(
|
|
8
|
+
function makeQuota429(
|
|
9
|
+
reason: "RATE_LIMIT_EXCEEDED" | "QUOTA_EXHAUSTED" | "MODEL_CAPACITY_EXHAUSTED",
|
|
10
|
+
retryDelay?: string,
|
|
11
|
+
wrappedAsArray = false,
|
|
12
|
+
): Response {
|
|
8
13
|
const details: Record<string, unknown>[] = [
|
|
9
14
|
{
|
|
10
15
|
"@type": "type.googleapis.com/google.rpc.ErrorInfo",
|
|
@@ -18,13 +23,49 @@ function makeQuota429(reason: "RATE_LIMIT_EXCEEDED" | "QUOTA_EXHAUSTED", retryDe
|
|
|
18
23
|
retryDelay,
|
|
19
24
|
});
|
|
20
25
|
}
|
|
26
|
+
const payload = {
|
|
27
|
+
error: {
|
|
28
|
+
message: "rate limited",
|
|
29
|
+
details,
|
|
30
|
+
},
|
|
31
|
+
};
|
|
32
|
+
return new Response(
|
|
33
|
+
JSON.stringify(
|
|
34
|
+
wrappedAsArray
|
|
35
|
+
? [payload]
|
|
36
|
+
: payload,
|
|
37
|
+
),
|
|
38
|
+
{
|
|
39
|
+
status: 429,
|
|
40
|
+
headers: { "content-type": "application/json" },
|
|
41
|
+
},
|
|
42
|
+
);
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
function makeQuota429WithMessage(
|
|
46
|
+
reason: "RATE_LIMIT_EXCEEDED" | "QUOTA_EXHAUSTED" | "MODEL_CAPACITY_EXHAUSTED",
|
|
47
|
+
message: string,
|
|
48
|
+
wrappedAsArray = false,
|
|
49
|
+
): Response {
|
|
50
|
+
const details: Record<string, unknown>[] = [
|
|
51
|
+
{
|
|
52
|
+
"@type": "type.googleapis.com/google.rpc.ErrorInfo",
|
|
53
|
+
reason,
|
|
54
|
+
domain: "cloudcode-pa.googleapis.com",
|
|
55
|
+
},
|
|
56
|
+
];
|
|
57
|
+
const payload = {
|
|
58
|
+
error: {
|
|
59
|
+
message,
|
|
60
|
+
details,
|
|
61
|
+
},
|
|
62
|
+
};
|
|
21
63
|
return new Response(
|
|
22
|
-
JSON.stringify(
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
}),
|
|
64
|
+
JSON.stringify(
|
|
65
|
+
wrappedAsArray
|
|
66
|
+
? [payload]
|
|
67
|
+
: payload,
|
|
68
|
+
),
|
|
28
69
|
{
|
|
29
70
|
status: 429,
|
|
30
71
|
headers: { "content-type": "application/json" },
|
|
@@ -35,7 +76,12 @@ function makeQuota429(reason: "RATE_LIMIT_EXCEEDED" | "QUOTA_EXHAUSTED", retryDe
|
|
|
35
76
|
describe("fetchWithRetry", () => {
|
|
36
77
|
beforeEach(() => {
|
|
37
78
|
mock.restore();
|
|
38
|
-
|
|
79
|
+
scheduledDelays.length = 0;
|
|
80
|
+
(globalThis as { setTimeout: typeof setTimeout }).setTimeout = ((
|
|
81
|
+
fn: (...args: any[]) => void,
|
|
82
|
+
delay?: number | undefined,
|
|
83
|
+
) => {
|
|
84
|
+
scheduledDelays.push(typeof delay === "number" ? delay : 0);
|
|
39
85
|
fn();
|
|
40
86
|
return 0 as unknown as ReturnType<typeof setTimeout>;
|
|
41
87
|
}) as typeof setTimeout;
|
|
@@ -96,6 +142,84 @@ describe("fetchWithRetry", () => {
|
|
|
96
142
|
expect(fetchMock.mock.calls.length).toBe(1);
|
|
97
143
|
});
|
|
98
144
|
|
|
145
|
+
it("fails fast on model capacity exhaustion when no retry hint is provided", async () => {
|
|
146
|
+
const fetchMock = mock(async () => makeQuota429("MODEL_CAPACITY_EXHAUSTED"));
|
|
147
|
+
(globalThis as { fetch: typeof fetch }).fetch = fetchMock as unknown as typeof fetch;
|
|
148
|
+
|
|
149
|
+
const response = await fetchWithRetry("https://example.com", {
|
|
150
|
+
method: "POST",
|
|
151
|
+
body: JSON.stringify({ hello: "world" }),
|
|
152
|
+
});
|
|
153
|
+
|
|
154
|
+
expect(response.status).toBe(429);
|
|
155
|
+
expect(fetchMock.mock.calls.length).toBe(1);
|
|
156
|
+
});
|
|
157
|
+
|
|
158
|
+
it("fails fast on array-wrapped model capacity exhaustion payload", async () => {
|
|
159
|
+
const fetchMock = mock(async () =>
|
|
160
|
+
makeQuota429WithMessage(
|
|
161
|
+
"MODEL_CAPACITY_EXHAUSTED",
|
|
162
|
+
"No capacity available for model gemini-3-flash-preview on the server",
|
|
163
|
+
true,
|
|
164
|
+
),
|
|
165
|
+
);
|
|
166
|
+
(globalThis as { fetch: typeof fetch }).fetch = fetchMock as unknown as typeof fetch;
|
|
167
|
+
|
|
168
|
+
const response = await fetchWithRetry("https://example.com", {
|
|
169
|
+
method: "POST",
|
|
170
|
+
body: JSON.stringify({ hello: "world" }),
|
|
171
|
+
});
|
|
172
|
+
|
|
173
|
+
expect(response.status).toBe(429);
|
|
174
|
+
expect(fetchMock.mock.calls.length).toBe(1);
|
|
175
|
+
});
|
|
176
|
+
|
|
177
|
+
it("applies cooldown after terminal model capacity exhaustion", async () => {
|
|
178
|
+
const fetchMock = mock(async () => {
|
|
179
|
+
if (fetchMock.mock.calls.length === 1) {
|
|
180
|
+
return makeQuota429WithMessage(
|
|
181
|
+
"MODEL_CAPACITY_EXHAUSTED",
|
|
182
|
+
"No capacity available for model gemini-3-flash-preview on the server",
|
|
183
|
+
true,
|
|
184
|
+
);
|
|
185
|
+
}
|
|
186
|
+
return new Response("ok", { status: 200 });
|
|
187
|
+
});
|
|
188
|
+
(globalThis as { fetch: typeof fetch }).fetch = fetchMock as unknown as typeof fetch;
|
|
189
|
+
|
|
190
|
+
const firstResponse = await fetchWithRetry("https://example.com", {
|
|
191
|
+
method: "POST",
|
|
192
|
+
body: JSON.stringify({ project: "project-1", model: "gemini-3-flash-preview" }),
|
|
193
|
+
});
|
|
194
|
+
const secondResponse = await fetchWithRetry("https://example.com", {
|
|
195
|
+
method: "POST",
|
|
196
|
+
body: JSON.stringify({ project: "project-1", model: "gemini-3-flash-preview" }),
|
|
197
|
+
});
|
|
198
|
+
|
|
199
|
+
expect(firstResponse.status).toBe(429);
|
|
200
|
+
expect(secondResponse.status).toBe(200);
|
|
201
|
+
expect(fetchMock.mock.calls.length).toBe(2);
|
|
202
|
+
expect(scheduledDelays).toContain(8000);
|
|
203
|
+
});
|
|
204
|
+
|
|
205
|
+
it("retries model capacity exhaustion when server provides RetryInfo", async () => {
|
|
206
|
+
const fetchMock = mock(async () => {
|
|
207
|
+
if (fetchMock.mock.calls.length === 1) {
|
|
208
|
+
return makeQuota429("MODEL_CAPACITY_EXHAUSTED", "500ms");
|
|
209
|
+
}
|
|
210
|
+
return new Response("ok", { status: 200 });
|
|
211
|
+
});
|
|
212
|
+
(globalThis as { fetch: typeof fetch }).fetch = fetchMock as unknown as typeof fetch;
|
|
213
|
+
|
|
214
|
+
const response = await fetchWithRetry("https://example.com", {
|
|
215
|
+
method: "POST",
|
|
216
|
+
body: JSON.stringify({ hello: "world" }),
|
|
217
|
+
});
|
|
218
|
+
|
|
219
|
+
expect(response.status).toBe(200);
|
|
220
|
+
expect(fetchMock.mock.calls.length).toBe(2);
|
|
221
|
+
});
|
|
222
|
+
|
|
99
223
|
it("retries immediately when server returns Retry-After: 0", async () => {
|
|
100
224
|
const fetchMock = mock(async () => {
|
|
101
225
|
if (fetchMock.mock.calls.length === 1) {
|
|
@@ -116,6 +240,36 @@ describe("fetchWithRetry", () => {
|
|
|
116
240
|
expect(response.status).toBe(200);
|
|
117
241
|
expect(fetchMock.mock.calls.length).toBe(2);
|
|
118
242
|
});
|
|
243
|
+
|
|
244
|
+
it("applies cooldown across requests to avoid repeated initial 429s", async () => {
|
|
245
|
+
const fetchMock = mock(async () => {
|
|
246
|
+
const callNumber = fetchMock.mock.calls.length;
|
|
247
|
+
if (callNumber === 1) {
|
|
248
|
+
return makeQuota429("RATE_LIMIT_EXCEEDED", "1500ms");
|
|
249
|
+
}
|
|
250
|
+
if (callNumber === 3 && scheduledDelays.length < 2) {
|
|
251
|
+
return makeQuota429("RATE_LIMIT_EXCEEDED", "1500ms");
|
|
252
|
+
}
|
|
253
|
+
return new Response("ok", { status: 200 });
|
|
254
|
+
});
|
|
255
|
+
(globalThis as { fetch: typeof fetch }).fetch = fetchMock as unknown as typeof fetch;
|
|
256
|
+
|
|
257
|
+
const firstResponse = await fetchWithRetry("https://example.com", {
|
|
258
|
+
method: "POST",
|
|
259
|
+
body: JSON.stringify({ project: "project-1", model: "gemini-2.5-flash" }),
|
|
260
|
+
});
|
|
261
|
+
const secondResponse = await fetchWithRetry("https://example.com", {
|
|
262
|
+
method: "POST",
|
|
263
|
+
body: JSON.stringify({ project: "project-1", model: "gemini-2.5-flash" }),
|
|
264
|
+
});
|
|
265
|
+
|
|
266
|
+
expect(firstResponse.status).toBe(200);
|
|
267
|
+
expect(secondResponse.status).toBe(200);
|
|
268
|
+
expect(fetchMock.mock.calls.length).toBe(3);
|
|
269
|
+
expect(scheduledDelays.length).toBe(2);
|
|
270
|
+
expect(scheduledDelays[0]).toBe(1500);
|
|
271
|
+
expect(scheduledDelays[1]).toBe(1500);
|
|
272
|
+
});
|
|
119
273
|
});
|
|
120
274
|
|
|
121
275
|
describe("retryInternals", () => {
|
package/src/plugin/types.ts
CHANGED
|
@@ -52,6 +52,16 @@ export interface PluginClient {
|
|
|
52
52
|
auth: {
|
|
53
53
|
set(input: { path: { id: string }; body: OAuthAuthDetails }): Promise<void>;
|
|
54
54
|
};
|
|
55
|
+
tui?: {
|
|
56
|
+
showToast(input: {
|
|
57
|
+
body: {
|
|
58
|
+
title?: string;
|
|
59
|
+
message: string;
|
|
60
|
+
variant: "info" | "success" | "warning" | "error";
|
|
61
|
+
duration?: number;
|
|
62
|
+
};
|
|
63
|
+
}): Promise<unknown>;
|
|
64
|
+
};
|
|
55
65
|
}
|
|
56
66
|
|
|
57
67
|
export interface PluginContext {
|
package/src/plugin.ts
CHANGED
|
@@ -8,6 +8,7 @@ import {
|
|
|
8
8
|
GEMINI_QUOTA_TOOL_NAME,
|
|
9
9
|
} from "./plugin/quota";
|
|
10
10
|
import { isGeminiDebugEnabled, logGeminiDebugMessage, startGeminiDebugRequest } from "./plugin/debug";
|
|
11
|
+
import { maybeShowGeminiCapacityToast, maybeShowGeminiTestToast } from "./plugin/notify";
|
|
11
12
|
import {
|
|
12
13
|
isGenerativeLanguageRequest,
|
|
13
14
|
prepareGeminiRequest,
|
|
@@ -98,6 +99,7 @@ export const GeminiCLIOAuthPlugin = async (
|
|
|
98
99
|
client,
|
|
99
100
|
configuredProjectId,
|
|
100
101
|
);
|
|
102
|
+
await maybeShowGeminiTestToast(client, projectContext.effectiveProjectId);
|
|
101
103
|
await maybeLogAvailableQuotaModels(
|
|
102
104
|
authRecord.access,
|
|
103
105
|
projectContext.effectiveProjectId,
|
|
@@ -123,6 +125,12 @@ export const GeminiCLIOAuthPlugin = async (
|
|
|
123
125
|
* We intentionally do not auto-downgrade model tiers to avoid misleading users.
|
|
124
126
|
*/
|
|
125
127
|
const response = await fetchWithRetry(transformed.request, transformed.init);
|
|
128
|
+
await maybeShowGeminiCapacityToast(
|
|
129
|
+
client,
|
|
130
|
+
response,
|
|
131
|
+
projectContext.effectiveProjectId,
|
|
132
|
+
transformed.requestedModel,
|
|
133
|
+
);
|
|
126
134
|
return transformGeminiResponse(
|
|
127
135
|
response,
|
|
128
136
|
transformed.streaming,
|