opencode-gemini-auth 1.4.3 → 1.4.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "opencode-gemini-auth",
3
3
  "module": "index.ts",
4
- "version": "1.4.3",
4
+ "version": "1.4.5",
5
5
  "author": "jenslys",
6
6
  "repository": "https://github.com/jenslys/opencode-gemini-auth",
7
7
  "files": [
@@ -11,7 +11,9 @@
11
11
  "license": "MIT",
12
12
  "type": "module",
13
13
  "scripts": {
14
- "update:gemini-cli": "git -C .local/gemini-cli pull --ff-only"
14
+ "update:gemini-cli": "git -C .local/gemini-cli pull --ff-only",
15
+ "update:gemini-cli-version": "node commands/sync-gemini-cli-version.mjs",
16
+ "update:gemini-cli-sync": "npm run update:gemini-cli && npm run update:gemini-cli-version"
15
17
  },
16
18
  "devDependencies": {
17
19
  "@types/bun": "latest"
package/src/constants.ts CHANGED
@@ -28,7 +28,6 @@ export const GEMINI_REDIRECT_URI = "http://localhost:8085/oauth2callback";
28
28
  export const GEMINI_CODE_ASSIST_ENDPOINT = "https://cloudcode-pa.googleapis.com";
29
29
 
30
30
  export const CODE_ASSIST_HEADERS = {
31
- "User-Agent": "google-api-nodejs-client/9.15.1",
32
31
  "X-Goog-Api-Client": "gl-node/22.17.0",
33
32
  "Client-Metadata": "ideType=IDE_UNSPECIFIED,platform=PLATFORM_UNSPECIFIED,pluginType=GEMINI",
34
33
  } as const;
@@ -0,0 +1,5 @@
1
+ /**
2
+ * Synced from `.local/gemini-cli/packages/cli/package.json`.
3
+ * Update with: `npm run update:gemini-cli-version`
4
+ */
5
+ export const GEMINI_CLI_VERSION = "0.30.0-nightly.20260210.a2174751d";
@@ -0,0 +1,144 @@
1
+ import { afterEach, beforeEach, describe, expect, it, mock } from "bun:test";
2
+
3
+ import { maybeShowGeminiCapacityToast, notifyInternals } from "./notify";
4
+ import type { PluginClient } from "./types";
5
+
6
/**
 * Builds a fake HTTP 429 response whose JSON body is an array-wrapped Google
 * RPC error envelope carrying a single `ErrorInfo` detail with the given reason.
 */
function makeQuota429(reason: string): Response {
  const errorInfo = {
    "@type": "type.googleapis.com/google.rpc.ErrorInfo",
    reason,
    domain: "cloudcode-pa.googleapis.com",
  };
  const envelope = {
    error: {
      message: "rate limited",
      details: [errorInfo],
    },
  };
  const responseInit = {
    status: 429,
    headers: { "content-type": "application/json" },
  };

  // The body is deliberately wrapped in an array.
  return new Response(JSON.stringify([envelope]), responseInit);
}
28
+
29
describe("maybeShowGeminiCapacityToast", () => {
  // Snapshot of the smoke-test flag so the suite leaves the caller's environment untouched.
  const savedTestToastFlag = process.env.OPENCODE_GEMINI_TEST_TOAST;
  const PROJECT_ID = "project-1";
  const MODEL_ID = "gemini-3-flash-preview";

  /** Creates a toast spy plus a minimal plugin client that routes toasts to it. */
  function createToastHarness() {
    const showToast = mock(async (_input: unknown) => true);
    const client = { auth: { set: async () => {} }, tui: { showToast } } as PluginClient;
    return { client, showToast };
  }

  beforeEach(() => {
    // Every test starts with empty cooldown state and the test-toast flag unset.
    notifyInternals.resetCooldowns();
    delete process.env.OPENCODE_GEMINI_TEST_TOAST;
  });

  afterEach(() => {
    mock.restore();
    if (savedTestToastFlag !== undefined) {
      process.env.OPENCODE_GEMINI_TEST_TOAST = savedTestToastFlag;
      return;
    }
    delete process.env.OPENCODE_GEMINI_TEST_TOAST;
  });

  it("shows toast for MODEL_CAPACITY_EXHAUSTED", async () => {
    const { client, showToast } = createToastHarness();

    await maybeShowGeminiCapacityToast(
      client,
      makeQuota429("MODEL_CAPACITY_EXHAUSTED"),
      PROJECT_ID,
      MODEL_ID,
    );

    expect(showToast.mock.calls.length).toBe(1);
    const [toastInput] = showToast.mock.calls.at(0) ?? [];
    expect(toastInput).toEqual({
      body: {
        title: "Gemini Capacity Unavailable",
        message:
          "Google reports temporary server capacity limits for gemini-3-flash-preview. Please retry in a few seconds.",
        variant: "warning",
        duration: 7000,
      },
    });
  });

  it("does not show toast for non-capacity 429 reasons", async () => {
    const { client, showToast } = createToastHarness();

    await maybeShowGeminiCapacityToast(
      client,
      makeQuota429("RATE_LIMIT_EXCEEDED"),
      PROJECT_ID,
      MODEL_ID,
    );

    expect(showToast.mock.calls.length).toBe(0);
  });

  it("dedupes toasts within cooldown window", async () => {
    const { client, showToast } = createToastHarness();

    // Two back-to-back capacity errors for the same project/model pair.
    for (let round = 0; round < 2; round += 1) {
      await maybeShowGeminiCapacityToast(
        client,
        makeQuota429("MODEL_CAPACITY_EXHAUSTED"),
        PROJECT_ID,
        MODEL_ID,
      );
    }

    expect(showToast.mock.calls.length).toBe(1);
  });
});
96
+
97
describe("maybeShowGeminiTestToast", () => {
  // Snapshot of the smoke-test flag so the suite leaves the caller's environment untouched.
  const savedTestToastFlag = process.env.OPENCODE_GEMINI_TEST_TOAST;

  /** Creates a toast spy plus a minimal plugin client that routes toasts to it. */
  function createToastHarness() {
    const showToast = mock(async (_input: unknown) => true);
    const client = { auth: { set: async () => {} }, tui: { showToast } } as PluginClient;
    return { client, showToast };
  }

  beforeEach(() => {
    // Every test starts with empty cooldown state and the test-toast flag unset.
    notifyInternals.resetCooldowns();
    delete process.env.OPENCODE_GEMINI_TEST_TOAST;
  });

  afterEach(() => {
    mock.restore();
    if (savedTestToastFlag !== undefined) {
      process.env.OPENCODE_GEMINI_TEST_TOAST = savedTestToastFlag;
      return;
    }
    delete process.env.OPENCODE_GEMINI_TEST_TOAST;
  });

  it("does not show test toast when flag is not enabled", async () => {
    const { maybeShowGeminiTestToast } = await import("./notify");
    const { client, showToast } = createToastHarness();

    await maybeShowGeminiTestToast(client, "project-1");

    expect(showToast.mock.calls.length).toBe(0);
  });

  it("shows test toast once per project when flag is enabled", async () => {
    process.env.OPENCODE_GEMINI_TEST_TOAST = "1";
    const { maybeShowGeminiTestToast } = await import("./notify");
    const { client, showToast } = createToastHarness();

    // The second call for the same project must be a no-op.
    for (let round = 0; round < 2; round += 1) {
      await maybeShowGeminiTestToast(client, "project-1");
    }

    expect(showToast.mock.calls.length).toBe(1);
    const [toastInput] = showToast.mock.calls.at(0) ?? [];
    expect(toastInput).toEqual({
      body: {
        title: "Gemini Toast Test",
        message: "Temporary test toast from opencode-gemini-auth.",
        variant: "info",
        duration: 5000,
      },
    });
  });
});
@@ -0,0 +1,89 @@
1
+ import { classifyQuotaResponse } from "./retry/quota";
2
+ import { isGeminiDebugEnabled, logGeminiDebugMessage } from "./debug";
3
+ import type { PluginClient } from "./types";
4
+
5
+ const MODEL_CAPACITY_TOAST_COOLDOWN_MS = 30_000;
6
+ const modelCapacityToastCooldownByKey = new Map<string, number>();
7
+ const TEST_TOAST_FLAG = "OPENCODE_GEMINI_TEST_TOAST";
8
+ const testToastShownByProject = new Set<string>();
9
+
10
/**
 * Emits a user-facing toast for server-side Gemini model capacity exhaustion.
 *
 * We deliberately notify only `MODEL_CAPACITY_EXHAUSTED` (not generic 429s)
 * so we do not mislabel account-level quota limits as backend incidents.
 *
 * Toasts are de-duplicated per `projectId|model` pair for
 * `MODEL_CAPACITY_TOAST_COOLDOWN_MS`. The cooldown is armed before the toast is
 * awaited (so overlapping calls cannot both emit) and rolled back if the toast
 * call rejects, so a failed toast does not silence the next attempt.
 *
 * @param client - Plugin client; nothing happens when it exposes no `tui.showToast`.
 * @param response - Upstream response; only status 429 is inspected further.
 * @param projectId - Project identifier used in the de-duplication key and debug log.
 * @param requestedModel - Model name shown in the toast; falls back to "the selected model".
 * @throws Re-throws whatever `client.tui.showToast` rejects with.
 */
export async function maybeShowGeminiCapacityToast(
  client: PluginClient,
  response: Response,
  projectId: string,
  requestedModel?: string,
): Promise<void> {
  if (response.status !== 429 || !client.tui?.showToast) {
    return;
  }

  const quotaContext = await classifyQuotaResponse(response);
  if (quotaContext?.reason !== "MODEL_CAPACITY_EXHAUSTED") {
    return;
  }

  const model = requestedModel ?? "the selected model";
  const toastKey = `${projectId}|${model}|MODEL_CAPACITY_EXHAUSTED`;
  const now = Date.now();
  const cooldownUntil = modelCapacityToastCooldownByKey.get(toastKey) ?? 0;
  if (cooldownUntil > now) {
    return;
  }
  modelCapacityToastCooldownByKey.set(toastKey, now + MODEL_CAPACITY_TOAST_COOLDOWN_MS);

  try {
    await client.tui.showToast({
      body: {
        title: "Gemini Capacity Unavailable",
        message: `Google reports temporary server capacity limits for ${model}. Please retry in a few seconds.`,
        variant: "warning",
        duration: 7000,
      },
    });
  } catch (error) {
    // Nothing was displayed: drop the cooldown (any previous entry had already
    // expired) so the next capacity error can try to notify again.
    modelCapacityToastCooldownByKey.delete(toastKey);
    throw error;
  }

  if (isGeminiDebugEnabled()) {
    logGeminiDebugMessage(`Toast: emitted capacity warning for model=${model} project=${projectId}`);
  }
}
52
+
53
/**
 * Temporary smoke-test toast, enabled only with OPENCODE_GEMINI_TEST_TOAST=1.
 * Emits once per project per process lifetime to avoid toast spam.
 *
 * The "already shown" marker is recorded before the toast is awaited (so
 * overlapping calls cannot both emit) and removed again if the toast call
 * rejects, so "once" means one toast attempt that did not fail.
 *
 * @param client - Plugin client; nothing happens when it exposes no `tui.showToast`.
 * @param projectId - Project identifier; an empty value is tracked under the key "global".
 * @throws Re-throws whatever `client.tui.showToast` rejects with.
 */
export async function maybeShowGeminiTestToast(
  client: PluginClient,
  projectId: string,
): Promise<void> {
  if (process.env[TEST_TOAST_FLAG]?.trim() !== "1" || !client.tui?.showToast) {
    return;
  }

  const key = projectId || "global";
  if (testToastShownByProject.has(key)) {
    return;
  }
  testToastShownByProject.add(key);

  try {
    await client.tui.showToast({
      body: {
        title: "Gemini Toast Test",
        message: "Temporary test toast from opencode-gemini-auth.",
        variant: "info",
        duration: 5000,
      },
    });
  } catch (error) {
    // The toast never appeared; forget the marker so a later call can retry.
    testToastShownByProject.delete(key);
    throw error;
  }

  if (isGeminiDebugEnabled()) {
    logGeminiDebugMessage(`Toast: emitted test toast (project=${key})`);
  }
}
83
+
84
/**
 * Internal hooks exposed so callers (the test suite uses this) can reset the
 * module-level toast de-duplication state.
 */
export const notifyInternals = {
  /** Empties both the capacity-toast cooldown map and the shown-test-toast set. */
  resetCooldowns(): void {
    for (const store of [modelCapacityToastCooldownByKey, testToastShownByProject]) {
      store.clear();
    }
  },
};
@@ -1,5 +1,6 @@
1
1
  import { CODE_ASSIST_HEADERS, GEMINI_CODE_ASSIST_ENDPOINT } from "../../constants";
2
2
  import { logGeminiDebugResponse, startGeminiDebugRequest } from "../debug";
3
+ import { buildGeminiCliUserAgent } from "../user-agent";
3
4
  import {
4
5
  FREE_TIER_ID,
5
6
  type LoadCodeAssistPayload,
@@ -27,6 +28,7 @@ export async function loadManagedProject(
27
28
  const headers = {
28
29
  "Content-Type": "application/json",
29
30
  Authorization: `Bearer ${accessToken}`,
31
+ "User-Agent": buildGeminiCliUserAgent(),
30
32
  ...CODE_ASSIST_HEADERS,
31
33
  };
32
34
  const debugContext = startGeminiDebugRequest({
@@ -92,6 +94,7 @@ export async function onboardManagedProject(
92
94
  const headers = {
93
95
  "Content-Type": "application/json",
94
96
  Authorization: `Bearer ${accessToken}`,
97
+ "User-Agent": buildGeminiCliUserAgent(),
95
98
  ...CODE_ASSIST_HEADERS,
96
99
  };
97
100
 
@@ -143,6 +146,7 @@ export async function retrieveUserQuota(
143
146
  const headers = {
144
147
  "Content-Type": "application/json",
145
148
  Authorization: `Bearer ${accessToken}`,
149
+ "User-Agent": buildGeminiCliUserAgent(),
146
150
  ...CODE_ASSIST_HEADERS,
147
151
  };
148
152
 
@@ -2,6 +2,7 @@ import { randomUUID } from "node:crypto";
2
2
 
3
3
  import { CODE_ASSIST_HEADERS, GEMINI_CODE_ASSIST_ENDPOINT } from "../../constants";
4
4
  import { normalizeThinkingConfig } from "../request-helpers";
5
+ import { buildGeminiCliUserAgent } from "../user-agent";
5
6
  import { normalizeRequestPayloadIdentifiers, normalizeWrappedIdentifiers } from "./identifiers";
6
7
  import { addThoughtSignaturesToFunctionCalls, transformOpenAIToolCalls } from "./openai";
7
8
  import { isGenerativeLanguageRequest, toRequestUrlString } from "./shared";
@@ -38,6 +39,7 @@ export function prepareGeminiRequest(
38
39
 
39
40
  headers.set("Authorization", `Bearer ${accessToken}`);
40
41
  headers.delete("x-api-key");
42
+ headers.delete("x-goog-api-key");
41
43
 
42
44
  const match = toRequestUrlString(input).match(/\/models\/([^:]+):(\w+)/);
43
45
  if (!match) {
@@ -70,7 +72,7 @@ export function prepareGeminiRequest(
70
72
  headers.set("Accept", "text/event-stream");
71
73
  }
72
74
 
73
- headers.set("User-Agent", CODE_ASSIST_HEADERS["User-Agent"]);
75
+ headers.set("User-Agent", buildGeminiCliUserAgent(effectiveModel));
74
76
  headers.set("X-Goog-Api-Client", CODE_ASSIST_HEADERS["X-Goog-Api-Client"]);
75
77
  headers.set("Client-Metadata", CODE_ASSIST_HEADERS["Client-Metadata"]);
76
78
  /**
@@ -116,6 +118,7 @@ function transformRequestBody(
116
118
  normalizeThinking(requestPayload);
117
119
  normalizeSystemInstruction(requestPayload);
118
120
  normalizeCachedContent(requestPayload);
121
+ stripThoughtPartsFromHistory(requestPayload);
119
122
 
120
123
  if ("model" in requestPayload) {
121
124
  delete requestPayload.model;
@@ -188,3 +191,44 @@ function normalizeCachedContent(requestPayload: Record<string, unknown>): void {
188
191
  delete requestPayload.extra_body;
189
192
  }
190
193
  }
194
+
195
/**
 * Rewrites `requestPayload.contents` in place so that no part flagged with
 * `thought: true` is sent back as conversation history.
 *
 * - Entries that are not objects, or whose `parts` is not an array, pass through untouched.
 * - Every other entry is shallow-copied with its thought parts filtered out.
 * - A `role: "model"` entry left with zero parts is removed entirely.
 *
 * Does nothing when `contents` is not an array.
 */
function stripThoughtPartsFromHistory(requestPayload: Record<string, unknown>): void {
  const history = requestPayload.contents;
  if (!Array.isArray(history)) {
    return;
  }

  // Only object parts explicitly flagged `thought: true` count as thoughts.
  const isThoughtPart = (part: unknown): boolean =>
    !!part && typeof part === "object" && (part as Record<string, unknown>).thought === true;

  requestPayload.contents = history.flatMap((entry: unknown): unknown[] => {
    if (!entry || typeof entry !== "object") {
      return [entry];
    }

    const turn = entry as Record<string, unknown>;
    if (!Array.isArray(turn.parts)) {
      return [entry];
    }

    const visibleParts = turn.parts.filter((part: unknown) => !isThoughtPart(part));

    // Drop empty model turns produced by interrupted thought streaming.
    const isEmptyModelTurn = visibleParts.length === 0 && turn.role === "model";
    return isEmptyModelTurn ? [] : [{ ...turn, parts: visibleParts }];
  });
}
@@ -25,6 +25,7 @@ describe("request helpers", () => {
25
25
  headers: {
26
26
  "Content-Type": "application/json",
27
27
  "x-api-key": "should-be-removed",
28
+ "x-goog-api-key": "should-also-be-removed",
28
29
  },
29
30
  body: JSON.stringify({
30
31
  contents: [{ role: "user", parts: [{ text: "hi" }] }],
@@ -43,6 +44,9 @@ describe("request helpers", () => {
43
44
  const headers = new Headers(result.init.headers);
44
45
  expect(headers.get("Authorization")).toBe("Bearer token-123");
45
46
  expect(headers.get("x-api-key")).toBeNull();
47
+ expect(headers.get("x-goog-api-key")).toBeNull();
48
+ expect(headers.get("User-Agent")).toContain("GeminiCLI/");
49
+ expect(headers.get("User-Agent")).toContain("/gemini-3-flash-preview ");
46
50
  expect(headers.get("Accept")).toBe("text/event-stream");
47
51
  expect(headers.get("x-activity-request-id")).toBeTruthy();
48
52
 
@@ -55,6 +59,36 @@ describe("request helpers", () => {
55
59
  expect((parsed.request as Record<string, unknown>).system_instruction).toBeUndefined();
56
60
  });
57
61
 
62
+ it("drops thought-only model parts from replayed history", () => {
63
+ const input =
64
+ "https://generativelanguage.googleapis.com/v1beta/models/gemini-3-flash-preview:streamGenerateContent";
65
+ const init: RequestInit = {
66
+ method: "POST",
67
+ headers: {
68
+ "Content-Type": "application/json",
69
+ },
70
+ body: JSON.stringify({
71
+ contents: [
72
+ { role: "user", parts: [{ text: "give me a joke" }] },
73
+ {
74
+ role: "model",
75
+ parts: [{ text: "internal thought", thought: true }],
76
+ },
77
+ { role: "user", parts: [{ text: "well?" }] },
78
+ ],
79
+ }),
80
+ };
81
+
82
+ const result = prepareGeminiRequest(input, init, "token-123", "project-456");
83
+ const parsed = JSON.parse(result.init.body as string) as Record<string, unknown>;
84
+ const request = parsed.request as Record<string, unknown>;
85
+ const contents = request.contents as Array<Record<string, unknown>>;
86
+
87
+ expect(contents.length).toBe(2);
88
+ expect(contents[0]?.role).toBe("user");
89
+ expect(contents[1]?.role).toBe("user");
90
+ });
91
+
58
92
  it("maps traceId to responseId for JSON responses", async () => {
59
93
  const response = new Response(
60
94
  JSON.stringify({
@@ -8,6 +8,11 @@ import {
8
8
  wait,
9
9
  } from "./helpers";
10
10
  import { classifyQuotaResponse, retryInternals } from "./quota";
11
+ import { isGeminiDebugEnabled, logGeminiDebugMessage } from "../debug";
12
+
13
+ const retryCooldownByKey = new Map<string, number>();
14
+ const RETRY_IN_FLIGHT_LOG_INTERVAL_MS = 5000;
15
+ const MODEL_CAPACITY_COOLDOWN_MS = 8000;
11
16
 
12
17
  /**
13
18
  * Sends requests with retry/backoff semantics aligned to Gemini CLI:
@@ -24,39 +29,74 @@ export async function fetchWithRetry(
24
29
  }
25
30
 
26
31
  const retryInit = cloneRetryableInit(init);
32
+ const throttleKey = buildRetryThrottleKey(input, retryInit);
33
+ await waitForRetryCooldown(throttleKey, retryInit.signal);
27
34
  let attempt = 1;
35
+ const url = readRequestUrl(input);
28
36
 
29
37
  while (attempt <= DEFAULT_MAX_ATTEMPTS) {
30
38
  let response: Response;
39
+ const stopInFlightLog = startInFlightLog(attempt, url);
31
40
  try {
41
+ debugRetry(
42
+ `attempt ${attempt}/${DEFAULT_MAX_ATTEMPTS} -> ${url}`,
43
+ );
32
44
  response = await fetch(input, retryInit);
33
45
  } catch (error) {
46
+ stopInFlightLog();
34
47
  if (attempt >= DEFAULT_MAX_ATTEMPTS || !isRetryableNetworkError(error)) {
48
+ debugRetry(
49
+ `attempt ${attempt} network error is non-retryable or maxed: ${formatErrorSummary(error)}`,
50
+ );
35
51
  throw error;
36
52
  }
37
53
  if (retryInit.signal?.aborted) {
54
+ debugRetry(`attempt ${attempt} aborted before retry`);
38
55
  throw error;
39
56
  }
40
57
 
41
- await wait(getExponentialDelayWithJitter(attempt));
58
+ const delayMs = getExponentialDelayWithJitter(attempt);
59
+ debugRetry(
60
+ `attempt ${attempt} network retry scheduled in ${delayMs}ms (${formatErrorSummary(error)})`,
61
+ );
62
+ await wait(delayMs);
42
63
  attempt += 1;
43
64
  continue;
44
65
  }
66
+ stopInFlightLog();
45
67
 
46
68
  if (!isRetryableStatus(response.status)) {
69
+ debugRetry(`attempt ${attempt} success or non-retryable status: ${response.status}`);
47
70
  return response;
48
71
  }
49
72
 
50
73
  const quotaContext = response.status === 429 ? await classifyQuotaResponse(response) : null;
51
74
  if (response.status === 429 && quotaContext?.terminal) {
75
+ if (quotaContext.reason === "MODEL_CAPACITY_EXHAUSTED") {
76
+ const cooldownMs = quotaContext.retryDelayMs ?? MODEL_CAPACITY_COOLDOWN_MS;
77
+ setRetryCooldown(throttleKey, cooldownMs);
78
+ debugRetry(`terminal model capacity; cooldown ${cooldownMs}ms before next request`);
79
+ }
80
+ debugRetry(
81
+ `attempt ${attempt} terminal 429 (${quotaContext.reason ?? "unknown"}), returning without retry`,
82
+ );
52
83
  return response;
53
84
  }
54
85
 
55
86
  if (attempt >= DEFAULT_MAX_ATTEMPTS || retryInit.signal?.aborted) {
87
+ debugRetry(
88
+ `attempt ${attempt} reached retry boundary (status=${response.status})`,
89
+ );
56
90
  return response;
57
91
  }
58
92
 
59
93
  const delayMs = await resolveRetryDelayMs(response, attempt, quotaContext?.retryDelayMs);
94
+ debugRetry(
95
+ `attempt ${attempt} retrying status=${response.status} reason=${quotaContext?.reason ?? "n/a"} delay=${delayMs}ms`,
96
+ );
97
+ if (delayMs > 0 && response.status === 429) {
98
+ setRetryCooldown(throttleKey, delayMs);
99
+ }
60
100
  if (delayMs > 0) {
61
101
  await wait(delayMs);
62
102
  }
@@ -76,4 +116,107 @@ function cloneRetryableInit(init: RequestInit | undefined): RequestInit {
76
116
  };
77
117
  }
78
118
 
119
/**
 * Derives the cooldown bucket for a request as `url|project|model`.
 * `project` and `model` are read from the top level of a JSON string body;
 * either segment is left empty when the body is not a string, does not parse,
 * or lacks a non-blank string value for that field.
 */
function buildRetryThrottleKey(input: RequestInfo, init: RequestInit): string {
  const requestUrl = readRequestUrl(input);
  const rawBody = init.body;
  const payload = typeof rawBody === "string" ? safeParseBody(rawBody) : null;

  const segments = [
    requestUrl,
    readString(payload?.project) ?? "",
    readString(payload?.model) ?? "",
  ];
  return segments.join("|");
}
126
+
127
/**
 * Delays the caller until the cooldown recorded for `key` (if any) has elapsed.
 *
 * - No entry: returns immediately.
 * - Entry already in the past: the stale entry is removed and the call returns.
 * - `signal` already aborted: the wait is skipped and the entry is left in place.
 * - Otherwise: waits for the remaining time via `wait`.
 *
 * After waiting, the entry is removed only if it still holds the deadline this
 * call waited for. A concurrent request may have extended the cooldown through
 * `setRetryCooldown` in the meantime, and deleting it unconditionally would let
 * the next request skip that newer cooldown.
 *
 * @param key - Throttle bucket, as produced by `buildRetryThrottleKey`.
 * @param signal - Optional abort signal; only its state at call time is checked.
 */
async function waitForRetryCooldown(key: string, signal?: AbortSignal | null): Promise<void> {
  const until = retryCooldownByKey.get(key);
  if (!until) {
    return;
  }

  const remaining = until - Date.now();
  if (remaining <= 0) {
    retryCooldownByKey.delete(key);
    return;
  }
  if (signal?.aborted) {
    debugRetry(`cooldown skipped due to abort (key=${shortKey(key)})`);
    return;
  }

  debugRetry(`cooldown wait ${remaining}ms (key=${shortKey(key)})`);
  await wait(remaining);

  // Only clear the deadline we actually served; keep a newer, extended one.
  if (retryCooldownByKey.get(key) === until) {
    retryCooldownByKey.delete(key);
  }
}
147
+
148
/**
 * Records that requests for `key` should hold off for `delayMs` from now.
 * An existing deadline is never shortened: the later of the stored and the
 * newly proposed deadline wins.
 */
function setRetryCooldown(key: string, delayMs: number): void {
  const proposedDeadline = Date.now() + delayMs;
  const storedDeadline = retryCooldownByKey.get(key) ?? 0;
  const effectiveDeadline = Math.max(storedDeadline, proposedDeadline);

  retryCooldownByKey.set(key, effectiveDeadline);
  debugRetry(`cooldown set ${delayMs}ms (key=${shortKey(key)})`);
}
154
+
155
/**
 * Extracts a URL string from a fetch input: strings are returned as-is, `URL`
 * instances are stringified, and request-like objects yield their `url`
 * property. When `url` is empty the input's own `toString()` is the fallback.
 */
function readRequestUrl(input: RequestInfo): string {
  if (typeof input === "string") {
    return input;
  }
  if (input instanceof URL) {
    return input.toString();
  }

  const { url } = input as Request;
  return url ? url : input.toString();
}
169
+
170
/**
 * Best-effort JSON decoding of a request body for throttle-key extraction.
 *
 * @param body - Raw request body text.
 * @returns The decoded value when it is a plain JSON object; `null` for empty
 *   input, malformed JSON, JSON primitives, `null`, and JSON arrays. Arrays are
 *   rejected explicitly because `typeof [] === "object"` would otherwise let
 *   them through as if they were a string-keyed record.
 */
function safeParseBody(body: string): Record<string, unknown> | null {
  if (!body) {
    return null;
  }

  let parsed: unknown;
  try {
    parsed = JSON.parse(body);
  } catch {
    // Malformed JSON is expected here (bodies are not guaranteed to be JSON).
    return null;
  }

  if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
    return null;
  }
  return parsed as Record<string, unknown>;
}
183
+
184
/**
 * Returns `value` unchanged when it is a string containing at least one
 * non-whitespace character; otherwise `undefined`.
 */
function readString(value: unknown): string | undefined {
  if (typeof value !== "string") {
    return undefined;
  }
  return value.trim().length > 0 ? value : undefined;
}
187
+
188
/** Writes `message` to the Gemini debug log with a `Retry: ` prefix, only when debug logging is enabled. */
function debugRetry(message: string): void {
  if (isGeminiDebugEnabled()) {
    logGeminiDebugMessage(`Retry: ${message}`);
  }
}
194
+
195
/**
 * Produces a short, human-readable description of a thrown value for debug logs.
 *
 * - `Error` instances: their `message`.
 * - Objects with a non-empty string `message` property: that message.
 * - Other objects whose string form is only a generic `[object X]` tag: their
 *   JSON form, so the log shows the payload instead of `[object Object]`.
 * - Everything else (primitives, arrays, objects with a custom `toString`):
 *   `String(error)`.
 *
 * Never throws: values that cannot be stringified or serialized fall back to
 * their `[object X]` tag.
 */
function formatErrorSummary(error: unknown): string {
  if (error instanceof Error) {
    return error.message;
  }
  if (typeof error !== "object" || error === null) {
    return String(error);
  }

  const { message } = error as { message?: unknown };
  if (typeof message === "string" && message.length > 0) {
    return message;
  }

  let text: string;
  try {
    text = String(error);
  } catch {
    // e.g. null-prototype objects have no toString to call.
    text = Object.prototype.toString.call(error);
  }
  if (!/^\[object \w+\]$/.test(text)) {
    return text;
  }

  try {
    return JSON.stringify(error) ?? text;
  } catch {
    // Circular structures cannot be serialized; keep the generic tag.
    return text;
  }
}
201
+
202
/** Truncates a throttle key to its first 120 characters plus `...` so log lines stay readable. */
function shortKey(key: string): string {
  const limit = 120;
  if (key.length > limit) {
    return `${key.slice(0, limit)}...`;
  }
  return key;
}
205
+
206
/**
 * Starts a periodic "still waiting" debug log for one in-flight attempt.
 *
 * @returns A stop function that cancels the periodic log. When debug logging
 *   is disabled no timer is created and the stop function does nothing.
 */
function startInFlightLog(attempt: number, url: string): () => void {
  if (!isGeminiDebugEnabled()) {
    return () => {};
  }

  const begunAt = Date.now();
  const timer = setInterval(() => {
    const waitedMs = Date.now() - begunAt;
    debugRetry(`attempt ${attempt} still waiting for response (${waitedMs}ms) -> ${url}`);
  }, RETRY_IN_FLIGHT_LOG_INTERVAL_MS);

  return () => {
    clearInterval(timer);
  };
}
221
+
79
222
  export { retryInternals };
@@ -23,6 +23,7 @@ interface GoogleRpcRetryInfo {
23
23
  export interface QuotaContext {
24
24
  terminal: boolean;
25
25
  retryDelayMs?: number;
26
+ reason?: string;
26
27
  }
27
28
 
28
29
  const CLOUDCODE_DOMAINS = new Set([
@@ -65,10 +66,17 @@ export async function classifyQuotaResponse(response: Response): Promise<QuotaCo
65
66
  return null;
66
67
  }
67
68
  if (errorInfo?.reason === "QUOTA_EXHAUSTED") {
68
- return { terminal: true, retryDelayMs };
69
+ return { terminal: true, retryDelayMs, reason: errorInfo.reason };
69
70
  }
70
71
  if (errorInfo?.reason === "RATE_LIMIT_EXCEEDED") {
71
- return { terminal: false, retryDelayMs: retryDelayMs ?? 10_000 };
72
+ return { terminal: false, retryDelayMs: retryDelayMs ?? 10_000, reason: errorInfo.reason };
73
+ }
74
+ if (errorInfo?.reason === "MODEL_CAPACITY_EXHAUSTED") {
75
+ return {
76
+ terminal: retryDelayMs === undefined,
77
+ retryDelayMs,
78
+ reason: errorInfo.reason,
79
+ };
72
80
  }
73
81
 
74
82
  const quotaFailure = details.find(
@@ -83,20 +91,20 @@ export async function classifyQuotaResponse(response: Response): Promise<QuotaCo
83
91
  .toLowerCase();
84
92
 
85
93
  if (allTexts.includes("perday") || allTexts.includes("daily") || allTexts.includes("per day")) {
86
- return { terminal: true, retryDelayMs };
94
+ return { terminal: true, retryDelayMs, reason: errorInfo?.reason };
87
95
  }
88
96
  if (allTexts.includes("perminute") || allTexts.includes("per minute")) {
89
- return { terminal: false, retryDelayMs: retryDelayMs ?? 60_000 };
97
+ return { terminal: false, retryDelayMs: retryDelayMs ?? 60_000, reason: errorInfo?.reason };
90
98
  }
91
- return { terminal: false, retryDelayMs };
99
+ return { terminal: false, retryDelayMs, reason: errorInfo?.reason };
92
100
  }
93
101
 
94
102
  const quotaLimit = errorInfo?.metadata?.quota_limit?.toLowerCase() ?? "";
95
103
  if (quotaLimit.includes("perminute") || quotaLimit.includes("per minute")) {
96
- return { terminal: false, retryDelayMs: retryDelayMs ?? 60_000 };
104
+ return { terminal: false, retryDelayMs: retryDelayMs ?? 60_000, reason: errorInfo?.reason };
97
105
  }
98
106
 
99
- return { terminal: false, retryDelayMs };
107
+ return { terminal: false, retryDelayMs, reason: errorInfo?.reason };
100
108
  }
101
109
 
102
110
  /**
@@ -191,12 +199,15 @@ async function parseErrorBody(
191
199
  return null;
192
200
  }
193
201
 
194
- if (!isObject(parsed) || !isObject(parsed.error)) {
202
+ const normalized = normalizeErrorEnvelope(parsed);
203
+ if (!normalized || !isObject(normalized.error)) {
195
204
  return null;
196
205
  }
206
+
207
+ const error = normalized.error as Record<string, unknown>;
197
208
  return {
198
- message: typeof parsed.error.message === "string" ? parsed.error.message : undefined,
199
- details: Array.isArray(parsed.error.details) ? parsed.error.details : undefined,
209
+ message: typeof error.message === "string" ? error.message : undefined,
210
+ details: Array.isArray(error.details) ? error.details : undefined,
200
211
  };
201
212
  }
202
213
 
@@ -204,6 +215,14 @@ function isObject(value: unknown): value is Record<string, any> {
204
215
  return !!value && typeof value === "object";
205
216
  }
206
217
 
218
/**
 * Unwraps an error payload that may arrive either as a single object or as an
 * array of objects. For arrays only the first element is considered. Returns
 * `null` when the selected value is not an object.
 */
function normalizeErrorEnvelope(parsed: unknown): Record<string, unknown> | null {
  const candidate: unknown = Array.isArray(parsed) ? parsed[0] : parsed;
  return isObject(candidate) ? candidate : null;
}
225
+
207
226
  export const retryInternals = {
208
227
  parseRetryDelayValue,
209
228
  parseRetryDelayFromMessage,
@@ -3,8 +3,13 @@ import { afterEach, beforeEach, describe, expect, it, mock } from "bun:test";
3
3
  import { fetchWithRetry, retryInternals } from "./retry";
4
4
 
5
5
  const originalSetTimeout = globalThis.setTimeout;
6
+ const scheduledDelays: number[] = [];
6
7
 
7
- function makeQuota429(reason: "RATE_LIMIT_EXCEEDED" | "QUOTA_EXHAUSTED", retryDelay?: string): Response {
8
+ function makeQuota429(
9
+ reason: "RATE_LIMIT_EXCEEDED" | "QUOTA_EXHAUSTED" | "MODEL_CAPACITY_EXHAUSTED",
10
+ retryDelay?: string,
11
+ wrappedAsArray = false,
12
+ ): Response {
8
13
  const details: Record<string, unknown>[] = [
9
14
  {
10
15
  "@type": "type.googleapis.com/google.rpc.ErrorInfo",
@@ -18,13 +23,49 @@ function makeQuota429(reason: "RATE_LIMIT_EXCEEDED" | "QUOTA_EXHAUSTED", retryDe
18
23
  retryDelay,
19
24
  });
20
25
  }
26
+ const payload = {
27
+ error: {
28
+ message: "rate limited",
29
+ details,
30
+ },
31
+ };
32
+ return new Response(
33
+ JSON.stringify(
34
+ wrappedAsArray
35
+ ? [payload]
36
+ : payload,
37
+ ),
38
+ {
39
+ status: 429,
40
+ headers: { "content-type": "application/json" },
41
+ },
42
+ );
43
+ }
44
+
45
+ function makeQuota429WithMessage(
46
+ reason: "RATE_LIMIT_EXCEEDED" | "QUOTA_EXHAUSTED" | "MODEL_CAPACITY_EXHAUSTED",
47
+ message: string,
48
+ wrappedAsArray = false,
49
+ ): Response {
50
+ const details: Record<string, unknown>[] = [
51
+ {
52
+ "@type": "type.googleapis.com/google.rpc.ErrorInfo",
53
+ reason,
54
+ domain: "cloudcode-pa.googleapis.com",
55
+ },
56
+ ];
57
+ const payload = {
58
+ error: {
59
+ message,
60
+ details,
61
+ },
62
+ };
21
63
  return new Response(
22
- JSON.stringify({
23
- error: {
24
- message: "rate limited",
25
- details,
26
- },
27
- }),
64
+ JSON.stringify(
65
+ wrappedAsArray
66
+ ? [payload]
67
+ : payload,
68
+ ),
28
69
  {
29
70
  status: 429,
30
71
  headers: { "content-type": "application/json" },
@@ -35,7 +76,12 @@ function makeQuota429(reason: "RATE_LIMIT_EXCEEDED" | "QUOTA_EXHAUSTED", retryDe
35
76
  describe("fetchWithRetry", () => {
36
77
  beforeEach(() => {
37
78
  mock.restore();
38
- (globalThis as { setTimeout: typeof setTimeout }).setTimeout = ((fn: (...args: any[]) => void) => {
79
+ scheduledDelays.length = 0;
80
+ (globalThis as { setTimeout: typeof setTimeout }).setTimeout = ((
81
+ fn: (...args: any[]) => void,
82
+ delay?: number | undefined,
83
+ ) => {
84
+ scheduledDelays.push(typeof delay === "number" ? delay : 0);
39
85
  fn();
40
86
  return 0 as unknown as ReturnType<typeof setTimeout>;
41
87
  }) as typeof setTimeout;
@@ -96,6 +142,84 @@ describe("fetchWithRetry", () => {
96
142
  expect(fetchMock.mock.calls.length).toBe(1);
97
143
  });
98
144
 
145
+ it("fails fast on model capacity exhaustion when no retry hint is provided", async () => {
146
+ const fetchMock = mock(async () => makeQuota429("MODEL_CAPACITY_EXHAUSTED"));
147
+ (globalThis as { fetch: typeof fetch }).fetch = fetchMock as unknown as typeof fetch;
148
+
149
+ const response = await fetchWithRetry("https://example.com", {
150
+ method: "POST",
151
+ body: JSON.stringify({ hello: "world" }),
152
+ });
153
+
154
+ expect(response.status).toBe(429);
155
+ expect(fetchMock.mock.calls.length).toBe(1);
156
+ });
157
+
158
+ it("fails fast on array-wrapped model capacity exhaustion payload", async () => {
159
+ const fetchMock = mock(async () =>
160
+ makeQuota429WithMessage(
161
+ "MODEL_CAPACITY_EXHAUSTED",
162
+ "No capacity available for model gemini-3-flash-preview on the server",
163
+ true,
164
+ ),
165
+ );
166
+ (globalThis as { fetch: typeof fetch }).fetch = fetchMock as unknown as typeof fetch;
167
+
168
+ const response = await fetchWithRetry("https://example.com", {
169
+ method: "POST",
170
+ body: JSON.stringify({ hello: "world" }),
171
+ });
172
+
173
+ expect(response.status).toBe(429);
174
+ expect(fetchMock.mock.calls.length).toBe(1);
175
+ });
176
+
177
+ it("applies cooldown after terminal model capacity exhaustion", async () => {
178
+ const fetchMock = mock(async () => {
179
+ if (fetchMock.mock.calls.length === 1) {
180
+ return makeQuota429WithMessage(
181
+ "MODEL_CAPACITY_EXHAUSTED",
182
+ "No capacity available for model gemini-3-flash-preview on the server",
183
+ true,
184
+ );
185
+ }
186
+ return new Response("ok", { status: 200 });
187
+ });
188
+ (globalThis as { fetch: typeof fetch }).fetch = fetchMock as unknown as typeof fetch;
189
+
190
+ const firstResponse = await fetchWithRetry("https://example.com", {
191
+ method: "POST",
192
+ body: JSON.stringify({ project: "project-1", model: "gemini-3-flash-preview" }),
193
+ });
194
+ const secondResponse = await fetchWithRetry("https://example.com", {
195
+ method: "POST",
196
+ body: JSON.stringify({ project: "project-1", model: "gemini-3-flash-preview" }),
197
+ });
198
+
199
+ expect(firstResponse.status).toBe(429);
200
+ expect(secondResponse.status).toBe(200);
201
+ expect(fetchMock.mock.calls.length).toBe(2);
202
+ expect(scheduledDelays).toContain(8000);
203
+ });
204
+
205
+ it("retries model capacity exhaustion when server provides RetryInfo", async () => {
206
+ const fetchMock = mock(async () => {
207
+ if (fetchMock.mock.calls.length === 1) {
208
+ return makeQuota429("MODEL_CAPACITY_EXHAUSTED", "500ms");
209
+ }
210
+ return new Response("ok", { status: 200 });
211
+ });
212
+ (globalThis as { fetch: typeof fetch }).fetch = fetchMock as unknown as typeof fetch;
213
+
214
+ const response = await fetchWithRetry("https://example.com", {
215
+ method: "POST",
216
+ body: JSON.stringify({ hello: "world" }),
217
+ });
218
+
219
+ expect(response.status).toBe(200);
220
+ expect(fetchMock.mock.calls.length).toBe(2);
221
+ });
222
+
99
223
  it("retries immediately when server returns Retry-After: 0", async () => {
100
224
  const fetchMock = mock(async () => {
101
225
  if (fetchMock.mock.calls.length === 1) {
@@ -116,6 +240,36 @@ describe("fetchWithRetry", () => {
116
240
  expect(response.status).toBe(200);
117
241
  expect(fetchMock.mock.calls.length).toBe(2);
118
242
  });
243
+
244
it("applies cooldown across requests to avoid repeated initial 429s", async () => {
  const fetchMock = mock(async () => {
    const callNumber = fetchMock.mock.calls.length;
    // Rate-limit the first call always, and the third call only when fewer
    // than two delays have been scheduled by then.
    const rateLimited =
      callNumber === 1 || (callNumber === 3 && scheduledDelays.length < 2);
    return rateLimited
      ? makeQuota429("RATE_LIMIT_EXCEEDED", "1500ms")
      : new Response("ok", { status: 200 });
  });
  (globalThis as { fetch: typeof fetch }).fetch = fetchMock as unknown as typeof fetch;

  // Two back-to-back requests for the same project/model pair.
  const sendRequest = () =>
    fetchWithRetry("https://example.com", {
      method: "POST",
      body: JSON.stringify({ project: "project-1", model: "gemini-2.5-flash" }),
    });

  const firstResponse = await sendRequest();
  const secondResponse = await sendRequest();

  // Three upstream calls in total and two scheduled 1500 ms delays.
  expect(firstResponse.status).toBe(200);
  expect(secondResponse.status).toBe(200);
  expect(fetchMock.mock.calls.length).toBe(3);
  expect(scheduledDelays.length).toBe(2);
  expect(scheduledDelays[0]).toBe(1500);
  expect(scheduledDelays[1]).toBe(1500);
});
119
273
  });
120
274
 
121
275
  describe("retryInternals", () => {
@@ -52,6 +52,16 @@ export interface PluginClient {
52
52
  auth: {
53
53
  set(input: { path: { id: string }; body: OAuthAuthDetails }): Promise<void>;
54
54
  };
55
+ tui?: {
56
+ showToast(input: {
57
+ body: {
58
+ title?: string;
59
+ message: string;
60
+ variant: "info" | "success" | "warning" | "error";
61
+ duration?: number;
62
+ };
63
+ }): Promise<unknown>;
64
+ };
55
65
  }
56
66
 
57
67
  export interface PluginContext {
@@ -0,0 +1,50 @@
1
+ import { afterEach, describe, expect, it } from "bun:test";
2
+
3
+ import { GEMINI_CLI_VERSION } from "./gemini-cli-version";
4
+ import { buildGeminiCliUserAgent, getGeminiCliVersion, userAgentInternals } from "./user-agent";
5
+
6
+ const originalNpmPackageVersion = process.env.npm_package_version;
7
+ const originalExplicitVersion = process.env.OPENCODE_GEMINI_CLI_VERSION;
8
+
9
describe("user-agent", () => {
  /**
   * Puts an environment variable back to the value captured at module load,
   * deleting it when it was not set at that time.
   */
  const restoreEnv = (name: string, original: string | undefined): void => {
    if (original === undefined) {
      delete process.env[name];
      return;
    }
    process.env[name] = original;
  };

  afterEach(() => {
    restoreEnv("npm_package_version", originalNpmPackageVersion);
    restoreEnv("OPENCODE_GEMINI_CLI_VERSION", originalExplicitVersion);
    // Drop the memoised version so the next test resolves it from scratch.
    userAgentInternals.resetCache();
  });

  it("prefers OPENCODE_GEMINI_CLI_VERSION when available", () => {
    process.env.OPENCODE_GEMINI_CLI_VERSION = "8.8.8-explicit";
    process.env.npm_package_version = "9.9.9-test";
    userAgentInternals.resetCache();

    const resolvedVersion = getGeminiCliVersion();
    expect(resolvedVersion).toBe("8.8.8-explicit");

    const userAgent = buildGeminiCliUserAgent("gemini-3-flash-preview");
    expect(userAgent).toContain("/8.8.8-explicit/");
  });

  it("prefers synced GEMINI_CLI_VERSION over npm_package_version", () => {
    delete process.env.OPENCODE_GEMINI_CLI_VERSION;
    process.env.npm_package_version = "9.9.9-test";
    userAgentInternals.resetCache();

    const resolvedVersion = getGeminiCliVersion();
    expect(resolvedVersion).toBe(GEMINI_CLI_VERSION);
  });

  it("builds a GeminiCLI-style user agent", () => {
    delete process.env.OPENCODE_GEMINI_CLI_VERSION;
    delete process.env.npm_package_version;
    userAgentInternals.resetCache();

    const userAgent = buildGeminiCliUserAgent("gemini-3-flash-preview");
    expect(userAgent).toContain("GeminiCLI/");
    // The model segment is followed by a space before the platform details.
    expect(userAgent).toContain("/gemini-3-flash-preview ");
  });
});
@@ -0,0 +1,76 @@
1
+ import { readFileSync } from "node:fs";
2
+ import { dirname, join } from "node:path";
3
+ import { fileURLToPath } from "node:url";
4
+ import { GEMINI_CLI_VERSION } from "./gemini-cli-version";
5
+
6
+ const GEMINI_CLI_UA_NAME = "GeminiCLI";
7
+ const GEMINI_CLI_DEFAULT_MODEL = "gemini-code-assist";
8
+
9
+ let cachedGeminiCliVersion: string | undefined;
10
+
11
+ /**
12
+ * Resolves plugin version for User-Agent:
13
+ * 1) explicit override (`OPENCODE_GEMINI_CLI_VERSION`)
14
+ * 2) synced Gemini CLI version file (`src/plugin/gemini-cli-version.ts`)
15
+ * 3) package-manager runtime env (`npm_package_version`)
16
+ * 4) local package.json next to the plugin sources
17
+ * 5) cwd package.json as final fallback
18
+ */
19
+ export function getGeminiCliVersion(): string {
20
+ if (cachedGeminiCliVersion) {
21
+ return cachedGeminiCliVersion;
22
+ }
23
+
24
+ const explicitVersion = process.env.OPENCODE_GEMINI_CLI_VERSION?.trim();
25
+ if (explicitVersion) {
26
+ cachedGeminiCliVersion = explicitVersion;
27
+ return cachedGeminiCliVersion;
28
+ }
29
+
30
+ if (GEMINI_CLI_VERSION.trim()) {
31
+ cachedGeminiCliVersion = GEMINI_CLI_VERSION.trim();
32
+ return cachedGeminiCliVersion;
33
+ }
34
+
35
+ const envVersion = process.env.npm_package_version?.trim();
36
+ if (envVersion) {
37
+ cachedGeminiCliVersion = envVersion;
38
+ return cachedGeminiCliVersion;
39
+ }
40
+
41
+ const moduleDir = dirname(fileURLToPath(import.meta.url));
42
+ const candidatePaths = [
43
+ join(moduleDir, "../../package.json"),
44
+ join(moduleDir, "../package.json"),
45
+ join(process.cwd(), "package.json"),
46
+ ];
47
+
48
+ for (const packagePath of candidatePaths) {
49
+ try {
50
+ const parsed = JSON.parse(readFileSync(packagePath, "utf8")) as { version?: unknown };
51
+ if (typeof parsed.version === "string" && parsed.version.trim()) {
52
+ cachedGeminiCliVersion = parsed.version.trim();
53
+ return cachedGeminiCliVersion;
54
+ }
55
+ } catch {
56
+ continue;
57
+ }
58
+ }
59
+
60
+ cachedGeminiCliVersion = "0.0.0";
61
+ return cachedGeminiCliVersion;
62
+ }
63
+
64
+ /**
65
+ * Builds a Gemini CLI-style User-Agent string.
66
+ */
67
+ export function buildGeminiCliUserAgent(model?: string): string {
68
+ const modelSegment = model?.trim() || GEMINI_CLI_DEFAULT_MODEL;
69
+ return `${GEMINI_CLI_UA_NAME}/${getGeminiCliVersion()}/${modelSegment} (${process.platform}; ${process.arch})`;
70
+ }
71
+
72
+ export const userAgentInternals = {
73
+ resetCache() {
74
+ cachedGeminiCliVersion = undefined;
75
+ },
76
+ };
package/src/plugin.ts CHANGED
@@ -8,6 +8,7 @@ import {
8
8
  GEMINI_QUOTA_TOOL_NAME,
9
9
  } from "./plugin/quota";
10
10
  import { isGeminiDebugEnabled, logGeminiDebugMessage, startGeminiDebugRequest } from "./plugin/debug";
11
+ import { maybeShowGeminiCapacityToast, maybeShowGeminiTestToast } from "./plugin/notify";
11
12
  import {
12
13
  isGenerativeLanguageRequest,
13
14
  prepareGeminiRequest,
@@ -98,6 +99,7 @@ export const GeminiCLIOAuthPlugin = async (
98
99
  client,
99
100
  configuredProjectId,
100
101
  );
102
+ await maybeShowGeminiTestToast(client, projectContext.effectiveProjectId);
101
103
  await maybeLogAvailableQuotaModels(
102
104
  authRecord.access,
103
105
  projectContext.effectiveProjectId,
@@ -123,6 +125,12 @@ export const GeminiCLIOAuthPlugin = async (
123
125
  * We intentionally do not auto-downgrade model tiers to avoid misleading users.
124
126
  */
125
127
  const response = await fetchWithRetry(transformed.request, transformed.init);
128
+ await maybeShowGeminiCapacityToast(
129
+ client,
130
+ response,
131
+ projectContext.effectiveProjectId,
132
+ transformed.requestedModel,
133
+ );
126
134
  return transformGeminiResponse(
127
135
  response,
128
136
  transformed.streaming,