@oh-my-pi/pi-ai 5.0.1 → 5.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,6 +4,7 @@
4
4
  * Uses the Cloud Code Assist API endpoint to access Gemini and Claude models.
5
5
  */
6
6
 
7
+ import { createHash } from "node:crypto";
7
8
  import type { Content, ThinkingConfig } from "@google/genai";
8
9
  import { calculateCost } from "../models";
9
10
  import type {
@@ -18,7 +19,6 @@ import type {
18
19
  ToolCall,
19
20
  } from "../types";
20
21
  import { AssistantMessageEventStream } from "../utils/event-stream";
21
- import { formatErrorMessageWithRetryAfter } from "../utils/retry-after";
22
22
  import { sanitizeSurrogates } from "../utils/sanitize-unicode";
23
23
  import {
24
24
  convertMessages,
@@ -55,6 +55,8 @@ export interface GoogleGeminiCliOptions extends StreamOptions {
55
55
  }
56
56
 
57
57
  const DEFAULT_ENDPOINT = "https://cloudcode-pa.googleapis.com";
58
+ const ANTIGRAVITY_DAILY_ENDPOINT = "https://daily-cloudcode-pa.sandbox.googleapis.com";
59
+ const ANTIGRAVITY_ENDPOINT_FALLBACKS = [ANTIGRAVITY_DAILY_ENDPOINT, DEFAULT_ENDPOINT] as const;
58
60
  // Headers for Gemini CLI (prod endpoint)
59
61
  const GEMINI_CLI_HEADERS = {
60
62
  "User-Agent": "google-cloud-sdk vscode_cloudshelleditor/0.1",
@@ -164,16 +166,66 @@ let toolCallCounter = 0;
164
166
  // Retry configuration
165
167
  const MAX_RETRIES = 3;
166
168
  const BASE_DELAY_MS = 1000;
169
+ const MAX_EMPTY_STREAM_RETRIES = 2;
170
+ const EMPTY_STREAM_BASE_DELAY_MS = 500;
171
+ const CLAUDE_THINKING_BETA_HEADER = "interleaved-thinking-2025-05-14";
167
172
 
168
173
  /**
169
174
  * Extract retry delay from Gemini error response (in milliseconds).
170
- * Parses patterns like:
175
+ * Checks headers first (Retry-After, x-ratelimit-reset, x-ratelimit-reset-after),
176
+ * then parses body patterns like:
171
177
  * - "Your quota will reset after 39s"
172
178
  * - "Your quota will reset after 18h31m10s"
173
179
  * - "Please retry in Xs" or "Please retry in Xms"
174
180
  * - "retryDelay": "34.074824224s" (JSON field)
175
181
  */
176
- function extractRetryDelay(errorText: string): number | undefined {
182
+ export function extractRetryDelay(errorText: string, response?: Response | Headers): number | undefined {
183
+ const normalizeDelay = (ms: number): number | undefined => (ms > 0 ? Math.ceil(ms + 1000) : undefined);
184
+
185
+ const headers = response instanceof Headers ? response : response?.headers;
186
+ if (headers) {
187
+ const retryAfter = headers.get("retry-after");
188
+ if (retryAfter) {
189
+ const retryAfterSeconds = Number(retryAfter);
190
+ if (Number.isFinite(retryAfterSeconds)) {
191
+ const delay = normalizeDelay(retryAfterSeconds * 1000);
192
+ if (delay !== undefined) {
193
+ return delay;
194
+ }
195
+ }
196
+ const retryAfterDate = new Date(retryAfter);
197
+ const retryAfterMs = retryAfterDate.getTime();
198
+ if (!Number.isNaN(retryAfterMs)) {
199
+ const delay = normalizeDelay(retryAfterMs - Date.now());
200
+ if (delay !== undefined) {
201
+ return delay;
202
+ }
203
+ }
204
+ }
205
+
206
+ const rateLimitReset = headers.get("x-ratelimit-reset");
207
+ if (rateLimitReset) {
208
+ const resetSeconds = Number.parseInt(rateLimitReset, 10);
209
+ if (!Number.isNaN(resetSeconds)) {
210
+ const delay = normalizeDelay(resetSeconds * 1000 - Date.now());
211
+ if (delay !== undefined) {
212
+ return delay;
213
+ }
214
+ }
215
+ }
216
+
217
+ const rateLimitResetAfter = headers.get("x-ratelimit-reset-after");
218
+ if (rateLimitResetAfter) {
219
+ const resetAfterSeconds = Number(rateLimitResetAfter);
220
+ if (Number.isFinite(resetAfterSeconds)) {
221
+ const delay = normalizeDelay(resetAfterSeconds * 1000);
222
+ if (delay !== undefined) {
223
+ return delay;
224
+ }
225
+ }
226
+ }
227
+ }
228
+
177
229
  // Pattern 1: "Your quota will reset after ..." (formats: "18h31m10s", "10m15s", "6s", "39s")
178
230
  const durationMatch = errorText.match(/reset after (?:(\d+)h)?(?:(\d+)m)?(\d+(?:\.\d+)?)s/i);
179
231
  if (durationMatch) {
@@ -182,8 +234,9 @@ function extractRetryDelay(errorText: string): number | undefined {
182
234
  const seconds = parseFloat(durationMatch[3]);
183
235
  if (!Number.isNaN(seconds)) {
184
236
  const totalMs = ((hours * 60 + minutes) * 60 + seconds) * 1000;
185
- if (totalMs > 0) {
186
- return Math.ceil(totalMs + 1000); // Add 1s buffer
237
+ const delay = normalizeDelay(totalMs);
238
+ if (delay !== undefined) {
239
+ return delay;
187
240
  }
188
241
  }
189
242
  }
@@ -194,7 +247,10 @@ function extractRetryDelay(errorText: string): number | undefined {
194
247
  const value = parseFloat(retryInMatch[1]);
195
248
  if (!Number.isNaN(value) && value > 0) {
196
249
  const ms = retryInMatch[2].toLowerCase() === "ms" ? value : value * 1000;
197
- return Math.ceil(ms + 1000);
250
+ const delay = normalizeDelay(ms);
251
+ if (delay !== undefined) {
252
+ return delay;
253
+ }
198
254
  }
199
255
  }
200
256
 
@@ -204,21 +260,45 @@ function extractRetryDelay(errorText: string): number | undefined {
204
260
  const value = parseFloat(retryDelayMatch[1]);
205
261
  if (!Number.isNaN(value) && value > 0) {
206
262
  const ms = retryDelayMatch[2].toLowerCase() === "ms" ? value : value * 1000;
207
- return Math.ceil(ms + 1000);
263
+ const delay = normalizeDelay(ms);
264
+ if (delay !== undefined) {
265
+ return delay;
266
+ }
208
267
  }
209
268
  }
210
269
 
211
270
  return undefined;
212
271
  }
213
272
 
273
+ function isClaudeThinkingModel(modelId: string): boolean {
274
+ const normalized = modelId.toLowerCase();
275
+ return normalized.includes("claude") && normalized.includes("thinking");
276
+ }
277
+
214
278
  /**
215
- * Check if an error is retryable (rate limit, server error, etc.)
279
+ * Check if an error is retryable (rate limit, server error, network error, etc.)
216
280
  */
217
281
  function isRetryableError(status: number, errorText: string): boolean {
218
282
  if (status === 429 || status === 500 || status === 502 || status === 503 || status === 504) {
219
283
  return true;
220
284
  }
221
- return /resource.?exhausted|rate.?limit|overloaded|service.?unavailable/i.test(errorText);
285
+ return /resource.?exhausted|rate.?limit|overloaded|service.?unavailable|other.?side.?closed/i.test(errorText);
286
+ }
287
+
288
+ /**
289
+ * Extract a clean, user-friendly error message from Google API error response.
290
+ * Parses JSON error responses and returns just the message field.
291
+ */
292
+ function extractErrorMessage(errorText: string): string {
293
+ try {
294
+ const parsed = JSON.parse(errorText) as { error?: { message?: string } };
295
+ if (parsed.error?.message) {
296
+ return parsed.error.message;
297
+ }
298
+ } catch {
299
+ // Not JSON, return as-is
300
+ }
301
+ return errorText;
222
302
  }
223
303
 
224
304
  /**
@@ -243,6 +323,7 @@ interface CloudCodeAssistRequest {
243
323
  model: string;
244
324
  request: {
245
325
  contents: Content[];
326
+ sessionId?: string;
246
327
  systemInstruction?: { role?: string; parts: { text: string }[] };
247
328
  generationConfig?: {
248
329
  maxOutputTokens?: number;
@@ -340,17 +421,26 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
340
421
  throw new Error("Missing token or projectId in Google Cloud credentials. Use /login to re-authenticate.");
341
422
  }
342
423
 
343
- const endpoint = model.baseUrl || DEFAULT_ENDPOINT;
344
- const url = `${endpoint}/v1internal:streamGenerateContent?alt=sse`;
424
+ const isAntigravity = model.provider === "google-antigravity";
425
+ const baseUrl = model.baseUrl?.trim();
426
+ const endpoints = baseUrl ? [baseUrl] : isAntigravity ? ANTIGRAVITY_ENDPOINT_FALLBACKS : [DEFAULT_ENDPOINT];
345
427
 
346
- // Use Antigravity headers for sandbox endpoint, otherwise Gemini CLI headers
347
- const isAntigravity = endpoint.includes("sandbox.googleapis.com");
348
428
  const requestBody = buildRequest(model, context, projectId, options, isAntigravity);
349
429
  const headers = isAntigravity ? ANTIGRAVITY_HEADERS : GEMINI_CLI_HEADERS;
350
430
 
431
+ const requestHeaders = {
432
+ Authorization: `Bearer ${accessToken}`,
433
+ "Content-Type": "application/json",
434
+ Accept: "text/event-stream",
435
+ ...headers,
436
+ ...(isClaudeThinkingModel(model.id) ? { "anthropic-beta": CLAUDE_THINKING_BETA_HEADER } : {}),
437
+ };
438
+ const requestBodyJson = JSON.stringify(requestBody);
439
+
351
440
  // Fetch with retry logic for rate limits and transient errors
352
441
  let response: Response | undefined;
353
442
  let lastError: Error | undefined;
443
+ let requestUrl: string | undefined;
354
444
 
355
445
  for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
356
446
  if (options?.signal?.aborted) {
@@ -358,15 +448,12 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
358
448
  }
359
449
 
360
450
  try {
361
- response = await fetch(url, {
451
+ const endpoint = endpoints[Math.min(attempt, endpoints.length - 1)];
452
+ requestUrl = `${endpoint}/v1internal:streamGenerateContent?alt=sse`;
453
+ response = await fetch(requestUrl, {
362
454
  method: "POST",
363
- headers: {
364
- Authorization: `Bearer ${accessToken}`,
365
- "Content-Type": "application/json",
366
- Accept: "text/event-stream",
367
- ...headers,
368
- },
369
- body: JSON.stringify(requestBody),
455
+ headers: requestHeaders,
456
+ body: requestBodyJson,
370
457
  signal: options?.signal,
371
458
  });
372
459
 
@@ -379,14 +466,14 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
379
466
  // Check if retryable
380
467
  if (attempt < MAX_RETRIES && isRetryableError(response.status, errorText)) {
381
468
  // Use server-provided delay or exponential backoff
382
- const serverDelay = extractRetryDelay(errorText);
469
+ const serverDelay = extractRetryDelay(errorText, response);
383
470
  const delayMs = serverDelay ?? BASE_DELAY_MS * 2 ** attempt;
384
471
  await sleep(delayMs, options?.signal);
385
472
  continue;
386
473
  }
387
474
 
388
475
  // Not retryable or max retries exceeded
389
- throw new Error(`Cloud Code Assist API error (${response.status}): ${errorText}`);
476
+ throw new Error(`Cloud Code Assist API error (${response.status}): ${extractErrorMessage(errorText)}`);
390
477
  } catch (error) {
391
478
  // Check for abort - fetch throws AbortError, our code throws "Request was aborted"
392
479
  if (error instanceof Error) {
@@ -394,7 +481,11 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
394
481
  throw new Error("Request was aborted");
395
482
  }
396
483
  }
484
+ // Extract detailed error message from fetch errors (Node includes cause)
397
485
  lastError = error instanceof Error ? error : new Error(String(error));
486
+ if (lastError.message === "fetch failed" && lastError.cause instanceof Error) {
487
+ lastError = new Error(`Network error: ${lastError.cause.message}`);
488
+ }
398
489
  // Network errors are retryable
399
490
  if (attempt < MAX_RETRIES) {
400
491
  const delayMs = BASE_DELAY_MS * 2 ** attempt;
@@ -409,73 +500,160 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
409
500
  throw lastError ?? new Error("Failed to get response after retries");
410
501
  }
411
502
 
412
- if (!response.body) {
413
- throw new Error("No response body");
414
- }
503
+ let started = false;
504
+ const ensureStarted = () => {
505
+ if (!started) {
506
+ stream.push({ type: "start", partial: output });
507
+ started = true;
508
+ }
509
+ };
415
510
 
416
- stream.push({ type: "start", partial: output });
511
+ const resetOutput = () => {
512
+ output.content = [];
513
+ output.usage = {
514
+ input: 0,
515
+ output: 0,
516
+ cacheRead: 0,
517
+ cacheWrite: 0,
518
+ totalTokens: 0,
519
+ cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
520
+ };
521
+ output.stopReason = "stop";
522
+ output.errorMessage = undefined;
523
+ output.timestamp = Date.now();
524
+ started = false;
525
+ };
417
526
 
418
- let currentBlock: TextContent | ThinkingContent | null = null;
419
- const blocks = output.content;
420
- const blockIndex = () => blocks.length - 1;
527
+ const streamResponse = async (activeResponse: Response): Promise<boolean> => {
528
+ if (!activeResponse.body) {
529
+ throw new Error("No response body");
530
+ }
421
531
 
422
- // Read SSE stream
423
- const reader = response.body.getReader();
424
- const decoder = new TextDecoder();
425
- let buffer = "";
532
+ let hasContent = false;
533
+ let currentBlock: TextContent | ThinkingContent | null = null;
534
+ const blocks = output.content;
535
+ const blockIndex = () => blocks.length - 1;
426
536
 
427
- // Set up abort handler to cancel reader when signal fires
428
- const abortHandler = () => {
429
- void reader.cancel().catch(() => {});
430
- };
431
- options?.signal?.addEventListener("abort", abortHandler);
537
+ // Read SSE stream
538
+ const reader = activeResponse.body.getReader();
539
+ const decoder = new TextDecoder();
540
+ let buffer = "";
432
541
 
433
- try {
434
- while (true) {
435
- // Check abort signal before each read
436
- if (options?.signal?.aborted) {
437
- throw new Error("Request was aborted");
438
- }
542
+ // Set up abort handler to cancel reader when signal fires
543
+ const abortHandler = () => {
544
+ void reader.cancel().catch(() => {});
545
+ };
546
+ options?.signal?.addEventListener("abort", abortHandler);
439
547
 
440
- const { done, value } = await reader.read();
441
- if (done) break;
548
+ try {
549
+ while (true) {
550
+ // Check abort signal before each read
551
+ if (options?.signal?.aborted) {
552
+ throw new Error("Request was aborted");
553
+ }
442
554
 
443
- buffer += decoder.decode(value, { stream: true });
444
- const lines = buffer.split("\n");
445
- buffer = lines.pop() || "";
555
+ const { done, value } = await reader.read();
556
+ if (done) break;
446
557
 
447
- for (const line of lines) {
448
- if (!line.startsWith("data:")) continue;
558
+ buffer += decoder.decode(value, { stream: true });
559
+ const lines = buffer.split("\n");
560
+ buffer = lines.pop() || "";
449
561
 
450
- const jsonStr = line.slice(5).trim();
451
- if (!jsonStr) continue;
562
+ for (const line of lines) {
563
+ if (!line.startsWith("data:")) continue;
452
564
 
453
- let chunk: CloudCodeAssistResponseChunk;
454
- try {
455
- chunk = JSON.parse(jsonStr);
456
- } catch {
457
- continue;
458
- }
565
+ const jsonStr = line.slice(5).trim();
566
+ if (!jsonStr) continue;
567
+
568
+ let chunk: CloudCodeAssistResponseChunk;
569
+ try {
570
+ chunk = JSON.parse(jsonStr);
571
+ } catch {
572
+ continue;
573
+ }
459
574
 
460
- // Unwrap the response
461
- const responseData = chunk.response;
462
- if (!responseData) continue;
463
-
464
- const candidate = responseData.candidates?.[0];
465
- if (candidate?.content?.parts) {
466
- for (const part of candidate.content.parts) {
467
- if (part.text !== undefined) {
468
- const isThinking = isThinkingPart(part);
469
- if (
470
- !currentBlock ||
471
- (isThinking && currentBlock.type !== "thinking") ||
472
- (!isThinking && currentBlock.type !== "text")
473
- ) {
575
+ // Unwrap the response
576
+ const responseData = chunk.response;
577
+ if (!responseData) continue;
578
+
579
+ const candidate = responseData.candidates?.[0];
580
+ if (candidate?.content?.parts) {
581
+ for (const part of candidate.content.parts) {
582
+ if (part.text !== undefined) {
583
+ hasContent = true;
584
+ const isThinking = isThinkingPart(part);
585
+ if (
586
+ !currentBlock ||
587
+ (isThinking && currentBlock.type !== "thinking") ||
588
+ (!isThinking && currentBlock.type !== "text")
589
+ ) {
590
+ if (currentBlock) {
591
+ if (currentBlock.type === "text") {
592
+ stream.push({
593
+ type: "text_end",
594
+ contentIndex: blocks.length - 1,
595
+ content: currentBlock.text,
596
+ partial: output,
597
+ });
598
+ } else {
599
+ stream.push({
600
+ type: "thinking_end",
601
+ contentIndex: blockIndex(),
602
+ content: currentBlock.thinking,
603
+ partial: output,
604
+ });
605
+ }
606
+ }
607
+ if (isThinking) {
608
+ currentBlock = { type: "thinking", thinking: "", thinkingSignature: undefined };
609
+ output.content.push(currentBlock);
610
+ ensureStarted();
611
+ stream.push({
612
+ type: "thinking_start",
613
+ contentIndex: blockIndex(),
614
+ partial: output,
615
+ });
616
+ } else {
617
+ currentBlock = { type: "text", text: "" };
618
+ output.content.push(currentBlock);
619
+ ensureStarted();
620
+ stream.push({ type: "text_start", contentIndex: blockIndex(), partial: output });
621
+ }
622
+ }
623
+ if (currentBlock.type === "thinking") {
624
+ currentBlock.thinking += part.text;
625
+ currentBlock.thinkingSignature = retainThoughtSignature(
626
+ currentBlock.thinkingSignature,
627
+ part.thoughtSignature,
628
+ );
629
+ stream.push({
630
+ type: "thinking_delta",
631
+ contentIndex: blockIndex(),
632
+ delta: part.text,
633
+ partial: output,
634
+ });
635
+ } else {
636
+ currentBlock.text += part.text;
637
+ currentBlock.textSignature = retainThoughtSignature(
638
+ currentBlock.textSignature,
639
+ part.thoughtSignature,
640
+ );
641
+ stream.push({
642
+ type: "text_delta",
643
+ contentIndex: blockIndex(),
644
+ delta: part.text,
645
+ partial: output,
646
+ });
647
+ }
648
+ }
649
+
650
+ if (part.functionCall) {
651
+ hasContent = true;
474
652
  if (currentBlock) {
475
653
  if (currentBlock.type === "text") {
476
654
  stream.push({
477
655
  type: "text_end",
478
- contentIndex: blocks.length - 1,
656
+ contentIndex: blockIndex(),
479
657
  content: currentBlock.text,
480
658
  partial: output,
481
659
  });
@@ -487,145 +665,144 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
487
665
  partial: output,
488
666
  });
489
667
  }
668
+ currentBlock = null;
490
669
  }
491
- if (isThinking) {
492
- currentBlock = { type: "thinking", thinking: "", thinkingSignature: undefined };
493
- output.content.push(currentBlock);
494
- stream.push({ type: "thinking_start", contentIndex: blockIndex(), partial: output });
495
- } else {
496
- currentBlock = { type: "text", text: "" };
497
- output.content.push(currentBlock);
498
- stream.push({ type: "text_start", contentIndex: blockIndex(), partial: output });
499
- }
500
- }
501
- if (currentBlock.type === "thinking") {
502
- currentBlock.thinking += part.text;
503
- currentBlock.thinkingSignature = retainThoughtSignature(
504
- currentBlock.thinkingSignature,
505
- part.thoughtSignature,
506
- );
670
+
671
+ const providedId = part.functionCall.id;
672
+ const needsNewId =
673
+ !providedId ||
674
+ output.content.some((b) => b.type === "toolCall" && b.id === providedId);
675
+ const toolCallId = needsNewId
676
+ ? `${part.functionCall.name}_${Date.now()}_${++toolCallCounter}`
677
+ : providedId;
678
+
679
+ const toolCall: ToolCall = {
680
+ type: "toolCall",
681
+ id: toolCallId,
682
+ name: part.functionCall.name || "",
683
+ arguments: part.functionCall.args as Record<string, unknown>,
684
+ ...(part.thoughtSignature && { thoughtSignature: part.thoughtSignature }),
685
+ };
686
+
687
+ output.content.push(toolCall);
688
+ ensureStarted();
689
+ stream.push({ type: "toolcall_start", contentIndex: blockIndex(), partial: output });
507
690
  stream.push({
508
- type: "thinking_delta",
691
+ type: "toolcall_delta",
509
692
  contentIndex: blockIndex(),
510
- delta: part.text,
693
+ delta: JSON.stringify(toolCall.arguments),
511
694
  partial: output,
512
695
  });
513
- } else {
514
- currentBlock.text += part.text;
515
- currentBlock.textSignature = retainThoughtSignature(
516
- currentBlock.textSignature,
517
- part.thoughtSignature,
518
- );
519
696
  stream.push({
520
- type: "text_delta",
697
+ type: "toolcall_end",
521
698
  contentIndex: blockIndex(),
522
- delta: part.text,
699
+ toolCall,
523
700
  partial: output,
524
701
  });
525
702
  }
526
703
  }
704
+ }
527
705
 
528
- if (part.functionCall) {
529
- if (currentBlock) {
530
- if (currentBlock.type === "text") {
531
- stream.push({
532
- type: "text_end",
533
- contentIndex: blockIndex(),
534
- content: currentBlock.text,
535
- partial: output,
536
- });
537
- } else {
538
- stream.push({
539
- type: "thinking_end",
540
- contentIndex: blockIndex(),
541
- content: currentBlock.thinking,
542
- partial: output,
543
- });
544
- }
545
- currentBlock = null;
546
- }
547
-
548
- const providedId = part.functionCall.id;
549
- const needsNewId =
550
- !providedId || output.content.some((b) => b.type === "toolCall" && b.id === providedId);
551
- const toolCallId = needsNewId
552
- ? `${part.functionCall.name}_${Date.now()}_${++toolCallCounter}`
553
- : providedId;
554
-
555
- const toolCall: ToolCall = {
556
- type: "toolCall",
557
- id: toolCallId,
558
- name: part.functionCall.name || "",
559
- arguments: part.functionCall.args as Record<string, unknown>,
560
- ...(part.thoughtSignature && { thoughtSignature: part.thoughtSignature }),
561
- };
562
-
563
- output.content.push(toolCall);
564
- stream.push({ type: "toolcall_start", contentIndex: blockIndex(), partial: output });
565
- stream.push({
566
- type: "toolcall_delta",
567
- contentIndex: blockIndex(),
568
- delta: JSON.stringify(toolCall.arguments),
569
- partial: output,
570
- });
571
- stream.push({ type: "toolcall_end", contentIndex: blockIndex(), toolCall, partial: output });
706
+ if (candidate?.finishReason) {
707
+ output.stopReason = mapStopReasonString(candidate.finishReason);
708
+ if (output.content.some((b) => b.type === "toolCall")) {
709
+ output.stopReason = "toolUse";
572
710
  }
573
711
  }
574
- }
575
712
 
576
- if (candidate?.finishReason) {
577
- output.stopReason = mapStopReasonString(candidate.finishReason);
578
- if (output.content.some((b) => b.type === "toolCall")) {
579
- output.stopReason = "toolUse";
713
+ if (responseData.usageMetadata) {
714
+ // promptTokenCount includes cachedContentTokenCount, so subtract to get fresh input
715
+ const promptTokens = responseData.usageMetadata.promptTokenCount || 0;
716
+ const cacheReadTokens = responseData.usageMetadata.cachedContentTokenCount || 0;
717
+ output.usage = {
718
+ input: promptTokens - cacheReadTokens,
719
+ output:
720
+ (responseData.usageMetadata.candidatesTokenCount || 0) +
721
+ (responseData.usageMetadata.thoughtsTokenCount || 0),
722
+ cacheRead: cacheReadTokens,
723
+ cacheWrite: 0,
724
+ totalTokens: responseData.usageMetadata.totalTokenCount || 0,
725
+ cost: {
726
+ input: 0,
727
+ output: 0,
728
+ cacheRead: 0,
729
+ cacheWrite: 0,
730
+ total: 0,
731
+ },
732
+ };
733
+ calculateCost(model, output.usage);
580
734
  }
581
735
  }
736
+ }
737
+ } finally {
738
+ options?.signal?.removeEventListener("abort", abortHandler);
739
+ }
582
740
 
583
- if (responseData.usageMetadata) {
584
- // promptTokenCount includes cachedContentTokenCount, so subtract to get fresh input
585
- const promptTokens = responseData.usageMetadata.promptTokenCount || 0;
586
- const cacheReadTokens = responseData.usageMetadata.cachedContentTokenCount || 0;
587
- output.usage = {
588
- input: promptTokens - cacheReadTokens,
589
- output:
590
- (responseData.usageMetadata.candidatesTokenCount || 0) +
591
- (responseData.usageMetadata.thoughtsTokenCount || 0),
592
- cacheRead: cacheReadTokens,
593
- cacheWrite: 0,
594
- totalTokens: responseData.usageMetadata.totalTokenCount || 0,
595
- cost: {
596
- input: 0,
597
- output: 0,
598
- cacheRead: 0,
599
- cacheWrite: 0,
600
- total: 0,
601
- },
602
- };
603
- calculateCost(model, output.usage);
604
- }
741
+ if (currentBlock) {
742
+ if (currentBlock.type === "text") {
743
+ stream.push({
744
+ type: "text_end",
745
+ contentIndex: blockIndex(),
746
+ content: currentBlock.text,
747
+ partial: output,
748
+ });
749
+ } else {
750
+ stream.push({
751
+ type: "thinking_end",
752
+ contentIndex: blockIndex(),
753
+ content: currentBlock.thinking,
754
+ partial: output,
755
+ });
605
756
  }
606
757
  }
607
- } finally {
608
- options?.signal?.removeEventListener("abort", abortHandler);
609
- }
610
758
 
611
- if (currentBlock) {
612
- if (currentBlock.type === "text") {
613
- stream.push({
614
- type: "text_end",
615
- contentIndex: blockIndex(),
616
- content: currentBlock.text,
617
- partial: output,
618
- });
619
- } else {
620
- stream.push({
621
- type: "thinking_end",
622
- contentIndex: blockIndex(),
623
- content: currentBlock.thinking,
624
- partial: output,
759
+ return hasContent;
760
+ };
761
+
762
+ let receivedContent = false;
763
+ let currentResponse = response;
764
+
765
+ for (let emptyAttempt = 0; emptyAttempt <= MAX_EMPTY_STREAM_RETRIES; emptyAttempt++) {
766
+ if (options?.signal?.aborted) {
767
+ throw new Error("Request was aborted");
768
+ }
769
+
770
+ if (emptyAttempt > 0) {
771
+ const backoffMs = EMPTY_STREAM_BASE_DELAY_MS * 2 ** (emptyAttempt - 1);
772
+ await sleep(backoffMs, options?.signal);
773
+
774
+ if (!requestUrl) {
775
+ throw new Error("Missing request URL");
776
+ }
777
+
778
+ currentResponse = await fetch(requestUrl, {
779
+ method: "POST",
780
+ headers: requestHeaders,
781
+ body: requestBodyJson,
782
+ signal: options?.signal,
625
783
  });
784
+
785
+ if (!currentResponse.ok) {
786
+ const retryErrorText = await currentResponse.text();
787
+ throw new Error(`Cloud Code Assist API error (${currentResponse.status}): ${retryErrorText}`);
788
+ }
789
+ }
790
+
791
+ const streamed = await streamResponse(currentResponse);
792
+ if (streamed) {
793
+ receivedContent = true;
794
+ break;
795
+ }
796
+
797
+ if (emptyAttempt < MAX_EMPTY_STREAM_RETRIES) {
798
+ resetOutput();
626
799
  }
627
800
  }
628
801
 
802
+ if (!receivedContent) {
803
+ throw new Error("Cloud Code Assist API returned an empty response");
804
+ }
805
+
629
806
  if (options?.signal?.aborted) {
630
807
  throw new Error("Request was aborted");
631
808
  }
@@ -643,7 +820,7 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
643
820
  }
644
821
  }
645
822
  output.stopReason = options?.signal?.aborted ? "aborted" : "error";
646
- output.errorMessage = formatErrorMessageWithRetryAfter(error);
823
+ output.errorMessage = error instanceof Error ? error.message : JSON.stringify(error);
647
824
  stream.push({ type: "error", reason: output.stopReason, error: output });
648
825
  stream.end();
649
826
  }
@@ -652,7 +829,34 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
652
829
  return stream;
653
830
  };
654
831
 
655
- function buildRequest(
832
+ function deriveSessionId(context: Context): string | undefined {
833
+ for (const message of context.messages) {
834
+ if (message.role !== "user") {
835
+ continue;
836
+ }
837
+
838
+ let text = "";
839
+ if (typeof message.content === "string") {
840
+ text = message.content;
841
+ } else if (Array.isArray(message.content)) {
842
+ text = message.content
843
+ .filter((item): item is TextContent => item.type === "text")
844
+ .map((item) => item.text)
845
+ .join("\n");
846
+ }
847
+
848
+ if (!text || text.trim().length === 0) {
849
+ return undefined;
850
+ }
851
+
852
+ const hash = createHash("sha256").update(text).digest("hex");
853
+ return hash.slice(0, 32);
854
+ }
855
+
856
+ return undefined;
857
+ }
858
+
859
+ export function buildRequest(
656
860
  model: Model<"google-gemini-cli">,
657
861
  context: Context,
658
862
  projectId: string,
@@ -687,6 +891,11 @@ function buildRequest(
687
891
  contents,
688
892
  };
689
893
 
894
+ const sessionId = deriveSessionId(context);
895
+ if (sessionId) {
896
+ request.sessionId = sessionId;
897
+ }
898
+
690
899
  // System instruction must be object with parts, not plain string
691
900
  if (context.systemPrompt) {
692
901
  request.systemInstruction = {