@mariozechner/pi-ai 0.43.0 → 0.45.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. package/README.md +87 -0
  2. package/dist/models.generated.d.ts +922 -17
  3. package/dist/models.generated.d.ts.map +1 -1
  4. package/dist/models.generated.js +932 -27
  5. package/dist/models.generated.js.map +1 -1
  6. package/dist/providers/amazon-bedrock.d.ts +14 -0
  7. package/dist/providers/amazon-bedrock.d.ts.map +1 -0
  8. package/dist/providers/amazon-bedrock.js +435 -0
  9. package/dist/providers/amazon-bedrock.js.map +1 -0
  10. package/dist/providers/anthropic.d.ts.map +1 -1
  11. package/dist/providers/anthropic.js +3 -3
  12. package/dist/providers/anthropic.js.map +1 -1
  13. package/dist/providers/google-gemini-cli.d.ts +43 -1
  14. package/dist/providers/google-gemini-cli.d.ts.map +1 -1
  15. package/dist/providers/google-gemini-cli.js +369 -182
  16. package/dist/providers/google-gemini-cli.js.map +1 -1
  17. package/dist/providers/google-shared.d.ts +4 -0
  18. package/dist/providers/google-shared.d.ts.map +1 -1
  19. package/dist/providers/google-shared.js +32 -5
  20. package/dist/providers/google-shared.js.map +1 -1
  21. package/dist/providers/openai-codex-responses.d.ts.map +1 -1
  22. package/dist/providers/openai-codex-responses.js +1 -1
  23. package/dist/providers/openai-codex-responses.js.map +1 -1
  24. package/dist/providers/openai-completions.d.ts.map +1 -1
  25. package/dist/providers/openai-completions.js +30 -1
  26. package/dist/providers/openai-completions.js.map +1 -1
  27. package/dist/providers/openai-responses.d.ts +2 -0
  28. package/dist/providers/openai-responses.d.ts.map +1 -1
  29. package/dist/providers/openai-responses.js +25 -1
  30. package/dist/providers/openai-responses.js.map +1 -1
  31. package/dist/providers/{transorm-messages.d.ts → transform-messages.d.ts} +1 -1
  32. package/dist/providers/transform-messages.d.ts.map +1 -0
  33. package/dist/providers/{transorm-messages.js → transform-messages.js} +1 -1
  34. package/dist/providers/transform-messages.js.map +1 -0
  35. package/dist/stream.d.ts.map +1 -1
  36. package/dist/stream.js +28 -0
  37. package/dist/stream.js.map +1 -1
  38. package/dist/types.d.ts +4 -2
  39. package/dist/types.d.ts.map +1 -1
  40. package/dist/types.js.map +1 -1
  41. package/dist/utils/overflow.d.ts.map +1 -1
  42. package/dist/utils/overflow.js +3 -0
  43. package/dist/utils/overflow.js.map +1 -1
  44. package/package.json +3 -1
  45. package/dist/providers/transorm-messages.d.ts.map +0 -1
  46. package/dist/providers/transorm-messages.js.map +0 -1
@@ -3,11 +3,14 @@
3
3
  * Shared implementation for both google-gemini-cli and google-antigravity providers.
4
4
  * Uses the Cloud Code Assist API endpoint to access Gemini and Claude models.
5
5
  */
6
+ import { createHash } from "node:crypto";
6
7
  import { calculateCost } from "../models.js";
7
8
  import { AssistantMessageEventStream } from "../utils/event-stream.js";
8
9
  import { sanitizeSurrogates } from "../utils/sanitize-unicode.js";
9
10
  import { convertMessages, convertTools, isThinkingPart, mapStopReasonString, mapToolChoice, retainThoughtSignature, } from "./google-shared.js";
10
11
  const DEFAULT_ENDPOINT = "https://cloudcode-pa.googleapis.com";
12
+ const ANTIGRAVITY_DAILY_ENDPOINT = "https://daily-cloudcode-pa.sandbox.googleapis.com";
13
+ const ANTIGRAVITY_ENDPOINT_FALLBACKS = [ANTIGRAVITY_DAILY_ENDPOINT, DEFAULT_ENDPOINT];
11
14
  // Headers for Gemini CLI (prod endpoint)
12
15
  const GEMINI_CLI_HEADERS = {
13
16
  "User-Agent": "google-cloud-sdk vscode_cloudshelleditor/0.1",
@@ -113,15 +116,61 @@ let toolCallCounter = 0;
113
116
  // Retry configuration
114
117
  const MAX_RETRIES = 3;
115
118
  const BASE_DELAY_MS = 1000;
119
+ const MAX_EMPTY_STREAM_RETRIES = 2;
120
+ const EMPTY_STREAM_BASE_DELAY_MS = 500;
121
+ const CLAUDE_THINKING_BETA_HEADER = "interleaved-thinking-2025-05-14";
116
122
  /**
117
123
  * Extract retry delay from Gemini error response (in milliseconds).
118
- * Parses patterns like:
124
+ * Checks headers first (Retry-After, x-ratelimit-reset, x-ratelimit-reset-after),
125
+ * then parses body patterns like:
119
126
  * - "Your quota will reset after 39s"
120
127
  * - "Your quota will reset after 18h31m10s"
121
128
  * - "Please retry in Xs" or "Please retry in Xms"
122
129
  * - "retryDelay": "34.074824224s" (JSON field)
123
130
  */
124
- function extractRetryDelay(errorText) {
131
+ export function extractRetryDelay(errorText, response) {
132
+ const normalizeDelay = (ms) => (ms > 0 ? Math.ceil(ms + 1000) : undefined);
133
+ const headers = response instanceof Headers ? response : response?.headers;
134
+ if (headers) {
135
+ const retryAfter = headers.get("retry-after");
136
+ if (retryAfter) {
137
+ const retryAfterSeconds = Number(retryAfter);
138
+ if (Number.isFinite(retryAfterSeconds)) {
139
+ const delay = normalizeDelay(retryAfterSeconds * 1000);
140
+ if (delay !== undefined) {
141
+ return delay;
142
+ }
143
+ }
144
+ const retryAfterDate = new Date(retryAfter);
145
+ const retryAfterMs = retryAfterDate.getTime();
146
+ if (!Number.isNaN(retryAfterMs)) {
147
+ const delay = normalizeDelay(retryAfterMs - Date.now());
148
+ if (delay !== undefined) {
149
+ return delay;
150
+ }
151
+ }
152
+ }
153
+ const rateLimitReset = headers.get("x-ratelimit-reset");
154
+ if (rateLimitReset) {
155
+ const resetSeconds = Number.parseInt(rateLimitReset, 10);
156
+ if (!Number.isNaN(resetSeconds)) {
157
+ const delay = normalizeDelay(resetSeconds * 1000 - Date.now());
158
+ if (delay !== undefined) {
159
+ return delay;
160
+ }
161
+ }
162
+ }
163
+ const rateLimitResetAfter = headers.get("x-ratelimit-reset-after");
164
+ if (rateLimitResetAfter) {
165
+ const resetAfterSeconds = Number(rateLimitResetAfter);
166
+ if (Number.isFinite(resetAfterSeconds)) {
167
+ const delay = normalizeDelay(resetAfterSeconds * 1000);
168
+ if (delay !== undefined) {
169
+ return delay;
170
+ }
171
+ }
172
+ }
173
+ }
125
174
  // Pattern 1: "Your quota will reset after ..." (formats: "18h31m10s", "10m15s", "6s", "39s")
126
175
  const durationMatch = errorText.match(/reset after (?:(\d+)h)?(?:(\d+)m)?(\d+(?:\.\d+)?)s/i);
127
176
  if (durationMatch) {
@@ -130,8 +179,9 @@ function extractRetryDelay(errorText) {
130
179
  const seconds = parseFloat(durationMatch[3]);
131
180
  if (!Number.isNaN(seconds)) {
132
181
  const totalMs = ((hours * 60 + minutes) * 60 + seconds) * 1000;
133
- if (totalMs > 0) {
134
- return Math.ceil(totalMs + 1000); // Add 1s buffer
182
+ const delay = normalizeDelay(totalMs);
183
+ if (delay !== undefined) {
184
+ return delay;
135
185
  }
136
186
  }
137
187
  }
@@ -141,7 +191,10 @@ function extractRetryDelay(errorText) {
141
191
  const value = parseFloat(retryInMatch[1]);
142
192
  if (!Number.isNaN(value) && value > 0) {
143
193
  const ms = retryInMatch[2].toLowerCase() === "ms" ? value : value * 1000;
144
- return Math.ceil(ms + 1000);
194
+ const delay = normalizeDelay(ms);
195
+ if (delay !== undefined) {
196
+ return delay;
197
+ }
145
198
  }
146
199
  }
147
200
  // Pattern 3: "retryDelay": "34.074824224s" (JSON field in error details)
@@ -150,19 +203,42 @@ function extractRetryDelay(errorText) {
150
203
  const value = parseFloat(retryDelayMatch[1]);
151
204
  if (!Number.isNaN(value) && value > 0) {
152
205
  const ms = retryDelayMatch[2].toLowerCase() === "ms" ? value : value * 1000;
153
- return Math.ceil(ms + 1000);
206
+ const delay = normalizeDelay(ms);
207
+ if (delay !== undefined) {
208
+ return delay;
209
+ }
154
210
  }
155
211
  }
156
212
  return undefined;
157
213
  }
214
+ function isClaudeThinkingModel(modelId) {
215
+ const normalized = modelId.toLowerCase();
216
+ return normalized.includes("claude") && normalized.includes("thinking");
217
+ }
158
218
  /**
159
- * Check if an error is retryable (rate limit, server error, etc.)
219
+ * Check if an error is retryable (rate limit, server error, network error, etc.)
160
220
  */
161
221
  function isRetryableError(status, errorText) {
162
222
  if (status === 429 || status === 500 || status === 502 || status === 503 || status === 504) {
163
223
  return true;
164
224
  }
165
- return /resource.?exhausted|rate.?limit|overloaded|service.?unavailable/i.test(errorText);
225
+ return /resource.?exhausted|rate.?limit|overloaded|service.?unavailable|other.?side.?closed/i.test(errorText);
226
+ }
227
+ /**
228
+ * Extract a clean, user-friendly error message from Google API error response.
229
+ * Parses JSON error responses and returns just the message field.
230
+ */
231
+ function extractErrorMessage(errorText) {
232
+ try {
233
+ const parsed = JSON.parse(errorText);
234
+ if (parsed.error?.message) {
235
+ return parsed.error.message;
236
+ }
237
+ }
238
+ catch {
239
+ // Not JSON, return as-is
240
+ }
241
+ return errorText;
166
242
  }
167
243
  /**
168
244
  * Sleep for a given number of milliseconds, respecting abort signal.
@@ -219,29 +295,34 @@ export const streamGoogleGeminiCli = (model, context, options) => {
219
295
  if (!accessToken || !projectId) {
220
296
  throw new Error("Missing token or projectId in Google Cloud credentials. Use /login to re-authenticate.");
221
297
  }
222
- const endpoint = model.baseUrl || DEFAULT_ENDPOINT;
223
- const url = `${endpoint}/v1internal:streamGenerateContent?alt=sse`;
224
- // Use Antigravity headers for sandbox endpoint, otherwise Gemini CLI headers
225
- const isAntigravity = endpoint.includes("sandbox.googleapis.com");
298
+ const isAntigravity = model.provider === "google-antigravity";
299
+ const baseUrl = model.baseUrl?.trim();
300
+ const endpoints = baseUrl ? [baseUrl] : isAntigravity ? ANTIGRAVITY_ENDPOINT_FALLBACKS : [DEFAULT_ENDPOINT];
226
301
  const requestBody = buildRequest(model, context, projectId, options, isAntigravity);
227
302
  const headers = isAntigravity ? ANTIGRAVITY_HEADERS : GEMINI_CLI_HEADERS;
303
+ const requestHeaders = {
304
+ Authorization: `Bearer ${accessToken}`,
305
+ "Content-Type": "application/json",
306
+ Accept: "text/event-stream",
307
+ ...headers,
308
+ ...(isClaudeThinkingModel(model.id) ? { "anthropic-beta": CLAUDE_THINKING_BETA_HEADER } : {}),
309
+ };
310
+ const requestBodyJson = JSON.stringify(requestBody);
228
311
  // Fetch with retry logic for rate limits and transient errors
229
312
  let response;
230
313
  let lastError;
314
+ let requestUrl;
231
315
  for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
232
316
  if (options?.signal?.aborted) {
233
317
  throw new Error("Request was aborted");
234
318
  }
235
319
  try {
236
- response = await fetch(url, {
320
+ const endpoint = endpoints[Math.min(attempt, endpoints.length - 1)];
321
+ requestUrl = `${endpoint}/v1internal:streamGenerateContent?alt=sse`;
322
+ response = await fetch(requestUrl, {
237
323
  method: "POST",
238
- headers: {
239
- Authorization: `Bearer ${accessToken}`,
240
- "Content-Type": "application/json",
241
- Accept: "text/event-stream",
242
- ...headers,
243
- },
244
- body: JSON.stringify(requestBody),
324
+ headers: requestHeaders,
325
+ body: requestBodyJson,
245
326
  signal: options?.signal,
246
327
  });
247
328
  if (response.ok) {
@@ -251,13 +332,13 @@ export const streamGoogleGeminiCli = (model, context, options) => {
251
332
  // Check if retryable
252
333
  if (attempt < MAX_RETRIES && isRetryableError(response.status, errorText)) {
253
334
  // Use server-provided delay or exponential backoff
254
- const serverDelay = extractRetryDelay(errorText);
335
+ const serverDelay = extractRetryDelay(errorText, response);
255
336
  const delayMs = serverDelay ?? BASE_DELAY_MS * 2 ** attempt;
256
337
  await sleep(delayMs, options?.signal);
257
338
  continue;
258
339
  }
259
340
  // Not retryable or max retries exceeded
260
- throw new Error(`Cloud Code Assist API error (${response.status}): ${errorText}`);
341
+ throw new Error(`Cloud Code Assist API error (${response.status}): ${extractErrorMessage(errorText)}`);
261
342
  }
262
343
  catch (error) {
263
344
  // Check for abort - fetch throws AbortError, our code throws "Request was aborted"
@@ -266,7 +347,11 @@ export const streamGoogleGeminiCli = (model, context, options) => {
266
347
  throw new Error("Request was aborted");
267
348
  }
268
349
  }
350
+ // Extract detailed error message from fetch errors (Node includes cause)
269
351
  lastError = error instanceof Error ? error : new Error(String(error));
352
+ if (lastError.message === "fetch failed" && lastError.cause instanceof Error) {
353
+ lastError = new Error(`Network error: ${lastError.cause.message}`);
354
+ }
270
355
  // Network errors are retryable
271
356
  if (attempt < MAX_RETRIES) {
272
357
  const delayMs = BASE_DELAY_MS * 2 ** attempt;
@@ -279,64 +364,146 @@ export const streamGoogleGeminiCli = (model, context, options) => {
279
364
  if (!response || !response.ok) {
280
365
  throw lastError ?? new Error("Failed to get response after retries");
281
366
  }
282
- if (!response.body) {
283
- throw new Error("No response body");
284
- }
285
- stream.push({ type: "start", partial: output });
286
- let currentBlock = null;
287
- const blocks = output.content;
288
- const blockIndex = () => blocks.length - 1;
289
- // Read SSE stream
290
- const reader = response.body.getReader();
291
- const decoder = new TextDecoder();
292
- let buffer = "";
293
- // Set up abort handler to cancel reader when signal fires
294
- const abortHandler = () => {
295
- void reader.cancel().catch(() => { });
367
+ let started = false;
368
+ const ensureStarted = () => {
369
+ if (!started) {
370
+ stream.push({ type: "start", partial: output });
371
+ started = true;
372
+ }
296
373
  };
297
- options?.signal?.addEventListener("abort", abortHandler);
298
- try {
299
- while (true) {
300
- // Check abort signal before each read
301
- if (options?.signal?.aborted) {
302
- throw new Error("Request was aborted");
303
- }
304
- const { done, value } = await reader.read();
305
- if (done)
306
- break;
307
- buffer += decoder.decode(value, { stream: true });
308
- const lines = buffer.split("\n");
309
- buffer = lines.pop() || "";
310
- for (const line of lines) {
311
- if (!line.startsWith("data:"))
312
- continue;
313
- const jsonStr = line.slice(5).trim();
314
- if (!jsonStr)
315
- continue;
316
- let chunk;
317
- try {
318
- chunk = JSON.parse(jsonStr);
319
- }
320
- catch {
321
- continue;
374
+ const resetOutput = () => {
375
+ output.content = [];
376
+ output.usage = {
377
+ input: 0,
378
+ output: 0,
379
+ cacheRead: 0,
380
+ cacheWrite: 0,
381
+ totalTokens: 0,
382
+ cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
383
+ };
384
+ output.stopReason = "stop";
385
+ output.errorMessage = undefined;
386
+ output.timestamp = Date.now();
387
+ started = false;
388
+ };
389
+ const streamResponse = async (activeResponse) => {
390
+ if (!activeResponse.body) {
391
+ throw new Error("No response body");
392
+ }
393
+ let hasContent = false;
394
+ let currentBlock = null;
395
+ const blocks = output.content;
396
+ const blockIndex = () => blocks.length - 1;
397
+ // Read SSE stream
398
+ const reader = activeResponse.body.getReader();
399
+ const decoder = new TextDecoder();
400
+ let buffer = "";
401
+ // Set up abort handler to cancel reader when signal fires
402
+ const abortHandler = () => {
403
+ void reader.cancel().catch(() => { });
404
+ };
405
+ options?.signal?.addEventListener("abort", abortHandler);
406
+ try {
407
+ while (true) {
408
+ // Check abort signal before each read
409
+ if (options?.signal?.aborted) {
410
+ throw new Error("Request was aborted");
322
411
  }
323
- // Unwrap the response
324
- const responseData = chunk.response;
325
- if (!responseData)
326
- continue;
327
- const candidate = responseData.candidates?.[0];
328
- if (candidate?.content?.parts) {
329
- for (const part of candidate.content.parts) {
330
- if (part.text !== undefined) {
331
- const isThinking = isThinkingPart(part);
332
- if (!currentBlock ||
333
- (isThinking && currentBlock.type !== "thinking") ||
334
- (!isThinking && currentBlock.type !== "text")) {
412
+ const { done, value } = await reader.read();
413
+ if (done)
414
+ break;
415
+ buffer += decoder.decode(value, { stream: true });
416
+ const lines = buffer.split("\n");
417
+ buffer = lines.pop() || "";
418
+ for (const line of lines) {
419
+ if (!line.startsWith("data:"))
420
+ continue;
421
+ const jsonStr = line.slice(5).trim();
422
+ if (!jsonStr)
423
+ continue;
424
+ let chunk;
425
+ try {
426
+ chunk = JSON.parse(jsonStr);
427
+ }
428
+ catch {
429
+ continue;
430
+ }
431
+ // Unwrap the response
432
+ const responseData = chunk.response;
433
+ if (!responseData)
434
+ continue;
435
+ const candidate = responseData.candidates?.[0];
436
+ if (candidate?.content?.parts) {
437
+ for (const part of candidate.content.parts) {
438
+ if (part.text !== undefined) {
439
+ hasContent = true;
440
+ const isThinking = isThinkingPart(part);
441
+ if (!currentBlock ||
442
+ (isThinking && currentBlock.type !== "thinking") ||
443
+ (!isThinking && currentBlock.type !== "text")) {
444
+ if (currentBlock) {
445
+ if (currentBlock.type === "text") {
446
+ stream.push({
447
+ type: "text_end",
448
+ contentIndex: blocks.length - 1,
449
+ content: currentBlock.text,
450
+ partial: output,
451
+ });
452
+ }
453
+ else {
454
+ stream.push({
455
+ type: "thinking_end",
456
+ contentIndex: blockIndex(),
457
+ content: currentBlock.thinking,
458
+ partial: output,
459
+ });
460
+ }
461
+ }
462
+ if (isThinking) {
463
+ currentBlock = { type: "thinking", thinking: "", thinkingSignature: undefined };
464
+ output.content.push(currentBlock);
465
+ ensureStarted();
466
+ stream.push({
467
+ type: "thinking_start",
468
+ contentIndex: blockIndex(),
469
+ partial: output,
470
+ });
471
+ }
472
+ else {
473
+ currentBlock = { type: "text", text: "" };
474
+ output.content.push(currentBlock);
475
+ ensureStarted();
476
+ stream.push({ type: "text_start", contentIndex: blockIndex(), partial: output });
477
+ }
478
+ }
479
+ if (currentBlock.type === "thinking") {
480
+ currentBlock.thinking += part.text;
481
+ currentBlock.thinkingSignature = retainThoughtSignature(currentBlock.thinkingSignature, part.thoughtSignature);
482
+ stream.push({
483
+ type: "thinking_delta",
484
+ contentIndex: blockIndex(),
485
+ delta: part.text,
486
+ partial: output,
487
+ });
488
+ }
489
+ else {
490
+ currentBlock.text += part.text;
491
+ currentBlock.textSignature = retainThoughtSignature(currentBlock.textSignature, part.thoughtSignature);
492
+ stream.push({
493
+ type: "text_delta",
494
+ contentIndex: blockIndex(),
495
+ delta: part.text,
496
+ partial: output,
497
+ });
498
+ }
499
+ }
500
+ if (part.functionCall) {
501
+ hasContent = true;
335
502
  if (currentBlock) {
336
503
  if (currentBlock.type === "text") {
337
504
  stream.push({
338
505
  type: "text_end",
339
- contentIndex: blocks.length - 1,
506
+ contentIndex: blockIndex(),
340
507
  content: currentBlock.text,
341
508
  partial: output,
342
509
  });
@@ -349,134 +516,127 @@ export const streamGoogleGeminiCli = (model, context, options) => {
349
516
  partial: output,
350
517
  });
351
518
  }
519
+ currentBlock = null;
352
520
  }
353
- if (isThinking) {
354
- currentBlock = { type: "thinking", thinking: "", thinkingSignature: undefined };
355
- output.content.push(currentBlock);
356
- stream.push({ type: "thinking_start", contentIndex: blockIndex(), partial: output });
357
- }
358
- else {
359
- currentBlock = { type: "text", text: "" };
360
- output.content.push(currentBlock);
361
- stream.push({ type: "text_start", contentIndex: blockIndex(), partial: output });
362
- }
363
- }
364
- if (currentBlock.type === "thinking") {
365
- currentBlock.thinking += part.text;
366
- currentBlock.thinkingSignature = retainThoughtSignature(currentBlock.thinkingSignature, part.thoughtSignature);
521
+ const providedId = part.functionCall.id;
522
+ const needsNewId = !providedId ||
523
+ output.content.some((b) => b.type === "toolCall" && b.id === providedId);
524
+ const toolCallId = needsNewId
525
+ ? `${part.functionCall.name}_${Date.now()}_${++toolCallCounter}`
526
+ : providedId;
527
+ const toolCall = {
528
+ type: "toolCall",
529
+ id: toolCallId,
530
+ name: part.functionCall.name || "",
531
+ arguments: part.functionCall.args,
532
+ ...(part.thoughtSignature && { thoughtSignature: part.thoughtSignature }),
533
+ };
534
+ output.content.push(toolCall);
535
+ ensureStarted();
536
+ stream.push({ type: "toolcall_start", contentIndex: blockIndex(), partial: output });
367
537
  stream.push({
368
- type: "thinking_delta",
538
+ type: "toolcall_delta",
369
539
  contentIndex: blockIndex(),
370
- delta: part.text,
540
+ delta: JSON.stringify(toolCall.arguments),
371
541
  partial: output,
372
542
  });
373
- }
374
- else {
375
- currentBlock.text += part.text;
376
- currentBlock.textSignature = retainThoughtSignature(currentBlock.textSignature, part.thoughtSignature);
377
543
  stream.push({
378
- type: "text_delta",
544
+ type: "toolcall_end",
379
545
  contentIndex: blockIndex(),
380
- delta: part.text,
546
+ toolCall,
381
547
  partial: output,
382
548
  });
383
549
  }
384
550
  }
385
- if (part.functionCall) {
386
- if (currentBlock) {
387
- if (currentBlock.type === "text") {
388
- stream.push({
389
- type: "text_end",
390
- contentIndex: blockIndex(),
391
- content: currentBlock.text,
392
- partial: output,
393
- });
394
- }
395
- else {
396
- stream.push({
397
- type: "thinking_end",
398
- contentIndex: blockIndex(),
399
- content: currentBlock.thinking,
400
- partial: output,
401
- });
402
- }
403
- currentBlock = null;
404
- }
405
- const providedId = part.functionCall.id;
406
- const needsNewId = !providedId || output.content.some((b) => b.type === "toolCall" && b.id === providedId);
407
- const toolCallId = needsNewId
408
- ? `${part.functionCall.name}_${Date.now()}_${++toolCallCounter}`
409
- : providedId;
410
- const toolCall = {
411
- type: "toolCall",
412
- id: toolCallId,
413
- name: part.functionCall.name || "",
414
- arguments: part.functionCall.args,
415
- ...(part.thoughtSignature && { thoughtSignature: part.thoughtSignature }),
416
- };
417
- output.content.push(toolCall);
418
- stream.push({ type: "toolcall_start", contentIndex: blockIndex(), partial: output });
419
- stream.push({
420
- type: "toolcall_delta",
421
- contentIndex: blockIndex(),
422
- delta: JSON.stringify(toolCall.arguments),
423
- partial: output,
424
- });
425
- stream.push({ type: "toolcall_end", contentIndex: blockIndex(), toolCall, partial: output });
426
- }
427
551
  }
428
- }
429
- if (candidate?.finishReason) {
430
- output.stopReason = mapStopReasonString(candidate.finishReason);
431
- if (output.content.some((b) => b.type === "toolCall")) {
432
- output.stopReason = "toolUse";
552
+ if (candidate?.finishReason) {
553
+ output.stopReason = mapStopReasonString(candidate.finishReason);
554
+ if (output.content.some((b) => b.type === "toolCall")) {
555
+ output.stopReason = "toolUse";
556
+ }
433
557
  }
434
- }
435
- if (responseData.usageMetadata) {
436
- // promptTokenCount includes cachedContentTokenCount, so subtract to get fresh input
437
- const promptTokens = responseData.usageMetadata.promptTokenCount || 0;
438
- const cacheReadTokens = responseData.usageMetadata.cachedContentTokenCount || 0;
439
- output.usage = {
440
- input: promptTokens - cacheReadTokens,
441
- output: (responseData.usageMetadata.candidatesTokenCount || 0) +
442
- (responseData.usageMetadata.thoughtsTokenCount || 0),
443
- cacheRead: cacheReadTokens,
444
- cacheWrite: 0,
445
- totalTokens: responseData.usageMetadata.totalTokenCount || 0,
446
- cost: {
447
- input: 0,
448
- output: 0,
449
- cacheRead: 0,
558
+ if (responseData.usageMetadata) {
559
+ // promptTokenCount includes cachedContentTokenCount, so subtract to get fresh input
560
+ const promptTokens = responseData.usageMetadata.promptTokenCount || 0;
561
+ const cacheReadTokens = responseData.usageMetadata.cachedContentTokenCount || 0;
562
+ output.usage = {
563
+ input: promptTokens - cacheReadTokens,
564
+ output: (responseData.usageMetadata.candidatesTokenCount || 0) +
565
+ (responseData.usageMetadata.thoughtsTokenCount || 0),
566
+ cacheRead: cacheReadTokens,
450
567
  cacheWrite: 0,
451
- total: 0,
452
- },
453
- };
454
- calculateCost(model, output.usage);
568
+ totalTokens: responseData.usageMetadata.totalTokenCount || 0,
569
+ cost: {
570
+ input: 0,
571
+ output: 0,
572
+ cacheRead: 0,
573
+ cacheWrite: 0,
574
+ total: 0,
575
+ },
576
+ };
577
+ calculateCost(model, output.usage);
578
+ }
455
579
  }
456
580
  }
457
581
  }
458
- }
459
- finally {
460
- options?.signal?.removeEventListener("abort", abortHandler);
461
- }
462
- if (currentBlock) {
463
- if (currentBlock.type === "text") {
464
- stream.push({
465
- type: "text_end",
466
- contentIndex: blockIndex(),
467
- content: currentBlock.text,
468
- partial: output,
469
- });
582
+ finally {
583
+ options?.signal?.removeEventListener("abort", abortHandler);
470
584
  }
471
- else {
472
- stream.push({
473
- type: "thinking_end",
474
- contentIndex: blockIndex(),
475
- content: currentBlock.thinking,
476
- partial: output,
585
+ if (currentBlock) {
586
+ if (currentBlock.type === "text") {
587
+ stream.push({
588
+ type: "text_end",
589
+ contentIndex: blockIndex(),
590
+ content: currentBlock.text,
591
+ partial: output,
592
+ });
593
+ }
594
+ else {
595
+ stream.push({
596
+ type: "thinking_end",
597
+ contentIndex: blockIndex(),
598
+ content: currentBlock.thinking,
599
+ partial: output,
600
+ });
601
+ }
602
+ }
603
+ return hasContent;
604
+ };
605
+ let receivedContent = false;
606
+ let currentResponse = response;
607
+ for (let emptyAttempt = 0; emptyAttempt <= MAX_EMPTY_STREAM_RETRIES; emptyAttempt++) {
608
+ if (options?.signal?.aborted) {
609
+ throw new Error("Request was aborted");
610
+ }
611
+ if (emptyAttempt > 0) {
612
+ const backoffMs = EMPTY_STREAM_BASE_DELAY_MS * 2 ** (emptyAttempt - 1);
613
+ await sleep(backoffMs, options?.signal);
614
+ if (!requestUrl) {
615
+ throw new Error("Missing request URL");
616
+ }
617
+ currentResponse = await fetch(requestUrl, {
618
+ method: "POST",
619
+ headers: requestHeaders,
620
+ body: requestBodyJson,
621
+ signal: options?.signal,
477
622
  });
623
+ if (!currentResponse.ok) {
624
+ const retryErrorText = await currentResponse.text();
625
+ throw new Error(`Cloud Code Assist API error (${currentResponse.status}): ${retryErrorText}`);
626
+ }
627
+ }
628
+ const streamed = await streamResponse(currentResponse);
629
+ if (streamed) {
630
+ receivedContent = true;
631
+ break;
632
+ }
633
+ if (emptyAttempt < MAX_EMPTY_STREAM_RETRIES) {
634
+ resetOutput();
478
635
  }
479
636
  }
637
+ if (!receivedContent) {
638
+ throw new Error("Cloud Code Assist API returned an empty response");
639
+ }
480
640
  if (options?.signal?.aborted) {
481
641
  throw new Error("Request was aborted");
482
642
  }
@@ -500,7 +660,30 @@ export const streamGoogleGeminiCli = (model, context, options) => {
500
660
  })();
501
661
  return stream;
502
662
  };
503
- function buildRequest(model, context, projectId, options = {}, isAntigravity = false) {
663
+ function deriveSessionId(context) {
664
+ for (const message of context.messages) {
665
+ if (message.role !== "user") {
666
+ continue;
667
+ }
668
+ let text = "";
669
+ if (typeof message.content === "string") {
670
+ text = message.content;
671
+ }
672
+ else if (Array.isArray(message.content)) {
673
+ text = message.content
674
+ .filter((item) => item.type === "text")
675
+ .map((item) => item.text)
676
+ .join("\n");
677
+ }
678
+ if (!text || text.trim().length === 0) {
679
+ return undefined;
680
+ }
681
+ const hash = createHash("sha256").update(text).digest("hex");
682
+ return hash.slice(0, 32);
683
+ }
684
+ return undefined;
685
+ }
686
+ export function buildRequest(model, context, projectId, options = {}, isAntigravity = false) {
504
687
  const contents = convertMessages(model, context);
505
688
  const generationConfig = {};
506
689
  if (options.temperature !== undefined) {
@@ -526,6 +709,10 @@ function buildRequest(model, context, projectId, options = {}, isAntigravity = f
526
709
  const request = {
527
710
  contents,
528
711
  };
712
+ const sessionId = deriveSessionId(context);
713
+ if (sessionId) {
714
+ request.sessionId = sessionId;
715
+ }
529
716
  // System instruction must be object with parts, not plain string
530
717
  if (context.systemPrompt) {
531
718
  request.systemInstruction = {