@mariozechner/pi-ai 0.43.0 → 0.45.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. package/README.md +87 -0
  2. package/dist/models.generated.d.ts +922 -17
  3. package/dist/models.generated.d.ts.map +1 -1
  4. package/dist/models.generated.js +932 -27
  5. package/dist/models.generated.js.map +1 -1
  6. package/dist/providers/amazon-bedrock.d.ts +14 -0
  7. package/dist/providers/amazon-bedrock.d.ts.map +1 -0
  8. package/dist/providers/amazon-bedrock.js +435 -0
  9. package/dist/providers/amazon-bedrock.js.map +1 -0
  10. package/dist/providers/anthropic.d.ts.map +1 -1
  11. package/dist/providers/anthropic.js +3 -3
  12. package/dist/providers/anthropic.js.map +1 -1
  13. package/dist/providers/google-gemini-cli.d.ts +43 -1
  14. package/dist/providers/google-gemini-cli.d.ts.map +1 -1
  15. package/dist/providers/google-gemini-cli.js +369 -182
  16. package/dist/providers/google-gemini-cli.js.map +1 -1
  17. package/dist/providers/google-shared.d.ts +4 -0
  18. package/dist/providers/google-shared.d.ts.map +1 -1
  19. package/dist/providers/google-shared.js +32 -5
  20. package/dist/providers/google-shared.js.map +1 -1
  21. package/dist/providers/openai-codex-responses.d.ts.map +1 -1
  22. package/dist/providers/openai-codex-responses.js +1 -1
  23. package/dist/providers/openai-codex-responses.js.map +1 -1
  24. package/dist/providers/openai-completions.d.ts.map +1 -1
  25. package/dist/providers/openai-completions.js +30 -1
  26. package/dist/providers/openai-completions.js.map +1 -1
  27. package/dist/providers/openai-responses.d.ts +2 -0
  28. package/dist/providers/openai-responses.d.ts.map +1 -1
  29. package/dist/providers/openai-responses.js +25 -1
  30. package/dist/providers/openai-responses.js.map +1 -1
  31. package/dist/providers/{transorm-messages.d.ts → transform-messages.d.ts} +1 -1
  32. package/dist/providers/transform-messages.d.ts.map +1 -0
  33. package/dist/providers/{transorm-messages.js → transform-messages.js} +1 -1
  34. package/dist/providers/transform-messages.js.map +1 -0
  35. package/dist/stream.d.ts.map +1 -1
  36. package/dist/stream.js +28 -0
  37. package/dist/stream.js.map +1 -1
  38. package/dist/types.d.ts +4 -2
  39. package/dist/types.d.ts.map +1 -1
  40. package/dist/types.js.map +1 -1
  41. package/dist/utils/overflow.d.ts.map +1 -1
  42. package/dist/utils/overflow.js +3 -0
  43. package/dist/utils/overflow.js.map +1 -1
  44. package/package.json +3 -1
  45. package/dist/providers/transorm-messages.d.ts.map +0 -1
  46. package/dist/providers/transorm-messages.js.map +0 -1
@@ -3,11 +3,14 @@
3
3
  * Shared implementation for both google-gemini-cli and google-antigravity providers.
4
4
  * Uses the Cloud Code Assist API endpoint to access Gemini and Claude models.
5
5
  */
6
+ import { createHash } from "node:crypto";
6
7
  import { calculateCost } from "../models.js";
7
8
  import { AssistantMessageEventStream } from "../utils/event-stream.js";
8
9
  import { sanitizeSurrogates } from "../utils/sanitize-unicode.js";
9
10
  import { convertMessages, convertTools, isThinkingPart, mapStopReasonString, mapToolChoice, retainThoughtSignature, } from "./google-shared.js";
10
11
  const DEFAULT_ENDPOINT = "https://cloudcode-pa.googleapis.com";
12
+ const ANTIGRAVITY_DAILY_ENDPOINT = "https://daily-cloudcode-pa.sandbox.googleapis.com";
13
+ const ANTIGRAVITY_ENDPOINT_FALLBACKS = [ANTIGRAVITY_DAILY_ENDPOINT, DEFAULT_ENDPOINT];
11
14
  // Headers for Gemini CLI (prod endpoint)
12
15
  const GEMINI_CLI_HEADERS = {
13
16
  "User-Agent": "google-cloud-sdk vscode_cloudshelleditor/0.1",
@@ -113,15 +116,61 @@ let toolCallCounter = 0;
113
116
  // Retry configuration
114
117
  const MAX_RETRIES = 3;
115
118
  const BASE_DELAY_MS = 1000;
119
+ const MAX_EMPTY_STREAM_RETRIES = 2;
120
+ const EMPTY_STREAM_BASE_DELAY_MS = 500;
121
+ const CLAUDE_THINKING_BETA_HEADER = "interleaved-thinking-2025-05-14";
116
122
  /**
117
123
  * Extract retry delay from Gemini error response (in milliseconds).
118
- * Parses patterns like:
124
+ * Checks headers first (Retry-After, x-ratelimit-reset, x-ratelimit-reset-after),
125
+ * then parses body patterns like:
119
126
  * - "Your quota will reset after 39s"
120
127
  * - "Your quota will reset after 18h31m10s"
121
128
  * - "Please retry in Xs" or "Please retry in Xms"
122
129
  * - "retryDelay": "34.074824224s" (JSON field)
123
130
  */
124
- function extractRetryDelay(errorText) {
131
+ export function extractRetryDelay(errorText, response) {
132
+ const normalizeDelay = (ms) => (ms > 0 ? Math.ceil(ms + 1000) : undefined);
133
+ const headers = response instanceof Headers ? response : response?.headers;
134
+ if (headers) {
135
+ const retryAfter = headers.get("retry-after");
136
+ if (retryAfter) {
137
+ const retryAfterSeconds = Number(retryAfter);
138
+ if (Number.isFinite(retryAfterSeconds)) {
139
+ const delay = normalizeDelay(retryAfterSeconds * 1000);
140
+ if (delay !== undefined) {
141
+ return delay;
142
+ }
143
+ }
144
+ const retryAfterDate = new Date(retryAfter);
145
+ const retryAfterMs = retryAfterDate.getTime();
146
+ if (!Number.isNaN(retryAfterMs)) {
147
+ const delay = normalizeDelay(retryAfterMs - Date.now());
148
+ if (delay !== undefined) {
149
+ return delay;
150
+ }
151
+ }
152
+ }
153
+ const rateLimitReset = headers.get("x-ratelimit-reset");
154
+ if (rateLimitReset) {
155
+ const resetSeconds = Number.parseInt(rateLimitReset, 10);
156
+ if (!Number.isNaN(resetSeconds)) {
157
+ const delay = normalizeDelay(resetSeconds * 1000 - Date.now());
158
+ if (delay !== undefined) {
159
+ return delay;
160
+ }
161
+ }
162
+ }
163
+ const rateLimitResetAfter = headers.get("x-ratelimit-reset-after");
164
+ if (rateLimitResetAfter) {
165
+ const resetAfterSeconds = Number(rateLimitResetAfter);
166
+ if (Number.isFinite(resetAfterSeconds)) {
167
+ const delay = normalizeDelay(resetAfterSeconds * 1000);
168
+ if (delay !== undefined) {
169
+ return delay;
170
+ }
171
+ }
172
+ }
173
+ }
125
174
  // Pattern 1: "Your quota will reset after ..." (formats: "18h31m10s", "10m15s", "6s", "39s")
126
175
  const durationMatch = errorText.match(/reset after (?:(\d+)h)?(?:(\d+)m)?(\d+(?:\.\d+)?)s/i);
127
176
  if (durationMatch) {
@@ -130,8 +179,9 @@ function extractRetryDelay(errorText) {
130
179
  const seconds = parseFloat(durationMatch[3]);
131
180
  if (!Number.isNaN(seconds)) {
132
181
  const totalMs = ((hours * 60 + minutes) * 60 + seconds) * 1000;
133
- if (totalMs > 0) {
134
- return Math.ceil(totalMs + 1000); // Add 1s buffer
182
+ const delay = normalizeDelay(totalMs);
183
+ if (delay !== undefined) {
184
+ return delay;
135
185
  }
136
186
  }
137
187
  }
@@ -141,7 +191,10 @@ function extractRetryDelay(errorText) {
141
191
  const value = parseFloat(retryInMatch[1]);
142
192
  if (!Number.isNaN(value) && value > 0) {
143
193
  const ms = retryInMatch[2].toLowerCase() === "ms" ? value : value * 1000;
144
- return Math.ceil(ms + 1000);
194
+ const delay = normalizeDelay(ms);
195
+ if (delay !== undefined) {
196
+ return delay;
197
+ }
145
198
  }
146
199
  }
147
200
  // Pattern 3: "retryDelay": "34.074824224s" (JSON field in error details)
@@ -150,19 +203,42 @@ function extractRetryDelay(errorText) {
150
203
  const value = parseFloat(retryDelayMatch[1]);
151
204
  if (!Number.isNaN(value) && value > 0) {
152
205
  const ms = retryDelayMatch[2].toLowerCase() === "ms" ? value : value * 1000;
153
- return Math.ceil(ms + 1000);
206
+ const delay = normalizeDelay(ms);
207
+ if (delay !== undefined) {
208
+ return delay;
209
+ }
154
210
  }
155
211
  }
156
212
  return undefined;
157
213
  }
214
+ function isClaudeThinkingModel(modelId) {
215
+ const normalized = modelId.toLowerCase();
216
+ return normalized.includes("claude") && normalized.includes("thinking");
217
+ }
158
218
  /**
159
- * Check if an error is retryable (rate limit, server error, etc.)
219
+ * Check if an error is retryable (rate limit, server error, network error, etc.)
160
220
  */
161
221
  function isRetryableError(status, errorText) {
162
222
  if (status === 429 || status === 500 || status === 502 || status === 503 || status === 504) {
163
223
  return true;
164
224
  }
165
- return /resource.?exhausted|rate.?limit|overloaded|service.?unavailable/i.test(errorText);
225
+ return /resource.?exhausted|rate.?limit|overloaded|service.?unavailable|other.?side.?closed/i.test(errorText);
226
+ }
227
+ /**
228
+ * Extract a clean, user-friendly error message from Google API error response.
229
+ * Parses JSON error responses and returns just the message field.
230
+ */
231
+ function extractErrorMessage(errorText) {
232
+ try {
233
+ const parsed = JSON.parse(errorText);
234
+ if (parsed.error?.message) {
235
+ return parsed.error.message;
236
+ }
237
+ }
238
+ catch {
239
+ // Not JSON, return as-is
240
+ }
241
+ return errorText;
166
242
  }
167
243
  /**
168
244
  * Sleep for a given number of milliseconds, respecting abort signal.
@@ -219,29 +295,34 @@ export const streamGoogleGeminiCli = (model, context, options) => {
219
295
  if (!accessToken || !projectId) {
220
296
  throw new Error("Missing token or projectId in Google Cloud credentials. Use /login to re-authenticate.");
221
297
  }
222
- const endpoint = model.baseUrl || DEFAULT_ENDPOINT;
223
- const url = `${endpoint}/v1internal:streamGenerateContent?alt=sse`;
224
- // Use Antigravity headers for sandbox endpoint, otherwise Gemini CLI headers
225
- const isAntigravity = endpoint.includes("sandbox.googleapis.com");
298
+ const isAntigravity = model.provider === "google-antigravity";
299
+ const baseUrl = model.baseUrl?.trim();
300
+ const endpoints = baseUrl ? [baseUrl] : isAntigravity ? ANTIGRAVITY_ENDPOINT_FALLBACKS : [DEFAULT_ENDPOINT];
226
301
  const requestBody = buildRequest(model, context, projectId, options, isAntigravity);
227
302
  const headers = isAntigravity ? ANTIGRAVITY_HEADERS : GEMINI_CLI_HEADERS;
303
+ const requestHeaders = {
304
+ Authorization: `Bearer ${accessToken}`,
305
+ "Content-Type": "application/json",
306
+ Accept: "text/event-stream",
307
+ ...headers,
308
+ ...(isClaudeThinkingModel(model.id) ? { "anthropic-beta": CLAUDE_THINKING_BETA_HEADER } : {}),
309
+ };
310
+ const requestBodyJson = JSON.stringify(requestBody);
228
311
  // Fetch with retry logic for rate limits and transient errors
229
312
  let response;
230
313
  let lastError;
314
+ let requestUrl;
231
315
  for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
232
316
  if (options?.signal?.aborted) {
233
317
  throw new Error("Request was aborted");
234
318
  }
235
319
  try {
236
- response = await fetch(url, {
320
+ const endpoint = endpoints[Math.min(attempt, endpoints.length - 1)];
321
+ requestUrl = `${endpoint}/v1internal:streamGenerateContent?alt=sse`;
322
+ response = await fetch(requestUrl, {
237
323
  method: "POST",
238
- headers: {
239
- Authorization: `Bearer ${accessToken}`,
240
- "Content-Type": "application/json",
241
- Accept: "text/event-stream",
242
- ...headers,
243
- },
244
- body: JSON.stringify(requestBody),
324
+ headers: requestHeaders,
325
+ body: requestBodyJson,
245
326
  signal: options?.signal,
246
327
  });
247
328
  if (response.ok) {
@@ -251,13 +332,13 @@ export const streamGoogleGeminiCli = (model, context, options) => {
251
332
  // Check if retryable
252
333
  if (attempt < MAX_RETRIES && isRetryableError(response.status, errorText)) {
253
334
  // Use server-provided delay or exponential backoff
254
- const serverDelay = extractRetryDelay(errorText);
335
+ const serverDelay = extractRetryDelay(errorText, response);
255
336
  const delayMs = serverDelay ?? BASE_DELAY_MS * 2 ** attempt;
256
337
  await sleep(delayMs, options?.signal);
257
338
  continue;
258
339
  }
259
340
  // Not retryable or max retries exceeded
260
- throw new Error(`Cloud Code Assist API error (${response.status}): ${errorText}`);
341
+ throw new Error(`Cloud Code Assist API error (${response.status}): ${extractErrorMessage(errorText)}`);
261
342
  }
262
343
  catch (error) {
263
344
  // Check for abort - fetch throws AbortError, our code throws "Request was aborted"
@@ -266,7 +347,11 @@ export const streamGoogleGeminiCli = (model, context, options) => {
266
347
  throw new Error("Request was aborted");
267
348
  }
268
349
  }
350
+ // Extract detailed error message from fetch errors (Node includes cause)
269
351
  lastError = error instanceof Error ? error : new Error(String(error));
352
+ if (lastError.message === "fetch failed" && lastError.cause instanceof Error) {
353
+ lastError = new Error(`Network error: ${lastError.cause.message}`);
354
+ }
270
355
  // Network errors are retryable
271
356
  if (attempt < MAX_RETRIES) {
272
357
  const delayMs = BASE_DELAY_MS * 2 ** attempt;
@@ -279,64 +364,146 @@ export const streamGoogleGeminiCli = (model, context, options) => {
279
364
  if (!response || !response.ok) {
280
365
  throw lastError ?? new Error("Failed to get response after retries");
281
366
  }
282
- if (!response.body) {
283
- throw new Error("No response body");
284
- }
285
- stream.push({ type: "start", partial: output });
286
- let currentBlock = null;
287
- const blocks = output.content;
288
- const blockIndex = () => blocks.length - 1;
289
- // Read SSE stream
290
- const reader = response.body.getReader();
291
- const decoder = new TextDecoder();
292
- let buffer = "";
293
- // Set up abort handler to cancel reader when signal fires
294
- const abortHandler = () => {
295
- void reader.cancel().catch(() => { });
367
+ let started = false;
368
+ const ensureStarted = () => {
369
+ if (!started) {
370
+ stream.push({ type: "start", partial: output });
371
+ started = true;
372
+ }
296
373
  };
297
- options?.signal?.addEventListener("abort", abortHandler);
298
- try {
299
- while (true) {
300
- // Check abort signal before each read
301
- if (options?.signal?.aborted) {
302
- throw new Error("Request was aborted");
303
- }
304
- const { done, value } = await reader.read();
305
- if (done)
306
- break;
307
- buffer += decoder.decode(value, { stream: true });
308
- const lines = buffer.split("\n");
309
- buffer = lines.pop() || "";
310
- for (const line of lines) {
311
- if (!line.startsWith("data:"))
312
- continue;
313
- const jsonStr = line.slice(5).trim();
314
- if (!jsonStr)
315
- continue;
316
- let chunk;
317
- try {
318
- chunk = JSON.parse(jsonStr);
319
- }
320
- catch {
321
- continue;
374
+ const resetOutput = () => {
375
+ output.content = [];
376
+ output.usage = {
377
+ input: 0,
378
+ output: 0,
379
+ cacheRead: 0,
380
+ cacheWrite: 0,
381
+ totalTokens: 0,
382
+ cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
383
+ };
384
+ output.stopReason = "stop";
385
+ output.errorMessage = undefined;
386
+ output.timestamp = Date.now();
387
+ started = false;
388
+ };
389
+ const streamResponse = async (activeResponse) => {
390
+ if (!activeResponse.body) {
391
+ throw new Error("No response body");
392
+ }
393
+ let hasContent = false;
394
+ let currentBlock = null;
395
+ const blocks = output.content;
396
+ const blockIndex = () => blocks.length - 1;
397
+ // Read SSE stream
398
+ const reader = activeResponse.body.getReader();
399
+ const decoder = new TextDecoder();
400
+ let buffer = "";
401
+ // Set up abort handler to cancel reader when signal fires
402
+ const abortHandler = () => {
403
+ void reader.cancel().catch(() => { });
404
+ };
405
+ options?.signal?.addEventListener("abort", abortHandler);
406
+ try {
407
+ while (true) {
408
+ // Check abort signal before each read
409
+ if (options?.signal?.aborted) {
410
+ throw new Error("Request was aborted");
322
411
  }
323
- // Unwrap the response
324
- const responseData = chunk.response;
325
- if (!responseData)
326
- continue;
327
- const candidate = responseData.candidates?.[0];
328
- if (candidate?.content?.parts) {
329
- for (const part of candidate.content.parts) {
330
- if (part.text !== undefined) {
331
- const isThinking = isThinkingPart(part);
332
- if (!currentBlock ||
333
- (isThinking && currentBlock.type !== "thinking") ||
334
- (!isThinking && currentBlock.type !== "text")) {
412
+ const { done, value } = await reader.read();
413
+ if (done)
414
+ break;
415
+ buffer += decoder.decode(value, { stream: true });
416
+ const lines = buffer.split("\n");
417
+ buffer = lines.pop() || "";
418
+ for (const line of lines) {
419
+ if (!line.startsWith("data:"))
420
+ continue;
421
+ const jsonStr = line.slice(5).trim();
422
+ if (!jsonStr)
423
+ continue;
424
+ let chunk;
425
+ try {
426
+ chunk = JSON.parse(jsonStr);
427
+ }
428
+ catch {
429
+ continue;
430
+ }
431
+ // Unwrap the response
432
+ const responseData = chunk.response;
433
+ if (!responseData)
434
+ continue;
435
+ const candidate = responseData.candidates?.[0];
436
+ if (candidate?.content?.parts) {
437
+ for (const part of candidate.content.parts) {
438
+ if (part.text !== undefined) {
439
+ hasContent = true;
440
+ const isThinking = isThinkingPart(part);
441
+ if (!currentBlock ||
442
+ (isThinking && currentBlock.type !== "thinking") ||
443
+ (!isThinking && currentBlock.type !== "text")) {
444
+ if (currentBlock) {
445
+ if (currentBlock.type === "text") {
446
+ stream.push({
447
+ type: "text_end",
448
+ contentIndex: blocks.length - 1,
449
+ content: currentBlock.text,
450
+ partial: output,
451
+ });
452
+ }
453
+ else {
454
+ stream.push({
455
+ type: "thinking_end",
456
+ contentIndex: blockIndex(),
457
+ content: currentBlock.thinking,
458
+ partial: output,
459
+ });
460
+ }
461
+ }
462
+ if (isThinking) {
463
+ currentBlock = { type: "thinking", thinking: "", thinkingSignature: undefined };
464
+ output.content.push(currentBlock);
465
+ ensureStarted();
466
+ stream.push({
467
+ type: "thinking_start",
468
+ contentIndex: blockIndex(),
469
+ partial: output,
470
+ });
471
+ }
472
+ else {
473
+ currentBlock = { type: "text", text: "" };
474
+ output.content.push(currentBlock);
475
+ ensureStarted();
476
+ stream.push({ type: "text_start", contentIndex: blockIndex(), partial: output });
477
+ }
478
+ }
479
+ if (currentBlock.type === "thinking") {
480
+ currentBlock.thinking += part.text;
481
+ currentBlock.thinkingSignature = retainThoughtSignature(currentBlock.thinkingSignature, part.thoughtSignature);
482
+ stream.push({
483
+ type: "thinking_delta",
484
+ contentIndex: blockIndex(),
485
+ delta: part.text,
486
+ partial: output,
487
+ });
488
+ }
489
+ else {
490
+ currentBlock.text += part.text;
491
+ currentBlock.textSignature = retainThoughtSignature(currentBlock.textSignature, part.thoughtSignature);
492
+ stream.push({
493
+ type: "text_delta",
494
+ contentIndex: blockIndex(),
495
+ delta: part.text,
496
+ partial: output,
497
+ });
498
+ }
499
+ }
500
+ if (part.functionCall) {
501
+ hasContent = true;
335
502
  if (currentBlock) {
336
503
  if (currentBlock.type === "text") {
337
504
  stream.push({
338
505
  type: "text_end",
339
- contentIndex: blocks.length - 1,
506
+ contentIndex: blockIndex(),
340
507
  content: currentBlock.text,
341
508
  partial: output,
342
509
  });
@@ -349,134 +516,127 @@ export const streamGoogleGeminiCli = (model, context, options) => {
349
516
  partial: output,
350
517
  });
351
518
  }
519
+ currentBlock = null;
352
520
  }
353
- if (isThinking) {
354
- currentBlock = { type: "thinking", thinking: "", thinkingSignature: undefined };
355
- output.content.push(currentBlock);
356
- stream.push({ type: "thinking_start", contentIndex: blockIndex(), partial: output });
357
- }
358
- else {
359
- currentBlock = { type: "text", text: "" };
360
- output.content.push(currentBlock);
361
- stream.push({ type: "text_start", contentIndex: blockIndex(), partial: output });
362
- }
363
- }
364
- if (currentBlock.type === "thinking") {
365
- currentBlock.thinking += part.text;
366
- currentBlock.thinkingSignature = retainThoughtSignature(currentBlock.thinkingSignature, part.thoughtSignature);
521
+ const providedId = part.functionCall.id;
522
+ const needsNewId = !providedId ||
523
+ output.content.some((b) => b.type === "toolCall" && b.id === providedId);
524
+ const toolCallId = needsNewId
525
+ ? `${part.functionCall.name}_${Date.now()}_${++toolCallCounter}`
526
+ : providedId;
527
+ const toolCall = {
528
+ type: "toolCall",
529
+ id: toolCallId,
530
+ name: part.functionCall.name || "",
531
+ arguments: part.functionCall.args,
532
+ ...(part.thoughtSignature && { thoughtSignature: part.thoughtSignature }),
533
+ };
534
+ output.content.push(toolCall);
535
+ ensureStarted();
536
+ stream.push({ type: "toolcall_start", contentIndex: blockIndex(), partial: output });
367
537
  stream.push({
368
- type: "thinking_delta",
538
+ type: "toolcall_delta",
369
539
  contentIndex: blockIndex(),
370
- delta: part.text,
540
+ delta: JSON.stringify(toolCall.arguments),
371
541
  partial: output,
372
542
  });
373
- }
374
- else {
375
- currentBlock.text += part.text;
376
- currentBlock.textSignature = retainThoughtSignature(currentBlock.textSignature, part.thoughtSignature);
377
543
  stream.push({
378
- type: "text_delta",
544
+ type: "toolcall_end",
379
545
  contentIndex: blockIndex(),
380
- delta: part.text,
546
+ toolCall,
381
547
  partial: output,
382
548
  });
383
549
  }
384
550
  }
385
- if (part.functionCall) {
386
- if (currentBlock) {
387
- if (currentBlock.type === "text") {
388
- stream.push({
389
- type: "text_end",
390
- contentIndex: blockIndex(),
391
- content: currentBlock.text,
392
- partial: output,
393
- });
394
- }
395
- else {
396
- stream.push({
397
- type: "thinking_end",
398
- contentIndex: blockIndex(),
399
- content: currentBlock.thinking,
400
- partial: output,
401
- });
402
- }
403
- currentBlock = null;
404
- }
405
- const providedId = part.functionCall.id;
406
- const needsNewId = !providedId || output.content.some((b) => b.type === "toolCall" && b.id === providedId);
407
- const toolCallId = needsNewId
408
- ? `${part.functionCall.name}_${Date.now()}_${++toolCallCounter}`
409
- : providedId;
410
- const toolCall = {
411
- type: "toolCall",
412
- id: toolCallId,
413
- name: part.functionCall.name || "",
414
- arguments: part.functionCall.args,
415
- ...(part.thoughtSignature && { thoughtSignature: part.thoughtSignature }),
416
- };
417
- output.content.push(toolCall);
418
- stream.push({ type: "toolcall_start", contentIndex: blockIndex(), partial: output });
419
- stream.push({
420
- type: "toolcall_delta",
421
- contentIndex: blockIndex(),
422
- delta: JSON.stringify(toolCall.arguments),
423
- partial: output,
424
- });
425
- stream.push({ type: "toolcall_end", contentIndex: blockIndex(), toolCall, partial: output });
426
- }
427
551
  }
428
- }
429
- if (candidate?.finishReason) {
430
- output.stopReason = mapStopReasonString(candidate.finishReason);
431
- if (output.content.some((b) => b.type === "toolCall")) {
432
- output.stopReason = "toolUse";
552
+ if (candidate?.finishReason) {
553
+ output.stopReason = mapStopReasonString(candidate.finishReason);
554
+ if (output.content.some((b) => b.type === "toolCall")) {
555
+ output.stopReason = "toolUse";
556
+ }
433
557
  }
434
- }
435
- if (responseData.usageMetadata) {
436
- // promptTokenCount includes cachedContentTokenCount, so subtract to get fresh input
437
- const promptTokens = responseData.usageMetadata.promptTokenCount || 0;
438
- const cacheReadTokens = responseData.usageMetadata.cachedContentTokenCount || 0;
439
- output.usage = {
440
- input: promptTokens - cacheReadTokens,
441
- output: (responseData.usageMetadata.candidatesTokenCount || 0) +
442
- (responseData.usageMetadata.thoughtsTokenCount || 0),
443
- cacheRead: cacheReadTokens,
444
- cacheWrite: 0,
445
- totalTokens: responseData.usageMetadata.totalTokenCount || 0,
446
- cost: {
447
- input: 0,
448
- output: 0,
449
- cacheRead: 0,
558
+ if (responseData.usageMetadata) {
559
+ // promptTokenCount includes cachedContentTokenCount, so subtract to get fresh input
560
+ const promptTokens = responseData.usageMetadata.promptTokenCount || 0;
561
+ const cacheReadTokens = responseData.usageMetadata.cachedContentTokenCount || 0;
562
+ output.usage = {
563
+ input: promptTokens - cacheReadTokens,
564
+ output: (responseData.usageMetadata.candidatesTokenCount || 0) +
565
+ (responseData.usageMetadata.thoughtsTokenCount || 0),
566
+ cacheRead: cacheReadTokens,
450
567
  cacheWrite: 0,
451
- total: 0,
452
- },
453
- };
454
- calculateCost(model, output.usage);
568
+ totalTokens: responseData.usageMetadata.totalTokenCount || 0,
569
+ cost: {
570
+ input: 0,
571
+ output: 0,
572
+ cacheRead: 0,
573
+ cacheWrite: 0,
574
+ total: 0,
575
+ },
576
+ };
577
+ calculateCost(model, output.usage);
578
+ }
455
579
  }
456
580
  }
457
581
  }
458
- }
459
- finally {
460
- options?.signal?.removeEventListener("abort", abortHandler);
461
- }
462
- if (currentBlock) {
463
- if (currentBlock.type === "text") {
464
- stream.push({
465
- type: "text_end",
466
- contentIndex: blockIndex(),
467
- content: currentBlock.text,
468
- partial: output,
469
- });
582
+ finally {
583
+ options?.signal?.removeEventListener("abort", abortHandler);
470
584
  }
471
- else {
472
- stream.push({
473
- type: "thinking_end",
474
- contentIndex: blockIndex(),
475
- content: currentBlock.thinking,
476
- partial: output,
585
+ if (currentBlock) {
586
+ if (currentBlock.type === "text") {
587
+ stream.push({
588
+ type: "text_end",
589
+ contentIndex: blockIndex(),
590
+ content: currentBlock.text,
591
+ partial: output,
592
+ });
593
+ }
594
+ else {
595
+ stream.push({
596
+ type: "thinking_end",
597
+ contentIndex: blockIndex(),
598
+ content: currentBlock.thinking,
599
+ partial: output,
600
+ });
601
+ }
602
+ }
603
+ return hasContent;
604
+ };
605
+ let receivedContent = false;
606
+ let currentResponse = response;
607
+ for (let emptyAttempt = 0; emptyAttempt <= MAX_EMPTY_STREAM_RETRIES; emptyAttempt++) {
608
+ if (options?.signal?.aborted) {
609
+ throw new Error("Request was aborted");
610
+ }
611
+ if (emptyAttempt > 0) {
612
+ const backoffMs = EMPTY_STREAM_BASE_DELAY_MS * 2 ** (emptyAttempt - 1);
613
+ await sleep(backoffMs, options?.signal);
614
+ if (!requestUrl) {
615
+ throw new Error("Missing request URL");
616
+ }
617
+ currentResponse = await fetch(requestUrl, {
618
+ method: "POST",
619
+ headers: requestHeaders,
620
+ body: requestBodyJson,
621
+ signal: options?.signal,
477
622
  });
623
+ if (!currentResponse.ok) {
624
+ const retryErrorText = await currentResponse.text();
625
+ throw new Error(`Cloud Code Assist API error (${currentResponse.status}): ${retryErrorText}`);
626
+ }
627
+ }
628
+ const streamed = await streamResponse(currentResponse);
629
+ if (streamed) {
630
+ receivedContent = true;
631
+ break;
632
+ }
633
+ if (emptyAttempt < MAX_EMPTY_STREAM_RETRIES) {
634
+ resetOutput();
478
635
  }
479
636
  }
637
+ if (!receivedContent) {
638
+ throw new Error("Cloud Code Assist API returned an empty response");
639
+ }
480
640
  if (options?.signal?.aborted) {
481
641
  throw new Error("Request was aborted");
482
642
  }
@@ -500,7 +660,30 @@ export const streamGoogleGeminiCli = (model, context, options) => {
500
660
  })();
501
661
  return stream;
502
662
  };
503
- function buildRequest(model, context, projectId, options = {}, isAntigravity = false) {
663
+ function deriveSessionId(context) {
664
+ for (const message of context.messages) {
665
+ if (message.role !== "user") {
666
+ continue;
667
+ }
668
+ let text = "";
669
+ if (typeof message.content === "string") {
670
+ text = message.content;
671
+ }
672
+ else if (Array.isArray(message.content)) {
673
+ text = message.content
674
+ .filter((item) => item.type === "text")
675
+ .map((item) => item.text)
676
+ .join("\n");
677
+ }
678
+ if (!text || text.trim().length === 0) {
679
+ return undefined;
680
+ }
681
+ const hash = createHash("sha256").update(text).digest("hex");
682
+ return hash.slice(0, 32);
683
+ }
684
+ return undefined;
685
+ }
686
+ export function buildRequest(model, context, projectId, options = {}, isAntigravity = false) {
504
687
  const contents = convertMessages(model, context);
505
688
  const generationConfig = {};
506
689
  if (options.temperature !== undefined) {
@@ -526,6 +709,10 @@ function buildRequest(model, context, projectId, options = {}, isAntigravity = f
526
709
  const request = {
527
710
  contents,
528
711
  };
712
+ const sessionId = deriveSessionId(context);
713
+ if (sessionId) {
714
+ request.sessionId = sessionId;
715
+ }
529
716
  // System instruction must be object with parts, not plain string
530
717
  if (context.systemPrompt) {
531
718
  request.systemInstruction = {