@oh-my-pi/pi-ai 5.0.1 → 5.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -1
- package/src/models.generated.ts +1085 -27
- package/src/models.ts +5 -3
- package/src/providers/amazon-bedrock.ts +549 -0
- package/src/providers/anthropic.ts +1 -1
- package/src/providers/google-gemini-cli.ts +400 -191
- package/src/providers/google-shared.ts +37 -7
- package/src/providers/openai-codex-responses.ts +1 -1
- package/src/providers/openai-completions.ts +48 -4
- package/src/providers/openai-responses.ts +11 -2
- package/src/providers/{transorm-messages.ts → transform-messages.ts} +13 -7
- package/src/stream.ts +31 -0
- package/src/types.ts +8 -0
|
@@ -4,6 +4,7 @@
|
|
|
4
4
|
* Uses the Cloud Code Assist API endpoint to access Gemini and Claude models.
|
|
5
5
|
*/
|
|
6
6
|
|
|
7
|
+
import { createHash } from "node:crypto";
|
|
7
8
|
import type { Content, ThinkingConfig } from "@google/genai";
|
|
8
9
|
import { calculateCost } from "../models";
|
|
9
10
|
import type {
|
|
@@ -18,7 +19,6 @@ import type {
|
|
|
18
19
|
ToolCall,
|
|
19
20
|
} from "../types";
|
|
20
21
|
import { AssistantMessageEventStream } from "../utils/event-stream";
|
|
21
|
-
import { formatErrorMessageWithRetryAfter } from "../utils/retry-after";
|
|
22
22
|
import { sanitizeSurrogates } from "../utils/sanitize-unicode";
|
|
23
23
|
import {
|
|
24
24
|
convertMessages,
|
|
@@ -55,6 +55,8 @@ export interface GoogleGeminiCliOptions extends StreamOptions {
|
|
|
55
55
|
}
|
|
56
56
|
|
|
57
57
|
const DEFAULT_ENDPOINT = "https://cloudcode-pa.googleapis.com";
|
|
58
|
+
const ANTIGRAVITY_DAILY_ENDPOINT = "https://daily-cloudcode-pa.sandbox.googleapis.com";
|
|
59
|
+
const ANTIGRAVITY_ENDPOINT_FALLBACKS = [ANTIGRAVITY_DAILY_ENDPOINT, DEFAULT_ENDPOINT] as const;
|
|
58
60
|
// Headers for Gemini CLI (prod endpoint)
|
|
59
61
|
const GEMINI_CLI_HEADERS = {
|
|
60
62
|
"User-Agent": "google-cloud-sdk vscode_cloudshelleditor/0.1",
|
|
@@ -164,16 +166,66 @@ let toolCallCounter = 0;
|
|
|
164
166
|
// Retry configuration
|
|
165
167
|
const MAX_RETRIES = 3;
|
|
166
168
|
const BASE_DELAY_MS = 1000;
|
|
169
|
+
const MAX_EMPTY_STREAM_RETRIES = 2;
|
|
170
|
+
const EMPTY_STREAM_BASE_DELAY_MS = 500;
|
|
171
|
+
const CLAUDE_THINKING_BETA_HEADER = "interleaved-thinking-2025-05-14";
|
|
167
172
|
|
|
168
173
|
/**
|
|
169
174
|
* Extract retry delay from Gemini error response (in milliseconds).
|
|
170
|
-
*
|
|
175
|
+
* Checks headers first (Retry-After, x-ratelimit-reset, x-ratelimit-reset-after),
|
|
176
|
+
* then parses body patterns like:
|
|
171
177
|
* - "Your quota will reset after 39s"
|
|
172
178
|
* - "Your quota will reset after 18h31m10s"
|
|
173
179
|
* - "Please retry in Xs" or "Please retry in Xms"
|
|
174
180
|
* - "retryDelay": "34.074824224s" (JSON field)
|
|
175
181
|
*/
|
|
176
|
-
function extractRetryDelay(errorText: string): number | undefined {
|
|
182
|
+
export function extractRetryDelay(errorText: string, response?: Response | Headers): number | undefined {
|
|
183
|
+
const normalizeDelay = (ms: number): number | undefined => (ms > 0 ? Math.ceil(ms + 1000) : undefined);
|
|
184
|
+
|
|
185
|
+
const headers = response instanceof Headers ? response : response?.headers;
|
|
186
|
+
if (headers) {
|
|
187
|
+
const retryAfter = headers.get("retry-after");
|
|
188
|
+
if (retryAfter) {
|
|
189
|
+
const retryAfterSeconds = Number(retryAfter);
|
|
190
|
+
if (Number.isFinite(retryAfterSeconds)) {
|
|
191
|
+
const delay = normalizeDelay(retryAfterSeconds * 1000);
|
|
192
|
+
if (delay !== undefined) {
|
|
193
|
+
return delay;
|
|
194
|
+
}
|
|
195
|
+
}
|
|
196
|
+
const retryAfterDate = new Date(retryAfter);
|
|
197
|
+
const retryAfterMs = retryAfterDate.getTime();
|
|
198
|
+
if (!Number.isNaN(retryAfterMs)) {
|
|
199
|
+
const delay = normalizeDelay(retryAfterMs - Date.now());
|
|
200
|
+
if (delay !== undefined) {
|
|
201
|
+
return delay;
|
|
202
|
+
}
|
|
203
|
+
}
|
|
204
|
+
}
|
|
205
|
+
|
|
206
|
+
const rateLimitReset = headers.get("x-ratelimit-reset");
|
|
207
|
+
if (rateLimitReset) {
|
|
208
|
+
const resetSeconds = Number.parseInt(rateLimitReset, 10);
|
|
209
|
+
if (!Number.isNaN(resetSeconds)) {
|
|
210
|
+
const delay = normalizeDelay(resetSeconds * 1000 - Date.now());
|
|
211
|
+
if (delay !== undefined) {
|
|
212
|
+
return delay;
|
|
213
|
+
}
|
|
214
|
+
}
|
|
215
|
+
}
|
|
216
|
+
|
|
217
|
+
const rateLimitResetAfter = headers.get("x-ratelimit-reset-after");
|
|
218
|
+
if (rateLimitResetAfter) {
|
|
219
|
+
const resetAfterSeconds = Number(rateLimitResetAfter);
|
|
220
|
+
if (Number.isFinite(resetAfterSeconds)) {
|
|
221
|
+
const delay = normalizeDelay(resetAfterSeconds * 1000);
|
|
222
|
+
if (delay !== undefined) {
|
|
223
|
+
return delay;
|
|
224
|
+
}
|
|
225
|
+
}
|
|
226
|
+
}
|
|
227
|
+
}
|
|
228
|
+
|
|
177
229
|
// Pattern 1: "Your quota will reset after ..." (formats: "18h31m10s", "10m15s", "6s", "39s")
|
|
178
230
|
const durationMatch = errorText.match(/reset after (?:(\d+)h)?(?:(\d+)m)?(\d+(?:\.\d+)?)s/i);
|
|
179
231
|
if (durationMatch) {
|
|
@@ -182,8 +234,9 @@ function extractRetryDelay(errorText: string): number | undefined {
|
|
|
182
234
|
const seconds = parseFloat(durationMatch[3]);
|
|
183
235
|
if (!Number.isNaN(seconds)) {
|
|
184
236
|
const totalMs = ((hours * 60 + minutes) * 60 + seconds) * 1000;
|
|
185
|
-
|
|
186
|
-
|
|
237
|
+
const delay = normalizeDelay(totalMs);
|
|
238
|
+
if (delay !== undefined) {
|
|
239
|
+
return delay;
|
|
187
240
|
}
|
|
188
241
|
}
|
|
189
242
|
}
|
|
@@ -194,7 +247,10 @@ function extractRetryDelay(errorText: string): number | undefined {
|
|
|
194
247
|
const value = parseFloat(retryInMatch[1]);
|
|
195
248
|
if (!Number.isNaN(value) && value > 0) {
|
|
196
249
|
const ms = retryInMatch[2].toLowerCase() === "ms" ? value : value * 1000;
|
|
197
|
-
|
|
250
|
+
const delay = normalizeDelay(ms);
|
|
251
|
+
if (delay !== undefined) {
|
|
252
|
+
return delay;
|
|
253
|
+
}
|
|
198
254
|
}
|
|
199
255
|
}
|
|
200
256
|
|
|
@@ -204,21 +260,45 @@ function extractRetryDelay(errorText: string): number | undefined {
|
|
|
204
260
|
const value = parseFloat(retryDelayMatch[1]);
|
|
205
261
|
if (!Number.isNaN(value) && value > 0) {
|
|
206
262
|
const ms = retryDelayMatch[2].toLowerCase() === "ms" ? value : value * 1000;
|
|
207
|
-
|
|
263
|
+
const delay = normalizeDelay(ms);
|
|
264
|
+
if (delay !== undefined) {
|
|
265
|
+
return delay;
|
|
266
|
+
}
|
|
208
267
|
}
|
|
209
268
|
}
|
|
210
269
|
|
|
211
270
|
return undefined;
|
|
212
271
|
}
|
|
213
272
|
|
|
273
|
+
/**
 * Report whether a model id refers to a Claude "thinking" variant.
 *
 * The comparison is case-insensitive: the id qualifies only when it contains
 * both the substring "claude" and the substring "thinking".
 *
 * @param modelId - Model identifier to inspect.
 * @returns `true` when both substrings occur in the id, otherwise `false`.
 */
function isClaudeThinkingModel(modelId: string): boolean {
	const lowered = modelId.toLowerCase();
	return ["claude", "thinking"].every((needle) => lowered.includes(needle));
}
|
|
277
|
+
|
|
214
278
|
/**
 * Decide whether a failed request should be retried.
 *
 * HTTP statuses 429, 500, 502, 503 and 504 are always retryable. For any
 * other status the error text is matched (case-insensitively) against known
 * transient failure phrases: resource exhausted, rate limit, overloaded,
 * service unavailable, and "other side closed" (a dropped connection).
 *
 * @param status - HTTP status code of the failed response.
 * @param errorText - Response body or error message text to inspect.
 * @returns `true` when the failure looks transient, otherwise `false`.
 */
function isRetryableError(status: number, errorText: string): boolean {
	switch (status) {
		case 429:
		case 500:
		case 502:
		case 503:
		case 504:
			return true;
		default:
			return /resource.?exhausted|rate.?limit|overloaded|service.?unavailable|other.?side.?closed/i.test(errorText);
	}
}
|
|
287
|
+
|
|
288
|
+
/**
 * Extract a clean, user-friendly error message from a Google API error response.
 *
 * When `errorText` is JSON of the form `{ "error": { "message": "..." } }` and
 * the message is a non-empty string, only that message is returned. In every
 * other case (not JSON, unexpected shape, missing/empty/non-string message)
 * the original text is returned unchanged, so no information is lost and the
 * result is always a string.
 *
 * @param errorText - Raw response body of the failed request.
 * @returns The parsed `error.message`, or `errorText` itself as a fallback.
 */
function extractErrorMessage(errorText: string): string {
	let parsed: unknown;
	try {
		parsed = JSON.parse(errorText);
	} catch {
		// Not JSON, return as-is
		return errorText;
	}

	// Narrow step by step instead of asserting the shape: the payload is
	// untrusted, and a non-string `message` must not leak out as a "string".
	if (typeof parsed === "object" && parsed !== null && "error" in parsed) {
		const errorField = (parsed as { error?: unknown }).error;
		if (typeof errorField === "object" && errorField !== null && "message" in errorField) {
			const message = (errorField as { message?: unknown }).message;
			if (typeof message === "string" && message.length > 0) {
				return message;
			}
		}
	}

	return errorText;
}
|
|
223
303
|
|
|
224
304
|
/**
|
|
@@ -243,6 +323,7 @@ interface CloudCodeAssistRequest {
|
|
|
243
323
|
model: string;
|
|
244
324
|
request: {
|
|
245
325
|
contents: Content[];
|
|
326
|
+
sessionId?: string;
|
|
246
327
|
systemInstruction?: { role?: string; parts: { text: string }[] };
|
|
247
328
|
generationConfig?: {
|
|
248
329
|
maxOutputTokens?: number;
|
|
@@ -340,17 +421,26 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
|
|
|
340
421
|
throw new Error("Missing token or projectId in Google Cloud credentials. Use /login to re-authenticate.");
|
|
341
422
|
}
|
|
342
423
|
|
|
343
|
-
const
|
|
344
|
-
const
|
|
424
|
+
const isAntigravity = model.provider === "google-antigravity";
|
|
425
|
+
const baseUrl = model.baseUrl?.trim();
|
|
426
|
+
const endpoints = baseUrl ? [baseUrl] : isAntigravity ? ANTIGRAVITY_ENDPOINT_FALLBACKS : [DEFAULT_ENDPOINT];
|
|
345
427
|
|
|
346
|
-
// Use Antigravity headers for sandbox endpoint, otherwise Gemini CLI headers
|
|
347
|
-
const isAntigravity = endpoint.includes("sandbox.googleapis.com");
|
|
348
428
|
const requestBody = buildRequest(model, context, projectId, options, isAntigravity);
|
|
349
429
|
const headers = isAntigravity ? ANTIGRAVITY_HEADERS : GEMINI_CLI_HEADERS;
|
|
350
430
|
|
|
431
|
+
const requestHeaders = {
|
|
432
|
+
Authorization: `Bearer ${accessToken}`,
|
|
433
|
+
"Content-Type": "application/json",
|
|
434
|
+
Accept: "text/event-stream",
|
|
435
|
+
...headers,
|
|
436
|
+
...(isClaudeThinkingModel(model.id) ? { "anthropic-beta": CLAUDE_THINKING_BETA_HEADER } : {}),
|
|
437
|
+
};
|
|
438
|
+
const requestBodyJson = JSON.stringify(requestBody);
|
|
439
|
+
|
|
351
440
|
// Fetch with retry logic for rate limits and transient errors
|
|
352
441
|
let response: Response | undefined;
|
|
353
442
|
let lastError: Error | undefined;
|
|
443
|
+
let requestUrl: string | undefined;
|
|
354
444
|
|
|
355
445
|
for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
|
|
356
446
|
if (options?.signal?.aborted) {
|
|
@@ -358,15 +448,12 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
|
|
|
358
448
|
}
|
|
359
449
|
|
|
360
450
|
try {
|
|
361
|
-
|
|
451
|
+
const endpoint = endpoints[Math.min(attempt, endpoints.length - 1)];
|
|
452
|
+
requestUrl = `${endpoint}/v1internal:streamGenerateContent?alt=sse`;
|
|
453
|
+
response = await fetch(requestUrl, {
|
|
362
454
|
method: "POST",
|
|
363
|
-
headers:
|
|
364
|
-
|
|
365
|
-
"Content-Type": "application/json",
|
|
366
|
-
Accept: "text/event-stream",
|
|
367
|
-
...headers,
|
|
368
|
-
},
|
|
369
|
-
body: JSON.stringify(requestBody),
|
|
455
|
+
headers: requestHeaders,
|
|
456
|
+
body: requestBodyJson,
|
|
370
457
|
signal: options?.signal,
|
|
371
458
|
});
|
|
372
459
|
|
|
@@ -379,14 +466,14 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
|
|
|
379
466
|
// Check if retryable
|
|
380
467
|
if (attempt < MAX_RETRIES && isRetryableError(response.status, errorText)) {
|
|
381
468
|
// Use server-provided delay or exponential backoff
|
|
382
|
-
const serverDelay = extractRetryDelay(errorText);
|
|
469
|
+
const serverDelay = extractRetryDelay(errorText, response);
|
|
383
470
|
const delayMs = serverDelay ?? BASE_DELAY_MS * 2 ** attempt;
|
|
384
471
|
await sleep(delayMs, options?.signal);
|
|
385
472
|
continue;
|
|
386
473
|
}
|
|
387
474
|
|
|
388
475
|
// Not retryable or max retries exceeded
|
|
389
|
-
throw new Error(`Cloud Code Assist API error (${response.status}): ${errorText}`);
|
|
476
|
+
throw new Error(`Cloud Code Assist API error (${response.status}): ${extractErrorMessage(errorText)}`);
|
|
390
477
|
} catch (error) {
|
|
391
478
|
// Check for abort - fetch throws AbortError, our code throws "Request was aborted"
|
|
392
479
|
if (error instanceof Error) {
|
|
@@ -394,7 +481,11 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
|
|
|
394
481
|
throw new Error("Request was aborted");
|
|
395
482
|
}
|
|
396
483
|
}
|
|
484
|
+
// Extract detailed error message from fetch errors (Node includes cause)
|
|
397
485
|
lastError = error instanceof Error ? error : new Error(String(error));
|
|
486
|
+
if (lastError.message === "fetch failed" && lastError.cause instanceof Error) {
|
|
487
|
+
lastError = new Error(`Network error: ${lastError.cause.message}`);
|
|
488
|
+
}
|
|
398
489
|
// Network errors are retryable
|
|
399
490
|
if (attempt < MAX_RETRIES) {
|
|
400
491
|
const delayMs = BASE_DELAY_MS * 2 ** attempt;
|
|
@@ -409,73 +500,160 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
|
|
|
409
500
|
throw lastError ?? new Error("Failed to get response after retries");
|
|
410
501
|
}
|
|
411
502
|
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
503
|
+
let started = false;
|
|
504
|
+
const ensureStarted = () => {
|
|
505
|
+
if (!started) {
|
|
506
|
+
stream.push({ type: "start", partial: output });
|
|
507
|
+
started = true;
|
|
508
|
+
}
|
|
509
|
+
};
|
|
415
510
|
|
|
416
|
-
|
|
511
|
+
const resetOutput = () => {
|
|
512
|
+
output.content = [];
|
|
513
|
+
output.usage = {
|
|
514
|
+
input: 0,
|
|
515
|
+
output: 0,
|
|
516
|
+
cacheRead: 0,
|
|
517
|
+
cacheWrite: 0,
|
|
518
|
+
totalTokens: 0,
|
|
519
|
+
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
|
520
|
+
};
|
|
521
|
+
output.stopReason = "stop";
|
|
522
|
+
output.errorMessage = undefined;
|
|
523
|
+
output.timestamp = Date.now();
|
|
524
|
+
started = false;
|
|
525
|
+
};
|
|
417
526
|
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
527
|
+
const streamResponse = async (activeResponse: Response): Promise<boolean> => {
|
|
528
|
+
if (!activeResponse.body) {
|
|
529
|
+
throw new Error("No response body");
|
|
530
|
+
}
|
|
421
531
|
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
532
|
+
let hasContent = false;
|
|
533
|
+
let currentBlock: TextContent | ThinkingContent | null = null;
|
|
534
|
+
const blocks = output.content;
|
|
535
|
+
const blockIndex = () => blocks.length - 1;
|
|
426
536
|
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
options?.signal?.addEventListener("abort", abortHandler);
|
|
537
|
+
// Read SSE stream
|
|
538
|
+
const reader = activeResponse.body.getReader();
|
|
539
|
+
const decoder = new TextDecoder();
|
|
540
|
+
let buffer = "";
|
|
432
541
|
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
}
|
|
542
|
+
// Set up abort handler to cancel reader when signal fires
|
|
543
|
+
const abortHandler = () => {
|
|
544
|
+
void reader.cancel().catch(() => {});
|
|
545
|
+
};
|
|
546
|
+
options?.signal?.addEventListener("abort", abortHandler);
|
|
439
547
|
|
|
440
|
-
|
|
441
|
-
|
|
548
|
+
try {
|
|
549
|
+
while (true) {
|
|
550
|
+
// Check abort signal before each read
|
|
551
|
+
if (options?.signal?.aborted) {
|
|
552
|
+
throw new Error("Request was aborted");
|
|
553
|
+
}
|
|
442
554
|
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
buffer = lines.pop() || "";
|
|
555
|
+
const { done, value } = await reader.read();
|
|
556
|
+
if (done) break;
|
|
446
557
|
|
|
447
|
-
|
|
448
|
-
|
|
558
|
+
buffer += decoder.decode(value, { stream: true });
|
|
559
|
+
const lines = buffer.split("\n");
|
|
560
|
+
buffer = lines.pop() || "";
|
|
449
561
|
|
|
450
|
-
const
|
|
451
|
-
|
|
562
|
+
for (const line of lines) {
|
|
563
|
+
if (!line.startsWith("data:")) continue;
|
|
452
564
|
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
565
|
+
const jsonStr = line.slice(5).trim();
|
|
566
|
+
if (!jsonStr) continue;
|
|
567
|
+
|
|
568
|
+
let chunk: CloudCodeAssistResponseChunk;
|
|
569
|
+
try {
|
|
570
|
+
chunk = JSON.parse(jsonStr);
|
|
571
|
+
} catch {
|
|
572
|
+
continue;
|
|
573
|
+
}
|
|
459
574
|
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
575
|
+
// Unwrap the response
|
|
576
|
+
const responseData = chunk.response;
|
|
577
|
+
if (!responseData) continue;
|
|
578
|
+
|
|
579
|
+
const candidate = responseData.candidates?.[0];
|
|
580
|
+
if (candidate?.content?.parts) {
|
|
581
|
+
for (const part of candidate.content.parts) {
|
|
582
|
+
if (part.text !== undefined) {
|
|
583
|
+
hasContent = true;
|
|
584
|
+
const isThinking = isThinkingPart(part);
|
|
585
|
+
if (
|
|
586
|
+
!currentBlock ||
|
|
587
|
+
(isThinking && currentBlock.type !== "thinking") ||
|
|
588
|
+
(!isThinking && currentBlock.type !== "text")
|
|
589
|
+
) {
|
|
590
|
+
if (currentBlock) {
|
|
591
|
+
if (currentBlock.type === "text") {
|
|
592
|
+
stream.push({
|
|
593
|
+
type: "text_end",
|
|
594
|
+
contentIndex: blocks.length - 1,
|
|
595
|
+
content: currentBlock.text,
|
|
596
|
+
partial: output,
|
|
597
|
+
});
|
|
598
|
+
} else {
|
|
599
|
+
stream.push({
|
|
600
|
+
type: "thinking_end",
|
|
601
|
+
contentIndex: blockIndex(),
|
|
602
|
+
content: currentBlock.thinking,
|
|
603
|
+
partial: output,
|
|
604
|
+
});
|
|
605
|
+
}
|
|
606
|
+
}
|
|
607
|
+
if (isThinking) {
|
|
608
|
+
currentBlock = { type: "thinking", thinking: "", thinkingSignature: undefined };
|
|
609
|
+
output.content.push(currentBlock);
|
|
610
|
+
ensureStarted();
|
|
611
|
+
stream.push({
|
|
612
|
+
type: "thinking_start",
|
|
613
|
+
contentIndex: blockIndex(),
|
|
614
|
+
partial: output,
|
|
615
|
+
});
|
|
616
|
+
} else {
|
|
617
|
+
currentBlock = { type: "text", text: "" };
|
|
618
|
+
output.content.push(currentBlock);
|
|
619
|
+
ensureStarted();
|
|
620
|
+
stream.push({ type: "text_start", contentIndex: blockIndex(), partial: output });
|
|
621
|
+
}
|
|
622
|
+
}
|
|
623
|
+
if (currentBlock.type === "thinking") {
|
|
624
|
+
currentBlock.thinking += part.text;
|
|
625
|
+
currentBlock.thinkingSignature = retainThoughtSignature(
|
|
626
|
+
currentBlock.thinkingSignature,
|
|
627
|
+
part.thoughtSignature,
|
|
628
|
+
);
|
|
629
|
+
stream.push({
|
|
630
|
+
type: "thinking_delta",
|
|
631
|
+
contentIndex: blockIndex(),
|
|
632
|
+
delta: part.text,
|
|
633
|
+
partial: output,
|
|
634
|
+
});
|
|
635
|
+
} else {
|
|
636
|
+
currentBlock.text += part.text;
|
|
637
|
+
currentBlock.textSignature = retainThoughtSignature(
|
|
638
|
+
currentBlock.textSignature,
|
|
639
|
+
part.thoughtSignature,
|
|
640
|
+
);
|
|
641
|
+
stream.push({
|
|
642
|
+
type: "text_delta",
|
|
643
|
+
contentIndex: blockIndex(),
|
|
644
|
+
delta: part.text,
|
|
645
|
+
partial: output,
|
|
646
|
+
});
|
|
647
|
+
}
|
|
648
|
+
}
|
|
649
|
+
|
|
650
|
+
if (part.functionCall) {
|
|
651
|
+
hasContent = true;
|
|
474
652
|
if (currentBlock) {
|
|
475
653
|
if (currentBlock.type === "text") {
|
|
476
654
|
stream.push({
|
|
477
655
|
type: "text_end",
|
|
478
|
-
contentIndex:
|
|
656
|
+
contentIndex: blockIndex(),
|
|
479
657
|
content: currentBlock.text,
|
|
480
658
|
partial: output,
|
|
481
659
|
});
|
|
@@ -487,145 +665,144 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
|
|
|
487
665
|
partial: output,
|
|
488
666
|
});
|
|
489
667
|
}
|
|
668
|
+
currentBlock = null;
|
|
490
669
|
}
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
part.thoughtSignature,
|
|
506
|
-
|
|
670
|
+
|
|
671
|
+
const providedId = part.functionCall.id;
|
|
672
|
+
const needsNewId =
|
|
673
|
+
!providedId ||
|
|
674
|
+
output.content.some((b) => b.type === "toolCall" && b.id === providedId);
|
|
675
|
+
const toolCallId = needsNewId
|
|
676
|
+
? `${part.functionCall.name}_${Date.now()}_${++toolCallCounter}`
|
|
677
|
+
: providedId;
|
|
678
|
+
|
|
679
|
+
const toolCall: ToolCall = {
|
|
680
|
+
type: "toolCall",
|
|
681
|
+
id: toolCallId,
|
|
682
|
+
name: part.functionCall.name || "",
|
|
683
|
+
arguments: part.functionCall.args as Record<string, unknown>,
|
|
684
|
+
...(part.thoughtSignature && { thoughtSignature: part.thoughtSignature }),
|
|
685
|
+
};
|
|
686
|
+
|
|
687
|
+
output.content.push(toolCall);
|
|
688
|
+
ensureStarted();
|
|
689
|
+
stream.push({ type: "toolcall_start", contentIndex: blockIndex(), partial: output });
|
|
507
690
|
stream.push({
|
|
508
|
-
type: "
|
|
691
|
+
type: "toolcall_delta",
|
|
509
692
|
contentIndex: blockIndex(),
|
|
510
|
-
delta:
|
|
693
|
+
delta: JSON.stringify(toolCall.arguments),
|
|
511
694
|
partial: output,
|
|
512
695
|
});
|
|
513
|
-
} else {
|
|
514
|
-
currentBlock.text += part.text;
|
|
515
|
-
currentBlock.textSignature = retainThoughtSignature(
|
|
516
|
-
currentBlock.textSignature,
|
|
517
|
-
part.thoughtSignature,
|
|
518
|
-
);
|
|
519
696
|
stream.push({
|
|
520
|
-
type: "
|
|
697
|
+
type: "toolcall_end",
|
|
521
698
|
contentIndex: blockIndex(),
|
|
522
|
-
|
|
699
|
+
toolCall,
|
|
523
700
|
partial: output,
|
|
524
701
|
});
|
|
525
702
|
}
|
|
526
703
|
}
|
|
704
|
+
}
|
|
527
705
|
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
type: "text_end",
|
|
533
|
-
contentIndex: blockIndex(),
|
|
534
|
-
content: currentBlock.text,
|
|
535
|
-
partial: output,
|
|
536
|
-
});
|
|
537
|
-
} else {
|
|
538
|
-
stream.push({
|
|
539
|
-
type: "thinking_end",
|
|
540
|
-
contentIndex: blockIndex(),
|
|
541
|
-
content: currentBlock.thinking,
|
|
542
|
-
partial: output,
|
|
543
|
-
});
|
|
544
|
-
}
|
|
545
|
-
currentBlock = null;
|
|
546
|
-
}
|
|
547
|
-
|
|
548
|
-
const providedId = part.functionCall.id;
|
|
549
|
-
const needsNewId =
|
|
550
|
-
!providedId || output.content.some((b) => b.type === "toolCall" && b.id === providedId);
|
|
551
|
-
const toolCallId = needsNewId
|
|
552
|
-
? `${part.functionCall.name}_${Date.now()}_${++toolCallCounter}`
|
|
553
|
-
: providedId;
|
|
554
|
-
|
|
555
|
-
const toolCall: ToolCall = {
|
|
556
|
-
type: "toolCall",
|
|
557
|
-
id: toolCallId,
|
|
558
|
-
name: part.functionCall.name || "",
|
|
559
|
-
arguments: part.functionCall.args as Record<string, unknown>,
|
|
560
|
-
...(part.thoughtSignature && { thoughtSignature: part.thoughtSignature }),
|
|
561
|
-
};
|
|
562
|
-
|
|
563
|
-
output.content.push(toolCall);
|
|
564
|
-
stream.push({ type: "toolcall_start", contentIndex: blockIndex(), partial: output });
|
|
565
|
-
stream.push({
|
|
566
|
-
type: "toolcall_delta",
|
|
567
|
-
contentIndex: blockIndex(),
|
|
568
|
-
delta: JSON.stringify(toolCall.arguments),
|
|
569
|
-
partial: output,
|
|
570
|
-
});
|
|
571
|
-
stream.push({ type: "toolcall_end", contentIndex: blockIndex(), toolCall, partial: output });
|
|
706
|
+
if (candidate?.finishReason) {
|
|
707
|
+
output.stopReason = mapStopReasonString(candidate.finishReason);
|
|
708
|
+
if (output.content.some((b) => b.type === "toolCall")) {
|
|
709
|
+
output.stopReason = "toolUse";
|
|
572
710
|
}
|
|
573
711
|
}
|
|
574
|
-
}
|
|
575
712
|
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
713
|
+
if (responseData.usageMetadata) {
|
|
714
|
+
// promptTokenCount includes cachedContentTokenCount, so subtract to get fresh input
|
|
715
|
+
const promptTokens = responseData.usageMetadata.promptTokenCount || 0;
|
|
716
|
+
const cacheReadTokens = responseData.usageMetadata.cachedContentTokenCount || 0;
|
|
717
|
+
output.usage = {
|
|
718
|
+
input: promptTokens - cacheReadTokens,
|
|
719
|
+
output:
|
|
720
|
+
(responseData.usageMetadata.candidatesTokenCount || 0) +
|
|
721
|
+
(responseData.usageMetadata.thoughtsTokenCount || 0),
|
|
722
|
+
cacheRead: cacheReadTokens,
|
|
723
|
+
cacheWrite: 0,
|
|
724
|
+
totalTokens: responseData.usageMetadata.totalTokenCount || 0,
|
|
725
|
+
cost: {
|
|
726
|
+
input: 0,
|
|
727
|
+
output: 0,
|
|
728
|
+
cacheRead: 0,
|
|
729
|
+
cacheWrite: 0,
|
|
730
|
+
total: 0,
|
|
731
|
+
},
|
|
732
|
+
};
|
|
733
|
+
calculateCost(model, output.usage);
|
|
580
734
|
}
|
|
581
735
|
}
|
|
736
|
+
}
|
|
737
|
+
} finally {
|
|
738
|
+
options?.signal?.removeEventListener("abort", abortHandler);
|
|
739
|
+
}
|
|
582
740
|
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
cacheRead: 0,
|
|
599
|
-
cacheWrite: 0,
|
|
600
|
-
total: 0,
|
|
601
|
-
},
|
|
602
|
-
};
|
|
603
|
-
calculateCost(model, output.usage);
|
|
604
|
-
}
|
|
741
|
+
if (currentBlock) {
|
|
742
|
+
if (currentBlock.type === "text") {
|
|
743
|
+
stream.push({
|
|
744
|
+
type: "text_end",
|
|
745
|
+
contentIndex: blockIndex(),
|
|
746
|
+
content: currentBlock.text,
|
|
747
|
+
partial: output,
|
|
748
|
+
});
|
|
749
|
+
} else {
|
|
750
|
+
stream.push({
|
|
751
|
+
type: "thinking_end",
|
|
752
|
+
contentIndex: blockIndex(),
|
|
753
|
+
content: currentBlock.thinking,
|
|
754
|
+
partial: output,
|
|
755
|
+
});
|
|
605
756
|
}
|
|
606
757
|
}
|
|
607
|
-
} finally {
|
|
608
|
-
options?.signal?.removeEventListener("abort", abortHandler);
|
|
609
|
-
}
|
|
610
758
|
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
759
|
+
return hasContent;
|
|
760
|
+
};
|
|
761
|
+
|
|
762
|
+
let receivedContent = false;
|
|
763
|
+
let currentResponse = response;
|
|
764
|
+
|
|
765
|
+
for (let emptyAttempt = 0; emptyAttempt <= MAX_EMPTY_STREAM_RETRIES; emptyAttempt++) {
|
|
766
|
+
if (options?.signal?.aborted) {
|
|
767
|
+
throw new Error("Request was aborted");
|
|
768
|
+
}
|
|
769
|
+
|
|
770
|
+
if (emptyAttempt > 0) {
|
|
771
|
+
const backoffMs = EMPTY_STREAM_BASE_DELAY_MS * 2 ** (emptyAttempt - 1);
|
|
772
|
+
await sleep(backoffMs, options?.signal);
|
|
773
|
+
|
|
774
|
+
if (!requestUrl) {
|
|
775
|
+
throw new Error("Missing request URL");
|
|
776
|
+
}
|
|
777
|
+
|
|
778
|
+
currentResponse = await fetch(requestUrl, {
|
|
779
|
+
method: "POST",
|
|
780
|
+
headers: requestHeaders,
|
|
781
|
+
body: requestBodyJson,
|
|
782
|
+
signal: options?.signal,
|
|
625
783
|
});
|
|
784
|
+
|
|
785
|
+
if (!currentResponse.ok) {
|
|
786
|
+
const retryErrorText = await currentResponse.text();
|
|
787
|
+
throw new Error(`Cloud Code Assist API error (${currentResponse.status}): ${retryErrorText}`);
|
|
788
|
+
}
|
|
789
|
+
}
|
|
790
|
+
|
|
791
|
+
const streamed = await streamResponse(currentResponse);
|
|
792
|
+
if (streamed) {
|
|
793
|
+
receivedContent = true;
|
|
794
|
+
break;
|
|
795
|
+
}
|
|
796
|
+
|
|
797
|
+
if (emptyAttempt < MAX_EMPTY_STREAM_RETRIES) {
|
|
798
|
+
resetOutput();
|
|
626
799
|
}
|
|
627
800
|
}
|
|
628
801
|
|
|
802
|
+
if (!receivedContent) {
|
|
803
|
+
throw new Error("Cloud Code Assist API returned an empty response");
|
|
804
|
+
}
|
|
805
|
+
|
|
629
806
|
if (options?.signal?.aborted) {
|
|
630
807
|
throw new Error("Request was aborted");
|
|
631
808
|
}
|
|
@@ -643,7 +820,7 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
|
|
|
643
820
|
}
|
|
644
821
|
}
|
|
645
822
|
output.stopReason = options?.signal?.aborted ? "aborted" : "error";
|
|
646
|
-
output.errorMessage =
|
|
823
|
+
output.errorMessage = error instanceof Error ? error.message : JSON.stringify(error);
|
|
647
824
|
stream.push({ type: "error", reason: output.stopReason, error: output });
|
|
648
825
|
stream.end();
|
|
649
826
|
}
|
|
@@ -652,7 +829,34 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
|
|
|
652
829
|
return stream;
|
|
653
830
|
};
|
|
654
831
|
|
|
655
|
-
function
|
|
832
|
+
/**
 * Derive a deterministic session id from the first user message in a context.
 *
 * Only the first message whose role is "user" is considered. Its text is
 * either the string content itself or, for array content, the text of all
 * "text" items joined with newlines. The id is the first 32 hex characters
 * of the SHA-256 digest of that text.
 *
 * @param context - Conversation context whose messages are scanned in order.
 * @returns The 32-character hex id, or `undefined` when there is no user
 *          message or the first user message has no non-whitespace text.
 */
function deriveSessionId(context: Context): string | undefined {
	for (const entry of context.messages) {
		if (entry.role === "user") {
			const { content } = entry;
			const pieces: string[] = [];

			if (typeof content === "string") {
				pieces.push(content);
			} else if (Array.isArray(content)) {
				for (const item of content) {
					if (item.type === "text") {
						pieces.push(item.text);
					}
				}
			}

			const joined = pieces.join("\n");
			// Whitespace-only text yields no id; the hash itself uses the untrimmed text.
			if (joined.trim().length === 0) {
				return undefined;
			}

			const digest = createHash("sha256").update(joined).digest("hex");
			return digest.slice(0, 32);
		}
	}

	return undefined;
}
|
|
858
|
+
|
|
859
|
+
export function buildRequest(
|
|
656
860
|
model: Model<"google-gemini-cli">,
|
|
657
861
|
context: Context,
|
|
658
862
|
projectId: string,
|
|
@@ -687,6 +891,11 @@ function buildRequest(
|
|
|
687
891
|
contents,
|
|
688
892
|
};
|
|
689
893
|
|
|
894
|
+
const sessionId = deriveSessionId(context);
|
|
895
|
+
if (sessionId) {
|
|
896
|
+
request.sessionId = sessionId;
|
|
897
|
+
}
|
|
898
|
+
|
|
690
899
|
// System instruction must be object with parts, not plain string
|
|
691
900
|
if (context.systemPrompt) {
|
|
692
901
|
request.systemInstruction = {
|