@mariozechner/pi-ai 0.44.0 → 0.45.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +87 -0
- package/dist/models.generated.d.ts +922 -0
- package/dist/models.generated.d.ts.map +1 -1
- package/dist/models.generated.js +928 -6
- package/dist/models.generated.js.map +1 -1
- package/dist/providers/amazon-bedrock.d.ts +14 -0
- package/dist/providers/amazon-bedrock.d.ts.map +1 -0
- package/dist/providers/amazon-bedrock.js +435 -0
- package/dist/providers/amazon-bedrock.js.map +1 -0
- package/dist/providers/anthropic.d.ts.map +1 -1
- package/dist/providers/anthropic.js +1 -1
- package/dist/providers/anthropic.js.map +1 -1
- package/dist/providers/google-gemini-cli.d.ts +43 -1
- package/dist/providers/google-gemini-cli.d.ts.map +1 -1
- package/dist/providers/google-gemini-cli.js +369 -182
- package/dist/providers/google-gemini-cli.js.map +1 -1
- package/dist/providers/openai-completions.d.ts.map +1 -1
- package/dist/providers/openai-completions.js +29 -0
- package/dist/providers/openai-completions.js.map +1 -1
- package/dist/providers/openai-responses.d.ts +2 -0
- package/dist/providers/openai-responses.d.ts.map +1 -1
- package/dist/providers/openai-responses.js +24 -0
- package/dist/providers/openai-responses.js.map +1 -1
- package/dist/stream.d.ts.map +1 -1
- package/dist/stream.js +28 -0
- package/dist/stream.js.map +1 -1
- package/dist/types.d.ts +4 -2
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js.map +1 -1
- package/dist/utils/overflow.d.ts.map +1 -1
- package/dist/utils/overflow.js +3 -0
- package/dist/utils/overflow.js.map +1 -1
- package/package.json +3 -1
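
The hunks reproduced below are from package/dist/providers/google-gemini-cli.js. In 0.45.1 this provider gains an Antigravity endpoint fallback list, a Claude interleaved-thinking beta header, empty-stream retries with a resettable partial output, a sessionId derived from the first user message, and a now-exported extractRetryDelay(errorText, response) that checks Retry-After and x-ratelimit-* headers before falling back to parsing the error body. A minimal usage sketch of that exported helper, assuming the deep dist import path resolves (it is not a documented entry point) and based only on the behavior visible in the diff:

    // Hypothetical caller of the newly exported helper; the import path is a guess.
    import { extractRetryDelay } from "@mariozechner/pi-ai/dist/providers/google-gemini-cli.js";

    // A 429 whose Retry-After header promises 30 seconds; the body also carries a quota message.
    const response = new Response("", { status: 429, headers: { "retry-after": "30" } });
    const errorText = '{"error":{"message":"Your quota will reset after 39s"}}';

    // Headers are checked first and a ~1s buffer is added: 30_000 + 1_000 = 31_000 ms.
    // Without the response argument, the body pattern applies instead (39s -> 40_000 ms).
    console.log(extractRetryDelay(errorText, response)); // 31000
    console.log(extractRetryDelay(errorText));           // 40000
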
@@ -3,11 +3,14 @@
  * Shared implementation for both google-gemini-cli and google-antigravity providers.
  * Uses the Cloud Code Assist API endpoint to access Gemini and Claude models.
  */
+import { createHash } from "node:crypto";
 import { calculateCost } from "../models.js";
 import { AssistantMessageEventStream } from "../utils/event-stream.js";
 import { sanitizeSurrogates } from "../utils/sanitize-unicode.js";
 import { convertMessages, convertTools, isThinkingPart, mapStopReasonString, mapToolChoice, retainThoughtSignature, } from "./google-shared.js";
 const DEFAULT_ENDPOINT = "https://cloudcode-pa.googleapis.com";
+const ANTIGRAVITY_DAILY_ENDPOINT = "https://daily-cloudcode-pa.sandbox.googleapis.com";
+const ANTIGRAVITY_ENDPOINT_FALLBACKS = [ANTIGRAVITY_DAILY_ENDPOINT, DEFAULT_ENDPOINT];
 // Headers for Gemini CLI (prod endpoint)
 const GEMINI_CLI_HEADERS = {
     "User-Agent": "google-cloud-sdk vscode_cloudshelleditor/0.1",
@@ -113,15 +116,61 @@ let toolCallCounter = 0;
 // Retry configuration
 const MAX_RETRIES = 3;
 const BASE_DELAY_MS = 1000;
+const MAX_EMPTY_STREAM_RETRIES = 2;
+const EMPTY_STREAM_BASE_DELAY_MS = 500;
+const CLAUDE_THINKING_BETA_HEADER = "interleaved-thinking-2025-05-14";
 /**
  * Extract retry delay from Gemini error response (in milliseconds).
- *
+ * Checks headers first (Retry-After, x-ratelimit-reset, x-ratelimit-reset-after),
+ * then parses body patterns like:
  * - "Your quota will reset after 39s"
  * - "Your quota will reset after 18h31m10s"
  * - "Please retry in Xs" or "Please retry in Xms"
  * - "retryDelay": "34.074824224s" (JSON field)
  */
-function extractRetryDelay(errorText) {
+export function extractRetryDelay(errorText, response) {
+    const normalizeDelay = (ms) => (ms > 0 ? Math.ceil(ms + 1000) : undefined);
+    const headers = response instanceof Headers ? response : response?.headers;
+    if (headers) {
+        const retryAfter = headers.get("retry-after");
+        if (retryAfter) {
+            const retryAfterSeconds = Number(retryAfter);
+            if (Number.isFinite(retryAfterSeconds)) {
+                const delay = normalizeDelay(retryAfterSeconds * 1000);
+                if (delay !== undefined) {
+                    return delay;
+                }
+            }
+            const retryAfterDate = new Date(retryAfter);
+            const retryAfterMs = retryAfterDate.getTime();
+            if (!Number.isNaN(retryAfterMs)) {
+                const delay = normalizeDelay(retryAfterMs - Date.now());
+                if (delay !== undefined) {
+                    return delay;
+                }
+            }
+        }
+        const rateLimitReset = headers.get("x-ratelimit-reset");
+        if (rateLimitReset) {
+            const resetSeconds = Number.parseInt(rateLimitReset, 10);
+            if (!Number.isNaN(resetSeconds)) {
+                const delay = normalizeDelay(resetSeconds * 1000 - Date.now());
+                if (delay !== undefined) {
+                    return delay;
+                }
+            }
+        }
+        const rateLimitResetAfter = headers.get("x-ratelimit-reset-after");
+        if (rateLimitResetAfter) {
+            const resetAfterSeconds = Number(rateLimitResetAfter);
+            if (Number.isFinite(resetAfterSeconds)) {
+                const delay = normalizeDelay(resetAfterSeconds * 1000);
+                if (delay !== undefined) {
+                    return delay;
+                }
+            }
+        }
+    }
     // Pattern 1: "Your quota will reset after ..." (formats: "18h31m10s", "10m15s", "6s", "39s")
     const durationMatch = errorText.match(/reset after (?:(\d+)h)?(?:(\d+)m)?(\d+(?:\.\d+)?)s/i);
     if (durationMatch) {
@@ -130,8 +179,9 @@ function extractRetryDelay(errorText) {
         const seconds = parseFloat(durationMatch[3]);
         if (!Number.isNaN(seconds)) {
             const totalMs = ((hours * 60 + minutes) * 60 + seconds) * 1000;
-
-
+            const delay = normalizeDelay(totalMs);
+            if (delay !== undefined) {
+                return delay;
             }
         }
     }
@@ -141,7 +191,10 @@ function extractRetryDelay(errorText) {
         const value = parseFloat(retryInMatch[1]);
         if (!Number.isNaN(value) && value > 0) {
             const ms = retryInMatch[2].toLowerCase() === "ms" ? value : value * 1000;
-
+            const delay = normalizeDelay(ms);
+            if (delay !== undefined) {
+                return delay;
+            }
         }
     }
     // Pattern 3: "retryDelay": "34.074824224s" (JSON field in error details)
@@ -150,19 +203,42 @@ function extractRetryDelay(errorText) {
         const value = parseFloat(retryDelayMatch[1]);
         if (!Number.isNaN(value) && value > 0) {
             const ms = retryDelayMatch[2].toLowerCase() === "ms" ? value : value * 1000;
-
+            const delay = normalizeDelay(ms);
+            if (delay !== undefined) {
+                return delay;
+            }
         }
     }
     return undefined;
 }
+function isClaudeThinkingModel(modelId) {
+    const normalized = modelId.toLowerCase();
+    return normalized.includes("claude") && normalized.includes("thinking");
+}
 /**
- * Check if an error is retryable (rate limit, server error, etc.)
+ * Check if an error is retryable (rate limit, server error, network error, etc.)
  */
 function isRetryableError(status, errorText) {
     if (status === 429 || status === 500 || status === 502 || status === 503 || status === 504) {
         return true;
     }
-    return /resource.?exhausted|rate.?limit|overloaded|service.?unavailable/i.test(errorText);
+    return /resource.?exhausted|rate.?limit|overloaded|service.?unavailable|other.?side.?closed/i.test(errorText);
+}
+/**
+ * Extract a clean, user-friendly error message from Google API error response.
+ * Parses JSON error responses and returns just the message field.
+ */
+function extractErrorMessage(errorText) {
+    try {
+        const parsed = JSON.parse(errorText);
+        if (parsed.error?.message) {
+            return parsed.error.message;
+        }
+    }
+    catch {
+        // Not JSON, return as-is
+    }
+    return errorText;
 }
 /**
  * Sleep for a given number of milliseconds, respecting abort signal.
@@ -219,29 +295,34 @@ export const streamGoogleGeminiCli = (model, context, options) => {
     if (!accessToken || !projectId) {
         throw new Error("Missing token or projectId in Google Cloud credentials. Use /login to re-authenticate.");
     }
-    const
-    const
-
-    const isAntigravity = endpoint.includes("sandbox.googleapis.com");
+    const isAntigravity = model.provider === "google-antigravity";
+    const baseUrl = model.baseUrl?.trim();
+    const endpoints = baseUrl ? [baseUrl] : isAntigravity ? ANTIGRAVITY_ENDPOINT_FALLBACKS : [DEFAULT_ENDPOINT];
     const requestBody = buildRequest(model, context, projectId, options, isAntigravity);
     const headers = isAntigravity ? ANTIGRAVITY_HEADERS : GEMINI_CLI_HEADERS;
+    const requestHeaders = {
+        Authorization: `Bearer ${accessToken}`,
+        "Content-Type": "application/json",
+        Accept: "text/event-stream",
+        ...headers,
+        ...(isClaudeThinkingModel(model.id) ? { "anthropic-beta": CLAUDE_THINKING_BETA_HEADER } : {}),
+    };
+    const requestBodyJson = JSON.stringify(requestBody);
     // Fetch with retry logic for rate limits and transient errors
     let response;
     let lastError;
+    let requestUrl;
     for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
         if (options?.signal?.aborted) {
             throw new Error("Request was aborted");
         }
         try {
-
+            const endpoint = endpoints[Math.min(attempt, endpoints.length - 1)];
+            requestUrl = `${endpoint}/v1internal:streamGenerateContent?alt=sse`;
+            response = await fetch(requestUrl, {
                 method: "POST",
-                headers:
-
-                    "Content-Type": "application/json",
-                    Accept: "text/event-stream",
-                    ...headers,
-                },
-                body: JSON.stringify(requestBody),
+                headers: requestHeaders,
+                body: requestBodyJson,
                 signal: options?.signal,
             });
             if (response.ok) {
@@ -251,13 +332,13 @@ export const streamGoogleGeminiCli = (model, context, options) => {
             // Check if retryable
             if (attempt < MAX_RETRIES && isRetryableError(response.status, errorText)) {
                 // Use server-provided delay or exponential backoff
-                const serverDelay = extractRetryDelay(errorText);
+                const serverDelay = extractRetryDelay(errorText, response);
                 const delayMs = serverDelay ?? BASE_DELAY_MS * 2 ** attempt;
                 await sleep(delayMs, options?.signal);
                 continue;
             }
             // Not retryable or max retries exceeded
-            throw new Error(`Cloud Code Assist API error (${response.status}): ${errorText}`);
+            throw new Error(`Cloud Code Assist API error (${response.status}): ${extractErrorMessage(errorText)}`);
         }
         catch (error) {
             // Check for abort - fetch throws AbortError, our code throws "Request was aborted"
@@ -266,7 +347,11 @@ export const streamGoogleGeminiCli = (model, context, options) => {
                     throw new Error("Request was aborted");
                 }
             }
+            // Extract detailed error message from fetch errors (Node includes cause)
             lastError = error instanceof Error ? error : new Error(String(error));
+            if (lastError.message === "fetch failed" && lastError.cause instanceof Error) {
+                lastError = new Error(`Network error: ${lastError.cause.message}`);
+            }
             // Network errors are retryable
             if (attempt < MAX_RETRIES) {
                 const delayMs = BASE_DELAY_MS * 2 ** attempt;
@@ -279,64 +364,146 @@ export const streamGoogleGeminiCli = (model, context, options) => {
     if (!response || !response.ok) {
         throw lastError ?? new Error("Failed to get response after retries");
     }
-
-
-
-
-
-
-    const blockIndex = () => blocks.length - 1;
-    // Read SSE stream
-    const reader = response.body.getReader();
-    const decoder = new TextDecoder();
-    let buffer = "";
-    // Set up abort handler to cancel reader when signal fires
-    const abortHandler = () => {
-        void reader.cancel().catch(() => { });
+    let started = false;
+    const ensureStarted = () => {
+        if (!started) {
+            stream.push({ type: "start", partial: output });
+            started = true;
+        }
     };
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    const resetOutput = () => {
+        output.content = [];
+        output.usage = {
+            input: 0,
+            output: 0,
+            cacheRead: 0,
+            cacheWrite: 0,
+            totalTokens: 0,
+            cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
+        };
+        output.stopReason = "stop";
+        output.errorMessage = undefined;
+        output.timestamp = Date.now();
+        started = false;
+    };
+    const streamResponse = async (activeResponse) => {
+        if (!activeResponse.body) {
+            throw new Error("No response body");
+        }
+        let hasContent = false;
+        let currentBlock = null;
+        const blocks = output.content;
+        const blockIndex = () => blocks.length - 1;
+        // Read SSE stream
+        const reader = activeResponse.body.getReader();
+        const decoder = new TextDecoder();
+        let buffer = "";
+        // Set up abort handler to cancel reader when signal fires
+        const abortHandler = () => {
+            void reader.cancel().catch(() => { });
+        };
+        options?.signal?.addEventListener("abort", abortHandler);
+        try {
+            while (true) {
+                // Check abort signal before each read
+                if (options?.signal?.aborted) {
+                    throw new Error("Request was aborted");
                 }
-
-
-
-
-    const
-
-
-
-
-
-
-
+                const { done, value } = await reader.read();
+                if (done)
+                    break;
+                buffer += decoder.decode(value, { stream: true });
+                const lines = buffer.split("\n");
+                buffer = lines.pop() || "";
+                for (const line of lines) {
+                    if (!line.startsWith("data:"))
+                        continue;
+                    const jsonStr = line.slice(5).trim();
+                    if (!jsonStr)
+                        continue;
+                    let chunk;
+                    try {
+                        chunk = JSON.parse(jsonStr);
+                    }
+                    catch {
+                        continue;
+                    }
+                    // Unwrap the response
+                    const responseData = chunk.response;
+                    if (!responseData)
+                        continue;
+                    const candidate = responseData.candidates?.[0];
+                    if (candidate?.content?.parts) {
+                        for (const part of candidate.content.parts) {
+                            if (part.text !== undefined) {
+                                hasContent = true;
+                                const isThinking = isThinkingPart(part);
+                                if (!currentBlock ||
+                                    (isThinking && currentBlock.type !== "thinking") ||
+                                    (!isThinking && currentBlock.type !== "text")) {
+                                    if (currentBlock) {
+                                        if (currentBlock.type === "text") {
+                                            stream.push({
+                                                type: "text_end",
+                                                contentIndex: blocks.length - 1,
+                                                content: currentBlock.text,
+                                                partial: output,
+                                            });
+                                        }
+                                        else {
+                                            stream.push({
+                                                type: "thinking_end",
+                                                contentIndex: blockIndex(),
+                                                content: currentBlock.thinking,
+                                                partial: output,
+                                            });
+                                        }
+                                    }
+                                    if (isThinking) {
+                                        currentBlock = { type: "thinking", thinking: "", thinkingSignature: undefined };
+                                        output.content.push(currentBlock);
+                                        ensureStarted();
+                                        stream.push({
+                                            type: "thinking_start",
+                                            contentIndex: blockIndex(),
+                                            partial: output,
+                                        });
+                                    }
+                                    else {
+                                        currentBlock = { type: "text", text: "" };
+                                        output.content.push(currentBlock);
+                                        ensureStarted();
+                                        stream.push({ type: "text_start", contentIndex: blockIndex(), partial: output });
+                                    }
+                                }
+                                if (currentBlock.type === "thinking") {
+                                    currentBlock.thinking += part.text;
+                                    currentBlock.thinkingSignature = retainThoughtSignature(currentBlock.thinkingSignature, part.thoughtSignature);
+                                    stream.push({
+                                        type: "thinking_delta",
+                                        contentIndex: blockIndex(),
+                                        delta: part.text,
+                                        partial: output,
+                                    });
+                                }
+                                else {
+                                    currentBlock.text += part.text;
+                                    currentBlock.textSignature = retainThoughtSignature(currentBlock.textSignature, part.thoughtSignature);
+                                    stream.push({
+                                        type: "text_delta",
+                                        contentIndex: blockIndex(),
+                                        delta: part.text,
+                                        partial: output,
+                                    });
+                                }
+                            }
+                            if (part.functionCall) {
+                                hasContent = true;
                                 if (currentBlock) {
                                     if (currentBlock.type === "text") {
                                         stream.push({
                                             type: "text_end",
-                                            contentIndex:
+                                            contentIndex: blockIndex(),
                                             content: currentBlock.text,
                                             partial: output,
                                         });
@@ -349,134 +516,127 @@ export const streamGoogleGeminiCli = (model, context, options) => {
                                             partial: output,
                                         });
                                     }
+                                    currentBlock = null;
                                 }
-
-
-                                output.content.
-
-
-
-
-
-
-
-
-
-
+                                const providedId = part.functionCall.id;
+                                const needsNewId = !providedId ||
+                                    output.content.some((b) => b.type === "toolCall" && b.id === providedId);
+                                const toolCallId = needsNewId
+                                    ? `${part.functionCall.name}_${Date.now()}_${++toolCallCounter}`
+                                    : providedId;
+                                const toolCall = {
+                                    type: "toolCall",
+                                    id: toolCallId,
+                                    name: part.functionCall.name || "",
+                                    arguments: part.functionCall.args,
+                                    ...(part.thoughtSignature && { thoughtSignature: part.thoughtSignature }),
+                                };
+                                output.content.push(toolCall);
+                                ensureStarted();
+                                stream.push({ type: "toolcall_start", contentIndex: blockIndex(), partial: output });
                                 stream.push({
-                                    type: "
+                                    type: "toolcall_delta",
                                     contentIndex: blockIndex(),
-                                    delta:
+                                    delta: JSON.stringify(toolCall.arguments),
                                     partial: output,
                                 });
-                            }
-                            else {
-                                currentBlock.text += part.text;
-                                currentBlock.textSignature = retainThoughtSignature(currentBlock.textSignature, part.thoughtSignature);
                                 stream.push({
-                                    type: "
+                                    type: "toolcall_end",
                                     contentIndex: blockIndex(),
-
+                                    toolCall,
                                     partial: output,
                                 });
                             }
                         }
-                        if (part.functionCall) {
-                            if (currentBlock) {
-                                if (currentBlock.type === "text") {
-                                    stream.push({
-                                        type: "text_end",
-                                        contentIndex: blockIndex(),
-                                        content: currentBlock.text,
-                                        partial: output,
-                                    });
-                                }
-                                else {
-                                    stream.push({
-                                        type: "thinking_end",
-                                        contentIndex: blockIndex(),
-                                        content: currentBlock.thinking,
-                                        partial: output,
-                                    });
-                                }
-                                currentBlock = null;
-                            }
-                            const providedId = part.functionCall.id;
-                            const needsNewId = !providedId || output.content.some((b) => b.type === "toolCall" && b.id === providedId);
-                            const toolCallId = needsNewId
-                                ? `${part.functionCall.name}_${Date.now()}_${++toolCallCounter}`
-                                : providedId;
-                            const toolCall = {
-                                type: "toolCall",
-                                id: toolCallId,
-                                name: part.functionCall.name || "",
-                                arguments: part.functionCall.args,
-                                ...(part.thoughtSignature && { thoughtSignature: part.thoughtSignature }),
-                            };
-                            output.content.push(toolCall);
-                            stream.push({ type: "toolcall_start", contentIndex: blockIndex(), partial: output });
-                            stream.push({
-                                type: "toolcall_delta",
-                                contentIndex: blockIndex(),
-                                delta: JSON.stringify(toolCall.arguments),
-                                partial: output,
-                            });
-                            stream.push({ type: "toolcall_end", contentIndex: blockIndex(), toolCall, partial: output });
-                        }
                     }
-
-
-
-
-
+                    if (candidate?.finishReason) {
+                        output.stopReason = mapStopReasonString(candidate.finishReason);
+                        if (output.content.some((b) => b.type === "toolCall")) {
+                            output.stopReason = "toolUse";
+                        }
                     }
-
-
-
-
-
-
-
-
-
-                        cacheRead: cacheReadTokens,
-                        cacheWrite: 0,
-                        totalTokens: responseData.usageMetadata.totalTokenCount || 0,
-                        cost: {
-                            input: 0,
-                            output: 0,
-                            cacheRead: 0,
+                    if (responseData.usageMetadata) {
+                        // promptTokenCount includes cachedContentTokenCount, so subtract to get fresh input
+                        const promptTokens = responseData.usageMetadata.promptTokenCount || 0;
+                        const cacheReadTokens = responseData.usageMetadata.cachedContentTokenCount || 0;
+                        output.usage = {
+                            input: promptTokens - cacheReadTokens,
+                            output: (responseData.usageMetadata.candidatesTokenCount || 0) +
+                                (responseData.usageMetadata.thoughtsTokenCount || 0),
+                            cacheRead: cacheReadTokens,
                             cacheWrite: 0,
-
-
-
-
+                            totalTokens: responseData.usageMetadata.totalTokenCount || 0,
+                            cost: {
+                                input: 0,
+                                output: 0,
+                                cacheRead: 0,
+                                cacheWrite: 0,
+                                total: 0,
+                            },
+                        };
+                        calculateCost(model, output.usage);
+                    }
                 }
             }
         }
-
-
-        options?.signal?.removeEventListener("abort", abortHandler);
-    }
-    if (currentBlock) {
-        if (currentBlock.type === "text") {
-            stream.push({
-                type: "text_end",
-                contentIndex: blockIndex(),
-                content: currentBlock.text,
-                partial: output,
-            });
+        finally {
+            options?.signal?.removeEventListener("abort", abortHandler);
         }
-
-
-
-
-
-
+        if (currentBlock) {
+            if (currentBlock.type === "text") {
+                stream.push({
+                    type: "text_end",
+                    contentIndex: blockIndex(),
+                    content: currentBlock.text,
+                    partial: output,
+                });
+            }
+            else {
+                stream.push({
+                    type: "thinking_end",
+                    contentIndex: blockIndex(),
+                    content: currentBlock.thinking,
+                    partial: output,
+                });
+            }
+        }
+        return hasContent;
+    };
+    let receivedContent = false;
+    let currentResponse = response;
+    for (let emptyAttempt = 0; emptyAttempt <= MAX_EMPTY_STREAM_RETRIES; emptyAttempt++) {
+        if (options?.signal?.aborted) {
+            throw new Error("Request was aborted");
+        }
+        if (emptyAttempt > 0) {
+            const backoffMs = EMPTY_STREAM_BASE_DELAY_MS * 2 ** (emptyAttempt - 1);
+            await sleep(backoffMs, options?.signal);
+            if (!requestUrl) {
+                throw new Error("Missing request URL");
+            }
+            currentResponse = await fetch(requestUrl, {
+                method: "POST",
+                headers: requestHeaders,
+                body: requestBodyJson,
+                signal: options?.signal,
             });
+            if (!currentResponse.ok) {
+                const retryErrorText = await currentResponse.text();
+                throw new Error(`Cloud Code Assist API error (${currentResponse.status}): ${retryErrorText}`);
+            }
+        }
+        const streamed = await streamResponse(currentResponse);
+        if (streamed) {
+            receivedContent = true;
+            break;
+        }
+        if (emptyAttempt < MAX_EMPTY_STREAM_RETRIES) {
+            resetOutput();
         }
     }
+    if (!receivedContent) {
+        throw new Error("Cloud Code Assist API returned an empty response");
+    }
     if (options?.signal?.aborted) {
         throw new Error("Request was aborted");
     }
@@ -500,7 +660,30 @@ export const streamGoogleGeminiCli = (model, context, options) => {
     })();
     return stream;
 };
-function
+function deriveSessionId(context) {
+    for (const message of context.messages) {
+        if (message.role !== "user") {
+            continue;
+        }
+        let text = "";
+        if (typeof message.content === "string") {
+            text = message.content;
+        }
+        else if (Array.isArray(message.content)) {
+            text = message.content
+                .filter((item) => item.type === "text")
+                .map((item) => item.text)
+                .join("\n");
+        }
+        if (!text || text.trim().length === 0) {
+            return undefined;
+        }
+        const hash = createHash("sha256").update(text).digest("hex");
+        return hash.slice(0, 32);
+    }
+    return undefined;
+}
+export function buildRequest(model, context, projectId, options = {}, isAntigravity = false) {
     const contents = convertMessages(model, context);
     const generationConfig = {};
     if (options.temperature !== undefined) {
@@ -526,6 +709,10 @@ function buildRequest(model, context, projectId, options = {}, isAntigravity = f
     const request = {
         contents,
     };
+    const sessionId = deriveSessionId(context);
+    if (sessionId) {
+        request.sessionId = sessionId;
+    }
     // System instruction must be object with parts, not plain string
     if (context.systemPrompt) {
         request.systemInstruction = {