@oh-my-pi/pi-ai 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,603 @@
1
+ /**
2
+ * Google Gemini CLI / Antigravity provider.
3
+ * Shared implementation for both google-gemini-cli and google-antigravity providers.
4
+ * Uses the Cloud Code Assist API endpoint to access Gemini and Claude models.
5
+ */
6
+
7
+ import type { Content, ThinkingConfig } from "@google/genai";
8
+ import { calculateCost } from "../models";
9
+ import type {
10
+ Api,
11
+ AssistantMessage,
12
+ Context,
13
+ Model,
14
+ StreamFunction,
15
+ StreamOptions,
16
+ TextContent,
17
+ ThinkingContent,
18
+ ToolCall,
19
+ } from "../types";
20
+ import { AssistantMessageEventStream } from "../utils/event-stream";
21
+ import { sanitizeSurrogates } from "../utils/sanitize-unicode";
22
+ import { convertMessages, convertTools, mapStopReasonString, mapToolChoice } from "./google-shared";
23
+
24
+ /**
25
+ * Thinking level for Gemini 3 models.
26
+ * Mirrors Google's ThinkingLevel enum values.
27
+ */
28
+ export type GoogleThinkingLevel = "THINKING_LEVEL_UNSPECIFIED" | "MINIMAL" | "LOW" | "MEDIUM" | "HIGH";
29
+
30
export interface GoogleGeminiCliOptions extends StreamOptions {
	/** Tool selection mode forwarded to the API's functionCallingConfig. */
	toolChoice?: "auto" | "none" | "any";
	/**
	 * Thinking/reasoning configuration.
	 * - Gemini 2.x models: use `budgetTokens` to set the thinking budget
	 * - Gemini 3 models (gemini-3-pro-*, gemini-3-flash-*): use `level` instead
	 *
	 * When using `streamSimple`, this is handled automatically based on the model.
	 * Only applied when the model reports reasoning support (`model.reasoning`).
	 */
	thinking?: {
		enabled: boolean;
		/** Thinking budget in tokens. Use for Gemini 2.x models. */
		budgetTokens?: number;
		/** Thinking level. Use for Gemini 3 models (LOW/HIGH for Pro, MINIMAL/LOW/MEDIUM/HIGH for Flash). */
		level?: GoogleThinkingLevel;
	};
	// NOTE(review): projectId is not read anywhere in this file — the project id
	// is taken from the JSON-encoded apiKey instead. Confirm whether callers
	// still rely on this field before removing it.
	projectId?: string;
}
48
+
49
// Production Cloud Code Assist endpoint, used unless model.baseUrl overrides it.
const DEFAULT_ENDPOINT = "https://cloudcode-pa.googleapis.com";
// Headers for Gemini CLI (prod endpoint)
const GEMINI_CLI_HEADERS = {
	"User-Agent": "google-cloud-sdk vscode_cloudshelleditor/0.1",
	"X-Goog-Api-Client": "gl-node/22.17.0",
	"Client-Metadata": JSON.stringify({
		ideType: "IDE_UNSPECIFIED",
		platform: "PLATFORM_UNSPECIFIED",
		pluginType: "GEMINI",
	}),
};

// Headers for Antigravity (sandbox endpoint) - requires specific User-Agent.
// Selected when the endpoint host contains "sandbox.googleapis.com".
const ANTIGRAVITY_HEADERS = {
	"User-Agent": "antigravity/1.11.5 darwin/arm64",
	"X-Goog-Api-Client": "google-cloud-sdk vscode_cloudshelleditor/0.1",
	"Client-Metadata": JSON.stringify({
		ideType: "IDE_UNSPECIFIED",
		platform: "PLATFORM_UNSPECIFIED",
		pluginType: "GEMINI",
	}),
};

// Counter for generating unique tool call IDs (module-level so ids stay
// unique across concurrent streams in the same process).
let toolCallCounter = 0;

// Retry configuration: up to MAX_RETRIES re-attempts with exponential
// backoff starting at BASE_DELAY_MS (unless the server suggests a delay).
const MAX_RETRIES = 3;
const BASE_DELAY_MS = 1000;
78
+
79
+ /**
80
+ * Extract retry delay from Gemini error response (in milliseconds).
81
+ * Parses patterns like:
82
+ * - "Your quota will reset after 39s"
83
+ * - "Your quota will reset after 18h31m10s"
84
+ * - "Please retry in Xs" or "Please retry in Xms"
85
+ * - "retryDelay": "34.074824224s" (JSON field)
86
+ */
87
+ function extractRetryDelay(errorText: string): number | undefined {
88
+ // Pattern 1: "Your quota will reset after ..." (formats: "18h31m10s", "10m15s", "6s", "39s")
89
+ const durationMatch = errorText.match(/reset after (?:(\d+)h)?(?:(\d+)m)?(\d+(?:\.\d+)?)s/i);
90
+ if (durationMatch) {
91
+ const hours = durationMatch[1] ? parseInt(durationMatch[1], 10) : 0;
92
+ const minutes = durationMatch[2] ? parseInt(durationMatch[2], 10) : 0;
93
+ const seconds = parseFloat(durationMatch[3]);
94
+ if (!Number.isNaN(seconds)) {
95
+ const totalMs = ((hours * 60 + minutes) * 60 + seconds) * 1000;
96
+ if (totalMs > 0) {
97
+ return Math.ceil(totalMs + 1000); // Add 1s buffer
98
+ }
99
+ }
100
+ }
101
+
102
+ // Pattern 2: "Please retry in X[ms|s]"
103
+ const retryInMatch = errorText.match(/Please retry in ([0-9.]+)(ms|s)/i);
104
+ if (retryInMatch?.[1]) {
105
+ const value = parseFloat(retryInMatch[1]);
106
+ if (!Number.isNaN(value) && value > 0) {
107
+ const ms = retryInMatch[2].toLowerCase() === "ms" ? value : value * 1000;
108
+ return Math.ceil(ms + 1000);
109
+ }
110
+ }
111
+
112
+ // Pattern 3: "retryDelay": "34.074824224s" (JSON field in error details)
113
+ const retryDelayMatch = errorText.match(/"retryDelay":\s*"([0-9.]+)(ms|s)"/i);
114
+ if (retryDelayMatch?.[1]) {
115
+ const value = parseFloat(retryDelayMatch[1]);
116
+ if (!Number.isNaN(value) && value > 0) {
117
+ const ms = retryDelayMatch[2].toLowerCase() === "ms" ? value : value * 1000;
118
+ return Math.ceil(ms + 1000);
119
+ }
120
+ }
121
+
122
+ return undefined;
123
+ }
124
+
125
+ /**
126
+ * Check if an error is retryable (rate limit, server error, etc.)
127
+ */
128
+ function isRetryableError(status: number, errorText: string): boolean {
129
+ if (status === 429 || status === 500 || status === 502 || status === 503 || status === 504) {
130
+ return true;
131
+ }
132
+ return /resource.?exhausted|rate.?limit|overloaded|service.?unavailable/i.test(errorText);
133
+ }
134
+
135
+ /**
136
+ * Sleep for a given number of milliseconds, respecting abort signal.
137
+ */
138
+ function sleep(ms: number, signal?: AbortSignal): Promise<void> {
139
+ return new Promise((resolve, reject) => {
140
+ if (signal?.aborted) {
141
+ reject(new Error("Request was aborted"));
142
+ return;
143
+ }
144
+ const timeout = setTimeout(resolve, ms);
145
+ signal?.addEventListener("abort", () => {
146
+ clearTimeout(timeout);
147
+ reject(new Error("Request was aborted"));
148
+ });
149
+ });
150
+ }
151
+
152
/**
 * Envelope for the v1internal:streamGenerateContent call. The Cloud Code
 * Assist API wraps a GenerateContent-style request together with the GCP
 * project and model ids.
 */
interface CloudCodeAssistRequest {
	/** GCP project id (from the JSON-encoded OAuth credentials). */
	project: string;
	/** Model identifier, passed through from Model.id. */
	model: string;
	/** The wrapped GenerateContent-style request body. */
	request: {
		contents: Content[];
		/** Must be a parts object, not a plain string (see buildRequest). */
		systemInstruction?: { parts: { text: string }[] };
		generationConfig?: {
			maxOutputTokens?: number;
			temperature?: number;
			thinkingConfig?: ThinkingConfig;
		};
		tools?: ReturnType<typeof convertTools>;
		toolConfig?: {
			functionCallingConfig: {
				mode: ReturnType<typeof mapToolChoice>;
			};
		};
	};
	/** Client identifier sent to the API (set to "pi-coding-agent"). */
	userAgent?: string;
	/** Unique per-request id for tracing. */
	requestId?: string;
}
173
+
174
/**
 * One SSE chunk from the Cloud Code Assist API. The standard
 * GenerateContentResponse payload is nested under `response`.
 */
interface CloudCodeAssistResponseChunk {
	response?: {
		candidates?: Array<{
			content?: {
				role: string;
				parts?: Array<{
					/** Incremental text; thinking text when `thought` is true. */
					text?: string;
					/** True when this part belongs to the model's thinking output. */
					thought?: boolean;
					/** Opaque signature carried along with thinking/tool-call parts. */
					thoughtSignature?: string;
					functionCall?: {
						name: string;
						args: Record<string, unknown>;
						/** Server-assigned id; may be missing or duplicated (see stream loop). */
						id?: string;
					};
				}>;
			};
			finishReason?: string;
		}>;
		usageMetadata?: {
			/** Total prompt tokens; includes cachedContentTokenCount. */
			promptTokenCount?: number;
			candidatesTokenCount?: number;
			thoughtsTokenCount?: number;
			totalTokenCount?: number;
			cachedContentTokenCount?: number;
		};
		modelVersion?: string;
		responseId?: string;
	};
	traceId?: string;
}
204
+
205
+ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
206
+ model: Model<"google-gemini-cli">,
207
+ context: Context,
208
+ options?: GoogleGeminiCliOptions,
209
+ ): AssistantMessageEventStream => {
210
+ const stream = new AssistantMessageEventStream();
211
+
212
+ (async () => {
213
+ const output: AssistantMessage = {
214
+ role: "assistant",
215
+ content: [],
216
+ api: "google-gemini-cli" as Api,
217
+ provider: model.provider,
218
+ model: model.id,
219
+ usage: {
220
+ input: 0,
221
+ output: 0,
222
+ cacheRead: 0,
223
+ cacheWrite: 0,
224
+ totalTokens: 0,
225
+ cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
226
+ },
227
+ stopReason: "stop",
228
+ timestamp: Date.now(),
229
+ };
230
+
231
+ try {
232
+ // apiKey is JSON-encoded: { token, projectId }
233
+ const apiKeyRaw = options?.apiKey;
234
+ if (!apiKeyRaw) {
235
+ throw new Error("Google Cloud Code Assist requires OAuth authentication. Use /login to authenticate.");
236
+ }
237
+
238
+ let accessToken: string;
239
+ let projectId: string;
240
+
241
+ try {
242
+ const parsed = JSON.parse(apiKeyRaw) as { token: string; projectId: string };
243
+ accessToken = parsed.token;
244
+ projectId = parsed.projectId;
245
+ } catch {
246
+ throw new Error("Invalid Google Cloud Code Assist credentials. Use /login to re-authenticate.");
247
+ }
248
+
249
+ if (!accessToken || !projectId) {
250
+ throw new Error("Missing token or projectId in Google Cloud credentials. Use /login to re-authenticate.");
251
+ }
252
+
253
+ const requestBody = buildRequest(model, context, projectId, options);
254
+ const endpoint = model.baseUrl || DEFAULT_ENDPOINT;
255
+ const url = `${endpoint}/v1internal:streamGenerateContent?alt=sse`;
256
+
257
+ // Use Antigravity headers for sandbox endpoint, otherwise Gemini CLI headers
258
+ const isAntigravity = endpoint.includes("sandbox.googleapis.com");
259
+ const headers = isAntigravity ? ANTIGRAVITY_HEADERS : GEMINI_CLI_HEADERS;
260
+
261
+ // Fetch with retry logic for rate limits and transient errors
262
+ let response: Response | undefined;
263
+ let lastError: Error | undefined;
264
+
265
+ for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
266
+ if (options?.signal?.aborted) {
267
+ throw new Error("Request was aborted");
268
+ }
269
+
270
+ try {
271
+ response = await fetch(url, {
272
+ method: "POST",
273
+ headers: {
274
+ Authorization: `Bearer ${accessToken}`,
275
+ "Content-Type": "application/json",
276
+ Accept: "text/event-stream",
277
+ ...headers,
278
+ },
279
+ body: JSON.stringify(requestBody),
280
+ signal: options?.signal,
281
+ });
282
+
283
+ if (response.ok) {
284
+ break; // Success, exit retry loop
285
+ }
286
+
287
+ const errorText = await response.text();
288
+
289
+ // Check if retryable
290
+ if (attempt < MAX_RETRIES && isRetryableError(response.status, errorText)) {
291
+ // Use server-provided delay or exponential backoff
292
+ const serverDelay = extractRetryDelay(errorText);
293
+ const delayMs = serverDelay ?? BASE_DELAY_MS * 2 ** attempt;
294
+ await sleep(delayMs, options?.signal);
295
+ continue;
296
+ }
297
+
298
+ // Not retryable or max retries exceeded
299
+ throw new Error(`Cloud Code Assist API error (${response.status}): ${errorText}`);
300
+ } catch (error) {
301
+ if (error instanceof Error && error.message === "Request was aborted") {
302
+ throw error;
303
+ }
304
+ lastError = error instanceof Error ? error : new Error(String(error));
305
+ // Network errors are retryable
306
+ if (attempt < MAX_RETRIES) {
307
+ const delayMs = BASE_DELAY_MS * 2 ** attempt;
308
+ await sleep(delayMs, options?.signal);
309
+ continue;
310
+ }
311
+ throw lastError;
312
+ }
313
+ }
314
+
315
+ if (!response || !response.ok) {
316
+ throw lastError ?? new Error("Failed to get response after retries");
317
+ }
318
+
319
+ if (!response.body) {
320
+ throw new Error("No response body");
321
+ }
322
+
323
+ stream.push({ type: "start", partial: output });
324
+
325
+ let currentBlock: TextContent | ThinkingContent | null = null;
326
+ const blocks = output.content;
327
+ const blockIndex = () => blocks.length - 1;
328
+
329
+ // Read SSE stream
330
+ const reader = response.body.getReader();
331
+ const decoder = new TextDecoder();
332
+ let buffer = "";
333
+
334
+ while (true) {
335
+ const { done, value } = await reader.read();
336
+ if (done) break;
337
+
338
+ buffer += decoder.decode(value, { stream: true });
339
+ const lines = buffer.split("\n");
340
+ buffer = lines.pop() || "";
341
+
342
+ for (const line of lines) {
343
+ if (!line.startsWith("data:")) continue;
344
+
345
+ const jsonStr = line.slice(5).trim();
346
+ if (!jsonStr) continue;
347
+
348
+ let chunk: CloudCodeAssistResponseChunk;
349
+ try {
350
+ chunk = JSON.parse(jsonStr);
351
+ } catch {
352
+ continue;
353
+ }
354
+
355
+ // Unwrap the response
356
+ const responseData = chunk.response;
357
+ if (!responseData) continue;
358
+
359
+ const candidate = responseData.candidates?.[0];
360
+ if (candidate?.content?.parts) {
361
+ for (const part of candidate.content.parts) {
362
+ if (part.text !== undefined) {
363
+ const isThinking = part.thought === true;
364
+ if (
365
+ !currentBlock ||
366
+ (isThinking && currentBlock.type !== "thinking") ||
367
+ (!isThinking && currentBlock.type !== "text")
368
+ ) {
369
+ if (currentBlock) {
370
+ if (currentBlock.type === "text") {
371
+ stream.push({
372
+ type: "text_end",
373
+ contentIndex: blocks.length - 1,
374
+ content: currentBlock.text,
375
+ partial: output,
376
+ });
377
+ } else {
378
+ stream.push({
379
+ type: "thinking_end",
380
+ contentIndex: blockIndex(),
381
+ content: currentBlock.thinking,
382
+ partial: output,
383
+ });
384
+ }
385
+ }
386
+ if (isThinking) {
387
+ currentBlock = { type: "thinking", thinking: "", thinkingSignature: undefined };
388
+ output.content.push(currentBlock);
389
+ stream.push({ type: "thinking_start", contentIndex: blockIndex(), partial: output });
390
+ } else {
391
+ currentBlock = { type: "text", text: "" };
392
+ output.content.push(currentBlock);
393
+ stream.push({ type: "text_start", contentIndex: blockIndex(), partial: output });
394
+ }
395
+ }
396
+ if (currentBlock.type === "thinking") {
397
+ currentBlock.thinking += part.text;
398
+ currentBlock.thinkingSignature = part.thoughtSignature;
399
+ stream.push({
400
+ type: "thinking_delta",
401
+ contentIndex: blockIndex(),
402
+ delta: part.text,
403
+ partial: output,
404
+ });
405
+ } else {
406
+ currentBlock.text += part.text;
407
+ stream.push({
408
+ type: "text_delta",
409
+ contentIndex: blockIndex(),
410
+ delta: part.text,
411
+ partial: output,
412
+ });
413
+ }
414
+ }
415
+
416
+ if (part.functionCall) {
417
+ if (currentBlock) {
418
+ if (currentBlock.type === "text") {
419
+ stream.push({
420
+ type: "text_end",
421
+ contentIndex: blockIndex(),
422
+ content: currentBlock.text,
423
+ partial: output,
424
+ });
425
+ } else {
426
+ stream.push({
427
+ type: "thinking_end",
428
+ contentIndex: blockIndex(),
429
+ content: currentBlock.thinking,
430
+ partial: output,
431
+ });
432
+ }
433
+ currentBlock = null;
434
+ }
435
+
436
+ const providedId = part.functionCall.id;
437
+ const needsNewId =
438
+ !providedId || output.content.some((b) => b.type === "toolCall" && b.id === providedId);
439
+ const toolCallId = needsNewId
440
+ ? `${part.functionCall.name}_${Date.now()}_${++toolCallCounter}`
441
+ : providedId;
442
+
443
+ const toolCall: ToolCall = {
444
+ type: "toolCall",
445
+ id: toolCallId,
446
+ name: part.functionCall.name || "",
447
+ arguments: part.functionCall.args as Record<string, unknown>,
448
+ ...(part.thoughtSignature && { thoughtSignature: part.thoughtSignature }),
449
+ };
450
+
451
+ output.content.push(toolCall);
452
+ stream.push({ type: "toolcall_start", contentIndex: blockIndex(), partial: output });
453
+ stream.push({
454
+ type: "toolcall_delta",
455
+ contentIndex: blockIndex(),
456
+ delta: JSON.stringify(toolCall.arguments),
457
+ partial: output,
458
+ });
459
+ stream.push({ type: "toolcall_end", contentIndex: blockIndex(), toolCall, partial: output });
460
+ }
461
+ }
462
+ }
463
+
464
+ if (candidate?.finishReason) {
465
+ output.stopReason = mapStopReasonString(candidate.finishReason);
466
+ if (output.content.some((b) => b.type === "toolCall")) {
467
+ output.stopReason = "toolUse";
468
+ }
469
+ }
470
+
471
+ if (responseData.usageMetadata) {
472
+ // promptTokenCount includes cachedContentTokenCount, so subtract to get fresh input
473
+ const promptTokens = responseData.usageMetadata.promptTokenCount || 0;
474
+ const cacheReadTokens = responseData.usageMetadata.cachedContentTokenCount || 0;
475
+ output.usage = {
476
+ input: promptTokens - cacheReadTokens,
477
+ output:
478
+ (responseData.usageMetadata.candidatesTokenCount || 0) +
479
+ (responseData.usageMetadata.thoughtsTokenCount || 0),
480
+ cacheRead: cacheReadTokens,
481
+ cacheWrite: 0,
482
+ totalTokens: responseData.usageMetadata.totalTokenCount || 0,
483
+ cost: {
484
+ input: 0,
485
+ output: 0,
486
+ cacheRead: 0,
487
+ cacheWrite: 0,
488
+ total: 0,
489
+ },
490
+ };
491
+ calculateCost(model, output.usage);
492
+ }
493
+ }
494
+ }
495
+
496
+ if (currentBlock) {
497
+ if (currentBlock.type === "text") {
498
+ stream.push({
499
+ type: "text_end",
500
+ contentIndex: blockIndex(),
501
+ content: currentBlock.text,
502
+ partial: output,
503
+ });
504
+ } else {
505
+ stream.push({
506
+ type: "thinking_end",
507
+ contentIndex: blockIndex(),
508
+ content: currentBlock.thinking,
509
+ partial: output,
510
+ });
511
+ }
512
+ }
513
+
514
+ if (options?.signal?.aborted) {
515
+ throw new Error("Request was aborted");
516
+ }
517
+
518
+ if (output.stopReason === "aborted" || output.stopReason === "error") {
519
+ throw new Error("An unknown error occurred");
520
+ }
521
+
522
+ stream.push({ type: "done", reason: output.stopReason, message: output });
523
+ stream.end();
524
+ } catch (error) {
525
+ for (const block of output.content) {
526
+ if ("index" in block) {
527
+ delete (block as { index?: number }).index;
528
+ }
529
+ }
530
+ output.stopReason = options?.signal?.aborted ? "aborted" : "error";
531
+ output.errorMessage = error instanceof Error ? error.message : JSON.stringify(error);
532
+ stream.push({ type: "error", reason: output.stopReason, error: output });
533
+ stream.end();
534
+ }
535
+ })();
536
+
537
+ return stream;
538
+ };
539
+
540
+ function buildRequest(
541
+ model: Model<"google-gemini-cli">,
542
+ context: Context,
543
+ projectId: string,
544
+ options: GoogleGeminiCliOptions = {},
545
+ ): CloudCodeAssistRequest {
546
+ const contents = convertMessages(model, context);
547
+
548
+ const generationConfig: CloudCodeAssistRequest["request"]["generationConfig"] = {};
549
+ if (options.temperature !== undefined) {
550
+ generationConfig.temperature = options.temperature;
551
+ }
552
+ if (options.maxTokens !== undefined) {
553
+ generationConfig.maxOutputTokens = options.maxTokens;
554
+ }
555
+
556
+ // Thinking config
557
+ if (options.thinking?.enabled && model.reasoning) {
558
+ generationConfig.thinkingConfig = {
559
+ includeThoughts: true,
560
+ };
561
+ // Gemini 3 models use thinkingLevel, older models use thinkingBudget
562
+ if (options.thinking.level !== undefined) {
563
+ // Cast to any since our GoogleThinkingLevel mirrors Google's ThinkingLevel enum values
564
+ generationConfig.thinkingConfig.thinkingLevel = options.thinking.level as any;
565
+ } else if (options.thinking.budgetTokens !== undefined) {
566
+ generationConfig.thinkingConfig.thinkingBudget = options.thinking.budgetTokens;
567
+ }
568
+ }
569
+
570
+ const request: CloudCodeAssistRequest["request"] = {
571
+ contents,
572
+ };
573
+
574
+ // System instruction must be object with parts, not plain string
575
+ if (context.systemPrompt) {
576
+ request.systemInstruction = {
577
+ parts: [{ text: sanitizeSurrogates(context.systemPrompt) }],
578
+ };
579
+ }
580
+
581
+ if (Object.keys(generationConfig).length > 0) {
582
+ request.generationConfig = generationConfig;
583
+ }
584
+
585
+ if (context.tools && context.tools.length > 0) {
586
+ request.tools = convertTools(context.tools);
587
+ if (options.toolChoice) {
588
+ request.toolConfig = {
589
+ functionCallingConfig: {
590
+ mode: mapToolChoice(options.toolChoice),
591
+ },
592
+ };
593
+ }
594
+ }
595
+
596
+ return {
597
+ project: projectId,
598
+ model: model.id,
599
+ request,
600
+ userAgent: "pi-coding-agent",
601
+ requestId: `pi-${Date.now()}-${Math.random().toString(36).slice(2, 11)}`,
602
+ };
603
+ }