@oh-my-pi/pi-ai 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,603 @@
1
+ /**
2
+ * Google Gemini CLI / Antigravity provider.
3
+ * Shared implementation for both google-gemini-cli and google-antigravity providers.
4
+ * Uses the Cloud Code Assist API endpoint to access Gemini and Claude models.
5
+ */
6
+
7
+ import type { Content, ThinkingConfig } from "@google/genai";
8
+ import { calculateCost } from "../models";
9
+ import type {
10
+ Api,
11
+ AssistantMessage,
12
+ Context,
13
+ Model,
14
+ StreamFunction,
15
+ StreamOptions,
16
+ TextContent,
17
+ ThinkingContent,
18
+ ToolCall,
19
+ } from "../types";
20
+ import { AssistantMessageEventStream } from "../utils/event-stream";
21
+ import { sanitizeSurrogates } from "../utils/sanitize-unicode";
22
+ import { convertMessages, convertTools, mapStopReasonString, mapToolChoice } from "./google-shared";
23
+
24
+ /**
25
+ * Thinking level for Gemini 3 models.
26
+ * Mirrors Google's ThinkingLevel enum values.
27
+ */
28
+ export type GoogleThinkingLevel = "THINKING_LEVEL_UNSPECIFIED" | "MINIMAL" | "LOW" | "MEDIUM" | "HIGH";
29
+
30
export interface GoogleGeminiCliOptions extends StreamOptions {
	/** Tool selection mode forwarded to the API's functionCallingConfig. */
	toolChoice?: "auto" | "none" | "any";
	/**
	 * Thinking/reasoning configuration.
	 * - Gemini 2.x models: use `budgetTokens` to set the thinking budget
	 * - Gemini 3 models (gemini-3-pro-*, gemini-3-flash-*): use `level` instead
	 *
	 * When using `streamSimple`, this is handled automatically based on the model.
	 * Only applied when the model reports reasoning support (`model.reasoning`).
	 */
	thinking?: {
		enabled: boolean;
		/** Thinking budget in tokens. Use for Gemini 2.x models. */
		budgetTokens?: number;
		/** Thinking level. Use for Gemini 3 models (LOW/HIGH for Pro, MINIMAL/LOW/MEDIUM/HIGH for Flash). */
		level?: GoogleThinkingLevel;
	};
	// NOTE(review): projectId is not read anywhere in this file — the project id
	// is taken from the JSON-encoded apiKey instead. Confirm whether callers
	// still rely on this field before removing it.
	projectId?: string;
}
48
+
49
// Production Cloud Code Assist endpoint, used unless model.baseUrl overrides it.
const DEFAULT_ENDPOINT = "https://cloudcode-pa.googleapis.com";
// Headers for Gemini CLI (prod endpoint)
const GEMINI_CLI_HEADERS = {
	"User-Agent": "google-cloud-sdk vscode_cloudshelleditor/0.1",
	"X-Goog-Api-Client": "gl-node/22.17.0",
	"Client-Metadata": JSON.stringify({
		ideType: "IDE_UNSPECIFIED",
		platform: "PLATFORM_UNSPECIFIED",
		pluginType: "GEMINI",
	}),
};

// Headers for Antigravity (sandbox endpoint) - requires specific User-Agent.
// Selected when the endpoint host contains "sandbox.googleapis.com".
const ANTIGRAVITY_HEADERS = {
	"User-Agent": "antigravity/1.11.5 darwin/arm64",
	"X-Goog-Api-Client": "google-cloud-sdk vscode_cloudshelleditor/0.1",
	"Client-Metadata": JSON.stringify({
		ideType: "IDE_UNSPECIFIED",
		platform: "PLATFORM_UNSPECIFIED",
		pluginType: "GEMINI",
	}),
};

// Counter for generating unique tool call IDs (module-level so ids stay
// unique across concurrent streams in the same process).
let toolCallCounter = 0;

// Retry configuration: up to MAX_RETRIES re-attempts with exponential
// backoff starting at BASE_DELAY_MS (unless the server suggests a delay).
const MAX_RETRIES = 3;
const BASE_DELAY_MS = 1000;
78
+
79
+ /**
80
+ * Extract retry delay from Gemini error response (in milliseconds).
81
+ * Parses patterns like:
82
+ * - "Your quota will reset after 39s"
83
+ * - "Your quota will reset after 18h31m10s"
84
+ * - "Please retry in Xs" or "Please retry in Xms"
85
+ * - "retryDelay": "34.074824224s" (JSON field)
86
+ */
87
+ function extractRetryDelay(errorText: string): number | undefined {
88
+ // Pattern 1: "Your quota will reset after ..." (formats: "18h31m10s", "10m15s", "6s", "39s")
89
+ const durationMatch = errorText.match(/reset after (?:(\d+)h)?(?:(\d+)m)?(\d+(?:\.\d+)?)s/i);
90
+ if (durationMatch) {
91
+ const hours = durationMatch[1] ? parseInt(durationMatch[1], 10) : 0;
92
+ const minutes = durationMatch[2] ? parseInt(durationMatch[2], 10) : 0;
93
+ const seconds = parseFloat(durationMatch[3]);
94
+ if (!Number.isNaN(seconds)) {
95
+ const totalMs = ((hours * 60 + minutes) * 60 + seconds) * 1000;
96
+ if (totalMs > 0) {
97
+ return Math.ceil(totalMs + 1000); // Add 1s buffer
98
+ }
99
+ }
100
+ }
101
+
102
+ // Pattern 2: "Please retry in X[ms|s]"
103
+ const retryInMatch = errorText.match(/Please retry in ([0-9.]+)(ms|s)/i);
104
+ if (retryInMatch?.[1]) {
105
+ const value = parseFloat(retryInMatch[1]);
106
+ if (!Number.isNaN(value) && value > 0) {
107
+ const ms = retryInMatch[2].toLowerCase() === "ms" ? value : value * 1000;
108
+ return Math.ceil(ms + 1000);
109
+ }
110
+ }
111
+
112
+ // Pattern 3: "retryDelay": "34.074824224s" (JSON field in error details)
113
+ const retryDelayMatch = errorText.match(/"retryDelay":\s*"([0-9.]+)(ms|s)"/i);
114
+ if (retryDelayMatch?.[1]) {
115
+ const value = parseFloat(retryDelayMatch[1]);
116
+ if (!Number.isNaN(value) && value > 0) {
117
+ const ms = retryDelayMatch[2].toLowerCase() === "ms" ? value : value * 1000;
118
+ return Math.ceil(ms + 1000);
119
+ }
120
+ }
121
+
122
+ return undefined;
123
+ }
124
+
125
+ /**
126
+ * Check if an error is retryable (rate limit, server error, etc.)
127
+ */
128
+ function isRetryableError(status: number, errorText: string): boolean {
129
+ if (status === 429 || status === 500 || status === 502 || status === 503 || status === 504) {
130
+ return true;
131
+ }
132
+ return /resource.?exhausted|rate.?limit|overloaded|service.?unavailable/i.test(errorText);
133
+ }
134
+
135
+ /**
136
+ * Sleep for a given number of milliseconds, respecting abort signal.
137
+ */
138
+ function sleep(ms: number, signal?: AbortSignal): Promise<void> {
139
+ return new Promise((resolve, reject) => {
140
+ if (signal?.aborted) {
141
+ reject(new Error("Request was aborted"));
142
+ return;
143
+ }
144
+ const timeout = setTimeout(resolve, ms);
145
+ signal?.addEventListener("abort", () => {
146
+ clearTimeout(timeout);
147
+ reject(new Error("Request was aborted"));
148
+ });
149
+ });
150
+ }
151
+
152
/**
 * Envelope for the v1internal:streamGenerateContent call. The Cloud Code
 * Assist API wraps a GenerateContent-style request together with the GCP
 * project and model ids.
 */
interface CloudCodeAssistRequest {
	/** GCP project id (from the JSON-encoded OAuth credentials). */
	project: string;
	/** Model identifier, passed through from Model.id. */
	model: string;
	/** The wrapped GenerateContent-style request body. */
	request: {
		contents: Content[];
		/** Must be a parts object, not a plain string (see buildRequest). */
		systemInstruction?: { parts: { text: string }[] };
		generationConfig?: {
			maxOutputTokens?: number;
			temperature?: number;
			thinkingConfig?: ThinkingConfig;
		};
		tools?: ReturnType<typeof convertTools>;
		toolConfig?: {
			functionCallingConfig: {
				mode: ReturnType<typeof mapToolChoice>;
			};
		};
	};
	/** Client identifier sent to the API (set to "pi-coding-agent"). */
	userAgent?: string;
	/** Unique per-request id for tracing. */
	requestId?: string;
}
173
+
174
/**
 * One SSE chunk from the Cloud Code Assist API. The standard
 * GenerateContentResponse payload is nested under `response`.
 */
interface CloudCodeAssistResponseChunk {
	response?: {
		candidates?: Array<{
			content?: {
				role: string;
				parts?: Array<{
					/** Incremental text; thinking text when `thought` is true. */
					text?: string;
					/** True when this part belongs to the model's thinking output. */
					thought?: boolean;
					/** Opaque signature carried along with thinking/tool-call parts. */
					thoughtSignature?: string;
					functionCall?: {
						name: string;
						args: Record<string, unknown>;
						/** Server-assigned id; may be missing or duplicated (see stream loop). */
						id?: string;
					};
				}>;
			};
			finishReason?: string;
		}>;
		usageMetadata?: {
			/** Total prompt tokens; includes cachedContentTokenCount. */
			promptTokenCount?: number;
			candidatesTokenCount?: number;
			thoughtsTokenCount?: number;
			totalTokenCount?: number;
			cachedContentTokenCount?: number;
		};
		modelVersion?: string;
		responseId?: string;
	};
	traceId?: string;
}
204
+
205
+ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
206
+ model: Model<"google-gemini-cli">,
207
+ context: Context,
208
+ options?: GoogleGeminiCliOptions,
209
+ ): AssistantMessageEventStream => {
210
+ const stream = new AssistantMessageEventStream();
211
+
212
+ (async () => {
213
+ const output: AssistantMessage = {
214
+ role: "assistant",
215
+ content: [],
216
+ api: "google-gemini-cli" as Api,
217
+ provider: model.provider,
218
+ model: model.id,
219
+ usage: {
220
+ input: 0,
221
+ output: 0,
222
+ cacheRead: 0,
223
+ cacheWrite: 0,
224
+ totalTokens: 0,
225
+ cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
226
+ },
227
+ stopReason: "stop",
228
+ timestamp: Date.now(),
229
+ };
230
+
231
+ try {
232
+ // apiKey is JSON-encoded: { token, projectId }
233
+ const apiKeyRaw = options?.apiKey;
234
+ if (!apiKeyRaw) {
235
+ throw new Error("Google Cloud Code Assist requires OAuth authentication. Use /login to authenticate.");
236
+ }
237
+
238
+ let accessToken: string;
239
+ let projectId: string;
240
+
241
+ try {
242
+ const parsed = JSON.parse(apiKeyRaw) as { token: string; projectId: string };
243
+ accessToken = parsed.token;
244
+ projectId = parsed.projectId;
245
+ } catch {
246
+ throw new Error("Invalid Google Cloud Code Assist credentials. Use /login to re-authenticate.");
247
+ }
248
+
249
+ if (!accessToken || !projectId) {
250
+ throw new Error("Missing token or projectId in Google Cloud credentials. Use /login to re-authenticate.");
251
+ }
252
+
253
+ const requestBody = buildRequest(model, context, projectId, options);
254
+ const endpoint = model.baseUrl || DEFAULT_ENDPOINT;
255
+ const url = `${endpoint}/v1internal:streamGenerateContent?alt=sse`;
256
+
257
+ // Use Antigravity headers for sandbox endpoint, otherwise Gemini CLI headers
258
+ const isAntigravity = endpoint.includes("sandbox.googleapis.com");
259
+ const headers = isAntigravity ? ANTIGRAVITY_HEADERS : GEMINI_CLI_HEADERS;
260
+
261
+ // Fetch with retry logic for rate limits and transient errors
262
+ let response: Response | undefined;
263
+ let lastError: Error | undefined;
264
+
265
+ for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
266
+ if (options?.signal?.aborted) {
267
+ throw new Error("Request was aborted");
268
+ }
269
+
270
+ try {
271
+ response = await fetch(url, {
272
+ method: "POST",
273
+ headers: {
274
+ Authorization: `Bearer ${accessToken}`,
275
+ "Content-Type": "application/json",
276
+ Accept: "text/event-stream",
277
+ ...headers,
278
+ },
279
+ body: JSON.stringify(requestBody),
280
+ signal: options?.signal,
281
+ });
282
+
283
+ if (response.ok) {
284
+ break; // Success, exit retry loop
285
+ }
286
+
287
+ const errorText = await response.text();
288
+
289
+ // Check if retryable
290
+ if (attempt < MAX_RETRIES && isRetryableError(response.status, errorText)) {
291
+ // Use server-provided delay or exponential backoff
292
+ const serverDelay = extractRetryDelay(errorText);
293
+ const delayMs = serverDelay ?? BASE_DELAY_MS * 2 ** attempt;
294
+ await sleep(delayMs, options?.signal);
295
+ continue;
296
+ }
297
+
298
+ // Not retryable or max retries exceeded
299
+ throw new Error(`Cloud Code Assist API error (${response.status}): ${errorText}`);
300
+ } catch (error) {
301
+ if (error instanceof Error && error.message === "Request was aborted") {
302
+ throw error;
303
+ }
304
+ lastError = error instanceof Error ? error : new Error(String(error));
305
+ // Network errors are retryable
306
+ if (attempt < MAX_RETRIES) {
307
+ const delayMs = BASE_DELAY_MS * 2 ** attempt;
308
+ await sleep(delayMs, options?.signal);
309
+ continue;
310
+ }
311
+ throw lastError;
312
+ }
313
+ }
314
+
315
+ if (!response || !response.ok) {
316
+ throw lastError ?? new Error("Failed to get response after retries");
317
+ }
318
+
319
+ if (!response.body) {
320
+ throw new Error("No response body");
321
+ }
322
+
323
+ stream.push({ type: "start", partial: output });
324
+
325
+ let currentBlock: TextContent | ThinkingContent | null = null;
326
+ const blocks = output.content;
327
+ const blockIndex = () => blocks.length - 1;
328
+
329
+ // Read SSE stream
330
+ const reader = response.body.getReader();
331
+ const decoder = new TextDecoder();
332
+ let buffer = "";
333
+
334
+ while (true) {
335
+ const { done, value } = await reader.read();
336
+ if (done) break;
337
+
338
+ buffer += decoder.decode(value, { stream: true });
339
+ const lines = buffer.split("\n");
340
+ buffer = lines.pop() || "";
341
+
342
+ for (const line of lines) {
343
+ if (!line.startsWith("data:")) continue;
344
+
345
+ const jsonStr = line.slice(5).trim();
346
+ if (!jsonStr) continue;
347
+
348
+ let chunk: CloudCodeAssistResponseChunk;
349
+ try {
350
+ chunk = JSON.parse(jsonStr);
351
+ } catch {
352
+ continue;
353
+ }
354
+
355
+ // Unwrap the response
356
+ const responseData = chunk.response;
357
+ if (!responseData) continue;
358
+
359
+ const candidate = responseData.candidates?.[0];
360
+ if (candidate?.content?.parts) {
361
+ for (const part of candidate.content.parts) {
362
+ if (part.text !== undefined) {
363
+ const isThinking = part.thought === true;
364
+ if (
365
+ !currentBlock ||
366
+ (isThinking && currentBlock.type !== "thinking") ||
367
+ (!isThinking && currentBlock.type !== "text")
368
+ ) {
369
+ if (currentBlock) {
370
+ if (currentBlock.type === "text") {
371
+ stream.push({
372
+ type: "text_end",
373
+ contentIndex: blocks.length - 1,
374
+ content: currentBlock.text,
375
+ partial: output,
376
+ });
377
+ } else {
378
+ stream.push({
379
+ type: "thinking_end",
380
+ contentIndex: blockIndex(),
381
+ content: currentBlock.thinking,
382
+ partial: output,
383
+ });
384
+ }
385
+ }
386
+ if (isThinking) {
387
+ currentBlock = { type: "thinking", thinking: "", thinkingSignature: undefined };
388
+ output.content.push(currentBlock);
389
+ stream.push({ type: "thinking_start", contentIndex: blockIndex(), partial: output });
390
+ } else {
391
+ currentBlock = { type: "text", text: "" };
392
+ output.content.push(currentBlock);
393
+ stream.push({ type: "text_start", contentIndex: blockIndex(), partial: output });
394
+ }
395
+ }
396
+ if (currentBlock.type === "thinking") {
397
+ currentBlock.thinking += part.text;
398
+ currentBlock.thinkingSignature = part.thoughtSignature;
399
+ stream.push({
400
+ type: "thinking_delta",
401
+ contentIndex: blockIndex(),
402
+ delta: part.text,
403
+ partial: output,
404
+ });
405
+ } else {
406
+ currentBlock.text += part.text;
407
+ stream.push({
408
+ type: "text_delta",
409
+ contentIndex: blockIndex(),
410
+ delta: part.text,
411
+ partial: output,
412
+ });
413
+ }
414
+ }
415
+
416
+ if (part.functionCall) {
417
+ if (currentBlock) {
418
+ if (currentBlock.type === "text") {
419
+ stream.push({
420
+ type: "text_end",
421
+ contentIndex: blockIndex(),
422
+ content: currentBlock.text,
423
+ partial: output,
424
+ });
425
+ } else {
426
+ stream.push({
427
+ type: "thinking_end",
428
+ contentIndex: blockIndex(),
429
+ content: currentBlock.thinking,
430
+ partial: output,
431
+ });
432
+ }
433
+ currentBlock = null;
434
+ }
435
+
436
+ const providedId = part.functionCall.id;
437
+ const needsNewId =
438
+ !providedId || output.content.some((b) => b.type === "toolCall" && b.id === providedId);
439
+ const toolCallId = needsNewId
440
+ ? `${part.functionCall.name}_${Date.now()}_${++toolCallCounter}`
441
+ : providedId;
442
+
443
+ const toolCall: ToolCall = {
444
+ type: "toolCall",
445
+ id: toolCallId,
446
+ name: part.functionCall.name || "",
447
+ arguments: part.functionCall.args as Record<string, unknown>,
448
+ ...(part.thoughtSignature && { thoughtSignature: part.thoughtSignature }),
449
+ };
450
+
451
+ output.content.push(toolCall);
452
+ stream.push({ type: "toolcall_start", contentIndex: blockIndex(), partial: output });
453
+ stream.push({
454
+ type: "toolcall_delta",
455
+ contentIndex: blockIndex(),
456
+ delta: JSON.stringify(toolCall.arguments),
457
+ partial: output,
458
+ });
459
+ stream.push({ type: "toolcall_end", contentIndex: blockIndex(), toolCall, partial: output });
460
+ }
461
+ }
462
+ }
463
+
464
+ if (candidate?.finishReason) {
465
+ output.stopReason = mapStopReasonString(candidate.finishReason);
466
+ if (output.content.some((b) => b.type === "toolCall")) {
467
+ output.stopReason = "toolUse";
468
+ }
469
+ }
470
+
471
+ if (responseData.usageMetadata) {
472
+ // promptTokenCount includes cachedContentTokenCount, so subtract to get fresh input
473
+ const promptTokens = responseData.usageMetadata.promptTokenCount || 0;
474
+ const cacheReadTokens = responseData.usageMetadata.cachedContentTokenCount || 0;
475
+ output.usage = {
476
+ input: promptTokens - cacheReadTokens,
477
+ output:
478
+ (responseData.usageMetadata.candidatesTokenCount || 0) +
479
+ (responseData.usageMetadata.thoughtsTokenCount || 0),
480
+ cacheRead: cacheReadTokens,
481
+ cacheWrite: 0,
482
+ totalTokens: responseData.usageMetadata.totalTokenCount || 0,
483
+ cost: {
484
+ input: 0,
485
+ output: 0,
486
+ cacheRead: 0,
487
+ cacheWrite: 0,
488
+ total: 0,
489
+ },
490
+ };
491
+ calculateCost(model, output.usage);
492
+ }
493
+ }
494
+ }
495
+
496
+ if (currentBlock) {
497
+ if (currentBlock.type === "text") {
498
+ stream.push({
499
+ type: "text_end",
500
+ contentIndex: blockIndex(),
501
+ content: currentBlock.text,
502
+ partial: output,
503
+ });
504
+ } else {
505
+ stream.push({
506
+ type: "thinking_end",
507
+ contentIndex: blockIndex(),
508
+ content: currentBlock.thinking,
509
+ partial: output,
510
+ });
511
+ }
512
+ }
513
+
514
+ if (options?.signal?.aborted) {
515
+ throw new Error("Request was aborted");
516
+ }
517
+
518
+ if (output.stopReason === "aborted" || output.stopReason === "error") {
519
+ throw new Error("An unknown error occurred");
520
+ }
521
+
522
+ stream.push({ type: "done", reason: output.stopReason, message: output });
523
+ stream.end();
524
+ } catch (error) {
525
+ for (const block of output.content) {
526
+ if ("index" in block) {
527
+ delete (block as { index?: number }).index;
528
+ }
529
+ }
530
+ output.stopReason = options?.signal?.aborted ? "aborted" : "error";
531
+ output.errorMessage = error instanceof Error ? error.message : JSON.stringify(error);
532
+ stream.push({ type: "error", reason: output.stopReason, error: output });
533
+ stream.end();
534
+ }
535
+ })();
536
+
537
+ return stream;
538
+ };
539
+
540
+ function buildRequest(
541
+ model: Model<"google-gemini-cli">,
542
+ context: Context,
543
+ projectId: string,
544
+ options: GoogleGeminiCliOptions = {},
545
+ ): CloudCodeAssistRequest {
546
+ const contents = convertMessages(model, context);
547
+
548
+ const generationConfig: CloudCodeAssistRequest["request"]["generationConfig"] = {};
549
+ if (options.temperature !== undefined) {
550
+ generationConfig.temperature = options.temperature;
551
+ }
552
+ if (options.maxTokens !== undefined) {
553
+ generationConfig.maxOutputTokens = options.maxTokens;
554
+ }
555
+
556
+ // Thinking config
557
+ if (options.thinking?.enabled && model.reasoning) {
558
+ generationConfig.thinkingConfig = {
559
+ includeThoughts: true,
560
+ };
561
+ // Gemini 3 models use thinkingLevel, older models use thinkingBudget
562
+ if (options.thinking.level !== undefined) {
563
+ // Cast to any since our GoogleThinkingLevel mirrors Google's ThinkingLevel enum values
564
+ generationConfig.thinkingConfig.thinkingLevel = options.thinking.level as any;
565
+ } else if (options.thinking.budgetTokens !== undefined) {
566
+ generationConfig.thinkingConfig.thinkingBudget = options.thinking.budgetTokens;
567
+ }
568
+ }
569
+
570
+ const request: CloudCodeAssistRequest["request"] = {
571
+ contents,
572
+ };
573
+
574
+ // System instruction must be object with parts, not plain string
575
+ if (context.systemPrompt) {
576
+ request.systemInstruction = {
577
+ parts: [{ text: sanitizeSurrogates(context.systemPrompt) }],
578
+ };
579
+ }
580
+
581
+ if (Object.keys(generationConfig).length > 0) {
582
+ request.generationConfig = generationConfig;
583
+ }
584
+
585
+ if (context.tools && context.tools.length > 0) {
586
+ request.tools = convertTools(context.tools);
587
+ if (options.toolChoice) {
588
+ request.toolConfig = {
589
+ functionCallingConfig: {
590
+ mode: mapToolChoice(options.toolChoice),
591
+ },
592
+ };
593
+ }
594
+ }
595
+
596
+ return {
597
+ project: projectId,
598
+ model: model.id,
599
+ request,
600
+ userAgent: "pi-coding-agent",
601
+ requestId: `pi-${Date.now()}-${Math.random().toString(36).slice(2, 11)}`,
602
+ };
603
+ }