@mariozechner/pi-ai 0.70.6 → 0.71.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -30
- package/dist/env-api-keys.d.ts.map +1 -1
- package/dist/env-api-keys.js +3 -0
- package/dist/env-api-keys.js.map +1 -1
- package/dist/index.d.ts +3 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +1 -0
- package/dist/index.js.map +1 -1
- package/dist/models.d.ts +1 -1
- package/dist/models.d.ts.map +1 -1
- package/dist/models.generated.d.ts +1437 -576
- package/dist/models.generated.d.ts.map +1 -1
- package/dist/models.generated.js +1173 -438
- package/dist/models.generated.js.map +1 -1
- package/dist/models.js +3 -2
- package/dist/models.js.map +1 -1
- package/dist/providers/anthropic.d.ts.map +1 -1
- package/dist/providers/anthropic.js +38 -15
- package/dist/providers/anthropic.js.map +1 -1
- package/dist/providers/cloudflare.d.ts +9 -3
- package/dist/providers/cloudflare.d.ts.map +1 -1
- package/dist/providers/cloudflare.js +10 -3
- package/dist/providers/cloudflare.js.map +1 -1
- package/dist/providers/google-shared.d.ts +7 -2
- package/dist/providers/google-shared.d.ts.map +1 -1
- package/dist/providers/google-shared.js +4 -13
- package/dist/providers/google-shared.js.map +1 -1
- package/dist/providers/google-vertex.d.ts +1 -1
- package/dist/providers/google-vertex.d.ts.map +1 -1
- package/dist/providers/google-vertex.js.map +1 -1
- package/dist/providers/google.d.ts +1 -1
- package/dist/providers/google.d.ts.map +1 -1
- package/dist/providers/google.js.map +1 -1
- package/dist/providers/mistral.d.ts.map +1 -1
- package/dist/providers/mistral.js +1 -1
- package/dist/providers/mistral.js.map +1 -1
- package/dist/providers/openai-codex-responses.d.ts +15 -0
- package/dist/providers/openai-codex-responses.d.ts.map +1 -1
- package/dist/providers/openai-codex-responses.js +138 -3
- package/dist/providers/openai-codex-responses.js.map +1 -1
- package/dist/providers/openai-completions.d.ts.map +1 -1
- package/dist/providers/openai-completions.js +21 -7
- package/dist/providers/openai-completions.js.map +1 -1
- package/dist/providers/openai-responses.d.ts.map +1 -1
- package/dist/providers/openai-responses.js +10 -2
- package/dist/providers/openai-responses.js.map +1 -1
- package/dist/providers/register-builtins.d.ts +0 -3
- package/dist/providers/register-builtins.d.ts.map +1 -1
- package/dist/providers/register-builtins.js +0 -18
- package/dist/providers/register-builtins.js.map +1 -1
- package/dist/types.d.ts +4 -3
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js.map +1 -1
- package/dist/utils/oauth/index.d.ts +0 -4
- package/dist/utils/oauth/index.d.ts.map +1 -1
- package/dist/utils/oauth/index.js +0 -10
- package/dist/utils/oauth/index.js.map +1 -1
- package/package.json +2 -6
- package/dist/providers/google-gemini-cli.d.ts +0 -74
- package/dist/providers/google-gemini-cli.d.ts.map +0 -1
- package/dist/providers/google-gemini-cli.js +0 -779
- package/dist/providers/google-gemini-cli.js.map +0 -1
- package/dist/utils/oauth/google-antigravity.d.ts +0 -26
- package/dist/utils/oauth/google-antigravity.d.ts.map +0 -1
- package/dist/utils/oauth/google-antigravity.js +0 -377
- package/dist/utils/oauth/google-antigravity.js.map +0 -1
- package/dist/utils/oauth/google-gemini-cli.d.ts +0 -26
- package/dist/utils/oauth/google-gemini-cli.d.ts.map +0 -1
- package/dist/utils/oauth/google-gemini-cli.js +0 -482
- package/dist/utils/oauth/google-gemini-cli.js.map +0 -1
package/dist/types.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"","sourcesContent":["import type { AssistantMessageEventStream } from \"./utils/event-stream.js\";\n\nexport type { AssistantMessageEventStream } from \"./utils/event-stream.js\";\n\nexport type KnownApi =\n\t| \"openai-completions\"\n\t| \"mistral-conversations\"\n\t| \"openai-responses\"\n\t| \"azure-openai-responses\"\n\t| \"openai-codex-responses\"\n\t| \"anthropic-messages\"\n\t| \"bedrock-converse-stream\"\n\t| \"google-generative-ai\"\n\t| \"google-gemini-cli\"\n\t| \"google-vertex\";\n\nexport type Api = KnownApi | (string & {});\n\nexport type KnownProvider =\n\t| \"amazon-bedrock\"\n\t| \"anthropic\"\n\t| \"google\"\n\t| \"google-gemini-cli\"\n\t| \"google-antigravity\"\n\t| \"google-vertex\"\n\t| \"openai\"\n\t| \"azure-openai-responses\"\n\t| \"openai-codex\"\n\t| \"deepseek\"\n\t| \"github-copilot\"\n\t| \"xai\"\n\t| \"groq\"\n\t| \"cerebras\"\n\t| \"openrouter\"\n\t| \"vercel-ai-gateway\"\n\t| \"zai\"\n\t| \"mistral\"\n\t| \"minimax\"\n\t| \"minimax-cn\"\n\t| \"huggingface\"\n\t| \"fireworks\"\n\t| \"opencode\"\n\t| \"opencode-go\"\n\t| \"kimi-coding\"\n\t| \"cloudflare-workers-ai\";\nexport type Provider = KnownProvider | string;\n\nexport type ThinkingLevel = \"minimal\" | \"low\" | \"medium\" | \"high\" | \"xhigh\";\n\n/** Token budgets for each thinking level (token-based providers only) */\nexport interface ThinkingBudgets {\n\tminimal?: number;\n\tlow?: number;\n\tmedium?: number;\n\thigh?: number;\n}\n\n// Base options all providers share\nexport type CacheRetention = \"none\" | \"short\" | \"long\";\n\nexport type Transport = \"sse\" | \"websocket\" | \"auto\";\n\nexport interface ProviderResponse {\n\tstatus: number;\n\theaders: Record<string, string>;\n}\n\nexport interface StreamOptions {\n\ttemperature?: number;\n\tmaxTokens?: number;\n\tsignal?: AbortSignal;\n\tapiKey?: string;\n\t/**\n\t * Preferred transport for providers that support multiple transports.\n\t * Providers that do not support this option ignore it.\n\t */\n\ttransport?: Transport;\n\t/**\n\t * Prompt cache retention preference. Providers map this to their supported values.\n\t * Default: \"short\".\n\t */\n\tcacheRetention?: CacheRetention;\n\t/**\n\t * Optional session identifier for providers that support session-based caching.\n\t * Providers can use this to enable prompt caching, request routing, or other\n\t * session-aware features. Ignored by providers that don't support it.\n\t */\n\tsessionId?: string;\n\t/**\n\t * Optional callback for inspecting or replacing provider payloads before sending.\n\t * Return undefined to keep the payload unchanged.\n\t */\n\tonPayload?: (payload: unknown, model: Model<Api>) => unknown | undefined | Promise<unknown | undefined>;\n\t/**\n\t * Optional callback invoked after an HTTP response is received and before\n\t * its body stream is consumed.\n\t */\n\tonResponse?: (response: ProviderResponse, model: Model<Api>) => void | Promise<void>;\n\t/**\n\t * Optional custom HTTP headers to include in API requests.\n\t * Merged with provider defaults; can override default headers.\n\t * Not supported by all providers (e.g., AWS Bedrock uses SDK auth).\n\t */\n\theaders?: Record<string, string>;\n\t/**\n\t * HTTP request timeout in milliseconds for providers/SDKs that support it.\n\t * For example, OpenAI and Anthropic SDK clients default to 10 minutes.\n\t */\n\ttimeoutMs?: number;\n\t/**\n\t * Maximum retry attempts for providers/SDKs that support client-side retries.\n\t * For example, OpenAI and Anthropic SDK clients default to 2.\n\t */\n\tmaxRetries?: number;\n\t/**\n\t * Maximum delay in milliseconds to wait for a retry when the server requests a long wait.\n\t * If the server's requested delay exceeds this value, the request fails immediately\n\t * with an error containing the requested delay, allowing higher-level retry logic\n\t * to handle it with user visibility.\n\t * Default: 60000 (60 seconds). Set to 0 to disable the cap.\n\t */\n\tmaxRetryDelayMs?: number;\n\t/**\n\t * Optional metadata to include in API requests.\n\t * Providers extract the fields they understand and ignore the rest.\n\t * For example, Anthropic uses `user_id` for abuse tracking and rate limiting.\n\t */\n\tmetadata?: Record<string, unknown>;\n}\n\nexport type ProviderStreamOptions = StreamOptions & Record<string, unknown>;\n\n// Unified options with reasoning passed to streamSimple() and completeSimple()\nexport interface SimpleStreamOptions extends StreamOptions {\n\treasoning?: ThinkingLevel;\n\t/** Custom token budgets for thinking levels (token-based providers only) */\n\tthinkingBudgets?: ThinkingBudgets;\n}\n\n// Generic StreamFunction with typed options.\n//\n// Contract:\n// - Must return an AssistantMessageEventStream.\n// - Once invoked, request/model/runtime failures should be encoded in the\n// returned stream, not thrown.\n// - Error termination must produce an AssistantMessage with stopReason\n// \"error\" or \"aborted\" and errorMessage, emitted via the stream protocol.\nexport type StreamFunction<TApi extends Api = Api, TOptions extends StreamOptions = StreamOptions> = (\n\tmodel: Model<TApi>,\n\tcontext: Context,\n\toptions?: TOptions,\n) => AssistantMessageEventStream;\n\nexport interface TextSignatureV1 {\n\tv: 1;\n\tid: string;\n\tphase?: \"commentary\" | \"final_answer\";\n}\n\nexport interface TextContent {\n\ttype: \"text\";\n\ttext: string;\n\ttextSignature?: string; // e.g., for OpenAI responses, message metadata (legacy id string or TextSignatureV1 JSON)\n}\n\nexport interface ThinkingContent {\n\ttype: \"thinking\";\n\tthinking: string;\n\tthinkingSignature?: string; // e.g., for OpenAI responses, the reasoning item ID\n\t/** When true, the thinking content was redacted by safety filters. The opaque\n\t * encrypted payload is stored in `thinkingSignature` so it can be passed back\n\t * to the API for multi-turn continuity. */\n\tredacted?: boolean;\n}\n\nexport interface ImageContent {\n\ttype: \"image\";\n\tdata: string; // base64 encoded image data\n\tmimeType: string; // e.g., \"image/jpeg\", \"image/png\"\n}\n\nexport interface ToolCall {\n\ttype: \"toolCall\";\n\tid: string;\n\tname: string;\n\targuments: Record<string, any>;\n\tthoughtSignature?: string; // Google-specific: opaque signature for reusing thought context\n}\n\nexport interface Usage {\n\tinput: number;\n\toutput: number;\n\tcacheRead: number;\n\tcacheWrite: number;\n\ttotalTokens: number;\n\tcost: {\n\t\tinput: number;\n\t\toutput: number;\n\t\tcacheRead: number;\n\t\tcacheWrite: number;\n\t\ttotal: number;\n\t};\n}\n\nexport type StopReason = \"stop\" | \"length\" | \"toolUse\" | \"error\" | \"aborted\";\n\nexport interface UserMessage {\n\trole: \"user\";\n\tcontent: string | (TextContent | ImageContent)[];\n\ttimestamp: number; // Unix timestamp in milliseconds\n}\n\nexport interface AssistantMessage {\n\trole: \"assistant\";\n\tcontent: (TextContent | ThinkingContent | ToolCall)[];\n\tapi: Api;\n\tprovider: Provider;\n\tmodel: string;\n\tresponseId?: string; // Provider-specific response/message identifier when the upstream API exposes one\n\tusage: Usage;\n\tstopReason: StopReason;\n\terrorMessage?: string;\n\ttimestamp: number; // Unix timestamp in milliseconds\n}\n\nexport interface ToolResultMessage<TDetails = any> {\n\trole: \"toolResult\";\n\ttoolCallId: string;\n\ttoolName: string;\n\tcontent: (TextContent | ImageContent)[]; // Supports text and images\n\tdetails?: TDetails;\n\tisError: boolean;\n\ttimestamp: number; // Unix timestamp in milliseconds\n}\n\nexport type Message = UserMessage | AssistantMessage | ToolResultMessage;\n\nimport type { TSchema } from \"typebox\";\n\nexport interface Tool<TParameters extends TSchema = TSchema> {\n\tname: string;\n\tdescription: string;\n\tparameters: TParameters;\n}\n\nexport interface Context {\n\tsystemPrompt?: string;\n\tmessages: Message[];\n\ttools?: Tool[];\n}\n\n/**\n * Event protocol for AssistantMessageEventStream.\n *\n * Streams should emit `start` before partial updates, then terminate with either:\n * - `done` carrying the final successful AssistantMessage, or\n * - `error` carrying the final AssistantMessage with stopReason \"error\" or \"aborted\"\n * and errorMessage.\n */\nexport type AssistantMessageEvent =\n\t| { type: \"start\"; partial: AssistantMessage }\n\t| { type: \"text_start\"; contentIndex: number; partial: AssistantMessage }\n\t| { type: \"text_delta\"; contentIndex: number; delta: string; partial: AssistantMessage }\n\t| { type: \"text_end\"; contentIndex: number; content: string; partial: AssistantMessage }\n\t| { type: \"thinking_start\"; contentIndex: number; partial: AssistantMessage }\n\t| { type: \"thinking_delta\"; contentIndex: number; delta: string; partial: AssistantMessage }\n\t| { type: \"thinking_end\"; contentIndex: number; content: string; partial: AssistantMessage }\n\t| { type: \"toolcall_start\"; contentIndex: number; partial: AssistantMessage }\n\t| { type: \"toolcall_delta\"; contentIndex: number; delta: string; partial: AssistantMessage }\n\t| { type: \"toolcall_end\"; contentIndex: number; toolCall: ToolCall; partial: AssistantMessage }\n\t| { type: \"done\"; reason: Extract<StopReason, \"stop\" | \"length\" | \"toolUse\">; message: AssistantMessage }\n\t| { type: \"error\"; reason: Extract<StopReason, \"aborted\" | \"error\">; error: AssistantMessage };\n\n/**\n * Compatibility settings for OpenAI-compatible completions APIs.\n * Use this to override URL-based auto-detection for custom providers.\n */\nexport interface OpenAICompletionsCompat {\n\t/** Whether the provider supports the `store` field. Default: auto-detected from URL. */\n\tsupportsStore?: boolean;\n\t/** Whether the provider supports the `developer` role (vs `system`). Default: auto-detected from URL. */\n\tsupportsDeveloperRole?: boolean;\n\t/** Whether the provider supports `reasoning_effort`. Default: auto-detected from URL. */\n\tsupportsReasoningEffort?: boolean;\n\t/** Optional mapping from pi-ai reasoning levels to provider/model-specific `reasoning_effort` values. */\n\treasoningEffortMap?: Partial<Record<ThinkingLevel, string>>;\n\t/** Whether the provider supports `stream_options: { include_usage: true }` for token usage in streaming responses. Default: true. */\n\tsupportsUsageInStreaming?: boolean;\n\t/** Which field to use for max tokens. Default: auto-detected from URL. */\n\tmaxTokensField?: \"max_completion_tokens\" | \"max_tokens\";\n\t/** Whether tool results require the `name` field. Default: auto-detected from URL. */\n\trequiresToolResultName?: boolean;\n\t/** Whether a user message after tool results requires an assistant message in between. Default: auto-detected from URL. */\n\trequiresAssistantAfterToolResult?: boolean;\n\t/** Whether thinking blocks must be converted to text blocks with <thinking> delimiters. Default: auto-detected from URL. */\n\trequiresThinkingAsText?: boolean;\n\t/** Whether all replayed assistant messages must include an empty reasoning_content field when reasoning is enabled. Default: auto-detected from URL. */\n\trequiresReasoningContentOnAssistantMessages?: boolean;\n\t/** Format for reasoning/thinking parameter. \"openai\" uses reasoning_effort, \"openrouter\" uses reasoning: { effort }, \"deepseek\" uses thinking: { type } plus reasoning_effort, \"zai\" uses top-level enable_thinking: boolean, \"qwen\" uses top-level enable_thinking: boolean, and \"qwen-chat-template\" uses chat_template_kwargs.enable_thinking. Default: \"openai\". */\n\tthinkingFormat?: \"openai\" | \"openrouter\" | \"deepseek\" | \"zai\" | \"qwen\" | \"qwen-chat-template\";\n\t/** OpenRouter-specific routing preferences. Only used when baseUrl points to OpenRouter. */\n\topenRouterRouting?: OpenRouterRouting;\n\t/** Vercel AI Gateway routing preferences. Only used when baseUrl points to Vercel AI Gateway. */\n\tvercelGatewayRouting?: VercelGatewayRouting;\n\t/** Whether z.ai supports top-level `tool_stream: true` for streaming tool call deltas. Default: false. */\n\tzaiToolStream?: boolean;\n\t/** Whether the provider supports the `strict` field in tool definitions. Default: true. */\n\tsupportsStrictMode?: boolean;\n\t/** Cache control convention for prompt caching. \"anthropic\" applies Anthropic-style `cache_control` markers to the system prompt, last tool definition, and last user/assistant text content. */\n\tcacheControlFormat?: \"anthropic\";\n\t/** Whether to send known session-affinity headers (`session_id`, `x-client-request-id`, `x-session-affinity`) from `options.sessionId` when caching is enabled. Default: false. */\n\tsendSessionAffinityHeaders?: boolean;\n\t/** Whether the provider supports long prompt cache retention (`prompt_cache_retention: \"24h\"` or Anthropic-style `cache_control.ttl: \"1h\"`, depending on format). Default: true. */\n\tsupportsLongCacheRetention?: boolean;\n}\n\n/** Compatibility settings for OpenAI Responses APIs. */\nexport interface OpenAIResponsesCompat {\n\t/** Whether to send the OpenAI `session_id` cache-affinity header from `options.sessionId` when caching is enabled. Default: true. */\n\tsendSessionIdHeader?: boolean;\n\t/** Whether the provider supports `prompt_cache_retention: \"24h\"`. Default: true. */\n\tsupportsLongCacheRetention?: boolean;\n}\n\n/** Compatibility settings for Anthropic Messages-compatible APIs. */\nexport interface AnthropicMessagesCompat {\n\t/**\n\t * Whether the provider accepts per-tool `eager_input_streaming`.\n\t * When false, the Anthropic provider omits `tools[].eager_input_streaming`\n\t * and sends the legacy `fine-grained-tool-streaming-2025-05-14` beta header\n\t * for tool-enabled requests.\n\t * Default: true.\n\t */\n\tsupportsEagerToolInputStreaming?: boolean;\n\t/** Whether the provider supports Anthropic long cache retention (`cache_control.ttl: \"1h\"`). Default: true. */\n\tsupportsLongCacheRetention?: boolean;\n}\n\n/**\n * OpenRouter provider routing preferences.\n * Controls which upstream providers OpenRouter routes requests to.\n * Sent as the `provider` field in the OpenRouter API request body.\n * @see https://openrouter.ai/docs/guides/routing/provider-selection\n */\nexport interface OpenRouterRouting {\n\t/** Whether to allow backup providers to serve requests. Default: true. */\n\tallow_fallbacks?: boolean;\n\t/** Whether to filter providers to only those that support all parameters in the request. Default: false. */\n\trequire_parameters?: boolean;\n\t/** Data collection setting. \"allow\" (default): allow providers that may store/train on data. \"deny\": only use providers that don't collect user data. */\n\tdata_collection?: \"deny\" | \"allow\";\n\t/** Whether to restrict routing to only ZDR (Zero Data Retention) endpoints. */\n\tzdr?: boolean;\n\t/** Whether to restrict routing to only models that allow text distillation. */\n\tenforce_distillable_text?: boolean;\n\t/** An ordered list of provider names/slugs to try in sequence, falling back to the next if unavailable. */\n\torder?: string[];\n\t/** List of provider names/slugs to exclusively allow for this request. */\n\tonly?: string[];\n\t/** List of provider names/slugs to skip for this request. */\n\tignore?: string[];\n\t/** A list of quantization levels to filter providers by (e.g., [\"fp16\", \"bf16\", \"fp8\", \"fp6\", \"int8\", \"int4\", \"fp4\", \"fp32\"]). */\n\tquantizations?: string[];\n\t/** Sorting strategy. Can be a string (e.g., \"price\", \"throughput\", \"latency\") or an object with `by` and `partition`. */\n\tsort?:\n\t\t| string\n\t\t| {\n\t\t\t\t/** The sorting metric: \"price\", \"throughput\", \"latency\". */\n\t\t\t\tby?: string;\n\t\t\t\t/** Partitioning strategy: \"model\" (default) or \"none\". */\n\t\t\t\tpartition?: string | null;\n\t\t };\n\t/** Maximum price per million tokens (USD). */\n\tmax_price?: {\n\t\t/** Price per million prompt tokens. */\n\t\tprompt?: number | string;\n\t\t/** Price per million completion tokens. */\n\t\tcompletion?: number | string;\n\t\t/** Price per image. */\n\t\timage?: number | string;\n\t\t/** Price per audio unit. */\n\t\taudio?: number | string;\n\t\t/** Price per request. */\n\t\trequest?: number | string;\n\t};\n\t/** Preferred minimum throughput (tokens/second). Can be a number (applies to p50) or an object with percentile-specific cutoffs. */\n\tpreferred_min_throughput?:\n\t\t| number\n\t\t| {\n\t\t\t\t/** Minimum tokens/second at the 50th percentile. */\n\t\t\t\tp50?: number;\n\t\t\t\t/** Minimum tokens/second at the 75th percentile. */\n\t\t\t\tp75?: number;\n\t\t\t\t/** Minimum tokens/second at the 90th percentile. */\n\t\t\t\tp90?: number;\n\t\t\t\t/** Minimum tokens/second at the 99th percentile. */\n\t\t\t\tp99?: number;\n\t\t };\n\t/** Preferred maximum latency (seconds). Can be a number (applies to p50) or an object with percentile-specific cutoffs. */\n\tpreferred_max_latency?:\n\t\t| number\n\t\t| {\n\t\t\t\t/** Maximum latency in seconds at the 50th percentile. */\n\t\t\t\tp50?: number;\n\t\t\t\t/** Maximum latency in seconds at the 75th percentile. */\n\t\t\t\tp75?: number;\n\t\t\t\t/** Maximum latency in seconds at the 90th percentile. */\n\t\t\t\tp90?: number;\n\t\t\t\t/** Maximum latency in seconds at the 99th percentile. */\n\t\t\t\tp99?: number;\n\t\t };\n}\n\n/**\n * Vercel AI Gateway routing preferences.\n * Controls which upstream providers the gateway routes requests to.\n * @see https://vercel.com/docs/ai-gateway/models-and-providers/provider-options\n */\nexport interface VercelGatewayRouting {\n\t/** List of provider slugs to exclusively use for this request (e.g., [\"bedrock\", \"anthropic\"]). */\n\tonly?: string[];\n\t/** List of provider slugs to try in order (e.g., [\"anthropic\", \"openai\"]). */\n\torder?: string[];\n}\n\n// Model interface for the unified model system\nexport interface Model<TApi extends Api> {\n\tid: string;\n\tname: string;\n\tapi: TApi;\n\tprovider: Provider;\n\tbaseUrl: string;\n\treasoning: boolean;\n\tinput: (\"text\" | \"image\")[];\n\tcost: {\n\t\tinput: number; // $/million tokens\n\t\toutput: number; // $/million tokens\n\t\tcacheRead: number; // $/million tokens\n\t\tcacheWrite: number; // $/million tokens\n\t};\n\tcontextWindow: number;\n\tmaxTokens: number;\n\theaders?: Record<string, string>;\n\t/** Compatibility overrides for OpenAI-compatible APIs. If not set, auto-detected from baseUrl. */\n\tcompat?: TApi extends \"openai-completions\"\n\t\t? OpenAICompletionsCompat\n\t\t: TApi extends \"openai-responses\"\n\t\t\t? OpenAIResponsesCompat\n\t\t\t: TApi extends \"anthropic-messages\"\n\t\t\t\t? AnthropicMessagesCompat\n\t\t\t\t: never;\n}\n"]}
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"","sourcesContent":["import type { AssistantMessageEventStream } from \"./utils/event-stream.js\";\n\nexport type { AssistantMessageEventStream } from \"./utils/event-stream.js\";\n\nexport type KnownApi =\n\t| \"openai-completions\"\n\t| \"mistral-conversations\"\n\t| \"openai-responses\"\n\t| \"azure-openai-responses\"\n\t| \"openai-codex-responses\"\n\t| \"anthropic-messages\"\n\t| \"bedrock-converse-stream\"\n\t| \"google-generative-ai\"\n\t| \"google-vertex\";\n\nexport type Api = KnownApi | (string & {});\n\nexport type KnownProvider =\n\t| \"amazon-bedrock\"\n\t| \"anthropic\"\n\t| \"google\"\n\t| \"google-vertex\"\n\t| \"openai\"\n\t| \"azure-openai-responses\"\n\t| \"openai-codex\"\n\t| \"deepseek\"\n\t| \"github-copilot\"\n\t| \"xai\"\n\t| \"groq\"\n\t| \"cerebras\"\n\t| \"openrouter\"\n\t| \"vercel-ai-gateway\"\n\t| \"zai\"\n\t| \"mistral\"\n\t| \"minimax\"\n\t| \"minimax-cn\"\n\t| \"moonshotai\"\n\t| \"moonshotai-cn\"\n\t| \"huggingface\"\n\t| \"fireworks\"\n\t| \"opencode\"\n\t| \"opencode-go\"\n\t| \"kimi-coding\"\n\t| \"cloudflare-workers-ai\"\n\t| \"cloudflare-ai-gateway\";\nexport type Provider = KnownProvider | string;\n\nexport type ThinkingLevel = \"minimal\" | \"low\" | \"medium\" | \"high\" | \"xhigh\";\n\n/** Token budgets for each thinking level (token-based providers only) */\nexport interface ThinkingBudgets {\n\tminimal?: number;\n\tlow?: number;\n\tmedium?: number;\n\thigh?: number;\n}\n\n// Base options all providers share\nexport type CacheRetention = \"none\" | \"short\" | \"long\";\n\nexport type Transport = \"sse\" | \"websocket\" | \"websocket-cached\" | \"auto\";\n\nexport interface ProviderResponse {\n\tstatus: number;\n\theaders: Record<string, string>;\n}\n\nexport interface StreamOptions {\n\ttemperature?: number;\n\tmaxTokens?: number;\n\tsignal?: AbortSignal;\n\tapiKey?: string;\n\t/**\n\t * Preferred transport for providers that support multiple transports.\n\t * Providers that do not support this option ignore it.\n\t */\n\ttransport?: Transport;\n\t/**\n\t * Prompt cache retention preference. Providers map this to their supported values.\n\t * Default: \"short\".\n\t */\n\tcacheRetention?: CacheRetention;\n\t/**\n\t * Optional session identifier for providers that support session-based caching.\n\t * Providers can use this to enable prompt caching, request routing, or other\n\t * session-aware features. Ignored by providers that don't support it.\n\t */\n\tsessionId?: string;\n\t/**\n\t * Optional callback for inspecting or replacing provider payloads before sending.\n\t * Return undefined to keep the payload unchanged.\n\t */\n\tonPayload?: (payload: unknown, model: Model<Api>) => unknown | undefined | Promise<unknown | undefined>;\n\t/**\n\t * Optional callback invoked after an HTTP response is received and before\n\t * its body stream is consumed.\n\t */\n\tonResponse?: (response: ProviderResponse, model: Model<Api>) => void | Promise<void>;\n\t/**\n\t * Optional custom HTTP headers to include in API requests.\n\t * Merged with provider defaults; can override default headers.\n\t * Not supported by all providers (e.g., AWS Bedrock uses SDK auth).\n\t */\n\theaders?: Record<string, string>;\n\t/**\n\t * HTTP request timeout in milliseconds for providers/SDKs that support it.\n\t * For example, OpenAI and Anthropic SDK clients default to 10 minutes.\n\t */\n\ttimeoutMs?: number;\n\t/**\n\t * Maximum retry attempts for providers/SDKs that support client-side retries.\n\t * For example, OpenAI and Anthropic SDK clients default to 2.\n\t */\n\tmaxRetries?: number;\n\t/**\n\t * Maximum delay in milliseconds to wait for a retry when the server requests a long wait.\n\t * If the server's requested delay exceeds this value, the request fails immediately\n\t * with an error containing the requested delay, allowing higher-level retry logic\n\t * to handle it with user visibility.\n\t * Default: 60000 (60 seconds). Set to 0 to disable the cap.\n\t */\n\tmaxRetryDelayMs?: number;\n\t/**\n\t * Optional metadata to include in API requests.\n\t * Providers extract the fields they understand and ignore the rest.\n\t * For example, Anthropic uses `user_id` for abuse tracking and rate limiting.\n\t */\n\tmetadata?: Record<string, unknown>;\n}\n\nexport type ProviderStreamOptions = StreamOptions & Record<string, unknown>;\n\n// Unified options with reasoning passed to streamSimple() and completeSimple()\nexport interface SimpleStreamOptions extends StreamOptions {\n\treasoning?: ThinkingLevel;\n\t/** Custom token budgets for thinking levels (token-based providers only) */\n\tthinkingBudgets?: ThinkingBudgets;\n}\n\n// Generic StreamFunction with typed options.\n//\n// Contract:\n// - Must return an AssistantMessageEventStream.\n// - Once invoked, request/model/runtime failures should be encoded in the\n// returned stream, not thrown.\n// - Error termination must produce an AssistantMessage with stopReason\n// \"error\" or \"aborted\" and errorMessage, emitted via the stream protocol.\nexport type StreamFunction<TApi extends Api = Api, TOptions extends StreamOptions = StreamOptions> = (\n\tmodel: Model<TApi>,\n\tcontext: Context,\n\toptions?: TOptions,\n) => AssistantMessageEventStream;\n\nexport interface TextSignatureV1 {\n\tv: 1;\n\tid: string;\n\tphase?: \"commentary\" | \"final_answer\";\n}\n\nexport interface TextContent {\n\ttype: \"text\";\n\ttext: string;\n\ttextSignature?: string; // e.g., for OpenAI responses, message metadata (legacy id string or TextSignatureV1 JSON)\n}\n\nexport interface ThinkingContent {\n\ttype: \"thinking\";\n\tthinking: string;\n\tthinkingSignature?: string; // e.g., for OpenAI responses, the reasoning item ID\n\t/** When true, the thinking content was redacted by safety filters. The opaque\n\t * encrypted payload is stored in `thinkingSignature` so it can be passed back\n\t * to the API for multi-turn continuity. */\n\tredacted?: boolean;\n}\n\nexport interface ImageContent {\n\ttype: \"image\";\n\tdata: string; // base64 encoded image data\n\tmimeType: string; // e.g., \"image/jpeg\", \"image/png\"\n}\n\nexport interface ToolCall {\n\ttype: \"toolCall\";\n\tid: string;\n\tname: string;\n\targuments: Record<string, any>;\n\tthoughtSignature?: string; // Google-specific: opaque signature for reusing thought context\n}\n\nexport interface Usage {\n\tinput: number;\n\toutput: number;\n\tcacheRead: number;\n\tcacheWrite: number;\n\ttotalTokens: number;\n\tcost: {\n\t\tinput: number;\n\t\toutput: number;\n\t\tcacheRead: number;\n\t\tcacheWrite: number;\n\t\ttotal: number;\n\t};\n}\n\nexport type StopReason = \"stop\" | \"length\" | \"toolUse\" | \"error\" | \"aborted\";\n\nexport interface UserMessage {\n\trole: \"user\";\n\tcontent: string | (TextContent | ImageContent)[];\n\ttimestamp: number; // Unix timestamp in milliseconds\n}\n\nexport interface AssistantMessage {\n\trole: \"assistant\";\n\tcontent: (TextContent | ThinkingContent | ToolCall)[];\n\tapi: Api;\n\tprovider: Provider;\n\tmodel: string;\n\tresponseModel?: string; // Concrete `chunk.model` when different from the requested `model` (e.g. OpenRouter `auto` -> `anthropic/...`)\n\tresponseId?: string; // Provider-specific response/message identifier when the upstream API exposes one\n\tusage: Usage;\n\tstopReason: StopReason;\n\terrorMessage?: string;\n\ttimestamp: number; // Unix timestamp in milliseconds\n}\n\nexport interface ToolResultMessage<TDetails = any> {\n\trole: \"toolResult\";\n\ttoolCallId: string;\n\ttoolName: string;\n\tcontent: (TextContent | ImageContent)[]; // Supports text and images\n\tdetails?: TDetails;\n\tisError: boolean;\n\ttimestamp: number; // Unix timestamp in milliseconds\n}\n\nexport type Message = UserMessage | AssistantMessage | ToolResultMessage;\n\nimport type { TSchema } from \"typebox\";\n\nexport interface Tool<TParameters extends TSchema = TSchema> {\n\tname: string;\n\tdescription: string;\n\tparameters: TParameters;\n}\n\nexport interface Context {\n\tsystemPrompt?: string;\n\tmessages: Message[];\n\ttools?: Tool[];\n}\n\n/**\n * Event protocol for AssistantMessageEventStream.\n *\n * Streams should emit `start` before partial updates, then terminate with either:\n * - `done` carrying the final successful AssistantMessage, or\n * - `error` carrying the final AssistantMessage with stopReason \"error\" or \"aborted\"\n * and errorMessage.\n */\nexport type AssistantMessageEvent =\n\t| { type: \"start\"; partial: AssistantMessage }\n\t| { type: \"text_start\"; contentIndex: number; partial: AssistantMessage }\n\t| { type: \"text_delta\"; contentIndex: number; delta: string; partial: AssistantMessage }\n\t| { type: \"text_end\"; contentIndex: number; content: string; partial: AssistantMessage }\n\t| { type: \"thinking_start\"; contentIndex: number; partial: AssistantMessage }\n\t| { type: \"thinking_delta\"; contentIndex: number; delta: string; partial: AssistantMessage }\n\t| { type: \"thinking_end\"; contentIndex: number; content: string; partial: AssistantMessage }\n\t| { type: \"toolcall_start\"; contentIndex: number; partial: AssistantMessage }\n\t| { type: \"toolcall_delta\"; contentIndex: number; delta: string; partial: AssistantMessage }\n\t| { type: \"toolcall_end\"; contentIndex: number; toolCall: ToolCall; partial: AssistantMessage }\n\t| { type: \"done\"; reason: Extract<StopReason, \"stop\" | \"length\" | \"toolUse\">; message: AssistantMessage }\n\t| { type: \"error\"; reason: Extract<StopReason, \"aborted\" | \"error\">; error: AssistantMessage };\n\n/**\n * Compatibility settings for OpenAI-compatible completions APIs.\n * Use this to override URL-based auto-detection for custom providers.\n */\nexport interface OpenAICompletionsCompat {\n\t/** Whether the provider supports the `store` field. Default: auto-detected from URL. */\n\tsupportsStore?: boolean;\n\t/** Whether the provider supports the `developer` role (vs `system`). Default: auto-detected from URL. */\n\tsupportsDeveloperRole?: boolean;\n\t/** Whether the provider supports `reasoning_effort`. Default: auto-detected from URL. */\n\tsupportsReasoningEffort?: boolean;\n\t/** Optional mapping from pi-ai reasoning levels to provider/model-specific `reasoning_effort` values. */\n\treasoningEffortMap?: Partial<Record<ThinkingLevel, string>>;\n\t/** Whether the provider supports `stream_options: { include_usage: true }` for token usage in streaming responses. Default: true. */\n\tsupportsUsageInStreaming?: boolean;\n\t/** Which field to use for max tokens. Default: auto-detected from URL. */\n\tmaxTokensField?: \"max_completion_tokens\" | \"max_tokens\";\n\t/** Whether tool results require the `name` field. Default: auto-detected from URL. */\n\trequiresToolResultName?: boolean;\n\t/** Whether a user message after tool results requires an assistant message in between. Default: auto-detected from URL. */\n\trequiresAssistantAfterToolResult?: boolean;\n\t/** Whether thinking blocks must be converted to text blocks with <thinking> delimiters. Default: auto-detected from URL. */\n\trequiresThinkingAsText?: boolean;\n\t/** Whether all replayed assistant messages must include an empty reasoning_content field when reasoning is enabled. Default: auto-detected from URL. */\n\trequiresReasoningContentOnAssistantMessages?: boolean;\n\t/** Format for reasoning/thinking parameter. \"openai\" uses reasoning_effort, \"openrouter\" uses reasoning: { effort }, \"deepseek\" uses thinking: { type } plus reasoning_effort, \"zai\" uses top-level enable_thinking: boolean, \"qwen\" uses top-level enable_thinking: boolean, and \"qwen-chat-template\" uses chat_template_kwargs.enable_thinking. Default: \"openai\". */\n\tthinkingFormat?: \"openai\" | \"openrouter\" | \"deepseek\" | \"zai\" | \"qwen\" | \"qwen-chat-template\";\n\t/** OpenRouter-specific routing preferences. Only used when baseUrl points to OpenRouter. */\n\topenRouterRouting?: OpenRouterRouting;\n\t/** Vercel AI Gateway routing preferences. Only used when baseUrl points to Vercel AI Gateway. */\n\tvercelGatewayRouting?: VercelGatewayRouting;\n\t/** Whether z.ai supports top-level `tool_stream: true` for streaming tool call deltas. Default: false. */\n\tzaiToolStream?: boolean;\n\t/** Whether the provider supports the `strict` field in tool definitions. Default: true. */\n\tsupportsStrictMode?: boolean;\n\t/** Cache control convention for prompt caching. \"anthropic\" applies Anthropic-style `cache_control` markers to the system prompt, last tool definition, and last user/assistant text content. */\n\tcacheControlFormat?: \"anthropic\";\n\t/** Whether to send known session-affinity headers (`session_id`, `x-client-request-id`, `x-session-affinity`) from `options.sessionId` when caching is enabled. Default: false. */\n\tsendSessionAffinityHeaders?: boolean;\n\t/** Whether the provider supports long prompt cache retention (`prompt_cache_retention: \"24h\"` or Anthropic-style `cache_control.ttl: \"1h\"`, depending on format). Default: true. */\n\tsupportsLongCacheRetention?: boolean;\n}\n\n/** Compatibility settings for OpenAI Responses APIs. */\nexport interface OpenAIResponsesCompat {\n\t/** Whether to send the OpenAI `session_id` cache-affinity header from `options.sessionId` when caching is enabled. Default: true. */\n\tsendSessionIdHeader?: boolean;\n\t/** Whether the provider supports `prompt_cache_retention: \"24h\"`. Default: true. */\n\tsupportsLongCacheRetention?: boolean;\n}\n\n/** Compatibility settings for Anthropic Messages-compatible APIs. */\nexport interface AnthropicMessagesCompat {\n\t/**\n\t * Whether the provider accepts per-tool `eager_input_streaming`.\n\t * When false, the Anthropic provider omits `tools[].eager_input_streaming`\n\t * and sends the legacy `fine-grained-tool-streaming-2025-05-14` beta header\n\t * for tool-enabled requests.\n\t * Default: true.\n\t */\n\tsupportsEagerToolInputStreaming?: boolean;\n\t/** Whether the provider supports Anthropic long cache retention (`cache_control.ttl: \"1h\"`). Default: true. */\n\tsupportsLongCacheRetention?: boolean;\n}\n\n/**\n * OpenRouter provider routing preferences.\n * Controls which upstream providers OpenRouter routes requests to.\n * Sent as the `provider` field in the OpenRouter API request body.\n * @see https://openrouter.ai/docs/guides/routing/provider-selection\n */\nexport interface OpenRouterRouting {\n\t/** Whether to allow backup providers to serve requests. Default: true. */\n\tallow_fallbacks?: boolean;\n\t/** Whether to filter providers to only those that support all parameters in the request. Default: false. */\n\trequire_parameters?: boolean;\n\t/** Data collection setting. \"allow\" (default): allow providers that may store/train on data. \"deny\": only use providers that don't collect user data. */\n\tdata_collection?: \"deny\" | \"allow\";\n\t/** Whether to restrict routing to only ZDR (Zero Data Retention) endpoints. */\n\tzdr?: boolean;\n\t/** Whether to restrict routing to only models that allow text distillation. */\n\tenforce_distillable_text?: boolean;\n\t/** An ordered list of provider names/slugs to try in sequence, falling back to the next if unavailable. */\n\torder?: string[];\n\t/** List of provider names/slugs to exclusively allow for this request. */\n\tonly?: string[];\n\t/** List of provider names/slugs to skip for this request. */\n\tignore?: string[];\n\t/** A list of quantization levels to filter providers by (e.g., [\"fp16\", \"bf16\", \"fp8\", \"fp6\", \"int8\", \"int4\", \"fp4\", \"fp32\"]). */\n\tquantizations?: string[];\n\t/** Sorting strategy. Can be a string (e.g., \"price\", \"throughput\", \"latency\") or an object with `by` and `partition`. */\n\tsort?:\n\t\t| string\n\t\t| {\n\t\t\t\t/** The sorting metric: \"price\", \"throughput\", \"latency\". */\n\t\t\t\tby?: string;\n\t\t\t\t/** Partitioning strategy: \"model\" (default) or \"none\". */\n\t\t\t\tpartition?: string | null;\n\t\t };\n\t/** Maximum price per million tokens (USD). */\n\tmax_price?: {\n\t\t/** Price per million prompt tokens. */\n\t\tprompt?: number | string;\n\t\t/** Price per million completion tokens. */\n\t\tcompletion?: number | string;\n\t\t/** Price per image. */\n\t\timage?: number | string;\n\t\t/** Price per audio unit. */\n\t\taudio?: number | string;\n\t\t/** Price per request. */\n\t\trequest?: number | string;\n\t};\n\t/** Preferred minimum throughput (tokens/second). Can be a number (applies to p50) or an object with percentile-specific cutoffs. */\n\tpreferred_min_throughput?:\n\t\t| number\n\t\t| {\n\t\t\t\t/** Minimum tokens/second at the 50th percentile. */\n\t\t\t\tp50?: number;\n\t\t\t\t/** Minimum tokens/second at the 75th percentile. */\n\t\t\t\tp75?: number;\n\t\t\t\t/** Minimum tokens/second at the 90th percentile. */\n\t\t\t\tp90?: number;\n\t\t\t\t/** Minimum tokens/second at the 99th percentile. */\n\t\t\t\tp99?: number;\n\t\t };\n\t/** Preferred maximum latency (seconds). Can be a number (applies to p50) or an object with percentile-specific cutoffs. */\n\tpreferred_max_latency?:\n\t\t| number\n\t\t| {\n\t\t\t\t/** Maximum latency in seconds at the 50th percentile. */\n\t\t\t\tp50?: number;\n\t\t\t\t/** Maximum latency in seconds at the 75th percentile. */\n\t\t\t\tp75?: number;\n\t\t\t\t/** Maximum latency in seconds at the 90th percentile. */\n\t\t\t\tp90?: number;\n\t\t\t\t/** Maximum latency in seconds at the 99th percentile. */\n\t\t\t\tp99?: number;\n\t\t };\n}\n\n/**\n * Vercel AI Gateway routing preferences.\n * Controls which upstream providers the gateway routes requests to.\n * @see https://vercel.com/docs/ai-gateway/models-and-providers/provider-options\n */\nexport interface VercelGatewayRouting {\n\t/** List of provider slugs to exclusively use for this request (e.g., [\"bedrock\", \"anthropic\"]). */\n\tonly?: string[];\n\t/** List of provider slugs to try in order (e.g., [\"anthropic\", \"openai\"]). */\n\torder?: string[];\n}\n\n// Model interface for the unified model system\nexport interface Model<TApi extends Api> {\n\tid: string;\n\tname: string;\n\tapi: TApi;\n\tprovider: Provider;\n\tbaseUrl: string;\n\treasoning: boolean;\n\tinput: (\"text\" | \"image\")[];\n\tcost: {\n\t\tinput: number; // $/million tokens\n\t\toutput: number; // $/million tokens\n\t\tcacheRead: number; // $/million tokens\n\t\tcacheWrite: number; // $/million tokens\n\t};\n\tcontextWindow: number;\n\tmaxTokens: number;\n\theaders?: Record<string, string>;\n\t/** Compatibility overrides for OpenAI-compatible APIs. If not set, auto-detected from baseUrl. */\n\tcompat?: TApi extends \"openai-completions\"\n\t\t? OpenAICompletionsCompat\n\t\t: TApi extends \"openai-responses\"\n\t\t\t? OpenAIResponsesCompat\n\t\t\t: TApi extends \"anthropic-messages\"\n\t\t\t\t? AnthropicMessagesCompat\n\t\t\t\t: never;\n}\n"]}
|
|
@@ -5,13 +5,9 @@
|
|
|
5
5
|
* for OAuth-based providers:
|
|
6
6
|
* - Anthropic (Claude Pro/Max)
|
|
7
7
|
* - GitHub Copilot
|
|
8
|
-
* - Google Cloud Code Assist (Gemini CLI)
|
|
9
|
-
* - Antigravity (Gemini 3, Claude, GPT-OSS via Google Cloud)
|
|
10
8
|
*/
|
|
11
9
|
export { anthropicOAuthProvider, loginAnthropic, refreshAnthropicToken } from "./anthropic.js";
|
|
12
10
|
export { getGitHubCopilotBaseUrl, githubCopilotOAuthProvider, loginGitHubCopilot, normalizeDomain, refreshGitHubCopilotToken, } from "./github-copilot.js";
|
|
13
|
-
export { antigravityOAuthProvider, loginAntigravity, refreshAntigravityToken } from "./google-antigravity.js";
|
|
14
|
-
export { geminiCliOAuthProvider, loginGeminiCli, refreshGoogleCloudToken } from "./google-gemini-cli.js";
|
|
15
11
|
export { loginOpenAICodex, openaiCodexOAuthProvider, refreshOpenAICodexToken } from "./openai-codex.js";
|
|
16
12
|
export * from "./types.js";
|
|
17
13
|
import type { OAuthCredentials, OAuthProviderId, OAuthProviderInfo, OAuthProviderInterface } from "./types.js";
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/utils/oauth/index.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/utils/oauth/index.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAGH,OAAO,EAAE,sBAAsB,EAAE,cAAc,EAAE,qBAAqB,EAAE,MAAM,gBAAgB,CAAC;AAE/F,OAAO,EACN,uBAAuB,EACvB,0BAA0B,EAC1B,kBAAkB,EAClB,eAAe,EACf,yBAAyB,GACzB,MAAM,qBAAqB,CAAC;AAE7B,OAAO,EAAE,gBAAgB,EAAE,wBAAwB,EAAE,uBAAuB,EAAE,MAAM,mBAAmB,CAAC;AAExG,cAAc,YAAY,CAAC;AAS3B,OAAO,KAAK,EAAE,gBAAgB,EAAE,eAAe,EAAE,iBAAiB,EAAE,sBAAsB,EAAE,MAAM,YAAY,CAAC;AAY/G;;GAEG;AACH,wBAAgB,gBAAgB,CAAC,EAAE,EAAE,eAAe,GAAG,sBAAsB,GAAG,SAAS,CAExF;AAED;;GAEG;AACH,wBAAgB,qBAAqB,CAAC,QAAQ,EAAE,sBAAsB,GAAG,IAAI,CAE5E;AAED;;;;;GAKG;AACH,wBAAgB,uBAAuB,CAAC,EAAE,EAAE,MAAM,GAAG,IAAI,CAOxD;AAED;;GAEG;AACH,wBAAgB,mBAAmB,IAAI,IAAI,CAK1C;AAED;;GAEG;AACH,wBAAgB,iBAAiB,IAAI,sBAAsB,EAAE,CAE5D;AAED;;GAEG;AACH,wBAAgB,wBAAwB,IAAI,iBAAiB,EAAE,CAM9D;AAMD;;;GAGG;AACH,wBAAsB,iBAAiB,CACtC,UAAU,EAAE,eAAe,EAC3B,WAAW,EAAE,gBAAgB,GAC3B,OAAO,CAAC,gBAAgB,CAAC,CAM3B;AAED;;;;;;GAMG;AACH,wBAAsB,cAAc,CACnC,UAAU,EAAE,eAAe,EAC3B,WAAW,EAAE,MAAM,CAAC,MAAM,EAAE,gBAAgB,CAAC,GAC3C,OAAO,CAAC;IAAE,cAAc,EAAE,gBAAgB,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE,GAAG,IAAI,CAAC,CAsBtE","sourcesContent":["/**\n * OAuth credential management for AI providers.\n *\n * This module handles login, token refresh, and credential storage\n * for OAuth-based providers:\n * - Anthropic (Claude Pro/Max)\n * - GitHub Copilot\n */\n\n// Anthropic\nexport { anthropicOAuthProvider, loginAnthropic, refreshAnthropicToken } from \"./anthropic.js\";\n// GitHub Copilot\nexport {\n\tgetGitHubCopilotBaseUrl,\n\tgithubCopilotOAuthProvider,\n\tloginGitHubCopilot,\n\tnormalizeDomain,\n\trefreshGitHubCopilotToken,\n} from \"./github-copilot.js\";\n// OpenAI Codex (ChatGPT OAuth)\nexport { loginOpenAICodex, openaiCodexOAuthProvider, refreshOpenAICodexToken } from \"./openai-codex.js\";\n\nexport * from \"./types.js\";\n\n// ============================================================================\n// Provider Registry\n// ============================================================================\n\nimport { anthropicOAuthProvider } from \"./anthropic.js\";\nimport { githubCopilotOAuthProvider } from \"./github-copilot.js\";\nimport { openaiCodexOAuthProvider } from \"./openai-codex.js\";\nimport type { OAuthCredentials, OAuthProviderId, OAuthProviderInfo, OAuthProviderInterface } from \"./types.js\";\n\nconst BUILT_IN_OAUTH_PROVIDERS: OAuthProviderInterface[] = [\n\tanthropicOAuthProvider,\n\tgithubCopilotOAuthProvider,\n\topenaiCodexOAuthProvider,\n];\n\nconst oauthProviderRegistry = new Map<string, OAuthProviderInterface>(\n\tBUILT_IN_OAUTH_PROVIDERS.map((provider) => [provider.id, provider]),\n);\n\n/**\n * Get an OAuth provider by ID\n */\nexport function getOAuthProvider(id: OAuthProviderId): OAuthProviderInterface | undefined {\n\treturn oauthProviderRegistry.get(id);\n}\n\n/**\n * Register a custom OAuth provider\n */\nexport function registerOAuthProvider(provider: OAuthProviderInterface): void {\n\toauthProviderRegistry.set(provider.id, provider);\n}\n\n/**\n * Unregister an OAuth provider.\n *\n * If the provider is built-in, restores the built-in implementation.\n * Custom providers are removed completely.\n */\nexport function unregisterOAuthProvider(id: string): void {\n\tconst builtInProvider = BUILT_IN_OAUTH_PROVIDERS.find((provider) => provider.id === id);\n\tif (builtInProvider) {\n\t\toauthProviderRegistry.set(id, builtInProvider);\n\t\treturn;\n\t}\n\toauthProviderRegistry.delete(id);\n}\n\n/**\n * Reset OAuth providers to built-ins.\n */\nexport function resetOAuthProviders(): void {\n\toauthProviderRegistry.clear();\n\tfor (const provider of BUILT_IN_OAUTH_PROVIDERS) {\n\t\toauthProviderRegistry.set(provider.id, provider);\n\t}\n}\n\n/**\n * Get all registered OAuth providers\n */\nexport function getOAuthProviders(): OAuthProviderInterface[] {\n\treturn Array.from(oauthProviderRegistry.values());\n}\n\n/**\n * @deprecated Use getOAuthProviders() which returns OAuthProviderInterface[]\n */\nexport function getOAuthProviderInfoList(): OAuthProviderInfo[] {\n\treturn getOAuthProviders().map((p) => ({\n\t\tid: p.id,\n\t\tname: p.name,\n\t\tavailable: true,\n\t}));\n}\n\n// ============================================================================\n// High-level API (uses provider registry)\n// ============================================================================\n\n/**\n * Refresh token for any OAuth provider.\n * @deprecated Use getOAuthProvider(id).refreshToken() instead\n */\nexport async function refreshOAuthToken(\n\tproviderId: OAuthProviderId,\n\tcredentials: OAuthCredentials,\n): Promise<OAuthCredentials> {\n\tconst provider = getOAuthProvider(providerId);\n\tif (!provider) {\n\t\tthrow new Error(`Unknown OAuth provider: ${providerId}`);\n\t}\n\treturn provider.refreshToken(credentials);\n}\n\n/**\n * Get API key for a provider from OAuth credentials.\n * Automatically refreshes expired tokens.\n *\n * @returns API key string and updated credentials, or null if no credentials\n * @throws Error if refresh fails\n */\nexport async function getOAuthApiKey(\n\tproviderId: OAuthProviderId,\n\tcredentials: Record<string, OAuthCredentials>,\n): Promise<{ newCredentials: OAuthCredentials; apiKey: string } | null> {\n\tconst provider = getOAuthProvider(providerId);\n\tif (!provider) {\n\t\tthrow new Error(`Unknown OAuth provider: ${providerId}`);\n\t}\n\n\tlet creds = credentials[providerId];\n\tif (!creds) {\n\t\treturn null;\n\t}\n\n\t// Refresh if expired\n\tif (Date.now() >= creds.expires) {\n\t\ttry {\n\t\t\tcreds = await provider.refreshToken(creds);\n\t\t} catch (_error) {\n\t\t\tthrow new Error(`Failed to refresh OAuth token for ${providerId}`);\n\t\t}\n\t}\n\n\tconst apiKey = provider.getApiKey(creds);\n\treturn { newCredentials: creds, apiKey };\n}\n"]}
|
|
@@ -5,17 +5,11 @@
|
|
|
5
5
|
* for OAuth-based providers:
|
|
6
6
|
* - Anthropic (Claude Pro/Max)
|
|
7
7
|
* - GitHub Copilot
|
|
8
|
-
* - Google Cloud Code Assist (Gemini CLI)
|
|
9
|
-
* - Antigravity (Gemini 3, Claude, GPT-OSS via Google Cloud)
|
|
10
8
|
*/
|
|
11
9
|
// Anthropic
|
|
12
10
|
export { anthropicOAuthProvider, loginAnthropic, refreshAnthropicToken } from "./anthropic.js";
|
|
13
11
|
// GitHub Copilot
|
|
14
12
|
export { getGitHubCopilotBaseUrl, githubCopilotOAuthProvider, loginGitHubCopilot, normalizeDomain, refreshGitHubCopilotToken, } from "./github-copilot.js";
|
|
15
|
-
// Google Antigravity
|
|
16
|
-
export { antigravityOAuthProvider, loginAntigravity, refreshAntigravityToken } from "./google-antigravity.js";
|
|
17
|
-
// Google Gemini CLI
|
|
18
|
-
export { geminiCliOAuthProvider, loginGeminiCli, refreshGoogleCloudToken } from "./google-gemini-cli.js";
|
|
19
13
|
// OpenAI Codex (ChatGPT OAuth)
|
|
20
14
|
export { loginOpenAICodex, openaiCodexOAuthProvider, refreshOpenAICodexToken } from "./openai-codex.js";
|
|
21
15
|
export * from "./types.js";
|
|
@@ -24,14 +18,10 @@ export * from "./types.js";
|
|
|
24
18
|
// ============================================================================
|
|
25
19
|
import { anthropicOAuthProvider } from "./anthropic.js";
|
|
26
20
|
import { githubCopilotOAuthProvider } from "./github-copilot.js";
|
|
27
|
-
import { antigravityOAuthProvider } from "./google-antigravity.js";
|
|
28
|
-
import { geminiCliOAuthProvider } from "./google-gemini-cli.js";
|
|
29
21
|
import { openaiCodexOAuthProvider } from "./openai-codex.js";
|
|
30
22
|
const BUILT_IN_OAUTH_PROVIDERS = [
|
|
31
23
|
anthropicOAuthProvider,
|
|
32
24
|
githubCopilotOAuthProvider,
|
|
33
|
-
geminiCliOAuthProvider,
|
|
34
|
-
antigravityOAuthProvider,
|
|
35
25
|
openaiCodexOAuthProvider,
|
|
36
26
|
];
|
|
37
27
|
const oauthProviderRegistry = new Map(BUILT_IN_OAUTH_PROVIDERS.map((provider) => [provider.id, provider]));
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/utils/oauth/index.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/utils/oauth/index.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,YAAY;AACZ,OAAO,EAAE,sBAAsB,EAAE,cAAc,EAAE,qBAAqB,EAAE,MAAM,gBAAgB,CAAC;AAC/F,iBAAiB;AACjB,OAAO,EACN,uBAAuB,EACvB,0BAA0B,EAC1B,kBAAkB,EAClB,eAAe,EACf,yBAAyB,GACzB,MAAM,qBAAqB,CAAC;AAC7B,+BAA+B;AAC/B,OAAO,EAAE,gBAAgB,EAAE,wBAAwB,EAAE,uBAAuB,EAAE,MAAM,mBAAmB,CAAC;AAExG,cAAc,YAAY,CAAC;AAE3B,+EAA+E;AAC/E,oBAAoB;AACpB,+EAA+E;AAE/E,OAAO,EAAE,sBAAsB,EAAE,MAAM,gBAAgB,CAAC;AACxD,OAAO,EAAE,0BAA0B,EAAE,MAAM,qBAAqB,CAAC;AACjE,OAAO,EAAE,wBAAwB,EAAE,MAAM,mBAAmB,CAAC;AAG7D,MAAM,wBAAwB,GAA6B;IAC1D,sBAAsB;IACtB,0BAA0B;IAC1B,wBAAwB;CACxB,CAAC;AAEF,MAAM,qBAAqB,GAAG,IAAI,GAAG,CACpC,wBAAwB,CAAC,GAAG,CAAC,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC,QAAQ,CAAC,EAAE,EAAE,QAAQ,CAAC,CAAC,CACnE,CAAC;AAEF;;GAEG;AACH,MAAM,UAAU,gBAAgB,CAAC,EAAmB,EAAsC;IACzF,OAAO,qBAAqB,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;AAAA,CACrC;AAED;;GAEG;AACH,MAAM,UAAU,qBAAqB,CAAC,QAAgC,EAAQ;IAC7E,qBAAqB,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,EAAE,QAAQ,CAAC,CAAC;AAAA,CACjD;AAED;;;;;GAKG;AACH,MAAM,UAAU,uBAAuB,CAAC,EAAU,EAAQ;IACzD,MAAM,eAAe,GAAG,wBAAwB,CAAC,IAAI,CAAC,CAAC,QAAQ,EAAE,EAAE,CAAC,QAAQ,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC;IACxF,IAAI,eAAe,EAAE,CAAC;QACrB,qBAAqB,CAAC,GAAG,CAAC,EAAE,EAAE,eAAe,CAAC,CAAC;QAC/C,OAAO;IACR,CAAC;IACD,qBAAqB,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;AAAA,CACjC;AAED;;GAEG;AACH,MAAM,UAAU,mBAAmB,GAAS;IAC3C,qBAAqB,CAAC,KAAK,EAAE,CAAC;IAC9B,KAAK,MAAM,QAAQ,IAAI,wBAAwB,EAAE,CAAC;QACjD,qBAAqB,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,EAAE,QAAQ,CAAC,CAAC;IAClD,CAAC;AAAA,CACD;AAED;;GAEG;AACH,MAAM,UAAU,iBAAiB,GAA6B;IAC7D,OAAO,KAAK,CAAC,IAAI,CAAC,qBAAqB,CAAC,MAAM,EAAE,CAAC,CAAC;AAAA,CAClD;AAED;;GAEG;AACH,MAAM,UAAU,wBAAwB,GAAwB;IAC/D,OAAO,iBAAiB,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QACtC,EAAE,EAAE,CAAC,CAAC,EAAE;QACR,IAAI,EAAE,CAAC,CAAC,IAAI;QACZ,SAAS,EAAE,IAAI;KACf,CAAC,CAAC,CAAC;AAAA,CACJ;AAED,+EAA+E;AAC/E,0CAA0C;AAC1C,+EAA+E;AAE/E;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,iBAAiB,CACtC,UAA2B,EAC3B,WAA6B,EACD;IAC5B,MAAM,QAAQ,GAAG,gBAAgB,CAAC,UAAU,CAAC,CAAC;IAC9C,IAAI,CAAC,QAAQ,EAAE,CAAC;QACf,MAAM,IAAI,KAAK,CAAC,2BAA2B,UAAU,EAAE,CAAC,CAAC;IAC1D,CAAC;IACD,OAAO,QAAQ,CAAC,YAAY,CAAC,WAAW,CAAC,CAAC;AAAA,CAC1C;AAED;;;;;;GAMG;AACH,MAAM,CAAC,KAAK,UAAU,cAAc,CACnC,UAA2B,EAC3B,WAA6C,EAC0B;IACvE,MAAM,QAAQ,GAAG,gBAAgB,CAAC,UAAU,CAAC,CAAC;IAC9C,IAAI,CAAC,QAAQ,EAAE,CAAC;QACf,MAAM,IAAI,KAAK,CAAC,2BAA2B,UAAU,EAAE,CAAC,CAAC;IAC1D,CAAC;IAED,IAAI,KAAK,GAAG,WAAW,CAAC,UAAU,CAAC,CAAC;IACpC,IAAI,CAAC,KAAK,EAAE,CAAC;QACZ,OAAO,IAAI,CAAC;IACb,CAAC;IAED,qBAAqB;IACrB,IAAI,IAAI,CAAC,GAAG,EAAE,IAAI,KAAK,CAAC,OAAO,EAAE,CAAC;QACjC,IAAI,CAAC;YACJ,KAAK,GAAG,MAAM,QAAQ,CAAC,YAAY,CAAC,KAAK,CAAC,CAAC;QAC5C,CAAC;QAAC,OAAO,MAAM,EAAE,CAAC;YACjB,MAAM,IAAI,KAAK,CAAC,qCAAqC,UAAU,EAAE,CAAC,CAAC;QACpE,CAAC;IACF,CAAC;IAED,MAAM,MAAM,GAAG,QAAQ,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC;IACzC,OAAO,EAAE,cAAc,EAAE,KAAK,EAAE,MAAM,EAAE,CAAC;AAAA,CACzC","sourcesContent":["/**\n * OAuth credential management for AI providers.\n *\n * This module handles login, token refresh, and credential storage\n * for OAuth-based providers:\n * - Anthropic (Claude Pro/Max)\n * - GitHub Copilot\n */\n\n// Anthropic\nexport { anthropicOAuthProvider, loginAnthropic, refreshAnthropicToken } from \"./anthropic.js\";\n// GitHub Copilot\nexport {\n\tgetGitHubCopilotBaseUrl,\n\tgithubCopilotOAuthProvider,\n\tloginGitHubCopilot,\n\tnormalizeDomain,\n\trefreshGitHubCopilotToken,\n} from \"./github-copilot.js\";\n// OpenAI Codex (ChatGPT OAuth)\nexport { loginOpenAICodex, openaiCodexOAuthProvider, refreshOpenAICodexToken } from \"./openai-codex.js\";\n\nexport * from \"./types.js\";\n\n// ============================================================================\n// Provider Registry\n// ============================================================================\n\nimport { anthropicOAuthProvider } from \"./anthropic.js\";\nimport { githubCopilotOAuthProvider } from \"./github-copilot.js\";\nimport { openaiCodexOAuthProvider } from \"./openai-codex.js\";\nimport type { OAuthCredentials, OAuthProviderId, OAuthProviderInfo, OAuthProviderInterface } from \"./types.js\";\n\nconst BUILT_IN_OAUTH_PROVIDERS: OAuthProviderInterface[] = [\n\tanthropicOAuthProvider,\n\tgithubCopilotOAuthProvider,\n\topenaiCodexOAuthProvider,\n];\n\nconst oauthProviderRegistry = new Map<string, OAuthProviderInterface>(\n\tBUILT_IN_OAUTH_PROVIDERS.map((provider) => [provider.id, provider]),\n);\n\n/**\n * Get an OAuth provider by ID\n */\nexport function getOAuthProvider(id: OAuthProviderId): OAuthProviderInterface | undefined {\n\treturn oauthProviderRegistry.get(id);\n}\n\n/**\n * Register a custom OAuth provider\n */\nexport function registerOAuthProvider(provider: OAuthProviderInterface): void {\n\toauthProviderRegistry.set(provider.id, provider);\n}\n\n/**\n * Unregister an OAuth provider.\n *\n * If the provider is built-in, restores the built-in implementation.\n * Custom providers are removed completely.\n */\nexport function unregisterOAuthProvider(id: string): void {\n\tconst builtInProvider = BUILT_IN_OAUTH_PROVIDERS.find((provider) => provider.id === id);\n\tif (builtInProvider) {\n\t\toauthProviderRegistry.set(id, builtInProvider);\n\t\treturn;\n\t}\n\toauthProviderRegistry.delete(id);\n}\n\n/**\n * Reset OAuth providers to built-ins.\n */\nexport function resetOAuthProviders(): void {\n\toauthProviderRegistry.clear();\n\tfor (const provider of BUILT_IN_OAUTH_PROVIDERS) {\n\t\toauthProviderRegistry.set(provider.id, provider);\n\t}\n}\n\n/**\n * Get all registered OAuth providers\n */\nexport function getOAuthProviders(): OAuthProviderInterface[] {\n\treturn Array.from(oauthProviderRegistry.values());\n}\n\n/**\n * @deprecated Use getOAuthProviders() which returns OAuthProviderInterface[]\n */\nexport function getOAuthProviderInfoList(): OAuthProviderInfo[] {\n\treturn getOAuthProviders().map((p) => ({\n\t\tid: p.id,\n\t\tname: p.name,\n\t\tavailable: true,\n\t}));\n}\n\n// ============================================================================\n// High-level API (uses provider registry)\n// ============================================================================\n\n/**\n * Refresh token for any OAuth provider.\n * @deprecated Use getOAuthProvider(id).refreshToken() instead\n */\nexport async function refreshOAuthToken(\n\tproviderId: OAuthProviderId,\n\tcredentials: OAuthCredentials,\n): Promise<OAuthCredentials> {\n\tconst provider = getOAuthProvider(providerId);\n\tif (!provider) {\n\t\tthrow new Error(`Unknown OAuth provider: ${providerId}`);\n\t}\n\treturn provider.refreshToken(credentials);\n}\n\n/**\n * Get API key for a provider from OAuth credentials.\n * Automatically refreshes expired tokens.\n *\n * @returns API key string and updated credentials, or null if no credentials\n * @throws Error if refresh fails\n */\nexport async function getOAuthApiKey(\n\tproviderId: OAuthProviderId,\n\tcredentials: Record<string, OAuthCredentials>,\n): Promise<{ newCredentials: OAuthCredentials; apiKey: string } | null> {\n\tconst provider = getOAuthProvider(providerId);\n\tif (!provider) {\n\t\tthrow new Error(`Unknown OAuth provider: ${providerId}`);\n\t}\n\n\tlet creds = credentials[providerId];\n\tif (!creds) {\n\t\treturn null;\n\t}\n\n\t// Refresh if expired\n\tif (Date.now() >= creds.expires) {\n\t\ttry {\n\t\t\tcreds = await provider.refreshToken(creds);\n\t\t} catch (_error) {\n\t\t\tthrow new Error(`Failed to refresh OAuth token for ${providerId}`);\n\t\t}\n\t}\n\n\tconst apiKey = provider.getApiKey(creds);\n\treturn { newCredentials: creds, apiKey };\n}\n"]}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@mariozechner/pi-ai",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.71.1",
|
|
4
4
|
"description": "Unified LLM API with automatic model discovery and provider configuration",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/index.js",
|
|
@@ -22,10 +22,6 @@
|
|
|
22
22
|
"types": "./dist/providers/google.d.ts",
|
|
23
23
|
"import": "./dist/providers/google.js"
|
|
24
24
|
},
|
|
25
|
-
"./google-gemini-cli": {
|
|
26
|
-
"types": "./dist/providers/google-gemini-cli.d.ts",
|
|
27
|
-
"import": "./dist/providers/google-gemini-cli.js"
|
|
28
|
-
},
|
|
29
25
|
"./google-vertex": {
|
|
30
26
|
"types": "./dist/providers/google-vertex.d.ts",
|
|
31
27
|
"import": "./dist/providers/google-vertex.js"
|
|
@@ -72,7 +68,7 @@
|
|
|
72
68
|
"prepublishOnly": "npm run clean && npm run build"
|
|
73
69
|
},
|
|
74
70
|
"dependencies": {
|
|
75
|
-
"@anthropic-ai/sdk": "^0.
|
|
71
|
+
"@anthropic-ai/sdk": "^0.91.1",
|
|
76
72
|
"@aws-sdk/client-bedrock-runtime": "^3.1030.0",
|
|
77
73
|
"@google/genai": "^1.40.0",
|
|
78
74
|
"@mistralai/mistralai": "^2.2.0",
|
|
@@ -1,74 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Google Gemini CLI / Antigravity provider.
|
|
3
|
-
* Shared implementation for both google-gemini-cli and google-antigravity providers.
|
|
4
|
-
* Uses the Cloud Code Assist API endpoint to access Gemini and Claude models.
|
|
5
|
-
*/
|
|
6
|
-
import type { Content, ThinkingConfig } from "@google/genai";
|
|
7
|
-
import type { Context, Model, SimpleStreamOptions, StreamFunction, StreamOptions } from "../types.js";
|
|
8
|
-
import { convertTools, mapToolChoice } from "./google-shared.js";
|
|
9
|
-
/**
|
|
10
|
-
* Thinking level for Gemini 3 models.
|
|
11
|
-
* Mirrors Google's ThinkingLevel enum values.
|
|
12
|
-
*/
|
|
13
|
-
export type GoogleThinkingLevel = "THINKING_LEVEL_UNSPECIFIED" | "MINIMAL" | "LOW" | "MEDIUM" | "HIGH";
|
|
14
|
-
export interface GoogleGeminiCliOptions extends StreamOptions {
|
|
15
|
-
toolChoice?: "auto" | "none" | "any";
|
|
16
|
-
/**
|
|
17
|
-
* Thinking/reasoning configuration.
|
|
18
|
-
* - Gemini 2.x models: use `budgetTokens` to set the thinking budget
|
|
19
|
-
* - Gemini 3 models (gemini-3-pro-*, gemini-3-flash-*): use `level` instead
|
|
20
|
-
*
|
|
21
|
-
* When using `streamSimple`, this is handled automatically based on the model.
|
|
22
|
-
*/
|
|
23
|
-
thinking?: {
|
|
24
|
-
enabled: boolean;
|
|
25
|
-
/** Thinking budget in tokens. Use for Gemini 2.x models. */
|
|
26
|
-
budgetTokens?: number;
|
|
27
|
-
/** Thinking level. Use for Gemini 3 models (LOW/HIGH for Pro, MINIMAL/LOW/MEDIUM/HIGH for Flash). */
|
|
28
|
-
level?: GoogleThinkingLevel;
|
|
29
|
-
};
|
|
30
|
-
projectId?: string;
|
|
31
|
-
}
|
|
32
|
-
/**
|
|
33
|
-
* Extract retry delay from Gemini error response (in milliseconds).
|
|
34
|
-
* Checks headers first (Retry-After, x-ratelimit-reset, x-ratelimit-reset-after),
|
|
35
|
-
* then parses body patterns like:
|
|
36
|
-
* - "Your quota will reset after 39s"
|
|
37
|
-
* - "Your quota will reset after 18h31m10s"
|
|
38
|
-
* - "Please retry in Xs" or "Please retry in Xms"
|
|
39
|
-
* - "retryDelay": "34.074824224s" (JSON field)
|
|
40
|
-
*/
|
|
41
|
-
export declare function extractRetryDelay(errorText: string, response?: Response | Headers): number | undefined;
|
|
42
|
-
interface CloudCodeAssistRequest {
|
|
43
|
-
project: string;
|
|
44
|
-
model: string;
|
|
45
|
-
request: {
|
|
46
|
-
contents: Content[];
|
|
47
|
-
sessionId?: string;
|
|
48
|
-
systemInstruction?: {
|
|
49
|
-
role?: string;
|
|
50
|
-
parts: {
|
|
51
|
-
text: string;
|
|
52
|
-
}[];
|
|
53
|
-
};
|
|
54
|
-
generationConfig?: {
|
|
55
|
-
maxOutputTokens?: number;
|
|
56
|
-
temperature?: number;
|
|
57
|
-
thinkingConfig?: ThinkingConfig;
|
|
58
|
-
};
|
|
59
|
-
tools?: ReturnType<typeof convertTools>;
|
|
60
|
-
toolConfig?: {
|
|
61
|
-
functionCallingConfig: {
|
|
62
|
-
mode: ReturnType<typeof mapToolChoice>;
|
|
63
|
-
};
|
|
64
|
-
};
|
|
65
|
-
};
|
|
66
|
-
requestType?: string;
|
|
67
|
-
userAgent?: string;
|
|
68
|
-
requestId?: string;
|
|
69
|
-
}
|
|
70
|
-
export declare const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli", GoogleGeminiCliOptions>;
|
|
71
|
-
export declare const streamSimpleGoogleGeminiCli: StreamFunction<"google-gemini-cli", SimpleStreamOptions>;
|
|
72
|
-
export declare function buildRequest(model: Model<"google-gemini-cli">, context: Context, projectId: string, options?: GoogleGeminiCliOptions, isAntigravity?: boolean): CloudCodeAssistRequest;
|
|
73
|
-
export {};
|
|
74
|
-
//# sourceMappingURL=google-gemini-cli.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"google-gemini-cli.d.ts","sourceRoot":"","sources":["../../src/providers/google-gemini-cli.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,KAAK,EAAE,OAAO,EAAE,cAAc,EAAE,MAAM,eAAe,CAAC;AAE7D,OAAO,KAAK,EAGX,OAAO,EACP,KAAK,EACL,mBAAmB,EACnB,cAAc,EACd,aAAa,EAMb,MAAM,aAAa,CAAC;AAIrB,OAAO,EAEN,YAAY,EAGZ,aAAa,EAEb,MAAM,oBAAoB,CAAC;AAG5B;;;GAGG;AACH,MAAM,MAAM,mBAAmB,GAAG,4BAA4B,GAAG,SAAS,GAAG,KAAK,GAAG,QAAQ,GAAG,MAAM,CAAC;AAEvG,MAAM,WAAW,sBAAuB,SAAQ,aAAa;IAC5D,UAAU,CAAC,EAAE,MAAM,GAAG,MAAM,GAAG,KAAK,CAAC;IACrC;;;;;;OAMG;IACH,QAAQ,CAAC,EAAE;QACV,OAAO,EAAE,OAAO,CAAC;QACjB,4DAA4D;QAC5D,YAAY,CAAC,EAAE,MAAM,CAAC;QACtB,qGAAqG;QACrG,KAAK,CAAC,EAAE,mBAAmB,CAAC;KAC5B,CAAC;IACF,SAAS,CAAC,EAAE,MAAM,CAAC;CACnB;AAgDD;;;;;;;;GAQG;AACH,wBAAgB,iBAAiB,CAAC,SAAS,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,QAAQ,GAAG,OAAO,GAAG,MAAM,GAAG,SAAS,CAyFtG;AA6DD,UAAU,sBAAsB;IAC/B,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE;QACR,QAAQ,EAAE,OAAO,EAAE,CAAC;QACpB,SAAS,CAAC,EAAE,MAAM,CAAC;QACnB,iBAAiB,CAAC,EAAE;YAAE,IAAI,CAAC,EAAE,MAAM,CAAC;YAAC,KAAK,EAAE;gBAAE,IAAI,EAAE,MAAM,CAAA;aAAE,EAAE,CAAA;SAAE,CAAC;QACjE,gBAAgB,CAAC,EAAE;YAClB,eAAe,CAAC,EAAE,MAAM,CAAC;YACzB,WAAW,CAAC,EAAE,MAAM,CAAC;YACrB,cAAc,CAAC,EAAE,cAAc,CAAC;SAChC,CAAC;QACF,KAAK,CAAC,EAAE,UAAU,CAAC,OAAO,YAAY,CAAC,CAAC;QACxC,UAAU,CAAC,EAAE;YACZ,qBAAqB,EAAE;gBACtB,IAAI,EAAE,UAAU,CAAC,OAAO,aAAa,CAAC,CAAC;aACvC,CAAC;SACF,CAAC;KACF,CAAC;IACF,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,SAAS,CAAC,EAAE,MAAM,CAAC;CACnB;AAiCD,eAAO,MAAM,qBAAqB,EAAE,cAAc,CAAC,mBAAmB,EAAE,sBAAsB,CA8e7F,CAAC;AAEF,eAAO,MAAM,2BAA2B,EAAE,cAAc,CAAC,mBAAmB,EAAE,mBAAmB,CAqDhG,CAAC;AAEF,wBAAgB,YAAY,CAC3B,KAAK,EAAE,KAAK,CAAC,mBAAmB,CAAC,EACjC,OAAO,EAAE,OAAO,EAChB,SAAS,EAAE,MAAM,EACjB,OAAO,GAAE,sBAA2B,EACpC,aAAa,UAAQ,GACnB,sBAAsB,CA8ExB","sourcesContent":["/**\n * Google Gemini CLI / Antigravity provider.\n * Shared implementation for both google-gemini-cli and google-antigravity providers.\n * Uses the Cloud Code Assist API endpoint to access Gemini and Claude models.\n */\n\nimport type { Content, ThinkingConfig } from \"@google/genai\";\nimport { calculateCost } from \"../models.js\";\nimport type {\n\tApi,\n\tAssistantMessage,\n\tContext,\n\tModel,\n\tSimpleStreamOptions,\n\tStreamFunction,\n\tStreamOptions,\n\tTextContent,\n\tThinkingBudgets,\n\tThinkingContent,\n\tThinkingLevel,\n\tToolCall,\n} from \"../types.js\";\nimport { AssistantMessageEventStream } from \"../utils/event-stream.js\";\nimport { headersToRecord } from \"../utils/headers.js\";\nimport { sanitizeSurrogates } from \"../utils/sanitize-unicode.js\";\nimport {\n\tconvertMessages,\n\tconvertTools,\n\tisThinkingPart,\n\tmapStopReasonString,\n\tmapToolChoice,\n\tretainThoughtSignature,\n} from \"./google-shared.js\";\nimport { buildBaseOptions, clampReasoning } from \"./simple-options.js\";\n\n/**\n * Thinking level for Gemini 3 models.\n * Mirrors Google's ThinkingLevel enum values.\n */\nexport type GoogleThinkingLevel = \"THINKING_LEVEL_UNSPECIFIED\" | \"MINIMAL\" | \"LOW\" | \"MEDIUM\" | \"HIGH\";\n\nexport interface GoogleGeminiCliOptions extends StreamOptions {\n\ttoolChoice?: \"auto\" | \"none\" | \"any\";\n\t/**\n\t * Thinking/reasoning configuration.\n\t * - Gemini 2.x models: use `budgetTokens` to set the thinking budget\n\t * - Gemini 3 models (gemini-3-pro-*, gemini-3-flash-*): use `level` instead\n\t *\n\t * When using `streamSimple`, this is handled automatically based on the model.\n\t */\n\tthinking?: {\n\t\tenabled: boolean;\n\t\t/** Thinking budget in tokens. Use for Gemini 2.x models. */\n\t\tbudgetTokens?: number;\n\t\t/** Thinking level. Use for Gemini 3 models (LOW/HIGH for Pro, MINIMAL/LOW/MEDIUM/HIGH for Flash). */\n\t\tlevel?: GoogleThinkingLevel;\n\t};\n\tprojectId?: string;\n}\n\nconst DEFAULT_ENDPOINT = \"https://cloudcode-pa.googleapis.com\";\nconst ANTIGRAVITY_DAILY_ENDPOINT = \"https://daily-cloudcode-pa.sandbox.googleapis.com\";\nconst ANTIGRAVITY_AUTOPUSH_ENDPOINT = \"https://autopush-cloudcode-pa.sandbox.googleapis.com\";\nconst ANTIGRAVITY_ENDPOINT_FALLBACKS = [\n\tANTIGRAVITY_DAILY_ENDPOINT,\n\tANTIGRAVITY_AUTOPUSH_ENDPOINT,\n\tDEFAULT_ENDPOINT,\n] as const;\n// Headers for Gemini CLI (prod endpoint)\nconst GEMINI_CLI_HEADERS = {\n\t\"User-Agent\": \"google-cloud-sdk vscode_cloudshelleditor/0.1\",\n\t\"X-Goog-Api-Client\": \"gl-node/22.17.0\",\n\t\"Client-Metadata\": JSON.stringify({\n\t\tideType: \"IDE_UNSPECIFIED\",\n\t\tplatform: \"PLATFORM_UNSPECIFIED\",\n\t\tpluginType: \"GEMINI\",\n\t}),\n};\n\n// Headers for Antigravity (sandbox endpoint) - requires specific User-Agent\nconst DEFAULT_ANTIGRAVITY_VERSION = \"1.21.9\";\n\nfunction getAntigravityHeaders() {\n\tconst version = process.env.PI_AI_ANTIGRAVITY_VERSION || DEFAULT_ANTIGRAVITY_VERSION;\n\treturn {\n\t\t\"User-Agent\": `antigravity/${version} darwin/arm64`,\n\t};\n}\n\n// Antigravity system instruction (compact version from CLIProxyAPI).\nconst ANTIGRAVITY_SYSTEM_INSTRUCTION =\n\t\"You are Antigravity, a powerful agentic AI coding assistant designed by the Google Deepmind team working on Advanced Agentic Coding.\" +\n\t\"You are pair programming with a USER to solve their coding task. The task may require creating a new codebase, modifying or debugging an existing codebase, or simply answering a question.\" +\n\t\"**Absolute paths only**\" +\n\t\"**Proactiveness**\";\n\n// Counter for generating unique tool call IDs\nlet toolCallCounter = 0;\n\n// Retry configuration\nconst MAX_RETRIES = 3;\nconst BASE_DELAY_MS = 1000;\nconst MAX_EMPTY_STREAM_RETRIES = 2;\nconst EMPTY_STREAM_BASE_DELAY_MS = 500;\nconst CLAUDE_THINKING_BETA_HEADER = \"interleaved-thinking-2025-05-14\";\n\n/**\n * Extract retry delay from Gemini error response (in milliseconds).\n * Checks headers first (Retry-After, x-ratelimit-reset, x-ratelimit-reset-after),\n * then parses body patterns like:\n * - \"Your quota will reset after 39s\"\n * - \"Your quota will reset after 18h31m10s\"\n * - \"Please retry in Xs\" or \"Please retry in Xms\"\n * - \"retryDelay\": \"34.074824224s\" (JSON field)\n */\nexport function extractRetryDelay(errorText: string, response?: Response | Headers): number | undefined {\n\tconst normalizeDelay = (ms: number): number | undefined => (ms > 0 ? Math.ceil(ms + 1000) : undefined);\n\n\tconst headers = response instanceof Headers ? response : response?.headers;\n\tif (headers) {\n\t\tconst retryAfter = headers.get(\"retry-after\");\n\t\tif (retryAfter) {\n\t\t\tconst retryAfterSeconds = Number(retryAfter);\n\t\t\tif (Number.isFinite(retryAfterSeconds)) {\n\t\t\t\tconst delay = normalizeDelay(retryAfterSeconds * 1000);\n\t\t\t\tif (delay !== undefined) {\n\t\t\t\t\treturn delay;\n\t\t\t\t}\n\t\t\t}\n\t\t\tconst retryAfterDate = new Date(retryAfter);\n\t\t\tconst retryAfterMs = retryAfterDate.getTime();\n\t\t\tif (!Number.isNaN(retryAfterMs)) {\n\t\t\t\tconst delay = normalizeDelay(retryAfterMs - Date.now());\n\t\t\t\tif (delay !== undefined) {\n\t\t\t\t\treturn delay;\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\n\t\tconst rateLimitReset = headers.get(\"x-ratelimit-reset\");\n\t\tif (rateLimitReset) {\n\t\t\tconst resetSeconds = Number.parseInt(rateLimitReset, 10);\n\t\t\tif (!Number.isNaN(resetSeconds)) {\n\t\t\t\tconst delay = normalizeDelay(resetSeconds * 1000 - Date.now());\n\t\t\t\tif (delay !== undefined) {\n\t\t\t\t\treturn delay;\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\n\t\tconst rateLimitResetAfter = headers.get(\"x-ratelimit-reset-after\");\n\t\tif (rateLimitResetAfter) {\n\t\t\tconst resetAfterSeconds = Number(rateLimitResetAfter);\n\t\t\tif (Number.isFinite(resetAfterSeconds)) {\n\t\t\t\tconst delay = normalizeDelay(resetAfterSeconds * 1000);\n\t\t\t\tif (delay !== undefined) {\n\t\t\t\t\treturn delay;\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\t}\n\n\t// Pattern 1: \"Your quota will reset after ...\" (formats: \"18h31m10s\", \"10m15s\", \"6s\", \"39s\")\n\tconst durationMatch = errorText.match(/reset after (?:(\\d+)h)?(?:(\\d+)m)?(\\d+(?:\\.\\d+)?)s/i);\n\tif (durationMatch) {\n\t\tconst hours = durationMatch[1] ? parseInt(durationMatch[1], 10) : 0;\n\t\tconst minutes = durationMatch[2] ? parseInt(durationMatch[2], 10) : 0;\n\t\tconst seconds = parseFloat(durationMatch[3]);\n\t\tif (!Number.isNaN(seconds)) {\n\t\t\tconst totalMs = ((hours * 60 + minutes) * 60 + seconds) * 1000;\n\t\t\tconst delay = normalizeDelay(totalMs);\n\t\t\tif (delay !== undefined) {\n\t\t\t\treturn delay;\n\t\t\t}\n\t\t}\n\t}\n\n\t// Pattern 2: \"Please retry in X[ms|s]\"\n\tconst retryInMatch = errorText.match(/Please retry in ([0-9.]+)(ms|s)/i);\n\tif (retryInMatch?.[1]) {\n\t\tconst value = parseFloat(retryInMatch[1]);\n\t\tif (!Number.isNaN(value) && value > 0) {\n\t\t\tconst ms = retryInMatch[2].toLowerCase() === \"ms\" ? value : value * 1000;\n\t\t\tconst delay = normalizeDelay(ms);\n\t\t\tif (delay !== undefined) {\n\t\t\t\treturn delay;\n\t\t\t}\n\t\t}\n\t}\n\n\t// Pattern 3: \"retryDelay\": \"34.074824224s\" (JSON field in error details)\n\tconst retryDelayMatch = errorText.match(/\"retryDelay\":\\s*\"([0-9.]+)(ms|s)\"/i);\n\tif (retryDelayMatch?.[1]) {\n\t\tconst value = parseFloat(retryDelayMatch[1]);\n\t\tif (!Number.isNaN(value) && value > 0) {\n\t\t\tconst ms = retryDelayMatch[2].toLowerCase() === \"ms\" ? value : value * 1000;\n\t\t\tconst delay = normalizeDelay(ms);\n\t\t\tif (delay !== undefined) {\n\t\t\t\treturn delay;\n\t\t\t}\n\t\t}\n\t}\n\n\treturn undefined;\n}\n\nfunction needsClaudeThinkingBetaHeader(model: Model<\"google-gemini-cli\">): boolean {\n\treturn model.provider === \"google-antigravity\" && model.id.startsWith(\"claude-\") && model.reasoning;\n}\n\nfunction isGemini3ProModel(modelId: string): boolean {\n\treturn /gemini-3(?:\\.1)?-pro/.test(modelId.toLowerCase());\n}\n\nfunction isGemini3FlashModel(modelId: string): boolean {\n\treturn /gemini-3(?:\\.1)?-flash/.test(modelId.toLowerCase());\n}\n\nfunction isGemini3Model(modelId: string): boolean {\n\treturn isGemini3ProModel(modelId) || isGemini3FlashModel(modelId);\n}\n\n/**\n * Check if an error is retryable (rate limit, server error, network error, etc.)\n */\nfunction isRetryableError(status: number, errorText: string): boolean {\n\tif (status === 429 || status === 500 || status === 502 || status === 503 || status === 504) {\n\t\treturn true;\n\t}\n\treturn /resource.?exhausted|rate.?limit|overloaded|service.?unavailable|other.?side.?closed/i.test(errorText);\n}\n\n/**\n * Extract a clean, user-friendly error message from Google API error response.\n * Parses JSON error responses and returns just the message field.\n */\nfunction extractErrorMessage(errorText: string): string {\n\ttry {\n\t\tconst parsed = JSON.parse(errorText) as { error?: { message?: string } };\n\t\tif (parsed.error?.message) {\n\t\t\treturn parsed.error.message;\n\t\t}\n\t} catch {\n\t\t// Not JSON, return as-is\n\t}\n\treturn errorText;\n}\n\n/**\n * Sleep for a given number of milliseconds, respecting abort signal.\n */\nfunction sleep(ms: number, signal?: AbortSignal): Promise<void> {\n\treturn new Promise((resolve, reject) => {\n\t\tif (signal?.aborted) {\n\t\t\treject(new Error(\"Request was aborted\"));\n\t\t\treturn;\n\t\t}\n\t\tconst timeout = setTimeout(resolve, ms);\n\t\tsignal?.addEventListener(\"abort\", () => {\n\t\t\tclearTimeout(timeout);\n\t\t\treject(new Error(\"Request was aborted\"));\n\t\t});\n\t});\n}\n\ninterface CloudCodeAssistRequest {\n\tproject: string;\n\tmodel: string;\n\trequest: {\n\t\tcontents: Content[];\n\t\tsessionId?: string;\n\t\tsystemInstruction?: { role?: string; parts: { text: string }[] };\n\t\tgenerationConfig?: {\n\t\t\tmaxOutputTokens?: number;\n\t\t\ttemperature?: number;\n\t\t\tthinkingConfig?: ThinkingConfig;\n\t\t};\n\t\ttools?: ReturnType<typeof convertTools>;\n\t\ttoolConfig?: {\n\t\t\tfunctionCallingConfig: {\n\t\t\t\tmode: ReturnType<typeof mapToolChoice>;\n\t\t\t};\n\t\t};\n\t};\n\trequestType?: string;\n\tuserAgent?: string;\n\trequestId?: string;\n}\n\ninterface CloudCodeAssistResponseChunk {\n\tresponse?: {\n\t\tcandidates?: Array<{\n\t\t\tcontent?: {\n\t\t\t\trole: string;\n\t\t\t\tparts?: Array<{\n\t\t\t\t\ttext?: string;\n\t\t\t\t\tthought?: boolean;\n\t\t\t\t\tthoughtSignature?: string;\n\t\t\t\t\tfunctionCall?: {\n\t\t\t\t\t\tname: string;\n\t\t\t\t\t\targs: Record<string, unknown>;\n\t\t\t\t\t\tid?: string;\n\t\t\t\t\t};\n\t\t\t\t}>;\n\t\t\t};\n\t\t\tfinishReason?: string;\n\t\t}>;\n\t\tusageMetadata?: {\n\t\t\tpromptTokenCount?: number;\n\t\t\tcandidatesTokenCount?: number;\n\t\t\tthoughtsTokenCount?: number;\n\t\t\ttotalTokenCount?: number;\n\t\t\tcachedContentTokenCount?: number;\n\t\t};\n\t\tmodelVersion?: string;\n\t\tresponseId?: string;\n\t};\n\ttraceId?: string;\n}\n\nexport const streamGoogleGeminiCli: StreamFunction<\"google-gemini-cli\", GoogleGeminiCliOptions> = (\n\tmodel: Model<\"google-gemini-cli\">,\n\tcontext: Context,\n\toptions?: GoogleGeminiCliOptions,\n): AssistantMessageEventStream => {\n\tconst stream = new AssistantMessageEventStream();\n\n\t(async () => {\n\t\tconst output: AssistantMessage = {\n\t\t\trole: \"assistant\",\n\t\t\tcontent: [],\n\t\t\tapi: \"google-gemini-cli\" as Api,\n\t\t\tprovider: model.provider,\n\t\t\tmodel: model.id,\n\t\t\tusage: {\n\t\t\t\tinput: 0,\n\t\t\t\toutput: 0,\n\t\t\t\tcacheRead: 0,\n\t\t\t\tcacheWrite: 0,\n\t\t\t\ttotalTokens: 0,\n\t\t\t\tcost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },\n\t\t\t},\n\t\t\tstopReason: \"stop\",\n\t\t\ttimestamp: Date.now(),\n\t\t};\n\n\t\ttry {\n\t\t\t// apiKey is JSON-encoded: { token, projectId }\n\t\t\tconst apiKeyRaw = options?.apiKey;\n\t\t\tif (!apiKeyRaw) {\n\t\t\t\tthrow new Error(\"Google Cloud Code Assist requires OAuth authentication. Use /login to authenticate.\");\n\t\t\t}\n\n\t\t\tlet accessToken: string;\n\t\t\tlet projectId: string;\n\n\t\t\ttry {\n\t\t\t\tconst parsed = JSON.parse(apiKeyRaw) as { token: string; projectId: string };\n\t\t\t\taccessToken = parsed.token;\n\t\t\t\tprojectId = parsed.projectId;\n\t\t\t} catch {\n\t\t\t\tthrow new Error(\"Invalid Google Cloud Code Assist credentials. Use /login to re-authenticate.\");\n\t\t\t}\n\n\t\t\tif (!accessToken || !projectId) {\n\t\t\t\tthrow new Error(\"Missing token or projectId in Google Cloud credentials. Use /login to re-authenticate.\");\n\t\t\t}\n\n\t\t\tconst isAntigravity = model.provider === \"google-antigravity\";\n\t\t\tconst baseUrl = model.baseUrl?.trim();\n\t\t\tconst endpoints = baseUrl ? [baseUrl] : isAntigravity ? ANTIGRAVITY_ENDPOINT_FALLBACKS : [DEFAULT_ENDPOINT];\n\n\t\t\tlet requestBody = buildRequest(model, context, projectId, options, isAntigravity);\n\t\t\tconst nextRequestBody = await options?.onPayload?.(requestBody, model);\n\t\t\tif (nextRequestBody !== undefined) {\n\t\t\t\trequestBody = nextRequestBody as CloudCodeAssistRequest;\n\t\t\t}\n\t\t\tconst headers = isAntigravity ? getAntigravityHeaders() : GEMINI_CLI_HEADERS;\n\n\t\t\tconst requestHeaders = {\n\t\t\t\tAuthorization: `Bearer ${accessToken}`,\n\t\t\t\t\"Content-Type\": \"application/json\",\n\t\t\t\tAccept: \"text/event-stream\",\n\t\t\t\t...headers,\n\t\t\t\t...(needsClaudeThinkingBetaHeader(model) ? { \"anthropic-beta\": CLAUDE_THINKING_BETA_HEADER } : {}),\n\t\t\t\t...options?.headers,\n\t\t\t};\n\t\t\tconst requestBodyJson = JSON.stringify(requestBody);\n\n\t\t\t// Fetch with retry logic for rate limits, transient errors, and endpoint fallbacks.\n\t\t\t// On 403/404, immediately try the next endpoint (no delay).\n\t\t\t// On 429/5xx, retry with backoff on the same or next endpoint.\n\t\t\tlet response: Response | undefined;\n\t\t\tlet lastError: Error | undefined;\n\t\t\tlet requestUrl: string | undefined;\n\t\t\tlet endpointIndex = 0;\n\n\t\t\tfor (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {\n\t\t\t\tif (options?.signal?.aborted) {\n\t\t\t\t\tthrow new Error(\"Request was aborted\");\n\t\t\t\t}\n\n\t\t\t\ttry {\n\t\t\t\t\tconst endpoint = endpoints[endpointIndex];\n\t\t\t\t\trequestUrl = `${endpoint}/v1internal:streamGenerateContent?alt=sse`;\n\t\t\t\t\tresponse = await fetch(requestUrl, {\n\t\t\t\t\t\tmethod: \"POST\",\n\t\t\t\t\t\theaders: requestHeaders,\n\t\t\t\t\t\tbody: requestBodyJson,\n\t\t\t\t\t\tsignal: options?.signal,\n\t\t\t\t\t});\n\t\t\t\t\tawait options?.onResponse?.(\n\t\t\t\t\t\t{ status: response.status, headers: headersToRecord(response.headers) },\n\t\t\t\t\t\tmodel,\n\t\t\t\t\t);\n\n\t\t\t\t\tif (response.ok) {\n\t\t\t\t\t\tbreak; // Success, exit retry loop\n\t\t\t\t\t}\n\n\t\t\t\t\tconst errorText = await response.text();\n\n\t\t\t\t\t// On 403/404, cascade to the next endpoint immediately (no delay)\n\t\t\t\t\tif ((response.status === 403 || response.status === 404) && endpointIndex < endpoints.length - 1) {\n\t\t\t\t\t\tendpointIndex++;\n\t\t\t\t\t\tcontinue;\n\t\t\t\t\t}\n\n\t\t\t\t\t// Check if retryable (429, 5xx, network patterns)\n\t\t\t\t\tif (attempt < MAX_RETRIES && isRetryableError(response.status, errorText)) {\n\t\t\t\t\t\t// Advance endpoint if possible\n\t\t\t\t\t\tif (endpointIndex < endpoints.length - 1) {\n\t\t\t\t\t\t\tendpointIndex++;\n\t\t\t\t\t\t}\n\n\t\t\t\t\t\t// Use server-provided delay or exponential backoff\n\t\t\t\t\t\tconst serverDelay = extractRetryDelay(errorText, response);\n\t\t\t\t\t\tconst delayMs = serverDelay ?? BASE_DELAY_MS * 2 ** attempt;\n\n\t\t\t\t\t\t// Check if server delay exceeds max allowed (default: 60s)\n\t\t\t\t\t\tconst maxDelayMs = options?.maxRetryDelayMs ?? 60000;\n\t\t\t\t\t\tif (maxDelayMs > 0 && serverDelay && serverDelay > maxDelayMs) {\n\t\t\t\t\t\t\tconst delaySeconds = Math.ceil(serverDelay / 1000);\n\t\t\t\t\t\t\tthrow new Error(\n\t\t\t\t\t\t\t\t`Server requested ${delaySeconds}s retry delay (max: ${Math.ceil(maxDelayMs / 1000)}s). ${extractErrorMessage(errorText)}`,\n\t\t\t\t\t\t\t);\n\t\t\t\t\t\t}\n\n\t\t\t\t\t\tawait sleep(delayMs, options?.signal);\n\t\t\t\t\t\tcontinue;\n\t\t\t\t\t}\n\n\t\t\t\t\t// Not retryable or max retries exceeded\n\t\t\t\t\tthrow new Error(`Cloud Code Assist API error (${response.status}): ${extractErrorMessage(errorText)}`);\n\t\t\t\t} catch (error) {\n\t\t\t\t\t// Check for abort - fetch throws AbortError, our code throws \"Request was aborted\"\n\t\t\t\t\tif (error instanceof Error) {\n\t\t\t\t\t\tif (error.name === \"AbortError\" || error.message === \"Request was aborted\") {\n\t\t\t\t\t\t\tthrow new Error(\"Request was aborted\");\n\t\t\t\t\t\t}\n\t\t\t\t\t}\n\t\t\t\t\t// Extract detailed error message from fetch errors (Node includes cause)\n\t\t\t\t\tlastError = error instanceof Error ? error : new Error(String(error));\n\t\t\t\t\tif (lastError.message === \"fetch failed\" && lastError.cause instanceof Error) {\n\t\t\t\t\t\tlastError = new Error(`Network error: ${lastError.cause.message}`);\n\t\t\t\t\t}\n\t\t\t\t\t// Network errors are retryable\n\t\t\t\t\tif (attempt < MAX_RETRIES) {\n\t\t\t\t\t\tconst delayMs = BASE_DELAY_MS * 2 ** attempt;\n\t\t\t\t\t\tawait sleep(delayMs, options?.signal);\n\t\t\t\t\t\tcontinue;\n\t\t\t\t\t}\n\t\t\t\t\tthrow lastError;\n\t\t\t\t}\n\t\t\t}\n\n\t\t\tif (!response || !response.ok) {\n\t\t\t\tthrow lastError ?? new Error(\"Failed to get response after retries\");\n\t\t\t}\n\n\t\t\tlet started = false;\n\t\t\tconst ensureStarted = () => {\n\t\t\t\tif (!started) {\n\t\t\t\t\tstream.push({ type: \"start\", partial: output });\n\t\t\t\t\tstarted = true;\n\t\t\t\t}\n\t\t\t};\n\n\t\t\tconst resetOutput = () => {\n\t\t\t\toutput.content = [];\n\t\t\t\toutput.usage = {\n\t\t\t\t\tinput: 0,\n\t\t\t\t\toutput: 0,\n\t\t\t\t\tcacheRead: 0,\n\t\t\t\t\tcacheWrite: 0,\n\t\t\t\t\ttotalTokens: 0,\n\t\t\t\t\tcost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },\n\t\t\t\t};\n\t\t\t\toutput.stopReason = \"stop\";\n\t\t\t\toutput.errorMessage = undefined;\n\t\t\t\toutput.timestamp = Date.now();\n\t\t\t\tstarted = false;\n\t\t\t};\n\n\t\t\tconst streamResponse = async (activeResponse: Response): Promise<boolean> => {\n\t\t\t\tif (!activeResponse.body) {\n\t\t\t\t\tthrow new Error(\"No response body\");\n\t\t\t\t}\n\n\t\t\t\tlet hasContent = false;\n\t\t\t\tlet currentBlock: TextContent | ThinkingContent | null = null;\n\t\t\t\tconst blocks = output.content;\n\t\t\t\tconst blockIndex = () => blocks.length - 1;\n\n\t\t\t\t// Read SSE stream\n\t\t\t\tconst reader = activeResponse.body.getReader();\n\t\t\t\tconst decoder = new TextDecoder();\n\t\t\t\tlet buffer = \"\";\n\n\t\t\t\t// Set up abort handler to cancel reader when signal fires\n\t\t\t\tconst abortHandler = () => {\n\t\t\t\t\tvoid reader.cancel().catch(() => {});\n\t\t\t\t};\n\t\t\t\toptions?.signal?.addEventListener(\"abort\", abortHandler);\n\n\t\t\t\ttry {\n\t\t\t\t\twhile (true) {\n\t\t\t\t\t\t// Check abort signal before each read\n\t\t\t\t\t\tif (options?.signal?.aborted) {\n\t\t\t\t\t\t\tthrow new Error(\"Request was aborted\");\n\t\t\t\t\t\t}\n\n\t\t\t\t\t\tconst { done, value } = await reader.read();\n\t\t\t\t\t\tif (done) break;\n\n\t\t\t\t\t\tbuffer += decoder.decode(value, { stream: true });\n\t\t\t\t\t\tconst lines = buffer.split(\"\\n\");\n\t\t\t\t\t\tbuffer = lines.pop() || \"\";\n\n\t\t\t\t\t\tfor (const line of lines) {\n\t\t\t\t\t\t\tif (!line.startsWith(\"data:\")) continue;\n\n\t\t\t\t\t\t\tconst jsonStr = line.slice(5).trim();\n\t\t\t\t\t\t\tif (!jsonStr) continue;\n\n\t\t\t\t\t\t\tlet chunk: CloudCodeAssistResponseChunk;\n\t\t\t\t\t\t\ttry {\n\t\t\t\t\t\t\t\tchunk = JSON.parse(jsonStr);\n\t\t\t\t\t\t\t} catch {\n\t\t\t\t\t\t\t\tcontinue;\n\t\t\t\t\t\t\t}\n\n\t\t\t\t\t\t\t// Unwrap the response\n\t\t\t\t\t\t\tconst responseData = chunk.response;\n\t\t\t\t\t\t\tif (!responseData) continue;\n\t\t\t\t\t\t\t// Cloud Code Assist mirrors Gemini's responseId field. Keep the first non-empty one.\n\t\t\t\t\t\t\t// A single streamed response should retain the same ID across chunks.\n\t\t\t\t\t\t\toutput.responseId ||= responseData.responseId;\n\n\t\t\t\t\t\t\tconst candidate = responseData.candidates?.[0];\n\t\t\t\t\t\t\tif (candidate?.content?.parts) {\n\t\t\t\t\t\t\t\tfor (const part of candidate.content.parts) {\n\t\t\t\t\t\t\t\t\tif (part.text !== undefined) {\n\t\t\t\t\t\t\t\t\t\thasContent = true;\n\t\t\t\t\t\t\t\t\t\tconst isThinking = isThinkingPart(part);\n\t\t\t\t\t\t\t\t\t\tif (\n\t\t\t\t\t\t\t\t\t\t\t!currentBlock ||\n\t\t\t\t\t\t\t\t\t\t\t(isThinking && currentBlock.type !== \"thinking\") ||\n\t\t\t\t\t\t\t\t\t\t\t(!isThinking && currentBlock.type !== \"text\")\n\t\t\t\t\t\t\t\t\t\t) {\n\t\t\t\t\t\t\t\t\t\t\tif (currentBlock) {\n\t\t\t\t\t\t\t\t\t\t\t\tif (currentBlock.type === \"text\") {\n\t\t\t\t\t\t\t\t\t\t\t\t\tstream.push({\n\t\t\t\t\t\t\t\t\t\t\t\t\t\ttype: \"text_end\",\n\t\t\t\t\t\t\t\t\t\t\t\t\t\tcontentIndex: blocks.length - 1,\n\t\t\t\t\t\t\t\t\t\t\t\t\t\tcontent: currentBlock.text,\n\t\t\t\t\t\t\t\t\t\t\t\t\t\tpartial: output,\n\t\t\t\t\t\t\t\t\t\t\t\t\t});\n\t\t\t\t\t\t\t\t\t\t\t\t} else {\n\t\t\t\t\t\t\t\t\t\t\t\t\tstream.push({\n\t\t\t\t\t\t\t\t\t\t\t\t\t\ttype: \"thinking_end\",\n\t\t\t\t\t\t\t\t\t\t\t\t\t\tcontentIndex: blockIndex(),\n\t\t\t\t\t\t\t\t\t\t\t\t\t\tcontent: currentBlock.thinking,\n\t\t\t\t\t\t\t\t\t\t\t\t\t\tpartial: output,\n\t\t\t\t\t\t\t\t\t\t\t\t\t});\n\t\t\t\t\t\t\t\t\t\t\t\t}\n\t\t\t\t\t\t\t\t\t\t\t}\n\t\t\t\t\t\t\t\t\t\t\tif (isThinking) {\n\t\t\t\t\t\t\t\t\t\t\t\tcurrentBlock = { type: \"thinking\", thinking: \"\", thinkingSignature: undefined };\n\t\t\t\t\t\t\t\t\t\t\t\toutput.content.push(currentBlock);\n\t\t\t\t\t\t\t\t\t\t\t\tensureStarted();\n\t\t\t\t\t\t\t\t\t\t\t\tstream.push({\n\t\t\t\t\t\t\t\t\t\t\t\t\ttype: \"thinking_start\",\n\t\t\t\t\t\t\t\t\t\t\t\t\tcontentIndex: blockIndex(),\n\t\t\t\t\t\t\t\t\t\t\t\t\tpartial: output,\n\t\t\t\t\t\t\t\t\t\t\t\t});\n\t\t\t\t\t\t\t\t\t\t\t} else {\n\t\t\t\t\t\t\t\t\t\t\t\tcurrentBlock = { type: \"text\", text: \"\" };\n\t\t\t\t\t\t\t\t\t\t\t\toutput.content.push(currentBlock);\n\t\t\t\t\t\t\t\t\t\t\t\tensureStarted();\n\t\t\t\t\t\t\t\t\t\t\t\tstream.push({ type: \"text_start\", contentIndex: blockIndex(), partial: output });\n\t\t\t\t\t\t\t\t\t\t\t}\n\t\t\t\t\t\t\t\t\t\t}\n\t\t\t\t\t\t\t\t\t\tif (currentBlock.type === \"thinking\") {\n\t\t\t\t\t\t\t\t\t\t\tcurrentBlock.thinking += part.text;\n\t\t\t\t\t\t\t\t\t\t\tcurrentBlock.thinkingSignature = retainThoughtSignature(\n\t\t\t\t\t\t\t\t\t\t\t\tcurrentBlock.thinkingSignature,\n\t\t\t\t\t\t\t\t\t\t\t\tpart.thoughtSignature,\n\t\t\t\t\t\t\t\t\t\t\t);\n\t\t\t\t\t\t\t\t\t\t\tstream.push({\n\t\t\t\t\t\t\t\t\t\t\t\ttype: \"thinking_delta\",\n\t\t\t\t\t\t\t\t\t\t\t\tcontentIndex: blockIndex(),\n\t\t\t\t\t\t\t\t\t\t\t\tdelta: part.text,\n\t\t\t\t\t\t\t\t\t\t\t\tpartial: output,\n\t\t\t\t\t\t\t\t\t\t\t});\n\t\t\t\t\t\t\t\t\t\t} else {\n\t\t\t\t\t\t\t\t\t\t\tcurrentBlock.text += part.text;\n\t\t\t\t\t\t\t\t\t\t\tcurrentBlock.textSignature = retainThoughtSignature(\n\t\t\t\t\t\t\t\t\t\t\t\tcurrentBlock.textSignature,\n\t\t\t\t\t\t\t\t\t\t\t\tpart.thoughtSignature,\n\t\t\t\t\t\t\t\t\t\t\t);\n\t\t\t\t\t\t\t\t\t\t\tstream.push({\n\t\t\t\t\t\t\t\t\t\t\t\ttype: \"text_delta\",\n\t\t\t\t\t\t\t\t\t\t\t\tcontentIndex: blockIndex(),\n\t\t\t\t\t\t\t\t\t\t\t\tdelta: part.text,\n\t\t\t\t\t\t\t\t\t\t\t\tpartial: output,\n\t\t\t\t\t\t\t\t\t\t\t});\n\t\t\t\t\t\t\t\t\t\t}\n\t\t\t\t\t\t\t\t\t}\n\n\t\t\t\t\t\t\t\t\tif (part.functionCall) {\n\t\t\t\t\t\t\t\t\t\thasContent = true;\n\t\t\t\t\t\t\t\t\t\tif (currentBlock) {\n\t\t\t\t\t\t\t\t\t\t\tif (currentBlock.type === \"text\") {\n\t\t\t\t\t\t\t\t\t\t\t\tstream.push({\n\t\t\t\t\t\t\t\t\t\t\t\t\ttype: \"text_end\",\n\t\t\t\t\t\t\t\t\t\t\t\t\tcontentIndex: blockIndex(),\n\t\t\t\t\t\t\t\t\t\t\t\t\tcontent: currentBlock.text,\n\t\t\t\t\t\t\t\t\t\t\t\t\tpartial: output,\n\t\t\t\t\t\t\t\t\t\t\t\t});\n\t\t\t\t\t\t\t\t\t\t\t} else {\n\t\t\t\t\t\t\t\t\t\t\t\tstream.push({\n\t\t\t\t\t\t\t\t\t\t\t\t\ttype: \"thinking_end\",\n\t\t\t\t\t\t\t\t\t\t\t\t\tcontentIndex: blockIndex(),\n\t\t\t\t\t\t\t\t\t\t\t\t\tcontent: currentBlock.thinking,\n\t\t\t\t\t\t\t\t\t\t\t\t\tpartial: output,\n\t\t\t\t\t\t\t\t\t\t\t\t});\n\t\t\t\t\t\t\t\t\t\t\t}\n\t\t\t\t\t\t\t\t\t\t\tcurrentBlock = null;\n\t\t\t\t\t\t\t\t\t\t}\n\n\t\t\t\t\t\t\t\t\t\tconst providedId = part.functionCall.id;\n\t\t\t\t\t\t\t\t\t\tconst needsNewId =\n\t\t\t\t\t\t\t\t\t\t\t!providedId ||\n\t\t\t\t\t\t\t\t\t\t\toutput.content.some((b) => b.type === \"toolCall\" && b.id === providedId);\n\t\t\t\t\t\t\t\t\t\tconst toolCallId = needsNewId\n\t\t\t\t\t\t\t\t\t\t\t? `${part.functionCall.name}_${Date.now()}_${++toolCallCounter}`\n\t\t\t\t\t\t\t\t\t\t\t: providedId;\n\n\t\t\t\t\t\t\t\t\t\tconst toolCall: ToolCall = {\n\t\t\t\t\t\t\t\t\t\t\ttype: \"toolCall\",\n\t\t\t\t\t\t\t\t\t\t\tid: toolCallId,\n\t\t\t\t\t\t\t\t\t\t\tname: part.functionCall.name || \"\",\n\t\t\t\t\t\t\t\t\t\t\targuments: (part.functionCall.args as Record<string, unknown>) ?? {},\n\t\t\t\t\t\t\t\t\t\t\t...(part.thoughtSignature && { thoughtSignature: part.thoughtSignature }),\n\t\t\t\t\t\t\t\t\t\t};\n\n\t\t\t\t\t\t\t\t\t\toutput.content.push(toolCall);\n\t\t\t\t\t\t\t\t\t\tensureStarted();\n\t\t\t\t\t\t\t\t\t\tstream.push({ type: \"toolcall_start\", contentIndex: blockIndex(), partial: output });\n\t\t\t\t\t\t\t\t\t\tstream.push({\n\t\t\t\t\t\t\t\t\t\t\ttype: \"toolcall_delta\",\n\t\t\t\t\t\t\t\t\t\t\tcontentIndex: blockIndex(),\n\t\t\t\t\t\t\t\t\t\t\tdelta: JSON.stringify(toolCall.arguments),\n\t\t\t\t\t\t\t\t\t\t\tpartial: output,\n\t\t\t\t\t\t\t\t\t\t});\n\t\t\t\t\t\t\t\t\t\tstream.push({\n\t\t\t\t\t\t\t\t\t\t\ttype: \"toolcall_end\",\n\t\t\t\t\t\t\t\t\t\t\tcontentIndex: blockIndex(),\n\t\t\t\t\t\t\t\t\t\t\ttoolCall,\n\t\t\t\t\t\t\t\t\t\t\tpartial: output,\n\t\t\t\t\t\t\t\t\t\t});\n\t\t\t\t\t\t\t\t\t}\n\t\t\t\t\t\t\t\t}\n\t\t\t\t\t\t\t}\n\n\t\t\t\t\t\t\tif (candidate?.finishReason) {\n\t\t\t\t\t\t\t\toutput.stopReason = mapStopReasonString(candidate.finishReason);\n\t\t\t\t\t\t\t\tif (output.content.some((b) => b.type === \"toolCall\")) {\n\t\t\t\t\t\t\t\t\toutput.stopReason = \"toolUse\";\n\t\t\t\t\t\t\t\t}\n\t\t\t\t\t\t\t}\n\n\t\t\t\t\t\t\tif (responseData.usageMetadata) {\n\t\t\t\t\t\t\t\t// promptTokenCount includes cachedContentTokenCount, so subtract to get fresh input\n\t\t\t\t\t\t\t\tconst promptTokens = responseData.usageMetadata.promptTokenCount || 0;\n\t\t\t\t\t\t\t\tconst cacheReadTokens = responseData.usageMetadata.cachedContentTokenCount || 0;\n\t\t\t\t\t\t\t\toutput.usage = {\n\t\t\t\t\t\t\t\t\tinput: promptTokens - cacheReadTokens,\n\t\t\t\t\t\t\t\t\toutput:\n\t\t\t\t\t\t\t\t\t\t(responseData.usageMetadata.candidatesTokenCount || 0) +\n\t\t\t\t\t\t\t\t\t\t(responseData.usageMetadata.thoughtsTokenCount || 0),\n\t\t\t\t\t\t\t\t\tcacheRead: cacheReadTokens,\n\t\t\t\t\t\t\t\t\tcacheWrite: 0,\n\t\t\t\t\t\t\t\t\ttotalTokens: responseData.usageMetadata.totalTokenCount || 0,\n\t\t\t\t\t\t\t\t\tcost: {\n\t\t\t\t\t\t\t\t\t\tinput: 0,\n\t\t\t\t\t\t\t\t\t\toutput: 0,\n\t\t\t\t\t\t\t\t\t\tcacheRead: 0,\n\t\t\t\t\t\t\t\t\t\tcacheWrite: 0,\n\t\t\t\t\t\t\t\t\t\ttotal: 0,\n\t\t\t\t\t\t\t\t\t},\n\t\t\t\t\t\t\t\t};\n\t\t\t\t\t\t\t\tcalculateCost(model, output.usage);\n\t\t\t\t\t\t\t}\n\t\t\t\t\t\t}\n\t\t\t\t\t}\n\t\t\t\t} finally {\n\t\t\t\t\toptions?.signal?.removeEventListener(\"abort\", abortHandler);\n\t\t\t\t}\n\n\t\t\t\tif (currentBlock) {\n\t\t\t\t\tif (currentBlock.type === \"text\") {\n\t\t\t\t\t\tstream.push({\n\t\t\t\t\t\t\ttype: \"text_end\",\n\t\t\t\t\t\t\tcontentIndex: blockIndex(),\n\t\t\t\t\t\t\tcontent: currentBlock.text,\n\t\t\t\t\t\t\tpartial: output,\n\t\t\t\t\t\t});\n\t\t\t\t\t} else {\n\t\t\t\t\t\tstream.push({\n\t\t\t\t\t\t\ttype: \"thinking_end\",\n\t\t\t\t\t\t\tcontentIndex: blockIndex(),\n\t\t\t\t\t\t\tcontent: currentBlock.thinking,\n\t\t\t\t\t\t\tpartial: output,\n\t\t\t\t\t\t});\n\t\t\t\t\t}\n\t\t\t\t}\n\n\t\t\t\treturn hasContent;\n\t\t\t};\n\n\t\t\tlet receivedContent = false;\n\t\t\tlet currentResponse = response;\n\n\t\t\tfor (let emptyAttempt = 0; emptyAttempt <= MAX_EMPTY_STREAM_RETRIES; emptyAttempt++) {\n\t\t\t\tif (options?.signal?.aborted) {\n\t\t\t\t\tthrow new Error(\"Request was aborted\");\n\t\t\t\t}\n\n\t\t\t\tif (emptyAttempt > 0) {\n\t\t\t\t\tconst backoffMs = EMPTY_STREAM_BASE_DELAY_MS * 2 ** (emptyAttempt - 1);\n\t\t\t\t\tawait sleep(backoffMs, options?.signal);\n\n\t\t\t\t\tif (!requestUrl) {\n\t\t\t\t\t\tthrow new Error(\"Missing request URL\");\n\t\t\t\t\t}\n\n\t\t\t\t\tcurrentResponse = await fetch(requestUrl, {\n\t\t\t\t\t\tmethod: \"POST\",\n\t\t\t\t\t\theaders: requestHeaders,\n\t\t\t\t\t\tbody: requestBodyJson,\n\t\t\t\t\t\tsignal: options?.signal,\n\t\t\t\t\t});\n\t\t\t\t\tawait options?.onResponse?.(\n\t\t\t\t\t\t{ status: currentResponse.status, headers: headersToRecord(currentResponse.headers) },\n\t\t\t\t\t\tmodel,\n\t\t\t\t\t);\n\n\t\t\t\t\tif (!currentResponse.ok) {\n\t\t\t\t\t\tconst retryErrorText = await currentResponse.text();\n\t\t\t\t\t\tthrow new Error(`Cloud Code Assist API error (${currentResponse.status}): ${retryErrorText}`);\n\t\t\t\t\t}\n\t\t\t\t}\n\n\t\t\t\tconst streamed = await streamResponse(currentResponse);\n\t\t\t\tif (streamed) {\n\t\t\t\t\treceivedContent = true;\n\t\t\t\t\tbreak;\n\t\t\t\t}\n\n\t\t\t\tif (emptyAttempt < MAX_EMPTY_STREAM_RETRIES) {\n\t\t\t\t\tresetOutput();\n\t\t\t\t}\n\t\t\t}\n\n\t\t\tif (!receivedContent) {\n\t\t\t\tthrow new Error(\"Cloud Code Assist API returned an empty response\");\n\t\t\t}\n\n\t\t\tif (options?.signal?.aborted) {\n\t\t\t\tthrow new Error(\"Request was aborted\");\n\t\t\t}\n\n\t\t\tif (output.stopReason === \"aborted\" || output.stopReason === \"error\") {\n\t\t\t\tthrow new Error(\"An unknown error occurred\");\n\t\t\t}\n\n\t\t\tstream.push({ type: \"done\", reason: output.stopReason, message: output });\n\t\t\tstream.end();\n\t\t} catch (error) {\n\t\t\tfor (const block of output.content) {\n\t\t\t\tif (\"index\" in block) {\n\t\t\t\t\tdelete (block as { index?: number }).index;\n\t\t\t\t}\n\t\t\t}\n\t\t\toutput.stopReason = options?.signal?.aborted ? \"aborted\" : \"error\";\n\t\t\toutput.errorMessage = error instanceof Error ? error.message : JSON.stringify(error);\n\t\t\tstream.push({ type: \"error\", reason: output.stopReason, error: output });\n\t\t\tstream.end();\n\t\t}\n\t})();\n\n\treturn stream;\n};\n\nexport const streamSimpleGoogleGeminiCli: StreamFunction<\"google-gemini-cli\", SimpleStreamOptions> = (\n\tmodel: Model<\"google-gemini-cli\">,\n\tcontext: Context,\n\toptions?: SimpleStreamOptions,\n): AssistantMessageEventStream => {\n\tconst apiKey = options?.apiKey;\n\tif (!apiKey) {\n\t\tthrow new Error(\"Google Cloud Code Assist requires OAuth authentication. Use /login to authenticate.\");\n\t}\n\n\tconst base = buildBaseOptions(model, options, apiKey);\n\tif (!options?.reasoning) {\n\t\treturn streamGoogleGeminiCli(model, context, {\n\t\t\t...base,\n\t\t\tthinking: { enabled: false },\n\t\t} satisfies GoogleGeminiCliOptions);\n\t}\n\n\tconst effort = clampReasoning(options.reasoning)!;\n\tif (isGemini3Model(model.id)) {\n\t\treturn streamGoogleGeminiCli(model, context, {\n\t\t\t...base,\n\t\t\tthinking: {\n\t\t\t\tenabled: true,\n\t\t\t\tlevel: getGeminiCliThinkingLevel(effort, model.id),\n\t\t\t},\n\t\t} satisfies GoogleGeminiCliOptions);\n\t}\n\n\tconst defaultBudgets: ThinkingBudgets = {\n\t\tminimal: 1024,\n\t\tlow: 2048,\n\t\tmedium: 8192,\n\t\thigh: 16384,\n\t};\n\tconst budgets = { ...defaultBudgets, ...options.thinkingBudgets };\n\n\tconst minOutputTokens = 1024;\n\tlet thinkingBudget = budgets[effort]!;\n\tconst maxTokens = Math.min((base.maxTokens || 0) + thinkingBudget, model.maxTokens);\n\n\tif (maxTokens <= thinkingBudget) {\n\t\tthinkingBudget = Math.max(0, maxTokens - minOutputTokens);\n\t}\n\n\treturn streamGoogleGeminiCli(model, context, {\n\t\t...base,\n\t\tmaxTokens,\n\t\tthinking: {\n\t\t\tenabled: true,\n\t\t\tbudgetTokens: thinkingBudget,\n\t\t},\n\t} satisfies GoogleGeminiCliOptions);\n};\n\nexport function buildRequest(\n\tmodel: Model<\"google-gemini-cli\">,\n\tcontext: Context,\n\tprojectId: string,\n\toptions: GoogleGeminiCliOptions = {},\n\tisAntigravity = false,\n): CloudCodeAssistRequest {\n\tconst contents = convertMessages(model, context);\n\n\tconst generationConfig: CloudCodeAssistRequest[\"request\"][\"generationConfig\"] = {};\n\tif (options.temperature !== undefined) {\n\t\tgenerationConfig.temperature = options.temperature;\n\t}\n\tif (options.maxTokens !== undefined) {\n\t\tgenerationConfig.maxOutputTokens = options.maxTokens;\n\t}\n\n\t// Thinking config\n\tif (options.thinking?.enabled && model.reasoning) {\n\t\tgenerationConfig.thinkingConfig = {\n\t\t\tincludeThoughts: true,\n\t\t};\n\t\t// Gemini 3 models use thinkingLevel, older models use thinkingBudget\n\t\tif (options.thinking.level !== undefined) {\n\t\t\t// Cast to any since our GoogleThinkingLevel mirrors Google's ThinkingLevel enum values\n\t\t\tgenerationConfig.thinkingConfig.thinkingLevel = options.thinking.level as any;\n\t\t} else if (options.thinking.budgetTokens !== undefined) {\n\t\t\tgenerationConfig.thinkingConfig.thinkingBudget = options.thinking.budgetTokens;\n\t\t}\n\t} else if (model.reasoning && options.thinking && !options.thinking.enabled) {\n\t\tgenerationConfig.thinkingConfig = getDisabledThinkingConfig(model.id);\n\t}\n\n\tconst request: CloudCodeAssistRequest[\"request\"] = {\n\t\tcontents,\n\t};\n\n\trequest.sessionId = options.sessionId;\n\n\t// System instruction must be object with parts, not plain string\n\tif (context.systemPrompt) {\n\t\trequest.systemInstruction = {\n\t\t\tparts: [{ text: sanitizeSurrogates(context.systemPrompt) }],\n\t\t};\n\t}\n\n\tif (Object.keys(generationConfig).length > 0) {\n\t\trequest.generationConfig = generationConfig;\n\t}\n\n\tif (context.tools && context.tools.length > 0) {\n\t\t// Claude models on Cloud Code Assist need the legacy `parameters` field;\n\t\t// the API translates it into Anthropic's `input_schema`.\n\t\tconst useParameters = model.id.startsWith(\"claude-\");\n\t\trequest.tools = convertTools(context.tools, useParameters);\n\t\tif (options.toolChoice) {\n\t\t\trequest.toolConfig = {\n\t\t\t\tfunctionCallingConfig: {\n\t\t\t\t\tmode: mapToolChoice(options.toolChoice),\n\t\t\t\t},\n\t\t\t};\n\t\t}\n\t}\n\n\tif (isAntigravity) {\n\t\tconst existingParts = request.systemInstruction?.parts ?? [];\n\t\trequest.systemInstruction = {\n\t\t\trole: \"user\",\n\t\t\tparts: [\n\t\t\t\t{ text: ANTIGRAVITY_SYSTEM_INSTRUCTION },\n\t\t\t\t{ text: `Please ignore following [ignore]${ANTIGRAVITY_SYSTEM_INSTRUCTION}[/ignore]` },\n\t\t\t\t...existingParts,\n\t\t\t],\n\t\t};\n\t}\n\n\treturn {\n\t\tproject: projectId,\n\t\tmodel: model.id,\n\t\trequest,\n\t\t...(isAntigravity ? { requestType: \"agent\" } : {}),\n\t\tuserAgent: isAntigravity ? \"antigravity\" : \"pi-coding-agent\",\n\t\trequestId: `${isAntigravity ? \"agent\" : \"pi\"}-${Date.now()}-${Math.random().toString(36).slice(2, 11)}`,\n\t};\n}\n\ntype ClampedThinkingLevel = Exclude<ThinkingLevel, \"xhigh\">;\n\nfunction getDisabledThinkingConfig(modelId: string): ThinkingConfig {\n\t// Google docs: Gemini 3.1 Pro cannot disable thinking, and Gemini 3 Flash / Flash-Lite\n\t// do not support full thinking-off either. For Gemini 3 models, use the lowest supported\n\t// thinkingLevel without includeThoughts so hidden thinking remains invisible to pi.\n\tif (isGemini3ProModel(modelId)) {\n\t\treturn { thinkingLevel: \"LOW\" as any };\n\t}\n\tif (isGemini3FlashModel(modelId)) {\n\t\treturn { thinkingLevel: \"MINIMAL\" as any };\n\t}\n\n\t// Gemini 2.x supports disabling via thinkingBudget = 0.\n\treturn { thinkingBudget: 0 };\n}\n\nfunction getGeminiCliThinkingLevel(effort: ClampedThinkingLevel, modelId: string): GoogleThinkingLevel {\n\tif (isGemini3ProModel(modelId)) {\n\t\tswitch (effort) {\n\t\t\tcase \"minimal\":\n\t\t\tcase \"low\":\n\t\t\t\treturn \"LOW\";\n\t\t\tcase \"medium\":\n\t\t\tcase \"high\":\n\t\t\t\treturn \"HIGH\";\n\t\t}\n\t}\n\tswitch (effort) {\n\t\tcase \"minimal\":\n\t\t\treturn \"MINIMAL\";\n\t\tcase \"low\":\n\t\t\treturn \"LOW\";\n\t\tcase \"medium\":\n\t\t\treturn \"MEDIUM\";\n\t\tcase \"high\":\n\t\t\treturn \"HIGH\";\n\t}\n}\n"]}
|