@probeo/anymodel 0.4.0 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -85,6 +85,18 @@ perplexity/sonar-pro
85
85
  ollama/llama3.3
86
86
  ```
87
87
 
88
+ ### Flex Pricing (OpenAI)
89
+
90
+ Get 50% off OpenAI requests with flexible latency:
91
+
92
+ ```typescript
93
+ const response = await client.chat.completions.create({
94
+ model: "openai/gpt-4o",
95
+ messages: [{ role: "user", content: "Hello!" }],
96
+ service_tier: "flex",
97
+ });
98
+ ```
99
+
88
100
  ## Fallback Routing
89
101
 
90
102
  Try multiple models in order. If one fails, the next is attempted:
@@ -148,7 +160,7 @@ const response = await client.chat.completions.create({
148
160
 
149
161
  ## Batch Processing
150
162
 
151
- Process many requests with native provider batch APIs or concurrent fallback. OpenAI and Anthropic batches are processed server-side — OpenAI at 50% cost, Anthropic with async processing for up to 10K requests. Other providers fall back to concurrent execution automatically.
163
+ Process many requests with native provider batch APIs or concurrent fallback. OpenAI, Anthropic, and Google batches are processed server-side — OpenAI at 50% cost, Anthropic with async processing for up to 10K requests, Google at 50% cost via `batchGenerateContent`. Other providers fall back to concurrent execution automatically.
152
164
 
153
165
  ### Submit and wait
154
166
 
@@ -169,7 +181,7 @@ for (const result of results.results) {
169
181
 
170
182
  ### Submit now, check later
171
183
 
172
- Submit a batch and get back an ID immediately — no need to keep the process running for native batches (OpenAI, Anthropic):
184
+ Submit a batch and get back an ID immediately — no need to keep the process running for native batches (OpenAI, Anthropic, Google):
173
185
 
174
186
  ```typescript
175
187
  // Submit and get the batch ID
@@ -232,6 +244,10 @@ const results = await client.batches.createAndPoll(request, {
232
244
 
233
245
  Batches are persisted to `./.anymodel/batches/` in the current working directory and survive process restarts.
234
246
 
247
+ ### Automatic max_tokens
248
+
249
+ When `max_tokens` isn't set on a batch request, anymodel automatically calculates a safe value per-request based on the estimated input size and the model's context window. This prevents context-overflow errors and requests rejected for exceeding model limits, without requiring you to hand-tune each request in a large batch. The estimation uses a ~4 chars/token heuristic with a 5% safety margin — conservative enough to avoid overflows, lightweight enough to skip tokenizer dependencies.
250
+
235
251
  ## Models Endpoint
236
252
 
237
253
  ```typescript
@@ -265,6 +281,7 @@ const client = new AnyModel({
265
281
  temperature: 0.7,
266
282
  max_tokens: 4096,
267
283
  retries: 2,
284
+ timeout: 120, // HTTP timeout in seconds (default: 120 = 2 min, flex: 600 = 10 min)
268
285
  },
269
286
  });
270
287
 
@@ -426,6 +443,8 @@ npx tsx examples/basic.ts batch
426
443
  - **Retries**: Automatic retry with exponential backoff on 429/502/503 errors (configurable via `defaults.retries`)
427
444
  - **Rate limit tracking**: Per-provider rate limit state, automatically skips rate-limited providers during fallback routing
428
445
  - **Parameter stripping**: Unsupported parameters are automatically removed before forwarding to providers
446
+ - **Smart batch defaults**: Automatic `max_tokens` estimation per-request in batches — calculates safe values from input size and model context limits, preventing context-overflow errors without manual tuning
447
+ - **Memory-efficient batching**: Concurrent batch requests are re-read from the on-disk JSONL and parsed one line at a time — only N requests (default 5) are in-flight at once, so 10K+ request batches don't hold every in-flight response in memory
429
448
  - **High-volume IO**: All batch file operations use concurrency-limited async queues with atomic durable writes (temp file + fsync + rename) to prevent corruption on crash. Defaults: 20 concurrent reads, 10 concurrent writes — configurable via `io.readConcurrency` and `io.writeConcurrency`
430
449
 
431
450
  ## Roadmap
@@ -433,7 +452,7 @@ npx tsx examples/basic.ts batch
433
452
  - [ ] **A/B testing** — split routing (% traffic to each model) and compare mode (same request to multiple models, return all responses with stats)
434
453
  - [ ] **Cost tracking** — per-request and aggregate cost calculation from provider pricing
435
454
  - [ ] **Caching** — response caching with configurable TTL for identical requests
436
- - [x] **Native batch APIs** — OpenAI Batch API (JSONL upload, 50% cost) and Anthropic Message Batches (10K requests, async). Auto-detects provider and routes to native API, falls back to concurrent for other providers
455
+ - [x] **Native batch APIs** — OpenAI Batch API (JSONL upload, 50% cost), Anthropic Message Batches (10K requests, async), and Google Gemini Batch (50% cost). Auto-detects provider and routes to native API, falls back to concurrent for other providers
437
456
  - [ ] **Result export** — `saveResults()` to write batch results to a configurable output directory
438
457
  - [ ] **Prompt logging** — optional request/response logging for debugging and evaluation
439
458
 
package/dist/cli.cjs CHANGED
@@ -508,6 +508,25 @@ var Router = class {
508
508
  }
509
509
  };
510
510
 
511
// src/utils/fetch-with-timeout.ts
// Module-level timeout state: a configurable default budget plus a fixed,
// longer budget used for OpenAI "flex" service-tier requests.
var _defaultTimeout = 12e4;
var _flexTimeout = 6e5;

/** Override the default HTTP timeout, in milliseconds. */
function setDefaultTimeout(ms) {
  _defaultTimeout = ms;
}

/** Timeout budget (ms) applied to flex service-tier requests. */
function getFlexTimeout() {
  return _flexTimeout;
}

/**
 * fetch() wrapper that enforces a timeout via AbortSignal.timeout.
 * When the caller already supplies init.signal, both signals are combined
 * with AbortSignal.any so whichever aborts first cancels the request.
 */
function fetchWithTimeout(url, init, timeoutMs) {
  const timeoutSignal = AbortSignal.timeout(timeoutMs ?? _defaultTimeout);
  const callerSignal = init?.signal;
  const signal = callerSignal ? AbortSignal.any([timeoutSignal, callerSignal]) : timeoutSignal;
  return fetch(url, { ...init, signal });
}
529
+
511
530
  // src/providers/openai.ts
512
531
  var OPENAI_API_BASE = "https://api.openai.com/v1";
513
532
  var SUPPORTED_PARAMS = /* @__PURE__ */ new Set([
@@ -525,19 +544,20 @@ var SUPPORTED_PARAMS = /* @__PURE__ */ new Set([
525
544
  "tools",
526
545
  "tool_choice",
527
546
  "user",
528
- "logit_bias"
547
+ "logit_bias",
548
+ "service_tier"
529
549
  ]);
530
550
  function createOpenAIAdapter(apiKey, baseURL) {
531
551
  const base = baseURL || OPENAI_API_BASE;
532
- async function makeRequest(path2, body, method = "POST") {
533
- const res = await fetch(`${base}${path2}`, {
552
+ async function makeRequest(path2, body, method = "POST", timeoutMs) {
553
+ const res = await fetchWithTimeout(`${base}${path2}`, {
534
554
  method,
535
555
  headers: {
536
556
  "Content-Type": "application/json",
537
557
  "Authorization": `Bearer ${apiKey}`
538
558
  },
539
559
  body: body ? JSON.stringify(body) : void 0
540
- });
560
+ }, timeoutMs);
541
561
  if (!res.ok) {
542
562
  let errorBody;
543
563
  try {
@@ -585,6 +605,7 @@ function createOpenAIAdapter(apiKey, baseURL) {
585
605
  if (request.tools !== void 0) body.tools = request.tools;
586
606
  if (request.tool_choice !== void 0) body.tool_choice = request.tool_choice;
587
607
  if (request.user !== void 0) body.user = request.user;
608
+ if (request.service_tier !== void 0) body.service_tier = request.service_tier;
588
609
  return body;
589
610
  }
590
611
  const adapter = {
@@ -686,13 +707,15 @@ function createOpenAIAdapter(apiKey, baseURL) {
686
707
  },
687
708
  async sendRequest(request) {
688
709
  const body = buildRequestBody(request);
689
- const res = await makeRequest("/chat/completions", body);
710
+ const timeout = request.service_tier === "flex" ? getFlexTimeout() : void 0;
711
+ const res = await makeRequest("/chat/completions", body, "POST", timeout);
690
712
  const json = await res.json();
691
713
  return adapter.translateResponse(json);
692
714
  },
693
715
  async sendStreamingRequest(request) {
694
716
  const body = buildRequestBody({ ...request, stream: true });
695
- const res = await makeRequest("/chat/completions", body);
717
+ const timeout = request.service_tier === "flex" ? getFlexTimeout() : void 0;
718
+ const res = await makeRequest("/chat/completions", body, "POST", timeout);
696
719
  if (!res.body) {
697
720
  throw new AnyModelError(502, "No response body for streaming request", {
698
721
  provider_name: "openai"
@@ -739,7 +762,7 @@ var FALLBACK_MODELS = [
739
762
  ];
740
763
  function createAnthropicAdapter(apiKey) {
741
764
  async function makeRequest(path2, body, stream = false) {
742
- const res = await fetch(`${ANTHROPIC_API_BASE}${path2}`, {
765
+ const res = await fetchWithTimeout(`${ANTHROPIC_API_BASE}${path2}`, {
743
766
  method: "POST",
744
767
  headers: {
745
768
  "Content-Type": "application/json",
@@ -996,7 +1019,7 @@ ${body.system}` : jsonInstruction;
996
1019
  },
997
1020
  async listModels() {
998
1021
  try {
999
- const res = await fetch(`${ANTHROPIC_API_BASE}/models`, {
1022
+ const res = await fetchWithTimeout(`${ANTHROPIC_API_BASE}/models`, {
1000
1023
  method: "GET",
1001
1024
  headers: {
1002
1025
  "x-api-key": apiKey,
@@ -1281,7 +1304,7 @@ function createGoogleAdapter(apiKey) {
1281
1304
  },
1282
1305
  async listModels() {
1283
1306
  try {
1284
- const res = await fetch(`${GEMINI_API_BASE}/models?key=${apiKey}`);
1307
+ const res = await fetchWithTimeout(`${GEMINI_API_BASE}/models?key=${apiKey}`);
1285
1308
  if (!res.ok) return FALLBACK_MODELS2;
1286
1309
  const data = await res.json();
1287
1310
  const models = data.models || [];
@@ -1316,12 +1339,12 @@ function createGoogleAdapter(apiKey) {
1316
1339
  return SUPPORTED_PARAMS3.has(param);
1317
1340
  },
1318
1341
  supportsBatch() {
1319
- return false;
1342
+ return true;
1320
1343
  },
1321
1344
  async sendRequest(request) {
1322
1345
  const body = translateRequest(request);
1323
1346
  const url = getModelEndpoint(request.model, false);
1324
- const res = await fetch(url, {
1347
+ const res = await fetchWithTimeout(url, {
1325
1348
  method: "POST",
1326
1349
  headers: { "Content-Type": "application/json" },
1327
1350
  body: JSON.stringify(body)
@@ -1344,7 +1367,7 @@ function createGoogleAdapter(apiKey) {
1344
1367
  async sendStreamingRequest(request) {
1345
1368
  const body = translateRequest(request);
1346
1369
  const url = getModelEndpoint(request.model, true);
1347
- const res = await fetch(url, {
1370
+ const res = await fetchWithTimeout(url, {
1348
1371
  method: "POST",
1349
1372
  headers: { "Content-Type": "application/json" },
1350
1373
  body: JSON.stringify(body)
@@ -1394,7 +1417,7 @@ var MODELS = [
1394
1417
  ];
1395
1418
  function createPerplexityAdapter(apiKey) {
1396
1419
  async function makeRequest(path2, body, method = "POST") {
1397
- const res = await fetch(`${PERPLEXITY_API_BASE}${path2}`, {
1420
+ const res = await fetchWithTimeout(`${PERPLEXITY_API_BASE}${path2}`, {
1398
1421
  method,
1399
1422
  headers: {
1400
1423
  "Content-Type": "application/json",
@@ -1949,6 +1972,17 @@ var BatchStore = class {
1949
1972
  const entries = await readDirQueued(this.dir);
1950
1973
  return entries.filter((d) => d.isDirectory()).map((d) => d.name).sort();
1951
1974
  }
1975
  /**
   * Iterate the persisted requests of a batch as parsed objects.
   *
   * NOTE(review): despite yielding lazily (one JSON.parse per request),
   * this currently loads the entire requests.jsonl into memory via
   * readFileQueued before splitting on newlines — only the parsing is
   * incremental, not the disk read itself.
   *
   * @param id batch identifier (directory name under the batch store)
   * @yields one parsed request object per non-blank JSONL line
   */
  async *streamRequests(id) {
    const p = joinPath(this.batchDir(id), "requests.jsonl");
    // Missing file -> empty iteration (batch may not have been persisted yet).
    if (!await fileExistsQueued(p)) return;
    const raw = await readFileQueued(p, "utf8");
    for (const line of raw.split("\n")) {
      // Skip blank/whitespace-only lines (a trailing newline produces one).
      if (line.trim()) yield JSON.parse(line);
    }
  }
1952
1986
  /**
1953
1987
  * Check if a batch exists.
1954
1988
  */
@@ -2013,7 +2047,7 @@ var BatchManager = class {
2013
2047
  this.processNativeBatch(id, request, native.adapter).catch(() => {
2014
2048
  });
2015
2049
  } else {
2016
- this.processConcurrentBatch(id, request).catch(() => {
2050
+ this.processConcurrentBatch(id, request.model, request.options).catch(() => {
2017
2051
  });
2018
2052
  }
2019
2053
  return batch;
@@ -2193,28 +2227,28 @@ var BatchManager = class {
2193
2227
  }
2194
2228
  /**
2195
2229
  * Process batch requests concurrently (fallback path).
2230
+ * Streams requests from disk to avoid holding them all in memory.
2196
2231
  */
2197
- async processConcurrentBatch(batchId, request) {
2232
+ async processConcurrentBatch(batchId, model, options) {
2198
2233
  const batch = await this.store.getMeta(batchId);
2199
2234
  if (!batch) return;
2200
2235
  batch.status = "processing";
2201
2236
  await this.store.updateMeta(batch);
2202
- const items = request.requests;
2203
2237
  const active = /* @__PURE__ */ new Set();
2204
2238
  const processItem = async (item) => {
2205
2239
  const current = await this.store.getMeta(batchId);
2206
2240
  if (current?.status === "cancelled") return;
2207
2241
  const chatRequest = {
2208
- model: request.model,
2242
+ model,
2209
2243
  messages: item.messages,
2210
- max_tokens: item.max_tokens ?? request.options?.max_tokens,
2211
- temperature: item.temperature ?? request.options?.temperature,
2212
- top_p: item.top_p ?? request.options?.top_p,
2213
- top_k: item.top_k ?? request.options?.top_k,
2214
- stop: item.stop ?? request.options?.stop,
2215
- response_format: item.response_format ?? request.options?.response_format,
2216
- tools: item.tools ?? request.options?.tools,
2217
- tool_choice: item.tool_choice ?? request.options?.tool_choice
2244
+ max_tokens: item.max_tokens ?? options?.max_tokens,
2245
+ temperature: item.temperature ?? options?.temperature,
2246
+ top_p: item.top_p ?? options?.top_p,
2247
+ top_k: item.top_k ?? options?.top_k,
2248
+ stop: item.stop ?? options?.stop,
2249
+ response_format: item.response_format ?? options?.response_format,
2250
+ tools: item.tools ?? options?.tools,
2251
+ tool_choice: item.tool_choice ?? options?.tool_choice
2218
2252
  };
2219
2253
  let result;
2220
2254
  try {
@@ -2245,7 +2279,7 @@ var BatchManager = class {
2245
2279
  await this.store.updateMeta(meta);
2246
2280
  }
2247
2281
  };
2248
- for (const item of items) {
2282
+ for await (const item of this.store.streamRequests(batchId)) {
2249
2283
  const current = await this.store.getMeta(batchId);
2250
2284
  if (current?.status === "cancelled") break;
2251
2285
  if (active.size >= this.concurrencyLimit) {
@@ -2266,6 +2300,51 @@ var BatchManager = class {
2266
2300
  }
2267
2301
  };
2268
2302
 
2303
// src/utils/token-estimate.ts
// Heuristic token estimation used to pick a safe max_tokens for batch
// requests when the caller did not set one. ~4 chars/token avoids pulling in
// a tokenizer dependency; a 5% margin keeps the input estimate conservative.
var CHARS_PER_TOKEN2 = 4;
// Ordered pattern table: more specific patterns (gpt-4o-mini) must precede
// their prefixes (gpt-4o) because the first match wins.
var MODEL_LIMITS = [
  // OpenAI
  { pattern: "gpt-4o-mini", limit: { contextLength: 128e3, maxCompletionTokens: 16384 } },
  { pattern: "gpt-4o", limit: { contextLength: 128e3, maxCompletionTokens: 16384 } },
  { pattern: "gpt-4-turbo", limit: { contextLength: 128e3, maxCompletionTokens: 4096 } },
  { pattern: "gpt-3.5-turbo", limit: { contextLength: 16385, maxCompletionTokens: 4096 } },
  { pattern: "o1", limit: { contextLength: 2e5, maxCompletionTokens: 1e5 } },
  { pattern: "o3", limit: { contextLength: 2e5, maxCompletionTokens: 1e5 } },
  { pattern: "o4-mini", limit: { contextLength: 2e5, maxCompletionTokens: 1e5 } },
  // Anthropic
  { pattern: "claude-opus-4", limit: { contextLength: 2e5, maxCompletionTokens: 32768 } },
  { pattern: "claude-sonnet-4", limit: { contextLength: 2e5, maxCompletionTokens: 16384 } },
  { pattern: "claude-haiku-4", limit: { contextLength: 2e5, maxCompletionTokens: 8192 } },
  { pattern: "claude-3.5-sonnet", limit: { contextLength: 2e5, maxCompletionTokens: 8192 } },
  { pattern: "claude-3-opus", limit: { contextLength: 2e5, maxCompletionTokens: 4096 } },
  // Google
  { pattern: "gemini-2.5-pro", limit: { contextLength: 1048576, maxCompletionTokens: 65536 } },
  { pattern: "gemini-2.5-flash", limit: { contextLength: 1048576, maxCompletionTokens: 65536 } },
  { pattern: "gemini-2.0-flash", limit: { contextLength: 1048576, maxCompletionTokens: 65536 } },
  { pattern: "gemini-1.5-pro", limit: { contextLength: 2097152, maxCompletionTokens: 8192 } },
  { pattern: "gemini-1.5-flash", limit: { contextLength: 1048576, maxCompletionTokens: 8192 } }
];
// Conservative fallback for unrecognized models.
var DEFAULT_LIMIT = { contextLength: 128e3, maxCompletionTokens: 4096 };

/**
 * Look up context/completion limits for a model id. Accepts either a bare
 * model name or a "provider/model" id (the provider prefix is stripped).
 * Returns DEFAULT_LIMIT when no pattern matches.
 */
function getModelLimits(model) {
  const bare = model.includes("/") ? model.slice(model.indexOf("/") + 1) : model;
  for (const entry of MODEL_LIMITS) {
    // includes() subsumes the previous startsWith() check, so one test suffices.
    if (bare.includes(entry.pattern)) {
      return entry.limit;
    }
  }
  return DEFAULT_LIMIT;
}

/**
 * Resolve max_tokens for a request: an explicit user value always wins;
 * otherwise estimate input tokens from the serialized messages
 * (~4 chars/token, +5% margin) and return the smaller of the model's
 * completion cap and the remaining context window. Clamped to >= 1 so the
 * request stays well-formed even when the input alone exceeds the context.
 *
 * @param model model id, optionally provider-prefixed
 * @param messages chat messages (serialized with JSON.stringify for sizing)
 * @param userMaxTokens caller-provided max_tokens, returned as-is when set
 * @returns a positive integer token budget
 */
function resolveMaxTokens(model, messages, userMaxTokens) {
  if (userMaxTokens !== void 0) return userMaxTokens;
  const inputChars = JSON.stringify(messages).length;
  const estimatedInput = Math.ceil(inputChars / CHARS_PER_TOKEN2);
  const estimatedWithMargin = Math.ceil(estimatedInput * 1.05);
  const limits = getModelLimits(model);
  const available = limits.contextLength - estimatedWithMargin;
  return Math.max(1, Math.min(limits.maxCompletionTokens, available));
}
2347
+
2269
2348
  // src/providers/openai-batch.ts
2270
2349
  var OPENAI_API_BASE2 = "https://api.openai.com/v1";
2271
2350
  function createOpenAIBatchAdapter(apiKey) {
@@ -2280,7 +2359,7 @@ function createOpenAIBatchAdapter(apiKey) {
2280
2359
  headers["Content-Type"] = "application/json";
2281
2360
  fetchBody = JSON.stringify(options.body);
2282
2361
  }
2283
- const res = await fetch(`${OPENAI_API_BASE2}${path2}`, {
2362
+ const res = await fetchWithTimeout(`${OPENAI_API_BASE2}${path2}`, {
2284
2363
  method: options.method || "GET",
2285
2364
  headers,
2286
2365
  body: fetchBody
@@ -2306,7 +2385,7 @@ function createOpenAIBatchAdapter(apiKey) {
2306
2385
  model,
2307
2386
  messages: req.messages
2308
2387
  };
2309
- if (req.max_tokens !== void 0) body.max_tokens = req.max_tokens;
2388
+ body.max_tokens = req.max_tokens !== void 0 ? req.max_tokens : resolveMaxTokens(model, req.messages);
2310
2389
  if (req.temperature !== void 0) body.temperature = req.temperature;
2311
2390
  if (req.top_p !== void 0) body.top_p = req.top_p;
2312
2391
  if (req.stop !== void 0) body.stop = req.stop;
@@ -2465,7 +2544,7 @@ function createAnthropicBatchAdapter(apiKey) {
2465
2544
  "anthropic-version": ANTHROPIC_VERSION2,
2466
2545
  "Content-Type": "application/json"
2467
2546
  };
2468
- const res = await fetch(`${ANTHROPIC_API_BASE2}${path2}`, {
2547
+ const res = await fetchWithTimeout(`${ANTHROPIC_API_BASE2}${path2}`, {
2469
2548
  method: options.method || "GET",
2470
2549
  headers,
2471
2550
  body: options.body ? JSON.stringify(options.body) : void 0
@@ -2488,7 +2567,7 @@ function createAnthropicBatchAdapter(apiKey) {
2488
2567
  function translateToAnthropicParams(model, req) {
2489
2568
  const params = {
2490
2569
  model,
2491
- max_tokens: req.max_tokens || DEFAULT_MAX_TOKENS2
2570
+ max_tokens: resolveMaxTokens(model, req.messages, req.max_tokens || DEFAULT_MAX_TOKENS2)
2492
2571
  };
2493
2572
  const systemMessages = req.messages.filter((m) => m.role === "system");
2494
2573
  const nonSystemMessages = req.messages.filter((m) => m.role !== "system");
@@ -2662,6 +2741,284 @@ ${params.system}` : jsonInstruction;
2662
2741
  };
2663
2742
  }
2664
2743
 
2744
// src/providers/google-batch.ts
var GEMINI_API_BASE2 = "https://generativelanguage.googleapis.com/v1beta";
/**
 * Build the Google Gemini native-batch adapter (batchGenerateContent).
 * Exposes createBatch / pollBatch / getBatchResults / cancelBatch, translating
 * between OpenAI-style chat requests/responses and the Gemini batch schema.
 */
function createGoogleBatchAdapter(apiKey) {
  // Shared HTTP helper: JSON request against the Gemini base URL, API key in
  // the x-goog-api-key header; non-2xx responses become AnyModelError
  // (5xx collapsed to 502, other statuses passed through).
  async function apiRequest(path2, options = {}) {
    const headers = {
      "Content-Type": "application/json",
      "x-goog-api-key": apiKey
    };
    const res = await fetchWithTimeout(`${GEMINI_API_BASE2}${path2}`, {
      method: options.method || "GET",
      headers,
      body: options.body ? JSON.stringify(options.body) : void 0
    });
    if (!res.ok) {
      let errorBody;
      try {
        errorBody = await res.json();
      } catch {
        // Non-JSON error body: fall back to the HTTP status text.
        errorBody = { message: res.statusText };
      }
      const msg = errorBody?.error?.message || errorBody?.message || res.statusText;
      throw new AnyModelError(res.status >= 500 ? 502 : res.status, msg, {
        provider_name: "google",
        raw: errorBody
      });
    }
    return res;
  }
  // Translate an OpenAI-style chat request into a Gemini GenerateContent body.
  function translateRequestToGemini(model, req) {
    const body = {};
    const systemMessages = req.messages.filter((m) => m.role === "system");
    const nonSystemMessages = req.messages.filter((m) => m.role !== "system");
    if (systemMessages.length > 0) {
      // Gemini takes system prompts via systemInstruction, not the contents
      // array; multiple system messages are joined with newlines.
      // NOTE(review): non-string system content is silently dropped ("").
      body.systemInstruction = {
        parts: [{ text: systemMessages.map((m) => typeof m.content === "string" ? m.content : "").join("\n") }]
      };
    }
    // Gemini uses "model" for assistant turns; every other role maps to "user".
    // Array content keeps only text parts; non-text parts become empty text.
    body.contents = nonSystemMessages.map((m) => ({
      role: m.role === "assistant" ? "model" : "user",
      parts: typeof m.content === "string" ? [{ text: m.content }] : Array.isArray(m.content) ? m.content.map((p) => p.type === "text" ? { text: p.text } : { text: "" }) : [{ text: "" }]
    }));
    const generationConfig = {};
    if (req.temperature !== void 0) generationConfig.temperature = req.temperature;
    // maxOutputTokens is always set: explicit max_tokens wins, otherwise the
    // per-request heuristic from resolveMaxTokens fills it in.
    generationConfig.maxOutputTokens = req.max_tokens !== void 0 ? req.max_tokens : resolveMaxTokens(model, req.messages);
    if (req.top_p !== void 0) generationConfig.topP = req.top_p;
    if (req.top_k !== void 0) generationConfig.topK = req.top_k;
    if (req.stop !== void 0) {
      // Gemini expects an array of stop sequences; normalize a lone string.
      generationConfig.stopSequences = Array.isArray(req.stop) ? req.stop : [req.stop];
    }
    if (req.response_format) {
      // Both JSON modes map to a JSON MIME type; json_schema also forwards
      // the schema itself for constrained decoding.
      if (req.response_format.type === "json_object") {
        generationConfig.responseMimeType = "application/json";
      } else if (req.response_format.type === "json_schema") {
        generationConfig.responseMimeType = "application/json";
        generationConfig.responseSchema = req.response_format.json_schema?.schema;
      }
    }
    // NOTE(review): maxOutputTokens is set unconditionally above, so this
    // guard is always true as written.
    if (Object.keys(generationConfig).length > 0) {
      body.generationConfig = generationConfig;
    }
    if (req.tools && req.tools.length > 0) {
      // OpenAI tool definitions -> Gemini functionDeclarations.
      body.tools = [{
        functionDeclarations: req.tools.map((t) => ({
          name: t.function.name,
          description: t.function.description || "",
          parameters: t.function.parameters || {}
        }))
      }];
      if (req.tool_choice) {
        // tool_choice mapping: auto->AUTO, required->ANY, none->NONE; a
        // specific {function:{name}} object pins ANY to that one function.
        if (req.tool_choice === "auto") {
          body.toolConfig = { functionCallingConfig: { mode: "AUTO" } };
        } else if (req.tool_choice === "required") {
          body.toolConfig = { functionCallingConfig: { mode: "ANY" } };
        } else if (req.tool_choice === "none") {
          body.toolConfig = { functionCallingConfig: { mode: "NONE" } };
        } else if (typeof req.tool_choice === "object") {
          body.toolConfig = {
            functionCallingConfig: {
              mode: "ANY",
              allowedFunctionNames: [req.tool_choice.function.name]
            }
          };
        }
      }
    }
    return body;
  }
  // Gemini finishReason -> OpenAI finish_reason. Unknown reasons default to
  // "stop"; SAFETY and RECITATION both surface as "content_filter".
  function mapFinishReason(reason) {
    switch (reason) {
      case "STOP":
        return "stop";
      case "MAX_TOKENS":
        return "length";
      case "SAFETY":
        return "content_filter";
      case "RECITATION":
        return "content_filter";
      default:
        return "stop";
    }
  }
  // Translate a Gemini GenerateContent response into an OpenAI-style
  // chat.completion object. Only the first candidate is used.
  function translateGeminiResponse(response, model) {
    const candidate = response.candidates?.[0];
    let content = "";
    const toolCalls = [];
    for (const part of candidate?.content?.parts || []) {
      if (part.text) {
        content += part.text;
      } else if (part.functionCall) {
        // Gemini function calls carry no id, so a synthetic one is generated.
        toolCalls.push({
          id: generateId("call"),
          type: "function",
          function: {
            name: part.functionCall.name,
            arguments: JSON.stringify(part.functionCall.args || {})
          }
        });
      }
    }
    const message = { role: "assistant", content };
    if (toolCalls.length > 0) {
      message.tool_calls = toolCalls;
    }
    // Any tool call forces finish_reason "tool_calls", mirroring OpenAI.
    const finishReason = toolCalls.length > 0 ? "tool_calls" : mapFinishReason(candidate?.finishReason || "STOP");
    return {
      id: generateId(),
      object: "chat.completion",
      created: Math.floor(Date.now() / 1e3),
      model: `google/${model}`,
      choices: [{ index: 0, message, finish_reason: finishReason }],
      usage: {
        prompt_tokens: response.usageMetadata?.promptTokenCount || 0,
        completion_tokens: response.usageMetadata?.candidatesTokenCount || 0,
        total_tokens: response.usageMetadata?.totalTokenCount || 0
      }
    };
  }
  // Gemini batch JOB_STATE_* -> internal batch status. EXPIRED is treated as
  // failed; unknown states stay pending.
  function mapBatchState(state) {
    switch (state) {
      case "JOB_STATE_PENDING":
        return "pending";
      case "JOB_STATE_RUNNING":
        return "processing";
      case "JOB_STATE_SUCCEEDED":
        return "completed";
      case "JOB_STATE_FAILED":
        return "failed";
      case "JOB_STATE_CANCELLED":
        return "cancelled";
      case "JOB_STATE_EXPIRED":
        return "failed";
      default:
        return "pending";
    }
  }
  return {
    /**
     * Submit a batch of requests via models/{model}:batchGenerateContent.
     * Each request carries its custom_id in metadata.key so results can be
     * matched back. Returns the provider's batch name plus local metadata.
     */
    async createBatch(model, requests, _options) {
      const batchRequests = requests.map((req) => ({
        request: translateRequestToGemini(model, req),
        metadata: { key: req.custom_id }
      }));
      const res = await apiRequest(`/models/${model}:batchGenerateContent`, {
        method: "POST",
        body: {
          batch: {
            display_name: `anymodel-batch-${Date.now()}`,
            input_config: {
              requests: {
                requests: batchRequests
              }
            }
          }
        }
      });
      const data = await res.json();
      // The batch name's location varies; check both observed shapes.
      const batchName = data.name || data.batch?.name;
      if (!batchName) {
        throw new AnyModelError(502, "No batch name in Google response", {
          provider_name: "google",
          raw: data
        });
      }
      return {
        providerBatchId: batchName,
        metadata: {
          model,
          total_requests: requests.length
        }
      };
    },
    /**
     * Poll batch progress. Counts fall back to 0 when the provider omits
     * them; total falls back to completed+failed when no totalCount is given.
     */
    async pollBatch(providerBatchId) {
      const res = await apiRequest(`/${providerBatchId}`);
      const data = await res.json();
      const state = data.state || "JOB_STATE_PENDING";
      const status = mapBatchState(state);
      const totalCount = data.totalCount || data.metadata?.total_requests || 0;
      const successCount = data.succeededCount || 0;
      const failedCount = data.failedCount || 0;
      return {
        status,
        total: totalCount || successCount + failedCount,
        completed: successCount,
        failed: failedCount
      };
    },
    /**
     * Fetch and translate batch results. Two provider shapes are handled:
     * inline responses embedded in the batch resource, or a results file
     * downloaded as JSONL. Items with neither response nor error are skipped.
     * NOTE(review): model is read from batchData.metadata?.model and falls
     * back to "unknown" — confirm the provider echoes this metadata back.
     */
    async getBatchResults(providerBatchId) {
      const batchRes = await apiRequest(`/${providerBatchId}`);
      const batchData = await batchRes.json();
      const results = [];
      const model = batchData.metadata?.model || "unknown";
      if (batchData.response?.inlinedResponses) {
        for (const item of batchData.response.inlinedResponses) {
          // Fall back to a positional id when the metadata key is missing.
          const customId = item.metadata?.key || `request-${results.length}`;
          if (item.response) {
            results.push({
              custom_id: customId,
              status: "success",
              response: translateGeminiResponse(item.response, model),
              error: null
            });
          } else if (item.error) {
            results.push({
              custom_id: customId,
              status: "error",
              response: null,
              error: {
                code: item.error.code || 500,
                message: item.error.message || "Batch item failed"
              }
            });
          }
        }
        return results;
      }
      // File-based results path: download the JSONL results file.
      const responsesFile = batchData.response?.responsesFileName || batchData.outputConfig?.file_name;
      if (responsesFile) {
        const downloadUrl = `${GEMINI_API_BASE2}/${responsesFile}:download?alt=media`;
        const fileRes = await fetchWithTimeout(downloadUrl, {
          headers: { "x-goog-api-key": apiKey }
        });
        if (!fileRes.ok) {
          throw new AnyModelError(502, "Failed to download batch results file", {
            provider_name: "google"
          });
        }
        const text = await fileRes.text();
        for (const line of text.trim().split("\n")) {
          if (!line) continue;
          const item = JSON.parse(line);
          const customId = item.key || item.metadata?.key || `request-${results.length}`;
          if (item.response) {
            results.push({
              custom_id: customId,
              status: "success",
              response: translateGeminiResponse(item.response, model),
              error: null
            });
          } else if (item.error) {
            results.push({
              custom_id: customId,
              status: "error",
              response: null,
              error: {
                code: item.error.code || 500,
                message: item.error.message || "Batch item failed"
              }
            });
          }
        }
      }
      return results;
    },
    /** Cancel a running batch via the provider's :cancel RPC. */
    async cancelBatch(providerBatchId) {
      await apiRequest(`/${providerBatchId}:cancel`, { method: "POST" });
    }
  };
}
3021
+
2665
3022
  // src/client.ts
2666
3023
  var AnyModel = class {
2667
3024
  registry;
@@ -2677,6 +3034,7 @@ var AnyModel = class {
2677
3034
  constructor(config = {}) {
2678
3035
  this.config = resolveConfig(config);
2679
3036
  this.registry = new ProviderRegistry();
3037
+ setDefaultTimeout((this.config.defaults?.timeout ?? 120) * 1e3);
2680
3038
  if (this.config.io) {
2681
3039
  configureFsIO(this.config.io);
2682
3040
  }
@@ -2797,6 +3155,10 @@ var AnyModel = class {
2797
3155
  if (anthropicKey) {
2798
3156
  this.batchManager.registerBatchAdapter("anthropic", createAnthropicBatchAdapter(anthropicKey));
2799
3157
  }
3158
+ const googleKey = config.google?.apiKey || process.env.GOOGLE_API_KEY;
3159
+ if (googleKey) {
3160
+ this.batchManager.registerBatchAdapter("google", createGoogleBatchAdapter(googleKey));
3161
+ }
2800
3162
  }
2801
3163
  applyDefaults(request) {
2802
3164
  const defaults = this.config.defaults;