@probeo/anymodel 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -85,6 +85,18 @@ perplexity/sonar-pro
85
85
  ollama/llama3.3
86
86
  ```
87
87
 
88
+ ### Flex Pricing (OpenAI)
89
+
90
+ Get 50% off OpenAI requests with flexible latency:
91
+
92
+ ```typescript
93
+ const response = await client.chat.completions.create({
94
+ model: "openai/gpt-4o",
95
+ messages: [{ role: "user", content: "Hello!" }],
96
+ service_tier: "flex",
97
+ });
98
+ ```
99
+
88
100
  ## Fallback Routing
89
101
 
90
102
  Try multiple models in order. If one fails, the next is attempted:
@@ -148,7 +160,7 @@ const response = await client.chat.completions.create({
148
160
 
149
161
  ## Batch Processing
150
162
 
151
- Process many requests with native provider batch APIs or concurrent fallback. OpenAI and Anthropic batches are processed server-side — OpenAI at 50% cost, Anthropic with async processing for up to 10K requests. Other providers fall back to concurrent execution automatically.
163
+ Process many requests with native provider batch APIs or concurrent fallback. OpenAI, Anthropic, and Google batches are processed server-side — OpenAI at 50% cost, Anthropic with async processing for up to 10K requests, Google at 50% cost via `batchGenerateContent`. Other providers fall back to concurrent execution automatically.
152
164
 
153
165
  ### Submit and wait
154
166
 
@@ -169,7 +181,7 @@ for (const result of results.results) {
169
181
 
170
182
  ### Submit now, check later
171
183
 
172
- Submit a batch and get back an ID immediately — no need to keep the process running for native batches (OpenAI, Anthropic):
184
+ Submit a batch and get back an ID immediately — no need to keep the process running for native batches (OpenAI, Anthropic, Google):
173
185
 
174
186
  ```typescript
175
187
  // Submit and get the batch ID
@@ -232,6 +244,10 @@ const results = await client.batches.createAndPoll(request, {
232
244
 
233
245
  Batches are persisted to `./.anymodel/batches/` in the current working directory and survive process restarts.
234
246
 
247
+ ### Automatic max_tokens
248
+
249
+ When `max_tokens` isn't set on a batch request, anymodel automatically calculates a safe value per-request based on the estimated input size and the model's context window. This prevents truncated responses and context overflow errors without requiring you to hand-tune each request in a large batch. The estimation uses a ~4 chars/token heuristic with a 5% safety margin — conservative enough to avoid overflows, lightweight enough to skip tokenizer dependencies.
250
+
235
251
  ## Models Endpoint
236
252
 
237
253
  ```typescript
@@ -265,6 +281,7 @@ const client = new AnyModel({
265
281
  temperature: 0.7,
266
282
  max_tokens: 4096,
267
283
  retries: 2,
284
+ timeout: 120, // HTTP timeout in seconds (default: 120 = 2 min, flex: 600 = 10 min)
268
285
  },
269
286
  });
270
287
 
@@ -426,6 +443,7 @@ npx tsx examples/basic.ts batch
426
443
  - **Retries**: Automatic retry with exponential backoff on 429/502/503 errors (configurable via `defaults.retries`)
427
444
  - **Rate limit tracking**: Per-provider rate limit state, automatically skips rate-limited providers during fallback routing
428
445
  - **Parameter stripping**: Unsupported parameters are automatically removed before forwarding to providers
446
+ - **Smart batch defaults**: Automatic `max_tokens` estimation per-request in batches — calculates safe values from input size and model context limits, preventing truncation and overflow without manual tuning
429
447
  - **High-volume IO**: All batch file operations use concurrency-limited async queues with atomic durable writes (temp file + fsync + rename) to prevent corruption on crash. Defaults: 20 concurrent reads, 10 concurrent writes — configurable via `io.readConcurrency` and `io.writeConcurrency`
430
448
 
431
449
  ## Roadmap
@@ -433,7 +451,7 @@ npx tsx examples/basic.ts batch
433
451
  - [ ] **A/B testing** — split routing (% traffic to each model) and compare mode (same request to multiple models, return all responses with stats)
434
452
  - [ ] **Cost tracking** — per-request and aggregate cost calculation from provider pricing
435
453
  - [ ] **Caching** — response caching with configurable TTL for identical requests
436
- - [x] **Native batch APIs** — OpenAI Batch API (JSONL upload, 50% cost) and Anthropic Message Batches (10K requests, async). Auto-detects provider and routes to native API, falls back to concurrent for other providers
454
+ - [x] **Native batch APIs** — OpenAI Batch API (JSONL upload, 50% cost), Anthropic Message Batches (10K requests, async), and Google Gemini Batch (50% cost). Auto-detects provider and routes to native API, falls back to concurrent for other providers
437
455
  - [ ] **Result export** — `saveResults()` to write batch results to a configurable output directory
438
456
  - [ ] **Prompt logging** — optional request/response logging for debugging and evaluation
439
457
 
package/dist/cli.cjs CHANGED
@@ -508,6 +508,25 @@ var Router = class {
508
508
  }
509
509
  };
510
510
 
// src/utils/fetch-with-timeout.ts

// Timeout in milliseconds used when a call does not supply its own (starts at 2 minutes).
var _defaultTimeout = 12e4;
// Timeout in milliseconds returned by getFlexTimeout() (10 minutes).
var _flexTimeout = 6e5;

/**
 * Validates a timeout value and converts it to whole milliseconds.
 *
 * Failing here gives the caller a clear message at the point where the bad
 * value was supplied, rather than an error from AbortSignal.timeout() on
 * every subsequent request.
 *
 * @param {number} ms - Candidate timeout in milliseconds.
 * @param {string} label - Name used in the error message.
 * @returns {number} The timeout rounded to the nearest millisecond.
 * @throws {RangeError} If `ms` is not a finite, non-negative number.
 */
function normalizeTimeoutMs(ms, label) {
  if (typeof ms !== "number" || !Number.isFinite(ms) || ms < 0) {
    throw new RangeError(
      `${label} must be a finite, non-negative number of milliseconds (received ${String(ms)})`
    );
  }
  // Seconds-to-milliseconds conversion of fractional values can yield
  // non-integers; round so the delay handed to AbortSignal.timeout() is whole.
  return Math.round(ms);
}

/**
 * Replaces the default timeout used by fetchWithTimeout().
 *
 * @param {number} ms - New default timeout in milliseconds.
 * @throws {RangeError} If `ms` is not a finite, non-negative number; the
 *   previous default is left untouched in that case.
 */
function setDefaultTimeout(ms) {
  _defaultTimeout = normalizeTimeoutMs(ms, "Default timeout");
}

/**
 * @returns {number} The timeout in milliseconds reserved for flex requests.
 */
function getFlexTimeout() {
  return _flexTimeout;
}

/**
 * Calls fetch() with an abort signal that fires after a timeout.
 *
 * @param {string|URL|Request} url - Passed straight to fetch().
 * @param {RequestInit} [init] - Fetch options. If it carries a `signal`, the
 *   request aborts when either that signal or the timeout fires.
 * @param {number} [timeoutMs] - Per-call timeout in milliseconds; the current
 *   default is used when this is null or undefined.
 * @returns {Promise<Response>} The promise returned by fetch().
 * @throws {RangeError} Synchronously, if the effective timeout is invalid.
 */
function fetchWithTimeout(url, init, timeoutMs) {
  const ms = normalizeTimeoutMs(timeoutMs ?? _defaultTimeout, "Timeout");
  const timeoutSignal = AbortSignal.timeout(ms);
  const signal = init?.signal
    ? AbortSignal.any([timeoutSignal, init.signal])
    : timeoutSignal;
  return fetch(url, { ...init, signal });
}
529
+
511
530
  // src/providers/openai.ts
512
531
  var OPENAI_API_BASE = "https://api.openai.com/v1";
513
532
  var SUPPORTED_PARAMS = /* @__PURE__ */ new Set([
@@ -525,19 +544,20 @@ var SUPPORTED_PARAMS = /* @__PURE__ */ new Set([
525
544
  "tools",
526
545
  "tool_choice",
527
546
  "user",
528
- "logit_bias"
547
+ "logit_bias",
548
+ "service_tier"
529
549
  ]);
530
550
  function createOpenAIAdapter(apiKey, baseURL) {
531
551
  const base = baseURL || OPENAI_API_BASE;
532
- async function makeRequest(path2, body, method = "POST") {
533
- const res = await fetch(`${base}${path2}`, {
552
+ async function makeRequest(path2, body, method = "POST", timeoutMs) {
553
+ const res = await fetchWithTimeout(`${base}${path2}`, {
534
554
  method,
535
555
  headers: {
536
556
  "Content-Type": "application/json",
537
557
  "Authorization": `Bearer ${apiKey}`
538
558
  },
539
559
  body: body ? JSON.stringify(body) : void 0
540
- });
560
+ }, timeoutMs);
541
561
  if (!res.ok) {
542
562
  let errorBody;
543
563
  try {
@@ -585,6 +605,7 @@ function createOpenAIAdapter(apiKey, baseURL) {
585
605
  if (request.tools !== void 0) body.tools = request.tools;
586
606
  if (request.tool_choice !== void 0) body.tool_choice = request.tool_choice;
587
607
  if (request.user !== void 0) body.user = request.user;
608
+ if (request.service_tier !== void 0) body.service_tier = request.service_tier;
588
609
  return body;
589
610
  }
590
611
  const adapter = {
@@ -686,13 +707,15 @@ function createOpenAIAdapter(apiKey, baseURL) {
686
707
  },
687
708
  async sendRequest(request) {
688
709
  const body = buildRequestBody(request);
689
- const res = await makeRequest("/chat/completions", body);
710
+ const timeout = request.service_tier === "flex" ? getFlexTimeout() : void 0;
711
+ const res = await makeRequest("/chat/completions", body, "POST", timeout);
690
712
  const json = await res.json();
691
713
  return adapter.translateResponse(json);
692
714
  },
693
715
  async sendStreamingRequest(request) {
694
716
  const body = buildRequestBody({ ...request, stream: true });
695
- const res = await makeRequest("/chat/completions", body);
717
+ const timeout = request.service_tier === "flex" ? getFlexTimeout() : void 0;
718
+ const res = await makeRequest("/chat/completions", body, "POST", timeout);
696
719
  if (!res.body) {
697
720
  throw new AnyModelError(502, "No response body for streaming request", {
698
721
  provider_name: "openai"
@@ -739,7 +762,7 @@ var FALLBACK_MODELS = [
739
762
  ];
740
763
  function createAnthropicAdapter(apiKey) {
741
764
  async function makeRequest(path2, body, stream = false) {
742
- const res = await fetch(`${ANTHROPIC_API_BASE}${path2}`, {
765
+ const res = await fetchWithTimeout(`${ANTHROPIC_API_BASE}${path2}`, {
743
766
  method: "POST",
744
767
  headers: {
745
768
  "Content-Type": "application/json",
@@ -996,7 +1019,7 @@ ${body.system}` : jsonInstruction;
996
1019
  },
997
1020
  async listModels() {
998
1021
  try {
999
- const res = await fetch(`${ANTHROPIC_API_BASE}/models`, {
1022
+ const res = await fetchWithTimeout(`${ANTHROPIC_API_BASE}/models`, {
1000
1023
  method: "GET",
1001
1024
  headers: {
1002
1025
  "x-api-key": apiKey,
@@ -1281,7 +1304,7 @@ function createGoogleAdapter(apiKey) {
1281
1304
  },
1282
1305
  async listModels() {
1283
1306
  try {
1284
- const res = await fetch(`${GEMINI_API_BASE}/models?key=${apiKey}`);
1307
+ const res = await fetchWithTimeout(`${GEMINI_API_BASE}/models?key=${apiKey}`);
1285
1308
  if (!res.ok) return FALLBACK_MODELS2;
1286
1309
  const data = await res.json();
1287
1310
  const models = data.models || [];
@@ -1316,12 +1339,12 @@ function createGoogleAdapter(apiKey) {
1316
1339
  return SUPPORTED_PARAMS3.has(param);
1317
1340
  },
1318
1341
  supportsBatch() {
1319
- return false;
1342
+ return true;
1320
1343
  },
1321
1344
  async sendRequest(request) {
1322
1345
  const body = translateRequest(request);
1323
1346
  const url = getModelEndpoint(request.model, false);
1324
- const res = await fetch(url, {
1347
+ const res = await fetchWithTimeout(url, {
1325
1348
  method: "POST",
1326
1349
  headers: { "Content-Type": "application/json" },
1327
1350
  body: JSON.stringify(body)
@@ -1344,7 +1367,7 @@ function createGoogleAdapter(apiKey) {
1344
1367
  async sendStreamingRequest(request) {
1345
1368
  const body = translateRequest(request);
1346
1369
  const url = getModelEndpoint(request.model, true);
1347
- const res = await fetch(url, {
1370
+ const res = await fetchWithTimeout(url, {
1348
1371
  method: "POST",
1349
1372
  headers: { "Content-Type": "application/json" },
1350
1373
  body: JSON.stringify(body)
@@ -1394,7 +1417,7 @@ var MODELS = [
1394
1417
  ];
1395
1418
  function createPerplexityAdapter(apiKey) {
1396
1419
  async function makeRequest(path2, body, method = "POST") {
1397
- const res = await fetch(`${PERPLEXITY_API_BASE}${path2}`, {
1420
+ const res = await fetchWithTimeout(`${PERPLEXITY_API_BASE}${path2}`, {
1398
1421
  method,
1399
1422
  headers: {
1400
1423
  "Content-Type": "application/json",
@@ -2266,6 +2289,51 @@ var BatchManager = class {
2266
2289
  }
2267
2290
  };
2268
2291
 
// src/utils/token-estimate.ts

// Heuristic: roughly four characters of serialized input per token.
var CHARS_PER_TOKEN2 = 4;

// Known model limits, matched by substring against the model id.
// The list is scanned in order and the first match wins, so more specific
// patterns (e.g. "gpt-4o-mini") are listed before their prefixes ("gpt-4o").
var MODEL_LIMITS = [
  // OpenAI
  { pattern: "gpt-4o-mini", limit: { contextLength: 128e3, maxCompletionTokens: 16384 } },
  { pattern: "gpt-4o", limit: { contextLength: 128e3, maxCompletionTokens: 16384 } },
  { pattern: "gpt-4-turbo", limit: { contextLength: 128e3, maxCompletionTokens: 4096 } },
  { pattern: "gpt-3.5-turbo", limit: { contextLength: 16385, maxCompletionTokens: 4096 } },
  { pattern: "o1", limit: { contextLength: 2e5, maxCompletionTokens: 1e5 } },
  { pattern: "o3", limit: { contextLength: 2e5, maxCompletionTokens: 1e5 } },
  { pattern: "o4-mini", limit: { contextLength: 2e5, maxCompletionTokens: 1e5 } },
  // Anthropic
  { pattern: "claude-opus-4", limit: { contextLength: 2e5, maxCompletionTokens: 32768 } },
  { pattern: "claude-sonnet-4", limit: { contextLength: 2e5, maxCompletionTokens: 16384 } },
  { pattern: "claude-haiku-4", limit: { contextLength: 2e5, maxCompletionTokens: 8192 } },
  { pattern: "claude-3.5-sonnet", limit: { contextLength: 2e5, maxCompletionTokens: 8192 } },
  { pattern: "claude-3-opus", limit: { contextLength: 2e5, maxCompletionTokens: 4096 } },
  // Google
  { pattern: "gemini-2.5-pro", limit: { contextLength: 1048576, maxCompletionTokens: 65536 } },
  { pattern: "gemini-2.5-flash", limit: { contextLength: 1048576, maxCompletionTokens: 65536 } },
  { pattern: "gemini-2.0-flash", limit: { contextLength: 1048576, maxCompletionTokens: 65536 } },
  { pattern: "gemini-1.5-pro", limit: { contextLength: 2097152, maxCompletionTokens: 8192 } },
  { pattern: "gemini-1.5-flash", limit: { contextLength: 1048576, maxCompletionTokens: 8192 } }
];

// Limits assumed for any model that matches no pattern above.
var DEFAULT_LIMIT = { contextLength: 128e3, maxCompletionTokens: 4096 };

/**
 * Looks up the context and completion limits for a model id.
 *
 * @param {string} model - Model id, optionally prefixed with "provider/".
 * @returns {{contextLength: number, maxCompletionTokens: number}} The limits
 *   of the first matching MODEL_LIMITS entry, or DEFAULT_LIMIT when nothing
 *   matches or `model` is not a string.
 */
function getModelLimits(model) {
  if (typeof model !== "string") return DEFAULT_LIMIT;
  // Drop everything up to and including the first "/" (the provider prefix).
  const slash = model.indexOf("/");
  const bare = slash === -1 ? model : model.slice(slash + 1);
  // A substring test already covers the prefix case, so one check suffices.
  const match = MODEL_LIMITS.find((entry) => bare.includes(entry.pattern));
  return match ? match.limit : DEFAULT_LIMIT;
}

/**
 * Picks a max_tokens value for a request.
 *
 * An explicit caller value always wins. Otherwise the input size is estimated
 * from the JSON-serialized messages (CHARS_PER_TOKEN2 characters per token,
 * plus a 5% margin) and the result is the model's completion cap, reduced if
 * the remaining context window is smaller, and never below 1.
 *
 * @param {string} model - Model id used to look up limits.
 * @param {Array<object>} [messages] - Chat messages; a missing value counts
 *   as empty input.
 * @param {number} [userMaxTokens] - Caller-supplied max_tokens; null and
 *   undefined both mean "not set".
 * @returns {number} The max_tokens value to send.
 */
function resolveMaxTokens(model, messages, userMaxTokens) {
  if (userMaxTokens !== void 0 && userMaxTokens !== null) return userMaxTokens;
  // JSON.stringify returns undefined for an undefined input; treat as empty.
  const inputChars = (JSON.stringify(messages) ?? "").length;
  const estimatedInput = Math.ceil(inputChars / CHARS_PER_TOKEN2);
  const estimatedWithMargin = Math.ceil(estimatedInput * 1.05);
  const { contextLength, maxCompletionTokens } = getModelLimits(model);
  const available = contextLength - estimatedWithMargin;
  // `available` goes negative when the input alone overflows the window.
  return Math.max(1, Math.min(maxCompletionTokens, available));
}
2336
+
2269
2337
  // src/providers/openai-batch.ts
2270
2338
  var OPENAI_API_BASE2 = "https://api.openai.com/v1";
2271
2339
  function createOpenAIBatchAdapter(apiKey) {
@@ -2280,7 +2348,7 @@ function createOpenAIBatchAdapter(apiKey) {
2280
2348
  headers["Content-Type"] = "application/json";
2281
2349
  fetchBody = JSON.stringify(options.body);
2282
2350
  }
2283
- const res = await fetch(`${OPENAI_API_BASE2}${path2}`, {
2351
+ const res = await fetchWithTimeout(`${OPENAI_API_BASE2}${path2}`, {
2284
2352
  method: options.method || "GET",
2285
2353
  headers,
2286
2354
  body: fetchBody
@@ -2306,7 +2374,7 @@ function createOpenAIBatchAdapter(apiKey) {
2306
2374
  model,
2307
2375
  messages: req.messages
2308
2376
  };
2309
- if (req.max_tokens !== void 0) body.max_tokens = req.max_tokens;
2377
+ body.max_tokens = req.max_tokens !== void 0 ? req.max_tokens : resolveMaxTokens(model, req.messages);
2310
2378
  if (req.temperature !== void 0) body.temperature = req.temperature;
2311
2379
  if (req.top_p !== void 0) body.top_p = req.top_p;
2312
2380
  if (req.stop !== void 0) body.stop = req.stop;
@@ -2465,7 +2533,7 @@ function createAnthropicBatchAdapter(apiKey) {
2465
2533
  "anthropic-version": ANTHROPIC_VERSION2,
2466
2534
  "Content-Type": "application/json"
2467
2535
  };
2468
- const res = await fetch(`${ANTHROPIC_API_BASE2}${path2}`, {
2536
+ const res = await fetchWithTimeout(`${ANTHROPIC_API_BASE2}${path2}`, {
2469
2537
  method: options.method || "GET",
2470
2538
  headers,
2471
2539
  body: options.body ? JSON.stringify(options.body) : void 0
@@ -2488,7 +2556,7 @@ function createAnthropicBatchAdapter(apiKey) {
2488
2556
  function translateToAnthropicParams(model, req) {
2489
2557
  const params = {
2490
2558
  model,
2491
- max_tokens: req.max_tokens || DEFAULT_MAX_TOKENS2
2559
+ max_tokens: resolveMaxTokens(model, req.messages, req.max_tokens || DEFAULT_MAX_TOKENS2)
2492
2560
  };
2493
2561
  const systemMessages = req.messages.filter((m) => m.role === "system");
2494
2562
  const nonSystemMessages = req.messages.filter((m) => m.role !== "system");
@@ -2662,6 +2730,284 @@ ${params.system}` : jsonInstruction;
2662
2730
  };
2663
2731
  }
2664
2732
 
// src/providers/google-batch.ts
var GEMINI_API_BASE2 = "https://generativelanguage.googleapis.com/v1beta";

/**
 * Creates the batch adapter for Google Gemini.
 *
 * The adapter translates OpenAI-style chat requests into Gemini
 * `batchGenerateContent` payloads and translates the results back.
 *
 * @param {string} apiKey - Sent as the `x-goog-api-key` header on every call.
 * @returns {object} An adapter exposing `createBatch`, `pollBatch`,
 *   `getBatchResults` and `cancelBatch`.
 */
function createGoogleBatchAdapter(apiKey) {
  // OpenAI-style string tool_choice values and their Gemini calling modes.
  const toolChoiceModes = new Map([
    ["auto", "AUTO"],
    ["required", "ANY"],
    ["none", "NONE"]
  ]);

  /**
   * Performs an authenticated JSON request against the Gemini API.
   *
   * @param {string} path2 - Path appended to GEMINI_API_BASE2.
   * @param {{method?: string, body?: object}} [options] - Defaults to GET with no body.
   * @returns {Promise<Response>} The response, only when `res.ok` is true.
   * @throws {AnyModelError} On a non-OK response; 5xx statuses are reported as 502.
   */
  async function apiRequest(path2, options = {}) {
    const res = await fetchWithTimeout(`${GEMINI_API_BASE2}${path2}`, {
      method: options.method || "GET",
      headers: {
        "Content-Type": "application/json",
        "x-goog-api-key": apiKey
      },
      body: options.body ? JSON.stringify(options.body) : void 0
    });
    if (res.ok) return res;
    let errorBody;
    try {
      errorBody = await res.json();
    } catch {
      // Error body was not JSON; fall back to the HTTP status text.
      errorBody = { message: res.statusText };
    }
    const msg = errorBody?.error?.message || errorBody?.message || res.statusText;
    throw new AnyModelError(res.status >= 500 ? 502 : res.status, msg, {
      provider_name: "google",
      raw: errorBody
    });
  }

  /**
   * Returns the plain text of a message's content.
   * String content is returned as-is; for array content the text parts are
   * joined with newlines; anything else yields an empty string.
   */
  function extractText(content) {
    if (typeof content === "string") return content;
    if (!Array.isArray(content)) return "";
    return content
      .filter((part) => part?.type === "text" && typeof part.text === "string")
      .map((part) => part.text)
      .join("\n");
  }

  /**
   * Converts message content into Gemini `parts`.
   * Non-text parts are replaced by an empty text part, so only text survives.
   */
  function toGeminiParts(content) {
    if (typeof content === "string") return [{ text: content }];
    if (!Array.isArray(content)) return [{ text: "" }];
    return content.map((part) => part.type === "text" ? { text: part.text } : { text: "" });
  }

  /**
   * Translates one OpenAI-style chat request into a Gemini request body.
   *
   * @param {string} model - Model id, used only to estimate max output tokens.
   * @param {object} req - Chat request (messages plus optional sampling,
   *   response_format, tools and tool_choice fields).
   * @returns {object} Gemini `generateContent` request body.
   */
  function translateRequestToGemini(model, req) {
    const body = {};
    const systemMessages = req.messages.filter((m) => m.role === "system");
    const nonSystemMessages = req.messages.filter((m) => m.role !== "system");
    if (systemMessages.length > 0) {
      // Array-form system content keeps its text parts instead of being dropped.
      body.systemInstruction = {
        parts: [{ text: systemMessages.map((m) => extractText(m.content)).join("\n") }]
      };
    }
    body.contents = nonSystemMessages.map((m) => ({
      role: m.role === "assistant" ? "model" : "user",
      parts: toGeminiParts(m.content)
    }));

    const generationConfig = {};
    if (req.temperature !== void 0) generationConfig.temperature = req.temperature;
    // Always set: an explicit value wins, otherwise an estimate is computed.
    generationConfig.maxOutputTokens = req.max_tokens ?? resolveMaxTokens(model, req.messages);
    if (req.top_p !== void 0) generationConfig.topP = req.top_p;
    if (req.top_k !== void 0) generationConfig.topK = req.top_k;
    if (req.stop !== void 0) {
      generationConfig.stopSequences = Array.isArray(req.stop) ? req.stop : [req.stop];
    }
    const formatType = req.response_format?.type;
    if (formatType === "json_object" || formatType === "json_schema") {
      generationConfig.responseMimeType = "application/json";
      if (formatType === "json_schema") {
        generationConfig.responseSchema = req.response_format.json_schema?.schema;
      }
    }
    body.generationConfig = generationConfig;

    if (req.tools && req.tools.length > 0) {
      body.tools = [{
        functionDeclarations: req.tools.map((t) => ({
          name: t.function.name,
          description: t.function.description || "",
          parameters: t.function.parameters || {}
        }))
      }];
      if (req.tool_choice) {
        const mode = toolChoiceModes.get(req.tool_choice);
        if (mode) {
          body.toolConfig = { functionCallingConfig: { mode } };
        } else if (typeof req.tool_choice === "object") {
          // A specific function was requested: force a call restricted to it.
          body.toolConfig = {
            functionCallingConfig: {
              mode: "ANY",
              allowedFunctionNames: [req.tool_choice.function.name]
            }
          };
        }
      }
    }
    return body;
  }

  /** Maps a Gemini finish reason to the OpenAI-style value; unknown reasons become "stop". */
  function mapFinishReason(reason) {
    switch (reason) {
      case "MAX_TOKENS":
        return "length";
      case "SAFETY":
      case "RECITATION":
        return "content_filter";
      case "STOP":
      default:
        return "stop";
    }
  }

  /**
   * Translates a Gemini response into an OpenAI-style chat completion.
   * Only the first candidate is used; text parts are concatenated and
   * function-call parts become tool calls.
   */
  function translateGeminiResponse(response, model) {
    const candidate = response.candidates?.[0];
    let content = "";
    const toolCalls = [];
    for (const part of candidate?.content?.parts || []) {
      if (part.text) {
        content += part.text;
      } else if (part.functionCall) {
        toolCalls.push({
          id: generateId("call"),
          type: "function",
          function: {
            name: part.functionCall.name,
            arguments: JSON.stringify(part.functionCall.args || {})
          }
        });
      }
    }
    const message = { role: "assistant", content };
    if (toolCalls.length > 0) {
      message.tool_calls = toolCalls;
    }
    const finishReason = toolCalls.length > 0
      ? "tool_calls"
      : mapFinishReason(candidate?.finishReason || "STOP");
    const usage = response.usageMetadata;
    return {
      id: generateId(),
      object: "chat.completion",
      created: Math.floor(Date.now() / 1e3),
      model: `google/${model}`,
      choices: [{ index: 0, message, finish_reason: finishReason }],
      usage: {
        prompt_tokens: usage?.promptTokenCount || 0,
        completion_tokens: usage?.candidatesTokenCount || 0,
        total_tokens: usage?.totalTokenCount || 0
      }
    };
  }

  /**
   * Maps a provider job state to the adapter's batch status.
   * NOTE(review): both "JOB_STATE_*" and "BATCH_STATE_*" prefixes are accepted
   * because the REST reference appears to use the latter; confirm against the
   * Gemini Batch API documentation. Unknown states are treated as "pending".
   */
  function mapBatchState(state) {
    const suffix = typeof state === "string" ? state.replace(/^(?:JOB|BATCH)_STATE_/, "") : "";
    switch (suffix) {
      case "RUNNING":
        return "processing";
      case "SUCCEEDED":
        return "completed";
      case "FAILED":
      case "EXPIRED":
        return "failed";
      case "CANCELLED":
        return "cancelled";
      case "PENDING":
      default:
        return "pending";
    }
  }

  /**
   * Converts one provider result item into a batch result entry.
   *
   * @param {object} item - Inline or file result item.
   * @param {string} model - Model name used when translating the response.
   * @param {number} fallbackIndex - Used to build an id when the item has no key.
   * @returns {object|null} The result entry, or null when the item carries
   *   neither a response nor an error.
   */
  function toBatchResult(item, model, fallbackIndex) {
    const customId = item.key || item.metadata?.key || `request-${fallbackIndex}`;
    if (item.response) {
      return {
        custom_id: customId,
        status: "success",
        response: translateGeminiResponse(item.response, model),
        error: null
      };
    }
    if (item.error) {
      return {
        custom_id: customId,
        status: "error",
        response: null,
        error: {
          code: item.error.code || 500,
          message: item.error.message || "Batch item failed"
        }
      };
    }
    return null;
  }

  return {
    /**
     * Submits a batch of chat requests for one model.
     *
     * @returns {Promise<{providerBatchId: string, metadata: object}>}
     * @throws {AnyModelError} 502 when the response carries no batch name.
     */
    async createBatch(model, requests, _options) {
      const batchRequests = requests.map((req) => ({
        request: translateRequestToGemini(model, req),
        metadata: { key: req.custom_id }
      }));
      const res = await apiRequest(`/models/${model}:batchGenerateContent`, {
        method: "POST",
        body: {
          batch: {
            display_name: `anymodel-batch-${Date.now()}`,
            input_config: {
              requests: {
                requests: batchRequests
              }
            }
          }
        }
      });
      const data = await res.json();
      const batchName = data.name || data.batch?.name;
      if (!batchName) {
        throw new AnyModelError(502, "No batch name in Google response", {
          provider_name: "google",
          raw: data
        });
      }
      return {
        providerBatchId: batchName,
        metadata: {
          model,
          total_requests: requests.length
        }
      };
    },

    /**
     * Fetches the current status and counters of a batch.
     * NOTE(review): the state is also read from `metadata.state`, and the
     * counters may live under a different field (e.g. batch stats inside
     * `metadata`); confirm against the Gemini Batch API reference.
     */
    async pollBatch(providerBatchId) {
      const res = await apiRequest(`/${providerBatchId}`);
      const data = await res.json();
      const state = data.state || data.metadata?.state || "JOB_STATE_PENDING";
      const totalCount = data.totalCount || data.metadata?.total_requests || 0;
      const successCount = data.succeededCount || 0;
      const failedCount = data.failedCount || 0;
      return {
        status: mapBatchState(state),
        total: totalCount || successCount + failedCount,
        completed: successCount,
        failed: failedCount
      };
    },

    /**
     * Retrieves batch results, from inline responses when present and
     * otherwise from the JSONL results file referenced by the batch.
     *
     * @returns {Promise<object[]>} One entry per item that has a response or an error.
     * @throws {AnyModelError} 502 when the results file cannot be downloaded.
     */
    async getBatchResults(providerBatchId) {
      const batchRes = await apiRequest(`/${providerBatchId}`);
      const batchData = await batchRes.json();
      const model = batchData.metadata?.model || "unknown";
      const results = [];
      const collect = (item) => {
        const entry = toBatchResult(item, model, results.length);
        if (entry) results.push(entry);
      };

      // Accept either a plain list or a list nested one level deeper.
      // NOTE(review): the nested form is an assumption about the REST shape.
      const inlined = batchData.response?.inlinedResponses;
      const inlinedItems = Array.isArray(inlined) ? inlined : inlined?.inlinedResponses;
      if (Array.isArray(inlinedItems)) {
        inlinedItems.forEach(collect);
        return results;
      }

      const responsesFile = batchData.response?.responsesFileName || batchData.outputConfig?.file_name;
      if (!responsesFile) return results;
      const downloadUrl = `${GEMINI_API_BASE2}/${responsesFile}:download?alt=media`;
      const fileRes = await fetchWithTimeout(downloadUrl, {
        headers: { "x-goog-api-key": apiKey }
      });
      if (!fileRes.ok) {
        throw new AnyModelError(502, "Failed to download batch results file", {
          provider_name: "google"
        });
      }
      const text = await fileRes.text();
      for (const rawLine of text.split("\n")) {
        // Trim per line so blank, whitespace-only and CRLF lines do not break parsing.
        const line = rawLine.trim();
        if (!line) continue;
        collect(JSON.parse(line));
      }
      return results;
    },

    /** Requests cancellation of a batch. */
    async cancelBatch(providerBatchId) {
      await apiRequest(`/${providerBatchId}:cancel`, { method: "POST" });
    }
  };
}
3010
+
2665
3011
  // src/client.ts
2666
3012
  var AnyModel = class {
2667
3013
  registry;
@@ -2677,6 +3023,7 @@ var AnyModel = class {
2677
3023
  constructor(config = {}) {
2678
3024
  this.config = resolveConfig(config);
2679
3025
  this.registry = new ProviderRegistry();
3026
+ setDefaultTimeout((this.config.defaults?.timeout ?? 120) * 1e3);
2680
3027
  if (this.config.io) {
2681
3028
  configureFsIO(this.config.io);
2682
3029
  }
@@ -2797,6 +3144,10 @@ var AnyModel = class {
2797
3144
  if (anthropicKey) {
2798
3145
  this.batchManager.registerBatchAdapter("anthropic", createAnthropicBatchAdapter(anthropicKey));
2799
3146
  }
3147
+ const googleKey = config.google?.apiKey || process.env.GOOGLE_API_KEY;
3148
+ if (googleKey) {
3149
+ this.batchManager.registerBatchAdapter("google", createGoogleBatchAdapter(googleKey));
3150
+ }
2800
3151
  }
2801
3152
  applyDefaults(request) {
2802
3153
  const defaults = this.config.defaults;