@probeo/anymodel 0.4.0 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -85,6 +85,18 @@ perplexity/sonar-pro
85
85
  ollama/llama3.3
86
86
  ```
87
87
 
88
+ ### Flex Pricing (OpenAI)
89
+
90
+ Get 50% off OpenAI requests with flexible latency:
91
+
92
+ ```typescript
93
+ const response = await client.chat.completions.create({
94
+ model: "openai/gpt-4o",
95
+ messages: [{ role: "user", content: "Hello!" }],
96
+ service_tier: "flex",
97
+ });
98
+ ```
99
+
88
100
  ## Fallback Routing
89
101
 
90
102
  Try multiple models in order. If one fails, the next is attempted:
@@ -148,7 +160,7 @@ const response = await client.chat.completions.create({
148
160
 
149
161
  ## Batch Processing
150
162
 
151
- Process many requests with native provider batch APIs or concurrent fallback. OpenAI and Anthropic batches are processed server-side — OpenAI at 50% cost, Anthropic with async processing for up to 10K requests. Other providers fall back to concurrent execution automatically.
163
+ Process many requests with native provider batch APIs or concurrent fallback. OpenAI, Anthropic, and Google batches are processed server-side — OpenAI at 50% cost, Anthropic with async processing for up to 10K requests, Google at 50% cost via `batchGenerateContent`. Other providers fall back to concurrent execution automatically.
152
164
 
153
165
  ### Submit and wait
154
166
 
@@ -169,7 +181,7 @@ for (const result of results.results) {
169
181
 
170
182
  ### Submit now, check later
171
183
 
172
- Submit a batch and get back an ID immediately — no need to keep the process running for native batches (OpenAI, Anthropic):
184
+ Submit a batch and get back an ID immediately — no need to keep the process running for native batches (OpenAI, Anthropic, Google):
173
185
 
174
186
  ```typescript
175
187
  // Submit and get the batch ID
@@ -232,6 +244,10 @@ const results = await client.batches.createAndPoll(request, {
232
244
 
233
245
  Batches are persisted to `./.anymodel/batches/` in the current working directory and survive process restarts.
234
246
 
247
+ ### Automatic max_tokens
248
+
249
+ When `max_tokens` isn't set on a batch request, anymodel automatically calculates a safe value per-request based on the estimated input size and the model's context window. This prevents context-overflow errors and requests rejected for exceeding model limits, without requiring you to hand-tune each request in a large batch. The estimation uses a ~4 chars/token heuristic with a 5% safety margin — conservative enough to avoid overflows, lightweight enough to skip tokenizer dependencies.
250
+
235
251
  ## Models Endpoint
236
252
 
237
253
  ```typescript
@@ -265,6 +281,7 @@ const client = new AnyModel({
265
281
  temperature: 0.7,
266
282
  max_tokens: 4096,
267
283
  retries: 2,
284
+ timeout: 120, // HTTP timeout in seconds (default: 120 = 2 min, flex: 600 = 10 min)
268
285
  },
269
286
  });
270
287
 
@@ -426,6 +443,8 @@ npx tsx examples/basic.ts batch
426
443
  - **Retries**: Automatic retry with exponential backoff on 429/502/503 errors (configurable via `defaults.retries`)
427
444
  - **Rate limit tracking**: Per-provider rate limit state, automatically skips rate-limited providers during fallback routing
428
445
  - **Parameter stripping**: Unsupported parameters are automatically removed before forwarding to providers
446
+ - **Smart batch defaults**: Automatic `max_tokens` estimation per-request in batches — calculates safe values from input size and model context limits, preventing context-overflow errors without manual tuning
447
+ - **Memory-efficient batching**: Concurrent batch requests are re-read from the on-disk JSONL and parsed one line at a time — only N requests (default 5) are in-flight at once, so 10K+ request batches don't hold every in-flight response in memory
429
448
  - **High-volume IO**: All batch file operations use concurrency-limited async queues with atomic durable writes (temp file + fsync + rename) to prevent corruption on crash. Defaults: 20 concurrent reads, 10 concurrent writes — configurable via `io.readConcurrency` and `io.writeConcurrency`
430
449
 
431
450
  ## Roadmap
@@ -433,7 +452,7 @@ npx tsx examples/basic.ts batch
433
452
  - [ ] **A/B testing** — split routing (% traffic to each model) and compare mode (same request to multiple models, return all responses with stats)
434
453
  - [ ] **Cost tracking** — per-request and aggregate cost calculation from provider pricing
435
454
  - [ ] **Caching** — response caching with configurable TTL for identical requests
436
- - [x] **Native batch APIs** — OpenAI Batch API (JSONL upload, 50% cost) and Anthropic Message Batches (10K requests, async). Auto-detects provider and routes to native API, falls back to concurrent for other providers
455
+ - [x] **Native batch APIs** — OpenAI Batch API (JSONL upload, 50% cost), Anthropic Message Batches (10K requests, async), and Google Gemini Batch (50% cost). Auto-detects provider and routes to native API, falls back to concurrent for other providers
437
456
  - [ ] **Result export** — `saveResults()` to write batch results to a configurable output directory
438
457
  - [ ] **Prompt logging** — optional request/response logging for debugging and evaluation
439
458
 
package/dist/cli.cjs CHANGED
@@ -508,6 +508,25 @@ var Router = class {
508
508
  }
509
509
  };
510
510
 
511
// src/utils/fetch-with-timeout.ts
// Module-level timeout state: a configurable default budget plus a fixed,
// longer budget used for OpenAI "flex" service-tier requests.
var _defaultTimeout = 12e4;
var _flexTimeout = 6e5;

/** Override the default HTTP timeout, in milliseconds. */
function setDefaultTimeout(ms) {
  _defaultTimeout = ms;
}

/** Timeout budget (ms) applied to flex service-tier requests. */
function getFlexTimeout() {
  return _flexTimeout;
}

/**
 * fetch() wrapper that enforces a timeout via AbortSignal.timeout.
 * When the caller already supplies init.signal, both signals are combined
 * with AbortSignal.any so whichever aborts first cancels the request.
 */
function fetchWithTimeout(url, init, timeoutMs) {
  const timeoutSignal = AbortSignal.timeout(timeoutMs ?? _defaultTimeout);
  const callerSignal = init?.signal;
  const signal = callerSignal ? AbortSignal.any([timeoutSignal, callerSignal]) : timeoutSignal;
  return fetch(url, { ...init, signal });
}
529
+
511
530
  // src/providers/openai.ts
512
531
  var OPENAI_API_BASE = "https://api.openai.com/v1";
513
532
  var SUPPORTED_PARAMS = /* @__PURE__ */ new Set([
@@ -525,19 +544,20 @@ var SUPPORTED_PARAMS = /* @__PURE__ */ new Set([
525
544
  "tools",
526
545
  "tool_choice",
527
546
  "user",
528
- "logit_bias"
547
+ "logit_bias",
548
+ "service_tier"
529
549
  ]);
530
550
  function createOpenAIAdapter(apiKey, baseURL) {
531
551
  const base = baseURL || OPENAI_API_BASE;
532
- async function makeRequest(path2, body, method = "POST") {
533
- const res = await fetch(`${base}${path2}`, {
552
+ async function makeRequest(path2, body, method = "POST", timeoutMs) {
553
+ const res = await fetchWithTimeout(`${base}${path2}`, {
534
554
  method,
535
555
  headers: {
536
556
  "Content-Type": "application/json",
537
557
  "Authorization": `Bearer ${apiKey}`
538
558
  },
539
559
  body: body ? JSON.stringify(body) : void 0
540
- });
560
+ }, timeoutMs);
541
561
  if (!res.ok) {
542
562
  let errorBody;
543
563
  try {
@@ -585,6 +605,7 @@ function createOpenAIAdapter(apiKey, baseURL) {
585
605
  if (request.tools !== void 0) body.tools = request.tools;
586
606
  if (request.tool_choice !== void 0) body.tool_choice = request.tool_choice;
587
607
  if (request.user !== void 0) body.user = request.user;
608
+ if (request.service_tier !== void 0) body.service_tier = request.service_tier;
588
609
  return body;
589
610
  }
590
611
  const adapter = {
@@ -686,13 +707,15 @@ function createOpenAIAdapter(apiKey, baseURL) {
686
707
  },
687
708
  async sendRequest(request) {
688
709
  const body = buildRequestBody(request);
689
- const res = await makeRequest("/chat/completions", body);
710
+ const timeout = request.service_tier === "flex" ? getFlexTimeout() : void 0;
711
+ const res = await makeRequest("/chat/completions", body, "POST", timeout);
690
712
  const json = await res.json();
691
713
  return adapter.translateResponse(json);
692
714
  },
693
715
  async sendStreamingRequest(request) {
694
716
  const body = buildRequestBody({ ...request, stream: true });
695
- const res = await makeRequest("/chat/completions", body);
717
+ const timeout = request.service_tier === "flex" ? getFlexTimeout() : void 0;
718
+ const res = await makeRequest("/chat/completions", body, "POST", timeout);
696
719
  if (!res.body) {
697
720
  throw new AnyModelError(502, "No response body for streaming request", {
698
721
  provider_name: "openai"
@@ -739,7 +762,7 @@ var FALLBACK_MODELS = [
739
762
  ];
740
763
  function createAnthropicAdapter(apiKey) {
741
764
  async function makeRequest(path2, body, stream = false) {
742
- const res = await fetch(`${ANTHROPIC_API_BASE}${path2}`, {
765
+ const res = await fetchWithTimeout(`${ANTHROPIC_API_BASE}${path2}`, {
743
766
  method: "POST",
744
767
  headers: {
745
768
  "Content-Type": "application/json",
@@ -996,7 +1019,7 @@ ${body.system}` : jsonInstruction;
996
1019
  },
997
1020
  async listModels() {
998
1021
  try {
999
- const res = await fetch(`${ANTHROPIC_API_BASE}/models`, {
1022
+ const res = await fetchWithTimeout(`${ANTHROPIC_API_BASE}/models`, {
1000
1023
  method: "GET",
1001
1024
  headers: {
1002
1025
  "x-api-key": apiKey,
@@ -1281,7 +1304,7 @@ function createGoogleAdapter(apiKey) {
1281
1304
  },
1282
1305
  async listModels() {
1283
1306
  try {
1284
- const res = await fetch(`${GEMINI_API_BASE}/models?key=${apiKey}`);
1307
+ const res = await fetchWithTimeout(`${GEMINI_API_BASE}/models?key=${apiKey}`);
1285
1308
  if (!res.ok) return FALLBACK_MODELS2;
1286
1309
  const data = await res.json();
1287
1310
  const models = data.models || [];
@@ -1316,12 +1339,12 @@ function createGoogleAdapter(apiKey) {
1316
1339
  return SUPPORTED_PARAMS3.has(param);
1317
1340
  },
1318
1341
  supportsBatch() {
1319
- return false;
1342
+ return true;
1320
1343
  },
1321
1344
  async sendRequest(request) {
1322
1345
  const body = translateRequest(request);
1323
1346
  const url = getModelEndpoint(request.model, false);
1324
- const res = await fetch(url, {
1347
+ const res = await fetchWithTimeout(url, {
1325
1348
  method: "POST",
1326
1349
  headers: { "Content-Type": "application/json" },
1327
1350
  body: JSON.stringify(body)
@@ -1344,7 +1367,7 @@ function createGoogleAdapter(apiKey) {
1344
1367
  async sendStreamingRequest(request) {
1345
1368
  const body = translateRequest(request);
1346
1369
  const url = getModelEndpoint(request.model, true);
1347
- const res = await fetch(url, {
1370
+ const res = await fetchWithTimeout(url, {
1348
1371
  method: "POST",
1349
1372
  headers: { "Content-Type": "application/json" },
1350
1373
  body: JSON.stringify(body)
@@ -1394,7 +1417,7 @@ var MODELS = [
1394
1417
  ];
1395
1418
  function createPerplexityAdapter(apiKey) {
1396
1419
  async function makeRequest(path2, body, method = "POST") {
1397
- const res = await fetch(`${PERPLEXITY_API_BASE}${path2}`, {
1420
+ const res = await fetchWithTimeout(`${PERPLEXITY_API_BASE}${path2}`, {
1398
1421
  method,
1399
1422
  headers: {
1400
1423
  "Content-Type": "application/json",
@@ -1949,6 +1972,17 @@ var BatchStore = class {
1949
1972
  const entries = await readDirQueued(this.dir);
1950
1973
  return entries.filter((d) => d.isDirectory()).map((d) => d.name).sort();
1951
1974
  }
1975
  /**
   * Iterate the persisted requests of a batch as parsed objects.
   *
   * NOTE(review): despite yielding lazily (one JSON.parse per request),
   * this currently loads the entire requests.jsonl into memory via
   * readFileQueued before splitting on newlines — only the parsing is
   * incremental, not the disk read itself.
   *
   * @param id batch identifier (directory name under the batch store)
   * @yields one parsed request object per non-blank JSONL line
   */
  async *streamRequests(id) {
    const p = joinPath(this.batchDir(id), "requests.jsonl");
    // Missing file -> empty iteration (batch may not have been persisted yet).
    if (!await fileExistsQueued(p)) return;
    const raw = await readFileQueued(p, "utf8");
    for (const line of raw.split("\n")) {
      // Skip blank/whitespace-only lines (a trailing newline produces one).
      if (line.trim()) yield JSON.parse(line);
    }
  }
1952
1986
  /**
1953
1987
  * Check if a batch exists.
1954
1988
  */
@@ -2013,7 +2047,7 @@ var BatchManager = class {
2013
2047
  this.processNativeBatch(id, request, native.adapter).catch(() => {
2014
2048
  });
2015
2049
  } else {
2016
- this.processConcurrentBatch(id, request).catch(() => {
2050
+ this.processConcurrentBatch(id, request.model, request.options).catch(() => {
2017
2051
  });
2018
2052
  }
2019
2053
  return batch;
@@ -2193,28 +2227,28 @@ var BatchManager = class {
2193
2227
  }
2194
2228
  /**
2195
2229
  * Process batch requests concurrently (fallback path).
2230
+ * Streams requests from disk to avoid holding them all in memory.
2196
2231
  */
2197
- async processConcurrentBatch(batchId, request) {
2232
+ async processConcurrentBatch(batchId, model, options) {
2198
2233
  const batch = await this.store.getMeta(batchId);
2199
2234
  if (!batch) return;
2200
2235
  batch.status = "processing";
2201
2236
  await this.store.updateMeta(batch);
2202
- const items = request.requests;
2203
2237
  const active = /* @__PURE__ */ new Set();
2204
2238
  const processItem = async (item) => {
2205
2239
  const current = await this.store.getMeta(batchId);
2206
2240
  if (current?.status === "cancelled") return;
2207
2241
  const chatRequest = {
2208
- model: request.model,
2242
+ model,
2209
2243
  messages: item.messages,
2210
- max_tokens: item.max_tokens ?? request.options?.max_tokens,
2211
- temperature: item.temperature ?? request.options?.temperature,
2212
- top_p: item.top_p ?? request.options?.top_p,
2213
- top_k: item.top_k ?? request.options?.top_k,
2214
- stop: item.stop ?? request.options?.stop,
2215
- response_format: item.response_format ?? request.options?.response_format,
2216
- tools: item.tools ?? request.options?.tools,
2217
- tool_choice: item.tool_choice ?? request.options?.tool_choice
2244
+ max_tokens: item.max_tokens ?? options?.max_tokens,
2245
+ temperature: item.temperature ?? options?.temperature,
2246
+ top_p: item.top_p ?? options?.top_p,
2247
+ top_k: item.top_k ?? options?.top_k,
2248
+ stop: item.stop ?? options?.stop,
2249
+ response_format: item.response_format ?? options?.response_format,
2250
+ tools: item.tools ?? options?.tools,
2251
+ tool_choice: item.tool_choice ?? options?.tool_choice
2218
2252
  };
2219
2253
  let result;
2220
2254
  try {
@@ -2245,7 +2279,7 @@ var BatchManager = class {
2245
2279
  await this.store.updateMeta(meta);
2246
2280
  }
2247
2281
  };
2248
- for (const item of items) {
2282
+ for await (const item of this.store.streamRequests(batchId)) {
2249
2283
  const current = await this.store.getMeta(batchId);
2250
2284
  if (current?.status === "cancelled") break;
2251
2285
  if (active.size >= this.concurrencyLimit) {
@@ -2266,6 +2300,51 @@ var BatchManager = class {
2266
2300
  }
2267
2301
  };
2268
2302
 
2303
// src/utils/token-estimate.ts
// Heuristic token estimation used to pick a safe max_tokens for batch
// requests when the caller did not set one. ~4 chars/token avoids pulling in
// a tokenizer dependency; a 5% margin keeps the input estimate conservative.
var CHARS_PER_TOKEN2 = 4;
// Ordered pattern table: more specific patterns (gpt-4o-mini) must precede
// their prefixes (gpt-4o) because the first match wins.
var MODEL_LIMITS = [
  // OpenAI
  { pattern: "gpt-4o-mini", limit: { contextLength: 128e3, maxCompletionTokens: 16384 } },
  { pattern: "gpt-4o", limit: { contextLength: 128e3, maxCompletionTokens: 16384 } },
  { pattern: "gpt-4-turbo", limit: { contextLength: 128e3, maxCompletionTokens: 4096 } },
  { pattern: "gpt-3.5-turbo", limit: { contextLength: 16385, maxCompletionTokens: 4096 } },
  { pattern: "o1", limit: { contextLength: 2e5, maxCompletionTokens: 1e5 } },
  { pattern: "o3", limit: { contextLength: 2e5, maxCompletionTokens: 1e5 } },
  { pattern: "o4-mini", limit: { contextLength: 2e5, maxCompletionTokens: 1e5 } },
  // Anthropic
  { pattern: "claude-opus-4", limit: { contextLength: 2e5, maxCompletionTokens: 32768 } },
  { pattern: "claude-sonnet-4", limit: { contextLength: 2e5, maxCompletionTokens: 16384 } },
  { pattern: "claude-haiku-4", limit: { contextLength: 2e5, maxCompletionTokens: 8192 } },
  { pattern: "claude-3.5-sonnet", limit: { contextLength: 2e5, maxCompletionTokens: 8192 } },
  { pattern: "claude-3-opus", limit: { contextLength: 2e5, maxCompletionTokens: 4096 } },
  // Google
  { pattern: "gemini-2.5-pro", limit: { contextLength: 1048576, maxCompletionTokens: 65536 } },
  { pattern: "gemini-2.5-flash", limit: { contextLength: 1048576, maxCompletionTokens: 65536 } },
  { pattern: "gemini-2.0-flash", limit: { contextLength: 1048576, maxCompletionTokens: 65536 } },
  { pattern: "gemini-1.5-pro", limit: { contextLength: 2097152, maxCompletionTokens: 8192 } },
  { pattern: "gemini-1.5-flash", limit: { contextLength: 1048576, maxCompletionTokens: 8192 } }
];
// Conservative fallback for unrecognized models.
var DEFAULT_LIMIT = { contextLength: 128e3, maxCompletionTokens: 4096 };

/**
 * Look up context/completion limits for a model id. Accepts either a bare
 * model name or a "provider/model" id (the provider prefix is stripped).
 * Returns DEFAULT_LIMIT when no pattern matches.
 */
function getModelLimits(model) {
  const bare = model.includes("/") ? model.slice(model.indexOf("/") + 1) : model;
  for (const entry of MODEL_LIMITS) {
    // includes() subsumes the previous startsWith() check, so one test suffices.
    if (bare.includes(entry.pattern)) {
      return entry.limit;
    }
  }
  return DEFAULT_LIMIT;
}

/**
 * Resolve max_tokens for a request: an explicit user value always wins;
 * otherwise estimate input tokens from the serialized messages
 * (~4 chars/token, +5% margin) and return the smaller of the model's
 * completion cap and the remaining context window. Clamped to >= 1 so the
 * request stays well-formed even when the input alone exceeds the context.
 *
 * @param model model id, optionally provider-prefixed
 * @param messages chat messages (serialized with JSON.stringify for sizing)
 * @param userMaxTokens caller-provided max_tokens, returned as-is when set
 * @returns a positive integer token budget
 */
function resolveMaxTokens(model, messages, userMaxTokens) {
  if (userMaxTokens !== void 0) return userMaxTokens;
  const inputChars = JSON.stringify(messages).length;
  const estimatedInput = Math.ceil(inputChars / CHARS_PER_TOKEN2);
  const estimatedWithMargin = Math.ceil(estimatedInput * 1.05);
  const limits = getModelLimits(model);
  const available = limits.contextLength - estimatedWithMargin;
  return Math.max(1, Math.min(limits.maxCompletionTokens, available));
}
2347
+
2269
2348
  // src/providers/openai-batch.ts
2270
2349
  var OPENAI_API_BASE2 = "https://api.openai.com/v1";
2271
2350
  function createOpenAIBatchAdapter(apiKey) {
@@ -2280,7 +2359,7 @@ function createOpenAIBatchAdapter(apiKey) {
2280
2359
  headers["Content-Type"] = "application/json";
2281
2360
  fetchBody = JSON.stringify(options.body);
2282
2361
  }
2283
- const res = await fetch(`${OPENAI_API_BASE2}${path2}`, {
2362
+ const res = await fetchWithTimeout(`${OPENAI_API_BASE2}${path2}`, {
2284
2363
  method: options.method || "GET",
2285
2364
  headers,
2286
2365
  body: fetchBody
@@ -2306,7 +2385,7 @@ function createOpenAIBatchAdapter(apiKey) {
2306
2385
  model,
2307
2386
  messages: req.messages
2308
2387
  };
2309
- if (req.max_tokens !== void 0) body.max_tokens = req.max_tokens;
2388
+ body.max_tokens = req.max_tokens !== void 0 ? req.max_tokens : resolveMaxTokens(model, req.messages);
2310
2389
  if (req.temperature !== void 0) body.temperature = req.temperature;
2311
2390
  if (req.top_p !== void 0) body.top_p = req.top_p;
2312
2391
  if (req.stop !== void 0) body.stop = req.stop;
@@ -2465,7 +2544,7 @@ function createAnthropicBatchAdapter(apiKey) {
2465
2544
  "anthropic-version": ANTHROPIC_VERSION2,
2466
2545
  "Content-Type": "application/json"
2467
2546
  };
2468
- const res = await fetch(`${ANTHROPIC_API_BASE2}${path2}`, {
2547
+ const res = await fetchWithTimeout(`${ANTHROPIC_API_BASE2}${path2}`, {
2469
2548
  method: options.method || "GET",
2470
2549
  headers,
2471
2550
  body: options.body ? JSON.stringify(options.body) : void 0
@@ -2488,7 +2567,7 @@ function createAnthropicBatchAdapter(apiKey) {
2488
2567
  function translateToAnthropicParams(model, req) {
2489
2568
  const params = {
2490
2569
  model,
2491
- max_tokens: req.max_tokens || DEFAULT_MAX_TOKENS2
2570
+ max_tokens: resolveMaxTokens(model, req.messages, req.max_tokens || DEFAULT_MAX_TOKENS2)
2492
2571
  };
2493
2572
  const systemMessages = req.messages.filter((m) => m.role === "system");
2494
2573
  const nonSystemMessages = req.messages.filter((m) => m.role !== "system");
@@ -2662,6 +2741,284 @@ ${params.system}` : jsonInstruction;
2662
2741
  };
2663
2742
  }
2664
2743
 
2744
// src/providers/google-batch.ts
var GEMINI_API_BASE2 = "https://generativelanguage.googleapis.com/v1beta";
/**
 * Build the Google Gemini native-batch adapter (batchGenerateContent).
 * Exposes createBatch / pollBatch / getBatchResults / cancelBatch, translating
 * between OpenAI-style chat requests/responses and the Gemini batch schema.
 */
function createGoogleBatchAdapter(apiKey) {
  // Shared HTTP helper: JSON request against the Gemini base URL, API key in
  // the x-goog-api-key header; non-2xx responses become AnyModelError
  // (5xx collapsed to 502, other statuses passed through).
  async function apiRequest(path2, options = {}) {
    const headers = {
      "Content-Type": "application/json",
      "x-goog-api-key": apiKey
    };
    const res = await fetchWithTimeout(`${GEMINI_API_BASE2}${path2}`, {
      method: options.method || "GET",
      headers,
      body: options.body ? JSON.stringify(options.body) : void 0
    });
    if (!res.ok) {
      let errorBody;
      try {
        errorBody = await res.json();
      } catch {
        // Non-JSON error body: fall back to the HTTP status text.
        errorBody = { message: res.statusText };
      }
      const msg = errorBody?.error?.message || errorBody?.message || res.statusText;
      throw new AnyModelError(res.status >= 500 ? 502 : res.status, msg, {
        provider_name: "google",
        raw: errorBody
      });
    }
    return res;
  }
  // Translate an OpenAI-style chat request into a Gemini GenerateContent body.
  function translateRequestToGemini(model, req) {
    const body = {};
    const systemMessages = req.messages.filter((m) => m.role === "system");
    const nonSystemMessages = req.messages.filter((m) => m.role !== "system");
    if (systemMessages.length > 0) {
      // Gemini takes system prompts via systemInstruction, not the contents
      // array; multiple system messages are joined with newlines.
      // NOTE(review): non-string system content is silently dropped ("").
      body.systemInstruction = {
        parts: [{ text: systemMessages.map((m) => typeof m.content === "string" ? m.content : "").join("\n") }]
      };
    }
    // Gemini uses "model" for assistant turns; every other role maps to "user".
    // Array content keeps only text parts; non-text parts become empty text.
    body.contents = nonSystemMessages.map((m) => ({
      role: m.role === "assistant" ? "model" : "user",
      parts: typeof m.content === "string" ? [{ text: m.content }] : Array.isArray(m.content) ? m.content.map((p) => p.type === "text" ? { text: p.text } : { text: "" }) : [{ text: "" }]
    }));
    const generationConfig = {};
    if (req.temperature !== void 0) generationConfig.temperature = req.temperature;
    // maxOutputTokens is always set: explicit max_tokens wins, otherwise the
    // per-request heuristic from resolveMaxTokens fills it in.
    generationConfig.maxOutputTokens = req.max_tokens !== void 0 ? req.max_tokens : resolveMaxTokens(model, req.messages);
    if (req.top_p !== void 0) generationConfig.topP = req.top_p;
    if (req.top_k !== void 0) generationConfig.topK = req.top_k;
    if (req.stop !== void 0) {
      // Gemini expects an array of stop sequences; normalize a lone string.
      generationConfig.stopSequences = Array.isArray(req.stop) ? req.stop : [req.stop];
    }
    if (req.response_format) {
      // Both JSON modes map to a JSON MIME type; json_schema also forwards
      // the schema itself for constrained decoding.
      if (req.response_format.type === "json_object") {
        generationConfig.responseMimeType = "application/json";
      } else if (req.response_format.type === "json_schema") {
        generationConfig.responseMimeType = "application/json";
        generationConfig.responseSchema = req.response_format.json_schema?.schema;
      }
    }
    // NOTE(review): maxOutputTokens is set unconditionally above, so this
    // guard is always true as written.
    if (Object.keys(generationConfig).length > 0) {
      body.generationConfig = generationConfig;
    }
    if (req.tools && req.tools.length > 0) {
      // OpenAI tool definitions -> Gemini functionDeclarations.
      body.tools = [{
        functionDeclarations: req.tools.map((t) => ({
          name: t.function.name,
          description: t.function.description || "",
          parameters: t.function.parameters || {}
        }))
      }];
      if (req.tool_choice) {
        // tool_choice mapping: auto->AUTO, required->ANY, none->NONE; a
        // specific {function:{name}} object pins ANY to that one function.
        if (req.tool_choice === "auto") {
          body.toolConfig = { functionCallingConfig: { mode: "AUTO" } };
        } else if (req.tool_choice === "required") {
          body.toolConfig = { functionCallingConfig: { mode: "ANY" } };
        } else if (req.tool_choice === "none") {
          body.toolConfig = { functionCallingConfig: { mode: "NONE" } };
        } else if (typeof req.tool_choice === "object") {
          body.toolConfig = {
            functionCallingConfig: {
              mode: "ANY",
              allowedFunctionNames: [req.tool_choice.function.name]
            }
          };
        }
      }
    }
    return body;
  }
  // Gemini finishReason -> OpenAI finish_reason. Unknown reasons default to
  // "stop"; SAFETY and RECITATION both surface as "content_filter".
  function mapFinishReason(reason) {
    switch (reason) {
      case "STOP":
        return "stop";
      case "MAX_TOKENS":
        return "length";
      case "SAFETY":
        return "content_filter";
      case "RECITATION":
        return "content_filter";
      default:
        return "stop";
    }
  }
  // Translate a Gemini GenerateContent response into an OpenAI-style
  // chat.completion object. Only the first candidate is used.
  function translateGeminiResponse(response, model) {
    const candidate = response.candidates?.[0];
    let content = "";
    const toolCalls = [];
    for (const part of candidate?.content?.parts || []) {
      if (part.text) {
        content += part.text;
      } else if (part.functionCall) {
        // Gemini function calls carry no id, so a synthetic one is generated.
        toolCalls.push({
          id: generateId("call"),
          type: "function",
          function: {
            name: part.functionCall.name,
            arguments: JSON.stringify(part.functionCall.args || {})
          }
        });
      }
    }
    const message = { role: "assistant", content };
    if (toolCalls.length > 0) {
      message.tool_calls = toolCalls;
    }
    // Any tool call forces finish_reason "tool_calls", mirroring OpenAI.
    const finishReason = toolCalls.length > 0 ? "tool_calls" : mapFinishReason(candidate?.finishReason || "STOP");
    return {
      id: generateId(),
      object: "chat.completion",
      created: Math.floor(Date.now() / 1e3),
      model: `google/${model}`,
      choices: [{ index: 0, message, finish_reason: finishReason }],
      usage: {
        prompt_tokens: response.usageMetadata?.promptTokenCount || 0,
        completion_tokens: response.usageMetadata?.candidatesTokenCount || 0,
        total_tokens: response.usageMetadata?.totalTokenCount || 0
      }
    };
  }
  // Gemini batch JOB_STATE_* -> internal batch status. EXPIRED is treated as
  // failed; unknown states stay pending.
  function mapBatchState(state) {
    switch (state) {
      case "JOB_STATE_PENDING":
        return "pending";
      case "JOB_STATE_RUNNING":
        return "processing";
      case "JOB_STATE_SUCCEEDED":
        return "completed";
      case "JOB_STATE_FAILED":
        return "failed";
      case "JOB_STATE_CANCELLED":
        return "cancelled";
      case "JOB_STATE_EXPIRED":
        return "failed";
      default:
        return "pending";
    }
  }
  return {
    /**
     * Submit a batch of requests via models/{model}:batchGenerateContent.
     * Each request carries its custom_id in metadata.key so results can be
     * matched back. Returns the provider's batch name plus local metadata.
     */
    async createBatch(model, requests, _options) {
      const batchRequests = requests.map((req) => ({
        request: translateRequestToGemini(model, req),
        metadata: { key: req.custom_id }
      }));
      const res = await apiRequest(`/models/${model}:batchGenerateContent`, {
        method: "POST",
        body: {
          batch: {
            display_name: `anymodel-batch-${Date.now()}`,
            input_config: {
              requests: {
                requests: batchRequests
              }
            }
          }
        }
      });
      const data = await res.json();
      // The batch name's location varies; check both observed shapes.
      const batchName = data.name || data.batch?.name;
      if (!batchName) {
        throw new AnyModelError(502, "No batch name in Google response", {
          provider_name: "google",
          raw: data
        });
      }
      return {
        providerBatchId: batchName,
        metadata: {
          model,
          total_requests: requests.length
        }
      };
    },
    /**
     * Poll batch progress. Counts fall back to 0 when the provider omits
     * them; total falls back to completed+failed when no totalCount is given.
     */
    async pollBatch(providerBatchId) {
      const res = await apiRequest(`/${providerBatchId}`);
      const data = await res.json();
      const state = data.state || "JOB_STATE_PENDING";
      const status = mapBatchState(state);
      const totalCount = data.totalCount || data.metadata?.total_requests || 0;
      const successCount = data.succeededCount || 0;
      const failedCount = data.failedCount || 0;
      return {
        status,
        total: totalCount || successCount + failedCount,
        completed: successCount,
        failed: failedCount
      };
    },
    /**
     * Fetch and translate batch results. Two provider shapes are handled:
     * inline responses embedded in the batch resource, or a results file
     * downloaded as JSONL. Items with neither response nor error are skipped.
     * NOTE(review): model is read from batchData.metadata?.model and falls
     * back to "unknown" — confirm the provider echoes this metadata back.
     */
    async getBatchResults(providerBatchId) {
      const batchRes = await apiRequest(`/${providerBatchId}`);
      const batchData = await batchRes.json();
      const results = [];
      const model = batchData.metadata?.model || "unknown";
      if (batchData.response?.inlinedResponses) {
        for (const item of batchData.response.inlinedResponses) {
          // Fall back to a positional id when the metadata key is missing.
          const customId = item.metadata?.key || `request-${results.length}`;
          if (item.response) {
            results.push({
              custom_id: customId,
              status: "success",
              response: translateGeminiResponse(item.response, model),
              error: null
            });
          } else if (item.error) {
            results.push({
              custom_id: customId,
              status: "error",
              response: null,
              error: {
                code: item.error.code || 500,
                message: item.error.message || "Batch item failed"
              }
            });
          }
        }
        return results;
      }
      // File-based results path: download the JSONL results file.
      const responsesFile = batchData.response?.responsesFileName || batchData.outputConfig?.file_name;
      if (responsesFile) {
        const downloadUrl = `${GEMINI_API_BASE2}/${responsesFile}:download?alt=media`;
        const fileRes = await fetchWithTimeout(downloadUrl, {
          headers: { "x-goog-api-key": apiKey }
        });
        if (!fileRes.ok) {
          throw new AnyModelError(502, "Failed to download batch results file", {
            provider_name: "google"
          });
        }
        const text = await fileRes.text();
        for (const line of text.trim().split("\n")) {
          if (!line) continue;
          const item = JSON.parse(line);
          const customId = item.key || item.metadata?.key || `request-${results.length}`;
          if (item.response) {
            results.push({
              custom_id: customId,
              status: "success",
              response: translateGeminiResponse(item.response, model),
              error: null
            });
          } else if (item.error) {
            results.push({
              custom_id: customId,
              status: "error",
              response: null,
              error: {
                code: item.error.code || 500,
                message: item.error.message || "Batch item failed"
              }
            });
          }
        }
      }
      return results;
    },
    /** Cancel a running batch via the provider's :cancel RPC. */
    async cancelBatch(providerBatchId) {
      await apiRequest(`/${providerBatchId}:cancel`, { method: "POST" });
    }
  };
}
3021
+
2665
3022
  // src/client.ts
2666
3023
  var AnyModel = class {
2667
3024
  registry;
@@ -2677,6 +3034,7 @@ var AnyModel = class {
2677
3034
  constructor(config = {}) {
2678
3035
  this.config = resolveConfig(config);
2679
3036
  this.registry = new ProviderRegistry();
3037
+ setDefaultTimeout((this.config.defaults?.timeout ?? 120) * 1e3);
2680
3038
  if (this.config.io) {
2681
3039
  configureFsIO(this.config.io);
2682
3040
  }
@@ -2797,6 +3155,10 @@ var AnyModel = class {
2797
3155
  if (anthropicKey) {
2798
3156
  this.batchManager.registerBatchAdapter("anthropic", createAnthropicBatchAdapter(anthropicKey));
2799
3157
  }
3158
+ const googleKey = config.google?.apiKey || process.env.GOOGLE_API_KEY;
3159
+ if (googleKey) {
3160
+ this.batchManager.registerBatchAdapter("google", createGoogleBatchAdapter(googleKey));
3161
+ }
2800
3162
  }
2801
3163
  applyDefaults(request) {
2802
3164
  const defaults = this.config.defaults;