@probeo/anymodel 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +21 -3
- package/dist/cli.cjs +368 -17
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.js +368 -17
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +377 -17
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +19 -1
- package/dist/index.d.ts +19 -1
- package/dist/index.js +374 -17
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -85,6 +85,18 @@ perplexity/sonar-pro
|
|
|
85
85
|
ollama/llama3.3
|
|
86
86
|
```
|
|
87
87
|
|
|
88
|
+
### Flex Pricing (OpenAI)
|
|
89
|
+
|
|
90
|
+
Get 50% off OpenAI requests with flexible latency:
|
|
91
|
+
|
|
92
|
+
```typescript
|
|
93
|
+
const response = await client.chat.completions.create({
|
|
94
|
+
model: "openai/gpt-4o",
|
|
95
|
+
messages: [{ role: "user", content: "Hello!" }],
|
|
96
|
+
service_tier: "flex",
|
|
97
|
+
});
|
|
98
|
+
```
|
|
99
|
+
|
|
88
100
|
## Fallback Routing
|
|
89
101
|
|
|
90
102
|
Try multiple models in order. If one fails, the next is attempted:
|
|
@@ -148,7 +160,7 @@ const response = await client.chat.completions.create({
|
|
|
148
160
|
|
|
149
161
|
## Batch Processing
|
|
150
162
|
|
|
151
|
-
Process many requests with native provider batch APIs or concurrent fallback. OpenAI and
|
|
163
|
+
Process many requests with native provider batch APIs or concurrent fallback. OpenAI, Anthropic, and Google batches are processed server-side — OpenAI at 50% cost, Anthropic with async processing for up to 10K requests, Google at 50% cost via `batchGenerateContent`. Other providers fall back to concurrent execution automatically.
|
|
152
164
|
|
|
153
165
|
### Submit and wait
|
|
154
166
|
|
|
@@ -169,7 +181,7 @@ for (const result of results.results) {
|
|
|
169
181
|
|
|
170
182
|
### Submit now, check later
|
|
171
183
|
|
|
172
|
-
Submit a batch and get back an ID immediately — no need to keep the process running for native batches (OpenAI, Anthropic):
|
|
184
|
+
Submit a batch and get back an ID immediately — no need to keep the process running for native batches (OpenAI, Anthropic, Google):
|
|
173
185
|
|
|
174
186
|
```typescript
|
|
175
187
|
// Submit and get the batch ID
|
|
@@ -232,6 +244,10 @@ const results = await client.batches.createAndPoll(request, {
|
|
|
232
244
|
|
|
233
245
|
Batches are persisted to `./.anymodel/batches/` in the current working directory and survive process restarts.
|
|
234
246
|
|
|
247
|
+
### Automatic max_tokens
|
|
248
|
+
|
|
249
|
+
When `max_tokens` isn't set on a batch request, anymodel automatically calculates a safe value per-request based on the estimated input size and the model's context window. This prevents truncated responses and context overflow errors without requiring you to hand-tune each request in a large batch. The estimation uses a ~4 chars/token heuristic with a 5% safety margin — conservative enough to avoid overflows, lightweight enough to skip tokenizer dependencies.
|
|
250
|
+
|
|
235
251
|
## Models Endpoint
|
|
236
252
|
|
|
237
253
|
```typescript
|
|
@@ -265,6 +281,7 @@ const client = new AnyModel({
|
|
|
265
281
|
temperature: 0.7,
|
|
266
282
|
max_tokens: 4096,
|
|
267
283
|
retries: 2,
|
|
284
|
+
timeout: 120, // HTTP timeout in seconds (default: 120 = 2 min, flex: 600 = 10 min)
|
|
268
285
|
},
|
|
269
286
|
});
|
|
270
287
|
|
|
@@ -426,6 +443,7 @@ npx tsx examples/basic.ts batch
|
|
|
426
443
|
- **Retries**: Automatic retry with exponential backoff on 429/502/503 errors (configurable via `defaults.retries`)
|
|
427
444
|
- **Rate limit tracking**: Per-provider rate limit state, automatically skips rate-limited providers during fallback routing
|
|
428
445
|
- **Parameter stripping**: Unsupported parameters are automatically removed before forwarding to providers
|
|
446
|
+
- **Smart batch defaults**: Automatic `max_tokens` estimation per-request in batches — calculates safe values from input size and model context limits, preventing truncation and overflow without manual tuning
|
|
429
447
|
- **High-volume IO**: All batch file operations use concurrency-limited async queues with atomic durable writes (temp file + fsync + rename) to prevent corruption on crash. Defaults: 20 concurrent reads, 10 concurrent writes — configurable via `io.readConcurrency` and `io.writeConcurrency`
|
|
430
448
|
|
|
431
449
|
## Roadmap
|
|
@@ -433,7 +451,7 @@ npx tsx examples/basic.ts batch
|
|
|
433
451
|
- [ ] **A/B testing** — split routing (% traffic to each model) and compare mode (same request to multiple models, return all responses with stats)
|
|
434
452
|
- [ ] **Cost tracking** — per-request and aggregate cost calculation from provider pricing
|
|
435
453
|
- [ ] **Caching** — response caching with configurable TTL for identical requests
|
|
436
|
-
- [x] **Native batch APIs** — OpenAI Batch API (JSONL upload, 50% cost)
|
|
454
|
+
- [x] **Native batch APIs** — OpenAI Batch API (JSONL upload, 50% cost), Anthropic Message Batches (10K requests, async), and Google Gemini Batch (50% cost). Auto-detects provider and routes to native API, falls back to concurrent for other providers
|
|
437
455
|
- [ ] **Result export** — `saveResults()` to write batch results to a configurable output directory
|
|
438
456
|
- [ ] **Prompt logging** — optional request/response logging for debugging and evaluation
|
|
439
457
|
|
package/dist/cli.cjs
CHANGED
|
@@ -508,6 +508,25 @@ var Router = class {
|
|
|
508
508
|
}
|
|
509
509
|
};
|
|
510
510
|
|
|
511
|
+
// src/utils/fetch-with-timeout.ts
// Timeout applied when a caller does not pass an explicit one (milliseconds).
var _defaultTimeout = 12e4;
// Timeout returned by getFlexTimeout() (milliseconds).
var _flexTimeout = 6e5;

/**
 * Replace the module-wide default timeout used by fetchWithTimeout().
 * @param {number} ms - New default timeout in milliseconds.
 */
function setDefaultTimeout(ms) {
  _defaultTimeout = ms;
}

/**
 * @returns {number} The flex timeout in milliseconds.
 */
function getFlexTimeout() {
  return _flexTimeout;
}

/**
 * fetch() wrapper that always attaches a timeout abort signal.
 *
 * When the caller supplies its own `init.signal`, the request is aborted by
 * whichever fires first: the caller's signal or the timeout.
 *
 * @param {string|URL|Request} url - Request target, forwarded to fetch().
 * @param {RequestInit} [init] - fetch() options; `signal` is replaced by the combined signal.
 * @param {number} [timeoutMs] - Per-call timeout in milliseconds; falls back to the
 *   module default when null/undefined.
 * @returns {Promise<Response>} The promise returned by fetch().
 */
function fetchWithTimeout(url, init, timeoutMs) {
  const ms = timeoutMs ?? _defaultTimeout;
  const timeoutSignal = AbortSignal.timeout(ms);
  const callerSignal = init?.signal;
  if (!callerSignal) {
    return fetch(url, { ...init, signal: timeoutSignal });
  }

  // AbortSignal.any() is newer than AbortSignal.timeout(); on runtimes that
  // lack it, build the combined signal by hand instead of throwing a TypeError.
  const mergeSignals = (signals) => {
    if (typeof AbortSignal.any === "function") {
      return AbortSignal.any(signals);
    }
    const controller = new AbortController();
    for (const source of signals) {
      if (source.aborted) {
        controller.abort(source.reason);
        break;
      }
      source.addEventListener("abort", () => controller.abort(source.reason), { once: true });
    }
    return controller.signal;
  };

  return fetch(url, { ...init, signal: mergeSignals([timeoutSignal, callerSignal]) });
}
|
|
529
|
+
|
|
511
530
|
// src/providers/openai.ts
|
|
512
531
|
var OPENAI_API_BASE = "https://api.openai.com/v1";
|
|
513
532
|
var SUPPORTED_PARAMS = /* @__PURE__ */ new Set([
|
|
@@ -525,19 +544,20 @@ var SUPPORTED_PARAMS = /* @__PURE__ */ new Set([
|
|
|
525
544
|
"tools",
|
|
526
545
|
"tool_choice",
|
|
527
546
|
"user",
|
|
528
|
-
"logit_bias"
|
|
547
|
+
"logit_bias",
|
|
548
|
+
"service_tier"
|
|
529
549
|
]);
|
|
530
550
|
function createOpenAIAdapter(apiKey, baseURL) {
|
|
531
551
|
const base = baseURL || OPENAI_API_BASE;
|
|
532
|
-
async function makeRequest(path2, body, method = "POST") {
|
|
533
|
-
const res = await
|
|
552
|
+
async function makeRequest(path2, body, method = "POST", timeoutMs) {
|
|
553
|
+
const res = await fetchWithTimeout(`${base}${path2}`, {
|
|
534
554
|
method,
|
|
535
555
|
headers: {
|
|
536
556
|
"Content-Type": "application/json",
|
|
537
557
|
"Authorization": `Bearer ${apiKey}`
|
|
538
558
|
},
|
|
539
559
|
body: body ? JSON.stringify(body) : void 0
|
|
540
|
-
});
|
|
560
|
+
}, timeoutMs);
|
|
541
561
|
if (!res.ok) {
|
|
542
562
|
let errorBody;
|
|
543
563
|
try {
|
|
@@ -585,6 +605,7 @@ function createOpenAIAdapter(apiKey, baseURL) {
|
|
|
585
605
|
if (request.tools !== void 0) body.tools = request.tools;
|
|
586
606
|
if (request.tool_choice !== void 0) body.tool_choice = request.tool_choice;
|
|
587
607
|
if (request.user !== void 0) body.user = request.user;
|
|
608
|
+
if (request.service_tier !== void 0) body.service_tier = request.service_tier;
|
|
588
609
|
return body;
|
|
589
610
|
}
|
|
590
611
|
const adapter = {
|
|
@@ -686,13 +707,15 @@ function createOpenAIAdapter(apiKey, baseURL) {
|
|
|
686
707
|
},
|
|
687
708
|
async sendRequest(request) {
|
|
688
709
|
const body = buildRequestBody(request);
|
|
689
|
-
const
|
|
710
|
+
const timeout = request.service_tier === "flex" ? getFlexTimeout() : void 0;
|
|
711
|
+
const res = await makeRequest("/chat/completions", body, "POST", timeout);
|
|
690
712
|
const json = await res.json();
|
|
691
713
|
return adapter.translateResponse(json);
|
|
692
714
|
},
|
|
693
715
|
async sendStreamingRequest(request) {
|
|
694
716
|
const body = buildRequestBody({ ...request, stream: true });
|
|
695
|
-
const
|
|
717
|
+
const timeout = request.service_tier === "flex" ? getFlexTimeout() : void 0;
|
|
718
|
+
const res = await makeRequest("/chat/completions", body, "POST", timeout);
|
|
696
719
|
if (!res.body) {
|
|
697
720
|
throw new AnyModelError(502, "No response body for streaming request", {
|
|
698
721
|
provider_name: "openai"
|
|
@@ -739,7 +762,7 @@ var FALLBACK_MODELS = [
|
|
|
739
762
|
];
|
|
740
763
|
function createAnthropicAdapter(apiKey) {
|
|
741
764
|
async function makeRequest(path2, body, stream = false) {
|
|
742
|
-
const res = await
|
|
765
|
+
const res = await fetchWithTimeout(`${ANTHROPIC_API_BASE}${path2}`, {
|
|
743
766
|
method: "POST",
|
|
744
767
|
headers: {
|
|
745
768
|
"Content-Type": "application/json",
|
|
@@ -996,7 +1019,7 @@ ${body.system}` : jsonInstruction;
|
|
|
996
1019
|
},
|
|
997
1020
|
async listModels() {
|
|
998
1021
|
try {
|
|
999
|
-
const res = await
|
|
1022
|
+
const res = await fetchWithTimeout(`${ANTHROPIC_API_BASE}/models`, {
|
|
1000
1023
|
method: "GET",
|
|
1001
1024
|
headers: {
|
|
1002
1025
|
"x-api-key": apiKey,
|
|
@@ -1281,7 +1304,7 @@ function createGoogleAdapter(apiKey) {
|
|
|
1281
1304
|
},
|
|
1282
1305
|
async listModels() {
|
|
1283
1306
|
try {
|
|
1284
|
-
const res = await
|
|
1307
|
+
const res = await fetchWithTimeout(`${GEMINI_API_BASE}/models?key=${apiKey}`);
|
|
1285
1308
|
if (!res.ok) return FALLBACK_MODELS2;
|
|
1286
1309
|
const data = await res.json();
|
|
1287
1310
|
const models = data.models || [];
|
|
@@ -1316,12 +1339,12 @@ function createGoogleAdapter(apiKey) {
|
|
|
1316
1339
|
return SUPPORTED_PARAMS3.has(param);
|
|
1317
1340
|
},
|
|
1318
1341
|
supportsBatch() {
|
|
1319
|
-
return
|
|
1342
|
+
return true;
|
|
1320
1343
|
},
|
|
1321
1344
|
async sendRequest(request) {
|
|
1322
1345
|
const body = translateRequest(request);
|
|
1323
1346
|
const url = getModelEndpoint(request.model, false);
|
|
1324
|
-
const res = await
|
|
1347
|
+
const res = await fetchWithTimeout(url, {
|
|
1325
1348
|
method: "POST",
|
|
1326
1349
|
headers: { "Content-Type": "application/json" },
|
|
1327
1350
|
body: JSON.stringify(body)
|
|
@@ -1344,7 +1367,7 @@ function createGoogleAdapter(apiKey) {
|
|
|
1344
1367
|
async sendStreamingRequest(request) {
|
|
1345
1368
|
const body = translateRequest(request);
|
|
1346
1369
|
const url = getModelEndpoint(request.model, true);
|
|
1347
|
-
const res = await
|
|
1370
|
+
const res = await fetchWithTimeout(url, {
|
|
1348
1371
|
method: "POST",
|
|
1349
1372
|
headers: { "Content-Type": "application/json" },
|
|
1350
1373
|
body: JSON.stringify(body)
|
|
@@ -1394,7 +1417,7 @@ var MODELS = [
|
|
|
1394
1417
|
];
|
|
1395
1418
|
function createPerplexityAdapter(apiKey) {
|
|
1396
1419
|
async function makeRequest(path2, body, method = "POST") {
|
|
1397
|
-
const res = await
|
|
1420
|
+
const res = await fetchWithTimeout(`${PERPLEXITY_API_BASE}${path2}`, {
|
|
1398
1421
|
method,
|
|
1399
1422
|
headers: {
|
|
1400
1423
|
"Content-Type": "application/json",
|
|
@@ -2266,6 +2289,51 @@ var BatchManager = class {
|
|
|
2266
2289
|
}
|
|
2267
2290
|
};
|
|
2268
2291
|
|
|
2292
|
+
// src/utils/token-estimate.ts
// Rough characters-per-token ratio used to estimate input size without a tokenizer.
var CHARS_PER_TOKEN2 = 4;
// Ordered lookup table: the FIRST entry whose pattern occurs in the model id wins,
// so more specific patterns (e.g. "gpt-4o-mini") must precede broader ones ("gpt-4o").
var MODEL_LIMITS = [
  // OpenAI
  { pattern: "gpt-4o-mini", limit: { contextLength: 128e3, maxCompletionTokens: 16384 } },
  { pattern: "gpt-4o", limit: { contextLength: 128e3, maxCompletionTokens: 16384 } },
  { pattern: "gpt-4-turbo", limit: { contextLength: 128e3, maxCompletionTokens: 4096 } },
  { pattern: "gpt-3.5-turbo", limit: { contextLength: 16385, maxCompletionTokens: 4096 } },
  { pattern: "o1", limit: { contextLength: 2e5, maxCompletionTokens: 1e5 } },
  { pattern: "o3", limit: { contextLength: 2e5, maxCompletionTokens: 1e5 } },
  { pattern: "o4-mini", limit: { contextLength: 2e5, maxCompletionTokens: 1e5 } },
  // Anthropic
  { pattern: "claude-opus-4", limit: { contextLength: 2e5, maxCompletionTokens: 32768 } },
  { pattern: "claude-sonnet-4", limit: { contextLength: 2e5, maxCompletionTokens: 16384 } },
  { pattern: "claude-haiku-4", limit: { contextLength: 2e5, maxCompletionTokens: 8192 } },
  { pattern: "claude-3.5-sonnet", limit: { contextLength: 2e5, maxCompletionTokens: 8192 } },
  { pattern: "claude-3-opus", limit: { contextLength: 2e5, maxCompletionTokens: 4096 } },
  // Google
  { pattern: "gemini-2.5-pro", limit: { contextLength: 1048576, maxCompletionTokens: 65536 } },
  { pattern: "gemini-2.5-flash", limit: { contextLength: 1048576, maxCompletionTokens: 65536 } },
  { pattern: "gemini-2.0-flash", limit: { contextLength: 1048576, maxCompletionTokens: 65536 } },
  { pattern: "gemini-1.5-pro", limit: { contextLength: 2097152, maxCompletionTokens: 8192 } },
  { pattern: "gemini-1.5-flash", limit: { contextLength: 1048576, maxCompletionTokens: 8192 } }
];
// Limits used when no MODEL_LIMITS pattern matches.
var DEFAULT_LIMIT = { contextLength: 128e3, maxCompletionTokens: 4096 };

/**
 * Look up context/completion limits for a model id.
 *
 * The provider prefix (everything up to the first "/") is stripped, then the
 * id is compared against MODEL_LIMITS by substring. Matching ignores case and
 * treats "." and "-" as the same separator, so both "claude-3.5-sonnet" and
 * "claude-3-5-sonnet-20241022" resolve to the same entry.
 *
 * @param {string} model - Model id, with or without a "provider/" prefix.
 * @returns {{contextLength: number, maxCompletionTokens: number}} Matching limits,
 *   or DEFAULT_LIMIT when nothing matches.
 */
function getModelLimits(model) {
  const canonical = (id) => id.toLowerCase().replaceAll(".", "-");
  const slash = model.indexOf("/");
  const bare = canonical(slash === -1 ? model : model.slice(slash + 1));
  const hit = MODEL_LIMITS.find((entry) => bare.includes(canonical(entry.pattern)));
  return hit ? hit.limit : DEFAULT_LIMIT;
}

/**
 * Pick a max_tokens value for a request.
 *
 * A caller-supplied value is returned untouched. Otherwise the input size is
 * estimated from the serialized messages (CHARS_PER_TOKEN2 chars per token,
 * plus a 5% margin) and the result is the smaller of the model's completion
 * cap and the remaining context, never less than 1.
 *
 * @param {string} model - Model id, with or without a "provider/" prefix.
 * @param {Array<object>} messages - Chat messages; only their serialized length is used.
 * @param {number} [userMaxTokens] - Explicit value; null/undefined means "not set".
 * @returns {number} The max_tokens value to send.
 */
function resolveMaxTokens(model, messages, userMaxTokens) {
  // Treat null like undefined so an explicit `max_tokens: null` is not echoed back.
  if (userMaxTokens != null) return userMaxTokens;
  // JSON.stringify(undefined) is undefined; fall back to an empty list instead of throwing.
  const inputChars = JSON.stringify(messages ?? []).length;
  const estimatedInput = Math.ceil(inputChars / CHARS_PER_TOKEN2);
  const estimatedWithMargin = Math.ceil(estimatedInput * 1.05);
  const { contextLength, maxCompletionTokens } = getModelLimits(model);
  const available = contextLength - estimatedWithMargin;
  // `available` can go negative for oversized inputs; clamp so a positive value is always sent.
  return Math.max(1, Math.min(maxCompletionTokens, available));
}
|
|
2336
|
+
|
|
2269
2337
|
// src/providers/openai-batch.ts
|
|
2270
2338
|
var OPENAI_API_BASE2 = "https://api.openai.com/v1";
|
|
2271
2339
|
function createOpenAIBatchAdapter(apiKey) {
|
|
@@ -2280,7 +2348,7 @@ function createOpenAIBatchAdapter(apiKey) {
|
|
|
2280
2348
|
headers["Content-Type"] = "application/json";
|
|
2281
2349
|
fetchBody = JSON.stringify(options.body);
|
|
2282
2350
|
}
|
|
2283
|
-
const res = await
|
|
2351
|
+
const res = await fetchWithTimeout(`${OPENAI_API_BASE2}${path2}`, {
|
|
2284
2352
|
method: options.method || "GET",
|
|
2285
2353
|
headers,
|
|
2286
2354
|
body: fetchBody
|
|
@@ -2306,7 +2374,7 @@ function createOpenAIBatchAdapter(apiKey) {
|
|
|
2306
2374
|
model,
|
|
2307
2375
|
messages: req.messages
|
|
2308
2376
|
};
|
|
2309
|
-
|
|
2377
|
+
body.max_tokens = req.max_tokens !== void 0 ? req.max_tokens : resolveMaxTokens(model, req.messages);
|
|
2310
2378
|
if (req.temperature !== void 0) body.temperature = req.temperature;
|
|
2311
2379
|
if (req.top_p !== void 0) body.top_p = req.top_p;
|
|
2312
2380
|
if (req.stop !== void 0) body.stop = req.stop;
|
|
@@ -2465,7 +2533,7 @@ function createAnthropicBatchAdapter(apiKey) {
|
|
|
2465
2533
|
"anthropic-version": ANTHROPIC_VERSION2,
|
|
2466
2534
|
"Content-Type": "application/json"
|
|
2467
2535
|
};
|
|
2468
|
-
const res = await
|
|
2536
|
+
const res = await fetchWithTimeout(`${ANTHROPIC_API_BASE2}${path2}`, {
|
|
2469
2537
|
method: options.method || "GET",
|
|
2470
2538
|
headers,
|
|
2471
2539
|
body: options.body ? JSON.stringify(options.body) : void 0
|
|
@@ -2488,7 +2556,7 @@ function createAnthropicBatchAdapter(apiKey) {
|
|
|
2488
2556
|
function translateToAnthropicParams(model, req) {
|
|
2489
2557
|
const params = {
|
|
2490
2558
|
model,
|
|
2491
|
-
max_tokens: req.max_tokens || DEFAULT_MAX_TOKENS2
|
|
2559
|
+
max_tokens: resolveMaxTokens(model, req.messages, req.max_tokens || DEFAULT_MAX_TOKENS2)
|
|
2492
2560
|
};
|
|
2493
2561
|
const systemMessages = req.messages.filter((m) => m.role === "system");
|
|
2494
2562
|
const nonSystemMessages = req.messages.filter((m) => m.role !== "system");
|
|
@@ -2662,6 +2730,284 @@ ${params.system}` : jsonInstruction;
|
|
|
2662
2730
|
};
|
|
2663
2731
|
}
|
|
2664
2732
|
|
|
2733
|
+
// src/providers/google-batch.ts
var GEMINI_API_BASE2 = "https://generativelanguage.googleapis.com/v1beta";

/**
 * Build a batch adapter for the Gemini API.
 *
 * The returned object exposes createBatch / pollBatch / getBatchResults /
 * cancelBatch. Requests are translated from the OpenAI-style chat shape into
 * Gemini `generateContent` bodies, and Gemini responses are translated back.
 *
 * @param {string} apiKey - Sent as the `x-goog-api-key` header on every call.
 * @returns {object} The batch adapter.
 */
function createGoogleBatchAdapter(apiKey) {
  /**
   * Issue an authenticated JSON request against GEMINI_API_BASE2.
   * @throws {AnyModelError} On any non-2xx response; 5xx statuses are reported as 502.
   */
  async function apiRequest(path2, options = {}) {
    const headers = {
      "Content-Type": "application/json",
      "x-goog-api-key": apiKey
    };
    const res = await fetchWithTimeout(`${GEMINI_API_BASE2}${path2}`, {
      method: options.method || "GET",
      headers,
      body: options.body ? JSON.stringify(options.body) : void 0
    });
    if (!res.ok) {
      let errorBody;
      try {
        errorBody = await res.json();
      } catch {
        // Body was not JSON; fall back to the HTTP status text.
        errorBody = { message: res.statusText };
      }
      const msg = errorBody?.error?.message || errorBody?.message || res.statusText;
      throw new AnyModelError(res.status >= 500 ? 502 : res.status, msg, {
        provider_name: "google",
        raw: errorBody
      });
    }
    return res;
  }

  /** Convert one message's content into Gemini `parts`; non-text parts become empty text. */
  function toGeminiParts(content) {
    if (typeof content === "string") return [{ text: content }];
    if (Array.isArray(content)) {
      return content.map((p) => p.type === "text" ? { text: p.text } : { text: "" });
    }
    return [{ text: "" }];
  }

  /** Map an OpenAI-style tool_choice onto a Gemini toolConfig, or undefined if unrecognized. */
  function toToolConfig(toolChoice) {
    if (toolChoice === "auto") return { functionCallingConfig: { mode: "AUTO" } };
    if (toolChoice === "required") return { functionCallingConfig: { mode: "ANY" } };
    if (toolChoice === "none") return { functionCallingConfig: { mode: "NONE" } };
    if (typeof toolChoice === "object") {
      return {
        functionCallingConfig: {
          mode: "ANY",
          allowedFunctionNames: [toolChoice.function.name]
        }
      };
    }
    return void 0;
  }

  /** Translate one OpenAI-style chat request into a Gemini generateContent body. */
  function translateRequestToGemini(model, req) {
    const body = {};
    const systemMessages = req.messages.filter((m) => m.role === "system");
    const nonSystemMessages = req.messages.filter((m) => m.role !== "system");
    if (systemMessages.length > 0) {
      // All system messages are folded into one instruction; non-string content contributes "".
      body.systemInstruction = {
        parts: [{ text: systemMessages.map((m) => typeof m.content === "string" ? m.content : "").join("\n") }]
      };
    }
    body.contents = nonSystemMessages.map((m) => ({
      role: m.role === "assistant" ? "model" : "user",
      parts: toGeminiParts(m.content)
    }));
    const generationConfig = {};
    if (req.temperature !== void 0) generationConfig.temperature = req.temperature;
    // Always set: falls back to an estimate when the request carries no max_tokens.
    generationConfig.maxOutputTokens = req.max_tokens !== void 0 ? req.max_tokens : resolveMaxTokens(model, req.messages);
    if (req.top_p !== void 0) generationConfig.topP = req.top_p;
    if (req.top_k !== void 0) generationConfig.topK = req.top_k;
    if (req.stop !== void 0) {
      generationConfig.stopSequences = Array.isArray(req.stop) ? req.stop : [req.stop];
    }
    if (req.response_format) {
      if (req.response_format.type === "json_object") {
        generationConfig.responseMimeType = "application/json";
      } else if (req.response_format.type === "json_schema") {
        generationConfig.responseMimeType = "application/json";
        generationConfig.responseSchema = req.response_format.json_schema?.schema;
      }
    }
    // maxOutputTokens is always present, so the config is never empty.
    body.generationConfig = generationConfig;
    if (req.tools && req.tools.length > 0) {
      body.tools = [{
        functionDeclarations: req.tools.map((t) => ({
          name: t.function.name,
          description: t.function.description || "",
          parameters: t.function.parameters || {}
        }))
      }];
      if (req.tool_choice) {
        const toolConfig = toToolConfig(req.tool_choice);
        if (toolConfig) body.toolConfig = toolConfig;
      }
    }
    return body;
  }

  /** Map a Gemini finishReason onto an OpenAI-style finish_reason; unknown values become "stop". */
  function mapFinishReason(reason) {
    switch (reason) {
      case "MAX_TOKENS":
        return "length";
      case "SAFETY":
      case "RECITATION":
        return "content_filter";
      case "STOP":
      default:
        return "stop";
    }
  }

  /** Translate a Gemini generateContent response into an OpenAI-style chat.completion. */
  function translateGeminiResponse(response, model) {
    const candidate = response.candidates?.[0];
    let content = "";
    const toolCalls = [];
    for (const part of candidate?.content?.parts || []) {
      if (part.text) {
        content += part.text;
      } else if (part.functionCall) {
        toolCalls.push({
          id: generateId("call"),
          type: "function",
          function: {
            name: part.functionCall.name,
            arguments: JSON.stringify(part.functionCall.args || {})
          }
        });
      }
    }
    const message = { role: "assistant", content };
    if (toolCalls.length > 0) {
      message.tool_calls = toolCalls;
    }
    // Any function call takes precedence over the provider's own finish reason.
    const finishReason = toolCalls.length > 0 ? "tool_calls" : mapFinishReason(candidate?.finishReason || "STOP");
    return {
      id: generateId(),
      object: "chat.completion",
      created: Math.floor(Date.now() / 1e3),
      model: `google/${model}`,
      choices: [{ index: 0, message, finish_reason: finishReason }],
      usage: {
        prompt_tokens: response.usageMetadata?.promptTokenCount || 0,
        completion_tokens: response.usageMetadata?.candidatesTokenCount || 0,
        total_tokens: response.usageMetadata?.totalTokenCount || 0
      }
    };
  }

  /**
   * Map a provider batch state onto the adapter's status vocabulary.
   * Unknown states are reported as "pending".
   */
  function mapBatchState(state) {
    // NOTE(review): the REST API may report BATCH_STATE_* rather than JOB_STATE_*;
    // both prefixes are accepted here — confirm against the Gemini Batch API reference.
    const bare = String(state).replace(/^(?:JOB|BATCH)_STATE_/, "");
    switch (bare) {
      case "RUNNING":
        return "processing";
      case "SUCCEEDED":
        return "completed";
      case "FAILED":
      case "EXPIRED":
        return "failed";
      case "CANCELLED":
        return "cancelled";
      case "PENDING":
      default:
        return "pending";
    }
  }

  /**
   * Convert one provider result item into a batch result entry.
   * Returns null for items carrying neither `response` nor `error` (these are skipped).
   */
  function toBatchResult(item, customId, model) {
    if (item.response) {
      return {
        custom_id: customId,
        status: "success",
        response: translateGeminiResponse(item.response, model),
        error: null
      };
    }
    if (item.error) {
      return {
        custom_id: customId,
        status: "error",
        response: null,
        error: {
          code: item.error.code || 500,
          message: item.error.message || "Batch item failed"
        }
      };
    }
    return null;
  }

  return {
    /** Submit all requests as one inline batch and return the provider's batch name. */
    async createBatch(model, requests, _options) {
      const batchRequests = requests.map((req) => ({
        request: translateRequestToGemini(model, req),
        // custom_id round-trips through the provider as metadata.key.
        metadata: { key: req.custom_id }
      }));
      const res = await apiRequest(`/models/${model}:batchGenerateContent`, {
        method: "POST",
        body: {
          batch: {
            display_name: `anymodel-batch-${Date.now()}`,
            input_config: {
              requests: {
                requests: batchRequests
              }
            }
          }
        }
      });
      const data = await res.json();
      const batchName = data.name || data.batch?.name;
      if (!batchName) {
        throw new AnyModelError(502, "No batch name in Google response", {
          provider_name: "google",
          raw: data
        });
      }
      return {
        providerBatchId: batchName,
        metadata: {
          model,
          total_requests: requests.length
        }
      };
    },

    /** Fetch the batch resource and summarize its status and counters. */
    async pollBatch(providerBatchId) {
      const res = await apiRequest(`/${providerBatchId}`);
      const data = await res.json();
      // NOTE(review): the state may be nested under `metadata` in the operation
      // resource; read it as a fallback — confirm against the API reference.
      const state = data.state || data.metadata?.state || "JOB_STATE_PENDING";
      const status = mapBatchState(state);
      const totalCount = data.totalCount || data.metadata?.total_requests || 0;
      const successCount = data.succeededCount || 0;
      const failedCount = data.failedCount || 0;
      return {
        status,
        total: totalCount || successCount + failedCount,
        completed: successCount,
        failed: failedCount
      };
    },

    /**
     * Collect per-request results, from inlined responses when present,
     * otherwise from the JSONL results file referenced by the batch.
     * @throws {AnyModelError} 502 when the results file cannot be downloaded or parsed.
     */
    async getBatchResults(providerBatchId) {
      const batchRes = await apiRequest(`/${providerBatchId}`);
      const batchData = await batchRes.json();
      const results = [];
      const model = batchData.metadata?.model || "unknown";
      if (batchData.response?.inlinedResponses) {
        for (const item of batchData.response.inlinedResponses) {
          const customId = item.metadata?.key || `request-${results.length}`;
          const entry = toBatchResult(item, customId, model);
          if (entry) results.push(entry);
        }
        return results;
      }
      const responsesFile = batchData.response?.responsesFileName || batchData.outputConfig?.file_name;
      if (responsesFile) {
        const downloadUrl = `${GEMINI_API_BASE2}/${responsesFile}:download?alt=media`;
        const fileRes = await fetchWithTimeout(downloadUrl, {
          headers: { "x-goog-api-key": apiKey }
        });
        if (!fileRes.ok) {
          throw new AnyModelError(502, "Failed to download batch results file", {
            provider_name: "google"
          });
        }
        const text = await fileRes.text();
        for (const rawLine of text.split("\n")) {
          // Trim per line so blank lines and CRLF endings are skipped rather than parsed.
          const line = rawLine.trim();
          if (!line) continue;
          let item;
          try {
            item = JSON.parse(line);
          } catch {
            // Surface malformed output as a provider error, consistent with apiRequest.
            throw new AnyModelError(502, "Malformed line in batch results file", {
              provider_name: "google",
              raw: line
            });
          }
          const customId = item.key || item.metadata?.key || `request-${results.length}`;
          const entry = toBatchResult(item, customId, model);
          if (entry) results.push(entry);
        }
      }
      return results;
    },

    /** Ask the provider to cancel the batch. */
    async cancelBatch(providerBatchId) {
      await apiRequest(`/${providerBatchId}:cancel`, { method: "POST" });
    }
  };
}
|
|
3010
|
+
|
|
2665
3011
|
// src/client.ts
|
|
2666
3012
|
var AnyModel = class {
|
|
2667
3013
|
registry;
|
|
@@ -2677,6 +3023,7 @@ var AnyModel = class {
|
|
|
2677
3023
|
constructor(config = {}) {
|
|
2678
3024
|
this.config = resolveConfig(config);
|
|
2679
3025
|
this.registry = new ProviderRegistry();
|
|
3026
|
+
setDefaultTimeout((this.config.defaults?.timeout ?? 120) * 1e3);
|
|
2680
3027
|
if (this.config.io) {
|
|
2681
3028
|
configureFsIO(this.config.io);
|
|
2682
3029
|
}
|
|
@@ -2797,6 +3144,10 @@ var AnyModel = class {
|
|
|
2797
3144
|
if (anthropicKey) {
|
|
2798
3145
|
this.batchManager.registerBatchAdapter("anthropic", createAnthropicBatchAdapter(anthropicKey));
|
|
2799
3146
|
}
|
|
3147
|
+
const googleKey = config.google?.apiKey || process.env.GOOGLE_API_KEY;
|
|
3148
|
+
if (googleKey) {
|
|
3149
|
+
this.batchManager.registerBatchAdapter("google", createGoogleBatchAdapter(googleKey));
|
|
3150
|
+
}
|
|
2800
3151
|
}
|
|
2801
3152
|
applyDefaults(request) {
|
|
2802
3153
|
const defaults = this.config.defaults;
|