@probeo/anymodel 0.4.0 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +22 -3
- package/dist/cli.cjs +392 -30
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.js +392 -30
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +401 -30
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +24 -1
- package/dist/index.d.ts +24 -1
- package/dist/index.js +398 -30
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -85,6 +85,18 @@ perplexity/sonar-pro
|
|
|
85
85
|
ollama/llama3.3
|
|
86
86
|
```
|
|
87
87
|
|
|
88
|
+
### Flex Pricing (OpenAI)
|
|
89
|
+
|
|
90
|
+
Get 50% off OpenAI requests with flexible latency:
|
|
91
|
+
|
|
92
|
+
```typescript
|
|
93
|
+
const response = await client.chat.completions.create({
|
|
94
|
+
model: "openai/gpt-4o",
|
|
95
|
+
messages: [{ role: "user", content: "Hello!" }],
|
|
96
|
+
service_tier: "flex",
|
|
97
|
+
});
|
|
98
|
+
```
|
|
99
|
+
|
|
88
100
|
## Fallback Routing
|
|
89
101
|
|
|
90
102
|
Try multiple models in order. If one fails, the next is attempted:
|
|
@@ -148,7 +160,7 @@ const response = await client.chat.completions.create({
|
|
|
148
160
|
|
|
149
161
|
## Batch Processing
|
|
150
162
|
|
|
151
|
-
Process many requests with native provider batch APIs or concurrent fallback. OpenAI and
|
|
163
|
+
Process many requests with native provider batch APIs or concurrent fallback. OpenAI, Anthropic, and Google batches are processed server-side — OpenAI at 50% cost, Anthropic with async processing for up to 10K requests, Google at 50% cost via `batchGenerateContent`. Other providers fall back to concurrent execution automatically.
|
|
152
164
|
|
|
153
165
|
### Submit and wait
|
|
154
166
|
|
|
@@ -169,7 +181,7 @@ for (const result of results.results) {
|
|
|
169
181
|
|
|
170
182
|
### Submit now, check later
|
|
171
183
|
|
|
172
|
-
Submit a batch and get back an ID immediately — no need to keep the process running for native batches (OpenAI, Anthropic):
|
|
184
|
+
Submit a batch and get back an ID immediately — no need to keep the process running for native batches (OpenAI, Anthropic, Google):
|
|
173
185
|
|
|
174
186
|
```typescript
|
|
175
187
|
// Submit and get the batch ID
|
|
@@ -232,6 +244,10 @@ const results = await client.batches.createAndPoll(request, {
|
|
|
232
244
|
|
|
233
245
|
Batches are persisted to `./.anymodel/batches/` in the current working directory and survive process restarts.
|
|
234
246
|
|
|
247
|
+
### Automatic max_tokens
|
|
248
|
+
|
|
249
|
+
When `max_tokens` isn't set on a batch request, anymodel automatically calculates a safe value per-request based on the estimated input size and the model's context window. This prevents truncated responses and context overflow errors without requiring you to hand-tune each request in a large batch. The estimation uses a ~4 chars/token heuristic with a 5% safety margin — conservative enough to avoid overflows, lightweight enough to skip tokenizer dependencies.
|
|
250
|
+
|
|
235
251
|
## Models Endpoint
|
|
236
252
|
|
|
237
253
|
```typescript
|
|
@@ -265,6 +281,7 @@ const client = new AnyModel({
|
|
|
265
281
|
temperature: 0.7,
|
|
266
282
|
max_tokens: 4096,
|
|
267
283
|
retries: 2,
|
|
284
|
+
timeout: 120, // HTTP timeout in seconds (default: 120 = 2 min, flex: 600 = 10 min)
|
|
268
285
|
},
|
|
269
286
|
});
|
|
270
287
|
|
|
@@ -426,6 +443,8 @@ npx tsx examples/basic.ts batch
|
|
|
426
443
|
- **Retries**: Automatic retry with exponential backoff on 429/502/503 errors (configurable via `defaults.retries`)
|
|
427
444
|
- **Rate limit tracking**: Per-provider rate limit state, automatically skips rate-limited providers during fallback routing
|
|
428
445
|
- **Parameter stripping**: Unsupported parameters are automatically removed before forwarding to providers
|
|
446
|
+
- **Smart batch defaults**: Automatic `max_tokens` estimation per-request in batches — calculates safe values from input size and model context limits, preventing truncation and overflow without manual tuning
|
|
447
|
+
- **Memory-efficient batching**: Concurrent batch requests are streamed from disk — only N requests (default 5) are in-flight at a time, making 10K+ request batches safe without memory spikes
|
|
429
448
|
- **High-volume IO**: All batch file operations use concurrency-limited async queues with atomic durable writes (temp file + fsync + rename) to prevent corruption on crash. Defaults: 20 concurrent reads, 10 concurrent writes — configurable via `io.readConcurrency` and `io.writeConcurrency`
|
|
430
449
|
|
|
431
450
|
## Roadmap
|
|
@@ -433,7 +452,7 @@ npx tsx examples/basic.ts batch
|
|
|
433
452
|
- [ ] **A/B testing** — split routing (% traffic to each model) and compare mode (same request to multiple models, return all responses with stats)
|
|
434
453
|
- [ ] **Cost tracking** — per-request and aggregate cost calculation from provider pricing
|
|
435
454
|
- [ ] **Caching** — response caching with configurable TTL for identical requests
|
|
436
|
-
- [x] **Native batch APIs** — OpenAI Batch API (JSONL upload, 50% cost)
|
|
455
|
+
- [x] **Native batch APIs** — OpenAI Batch API (JSONL upload, 50% cost), Anthropic Message Batches (10K requests, async), and Google Gemini Batch (50% cost). Auto-detects provider and routes to native API, falls back to concurrent for other providers
|
|
437
456
|
- [ ] **Result export** — `saveResults()` to write batch results to a configurable output directory
|
|
438
457
|
- [ ] **Prompt logging** — optional request/response logging for debugging and evaluation
|
|
439
458
|
|
package/dist/cli.cjs
CHANGED
|
@@ -508,6 +508,25 @@ var Router = class {
|
|
|
508
508
|
}
|
|
509
509
|
};
|
|
510
510
|
|
|
511
|
+
// src/utils/fetch-with-timeout.ts
|
|
512
|
+
// Module-wide timeout state, in milliseconds.
var _defaultTimeout = 12e4; // 120 000 ms, used when a call passes no explicit timeout
var _flexTimeout = 6e5; // 600 000 ms, returned by getFlexTimeout()

/**
 * Replace the default timeout applied by `fetchWithTimeout` when the caller
 * does not pass `timeoutMs`.
 *
 * @param {number} ms - New default timeout in milliseconds.
 */
function setDefaultTimeout(ms) {
  _defaultTimeout = ms;
}

/**
 * @returns {number} The timeout (ms) reserved for flex-tier requests.
 */
function getFlexTimeout() {
  return _flexTimeout;
}

/**
 * `fetch` wrapper that aborts the request after a timeout.
 *
 * If `init.signal` is supplied, the request is aborted by whichever fires
 * first: the caller's signal or the timeout.
 *
 * @param {string | URL | Request} url - Request target, forwarded to `fetch`.
 * @param {RequestInit} [init] - Fetch options; `signal` is combined with the timeout.
 * @param {number} [timeoutMs] - Timeout in ms; falls back to the module default.
 * @returns {Promise<Response>} The pending `fetch` call.
 */
function fetchWithTimeout(url, init, timeoutMs) {
  const requested = timeoutMs ?? _defaultTimeout;
  // AbortSignal.timeout() rejects non-integer delays in Node, and a timeout
  // derived from seconds (e.g. 1.1 * 1000 === 1100.0000000000002) is easily
  // fractional. Round finite values; anything else is passed through so that
  // invalid input is still reported by AbortSignal.timeout() itself.
  const ms = Number.isFinite(requested) ? Math.round(requested) : requested;
  const timeoutSignal = AbortSignal.timeout(ms);
  const signal = init?.signal
    ? AbortSignal.any([timeoutSignal, init.signal])
    : timeoutSignal;
  return fetch(url, { ...init, signal });
}
|
|
529
|
+
|
|
511
530
|
// src/providers/openai.ts
|
|
512
531
|
var OPENAI_API_BASE = "https://api.openai.com/v1";
|
|
513
532
|
var SUPPORTED_PARAMS = /* @__PURE__ */ new Set([
|
|
@@ -525,19 +544,20 @@ var SUPPORTED_PARAMS = /* @__PURE__ */ new Set([
|
|
|
525
544
|
"tools",
|
|
526
545
|
"tool_choice",
|
|
527
546
|
"user",
|
|
528
|
-
"logit_bias"
|
|
547
|
+
"logit_bias",
|
|
548
|
+
"service_tier"
|
|
529
549
|
]);
|
|
530
550
|
function createOpenAIAdapter(apiKey, baseURL) {
|
|
531
551
|
const base = baseURL || OPENAI_API_BASE;
|
|
532
|
-
async function makeRequest(path2, body, method = "POST") {
|
|
533
|
-
const res = await
|
|
552
|
+
async function makeRequest(path2, body, method = "POST", timeoutMs) {
|
|
553
|
+
const res = await fetchWithTimeout(`${base}${path2}`, {
|
|
534
554
|
method,
|
|
535
555
|
headers: {
|
|
536
556
|
"Content-Type": "application/json",
|
|
537
557
|
"Authorization": `Bearer ${apiKey}`
|
|
538
558
|
},
|
|
539
559
|
body: body ? JSON.stringify(body) : void 0
|
|
540
|
-
});
|
|
560
|
+
}, timeoutMs);
|
|
541
561
|
if (!res.ok) {
|
|
542
562
|
let errorBody;
|
|
543
563
|
try {
|
|
@@ -585,6 +605,7 @@ function createOpenAIAdapter(apiKey, baseURL) {
|
|
|
585
605
|
if (request.tools !== void 0) body.tools = request.tools;
|
|
586
606
|
if (request.tool_choice !== void 0) body.tool_choice = request.tool_choice;
|
|
587
607
|
if (request.user !== void 0) body.user = request.user;
|
|
608
|
+
if (request.service_tier !== void 0) body.service_tier = request.service_tier;
|
|
588
609
|
return body;
|
|
589
610
|
}
|
|
590
611
|
const adapter = {
|
|
@@ -686,13 +707,15 @@ function createOpenAIAdapter(apiKey, baseURL) {
|
|
|
686
707
|
},
|
|
687
708
|
async sendRequest(request) {
|
|
688
709
|
const body = buildRequestBody(request);
|
|
689
|
-
const
|
|
710
|
+
const timeout = request.service_tier === "flex" ? getFlexTimeout() : void 0;
|
|
711
|
+
const res = await makeRequest("/chat/completions", body, "POST", timeout);
|
|
690
712
|
const json = await res.json();
|
|
691
713
|
return adapter.translateResponse(json);
|
|
692
714
|
},
|
|
693
715
|
async sendStreamingRequest(request) {
|
|
694
716
|
const body = buildRequestBody({ ...request, stream: true });
|
|
695
|
-
const
|
|
717
|
+
const timeout = request.service_tier === "flex" ? getFlexTimeout() : void 0;
|
|
718
|
+
const res = await makeRequest("/chat/completions", body, "POST", timeout);
|
|
696
719
|
if (!res.body) {
|
|
697
720
|
throw new AnyModelError(502, "No response body for streaming request", {
|
|
698
721
|
provider_name: "openai"
|
|
@@ -739,7 +762,7 @@ var FALLBACK_MODELS = [
|
|
|
739
762
|
];
|
|
740
763
|
function createAnthropicAdapter(apiKey) {
|
|
741
764
|
async function makeRequest(path2, body, stream = false) {
|
|
742
|
-
const res = await
|
|
765
|
+
const res = await fetchWithTimeout(`${ANTHROPIC_API_BASE}${path2}`, {
|
|
743
766
|
method: "POST",
|
|
744
767
|
headers: {
|
|
745
768
|
"Content-Type": "application/json",
|
|
@@ -996,7 +1019,7 @@ ${body.system}` : jsonInstruction;
|
|
|
996
1019
|
},
|
|
997
1020
|
async listModels() {
|
|
998
1021
|
try {
|
|
999
|
-
const res = await
|
|
1022
|
+
const res = await fetchWithTimeout(`${ANTHROPIC_API_BASE}/models`, {
|
|
1000
1023
|
method: "GET",
|
|
1001
1024
|
headers: {
|
|
1002
1025
|
"x-api-key": apiKey,
|
|
@@ -1281,7 +1304,7 @@ function createGoogleAdapter(apiKey) {
|
|
|
1281
1304
|
},
|
|
1282
1305
|
async listModels() {
|
|
1283
1306
|
try {
|
|
1284
|
-
const res = await
|
|
1307
|
+
const res = await fetchWithTimeout(`${GEMINI_API_BASE}/models?key=${apiKey}`);
|
|
1285
1308
|
if (!res.ok) return FALLBACK_MODELS2;
|
|
1286
1309
|
const data = await res.json();
|
|
1287
1310
|
const models = data.models || [];
|
|
@@ -1316,12 +1339,12 @@ function createGoogleAdapter(apiKey) {
|
|
|
1316
1339
|
return SUPPORTED_PARAMS3.has(param);
|
|
1317
1340
|
},
|
|
1318
1341
|
supportsBatch() {
|
|
1319
|
-
return
|
|
1342
|
+
return true;
|
|
1320
1343
|
},
|
|
1321
1344
|
async sendRequest(request) {
|
|
1322
1345
|
const body = translateRequest(request);
|
|
1323
1346
|
const url = getModelEndpoint(request.model, false);
|
|
1324
|
-
const res = await
|
|
1347
|
+
const res = await fetchWithTimeout(url, {
|
|
1325
1348
|
method: "POST",
|
|
1326
1349
|
headers: { "Content-Type": "application/json" },
|
|
1327
1350
|
body: JSON.stringify(body)
|
|
@@ -1344,7 +1367,7 @@ function createGoogleAdapter(apiKey) {
|
|
|
1344
1367
|
async sendStreamingRequest(request) {
|
|
1345
1368
|
const body = translateRequest(request);
|
|
1346
1369
|
const url = getModelEndpoint(request.model, true);
|
|
1347
|
-
const res = await
|
|
1370
|
+
const res = await fetchWithTimeout(url, {
|
|
1348
1371
|
method: "POST",
|
|
1349
1372
|
headers: { "Content-Type": "application/json" },
|
|
1350
1373
|
body: JSON.stringify(body)
|
|
@@ -1394,7 +1417,7 @@ var MODELS = [
|
|
|
1394
1417
|
];
|
|
1395
1418
|
function createPerplexityAdapter(apiKey) {
|
|
1396
1419
|
async function makeRequest(path2, body, method = "POST") {
|
|
1397
|
-
const res = await
|
|
1420
|
+
const res = await fetchWithTimeout(`${PERPLEXITY_API_BASE}${path2}`, {
|
|
1398
1421
|
method,
|
|
1399
1422
|
headers: {
|
|
1400
1423
|
"Content-Type": "application/json",
|
|
@@ -1949,6 +1972,17 @@ var BatchStore = class {
|
|
|
1949
1972
|
const entries = await readDirQueued(this.dir);
|
|
1950
1973
|
return entries.filter((d) => d.isDirectory()).map((d) => d.name).sort();
|
|
1951
1974
|
}
|
|
1975
|
+
/**
|
|
1976
|
+
* Stream requests from JSONL one line at a time (memory-efficient).
|
|
1977
|
+
*/
|
|
1978
|
+
async *streamRequests(id) {
|
|
1979
|
+
const p = joinPath(this.batchDir(id), "requests.jsonl");
|
|
1980
|
+
if (!await fileExistsQueued(p)) return;
|
|
1981
|
+
const raw = await readFileQueued(p, "utf8");
|
|
1982
|
+
for (const line of raw.split("\n")) {
|
|
1983
|
+
if (line.trim()) yield JSON.parse(line);
|
|
1984
|
+
}
|
|
1985
|
+
}
|
|
1952
1986
|
/**
|
|
1953
1987
|
* Check if a batch exists.
|
|
1954
1988
|
*/
|
|
@@ -2013,7 +2047,7 @@ var BatchManager = class {
|
|
|
2013
2047
|
this.processNativeBatch(id, request, native.adapter).catch(() => {
|
|
2014
2048
|
});
|
|
2015
2049
|
} else {
|
|
2016
|
-
this.processConcurrentBatch(id, request).catch(() => {
|
|
2050
|
+
this.processConcurrentBatch(id, request.model, request.options).catch(() => {
|
|
2017
2051
|
});
|
|
2018
2052
|
}
|
|
2019
2053
|
return batch;
|
|
@@ -2193,28 +2227,28 @@ var BatchManager = class {
|
|
|
2193
2227
|
}
|
|
2194
2228
|
/**
|
|
2195
2229
|
* Process batch requests concurrently (fallback path).
|
|
2230
|
+
* Streams requests from disk to avoid holding them all in memory.
|
|
2196
2231
|
*/
|
|
2197
|
-
async processConcurrentBatch(batchId,
|
|
2232
|
+
async processConcurrentBatch(batchId, model, options) {
|
|
2198
2233
|
const batch = await this.store.getMeta(batchId);
|
|
2199
2234
|
if (!batch) return;
|
|
2200
2235
|
batch.status = "processing";
|
|
2201
2236
|
await this.store.updateMeta(batch);
|
|
2202
|
-
const items = request.requests;
|
|
2203
2237
|
const active = /* @__PURE__ */ new Set();
|
|
2204
2238
|
const processItem = async (item) => {
|
|
2205
2239
|
const current = await this.store.getMeta(batchId);
|
|
2206
2240
|
if (current?.status === "cancelled") return;
|
|
2207
2241
|
const chatRequest = {
|
|
2208
|
-
model
|
|
2242
|
+
model,
|
|
2209
2243
|
messages: item.messages,
|
|
2210
|
-
max_tokens: item.max_tokens ??
|
|
2211
|
-
temperature: item.temperature ??
|
|
2212
|
-
top_p: item.top_p ??
|
|
2213
|
-
top_k: item.top_k ??
|
|
2214
|
-
stop: item.stop ??
|
|
2215
|
-
response_format: item.response_format ??
|
|
2216
|
-
tools: item.tools ??
|
|
2217
|
-
tool_choice: item.tool_choice ??
|
|
2244
|
+
max_tokens: item.max_tokens ?? options?.max_tokens,
|
|
2245
|
+
temperature: item.temperature ?? options?.temperature,
|
|
2246
|
+
top_p: item.top_p ?? options?.top_p,
|
|
2247
|
+
top_k: item.top_k ?? options?.top_k,
|
|
2248
|
+
stop: item.stop ?? options?.stop,
|
|
2249
|
+
response_format: item.response_format ?? options?.response_format,
|
|
2250
|
+
tools: item.tools ?? options?.tools,
|
|
2251
|
+
tool_choice: item.tool_choice ?? options?.tool_choice
|
|
2218
2252
|
};
|
|
2219
2253
|
let result;
|
|
2220
2254
|
try {
|
|
@@ -2245,7 +2279,7 @@ var BatchManager = class {
|
|
|
2245
2279
|
await this.store.updateMeta(meta);
|
|
2246
2280
|
}
|
|
2247
2281
|
};
|
|
2248
|
-
for (const item of
|
|
2282
|
+
for await (const item of this.store.streamRequests(batchId)) {
|
|
2249
2283
|
const current = await this.store.getMeta(batchId);
|
|
2250
2284
|
if (current?.status === "cancelled") break;
|
|
2251
2285
|
if (active.size >= this.concurrencyLimit) {
|
|
@@ -2266,6 +2300,51 @@ var BatchManager = class {
|
|
|
2266
2300
|
}
|
|
2267
2301
|
};
|
|
2268
2302
|
|
|
2303
|
+
// src/utils/token-estimate.ts
|
|
2304
|
+
// Rough heuristic: one token per four characters of serialized input.
var CHARS_PER_TOKEN2 = 4;

// Known per-model limits. Entries are checked in order and the first pattern
// contained in the model name wins, so more specific patterns must come first
// (e.g. "gpt-4o-mini" before "gpt-4o").
var MODEL_LIMITS = [
  // OpenAI
  { pattern: "gpt-4o-mini", limit: { contextLength: 128e3, maxCompletionTokens: 16384 } },
  { pattern: "gpt-4o", limit: { contextLength: 128e3, maxCompletionTokens: 16384 } },
  { pattern: "gpt-4-turbo", limit: { contextLength: 128e3, maxCompletionTokens: 4096 } },
  { pattern: "gpt-3.5-turbo", limit: { contextLength: 16385, maxCompletionTokens: 4096 } },
  { pattern: "o1", limit: { contextLength: 2e5, maxCompletionTokens: 1e5 } },
  { pattern: "o3", limit: { contextLength: 2e5, maxCompletionTokens: 1e5 } },
  { pattern: "o4-mini", limit: { contextLength: 2e5, maxCompletionTokens: 1e5 } },
  // Anthropic
  { pattern: "claude-opus-4", limit: { contextLength: 2e5, maxCompletionTokens: 32768 } },
  { pattern: "claude-sonnet-4", limit: { contextLength: 2e5, maxCompletionTokens: 16384 } },
  { pattern: "claude-haiku-4", limit: { contextLength: 2e5, maxCompletionTokens: 8192 } },
  { pattern: "claude-3.5-sonnet", limit: { contextLength: 2e5, maxCompletionTokens: 8192 } },
  { pattern: "claude-3-opus", limit: { contextLength: 2e5, maxCompletionTokens: 4096 } },
  // Google
  { pattern: "gemini-2.5-pro", limit: { contextLength: 1048576, maxCompletionTokens: 65536 } },
  { pattern: "gemini-2.5-flash", limit: { contextLength: 1048576, maxCompletionTokens: 65536 } },
  { pattern: "gemini-2.0-flash", limit: { contextLength: 1048576, maxCompletionTokens: 65536 } },
  { pattern: "gemini-1.5-pro", limit: { contextLength: 2097152, maxCompletionTokens: 8192 } },
  { pattern: "gemini-1.5-flash", limit: { contextLength: 1048576, maxCompletionTokens: 8192 } }
];

// Limits assumed for any model that matches no pattern above.
var DEFAULT_LIMIT = { contextLength: 128e3, maxCompletionTokens: 4096 };

/**
 * Look up the context/completion limits for a model.
 *
 * A leading "provider/" segment is stripped before matching. The first
 * `MODEL_LIMITS` entry whose pattern occurs anywhere in the remaining name is
 * used; unknown (or non-string) models get `DEFAULT_LIMIT`.
 *
 * @param {string} model - Model name, optionally prefixed with "provider/".
 * @returns {{ contextLength: number, maxCompletionTokens: number }}
 */
function getModelLimits(model) {
  const name = typeof model === "string" ? model : "";
  const slash = name.indexOf("/");
  const bare = slash === -1 ? name : name.slice(slash + 1);
  // `includes` already covers the prefix case, so no separate startsWith check.
  const match = MODEL_LIMITS.find((entry) => bare.includes(entry.pattern));
  return match ? match.limit : DEFAULT_LIMIT;
}

/**
 * Decide the `max_tokens` value for a request.
 *
 * An explicit caller value is returned untouched. Otherwise the input size is
 * estimated from the serialized messages (chars / CHARS_PER_TOKEN2, plus a 5%
 * margin) and the result is the model's completion cap, reduced if the
 * estimated input leaves less room in the context window. Never below 1.
 *
 * @param {string} model - Model name used for the limit lookup.
 * @param {Array<object>} messages - Chat messages that will be sent.
 * @param {number} [userMaxTokens] - Caller-provided value; `null`/`undefined` mean "not set".
 * @returns {number} The `max_tokens` value to send.
 */
function resolveMaxTokens(model, messages, userMaxTokens) {
  // `!= null` so that an explicit `max_tokens: null` is treated as unset
  // instead of being forwarded as the result.
  if (userMaxTokens != null) return userMaxTokens;
  // JSON.stringify(undefined) is undefined; fall back to an empty list.
  const inputChars = JSON.stringify(messages ?? []).length;
  const estimatedInput = Math.ceil(inputChars / CHARS_PER_TOKEN2);
  const estimatedWithMargin = Math.ceil(estimatedInput * 1.05);
  const { contextLength, maxCompletionTokens } = getModelLimits(model);
  const available = contextLength - estimatedWithMargin;
  return Math.max(1, Math.min(maxCompletionTokens, available));
}
|
|
2347
|
+
|
|
2269
2348
|
// src/providers/openai-batch.ts
|
|
2270
2349
|
var OPENAI_API_BASE2 = "https://api.openai.com/v1";
|
|
2271
2350
|
function createOpenAIBatchAdapter(apiKey) {
|
|
@@ -2280,7 +2359,7 @@ function createOpenAIBatchAdapter(apiKey) {
|
|
|
2280
2359
|
headers["Content-Type"] = "application/json";
|
|
2281
2360
|
fetchBody = JSON.stringify(options.body);
|
|
2282
2361
|
}
|
|
2283
|
-
const res = await
|
|
2362
|
+
const res = await fetchWithTimeout(`${OPENAI_API_BASE2}${path2}`, {
|
|
2284
2363
|
method: options.method || "GET",
|
|
2285
2364
|
headers,
|
|
2286
2365
|
body: fetchBody
|
|
@@ -2306,7 +2385,7 @@ function createOpenAIBatchAdapter(apiKey) {
|
|
|
2306
2385
|
model,
|
|
2307
2386
|
messages: req.messages
|
|
2308
2387
|
};
|
|
2309
|
-
|
|
2388
|
+
body.max_tokens = req.max_tokens !== void 0 ? req.max_tokens : resolveMaxTokens(model, req.messages);
|
|
2310
2389
|
if (req.temperature !== void 0) body.temperature = req.temperature;
|
|
2311
2390
|
if (req.top_p !== void 0) body.top_p = req.top_p;
|
|
2312
2391
|
if (req.stop !== void 0) body.stop = req.stop;
|
|
@@ -2465,7 +2544,7 @@ function createAnthropicBatchAdapter(apiKey) {
|
|
|
2465
2544
|
"anthropic-version": ANTHROPIC_VERSION2,
|
|
2466
2545
|
"Content-Type": "application/json"
|
|
2467
2546
|
};
|
|
2468
|
-
const res = await
|
|
2547
|
+
const res = await fetchWithTimeout(`${ANTHROPIC_API_BASE2}${path2}`, {
|
|
2469
2548
|
method: options.method || "GET",
|
|
2470
2549
|
headers,
|
|
2471
2550
|
body: options.body ? JSON.stringify(options.body) : void 0
|
|
@@ -2488,7 +2567,7 @@ function createAnthropicBatchAdapter(apiKey) {
|
|
|
2488
2567
|
function translateToAnthropicParams(model, req) {
|
|
2489
2568
|
const params = {
|
|
2490
2569
|
model,
|
|
2491
|
-
max_tokens: req.max_tokens || DEFAULT_MAX_TOKENS2
|
|
2570
|
+
max_tokens: resolveMaxTokens(model, req.messages, req.max_tokens || DEFAULT_MAX_TOKENS2)
|
|
2492
2571
|
};
|
|
2493
2572
|
const systemMessages = req.messages.filter((m) => m.role === "system");
|
|
2494
2573
|
const nonSystemMessages = req.messages.filter((m) => m.role !== "system");
|
|
@@ -2662,6 +2741,284 @@ ${params.system}` : jsonInstruction;
|
|
|
2662
2741
|
};
|
|
2663
2742
|
}
|
|
2664
2743
|
|
|
2744
|
+
// src/providers/google-batch.ts
|
|
2745
|
+
var GEMINI_API_BASE2 = "https://generativelanguage.googleapis.com/v1beta";
|
|
2746
|
+
/**
 * Build a batch adapter that talks to the Gemini `batchGenerateContent` API.
 *
 * The returned object exposes `createBatch`, `pollBatch`, `getBatchResults`
 * and `cancelBatch`. Requests and responses are translated between the
 * OpenAI-style shapes used by the rest of the library and Gemini's format.
 *
 * @param {string} apiKey - Google API key, sent as `x-goog-api-key`.
 * @returns {object} Batch adapter.
 */
function createGoogleBatchAdapter(apiKey) {
  /**
   * Perform an authenticated JSON request against the Gemini API.
   * Non-2xx responses are raised as AnyModelError (5xx is reported as 502).
   */
  async function apiRequest(path2, options = {}) {
    const headers = {
      "Content-Type": "application/json",
      "x-goog-api-key": apiKey
    };
    const res = await fetchWithTimeout(`${GEMINI_API_BASE2}${path2}`, {
      method: options.method || "GET",
      headers,
      body: options.body ? JSON.stringify(options.body) : void 0
    });
    if (!res.ok) {
      let errorBody;
      try {
        errorBody = await res.json();
      } catch {
        // Body was not JSON; fall back to the HTTP status text.
        errorBody = { message: res.statusText };
      }
      const msg = errorBody?.error?.message || errorBody?.message || res.statusText;
      throw new AnyModelError(res.status >= 500 ? 502 : res.status, msg, {
        provider_name: "google",
        raw: errorBody
      });
    }
    return res;
  }

  /** Translate one OpenAI-style batch item into a Gemini generateContent body. */
  function translateRequestToGemini(model, req) {
    const body = {};
    const systemMessages = req.messages.filter((m) => m.role === "system");
    const nonSystemMessages = req.messages.filter((m) => m.role !== "system");
    if (systemMessages.length > 0) {
      // Non-string system content contributes an empty string.
      body.systemInstruction = {
        parts: [{ text: systemMessages.map((m) => typeof m.content === "string" ? m.content : "").join("\n") }]
      };
    }
    body.contents = nonSystemMessages.map((m) => ({
      // Every non-assistant role is sent as "user".
      role: m.role === "assistant" ? "model" : "user",
      // Only text parts are forwarded; other part types become empty text.
      parts: typeof m.content === "string" ? [{ text: m.content }] : Array.isArray(m.content) ? m.content.map((p) => p.type === "text" ? { text: p.text } : { text: "" }) : [{ text: "" }]
    }));
    const generationConfig = {};
    if (req.temperature !== void 0) generationConfig.temperature = req.temperature;
    // null/undefined both mean "not set": estimate a safe value instead.
    generationConfig.maxOutputTokens = req.max_tokens ?? resolveMaxTokens(model, req.messages);
    if (req.top_p !== void 0) generationConfig.topP = req.top_p;
    if (req.top_k !== void 0) generationConfig.topK = req.top_k;
    if (req.stop !== void 0) {
      generationConfig.stopSequences = Array.isArray(req.stop) ? req.stop : [req.stop];
    }
    if (req.response_format) {
      if (req.response_format.type === "json_object") {
        generationConfig.responseMimeType = "application/json";
      } else if (req.response_format.type === "json_schema") {
        generationConfig.responseMimeType = "application/json";
        generationConfig.responseSchema = req.response_format.json_schema?.schema;
      }
    }
    // maxOutputTokens is always present, so the config is never empty.
    body.generationConfig = generationConfig;
    if (req.tools && req.tools.length > 0) {
      body.tools = [{
        functionDeclarations: req.tools.map((t) => ({
          name: t.function.name,
          description: t.function.description || "",
          parameters: t.function.parameters || {}
        }))
      }];
      if (req.tool_choice) {
        if (req.tool_choice === "auto") {
          body.toolConfig = { functionCallingConfig: { mode: "AUTO" } };
        } else if (req.tool_choice === "required") {
          body.toolConfig = { functionCallingConfig: { mode: "ANY" } };
        } else if (req.tool_choice === "none") {
          body.toolConfig = { functionCallingConfig: { mode: "NONE" } };
        } else if (typeof req.tool_choice === "object") {
          // A specific function was requested: force it.
          body.toolConfig = {
            functionCallingConfig: {
              mode: "ANY",
              allowedFunctionNames: [req.tool_choice.function.name]
            }
          };
        }
      }
    }
    return body;
  }

  /** Map a Gemini finish reason to the OpenAI-style value ("stop" if unknown). */
  function mapFinishReason(reason) {
    switch (reason) {
      case "STOP":
        return "stop";
      case "MAX_TOKENS":
        return "length";
      case "SAFETY":
        return "content_filter";
      case "RECITATION":
        return "content_filter";
      default:
        return "stop";
    }
  }

  /** Convert a Gemini generateContent response into a chat.completion object. */
  function translateGeminiResponse(response, model) {
    // Only the first candidate is used.
    const candidate = response.candidates?.[0];
    let content = "";
    const toolCalls = [];
    for (const part of candidate?.content?.parts || []) {
      if (part.text) {
        content += part.text;
      } else if (part.functionCall) {
        toolCalls.push({
          id: generateId("call"),
          type: "function",
          function: {
            name: part.functionCall.name,
            arguments: JSON.stringify(part.functionCall.args || {})
          }
        });
      }
    }
    const message = { role: "assistant", content };
    if (toolCalls.length > 0) {
      message.tool_calls = toolCalls;
    }
    const finishReason = toolCalls.length > 0 ? "tool_calls" : mapFinishReason(candidate?.finishReason || "STOP");
    return {
      id: generateId(),
      object: "chat.completion",
      created: Math.floor(Date.now() / 1e3),
      model: `google/${model}`,
      choices: [{ index: 0, message, finish_reason: finishReason }],
      usage: {
        prompt_tokens: response.usageMetadata?.promptTokenCount || 0,
        completion_tokens: response.usageMetadata?.candidatesTokenCount || 0,
        total_tokens: response.usageMetadata?.totalTokenCount || 0
      }
    };
  }

  /**
   * Map a provider batch state to the library's status. Unknown states are
   * reported as "pending". `BATCH_STATE_*` is accepted as an alias of
   * `JOB_STATE_*` — NOTE(review): confirm which prefix the REST API emits.
   */
  function mapBatchState(state) {
    const normalized = typeof state === "string" ? state.replace(/^BATCH_STATE_/, "JOB_STATE_") : state;
    switch (normalized) {
      case "JOB_STATE_PENDING":
        return "pending";
      case "JOB_STATE_RUNNING":
        return "processing";
      case "JOB_STATE_SUCCEEDED":
        return "completed";
      case "JOB_STATE_FAILED":
        return "failed";
      case "JOB_STATE_CANCELLED":
        return "cancelled";
      case "JOB_STATE_EXPIRED":
        return "failed";
      default:
        return "pending";
    }
  }

  /**
   * Convert one provider result item into a batch result, or `null` when the
   * item carries neither a response nor an error.
   */
  function toBatchResult(item, customId, model) {
    if (item.response) {
      return {
        custom_id: customId,
        status: "success",
        response: translateGeminiResponse(item.response, model),
        error: null
      };
    }
    if (item.error) {
      return {
        custom_id: customId,
        status: "error",
        response: null,
        error: {
          code: item.error.code || 500,
          message: item.error.message || "Batch item failed"
        }
      };
    }
    return null;
  }

  return {
    /** Submit all requests as one inline Gemini batch and return its provider id. */
    async createBatch(model, requests, _options) {
      const batchRequests = requests.map((req) => ({
        request: translateRequestToGemini(model, req),
        // The key is echoed back with each result and becomes its custom_id.
        metadata: { key: req.custom_id }
      }));
      const res = await apiRequest(`/models/${model}:batchGenerateContent`, {
        method: "POST",
        body: {
          batch: {
            display_name: `anymodel-batch-${Date.now()}`,
            input_config: {
              requests: {
                requests: batchRequests
              }
            }
          }
        }
      });
      const data = await res.json();
      const batchName = data.name || data.batch?.name;
      if (!batchName) {
        throw new AnyModelError(502, "No batch name in Google response", {
          provider_name: "google",
          raw: data
        });
      }
      return {
        providerBatchId: batchName,
        metadata: {
          model,
          total_requests: requests.length
        }
      };
    },

    /** Fetch the batch and report its status and progress counters. */
    async pollBatch(providerBatchId) {
      const res = await apiRequest(`/${providerBatchId}`);
      const data = await res.json();
      // The rest of this adapter treats the payload as an operation object
      // (`metadata`, `response`), so also look for the state under `metadata`.
      const state = data.state || data.metadata?.state || "JOB_STATE_PENDING";
      const status = mapBatchState(state);
      const totalCount = data.totalCount || data.metadata?.total_requests || 0;
      const successCount = data.succeededCount || 0;
      const failedCount = data.failedCount || 0;
      return {
        status,
        total: totalCount || successCount + failedCount,
        completed: successCount,
        failed: failedCount
      };
    },

    /**
     * Collect per-request results, either from responses inlined in the batch
     * object or from a downloadable JSONL results file.
     */
    async getBatchResults(providerBatchId) {
      const batchRes = await apiRequest(`/${providerBatchId}`);
      const batchData = await batchRes.json();
      const results = [];
      // Strip a "models/" resource prefix so the reported model stays "google/<name>".
      const model = String(batchData.metadata?.model || "unknown").replace(/^models\//, "");
      const collect = (item) => {
        const customId = item.key || item.metadata?.key || `request-${results.length}`;
        const result = toBatchResult(item, customId, model);
        if (result) results.push(result);
      };

      const inlinedRaw = batchData.response?.inlinedResponses;
      if (inlinedRaw) {
        // Accept both a bare array and the wrapper object
        // `{ inlinedResponses: [...] }`, mirroring the `requests.requests`
        // nesting used when the batch is created.
        const inlined = Array.isArray(inlinedRaw) ? inlinedRaw : inlinedRaw.inlinedResponses;
        for (const item of Array.isArray(inlined) ? inlined : []) {
          collect(item);
        }
        return results;
      }

      const responsesFile = batchData.response?.responsesFileName || batchData.response?.responsesFile || batchData.outputConfig?.file_name;
      if (responsesFile) {
        // NOTE(review): confirm the download path against the Gemini Batch API docs.
        const downloadUrl = `${GEMINI_API_BASE2}/${responsesFile}:download?alt=media`;
        const fileRes = await fetchWithTimeout(downloadUrl, {
          headers: { "x-goog-api-key": apiKey }
        });
        if (!fileRes.ok) {
          throw new AnyModelError(502, "Failed to download batch results file", {
            provider_name: "google"
          });
        }
        const text = await fileRes.text();
        for (const line of text.trim().split("\n")) {
          if (!line) continue;
          collect(JSON.parse(line));
        }
      }
      return results;
    },

    /** Ask the provider to cancel the batch. */
    async cancelBatch(providerBatchId) {
      await apiRequest(`/${providerBatchId}:cancel`, { method: "POST" });
    }
  };
}
|
|
3021
|
+
|
|
2665
3022
|
// src/client.ts
|
|
2666
3023
|
var AnyModel = class {
|
|
2667
3024
|
registry;
|
|
@@ -2677,6 +3034,7 @@ var AnyModel = class {
|
|
|
2677
3034
|
constructor(config = {}) {
|
|
2678
3035
|
this.config = resolveConfig(config);
|
|
2679
3036
|
this.registry = new ProviderRegistry();
|
|
3037
|
+
setDefaultTimeout((this.config.defaults?.timeout ?? 120) * 1e3);
|
|
2680
3038
|
if (this.config.io) {
|
|
2681
3039
|
configureFsIO(this.config.io);
|
|
2682
3040
|
}
|
|
@@ -2797,6 +3155,10 @@ var AnyModel = class {
|
|
|
2797
3155
|
if (anthropicKey) {
|
|
2798
3156
|
this.batchManager.registerBatchAdapter("anthropic", createAnthropicBatchAdapter(anthropicKey));
|
|
2799
3157
|
}
|
|
3158
|
+
const googleKey = config.google?.apiKey || process.env.GOOGLE_API_KEY;
|
|
3159
|
+
if (googleKey) {
|
|
3160
|
+
this.batchManager.registerBatchAdapter("google", createGoogleBatchAdapter(googleKey));
|
|
3161
|
+
}
|
|
2800
3162
|
}
|
|
2801
3163
|
applyDefaults(request) {
|
|
2802
3164
|
const defaults = this.config.defaults;
|