@probeo/anymodel 0.2.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +83 -6
- package/dist/cli.cjs +799 -104
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.js +777 -104
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +820 -106
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +106 -22
- package/dist/index.d.ts +106 -22
- package/dist/index.js +798 -105
- package/dist/index.js.map +1 -1
- package/package.json +4 -1
package/README.md
CHANGED
|
@@ -147,11 +147,13 @@ const response = await client.chat.completions.create({
|
|
|
147
147
|
|
|
148
148
|
## Batch Processing
|
|
149
149
|
|
|
150
|
-
Process many requests
|
|
150
|
+
Process many requests with native provider batch APIs or concurrent fallback. OpenAI and Anthropic batches are processed server-side — OpenAI at 50% cost, Anthropic with async processing for up to 10K requests. Other providers fall back to concurrent execution automatically.
|
|
151
|
+
|
|
152
|
+
### Submit and wait
|
|
151
153
|
|
|
152
154
|
```typescript
|
|
153
155
|
const results = await client.batches.createAndPoll({
|
|
154
|
-
model: "
|
|
156
|
+
model: "openai/gpt-4o-mini",
|
|
155
157
|
requests: [
|
|
156
158
|
{ custom_id: "req-1", messages: [{ role: "user", content: "Summarize AI" }] },
|
|
157
159
|
{ custom_id: "req-2", messages: [{ role: "user", content: "Summarize ML" }] },
|
|
@@ -164,14 +166,71 @@ for (const result of results.results) {
|
|
|
164
166
|
}
|
|
165
167
|
```
|
|
166
168
|
|
|
167
|
-
|
|
169
|
+
### Submit now, check later
|
|
170
|
+
|
|
171
|
+
Submit a batch and get back an ID immediately — no need to keep the process running for native batches (OpenAI, Anthropic):
|
|
172
|
+
|
|
173
|
+
```typescript
|
|
174
|
+
// Submit and get the batch ID
|
|
175
|
+
const batch = await client.batches.create({
|
|
176
|
+
model: "anthropic/claude-haiku-4-5",
|
|
177
|
+
requests: [
|
|
178
|
+
{ custom_id: "req-1", messages: [{ role: "user", content: "Summarize AI" }] },
|
|
179
|
+
{ custom_id: "req-2", messages: [{ role: "user", content: "Summarize ML" }] },
|
|
180
|
+
],
|
|
181
|
+
});
|
|
182
|
+
console.log(batch.id); // "batch-abc123"
|
|
183
|
+
console.log(batch.batch_mode); // "native" or "concurrent"
|
|
184
|
+
|
|
185
|
+
// Check status any time — even after a process restart
|
|
186
|
+
const status = client.batches.get("batch-abc123");
|
|
187
|
+
console.log(status.status); // "pending", "processing", "completed", "failed"
|
|
188
|
+
|
|
189
|
+
// Wait for results when you're ready (reconnects to provider API)
|
|
190
|
+
const results = await client.batches.poll("batch-abc123");
|
|
191
|
+
|
|
192
|
+
// Or get results directly if already completed
|
|
193
|
+
const completedResults = client.batches.results("batch-abc123");
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
### List and cancel
|
|
197
|
+
|
|
198
|
+
```typescript
|
|
199
|
+
// List all batches on disk
|
|
200
|
+
const all = client.batches.list();
|
|
201
|
+
for (const b of all) {
|
|
202
|
+
console.log(b.id, b.batch_mode, b.status, b.provider_name);
|
|
203
|
+
}
|
|
204
|
+
|
|
205
|
+
// Cancel a running batch (also cancels at the provider for native batches)
|
|
206
|
+
await client.batches.cancel("batch-abc123");
|
|
207
|
+
```
|
|
208
|
+
|
|
209
|
+
### Batch configuration
|
|
168
210
|
|
|
169
211
|
```typescript
|
|
170
|
-
const
|
|
171
|
-
|
|
172
|
-
|
|
212
|
+
const client = new AnyModel({
|
|
213
|
+
batch: {
|
|
214
|
+
pollInterval: 10000, // default poll interval in ms (default: 5000)
|
|
215
|
+
concurrencyFallback: 10, // concurrent request limit for non-native providers (default: 5)
|
|
216
|
+
},
|
|
217
|
+
io: {
|
|
218
|
+
readConcurrency: 30, // concurrent file reads (default: 20)
|
|
219
|
+
writeConcurrency: 15, // concurrent file writes (default: 10)
|
|
220
|
+
},
|
|
221
|
+
});
|
|
222
|
+
|
|
223
|
+
// Override poll interval per call
|
|
224
|
+
const results = await client.batches.createAndPoll(request, {
|
|
225
|
+
interval: 3000, // poll every 3s for this batch
|
|
226
|
+
onProgress: (batch) => {
|
|
227
|
+
console.log(`${batch.completed}/${batch.total} done`);
|
|
228
|
+
},
|
|
229
|
+
});
|
|
173
230
|
```
|
|
174
231
|
|
|
232
|
+
Batches are persisted to `./.anymodel/batches/` in the current working directory and survive process restarts.
|
|
233
|
+
|
|
175
234
|
## Models Endpoint
|
|
176
235
|
|
|
177
236
|
```typescript
|
|
@@ -231,6 +290,14 @@ Create `anymodel.config.json` in your project root:
|
|
|
231
290
|
"defaults": {
|
|
232
291
|
"temperature": 0.7,
|
|
233
292
|
"max_tokens": 4096
|
|
293
|
+
},
|
|
294
|
+
"batch": {
|
|
295
|
+
"pollInterval": 5000,
|
|
296
|
+
"concurrencyFallback": 5
|
|
297
|
+
},
|
|
298
|
+
"io": {
|
|
299
|
+
"readConcurrency": 20,
|
|
300
|
+
"writeConcurrency": 10
|
|
234
301
|
}
|
|
235
302
|
}
|
|
236
303
|
```
|
|
@@ -358,6 +425,16 @@ npx tsx examples/basic.ts batch
|
|
|
358
425
|
- **Retries**: Automatic retry with exponential backoff on 429/502/503 errors (configurable via `defaults.retries`)
|
|
359
426
|
- **Rate limit tracking**: Per-provider rate limit state, automatically skips rate-limited providers during fallback routing
|
|
360
427
|
- **Parameter stripping**: Unsupported parameters are automatically removed before forwarding to providers
|
|
428
|
+
- **High-volume IO**: All batch file operations use concurrency-limited async queues with atomic durable writes (temp file + fsync + rename) to prevent corruption on crash. Defaults: 20 concurrent reads, 10 concurrent writes — configurable via `io.readConcurrency` and `io.writeConcurrency`
|
|
429
|
+
|
|
430
|
+
## Roadmap
|
|
431
|
+
|
|
432
|
+
- [ ] **A/B testing** — split routing (% traffic to each model) and compare mode (same request to multiple models, return all responses with stats)
|
|
433
|
+
- [ ] **Cost tracking** — per-request and aggregate cost calculation from provider pricing
|
|
434
|
+
- [ ] **Caching** — response caching with configurable TTL for identical requests
|
|
435
|
+
- [x] **Native batch APIs** — OpenAI Batch API (JSONL upload, 50% cost) and Anthropic Message Batches (10K requests, async). Auto-detects provider and routes to native API, falls back to concurrent for other providers
|
|
436
|
+
- [ ] **Result export** — `saveResults()` to write batch results to a configurable output directory
|
|
437
|
+
- [ ] **Prompt logging** — optional request/response logging for debugging and evaluation
|
|
361
438
|
|
|
362
439
|
## License
|
|
363
440
|
|