@probeo/anymodel 0.5.0 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -0
- package/dist/cli.cjs +24 -13
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.js +24 -13
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +24 -13
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +5 -0
- package/dist/index.d.ts +5 -0
- package/dist/index.js +24 -13
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -444,6 +444,7 @@ npx tsx examples/basic.ts batch
|
|
|
444
444
|
- **Rate limit tracking**: Per-provider rate limit state, automatically skips rate-limited providers during fallback routing
|
|
445
445
|
- **Parameter stripping**: Unsupported parameters are automatically removed before forwarding to providers
|
|
446
446
|
- **Smart batch defaults**: Automatic `max_tokens` estimation per-request in batches — calculates safe values from input size and model context limits, preventing truncation and overflow without manual tuning
|
|
447
|
+
- **Memory-efficient batching**: Concurrent batch requests are streamed from disk — only N requests (default 5) are in-flight at a time, making 10K+ request batches safe without memory spikes
|
|
447
448
|
- **High-volume IO**: All batch file operations use concurrency-limited async queues with atomic durable writes (temp file + fsync + rename) to prevent corruption on crash. Defaults: 20 concurrent reads, 10 concurrent writes — configurable via `io.readConcurrency` and `io.writeConcurrency`
|
|
448
449
|
|
|
449
450
|
## Roadmap
|
package/dist/cli.cjs
CHANGED
|
@@ -1972,6 +1972,17 @@ var BatchStore = class {
|
|
|
1972
1972
|
const entries = await readDirQueued(this.dir);
|
|
1973
1973
|
return entries.filter((d) => d.isDirectory()).map((d) => d.name).sort();
|
|
1974
1974
|
}
|
|
1975
|
+
/**
|
|
1976
|
+
* Async generator: yields each request parsed (JSON.parse) from the non-blank lines of the batch's requests.jsonl; yields nothing if the file does not exist. NOTE(review): the file is read in full and split on "\n" before the first yield, so memory use scales with file size — only the hand-off to the consumer is one request at a time.
|
|
1977
|
+
*/
|
|
1978
|
+
async *streamRequests(id) {
|
|
1979
|
+
const p = joinPath(this.batchDir(id), "requests.jsonl");
|
|
1980
|
+
if (!await fileExistsQueued(p)) return;
|
|
1981
|
+
const raw = await readFileQueued(p, "utf8");
|
|
1982
|
+
for (const line of raw.split("\n")) {
|
|
1983
|
+
if (line.trim()) yield JSON.parse(line);
|
|
1984
|
+
}
|
|
1985
|
+
}
|
|
1975
1986
|
/**
|
|
1976
1987
|
* Check if a batch exists.
|
|
1977
1988
|
*/
|
|
@@ -2036,7 +2047,7 @@ var BatchManager = class {
|
|
|
2036
2047
|
this.processNativeBatch(id, request, native.adapter).catch(() => {
|
|
2037
2048
|
});
|
|
2038
2049
|
} else {
|
|
2039
|
-
this.processConcurrentBatch(id, request).catch(() => {
|
|
2050
|
+
this.processConcurrentBatch(id, request.model, request.options).catch(() => {
|
|
2040
2051
|
});
|
|
2041
2052
|
}
|
|
2042
2053
|
return batch;
|
|
@@ -2216,28 +2227,28 @@ var BatchManager = class {
|
|
|
2216
2227
|
}
|
|
2217
2228
|
/**
|
|
2218
2229
|
* Process batch requests concurrently (fallback path).
|
|
2230
|
+
* Streams requests from disk to avoid holding them all in memory.
|
|
2219
2231
|
*/
|
|
2220
|
-
async processConcurrentBatch(batchId,
|
|
2232
|
+
async processConcurrentBatch(batchId, model, options) {
|
|
2221
2233
|
const batch = await this.store.getMeta(batchId);
|
|
2222
2234
|
if (!batch) return;
|
|
2223
2235
|
batch.status = "processing";
|
|
2224
2236
|
await this.store.updateMeta(batch);
|
|
2225
|
-
const items = request.requests;
|
|
2226
2237
|
const active = /* @__PURE__ */ new Set();
|
|
2227
2238
|
const processItem = async (item) => {
|
|
2228
2239
|
const current = await this.store.getMeta(batchId);
|
|
2229
2240
|
if (current?.status === "cancelled") return;
|
|
2230
2241
|
const chatRequest = {
|
|
2231
|
-
model
|
|
2242
|
+
model,
|
|
2232
2243
|
messages: item.messages,
|
|
2233
|
-
max_tokens: item.max_tokens ??
|
|
2234
|
-
temperature: item.temperature ??
|
|
2235
|
-
top_p: item.top_p ??
|
|
2236
|
-
top_k: item.top_k ??
|
|
2237
|
-
stop: item.stop ??
|
|
2238
|
-
response_format: item.response_format ??
|
|
2239
|
-
tools: item.tools ??
|
|
2240
|
-
tool_choice: item.tool_choice ??
|
|
2244
|
+
max_tokens: item.max_tokens ?? options?.max_tokens,
|
|
2245
|
+
temperature: item.temperature ?? options?.temperature,
|
|
2246
|
+
top_p: item.top_p ?? options?.top_p,
|
|
2247
|
+
top_k: item.top_k ?? options?.top_k,
|
|
2248
|
+
stop: item.stop ?? options?.stop,
|
|
2249
|
+
response_format: item.response_format ?? options?.response_format,
|
|
2250
|
+
tools: item.tools ?? options?.tools,
|
|
2251
|
+
tool_choice: item.tool_choice ?? options?.tool_choice
|
|
2241
2252
|
};
|
|
2242
2253
|
let result;
|
|
2243
2254
|
try {
|
|
@@ -2268,7 +2279,7 @@ var BatchManager = class {
|
|
|
2268
2279
|
await this.store.updateMeta(meta);
|
|
2269
2280
|
}
|
|
2270
2281
|
};
|
|
2271
|
-
for (const item of
|
|
2282
|
+
for await (const item of this.store.streamRequests(batchId)) {
|
|
2272
2283
|
const current = await this.store.getMeta(batchId);
|
|
2273
2284
|
if (current?.status === "cancelled") break;
|
|
2274
2285
|
if (active.size >= this.concurrencyLimit) {
|