@probeo/anymodel 0.5.0 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -444,6 +444,7 @@ npx tsx examples/basic.ts batch
444
444
  - **Rate limit tracking**: Per-provider rate limit state, automatically skips rate-limited providers during fallback routing
445
445
  - **Parameter stripping**: Unsupported parameters are automatically removed before forwarding to providers
446
446
  - **Smart batch defaults**: Automatic `max_tokens` estimation per-request in batches — calculates safe values from input size and model context limits, preventing truncation and overflow without manual tuning
447
+ - **Memory-efficient batching**: Concurrent batch requests are read back from the on-disk JSONL and parsed lazily — only N requests (default 5) are in-flight at a time, so 10K+ request batches avoid holding every parsed request and response in memory at once (note: the JSONL file itself is currently read in full before line-by-line parsing)
447
448
  - **High-volume IO**: All batch file operations use concurrency-limited async queues with atomic durable writes (temp file + fsync + rename) to prevent corruption on crash. Defaults: 20 concurrent reads, 10 concurrent writes — configurable via `io.readConcurrency` and `io.writeConcurrency`
448
449
 
449
450
  ## Roadmap
package/dist/cli.cjs CHANGED
@@ -1972,6 +1972,17 @@ var BatchStore = class {
1972
1972
  const entries = await readDirQueued(this.dir);
1973
1973
  return entries.filter((d) => d.isDirectory()).map((d) => d.name).sort();
1974
1974
  }
1975
+ /**
1976
+ * Yield requests from JSONL one line at a time. NOTE: the whole file is read into memory first; only JSON parsing is lazy.
1977
+ */
1978
+ async *streamRequests(id) {
1979
+ const p = joinPath(this.batchDir(id), "requests.jsonl");
1980
+ if (!await fileExistsQueued(p)) return;
1981
+ const raw = await readFileQueued(p, "utf8");
1982
+ for (const line of raw.split("\n")) {
1983
+ if (line.trim()) yield JSON.parse(line);
1984
+ }
1985
+ }
1975
1986
  /**
1976
1987
  * Check if a batch exists.
1977
1988
  */
@@ -2036,7 +2047,7 @@ var BatchManager = class {
2036
2047
  this.processNativeBatch(id, request, native.adapter).catch(() => {
2037
2048
  });
2038
2049
  } else {
2039
- this.processConcurrentBatch(id, request).catch(() => {
2050
+ this.processConcurrentBatch(id, request.model, request.options).catch(() => {
2040
2051
  });
2041
2052
  }
2042
2053
  return batch;
@@ -2216,28 +2227,28 @@ var BatchManager = class {
2216
2227
  }
2217
2228
  /**
2218
2229
  * Process batch requests concurrently (fallback path).
2230
+ * Reads requests back from the batch's JSONL file so the full array of parsed requests is never held in memory at once.
2219
2231
  */
2220
- async processConcurrentBatch(batchId, request) {
2232
+ async processConcurrentBatch(batchId, model, options) {
2221
2233
  const batch = await this.store.getMeta(batchId);
2222
2234
  if (!batch) return;
2223
2235
  batch.status = "processing";
2224
2236
  await this.store.updateMeta(batch);
2225
- const items = request.requests;
2226
2237
  const active = /* @__PURE__ */ new Set();
2227
2238
  const processItem = async (item) => {
2228
2239
  const current = await this.store.getMeta(batchId);
2229
2240
  if (current?.status === "cancelled") return;
2230
2241
  const chatRequest = {
2231
- model: request.model,
2242
+ model,
2232
2243
  messages: item.messages,
2233
- max_tokens: item.max_tokens ?? request.options?.max_tokens,
2234
- temperature: item.temperature ?? request.options?.temperature,
2235
- top_p: item.top_p ?? request.options?.top_p,
2236
- top_k: item.top_k ?? request.options?.top_k,
2237
- stop: item.stop ?? request.options?.stop,
2238
- response_format: item.response_format ?? request.options?.response_format,
2239
- tools: item.tools ?? request.options?.tools,
2240
- tool_choice: item.tool_choice ?? request.options?.tool_choice
2244
+ max_tokens: item.max_tokens ?? options?.max_tokens,
2245
+ temperature: item.temperature ?? options?.temperature,
2246
+ top_p: item.top_p ?? options?.top_p,
2247
+ top_k: item.top_k ?? options?.top_k,
2248
+ stop: item.stop ?? options?.stop,
2249
+ response_format: item.response_format ?? options?.response_format,
2250
+ tools: item.tools ?? options?.tools,
2251
+ tool_choice: item.tool_choice ?? options?.tool_choice
2241
2252
  };
2242
2253
  let result;
2243
2254
  try {
@@ -2268,7 +2279,7 @@ var BatchManager = class {
2268
2279
  await this.store.updateMeta(meta);
2269
2280
  }
2270
2281
  };
2271
- for (const item of items) {
2282
+ for await (const item of this.store.streamRequests(batchId)) {
2272
2283
  const current = await this.store.getMeta(batchId);
2273
2284
  if (current?.status === "cancelled") break;
2274
2285
  if (active.size >= this.concurrencyLimit) {