@probeo/anymodel 0.5.0 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -444,6 +444,7 @@ npx tsx examples/basic.ts batch
444
444
  - **Rate limit tracking**: Per-provider rate limit state, automatically skips rate-limited providers during fallback routing
445
445
  - **Parameter stripping**: Unsupported parameters are automatically removed before forwarding to providers
446
446
  - **Smart batch defaults**: Automatic `max_tokens` estimation per-request in batches — calculates safe values from input size and model context limits, preventing truncation and overflow without manual tuning
447
+ - **Memory-efficient batching**: Concurrent batch requests are read back from the on-disk JSONL and parsed lazily — only N requests (default 5) are in-flight at a time, so 10K+ request batches avoid holding every parsed request and response in memory at once (note: the JSONL file itself is currently read in full before line-by-line parsing)
447
448
  - **High-volume IO**: All batch file operations use concurrency-limited async queues with atomic durable writes (temp file + fsync + rename) to prevent corruption on crash. Defaults: 20 concurrent reads, 10 concurrent writes — configurable via `io.readConcurrency` and `io.writeConcurrency`
448
449
 
449
450
  ## Roadmap
package/dist/cli.cjs CHANGED
@@ -1972,6 +1972,17 @@ var BatchStore = class {
1972
1972
  const entries = await readDirQueued(this.dir);
1973
1973
  return entries.filter((d) => d.isDirectory()).map((d) => d.name).sort();
1974
1974
  }
1975
+ /**
1976
+ * Yield requests from JSONL one line at a time. NOTE: the whole file is read into memory first; only JSON parsing is lazy.
1977
+ */
1978
+ async *streamRequests(id) {
1979
+ const p = joinPath(this.batchDir(id), "requests.jsonl");
1980
+ if (!await fileExistsQueued(p)) return;
1981
+ const raw = await readFileQueued(p, "utf8");
1982
+ for (const line of raw.split("\n")) {
1983
+ if (line.trim()) yield JSON.parse(line);
1984
+ }
1985
+ }
1975
1986
  /**
1976
1987
  * Check if a batch exists.
1977
1988
  */
@@ -2036,7 +2047,7 @@ var BatchManager = class {
2036
2047
  this.processNativeBatch(id, request, native.adapter).catch(() => {
2037
2048
  });
2038
2049
  } else {
2039
- this.processConcurrentBatch(id, request).catch(() => {
2050
+ this.processConcurrentBatch(id, request.model, request.options).catch(() => {
2040
2051
  });
2041
2052
  }
2042
2053
  return batch;
@@ -2216,28 +2227,28 @@ var BatchManager = class {
2216
2227
  }
2217
2228
  /**
2218
2229
  * Process batch requests concurrently (fallback path).
2230
+ * Reads requests back from the batch's JSONL file so the full array of parsed requests is never held in memory at once.
2219
2231
  */
2220
- async processConcurrentBatch(batchId, request) {
2232
+ async processConcurrentBatch(batchId, model, options) {
2221
2233
  const batch = await this.store.getMeta(batchId);
2222
2234
  if (!batch) return;
2223
2235
  batch.status = "processing";
2224
2236
  await this.store.updateMeta(batch);
2225
- const items = request.requests;
2226
2237
  const active = /* @__PURE__ */ new Set();
2227
2238
  const processItem = async (item) => {
2228
2239
  const current = await this.store.getMeta(batchId);
2229
2240
  if (current?.status === "cancelled") return;
2230
2241
  const chatRequest = {
2231
- model: request.model,
2242
+ model,
2232
2243
  messages: item.messages,
2233
- max_tokens: item.max_tokens ?? request.options?.max_tokens,
2234
- temperature: item.temperature ?? request.options?.temperature,
2235
- top_p: item.top_p ?? request.options?.top_p,
2236
- top_k: item.top_k ?? request.options?.top_k,
2237
- stop: item.stop ?? request.options?.stop,
2238
- response_format: item.response_format ?? request.options?.response_format,
2239
- tools: item.tools ?? request.options?.tools,
2240
- tool_choice: item.tool_choice ?? request.options?.tool_choice
2244
+ max_tokens: item.max_tokens ?? options?.max_tokens,
2245
+ temperature: item.temperature ?? options?.temperature,
2246
+ top_p: item.top_p ?? options?.top_p,
2247
+ top_k: item.top_k ?? options?.top_k,
2248
+ stop: item.stop ?? options?.stop,
2249
+ response_format: item.response_format ?? options?.response_format,
2250
+ tools: item.tools ?? options?.tools,
2251
+ tool_choice: item.tool_choice ?? options?.tool_choice
2241
2252
  };
2242
2253
  let result;
2243
2254
  try {
@@ -2268,7 +2279,7 @@ var BatchManager = class {
2268
2279
  await this.store.updateMeta(meta);
2269
2280
  }
2270
2281
  };
2271
- for (const item of items) {
2282
+ for await (const item of this.store.streamRequests(batchId)) {
2272
2283
  const current = await this.store.getMeta(batchId);
2273
2284
  if (current?.status === "cancelled") break;
2274
2285
  if (active.size >= this.concurrencyLimit) {