npm - @blockrun/franklin - Versions diffs - 3.24.0 → 3.24.1 - Mend

@blockrun/franklin 3.24.0 → 3.24.1

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (2)

package/dist/agent/llm.js +17 -4
package/package.json +1 -1

package/dist/agent/llm.js (changed)

@@ -79,6 +79,11 @@ function getModelRequestTimeoutMs() {
         180_000);
 }
 function getModelStreamIdleTimeoutMs() {
+    // Inter-chunk idle budget: the max gap allowed *between* SSE chunks once the
+    // stream is flowing. It does NOT cover time-to-first-token — that first read
+    // uses the larger request budget (see getModelRequestTimeoutMs + the
+    // firstRead branch in parseSSEStream). Conflating the two regressed #74:
+    // reasoning models taking 60–120s to first token aborted at this 90s wall.
     return (parseTimeoutEnv('FRANKLIN_MODEL_STREAM_IDLE_TIMEOUT_MS') ??
         parseTimeoutEnv('FRANKLIN_MODEL_IDLE_TIMEOUT_MS') ??
         90_000);
@@ -597,8 +602,11 @@ export class ModelClient {
                 yield* this.parseNonStreamingMessage(response, request.model);
                 return;
             }
-            // Parse SSE stream
-            yield* this.parseSSEStream(response, requestController, streamTimeoutMs, request.model);
+            // Parse SSE stream. The first read waits for time-to-first-token (which
+            // the gateway does *not* cover with the request timeout — it flushes SSE
+            // headers before the first content chunk), so it gets the larger request
+            // budget; subsequent reads use the tighter stream-idle budget.
+            yield* this.parseSSEStream(response, requestController, streamTimeoutMs, request.model, requestTimeoutMs);
         }
         finally {
             unlinkAbort();
@@ -1087,7 +1095,7 @@ export class ModelClient {
         return header;
     }
     // ─── SSE Parsing ───────────────────────────────────────────────────────
-    async *parseSSEStream(response, controller, timeoutMs, model) {
+    async *parseSSEStream(response, controller, timeoutMs, model, firstReadTimeoutMs = timeoutMs) {
         const reader = response.body?.getReader();
         if (!reader) {
             yield { kind: 'error', payload: { message: 'No response body' } };
@@ -1097,12 +1105,17 @@ export class ModelClient {
         let buffer = '';
         // Persist across read() calls — event: and data: may arrive in separate chunks
         let currentEvent = '';
+        // The first read waits for time-to-first-token (60–120s for reasoning
+        // models on cache-cold prompts); only later reads measure inter-chunk idle.
+        let firstRead = true;
         const MAX_BUFFER = 1_000_000; // 1MB buffer cap
         try {
             while (true) {
                 if (controller.signal.aborted)
                     break;
-                const { done, value } = await withAbortableTimeout(() => reader.read(), controller, createModelTimeoutError('stream', model, timeoutMs), timeoutMs);
+                const budgetMs = firstRead ? firstReadTimeoutMs : timeoutMs;
+                firstRead = false;
+                const { done, value } = await withAbortableTimeout(() => reader.read(), controller, createModelTimeoutError('stream', model, budgetMs), budgetMs);
                 if (done)
                     break;
                 buffer += decoder.decode(value, { stream: true });

package/package.json (changed)

@@ -1,6 +1,6 @@
 {
   "name": "@blockrun/franklin",
-  "version": "3.24.0",
+  "version": "3.24.1",
   "description": "Franklin Agent — The AI agent with a wallet. Spends USDC autonomously to get real work done. Pay per action, no subscriptions.",
   "type": "module",
   "exports": {