@blockrun/franklin 3.24.0 → 3.24.2

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
package/dist/agent/llm.js CHANGED
@@ -79,6 +79,11 @@ function getModelRequestTimeoutMs() {
79
79
  180_000);
80
80
  }
81
81
  function getModelStreamIdleTimeoutMs() {
82
+ // Inter-chunk idle budget: the max gap allowed *between* SSE chunks once the
83
+ // stream is flowing. It does NOT cover time-to-first-token — that first read
84
+ // uses the larger request budget (see getModelRequestTimeoutMs + the
85
+ // firstRead branch in parseSSEStream). Conflating the two regressed #74:
86
+ // reasoning models taking 60–120s to first token aborted at this 90s wall.
82
87
  return (parseTimeoutEnv('FRANKLIN_MODEL_STREAM_IDLE_TIMEOUT_MS') ??
83
88
  parseTimeoutEnv('FRANKLIN_MODEL_IDLE_TIMEOUT_MS') ??
84
89
  90_000);
@@ -597,8 +602,11 @@ export class ModelClient {
597
602
  yield* this.parseNonStreamingMessage(response, request.model);
598
603
  return;
599
604
  }
600
- // Parse SSE stream
601
- yield* this.parseSSEStream(response, requestController, streamTimeoutMs, request.model);
605
+ // Parse SSE stream. The first read waits for time-to-first-token (which
606
+ // the gateway does *not* cover with the request timeout — it flushes SSE
607
+ // headers before the first content chunk), so it gets the larger request
608
+ // budget; subsequent reads use the tighter stream-idle budget.
609
+ yield* this.parseSSEStream(response, requestController, streamTimeoutMs, request.model, requestTimeoutMs);
602
610
  }
603
611
  finally {
604
612
  unlinkAbort();
@@ -1087,7 +1095,7 @@ export class ModelClient {
1087
1095
  return header;
1088
1096
  }
1089
1097
  // ─── SSE Parsing ───────────────────────────────────────────────────────
1090
- async *parseSSEStream(response, controller, timeoutMs, model) {
1098
+ async *parseSSEStream(response, controller, timeoutMs, model, firstReadTimeoutMs = timeoutMs) {
1091
1099
  const reader = response.body?.getReader();
1092
1100
  if (!reader) {
1093
1101
  yield { kind: 'error', payload: { message: 'No response body' } };
@@ -1097,12 +1105,17 @@ export class ModelClient {
1097
1105
  let buffer = '';
1098
1106
  // Persist across read() calls — event: and data: may arrive in separate chunks
1099
1107
  let currentEvent = '';
1108
+ // The first read waits for time-to-first-token (60–120s for reasoning
1109
+ // models on cache-cold prompts); only later reads measure inter-chunk idle.
1110
+ let firstRead = true;
1100
1111
  const MAX_BUFFER = 1_000_000; // 1MB buffer cap
1101
1112
  try {
1102
1113
  while (true) {
1103
1114
  if (controller.signal.aborted)
1104
1115
  break;
1105
- const { done, value } = await withAbortableTimeout(() => reader.read(), controller, createModelTimeoutError('stream', model, timeoutMs), timeoutMs);
1116
+ const budgetMs = firstRead ? firstReadTimeoutMs : timeoutMs;
1117
+ firstRead = false;
1118
+ const { done, value } = await withAbortableTimeout(() => reader.read(), controller, createModelTimeoutError('stream', model, budgetMs), budgetMs);
1106
1119
  if (done)
1107
1120
  break;
1108
1121
  buffer += decoder.decode(value, { stream: true });
@@ -235,6 +235,7 @@ const MODEL_CONTEXT_WINDOWS = {
235
235
  'zai/glm-5.1': 200_000,
236
236
  'moonshot/kimi-k2.6': 256_000,
237
237
  'moonshot/kimi-k2.5': 128_000,
238
+ 'minimax/minimax-m3': 1_000_000,
238
239
  'minimax/minimax-m2.7': 128_000,
239
240
  // NVIDIA-hosted free tier (2026-04-29 V4 Flash + Omni launch)
240
241
  'nvidia/deepseek-v4-flash': 1_000_000,
package/dist/pricing.js CHANGED
@@ -76,6 +76,7 @@ export const MODEL_PRICING = {
76
76
  // through 2026-05-31 — list is $2.00/$4.00, promo is $0.50/$1.00.
77
77
  'deepseek/deepseek-v4-pro': { input: 0.50, output: 1.00 },
78
78
  // Minimax
79
+ 'minimax/minimax-m3': { input: 0.3, output: 1.2 },
79
80
  'minimax/minimax-m2.7': { input: 0.3, output: 1.2 },
80
81
  'minimax/minimax-m2.5': { input: 0.3, output: 1.2 },
81
82
  // Moonshot
@@ -154,7 +154,8 @@ const MODEL_SHORTCUTS = {
154
154
  nemotron: 'nvidia/qwen3-coder-480b',
155
155
  devstral: 'nvidia/qwen3-coder-480b',
156
156
  // Minimax
157
- minimax: 'minimax/minimax-m2.7',
157
+ minimax: 'minimax/minimax-m3',
158
+ 'm3': 'minimax/minimax-m3',
158
159
  'm2.7': 'minimax/minimax-m2.7',
159
160
  // Others
160
161
  glm: 'zai/glm-5.1',
@@ -91,7 +91,8 @@ export const MODEL_SHORTCUTS = {
91
91
  nemotron: 'nvidia/qwen3-coder-480b',
92
92
  devstral: 'nvidia/qwen3-coder-480b',
93
93
  // Others
94
- minimax: 'minimax/minimax-m2.7',
94
+ minimax: 'minimax/minimax-m3',
95
+ 'm3': 'minimax/minimax-m3',
95
96
  'm2.7': 'minimax/minimax-m2.7',
96
97
  glm: 'zai/glm-5.1',
97
98
  'glm-turbo': 'zai/glm-5-turbo',
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@blockrun/franklin",
3
- "version": "3.24.0",
3
+ "version": "3.24.2",
4
4
  "description": "Franklin Agent — The AI agent with a wallet. Spends USDC autonomously to get real work done. Pay per action, no subscriptions.",
5
5
  "type": "module",
6
6
  "exports": {