@blockrun/franklin 3.10.2 → 3.10.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -44,9 +44,16 @@ export function classifyAgentError(message) {
44
44
  '429',
45
45
  'rate limit',
46
46
  'too many requests',
47
+ 'too many tokens', // Anthropic per-day TPM cap leak via gateway
48
+ 'tokens per day',
49
+ 'please wait before trying',
50
+ 'quota exceeded',
47
51
  ])) {
52
+ // 1 retry is plenty: a per-second rate limit clears in seconds (one
53
+ // backoff covers it), but a per-day TPM quota won't clear in this
54
+ // session at all — caller falls back to a different provider after.
48
55
  return {
49
- category: 'rate_limit', label: 'RateLimit', isTransient: true,
56
+ category: 'rate_limit', label: 'RateLimit', isTransient: true, maxRetries: 1,
50
57
  suggestion: 'Try /model to switch to a different model, or wait a moment and /retry.',
51
58
  };
52
59
  }
@@ -2,7 +2,24 @@
2
2
  * Franklin Agent Loop
3
3
  * The core reasoning-action cycle: prompt → model → extract capabilities → execute → repeat.
4
4
  */
5
- import type { AgentConfig, Dialogue, StreamEvent } from './types.js';
5
+ import type { AgentConfig, ContentPart, Dialogue, StreamEvent } from './types.js';
/**
 * Recognise an upstream rate-limit / quota failure that the gateway delivered
 * as ordinary assistant text on a 200-OK response instead of an HTTP error.
 *
 * The Anthropic provider in particular reports per-day token exhaustion as a
 * bracketed line such as
 * "[Error: Too many tokens per day, please wait before trying again.]"
 * in the assistant text channel. Left alone, that line poisons grounding
 * checks and is persisted to session history as if it were a real reply.
 *
 * An assistant turn matches when its combined text payload, ignoring
 * surrounding whitespace, is exactly one bracketed `[Error: ...]` message.
 * A turn that also carries real model output, such as a `tool_use` call, is
 * never reported as a gateway error. The caller is expected to throw on a
 * match so the existing classifier + retry path takes over.
 *
 * @param parts Content parts of a single assistant turn.
 * @returns `match` is true when the turn is a masquerading transport error;
 *   `message` then holds the text found inside the brackets, and is the empty
 *   string when `match` is false.
 */
export declare function looksLikeGatewayErrorAsText(parts: ContentPart[]): {
    match: boolean;
    message: string;
};
6
23
  /**
7
24
  * Identify models known to hallucinate tool calls (invented names, literal
8
25
  * `[TOOLCALL]` / `<tool_call>` text in answers) — they need the explicit
@@ -206,6 +206,42 @@ function stripMediaFromHistory(history) {
206
206
  });
207
207
  return { history: stripped ? result : history, stripped };
208
208
  }
/**
 * Detect when the gateway leaked an upstream rate-limit / quota error as a
 * 200-OK text content block instead of a real HTTP error. The Anthropic
 * provider in particular surfaces per-day TPM exhaustion as a bracketed
 * "[Error: Too many tokens per day, please wait before trying again.]"
 * message glued into the assistant text channel, which then poisons grounding
 * checks and gets persisted to session history as if it were a real reply.
 *
 * Treat any assistant turn whose entire text payload is a single bracketed
 * `[Error: ...]` message — and which contains no tool_use / thinking blocks —
 * as a masquerading transport error. The caller throws to let the existing
 * classifier + retry path take over.
 *
 * @param parts Content parts of one assistant turn. A missing, non-array or
 *   empty value never matches.
 * @returns `{ match: true, message }` where `message` is the trimmed text
 *   found inside the brackets, or `{ match: false, message: '' }` otherwise.
 */
export function looksLikeGatewayErrorAsText(parts) {
    if (!Array.isArray(parts) || parts.length === 0)
        return { match: false, message: '' };
    const textParts = [];
    for (const p of parts) {
        // Real model output (a tool call or a thinking block) means the turn
        // is a genuine reply, so any bracketed text in it is left alone.
        // NOTE(review): assumes thinking parts are tagged `type: 'thinking'`
        // in ContentPart — confirm against ./types.js.
        if (p.type === 'tool_use' || p.type === 'thinking')
            return { match: false, message: '' };
        if (p.type === 'text' && typeof p.text === 'string') {
            textParts.push(p.text);
        }
    }
    // Text may arrive split across several parts; judge the combined payload.
    // The trim discards stray leading/trailing whitespace such as a trailing
    // newline appended by the gateway.
    const joined = textParts.join('').trim();
    if (!joined)
        return { match: false, message: '' };
    // The bracketed `[Error: ...]` must span the ENTIRE payload: text before
    // the opening bracket or after the closing one disqualifies the turn, and
    // the message itself may not contain a `]`.
    const m = /^\[Error:\s*([^\]]+?)\]\s*$/.exec(joined);
    if (!m)
        return { match: false, message: '' };
    return { match: true, message: m[1].trim() };
}
209
245
  /**
210
246
  * Calculate backoff delay with jitter to avoid thundering herd.
211
247
  * Base: exponential (2^attempt * 1000ms), jitter: ±25%.
@@ -1024,6 +1060,33 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
1024
1060
  continue; // Retry with next model
1025
1061
  }
1026
1062
  }
1063
+ // ── Rate-limit / quota: auto-fallback to a different provider ──
1064
+ // Per-day TPM caps (Anthropic) won't clear in this session; per-second
1065
+ // limits already had their backoff retry above and still failed. In
1066
+ // both cases, the productive next move is to run the same turn on a
1067
+ // model from a different provider rather than thrash on the failing
1068
+ // one. Mirror the payment fallback shape: mark the model as failed
1069
+ // for this turn and pick the next free model that hasn't failed yet.
1070
+ if (classified.category === 'rate_limit') {
1071
+ turnFailedModels.add(config.model);
1072
+ if (lastRoutedCategory) {
1073
+ recordOutcome(lastRoutedCategory, config.model, 'rate_limit');
1074
+ }
1075
+ const FREE_MODELS = ['nvidia/qwen3-coder-480b', 'nvidia/llama-4-maverick', 'nvidia/glm-4.7'];
1076
+ const nextFree = FREE_MODELS.find(m => !turnFailedModels.has(m));
1077
+ if (nextFree) {
1078
+ const oldModel = config.model;
1079
+ config.model = nextFree;
1080
+ config.onModelChange?.(nextFree, 'system');
1081
+ // Reset retry counter — the new model gets its own retry budget.
1082
+ recoveryAttempts = 0;
1083
+ onEvent({
1084
+ kind: 'text_delta',
1085
+ text: `\n*${oldModel} rate-limited — switching to ${nextFree}*\n`,
1086
+ });
1087
+ continue;
1088
+ }
1089
+ }
1027
1090
  // ── Unrecoverable: show error with suggestion from classifier ──
1028
1091
  const suggestion = classified.suggestion ? `\nTip: ${classified.suggestion}` : '';
1029
1092
  onEvent({
@@ -1165,6 +1228,20 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
1165
1228
  lastSessionActivity = Date.now();
1166
1229
  continue; // Retry with higher limit
1167
1230
  }
1231
+ // ── Gateway error masquerading as text (BlockRun → Anthropic TPM) ──
1232
+ // Some upstreams swallow rate-limit / quota errors and emit them as a
1233
+ // single bracketed text block on a 200 OK. Persisting that as a real
1234
+ // assistant reply poisons history (the next turn sees an "answer" that
1235
+ // is actually a transport error) and triggers grounding-check retries
1236
+ // that hit the same wall. Detect, throw into the classifier, and let
1237
+ // the existing recovery flow handle it.
1238
+ const gatewayErr = looksLikeGatewayErrorAsText(responseParts);
1239
+ if (gatewayErr.match) {
1240
+ if (config.debug) {
1241
+ console.error(`[franklin] Gateway returned an error text in lieu of an answer (${resolvedModel}): ${gatewayErr.message}`);
1242
+ }
1243
+ throw new Error(gatewayErr.message);
1244
+ }
1168
1245
  // Reset recovery counter on successful completion
1169
1246
  recoveryAttempts = 0;
1170
1247
  // Extract tool invocations (text/thinking already streamed in real-time)
@@ -5,7 +5,7 @@
5
5
  * Storage: ~/.blockrun/router-history.jsonl (append-only, capped 2000 records)
6
6
  * Never uploaded — purely local personalization.
7
7
  */
8
- export type Outcome = 'continued' | 'switched' | 'retried' | 'error' | 'max_turns' | 'payment';
8
+ export type Outcome = 'continued' | 'switched' | 'retried' | 'error' | 'max_turns' | 'payment' | 'rate_limit';
9
9
  /**
10
10
  * Record a model outcome for local learning.
11
11
  */
@@ -92,6 +92,13 @@ export function computeLocalElo() {
92
92
  case 'payment':
93
93
  delta = -K_FACTOR * 1.5;
94
94
  break;
95
+ // Rate-limited: provider isn't broken, just exhausted right now.
96
+ // Penalize less than payment (which won't clear without action) but
97
+ // more than a generic error so the router avoids the same provider
98
+ // for the rest of the session.
99
+ case 'rate_limit':
100
+ delta = -K_FACTOR * 1.2;
101
+ break;
95
102
  case 'max_turns':
96
103
  delta = -K_FACTOR * 0.3;
97
104
  break;
package/dist/ui/app.js CHANGED
@@ -15,6 +15,25 @@ import { estimateCost } from '../pricing.js';
15
15
  import { formatTokens, shortModelName } from '../stats/format.js';
16
16
  import { mouse, forceDisableMouseTracking } from './mouse.js';
17
17
  // ─── Full-width input box ──────────────────────────────────────────────────
// DEC private mode 7 escape sequences: `l` resets (auto-wrap off), `h` sets
// (auto-wrap on).
const DISABLE_AUTO_WRAP = '\x1b[?7l';
const ENABLE_AUTO_WRAP = '\x1b[?7h';
/**
 * Turn off terminal auto-wrap for the lifetime of the UI.
 *
 * Does nothing and returns `undefined` when stdout is not a TTY. Otherwise it
 * writes the disable sequence, registers a one-shot `exit` listener that
 * re-enables auto-wrap, and returns a disposer that removes that listener and
 * re-enables auto-wrap immediately. The re-enable sequence is written at most
 * once, and is skipped while stdout is not writable.
 *
 * @returns A disposer function, or `undefined` when stdout is not a TTY.
 */
function disableTerminalAutoWrap() {
    if (!process.stdout.isTTY) {
        return undefined;
    }
    let restored = false;
    const restore = () => {
        if (!restored && process.stdout.writable) {
            // Flip the flag before writing so a second call is a no-op.
            restored = true;
            process.stdout.write(ENABLE_AUTO_WRAP);
        }
    };
    process.stdout.write(DISABLE_AUTO_WRAP);
    process.once('exit', restore);
    const dispose = () => {
        // Drop the exit hook first; restore() then runs right away.
        process.off('exit', restore);
        restore();
    };
    return dispose;
}
18
37
  // Subscribe to terminal resize so React re-renders with fresh dimensions.
19
38
  // Without this, useStdout() returns a stable ref and children that read
20
39
  // stdout.columns on each render still need React to re-execute them — which
@@ -775,6 +794,7 @@ export function launchInkUI(opts) {
775
794
  let pendingInput = null; // Queue for inputs that arrive before waitForInput
776
795
  let exiting = false;
777
796
  let abortCallback = null;
797
+ const restoreTerminalAutoWrap = disableTerminalAutoWrap();
778
798
  const instance = render(_jsx(RunCodeApp, { initialModel: opts.model, workDir: opts.workDir, walletAddress: opts.walletAddress || 'not set — run: franklin setup', walletBalance: opts.walletBalance || 'unknown', chain: opts.chain || 'base', startWithPicker: opts.showPicker, onSubmit: (value) => {
779
799
  if (resolveInput) {
780
800
  resolveInput(value);
@@ -820,7 +840,11 @@ export function launchInkUI(opts) {
820
840
  return new Promise((resolve) => { resolveInput = resolve; });
821
841
  },
822
842
  onAbort: (cb) => { abortCallback = cb; },
823
- cleanup: () => { mouse.disable(); instance.unmount(); },
843
+ cleanup: () => {
844
+ mouse.disable();
845
+ instance.unmount();
846
+ restoreTerminalAutoWrap?.();
847
+ },
824
848
  requestPermission: (toolName, description) => {
825
849
  const ui = globalThis.__franklin_ui;
826
850
  return ui?.requestPermission(toolName, description) ?? Promise.resolve('no');
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@blockrun/franklin",
3
- "version": "3.10.2",
3
+ "version": "3.10.4",
4
4
  "description": "Franklin — The AI agent with a wallet. Spends USDC autonomously to get real work done. Pay per action, no subscriptions.",
5
5
  "type": "module",
6
6
  "exports": {
@@ -66,7 +66,7 @@
66
66
  "node": ">=20"
67
67
  },
68
68
  "dependencies": {
69
- "@blockrun/llm": "^1.4.2",
69
+ "@blockrun/llm": "^1.13.0",
70
70
  "@modelcontextprotocol/sdk": "^1.29.0",
71
71
  "@solana/spl-token": "^0.4.14",
72
72
  "@solana/web3.js": "^1.98.4",