@blockrun/franklin 3.15.96 → 3.15.98

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -177,8 +177,25 @@ const DIRECT_COMMANDS = {
177
177
  if ('type' in part) {
178
178
  if (part.type === 'tool_result') {
179
179
  toolResults++;
180
- const c = typeof part.content === 'string' ? part.content : JSON.stringify(part.content);
181
- totalToolChars += c.length;
180
+ // Sibling of PR #54's tokens.ts fix: image base64 must NOT
181
+ // count toward the displayed char total — `/context` would
182
+ // otherwise show ~70K chars per attached image and confuse
183
+ // the user about why the ring is at 1% but "total tool
184
+ // chars" is huge.
185
+ if (typeof part.content === 'string') {
186
+ totalToolChars += part.content.length;
187
+ }
188
+ else if (Array.isArray(part.content)) {
189
+ for (const block of part.content) {
190
+ const t = block.type;
191
+ if (t === 'text') {
192
+ totalToolChars += (block.text || '').length;
193
+ }
194
+ else if (t === 'image') {
195
+ totalToolChars += 6000; // ~1500 tokens × 4 chars/tok
196
+ }
197
+ }
198
+ }
182
199
  }
183
200
  if (part.type === 'thinking')
184
201
  thinkingBlocks++;
@@ -385,7 +385,36 @@ function formatForSummarization(messages) {
385
385
  textParts.push(`[Called tool: ${part.name}(${JSON.stringify(part.input).slice(0, 200)})]`);
386
386
  break;
387
387
  case 'tool_result': {
388
- const content = typeof part.content === 'string' ? part.content : JSON.stringify(part.content);
388
+ // Sibling of PR #54's tokens.ts fix: when content is a
389
+ // [{text}, {image}] array, JSON.stringify dumps base64
390
+ // bytes into the summary prompt — bloats the summarizer's
391
+ // input and produces a useless preview ("[Tool result:
392
+ // [{\"type\":\"text\",\"text\":\"Image file: ...\"},{\"type\":\"image\",\"source\":{\"type\":\"base64\",\"data\":\"...").
393
+ // Build the preview from text blocks only; mark images
394
+ // explicitly so the summarizer knows they exist.
395
+ let content;
396
+ if (typeof part.content === 'string') {
397
+ content = part.content;
398
+ }
399
+ else if (Array.isArray(part.content)) {
400
+ const pieces = [];
401
+ let imageCount = 0;
402
+ for (const block of part.content) {
403
+ const t = block.type;
404
+ if (t === 'text') {
405
+ pieces.push(block.text || '');
406
+ }
407
+ else if (t === 'image') {
408
+ imageCount++;
409
+ }
410
+ }
411
+ if (imageCount > 0)
412
+ pieces.push(`[${imageCount} image block${imageCount > 1 ? 's' : ''}]`);
413
+ content = pieces.join(' ');
414
+ }
415
+ else {
416
+ content = JSON.stringify(part.content);
417
+ }
389
418
  const truncated = content.length > 500 ? content.slice(0, 500) + '...' : content;
390
419
  textParts.push(`[Tool result${part.is_error ? ' (ERROR)' : ''}: ${truncated}]`);
391
420
  break;
@@ -1585,7 +1585,11 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
1585
1585
  tier: routingTier,
1586
1586
  confidence: routingConfidence,
1587
1587
  savings: routingSavings,
1588
- contextPct: Math.round(contextUsagePct),
1588
+ // Preserve sub-1% precision: a fresh session at 0.4% would
1589
+ // round to 0 and freeze the renderer's context ring until the
1590
+ // conversation grows past ~1k tokens. Match `/context`'s
1591
+ // `.toFixed(1)` fidelity.
1592
+ contextPct: Math.round(contextUsagePct * 10) / 10,
1589
1593
  });
1590
1594
  // Record usage for stats tracking (franklin stats command).
1591
1595
  // Prefer the real x402 charge from the gateway over a token-catalog
@@ -458,7 +458,27 @@ function estimateChars(history) {
458
458
  if (p.type === 'text')
459
459
  total += p.text.length;
460
460
  else if (p.type === 'tool_result') {
461
- total += typeof p.content === 'string' ? p.content.length : JSON.stringify(p.content).length;
461
+ // Sibling of PR #54's tokens.ts fix: JSON.stringify-ing a
462
+ // [{text}, {image}] array counts the base64 `data` field as
463
+ // text and inflates the char count by ~70K per image. That
464
+ // skews every reduce-pass decision (when to dedupe, when to
465
+ // collapse) toward "save chars by collapsing the image-
466
+ // bearing result" — exactly wrong. Walk blocks instead.
467
+ if (typeof p.content === 'string') {
468
+ total += p.content.length;
469
+ }
470
+ else if (Array.isArray(p.content)) {
471
+ for (const block of p.content) {
472
+ if (block.type === 'text') {
473
+ total += (block.text || '').length;
474
+ }
475
+ else if (block.type === 'image') {
476
+ // Mirror tokens.ts: image ≈ 1500 tokens ≈ ~6K chars
477
+ // at the 4-chars/token rule estimateTokens uses.
478
+ total += 6000;
479
+ }
480
+ }
481
+ }
462
482
  }
463
483
  else if (p.type === 'tool_use') {
464
484
  total += JSON.stringify(p.input).length;
@@ -219,7 +219,19 @@ export class StreamingExecutor {
219
219
  const execElapsed = Date.now() - execStart;
220
220
  if (execElapsed >= 30_000) {
221
221
  const status = result.isError ? 'error' : 'ok';
222
- const preview = this.inputPreview(invocation) || '';
222
+ // Single-line the preview before logging. Bash invocations like
223
+ // `python3 -c "<heredoc>"` carry embedded newlines that, sliced
224
+ // raw, break the one-line-per-entry contract of
225
+ // franklin-debug.log and shred any parser that splits on
226
+ // `^\[timestamp\]`. Verified 2026-05-04 (and reviewed again
227
+ // 2026-05-12): a real entry produced
228
+ // `Slow tool: Bash ok after 438.4s — cd ... python3 -c "`
229
+ // `import subprocess`
230
+ // `[2026-05-04T19:25:10] [ERROR] Signature-loop hard stop ...`
231
+ // where "import subprocess" sat on its own line, untimestamped,
232
+ // because the preview's first 80 chars contained the heredoc
233
+ // opener.
234
+ const preview = (this.inputPreview(invocation) || '').replace(/[\r\n]+/g, ' ');
223
235
  logger.info(`[franklin] Slow tool: ${invocation.name} ${status} after ${(execElapsed / 1000).toFixed(1)}s${preview ? ` — ${preview.slice(0, 80)}` : ''}`);
224
236
  }
225
237
  // Persist large results to disk with preview.
@@ -45,6 +45,11 @@ export function updateActualTokens(inputTokens, outputTokens, messageCount) {
45
45
  * More accurate than pure estimation because it's grounded in actual API counts.
46
46
  */
47
47
  export function getAnchoredTokenCount(history) {
48
+ // The model that just billed input — used as the denominator below.
49
+ // _currentModel is set per-turn by setEstimationModel(), so it reflects
50
+ // whatever the router actually resolved (not just config.model, which
51
+ // may be a routing profile like blockrun/auto).
52
+ const contextWindow = _currentModel ? getContextWindow(_currentModel) : 200_000;
48
53
  if (lastApiInputTokens > 0 && lastApiMessageCount > 0 && history.length >= lastApiMessageCount) {
49
54
  // Sanity check: if history was mutated (compaction, micro-compact), anchor may be stale.
50
55
  // Detect by checking if new messages were only appended (length grew), not if content changed.
@@ -60,17 +65,18 @@ export function getAnchoredTokenCount(history) {
60
65
  return {
61
66
  estimated: total,
62
67
  apiAnchored: true,
63
- contextUsagePct: 0,
68
+ contextUsagePct: (total / contextWindow) * 100,
64
69
  };
65
70
  }
66
71
  // Too much growth — anchor is unreliable, fall through to estimation
67
72
  resetTokenAnchor();
68
73
  }
69
74
  // No anchor — pure estimation
75
+ const est = estimateHistoryTokens(history);
70
76
  return {
71
- estimated: estimateHistoryTokens(history),
77
+ estimated: est,
72
78
  apiAnchored: false,
73
- contextUsagePct: 0,
79
+ contextUsagePct: (est / contextWindow) * 100,
74
80
  };
75
81
  }
76
82
  /**
@@ -115,10 +121,40 @@ function estimateContentPartTokens(part) {
115
121
  // +16 tokens for tool_use framing (type, id, name fields, JSON structure)
116
122
  return 16 + estimateTokens(part.name) + estimateTokens(JSON.stringify(part.input), 2);
117
123
  case 'tool_result': {
118
- const content = typeof part.content === 'string'
119
- ? part.content
120
- : JSON.stringify(part.content);
121
- return estimateTokens(content, 2);
124
+ // String content: count as text directly.
125
+ if (typeof part.content === 'string') {
126
+ return estimateTokens(part.content, 2);
127
+ }
128
+ // Array content: sum block-by-block. CRITICAL: image blocks must
129
+ // NOT go through JSON.stringify — their base64 `data` field would
130
+ // be tokenized as text (a 100KB image → ~70k phantom tokens),
131
+ // which is what made the context ring read ~86% on a 2-image chat
132
+ // and triggered premature /compact loops. Anthropic actually
133
+ // bills (w*h)/750 per image, ≈1100-1500 for typical sizes; a flat
134
+ // 1500-token estimate is close enough without needing to decode
135
+ // the image dimensions client-side.
136
+ let total = 0;
137
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
138
+ const blocks = part.content;
139
+ for (const block of blocks) {
140
+ const blockType = block?.type;
141
+ if (blockType === 'text') {
142
+ total += estimateTokens(block?.text ?? '', 2);
143
+ }
144
+ else if (blockType === 'image') {
145
+ total += 1500;
146
+ }
147
+ else {
148
+ // Unknown block — stringify minus any nested base64 data field
149
+ // to avoid the same blow-up for future block kinds.
150
+ const sanitized = { ...block };
151
+ if (sanitized?.source && typeof sanitized.source === 'object' && sanitized.source.data) {
152
+ sanitized.source = { ...sanitized.source, data: '<bytes>' };
153
+ }
154
+ total += estimateTokens(JSON.stringify(sanitized), 2);
155
+ }
156
+ }
157
+ return total;
122
158
  }
123
159
  case 'thinking':
124
160
  return estimateTokens(part.thinking);
package/dist/logger.js CHANGED
@@ -86,7 +86,22 @@ function writeFile(level, msg) {
86
86
  writesSinceRotateProbe = 0;
87
87
  maybeRotate();
88
88
  }
89
- const clean = msg.replace(ANSI_RE, '');
89
+ // Two-step sanitize, in this order:
90
+ // 1. Collapse embedded newlines (\n / \r / \r\n) to a literal
91
+ // " ↵ " marker so a single logger call always produces one
92
+ // physical log line.
93
+ // 2. Strip ANSI escape sequences.
94
+ //
95
+ // Order matters: ANSI_RE strips bare \r (used by progress bars), so
96
+ // running it first would erase \r-only line breaks and let
97
+ // "first\rsecond" appear as "firstsecond" in the log. Verified
98
+ // 2026-05-12 from franklin-debug.log: a `Slow tool: Bash ok ...
99
+ // python3 -c "` preview leaked `import subprocess` onto its own
100
+ // untimestamped line because the embedded \n in the bash command
101
+ // survived the preview slice and broke any parser that splits on
102
+ // ^\[timestamp\]. Cheaper to enforce one-line-per-entry here than
103
+ // to police every callsite.
104
+ const clean = msg.replace(/\r\n|\r|\n/g, ' ↵ ').replace(ANSI_RE, '');
90
105
  fs.appendFileSync(LOG_FILE, `[${new Date().toISOString()}] [${level.toUpperCase()}] ${clean}\n`);
91
106
  }
92
107
  catch { /* best-effort — never break the agent on log failure */ }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@blockrun/franklin",
3
- "version": "3.15.96",
3
+ "version": "3.15.98",
4
4
  "description": "Franklin — The AI agent with a wallet. Spends USDC autonomously to get real work done. Pay per action, no subscriptions.",
5
5
  "type": "module",
6
6
  "exports": {