@blockrun/franklin 3.15.89 → 3.15.90

@@ -44,13 +44,37 @@ export function ageToolResults(history) {
   const aged = parts.map(part => {
     if (part.type !== 'tool_result')
       return part;
+    // Vision tool_results carry [text, image] arrays. JSON.stringify-ing
+    // them and rebuilding `content` as a truncated string drops the
+    // image block entirely. Measure only the text portion; the aging code
+    // below rebuilds as a string only when no image is present; otherwise
+    // we leave the part untouched (image bytes are already context-cheap
+    // once cached, and turning them into placeholders is the wrong fix).
+    let hasImage = false;
+    let textOnly = '';
+    if (Array.isArray(part.content)) {
+      for (const block of part.content) {
+        if (block?.type === 'text' && typeof block.text === 'string') {
+          textOnly += (textOnly ? '\n' : '') + block.text;
+        }
+        else if (block?.type === 'image') {
+          hasImage = true;
+        }
+      }
+    }
     const content = typeof part.content === 'string'
       ? part.content
-      : JSON.stringify(part.content);
+      : Array.isArray(part.content)
+        ? textOnly
+        : JSON.stringify(part.content);
     const charLen = content.length;
     // Recent 3 results: keep full
     if (age <= 3)
       return part;
+    // Preserve image-bearing tool_results regardless of age — replacing
+    // them with a text stub would silently delete the model's vision context.
+    if (hasImage)
+      return part;
     // Age 4-8: keep first 500 chars
     if (age <= 8 && charLen > 500) {
       modified = true;
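
For context, the two tool_result shapes this pass now distinguishes look roughly like this (a sketch; the block fields follow the content-block structure the loop above reads, while the ids and the image source payload are hypothetical):

// Plain text result: eligible for truncation once it ages past 3 turns.
const textResult = {
  type: 'tool_result',
  tool_use_id: 'toolu_example_1',
  content: 'long shell output...',
};
// Vision result: content is an array, and the image block must survive aging.
const visionResult = {
  type: 'tool_result',
  tool_use_id: 'toolu_example_2',
  content: [
    { type: 'text', text: 'Image file: /tmp/chart.png (.png, 1.2MB).' },
    { type: 'image', /* provider-specific source payload */ },
  ],
};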
@@ -206,7 +230,32 @@ export function deduplicateToolResultLines(history) {
   const newParts = parts.map(part => {
     if (part.type !== 'tool_result')
       return part;
-    const raw = typeof part.content === 'string' ? part.content : JSON.stringify(part.content);
+    // Vision tool_results carry [text, image] arrays. JSON.stringify-ing
+    // them and writing back as a string would destroy the image (same
+    // bug class as ageToolResults / budgetToolResults — sibling site,
+    // verified 2026-05-10 during PR #53 review). For arrays, dedupe
+    // only the text segments; image segments pass through untouched.
+    let raw;
+    const imageBlocks = [];
+    if (typeof part.content === 'string') {
+      raw = part.content;
+    }
+    else if (Array.isArray(part.content)) {
+      const blocks = part.content;
+      const texts = [];
+      for (const b of blocks) {
+        if (b?.type === 'text' && typeof b.text === 'string') {
+          texts.push(b.text);
+        }
+        else if (b?.type === 'image') {
+          imageBlocks.push(b);
+        }
+      }
+      raw = texts.join('\n');
+    }
+    else {
+      raw = JSON.stringify(part.content);
+    }
     // Strip ANSI codes
     const stripped = raw.replace(ANSI_RE_REDUCE, '');
     // Collapse repeated consecutive lines
@@ -231,6 +280,19 @@ export function deduplicateToolResultLines(history) {
     if (result === raw)
       return part;
     partModified = true;
+    // If the original content was an array with image blocks, rebuild
+    // as an array — keep all image segments, replace the joined text
+    // payload with a single deduped text segment. This way dedupe runs
+    // for free on image-bearing results without losing vision context.
+    if (Array.isArray(part.content) && imageBlocks.length > 0) {
+      return {
+        ...part,
+        content: [
+          { type: 'text', text: result },
+          ...imageBlocks,
+        ],
+      };
+    }
     return { ...part, content: result };
   });
   if (!partModified)
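
A sketch of the rebuild path for an image-bearing result (same assumed shapes as above; the deduped text is illustrative):

// Before: repeated text segments plus an image block.
// content = [
//   { type: 'text', text: 'line\nline\nline' },
//   { type: 'image', /* ... */ },
// ]
// After: text segments are joined, deduped, and written back as a single
// leading text block; every image block is carried over unchanged.
// content = [
//   { type: 'text', text: '<deduped text>' },
//   { type: 'image', /* ... */ },
// ]

Note that the rebuild coalesces all text segments into one block placed before the images, so the original interleaving of text and image segments is not preserved; only the segments themselves are.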
@@ -301,6 +363,16 @@ export function collapseRepetitiveTools(history) {
   const parts = msg.content.map(part => {
     if (part.type !== 'tool_result' || !oldIds.has(part.tool_use_id))
       return part;
+    // Image-bearing results are the third sibling site of the JSON.stringify
+    // bug class (same pattern as ageToolResults / budgetToolResults /
+    // deduplicateToolResultLines). Don't collapse; replacing them with
+    // a `[first-line...]` string would destroy the vision context.
+    // Image bytes are already cache-cheap upstream once prompt-cached;
+    // the cost-control intent of this collapser is satisfied without
+    // touching them.
+    if (Array.isArray(part.content) && part.content.some((b) => b.type === 'image')) {
+      return part;
+    }
     const content = typeof part.content === 'string' ? part.content : JSON.stringify(part.content);
     if (content.length <= 80)
       return part;
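
The new guard, isolated as a predicate for reference (same assumed shapes as above; the helper name is hypothetical):

const hasImageBlock = (part) =>
  Array.isArray(part.content) && part.content.some((b) => b.type === 'image');
// NB: unlike the sibling sites, this check reads `b.type` rather than
// `b?.type`, so a null entry in a content array would throw here; upstream
// shapes presumably never contain null blocks.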
@@ -103,12 +103,64 @@ async function execute(input, ctx) {
         output: `Image file: ${resolved} (${ext}, ${sizeStr}). Too large to inline for vision (>${Math.round(IMAGE_MAX_BYTES / 1_000_000)}MB). Resize or crop first.`,
       };
     }
-    const bytes = fs.readFileSync(resolved);
-    const base64 = bytes.toString('base64');
+    // Client-side normalization to bound vision-token cost. The BlockRun
+    // gateway (verified 2026-05-09) tokenizes image base64 as text on the
+    // /v1/messages forward path, so a 1.9MB PNG → ~2.5M base64 chars →
+    // ~1.36M billed tokens (~$0.50 per call) instead of Anthropic's
+    // native vision tokenization (~1.6k tokens). Resizing the long edge
+    // to 1280px and re-encoding as JPEG q85 cuts the payload to ~80KB
+    // while keeping vision usable. Skip the work if the file is already
+    // small; preserve PNG when transparency matters (alpha sample).
+    const SKIP_BELOW_BYTES = 150 * 1024;
+    const MAX_LONG_EDGE = 1280;
+    const JPEG_QUALITY = 85;
+    const rawBytes = fs.readFileSync(resolved);
+    let outBytes = rawBytes;
+    let outMedia = IMAGE_MEDIA_TYPES[ext];
+    let normalizeNote = '';
+    if (stat.size > SKIP_BELOW_BYTES) {
+      try {
+        const sharpMod = await import('sharp');
+        const sharp = sharpMod.default;
+        const img = sharp(rawBytes, { failOn: 'none' });
+        const meta = await img.metadata();
+        const longEdge = Math.max(meta.width ?? 0, meta.height ?? 0);
+        // Detect transparency: GIF/WebP/PNG with non-opaque alpha → keep PNG.
+        let hasAlpha = false;
+        if (meta.hasAlpha) {
+          const stats = await sharp(rawBytes).stats();
+          const alpha = stats.channels[stats.channels.length - 1];
+          hasAlpha = alpha?.min !== undefined && alpha.min < 255;
+        }
+        let pipeline = sharp(rawBytes, { failOn: 'none' });
+        if (longEdge > MAX_LONG_EDGE) {
+          pipeline = pipeline.resize({
+            width: meta.width && meta.width >= (meta.height ?? 0) ? MAX_LONG_EDGE : undefined,
+            height: meta.height && meta.height > (meta.width ?? 0) ? MAX_LONG_EDGE : undefined,
+            fit: 'inside',
+            withoutEnlargement: true,
+          });
+        }
+        if (hasAlpha) {
+          outBytes = await pipeline.png({ compressionLevel: 9 }).toBuffer();
+          outMedia = 'image/png';
+        }
+        else {
+          outBytes = await pipeline.jpeg({ quality: JPEG_QUALITY, mozjpeg: true }).toBuffer();
+          outMedia = 'image/jpeg';
+        }
+        const outKb = (outBytes.length / 1024).toFixed(1);
+        normalizeNote = ` Normalized: ${sizeStr} → ${outKb}KB (${meta.width}×${meta.height}${longEdge > MAX_LONG_EDGE ? ` → long edge ${MAX_LONG_EDGE}` : ''}, ${hasAlpha ? 'PNG/alpha' : `JPEG q${JPEG_QUALITY}`}).`;
+      }
+      catch {
+        // Best-effort — if sharp fails, fall through with raw bytes.
+      }
+    }
+    const base64 = outBytes.toString('base64');
     fileReadTracker.set(resolved, { mtimeMs: stat.mtimeMs, readAt: Date.now() });
     return {
-      output: `Image file: ${resolved} (${ext}, ${sizeStr}). Rendered below for vision-capable models.`,
-      images: [{ mediaType: IMAGE_MEDIA_TYPES[ext], base64 }],
+      output: `Image file: ${resolved} (${ext}, ${sizeStr}).${normalizeNote} Rendered below for vision-capable models.`,
+      images: [{ mediaType: outMedia, base64 }],
     };
   }
   const binaryExts = new Set(['.ico', '.bmp', '.pdf', '.zip', '.tar', '.gz', '.woff', '.woff2', '.ttf', '.eot', '.mp3', '.mp4', '.wav', '.avi', '.mov', '.exe', '.dll', '.so', '.dylib']);
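
The cost arithmetic in the normalization comment reproduces with a small estimate (a sketch; the ~1.85 chars-per-token ratio is inferred from the comment's own figures, not a documented constant):

// Estimate billed tokens when a gateway tokenizes image base64 as plain text.
function estimateBase64Tokens(fileBytes, charsPerToken = 1.85) {
  const base64Chars = Math.ceil(fileBytes / 3) * 4; // base64: 3 bytes → 4 chars
  return Math.round(base64Chars / charsPerToken);
}
// estimateBase64Tokens(1_900_000) ≈ 1.37M tokens, consistent with the
// ~1.36M figure above; the normalized ~80KB payload comes to roughly 59k.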
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@blockrun/franklin",
-  "version": "3.15.89",
+  "version": "3.15.90",
   "description": "Franklin — The AI agent with a wallet. Spends USDC autonomously to get real work done. Pay per action, no subscriptions.",
   "type": "module",
   "exports": {
@@ -80,6 +80,7 @@
     "playwright-core": "^1.49.1",
     "qrcode": "^1.5.4",
     "react": "^19.2.4",
+    "sharp": "^0.34.5",
     "viem": "^2.48.1"
   },
   "devDependencies": {