@blockrun/franklin 3.8.35 → 3.8.37

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. package/README.md +1 -1
  2. package/dist/agent/commands.js +1 -1
  3. package/dist/agent/compact.js +1 -1
  4. package/dist/agent/evaluator.d.ts +3 -1
  5. package/dist/agent/evaluator.js +44 -8
  6. package/dist/agent/llm.js +2 -2
  7. package/dist/agent/loop.js +19 -0
  8. package/dist/agent/optimize.js +1 -0
  9. package/dist/agent/permissions.js +10 -1
  10. package/dist/agent/tokens.js +4 -0
  11. package/dist/agent/types.d.ts +22 -1
  12. package/dist/commands/balance.js +1 -1
  13. package/dist/commands/daemon.js +23 -16
  14. package/dist/commands/plugin.d.ts +1 -1
  15. package/dist/commands/plugin.js +10 -10
  16. package/dist/commands/stats.d.ts +1 -1
  17. package/dist/commands/stats.js +2 -2
  18. package/dist/index.js +2 -2
  19. package/dist/panel/server.js +7 -6
  20. package/dist/plugin-sdk/index.d.ts +2 -2
  21. package/dist/plugin-sdk/index.js +2 -2
  22. package/dist/plugin-sdk/plugin.d.ts +4 -4
  23. package/dist/plugins/registry.d.ts +3 -3
  24. package/dist/plugins/registry.js +6 -6
  25. package/dist/pricing.js +1 -0
  26. package/dist/proxy/server.js +148 -26
  27. package/dist/router/index.js +3 -3
  28. package/dist/session/storage.js +2 -2
  29. package/dist/tools/imagegen.d.ts +14 -0
  30. package/dist/tools/imagegen.js +154 -22
  31. package/dist/tools/read.js +29 -2
  32. package/dist/tools/videogen.d.ts +14 -3
  33. package/dist/tools/videogen.js +161 -28
  34. package/dist/tools/webhook.js +2 -1
  35. package/dist/trading/providers/coingecko/client.js +2 -1
  36. package/dist/ui/app.js +12 -12
  37. package/dist/ui/model-picker.js +7 -4
  38. package/dist/wallet/index.d.ts +17 -0
  39. package/dist/wallet/index.js +22 -0
  40. package/package.json +7 -5
@@ -5,7 +5,7 @@ import os from 'node:os';
5
5
  import { getOrCreateWallet, getOrCreateSolanaWallet, createPaymentPayload, createSolanaPaymentPayload, parsePaymentRequired, extractPaymentDetails, solanaKeyToBytes, SOLANA_NETWORK, } from '@blockrun/llm';
6
6
  import { recordUsage } from '../stats/tracker.js';
7
7
  import { appendAudit } from '../stats/audit.js';
8
- import { fetchWithFallback, buildFallbackChain, DEFAULT_FALLBACK_CONFIG, ROUTING_PROFILES, } from './fallback.js';
8
+ import { buildFallbackChain, DEFAULT_FALLBACK_CONFIG, ROUTING_PROFILES, } from './fallback.js';
9
9
  import { routeRequest, parseRoutingProfile, } from '../router/index.js';
10
10
  import { estimateCost } from '../pricing.js';
11
11
  import { VERSION } from '../config.js';
@@ -41,6 +41,57 @@ function log(...args) {
41
41
  catch { /* ignore */ }
42
42
  }
43
43
  const DEFAULT_MAX_TOKENS = 4096;
44
+ const DEFAULT_PROXY_REQUEST_TIMEOUT_MS = 45_000;
45
+ const DEFAULT_PROXY_STREAM_TIMEOUT_MS = 5 * 60 * 1000;
46
+ function parseTimeoutEnv(name, fallback) {
47
+ const raw = process.env[name];
48
+ if (!raw)
49
+ return fallback;
50
+ const parsed = Number.parseInt(raw, 10);
51
+ return Number.isFinite(parsed) && parsed >= 0 ? parsed : fallback;
52
+ }
53
+ function getProxyRequestTimeoutMs() {
54
+ return parseTimeoutEnv('FRANKLIN_PROXY_REQUEST_TIMEOUT_MS', DEFAULT_PROXY_REQUEST_TIMEOUT_MS);
55
+ }
56
+ function getProxyStreamTimeoutMs() {
57
+ return parseTimeoutEnv('FRANKLIN_PROXY_STREAM_TIMEOUT_MS', DEFAULT_PROXY_STREAM_TIMEOUT_MS);
58
+ }
59
+ function createProxyTimeoutError(label, timeoutMs) {
60
+ return new Error(`${label} timed out after ${timeoutMs}ms`);
61
+ }
62
+ async function fetchWithTimeout(url, init, timeoutMs, label) {
63
+ if (timeoutMs <= 0)
64
+ return fetch(url, init);
65
+ const controller = new AbortController();
66
+ const timeoutError = createProxyTimeoutError(label, timeoutMs);
67
+ const timeout = setTimeout(() => {
68
+ try {
69
+ controller.abort(timeoutError);
70
+ }
71
+ catch { /* ignore */ }
72
+ }, timeoutMs);
73
+ try {
74
+ return await fetch(url, { ...init, signal: controller.signal });
75
+ }
76
+ catch (err) {
77
+ if (controller.signal.aborted)
78
+ throw timeoutError;
79
+ throw err;
80
+ }
81
+ finally {
82
+ clearTimeout(timeout);
83
+ }
84
+ }
85
+ function replaceModelInBody(body, model) {
86
+ try {
87
+ const parsed = JSON.parse(body);
88
+ parsed.model = model;
89
+ return JSON.stringify(parsed);
90
+ }
91
+ catch {
92
+ return body;
93
+ }
94
+ }
44
95
  // Per-model last output tokens for adaptive max_tokens (avoids cross-request pollution)
45
96
  const MAX_TRACKED_MODELS = 50;
46
97
  const lastOutputByModel = new Map();
@@ -67,9 +118,11 @@ const MODEL_SHORTCUTS = {
67
118
  'opus-4.6': 'anthropic/claude-opus-4.6',
68
119
  haiku: 'anthropic/claude-haiku-4.5',
69
120
  // OpenAI
70
- gpt: 'openai/gpt-5.4',
71
- gpt5: 'openai/gpt-5.4',
72
- 'gpt-5': 'openai/gpt-5.4',
121
+ // `gpt` / `gpt5` / `gpt-5` follow the gateway's flagship — currently 5.5.
122
+ gpt: 'openai/gpt-5.5',
123
+ gpt5: 'openai/gpt-5.5',
124
+ 'gpt-5': 'openai/gpt-5.5',
125
+ 'gpt-5.5': 'openai/gpt-5.5',
73
126
  'gpt-5.4': 'openai/gpt-5.4',
74
127
  'gpt-5.4-pro': 'openai/gpt-5.4-pro',
75
128
  'gpt-5.3': 'openai/gpt-5.3',
@@ -367,13 +420,21 @@ export function createProxy(options) {
367
420
  };
368
421
  let response;
369
422
  let finalModel = requestModel;
423
+ const requestTimeoutMs = getProxyRequestTimeoutMs();
370
424
  // Use fallback chain if enabled
371
425
  if (fallbackEnabled && body && requestPath.includes('messages')) {
372
426
  const fallbackConfig = {
373
427
  ...DEFAULT_FALLBACK_CONFIG,
374
428
  chain: buildFallbackChain(requestModel),
375
429
  };
376
- const result = await fetchWithFallback(targetUrl, requestInit, body, fallbackConfig, (failedModel, status, nextModel) => {
430
+ const result = await fetchWithPaymentFallback(targetUrl, requestInit, body, fallbackConfig, {
431
+ method: req.method || 'POST',
432
+ headers,
433
+ chain,
434
+ baseWallet,
435
+ solanaWallet,
436
+ timeoutMs: requestTimeoutMs,
437
+ }, (failedModel, status, nextModel) => {
377
438
  log(`⚠️ ${failedModel} returned ${status}, falling back to ${nextModel}`);
378
439
  });
379
440
  response = result.response;
@@ -386,20 +447,14 @@ export function createProxy(options) {
386
447
  }
387
448
  }
388
449
  else {
389
- // Direct fetch without fallback (with timeout)
390
- const directCtrl = new AbortController();
391
- const directTimeout = setTimeout(() => directCtrl.abort(), 120_000); // 2min
392
- response = await fetch(targetUrl, { ...requestInit, signal: directCtrl.signal });
393
- clearTimeout(directTimeout);
394
- }
395
- // Handle 402 payment — body now has the correct model after fallback
396
- if (response.status === 402) {
397
- if (chain === 'solana' && solanaWallet) {
398
- response = await handleSolanaPayment(response, targetUrl, req.method || 'POST', headers, body, solanaWallet.privateKey, solanaWallet.address);
399
- }
400
- else if (baseWallet) {
401
- response = await handleBasePayment(response, targetUrl, req.method || 'POST', headers, body, baseWallet.privateKey, baseWallet.address);
402
- }
450
+ response = await fetchModelAttempt(targetUrl, requestInit, body, requestModel, {
451
+ method: req.method || 'POST',
452
+ headers,
453
+ chain,
454
+ baseWallet,
455
+ solanaWallet,
456
+ timeoutMs: requestTimeoutMs,
457
+ });
403
458
  }
404
459
  const responseHeaders = {};
405
460
  response.headers.forEach((v, k) => {
@@ -450,7 +505,7 @@ export function createProxy(options) {
450
505
  const decoder = new TextDecoder();
451
506
  let fullResponse = '';
452
507
  const STREAM_CAP = 5_000_000; // 5MB cap on accumulated stream
453
- const STREAM_TIMEOUT_MS = 5 * 60 * 1000; // 5 min timeout for entire stream
508
+ const STREAM_TIMEOUT_MS = getProxyStreamTimeoutMs();
454
509
  const streamDeadline = Date.now() + STREAM_TIMEOUT_MS;
455
510
  const pump = async () => {
456
511
  while (true) {
@@ -561,10 +616,77 @@ export function createProxy(options) {
561
616
  });
562
617
  return server;
563
618
  }
619
+ async function fetchModelAttempt(url, init, body, model, payment) {
620
+ let response = await fetchWithTimeout(url, { ...init, body: body || undefined }, payment.timeoutMs, `Proxy request for ${model}`);
621
+ if (response.status !== 402)
622
+ return response;
623
+ if (payment.chain === 'solana' && payment.solanaWallet) {
624
+ return handleSolanaPayment(response, url, payment.method, payment.headers, body, payment.solanaWallet.privateKey, payment.solanaWallet.address, payment.timeoutMs, model);
625
+ }
626
+ if (payment.baseWallet) {
627
+ return handleBasePayment(response, url, payment.method, payment.headers, body, payment.baseWallet.privateKey, payment.baseWallet.address, payment.timeoutMs, model);
628
+ }
629
+ return response;
630
+ }
631
+ /**
632
+ * Try each fallback model as a full x402 attempt:
633
+ * unpaid 402 probe, payment signing, then the paid provider call. The older
634
+ * flow only applied fallback to the probe, which meant a slow paid call could
635
+ * hang Franklin until the outer client gave up.
636
+ */
637
+ async function fetchWithPaymentFallback(url, init, originalBody, config, payment, onFallback) {
638
+ const failedModels = [];
639
+ let attempts = 0;
640
+ for (let i = 0; i < config.chain.length && attempts < config.maxRetries; i++) {
641
+ const model = config.chain[i];
642
+ const body = replaceModelInBody(originalBody, model);
643
+ try {
644
+ attempts++;
645
+ const response = await fetchModelAttempt(url, init, body, model, payment);
646
+ if (!config.retryOn.includes(response.status)) {
647
+ return {
648
+ response,
649
+ modelUsed: model,
650
+ bodyUsed: body,
651
+ fallbackUsed: i > 0,
652
+ attemptsCount: attempts,
653
+ failedModels,
654
+ };
655
+ }
656
+ try {
657
+ await response.body?.cancel();
658
+ }
659
+ catch { /* ignore */ }
660
+ failedModels.push(model);
661
+ const nextModel = config.chain[i + 1];
662
+ if (nextModel && onFallback) {
663
+ onFallback(model, response.status, nextModel);
664
+ }
665
+ if (i < config.chain.length - 1) {
666
+ await sleep(config.retryDelayMs);
667
+ }
668
+ }
669
+ catch (err) {
670
+ failedModels.push(model);
671
+ const nextModel = config.chain[i + 1];
672
+ if (nextModel && onFallback) {
673
+ onFallback(model, 0, nextModel);
674
+ }
675
+ log(`[fallback] ${model} request error: ${err instanceof Error ? err.message : String(err)}`);
676
+ if (i < config.chain.length - 1) {
677
+ await sleep(config.retryDelayMs);
678
+ }
679
+ }
680
+ }
681
+ throw new Error(`All models in fallback chain failed: ${failedModels.join(', ')}`);
682
+ }
683
+ function sleep(ms) {
684
+ return new Promise((resolve) => setTimeout(resolve, ms));
685
+ }
564
686
  // ======================================================================
565
687
  // Base (EIP-712) payment handler
566
688
  // ======================================================================
567
- async function handleBasePayment(response, url, method, headers, body, privateKey, fromAddress) {
689
+ async function handleBasePayment(response, url, method, headers, body, privateKey, fromAddress, timeoutMs = getProxyRequestTimeoutMs(), model = 'unknown') {
568
690
  const paymentHeader = await extractPaymentHeader(response);
569
691
  if (!paymentHeader) {
570
692
  throw new Error('402 Payment Required — wallet may need funding. Run: franklin balance');
@@ -577,19 +699,19 @@ async function handleBasePayment(response, url, method, headers, body, privateKe
577
699
  maxTimeoutSeconds: details.maxTimeoutSeconds || 300,
578
700
  extra: details.extra,
579
701
  });
580
- return fetch(url, {
702
+ return fetchWithTimeout(url, {
581
703
  method,
582
704
  headers: {
583
705
  ...headers,
584
706
  'PAYMENT-SIGNATURE': paymentPayload,
585
707
  },
586
708
  body: body || undefined,
587
- });
709
+ }, timeoutMs, `Paid proxy request for ${model}`);
588
710
  }
589
711
  // ======================================================================
590
712
  // Solana payment handler
591
713
  // ======================================================================
592
- async function handleSolanaPayment(response, url, method, headers, body, privateKey, fromAddress) {
714
+ async function handleSolanaPayment(response, url, method, headers, body, privateKey, fromAddress, timeoutMs = getProxyRequestTimeoutMs(), model = 'unknown') {
593
715
  const paymentHeader = await extractPaymentHeader(response);
594
716
  if (!paymentHeader) {
595
717
  throw new Error('402 Payment Required — wallet may need funding. Run: franklin balance');
@@ -604,14 +726,14 @@ async function handleSolanaPayment(response, url, method, headers, body, private
604
726
  maxTimeoutSeconds: details.maxTimeoutSeconds || 300,
605
727
  extra: details.extra,
606
728
  });
607
- return fetch(url, {
729
+ return fetchWithTimeout(url, {
608
730
  method,
609
731
  headers: {
610
732
  ...headers,
611
733
  'PAYMENT-SIGNATURE': paymentPayload,
612
734
  },
613
735
  body: body || undefined,
614
- });
736
+ }, timeoutMs, `Paid proxy request for ${model}`);
615
737
  }
616
738
  export function classifyRequest(body) {
617
739
  try {
@@ -44,11 +44,11 @@ const AUTO_TIERS = {
44
44
  },
45
45
  MEDIUM: {
46
46
  primary: 'anthropic/claude-sonnet-4.6',
47
- fallback: ['openai/gpt-5.4', 'google/gemini-3.1-pro', 'moonshot/kimi-k2.6'],
47
+ fallback: ['openai/gpt-5.5', 'google/gemini-3.1-pro', 'moonshot/kimi-k2.6'],
48
48
  },
49
49
  COMPLEX: {
50
50
  primary: 'anthropic/claude-sonnet-4.6',
51
- fallback: ['openai/gpt-5.4', 'anthropic/claude-opus-4.7', 'moonshot/kimi-k2.6'],
51
+ fallback: ['openai/gpt-5.5', 'anthropic/claude-opus-4.7', 'moonshot/kimi-k2.6'],
52
52
  },
53
53
  REASONING: {
54
54
  // Opus 4.7: step-change improvement in agentic coding over 4.6 per
@@ -93,7 +93,7 @@ const PREMIUM_TIERS = {
93
93
  },
94
94
  COMPLEX: {
95
95
  primary: 'anthropic/claude-opus-4.7',
96
- fallback: ['anthropic/claude-opus-4.6', 'openai/gpt-5.4', 'anthropic/claude-sonnet-4.6'],
96
+ fallback: ['anthropic/claude-opus-4.6', 'openai/gpt-5.5', 'anthropic/claude-sonnet-4.6'],
97
97
  },
98
98
  REASONING: {
99
99
  primary: 'anthropic/claude-opus-4.7',
@@ -13,7 +13,7 @@ function getSessionsDir() {
13
13
  if (resolvedSessionsDir)
14
14
  return resolvedSessionsDir;
15
15
  const preferred = path.join(BLOCKRUN_DIR, 'sessions');
16
- const fallback = path.join(os.tmpdir(), 'runcode', 'sessions');
16
+ const fallback = path.join(os.tmpdir(), 'franklin', 'sessions');
17
17
  for (const dir of [preferred, fallback]) {
18
18
  try {
19
19
  fs.mkdirSync(dir, { recursive: true });
@@ -41,7 +41,7 @@ function metaPath(id) {
41
41
  }
42
42
  function withWritableSessionDir(action) {
43
43
  const preferred = path.join(BLOCKRUN_DIR, 'sessions');
44
- const fallback = path.join(os.tmpdir(), 'runcode', 'sessions');
44
+ const fallback = path.join(os.tmpdir(), 'franklin', 'sessions');
45
45
  try {
46
46
  action();
47
47
  }
@@ -4,6 +4,20 @@
4
4
  */
5
5
  import type { CapabilityHandler } from '../agent/types.js';
6
6
  import type { ContentLibrary } from '../content/library.js';
7
+ /**
8
+ * Models that accept a reference image via /v1/images/image2image. Currently
9
+ * limited to OpenAI's edit endpoint — Gemini Nano Banana Pro and Grok Imagine
10
+ * Image Pro need gateway-side support before they can be wired in here.
11
+ */
12
+ export declare const EDIT_SUPPORTED_MODELS: Set<string>;
13
+ export declare const REFERENCE_IMAGE_MAX_BYTES = 4000000;
14
+ /**
15
+ * Normalize a reference image into a base64 data URI for the gateway. The
16
+ * /v1/images/image2image endpoint validates `image` against /^data:image\//,
17
+ * so http(s) URLs and local paths both have to be inlined client-side before
18
+ * posting. Already-formed data URIs pass through.
19
+ */
20
+ export declare function resolveReferenceImage(input: string, workingDir: string): Promise<string>;
7
21
  export interface ImageGenDeps {
8
22
  /** Optional Content library for auto-recording generations into a piece. */
9
23
  library?: ContentLibrary;
@@ -9,13 +9,89 @@ import { loadChain, API_URLS, VERSION } from '../config.js';
9
9
  import { checkImageBudget, recordImageAsset } from '../content/record-image.js';
10
10
  import { ModelClient } from '../agent/llm.js';
11
11
  import { analyzeMediaRequest, renderProposalForAskUser } from '../agent/media-router.js';
12
+ import { recordUsage } from '../stats/tracker.js';
13
+ import { findModel, estimateCostUsd } from '../gateway-models.js';
14
+ /**
15
+ * Models that accept a reference image via /v1/images/image2image. Currently
16
+ * limited to OpenAI's edit endpoint — Gemini Nano Banana Pro and Grok Imagine
17
+ * Image Pro need gateway-side support before they can be wired in here.
18
+ */
19
+ export const EDIT_SUPPORTED_MODELS = new Set([
20
+ 'openai/gpt-image-1',
21
+ 'openai/gpt-image-2',
22
+ ]);
23
+ export const REFERENCE_IMAGE_MAX_BYTES = 4_000_000;
24
+ /**
25
+ * Normalize a reference image into a base64 data URI for the gateway. The
26
+ * /v1/images/image2image endpoint validates `image` against /^data:image\//,
27
+ * so http(s) URLs and local paths both have to be inlined client-side before
28
+ * posting. Already-formed data URIs pass through.
29
+ */
30
+ export async function resolveReferenceImage(input, workingDir) {
31
+ if (input.startsWith('data:image/'))
32
+ return input;
33
+ if (/^https?:\/\//i.test(input)) {
34
+ const ctrl = new AbortController();
35
+ const timeout = setTimeout(() => ctrl.abort(), 30_000);
36
+ try {
37
+ const resp = await fetch(input, { signal: ctrl.signal });
38
+ if (!resp.ok) {
39
+ throw new Error(`Reference image fetch failed: ${resp.status} ${resp.statusText}`);
40
+ }
41
+ const contentType = (resp.headers.get('content-type') || '').toLowerCase().split(';')[0].trim();
42
+ if (!contentType.startsWith('image/')) {
43
+ throw new Error(`Reference image URL returned non-image content-type: ${contentType || '(none)'}`);
44
+ }
45
+ const buf = Buffer.from(await resp.arrayBuffer());
46
+ if (buf.byteLength > REFERENCE_IMAGE_MAX_BYTES) {
47
+ throw new Error(`Reference image too large: ${(buf.byteLength / 1_000_000).toFixed(1)}MB > ${(REFERENCE_IMAGE_MAX_BYTES / 1_000_000).toFixed(1)}MB cap.`);
48
+ }
49
+ return `data:${contentType};base64,${buf.toString('base64')}`;
50
+ }
51
+ finally {
52
+ clearTimeout(timeout);
53
+ }
54
+ }
55
+ // Treat as local file path.
56
+ const resolved = path.isAbsolute(input) ? input : path.resolve(workingDir, input);
57
+ const stat = fs.statSync(resolved);
58
+ if (stat.size > REFERENCE_IMAGE_MAX_BYTES) {
59
+ throw new Error(`Reference image too large: ${(stat.size / 1_000_000).toFixed(1)}MB > ${(REFERENCE_IMAGE_MAX_BYTES / 1_000_000).toFixed(1)}MB cap. Resize or crop first.`);
60
+ }
61
+ const ext = path.extname(resolved).toLowerCase();
62
+ const mimeMap = {
63
+ '.png': 'image/png',
64
+ '.jpg': 'image/jpeg',
65
+ '.jpeg': 'image/jpeg',
66
+ '.gif': 'image/gif',
67
+ '.webp': 'image/webp',
68
+ };
69
+ const mime = mimeMap[ext];
70
+ if (!mime) {
71
+ throw new Error(`Unsupported reference image extension ${ext || '(none)'}. Use .png/.jpg/.jpeg/.gif/.webp.`);
72
+ }
73
+ const bytes = fs.readFileSync(resolved);
74
+ return `data:${mime};base64,${bytes.toString('base64')}`;
75
+ }
12
76
  function buildExecute(deps) {
13
77
  return async function execute(input, ctx) {
14
78
  const rawInput = input;
15
- const { output_path, size, model, contentId } = rawInput;
79
+ const { output_path, size, model, contentId, image_url } = rawInput;
16
80
  if (!rawInput.prompt) {
17
81
  return { output: 'Error: prompt is required', isError: true };
18
82
  }
83
+ // Resolve the reference image (if any) before any paid call so we fail
84
+ // cheaply on bad paths / oversize attachments. Holds the resolved data URI
85
+ // (URLs and local paths are inlined) posted to /v1/images/image2image.
86
+ let referenceImage;
87
+ if (image_url) {
88
+ try {
89
+ referenceImage = await resolveReferenceImage(image_url, ctx.workingDir);
90
+ }
91
+ catch (err) {
92
+ return { output: `Error: ${err.message}`, isError: true };
93
+ }
94
+ }
19
95
  // One-shot refinement opt-out: leading `///` tells Franklin "don't
20
96
  // refine this prompt, I wrote it the way I want it." Strip the prefix
21
97
  // and pass skipRefine through to the router.
@@ -31,11 +107,26 @@ function buildExecute(deps) {
31
107
  // step and use the old default. Otherwise: classifier picks a fitting
32
108
  // model + rewrites the prompt, the preview goes to AskUser, user
33
109
  // chooses or cancels.
34
- let imageModel = model || 'openai/gpt-image-1';
110
+ // Reference-image mode forces an edit-capable model. If the caller named
111
+ // an unsupported one, fail loudly so we don't silently downgrade their
112
+ // request to text-only generation.
113
+ if (referenceImage && model && !EDIT_SUPPORTED_MODELS.has(model)) {
114
+ return {
115
+ output: `Error: model ${model} does not support reference images. ` +
116
+ `Use one of: ${[...EDIT_SUPPORTED_MODELS].join(', ')}.`,
117
+ isError: true,
118
+ };
119
+ }
120
+ let imageModel = model || (referenceImage ? 'openai/gpt-image-2' : 'openai/gpt-image-1');
35
121
  const imageSize = size || '1024x1024';
36
122
  let chosenPrompt = prompt;
123
+ // Skip the proposal flow when a reference image is set: the media router
124
+ // doesn't know which models support image-to-image, so its suggestions
125
+ // would frequently be unusable (text-only models). Default to gpt-image-2
126
+ // for now; a future router upgrade can pick between the edit-capable
127
+ // models based on the prompt.
37
128
  const autoApprove = process.env.FRANKLIN_MEDIA_AUTO_APPROVE_ALL === '1';
38
- if (!model && !autoApprove && ctx.onAskUser) {
129
+ if (!model && !autoApprove && ctx.onAskUser && !referenceImage) {
39
130
  try {
40
131
  const chain = loadChain();
41
132
  const client = new ModelClient({ apiUrl: API_URLS[chain], chain });
@@ -95,18 +186,30 @@ function buildExecute(deps) {
95
186
  }
96
187
  const chain = loadChain();
97
188
  const apiUrl = API_URLS[chain];
98
- const endpoint = `${apiUrl}/v1/images/generations`;
189
+ // Reference-image mode hits the dedicated /v1/images/image2image endpoint;
190
+ // otherwise stay on text-to-image generations.
191
+ const endpoint = referenceImage
192
+ ? `${apiUrl}/v1/images/image2image`
193
+ : `${apiUrl}/v1/images/generations`;
99
194
  // Default output path
100
195
  const outPath = output_path
101
196
  ? (path.isAbsolute(output_path) ? output_path : path.resolve(ctx.workingDir, output_path))
102
197
  : path.resolve(ctx.workingDir, `generated-${Date.now()}.png`);
103
- const body = JSON.stringify({
104
- model: imageModel,
105
- prompt: chosenPrompt,
106
- n: 1,
107
- size: imageSize,
108
- response_format: 'b64_json',
109
- });
198
+ const body = JSON.stringify(referenceImage
199
+ ? {
200
+ model: imageModel,
201
+ prompt: chosenPrompt,
202
+ image: referenceImage,
203
+ size: imageSize,
204
+ n: 1,
205
+ }
206
+ : {
207
+ model: imageModel,
208
+ prompt: chosenPrompt,
209
+ n: 1,
210
+ size: imageSize,
211
+ response_format: 'b64_json',
212
+ });
110
213
  const headers = {
111
214
  'Content-Type': 'application/json',
112
215
  'User-Agent': `franklin/${VERSION}`,
@@ -125,7 +228,7 @@ function buildExecute(deps) {
125
228
  if (response.status === 402) {
126
229
  const paymentHeaders = await signPayment(response, chain, endpoint);
127
230
  if (!paymentHeaders) {
128
- return { output: 'Payment failed. Check wallet balance with: runcode balance', isError: true };
231
+ return { output: 'Payment failed. Check wallet balance with: franklin balance', isError: true };
129
232
  }
130
233
  response = await fetch(endpoint, {
131
234
  method: 'POST',
@@ -143,12 +246,23 @@ function buildExecute(deps) {
143
246
  if (!imageData) {
144
247
  return { output: 'No image data returned from API', isError: true };
145
248
  }
146
- // Save image
249
+ // Save image. The /v1/images/image2image endpoint returns Gemini results
250
+ // as a data URI in `url`, so decode those locally instead of going through
251
+ // fetch — saves a network round-trip and avoids data:-URI fetch quirks.
147
252
  if (imageData.b64_json) {
148
253
  const buffer = Buffer.from(imageData.b64_json, 'base64');
149
254
  fs.mkdirSync(path.dirname(outPath), { recursive: true });
150
255
  fs.writeFileSync(outPath, buffer);
151
256
  }
257
+ else if (imageData.url && imageData.url.startsWith('data:')) {
258
+ const match = imageData.url.match(/^data:[^;]+;base64,(.+)$/);
259
+ if (!match) {
260
+ return { output: 'Malformed data URI in response', isError: true };
261
+ }
262
+ const buffer = Buffer.from(match[1], 'base64');
263
+ fs.mkdirSync(path.dirname(outPath), { recursive: true });
264
+ fs.writeFileSync(outPath, buffer);
265
+ }
152
266
  else if (imageData.url) {
153
267
  // Download from URL (with 30s timeout)
154
268
  const dlCtrl = new AbortController();
@@ -165,6 +279,20 @@ function buildExecute(deps) {
165
279
  const fileSize = fs.statSync(outPath).size;
166
280
  const sizeKB = (fileSize / 1024).toFixed(1);
167
281
  const revisedPrompt = imageData.revised_prompt ? `\nRevised prompt: ${imageData.revised_prompt}` : '';
282
+ // Stats: record this generation so it shows up in `franklin insights`
283
+ // alongside chat spend. Before this, media generations bypassed
284
+ // recordUsage entirely (only LLM chat calls were tracked), so the
285
+ // insights panel under-reported total spend and never surfaced
286
+ // image-generation models in its "top models" list. Fire-and-forget —
287
+ // stats write must not fail a user-visible generation.
288
+ void (async () => {
289
+ try {
290
+ const m = await findModel(imageModel);
291
+ const estCost = m ? estimateCostUsd(m, { quantity: 1 }) : 0;
292
+ recordUsage(imageModel, 0, 0, estCost, 0);
293
+ }
294
+ catch { /* ignore stats errors */ }
295
+ })();
168
296
  let contentSummary = '';
169
297
  if (contentId && deps.library) {
170
298
  const rec = recordImageAsset(deps.library, {
@@ -225,7 +353,7 @@ async function signPayment(response, chain, endpoint) {
225
353
  const feePayer = details.extra?.feePayer || details.recipient;
226
354
  const payload = await createSolanaPaymentPayload(secretBytes, wallet.address, details.recipient, details.amount, feePayer, {
227
355
  resourceUrl: details.resource?.url || endpoint,
228
- resourceDescription: details.resource?.description || 'RunCode image generation',
356
+ resourceDescription: details.resource?.description || 'Franklin image generation',
229
357
  maxTimeoutSeconds: details.maxTimeoutSeconds || 300,
230
358
  extra: details.extra,
231
359
  });
@@ -237,7 +365,7 @@ async function signPayment(response, chain, endpoint) {
237
365
  const details = extractPaymentDetails(paymentRequired);
238
366
  const payload = await createPaymentPayload(wallet.privateKey, wallet.address, details.recipient, details.amount, details.network || 'eip155:8453', {
239
367
  resourceUrl: details.resource?.url || endpoint,
240
- resourceDescription: details.resource?.description || 'RunCode image generation',
368
+ resourceDescription: details.resource?.description || 'Franklin image generation',
241
369
  maxTimeoutSeconds: details.maxTimeoutSeconds || 300,
242
370
  extra: details.extra,
243
371
  });
@@ -272,13 +400,16 @@ export function createImageGenCapability(deps = {}) {
272
400
  return {
273
401
  spec: {
274
402
  name: 'ImageGen',
275
- description: "Generate an image from a text prompt. Costs USDC from the user's wallet " +
276
- " confirm before generating. Saves to a local file. Default size: " +
277
- "1024x1024. Do NOT call repeatedly to iterate on style ask the user " +
278
- "first. Pass contentId to attach the result to an existing Content " +
279
- "piece: the content's budget is checked BEFORE paying, and on success " +
280
- "the image is recorded as an asset with its estimated cost. Skipping " +
281
- "contentId generates a one-off image with no budget tracking.",
403
+ description: "Generate an image from a text prompt optionally with a reference " +
404
+ "image for style transfer / character consistency / edits. Costs USDC " +
405
+ "from the user's wallet confirm before generating. Saves to a local " +
406
+ "file. Default size: 1024x1024. Do NOT call repeatedly to iterate on " +
407
+ "style — ask the user first. Pass contentId to attach the result to " +
408
+ "an existing Content piece: the content's budget is checked BEFORE " +
409
+ "paying, and on success the image is recorded as an asset with its " +
410
+ "estimated cost. Skipping contentId generates a one-off image with no " +
411
+ "budget tracking. When image_url is set, only edit-capable models " +
412
+ "(openai/gpt-image-1, openai/gpt-image-2) are accepted.",
282
413
  input_schema: {
283
414
  type: 'object',
284
415
  properties: {
@@ -286,6 +417,7 @@ export function createImageGenCapability(deps = {}) {
286
417
  output_path: { type: 'string', description: 'Where to save the image. Default: generated-<timestamp>.png in working directory' },
287
418
  size: { type: 'string', description: 'Image size: 1024x1024, 1792x1024, or 1024x1792. Default: 1024x1024' },
288
419
  model: { type: 'string', description: 'Image model to use. Default: openai/gpt-image-1' },
420
+ image_url: { type: 'string', description: 'Optional reference image (image-to-image / style transfer). Accepts an http(s) URL, a data URI, or a local file path. Only works with edit-capable models.' },
289
421
  contentId: { type: 'string', description: 'Optional Content id to attach this generation to. Pre-flight budget check + auto-record on success.' },
290
422
  },
291
423
  required: ['prompt'],
@@ -84,7 +84,34 @@ async function execute(input, ctx) {
84
84
  // (some binaries have no extension: `.env.enc`, `.data`, compiled tools
85
85
  // without suffixes, etc. Content sniff catches those.)
86
86
  const ext = path.extname(resolved).toLowerCase();
87
- const binaryExts = new Set(['.png', '.jpg', '.jpeg', '.gif', '.webp', '.ico', '.bmp', '.pdf', '.zip', '.tar', '.gz', '.woff', '.woff2', '.ttf', '.eot', '.mp3', '.mp4', '.wav', '.avi', '.mov', '.exe', '.dll', '.so', '.dylib']);
87
+ // Image extensions load as vision content so models with vision (Sonnet,
88
+ // GPT-4o, Gemini) actually see the bytes instead of a "Binary file" stub.
89
+ // The agent loop wraps `images` into tool_result.content for provider APIs.
90
+ const IMAGE_MEDIA_TYPES = {
91
+ '.png': 'image/png',
92
+ '.jpg': 'image/jpeg',
93
+ '.jpeg': 'image/jpeg',
94
+ '.gif': 'image/gif',
95
+ '.webp': 'image/webp',
96
+ };
97
+ if (IMAGE_MEDIA_TYPES[ext]) {
98
+ const sizeStr = stat.size >= 1024 ? `${(stat.size / 1024).toFixed(1)}KB` : `${stat.size}B`;
99
+ // Anthropic accepts up to 5MB base64; cap raw bytes at ~3.75MB to be safe.
100
+ const IMAGE_MAX_BYTES = 3_750_000;
101
+ if (stat.size > IMAGE_MAX_BYTES) {
102
+ return {
103
+ output: `Image file: ${resolved} (${ext}, ${sizeStr}). Too large to inline for vision (>${Math.round(IMAGE_MAX_BYTES / 1_000_000)}MB). Resize or crop first.`,
104
+ };
105
+ }
106
+ const bytes = fs.readFileSync(resolved);
107
+ const base64 = bytes.toString('base64');
108
+ fileReadTracker.set(resolved, { mtimeMs: stat.mtimeMs, readAt: Date.now() });
109
+ return {
110
+ output: `Image file: ${resolved} (${ext}, ${sizeStr}). Rendered below for vision-capable models.`,
111
+ images: [{ mediaType: IMAGE_MEDIA_TYPES[ext], base64 }],
112
+ };
113
+ }
114
+ const binaryExts = new Set(['.ico', '.bmp', '.pdf', '.zip', '.tar', '.gz', '.woff', '.woff2', '.ttf', '.eot', '.mp3', '.mp4', '.wav', '.avi', '.mov', '.exe', '.dll', '.so', '.dylib']);
88
115
  if (binaryExts.has(ext)) {
89
116
  const sizeStr = stat.size >= 1024 ? `${(stat.size / 1024).toFixed(1)}KB` : `${stat.size}B`;
90
117
  return { output: `Binary file: ${resolved} (${ext}, ${sizeStr}). Cannot display contents.` };
@@ -163,7 +190,7 @@ Usage:
163
190
  - This tool can only read files, not directories. To list a directory, use Glob or ls via Bash.
164
191
  - If you read a file that exists but has empty contents you will receive a warning.
165
192
  - Reads over 2MB are rejected — use offset/limit to read portions.
166
- - Cannot read binary files (images, PDFs, archives).
193
+ - Image files (.png, .jpg, .jpeg, .gif, .webp) are loaded as vision content — vision-capable models see the actual image. Other binary files (PDFs, archives, fonts) cannot be displayed.
167
194
  - You will regularly be asked to read screenshots or images. If the user provides a path, ALWAYS use this tool to view it.
168
195
 
169
196
  IMPORTANT: Always use Read instead of cat, head, or tail via Bash. This tool provides line numbers and integrates with Edit's read-before-edit enforcement.`,