@steipete/summarize 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. package/CHANGELOG.md +73 -5
  2. package/README.md +116 -18
  3. package/dist/cli.cjs +8136 -4368
  4. package/dist/cli.cjs.map +4 -4
  5. package/dist/esm/cli-main.js +47 -2
  6. package/dist/esm/cli-main.js.map +1 -1
  7. package/dist/esm/config.js +368 -3
  8. package/dist/esm/config.js.map +1 -1
  9. package/dist/esm/content/link-preview/content/index.js +13 -0
  10. package/dist/esm/content/link-preview/content/index.js.map +1 -1
  11. package/dist/esm/content/link-preview/content/utils.js +3 -1
  12. package/dist/esm/content/link-preview/content/utils.js.map +1 -1
  13. package/dist/esm/content/link-preview/content/video.js +96 -0
  14. package/dist/esm/content/link-preview/content/video.js.map +1 -0
  15. package/dist/esm/content/link-preview/transcript/providers/youtube/captions.js +21 -21
  16. package/dist/esm/content/link-preview/transcript/providers/youtube/captions.js.map +1 -1
  17. package/dist/esm/costs.js.map +1 -1
  18. package/dist/esm/flags.js +23 -0
  19. package/dist/esm/flags.js.map +1 -1
  20. package/dist/esm/generate-free.js +616 -0
  21. package/dist/esm/generate-free.js.map +1 -0
  22. package/dist/esm/llm/cli.js +290 -0
  23. package/dist/esm/llm/cli.js.map +1 -0
  24. package/dist/esm/llm/generate-text.js +159 -105
  25. package/dist/esm/llm/generate-text.js.map +1 -1
  26. package/dist/esm/llm/html-to-markdown.js +4 -2
  27. package/dist/esm/llm/html-to-markdown.js.map +1 -1
  28. package/dist/esm/model-auto.js +353 -0
  29. package/dist/esm/model-auto.js.map +1 -0
  30. package/dist/esm/model-spec.js +82 -0
  31. package/dist/esm/model-spec.js.map +1 -0
  32. package/dist/esm/prompts/cli.js +18 -0
  33. package/dist/esm/prompts/cli.js.map +1 -0
  34. package/dist/esm/prompts/file.js +4 -4
  35. package/dist/esm/prompts/file.js.map +1 -1
  36. package/dist/esm/prompts/index.js +1 -0
  37. package/dist/esm/prompts/index.js.map +1 -1
  38. package/dist/esm/prompts/link-summary.js +3 -8
  39. package/dist/esm/prompts/link-summary.js.map +1 -1
  40. package/dist/esm/refresh-free.js +667 -0
  41. package/dist/esm/refresh-free.js.map +1 -0
  42. package/dist/esm/run.js +1384 -532
  43. package/dist/esm/run.js.map +1 -1
  44. package/dist/esm/version.js +1 -1
  45. package/dist/types/config.d.ts +58 -5
  46. package/dist/types/content/link-preview/content/types.d.ts +10 -0
  47. package/dist/types/content/link-preview/content/utils.d.ts +1 -1
  48. package/dist/types/content/link-preview/content/video.d.ts +5 -0
  49. package/dist/types/costs.d.ts +2 -1
  50. package/dist/types/flags.d.ts +3 -0
  51. package/dist/types/generate-free.d.ts +17 -0
  52. package/dist/types/llm/cli.d.ts +24 -0
  53. package/dist/types/llm/generate-text.d.ts +13 -4
  54. package/dist/types/llm/html-to-markdown.d.ts +9 -3
  55. package/dist/types/model-auto.d.ts +23 -0
  56. package/dist/types/model-spec.d.ts +33 -0
  57. package/dist/types/prompts/cli.d.ts +8 -0
  58. package/dist/types/prompts/index.d.ts +1 -0
  59. package/dist/types/refresh-free.d.ts +19 -0
  60. package/dist/types/version.d.ts +1 -1
  61. package/docs/README.md +3 -0
  62. package/docs/cli.md +95 -0
  63. package/docs/config.md +123 -1
  64. package/docs/llm.md +24 -4
  65. package/docs/manual-tests.md +40 -0
  66. package/docs/model-auto.md +92 -0
  67. package/docs/site/assets/site.js +20 -17
  68. package/docs/smoketest.md +58 -0
  69. package/docs/website.md +3 -1
  70. package/package.json +8 -4
  71. package/dist/esm/content/link-preview/transcript/providers/twitter.js +0 -12
  72. package/dist/esm/content/link-preview/transcript/providers/twitter.js.map +0 -1
  73. package/dist/esm/content/link-preview/transcript/providers/youtube/ytdlp.js +0 -114
  74. package/dist/esm/content/link-preview/transcript/providers/youtube/ytdlp.js.map +0 -1
  75. package/dist/esm/summarizeHome.js +0 -20
  76. package/dist/esm/summarizeHome.js.map +0 -1
  77. package/dist/esm/tty/live-markdown.js +0 -52
  78. package/dist/esm/tty/live-markdown.js.map +0 -1
  79. package/dist/types/content/link-preview/transcript/providers/twitter.d.ts +0 -3
  80. package/dist/types/content/link-preview/transcript/providers/youtube/ytdlp.d.ts +0 -3
  81. package/dist/types/summarizeHome.d.ts +0 -6
  82. package/dist/types/tty/live-markdown.d.ts +0 -10
package/dist/esm/run.js CHANGED
@@ -1,24 +1,31 @@
1
1
  import { execFile } from 'node:child_process';
2
2
  import { accessSync, constants as fsConstants } from 'node:fs';
3
3
  import fs from 'node:fs/promises';
4
+ import { tmpdir } from 'node:os';
4
5
  import path from 'node:path';
5
6
  import { Command, CommanderError, Option } from 'commander';
6
7
  import { countTokens } from 'gpt-tokenizer';
7
8
  import { createLiveRenderer, render as renderMarkdownAnsi } from 'markdansi';
9
+ import mime from 'mime';
8
10
  import { normalizeTokenUsage, tallyCosts } from 'tokentally';
9
11
  import { loadSummarizeConfig } from './config.js';
10
12
  import { buildAssetPromptMessages, classifyUrl, loadLocalAsset, loadRemoteAsset, resolveInputTarget, } from './content/asset.js';
11
13
  import { createLinkPreviewClient } from './content/index.js';
14
+ import { fetchWithTimeout } from './content/link-preview/fetch-with-timeout.js';
12
15
  import { buildRunMetricsReport } from './costs.js';
13
16
  import { createFirecrawlScraper } from './firecrawl.js';
14
- import { parseDurationMs, parseExtractFormat, parseFirecrawlMode, parseLengthArg, parseMarkdownMode, parseMaxOutputTokensArg, parseMetricsMode, parsePreprocessMode, parseRenderMode, parseStreamMode, parseYoutubeMode, } from './flags.js';
17
+ import { parseDurationMs, parseExtractFormat, parseFirecrawlMode, parseLengthArg, parseMarkdownMode, parseMaxOutputTokensArg, parseMetricsMode, parsePreprocessMode, parseRenderMode, parseRetriesArg, parseStreamMode, parseVideoMode, parseYoutubeMode, } from './flags.js';
18
+ import { isCliDisabled, resolveCliBinary, runCliModel } from './llm/cli.js';
15
19
  import { generateTextWithModelId, streamTextWithModelId } from './llm/generate-text.js';
16
20
  import { resolveGoogleModelForUsage } from './llm/google-models.js';
17
21
  import { createHtmlToMarkdownConverter } from './llm/html-to-markdown.js';
18
- import { normalizeGatewayStyleModelId, parseGatewayStyleModelId } from './llm/model-id.js';
22
+ import { parseGatewayStyleModelId } from './llm/model-id.js';
19
23
  import { convertToMarkdownWithMarkitdown } from './markitdown.js';
24
+ import { buildAutoModelAttempts } from './model-auto.js';
25
+ import { parseRequestedModelId } from './model-spec.js';
20
26
  import { loadLiteLlmCatalog, resolveLiteLlmMaxInputTokensForModelId, resolveLiteLlmMaxOutputTokensForModelId, resolveLiteLlmPricingForModelId, } from './pricing/litellm.js';
21
- import { buildFileSummaryPrompt, buildFileTextSummaryPrompt, buildLinkSummaryPrompt, } from './prompts/index.js';
27
+ import { buildFileSummaryPrompt, buildFileTextSummaryPrompt, buildLinkSummaryPrompt, buildPathSummaryPrompt, } from './prompts/index.js';
28
+ import { refreshFree } from './refresh-free.js';
22
29
  import { startOscProgress } from './tty/osc-progress.js';
23
30
  import { startSpinner } from './tty/spinner.js';
24
31
  import { resolvePackageVersion } from './version.js';
@@ -32,6 +39,61 @@ const SUMMARY_LENGTH_MAX_CHARACTERS = {
32
39
  xl: 14000,
33
40
  xxl: Number.POSITIVE_INFINITY,
34
41
  };
42
+ function truncateList(items, max) {
43
+ const normalized = items.map((item) => item.trim()).filter(Boolean);
44
+ if (normalized.length <= max)
45
+ return normalized.join(', ');
46
+ return `${normalized.slice(0, max).join(', ')} (+${normalized.length - max} more)`;
47
+ }
48
+ function parseOpenRouterModelId(modelId) {
49
+ const normalized = modelId.trim();
50
+ if (!normalized.startsWith('openrouter/'))
51
+ return null;
52
+ const rest = normalized.slice('openrouter/'.length);
53
+ const [author, ...slugParts] = rest.split('/');
54
+ if (!author || slugParts.length === 0)
55
+ return null;
56
+ return { author, slug: slugParts.join('/') };
57
+ }
58
+ async function resolveOpenRouterProvidersForModels({ modelIds, fetchImpl, timeoutMs, }) {
59
+ const results = new Map();
60
+ const unique = Array.from(new Set(modelIds.map((id) => id.trim()).filter(Boolean)));
61
+ await Promise.all(unique.map(async (modelId) => {
62
+ const parsed = parseOpenRouterModelId(modelId);
63
+ if (!parsed)
64
+ return;
65
+ const url = `https://openrouter.ai/api/v1/models/${encodeURIComponent(parsed.author)}/${encodeURIComponent(parsed.slug)}/endpoints`;
66
+ try {
67
+ const response = await fetchWithTimeout(fetchImpl, url, { headers: { Accept: 'application/json' } }, Math.min(timeoutMs, 15_000));
68
+ if (!response.ok)
69
+ return;
70
+ const payload = (await response.json());
71
+ const endpoints = Array.isArray(payload.data?.endpoints) ? payload.data?.endpoints : [];
72
+ const providers = endpoints
73
+ .map((endpoint) => endpoint && typeof endpoint.provider_name === 'string'
74
+ ? endpoint.provider_name.trim()
75
+ : null)
76
+ .filter((value) => Boolean(value));
77
+ const uniqueProviders = Array.from(new Set(providers)).sort((a, b) => a.localeCompare(b));
78
+ if (uniqueProviders.length > 0)
79
+ results.set(modelId, uniqueProviders);
80
+ }
81
+ catch {
82
+ // best-effort only
83
+ }
84
+ }));
85
+ return results;
86
+ }
87
+ async function buildOpenRouterNoAllowedProvidersMessage({ attempts, fetchImpl, timeoutMs, }) {
88
+ const modelIds = attempts
89
+ .map((attempt) => attempt.userModelId)
90
+ .filter((id) => id.startsWith('openrouter/'));
91
+ const tried = truncateList(modelIds, 6);
92
+ const providerMap = await resolveOpenRouterProvidersForModels({ modelIds, fetchImpl, timeoutMs });
93
+ const allProviders = Array.from(new Set(Array.from(providerMap.values()).flat())).sort((a, b) => a.localeCompare(b));
94
+ const providersHint = allProviders.length > 0 ? ` Providers to allow: ${truncateList(allProviders, 10)}.` : '';
95
+ return `OpenRouter could not route any models with this API key (no allowed providers). Tried: ${tried}.${providersHint} Hint: increase --timeout (e.g. 10m) and/or use --debug/--verbose to see per-model failures. (OpenRouter: Settings → API Keys → edit key → Allowed providers.)`;
96
+ }
35
97
  function resolveTargetCharacters(lengthArg) {
36
98
  return lengthArg.kind === 'chars'
37
99
  ? lengthArg.maxCharacters
@@ -58,28 +120,63 @@ function isExecutable(filePath) {
58
120
  return false;
59
121
  }
60
122
  }
61
- function hasBirdCli(env) {
62
- const candidates = [];
123
+ function resolveExecutableInPath(binary, env) {
124
+ if (!binary)
125
+ return null;
126
+ if (path.isAbsolute(binary)) {
127
+ return isExecutable(binary) ? binary : null;
128
+ }
63
129
  const pathEnv = env.PATH ?? '';
64
130
  for (const entry of pathEnv.split(path.delimiter)) {
65
131
  if (!entry)
66
132
  continue;
67
- candidates.push(path.join(entry, 'bird'));
133
+ const candidate = path.join(entry, binary);
134
+ if (isExecutable(candidate))
135
+ return candidate;
68
136
  }
69
- return candidates.some((candidate) => isExecutable(candidate));
137
+ return null;
138
+ }
139
+ function hasBirdCli(env) {
140
+ return resolveExecutableInPath('bird', env) !== null;
70
141
  }
71
142
  function hasUvxCli(env) {
72
143
  if (typeof env.UVX_PATH === 'string' && env.UVX_PATH.trim().length > 0) {
73
144
  return true;
74
145
  }
75
- const candidates = [];
76
- const pathEnv = env.PATH ?? '';
77
- for (const entry of pathEnv.split(path.delimiter)) {
78
- if (!entry)
146
+ return resolveExecutableInPath('uvx', env) !== null;
147
+ }
148
+ function resolveCliAvailability({ env, config, }) {
149
+ const cliConfig = config?.cli ?? null;
150
+ const providers = ['claude', 'codex', 'gemini'];
151
+ const availability = {};
152
+ for (const provider of providers) {
153
+ if (isCliDisabled(provider, cliConfig)) {
154
+ availability[provider] = false;
79
155
  continue;
80
- candidates.push(path.join(entry, 'uvx'));
156
+ }
157
+ const binary = resolveCliBinary(provider, cliConfig, env);
158
+ availability[provider] = resolveExecutableInPath(binary, env) !== null;
159
+ }
160
+ return availability;
161
+ }
162
+ function parseCliUserModelId(modelId) {
163
+ const parts = modelId
164
+ .trim()
165
+ .split('/')
166
+ .map((part) => part.trim());
167
+ const provider = parts[1]?.toLowerCase();
168
+ if (provider !== 'claude' && provider !== 'codex' && provider !== 'gemini') {
169
+ throw new Error(`Invalid CLI model id "${modelId}". Expected cli/<provider>/<model>.`);
170
+ }
171
+ const model = parts.slice(2).join('/').trim();
172
+ return { provider, model: model.length > 0 ? model : null };
173
+ }
174
+ function parseCliProviderArg(raw) {
175
+ const normalized = raw.trim().toLowerCase();
176
+ if (normalized === 'claude' || normalized === 'codex' || normalized === 'gemini') {
177
+ return normalized;
81
178
  }
82
- return candidates.some((candidate) => isExecutable(candidate));
179
+ throw new Error(`Unsupported --cli: ${raw}`);
83
180
  }
84
181
  async function readTweetWithBird(args) {
85
182
  return await new Promise((resolve, reject) => {
@@ -132,12 +229,33 @@ function withUvxTip(error, env) {
132
229
  return error instanceof Error ? new Error(combined, { cause: error }) : new Error(combined);
133
230
  }
134
231
  const MAX_TEXT_BYTES_DEFAULT = 10 * 1024 * 1024;
232
+ const BUILTIN_MODELS = {
233
+ free: {
234
+ mode: 'auto',
235
+ rules: [
236
+ {
237
+ candidates: [
238
+ // Snapshot (2025-12-23): generated via `summarize refresh-free`.
239
+ 'openrouter/xiaomi/mimo-v2-flash:free',
240
+ 'openrouter/mistralai/devstral-2512:free',
241
+ 'openrouter/qwen/qwen3-coder:free',
242
+ 'openrouter/kwaipilot/kat-coder-pro:free',
243
+ 'openrouter/moonshotai/kimi-k2:free',
244
+ 'openrouter/nex-agi/deepseek-v3.1-nex-n1:free',
245
+ ],
246
+ },
247
+ ],
248
+ },
249
+ };
135
250
  function buildProgram() {
136
251
  return new Command()
137
252
  .name('summarize')
138
253
  .description('Summarize web pages and YouTube links (uses direct provider API keys).')
139
254
  .argument('[input]', 'URL or local file path to summarize')
140
255
  .option('--youtube <mode>', 'YouTube transcript source: auto, web (youtubei/captionTracks), yt-dlp (audio+whisper), apify', 'auto')
256
+ .addOption(new Option('--video-mode <mode>', 'Video handling: auto (prefer video understanding if supported), transcript, understand.')
257
+ .choices(['auto', 'transcript', 'understand'])
258
+ .default('auto'))
141
259
  .option('--firecrawl <mode>', 'Firecrawl usage: off, auto (fallback), always (try Firecrawl first). Note: in --format md website mode, defaults to always when FIRECRAWL_API_KEY is set (unless --firecrawl is set explicitly).', 'auto')
142
260
  .option('--format <format>', 'Website/file content format: md|text. For websites: controls the extraction format. For files: controls whether we try to preprocess to Markdown for model compatibility. (default: text)', 'text')
143
261
  .addOption(new Option('--preprocess <mode>', 'Preprocess inputs for model compatibility: off, auto (fallback), always.')
@@ -145,16 +263,19 @@ function buildProgram() {
145
263
  .default('auto'))
146
264
  .addOption(new Option('--markdown-mode <mode>', 'HTML→Markdown conversion: off, auto (prefer Firecrawl when configured, then LLM when configured, then markitdown when available), llm (force LLM). Only affects --format md for non-YouTube URLs.').default('auto'))
147
265
  .addOption(new Option('--markdown <mode>', 'Deprecated alias for --markdown-mode (use --extract --format md --markdown-mode ...)').hideHelp())
148
- .option('--length <length>', 'Summary length: short|medium|long|xl|xxl or a character limit like 20000, 20k', 'medium')
266
+ .option('--length <length>', 'Summary length: short|medium|long|xl|xxl or a character limit like 20000, 20k', 'xl')
149
267
  .option('--max-output-tokens <count>', 'Hard cap for LLM output tokens (e.g. 2000, 2k). Overrides provider defaults.', undefined)
150
268
  .option('--timeout <duration>', 'Timeout for content fetching and LLM request: 30 (seconds), 30s, 2m, 5000ms', '2m')
151
- .option('--model <model>', 'LLM model id (gateway-style): xai/..., openai/..., google/... (default: google/gemini-3-flash-preview)', undefined)
269
+ .option('--retries <count>', 'LLM retry attempts on timeout (default: 1).', '1')
270
+ .option('--model <model>', 'LLM model id: auto, <name>, cli/<provider>/<model>, xai/..., openai/..., google/..., anthropic/... or openrouter/<author>/<slug> (default: auto)', undefined)
271
+ .addOption(new Option('--cli [provider]', 'Use a CLI provider: claude, gemini, codex (equivalent to --model cli/<provider>). If omitted, use auto selection with CLI enabled.'))
152
272
  .option('--extract', 'Print extracted content and exit (no LLM summary)', false)
153
273
  .addOption(new Option('--extract-only', 'Deprecated alias for --extract').hideHelp())
154
274
  .option('--json', 'Output structured JSON (includes prompt + metrics)', false)
155
275
  .option('--stream <mode>', 'Stream LLM output: auto (TTY only), on, off. Note: streaming is disabled in --json mode.', 'auto')
156
276
  .option('--render <mode>', 'Render Markdown output: auto (TTY only), md-live, md, plain. Note: auto selects md-live when streaming to a TTY.', 'auto')
157
277
  .option('--verbose', 'Print detailed progress info to stderr', false)
278
+ .option('--debug', 'Alias for --verbose (and defaults --metrics to detailed)', false)
158
279
  .addOption(new Option('--metrics <mode>', 'Metrics output: off, on, detailed')
159
280
  .choices(['off', 'on', 'detailed'])
160
281
  .default('on'))
@@ -285,6 +406,31 @@ function getFileBytesFromAttachment(attachment) {
285
406
  const data = attachment.part.data;
286
407
  return data instanceof Uint8Array ? data : null;
287
408
  }
409
+ function getAttachmentBytes(attachment) {
410
+ if (attachment.part.type === 'image') {
411
+ const image = attachment.part.image;
412
+ return image instanceof Uint8Array ? image : null;
413
+ }
414
+ return getFileBytesFromAttachment(attachment);
415
+ }
416
+ async function ensureCliAttachmentPath({ sourceKind, sourceLabel, attachment, }) {
417
+ if (sourceKind === 'file')
418
+ return sourceLabel;
419
+ const bytes = getAttachmentBytes(attachment);
420
+ if (!bytes) {
421
+ throw new Error('CLI attachment missing bytes');
422
+ }
423
+ const ext = attachment.filename && path.extname(attachment.filename)
424
+ ? path.extname(attachment.filename)
425
+ : attachment.mediaType
426
+ ? `.${mime.getExtension(attachment.mediaType) ?? 'bin'}`
427
+ : '.bin';
428
+ const filename = attachment.filename?.trim() || `asset${ext}`;
429
+ const dir = await fs.mkdtemp(path.join(tmpdir(), 'summarize-cli-asset-'));
430
+ const filePath = path.join(dir, filename);
431
+ await fs.writeFile(filePath, bytes);
432
+ return filePath;
433
+ }
288
434
  function shouldMarkitdownConvertMediaType(mediaType) {
289
435
  const mt = mediaType.toLowerCase();
290
436
  if (mt === 'application/pdf')
@@ -371,8 +517,8 @@ ${heading('Examples')}
371
517
  ${cmd('summarize "https://example.com" --extract --format md')} ${dim('# extracted markdown (prefers Firecrawl when configured)')}
372
518
  ${cmd('summarize "https://example.com" --extract --format md --markdown-mode llm')} ${dim('# extracted markdown via LLM')}
373
519
  ${cmd('summarize "https://www.youtube.com/watch?v=I845O57ZSy4&t=11s" --extract --youtube web')}
374
- ${cmd('summarize "https://example.com" --length 20k --max-output-tokens 2k --timeout 2m --model openai/gpt-5.2')}
375
- ${cmd('OPENROUTER_API_KEY=... summarize "https://example.com" --model openai/openai/gpt-oss-20b')}
520
+ ${cmd('summarize "https://example.com" --length 20k --max-output-tokens 2k --timeout 2m --model openai/gpt-5-mini')}
521
+ ${cmd('summarize "https://example.com" --model mymodel')} ${dim('# config preset')}
376
522
  ${cmd('summarize "https://example.com" --json --verbose')}
377
523
 
378
524
  ${heading('Env Vars')}
@@ -380,9 +526,11 @@ ${heading('Env Vars')}
380
526
  OPENAI_API_KEY optional (required for openai/... models)
381
527
  OPENAI_BASE_URL optional (OpenAI-compatible API endpoint; e.g. OpenRouter)
382
528
  OPENROUTER_API_KEY optional (routes openai/... models through OpenRouter)
383
- OPENROUTER_PROVIDERS optional (provider fallback order, e.g. "groq,google-vertex")
384
529
  GEMINI_API_KEY optional (required for google/... models)
385
530
  ANTHROPIC_API_KEY optional (required for anthropic/... models)
531
+ CLAUDE_PATH optional (path to Claude CLI binary)
532
+ CODEX_PATH optional (path to Codex CLI binary)
533
+ GEMINI_PATH optional (path to Gemini CLI binary)
386
534
  SUMMARIZE_MODEL optional (overrides default model selection)
387
535
  FIRECRAWL_API_KEY optional website extraction fallback (Markdown)
388
536
  APIFY_API_TOKEN optional YouTube transcript fallback
@@ -390,16 +538,18 @@ ${heading('Env Vars')}
390
538
  FAL_KEY optional FAL AI API key for audio transcription
391
539
  `);
392
540
  }
393
- async function summarizeWithModelId({ modelId, prompt, maxOutputTokens, timeoutMs, fetchImpl, apiKeys, openrouter, }) {
541
+ async function summarizeWithModelId({ modelId, prompt, maxOutputTokens, timeoutMs, fetchImpl, apiKeys, forceOpenRouter, retries, onRetry, }) {
394
542
  const result = await generateTextWithModelId({
395
543
  modelId,
396
544
  apiKeys,
397
- openrouter,
545
+ forceOpenRouter,
398
546
  prompt,
399
547
  temperature: 0,
400
548
  maxOutputTokens,
401
549
  timeoutMs,
402
550
  fetchImpl,
551
+ retries,
552
+ onRetry,
403
553
  });
404
554
  return {
405
555
  text: result.text,
@@ -416,6 +566,23 @@ function writeVerbose(stderr, verbose, message, color) {
416
566
  const prefix = ansi('36', VERBOSE_PREFIX, color);
417
567
  stderr.write(`${prefix} ${message}\n`);
418
568
  }
569
+ function createRetryLogger({ stderr, verbose, color, modelId, }) {
570
+ return (notice) => {
571
+ const message = typeof notice.error === 'string'
572
+ ? notice.error
573
+ : notice.error instanceof Error
574
+ ? notice.error.message
575
+ : typeof notice.error?.message === 'string'
576
+ ? String(notice.error.message)
577
+ : '';
578
+ const reason = /empty summary/i.test(message)
579
+ ? 'empty output'
580
+ : /timed out/i.test(message)
581
+ ? 'timeout'
582
+ : 'error';
583
+ writeVerbose(stderr, verbose, `LLM ${reason} for ${modelId}; retry ${notice.attempt}/${notice.maxRetries} in ${notice.delayMs}ms.`, color);
584
+ };
585
+ }
419
586
  function formatOptionalString(value) {
420
587
  if (typeof value === 'string' && value.trim().length > 0) {
421
588
  return value.trim();
@@ -472,40 +639,207 @@ function formatUSD(value) {
472
639
  return 'n/a';
473
640
  return `$${value.toFixed(4)}`;
474
641
  }
642
+ function normalizeStreamText(input) {
643
+ return input.replace(/\r\n?/g, '\n');
644
+ }
645
+ function commonPrefixLength(a, b, limit = 4096) {
646
+ const max = Math.min(a.length, b.length, limit);
647
+ let i = 0;
648
+ for (; i < max; i += 1) {
649
+ if (a[i] !== b[i])
650
+ break;
651
+ }
652
+ return i;
653
+ }
475
654
  function mergeStreamingChunk(previous, chunk) {
476
655
  if (!chunk)
477
656
  return { next: previous, appended: '' };
478
- if (chunk.startsWith(previous)) {
479
- return { next: chunk, appended: chunk.slice(previous.length) };
657
+ const prev = normalizeStreamText(previous);
658
+ const nextChunk = normalizeStreamText(chunk);
659
+ if (!prev)
660
+ return { next: nextChunk, appended: nextChunk };
661
+ if (nextChunk.startsWith(prev)) {
662
+ return { next: nextChunk, appended: nextChunk.slice(prev.length) };
663
+ }
664
+ if (prev.startsWith(nextChunk)) {
665
+ return { next: prev, appended: '' };
666
+ }
667
+ if (nextChunk.length >= prev.length) {
668
+ const prefixLen = commonPrefixLength(prev, nextChunk);
669
+ if (prefixLen > 0) {
670
+ const minPrefix = Math.max(prev.length - 64, Math.floor(prev.length * 0.9));
671
+ if (prefixLen >= minPrefix) {
672
+ return { next: nextChunk, appended: nextChunk.slice(prefixLen) };
673
+ }
674
+ }
675
+ }
676
+ const maxOverlap = Math.min(prev.length, nextChunk.length, 2048);
677
+ for (let len = maxOverlap; len > 0; len -= 1) {
678
+ if (prev.slice(-len) === nextChunk.slice(0, len)) {
679
+ return { next: prev + nextChunk.slice(len), appended: nextChunk.slice(len) };
680
+ }
480
681
  }
481
- return { next: previous + chunk, appended: chunk };
682
+ return { next: prev + nextChunk, appended: nextChunk };
482
683
  }
483
- function writeFinishLine({ stderr, elapsedMs, model, report, costUsd, color, }) {
684
+ function writeFinishLine({ stderr, elapsedMs, model, report, costUsd, detailed, extraParts, color, }) {
484
685
  const promptTokens = sumNumbersOrNull(report.llm.map((row) => row.promptTokens));
485
686
  const completionTokens = sumNumbersOrNull(report.llm.map((row) => row.completionTokens));
486
687
  const totalTokens = sumNumbersOrNull(report.llm.map((row) => row.totalTokens));
487
- const tokPart = promptTokens !== null || completionTokens !== null || totalTokens !== null
488
- ? `tok(i/o/t)=${promptTokens?.toLocaleString() ?? 'unknown'}/${completionTokens?.toLocaleString() ?? 'unknown'}/${totalTokens?.toLocaleString() ?? 'unknown'}`
489
- : 'tok(i/o/t)=unknown';
490
- const parts = [
688
+ const hasAnyTokens = promptTokens !== null || completionTokens !== null || totalTokens !== null;
689
+ const tokensPart = hasAnyTokens
690
+ ? `${promptTokens?.toLocaleString() ?? 'unknown'}/${completionTokens?.toLocaleString() ?? 'unknown'}/${totalTokens?.toLocaleString() ?? 'unknown'} (in/out/Σ)`
691
+ : null;
692
+ const summaryParts = [
693
+ formatElapsedMs(elapsedMs),
694
+ costUsd != null ? formatUSD(costUsd) : null,
491
695
  model,
492
- costUsd != null ? `cost=${formatUSD(costUsd)}` : 'cost=N/A',
493
- tokPart,
696
+ tokensPart,
494
697
  ];
495
- if (report.services.firecrawl.requests > 0) {
496
- parts.push(`firecrawl=${report.services.firecrawl.requests}`);
698
+ const line1 = summaryParts.filter((part) => typeof part === 'string').join(' · ');
699
+ const totalCalls = report.llm.reduce((sum, row) => sum + row.calls, 0);
700
+ stderr.write('\n');
701
+ stderr.write(`${ansi('1;32', line1, color)}\n`);
702
+ if (detailed) {
703
+ const lenParts = extraParts?.filter((part) => part.startsWith('input=') || part.startsWith('transcript=')) ??
704
+ [];
705
+ const miscParts = extraParts?.filter((part) => !part.startsWith('input=') && !part.startsWith('transcript=')) ??
706
+ [];
707
+ const line2Segments = [];
708
+ if (lenParts.length > 0) {
709
+ line2Segments.push(`len ${lenParts.join(' ')}`);
710
+ }
711
+ line2Segments.push(`calls=${totalCalls.toLocaleString()}`);
712
+ if (report.services.firecrawl.requests > 0 || report.services.apify.requests > 0) {
713
+ const svcParts = [];
714
+ if (report.services.firecrawl.requests > 0) {
715
+ svcParts.push(`firecrawl=${report.services.firecrawl.requests.toLocaleString()}`);
716
+ }
717
+ if (report.services.apify.requests > 0) {
718
+ svcParts.push(`apify=${report.services.apify.requests.toLocaleString()}`);
719
+ }
720
+ line2Segments.push(`svc ${svcParts.join(' ')}`);
721
+ }
722
+ if (miscParts.length > 0) {
723
+ line2Segments.push(...miscParts);
724
+ }
725
+ if (line2Segments.length > 0) {
726
+ stderr.write(`${ansi('0;90', line2Segments.join(' | '), color)}\n`);
727
+ }
497
728
  }
498
- if (report.services.apify.requests > 0) {
499
- parts.push(`apify=${report.services.apify.requests}`);
729
+ }
730
+ function formatCompactCount(value) {
731
+ if (!Number.isFinite(value))
732
+ return 'unknown';
733
+ const abs = Math.abs(value);
734
+ const format = (n, suffix) => {
735
+ const decimals = n >= 10 ? 0 : 1;
736
+ return `${n.toFixed(decimals)}${suffix}`;
737
+ };
738
+ if (abs >= 1_000_000_000)
739
+ return format(value / 1_000_000_000, 'B');
740
+ if (abs >= 1_000_000)
741
+ return format(value / 1_000_000, 'M');
742
+ if (abs >= 10_000)
743
+ return format(value / 1_000, 'k');
744
+ if (abs >= 1_000)
745
+ return `${(value / 1_000).toFixed(1)}k`;
746
+ return String(Math.floor(value));
747
+ }
748
+ function buildDetailedLengthPartsForExtracted(extracted) {
749
+ const parts = [];
750
+ const isYouTube = extracted.siteName === 'YouTube' || /youtube\.com|youtu\.be/i.test(extracted.url);
751
+ if (!isYouTube && !extracted.transcriptCharacters)
752
+ return parts;
753
+ parts.push(`input=${formatCompactCount(extracted.totalCharacters)} chars (~${formatCompactCount(extracted.wordCount)} words)`);
754
+ if (typeof extracted.transcriptCharacters === 'number' && extracted.transcriptCharacters > 0) {
755
+ const wordEstimate = Math.max(0, Math.round(extracted.transcriptCharacters / 6));
756
+ const minutesEstimate = Math.max(1, Math.round(wordEstimate / 160));
757
+ const details = [`${formatCompactCount(extracted.transcriptCharacters)} chars`];
758
+ if (typeof extracted.transcriptLines === 'number' && extracted.transcriptLines > 0) {
759
+ details.push(`${formatCompactCount(extracted.transcriptLines)} lines`);
760
+ }
761
+ parts.push(`transcript=~${minutesEstimate}m (${details.join(', ')})`);
500
762
  }
501
- const line = `Finished in ${formatElapsedMs(elapsedMs)} (${parts.join(' | ')})`;
502
- stderr.write('\n');
503
- stderr.write(`${ansi('1;32', line, color)}\n`);
763
+ return parts;
504
764
  }
505
765
  export async function runCli(argv, { env, fetch, execFile: execFileOverride, stdout, stderr }) {
506
766
  ;
507
767
  globalThis.AI_SDK_LOG_WARNINGS = false;
508
768
  const normalizedArgv = argv.filter((arg) => arg !== '--');
769
+ if (normalizedArgv[0]?.toLowerCase() === 'refresh-free') {
770
+ const verbose = normalizedArgv.includes('--verbose') || normalizedArgv.includes('--debug');
771
+ const setDefault = normalizedArgv.includes('--set-default');
772
+ const help = normalizedArgv.includes('--help') ||
773
+ normalizedArgv.includes('-h') ||
774
+ normalizedArgv.includes('help');
775
+ const readArgValue = (name) => {
776
+ const eq = normalizedArgv.find((a) => a.startsWith(`${name}=`));
777
+ if (eq)
778
+ return eq.slice(`${name}=`.length).trim() || null;
779
+ const index = normalizedArgv.indexOf(name);
780
+ if (index === -1)
781
+ return null;
782
+ const next = normalizedArgv[index + 1];
783
+ if (!next || next.startsWith('-'))
784
+ return null;
785
+ return next.trim() || null;
786
+ };
787
+ const runsRaw = readArgValue('--runs');
788
+ const smartRaw = readArgValue('--smart');
789
+ const minParamsRaw = readArgValue('--min-params');
790
+ const maxAgeDaysRaw = readArgValue('--max-age-days');
791
+ const runs = runsRaw ? Number(runsRaw) : 2;
792
+ const smart = smartRaw ? Number(smartRaw) : 3;
793
+ const minParams = (() => {
794
+ if (!minParamsRaw)
795
+ return 27;
796
+ const raw = minParamsRaw.trim().toLowerCase();
797
+ const normalized = raw.endsWith('b') ? raw.slice(0, -1).trim() : raw;
798
+ const value = Number(normalized);
799
+ return value;
800
+ })();
801
+ const maxAgeDays = (() => {
802
+ if (!maxAgeDaysRaw)
803
+ return 180;
804
+ const value = Number(maxAgeDaysRaw.trim());
805
+ return value;
806
+ })();
807
+ if (help) {
808
+ stdout.write(`${[
809
+ 'Usage: summarize refresh-free [--runs 2] [--smart 3] [--min-params 27b] [--max-age-days 180] [--set-default] [--verbose]',
810
+ '',
811
+ 'Writes ~/.summarize/config.json (models.free) with working OpenRouter :free candidates.',
812
+ 'With --set-default: also sets `model` to "free".',
813
+ ].join('\n')}\n`);
814
+ return;
815
+ }
816
+ if (!Number.isFinite(runs) || runs < 0)
817
+ throw new Error('--runs must be >= 0');
818
+ if (!Number.isFinite(smart) || smart < 0)
819
+ throw new Error('--smart must be >= 0');
820
+ if (!Number.isFinite(minParams) || minParams < 0)
821
+ throw new Error('--min-params must be >= 0 (e.g. 27b)');
822
+ if (!Number.isFinite(maxAgeDays) || maxAgeDays < 0)
823
+ throw new Error('--max-age-days must be >= 0');
824
+ await refreshFree({
825
+ env,
826
+ fetchImpl: fetch,
827
+ stdout,
828
+ stderr,
829
+ verbose,
830
+ options: {
831
+ runs,
832
+ smart,
833
+ minParamB: minParams,
834
+ maxAgeDays,
835
+ setDefault,
836
+ maxCandidates: 10,
837
+ concurrency: 4,
838
+ timeoutMs: 10_000,
839
+ },
840
+ });
841
+ return;
842
+ }
509
843
  const execFileImpl = execFileOverride ?? execFile;
510
844
  const version = resolvePackageVersion();
511
845
  const program = buildProgram();
@@ -532,7 +866,19 @@ export async function runCli(argv, { env, fetch, execFile: execFileOverride, std
532
866
  stdout.write(`${version}\n`);
533
867
  return;
534
868
  }
535
- const rawInput = program.args[0];
869
+ const cliFlagPresent = normalizedArgv.some((arg) => arg === '--cli' || arg.startsWith('--cli='));
870
+ let cliProviderArgRaw = typeof program.opts().cli === 'string' ? program.opts().cli : null;
871
+ let rawInput = program.args[0];
872
+ if (!rawInput && cliFlagPresent && cliProviderArgRaw) {
873
+ try {
874
+ resolveInputTarget(cliProviderArgRaw);
875
+ rawInput = cliProviderArgRaw;
876
+ cliProviderArgRaw = null;
877
+ }
878
+ catch {
879
+ // keep rawInput as-is
880
+ }
881
+ }
536
882
  if (!rawInput) {
537
883
  throw new Error('Usage: summarize <url-or-file> [--youtube auto|web|apify] [--length 20k] [--max-output-tokens 2k] [--timeout 2m] [--json]');
538
884
  }
@@ -540,15 +886,19 @@ export async function runCli(argv, { env, fetch, execFile: execFileOverride, std
540
886
  const url = inputTarget.kind === 'url' ? inputTarget.url : null;
541
887
  const runStartedAtMs = Date.now();
542
888
  const youtubeMode = parseYoutubeMode(program.opts().youtube);
889
+ const videoModeExplicitlySet = normalizedArgv.some((arg) => arg === '--video-mode' || arg.startsWith('--video-mode='));
543
890
  const lengthArg = parseLengthArg(program.opts().length);
544
891
  const maxOutputTokensArg = parseMaxOutputTokensArg(program.opts().maxOutputTokens);
545
892
  const timeoutMs = parseDurationMs(program.opts().timeout);
893
+ const retries = parseRetriesArg(program.opts().retries);
546
894
  const extractMode = Boolean(program.opts().extract) || Boolean(program.opts().extractOnly);
547
895
  const json = Boolean(program.opts().json);
548
896
  const streamMode = parseStreamMode(program.opts().stream);
549
897
  const renderMode = parseRenderMode(program.opts().render);
550
- const verbose = Boolean(program.opts().verbose);
551
- const metricsMode = parseMetricsMode(program.opts().metrics);
898
+ const debug = Boolean(program.opts().debug);
899
+ const verbose = Boolean(program.opts().verbose) || debug;
900
+ const metricsExplicitlySet = normalizedArgv.some((arg) => arg === '--metrics' || arg.startsWith('--metrics='));
901
+ const metricsMode = parseMetricsMode(debug && !metricsExplicitlySet ? 'detailed' : program.opts().metrics);
552
902
  const metricsEnabled = metricsMode !== 'off';
553
903
  const metricsDetailed = metricsMode === 'detailed';
554
904
  const preprocessMode = parsePreprocessMode(program.opts().preprocess);
@@ -567,17 +917,37 @@ export async function runCli(argv, { env, fetch, execFile: execFileOverride, std
567
917
  : 'off';
568
918
  const requestedFirecrawlMode = parseFirecrawlMode(program.opts().firecrawl);
569
919
  const modelArg = typeof program.opts().model === 'string' ? program.opts().model : null;
920
+ const cliProviderArg = typeof cliProviderArgRaw === 'string' && cliProviderArgRaw.trim().length > 0
921
+ ? parseCliProviderArg(cliProviderArgRaw)
922
+ : null;
923
+ if (cliFlagPresent && modelArg) {
924
+ throw new Error('Use either --model or --cli (not both).');
925
+ }
926
+ const explicitModelArg = cliProviderArg
927
+ ? `cli/${cliProviderArg}`
928
+ : cliFlagPresent
929
+ ? 'auto'
930
+ : modelArg;
570
931
  const { config, path: configPath } = loadSummarizeConfig({ env });
932
+ const videoMode = parseVideoMode(videoModeExplicitlySet
933
+ ? program.opts().videoMode
934
+ : (config?.media?.videoMode ?? program.opts().videoMode));
935
+ const cliEnabledOverride = (() => {
936
+ if (!cliFlagPresent || cliProviderArg)
937
+ return null;
938
+ if (Array.isArray(config?.cli?.enabled))
939
+ return config.cli.enabled;
940
+ return ['gemini', 'claude', 'codex'];
941
+ })();
942
+ const cliConfigForRun = cliEnabledOverride
943
+ ? { ...(config?.cli ?? {}), enabled: cliEnabledOverride }
944
+ : config?.cli;
945
+ const configForCli = cliEnabledOverride !== null
946
+ ? { ...(config ?? {}), ...(cliConfigForRun ? { cli: cliConfigForRun } : {}) }
947
+ : config;
571
948
  const xaiKeyRaw = typeof env.XAI_API_KEY === 'string' ? env.XAI_API_KEY : null;
572
949
  const openaiBaseUrl = typeof env.OPENAI_BASE_URL === 'string' ? env.OPENAI_BASE_URL : null;
573
950
  const openRouterKeyRaw = typeof env.OPENROUTER_API_KEY === 'string' ? env.OPENROUTER_API_KEY : null;
574
- const openRouterProvidersRaw = typeof env.OPENROUTER_PROVIDERS === 'string' ? env.OPENROUTER_PROVIDERS : null;
575
- const openRouterProviders = openRouterProvidersRaw
576
- ? openRouterProvidersRaw
577
- .split(',')
578
- .map((p) => p.trim())
579
- .filter(Boolean)
580
- : null;
581
951
  const openaiKeyRaw = typeof env.OPENAI_API_KEY === 'string' ? env.OPENAI_API_KEY : null;
582
952
  const apiKey = typeof openaiBaseUrl === 'string' && /openrouter\.ai/i.test(openaiBaseUrl)
583
953
  ? (openRouterKeyRaw ?? openaiKeyRaw)
@@ -599,13 +969,24 @@ export async function runCli(argv, { env, fetch, execFile: execFileOverride, std
599
969
  const xaiApiKey = xaiKeyRaw?.trim() ?? null;
600
970
  const googleApiKey = googleKeyRaw?.trim() ?? null;
601
971
  const anthropicApiKey = anthropicKeyRaw?.trim() ?? null;
602
- const openrouterApiKey = openRouterKeyRaw?.trim() ?? null;
972
+ const openrouterApiKey = (() => {
973
+ const explicit = openRouterKeyRaw?.trim() ?? '';
974
+ if (explicit.length > 0)
975
+ return explicit;
976
+ const baseUrl = openaiBaseUrl?.trim() ?? '';
977
+ const openaiKey = openaiKeyRaw?.trim() ?? '';
978
+ if (baseUrl.length > 0 && /openrouter\.ai/i.test(baseUrl) && openaiKey.length > 0) {
979
+ return openaiKey;
980
+ }
981
+ return null;
982
+ })();
603
983
  const openaiTranscriptionKey = openaiKeyRaw?.trim() ?? null;
604
984
  const googleConfigured = typeof googleApiKey === 'string' && googleApiKey.length > 0;
605
985
  const xaiConfigured = typeof xaiApiKey === 'string' && xaiApiKey.length > 0;
606
986
  const anthropicConfigured = typeof anthropicApiKey === 'string' && anthropicApiKey.length > 0;
607
987
  const openrouterConfigured = typeof openrouterApiKey === 'string' && openrouterApiKey.length > 0;
608
- const openrouterOptions = openRouterProviders ? { providers: openRouterProviders } : undefined;
988
+ const cliAvailability = resolveCliAvailability({ env, config: configForCli });
989
+ const envForAuto = openrouterApiKey ? { ...env, OPENROUTER_API_KEY: openrouterApiKey } : env;
609
990
  if (markdownModeExplicitlySet && format !== 'markdown') {
610
991
  throw new Error('--markdown-mode is only supported with --format md');
611
992
  }
@@ -652,10 +1033,13 @@ export async function runCli(argv, { env, fetch, execFile: execFileOverride, std
652
1033
  return null;
653
1034
  };
654
1035
  const estimateCostUsd = async () => {
655
- const catalog = await getLiteLlmCatalog();
656
- if (!catalog)
657
- return null;
658
- const calls = llmCalls.map((call) => {
1036
+ const explicitCosts = llmCalls
1037
+ .map((call) => typeof call.costUsd === 'number' && Number.isFinite(call.costUsd) ? call.costUsd : null)
1038
+ .filter((value) => typeof value === 'number');
1039
+ const explicitTotal = explicitCosts.length > 0 ? explicitCosts.reduce((sum, value) => sum + value, 0) : 0;
1040
+ const calls = llmCalls
1041
+ .filter((call) => !(typeof call.costUsd === 'number' && Number.isFinite(call.costUsd)))
1042
+ .map((call) => {
659
1043
  const promptTokens = call.usage?.promptTokens ?? null;
660
1044
  const completionTokens = call.usage?.completionTokens ?? null;
661
1045
  const hasTokens = typeof promptTokens === 'number' &&
@@ -671,11 +1055,21 @@ export async function runCli(argv, { env, fetch, execFile: execFileOverride, std
671
1055
  : null;
672
1056
  return { model: call.model, usage };
673
1057
  });
1058
+ if (calls.length === 0) {
1059
+ return explicitCosts.length > 0 ? explicitTotal : null;
1060
+ }
1061
+ const catalog = await getLiteLlmCatalog();
1062
+ if (!catalog) {
1063
+ return explicitCosts.length > 0 ? explicitTotal : null;
1064
+ }
674
1065
  const result = await tallyCosts({
675
1066
  calls,
676
1067
  resolvePricing: (modelId) => resolveLiteLlmPricingForModelId(catalog, modelId),
677
1068
  });
678
- return result.total?.totalUsd ?? null;
1069
+ const catalogTotal = result.total?.totalUsd ?? null;
1070
+ if (catalogTotal === null && explicitCosts.length === 0)
1071
+ return null;
1072
+ return (catalogTotal ?? 0) + explicitTotal;
679
1073
  };
680
1074
  const buildReport = async () => {
681
1075
  return buildRunMetricsReport({ llmCalls, firecrawlRequests, apifyRequests });
@@ -697,17 +1091,68 @@ export async function runCli(argv, { env, fetch, execFile: execFileOverride, std
697
1091
  }
698
1092
  return fetch(input, init);
699
1093
  };
1094
+ const modelMap = (() => {
1095
+ const out = new Map();
1096
+ for (const [name, model] of Object.entries(BUILTIN_MODELS)) {
1097
+ out.set(name.toLowerCase(), { name, model });
1098
+ }
1099
+ const raw = config?.models;
1100
+ if (!raw)
1101
+ return out;
1102
+ for (const [name, model] of Object.entries(raw)) {
1103
+ out.set(name.toLowerCase(), { name, model });
1104
+ }
1105
+ return out;
1106
+ })();
700
1107
  const resolvedDefaultModel = (() => {
701
1108
  if (typeof env.SUMMARIZE_MODEL === 'string' && env.SUMMARIZE_MODEL.trim().length > 0) {
702
1109
  return env.SUMMARIZE_MODEL.trim();
703
1110
  }
704
- if (typeof config?.model === 'string' && config.model.trim().length > 0) {
705
- return config.model.trim();
1111
+ const modelFromConfig = config?.model;
1112
+ if (modelFromConfig) {
1113
+ if ('id' in modelFromConfig && typeof modelFromConfig.id === 'string') {
1114
+ const id = modelFromConfig.id.trim();
1115
+ if (id.length > 0)
1116
+ return id;
1117
+ }
1118
+ if ('name' in modelFromConfig && typeof modelFromConfig.name === 'string') {
1119
+ const name = modelFromConfig.name.trim();
1120
+ if (name.length > 0)
1121
+ return name;
1122
+ }
1123
+ if ('mode' in modelFromConfig && modelFromConfig.mode === 'auto')
1124
+ return 'auto';
1125
+ }
1126
+ return 'auto';
1127
+ })();
1128
+ const requestedModelInput = ((explicitModelArg?.trim() ?? '') || resolvedDefaultModel).trim();
1129
+ const requestedModelInputLower = requestedModelInput.toLowerCase();
1130
+ const wantsFreeNamedModel = requestedModelInputLower === 'free';
1131
+ const namedModelMatch = requestedModelInputLower !== 'auto' ? (modelMap.get(requestedModelInputLower) ?? null) : null;
1132
+ const namedModelConfig = namedModelMatch?.model ?? null;
1133
+ const isNamedModelSelection = Boolean(namedModelMatch);
1134
+ const configForModelSelection = isNamedModelSelection && namedModelConfig
1135
+ ? { ...(configForCli ?? {}), model: namedModelConfig }
1136
+ : configForCli;
1137
+ const requestedModel = (() => {
1138
+ if (isNamedModelSelection && namedModelConfig) {
1139
+ if ('id' in namedModelConfig)
1140
+ return parseRequestedModelId(namedModelConfig.id);
1141
+ if ('mode' in namedModelConfig && namedModelConfig.mode === 'auto')
1142
+ return { kind: 'auto' };
1143
+ throw new Error(`Invalid model "${namedModelMatch?.name ?? requestedModelInput}": unsupported model config`);
1144
+ }
1145
+ if (requestedModelInputLower !== 'auto' && !requestedModelInput.includes('/')) {
1146
+ throw new Error(`Unknown model "${requestedModelInput}". Define it in ${configPath ?? '~/.summarize/config.json'} under "models", or use a provider-prefixed id like openai/...`);
706
1147
  }
707
- return 'google/gemini-3-flash-preview';
1148
+ return parseRequestedModelId(requestedModelInput);
708
1149
  })();
709
- const model = normalizeGatewayStyleModelId((modelArg?.trim() ?? '') || resolvedDefaultModel);
710
- const parsedModelForLlm = parseGatewayStyleModelId(model);
1150
+ const requestedModelLabel = isNamedModelSelection
1151
+ ? requestedModelInput
1152
+ : requestedModel.kind === 'auto'
1153
+ ? 'auto'
1154
+ : requestedModel.userModelId;
1155
+ const isFallbackModel = requestedModel.kind === 'auto';
711
1156
  const verboseColor = supportsColor(stderr, env);
712
1157
  const effectiveStreamMode = (() => {
713
1158
  if (streamMode !== 'auto')
@@ -722,17 +1167,6 @@ export async function runCli(argv, { env, fetch, execFile: execFileOverride, std
722
1167
  return 'plain';
723
1168
  return streamingEnabled ? 'md-live' : 'md';
724
1169
  })();
725
- const writeMetricsReport = (report) => {
726
- const promptTokens = sumNumbersOrNull(report.llm.map((row) => row.promptTokens));
727
- const completionTokens = sumNumbersOrNull(report.llm.map((row) => row.completionTokens));
728
- const totalTokens = sumNumbersOrNull(report.llm.map((row) => row.totalTokens));
729
- for (const row of report.llm) {
730
- stderr.write(`metrics llm provider=${row.provider} model=${row.model} calls=${row.calls} promptTokens=${row.promptTokens ?? 'unknown'} completionTokens=${row.completionTokens ?? 'unknown'} totalTokens=${row.totalTokens ?? 'unknown'}\n`);
731
- }
732
- stderr.write(`metrics firecrawl requests=${report.services.firecrawl.requests}\n`);
733
- stderr.write(`metrics apify requests=${report.services.apify.requests}\n`);
734
- stderr.write(`metrics total tok(i/o/t)=${promptTokens ?? 'unknown'}/${completionTokens ?? 'unknown'}/${totalTokens ?? 'unknown'}\n`);
735
- };
736
1170
  if (extractMode && inputTarget.kind !== 'url') {
737
1171
  throw new Error('--extract is only supported for website/YouTube URLs');
738
1172
  }
@@ -743,8 +1177,103 @@ export async function runCli(argv, { env, fetch, execFile: execFileOverride, std
743
1177
  clearProgressBeforeStdout = null;
744
1178
  fn?.();
745
1179
  };
746
- const summarizeAsset = async ({ sourceKind, sourceLabel, attachment, }) => {
747
- const parsedModel = parseGatewayStyleModelId(model);
1180
+ const fixedModelSpec = requestedModel.kind === 'fixed' ? requestedModel : null;
1181
+ const desiredOutputTokens = (() => {
1182
+ if (typeof maxOutputTokensArg === 'number')
1183
+ return maxOutputTokensArg;
1184
+ const targetChars = resolveTargetCharacters(lengthArg);
1185
+ if (!Number.isFinite(targetChars) ||
1186
+ targetChars <= 0 ||
1187
+ targetChars === Number.POSITIVE_INFINITY) {
1188
+ return null;
1189
+ }
1190
+ // Rough heuristic (chars → tokens). Used for auto selection + cost estimation.
1191
+ return Math.max(16, Math.ceil(targetChars / 4));
1192
+ })();
1193
+ const envHasKeyFor = (requiredEnv) => {
1194
+ if (requiredEnv === 'CLI_CLAUDE') {
1195
+ return Boolean(cliAvailability.claude);
1196
+ }
1197
+ if (requiredEnv === 'CLI_CODEX') {
1198
+ return Boolean(cliAvailability.codex);
1199
+ }
1200
+ if (requiredEnv === 'CLI_GEMINI') {
1201
+ return Boolean(cliAvailability.gemini);
1202
+ }
1203
+ if (requiredEnv === 'GEMINI_API_KEY') {
1204
+ return googleConfigured;
1205
+ }
1206
+ if (requiredEnv === 'OPENROUTER_API_KEY') {
1207
+ return openrouterConfigured;
1208
+ }
1209
+ if (requiredEnv === 'OPENAI_API_KEY') {
1210
+ return Boolean(apiKey);
1211
+ }
1212
+ if (requiredEnv === 'XAI_API_KEY') {
1213
+ return Boolean(xaiApiKey);
1214
+ }
1215
+ return Boolean(anthropicApiKey);
1216
+ };
1217
+ const formatMissingModelError = (attempt) => {
1218
+ if (attempt.requiredEnv === 'CLI_CLAUDE') {
1219
+ return `Claude CLI not found for model ${attempt.userModelId}. Install Claude CLI or set CLAUDE_PATH.`;
1220
+ }
1221
+ if (attempt.requiredEnv === 'CLI_CODEX') {
1222
+ return `Codex CLI not found for model ${attempt.userModelId}. Install Codex CLI or set CODEX_PATH.`;
1223
+ }
1224
+ if (attempt.requiredEnv === 'CLI_GEMINI') {
1225
+ return `Gemini CLI not found for model ${attempt.userModelId}. Install Gemini CLI or set GEMINI_PATH.`;
1226
+ }
1227
+ return `Missing ${attempt.requiredEnv} for model ${attempt.userModelId}. Set the env var or choose a different --model.`;
1228
+ };
1229
+ const runSummaryAttempt = async ({ attempt, prompt, allowStreaming, onModelChosen, cli, }) => {
1230
+ onModelChosen?.(attempt.userModelId);
1231
+ if (attempt.transport === 'cli') {
1232
+ const cliPrompt = typeof prompt === 'string' ? prompt : (cli?.promptOverride ?? null);
1233
+ if (!cliPrompt) {
1234
+ throw new Error('CLI models require a text prompt (no binary attachments).');
1235
+ }
1236
+ if (!attempt.cliProvider) {
1237
+ throw new Error(`Missing CLI provider for model ${attempt.userModelId}.`);
1238
+ }
1239
+ if (isCliDisabled(attempt.cliProvider, cliConfigForRun)) {
1240
+ throw new Error(`CLI provider ${attempt.cliProvider} is disabled by cli.enabled. Update your config to enable it.`);
1241
+ }
1242
+ const result = await runCliModel({
1243
+ provider: attempt.cliProvider,
1244
+ prompt: cliPrompt,
1245
+ model: attempt.cliModel ?? null,
1246
+ allowTools: Boolean(cli?.allowTools),
1247
+ timeoutMs,
1248
+ env,
1249
+ execFileImpl,
1250
+ config: cliConfigForRun ?? null,
1251
+ cwd: cli?.cwd,
1252
+ extraArgs: cli?.extraArgsByProvider?.[attempt.cliProvider],
1253
+ });
1254
+ const summary = result.text.trim();
1255
+ if (!summary)
1256
+ throw new Error('CLI returned an empty summary');
1257
+ if (result.usage || typeof result.costUsd === 'number') {
1258
+ llmCalls.push({
1259
+ provider: 'cli',
1260
+ model: attempt.userModelId,
1261
+ usage: result.usage ?? null,
1262
+ costUsd: result.costUsd ?? null,
1263
+ purpose: 'summary',
1264
+ });
1265
+ }
1266
+ return {
1267
+ summary,
1268
+ summaryAlreadyPrinted: false,
1269
+ modelMeta: { provider: 'cli', canonical: attempt.userModelId },
1270
+ maxOutputTokensForCall: null,
1271
+ };
1272
+ }
1273
+ if (!attempt.llmModelId) {
1274
+ throw new Error(`Missing model id for ${attempt.userModelId}.`);
1275
+ }
1276
+ const parsedModel = parseGatewayStyleModelId(attempt.llmModelId);
748
1277
  const apiKeysForLlm = {
749
1278
  xaiApiKey,
750
1279
  openaiApiKey: apiKey,
@@ -752,23 +1281,6 @@ export async function runCli(argv, { env, fetch, execFile: execFileOverride, std
752
1281
  anthropicApiKey: anthropicConfigured ? anthropicApiKey : null,
753
1282
  openrouterApiKey: openrouterConfigured ? openrouterApiKey : null,
754
1283
  };
755
- const requiredKeyEnv = parsedModel.provider === 'xai'
756
- ? 'XAI_API_KEY'
757
- : parsedModel.provider === 'google'
758
- ? 'GEMINI_API_KEY (or GOOGLE_GENERATIVE_AI_API_KEY / GOOGLE_API_KEY)'
759
- : parsedModel.provider === 'anthropic'
760
- ? 'ANTHROPIC_API_KEY'
761
- : 'OPENAI_API_KEY (or OPENROUTER_API_KEY)';
762
- const hasRequiredKey = parsedModel.provider === 'xai'
763
- ? Boolean(xaiApiKey)
764
- : parsedModel.provider === 'google'
765
- ? googleConfigured
766
- : parsedModel.provider === 'anthropic'
767
- ? anthropicConfigured
768
- : Boolean(apiKey) || openrouterConfigured;
769
- if (!hasRequiredKey) {
770
- throw new Error(`Missing ${requiredKeyEnv} for model ${parsedModel.canonical}. Set the env var or choose a different --model.`);
771
- }
772
1284
  const modelResolution = await resolveModelIdForLlmCall({
773
1285
  parsedModel,
774
1286
  apiKeys: { googleApiKey: apiKeysForLlm.googleApiKey },
@@ -778,10 +1290,229 @@ export async function runCli(argv, { env, fetch, execFile: execFileOverride, std
778
1290
  if (modelResolution.note && verbose) {
779
1291
  writeVerbose(stderr, verbose, modelResolution.note, verboseColor);
780
1292
  }
781
- const effectiveModelId = modelResolution.modelId;
782
- const parsedModelEffective = parseGatewayStyleModelId(effectiveModelId);
783
- const streamingEnabledForCall = streamingEnabled && !modelResolution.forceStreamOff;
1293
+ const parsedModelEffective = parseGatewayStyleModelId(modelResolution.modelId);
1294
+ const streamingEnabledForCall = allowStreaming && streamingEnabled && !modelResolution.forceStreamOff;
784
1295
  const maxOutputTokensForCall = await resolveMaxOutputTokensForCall(parsedModelEffective.canonical);
1296
+ const maxInputTokensForCall = await resolveMaxInputTokensForCall(parsedModelEffective.canonical);
1297
+ if (typeof maxInputTokensForCall === 'number' &&
1298
+ Number.isFinite(maxInputTokensForCall) &&
1299
+ maxInputTokensForCall > 0 &&
1300
+ typeof prompt === 'string') {
1301
+ const tokenCount = countTokens(prompt);
1302
+ if (tokenCount > maxInputTokensForCall) {
1303
+ throw new Error(`Input token count (${formatCount(tokenCount)}) exceeds model input limit (${formatCount(maxInputTokensForCall)}). Tokenized with GPT tokenizer; prompt included.`);
1304
+ }
1305
+ }
1306
+ if (!streamingEnabledForCall) {
1307
+ const result = await summarizeWithModelId({
1308
+ modelId: parsedModelEffective.canonical,
1309
+ prompt,
1310
+ maxOutputTokens: maxOutputTokensForCall ?? undefined,
1311
+ timeoutMs,
1312
+ fetchImpl: trackedFetch,
1313
+ apiKeys: apiKeysForLlm,
1314
+ forceOpenRouter: attempt.forceOpenRouter,
1315
+ retries,
1316
+ onRetry: createRetryLogger({
1317
+ stderr,
1318
+ verbose,
1319
+ color: verboseColor,
1320
+ modelId: parsedModelEffective.canonical,
1321
+ }),
1322
+ });
1323
+ llmCalls.push({
1324
+ provider: result.provider,
1325
+ model: result.canonicalModelId,
1326
+ usage: result.usage,
1327
+ purpose: 'summary',
1328
+ });
1329
+ const summary = result.text.trim();
1330
+ if (!summary)
1331
+ throw new Error('LLM returned an empty summary');
1332
+ return {
1333
+ summary,
1334
+ summaryAlreadyPrinted: false,
1335
+ modelMeta: {
1336
+ provider: parsedModelEffective.provider,
1337
+ canonical: parsedModelEffective.canonical,
1338
+ },
1339
+ maxOutputTokensForCall: maxOutputTokensForCall ?? null,
1340
+ };
1341
+ }
1342
+ const shouldBufferSummaryForRender = streamingEnabledForCall && effectiveRenderMode === 'md' && isRichTty(stdout);
1343
+ const shouldLiveRenderSummary = streamingEnabledForCall && effectiveRenderMode === 'md-live' && isRichTty(stdout);
1344
+ const shouldStreamSummaryToStdout = streamingEnabledForCall && !shouldBufferSummaryForRender && !shouldLiveRenderSummary;
1345
+ let summaryAlreadyPrinted = false;
1346
+ let summary = '';
1347
+ let getLastStreamError = null;
1348
+ let streamResult = null;
1349
+ try {
1350
+ streamResult = await streamTextWithModelId({
1351
+ modelId: parsedModelEffective.canonical,
1352
+ apiKeys: apiKeysForLlm,
1353
+ forceOpenRouter: attempt.forceOpenRouter,
1354
+ prompt,
1355
+ temperature: 0,
1356
+ maxOutputTokens: maxOutputTokensForCall ?? undefined,
1357
+ timeoutMs,
1358
+ fetchImpl: trackedFetch,
1359
+ });
1360
+ }
1361
+ catch (error) {
1362
+ if (isStreamingTimeoutError(error)) {
1363
+ writeVerbose(stderr, verbose, `Streaming timed out for ${parsedModelEffective.canonical}; falling back to non-streaming.`, verboseColor);
1364
+ const result = await summarizeWithModelId({
1365
+ modelId: parsedModelEffective.canonical,
1366
+ prompt,
1367
+ maxOutputTokens: maxOutputTokensForCall ?? undefined,
1368
+ timeoutMs,
1369
+ fetchImpl: trackedFetch,
1370
+ apiKeys: apiKeysForLlm,
1371
+ forceOpenRouter: attempt.forceOpenRouter,
1372
+ retries,
1373
+ onRetry: createRetryLogger({
1374
+ stderr,
1375
+ verbose,
1376
+ color: verboseColor,
1377
+ modelId: parsedModelEffective.canonical,
1378
+ }),
1379
+ });
1380
+ llmCalls.push({
1381
+ provider: result.provider,
1382
+ model: result.canonicalModelId,
1383
+ usage: result.usage,
1384
+ purpose: 'summary',
1385
+ });
1386
+ summary = result.text;
1387
+ streamResult = null;
1388
+ }
1389
+ else if (parsedModelEffective.provider === 'google' &&
1390
+ isGoogleStreamingUnsupportedError(error)) {
1391
+ writeVerbose(stderr, verbose, `Google model ${parsedModelEffective.canonical} rejected streamGenerateContent; falling back to non-streaming.`, verboseColor);
1392
+ const result = await summarizeWithModelId({
1393
+ modelId: parsedModelEffective.canonical,
1394
+ prompt,
1395
+ maxOutputTokens: maxOutputTokensForCall ?? undefined,
1396
+ timeoutMs,
1397
+ fetchImpl: trackedFetch,
1398
+ apiKeys: apiKeysForLlm,
1399
+ forceOpenRouter: attempt.forceOpenRouter,
1400
+ retries,
1401
+ onRetry: createRetryLogger({
1402
+ stderr,
1403
+ verbose,
1404
+ color: verboseColor,
1405
+ modelId: parsedModelEffective.canonical,
1406
+ }),
1407
+ });
1408
+ llmCalls.push({
1409
+ provider: result.provider,
1410
+ model: result.canonicalModelId,
1411
+ usage: result.usage,
1412
+ purpose: 'summary',
1413
+ });
1414
+ summary = result.text;
1415
+ streamResult = null;
1416
+ }
1417
+ else {
1418
+ throw error;
1419
+ }
1420
+ }
1421
+ if (streamResult) {
1422
+ getLastStreamError = streamResult.lastError;
1423
+ let streamed = '';
1424
+ const liveRenderer = shouldLiveRenderSummary
1425
+ ? createLiveRenderer({
1426
+ write: (chunk) => {
1427
+ clearProgressForStdout();
1428
+ stdout.write(chunk);
1429
+ },
1430
+ width: markdownRenderWidth(stdout, env),
1431
+ renderFrame: (markdown) => renderMarkdownAnsi(markdown, {
1432
+ width: markdownRenderWidth(stdout, env),
1433
+ wrap: true,
1434
+ color: supportsColor(stdout, env),
1435
+ }),
1436
+ })
1437
+ : null;
1438
+ let lastFrameAtMs = 0;
1439
+ try {
1440
+ let cleared = false;
1441
+ for await (const delta of streamResult.textStream) {
1442
+ const merged = mergeStreamingChunk(streamed, delta);
1443
+ streamed = merged.next;
1444
+ if (shouldStreamSummaryToStdout) {
1445
+ if (!cleared) {
1446
+ clearProgressForStdout();
1447
+ cleared = true;
1448
+ }
1449
+ if (merged.appended)
1450
+ stdout.write(merged.appended);
1451
+ continue;
1452
+ }
1453
+ if (liveRenderer) {
1454
+ const now = Date.now();
1455
+ const due = now - lastFrameAtMs >= 120;
1456
+ const hasNewline = delta.includes('\n');
1457
+ if (hasNewline || due) {
1458
+ liveRenderer.render(streamed);
1459
+ lastFrameAtMs = now;
1460
+ }
1461
+ }
1462
+ }
1463
+ const trimmed = streamed.trim();
1464
+ streamed = trimmed;
1465
+ if (liveRenderer) {
1466
+ liveRenderer.render(trimmed);
1467
+ summaryAlreadyPrinted = true;
1468
+ }
1469
+ }
1470
+ finally {
1471
+ liveRenderer?.finish();
1472
+ }
1473
+ const usage = await streamResult.usage;
1474
+ llmCalls.push({
1475
+ provider: streamResult.provider,
1476
+ model: streamResult.canonicalModelId,
1477
+ usage,
1478
+ purpose: 'summary',
1479
+ });
1480
+ summary = streamed;
1481
+ if (shouldStreamSummaryToStdout) {
1482
+ if (!streamed.endsWith('\n')) {
1483
+ stdout.write('\n');
1484
+ }
1485
+ summaryAlreadyPrinted = true;
1486
+ }
1487
+ }
1488
+ summary = summary.trim();
1489
+ if (summary.length === 0) {
1490
+ const last = getLastStreamError?.();
1491
+ if (last instanceof Error) {
1492
+ throw new Error(last.message, { cause: last });
1493
+ }
1494
+ throw new Error('LLM returned an empty summary');
1495
+ }
1496
+ return {
1497
+ summary,
1498
+ summaryAlreadyPrinted,
1499
+ modelMeta: {
1500
+ provider: parsedModelEffective.provider,
1501
+ canonical: parsedModelEffective.canonical,
1502
+ },
1503
+ maxOutputTokensForCall: maxOutputTokensForCall ?? null,
1504
+ };
1505
+ };
1506
+ const writeViaFooter = (parts) => {
1507
+ if (json)
1508
+ return;
1509
+ const filtered = parts.map((p) => p.trim()).filter(Boolean);
1510
+ if (filtered.length === 0)
1511
+ return;
1512
+ clearProgressForStdout();
1513
+ stderr.write(`${ansi('2', `via ${filtered.join(', ')}`, verboseColor)}\n`);
1514
+ };
1515
+ const summarizeAsset = async ({ sourceKind, sourceLabel, attachment, onModelChosen, }) => {
785
1516
  const textContent = getTextContentFromAttachment(attachment);
786
1517
  if (textContent && textContent.bytes > MAX_TEXT_BYTES_DEFAULT) {
787
1518
  throw new Error(`Text file too large (${formatBytes(textContent.bytes)}). Limit is ${formatBytes(MAX_TEXT_BYTES_DEFAULT)}.`);
@@ -795,6 +1526,7 @@ export async function runCli(argv, { env, fetch, execFile: execFileOverride, std
795
1526
  shouldMarkitdownConvertMediaType(attachment.mediaType);
796
1527
  const summaryLengthTarget = lengthArg.kind === 'preset' ? lengthArg.preset : { maxCharacters: lengthArg.maxCharacters };
797
1528
  let promptText = '';
1529
+ const assetFooterParts = [];
798
1530
  const buildAttachmentPromptPayload = () => {
799
1531
  promptText = buildFileSummaryPrompt({
800
1532
  filename: attachment.filename,
@@ -839,6 +1571,7 @@ export async function runCli(argv, { env, fetch, execFile: execFileOverride, std
839
1571
  throw new Error(`Preprocessed Markdown too large (${formatBytes(Buffer.byteLength(preprocessedMarkdown, 'utf8'))}). Limit is ${formatBytes(MAX_TEXT_BYTES_DEFAULT)}.`);
840
1572
  }
841
1573
  usingPreprocessedMarkdown = true;
1574
+ assetFooterParts.push(`markitdown(${attachment.mediaType})`);
842
1575
  }
843
1576
  let promptPayload = buildAttachmentPromptPayload();
844
1577
  if (usingPreprocessedMarkdown) {
@@ -847,18 +1580,21 @@ export async function runCli(argv, { env, fetch, execFile: execFileOverride, std
847
1580
  }
848
1581
  promptPayload = buildMarkitdownPromptPayload(preprocessedMarkdown);
849
1582
  }
850
- if (!usingPreprocessedMarkdown) {
1583
+ if (!usingPreprocessedMarkdown &&
1584
+ fixedModelSpec &&
1585
+ fixedModelSpec.transport !== 'cli' &&
1586
+ preprocessMode !== 'off') {
1587
+ const fixedParsed = parseGatewayStyleModelId(fixedModelSpec.llmModelId);
851
1588
  try {
852
1589
  assertProviderSupportsAttachment({
853
- provider: parsedModel.provider,
854
- modelId: parsedModel.canonical,
1590
+ provider: fixedParsed.provider,
1591
+ modelId: fixedModelSpec.userModelId,
855
1592
  attachment: { part: attachment.part, mediaType: attachment.mediaType },
856
1593
  });
857
1594
  }
858
1595
  catch (error) {
859
1596
  if (!canPreprocessWithMarkitdown) {
860
1597
  if (format === 'markdown' &&
861
- preprocessMode !== 'off' &&
862
1598
  attachment.part.type === 'file' &&
863
1599
  shouldMarkitdownConvertMediaType(attachment.mediaType) &&
864
1600
  !hasUvxCli(env)) {
@@ -891,199 +1627,192 @@ export async function runCli(argv, { env, fetch, execFile: execFileOverride, std
891
1627
  throw new Error(`Preprocessed Markdown too large (${formatBytes(Buffer.byteLength(preprocessedMarkdown, 'utf8'))}). Limit is ${formatBytes(MAX_TEXT_BYTES_DEFAULT)}.`);
892
1628
  }
893
1629
  usingPreprocessedMarkdown = true;
1630
+ assetFooterParts.push(`markitdown(${attachment.mediaType})`);
894
1631
  promptPayload = buildMarkitdownPromptPayload(preprocessedMarkdown);
895
1632
  }
896
1633
  }
897
- const maxInputTokensForCall = await resolveMaxInputTokensForCall(parsedModelEffective.canonical);
898
- if (typeof maxInputTokensForCall === 'number' &&
899
- Number.isFinite(maxInputTokensForCall) &&
900
- maxInputTokensForCall > 0 &&
901
- typeof promptPayload === 'string') {
902
- const tokenCount = countTokens(promptPayload);
903
- if (tokenCount > maxInputTokensForCall) {
904
- throw new Error(`Input token count (${formatCount(tokenCount)}) exceeds model input limit (${formatCount(maxInputTokensForCall)}). Tokenized with GPT tokenizer; prompt included.`);
1634
+ const promptTokensForAuto = typeof promptPayload === 'string' ? countTokens(promptPayload) : null;
1635
+ const lowerMediaType = attachment.mediaType.toLowerCase();
1636
+ const kind = lowerMediaType.startsWith('video/')
1637
+ ? 'video'
1638
+ : lowerMediaType.startsWith('image/')
1639
+ ? 'image'
1640
+ : textContent
1641
+ ? 'text'
1642
+ : 'file';
1643
+ const requiresVideoUnderstanding = kind === 'video' && videoMode !== 'transcript';
1644
+ const attempts = await (async () => {
1645
+ if (isFallbackModel) {
1646
+ const catalog = await getLiteLlmCatalog();
1647
+ const all = buildAutoModelAttempts({
1648
+ kind,
1649
+ promptTokens: promptTokensForAuto,
1650
+ desiredOutputTokens,
1651
+ requiresVideoUnderstanding,
1652
+ env: envForAuto,
1653
+ config: configForModelSelection,
1654
+ catalog,
1655
+ openrouterProvidersFromEnv: null,
1656
+ cliAvailability,
1657
+ });
1658
+ const mapped = all.map((attempt) => {
1659
+ if (attempt.transport !== 'cli')
1660
+ return attempt;
1661
+ const parsed = parseCliUserModelId(attempt.userModelId);
1662
+ return { ...attempt, cliProvider: parsed.provider, cliModel: parsed.model };
1663
+ });
1664
+ const filtered = mapped.filter((a) => {
1665
+ if (a.transport === 'cli')
1666
+ return true;
1667
+ if (!a.llmModelId)
1668
+ return false;
1669
+ const parsed = parseGatewayStyleModelId(a.llmModelId);
1670
+ if (parsed.provider === 'xai' &&
1671
+ attachment.part.type === 'file' &&
1672
+ !isTextLikeMediaType(attachment.mediaType)) {
1673
+ return false;
1674
+ }
1675
+ return true;
1676
+ });
1677
+ return filtered;
1678
+ }
1679
+ if (!fixedModelSpec) {
1680
+ throw new Error('Internal error: missing fixed model spec');
1681
+ }
1682
+ if (fixedModelSpec.transport === 'cli') {
1683
+ return [
1684
+ {
1685
+ transport: 'cli',
1686
+ userModelId: fixedModelSpec.userModelId,
1687
+ llmModelId: null,
1688
+ cliProvider: fixedModelSpec.cliProvider,
1689
+ cliModel: fixedModelSpec.cliModel,
1690
+ openrouterProviders: null,
1691
+ forceOpenRouter: false,
1692
+ requiredEnv: fixedModelSpec.requiredEnv,
1693
+ },
1694
+ ];
1695
+ }
1696
+ return [
1697
+ {
1698
+ transport: fixedModelSpec.transport === 'openrouter' ? 'openrouter' : 'native',
1699
+ userModelId: fixedModelSpec.userModelId,
1700
+ llmModelId: fixedModelSpec.llmModelId,
1701
+ openrouterProviders: fixedModelSpec.openrouterProviders,
1702
+ forceOpenRouter: fixedModelSpec.forceOpenRouter,
1703
+ requiredEnv: fixedModelSpec.requiredEnv,
1704
+ },
1705
+ ];
1706
+ })();
1707
+ const cliContext = await (async () => {
1708
+ if (!attempts.some((a) => a.transport === 'cli'))
1709
+ return null;
1710
+ if (typeof promptPayload === 'string')
1711
+ return null;
1712
+ const needsPathPrompt = attachment.part.type === 'image' || attachment.part.type === 'file';
1713
+ if (!needsPathPrompt)
1714
+ return null;
1715
+ const filePath = await ensureCliAttachmentPath({ sourceKind, sourceLabel, attachment });
1716
+ const dir = path.dirname(filePath);
1717
+ const extraArgsByProvider = {
1718
+ gemini: ['--include-directories', dir],
1719
+ codex: attachment.part.type === 'image' ? ['-i', filePath] : undefined,
1720
+ };
1721
+ return {
1722
+ promptOverride: buildPathSummaryPrompt({
1723
+ kindLabel: attachment.part.type === 'image' ? 'image' : 'file',
1724
+ filePath,
1725
+ filename: attachment.filename,
1726
+ mediaType: attachment.mediaType,
1727
+ summaryLength: summaryLengthTarget,
1728
+ }),
1729
+ allowTools: true,
1730
+ cwd: dir,
1731
+ extraArgsByProvider,
1732
+ };
1733
+ })();
1734
+ let summaryResult = null;
1735
+ let usedAttempt = null;
1736
+ let lastError = null;
1737
+ let sawOpenRouterNoAllowedProviders = false;
1738
+ const missingRequiredEnvs = new Set();
1739
+ for (const attempt of attempts) {
1740
+ const hasKey = envHasKeyFor(attempt.requiredEnv);
1741
+ if (!hasKey) {
1742
+ if (isFallbackModel) {
1743
+ if (isNamedModelSelection) {
1744
+ missingRequiredEnvs.add(attempt.requiredEnv);
1745
+ continue;
1746
+ }
1747
+ writeVerbose(stderr, verbose, `auto skip ${attempt.userModelId}: missing ${attempt.requiredEnv}`, verboseColor);
1748
+ continue;
1749
+ }
1750
+ throw new Error(formatMissingModelError(attempt));
905
1751
  }
906
- }
907
- const shouldBufferSummaryForRender = streamingEnabledForCall && effectiveRenderMode === 'md' && isRichTty(stdout);
908
- const shouldLiveRenderSummary = streamingEnabledForCall && effectiveRenderMode === 'md-live' && isRichTty(stdout);
909
- const shouldStreamSummaryToStdout = streamingEnabledForCall && !shouldBufferSummaryForRender && !shouldLiveRenderSummary;
910
- let summaryAlreadyPrinted = false;
911
- let summary = '';
912
- let getLastStreamError = null;
913
- if (streamingEnabledForCall) {
914
- let streamResult = null;
915
1752
  try {
916
- streamResult = await streamTextWithModelId({
917
- modelId: parsedModelEffective.canonical,
918
- apiKeys: apiKeysForLlm,
919
- openrouter: openrouterOptions,
1753
+ summaryResult = await runSummaryAttempt({
1754
+ attempt,
920
1755
  prompt: promptPayload,
921
- temperature: 0,
922
- maxOutputTokens: maxOutputTokensForCall ?? undefined,
923
- timeoutMs,
924
- fetchImpl: trackedFetch,
1756
+ allowStreaming: requestedModel.kind === 'fixed',
1757
+ onModelChosen: onModelChosen ?? null,
1758
+ cli: cliContext,
925
1759
  });
1760
+ usedAttempt = attempt;
1761
+ break;
926
1762
  }
927
1763
  catch (error) {
928
- if (isStreamingTimeoutError(error)) {
929
- writeVerbose(stderr, verbose, `Streaming timed out for ${parsedModelEffective.canonical}; falling back to non-streaming.`, verboseColor);
930
- const result = await summarizeWithModelId({
931
- modelId: parsedModelEffective.canonical,
932
- prompt: promptPayload,
933
- maxOutputTokens: maxOutputTokensForCall ?? undefined,
934
- timeoutMs,
935
- fetchImpl: trackedFetch,
936
- apiKeys: apiKeysForLlm,
937
- openrouter: openrouterOptions,
938
- });
939
- llmCalls.push({
940
- provider: result.provider,
941
- model: result.canonicalModelId,
942
- usage: result.usage,
943
- purpose: 'summary',
944
- });
945
- summary = result.text;
946
- streamResult = null;
1764
+ lastError = error;
1765
+ if (isNamedModelSelection &&
1766
+ error instanceof Error &&
1767
+ /No allowed providers are available for the selected model/i.test(error.message)) {
1768
+ sawOpenRouterNoAllowedProviders = true;
947
1769
  }
948
- else if (parsedModelEffective.provider === 'google' &&
949
- isGoogleStreamingUnsupportedError(error)) {
950
- writeVerbose(stderr, verbose, `Google model ${parsedModelEffective.canonical} rejected streamGenerateContent; falling back to non-streaming.`, verboseColor);
951
- const result = await summarizeWithModelId({
952
- modelId: parsedModelEffective.canonical,
953
- prompt: promptPayload,
954
- maxOutputTokens: maxOutputTokensForCall ?? undefined,
955
- timeoutMs,
956
- fetchImpl: trackedFetch,
957
- apiKeys: apiKeysForLlm,
958
- openrouter: openrouterOptions,
959
- });
960
- llmCalls.push({
961
- provider: result.provider,
962
- model: result.canonicalModelId,
963
- usage: result.usage,
964
- purpose: 'summary',
965
- });
966
- summary = result.text;
967
- streamResult = null;
968
- }
969
- else if (isUnsupportedAttachmentError(error)) {
970
- throw new Error(`Model ${parsedModel.canonical} does not support attaching files of type ${attachment.mediaType}. Try a different --model (e.g. google/gemini-3-flash-preview).`, { cause: error });
971
- }
972
- else {
1770
+ if (requestedModel.kind === 'fixed') {
1771
+ if (isUnsupportedAttachmentError(error)) {
1772
+ throw new Error(`Model ${attempt.userModelId} does not support attaching files of type ${attachment.mediaType}. Try a different --model.`, { cause: error });
1773
+ }
973
1774
  throw error;
974
1775
  }
1776
+ writeVerbose(stderr, verbose, `auto failed ${attempt.userModelId}: ${error instanceof Error ? error.message : String(error)}`, verboseColor);
975
1777
  }
976
- if (streamResult) {
977
- getLastStreamError = streamResult.lastError;
978
- let streamed = '';
979
- const liveRenderer = shouldLiveRenderSummary
980
- ? createLiveRenderer({
981
- write: (chunk) => {
982
- clearProgressForStdout();
983
- stdout.write(chunk);
984
- },
985
- width: markdownRenderWidth(stdout, env),
986
- renderFrame: (markdown) => renderMarkdownAnsi(markdown, {
987
- width: markdownRenderWidth(stdout, env),
988
- wrap: true,
989
- color: supportsColor(stdout, env),
990
- }),
991
- })
992
- : null;
993
- let lastFrameAtMs = 0;
994
- try {
995
- try {
996
- let cleared = false;
997
- for await (const delta of streamResult.textStream) {
998
- if (!cleared) {
999
- clearProgressForStdout();
1000
- cleared = true;
1001
- }
1002
- const merged = mergeStreamingChunk(streamed, delta);
1003
- streamed = merged.next;
1004
- if (shouldStreamSummaryToStdout) {
1005
- if (merged.appended)
1006
- stdout.write(merged.appended);
1007
- continue;
1008
- }
1009
- if (liveRenderer) {
1010
- const now = Date.now();
1011
- const due = now - lastFrameAtMs >= 120;
1012
- const hasNewline = delta.includes('\n');
1013
- if (hasNewline || due) {
1014
- liveRenderer.render(streamed);
1015
- lastFrameAtMs = now;
1016
- }
1017
- }
1018
- }
1019
- }
1020
- catch (error) {
1021
- if (isUnsupportedAttachmentError(error)) {
1022
- throw new Error(`Model ${parsedModel.canonical} does not support attaching files of type ${attachment.mediaType}. Try a different --model (e.g. google/gemini-3-flash-preview).`, { cause: error });
1023
- }
1024
- throw error;
1025
- }
1026
- const trimmed = streamed.trim();
1027
- streamed = trimmed;
1028
- if (liveRenderer) {
1029
- liveRenderer.render(trimmed);
1030
- summaryAlreadyPrinted = true;
1031
- }
1032
- }
1033
- finally {
1034
- liveRenderer?.finish();
1778
+ }
1779
+ if (!summaryResult || !usedAttempt) {
1780
+ const withFreeTip = (message) => {
1781
+ if (!isNamedModelSelection || !wantsFreeNamedModel)
1782
+ return message;
1783
+ return (`${message}\n` +
1784
+ `Tip: run "summarize refresh-free" to refresh the free model candidates (writes ~/.summarize/config.json).`);
1785
+ };
1786
+ if (isNamedModelSelection) {
1787
+ if (lastError === null && missingRequiredEnvs.size > 0) {
1788
+ throw new Error(withFreeTip(`Missing ${Array.from(missingRequiredEnvs).sort().join(', ')} for --model ${requestedModelInput}.`));
1035
1789
  }
1036
- const usage = await streamResult.usage;
1037
- llmCalls.push({
1038
- provider: streamResult.provider,
1039
- model: streamResult.canonicalModelId,
1040
- usage,
1041
- purpose: 'summary',
1042
- });
1043
- summary = streamed;
1044
- if (shouldStreamSummaryToStdout) {
1045
- if (!streamed.endsWith('\n')) {
1046
- stdout.write('\n');
1790
+ if (lastError instanceof Error) {
1791
+ if (sawOpenRouterNoAllowedProviders) {
1792
+ const message = await buildOpenRouterNoAllowedProvidersMessage({
1793
+ attempts,
1794
+ fetchImpl: trackedFetch,
1795
+ timeoutMs,
1796
+ });
1797
+ throw new Error(withFreeTip(message), { cause: lastError });
1047
1798
  }
1048
- summaryAlreadyPrinted = true;
1799
+ throw new Error(withFreeTip(lastError.message), { cause: lastError });
1049
1800
  }
1801
+ throw new Error(withFreeTip(`No model available for --model ${requestedModelInput}`));
1050
1802
  }
1051
- }
1052
- else {
1053
- let result;
1054
- try {
1055
- result = await summarizeWithModelId({
1056
- modelId: parsedModelEffective.canonical,
1057
- prompt: promptPayload,
1058
- maxOutputTokens: maxOutputTokensForCall ?? undefined,
1059
- timeoutMs,
1060
- fetchImpl: trackedFetch,
1061
- apiKeys: apiKeysForLlm,
1062
- openrouter: openrouterOptions,
1063
- });
1064
- }
1065
- catch (error) {
1066
- if (isUnsupportedAttachmentError(error)) {
1067
- throw new Error(`Model ${parsedModel.canonical} does not support attaching files of type ${attachment.mediaType}. Try a different --model (e.g. google/gemini-3-flash-preview).`, { cause: error });
1803
+ if (textContent) {
1804
+ clearProgressForStdout();
1805
+ stdout.write(`${textContent.content.trim()}\n`);
1806
+ if (assetFooterParts.length > 0) {
1807
+ writeViaFooter([...assetFooterParts, 'no model']);
1068
1808
  }
1069
- throw error;
1070
- }
1071
- llmCalls.push({
1072
- provider: result.provider,
1073
- model: result.canonicalModelId,
1074
- usage: result.usage,
1075
- purpose: 'summary',
1076
- });
1077
- summary = result.text;
1078
- }
1079
- summary = summary.trim();
1080
- if (summary.length === 0) {
1081
- const last = getLastStreamError?.();
1082
- if (last instanceof Error) {
1083
- throw new Error(last.message, { cause: last });
1809
+ return;
1084
1810
  }
1085
- throw new Error('LLM returned an empty summary');
1811
+ if (lastError instanceof Error)
1812
+ throw lastError;
1813
+ throw new Error('No model available for this input');
1086
1814
  }
1815
+ const { summary, summaryAlreadyPrinted, modelMeta, maxOutputTokensForCall } = summaryResult;
1087
1816
  const extracted = {
1088
1817
  kind: 'asset',
1089
1818
  source: sourceLabel,
@@ -1102,7 +1831,7 @@ export async function runCli(argv, { env, fetch, execFile: execFileOverride, std
1102
1831
  ? { kind: 'preset', preset: lengthArg.preset }
1103
1832
  : { kind: 'chars', maxCharacters: lengthArg.maxCharacters },
1104
1833
  maxOutputTokens: maxOutputTokensArg,
1105
- model,
1834
+ model: requestedModelLabel,
1106
1835
  }
1107
1836
  : {
1108
1837
  kind: 'asset-url',
@@ -1112,13 +1841,14 @@ export async function runCli(argv, { env, fetch, execFile: execFileOverride, std
1112
1841
  ? { kind: 'preset', preset: lengthArg.preset }
1113
1842
  : { kind: 'chars', maxCharacters: lengthArg.maxCharacters },
1114
1843
  maxOutputTokens: maxOutputTokensArg,
1115
- model,
1844
+ model: requestedModelLabel,
1116
1845
  };
1117
1846
  const payload = {
1118
1847
  input,
1119
1848
  env: {
1120
1849
  hasXaiKey: Boolean(xaiApiKey),
1121
1850
  hasOpenAIKey: Boolean(apiKey),
1851
+ hasOpenRouterKey: Boolean(openrouterApiKey),
1122
1852
  hasApifyToken: Boolean(apifyToken),
1123
1853
  hasFirecrawlKey: firecrawlConfigured,
1124
1854
  hasGoogleKey: googleConfigured,
@@ -1127,26 +1857,25 @@ export async function runCli(argv, { env, fetch, execFile: execFileOverride, std
1127
1857
  extracted,
1128
1858
  prompt: promptText,
1129
1859
  llm: {
1130
- provider: parsedModelEffective.provider,
1131
- model: parsedModelEffective.canonical,
1860
+ provider: modelMeta.provider,
1861
+ model: usedAttempt.userModelId,
1132
1862
  maxCompletionTokens: maxOutputTokensForCall,
1133
1863
  strategy: 'single',
1134
1864
  },
1135
1865
  metrics: metricsEnabled ? finishReport : null,
1136
1866
  summary,
1137
1867
  };
1138
- if (metricsDetailed && finishReport) {
1139
- writeMetricsReport(finishReport);
1140
- }
1141
1868
  stdout.write(`${JSON.stringify(payload, null, 2)}\n`);
1142
1869
  if (metricsEnabled && finishReport) {
1143
1870
  const costUsd = await estimateCostUsd();
1144
1871
  writeFinishLine({
1145
1872
  stderr,
1146
1873
  elapsedMs: Date.now() - runStartedAtMs,
1147
- model: parsedModelEffective.canonical,
1874
+ model: usedAttempt.userModelId,
1148
1875
  report: finishReport,
1149
1876
  costUsd,
1877
+ detailed: metricsDetailed,
1878
+ extraParts: null,
1150
1879
  color: verboseColor,
1151
1880
  });
1152
1881
  }
@@ -1166,17 +1895,18 @@ export async function runCli(argv, { env, fetch, execFile: execFileOverride, std
1166
1895
  stdout.write('\n');
1167
1896
  }
1168
1897
  }
1898
+ writeViaFooter([...assetFooterParts, `model ${usedAttempt.userModelId}`]);
1169
1899
  const report = shouldComputeReport ? await buildReport() : null;
1170
- if (metricsDetailed && report)
1171
- writeMetricsReport(report);
1172
1900
  if (metricsEnabled && report) {
1173
1901
  const costUsd = await estimateCostUsd();
1174
1902
  writeFinishLine({
1175
1903
  stderr,
1176
1904
  elapsedMs: Date.now() - runStartedAtMs,
1177
- model: parsedModelEffective.canonical,
1905
+ model: usedAttempt.userModelId,
1178
1906
  report,
1179
1907
  costUsd,
1908
+ detailed: metricsDetailed,
1909
+ extraParts: null,
1180
1910
  color: verboseColor,
1181
1911
  });
1182
1912
  }
@@ -1226,6 +1956,16 @@ export async function runCli(argv, { env, fetch, execFile: execFileOverride, std
1226
1956
  sourceKind: 'file',
1227
1957
  sourceLabel: loaded.sourceLabel,
1228
1958
  attachment: loaded.attachment,
1959
+ onModelChosen: (modelId) => {
1960
+ if (!progressEnabled)
1961
+ return;
1962
+ const mt = loaded.attachment.mediaType;
1963
+ const name = loaded.attachment.filename;
1964
+ const details = sizeLabel ? `${mt}, ${sizeLabel}` : mt;
1965
+ spinner.setText(name
1966
+ ? `Summarizing ${name} (${details}, model: ${modelId})…`
1967
+ : `Summarizing ${details} (model: ${modelId})…`);
1968
+ },
1229
1969
  });
1230
1970
  return;
1231
1971
  }
@@ -1281,6 +2021,11 @@ export async function runCli(argv, { env, fetch, execFile: execFileOverride, std
1281
2021
  sourceKind: 'asset-url',
1282
2022
  sourceLabel: loaded.sourceLabel,
1283
2023
  attachment: loaded.attachment,
2024
+ onModelChosen: (modelId) => {
2025
+ if (!progressEnabled)
2026
+ return;
2027
+ spinner.setText(`Summarizing (model: ${modelId})…`);
2028
+ },
1284
2029
  });
1285
2030
  return;
1286
2031
  }
@@ -1310,41 +2055,106 @@ export async function runCli(argv, { env, fetch, execFile: execFileOverride, std
1310
2055
  }
1311
2056
  const markdownRequested = wantsMarkdown;
1312
2057
  const effectiveMarkdownMode = markdownRequested ? markdownMode : 'off';
1313
- const hasKeyForModel = parsedModelForLlm.provider === 'xai'
1314
- ? xaiConfigured
1315
- : parsedModelForLlm.provider === 'google'
1316
- ? googleConfigured
1317
- : parsedModelForLlm.provider === 'anthropic'
1318
- ? anthropicConfigured
1319
- : Boolean(apiKey);
1320
- const markdownProvider = hasKeyForModel ? parsedModelForLlm.provider : 'none';
1321
- if (markdownRequested && effectiveMarkdownMode === 'llm' && !hasKeyForModel) {
1322
- const required = parsedModelForLlm.provider === 'xai'
1323
- ? 'XAI_API_KEY'
1324
- : parsedModelForLlm.provider === 'google'
1325
- ? 'GEMINI_API_KEY (or GOOGLE_GENERATIVE_AI_API_KEY / GOOGLE_API_KEY)'
1326
- : parsedModelForLlm.provider === 'anthropic'
1327
- ? 'ANTHROPIC_API_KEY'
1328
- : 'OPENAI_API_KEY';
1329
- throw new Error(`--markdown-mode llm requires ${required} for model ${parsedModelForLlm.canonical}`);
2058
+ const markdownModel = (() => {
2059
+ if (!markdownRequested)
2060
+ return null;
2061
+ // Prefer the explicitly chosen model when it is a native provider (keeps behavior stable).
2062
+ if (requestedModel.kind === 'fixed' && requestedModel.transport === 'native') {
2063
+ return { llmModelId: requestedModel.llmModelId, forceOpenRouter: false };
2064
+ }
2065
+ // Otherwise pick a safe, broadly-capable default for HTML→Markdown conversion.
2066
+ if (googleConfigured) {
2067
+ return { llmModelId: 'google/gemini-3-flash-preview', forceOpenRouter: false };
2068
+ }
2069
+ if (apiKey) {
2070
+ return { llmModelId: 'openai/gpt-5-mini', forceOpenRouter: false };
2071
+ }
2072
+ if (openrouterConfigured) {
2073
+ return { llmModelId: 'openai/openai/gpt-5-mini', forceOpenRouter: true };
2074
+ }
2075
+ if (anthropicConfigured) {
2076
+ return { llmModelId: 'anthropic/claude-sonnet-4-5', forceOpenRouter: false };
2077
+ }
2078
+ if (xaiConfigured) {
2079
+ return { llmModelId: 'xai/grok-4-fast-non-reasoning', forceOpenRouter: false };
2080
+ }
2081
+ return null;
2082
+ })();
2083
+ const markdownProvider = (() => {
2084
+ if (!markdownModel)
2085
+ return 'none';
2086
+ const parsed = parseGatewayStyleModelId(markdownModel.llmModelId);
2087
+ return parsed.provider;
2088
+ })();
2089
+ const hasKeyForMarkdownModel = (() => {
2090
+ if (!markdownModel)
2091
+ return false;
2092
+ if (markdownModel.forceOpenRouter)
2093
+ return openrouterConfigured;
2094
+ const parsed = parseGatewayStyleModelId(markdownModel.llmModelId);
2095
+ return parsed.provider === 'xai'
2096
+ ? xaiConfigured
2097
+ : parsed.provider === 'google'
2098
+ ? googleConfigured
2099
+ : parsed.provider === 'anthropic'
2100
+ ? anthropicConfigured
2101
+ : Boolean(apiKey);
2102
+ })();
2103
+ if (markdownRequested && effectiveMarkdownMode === 'llm' && !hasKeyForMarkdownModel) {
2104
+ const required = (() => {
2105
+ if (markdownModel?.forceOpenRouter)
2106
+ return 'OPENROUTER_API_KEY';
2107
+ if (markdownModel) {
2108
+ const parsed = parseGatewayStyleModelId(markdownModel.llmModelId);
2109
+ return parsed.provider === 'xai'
2110
+ ? 'XAI_API_KEY'
2111
+ : parsed.provider === 'google'
2112
+ ? 'GEMINI_API_KEY (or GOOGLE_GENERATIVE_AI_API_KEY / GOOGLE_API_KEY)'
2113
+ : parsed.provider === 'anthropic'
2114
+ ? 'ANTHROPIC_API_KEY'
2115
+ : 'OPENAI_API_KEY';
2116
+ }
2117
+ return 'GEMINI_API_KEY (or GOOGLE_GENERATIVE_AI_API_KEY / GOOGLE_API_KEY)';
2118
+ })();
2119
+ throw new Error(`--markdown-mode llm requires ${required}`);
1330
2120
  }
1331
- writeVerbose(stderr, verbose, `config url=${url} timeoutMs=${timeoutMs} youtube=${youtubeMode} firecrawl=${firecrawlMode} length=${lengthArg.kind === 'preset' ? lengthArg.preset : `${lengthArg.maxCharacters} chars`} maxOutputTokens=${formatOptionalNumber(maxOutputTokensArg)} json=${json} extract=${extractMode} format=${format} preprocess=${preprocessMode} markdownMode=${markdownMode} model=${model} stream=${effectiveStreamMode} render=${effectiveRenderMode}`, verboseColor);
1332
- writeVerbose(stderr, verbose, `configFile path=${formatOptionalString(configPath)} model=${formatOptionalString(config?.model ?? null)}`, verboseColor);
2121
+ writeVerbose(stderr, verbose, `config url=${url} timeoutMs=${timeoutMs} youtube=${youtubeMode} firecrawl=${firecrawlMode} length=${lengthArg.kind === 'preset' ? lengthArg.preset : `${lengthArg.maxCharacters} chars`} maxOutputTokens=${formatOptionalNumber(maxOutputTokensArg)} retries=${retries} json=${json} extract=${extractMode} format=${format} preprocess=${preprocessMode} markdownMode=${markdownMode} model=${requestedModelLabel} videoMode=${videoMode} stream=${effectiveStreamMode} render=${effectiveRenderMode}`, verboseColor);
2122
+ writeVerbose(stderr, verbose, `configFile path=${formatOptionalString(configPath)} model=${formatOptionalString((() => {
2123
+ const model = config?.model;
2124
+ if (!model)
2125
+ return null;
2126
+ if ('id' in model)
2127
+ return model.id;
2128
+ if ('name' in model)
2129
+ return model.name;
2130
+ if ('mode' in model && model.mode === 'auto')
2131
+ return 'auto';
2132
+ return null;
2133
+ })())}`, verboseColor);
1333
2134
  writeVerbose(stderr, verbose, `env xaiKey=${xaiConfigured} openaiKey=${Boolean(apiKey)} googleKey=${googleConfigured} anthropicKey=${anthropicConfigured} openrouterKey=${openrouterConfigured} apifyToken=${Boolean(apifyToken)} firecrawlKey=${firecrawlConfigured}`, verboseColor);
1334
2135
  writeVerbose(stderr, verbose, `markdown requested=${markdownRequested} provider=${markdownProvider}`, verboseColor);
1335
2136
  const scrapeWithFirecrawl = firecrawlConfigured && firecrawlMode !== 'off'
1336
2137
  ? createFirecrawlScraper({ apiKey: firecrawlApiKey, fetchImpl: trackedFetch })
1337
2138
  : null;
1338
- const llmHtmlToMarkdown = markdownRequested && (effectiveMarkdownMode === 'llm' || markdownProvider !== 'none')
2139
+ const llmHtmlToMarkdown = markdownRequested &&
2140
+ markdownModel !== null &&
2141
+ (effectiveMarkdownMode === 'llm' || markdownProvider !== 'none')
1339
2142
  ? createHtmlToMarkdownConverter({
1340
- modelId: model,
2143
+ modelId: markdownModel.llmModelId,
2144
+ forceOpenRouter: markdownModel.forceOpenRouter,
1341
2145
  xaiApiKey: xaiConfigured ? xaiApiKey : null,
1342
2146
  googleApiKey: googleConfigured ? googleApiKey : null,
1343
2147
  openaiApiKey: apiKey,
1344
2148
  anthropicApiKey: anthropicConfigured ? anthropicApiKey : null,
1345
2149
  openrouterApiKey: openrouterConfigured ? openrouterApiKey : null,
1346
- openrouter: openrouterOptions,
1347
2150
  fetchImpl: trackedFetch,
2151
+ retries,
2152
+ onRetry: createRetryLogger({
2153
+ stderr,
2154
+ verbose,
2155
+ color: verboseColor,
2156
+ modelId: markdownModel.llmModelId,
2157
+ }),
1348
2158
  onUsage: ({ model: usedModel, provider, usage }) => {
1349
2159
  llmCalls.push({ provider, model: usedModel, usage, purpose: 'markdown' });
1350
2160
  },
@@ -1568,19 +2378,43 @@ export async function runCli(argv, { env, fetch, execFile: execFileOverride, std
1568
2378
  catch (error) {
1569
2379
  throw withBirdTip(error, url, env);
1570
2380
  }
1571
- const extractedContentBytes = Buffer.byteLength(extracted.content, 'utf8');
1572
- const extractedContentSize = formatBytes(extractedContentBytes);
1573
- const viaSources = [];
1574
- if (extracted.diagnostics.strategy === 'bird') {
1575
- viaSources.push('bird');
1576
- }
1577
- if (extracted.diagnostics.strategy === 'nitter') {
1578
- viaSources.push('Nitter');
1579
- }
1580
- if (extracted.diagnostics.firecrawl.used) {
1581
- viaSources.push('Firecrawl');
1582
- }
1583
- const viaSourceLabel = viaSources.length > 0 ? `, ${viaSources.join('+')}` : '';
2381
+ let extractedContentSize = 'unknown';
2382
+ let viaSourceLabel = '';
2383
+ let footerBaseParts = [];
2384
+ const recomputeExtractionUi = () => {
2385
+ const extractedContentBytes = Buffer.byteLength(extracted.content, 'utf8');
2386
+ extractedContentSize = formatBytes(extractedContentBytes);
2387
+ const viaSources = [];
2388
+ if (extracted.diagnostics.strategy === 'bird') {
2389
+ viaSources.push('bird');
2390
+ }
2391
+ if (extracted.diagnostics.strategy === 'nitter') {
2392
+ viaSources.push('Nitter');
2393
+ }
2394
+ if (extracted.diagnostics.firecrawl.used) {
2395
+ viaSources.push('Firecrawl');
2396
+ }
2397
+ viaSourceLabel = viaSources.length > 0 ? `, ${viaSources.join('+')}` : '';
2398
+ footerBaseParts = [];
2399
+ if (extracted.diagnostics.strategy === 'html')
2400
+ footerBaseParts.push('html');
2401
+ if (extracted.diagnostics.strategy === 'bird')
2402
+ footerBaseParts.push('bird');
2403
+ if (extracted.diagnostics.strategy === 'nitter')
2404
+ footerBaseParts.push('nitter');
2405
+ if (extracted.diagnostics.firecrawl.used)
2406
+ footerBaseParts.push('firecrawl');
2407
+ if (extracted.diagnostics.markdown.used) {
2408
+ footerBaseParts.push(extracted.diagnostics.markdown.provider === 'llm' ? 'html→md llm' : 'markdown');
2409
+ }
2410
+ if (extracted.diagnostics.transcript.textProvided) {
2411
+ footerBaseParts.push(`transcript ${extracted.diagnostics.transcript.provider ?? 'unknown'}`);
2412
+ }
2413
+ if (extracted.isVideoOnly && extracted.video) {
2414
+ footerBaseParts.push(extracted.video.kind === 'youtube' ? 'video youtube' : 'video url');
2415
+ }
2416
+ };
2417
+ recomputeExtractionUi();
1584
2418
  if (progressEnabled) {
1585
2419
  websiteProgress?.stop?.();
1586
2420
  spinner.setText(extractMode
@@ -1602,6 +2436,58 @@ export async function runCli(argv, { env, fetch, execFile: execFileOverride, std
1602
2436
  !hasUvxCli(env)) {
1603
2437
  stderr.write(`${UVX_TIP}\n`);
1604
2438
  }
2439
+ if (!isYoutubeUrl && extracted.isVideoOnly && extracted.video) {
2440
+ if (extracted.video.kind === 'youtube') {
2441
+ writeVerbose(stderr, verbose, `video-only page detected; switching to YouTube URL ${extracted.video.url}`, verboseColor);
2442
+ if (progressEnabled) {
2443
+ spinner.setText('Video-only page: fetching YouTube transcript…');
2444
+ }
2445
+ extracted = await client.fetchLinkContent(extracted.video.url, {
2446
+ timeoutMs,
2447
+ youtubeTranscript: youtubeMode,
2448
+ firecrawl: firecrawlMode,
2449
+ format: markdownRequested ? 'markdown' : 'text',
2450
+ });
2451
+ recomputeExtractionUi();
2452
+ if (progressEnabled) {
2453
+ spinner.setText(extractMode
2454
+ ? `Extracted (${extractedContentSize}${viaSourceLabel})`
2455
+ : `Summarizing (sent ${extractedContentSize}${viaSourceLabel})…`);
2456
+ }
2457
+ }
2458
+ else if (extracted.video.kind === 'direct') {
2459
+ const wantsVideoUnderstanding = videoMode === 'understand' || videoMode === 'auto';
2460
+ const canVideoUnderstand = wantsVideoUnderstanding &&
2461
+ googleConfigured &&
2462
+ (requestedModel.kind === 'auto' ||
2463
+ (fixedModelSpec?.transport === 'native' && fixedModelSpec.provider === 'google'));
2464
+ if (canVideoUnderstand) {
2465
+ if (progressEnabled)
2466
+ spinner.setText('Downloading video…');
2467
+ const loadedVideo = await loadRemoteAsset({
2468
+ url: extracted.video.url,
2469
+ fetchImpl: trackedFetch,
2470
+ timeoutMs,
2471
+ });
2472
+ assertAssetMediaTypeSupported({ attachment: loadedVideo.attachment, sizeLabel: null });
2473
+ let chosenModel = null;
2474
+ if (progressEnabled)
2475
+ spinner.setText('Summarizing video…');
2476
+ await summarizeAsset({
2477
+ sourceKind: 'asset-url',
2478
+ sourceLabel: loadedVideo.sourceLabel,
2479
+ attachment: loadedVideo.attachment,
2480
+ onModelChosen: (modelId) => {
2481
+ chosenModel = modelId;
2482
+ if (progressEnabled)
2483
+ spinner.setText(`Summarizing video (model: ${modelId})…`);
2484
+ },
2485
+ });
2486
+ writeViaFooter([...footerBaseParts, ...(chosenModel ? [`model ${chosenModel}`] : [])]);
2487
+ return;
2488
+ }
2489
+ }
2490
+ }
1605
2491
  const isYouTube = extracted.siteName === 'YouTube';
1606
2492
  const prompt = buildLinkSummaryPrompt({
1607
2493
  url: extracted.url,
@@ -1632,11 +2518,12 @@ export async function runCli(argv, { env, fetch, execFile: execFileOverride, std
1632
2518
  ? { kind: 'preset', preset: lengthArg.preset }
1633
2519
  : { kind: 'chars', maxCharacters: lengthArg.maxCharacters },
1634
2520
  maxOutputTokens: maxOutputTokensArg,
1635
- model,
2521
+ model: requestedModelLabel,
1636
2522
  },
1637
2523
  env: {
1638
2524
  hasXaiKey: Boolean(xaiApiKey),
1639
2525
  hasOpenAIKey: Boolean(apiKey),
2526
+ hasOpenRouterKey: Boolean(openrouterApiKey),
1640
2527
  hasApifyToken: Boolean(apifyToken),
1641
2528
  hasFirecrawlKey: firecrawlConfigured,
1642
2529
  hasGoogleKey: googleConfigured,
@@ -1648,35 +2535,35 @@ export async function runCli(argv, { env, fetch, execFile: execFileOverride, std
1648
2535
  metrics: metricsEnabled ? finishReport : null,
1649
2536
  summary: null,
1650
2537
  };
1651
- if (metricsDetailed && finishReport) {
1652
- writeMetricsReport(finishReport);
1653
- }
1654
2538
  stdout.write(`${JSON.stringify(payload, null, 2)}\n`);
1655
2539
  if (metricsEnabled && finishReport) {
1656
2540
  const costUsd = await estimateCostUsd();
1657
2541
  writeFinishLine({
1658
2542
  stderr,
1659
2543
  elapsedMs: Date.now() - runStartedAtMs,
1660
- model,
2544
+ model: requestedModelLabel,
1661
2545
  report: finishReport,
1662
2546
  costUsd,
2547
+ detailed: metricsDetailed,
2548
+ extraParts: metricsDetailed ? buildDetailedLengthPartsForExtracted(extracted) : null,
1663
2549
  color: verboseColor,
1664
2550
  });
1665
2551
  }
1666
2552
  return;
1667
2553
  }
1668
2554
  stdout.write(`${extracted.content}\n`);
2555
+ writeViaFooter(footerBaseParts);
1669
2556
  const report = shouldComputeReport ? await buildReport() : null;
1670
- if (metricsDetailed && report)
1671
- writeMetricsReport(report);
1672
2557
  if (metricsEnabled && report) {
1673
2558
  const costUsd = await estimateCostUsd();
1674
2559
  writeFinishLine({
1675
2560
  stderr,
1676
2561
  elapsedMs: Date.now() - runStartedAtMs,
1677
- model,
2562
+ model: requestedModelLabel,
1678
2563
  report,
1679
2564
  costUsd,
2565
+ detailed: metricsDetailed,
2566
+ extraParts: metricsDetailed ? buildDetailedLengthPartsForExtracted(extracted) : null,
1680
2567
  color: verboseColor,
1681
2568
  });
1682
2569
  }
@@ -1703,11 +2590,12 @@ export async function runCli(argv, { env, fetch, execFile: execFileOverride, std
1703
2590
  ? { kind: 'preset', preset: lengthArg.preset }
1704
2591
  : { kind: 'chars', maxCharacters: lengthArg.maxCharacters },
1705
2592
  maxOutputTokens: maxOutputTokensArg,
1706
- model,
2593
+ model: requestedModelLabel,
1707
2594
  },
1708
2595
  env: {
1709
2596
  hasXaiKey: Boolean(xaiApiKey),
1710
2597
  hasOpenAIKey: Boolean(apiKey),
2598
+ hasOpenRouterKey: Boolean(openrouterApiKey),
1711
2599
  hasApifyToken: Boolean(apifyToken),
1712
2600
  hasFirecrawlKey: firecrawlConfigured,
1713
2601
  hasGoogleKey: googleConfigured,
@@ -1719,248 +2607,212 @@ export async function runCli(argv, { env, fetch, execFile: execFileOverride, std
1719
2607
  metrics: metricsEnabled ? finishReport : null,
1720
2608
  summary: extracted.content,
1721
2609
  };
1722
- if (metricsDetailed && finishReport) {
1723
- writeMetricsReport(finishReport);
1724
- }
1725
2610
  stdout.write(`${JSON.stringify(payload, null, 2)}\n`);
1726
2611
  if (metricsEnabled && finishReport) {
1727
2612
  const costUsd = await estimateCostUsd();
1728
2613
  writeFinishLine({
1729
2614
  stderr,
1730
2615
  elapsedMs: Date.now() - runStartedAtMs,
1731
- model,
2616
+ model: requestedModelLabel,
1732
2617
  report: finishReport,
1733
2618
  costUsd,
2619
+ detailed: metricsDetailed,
2620
+ extraParts: metricsDetailed ? buildDetailedLengthPartsForExtracted(extracted) : null,
1734
2621
  color: verboseColor,
1735
2622
  });
1736
2623
  }
1737
2624
  return;
1738
2625
  }
1739
2626
  stdout.write(`${extracted.content}\n`);
2627
+ writeViaFooter(footerBaseParts);
1740
2628
  const report = shouldComputeReport ? await buildReport() : null;
1741
- if (metricsDetailed && report)
1742
- writeMetricsReport(report);
1743
2629
  if (metricsEnabled && report) {
1744
2630
  const costUsd = await estimateCostUsd();
1745
2631
  writeFinishLine({
1746
2632
  stderr,
1747
2633
  elapsedMs: Date.now() - runStartedAtMs,
1748
- model,
2634
+ model: requestedModelLabel,
1749
2635
  report,
1750
2636
  costUsd,
2637
+ detailed: metricsDetailed,
2638
+ extraParts: metricsDetailed ? buildDetailedLengthPartsForExtracted(extracted) : null,
1751
2639
  color: verboseColor,
1752
2640
  });
1753
2641
  }
1754
2642
  return;
1755
2643
  }
1756
- const parsedModel = parseGatewayStyleModelId(model);
1757
- const apiKeysForLlm = {
1758
- xaiApiKey,
1759
- openaiApiKey: apiKey,
1760
- googleApiKey: googleConfigured ? googleApiKey : null,
1761
- anthropicApiKey: anthropicConfigured ? anthropicApiKey : null,
1762
- openrouterApiKey: openrouterConfigured ? openrouterApiKey : null,
2644
+ const promptTokens = countTokens(prompt);
2645
+ const kindForAuto = isYouTube ? 'youtube' : 'website';
2646
+ const attempts = await (async () => {
2647
+ if (isFallbackModel) {
2648
+ const catalog = await getLiteLlmCatalog();
2649
+ const list = buildAutoModelAttempts({
2650
+ kind: kindForAuto,
2651
+ promptTokens,
2652
+ desiredOutputTokens,
2653
+ requiresVideoUnderstanding: false,
2654
+ env: envForAuto,
2655
+ config: configForModelSelection,
2656
+ catalog,
2657
+ openrouterProvidersFromEnv: null,
2658
+ cliAvailability,
2659
+ });
2660
+ if (verbose) {
2661
+ for (const a of list.slice(0, 8)) {
2662
+ writeVerbose(stderr, verbose, `auto candidate ${a.debug}`, verboseColor);
2663
+ }
2664
+ }
2665
+ return list.map((attempt) => {
2666
+ if (attempt.transport !== 'cli')
2667
+ return attempt;
2668
+ const parsed = parseCliUserModelId(attempt.userModelId);
2669
+ return { ...attempt, cliProvider: parsed.provider, cliModel: parsed.model };
2670
+ });
2671
+ }
2672
+ if (!fixedModelSpec) {
2673
+ throw new Error('Internal error: missing fixed model spec');
2674
+ }
2675
+ if (fixedModelSpec.transport === 'cli') {
2676
+ return [
2677
+ {
2678
+ transport: 'cli',
2679
+ userModelId: fixedModelSpec.userModelId,
2680
+ llmModelId: null,
2681
+ cliProvider: fixedModelSpec.cliProvider,
2682
+ cliModel: fixedModelSpec.cliModel,
2683
+ openrouterProviders: null,
2684
+ forceOpenRouter: false,
2685
+ requiredEnv: fixedModelSpec.requiredEnv,
2686
+ },
2687
+ ];
2688
+ }
2689
+ return [
2690
+ {
2691
+ transport: fixedModelSpec.transport === 'openrouter' ? 'openrouter' : 'native',
2692
+ userModelId: fixedModelSpec.userModelId,
2693
+ llmModelId: fixedModelSpec.llmModelId,
2694
+ openrouterProviders: fixedModelSpec.openrouterProviders,
2695
+ forceOpenRouter: fixedModelSpec.forceOpenRouter,
2696
+ requiredEnv: fixedModelSpec.requiredEnv,
2697
+ },
2698
+ ];
2699
+ })();
2700
+ const onModelChosen = (modelId) => {
2701
+ if (!progressEnabled)
2702
+ return;
2703
+ spinner.setText(`Summarizing (sent ${extractedContentSize}${viaSourceLabel}, model: ${modelId})…`);
1763
2704
  };
1764
- const requiredKeyEnv = parsedModel.provider === 'xai'
1765
- ? 'XAI_API_KEY'
1766
- : parsedModel.provider === 'google'
1767
- ? 'GEMINI_API_KEY (or GOOGLE_GENERATIVE_AI_API_KEY / GOOGLE_API_KEY)'
1768
- : parsedModel.provider === 'anthropic'
1769
- ? 'ANTHROPIC_API_KEY'
1770
- : 'OPENAI_API_KEY (or OPENROUTER_API_KEY)';
1771
- const hasRequiredKey = parsedModel.provider === 'xai'
1772
- ? Boolean(xaiApiKey)
1773
- : parsedModel.provider === 'google'
1774
- ? googleConfigured
1775
- : parsedModel.provider === 'anthropic'
1776
- ? anthropicConfigured
1777
- : Boolean(apiKey) || openrouterConfigured;
1778
- if (!hasRequiredKey) {
1779
- throw new Error(`Missing ${requiredKeyEnv} for model ${parsedModel.canonical}. Set the env var or choose a different --model.`);
1780
- }
1781
- const modelResolution = await resolveModelIdForLlmCall({
1782
- parsedModel,
1783
- apiKeys: { googleApiKey: apiKeysForLlm.googleApiKey },
1784
- fetchImpl: trackedFetch,
1785
- timeoutMs,
1786
- });
1787
- if (modelResolution.note && verbose) {
1788
- writeVerbose(stderr, verbose, modelResolution.note, verboseColor);
1789
- }
1790
- const parsedModelEffective = parseGatewayStyleModelId(modelResolution.modelId);
1791
- const streamingEnabledForCall = streamingEnabled && !modelResolution.forceStreamOff;
1792
- writeVerbose(stderr, verbose, `mode summarize provider=${parsedModelEffective.provider} model=${parsedModelEffective.canonical}`, verboseColor);
1793
- const maxOutputTokensForCall = await resolveMaxOutputTokensForCall(parsedModelEffective.canonical);
1794
- const maxInputTokensForCall = await resolveMaxInputTokensForCall(parsedModelEffective.canonical);
1795
- if (typeof maxInputTokensForCall === 'number' &&
1796
- Number.isFinite(maxInputTokensForCall) &&
1797
- maxInputTokensForCall > 0) {
1798
- const tokenCount = countTokens(prompt);
1799
- if (tokenCount > maxInputTokensForCall) {
1800
- throw new Error(`Input token count (${formatCount(tokenCount)}) exceeds model input limit (${formatCount(maxInputTokensForCall)}). Tokenized with GPT tokenizer; prompt included.`);
2705
+ let summaryResult = null;
2706
+ let usedAttempt = null;
2707
+ let lastError = null;
2708
+ let sawOpenRouterNoAllowedProviders = false;
2709
+ const missingRequiredEnvs = new Set();
2710
+ for (const attempt of attempts) {
2711
+ const hasKey = envHasKeyFor(attempt.requiredEnv);
2712
+ if (!hasKey) {
2713
+ if (isFallbackModel) {
2714
+ if (isNamedModelSelection) {
2715
+ missingRequiredEnvs.add(attempt.requiredEnv);
2716
+ continue;
2717
+ }
2718
+ writeVerbose(stderr, verbose, `auto skip ${attempt.userModelId}: missing ${attempt.requiredEnv}`, verboseColor);
2719
+ continue;
2720
+ }
2721
+ throw new Error(formatMissingModelError(attempt));
1801
2722
  }
1802
- }
1803
- const shouldBufferSummaryForRender = streamingEnabledForCall && effectiveRenderMode === 'md' && isRichTty(stdout);
1804
- const shouldLiveRenderSummary = streamingEnabledForCall && effectiveRenderMode === 'md-live' && isRichTty(stdout);
1805
- const shouldStreamSummaryToStdout = streamingEnabledForCall && !shouldBufferSummaryForRender && !shouldLiveRenderSummary;
1806
- let summaryAlreadyPrinted = false;
1807
- let summary = '';
1808
- let getLastStreamError = null;
1809
- writeVerbose(stderr, verbose, 'summarize strategy=single', verboseColor);
1810
- if (streamingEnabledForCall) {
1811
- writeVerbose(stderr, verbose, `summarize stream=on buffered=${shouldBufferSummaryForRender}`, verboseColor);
1812
- let streamResult = null;
1813
2723
  try {
1814
- streamResult = await streamTextWithModelId({
1815
- modelId: parsedModelEffective.canonical,
1816
- apiKeys: apiKeysForLlm,
2724
+ summaryResult = await runSummaryAttempt({
2725
+ attempt,
1817
2726
  prompt,
1818
- temperature: 0,
1819
- maxOutputTokens: maxOutputTokensForCall ?? undefined,
1820
- timeoutMs,
1821
- fetchImpl: trackedFetch,
2727
+ allowStreaming: requestedModel.kind === 'fixed',
2728
+ onModelChosen,
1822
2729
  });
2730
+ usedAttempt = attempt;
2731
+ break;
1823
2732
  }
1824
2733
  catch (error) {
1825
- if (isStreamingTimeoutError(error)) {
1826
- writeVerbose(stderr, verbose, `Streaming timed out for ${parsedModelEffective.canonical}; falling back to non-streaming.`, verboseColor);
1827
- const result = await summarizeWithModelId({
1828
- modelId: parsedModelEffective.canonical,
1829
- prompt,
1830
- maxOutputTokens: maxOutputTokensForCall ?? undefined,
1831
- timeoutMs,
1832
- fetchImpl: trackedFetch,
1833
- apiKeys: apiKeysForLlm,
1834
- openrouter: openrouterOptions,
1835
- });
1836
- llmCalls.push({
1837
- provider: result.provider,
1838
- model: result.canonicalModelId,
1839
- usage: result.usage,
1840
- purpose: 'summary',
1841
- });
1842
- summary = result.text;
1843
- streamResult = null;
2734
+ lastError = error;
2735
+ if (isNamedModelSelection &&
2736
+ error instanceof Error &&
2737
+ /No allowed providers are available for the selected model/i.test(error.message)) {
2738
+ sawOpenRouterNoAllowedProviders = true;
1844
2739
  }
1845
- else if (parsedModelEffective.provider === 'google' &&
1846
- isGoogleStreamingUnsupportedError(error)) {
1847
- writeVerbose(stderr, verbose, `Google model ${parsedModelEffective.canonical} rejected streamGenerateContent; falling back to non-streaming.`, verboseColor);
1848
- const result = await summarizeWithModelId({
1849
- modelId: parsedModelEffective.canonical,
1850
- prompt,
1851
- maxOutputTokens: maxOutputTokensForCall ?? undefined,
1852
- timeoutMs,
1853
- fetchImpl: trackedFetch,
1854
- apiKeys: apiKeysForLlm,
1855
- openrouter: openrouterOptions,
1856
- });
1857
- llmCalls.push({
1858
- provider: result.provider,
1859
- model: result.canonicalModelId,
1860
- usage: result.usage,
1861
- purpose: 'summary',
1862
- });
1863
- summary = result.text;
1864
- streamResult = null;
1865
- }
1866
- else {
2740
+ if (requestedModel.kind === 'fixed') {
1867
2741
  throw error;
1868
2742
  }
2743
+ writeVerbose(stderr, verbose, `auto failed ${attempt.userModelId}: ${error instanceof Error ? error.message : String(error)}`, verboseColor);
1869
2744
  }
1870
- if (streamResult) {
1871
- getLastStreamError = streamResult.lastError;
1872
- let streamed = '';
1873
- const liveRenderer = shouldLiveRenderSummary
1874
- ? createLiveRenderer({
1875
- write: (chunk) => {
1876
- clearProgressForStdout();
1877
- stdout.write(chunk);
1878
- },
1879
- width: markdownRenderWidth(stdout, env),
1880
- renderFrame: (markdown) => renderMarkdownAnsi(markdown, {
1881
- width: markdownRenderWidth(stdout, env),
1882
- wrap: true,
1883
- color: supportsColor(stdout, env),
1884
- }),
1885
- })
1886
- : null;
1887
- let lastFrameAtMs = 0;
1888
- try {
1889
- let cleared = false;
1890
- for await (const delta of streamResult.textStream) {
1891
- const merged = mergeStreamingChunk(streamed, delta);
1892
- streamed = merged.next;
1893
- if (shouldStreamSummaryToStdout) {
1894
- if (!cleared) {
1895
- clearProgressForStdout();
1896
- cleared = true;
1897
- }
1898
- if (merged.appended)
1899
- stdout.write(merged.appended);
1900
- continue;
1901
- }
1902
- if (liveRenderer) {
1903
- const now = Date.now();
1904
- const due = now - lastFrameAtMs >= 120;
1905
- const hasNewline = delta.includes('\n');
1906
- if (hasNewline || due) {
1907
- liveRenderer.render(streamed);
1908
- lastFrameAtMs = now;
1909
- }
1910
- }
1911
- }
1912
- const trimmed = streamed.trim();
1913
- streamed = trimmed;
1914
- if (liveRenderer) {
1915
- liveRenderer.render(trimmed);
1916
- summaryAlreadyPrinted = true;
1917
- }
1918
- }
1919
- finally {
1920
- liveRenderer?.finish();
2745
+ }
2746
+ if (!summaryResult || !usedAttempt) {
2747
+ const withFreeTip = (message) => {
2748
+ if (!isNamedModelSelection || !wantsFreeNamedModel)
2749
+ return message;
2750
+ return (`${message}\n` +
2751
+ `Tip: run "summarize refresh-free" to refresh the free model candidates (writes ~/.summarize/config.json).`);
2752
+ };
2753
+ if (isNamedModelSelection) {
2754
+ if (lastError === null && missingRequiredEnvs.size > 0) {
2755
+ throw new Error(withFreeTip(`Missing ${Array.from(missingRequiredEnvs).sort().join(', ')} for --model ${requestedModelInput}.`));
1921
2756
  }
1922
- const usage = await streamResult.usage;
1923
- llmCalls.push({
1924
- provider: streamResult.provider,
1925
- model: streamResult.canonicalModelId,
1926
- usage,
1927
- purpose: 'summary',
1928
- });
1929
- summary = streamed;
1930
- if (shouldStreamSummaryToStdout) {
1931
- if (!streamed.endsWith('\n')) {
1932
- stdout.write('\n');
2757
+ if (lastError instanceof Error) {
2758
+ if (sawOpenRouterNoAllowedProviders) {
2759
+ const message = await buildOpenRouterNoAllowedProvidersMessage({
2760
+ attempts,
2761
+ fetchImpl: trackedFetch,
2762
+ timeoutMs,
2763
+ });
2764
+ throw new Error(withFreeTip(message), { cause: lastError });
1933
2765
  }
1934
- summaryAlreadyPrinted = true;
2766
+ throw new Error(withFreeTip(lastError.message), { cause: lastError });
1935
2767
  }
2768
+ throw new Error(withFreeTip(`No model available for --model ${requestedModelInput}`));
1936
2769
  }
1937
- }
1938
- else {
1939
- const result = await summarizeWithModelId({
1940
- modelId: parsedModelEffective.canonical,
1941
- prompt,
1942
- maxOutputTokens: maxOutputTokensForCall ?? undefined,
1943
- timeoutMs,
1944
- fetchImpl: trackedFetch,
1945
- apiKeys: apiKeysForLlm,
1946
- openrouter: openrouterOptions,
1947
- });
1948
- llmCalls.push({
1949
- provider: result.provider,
1950
- model: result.canonicalModelId,
1951
- usage: result.usage,
1952
- purpose: 'summary',
1953
- });
1954
- summary = result.text;
1955
- }
1956
- summary = summary.trim();
1957
- if (summary.length === 0) {
1958
- const last = getLastStreamError?.();
1959
- if (last instanceof Error) {
1960
- throw new Error(last.message, { cause: last });
2770
+ clearProgressForStdout();
2771
+ if (json) {
2772
+ const finishReport = shouldComputeReport ? await buildReport() : null;
2773
+ const payload = {
2774
+ input: {
2775
+ kind: 'url',
2776
+ url,
2777
+ timeoutMs,
2778
+ youtube: youtubeMode,
2779
+ firecrawl: firecrawlMode,
2780
+ format,
2781
+ markdown: effectiveMarkdownMode,
2782
+ length: lengthArg.kind === 'preset'
2783
+ ? { kind: 'preset', preset: lengthArg.preset }
2784
+ : { kind: 'chars', maxCharacters: lengthArg.maxCharacters },
2785
+ maxOutputTokens: maxOutputTokensArg,
2786
+ model: requestedModelLabel,
2787
+ },
2788
+ env: {
2789
+ hasXaiKey: Boolean(xaiApiKey),
2790
+ hasOpenAIKey: Boolean(apiKey),
2791
+ hasOpenRouterKey: Boolean(openrouterApiKey),
2792
+ hasApifyToken: Boolean(apifyToken),
2793
+ hasFirecrawlKey: firecrawlConfigured,
2794
+ hasGoogleKey: googleConfigured,
2795
+ hasAnthropicKey: anthropicConfigured,
2796
+ },
2797
+ extracted,
2798
+ prompt,
2799
+ llm: null,
2800
+ metrics: metricsEnabled ? finishReport : null,
2801
+ summary: extracted.content,
2802
+ };
2803
+ stdout.write(`${JSON.stringify(payload, null, 2)}\n`);
2804
+ return;
1961
2805
  }
1962
- throw new Error('LLM returned an empty summary');
2806
+ stdout.write(`${extracted.content}\n`);
2807
+ if (footerBaseParts.length > 0) {
2808
+ writeViaFooter([...footerBaseParts, 'no model']);
2809
+ }
2810
+ if (lastError instanceof Error && verbose) {
2811
+ writeVerbose(stderr, verbose, `auto failed all models: ${lastError.message}`, verboseColor);
2812
+ }
2813
+ return;
1963
2814
  }
2815
+ const { summary, summaryAlreadyPrinted, modelMeta, maxOutputTokensForCall } = summaryResult;
1964
2816
  if (json) {
1965
2817
  const finishReport = shouldComputeReport ? await buildReport() : null;
1966
2818
  const payload = {
@@ -1976,11 +2828,12 @@ export async function runCli(argv, { env, fetch, execFile: execFileOverride, std
1976
2828
  ? { kind: 'preset', preset: lengthArg.preset }
1977
2829
  : { kind: 'chars', maxCharacters: lengthArg.maxCharacters },
1978
2830
  maxOutputTokens: maxOutputTokensArg,
1979
- model,
2831
+ model: requestedModelLabel,
1980
2832
  },
1981
2833
  env: {
1982
2834
  hasXaiKey: Boolean(xaiApiKey),
1983
2835
  hasOpenAIKey: Boolean(apiKey),
2836
+ hasOpenRouterKey: Boolean(openrouterApiKey),
1984
2837
  hasApifyToken: Boolean(apifyToken),
1985
2838
  hasFirecrawlKey: firecrawlConfigured,
1986
2839
  hasGoogleKey: googleConfigured,
@@ -1989,26 +2842,25 @@ export async function runCli(argv, { env, fetch, execFile: execFileOverride, std
1989
2842
  extracted,
1990
2843
  prompt,
1991
2844
  llm: {
1992
- provider: parsedModelEffective.provider,
1993
- model: parsedModelEffective.canonical,
2845
+ provider: modelMeta.provider,
2846
+ model: usedAttempt.userModelId,
1994
2847
  maxCompletionTokens: maxOutputTokensForCall,
1995
2848
  strategy: 'single',
1996
2849
  },
1997
2850
  metrics: metricsEnabled ? finishReport : null,
1998
2851
  summary,
1999
2852
  };
2000
- if (metricsDetailed && finishReport) {
2001
- writeMetricsReport(finishReport);
2002
- }
2003
2853
  stdout.write(`${JSON.stringify(payload, null, 2)}\n`);
2004
2854
  if (metricsEnabled && finishReport) {
2005
2855
  const costUsd = await estimateCostUsd();
2006
2856
  writeFinishLine({
2007
2857
  stderr,
2008
2858
  elapsedMs: Date.now() - runStartedAtMs,
2009
- model: parsedModelEffective.canonical,
2859
+ model: usedAttempt.userModelId,
2010
2860
  report: finishReport,
2011
2861
  costUsd,
2862
+ detailed: metricsDetailed,
2863
+ extraParts: metricsDetailed ? buildDetailedLengthPartsForExtracted(extracted) : null,
2012
2864
  color: verboseColor,
2013
2865
  });
2014
2866
  }
@@ -2029,16 +2881,16 @@ export async function runCli(argv, { env, fetch, execFile: execFileOverride, std
2029
2881
  }
2030
2882
  }
2031
2883
  const report = shouldComputeReport ? await buildReport() : null;
2032
- if (metricsDetailed && report)
2033
- writeMetricsReport(report);
2034
2884
  if (metricsEnabled && report) {
2035
2885
  const costUsd = await estimateCostUsd();
2036
2886
  writeFinishLine({
2037
2887
  stderr,
2038
2888
  elapsedMs: Date.now() - runStartedAtMs,
2039
- model: parsedModelEffective.canonical,
2889
+ model: modelMeta.canonical,
2040
2890
  report,
2041
2891
  costUsd,
2892
+ detailed: metricsDetailed,
2893
+ extraParts: metricsDetailed ? buildDetailedLengthPartsForExtracted(extracted) : null,
2042
2894
  color: verboseColor,
2043
2895
  });
2044
2896
  }