@steipete/summarize 0.2.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. package/CHANGELOG.md +33 -3
  2. package/README.md +41 -9
  3. package/dist/cli.cjs +5209 -740
  4. package/dist/cli.cjs.map +4 -4
  5. package/dist/esm/content/link-preview/client.js +6 -0
  6. package/dist/esm/content/link-preview/client.js.map +1 -1
  7. package/dist/esm/content/link-preview/transcript/index.js +6 -0
  8. package/dist/esm/content/link-preview/transcript/index.js.map +1 -1
  9. package/dist/esm/content/link-preview/transcript/providers/youtube/yt-dlp.js +213 -0
  10. package/dist/esm/content/link-preview/transcript/providers/youtube/yt-dlp.js.map +1 -0
  11. package/dist/esm/content/link-preview/transcript/providers/youtube.js +40 -2
  12. package/dist/esm/content/link-preview/transcript/providers/youtube.js.map +1 -1
  13. package/dist/esm/flags.js +20 -1
  14. package/dist/esm/flags.js.map +1 -1
  15. package/dist/esm/llm/generate-text.js +51 -14
  16. package/dist/esm/llm/generate-text.js.map +1 -1
  17. package/dist/esm/llm/html-to-markdown.js +3 -2
  18. package/dist/esm/llm/html-to-markdown.js.map +1 -1
  19. package/dist/esm/markitdown.js +54 -0
  20. package/dist/esm/markitdown.js.map +1 -0
  21. package/dist/esm/prompts/file.js +19 -0
  22. package/dist/esm/prompts/file.js.map +1 -1
  23. package/dist/esm/prompts/index.js +1 -1
  24. package/dist/esm/prompts/index.js.map +1 -1
  25. package/dist/esm/run.js +302 -44
  26. package/dist/esm/run.js.map +1 -1
  27. package/dist/esm/version.js +1 -1
  28. package/dist/types/content/link-preview/client.d.ts +3 -0
  29. package/dist/types/content/link-preview/content/types.d.ts +1 -1
  30. package/dist/types/content/link-preview/deps.d.ts +3 -0
  31. package/dist/types/content/link-preview/transcript/providers/youtube/yt-dlp.d.ts +15 -0
  32. package/dist/types/content/link-preview/transcript/types.d.ts +4 -0
  33. package/dist/types/flags.d.ts +5 -1
  34. package/dist/types/llm/generate-text.d.ts +8 -2
  35. package/dist/types/llm/html-to-markdown.d.ts +4 -1
  36. package/dist/types/markitdown.d.ts +10 -0
  37. package/dist/types/prompts/file.d.ts +7 -0
  38. package/dist/types/prompts/index.d.ts +1 -1
  39. package/dist/types/run.d.ts +3 -1
  40. package/dist/types/version.d.ts +1 -1
  41. package/docs/README.md +1 -1
  42. package/docs/extract-only.md +10 -7
  43. package/docs/firecrawl.md +2 -2
  44. package/docs/site/docs/config.html +3 -3
  45. package/docs/site/docs/extract-only.html +7 -5
  46. package/docs/site/docs/firecrawl.html +6 -6
  47. package/docs/site/docs/index.html +2 -2
  48. package/docs/site/docs/llm.html +2 -2
  49. package/docs/site/docs/openai.html +2 -2
  50. package/docs/site/docs/website.html +7 -4
  51. package/docs/site/docs/youtube.html +2 -2
  52. package/docs/site/index.html +1 -1
  53. package/docs/website.md +10 -7
  54. package/docs/youtube.md +6 -3
  55. package/package.json +5 -1
package/dist/esm/run.js CHANGED
@@ -11,17 +11,19 @@ import { buildAssetPromptMessages, classifyUrl, loadLocalAsset, loadRemoteAsset,
11
11
  import { createLinkPreviewClient } from './content/index.js';
12
12
  import { buildRunMetricsReport } from './costs.js';
13
13
  import { createFirecrawlScraper } from './firecrawl.js';
14
- import { parseDurationMs, parseFirecrawlMode, parseLengthArg, parseMarkdownMode, parseMaxOutputTokensArg, parseMetricsMode, parseRenderMode, parseStreamMode, parseYoutubeMode, } from './flags.js';
14
+ import { parseDurationMs, parseExtractFormat, parseFirecrawlMode, parseLengthArg, parseMarkdownMode, parseMaxOutputTokensArg, parseMetricsMode, parsePreprocessMode, parseRenderMode, parseStreamMode, parseYoutubeMode, } from './flags.js';
15
15
  import { generateTextWithModelId, streamTextWithModelId } from './llm/generate-text.js';
16
16
  import { resolveGoogleModelForUsage } from './llm/google-models.js';
17
17
  import { createHtmlToMarkdownConverter } from './llm/html-to-markdown.js';
18
18
  import { normalizeGatewayStyleModelId, parseGatewayStyleModelId } from './llm/model-id.js';
19
+ import { convertToMarkdownWithMarkitdown } from './markitdown.js';
19
20
  import { loadLiteLlmCatalog, resolveLiteLlmMaxInputTokensForModelId, resolveLiteLlmMaxOutputTokensForModelId, resolveLiteLlmPricingForModelId, } from './pricing/litellm.js';
20
- import { buildFileSummaryPrompt, buildLinkSummaryPrompt } from './prompts/index.js';
21
+ import { buildFileSummaryPrompt, buildFileTextSummaryPrompt, buildLinkSummaryPrompt, } from './prompts/index.js';
21
22
  import { startOscProgress } from './tty/osc-progress.js';
22
23
  import { startSpinner } from './tty/spinner.js';
23
24
  import { resolvePackageVersion } from './version.js';
24
25
  const BIRD_TIP = 'Tip: Install bird🐦 for better Twitter support: https://github.com/steipete/bird';
26
+ const UVX_TIP = 'Tip: Install uv (uvx) for local Markdown conversion: brew install uv (or set UVX_PATH to your uvx binary).';
25
27
  const TWITTER_HOSTS = new Set(['x.com', 'twitter.com', 'mobile.twitter.com']);
26
28
  const SUMMARY_LENGTH_MAX_CHARACTERS = {
27
29
  short: 1200,
@@ -58,7 +60,7 @@ function isExecutable(filePath) {
58
60
  }
59
61
  function hasBirdCli(env) {
60
62
  const candidates = [];
61
- const pathEnv = env.PATH ?? process.env.PATH ?? '';
63
+ const pathEnv = env.PATH ?? '';
62
64
  for (const entry of pathEnv.split(path.delimiter)) {
63
65
  if (!entry)
64
66
  continue;
@@ -66,6 +68,19 @@ function hasBirdCli(env) {
66
68
  }
67
69
  return candidates.some((candidate) => isExecutable(candidate));
68
70
  }
71
+ function hasUvxCli(env) {
72
+ if (typeof env.UVX_PATH === 'string' && env.UVX_PATH.trim().length > 0) {
73
+ return true;
74
+ }
75
+ const candidates = [];
76
+ const pathEnv = env.PATH ?? '';
77
+ for (const entry of pathEnv.split(path.delimiter)) {
78
+ if (!entry)
79
+ continue;
80
+ candidates.push(path.join(entry, 'uvx'));
81
+ }
82
+ return candidates.some((candidate) => isExecutable(candidate));
83
+ }
69
84
  async function readTweetWithBird(args) {
70
85
  return await new Promise((resolve, reject) => {
71
86
  execFile('bird', ['read', args.url, '--json'], {
@@ -108,20 +123,34 @@ function withBirdTip(error, url, env) {
108
123
  const combined = `${message}\n${BIRD_TIP}`;
109
124
  return error instanceof Error ? new Error(combined, { cause: error }) : new Error(combined);
110
125
  }
126
+ function withUvxTip(error, env) {
127
+ if (hasUvxCli(env)) {
128
+ return error instanceof Error ? error : new Error(String(error));
129
+ }
130
+ const message = error instanceof Error ? error.message : String(error);
131
+ const combined = `${message}\n${UVX_TIP}`;
132
+ return error instanceof Error ? new Error(combined, { cause: error }) : new Error(combined);
133
+ }
111
134
  const MAX_TEXT_BYTES_DEFAULT = 10 * 1024 * 1024;
112
135
  function buildProgram() {
113
136
  return new Command()
114
137
  .name('summarize')
115
138
  .description('Summarize web pages and YouTube links (uses direct provider API keys).')
116
139
  .argument('[input]', 'URL or local file path to summarize')
117
- .option('--youtube <mode>', 'YouTube transcript source: auto (web then apify), web (youtubei/captionTracks), apify', 'auto')
118
- .option('--firecrawl <mode>', 'Firecrawl usage: off, auto (fallback), always (try Firecrawl first).', 'auto')
119
- .option('--markdown <mode>', 'Website Markdown output: off, auto (use LLM when configured), llm (force LLM). Only affects --extract-only for non-YouTube URLs.', 'auto')
140
+ .option('--youtube <mode>', 'YouTube transcript source: auto, web (youtubei/captionTracks), yt-dlp (audio+whisper), apify', 'auto')
141
+ .option('--firecrawl <mode>', 'Firecrawl usage: off, auto (fallback), always (try Firecrawl first). Note: in --format md website mode, defaults to always when FIRECRAWL_API_KEY is set (unless --firecrawl is set explicitly).', 'auto')
142
+ .option('--format <format>', 'Website/file content format: md|text. For websites: controls the extraction format. For files: controls whether we try to preprocess to Markdown for model compatibility. (default: text)', 'text')
143
+ .addOption(new Option('--preprocess <mode>', 'Preprocess inputs for model compatibility: off, auto (fallback), always.')
144
+ .choices(['off', 'auto', 'always'])
145
+ .default('auto'))
146
+ .addOption(new Option('--markdown-mode <mode>', 'HTML→Markdown conversion: off, auto (prefer Firecrawl when configured, then LLM when configured, then markitdown when available), llm (force LLM). Only affects --format md for non-YouTube URLs.').default('auto'))
147
+ .addOption(new Option('--markdown <mode>', 'Deprecated alias for --markdown-mode (use --extract --format md --markdown-mode ...)').hideHelp())
120
148
  .option('--length <length>', 'Summary length: short|medium|long|xl|xxl or a character limit like 20000, 20k', 'medium')
121
149
  .option('--max-output-tokens <count>', 'Hard cap for LLM output tokens (e.g. 2000, 2k). Overrides provider defaults.', undefined)
122
150
  .option('--timeout <duration>', 'Timeout for content fetching and LLM request: 30 (seconds), 30s, 2m, 5000ms', '2m')
123
151
  .option('--model <model>', 'LLM model id (gateway-style): xai/..., openai/..., google/... (default: google/gemini-3-flash-preview)', undefined)
124
- .option('--extract-only', 'Print extracted content and exit (no LLM summary)', false)
152
+ .option('--extract', 'Print extracted content and exit (no LLM summary)', false)
153
+ .addOption(new Option('--extract-only', 'Deprecated alias for --extract').hideHelp())
125
154
  .option('--json', 'Output structured JSON (includes prompt + metrics)', false)
126
155
  .option('--stream <mode>', 'Stream LLM output: auto (TTY only), on, off. Note: streaming is disabled in --json mode.', 'auto')
127
156
  .option('--render <mode>', 'Render Markdown output: auto (TTY only), md-live, md, plain. Note: auto selects md-live when streaming to a TTY.', 'auto')
@@ -250,6 +279,30 @@ function getTextContentFromAttachment(attachment) {
250
279
  }
251
280
  return { content: '', bytes: 0 };
252
281
  }
282
+ function getFileBytesFromAttachment(attachment) {
283
+ if (attachment.part.type !== 'file')
284
+ return null;
285
+ const data = attachment.part.data;
286
+ return data instanceof Uint8Array ? data : null;
287
+ }
288
+ function shouldMarkitdownConvertMediaType(mediaType) {
289
+ const mt = mediaType.toLowerCase();
290
+ if (mt === 'application/pdf')
291
+ return true;
292
+ if (mt === 'application/rtf')
293
+ return true;
294
+ if (mt === 'text/html' || mt === 'application/xhtml+xml')
295
+ return true;
296
+ if (mt === 'application/msword')
297
+ return true;
298
+ if (mt.startsWith('application/vnd.openxmlformats-officedocument.'))
299
+ return true;
300
+ if (mt === 'application/vnd.ms-excel')
301
+ return true;
302
+ if (mt === 'application/vnd.ms-powerpoint')
303
+ return true;
304
+ return false;
305
+ }
253
306
  function assertProviderSupportsAttachment({ provider, modelId, attachment, }) {
254
307
  // xAI via AI SDK currently supports image parts, but not generic file parts (e.g. PDFs).
255
308
  if (provider === 'xai' &&
@@ -314,29 +367,34 @@ function attachRichHelp(program, env, stdout) {
314
367
  program.addHelpText('after', () => `
315
368
  ${heading('Examples')}
316
369
  ${cmd('summarize "https://example.com"')}
317
- ${cmd('summarize "https://example.com" --extract-only')} ${dim('# website markdown (LLM if configured)')}
318
- ${cmd('summarize "https://example.com" --extract-only --markdown llm')} ${dim('# website markdown via LLM')}
319
- ${cmd('summarize "https://www.youtube.com/watch?v=I845O57ZSy4&t=11s" --extract-only --youtube web')}
370
+ ${cmd('summarize "https://example.com" --extract')} ${dim('# extracted plain text')}
371
+ ${cmd('summarize "https://example.com" --extract --format md')} ${dim('# extracted markdown (prefers Firecrawl when configured)')}
372
+ ${cmd('summarize "https://example.com" --extract --format md --markdown-mode llm')} ${dim('# extracted markdown via LLM')}
373
+ ${cmd('summarize "https://www.youtube.com/watch?v=I845O57ZSy4&t=11s" --extract --youtube web')}
320
374
  ${cmd('summarize "https://example.com" --length 20k --max-output-tokens 2k --timeout 2m --model openai/gpt-5.2')}
321
- ${cmd('OPENAI_BASE_URL=https://openrouter.ai/api/v1 OPENROUTER_API_KEY=... summarize "https://example.com" --model openai/xiaomi/mimo-v2-flash:free')}
375
+ ${cmd('OPENROUTER_API_KEY=... summarize "https://example.com" --model openai/openai/gpt-oss-20b')}
322
376
  ${cmd('summarize "https://example.com" --json --verbose')}
323
377
 
324
378
  ${heading('Env Vars')}
325
379
  XAI_API_KEY optional (required for xai/... models)
326
380
  OPENAI_API_KEY optional (required for openai/... models)
327
381
  OPENAI_BASE_URL optional (OpenAI-compatible API endpoint; e.g. OpenRouter)
328
- OPENROUTER_API_KEY optional (used when OPENAI_BASE_URL points to OpenRouter)
382
+ OPENROUTER_API_KEY optional (routes openai/... models through OpenRouter)
383
+ OPENROUTER_PROVIDERS optional (provider fallback order, e.g. "groq,google-vertex")
329
384
  GEMINI_API_KEY optional (required for google/... models)
330
385
  ANTHROPIC_API_KEY optional (required for anthropic/... models)
331
386
  SUMMARIZE_MODEL optional (overrides default model selection)
332
387
  FIRECRAWL_API_KEY optional website extraction fallback (Markdown)
333
388
  APIFY_API_TOKEN optional YouTube transcript fallback
389
+ YT_DLP_PATH optional path to yt-dlp binary for audio extraction
390
+ FAL_KEY optional FAL AI API key for audio transcription
334
391
  `);
335
392
  }
336
- async function summarizeWithModelId({ modelId, prompt, maxOutputTokens, timeoutMs, fetchImpl, apiKeys, }) {
393
+ async function summarizeWithModelId({ modelId, prompt, maxOutputTokens, timeoutMs, fetchImpl, apiKeys, openrouter, }) {
337
394
  const result = await generateTextWithModelId({
338
395
  modelId,
339
396
  apiKeys,
397
+ openrouter,
340
398
  prompt,
341
399
  temperature: 0,
342
400
  maxOutputTokens,
@@ -444,10 +502,11 @@ function writeFinishLine({ stderr, elapsedMs, model, report, costUsd, color, })
444
502
  stderr.write('\n');
445
503
  stderr.write(`${ansi('1;32', line, color)}\n`);
446
504
  }
447
- export async function runCli(argv, { env, fetch, stdout, stderr }) {
505
+ export async function runCli(argv, { env, fetch, execFile: execFileOverride, stdout, stderr }) {
448
506
  ;
449
507
  globalThis.AI_SDK_LOG_WARNINGS = false;
450
508
  const normalizedArgv = argv.filter((arg) => arg !== '--');
509
+ const execFileImpl = execFileOverride ?? execFile;
451
510
  const version = resolvePackageVersion();
452
511
  const program = buildProgram();
453
512
  program.configureOutput({
@@ -484,7 +543,7 @@ export async function runCli(argv, { env, fetch, stdout, stderr }) {
484
543
  const lengthArg = parseLengthArg(program.opts().length);
485
544
  const maxOutputTokensArg = parseMaxOutputTokensArg(program.opts().maxOutputTokens);
486
545
  const timeoutMs = parseDurationMs(program.opts().timeout);
487
- const extractOnly = Boolean(program.opts().extractOnly);
546
+ const extractMode = Boolean(program.opts().extract) || Boolean(program.opts().extractOnly);
488
547
  const json = Boolean(program.opts().json);
489
548
  const streamMode = parseStreamMode(program.opts().stream);
490
549
  const renderMode = parseRenderMode(program.opts().render);
@@ -492,20 +551,40 @@ export async function runCli(argv, { env, fetch, stdout, stderr }) {
492
551
  const metricsMode = parseMetricsMode(program.opts().metrics);
493
552
  const metricsEnabled = metricsMode !== 'off';
494
553
  const metricsDetailed = metricsMode === 'detailed';
495
- const markdownMode = parseMarkdownMode(program.opts().markdown);
554
+ const preprocessMode = parsePreprocessMode(program.opts().preprocess);
555
+ const format = parseExtractFormat(program.opts().format);
496
556
  const shouldComputeReport = metricsEnabled;
497
557
  const isYoutubeUrl = typeof url === 'string' ? /youtube\.com|youtu\.be/i.test(url) : false;
558
+ const firecrawlExplicitlySet = normalizedArgv.some((arg) => arg === '--firecrawl' || arg.startsWith('--firecrawl='));
559
+ const markdownModeExplicitlySet = normalizedArgv.some((arg) => arg === '--markdown-mode' ||
560
+ arg.startsWith('--markdown-mode=') ||
561
+ arg === '--markdown' ||
562
+ arg.startsWith('--markdown='));
563
+ const markdownMode = format === 'markdown'
564
+ ? parseMarkdownMode(program.opts().markdownMode ??
565
+ program.opts().markdown ??
566
+ 'auto')
567
+ : 'off';
498
568
  const requestedFirecrawlMode = parseFirecrawlMode(program.opts().firecrawl);
499
569
  const modelArg = typeof program.opts().model === 'string' ? program.opts().model : null;
500
570
  const { config, path: configPath } = loadSummarizeConfig({ env });
501
571
  const xaiKeyRaw = typeof env.XAI_API_KEY === 'string' ? env.XAI_API_KEY : null;
502
572
  const openaiBaseUrl = typeof env.OPENAI_BASE_URL === 'string' ? env.OPENAI_BASE_URL : null;
503
573
  const openRouterKeyRaw = typeof env.OPENROUTER_API_KEY === 'string' ? env.OPENROUTER_API_KEY : null;
574
+ const openRouterProvidersRaw = typeof env.OPENROUTER_PROVIDERS === 'string' ? env.OPENROUTER_PROVIDERS : null;
575
+ const openRouterProviders = openRouterProvidersRaw
576
+ ? openRouterProvidersRaw
577
+ .split(',')
578
+ .map((p) => p.trim())
579
+ .filter(Boolean)
580
+ : null;
504
581
  const openaiKeyRaw = typeof env.OPENAI_API_KEY === 'string' ? env.OPENAI_API_KEY : null;
505
582
  const apiKey = typeof openaiBaseUrl === 'string' && /openrouter\.ai/i.test(openaiBaseUrl)
506
583
  ? (openRouterKeyRaw ?? openaiKeyRaw)
507
584
  : openaiKeyRaw;
508
585
  const apifyToken = typeof env.APIFY_API_TOKEN === 'string' ? env.APIFY_API_TOKEN : null;
586
+ const ytDlpPath = typeof env.YT_DLP_PATH === 'string' ? env.YT_DLP_PATH : null;
587
+ const falApiKey = typeof env.FAL_KEY === 'string' ? env.FAL_KEY : null;
509
588
  const firecrawlKey = typeof env.FIRECRAWL_API_KEY === 'string' ? env.FIRECRAWL_API_KEY : null;
510
589
  const anthropicKeyRaw = typeof env.ANTHROPIC_API_KEY === 'string' ? env.ANTHROPIC_API_KEY : null;
511
590
  const googleKeyRaw = typeof env.GEMINI_API_KEY === 'string'
@@ -520,9 +599,19 @@ export async function runCli(argv, { env, fetch, stdout, stderr }) {
520
599
  const xaiApiKey = xaiKeyRaw?.trim() ?? null;
521
600
  const googleApiKey = googleKeyRaw?.trim() ?? null;
522
601
  const anthropicApiKey = anthropicKeyRaw?.trim() ?? null;
602
+ const openrouterApiKey = openRouterKeyRaw?.trim() ?? null;
603
+ const openaiTranscriptionKey = openaiKeyRaw?.trim() ?? null;
523
604
  const googleConfigured = typeof googleApiKey === 'string' && googleApiKey.length > 0;
524
605
  const xaiConfigured = typeof xaiApiKey === 'string' && xaiApiKey.length > 0;
525
606
  const anthropicConfigured = typeof anthropicApiKey === 'string' && anthropicApiKey.length > 0;
607
+ const openrouterConfigured = typeof openrouterApiKey === 'string' && openrouterApiKey.length > 0;
608
+ const openrouterOptions = openRouterProviders ? { providers: openRouterProviders } : undefined;
609
+ if (markdownModeExplicitlySet && format !== 'markdown') {
610
+ throw new Error('--markdown-mode is only supported with --format md');
611
+ }
612
+ if (markdownModeExplicitlySet && inputTarget.kind !== 'url') {
613
+ throw new Error('--markdown-mode is only supported for website URLs');
614
+ }
526
615
  const llmCalls = [];
527
616
  let firecrawlRequests = 0;
528
617
  let apifyRequests = 0;
@@ -625,7 +714,7 @@ export async function runCli(argv, { env, fetch, stdout, stderr }) {
625
714
  return streamMode;
626
715
  return isRichTty(stdout) ? 'on' : 'off';
627
716
  })();
628
- const streamingEnabled = effectiveStreamMode === 'on' && !json && !extractOnly;
717
+ const streamingEnabled = effectiveStreamMode === 'on' && !json && !extractMode;
629
718
  const effectiveRenderMode = (() => {
630
719
  if (renderMode !== 'auto')
631
720
  return renderMode;
@@ -644,8 +733,8 @@ export async function runCli(argv, { env, fetch, stdout, stderr }) {
644
733
  stderr.write(`metrics apify requests=${report.services.apify.requests}\n`);
645
734
  stderr.write(`metrics total tok(i/o/t)=${promptTokens ?? 'unknown'}/${completionTokens ?? 'unknown'}/${totalTokens ?? 'unknown'}\n`);
646
735
  };
647
- if (extractOnly && inputTarget.kind !== 'url') {
648
- throw new Error('--extract-only is only supported for website/YouTube URLs');
736
+ if (extractMode && inputTarget.kind !== 'url') {
737
+ throw new Error('--extract is only supported for website/YouTube URLs');
649
738
  }
650
739
  const progressEnabled = isRichTty(stderr) && !verbose && !json;
651
740
  let clearProgressBeforeStdout = null;
@@ -661,6 +750,7 @@ export async function runCli(argv, { env, fetch, stdout, stderr }) {
661
750
  openaiApiKey: apiKey,
662
751
  googleApiKey: googleConfigured ? googleApiKey : null,
663
752
  anthropicApiKey: anthropicConfigured ? anthropicApiKey : null,
753
+ openrouterApiKey: openrouterConfigured ? openrouterApiKey : null,
664
754
  };
665
755
  const requiredKeyEnv = parsedModel.provider === 'xai'
666
756
  ? 'XAI_API_KEY'
@@ -668,22 +758,17 @@ export async function runCli(argv, { env, fetch, stdout, stderr }) {
668
758
  ? 'GEMINI_API_KEY (or GOOGLE_GENERATIVE_AI_API_KEY / GOOGLE_API_KEY)'
669
759
  : parsedModel.provider === 'anthropic'
670
760
  ? 'ANTHROPIC_API_KEY'
671
- : 'OPENAI_API_KEY';
761
+ : 'OPENAI_API_KEY (or OPENROUTER_API_KEY)';
672
762
  const hasRequiredKey = parsedModel.provider === 'xai'
673
763
  ? Boolean(xaiApiKey)
674
764
  : parsedModel.provider === 'google'
675
765
  ? googleConfigured
676
766
  : parsedModel.provider === 'anthropic'
677
767
  ? anthropicConfigured
678
- : Boolean(apiKey);
768
+ : Boolean(apiKey) || openrouterConfigured;
679
769
  if (!hasRequiredKey) {
680
770
  throw new Error(`Missing ${requiredKeyEnv} for model ${parsedModel.canonical}. Set the env var or choose a different --model.`);
681
771
  }
682
- assertProviderSupportsAttachment({
683
- provider: parsedModel.provider,
684
- modelId: parsedModel.canonical,
685
- attachment: { part: attachment.part, mediaType: attachment.mediaType },
686
- });
687
772
  const modelResolution = await resolveModelIdForLlmCall({
688
773
  parsedModel,
689
774
  apiKeys: { googleApiKey: apiKeysForLlm.googleApiKey },
@@ -701,14 +786,114 @@ export async function runCli(argv, { env, fetch, stdout, stderr }) {
701
786
  if (textContent && textContent.bytes > MAX_TEXT_BYTES_DEFAULT) {
702
787
  throw new Error(`Text file too large (${formatBytes(textContent.bytes)}). Limit is ${formatBytes(MAX_TEXT_BYTES_DEFAULT)}.`);
703
788
  }
789
+ const fileBytes = getFileBytesFromAttachment(attachment);
790
+ const canPreprocessWithMarkitdown = format === 'markdown' &&
791
+ preprocessMode !== 'off' &&
792
+ hasUvxCli(env) &&
793
+ attachment.part.type === 'file' &&
794
+ fileBytes !== null &&
795
+ shouldMarkitdownConvertMediaType(attachment.mediaType);
704
796
  const summaryLengthTarget = lengthArg.kind === 'preset' ? lengthArg.preset : { maxCharacters: lengthArg.maxCharacters };
705
- const promptText = buildFileSummaryPrompt({
706
- filename: attachment.filename,
707
- mediaType: attachment.mediaType,
708
- summaryLength: summaryLengthTarget,
709
- contentLength: textContent?.content.length ?? null,
710
- });
711
- const promptPayload = buildAssetPromptPayload({ promptText, attachment, textContent });
797
+ let promptText = '';
798
+ const buildAttachmentPromptPayload = () => {
799
+ promptText = buildFileSummaryPrompt({
800
+ filename: attachment.filename,
801
+ mediaType: attachment.mediaType,
802
+ summaryLength: summaryLengthTarget,
803
+ contentLength: textContent?.content.length ?? null,
804
+ });
805
+ return buildAssetPromptPayload({ promptText, attachment, textContent });
806
+ };
807
+ const buildMarkitdownPromptPayload = (markdown) => {
808
+ promptText = buildFileTextSummaryPrompt({
809
+ filename: attachment.filename,
810
+ originalMediaType: attachment.mediaType,
811
+ contentMediaType: 'text/markdown',
812
+ summaryLength: summaryLengthTarget,
813
+ contentLength: markdown.length,
814
+ });
815
+ return `${promptText}\n\n---\n\n${markdown}`.trim();
816
+ };
817
+ let preprocessedMarkdown = null;
818
+ let usingPreprocessedMarkdown = false;
819
+ if (preprocessMode === 'always' && canPreprocessWithMarkitdown) {
820
+ if (!fileBytes) {
821
+ throw new Error('Internal error: missing file bytes for markitdown preprocessing');
822
+ }
823
+ try {
824
+ preprocessedMarkdown = await convertToMarkdownWithMarkitdown({
825
+ bytes: fileBytes,
826
+ filenameHint: attachment.filename,
827
+ mediaTypeHint: attachment.mediaType,
828
+ uvxCommand: env.UVX_PATH,
829
+ timeoutMs,
830
+ env,
831
+ execFileImpl,
832
+ });
833
+ }
834
+ catch (error) {
835
+ const message = error instanceof Error ? error.message : String(error);
836
+ throw new Error(`Failed to preprocess ${attachment.mediaType} with markitdown: ${message} (disable with --preprocess off).`);
837
+ }
838
+ if (Buffer.byteLength(preprocessedMarkdown, 'utf8') > MAX_TEXT_BYTES_DEFAULT) {
839
+ throw new Error(`Preprocessed Markdown too large (${formatBytes(Buffer.byteLength(preprocessedMarkdown, 'utf8'))}). Limit is ${formatBytes(MAX_TEXT_BYTES_DEFAULT)}.`);
840
+ }
841
+ usingPreprocessedMarkdown = true;
842
+ }
843
+ let promptPayload = buildAttachmentPromptPayload();
844
+ if (usingPreprocessedMarkdown) {
845
+ if (!preprocessedMarkdown) {
846
+ throw new Error('Internal error: missing markitdown content for preprocessing');
847
+ }
848
+ promptPayload = buildMarkitdownPromptPayload(preprocessedMarkdown);
849
+ }
850
+ if (!usingPreprocessedMarkdown) {
851
+ try {
852
+ assertProviderSupportsAttachment({
853
+ provider: parsedModel.provider,
854
+ modelId: parsedModel.canonical,
855
+ attachment: { part: attachment.part, mediaType: attachment.mediaType },
856
+ });
857
+ }
858
+ catch (error) {
859
+ if (!canPreprocessWithMarkitdown) {
860
+ if (format === 'markdown' &&
861
+ preprocessMode !== 'off' &&
862
+ attachment.part.type === 'file' &&
863
+ shouldMarkitdownConvertMediaType(attachment.mediaType) &&
864
+ !hasUvxCli(env)) {
865
+ throw withUvxTip(error, env);
866
+ }
867
+ throw error;
868
+ }
869
+ if (!fileBytes) {
870
+ throw new Error('Internal error: missing file bytes for markitdown preprocessing');
871
+ }
872
+ try {
873
+ preprocessedMarkdown = await convertToMarkdownWithMarkitdown({
874
+ bytes: fileBytes,
875
+ filenameHint: attachment.filename,
876
+ mediaTypeHint: attachment.mediaType,
877
+ uvxCommand: env.UVX_PATH,
878
+ timeoutMs,
879
+ env,
880
+ execFileImpl,
881
+ });
882
+ }
883
+ catch (markitdownError) {
884
+ if (preprocessMode === 'auto') {
885
+ throw error;
886
+ }
887
+ const message = markitdownError instanceof Error ? markitdownError.message : String(markitdownError);
888
+ throw new Error(`Failed to preprocess ${attachment.mediaType} with markitdown: ${message} (disable with --preprocess off).`);
889
+ }
890
+ if (Buffer.byteLength(preprocessedMarkdown, 'utf8') > MAX_TEXT_BYTES_DEFAULT) {
891
+ throw new Error(`Preprocessed Markdown too large (${formatBytes(Buffer.byteLength(preprocessedMarkdown, 'utf8'))}). Limit is ${formatBytes(MAX_TEXT_BYTES_DEFAULT)}.`);
892
+ }
893
+ usingPreprocessedMarkdown = true;
894
+ promptPayload = buildMarkitdownPromptPayload(preprocessedMarkdown);
895
+ }
896
+ }
712
897
  const maxInputTokensForCall = await resolveMaxInputTokensForCall(parsedModelEffective.canonical);
713
898
  if (typeof maxInputTokensForCall === 'number' &&
714
899
  Number.isFinite(maxInputTokensForCall) &&
@@ -731,6 +916,7 @@ export async function runCli(argv, { env, fetch, stdout, stderr }) {
731
916
  streamResult = await streamTextWithModelId({
732
917
  modelId: parsedModelEffective.canonical,
733
918
  apiKeys: apiKeysForLlm,
919
+ openrouter: openrouterOptions,
734
920
  prompt: promptPayload,
735
921
  temperature: 0,
736
922
  maxOutputTokens: maxOutputTokensForCall ?? undefined,
@@ -748,6 +934,7 @@ export async function runCli(argv, { env, fetch, stdout, stderr }) {
748
934
  timeoutMs,
749
935
  fetchImpl: trackedFetch,
750
936
  apiKeys: apiKeysForLlm,
937
+ openrouter: openrouterOptions,
751
938
  });
752
939
  llmCalls.push({
753
940
  provider: result.provider,
@@ -768,6 +955,7 @@ export async function runCli(argv, { env, fetch, stdout, stderr }) {
768
955
  timeoutMs,
769
956
  fetchImpl: trackedFetch,
770
957
  apiKeys: apiKeysForLlm,
958
+ openrouter: openrouterOptions,
771
959
  });
772
960
  llmCalls.push({
773
961
  provider: result.provider,
@@ -871,6 +1059,7 @@ export async function runCli(argv, { env, fetch, stdout, stderr }) {
871
1059
  timeoutMs,
872
1060
  fetchImpl: trackedFetch,
873
1061
  apiKeys: apiKeysForLlm,
1062
+ openrouter: openrouterOptions,
874
1063
  });
875
1064
  }
876
1065
  catch (error) {
@@ -1106,12 +1295,21 @@ export async function runCli(argv, { env, fetch, stdout, stderr }) {
1106
1295
  if (!url) {
1107
1296
  throw new Error('Only HTTP and HTTPS URLs can be summarized');
1108
1297
  }
1109
- const firecrawlMode = requestedFirecrawlMode;
1298
+ const wantsMarkdown = format === 'markdown' && !isYoutubeUrl;
1299
+ if (wantsMarkdown && markdownMode === 'off') {
1300
+ throw new Error('--format md conflicts with --markdown-mode off (use --format text)');
1301
+ }
1302
+ const firecrawlMode = (() => {
1303
+ if (wantsMarkdown && !isYoutubeUrl && !firecrawlExplicitlySet && firecrawlConfigured) {
1304
+ return 'always';
1305
+ }
1306
+ return requestedFirecrawlMode;
1307
+ })();
1110
1308
  if (firecrawlMode === 'always' && !firecrawlConfigured) {
1111
1309
  throw new Error('--firecrawl always requires FIRECRAWL_API_KEY');
1112
1310
  }
1113
- const effectiveMarkdownMode = markdownMode;
1114
- const markdownRequested = extractOnly && !isYoutubeUrl && effectiveMarkdownMode !== 'off';
1311
+ const markdownRequested = wantsMarkdown;
1312
+ const effectiveMarkdownMode = markdownRequested ? markdownMode : 'off';
1115
1313
  const hasKeyForModel = parsedModelForLlm.provider === 'xai'
1116
1314
  ? xaiConfigured
1117
1315
  : parsedModelForLlm.provider === 'google'
@@ -1128,28 +1326,70 @@ export async function runCli(argv, { env, fetch, stdout, stderr }) {
1128
1326
  : parsedModelForLlm.provider === 'anthropic'
1129
1327
  ? 'ANTHROPIC_API_KEY'
1130
1328
  : 'OPENAI_API_KEY';
1131
- throw new Error(`--markdown llm requires ${required} for model ${parsedModelForLlm.canonical}`);
1329
+ throw new Error(`--markdown-mode llm requires ${required} for model ${parsedModelForLlm.canonical}`);
1132
1330
  }
1133
- writeVerbose(stderr, verbose, `config url=${url} timeoutMs=${timeoutMs} youtube=${youtubeMode} firecrawl=${firecrawlMode} length=${lengthArg.kind === 'preset' ? lengthArg.preset : `${lengthArg.maxCharacters} chars`} maxOutputTokens=${formatOptionalNumber(maxOutputTokensArg)} json=${json} extractOnly=${extractOnly} markdown=${effectiveMarkdownMode} model=${model} stream=${effectiveStreamMode} render=${effectiveRenderMode}`, verboseColor);
1331
+ writeVerbose(stderr, verbose, `config url=${url} timeoutMs=${timeoutMs} youtube=${youtubeMode} firecrawl=${firecrawlMode} length=${lengthArg.kind === 'preset' ? lengthArg.preset : `${lengthArg.maxCharacters} chars`} maxOutputTokens=${formatOptionalNumber(maxOutputTokensArg)} json=${json} extract=${extractMode} format=${format} preprocess=${preprocessMode} markdownMode=${markdownMode} model=${model} stream=${effectiveStreamMode} render=${effectiveRenderMode}`, verboseColor);
1134
1332
  writeVerbose(stderr, verbose, `configFile path=${formatOptionalString(configPath)} model=${formatOptionalString(config?.model ?? null)}`, verboseColor);
1135
- writeVerbose(stderr, verbose, `env xaiKey=${xaiConfigured} openaiKey=${Boolean(apiKey)} googleKey=${googleConfigured} anthropicKey=${anthropicConfigured} apifyToken=${Boolean(apifyToken)} firecrawlKey=${firecrawlConfigured}`, verboseColor);
1333
+ writeVerbose(stderr, verbose, `env xaiKey=${xaiConfigured} openaiKey=${Boolean(apiKey)} googleKey=${googleConfigured} anthropicKey=${anthropicConfigured} openrouterKey=${openrouterConfigured} apifyToken=${Boolean(apifyToken)} firecrawlKey=${firecrawlConfigured}`, verboseColor);
1136
1334
  writeVerbose(stderr, verbose, `markdown requested=${markdownRequested} provider=${markdownProvider}`, verboseColor);
1137
1335
  const scrapeWithFirecrawl = firecrawlConfigured && firecrawlMode !== 'off'
1138
1336
  ? createFirecrawlScraper({ apiKey: firecrawlApiKey, fetchImpl: trackedFetch })
1139
1337
  : null;
1140
- const convertHtmlToMarkdown = markdownRequested && (effectiveMarkdownMode === 'llm' || markdownProvider !== 'none')
1338
+ const llmHtmlToMarkdown = markdownRequested && (effectiveMarkdownMode === 'llm' || markdownProvider !== 'none')
1141
1339
  ? createHtmlToMarkdownConverter({
1142
1340
  modelId: model,
1143
1341
  xaiApiKey: xaiConfigured ? xaiApiKey : null,
1144
1342
  googleApiKey: googleConfigured ? googleApiKey : null,
1145
1343
  openaiApiKey: apiKey,
1146
1344
  anthropicApiKey: anthropicConfigured ? anthropicApiKey : null,
1345
+ openrouterApiKey: openrouterConfigured ? openrouterApiKey : null,
1346
+ openrouter: openrouterOptions,
1147
1347
  fetchImpl: trackedFetch,
1148
1348
  onUsage: ({ model: usedModel, provider, usage }) => {
1149
1349
  llmCalls.push({ provider, model: usedModel, usage, purpose: 'markdown' });
1150
1350
  },
1151
1351
  })
1152
1352
  : null;
1353
+ const markitdownHtmlToMarkdown = markdownRequested && preprocessMode !== 'off' && hasUvxCli(env)
1354
+ ? async (args) => {
1355
+ void args.url;
1356
+ void args.title;
1357
+ void args.siteName;
1358
+ return convertToMarkdownWithMarkitdown({
1359
+ bytes: new TextEncoder().encode(args.html),
1360
+ filenameHint: 'page.html',
1361
+ mediaTypeHint: 'text/html',
1362
+ uvxCommand: env.UVX_PATH,
1363
+ timeoutMs: args.timeoutMs,
1364
+ env,
1365
+ execFileImpl,
1366
+ });
1367
+ }
1368
+ : null;
1369
+ const convertHtmlToMarkdown = markdownRequested
1370
+ ? async (args) => {
1371
+ if (effectiveMarkdownMode === 'llm') {
1372
+ if (!llmHtmlToMarkdown) {
1373
+ throw new Error('No HTML→Markdown converter configured');
1374
+ }
1375
+ return llmHtmlToMarkdown(args);
1376
+ }
1377
+ if (llmHtmlToMarkdown) {
1378
+ try {
1379
+ return await llmHtmlToMarkdown(args);
1380
+ }
1381
+ catch (error) {
1382
+ if (!markitdownHtmlToMarkdown)
1383
+ throw error;
1384
+ return await markitdownHtmlToMarkdown(args);
1385
+ }
1386
+ }
1387
+ if (markitdownHtmlToMarkdown) {
1388
+ return await markitdownHtmlToMarkdown(args);
1389
+ }
1390
+ throw new Error('No HTML→Markdown converter configured');
1391
+ }
1392
+ : null;
1153
1393
  const readTweetWithBirdClient = hasBirdCli(env)
1154
1394
  ? ({ url, timeoutMs }) => readTweetWithBird({ url, timeoutMs, env })
1155
1395
  : null;
@@ -1296,6 +1536,9 @@ export async function runCli(argv, { env, fetch, stdout, stderr }) {
1296
1536
  })();
1297
1537
  const client = createLinkPreviewClient({
1298
1538
  apifyApiToken: apifyToken,
1539
+ ytDlpPath,
1540
+ falApiKey,
1541
+ openaiApiKey: openaiTranscriptionKey,
1299
1542
  scrapeWithFirecrawl,
1300
1543
  convertHtmlToMarkdown,
1301
1544
  readTweetWithBird: readTweetWithBirdClient,
@@ -1340,7 +1583,7 @@ export async function runCli(argv, { env, fetch, stdout, stderr }) {
1340
1583
  const viaSourceLabel = viaSources.length > 0 ? `, ${viaSources.join('+')}` : '';
1341
1584
  if (progressEnabled) {
1342
1585
  websiteProgress?.stop?.();
1343
- spinner.setText(extractOnly
1586
+ spinner.setText(extractMode
1344
1587
  ? `Extracted (${extractedContentSize}${viaSourceLabel})`
1345
1588
  : `Summarizing (sent ${extractedContentSize}${viaSourceLabel})…`);
1346
1589
  }
@@ -1351,6 +1594,14 @@ export async function runCli(argv, { env, fetch, stdout, stderr }) {
1351
1594
  writeVerbose(stderr, verbose, `extract transcript textProvided=${extracted.diagnostics.transcript.textProvided} provider=${formatOptionalString(extracted.diagnostics.transcript.provider ?? null)} attemptedProviders=${extracted.diagnostics.transcript.attemptedProviders.length > 0
1352
1595
  ? extracted.diagnostics.transcript.attemptedProviders.join(',')
1353
1596
  : 'none'} notes=${formatOptionalString(extracted.diagnostics.transcript.notes ?? null)}`, verboseColor);
1597
+ if (extractMode &&
1598
+ markdownRequested &&
1599
+ preprocessMode !== 'off' &&
1600
+ effectiveMarkdownMode === 'auto' &&
1601
+ !extracted.diagnostics.markdown.used &&
1602
+ !hasUvxCli(env)) {
1603
+ stderr.write(`${UVX_TIP}\n`);
1604
+ }
1354
1605
  const isYouTube = extracted.siteName === 'YouTube';
1355
1606
  const prompt = buildLinkSummaryPrompt({
1356
1607
  url: extracted.url,
@@ -1364,7 +1615,7 @@ export async function runCli(argv, { env, fetch, stdout, stderr }) {
1364
1615
  summaryLength: lengthArg.kind === 'preset' ? lengthArg.preset : { maxCharacters: lengthArg.maxCharacters },
1365
1616
  shares: [],
1366
1617
  });
1367
- if (extractOnly) {
1618
+ if (extractMode) {
1368
1619
  clearProgressForStdout();
1369
1620
  if (json) {
1370
1621
  const finishReport = shouldComputeReport ? await buildReport() : null;
@@ -1375,6 +1626,7 @@ export async function runCli(argv, { env, fetch, stdout, stderr }) {
1375
1626
  timeoutMs,
1376
1627
  youtube: youtubeMode,
1377
1628
  firecrawl: firecrawlMode,
1629
+ format,
1378
1630
  markdown: effectiveMarkdownMode,
1379
1631
  length: lengthArg.kind === 'preset'
1380
1632
  ? { kind: 'preset', preset: lengthArg.preset }
@@ -1445,6 +1697,7 @@ export async function runCli(argv, { env, fetch, stdout, stderr }) {
1445
1697
  timeoutMs,
1446
1698
  youtube: youtubeMode,
1447
1699
  firecrawl: firecrawlMode,
1700
+ format,
1448
1701
  markdown: effectiveMarkdownMode,
1449
1702
  length: lengthArg.kind === 'preset'
1450
1703
  ? { kind: 'preset', preset: lengthArg.preset }
@@ -1506,6 +1759,7 @@ export async function runCli(argv, { env, fetch, stdout, stderr }) {
1506
1759
  openaiApiKey: apiKey,
1507
1760
  googleApiKey: googleConfigured ? googleApiKey : null,
1508
1761
  anthropicApiKey: anthropicConfigured ? anthropicApiKey : null,
1762
+ openrouterApiKey: openrouterConfigured ? openrouterApiKey : null,
1509
1763
  };
1510
1764
  const requiredKeyEnv = parsedModel.provider === 'xai'
1511
1765
  ? 'XAI_API_KEY'
@@ -1513,14 +1767,14 @@ export async function runCli(argv, { env, fetch, stdout, stderr }) {
1513
1767
  ? 'GEMINI_API_KEY (or GOOGLE_GENERATIVE_AI_API_KEY / GOOGLE_API_KEY)'
1514
1768
  : parsedModel.provider === 'anthropic'
1515
1769
  ? 'ANTHROPIC_API_KEY'
1516
- : 'OPENAI_API_KEY';
1770
+ : 'OPENAI_API_KEY (or OPENROUTER_API_KEY)';
1517
1771
  const hasRequiredKey = parsedModel.provider === 'xai'
1518
1772
  ? Boolean(xaiApiKey)
1519
1773
  : parsedModel.provider === 'google'
1520
1774
  ? googleConfigured
1521
1775
  : parsedModel.provider === 'anthropic'
1522
1776
  ? anthropicConfigured
1523
- : Boolean(apiKey);
1777
+ : Boolean(apiKey) || openrouterConfigured;
1524
1778
  if (!hasRequiredKey) {
1525
1779
  throw new Error(`Missing ${requiredKeyEnv} for model ${parsedModel.canonical}. Set the env var or choose a different --model.`);
1526
1780
  }
@@ -1577,6 +1831,7 @@ export async function runCli(argv, { env, fetch, stdout, stderr }) {
1577
1831
  timeoutMs,
1578
1832
  fetchImpl: trackedFetch,
1579
1833
  apiKeys: apiKeysForLlm,
1834
+ openrouter: openrouterOptions,
1580
1835
  });
1581
1836
  llmCalls.push({
1582
1837
  provider: result.provider,
@@ -1597,6 +1852,7 @@ export async function runCli(argv, { env, fetch, stdout, stderr }) {
1597
1852
  timeoutMs,
1598
1853
  fetchImpl: trackedFetch,
1599
1854
  apiKeys: apiKeysForLlm,
1855
+ openrouter: openrouterOptions,
1600
1856
  });
1601
1857
  llmCalls.push({
1602
1858
  provider: result.provider,
@@ -1687,6 +1943,7 @@ export async function runCli(argv, { env, fetch, stdout, stderr }) {
1687
1943
  timeoutMs,
1688
1944
  fetchImpl: trackedFetch,
1689
1945
  apiKeys: apiKeysForLlm,
1946
+ openrouter: openrouterOptions,
1690
1947
  });
1691
1948
  llmCalls.push({
1692
1949
  provider: result.provider,
@@ -1713,6 +1970,7 @@ export async function runCli(argv, { env, fetch, stdout, stderr }) {
1713
1970
  timeoutMs,
1714
1971
  youtube: youtubeMode,
1715
1972
  firecrawl: firecrawlMode,
1973
+ format,
1716
1974
  markdown: effectiveMarkdownMode,
1717
1975
  length: lengthArg.kind === 'preset'
1718
1976
  ? { kind: 'preset', preset: lengthArg.preset }