npm - @steipete/summarize - Versions diffs - 0.3.0 → 0.5.0 - Mend

@steipete/summarize 0.3.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (99)

package/CHANGELOG.md +80 -5
package/README.md +122 -20
package/dist/cli.cjs +8446 -4360
package/dist/cli.cjs.map +4 -4
package/dist/esm/cli-main.js +47 -2
package/dist/esm/cli-main.js.map +1 -1
package/dist/esm/config.js +368 -3
package/dist/esm/config.js.map +1 -1
package/dist/esm/content/link-preview/content/index.js +13 -0
package/dist/esm/content/link-preview/content/index.js.map +1 -1
package/dist/esm/content/link-preview/content/utils.js +3 -1
package/dist/esm/content/link-preview/content/utils.js.map +1 -1
package/dist/esm/content/link-preview/content/video.js +96 -0
package/dist/esm/content/link-preview/content/video.js.map +1 -0
package/dist/esm/content/link-preview/transcript/providers/youtube/captions.js +21 -21
package/dist/esm/content/link-preview/transcript/providers/youtube/captions.js.map +1 -1
package/dist/esm/costs.js.map +1 -1
package/dist/esm/flags.js +41 -1
package/dist/esm/flags.js.map +1 -1
package/dist/esm/generate-free.js +616 -0
package/dist/esm/generate-free.js.map +1 -0
package/dist/esm/llm/cli.js +290 -0
package/dist/esm/llm/cli.js.map +1 -0
package/dist/esm/llm/generate-text.js +159 -105
package/dist/esm/llm/generate-text.js.map +1 -1
package/dist/esm/llm/html-to-markdown.js +4 -2
package/dist/esm/llm/html-to-markdown.js.map +1 -1
package/dist/esm/markitdown.js +54 -0
package/dist/esm/markitdown.js.map +1 -0
package/dist/esm/model-auto.js +353 -0
package/dist/esm/model-auto.js.map +1 -0
package/dist/esm/model-spec.js +82 -0
package/dist/esm/model-spec.js.map +1 -0
package/dist/esm/prompts/cli.js +18 -0
package/dist/esm/prompts/cli.js.map +1 -0
package/dist/esm/prompts/file.js +21 -2
package/dist/esm/prompts/file.js.map +1 -1
package/dist/esm/prompts/index.js +2 -1
package/dist/esm/prompts/index.js.map +1 -1
package/dist/esm/prompts/link-summary.js +3 -8
package/dist/esm/prompts/link-summary.js.map +1 -1
package/dist/esm/refresh-free.js +667 -0
package/dist/esm/refresh-free.js.map +1 -0
package/dist/esm/run.js +1612 -533
package/dist/esm/run.js.map +1 -1
package/dist/esm/version.js +1 -1
package/dist/types/config.d.ts +58 -5
package/dist/types/content/link-preview/content/types.d.ts +10 -0
package/dist/types/content/link-preview/content/utils.d.ts +1 -1
package/dist/types/content/link-preview/content/video.d.ts +5 -0
package/dist/types/costs.d.ts +2 -1
package/dist/types/flags.d.ts +7 -0
package/dist/types/generate-free.d.ts +17 -0
package/dist/types/llm/cli.d.ts +24 -0
package/dist/types/llm/generate-text.d.ts +13 -4
package/dist/types/llm/html-to-markdown.d.ts +9 -3
package/dist/types/markitdown.d.ts +10 -0
package/dist/types/model-auto.d.ts +23 -0
package/dist/types/model-spec.d.ts +33 -0
package/dist/types/prompts/cli.d.ts +8 -0
package/dist/types/prompts/file.d.ts +7 -0
package/dist/types/prompts/index.d.ts +2 -1
package/dist/types/refresh-free.d.ts +19 -0
package/dist/types/run.d.ts +3 -1
package/dist/types/version.d.ts +1 -1
package/docs/README.md +4 -1
package/docs/cli.md +95 -0
package/docs/config.md +123 -1
package/docs/extract-only.md +10 -7
package/docs/firecrawl.md +2 -2
package/docs/llm.md +24 -4
package/docs/manual-tests.md +40 -0
package/docs/model-auto.md +92 -0
package/docs/site/assets/site.js +20 -17
package/docs/site/docs/config.html +3 -3
package/docs/site/docs/extract-only.html +7 -5
package/docs/site/docs/firecrawl.html +6 -6
package/docs/site/docs/index.html +2 -2
package/docs/site/docs/llm.html +2 -2
package/docs/site/docs/openai.html +2 -2
package/docs/site/docs/website.html +7 -4
package/docs/site/docs/youtube.html +2 -2
package/docs/site/index.html +1 -1
package/docs/smoketest.md +58 -0
package/docs/website.md +13 -8
package/docs/youtube.md +1 -1
package/package.json +8 -4
package/dist/esm/content/link-preview/transcript/providers/twitter.js +0 -12
package/dist/esm/content/link-preview/transcript/providers/twitter.js.map +0 -1
package/dist/esm/content/link-preview/transcript/providers/youtube/ytdlp.js +0 -114
package/dist/esm/content/link-preview/transcript/providers/youtube/ytdlp.js.map +0 -1
package/dist/esm/summarizeHome.js +0 -20
package/dist/esm/summarizeHome.js.map +0 -1
package/dist/esm/tty/live-markdown.js +0 -52
package/dist/esm/tty/live-markdown.js.map +0 -1
package/dist/types/content/link-preview/transcript/providers/twitter.d.ts +0 -3
package/dist/types/content/link-preview/transcript/providers/youtube/ytdlp.d.ts +0 -3
package/dist/types/summarizeHome.d.ts +0 -6
package/dist/types/tty/live-markdown.d.ts +0 -10

package/dist/types/llm/generate-text.d.ts CHANGED Viewed

@@ -14,7 +14,13 @@ export type LlmTokenUsage = {
     completionTokens: number | null;
     totalTokens: number | null;
 };
-export declare function generateTextWithModelId({ modelId, apiKeys, system, prompt, temperature, maxOutputTokens, timeoutMs, fetchImpl, openrouter, }: {
+type RetryNotice = {
+    attempt: number;
+    maxRetries: number;
+    delayMs: number;
+    error: unknown;
+};
+export declare function generateTextWithModelId({ modelId, apiKeys, system, prompt, temperature, maxOutputTokens, timeoutMs, fetchImpl, forceOpenRouter, retries, onRetry, }: {
     modelId: string;
     apiKeys: LlmApiKeys;
     system?: string;
@@ -23,14 +29,16 @@ export declare function generateTextWithModelId({ modelId, apiKeys, system, prom
     maxOutputTokens?: number;
     timeoutMs: number;
     fetchImpl: typeof fetch;
-    openrouter?: OpenRouterOptions;
+    forceOpenRouter?: boolean;
+    retries?: number;
+    onRetry?: (notice: RetryNotice) => void;
 }): Promise<{
     text: string;
     canonicalModelId: string;
     provider: 'xai' | 'openai' | 'google' | 'anthropic';
     usage: LlmTokenUsage | null;
 }>;
-export declare function streamTextWithModelId({ modelId, apiKeys, system, prompt, temperature, maxOutputTokens, timeoutMs, fetchImpl, openrouter, }: {
+export declare function streamTextWithModelId({ modelId, apiKeys, system, prompt, temperature, maxOutputTokens, timeoutMs, fetchImpl, forceOpenRouter, }: {
     modelId: string;
     apiKeys: LlmApiKeys;
     system?: string;
@@ -39,7 +47,7 @@ export declare function streamTextWithModelId({ modelId, apiKeys, system, prompt
     maxOutputTokens?: number;
     timeoutMs: number;
     fetchImpl: typeof fetch;
-    openrouter?: OpenRouterOptions;
+    forceOpenRouter?: boolean;
 }): Promise<{
     textStream: AsyncIterable<string>;
     canonicalModelId: string;
@@ -47,3 +55,4 @@ export declare function streamTextWithModelId({ modelId, apiKeys, system, prompt
     usage: Promise<LlmTokenUsage | null>;
     lastError: () => unknown;
 }>;
+export {};

package/dist/types/llm/html-to-markdown.d.ts CHANGED Viewed

@@ -1,15 +1,21 @@
 import type { ConvertHtmlToMarkdown } from '../content/link-preview/deps.js';
 import type { LlmTokenUsage } from './generate-text.js';
-import { type OpenRouterOptions } from './generate-text.js';
-export declare function createHtmlToMarkdownConverter({ modelId, xaiApiKey, googleApiKey, openaiApiKey, anthropicApiKey, openrouterApiKey, openrouter, fetchImpl, onUsage, }: {
+export declare function createHtmlToMarkdownConverter({ modelId, forceOpenRouter, xaiApiKey, googleApiKey, openaiApiKey, anthropicApiKey, openrouterApiKey, fetchImpl, retries, onRetry, onUsage, }: {
     modelId: string;
+    forceOpenRouter?: boolean;
     xaiApiKey: string | null;
     googleApiKey: string | null;
     openaiApiKey: string | null;
     fetchImpl: typeof fetch;
     anthropicApiKey: string | null;
     openrouterApiKey: string | null;
-    openrouter?: OpenRouterOptions;
+    retries?: number;
+    onRetry?: (notice: {
+        attempt: number;
+        maxRetries: number;
+        delayMs: number;
+        error: unknown;
+    }) => void;
     onUsage?: (usage: {
         model: string;
         provider: 'xai' | 'openai' | 'google' | 'anthropic';

package/dist/types/markitdown.d.ts ADDED Viewed

@@ -0,0 +1,10 @@
+export type ExecFileFn = typeof import('node:child_process').execFile;
+export declare function convertToMarkdownWithMarkitdown({ bytes, filenameHint, mediaTypeHint, uvxCommand, timeoutMs, env, execFileImpl, }: {
+    bytes: Uint8Array;
+    filenameHint: string | null;
+    mediaTypeHint: string | null;
+    uvxCommand?: string | null;
+    timeoutMs: number;
+    env: Record<string, string | undefined>;
+    execFileImpl: ExecFileFn;
+}): Promise<string>;

package/dist/types/model-auto.d.ts ADDED Viewed

@@ -0,0 +1,23 @@
+import type { AutoRuleKind, CliProvider, SummarizeConfig } from './config.js';
+import type { LiteLlmCatalog } from './pricing/litellm.js';
+export type AutoSelectionInput = {
+    kind: AutoRuleKind;
+    promptTokens: number | null;
+    desiredOutputTokens: number | null;
+    requiresVideoUnderstanding: boolean;
+    env: Record<string, string | undefined>;
+    config: SummarizeConfig | null;
+    catalog: LiteLlmCatalog | null;
+    openrouterProvidersFromEnv: string[] | null;
+    cliAvailability?: Partial<Record<CliProvider, boolean>>;
+};
+export type AutoModelAttempt = {
+    transport: 'native' | 'openrouter' | 'cli';
+    userModelId: string;
+    llmModelId: string | null;
+    openrouterProviders: string[] | null;
+    forceOpenRouter: boolean;
+    requiredEnv: 'XAI_API_KEY' | 'OPENAI_API_KEY' | 'GEMINI_API_KEY' | 'ANTHROPIC_API_KEY' | 'OPENROUTER_API_KEY' | 'CLI_CLAUDE' | 'CLI_CODEX' | 'CLI_GEMINI';
+    debug: string;
+};
+export declare function buildAutoModelAttempts(input: AutoSelectionInput): AutoModelAttempt[];

package/dist/types/model-spec.d.ts ADDED Viewed

@@ -0,0 +1,33 @@
+import type { CliProvider } from './config.js';
+export type FixedModelSpec = {
+    transport: 'native';
+    userModelId: string;
+    llmModelId: string;
+    provider: 'xai' | 'openai' | 'google' | 'anthropic';
+    openrouterProviders: string[] | null;
+    forceOpenRouter: false;
+    requiredEnv: 'XAI_API_KEY' | 'OPENAI_API_KEY' | 'GEMINI_API_KEY' | 'ANTHROPIC_API_KEY';
+} | {
+    transport: 'openrouter';
+    userModelId: string;
+    openrouterModelId: string;
+    llmModelId: string;
+    openrouterProviders: string[] | null;
+    forceOpenRouter: true;
+    requiredEnv: 'OPENROUTER_API_KEY';
+} | {
+    transport: 'cli';
+    userModelId: string;
+    llmModelId: null;
+    openrouterProviders: null;
+    forceOpenRouter: false;
+    requiredEnv: 'CLI_CLAUDE' | 'CLI_CODEX' | 'CLI_GEMINI';
+    cliProvider: CliProvider;
+    cliModel: string | null;
+};
+export type RequestedModel = {
+    kind: 'auto';
+} | ({
+    kind: 'fixed';
+} & FixedModelSpec);
+export declare function parseRequestedModelId(raw: string): RequestedModel;

package/dist/types/prompts/cli.d.ts ADDED Viewed

@@ -0,0 +1,8 @@
+import type { SummaryLengthTarget } from './link-summary.js';
+export declare function buildPathSummaryPrompt({ kindLabel, filePath, filename, mediaType, summaryLength, }: {
+    kindLabel: 'file' | 'image';
+    filePath: string;
+    filename: string | null;
+    mediaType: string | null;
+    summaryLength: SummaryLengthTarget;
+}): string;

package/dist/types/prompts/file.d.ts CHANGED Viewed

@@ -5,3 +5,10 @@ export declare function buildFileSummaryPrompt({ filename, mediaType, summaryLen
     summaryLength: SummaryLengthTarget;
     contentLength?: number | null;
 }): string;
+export declare function buildFileTextSummaryPrompt({ filename, originalMediaType, contentMediaType, summaryLength, contentLength, }: {
+    filename: string | null;
+    originalMediaType: string | null;
+    contentMediaType: string;
+    summaryLength: SummaryLengthTarget;
+    contentLength: number;
+}): string;

package/dist/types/prompts/index.d.ts CHANGED Viewed

@@ -1,3 +1,4 @@
 export type { SummaryLength } from '../shared/contracts.js';
-export { buildFileSummaryPrompt } from './file.js';
+export { buildPathSummaryPrompt } from './cli.js';
+export { buildFileSummaryPrompt, buildFileTextSummaryPrompt } from './file.js';
 export { buildLinkSummaryPrompt, estimateMaxCompletionTokensForCharacters, pickSummaryLengthForCharacters, type ShareContextEntry, SUMMARY_LENGTH_TO_TOKENS, type SummaryLengthTarget, } from './link-summary.js';

package/dist/types/refresh-free.d.ts ADDED Viewed

@@ -0,0 +1,19 @@
+type GenerateFreeOptions = {
+    runs: number;
+    smart: number;
+    maxCandidates: number;
+    concurrency: number;
+    timeoutMs: number;
+    minParamB: number;
+    maxAgeDays: number;
+    setDefault: boolean;
+};
+export declare function refreshFree({ env, fetchImpl, stdout, stderr, verbose, options, }: {
+    env: Record<string, string | undefined>;
+    fetchImpl: typeof fetch;
+    stdout: NodeJS.WritableStream;
+    stderr: NodeJS.WritableStream;
+    verbose?: boolean;
+    options?: Partial<GenerateFreeOptions>;
+}): Promise<void>;
+export {};

package/dist/types/run.d.ts CHANGED Viewed

@@ -1,8 +1,10 @@
+import { type ExecFileFn } from './markitdown.js';
 type RunEnv = {
     env: Record<string, string | undefined>;
     fetch: typeof fetch;
+    execFile?: ExecFileFn;
     stdout: NodeJS.WritableStream;
     stderr: NodeJS.WritableStream;
 };
-export declare function runCli(argv: string[], { env, fetch, stdout, stderr }: RunEnv): Promise<void>;
+export declare function runCli(argv: string[], { env, fetch, execFile: execFileOverride, stdout, stderr }: RunEnv): Promise<void>;
 export {};

package/dist/types/version.d.ts CHANGED Viewed

@@ -1,2 +1,2 @@
-export declare const FALLBACK_VERSION = "0.3.0";
+export declare const FALLBACK_VERSION = "0.5.0";
 export declare function resolvePackageVersion(importMetaUrl?: string): string;

package/docs/README.md CHANGED Viewed

@@ -4,7 +4,10 @@
 - `docs/youtube.md` — YouTube transcript extraction (youtubei / captionTracks / Apify)
 - `docs/firecrawl.md` — Firecrawl mode + API key
 - `docs/llm.md` — LLM summarization + model config (Gateway/OpenAI)
-- `docs/extract-only.md` — extraction mode (no LLM call)
+- `docs/cli.md` — CLI models (Claude/Codex/Gemini)
+- `docs/model-auto.md` — automatic model selection (`--model auto`)
+- `docs/manual-tests.md` — manual end-to-end test checklist
+- `docs/extract-only.md` — extract mode (no summary LLM call)
 ## Website

package/docs/cli.md ADDED Viewed

@@ -0,0 +1,95 @@
+# CLI models
+Summarize can use installed CLIs (Claude, Codex, Gemini) as local model backends.
+## Model ids
+- `cli/claude/<model>` (e.g. `cli/claude/sonnet`)
+- `cli/codex/<model>` (e.g. `cli/codex/gpt-5.2`)
+- `cli/gemini/<model>` (e.g. `cli/gemini/gemini-3-flash-preview`)
+Use `--cli [provider]` (case-insensitive) for the provider default, or `--model cli/<provider>/<model>` to pin a model.
+If `--cli` is provided without a provider, auto selection is used with CLI enabled.
+## Auto mode
+Auto mode does **not** use CLIs unless you set `cli.enabled` in config.
+Why: CLI adds ~4s latency per attempt and higher variance.
+Recommendation: enable only Gemini unless you have a reason to add others.
+Gemini CLI performance: summarize sets `GEMINI_CLI_NO_RELAUNCH=true` for Gemini CLI runs to avoid a costly self-relaunch (can be overridden by setting it yourself).
+When enabled, auto prepends CLI attempts in the order listed in `cli.enabled`
+(recommended: `["gemini"]`).
+Enable CLI attempts:
+```json
+{
+  "cli": { "enabled": ["gemini"] }
+}
+```
+Disable CLI attempts:
+```json
+{
+  "cli": { "enabled": [] }
+}
+```
+Note: when `cli.enabled` is set, it also acts as an allowlist for explicit `--cli` / `--model cli/...`.
+## CLI discovery
+Binary lookup:
+- `CLAUDE_PATH`, `CODEX_PATH`, `GEMINI_PATH` (optional overrides)
+- Otherwise uses `PATH`
+## Attachments (images/files)
+When a CLI attempt is used for an image or non-text file, Summarize switches to a
+path-based prompt and enables the required tool flags:
+- Claude: `--tools Read --dangerously-skip-permissions`
+- Gemini: `--yolo` and `--include-directories <dir>`
+- Codex: `codex exec --output-last-message ...` and `-i <image>` for images
+## Config
+```json
+{
+  "cli": {
+    "enabled": ["claude", "gemini", "codex"],
+    "codex": { "model": "gpt-5.2" },
+    "gemini": { "model": "gemini-3-flash-preview", "extraArgs": ["--verbose"] },
+    "claude": {
+      "model": "sonnet",
+      "binary": "/usr/local/bin/claude",
+      "extraArgs": ["--verbose"]
+    }
+  }
+}
+```
+Notes:
+- CLI output is treated as text only (no token accounting).
+- If a CLI call fails, auto mode falls back to the next candidate.
+## Generate free preset (OpenRouter)
+`summarize` ships with a built-in preset `free`, backed by OpenRouter `:free` models.
+To regenerate the candidate list (and persist it in your config):
+```bash
+summarize refresh-free
+```
+Options:
+- `--runs 2` (default): extra timing runs per selected model (total runs = 1 + runs)
+- `--smart 3` (default): number of “smart-first” picks (rest filled by fastest)
+- `--set-default`: also sets `"model": "free"` in `~/.summarize/config.json`

package/docs/config.md CHANGED Viewed

@@ -15,14 +15,136 @@ For `model`:
 1. CLI flag `--model`
 2. Env `SUMMARIZE_MODEL`
 3. Config file `model`
-4. Built-in default (`google/gemini-3-flash-preview`)
+4. Built-in default (`auto`)
 ## Format
 `~/.summarize/config.json`:
+```json
+{
+  "model": { "id": "google/gemini-3-flash-preview" }
+}
+```
+Shorthand (equivalent):
 ```json
 {
   "model": "google/gemini-3-flash-preview"
 }
 ```
+`model` can also be auto:
+```json
+{
+  "model": { "mode": "auto" }
+}
+```
+Shorthand (equivalent):
+```json
+{
+  "model": "auto"
+}
+```
+## Presets
+Define presets you can select via `--model <preset>`:
+```json
+{
+  "models": {
+    "fast": { "id": "openai/gpt-5-mini" },
+    "or-free": {
+      "rules": [
+        {
+          "candidates": [
+            "openrouter/google/gemini-2.0-flash-exp:free",
+            "openrouter/meta-llama/llama-3.3-70b-instruct:free"
+          ]
+        }
+      ]
+    }
+  }
+}
+```
+Notes:
+- `auto` is reserved and can’t be defined as a preset.
+- `free` is built-in (OpenRouter `:free` candidates). Override it by defining `models.free` in your config, or regenerate it via `summarize refresh-free`.
+Use a preset as your default `model`:
+```json
+{
+  "model": "fast"
+}
+```
+Notes:
+- For presets, `"mode": "auto"` is optional when `"rules"` is present.
+For auto selection with rules:
+```json
+{
+  "model": {
+    "mode": "auto",
+    "rules": [
+      {
+        "when": ["video"],
+        "candidates": ["google/gemini-3-flash-preview"]
+      },
+      {
+        "when": ["website", "youtube"],
+        "bands": [
+          {
+            "token": { "max": 8000 },
+            "candidates": ["openai/gpt-5-mini"]
+          },
+          {
+            "candidates": ["xai/grok-4-fast-non-reasoning"]
+          }
+        ]
+      },
+      {
+        "candidates": ["openai/gpt-5-mini", "openrouter/openai/gpt-5-mini"]
+      }
+    ]
+  },
+  "media": { "videoMode": "auto" }
+}
+```
+Notes:
+- Parsed leniently (JSON5), but **comments are not allowed**.
+- Unknown keys are ignored.
+- `model.rules` is optional. If omitted, built-in defaults apply.
+- `model.rules[].when` (optional) must be an array (e.g. `["video","youtube"]`).
+- `model.rules[]` must use either `candidates` or `bands`.
+## CLI config
+```json
+{
+  "cli": {
+    "enabled": ["gemini"],
+    "codex": { "model": "gpt-5.2" },
+    "claude": { "binary": "/usr/local/bin/claude", "extraArgs": ["--verbose"] }
+  }
+}
+```
+Notes:
+- `cli.enabled` is an allowlist (auto uses CLIs only when set; explicit `--cli` / `--model cli/...` must be included).
+- Recommendation: keep `cli.enabled` to `["gemini"]` unless you have a reason to add others (extra latency/variance).
+- `cli.<provider>.binary` overrides CLI binary discovery.
+- `cli.<provider>.extraArgs` appends extra CLI args.

package/docs/extract-only.md CHANGED Viewed

@@ -1,13 +1,16 @@
-# Extract-only mode
+# Extract mode
-`--extract-only` prints the extracted content and exits.
+`--extract` prints the extracted content and exits.
+Deprecated alias: `--extract-only`.
 ## Notes
 - No summarization LLM call happens in this mode.
-- `--markdown llm` / `--markdown auto` may still call the configured LLM for HTML → Markdown conversion.
+- `--format md` may still convert HTML → Markdown (depending on `--markdown-mode` and available tools).
 - `--length` is intended for summarization guidance; extraction prints full content.
-- For non-YouTube URLs, Firecrawl is only used when HTML extraction looks blocked/thin (or when forced with `--firecrawl always`).
-  - Force plain HTML extraction with `--firecrawl off`.
-- For non-YouTube URLs, `--markdown auto` can convert HTML → Markdown via an LLM when configured.
-  - Force it with `--markdown llm`.
+- For non-YouTube URLs with `--format md`, the CLI prefers Firecrawl Markdown by default when `FIRECRAWL_API_KEY` is configured (unless you set `--firecrawl` explicitly).
+  - Force plain HTML extraction with `--firecrawl off` (or use `--format text`).
+- For non-YouTube URLs with `--format md`, `--markdown-mode auto` can convert HTML → Markdown via an LLM when configured.
+  - Force it with `--markdown-mode llm`.
+  - If no LLM is configured, `--markdown-mode auto` may fall back to `uvx markitdown` when available.

package/docs/firecrawl.md CHANGED Viewed

@@ -8,9 +8,9 @@ Firecrawl is a fallback for sites that block direct HTML fetching or don’t ren
 - `auto` (default): use Firecrawl only when HTML extraction looks blocked/thin.
 - `always`: try Firecrawl first (falls back to HTML if Firecrawl is unavailable/empty).
-## Extract-only
+## Extract default
-Firecrawl is only used when HTML extraction looks blocked/thin (or when forced with `--firecrawl always`).
+When `--extract --format md` is used for non-YouTube URLs and `FIRECRAWL_API_KEY` is configured, the CLI defaults to `--firecrawl always` to return Markdown.
 ## API key

package/docs/llm.md CHANGED Viewed

@@ -1,37 +1,57 @@
 # LLM / summarization mode
-By default `summarize` will call an LLM using **direct provider API keys**.
+By default `summarize` will call an LLM using **direct provider API keys**. When CLI tools are
+installed, auto mode can use local CLI models when `cli.enabled` is set (see `docs/cli.md`).
 ## Defaults
-- Default model: `google/gemini-3-flash-preview`
+- Default model: `auto`
 - Override with `SUMMARIZE_MODEL`, config file (`model`), or `--model`.
 ## Env
+- `.env` (optional): when running the CLI, `summarize` also reads `.env` in the current working directory and merges it into the environment (real env vars win).
 - `XAI_API_KEY` (required for `xai/...` models)
 - `OPENAI_API_KEY` (required for `openai/...` models)
 - `OPENAI_BASE_URL` (optional; OpenAI-compatible API endpoint, e.g. OpenRouter)
-- `OPENROUTER_API_KEY` (optional; used when `OPENAI_BASE_URL` points to OpenRouter)
+- `OPENROUTER_API_KEY` (optional; required for `openrouter/...` models; also used when `OPENAI_BASE_URL` points to OpenRouter)
 - `GEMINI_API_KEY` (required for `google/...` models; also accepts `GOOGLE_GENERATIVE_AI_API_KEY` / `GOOGLE_API_KEY`)
 - `ANTHROPIC_API_KEY` (required for `anthropic/...` models)
 - `SUMMARIZE_MODEL` (optional; overrides default model selection)
+- `CLAUDE_PATH` / `CODEX_PATH` / `GEMINI_PATH` (optional; override CLI binary paths)
 ## Flags
 - `--model <model>`
   - Examples:
+    - `cli/codex/gpt-5.2`
+    - `cli/claude/sonnet`
+    - `cli/gemini/gemini-3-flash-preview`
     - `google/gemini-3-flash-preview`
-    - `openai/gpt-5.2`
+    - `openai/gpt-5-mini`
     - `xai/grok-4-fast-non-reasoning`
     - `google/gemini-2.0-flash`
     - `anthropic/claude-sonnet-4-5`
+    - `openrouter/meta-llama/llama-3.3-70b-instruct:free` (force OpenRouter)
+- `--cli [provider]`
+  - Examples: `--cli claude`, `--cli Gemini`, `--cli codex` (equivalent to `--model cli/<provider>`); `--cli` alone uses auto selection with CLI enabled.
+- `--model auto`
+  - See `docs/model-auto.md`
+- `--model <preset>`
+  - Uses a config-defined preset (see `docs/config.md` → “Presets”).
+- `--video-mode auto|transcript|understand`
+  - Only relevant for video inputs / video-only pages.
 - `--length short|medium|long|xl|xxl|<chars>`
   - This is *soft guidance* to the model (no hard truncation).
   - Minimum numeric value: 50 chars.
+  - Default: `long`.
 - `--max-output-tokens <count>`
   - Hard cap for output tokens (optional).
+  - If omitted, no max token parameter is sent (provider default).
   - Minimum numeric value: 16.
+  - Recommendation: prefer `--length` unless you need a hard cap (some providers count “reasoning” into the cap).
+- `--retries <count>`
+  - LLM retry attempts on timeout (default: 1).
 - `--json` (includes prompt + summary in one JSON object)
 ## Input limits

package/docs/manual-tests.md ADDED Viewed

@@ -0,0 +1,40 @@
+# Manual tests
+Goal: sanity-check auto selection + presets end-to-end.
+## Setup
+- `OPENAI_API_KEY=...` (optional)
+- `GEMINI_API_KEY=...` (optional)
+- `ANTHROPIC_API_KEY=...` (optional)
+- `XAI_API_KEY=...` (optional)
+- `OPENROUTER_API_KEY=...` (optional)
+Tip: use `--verbose` to see model attempts + the chosen model.
+## Auto (default)
+- Website summary (should pick a model, show it in spinner):
+  - `summarize --max-output-tokens 200 https://example.com`
+- No-model-needed shortcut (should print extracted text; no footer “no model needed”):
+  - `summarize --max-output-tokens 99999 https://example.com`
+- Missing-key skip (configure only one key; should skip other providers, still succeed):
+  - Set only `OPENAI_API_KEY`, then run a website summary; should not try Gemini/Anthropic/XAI.
+## Presets
+- Define a preset in `~/.summarize/config.json` (see `docs/config.md` → “Presets”), then:
+  - `summarize --model <preset> --max-output-tokens 200 https://example.com`
+  - If the preset contains OpenRouter models, ensure `OPENROUTER_API_KEY` is set.
+## Images
+- Local image (auto uses API models by default; enable CLI via `cli.enabled` to test CLIs):
+  - `summarize ./path/to/image.png --max-output-tokens 200`
+## Video
+- YouTube:
+  - `summarize https://www.youtube.com/watch?v=dQw4w9WgXcQ --max-output-tokens 200`
+- Local video understanding (requires Gemini video-capable model; otherwise expect an error or transcript-only behavior depending on input):
+  - `summarize ./path/to/video.mp4 --max-output-tokens 200`