npm - @steipete/summarize - Versions diffs - 0.3.0 → 0.4.0 - Mend

@steipete/summarize 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (36)

package/CHANGELOG.md +10 -3
package/README.md +7 -3
package/dist/cli.cjs +451 -133
package/dist/cli.cjs.map +4 -4
package/dist/esm/flags.js +18 -1
package/dist/esm/flags.js.map +1 -1
package/dist/esm/markitdown.js +54 -0
package/dist/esm/markitdown.js.map +1 -0
package/dist/esm/prompts/file.js +19 -0
package/dist/esm/prompts/file.js.map +1 -1
package/dist/esm/prompts/index.js +1 -1
package/dist/esm/prompts/index.js.map +1 -1
package/dist/esm/run.js +262 -35
package/dist/esm/run.js.map +1 -1
package/dist/esm/version.js +1 -1
package/dist/types/flags.d.ts +4 -0
package/dist/types/markitdown.d.ts +10 -0
package/dist/types/prompts/file.d.ts +7 -0
package/dist/types/prompts/index.d.ts +1 -1
package/dist/types/run.d.ts +3 -1
package/dist/types/version.d.ts +1 -1
package/docs/README.md +1 -1
package/docs/extract-only.md +10 -7
package/docs/firecrawl.md +2 -2
package/docs/site/docs/config.html +3 -3
package/docs/site/docs/extract-only.html +7 -5
package/docs/site/docs/firecrawl.html +6 -6
package/docs/site/docs/index.html +2 -2
package/docs/site/docs/llm.html +2 -2
package/docs/site/docs/openai.html +2 -2
package/docs/site/docs/website.html +7 -4
package/docs/site/docs/youtube.html +2 -2
package/docs/site/index.html +1 -1
package/docs/website.md +10 -7
package/docs/youtube.md +1 -1
package/package.json +1 -1

package/dist/cli.cjs CHANGED

@@ -1196,8 +1196,8 @@ var require_command = __commonJS({
   "node_modules/.pnpm/commander@14.0.2/node_modules/commander/lib/command.js"(exports2) {
     var EventEmitter = require("node:events").EventEmitter;
     var childProcess = require("node:child_process");
-    var path7 = require("node:path");
-    var fs6 = require("node:fs");
+    var path8 = require("node:path");
+    var fs7 = require("node:fs");
     var process14 = require("node:process");
     var { Argument: Argument2, humanReadableArgName } = require_argument();
     var { CommanderError: CommanderError2 } = require_error();
@@ -2191,7 +2191,7 @@ Expecting one of '${allowedValues.join("', '")}'`);
        * @param {string} subcommandName
        */
       _checkForMissingExecutable(executableFile, executableDir, subcommandName) {
-        if (fs6.existsSync(executableFile)) return;
+        if (fs7.existsSync(executableFile)) return;
         const executableDirMessage = executableDir ? `searched for local subcommand relative to directory '${executableDir}'` : "no directory for search for local subcommand, use .executableDir() to supply a custom directory";
         const executableMissing = `'${executableFile}' does not exist
  - if '${subcommandName}' is not meant to be an executable command, remove description parameter from '.command()' and use '.description()' instead
@@ -2209,11 +2209,11 @@ Expecting one of '${allowedValues.join("', '")}'`);
         let launchWithNode = false;
         const sourceExt = [".js", ".ts", ".tsx", ".mjs", ".cjs"];
         function findFile(baseDir, baseName) {
-          const localBin = path7.resolve(baseDir, baseName);
-          if (fs6.existsSync(localBin)) return localBin;
-          if (sourceExt.includes(path7.extname(baseName))) return void 0;
+          const localBin = path8.resolve(baseDir, baseName);
+          if (fs7.existsSync(localBin)) return localBin;
+          if (sourceExt.includes(path8.extname(baseName))) return void 0;
           const foundExt = sourceExt.find(
-            (ext) => fs6.existsSync(`${localBin}${ext}`)
+            (ext) => fs7.existsSync(`${localBin}${ext}`)
           );
           if (foundExt) return `${localBin}${foundExt}`;
           return void 0;
@@ -2225,21 +2225,21 @@ Expecting one of '${allowedValues.join("', '")}'`);
         if (this._scriptPath) {
           let resolvedScriptPath;
           try {
-            resolvedScriptPath = fs6.realpathSync(this._scriptPath);
+            resolvedScriptPath = fs7.realpathSync(this._scriptPath);
           } catch {
             resolvedScriptPath = this._scriptPath;
           }
-          executableDir = path7.resolve(
-            path7.dirname(resolvedScriptPath),
+          executableDir = path8.resolve(
+            path8.dirname(resolvedScriptPath),
             executableDir
           );
         }
         if (executableDir) {
           let localFile = findFile(executableDir, executableFile);
           if (!localFile && !subcommand._executableFile && this._scriptPath) {
-            const legacyName = path7.basename(
+            const legacyName = path8.basename(
               this._scriptPath,
-              path7.extname(this._scriptPath)
+              path8.extname(this._scriptPath)
             );
             if (legacyName !== this._name) {
               localFile = findFile(
@@ -2250,7 +2250,7 @@ Expecting one of '${allowedValues.join("', '")}'`);
           }
           executableFile = localFile || executableFile;
         }
-        launchWithNode = sourceExt.includes(path7.extname(executableFile));
+        launchWithNode = sourceExt.includes(path8.extname(executableFile));
         let proc;
         if (process14.platform !== "win32") {
           if (launchWithNode) {
@@ -3165,7 +3165,7 @@ Expecting one of '${allowedValues.join("', '")}'`);
        * @return {Command}
        */
       nameFromFilename(filename) {
-        this._name = path7.basename(filename, path7.extname(filename));
+        this._name = path8.basename(filename, path8.extname(filename));
         return this;
       }
       /**
@@ -3179,9 +3179,9 @@ Expecting one of '${allowedValues.join("', '")}'`);
        * @param {string} [path]
        * @return {(string|null|Command)}
        */
-      executableDir(path8) {
-        if (path8 === void 0) return this._executableDir;
-        this._executableDir = path8;
+      executableDir(path9) {
+        if (path9 === void 0) return this._executableDir;
+        this._executableDir = path9;
         return this;
       }
       /**
@@ -4902,17 +4902,17 @@ var require_request = __commonJS({
     function buildUrl(id, options = {}) {
       var _a18, _b16;
       const method = ((_a18 = options.method) !== null && _a18 !== void 0 ? _a18 : "post").toLowerCase();
-      const path7 = ((_b16 = options.path) !== null && _b16 !== void 0 ? _b16 : "").replace(/^\//, "").replace(/\/{2,}/, "/");
+      const path8 = ((_b16 = options.path) !== null && _b16 !== void 0 ? _b16 : "").replace(/^\//, "").replace(/\/{2,}/, "/");
       const input = options.input;
       const params = Object.assign(Object.assign({}, options.query || {}), method === "get" ? input : {});
       const queryParams = Object.keys(params).length > 0 ? `?${new URLSearchParams(params).toString()}` : "";
       if ((0, utils_1.isValidUrl)(id)) {
         const url3 = id.endsWith("/") ? id : `${id}/`;
-        return `${url3}${path7}${queryParams}`;
+        return `${url3}${path8}${queryParams}`;
       }
       const appId = (0, utils_1.ensureEndpointIdFormat)(id);
       const subdomain = options.subdomain ? `${options.subdomain}.` : "";
-      const url2 = `https://${subdomain}fal.run/${appId}/${path7}`;
+      const url2 = `https://${subdomain}fal.run/${appId}/${path8}`;
       return `${url2.replace(/\/$/, "")}${queryParams}`;
     }
   }
@@ -8709,10 +8709,10 @@ function mergeDefs(...defs) {
 function cloneDef(schema) {
   return mergeDefs(schema._zod.def);
 }
-function getElementAtPath(obj, path7) {
-  if (!path7)
+function getElementAtPath(obj, path8) {
+  if (!path8)
     return obj;
-  return path7.reduce((acc, key) => acc?.[key], obj);
+  return path8.reduce((acc, key) => acc?.[key], obj);
 }
 function promiseAllObject(promisesObj) {
   const keys = Object.keys(promisesObj);
@@ -9007,11 +9007,11 @@ function aborted(x, startIndex = 0) {
   }
   return false;
 }
-function prefixIssues(path7, issues) {
+function prefixIssues(path8, issues) {
   return issues.map((iss) => {
     var _a18;
     (_a18 = iss).path ?? (_a18.path = []);
-    iss.path.unshift(path7);
+    iss.path.unshift(path8);
     return iss;
   });
 }
@@ -9232,7 +9232,7 @@ function formatError(error47, mapper = (issue2) => issue2.message) {
 }
 function treeifyError(error47, mapper = (issue2) => issue2.message) {
   const result = { errors: [] };
-  const processError = (error48, path7 = []) => {
+  const processError = (error48, path8 = []) => {
     var _a18, _b16;
     for (const issue2 of error48.issues) {
       if (issue2.code === "invalid_union" && issue2.errors.length) {
@@ -9242,7 +9242,7 @@ function treeifyError(error47, mapper = (issue2) => issue2.message) {
       } else if (issue2.code === "invalid_element") {
         processError({ issues: issue2.issues }, issue2.path);
       } else {
-        const fullpath = [...path7, ...issue2.path];
+        const fullpath = [...path8, ...issue2.path];
         if (fullpath.length === 0) {
           result.errors.push(mapper(issue2));
           continue;
@@ -9274,8 +9274,8 @@ function treeifyError(error47, mapper = (issue2) => issue2.message) {
 }
 function toDotPath(_path) {
   const segs = [];
-  const path7 = _path.map((seg) => typeof seg === "object" ? seg.key : seg);
-  for (const seg of path7) {
+  const path8 = _path.map((seg) => typeof seg === "object" ? seg.key : seg);
+  for (const seg of path8) {
     if (typeof seg === "number")
       segs.push(`[${seg}]`);
     else if (typeof seg === "symbol")
@@ -21848,13 +21848,13 @@ function resolveRef(ref, ctx) {
   if (!ref.startsWith("#")) {
     throw new Error("External $ref is not supported, only local refs (#/...) are allowed");
   }
-  const path7 = ref.slice(1).split("/").filter(Boolean);
-  if (path7.length === 0) {
+  const path8 = ref.slice(1).split("/").filter(Boolean);
+  if (path8.length === 0) {
     return ctx.rootSchema;
   }
   const defsKey = ctx.version === "draft-2020-12" ? "$defs" : "definitions";
-  if (path7[0] === defsKey) {
-    const key = path7[1];
+  if (path8[0] === defsKey) {
+    const key = path8[1];
     if (!key || !ctx.defs[key]) {
       throw new Error(`Reference not found: ${ref}`);
     }
@@ -22930,8 +22930,8 @@ var init_parseUtil = __esm({
     init_errors3();
     init_en2();
     makeIssue = (params) => {
-      const { data, path: path7, errorMaps, issueData } = params;
-      const fullPath = [...path7, ...issueData.path || []];
+      const { data, path: path8, errorMaps, issueData } = params;
+      const fullPath = [...path8, ...issueData.path || []];
       const fullIssue = {
         ...issueData,
         path: fullPath
@@ -23211,11 +23211,11 @@ var init_types = __esm({
     init_parseUtil();
     init_util2();
     ParseInputLazyPath = class {
-      constructor(parent, value, path7, key) {
+      constructor(parent, value, path8, key) {
         this._cachedPath = [];
         this.parent = parent;
         this.data = value;
-        this._path = path7;
+        this._path = path8;
         this._key = key;
       }
       get path() {
@@ -28921,8 +28921,8 @@ var require_token_util = __commonJS({
       saveToken: () => saveToken
     });
     module2.exports = __toCommonJS(token_util_exports);
-    var path7 = __toESM2(require("path"));
-    var fs6 = __toESM2(require("fs"));
+    var path8 = __toESM2(require("path"));
+    var fs7 = __toESM2(require("fs"));
     var import_token_error = require_token_error();
     var import_token_io = require_token_io();
     function getVercelDataDir() {
@@ -28931,18 +28931,18 @@ var require_token_util = __commonJS({
       if (!dataDir) {
         return null;
       }
-      return path7.join(dataDir, vercelFolder);
+      return path8.join(dataDir, vercelFolder);
     }
     function getVercelCliToken() {
       const dataDir = getVercelDataDir();
       if (!dataDir) {
         return null;
       }
-      const tokenPath = path7.join(dataDir, "auth.json");
-      if (!fs6.existsSync(tokenPath)) {
+      const tokenPath = path8.join(dataDir, "auth.json");
+      if (!fs7.existsSync(tokenPath)) {
         return null;
       }
-      const token = fs6.readFileSync(tokenPath, "utf8");
+      const token = fs7.readFileSync(tokenPath, "utf8");
       if (!token) {
         return null;
       }
@@ -28983,11 +28983,11 @@ var require_token_util = __commonJS({
         throw new import_token_error.VercelOidcTokenError("Unable to find root directory");
       }
       try {
-        const prjPath = path7.join(dir, ".vercel", "project.json");
-        if (!fs6.existsSync(prjPath)) {
+        const prjPath = path8.join(dir, ".vercel", "project.json");
+        if (!fs7.existsSync(prjPath)) {
           throw new import_token_error.VercelOidcTokenError("project.json not found");
         }
-        const prj = JSON.parse(fs6.readFileSync(prjPath, "utf8"));
+        const prj = JSON.parse(fs7.readFileSync(prjPath, "utf8"));
         if (typeof prj.projectId !== "string" && typeof prj.orgId !== "string") {
           throw new TypeError("Expected a string-valued projectId property");
         }
@@ -29002,11 +29002,11 @@ var require_token_util = __commonJS({
         if (!dir) {
           throw new import_token_error.VercelOidcTokenError("Unable to find user data directory");
         }
-        const tokenPath = path7.join(dir, "com.vercel.token", `${projectId}.json`);
+        const tokenPath = path8.join(dir, "com.vercel.token", `${projectId}.json`);
         const tokenJson = JSON.stringify(token);
-        fs6.mkdirSync(path7.dirname(tokenPath), { mode: 504, recursive: true });
-        fs6.writeFileSync(tokenPath, tokenJson);
-        fs6.chmodSync(tokenPath, 432);
+        fs7.mkdirSync(path8.dirname(tokenPath), { mode: 504, recursive: true });
+        fs7.writeFileSync(tokenPath, tokenJson);
+        fs7.chmodSync(tokenPath, 432);
         return;
       } catch (e) {
         throw new import_token_error.VercelOidcTokenError(`Failed to save token`, e);
@@ -29018,11 +29018,11 @@ var require_token_util = __commonJS({
         if (!dir) {
           return null;
         }
-        const tokenPath = path7.join(dir, "com.vercel.token", `${projectId}.json`);
-        if (!fs6.existsSync(tokenPath)) {
+        const tokenPath = path8.join(dir, "com.vercel.token", `${projectId}.json`);
+        if (!fs7.existsSync(tokenPath)) {
           return null;
         }
-        const token = JSON.parse(fs6.readFileSync(tokenPath, "utf8"));
+        const token = JSON.parse(fs7.readFileSync(tokenPath, "utf8"));
         assertVercelOidcTokenResponse(token);
         return token;
       } catch (e) {
@@ -42982,7 +42982,7 @@ function createXai(options = {}) {
   const createImageModel = (modelId) => {
     return new OpenAICompatibleImageModel(modelId, {
       provider: "xai.image",
-      url: ({ path: path7 }) => `${baseURL}${path7}`,
+      url: ({ path: path8 }) => `${baseURL}${path8}`,
       headers: getHeaders,
       fetch: options.fetch,
       errorStructure: xaiErrorStructure
@@ -51227,37 +51227,37 @@ function createOpenAI(options = {}) {
   );
   const createChatModel = (modelId) => new OpenAIChatLanguageModel(modelId, {
     provider: `${providerName}.chat`,
-    url: ({ path: path7 }) => `${baseURL}${path7}`,
+    url: ({ path: path8 }) => `${baseURL}${path8}`,
     headers: getHeaders,
     fetch: options.fetch
   });
   const createCompletionModel = (modelId) => new OpenAICompletionLanguageModel(modelId, {
     provider: `${providerName}.completion`,
-    url: ({ path: path7 }) => `${baseURL}${path7}`,
+    url: ({ path: path8 }) => `${baseURL}${path8}`,
     headers: getHeaders,
     fetch: options.fetch
   });
   const createEmbeddingModel = (modelId) => new OpenAIEmbeddingModel(modelId, {
     provider: `${providerName}.embedding`,
-    url: ({ path: path7 }) => `${baseURL}${path7}`,
+    url: ({ path: path8 }) => `${baseURL}${path8}`,
     headers: getHeaders,
     fetch: options.fetch
   });
   const createImageModel = (modelId) => new OpenAIImageModel(modelId, {
     provider: `${providerName}.image`,
-    url: ({ path: path7 }) => `${baseURL}${path7}`,
+    url: ({ path: path8 }) => `${baseURL}${path8}`,
     headers: getHeaders,
     fetch: options.fetch
   });
   const createTranscriptionModel = (modelId) => new OpenAITranscriptionModel(modelId, {
     provider: `${providerName}.transcription`,
-    url: ({ path: path7 }) => `${baseURL}${path7}`,
+    url: ({ path: path8 }) => `${baseURL}${path8}`,
     headers: getHeaders,
     fetch: options.fetch
   });
   const createSpeechModel = (modelId) => new OpenAISpeechModel(modelId, {
     provider: `${providerName}.speech`,
-    url: ({ path: path7 }) => `${baseURL}${path7}`,
+    url: ({ path: path8 }) => `${baseURL}${path8}`,
     headers: getHeaders,
     fetch: options.fetch
   });
@@ -51272,7 +51272,7 @@ function createOpenAI(options = {}) {
   const createResponsesModel = (modelId) => {
     return new OpenAIResponsesLanguageModel(modelId, {
       provider: `${providerName}.responses`,
-      url: ({ path: path7 }) => `${baseURL}${path7}`,
+      url: ({ path: path8 }) => `${baseURL}${path8}`,
       headers: getHeaders,
       fetch: options.fetch,
       fileIdPrefixes: ["file-"]
@@ -55600,8 +55600,8 @@ var init_dist11 = __esm({
 // src/run.ts
 var import_node_child_process2 = require("node:child_process");
 var import_node_fs6 = require("node:fs");
-var import_promises4 = __toESM(require("node:fs/promises"), 1);
-var import_node_path7 = __toESM(require("node:path"), 1);
+var import_promises5 = __toESM(require("node:fs/promises"), 1);
+var import_node_path8 = __toESM(require("node:path"), 1);
 // node_modules/.pnpm/commander@14.0.2/node_modules/commander/esm.mjs
 var import_index = __toESM(require_commander(), 1);
@@ -64632,7 +64632,7 @@ function transformGfmAutolinkLiterals(tree) {
     { ignore: ["link", "linkReference"] }
   );
 }
-function findUrl(_, protocol, domain3, path7, match) {
+function findUrl(_, protocol, domain3, path8, match) {
   let prefix = "";
   if (!previous2(match)) {
     return false;
@@ -64645,7 +64645,7 @@ function findUrl(_, protocol, domain3, path7, match) {
   if (!isCorrectDomain(domain3)) {
     return false;
   }
-  const parts = splitUrl(domain3 + path7);
+  const parts = splitUrl(domain3 + path8);
   if (!parts[0]) return false;
   const result = {
     type: "link",
@@ -67598,25 +67598,25 @@ function isRecord2(value) {
 function loadSummarizeConfig({ env: env3 }) {
   const home = env3.HOME?.trim() || (0, import_node_os3.homedir)();
   if (!home) return { config: null, path: null };
-  const path7 = (0, import_node_path.join)(home, ".summarize", "config.json");
+  const path8 = (0, import_node_path.join)(home, ".summarize", "config.json");
   let raw;
   try {
-    raw = (0, import_node_fs.readFileSync)(path7, "utf8");
+    raw = (0, import_node_fs.readFileSync)(path8, "utf8");
   } catch {
-    return { config: null, path: path7 };
+    return { config: null, path: path8 };
   }
   let parsed;
   try {
     parsed = JSON.parse(raw);
   } catch (error47) {
     const message = error47 instanceof Error ? error47.message : String(error47);
-    throw new Error(`Invalid JSON in config file ${path7}: ${message}`);
+    throw new Error(`Invalid JSON in config file ${path8}: ${message}`);
   }
   if (!isRecord2(parsed)) {
-    throw new Error(`Invalid config file ${path7}: expected an object at the top level`);
+    throw new Error(`Invalid config file ${path8}: expected an object at the top level`);
   }
   const model = typeof parsed.model === "string" ? parsed.model : void 0;
-  return { config: { model }, path: path7 };
+  return { config: { model }, path: path8 };
 }
 // src/content/asset.ts
@@ -71757,12 +71757,12 @@ var Mime = class {
     }
     return this;
   }
-  getType(path7) {
-    if (typeof path7 !== "string")
+  getType(path8) {
+    if (typeof path8 !== "string")
       return null;
-    const last = path7.replace(/^.*[/\\]/s, "").toLowerCase();
+    const last = path8.replace(/^.*[/\\]/s, "").toLowerCase();
     const ext = last.replace(/^.*\./s, "").toLowerCase();
-    const hasPath = last.length < path7.length;
+    const hasPath = last.length < path8.length;
     const hasDot = ext.length < last.length - 1;
     if (!hasDot && hasPath)
       return null;
@@ -72458,9 +72458,9 @@ var fetchTranscriptFromTranscriptEndpoint = async (fetchImpl, {
     return null;
   }
 };
-function getNestedProperty(object3, path7) {
+function getNestedProperty(object3, path8) {
   let current = object3;
-  for (const key of path7) {
+  for (const key of path8) {
     if (!(isRecord3(current) && key in current)) {
       return null;
     }
@@ -72468,8 +72468,8 @@ function getNestedProperty(object3, path7) {
   }
   return current;
 }
-function getArrayProperty(object3, path7) {
-  const value = getNestedProperty(object3, path7);
+function getArrayProperty(object3, path8) {
+  const value = getNestedProperty(object3, path8);
   return Array.isArray(value) ? value : null;
 }
 var extractTranscriptFromTranscriptEndpoint = (data) => {
@@ -74662,7 +74662,21 @@ function parseFirecrawlMode(raw) {
 function parseMarkdownMode(raw) {
   const normalized = raw.trim().toLowerCase();
   if (normalized === "off" || normalized === "auto" || normalized === "llm") return normalized;
-  throw new Error(`Unsupported --markdown: ${raw}`);
+  throw new Error(`Unsupported --markdown-mode: ${raw}`);
+}
+function parseExtractFormat(raw) {
+  const normalized = raw.trim().toLowerCase();
+  if (normalized === "text" || normalized === "txt" || normalized === "plain") return "text";
+  if (normalized === "md" || normalized === "markdown") return "markdown";
+  throw new Error(`Unsupported --format: ${raw}`);
+}
+function parsePreprocessMode(raw) {
+  const normalized = raw.trim().toLowerCase();
+  if (normalized === "off" || normalized === "auto" || normalized === "always") {
+    return normalized;
+  }
+  if (normalized === "on") return "always";
+  throw new Error(`Unsupported --preprocess: ${raw}`);
 }
 function parseStreamMode(raw) {
   const normalized = raw.trim().toLowerCase();
@@ -75336,37 +75350,102 @@ function createHtmlToMarkdownConverter({
   };
 }
+// src/markitdown.ts
+var import_promises3 = __toESM(require("node:fs/promises"), 1);
+var import_node_os5 = require("node:os");
+var import_node_path4 = __toESM(require("node:path"), 1);
+function guessExtension({
+  filenameHint,
+  mediaType
+}) {
+  const ext = filenameHint ? import_node_path4.default.extname(filenameHint).toLowerCase() : "";
+  if (ext) return ext;
+  if (mediaType === "text/html" || mediaType === "application/xhtml+xml") return ".html";
+  if (mediaType === "application/pdf") return ".pdf";
+  return ".bin";
+}
+async function execFileText(execFileImpl, cmd, args, options) {
+  return await new Promise((resolve2, reject) => {
+    execFileImpl(cmd, args, options, (error47, stdout, stderr) => {
+      if (error47) {
+        const stderrText2 = typeof stderr === "string" ? stderr : stderr.toString("utf8");
+        const message = stderrText2.trim() ? `${error47.message}: ${stderrText2.trim()}` : error47.message;
+        reject(new Error(message, { cause: error47 }));
+        return;
+      }
+      const stdoutText = typeof stdout === "string" ? stdout : stdout.toString("utf8");
+      const stderrText = typeof stderr === "string" ? stderr : stderr.toString("utf8");
+      resolve2({ stdout: stdoutText, stderr: stderrText });
+    });
+  });
+}
+async function convertToMarkdownWithMarkitdown({
+  bytes,
+  filenameHint,
+  mediaTypeHint,
+  uvxCommand,
+  timeoutMs,
+  env: env3,
+  execFileImpl
+}) {
+  const dir = await import_promises3.default.mkdtemp(import_node_path4.default.join((0, import_node_os5.tmpdir)(), "summarize-markitdown-"));
+  const ext = guessExtension({ filenameHint, mediaType: mediaTypeHint });
+  const base2 = (filenameHint ? import_node_path4.default.basename(filenameHint, import_node_path4.default.extname(filenameHint)) : "input").replaceAll(/[^\w.-]+/g, "-").slice(0, 64);
+  const filePath = import_node_path4.default.join(dir, `${base2}${ext}`);
+  try {
+    await import_promises3.default.writeFile(filePath, bytes);
+    const from = "markitdown[all]";
+    const { stdout } = await execFileText(
+      execFileImpl,
+      uvxCommand && uvxCommand.trim().length > 0 ? uvxCommand.trim() : "uvx",
+      ["--from", from, "markitdown", filePath],
+      {
+        timeout: timeoutMs,
+        env: { ...process.env, ...env3 },
+        maxBuffer: 50 * 1024 * 1024
+      }
+    );
+    const markdown = stdout.trim();
+    if (!markdown) {
+      throw new Error("markitdown returned empty output");
+    }
+    return markdown;
+  } finally {
+    await import_promises3.default.rm(dir, { recursive: true, force: true });
+  }
+}
 // src/pricing/litellm.ts
-var import_node_path5 = __toESM(require("node:path"), 1);
+var import_node_path6 = __toESM(require("node:path"), 1);
 // node_modules/.pnpm/tokentally@https+++codeload.github.com+steipete+tokentally+tar.gz+99865e5c16f5340c9589a2c5d85c3ea47dbcec82/node_modules/tokentally/dist/node/litellm.js
 var import_node_fs4 = require("node:fs");
-var import_promises3 = __toESM(require("node:fs/promises"), 1);
-var import_node_path4 = __toESM(require("node:path"), 1);
+var import_promises4 = __toESM(require("node:fs/promises"), 1);
+var import_node_path5 = __toESM(require("node:path"), 1);
 var LITELLM_CATALOG_URL = "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json";
 var CACHE_TTL_MS = 7 * 24 * 60 * 60 * 1e3;
 function cachePaths(env3) {
   const override = env3.TOKENTALLY_CACHE_DIR?.trim();
   const home = env3.HOME?.trim();
-  const cacheDir = override && override.length > 0 ? override : home ? import_node_path4.default.join(home, ".tokentally", "cache") : null;
+  const cacheDir = override && override.length > 0 ? override : home ? import_node_path5.default.join(home, ".tokentally", "cache") : null;
   if (!cacheDir)
     return null;
   return {
-    catalogPath: import_node_path4.default.join(cacheDir, "litellm-model_prices_and_context_window.json"),
-    metaPath: import_node_path4.default.join(cacheDir, "litellm-model_prices_and_context_window.meta.json")
+    catalogPath: import_node_path5.default.join(cacheDir, "litellm-model_prices_and_context_window.json"),
+    metaPath: import_node_path5.default.join(cacheDir, "litellm-model_prices_and_context_window.meta.json")
   };
 }
 async function readJsonFile(filePath) {
   try {
-    const raw = await import_promises3.default.readFile(filePath, "utf8");
+    const raw = await import_promises4.default.readFile(filePath, "utf8");
     return JSON.parse(raw);
   } catch {
     return null;
   }
 }
 async function writeJsonFile(filePath, value) {
-  await import_promises3.default.mkdir(import_node_path4.default.dirname(filePath), { recursive: true });
-  await import_promises3.default.writeFile(filePath, `${JSON.stringify(value, null, 2)}
+  await import_promises4.default.mkdir(import_node_path5.default.dirname(filePath), { recursive: true });
+  await import_promises4.default.writeFile(filePath, `${JSON.stringify(value, null, 2)}
 `, "utf8");
 }
 function isStale(meta3, nowMs) {
@@ -75509,7 +75588,7 @@ function withDefaultCacheDir(env3) {
   }
   const home = env3.HOME?.trim();
   if (!home) return env3;
-  return { ...env3, TOKENTALLY_CACHE_DIR: import_node_path5.default.join(home, ".summarize", "cache") };
+  return { ...env3, TOKENTALLY_CACHE_DIR: import_node_path6.default.join(home, ".summarize", "cache") };
 }
 async function loadLiteLlmCatalog2({
   env: env3,
@@ -75554,6 +75633,27 @@ ${headerLines.length > 0 ? `${headerLines.join("\n")}
 ` : ""}Return only the summary.`;
   return prompt;
 }
+function buildFileTextSummaryPrompt({
+  filename,
+  originalMediaType,
+  contentMediaType,
+  summaryLength,
+  contentLength
+}) {
+  const effectiveSummaryLength = typeof summaryLength === "string" ? summaryLength : summaryLength.maxCharacters > contentLength ? { maxCharacters: contentLength } : summaryLength;
+  const maxCharactersLine = typeof effectiveSummaryLength === "string" ? "" : `Target length: around ${effectiveSummaryLength.maxCharacters.toLocaleString()} characters total (including Markdown and whitespace). This is a soft guideline; prioritize clarity.`;
+  const headerLines = [
+    filename ? `Filename: ${filename}` : null,
+    originalMediaType ? `Original media type: ${originalMediaType}` : null,
+    `Provided as: ${contentMediaType}`,
+    `Extracted content length: ${contentLength.toLocaleString()} characters. Do not exceed the extracted content length; if the requested length is larger, keep the summary at or below the extracted content length and do not add details.`
+  ].filter(Boolean);
+  return `You summarize files for curious users. Summarize the file content below. Be factual and do not invent details. Format the answer in Markdown. Do not use emojis. ${maxCharactersLine}
+${headerLines.length > 0 ? `${headerLines.join("\n")}
+` : ""}Return only the summary.`;
+}
 // src/prompts/link-summary.ts
 var SUMMARY_LENGTH_DIRECTIVES = {
@@ -78283,16 +78383,16 @@ function startSpinner({
 // src/version.ts
 var import_node_fs5 = __toESM(require("node:fs"), 1);
-var import_node_path6 = __toESM(require("node:path"), 1);
+var import_node_path7 = __toESM(require("node:path"), 1);
 var import_node_url = require("node:url");
-var FALLBACK_VERSION = "0.3.0";
+var FALLBACK_VERSION = "0.4.0";
 function resolvePackageVersion(importMetaUrl) {
   const injected = typeof process !== "undefined" && typeof process.env.SUMMARIZE_VERSION === "string" ? process.env.SUMMARIZE_VERSION.trim() : "";
   if (injected.length > 0) return injected;
   const startDir = (() => {
     if (typeof importMetaUrl === "string" && importMetaUrl.trim().length > 0) {
       try {
-        return import_node_path6.default.dirname((0, import_node_url.fileURLToPath)(importMetaUrl));
+        return import_node_path7.default.dirname((0, import_node_url.fileURLToPath)(importMetaUrl));
       } catch {
       }
     }
@@ -78301,7 +78401,7 @@ function resolvePackageVersion(importMetaUrl) {
   })();
   let dir = startDir;
   for (let i = 0; i < 10; i += 1) {
-    const candidate = import_node_path6.default.join(dir, "package.json");
+    const candidate = import_node_path7.default.join(dir, "package.json");
     try {
       const raw = import_node_fs5.default.readFileSync(candidate, "utf8");
       const json3 = JSON.parse(raw);
@@ -78310,7 +78410,7 @@ function resolvePackageVersion(importMetaUrl) {
       }
     } catch {
     }
-    const parent = import_node_path6.default.dirname(dir);
+    const parent = import_node_path7.default.dirname(dir);
     if (parent === dir) break;
     dir = parent;
   }
@@ -78319,6 +78419,7 @@ function resolvePackageVersion(importMetaUrl) {
 // src/run.ts
 var BIRD_TIP = "Tip: Install bird\u{1F426} for better Twitter support: https://github.com/steipete/bird";
+var UVX_TIP = "Tip: Install uv (uvx) for local Markdown conversion: brew install uv (or set UVX_PATH to your uvx binary).";
 var TWITTER_HOSTS2 = /* @__PURE__ */ new Set(["x.com", "twitter.com", "mobile.twitter.com"]);
 var SUMMARY_LENGTH_MAX_CHARACTERS = {
   short: 1200,
@@ -78350,10 +78451,22 @@ function isExecutable(filePath) {
 }
 function hasBirdCli(env3) {
   const candidates = [];
-  const pathEnv = env3.PATH ?? process.env.PATH ?? "";
-  for (const entry of pathEnv.split(import_node_path7.default.delimiter)) {
+  const pathEnv = env3.PATH ?? "";
+  for (const entry of pathEnv.split(import_node_path8.default.delimiter)) {
     if (!entry) continue;
-    candidates.push(import_node_path7.default.join(entry, "bird"));
+    candidates.push(import_node_path8.default.join(entry, "bird"));
+  }
+  return candidates.some((candidate) => isExecutable(candidate));
+}
+function hasUvxCli(env3) {
+  if (typeof env3.UVX_PATH === "string" && env3.UVX_PATH.trim().length > 0) {
+    return true;
+  }
+  const candidates = [];
+  const pathEnv = env3.PATH ?? "";
+  for (const entry of pathEnv.split(import_node_path8.default.delimiter)) {
+    if (!entry) continue;
+    candidates.push(import_node_path8.default.join(entry, "uvx"));
   }
   return candidates.some((candidate) => isExecutable(candidate));
 }
@@ -78404,6 +78517,15 @@ function withBirdTip(error47, url2, env3) {
 ${BIRD_TIP}`;
   return error47 instanceof Error ? new Error(combined, { cause: error47 }) : new Error(combined);
 }
+function withUvxTip(error47, env3) {
+  if (hasUvxCli(env3)) {
+    return error47 instanceof Error ? error47 : new Error(String(error47));
+  }
+  const message = error47 instanceof Error ? error47.message : String(error47);
+  const combined = `${message}
+${UVX_TIP}`;
+  return error47 instanceof Error ? new Error(combined, { cause: error47 }) : new Error(combined);
+}
 var MAX_TEXT_BYTES_DEFAULT = 10 * 1024 * 1024;
 function buildProgram() {
   return new Command().name("summarize").description("Summarize web pages and YouTube links (uses direct provider API keys).").argument("[input]", "URL or local file path to summarize").option(
@@ -78412,12 +78534,27 @@ function buildProgram() {
     "auto"
   ).option(
     "--firecrawl <mode>",
-    "Firecrawl usage: off, auto (fallback), always (try Firecrawl first).",
+    "Firecrawl usage: off, auto (fallback), always (try Firecrawl first). Note: in --format md website mode, defaults to always when FIRECRAWL_API_KEY is set (unless --firecrawl is set explicitly).",
     "auto"
   ).option(
-    "--markdown <mode>",
-    "Website Markdown output: off, auto (use LLM when configured), llm (force LLM). Only affects --extract-only for non-YouTube URLs.",
-    "auto"
+    "--format <format>",
+    "Website/file content format: md|text. For websites: controls the extraction format. For files: controls whether we try to preprocess to Markdown for model compatibility. (default: text)",
+    "text"
+  ).addOption(
+    new Option(
+      "--preprocess <mode>",
+      "Preprocess inputs for model compatibility: off, auto (fallback), always."
+    ).choices(["off", "auto", "always"]).default("auto")
+  ).addOption(
+    new Option(
+      "--markdown-mode <mode>",
+      "HTML\u2192Markdown conversion: off, auto (prefer Firecrawl when configured, then LLM when configured, then markitdown when available), llm (force LLM). Only affects --format md for non-YouTube URLs."
+    ).default("auto")
+  ).addOption(
+    new Option(
+      "--markdown <mode>",
+      "Deprecated alias for --markdown-mode (use --extract --format md --markdown-mode ...)"
+    ).hideHelp()
   ).option(
     "--length <length>",
     "Summary length: short|medium|long|xl|xxl or a character limit like 20000, 20k",
@@ -78434,7 +78571,7 @@ function buildProgram() {
     "--model <model>",
     "LLM model id (gateway-style): xai/..., openai/..., google/... (default: google/gemini-3-flash-preview)",
     void 0
-  ).option("--extract-only", "Print extracted content and exit (no LLM summary)", false).option("--json", "Output structured JSON (includes prompt + metrics)", false).option(
+  ).option("--extract", "Print extracted content and exit (no LLM summary)", false).addOption(new Option("--extract-only", "Deprecated alias for --extract").hideHelp()).option("--json", "Output structured JSON (includes prompt + metrics)", false).option(
     "--stream <mode>",
     "Stream LLM output: auto (TTY only), on, off. Note: streaming is disabled in --json mode.",
     "auto"
@@ -78550,6 +78687,22 @@ function getTextContentFromAttachment(attachment) {
   }
   return { content: "", bytes: 0 };
 }
+function getFileBytesFromAttachment(attachment) {
+  if (attachment.part.type !== "file") return null;
+  const data = attachment.part.data;
+  return data instanceof Uint8Array ? data : null;
+}
+function shouldMarkitdownConvertMediaType(mediaType) {
+  const mt = mediaType.toLowerCase();
+  if (mt === "application/pdf") return true;
+  if (mt === "application/rtf") return true;
+  if (mt === "text/html" || mt === "application/xhtml+xml") return true;
+  if (mt === "application/msword") return true;
+  if (mt.startsWith("application/vnd.openxmlformats-officedocument.")) return true;
+  if (mt === "application/vnd.ms-excel") return true;
+  if (mt === "application/vnd.ms-powerpoint") return true;
+  return false;
+}
 function assertProviderSupportsAttachment({
   provider,
   modelId,
@@ -78613,9 +78766,10 @@ function attachRichHelp(program2, env3, stdout) {
     () => `
 ${heading("Examples")}
   ${cmd('summarize "https://example.com"')}
-  ${cmd('summarize "https://example.com" --extract-only')} ${dim3("# website markdown (LLM if configured)")}
-  ${cmd('summarize "https://example.com" --extract-only --markdown llm')} ${dim3("# website markdown via LLM")}
-  ${cmd('summarize "https://www.youtube.com/watch?v=I845O57ZSy4&t=11s" --extract-only --youtube web')}
+  ${cmd('summarize "https://example.com" --extract')} ${dim3("# extracted plain text")}
+  ${cmd('summarize "https://example.com" --extract --format md')} ${dim3("# extracted markdown (prefers Firecrawl when configured)")}
+  ${cmd('summarize "https://example.com" --extract --format md --markdown-mode llm')} ${dim3("# extracted markdown via LLM")}
+  ${cmd('summarize "https://www.youtube.com/watch?v=I845O57ZSy4&t=11s" --extract --youtube web')}
   ${cmd('summarize "https://example.com" --length 20k --max-output-tokens 2k --timeout 2m --model openai/gpt-5.2')}
   ${cmd('OPENROUTER_API_KEY=... summarize "https://example.com" --model openai/openai/gpt-oss-20b')}
   ${cmd('summarize "https://example.com" --json --verbose')}
@@ -78756,10 +78910,11 @@ function writeFinishLine({
   stderr.write(`${ansi("1;32", line, color2)}
 `);
 }
-async function runCli(argv, { env: env3, fetch: fetch2, stdout, stderr }) {
+async function runCli(argv, { env: env3, fetch: fetch2, execFile: execFileOverride, stdout, stderr }) {
   ;
   globalThis.AI_SDK_LOG_WARNINGS = false;
   const normalizedArgv = argv.filter((arg) => arg !== "--");
+  const execFileImpl = execFileOverride ?? import_node_child_process2.execFile;
   const version2 = resolvePackageVersion();
   const program2 = buildProgram();
   program2.configureOutput({
@@ -78800,7 +78955,7 @@ async function runCli(argv, { env: env3, fetch: fetch2, stdout, stderr }) {
     program2.opts().maxOutputTokens
   );
   const timeoutMs = parseDurationMs(program2.opts().timeout);
-  const extractOnly = Boolean(program2.opts().extractOnly);
+  const extractMode = Boolean(program2.opts().extract) || Boolean(program2.opts().extractOnly);
   const json3 = Boolean(program2.opts().json);
   const streamMode = parseStreamMode(program2.opts().stream);
   const renderMode = parseRenderMode(program2.opts().render);
@@ -78808,9 +78963,19 @@ async function runCli(argv, { env: env3, fetch: fetch2, stdout, stderr }) {
   const metricsMode = parseMetricsMode(program2.opts().metrics);
   const metricsEnabled = metricsMode !== "off";
   const metricsDetailed = metricsMode === "detailed";
-  const markdownMode = parseMarkdownMode(program2.opts().markdown);
+  const preprocessMode = parsePreprocessMode(program2.opts().preprocess);
+  const format2 = parseExtractFormat(program2.opts().format);
   const shouldComputeReport = metricsEnabled;
   const isYoutubeUrl = typeof url2 === "string" ? /youtube\.com|youtu\.be/i.test(url2) : false;
+  const firecrawlExplicitlySet = normalizedArgv.some(
+    (arg) => arg === "--firecrawl" || arg.startsWith("--firecrawl=")
+  );
+  const markdownModeExplicitlySet = normalizedArgv.some(
+    (arg) => arg === "--markdown-mode" || arg.startsWith("--markdown-mode=") || arg === "--markdown" || arg.startsWith("--markdown=")
+  );
+  const markdownMode = format2 === "markdown" ? parseMarkdownMode(
+    program2.opts().markdownMode ?? program2.opts().markdown ?? "auto"
+  ) : "off";
   const requestedFirecrawlMode = parseFirecrawlMode(program2.opts().firecrawl);
   const modelArg = typeof program2.opts().model === "string" ? program2.opts().model : null;
   const { config: config2, path: configPath } = loadSummarizeConfig({ env: env3 });
@@ -78839,6 +79004,12 @@ async function runCli(argv, { env: env3, fetch: fetch2, stdout, stderr }) {
   const anthropicConfigured = typeof anthropicApiKey === "string" && anthropicApiKey.length > 0;
   const openrouterConfigured = typeof openrouterApiKey === "string" && openrouterApiKey.length > 0;
   const openrouterOptions = openRouterProviders ? { providers: openRouterProviders } : void 0;
+  if (markdownModeExplicitlySet && format2 !== "markdown") {
+    throw new Error("--markdown-mode is only supported with --format md");
+  }
+  if (markdownModeExplicitlySet && inputTarget.kind !== "url") {
+    throw new Error("--markdown-mode is only supported for website URLs");
+  }
   const llmCalls = [];
   let firecrawlRequests = 0;
   let apifyRequests = 0;
@@ -78932,7 +79103,7 @@ async function runCli(argv, { env: env3, fetch: fetch2, stdout, stderr }) {
     if (streamMode !== "auto") return streamMode;
     return isRichTty(stdout) ? "on" : "off";
   })();
-  const streamingEnabled = effectiveStreamMode === "on" && !json3 && !extractOnly;
+  const streamingEnabled = effectiveStreamMode === "on" && !json3 && !extractMode;
   const effectiveRenderMode = (() => {
     if (renderMode !== "auto") return renderMode;
     if (!isRichTty(stdout)) return "plain";
@@ -78957,8 +79128,8 @@ async function runCli(argv, { env: env3, fetch: fetch2, stdout, stderr }) {
 `
     );
   };
-  if (extractOnly && inputTarget.kind !== "url") {
-    throw new Error("--extract-only is only supported for website/YouTube URLs");
+  if (extractMode && inputTarget.kind !== "url") {
+    throw new Error("--extract is only supported for website/YouTube URLs");
   }
   const progressEnabled = isRichTty(stderr) && !verbose && !json3;
   let clearProgressBeforeStdout = null;
@@ -78987,11 +79158,6 @@ async function runCli(argv, { env: env3, fetch: fetch2, stdout, stderr }) {
         `Missing ${requiredKeyEnv} for model ${parsedModel.canonical}. Set the env var or choose a different --model.`
       );
     }
-    assertProviderSupportsAttachment({
-      provider: parsedModel.provider,
-      modelId: parsedModel.canonical,
-      attachment: { part: attachment.part, mediaType: attachment.mediaType }
-    });
     const modelResolution = await resolveModelIdForLlmCall({
       parsedModel,
       apiKeys: { googleApiKey: apiKeysForLlm.googleApiKey },
@@ -79013,14 +79179,114 @@ async function runCli(argv, { env: env3, fetch: fetch2, stdout, stderr }) {
         `Text file too large (${formatBytes(textContent.bytes)}). Limit is ${formatBytes(MAX_TEXT_BYTES_DEFAULT)}.`
       );
     }
+    const fileBytes = getFileBytesFromAttachment(attachment);
+    const canPreprocessWithMarkitdown = format2 === "markdown" && preprocessMode !== "off" && hasUvxCli(env3) && attachment.part.type === "file" && fileBytes !== null && shouldMarkitdownConvertMediaType(attachment.mediaType);
     const summaryLengthTarget = lengthArg.kind === "preset" ? lengthArg.preset : { maxCharacters: lengthArg.maxCharacters };
-    const promptText = buildFileSummaryPrompt({
-      filename: attachment.filename,
-      mediaType: attachment.mediaType,
-      summaryLength: summaryLengthTarget,
-      contentLength: textContent?.content.length ?? null
-    });
-    const promptPayload = buildAssetPromptPayload({ promptText, attachment, textContent });
+    let promptText = "";
+    const buildAttachmentPromptPayload = () => {
+      promptText = buildFileSummaryPrompt({
+        filename: attachment.filename,
+        mediaType: attachment.mediaType,
+        summaryLength: summaryLengthTarget,
+        contentLength: textContent?.content.length ?? null
+      });
+      return buildAssetPromptPayload({ promptText, attachment, textContent });
+    };
+    const buildMarkitdownPromptPayload = (markdown) => {
+      promptText = buildFileTextSummaryPrompt({
+        filename: attachment.filename,
+        originalMediaType: attachment.mediaType,
+        contentMediaType: "text/markdown",
+        summaryLength: summaryLengthTarget,
+        contentLength: markdown.length
+      });
+      return `${promptText}
+---
+${markdown}`.trim();
+    };
+    let preprocessedMarkdown = null;
+    let usingPreprocessedMarkdown = false;
+    if (preprocessMode === "always" && canPreprocessWithMarkitdown) {
+      if (!fileBytes) {
+        throw new Error("Internal error: missing file bytes for markitdown preprocessing");
+      }
+      try {
+        preprocessedMarkdown = await convertToMarkdownWithMarkitdown({
+          bytes: fileBytes,
+          filenameHint: attachment.filename,
+          mediaTypeHint: attachment.mediaType,
+          uvxCommand: env3.UVX_PATH,
+          timeoutMs,
+          env: env3,
+          execFileImpl
+        });
+      } catch (error47) {
+        const message = error47 instanceof Error ? error47.message : String(error47);
+        throw new Error(
+          `Failed to preprocess ${attachment.mediaType} with markitdown: ${message} (disable with --preprocess off).`
+        );
+      }
+      if (Buffer.byteLength(preprocessedMarkdown, "utf8") > MAX_TEXT_BYTES_DEFAULT) {
+        throw new Error(
+          `Preprocessed Markdown too large (${formatBytes(Buffer.byteLength(preprocessedMarkdown, "utf8"))}). Limit is ${formatBytes(MAX_TEXT_BYTES_DEFAULT)}.`
+        );
+      }
+      usingPreprocessedMarkdown = true;
+    }
+    let promptPayload = buildAttachmentPromptPayload();
+    if (usingPreprocessedMarkdown) {
+      if (!preprocessedMarkdown) {
+        throw new Error("Internal error: missing markitdown content for preprocessing");
+      }
+      promptPayload = buildMarkitdownPromptPayload(preprocessedMarkdown);
+    }
+    if (!usingPreprocessedMarkdown) {
+      try {
+        assertProviderSupportsAttachment({
+          provider: parsedModel.provider,
+          modelId: parsedModel.canonical,
+          attachment: { part: attachment.part, mediaType: attachment.mediaType }
+        });
+      } catch (error47) {
+        if (!canPreprocessWithMarkitdown) {
+          if (format2 === "markdown" && preprocessMode !== "off" && attachment.part.type === "file" && shouldMarkitdownConvertMediaType(attachment.mediaType) && !hasUvxCli(env3)) {
+            throw withUvxTip(error47, env3);
+          }
+          throw error47;
+        }
+        if (!fileBytes) {
+          throw new Error("Internal error: missing file bytes for markitdown preprocessing");
+        }
+        try {
+          preprocessedMarkdown = await convertToMarkdownWithMarkitdown({
+            bytes: fileBytes,
+            filenameHint: attachment.filename,
+            mediaTypeHint: attachment.mediaType,
+            uvxCommand: env3.UVX_PATH,
+            timeoutMs,
+            env: env3,
+            execFileImpl
+          });
+        } catch (markitdownError) {
+          if (preprocessMode === "auto") {
+            throw error47;
+          }
+          const message = markitdownError instanceof Error ? markitdownError.message : String(markitdownError);
+          throw new Error(
+            `Failed to preprocess ${attachment.mediaType} with markitdown: ${message} (disable with --preprocess off).`
+          );
+        }
+        if (Buffer.byteLength(preprocessedMarkdown, "utf8") > MAX_TEXT_BYTES_DEFAULT) {
+          throw new Error(
+            `Preprocessed Markdown too large (${formatBytes(Buffer.byteLength(preprocessedMarkdown, "utf8"))}). Limit is ${formatBytes(MAX_TEXT_BYTES_DEFAULT)}.`
+          );
+        }
+        usingPreprocessedMarkdown = true;
+        promptPayload = buildMarkitdownPromptPayload(preprocessedMarkdown);
+      }
+    }
     const maxInputTokensForCall = await resolveMaxInputTokensForCall(parsedModelEffective.canonical);
     if (typeof maxInputTokensForCall === "number" && Number.isFinite(maxInputTokensForCall) && maxInputTokensForCall > 0 && typeof promptPayload === "string") {
       const tokenCount = countTokens(promptPayload);
@@ -79309,7 +79575,7 @@ async function runCli(argv, { env: env3, fetch: fetch2, stdout, stderr }) {
   if (inputTarget.kind === "file") {
     let sizeLabel = null;
     try {
-      const stat = await import_promises4.default.stat(inputTarget.filePath);
+      const stat = await import_promises5.default.stat(inputTarget.filePath);
       if (stat.isFile()) {
         sizeLabel = formatBytes(stat.size);
       }
@@ -79411,22 +79677,33 @@ async function runCli(argv, { env: env3, fetch: fetch2, stdout, stderr }) {
   if (!url2) {
     throw new Error("Only HTTP and HTTPS URLs can be summarized");
   }
-  const firecrawlMode = requestedFirecrawlMode;
+  const wantsMarkdown = format2 === "markdown" && !isYoutubeUrl;
+  if (wantsMarkdown && markdownMode === "off") {
+    throw new Error("--format md conflicts with --markdown-mode off (use --format text)");
+  }
+  const firecrawlMode = (() => {
+    if (wantsMarkdown && !isYoutubeUrl && !firecrawlExplicitlySet && firecrawlConfigured) {
+      return "always";
+    }
+    return requestedFirecrawlMode;
+  })();
   if (firecrawlMode === "always" && !firecrawlConfigured) {
     throw new Error("--firecrawl always requires FIRECRAWL_API_KEY");
   }
-  const effectiveMarkdownMode = markdownMode;
-  const markdownRequested = extractOnly && !isYoutubeUrl && effectiveMarkdownMode !== "off";
+  const markdownRequested = wantsMarkdown;
+  const effectiveMarkdownMode = markdownRequested ? markdownMode : "off";
   const hasKeyForModel = parsedModelForLlm.provider === "xai" ? xaiConfigured : parsedModelForLlm.provider === "google" ? googleConfigured : parsedModelForLlm.provider === "anthropic" ? anthropicConfigured : Boolean(apiKey);
   const markdownProvider = hasKeyForModel ? parsedModelForLlm.provider : "none";
   if (markdownRequested && effectiveMarkdownMode === "llm" && !hasKeyForModel) {
     const required2 = parsedModelForLlm.provider === "xai" ? "XAI_API_KEY" : parsedModelForLlm.provider === "google" ? "GEMINI_API_KEY (or GOOGLE_GENERATIVE_AI_API_KEY / GOOGLE_API_KEY)" : parsedModelForLlm.provider === "anthropic" ? "ANTHROPIC_API_KEY" : "OPENAI_API_KEY";
-    throw new Error(`--markdown llm requires ${required2} for model ${parsedModelForLlm.canonical}`);
+    throw new Error(
+      `--markdown-mode llm requires ${required2} for model ${parsedModelForLlm.canonical}`
+    );
   }
   writeVerbose(
     stderr,
     verbose,
-    `config url=${url2} timeoutMs=${timeoutMs} youtube=${youtubeMode} firecrawl=${firecrawlMode} length=${lengthArg.kind === "preset" ? lengthArg.preset : `${lengthArg.maxCharacters} chars`} maxOutputTokens=${formatOptionalNumber(maxOutputTokensArg)} json=${json3} extractOnly=${extractOnly} markdown=${effectiveMarkdownMode} model=${model} stream=${effectiveStreamMode} render=${effectiveRenderMode}`,
+    `config url=${url2} timeoutMs=${timeoutMs} youtube=${youtubeMode} firecrawl=${firecrawlMode} length=${lengthArg.kind === "preset" ? lengthArg.preset : `${lengthArg.maxCharacters} chars`} maxOutputTokens=${formatOptionalNumber(maxOutputTokensArg)} json=${json3} extract=${extractMode} format=${format2} preprocess=${preprocessMode} markdownMode=${markdownMode} model=${model} stream=${effectiveStreamMode} render=${effectiveRenderMode}`,
     verboseColor
   );
   writeVerbose(
@@ -79450,7 +79727,7 @@ async function runCli(argv, { env: env3, fetch: fetch2, stdout, stderr }) {
     verboseColor
   );
   const scrapeWithFirecrawl = firecrawlConfigured && firecrawlMode !== "off" ? createFirecrawlScraper({ apiKey: firecrawlApiKey, fetchImpl: trackedFetch }) : null;
-  const convertHtmlToMarkdown = markdownRequested && (effectiveMarkdownMode === "llm" || markdownProvider !== "none") ? createHtmlToMarkdownConverter({
+  const llmHtmlToMarkdown = markdownRequested && (effectiveMarkdownMode === "llm" || markdownProvider !== "none") ? createHtmlToMarkdownConverter({
     modelId: model,
     xaiApiKey: xaiConfigured ? xaiApiKey : null,
     googleApiKey: googleConfigured ? googleApiKey : null,
@@ -79463,6 +79740,40 @@ async function runCli(argv, { env: env3, fetch: fetch2, stdout, stderr }) {
       llmCalls.push({ provider, model: usedModel, usage, purpose: "markdown" });
     }
   }) : null;
+  const markitdownHtmlToMarkdown = markdownRequested && preprocessMode !== "off" && hasUvxCli(env3) ? async (args) => {
+    void args.url;
+    void args.title;
+    void args.siteName;
+    return convertToMarkdownWithMarkitdown({
+      bytes: new TextEncoder().encode(args.html),
+      filenameHint: "page.html",
+      mediaTypeHint: "text/html",
+      uvxCommand: env3.UVX_PATH,
+      timeoutMs: args.timeoutMs,
+      env: env3,
+      execFileImpl
+    });
+  } : null;
+  const convertHtmlToMarkdown = markdownRequested ? async (args) => {
+    if (effectiveMarkdownMode === "llm") {
+      if (!llmHtmlToMarkdown) {
+        throw new Error("No HTML\u2192Markdown converter configured");
+      }
+      return llmHtmlToMarkdown(args);
+    }
+    if (llmHtmlToMarkdown) {
+      try {
+        return await llmHtmlToMarkdown(args);
+      } catch (error47) {
+        if (!markitdownHtmlToMarkdown) throw error47;
+        return await markitdownHtmlToMarkdown(args);
+      }
+    }
+    if (markitdownHtmlToMarkdown) {
+      return await markitdownHtmlToMarkdown(args);
+    }
+    throw new Error("No HTML\u2192Markdown converter configured");
+  } : null;
   const readTweetWithBirdClient = hasBirdCli(env3) ? ({ url: url3, timeoutMs: timeoutMs2 }) => readTweetWithBird({ url: url3, timeoutMs: timeoutMs2, env: env3 }) : null;
   writeVerbose(stderr, verbose, "extract start", verboseColor);
   const stopOscProgress = startOscProgress2({
@@ -79643,7 +79954,7 @@ async function runCli(argv, { env: env3, fetch: fetch2, stdout, stderr }) {
     if (progressEnabled) {
       websiteProgress?.stop?.();
       spinner.setText(
-        extractOnly ? `Extracted (${extractedContentSize}${viaSourceLabel})` : `Summarizing (sent ${extractedContentSize}${viaSourceLabel})\u2026`
+        extractMode ? `Extracted (${extractedContentSize}${viaSourceLabel})` : `Summarizing (sent ${extractedContentSize}${viaSourceLabel})\u2026`
       );
     }
     writeVerbose(
@@ -79688,6 +79999,10 @@ async function runCli(argv, { env: env3, fetch: fetch2, stdout, stderr }) {
       )} attemptedProviders=${extracted.diagnostics.transcript.attemptedProviders.length > 0 ? extracted.diagnostics.transcript.attemptedProviders.join(",") : "none"} notes=${formatOptionalString(extracted.diagnostics.transcript.notes ?? null)}`,
       verboseColor
     );
+    if (extractMode && markdownRequested && preprocessMode !== "off" && effectiveMarkdownMode === "auto" && !extracted.diagnostics.markdown.used && !hasUvxCli(env3)) {
+      stderr.write(`${UVX_TIP}
+`);
+    }
     const isYouTube = extracted.siteName === "YouTube";
     const prompt = buildLinkSummaryPrompt({
       url: extracted.url,
@@ -79700,7 +80015,7 @@ async function runCli(argv, { env: env3, fetch: fetch2, stdout, stderr }) {
       summaryLength: lengthArg.kind === "preset" ? lengthArg.preset : { maxCharacters: lengthArg.maxCharacters },
       shares: []
     });
-    if (extractOnly) {
+    if (extractMode) {
       clearProgressForStdout();
       if (json3) {
         const finishReport = shouldComputeReport ? await buildReport() : null;
@@ -79711,6 +80026,7 @@ async function runCli(argv, { env: env3, fetch: fetch2, stdout, stderr }) {
             timeoutMs,
             youtube: youtubeMode,
             firecrawl: firecrawlMode,
+            format: format2,
             markdown: effectiveMarkdownMode,
             length: lengthArg.kind === "preset" ? { kind: "preset", preset: lengthArg.preset } : { kind: "chars", maxCharacters: lengthArg.maxCharacters },
             maxOutputTokens: maxOutputTokensArg,
@@ -79783,6 +80099,7 @@ async function runCli(argv, { env: env3, fetch: fetch2, stdout, stderr }) {
             timeoutMs,
             youtube: youtubeMode,
             firecrawl: firecrawlMode,
+            format: format2,
             markdown: effectiveMarkdownMode,
             length: lengthArg.kind === "preset" ? { kind: "preset", preset: lengthArg.preset } : { kind: "chars", maxCharacters: lengthArg.maxCharacters },
             maxOutputTokens: maxOutputTokensArg,
@@ -80057,6 +80374,7 @@ async function runCli(argv, { env: env3, fetch: fetch2, stdout, stderr }) {
           timeoutMs,
           youtube: youtubeMode,
           firecrawl: firecrawlMode,
+          format: format2,
           markdown: effectiveMarkdownMode,
           length: lengthArg.kind === "preset" ? { kind: "preset", preset: lengthArg.preset } : { kind: "chars", maxCharacters: lengthArg.maxCharacters },
           maxOutputTokens: maxOutputTokensArg,