@steipete/summarize 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.cjs CHANGED
@@ -1196,8 +1196,8 @@ var require_command = __commonJS({
1196
1196
  "node_modules/.pnpm/commander@14.0.2/node_modules/commander/lib/command.js"(exports2) {
1197
1197
  var EventEmitter = require("node:events").EventEmitter;
1198
1198
  var childProcess = require("node:child_process");
1199
- var path7 = require("node:path");
1200
- var fs6 = require("node:fs");
1199
+ var path8 = require("node:path");
1200
+ var fs7 = require("node:fs");
1201
1201
  var process14 = require("node:process");
1202
1202
  var { Argument: Argument2, humanReadableArgName } = require_argument();
1203
1203
  var { CommanderError: CommanderError2 } = require_error();
@@ -2191,7 +2191,7 @@ Expecting one of '${allowedValues.join("', '")}'`);
2191
2191
  * @param {string} subcommandName
2192
2192
  */
2193
2193
  _checkForMissingExecutable(executableFile, executableDir, subcommandName) {
2194
- if (fs6.existsSync(executableFile)) return;
2194
+ if (fs7.existsSync(executableFile)) return;
2195
2195
  const executableDirMessage = executableDir ? `searched for local subcommand relative to directory '${executableDir}'` : "no directory for search for local subcommand, use .executableDir() to supply a custom directory";
2196
2196
  const executableMissing = `'${executableFile}' does not exist
2197
2197
  - if '${subcommandName}' is not meant to be an executable command, remove description parameter from '.command()' and use '.description()' instead
@@ -2209,11 +2209,11 @@ Expecting one of '${allowedValues.join("', '")}'`);
2209
2209
  let launchWithNode = false;
2210
2210
  const sourceExt = [".js", ".ts", ".tsx", ".mjs", ".cjs"];
2211
2211
  function findFile(baseDir, baseName) {
2212
- const localBin = path7.resolve(baseDir, baseName);
2213
- if (fs6.existsSync(localBin)) return localBin;
2214
- if (sourceExt.includes(path7.extname(baseName))) return void 0;
2212
+ const localBin = path8.resolve(baseDir, baseName);
2213
+ if (fs7.existsSync(localBin)) return localBin;
2214
+ if (sourceExt.includes(path8.extname(baseName))) return void 0;
2215
2215
  const foundExt = sourceExt.find(
2216
- (ext) => fs6.existsSync(`${localBin}${ext}`)
2216
+ (ext) => fs7.existsSync(`${localBin}${ext}`)
2217
2217
  );
2218
2218
  if (foundExt) return `${localBin}${foundExt}`;
2219
2219
  return void 0;
@@ -2225,21 +2225,21 @@ Expecting one of '${allowedValues.join("', '")}'`);
2225
2225
  if (this._scriptPath) {
2226
2226
  let resolvedScriptPath;
2227
2227
  try {
2228
- resolvedScriptPath = fs6.realpathSync(this._scriptPath);
2228
+ resolvedScriptPath = fs7.realpathSync(this._scriptPath);
2229
2229
  } catch {
2230
2230
  resolvedScriptPath = this._scriptPath;
2231
2231
  }
2232
- executableDir = path7.resolve(
2233
- path7.dirname(resolvedScriptPath),
2232
+ executableDir = path8.resolve(
2233
+ path8.dirname(resolvedScriptPath),
2234
2234
  executableDir
2235
2235
  );
2236
2236
  }
2237
2237
  if (executableDir) {
2238
2238
  let localFile = findFile(executableDir, executableFile);
2239
2239
  if (!localFile && !subcommand._executableFile && this._scriptPath) {
2240
- const legacyName = path7.basename(
2240
+ const legacyName = path8.basename(
2241
2241
  this._scriptPath,
2242
- path7.extname(this._scriptPath)
2242
+ path8.extname(this._scriptPath)
2243
2243
  );
2244
2244
  if (legacyName !== this._name) {
2245
2245
  localFile = findFile(
@@ -2250,7 +2250,7 @@ Expecting one of '${allowedValues.join("', '")}'`);
2250
2250
  }
2251
2251
  executableFile = localFile || executableFile;
2252
2252
  }
2253
- launchWithNode = sourceExt.includes(path7.extname(executableFile));
2253
+ launchWithNode = sourceExt.includes(path8.extname(executableFile));
2254
2254
  let proc;
2255
2255
  if (process14.platform !== "win32") {
2256
2256
  if (launchWithNode) {
@@ -3165,7 +3165,7 @@ Expecting one of '${allowedValues.join("', '")}'`);
3165
3165
  * @return {Command}
3166
3166
  */
3167
3167
  nameFromFilename(filename) {
3168
- this._name = path7.basename(filename, path7.extname(filename));
3168
+ this._name = path8.basename(filename, path8.extname(filename));
3169
3169
  return this;
3170
3170
  }
3171
3171
  /**
@@ -3179,9 +3179,9 @@ Expecting one of '${allowedValues.join("', '")}'`);
3179
3179
  * @param {string} [path]
3180
3180
  * @return {(string|null|Command)}
3181
3181
  */
3182
- executableDir(path8) {
3183
- if (path8 === void 0) return this._executableDir;
3184
- this._executableDir = path8;
3182
+ executableDir(path9) {
3183
+ if (path9 === void 0) return this._executableDir;
3184
+ this._executableDir = path9;
3185
3185
  return this;
3186
3186
  }
3187
3187
  /**
@@ -4902,17 +4902,17 @@ var require_request = __commonJS({
4902
4902
  function buildUrl(id, options = {}) {
4903
4903
  var _a18, _b16;
4904
4904
  const method = ((_a18 = options.method) !== null && _a18 !== void 0 ? _a18 : "post").toLowerCase();
4905
- const path7 = ((_b16 = options.path) !== null && _b16 !== void 0 ? _b16 : "").replace(/^\//, "").replace(/\/{2,}/, "/");
4905
+ const path8 = ((_b16 = options.path) !== null && _b16 !== void 0 ? _b16 : "").replace(/^\//, "").replace(/\/{2,}/, "/");
4906
4906
  const input = options.input;
4907
4907
  const params = Object.assign(Object.assign({}, options.query || {}), method === "get" ? input : {});
4908
4908
  const queryParams = Object.keys(params).length > 0 ? `?${new URLSearchParams(params).toString()}` : "";
4909
4909
  if ((0, utils_1.isValidUrl)(id)) {
4910
4910
  const url3 = id.endsWith("/") ? id : `${id}/`;
4911
- return `${url3}${path7}${queryParams}`;
4911
+ return `${url3}${path8}${queryParams}`;
4912
4912
  }
4913
4913
  const appId = (0, utils_1.ensureEndpointIdFormat)(id);
4914
4914
  const subdomain = options.subdomain ? `${options.subdomain}.` : "";
4915
- const url2 = `https://${subdomain}fal.run/${appId}/${path7}`;
4915
+ const url2 = `https://${subdomain}fal.run/${appId}/${path8}`;
4916
4916
  return `${url2.replace(/\/$/, "")}${queryParams}`;
4917
4917
  }
4918
4918
  }
@@ -8709,10 +8709,10 @@ function mergeDefs(...defs) {
8709
8709
  function cloneDef(schema) {
8710
8710
  return mergeDefs(schema._zod.def);
8711
8711
  }
8712
- function getElementAtPath(obj, path7) {
8713
- if (!path7)
8712
+ function getElementAtPath(obj, path8) {
8713
+ if (!path8)
8714
8714
  return obj;
8715
- return path7.reduce((acc, key) => acc?.[key], obj);
8715
+ return path8.reduce((acc, key) => acc?.[key], obj);
8716
8716
  }
8717
8717
  function promiseAllObject(promisesObj) {
8718
8718
  const keys = Object.keys(promisesObj);
@@ -9007,11 +9007,11 @@ function aborted(x, startIndex = 0) {
9007
9007
  }
9008
9008
  return false;
9009
9009
  }
9010
- function prefixIssues(path7, issues) {
9010
+ function prefixIssues(path8, issues) {
9011
9011
  return issues.map((iss) => {
9012
9012
  var _a18;
9013
9013
  (_a18 = iss).path ?? (_a18.path = []);
9014
- iss.path.unshift(path7);
9014
+ iss.path.unshift(path8);
9015
9015
  return iss;
9016
9016
  });
9017
9017
  }
@@ -9232,7 +9232,7 @@ function formatError(error47, mapper = (issue2) => issue2.message) {
9232
9232
  }
9233
9233
  function treeifyError(error47, mapper = (issue2) => issue2.message) {
9234
9234
  const result = { errors: [] };
9235
- const processError = (error48, path7 = []) => {
9235
+ const processError = (error48, path8 = []) => {
9236
9236
  var _a18, _b16;
9237
9237
  for (const issue2 of error48.issues) {
9238
9238
  if (issue2.code === "invalid_union" && issue2.errors.length) {
@@ -9242,7 +9242,7 @@ function treeifyError(error47, mapper = (issue2) => issue2.message) {
9242
9242
  } else if (issue2.code === "invalid_element") {
9243
9243
  processError({ issues: issue2.issues }, issue2.path);
9244
9244
  } else {
9245
- const fullpath = [...path7, ...issue2.path];
9245
+ const fullpath = [...path8, ...issue2.path];
9246
9246
  if (fullpath.length === 0) {
9247
9247
  result.errors.push(mapper(issue2));
9248
9248
  continue;
@@ -9274,8 +9274,8 @@ function treeifyError(error47, mapper = (issue2) => issue2.message) {
9274
9274
  }
9275
9275
  function toDotPath(_path) {
9276
9276
  const segs = [];
9277
- const path7 = _path.map((seg) => typeof seg === "object" ? seg.key : seg);
9278
- for (const seg of path7) {
9277
+ const path8 = _path.map((seg) => typeof seg === "object" ? seg.key : seg);
9278
+ for (const seg of path8) {
9279
9279
  if (typeof seg === "number")
9280
9280
  segs.push(`[${seg}]`);
9281
9281
  else if (typeof seg === "symbol")
@@ -21848,13 +21848,13 @@ function resolveRef(ref, ctx) {
21848
21848
  if (!ref.startsWith("#")) {
21849
21849
  throw new Error("External $ref is not supported, only local refs (#/...) are allowed");
21850
21850
  }
21851
- const path7 = ref.slice(1).split("/").filter(Boolean);
21852
- if (path7.length === 0) {
21851
+ const path8 = ref.slice(1).split("/").filter(Boolean);
21852
+ if (path8.length === 0) {
21853
21853
  return ctx.rootSchema;
21854
21854
  }
21855
21855
  const defsKey = ctx.version === "draft-2020-12" ? "$defs" : "definitions";
21856
- if (path7[0] === defsKey) {
21857
- const key = path7[1];
21856
+ if (path8[0] === defsKey) {
21857
+ const key = path8[1];
21858
21858
  if (!key || !ctx.defs[key]) {
21859
21859
  throw new Error(`Reference not found: ${ref}`);
21860
21860
  }
@@ -22930,8 +22930,8 @@ var init_parseUtil = __esm({
22930
22930
  init_errors3();
22931
22931
  init_en2();
22932
22932
  makeIssue = (params) => {
22933
- const { data, path: path7, errorMaps, issueData } = params;
22934
- const fullPath = [...path7, ...issueData.path || []];
22933
+ const { data, path: path8, errorMaps, issueData } = params;
22934
+ const fullPath = [...path8, ...issueData.path || []];
22935
22935
  const fullIssue = {
22936
22936
  ...issueData,
22937
22937
  path: fullPath
@@ -23211,11 +23211,11 @@ var init_types = __esm({
23211
23211
  init_parseUtil();
23212
23212
  init_util2();
23213
23213
  ParseInputLazyPath = class {
23214
- constructor(parent, value, path7, key) {
23214
+ constructor(parent, value, path8, key) {
23215
23215
  this._cachedPath = [];
23216
23216
  this.parent = parent;
23217
23217
  this.data = value;
23218
- this._path = path7;
23218
+ this._path = path8;
23219
23219
  this._key = key;
23220
23220
  }
23221
23221
  get path() {
@@ -28921,8 +28921,8 @@ var require_token_util = __commonJS({
28921
28921
  saveToken: () => saveToken
28922
28922
  });
28923
28923
  module2.exports = __toCommonJS(token_util_exports);
28924
- var path7 = __toESM2(require("path"));
28925
- var fs6 = __toESM2(require("fs"));
28924
+ var path8 = __toESM2(require("path"));
28925
+ var fs7 = __toESM2(require("fs"));
28926
28926
  var import_token_error = require_token_error();
28927
28927
  var import_token_io = require_token_io();
28928
28928
  function getVercelDataDir() {
@@ -28931,18 +28931,18 @@ var require_token_util = __commonJS({
28931
28931
  if (!dataDir) {
28932
28932
  return null;
28933
28933
  }
28934
- return path7.join(dataDir, vercelFolder);
28934
+ return path8.join(dataDir, vercelFolder);
28935
28935
  }
28936
28936
  function getVercelCliToken() {
28937
28937
  const dataDir = getVercelDataDir();
28938
28938
  if (!dataDir) {
28939
28939
  return null;
28940
28940
  }
28941
- const tokenPath = path7.join(dataDir, "auth.json");
28942
- if (!fs6.existsSync(tokenPath)) {
28941
+ const tokenPath = path8.join(dataDir, "auth.json");
28942
+ if (!fs7.existsSync(tokenPath)) {
28943
28943
  return null;
28944
28944
  }
28945
- const token = fs6.readFileSync(tokenPath, "utf8");
28945
+ const token = fs7.readFileSync(tokenPath, "utf8");
28946
28946
  if (!token) {
28947
28947
  return null;
28948
28948
  }
@@ -28983,11 +28983,11 @@ var require_token_util = __commonJS({
28983
28983
  throw new import_token_error.VercelOidcTokenError("Unable to find root directory");
28984
28984
  }
28985
28985
  try {
28986
- const prjPath = path7.join(dir, ".vercel", "project.json");
28987
- if (!fs6.existsSync(prjPath)) {
28986
+ const prjPath = path8.join(dir, ".vercel", "project.json");
28987
+ if (!fs7.existsSync(prjPath)) {
28988
28988
  throw new import_token_error.VercelOidcTokenError("project.json not found");
28989
28989
  }
28990
- const prj = JSON.parse(fs6.readFileSync(prjPath, "utf8"));
28990
+ const prj = JSON.parse(fs7.readFileSync(prjPath, "utf8"));
28991
28991
  if (typeof prj.projectId !== "string" && typeof prj.orgId !== "string") {
28992
28992
  throw new TypeError("Expected a string-valued projectId property");
28993
28993
  }
@@ -29002,11 +29002,11 @@ var require_token_util = __commonJS({
29002
29002
  if (!dir) {
29003
29003
  throw new import_token_error.VercelOidcTokenError("Unable to find user data directory");
29004
29004
  }
29005
- const tokenPath = path7.join(dir, "com.vercel.token", `${projectId}.json`);
29005
+ const tokenPath = path8.join(dir, "com.vercel.token", `${projectId}.json`);
29006
29006
  const tokenJson = JSON.stringify(token);
29007
- fs6.mkdirSync(path7.dirname(tokenPath), { mode: 504, recursive: true });
29008
- fs6.writeFileSync(tokenPath, tokenJson);
29009
- fs6.chmodSync(tokenPath, 432);
29007
+ fs7.mkdirSync(path8.dirname(tokenPath), { mode: 504, recursive: true });
29008
+ fs7.writeFileSync(tokenPath, tokenJson);
29009
+ fs7.chmodSync(tokenPath, 432);
29010
29010
  return;
29011
29011
  } catch (e) {
29012
29012
  throw new import_token_error.VercelOidcTokenError(`Failed to save token`, e);
@@ -29018,11 +29018,11 @@ var require_token_util = __commonJS({
29018
29018
  if (!dir) {
29019
29019
  return null;
29020
29020
  }
29021
- const tokenPath = path7.join(dir, "com.vercel.token", `${projectId}.json`);
29022
- if (!fs6.existsSync(tokenPath)) {
29021
+ const tokenPath = path8.join(dir, "com.vercel.token", `${projectId}.json`);
29022
+ if (!fs7.existsSync(tokenPath)) {
29023
29023
  return null;
29024
29024
  }
29025
- const token = JSON.parse(fs6.readFileSync(tokenPath, "utf8"));
29025
+ const token = JSON.parse(fs7.readFileSync(tokenPath, "utf8"));
29026
29026
  assertVercelOidcTokenResponse(token);
29027
29027
  return token;
29028
29028
  } catch (e) {
@@ -42982,7 +42982,7 @@ function createXai(options = {}) {
42982
42982
  const createImageModel = (modelId) => {
42983
42983
  return new OpenAICompatibleImageModel(modelId, {
42984
42984
  provider: "xai.image",
42985
- url: ({ path: path7 }) => `${baseURL}${path7}`,
42985
+ url: ({ path: path8 }) => `${baseURL}${path8}`,
42986
42986
  headers: getHeaders,
42987
42987
  fetch: options.fetch,
42988
42988
  errorStructure: xaiErrorStructure
@@ -51227,37 +51227,37 @@ function createOpenAI(options = {}) {
51227
51227
  );
51228
51228
  const createChatModel = (modelId) => new OpenAIChatLanguageModel(modelId, {
51229
51229
  provider: `${providerName}.chat`,
51230
- url: ({ path: path7 }) => `${baseURL}${path7}`,
51230
+ url: ({ path: path8 }) => `${baseURL}${path8}`,
51231
51231
  headers: getHeaders,
51232
51232
  fetch: options.fetch
51233
51233
  });
51234
51234
  const createCompletionModel = (modelId) => new OpenAICompletionLanguageModel(modelId, {
51235
51235
  provider: `${providerName}.completion`,
51236
- url: ({ path: path7 }) => `${baseURL}${path7}`,
51236
+ url: ({ path: path8 }) => `${baseURL}${path8}`,
51237
51237
  headers: getHeaders,
51238
51238
  fetch: options.fetch
51239
51239
  });
51240
51240
  const createEmbeddingModel = (modelId) => new OpenAIEmbeddingModel(modelId, {
51241
51241
  provider: `${providerName}.embedding`,
51242
- url: ({ path: path7 }) => `${baseURL}${path7}`,
51242
+ url: ({ path: path8 }) => `${baseURL}${path8}`,
51243
51243
  headers: getHeaders,
51244
51244
  fetch: options.fetch
51245
51245
  });
51246
51246
  const createImageModel = (modelId) => new OpenAIImageModel(modelId, {
51247
51247
  provider: `${providerName}.image`,
51248
- url: ({ path: path7 }) => `${baseURL}${path7}`,
51248
+ url: ({ path: path8 }) => `${baseURL}${path8}`,
51249
51249
  headers: getHeaders,
51250
51250
  fetch: options.fetch
51251
51251
  });
51252
51252
  const createTranscriptionModel = (modelId) => new OpenAITranscriptionModel(modelId, {
51253
51253
  provider: `${providerName}.transcription`,
51254
- url: ({ path: path7 }) => `${baseURL}${path7}`,
51254
+ url: ({ path: path8 }) => `${baseURL}${path8}`,
51255
51255
  headers: getHeaders,
51256
51256
  fetch: options.fetch
51257
51257
  });
51258
51258
  const createSpeechModel = (modelId) => new OpenAISpeechModel(modelId, {
51259
51259
  provider: `${providerName}.speech`,
51260
- url: ({ path: path7 }) => `${baseURL}${path7}`,
51260
+ url: ({ path: path8 }) => `${baseURL}${path8}`,
51261
51261
  headers: getHeaders,
51262
51262
  fetch: options.fetch
51263
51263
  });
@@ -51272,7 +51272,7 @@ function createOpenAI(options = {}) {
51272
51272
  const createResponsesModel = (modelId) => {
51273
51273
  return new OpenAIResponsesLanguageModel(modelId, {
51274
51274
  provider: `${providerName}.responses`,
51275
- url: ({ path: path7 }) => `${baseURL}${path7}`,
51275
+ url: ({ path: path8 }) => `${baseURL}${path8}`,
51276
51276
  headers: getHeaders,
51277
51277
  fetch: options.fetch,
51278
51278
  fileIdPrefixes: ["file-"]
@@ -55600,8 +55600,8 @@ var init_dist11 = __esm({
55600
55600
  // src/run.ts
55601
55601
  var import_node_child_process2 = require("node:child_process");
55602
55602
  var import_node_fs6 = require("node:fs");
55603
- var import_promises4 = __toESM(require("node:fs/promises"), 1);
55604
- var import_node_path7 = __toESM(require("node:path"), 1);
55603
+ var import_promises5 = __toESM(require("node:fs/promises"), 1);
55604
+ var import_node_path8 = __toESM(require("node:path"), 1);
55605
55605
 
55606
55606
  // node_modules/.pnpm/commander@14.0.2/node_modules/commander/esm.mjs
55607
55607
  var import_index = __toESM(require_commander(), 1);
@@ -64632,7 +64632,7 @@ function transformGfmAutolinkLiterals(tree) {
64632
64632
  { ignore: ["link", "linkReference"] }
64633
64633
  );
64634
64634
  }
64635
- function findUrl(_, protocol, domain3, path7, match) {
64635
+ function findUrl(_, protocol, domain3, path8, match) {
64636
64636
  let prefix = "";
64637
64637
  if (!previous2(match)) {
64638
64638
  return false;
@@ -64645,7 +64645,7 @@ function findUrl(_, protocol, domain3, path7, match) {
64645
64645
  if (!isCorrectDomain(domain3)) {
64646
64646
  return false;
64647
64647
  }
64648
- const parts = splitUrl(domain3 + path7);
64648
+ const parts = splitUrl(domain3 + path8);
64649
64649
  if (!parts[0]) return false;
64650
64650
  const result = {
64651
64651
  type: "link",
@@ -67598,25 +67598,25 @@ function isRecord2(value) {
67598
67598
  function loadSummarizeConfig({ env: env3 }) {
67599
67599
  const home = env3.HOME?.trim() || (0, import_node_os3.homedir)();
67600
67600
  if (!home) return { config: null, path: null };
67601
- const path7 = (0, import_node_path.join)(home, ".summarize", "config.json");
67601
+ const path8 = (0, import_node_path.join)(home, ".summarize", "config.json");
67602
67602
  let raw;
67603
67603
  try {
67604
- raw = (0, import_node_fs.readFileSync)(path7, "utf8");
67604
+ raw = (0, import_node_fs.readFileSync)(path8, "utf8");
67605
67605
  } catch {
67606
- return { config: null, path: path7 };
67606
+ return { config: null, path: path8 };
67607
67607
  }
67608
67608
  let parsed;
67609
67609
  try {
67610
67610
  parsed = JSON.parse(raw);
67611
67611
  } catch (error47) {
67612
67612
  const message = error47 instanceof Error ? error47.message : String(error47);
67613
- throw new Error(`Invalid JSON in config file ${path7}: ${message}`);
67613
+ throw new Error(`Invalid JSON in config file ${path8}: ${message}`);
67614
67614
  }
67615
67615
  if (!isRecord2(parsed)) {
67616
- throw new Error(`Invalid config file ${path7}: expected an object at the top level`);
67616
+ throw new Error(`Invalid config file ${path8}: expected an object at the top level`);
67617
67617
  }
67618
67618
  const model = typeof parsed.model === "string" ? parsed.model : void 0;
67619
- return { config: { model }, path: path7 };
67619
+ return { config: { model }, path: path8 };
67620
67620
  }
67621
67621
 
67622
67622
  // src/content/asset.ts
@@ -71757,12 +71757,12 @@ var Mime = class {
71757
71757
  }
71758
71758
  return this;
71759
71759
  }
71760
- getType(path7) {
71761
- if (typeof path7 !== "string")
71760
+ getType(path8) {
71761
+ if (typeof path8 !== "string")
71762
71762
  return null;
71763
- const last = path7.replace(/^.*[/\\]/s, "").toLowerCase();
71763
+ const last = path8.replace(/^.*[/\\]/s, "").toLowerCase();
71764
71764
  const ext = last.replace(/^.*\./s, "").toLowerCase();
71765
- const hasPath = last.length < path7.length;
71765
+ const hasPath = last.length < path8.length;
71766
71766
  const hasDot = ext.length < last.length - 1;
71767
71767
  if (!hasDot && hasPath)
71768
71768
  return null;
@@ -72458,9 +72458,9 @@ var fetchTranscriptFromTranscriptEndpoint = async (fetchImpl, {
72458
72458
  return null;
72459
72459
  }
72460
72460
  };
72461
- function getNestedProperty(object3, path7) {
72461
+ function getNestedProperty(object3, path8) {
72462
72462
  let current = object3;
72463
- for (const key of path7) {
72463
+ for (const key of path8) {
72464
72464
  if (!(isRecord3(current) && key in current)) {
72465
72465
  return null;
72466
72466
  }
@@ -72468,8 +72468,8 @@ function getNestedProperty(object3, path7) {
72468
72468
  }
72469
72469
  return current;
72470
72470
  }
72471
- function getArrayProperty(object3, path7) {
72472
- const value = getNestedProperty(object3, path7);
72471
+ function getArrayProperty(object3, path8) {
72472
+ const value = getNestedProperty(object3, path8);
72473
72473
  return Array.isArray(value) ? value : null;
72474
72474
  }
72475
72475
  var extractTranscriptFromTranscriptEndpoint = (data) => {
@@ -74662,7 +74662,21 @@ function parseFirecrawlMode(raw) {
74662
74662
  function parseMarkdownMode(raw) {
74663
74663
  const normalized = raw.trim().toLowerCase();
74664
74664
  if (normalized === "off" || normalized === "auto" || normalized === "llm") return normalized;
74665
- throw new Error(`Unsupported --markdown: ${raw}`);
74665
+ throw new Error(`Unsupported --markdown-mode: ${raw}`);
74666
+ }
74667
+ function parseExtractFormat(raw) {
74668
+ const normalized = raw.trim().toLowerCase();
74669
+ if (normalized === "text" || normalized === "txt" || normalized === "plain") return "text";
74670
+ if (normalized === "md" || normalized === "markdown") return "markdown";
74671
+ throw new Error(`Unsupported --format: ${raw}`);
74672
+ }
74673
+ function parsePreprocessMode(raw) {
74674
+ const normalized = raw.trim().toLowerCase();
74675
+ if (normalized === "off" || normalized === "auto" || normalized === "always") {
74676
+ return normalized;
74677
+ }
74678
+ if (normalized === "on") return "always";
74679
+ throw new Error(`Unsupported --preprocess: ${raw}`);
74666
74680
  }
74667
74681
  function parseStreamMode(raw) {
74668
74682
  const normalized = raw.trim().toLowerCase();
@@ -75336,37 +75350,102 @@ function createHtmlToMarkdownConverter({
75336
75350
  };
75337
75351
  }
75338
75352
 
75353
+ // src/markitdown.ts
75354
+ var import_promises3 = __toESM(require("node:fs/promises"), 1);
75355
+ var import_node_os5 = require("node:os");
75356
+ var import_node_path4 = __toESM(require("node:path"), 1);
75357
+ function guessExtension({
75358
+ filenameHint,
75359
+ mediaType
75360
+ }) {
75361
+ const ext = filenameHint ? import_node_path4.default.extname(filenameHint).toLowerCase() : "";
75362
+ if (ext) return ext;
75363
+ if (mediaType === "text/html" || mediaType === "application/xhtml+xml") return ".html";
75364
+ if (mediaType === "application/pdf") return ".pdf";
75365
+ return ".bin";
75366
+ }
75367
+ async function execFileText(execFileImpl, cmd, args, options) {
75368
+ return await new Promise((resolve2, reject) => {
75369
+ execFileImpl(cmd, args, options, (error47, stdout, stderr) => {
75370
+ if (error47) {
75371
+ const stderrText2 = typeof stderr === "string" ? stderr : stderr.toString("utf8");
75372
+ const message = stderrText2.trim() ? `${error47.message}: ${stderrText2.trim()}` : error47.message;
75373
+ reject(new Error(message, { cause: error47 }));
75374
+ return;
75375
+ }
75376
+ const stdoutText = typeof stdout === "string" ? stdout : stdout.toString("utf8");
75377
+ const stderrText = typeof stderr === "string" ? stderr : stderr.toString("utf8");
75378
+ resolve2({ stdout: stdoutText, stderr: stderrText });
75379
+ });
75380
+ });
75381
+ }
75382
+ async function convertToMarkdownWithMarkitdown({
75383
+ bytes,
75384
+ filenameHint,
75385
+ mediaTypeHint,
75386
+ uvxCommand,
75387
+ timeoutMs,
75388
+ env: env3,
75389
+ execFileImpl
75390
+ }) {
75391
+ const dir = await import_promises3.default.mkdtemp(import_node_path4.default.join((0, import_node_os5.tmpdir)(), "summarize-markitdown-"));
75392
+ const ext = guessExtension({ filenameHint, mediaType: mediaTypeHint });
75393
+ const base2 = (filenameHint ? import_node_path4.default.basename(filenameHint, import_node_path4.default.extname(filenameHint)) : "input").replaceAll(/[^\w.-]+/g, "-").slice(0, 64);
75394
+ const filePath = import_node_path4.default.join(dir, `${base2}${ext}`);
75395
+ try {
75396
+ await import_promises3.default.writeFile(filePath, bytes);
75397
+ const from = "markitdown[all]";
75398
+ const { stdout } = await execFileText(
75399
+ execFileImpl,
75400
+ uvxCommand && uvxCommand.trim().length > 0 ? uvxCommand.trim() : "uvx",
75401
+ ["--from", from, "markitdown", filePath],
75402
+ {
75403
+ timeout: timeoutMs,
75404
+ env: { ...process.env, ...env3 },
75405
+ maxBuffer: 50 * 1024 * 1024
75406
+ }
75407
+ );
75408
+ const markdown = stdout.trim();
75409
+ if (!markdown) {
75410
+ throw new Error("markitdown returned empty output");
75411
+ }
75412
+ return markdown;
75413
+ } finally {
75414
+ await import_promises3.default.rm(dir, { recursive: true, force: true });
75415
+ }
75416
+ }
75417
+
75339
75418
  // src/pricing/litellm.ts
75340
- var import_node_path5 = __toESM(require("node:path"), 1);
75419
+ var import_node_path6 = __toESM(require("node:path"), 1);
75341
75420
 
75342
75421
  // node_modules/.pnpm/tokentally@https+++codeload.github.com+steipete+tokentally+tar.gz+99865e5c16f5340c9589a2c5d85c3ea47dbcec82/node_modules/tokentally/dist/node/litellm.js
75343
75422
  var import_node_fs4 = require("node:fs");
75344
- var import_promises3 = __toESM(require("node:fs/promises"), 1);
75345
- var import_node_path4 = __toESM(require("node:path"), 1);
75423
+ var import_promises4 = __toESM(require("node:fs/promises"), 1);
75424
+ var import_node_path5 = __toESM(require("node:path"), 1);
75346
75425
  var LITELLM_CATALOG_URL = "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json";
75347
75426
  var CACHE_TTL_MS = 7 * 24 * 60 * 60 * 1e3;
75348
75427
  function cachePaths(env3) {
75349
75428
  const override = env3.TOKENTALLY_CACHE_DIR?.trim();
75350
75429
  const home = env3.HOME?.trim();
75351
- const cacheDir = override && override.length > 0 ? override : home ? import_node_path4.default.join(home, ".tokentally", "cache") : null;
75430
+ const cacheDir = override && override.length > 0 ? override : home ? import_node_path5.default.join(home, ".tokentally", "cache") : null;
75352
75431
  if (!cacheDir)
75353
75432
  return null;
75354
75433
  return {
75355
- catalogPath: import_node_path4.default.join(cacheDir, "litellm-model_prices_and_context_window.json"),
75356
- metaPath: import_node_path4.default.join(cacheDir, "litellm-model_prices_and_context_window.meta.json")
75434
+ catalogPath: import_node_path5.default.join(cacheDir, "litellm-model_prices_and_context_window.json"),
75435
+ metaPath: import_node_path5.default.join(cacheDir, "litellm-model_prices_and_context_window.meta.json")
75357
75436
  };
75358
75437
  }
75359
75438
  async function readJsonFile(filePath) {
75360
75439
  try {
75361
- const raw = await import_promises3.default.readFile(filePath, "utf8");
75440
+ const raw = await import_promises4.default.readFile(filePath, "utf8");
75362
75441
  return JSON.parse(raw);
75363
75442
  } catch {
75364
75443
  return null;
75365
75444
  }
75366
75445
  }
75367
75446
  async function writeJsonFile(filePath, value) {
75368
- await import_promises3.default.mkdir(import_node_path4.default.dirname(filePath), { recursive: true });
75369
- await import_promises3.default.writeFile(filePath, `${JSON.stringify(value, null, 2)}
75447
+ await import_promises4.default.mkdir(import_node_path5.default.dirname(filePath), { recursive: true });
75448
+ await import_promises4.default.writeFile(filePath, `${JSON.stringify(value, null, 2)}
75370
75449
  `, "utf8");
75371
75450
  }
75372
75451
  function isStale(meta3, nowMs) {
@@ -75509,7 +75588,7 @@ function withDefaultCacheDir(env3) {
75509
75588
  }
75510
75589
  const home = env3.HOME?.trim();
75511
75590
  if (!home) return env3;
75512
- return { ...env3, TOKENTALLY_CACHE_DIR: import_node_path5.default.join(home, ".summarize", "cache") };
75591
+ return { ...env3, TOKENTALLY_CACHE_DIR: import_node_path6.default.join(home, ".summarize", "cache") };
75513
75592
  }
75514
75593
  async function loadLiteLlmCatalog2({
75515
75594
  env: env3,
@@ -75554,6 +75633,27 @@ ${headerLines.length > 0 ? `${headerLines.join("\n")}
75554
75633
  ` : ""}Return only the summary.`;
75555
75634
  return prompt;
75556
75635
  }
75636
+ function buildFileTextSummaryPrompt({
75637
+ filename,
75638
+ originalMediaType,
75639
+ contentMediaType,
75640
+ summaryLength,
75641
+ contentLength
75642
+ }) {
75643
+ const effectiveSummaryLength = typeof summaryLength === "string" ? summaryLength : summaryLength.maxCharacters > contentLength ? { maxCharacters: contentLength } : summaryLength;
75644
+ const maxCharactersLine = typeof effectiveSummaryLength === "string" ? "" : `Target length: around ${effectiveSummaryLength.maxCharacters.toLocaleString()} characters total (including Markdown and whitespace). This is a soft guideline; prioritize clarity.`;
75645
+ const headerLines = [
75646
+ filename ? `Filename: ${filename}` : null,
75647
+ originalMediaType ? `Original media type: ${originalMediaType}` : null,
75648
+ `Provided as: ${contentMediaType}`,
75649
+ `Extracted content length: ${contentLength.toLocaleString()} characters. Do not exceed the extracted content length; if the requested length is larger, keep the summary at or below the extracted content length and do not add details.`
75650
+ ].filter(Boolean);
75651
+ return `You summarize files for curious users. Summarize the file content below. Be factual and do not invent details. Format the answer in Markdown. Do not use emojis. ${maxCharactersLine}
75652
+
75653
+ ${headerLines.length > 0 ? `${headerLines.join("\n")}
75654
+
75655
+ ` : ""}Return only the summary.`;
75656
+ }
75557
75657
 
75558
75658
  // src/prompts/link-summary.ts
75559
75659
  var SUMMARY_LENGTH_DIRECTIVES = {
@@ -78283,16 +78383,16 @@ function startSpinner({
78283
78383
 
78284
78384
  // src/version.ts
78285
78385
  var import_node_fs5 = __toESM(require("node:fs"), 1);
78286
- var import_node_path6 = __toESM(require("node:path"), 1);
78386
+ var import_node_path7 = __toESM(require("node:path"), 1);
78287
78387
  var import_node_url = require("node:url");
78288
- var FALLBACK_VERSION = "0.3.0";
78388
+ var FALLBACK_VERSION = "0.4.0";
78289
78389
  function resolvePackageVersion(importMetaUrl) {
78290
78390
  const injected = typeof process !== "undefined" && typeof process.env.SUMMARIZE_VERSION === "string" ? process.env.SUMMARIZE_VERSION.trim() : "";
78291
78391
  if (injected.length > 0) return injected;
78292
78392
  const startDir = (() => {
78293
78393
  if (typeof importMetaUrl === "string" && importMetaUrl.trim().length > 0) {
78294
78394
  try {
78295
- return import_node_path6.default.dirname((0, import_node_url.fileURLToPath)(importMetaUrl));
78395
+ return import_node_path7.default.dirname((0, import_node_url.fileURLToPath)(importMetaUrl));
78296
78396
  } catch {
78297
78397
  }
78298
78398
  }
@@ -78301,7 +78401,7 @@ function resolvePackageVersion(importMetaUrl) {
78301
78401
  })();
78302
78402
  let dir = startDir;
78303
78403
  for (let i = 0; i < 10; i += 1) {
78304
- const candidate = import_node_path6.default.join(dir, "package.json");
78404
+ const candidate = import_node_path7.default.join(dir, "package.json");
78305
78405
  try {
78306
78406
  const raw = import_node_fs5.default.readFileSync(candidate, "utf8");
78307
78407
  const json3 = JSON.parse(raw);
@@ -78310,7 +78410,7 @@ function resolvePackageVersion(importMetaUrl) {
78310
78410
  }
78311
78411
  } catch {
78312
78412
  }
78313
- const parent = import_node_path6.default.dirname(dir);
78413
+ const parent = import_node_path7.default.dirname(dir);
78314
78414
  if (parent === dir) break;
78315
78415
  dir = parent;
78316
78416
  }
@@ -78319,6 +78419,7 @@ function resolvePackageVersion(importMetaUrl) {
78319
78419
 
78320
78420
  // src/run.ts
78321
78421
  var BIRD_TIP = "Tip: Install bird\u{1F426} for better Twitter support: https://github.com/steipete/bird";
78422
+ var UVX_TIP = "Tip: Install uv (uvx) for local Markdown conversion: brew install uv (or set UVX_PATH to your uvx binary).";
78322
78423
  var TWITTER_HOSTS2 = /* @__PURE__ */ new Set(["x.com", "twitter.com", "mobile.twitter.com"]);
78323
78424
  var SUMMARY_LENGTH_MAX_CHARACTERS = {
78324
78425
  short: 1200,
@@ -78350,10 +78451,22 @@ function isExecutable(filePath) {
78350
78451
  }
78351
78452
  function hasBirdCli(env3) {
78352
78453
  const candidates = [];
78353
- const pathEnv = env3.PATH ?? process.env.PATH ?? "";
78354
- for (const entry of pathEnv.split(import_node_path7.default.delimiter)) {
78454
+ const pathEnv = env3.PATH ?? "";
78455
+ for (const entry of pathEnv.split(import_node_path8.default.delimiter)) {
78355
78456
  if (!entry) continue;
78356
- candidates.push(import_node_path7.default.join(entry, "bird"));
78457
+ candidates.push(import_node_path8.default.join(entry, "bird"));
78458
+ }
78459
+ return candidates.some((candidate) => isExecutable(candidate));
78460
+ }
78461
/**
 * Reports whether a `uvx` launcher looks available for the given environment.
 *
 * A non-blank `UVX_PATH` is accepted as-is (it is not probed). Otherwise every
 * non-empty entry of `PATH` is checked for an executable named `uvx`.
 *
 * @param {Record<string, string | undefined>} env3 - Environment variables to inspect.
 * @returns {boolean} True when `UVX_PATH` is set or `uvx` is found on `PATH`.
 */
function hasUvxCli(env3) {
  const explicitPath = env3.UVX_PATH;
  if (typeof explicitPath === "string" && explicitPath.trim().length > 0) {
    return true;
  }
  const searchPath = env3.PATH ?? "";
  // Build the full candidate list first, then probe in PATH order.
  const uvxCandidates = searchPath
    .split(import_node_path8.default.delimiter)
    .filter((dir) => Boolean(dir))
    .map((dir) => import_node_path8.default.join(dir, "uvx"));
  return uvxCandidates.some((candidate) => isExecutable(candidate));
}
@@ -78404,6 +78517,15 @@ function withBirdTip(error47, url2, env3) {
78404
78517
  ${BIRD_TIP}`;
78405
78518
  return error47 instanceof Error ? new Error(combined, { cause: error47 }) : new Error(combined);
78406
78519
  }
78520
/**
 * Normalises a thrown value into an Error, appending the uv/uvx install tip
 * when no `uvx` launcher is detected for the environment.
 *
 * @param {unknown} error47 - The value that was thrown.
 * @param {Record<string, string | undefined>} env3 - Environment used to detect `uvx`.
 * @returns {Error} The original Error (or a wrapped one) when `uvx` is available;
 *   otherwise a new Error whose message ends with the tip and whose `cause`
 *   is the original Error, if there was one.
 */
function withUvxTip(error47, env3) {
  const uvxAvailable = hasUvxCli(env3);
  const isErrorObject = error47 instanceof Error;
  if (uvxAvailable) {
    // Nothing to suggest: just make sure callers receive an Error instance.
    if (isErrorObject) {
      return error47;
    }
    return new Error(String(error47));
  }
  const baseMessage = isErrorObject ? error47.message : String(error47);
  const messageWithTip = `${baseMessage}\n${UVX_TIP}`;
  if (isErrorObject) {
    return new Error(messageWithTip, { cause: error47 });
  }
  return new Error(messageWithTip);
}
78407
78529
  var MAX_TEXT_BYTES_DEFAULT = 10 * 1024 * 1024;
78408
78530
  function buildProgram() {
78409
78531
  return new Command().name("summarize").description("Summarize web pages and YouTube links (uses direct provider API keys).").argument("[input]", "URL or local file path to summarize").option(
@@ -78412,12 +78534,27 @@ function buildProgram() {
78412
78534
  "auto"
78413
78535
  ).option(
78414
78536
  "--firecrawl <mode>",
78415
- "Firecrawl usage: off, auto (fallback), always (try Firecrawl first).",
78537
+ "Firecrawl usage: off, auto (fallback), always (try Firecrawl first). Note: in --format md website mode, defaults to always when FIRECRAWL_API_KEY is set (unless --firecrawl is set explicitly).",
78416
78538
  "auto"
78417
78539
  ).option(
78418
- "--markdown <mode>",
78419
- "Website Markdown output: off, auto (use LLM when configured), llm (force LLM). Only affects --extract-only for non-YouTube URLs.",
78420
- "auto"
78540
+ "--format <format>",
78541
+ "Website/file content format: md|text. For websites: controls the extraction format. For files: controls whether we try to preprocess to Markdown for model compatibility. (default: text)",
78542
+ "text"
78543
+ ).addOption(
78544
+ new Option(
78545
+ "--preprocess <mode>",
78546
+ "Preprocess inputs for model compatibility: off, auto (fallback), always."
78547
+ ).choices(["off", "auto", "always"]).default("auto")
78548
+ ).addOption(
78549
+ new Option(
78550
+ "--markdown-mode <mode>",
78551
+ "HTML\u2192Markdown conversion: off, auto (prefer Firecrawl when configured, then LLM when configured, then markitdown when available), llm (force LLM). Only affects --format md for non-YouTube URLs."
78552
+ ).default("auto")
78553
+ ).addOption(
78554
+ new Option(
78555
+ "--markdown <mode>",
78556
+ "Deprecated alias for --markdown-mode (use --extract --format md --markdown-mode ...)"
78557
+ ).hideHelp()
78421
78558
  ).option(
78422
78559
  "--length <length>",
78423
78560
  "Summary length: short|medium|long|xl|xxl or a character limit like 20000, 20k",
@@ -78434,7 +78571,7 @@ function buildProgram() {
78434
78571
  "--model <model>",
78435
78572
  "LLM model id (gateway-style): xai/..., openai/..., google/... (default: google/gemini-3-flash-preview)",
78436
78573
  void 0
78437
- ).option("--extract-only", "Print extracted content and exit (no LLM summary)", false).option("--json", "Output structured JSON (includes prompt + metrics)", false).option(
78574
+ ).option("--extract", "Print extracted content and exit (no LLM summary)", false).addOption(new Option("--extract-only", "Deprecated alias for --extract").hideHelp()).option("--json", "Output structured JSON (includes prompt + metrics)", false).option(
78438
78575
  "--stream <mode>",
78439
78576
  "Stream LLM output: auto (TTY only), on, off. Note: streaming is disabled in --json mode.",
78440
78577
  "auto"
@@ -78550,6 +78687,22 @@ function getTextContentFromAttachment(attachment) {
78550
78687
  }
78551
78688
  return { content: "", bytes: 0 };
78552
78689
  }
78690
/**
 * Extracts the raw bytes of a file attachment.
 *
 * @param {{ part: { type: string, data?: unknown } }} attachment - Attachment wrapper.
 * @returns {Uint8Array | null} The part's data when the part is of type "file"
 *   and its data is a Uint8Array (including subclasses); otherwise null.
 */
function getFileBytesFromAttachment(attachment) {
  const { part } = attachment;
  const isFilePart = part.type === "file";
  if (!isFilePart) {
    return null;
  }
  const payload = part.data;
  if (payload instanceof Uint8Array) {
    return payload;
  }
  return null;
}
78695
/**
 * Decides whether a media type is one of the document formats that are
 * converted to Markdown with markitdown (PDF, RTF, HTML/XHTML, legacy MS
 * Office, and the OOXML family).
 *
 * The comparison is done on the bare `type/subtype`: parameters such as
 * `; charset=utf-8`, surrounding whitespace, and letter case are ignored, so
 * a Content-Type style value like `Text/HTML; charset=utf-8` is recognised.
 *
 * @param {string} mediaType - Media type, optionally with parameters.
 * @returns {boolean} True when the type is eligible; false otherwise,
 *   including for non-string input.
 */
function shouldMarkitdownConvertMediaType(mediaType) {
  if (typeof mediaType !== "string") return false;
  // Keep only the part before the first ";" so parameters never defeat the match.
  const [essence] = mediaType.split(";");
  const mt = essence.trim().toLowerCase();
  const exactMatches = [
    "application/pdf",
    "application/rtf",
    "text/html",
    "application/xhtml+xml",
    "application/msword",
    "application/vnd.ms-excel",
    "application/vnd.ms-powerpoint"
  ];
  if (exactMatches.includes(mt)) return true;
  // Covers docx/xlsx/pptx and the rest of the OOXML family.
  return mt.startsWith("application/vnd.openxmlformats-officedocument.");
}
78553
78706
  function assertProviderSupportsAttachment({
78554
78707
  provider,
78555
78708
  modelId,
@@ -78613,9 +78766,10 @@ function attachRichHelp(program2, env3, stdout) {
78613
78766
  () => `
78614
78767
  ${heading("Examples")}
78615
78768
  ${cmd('summarize "https://example.com"')}
78616
- ${cmd('summarize "https://example.com" --extract-only')} ${dim3("# website markdown (LLM if configured)")}
78617
- ${cmd('summarize "https://example.com" --extract-only --markdown llm')} ${dim3("# website markdown via LLM")}
78618
- ${cmd('summarize "https://www.youtube.com/watch?v=I845O57ZSy4&t=11s" --extract-only --youtube web')}
78769
+ ${cmd('summarize "https://example.com" --extract')} ${dim3("# extracted plain text")}
78770
+ ${cmd('summarize "https://example.com" --extract --format md')} ${dim3("# extracted markdown (prefers Firecrawl when configured)")}
78771
+ ${cmd('summarize "https://example.com" --extract --format md --markdown-mode llm')} ${dim3("# extracted markdown via LLM")}
78772
+ ${cmd('summarize "https://www.youtube.com/watch?v=I845O57ZSy4&t=11s" --extract --youtube web')}
78619
78773
  ${cmd('summarize "https://example.com" --length 20k --max-output-tokens 2k --timeout 2m --model openai/gpt-5.2')}
78620
78774
  ${cmd('OPENROUTER_API_KEY=... summarize "https://example.com" --model openai/openai/gpt-oss-20b')}
78621
78775
  ${cmd('summarize "https://example.com" --json --verbose')}
@@ -78756,10 +78910,11 @@ function writeFinishLine({
78756
78910
  stderr.write(`${ansi("1;32", line, color2)}
78757
78911
  `);
78758
78912
  }
78759
- async function runCli(argv, { env: env3, fetch: fetch2, stdout, stderr }) {
78913
+ async function runCli(argv, { env: env3, fetch: fetch2, execFile: execFileOverride, stdout, stderr }) {
78760
78914
  ;
78761
78915
  globalThis.AI_SDK_LOG_WARNINGS = false;
78762
78916
  const normalizedArgv = argv.filter((arg) => arg !== "--");
78917
+ const execFileImpl = execFileOverride ?? import_node_child_process2.execFile;
78763
78918
  const version2 = resolvePackageVersion();
78764
78919
  const program2 = buildProgram();
78765
78920
  program2.configureOutput({
@@ -78800,7 +78955,7 @@ async function runCli(argv, { env: env3, fetch: fetch2, stdout, stderr }) {
78800
78955
  program2.opts().maxOutputTokens
78801
78956
  );
78802
78957
  const timeoutMs = parseDurationMs(program2.opts().timeout);
78803
- const extractOnly = Boolean(program2.opts().extractOnly);
78958
+ const extractMode = Boolean(program2.opts().extract) || Boolean(program2.opts().extractOnly);
78804
78959
  const json3 = Boolean(program2.opts().json);
78805
78960
  const streamMode = parseStreamMode(program2.opts().stream);
78806
78961
  const renderMode = parseRenderMode(program2.opts().render);
@@ -78808,9 +78963,19 @@ async function runCli(argv, { env: env3, fetch: fetch2, stdout, stderr }) {
78808
78963
  const metricsMode = parseMetricsMode(program2.opts().metrics);
78809
78964
  const metricsEnabled = metricsMode !== "off";
78810
78965
  const metricsDetailed = metricsMode === "detailed";
78811
- const markdownMode = parseMarkdownMode(program2.opts().markdown);
78966
+ const preprocessMode = parsePreprocessMode(program2.opts().preprocess);
78967
+ const format2 = parseExtractFormat(program2.opts().format);
78812
78968
  const shouldComputeReport = metricsEnabled;
78813
78969
  const isYoutubeUrl = typeof url2 === "string" ? /youtube\.com|youtu\.be/i.test(url2) : false;
78970
+ const firecrawlExplicitlySet = normalizedArgv.some(
78971
+ (arg) => arg === "--firecrawl" || arg.startsWith("--firecrawl=")
78972
+ );
78973
+ const markdownModeExplicitlySet = normalizedArgv.some(
78974
+ (arg) => arg === "--markdown-mode" || arg.startsWith("--markdown-mode=") || arg === "--markdown" || arg.startsWith("--markdown=")
78975
+ );
78976
+ const markdownMode = format2 === "markdown" ? parseMarkdownMode(
78977
+ program2.opts().markdownMode ?? program2.opts().markdown ?? "auto"
78978
+ ) : "off";
78814
78979
  const requestedFirecrawlMode = parseFirecrawlMode(program2.opts().firecrawl);
78815
78980
  const modelArg = typeof program2.opts().model === "string" ? program2.opts().model : null;
78816
78981
  const { config: config2, path: configPath } = loadSummarizeConfig({ env: env3 });
@@ -78839,6 +79004,12 @@ async function runCli(argv, { env: env3, fetch: fetch2, stdout, stderr }) {
78839
79004
  const anthropicConfigured = typeof anthropicApiKey === "string" && anthropicApiKey.length > 0;
78840
79005
  const openrouterConfigured = typeof openrouterApiKey === "string" && openrouterApiKey.length > 0;
78841
79006
  const openrouterOptions = openRouterProviders ? { providers: openRouterProviders } : void 0;
79007
+ if (markdownModeExplicitlySet && format2 !== "markdown") {
79008
+ throw new Error("--markdown-mode is only supported with --format md");
79009
+ }
79010
+ if (markdownModeExplicitlySet && inputTarget.kind !== "url") {
79011
+ throw new Error("--markdown-mode is only supported for website URLs");
79012
+ }
78842
79013
  const llmCalls = [];
78843
79014
  let firecrawlRequests = 0;
78844
79015
  let apifyRequests = 0;
@@ -78932,7 +79103,7 @@ async function runCli(argv, { env: env3, fetch: fetch2, stdout, stderr }) {
78932
79103
  if (streamMode !== "auto") return streamMode;
78933
79104
  return isRichTty(stdout) ? "on" : "off";
78934
79105
  })();
78935
- const streamingEnabled = effectiveStreamMode === "on" && !json3 && !extractOnly;
79106
+ const streamingEnabled = effectiveStreamMode === "on" && !json3 && !extractMode;
78936
79107
  const effectiveRenderMode = (() => {
78937
79108
  if (renderMode !== "auto") return renderMode;
78938
79109
  if (!isRichTty(stdout)) return "plain";
@@ -78957,8 +79128,8 @@ async function runCli(argv, { env: env3, fetch: fetch2, stdout, stderr }) {
78957
79128
  `
78958
79129
  );
78959
79130
  };
78960
- if (extractOnly && inputTarget.kind !== "url") {
78961
- throw new Error("--extract-only is only supported for website/YouTube URLs");
79131
+ if (extractMode && inputTarget.kind !== "url") {
79132
+ throw new Error("--extract is only supported for website/YouTube URLs");
78962
79133
  }
78963
79134
  const progressEnabled = isRichTty(stderr) && !verbose && !json3;
78964
79135
  let clearProgressBeforeStdout = null;
@@ -78987,11 +79158,6 @@ async function runCli(argv, { env: env3, fetch: fetch2, stdout, stderr }) {
78987
79158
  `Missing ${requiredKeyEnv} for model ${parsedModel.canonical}. Set the env var or choose a different --model.`
78988
79159
  );
78989
79160
  }
78990
- assertProviderSupportsAttachment({
78991
- provider: parsedModel.provider,
78992
- modelId: parsedModel.canonical,
78993
- attachment: { part: attachment.part, mediaType: attachment.mediaType }
78994
- });
78995
79161
  const modelResolution = await resolveModelIdForLlmCall({
78996
79162
  parsedModel,
78997
79163
  apiKeys: { googleApiKey: apiKeysForLlm.googleApiKey },
@@ -79013,14 +79179,114 @@ async function runCli(argv, { env: env3, fetch: fetch2, stdout, stderr }) {
79013
79179
  `Text file too large (${formatBytes(textContent.bytes)}). Limit is ${formatBytes(MAX_TEXT_BYTES_DEFAULT)}.`
79014
79180
  );
79015
79181
  }
79182
+ const fileBytes = getFileBytesFromAttachment(attachment);
79183
+ const canPreprocessWithMarkitdown = format2 === "markdown" && preprocessMode !== "off" && hasUvxCli(env3) && attachment.part.type === "file" && fileBytes !== null && shouldMarkitdownConvertMediaType(attachment.mediaType);
79016
79184
  const summaryLengthTarget = lengthArg.kind === "preset" ? lengthArg.preset : { maxCharacters: lengthArg.maxCharacters };
79017
- const promptText = buildFileSummaryPrompt({
79018
- filename: attachment.filename,
79019
- mediaType: attachment.mediaType,
79020
- summaryLength: summaryLengthTarget,
79021
- contentLength: textContent?.content.length ?? null
79022
- });
79023
- const promptPayload = buildAssetPromptPayload({ promptText, attachment, textContent });
79185
+ let promptText = "";
79186
+ const buildAttachmentPromptPayload = () => {
79187
+ promptText = buildFileSummaryPrompt({
79188
+ filename: attachment.filename,
79189
+ mediaType: attachment.mediaType,
79190
+ summaryLength: summaryLengthTarget,
79191
+ contentLength: textContent?.content.length ?? null
79192
+ });
79193
+ return buildAssetPromptPayload({ promptText, attachment, textContent });
79194
+ };
79195
+ const buildMarkitdownPromptPayload = (markdown) => {
79196
+ promptText = buildFileTextSummaryPrompt({
79197
+ filename: attachment.filename,
79198
+ originalMediaType: attachment.mediaType,
79199
+ contentMediaType: "text/markdown",
79200
+ summaryLength: summaryLengthTarget,
79201
+ contentLength: markdown.length
79202
+ });
79203
+ return `${promptText}
79204
+
79205
+ ---
79206
+
79207
+ ${markdown}`.trim();
79208
+ };
79209
+ let preprocessedMarkdown = null;
79210
+ let usingPreprocessedMarkdown = false;
79211
+ if (preprocessMode === "always" && canPreprocessWithMarkitdown) {
79212
+ if (!fileBytes) {
79213
+ throw new Error("Internal error: missing file bytes for markitdown preprocessing");
79214
+ }
79215
+ try {
79216
+ preprocessedMarkdown = await convertToMarkdownWithMarkitdown({
79217
+ bytes: fileBytes,
79218
+ filenameHint: attachment.filename,
79219
+ mediaTypeHint: attachment.mediaType,
79220
+ uvxCommand: env3.UVX_PATH,
79221
+ timeoutMs,
79222
+ env: env3,
79223
+ execFileImpl
79224
+ });
79225
+ } catch (error47) {
79226
+ const message = error47 instanceof Error ? error47.message : String(error47);
79227
+ throw new Error(
79228
+ `Failed to preprocess ${attachment.mediaType} with markitdown: ${message} (disable with --preprocess off).`
79229
+ );
79230
+ }
79231
+ if (Buffer.byteLength(preprocessedMarkdown, "utf8") > MAX_TEXT_BYTES_DEFAULT) {
79232
+ throw new Error(
79233
+ `Preprocessed Markdown too large (${formatBytes(Buffer.byteLength(preprocessedMarkdown, "utf8"))}). Limit is ${formatBytes(MAX_TEXT_BYTES_DEFAULT)}.`
79234
+ );
79235
+ }
79236
+ usingPreprocessedMarkdown = true;
79237
+ }
79238
+ let promptPayload = buildAttachmentPromptPayload();
79239
+ if (usingPreprocessedMarkdown) {
79240
+ if (!preprocessedMarkdown) {
79241
+ throw new Error("Internal error: missing markitdown content for preprocessing");
79242
+ }
79243
+ promptPayload = buildMarkitdownPromptPayload(preprocessedMarkdown);
79244
+ }
79245
+ if (!usingPreprocessedMarkdown) {
79246
+ try {
79247
+ assertProviderSupportsAttachment({
79248
+ provider: parsedModel.provider,
79249
+ modelId: parsedModel.canonical,
79250
+ attachment: { part: attachment.part, mediaType: attachment.mediaType }
79251
+ });
79252
+ } catch (error47) {
79253
+ if (!canPreprocessWithMarkitdown) {
79254
+ if (format2 === "markdown" && preprocessMode !== "off" && attachment.part.type === "file" && shouldMarkitdownConvertMediaType(attachment.mediaType) && !hasUvxCli(env3)) {
79255
+ throw withUvxTip(error47, env3);
79256
+ }
79257
+ throw error47;
79258
+ }
79259
+ if (!fileBytes) {
79260
+ throw new Error("Internal error: missing file bytes for markitdown preprocessing");
79261
+ }
79262
+ try {
79263
+ preprocessedMarkdown = await convertToMarkdownWithMarkitdown({
79264
+ bytes: fileBytes,
79265
+ filenameHint: attachment.filename,
79266
+ mediaTypeHint: attachment.mediaType,
79267
+ uvxCommand: env3.UVX_PATH,
79268
+ timeoutMs,
79269
+ env: env3,
79270
+ execFileImpl
79271
+ });
79272
+ } catch (markitdownError) {
79273
+ if (preprocessMode === "auto") {
79274
+ throw error47;
79275
+ }
79276
+ const message = markitdownError instanceof Error ? markitdownError.message : String(markitdownError);
79277
+ throw new Error(
79278
+ `Failed to preprocess ${attachment.mediaType} with markitdown: ${message} (disable with --preprocess off).`
79279
+ );
79280
+ }
79281
+ if (Buffer.byteLength(preprocessedMarkdown, "utf8") > MAX_TEXT_BYTES_DEFAULT) {
79282
+ throw new Error(
79283
+ `Preprocessed Markdown too large (${formatBytes(Buffer.byteLength(preprocessedMarkdown, "utf8"))}). Limit is ${formatBytes(MAX_TEXT_BYTES_DEFAULT)}.`
79284
+ );
79285
+ }
79286
+ usingPreprocessedMarkdown = true;
79287
+ promptPayload = buildMarkitdownPromptPayload(preprocessedMarkdown);
79288
+ }
79289
+ }
79024
79290
  const maxInputTokensForCall = await resolveMaxInputTokensForCall(parsedModelEffective.canonical);
79025
79291
  if (typeof maxInputTokensForCall === "number" && Number.isFinite(maxInputTokensForCall) && maxInputTokensForCall > 0 && typeof promptPayload === "string") {
79026
79292
  const tokenCount = countTokens(promptPayload);
@@ -79309,7 +79575,7 @@ async function runCli(argv, { env: env3, fetch: fetch2, stdout, stderr }) {
79309
79575
  if (inputTarget.kind === "file") {
79310
79576
  let sizeLabel = null;
79311
79577
  try {
79312
- const stat = await import_promises4.default.stat(inputTarget.filePath);
79578
+ const stat = await import_promises5.default.stat(inputTarget.filePath);
79313
79579
  if (stat.isFile()) {
79314
79580
  sizeLabel = formatBytes(stat.size);
79315
79581
  }
@@ -79411,22 +79677,33 @@ async function runCli(argv, { env: env3, fetch: fetch2, stdout, stderr }) {
79411
79677
  if (!url2) {
79412
79678
  throw new Error("Only HTTP and HTTPS URLs can be summarized");
79413
79679
  }
79414
- const firecrawlMode = requestedFirecrawlMode;
79680
+ const wantsMarkdown = format2 === "markdown" && !isYoutubeUrl;
79681
+ if (wantsMarkdown && markdownMode === "off") {
79682
+ throw new Error("--format md conflicts with --markdown-mode off (use --format text)");
79683
+ }
79684
+ const firecrawlMode = (() => {
79685
+ if (wantsMarkdown && !isYoutubeUrl && !firecrawlExplicitlySet && firecrawlConfigured) {
79686
+ return "always";
79687
+ }
79688
+ return requestedFirecrawlMode;
79689
+ })();
79415
79690
  if (firecrawlMode === "always" && !firecrawlConfigured) {
79416
79691
  throw new Error("--firecrawl always requires FIRECRAWL_API_KEY");
79417
79692
  }
79418
- const effectiveMarkdownMode = markdownMode;
79419
- const markdownRequested = extractOnly && !isYoutubeUrl && effectiveMarkdownMode !== "off";
79693
+ const markdownRequested = wantsMarkdown;
79694
+ const effectiveMarkdownMode = markdownRequested ? markdownMode : "off";
79420
79695
  const hasKeyForModel = parsedModelForLlm.provider === "xai" ? xaiConfigured : parsedModelForLlm.provider === "google" ? googleConfigured : parsedModelForLlm.provider === "anthropic" ? anthropicConfigured : Boolean(apiKey);
79421
79696
  const markdownProvider = hasKeyForModel ? parsedModelForLlm.provider : "none";
79422
79697
  if (markdownRequested && effectiveMarkdownMode === "llm" && !hasKeyForModel) {
79423
79698
  const required2 = parsedModelForLlm.provider === "xai" ? "XAI_API_KEY" : parsedModelForLlm.provider === "google" ? "GEMINI_API_KEY (or GOOGLE_GENERATIVE_AI_API_KEY / GOOGLE_API_KEY)" : parsedModelForLlm.provider === "anthropic" ? "ANTHROPIC_API_KEY" : "OPENAI_API_KEY";
79424
- throw new Error(`--markdown llm requires ${required2} for model ${parsedModelForLlm.canonical}`);
79699
+ throw new Error(
79700
+ `--markdown-mode llm requires ${required2} for model ${parsedModelForLlm.canonical}`
79701
+ );
79425
79702
  }
79426
79703
  writeVerbose(
79427
79704
  stderr,
79428
79705
  verbose,
79429
- `config url=${url2} timeoutMs=${timeoutMs} youtube=${youtubeMode} firecrawl=${firecrawlMode} length=${lengthArg.kind === "preset" ? lengthArg.preset : `${lengthArg.maxCharacters} chars`} maxOutputTokens=${formatOptionalNumber(maxOutputTokensArg)} json=${json3} extractOnly=${extractOnly} markdown=${effectiveMarkdownMode} model=${model} stream=${effectiveStreamMode} render=${effectiveRenderMode}`,
79706
+ `config url=${url2} timeoutMs=${timeoutMs} youtube=${youtubeMode} firecrawl=${firecrawlMode} length=${lengthArg.kind === "preset" ? lengthArg.preset : `${lengthArg.maxCharacters} chars`} maxOutputTokens=${formatOptionalNumber(maxOutputTokensArg)} json=${json3} extract=${extractMode} format=${format2} preprocess=${preprocessMode} markdownMode=${markdownMode} model=${model} stream=${effectiveStreamMode} render=${effectiveRenderMode}`,
79430
79707
  verboseColor
79431
79708
  );
79432
79709
  writeVerbose(
@@ -79450,7 +79727,7 @@ async function runCli(argv, { env: env3, fetch: fetch2, stdout, stderr }) {
79450
79727
  verboseColor
79451
79728
  );
79452
79729
  const scrapeWithFirecrawl = firecrawlConfigured && firecrawlMode !== "off" ? createFirecrawlScraper({ apiKey: firecrawlApiKey, fetchImpl: trackedFetch }) : null;
79453
- const convertHtmlToMarkdown = markdownRequested && (effectiveMarkdownMode === "llm" || markdownProvider !== "none") ? createHtmlToMarkdownConverter({
79730
+ const llmHtmlToMarkdown = markdownRequested && (effectiveMarkdownMode === "llm" || markdownProvider !== "none") ? createHtmlToMarkdownConverter({
79454
79731
  modelId: model,
79455
79732
  xaiApiKey: xaiConfigured ? xaiApiKey : null,
79456
79733
  googleApiKey: googleConfigured ? googleApiKey : null,
@@ -79463,6 +79740,40 @@ async function runCli(argv, { env: env3, fetch: fetch2, stdout, stderr }) {
79463
79740
  llmCalls.push({ provider, model: usedModel, usage, purpose: "markdown" });
79464
79741
  }
79465
79742
  }) : null;
79743
+ const markitdownHtmlToMarkdown = markdownRequested && preprocessMode !== "off" && hasUvxCli(env3) ? async (args) => {
79744
+ void args.url;
79745
+ void args.title;
79746
+ void args.siteName;
79747
+ return convertToMarkdownWithMarkitdown({
79748
+ bytes: new TextEncoder().encode(args.html),
79749
+ filenameHint: "page.html",
79750
+ mediaTypeHint: "text/html",
79751
+ uvxCommand: env3.UVX_PATH,
79752
+ timeoutMs: args.timeoutMs,
79753
+ env: env3,
79754
+ execFileImpl
79755
+ });
79756
+ } : null;
79757
+ const convertHtmlToMarkdown = markdownRequested ? async (args) => {
79758
+ if (effectiveMarkdownMode === "llm") {
79759
+ if (!llmHtmlToMarkdown) {
79760
+ throw new Error("No HTML\u2192Markdown converter configured");
79761
+ }
79762
+ return llmHtmlToMarkdown(args);
79763
+ }
79764
+ if (llmHtmlToMarkdown) {
79765
+ try {
79766
+ return await llmHtmlToMarkdown(args);
79767
+ } catch (error47) {
79768
+ if (!markitdownHtmlToMarkdown) throw error47;
79769
+ return await markitdownHtmlToMarkdown(args);
79770
+ }
79771
+ }
79772
+ if (markitdownHtmlToMarkdown) {
79773
+ return await markitdownHtmlToMarkdown(args);
79774
+ }
79775
+ throw new Error("No HTML\u2192Markdown converter configured");
79776
+ } : null;
79466
79777
  const readTweetWithBirdClient = hasBirdCli(env3) ? ({ url: url3, timeoutMs: timeoutMs2 }) => readTweetWithBird({ url: url3, timeoutMs: timeoutMs2, env: env3 }) : null;
79467
79778
  writeVerbose(stderr, verbose, "extract start", verboseColor);
79468
79779
  const stopOscProgress = startOscProgress2({
@@ -79643,7 +79954,7 @@ async function runCli(argv, { env: env3, fetch: fetch2, stdout, stderr }) {
79643
79954
  if (progressEnabled) {
79644
79955
  websiteProgress?.stop?.();
79645
79956
  spinner.setText(
79646
- extractOnly ? `Extracted (${extractedContentSize}${viaSourceLabel})` : `Summarizing (sent ${extractedContentSize}${viaSourceLabel})\u2026`
79957
+ extractMode ? `Extracted (${extractedContentSize}${viaSourceLabel})` : `Summarizing (sent ${extractedContentSize}${viaSourceLabel})\u2026`
79647
79958
  );
79648
79959
  }
79649
79960
  writeVerbose(
@@ -79688,6 +79999,10 @@ async function runCli(argv, { env: env3, fetch: fetch2, stdout, stderr }) {
79688
79999
  )} attemptedProviders=${extracted.diagnostics.transcript.attemptedProviders.length > 0 ? extracted.diagnostics.transcript.attemptedProviders.join(",") : "none"} notes=${formatOptionalString(extracted.diagnostics.transcript.notes ?? null)}`,
79689
80000
  verboseColor
79690
80001
  );
80002
+ if (extractMode && markdownRequested && preprocessMode !== "off" && effectiveMarkdownMode === "auto" && !extracted.diagnostics.markdown.used && !hasUvxCli(env3)) {
80003
+ stderr.write(`${UVX_TIP}
80004
+ `);
80005
+ }
79691
80006
  const isYouTube = extracted.siteName === "YouTube";
79692
80007
  const prompt = buildLinkSummaryPrompt({
79693
80008
  url: extracted.url,
@@ -79700,7 +80015,7 @@ async function runCli(argv, { env: env3, fetch: fetch2, stdout, stderr }) {
79700
80015
  summaryLength: lengthArg.kind === "preset" ? lengthArg.preset : { maxCharacters: lengthArg.maxCharacters },
79701
80016
  shares: []
79702
80017
  });
79703
- if (extractOnly) {
80018
+ if (extractMode) {
79704
80019
  clearProgressForStdout();
79705
80020
  if (json3) {
79706
80021
  const finishReport = shouldComputeReport ? await buildReport() : null;
@@ -79711,6 +80026,7 @@ async function runCli(argv, { env: env3, fetch: fetch2, stdout, stderr }) {
79711
80026
  timeoutMs,
79712
80027
  youtube: youtubeMode,
79713
80028
  firecrawl: firecrawlMode,
80029
+ format: format2,
79714
80030
  markdown: effectiveMarkdownMode,
79715
80031
  length: lengthArg.kind === "preset" ? { kind: "preset", preset: lengthArg.preset } : { kind: "chars", maxCharacters: lengthArg.maxCharacters },
79716
80032
  maxOutputTokens: maxOutputTokensArg,
@@ -79783,6 +80099,7 @@ async function runCli(argv, { env: env3, fetch: fetch2, stdout, stderr }) {
79783
80099
  timeoutMs,
79784
80100
  youtube: youtubeMode,
79785
80101
  firecrawl: firecrawlMode,
80102
+ format: format2,
79786
80103
  markdown: effectiveMarkdownMode,
79787
80104
  length: lengthArg.kind === "preset" ? { kind: "preset", preset: lengthArg.preset } : { kind: "chars", maxCharacters: lengthArg.maxCharacters },
79788
80105
  maxOutputTokens: maxOutputTokensArg,
@@ -80057,6 +80374,7 @@ async function runCli(argv, { env: env3, fetch: fetch2, stdout, stderr }) {
80057
80374
  timeoutMs,
80058
80375
  youtube: youtubeMode,
80059
80376
  firecrawl: firecrawlMode,
80377
+ format: format2,
80060
80378
  markdown: effectiveMarkdownMode,
80061
80379
  length: lengthArg.kind === "preset" ? { kind: "preset", preset: lengthArg.preset } : { kind: "chars", maxCharacters: lengthArg.maxCharacters },
80062
80380
  maxOutputTokens: maxOutputTokensArg,