@steipete/summarize 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +10 -3
- package/README.md +7 -3
- package/dist/cli.cjs +451 -133
- package/dist/cli.cjs.map +4 -4
- package/dist/esm/flags.js +18 -1
- package/dist/esm/flags.js.map +1 -1
- package/dist/esm/markitdown.js +54 -0
- package/dist/esm/markitdown.js.map +1 -0
- package/dist/esm/prompts/file.js +19 -0
- package/dist/esm/prompts/file.js.map +1 -1
- package/dist/esm/prompts/index.js +1 -1
- package/dist/esm/prompts/index.js.map +1 -1
- package/dist/esm/run.js +262 -35
- package/dist/esm/run.js.map +1 -1
- package/dist/esm/version.js +1 -1
- package/dist/types/flags.d.ts +4 -0
- package/dist/types/markitdown.d.ts +10 -0
- package/dist/types/prompts/file.d.ts +7 -0
- package/dist/types/prompts/index.d.ts +1 -1
- package/dist/types/run.d.ts +3 -1
- package/dist/types/version.d.ts +1 -1
- package/docs/README.md +1 -1
- package/docs/extract-only.md +10 -7
- package/docs/firecrawl.md +2 -2
- package/docs/site/docs/config.html +3 -3
- package/docs/site/docs/extract-only.html +7 -5
- package/docs/site/docs/firecrawl.html +6 -6
- package/docs/site/docs/index.html +2 -2
- package/docs/site/docs/llm.html +2 -2
- package/docs/site/docs/openai.html +2 -2
- package/docs/site/docs/website.html +7 -4
- package/docs/site/docs/youtube.html +2 -2
- package/docs/site/index.html +1 -1
- package/docs/website.md +10 -7
- package/docs/youtube.md +1 -1
- package/package.json +1 -1
package/dist/cli.cjs
CHANGED
|
@@ -1196,8 +1196,8 @@ var require_command = __commonJS({
|
|
|
1196
1196
|
"node_modules/.pnpm/commander@14.0.2/node_modules/commander/lib/command.js"(exports2) {
|
|
1197
1197
|
var EventEmitter = require("node:events").EventEmitter;
|
|
1198
1198
|
var childProcess = require("node:child_process");
|
|
1199
|
-
var
|
|
1200
|
-
var
|
|
1199
|
+
var path8 = require("node:path");
|
|
1200
|
+
var fs7 = require("node:fs");
|
|
1201
1201
|
var process14 = require("node:process");
|
|
1202
1202
|
var { Argument: Argument2, humanReadableArgName } = require_argument();
|
|
1203
1203
|
var { CommanderError: CommanderError2 } = require_error();
|
|
@@ -2191,7 +2191,7 @@ Expecting one of '${allowedValues.join("', '")}'`);
|
|
|
2191
2191
|
* @param {string} subcommandName
|
|
2192
2192
|
*/
|
|
2193
2193
|
_checkForMissingExecutable(executableFile, executableDir, subcommandName) {
|
|
2194
|
-
if (
|
|
2194
|
+
if (fs7.existsSync(executableFile)) return;
|
|
2195
2195
|
const executableDirMessage = executableDir ? `searched for local subcommand relative to directory '${executableDir}'` : "no directory for search for local subcommand, use .executableDir() to supply a custom directory";
|
|
2196
2196
|
const executableMissing = `'${executableFile}' does not exist
|
|
2197
2197
|
- if '${subcommandName}' is not meant to be an executable command, remove description parameter from '.command()' and use '.description()' instead
|
|
@@ -2209,11 +2209,11 @@ Expecting one of '${allowedValues.join("', '")}'`);
|
|
|
2209
2209
|
let launchWithNode = false;
|
|
2210
2210
|
const sourceExt = [".js", ".ts", ".tsx", ".mjs", ".cjs"];
|
|
2211
2211
|
function findFile(baseDir, baseName) {
|
|
2212
|
-
const localBin =
|
|
2213
|
-
if (
|
|
2214
|
-
if (sourceExt.includes(
|
|
2212
|
+
const localBin = path8.resolve(baseDir, baseName);
|
|
2213
|
+
if (fs7.existsSync(localBin)) return localBin;
|
|
2214
|
+
if (sourceExt.includes(path8.extname(baseName))) return void 0;
|
|
2215
2215
|
const foundExt = sourceExt.find(
|
|
2216
|
-
(ext) =>
|
|
2216
|
+
(ext) => fs7.existsSync(`${localBin}${ext}`)
|
|
2217
2217
|
);
|
|
2218
2218
|
if (foundExt) return `${localBin}${foundExt}`;
|
|
2219
2219
|
return void 0;
|
|
@@ -2225,21 +2225,21 @@ Expecting one of '${allowedValues.join("', '")}'`);
|
|
|
2225
2225
|
if (this._scriptPath) {
|
|
2226
2226
|
let resolvedScriptPath;
|
|
2227
2227
|
try {
|
|
2228
|
-
resolvedScriptPath =
|
|
2228
|
+
resolvedScriptPath = fs7.realpathSync(this._scriptPath);
|
|
2229
2229
|
} catch {
|
|
2230
2230
|
resolvedScriptPath = this._scriptPath;
|
|
2231
2231
|
}
|
|
2232
|
-
executableDir =
|
|
2233
|
-
|
|
2232
|
+
executableDir = path8.resolve(
|
|
2233
|
+
path8.dirname(resolvedScriptPath),
|
|
2234
2234
|
executableDir
|
|
2235
2235
|
);
|
|
2236
2236
|
}
|
|
2237
2237
|
if (executableDir) {
|
|
2238
2238
|
let localFile = findFile(executableDir, executableFile);
|
|
2239
2239
|
if (!localFile && !subcommand._executableFile && this._scriptPath) {
|
|
2240
|
-
const legacyName =
|
|
2240
|
+
const legacyName = path8.basename(
|
|
2241
2241
|
this._scriptPath,
|
|
2242
|
-
|
|
2242
|
+
path8.extname(this._scriptPath)
|
|
2243
2243
|
);
|
|
2244
2244
|
if (legacyName !== this._name) {
|
|
2245
2245
|
localFile = findFile(
|
|
@@ -2250,7 +2250,7 @@ Expecting one of '${allowedValues.join("', '")}'`);
|
|
|
2250
2250
|
}
|
|
2251
2251
|
executableFile = localFile || executableFile;
|
|
2252
2252
|
}
|
|
2253
|
-
launchWithNode = sourceExt.includes(
|
|
2253
|
+
launchWithNode = sourceExt.includes(path8.extname(executableFile));
|
|
2254
2254
|
let proc;
|
|
2255
2255
|
if (process14.platform !== "win32") {
|
|
2256
2256
|
if (launchWithNode) {
|
|
@@ -3165,7 +3165,7 @@ Expecting one of '${allowedValues.join("', '")}'`);
|
|
|
3165
3165
|
* @return {Command}
|
|
3166
3166
|
*/
|
|
3167
3167
|
nameFromFilename(filename) {
|
|
3168
|
-
this._name =
|
|
3168
|
+
this._name = path8.basename(filename, path8.extname(filename));
|
|
3169
3169
|
return this;
|
|
3170
3170
|
}
|
|
3171
3171
|
/**
|
|
@@ -3179,9 +3179,9 @@ Expecting one of '${allowedValues.join("', '")}'`);
|
|
|
3179
3179
|
* @param {string} [path]
|
|
3180
3180
|
* @return {(string|null|Command)}
|
|
3181
3181
|
*/
|
|
3182
|
-
executableDir(
|
|
3183
|
-
if (
|
|
3184
|
-
this._executableDir =
|
|
3182
|
+
executableDir(path9) {
|
|
3183
|
+
if (path9 === void 0) return this._executableDir;
|
|
3184
|
+
this._executableDir = path9;
|
|
3185
3185
|
return this;
|
|
3186
3186
|
}
|
|
3187
3187
|
/**
|
|
@@ -4902,17 +4902,17 @@ var require_request = __commonJS({
|
|
|
4902
4902
|
function buildUrl(id, options = {}) {
|
|
4903
4903
|
var _a18, _b16;
|
|
4904
4904
|
const method = ((_a18 = options.method) !== null && _a18 !== void 0 ? _a18 : "post").toLowerCase();
|
|
4905
|
-
const
|
|
4905
|
+
const path8 = ((_b16 = options.path) !== null && _b16 !== void 0 ? _b16 : "").replace(/^\//, "").replace(/\/{2,}/, "/");
|
|
4906
4906
|
const input = options.input;
|
|
4907
4907
|
const params = Object.assign(Object.assign({}, options.query || {}), method === "get" ? input : {});
|
|
4908
4908
|
const queryParams = Object.keys(params).length > 0 ? `?${new URLSearchParams(params).toString()}` : "";
|
|
4909
4909
|
if ((0, utils_1.isValidUrl)(id)) {
|
|
4910
4910
|
const url3 = id.endsWith("/") ? id : `${id}/`;
|
|
4911
|
-
return `${url3}${
|
|
4911
|
+
return `${url3}${path8}${queryParams}`;
|
|
4912
4912
|
}
|
|
4913
4913
|
const appId = (0, utils_1.ensureEndpointIdFormat)(id);
|
|
4914
4914
|
const subdomain = options.subdomain ? `${options.subdomain}.` : "";
|
|
4915
|
-
const url2 = `https://${subdomain}fal.run/${appId}/${
|
|
4915
|
+
const url2 = `https://${subdomain}fal.run/${appId}/${path8}`;
|
|
4916
4916
|
return `${url2.replace(/\/$/, "")}${queryParams}`;
|
|
4917
4917
|
}
|
|
4918
4918
|
}
|
|
@@ -8709,10 +8709,10 @@ function mergeDefs(...defs) {
|
|
|
8709
8709
|
function cloneDef(schema) {
|
|
8710
8710
|
return mergeDefs(schema._zod.def);
|
|
8711
8711
|
}
|
|
8712
|
-
function getElementAtPath(obj,
|
|
8713
|
-
if (!
|
|
8712
|
+
function getElementAtPath(obj, path8) {
|
|
8713
|
+
if (!path8)
|
|
8714
8714
|
return obj;
|
|
8715
|
-
return
|
|
8715
|
+
return path8.reduce((acc, key) => acc?.[key], obj);
|
|
8716
8716
|
}
|
|
8717
8717
|
function promiseAllObject(promisesObj) {
|
|
8718
8718
|
const keys = Object.keys(promisesObj);
|
|
@@ -9007,11 +9007,11 @@ function aborted(x, startIndex = 0) {
|
|
|
9007
9007
|
}
|
|
9008
9008
|
return false;
|
|
9009
9009
|
}
|
|
9010
|
-
function prefixIssues(
|
|
9010
|
+
function prefixIssues(path8, issues) {
|
|
9011
9011
|
return issues.map((iss) => {
|
|
9012
9012
|
var _a18;
|
|
9013
9013
|
(_a18 = iss).path ?? (_a18.path = []);
|
|
9014
|
-
iss.path.unshift(
|
|
9014
|
+
iss.path.unshift(path8);
|
|
9015
9015
|
return iss;
|
|
9016
9016
|
});
|
|
9017
9017
|
}
|
|
@@ -9232,7 +9232,7 @@ function formatError(error47, mapper = (issue2) => issue2.message) {
|
|
|
9232
9232
|
}
|
|
9233
9233
|
function treeifyError(error47, mapper = (issue2) => issue2.message) {
|
|
9234
9234
|
const result = { errors: [] };
|
|
9235
|
-
const processError = (error48,
|
|
9235
|
+
const processError = (error48, path8 = []) => {
|
|
9236
9236
|
var _a18, _b16;
|
|
9237
9237
|
for (const issue2 of error48.issues) {
|
|
9238
9238
|
if (issue2.code === "invalid_union" && issue2.errors.length) {
|
|
@@ -9242,7 +9242,7 @@ function treeifyError(error47, mapper = (issue2) => issue2.message) {
|
|
|
9242
9242
|
} else if (issue2.code === "invalid_element") {
|
|
9243
9243
|
processError({ issues: issue2.issues }, issue2.path);
|
|
9244
9244
|
} else {
|
|
9245
|
-
const fullpath = [...
|
|
9245
|
+
const fullpath = [...path8, ...issue2.path];
|
|
9246
9246
|
if (fullpath.length === 0) {
|
|
9247
9247
|
result.errors.push(mapper(issue2));
|
|
9248
9248
|
continue;
|
|
@@ -9274,8 +9274,8 @@ function treeifyError(error47, mapper = (issue2) => issue2.message) {
|
|
|
9274
9274
|
}
|
|
9275
9275
|
function toDotPath(_path) {
|
|
9276
9276
|
const segs = [];
|
|
9277
|
-
const
|
|
9278
|
-
for (const seg of
|
|
9277
|
+
const path8 = _path.map((seg) => typeof seg === "object" ? seg.key : seg);
|
|
9278
|
+
for (const seg of path8) {
|
|
9279
9279
|
if (typeof seg === "number")
|
|
9280
9280
|
segs.push(`[${seg}]`);
|
|
9281
9281
|
else if (typeof seg === "symbol")
|
|
@@ -21848,13 +21848,13 @@ function resolveRef(ref, ctx) {
|
|
|
21848
21848
|
if (!ref.startsWith("#")) {
|
|
21849
21849
|
throw new Error("External $ref is not supported, only local refs (#/...) are allowed");
|
|
21850
21850
|
}
|
|
21851
|
-
const
|
|
21852
|
-
if (
|
|
21851
|
+
const path8 = ref.slice(1).split("/").filter(Boolean);
|
|
21852
|
+
if (path8.length === 0) {
|
|
21853
21853
|
return ctx.rootSchema;
|
|
21854
21854
|
}
|
|
21855
21855
|
const defsKey = ctx.version === "draft-2020-12" ? "$defs" : "definitions";
|
|
21856
|
-
if (
|
|
21857
|
-
const key =
|
|
21856
|
+
if (path8[0] === defsKey) {
|
|
21857
|
+
const key = path8[1];
|
|
21858
21858
|
if (!key || !ctx.defs[key]) {
|
|
21859
21859
|
throw new Error(`Reference not found: ${ref}`);
|
|
21860
21860
|
}
|
|
@@ -22930,8 +22930,8 @@ var init_parseUtil = __esm({
|
|
|
22930
22930
|
init_errors3();
|
|
22931
22931
|
init_en2();
|
|
22932
22932
|
makeIssue = (params) => {
|
|
22933
|
-
const { data, path:
|
|
22934
|
-
const fullPath = [...
|
|
22933
|
+
const { data, path: path8, errorMaps, issueData } = params;
|
|
22934
|
+
const fullPath = [...path8, ...issueData.path || []];
|
|
22935
22935
|
const fullIssue = {
|
|
22936
22936
|
...issueData,
|
|
22937
22937
|
path: fullPath
|
|
@@ -23211,11 +23211,11 @@ var init_types = __esm({
|
|
|
23211
23211
|
init_parseUtil();
|
|
23212
23212
|
init_util2();
|
|
23213
23213
|
ParseInputLazyPath = class {
|
|
23214
|
-
constructor(parent, value,
|
|
23214
|
+
constructor(parent, value, path8, key) {
|
|
23215
23215
|
this._cachedPath = [];
|
|
23216
23216
|
this.parent = parent;
|
|
23217
23217
|
this.data = value;
|
|
23218
|
-
this._path =
|
|
23218
|
+
this._path = path8;
|
|
23219
23219
|
this._key = key;
|
|
23220
23220
|
}
|
|
23221
23221
|
get path() {
|
|
@@ -28921,8 +28921,8 @@ var require_token_util = __commonJS({
|
|
|
28921
28921
|
saveToken: () => saveToken
|
|
28922
28922
|
});
|
|
28923
28923
|
module2.exports = __toCommonJS(token_util_exports);
|
|
28924
|
-
var
|
|
28925
|
-
var
|
|
28924
|
+
var path8 = __toESM2(require("path"));
|
|
28925
|
+
var fs7 = __toESM2(require("fs"));
|
|
28926
28926
|
var import_token_error = require_token_error();
|
|
28927
28927
|
var import_token_io = require_token_io();
|
|
28928
28928
|
function getVercelDataDir() {
|
|
@@ -28931,18 +28931,18 @@ var require_token_util = __commonJS({
|
|
|
28931
28931
|
if (!dataDir) {
|
|
28932
28932
|
return null;
|
|
28933
28933
|
}
|
|
28934
|
-
return
|
|
28934
|
+
return path8.join(dataDir, vercelFolder);
|
|
28935
28935
|
}
|
|
28936
28936
|
function getVercelCliToken() {
|
|
28937
28937
|
const dataDir = getVercelDataDir();
|
|
28938
28938
|
if (!dataDir) {
|
|
28939
28939
|
return null;
|
|
28940
28940
|
}
|
|
28941
|
-
const tokenPath =
|
|
28942
|
-
if (!
|
|
28941
|
+
const tokenPath = path8.join(dataDir, "auth.json");
|
|
28942
|
+
if (!fs7.existsSync(tokenPath)) {
|
|
28943
28943
|
return null;
|
|
28944
28944
|
}
|
|
28945
|
-
const token =
|
|
28945
|
+
const token = fs7.readFileSync(tokenPath, "utf8");
|
|
28946
28946
|
if (!token) {
|
|
28947
28947
|
return null;
|
|
28948
28948
|
}
|
|
@@ -28983,11 +28983,11 @@ var require_token_util = __commonJS({
|
|
|
28983
28983
|
throw new import_token_error.VercelOidcTokenError("Unable to find root directory");
|
|
28984
28984
|
}
|
|
28985
28985
|
try {
|
|
28986
|
-
const prjPath =
|
|
28987
|
-
if (!
|
|
28986
|
+
const prjPath = path8.join(dir, ".vercel", "project.json");
|
|
28987
|
+
if (!fs7.existsSync(prjPath)) {
|
|
28988
28988
|
throw new import_token_error.VercelOidcTokenError("project.json not found");
|
|
28989
28989
|
}
|
|
28990
|
-
const prj = JSON.parse(
|
|
28990
|
+
const prj = JSON.parse(fs7.readFileSync(prjPath, "utf8"));
|
|
28991
28991
|
if (typeof prj.projectId !== "string" && typeof prj.orgId !== "string") {
|
|
28992
28992
|
throw new TypeError("Expected a string-valued projectId property");
|
|
28993
28993
|
}
|
|
@@ -29002,11 +29002,11 @@ var require_token_util = __commonJS({
|
|
|
29002
29002
|
if (!dir) {
|
|
29003
29003
|
throw new import_token_error.VercelOidcTokenError("Unable to find user data directory");
|
|
29004
29004
|
}
|
|
29005
|
-
const tokenPath =
|
|
29005
|
+
const tokenPath = path8.join(dir, "com.vercel.token", `${projectId}.json`);
|
|
29006
29006
|
const tokenJson = JSON.stringify(token);
|
|
29007
|
-
|
|
29008
|
-
|
|
29009
|
-
|
|
29007
|
+
fs7.mkdirSync(path8.dirname(tokenPath), { mode: 504, recursive: true });
|
|
29008
|
+
fs7.writeFileSync(tokenPath, tokenJson);
|
|
29009
|
+
fs7.chmodSync(tokenPath, 432);
|
|
29010
29010
|
return;
|
|
29011
29011
|
} catch (e) {
|
|
29012
29012
|
throw new import_token_error.VercelOidcTokenError(`Failed to save token`, e);
|
|
@@ -29018,11 +29018,11 @@ var require_token_util = __commonJS({
|
|
|
29018
29018
|
if (!dir) {
|
|
29019
29019
|
return null;
|
|
29020
29020
|
}
|
|
29021
|
-
const tokenPath =
|
|
29022
|
-
if (!
|
|
29021
|
+
const tokenPath = path8.join(dir, "com.vercel.token", `${projectId}.json`);
|
|
29022
|
+
if (!fs7.existsSync(tokenPath)) {
|
|
29023
29023
|
return null;
|
|
29024
29024
|
}
|
|
29025
|
-
const token = JSON.parse(
|
|
29025
|
+
const token = JSON.parse(fs7.readFileSync(tokenPath, "utf8"));
|
|
29026
29026
|
assertVercelOidcTokenResponse(token);
|
|
29027
29027
|
return token;
|
|
29028
29028
|
} catch (e) {
|
|
@@ -42982,7 +42982,7 @@ function createXai(options = {}) {
|
|
|
42982
42982
|
const createImageModel = (modelId) => {
|
|
42983
42983
|
return new OpenAICompatibleImageModel(modelId, {
|
|
42984
42984
|
provider: "xai.image",
|
|
42985
|
-
url: ({ path:
|
|
42985
|
+
url: ({ path: path8 }) => `${baseURL}${path8}`,
|
|
42986
42986
|
headers: getHeaders,
|
|
42987
42987
|
fetch: options.fetch,
|
|
42988
42988
|
errorStructure: xaiErrorStructure
|
|
@@ -51227,37 +51227,37 @@ function createOpenAI(options = {}) {
|
|
|
51227
51227
|
);
|
|
51228
51228
|
const createChatModel = (modelId) => new OpenAIChatLanguageModel(modelId, {
|
|
51229
51229
|
provider: `${providerName}.chat`,
|
|
51230
|
-
url: ({ path:
|
|
51230
|
+
url: ({ path: path8 }) => `${baseURL}${path8}`,
|
|
51231
51231
|
headers: getHeaders,
|
|
51232
51232
|
fetch: options.fetch
|
|
51233
51233
|
});
|
|
51234
51234
|
const createCompletionModel = (modelId) => new OpenAICompletionLanguageModel(modelId, {
|
|
51235
51235
|
provider: `${providerName}.completion`,
|
|
51236
|
-
url: ({ path:
|
|
51236
|
+
url: ({ path: path8 }) => `${baseURL}${path8}`,
|
|
51237
51237
|
headers: getHeaders,
|
|
51238
51238
|
fetch: options.fetch
|
|
51239
51239
|
});
|
|
51240
51240
|
const createEmbeddingModel = (modelId) => new OpenAIEmbeddingModel(modelId, {
|
|
51241
51241
|
provider: `${providerName}.embedding`,
|
|
51242
|
-
url: ({ path:
|
|
51242
|
+
url: ({ path: path8 }) => `${baseURL}${path8}`,
|
|
51243
51243
|
headers: getHeaders,
|
|
51244
51244
|
fetch: options.fetch
|
|
51245
51245
|
});
|
|
51246
51246
|
const createImageModel = (modelId) => new OpenAIImageModel(modelId, {
|
|
51247
51247
|
provider: `${providerName}.image`,
|
|
51248
|
-
url: ({ path:
|
|
51248
|
+
url: ({ path: path8 }) => `${baseURL}${path8}`,
|
|
51249
51249
|
headers: getHeaders,
|
|
51250
51250
|
fetch: options.fetch
|
|
51251
51251
|
});
|
|
51252
51252
|
const createTranscriptionModel = (modelId) => new OpenAITranscriptionModel(modelId, {
|
|
51253
51253
|
provider: `${providerName}.transcription`,
|
|
51254
|
-
url: ({ path:
|
|
51254
|
+
url: ({ path: path8 }) => `${baseURL}${path8}`,
|
|
51255
51255
|
headers: getHeaders,
|
|
51256
51256
|
fetch: options.fetch
|
|
51257
51257
|
});
|
|
51258
51258
|
const createSpeechModel = (modelId) => new OpenAISpeechModel(modelId, {
|
|
51259
51259
|
provider: `${providerName}.speech`,
|
|
51260
|
-
url: ({ path:
|
|
51260
|
+
url: ({ path: path8 }) => `${baseURL}${path8}`,
|
|
51261
51261
|
headers: getHeaders,
|
|
51262
51262
|
fetch: options.fetch
|
|
51263
51263
|
});
|
|
@@ -51272,7 +51272,7 @@ function createOpenAI(options = {}) {
|
|
|
51272
51272
|
const createResponsesModel = (modelId) => {
|
|
51273
51273
|
return new OpenAIResponsesLanguageModel(modelId, {
|
|
51274
51274
|
provider: `${providerName}.responses`,
|
|
51275
|
-
url: ({ path:
|
|
51275
|
+
url: ({ path: path8 }) => `${baseURL}${path8}`,
|
|
51276
51276
|
headers: getHeaders,
|
|
51277
51277
|
fetch: options.fetch,
|
|
51278
51278
|
fileIdPrefixes: ["file-"]
|
|
@@ -55600,8 +55600,8 @@ var init_dist11 = __esm({
|
|
|
55600
55600
|
// src/run.ts
|
|
55601
55601
|
var import_node_child_process2 = require("node:child_process");
|
|
55602
55602
|
var import_node_fs6 = require("node:fs");
|
|
55603
|
-
var
|
|
55604
|
-
var
|
|
55603
|
+
var import_promises5 = __toESM(require("node:fs/promises"), 1);
|
|
55604
|
+
var import_node_path8 = __toESM(require("node:path"), 1);
|
|
55605
55605
|
|
|
55606
55606
|
// node_modules/.pnpm/commander@14.0.2/node_modules/commander/esm.mjs
|
|
55607
55607
|
var import_index = __toESM(require_commander(), 1);
|
|
@@ -64632,7 +64632,7 @@ function transformGfmAutolinkLiterals(tree) {
|
|
|
64632
64632
|
{ ignore: ["link", "linkReference"] }
|
|
64633
64633
|
);
|
|
64634
64634
|
}
|
|
64635
|
-
function findUrl(_, protocol, domain3,
|
|
64635
|
+
function findUrl(_, protocol, domain3, path8, match) {
|
|
64636
64636
|
let prefix = "";
|
|
64637
64637
|
if (!previous2(match)) {
|
|
64638
64638
|
return false;
|
|
@@ -64645,7 +64645,7 @@ function findUrl(_, protocol, domain3, path7, match) {
|
|
|
64645
64645
|
if (!isCorrectDomain(domain3)) {
|
|
64646
64646
|
return false;
|
|
64647
64647
|
}
|
|
64648
|
-
const parts = splitUrl(domain3 +
|
|
64648
|
+
const parts = splitUrl(domain3 + path8);
|
|
64649
64649
|
if (!parts[0]) return false;
|
|
64650
64650
|
const result = {
|
|
64651
64651
|
type: "link",
|
|
@@ -67598,25 +67598,25 @@ function isRecord2(value) {
|
|
|
67598
67598
|
function loadSummarizeConfig({ env: env3 }) {
|
|
67599
67599
|
const home = env3.HOME?.trim() || (0, import_node_os3.homedir)();
|
|
67600
67600
|
if (!home) return { config: null, path: null };
|
|
67601
|
-
const
|
|
67601
|
+
const path8 = (0, import_node_path.join)(home, ".summarize", "config.json");
|
|
67602
67602
|
let raw;
|
|
67603
67603
|
try {
|
|
67604
|
-
raw = (0, import_node_fs.readFileSync)(
|
|
67604
|
+
raw = (0, import_node_fs.readFileSync)(path8, "utf8");
|
|
67605
67605
|
} catch {
|
|
67606
|
-
return { config: null, path:
|
|
67606
|
+
return { config: null, path: path8 };
|
|
67607
67607
|
}
|
|
67608
67608
|
let parsed;
|
|
67609
67609
|
try {
|
|
67610
67610
|
parsed = JSON.parse(raw);
|
|
67611
67611
|
} catch (error47) {
|
|
67612
67612
|
const message = error47 instanceof Error ? error47.message : String(error47);
|
|
67613
|
-
throw new Error(`Invalid JSON in config file ${
|
|
67613
|
+
throw new Error(`Invalid JSON in config file ${path8}: ${message}`);
|
|
67614
67614
|
}
|
|
67615
67615
|
if (!isRecord2(parsed)) {
|
|
67616
|
-
throw new Error(`Invalid config file ${
|
|
67616
|
+
throw new Error(`Invalid config file ${path8}: expected an object at the top level`);
|
|
67617
67617
|
}
|
|
67618
67618
|
const model = typeof parsed.model === "string" ? parsed.model : void 0;
|
|
67619
|
-
return { config: { model }, path:
|
|
67619
|
+
return { config: { model }, path: path8 };
|
|
67620
67620
|
}
|
|
67621
67621
|
|
|
67622
67622
|
// src/content/asset.ts
|
|
@@ -71757,12 +71757,12 @@ var Mime = class {
|
|
|
71757
71757
|
}
|
|
71758
71758
|
return this;
|
|
71759
71759
|
}
|
|
71760
|
-
getType(
|
|
71761
|
-
if (typeof
|
|
71760
|
+
getType(path8) {
|
|
71761
|
+
if (typeof path8 !== "string")
|
|
71762
71762
|
return null;
|
|
71763
|
-
const last =
|
|
71763
|
+
const last = path8.replace(/^.*[/\\]/s, "").toLowerCase();
|
|
71764
71764
|
const ext = last.replace(/^.*\./s, "").toLowerCase();
|
|
71765
|
-
const hasPath = last.length <
|
|
71765
|
+
const hasPath = last.length < path8.length;
|
|
71766
71766
|
const hasDot = ext.length < last.length - 1;
|
|
71767
71767
|
if (!hasDot && hasPath)
|
|
71768
71768
|
return null;
|
|
@@ -72458,9 +72458,9 @@ var fetchTranscriptFromTranscriptEndpoint = async (fetchImpl, {
|
|
|
72458
72458
|
return null;
|
|
72459
72459
|
}
|
|
72460
72460
|
};
|
|
72461
|
-
function getNestedProperty(object3,
|
|
72461
|
+
function getNestedProperty(object3, path8) {
|
|
72462
72462
|
let current = object3;
|
|
72463
|
-
for (const key of
|
|
72463
|
+
for (const key of path8) {
|
|
72464
72464
|
if (!(isRecord3(current) && key in current)) {
|
|
72465
72465
|
return null;
|
|
72466
72466
|
}
|
|
@@ -72468,8 +72468,8 @@ function getNestedProperty(object3, path7) {
|
|
|
72468
72468
|
}
|
|
72469
72469
|
return current;
|
|
72470
72470
|
}
|
|
72471
|
-
function getArrayProperty(object3,
|
|
72472
|
-
const value = getNestedProperty(object3,
|
|
72471
|
+
function getArrayProperty(object3, path8) {
|
|
72472
|
+
const value = getNestedProperty(object3, path8);
|
|
72473
72473
|
return Array.isArray(value) ? value : null;
|
|
72474
72474
|
}
|
|
72475
72475
|
var extractTranscriptFromTranscriptEndpoint = (data) => {
|
|
@@ -74662,7 +74662,21 @@ function parseFirecrawlMode(raw) {
|
|
|
74662
74662
|
function parseMarkdownMode(raw) {
|
|
74663
74663
|
const normalized = raw.trim().toLowerCase();
|
|
74664
74664
|
if (normalized === "off" || normalized === "auto" || normalized === "llm") return normalized;
|
|
74665
|
-
throw new Error(`Unsupported --markdown: ${raw}`);
|
|
74665
|
+
throw new Error(`Unsupported --markdown-mode: ${raw}`);
|
|
74666
|
+
}
|
|
74667
|
+
function parseExtractFormat(raw) {
|
|
74668
|
+
const normalized = raw.trim().toLowerCase();
|
|
74669
|
+
if (normalized === "text" || normalized === "txt" || normalized === "plain") return "text";
|
|
74670
|
+
if (normalized === "md" || normalized === "markdown") return "markdown";
|
|
74671
|
+
throw new Error(`Unsupported --format: ${raw}`);
|
|
74672
|
+
}
|
|
74673
|
+
function parsePreprocessMode(raw) {
|
|
74674
|
+
const normalized = raw.trim().toLowerCase();
|
|
74675
|
+
if (normalized === "off" || normalized === "auto" || normalized === "always") {
|
|
74676
|
+
return normalized;
|
|
74677
|
+
}
|
|
74678
|
+
if (normalized === "on") return "always";
|
|
74679
|
+
throw new Error(`Unsupported --preprocess: ${raw}`);
|
|
74666
74680
|
}
|
|
74667
74681
|
function parseStreamMode(raw) {
|
|
74668
74682
|
const normalized = raw.trim().toLowerCase();
|
|
@@ -75336,37 +75350,102 @@ function createHtmlToMarkdownConverter({
|
|
|
75336
75350
|
};
|
|
75337
75351
|
}
|
|
75338
75352
|
|
|
75353
|
+
// src/markitdown.ts
|
|
75354
|
+
var import_promises3 = __toESM(require("node:fs/promises"), 1);
|
|
75355
|
+
var import_node_os5 = require("node:os");
|
|
75356
|
+
var import_node_path4 = __toESM(require("node:path"), 1);
|
|
75357
|
+
function guessExtension({
|
|
75358
|
+
filenameHint,
|
|
75359
|
+
mediaType
|
|
75360
|
+
}) {
|
|
75361
|
+
const ext = filenameHint ? import_node_path4.default.extname(filenameHint).toLowerCase() : "";
|
|
75362
|
+
if (ext) return ext;
|
|
75363
|
+
if (mediaType === "text/html" || mediaType === "application/xhtml+xml") return ".html";
|
|
75364
|
+
if (mediaType === "application/pdf") return ".pdf";
|
|
75365
|
+
return ".bin";
|
|
75366
|
+
}
|
|
75367
|
+
async function execFileText(execFileImpl, cmd, args, options) {
|
|
75368
|
+
return await new Promise((resolve2, reject) => {
|
|
75369
|
+
execFileImpl(cmd, args, options, (error47, stdout, stderr) => {
|
|
75370
|
+
if (error47) {
|
|
75371
|
+
const stderrText2 = typeof stderr === "string" ? stderr : stderr.toString("utf8");
|
|
75372
|
+
const message = stderrText2.trim() ? `${error47.message}: ${stderrText2.trim()}` : error47.message;
|
|
75373
|
+
reject(new Error(message, { cause: error47 }));
|
|
75374
|
+
return;
|
|
75375
|
+
}
|
|
75376
|
+
const stdoutText = typeof stdout === "string" ? stdout : stdout.toString("utf8");
|
|
75377
|
+
const stderrText = typeof stderr === "string" ? stderr : stderr.toString("utf8");
|
|
75378
|
+
resolve2({ stdout: stdoutText, stderr: stderrText });
|
|
75379
|
+
});
|
|
75380
|
+
});
|
|
75381
|
+
}
|
|
75382
|
+
async function convertToMarkdownWithMarkitdown({
|
|
75383
|
+
bytes,
|
|
75384
|
+
filenameHint,
|
|
75385
|
+
mediaTypeHint,
|
|
75386
|
+
uvxCommand,
|
|
75387
|
+
timeoutMs,
|
|
75388
|
+
env: env3,
|
|
75389
|
+
execFileImpl
|
|
75390
|
+
}) {
|
|
75391
|
+
const dir = await import_promises3.default.mkdtemp(import_node_path4.default.join((0, import_node_os5.tmpdir)(), "summarize-markitdown-"));
|
|
75392
|
+
const ext = guessExtension({ filenameHint, mediaType: mediaTypeHint });
|
|
75393
|
+
const base2 = (filenameHint ? import_node_path4.default.basename(filenameHint, import_node_path4.default.extname(filenameHint)) : "input").replaceAll(/[^\w.-]+/g, "-").slice(0, 64);
|
|
75394
|
+
const filePath = import_node_path4.default.join(dir, `${base2}${ext}`);
|
|
75395
|
+
try {
|
|
75396
|
+
await import_promises3.default.writeFile(filePath, bytes);
|
|
75397
|
+
const from = "markitdown[all]";
|
|
75398
|
+
const { stdout } = await execFileText(
|
|
75399
|
+
execFileImpl,
|
|
75400
|
+
uvxCommand && uvxCommand.trim().length > 0 ? uvxCommand.trim() : "uvx",
|
|
75401
|
+
["--from", from, "markitdown", filePath],
|
|
75402
|
+
{
|
|
75403
|
+
timeout: timeoutMs,
|
|
75404
|
+
env: { ...process.env, ...env3 },
|
|
75405
|
+
maxBuffer: 50 * 1024 * 1024
|
|
75406
|
+
}
|
|
75407
|
+
);
|
|
75408
|
+
const markdown = stdout.trim();
|
|
75409
|
+
if (!markdown) {
|
|
75410
|
+
throw new Error("markitdown returned empty output");
|
|
75411
|
+
}
|
|
75412
|
+
return markdown;
|
|
75413
|
+
} finally {
|
|
75414
|
+
await import_promises3.default.rm(dir, { recursive: true, force: true });
|
|
75415
|
+
}
|
|
75416
|
+
}
|
|
75417
|
+
|
|
75339
75418
|
// src/pricing/litellm.ts
|
|
75340
|
-
var
|
|
75419
|
+
var import_node_path6 = __toESM(require("node:path"), 1);
|
|
75341
75420
|
|
|
75342
75421
|
// node_modules/.pnpm/tokentally@https+++codeload.github.com+steipete+tokentally+tar.gz+99865e5c16f5340c9589a2c5d85c3ea47dbcec82/node_modules/tokentally/dist/node/litellm.js
|
|
75343
75422
|
var import_node_fs4 = require("node:fs");
|
|
75344
|
-
var
|
|
75345
|
-
var
|
|
75423
|
+
var import_promises4 = __toESM(require("node:fs/promises"), 1);
|
|
75424
|
+
var import_node_path5 = __toESM(require("node:path"), 1);
|
|
75346
75425
|
var LITELLM_CATALOG_URL = "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json";
|
|
75347
75426
|
var CACHE_TTL_MS = 7 * 24 * 60 * 60 * 1e3;
|
|
75348
75427
|
function cachePaths(env3) {
|
|
75349
75428
|
const override = env3.TOKENTALLY_CACHE_DIR?.trim();
|
|
75350
75429
|
const home = env3.HOME?.trim();
|
|
75351
|
-
const cacheDir = override && override.length > 0 ? override : home ?
|
|
75430
|
+
const cacheDir = override && override.length > 0 ? override : home ? import_node_path5.default.join(home, ".tokentally", "cache") : null;
|
|
75352
75431
|
if (!cacheDir)
|
|
75353
75432
|
return null;
|
|
75354
75433
|
return {
|
|
75355
|
-
catalogPath:
|
|
75356
|
-
metaPath:
|
|
75434
|
+
catalogPath: import_node_path5.default.join(cacheDir, "litellm-model_prices_and_context_window.json"),
|
|
75435
|
+
metaPath: import_node_path5.default.join(cacheDir, "litellm-model_prices_and_context_window.meta.json")
|
|
75357
75436
|
};
|
|
75358
75437
|
}
|
|
75359
75438
|
async function readJsonFile(filePath) {
|
|
75360
75439
|
try {
|
|
75361
|
-
const raw = await
|
|
75440
|
+
const raw = await import_promises4.default.readFile(filePath, "utf8");
|
|
75362
75441
|
return JSON.parse(raw);
|
|
75363
75442
|
} catch {
|
|
75364
75443
|
return null;
|
|
75365
75444
|
}
|
|
75366
75445
|
}
|
|
75367
75446
|
async function writeJsonFile(filePath, value) {
|
|
75368
|
-
await
|
|
75369
|
-
await
|
|
75447
|
+
await import_promises4.default.mkdir(import_node_path5.default.dirname(filePath), { recursive: true });
|
|
75448
|
+
await import_promises4.default.writeFile(filePath, `${JSON.stringify(value, null, 2)}
|
|
75370
75449
|
`, "utf8");
|
|
75371
75450
|
}
|
|
75372
75451
|
function isStale(meta3, nowMs) {
|
|
@@ -75509,7 +75588,7 @@ function withDefaultCacheDir(env3) {
|
|
|
75509
75588
|
}
|
|
75510
75589
|
const home = env3.HOME?.trim();
|
|
75511
75590
|
if (!home) return env3;
|
|
75512
|
-
return { ...env3, TOKENTALLY_CACHE_DIR:
|
|
75591
|
+
return { ...env3, TOKENTALLY_CACHE_DIR: import_node_path6.default.join(home, ".summarize", "cache") };
|
|
75513
75592
|
}
|
|
75514
75593
|
async function loadLiteLlmCatalog2({
|
|
75515
75594
|
env: env3,
|
|
@@ -75554,6 +75633,27 @@ ${headerLines.length > 0 ? `${headerLines.join("\n")}
|
|
|
75554
75633
|
` : ""}Return only the summary.`;
|
|
75555
75634
|
return prompt;
|
|
75556
75635
|
}
|
|
75636
|
+
/**
 * Builds the instruction prompt used to summarize a file whose content is
 * supplied to the model as text.
 *
 * The prompt consists of a fixed instruction sentence (optionally followed by
 * a target-length hint), a blank line, a block of header lines describing the
 * file, another blank line, and the closing "Return only the summary." line.
 *
 * @param {object} options
 * @param {string} [options.filename] - Included as a "Filename:" header when truthy.
 * @param {string} [options.originalMediaType] - Included as an "Original media type:" header when truthy.
 * @param {string} options.contentMediaType - Always included as the "Provided as:" header.
 * @param {string | { maxCharacters: number }} options.summaryLength - A string
 *   value adds no length hint; an object adds a "Target length" hint capped at
 *   `contentLength`.
 * @param {number} options.contentLength - Length of the extracted content in characters.
 * @returns {string} The assembled prompt text.
 */
function buildFileTextSummaryPrompt({
  filename,
  originalMediaType,
  contentMediaType,
  summaryLength,
  contentLength
}) {
  let maxCharactersLine = "";
  if (typeof summaryLength !== "string") {
    // Never ask for a summary longer than the content that is being summarized.
    const targetCharacters = summaryLength.maxCharacters > contentLength ? contentLength : summaryLength.maxCharacters;
    maxCharactersLine = `Target length: around ${targetCharacters.toLocaleString()} characters total (including Markdown and whitespace). This is a soft guideline; prioritize clarity.`;
  }

  const headerLines = [];
  if (filename) {
    headerLines.push(`Filename: ${filename}`);
  }
  if (originalMediaType) {
    headerLines.push(`Original media type: ${originalMediaType}`);
  }
  // These two headers are unconditional, so the header block is never empty.
  headerLines.push(`Provided as: ${contentMediaType}`);
  headerLines.push(
    `Extracted content length: ${contentLength.toLocaleString()} characters. Do not exceed the extracted content length; if the requested length is larger, keep the summary at or below the extracted content length and do not add details.`
  );

  const instructions = `You summarize files for curious users. Summarize the file content below. Be factual and do not invent details. Format the answer in Markdown. Do not use emojis. ${maxCharactersLine}`;
  return [instructions, "", headerLines.join("\n"), "", "Return only the summary."].join("\n");
}
|
|
75557
75657
|
|
|
75558
75658
|
// src/prompts/link-summary.ts
|
|
75559
75659
|
var SUMMARY_LENGTH_DIRECTIVES = {
|
|
@@ -78283,16 +78383,16 @@ function startSpinner({
|
|
|
78283
78383
|
|
|
78284
78384
|
// src/version.ts
|
|
78285
78385
|
var import_node_fs5 = __toESM(require("node:fs"), 1);
|
|
78286
|
-
var
|
|
78386
|
+
var import_node_path7 = __toESM(require("node:path"), 1);
|
|
78287
78387
|
var import_node_url = require("node:url");
|
|
78288
|
-
var FALLBACK_VERSION = "0.
|
|
78388
|
+
var FALLBACK_VERSION = "0.4.0";
|
|
78289
78389
|
function resolvePackageVersion(importMetaUrl) {
|
|
78290
78390
|
const injected = typeof process !== "undefined" && typeof process.env.SUMMARIZE_VERSION === "string" ? process.env.SUMMARIZE_VERSION.trim() : "";
|
|
78291
78391
|
if (injected.length > 0) return injected;
|
|
78292
78392
|
const startDir = (() => {
|
|
78293
78393
|
if (typeof importMetaUrl === "string" && importMetaUrl.trim().length > 0) {
|
|
78294
78394
|
try {
|
|
78295
|
-
return
|
|
78395
|
+
return import_node_path7.default.dirname((0, import_node_url.fileURLToPath)(importMetaUrl));
|
|
78296
78396
|
} catch {
|
|
78297
78397
|
}
|
|
78298
78398
|
}
|
|
@@ -78301,7 +78401,7 @@ function resolvePackageVersion(importMetaUrl) {
|
|
|
78301
78401
|
})();
|
|
78302
78402
|
let dir = startDir;
|
|
78303
78403
|
for (let i = 0; i < 10; i += 1) {
|
|
78304
|
-
const candidate =
|
|
78404
|
+
const candidate = import_node_path7.default.join(dir, "package.json");
|
|
78305
78405
|
try {
|
|
78306
78406
|
const raw = import_node_fs5.default.readFileSync(candidate, "utf8");
|
|
78307
78407
|
const json3 = JSON.parse(raw);
|
|
@@ -78310,7 +78410,7 @@ function resolvePackageVersion(importMetaUrl) {
|
|
|
78310
78410
|
}
|
|
78311
78411
|
} catch {
|
|
78312
78412
|
}
|
|
78313
|
-
const parent =
|
|
78413
|
+
const parent = import_node_path7.default.dirname(dir);
|
|
78314
78414
|
if (parent === dir) break;
|
|
78315
78415
|
dir = parent;
|
|
78316
78416
|
}
|
|
@@ -78319,6 +78419,7 @@ function resolvePackageVersion(importMetaUrl) {
|
|
|
78319
78419
|
|
|
78320
78420
|
// src/run.ts
|
|
78321
78421
|
var BIRD_TIP = "Tip: Install bird\u{1F426} for better Twitter support: https://github.com/steipete/bird";
|
|
78422
|
+
var UVX_TIP = "Tip: Install uv (uvx) for local Markdown conversion: brew install uv (or set UVX_PATH to your uvx binary).";
|
|
78322
78423
|
var TWITTER_HOSTS2 = /* @__PURE__ */ new Set(["x.com", "twitter.com", "mobile.twitter.com"]);
|
|
78323
78424
|
var SUMMARY_LENGTH_MAX_CHARACTERS = {
|
|
78324
78425
|
short: 1200,
|
|
@@ -78350,10 +78451,22 @@ function isExecutable(filePath) {
|
|
|
78350
78451
|
}
|
|
78351
78452
|
/**
 * Reports whether a `bird` executable can be found on the PATH of the given
 * environment.
 *
 * Each non-empty PATH entry is joined with "bird" and probed with
 * `isExecutable`; a missing PATH is treated as empty.
 *
 * @param {Record<string, string | undefined>} env3 - Environment variables to inspect.
 * @returns {boolean} True when at least one PATH entry holds an executable `bird`.
 */
function hasBirdCli(env3) {
  const searchPath = env3.PATH ?? "";
  return searchPath
    .split(import_node_path8.default.delimiter)
    .filter((dir) => dir.length > 0)
    .map((dir) => import_node_path8.default.join(dir, "bird"))
    .some((binaryPath) => isExecutable(binaryPath));
}
|
|
78461
|
+
/**
 * Reports whether the `uvx` CLI appears to be available for the given
 * environment.
 *
 * A non-blank `UVX_PATH` is trusted as-is (it is not checked for existence or
 * executability). Otherwise each non-empty PATH entry is joined with "uvx" and
 * probed with `isExecutable`; a missing PATH is treated as empty.
 *
 * @param {Record<string, string | undefined>} env3 - Environment variables to inspect.
 * @returns {boolean} True when `UVX_PATH` is set or an executable `uvx` is on PATH.
 */
function hasUvxCli(env3) {
  const explicitPath = env3.UVX_PATH;
  if (typeof explicitPath === "string" && explicitPath.trim().length > 0) {
    return true;
  }
  const searchPath = env3.PATH ?? "";
  return searchPath
    .split(import_node_path8.default.delimiter)
    .filter((dir) => dir.length > 0)
    .map((dir) => import_node_path8.default.join(dir, "uvx"))
    .some((binaryPath) => isExecutable(binaryPath));
}
|
|
@@ -78404,6 +78517,15 @@ function withBirdTip(error47, url2, env3) {
|
|
|
78404
78517
|
${BIRD_TIP}`;
|
|
78405
78518
|
return error47 instanceof Error ? new Error(combined, { cause: error47 }) : new Error(combined);
|
|
78406
78519
|
}
|
|
78520
|
+
/**
 * Normalizes a thrown value into an `Error`, appending the uvx installation
 * tip when `hasUvxCli(env3)` reports that uvx is not available.
 *
 * - uvx available: an `Error` input is returned unchanged; any other value is
 *   wrapped as `new Error(String(value))`.
 * - uvx missing: a new `Error` is returned whose message is the original
 *   message, a newline, and `UVX_TIP`. When the input was an `Error` it is
 *   attached as the new error's `cause`.
 *
 * @param {unknown} error47 - The caught value.
 * @param {Record<string, string | undefined>} env3 - Environment used to detect uvx.
 * @returns {Error} The original or augmented error.
 */
function withUvxTip(error47, env3) {
  const uvxAvailable = hasUvxCli(env3);
  const isErrorObject = error47 instanceof Error;
  if (uvxAvailable) {
    return isErrorObject ? error47 : new Error(String(error47));
  }
  const baseMessage = isErrorObject ? error47.message : String(error47);
  const combined = `${baseMessage}\n${UVX_TIP}`;
  if (!isErrorObject) {
    return new Error(combined);
  }
  // Keep the original error reachable for debugging.
  return new Error(combined, { cause: error47 });
}
|
|
78407
78529
|
var MAX_TEXT_BYTES_DEFAULT = 10 * 1024 * 1024;
|
|
78408
78530
|
function buildProgram() {
|
|
78409
78531
|
return new Command().name("summarize").description("Summarize web pages and YouTube links (uses direct provider API keys).").argument("[input]", "URL or local file path to summarize").option(
|
|
@@ -78412,12 +78534,27 @@ function buildProgram() {
|
|
|
78412
78534
|
"auto"
|
|
78413
78535
|
).option(
|
|
78414
78536
|
"--firecrawl <mode>",
|
|
78415
|
-
"Firecrawl usage: off, auto (fallback), always (try Firecrawl first).",
|
|
78537
|
+
"Firecrawl usage: off, auto (fallback), always (try Firecrawl first). Note: in --format md website mode, defaults to always when FIRECRAWL_API_KEY is set (unless --firecrawl is set explicitly).",
|
|
78416
78538
|
"auto"
|
|
78417
78539
|
).option(
|
|
78418
|
-
"--
|
|
78419
|
-
"Website
|
|
78420
|
-
"
|
|
78540
|
+
"--format <format>",
|
|
78541
|
+
"Website/file content format: md|text. For websites: controls the extraction format. For files: controls whether we try to preprocess to Markdown for model compatibility. (default: text)",
|
|
78542
|
+
"text"
|
|
78543
|
+
).addOption(
|
|
78544
|
+
new Option(
|
|
78545
|
+
"--preprocess <mode>",
|
|
78546
|
+
"Preprocess inputs for model compatibility: off, auto (fallback), always."
|
|
78547
|
+
).choices(["off", "auto", "always"]).default("auto")
|
|
78548
|
+
).addOption(
|
|
78549
|
+
new Option(
|
|
78550
|
+
"--markdown-mode <mode>",
|
|
78551
|
+
"HTML\u2192Markdown conversion: off, auto (prefer Firecrawl when configured, then LLM when configured, then markitdown when available), llm (force LLM). Only affects --format md for non-YouTube URLs."
|
|
78552
|
+
).default("auto")
|
|
78553
|
+
).addOption(
|
|
78554
|
+
new Option(
|
|
78555
|
+
"--markdown <mode>",
|
|
78556
|
+
"Deprecated alias for --markdown-mode (use --extract --format md --markdown-mode ...)"
|
|
78557
|
+
).hideHelp()
|
|
78421
78558
|
).option(
|
|
78422
78559
|
"--length <length>",
|
|
78423
78560
|
"Summary length: short|medium|long|xl|xxl or a character limit like 20000, 20k",
|
|
@@ -78434,7 +78571,7 @@ function buildProgram() {
|
|
|
78434
78571
|
"--model <model>",
|
|
78435
78572
|
"LLM model id (gateway-style): xai/..., openai/..., google/... (default: google/gemini-3-flash-preview)",
|
|
78436
78573
|
void 0
|
|
78437
|
-
).option("--extract
|
|
78574
|
+
).option("--extract", "Print extracted content and exit (no LLM summary)", false).addOption(new Option("--extract-only", "Deprecated alias for --extract").hideHelp()).option("--json", "Output structured JSON (includes prompt + metrics)", false).option(
|
|
78438
78575
|
"--stream <mode>",
|
|
78439
78576
|
"Stream LLM output: auto (TTY only), on, off. Note: streaming is disabled in --json mode.",
|
|
78440
78577
|
"auto"
|
|
@@ -78550,6 +78687,22 @@ function getTextContentFromAttachment(attachment) {
|
|
|
78550
78687
|
}
|
|
78551
78688
|
return { content: "", bytes: 0 };
|
|
78552
78689
|
}
|
|
78690
|
+
/**
 * Returns the raw bytes of a file attachment, or `null` when they are not
 * available as a `Uint8Array`.
 *
 * @param {{ part: { type: string, data?: unknown } }} attachment - Attachment
 *   whose `part` is inspected.
 * @returns {Uint8Array | null} `part.data` when the part has type "file" and
 *   its data is a `Uint8Array` (including subclasses such as `Buffer`);
 *   otherwise `null`.
 */
function getFileBytesFromAttachment(attachment) {
  const { part } = attachment;
  if (part.type === "file") {
    const bytes = part.data;
    if (bytes instanceof Uint8Array) {
      return bytes;
    }
  }
  return null;
}
|
|
78695
|
+
/**
 * Decides whether a file of the given media type is one of the document
 * formats this CLI converts to Markdown with markitdown: PDF, RTF, HTML/XHTML,
 * legacy Word/Excel/PowerPoint, and any OpenXML office document.
 *
 * Only the `type/subtype` part is compared: parameters such as
 * `; charset=utf-8` and surrounding whitespace are ignored, and the comparison
 * is case-insensitive. Non-string input yields `false` instead of throwing.
 *
 * @param {string} mediaType - Media type, optionally with parameters.
 * @returns {boolean} True when the media type is in the convertible set.
 */
function shouldMarkitdownConvertMediaType(mediaType) {
  if (typeof mediaType !== "string") return false;
  // "text/html; charset=utf-8" must match the same way as "text/html".
  const essence = mediaType.split(";", 1)[0].trim().toLowerCase();
  // Covers .docx, .xlsx, .pptx and the other OpenXML office subtypes.
  if (essence.startsWith("application/vnd.openxmlformats-officedocument.")) {
    return true;
  }
  switch (essence) {
    case "application/pdf":
    case "application/rtf":
    case "text/html":
    case "application/xhtml+xml":
    case "application/msword":
    case "application/vnd.ms-excel":
    case "application/vnd.ms-powerpoint":
      return true;
    default:
      return false;
  }
}
|
|
78553
78706
|
function assertProviderSupportsAttachment({
|
|
78554
78707
|
provider,
|
|
78555
78708
|
modelId,
|
|
@@ -78613,9 +78766,10 @@ function attachRichHelp(program2, env3, stdout) {
|
|
|
78613
78766
|
() => `
|
|
78614
78767
|
${heading("Examples")}
|
|
78615
78768
|
${cmd('summarize "https://example.com"')}
|
|
78616
|
-
${cmd('summarize "https://example.com" --extract
|
|
78617
|
-
${cmd('summarize "https://example.com" --extract
|
|
78618
|
-
${cmd('summarize "https://
|
|
78769
|
+
${cmd('summarize "https://example.com" --extract')} ${dim3("# extracted plain text")}
|
|
78770
|
+
${cmd('summarize "https://example.com" --extract --format md')} ${dim3("# extracted markdown (prefers Firecrawl when configured)")}
|
|
78771
|
+
${cmd('summarize "https://example.com" --extract --format md --markdown-mode llm')} ${dim3("# extracted markdown via LLM")}
|
|
78772
|
+
${cmd('summarize "https://www.youtube.com/watch?v=I845O57ZSy4&t=11s" --extract --youtube web')}
|
|
78619
78773
|
${cmd('summarize "https://example.com" --length 20k --max-output-tokens 2k --timeout 2m --model openai/gpt-5.2')}
|
|
78620
78774
|
${cmd('OPENROUTER_API_KEY=... summarize "https://example.com" --model openai/openai/gpt-oss-20b')}
|
|
78621
78775
|
${cmd('summarize "https://example.com" --json --verbose')}
|
|
@@ -78756,10 +78910,11 @@ function writeFinishLine({
|
|
|
78756
78910
|
stderr.write(`${ansi("1;32", line, color2)}
|
|
78757
78911
|
`);
|
|
78758
78912
|
}
|
|
78759
|
-
async function runCli(argv, { env: env3, fetch: fetch2, stdout, stderr }) {
|
|
78913
|
+
async function runCli(argv, { env: env3, fetch: fetch2, execFile: execFileOverride, stdout, stderr }) {
|
|
78760
78914
|
;
|
|
78761
78915
|
globalThis.AI_SDK_LOG_WARNINGS = false;
|
|
78762
78916
|
const normalizedArgv = argv.filter((arg) => arg !== "--");
|
|
78917
|
+
const execFileImpl = execFileOverride ?? import_node_child_process2.execFile;
|
|
78763
78918
|
const version2 = resolvePackageVersion();
|
|
78764
78919
|
const program2 = buildProgram();
|
|
78765
78920
|
program2.configureOutput({
|
|
@@ -78800,7 +78955,7 @@ async function runCli(argv, { env: env3, fetch: fetch2, stdout, stderr }) {
|
|
|
78800
78955
|
program2.opts().maxOutputTokens
|
|
78801
78956
|
);
|
|
78802
78957
|
const timeoutMs = parseDurationMs(program2.opts().timeout);
|
|
78803
|
-
const
|
|
78958
|
+
const extractMode = Boolean(program2.opts().extract) || Boolean(program2.opts().extractOnly);
|
|
78804
78959
|
const json3 = Boolean(program2.opts().json);
|
|
78805
78960
|
const streamMode = parseStreamMode(program2.opts().stream);
|
|
78806
78961
|
const renderMode = parseRenderMode(program2.opts().render);
|
|
@@ -78808,9 +78963,19 @@ async function runCli(argv, { env: env3, fetch: fetch2, stdout, stderr }) {
|
|
|
78808
78963
|
const metricsMode = parseMetricsMode(program2.opts().metrics);
|
|
78809
78964
|
const metricsEnabled = metricsMode !== "off";
|
|
78810
78965
|
const metricsDetailed = metricsMode === "detailed";
|
|
78811
|
-
const
|
|
78966
|
+
const preprocessMode = parsePreprocessMode(program2.opts().preprocess);
|
|
78967
|
+
const format2 = parseExtractFormat(program2.opts().format);
|
|
78812
78968
|
const shouldComputeReport = metricsEnabled;
|
|
78813
78969
|
const isYoutubeUrl = typeof url2 === "string" ? /youtube\.com|youtu\.be/i.test(url2) : false;
|
|
78970
|
+
const firecrawlExplicitlySet = normalizedArgv.some(
|
|
78971
|
+
(arg) => arg === "--firecrawl" || arg.startsWith("--firecrawl=")
|
|
78972
|
+
);
|
|
78973
|
+
const markdownModeExplicitlySet = normalizedArgv.some(
|
|
78974
|
+
(arg) => arg === "--markdown-mode" || arg.startsWith("--markdown-mode=") || arg === "--markdown" || arg.startsWith("--markdown=")
|
|
78975
|
+
);
|
|
78976
|
+
const markdownMode = format2 === "markdown" ? parseMarkdownMode(
|
|
78977
|
+
program2.opts().markdownMode ?? program2.opts().markdown ?? "auto"
|
|
78978
|
+
) : "off";
|
|
78814
78979
|
const requestedFirecrawlMode = parseFirecrawlMode(program2.opts().firecrawl);
|
|
78815
78980
|
const modelArg = typeof program2.opts().model === "string" ? program2.opts().model : null;
|
|
78816
78981
|
const { config: config2, path: configPath } = loadSummarizeConfig({ env: env3 });
|
|
@@ -78839,6 +79004,12 @@ async function runCli(argv, { env: env3, fetch: fetch2, stdout, stderr }) {
|
|
|
78839
79004
|
const anthropicConfigured = typeof anthropicApiKey === "string" && anthropicApiKey.length > 0;
|
|
78840
79005
|
const openrouterConfigured = typeof openrouterApiKey === "string" && openrouterApiKey.length > 0;
|
|
78841
79006
|
const openrouterOptions = openRouterProviders ? { providers: openRouterProviders } : void 0;
|
|
79007
|
+
if (markdownModeExplicitlySet && format2 !== "markdown") {
|
|
79008
|
+
throw new Error("--markdown-mode is only supported with --format md");
|
|
79009
|
+
}
|
|
79010
|
+
if (markdownModeExplicitlySet && inputTarget.kind !== "url") {
|
|
79011
|
+
throw new Error("--markdown-mode is only supported for website URLs");
|
|
79012
|
+
}
|
|
78842
79013
|
const llmCalls = [];
|
|
78843
79014
|
let firecrawlRequests = 0;
|
|
78844
79015
|
let apifyRequests = 0;
|
|
@@ -78932,7 +79103,7 @@ async function runCli(argv, { env: env3, fetch: fetch2, stdout, stderr }) {
|
|
|
78932
79103
|
if (streamMode !== "auto") return streamMode;
|
|
78933
79104
|
return isRichTty(stdout) ? "on" : "off";
|
|
78934
79105
|
})();
|
|
78935
|
-
const streamingEnabled = effectiveStreamMode === "on" && !json3 && !
|
|
79106
|
+
const streamingEnabled = effectiveStreamMode === "on" && !json3 && !extractMode;
|
|
78936
79107
|
const effectiveRenderMode = (() => {
|
|
78937
79108
|
if (renderMode !== "auto") return renderMode;
|
|
78938
79109
|
if (!isRichTty(stdout)) return "plain";
|
|
@@ -78957,8 +79128,8 @@ async function runCli(argv, { env: env3, fetch: fetch2, stdout, stderr }) {
|
|
|
78957
79128
|
`
|
|
78958
79129
|
);
|
|
78959
79130
|
};
|
|
78960
|
-
if (
|
|
78961
|
-
throw new Error("--extract
|
|
79131
|
+
if (extractMode && inputTarget.kind !== "url") {
|
|
79132
|
+
throw new Error("--extract is only supported for website/YouTube URLs");
|
|
78962
79133
|
}
|
|
78963
79134
|
const progressEnabled = isRichTty(stderr) && !verbose && !json3;
|
|
78964
79135
|
let clearProgressBeforeStdout = null;
|
|
@@ -78987,11 +79158,6 @@ async function runCli(argv, { env: env3, fetch: fetch2, stdout, stderr }) {
|
|
|
78987
79158
|
`Missing ${requiredKeyEnv} for model ${parsedModel.canonical}. Set the env var or choose a different --model.`
|
|
78988
79159
|
);
|
|
78989
79160
|
}
|
|
78990
|
-
assertProviderSupportsAttachment({
|
|
78991
|
-
provider: parsedModel.provider,
|
|
78992
|
-
modelId: parsedModel.canonical,
|
|
78993
|
-
attachment: { part: attachment.part, mediaType: attachment.mediaType }
|
|
78994
|
-
});
|
|
78995
79161
|
const modelResolution = await resolveModelIdForLlmCall({
|
|
78996
79162
|
parsedModel,
|
|
78997
79163
|
apiKeys: { googleApiKey: apiKeysForLlm.googleApiKey },
|
|
@@ -79013,14 +79179,114 @@ async function runCli(argv, { env: env3, fetch: fetch2, stdout, stderr }) {
|
|
|
79013
79179
|
`Text file too large (${formatBytes(textContent.bytes)}). Limit is ${formatBytes(MAX_TEXT_BYTES_DEFAULT)}.`
|
|
79014
79180
|
);
|
|
79015
79181
|
}
|
|
79182
|
+
const fileBytes = getFileBytesFromAttachment(attachment);
|
|
79183
|
+
const canPreprocessWithMarkitdown = format2 === "markdown" && preprocessMode !== "off" && hasUvxCli(env3) && attachment.part.type === "file" && fileBytes !== null && shouldMarkitdownConvertMediaType(attachment.mediaType);
|
|
79016
79184
|
const summaryLengthTarget = lengthArg.kind === "preset" ? lengthArg.preset : { maxCharacters: lengthArg.maxCharacters };
|
|
79017
|
-
|
|
79018
|
-
|
|
79019
|
-
|
|
79020
|
-
|
|
79021
|
-
|
|
79022
|
-
|
|
79023
|
-
|
|
79185
|
+
let promptText = "";
|
|
79186
|
+
const buildAttachmentPromptPayload = () => {
|
|
79187
|
+
promptText = buildFileSummaryPrompt({
|
|
79188
|
+
filename: attachment.filename,
|
|
79189
|
+
mediaType: attachment.mediaType,
|
|
79190
|
+
summaryLength: summaryLengthTarget,
|
|
79191
|
+
contentLength: textContent?.content.length ?? null
|
|
79192
|
+
});
|
|
79193
|
+
return buildAssetPromptPayload({ promptText, attachment, textContent });
|
|
79194
|
+
};
|
|
79195
|
+
const buildMarkitdownPromptPayload = (markdown) => {
|
|
79196
|
+
promptText = buildFileTextSummaryPrompt({
|
|
79197
|
+
filename: attachment.filename,
|
|
79198
|
+
originalMediaType: attachment.mediaType,
|
|
79199
|
+
contentMediaType: "text/markdown",
|
|
79200
|
+
summaryLength: summaryLengthTarget,
|
|
79201
|
+
contentLength: markdown.length
|
|
79202
|
+
});
|
|
79203
|
+
return `${promptText}
|
|
79204
|
+
|
|
79205
|
+
---
|
|
79206
|
+
|
|
79207
|
+
${markdown}`.trim();
|
|
79208
|
+
};
|
|
79209
|
+
let preprocessedMarkdown = null;
|
|
79210
|
+
let usingPreprocessedMarkdown = false;
|
|
79211
|
+
if (preprocessMode === "always" && canPreprocessWithMarkitdown) {
|
|
79212
|
+
if (!fileBytes) {
|
|
79213
|
+
throw new Error("Internal error: missing file bytes for markitdown preprocessing");
|
|
79214
|
+
}
|
|
79215
|
+
try {
|
|
79216
|
+
preprocessedMarkdown = await convertToMarkdownWithMarkitdown({
|
|
79217
|
+
bytes: fileBytes,
|
|
79218
|
+
filenameHint: attachment.filename,
|
|
79219
|
+
mediaTypeHint: attachment.mediaType,
|
|
79220
|
+
uvxCommand: env3.UVX_PATH,
|
|
79221
|
+
timeoutMs,
|
|
79222
|
+
env: env3,
|
|
79223
|
+
execFileImpl
|
|
79224
|
+
});
|
|
79225
|
+
} catch (error47) {
|
|
79226
|
+
const message = error47 instanceof Error ? error47.message : String(error47);
|
|
79227
|
+
throw new Error(
|
|
79228
|
+
`Failed to preprocess ${attachment.mediaType} with markitdown: ${message} (disable with --preprocess off).`
|
|
79229
|
+
);
|
|
79230
|
+
}
|
|
79231
|
+
if (Buffer.byteLength(preprocessedMarkdown, "utf8") > MAX_TEXT_BYTES_DEFAULT) {
|
|
79232
|
+
throw new Error(
|
|
79233
|
+
`Preprocessed Markdown too large (${formatBytes(Buffer.byteLength(preprocessedMarkdown, "utf8"))}). Limit is ${formatBytes(MAX_TEXT_BYTES_DEFAULT)}.`
|
|
79234
|
+
);
|
|
79235
|
+
}
|
|
79236
|
+
usingPreprocessedMarkdown = true;
|
|
79237
|
+
}
|
|
79238
|
+
let promptPayload = buildAttachmentPromptPayload();
|
|
79239
|
+
if (usingPreprocessedMarkdown) {
|
|
79240
|
+
if (!preprocessedMarkdown) {
|
|
79241
|
+
throw new Error("Internal error: missing markitdown content for preprocessing");
|
|
79242
|
+
}
|
|
79243
|
+
promptPayload = buildMarkitdownPromptPayload(preprocessedMarkdown);
|
|
79244
|
+
}
|
|
79245
|
+
if (!usingPreprocessedMarkdown) {
|
|
79246
|
+
try {
|
|
79247
|
+
assertProviderSupportsAttachment({
|
|
79248
|
+
provider: parsedModel.provider,
|
|
79249
|
+
modelId: parsedModel.canonical,
|
|
79250
|
+
attachment: { part: attachment.part, mediaType: attachment.mediaType }
|
|
79251
|
+
});
|
|
79252
|
+
} catch (error47) {
|
|
79253
|
+
if (!canPreprocessWithMarkitdown) {
|
|
79254
|
+
if (format2 === "markdown" && preprocessMode !== "off" && attachment.part.type === "file" && shouldMarkitdownConvertMediaType(attachment.mediaType) && !hasUvxCli(env3)) {
|
|
79255
|
+
throw withUvxTip(error47, env3);
|
|
79256
|
+
}
|
|
79257
|
+
throw error47;
|
|
79258
|
+
}
|
|
79259
|
+
if (!fileBytes) {
|
|
79260
|
+
throw new Error("Internal error: missing file bytes for markitdown preprocessing");
|
|
79261
|
+
}
|
|
79262
|
+
try {
|
|
79263
|
+
preprocessedMarkdown = await convertToMarkdownWithMarkitdown({
|
|
79264
|
+
bytes: fileBytes,
|
|
79265
|
+
filenameHint: attachment.filename,
|
|
79266
|
+
mediaTypeHint: attachment.mediaType,
|
|
79267
|
+
uvxCommand: env3.UVX_PATH,
|
|
79268
|
+
timeoutMs,
|
|
79269
|
+
env: env3,
|
|
79270
|
+
execFileImpl
|
|
79271
|
+
});
|
|
79272
|
+
} catch (markitdownError) {
|
|
79273
|
+
if (preprocessMode === "auto") {
|
|
79274
|
+
throw error47;
|
|
79275
|
+
}
|
|
79276
|
+
const message = markitdownError instanceof Error ? markitdownError.message : String(markitdownError);
|
|
79277
|
+
throw new Error(
|
|
79278
|
+
`Failed to preprocess ${attachment.mediaType} with markitdown: ${message} (disable with --preprocess off).`
|
|
79279
|
+
);
|
|
79280
|
+
}
|
|
79281
|
+
if (Buffer.byteLength(preprocessedMarkdown, "utf8") > MAX_TEXT_BYTES_DEFAULT) {
|
|
79282
|
+
throw new Error(
|
|
79283
|
+
`Preprocessed Markdown too large (${formatBytes(Buffer.byteLength(preprocessedMarkdown, "utf8"))}). Limit is ${formatBytes(MAX_TEXT_BYTES_DEFAULT)}.`
|
|
79284
|
+
);
|
|
79285
|
+
}
|
|
79286
|
+
usingPreprocessedMarkdown = true;
|
|
79287
|
+
promptPayload = buildMarkitdownPromptPayload(preprocessedMarkdown);
|
|
79288
|
+
}
|
|
79289
|
+
}
|
|
79024
79290
|
const maxInputTokensForCall = await resolveMaxInputTokensForCall(parsedModelEffective.canonical);
|
|
79025
79291
|
if (typeof maxInputTokensForCall === "number" && Number.isFinite(maxInputTokensForCall) && maxInputTokensForCall > 0 && typeof promptPayload === "string") {
|
|
79026
79292
|
const tokenCount = countTokens(promptPayload);
|
|
@@ -79309,7 +79575,7 @@ async function runCli(argv, { env: env3, fetch: fetch2, stdout, stderr }) {
|
|
|
79309
79575
|
if (inputTarget.kind === "file") {
|
|
79310
79576
|
let sizeLabel = null;
|
|
79311
79577
|
try {
|
|
79312
|
-
const stat = await
|
|
79578
|
+
const stat = await import_promises5.default.stat(inputTarget.filePath);
|
|
79313
79579
|
if (stat.isFile()) {
|
|
79314
79580
|
sizeLabel = formatBytes(stat.size);
|
|
79315
79581
|
}
|
|
@@ -79411,22 +79677,33 @@ async function runCli(argv, { env: env3, fetch: fetch2, stdout, stderr }) {
|
|
|
79411
79677
|
if (!url2) {
|
|
79412
79678
|
throw new Error("Only HTTP and HTTPS URLs can be summarized");
|
|
79413
79679
|
}
|
|
79414
|
-
const
|
|
79680
|
+
const wantsMarkdown = format2 === "markdown" && !isYoutubeUrl;
|
|
79681
|
+
if (wantsMarkdown && markdownMode === "off") {
|
|
79682
|
+
throw new Error("--format md conflicts with --markdown-mode off (use --format text)");
|
|
79683
|
+
}
|
|
79684
|
+
const firecrawlMode = (() => {
|
|
79685
|
+
if (wantsMarkdown && !isYoutubeUrl && !firecrawlExplicitlySet && firecrawlConfigured) {
|
|
79686
|
+
return "always";
|
|
79687
|
+
}
|
|
79688
|
+
return requestedFirecrawlMode;
|
|
79689
|
+
})();
|
|
79415
79690
|
if (firecrawlMode === "always" && !firecrawlConfigured) {
|
|
79416
79691
|
throw new Error("--firecrawl always requires FIRECRAWL_API_KEY");
|
|
79417
79692
|
}
|
|
79418
|
-
const
|
|
79419
|
-
const
|
|
79693
|
+
const markdownRequested = wantsMarkdown;
|
|
79694
|
+
const effectiveMarkdownMode = markdownRequested ? markdownMode : "off";
|
|
79420
79695
|
const hasKeyForModel = parsedModelForLlm.provider === "xai" ? xaiConfigured : parsedModelForLlm.provider === "google" ? googleConfigured : parsedModelForLlm.provider === "anthropic" ? anthropicConfigured : Boolean(apiKey);
|
|
79421
79696
|
const markdownProvider = hasKeyForModel ? parsedModelForLlm.provider : "none";
|
|
79422
79697
|
if (markdownRequested && effectiveMarkdownMode === "llm" && !hasKeyForModel) {
|
|
79423
79698
|
const required2 = parsedModelForLlm.provider === "xai" ? "XAI_API_KEY" : parsedModelForLlm.provider === "google" ? "GEMINI_API_KEY (or GOOGLE_GENERATIVE_AI_API_KEY / GOOGLE_API_KEY)" : parsedModelForLlm.provider === "anthropic" ? "ANTHROPIC_API_KEY" : "OPENAI_API_KEY";
|
|
79424
|
-
throw new Error(
|
|
79699
|
+
throw new Error(
|
|
79700
|
+
`--markdown-mode llm requires ${required2} for model ${parsedModelForLlm.canonical}`
|
|
79701
|
+
);
|
|
79425
79702
|
}
|
|
79426
79703
|
writeVerbose(
|
|
79427
79704
|
stderr,
|
|
79428
79705
|
verbose,
|
|
79429
|
-
`config url=${url2} timeoutMs=${timeoutMs} youtube=${youtubeMode} firecrawl=${firecrawlMode} length=${lengthArg.kind === "preset" ? lengthArg.preset : `${lengthArg.maxCharacters} chars`} maxOutputTokens=${formatOptionalNumber(maxOutputTokensArg)} json=${json3}
|
|
79706
|
+
`config url=${url2} timeoutMs=${timeoutMs} youtube=${youtubeMode} firecrawl=${firecrawlMode} length=${lengthArg.kind === "preset" ? lengthArg.preset : `${lengthArg.maxCharacters} chars`} maxOutputTokens=${formatOptionalNumber(maxOutputTokensArg)} json=${json3} extract=${extractMode} format=${format2} preprocess=${preprocessMode} markdownMode=${markdownMode} model=${model} stream=${effectiveStreamMode} render=${effectiveRenderMode}`,
|
|
79430
79707
|
verboseColor
|
|
79431
79708
|
);
|
|
79432
79709
|
writeVerbose(
|
|
@@ -79450,7 +79727,7 @@ async function runCli(argv, { env: env3, fetch: fetch2, stdout, stderr }) {
|
|
|
79450
79727
|
verboseColor
|
|
79451
79728
|
);
|
|
79452
79729
|
const scrapeWithFirecrawl = firecrawlConfigured && firecrawlMode !== "off" ? createFirecrawlScraper({ apiKey: firecrawlApiKey, fetchImpl: trackedFetch }) : null;
|
|
79453
|
-
const
|
|
79730
|
+
const llmHtmlToMarkdown = markdownRequested && (effectiveMarkdownMode === "llm" || markdownProvider !== "none") ? createHtmlToMarkdownConverter({
|
|
79454
79731
|
modelId: model,
|
|
79455
79732
|
xaiApiKey: xaiConfigured ? xaiApiKey : null,
|
|
79456
79733
|
googleApiKey: googleConfigured ? googleApiKey : null,
|
|
@@ -79463,6 +79740,40 @@ async function runCli(argv, { env: env3, fetch: fetch2, stdout, stderr }) {
|
|
|
79463
79740
|
llmCalls.push({ provider, model: usedModel, usage, purpose: "markdown" });
|
|
79464
79741
|
}
|
|
79465
79742
|
}) : null;
|
|
79743
|
+
const markitdownHtmlToMarkdown = markdownRequested && preprocessMode !== "off" && hasUvxCli(env3) ? async (args) => {
|
|
79744
|
+
void args.url;
|
|
79745
|
+
void args.title;
|
|
79746
|
+
void args.siteName;
|
|
79747
|
+
return convertToMarkdownWithMarkitdown({
|
|
79748
|
+
bytes: new TextEncoder().encode(args.html),
|
|
79749
|
+
filenameHint: "page.html",
|
|
79750
|
+
mediaTypeHint: "text/html",
|
|
79751
|
+
uvxCommand: env3.UVX_PATH,
|
|
79752
|
+
timeoutMs: args.timeoutMs,
|
|
79753
|
+
env: env3,
|
|
79754
|
+
execFileImpl
|
|
79755
|
+
});
|
|
79756
|
+
} : null;
|
|
79757
|
+
const convertHtmlToMarkdown = markdownRequested ? async (args) => {
|
|
79758
|
+
if (effectiveMarkdownMode === "llm") {
|
|
79759
|
+
if (!llmHtmlToMarkdown) {
|
|
79760
|
+
throw new Error("No HTML\u2192Markdown converter configured");
|
|
79761
|
+
}
|
|
79762
|
+
return llmHtmlToMarkdown(args);
|
|
79763
|
+
}
|
|
79764
|
+
if (llmHtmlToMarkdown) {
|
|
79765
|
+
try {
|
|
79766
|
+
return await llmHtmlToMarkdown(args);
|
|
79767
|
+
} catch (error47) {
|
|
79768
|
+
if (!markitdownHtmlToMarkdown) throw error47;
|
|
79769
|
+
return await markitdownHtmlToMarkdown(args);
|
|
79770
|
+
}
|
|
79771
|
+
}
|
|
79772
|
+
if (markitdownHtmlToMarkdown) {
|
|
79773
|
+
return await markitdownHtmlToMarkdown(args);
|
|
79774
|
+
}
|
|
79775
|
+
throw new Error("No HTML\u2192Markdown converter configured");
|
|
79776
|
+
} : null;
|
|
79466
79777
|
const readTweetWithBirdClient = hasBirdCli(env3) ? ({ url: url3, timeoutMs: timeoutMs2 }) => readTweetWithBird({ url: url3, timeoutMs: timeoutMs2, env: env3 }) : null;
|
|
79467
79778
|
writeVerbose(stderr, verbose, "extract start", verboseColor);
|
|
79468
79779
|
const stopOscProgress = startOscProgress2({
|
|
@@ -79643,7 +79954,7 @@ async function runCli(argv, { env: env3, fetch: fetch2, stdout, stderr }) {
|
|
|
79643
79954
|
if (progressEnabled) {
|
|
79644
79955
|
websiteProgress?.stop?.();
|
|
79645
79956
|
spinner.setText(
|
|
79646
|
-
|
|
79957
|
+
extractMode ? `Extracted (${extractedContentSize}${viaSourceLabel})` : `Summarizing (sent ${extractedContentSize}${viaSourceLabel})\u2026`
|
|
79647
79958
|
);
|
|
79648
79959
|
}
|
|
79649
79960
|
writeVerbose(
|
|
@@ -79688,6 +79999,10 @@ async function runCli(argv, { env: env3, fetch: fetch2, stdout, stderr }) {
|
|
|
79688
79999
|
)} attemptedProviders=${extracted.diagnostics.transcript.attemptedProviders.length > 0 ? extracted.diagnostics.transcript.attemptedProviders.join(",") : "none"} notes=${formatOptionalString(extracted.diagnostics.transcript.notes ?? null)}`,
|
|
79689
80000
|
verboseColor
|
|
79690
80001
|
);
|
|
80002
|
+
if (extractMode && markdownRequested && preprocessMode !== "off" && effectiveMarkdownMode === "auto" && !extracted.diagnostics.markdown.used && !hasUvxCli(env3)) {
|
|
80003
|
+
stderr.write(`${UVX_TIP}
|
|
80004
|
+
`);
|
|
80005
|
+
}
|
|
79691
80006
|
const isYouTube = extracted.siteName === "YouTube";
|
|
79692
80007
|
const prompt = buildLinkSummaryPrompt({
|
|
79693
80008
|
url: extracted.url,
|
|
@@ -79700,7 +80015,7 @@ async function runCli(argv, { env: env3, fetch: fetch2, stdout, stderr }) {
|
|
|
79700
80015
|
summaryLength: lengthArg.kind === "preset" ? lengthArg.preset : { maxCharacters: lengthArg.maxCharacters },
|
|
79701
80016
|
shares: []
|
|
79702
80017
|
});
|
|
79703
|
-
if (
|
|
80018
|
+
if (extractMode) {
|
|
79704
80019
|
clearProgressForStdout();
|
|
79705
80020
|
if (json3) {
|
|
79706
80021
|
const finishReport = shouldComputeReport ? await buildReport() : null;
|
|
@@ -79711,6 +80026,7 @@ async function runCli(argv, { env: env3, fetch: fetch2, stdout, stderr }) {
|
|
|
79711
80026
|
timeoutMs,
|
|
79712
80027
|
youtube: youtubeMode,
|
|
79713
80028
|
firecrawl: firecrawlMode,
|
|
80029
|
+
format: format2,
|
|
79714
80030
|
markdown: effectiveMarkdownMode,
|
|
79715
80031
|
length: lengthArg.kind === "preset" ? { kind: "preset", preset: lengthArg.preset } : { kind: "chars", maxCharacters: lengthArg.maxCharacters },
|
|
79716
80032
|
maxOutputTokens: maxOutputTokensArg,
|
|
@@ -79783,6 +80099,7 @@ async function runCli(argv, { env: env3, fetch: fetch2, stdout, stderr }) {
|
|
|
79783
80099
|
timeoutMs,
|
|
79784
80100
|
youtube: youtubeMode,
|
|
79785
80101
|
firecrawl: firecrawlMode,
|
|
80102
|
+
format: format2,
|
|
79786
80103
|
markdown: effectiveMarkdownMode,
|
|
79787
80104
|
length: lengthArg.kind === "preset" ? { kind: "preset", preset: lengthArg.preset } : { kind: "chars", maxCharacters: lengthArg.maxCharacters },
|
|
79788
80105
|
maxOutputTokens: maxOutputTokensArg,
|
|
@@ -80057,6 +80374,7 @@ async function runCli(argv, { env: env3, fetch: fetch2, stdout, stderr }) {
|
|
|
80057
80374
|
timeoutMs,
|
|
80058
80375
|
youtube: youtubeMode,
|
|
80059
80376
|
firecrawl: firecrawlMode,
|
|
80377
|
+
format: format2,
|
|
80060
80378
|
markdown: effectiveMarkdownMode,
|
|
80061
80379
|
length: lengthArg.kind === "preset" ? { kind: "preset", preset: lengthArg.preset } : { kind: "chars", maxCharacters: lengthArg.maxCharacters },
|
|
80062
80380
|
maxOutputTokens: maxOutputTokensArg,
|