webpeel 0.17.1 → 0.17.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cache.d.ts.map +1 -1
- package/dist/cache.js +1 -0
- package/dist/cache.js.map +1 -1
- package/dist/cli.js +218 -13
- package/dist/cli.js.map +1 -1
- package/dist/core/content-pruner.d.ts.map +1 -1
- package/dist/core/content-pruner.js +27 -0
- package/dist/core/content-pruner.js.map +1 -1
- package/dist/core/domain-extractors.js +4 -4
- package/dist/core/http-fetch.js +1 -1
- package/dist/core/http-fetch.js.map +1 -1
- package/dist/core/markdown.d.ts.map +1 -1
- package/dist/core/markdown.js +30 -1
- package/dist/core/markdown.js.map +1 -1
- package/dist/core/metadata.d.ts +3 -1
- package/dist/core/metadata.d.ts.map +1 -1
- package/dist/core/metadata.js +14 -2
- package/dist/core/metadata.js.map +1 -1
- package/dist/core/pipeline.d.ts +2 -0
- package/dist/core/pipeline.d.ts.map +1 -1
- package/dist/core/pipeline.js +45 -2
- package/dist/core/pipeline.js.map +1 -1
- package/dist/core/readability.d.ts.map +1 -1
- package/dist/core/readability.js +24 -0
- package/dist/core/readability.js.map +1 -1
- package/dist/server/app.d.ts.map +1 -1
- package/dist/server/app.js +10 -10
- package/dist/server/app.js.map +1 -1
- package/dist/server/job-queue.d.ts.map +1 -1
- package/dist/server/job-queue.js +4 -2
- package/dist/server/job-queue.js.map +1 -1
- package/dist/server/logger.d.ts +11 -0
- package/dist/server/logger.d.ts.map +1 -0
- package/dist/server/logger.js +38 -0
- package/dist/server/logger.js.map +1 -0
- package/dist/server/middleware/auth.js +4 -4
- package/dist/server/middleware/auth.js.map +1 -1
- package/dist/server/middleware/rate-limit.d.ts.map +1 -1
- package/dist/server/middleware/rate-limit.js +24 -7
- package/dist/server/middleware/rate-limit.js.map +1 -1
- package/dist/server/routes/cli-usage.js +1 -1
- package/dist/server/routes/cli-usage.js.map +1 -1
- package/dist/server/routes/stripe.d.ts.map +1 -1
- package/dist/server/routes/stripe.js +15 -13
- package/dist/server/routes/stripe.js.map +1 -1
- package/dist/server/routes/users.d.ts.map +1 -1
- package/dist/server/routes/users.js +44 -0
- package/dist/server/routes/users.js.map +1 -1
- package/dist/types.d.ts +2 -0
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js.map +1 -1
- package/package.json +1 -1
package/dist/cache.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"cache.d.ts","sourceRoot":"","sources":["../src/cache.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAiBH;;GAEG;AACH,wBAAgB,QAAQ,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,CAc5C;
|
|
1
|
+
{"version":3,"file":"cache.d.ts","sourceRoot":"","sources":["../src/cache.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAiBH;;GAEG;AACH,wBAAgB,QAAQ,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,CAc5C;AAkBD;;GAEG;AACH,wBAAgB,QAAQ,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,GAAG,GAAG,IAAI,CAsB/E;AAED;;GAEG;AACH,wBAAgB,QAAQ,CAAC,GAAG,EAAE,MAAM,EAAE,MAAM,EAAE,GAAG,EAAE,KAAK,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,IAAI,CAerG;AAED;;GAEG;AACH,wBAAgB,UAAU,CAAC,GAAG,UAAQ,GAAG,MAAM,CAyB9C;AAED;;GAEG;AACH,wBAAgB,UAAU,IAAI;IAAE,OAAO,EAAE,MAAM,CAAC;IAAC,SAAS,EAAE,MAAM,CAAC;IAAC,GAAG,EAAE,MAAM,CAAA;CAAE,CAgBhF"}
|
package/dist/cache.js
CHANGED
|
@@ -37,6 +37,7 @@ function cacheKey(url, options) {
|
|
|
37
37
|
stealth: options?.stealth || false,
|
|
38
38
|
selector: options?.selector || null,
|
|
39
39
|
format: options?.format || 'markdown',
|
|
40
|
+
readable: options?.readable || false,
|
|
40
41
|
};
|
|
41
42
|
const hash = createHash('sha256').update(JSON.stringify(relevant)).digest('hex').slice(0, 16);
|
|
42
43
|
return hash;
|
package/dist/cache.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"cache.js","sourceRoot":"","sources":["../src/cache.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,YAAY,EAAE,aAAa,EAAE,UAAU,EAAE,SAAS,EAAE,WAAW,EAAE,UAAU,EAAE,QAAQ,EAAE,MAAM,IAAI,CAAC;AAC3G,OAAO,EAAE,OAAO,EAAE,MAAM,IAAI,CAAC;AAC7B,OAAO,EAAE,IAAI,EAAE,MAAM,MAAM,CAAC;AAC5B,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AAEpC,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,EAAE,EAAE,UAAU,EAAE,OAAO,CAAC,CAAC;AAUvD;;GAEG;AACH,MAAM,UAAU,QAAQ,CAAC,GAAW;IAClC,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,kBAAkB,CAAC,CAAC;IAC5C,IAAI,CAAC,KAAK,EAAE,CAAC;QACX,MAAM,IAAI,KAAK,CAAC,wBAAwB,GAAG,yBAAyB,CAAC,CAAC;IACxE,CAAC;IACD,MAAM,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;IACjC,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;IACtB,QAAQ,IAAI,EAAE,CAAC;QACb,KAAK,GAAG,CAAC,CAAC,OAAO,KAAK,GAAG,IAAI,CAAC;QAC9B,KAAK,GAAG,CAAC,CAAC,OAAO,KAAK,GAAG,EAAE,GAAG,IAAI,CAAC;QACnC,KAAK,GAAG,CAAC,CAAC,OAAO,KAAK,GAAG,EAAE,GAAG,EAAE,GAAG,IAAI,CAAC;QACxC,KAAK,GAAG,CAAC,CAAC,OAAO,KAAK,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,IAAI,CAAC;QAC7C,OAAO,CAAC,CAAC,MAAM,IAAI,KAAK,CAAC,qBAAqB,IAAI,EAAE,CAAC,CAAC;IACxD,CAAC;AACH,CAAC;AAED;;GAEG;AACH,SAAS,QAAQ,CAAC,GAAW,EAAE,OAA6B;IAC1D,MAAM,QAAQ,GAAG;QACf,GAAG;QACH,MAAM,EAAE,OAAO,EAAE,MAAM,IAAI,KAAK;QAChC,OAAO,EAAE,OAAO,EAAE,OAAO,IAAI,KAAK;QAClC,QAAQ,EAAE,OAAO,EAAE,QAAQ,IAAI,IAAI;QACnC,MAAM,EAAE,OAAO,EAAE,MAAM,IAAI,UAAU;
|
|
1
|
+
{"version":3,"file":"cache.js","sourceRoot":"","sources":["../src/cache.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,YAAY,EAAE,aAAa,EAAE,UAAU,EAAE,SAAS,EAAE,WAAW,EAAE,UAAU,EAAE,QAAQ,EAAE,MAAM,IAAI,CAAC;AAC3G,OAAO,EAAE,OAAO,EAAE,MAAM,IAAI,CAAC;AAC7B,OAAO,EAAE,IAAI,EAAE,MAAM,MAAM,CAAC;AAC5B,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AAEpC,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,EAAE,EAAE,UAAU,EAAE,OAAO,CAAC,CAAC;AAUvD;;GAEG;AACH,MAAM,UAAU,QAAQ,CAAC,GAAW;IAClC,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,kBAAkB,CAAC,CAAC;IAC5C,IAAI,CAAC,KAAK,EAAE,CAAC;QACX,MAAM,IAAI,KAAK,CAAC,wBAAwB,GAAG,yBAAyB,CAAC,CAAC;IACxE,CAAC;IACD,MAAM,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;IACjC,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;IACtB,QAAQ,IAAI,EAAE,CAAC;QACb,KAAK,GAAG,CAAC,CAAC,OAAO,KAAK,GAAG,IAAI,CAAC;QAC9B,KAAK,GAAG,CAAC,CAAC,OAAO,KAAK,GAAG,EAAE,GAAG,IAAI,CAAC;QACnC,KAAK,GAAG,CAAC,CAAC,OAAO,KAAK,GAAG,EAAE,GAAG,EAAE,GAAG,IAAI,CAAC;QACxC,KAAK,GAAG,CAAC,CAAC,OAAO,KAAK,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,IAAI,CAAC;QAC7C,OAAO,CAAC,CAAC,MAAM,IAAI,KAAK,CAAC,qBAAqB,IAAI,EAAE,CAAC,CAAC;IACxD,CAAC;AACH,CAAC;AAED;;GAEG;AACH,SAAS,QAAQ,CAAC,GAAW,EAAE,OAA6B;IAC1D,MAAM,QAAQ,GAAG;QACf,GAAG;QACH,MAAM,EAAE,OAAO,EAAE,MAAM,IAAI,KAAK;QAChC,OAAO,EAAE,OAAO,EAAE,OAAO,IAAI,KAAK;QAClC,QAAQ,EAAE,OAAO,EAAE,QAAQ,IAAI,IAAI;QACnC,MAAM,EAAE,OAAO,EAAE,MAAM,IAAI,UAAU;QACrC,QAAQ,EAAE,OAAO,EAAE,QAAQ,IAAI,KAAK;KACrC,CAAC;IACF,MAAM,IAAI,GAAG,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;IAC9F,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,QAAQ,CAAC,GAAW,EAAE,OAA6B;IACjE,MAAM,GAAG,GAAG,QAAQ,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;IACnC,MAAM,QAAQ,GAAG,IAAI,CAAC,SAAS,EAAE,GAAG,GAAG,OAAO,CAAC,CAAC;IAEhD,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC;QAAE,OAAO,IAAI,CAAC;IAEvC,IAAI,CAAC;QACH,MAAM,KAAK,GAAe,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC,CAAC;QACtE,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK,CAAC,QAAQ,CAAC;QAExC,IAAI,GAAG,GAAG,KAAK,CAAC,KAAK,EAAE,
CAAC;YACtB,mCAAmC;YACnC,IAAI,CAAC;gBAAC,UAAU,CAAC,QAAQ,CAAC,CAAC;YAAC,CAAC;YAAC,OAAO,CAAC,EAAE,CAAC;gBACvC,IAAI,OAAO,CAAC,GAAG,CAAC,KAAK;oBAAE,OAAO,CAAC,KAAK,CAAC,WAAW,EAAE,qBAAqB,EAAE,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YAC/G,CAAC;YACD,OAAO,IAAI,CAAC;QACd,CAAC;QAED,OAAO,KAAK,CAAC,MAAM,CAAC;IACtB,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,QAAQ,CAAC,GAAW,EAAE,MAAW,EAAE,KAAa,EAAE,OAA6B;IAC7F,IAAI,CAAC,UAAU,CAAC,SAAS,CAAC,EAAE,CAAC;QAC3B,SAAS,CAAC,SAAS,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAC5C,CAAC;IAED,MAAM,GAAG,GAAG,QAAQ,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;IACnC,MAAM,KAAK,GAAe;QACxB,GAAG;QACH,MAAM;QACN,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE;QACpB,KAAK;QACL,OAAO,EAAE,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,SAAS;KACvD,CAAC;IAEF,aAAa,CAAC,IAAI,CAAC,SAAS,EAAE,GAAG,GAAG,OAAO,CAAC,EAAE,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,CAAC;AACvE,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,UAAU,CAAC,GAAG,GAAG,KAAK;IACpC,IAAI,CAAC,UAAU,CAAC,SAAS,CAAC;QAAE,OAAO,CAAC,CAAC;IAErC,MAAM,KAAK,GAAG,WAAW,CAAC,SAAS,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC;IACtE,IAAI,OAAO,GAAG,CAAC,CAAC;IAEhB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,QAAQ,GAAG,IAAI,CAAC,SAAS,EAAE,IAAI,CAAC,CAAC;QACvC,IAAI,CAAC;YACH,IAAI,GAAG,EAAE,CAAC;gBACR,UAAU,CAAC,QAAQ,CAAC,CAAC;gBACrB,OAAO,EAAE,CAAC;YACZ,CAAC;iBAAM,CAAC;gBACN,MAAM,KAAK,GAAe,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC,CAAC;gBACtE,IAAI,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK,CAAC,QAAQ,GAAG,KAAK,CAAC,KAAK,EAAE,CAAC;oBAC9C,UAAU,CAAC,QAAQ,CAAC,CAAC;oBACrB,OAAO,EAAE,CAAC;gBACZ,CAAC;YACH,CAAC;QACH,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACX,IAAI,OAAO,CAAC,GAAG,CAAC,KAAK;gBAAE,OAAO,CAAC,KAAK,CAAC,WAAW,EAAE,0BAA0B,EAAE,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACpH,CAAC;IACH,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,UAAU;IACxB,IAAI,CAAC,UAAU,CAAC,SAAS,CAAC;QAAE,OAAO,EAAE,OAAO,EAAE,CAAC,EAAE,SAAS,EAAE,CA
AC,EAAE,GAAG,EAAE,SAAS,EAAE,CAAC;IAEhF,MAAM,KAAK,GAAG,WAAW,CAAC,SAAS,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC;IACtE,IAAI,SAAS,GAAG,CAAC,CAAC;IAElB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,CAAC;YACH,MAAM,IAAI,GAAG,QAAQ,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,CAAC,CAAC,CAAC;YAC7C,SAAS,IAAI,IAAI,CAAC,IAAI,CAAC;QACzB,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACX,IAAI,OAAO,CAAC,GAAG,CAAC,KAAK;gBAAE,OAAO,CAAC,KAAK,CAAC,WAAW,EAAE,mBAAmB,EAAE,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAC7G,CAAC;IACH,CAAC;IAED,OAAO,EAAE,OAAO,EAAE,KAAK,CAAC,MAAM,EAAE,SAAS,EAAE,GAAG,EAAE,SAAS,EAAE,CAAC;AAC9D,CAAC"}
|
package/dist/cli.js
CHANGED
|
@@ -187,7 +187,7 @@ program
|
|
|
187
187
|
.option('-w, --wait <ms>', 'Wait time after page load (ms)', parseInt)
|
|
188
188
|
.option('--html', 'Output raw HTML instead of markdown')
|
|
189
189
|
.option('--text', 'Output plain text instead of markdown')
|
|
190
|
-
.option('--clean', '
|
|
190
|
+
.option('--clean', 'Clean output — article content only, no links or metadata (alias for --readable with URL-stripped markdown)')
|
|
191
191
|
.option('--json', 'Output as JSON')
|
|
192
192
|
.option('-t, --timeout <ms>', 'Request timeout (ms)', (v) => parseInt(v, 10), 30000)
|
|
193
193
|
.option('--ua <agent>', 'Custom user agent')
|
|
@@ -201,6 +201,7 @@ program
|
|
|
201
201
|
.option('--only-main-content', 'Shortcut for --include-tags main,article')
|
|
202
202
|
.option('--full-content', 'Return full page content (disable automatic content density pruning)')
|
|
203
203
|
.option('--readable', 'Reader mode — extract only the main article content, strip all noise (like browser Reader Mode)')
|
|
204
|
+
.option('--full-nav', 'Keep full navigation/content (disable auto-readability when piped or in agent mode)')
|
|
204
205
|
.option('--focus <query>', 'Query-focused filtering — only return content relevant to this query (BM25 ranking)')
|
|
205
206
|
.option('--chunk', 'Split content into RAG-ready chunks')
|
|
206
207
|
.option('--chunk-size <tokens>', 'Max tokens per chunk (default: 512)', parseInt)
|
|
@@ -214,6 +215,7 @@ program
|
|
|
214
215
|
.option('--images', 'Output image URLs from the page')
|
|
215
216
|
.option('--meta', 'Output only the page metadata (title, description, author, etc.)')
|
|
216
217
|
.option('--raw', 'Return full page without smart content extraction')
|
|
218
|
+
.option('--full', 'Alias for --raw — full page content, no budget')
|
|
217
219
|
.option('--lite', 'Lite mode — minimal processing, maximum speed (skip pruning, budget, metadata)')
|
|
218
220
|
.option('--action <actions...>', 'Page actions before scraping (e.g., "click:.btn" "wait:2000" "scroll:bottom")')
|
|
219
221
|
.option('--extract <json>', 'Extract structured data using CSS selectors (JSON object of field:selector pairs)')
|
|
@@ -247,11 +249,60 @@ program
|
|
|
247
249
|
.option('--wait-until <event>', 'Page load event: domcontentloaded, networkidle, load, commit (auto-enables --render)')
|
|
248
250
|
.option('--wait-selector <css>', 'Wait for CSS selector before extracting (auto-enables --render)')
|
|
249
251
|
.option('--block-resources <types>', 'Block resource types, comma-separated: image,stylesheet,font,media,script (auto-enables --render)');
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
252
|
+
// ─── Help System ─────────────────────────────────────────────────────────────
|
|
253
|
+
// Detect --help-all early, before Commander parses argv.
|
|
254
|
+
const isHelpAll = process.argv.slice(2).some(a => a === '--help-all');
|
|
255
|
+
if (isHelpAll) {
|
|
256
|
+
// Translate --help-all → --help so Commander generates its standard output.
|
|
257
|
+
const idx = process.argv.indexOf('--help-all');
|
|
258
|
+
if (idx !== -1)
|
|
259
|
+
process.argv[idx] = '--help';
|
|
260
|
+
}
|
|
261
|
+
// ANSI helpers (fall back gracefully when colors are disabled).
|
|
262
|
+
const NO_COLOR = process.env.NO_COLOR !== undefined || !process.stdout.isTTY;
|
|
263
|
+
const bold = (s) => NO_COLOR ? s : `\x1b[1m${s}\x1b[0m`;
|
|
264
|
+
const dim = (s) => NO_COLOR ? s : `\x1b[2m${s}\x1b[0m`;
|
|
265
|
+
const cyan = (s) => NO_COLOR ? s : `\x1b[36m${s}\x1b[0m`;
|
|
266
|
+
/**
|
|
267
|
+
* Reconstruct the standard Commander help layout for --help-all and subcommands.
|
|
268
|
+
* This mirrors Commander's own default formatHelp() so subcommand help keeps working.
|
|
269
|
+
*/
|
|
270
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
271
|
+
function buildCommanderHelp(cmd, helper) {
|
|
272
|
+
const termWidth = helper.padWidth(cmd, helper);
|
|
273
|
+
const helpWidth = helper.helpWidth ?? 80;
|
|
274
|
+
const pad = ' ';
|
|
275
|
+
const formatItem = (term, description) => {
|
|
276
|
+
if (description) {
|
|
277
|
+
const full = `${term.padEnd(termWidth + 2)}${description}`;
|
|
278
|
+
return helper.wrap(full, helpWidth - pad.length, termWidth + 2);
|
|
279
|
+
}
|
|
280
|
+
return term;
|
|
281
|
+
};
|
|
282
|
+
const formatList = (items) => items.join('\n').replace(/^/gm, pad);
|
|
283
|
+
let out = [`Usage: ${helper.commandUsage(cmd)}`, ''];
|
|
284
|
+
const desc = helper.commandDescription(cmd);
|
|
285
|
+
if (desc.length > 0) {
|
|
286
|
+
out = out.concat([helper.wrap(desc, helpWidth, 0), '']);
|
|
287
|
+
}
|
|
288
|
+
// Arguments
|
|
289
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
290
|
+
const args = helper.visibleArguments(cmd).map(a => formatItem(helper.argumentTerm(a), helper.argumentDescription(a)));
|
|
291
|
+
if (args.length > 0)
|
|
292
|
+
out = out.concat(['Arguments:', formatList(args), '']);
|
|
293
|
+
// Options
|
|
294
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
295
|
+
const opts = helper.visibleOptions(cmd).map(o => formatItem(helper.optionTerm(o), helper.optionDescription(o)));
|
|
296
|
+
if (opts.length > 0)
|
|
297
|
+
out = out.concat(['Options:', formatList(opts), '']);
|
|
298
|
+
// Subcommands
|
|
299
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
300
|
+
const cmds = helper.visibleCommands(cmd).map(c => formatItem(helper.subcommandTerm(c), helper.subcommandDescription(c)));
|
|
301
|
+
if (cmds.length > 0)
|
|
302
|
+
out = out.concat(['Commands:', formatList(cmds), '']);
|
|
303
|
+
// Append grouped option sections only on root command (--help-all)
|
|
304
|
+
if (cmd.parent === null) {
|
|
305
|
+
out = out.concat([`
|
|
255
306
|
Output Formats:
|
|
256
307
|
--json JSON output with full metadata
|
|
257
308
|
--html Raw HTML output
|
|
@@ -295,16 +346,106 @@ Agent Integration:
|
|
|
295
346
|
$ webpeel pipe "https://example.com" | jq .content Pipe-friendly JSON
|
|
296
347
|
$ webpeel "https://site.com" --json --silent Same as pipe
|
|
297
348
|
$ curl https://webpeel.dev/llms.txt AI-readable docs
|
|
298
|
-
`);
|
|
349
|
+
`]);
|
|
350
|
+
}
|
|
351
|
+
return out.join('\n');
|
|
352
|
+
}
|
|
353
|
+
/**
|
|
354
|
+
* Condensed, Anthropic-style help for the root command (default --help).
|
|
355
|
+
*/
|
|
356
|
+
function buildCondensedHelp() {
|
|
357
|
+
const v = cliVersion;
|
|
358
|
+
return [
|
|
359
|
+
'',
|
|
360
|
+
` ${bold('◆ WebPeel')} ${dim(`v${v}`)}`,
|
|
361
|
+
` ${dim('The web data platform for AI agents')}`,
|
|
362
|
+
'',
|
|
363
|
+
` ${bold('Usage:')} webpeel [url] [options]`,
|
|
364
|
+
` webpeel <command> [options]`,
|
|
365
|
+
'',
|
|
366
|
+
` ${bold('Examples:')}`,
|
|
367
|
+
` webpeel https://example.com ${dim('Clean content (reader mode)')}`,
|
|
368
|
+
` webpeel read https://example.com ${dim('Explicit reader mode')}`,
|
|
369
|
+
` webpeel screenshot https://example.com ${dim('Screenshot any page')}`,
|
|
370
|
+
` webpeel ask https://news.com "summary" ${dim('Ask about any page')}`,
|
|
371
|
+
` webpeel search "webpeel vs jina" ${dim('Web search')}`,
|
|
372
|
+
` echo "url" | webpeel ${dim('Pipe mode (auto JSON)')}`,
|
|
373
|
+
'',
|
|
374
|
+
` ${bold('Commands:')}`,
|
|
375
|
+
` fetch (default) Fetch a URL as clean markdown`,
|
|
376
|
+
` read <url> Reader mode (article content only)`,
|
|
377
|
+
` screenshot <url> Take a screenshot`,
|
|
378
|
+
` ask <url> <question> Ask about any page`,
|
|
379
|
+
` search <query> Search the web (DuckDuckGo + sources)`,
|
|
380
|
+
` crawl <url> Crawl a website`,
|
|
381
|
+
` mcp Start MCP server for AI tools`,
|
|
382
|
+
` ${dim('... (use --help-all for all 25+ commands)')}`,
|
|
383
|
+
'',
|
|
384
|
+
` ${bold('Common Options:')}`,
|
|
385
|
+
` -r, --render Browser rendering (JS-heavy sites)`,
|
|
386
|
+
` --stealth Stealth mode (anti-bot bypass)`,
|
|
387
|
+
` --raw Full page (disable auto reader mode)`,
|
|
388
|
+
` --full Full page, no budget limit`,
|
|
389
|
+
` --json JSON output with metadata`,
|
|
390
|
+
` --budget <n> Token budget (default: 4000)`,
|
|
391
|
+
` -q, --question <q> Ask about the content`,
|
|
392
|
+
` -s, --silent No spinner output`,
|
|
393
|
+
'',
|
|
394
|
+
` Use ${cyan("'webpeel <command> --help'")} for command-specific options.`,
|
|
395
|
+
` Use ${cyan("'webpeel --help-all'")} for the full option reference.`,
|
|
396
|
+
'',
|
|
397
|
+
` Docs: ${cyan('https://webpeel.dev/docs')}`,
|
|
398
|
+
'',
|
|
399
|
+
].join('\n');
|
|
400
|
+
}
|
|
401
|
+
program.configureHelp({
|
|
402
|
+
sortSubcommands: true,
|
|
403
|
+
showGlobalOptions: false,
|
|
404
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
405
|
+
formatHelp: (cmd, helper) => {
|
|
406
|
+
// Subcommands always get standard Commander help.
|
|
407
|
+
// Root command with --help-all also gets standard full help.
|
|
408
|
+
if (cmd.parent !== null || isHelpAll) {
|
|
409
|
+
return buildCommanderHelp(cmd, helper);
|
|
410
|
+
}
|
|
411
|
+
// Root command default: beautiful condensed help.
|
|
412
|
+
return buildCondensedHelp();
|
|
413
|
+
},
|
|
414
|
+
});
|
|
299
415
|
// Main fetch handler — shared with the `pipe` subcommand
|
|
300
416
|
async function runFetch(url, options) {
|
|
301
|
-
// Smart defaults: when piped (not a TTY), default to silent JSON
|
|
417
|
+
// Smart defaults: when piped (not a TTY), default to silent JSON + budget
|
|
302
418
|
const isPiped = !process.stdout.isTTY;
|
|
303
419
|
if (isPiped && !options.html && !options.text) {
|
|
304
420
|
if (!options.json)
|
|
305
421
|
options.json = true;
|
|
306
422
|
if (!options.silent)
|
|
307
423
|
options.silent = true;
|
|
424
|
+
// Auto-enable readability for AI consumers — clean content by default
|
|
425
|
+
if (!options.readable && !options.fullNav) {
|
|
426
|
+
options.readable = true;
|
|
427
|
+
}
|
|
428
|
+
// Auto token budget for piped mode (AI consumers want concise content)
|
|
429
|
+
if (options.budget === undefined && !options.fullContent && !options.raw && !options.full) {
|
|
430
|
+
options.budget = 4000;
|
|
431
|
+
}
|
|
432
|
+
}
|
|
433
|
+
// --full alias: sets raw + fullContent
|
|
434
|
+
if (options.full) {
|
|
435
|
+
options.raw = true;
|
|
436
|
+
options.fullContent = true;
|
|
437
|
+
}
|
|
438
|
+
// Smart defaults for terminal (interactive) mode
|
|
439
|
+
const isTerminal = process.stdout.isTTY && !isPiped;
|
|
440
|
+
if (isTerminal && !options.raw && !options.html && !options.text) {
|
|
441
|
+
// Auto-readable: clean content by default (like browser Reader Mode)
|
|
442
|
+
if (!options.readable && !options.fullNav && !options.selector) {
|
|
443
|
+
options.readable = true;
|
|
444
|
+
}
|
|
445
|
+
// Default token budget: don't flood the terminal with 20K tokens
|
|
446
|
+
if (options.budget === undefined && !options.fullContent && !options.raw) {
|
|
447
|
+
options.budget = 4000;
|
|
448
|
+
}
|
|
308
449
|
}
|
|
309
450
|
// --agent sets sensible defaults for AI agents; explicit flags override
|
|
310
451
|
if (options.agent) {
|
|
@@ -316,6 +457,10 @@ async function runFetch(url, options) {
|
|
|
316
457
|
options.extractAll = true;
|
|
317
458
|
if (options.budget === undefined)
|
|
318
459
|
options.budget = 4000;
|
|
460
|
+
// Agent mode = clean content by default
|
|
461
|
+
if (!options.readable && !options.fullNav) {
|
|
462
|
+
options.readable = true;
|
|
463
|
+
}
|
|
319
464
|
}
|
|
320
465
|
const isJson = options.json;
|
|
321
466
|
// --- --list-schemas: print all available schemas and exit ---
|
|
@@ -414,8 +559,9 @@ async function runFetch(url, options) {
|
|
|
414
559
|
render: options.render,
|
|
415
560
|
stealth: options.stealth,
|
|
416
561
|
selector: options.selector,
|
|
417
|
-
format: options.html ? 'html' : options.text ? 'text' : 'markdown',
|
|
562
|
+
format: options.html ? 'html' : options.text ? 'text' : options.clean ? 'clean' : 'markdown',
|
|
418
563
|
budget: null, // Budget excluded from cache key — cache stores full content
|
|
564
|
+
readable: options.readable || false,
|
|
419
565
|
};
|
|
420
566
|
const cachedResult = getCache(url, cacheOptions);
|
|
421
567
|
if (cachedResult) {
|
|
@@ -643,6 +789,7 @@ async function runFetch(url, options) {
|
|
|
643
789
|
|| !!options.waitUntil
|
|
644
790
|
|| !!options.waitSelector
|
|
645
791
|
|| !!options.blockResources
|
|
792
|
+
|| !!options.screenshot // Auto-enable render for screenshot (needs browser)
|
|
646
793
|
|| false;
|
|
647
794
|
// Inject scroll actions when --scroll-extract N (fixed count) is used
|
|
648
795
|
if (scrollExtractCount > 0) {
|
|
@@ -731,6 +878,8 @@ async function runFetch(url, options) {
|
|
|
731
878
|
}
|
|
732
879
|
else if (options.clean) {
|
|
733
880
|
peelOptions.format = 'clean';
|
|
881
|
+
// --clean implies readable mode (article content only, no navs/footers)
|
|
882
|
+
peelOptions.readable = true;
|
|
734
883
|
}
|
|
735
884
|
else {
|
|
736
885
|
peelOptions.format = 'markdown';
|
|
@@ -787,6 +936,7 @@ async function runFetch(url, options) {
|
|
|
787
936
|
selector: options.selector,
|
|
788
937
|
format: peelOptions.format,
|
|
789
938
|
budget: null, // Budget excluded — cache stores full content, budget applied post-cache
|
|
939
|
+
readable: options.readable || false,
|
|
790
940
|
});
|
|
791
941
|
}
|
|
792
942
|
// Apply smart budget distillation AFTER caching (cache always stores full content)
|
|
@@ -1114,6 +1264,34 @@ program
|
|
|
1114
1264
|
.action(async (url, options) => {
|
|
1115
1265
|
await runFetch(url, options);
|
|
1116
1266
|
});
|
|
1267
|
+
// Read subcommand (explicit readable mode)
|
|
1268
|
+
program
|
|
1269
|
+
.command('read <url>')
|
|
1270
|
+
.description('Read a page in clean reader mode (like browser Reader View)')
|
|
1271
|
+
.option('--json', 'Output as JSON')
|
|
1272
|
+
.option('-s, --silent', 'Silent mode')
|
|
1273
|
+
.option('--budget <n>', 'Token budget (default: 4000)', parseInt)
|
|
1274
|
+
.option('--focus <query>', 'Focus on content relevant to this query')
|
|
1275
|
+
.action(async (url, opts) => {
|
|
1276
|
+
await runFetch(url, {
|
|
1277
|
+
...opts,
|
|
1278
|
+
readable: true,
|
|
1279
|
+
budget: 4000,
|
|
1280
|
+
});
|
|
1281
|
+
});
|
|
1282
|
+
// Ask subcommand (question mode)
|
|
1283
|
+
program
|
|
1284
|
+
.command('ask <url> <question>')
|
|
1285
|
+
.description('Ask a question about any page')
|
|
1286
|
+
.option('--json', 'Output as JSON')
|
|
1287
|
+
.option('-s, --silent', 'Silent mode')
|
|
1288
|
+
.action(async (url, question, opts) => {
|
|
1289
|
+
await runFetch(url, {
|
|
1290
|
+
...opts,
|
|
1291
|
+
question,
|
|
1292
|
+
readable: true,
|
|
1293
|
+
});
|
|
1294
|
+
});
|
|
1117
1295
|
// Search command
|
|
1118
1296
|
program
|
|
1119
1297
|
.command('search <query>')
|
|
@@ -1288,7 +1466,7 @@ program
|
|
|
1288
1466
|
}
|
|
1289
1467
|
}
|
|
1290
1468
|
else if (isJson) {
|
|
1291
|
-
const jsonStr = JSON.stringify(results, null, 2);
|
|
1469
|
+
const jsonStr = JSON.stringify({ query, results, count: results.length }, null, 2);
|
|
1292
1470
|
await writeStdout(jsonStr + '\n');
|
|
1293
1471
|
}
|
|
1294
1472
|
else {
|
|
@@ -1534,7 +1712,7 @@ program
|
|
|
1534
1712
|
showUsageFooter(usageCheck.usageInfo, usageCheck.isAnonymous || false, options.stealth || false);
|
|
1535
1713
|
}
|
|
1536
1714
|
if (options.json) {
|
|
1537
|
-
console.log(JSON.stringify(results, null, 2));
|
|
1715
|
+
console.log(JSON.stringify({ pages: results, count: results.length }, null, 2));
|
|
1538
1716
|
}
|
|
1539
1717
|
else {
|
|
1540
1718
|
results.forEach((result, i) => {
|
|
@@ -1876,6 +2054,7 @@ program
|
|
|
1876
2054
|
.option('-q, --question <q>', 'Quick answer')
|
|
1877
2055
|
.option('--proxy <url>', 'Proxy URL')
|
|
1878
2056
|
.option('--timeout <ms>', 'Timeout in ms', parseInt)
|
|
2057
|
+
.option('-s, --silent', 'Silent mode (always on for pipe, accepted for compatibility)')
|
|
1879
2058
|
.action(async (url, opts) => {
|
|
1880
2059
|
// Force JSON + silent — always, unconditionally
|
|
1881
2060
|
opts.json = true;
|
|
@@ -2922,6 +3101,7 @@ program
|
|
|
2922
3101
|
// Screenshot command
|
|
2923
3102
|
program
|
|
2924
3103
|
.command('screenshot <url>')
|
|
3104
|
+
.alias('snap')
|
|
2925
3105
|
.description('Take a screenshot of a URL and save as PNG/JPEG')
|
|
2926
3106
|
.option('--full-page', 'Capture full page (not just viewport)')
|
|
2927
3107
|
.option('--width <px>', 'Viewport width in pixels (default: 1280)', parseInt)
|
|
@@ -3575,7 +3755,7 @@ program
|
|
|
3575
3755
|
.option('--llm-key <key>', 'LLM API key for synthesis (or env OPENAI_API_KEY)')
|
|
3576
3756
|
.option('--llm-model <model>', 'LLM model for synthesis (default: gpt-4o-mini)')
|
|
3577
3757
|
.option('--llm-base-url <url>', 'LLM API base URL (default: https://api.openai.com/v1)')
|
|
3578
|
-
.option('--timeout <ms>', 'Max research time in ms (default:
|
|
3758
|
+
.option('--timeout <ms>', 'Max research time in ms (default: 60000)', '60000')
|
|
3579
3759
|
.option('--json', 'Output result as JSON')
|
|
3580
3760
|
.option('-s, --silent', 'Suppress progress output')
|
|
3581
3761
|
.action(async (query, options) => {
|
|
@@ -3861,11 +4041,36 @@ async function outputResult(result, options, extra = {}) {
|
|
|
3861
4041
|
output.truncated = true;
|
|
3862
4042
|
if (extra.totalAvailable !== undefined)
|
|
3863
4043
|
output.totalAvailable = extra.totalAvailable;
|
|
3864
|
-
output._meta = { version: cliVersion, method: result.method || 'simple', timing: result.timing };
|
|
4044
|
+
output._meta = { version: cliVersion, method: result.method || 'simple', timing: result.timing, serverMarkdown: result.serverMarkdown || false };
|
|
3865
4045
|
await writeStdout(JSON.stringify(output, null, 2) + '\n');
|
|
3866
4046
|
}
|
|
3867
4047
|
else {
|
|
4048
|
+
// Smart terminal header (interactive mode only)
|
|
4049
|
+
const isTerminalOutput = process.stdout.isTTY && !options.silent;
|
|
4050
|
+
if (isTerminalOutput) {
|
|
4051
|
+
const meta = result.metadata || {};
|
|
4052
|
+
const parts = [];
|
|
4053
|
+
if (meta.title || result.title)
|
|
4054
|
+
parts.push(`\x1b[1m${meta.title || result.title}\x1b[0m`);
|
|
4055
|
+
if (meta.author)
|
|
4056
|
+
parts.push(`By ${meta.author}`);
|
|
4057
|
+
if (meta.wordCount)
|
|
4058
|
+
parts.push(`${meta.wordCount} words`);
|
|
4059
|
+
const totalMs = result.timing?.total ?? result.elapsed;
|
|
4060
|
+
if (totalMs)
|
|
4061
|
+
parts.push(`${totalMs}ms`);
|
|
4062
|
+
if (parts.length > 0) {
|
|
4063
|
+
await writeStdout(`\n ${parts.join(' · ')}\n`);
|
|
4064
|
+
await writeStdout(' ' + '─'.repeat(60) + '\n\n');
|
|
4065
|
+
}
|
|
4066
|
+
}
|
|
4067
|
+
// Stream content immediately to stdout — consumer gets it without waiting
|
|
3868
4068
|
await writeStdout(result.content + '\n');
|
|
4069
|
+
// Append timing summary to stderr so it doesn't pollute piped content
|
|
4070
|
+
if (!options.silent) {
|
|
4071
|
+
const totalMs = result.timing?.total ?? result.elapsed;
|
|
4072
|
+
process.stderr.write(`\n--- ${result.tokens} tokens · ${totalMs}ms ---\n`);
|
|
4073
|
+
}
|
|
3869
4074
|
}
|
|
3870
4075
|
}
|
|
3871
4076
|
function writeStdout(data) {
|