webpeel 0.19.1 → 0.19.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +131 -4
- package/dist/cli.js.map +1 -1
- package/dist/server/routes/mcp.d.ts.map +1 -1
- package/dist/server/routes/mcp.js +35 -0
- package/dist/server/routes/mcp.js.map +1 -1
- package/package.json +1 -1
package/dist/cli.js
CHANGED
|
@@ -22,6 +22,14 @@ import { getCache, setCache, parseTTL, clearCache, cacheStats } from './cache.js
|
|
|
22
22
|
import { estimateTokens } from './core/markdown.js';
|
|
23
23
|
import { distillToBudget, budgetListings } from './core/budget.js';
|
|
24
24
|
import { SCHEMA_TEMPLATES, getSchemaTemplate, listSchemaTemplates } from './core/schema-templates.js';
|
|
25
|
+
// Intercept verb-first syntax before Commander parses:
//   "webpeel fetch <url>"  →  "webpeel <url>"
// Note: 'read' is intentionally excluded — it's a registered subcommand with its own behavior.
const VERB_ALIASES = new Set(['fetch', 'get', 'scrape', 'peel']);
/**
 * Drops a leading verb alias from an argv-style array, in place.
 *
 * Only the first user argument (index 2, right after the runtime and script
 * paths) is inspected, and the comparison is case-insensitive.
 *
 * @param {string[]} argv - Argument vector shaped like `process.argv`; mutated in place.
 * @returns {boolean} `true` when a verb alias was removed, `false` when argv was left untouched.
 */
function stripVerbAlias(argv) {
    const firstArg = argv[2];
    if (typeof firstArg !== 'string' || !VERB_ALIASES.has(firstArg.toLowerCase())) {
        return false;
    }
    // Remove the verb so the URL shifts into its position.
    argv.splice(2, 1);
    return true;
}
stripVerbAlias(process.argv);
|
|
25
33
|
const program = new Command();
|
|
26
34
|
// Read version from package.json dynamically
|
|
27
35
|
import { fileURLToPath } from 'url';
|
|
@@ -255,7 +263,8 @@ program
|
|
|
255
263
|
})
|
|
256
264
|
.option('--wait-until <event>', 'Page load event: domcontentloaded, networkidle, load, commit (auto-enables --render)')
|
|
257
265
|
.option('--wait-selector <css>', 'Wait for CSS selector before extracting (auto-enables --render)')
|
|
258
|
-
.option('--block-resources <types>', 'Block resource types, comma-separated: image,stylesheet,font,media,script (auto-enables --render)')
|
|
266
|
+
.option('--block-resources <types>', 'Block resource types, comma-separated: image,stylesheet,font,media,script (auto-enables --render)')
|
|
267
|
+
.option('--format <type>', 'Output format: markdown (default), text, html, json');
|
|
259
268
|
// ─── Help System ─────────────────────────────────────────────────────────────
|
|
260
269
|
// Detect --help-all early, before Commander parses argv.
|
|
261
270
|
const isHelpAll = process.argv.slice(2).some(a => a === '--help-all');
|
|
@@ -423,7 +432,11 @@ program.configureHelp({
|
|
|
423
432
|
// API-based fetch (routes through WebPeel API, no local Playwright)
|
|
424
433
|
// ============================================================
|
|
425
434
|
async function fetchViaApi(url, options, apiKey, apiUrl) {
|
|
426
|
-
|
|
435
|
+
// --format is a CLI output flag; API format is always the content extraction format
|
|
436
|
+
const apiFormat = (['text', 'html', 'markdown', 'md'].includes((options.format || '').toLowerCase()))
|
|
437
|
+
? (options.format.toLowerCase() === 'md' ? 'markdown' : options.format.toLowerCase())
|
|
438
|
+
: (options.html ? 'html' : options.text ? 'text' : 'markdown');
|
|
439
|
+
const params = new URLSearchParams({ url, format: apiFormat });
|
|
427
440
|
if (options.render)
|
|
428
441
|
params.set('render', 'true');
|
|
429
442
|
if (options.stealth)
|
|
@@ -474,9 +487,26 @@ async function fetchViaApi(url, options, apiKey, apiUrl) {
|
|
|
474
487
|
}
|
|
475
488
|
// Main fetch handler — shared with the `pipe` subcommand
|
|
476
489
|
async function runFetch(url, options) {
|
|
490
|
+
// Handle --format flag: maps to existing boolean flags
|
|
491
|
+
if (options.format) {
|
|
492
|
+
const fmt = options.format.toLowerCase();
|
|
493
|
+
if (fmt === 'text')
|
|
494
|
+
options.text = true;
|
|
495
|
+
else if (fmt === 'html')
|
|
496
|
+
options.html = true;
|
|
497
|
+
else if (fmt === 'json')
|
|
498
|
+
options.json = true;
|
|
499
|
+
else if (fmt === 'markdown' || fmt === 'md') { /* default, do nothing */ }
|
|
500
|
+
else {
|
|
501
|
+
console.error(`Unknown format: ${options.format}. Use: text, markdown, html, or json`);
|
|
502
|
+
process.exit(1);
|
|
503
|
+
}
|
|
504
|
+
}
|
|
477
505
|
// Smart defaults: when piped (not a TTY), default to silent JSON + budget
|
|
506
|
+
// BUT respect explicit --format flag (user chose the output format)
|
|
478
507
|
const isPiped = !process.stdout.isTTY;
|
|
479
|
-
|
|
508
|
+
const hasExplicitFormat = options.format && ['text', 'html', 'markdown', 'md'].includes(options.format.toLowerCase());
|
|
509
|
+
if (isPiped && !options.html && !options.text && !hasExplicitFormat) {
|
|
480
510
|
if (!options.json)
|
|
481
511
|
options.json = true;
|
|
482
512
|
if (!options.silent)
|
|
@@ -589,7 +619,14 @@ async function runFetch(url, options) {
|
|
|
589
619
|
}
|
|
590
620
|
}
|
|
591
621
|
catch {
|
|
592
|
-
|
|
622
|
+
// Check if it looks like a command/verb the user typed by mistake
|
|
623
|
+
const commonVerbs = ['fetch', 'get', 'scrape', 'read', 'download', 'curl', 'wget', 'peel'];
|
|
624
|
+
if (commonVerbs.includes(url.toLowerCase())) {
|
|
625
|
+
exitWithJsonError(`Did you mean: webpeel "${program.args[1] || '<url>'}"?\nThe URL goes directly after webpeel — no verb needed.\nExample: webpeel "https://example.com" --json`, 'INVALID_URL');
|
|
626
|
+
}
|
|
627
|
+
else {
|
|
628
|
+
exitWithJsonError(`Invalid URL: "${url}"\nMake sure to include the protocol (https://)\nExample: webpeel "https://${url}" --json`, 'INVALID_URL');
|
|
629
|
+
}
|
|
593
630
|
}
|
|
594
631
|
const useStealth = options.stealth || false;
|
|
595
632
|
// Check usage quota
|
|
@@ -2208,6 +2245,96 @@ program
|
|
|
2208
2245
|
process.exit(1);
|
|
2209
2246
|
}
|
|
2210
2247
|
});
|
|
2248
|
+
// `webpeel doctor` — prints a step-by-step diagnosis of the installation:
// configured API key, API reachability, key validity, a sample fetch, and a
// YouTube fetch. Health, key and fetch failures are counted; when any of them
// fails the closing summary says so and the process exit code is set to 1.
// YouTube problems are reported as warnings only.
program
    .command('doctor')
    .description('Diagnose WebPeel installation (API key, connectivity, fetch test)')
    .action(async () => {
        const cfg = loadConfig();
        const apiKey = cfg.apiKey || process.env.WEBPEEL_API_KEY;
        // Strip trailing slashes so `${apiUrl}/health` never turns into `//health`.
        const apiUrl = (process.env.WEBPEEL_API_URL || 'https://api.webpeel.dev').replace(/\/+$/, '');
        // Show at most 12 leading characters, and never more than half of the key,
        // so a short key is not printed in full.
        const maskKey = (key) => `${key.slice(0, Math.min(12, Math.floor(key.length / 2)))}...`;
        // Percent-encode the target so its own query string cannot leak into the API query.
        const fetchEndpoint = (target) => `${apiUrl}/v1/fetch?url=${encodeURIComponent(target)}`;
        let failedChecks = 0;
        console.log('WebPeel Doctor\n');
        console.log(`Version: ${cliVersion}`);
        console.log(`API URL: ${apiUrl}`);
        console.log(`API Key: ${apiKey ? maskKey(apiKey) : '❌ Not configured'}`);
        if (!apiKey) {
            console.log('\n❌ No API key. Run: webpeel auth <your-key>');
            console.log(' Get a free key at: https://app.webpeel.dev/keys');
            process.exit(1);
        }
        const authHeaders = { Authorization: `Bearer ${apiKey}` };
        // Check API connectivity
        console.log('\nChecking API connectivity...');
        try {
            const healthRes = await fetch(`${apiUrl}/health`, { signal: AbortSignal.timeout(10000) });
            if (healthRes.ok) {
                const health = await healthRes.json();
                console.log(`API Health: ✅ ${health.status || 'ok'} (uptime: ${Math.round((health.uptime || 0) / 60)}min)`);
            }
            else {
                // A reachable but unhealthy API must not be reported as healthy.
                failedChecks += 1;
                console.log(`API Health: ❌ Unexpected response (${healthRes.status})`);
            }
        }
        catch (err) {
            failedChecks += 1;
            console.log(`API Health: ❌ Cannot reach ${apiUrl} (${err.message})`);
        }
        // Check API key validity
        console.log('Checking API key...');
        try {
            const usageRes = await fetch(`${apiUrl}/v1/usage`, {
                headers: authHeaders,
                signal: AbortSignal.timeout(10000),
            });
            if (usageRes.ok) {
                const usage = await usageRes.json();
                // The payload is probed under several alternative field names.
                const plan = usage?.tier || (typeof usage?.plan === 'string' ? usage?.plan : usage?.plan?.tier) || 'free';
                const used = usage?.used ?? usage?.totalRequests ?? usage?.weekly?.used ?? 0;
                const limit = usage?.limit ?? usage?.weeklyLimit ?? usage?.weekly?.limit ?? 500;
                console.log(`API Key: ✅ Valid (${plan} plan, ${used}/${limit} used this week)`);
            }
            else if (usageRes.status === 401) {
                failedChecks += 1;
                console.log('API Key: ❌ Invalid or expired. Run: webpeel auth <new-key>');
            }
            else {
                // Reported as a warning only; not counted as a failure.
                console.log(`API Key: ⚠️ Unexpected response (${usageRes.status})`);
            }
        }
        catch (err) {
            failedChecks += 1;
            console.log(`API Key: ❌ Check failed (${err.message})`);
        }
        // Quick fetch test
        console.log('Testing fetch...');
        try {
            const testRes = await fetch(fetchEndpoint('https://example.com'), {
                headers: authHeaders,
                signal: AbortSignal.timeout(15000),
            });
            if (testRes.ok) {
                const data = await testRes.json();
                console.log(`Fetch Test: ✅ OK (${data.tokenCount || data.tokens || '?'} tokens, ${data.fetchTimeMs || data.elapsed || '?'}ms)`);
            }
            else {
                failedChecks += 1;
                console.log(`Fetch Test: ❌ Failed (${testRes.status})`);
            }
        }
        catch (err) {
            failedChecks += 1;
            console.log(`Fetch Test: ❌ Failed (${err.message})`);
        }
        // Check YouTube (warning-level: never counted as a failure)
        console.log('Testing YouTube...');
        try {
            const ytRes = await fetch(fetchEndpoint('https://www.youtube.com/watch?v=dQw4w9WgXcQ'), {
                headers: authHeaders,
                signal: AbortSignal.timeout(15000),
            });
            if (ytRes.ok) {
                const data = await ytRes.json();
                const hasContent = (data.content || '').length > 100;
                console.log(`YouTube: ${hasContent ? '✅' : '⚠️'} ${hasContent ? `Content extracted (${data.tokenCount || data.tokens || '?'} tokens)` : 'Content limited'}`);
            }
            else {
                console.log(`YouTube: ⚠️ Response ${ytRes.status}`);
            }
        }
        catch (err) {
            console.log(`YouTube: ⚠️ ${err.message}`);
        }
        if (failedChecks > 0) {
            // Do not claim readiness when a required check failed; signal it to scripts too.
            console.log(`\n❌ ${failedChecks} check(s) failed. Fix the issues above, then re-run: webpeel doctor`);
            process.exitCode = 1;
            return;
        }
        console.log('\n✅ WebPeel is ready to use!');
        console.log(' Try: webpeel "https://news.ycombinator.com" --json');
    });
|
|
2211
2338
|
program
|
|
2212
2339
|
.command('login')
|
|
2213
2340
|
.description('Authenticate the CLI with your API key')
|