webpeel 0.19.2 → 0.19.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -22,6 +22,14 @@ import { getCache, setCache, parseTTL, clearCache, cacheStats } from './cache.js
22
22
  import { estimateTokens } from './core/markdown.js';
23
23
  import { distillToBudget, budgetListings } from './core/budget.js';
24
24
  import { SCHEMA_TEMPLATES, getSchemaTemplate, listSchemaTemplates } from './core/schema-templates.js';
// Verb-first invocations are normalised before Commander ever sees argv:
//   "webpeel fetch <url>"  is rewritten to  "webpeel <url>"
// 'read' is deliberately absent from this set: it is a registered subcommand
// with its own behavior and must reach Commander untouched.
const VERB_ALIASES = new Set(['fetch', 'get', 'scrape', 'peel']);
{
    // argv[0] is the node binary and argv[1] the script path, so the first
    // user-supplied token (if any) sits at index 2.
    const [, , firstUserArg] = process.argv;
    const isVerbAlias = typeof firstUserArg === 'string'
        && VERB_ALIASES.has(firstUserArg.toLowerCase());
    if (isVerbAlias) {
        // Drop the verb in place so the URL slides into its position.
        process.argv.splice(2, 1);
    }
}
25
33
  const program = new Command();
26
34
  // Read version from package.json dynamically
27
35
  import { fileURLToPath } from 'url';
@@ -255,7 +263,8 @@ program
255
263
  })
256
264
  .option('--wait-until <event>', 'Page load event: domcontentloaded, networkidle, load, commit (auto-enables --render)')
257
265
  .option('--wait-selector <css>', 'Wait for CSS selector before extracting (auto-enables --render)')
258
- .option('--block-resources <types>', 'Block resource types, comma-separated: image,stylesheet,font,media,script (auto-enables --render)');
266
+ .option('--block-resources <types>', 'Block resource types, comma-separated: image,stylesheet,font,media,script (auto-enables --render)')
267
+ .option('--format <type>', 'Output format: markdown (default), text, html, json');
259
268
  // ─── Help System ─────────────────────────────────────────────────────────────
260
269
  // Detect --help-all early, before Commander parses argv.
261
270
  const isHelpAll = process.argv.slice(2).some(a => a === '--help-all');
@@ -423,7 +432,11 @@ program.configureHelp({
423
432
  // API-based fetch (routes through WebPeel API, no local Playwright)
424
433
  // ============================================================
425
434
  async function fetchViaApi(url, options, apiKey, apiUrl) {
426
- const params = new URLSearchParams({ url, format: options.format || 'markdown' });
435
+ // --format is a CLI output flag; API format is always the content extraction format
436
+ const apiFormat = (['text', 'html', 'markdown', 'md'].includes((options.format || '').toLowerCase()))
437
+ ? (options.format.toLowerCase() === 'md' ? 'markdown' : options.format.toLowerCase())
438
+ : (options.html ? 'html' : options.text ? 'text' : 'markdown');
439
+ const params = new URLSearchParams({ url, format: apiFormat });
427
440
  if (options.render)
428
441
  params.set('render', 'true');
429
442
  if (options.stealth)
@@ -474,9 +487,26 @@ async function fetchViaApi(url, options, apiKey, apiUrl) {
474
487
  }
475
488
  // Main fetch handler — shared with the `pipe` subcommand
476
489
  async function runFetch(url, options) {
490
+ // Handle --format flag: maps to existing boolean flags
491
+ if (options.format) {
492
+ const fmt = options.format.toLowerCase();
493
+ if (fmt === 'text')
494
+ options.text = true;
495
+ else if (fmt === 'html')
496
+ options.html = true;
497
+ else if (fmt === 'json')
498
+ options.json = true;
499
+ else if (fmt === 'markdown' || fmt === 'md') { /* default, do nothing */ }
500
+ else {
501
+ console.error(`Unknown format: ${options.format}. Use: text, markdown, html, or json`);
502
+ process.exit(1);
503
+ }
504
+ }
477
505
  // Smart defaults: when piped (not a TTY), default to silent JSON + budget
506
+ // BUT respect explicit --format flag (user chose the output format)
478
507
  const isPiped = !process.stdout.isTTY;
479
- if (isPiped && !options.html && !options.text) {
508
+ const hasExplicitFormat = options.format && ['text', 'html', 'markdown', 'md'].includes(options.format.toLowerCase());
509
+ if (isPiped && !options.html && !options.text && !hasExplicitFormat) {
480
510
  if (!options.json)
481
511
  options.json = true;
482
512
  if (!options.silent)
@@ -589,7 +619,14 @@ async function runFetch(url, options) {
589
619
  }
590
620
  }
591
621
  catch {
592
- exitWithJsonError(`Invalid URL format: ${url}`, 'INVALID_URL');
622
+ // Check if it looks like a command/verb the user typed by mistake
623
+ const commonVerbs = ['fetch', 'get', 'scrape', 'read', 'download', 'curl', 'wget', 'peel'];
624
+ if (commonVerbs.includes(url.toLowerCase())) {
625
+ exitWithJsonError(`Did you mean: webpeel "${program.args[1] || '<url>'}"?\nThe URL goes directly after webpeel — no verb needed.\nExample: webpeel "https://example.com" --json`, 'INVALID_URL');
626
+ }
627
+ else {
628
+ exitWithJsonError(`Invalid URL: "${url}"\nMake sure to include the protocol (https://)\nExample: webpeel "https://${url}" --json`, 'INVALID_URL');
629
+ }
593
630
  }
594
631
  const useStealth = options.stealth || false;
595
632
  // Check usage quota
@@ -2208,6 +2245,96 @@ program
2208
2245
  process.exit(1);
2209
2246
  }
2210
2247
  });
// `webpeel doctor` — runs a fixed sequence of diagnostics against the WebPeel API
// and prints one status line per check:
//   1. API key presence (exits with code 1 immediately when missing)
//   2. GET  {apiUrl}/health
//   3. GET  {apiUrl}/v1/usage   (authenticated)
//   4. GET  {apiUrl}/v1/fetch   for https://example.com (authenticated)
//   5. GET  {apiUrl}/v1/fetch   for a YouTube URL (authenticated, warning-only)
// Checks 2-4 are treated as required: if any of them fails, the final
// "ready to use" banner is replaced by a failure summary and the process exit
// code is set to 1. The YouTube check only ever emits warnings.
program
    .command('doctor')
    .description('Diagnose WebPeel installation (API key, connectivity, fetch test)')
    .action(async () => {
        const cfg = loadConfig();
        const apiKey = cfg.apiKey || process.env.WEBPEEL_API_KEY;
        const apiUrl = process.env.WEBPEEL_API_URL || 'https://api.webpeel.dev';
        console.log('WebPeel Doctor\n');
        console.log(`Version: ${cliVersion}`);
        console.log(`API URL: ${apiUrl}`);
        console.log(`API Key: ${apiKey ? apiKey.slice(0, 12) + '...' : '❌ Not configured'}`);
        if (!apiKey) {
            console.log('\n❌ No API key. Run: webpeel auth <your-key>');
            console.log(' Get a free key at: https://app.webpeel.dev/keys');
            process.exit(1);
        }
        const authHeaders = { Authorization: `Bearer ${apiKey}` };
        // Names of required checks that did not pass; drives the final verdict.
        const failedChecks = [];
        // Check API connectivity
        console.log('\nChecking API connectivity...');
        try {
            const healthRes = await fetch(`${apiUrl}/health`, { signal: AbortSignal.timeout(10000) });
            if (healthRes.ok) {
                const health = await healthRes.json();
                console.log(`API Health: ✅ ${health.status || 'ok'} (uptime: ${Math.round((health.uptime || 0) / 60)}min)`);
            }
            else {
                // A reachable server that answers with a non-2xx status is not healthy.
                console.log(`API Health: ❌ Unhealthy response (${healthRes.status})`);
                failedChecks.push('API health');
            }
        }
        catch (err) {
            console.log(`API Health: ❌ Cannot reach ${apiUrl} (${err.message})`);
            failedChecks.push('API health');
        }
        // Check API key validity
        console.log('Checking API key...');
        try {
            const usageRes = await fetch(`${apiUrl}/v1/usage`, {
                headers: authHeaders,
                signal: AbortSignal.timeout(10000),
            });
            if (usageRes.ok) {
                const usage = await usageRes.json();
                // The usage payload shape varies; probe the known field spellings in order.
                const plan = usage?.tier || (typeof usage?.plan === 'string' ? usage?.plan : usage?.plan?.tier) || 'free';
                const used = usage?.used ?? usage?.totalRequests ?? usage?.weekly?.used ?? 0;
                const limit = usage?.limit ?? usage?.weeklyLimit ?? usage?.weekly?.limit ?? 500;
                console.log(`API Key: ✅ Valid (${plan} plan, ${used}/${limit} used this week)`);
            }
            else if (usageRes.status === 401) {
                console.log('API Key: ❌ Invalid or expired. Run: webpeel auth <new-key>');
                failedChecks.push('API key');
            }
            else {
                console.log(`API Key: ⚠️ Unexpected response (${usageRes.status})`);
                failedChecks.push('API key');
            }
        }
        catch (err) {
            console.log(`API Key: ❌ Check failed (${err.message})`);
            failedChecks.push('API key');
        }
        // Quick fetch test
        console.log('Testing fetch...');
        try {
            // Encode the target URL so it is passed as a single query value,
            // matching how the YouTube check below builds its request.
            const testRes = await fetch(`${apiUrl}/v1/fetch?url=${encodeURIComponent('https://example.com')}`, {
                headers: authHeaders,
                signal: AbortSignal.timeout(15000),
            });
            if (testRes.ok) {
                const data = await testRes.json();
                console.log(`Fetch Test: ✅ OK (${data.tokenCount || data.tokens || '?'} tokens, ${data.fetchTimeMs || data.elapsed || '?'}ms)`);
            }
            else {
                console.log(`Fetch Test: ❌ Failed (${testRes.status})`);
                failedChecks.push('fetch test');
            }
        }
        catch (err) {
            console.log(`Fetch Test: ❌ Failed (${err.message})`);
            failedChecks.push('fetch test');
        }
        // Check YouTube (warning-only: never counted as a failed check)
        console.log('Testing YouTube...');
        try {
            const ytRes = await fetch(`${apiUrl}/v1/fetch?url=${encodeURIComponent('https://www.youtube.com/watch?v=dQw4w9WgXcQ')}`, {
                headers: authHeaders,
                signal: AbortSignal.timeout(15000),
            });
            if (ytRes.ok) {
                const data = await ytRes.json();
                const hasContent = (data.content || '').length > 100;
                console.log(`YouTube: ${hasContent ? '✅' : '⚠️'} ${hasContent ? `Content extracted (${data.tokenCount || data.tokens || '?'} tokens)` : 'Content limited'}`);
            }
            else {
                console.log(`YouTube: ⚠️ Response ${ytRes.status}`);
            }
        }
        catch (err) {
            console.log(`YouTube: ⚠️ ${err.message}`);
        }
        // Only claim readiness when every required check actually passed.
        if (failedChecks.length === 0) {
            console.log('\n✅ WebPeel is ready to use!');
            console.log(' Try: webpeel "https://news.ycombinator.com" --json');
        }
        else {
            console.log(`\n❌ ${failedChecks.length} check(s) failed: ${failedChecks.join(', ')}`);
            // Use exitCode rather than exit() so pending stdout writes are flushed.
            process.exitCode = 1;
        }
    });
2211
2338
  program
2212
2339
  .command('login')
2213
2340
  .description('Authenticate the CLI with your API key')