webpeel 0.1.2 → 0.3.0

package/README.md CHANGED
@@ -2,11 +2,12 @@
2
2
 
3
3
  [![npm version](https://img.shields.io/npm/v/webpeel.svg)](https://www.npmjs.com/package/webpeel)
4
4
  [![npm downloads](https://img.shields.io/npm/dm/webpeel.svg)](https://www.npmjs.com/package/webpeel)
5
+ [![GitHub stars](https://img.shields.io/github/stars/JakeLiuMe/webpeel.svg)](https://github.com/JakeLiuMe/webpeel/stargazers)
5
6
  [![CI](https://github.com/JakeLiuMe/webpeel/actions/workflows/ci.yml/badge.svg)](https://github.com/JakeLiuMe/webpeel/actions/workflows/ci.yml)
6
7
  [![TypeScript](https://img.shields.io/badge/TypeScript-5.6-blue.svg)](https://www.typescriptlang.org/)
7
8
  [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT)
8
9
 
9
- Turn any web page into clean markdown. Zero config. Free forever.
10
+ Turn any web page into clean markdown. **Stealth mode. Crawl mode. Zero config. Free forever.**
10
11
 
11
12
  ```bash
12
13
  npx webpeel https://news.ycombinator.com
@@ -36,17 +37,26 @@ npx webpeel https://news.ycombinator.com
36
37
  |---|:---:|:---:|:---:|:---:|
37
38
  | **Local execution** | ✅ Free forever | ❌ Cloud only | ❌ Cloud only | ✅ Free |
38
39
  | **JS rendering** | ✅ Auto-escalates | ✅ Always | ❌ No | ❌ No |
39
- | **Anti-bot handling** | ✅ Stealth mode | ✅ Yes | ⚠️ Limited | ❌ No |
40
+ | **Stealth mode** | ✅ Built-in | ✅ Yes | ⚠️ Limited | ❌ No |
41
+ | **Crawl mode** | ✅ Built-in | ✅ Yes | ❌ No | ❌ No |
40
42
  | **MCP Server** | ✅ Built-in | ✅ Separate repo | ❌ No | ✅ Yes |
41
43
  | **Zero config** | ✅ `npx webpeel` | ❌ API key required | ❌ API key required | ✅ Yes |
42
44
  | **Free tier** | ∞ Unlimited local | 500 pages (one-time) | 1000 req/month | ∞ Local only |
43
- | **Hosted API** | $9/mo (5K pages) | $16/mo (3K pages) | $200/mo (Starter) | N/A |
44
- | **Credit rollover** | Up to 1 month | ❌ Expire monthly | ❌ N/A | ❌ N/A |
45
+ | **Hosted API** | $9/mo (1,250/wk) | $16/mo (3K/mo) | $200/mo (Starter) | N/A |
46
+ | **Weekly reset** | N/A | ❌ Monthly only | ❌ Monthly only | ❌ N/A |
47
+ | **Extra usage** | N/A | ✅ Pay-as-you-go | ❌ Upgrade only | N/A |
48
+ | **Rollover** | N/A | ✅ 1 week | ❌ Expire monthly | ❌ N/A |
45
49
  | **Soft limits** | ✅ Never blocked | ❌ Hard cut-off | ❌ Rate limited | ❌ N/A |
46
50
  | **Markdown output** | ✅ Optimized for AI | ✅ Yes | ✅ Yes | ⚠️ Basic |
47
51
 
48
52
  **WebPeel gives you Firecrawl's power without the price tag.** Run locally for free, or use our hosted API when you need scale.
49
53
 
54
+ ### Highlights
55
+
56
+ 1. **🎭 Stealth Mode** — Bypass bot detection with playwright-extra stealth plugin. Works on sites that block regular scrapers.
57
+ 2. **🕷️ Crawl Mode** — Follow links and extract entire sites. Respects robots.txt and rate limits automatically.
58
+ 3. **💰 Actually Free** — Run unlimited requests locally. No API keys, no credit cards, no surprises. Open source MIT.
59
+
50
60
  ---
51
61
 
52
62
  ## Quick Start
@@ -57,6 +67,12 @@ npx webpeel https://news.ycombinator.com
57
67
  # Basic usage
58
68
  npx webpeel https://example.com
59
69
 
70
+ # Stealth mode (bypass bot detection)
71
+ npx webpeel https://protected-site.com --stealth
72
+
73
+ # Crawl a website (follow links, respect robots.txt)
74
+ npx webpeel crawl https://example.com --max-pages 20 --max-depth 2
75
+
60
76
  # JSON output with metadata
61
77
  npx webpeel https://example.com --json
62
78
 
@@ -91,9 +107,9 @@ const result = await peel('https://example.com', {
91
107
  });
92
108
  ```
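> Editor's note — a rough illustration of the new 0.3.0 options from the library side. The option names below are the ones the CLI forwards to `peel` (stealth, selector, exclude, headers); treat the exact option shape as an assumption rather than documented API.

```typescript
import { peel, cleanup } from 'webpeel';

// Stealth-fetch one page and keep only the <article> element.
// stealth / selector / exclude / headers mirror the CLI flags added in 0.3.0.
const result = await peel('https://protected-site.com/post', {
  stealth: true,                        // implies browser rendering
  selector: 'article',                  // extract just this element
  exclude: ['.sidebar', '.ads'],        // strip noisy regions
  headers: { Authorization: 'Bearer token' },
});

console.log(result.content);            // clean markdown
await cleanup();                        // close the headless browser, if one was opened
```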
93
109
 
94
- ### MCP Server (Claude Desktop, Cursor, VS Code)
110
+ ### MCP Server (Claude Desktop, Cursor, VS Code, Windsurf)
95
111
 
96
- WebPeel provides two MCP tools: `webpeel_fetch` (fetch a URL) and `webpeel_search` (DuckDuckGo search + fetch results).
112
+ WebPeel provides four MCP tools: `webpeel_fetch` (fetch a URL), `webpeel_search` (search the web), `webpeel_batch` (fetch multiple URLs), and `webpeel_crawl` (crawl a site).
97
113
 
98
114
  #### Claude Desktop
99
115
 
@@ -145,6 +161,50 @@ Or install with one click:
145
161
  [![Install in Claude Desktop](https://img.shields.io/badge/Install-Claude%20Desktop-5B3FFF?style=for-the-badge&logo=anthropic)](https://mcp.so/install/webpeel?for=claude)
146
162
  [![Install in VS Code](https://img.shields.io/badge/Install-VS%20Code-007ACC?style=for-the-badge&logo=visualstudiocode)](https://mcp.so/install/webpeel?for=vscode)
147
163
 
164
+ #### Windsurf
165
+
166
+ Add to `~/.codeium/windsurf/mcp_config.json`:
167
+
168
+ ```json
169
+ {
170
+ "mcpServers": {
171
+ "webpeel": {
172
+ "command": "npx",
173
+ "args": ["-y", "webpeel", "mcp"]
174
+ }
175
+ }
176
+ }
177
+ ```
178
+
179
+ ---
180
+
181
+ ## Use with Claude Code
182
+
183
+ One command to add WebPeel to Claude Code:
184
+
185
+ ```bash
186
+ claude mcp add webpeel -- npx -y webpeel mcp
187
+ ```
188
+
189
+ Or add to your project's `.mcp.json` for team sharing:
190
+
191
+ ```json
192
+ {
193
+ "mcpServers": {
194
+ "webpeel": {
195
+ "command": "npx",
196
+ "args": ["-y", "webpeel", "mcp"]
197
+ }
198
+ }
199
+ }
200
+ ```
201
+
202
+ This gives Claude Code access to:
203
+ - **webpeel_fetch** — Fetch any URL as clean markdown (with stealth mode for protected sites)
204
+ - **webpeel_search** — Search the web via DuckDuckGo
205
+ - **webpeel_batch** — Fetch multiple URLs concurrently
206
+ - **webpeel_crawl** — Crawl websites following links
207
+
148
208
  ---
149
209
 
150
210
  ## How It Works: Smart Escalation
@@ -156,16 +216,16 @@ WebPeel tries the fastest method first, then escalates only when needed:
156
216
  │ Smart Escalation │
157
217
  └─────────────────────────────────────────────────────────────┘
158
218
 
159
- Simple HTTP Fetch        Browser Rendering       Stealth Mode
160
-      ~200ms                  ~2 seconds              ~5 seconds
219
+ Simple HTTP Fetch        Browser Rendering       Stealth Mode
220
+      ~200ms                  ~2 seconds              ~5 seconds
161
221
        │                          │                       │
162
222
  ├─ User-Agent headers    ├─ Full JS execution     ├─ Anti-detect
163
- ├─ Cheerio parsing       ├─ Wait for content      ├─ Proxy rotation
164
- ├─ Fast & cheap          ├─ Screenshots           └─ Cloudflare bypass
165
-                              │                        │
166
-                              ▼                        ▼
167
- Works for 80%            Works for 19%            Works for 1%
168
- of websites              (JS-heavy sites)         (heavily protected)
223
+ ├─ Cheerio parsing       ├─ Wait for content      ├─ Fingerprint mask
224
+ ├─ Fast & cheap          ├─ Screenshots           ├─ Cloudflare bypass
225
+                              │                        │
226
+                              ▼                        ▼
227
+ Works for 80%            Works for 15%            Works for 5%
228
+ of websites              (JS-heavy sites)         (bot-protected)
169
229
  ```
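> Editor's note — the escalation above is, in effect, "try the cheap tier, keep the result only if it looks complete, otherwise pay for the next tier." A minimal sketch of that control flow (illustrative only; the tier functions and quality checks are placeholders, not webpeel's actual internals):

```typescript
type Fetcher = (url: string) => Promise<string>;

interface Tier {
  name: 'http' | 'browser' | 'stealth';
  fetch: Fetcher;                        // e.g. plain HTTP, headless browser, stealth browser
  looksGood: (html: string) => boolean;  // heuristic: did this tier get real content?
}

// Try each tier in order and stop at the first acceptable result, so the
// ~200ms HTTP path handles most pages and a browser only spins up when needed.
async function smartFetch(url: string, tiers: Tier[]): Promise<string> {
  let last = '';
  for (const tier of tiers) {
    last = await tier.fetch(url);
    if (tier.looksGood(last)) return last;
  }
  return last; // the last tier's output is returned rather than throwing
}
```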
170
230
 
171
231
  **Why this matters:**
@@ -257,29 +317,46 @@ curl "https://webpeel-api.onrender.com/v1/fetch?url=https://example.com" \
257
317
  -H "Authorization: Bearer wp_live_your_api_key"
258
318
  ```
259
319
 
260
- ### Pricing
320
+ ### Pricing — Weekly Reset Model
321
+
322
+ Usage resets every **Monday at 00:00 UTC**, just like Claude Code.
323
+
324
+ | Plan | Price | Weekly Fetches | Burst Limit | Stealth Mode | Extra Usage |
325
+ |------|------:|---------------:|:-----------:|:------------:|:-----------:|
326
+ | **Local CLI** | $0 | ∞ Unlimited | N/A | ✅ | N/A |
327
+ | **Cloud Free** | $0 | 125/wk (~500/mo) | 25/hr | ❌ | ❌ |
328
+ | **Cloud Pro** | $9/mo | 1,250/wk (~5K/mo) | 100/hr | ✅ | ✅ |
329
+ | **Cloud Max** | $29/mo | 6,250/wk (~25K/mo) | 500/hr | ✅ | ✅ |
330
+
331
+ **Three layers of usage control:**
332
+ 1. **Burst limit** — Per-hour cap (25/hr free, 100/hr Pro, 500/hr Max) prevents hammering
333
+ 2. **Weekly limit** — Main usage gate, resets every Monday
334
+ 3. **Extra usage** — When you hit your weekly limit, keep fetching at pay-as-you-go rates
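
> Editor's note — read together, the three layers behave like a small decision ladder. An illustrative sketch only (not the hosted API's code), with Pro-tier numbers plugged in:

```typescript
interface Usage {
  hourly: number;             // fetches in the current hour
  weekly: number;             // fetches since Monday 00:00 UTC
  extraUsageEnabled: boolean; // pay-as-you-go toggle (Pro/Max)
}

type Verdict = 'allow' | 'allow-billed' | 'throttle' | 'degrade-to-http';

function gate(u: Usage, burst = 100, weekly = 1250): Verdict {
  if (u.hourly >= burst) return 'throttle';        // 1. per-hour burst cap
  if (u.weekly < weekly) return 'allow';           // 2. main weekly gate
  if (u.extraUsageEnabled) return 'allow-billed';  // 3. pay-as-you-go overflow
  return 'degrade-to-http';                        // soft limit: never fully blocked
}
```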
261
335
 
262
- | Plan | Price | Fetches/Month | JS Rendering | Key Features |
263
- |------|------:|---------------:|:------------:|----------|
264
- | **Local CLI** | $0 | ∞ Unlimited | ✅ | Full power, your machine |
265
- | **Cloud Free** | $0 | 500 | ❌ | Soft limits — never blocked |
266
- | **Cloud Pro** | $9/mo | 5,000 | ✅ | Credit rollover, soft limits |
267
- | **Cloud Max** | $29/mo | 25,000 | ✅ | Priority queue, credit rollover |
336
+ **Extra usage rates (Pro/Max only):**
337
+ | Fetch Type | Cost |
338
+ |-----------|------|
339
+ | Basic (HTTP) | $0.002 |
340
+ | Stealth (browser) | $0.01 |
341
+ | Search | $0.001 |
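
> Editor's note — worked example from the rates above: a Pro user who runs 200 extra basic fetches and 50 extra stealth fetches in a week would be billed roughly 200 × $0.002 + 50 × $0.01 = $0.90.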
268
342
 
269
- ### Why WebPeel Pro Beats Firecrawl
343
+ ### Why WebPeel Beats Firecrawl
270
344
 
271
345
  | Feature | WebPeel Local | WebPeel Pro | Firecrawl Hobby |
272
346
  |---------|:-------------:|:-----------:|:---------------:|
273
347
  | **Price** | $0 | $9/mo | $16/mo |
274
- | **Monthly Fetches** | ∞ | 5,000 | 3,000 |
275
- | **Credit Rollover** | N/A | ✅ 1 month | ❌ Expire monthly |
348
+ | **Weekly Fetches** | ∞ | 1,250/wk | ~750/wk |
349
+ | **Rollover** | N/A | ✅ 1 week | ❌ Expire monthly |
276
350
  | **Soft Limits** | ✅ Always | ✅ Never locked out | ❌ Hard cut-off |
351
+ | **Extra Usage** | N/A | ✅ Pay-as-you-go | ❌ Upgrade only |
277
352
  | **Self-Host** | ✅ MIT | N/A | ❌ AGPL |
278
353
 
279
354
  **Key differentiators:**
280
- - **Soft limits on every tier** — When you hit your limit, we degrade to HTTP-only instead of blocking you. Even free users are never locked out.
281
- - **Credits roll over** — Unused fetches carry forward for 1 month (Firecrawl expires monthly)
282
- - **CLI is always free** — No vendor lock-in. Run unlimited locally forever.
355
+ - **Weekly resets** — Your usage refreshes every Monday, not once a month
356
+ - **Soft limits on every tier** — At 100%, we degrade to HTTP-only instead of blocking you
357
+ - **Extra usage** — Pro/Max users can toggle on pay-as-you-go with spending caps (no surprise bills)
358
+ - **Rollover** — Unused fetches carry forward 1 week
359
+ - **CLI is always free** — No vendor lock-in. Run unlimited locally forever
283
360
 
284
361
  See pricing at [webpeel.dev](https://webpeel.dev/#pricing)
285
362
 
@@ -388,12 +465,19 @@ See [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines.
388
465
  - [x] CLI with smart escalation
389
466
  - [x] TypeScript library
390
467
  - [x] MCP server for Claude/Cursor/VS Code
391
- - [ ] Hosted API with authentication
392
- - [ ] Rate limiting and caching
393
- - [ ] Batch processing API
394
- - [ ] Screenshot capture
468
+ - [x] Hosted API with authentication and usage tracking
469
+ - [x] Rate limiting and caching
470
+ - [x] Batch processing API (`batch <file>`)
471
+ - [x] Screenshot capture (`--screenshot`)
472
+ - [x] CSS selector filtering (`--selector`, `--exclude`)
473
+ - [x] DuckDuckGo search (`search <query>`)
474
+ - [x] Custom headers and cookies
475
+ - [x] Weekly reset usage model with extra usage
476
+ - [x] Stealth mode (playwright-extra + anti-detect)
477
+ - [x] Crawl mode (follow links, respect robots.txt)
395
478
  - [ ] PDF extraction
396
479
  - [ ] Webhook notifications for monitoring
480
+ - [ ] AI CAPTCHA solving (planned)
397
481
 
398
482
  Vote on features and roadmap at [GitHub Discussions](https://github.com/JakeLiuMe/webpeel/discussions).
399
483
 
@@ -408,13 +492,13 @@ A: WebPeel runs locally for free (Firecrawl is cloud-only). We also have smart e
408
492
  A: Yes! Run `npm run serve` to start the API server. See [docs/self-hosting.md](docs/self-hosting.md) (coming soon).
409
493
 
410
494
  **Q: Does this violate websites' Terms of Service?**
411
- A: WebPeel respects `robots.txt` by default. Always check a site's ToS before scraping at scale.
495
+ A: WebPeel is a tool — how you use it is up to you. Always check a site's ToS before fetching at scale. We recommend respecting `robots.txt` in your own workflows.
412
496
 
413
497
  **Q: What about CAPTCHA and Cloudflare?**
414
- A: WebPeel handles most Cloudflare challenges automatically. For CAPTCHAs, you'll need a solving service (not included).
498
+ A: WebPeel handles most Cloudflare challenges automatically via stealth mode. AI-powered CAPTCHA solving is on our roadmap.
415
499
 
416
500
  **Q: Can I use this in production?**
417
- A: Yes, but be mindful of rate limits. The hosted API (coming soon) is better for high-volume production use.
501
+ A: Yes! The hosted API at `https://webpeel-api.onrender.com` is production-ready with authentication, rate limiting, and usage tracking.
418
502
 
419
503
  ---
420
504
 
package/dist/cli.js CHANGED
@@ -14,15 +14,18 @@
14
14
  */
15
15
  import { Command } from 'commander';
16
16
  import ora from 'ora';
17
- import { peel, cleanup } from './index.js';
17
+ import { writeFileSync } from 'fs';
18
+ import { peel, peelBatch, cleanup } from './index.js';
18
19
  const program = new Command();
19
20
  program
20
21
  .name('webpeel')
21
22
  .description('Fast web fetcher for AI agents')
22
- .version('0.1.2');
23
+ .version('0.3.0')
24
+ .enablePositionalOptions();
23
25
  program
24
26
  .argument('[url]', 'URL to fetch')
25
27
  .option('-r, --render', 'Use headless browser (for JS-heavy sites)')
28
+ .option('--stealth', 'Use stealth mode to bypass bot detection (auto-enables --render)')
26
29
  .option('-w, --wait <ms>', 'Wait time after page load (ms)', parseInt)
27
30
  .option('--html', 'Output raw HTML instead of markdown')
28
31
  .option('--text', 'Output plain text instead of markdown')
@@ -30,6 +33,12 @@ program
30
33
  .option('-t, --timeout <ms>', 'Request timeout (ms)', parseInt, 30000)
31
34
  .option('--ua <agent>', 'Custom user agent')
32
35
  .option('-s, --silent', 'Silent mode (no spinner)')
36
+ .option('--screenshot [path]', 'Take a screenshot (optionally save to file path)')
37
+ .option('--full-page', 'Full-page screenshot (use with --screenshot)')
38
+ .option('--selector <css>', 'CSS selector to extract (e.g., "article", ".content")')
39
+ .option('--exclude <selectors...>', 'CSS selectors to exclude (e.g., ".sidebar" ".ads")')
40
+ .option('-H, --header <header...>', 'Custom headers (e.g., "Authorization: Bearer token")')
41
+ .option('--cookie <cookie...>', 'Cookies to set (e.g., "session=abc123")')
33
42
  .action(async (url, options) => {
34
43
  if (!url) {
35
44
  console.error('Error: URL is required\n');
@@ -65,12 +74,35 @@ program
65
74
  console.error('Error: Wait time must be between 0 and 60000ms');
66
75
  process.exit(1);
67
76
  }
77
+ // Parse custom headers
78
+ let headers;
79
+ if (options.header && options.header.length > 0) {
80
+ headers = {};
81
+ for (const header of options.header) {
82
+ const colonIndex = header.indexOf(':');
83
+ if (colonIndex === -1) {
84
+ console.error(`Error: Invalid header format: ${header}`);
85
+ console.error('Expected format: "Key: Value"');
86
+ process.exit(1);
87
+ }
88
+ const key = header.slice(0, colonIndex).trim();
89
+ const value = header.slice(colonIndex + 1).trim();
90
+ headers[key] = value;
91
+ }
92
+ }
68
93
  // Build peel options
69
94
  const peelOptions = {
70
95
  render: options.render || false,
96
+ stealth: options.stealth || false,
71
97
  wait: options.wait || 0,
72
98
  timeout: options.timeout,
73
99
  userAgent: options.ua,
100
+ screenshot: options.screenshot !== undefined,
101
+ screenshotFullPage: options.fullPage || false,
102
+ selector: options.selector,
103
+ exclude: options.exclude,
104
+ headers,
105
+ cookies: options.cookie,
74
106
  };
75
107
  // Determine format
76
108
  if (options.html) {
@@ -87,12 +119,42 @@ program
87
119
  if (spinner) {
88
120
  spinner.succeed(`Fetched in ${result.elapsed}ms using ${result.method} method`);
89
121
  }
90
- // Output results
122
+ // Handle screenshot saving
123
+ if (options.screenshot && result.screenshot) {
124
+ const screenshotPath = typeof options.screenshot === 'string'
125
+ ? options.screenshot
126
+ : 'screenshot.png';
127
+ const screenshotBuffer = Buffer.from(result.screenshot, 'base64');
128
+ writeFileSync(screenshotPath, screenshotBuffer);
129
+ if (!options.silent) {
130
+ console.error(`Screenshot saved to: ${screenshotPath}`);
131
+ }
132
+ // Remove screenshot from JSON output if saving to file
133
+ if (typeof options.screenshot === 'string') {
134
+ delete result.screenshot;
135
+ }
136
+ }
137
+ // Output results with proper stdout flushing
91
138
  if (options.json) {
92
- console.log(JSON.stringify(result, null, 2));
139
+ const jsonStr = JSON.stringify(result, null, 2);
140
+ await new Promise((resolve, reject) => {
141
+ process.stdout.write(jsonStr + '\n', (err) => {
142
+ if (err)
143
+ reject(err);
144
+ else
145
+ resolve();
146
+ });
147
+ });
93
148
  }
94
149
  else {
95
- console.log(result.content);
150
+ await new Promise((resolve, reject) => {
151
+ process.stdout.write(result.content + '\n', (err) => {
152
+ if (err)
153
+ reject(err);
154
+ else
155
+ resolve();
156
+ });
157
+ });
96
158
  }
97
159
  // Clean up and exit
98
160
  await cleanup();
@@ -112,15 +174,279 @@ program
112
174
  process.exit(1);
113
175
  }
114
176
  });
115
- // Future commands
177
+ // Search command
178
+ program
179
+ .command('search <query>')
180
+ .description('Search using DuckDuckGo')
181
+ .option('-n, --count <n>', 'Number of results (1-10)', '5')
182
+ .option('--json', 'Output as JSON')
183
+ .option('-s, --silent', 'Silent mode')
184
+ .action(async (query, options) => {
185
+ const isJson = options.json;
186
+ const isSilent = options.silent;
187
+ const count = parseInt(options.count) || 5;
188
+ const spinner = isSilent ? null : ora('Searching...').start();
189
+ try {
190
+ // Import the search function dynamically
191
+ const { fetch: undiciFetch } = await import('undici');
192
+ const { load } = await import('cheerio');
193
+ const searchUrl = `https://html.duckduckgo.com/html/?q=${encodeURIComponent(query)}`;
194
+ const response = await undiciFetch(searchUrl, {
195
+ headers: {
196
+ 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36',
197
+ },
198
+ });
199
+ if (!response.ok) {
200
+ throw new Error(`Search failed: HTTP ${response.status}`);
201
+ }
202
+ const html = await response.text();
203
+ const $ = load(html);
204
+ const results = [];
205
+ $('.result').each((_i, elem) => {
206
+ if (results.length >= count)
207
+ return;
208
+ const $result = $(elem);
209
+ const title = $result.find('.result__title').text().trim();
210
+ const rawUrl = $result.find('.result__a').attr('href') || '';
211
+ const snippet = $result.find('.result__snippet').text().trim();
212
+ if (!title || !rawUrl)
213
+ return;
214
+ // Extract actual URL from DuckDuckGo redirect
215
+ let url = rawUrl;
216
+ try {
217
+ const ddgUrl = new URL(rawUrl, 'https://duckduckgo.com');
218
+ const uddg = ddgUrl.searchParams.get('uddg');
219
+ if (uddg) {
220
+ url = decodeURIComponent(uddg);
221
+ }
222
+ }
223
+ catch {
224
+ // Use raw URL if parsing fails
225
+ }
226
+ // Validate final URL
227
+ try {
228
+ const parsed = new URL(url);
229
+ if (!['http:', 'https:'].includes(parsed.protocol)) {
230
+ return;
231
+ }
232
+ url = parsed.href;
233
+ }
234
+ catch {
235
+ return;
236
+ }
237
+ results.push({
238
+ title: title.slice(0, 200),
239
+ url,
240
+ snippet: snippet.slice(0, 500)
241
+ });
242
+ });
243
+ if (spinner) {
244
+ spinner.succeed(`Found ${results.length} results`);
245
+ }
246
+ if (isJson) {
247
+ const jsonStr = JSON.stringify(results, null, 2);
248
+ await new Promise((resolve, reject) => {
249
+ process.stdout.write(jsonStr + '\n', (err) => {
250
+ if (err)
251
+ reject(err);
252
+ else
253
+ resolve();
254
+ });
255
+ });
256
+ }
257
+ else {
258
+ for (const result of results) {
259
+ console.log(`\n${result.title}`);
260
+ console.log(result.url);
261
+ console.log(result.snippet);
262
+ }
263
+ }
264
+ process.exit(0);
265
+ }
266
+ catch (error) {
267
+ if (spinner) {
268
+ spinner.fail('Search failed');
269
+ }
270
+ if (error instanceof Error) {
271
+ console.error(`\nError: ${error.message}`);
272
+ }
273
+ else {
274
+ console.error('\nError: Unknown error occurred');
275
+ }
276
+ process.exit(1);
277
+ }
278
+ });
279
+ // Batch command
280
+ program
281
+ .command('batch <file>')
282
+ .description('Fetch multiple URLs')
283
+ .option('-c, --concurrency <n>', 'Max concurrent fetches (default: 3)', '3')
284
+ .option('-o, --output <dir>', 'Output directory (one file per URL)')
285
+ .option('--json', 'Output as JSON array')
286
+ .option('-s, --silent', 'Silent mode')
287
+ .option('-r, --render', 'Use headless browser')
288
+ .option('--selector <css>', 'CSS selector to extract')
289
+ .action(async (file, options) => {
290
+ const isJson = options.json;
291
+ const isSilent = options.silent;
292
+ const shouldRender = options.render;
293
+ const selector = options.selector;
294
+ const spinner = isSilent ? null : ora('Loading URLs...').start();
295
+ try {
296
+ const { readFileSync } = await import('fs');
297
+ // Read URLs from file
298
+ let urls;
299
+ try {
300
+ const content = readFileSync(file, 'utf-8');
301
+ urls = content.split('\n')
302
+ .map(line => line.trim())
303
+ .filter(line => line && !line.startsWith('#'));
304
+ }
305
+ catch (error) {
306
+ throw new Error(`Failed to read file: ${file}`);
307
+ }
308
+ if (urls.length === 0) {
309
+ throw new Error('No URLs found in file');
310
+ }
311
+ if (spinner) {
312
+ spinner.text = `Fetching ${urls.length} URLs (concurrency: ${options.concurrency})...`;
313
+ }
314
+ // Batch fetch
315
+ const results = await peelBatch(urls, {
316
+ concurrency: parseInt(options.concurrency) || 3,
317
+ render: shouldRender,
318
+ selector: selector,
319
+ });
320
+ if (spinner) {
321
+ const successCount = results.filter(r => 'content' in r).length;
322
+ spinner.succeed(`Completed: ${successCount}/${urls.length} successful`);
323
+ }
324
+ // Output results
325
+ if (isJson) {
326
+ const jsonStr = JSON.stringify(results, null, 2);
327
+ await new Promise((resolve, reject) => {
328
+ process.stdout.write(jsonStr + '\n', (err) => {
329
+ if (err)
330
+ reject(err);
331
+ else
332
+ resolve();
333
+ });
334
+ });
335
+ }
336
+ else if (options.output) {
337
+ const { writeFileSync, mkdirSync } = await import('fs');
338
+ const { join } = await import('path');
339
+ // Create output directory
340
+ mkdirSync(options.output, { recursive: true });
341
+ results.forEach((result, i) => {
342
+ const urlObj = new URL(urls[i]);
343
+ const filename = `${i + 1}_${urlObj.hostname.replace(/[^a-z0-9]/gi, '_')}.md`;
344
+ const filepath = join(options.output, filename);
345
+ if ('content' in result) {
346
+ writeFileSync(filepath, result.content);
347
+ }
348
+ else {
349
+ writeFileSync(filepath, `Error: ${result.error}`);
350
+ }
351
+ });
352
+ if (!isSilent) {
353
+ console.log(`\nResults saved to: ${options.output}`);
354
+ }
355
+ }
356
+ else {
357
+ // Print results to stdout
358
+ results.forEach((result, i) => {
359
+ console.log(`\n=== ${urls[i]} ===\n`);
360
+ if ('content' in result) {
361
+ console.log(result.content.slice(0, 500) + '...');
362
+ }
363
+ else {
364
+ console.log(`Error: ${result.error}`);
365
+ }
366
+ });
367
+ }
368
+ await cleanup();
369
+ process.exit(0);
370
+ }
371
+ catch (error) {
372
+ if (spinner) {
373
+ spinner.fail('Batch fetch failed');
374
+ }
375
+ if (error instanceof Error) {
376
+ console.error(`\nError: ${error.message}`);
377
+ }
378
+ else {
379
+ console.error('\nError: Unknown error occurred');
380
+ }
381
+ await cleanup();
382
+ process.exit(1);
383
+ }
384
+ });
116
385
  program
117
- .command('search')
118
- .argument('<query>', 'Search query')
119
- .description('Search using DuckDuckGo (future)')
120
- .action(() => {
121
- console.log('Search command not yet implemented');
122
- console.log('Coming soon: DuckDuckGo search integration');
123
- process.exit(1);
386
+ .command('crawl <url>')
387
+ .description('Crawl a website starting from a URL')
388
+ .option('--max-pages <number>', 'Maximum number of pages to crawl (default: 10, max: 100)', parseInt, 10)
389
+ .option('--max-depth <number>', 'Maximum depth to crawl (default: 2, max: 5)', parseInt, 2)
390
+ .option('--allowed-domains <domains...>', 'Only crawl these domains (default: same as starting URL)')
391
+ .option('--exclude <patterns...>', 'Exclude URLs matching these regex patterns')
392
+ .option('--ignore-robots', 'Ignore robots.txt (default: respect robots.txt)')
393
+ .option('--rate-limit <ms>', 'Rate limit between requests in ms (default: 1000)', parseInt, 1000)
394
+ .option('-r, --render', 'Use headless browser for all pages')
395
+ .option('--stealth', 'Use stealth mode for all pages')
396
+ .option('-s, --silent', 'Silent mode (no spinner)')
397
+ .option('--json', 'Output as JSON')
398
+ .action(async (url, options) => {
399
+ const { crawl } = await import('./core/crawler.js');
400
+ const spinner = options.silent ? null : ora('Crawling...').start();
401
+ try {
402
+ const results = await crawl(url, {
403
+ maxPages: options.maxPages,
404
+ maxDepth: options.maxDepth,
405
+ allowedDomains: options.allowedDomains,
406
+ excludePatterns: options.exclude,
407
+ respectRobotsTxt: !options.ignoreRobots,
408
+ rateLimitMs: options.rateLimit,
409
+ render: options.render || false,
410
+ stealth: options.stealth || false,
411
+ });
412
+ if (spinner) {
413
+ spinner.succeed(`Crawled ${results.length} pages`);
414
+ }
415
+ if (options.json) {
416
+ console.log(JSON.stringify(results, null, 2));
417
+ }
418
+ else {
419
+ results.forEach((result, i) => {
420
+ console.log(`\n${'='.repeat(60)}`);
421
+ console.log(`[${i + 1}/${results.length}] ${result.title}`);
422
+ console.log(`URL: ${result.url}`);
423
+ console.log(`Depth: ${result.depth}${result.parent ? ` (from: ${result.parent})` : ''}`);
424
+ console.log(`Links found: ${result.links.length}`);
425
+ console.log(`Elapsed: ${result.elapsed}ms`);
426
+ if (result.error) {
427
+ console.log(`ERROR: ${result.error}`);
428
+ }
429
+ else {
430
+ console.log(`\n${result.markdown.slice(0, 500)}${result.markdown.length > 500 ? '...' : ''}`);
431
+ }
432
+ });
433
+ }
434
+ await cleanup();
435
+ process.exit(0);
436
+ }
437
+ catch (error) {
438
+ if (spinner) {
439
+ spinner.fail('Crawl failed');
440
+ }
441
+ if (error instanceof Error) {
442
+ console.error(`\nError: ${error.message}`);
443
+ }
444
+ else {
445
+ console.error('\nError: Unknown error occurred');
446
+ }
447
+ await cleanup();
448
+ process.exit(1);
449
+ }
124
450
  });
125
451
  program
126
452
  .command('serve')