webpeel 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +415 -0
  3. package/dist/cli.d.ts +16 -0
  4. package/dist/cli.d.ts.map +1 -0
  5. package/dist/cli.js +140 -0
  6. package/dist/cli.js.map +1 -0
  7. package/dist/core/fetcher.d.ts +32 -0
  8. package/dist/core/fetcher.d.ts.map +1 -0
  9. package/dist/core/fetcher.js +479 -0
  10. package/dist/core/fetcher.js.map +1 -0
  11. package/dist/core/markdown.d.ts +17 -0
  12. package/dist/core/markdown.d.ts.map +1 -0
  13. package/dist/core/markdown.js +143 -0
  14. package/dist/core/markdown.js.map +1 -0
  15. package/dist/core/metadata.d.ts +17 -0
  16. package/dist/core/metadata.d.ts.map +1 -0
  17. package/dist/core/metadata.js +159 -0
  18. package/dist/core/metadata.js.map +1 -0
  19. package/dist/core/strategies.d.ts +30 -0
  20. package/dist/core/strategies.d.ts.map +1 -0
  21. package/dist/core/strategies.js +67 -0
  22. package/dist/core/strategies.js.map +1 -0
  23. package/dist/index.d.ts +31 -0
  24. package/dist/index.d.ts.map +1 -0
  25. package/dist/index.js +81 -0
  26. package/dist/index.js.map +1 -0
  27. package/dist/mcp/server.d.ts +7 -0
  28. package/dist/mcp/server.d.ts.map +1 -0
  29. package/dist/mcp/server.js +248 -0
  30. package/dist/mcp/server.js.map +1 -0
  31. package/dist/server/app.d.ts +13 -0
  32. package/dist/server/app.d.ts.map +1 -0
  33. package/dist/server/app.js +89 -0
  34. package/dist/server/app.js.map +1 -0
  35. package/dist/server/auth-store.d.ts +28 -0
  36. package/dist/server/auth-store.d.ts.map +1 -0
  37. package/dist/server/auth-store.js +87 -0
  38. package/dist/server/auth-store.js.map +1 -0
  39. package/dist/server/middleware/auth.d.ts +18 -0
  40. package/dist/server/middleware/auth.d.ts.map +1 -0
  41. package/dist/server/middleware/auth.js +55 -0
  42. package/dist/server/middleware/auth.js.map +1 -0
  43. package/dist/server/middleware/rate-limit.d.ts +23 -0
  44. package/dist/server/middleware/rate-limit.d.ts.map +1 -0
  45. package/dist/server/middleware/rate-limit.js +85 -0
  46. package/dist/server/middleware/rate-limit.js.map +1 -0
  47. package/dist/server/routes/fetch.d.ts +7 -0
  48. package/dist/server/routes/fetch.d.ts.map +1 -0
  49. package/dist/server/routes/fetch.js +127 -0
  50. package/dist/server/routes/fetch.js.map +1 -0
  51. package/dist/server/routes/health.d.ts +6 -0
  52. package/dist/server/routes/health.d.ts.map +1 -0
  53. package/dist/server/routes/health.js +19 -0
  54. package/dist/server/routes/health.js.map +1 -0
  55. package/dist/server/routes/search.d.ts +7 -0
  56. package/dist/server/routes/search.d.ts.map +1 -0
  57. package/dist/server/routes/search.js +124 -0
  58. package/dist/server/routes/search.js.map +1 -0
  59. package/dist/types.d.ts +59 -0
  60. package/dist/types.d.ts.map +1 -0
  61. package/dist/types.js +30 -0
  62. package/dist/types.js.map +1 -0
  63. package/llms.txt +60 -0
  64. package/package.json +80 -0
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Jake Liu
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,415 @@
1
+ # WebPeel
2
+
3
+ [![npm version](https://img.shields.io/npm/v/webpeel.svg)](https://www.npmjs.com/package/webpeel)
4
+ [![npm downloads](https://img.shields.io/npm/dm/webpeel.svg)](https://www.npmjs.com/package/webpeel)
5
+ [![CI](https://github.com/JakeLiuMe/webpeel/actions/workflows/ci.yml/badge.svg)](https://github.com/JakeLiuMe/webpeel/actions/workflows/ci.yml)
6
+ [![TypeScript](https://img.shields.io/badge/TypeScript-5.6-blue.svg)](https://www.typescriptlang.org/)
7
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT)
8
+
9
+ Turn any web page into clean markdown. Zero config. Free forever.
10
+
11
+ ```bash
12
+ npx webpeel https://news.ycombinator.com
13
+ ```
14
+
15
+ **Output:**
16
+ ```markdown
17
+ # Hacker News
18
+
19
+ **New** | **Past** | **Comments** | **Ask** | **Show** | **Jobs** | **Submit**
20
+
21
+ ## Top Stories
22
+
23
+ 1. **Show HN: WebPeel – Turn any webpage into AI-ready markdown**
24
+ [https://github.com/JakeLiuMe/webpeel](https://github.com/JakeLiuMe/webpeel)
25
+ 142 points by jakeliu 2 hours ago | 31 comments
26
+
27
+ 2. **The End of the API Era**
28
+ ...
29
+ ```
30
+
31
+ ---
32
+
33
+ ## Why WebPeel?
34
+
35
+ | | **WebPeel** | Firecrawl | Jina Reader | MCP Fetch |
36
+ |---|:---:|:---:|:---:|:---:|
37
+ | **Local execution** | ✅ Free forever | ❌ Cloud only | ❌ Cloud only | ✅ Free |
38
+ | **JS rendering** | ✅ Auto-escalates | ✅ Always | ❌ No | ❌ No |
39
+ | **Anti-bot handling** | ✅ Stealth mode | ✅ Yes | ⚠️ Limited | ❌ No |
40
+ | **MCP Server** | ✅ Built-in | ✅ Separate repo | ❌ No | ✅ Yes |
41
+ | **Zero config** | ✅ `npx webpeel` | ❌ API key required | ❌ API key required | ✅ Yes |
42
+ | **Free tier** | ∞ Unlimited local | 500 pages/month | 1000 req/month | ∞ Local only |
43
+ | **Hosted API** | Coming soon | $16/mo (Starter) | $200/mo (Starter) | N/A |
44
+ | **Markdown output** | ✅ Optimized for AI | ✅ Yes | ✅ Yes | ⚠️ Basic |
45
+
46
+ **WebPeel gives you Firecrawl's power without the price tag.** Run locally for free, or use our hosted API when you need scale.
47
+
48
+ ---
49
+
50
+ ## Quick Start
51
+
52
+ ### CLI (Zero Install)
53
+
54
+ ```bash
55
+ # Basic usage
56
+ npx webpeel https://example.com
57
+
58
+ # JSON output with metadata
59
+ npx webpeel https://example.com --json
60
+
61
+ # Force browser rendering (for JS-heavy sites)
62
+ npx webpeel https://x.com/elonmusk --render
63
+
64
+ # Wait for dynamic content
65
+ npx webpeel https://example.com --render --wait 3000
66
+ ```
67
+
68
+ ### Library (TypeScript)
69
+
70
+ ```bash
71
+ npm install webpeel
72
+ ```
73
+
74
+ ```typescript
75
+ import { peel } from 'webpeel';
76
+
77
+ // Simple usage
78
+ const result = await peel('https://example.com');
79
+ console.log(result.content); // Clean markdown
80
+ console.log(result.metadata); // { title, description, author, ... }
81
+ console.log(result.tokens); // Estimated token count
82
+
83
+ // With options
84
+ const result = await peel('https://example.com', {
85
+ format: 'markdown', // 'markdown' | 'text' | 'html'
86
+ render: true, // Force browser mode
87
+ wait: 3000, // Wait 3s for dynamic content
88
+ timeout: 30000, // Request timeout (ms)
89
+ });
90
+ ```
91
+
92
+ ### MCP Server (Claude Desktop, Cursor, VS Code)
93
+
94
+ WebPeel provides two MCP tools: `webpeel_fetch` (fetch a URL) and `webpeel_search` (DuckDuckGo search + fetch results).
95
+
96
+ #### Claude Desktop
97
+
98
+ Add to `~/Library/Application Support/Claude/claude_desktop_config.json`:
99
+
100
+ ```json
101
+ {
102
+ "mcpServers": {
103
+ "webpeel": {
104
+ "command": "npx",
105
+ "args": ["-y", "webpeel", "mcp"]
106
+ }
107
+ }
108
+ }
109
+ ```
110
+
111
+ #### Cursor
112
+
113
+ Add to Cursor Settings → MCP Servers:
114
+
115
+ ```json
116
+ {
117
+ "mcpServers": {
118
+ "webpeel": {
119
+ "command": "npx",
120
+ "args": ["-y", "webpeel", "mcp"]
121
+ }
122
+ }
123
+ }
124
+ ```
125
+
126
+ #### VS Code (with Cline or other MCP clients)
127
+
128
+ Create or edit `~/.vscode/mcp.json`:
129
+
130
+ ```json
131
+ {
132
+ "mcpServers": {
133
+ "webpeel": {
134
+ "command": "npx",
135
+ "args": ["-y", "webpeel", "mcp"]
136
+ }
137
+ }
138
+ }
139
+ ```
140
+
141
+ Or install with one click:
142
+
143
+ [![Install in Claude Desktop](https://img.shields.io/badge/Install-Claude%20Desktop-5B3FFF?style=for-the-badge&logo=anthropic)](https://mcp.so/install/webpeel?for=claude)
144
+ [![Install in VS Code](https://img.shields.io/badge/Install-VS%20Code-007ACC?style=for-the-badge&logo=visualstudiocode)](https://mcp.so/install/webpeel?for=vscode)
145
+
146
+ ---
147
+
148
+ ## How It Works: Smart Escalation
149
+
150
+ WebPeel tries the fastest method first, then escalates only when needed:
151
+
152
+ ```
153
+ ┌─────────────────────────────────────────────────────────────┐
154
+ │ Smart Escalation │
155
+ └─────────────────────────────────────────────────────────────┘
156
+
157
+ Simple HTTP Fetch Browser Rendering Stealth Mode
158
+ ~200ms ~2 seconds ~5 seconds
159
+ │ │ │
160
+ ├─ User-Agent headers ├─ Full JS execution ├─ Anti-detect
161
+ ├─ Cheerio parsing ├─ Wait for content ├─ Proxy rotation
162
+ ├─ Fast & cheap ├─ Screenshots └─ Cloudflare bypass
163
+ │ │
164
+ ▼ ▼
165
+ Works for 80% Works for 19% Works for 1%
166
+ of websites (JS-heavy sites) (heavily protected)
167
+ ```
168
+
169
+ **Why this matters:**
170
+ - **Speed**: Don't waste 2 seconds rendering when 200ms will do
171
+ - **Cost**: Headless browsers burn CPU and memory
172
+ - **Reliability**: Auto-retry with browser if simple fetch fails
173
+
174
+ WebPeel automatically detects blocked requests (403, 503, Cloudflare challenges) and retries with browser mode. You get the best of both worlds.
175
+
176
+ ---
177
+
178
+ ## API Reference
179
+
180
+ ### `peel(url, options?)`
181
+
182
+ Fetch and extract content from a URL.
183
+
184
+ ```typescript
185
+ interface PeelOptions {
186
+ render?: boolean; // Force browser mode (default: false)
187
+ wait?: number; // Wait time after page load in ms (default: 0)
188
+ format?: 'markdown' | 'text' | 'html'; // Output format (default: 'markdown')
189
+ timeout?: number; // Request timeout in ms (default: 30000)
190
+ userAgent?: string; // Custom user agent
191
+ }
192
+
193
+ interface PeelResult {
194
+ url: string; // Final URL (after redirects)
195
+ title: string; // Page title
196
+ content: string; // Page content in requested format
197
+ metadata: { // Extracted metadata
198
+ description?: string;
199
+ author?: string;
200
+ published?: string; // ISO 8601 date
201
+ image?: string; // Open Graph image
202
+ canonical?: string;
203
+ };
204
+ links: string[]; // All links on page (absolute URLs)
205
+ tokens: number; // Estimated token count
206
+ method: 'simple' | 'browser'; // Method used
207
+ elapsed: number; // Time taken (ms)
208
+ }
209
+ ```
210
+
211
+ ### Error Types
212
+
213
+ ```typescript
214
+ import { TimeoutError, BlockedError, NetworkError } from 'webpeel';
215
+
216
+ try {
217
+ const result = await peel('https://example.com');
218
+ } catch (error) {
219
+ if (error instanceof TimeoutError) {
220
+ // Request timed out
221
+ } else if (error instanceof BlockedError) {
222
+ // Site blocked the request (403, Cloudflare, etc.)
223
+ } else if (error instanceof NetworkError) {
224
+ // Network/DNS error
225
+ }
226
+ }
227
+ ```
228
+
229
+ ### `cleanup()`
230
+
231
+ Clean up browser resources. Call this when you're done using WebPeel in your application:
232
+
233
+ ```typescript
234
+ import { peel, cleanup } from 'webpeel';
235
+
236
+ // ... use peel() ...
237
+
238
+ await cleanup(); // Close browser instances
239
+ ```
240
+
241
+ ---
242
+
243
+ ## Hosted API (Coming Soon)
244
+
245
+ Run WebPeel locally for free, or use our hosted API for scale:
246
+
247
+ | Plan | Price | Requests/Month | Features |
248
+ |------|------:|---------------:|----------|
249
+ | **Free** | $0 | Unlimited local | CLI, library, MCP server |
250
+ | **Hosted Free** | $0 | 1,000 | API access, no credit card |
251
+ | **Pro** | $9 | 50,000 | Priority queue, 99.9% SLA |
252
+ | **Scale** | $29 | 250,000 | Dedicated instances, webhook support |
253
+
254
+ **Compare:** Firecrawl Starter is $16/mo for 3,000 requests. Our Pro tier gives you 50,000 for $9/mo.
255
+
256
+ Join the waitlist at [webpeel.dev](https://webpeel.dev)
257
+
258
+ ---
259
+
260
+ ## Examples
261
+
262
+ ### Extract blog post metadata
263
+
264
+ ```typescript
265
+ const result = await peel('https://example.com/blog/post');
266
+
267
+ console.log(result.metadata);
268
+ // {
269
+ // title: "How We Built WebPeel",
270
+ // description: "A deep dive into smart escalation...",
271
+ // author: "Jake Liu",
272
+ // published: "2026-02-12T18:00:00Z",
273
+ // image: "https://example.com/og-image.png"
274
+ // }
275
+ ```
276
+
277
+ ### Get all links from a page
278
+
279
+ ```typescript
280
+ const result = await peel('https://news.ycombinator.com');
281
+
282
+ console.log(result.links.slice(0, 5));
283
+ // [
284
+ // "https://news.ycombinator.com/newest",
285
+ // "https://news.ycombinator.com/submit",
286
+ // "https://github.com/example/repo",
287
+ // ...
288
+ // ]
289
+ ```
290
+
291
+ ### Force browser rendering for JavaScript-heavy sites
292
+
293
+ ```typescript
294
+ // Twitter/X requires JavaScript
295
+ const result = await peel('https://x.com/elonmusk', {
296
+ render: true,
297
+ wait: 2000, // Wait for tweets to load
298
+ });
299
+
300
+ console.log(result.content); // Rendered tweet content
301
+ ```
302
+
303
+ ### Token counting for LLM usage
304
+
305
+ ```typescript
306
+ const result = await peel('https://example.com/long-article');
307
+
308
+ console.log(`Content is ~${result.tokens} tokens`);
309
+ // Content is ~3,247 tokens
310
+
311
+ if (result.tokens > 4000) {
312
+ console.log('Too long for GPT-3.5 context window');
313
+ }
314
+ ```
315
+
316
+ ---
317
+
318
+ ## Use Cases
319
+
320
+ - **AI Agents**: Feed web content to Claude, GPT, or local LLMs
321
+ - **Research**: Bulk extract articles, docs, or social media
322
+ - **Monitoring**: Track content changes on websites
323
+ - **Archiving**: Save web pages as clean markdown
324
+ - **Data Pipelines**: Extract structured data from web sources
325
+
326
+ ---
327
+
328
+ ## Development
329
+
330
+ ```bash
331
+ # Clone the repo
332
+ git clone https://github.com/JakeLiuMe/webpeel.git
333
+ cd webpeel
334
+
335
+ # Install dependencies
336
+ npm install
337
+
338
+ # Build
339
+ npm run build
340
+
341
+ # Run tests
342
+ npm test
343
+
344
+ # Watch mode (auto-rebuild)
345
+ npm run dev
346
+
347
+ # Test the CLI locally
348
+ node dist/cli.js https://example.com
349
+
350
+ # Test the MCP server
351
+ npm run mcp
352
+ ```
353
+
354
+ See [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines.
355
+
356
+ ---
357
+
358
+ ## Roadmap
359
+
360
+ - [x] CLI with smart escalation
361
+ - [x] TypeScript library
362
+ - [x] MCP server for Claude/Cursor/VS Code
363
+ - [ ] Hosted API with authentication
364
+ - [ ] Rate limiting and caching
365
+ - [ ] Batch processing API
366
+ - [ ] Screenshot capture
367
+ - [ ] PDF extraction
368
+ - [ ] Webhook notifications for monitoring
369
+
370
+ Vote on features and roadmap at [GitHub Discussions](https://github.com/JakeLiuMe/webpeel/discussions).
371
+
372
+ ---
373
+
374
+ ## FAQ
375
+
376
+ **Q: How is this different from Firecrawl?**
377
+ A: WebPeel runs locally for free (Firecrawl is cloud-only). We also have smart escalation to avoid burning resources on simple pages.
378
+
379
+ **Q: Can I self-host the API server?**
380
+ A: Yes! Run `npm run serve` to start the API server. See [docs/self-hosting.md](docs/self-hosting.md) (coming soon).
381
+
382
+ **Q: Does this violate websites' Terms of Service?**
383
+ A: WebPeel respects `robots.txt` by default. Always check a site's ToS before scraping at scale.
384
+
385
+ **Q: What about CAPTCHA and Cloudflare?**
386
+ A: WebPeel handles most Cloudflare challenges automatically. For CAPTCHAs, you'll need a solving service (not included).
387
+
388
+ **Q: Can I use this in production?**
389
+ A: Yes, but be mindful of rate limits. The hosted API (coming soon) is better for high-volume production use.
390
+
391
+ ---
392
+
393
+ ## Credits
394
+
395
+ Built with:
396
+ - [Playwright](https://playwright.dev/) — Headless browser automation
397
+ - [Cheerio](https://cheerio.js.org/) — Fast HTML parsing
398
+ - [Turndown](https://github.com/mixmark-io/turndown) — HTML to Markdown conversion
399
+ - [Commander](https://github.com/tj/commander.js) — CLI framework
400
+
401
+ ---
402
+
403
+ ## Contributing
404
+
405
+ Contributions are welcome! See [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines.
406
+
407
+ ---
408
+
409
+ ## License
410
+
411
+ MIT © [Jake Liu](https://github.com/JakeLiuMe)
412
+
413
+ ---
414
+
415
+ **Like WebPeel?** [⭐ Star us on GitHub](https://github.com/JakeLiuMe/webpeel) — it helps others discover the project!
package/dist/cli.d.ts ADDED
@@ -0,0 +1,16 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * WebPeel CLI
4
+ *
5
+ * Usage:
6
+ * npx webpeel <url> - Fetch and convert to markdown
7
+ * npx webpeel <url> --json - Output as JSON
8
+ * npx webpeel <url> --html - Output raw HTML
9
+ * npx webpeel <url> --render - Force browser mode
10
+ * npx webpeel <url> --wait 5000 - Wait 5s for JS to load
11
+ * npx webpeel search "query" - DuckDuckGo search (not yet implemented)
12
+ * npx webpeel serve - Start API server
13
+ * npx webpeel mcp - Start MCP server
14
+ */
15
+ export {};
16
+ //# sourceMappingURL=cli.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"cli.d.ts","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":";AAEA;;;;;;;;;;;;GAYG"}
package/dist/cli.js ADDED
@@ -0,0 +1,140 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * WebPeel CLI
4
+ *
5
+ * Usage:
6
+ * npx webpeel <url> - Fetch and convert to markdown
7
+ * npx webpeel <url> --json - Output as JSON
8
+ * npx webpeel <url> --html - Output raw HTML
9
+ * npx webpeel <url> --render - Force browser mode
10
+ * npx webpeel <url> --wait 5000 - Wait 5s for JS to load
11
+ * npx webpeel search "query" - DuckDuckGo search (not yet implemented)
12
+ * npx webpeel serve - Start API server
13
+ * npx webpeel mcp - Start MCP server
14
+ */
15
+ import { Command } from 'commander';
16
+ import ora from 'ora';
17
+ import { peel, cleanup } from './index.js';
const program = new Command();

/** Largest value accepted for `--wait`, in milliseconds. */
const MAX_WAIT_MS = 60000;

/**
 * Parse a CLI option value as a base-10 integer.
 *
 * Commander calls option parsers as `fn(value, previous)`. Handing it the
 * built-in `parseInt` directly makes the previous/default value the radix
 * (e.g. `parseInt('5000', 30000)` is NaN), so the radix is pinned here and
 * the second argument is deliberately ignored.
 *
 * @param value Raw option text from the command line.
 * @returns The parsed integer, or NaN when `value` is not numeric.
 */
function parseInteger(value) {
    return Number.parseInt(value, 10);
}

/**
 * Print an error message to stderr and terminate with exit status 1.
 *
 * @param message Text to print.
 */
function fail(message) {
    console.error(message);
    process.exit(1);
}

program
    .name('webpeel')
    .description('Fast web fetcher for AI agents')
    .version('0.1.0');

// Default command: fetch a single URL and print it in the requested format.
program
    .argument('[url]', 'URL to fetch')
    .option('-r, --render', 'Use headless browser (for JS-heavy sites)')
    .option('-w, --wait <ms>', 'Wait time after page load (ms)', parseInteger)
    .option('--html', 'Output raw HTML instead of markdown')
    .option('--text', 'Output plain text instead of markdown')
    .option('--json', 'Output as JSON')
    .option('-t, --timeout <ms>', 'Request timeout (ms)', parseInteger, 30000)
    .option('--ua <agent>', 'Custom user agent')
    .option('-s, --silent', 'Silent mode (no spinner)')
    .action(async (url, options) => {
        if (!url) {
            console.error('Error: URL is required\n');
            // outputHelp() only prints; help() would exit with status 0 and
            // make the failure exit below unreachable.
            program.outputHelp();
            process.exit(1);
        }
        // SECURITY: Enhanced URL validation
        if (url.length > 2048) {
            fail('Error: URL too long (max 2048 characters)');
        }
        // Check for control characters
        if (/[\x00-\x1F\x7F]/.test(url)) {
            fail('Error: URL contains invalid control characters');
        }
        // Validate URL format and restrict the scheme to HTTP(S)
        try {
            const parsed = new URL(url);
            if (!['http:', 'https:'].includes(parsed.protocol)) {
                fail('Error: Only HTTP and HTTPS protocols are allowed');
            }
        }
        catch {
            fail(`Error: Invalid URL format: ${url}`);
        }
        // Validate numeric options before the spinner starts, so a rejected
        // value never leaves a spinner running. NaN (non-numeric input) is
        // rejected instead of silently becoming 0.
        if (options.wait !== undefined &&
            (!Number.isInteger(options.wait) || options.wait < 0 || options.wait > MAX_WAIT_MS)) {
            fail('Error: Wait time must be between 0 and 60000ms');
        }
        if (!Number.isInteger(options.timeout) || options.timeout <= 0) {
            fail('Error: Timeout must be a positive number of milliseconds');
        }
        // Build peel options; --html takes precedence over --text.
        const peelOptions = {
            render: Boolean(options.render),
            wait: options.wait ?? 0,
            timeout: options.timeout,
            userAgent: options.ua,
            format: options.html ? 'html' : options.text ? 'text' : 'markdown',
        };
        const spinner = options.silent ? null : ora('Fetching...').start();
        try {
            // Fetch the page
            const result = await peel(url, peelOptions);
            if (spinner) {
                spinner.succeed(`Fetched in ${result.elapsed}ms using ${result.method} method`);
            }
            // Output results
            console.log(options.json ? JSON.stringify(result, null, 2) : result.content);
            // Clean up and exit
            await cleanup();
            process.exit(0);
        }
        catch (error) {
            if (spinner) {
                spinner.fail('Failed to fetch');
            }
            if (error instanceof Error) {
                console.error(`\nError: ${error.message}`);
            }
            else {
                console.error('\nError: Unknown error occurred');
            }
            await cleanup();
            process.exit(1);
        }
    });

// Placeholder: search is registered but not implemented yet.
program
    .command('search')
    .argument('<query>', 'Search query')
    .description('Search using DuckDuckGo (future)')
    .action(() => {
        console.log('Search command not yet implemented');
        console.log('Coming soon: DuckDuckGo search integration');
        process.exit(1);
    });

// Start the HTTP API server; the module is loaded lazily on demand.
program
    .command('serve')
    .description('Start API server')
    .option('-p, --port <port>', 'Port number', '3000')
    .action(async (options) => {
        const port = Number.parseInt(options.port, 10);
        if (!Number.isInteger(port) || port < 0 || port > 65535) {
            fail(`Error: Invalid port: ${options.port}`);
        }
        const { startServer } = await import('./server/app.js');
        startServer({ port });
    });

// Start the MCP server by importing its module.
program
    .command('mcp')
    .description('Start MCP server for Claude Desktop / Cursor')
    .action(async () => {
        await import('./mcp/server.js');
    });

// Action handlers are async, so use parseAsync() and surface any rejection
// (e.g. a failed dynamic import) instead of leaving it unhandled.
program.parseAsync().catch((error) => {
    console.error(`\nError: ${error instanceof Error ? error.message : 'Unknown error occurred'}`);
    process.exit(1);
});
140
+ //# sourceMappingURL=cli.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"cli.js","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":";AAEA;;;;;;;;;;;;GAYG;AAEH,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACpC,OAAO,GAAG,MAAM,KAAK,CAAC;AACtB,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,MAAM,YAAY,CAAC;AAG3C,MAAM,OAAO,GAAG,IAAI,OAAO,EAAE,CAAC;AAE9B,OAAO;KACJ,IAAI,CAAC,SAAS,CAAC;KACf,WAAW,CAAC,gCAAgC,CAAC;KAC7C,OAAO,CAAC,OAAO,CAAC,CAAC;AAEpB,OAAO;KACJ,QAAQ,CAAC,OAAO,EAAE,cAAc,CAAC;KACjC,MAAM,CAAC,cAAc,EAAE,2CAA2C,CAAC;KACnE,MAAM,CAAC,iBAAiB,EAAE,gCAAgC,EAAE,QAAQ,CAAC;KACrE,MAAM,CAAC,QAAQ,EAAE,qCAAqC,CAAC;KACvD,MAAM,CAAC,QAAQ,EAAE,uCAAuC,CAAC;KACzD,MAAM,CAAC,QAAQ,EAAE,gBAAgB,CAAC;KAClC,MAAM,CAAC,oBAAoB,EAAE,sBAAsB,EAAE,QAAQ,EAAE,KAAK,CAAC;KACrE,MAAM,CAAC,cAAc,EAAE,mBAAmB,CAAC;KAC3C,MAAM,CAAC,cAAc,EAAE,0BAA0B,CAAC;KAClD,MAAM,CAAC,KAAK,EAAE,GAAuB,EAAE,OAAO,EAAE,EAAE;IACjD,IAAI,CAAC,GAAG,EAAE,CAAC;QACT,OAAO,CAAC,KAAK,CAAC,0BAA0B,CAAC,CAAC;QAC1C,OAAO,CAAC,IAAI,EAAE,CAAC;QACf,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,oCAAoC;IACpC,IAAI,GAAG,CAAC,MAAM,GAAG,IAAI,EAAE,CAAC;QACtB,OAAO,CAAC,KAAK,CAAC,2CAA2C,CAAC,CAAC;QAC3D,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,+BAA+B;IAC/B,IAAI,iBAAiB,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;QAChC,OAAO,CAAC,KAAK,CAAC,gDAAgD,CAAC,CAAC;QAChE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,sBAAsB;IACtB,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;QAC5B,IAAI,CAAC,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC;YACnD,OAAO,CAAC,KAAK,CAAC,kDAAkD,CAAC,CAAC;YAClE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;IACH,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,CAAC,KAAK,CAAC,8BAA8B,GAAG,EAAE,CAAC,CAAC;QACnD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,MAAM,OAAO,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,aAAa,CAAC,CAAC,KAAK,EAAE,CAAC;IAEnE,IAAI,CAAC;QACH,mBAAmB;QACnB,IAAI,OAAO,CAAC,IAAI,IAAI,CAAC,OAAO,CAAC,IAAI,GAAG,CAAC,IAAI,OAAO,CAAC,IAAI,GAAG,KAAK,CAAC,EAAE,CAAC;YAC/D,OAAO,CAAC,KAAK,CAAC,gDAAgD,CAAC,CAAC;YAChE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAC
lB,CAAC;QAED,qBAAqB;QACrB,MAAM,WAAW,GAAgB;YAC/B,MAAM,EAAE,OAAO,CAAC,MAAM,IAAI,KAAK;YAC/B,IAAI,EAAE,OAAO,CAAC,IAAI,IAAI,CAAC;YACvB,OAAO,EAAE,OAAO,CAAC,OAAO;YACxB,SAAS,EAAE,OAAO,CAAC,EAAE;SACtB,CAAC;QAEF,mBAAmB;QACnB,IAAI,OAAO,CAAC,IAAI,EAAE,CAAC;YACjB,WAAW,CAAC,MAAM,GAAG,MAAM,CAAC;QAC9B,CAAC;aAAM,IAAI,OAAO,CAAC,IAAI,EAAE,CAAC;YACxB,WAAW,CAAC,MAAM,GAAG,MAAM,CAAC;QAC9B,CAAC;aAAM,CAAC;YACN,WAAW,CAAC,MAAM,GAAG,UAAU,CAAC;QAClC,CAAC;QAED,iBAAiB;QACjB,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,GAAG,EAAE,WAAW,CAAC,CAAC;QAE5C,IAAI,OAAO,EAAE,CAAC;YACZ,OAAO,CAAC,OAAO,CAAC,cAAc,MAAM,CAAC,OAAO,YAAY,MAAM,CAAC,MAAM,SAAS,CAAC,CAAC;QAClF,CAAC;QAED,iBAAiB;QACjB,IAAI,OAAO,CAAC,IAAI,EAAE,CAAC;YACjB,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;QAC/C,CAAC;aAAM,CAAC;YACN,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QAC9B,CAAC;QAED,oBAAoB;QACpB,MAAM,OAAO,EAAE,CAAC;QAChB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,IAAI,OAAO,EAAE,CAAC;YACZ,OAAO,CAAC,IAAI,CAAC,iBAAiB,CAAC,CAAC;QAClC,CAAC;QAED,IAAI,KAAK,YAAY,KAAK,EAAE,CAAC;YAC3B,OAAO,CAAC,KAAK,CAAC,YAAY,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC;QAC7C,CAAC;aAAM,CAAC;YACN,OAAO,CAAC,KAAK,CAAC,iCAAiC,CAAC,CAAC;QACnD,CAAC;QAED,MAAM,OAAO,EAAE,CAAC;QAChB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;AACH,CAAC,CAAC,CAAC;AAEL,kBAAkB;AAClB,OAAO;KACJ,OAAO,CAAC,QAAQ,CAAC;KACjB,QAAQ,CAAC,SAAS,EAAE,cAAc,CAAC;KACnC,WAAW,CAAC,kCAAkC,CAAC;KAC/C,MAAM,CAAC,GAAG,EAAE;IACX,OAAO,CAAC,GAAG,CAAC,oCAAoC,CAAC,CAAC;IAClD,OAAO,CAAC,GAAG,CAAC,4CAA4C,CAAC,CAAC;IAC1D,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC,CAAC,CAAC;AAEL,OAAO;KACJ,OAAO,CAAC,OAAO,CAAC;KAChB,WAAW,CAAC,kBAAkB,CAAC;KAC/B,MAAM,CAAC,mBAAmB,EAAE,aAAa,EAAE,MAAM,CAAC;KAClD,MAAM,CAAC,KAAK,EAAE,OAAO,EAAE,EAAE;IACxB,MAAM,EAAE,WAAW,EAAE,GAAG,MAAM,MAAM,CAAC,iBAAiB,CAAC,CAAC;IACxD,WAAW,CAAC,EAAE,IAAI,EAAE,QAAQ,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC,EAAE,CAAC,CAAC;AACpD,CAAC,CAAC,CAAC;AAEL,OAAO;KACJ,OAAO,CAAC,KAAK,CAAC;KACd,WAAW,CAAC,8CAA8C,CAAC;KAC3D,MAAM,CAAC,KAAK,IAAI,EAAE;IACj
B,MAAM,MAAM,CAAC,iBAAiB,CAAC,CAAC;AAClC,CAAC,CAAC,CAAC;AAEL,OAAO,CAAC,KAAK,EAAE,CAAC"}
@@ -0,0 +1,32 @@
1
+ /**
2
+ * Core fetching logic: simple HTTP and browser-based fetching
3
+ */
4
+ export interface FetchResult {
5
+ html: string;
6
+ url: string;
7
+ statusCode?: number;
8
+ }
9
+ /**
10
+ * Simple HTTP fetch using native fetch + Cheerio
11
+ * Fast and lightweight, but can be blocked by Cloudflare/bot detection
12
+ * SECURITY: Manual redirect handling with SSRF re-validation
13
+ */
14
+ export declare function simpleFetch(url: string, userAgent?: string, timeoutMs?: number): Promise<FetchResult>;
15
+ /**
16
+ * Fetch using headless Chromium via Playwright
17
+ * Slower but can handle JavaScript-heavy sites and bypass some bot detection
18
+ */
19
+ export declare function browserFetch(url: string, options?: {
20
+ userAgent?: string;
21
+ waitMs?: number;
22
+ timeoutMs?: number;
23
+ }): Promise<FetchResult>;
24
+ /**
25
+ * Retry a fetch operation with exponential backoff
26
+ */
27
+ export declare function retryFetch<T>(fn: () => Promise<T>, maxAttempts?: number, baseDelayMs?: number): Promise<T>;
28
+ /**
29
+ * Clean up browser resources
30
+ */
31
+ export declare function cleanup(): Promise<void>;
32
+ //# sourceMappingURL=fetcher.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"fetcher.d.ts","sourceRoot":"","sources":["../../src/core/fetcher.ts"],"names":[],"mappings":"AAAA;;GAEG;AA2PH,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,MAAM,CAAC;IACb,GAAG,EAAE,MAAM,CAAC;IACZ,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED;;;;GAIG;AACH,wBAAsB,WAAW,CAC/B,GAAG,EAAE,MAAM,EACX,SAAS,CAAC,EAAE,MAAM,EAClB,SAAS,GAAE,MAAc,GACxB,OAAO,CAAC,WAAW,CAAC,CAyItB;AAuBD;;;GAGG;AACH,wBAAsB,YAAY,CAChC,GAAG,EAAE,MAAM,EACX,OAAO,GAAE;IACP,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,SAAS,CAAC,EAAE,MAAM,CAAC;CACf,GACL,OAAO,CAAC,WAAW,CAAC,CAoGtB;AAED;;GAEG;AACH,wBAAsB,UAAU,CAAC,CAAC,EAChC,EAAE,EAAE,MAAM,OAAO,CAAC,CAAC,CAAC,EACpB,WAAW,GAAE,MAAU,EACvB,WAAW,GAAE,MAAa,GACzB,OAAO,CAAC,CAAC,CAAC,CAsBZ;AAED;;GAEG;AACH,wBAAsB,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC,CAK7C"}