webpeel 0.16.0 → 0.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113) hide show
  1. package/LICENSE +11 -657
  2. package/README.md +246 -325
  3. package/dist/cli.js +330 -73
  4. package/dist/cli.js.map +1 -1
  5. package/dist/core/browser-fetch.d.ts +12 -0
  6. package/dist/core/browser-fetch.d.ts.map +1 -1
  7. package/dist/core/browser-fetch.js +70 -17
  8. package/dist/core/browser-fetch.js.map +1 -1
  9. package/dist/core/cf-worker-proxy.d.ts +33 -0
  10. package/dist/core/cf-worker-proxy.d.ts.map +1 -0
  11. package/dist/core/cf-worker-proxy.js +88 -0
  12. package/dist/core/cf-worker-proxy.js.map +1 -0
  13. package/dist/core/chunker.d.ts +47 -0
  14. package/dist/core/chunker.d.ts.map +1 -0
  15. package/dist/core/chunker.js +250 -0
  16. package/dist/core/chunker.js.map +1 -0
  17. package/dist/core/cloak-fetch.d.ts +43 -0
  18. package/dist/core/cloak-fetch.d.ts.map +1 -0
  19. package/dist/core/cloak-fetch.js +141 -0
  20. package/dist/core/cloak-fetch.js.map +1 -0
  21. package/dist/core/crawl-checkpoint.d.ts +55 -0
  22. package/dist/core/crawl-checkpoint.d.ts.map +1 -0
  23. package/dist/core/crawl-checkpoint.js +105 -0
  24. package/dist/core/crawl-checkpoint.js.map +1 -0
  25. package/dist/core/crawler.d.ts +5 -1
  26. package/dist/core/crawler.d.ts.map +1 -1
  27. package/dist/core/crawler.js +60 -5
  28. package/dist/core/crawler.js.map +1 -1
  29. package/dist/core/cycle-fetch.d.ts +27 -0
  30. package/dist/core/cycle-fetch.d.ts.map +1 -0
  31. package/dist/core/cycle-fetch.js +99 -0
  32. package/dist/core/cycle-fetch.js.map +1 -0
  33. package/dist/core/domain-extractors.d.ts.map +1 -1
  34. package/dist/core/domain-extractors.js +754 -14
  35. package/dist/core/domain-extractors.js.map +1 -1
  36. package/dist/core/google-cache.d.ts +30 -0
  37. package/dist/core/google-cache.d.ts.map +1 -0
  38. package/dist/core/google-cache.js +181 -0
  39. package/dist/core/google-cache.js.map +1 -0
  40. package/dist/core/markdown.d.ts +11 -0
  41. package/dist/core/markdown.d.ts.map +1 -1
  42. package/dist/core/markdown.js +43 -0
  43. package/dist/core/markdown.js.map +1 -1
  44. package/dist/core/peel-tls.d.ts +26 -0
  45. package/dist/core/peel-tls.d.ts.map +1 -0
  46. package/dist/core/peel-tls.js +221 -0
  47. package/dist/core/peel-tls.js.map +1 -0
  48. package/dist/core/pipeline.d.ts +5 -1
  49. package/dist/core/pipeline.d.ts.map +1 -1
  50. package/dist/core/pipeline.js +269 -21
  51. package/dist/core/pipeline.js.map +1 -1
  52. package/dist/core/schema-postprocess.d.ts +33 -0
  53. package/dist/core/schema-postprocess.d.ts.map +1 -0
  54. package/dist/core/schema-postprocess.js +470 -0
  55. package/dist/core/schema-postprocess.js.map +1 -0
  56. package/dist/core/schema-templates.d.ts +20 -0
  57. package/dist/core/schema-templates.d.ts.map +1 -0
  58. package/dist/core/schema-templates.js +131 -0
  59. package/dist/core/schema-templates.js.map +1 -0
  60. package/dist/core/search-fallback.d.ts +28 -0
  61. package/dist/core/search-fallback.d.ts.map +1 -0
  62. package/dist/core/search-fallback.js +185 -0
  63. package/dist/core/search-fallback.js.map +1 -0
  64. package/dist/core/search-provider.d.ts +47 -4
  65. package/dist/core/search-provider.d.ts.map +1 -1
  66. package/dist/core/search-provider.js +278 -7
  67. package/dist/core/search-provider.js.map +1 -1
  68. package/dist/core/stealth-patches.d.ts +58 -0
  69. package/dist/core/stealth-patches.d.ts.map +1 -0
  70. package/dist/core/stealth-patches.js +340 -0
  71. package/dist/core/stealth-patches.js.map +1 -0
  72. package/dist/core/strategies.d.ts +20 -0
  73. package/dist/core/strategies.d.ts.map +1 -1
  74. package/dist/core/strategies.js +284 -48
  75. package/dist/core/strategies.js.map +1 -1
  76. package/dist/core/strategy-hooks.d.ts +1 -1
  77. package/dist/core/strategy-hooks.d.ts.map +1 -1
  78. package/dist/index.d.ts +11 -0
  79. package/dist/index.d.ts.map +1 -1
  80. package/dist/index.js +37 -15
  81. package/dist/index.js.map +1 -1
  82. package/dist/mcp/server.js +109 -4
  83. package/dist/mcp/server.js.map +1 -1
  84. package/dist/server/app.d.ts.map +1 -1
  85. package/dist/server/app.js +29 -0
  86. package/dist/server/app.js.map +1 -1
  87. package/dist/server/middleware/rate-limit.d.ts +2 -1
  88. package/dist/server/middleware/rate-limit.d.ts.map +1 -1
  89. package/dist/server/middleware/rate-limit.js +24 -8
  90. package/dist/server/middleware/rate-limit.js.map +1 -1
  91. package/dist/server/routes/agent.d.ts +4 -0
  92. package/dist/server/routes/agent.d.ts.map +1 -1
  93. package/dist/server/routes/agent.js +196 -9
  94. package/dist/server/routes/agent.js.map +1 -1
  95. package/dist/server/routes/batch.js +5 -5
  96. package/dist/server/routes/batch.js.map +1 -1
  97. package/dist/server/routes/compat.d.ts.map +1 -1
  98. package/dist/server/routes/compat.js +1 -0
  99. package/dist/server/routes/compat.js.map +1 -1
  100. package/dist/server/routes/fetch.d.ts.map +1 -1
  101. package/dist/server/routes/fetch.js +60 -6
  102. package/dist/server/routes/fetch.js.map +1 -1
  103. package/dist/server/routes/mcp.d.ts.map +1 -1
  104. package/dist/server/routes/mcp.js +103 -2
  105. package/dist/server/routes/mcp.js.map +1 -1
  106. package/dist/server/routes/search.js +1 -1
  107. package/dist/server/routes/search.js.map +1 -1
  108. package/dist/types.d.ts +55 -4
  109. package/dist/types.d.ts.map +1 -1
  110. package/dist/types.js +4 -1
  111. package/dist/types.js.map +1 -1
  112. package/llms.txt +55 -125
  113. package/package.json +15 -1
package/README.md CHANGED
@@ -1,447 +1,368 @@
1
1
  <p align="center">
2
2
  <a href="https://webpeel.dev">
3
- <img src=".github/banner.svg" alt="WebPeel — Web fetching for AI agents" width="100%">
3
+ <img src=".github/banner.svg" alt="WebPeel — Web data API for AI agents" width="100%">
4
4
  </a>
5
5
  </p>
6
6
 
7
7
  <p align="center">
8
- <a href="https://www.npmjs.com/package/webpeel"><img src="https://img.shields.io/npm/v/webpeel.svg" alt="npm version"></a>
9
- <a href="https://pypi.org/project/webpeel/"><img src="https://img.shields.io/pypi/v/webpeel.svg" alt="PyPI version"></a>
10
- <a href="https://www.npmjs.com/package/webpeel"><img src="https://img.shields.io/npm/dm/webpeel.svg" alt="downloads"></a>
11
- <a href="https://github.com/webpeel/webpeel/stargazers"><img src="https://img.shields.io/github/stars/webpeel/webpeel.svg" alt="GitHub stars"></a>
12
- <a href="https://github.com/webpeel/webpeel/actions/workflows/ci.yml"><img src="https://github.com/webpeel/webpeel/actions/workflows/ci.yml/badge.svg" alt="CI"></a>
13
- <a href="https://www.gnu.org/licenses/agpl-3.0"><img src="https://img.shields.io/badge/License-AGPL%20v3-blue.svg" alt="AGPL v3"></a>
8
+ <a href="https://www.npmjs.com/package/webpeel"><img src="https://img.shields.io/npm/v/webpeel.svg?style=flat-square" alt="npm version"></a>
9
+ <a href="https://pypi.org/project/webpeel/"><img src="https://img.shields.io/pypi/v/webpeel.svg?style=flat-square" alt="PyPI version"></a>
10
+ <a href="https://www.npmjs.com/package/webpeel"><img src="https://img.shields.io/npm/dm/webpeel.svg?style=flat-square" alt="Monthly downloads"></a>
11
+ <a href="https://github.com/webpeel/webpeel/stargazers"><img src="https://img.shields.io/github/stars/webpeel/webpeel.svg?style=flat-square" alt="GitHub stars"></a>
12
+ <a href="LICENSE"><img src="https://img.shields.io/badge/license-WebPeel%20SDK-blue.svg?style=flat-square" alt="License"></a>
13
+ <a href="https://status.webpeel.dev"><img src="https://img.shields.io/badge/status-operational-brightgreen.svg?style=flat-square" alt="Status"></a>
14
14
  </p>
15
15
 
16
16
  <p align="center">
17
- <strong>Web intelligence for AI agents.</strong><br>
18
- Fetch any URL → clean markdown. YouTube transcripts. Reddit threads. Quick answers. No API keys needed.
17
+ <strong>The web data API for AI agents.</strong><br>
18
+ Fetch, search, extract, and understand any webpage with one API call.
19
19
  </p>
20
20
 
21
21
  <p align="center">
22
- <a href="https://webpeel.dev">Website</a> ·
23
22
  <a href="https://webpeel.dev/docs">Docs</a> ·
24
- <a href="https://webpeel.dev/playground">Playground</a> ·
25
23
  <a href="https://app.webpeel.dev">Dashboard</a> ·
26
- <a href="https://github.com/webpeel/webpeel/discussions">Discussions</a>
24
+ <a href="https://webpeel.dev/docs/api">API Reference</a> ·
25
+ <a href="https://discord.gg/webpeel">Discord</a> ·
26
+ <a href="https://status.webpeel.dev">Status</a>
27
27
  </p>
28
28
 
29
29
  ---
30
30
 
31
- > **WebPeel** gives AI agents reliable web access in one call. It handles JavaScript rendering, bot detection, and content extraction automatically — your agent gets clean, structured data. 18 MCP tools, 1,098 tests, 100% open source.
31
+ ## Get Started
32
32
 
33
- ---
34
-
35
- ## 🚀 Quick Start
33
+ ### Install
36
34
 
37
35
  ```bash
38
- npx webpeel "https://example.com"
39
- ```
40
-
41
- **More examples:**
36
+ # Node.js / TypeScript
37
+ npm install webpeel
42
38
 
43
- ```bash
44
- # YouTube transcript — no API key!
45
- npx webpeel "https://youtube.com/watch?v=dQw4w9WgXcQ"
46
-
47
- # Ask any page a question — no LLM key!
48
- npx webpeel "https://openai.com/pricing" -q "how much does GPT-4 cost?"
49
-
50
- # Reddit thread — structured JSON
51
- npx webpeel "https://reddit.com/r/programming/comments/..." --json
39
+ # Python
40
+ pip install webpeel
52
41
 
53
- # Reader mode — strips all noise
54
- npx webpeel "https://nytimes.com/article" --readable
42
+ # No install — use directly
43
+ npx webpeel "https://example.com"
55
44
  ```
56
45
 
57
- No install needed. First 25 fetches work without signup. [Get 500/week free →](https://app.webpeel.dev/signup)
46
+ ### Usage
58
47
 
59
- ### MCP Server (for Claude, Cursor, VS Code, Windsurf)
48
+ **TypeScript**
49
+ ```typescript
50
+ import { WebPeel } from 'webpeel';
60
51
 
61
- ```json
62
- {
63
- "mcpServers": {
64
- "webpeel": {
65
- "command": "npx",
66
- "args": ["-y", "webpeel", "mcp"]
67
- }
68
- }
69
- }
52
+ const wp = new WebPeel({ apiKey: process.env.WEBPEEL_API_KEY });
53
+ const result = await wp.fetch('https://news.ycombinator.com');
54
+ console.log(result.markdown); // Clean, structured content
70
55
  ```
71
56
 
72
- [![Install in Claude Desktop](https://img.shields.io/badge/Install-Claude%20Desktop-5B3FFF?style=for-the-badge&logo=anthropic)](https://mcp.so/install/webpeel?for=claude)
73
- [![Install in VS Code](https://img.shields.io/badge/Install-VS%20Code-007ACC?style=for-the-badge&logo=visualstudiocode)](https://mcp.so/install/webpeel?for=vscode)
57
+ **Python**
58
+ ```python
59
+ from webpeel import WebPeel
74
60
 
75
- ### REST API
61
+ wp = WebPeel(api_key=os.environ["WEBPEEL_API_KEY"])
62
+ result = wp.fetch("https://news.ycombinator.com")
63
+ print(result.markdown) # Clean, structured content
64
+ ```
76
65
 
66
+ **curl**
77
67
  ```bash
78
68
  curl "https://api.webpeel.dev/v1/fetch?url=https://example.com" \
79
- -H "Authorization: Bearer wp_YOUR_KEY"
69
+ -H "Authorization: Bearer $WEBPEEL_API_KEY"
80
70
  ```
81
71
 
82
- ---
83
-
84
- ## ✨ What can it do?
85
-
86
- | | Feature | What you get |
87
- |---|---------|-------------|
88
- | 🌐 | **Fetch** | Any URL → clean markdown, text, or JSON. Auto-handles JS rendering, bot detection, CAPTCHAs |
89
- | 🎬 | **YouTube** | Full video transcripts with timestamps. No API key |
90
- | 🐦 | **Twitter/Reddit/GitHub/HN** | Structured data from social platforms via native APIs |
91
- | ❓ | **Quick Answer** | Ask a question about any page. BM25 scoring, no LLM key |
92
- | 📖 | **Reader Mode** | Browser Reader Mode for AI — strips nav, ads, cookies, 25+ noise patterns |
93
- | 🔍 | **Search** | Web search across 27+ sites. Deep research with multi-hop analysis |
94
- | 📊 | **Extract** | Pricing pages, products, contacts → structured JSON. CSS/JSON Schema/LLM extraction |
95
- | 🕵️ | **Stealth** | Bypasses Cloudflare, PerimeterX, DataDome, Akamai. 28 auto-stealth domains |
96
- | 🏨 | **Hotels** | Kayak + Booking + Google Travel + Expedia in parallel |
97
- | 🔄 | **Monitor** | Watch URLs for changes, get webhook notifications |
98
- | 🕷️ | **Crawl** | BFS/DFS site crawling, sitemap discovery, robots.txt compliance |
99
- | 📸 | **Screenshot** | Full-page or viewport screenshots |
100
- | 🐍 | **Python SDK** | `pip install webpeel` — sync + async client |
72
+ [Get your free API key →](https://app.webpeel.dev/signup) · No credit card required · 500 requests/week free
101
73
 
102
74
  ---
103
75
 
104
- ## 🏆 How does it compare?
105
-
106
- | Feature | WebPeel | Firecrawl | Crawl4AI | Jina Reader |
107
- |---------|:-------:|:---------:|:--------:|:-----------:|
108
- | YouTube transcripts | ✅ | ❌ | ❌ | ❌ |
109
- | LLM-free Q&A | ✅ | ❌ | ❌ | ❌ |
110
- | Reader mode | ✅ | ❌ | ❌ | ❌ |
111
- | Domain extractors (Twitter, Reddit, GH, HN) | ✅ | ❌ | ❌ | ❌ |
112
- | Auto-extract (pricing, products) | ✅ | ❌ | ❌ | ❌ |
113
- | URL monitoring | ✅ | ❌ | ❌ | ❌ |
114
- | Stealth / anti-bot | ✅ | ⚡ Hosted only | ✅ | ❌ |
115
- | MCP server | ✅ 18 tools | ✅ 4 tools | ❌ | ❌ |
116
- | Deep research | ✅ | ❌ | ❌ | ❌ |
117
- | Hotel search | ✅ | ❌ | ❌ | ❌ |
118
- | Self-hostable | ✅ | ✅ | ✅ | ❌ |
119
- | Free tier | 500/week | 500 credits | Unlimited | Unlimited |
120
- | Open source | AGPL-3.0 | AGPL-3.0 | Apache-2.0 | N/A |
76
+ ## What It Does
121
77
 
122
- ---
123
-
124
- ## Benchmark
125
-
126
- Evaluated on 30 real-world URLs across 6 categories (static, dynamic, SPA, protected, documents, international):
127
-
128
- | | WebPeel | Next best |
129
- |---|:---:|:---:|
130
- | **Success rate** | **100%** (30/30) | 93.3% |
131
- | **Content quality** | **92.3%** | 83.2% |
132
-
133
- WebPeel is the only tool that extracted content from all 30 test URLs. [Full methodology →](https://webpeel.dev/blog/benchmarks)
78
+ | | Capability | Result |
79
+ |---|---|---|
80
+ | 🌐 | **Fetch** | Any URL → clean markdown or JSON. Handles JavaScript, bot detection, and dynamic content automatically |
81
+ | 🔍 | **Search** | Web search with structured results — titles, URLs, snippets, and optional full-page content |
82
+ | 📊 | **Extract** | Pull structured data using JSON Schema. Products, pricing, contacts, tables — any pattern |
83
+ | 🕷️ | **Crawl** | Map and scrape entire websites with one API call. Follows links, respects robots.txt |
84
+ | 🤖 | **MCP** | 18 tools natively available in Claude, Cursor, VS Code, Windsurf, and any MCP-compatible agent |
85
+ | 📸 | **Screenshot** | Full-page or viewport screenshots in PNG/JPEG |
86
+ | 🎬 | **YouTube** | Video transcripts with timestamps — no YouTube API key required |
87
+ | 👁️ | **Monitor** | Watch pages for changes and receive webhook notifications |
134
88
 
135
89
  ---
136
90
 
137
- ## 🤖 MCP Integration
138
-
139
- WebPeel exposes **18 tools** to your AI coding assistant:
140
-
141
- | Tool | What it does |
142
- |------|--------------|
143
- | `webpeel_fetch` | Fetch any URL → markdown. Smart escalation built in. Supports `readable: true` for reader mode |
144
- | `webpeel_search` | Web search with structured results across 27+ sources |
145
- | `webpeel_batch` | Fetch multiple URLs concurrently |
146
- | `webpeel_crawl` | Crawl a site with depth/page limits |
147
- | `webpeel_map` | Discover all URLs on a domain |
148
- | `webpeel_extract` | Structured extraction (CSS, JSON Schema, or LLM) |
149
- | `webpeel_screenshot` | Screenshot any page (full-page or viewport) |
150
- | `webpeel_research` | Deep multi-hop research on a topic |
151
- | `webpeel_summarize` | AI summary of any URL |
152
- | `webpeel_answer` | Ask a question about a URL's content |
153
- | `webpeel_change_track` | Detect changes between two fetches |
154
- | `webpeel_brand` | Extract branding assets from a site |
155
- | `webpeel_deep_fetch` | Search + batch fetch + merge — comprehensive research, no LLM key |
156
- | `webpeel_youtube` | Extract YouTube video transcripts — all URL formats, no API key |
157
- | `webpeel_auto_extract` | Heuristic structured data extraction — auto-detects pricing, products, contacts |
158
- | `webpeel_quick_answer` | BM25-powered Q&A — ask any question about any page, no LLM key |
159
- | `webpeel_watch` | Persistent URL change monitoring with webhook notifications |
160
- | `webpeel_hotels` | Hotel search across Kayak, Booking.com, Google Travel, Expedia in parallel |
161
-
162
- <details>
163
- <summary>Setup for Claude Desktop, Cursor, VS Code, Windsurf, Docker</summary>
164
-
165
- **Claude Desktop** (`~/Library/Application Support/Claude/claude_desktop_config.json`):
166
- ```json
167
- {
168
- "mcpServers": {
169
- "webpeel": { "command": "npx", "args": ["-y", "webpeel", "mcp"] }
170
- }
171
- }
172
- ```
91
+ ## Anti-Bot Bypass Stack
173
92
 
174
- **Cursor** (Settings → MCP Servers):
175
- ```json
176
- {
177
- "mcpServers": {
178
- "webpeel": { "command": "npx", "args": ["-y", "webpeel", "mcp"] }
179
- }
180
- }
181
- ```
93
+ WebPeel uses a 4-layer escalation chain to bypass bot protection — all built in-house, no paid proxy services required:
182
94
 
183
- **VS Code** (`~/.vscode/mcp.json`):
184
- ```json
185
- {
186
- "servers": {
187
- "webpeel": { "command": "npx", "args": ["-y", "webpeel", "mcp"] }
188
- }
189
- }
190
95
  ```
191
-
192
- **Windsurf** (`~/.codeium/windsurf/mcp_config.json`):
193
- ```json
194
- {
195
- "mcpServers": {
196
- "webpeel": { "command": "npx", "args": ["-y", "webpeel", "mcp"] }
197
- }
198
- }
96
+ 1. PeelTLS — Chrome TLS fingerprint spoofing (in-process Go binary) ~85% of sites
97
+ 2. CF Worker — Cloudflare edge network proxy (different IP reputation) +5%
98
+ 3. Google Cache — Cached page copy if available +2%
99
+ 4. Search — Extract from search engine snippets (last resort) last resort
199
100
  ```
200
101
 
201
- **Docker (stdio)**:
202
- ```json
203
- {
204
- "mcpServers": {
205
- "webpeel": { "command": "docker", "args": ["run", "-i", "--rm", "webpeel/mcp"] }
206
- }
207
- }
208
- ```
102
+ **For e-commerce sites**, WebPeel uses official APIs before attempting HTML scraping:
103
+ - **Best Buy** — Free Products API (50K queries/day). Set `BESTBUY_API_KEY` env var.
104
+ - **Walmart** — Frontend API (may be blocked; falls through gracefully)
105
+ - **Reddit, GitHub, HN, Wikipedia, YouTube, ArXiv** — Official APIs, always fast
209
106
 
210
- **Hosted endpoint** (no local server needed):
211
- ```json
212
- {
213
- "mcpServers": {
214
- "webpeel": {
215
- "url": "https://api.webpeel.dev/mcp",
216
- "headers": { "Authorization": "Bearer YOUR_API_KEY" }
217
- }
218
- }
219
- }
107
+ **Self-hosted CF Worker** (100K requests/day free):
108
+ ```bash
109
+ cd worker && npx wrangler deploy
110
+ # Then set WEBPEEL_CF_WORKER_URL and WEBPEEL_CF_WORKER_TOKEN env vars
220
111
  ```
221
- </details>
222
112
 
223
113
  ---
224
114
 
225
- ## 🔬 Deep Research
115
+ ## Benchmarks
226
116
 
227
- Multi-hop research that thinks like a researcher, not a search engine:
117
+ Independent testing across 500 URLs including e-commerce, news, SaaS, and social platforms.
228
118
 
229
- ```bash
230
- # Sources only — no API key needed
231
- npx webpeel research "best practices for rate limiting APIs" --max-sources 8
119
+ | Metric | **WebPeel** | Firecrawl | Crawl4AI | Jina Reader |
120
+ |--------|:-----------:|:---------:|:--------:|:-----------:|
121
+ | Success rate (protected sites) | **94%** | 71% | 58% | 49% |
122
+ | Median response time | **380ms** | 890ms | 1,240ms | 520ms |
123
+ | Content quality score¹ | **0.91** | 0.74 | 0.69 | 0.72 |
124
+ | Price per 1,000 requests | **$0.80** | $5.33 | self-host | $1.00 |
232
125
 
233
- # Full synthesis with LLM (BYOK)
234
- npx webpeel research "compare Firecrawl vs Crawl4AI vs WebPeel" --llm-key sk-...
235
- ```
126
+ ¹ Content quality = signal-to-noise ratio (relevant content vs boilerplate), scored 0–1.
236
127
 
237
- Search → fetch top results → extract key passages (BM25) → follow the most relevant links → synthesize. No circular references, no duplicate content.
128
+ > Methodology: Tested Feb 2026. Protected sites = Cloudflare/bot-protected pages. Quality scored by GPT-4o on content relevance and completeness. [Full methodology →](https://webpeel.dev/benchmarks)
238
129
 
239
130
  ---
240
131
 
241
- ## 📦 Extraction
132
+ ## Pricing
242
133
 
243
- <details>
244
- <summary>CSS Schema, JSON Schema, and LLM extraction — click to expand</summary>
134
+ | Plan | Price | Requests | Features |
135
+ |------|-------|----------|----------|
136
+ | **Free** | $0/mo | 500/week | Fetch, search, extract, crawl |
137
+ | **Pro** | $9/mo | 1,250/week | Everything + protected site access |
138
+ | **Max** | $29/mo | 6,250/week | Everything + priority queue |
139
+ | **Enterprise** | Custom | Unlimited | SLA, dedicated infra, custom domains |
245
140
 
246
- ### CSS Schema (zero config, auto-detected)
141
+ All plans include: full API access, TypeScript + Python SDKs, MCP server, CLI.
142
+ [See full pricing →](https://webpeel.dev/pricing)
247
143
 
248
- ```bash
249
- # Auto-detects Amazon and applies the built-in schema
250
- npx webpeel "https://www.amazon.com/s?k=mechanical+keyboard" --json
251
-
252
- # Force a specific schema
253
- npx webpeel "https://www.booking.com/searchresults.html?city=Paris" --schema booking --json
254
-
255
- # List all built-in schemas
256
- npx webpeel --list-schemas
257
- ```
258
-
259
- Built-in schemas: `amazon` · `booking` · `ebay` · `expedia` · `hackernews` · `walmart` · `yelp`
260
-
261
- ### JSON Schema (type-safe structured extraction)
144
+ ---
262
145
 
263
- ```bash
264
- npx webpeel "https://example.com/product" \
265
- --extract-schema '{"type":"object","properties":{"title":{"type":"string"},"price":{"type":"number"}}}' \
266
- --llm-key sk-...
267
- ```
146
+ ## SDK
268
147
 
269
- ### LLM Extraction (natural language, BYOK)
148
+ ### TypeScript / Node.js
270
149
 
271
- ```bash
272
- npx webpeel "https://hn.algolia.com" \
273
- --llm-extract "top 10 posts with title, score, and comment count" \
274
- --llm-key $OPENAI_API_KEY \
275
- --json
276
- ```
150
+ ```typescript
151
+ import { WebPeel } from 'webpeel';
277
152
 
278
- > **Note:** WebPeel is an ESM-only package. Use `import` syntax:
279
- > ```js
280
- > import { peel } from 'webpeel';
281
- > ```
282
- > CommonJS `require()` is not supported. If your project uses CommonJS, use dynamic import: `const { peel } = await import('webpeel');`
153
+ const wp = new WebPeel({ apiKey: process.env.WEBPEEL_API_KEY });
283
154
 
284
- ```typescript
285
- import { peel } from 'webpeel';
155
+ // Fetch a page
156
+ const page = await wp.fetch('https://stripe.com/pricing', {
157
+ format: 'markdown', // 'markdown' | 'html' | 'text' | 'json'
158
+ });
286
159
 
287
- // CSS selector extraction
288
- const result = await peel('https://news.ycombinator.com', {
289
- extract: { selectors: { titles: '.titleline > a', scores: '.score' } }
160
+ // Search the web
161
+ const results = await wp.search('best vector databases 2025', {
162
+ limit: 5,
163
+ fetchContent: true, // Optionally fetch full content for each result
290
164
  });
291
165
 
292
- // LLM extraction with JSON Schema
293
- const product = await peel('https://example.com/product', {
294
- llmExtract: 'title, price, rating, availability',
295
- llmKey: process.env.OPENAI_API_KEY,
166
+ // Extract structured data
167
+ const pricing = await wp.extract('https://stripe.com/pricing', {
168
+ schema: {
169
+ type: 'object',
170
+ properties: {
171
+ plans: {
172
+ type: 'array',
173
+ items: { type: 'object', properties: {
174
+ name: { type: 'string' },
175
+ price: { type: 'string' },
176
+ features: { type: 'array', items: { type: 'string' } }
177
+ }}
178
+ }
179
+ }
180
+ }
296
181
  });
297
- ```
298
- </details>
299
182
 
300
- ---
183
+ // Crawl a site
184
+ const crawl = await wp.crawl('https://docs.example.com', {
185
+ maxPages: 50,
186
+ maxDepth: 3,
187
+ outputFormat: 'markdown',
188
+ });
189
+ for await (const page of crawl) {
190
+ console.log(page.url, page.markdown);
191
+ }
301
192
 
302
- ## 🛡️ Stealth & Anti-Bot
193
+ // Screenshot
194
+ const shot = await wp.screenshot('https://webpeel.dev', { fullPage: true });
195
+ fs.writeFileSync('screenshot.png', shot.image, 'base64');
196
+ ```
303
197
 
304
- <details>
305
- <summary>Supported bot-protection vendors and auto-stealth domains — click to expand</summary>
198
+ [Full TypeScript reference →](https://webpeel.dev/docs/sdk/typescript)
306
199
 
307
- WebPeel detects 7 bot-protection vendors automatically:
200
+ ### Python
308
201
 
309
- - **Cloudflare** (JS challenge, Turnstile, Bot Management)
310
- - **PerimeterX / HUMAN** (behavioral analysis)
311
- - **DataDome** (ML-based bot detection)
312
- - **Akamai Bot Manager**
313
- - **Distil Networks**
314
- - **reCAPTCHA / hCaptcha**
315
- - **Generic challenge pages**
202
+ ```python
203
+ from webpeel import WebPeel
204
+ import os
205
+
206
+ wp = WebPeel(api_key=os.environ["WEBPEEL_API_KEY"])
207
+
208
+ # Fetch a page
209
+ page = wp.fetch("https://stripe.com/pricing", format="markdown")
210
+ print(page.markdown)
211
+
212
+ # Search
213
+ results = wp.search("best vector databases 2025", limit=5)
214
+ for r in results:
215
+ print(r.title, r.url)
216
+
217
+ # Extract structured data
218
+ pricing = wp.extract("https://stripe.com/pricing", schema={
219
+ "type": "object",
220
+ "properties": {
221
+ "plans": {
222
+ "type": "array",
223
+ "items": { "type": "object", "properties": {
224
+ "name": { "type": "string" },
225
+ "price": { "type": "string" }
226
+ }}
227
+ }
228
+ }
229
+ })
316
230
 
317
- 28 high-protection domains (Amazon, LinkedIn, Glassdoor, Zillow, Ticketmaster, and more) automatically route through stealth mode — no flags needed.
231
+ # Async client
232
+ from webpeel import AsyncWebPeel
233
+ import asyncio
318
234
 
319
- ```bash
320
- # Explicitly enable stealth
321
- npx webpeel "https://glassdoor.com/jobs" --stealth
235
+ async def main():
236
+ wp = AsyncWebPeel(api_key=os.environ["WEBPEEL_API_KEY"])
237
+ results = await asyncio.gather(
238
+ wp.fetch("https://site1.com"),
239
+ wp.fetch("https://site2.com"),
240
+ wp.fetch("https://site3.com"),
241
+ )
322
242
 
323
- # Auto-escalation (stealth triggers automatically on challenge detection)
324
- npx webpeel "https://amazon.com/dp/ASIN"
243
+ asyncio.run(main())
325
244
  ```
326
- </details>
327
-
328
- ---
329
245
 
330
- ## 🏨 Hotel Search
246
+ [Full Python reference →](https://webpeel.dev/docs/sdk/python)
331
247
 
332
- <details>
333
- <summary>Multi-source hotel search — click to expand</summary>
248
+ ### MCP — For AI Agents
334
249
 
335
- Search Kayak, Booking.com, Google Travel, and Expedia in parallel — returns unified results in one call.
250
+ Give Claude, Cursor, or any MCP-compatible agent the ability to browse the web.
336
251
 
337
- ```bash
338
- npx webpeel hotels "Paris" --check-in 2025-06-01 --check-out 2025-06-07 --guests 2 --json
252
+ **Claude Desktop** (`~/Library/Application Support/Claude/claude_desktop_config.json`):
253
+ ```json
254
+ {
255
+ "mcpServers": {
256
+ "webpeel": {
257
+ "command": "npx",
258
+ "args": ["-y", "webpeel", "mcp"],
259
+ "env": {
260
+ "WEBPEEL_API_KEY": "wp_your_key_here"
261
+ }
262
+ }
263
+ }
264
+ }
339
265
  ```
340
266
 
341
- Available as `webpeel_hotels` MCP tool and via the REST API.
342
- </details>
267
+ **Cursor / VS Code** (`.cursor/mcp.json` or `.vscode/mcp.json`):
268
+ ```json
269
+ {
270
+ "mcpServers": {
271
+ "webpeel": {
272
+ "command": "npx",
273
+ "args": ["-y", "webpeel", "mcp"],
274
+ "env": {
275
+ "WEBPEEL_API_KEY": "wp_your_key_here"
276
+ }
277
+ }
278
+ }
279
+ }
280
+ ```
343
281
 
344
- ---
282
+ Available MCP tools: `fetch`, `search`, `extract`, `crawl`, `screenshot`, `youtube_transcript`, `monitor_start`, `monitor_stop`, `monitor_list`, `batch_fetch`, `map_site`, `diff`, `summarize`, `qa`, `pdf`, `reddit`, `twitter`, `github` — 18 tools total.
345
283
 
346
- ## 💳 Pricing
284
+ [![Install in Claude Desktop](https://img.shields.io/badge/Install-Claude%20Desktop-5B3FFF?style=for-the-badge&logo=anthropic)](https://mcp.so/install/webpeel?for=claude)
285
+ [![Install in VS Code](https://img.shields.io/badge/Install-VS%20Code-007ACC?style=for-the-badge&logo=visualstudiocode)](https://mcp.so/install/webpeel?for=vscode)
347
286
 
348
- | Plan | Price | Weekly Fetches | Burst |
349
- |------|------:|:--------------:|:-----:|
350
- | **Free** | $0/mo | 500/wk | 50/hr |
351
- | **Pro** | $9/mo | 1,250/wk | 100/hr |
352
- | **Max** | $29/mo | 6,250/wk | 500/hr |
287
+ [MCP setup guide →](https://webpeel.dev/docs/mcp)
353
288
 
354
- All features on all plans. Pro/Max add pay-as-you-go extra usage. Quota resets every Monday.
289
+ ### CLI
355
290
 
356
- [Sign up free →](https://app.webpeel.dev/signup) · [Compare with Firecrawl →](https://webpeel.dev/migrate-from-firecrawl)
291
+ ```bash
292
+ # Install globally
293
+ npm install -g webpeel
357
294
 
358
- ---
295
+ # Fetch a page (outputs clean markdown)
296
+ webpeel "https://news.ycombinator.com"
359
297
 
360
- ## 🐍 Python SDK
298
+ # Search the web
299
+ webpeel search "typescript orm comparison 2025"
361
300
 
362
- <details>
363
- <summary>Python SDK usage — click to expand</summary>
301
+ # Extract structured data
302
+ webpeel extract "https://stripe.com/pricing" --schema pricing-schema.json
364
303
 
365
- ```bash
366
- pip install webpeel
367
- ```
304
+ # Crawl a site, save to folder
305
+ webpeel crawl "https://docs.example.com" --output ./docs-dump --max-pages 100
368
306
 
369
- ```python
370
- from webpeel import WebPeel
307
+ # Screenshot
308
+ webpeel screenshot "https://webpeel.dev" --full-page --output screenshot.png
371
309
 
372
- client = WebPeel(api_key="wp_...") # or WEBPEEL_API_KEY env var
310
+ # YouTube transcript
311
+ webpeel youtube "https://youtube.com/watch?v=dQw4w9WgXcQ"
373
312
 
374
- result = client.scrape("https://example.com")
375
- print(result.content) # Clean markdown
376
- print(result.metadata) # title, description, author, ...
313
+ # Ask a question about a page
314
+ webpeel qa "https://openai.com/pricing" --question "How much does GPT-4o cost per million tokens?"
377
315
 
378
- results = client.search("latest AI research papers")
379
- job = client.crawl("https://docs.example.com", limit=100)
380
- result = client.scrape("https://protected-site.com", render=True, stealth=True)
316
+ # Output as JSON
317
+ webpeel "https://example.com" --json
381
318
  ```
382
319
 
383
- Sync and async clients. Pure Python 3.8+, zero dependencies. [Full SDK docs →](python-sdk/README.md)
384
- </details>
385
-
386
320
  ---
387
321
 
388
- ## 🐳 Self-Hosting
322
+ ## API Reference
389
323
 
390
- ```bash
391
- git clone https://github.com/webpeel/webpeel.git
392
- cd webpeel && docker compose up
393
- ```
394
-
395
- Full REST API at `http://localhost:3000`. AGPL-3.0 licensed. [Self-hosting guide →](SELF_HOST.md)
324
+ Base URL: `https://api.webpeel.dev/v1`
396
325
 
397
326
  ```bash
398
- docker run -i webpeel/mcp # MCP server only
399
- docker run -p 3000:3000 webpeel/api # API server only
400
- ```
327
+ # Fetch
328
+ GET /fetch?url=<url>&format=markdown
401
329
 
402
- ---
330
+ # Search
331
+ GET /search?q=<query>&limit=10
403
332
 
404
- ## 🤝 Contributing
333
+ # Extract
334
+ POST /extract
335
+ { "url": "...", "schema": { ... } }
405
336
 
406
- ```bash
407
- git clone https://github.com/webpeel/webpeel.git
408
- cd webpeel
409
- npm install && npm run build
410
- npm test
411
- ```
337
+ # Crawl
338
+ POST /crawl
339
+ { "url": "...", "maxPages": 50, "maxDepth": 3 }
412
340
 
413
- - **Bug reports:** [Open an issue](https://github.com/webpeel/webpeel/issues)
414
- - **Feature requests:** [Start a discussion](https://github.com/webpeel/webpeel/discussions)
415
- - **Code:** See [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines
341
+ # Screenshot
342
+ GET /screenshot?url=<url>&fullPage=true
416
343
 
417
- The project has 1,098 tests. Please add tests for new features.
418
-
419
- ---
344
+ # YouTube transcript
345
+ GET /youtube?url=<youtube_url>
346
+ ```
420
347
 
421
- ## Star History
348
+ All endpoints require `Authorization: Bearer wp_YOUR_KEY`.
422
349
 
423
- <a href="https://star-history.com/#webpeel/webpeel&Date">
424
- <picture>
425
- <source media="(prefers-color-scheme: dark)" srcset="https://api.star-history.com/svg?repos=webpeel/webpeel&type=Date&theme=dark" />
426
- <source media="(prefers-color-scheme: light)" srcset="https://api.star-history.com/svg?repos=webpeel/webpeel&type=Date" />
427
- <img alt="Star History Chart" src="https://api.star-history.com/svg?repos=webpeel/webpeel&type=Date" width="600" />
428
- </picture>
429
- </a>
350
+ [Full API reference →](https://webpeel.dev/docs/api)
430
351
 
431
352
  ---
432
353
 
433
- ## License
434
-
435
- [AGPL-3.0](LICENSE) — free to use, modify, and distribute. If you run a modified version as a network service, you must release your source under AGPL-3.0.
354
+ ## Links
436
355
 
437
- Need a commercial license? [support@webpeel.dev](mailto:support@webpeel.dev)
438
-
439
- > Versions 0.7.1 and earlier were released under MIT and remain MIT-licensed.
356
+ - 📖 [Documentation](https://webpeel.dev/docs) — Guides, references, and examples
357
+ - 🚀 [Dashboard](https://app.webpeel.dev) — Manage your API keys and usage
358
+ - 🔌 [API Reference](https://webpeel.dev/docs/api) — Full endpoint documentation
359
+ - 💬 [Discord](https://discord.gg/webpeel) — Community and support
360
+ - 📊 [Status](https://status.webpeel.dev) — Uptime and incidents
361
+ - 💰 [Pricing](https://webpeel.dev/pricing) — Plans and limits
362
+ - 📈 [Benchmarks](https://webpeel.dev/benchmarks) — How we compare
440
363
 
441
364
  ---
442
365
 
443
366
  <p align="center">
444
- If WebPeel saves you time, <a href="https://github.com/webpeel/webpeel"><strong>⭐ star the repo</strong></a> — it helps others find it.
367
+ <a href="https://app.webpeel.dev/signup">Get started free →</a>
445
368
  </p>
446
-
447
- © [WebPeel](https://github.com/webpeel)