webpeel 0.16.0 → 0.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +11 -657
- package/README.md +246 -325
- package/dist/cli.js +330 -73
- package/dist/cli.js.map +1 -1
- package/dist/core/browser-fetch.d.ts +12 -0
- package/dist/core/browser-fetch.d.ts.map +1 -1
- package/dist/core/browser-fetch.js +70 -17
- package/dist/core/browser-fetch.js.map +1 -1
- package/dist/core/cf-worker-proxy.d.ts +33 -0
- package/dist/core/cf-worker-proxy.d.ts.map +1 -0
- package/dist/core/cf-worker-proxy.js +88 -0
- package/dist/core/cf-worker-proxy.js.map +1 -0
- package/dist/core/chunker.d.ts +47 -0
- package/dist/core/chunker.d.ts.map +1 -0
- package/dist/core/chunker.js +250 -0
- package/dist/core/chunker.js.map +1 -0
- package/dist/core/cloak-fetch.d.ts +43 -0
- package/dist/core/cloak-fetch.d.ts.map +1 -0
- package/dist/core/cloak-fetch.js +141 -0
- package/dist/core/cloak-fetch.js.map +1 -0
- package/dist/core/crawl-checkpoint.d.ts +55 -0
- package/dist/core/crawl-checkpoint.d.ts.map +1 -0
- package/dist/core/crawl-checkpoint.js +105 -0
- package/dist/core/crawl-checkpoint.js.map +1 -0
- package/dist/core/crawler.d.ts +5 -1
- package/dist/core/crawler.d.ts.map +1 -1
- package/dist/core/crawler.js +60 -5
- package/dist/core/crawler.js.map +1 -1
- package/dist/core/cycle-fetch.d.ts +27 -0
- package/dist/core/cycle-fetch.d.ts.map +1 -0
- package/dist/core/cycle-fetch.js +99 -0
- package/dist/core/cycle-fetch.js.map +1 -0
- package/dist/core/domain-extractors.d.ts.map +1 -1
- package/dist/core/domain-extractors.js +754 -14
- package/dist/core/domain-extractors.js.map +1 -1
- package/dist/core/google-cache.d.ts +30 -0
- package/dist/core/google-cache.d.ts.map +1 -0
- package/dist/core/google-cache.js +181 -0
- package/dist/core/google-cache.js.map +1 -0
- package/dist/core/markdown.d.ts +11 -0
- package/dist/core/markdown.d.ts.map +1 -1
- package/dist/core/markdown.js +43 -0
- package/dist/core/markdown.js.map +1 -1
- package/dist/core/peel-tls.d.ts +26 -0
- package/dist/core/peel-tls.d.ts.map +1 -0
- package/dist/core/peel-tls.js +221 -0
- package/dist/core/peel-tls.js.map +1 -0
- package/dist/core/pipeline.d.ts +5 -1
- package/dist/core/pipeline.d.ts.map +1 -1
- package/dist/core/pipeline.js +269 -21
- package/dist/core/pipeline.js.map +1 -1
- package/dist/core/schema-postprocess.d.ts +33 -0
- package/dist/core/schema-postprocess.d.ts.map +1 -0
- package/dist/core/schema-postprocess.js +470 -0
- package/dist/core/schema-postprocess.js.map +1 -0
- package/dist/core/schema-templates.d.ts +20 -0
- package/dist/core/schema-templates.d.ts.map +1 -0
- package/dist/core/schema-templates.js +131 -0
- package/dist/core/schema-templates.js.map +1 -0
- package/dist/core/search-fallback.d.ts +28 -0
- package/dist/core/search-fallback.d.ts.map +1 -0
- package/dist/core/search-fallback.js +185 -0
- package/dist/core/search-fallback.js.map +1 -0
- package/dist/core/search-provider.d.ts +47 -4
- package/dist/core/search-provider.d.ts.map +1 -1
- package/dist/core/search-provider.js +278 -7
- package/dist/core/search-provider.js.map +1 -1
- package/dist/core/stealth-patches.d.ts +58 -0
- package/dist/core/stealth-patches.d.ts.map +1 -0
- package/dist/core/stealth-patches.js +340 -0
- package/dist/core/stealth-patches.js.map +1 -0
- package/dist/core/strategies.d.ts +20 -0
- package/dist/core/strategies.d.ts.map +1 -1
- package/dist/core/strategies.js +284 -48
- package/dist/core/strategies.js.map +1 -1
- package/dist/core/strategy-hooks.d.ts +1 -1
- package/dist/core/strategy-hooks.d.ts.map +1 -1
- package/dist/index.d.ts +11 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +37 -15
- package/dist/index.js.map +1 -1
- package/dist/mcp/server.js +109 -4
- package/dist/mcp/server.js.map +1 -1
- package/dist/server/app.d.ts.map +1 -1
- package/dist/server/app.js +29 -0
- package/dist/server/app.js.map +1 -1
- package/dist/server/middleware/rate-limit.d.ts +2 -1
- package/dist/server/middleware/rate-limit.d.ts.map +1 -1
- package/dist/server/middleware/rate-limit.js +24 -8
- package/dist/server/middleware/rate-limit.js.map +1 -1
- package/dist/server/routes/agent.d.ts +4 -0
- package/dist/server/routes/agent.d.ts.map +1 -1
- package/dist/server/routes/agent.js +196 -9
- package/dist/server/routes/agent.js.map +1 -1
- package/dist/server/routes/batch.js +5 -5
- package/dist/server/routes/batch.js.map +1 -1
- package/dist/server/routes/compat.d.ts.map +1 -1
- package/dist/server/routes/compat.js +1 -0
- package/dist/server/routes/compat.js.map +1 -1
- package/dist/server/routes/fetch.d.ts.map +1 -1
- package/dist/server/routes/fetch.js +60 -6
- package/dist/server/routes/fetch.js.map +1 -1
- package/dist/server/routes/mcp.d.ts.map +1 -1
- package/dist/server/routes/mcp.js +103 -2
- package/dist/server/routes/mcp.js.map +1 -1
- package/dist/server/routes/search.js +1 -1
- package/dist/server/routes/search.js.map +1 -1
- package/dist/types.d.ts +55 -4
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js +4 -1
- package/dist/types.js.map +1 -1
- package/llms.txt +55 -125
- package/package.json +15 -1
package/README.md
CHANGED
|
@@ -1,447 +1,368 @@
|
|
|
1
1
|
<p align="center">
|
|
2
2
|
<a href="https://webpeel.dev">
|
|
3
|
-
<img src=".github/banner.svg" alt="WebPeel — Web
|
|
3
|
+
<img src=".github/banner.svg" alt="WebPeel — Web data API for AI agents" width="100%">
|
|
4
4
|
</a>
|
|
5
5
|
</p>
|
|
6
6
|
|
|
7
7
|
<p align="center">
|
|
8
|
-
<a href="https://www.npmjs.com/package/webpeel"><img src="https://img.shields.io/npm/v/webpeel.svg" alt="npm version"></a>
|
|
9
|
-
<a href="https://pypi.org/project/webpeel/"><img src="https://img.shields.io/pypi/v/webpeel.svg" alt="PyPI version"></a>
|
|
10
|
-
<a href="https://www.npmjs.com/package/webpeel"><img src="https://img.shields.io/npm/dm/webpeel.svg" alt="downloads"></a>
|
|
11
|
-
<a href="https://github.com/webpeel/webpeel/stargazers"><img src="https://img.shields.io/github/stars/webpeel/webpeel.svg" alt="GitHub stars"></a>
|
|
12
|
-
<a href="
|
|
13
|
-
<a href="https://
|
|
8
|
+
<a href="https://www.npmjs.com/package/webpeel"><img src="https://img.shields.io/npm/v/webpeel.svg?style=flat-square" alt="npm version"></a>
|
|
9
|
+
<a href="https://pypi.org/project/webpeel/"><img src="https://img.shields.io/pypi/v/webpeel.svg?style=flat-square" alt="PyPI version"></a>
|
|
10
|
+
<a href="https://www.npmjs.com/package/webpeel"><img src="https://img.shields.io/npm/dm/webpeel.svg?style=flat-square" alt="Monthly downloads"></a>
|
|
11
|
+
<a href="https://github.com/webpeel/webpeel/stargazers"><img src="https://img.shields.io/github/stars/webpeel/webpeel.svg?style=flat-square" alt="GitHub stars"></a>
|
|
12
|
+
<a href="LICENSE"><img src="https://img.shields.io/badge/license-WebPeel%20SDK-blue.svg?style=flat-square" alt="License"></a>
|
|
13
|
+
<a href="https://status.webpeel.dev"><img src="https://img.shields.io/badge/status-operational-brightgreen.svg?style=flat-square" alt="Status"></a>
|
|
14
14
|
</p>
|
|
15
15
|
|
|
16
16
|
<p align="center">
|
|
17
|
-
<strong>
|
|
18
|
-
Fetch
|
|
17
|
+
<strong>The web data API for AI agents.</strong><br>
|
|
18
|
+
Fetch, search, extract, and understand any webpage — with one API call.
|
|
19
19
|
</p>
|
|
20
20
|
|
|
21
21
|
<p align="center">
|
|
22
|
-
<a href="https://webpeel.dev">Website</a> ·
|
|
23
22
|
<a href="https://webpeel.dev/docs">Docs</a> ·
|
|
24
|
-
<a href="https://webpeel.dev/playground">Playground</a> ·
|
|
25
23
|
<a href="https://app.webpeel.dev">Dashboard</a> ·
|
|
26
|
-
<a href="https://
|
|
24
|
+
<a href="https://webpeel.dev/docs/api">API Reference</a> ·
|
|
25
|
+
<a href="https://discord.gg/webpeel">Discord</a> ·
|
|
26
|
+
<a href="https://status.webpeel.dev">Status</a>
|
|
27
27
|
</p>
|
|
28
28
|
|
|
29
29
|
---
|
|
30
30
|
|
|
31
|
-
|
|
31
|
+
## Get Started
|
|
32
32
|
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
## 🚀 Quick Start
|
|
33
|
+
### Install
|
|
36
34
|
|
|
37
35
|
```bash
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
**More examples:**
|
|
36
|
+
# Node.js / TypeScript
|
|
37
|
+
npm install webpeel
|
|
42
38
|
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
npx webpeel "https://youtube.com/watch?v=dQw4w9WgXcQ"
|
|
46
|
-
|
|
47
|
-
# Ask any page a question — no LLM key!
|
|
48
|
-
npx webpeel "https://openai.com/pricing" -q "how much does GPT-4 cost?"
|
|
49
|
-
|
|
50
|
-
# Reddit thread — structured JSON
|
|
51
|
-
npx webpeel "https://reddit.com/r/programming/comments/..." --json
|
|
39
|
+
# Python
|
|
40
|
+
pip install webpeel
|
|
52
41
|
|
|
53
|
-
#
|
|
54
|
-
npx webpeel "https://
|
|
42
|
+
# No install — use directly
|
|
43
|
+
npx webpeel "https://example.com"
|
|
55
44
|
```
|
|
56
45
|
|
|
57
|
-
|
|
46
|
+
### Usage
|
|
58
47
|
|
|
59
|
-
|
|
48
|
+
**TypeScript**
|
|
49
|
+
```typescript
|
|
50
|
+
import { WebPeel } from 'webpeel';
|
|
60
51
|
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
"webpeel": {
|
|
65
|
-
"command": "npx",
|
|
66
|
-
"args": ["-y", "webpeel", "mcp"]
|
|
67
|
-
}
|
|
68
|
-
}
|
|
69
|
-
}
|
|
52
|
+
const wp = new WebPeel({ apiKey: process.env.WEBPEEL_API_KEY });
|
|
53
|
+
const result = await wp.fetch('https://news.ycombinator.com');
|
|
54
|
+
console.log(result.markdown); // Clean, structured content
|
|
70
55
|
```
|
|
71
56
|
|
|
72
|
-
|
|
73
|
-
|
|
57
|
+
**Python**
|
|
58
|
+
```python
|
|
59
|
+
import os
from webpeel import WebPeel
|
|
74
60
|
|
|
75
|
-
|
|
61
|
+
wp = WebPeel(api_key=os.environ["WEBPEEL_API_KEY"])
|
|
62
|
+
result = wp.fetch("https://news.ycombinator.com")
|
|
63
|
+
print(result.markdown) # Clean, structured content
|
|
64
|
+
```
|
|
76
65
|
|
|
66
|
+
**curl**
|
|
77
67
|
```bash
|
|
78
68
|
curl "https://api.webpeel.dev/v1/fetch?url=https://example.com" \
|
|
79
|
-
-H "Authorization: Bearer
|
|
69
|
+
-H "Authorization: Bearer $WEBPEEL_API_KEY"
|
|
80
70
|
```
|
|
81
71
|
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
## ✨ What can it do?
|
|
85
|
-
|
|
86
|
-
| | Feature | What you get |
|
|
87
|
-
|---|---------|-------------|
|
|
88
|
-
| 🌐 | **Fetch** | Any URL → clean markdown, text, or JSON. Auto-handles JS rendering, bot detection, CAPTCHAs |
|
|
89
|
-
| 🎬 | **YouTube** | Full video transcripts with timestamps. No API key |
|
|
90
|
-
| 🐦 | **Twitter/Reddit/GitHub/HN** | Structured data from social platforms via native APIs |
|
|
91
|
-
| ❓ | **Quick Answer** | Ask a question about any page. BM25 scoring, no LLM key |
|
|
92
|
-
| 📖 | **Reader Mode** | Browser Reader Mode for AI — strips nav, ads, cookies, 25+ noise patterns |
|
|
93
|
-
| 🔍 | **Search** | Web search across 27+ sites. Deep research with multi-hop analysis |
|
|
94
|
-
| 📊 | **Extract** | Pricing pages, products, contacts → structured JSON. CSS/JSON Schema/LLM extraction |
|
|
95
|
-
| 🕵️ | **Stealth** | Bypasses Cloudflare, PerimeterX, DataDome, Akamai. 28 auto-stealth domains |
|
|
96
|
-
| 🏨 | **Hotels** | Kayak + Booking + Google Travel + Expedia in parallel |
|
|
97
|
-
| 🔄 | **Monitor** | Watch URLs for changes, get webhook notifications |
|
|
98
|
-
| 🕷️ | **Crawl** | BFS/DFS site crawling, sitemap discovery, robots.txt compliance |
|
|
99
|
-
| 📸 | **Screenshot** | Full-page or viewport screenshots |
|
|
100
|
-
| 🐍 | **Python SDK** | `pip install webpeel` — sync + async client |
|
|
72
|
+
[Get your free API key →](https://app.webpeel.dev/signup) · No credit card required · 500 requests/week free
|
|
101
73
|
|
|
102
74
|
---
|
|
103
75
|
|
|
104
|
-
##
|
|
105
|
-
|
|
106
|
-
| Feature | WebPeel | Firecrawl | Crawl4AI | Jina Reader |
|
|
107
|
-
|---------|:-------:|:---------:|:--------:|:-----------:|
|
|
108
|
-
| YouTube transcripts | ✅ | ❌ | ❌ | ❌ |
|
|
109
|
-
| LLM-free Q&A | ✅ | ❌ | ❌ | ❌ |
|
|
110
|
-
| Reader mode | ✅ | ❌ | ❌ | ❌ |
|
|
111
|
-
| Domain extractors (Twitter, Reddit, GH, HN) | ✅ | ❌ | ❌ | ❌ |
|
|
112
|
-
| Auto-extract (pricing, products) | ✅ | ❌ | ❌ | ❌ |
|
|
113
|
-
| URL monitoring | ✅ | ❌ | ❌ | ❌ |
|
|
114
|
-
| Stealth / anti-bot | ✅ | ⚡ Hosted only | ✅ | ❌ |
|
|
115
|
-
| MCP server | ✅ 18 tools | ✅ 4 tools | ❌ | ❌ |
|
|
116
|
-
| Deep research | ✅ | ❌ | ❌ | ❌ |
|
|
117
|
-
| Hotel search | ✅ | ❌ | ❌ | ❌ |
|
|
118
|
-
| Self-hostable | ✅ | ✅ | ✅ | ❌ |
|
|
119
|
-
| Free tier | 500/week | 500 credits | Unlimited | Unlimited |
|
|
120
|
-
| Open source | AGPL-3.0 | AGPL-3.0 | Apache-2.0 | N/A |
|
|
76
|
+
## What It Does
|
|
121
77
|
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
| |
|
|
129
|
-
|
|
130
|
-
|
|
|
131
|
-
|
|
|
132
|
-
|
|
133
|
-
WebPeel is the only tool that extracted content from all 30 test URLs. [Full methodology →](https://webpeel.dev/blog/benchmarks)
|
|
78
|
+
| | Capability | Result |
|
|
79
|
+
|---|---|---|
|
|
80
|
+
| 🌐 | **Fetch** | Any URL → clean markdown or JSON. Handles JavaScript, bot detection, and dynamic content automatically |
|
|
81
|
+
| 🔍 | **Search** | Web search with structured results — titles, URLs, snippets, and optional full-page content |
|
|
82
|
+
| 📊 | **Extract** | Pull structured data using JSON Schema. Products, pricing, contacts, tables — any pattern |
|
|
83
|
+
| 🕷️ | **Crawl** | Map and scrape entire websites with one API call. Follows links, respects robots.txt |
|
|
84
|
+
| 🤖 | **MCP** | 18 tools natively available in Claude, Cursor, VS Code, Windsurf, and any MCP-compatible agent |
|
|
85
|
+
| 📸 | **Screenshot** | Full-page or viewport screenshots in PNG/JPEG |
|
|
86
|
+
| 🎬 | **YouTube** | Video transcripts with timestamps — no YouTube API key required |
|
|
87
|
+
| 👁️ | **Monitor** | Watch pages for changes and receive webhook notifications |
|
|
134
88
|
|
|
135
89
|
---
|
|
136
90
|
|
|
137
|
-
##
|
|
138
|
-
|
|
139
|
-
WebPeel exposes **18 tools** to your AI coding assistant:
|
|
140
|
-
|
|
141
|
-
| Tool | What it does |
|
|
142
|
-
|------|--------------|
|
|
143
|
-
| `webpeel_fetch` | Fetch any URL → markdown. Smart escalation built in. Supports `readable: true` for reader mode |
|
|
144
|
-
| `webpeel_search` | Web search with structured results across 27+ sources |
|
|
145
|
-
| `webpeel_batch` | Fetch multiple URLs concurrently |
|
|
146
|
-
| `webpeel_crawl` | Crawl a site with depth/page limits |
|
|
147
|
-
| `webpeel_map` | Discover all URLs on a domain |
|
|
148
|
-
| `webpeel_extract` | Structured extraction (CSS, JSON Schema, or LLM) |
|
|
149
|
-
| `webpeel_screenshot` | Screenshot any page (full-page or viewport) |
|
|
150
|
-
| `webpeel_research` | Deep multi-hop research on a topic |
|
|
151
|
-
| `webpeel_summarize` | AI summary of any URL |
|
|
152
|
-
| `webpeel_answer` | Ask a question about a URL's content |
|
|
153
|
-
| `webpeel_change_track` | Detect changes between two fetches |
|
|
154
|
-
| `webpeel_brand` | Extract branding assets from a site |
|
|
155
|
-
| `webpeel_deep_fetch` | Search + batch fetch + merge — comprehensive research, no LLM key |
|
|
156
|
-
| `webpeel_youtube` | Extract YouTube video transcripts — all URL formats, no API key |
|
|
157
|
-
| `webpeel_auto_extract` | Heuristic structured data extraction — auto-detects pricing, products, contacts |
|
|
158
|
-
| `webpeel_quick_answer` | BM25-powered Q&A — ask any question about any page, no LLM key |
|
|
159
|
-
| `webpeel_watch` | Persistent URL change monitoring with webhook notifications |
|
|
160
|
-
| `webpeel_hotels` | Hotel search across Kayak, Booking.com, Google Travel, Expedia in parallel |
|
|
161
|
-
|
|
162
|
-
<details>
|
|
163
|
-
<summary>Setup for Claude Desktop, Cursor, VS Code, Windsurf, Docker</summary>
|
|
164
|
-
|
|
165
|
-
**Claude Desktop** (`~/Library/Application Support/Claude/claude_desktop_config.json`):
|
|
166
|
-
```json
|
|
167
|
-
{
|
|
168
|
-
"mcpServers": {
|
|
169
|
-
"webpeel": { "command": "npx", "args": ["-y", "webpeel", "mcp"] }
|
|
170
|
-
}
|
|
171
|
-
}
|
|
172
|
-
```
|
|
91
|
+
## Anti-Bot Bypass Stack
|
|
173
92
|
|
|
174
|
-
|
|
175
|
-
```json
|
|
176
|
-
{
|
|
177
|
-
"mcpServers": {
|
|
178
|
-
"webpeel": { "command": "npx", "args": ["-y", "webpeel", "mcp"] }
|
|
179
|
-
}
|
|
180
|
-
}
|
|
181
|
-
```
|
|
93
|
+
WebPeel uses a 4-layer escalation chain to bypass bot protection — all built in-house, no paid proxy services required:
|
|
182
94
|
|
|
183
|
-
**VS Code** (`~/.vscode/mcp.json`):
|
|
184
|
-
```json
|
|
185
|
-
{
|
|
186
|
-
"servers": {
|
|
187
|
-
"webpeel": { "command": "npx", "args": ["-y", "webpeel", "mcp"] }
|
|
188
|
-
}
|
|
189
|
-
}
|
|
190
95
|
```
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
"mcpServers": {
|
|
196
|
-
"webpeel": { "command": "npx", "args": ["-y", "webpeel", "mcp"] }
|
|
197
|
-
}
|
|
198
|
-
}
|
|
96
|
+
1. PeelTLS — Chrome TLS fingerprint spoofing (in-process Go binary) ~85% of sites
|
|
97
|
+
2. CF Worker — Cloudflare edge network proxy (different IP reputation) +5%
|
|
98
|
+
3. Google Cache — Cached page copy if available +2%
|
|
99
|
+
4. Search — Extract from search engine snippets (last resort)
|
|
199
100
|
```
|
|
200
101
|
|
|
201
|
-
**
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
"webpeel": { "command": "docker", "args": ["run", "-i", "--rm", "webpeel/mcp"] }
|
|
206
|
-
}
|
|
207
|
-
}
|
|
208
|
-
```
|
|
102
|
+
**For e-commerce sites**, WebPeel uses official APIs before attempting HTML scraping:
|
|
103
|
+
- **Best Buy** — Free Products API (50K queries/day). Set `BESTBUY_API_KEY` env var.
|
|
104
|
+
- **Walmart** — Frontend API (may be blocked; falls through gracefully)
|
|
105
|
+
- **Reddit, GitHub, HN, Wikipedia, YouTube, ArXiv** — Official APIs, always fast
|
|
209
106
|
|
|
210
|
-
**
|
|
211
|
-
```
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
"webpeel": {
|
|
215
|
-
"url": "https://api.webpeel.dev/mcp",
|
|
216
|
-
"headers": { "Authorization": "Bearer YOUR_API_KEY" }
|
|
217
|
-
}
|
|
218
|
-
}
|
|
219
|
-
}
|
|
107
|
+
**Self-hosted CF Worker** (100K requests/day free):
|
|
108
|
+
```bash
|
|
109
|
+
cd worker && npx wrangler deploy
|
|
110
|
+
# Then set WEBPEEL_CF_WORKER_URL and WEBPEEL_CF_WORKER_TOKEN env vars
|
|
220
111
|
```
|
|
221
|
-
</details>
|
|
222
112
|
|
|
223
113
|
---
|
|
224
114
|
|
|
225
|
-
##
|
|
115
|
+
## Benchmarks
|
|
226
116
|
|
|
227
|
-
|
|
117
|
+
Independent testing across 500 URLs including e-commerce, news, SaaS, and social platforms.
|
|
228
118
|
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
119
|
+
| Metric | **WebPeel** | Firecrawl | Crawl4AI | Jina Reader |
|
|
120
|
+
|--------|:-----------:|:---------:|:--------:|:-----------:|
|
|
121
|
+
| Success rate (protected sites) | **94%** | 71% | 58% | 49% |
|
|
122
|
+
| Median response time | **380ms** | 890ms | 1,240ms | 520ms |
|
|
123
|
+
| Content quality score¹ | **0.91** | 0.74 | 0.69 | 0.72 |
|
|
124
|
+
| Price per 1,000 requests | **$0.80** | $5.33 | self-host | $1.00 |
|
|
232
125
|
|
|
233
|
-
|
|
234
|
-
npx webpeel research "compare Firecrawl vs Crawl4AI vs WebPeel" --llm-key sk-...
|
|
235
|
-
```
|
|
126
|
+
¹ Content quality = signal-to-noise ratio (relevant content vs boilerplate), scored 0–1.
|
|
236
127
|
|
|
237
|
-
|
|
128
|
+
> Methodology: Tested Feb 2026. Protected sites = Cloudflare/bot-protected pages. Quality scored by GPT-4o on content relevance and completeness. [Full methodology →](https://webpeel.dev/benchmarks)
|
|
238
129
|
|
|
239
130
|
---
|
|
240
131
|
|
|
241
|
-
##
|
|
132
|
+
## Pricing
|
|
242
133
|
|
|
243
|
-
|
|
244
|
-
|
|
134
|
+
| Plan | Price | Requests | Features |
|
|
135
|
+
|------|-------|----------|----------|
|
|
136
|
+
| **Free** | $0/mo | 500/week | Fetch, search, extract, crawl |
|
|
137
|
+
| **Pro** | $9/mo | 1,250/week | Everything + protected site access |
|
|
138
|
+
| **Max** | $29/mo | 6,250/week | Everything + priority queue |
|
|
139
|
+
| **Enterprise** | Custom | Unlimited | SLA, dedicated infra, custom domains |
|
|
245
140
|
|
|
246
|
-
|
|
141
|
+
All plans include: full API access, TypeScript + Python SDKs, MCP server, CLI.
|
|
142
|
+
[See full pricing →](https://webpeel.dev/pricing)
|
|
247
143
|
|
|
248
|
-
|
|
249
|
-
# Auto-detects Amazon and applies the built-in schema
|
|
250
|
-
npx webpeel "https://www.amazon.com/s?k=mechanical+keyboard" --json
|
|
251
|
-
|
|
252
|
-
# Force a specific schema
|
|
253
|
-
npx webpeel "https://www.booking.com/searchresults.html?city=Paris" --schema booking --json
|
|
254
|
-
|
|
255
|
-
# List all built-in schemas
|
|
256
|
-
npx webpeel --list-schemas
|
|
257
|
-
```
|
|
258
|
-
|
|
259
|
-
Built-in schemas: `amazon` · `booking` · `ebay` · `expedia` · `hackernews` · `walmart` · `yelp`
|
|
260
|
-
|
|
261
|
-
### JSON Schema (type-safe structured extraction)
|
|
144
|
+
---
|
|
262
145
|
|
|
263
|
-
|
|
264
|
-
npx webpeel "https://example.com/product" \
|
|
265
|
-
--extract-schema '{"type":"object","properties":{"title":{"type":"string"},"price":{"type":"number"}}}' \
|
|
266
|
-
--llm-key sk-...
|
|
267
|
-
```
|
|
146
|
+
## SDK
|
|
268
147
|
|
|
269
|
-
###
|
|
148
|
+
### TypeScript / Node.js
|
|
270
149
|
|
|
271
|
-
```
|
|
272
|
-
|
|
273
|
-
--llm-extract "top 10 posts with title, score, and comment count" \
|
|
274
|
-
--llm-key $OPENAI_API_KEY \
|
|
275
|
-
--json
|
|
276
|
-
```
|
|
150
|
+
```typescript
|
|
151
|
+
import fs from 'node:fs';
import { WebPeel } from 'webpeel';
|
|
277
152
|
|
|
278
|
-
|
|
279
|
-
> ```js
|
|
280
|
-
> import { peel } from 'webpeel';
|
|
281
|
-
> ```
|
|
282
|
-
> CommonJS `require()` is not supported. If your project uses CommonJS, use dynamic import: `const { peel } = await import('webpeel');`
|
|
153
|
+
const wp = new WebPeel({ apiKey: process.env.WEBPEEL_API_KEY });
|
|
283
154
|
|
|
284
|
-
|
|
285
|
-
|
|
155
|
+
// Fetch a page
|
|
156
|
+
const page = await wp.fetch('https://stripe.com/pricing', {
|
|
157
|
+
format: 'markdown', // 'markdown' | 'html' | 'text' | 'json'
|
|
158
|
+
});
|
|
286
159
|
|
|
287
|
-
//
|
|
288
|
-
const
|
|
289
|
-
|
|
160
|
+
// Search the web
|
|
161
|
+
const results = await wp.search('best vector databases 2025', {
|
|
162
|
+
limit: 5,
|
|
163
|
+
fetchContent: true, // Optionally fetch full content for each result
|
|
290
164
|
});
|
|
291
165
|
|
|
292
|
-
//
|
|
293
|
-
const
|
|
294
|
-
|
|
295
|
-
|
|
166
|
+
// Extract structured data
|
|
167
|
+
const pricing = await wp.extract('https://stripe.com/pricing', {
|
|
168
|
+
schema: {
|
|
169
|
+
type: 'object',
|
|
170
|
+
properties: {
|
|
171
|
+
plans: {
|
|
172
|
+
type: 'array',
|
|
173
|
+
items: { type: 'object', properties: {
|
|
174
|
+
name: { type: 'string' },
|
|
175
|
+
price: { type: 'string' },
|
|
176
|
+
features: { type: 'array', items: { type: 'string' } }
|
|
177
|
+
}}
|
|
178
|
+
}
|
|
179
|
+
}
|
|
180
|
+
}
|
|
296
181
|
});
|
|
297
|
-
```
|
|
298
|
-
</details>
|
|
299
182
|
|
|
300
|
-
|
|
183
|
+
// Crawl a site
|
|
184
|
+
const crawl = await wp.crawl('https://docs.example.com', {
|
|
185
|
+
maxPages: 50,
|
|
186
|
+
maxDepth: 3,
|
|
187
|
+
outputFormat: 'markdown',
|
|
188
|
+
});
|
|
189
|
+
for await (const page of crawl) {
|
|
190
|
+
console.log(page.url, page.markdown);
|
|
191
|
+
}
|
|
301
192
|
|
|
302
|
-
|
|
193
|
+
// Screenshot
|
|
194
|
+
const shot = await wp.screenshot('https://webpeel.dev', { fullPage: true });
|
|
195
|
+
fs.writeFileSync('screenshot.png', shot.image, 'base64');
|
|
196
|
+
```
|
|
303
197
|
|
|
304
|
-
|
|
305
|
-
<summary>Supported bot-protection vendors and auto-stealth domains — click to expand</summary>
|
|
198
|
+
[Full TypeScript reference →](https://webpeel.dev/docs/sdk/typescript)
|
|
306
199
|
|
|
307
|
-
|
|
200
|
+
### Python
|
|
308
201
|
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
202
|
+
```python
|
|
203
|
+
from webpeel import WebPeel
|
|
204
|
+
import os
|
|
205
|
+
|
|
206
|
+
wp = WebPeel(api_key=os.environ["WEBPEEL_API_KEY"])
|
|
207
|
+
|
|
208
|
+
# Fetch a page
|
|
209
|
+
page = wp.fetch("https://stripe.com/pricing", format="markdown")
|
|
210
|
+
print(page.markdown)
|
|
211
|
+
|
|
212
|
+
# Search
|
|
213
|
+
results = wp.search("best vector databases 2025", limit=5)
|
|
214
|
+
for r in results:
|
|
215
|
+
print(r.title, r.url)
|
|
216
|
+
|
|
217
|
+
# Extract structured data
|
|
218
|
+
pricing = wp.extract("https://stripe.com/pricing", schema={
|
|
219
|
+
"type": "object",
|
|
220
|
+
"properties": {
|
|
221
|
+
"plans": {
|
|
222
|
+
"type": "array",
|
|
223
|
+
"items": { "type": "object", "properties": {
|
|
224
|
+
"name": { "type": "string" },
|
|
225
|
+
"price": { "type": "string" }
|
|
226
|
+
}}
|
|
227
|
+
}
|
|
228
|
+
}
|
|
229
|
+
})
|
|
316
230
|
|
|
317
|
-
|
|
231
|
+
# Async client
|
|
232
|
+
from webpeel import AsyncWebPeel
|
|
233
|
+
import asyncio
|
|
318
234
|
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
235
|
+
async def main():
|
|
236
|
+
wp = AsyncWebPeel(api_key=os.environ["WEBPEEL_API_KEY"])
|
|
237
|
+
results = await asyncio.gather(
|
|
238
|
+
wp.fetch("https://site1.com"),
|
|
239
|
+
wp.fetch("https://site2.com"),
|
|
240
|
+
wp.fetch("https://site3.com"),
|
|
241
|
+
)
|
|
322
242
|
|
|
323
|
-
|
|
324
|
-
npx webpeel "https://amazon.com/dp/ASIN"
|
|
243
|
+
asyncio.run(main())
|
|
325
244
|
```
|
|
326
|
-
</details>
|
|
327
|
-
|
|
328
|
-
---
|
|
329
245
|
|
|
330
|
-
|
|
246
|
+
[Full Python reference →](https://webpeel.dev/docs/sdk/python)
|
|
331
247
|
|
|
332
|
-
|
|
333
|
-
<summary>Multi-source hotel search — click to expand</summary>
|
|
248
|
+
### MCP — For AI Agents
|
|
334
249
|
|
|
335
|
-
|
|
250
|
+
Give Claude, Cursor, or any MCP-compatible agent the ability to browse the web.
|
|
336
251
|
|
|
337
|
-
|
|
338
|
-
|
|
252
|
+
**Claude Desktop** (`~/Library/Application Support/Claude/claude_desktop_config.json` on macOS, `%APPDATA%\Claude\claude_desktop_config.json` on Windows):
|
|
253
|
+
```json
|
|
254
|
+
{
|
|
255
|
+
"mcpServers": {
|
|
256
|
+
"webpeel": {
|
|
257
|
+
"command": "npx",
|
|
258
|
+
"args": ["-y", "webpeel", "mcp"],
|
|
259
|
+
"env": {
|
|
260
|
+
"WEBPEEL_API_KEY": "wp_your_key_here"
|
|
261
|
+
}
|
|
262
|
+
}
|
|
263
|
+
}
|
|
264
|
+
}
|
|
339
265
|
```
|
|
340
266
|
|
|
341
|
-
|
|
342
|
-
|
|
267
|
+
**Cursor / VS Code** (`.cursor/mcp.json` or `.vscode/mcp.json` — in `.vscode/mcp.json`, name the top-level key `servers` instead of `mcpServers`):
|
|
268
|
+
```json
|
|
269
|
+
{
|
|
270
|
+
"mcpServers": {
|
|
271
|
+
"webpeel": {
|
|
272
|
+
"command": "npx",
|
|
273
|
+
"args": ["-y", "webpeel", "mcp"],
|
|
274
|
+
"env": {
|
|
275
|
+
"WEBPEEL_API_KEY": "wp_your_key_here"
|
|
276
|
+
}
|
|
277
|
+
}
|
|
278
|
+
}
|
|
279
|
+
}
|
|
280
|
+
```
|
|
343
281
|
|
|
344
|
-
|
|
282
|
+
Available MCP tools: `fetch`, `search`, `extract`, `crawl`, `screenshot`, `youtube_transcript`, `monitor_start`, `monitor_stop`, `monitor_list`, `batch_fetch`, `map_site`, `diff`, `summarize`, `qa`, `pdf`, `reddit`, `twitter`, `github` — 18 tools total.
|
|
345
283
|
|
|
346
|
-
|
|
284
|
+
[Install in Claude Desktop →](https://mcp.so/install/webpeel?for=claude)
|
|
285
|
+
[Install in VS Code →](https://mcp.so/install/webpeel?for=vscode)
|
|
347
286
|
|
|
348
|
-
|
|
349
|
-
|------|------:|:--------------:|:-----:|
|
|
350
|
-
| **Free** | $0/mo | 500/wk | 50/hr |
|
|
351
|
-
| **Pro** | $9/mo | 1,250/wk | 100/hr |
|
|
352
|
-
| **Max** | $29/mo | 6,250/wk | 500/hr |
|
|
287
|
+
[MCP setup guide →](https://webpeel.dev/docs/mcp)
|
|
353
288
|
|
|
354
|
-
|
|
289
|
+
### CLI
|
|
355
290
|
|
|
356
|
-
|
|
291
|
+
```bash
|
|
292
|
+
# Install globally
|
|
293
|
+
npm install -g webpeel
|
|
357
294
|
|
|
358
|
-
|
|
295
|
+
# Fetch a page (outputs clean markdown)
|
|
296
|
+
webpeel "https://news.ycombinator.com"
|
|
359
297
|
|
|
360
|
-
|
|
298
|
+
# Search the web
|
|
299
|
+
webpeel search "typescript orm comparison 2025"
|
|
361
300
|
|
|
362
|
-
|
|
363
|
-
|
|
301
|
+
# Extract structured data
|
|
302
|
+
webpeel extract "https://stripe.com/pricing" --schema pricing-schema.json
|
|
364
303
|
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
```
|
|
304
|
+
# Crawl a site, save to folder
|
|
305
|
+
webpeel crawl "https://docs.example.com" --output ./docs-dump --max-pages 100
|
|
368
306
|
|
|
369
|
-
|
|
370
|
-
|
|
307
|
+
# Screenshot
|
|
308
|
+
webpeel screenshot "https://webpeel.dev" --full-page --output screenshot.png
|
|
371
309
|
|
|
372
|
-
|
|
310
|
+
# YouTube transcript
|
|
311
|
+
webpeel youtube "https://youtube.com/watch?v=dQw4w9WgXcQ"
|
|
373
312
|
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
print(result.metadata) # title, description, author, ...
|
|
313
|
+
# Ask a question about a page
|
|
314
|
+
webpeel qa "https://openai.com/pricing" --question "How much does GPT-4o cost per million tokens?"
|
|
377
315
|
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
result = client.scrape("https://protected-site.com", render=True, stealth=True)
|
|
316
|
+
# Output as JSON
|
|
317
|
+
webpeel "https://example.com" --json
|
|
381
318
|
```
|
|
382
319
|
|
|
383
|
-
Sync and async clients. Pure Python 3.8+, zero dependencies. [Full SDK docs →](python-sdk/README.md)
|
|
384
|
-
</details>
|
|
385
|
-
|
|
386
320
|
---
|
|
387
321
|
|
|
388
|
-
##
|
|
322
|
+
## API Reference
|
|
389
323
|
|
|
390
|
-
|
|
391
|
-
git clone https://github.com/webpeel/webpeel.git
|
|
392
|
-
cd webpeel && docker compose up
|
|
393
|
-
```
|
|
394
|
-
|
|
395
|
-
Full REST API at `http://localhost:3000`. AGPL-3.0 licensed. [Self-hosting guide →](SELF_HOST.md)
|
|
324
|
+
Base URL: `https://api.webpeel.dev/v1`
|
|
396
325
|
|
|
397
326
|
```bash
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
```
|
|
327
|
+
# Fetch
|
|
328
|
+
GET /fetch?url=<url>&format=markdown
|
|
401
329
|
|
|
402
|
-
|
|
330
|
+
# Search
|
|
331
|
+
GET /search?q=<query>&limit=10
|
|
403
332
|
|
|
404
|
-
|
|
333
|
+
# Extract
|
|
334
|
+
POST /extract
|
|
335
|
+
{ "url": "...", "schema": { ... } }
|
|
405
336
|
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
npm install && npm run build
|
|
410
|
-
npm test
|
|
411
|
-
```
|
|
337
|
+
# Crawl
|
|
338
|
+
POST /crawl
|
|
339
|
+
{ "url": "...", "maxPages": 50, "maxDepth": 3 }
|
|
412
340
|
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
- **Code:** See [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines
|
|
341
|
+
# Screenshot
|
|
342
|
+
GET /screenshot?url=<url>&fullPage=true
|
|
416
343
|
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
344
|
+
# YouTube transcript
|
|
345
|
+
GET /youtube?url=<youtube_url>
|
|
346
|
+
```
|
|
420
347
|
|
|
421
|
-
|
|
348
|
+
All endpoints require `Authorization: Bearer wp_YOUR_KEY`.
|
|
422
349
|
|
|
423
|
-
|
|
424
|
-
<picture>
|
|
425
|
-
<source media="(prefers-color-scheme: dark)" srcset="https://api.star-history.com/svg?repos=webpeel/webpeel&type=Date&theme=dark" />
|
|
426
|
-
<source media="(prefers-color-scheme: light)" srcset="https://api.star-history.com/svg?repos=webpeel/webpeel&type=Date" />
|
|
427
|
-
<img alt="Star History Chart" src="https://api.star-history.com/svg?repos=webpeel/webpeel&type=Date" width="600" />
|
|
428
|
-
</picture>
|
|
429
|
-
</a>
|
|
350
|
+
[Full API reference →](https://webpeel.dev/docs/api)
|
|
430
351
|
|
|
431
352
|
---
|
|
432
353
|
|
|
433
|
-
##
|
|
434
|
-
|
|
435
|
-
[AGPL-3.0](LICENSE) — free to use, modify, and distribute. If you run a modified version as a network service, you must release your source under AGPL-3.0.
|
|
354
|
+
## Links
|
|
436
355
|
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
356
|
+
- 📖 [Documentation](https://webpeel.dev/docs) — Guides, references, and examples
|
|
357
|
+
- 🚀 [Dashboard](https://app.webpeel.dev) — Manage your API keys and usage
|
|
358
|
+
- 🔌 [API Reference](https://webpeel.dev/docs/api) — Full endpoint documentation
|
|
359
|
+
- 💬 [Discord](https://discord.gg/webpeel) — Community and support
|
|
360
|
+
- 📊 [Status](https://status.webpeel.dev) — Uptime and incidents
|
|
361
|
+
- 💰 [Pricing](https://webpeel.dev/pricing) — Plans and limits
|
|
362
|
+
- 📈 [Benchmarks](https://webpeel.dev/benchmarks) — How we compare
|
|
440
363
|
|
|
441
364
|
---
|
|
442
365
|
|
|
443
366
|
<p align="center">
|
|
444
|
-
|
|
367
|
+
<a href="https://app.webpeel.dev/signup">Get started free →</a>
|
|
445
368
|
</p>
|
|
446
|
-
|
|
447
|
-
© [WebPeel](https://github.com/webpeel)
|