webpeel 0.21.86 → 0.21.88

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (159)
  1. package/README.md +89 -279
  2. package/dist/cli/commands/fetch.js +28 -1
  3. package/dist/cli/commands/monitor.d.ts +12 -0
  4. package/dist/cli/commands/monitor.js +197 -0
  5. package/dist/cli/commands/search.js +15 -2
  6. package/dist/cli/utils.js +10 -1
  7. package/dist/cli.js +2 -0
  8. package/dist/core/browser-fetch.d.ts +2 -0
  9. package/dist/core/browser-fetch.js +24 -7
  10. package/dist/core/content-pruner.js +3 -0
  11. package/dist/core/crawler.d.ts +2 -0
  12. package/dist/core/crawler.js +3 -1
  13. package/dist/core/http-fetch.js +19 -2
  14. package/dist/core/markdown.js +38 -5
  15. package/dist/core/metadata.d.ts +7 -0
  16. package/dist/core/metadata.js +27 -1
  17. package/dist/core/pipeline.js +54 -25
  18. package/dist/core/readability.js +2 -1
  19. package/dist/core/schema-templates.js +37 -24
  20. package/dist/core/search-provider.d.ts +2 -0
  21. package/dist/core/search-provider.js +9 -2
  22. package/dist/core/searxng-provider.d.ts +1 -0
  23. package/dist/core/searxng-provider.js +1 -0
  24. package/dist/core/strategies.d.ts +4 -1
  25. package/dist/core/strategies.js +17 -3
  26. package/dist/core/watch-manager.d.ts +5 -1
  27. package/dist/core/watch-manager.js +39 -12
  28. package/dist/core/watch.d.ts +2 -0
  29. package/dist/core/watch.js +31 -9
  30. package/dist/ee/domain-extractors.d.ts +4 -44
  31. package/dist/ee/domain-extractors.js +4 -6338
  32. package/dist/ee/extractors/allrecipes.d.ts +2 -0
  33. package/dist/ee/extractors/allrecipes.js +120 -0
  34. package/dist/ee/extractors/amazon.d.ts +2 -0
  35. package/dist/ee/extractors/amazon.js +78 -0
  36. package/dist/ee/extractors/arxiv.d.ts +2 -0
  37. package/dist/ee/extractors/arxiv.js +137 -0
  38. package/dist/ee/extractors/bestbuy.d.ts +2 -0
  39. package/dist/ee/extractors/bestbuy.js +78 -0
  40. package/dist/ee/extractors/carscom.d.ts +2 -0
  41. package/dist/ee/extractors/carscom.js +121 -0
  42. package/dist/ee/extractors/coingecko.d.ts +2 -0
  43. package/dist/ee/extractors/coingecko.js +134 -0
  44. package/dist/ee/extractors/craigslist.d.ts +2 -0
  45. package/dist/ee/extractors/craigslist.js +92 -0
  46. package/dist/ee/extractors/devto.d.ts +2 -0
  47. package/dist/ee/extractors/devto.js +135 -0
  48. package/dist/ee/extractors/ebay.d.ts +2 -0
  49. package/dist/ee/extractors/ebay.js +90 -0
  50. package/dist/ee/extractors/espn.d.ts +2 -0
  51. package/dist/ee/extractors/espn.js +255 -0
  52. package/dist/ee/extractors/etsy.d.ts +2 -0
  53. package/dist/ee/extractors/etsy.js +52 -0
  54. package/dist/ee/extractors/facebook.d.ts +2 -0
  55. package/dist/ee/extractors/facebook.js +46 -0
  56. package/dist/ee/extractors/github.d.ts +2 -0
  57. package/dist/ee/extractors/github.js +196 -0
  58. package/dist/ee/extractors/google-flights.d.ts +2 -0
  59. package/dist/ee/extractors/google-flights.js +176 -0
  60. package/dist/ee/extractors/hackernews.d.ts +2 -0
  61. package/dist/ee/extractors/hackernews.js +147 -0
  62. package/dist/ee/extractors/imdb.d.ts +2 -0
  63. package/dist/ee/extractors/imdb.js +172 -0
  64. package/dist/ee/extractors/index.d.ts +26 -0
  65. package/dist/ee/extractors/index.js +247 -0
  66. package/dist/ee/extractors/instagram.d.ts +2 -0
  67. package/dist/ee/extractors/instagram.js +102 -0
  68. package/dist/ee/extractors/kalshi.d.ts +2 -0
  69. package/dist/ee/extractors/kalshi.js +115 -0
  70. package/dist/ee/extractors/kayak-cars.d.ts +2 -0
  71. package/dist/ee/extractors/kayak-cars.js +270 -0
  72. package/dist/ee/extractors/linkedin.d.ts +2 -0
  73. package/dist/ee/extractors/linkedin.js +113 -0
  74. package/dist/ee/extractors/medium.d.ts +2 -0
  75. package/dist/ee/extractors/medium.js +130 -0
  76. package/dist/ee/extractors/news.d.ts +4 -0
  77. package/dist/ee/extractors/news.js +173 -0
  78. package/dist/ee/extractors/npm.d.ts +2 -0
  79. package/dist/ee/extractors/npm.js +86 -0
  80. package/dist/ee/extractors/pdf.d.ts +2 -0
  81. package/dist/ee/extractors/pdf.js +108 -0
  82. package/dist/ee/extractors/pinterest.d.ts +2 -0
  83. package/dist/ee/extractors/pinterest.js +34 -0
  84. package/dist/ee/extractors/polymarket.d.ts +2 -0
  85. package/dist/ee/extractors/polymarket.js +162 -0
  86. package/dist/ee/extractors/producthunt.d.ts +2 -0
  87. package/dist/ee/extractors/producthunt.js +88 -0
  88. package/dist/ee/extractors/pubmed.d.ts +2 -0
  89. package/dist/ee/extractors/pubmed.js +162 -0
  90. package/dist/ee/extractors/pypi.d.ts +2 -0
  91. package/dist/ee/extractors/pypi.js +80 -0
  92. package/dist/ee/extractors/reddit.d.ts +2 -0
  93. package/dist/ee/extractors/reddit.js +308 -0
  94. package/dist/ee/extractors/redfin.d.ts +2 -0
  95. package/dist/ee/extractors/redfin.js +156 -0
  96. package/dist/ee/extractors/semanticscholar.d.ts +2 -0
  97. package/dist/ee/extractors/semanticscholar.js +131 -0
  98. package/dist/ee/extractors/shared.d.ts +12 -0
  99. package/dist/ee/extractors/shared.js +76 -0
  100. package/dist/ee/extractors/soundcloud.d.ts +2 -0
  101. package/dist/ee/extractors/soundcloud.js +34 -0
  102. package/dist/ee/extractors/sportsbetting.d.ts +2 -0
  103. package/dist/ee/extractors/sportsbetting.js +37 -0
  104. package/dist/ee/extractors/spotify.d.ts +2 -0
  105. package/dist/ee/extractors/spotify.js +34 -0
  106. package/dist/ee/extractors/stackoverflow.d.ts +2 -0
  107. package/dist/ee/extractors/stackoverflow.js +61 -0
  108. package/dist/ee/extractors/substack.d.ts +2 -0
  109. package/dist/ee/extractors/substack.js +115 -0
  110. package/dist/ee/extractors/substackroot.d.ts +2 -0
  111. package/dist/ee/extractors/substackroot.js +46 -0
  112. package/dist/ee/extractors/tiktok.d.ts +2 -0
  113. package/dist/ee/extractors/tiktok.js +29 -0
  114. package/dist/ee/extractors/tradingview.d.ts +2 -0
  115. package/dist/ee/extractors/tradingview.js +176 -0
  116. package/dist/ee/extractors/twitch.d.ts +2 -0
  117. package/dist/ee/extractors/twitch.js +36 -0
  118. package/dist/ee/extractors/twitter.d.ts +2 -0
  119. package/dist/ee/extractors/twitter.js +327 -0
  120. package/dist/ee/extractors/types.d.ts +14 -0
  121. package/dist/ee/extractors/types.js +1 -0
  122. package/dist/ee/extractors/utils.d.ts +12 -0
  123. package/dist/ee/extractors/utils.js +84 -0
  124. package/dist/ee/extractors/walmart.d.ts +2 -0
  125. package/dist/ee/extractors/walmart.js +50 -0
  126. package/dist/ee/extractors/weather.d.ts +2 -0
  127. package/dist/ee/extractors/weather.js +133 -0
  128. package/dist/ee/extractors/wikipedia.d.ts +4 -0
  129. package/dist/ee/extractors/wikipedia.js +235 -0
  130. package/dist/ee/extractors/yelp.d.ts +2 -0
  131. package/dist/ee/extractors/yelp.js +216 -0
  132. package/dist/ee/extractors/youtube.d.ts +2 -0
  133. package/dist/ee/extractors/youtube.js +189 -0
  134. package/dist/ee/extractors/zillow.d.ts +54 -0
  135. package/dist/ee/extractors/zillow.js +247 -0
  136. package/dist/mcp/handlers/definitions.js +37 -16
  137. package/dist/server/app.js +33 -0
  138. package/dist/server/bull-queues.d.ts +1 -0
  139. package/dist/server/email-service.d.ts +30 -0
  140. package/dist/server/email-service.js +86 -0
  141. package/dist/server/middleware/audit-log.d.ts +12 -0
  142. package/dist/server/middleware/audit-log.js +40 -0
  143. package/dist/server/pg-auth-store.d.ts +7 -0
  144. package/dist/server/pg-auth-store.js +39 -0
  145. package/dist/server/routes/feed.d.ts +15 -0
  146. package/dist/server/routes/feed.js +311 -0
  147. package/dist/server/routes/fetch-queue.js +1 -0
  148. package/dist/server/routes/fetch.js +153 -17
  149. package/dist/server/routes/go.d.ts +14 -0
  150. package/dist/server/routes/go.js +81 -0
  151. package/dist/server/routes/jobs.js +1 -0
  152. package/dist/server/routes/mcp.js +62 -6
  153. package/dist/server/routes/screenshot.js +3 -0
  154. package/dist/server/routes/search.js +5 -0
  155. package/dist/server/routes/smart-search.d.ts +5 -3
  156. package/dist/server/routes/smart-search.js +1848 -141
  157. package/dist/server/routes/users.js +120 -0
  158. package/dist/types.d.ts +6 -0
  159. package/package.json +29 -4
package/README.md CHANGED
@@ -5,248 +5,61 @@
5
5
  </p>
6
6
 
7
7
  <p align="center">
8
- <a href="https://github.com/webpeel/webpeel/actions/workflows/ci.yml"><img src="https://github.com/webpeel/webpeel/actions/workflows/ci.yml/badge.svg" alt="CI"></a>
9
8
  <a href="https://www.npmjs.com/package/webpeel"><img src="https://img.shields.io/npm/v/webpeel.svg?style=flat-square" alt="npm version"></a>
10
- <a href="https://pypi.org/project/webpeel/"><img src="https://img.shields.io/pypi/v/webpeel.svg?style=flat-square" alt="PyPI version"></a>
9
+ <a href="https://www.npmjs.com/package/webpeel"><img src="https://img.shields.io/npm/dm/webpeel.svg?style=flat-square" alt="npm downloads"></a>
10
+ <a href="https://github.com/webpeel/webpeel/stargazers"><img src="https://img.shields.io/github/stars/webpeel/webpeel?style=flat-square" alt="GitHub stars"></a>
11
11
  <a href="LICENSE"><img src="https://img.shields.io/badge/license-WebPeel%20SDK-blue.svg?style=flat-square" alt="License"></a>
12
- <a href="https://webpeel.dev/status"><img src="https://img.shields.io/badge/status-operational-brightgreen.svg?style=flat-square" alt="Status"></a>
13
- </p>
14
-
15
- <p align="center">
16
- <strong>The web data API for AI agents.</strong><br>
17
- Fetch, search, extract, and understand any webpage — with one API call.
12
+ <a href="https://github.com/webpeel/webpeel/actions/workflows/ci.yml"><img src="https://github.com/webpeel/webpeel/actions/workflows/ci.yml/badge.svg" alt="CI"></a>
18
13
  </p>
19
14
 
20
15
  <p align="center">
21
- <a href="https://webpeel.dev/docs">Docs</a> ·
22
- <a href="https://app.webpeel.dev">Dashboard</a> ·
23
- <a href="https://webpeel.dev/docs/api">API Reference</a> ·
24
- <a href="https://discord.gg/webpeel">Discord</a> ·
25
- <a href="https://webpeel.dev/status">Status</a>
16
+ <strong>The web data platform for AI agents — fetch, search, crawl, extract, monitor, screenshot, and research any URL.</strong>
26
17
  </p>
27
18
 
28
19
  ---
29
20
 
30
- ## Get Started
31
-
32
- ### Install
33
-
34
- ```bash
35
- # Node.js / TypeScript
36
- npm install webpeel
37
-
38
- # Python
39
- pip install webpeel
40
-
41
- # No install — use directly
42
- npx webpeel "https://example.com"
43
- ```
44
-
45
- ### Usage
46
-
47
- **TypeScript**
48
- ```typescript
49
- import { WebPeel } from 'webpeel';
50
-
51
- const wp = new WebPeel({ apiKey: process.env.WEBPEEL_API_KEY });
52
- const result = await wp.fetch('https://news.ycombinator.com');
53
- console.log(result.markdown); // Clean, structured content
54
- ```
55
-
56
- **Python**
57
- ```python
58
- from webpeel import WebPeel
21
+ ## Quick Start
59
22
 
60
- wp = WebPeel(api_key=os.environ["WEBPEEL_API_KEY"])
61
- result = wp.fetch("https://news.ycombinator.com")
62
- print(result.markdown) # Clean, structured content
63
- ```
64
-
65
- **curl**
66
23
  ```bash
67
- curl "https://api.webpeel.dev/v1/fetch?url=https://example.com" \
68
- -H "Authorization: Bearer $WEBPEEL_API_KEY"
24
+ npx webpeel "https://example.com" # Clean markdown
25
+ npx webpeel search "AI trends 2025" # Web search
26
+ npx webpeel crawl docs.example.com # Crawl entire site
69
27
  ```
70
28
 
71
29
  [Get your free API key →](https://app.webpeel.dev/signup) · No credit card required · 500 requests/week free
72
30
 
73
31
  ---
74
32
 
75
- ## What It Does
76
-
77
- | | Capability | Result |
78
- |---|---|---|
79
- | 🌐 | **Fetch** | Any URL → clean markdown or JSON. Handles JavaScript, bot detection, and dynamic content automatically |
80
- | 🔍 | **Search** | Web search with structured results — titles, URLs, snippets, and optional full-page content |
81
- | 📊 | **Extract** | Pull structured data using JSON Schema. Products, pricing, contacts, tables — any pattern |
82
- | 🕷️ | **Crawl** | Map and scrape entire websites with one API call. Follows links, respects robots.txt |
83
- | 🤖 | **MCP** | 7 tools natively available in Claude, Cursor, VS Code, Windsurf, and any MCP-compatible agent |
84
- | 📸 | **Screenshot** | Full-page or viewport screenshots in PNG/JPEG |
85
- | 🎬 | **YouTube** | Video transcripts with timestamps — no YouTube API key required |
86
- | 👁️ | **Monitor** | Watch pages for changes and receive webhook notifications |
87
-
88
- ---
89
-
90
- ## Anti-Bot Bypass Stack
91
-
92
- WebPeel uses a 4-layer escalation chain to bypass bot protection — all built in-house, no paid proxy services required:
93
-
94
- ```
95
- 1. PeelTLS — Chrome TLS fingerprint spoofing (in-process Go binary) ~85% of sites
96
- 2. CF Worker — Cloudflare edge network proxy (different IP reputation) +5%
97
- 3. Google Cache — Cached page copy if available +2%
98
- 4. Search — Extract from search engine snippets (last resort) last resort
99
- ```
100
-
101
- **For e-commerce sites**, WebPeel uses official APIs before attempting HTML scraping:
102
- - **Best Buy** — Free Products API (50K queries/day). Set `BESTBUY_API_KEY` env var.
103
- - **Walmart** — Frontend API (may be blocked; falls through gracefully)
104
- - **Reddit, GitHub, HN, Wikipedia, YouTube, ArXiv** — Official APIs, always fast
105
-
106
- **Self-hosted CF Worker** (100K requests/day free):
107
- ```bash
108
- cd worker && npx wrangler deploy
109
- # Then set WEBPEEL_CF_WORKER_URL and WEBPEEL_CF_WORKER_TOKEN env vars
110
- ```
111
-
112
- ---
113
-
114
- ## Benchmarks
115
-
116
- Independent testing across 500 URLs including e-commerce, news, SaaS, and social platforms.
117
-
118
- | Metric | **WebPeel** | Firecrawl | Crawl4AI | Jina Reader |
119
- |--------|:-----------:|:---------:|:--------:|:-----------:|
120
- | Success rate (protected sites) | **97.6%** | 71% | 58% | 49% |
121
- | Median response time | **380ms** | 890ms | 1,240ms | 520ms |
122
- | Content quality score¹ | **0.91** | 0.74 | 0.69 | 0.72 |
123
- | Price per 1,000 requests | **$0.80** | $5.33 | self-host | $1.00 |
33
+ ## Why WebPeel
124
34
 
125
- ¹ Content quality = signal-to-noise ratio (relevant content vs boilerplate), scored 0–1.
126
-
127
- > Methodology: Tested Feb 2026. Protected sites = Cloudflare/bot-protected pages. Quality scored by GPT-4o on content relevance and completeness. [Full methodology →](https://webpeel.dev/benchmarks)
35
+ - **65–98% token savings** — domain-specific extractors strip boilerplate, ads, and nav before content reaches your agent
36
+ - **29 domain extractors** — purpose-built parsers for Reddit, Wikipedia, GitHub, Hacker News, YouTube, ArXiv, Amazon, and 22 more
37
+ - **Zero-config Cloudflare bypass** — 4-layer escalation stack handles TLS fingerprinting, edge proxying, and cache fallback automatically
128
38
 
129
39
  ---
130
40
 
131
- ## Pricing
132
-
133
- | Plan | Price | Requests | Features |
134
- |------|-------|----------|----------|
135
- | **Free** | $0/mo | 500/week | Fetch, search, extract, crawl |
136
- | **Pro** | $9/mo | 1,250/week | Everything + protected site access |
137
- | **Max** | $29/mo | 6,250/week | Everything + priority queue |
138
- | **Enterprise** | Custom | Unlimited | SLA, dedicated infra, custom domains |
139
-
140
- All plans include: full API access, TypeScript + Python SDKs, MCP server, CLI.
141
- [See full pricing →](https://webpeel.dev/pricing)
41
+ ## Features
42
+
43
+ | Feature | Command / API |
44
+ |---------|---------------|
45
+ | Fetch any URL | `webpeel "url"` |
46
+ | Web search | `webpeel search "query"` |
47
+ | Crawl sites | `webpeel crawl "url" --max-pages 50` |
48
+ | Screenshots | `webpeel screenshot "url"` |
49
+ | Monitor changes | `webpeel monitor "url" --interval 300` |
50
+ | Browser actions | `--action 'click:.btn,wait:2000'` |
51
+ | YouTube transcripts | auto-detected |
52
+ | PDF extraction | auto-detected |
53
+ | MCP server | `webpeel mcp` |
54
+ | Schema extraction | `POST /v1/fetch` with `extract.schema` |
55
+ | Research agent | `POST /v1/agent` |
56
+ | Smart search | `POST /v1/search/smart` |
142
57
 
143
58
  ---
144
59
 
145
- ## SDK
146
-
147
- ### TypeScript / Node.js
148
-
149
- ```typescript
150
- import { WebPeel } from 'webpeel';
151
-
152
- const wp = new WebPeel({ apiKey: process.env.WEBPEEL_API_KEY });
153
-
154
- // Fetch a page
155
- const page = await wp.fetch('https://stripe.com/pricing', {
156
- format: 'markdown', // 'markdown' | 'html' | 'text' | 'json'
157
- });
158
-
159
- // Search the web
160
- const results = await wp.search('best vector databases 2025', {
161
- limit: 5,
162
- fetchContent: true, // Optionally fetch full content for each result
163
- });
164
-
165
- // Extract structured data
166
- const pricing = await wp.extract('https://stripe.com/pricing', {
167
- schema: {
168
- type: 'object',
169
- properties: {
170
- plans: {
171
- type: 'array',
172
- items: { type: 'object', properties: {
173
- name: { type: 'string' },
174
- price: { type: 'string' },
175
- features: { type: 'array', items: { type: 'string' } }
176
- }}
177
- }
178
- }
179
- }
180
- });
181
-
182
- // Crawl a site
183
- const crawl = await wp.crawl('https://docs.example.com', {
184
- maxPages: 50,
185
- maxDepth: 3,
186
- outputFormat: 'markdown',
187
- });
188
- for await (const page of crawl) {
189
- console.log(page.url, page.markdown);
190
- }
60
+ ## MCP Integration
191
61
 
192
- // Screenshot
193
- const shot = await wp.screenshot('https://webpeel.dev', { fullPage: true });
194
- fs.writeFileSync('screenshot.png', shot.image, 'base64');
195
- ```
196
-
197
- [Full TypeScript reference →](https://webpeel.dev/docs/sdk/typescript)
198
-
199
- ### Python
200
-
201
- ```python
202
- from webpeel import WebPeel
203
- import os
204
-
205
- wp = WebPeel(api_key=os.environ["WEBPEEL_API_KEY"])
206
-
207
- # Fetch a page
208
- page = wp.fetch("https://stripe.com/pricing", format="markdown")
209
- print(page.markdown)
210
-
211
- # Search
212
- results = wp.search("best vector databases 2025", limit=5)
213
- for r in results:
214
- print(r.title, r.url)
215
-
216
- # Extract structured data
217
- pricing = wp.extract("https://stripe.com/pricing", schema={
218
- "type": "object",
219
- "properties": {
220
- "plans": {
221
- "type": "array",
222
- "items": { "type": "object", "properties": {
223
- "name": { "type": "string" },
224
- "price": { "type": "string" }
225
- }}
226
- }
227
- }
228
- })
229
-
230
- # Async client
231
- from webpeel import AsyncWebPeel
232
- import asyncio
233
-
234
- async def main():
235
- wp = AsyncWebPeel(api_key=os.environ["WEBPEEL_API_KEY"])
236
- results = await asyncio.gather(
237
- wp.fetch("https://site1.com"),
238
- wp.fetch("https://site2.com"),
239
- wp.fetch("https://site3.com"),
240
- )
241
-
242
- asyncio.run(main())
243
- ```
244
-
245
- [Full Python reference →](https://webpeel.dev/docs/sdk/python)
246
-
247
- ### MCP — For AI Agents
248
-
249
- Give Claude, Cursor, or any MCP-compatible agent the ability to browse the web.
62
+ Give Claude, Cursor, or any MCP-compatible agent the ability to browse the web in one config change.
250
63
 
251
64
  **Claude Desktop** (`~/.claude/claude_desktop_config.json`):
252
65
  ```json
@@ -278,96 +91,93 @@ Give Claude, Cursor, or any MCP-compatible agent the ability to browse the web.
278
91
  }
279
92
  ```
280
93
 
281
- Available MCP tools:
282
- - `webpeel` — general fetch and extract
283
- - `webpeel_read` — fetch and read page content
284
- - `webpeel_see` — screenshot and visual analysis
285
- - `webpeel_find` — web search
286
- - `webpeel_extract` — structured data extraction
287
- - `webpeel_monitor` — watch URLs for changes
288
- - `webpeel_act` — interact with dynamic pages
94
+ Available MCP tools: `webpeel_read`, `webpeel_find`, `webpeel_see`, `webpeel_extract`, `webpeel_monitor`, `webpeel_act`, `webpeel_crawl`
289
95
 
290
- [![Install in Claude Desktop](https://img.shields.io/badge/Install-Claude%20Desktop-5B3FFF?style=for-the-badge&logo=anthropic)](https://mcp.so/install/webpeel?for=claude)
291
- [![Install in VS Code](https://img.shields.io/badge/Install-VS%20Code-007ACC?style=for-the-badge&logo=visualstudiocode)](https://mcp.so/install/webpeel?for=vscode)
96
+ [Full MCP setup guide →](https://webpeel.dev/docs/mcp)
292
97
 
293
- [MCP setup guide →](https://webpeel.dev/docs/mcp)
98
+ ---
294
99
 
295
- ### CLI
100
+ ## API Example
296
101
 
297
102
  ```bash
298
- # Install globally
299
- npm install -g webpeel
300
-
301
- # Fetch a page (outputs clean markdown)
302
- webpeel "https://news.ycombinator.com"
103
+ # Fetch any page — returns clean markdown + metadata
104
+ curl "https://api.webpeel.dev/v1/fetch?url=https://stripe.com/pricing" \
105
+ -H "Authorization: Bearer $WEBPEEL_API_KEY"
106
+ ```
303
107
 
304
- # Search the web
305
- webpeel search "typescript orm comparison 2025"
108
+ ```json
109
+ {
110
+ "url": "https://stripe.com/pricing",
111
+ "markdown": "# Stripe Pricing\n\n**Integrated per-transaction fees**...",
112
+ "metadata": {
113
+ "title": "Pricing & Fees | Stripe",
114
+ "tokens": 420,
115
+ "tokensOriginal": 8200,
116
+ "savingsPct": 94.9
117
+ }
118
+ }
119
+ ```
306
120
 
307
- # Extract structured data with a JSON schema
308
- webpeel "https://stripe.com/pricing" --extract-schema pricing-schema.json
121
+ [Full API reference →](https://webpeel.dev/docs/api)
309
122
 
310
- # Crawl a site
311
- webpeel crawl "https://docs.example.com" --max-pages 100
123
+ ---
312
124
 
313
- # Screenshot
314
- webpeel screenshot "https://webpeel.dev" --full-page --output screenshot.png
125
+ ## Token Efficiency
315
126
 
316
- # YouTube transcript
317
- webpeel "https://youtube.com/watch?v=dQw4w9WgXcQ" --json
127
+ WebPeel's 29 domain-specific extractors strip navigation, ads, sidebars, and boilerplate before sending content to your agent.
318
128
 
319
- # Ask a question about a page
320
- webpeel ask "https://openai.com/pricing" "How much does GPT-4o cost per million tokens?"
129
+ | Site type | Raw HTML tokens | WebPeel tokens | Savings |
130
+ |-----------|:--------------:|:--------------:|:-------:|
131
+ | News article | 18,000 | 640 | **96%** |
132
+ | Reddit thread | 24,000 | 890 | **96%** |
133
+ | Wikipedia page | 31,000 | 2,100 | **93%** |
134
+ | GitHub README | 5,200 | 1,800 | **65%** |
135
+ | E-commerce product | 14,000 | 310 | **98%** |
321
136
 
322
- # Output as JSON
323
- webpeel "https://example.com" --json
324
- ```
137
+ Less context used = lower costs + faster inference + longer agent chains.
325
138
 
326
139
  ---
327
140
 
328
- ## API Reference
141
+ ## Security
329
142
 
330
- Base URL: `https://api.webpeel.dev/v1`
143
+ WebPeel is built with security-first principles:
331
144
 
332
- ```bash
333
- # Fetch
334
- GET /fetch?url=<url>&format=markdown
145
+ - **Helmet.js headers** — HSTS, X-Frame-Options, nosniff, XSS protection on all responses
146
+ - **Webhook signing** — HMAC-SHA256 signatures on all outbound webhooks
147
+ - **Audit logging** — every API call logged with IP, key, and action
148
+ - **GDPR compliant** — `DELETE /v1/account` for full data erasure
149
+ - **SSH hardened** — Fail2Ban, MaxAuthTries, key-only auth on all infrastructure
335
150
 
336
- # Search
337
- GET /search?q=<query>&limit=10
151
+ [Security policy →](https://webpeel.dev/security)
338
152
 
339
- # Extract
340
- POST /extract
341
- { "url": "...", "schema": { ... } }
153
+ ---
342
154
 
343
- # Crawl
344
- POST /crawl
345
- { "url": "...", "maxPages": 50, "maxDepth": 3 }
155
+ ## Links
346
156
 
347
- # Screenshot
348
- GET /screenshot?url=<url>&fullPage=true
157
+ - 📖 [Documentation](https://webpeel.dev/docs) — Guides, references, and examples
158
+ - 💰 [Pricing](https://webpeel.dev/pricing) — Plans and limits
159
+ - 📝 [Blog](https://webpeel.dev/blog) — Tutorials, comparisons, and use cases
160
+ - 📊 [Status](https://webpeel.dev/status) — Uptime and incidents
161
+ - 🔒 [Security](https://webpeel.dev/security) — Security policy and disclosure
162
+ - 📋 [SLA](https://webpeel.dev/sla) — Uptime commitments
349
163
 
350
- # YouTube transcript
351
- GET /youtube?url=<youtube_url>
352
- ```
164
+ ---
353
165
 
354
- All endpoints require `Authorization: Bearer wp_YOUR_KEY`.
166
+ ## Contributing
355
167
 
356
- [Full API reference →](https://webpeel.dev/docs/api)
168
+ Pull requests welcome! Please open an issue first to discuss major changes.
357
169
 
358
- ---
170
+ 1. Fork the repo
171
+ 2. Create your feature branch (`git checkout -b feat/my-feature`)
172
+ 3. Commit your changes (`git commit -m 'feat: add my feature'`)
173
+ 4. Push to the branch (`git push origin feat/my-feature`)
174
+ 5. Open a Pull Request
359
175
 
360
- ## Links
176
+ ---
361
177
 
362
- - 📖 [Documentation](https://webpeel.dev/docs) — Guides, references, and examples
363
- - 🚀 [Dashboard](https://app.webpeel.dev) — Manage your API keys and usage
364
- - 🔌 [API Reference](https://webpeel.dev/docs/api) — Full endpoint documentation
365
- - 💬 [Discord](https://discord.gg/webpeel) — Community and support
366
- - 📊 [Status](https://webpeel.dev/status) — Uptime and incidents
367
- - 💰 [Pricing](https://webpeel.dev/pricing) — Plans and limits
368
- - 📈 [Benchmarks](https://webpeel.dev/benchmarks) — How we compare
178
+ ## License
369
179
 
370
- ---
180
+ [WebPeel SDK License](LICENSE) — free for personal and commercial use with attribution. See LICENSE for full terms.
371
181
 
372
182
  <p align="center">
373
183
  <a href="https://app.webpeel.dev/signup">Get started free →</a>
@@ -4,6 +4,7 @@
4
4
  import ora from 'ora';
5
5
  import { writeFileSync, readFileSync, existsSync } from 'fs';
6
6
  import { getProfilePath, loadStorageState, touchProfile } from '../../core/profiles.js';
7
+ import { shouldForceBrowser } from '../../core/strategies.js';
7
8
  import { peel, cleanup } from '../../index.js';
8
9
  import { checkUsage, showUsageFooter, loadConfig } from '../../cli-auth.js';
9
10
  import { getCache, setCache, parseTTL } from '../../cache.js';
@@ -289,6 +290,7 @@ export async function runFetch(url, options) {
289
290
  format: options.html ? 'html' : options.text ? 'text' : options.clean ? 'clean' : 'markdown',
290
291
  budget: null, // Budget excluded from cache key — cache stores full content
291
292
  readable: options.readable || false,
293
+ noDomainApi: options.skipDomainApi || false, // Different cache for domain-api bypass
292
294
  };
293
295
  const cachedResult = getCache(url, cacheOptions);
294
296
  if (cachedResult) {
@@ -603,6 +605,7 @@ export async function runFetch(url, options) {
603
605
  headers,
604
606
  cookies: options.cookie,
605
607
  raw: options.raw || false,
608
+ noDomainApi: options.skipDomainApi || false,
606
609
  lite: options.lite || false,
607
610
  actions,
608
611
  maxTokens: options.maxTokens,
@@ -626,6 +629,7 @@ export async function runFetch(url, options) {
626
629
  device: options.device,
627
630
  viewportWidth: options.viewport ? options.viewport.width : undefined,
628
631
  viewportHeight: options.viewport ? options.viewport.height : undefined,
632
+ deviceScaleFactor: options.scale,
629
633
  waitUntil: options.waitUntil,
630
634
  waitSelector: options.waitSelector,
631
635
  blockResources: options.blockResources ? options.blockResources.split(',').map((s) => s.trim()) : undefined,
@@ -676,8 +680,19 @@ export async function runFetch(url, options) {
676
680
  const fetchCfg = loadConfig();
677
681
  const fetchApiKey = fetchCfg.apiKey || process.env.WEBPEEL_API_KEY;
678
682
  const fetchApiUrl = process.env.WEBPEEL_API_URL || 'https://api.webpeel.dev';
683
+ // Features that require a local browser and cannot be delegated to the remote API.
684
+ // Also include domains (like amazon.com) that require stealth/browser rendering —
685
+ // the remote API won't render them correctly without special flags, so route locally.
686
+ const domainNeedsLocalBrowser = !!(shouldForceBrowser(url));
687
+ const needsLocalBrowser = !!(peelOptions.screenshot ||
688
+ peelOptions.actions?.length ||
689
+ peelOptions.profileDir ||
690
+ peelOptions.headed ||
691
+ peelOptions.storageState ||
692
+ peelOptions.cloaked ||
693
+ domainNeedsLocalBrowser);
679
694
  let result;
680
- if (fetchApiKey) {
695
+ if (fetchApiKey && !needsLocalBrowser) {
681
696
  // Use the WebPeel API — no local Playwright needed
682
697
  result = await fetchViaApi(url, peelOptions, fetchApiKey, fetchApiUrl);
683
698
  }
@@ -724,6 +739,16 @@ export async function runFetch(url, options) {
724
739
  ? ` [${result.domainData.domain}:${result.domainData.type}]`
725
740
  : '';
726
741
  spinner.succeed(`Fetched in ${result.elapsed}ms using ${result.method} method${domainTag}`);
742
+ // Smart hints — suggest features the user might not know about
743
+ if (!options.silent && !options.json && !options.skipDomainApi) {
744
+ if (result.method === 'domain-api') {
745
+ const extractorName = result.domainData?.domain || new URL(url).hostname.replace('www.', '') || 'domain';
746
+ console.error(`\x1b[33m💡 Tip: Using our ${extractorName} extractor. Want the raw page instead? Add --skip-domain-api\x1b[0m`);
747
+ }
748
+ }
749
+ if (!options.silent && !options.json && result.tokens && result.tokens < 50 && !options.render) {
750
+ console.error(`\x1b[33m💡 Tip: Page returned very little content. Try --render for JavaScript-heavy sites or --stealth if blocked.\x1b[0m`);
751
+ }
727
752
  }
728
753
  // Show metadata header
729
754
  const pageTitle = result.metadata?.title || result.title;
@@ -1176,6 +1201,7 @@ export function registerFetchCommands(program) {
1176
1201
  .option('--images', 'Output image URLs from the page')
1177
1202
  .option('--meta', 'Output only the page metadata (title, description, author, etc.)')
1178
1203
  .option('--raw', 'Return full page without smart content extraction')
1204
+ .option('--skip-domain-api', 'Bypass domain-specific API extractors — force actual page scraping')
1179
1205
  .option('--full', 'Alias for --raw — full page content, no budget')
1180
1206
  .option('--lite', 'Lite mode — minimal processing, maximum speed (skip pruning, budget, metadata)')
1181
1207
  .option('--action <actions...>', 'Page actions before scraping (e.g., "click:.btn" "wait:2000" "scroll:bottom")')
@@ -1208,6 +1234,7 @@ export function registerFetchCommands(program) {
1208
1234
  const [w, h] = val.split('x').map(Number);
1209
1235
  return { width: w, height: h };
1210
1236
  })
1237
+ .option('--scale <factor>', 'Device scale factor (pixel density) for screenshots (default: auto from device profile)', parseFloat)
1211
1238
  .option('--wait-until <event>', 'Page load event: domcontentloaded, networkidle, load, commit (auto-enables --render)')
1212
1239
  .option('--wait-selector <css>', 'Wait for CSS selector before extracting (auto-enables --render)')
1213
1240
  .option('--block-resources <types>', 'Block resource types, comma-separated: image,stylesheet,font,media,script (auto-enables --render)')
@@ -0,0 +1,12 @@
1
+ /**
2
+ * Monitor command: content change detection for URLs
3
+ *
4
+ * Usage:
5
+ * webpeel monitor <url> - Fetch & snapshot (or diff if prev exists)
6
+ * webpeel monitor <url> --interval 300 - Watch mode: re-check every 5 minutes
7
+ * webpeel monitor <url> --json - JSON output for automation
8
+ * webpeel monitor <url> --render - Use browser rendering
9
+ * webpeel monitor <url> --selector <css> - Monitor specific section only
10
+ */
11
+ import type { Command } from 'commander';
12
+ export declare function registerMonitorCommands(program: Command): void;