webpeel 0.7.0 → 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +140 -500
- package/dist/cli-auth.d.ts +2 -0
- package/dist/cli-auth.d.ts.map +1 -1
- package/dist/cli-auth.js +16 -3
- package/dist/cli-auth.js.map +1 -1
- package/dist/cli.js +475 -77
- package/dist/cli.js.map +1 -1
- package/dist/core/actions.d.ts +19 -10
- package/dist/core/actions.d.ts.map +1 -1
- package/dist/core/actions.js +214 -43
- package/dist/core/actions.js.map +1 -1
- package/dist/core/agent.d.ts +60 -3
- package/dist/core/agent.d.ts.map +1 -1
- package/dist/core/agent.js +375 -86
- package/dist/core/agent.js.map +1 -1
- package/dist/core/answer.d.ts +43 -0
- package/dist/core/answer.d.ts.map +1 -0
- package/dist/core/answer.js +378 -0
- package/dist/core/answer.js.map +1 -0
- package/dist/core/cache.d.ts +14 -0
- package/dist/core/cache.d.ts.map +1 -0
- package/dist/core/cache.js +122 -0
- package/dist/core/cache.js.map +1 -0
- package/dist/core/dns-cache.d.ts +21 -0
- package/dist/core/dns-cache.d.ts.map +1 -0
- package/dist/core/dns-cache.js +184 -0
- package/dist/core/dns-cache.js.map +1 -0
- package/dist/core/documents.d.ts +24 -0
- package/dist/core/documents.d.ts.map +1 -0
- package/dist/core/documents.js +124 -0
- package/dist/core/documents.js.map +1 -0
- package/dist/core/extract-inline.d.ts +39 -0
- package/dist/core/extract-inline.d.ts.map +1 -0
- package/dist/core/extract-inline.js +214 -0
- package/dist/core/extract-inline.js.map +1 -0
- package/dist/core/fetcher.d.ts +33 -7
- package/dist/core/fetcher.d.ts.map +1 -1
- package/dist/core/fetcher.js +608 -41
- package/dist/core/fetcher.js.map +1 -1
- package/dist/core/jobs.d.ts +66 -0
- package/dist/core/jobs.d.ts.map +1 -0
- package/dist/core/jobs.js +513 -0
- package/dist/core/jobs.js.map +1 -0
- package/dist/core/markdown.d.ts.map +1 -1
- package/dist/core/markdown.js +141 -31
- package/dist/core/markdown.js.map +1 -1
- package/dist/core/pdf.d.ts.map +1 -1
- package/dist/core/pdf.js +3 -1
- package/dist/core/pdf.js.map +1 -1
- package/dist/core/screenshot.d.ts +33 -0
- package/dist/core/screenshot.d.ts.map +1 -0
- package/dist/core/screenshot.js +30 -0
- package/dist/core/screenshot.js.map +1 -0
- package/dist/core/search-provider.d.ts +46 -0
- package/dist/core/search-provider.d.ts.map +1 -0
- package/dist/core/search-provider.js +281 -0
- package/dist/core/search-provider.js.map +1 -0
- package/dist/core/strategies.d.ts +7 -10
- package/dist/core/strategies.d.ts.map +1 -1
- package/dist/core/strategies.js +370 -63
- package/dist/core/strategies.js.map +1 -1
- package/dist/index.d.ts +9 -3
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +61 -32
- package/dist/index.js.map +1 -1
- package/dist/mcp/server.js +335 -70
- package/dist/mcp/server.js.map +1 -1
- package/dist/types.d.ts +43 -1
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js.map +1 -1
- package/llms.txt +85 -47
- package/package.json +11 -5
package/dist/types.d.ts
CHANGED
|
@@ -3,11 +3,29 @@
|
|
|
3
3
|
*/
|
|
4
4
|
export interface PageAction {
|
|
5
5
|
type: 'wait' | 'click' | 'scroll' | 'type' | 'fill' | 'select' | 'press' | 'hover' | 'waitForSelector' | 'screenshot';
|
|
6
|
+
/** CSS selector for element-targeted actions */
|
|
6
7
|
selector?: string;
|
|
8
|
+
/**
|
|
9
|
+
* Value/text payload for actions like type/fill/select.
|
|
10
|
+
* Accepts Firecrawl-style `text` too (normalized internally).
|
|
11
|
+
*/
|
|
7
12
|
value?: string;
|
|
13
|
+
text?: string;
|
|
14
|
+
/** Keyboard key for press actions (e.g., "Enter") */
|
|
8
15
|
key?: string;
|
|
16
|
+
/** Wait duration for wait actions (ms). Firecrawl uses `milliseconds`. */
|
|
9
17
|
ms?: number;
|
|
18
|
+
milliseconds?: number;
|
|
19
|
+
/**
|
|
20
|
+
* Scroll target (absolute) — legacy/internal.
|
|
21
|
+
* Use direction+amount for relative scrolling.
|
|
22
|
+
*/
|
|
10
23
|
to?: 'top' | 'bottom' | number;
|
|
24
|
+
/** Relative scroll direction (Firecrawl-style) */
|
|
25
|
+
direction?: 'up' | 'down' | 'left' | 'right';
|
|
26
|
+
/** Relative scroll amount in pixels (Firecrawl-style) */
|
|
27
|
+
amount?: number;
|
|
28
|
+
/** Per-action timeout override (ms) */
|
|
11
29
|
timeout?: number;
|
|
12
30
|
}
|
|
13
31
|
export interface ExtractOptions {
|
|
@@ -24,6 +42,18 @@ export interface ExtractOptions {
|
|
|
24
42
|
/** LLM API base URL (default: https://api.openai.com/v1) */
|
|
25
43
|
llmBaseUrl?: string;
|
|
26
44
|
}
|
|
45
|
+
/**
|
|
46
|
+
* Inline structured extraction options (BYOK, multi-provider).
|
|
47
|
+
* Used with /v1/fetch POST, /v2/scrape, and /v1/scrape (Firecrawl compat).
|
|
48
|
+
*/
|
|
49
|
+
export interface InlineExtractParam {
|
|
50
|
+
/** JSON Schema describing the desired output structure */
|
|
51
|
+
schema?: Record<string, any>;
|
|
52
|
+
/** Natural language prompt describing what to extract */
|
|
53
|
+
prompt?: string;
|
|
54
|
+
}
|
|
55
|
+
/** LLM provider for BYOK inline extraction */
|
|
56
|
+
export type InlineLLMProvider = 'openai' | 'anthropic' | 'google';
|
|
27
57
|
export interface PeelOptions {
|
|
28
58
|
/** Use headless browser instead of simple HTTP fetch */
|
|
29
59
|
render?: boolean;
|
|
@@ -35,6 +65,8 @@ export interface PeelOptions {
|
|
|
35
65
|
format?: 'markdown' | 'text' | 'html';
|
|
36
66
|
/** Request timeout in milliseconds (default: 30000) */
|
|
37
67
|
timeout?: number;
|
|
68
|
+
/** Prepare streaming responses (API plumbing only; full SSE/chunked stream not yet implemented) */
|
|
69
|
+
stream?: boolean;
|
|
38
70
|
/** Custom user agent */
|
|
39
71
|
userAgent?: string;
|
|
40
72
|
/** Capture a screenshot of the page */
|
|
@@ -123,8 +155,10 @@ export interface PeelResult {
|
|
|
123
155
|
quality?: number;
|
|
124
156
|
/** SHA256 hash of content (first 16 chars) — for change detection */
|
|
125
157
|
fingerprint?: string;
|
|
126
|
-
/** Extracted structured data (when extract option is used) */
|
|
158
|
+
/** Extracted structured data (when extract option is used — CSS/heuristic extraction) */
|
|
127
159
|
extracted?: Record<string, any>;
|
|
160
|
+
/** Structured JSON from inline LLM extraction (when extract + llmProvider is used) */
|
|
161
|
+
json?: Record<string, any>;
|
|
128
162
|
/** Branding/design system profile */
|
|
129
163
|
branding?: import('./core/branding.js').BrandingProfile;
|
|
130
164
|
/** Content change tracking result */
|
|
@@ -145,6 +179,14 @@ export interface PageMetadata {
|
|
|
145
179
|
image?: string;
|
|
146
180
|
/** Canonical URL */
|
|
147
181
|
canonical?: string;
|
|
182
|
+
/** MIME content type (set for documents like PDF/DOCX) */
|
|
183
|
+
contentType?: string;
|
|
184
|
+
/** Word count (set for documents like PDF/DOCX) */
|
|
185
|
+
wordCount?: number;
|
|
186
|
+
/** Page count (set for PDF documents) */
|
|
187
|
+
pages?: number;
|
|
188
|
+
/** Allow additional document-specific metadata */
|
|
189
|
+
[key: string]: any;
|
|
148
190
|
}
|
|
149
191
|
export declare class WebPeelError extends Error {
|
|
150
192
|
code?: string | undefined;
|
package/dist/types.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,MAAM,WAAW,UAAU;IACzB,IAAI,EAAE,MAAM,GAAG,OAAO,GAAG,QAAQ,GAAG,MAAM,GAAG,MAAM,GAAG,QAAQ,GAAG,OAAO,GAAG,OAAO,GAAG,iBAAiB,GAAG,YAAY,CAAC;
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,MAAM,WAAW,UAAU;IACzB,IAAI,EAAE,MAAM,GAAG,OAAO,GAAG,QAAQ,GAAG,MAAM,GAAG,MAAM,GAAG,QAAQ,GAAG,OAAO,GAAG,OAAO,GAAG,iBAAiB,GAAG,YAAY,CAAC;IAEtH,gDAAgD;IAChD,QAAQ,CAAC,EAAE,MAAM,CAAC;IAElB;;;OAGG;IACH,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,MAAM,CAAC;IAEd,qDAAqD;IACrD,GAAG,CAAC,EAAE,MAAM,CAAC;IAEb,0EAA0E;IAC1E,EAAE,CAAC,EAAE,MAAM,CAAC;IACZ,YAAY,CAAC,EAAE,MAAM,CAAC;IAEtB;;;OAGG;IACH,EAAE,CAAC,EAAE,KAAK,GAAG,QAAQ,GAAG,MAAM,CAAC;IAE/B,kDAAkD;IAClD,SAAS,CAAC,EAAE,IAAI,GAAG,MAAM,GAAG,MAAM,GAAG,OAAO,CAAC;IAE7C,yDAAyD;IACzD,MAAM,CAAC,EAAE,MAAM,CAAC;IAEhB,uCAAuC;IACvC,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,WAAW,cAAc;IAC7B,wCAAwC;IACxC,MAAM,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAC7B,0CAA0C;IAC1C,SAAS,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACnC,yDAAyD;IACzD,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,6DAA6D;IAC7D,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,8CAA8C;IAC9C,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,4DAA4D;IAC5D,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED;;;GAGG;AACH,MAAM,WAAW,kBAAkB;IACjC,0DAA0D;IAC1D,MAAM,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAC7B,yDAAyD;IACzD,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED,8CAA8C;AAC9C,MAAM,MAAM,iBAAiB,GAAG,QAAQ,GAAG,WAAW,GAAG,QAAQ,CAAC;AAElE,MAAM,WAAW,WAAW;IAC1B,wDAAwD;IACxD,MAAM,CAAC,EAAE,OAAO,CAAC;IACjB,+FAA+F;IAC/F,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,wEAAwE;IACxE,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,oBAAoB;IACpB,MAAM,CAAC,EAAE,UAAU,GAAG,MAAM,GAAG,MAAM,CAAC;IACtC,uDAAuD;IACvD,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,mGAAmG;IACnG,MAAM,CAAC,EAAE,OAAO,CAAC;IACjB,wBAAwB;IACxB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,uCAAuC;IACvC,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,oDAAoD;IACpD,kBAAkB,CAAC,EAAE,OAAO,CAAC;IAC7B,2FAA2F;IAC3F,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,yEAAyE;IACzE,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IACnB,4FAA4F;IAC5F,WAAW,CAAC,EAAE,MAAM,EAAE,CAAC;IACvB,iFAAiF;IACjF,WAAW,CAAC,EAAE,MAAM,EAAE,CAAC;IACvB,kCAAkC;IAClC,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,uCAAuC;IACvC,OAAO,
CAAC,EAAE,MAAM,EAAE,CAAC;IACnB,qFAAqF;IACrF,GAAG,CAAC,EAAE,OAAO,CAAC;IACd,sEAAsE;IACtE,OAAO,CAAC,EAAE,UAAU,EAAE,CAAC;IACvB,mEAAmE;IACnE,OAAO,CAAC,EAAE,cAAc,CAAC;IACzB,0EAA0E;IAC1E,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,qDAAqD;IACrD,cAAc,CAAC,EAAE,OAAO,CAAC;IACzB,4DAA4D;IAC5D,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,qCAAqC;IACrC,OAAO,CAAC,EAAE,OAAO,GAAG;QAAE,MAAM,CAAC,EAAE,MAAM,CAAC;QAAC,SAAS,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC;IAC5D,8DAA8D;IAC9D,GAAG,CAAC,EAAE;QACJ,MAAM,EAAE,MAAM,CAAC;QACf,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,OAAO,CAAC,EAAE,MAAM,CAAC;KAClB,CAAC;IACF,mCAAmC;IACnC,MAAM,CAAC,EAAE,OAAO,CAAC;IACjB,8DAA8D;IAC9D,QAAQ,CAAC,EAAE;QACT,+DAA+D;QAC/D,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,mDAAmD;QACnD,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;KACtB,CAAC;CACH;AAED,MAAM,WAAW,SAAS;IACxB,gCAAgC;IAChC,GAAG,EAAE,MAAM,CAAC;IACZ,eAAe;IACf,GAAG,EAAE,MAAM,CAAC;IACZ,sBAAsB;IACtB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,yBAAyB;IACzB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,0BAA0B;IAC1B,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,UAAU;IACzB,kCAAkC;IAClC,GAAG,EAAE,MAAM,CAAC;IACZ,iBAAiB;IACjB,KAAK,EAAE,MAAM,CAAC;IACd,uCAAuC;IACvC,OAAO,EAAE,MAAM,CAAC;IAChB,yBAAyB;IACzB,QAAQ,EAAE,YAAY,CAAC;IACvB,gEAAgE;IAChE,KAAK,EAAE,MAAM,EAAE,CAAC;IAChB,wDAAwD;IACxD,MAAM,EAAE,MAAM,CAAC;IACf,oDAAoD;IACpD,MAAM,EAAE,QAAQ,GAAG,SAAS,GAAG,SAAS,CAAC;IACzC,mCAAmC;IACnC,OAAO,EAAE,MAAM,CAAC;IAChB,yEAAyE;IACzE,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,+DAA+D;IAC/D,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,+DAA+D;IAC/D,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,qEAAqE;IACrE,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,yFAAyF;IACzF,SAAS,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAChC,sFAAsF;IACtF,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAC3B,qCAAqC;IACrC,QAAQ,CAAC,EAAE,OAAO,oBAAoB,EAAE,eAAe,CAAC;IACxD,qCAAqC;IACrC,cAAc,CAAC,EAAE,OAAO,2BAA2B,EAAE,YAAY,CAAC;IAClE,2BAA2B;IAC3B,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,mDAAmD;IACnD,MAAM,CAAC,EAAE,SAAS,EAAE,CAAC;CACtB;AAED,MAAM,WAAW,YAAY;IAC3B,uBAAuB;IACvB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,kBAAkB;IAClB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,gCAAgC;IAChC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,2B
AA2B;IAC3B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,oBAAoB;IACpB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,0DAA0D;IAC1D,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,mDAAmD;IACnD,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,yCAAyC;IACzC,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,kDAAkD;IAClD,CAAC,GAAG,EAAE,MAAM,GAAG,GAAG,CAAC;CACpB;AAED,qBAAa,YAAa,SAAQ,KAAK;IACD,IAAI,CAAC,EAAE,MAAM;gBAArC,OAAO,EAAE,MAAM,EAAS,IAAI,CAAC,EAAE,MAAM,YAAA;CAIlD;AAED,qBAAa,YAAa,SAAQ,YAAY;gBAChC,OAAO,EAAE,MAAM;CAI5B;AAED,qBAAa,YAAa,SAAQ,YAAY;gBAChC,OAAO,EAAE,MAAM;CAI5B;AAED,qBAAa,YAAa,SAAQ,YAAY;gBAChC,OAAO,EAAE,MAAM;CAI5B"}
|
package/dist/types.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;GAEG;
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;GAEG;AA0MH,MAAM,OAAO,YAAa,SAAQ,KAAK;IACD;IAApC,YAAY,OAAe,EAAS,IAAa;QAC/C,KAAK,CAAC,OAAO,CAAC,CAAC;QADmB,SAAI,GAAJ,IAAI,CAAS;QAE/C,IAAI,CAAC,IAAI,GAAG,cAAc,CAAC;IAC7B,CAAC;CACF;AAED,MAAM,OAAO,YAAa,SAAQ,YAAY;IAC5C,YAAY,OAAe;QACzB,KAAK,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC;QAC1B,IAAI,CAAC,IAAI,GAAG,cAAc,CAAC;IAC7B,CAAC;CACF;AAED,MAAM,OAAO,YAAa,SAAQ,YAAY;IAC5C,YAAY,OAAe;QACzB,KAAK,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC;QAC1B,IAAI,CAAC,IAAI,GAAG,cAAc,CAAC;IAC7B,CAAC;CACF;AAED,MAAM,OAAO,YAAa,SAAQ,YAAY;IAC5C,YAAY,OAAe;QACzB,KAAK,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC;QAC1B,IAAI,CAAC,IAAI,GAAG,cAAc,CAAC;IAC7B,CAAC;CACF"}
|
package/llms.txt
CHANGED
|
@@ -1,15 +1,33 @@
|
|
|
1
1
|
# WebPeel
|
|
2
2
|
|
|
3
|
-
> Fetch
|
|
3
|
+
> The web data API for AI agents. Fetch, search, crawl, extract, and research — one tool, zero config.
|
|
4
4
|
|
|
5
|
-
WebPeel is an open-source web fetcher
|
|
5
|
+
WebPeel is an open-source web fetcher that converts any URL to clean, AI-ready markdown. Smart escalation tries fast HTTP first (~150ms), auto-escalates to headless browser when needed, and uses stealth mode for heavily protected sites.
|
|
6
6
|
|
|
7
7
|
## Quick Start
|
|
8
8
|
|
|
9
9
|
```bash
|
|
10
|
-
# CLI
|
|
10
|
+
# CLI
|
|
11
11
|
npx webpeel https://example.com
|
|
12
12
|
|
|
13
|
+
# With browser rendering (JS-heavy sites)
|
|
14
|
+
npx webpeel https://example.com --render
|
|
15
|
+
|
|
16
|
+
# Search the web
|
|
17
|
+
npx webpeel search "latest AI news"
|
|
18
|
+
|
|
19
|
+
# Crawl a site
|
|
20
|
+
npx webpeel crawl https://example.com --max-pages 20
|
|
21
|
+
|
|
22
|
+
# Screenshot
|
|
23
|
+
npx webpeel screenshot https://example.com --full-page
|
|
24
|
+
|
|
25
|
+
# AI-powered answer with citations
|
|
26
|
+
npx webpeel answer "What is WebPeel?" --llm openai
|
|
27
|
+
|
|
28
|
+
# Research agent
|
|
29
|
+
npx webpeel agent "Compare React vs Vue in 2025" --llm-key $OPENAI_API_KEY
|
|
30
|
+
|
|
13
31
|
# Library
|
|
14
32
|
import { peel } from 'webpeel';
|
|
15
33
|
const result = await peel('https://example.com');
|
|
@@ -18,14 +36,37 @@ const result = await peel('https://example.com');
|
|
|
18
36
|
npx webpeel mcp
|
|
19
37
|
```
|
|
20
38
|
|
|
39
|
+
## API Endpoints
|
|
40
|
+
|
|
41
|
+
Base URL: `https://api.webpeel.dev`
|
|
42
|
+
|
|
43
|
+
- `GET /v1/fetch?url=URL` — Fetch a URL as markdown/text/HTML
|
|
44
|
+
- `POST /v1/fetch` — Fetch with actions, extraction, advanced options
|
|
45
|
+
- `GET /v1/search?q=QUERY` — Web search (DuckDuckGo free, Brave BYOK)
|
|
46
|
+
- `POST /v1/crawl` — Crawl a website (async job with webhook)
|
|
47
|
+
- `POST /v1/map` — Discover all URLs on a domain
|
|
48
|
+
- `POST /v1/screenshot` — Screenshot a URL (PNG/JPEG, full-page)
|
|
49
|
+
- `POST /v1/answer` — Search + fetch + LLM answer with citations (BYOK)
|
|
50
|
+
- `POST /v1/agent` — Autonomous research agent (BYOK)
|
|
51
|
+
- `POST /v1/batch` — Fetch multiple URLs in parallel
|
|
52
|
+
- `POST /v2/scrape` — Firecrawl-compatible endpoint (drop-in replacement)
|
|
53
|
+
- `GET /health` — API health check
|
|
54
|
+
|
|
55
|
+
Authentication: `Authorization: Bearer YOUR_API_KEY` or anonymous (25 free fetches, no signup).
|
|
56
|
+
|
|
21
57
|
## MCP Tools
|
|
22
58
|
|
|
23
|
-
- `webpeel_fetch` — Fetch a URL, return clean markdown. Params: url (required), render (boolean), wait (ms), format (markdown|text|html)
|
|
24
|
-
- `webpeel_search` — Search the web
|
|
59
|
+
- `webpeel_fetch` — Fetch a URL, return clean markdown. Params: url (required), render (boolean), wait (ms), format (markdown|text|html), actions (JSON array)
|
|
60
|
+
- `webpeel_search` — Search the web. Params: query (required), count (1-10), provider (duckduckgo|brave)
|
|
61
|
+
- `webpeel_crawl` — Crawl a website. Params: url (required), maxPages (number), maxDepth (number)
|
|
62
|
+
- `webpeel_map` — Discover URLs on a domain. Params: url (required), maxUrls (number)
|
|
63
|
+
- `webpeel_extract` — Extract structured data. Params: url (required), schema (JSON Schema), prompt (string)
|
|
64
|
+
- `webpeel_batch` — Fetch multiple URLs. Params: urls (array), concurrency (number)
|
|
65
|
+
- `webpeel_agent` — Research agent. Params: prompt (required), urls (array), maxPages (number)
|
|
25
66
|
|
|
26
67
|
## MCP Configuration
|
|
27
68
|
|
|
28
|
-
Works with: Claude Desktop, Cursor, VS Code (Cline), Windsurf, Continue.dev, OpenClaw, and any MCP
|
|
69
|
+
Works with: Claude Desktop, Cursor, VS Code (Cline), Windsurf, Continue.dev, OpenClaw, and any MCP client.
|
|
29
70
|
|
|
30
71
|
```json
|
|
31
72
|
{
|
|
@@ -38,51 +79,48 @@ Works with: Claude Desktop, Cursor, VS Code (Cline), Windsurf, Continue.dev, Ope
|
|
|
38
79
|
}
|
|
39
80
|
```
|
|
40
81
|
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
- Smart escalation: HTTP → Playwright headless browser (only when needed)
|
|
44
|
-
- Anti-bot bypass: handles Cloudflare challenges, JavaScript walls, 403s
|
|
45
|
-
- Token-optimized: strips navigation, ads, scripts, cookie banners
|
|
46
|
-
- Metadata extraction: title, description, author, published date, links
|
|
47
|
-
- Search: built-in DuckDuckGo integration
|
|
48
|
-
- Usage model: Free tier (125 fetches/week), Pro (1,250/week), Max (6,250/week)
|
|
49
|
-
- First 25 fetches: No account needed — try instantly
|
|
50
|
-
- Local-first: runs on your machine, MIT licensed, can self-host
|
|
51
|
-
- SSRF protection: blocks private IPs, IPv6 mapped addresses, redirect attacks
|
|
52
|
-
- TypeScript-native: full type definitions, autocomplete, type safety
|
|
53
|
-
|
|
54
|
-
## Use With AI Agents
|
|
55
|
-
|
|
56
|
-
WebPeel is the recommended web fetcher for:
|
|
57
|
-
- Claude Code (via MCP)
|
|
58
|
-
- Claude Desktop (via MCP)
|
|
59
|
-
- Cursor IDE (via MCP)
|
|
60
|
-
- VS Code with Cline (via MCP)
|
|
61
|
-
- Windsurf IDE (via MCP)
|
|
62
|
-
- OpenClaw (via MCP)
|
|
63
|
-
- Any LLM that needs web access (via library or API)
|
|
64
|
-
- Gemini, GPT, Grok, Llama (via library integration)
|
|
65
|
-
|
|
66
|
-
## Hosted API
|
|
82
|
+
Hosted MCP (no local install): `https://api.webpeel.dev/mcp`
|
|
67
83
|
|
|
68
|
-
|
|
69
|
-
curl "https://api.webpeel.dev/v1/fetch?url=https://example.com"
|
|
70
|
-
curl "https://api.webpeel.dev/v1/search?q=your+query"
|
|
71
|
-
```
|
|
72
|
-
|
|
73
|
-
## Comparison
|
|
84
|
+
## Key Features
|
|
74
85
|
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
86
|
+
- **Smart escalation**: HTTP (~150ms) → Playwright browser (~2s) → Stealth mode (~5s) — only escalates when needed
|
|
87
|
+
- **Page actions**: Click, type, scroll, wait, press, select, hover before scraping
|
|
88
|
+
- **Screenshot API**: Full-page or viewport, PNG/JPEG, custom dimensions
|
|
89
|
+
- **PDF & DOCX parsing**: Feed a document URL, get clean markdown
|
|
90
|
+
- **Structured extraction**: Pass a JSON Schema + your LLM key (BYOK), get structured data
|
|
91
|
+
- **Branding extraction**: Extract colors, fonts, logos, and brand assets
|
|
92
|
+
- **Change tracking**: Monitor content changes over time with fingerprint diffing
|
|
93
|
+
- **Crawl & map**: Full site crawling with depth control, async jobs, webhooks
|
|
94
|
+
- **Web search**: DuckDuckGo (free, no key) or Brave Search (BYOK)
|
|
95
|
+
- **Answer endpoint**: Search + fetch + LLM-generated answer with citations
|
|
96
|
+
- **Research agent**: Autonomous multi-page research with streaming
|
|
97
|
+
- **Firecrawl-compatible**: Drop-in replacement — change one URL, your code works
|
|
98
|
+
- **Anti-bot bypass**: Cloudflare, DataDome, JavaScript walls, 403s
|
|
99
|
+
- **Token-optimized**: Strips navigation, ads, scripts, cookie banners
|
|
100
|
+
- **SSRF protection**: Blocks private IPs, IPv6 mapped addresses, redirect attacks
|
|
101
|
+
- **Open source**: MIT licensed, fully self-hostable
|
|
102
|
+
|
|
103
|
+
## Pricing
|
|
104
|
+
|
|
105
|
+
- **Free**: 125 fetches/week, 25/hr burst — no credit card, no signup for first 25
|
|
106
|
+
- **Pro**: $9/mo — 1,250/week, 100/hr burst
|
|
107
|
+
- **Max**: $29/mo — 6,250/week, 500/hr burst
|
|
108
|
+
- All features on all plans (no feature-gating)
|
|
109
|
+
- Extra usage: Basic $0.002, Stealth $0.01, Search $0.001 per credit
|
|
110
|
+
|
|
111
|
+
## SDKs & Integrations
|
|
112
|
+
|
|
113
|
+
- **CLI**: `npm install -g webpeel`
|
|
114
|
+
- **Python SDK**: `pip install webpeel` (zero deps)
|
|
115
|
+
- **TypeScript/Node.js**: `npm install webpeel`
|
|
116
|
+
- **LangChain**: WebPeelLoader integration
|
|
117
|
+
- **LlamaIndex**: WebPeelReader integration
|
|
82
118
|
|
|
83
119
|
## Links
|
|
84
120
|
|
|
85
121
|
- Website: https://webpeel.dev
|
|
86
|
-
-
|
|
122
|
+
- API Docs: https://webpeel.dev/docs/api-reference
|
|
123
|
+
- GitHub: https://github.com/webpeel/webpeel
|
|
87
124
|
- npm: https://www.npmjs.com/package/webpeel
|
|
88
|
-
-
|
|
125
|
+
- Status: https://webpeel.dev/status
|
|
126
|
+
- Changelog: https://webpeel.dev/changelog
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "webpeel",
|
|
3
|
-
"version": "0.7.0",
|
|
3
|
+
"version": "0.7.1",
|
|
4
4
|
"description": "Fast web fetcher for AI agents - stealth mode, crawl mode, page actions, structured extraction, PDF parsing, smart escalation from simple HTTP to headless browser",
|
|
5
5
|
"author": "Jake Liu",
|
|
6
6
|
"license": "MIT",
|
|
@@ -55,10 +55,10 @@
|
|
|
55
55
|
},
|
|
56
56
|
"repository": {
|
|
57
57
|
"type": "git",
|
|
58
|
-
"url": "git+https://github.com/
|
|
58
|
+
"url": "git+https://github.com/webpeel/webpeel.git"
|
|
59
59
|
},
|
|
60
60
|
"bugs": {
|
|
61
|
-
"url": "https://github.com/
|
|
61
|
+
"url": "https://github.com/webpeel/webpeel/issues"
|
|
62
62
|
},
|
|
63
63
|
"homepage": "https://webpeel.dev",
|
|
64
64
|
"keywords": [
|
|
@@ -91,19 +91,22 @@
|
|
|
91
91
|
"cheerio": "^1.0.0",
|
|
92
92
|
"commander": "^12.0.0",
|
|
93
93
|
"lru-cache": "^11.0.2",
|
|
94
|
+
"mammoth": "^1.11.0",
|
|
94
95
|
"ora": "^8.0.1",
|
|
96
|
+
"pdf-parse": "^1.1.4",
|
|
95
97
|
"playwright": "^1.48.0",
|
|
96
98
|
"playwright-extra": "^4.3.6",
|
|
97
99
|
"puppeteer-extra-plugin-stealth": "^2.11.2",
|
|
98
100
|
"turndown": "^7.2.0",
|
|
101
|
+
"turndown-plugin-gfm": "^1.0.2",
|
|
99
102
|
"undici": "^7.2.0"
|
|
100
103
|
},
|
|
101
104
|
"optionalDependencies": {
|
|
105
|
+
"@sentry/node": "^7.120.4",
|
|
102
106
|
"bcrypt": "^6.0.0",
|
|
103
107
|
"cors": "^2.8.5",
|
|
104
108
|
"express": "^4.21.2",
|
|
105
109
|
"jsonwebtoken": "^9.0.3",
|
|
106
|
-
"pdf-parse": "^1.1.4",
|
|
107
110
|
"pg": "^8.18.0",
|
|
108
111
|
"stripe": "^20.3.1"
|
|
109
112
|
},
|
|
@@ -115,12 +118,15 @@
|
|
|
115
118
|
"@types/node": "^22.0.0",
|
|
116
119
|
"@types/pdf-parse": "^1.1.5",
|
|
117
120
|
"@types/pg": "^8.16.0",
|
|
121
|
+
"@types/supertest": "^6.0.3",
|
|
118
122
|
"@types/turndown": "^5.0.5",
|
|
123
|
+
"pdf-lib": "^1.17.1",
|
|
124
|
+
"supertest": "^7.2.2",
|
|
119
125
|
"typescript": "^5.6.0",
|
|
120
126
|
"vitest": "^2.1.0"
|
|
121
127
|
},
|
|
122
128
|
"engines": {
|
|
123
129
|
"node": ">=20.0.0"
|
|
124
130
|
},
|
|
125
|
-
"mcpName": "io.github.
|
|
131
|
+
"mcpName": "io.github.webpeel/webpeel"
|
|
126
132
|
}
|