webpeel 0.7.0 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. package/README.md +140 -500
  2. package/dist/cli-auth.d.ts +2 -0
  3. package/dist/cli-auth.d.ts.map +1 -1
  4. package/dist/cli-auth.js +16 -3
  5. package/dist/cli-auth.js.map +1 -1
  6. package/dist/cli.js +475 -77
  7. package/dist/cli.js.map +1 -1
  8. package/dist/core/actions.d.ts +19 -10
  9. package/dist/core/actions.d.ts.map +1 -1
  10. package/dist/core/actions.js +214 -43
  11. package/dist/core/actions.js.map +1 -1
  12. package/dist/core/agent.d.ts +60 -3
  13. package/dist/core/agent.d.ts.map +1 -1
  14. package/dist/core/agent.js +375 -86
  15. package/dist/core/agent.js.map +1 -1
  16. package/dist/core/answer.d.ts +43 -0
  17. package/dist/core/answer.d.ts.map +1 -0
  18. package/dist/core/answer.js +378 -0
  19. package/dist/core/answer.js.map +1 -0
  20. package/dist/core/cache.d.ts +14 -0
  21. package/dist/core/cache.d.ts.map +1 -0
  22. package/dist/core/cache.js +122 -0
  23. package/dist/core/cache.js.map +1 -0
  24. package/dist/core/dns-cache.d.ts +21 -0
  25. package/dist/core/dns-cache.d.ts.map +1 -0
  26. package/dist/core/dns-cache.js +184 -0
  27. package/dist/core/dns-cache.js.map +1 -0
  28. package/dist/core/documents.d.ts +24 -0
  29. package/dist/core/documents.d.ts.map +1 -0
  30. package/dist/core/documents.js +124 -0
  31. package/dist/core/documents.js.map +1 -0
  32. package/dist/core/extract-inline.d.ts +39 -0
  33. package/dist/core/extract-inline.d.ts.map +1 -0
  34. package/dist/core/extract-inline.js +214 -0
  35. package/dist/core/extract-inline.js.map +1 -0
  36. package/dist/core/fetcher.d.ts +33 -7
  37. package/dist/core/fetcher.d.ts.map +1 -1
  38. package/dist/core/fetcher.js +608 -41
  39. package/dist/core/fetcher.js.map +1 -1
  40. package/dist/core/jobs.d.ts +66 -0
  41. package/dist/core/jobs.d.ts.map +1 -0
  42. package/dist/core/jobs.js +513 -0
  43. package/dist/core/jobs.js.map +1 -0
  44. package/dist/core/markdown.d.ts.map +1 -1
  45. package/dist/core/markdown.js +141 -31
  46. package/dist/core/markdown.js.map +1 -1
  47. package/dist/core/pdf.d.ts.map +1 -1
  48. package/dist/core/pdf.js +3 -1
  49. package/dist/core/pdf.js.map +1 -1
  50. package/dist/core/screenshot.d.ts +33 -0
  51. package/dist/core/screenshot.d.ts.map +1 -0
  52. package/dist/core/screenshot.js +30 -0
  53. package/dist/core/screenshot.js.map +1 -0
  54. package/dist/core/search-provider.d.ts +46 -0
  55. package/dist/core/search-provider.d.ts.map +1 -0
  56. package/dist/core/search-provider.js +281 -0
  57. package/dist/core/search-provider.js.map +1 -0
  58. package/dist/core/strategies.d.ts +7 -10
  59. package/dist/core/strategies.d.ts.map +1 -1
  60. package/dist/core/strategies.js +370 -63
  61. package/dist/core/strategies.js.map +1 -1
  62. package/dist/index.d.ts +9 -3
  63. package/dist/index.d.ts.map +1 -1
  64. package/dist/index.js +61 -32
  65. package/dist/index.js.map +1 -1
  66. package/dist/mcp/server.js +335 -70
  67. package/dist/mcp/server.js.map +1 -1
  68. package/dist/types.d.ts +43 -1
  69. package/dist/types.d.ts.map +1 -1
  70. package/dist/types.js.map +1 -1
  71. package/llms.txt +85 -47
  72. package/package.json +11 -5
package/dist/types.d.ts CHANGED
@@ -3,11 +3,29 @@
3
3
  */
4
4
  export interface PageAction {
5
5
  type: 'wait' | 'click' | 'scroll' | 'type' | 'fill' | 'select' | 'press' | 'hover' | 'waitForSelector' | 'screenshot';
6
+ /** CSS selector for element-targeted actions */
6
7
  selector?: string;
8
+ /**
9
+ * Value/text payload for actions like type/fill/select.
10
+ * Accepts Firecrawl-style `text` too (normalized internally).
11
+ */
7
12
  value?: string;
13
+ text?: string;
14
+ /** Keyboard key for press actions (e.g., "Enter") */
8
15
  key?: string;
16
+ /** Wait duration for wait actions (ms). Firecrawl uses `milliseconds`. */
9
17
  ms?: number;
18
+ milliseconds?: number;
19
+ /**
20
+ * Scroll target (absolute) — legacy/internal.
21
+ * Use direction+amount for relative scrolling.
22
+ */
10
23
  to?: 'top' | 'bottom' | number;
24
+ /** Relative scroll direction (Firecrawl-style) */
25
+ direction?: 'up' | 'down' | 'left' | 'right';
26
+ /** Relative scroll amount in pixels (Firecrawl-style) */
27
+ amount?: number;
28
+ /** Per-action timeout override (ms) */
11
29
  timeout?: number;
12
30
  }
13
31
  export interface ExtractOptions {
@@ -24,6 +42,18 @@ export interface ExtractOptions {
24
42
  /** LLM API base URL (default: https://api.openai.com/v1) */
25
43
  llmBaseUrl?: string;
26
44
  }
45
+ /**
46
+ * Inline structured extraction options (BYOK, multi-provider).
47
+ * Used with /v1/fetch POST, /v2/scrape, and /v1/scrape (Firecrawl compat).
48
+ */
49
+ export interface InlineExtractParam {
50
+ /** JSON Schema describing the desired output structure */
51
+ schema?: Record<string, any>;
52
+ /** Natural language prompt describing what to extract */
53
+ prompt?: string;
54
+ }
55
+ /** LLM provider for BYOK inline extraction */
56
+ export type InlineLLMProvider = 'openai' | 'anthropic' | 'google';
27
57
  export interface PeelOptions {
28
58
  /** Use headless browser instead of simple HTTP fetch */
29
59
  render?: boolean;
@@ -35,6 +65,8 @@ export interface PeelOptions {
35
65
  format?: 'markdown' | 'text' | 'html';
36
66
  /** Request timeout in milliseconds (default: 30000) */
37
67
  timeout?: number;
68
+ /** Prepare streaming responses (API plumbing only; full SSE/chunked stream not yet implemented) */
69
+ stream?: boolean;
38
70
  /** Custom user agent */
39
71
  userAgent?: string;
40
72
  /** Capture a screenshot of the page */
@@ -123,8 +155,10 @@ export interface PeelResult {
123
155
  quality?: number;
124
156
  /** SHA256 hash of content (first 16 chars) — for change detection */
125
157
  fingerprint?: string;
126
- /** Extracted structured data (when extract option is used) */
158
+ /** Extracted structured data (when extract option is used — CSS/heuristic extraction) */
127
159
  extracted?: Record<string, any>;
160
+ /** Structured JSON from inline LLM extraction (when extract + llmProvider is used) */
161
+ json?: Record<string, any>;
128
162
  /** Branding/design system profile */
129
163
  branding?: import('./core/branding.js').BrandingProfile;
130
164
  /** Content change tracking result */
@@ -145,6 +179,14 @@ export interface PageMetadata {
145
179
  image?: string;
146
180
  /** Canonical URL */
147
181
  canonical?: string;
182
+ /** MIME content type (set for documents like PDF/DOCX) */
183
+ contentType?: string;
184
+ /** Word count (set for documents like PDF/DOCX) */
185
+ wordCount?: number;
186
+ /** Page count (set for PDF documents) */
187
+ pages?: number;
188
+ /** Allow additional document-specific metadata */
189
+ [key: string]: any;
148
190
  }
149
191
  export declare class WebPeelError extends Error {
150
192
  code?: string | undefined;
@@ -1 +1 @@
1
- {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,MAAM,WAAW,UAAU;IACzB,IAAI,EAAE,MAAM,GAAG,OAAO,GAAG,QAAQ,GAAG,MAAM,GAAG,MAAM,GAAG,QAAQ,GAAG,OAAO,GAAG,OAAO,GAAG,iBAAiB,GAAG,YAAY,CAAC;IACtH,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,EAAE,CAAC,EAAE,MAAM,CAAC;IACZ,EAAE,CAAC,EAAE,KAAK,GAAG,QAAQ,GAAG,MAAM,CAAC;IAC/B,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,WAAW,cAAc;IAC7B,wCAAwC;IACxC,MAAM,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAC7B,0CAA0C;IAC1C,SAAS,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACnC,yDAAyD;IACzD,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,6DAA6D;IAC7D,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,8CAA8C;IAC9C,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,4DAA4D;IAC5D,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED,MAAM,WAAW,WAAW;IAC1B,wDAAwD;IACxD,MAAM,CAAC,EAAE,OAAO,CAAC;IACjB,+FAA+F;IAC/F,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,wEAAwE;IACxE,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,oBAAoB;IACpB,MAAM,CAAC,EAAE,UAAU,GAAG,MAAM,GAAG,MAAM,CAAC;IACtC,uDAAuD;IACvD,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,wBAAwB;IACxB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,uCAAuC;IACvC,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,oDAAoD;IACpD,kBAAkB,CAAC,EAAE,OAAO,CAAC;IAC7B,2FAA2F;IAC3F,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,yEAAyE;IACzE,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IACnB,4FAA4F;IAC5F,WAAW,CAAC,EAAE,MAAM,EAAE,CAAC;IACvB,iFAAiF;IACjF,WAAW,CAAC,EAAE,MAAM,EAAE,CAAC;IACvB,kCAAkC;IAClC,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,uCAAuC;IACvC,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IACnB,qFAAqF;IACrF,GAAG,CAAC,EAAE,OAAO,CAAC;IACd,sEAAsE;IACtE,OAAO,CAAC,EAAE,UAAU,EAAE,CAAC;IACvB,mEAAmE;IACnE,OAAO,CAAC,EAAE,cAAc,CAAC;IACzB,0EAA0E;IAC1E,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,qDAAqD;IACrD,cAAc,CAAC,EAAE,OAAO,CAAC;IACzB,4DAA4D;IAC5D,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,qCAAqC;IACrC,OAAO,CAAC,EAAE,OAAO,GAAG;QAAE,MAAM,CAAC,EAAE,MAAM,CAAC;QAAC,SAAS,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC;IAC5D,8DAA8D;IAC9D,GAAG,CAAC,EAAE;QACJ,MAAM,EAAE,MAAM,CAAC;QACf,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,OAAO,CAAC,EAAE,MAAM,
CAAC;KAClB,CAAC;IACF,mCAAmC;IACnC,MAAM,CAAC,EAAE,OAAO,CAAC;IACjB,8DAA8D;IAC9D,QAAQ,CAAC,EAAE;QACT,+DAA+D;QAC/D,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,mDAAmD;QACnD,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;KACtB,CAAC;CACH;AAED,MAAM,WAAW,SAAS;IACxB,gCAAgC;IAChC,GAAG,EAAE,MAAM,CAAC;IACZ,eAAe;IACf,GAAG,EAAE,MAAM,CAAC;IACZ,sBAAsB;IACtB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,yBAAyB;IACzB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,0BAA0B;IAC1B,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,UAAU;IACzB,kCAAkC;IAClC,GAAG,EAAE,MAAM,CAAC;IACZ,iBAAiB;IACjB,KAAK,EAAE,MAAM,CAAC;IACd,uCAAuC;IACvC,OAAO,EAAE,MAAM,CAAC;IAChB,yBAAyB;IACzB,QAAQ,EAAE,YAAY,CAAC;IACvB,gEAAgE;IAChE,KAAK,EAAE,MAAM,EAAE,CAAC;IAChB,wDAAwD;IACxD,MAAM,EAAE,MAAM,CAAC;IACf,oDAAoD;IACpD,MAAM,EAAE,QAAQ,GAAG,SAAS,GAAG,SAAS,CAAC;IACzC,mCAAmC;IACnC,OAAO,EAAE,MAAM,CAAC;IAChB,yEAAyE;IACzE,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,+DAA+D;IAC/D,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,+DAA+D;IAC/D,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,qEAAqE;IACrE,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,8DAA8D;IAC9D,SAAS,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAChC,qCAAqC;IACrC,QAAQ,CAAC,EAAE,OAAO,oBAAoB,EAAE,eAAe,CAAC;IACxD,qCAAqC;IACrC,cAAc,CAAC,EAAE,OAAO,2BAA2B,EAAE,YAAY,CAAC;IAClE,2BAA2B;IAC3B,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,mDAAmD;IACnD,MAAM,CAAC,EAAE,SAAS,EAAE,CAAC;CACtB;AAED,MAAM,WAAW,YAAY;IAC3B,uBAAuB;IACvB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,kBAAkB;IAClB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,gCAAgC;IAChC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,2BAA2B;IAC3B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,oBAAoB;IACpB,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,qBAAa,YAAa,SAAQ,KAAK;IACD,IAAI,CAAC,EAAE,MAAM;gBAArC,OAAO,EAAE,MAAM,EAAS,IAAI,CAAC,EAAE,MAAM,YAAA;CAIlD;AAED,qBAAa,YAAa,SAAQ,YAAY;gBAChC,OAAO,EAAE,MAAM;CAI5B;AAED,qBAAa,YAAa,SAAQ,YAAY;gBAChC,OAAO,EAAE,MAAM;CAI5B;AAED,qBAAa,YAAa,SAAQ,YAAY;gBAChC,OAAO,EAAE,MAAM;CAI5B"}
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,MAAM,WAAW,UAAU;IACzB,IAAI,EAAE,MAAM,GAAG,OAAO,GAAG,QAAQ,GAAG,MAAM,GAAG,MAAM,GAAG,QAAQ,GAAG,OAAO,GAAG,OAAO,GAAG,iBAAiB,GAAG,YAAY,CAAC;IAEtH,gDAAgD;IAChD,QAAQ,CAAC,EAAE,MAAM,CAAC;IAElB;;;OAGG;IACH,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,MAAM,CAAC;IAEd,qDAAqD;IACrD,GAAG,CAAC,EAAE,MAAM,CAAC;IAEb,0EAA0E;IAC1E,EAAE,CAAC,EAAE,MAAM,CAAC;IACZ,YAAY,CAAC,EAAE,MAAM,CAAC;IAEtB;;;OAGG;IACH,EAAE,CAAC,EAAE,KAAK,GAAG,QAAQ,GAAG,MAAM,CAAC;IAE/B,kDAAkD;IAClD,SAAS,CAAC,EAAE,IAAI,GAAG,MAAM,GAAG,MAAM,GAAG,OAAO,CAAC;IAE7C,yDAAyD;IACzD,MAAM,CAAC,EAAE,MAAM,CAAC;IAEhB,uCAAuC;IACvC,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,WAAW,cAAc;IAC7B,wCAAwC;IACxC,MAAM,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAC7B,0CAA0C;IAC1C,SAAS,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACnC,yDAAyD;IACzD,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,6DAA6D;IAC7D,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,8CAA8C;IAC9C,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,4DAA4D;IAC5D,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED;;;GAGG;AACH,MAAM,WAAW,kBAAkB;IACjC,0DAA0D;IAC1D,MAAM,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAC7B,yDAAyD;IACzD,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED,8CAA8C;AAC9C,MAAM,MAAM,iBAAiB,GAAG,QAAQ,GAAG,WAAW,GAAG,QAAQ,CAAC;AAElE,MAAM,WAAW,WAAW;IAC1B,wDAAwD;IACxD,MAAM,CAAC,EAAE,OAAO,CAAC;IACjB,+FAA+F;IAC/F,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,wEAAwE;IACxE,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,oBAAoB;IACpB,MAAM,CAAC,EAAE,UAAU,GAAG,MAAM,GAAG,MAAM,CAAC;IACtC,uDAAuD;IACvD,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,mGAAmG;IACnG,MAAM,CAAC,EAAE,OAAO,CAAC;IACjB,wBAAwB;IACxB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,uCAAuC;IACvC,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,oDAAoD;IACpD,kBAAkB,CAAC,EAAE,OAAO,CAAC;IAC7B,2FAA2F;IAC3F,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,yEAAyE;IACzE,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IACnB,4FAA4F;IAC5F,WAAW,CAAC,EAAE,MAAM,EAAE,CAAC;IACvB,iFAAiF;IACjF,WAAW,CAAC,EAAE,MAAM,EAAE,CAAC;IACvB,kCAAkC;IAClC,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,uCAAuC;IACvC,OAA
O,CAAC,EAAE,MAAM,EAAE,CAAC;IACnB,qFAAqF;IACrF,GAAG,CAAC,EAAE,OAAO,CAAC;IACd,sEAAsE;IACtE,OAAO,CAAC,EAAE,UAAU,EAAE,CAAC;IACvB,mEAAmE;IACnE,OAAO,CAAC,EAAE,cAAc,CAAC;IACzB,0EAA0E;IAC1E,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,qDAAqD;IACrD,cAAc,CAAC,EAAE,OAAO,CAAC;IACzB,4DAA4D;IAC5D,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,qCAAqC;IACrC,OAAO,CAAC,EAAE,OAAO,GAAG;QAAE,MAAM,CAAC,EAAE,MAAM,CAAC;QAAC,SAAS,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC;IAC5D,8DAA8D;IAC9D,GAAG,CAAC,EAAE;QACJ,MAAM,EAAE,MAAM,CAAC;QACf,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,OAAO,CAAC,EAAE,MAAM,CAAC;KAClB,CAAC;IACF,mCAAmC;IACnC,MAAM,CAAC,EAAE,OAAO,CAAC;IACjB,8DAA8D;IAC9D,QAAQ,CAAC,EAAE;QACT,+DAA+D;QAC/D,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,mDAAmD;QACnD,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;KACtB,CAAC;CACH;AAED,MAAM,WAAW,SAAS;IACxB,gCAAgC;IAChC,GAAG,EAAE,MAAM,CAAC;IACZ,eAAe;IACf,GAAG,EAAE,MAAM,CAAC;IACZ,sBAAsB;IACtB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,yBAAyB;IACzB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,0BAA0B;IAC1B,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,UAAU;IACzB,kCAAkC;IAClC,GAAG,EAAE,MAAM,CAAC;IACZ,iBAAiB;IACjB,KAAK,EAAE,MAAM,CAAC;IACd,uCAAuC;IACvC,OAAO,EAAE,MAAM,CAAC;IAChB,yBAAyB;IACzB,QAAQ,EAAE,YAAY,CAAC;IACvB,gEAAgE;IAChE,KAAK,EAAE,MAAM,EAAE,CAAC;IAChB,wDAAwD;IACxD,MAAM,EAAE,MAAM,CAAC;IACf,oDAAoD;IACpD,MAAM,EAAE,QAAQ,GAAG,SAAS,GAAG,SAAS,CAAC;IACzC,mCAAmC;IACnC,OAAO,EAAE,MAAM,CAAC;IAChB,yEAAyE;IACzE,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,+DAA+D;IAC/D,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,+DAA+D;IAC/D,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,qEAAqE;IACrE,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,yFAAyF;IACzF,SAAS,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAChC,sFAAsF;IACtF,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAC3B,qCAAqC;IACrC,QAAQ,CAAC,EAAE,OAAO,oBAAoB,EAAE,eAAe,CAAC;IACxD,qCAAqC;IACrC,cAAc,CAAC,EAAE,OAAO,2BAA2B,EAAE,YAAY,CAAC;IAClE,2BAA2B;IAC3B,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,mDAAmD;IACnD,MAAM,CAAC,EAAE,SAAS,EAAE,CAAC;CACtB;AAED,MAAM,WAAW,YAAY;IAC3B,uBAAuB;IACvB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,kBAAkB;IAClB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,gCAAgC;IAChC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,
2BAA2B;IAC3B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,oBAAoB;IACpB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,0DAA0D;IAC1D,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,mDAAmD;IACnD,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,yCAAyC;IACzC,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,kDAAkD;IAClD,CAAC,GAAG,EAAE,MAAM,GAAG,GAAG,CAAC;CACpB;AAED,qBAAa,YAAa,SAAQ,KAAK;IACD,IAAI,CAAC,EAAE,MAAM;gBAArC,OAAO,EAAE,MAAM,EAAS,IAAI,CAAC,EAAE,MAAM,YAAA;CAIlD;AAED,qBAAa,YAAa,SAAQ,YAAY;gBAChC,OAAO,EAAE,MAAM;CAI5B;AAED,qBAAa,YAAa,SAAQ,YAAY;gBAChC,OAAO,EAAE,MAAM;CAI5B;AAED,qBAAa,YAAa,SAAQ,YAAY;gBAChC,OAAO,EAAE,MAAM;CAI5B"}
package/dist/types.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;GAEG;AAsJH,MAAM,OAAO,YAAa,SAAQ,KAAK;IACD;IAApC,YAAY,OAAe,EAAS,IAAa;QAC/C,KAAK,CAAC,OAAO,CAAC,CAAC;QADmB,SAAI,GAAJ,IAAI,CAAS;QAE/C,IAAI,CAAC,IAAI,GAAG,cAAc,CAAC;IAC7B,CAAC;CACF;AAED,MAAM,OAAO,YAAa,SAAQ,YAAY;IAC5C,YAAY,OAAe;QACzB,KAAK,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC;QAC1B,IAAI,CAAC,IAAI,GAAG,cAAc,CAAC;IAC7B,CAAC;CACF;AAED,MAAM,OAAO,YAAa,SAAQ,YAAY;IAC5C,YAAY,OAAe;QACzB,KAAK,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC;QAC1B,IAAI,CAAC,IAAI,GAAG,cAAc,CAAC;IAC7B,CAAC;CACF;AAED,MAAM,OAAO,YAAa,SAAQ,YAAY;IAC5C,YAAY,OAAe;QACzB,KAAK,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC;QAC1B,IAAI,CAAC,IAAI,GAAG,cAAc,CAAC;IAC7B,CAAC;CACF"}
1
+ {"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;GAEG;AA0MH,MAAM,OAAO,YAAa,SAAQ,KAAK;IACD;IAApC,YAAY,OAAe,EAAS,IAAa;QAC/C,KAAK,CAAC,OAAO,CAAC,CAAC;QADmB,SAAI,GAAJ,IAAI,CAAS;QAE/C,IAAI,CAAC,IAAI,GAAG,cAAc,CAAC;IAC7B,CAAC;CACF;AAED,MAAM,OAAO,YAAa,SAAQ,YAAY;IAC5C,YAAY,OAAe;QACzB,KAAK,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC;QAC1B,IAAI,CAAC,IAAI,GAAG,cAAc,CAAC;IAC7B,CAAC;CACF;AAED,MAAM,OAAO,YAAa,SAAQ,YAAY;IAC5C,YAAY,OAAe;QACzB,KAAK,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC;QAC1B,IAAI,CAAC,IAAI,GAAG,cAAc,CAAC;IAC7B,CAAC;CACF;AAED,MAAM,OAAO,YAAa,SAAQ,YAAY;IAC5C,YAAY,OAAe;QACzB,KAAK,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC;QAC1B,IAAI,CAAC,IAAI,GAAG,cAAc,CAAC;IAC7B,CAAC;CACF"}
package/llms.txt CHANGED
@@ -1,15 +1,33 @@
1
1
  # WebPeel
2
2
 
3
- > Fetch any web page as clean, AI-ready markdown.
3
+ > The web data API for AI agents. Fetch, search, crawl, extract, and research — one tool, zero config.
4
4
 
5
- WebPeel is an open-source web fetcher designed for AI agents. It converts web pages to clean markdown with smart escalation: tries simple HTTP first (~200ms), automatically escalates to a headless browser when blocked.
5
+ WebPeel is an open-source web fetcher that converts any URL to clean, AI-ready markdown. Smart escalation tries fast HTTP first (~150ms), auto-escalates to a headless browser when needed, and uses stealth mode for heavily protected sites.
6
6
 
7
7
  ## Quick Start
8
8
 
9
9
  ```bash
10
- # CLI (zero install)
10
+ # CLI
11
11
  npx webpeel https://example.com
12
12
 
13
+ # With browser rendering (JS-heavy sites)
14
+ npx webpeel https://example.com --render
15
+
16
+ # Search the web
17
+ npx webpeel search "latest AI news"
18
+
19
+ # Crawl a site
20
+ npx webpeel crawl https://example.com --max-pages 20
21
+
22
+ # Screenshot
23
+ npx webpeel screenshot https://example.com --full-page
24
+
25
+ # AI-powered answer with citations
26
+ npx webpeel answer "What is WebPeel?" --llm openai
27
+
28
+ # Research agent
29
+ npx webpeel agent "Compare React vs Vue in 2025" --llm-key $OPENAI_API_KEY
30
+
13
31
  # Library
14
32
  import { peel } from 'webpeel';
15
33
  const result = await peel('https://example.com');
@@ -18,14 +36,37 @@ const result = await peel('https://example.com');
18
36
  npx webpeel mcp
19
37
  ```
20
38
 
39
+ ## API Endpoints
40
+
41
+ Base URL: `https://api.webpeel.dev`
42
+
43
+ - `GET /v1/fetch?url=URL` — Fetch a URL as markdown/text/HTML
44
+ - `POST /v1/fetch` — Fetch with actions, extraction, advanced options
45
+ - `GET /v1/search?q=QUERY` — Web search (DuckDuckGo free, Brave BYOK)
46
+ - `POST /v1/crawl` — Crawl a website (async job with webhook)
47
+ - `POST /v1/map` — Discover all URLs on a domain
48
+ - `POST /v1/screenshot` — Screenshot a URL (PNG/JPEG, full-page)
49
+ - `POST /v1/answer` — Search + fetch + LLM answer with citations (BYOK)
50
+ - `POST /v1/agent` — Autonomous research agent (BYOK)
51
+ - `POST /v1/batch` — Fetch multiple URLs in parallel
52
+ - `POST /v2/scrape` — Firecrawl-compatible endpoint (drop-in replacement)
53
+ - `GET /health` — API health check
54
+
55
+ Authentication: `Authorization: Bearer YOUR_API_KEY` or anonymous (25 free fetches, no signup).
56
+
21
57
  ## MCP Tools
22
58
 
23
- - `webpeel_fetch` — Fetch a URL, return clean markdown. Params: url (required), render (boolean), wait (ms), format (markdown|text|html)
24
- - `webpeel_search` — Search the web via DuckDuckGo. Params: query (required), count (1-10)
59
+ - `webpeel_fetch` — Fetch a URL, return clean markdown. Params: url (required), render (boolean), wait (ms), format (markdown|text|html), actions (JSON array)
60
+ - `webpeel_search` — Search the web. Params: query (required), count (1-10), provider (duckduckgo|brave)
61
+ - `webpeel_crawl` — Crawl a website. Params: url (required), maxPages (number), maxDepth (number)
62
+ - `webpeel_map` — Discover URLs on a domain. Params: url (required), maxUrls (number)
63
+ - `webpeel_extract` — Extract structured data. Params: url (required), schema (JSON Schema), prompt (string)
64
+ - `webpeel_batch` — Fetch multiple URLs. Params: urls (array), concurrency (number)
65
+ - `webpeel_agent` — Research agent. Params: prompt (required), urls (array), maxPages (number)
25
66
 
26
67
  ## MCP Configuration
27
68
 
28
- Works with: Claude Desktop, Cursor, VS Code (Cline), Windsurf, Continue.dev, OpenClaw, and any MCP-compatible client.
69
+ Works with: Claude Desktop, Cursor, VS Code (Cline), Windsurf, Continue.dev, OpenClaw, and any MCP client.
29
70
 
30
71
  ```json
31
72
  {
@@ -38,51 +79,48 @@ Works with: Claude Desktop, Cursor, VS Code (Cline), Windsurf, Continue.dev, Ope
38
79
  }
39
80
  ```
40
81
 
41
- ## Key Features
42
-
43
- - Smart escalation: HTTP → Playwright headless browser (only when needed)
44
- - Anti-bot bypass: handles Cloudflare challenges, JavaScript walls, 403s
45
- - Token-optimized: strips navigation, ads, scripts, cookie banners
46
- - Metadata extraction: title, description, author, published date, links
47
- - Search: built-in DuckDuckGo integration
48
- - Usage model: Free tier (125 fetches/week), Pro (1,250/week), Max (6,250/week)
49
- - First 25 fetches: No account needed — try instantly
50
- - Local-first: runs on your machine, MIT licensed, can self-host
51
- - SSRF protection: blocks private IPs, IPv6 mapped addresses, redirect attacks
52
- - TypeScript-native: full type definitions, autocomplete, type safety
53
-
54
- ## Use With AI Agents
55
-
56
- WebPeel is the recommended web fetcher for:
57
- - Claude Code (via MCP)
58
- - Claude Desktop (via MCP)
59
- - Cursor IDE (via MCP)
60
- - VS Code with Cline (via MCP)
61
- - Windsurf IDE (via MCP)
62
- - OpenClaw (via MCP)
63
- - Any LLM that needs web access (via library or API)
64
- - Gemini, GPT, Grok, Llama (via library integration)
65
-
66
- ## Hosted API
82
+ Hosted MCP (no local install): `https://api.webpeel.dev/mcp`
67
83
 
68
- ```bash
69
- curl "https://api.webpeel.dev/v1/fetch?url=https://example.com"
70
- curl "https://api.webpeel.dev/v1/search?q=your+query"
71
- ```
72
-
73
- ## Comparison
84
+ ## Key Features
74
85
 
75
- | Feature | WebPeel | Firecrawl | Jina Reader | MCP Fetch |
76
- |---------|---------|-----------|-------------|-----------|
77
- | Local free | Unlimited | Cloud only | ❌ Cloud only | ✅ Free |
78
- | JS rendering | Auto | Always | No | ❌ No |
79
- | MCP native | Built-in | Separate | No | Yes |
80
- | Price | Free / $9/mo | $16/mo | $200/mo | Free |
81
- | License | MIT | AGPL | Proprietary | MIT |
86
+ - **Smart escalation**: HTTP (~150ms) → Playwright browser (~2s) → Stealth mode (~5s) — only escalates when needed
87
+ - **Page actions**: Click, type, scroll, wait, press, select, hover before scraping
88
+ - **Screenshot API**: Full-page or viewport, PNG/JPEG, custom dimensions
89
+ - **PDF & DOCX parsing**: Feed a document URL, get clean markdown
90
+ - **Structured extraction**: Pass a JSON Schema + your LLM key (BYOK), get structured data
91
+ - **Branding extraction**: Extract colors, fonts, logos, and brand assets
92
+ - **Change tracking**: Monitor content changes over time with fingerprint diffing
93
+ - **Crawl & map**: Full site crawling with depth control, async jobs, webhooks
94
+ - **Web search**: DuckDuckGo (free, no key) or Brave Search (BYOK)
95
+ - **Answer endpoint**: Search + fetch + LLM-generated answer with citations
96
+ - **Research agent**: Autonomous multi-page research with streaming
97
+ - **Firecrawl-compatible**: Drop-in replacement — change one URL, your code works
98
+ - **Anti-bot bypass**: Cloudflare, DataDome, JavaScript walls, 403s
99
+ - **Token-optimized**: Strips navigation, ads, scripts, cookie banners
100
+ - **SSRF protection**: Blocks private IPs, IPv6 mapped addresses, redirect attacks
101
+ - **Open source**: MIT licensed, fully self-hostable
102
+
103
+ ## Pricing
104
+
105
+ - **Free**: 125 fetches/week, 25/hr burst — no credit card, no signup for first 25
106
+ - **Pro**: $9/mo — 1,250/week, 100/hr burst
107
+ - **Max**: $29/mo — 6,250/week, 500/hr burst
108
+ - All features on all plans (no feature-gating)
109
+ - Extra usage: Basic $0.002, Stealth $0.01, Search $0.001 per credit
110
+
111
+ ## SDKs & Integrations
112
+
113
+ - **CLI**: `npm install -g webpeel`
114
+ - **Python SDK**: `pip install webpeel` (zero deps)
115
+ - **TypeScript/Node.js**: `npm install webpeel`
116
+ - **LangChain**: WebPeelLoader integration
117
+ - **LlamaIndex**: WebPeelReader integration
82
118
 
83
119
  ## Links
84
120
 
85
121
  - Website: https://webpeel.dev
86
- - GitHub: https://github.com/JakeLiuMe/webpeel
122
+ - API Docs: https://webpeel.dev/docs/api-reference
123
+ - GitHub: https://github.com/webpeel/webpeel
87
124
  - npm: https://www.npmjs.com/package/webpeel
88
- - MCP Registry: io.github.JakeLiuMe/webpeel
125
+ - Status: https://webpeel.dev/status
126
+ - Changelog: https://webpeel.dev/changelog
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "webpeel",
3
- "version": "0.7.0",
3
+ "version": "0.7.1",
4
4
  "description": "Fast web fetcher for AI agents - stealth mode, crawl mode, page actions, structured extraction, PDF parsing, smart escalation from simple HTTP to headless browser",
5
5
  "author": "Jake Liu",
6
6
  "license": "MIT",
@@ -55,10 +55,10 @@
55
55
  },
56
56
  "repository": {
57
57
  "type": "git",
58
- "url": "git+https://github.com/JakeLiuMe/webpeel.git"
58
+ "url": "git+https://github.com/webpeel/webpeel.git"
59
59
  },
60
60
  "bugs": {
61
- "url": "https://github.com/JakeLiuMe/webpeel/issues"
61
+ "url": "https://github.com/webpeel/webpeel/issues"
62
62
  },
63
63
  "homepage": "https://webpeel.dev",
64
64
  "keywords": [
@@ -91,19 +91,22 @@
91
91
  "cheerio": "^1.0.0",
92
92
  "commander": "^12.0.0",
93
93
  "lru-cache": "^11.0.2",
94
+ "mammoth": "^1.11.0",
94
95
  "ora": "^8.0.1",
96
+ "pdf-parse": "^1.1.4",
95
97
  "playwright": "^1.48.0",
96
98
  "playwright-extra": "^4.3.6",
97
99
  "puppeteer-extra-plugin-stealth": "^2.11.2",
98
100
  "turndown": "^7.2.0",
101
+ "turndown-plugin-gfm": "^1.0.2",
99
102
  "undici": "^7.2.0"
100
103
  },
101
104
  "optionalDependencies": {
105
+ "@sentry/node": "^7.120.4",
102
106
  "bcrypt": "^6.0.0",
103
107
  "cors": "^2.8.5",
104
108
  "express": "^4.21.2",
105
109
  "jsonwebtoken": "^9.0.3",
106
- "pdf-parse": "^1.1.4",
107
110
  "pg": "^8.18.0",
108
111
  "stripe": "^20.3.1"
109
112
  },
@@ -115,12 +118,15 @@
115
118
  "@types/node": "^22.0.0",
116
119
  "@types/pdf-parse": "^1.1.5",
117
120
  "@types/pg": "^8.16.0",
121
+ "@types/supertest": "^6.0.3",
118
122
  "@types/turndown": "^5.0.5",
123
+ "pdf-lib": "^1.17.1",
124
+ "supertest": "^7.2.2",
119
125
  "typescript": "^5.6.0",
120
126
  "vitest": "^2.1.0"
121
127
  },
122
128
  "engines": {
123
129
  "node": ">=20.0.0"
124
130
  },
125
- "mcpName": "io.github.JakeLiuMe/webpeel"
131
+ "mcpName": "io.github.webpeel/webpeel"
126
132
  }