webpeel 0.7.0 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. package/README.md +140 -500
  2. package/dist/cli-auth.d.ts +2 -0
  3. package/dist/cli-auth.d.ts.map +1 -1
  4. package/dist/cli-auth.js +16 -3
  5. package/dist/cli-auth.js.map +1 -1
  6. package/dist/cli.js +475 -77
  7. package/dist/cli.js.map +1 -1
  8. package/dist/core/actions.d.ts +19 -10
  9. package/dist/core/actions.d.ts.map +1 -1
  10. package/dist/core/actions.js +214 -43
  11. package/dist/core/actions.js.map +1 -1
  12. package/dist/core/agent.d.ts +60 -3
  13. package/dist/core/agent.d.ts.map +1 -1
  14. package/dist/core/agent.js +375 -86
  15. package/dist/core/agent.js.map +1 -1
  16. package/dist/core/answer.d.ts +43 -0
  17. package/dist/core/answer.d.ts.map +1 -0
  18. package/dist/core/answer.js +378 -0
  19. package/dist/core/answer.js.map +1 -0
  20. package/dist/core/cache.d.ts +14 -0
  21. package/dist/core/cache.d.ts.map +1 -0
  22. package/dist/core/cache.js +122 -0
  23. package/dist/core/cache.js.map +1 -0
  24. package/dist/core/dns-cache.d.ts +21 -0
  25. package/dist/core/dns-cache.d.ts.map +1 -0
  26. package/dist/core/dns-cache.js +184 -0
  27. package/dist/core/dns-cache.js.map +1 -0
  28. package/dist/core/documents.d.ts +24 -0
  29. package/dist/core/documents.d.ts.map +1 -0
  30. package/dist/core/documents.js +124 -0
  31. package/dist/core/documents.js.map +1 -0
  32. package/dist/core/extract-inline.d.ts +39 -0
  33. package/dist/core/extract-inline.d.ts.map +1 -0
  34. package/dist/core/extract-inline.js +214 -0
  35. package/dist/core/extract-inline.js.map +1 -0
  36. package/dist/core/fetcher.d.ts +33 -7
  37. package/dist/core/fetcher.d.ts.map +1 -1
  38. package/dist/core/fetcher.js +608 -41
  39. package/dist/core/fetcher.js.map +1 -1
  40. package/dist/core/jobs.d.ts +66 -0
  41. package/dist/core/jobs.d.ts.map +1 -0
  42. package/dist/core/jobs.js +513 -0
  43. package/dist/core/jobs.js.map +1 -0
  44. package/dist/core/markdown.d.ts.map +1 -1
  45. package/dist/core/markdown.js +141 -31
  46. package/dist/core/markdown.js.map +1 -1
  47. package/dist/core/pdf.d.ts.map +1 -1
  48. package/dist/core/pdf.js +3 -1
  49. package/dist/core/pdf.js.map +1 -1
  50. package/dist/core/screenshot.d.ts +33 -0
  51. package/dist/core/screenshot.d.ts.map +1 -0
  52. package/dist/core/screenshot.js +30 -0
  53. package/dist/core/screenshot.js.map +1 -0
  54. package/dist/core/search-provider.d.ts +46 -0
  55. package/dist/core/search-provider.d.ts.map +1 -0
  56. package/dist/core/search-provider.js +281 -0
  57. package/dist/core/search-provider.js.map +1 -0
  58. package/dist/core/strategies.d.ts +7 -10
  59. package/dist/core/strategies.d.ts.map +1 -1
  60. package/dist/core/strategies.js +370 -63
  61. package/dist/core/strategies.js.map +1 -1
  62. package/dist/index.d.ts +9 -3
  63. package/dist/index.d.ts.map +1 -1
  64. package/dist/index.js +61 -32
  65. package/dist/index.js.map +1 -1
  66. package/dist/mcp/server.js +335 -70
  67. package/dist/mcp/server.js.map +1 -1
  68. package/dist/types.d.ts +43 -1
  69. package/dist/types.d.ts.map +1 -1
  70. package/dist/types.js.map +1 -1
  71. package/llms.txt +85 -47
  72. package/package.json +11 -5
@@ -1,11 +1,16 @@
1
1
  /**
2
2
  * Core fetching logic: simple HTTP and browser-based fetching
3
3
  */
4
+ import type { PageAction } from '../types.js';
4
5
  export interface FetchResult {
6
+ /** Text content (HTML/JSON/XML/plain text). For binary documents, this may be an empty string. */
5
7
  html: string;
8
+ /** Raw response body (used for binary documents like PDFs/DOCX). */
9
+ buffer?: Buffer;
6
10
  url: string;
7
11
  statusCode?: number;
8
12
  screenshot?: Buffer;
13
+ /** Raw Content-Type header from the response (may include charset). */
9
14
  contentType?: string;
10
15
  /** Playwright page object (only available in browser/stealth mode, must be closed by caller) */
11
16
  page?: import('playwright-core').Page;
@@ -17,7 +22,9 @@ export interface FetchResult {
17
22
  * Fast and lightweight, but can be blocked by Cloudflare/bot detection
18
23
  * SECURITY: Manual redirect handling with SSRF re-validation
19
24
  */
20
- export declare function simpleFetch(url: string, userAgent?: string, timeoutMs?: number, customHeaders?: Record<string, string>): Promise<FetchResult>;
25
+ export declare function simpleFetch(url: string, userAgent?: string, timeoutMs?: number, customHeaders?: Record<string, string>, abortSignal?: AbortSignal): Promise<FetchResult>;
26
+ export declare function closePool(): Promise<void>;
27
+ export declare function warmup(): Promise<void>;
21
28
  /**
22
29
  * Fetch using headless Chromium via Playwright
23
30
  * Slower but can handle JavaScript-heavy sites and bypass some bot detection
@@ -31,6 +38,27 @@ export declare function browserFetch(url: string, options?: {
31
38
  headers?: Record<string, string>;
32
39
  cookies?: string[];
33
40
  stealth?: boolean;
41
+ actions?: PageAction[];
42
+ /** Keep the browser page open after fetch (caller must close page + browser) */
43
+ keepPageOpen?: boolean;
44
+ /** Abort signal for internal races/cancellation */
45
+ signal?: AbortSignal;
46
+ }): Promise<FetchResult>;
47
+ /**
48
+ * Retry a fetch operation with exponential backoff
49
+ */
50
+ export declare function browserScreenshot(url: string, options?: {
51
+ fullPage?: boolean;
52
+ width?: number;
53
+ height?: number;
54
+ format?: 'png' | 'jpeg';
55
+ quality?: number;
56
+ waitMs?: number;
57
+ timeoutMs?: number;
58
+ userAgent?: string;
59
+ headers?: Record<string, string>;
60
+ cookies?: string[];
61
+ stealth?: boolean;
34
62
  actions?: Array<{
35
63
  type: 'wait' | 'click' | 'scroll' | 'type' | 'fill' | 'select' | 'press' | 'hover' | 'waitForSelector' | 'screenshot';
36
64
  selector?: string;
@@ -40,12 +68,10 @@ export declare function browserFetch(url: string, options?: {
40
68
  to?: 'top' | 'bottom' | number;
41
69
  timeout?: number;
42
70
  }>;
43
- /** Keep the browser page open after fetch (caller must close page + browser) */
44
- keepPageOpen?: boolean;
45
- }): Promise<FetchResult>;
46
- /**
47
- * Retry a fetch operation with exponential backoff
48
- */
71
+ }): Promise<{
72
+ buffer: Buffer;
73
+ finalUrl: string;
74
+ }>;
49
75
  export declare function retryFetch<T>(fn: () => Promise<T>, maxAttempts?: number, baseDelayMs?: number): Promise<T>;
50
76
  /**
51
77
  * Clean up browser resources
@@ -1 +1 @@
1
- {"version":3,"file":"fetcher.d.ts","sourceRoot":"","sources":["../../src/core/fetcher.ts"],"names":[],"mappings":"AAAA;;GAEG;AAiQH,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,MAAM,CAAC;IACb,GAAG,EAAE,MAAM,CAAC;IACZ,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,gGAAgG;IAChG,IAAI,CAAC,EAAE,OAAO,iBAAiB,EAAE,IAAI,CAAC;IACtC,mGAAmG;IACnG,OAAO,CAAC,EAAE,OAAO,iBAAiB,EAAE,OAAO,CAAC;CAC7C;AAED;;;;GAIG;AACH,wBAAsB,WAAW,CAC/B,GAAG,EAAE,MAAM,EACX,SAAS,CAAC,EAAE,MAAM,EAClB,SAAS,GAAE,MAAc,EACzB,aAAa,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,GACrC,OAAO,CAAC,WAAW,CAAC,CAgMtB;AAyCD;;;GAGG;AACH,wBAAsB,YAAY,CAChC,GAAG,EAAE,MAAM,EACX,OAAO,GAAE;IACP,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,kBAAkB,CAAC,EAAE,OAAO,CAAC;IAC7B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IACnB,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,OAAO,CAAC,EAAE,KAAK,CAAC;QACd,IAAI,EAAE,MAAM,GAAG,OAAO,GAAG,QAAQ,GAAG,MAAM,GAAG,MAAM,GAAG,QAAQ,GAAG,OAAO,GAAG,OAAO,GAAG,iBAAiB,GAAG,YAAY,CAAC;QACtH,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,GAAG,CAAC,EAAE,MAAM,CAAC;QACb,EAAE,CAAC,EAAE,MAAM,CAAC;QACZ,EAAE,CAAC,EAAE,KAAK,GAAG,QAAQ,GAAG,MAAM,CAAC;QAC/B,OAAO,CAAC,EAAE,MAAM,CAAC;KAClB,CAAC,CAAC;IACH,gFAAgF;IAChF,YAAY,CAAC,EAAE,OAAO,CAAC;CACnB,GACL,OAAO,CAAC,WAAW,CAAC,CAyLtB;AAED;;GAEG;AACH,wBAAsB,UAAU,CAAC,CAAC,EAChC,EAAE,EAAE,MAAM,OAAO,CAAC,CAAC,CAAC,EACpB,WAAW,GAAE,MAAU,EACvB,WAAW,GAAE,MAAa,GACzB,OAAO,CAAC,CAAC,CAAC,CAsBZ;AAED;;GAEG;AACH,wBAAsB,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC,CAS7C"}
1
+ {"version":3,"file":"fetcher.d.ts","sourceRoot":"","sources":["../../src/core/fetcher.ts"],"names":[],"mappings":"AAAA;;GAEG;AAcH,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAsX9C,MAAM,WAAW,WAAW;IAC1B,kGAAkG;IAClG,IAAI,EAAE,MAAM,CAAC;IACb,oEAAoE;IACpE,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,GAAG,EAAE,MAAM,CAAC;IACZ,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,uEAAuE;IACvE,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,gGAAgG;IAChG,IAAI,CAAC,EAAE,OAAO,iBAAiB,EAAE,IAAI,CAAC;IACtC,mGAAmG;IACnG,OAAO,CAAC,EAAE,OAAO,iBAAiB,EAAE,OAAO,CAAC;CAC7C;AAED;;;;GAIG;AACH,wBAAsB,WAAW,CAC/B,GAAG,EAAE,MAAM,EACX,SAAS,CAAC,EAAE,MAAM,EAClB,SAAS,GAAE,MAAc,EACzB,aAAa,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,EACtC,WAAW,CAAC,EAAE,WAAW,GACxB,OAAO,CAAC,WAAW,CAAC,CA4RtB;AAED,wBAAsB,SAAS,IAAI,OAAO,CAAC,IAAI,CAAC,CAI/C;AA2FD,wBAAsB,MAAM,IAAI,OAAO,CAAC,IAAI,CAAC,CAI5C;AA4CD;;;GAGG;AACH,wBAAsB,YAAY,CAChC,GAAG,EAAE,MAAM,EACX,OAAO,GAAE;IACP,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,kBAAkB,CAAC,EAAE,OAAO,CAAC;IAC7B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IACnB,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,OAAO,CAAC,EAAE,UAAU,EAAE,CAAC;IACvB,gFAAgF;IAChF,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,mDAAmD;IACnD,MAAM,CAAC,EAAE,WAAW,CAAC;CACjB,GACL,OAAO,CAAC,WAAW,CAAC,CAsUtB;AAED;;GAEG;AACH,wBAAsB,iBAAiB,CACrC,GAAG,EAAE,MAAM,EACX,OAAO,GAAE;IACP,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,KAAK,GAAG,MAAM,CAAC;IACxB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IACnB,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,OAAO,CAAC,EAAE,KAAK,CAAC;QACd,IAAI,EAAE,MAAM,GAAG,OAAO,GAAG,QAAQ,GAAG,MAAM,GAAG,MAAM,GAAG,QAAQ,GAAG,OAAO,GAAG,OAAO,GAAG,iBAAiB,GAAG,YAAY,CAAC;QACtH,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,KAAK,CAAC,EAAE,MAAM,CAAC
;QACf,GAAG,CAAC,EAAE,MAAM,CAAC;QACb,EAAE,CAAC,EAAE,MAAM,CAAC;QACZ,EAAE,CAAC,EAAE,KAAK,GAAG,QAAQ,GAAG,MAAM,CAAC;QAC/B,OAAO,CAAC,EAAE,MAAM,CAAC;KAClB,CAAC,CAAC;CACC,GACL,OAAO,CAAC;IAAE,MAAM,EAAE,MAAM,CAAC;IAAC,QAAQ,EAAE,MAAM,CAAA;CAAE,CAAC,CA2M/C;AAED,wBAAsB,UAAU,CAAC,CAAC,EAChC,EAAE,EAAE,MAAM,OAAO,CAAC,CAAC,CAAC,EACpB,WAAW,GAAE,MAAU,EACvB,WAAW,GAAE,MAAa,GACzB,OAAO,CAAC,CAAC,CAAC,CAsBZ;AAED;;GAEG;AACH,wBAAsB,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC,CAkB7C"}