webpeel 0.16.0 → 0.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113) hide show
  1. package/LICENSE +11 -657
  2. package/README.md +246 -325
  3. package/dist/cli.js +330 -73
  4. package/dist/cli.js.map +1 -1
  5. package/dist/core/browser-fetch.d.ts +12 -0
  6. package/dist/core/browser-fetch.d.ts.map +1 -1
  7. package/dist/core/browser-fetch.js +70 -17
  8. package/dist/core/browser-fetch.js.map +1 -1
  9. package/dist/core/cf-worker-proxy.d.ts +33 -0
  10. package/dist/core/cf-worker-proxy.d.ts.map +1 -0
  11. package/dist/core/cf-worker-proxy.js +88 -0
  12. package/dist/core/cf-worker-proxy.js.map +1 -0
  13. package/dist/core/chunker.d.ts +47 -0
  14. package/dist/core/chunker.d.ts.map +1 -0
  15. package/dist/core/chunker.js +250 -0
  16. package/dist/core/chunker.js.map +1 -0
  17. package/dist/core/cloak-fetch.d.ts +43 -0
  18. package/dist/core/cloak-fetch.d.ts.map +1 -0
  19. package/dist/core/cloak-fetch.js +141 -0
  20. package/dist/core/cloak-fetch.js.map +1 -0
  21. package/dist/core/crawl-checkpoint.d.ts +55 -0
  22. package/dist/core/crawl-checkpoint.d.ts.map +1 -0
  23. package/dist/core/crawl-checkpoint.js +105 -0
  24. package/dist/core/crawl-checkpoint.js.map +1 -0
  25. package/dist/core/crawler.d.ts +5 -1
  26. package/dist/core/crawler.d.ts.map +1 -1
  27. package/dist/core/crawler.js +60 -5
  28. package/dist/core/crawler.js.map +1 -1
  29. package/dist/core/cycle-fetch.d.ts +27 -0
  30. package/dist/core/cycle-fetch.d.ts.map +1 -0
  31. package/dist/core/cycle-fetch.js +99 -0
  32. package/dist/core/cycle-fetch.js.map +1 -0
  33. package/dist/core/domain-extractors.d.ts.map +1 -1
  34. package/dist/core/domain-extractors.js +754 -14
  35. package/dist/core/domain-extractors.js.map +1 -1
  36. package/dist/core/google-cache.d.ts +30 -0
  37. package/dist/core/google-cache.d.ts.map +1 -0
  38. package/dist/core/google-cache.js +181 -0
  39. package/dist/core/google-cache.js.map +1 -0
  40. package/dist/core/markdown.d.ts +11 -0
  41. package/dist/core/markdown.d.ts.map +1 -1
  42. package/dist/core/markdown.js +43 -0
  43. package/dist/core/markdown.js.map +1 -1
  44. package/dist/core/peel-tls.d.ts +26 -0
  45. package/dist/core/peel-tls.d.ts.map +1 -0
  46. package/dist/core/peel-tls.js +221 -0
  47. package/dist/core/peel-tls.js.map +1 -0
  48. package/dist/core/pipeline.d.ts +5 -1
  49. package/dist/core/pipeline.d.ts.map +1 -1
  50. package/dist/core/pipeline.js +269 -21
  51. package/dist/core/pipeline.js.map +1 -1
  52. package/dist/core/schema-postprocess.d.ts +33 -0
  53. package/dist/core/schema-postprocess.d.ts.map +1 -0
  54. package/dist/core/schema-postprocess.js +470 -0
  55. package/dist/core/schema-postprocess.js.map +1 -0
  56. package/dist/core/schema-templates.d.ts +20 -0
  57. package/dist/core/schema-templates.d.ts.map +1 -0
  58. package/dist/core/schema-templates.js +131 -0
  59. package/dist/core/schema-templates.js.map +1 -0
  60. package/dist/core/search-fallback.d.ts +28 -0
  61. package/dist/core/search-fallback.d.ts.map +1 -0
  62. package/dist/core/search-fallback.js +185 -0
  63. package/dist/core/search-fallback.js.map +1 -0
  64. package/dist/core/search-provider.d.ts +47 -4
  65. package/dist/core/search-provider.d.ts.map +1 -1
  66. package/dist/core/search-provider.js +278 -7
  67. package/dist/core/search-provider.js.map +1 -1
  68. package/dist/core/stealth-patches.d.ts +58 -0
  69. package/dist/core/stealth-patches.d.ts.map +1 -0
  70. package/dist/core/stealth-patches.js +340 -0
  71. package/dist/core/stealth-patches.js.map +1 -0
  72. package/dist/core/strategies.d.ts +20 -0
  73. package/dist/core/strategies.d.ts.map +1 -1
  74. package/dist/core/strategies.js +284 -48
  75. package/dist/core/strategies.js.map +1 -1
  76. package/dist/core/strategy-hooks.d.ts +1 -1
  77. package/dist/core/strategy-hooks.d.ts.map +1 -1
  78. package/dist/index.d.ts +11 -0
  79. package/dist/index.d.ts.map +1 -1
  80. package/dist/index.js +37 -15
  81. package/dist/index.js.map +1 -1
  82. package/dist/mcp/server.js +109 -4
  83. package/dist/mcp/server.js.map +1 -1
  84. package/dist/server/app.d.ts.map +1 -1
  85. package/dist/server/app.js +29 -0
  86. package/dist/server/app.js.map +1 -1
  87. package/dist/server/middleware/rate-limit.d.ts +2 -1
  88. package/dist/server/middleware/rate-limit.d.ts.map +1 -1
  89. package/dist/server/middleware/rate-limit.js +24 -8
  90. package/dist/server/middleware/rate-limit.js.map +1 -1
  91. package/dist/server/routes/agent.d.ts +4 -0
  92. package/dist/server/routes/agent.d.ts.map +1 -1
  93. package/dist/server/routes/agent.js +196 -9
  94. package/dist/server/routes/agent.js.map +1 -1
  95. package/dist/server/routes/batch.js +5 -5
  96. package/dist/server/routes/batch.js.map +1 -1
  97. package/dist/server/routes/compat.d.ts.map +1 -1
  98. package/dist/server/routes/compat.js +1 -0
  99. package/dist/server/routes/compat.js.map +1 -1
  100. package/dist/server/routes/fetch.d.ts.map +1 -1
  101. package/dist/server/routes/fetch.js +60 -6
  102. package/dist/server/routes/fetch.js.map +1 -1
  103. package/dist/server/routes/mcp.d.ts.map +1 -1
  104. package/dist/server/routes/mcp.js +103 -2
  105. package/dist/server/routes/mcp.js.map +1 -1
  106. package/dist/server/routes/search.js +1 -1
  107. package/dist/server/routes/search.js.map +1 -1
  108. package/dist/types.d.ts +55 -4
  109. package/dist/types.d.ts.map +1 -1
  110. package/dist/types.js +4 -1
  111. package/dist/types.js.map +1 -1
  112. package/llms.txt +55 -125
  113. package/package.json +15 -1
@@ -6,6 +6,7 @@ import { peel } from '../index.js';
6
6
  import { fetch as undiciFetch } from 'undici';
7
7
  import { createHash } from 'crypto';
8
8
  import { discoverSitemap } from './sitemap.js';
9
+ import { generateJobId, loadCheckpoint, saveCheckpoint, deleteCheckpoint, } from './crawl-checkpoint.js';
9
10
  /** Safely compile a user-supplied regex pattern. Rejects patterns longer than 200 chars
10
11
  * and wraps compilation in a try-catch to prevent invalid regex crashes. */
11
12
  function safeRegex(pattern) {
@@ -19,6 +20,15 @@ function safeRegex(pattern) {
19
20
  throw new Error(`Invalid regex pattern: ${pattern}`);
20
21
  }
21
22
  }
23
+ /** Maximum pages allowed per crawl, keyed by tier name. crawl() clamps the requested maxPages to this ceiling and uses the `free` limit when the tier is missing or has no entry here. NOTE(review): this is a plain object literal, so an inherited key such as 'constructor' looks up as non-nullish and bypasses the `??` fallback — confirm tier is validated upstream. */
24
+ const TIER_MAX_PAGES = {
25
+ free: 10,
26
+ starter: 25,
27
+ pro: 50,
28
+ enterprise: 100,
29
+ max: 100,
30
+ admin: 10000,
31
+ };
22
32
  /**
23
33
  * Parse robots.txt and return disallowed paths for User-agent: *
24
34
  */
@@ -106,10 +116,11 @@ function isAllowedByRobots(url, rules) {
106
116
  * ```
107
117
  */
108
118
  export async function crawl(startUrl, options = {}) {
109
- const { maxPages = 10, maxDepth = 2, allowedDomains, excludePatterns = [], respectRobotsTxt = true, rateLimitMs = 1000, sitemapFirst = false, strategy = 'bfs', deduplication = true, includePatterns = [], onProgress, onPage, ...peelOptions } = options;
119
+ const { maxPages = 10, tier, maxDepth = 2, allowedDomains, excludePatterns = [], respectRobotsTxt = true, rateLimitMs = 1000, sitemapFirst = false, strategy = 'bfs', deduplication = true, includePatterns = [], resume = false, onProgress, onPage, ...peelOptions } = options;
110
120
  const crawlStartTime = Date.now();
111
121
  // Validate limits
112
- const validatedMaxPages = Math.min(Math.max(maxPages, 1), 100);
122
+ const tierMaxPages = TIER_MAX_PAGES[tier || 'free'] ?? TIER_MAX_PAGES.free;
123
+ const validatedMaxPages = Math.min(Math.max(maxPages, 1), tierMaxPages);
113
124
  const validatedMaxDepth = Math.min(Math.max(maxDepth, 1), 5);
114
125
  const validatedRateLimit = Math.max(rateLimitMs, 100); // Min 100ms between requests
115
126
  // Parse starting URL
@@ -133,14 +144,40 @@ export async function crawl(startUrl, options = {}) {
133
144
  }
134
145
  }
135
146
  const effectiveRateLimit = robotsRules.crawlDelay || validatedRateLimit;
147
+ // Checkpoint: generate a deterministic job ID for this crawl
148
+ const crawlOptionsForCheckpoint = {
149
+ maxPages: validatedMaxPages,
150
+ maxDepth: validatedMaxDepth,
151
+ includes: includePatterns,
152
+ excludes: excludePatterns,
153
+ };
154
+ const jobId = generateJobId(startUrl, crawlOptionsForCheckpoint);
155
+ // Load existing checkpoint if resume is requested
156
+ const checkpoint = resume ? loadCheckpoint(jobId) : null;
157
+ if (checkpoint) {
158
+ console.error(`[Crawler] Resuming crawl from checkpoint: ${checkpoint.completed.size} pages already crawled`);
159
+ }
136
160
  // State tracking
137
161
  const results = [];
138
162
  const visited = new Set();
139
163
  const contentFingerprints = new Set();
140
164
  let failedCount = 0;
141
- const queue = [
142
- { url: startUrl, depth: 0, parent: null },
143
- ];
165
+ // If resuming, restore visited/results from checkpoint
166
+ if (checkpoint) {
167
+ for (const [url] of checkpoint.completed) {
168
+ visited.add(url);
169
+ }
170
+ }
171
+ const queue = [];
172
+ // If resuming with pending URLs, restore queue; otherwise start from scratch
173
+ if (checkpoint && checkpoint.pending.length > 0) {
174
+ for (const pendingUrl of checkpoint.pending) {
175
+ queue.push({ url: pendingUrl, depth: 1, parent: startUrl });
176
+ }
177
+ }
178
+ else {
179
+ queue.push({ url: startUrl, depth: 0, parent: null });
180
+ }
144
181
  // Sitemap-first: Discover URLs from sitemap before crawling
145
182
  if (sitemapFirst) {
146
183
  try {
@@ -223,6 +260,22 @@ export async function crawl(startUrl, options = {}) {
223
260
  crawlResult.fingerprint = fingerprint;
224
261
  }
225
262
  results.push(crawlResult);
263
+ // Save checkpoint every 5 pages
264
+ if (results.length % 5 === 0) {
265
+ saveCheckpoint({
266
+ jobId,
267
+ startUrl,
268
+ completed: new Map(results
269
+ .filter(r => !r.error)
270
+ .map(r => [r.url, { status: 200, contentLength: r.markdown.length, timestamp: Date.now() }])),
271
+ pending: queue.map(q => q.url),
272
+ discovered: [],
273
+ options: crawlOptionsForCheckpoint,
274
+ startedAt: crawlStartTime,
275
+ lastCheckpoint: Date.now(),
276
+ maxPages: validatedMaxPages,
277
+ });
278
+ }
226
279
  // Call per-page callback with full result
227
280
  if (onPage) {
228
281
  onPage(crawlResult);
@@ -286,6 +339,8 @@ export async function crawl(startUrl, options = {}) {
286
339
  }
287
340
  }
288
341
  }
342
+ // Crawl complete — clean up checkpoint
343
+ deleteCheckpoint(jobId);
289
344
  return results;
290
345
  }
291
346
  //# sourceMappingURL=crawler.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"crawler.js","sourceRoot":"","sources":["../../src/core/crawler.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,IAAI,EAAE,MAAM,aAAa,CAAC;AAEnC,OAAO,EAAE,KAAK,IAAI,WAAW,EAAE,MAAM,QAAQ,CAAC;AAC9C,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AACpC,OAAO,EAAE,eAAe,EAAE,MAAM,cAAc,CAAC;AAE/C;6EAC6E;AAC7E,SAAS,SAAS,CAAC,OAAe;IAChC,IAAI,OAAO,CAAC,MAAM,GAAG,GAAG,EAAE,CAAC;QACzB,MAAM,IAAI,KAAK,CAAC,2BAA2B,OAAO,CAAC,MAAM,kBAAkB,CAAC,CAAC;IAC/E,CAAC;IACD,IAAI,CAAC;QACH,OAAO,IAAI,MAAM,CAAC,OAAO,CAAC,CAAC;IAC7B,CAAC;IAAC,MAAM,CAAC;QACP,MAAM,IAAI,KAAK,CAAC,0BAA0B,OAAO,EAAE,CAAC,CAAC;IACvD,CAAC;AACH,CAAC;AA+DD;;GAEG;AACH,KAAK,UAAU,cAAc,CAAC,MAAc;IAC1C,MAAM,SAAS,GAAG,WAAW,MAAM,aAAa,CAAC;IAEjD,IAAI,CAAC;QACH,MAAM,QAAQ,GAAG,MAAM,WAAW,CAAC,SAAS,EAAE;YAC5C,OAAO,EAAE;gBACP,YAAY,EAAE,sCAAsC;aACrD;YACD,MAAM,EAAE,WAAW,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,mBAAmB;SACvD,CAAC,CAAC;QAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,gDAAgD;YAChD,OAAO,EAAE,eAAe,EAAE,EAAE,EAAE,CAAC;QACjC,CAAC;QAED,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;QACnC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAE/B,MAAM,eAAe,GAAa,EAAE,CAAC;QACrC,IAAI,UAA8B,CAAC;QACnC,IAAI,eAAe,GAAG,KAAK,CAAC;QAE5B,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;YAE5B,0BAA0B;YAC1B,IAAI,OAAO,CAAC,WAAW,EAAE,CAAC,UAAU,CAAC,aAAa,CAAC,EAAE,CAAC;gBACpD,MAAM,KAAK,GAAG,OAAO,CAAC,SAAS,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC;gBAC7D,eAAe,GAAG,KAAK,KAAK,GAAG,CAAC;gBAChC,SAAS;YACX,CAAC;YAED,IAAI,CAAC,eAAe;gBAAE,SAAS;YAE/B,4BAA4B;YAC5B,IAAI,OAAO,CAAC,WAAW,EAAE,CAAC,UAAU,CAAC,WAAW,CAAC,EAAE,CAAC;gBAClD,MAAM,IAAI,GAAG,OAAO,CAAC,SAAS,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC;gBAC1D,IAAI,IAAI,EAAE,CAAC;oBACT,eAAe,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;gBAC7B,CAAC;YACH,CAAC;YAED,8BAA8B;YAC9B,IAAI,OAAO,CAAC,WAAW,EAAE,CAAC,UAAU,CAAC,cAAc,CAAC,EAAE,CAAC;gBACrD,MAAM,KAAK,GAAG,QAAQ,CAAC,OAAO,CAAC,SAAS,CAAC,cAAc,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;gBACxE,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,EAAE,CAAC;oBAClB,UAAU,GAAG,K
AAK,GAAG,IAAI,CAAC,CAAC,0BAA0B;gBACvD,CAAC;YACH,CAAC;QACH,CAAC;QAED,OAAO,EAAE,eAAe,EAAE,UAAU,EAAE,CAAC;IACzC,CAAC;IAAC,MAAM,CAAC;QACP,iDAAiD;QACjD,OAAO,EAAE,eAAe,EAAE,EAAE,EAAE,CAAC;IACjC,CAAC;AACH,CAAC;AAED;;GAEG;AACH,SAAS,iBAAiB,CAAC,GAAW,EAAE,KAAkB;IACxD,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;IAC5B,MAAM,IAAI,GAAG,MAAM,CAAC,QAAQ,CAAC;IAE7B,KAAK,MAAM,UAAU,IAAI,KAAK,CAAC,eAAe,EAAE,CAAC;QAC/C,4EAA4E;QAC5E,IAAI,IAAI,CAAC,UAAU,CAAC,UAAU,CAAC,EAAE,CAAC;YAChC,OAAO,KAAK,CAAC;QACf,CAAC;IACH,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;;;;;;;;;;;;;;;;;GAkBG;AACH,MAAM,CAAC,KAAK,UAAU,KAAK,CACzB,QAAgB,EAChB,UAAwB,EAAE;IAE1B,MAAM,EACJ,QAAQ,GAAG,EAAE,EACb,QAAQ,GAAG,CAAC,EACZ,cAAc,EACd,eAAe,GAAG,EAAE,EACpB,gBAAgB,GAAG,IAAI,EACvB,WAAW,GAAG,IAAI,EAClB,YAAY,GAAG,KAAK,EACpB,QAAQ,GAAG,KAAK,EAChB,aAAa,GAAG,IAAI,EACpB,eAAe,GAAG,EAAE,EACpB,UAAU,EACV,MAAM,EACN,GAAG,WAAW,EACf,GAAG,OAAO,CAAC;IAEZ,MAAM,cAAc,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAElC,kBAAkB;IAClB,MAAM,iBAAiB,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;IAC/D,MAAM,iBAAiB,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;IAC7D,MAAM,kBAAkB,GAAG,IAAI,CAAC,GAAG,CAAC,WAAW,EAAE,GAAG,CAAC,CAAC,CAAC,6BAA6B;IAEpF,qBAAqB;IACrB,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,QAAQ,CAAC,CAAC;IACtC,MAAM,WAAW,GAAG,WAAW,CAAC,QAAQ,CAAC;IAEzC,kDAAkD;IAClD,MAAM,uBAAuB,GAAG,cAAc,IAAI,cAAc,CAAC,MAAM,GAAG,CAAC;QACzE,CAAC,CAAC,cAAc;QAChB,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC;IAElB,mEAAmE;IACnE,MAAM,cAAc,GAAG,eAAe,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC;IAE1E,mEAAmE;IACnE,MAAM,cAAc,GAAG,eAAe,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC;IAE1E,6BAA6B;IAC7B,IAAI,WAAW,GAAgB,EAAE,eAAe,EAAE,EAAE,EAAE,CAAC;IACvD,IAAI,gBAAgB,EAAE,CAAC;QACrB,WAAW,GAAG,MAAM,cAAc,CAAC,WAAW,CAAC,CAAC;QAEhD,qEAAqE;QACrE,IAAI,WAAW,CAAC,UAAU,IAAI,WAAW,CAAC,UAAU,GAAG,kBAAkB,EAAE,CAAC;YAC1E,OAAO,CAAC,KAAK,CAAC,gDAAgD,WAAW,CAAC,UAAU,IAAI,CAAC,CAAC;QAC5F,CAAC;IACH,CAAC;IAED,MAAM,kBAAkB,GAAG,WAAW,CA
AC,UAAU,IAAI,kBAAkB,CAAC;IAExE,iBAAiB;IACjB,MAAM,OAAO,GAAkB,EAAE,CAAC;IAClC,MAAM,OAAO,GAAG,IAAI,GAAG,EAAU,CAAC;IAClC,MAAM,mBAAmB,GAAG,IAAI,GAAG,EAAU,CAAC;IAC9C,IAAI,WAAW,GAAG,CAAC,CAAC;IACpB,MAAM,KAAK,GAAiE;QAC1E,EAAE,GAAG,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC,EAAE,MAAM,EAAE,IAAI,EAAE;KAC1C,CAAC;IAEF,4DAA4D;IAC5D,IAAI,YAAY,EAAE,CAAC;QACjB,IAAI,CAAC;YACH,MAAM,OAAO,GAAG,MAAM,eAAe,CAAC,WAAW,EAAE,EAAE,OAAO,EAAE,KAAK,EAAE,OAAO,EAAE,iBAAiB,EAAE,CAAC,CAAC;YACnG,KAAK,MAAM,KAAK,IAAI,OAAO,CAAC,IAAI,EAAE,CAAC;gBACjC,MAAM,QAAQ,GAAG,KAAK,CAAC,GAAG,CAAC;gBAC3B,IAAI,CAAC;oBACH,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,QAAQ,CAAC,CAAC;oBACtC,IAAI,uBAAuB,CAAC,QAAQ,CAAC,WAAW,CAAC,QAAQ,CAAC,EAAE,CAAC;wBAC3D,KAAK,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC,EAAE,MAAM,EAAE,QAAQ,EAAE,CAAC,CAAC;oBAC5D,CAAC;gBACH,CAAC;gBAAC,MAAM,CAAC,CAAC,uBAAuB,CAAC,CAAC;YACrC,CAAC;QACH,CAAC;QAAC,MAAM,CAAC,CAAC,yBAAyB,CAAC,CAAC;IACvC,CAAC;IAED,OAAO,KAAK,CAAC,MAAM,GAAG,CAAC,IAAI,OAAO,CAAC,MAAM,GAAG,iBAAiB,EAAE,CAAC;QAC9D,0CAA0C;QAC1C,MAAM,IAAI,GAAG,QAAQ,KAAK,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,EAAG,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,EAAG,CAAC;QAChE,MAAM,EAAE,GAAG,EAAE,KAAK,EAAE,MAAM,EAAE,GAAG,IAAI,CAAC;QAEpC,0BAA0B;QAC1B,IAAI,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC;YAAE,SAAS;QAC/B,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QAEjB,yBAAyB;QACzB,IAAI,KAAK,GAAG,iBAAiB;YAAE,SAAS;QAExC,eAAe;QACf,IAAI,MAAW,CAAC;QAChB,IAAI,CAAC;YACH,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;QACxB,CAAC;QAAC,MAAM,CAAC;YACP,SAAS,CAAC,oBAAoB;QAChC,CAAC;QAED,6BAA6B;QAC7B,IAAI,CAAC,uBAAuB,CAAC,QAAQ,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC;YACvD,SAAS;QACX,CAAC;QAED,yBAAyB;QACzB,IAAI,cAAc,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC;YAClD,SAAS;QACX,CAAC;QAED,yBAAyB;QACzB,IAAI,cAAc,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,cAAc,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC;YAChF,SAAS;QACX,CAAC;QAED,mBAAmB;QACnB,IAAI,gBAAgB,IAAI,CAAC,iBAAiB,CAAC,GAAG,EAAE,WAAW,CAAC,EAAE,CAAC;YAC7D,OAAO,CAAC,KAAK,CAAC,sBAAsB,GAAG,6BAA6B,CAAC,CAA
C;YACtE,SAAS;QACX,CAAC;QAED,iBAAiB;QACjB,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,GAAG,EAAE;gBAC7B,GAAG,WAAW;gBACd,MAAM,EAAE,UAAU;aACnB,CAAC,CAAC;YAEH,6CAA6C;YAC7C,IAAI,WAA+B,CAAC;YACpC,IAAI,aAAa,EAAE,CAAC;gBAClB,WAAW,GAAG,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;gBACxE,IAAI,mBAAmB,CAAC,GAAG,CAAC,WAAW,CAAC,EAAE,CAAC;oBACzC,yBAAyB;oBACzB,SAAS;gBACX,CAAC;gBACD,mBAAmB,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC;YACvC,CAAC;YAED,MAAM,WAAW,GAAgB;gBAC/B,GAAG,EAAE,MAAM,CAAC,GAAG;gBACf,KAAK,EAAE,MAAM,CAAC,KAAK;gBACnB,QAAQ,EAAE,MAAM,CAAC,OAAO;gBACxB,KAAK,EAAE,MAAM,CAAC,KAAK;gBACnB,KAAK;gBACL,MAAM;gBACN,OAAO,EAAE,MAAM,CAAC,OAAO;aACxB,CAAC;YAEF,IAAI,WAAW,EAAE,CAAC;gBAChB,WAAW,CAAC,WAAW,GAAG,WAAW,CAAC;YACxC,CAAC;YAED,OAAO,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;YAE1B,0CAA0C;YAC1C,IAAI,MAAM,EAAE,CAAC;gBACX,MAAM,CAAC,WAAW,CAAC,CAAC;YACtB,CAAC;YAED,yBAAyB;YACzB,IAAI,UAAU,EAAE,CAAC;gBACf,UAAU,CAAC;oBACT,OAAO,EAAE,OAAO,CAAC,MAAM;oBACvB,MAAM,EAAE,KAAK,CAAC,MAAM;oBACpB,MAAM,EAAE,WAAW;oBACnB,UAAU,EAAE,GAAG;oBACf,OAAO,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,cAAc;iBACrC,CAAC,CAAC;YACL,CAAC;YAED,gCAAgC;YAChC,IAAI,KAAK,GAAG,iBAAiB,EAAE,CAAC;gBAC9B,KAAK,MAAM,IAAI,IAAI,MAAM,CAAC,KAAK,EAAE,CAAC;oBAChC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;wBACvB,KAAK,CAAC,IAAI,CAAC;4BACT,GAAG,EAAE,IAAI;4BACT,KAAK,EAAE,KAAK,GAAG,CAAC;4BAChB,MAAM,EAAE,GAAG;yBACZ,CAAC,CAAC;oBACL,CAAC;gBACH,CAAC;YACH,CAAC;YAED,gBAAgB;YAChB,IAAI,OAAO,CAAC,MAAM,GAAG,iBAAiB,EAAE,CAAC;gBACvC,MAAM,IAAI,OAAO,CAAC,OAAO,CAAC,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,kBAAkB,CAAC,CAAC,CAAC;YACxE,CAAC;QACH,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,yBAAyB;YACzB,WAAW,EAAE,CAAC;YACd,MAAM,YAAY,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,CAAC;YAC9E,OAAO,CAAC,KAAK,CAAC,6BAA6B,GAAG,KAAK,YAAY,EAAE,CAAC,CAAC;YAEnE,MAAM,WAAW,GAAgB;gBAC/B,GAAG;gBACH,KAAK,EAAE,EAAE;gBACT,QAAQ,EAAE,EAAE;gBACZ,KAAK,EAAE,EAAE;gBACT,KAAK;gBACL,MAAM;gBACN,OAAO,EAAE,CAAC;gBACV,KAAK,EAAE,YAAY;aACpB,CAAC;YAEF,OAAO,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;Y
AE1B,2CAA2C;YAC3C,IAAI,MAAM,EAAE,CAAC;gBACX,MAAM,CAAC,WAAW,CAAC,CAAC;YACtB,CAAC;YAED,+CAA+C;YAC/C,IAAI,UAAU,EAAE,CAAC;gBACf,UAAU,CAAC;oBACT,OAAO,EAAE,OAAO,CAAC,MAAM;oBACvB,MAAM,EAAE,KAAK,CAAC,MAAM;oBACpB,MAAM,EAAE,WAAW;oBACnB,UAAU,EAAE,GAAG;oBACf,OAAO,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,cAAc;iBACrC,CAAC,CAAC;YACL,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC"}
1
+ {"version":3,"file":"crawler.js","sourceRoot":"","sources":["../../src/core/crawler.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,IAAI,EAAE,MAAM,aAAa,CAAC;AAEnC,OAAO,EAAE,KAAK,IAAI,WAAW,EAAE,MAAM,QAAQ,CAAC;AAC9C,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AACpC,OAAO,EAAE,eAAe,EAAE,MAAM,cAAc,CAAC;AAC/C,OAAO,EACL,aAAa,EACb,cAAc,EACd,cAAc,EACd,gBAAgB,GACjB,MAAM,uBAAuB,CAAC;AAE/B;6EAC6E;AAC7E,SAAS,SAAS,CAAC,OAAe;IAChC,IAAI,OAAO,CAAC,MAAM,GAAG,GAAG,EAAE,CAAC;QACzB,MAAM,IAAI,KAAK,CAAC,2BAA2B,OAAO,CAAC,MAAM,kBAAkB,CAAC,CAAC;IAC/E,CAAC;IACD,IAAI,CAAC;QACH,OAAO,IAAI,MAAM,CAAC,OAAO,CAAC,CAAC;IAC7B,CAAC;IAAC,MAAM,CAAC;QACP,MAAM,IAAI,KAAK,CAAC,0BAA0B,OAAO,EAAE,CAAC,CAAC;IACvD,CAAC;AACH,CAAC;AAED,qCAAqC;AACrC,MAAM,cAAc,GAA2B;IAC7C,IAAI,EAAE,EAAE;IACR,OAAO,EAAE,EAAE;IACX,GAAG,EAAE,EAAE;IACP,UAAU,EAAE,GAAG;IACf,GAAG,EAAE,GAAG;IACR,KAAK,EAAE,KAAK;CACb,CAAC;AAmEF;;GAEG;AACH,KAAK,UAAU,cAAc,CAAC,MAAc;IAC1C,MAAM,SAAS,GAAG,WAAW,MAAM,aAAa,CAAC;IAEjD,IAAI,CAAC;QACH,MAAM,QAAQ,GAAG,MAAM,WAAW,CAAC,SAAS,EAAE;YAC5C,OAAO,EAAE;gBACP,YAAY,EAAE,sCAAsC;aACrD;YACD,MAAM,EAAE,WAAW,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,mBAAmB;SACvD,CAAC,CAAC;QAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,gDAAgD;YAChD,OAAO,EAAE,eAAe,EAAE,EAAE,EAAE,CAAC;QACjC,CAAC;QAED,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;QACnC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAE/B,MAAM,eAAe,GAAa,EAAE,CAAC;QACrC,IAAI,UAA8B,CAAC;QACnC,IAAI,eAAe,GAAG,KAAK,CAAC;QAE5B,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;YAE5B,0BAA0B;YAC1B,IAAI,OAAO,CAAC,WAAW,EAAE,CAAC,UAAU,CAAC,aAAa,CAAC,EAAE,CAAC;gBACpD,MAAM,KAAK,GAAG,OAAO,CAAC,SAAS,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC;gBAC7D,eAAe,GAAG,KAAK,KAAK,GAAG,CAAC;gBAChC,SAAS;YACX,CAAC;YAED,IAAI,CAAC,eAAe;gBAAE,SAAS;YAE/B,4BAA4B;YAC5B,IAAI,OAAO,CAAC,WAAW,EAAE,CAAC,UAAU,CAAC,WAAW,CAAC,EAAE,CAAC;gBAClD,MAAM,IAAI,GAAG,OAAO,CAAC,SAAS,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC;gBAC1D,IAAI,IAAI,EAAE,CAAC;oBACT,eAAe,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;gBAC7B,CAAC;YACH,CAAC
;YAED,8BAA8B;YAC9B,IAAI,OAAO,CAAC,WAAW,EAAE,CAAC,UAAU,CAAC,cAAc,CAAC,EAAE,CAAC;gBACrD,MAAM,KAAK,GAAG,QAAQ,CAAC,OAAO,CAAC,SAAS,CAAC,cAAc,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;gBACxE,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,EAAE,CAAC;oBAClB,UAAU,GAAG,KAAK,GAAG,IAAI,CAAC,CAAC,0BAA0B;gBACvD,CAAC;YACH,CAAC;QACH,CAAC;QAED,OAAO,EAAE,eAAe,EAAE,UAAU,EAAE,CAAC;IACzC,CAAC;IAAC,MAAM,CAAC;QACP,iDAAiD;QACjD,OAAO,EAAE,eAAe,EAAE,EAAE,EAAE,CAAC;IACjC,CAAC;AACH,CAAC;AAED;;GAEG;AACH,SAAS,iBAAiB,CAAC,GAAW,EAAE,KAAkB;IACxD,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;IAC5B,MAAM,IAAI,GAAG,MAAM,CAAC,QAAQ,CAAC;IAE7B,KAAK,MAAM,UAAU,IAAI,KAAK,CAAC,eAAe,EAAE,CAAC;QAC/C,4EAA4E;QAC5E,IAAI,IAAI,CAAC,UAAU,CAAC,UAAU,CAAC,EAAE,CAAC;YAChC,OAAO,KAAK,CAAC;QACf,CAAC;IACH,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;;;;;;;;;;;;;;;;;GAkBG;AACH,MAAM,CAAC,KAAK,UAAU,KAAK,CACzB,QAAgB,EAChB,UAAwB,EAAE;IAE1B,MAAM,EACJ,QAAQ,GAAG,EAAE,EACb,IAAI,EACJ,QAAQ,GAAG,CAAC,EACZ,cAAc,EACd,eAAe,GAAG,EAAE,EACpB,gBAAgB,GAAG,IAAI,EACvB,WAAW,GAAG,IAAI,EAClB,YAAY,GAAG,KAAK,EACpB,QAAQ,GAAG,KAAK,EAChB,aAAa,GAAG,IAAI,EACpB,eAAe,GAAG,EAAE,EACpB,MAAM,GAAG,KAAK,EACd,UAAU,EACV,MAAM,EACN,GAAG,WAAW,EACf,GAAG,OAAO,CAAC;IAEZ,MAAM,cAAc,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAElC,kBAAkB;IAClB,MAAM,YAAY,GAAG,cAAc,CAAC,IAAI,IAAI,MAAM,CAAC,IAAI,cAAc,CAAC,IAAI,CAAC;IAC3E,MAAM,iBAAiB,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC,CAAC,EAAE,YAAY,CAAC,CAAC;IACxE,MAAM,iBAAiB,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;IAC7D,MAAM,kBAAkB,GAAG,IAAI,CAAC,GAAG,CAAC,WAAW,EAAE,GAAG,CAAC,CAAC,CAAC,6BAA6B;IAEpF,qBAAqB;IACrB,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,QAAQ,CAAC,CAAC;IACtC,MAAM,WAAW,GAAG,WAAW,CAAC,QAAQ,CAAC;IAEzC,kDAAkD;IAClD,MAAM,uBAAuB,GAAG,cAAc,IAAI,cAAc,CAAC,MAAM,GAAG,CAAC;QACzE,CAAC,CAAC,cAAc;QAChB,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC;IAElB,mEAAmE;IACnE,MAAM,cAAc,GAAG,eAAe,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC;IAE1E,mEAAmE;IACnE,MAAM,cAAc,GAAG,eAAe,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC,SAAS,CAAC,OAAO,C
AAC,CAAC,CAAC;IAE1E,6BAA6B;IAC7B,IAAI,WAAW,GAAgB,EAAE,eAAe,EAAE,EAAE,EAAE,CAAC;IACvD,IAAI,gBAAgB,EAAE,CAAC;QACrB,WAAW,GAAG,MAAM,cAAc,CAAC,WAAW,CAAC,CAAC;QAEhD,qEAAqE;QACrE,IAAI,WAAW,CAAC,UAAU,IAAI,WAAW,CAAC,UAAU,GAAG,kBAAkB,EAAE,CAAC;YAC1E,OAAO,CAAC,KAAK,CAAC,gDAAgD,WAAW,CAAC,UAAU,IAAI,CAAC,CAAC;QAC5F,CAAC;IACH,CAAC;IAED,MAAM,kBAAkB,GAAG,WAAW,CAAC,UAAU,IAAI,kBAAkB,CAAC;IAExE,6DAA6D;IAC7D,MAAM,yBAAyB,GAAwB;QACrD,QAAQ,EAAE,iBAAiB;QAC3B,QAAQ,EAAE,iBAAiB;QAC3B,QAAQ,EAAE,eAAe;QACzB,QAAQ,EAAE,eAAe;KAC1B,CAAC;IACF,MAAM,KAAK,GAAG,aAAa,CAAC,QAAQ,EAAE,yBAAyB,CAAC,CAAC;IAEjE,kDAAkD;IAClD,MAAM,UAAU,GAAG,MAAM,CAAC,CAAC,CAAC,cAAc,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;IACzD,IAAI,UAAU,EAAE,CAAC;QACf,OAAO,CAAC,KAAK,CAAC,6CAA6C,UAAU,CAAC,SAAS,CAAC,IAAI,wBAAwB,CAAC,CAAC;IAChH,CAAC;IAED,iBAAiB;IACjB,MAAM,OAAO,GAAkB,EAAE,CAAC;IAClC,MAAM,OAAO,GAAG,IAAI,GAAG,EAAU,CAAC;IAClC,MAAM,mBAAmB,GAAG,IAAI,GAAG,EAAU,CAAC;IAC9C,IAAI,WAAW,GAAG,CAAC,CAAC;IAEpB,uDAAuD;IACvD,IAAI,UAAU,EAAE,CAAC;QACf,KAAK,MAAM,CAAC,GAAG,CAAC,IAAI,UAAU,CAAC,SAAS,EAAE,CAAC;YACzC,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QACnB,CAAC;IACH,CAAC;IAED,MAAM,KAAK,GAAiE,EAAE,CAAC;IAE/E,6EAA6E;IAC7E,IAAI,UAAU,IAAI,UAAU,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAChD,KAAK,MAAM,UAAU,IAAI,UAAU,CAAC,OAAO,EAAE,CAAC;YAC5C,KAAK,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,UAAU,EAAE,KAAK,EAAE,CAAC,EAAE,MAAM,EAAE,QAAQ,EAAE,CAAC,CAAC;QAC9D,CAAC;IACH,CAAC;SAAM,CAAC;QACN,KAAK,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC,EAAE,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC;IACxD,CAAC;IAED,4DAA4D;IAC5D,IAAI,YAAY,EAAE,CAAC;QACjB,IAAI,CAAC;YACH,MAAM,OAAO,GAAG,MAAM,eAAe,CAAC,WAAW,EAAE,EAAE,OAAO,EAAE,KAAK,EAAE,OAAO,EAAE,iBAAiB,EAAE,CAAC,CAAC;YACnG,KAAK,MAAM,KAAK,IAAI,OAAO,CAAC,IAAI,EAAE,CAAC;gBACjC,MAAM,QAAQ,GAAG,KAAK,CAAC,GAAG,CAAC;gBAC3B,IAAI,CAAC;oBACH,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,QAAQ,CAAC,CAAC;oBACtC,IAAI,uBAAuB,CAAC,QAAQ,CAAC,WAAW,CAAC,QAAQ,CAAC,EAAE,CAAC;wBAC3D,KAAK,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC,EAAE,MAAM,EAAE,QAAQ,EAAE,CAAC,CAAC;oBAC5D,CAAC;gBACH,
CAAC;gBAAC,MAAM,CAAC,CAAC,uBAAuB,CAAC,CAAC;YACrC,CAAC;QACH,CAAC;QAAC,MAAM,CAAC,CAAC,yBAAyB,CAAC,CAAC;IACvC,CAAC;IAED,OAAO,KAAK,CAAC,MAAM,GAAG,CAAC,IAAI,OAAO,CAAC,MAAM,GAAG,iBAAiB,EAAE,CAAC;QAC9D,0CAA0C;QAC1C,MAAM,IAAI,GAAG,QAAQ,KAAK,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,EAAG,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,EAAG,CAAC;QAChE,MAAM,EAAE,GAAG,EAAE,KAAK,EAAE,MAAM,EAAE,GAAG,IAAI,CAAC;QAEpC,0BAA0B;QAC1B,IAAI,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC;YAAE,SAAS;QAC/B,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QAEjB,yBAAyB;QACzB,IAAI,KAAK,GAAG,iBAAiB;YAAE,SAAS;QAExC,eAAe;QACf,IAAI,MAAW,CAAC;QAChB,IAAI,CAAC;YACH,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;QACxB,CAAC;QAAC,MAAM,CAAC;YACP,SAAS,CAAC,oBAAoB;QAChC,CAAC;QAED,6BAA6B;QAC7B,IAAI,CAAC,uBAAuB,CAAC,QAAQ,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC;YACvD,SAAS;QACX,CAAC;QAED,yBAAyB;QACzB,IAAI,cAAc,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC;YAClD,SAAS;QACX,CAAC;QAED,yBAAyB;QACzB,IAAI,cAAc,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,cAAc,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC;YAChF,SAAS;QACX,CAAC;QAED,mBAAmB;QACnB,IAAI,gBAAgB,IAAI,CAAC,iBAAiB,CAAC,GAAG,EAAE,WAAW,CAAC,EAAE,CAAC;YAC7D,OAAO,CAAC,KAAK,CAAC,sBAAsB,GAAG,6BAA6B,CAAC,CAAC;YACtE,SAAS;QACX,CAAC;QAED,iBAAiB;QACjB,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,GAAG,EAAE;gBAC7B,GAAG,WAAW;gBACd,MAAM,EAAE,UAAU;aACnB,CAAC,CAAC;YAEH,6CAA6C;YAC7C,IAAI,WAA+B,CAAC;YACpC,IAAI,aAAa,EAAE,CAAC;gBAClB,WAAW,GAAG,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;gBACxE,IAAI,mBAAmB,CAAC,GAAG,CAAC,WAAW,CAAC,EAAE,CAAC;oBACzC,yBAAyB;oBACzB,SAAS;gBACX,CAAC;gBACD,mBAAmB,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC;YACvC,CAAC;YAED,MAAM,WAAW,GAAgB;gBAC/B,GAAG,EAAE,MAAM,CAAC,GAAG;gBACf,KAAK,EAAE,MAAM,CAAC,KAAK;gBACnB,QAAQ,EAAE,MAAM,CAAC,OAAO;gBACxB,KAAK,EAAE,MAAM,CAAC,KAAK;gBACnB,KAAK;gBACL,MAAM;gBACN,OAAO,EAAE,MAAM,CAAC,OAAO;aACxB,CAAC;YAEF,IAAI,WAAW,EAAE,CAAC;gBAChB,WAAW,CAAC,WAAW,GAAG,WAAW,CAAC;YACxC,CAAC;YAED,OAAO,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;YAE1B,g
CAAgC;YAChC,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC;gBAC7B,cAAc,CAAC;oBACb,KAAK;oBACL,QAAQ;oBACR,SAAS,EAAE,IAAI,GAAG,CAChB,OAAO;yBACJ,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC;yBACrB,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,GAAG,EAAE,EAAE,MAAM,EAAE,GAAG,EAAE,aAAa,EAAE,CAAC,CAAC,QAAQ,CAAC,MAAM,EAAE,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC,CAC/F;oBACD,OAAO,EAAE,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC;oBAC9B,UAAU,EAAE,EAAE;oBACd,OAAO,EAAE,yBAAyB;oBAClC,SAAS,EAAE,cAAc;oBACzB,cAAc,EAAE,IAAI,CAAC,GAAG,EAAE;oBAC1B,QAAQ,EAAE,iBAAiB;iBAC5B,CAAC,CAAC;YACL,CAAC;YAED,0CAA0C;YAC1C,IAAI,MAAM,EAAE,CAAC;gBACX,MAAM,CAAC,WAAW,CAAC,CAAC;YACtB,CAAC;YAED,yBAAyB;YACzB,IAAI,UAAU,EAAE,CAAC;gBACf,UAAU,CAAC;oBACT,OAAO,EAAE,OAAO,CAAC,MAAM;oBACvB,MAAM,EAAE,KAAK,CAAC,MAAM;oBACpB,MAAM,EAAE,WAAW;oBACnB,UAAU,EAAE,GAAG;oBACf,OAAO,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,cAAc;iBACrC,CAAC,CAAC;YACL,CAAC;YAED,gCAAgC;YAChC,IAAI,KAAK,GAAG,iBAAiB,EAAE,CAAC;gBAC9B,KAAK,MAAM,IAAI,IAAI,MAAM,CAAC,KAAK,EAAE,CAAC;oBAChC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;wBACvB,KAAK,CAAC,IAAI,CAAC;4BACT,GAAG,EAAE,IAAI;4BACT,KAAK,EAAE,KAAK,GAAG,CAAC;4BAChB,MAAM,EAAE,GAAG;yBACZ,CAAC,CAAC;oBACL,CAAC;gBACH,CAAC;YACH,CAAC;YAED,gBAAgB;YAChB,IAAI,OAAO,CAAC,MAAM,GAAG,iBAAiB,EAAE,CAAC;gBACvC,MAAM,IAAI,OAAO,CAAC,OAAO,CAAC,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,kBAAkB,CAAC,CAAC,CAAC;YACxE,CAAC;QACH,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,yBAAyB;YACzB,WAAW,EAAE,CAAC;YACd,MAAM,YAAY,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,CAAC;YAC9E,OAAO,CAAC,KAAK,CAAC,6BAA6B,GAAG,KAAK,YAAY,EAAE,CAAC,CAAC;YAEnE,MAAM,WAAW,GAAgB;gBAC/B,GAAG;gBACH,KAAK,EAAE,EAAE;gBACT,QAAQ,EAAE,EAAE;gBACZ,KAAK,EAAE,EAAE;gBACT,KAAK;gBACL,MAAM;gBACN,OAAO,EAAE,CAAC;gBACV,KAAK,EAAE,YAAY;aACpB,CAAC;YAEF,OAAO,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;YAE1B,2CAA2C;YAC3C,IAAI,MAAM,EAAE,CAAC;gBACX,MAAM,CAAC,WAAW,CAAC,CAAC;YACtB,CAAC;YAED,+CAA+C;YAC/C,IAAI,UAAU,EAAE,CAAC;gBACf,UAAU,CAAC;oBACT,OAAO,EAAE,OAAO,CAAC,MAAM;oBACvB,MAAM
,EAAE,KAAK,CAAC,MAAM;oBACpB,MAAM,EAAE,WAAW;oBACnB,UAAU,EAAE,GAAG;oBACf,OAAO,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,cAAc;iBACrC,CAAC,CAAC;YACL,CAAC;QACH,CAAC;IACH,CAAC;IAED,uCAAuC;IACvC,gBAAgB,CAAC,KAAK,CAAC,CAAC;IAExB,OAAO,OAAO,CAAC;AACjB,CAAC"}
@@ -0,0 +1,27 @@
1
+ /**
2
+ * CycleTLS-based fetching for sites that detect Node.js TLS fingerprints.
3
+ * Uses a Go binary to spoof Chrome's exact TLS handshake.
4
+ *
5
+ * This is tried when stealth browser fetch fails due to TLS/HTTP2 blocks.
6
+ *
7
+ * Requires: npm install cycletls
8
+ * Usage: peel(url, { cycle: true })
9
+ */
10
+ import type { FetchResult } from './fetcher.js';
11
+ export interface CycleFetchOptions {
12
+ timeout?: number;
13
+ proxy?: string;
14
+ headers?: Record<string, string>;
15
+ }
16
+ export interface CycleFetchResult extends FetchResult {
17
+ method: 'cycle';
18
+ }
19
+ /**
20
+ * Check whether a `cycletls` directory exists under `node_modules` in the current working directory or its parent (filesystem probe only; the module is not loaded).
21
+ */
22
+ export declare function isCycleTLSAvailable(): boolean;
23
+ /**
24
+ * Fetch a URL using CycleTLS with Chrome JA3 fingerprint spoofing.
25
+ */
26
+ export declare function cycleFetch(url: string, options?: CycleFetchOptions): Promise<CycleFetchResult>;
27
+ //# sourceMappingURL=cycle-fetch.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"cycle-fetch.d.ts","sourceRoot":"","sources":["../../src/core/cycle-fetch.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAIH,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,cAAc,CAAC;AAoBhD,MAAM,WAAW,iBAAiB;IAChC,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CAClC;AAED,MAAM,WAAW,gBAAiB,SAAQ,WAAW;IACnD,MAAM,EAAE,OAAO,CAAC;CACjB;AAID;;GAEG;AACH,wBAAgB,mBAAmB,IAAI,OAAO,CAY7C;AAuBD;;GAEG;AACH,wBAAsB,UAAU,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,iBAAiB,GAAG,OAAO,CAAC,gBAAgB,CAAC,CAuCpG"}
@@ -0,0 +1,99 @@
1
+ /**
2
+ * CycleTLS-based fetching for sites that detect Node.js TLS fingerprints.
3
+ * Uses a Go binary to spoof Chrome's exact TLS handshake.
4
+ *
5
+ * This is tried when stealth browser fetch fails due to TLS/HTTP2 blocks.
6
+ *
7
+ * Requires: npm install cycletls
8
+ * Usage: peel(url, { cycle: true })
9
+ */
10
+ import { existsSync } from 'fs';
11
+ import { resolve as pathResolve } from 'path';
12
+ // Chrome 134 JA3 fingerprint
13
+ const CHROME_134_JA3 = '771,4865-4866-4867-49195-49199-49196-49200-52393-52392-49171-49172-156-157-47-53,0-23-65281-10-11-35-16-5-13-18-51-45-43-27-17513-21,29-23-24,0'; // Sent as the `ja3` request option by cycleFetch.
14
+ const CHROME_134_UA = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36'; // Sent as the `userAgent` request option by cycleFetch.
15
+ const DEFAULT_HEADERS = { // Baseline request headers; caller-supplied headers are spread over these in cycleFetch.
16
+ 'Sec-Ch-Ua': '"Chromium";v="134", "Google Chrome";v="134"',
17
+ 'Sec-Ch-Ua-Mobile': '?0',
18
+ 'Sec-Ch-Ua-Platform': '"macOS"',
19
+ 'Sec-Fetch-Dest': 'document',
20
+ 'Sec-Fetch-Mode': 'navigate',
21
+ 'Sec-Fetch-Site': 'none',
22
+ 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
23
+ 'Accept-Language': 'en-US,en;q=0.9',
24
+ };
25
+ let cycleTLSModule = null; // Cache for the lazily imported cycletls export; populated by getCycleTLS on first successful import.
26
+ /**
27
+ * Check if CycleTLS is installed.
28
+ */
29
export function isCycleTLSAvailable() {
    // Directory prefixes (relative to the working directory) whose
    // node_modules folder is probed, in order: the cwd itself, then its parent.
    const searchPrefixes = [[], ['..']];
    try {
        const workingDir = process.cwd();
        // ESM-friendly presence check: look for the package folder on disk
        // instead of trying to resolve/require it.
        return searchPrefixes.some((prefix) => {
            const packageDir = pathResolve(workingDir, ...prefix, 'node_modules', 'cycletls');
            return existsSync(packageDir);
        });
    }
    catch {
        // Any failure while probing is reported as "not available".
        return false;
    }
}
41
+ /**
42
+ * Lazy-load CycleTLS module.
43
+ */
44
/**
 * Lazily import the optional `cycletls` package and cache its export.
 *
 * @returns {Promise<any>} The module's default export, or the module
 *   namespace itself when there is no default export.
 * @throws {Error} When the dynamic import fails. The original import error is
 *   attached as `cause` so a broken install can be told apart from a missing one.
 */
async function getCycleTLS() {
    // Reuse the export resolved by an earlier successful call.
    if (cycleTLSModule) {
        return cycleTLSModule;
    }
    try {
        // eslint-disable-next-line @typescript-eslint/ban-ts-comment
        // @ts-ignore — cycletls is an optional peer dependency
        const mod = await import('cycletls');
        // CycleTLS exports: module.default or module itself depending on bundler
        cycleTLSModule = mod.default ?? mod;
        return cycleTLSModule;
    }
    catch (e) {
        // Keep the user-facing install hint, but preserve the underlying failure
        // instead of discarding it.
        throw new Error('CycleTLS not installed. Run: npm install cycletls\n' +
            'CycleTLS spoofs Chrome\'s TLS/JA3 fingerprint using a Go binary.\n' +
            'Learn more: https://github.com/Danny-Dasilva/CycleTLS', { cause: e });
    }
}
61
+ /**
62
+ * Fetch a URL using CycleTLS with Chrome JA3 fingerprint spoofing.
63
+ */
64
/**
 * Fetch a URL through CycleTLS using the Chrome 134 JA3 string and user agent.
 *
 * @param {string} url - URL to request with GET.
 * @param {object} [options]
 * @param {number} [options.timeout] - Timeout in milliseconds (default 30000).
 *   Converted to whole seconds for CycleTLS and never less than 1 second.
 * @param {string} [options.proxy] - Proxy forwarded unchanged to CycleTLS.
 * @param {Record<string, string>} [options.headers] - Headers merged over the defaults.
 * @returns {Promise<object>} `{ html, url, statusCode, contentType, method: 'cycle' }`.
 * @throws Whatever loading CycleTLS, starting it, or the request itself throws.
 */
export async function cycleFetch(url, options) {
    const initCycleTLS = await getCycleTLS();
    const cycleTLS = await initCycleTLS();
    // Caller headers are spread last so they override the defaults.
    const mergedHeaders = {
        ...DEFAULT_HEADERS,
        ...(options?.headers ?? {}),
    };
    const timeoutMs = options?.timeout ?? 30000;
    const requestOptions = {
        ja3: CHROME_134_JA3,
        userAgent: CHROME_134_UA,
        headers: mergedHeaders,
        // CycleTLS uses seconds. Clamp to 1 so a sub-500 ms timeout does not
        // round down to 0 and stop expressing the caller's limit.
        timeout: Math.max(1, Math.round(timeoutMs / 1000)),
    };
    if (options?.proxy) {
        requestOptions.proxy = options.proxy;
    }
    try {
        if (process.env.DEBUG) {
            console.debug('[webpeel]', 'CycleTLS fetch:', url);
        }
        const response = await cycleTLS(url, requestOptions, 'get');
        const body = await response.text();
        // Header names may come back in canonical case ("Content-Type"), so fall
        // back to a case-insensitive match when the lowercase key is absent.
        const responseHeaders = response.headers ?? {};
        const contentTypeKey = Object.keys(responseHeaders)
            .find((name) => name.toLowerCase() === 'content-type');
        const contentType = responseHeaders['content-type'] ??
            (contentTypeKey === undefined ? undefined : responseHeaders[contentTypeKey]);
        return {
            html: body,
            url,
            statusCode: response.status,
            contentType: contentType ?? 'text/html',
            method: 'cycle',
        };
    }
    finally {
        // Always clean up the Go process. Best-effort: a failing exit (sync or
        // async) must not mask the fetch result or the original error.
        try {
            await cycleTLS.exit();
        }
        catch {
            // Ignored on purpose: nothing more can be done if shutdown fails.
        }
    }
}
99
+ //# sourceMappingURL=cycle-fetch.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"cycle-fetch.js","sourceRoot":"","sources":["../../src/core/cycle-fetch.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,OAAO,EAAE,UAAU,EAAE,MAAM,IAAI,CAAC;AAChC,OAAO,EAAE,OAAO,IAAI,WAAW,EAAE,MAAM,MAAM,CAAC;AAG9C,6BAA6B;AAC7B,MAAM,cAAc,GAClB,iJAAiJ,CAAC;AAEpJ,MAAM,aAAa,GACjB,uHAAuH,CAAC;AAE1H,MAAM,eAAe,GAA2B;IAC9C,WAAW,EAAE,6CAA6C;IAC1D,kBAAkB,EAAE,IAAI;IACxB,oBAAoB,EAAE,SAAS;IAC/B,gBAAgB,EAAE,UAAU;IAC5B,gBAAgB,EAAE,UAAU;IAC5B,gBAAgB,EAAE,MAAM;IACxB,QAAQ,EAAE,iEAAiE;IAC3E,iBAAiB,EAAE,gBAAgB;CACpC,CAAC;AAYF,IAAI,cAAc,GAAQ,IAAI,CAAC;AAE/B;;GAEG;AACH,MAAM,UAAU,mBAAmB;IACjC,IAAI,CAAC;QACH,6DAA6D;QAC7D,+DAA+D;QAC/D,MAAM,GAAG,GAAG,OAAO,CAAC,GAAG,EAAE,CAAC;QAC1B,OAAO,CACL,UAAU,CAAC,WAAW,CAAC,GAAG,EAAE,cAAc,EAAE,UAAU,CAAC,CAAC;YACxD,UAAU,CAAC,WAAW,CAAC,GAAG,EAAE,IAAI,EAAE,cAAc,EAAE,UAAU,CAAC,CAAC,CAC/D,CAAC;IACJ,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,WAAW;IACxB,IAAI,cAAc;QAAE,OAAO,cAAc,CAAC;IAC1C,IAAI,CAAC;QACH,6DAA6D;QAC7D,uDAAuD;QACvD,MAAM,GAAG,GAAG,MAAM,MAAM,CAAC,UAAU,CAAC,CAAC;QACrC,yEAAyE;QACzE,cAAc,GAAG,GAAG,CAAC,OAAO,IAAI,GAAG,CAAC;QACpC,OAAO,cAAc,CAAC;IACxB,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC;QACX,MAAM,IAAI,KAAK,CACb,qDAAqD;YACrD,oEAAoE;YACpE,uDAAuD,CACxD,CAAC;IACJ,CAAC;AACH,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,UAAU,CAAC,GAAW,EAAE,OAA2B;IACvE,MAAM,YAAY,GAAG,MAAM,WAAW,EAAE,CAAC;IACzC,MAAM,QAAQ,GAAG,MAAM,YAAY,EAAE,CAAC;IAEtC,MAAM,aAAa,GAA2B;QAC5C,GAAG,eAAe;QAClB,GAAG,CAAC,OAAO,EAAE,OAAO,IAAI,EAAE,CAAC;KAC5B,CAAC;IAEF,MAAM,cAAc,GAAwB;QAC1C,GAAG,EAAE,cAAc;QACnB,SAAS,EAAE,aAAa;QACxB,OAAO,EAAE,aAAa;QACtB,OAAO,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,OAAO,EAAE,OAAO,IAAI,KAAK,CAAC,GAAG,IAAI,CAAC,EAAE,wBAAwB;KAClF,CAAC;IAEF,IAAI,OAAO,EAAE,KAAK,EAAE,CAAC;QACnB,cAAc,CAAC,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC;IACvC,CAAC;IAED,IAAI,CAAC;QACH,IAAI,OAAO,CAAC,GAAG,CAAC,KAAK,EAAE,CAAC;YACtB,OAAO,CAAC,KAAK,CAAC,WAAW,EAAE,iBAAiB,EAAE,GAAG,CAAC,CAAC;QACrD,CAAC;QAED,MAAM,QAAQ,GAAG,MAAM,QAAQ,CAAC,GAAG,EAAE,cAAc,EAAE,KAAK,CAAC,CAAC;QAC5D,MAA
M,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;QAEnC,OAAO;YACL,IAAI,EAAE,IAAI;YACV,GAAG;YACH,UAAU,EAAE,QAAQ,CAAC,MAAM;YAC3B,WAAW,EAAE,QAAQ,CAAC,OAAO,EAAE,CAAC,cAAc,CAAC,IAAI,WAAW;YAC9D,MAAM,EAAE,OAAO;SAChB,CAAC;IACJ,CAAC;YAAS,CAAC;QACT,iCAAiC;QACjC,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;IACxC,CAAC;AACH,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"domain-extractors.d.ts","sourceRoot":"","sources":["../../src/core/domain-extractors.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAQH,MAAM,WAAW,mBAAmB;IAClC,iDAAiD;IACjD,MAAM,EAAE,MAAM,CAAC;IACf,4EAA4E;IAC5E,IAAI,EAAE,MAAM,CAAC;IACb,sCAAsC;IACtC,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAChC,mDAAmD;IACnD,YAAY,EAAE,MAAM,CAAC;CACtB;AAED,+EAA+E;AAC/E,MAAM,MAAM,eAAe,GAAG,CAC5B,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,KACR,OAAO,CAAC,mBAAmB,GAAG,IAAI,CAAC,CAAC;AAgBzC;;GAEG;AACH,wBAAgB,kBAAkB,CAAC,GAAG,EAAE,MAAM,GAAG,eAAe,GAAG,IAAI,CAWtE;AAED;;;GAGG;AACH,wBAAsB,iBAAiB,CACrC,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,GACV,OAAO,CAAC,mBAAmB,GAAG,IAAI,CAAC,CAQrC"}
1
+ {"version":3,"file":"domain-extractors.d.ts","sourceRoot":"","sources":["../../src/core/domain-extractors.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAyDH,MAAM,WAAW,mBAAmB;IAClC,iDAAiD;IACjD,MAAM,EAAE,MAAM,CAAC;IACf,4EAA4E;IAC5E,IAAI,EAAE,MAAM,CAAC;IACb,sCAAsC;IACtC,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAChC,mDAAmD;IACnD,YAAY,EAAE,MAAM,CAAC;CACtB;AAED,+EAA+E;AAC/E,MAAM,MAAM,eAAe,GAAG,CAC5B,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,KACR,OAAO,CAAC,mBAAmB,GAAG,IAAI,CAAC,CAAC;AAuBzC;;GAEG;AACH,wBAAgB,kBAAkB,CAAC,GAAG,EAAE,MAAM,GAAG,eAAe,GAAG,IAAI,CAWtE;AAED;;;GAGG;AACH,wBAAsB,iBAAiB,CACrC,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,GACV,OAAO,CAAC,mBAAmB,GAAG,IAAI,CAAC,CAQrC"}