webpeel 0.16.0 → 0.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113) hide show
  1. package/LICENSE +11 -657
  2. package/README.md +246 -325
  3. package/dist/cli.js +330 -73
  4. package/dist/cli.js.map +1 -1
  5. package/dist/core/browser-fetch.d.ts +12 -0
  6. package/dist/core/browser-fetch.d.ts.map +1 -1
  7. package/dist/core/browser-fetch.js +70 -17
  8. package/dist/core/browser-fetch.js.map +1 -1
  9. package/dist/core/cf-worker-proxy.d.ts +33 -0
  10. package/dist/core/cf-worker-proxy.d.ts.map +1 -0
  11. package/dist/core/cf-worker-proxy.js +88 -0
  12. package/dist/core/cf-worker-proxy.js.map +1 -0
  13. package/dist/core/chunker.d.ts +47 -0
  14. package/dist/core/chunker.d.ts.map +1 -0
  15. package/dist/core/chunker.js +250 -0
  16. package/dist/core/chunker.js.map +1 -0
  17. package/dist/core/cloak-fetch.d.ts +43 -0
  18. package/dist/core/cloak-fetch.d.ts.map +1 -0
  19. package/dist/core/cloak-fetch.js +141 -0
  20. package/dist/core/cloak-fetch.js.map +1 -0
  21. package/dist/core/crawl-checkpoint.d.ts +55 -0
  22. package/dist/core/crawl-checkpoint.d.ts.map +1 -0
  23. package/dist/core/crawl-checkpoint.js +105 -0
  24. package/dist/core/crawl-checkpoint.js.map +1 -0
  25. package/dist/core/crawler.d.ts +5 -1
  26. package/dist/core/crawler.d.ts.map +1 -1
  27. package/dist/core/crawler.js +60 -5
  28. package/dist/core/crawler.js.map +1 -1
  29. package/dist/core/cycle-fetch.d.ts +27 -0
  30. package/dist/core/cycle-fetch.d.ts.map +1 -0
  31. package/dist/core/cycle-fetch.js +99 -0
  32. package/dist/core/cycle-fetch.js.map +1 -0
  33. package/dist/core/domain-extractors.d.ts.map +1 -1
  34. package/dist/core/domain-extractors.js +754 -14
  35. package/dist/core/domain-extractors.js.map +1 -1
  36. package/dist/core/google-cache.d.ts +30 -0
  37. package/dist/core/google-cache.d.ts.map +1 -0
  38. package/dist/core/google-cache.js +181 -0
  39. package/dist/core/google-cache.js.map +1 -0
  40. package/dist/core/markdown.d.ts +11 -0
  41. package/dist/core/markdown.d.ts.map +1 -1
  42. package/dist/core/markdown.js +43 -0
  43. package/dist/core/markdown.js.map +1 -1
  44. package/dist/core/peel-tls.d.ts +26 -0
  45. package/dist/core/peel-tls.d.ts.map +1 -0
  46. package/dist/core/peel-tls.js +221 -0
  47. package/dist/core/peel-tls.js.map +1 -0
  48. package/dist/core/pipeline.d.ts +5 -1
  49. package/dist/core/pipeline.d.ts.map +1 -1
  50. package/dist/core/pipeline.js +269 -21
  51. package/dist/core/pipeline.js.map +1 -1
  52. package/dist/core/schema-postprocess.d.ts +33 -0
  53. package/dist/core/schema-postprocess.d.ts.map +1 -0
  54. package/dist/core/schema-postprocess.js +470 -0
  55. package/dist/core/schema-postprocess.js.map +1 -0
  56. package/dist/core/schema-templates.d.ts +20 -0
  57. package/dist/core/schema-templates.d.ts.map +1 -0
  58. package/dist/core/schema-templates.js +131 -0
  59. package/dist/core/schema-templates.js.map +1 -0
  60. package/dist/core/search-fallback.d.ts +28 -0
  61. package/dist/core/search-fallback.d.ts.map +1 -0
  62. package/dist/core/search-fallback.js +185 -0
  63. package/dist/core/search-fallback.js.map +1 -0
  64. package/dist/core/search-provider.d.ts +47 -4
  65. package/dist/core/search-provider.d.ts.map +1 -1
  66. package/dist/core/search-provider.js +278 -7
  67. package/dist/core/search-provider.js.map +1 -1
  68. package/dist/core/stealth-patches.d.ts +58 -0
  69. package/dist/core/stealth-patches.d.ts.map +1 -0
  70. package/dist/core/stealth-patches.js +340 -0
  71. package/dist/core/stealth-patches.js.map +1 -0
  72. package/dist/core/strategies.d.ts +20 -0
  73. package/dist/core/strategies.d.ts.map +1 -1
  74. package/dist/core/strategies.js +284 -48
  75. package/dist/core/strategies.js.map +1 -1
  76. package/dist/core/strategy-hooks.d.ts +1 -1
  77. package/dist/core/strategy-hooks.d.ts.map +1 -1
  78. package/dist/index.d.ts +11 -0
  79. package/dist/index.d.ts.map +1 -1
  80. package/dist/index.js +37 -15
  81. package/dist/index.js.map +1 -1
  82. package/dist/mcp/server.js +109 -4
  83. package/dist/mcp/server.js.map +1 -1
  84. package/dist/server/app.d.ts.map +1 -1
  85. package/dist/server/app.js +29 -0
  86. package/dist/server/app.js.map +1 -1
  87. package/dist/server/middleware/rate-limit.d.ts +2 -1
  88. package/dist/server/middleware/rate-limit.d.ts.map +1 -1
  89. package/dist/server/middleware/rate-limit.js +24 -8
  90. package/dist/server/middleware/rate-limit.js.map +1 -1
  91. package/dist/server/routes/agent.d.ts +4 -0
  92. package/dist/server/routes/agent.d.ts.map +1 -1
  93. package/dist/server/routes/agent.js +196 -9
  94. package/dist/server/routes/agent.js.map +1 -1
  95. package/dist/server/routes/batch.js +5 -5
  96. package/dist/server/routes/batch.js.map +1 -1
  97. package/dist/server/routes/compat.d.ts.map +1 -1
  98. package/dist/server/routes/compat.js +1 -0
  99. package/dist/server/routes/compat.js.map +1 -1
  100. package/dist/server/routes/fetch.d.ts.map +1 -1
  101. package/dist/server/routes/fetch.js +60 -6
  102. package/dist/server/routes/fetch.js.map +1 -1
  103. package/dist/server/routes/mcp.d.ts.map +1 -1
  104. package/dist/server/routes/mcp.js +103 -2
  105. package/dist/server/routes/mcp.js.map +1 -1
  106. package/dist/server/routes/search.js +1 -1
  107. package/dist/server/routes/search.js.map +1 -1
  108. package/dist/types.d.ts +55 -4
  109. package/dist/types.d.ts.map +1 -1
  110. package/dist/types.js +4 -1
  111. package/dist/types.js.map +1 -1
  112. package/llms.txt +55 -125
  113. package/package.json +15 -1
@@ -0,0 +1,250 @@
1
+ /**
2
+ * Content chunker for RAG pipelines.
3
+ * Splits markdown content into overlapping chunks with rich metadata.
4
+ */
5
/** Default upper bound for one chunk, in estimated tokens. */
const DEFAULT_MAX_TOKENS = 512;
/** Default overlap carried from one chunk into the next, in estimated tokens. */
const DEFAULT_OVERLAP = 50;
/** Characters assumed per token; a rough approximation used for all size math. */
const CHARS_PER_TOKEN = 4;
/**
 * Split content into RAG-ready chunks with metadata.
 *
 * @param {string} content - Markdown text to split.
 * @param {object} [options]
 * @param {number} [options.maxTokens] - Chunk size limit in estimated tokens; falsy values fall back to 512.
 * @param {number} [options.overlap] - Overlap in estimated tokens; an explicit 0 disables overlap,
 *   any other falsy value falls back to 50.
 * @param {string} [options.strategy] - 'section' (default), 'paragraph' or 'fixed';
 *   unknown names are chunked by section.
 * @returns {{chunks: object[], totalChunks: number, originalLength: number, strategy: string, options: object}}
 *   The chunks plus the effective settings that produced them.
 */
export function chunkContent(content, options = {}) {
    // Tolerate an explicit `null` options argument as well as an omitted one.
    const requested = options ?? {};
    const maxTokens = requested.maxTokens || DEFAULT_MAX_TOKENS;
    // `0 || DEFAULT_OVERLAP` used to turn "no overlap" into the default of 50,
    // so overlap could never be switched off. Zero is now honoured.
    const overlap = requested.overlap === 0 ? 0 : requested.overlap || DEFAULT_OVERLAP;
    const strategy = requested.strategy || 'section';
    let chunks;
    switch (strategy) {
        case 'paragraph':
            chunks = chunkByParagraph(content, maxTokens, overlap);
            break;
        case 'fixed':
            chunks = chunkByFixed(content, maxTokens, overlap);
            break;
        case 'section':
        default:
            // Unknown strategy names fall back to section chunking.
            chunks = chunkBySection(content, maxTokens, overlap);
    }
    return {
        chunks,
        totalChunks: chunks.length,
        originalLength: content.length,
        // Reported as requested, even when an unknown name fell back to 'section'.
        strategy,
        options: { maxTokens, overlap, strategy },
    };
}
38
/**
 * Section-based chunking (recommended for RAG).
 * Splits on markdown headings, then splits sections larger than the limit by
 * paragraph. Every chunk's text is prefixed with its section heading.
 *
 * Offsets index into `content`:
 * - a section's first (or only) chunk starts at the heading line;
 * - later chunks start at their first new paragraph (overlap text excluded);
 * - `endOffset` is the end of the last paragraph the chunk contains.
 *
 * @param {string} content - Markdown text.
 * @param {number} maxTokens - Size limit per chunk body, in estimated tokens.
 * @param {number} overlap - Tail of the previous chunk to repeat, in estimated tokens.
 * @returns {object[]} Chunk records in document order.
 */
function chunkBySection(content, maxTokens, overlap) {
    const maxChars = maxTokens * CHARS_PER_TOKEN;
    const overlapChars = overlap * CHARS_PER_TOKEN;
    const chunks = [];
    // Single place where chunk records are built (previously copy-pasted three times).
    const pushChunk = (heading, depth, bodyText, startOffset, endOffset) => {
        const trimmed = bodyText.trim();
        const text = heading ? `${heading}\n\n${trimmed}` : trimmed;
        chunks.push({
            index: chunks.length,
            text,
            tokenCount: Math.ceil(text.length / CHARS_PER_TOKEN),
            wordCount: text.split(/\s+/).filter(Boolean).length,
            section: heading ? heading.replace(/^#+\s*/, '') : null,
            sectionDepth: depth,
            startOffset,
            endOffset,
        });
    };
    for (const { heading, depth, body, startOffset } of splitByHeadings(content)) {
        if (!body.trim())
            continue;
        // `startOffset` points at the heading line; the body begins after that
        // line and its newline. Sections without a heading start at the body.
        const bodyOffset = heading ? startOffset + heading.length + 1 : startOffset;
        if (body.length <= maxChars) {
            pushChunk(heading, depth, body, startOffset, bodyOffset + body.length);
            continue;
        }
        // Large section: group paragraphs up to the limit, with overlap.
        const paragraphs = body.split(/\n\n+/).filter((p) => p.trim());
        let currentText = '';
        let chunkStart = startOffset;
        let chunkEnd = bodyOffset;
        // Scan position inside `body`. Searching from here (instead of from 0)
        // keeps repeated paragraphs from resolving to their first occurrence.
        let cursor = 0;
        for (const para of paragraphs) {
            const paraPos = body.indexOf(para, cursor);
            cursor = paraPos + para.length;
            const candidate = currentText ? `${currentText}\n\n${para}` : para;
            if (candidate.length > maxChars && currentText) {
                pushChunk(heading, depth, currentText, chunkStart, chunkEnd);
                // Seed the next chunk with the tail of the one just emitted.
                currentText = overlapChars > 0 && currentText.length > overlapChars
                    ? `${currentText.slice(-overlapChars)}\n\n${para}`
                    : para;
                chunkStart = bodyOffset + paraPos;
            }
            else {
                currentText = candidate;
            }
            chunkEnd = bodyOffset + cursor;
        }
        if (currentText.trim()) {
            pushChunk(heading, depth, currentText, chunkStart, chunkEnd);
        }
    }
    return chunks;
}
119
/**
 * Paragraph-based chunking.
 * Groups blank-line-separated paragraphs together up to maxTokens, with overlap.
 *
 * Each chunk is labelled with the most recent heading seen up to and including
 * its own paragraphs. `startOffset` is the position in `content` of the chunk's
 * first new paragraph (overlap text excluded); `endOffset` is the end of its
 * last paragraph.
 *
 * @param {string} content - Markdown text.
 * @param {number} maxTokens - Size limit per chunk, in estimated tokens.
 * @param {number} overlap - Tail of the previous chunk to repeat, in estimated tokens.
 * @returns {object[]} Chunk records in document order.
 */
function chunkByParagraph(content, maxTokens, overlap) {
    const maxChars = maxTokens * CHARS_PER_TOKEN;
    const overlapChars = overlap * CHARS_PER_TOKEN;
    const chunks = [];
    const paragraphs = content.split(/\n\n+/).filter((p) => p.trim());
    let currentText = '';
    let chunkStart = 0;
    let chunkEnd = 0;
    // Scan position inside `content`; searching from here keeps repeated
    // paragraphs from resolving to an earlier occurrence.
    let cursor = 0;
    let currentHeading = null;
    let currentDepth = null;
    const emit = () => {
        const text = currentText.trim();
        chunks.push({
            index: chunks.length,
            text,
            // Counted on the emitted (trimmed) text so it matches `text`.
            tokenCount: Math.ceil(text.length / CHARS_PER_TOKEN),
            wordCount: text.split(/\s+/).filter(Boolean).length,
            section: currentHeading,
            sectionDepth: currentDepth,
            startOffset: chunkStart,
            endOffset: chunkEnd,
        });
    };
    for (const para of paragraphs) {
        const paraPos = content.indexOf(para, cursor);
        cursor = paraPos + para.length;
        const candidate = currentText ? `${currentText}\n\n${para}` : para;
        if (candidate.length > maxChars && currentText) {
            // Flush BEFORE looking at this paragraph's heading: the pending
            // chunk belongs to the section that was active while it was built.
            emit();
            currentText = overlapChars > 0 && currentText.length > overlapChars
                ? `${currentText.slice(-overlapChars)}\n\n${para}`
                : para;
            chunkStart = paraPos;
        }
        else {
            if (!currentText)
                chunkStart = paraPos;
            currentText = candidate;
        }
        // A paragraph that starts with a heading line opens a new section.
        const headingMatch = para.match(/^(#{1,6})\s+(.+)/);
        if (headingMatch) {
            currentHeading = headingMatch[2];
            currentDepth = headingMatch[1].length;
        }
        chunkEnd = cursor;
    }
    if (currentText.trim()) {
        emit();
    }
    return chunks;
}
179
/**
 * Fixed-size chunking with overlap.
 * Simple character-based splitting for predictable chunk sizes.
 *
 * The window advances by `maxChars - overlapChars`. For windows of at least
 * 100 characters the advance is floored at 100 so a large overlap cannot
 * produce a flood of near-identical chunks. The advance never exceeds the
 * window size, so no content is skipped between chunks.
 *
 * @param {string} content - Text to split.
 * @param {number} maxTokens - Window size in estimated tokens.
 * @param {number} overlap - Overlap between consecutive windows, in estimated tokens.
 * @returns {object[]} Chunk records; empty when the window size is not positive.
 */
function chunkByFixed(content, maxTokens, overlap) {
    const maxChars = maxTokens * CHARS_PER_TOKEN;
    const overlapChars = overlap * CHARS_PER_TOKEN;
    // A non-positive (or NaN) window can never yield text.
    if (!(maxChars > 0))
        return [];
    let step = maxChars - overlapChars;
    if (maxChars >= 100) {
        step = Math.max(step, 100);
    }
    else if (!(step > 0)) {
        // Overlap swallows the whole (small) window: fall back to no overlap.
        step = maxChars;
    }
    // The old unconditional floor of 100 exceeded the window whenever
    // maxChars < 100, dropping the text between consecutive windows.
    step = Math.min(step, maxChars);
    const chunks = [];
    for (let i = 0; i < content.length; i += step) {
        const text = content.slice(i, i + maxChars).trim();
        if (!text)
            continue;
        // Label the chunk with the first heading line found inside it, if any.
        const headingMatch = text.match(/^(#{1,6})\s+(.+)/m);
        chunks.push({
            index: chunks.length,
            text,
            tokenCount: Math.ceil(text.length / CHARS_PER_TOKEN),
            wordCount: text.split(/\s+/).filter(Boolean).length,
            section: headingMatch ? headingMatch[2] : null,
            sectionDepth: headingMatch ? headingMatch[1].length : null,
            startOffset: i,
            endOffset: Math.min(i + maxChars, content.length),
        });
    }
    return chunks;
}
208
/**
 * Split content into sections based on markdown headings.
 *
 * A section is one ATX heading line (`#` to `######` followed by text) plus
 * every line up to the next heading. Text before the first heading becomes a
 * section with `heading` and `depth` set to null. Lines inside fenced code
 * blocks (``` or ~~~) are never treated as headings, so shell or Python
 * comments such as `# install deps` stay in the body of their section.
 *
 * @param {string} content - Markdown text.
 * @returns {{heading: string|null, depth: number|null, body: string, startOffset: number}[]}
 *   Sections in document order. `heading` is the full heading line,
 *   `startOffset` is the offset in `content` of the section's first line.
 */
function splitByHeadings(content) {
    const sections = [];
    let currentHeading = null;
    let currentDepth = null;
    let currentBody = [];
    let currentStart = 0;
    let offset = 0;
    // Marker run (e.g. "```") of the fence currently open, or null outside fences.
    let openFence = null;
    const flush = () => {
        if (currentBody.length > 0 || currentHeading) {
            sections.push({
                heading: currentHeading,
                depth: currentDepth,
                body: currentBody.join('\n'),
                startOffset: currentStart,
            });
        }
    };
    for (const line of content.split('\n')) {
        // A backtick fence may not have further backticks on its line, which
        // keeps one-line inline code such as ```x``` from opening a fence.
        const fence = line.match(/^ {0,3}(`{3,}(?=[^`]*$)|~{3,})/);
        if (fence) {
            if (openFence === null) {
                openFence = fence[1];
            }
            else if (fence[1][0] === openFence[0]
                && fence[1].length >= openFence.length
                && line.trim() === fence[1]) {
                // Closing fence: same marker, at least as long, nothing after it.
                openFence = null;
            }
        }
        const headingMatch = openFence === null ? line.match(/^(#{1,6})\s+(.+)/) : null;
        if (headingMatch) {
            // Close the section collected so far and open a new one here.
            flush();
            currentHeading = line;
            currentDepth = headingMatch[1].length;
            currentBody = [];
            currentStart = offset;
        }
        else {
            currentBody.push(line);
        }
        offset += line.length + 1; // +1 for the newline removed by split
    }
    // Don't forget the last section.
    flush();
    return sections;
}
250
+ //# sourceMappingURL=chunker.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"chunker.js","sourceRoot":"","sources":["../../src/core/chunker.ts"],"names":[],"mappings":"AAAA;;;GAGG;AA2CH,MAAM,kBAAkB,GAAG,GAAG,CAAC;AAC/B,MAAM,eAAe,GAAG,EAAE,CAAC;AAC3B,MAAM,eAAe,GAAG,CAAC,CAAC,CAAC,sBAAsB;AAEjD;;GAEG;AACH,MAAM,UAAU,YAAY,CAAC,OAAe,EAAE,UAAwB,EAAE;IACtE,MAAM,SAAS,GAAG,OAAO,CAAC,SAAS,IAAI,kBAAkB,CAAC;IAC1D,MAAM,OAAO,GAAG,OAAO,CAAC,OAAO,IAAI,eAAe,CAAC;IACnD,MAAM,QAAQ,GAAG,OAAO,CAAC,QAAQ,IAAI,SAAS,CAAC;IAE/C,MAAM,IAAI,GAA2B,EAAE,SAAS,EAAE,OAAO,EAAE,QAAQ,EAAE,CAAC;IAEtE,IAAI,MAAsB,CAAC;IAE3B,QAAQ,QAAQ,EAAE,CAAC;QACjB,KAAK,SAAS;YACZ,MAAM,GAAG,cAAc,CAAC,OAAO,EAAE,SAAS,EAAE,OAAO,CAAC,CAAC;YACrD,MAAM;QACR,KAAK,WAAW;YACd,MAAM,GAAG,gBAAgB,CAAC,OAAO,EAAE,SAAS,EAAE,OAAO,CAAC,CAAC;YACvD,MAAM;QACR,KAAK,OAAO;YACV,MAAM,GAAG,YAAY,CAAC,OAAO,EAAE,SAAS,EAAE,OAAO,CAAC,CAAC;YACnD,MAAM;QACR;YACE,MAAM,GAAG,cAAc,CAAC,OAAO,EAAE,SAAS,EAAE,OAAO,CAAC,CAAC;IACzD,CAAC;IAED,OAAO;QACL,MAAM;QACN,WAAW,EAAE,MAAM,CAAC,MAAM;QAC1B,cAAc,EAAE,OAAO,CAAC,MAAM;QAC9B,QAAQ;QACR,OAAO,EAAE,IAAI;KACd,CAAC;AACJ,CAAC;AAED;;;;GAIG;AACH,SAAS,cAAc,CAAC,OAAe,EAAE,SAAiB,EAAE,OAAe;IACzE,MAAM,QAAQ,GAAG,SAAS,GAAG,eAAe,CAAC;IAC7C,MAAM,YAAY,GAAG,OAAO,GAAG,eAAe,CAAC;IAC/C,MAAM,MAAM,GAAmB,EAAE,CAAC;IAElC,0CAA0C;IAC1C,MAAM,QAAQ,GAAG,eAAe,CAAC,OAAO,CAAC,CAAC;IAE1C,IAAI,UAAU,GAAG,CAAC,CAAC;IAEnB,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;QAC/B,MAAM,EAAE,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,WAAW,EAAE,GAAG,OAAO,CAAC;QAEtD,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE;YAAE,SAAS;QAE3B,gDAAgD;QAChD,IAAI,IAAI,CAAC,MAAM,IAAI,QAAQ,EAAE,CAAC;YAC5B,MAAM,IAAI,GAAG,OAAO,CAAC,CAAC,CAAC,GAAG,OAAO,OAAO,IAAI,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;YACpE,MAAM,CAAC,IAAI,CAAC;gBACV,KAAK,EAAE,UAAU,EAAE;gBACnB,IAAI;gBACJ,UAAU,EAAE,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,eAAe,CAAC;gBACpD,SAAS,EAAE,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM;gBACnD,OAAO,EAAE,OAAO,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI;gBACvD,YAAY,EAAE,KAAK;gBACnB,WAAW;gBACX,SAAS,EAAE,WAAW,GAAG,IAAI,
CAAC,MAAM;aACrC,CAAC,CAAC;QACL,CAAC;aAAM,CAAC;YACN,mDAAmD;YACnD,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;YAC7D,IAAI,WAAW,GAAG,EAAE,CAAC;YACrB,IAAI,YAAY,GAAG,WAAW,CAAC;YAE/B,KAAK,MAAM,IAAI,IAAI,UAAU,EAAE,CAAC;gBAC9B,MAAM,SAAS,GAAG,WAAW,CAAC,CAAC,CAAC,GAAG,WAAW,OAAO,IAAI,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;gBAEnE,IAAI,SAAS,CAAC,MAAM,GAAG,QAAQ,IAAI,WAAW,EAAE,CAAC;oBAC/C,qBAAqB;oBACrB,MAAM,IAAI,GAAG,OAAO,CAAC,CAAC,CAAC,GAAG,OAAO,OAAO,WAAW,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC;oBAClF,MAAM,CAAC,IAAI,CAAC;wBACV,KAAK,EAAE,UAAU,EAAE;wBACnB,IAAI;wBACJ,UAAU,EAAE,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,eAAe,CAAC;wBACpD,SAAS,EAAE,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM;wBACnD,OAAO,EAAE,OAAO,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI;wBACvD,YAAY,EAAE,KAAK;wBACnB,WAAW,EAAE,YAAY;wBACzB,SAAS,EAAE,YAAY,GAAG,WAAW,CAAC,MAAM;qBAC7C,CAAC,CAAC;oBAEH,oDAAoD;oBACpD,IAAI,YAAY,GAAG,CAAC,IAAI,WAAW,CAAC,MAAM,GAAG,YAAY,EAAE,CAAC;wBAC1D,WAAW,GAAG,WAAW,CAAC,KAAK,CAAC,CAAC,YAAY,CAAC,GAAG,MAAM,GAAG,IAAI,CAAC;oBACjE,CAAC;yBAAM,CAAC;wBACN,WAAW,GAAG,IAAI,CAAC;oBACrB,CAAC;oBACD,YAAY,GAAG,WAAW,GAAG,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;gBAClD,CAAC;qBAAM,CAAC;oBACN,WAAW,GAAG,SAAS,CAAC;gBAC1B,CAAC;YACH,CAAC;YAED,iBAAiB;YACjB,IAAI,WAAW,CAAC,IAAI,EAAE,EAAE,CAAC;gBACvB,MAAM,IAAI,GAAG,OAAO,CAAC,CAAC,CAAC,GAAG,OAAO,OAAO,WAAW,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC;gBAClF,MAAM,CAAC,IAAI,CAAC;oBACV,KAAK,EAAE,UAAU,EAAE;oBACnB,IAAI;oBACJ,UAAU,EAAE,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,eAAe,CAAC;oBACpD,SAAS,EAAE,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM;oBACnD,OAAO,EAAE,OAAO,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI;oBACvD,YAAY,EAAE,KAAK;oBACnB,WAAW,EAAE,YAAY;oBACzB,SAAS,EAAE,YAAY,GAAG,WAAW,CAAC,MAAM;iBAC7C,CAAC,CAAC;YACL,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;;GAGG;AACH,SAAS,gBAA
gB,CAAC,OAAe,EAAE,SAAiB,EAAE,OAAe;IAC3E,MAAM,QAAQ,GAAG,SAAS,GAAG,eAAe,CAAC;IAC7C,MAAM,YAAY,GAAG,OAAO,GAAG,eAAe,CAAC;IAC/C,MAAM,MAAM,GAAmB,EAAE,CAAC;IAClC,MAAM,UAAU,GAAG,OAAO,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;IAEhE,IAAI,WAAW,GAAG,EAAE,CAAC;IACrB,IAAI,YAAY,GAAG,CAAC,CAAC;IACrB,IAAI,UAAU,GAAG,CAAC,CAAC;IAEnB,gCAAgC;IAChC,IAAI,cAAc,GAAkB,IAAI,CAAC;IACzC,IAAI,YAAY,GAAkB,IAAI,CAAC;IAEvC,KAAK,MAAM,IAAI,IAAI,UAAU,EAAE,CAAC;QAC9B,kCAAkC;QAClC,MAAM,YAAY,GAAG,IAAI,CAAC,KAAK,CAAC,kBAAkB,CAAC,CAAC;QACpD,IAAI,YAAY,EAAE,CAAC;YACjB,cAAc,GAAG,YAAY,CAAC,CAAC,CAAC,CAAC;YACjC,YAAY,GAAG,YAAY,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;QACxC,CAAC;QAED,MAAM,SAAS,GAAG,WAAW,CAAC,CAAC,CAAC,GAAG,WAAW,OAAO,IAAI,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;QAEnE,IAAI,SAAS,CAAC,MAAM,GAAG,QAAQ,IAAI,WAAW,EAAE,CAAC;YAC/C,MAAM,CAAC,IAAI,CAAC;gBACV,KAAK,EAAE,UAAU,EAAE;gBACnB,IAAI,EAAE,WAAW,CAAC,IAAI,EAAE;gBACxB,UAAU,EAAE,IAAI,CAAC,IAAI,CAAC,WAAW,CAAC,MAAM,GAAG,eAAe,CAAC;gBAC3D,SAAS,EAAE,WAAW,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM;gBAC1D,OAAO,EAAE,cAAc;gBACvB,YAAY,EAAE,YAAY;gBAC1B,WAAW,EAAE,YAAY;gBACzB,SAAS,EAAE,YAAY,GAAG,WAAW,CAAC,MAAM;aAC7C,CAAC,CAAC;YAEH,IAAI,YAAY,GAAG,CAAC,IAAI,WAAW,CAAC,MAAM,GAAG,YAAY,EAAE,CAAC;gBAC1D,WAAW,GAAG,WAAW,CAAC,KAAK,CAAC,CAAC,YAAY,CAAC,GAAG,MAAM,GAAG,IAAI,CAAC;YACjE,CAAC;iBAAM,CAAC;gBACN,WAAW,GAAG,IAAI,CAAC;YACrB,CAAC;YACD,YAAY,GAAG,OAAO,CAAC,OAAO,CAAC,IAAI,EAAE,YAAY,CAAC,CAAC;QACrD,CAAC;aAAM,CAAC;YACN,WAAW,GAAG,SAAS,CAAC;QAC1B,CAAC;IACH,CAAC;IAED,IAAI,WAAW,CAAC,IAAI,EAAE,EAAE,CAAC;QACvB,MAAM,CAAC,IAAI,CAAC;YACV,KAAK,EAAE,UAAU,EAAE;YACnB,IAAI,EAAE,WAAW,CAAC,IAAI,EAAE;YACxB,UAAU,EAAE,IAAI,CAAC,IAAI,CAAC,WAAW,CAAC,MAAM,GAAG,eAAe,CAAC;YAC3D,SAAS,EAAE,WAAW,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM;YAC1D,OAAO,EAAE,cAAc;YACvB,YAAY,EAAE,YAAY;YAC1B,WAAW,EAAE,YAAY;YACzB,SAAS,EAAE,YAAY,GAAG,WAAW,CAAC,MAAM;SAC7C,CAAC,CAAC;IACL,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;;GAGG;AACH,SAAS,YAAY,CAAC,OAAe,EAAE,SAAiB,EAAE,O
AAe;IACvE,MAAM,QAAQ,GAAG,SAAS,GAAG,eAAe,CAAC;IAC7C,MAAM,YAAY,GAAG,OAAO,GAAG,eAAe,CAAC;IAC/C,MAAM,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,QAAQ,GAAG,YAAY,EAAE,GAAG,CAAC,CAAC;IACpD,MAAM,MAAM,GAAmB,EAAE,CAAC;IAElC,IAAI,UAAU,GAAG,CAAC,CAAC;IACnB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,IAAI,IAAI,EAAE,CAAC;QAC9C,MAAM,IAAI,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,CAAC,IAAI,EAAE,CAAC;QACnD,IAAI,CAAC,IAAI;YAAE,SAAS;QAEpB,gDAAgD;QAChD,MAAM,YAAY,GAAG,IAAI,CAAC,KAAK,CAAC,mBAAmB,CAAC,CAAC;QAErD,MAAM,CAAC,IAAI,CAAC;YACV,KAAK,EAAE,UAAU,EAAE;YACnB,IAAI;YACJ,UAAU,EAAE,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,eAAe,CAAC;YACpD,SAAS,EAAE,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM;YACnD,OAAO,EAAE,YAAY,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI;YAC9C,YAAY,EAAE,YAAY,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI;YAC1D,WAAW,EAAE,CAAC;YACd,SAAS,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,QAAQ,EAAE,OAAO,CAAC,MAAM,CAAC;SAClD,CAAC,CAAC;IACL,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,6DAA6D;AAC7D,SAAS,eAAe,CAAC,OAAe;IACtC,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAClC,MAAM,QAAQ,GAA+F,EAAE,CAAC;IAEhH,IAAI,cAAc,GAAkB,IAAI,CAAC;IACzC,IAAI,YAAY,GAAkB,IAAI,CAAC;IACvC,IAAI,WAAW,GAAa,EAAE,CAAC;IAC/B,IAAI,YAAY,GAAG,CAAC,CAAC;IACrB,IAAI,MAAM,GAAG,CAAC,CAAC;IAEf,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,YAAY,GAAG,IAAI,CAAC,KAAK,CAAC,kBAAkB,CAAC,CAAC;QAEpD,IAAI,YAAY,EAAE,CAAC;YACjB,wBAAwB;YACxB,IAAI,WAAW,CAAC,MAAM,GAAG,CAAC,IAAI,cAAc,EAAE,CAAC;gBAC7C,QAAQ,CAAC,IAAI,CAAC;oBACZ,OAAO,EAAE,cAAc;oBACvB,KAAK,EAAE,YAAY;oBACnB,IAAI,EAAE,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC;oBAC5B,WAAW,EAAE,YAAY;iBAC1B,CAAC,CAAC;YACL,CAAC;YAED,cAAc,GAAG,IAAI,CAAC;YACtB,YAAY,GAAG,YAAY,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;YACtC,WAAW,GAAG,EAAE,CAAC;YACjB,YAAY,GAAG,MAAM,CAAC;QACxB,CAAC;aAAM,CAAC;YACN,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACzB,CAAC;QAED,MAAM,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,iBAAiB;IAC9C,CAAC;IAED,4BAA4B;IAC5B,IAAI,WAAW,CAAC,MAAM,GAAG,CAAC,IAAI,cAAc,
EAAE,CAAC;QAC7C,QAAQ,CAAC,IAAI,CAAC;YACZ,OAAO,EAAE,cAAc;YACvB,KAAK,EAAE,YAAY;YACnB,IAAI,EAAE,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC;YAC5B,WAAW,EAAE,YAAY;SAC1B,CAAC,CAAC;IACL,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
@@ -0,0 +1,43 @@
1
+ /**
2
+ * CloakBrowser fetch — enterprise-grade stealth using patched Chromium.
3
+ *
4
+ * CloakBrowser patches Chromium at the C++ source level (not JS injection).
5
+ * Passes reCAPTCHA v3 (0.9), Cloudflare Turnstile, DataDome, 14/14 tests.
6
+ *
7
+ * Requires: npm install cloakbrowser playwright-core
8
+ * Usage: peel(url, { cloaked: true })
9
+ */
10
+ export interface CloakFetchOptions { // Options accepted by cloakFetch().
11
+ url: string; // URL passed to page.goto().
12
+ proxy?: string; // Copied into the browser launch options when set.
13
+ userAgent?: string; // Forwarded as the browser context's userAgent when set.
14
+ viewportWidth?: number; // Used only when viewportHeight is also set.
15
+ viewportHeight?: number; // Used only when viewportWidth is also set.
16
+ waitMs?: number; // Extra page.waitForTimeout() delay after navigation; ignored unless > 0.
17
+ waitSelector?: string; // Selector awaited after navigation; a failed wait is logged under DEBUG and otherwise ignored.
18
+ waitUntil?: string; // Wait condition handed to page.goto(); falls back to 'domcontentloaded'.
19
+ timeoutMs?: number; // Timeout for page.goto() and the waitSelector wait; falls back to 30000.
20
+ screenshot?: boolean; // When truthy, a PNG screenshot is captured after the content is read.
21
+ screenshotFullPage?: boolean; // Passed as the screenshot's fullPage flag (false when unset).
22
+ actions?: any[]; // When non-empty, handed to executeActions() from ./actions.js after the waits.
23
+ headers?: Record<string, string>; // When non-empty, applied with page.setExtraHTTPHeaders().
24
+ headed?: boolean; // The browser is launched with headless: !headed.
25
+ }
26
+ export interface CloakFetchResult { // Value resolved by cloakFetch().
27
+ html: string; // page.content() taken after the waits and actions.
28
+ url: string; // page.url() after navigation.
29
+ statusCode?: number; // response?.status(); undefined when page.goto() yielded no response.
30
+ contentType?: string; // The response's 'content-type' header, when present.
31
+ method: 'cloaked'; // Always the literal 'cloaked'.
32
+ screenshot?: Buffer; // PNG screenshot; set only when options.screenshot was truthy.
33
+ challengeDetected?: boolean; // detectChallenge() reported isChallenge with confidence >= 0.7.
34
+ }
35
+ /**
36
+ * Check whether the optional `cloakbrowser` package can be resolved.
+ * Resolution failures are reported as `false` rather than thrown.
+ * @returns `true` when the package resolves, `false` otherwise.
37
+ */
38
+ export declare function isCloakBrowserAvailable(): boolean;
39
+ /**
40
+ * Fetch a URL using CloakBrowser's patched Chromium.
+ * @param options - Target URL plus launch, wait, action and screenshot settings.
+ * @returns The page HTML, final URL, response status and content type, an
+ * optional screenshot, and whether a challenge page was detected.
+ * Rejects when the `cloakbrowser` package cannot be loaded.
41
+ */
42
+ export declare function cloakFetch(options: CloakFetchOptions): Promise<CloakFetchResult>;
43
+ //# sourceMappingURL=cloak-fetch.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"cloak-fetch.d.ts","sourceRoot":"","sources":["../../src/core/cloak-fetch.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAIH,MAAM,WAAW,iBAAiB;IAChC,GAAG,EAAE,MAAM,CAAC;IACZ,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,kBAAkB,CAAC,EAAE,OAAO,CAAC;IAC7B,OAAO,CAAC,EAAE,GAAG,EAAE,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,MAAM,CAAC,EAAE,OAAO,CAAC;CAClB;AAED,MAAM,WAAW,gBAAgB;IAC/B,IAAI,EAAE,MAAM,CAAC;IACb,GAAG,EAAE,MAAM,CAAC;IACZ,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,MAAM,EAAE,SAAS,CAAC;IAClB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,iBAAiB,CAAC,EAAE,OAAO,CAAC;CAC7B;AAID;;GAEG;AACH,wBAAgB,uBAAuB,IAAI,OAAO,CAOjD;AAqBD;;GAEG;AACH,wBAAsB,UAAU,CAAC,OAAO,EAAE,iBAAiB,GAAG,OAAO,CAAC,gBAAgB,CAAC,CAyGtF"}
@@ -0,0 +1,141 @@
1
+ /**
2
+ * CloakBrowser fetch — enterprise-grade stealth using patched Chromium.
3
+ *
4
+ * CloakBrowser patches Chromium at the C++ source level (not JS injection).
5
+ * Passes reCAPTCHA v3 (0.9), Cloudflare Turnstile, DataDome, 14/14 tests.
6
+ *
7
+ * Requires: npm install cloakbrowser playwright-core
8
+ * Usage: peel(url, { cloaked: true })
9
+ */
10
/** Cached `cloakbrowser` module namespace; stays null until the first successful load. */
let cloakModule = null;
/**
 * Check if CloakBrowser is installed.
 *
 * This file is an ES module, where `require` is not defined. The previous
 * `require.resolve()` call therefore threw a ReferenceError that the catch
 * turned into `false`, whether or not the package was present. Resolution now
 * uses `import.meta.resolve()`, the same resolver the later dynamic
 * `import('cloakbrowser')` goes through. `require.resolve()` is still used
 * when a `require` function happens to be in scope.
 *
 * @returns {boolean} true when `cloakbrowser` resolves, false otherwise
 *   (including on runtimes without a synchronous `import.meta.resolve`).
 */
export function isCloakBrowserAvailable() {
    try {
        if (typeof require === 'function') {
            require.resolve('cloakbrowser');
        }
        else {
            import.meta.resolve('cloakbrowser');
        }
        return true;
    }
    catch {
        // Best-effort probe: any resolution failure means "not available".
        return false;
    }
}
23
/**
 * Lazy-load the CloakBrowser module and cache it for later calls.
 *
 * @returns {Promise<object>} The `cloakbrowser` module namespace.
 * @throws {Error} When the dynamic import fails. The message carries install
 *   instructions and `cause` holds the original import error, so a broken
 *   install can be told apart from a missing package.
 */
async function getCloakBrowser() {
    if (cloakModule)
        return cloakModule;
    try {
        // eslint-disable-next-line @typescript-eslint/ban-ts-comment
        // @ts-ignore — cloakbrowser is an optional peer dependency
        cloakModule = await import('cloakbrowser');
        return cloakModule;
    }
    catch (e) {
        throw new Error('CloakBrowser not installed. Run: npm install cloakbrowser playwright-core\n' +
            'CloakBrowser provides enterprise-grade stealth using a patched Chromium binary.\n' +
            'Learn more: https://github.com/CloakHQ/cloakbrowser', { cause: e });
    }
}
41
/**
 * Fetch a URL using CloakBrowser's patched Chromium.
 *
 * Opens a page, navigates to `options.url`, applies the optional waits and
 * actions, then returns the rendered HTML with response metadata. The page,
 * its browser context and the browser are always closed afterwards, including
 * when an earlier step throws.
 *
 * @param {object} options - See CloakFetchOptions: `url` is required; the
 *   remaining fields tune launch, navigation, waits, actions and screenshots.
 * @returns {Promise<object>} `{ html, url, statusCode, contentType, method: 'cloaked',
 *   screenshot, challengeDetected }`.
 * @throws {Error} When CloakBrowser cannot be loaded, or when launch or navigation fails.
 */
export async function cloakFetch(options) {
    const cloak = await getCloakBrowser();
    const launchOptions = {
        headless: !options.headed,
    };
    if (options.proxy) {
        launchOptions.proxy = options.proxy;
    }
    // A viewport is only applied when both dimensions are given.
    const viewport = options.viewportWidth && options.viewportHeight
        ? { width: options.viewportWidth, height: options.viewportHeight }
        : undefined;
    let browser = null;
    let context = null;
    let page = null;
    try {
        // Use launchContext if available for richer options, otherwise launch
        if (cloak.launchContext) {
            const contextOptions = {};
            if (options.userAgent)
                contextOptions.userAgent = options.userAgent;
            if (viewport)
                contextOptions.viewport = viewport;
            context = await cloak.launchContext({
                ...launchOptions,
                ...contextOptions,
            });
            page = await context.newPage();
            // May be null (Playwright returns null for persistent contexts),
            // which is why the context itself is closed in `finally`.
            browser = context.browser();
        }
        else {
            browser = await cloak.launch(launchOptions);
            context = await browser.newContext({
                userAgent: options.userAgent,
                viewport,
            });
            page = await context.newPage();
        }
        // Set custom headers if provided
        if (options.headers && Object.keys(options.headers).length > 0) {
            await page.setExtraHTTPHeaders(options.headers);
        }
        // Navigate
        const waitUntil = options.waitUntil || 'domcontentloaded';
        const timeout = options.timeoutMs || 30000;
        const response = await page.goto(options.url, {
            waitUntil,
            timeout,
        });
        // Wait for selector if specified; a failed wait is not fatal.
        if (options.waitSelector) {
            await page.waitForSelector(options.waitSelector, { timeout }).catch(() => {
                if (process.env.DEBUG)
                    console.debug('[webpeel]', `waitSelector "${options.waitSelector}" timed out`);
            });
        }
        // Additional wait
        if (options.waitMs && options.waitMs > 0) {
            await page.waitForTimeout(options.waitMs);
        }
        // Execute actions if provided
        if (options.actions && options.actions.length > 0) {
            const { executeActions } = await import('./actions.js');
            await executeActions(page, options.actions);
        }
        // Get content
        const html = await page.content();
        const statusCode = response?.status();
        const contentType = response?.headers()['content-type'];
        const finalUrl = page.url();
        // Screenshot if requested
        let screenshotBuffer;
        if (options.screenshot) {
            screenshotBuffer = await page.screenshot({
                fullPage: options.screenshotFullPage || false,
                type: 'png',
            });
        }
        // Check for challenge pages; a missing status is treated as 200.
        const { detectChallenge } = await import('./challenge-detection.js');
        const challengeCheck = detectChallenge(html, statusCode ?? 200);
        return {
            html,
            url: finalUrl,
            statusCode,
            contentType,
            method: 'cloaked',
            screenshot: screenshotBuffer,
            challengeDetected: challengeCheck.isChallenge && challengeCheck.confidence >= 0.7,
        };
    }
    finally {
        // Best-effort cleanup, innermost first. Close errors are deliberately
        // ignored so they cannot mask the result or the original failure.
        if (page)
            await page.close().catch(() => { });
        // Previously the context was never closed, so nothing shut the browser
        // down when context.browser() returned null.
        if (context)
            await context.close().catch(() => { });
        if (browser)
            await browser.close().catch(() => { });
    }
}
141
+ //# sourceMappingURL=cloak-fetch.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"cloak-fetch.js","sourceRoot":"","sources":["../../src/core/cloak-fetch.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AA+BH,IAAI,WAAW,GAAQ,IAAI,CAAC;AAE5B;;GAEG;AACH,MAAM,UAAU,uBAAuB;IACrC,IAAI,CAAC;QACH,OAAO,CAAC,OAAO,CAAC,cAAc,CAAC,CAAC;QAChC,OAAO,IAAI,CAAC;IACd,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,eAAe;IAC5B,IAAI,WAAW;QAAE,OAAO,WAAW,CAAC;IACpC,IAAI,CAAC;QACH,6DAA6D;QAC7D,2DAA2D;QAC3D,WAAW,GAAG,MAAM,MAAM,CAAC,cAAc,CAAC,CAAC;QAC3C,OAAO,WAAW,CAAC;IACrB,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC;QACX,MAAM,IAAI,KAAK,CACb,6EAA6E;YAC7E,mFAAmF;YACnF,qDAAqD,CACtD,CAAC;IACJ,CAAC;AACH,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,UAAU,CAAC,OAA0B;IACzD,MAAM,KAAK,GAAG,MAAM,eAAe,EAAE,CAAC;IAEtC,MAAM,aAAa,GAAQ;QACzB,QAAQ,EAAE,CAAC,OAAO,CAAC,MAAM;KAC1B,CAAC;IAEF,IAAI,OAAO,CAAC,KAAK,EAAE,CAAC;QAClB,aAAa,CAAC,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC;IACtC,CAAC;IAED,IAAI,OAAO,GAAmB,IAAI,CAAC;IACnC,IAAI,IAAI,GAAgB,IAAI,CAAC;IAE7B,IAAI,CAAC;QACH,sEAAsE;QACtE,IAAI,KAAK,CAAC,aAAa,EAAE,CAAC;YACxB,MAAM,cAAc,GAAQ,EAAE,CAAC;YAC/B,IAAI,OAAO,CAAC,SAAS;gBAAE,cAAc,CAAC,SAAS,GAAG,OAAO,CAAC,SAAS,CAAC;YACpE,IAAI,OAAO,CAAC,aAAa,IAAI,OAAO,CAAC,cAAc,EAAE,CAAC;gBACpD,cAAc,CAAC,QAAQ,GAAG,EAAE,KAAK,EAAE,OAAO,CAAC,aAAa,EAAE,MAAM,EAAE,OAAO,CAAC,cAAc,EAAE,CAAC;YAC7F,CAAC;YAED,MAAM,OAAO,GAAmB,MAAM,KAAK,CAAC,aAAa,CAAC;gBACxD,GAAG,aAAa;gBAChB,GAAG,cAAc;aAClB,CAAC,CAAC;YACH,IAAI,GAAG,MAAM,OAAO,CAAC,OAAO,EAAE,CAAC;YAC/B,OAAO,GAAG,OAAO,CAAC,OAAO,EAAE,CAAC;QAC9B,CAAC;aAAM,CAAC;YACN,OAAO,GAAG,MAAM,KAAK,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC;YAC5C,MAAM,OAAO,GAAG,MAAM,OAAQ,CAAC,UAAU,CAAC;gBACxC,SAAS,EAAE,OAAO,CAAC,SAAS;gBAC5B,QAAQ,EAAE,OAAO,CAAC,aAAa,IAAI,OAAO,CAAC,cAAc;oBACvD,CAAC,CAAC,EAAE,KAAK,EAAE,OAAO,CAAC,aAAa,EAAE,MAAM,EAAE,OAAO,CAAC,cAAc,EAAE;oBAClE,CAAC,CAAC,SAAS;aACd,CAAC,CAAC;YACH,IAAI,GAAG,MAAM,OAAO,CAAC,OAAO,EAAE,CAAC;QACjC,CAAC;QAED,iCAAiC;QACjC,IAAI,OAAO,CAAC,OAAO,IAAI,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC/D,MAAM,IAAI,CAAC,mBAAmB,CAA
C,OAAO,CAAC,OAAO,CAAC,CAAC;QAClD,CAAC;QAED,WAAW;QACX,MAAM,SAAS,GAAI,OAAO,CAAC,SAAiB,IAAI,kBAAkB,CAAC;QACnE,MAAM,OAAO,GAAG,OAAO,CAAC,SAAS,IAAI,KAAK,CAAC;QAE3C,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE;YAC5C,SAAS;YACT,OAAO;SACR,CAAC,CAAC;QAEH,iCAAiC;QACjC,IAAI,OAAO,CAAC,YAAY,EAAE,CAAC;YACzB,MAAM,IAAI,CAAC,eAAe,CAAC,OAAO,CAAC,YAAY,EAAE,EAAE,OAAO,EAAE,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE;gBACvE,IAAI,OAAO,CAAC,GAAG,CAAC,KAAK;oBAAE,OAAO,CAAC,KAAK,CAAC,WAAW,EAAE,iBAAiB,OAAO,CAAC,YAAY,aAAa,CAAC,CAAC;YACxG,CAAC,CAAC,CAAC;QACL,CAAC;QAED,kBAAkB;QAClB,IAAI,OAAO,CAAC,MAAM,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACzC,MAAM,IAAI,CAAC,cAAc,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;QAC5C,CAAC;QAED,8BAA8B;QAC9B,IAAI,OAAO,CAAC,OAAO,IAAI,OAAO,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAClD,MAAM,EAAE,cAAc,EAAE,GAAG,MAAM,MAAM,CAAC,cAAc,CAAC,CAAC;YACxD,MAAM,cAAc,CAAC,IAAI,EAAE,OAAO,CAAC,OAAO,CAAC,CAAC;QAC9C,CAAC;QAED,cAAc;QACd,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,OAAO,EAAE,CAAC;QAClC,MAAM,UAAU,GAAG,QAAQ,EAAE,MAAM,EAAE,CAAC;QACtC,MAAM,WAAW,GAAG,QAAQ,EAAE,OAAO,EAAE,CAAC,cAAc,CAAC,CAAC;QACxD,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAE5B,0BAA0B;QAC1B,IAAI,gBAAoC,CAAC;QACzC,IAAI,OAAO,CAAC,UAAU,EAAE,CAAC;YACvB,gBAAgB,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC;gBACvC,QAAQ,EAAE,OAAO,CAAC,kBAAkB,IAAI,KAAK;gBAC7C,IAAI,EAAE,KAAK;aACZ,CAAC,CAAC;QACL,CAAC;QAED,4BAA4B;QAC5B,MAAM,EAAE,eAAe,EAAE,GAAG,MAAM,MAAM,CAAC,0BAA0B,CAAC,CAAC;QACrE,MAAM,cAAc,GAAG,eAAe,CAAC,IAAI,EAAE,UAAU,IAAI,GAAG,CAAC,CAAC;QAEhE,OAAO;YACL,IAAI;YACJ,GAAG,EAAE,QAAQ;YACb,UAAU;YACV,WAAW;YACX,MAAM,EAAE,SAAS;YACjB,UAAU,EAAE,gBAAgB;YAC5B,iBAAiB,EAAE,cAAc,CAAC,WAAW,IAAI,cAAc,CAAC,UAAU,IAAI,GAAG;SAClF,CAAC;IACJ,CAAC;YAAS,CAAC;QACT,UAAU;QACV,IAAI,IAAI;YAAE,MAAM,IAAI,CAAC,KAAK,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;QAC7C,IAAI,OAAO;YAAE,MAAM,OAAO,CAAC,KAAK,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;IACrD,CAAC;AACH,CAAC"}
@@ -0,0 +1,55 @@
1
+ /**
2
+ * Crawl checkpoint system for resume capability.
3
+ * Saves progress to a JSON file so interrupted crawls can continue.
4
+ */
5
+ export interface CrawlCheckpoint {
6
+ /** Unique crawl job ID (hash of start URL + options); also names the checkpoint file `<jobId>.json` */
7
+ jobId: string;
8
+ /** Starting URL */
9
+ startUrl: string;
10
+ /** URLs already crawled, each mapped to a summary of its result; written to disk as a plain object */
11
+ completed: Map<string, {
12
+ status: number;
13
+ contentLength: number;
14
+ timestamp: number;
15
+ }>;
16
+ /** URLs queued but not yet crawled */
17
+ pending: string[];
18
+ /** URLs discovered but not yet queued */
19
+ discovered: string[];
20
+ /** Crawl options (serialized) */
21
+ options: Record<string, any>;
22
+ /** When crawl started */
23
+ startedAt: number;
24
+ /** Time of the last save; `saveCheckpoint` overwrites it with `Date.now()` on every write */
25
+ lastCheckpoint: number;
26
+ /** Total pages target */
27
+ maxPages: number;
28
+ }
29
+ /**
30
+ * Generate a deterministic job ID: the first 16 hex characters of a SHA-256 over the URL and the `maxPages`, `maxDepth`, `includes` and `excludes` options (other options are ignored).
31
+ */
32
+ export declare function generateJobId(url: string, options?: Record<string, any>): string;
33
+ /**
34
+ * Save a checkpoint to disk as `<jobId>.json` in the checkpoint directory. Best-effort: errors are swallowed and only logged when the `DEBUG` environment variable is set.
35
+ */
36
+ export declare function saveCheckpoint(checkpoint: CrawlCheckpoint): void;
37
+ /**
38
+ * Load a checkpoint from disk, rebuilding `completed` as a Map. Returns `null` when the file does not exist or cannot be read/parsed.
39
+ */
40
+ export declare function loadCheckpoint(jobId: string): CrawlCheckpoint | null;
41
+ /**
42
+ * Delete a checkpoint (crawl completed or abandoned). A missing file is a no-op and filesystem errors are ignored.
43
+ */
44
+ export declare function deleteCheckpoint(jobId: string): void;
45
+ /**
46
+ * List all active checkpoints: one summary per parseable `*.json` file in the checkpoint directory, where `completed` and `pending` are counts. Unreadable files are skipped; an unreadable or missing directory yields `[]`.
47
+ */
48
+ export declare function listCheckpoints(): Array<{
49
+ jobId: string;
50
+ startUrl: string;
51
+ completed: number;
52
+ pending: number;
53
+ lastCheckpoint: number;
54
+ }>;
55
+ //# sourceMappingURL=crawl-checkpoint.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"crawl-checkpoint.d.ts","sourceRoot":"","sources":["../../src/core/crawl-checkpoint.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAMH,MAAM,WAAW,eAAe;IAC9B,wDAAwD;IACxD,KAAK,EAAE,MAAM,CAAC;IACd,mBAAmB;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,gDAAgD;IAChD,SAAS,EAAE,GAAG,CAAC,MAAM,EAAE;QAAE,MAAM,EAAE,MAAM,CAAC;QAAC,aAAa,EAAE,MAAM,CAAC;QAAC,SAAS,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;IACrF,sCAAsC;IACtC,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,yCAAyC;IACzC,UAAU,EAAE,MAAM,EAAE,CAAC;IACrB,iCAAiC;IACjC,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAC7B,yBAAyB;IACzB,SAAS,EAAE,MAAM,CAAC;IAClB,2BAA2B;IAC3B,cAAc,EAAE,MAAM,CAAC;IACvB,yBAAyB;IACzB,QAAQ,EAAE,MAAM,CAAC;CAClB;AAID;;GAEG;AACH,wBAAgB,aAAa,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,GAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAM,GAAG,MAAM,CASpF;AASD;;GAEG;AACH,wBAAgB,cAAc,CAAC,UAAU,EAAE,eAAe,GAAG,IAAI,CAchE;AAED;;GAEG;AACH,wBAAgB,cAAc,CAAC,KAAK,EAAE,MAAM,GAAG,eAAe,GAAG,IAAI,CAapE;AAED;;GAEG;AACH,wBAAgB,gBAAgB,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI,CAOpD;AAED;;GAEG;AACH,wBAAgB,eAAe,IAAI,KAAK,CAAC;IACvC,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;IAChB,cAAc,EAAE,MAAM,CAAC;CACxB,CAAC,CA4BD"}
@@ -0,0 +1,105 @@
1
+ /**
2
+ * Crawl checkpoint system for resume capability.
3
+ * Saves progress to a JSON file so interrupted crawls can continue.
4
+ */
5
import { existsSync, readFileSync, writeFileSync, renameSync, mkdirSync, unlinkSync, readdirSync } from 'fs';
import { join } from 'path';
import { createHash } from 'crypto';
8
/**
 * Directory that holds checkpoint files: `<home>/.webpeel/checkpoints`.
 *
 * The home directory is read from `HOME` first. `USERPROFILE` is consulted next
 * because Windows does not define `HOME` by default, which would otherwise send
 * checkpoints to a drive-relative `/tmp`. `/tmp` remains the last-resort fallback.
 */
const CHECKPOINT_DIR = join(process.env.HOME || process.env.USERPROFILE || '/tmp', '.webpeel', 'checkpoints');
9
/**
 * Derive a stable identifier for a crawl job.
 *
 * The ID is the first 16 hex characters of the SHA-256 digest of a JSON
 * document holding the start URL plus the `maxPages`, `maxDepth`, `includes`
 * and `excludes` options. Every other option is ignored, so two crawls that
 * differ only in unrelated options share the same ID.
 *
 * @param {string} url - Start URL of the crawl.
 * @param {Record<string, any>} [options] - Crawl options; only the four fields above are read.
 * @returns {string} 16-character lowercase hex job ID.
 */
export function generateJobId(url, options = {}) {
    const { maxPages, maxDepth, includes, excludes } = options;
    // Property order is part of the fingerprint: JSON.stringify emits keys in insertion order.
    const fingerprint = JSON.stringify({ url, maxPages, maxDepth, includes, excludes });
    const digest = createHash('sha256').update(fingerprint).digest('hex');
    return digest.slice(0, 16);
}
22
/**
 * Build the path of the checkpoint file for a job.
 *
 * Path separators in the ID are replaced with `_`, so a crafted ID such as
 * `../../victim` cannot make a load, save or delete touch files outside the
 * checkpoint directory. IDs produced by `generateJobId` are plain hex and
 * pass through unchanged.
 *
 * @param {string} jobId - Job identifier.
 * @returns {string} `<CHECKPOINT_DIR>/<jobId>.json`.
 */
function getCheckpointPath(jobId) {
    // With no separators left and a ".json" suffix appended, the file name can never be "." or "..".
    const safeId = String(jobId).replace(/[\\/]/g, '_');
    return join(CHECKPOINT_DIR, `${safeId}.json`);
}
28
/**
 * Persist a checkpoint to `<CHECKPOINT_DIR>/<jobId>.json`.
 *
 * `completed` is stored as a plain object and `lastCheckpoint` is stamped with
 * the current time. The JSON is written to a temporary sibling file and then
 * renamed over the target, so a crash or kill in the middle of a save leaves
 * the previous checkpoint intact instead of a truncated, unparseable file.
 *
 * Saving is best-effort: failures never propagate to the caller and are only
 * reported (via `console.debug`) when the `DEBUG` environment variable is set.
 *
 * @param {object} checkpoint - Checkpoint to save; `completed` may be a Map
 *   (or any iterable of entries) or an already-plain object.
 * @returns {void}
 */
export function saveCheckpoint(checkpoint) {
    let tempPath;
    try {
        mkdirSync(CHECKPOINT_DIR, { recursive: true });
        const { completed } = checkpoint;
        const isIterable = completed != null && typeof completed[Symbol.iterator] === 'function';
        const data = {
            ...checkpoint,
            // A Map does not survive JSON.stringify; a plain object (e.g. raw JSON read back from disk) is copied as-is.
            completed: isIterable ? Object.fromEntries(completed) : { ...completed },
            lastCheckpoint: Date.now(),
        };
        const targetPath = getCheckpointPath(checkpoint.jobId);
        // The ".tmp" suffix keeps half-written files out of listCheckpoints(), which only reads "*.json".
        tempPath = `${targetPath}.${process.pid}.tmp`;
        writeFileSync(tempPath, JSON.stringify(data, null, 2));
        renameSync(tempPath, targetPath);
    }
    catch (e) {
        if (tempPath) {
            try {
                unlinkSync(tempPath);
            }
            catch { /* temp file was never created or is already gone */ }
        }
        if (process.env.DEBUG) {
            console.debug('[webpeel]', 'Failed to save checkpoint:', e instanceof Error ? e.message : e);
        }
    }
}
47
/**
 * Load a previously saved checkpoint.
 *
 * The on-disk `completed` object is converted back into a Map; every other
 * field is returned exactly as stored.
 *
 * @param {string} jobId - Job identifier.
 * @returns {object|null} The checkpoint, or `null` when the file does not
 *   exist, cannot be read or parsed, or does not contain a JSON object.
 */
export function loadCheckpoint(jobId) {
    const path = getCheckpointPath(jobId);
    if (!existsSync(path))
        return null;
    try {
        const raw = JSON.parse(readFileSync(path, 'utf-8'));
        // Valid JSON is not necessarily a checkpoint: null, arrays and primitives
        // would otherwise be spread into an object with no job data at all.
        if (raw === null || typeof raw !== 'object' || Array.isArray(raw)) {
            throw new TypeError('checkpoint file does not contain a JSON object');
        }
        return {
            ...raw,
            completed: new Map(Object.entries(raw.completed || {})),
        };
    }
    catch (e) {
        // Same reporting policy as saving: stay quiet unless DEBUG is set.
        if (process.env.DEBUG) {
            console.debug('[webpeel]', 'Failed to load checkpoint:', e instanceof Error ? e.message : e);
        }
        return null;
    }
}
65
/**
 * Remove the checkpoint file of a job, e.g. once the crawl has finished or
 * has been abandoned.
 *
 * A missing file is a no-op, and any filesystem error is deliberately ignored.
 *
 * @param {string} jobId - Job identifier.
 * @returns {void}
 */
export function deleteCheckpoint(jobId) {
    const target = getCheckpointPath(jobId);
    try {
        if (!existsSync(target)) {
            return;
        }
        unlinkSync(target);
    }
    catch { /* ignore */ }
}
77
/**
 * Summarize every checkpoint stored in the checkpoint directory.
 *
 * Only `*.json` entries are considered. A file that cannot be read or parsed
 * is skipped; if the directory is missing or cannot be listed the result is
 * an empty array.
 *
 * @returns {Array<{jobId: string, startUrl: string, completed: number, pending: number, lastCheckpoint: number}>}
 *   One summary per readable checkpoint, where `completed` and `pending` are counts.
 */
export function listCheckpoints() {
    try {
        if (!existsSync(CHECKPOINT_DIR)) {
            return [];
        }
        const summaries = [];
        for (const fileName of readdirSync(CHECKPOINT_DIR)) {
            if (!fileName.endsWith('.json')) {
                continue;
            }
            try {
                const stored = JSON.parse(readFileSync(join(CHECKPOINT_DIR, fileName), 'utf-8'));
                const completedUrls = Object.keys(stored.completed || {});
                const pendingUrls = stored.pending || [];
                summaries.push({
                    jobId: stored.jobId,
                    startUrl: stored.startUrl,
                    completed: completedUrls.length,
                    pending: pendingUrls.length,
                    lastCheckpoint: stored.lastCheckpoint,
                });
            }
            catch {
                // Unreadable or malformed checkpoint: leave it out of the listing.
            }
        }
        return summaries;
    }
    catch {
        return [];
    }
}
105
+ //# sourceMappingURL=crawl-checkpoint.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"crawl-checkpoint.js","sourceRoot":"","sources":["../../src/core/crawl-checkpoint.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,UAAU,EAAE,YAAY,EAAE,aAAa,EAAE,SAAS,EAAE,UAAU,EAAE,WAAW,EAAE,MAAM,IAAI,CAAC;AACjG,OAAO,EAAE,IAAI,EAAE,MAAM,MAAM,CAAC;AAC5B,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AAuBpC,MAAM,cAAc,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,IAAI,MAAM,EAAE,UAAU,EAAE,aAAa,CAAC,CAAC;AAEnF;;GAEG;AACH,MAAM,UAAU,aAAa,CAAC,GAAW,EAAE,UAA+B,EAAE;IAC1E,MAAM,GAAG,GAAG,IAAI,CAAC,SAAS,CAAC;QACzB,GAAG;QACH,QAAQ,EAAE,OAAO,CAAC,QAAQ;QAC1B,QAAQ,EAAE,OAAO,CAAC,QAAQ;QAC1B,QAAQ,EAAE,OAAO,CAAC,QAAQ;QAC1B,QAAQ,EAAE,OAAO,CAAC,QAAQ;KAC3B,CAAC,CAAC;IACH,OAAO,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;AACrE,CAAC;AAED;;GAEG;AACH,SAAS,iBAAiB,CAAC,KAAa;IACtC,OAAO,IAAI,CAAC,cAAc,EAAE,GAAG,KAAK,OAAO,CAAC,CAAC;AAC/C,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,cAAc,CAAC,UAA2B;IACxD,IAAI,CAAC;QACH,SAAS,CAAC,cAAc,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QAC/C,MAAM,IAAI,GAAG;YACX,GAAG,UAAU;YACb,SAAS,EAAE,MAAM,CAAC,WAAW,CAAC,UAAU,CAAC,SAAS,CAAC;YACnD,cAAc,EAAE,IAAI,CAAC,GAAG,EAAE;SAC3B,CAAC;QACF,aAAa,CAAC,iBAAiB,CAAC,UAAU,CAAC,KAAK,CAAC,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;IACpF,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC;QACX,IAAI,OAAO,CAAC,GAAG,CAAC,KAAK,EAAE,CAAC;YACtB,OAAO,CAAC,KAAK,CAAC,WAAW,EAAE,4BAA4B,EAAE,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAC/F,CAAC;IACH,CAAC;AACH,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,cAAc,CAAC,KAAa;IAC1C,MAAM,IAAI,GAAG,iBAAiB,CAAC,KAAK,CAAC,CAAC;IACtC,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC;QAAE,OAAO,IAAI,CAAC;IAEnC,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,CAAC;QACpD,OAAO;YACL,GAAG,GAAG;YACN,SAAS,EAAE,IAAI,GAAG,CAAC,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,SAAS,IAAI,EAAE,CAAC,CAAC;SACxD,CAAC;IACJ,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,gBAAgB,CAAC,KAAa;IAC5C,MAAM,IAAI,GAAG,iBAAiB,
CAAC,KAAK,CAAC,CAAC;IACtC,IAAI,CAAC;QACH,IAAI,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC;YACrB,UAAU,CAAC,IAAI,CAAC,CAAC;QACnB,CAAC;IACH,CAAC;IAAC,MAAM,CAAC,CAAC,YAAY,CAAC,CAAC;AAC1B,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,eAAe;IAO7B,IAAI,CAAC;QACH,IAAI,CAAC,UAAU,CAAC,cAAc,CAAC;YAAE,OAAO,EAAE,CAAC;QAC3C,MAAM,KAAK,GAAa,WAAW,CAAC,cAAc,CAAC,CAAC,MAAM,CAAC,CAAC,CAAS,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC;QAE/F,OAAO,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE;YACnB,IAAI,CAAC;gBACH,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,IAAI,CAAC,cAAc,EAAE,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC,CAAC;gBACvE,OAAO;oBACL,KAAK,EAAE,GAAG,CAAC,KAAK;oBAChB,QAAQ,EAAE,GAAG,CAAC,QAAQ;oBACtB,SAAS,EAAE,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,SAAS,IAAI,EAAE,CAAC,CAAC,MAAM;oBAClD,OAAO,EAAE,CAAC,GAAG,CAAC,OAAO,IAAI,EAAE,CAAC,CAAC,MAAM;oBACnC,cAAc,EAAE,GAAG,CAAC,cAAc;iBACnC,CAAC;YACJ,CAAC;YAAC,MAAM,CAAC;gBACP,OAAO,IAAI,CAAC;YACd,CAAC;QACH,CAAC,CAAC,CAAC,MAAM,CAAC,OAAO,CAMf,CAAC;IACL,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,EAAE,CAAC;IACZ,CAAC;AACH,CAAC"}
@@ -4,8 +4,10 @@
4
4
  */
5
5
  import type { PeelOptions } from '../types.js';
6
6
  export interface CrawlOptions extends Omit<PeelOptions, 'format'> {
7
- /** Maximum number of pages to crawl (default: 10, max: 100) */
7
+ /** Maximum number of pages to crawl (default: 10, max: tier-dependent) */
8
8
  maxPages?: number;
9
+ /** Tier for determining the max pages cap (default: 'free') */
10
+ tier?: string;
9
11
  /** Maximum depth to crawl (default: 2, max: 5) */
10
12
  maxDepth?: number;
11
13
  /** Only crawl URLs from these domains (default: same domain as starting URL) */
@@ -28,6 +30,8 @@ export interface CrawlOptions extends Omit<PeelOptions, 'format'> {
28
30
  onProgress?: (status: CrawlProgress) => void;
29
31
  /** Per-page callback — receives the full result as soon as a page completes */
30
32
  onPage?: (result: CrawlResult) => void;
33
+ /** Resume an interrupted crawl from its last checkpoint */
34
+ resume?: boolean;
31
35
  }
32
36
  export interface CrawlProgress {
33
37
  crawled: number;
@@ -1 +1 @@
1
- {"version":3,"file":"crawler.d.ts","sourceRoot":"","sources":["../../src/core/crawler.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAGH,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AAkB/C,MAAM,WAAW,YAAa,SAAQ,IAAI,CAAC,WAAW,EAAE,QAAQ,CAAC;IAC/D,+DAA+D;IAC/D,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,kDAAkD;IAClD,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,gFAAgF;IAChF,cAAc,CAAC,EAAE,MAAM,EAAE,CAAC;IAC1B,2DAA2D;IAC3D,eAAe,CAAC,EAAE,MAAM,EAAE,CAAC;IAC3B,yCAAyC;IACzC,gBAAgB,CAAC,EAAE,OAAO,CAAC;IAC3B,gFAAgF;IAChF,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,8DAA8D;IAC9D,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,oEAAoE;IACpE,QAAQ,CAAC,EAAE,KAAK,GAAG,KAAK,CAAC;IACzB,kEAAkE;IAClE,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,oDAAoD;IACpD,eAAe,CAAC,EAAE,MAAM,EAAE,CAAC;IAC3B,+CAA+C;IAC/C,UAAU,CAAC,EAAE,CAAC,MAAM,EAAE,aAAa,KAAK,IAAI,CAAC;IAC7C,+EAA+E;IAC/E,MAAM,CAAC,EAAE,CAAC,MAAM,EAAE,WAAW,KAAK,IAAI,CAAC;CACxC;AAED,MAAM,WAAW,aAAa;IAC5B,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,WAAW;IAC1B,8BAA8B;IAC9B,GAAG,EAAE,MAAM,CAAC;IACZ,iBAAiB;IACjB,KAAK,EAAE,MAAM,CAAC;IACd,uBAAuB;IACvB,QAAQ,EAAE,MAAM,CAAC;IACjB,mDAAmD;IACnD,KAAK,EAAE,MAAM,EAAE,CAAC;IAChB,qCAAqC;IACrC,KAAK,EAAE,MAAM,CAAC;IACd,kEAAkE;IAClE,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC;IACtB,2CAA2C;IAC3C,OAAO,EAAE,MAAM,CAAC;IAChB,4CAA4C;IAC5C,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,4CAA4C;IAC5C,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAsFD;;;;;;;;;;;;;;;;;;GAkBG;AACH,wBAAsB,KAAK,CACzB,QAAQ,EAAE,MAAM,EAChB,OAAO,GAAE,YAAiB,GACzB,OAAO,CAAC,WAAW,EAAE,CAAC,CA+NxB"}
1
+ {"version":3,"file":"crawler.d.ts","sourceRoot":"","sources":["../../src/core/crawler.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAGH,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AAkC/C,MAAM,WAAW,YAAa,SAAQ,IAAI,CAAC,WAAW,EAAE,QAAQ,CAAC;IAC/D,0EAA0E;IAC1E,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,+DAA+D;IAC/D,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,kDAAkD;IAClD,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,gFAAgF;IAChF,cAAc,CAAC,EAAE,MAAM,EAAE,CAAC;IAC1B,2DAA2D;IAC3D,eAAe,CAAC,EAAE,MAAM,EAAE,CAAC;IAC3B,yCAAyC;IACzC,gBAAgB,CAAC,EAAE,OAAO,CAAC;IAC3B,gFAAgF;IAChF,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,8DAA8D;IAC9D,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,oEAAoE;IACpE,QAAQ,CAAC,EAAE,KAAK,GAAG,KAAK,CAAC;IACzB,kEAAkE;IAClE,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,oDAAoD;IACpD,eAAe,CAAC,EAAE,MAAM,EAAE,CAAC;IAC3B,+CAA+C;IAC/C,UAAU,CAAC,EAAE,CAAC,MAAM,EAAE,aAAa,KAAK,IAAI,CAAC;IAC7C,+EAA+E;IAC/E,MAAM,CAAC,EAAE,CAAC,MAAM,EAAE,WAAW,KAAK,IAAI,CAAC;IACvC,2DAA2D;IAC3D,MAAM,CAAC,EAAE,OAAO,CAAC;CAClB;AAED,MAAM,WAAW,aAAa;IAC5B,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,WAAW;IAC1B,8BAA8B;IAC9B,GAAG,EAAE,MAAM,CAAC;IACZ,iBAAiB;IACjB,KAAK,EAAE,MAAM,CAAC;IACd,uBAAuB;IACvB,QAAQ,EAAE,MAAM,CAAC;IACjB,mDAAmD;IACnD,KAAK,EAAE,MAAM,EAAE,CAAC;IAChB,qCAAqC;IACrC,KAAK,EAAE,MAAM,CAAC;IACd,kEAAkE;IAClE,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC;IACtB,2CAA2C;IAC3C,OAAO,EAAE,MAAM,CAAC;IAChB,4CAA4C;IAC5C,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,4CAA4C;IAC5C,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAsFD;;;;;;;;;;;;;;;;;;GAkBG;AACH,wBAAsB,KAAK,CACzB,QAAQ,EAAE,MAAM,EAChB,OAAO,GAAE,YAAiB,GACzB,OAAO,CAAC,WAAW,EAAE,CAAC,CAsRxB"}