webpeel 0.16.0 → 0.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +11 -657
- package/README.md +246 -325
- package/dist/cli.js +330 -73
- package/dist/cli.js.map +1 -1
- package/dist/core/browser-fetch.d.ts +12 -0
- package/dist/core/browser-fetch.d.ts.map +1 -1
- package/dist/core/browser-fetch.js +70 -17
- package/dist/core/browser-fetch.js.map +1 -1
- package/dist/core/cf-worker-proxy.d.ts +33 -0
- package/dist/core/cf-worker-proxy.d.ts.map +1 -0
- package/dist/core/cf-worker-proxy.js +88 -0
- package/dist/core/cf-worker-proxy.js.map +1 -0
- package/dist/core/chunker.d.ts +47 -0
- package/dist/core/chunker.d.ts.map +1 -0
- package/dist/core/chunker.js +250 -0
- package/dist/core/chunker.js.map +1 -0
- package/dist/core/cloak-fetch.d.ts +43 -0
- package/dist/core/cloak-fetch.d.ts.map +1 -0
- package/dist/core/cloak-fetch.js +141 -0
- package/dist/core/cloak-fetch.js.map +1 -0
- package/dist/core/crawl-checkpoint.d.ts +55 -0
- package/dist/core/crawl-checkpoint.d.ts.map +1 -0
- package/dist/core/crawl-checkpoint.js +105 -0
- package/dist/core/crawl-checkpoint.js.map +1 -0
- package/dist/core/crawler.d.ts +5 -1
- package/dist/core/crawler.d.ts.map +1 -1
- package/dist/core/crawler.js +60 -5
- package/dist/core/crawler.js.map +1 -1
- package/dist/core/cycle-fetch.d.ts +27 -0
- package/dist/core/cycle-fetch.d.ts.map +1 -0
- package/dist/core/cycle-fetch.js +99 -0
- package/dist/core/cycle-fetch.js.map +1 -0
- package/dist/core/domain-extractors.d.ts.map +1 -1
- package/dist/core/domain-extractors.js +754 -14
- package/dist/core/domain-extractors.js.map +1 -1
- package/dist/core/google-cache.d.ts +30 -0
- package/dist/core/google-cache.d.ts.map +1 -0
- package/dist/core/google-cache.js +181 -0
- package/dist/core/google-cache.js.map +1 -0
- package/dist/core/markdown.d.ts +11 -0
- package/dist/core/markdown.d.ts.map +1 -1
- package/dist/core/markdown.js +43 -0
- package/dist/core/markdown.js.map +1 -1
- package/dist/core/peel-tls.d.ts +26 -0
- package/dist/core/peel-tls.d.ts.map +1 -0
- package/dist/core/peel-tls.js +221 -0
- package/dist/core/peel-tls.js.map +1 -0
- package/dist/core/pipeline.d.ts +5 -1
- package/dist/core/pipeline.d.ts.map +1 -1
- package/dist/core/pipeline.js +269 -21
- package/dist/core/pipeline.js.map +1 -1
- package/dist/core/schema-postprocess.d.ts +33 -0
- package/dist/core/schema-postprocess.d.ts.map +1 -0
- package/dist/core/schema-postprocess.js +470 -0
- package/dist/core/schema-postprocess.js.map +1 -0
- package/dist/core/schema-templates.d.ts +20 -0
- package/dist/core/schema-templates.d.ts.map +1 -0
- package/dist/core/schema-templates.js +131 -0
- package/dist/core/schema-templates.js.map +1 -0
- package/dist/core/search-fallback.d.ts +28 -0
- package/dist/core/search-fallback.d.ts.map +1 -0
- package/dist/core/search-fallback.js +185 -0
- package/dist/core/search-fallback.js.map +1 -0
- package/dist/core/search-provider.d.ts +47 -4
- package/dist/core/search-provider.d.ts.map +1 -1
- package/dist/core/search-provider.js +278 -7
- package/dist/core/search-provider.js.map +1 -1
- package/dist/core/stealth-patches.d.ts +58 -0
- package/dist/core/stealth-patches.d.ts.map +1 -0
- package/dist/core/stealth-patches.js +340 -0
- package/dist/core/stealth-patches.js.map +1 -0
- package/dist/core/strategies.d.ts +20 -0
- package/dist/core/strategies.d.ts.map +1 -1
- package/dist/core/strategies.js +284 -48
- package/dist/core/strategies.js.map +1 -1
- package/dist/core/strategy-hooks.d.ts +1 -1
- package/dist/core/strategy-hooks.d.ts.map +1 -1
- package/dist/index.d.ts +11 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +37 -15
- package/dist/index.js.map +1 -1
- package/dist/mcp/server.js +109 -4
- package/dist/mcp/server.js.map +1 -1
- package/dist/server/app.d.ts.map +1 -1
- package/dist/server/app.js +29 -0
- package/dist/server/app.js.map +1 -1
- package/dist/server/middleware/rate-limit.d.ts +2 -1
- package/dist/server/middleware/rate-limit.d.ts.map +1 -1
- package/dist/server/middleware/rate-limit.js +24 -8
- package/dist/server/middleware/rate-limit.js.map +1 -1
- package/dist/server/routes/agent.d.ts +4 -0
- package/dist/server/routes/agent.d.ts.map +1 -1
- package/dist/server/routes/agent.js +196 -9
- package/dist/server/routes/agent.js.map +1 -1
- package/dist/server/routes/batch.js +5 -5
- package/dist/server/routes/batch.js.map +1 -1
- package/dist/server/routes/compat.d.ts.map +1 -1
- package/dist/server/routes/compat.js +1 -0
- package/dist/server/routes/compat.js.map +1 -1
- package/dist/server/routes/fetch.d.ts.map +1 -1
- package/dist/server/routes/fetch.js +60 -6
- package/dist/server/routes/fetch.js.map +1 -1
- package/dist/server/routes/mcp.d.ts.map +1 -1
- package/dist/server/routes/mcp.js +103 -2
- package/dist/server/routes/mcp.js.map +1 -1
- package/dist/server/routes/search.js +1 -1
- package/dist/server/routes/search.js.map +1 -1
- package/dist/types.d.ts +55 -4
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js +4 -1
- package/dist/types.js.map +1 -1
- package/llms.txt +55 -125
- package/package.json +15 -1
package/dist/core/crawler.js
CHANGED
|
@@ -6,6 +6,7 @@ import { peel } from '../index.js';
|
|
|
6
6
|
import { fetch as undiciFetch } from 'undici';
|
|
7
7
|
import { createHash } from 'crypto';
|
|
8
8
|
import { discoverSitemap } from './sitemap.js';
|
|
9
|
+
import { generateJobId, loadCheckpoint, saveCheckpoint, deleteCheckpoint, } from './crawl-checkpoint.js';
|
|
9
10
|
/** Safely compile a user-supplied regex pattern. Rejects patterns longer than 200 chars
|
|
10
11
|
* and wraps compilation in a try-catch to prevent invalid regex crashes. */
|
|
11
12
|
function safeRegex(pattern) {
|
|
@@ -19,6 +20,15 @@ function safeRegex(pattern) {
|
|
|
19
20
|
throw new Error(`Invalid regex pattern: ${pattern}`);
|
|
20
21
|
}
|
|
21
22
|
}
|
|
23
|
+
/**
 * Maximum pages allowed per tier.
 *
 * crawl() indexes this table with a caller-supplied `tier` string and falls
 * back to the `free` limit via `??` when the lookup is nullish. With an
 * ordinary object literal, inherited keys such as "constructor" or "toString"
 * resolve to functions, so the fallback never triggers and the later
 * `Math.min(...)` clamp evaluates to NaN. A null-prototype object guarantees
 * that every unknown tier looks up as `undefined`. The table is frozen so the
 * shared limits cannot be changed at runtime by accident.
 */
const TIER_MAX_PAGES = Object.freeze({
    __proto__: null, // no inherited keys: unknown tiers must be undefined
    free: 10,
    starter: 25,
    pro: 50,
    enterprise: 100,
    max: 100,
    admin: 10000,
});
|
|
22
32
|
/**
|
|
23
33
|
* Parse robots.txt and return disallowed paths for User-agent: *
|
|
24
34
|
*/
|
|
@@ -106,10 +116,11 @@ function isAllowedByRobots(url, rules) {
|
|
|
106
116
|
* ```
|
|
107
117
|
*/
|
|
108
118
|
export async function crawl(startUrl, options = {}) {
|
|
109
|
-
const { maxPages = 10, maxDepth = 2, allowedDomains, excludePatterns = [], respectRobotsTxt = true, rateLimitMs = 1000, sitemapFirst = false, strategy = 'bfs', deduplication = true, includePatterns = [], onProgress, onPage, ...peelOptions } = options;
|
|
119
|
+
const { maxPages = 10, tier, maxDepth = 2, allowedDomains, excludePatterns = [], respectRobotsTxt = true, rateLimitMs = 1000, sitemapFirst = false, strategy = 'bfs', deduplication = true, includePatterns = [], resume = false, onProgress, onPage, ...peelOptions } = options;
|
|
110
120
|
const crawlStartTime = Date.now();
|
|
111
121
|
// Validate limits
|
|
112
|
-
const
|
|
122
|
+
const tierMaxPages = TIER_MAX_PAGES[tier || 'free'] ?? TIER_MAX_PAGES.free;
|
|
123
|
+
const validatedMaxPages = Math.min(Math.max(maxPages, 1), tierMaxPages);
|
|
113
124
|
const validatedMaxDepth = Math.min(Math.max(maxDepth, 1), 5);
|
|
114
125
|
const validatedRateLimit = Math.max(rateLimitMs, 100); // Min 100ms between requests
|
|
115
126
|
// Parse starting URL
|
|
@@ -133,14 +144,40 @@ export async function crawl(startUrl, options = {}) {
|
|
|
133
144
|
}
|
|
134
145
|
}
|
|
135
146
|
const effectiveRateLimit = robotsRules.crawlDelay || validatedRateLimit;
|
|
147
|
+
// Checkpoint: generate a deterministic job ID for this crawl
|
|
148
|
+
const crawlOptionsForCheckpoint = {
|
|
149
|
+
maxPages: validatedMaxPages,
|
|
150
|
+
maxDepth: validatedMaxDepth,
|
|
151
|
+
includes: includePatterns,
|
|
152
|
+
excludes: excludePatterns,
|
|
153
|
+
};
|
|
154
|
+
const jobId = generateJobId(startUrl, crawlOptionsForCheckpoint);
|
|
155
|
+
// Load existing checkpoint if resume is requested
|
|
156
|
+
const checkpoint = resume ? loadCheckpoint(jobId) : null;
|
|
157
|
+
if (checkpoint) {
|
|
158
|
+
console.error(`[Crawler] Resuming crawl from checkpoint: ${checkpoint.completed.size} pages already crawled`);
|
|
159
|
+
}
|
|
136
160
|
// State tracking
|
|
137
161
|
const results = [];
|
|
138
162
|
const visited = new Set();
|
|
139
163
|
const contentFingerprints = new Set();
|
|
140
164
|
let failedCount = 0;
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
165
|
+
// If resuming, restore visited/results from checkpoint
|
|
166
|
+
if (checkpoint) {
|
|
167
|
+
for (const [url] of checkpoint.completed) {
|
|
168
|
+
visited.add(url);
|
|
169
|
+
}
|
|
170
|
+
}
|
|
171
|
+
const queue = [];
|
|
172
|
+
// If resuming with pending URLs, restore queue; otherwise start from scratch
|
|
173
|
+
if (checkpoint && checkpoint.pending.length > 0) {
|
|
174
|
+
for (const pendingUrl of checkpoint.pending) {
|
|
175
|
+
queue.push({ url: pendingUrl, depth: 1, parent: startUrl });
|
|
176
|
+
}
|
|
177
|
+
}
|
|
178
|
+
else {
|
|
179
|
+
queue.push({ url: startUrl, depth: 0, parent: null });
|
|
180
|
+
}
|
|
144
181
|
// Sitemap-first: Discover URLs from sitemap before crawling
|
|
145
182
|
if (sitemapFirst) {
|
|
146
183
|
try {
|
|
@@ -223,6 +260,22 @@ export async function crawl(startUrl, options = {}) {
|
|
|
223
260
|
crawlResult.fingerprint = fingerprint;
|
|
224
261
|
}
|
|
225
262
|
results.push(crawlResult);
|
|
263
|
+
// Save checkpoint every 5 pages
|
|
264
|
+
if (results.length % 5 === 0) {
|
|
265
|
+
saveCheckpoint({
|
|
266
|
+
jobId,
|
|
267
|
+
startUrl,
|
|
268
|
+
completed: new Map(results
|
|
269
|
+
.filter(r => !r.error)
|
|
270
|
+
.map(r => [r.url, { status: 200, contentLength: r.markdown.length, timestamp: Date.now() }])),
|
|
271
|
+
pending: queue.map(q => q.url),
|
|
272
|
+
discovered: [],
|
|
273
|
+
options: crawlOptionsForCheckpoint,
|
|
274
|
+
startedAt: crawlStartTime,
|
|
275
|
+
lastCheckpoint: Date.now(),
|
|
276
|
+
maxPages: validatedMaxPages,
|
|
277
|
+
});
|
|
278
|
+
}
|
|
226
279
|
// Call per-page callback with full result
|
|
227
280
|
if (onPage) {
|
|
228
281
|
onPage(crawlResult);
|
|
@@ -286,6 +339,8 @@ export async function crawl(startUrl, options = {}) {
|
|
|
286
339
|
}
|
|
287
340
|
}
|
|
288
341
|
}
|
|
342
|
+
// Crawl complete — clean up checkpoint
|
|
343
|
+
deleteCheckpoint(jobId);
|
|
289
344
|
return results;
|
|
290
345
|
}
|
|
291
346
|
//# sourceMappingURL=crawler.js.map
|
package/dist/core/crawler.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"crawler.js","sourceRoot":"","sources":["../../src/core/crawler.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,IAAI,EAAE,MAAM,aAAa,CAAC;AAEnC,OAAO,EAAE,KAAK,IAAI,WAAW,EAAE,MAAM,QAAQ,CAAC;AAC9C,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AACpC,OAAO,EAAE,eAAe,EAAE,MAAM,cAAc,CAAC;AAE/C;6EAC6E;AAC7E,SAAS,SAAS,CAAC,OAAe;IAChC,IAAI,OAAO,CAAC,MAAM,GAAG,GAAG,EAAE,CAAC;QACzB,MAAM,IAAI,KAAK,CAAC,2BAA2B,OAAO,CAAC,MAAM,kBAAkB,CAAC,CAAC;IAC/E,CAAC;IACD,IAAI,CAAC;QACH,OAAO,IAAI,MAAM,CAAC,OAAO,CAAC,CAAC;IAC7B,CAAC;IAAC,MAAM,CAAC;QACP,MAAM,IAAI,KAAK,CAAC,0BAA0B,OAAO,EAAE,CAAC,CAAC;IACvD,CAAC;AACH,CAAC;AA+DD;;GAEG;AACH,KAAK,UAAU,cAAc,CAAC,MAAc;IAC1C,MAAM,SAAS,GAAG,WAAW,MAAM,aAAa,CAAC;IAEjD,IAAI,CAAC;QACH,MAAM,QAAQ,GAAG,MAAM,WAAW,CAAC,SAAS,EAAE;YAC5C,OAAO,EAAE;gBACP,YAAY,EAAE,sCAAsC;aACrD;YACD,MAAM,EAAE,WAAW,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,mBAAmB;SACvD,CAAC,CAAC;QAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,gDAAgD;YAChD,OAAO,EAAE,eAAe,EAAE,EAAE,EAAE,CAAC;QACjC,CAAC;QAED,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;QACnC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAE/B,MAAM,eAAe,GAAa,EAAE,CAAC;QACrC,IAAI,UAA8B,CAAC;QACnC,IAAI,eAAe,GAAG,KAAK,CAAC;QAE5B,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;YAE5B,0BAA0B;YAC1B,IAAI,OAAO,CAAC,WAAW,EAAE,CAAC,UAAU,CAAC,aAAa,CAAC,EAAE,CAAC;gBACpD,MAAM,KAAK,GAAG,OAAO,CAAC,SAAS,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC;gBAC7D,eAAe,GAAG,KAAK,KAAK,GAAG,CAAC;gBAChC,SAAS;YACX,CAAC;YAED,IAAI,CAAC,eAAe;gBAAE,SAAS;YAE/B,4BAA4B;YAC5B,IAAI,OAAO,CAAC,WAAW,EAAE,CAAC,UAAU,CAAC,WAAW,CAAC,EAAE,CAAC;gBAClD,MAAM,IAAI,GAAG,OAAO,CAAC,SAAS,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC;gBAC1D,IAAI,IAAI,EAAE,CAAC;oBACT,eAAe,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;gBAC7B,CAAC;YACH,CAAC;YAED,8BAA8B;YAC9B,IAAI,OAAO,CAAC,WAAW,EAAE,CAAC,UAAU,CAAC,cAAc,CAAC,EAAE,CAAC;gBACrD,MAAM,KAAK,GAAG,QAAQ,CAAC,OAAO,CAAC,SAAS,CAAC,cAAc,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;gBACxE,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,EAAE,CAAC;oBAClB,UAAU,GAAG,KAA
K,GAAG,IAAI,CAAC,CAAC,0BAA0B;gBACvD,CAAC;YACH,CAAC;QACH,CAAC;QAED,OAAO,EAAE,eAAe,EAAE,UAAU,EAAE,CAAC;IACzC,CAAC;IAAC,MAAM,CAAC;QACP,iDAAiD;QACjD,OAAO,EAAE,eAAe,EAAE,EAAE,EAAE,CAAC;IACjC,CAAC;AACH,CAAC;AAED;;GAEG;AACH,SAAS,iBAAiB,CAAC,GAAW,EAAE,KAAkB;IACxD,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;IAC5B,MAAM,IAAI,GAAG,MAAM,CAAC,QAAQ,CAAC;IAE7B,KAAK,MAAM,UAAU,IAAI,KAAK,CAAC,eAAe,EAAE,CAAC;QAC/C,4EAA4E;QAC5E,IAAI,IAAI,CAAC,UAAU,CAAC,UAAU,CAAC,EAAE,CAAC;YAChC,OAAO,KAAK,CAAC;QACf,CAAC;IACH,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;;;;;;;;;;;;;;;;;GAkBG;AACH,MAAM,CAAC,KAAK,UAAU,KAAK,CACzB,QAAgB,EAChB,UAAwB,EAAE;IAE1B,MAAM,EACJ,QAAQ,GAAG,EAAE,EACb,QAAQ,GAAG,CAAC,EACZ,cAAc,EACd,eAAe,GAAG,EAAE,EACpB,gBAAgB,GAAG,IAAI,EACvB,WAAW,GAAG,IAAI,EAClB,YAAY,GAAG,KAAK,EACpB,QAAQ,GAAG,KAAK,EAChB,aAAa,GAAG,IAAI,EACpB,eAAe,GAAG,EAAE,EACpB,UAAU,EACV,MAAM,EACN,GAAG,WAAW,EACf,GAAG,OAAO,CAAC;IAEZ,MAAM,cAAc,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAElC,kBAAkB;IAClB,MAAM,iBAAiB,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;IAC/D,MAAM,iBAAiB,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;IAC7D,MAAM,kBAAkB,GAAG,IAAI,CAAC,GAAG,CAAC,WAAW,EAAE,GAAG,CAAC,CAAC,CAAC,6BAA6B;IAEpF,qBAAqB;IACrB,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,QAAQ,CAAC,CAAC;IACtC,MAAM,WAAW,GAAG,WAAW,CAAC,QAAQ,CAAC;IAEzC,kDAAkD;IAClD,MAAM,uBAAuB,GAAG,cAAc,IAAI,cAAc,CAAC,MAAM,GAAG,CAAC;QACzE,CAAC,CAAC,cAAc;QAChB,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC;IAElB,mEAAmE;IACnE,MAAM,cAAc,GAAG,eAAe,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC;IAE1E,mEAAmE;IACnE,MAAM,cAAc,GAAG,eAAe,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC;IAE1E,6BAA6B;IAC7B,IAAI,WAAW,GAAgB,EAAE,eAAe,EAAE,EAAE,EAAE,CAAC;IACvD,IAAI,gBAAgB,EAAE,CAAC;QACrB,WAAW,GAAG,MAAM,cAAc,CAAC,WAAW,CAAC,CAAC;QAEhD,qEAAqE;QACrE,IAAI,WAAW,CAAC,UAAU,IAAI,WAAW,CAAC,UAAU,GAAG,kBAAkB,EAAE,CAAC;YAC1E,OAAO,CAAC,KAAK,CAAC,gDAAgD,WAAW,CAAC,UAAU,IAAI,CAAC,CAAC;QAC5F,CAAC;IACH,CAAC;IAED,MAAM,kBAAkB,GAAG,WAAW,CAAC
,UAAU,IAAI,kBAAkB,CAAC;IAExE,iBAAiB;IACjB,MAAM,OAAO,GAAkB,EAAE,CAAC;IAClC,MAAM,OAAO,GAAG,IAAI,GAAG,EAAU,CAAC;IAClC,MAAM,mBAAmB,GAAG,IAAI,GAAG,EAAU,CAAC;IAC9C,IAAI,WAAW,GAAG,CAAC,CAAC;IACpB,MAAM,KAAK,GAAiE;QAC1E,EAAE,GAAG,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC,EAAE,MAAM,EAAE,IAAI,EAAE;KAC1C,CAAC;IAEF,4DAA4D;IAC5D,IAAI,YAAY,EAAE,CAAC;QACjB,IAAI,CAAC;YACH,MAAM,OAAO,GAAG,MAAM,eAAe,CAAC,WAAW,EAAE,EAAE,OAAO,EAAE,KAAK,EAAE,OAAO,EAAE,iBAAiB,EAAE,CAAC,CAAC;YACnG,KAAK,MAAM,KAAK,IAAI,OAAO,CAAC,IAAI,EAAE,CAAC;gBACjC,MAAM,QAAQ,GAAG,KAAK,CAAC,GAAG,CAAC;gBAC3B,IAAI,CAAC;oBACH,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,QAAQ,CAAC,CAAC;oBACtC,IAAI,uBAAuB,CAAC,QAAQ,CAAC,WAAW,CAAC,QAAQ,CAAC,EAAE,CAAC;wBAC3D,KAAK,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC,EAAE,MAAM,EAAE,QAAQ,EAAE,CAAC,CAAC;oBAC5D,CAAC;gBACH,CAAC;gBAAC,MAAM,CAAC,CAAC,uBAAuB,CAAC,CAAC;YACrC,CAAC;QACH,CAAC;QAAC,MAAM,CAAC,CAAC,yBAAyB,CAAC,CAAC;IACvC,CAAC;IAED,OAAO,KAAK,CAAC,MAAM,GAAG,CAAC,IAAI,OAAO,CAAC,MAAM,GAAG,iBAAiB,EAAE,CAAC;QAC9D,0CAA0C;QAC1C,MAAM,IAAI,GAAG,QAAQ,KAAK,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,EAAG,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,EAAG,CAAC;QAChE,MAAM,EAAE,GAAG,EAAE,KAAK,EAAE,MAAM,EAAE,GAAG,IAAI,CAAC;QAEpC,0BAA0B;QAC1B,IAAI,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC;YAAE,SAAS;QAC/B,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QAEjB,yBAAyB;QACzB,IAAI,KAAK,GAAG,iBAAiB;YAAE,SAAS;QAExC,eAAe;QACf,IAAI,MAAW,CAAC;QAChB,IAAI,CAAC;YACH,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;QACxB,CAAC;QAAC,MAAM,CAAC;YACP,SAAS,CAAC,oBAAoB;QAChC,CAAC;QAED,6BAA6B;QAC7B,IAAI,CAAC,uBAAuB,CAAC,QAAQ,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC;YACvD,SAAS;QACX,CAAC;QAED,yBAAyB;QACzB,IAAI,cAAc,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC;YAClD,SAAS;QACX,CAAC;QAED,yBAAyB;QACzB,IAAI,cAAc,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,cAAc,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC;YAChF,SAAS;QACX,CAAC;QAED,mBAAmB;QACnB,IAAI,gBAAgB,IAAI,CAAC,iBAAiB,CAAC,GAAG,EAAE,WAAW,CAAC,EAAE,CAAC;YAC7D,OAAO,CAAC,KAAK,CAAC,sBAAsB,GAAG,6BAA6B,CAAC,CAAC;
YACtE,SAAS;QACX,CAAC;QAED,iBAAiB;QACjB,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,GAAG,EAAE;gBAC7B,GAAG,WAAW;gBACd,MAAM,EAAE,UAAU;aACnB,CAAC,CAAC;YAEH,6CAA6C;YAC7C,IAAI,WAA+B,CAAC;YACpC,IAAI,aAAa,EAAE,CAAC;gBAClB,WAAW,GAAG,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;gBACxE,IAAI,mBAAmB,CAAC,GAAG,CAAC,WAAW,CAAC,EAAE,CAAC;oBACzC,yBAAyB;oBACzB,SAAS;gBACX,CAAC;gBACD,mBAAmB,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC;YACvC,CAAC;YAED,MAAM,WAAW,GAAgB;gBAC/B,GAAG,EAAE,MAAM,CAAC,GAAG;gBACf,KAAK,EAAE,MAAM,CAAC,KAAK;gBACnB,QAAQ,EAAE,MAAM,CAAC,OAAO;gBACxB,KAAK,EAAE,MAAM,CAAC,KAAK;gBACnB,KAAK;gBACL,MAAM;gBACN,OAAO,EAAE,MAAM,CAAC,OAAO;aACxB,CAAC;YAEF,IAAI,WAAW,EAAE,CAAC;gBAChB,WAAW,CAAC,WAAW,GAAG,WAAW,CAAC;YACxC,CAAC;YAED,OAAO,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;YAE1B,0CAA0C;YAC1C,IAAI,MAAM,EAAE,CAAC;gBACX,MAAM,CAAC,WAAW,CAAC,CAAC;YACtB,CAAC;YAED,yBAAyB;YACzB,IAAI,UAAU,EAAE,CAAC;gBACf,UAAU,CAAC;oBACT,OAAO,EAAE,OAAO,CAAC,MAAM;oBACvB,MAAM,EAAE,KAAK,CAAC,MAAM;oBACpB,MAAM,EAAE,WAAW;oBACnB,UAAU,EAAE,GAAG;oBACf,OAAO,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,cAAc;iBACrC,CAAC,CAAC;YACL,CAAC;YAED,gCAAgC;YAChC,IAAI,KAAK,GAAG,iBAAiB,EAAE,CAAC;gBAC9B,KAAK,MAAM,IAAI,IAAI,MAAM,CAAC,KAAK,EAAE,CAAC;oBAChC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;wBACvB,KAAK,CAAC,IAAI,CAAC;4BACT,GAAG,EAAE,IAAI;4BACT,KAAK,EAAE,KAAK,GAAG,CAAC;4BAChB,MAAM,EAAE,GAAG;yBACZ,CAAC,CAAC;oBACL,CAAC;gBACH,CAAC;YACH,CAAC;YAED,gBAAgB;YAChB,IAAI,OAAO,CAAC,MAAM,GAAG,iBAAiB,EAAE,CAAC;gBACvC,MAAM,IAAI,OAAO,CAAC,OAAO,CAAC,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,kBAAkB,CAAC,CAAC,CAAC;YACxE,CAAC;QACH,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,yBAAyB;YACzB,WAAW,EAAE,CAAC;YACd,MAAM,YAAY,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,CAAC;YAC9E,OAAO,CAAC,KAAK,CAAC,6BAA6B,GAAG,KAAK,YAAY,EAAE,CAAC,CAAC;YAEnE,MAAM,WAAW,GAAgB;gBAC/B,GAAG;gBACH,KAAK,EAAE,EAAE;gBACT,QAAQ,EAAE,EAAE;gBACZ,KAAK,EAAE,EAAE;gBACT,KAAK;gBACL,MAAM;gBACN,OAAO,EAAE,CAAC;gBACV,KAAK,EAAE,YAAY;aACpB,CAAC;YAEF,OAAO,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;YAE
1B,2CAA2C;YAC3C,IAAI,MAAM,EAAE,CAAC;gBACX,MAAM,CAAC,WAAW,CAAC,CAAC;YACtB,CAAC;YAED,+CAA+C;YAC/C,IAAI,UAAU,EAAE,CAAC;gBACf,UAAU,CAAC;oBACT,OAAO,EAAE,OAAO,CAAC,MAAM;oBACvB,MAAM,EAAE,KAAK,CAAC,MAAM;oBACpB,MAAM,EAAE,WAAW;oBACnB,UAAU,EAAE,GAAG;oBACf,OAAO,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,cAAc;iBACrC,CAAC,CAAC;YACL,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC"}
|
|
1
|
+
{"version":3,"file":"crawler.js","sourceRoot":"","sources":["../../src/core/crawler.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,IAAI,EAAE,MAAM,aAAa,CAAC;AAEnC,OAAO,EAAE,KAAK,IAAI,WAAW,EAAE,MAAM,QAAQ,CAAC;AAC9C,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AACpC,OAAO,EAAE,eAAe,EAAE,MAAM,cAAc,CAAC;AAC/C,OAAO,EACL,aAAa,EACb,cAAc,EACd,cAAc,EACd,gBAAgB,GACjB,MAAM,uBAAuB,CAAC;AAE/B;6EAC6E;AAC7E,SAAS,SAAS,CAAC,OAAe;IAChC,IAAI,OAAO,CAAC,MAAM,GAAG,GAAG,EAAE,CAAC;QACzB,MAAM,IAAI,KAAK,CAAC,2BAA2B,OAAO,CAAC,MAAM,kBAAkB,CAAC,CAAC;IAC/E,CAAC;IACD,IAAI,CAAC;QACH,OAAO,IAAI,MAAM,CAAC,OAAO,CAAC,CAAC;IAC7B,CAAC;IAAC,MAAM,CAAC;QACP,MAAM,IAAI,KAAK,CAAC,0BAA0B,OAAO,EAAE,CAAC,CAAC;IACvD,CAAC;AACH,CAAC;AAED,qCAAqC;AACrC,MAAM,cAAc,GAA2B;IAC7C,IAAI,EAAE,EAAE;IACR,OAAO,EAAE,EAAE;IACX,GAAG,EAAE,EAAE;IACP,UAAU,EAAE,GAAG;IACf,GAAG,EAAE,GAAG;IACR,KAAK,EAAE,KAAK;CACb,CAAC;AAmEF;;GAEG;AACH,KAAK,UAAU,cAAc,CAAC,MAAc;IAC1C,MAAM,SAAS,GAAG,WAAW,MAAM,aAAa,CAAC;IAEjD,IAAI,CAAC;QACH,MAAM,QAAQ,GAAG,MAAM,WAAW,CAAC,SAAS,EAAE;YAC5C,OAAO,EAAE;gBACP,YAAY,EAAE,sCAAsC;aACrD;YACD,MAAM,EAAE,WAAW,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,mBAAmB;SACvD,CAAC,CAAC;QAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,gDAAgD;YAChD,OAAO,EAAE,eAAe,EAAE,EAAE,EAAE,CAAC;QACjC,CAAC;QAED,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;QACnC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAE/B,MAAM,eAAe,GAAa,EAAE,CAAC;QACrC,IAAI,UAA8B,CAAC;QACnC,IAAI,eAAe,GAAG,KAAK,CAAC;QAE5B,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;YAE5B,0BAA0B;YAC1B,IAAI,OAAO,CAAC,WAAW,EAAE,CAAC,UAAU,CAAC,aAAa,CAAC,EAAE,CAAC;gBACpD,MAAM,KAAK,GAAG,OAAO,CAAC,SAAS,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC;gBAC7D,eAAe,GAAG,KAAK,KAAK,GAAG,CAAC;gBAChC,SAAS;YACX,CAAC;YAED,IAAI,CAAC,eAAe;gBAAE,SAAS;YAE/B,4BAA4B;YAC5B,IAAI,OAAO,CAAC,WAAW,EAAE,CAAC,UAAU,CAAC,WAAW,CAAC,EAAE,CAAC;gBAClD,MAAM,IAAI,GAAG,OAAO,CAAC,SAAS,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC;gBAC1D,IAAI,IAAI,EAAE,CAAC;oBACT,eAAe,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;gBAC7B,CAAC;YACH,CAAC;Y
AED,8BAA8B;YAC9B,IAAI,OAAO,CAAC,WAAW,EAAE,CAAC,UAAU,CAAC,cAAc,CAAC,EAAE,CAAC;gBACrD,MAAM,KAAK,GAAG,QAAQ,CAAC,OAAO,CAAC,SAAS,CAAC,cAAc,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;gBACxE,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,EAAE,CAAC;oBAClB,UAAU,GAAG,KAAK,GAAG,IAAI,CAAC,CAAC,0BAA0B;gBACvD,CAAC;YACH,CAAC;QACH,CAAC;QAED,OAAO,EAAE,eAAe,EAAE,UAAU,EAAE,CAAC;IACzC,CAAC;IAAC,MAAM,CAAC;QACP,iDAAiD;QACjD,OAAO,EAAE,eAAe,EAAE,EAAE,EAAE,CAAC;IACjC,CAAC;AACH,CAAC;AAED;;GAEG;AACH,SAAS,iBAAiB,CAAC,GAAW,EAAE,KAAkB;IACxD,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;IAC5B,MAAM,IAAI,GAAG,MAAM,CAAC,QAAQ,CAAC;IAE7B,KAAK,MAAM,UAAU,IAAI,KAAK,CAAC,eAAe,EAAE,CAAC;QAC/C,4EAA4E;QAC5E,IAAI,IAAI,CAAC,UAAU,CAAC,UAAU,CAAC,EAAE,CAAC;YAChC,OAAO,KAAK,CAAC;QACf,CAAC;IACH,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;;;;;;;;;;;;;;;;;GAkBG;AACH,MAAM,CAAC,KAAK,UAAU,KAAK,CACzB,QAAgB,EAChB,UAAwB,EAAE;IAE1B,MAAM,EACJ,QAAQ,GAAG,EAAE,EACb,IAAI,EACJ,QAAQ,GAAG,CAAC,EACZ,cAAc,EACd,eAAe,GAAG,EAAE,EACpB,gBAAgB,GAAG,IAAI,EACvB,WAAW,GAAG,IAAI,EAClB,YAAY,GAAG,KAAK,EACpB,QAAQ,GAAG,KAAK,EAChB,aAAa,GAAG,IAAI,EACpB,eAAe,GAAG,EAAE,EACpB,MAAM,GAAG,KAAK,EACd,UAAU,EACV,MAAM,EACN,GAAG,WAAW,EACf,GAAG,OAAO,CAAC;IAEZ,MAAM,cAAc,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAElC,kBAAkB;IAClB,MAAM,YAAY,GAAG,cAAc,CAAC,IAAI,IAAI,MAAM,CAAC,IAAI,cAAc,CAAC,IAAI,CAAC;IAC3E,MAAM,iBAAiB,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC,CAAC,EAAE,YAAY,CAAC,CAAC;IACxE,MAAM,iBAAiB,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;IAC7D,MAAM,kBAAkB,GAAG,IAAI,CAAC,GAAG,CAAC,WAAW,EAAE,GAAG,CAAC,CAAC,CAAC,6BAA6B;IAEpF,qBAAqB;IACrB,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,QAAQ,CAAC,CAAC;IACtC,MAAM,WAAW,GAAG,WAAW,CAAC,QAAQ,CAAC;IAEzC,kDAAkD;IAClD,MAAM,uBAAuB,GAAG,cAAc,IAAI,cAAc,CAAC,MAAM,GAAG,CAAC;QACzE,CAAC,CAAC,cAAc;QAChB,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC;IAElB,mEAAmE;IACnE,MAAM,cAAc,GAAG,eAAe,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC;IAE1E,mEAAmE;IACnE,MAAM,cAAc,GAAG,eAAe,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC,SAAS,CAAC,OAAO,CAA
C,CAAC,CAAC;IAE1E,6BAA6B;IAC7B,IAAI,WAAW,GAAgB,EAAE,eAAe,EAAE,EAAE,EAAE,CAAC;IACvD,IAAI,gBAAgB,EAAE,CAAC;QACrB,WAAW,GAAG,MAAM,cAAc,CAAC,WAAW,CAAC,CAAC;QAEhD,qEAAqE;QACrE,IAAI,WAAW,CAAC,UAAU,IAAI,WAAW,CAAC,UAAU,GAAG,kBAAkB,EAAE,CAAC;YAC1E,OAAO,CAAC,KAAK,CAAC,gDAAgD,WAAW,CAAC,UAAU,IAAI,CAAC,CAAC;QAC5F,CAAC;IACH,CAAC;IAED,MAAM,kBAAkB,GAAG,WAAW,CAAC,UAAU,IAAI,kBAAkB,CAAC;IAExE,6DAA6D;IAC7D,MAAM,yBAAyB,GAAwB;QACrD,QAAQ,EAAE,iBAAiB;QAC3B,QAAQ,EAAE,iBAAiB;QAC3B,QAAQ,EAAE,eAAe;QACzB,QAAQ,EAAE,eAAe;KAC1B,CAAC;IACF,MAAM,KAAK,GAAG,aAAa,CAAC,QAAQ,EAAE,yBAAyB,CAAC,CAAC;IAEjE,kDAAkD;IAClD,MAAM,UAAU,GAAG,MAAM,CAAC,CAAC,CAAC,cAAc,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;IACzD,IAAI,UAAU,EAAE,CAAC;QACf,OAAO,CAAC,KAAK,CAAC,6CAA6C,UAAU,CAAC,SAAS,CAAC,IAAI,wBAAwB,CAAC,CAAC;IAChH,CAAC;IAED,iBAAiB;IACjB,MAAM,OAAO,GAAkB,EAAE,CAAC;IAClC,MAAM,OAAO,GAAG,IAAI,GAAG,EAAU,CAAC;IAClC,MAAM,mBAAmB,GAAG,IAAI,GAAG,EAAU,CAAC;IAC9C,IAAI,WAAW,GAAG,CAAC,CAAC;IAEpB,uDAAuD;IACvD,IAAI,UAAU,EAAE,CAAC;QACf,KAAK,MAAM,CAAC,GAAG,CAAC,IAAI,UAAU,CAAC,SAAS,EAAE,CAAC;YACzC,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QACnB,CAAC;IACH,CAAC;IAED,MAAM,KAAK,GAAiE,EAAE,CAAC;IAE/E,6EAA6E;IAC7E,IAAI,UAAU,IAAI,UAAU,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAChD,KAAK,MAAM,UAAU,IAAI,UAAU,CAAC,OAAO,EAAE,CAAC;YAC5C,KAAK,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,UAAU,EAAE,KAAK,EAAE,CAAC,EAAE,MAAM,EAAE,QAAQ,EAAE,CAAC,CAAC;QAC9D,CAAC;IACH,CAAC;SAAM,CAAC;QACN,KAAK,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC,EAAE,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC;IACxD,CAAC;IAED,4DAA4D;IAC5D,IAAI,YAAY,EAAE,CAAC;QACjB,IAAI,CAAC;YACH,MAAM,OAAO,GAAG,MAAM,eAAe,CAAC,WAAW,EAAE,EAAE,OAAO,EAAE,KAAK,EAAE,OAAO,EAAE,iBAAiB,EAAE,CAAC,CAAC;YACnG,KAAK,MAAM,KAAK,IAAI,OAAO,CAAC,IAAI,EAAE,CAAC;gBACjC,MAAM,QAAQ,GAAG,KAAK,CAAC,GAAG,CAAC;gBAC3B,IAAI,CAAC;oBACH,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,QAAQ,CAAC,CAAC;oBACtC,IAAI,uBAAuB,CAAC,QAAQ,CAAC,WAAW,CAAC,QAAQ,CAAC,EAAE,CAAC;wBAC3D,KAAK,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC,EAAE,MAAM,EAAE,QAAQ,EAAE,CAAC,CAAC;oBAC5D,CAAC;gBACH,CA
AC;gBAAC,MAAM,CAAC,CAAC,uBAAuB,CAAC,CAAC;YACrC,CAAC;QACH,CAAC;QAAC,MAAM,CAAC,CAAC,yBAAyB,CAAC,CAAC;IACvC,CAAC;IAED,OAAO,KAAK,CAAC,MAAM,GAAG,CAAC,IAAI,OAAO,CAAC,MAAM,GAAG,iBAAiB,EAAE,CAAC;QAC9D,0CAA0C;QAC1C,MAAM,IAAI,GAAG,QAAQ,KAAK,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,EAAG,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,EAAG,CAAC;QAChE,MAAM,EAAE,GAAG,EAAE,KAAK,EAAE,MAAM,EAAE,GAAG,IAAI,CAAC;QAEpC,0BAA0B;QAC1B,IAAI,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC;YAAE,SAAS;QAC/B,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QAEjB,yBAAyB;QACzB,IAAI,KAAK,GAAG,iBAAiB;YAAE,SAAS;QAExC,eAAe;QACf,IAAI,MAAW,CAAC;QAChB,IAAI,CAAC;YACH,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;QACxB,CAAC;QAAC,MAAM,CAAC;YACP,SAAS,CAAC,oBAAoB;QAChC,CAAC;QAED,6BAA6B;QAC7B,IAAI,CAAC,uBAAuB,CAAC,QAAQ,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC;YACvD,SAAS;QACX,CAAC;QAED,yBAAyB;QACzB,IAAI,cAAc,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC;YAClD,SAAS;QACX,CAAC;QAED,yBAAyB;QACzB,IAAI,cAAc,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,cAAc,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC;YAChF,SAAS;QACX,CAAC;QAED,mBAAmB;QACnB,IAAI,gBAAgB,IAAI,CAAC,iBAAiB,CAAC,GAAG,EAAE,WAAW,CAAC,EAAE,CAAC;YAC7D,OAAO,CAAC,KAAK,CAAC,sBAAsB,GAAG,6BAA6B,CAAC,CAAC;YACtE,SAAS;QACX,CAAC;QAED,iBAAiB;QACjB,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,GAAG,EAAE;gBAC7B,GAAG,WAAW;gBACd,MAAM,EAAE,UAAU;aACnB,CAAC,CAAC;YAEH,6CAA6C;YAC7C,IAAI,WAA+B,CAAC;YACpC,IAAI,aAAa,EAAE,CAAC;gBAClB,WAAW,GAAG,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;gBACxE,IAAI,mBAAmB,CAAC,GAAG,CAAC,WAAW,CAAC,EAAE,CAAC;oBACzC,yBAAyB;oBACzB,SAAS;gBACX,CAAC;gBACD,mBAAmB,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC;YACvC,CAAC;YAED,MAAM,WAAW,GAAgB;gBAC/B,GAAG,EAAE,MAAM,CAAC,GAAG;gBACf,KAAK,EAAE,MAAM,CAAC,KAAK;gBACnB,QAAQ,EAAE,MAAM,CAAC,OAAO;gBACxB,KAAK,EAAE,MAAM,CAAC,KAAK;gBACnB,KAAK;gBACL,MAAM;gBACN,OAAO,EAAE,MAAM,CAAC,OAAO;aACxB,CAAC;YAEF,IAAI,WAAW,EAAE,CAAC;gBAChB,WAAW,CAAC,WAAW,GAAG,WAAW,CAAC;YACxC,CAAC;YAED,OAAO,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;YAE1B,gCA
AgC;YAChC,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC;gBAC7B,cAAc,CAAC;oBACb,KAAK;oBACL,QAAQ;oBACR,SAAS,EAAE,IAAI,GAAG,CAChB,OAAO;yBACJ,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC;yBACrB,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,GAAG,EAAE,EAAE,MAAM,EAAE,GAAG,EAAE,aAAa,EAAE,CAAC,CAAC,QAAQ,CAAC,MAAM,EAAE,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC,CAC/F;oBACD,OAAO,EAAE,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC;oBAC9B,UAAU,EAAE,EAAE;oBACd,OAAO,EAAE,yBAAyB;oBAClC,SAAS,EAAE,cAAc;oBACzB,cAAc,EAAE,IAAI,CAAC,GAAG,EAAE;oBAC1B,QAAQ,EAAE,iBAAiB;iBAC5B,CAAC,CAAC;YACL,CAAC;YAED,0CAA0C;YAC1C,IAAI,MAAM,EAAE,CAAC;gBACX,MAAM,CAAC,WAAW,CAAC,CAAC;YACtB,CAAC;YAED,yBAAyB;YACzB,IAAI,UAAU,EAAE,CAAC;gBACf,UAAU,CAAC;oBACT,OAAO,EAAE,OAAO,CAAC,MAAM;oBACvB,MAAM,EAAE,KAAK,CAAC,MAAM;oBACpB,MAAM,EAAE,WAAW;oBACnB,UAAU,EAAE,GAAG;oBACf,OAAO,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,cAAc;iBACrC,CAAC,CAAC;YACL,CAAC;YAED,gCAAgC;YAChC,IAAI,KAAK,GAAG,iBAAiB,EAAE,CAAC;gBAC9B,KAAK,MAAM,IAAI,IAAI,MAAM,CAAC,KAAK,EAAE,CAAC;oBAChC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;wBACvB,KAAK,CAAC,IAAI,CAAC;4BACT,GAAG,EAAE,IAAI;4BACT,KAAK,EAAE,KAAK,GAAG,CAAC;4BAChB,MAAM,EAAE,GAAG;yBACZ,CAAC,CAAC;oBACL,CAAC;gBACH,CAAC;YACH,CAAC;YAED,gBAAgB;YAChB,IAAI,OAAO,CAAC,MAAM,GAAG,iBAAiB,EAAE,CAAC;gBACvC,MAAM,IAAI,OAAO,CAAC,OAAO,CAAC,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,kBAAkB,CAAC,CAAC,CAAC;YACxE,CAAC;QACH,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,yBAAyB;YACzB,WAAW,EAAE,CAAC;YACd,MAAM,YAAY,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,CAAC;YAC9E,OAAO,CAAC,KAAK,CAAC,6BAA6B,GAAG,KAAK,YAAY,EAAE,CAAC,CAAC;YAEnE,MAAM,WAAW,GAAgB;gBAC/B,GAAG;gBACH,KAAK,EAAE,EAAE;gBACT,QAAQ,EAAE,EAAE;gBACZ,KAAK,EAAE,EAAE;gBACT,KAAK;gBACL,MAAM;gBACN,OAAO,EAAE,CAAC;gBACV,KAAK,EAAE,YAAY;aACpB,CAAC;YAEF,OAAO,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;YAE1B,2CAA2C;YAC3C,IAAI,MAAM,EAAE,CAAC;gBACX,MAAM,CAAC,WAAW,CAAC,CAAC;YACtB,CAAC;YAED,+CAA+C;YAC/C,IAAI,UAAU,EAAE,CAAC;gBACf,UAAU,CAAC;oBACT,OAAO,EAAE,OAAO,CAAC,MAAM;oBACvB,MAAM,E
AAE,KAAK,CAAC,MAAM;oBACpB,MAAM,EAAE,WAAW;oBACnB,UAAU,EAAE,GAAG;oBACf,OAAO,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,cAAc;iBACrC,CAAC,CAAC;YACL,CAAC;QACH,CAAC;IACH,CAAC;IAED,uCAAuC;IACvC,gBAAgB,CAAC,KAAK,CAAC,CAAC;IAExB,OAAO,OAAO,CAAC;AACjB,CAAC"}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CycleTLS-based fetching for sites that detect Node.js TLS fingerprints.
|
|
3
|
+
* Uses a Go binary to spoof Chrome's exact TLS handshake.
|
|
4
|
+
*
|
|
5
|
+
* This is tried when stealth browser fetch fails due to TLS/HTTP2 blocks.
|
|
6
|
+
*
|
|
7
|
+
* Requires: npm install cycletls
|
|
8
|
+
* Usage: peel(url, { cycle: true })
|
|
9
|
+
*/
|
|
10
|
+
import type { FetchResult } from './fetcher.js';
|
|
11
|
+
/** Optional settings accepted as the second argument of cycleFetch(); every field may be omitted. */
export interface CycleFetchOptions {
|
|
12
|
+
timeout?: number; // presumably a per-request timeout; the unit is not shown in this declaration — TODO confirm
|
|
13
|
+
proxy?: string; // presumably a proxy address for the request; expected format not visible here — TODO confirm
|
|
14
|
+
headers?: Record<string, string>; // string-to-string header map; NOTE(review): how it combines with any built-in defaults is not visible here
|
|
15
|
+
}
|
|
16
|
+
/** Value that cycleFetch() resolves to: a FetchResult whose `method` field is narrowed to the literal 'cycle'. */
export interface CycleFetchResult extends FetchResult {
|
|
17
|
+
method: 'cycle'; // literal type, so callers can discriminate results produced by the CycleTLS path
|
|
18
|
+
}
|
|
19
|
+
/**
|
|
20
|
+
* Check if CycleTLS is installed.
|
|
21
|
+
*/
|
|
22
|
+
export declare function isCycleTLSAvailable(): boolean;
|
|
23
|
+
/**
|
|
24
|
+
* Fetch a URL using CycleTLS with Chrome JA3 fingerprint spoofing.
|
|
25
|
+
*/
|
|
26
|
+
export declare function cycleFetch(url: string, options?: CycleFetchOptions): Promise<CycleFetchResult>;
|
|
27
|
+
//# sourceMappingURL=cycle-fetch.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cycle-fetch.d.ts","sourceRoot":"","sources":["../../src/core/cycle-fetch.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAIH,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,cAAc,CAAC;AAoBhD,MAAM,WAAW,iBAAiB;IAChC,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CAClC;AAED,MAAM,WAAW,gBAAiB,SAAQ,WAAW;IACnD,MAAM,EAAE,OAAO,CAAC;CACjB;AAID;;GAEG;AACH,wBAAgB,mBAAmB,IAAI,OAAO,CAY7C;AAuBD;;GAEG;AACH,wBAAsB,UAAU,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,iBAAiB,GAAG,OAAO,CAAC,gBAAgB,CAAC,CAuCpG"}
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CycleTLS-based fetching for sites that detect Node.js TLS fingerprints.
|
|
3
|
+
* Uses a Go binary to spoof Chrome's exact TLS handshake.
|
|
4
|
+
*
|
|
5
|
+
* This is tried when stealth browser fetch fails due to TLS/HTTP2 blocks.
|
|
6
|
+
*
|
|
7
|
+
* Requires: npm install cycletls
|
|
8
|
+
* Usage: peel(url, { cycle: true })
|
|
9
|
+
*/
|
|
10
|
+
import { existsSync } from 'fs';
|
|
11
|
+
import { resolve as pathResolve } from 'path';
|
|
12
|
+
// Chrome 134 JA3 fingerprint string, sent as the `ja3` request option by cycleFetch()
|
|
13
|
+
const CHROME_134_JA3 = '771,4865-4866-4867-49195-49199-49196-49200-52393-52392-49171-49172-156-157-47-53,0-23-65281-10-11-35-16-5-13-18-51-45-43-27-17513-21,29-23-24,0';
|
|
14
|
+
const CHROME_134_UA = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36'; // sent as the `userAgent` request option; version matches the JA3 constant's name
|
|
15
|
+
const DEFAULT_HEADERS = { // baseline request headers; caller-supplied headers are spread on top of these in cycleFetch()
|
|
16
|
+
'Sec-Ch-Ua': '"Chromium";v="134", "Google Chrome";v="134"',
|
|
17
|
+
'Sec-Ch-Ua-Mobile': '?0',
|
|
18
|
+
'Sec-Ch-Ua-Platform': '"macOS"',
|
|
19
|
+
'Sec-Fetch-Dest': 'document',
|
|
20
|
+
'Sec-Fetch-Mode': 'navigate',
|
|
21
|
+
'Sec-Fetch-Site': 'none',
|
|
22
|
+
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
|
|
23
|
+
'Accept-Language': 'en-US,en;q=0.9',
|
|
24
|
+
};
|
|
25
|
+
let cycleTLSModule = null; // cache for the lazily imported `cycletls` export; filled by getCycleTLS() on first successful import
|
|
26
|
+
/**
 * Check whether the optional `cycletls` package appears to be installed.
 *
 * This is a filesystem probe only (the package is never loaded): starting at
 * the current working directory it looks for `node_modules/cycletls` in that
 * directory and in every ancestor directory up to the filesystem root. Walking
 * all ancestors (rather than just the working directory and its parent) also
 * finds installs hoisted to a workspace root several levels up.
 *
 * @returns {boolean} `true` if a `node_modules/cycletls` entry exists in the
 *   working directory or any of its ancestors; `false` otherwise, including
 *   when the working directory cannot be determined.
 */
export function isCycleTLSAvailable() {
    try {
        let dir = process.cwd();
        for (;;) {
            if (existsSync(pathResolve(dir, 'node_modules', 'cycletls'))) {
                return true;
            }
            const parent = pathResolve(dir, '..');
            // Resolving '..' from the root yields the root again: nothing left to check.
            if (parent === dir) {
                return false;
            }
            dir = parent;
        }
    }
    catch {
        // process.cwd() throws if the working directory no longer exists.
        return false;
    }
}
|
|
41
|
+
/**
 * Lazy-load the optional `cycletls` package.
 *
 * The resolved export is cached in the module-level `cycleTLSModule`, so the
 * dynamic import is attempted again only while no module has been cached.
 *
 * @returns {Promise<*>} The package's default export, or the module namespace
 *   itself when it has no default export.
 * @throws {Error} With install instructions when the dynamic import fails.
 *   The underlying import error is attached as `cause` so that failures other
 *   than "package missing" remain diagnosable.
 */
async function getCycleTLS() {
    if (cycleTLSModule) {
        return cycleTLSModule;
    }
    let mod;
    try {
        // eslint-disable-next-line @typescript-eslint/ban-ts-comment
        // @ts-ignore — cycletls is an optional peer dependency
        mod = await import('cycletls');
    }
    catch (e) {
        throw new Error('CycleTLS not installed. Run: npm install cycletls\n' +
            'CycleTLS spoofs Chrome\'s TLS/JA3 fingerprint using a Go binary.\n' +
            'Learn more: https://github.com/Danny-Dasilva/CycleTLS', { cause: e });
    }
    // CycleTLS exports: module.default or module itself depending on bundler
    cycleTLSModule = mod.default ?? mod;
    return cycleTLSModule;
}
|
|
61
|
+
/**
 * Fetch a URL with a GET request through CycleTLS, using the Chrome 134 JA3
 * fingerprint and user agent defined in this module.
 *
 * A CycleTLS client is initialised for the call and shut down again in a
 * `finally` block, whether the request succeeds or fails.
 *
 * @param {string} url - URL to request. It is echoed back unchanged as
 *   `url` in the result.
 * @param {object} [options]
 * @param {number} [options.timeout=30000] - Timeout in milliseconds. It is
 *   converted to whole seconds; a positive value never becomes 0.
 * @param {string} [options.proxy] - Forwarded as the `proxy` request option
 *   when truthy.
 * @param {Record<string, string>} [options.headers] - Extra headers spread
 *   over `DEFAULT_HEADERS` (same-named keys replace the defaults).
 * @returns {Promise<{html: string, url: string, statusCode: number, contentType: string, method: 'cycle'}>}
 * @throws {Error} When `cycletls` cannot be loaded, or when the request or
 *   reading the response body fails.
 */
export async function cycleFetch(url, options) {
    const initCycleTLS = await getCycleTLS();
    const cycleTLS = await initCycleTLS();
    const timeoutMs = options?.timeout ?? 30000;
    const requestOptions = {
        ja3: CHROME_134_JA3,
        userAgent: CHROME_134_UA,
        headers: {
            ...DEFAULT_HEADERS,
            ...(options?.headers ?? {}),
        },
        // CycleTLS uses seconds. Plain rounding turns a positive timeout below
        // 500 ms into 0, which no longer expresses the caller's limit, so clamp
        // positive values to at least one second.
        timeout: timeoutMs > 0 ? Math.max(1, Math.round(timeoutMs / 1000)) : 0,
    };
    if (options?.proxy) {
        requestOptions.proxy = options.proxy;
    }
    try {
        if (process.env.DEBUG) {
            console.debug('[webpeel]', 'CycleTLS fetch:', url);
        }
        const response = await cycleTLS(url, requestOptions, 'get');
        const body = await response.text();
        return {
            html: body,
            url,
            statusCode: response.status,
            contentType: findHeaderValue(response.headers, 'content-type') ?? 'text/html',
            method: 'cycle',
        };
    }
    finally {
        // Always clean up the Go process. Cleanup is best-effort: a failure here
        // (thrown or rejected) must not replace the result or the original error.
        try {
            await cycleTLS.exit();
        }
        catch {
            // ignored on purpose
        }
    }
}
/**
 * Look up a header value by name, ignoring case.
 *
 * HTTP header names are case-insensitive and the key casing of the response
 * headers object is not established in this file, so an exact-case lookup can
 * miss a header that is present. If the stored value is an array, its first
 * element is returned.
 *
 * @param {object|null|undefined} headers - Header map from the response.
 * @param {string} name - Header name to find.
 * @returns {*} The header value, or `undefined` when absent.
 */
function findHeaderValue(headers, name) {
    if (headers === null || typeof headers !== 'object') {
        return undefined;
    }
    const wanted = name.toLowerCase();
    for (const [key, value] of Object.entries(headers)) {
        if (key.toLowerCase() === wanted) {
            return Array.isArray(value) ? value[0] : value;
        }
    }
    return undefined;
}
|
|
99
|
+
//# sourceMappingURL=cycle-fetch.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cycle-fetch.js","sourceRoot":"","sources":["../../src/core/cycle-fetch.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,OAAO,EAAE,UAAU,EAAE,MAAM,IAAI,CAAC;AAChC,OAAO,EAAE,OAAO,IAAI,WAAW,EAAE,MAAM,MAAM,CAAC;AAG9C,6BAA6B;AAC7B,MAAM,cAAc,GAClB,iJAAiJ,CAAC;AAEpJ,MAAM,aAAa,GACjB,uHAAuH,CAAC;AAE1H,MAAM,eAAe,GAA2B;IAC9C,WAAW,EAAE,6CAA6C;IAC1D,kBAAkB,EAAE,IAAI;IACxB,oBAAoB,EAAE,SAAS;IAC/B,gBAAgB,EAAE,UAAU;IAC5B,gBAAgB,EAAE,UAAU;IAC5B,gBAAgB,EAAE,MAAM;IACxB,QAAQ,EAAE,iEAAiE;IAC3E,iBAAiB,EAAE,gBAAgB;CACpC,CAAC;AAYF,IAAI,cAAc,GAAQ,IAAI,CAAC;AAE/B;;GAEG;AACH,MAAM,UAAU,mBAAmB;IACjC,IAAI,CAAC;QACH,6DAA6D;QAC7D,+DAA+D;QAC/D,MAAM,GAAG,GAAG,OAAO,CAAC,GAAG,EAAE,CAAC;QAC1B,OAAO,CACL,UAAU,CAAC,WAAW,CAAC,GAAG,EAAE,cAAc,EAAE,UAAU,CAAC,CAAC;YACxD,UAAU,CAAC,WAAW,CAAC,GAAG,EAAE,IAAI,EAAE,cAAc,EAAE,UAAU,CAAC,CAAC,CAC/D,CAAC;IACJ,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,WAAW;IACxB,IAAI,cAAc;QAAE,OAAO,cAAc,CAAC;IAC1C,IAAI,CAAC;QACH,6DAA6D;QAC7D,uDAAuD;QACvD,MAAM,GAAG,GAAG,MAAM,MAAM,CAAC,UAAU,CAAC,CAAC;QACrC,yEAAyE;QACzE,cAAc,GAAG,GAAG,CAAC,OAAO,IAAI,GAAG,CAAC;QACpC,OAAO,cAAc,CAAC;IACxB,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC;QACX,MAAM,IAAI,KAAK,CACb,qDAAqD;YACrD,oEAAoE;YACpE,uDAAuD,CACxD,CAAC;IACJ,CAAC;AACH,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,UAAU,CAAC,GAAW,EAAE,OAA2B;IACvE,MAAM,YAAY,GAAG,MAAM,WAAW,EAAE,CAAC;IACzC,MAAM,QAAQ,GAAG,MAAM,YAAY,EAAE,CAAC;IAEtC,MAAM,aAAa,GAA2B;QAC5C,GAAG,eAAe;QAClB,GAAG,CAAC,OAAO,EAAE,OAAO,IAAI,EAAE,CAAC;KAC5B,CAAC;IAEF,MAAM,cAAc,GAAwB;QAC1C,GAAG,EAAE,cAAc;QACnB,SAAS,EAAE,aAAa;QACxB,OAAO,EAAE,aAAa;QACtB,OAAO,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,OAAO,EAAE,OAAO,IAAI,KAAK,CAAC,GAAG,IAAI,CAAC,EAAE,wBAAwB;KAClF,CAAC;IAEF,IAAI,OAAO,EAAE,KAAK,EAAE,CAAC;QACnB,cAAc,CAAC,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC;IACvC,CAAC;IAED,IAAI,CAAC;QACH,IAAI,OAAO,CAAC,GAAG,CAAC,KAAK,EAAE,CAAC;YACtB,OAAO,CAAC,KAAK,CAAC,WAAW,EAAE,iBAAiB,EAAE,GAAG,CAAC,CAAC;QACrD,CAAC;QAED,MAAM,QAAQ,GAAG,MAAM,QAAQ,CAAC,GAAG,EAAE,cAAc,EAAE,KAAK,CAAC,CAAC;QAC5D,MAAM,
IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;QAEnC,OAAO;YACL,IAAI,EAAE,IAAI;YACV,GAAG;YACH,UAAU,EAAE,QAAQ,CAAC,MAAM;YAC3B,WAAW,EAAE,QAAQ,CAAC,OAAO,EAAE,CAAC,cAAc,CAAC,IAAI,WAAW;YAC9D,MAAM,EAAE,OAAO;SAChB,CAAC;IACJ,CAAC;YAAS,CAAC;QACT,iCAAiC;QACjC,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;IACxC,CAAC;AACH,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"domain-extractors.d.ts","sourceRoot":"","sources":["../../src/core/domain-extractors.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;
|
|
1
|
+
{"version":3,"file":"domain-extractors.d.ts","sourceRoot":"","sources":["../../src/core/domain-extractors.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAyDH,MAAM,WAAW,mBAAmB;IAClC,iDAAiD;IACjD,MAAM,EAAE,MAAM,CAAC;IACf,4EAA4E;IAC5E,IAAI,EAAE,MAAM,CAAC;IACb,sCAAsC;IACtC,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAChC,mDAAmD;IACnD,YAAY,EAAE,MAAM,CAAC;CACtB;AAED,+EAA+E;AAC/E,MAAM,MAAM,eAAe,GAAG,CAC5B,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,KACR,OAAO,CAAC,mBAAmB,GAAG,IAAI,CAAC,CAAC;AAuBzC;;GAEG;AACH,wBAAgB,kBAAkB,CAAC,GAAG,EAAE,MAAM,GAAG,eAAe,GAAG,IAAI,CAWtE;AAED;;;GAGG;AACH,wBAAsB,iBAAiB,CACrC,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,GACV,OAAO,CAAC,mBAAmB,GAAG,IAAI,CAAC,CAQrC"}
|