@dyyz1993/agent-browser 0.26.3 → 0.26.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/actions/crawl.d.ts +1 -1
- package/dist/actions/crawl.d.ts.map +1 -1
- package/dist/actions/crawl.js +244 -27
- package/dist/actions/crawl.js.map +1 -1
- package/dist/actions/map.d.ts.map +1 -1
- package/dist/actions/map.js +80 -21
- package/dist/actions/map.js.map +1 -1
- package/dist/actions/scrape.d.ts +19 -0
- package/dist/actions/scrape.d.ts.map +1 -1
- package/dist/actions/scrape.js +70 -3
- package/dist/actions/scrape.js.map +1 -1
- package/dist/actions/search.d.ts.map +1 -1
- package/dist/actions/search.js +77 -20
- package/dist/actions/search.js.map +1 -1
- package/dist/actions/utils.d.ts.map +1 -1
- package/dist/actions/utils.js +124 -54
- package/dist/actions/utils.js.map +1 -1
- package/dist/browser/browser-manager.d.ts.map +1 -1
- package/dist/browser/browser-manager.js +5 -7
- package/dist/browser/browser-manager.js.map +1 -1
- package/dist/cli/commands.d.ts.map +1 -1
- package/dist/cli/commands.js +47 -0
- package/dist/cli/commands.js.map +1 -1
- package/dist/cli/help.d.ts.map +1 -1
- package/dist/cli/help.js +25 -4
- package/dist/cli/help.js.map +1 -1
- package/dist/protocol.d.ts.map +1 -1
- package/dist/protocol.js +23 -0
- package/dist/protocol.js.map +1 -1
- package/dist/types.d.ts +19 -0
- package/dist/types.d.ts.map +1 -1
- package/package.json +4 -1
package/dist/actions/crawl.d.ts
CHANGED
|
@@ -2,7 +2,7 @@ import type { Page } from 'playwright-core';
|
|
|
2
2
|
import type { BrowserManager } from '../browser/index.js';
|
|
3
3
|
import type { CrawlCommand, CrawlResult, Response } from '../types.js';
|
|
4
4
|
export declare function handleCrawl(command: CrawlCommand, browser: BrowserManager): Promise<Response<CrawlResult>>;
|
|
5
|
-
export declare function discoverLinks(page: Page, baseOrigin: string, baseHostname: string, basePath: string): Promise<string[]>;
|
|
5
|
+
export declare function discoverLinks(page: Page, baseOrigin: string, baseHostname: string, basePath: string, allowExternal?: boolean): Promise<string[]>;
|
|
6
6
|
export declare function normalizeUrl(url: string): string;
|
|
7
7
|
export declare function normalizeUrlFromUrl(u: URL): string;
|
|
8
8
|
//# sourceMappingURL=crawl.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"crawl.d.ts","sourceRoot":"","sources":["../../src/actions/crawl.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,iBAAiB,CAAC;AAC5C,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AAC1D,OAAO,KAAK,EAAE,YAAY,EAAa,WAAW,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;
|
|
1
|
+
{"version":3,"file":"crawl.d.ts","sourceRoot":"","sources":["../../src/actions/crawl.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,iBAAiB,CAAC;AAC5C,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AAC1D,OAAO,KAAK,EAAE,YAAY,EAAa,WAAW,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AA+MlF,wBAAsB,WAAW,CAC/B,OAAO,EAAE,YAAY,EACrB,OAAO,EAAE,cAAc,GACtB,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC,CAwLhC;AA0HD,wBAAsB,aAAa,CACjC,IAAI,EAAE,IAAI,EACV,UAAU,EAAE,MAAM,EAClB,YAAY,EAAE,MAAM,EACpB,QAAQ,EAAE,MAAM,EAChB,aAAa,GAAE,OAAe,GAC7B,OAAO,CAAC,MAAM,EAAE,CAAC,CAoDnB;AAyCD,wBAAgB,YAAY,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,CAOhD;AAED,wBAAgB,mBAAmB,CAAC,CAAC,EAAE,GAAG,GAAG,MAAM,CAelD"}
|
package/dist/actions/crawl.js
CHANGED
|
@@ -5,31 +5,63 @@ const STATIC_EXTENSIONS = [
|
|
|
5
5
|
'.jpg',
|
|
6
6
|
'.jpeg',
|
|
7
7
|
'.gif',
|
|
8
|
-
'.webp',
|
|
9
8
|
'.svg',
|
|
10
9
|
'.ico',
|
|
11
|
-
'.
|
|
12
|
-
'.
|
|
10
|
+
'.webp',
|
|
11
|
+
'.avif',
|
|
12
|
+
'.bmp',
|
|
13
|
+
'.tiff',
|
|
14
|
+
'.mp3',
|
|
15
|
+
'.mp4',
|
|
16
|
+
'.avi',
|
|
17
|
+
'.mov',
|
|
18
|
+
'.wmv',
|
|
19
|
+
'.flv',
|
|
20
|
+
'.webm',
|
|
21
|
+
'.ogg',
|
|
22
|
+
'.wav',
|
|
13
23
|
'.woff',
|
|
14
24
|
'.woff2',
|
|
15
25
|
'.ttf',
|
|
16
26
|
'.eot',
|
|
17
|
-
'.
|
|
18
|
-
'.
|
|
19
|
-
'.
|
|
20
|
-
'.
|
|
27
|
+
'.otf',
|
|
28
|
+
'.css',
|
|
29
|
+
'.js',
|
|
30
|
+
'.mjs',
|
|
21
31
|
'.pdf',
|
|
22
32
|
'.doc',
|
|
23
33
|
'.docx',
|
|
24
34
|
'.xls',
|
|
25
35
|
'.xlsx',
|
|
26
|
-
'.
|
|
27
|
-
'.
|
|
28
|
-
'.
|
|
29
|
-
'.
|
|
36
|
+
'.ppt',
|
|
37
|
+
'.pptx',
|
|
38
|
+
'.odt',
|
|
39
|
+
'.ods',
|
|
40
|
+
'.odp',
|
|
41
|
+
'.rtf',
|
|
42
|
+
'.zip',
|
|
43
|
+
'.gz',
|
|
44
|
+
'.tar',
|
|
45
|
+
'.rar',
|
|
46
|
+
'.7z',
|
|
47
|
+
'.bz2',
|
|
48
|
+
'.exe',
|
|
49
|
+
'.dmg',
|
|
50
|
+
'.deb',
|
|
51
|
+
'.rpm',
|
|
52
|
+
'.msi',
|
|
53
|
+
'.xml',
|
|
54
|
+
'.json',
|
|
55
|
+
'.yaml',
|
|
56
|
+
'.yml',
|
|
57
|
+
'.csv',
|
|
58
|
+
'.bin',
|
|
59
|
+
'.iso',
|
|
60
|
+
'.img',
|
|
61
|
+
'.apk',
|
|
62
|
+
'.ipa',
|
|
30
63
|
'.rss',
|
|
31
64
|
'.atom',
|
|
32
|
-
'.xml',
|
|
33
65
|
];
|
|
34
66
|
const SOCIAL_DOMAINS = [
|
|
35
67
|
'facebook.com',
|
|
@@ -91,6 +123,71 @@ function filterUrlByPatterns(url, excludePatterns, includePatterns) {
|
|
|
91
123
|
}
|
|
92
124
|
return true;
|
|
93
125
|
}
|
|
126
|
+
/**
 * Download the robots.txt served at `<origin>/robots.txt`.
 *
 * The request is aborted after 5 seconds. Any failure (network error,
 * timeout, non-2xx status) is reported as an empty string, so callers
 * treat "no robots.txt" and "could not fetch robots.txt" the same way.
 *
 * @param origin Origin to query, e.g. `https://example.com` (no trailing slash).
 * @returns The robots.txt body, or `''` when it could not be retrieved.
 */
async function fetchRobotsTxt(origin) {
    try {
        const response = await fetch(`${origin}/robots.txt`, {
            signal: AbortSignal.timeout(5000),
        });
        if (!response.ok) {
            return '';
        }
        return await response.text();
    }
    catch {
        // Best effort: an unreachable robots.txt is treated as "no rules".
        return '';
    }
}
|
|
137
|
+
/**
 * Extract the Allow / Disallow / Crawl-delay rules that apply to a crawler
 * from the text of a robots.txt file.
 *
 * Rules are collected from every group whose `User-agent` line is `*` or
 * equals `userAgent` (case-insensitive). Consecutive `User-agent` lines are
 * treated as one group header, so the rules that follow apply if ANY of the
 * listed agents matches. Text after `#` on a line is a comment and is dropped.
 *
 * @param robotsTxt Raw robots.txt content; empty or nullish input yields no rules.
 * @param userAgent Agent token to match in addition to `*`. Defaults to `*`.
 * @returns `{ allowed, disallowed, crawlDelay }` where the first two are arrays
 *   of path patterns in file order and `crawlDelay` is a non-negative number of
 *   seconds, or `undefined` when absent or unparseable.
 */
function parseRobotsTxt(robotsTxt, userAgent = '*') {
    const allowed = [];
    const disallowed = [];
    let crawlDelay;
    const wantedAgent = (userAgent || '*').toLowerCase();
    let matchAgent = false;
    // True while we are still reading the run of User-agent lines that opens a group.
    let inAgentHeader = false;
    for (const rawLine of String(robotsTxt ?? '').split('\n')) {
        // Strip trailing comments before looking at the directive.
        const hashIdx = rawLine.indexOf('#');
        const line = (hashIdx === -1 ? rawLine : rawLine.slice(0, hashIdx)).trim();
        if (!line)
            continue;
        const colonIdx = line.indexOf(':');
        if (colonIdx === -1)
            continue;
        const key = line.slice(0, colonIdx).trim().toLowerCase();
        const value = line.slice(colonIdx + 1).trim();
        if (key === 'user-agent') {
            const matches = value === '*' || value.toLowerCase() === wantedAgent;
            // A User-agent line right after another one extends the same group;
            // otherwise it starts a new group and the previous match is discarded.
            matchAgent = inAgentHeader ? matchAgent || matches : matches;
            inAgentHeader = true;
            continue;
        }
        if (key !== 'disallow' && key !== 'allow' && key !== 'crawl-delay') {
            // Unknown directives (e.g. Sitemap) neither carry rules nor close a group header.
            continue;
        }
        inAgentHeader = false;
        if (!matchAgent || !value)
            continue;
        if (key === 'disallow') {
            disallowed.push(value);
        }
        else if (key === 'allow') {
            allowed.push(value);
        }
        else {
            const seconds = Number.parseFloat(value);
            // Ignore garbage such as "Crawl-delay: soon" instead of returning NaN.
            if (Number.isFinite(seconds) && seconds >= 0) {
                crawlDelay = seconds;
            }
        }
    }
    return { allowed, disallowed, crawlDelay };
}
|
|
168
|
+
/**
 * Test a URL path against a single robots.txt path pattern.
 *
 * The pattern is matched as a prefix of `pathname`. `*` matches any run of
 * characters, and a trailing `$` anchors the pattern to the end of the path.
 * Every other character, including `?` and a non-trailing `$`, is literal.
 *
 * @param pathname URL path to test, e.g. `/docs/file.pdf`.
 * @param pattern Value of an Allow/Disallow rule, e.g. `/*.pdf$`.
 * @returns `true` when the pattern matches the path.
 */
function matchesRobotsPattern(pathname, pattern) {
    // Detect the end anchor BEFORE escaping; escaping first would turn it into
    // a literal "\$" and stripping the "$" afterwards leaves an invalid regex.
    const anchored = pattern.endsWith('$');
    const body = anchored ? pattern.slice(0, -1) : pattern;
    const source = body
        .split('*')
        .map((literal) => literal.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'))
        .join('.*');
    try {
        return new RegExp(`^${source}${anchored ? '$' : ''}`).test(pathname);
    }
    catch {
        return false;
    }
}

/**
 * Decide whether a path may be crawled under a parsed robots.txt rule set.
 *
 * The most specific (longest) matching pattern decides. If the longest
 * matching Allow pattern is at least as long as the longest matching Disallow
 * pattern the path is allowed; a path matched by no Disallow rule is always
 * allowed.
 *
 * @param pathname URL path to check.
 * @param rules Object with `allowed` and `disallowed` pattern arrays.
 * @returns `true` when crawling the path is permitted.
 */
function isAllowedByRobots(pathname, rules) {
    const longestMatch = (patterns) => {
        let best = -1;
        for (const pattern of patterns ?? []) {
            // Only run the regex when this pattern could improve on the best so far.
            if (pattern.length > best && matchesRobotsPattern(pathname, pattern)) {
                best = pattern.length;
            }
        }
        return best;
    };
    const disallowLength = longestMatch(rules.disallowed);
    if (disallowLength === -1)
        return true;
    return longestMatch(rules.allowed) >= disallowLength;
}
|
|
94
191
|
export async function handleCrawl(command, browser) {
|
|
95
192
|
const page = browser.getPage();
|
|
96
193
|
if (!page) {
|
|
@@ -107,16 +204,73 @@ export async function handleCrawl(command, browser) {
|
|
|
107
204
|
const allowExternal = command.allowExternal ?? false;
|
|
108
205
|
const excludePatterns = command.excludePatterns;
|
|
109
206
|
const includePatterns = command.includePatterns;
|
|
207
|
+
const concurrency = command.concurrency ?? 1;
|
|
208
|
+
const context = page.context();
|
|
209
|
+
if (command.cookies && command.cookies.length > 0) {
|
|
210
|
+
await context.addCookies(command.cookies);
|
|
211
|
+
}
|
|
110
212
|
const startUrl = normalizeUrl(command.url);
|
|
111
213
|
const parsedStart = new URL(startUrl);
|
|
112
214
|
const baseOrigin = parsedStart.origin;
|
|
113
215
|
const baseHostname = parsedStart.hostname.replace(/^www\./, '');
|
|
114
216
|
const basePath = parsedStart.pathname.replace(/\/$/, '');
|
|
217
|
+
const robotsTxt = await fetchRobotsTxt(baseOrigin);
|
|
218
|
+
const robotsRules = parseRobotsTxt(robotsTxt);
|
|
219
|
+
const crawlDelay = robotsRules.crawlDelay ? robotsRules.crawlDelay * 1000 : 0;
|
|
115
220
|
const visited = new Set();
|
|
116
221
|
const pages = [];
|
|
117
222
|
const pageUrls = new Set();
|
|
118
223
|
let failed = 0;
|
|
119
224
|
const queue = [{ url: startUrl, depth: 0, priority: 0 }];
|
|
225
|
+
function generateUrlPermutations(url) {
|
|
226
|
+
try {
|
|
227
|
+
const u = new URL(url);
|
|
228
|
+
const variants = [u.href];
|
|
229
|
+
if (u.hostname.startsWith('www.')) {
|
|
230
|
+
const stripped = new URL(u.href);
|
|
231
|
+
stripped.hostname = u.hostname.slice(4);
|
|
232
|
+
variants.push(stripped.href);
|
|
233
|
+
}
|
|
234
|
+
else {
|
|
235
|
+
const withWWW = new URL(u.href);
|
|
236
|
+
withWWW.hostname = 'www.' + u.hostname;
|
|
237
|
+
variants.push(withWWW.href);
|
|
238
|
+
}
|
|
239
|
+
if (u.pathname.endsWith('/')) {
|
|
240
|
+
const noSlash = new URL(u.href);
|
|
241
|
+
noSlash.pathname = noSlash.pathname.replace(/\/$/, '') || '/';
|
|
242
|
+
variants.push(noSlash.href);
|
|
243
|
+
}
|
|
244
|
+
else {
|
|
245
|
+
const withSlash = new URL(u.href);
|
|
246
|
+
withSlash.pathname += '/';
|
|
247
|
+
variants.push(withSlash.href);
|
|
248
|
+
}
|
|
249
|
+
const indexPattern = /\/(index\.(html|htm|php|aspx?))$/i;
|
|
250
|
+
if (indexPattern.test(u.pathname)) {
|
|
251
|
+
const noIndex = new URL(u.href);
|
|
252
|
+
noIndex.pathname = u.pathname.replace(indexPattern, '/') || '/';
|
|
253
|
+
variants.push(noIndex.href);
|
|
254
|
+
}
|
|
255
|
+
if (u.hash && !u.hash.startsWith('#/') && !u.hash.startsWith('#!')) {
|
|
256
|
+
const noHash = new URL(u.href);
|
|
257
|
+
noHash.hash = '';
|
|
258
|
+
variants.push(noHash.href);
|
|
259
|
+
}
|
|
260
|
+
return [...new Set(variants)];
|
|
261
|
+
}
|
|
262
|
+
catch {
|
|
263
|
+
return [url];
|
|
264
|
+
}
|
|
265
|
+
}
|
|
266
|
+
function isVisited(url) {
|
|
267
|
+
return generateUrlPermutations(url).some((perm) => visited.has(perm));
|
|
268
|
+
}
|
|
269
|
+
function markVisited(url) {
|
|
270
|
+
for (const perm of generateUrlPermutations(url)) {
|
|
271
|
+
visited.add(perm);
|
|
272
|
+
}
|
|
273
|
+
}
|
|
120
274
|
function urlPriority(url) {
|
|
121
275
|
for (const p of LOW_VALUE_PATTERNS) {
|
|
122
276
|
if (p.test(url))
|
|
@@ -127,15 +281,17 @@ export async function handleCrawl(command, browser) {
|
|
|
127
281
|
while (queue.length > 0 && pages.length < maxPages) {
|
|
128
282
|
queue.sort((a, b) => a.priority - b.priority || a.depth - b.depth);
|
|
129
283
|
const batch = [];
|
|
130
|
-
while (queue.length > 0 &&
|
|
284
|
+
while (queue.length > 0 &&
|
|
285
|
+
batch.length < concurrency &&
|
|
286
|
+
pages.length + batch.length < maxPages) {
|
|
131
287
|
const entry = queue.shift();
|
|
132
288
|
const normalized = normalizeUrl(entry.url);
|
|
133
|
-
if (
|
|
289
|
+
if (isVisited(normalized))
|
|
134
290
|
continue;
|
|
135
|
-
|
|
291
|
+
markVisited(normalized);
|
|
136
292
|
batch.push({ ...entry, url: normalized });
|
|
137
293
|
}
|
|
138
|
-
const results = await Promise.allSettled(batch.map((entry) => crawlPage(page, entry.url, baseOrigin, baseHostname, basePath, format, command.selector, timeoutMs, allowExternal, excludePatterns, includePatterns)));
|
|
294
|
+
const results = await Promise.allSettled(batch.map((entry) => crawlPage(browser, page, entry.url, baseOrigin, baseHostname, basePath, format, command.selector, timeoutMs, allowExternal, excludePatterns, includePatterns, command.javaScriptEnabled, robotsRules, crawlDelay, concurrency > 1)));
|
|
139
295
|
for (let i = 0; i < results.length; i++) {
|
|
140
296
|
const result = results[i];
|
|
141
297
|
const entry = batch[i];
|
|
@@ -145,11 +301,15 @@ export async function handleCrawl(command, browser) {
|
|
|
145
301
|
if (pageUrls.has(finalUrl))
|
|
146
302
|
continue;
|
|
147
303
|
pageUrls.add(finalUrl);
|
|
304
|
+
markVisited(finalUrl);
|
|
148
305
|
pages.push(crawlPageData);
|
|
306
|
+
if (crawlDelay > 0) {
|
|
307
|
+
await new Promise((resolve) => setTimeout(resolve, crawlDelay));
|
|
308
|
+
}
|
|
149
309
|
if (entry.depth < maxDepth) {
|
|
150
310
|
for (const link of crawlPageData.links || []) {
|
|
151
311
|
const normalized = normalizeUrl(link);
|
|
152
|
-
if (
|
|
312
|
+
if (isVisited(normalized))
|
|
153
313
|
continue;
|
|
154
314
|
if (!isAllowedUrl(normalized, baseOrigin, baseHostname, basePath, allowExternal))
|
|
155
315
|
continue;
|
|
@@ -178,9 +338,55 @@ export async function handleCrawl(command, browser) {
|
|
|
178
338
|
failed,
|
|
179
339
|
});
|
|
180
340
|
}
|
|
181
|
-
async function crawlPage(
|
|
341
|
+
async function crawlPage(browser, mainPage, url, baseOrigin, baseHostname, basePath, format, selector, timeoutMs = 15000, allowExternal = false, excludePatterns, includePatterns, javaScriptEnabled, robotsRules, crawlDelay, useNewTab = false) {
|
|
342
|
+
let page = mainPage;
|
|
343
|
+
let disposable = false;
|
|
344
|
+
if (useNewTab) {
|
|
345
|
+
const browserInstance = browser.getBrowser();
|
|
346
|
+
if (!browserInstance)
|
|
347
|
+
return null;
|
|
348
|
+
try {
|
|
349
|
+
page = await browserInstance.newPage(javaScriptEnabled === false ? { javaScriptEnabled: false } : undefined);
|
|
350
|
+
disposable = true;
|
|
351
|
+
}
|
|
352
|
+
catch {
|
|
353
|
+
return null;
|
|
354
|
+
}
|
|
355
|
+
}
|
|
356
|
+
else if (javaScriptEnabled === false) {
|
|
357
|
+
const browserInstance = browser.getBrowser();
|
|
358
|
+
if (browserInstance) {
|
|
359
|
+
page = await browserInstance.newPage({ javaScriptEnabled: false });
|
|
360
|
+
disposable = true;
|
|
361
|
+
}
|
|
362
|
+
}
|
|
182
363
|
try {
|
|
364
|
+
if (robotsRules) {
|
|
365
|
+
const urlPath = new URL(url).pathname;
|
|
366
|
+
if (!isAllowedByRobots(urlPath, robotsRules)) {
|
|
367
|
+
return null;
|
|
368
|
+
}
|
|
369
|
+
}
|
|
370
|
+
if (!useNewTab) {
|
|
371
|
+
const currentUrl = page.url();
|
|
372
|
+
if (currentUrl !== 'about:blank' && currentUrl !== url) {
|
|
373
|
+
try {
|
|
374
|
+
const currentHost = new URL(currentUrl).hostname;
|
|
375
|
+
const targetHost = new URL(url).hostname;
|
|
376
|
+
if (currentHost !== targetHost) {
|
|
377
|
+
await page.goto('about:blank').catch(() => { });
|
|
378
|
+
}
|
|
379
|
+
}
|
|
380
|
+
catch {
|
|
381
|
+
await page.goto('about:blank').catch(() => { });
|
|
382
|
+
}
|
|
383
|
+
}
|
|
384
|
+
}
|
|
183
385
|
await page.goto(url, { timeout: timeoutMs, waitUntil: 'domcontentloaded' });
|
|
386
|
+
const contentType = await page.evaluate(() => document.contentType).catch(() => '');
|
|
387
|
+
if (contentType && !contentType.includes('html')) {
|
|
388
|
+
return null;
|
|
389
|
+
}
|
|
184
390
|
await page.waitForLoadState('networkidle', { timeout: 5000 }).catch(() => { });
|
|
185
391
|
if (url.includes('#/') || url.includes('#!')) {
|
|
186
392
|
await page.waitForLoadState('networkidle', { timeout: 3000 }).catch(() => { });
|
|
@@ -197,7 +403,7 @@ async function crawlPage(page, url, baseOrigin, baseHostname, basePath, format,
|
|
|
197
403
|
const [title, content, links] = await Promise.all([
|
|
198
404
|
page.title(),
|
|
199
405
|
extractContentFromPage(page, format, selector),
|
|
200
|
-
discoverLinks(page, baseOrigin, baseHostname, basePath),
|
|
406
|
+
discoverLinks(page, baseOrigin, baseHostname, basePath, allowExternal),
|
|
201
407
|
]);
|
|
202
408
|
return { url: page.url(), title, content, links };
|
|
203
409
|
}
|
|
@@ -209,26 +415,35 @@ async function crawlPage(page, url, baseOrigin, baseHostname, basePath, format,
|
|
|
209
415
|
page.title().catch(() => url),
|
|
210
416
|
extractContentFromPage(page, format, selector).catch(() => ''),
|
|
211
417
|
]);
|
|
212
|
-
const links = await discoverLinks(page, baseOrigin, baseHostname, basePath).catch(() => []);
|
|
418
|
+
const links = await discoverLinks(page, baseOrigin, baseHostname, basePath, allowExternal).catch(() => []);
|
|
213
419
|
return { url: page.url(), title, content, links };
|
|
214
420
|
}
|
|
215
421
|
catch {
|
|
216
422
|
return null;
|
|
217
423
|
}
|
|
424
|
+
finally {
|
|
425
|
+
if (disposable)
|
|
426
|
+
await page.close().catch(() => { });
|
|
427
|
+
}
|
|
218
428
|
}
|
|
219
429
|
return null;
|
|
220
430
|
}
|
|
431
|
+
finally {
|
|
432
|
+
if (disposable)
|
|
433
|
+
await page.close().catch(() => { });
|
|
434
|
+
}
|
|
221
435
|
}
|
|
222
|
-
export async function discoverLinks(page, baseOrigin, baseHostname, basePath) {
|
|
223
|
-
const hrefs = await page.evaluate((
|
|
436
|
+
export async function discoverLinks(page, baseOrigin, baseHostname, basePath, allowExternal = false) {
|
|
437
|
+
const hrefs = await page.evaluate(() => {
|
|
224
438
|
const anchors = document.querySelectorAll('a[href]');
|
|
225
439
|
const results = [];
|
|
440
|
+
const base = document.baseURI;
|
|
226
441
|
anchors.forEach((a) => {
|
|
227
442
|
const href = a.getAttribute('href');
|
|
228
443
|
if (!href)
|
|
229
444
|
return;
|
|
230
445
|
try {
|
|
231
|
-
const fullUrl = new URL(href,
|
|
446
|
+
const fullUrl = new URL(href, base).href;
|
|
232
447
|
results.push(fullUrl);
|
|
233
448
|
}
|
|
234
449
|
catch {
|
|
@@ -236,7 +451,7 @@ export async function discoverLinks(page, baseOrigin, baseHostname, basePath) {
|
|
|
236
451
|
}
|
|
237
452
|
});
|
|
238
453
|
return results;
|
|
239
|
-
}
|
|
454
|
+
});
|
|
240
455
|
const filtered = new Set();
|
|
241
456
|
for (const href of hrefs) {
|
|
242
457
|
try {
|
|
@@ -249,9 +464,11 @@ export async function discoverLinks(page, baseOrigin, baseHostname, basePath) {
|
|
|
249
464
|
const hostname = url.hostname.replace(/^www\./, '');
|
|
250
465
|
if (SOCIAL_DOMAINS.some((d) => hostname === d || hostname.endsWith('.' + d)))
|
|
251
466
|
continue;
|
|
252
|
-
if (
|
|
253
|
-
if (
|
|
254
|
-
|
|
467
|
+
if (!allowExternal) {
|
|
468
|
+
if (hostname !== baseHostname && !hostname.endsWith('.' + baseHostname)) {
|
|
469
|
+
if (url.origin !== baseOrigin)
|
|
470
|
+
continue;
|
|
471
|
+
}
|
|
255
472
|
}
|
|
256
473
|
if (basePath && basePath !== '/') {
|
|
257
474
|
const normalizedBase = basePath.endsWith('/') ? basePath : basePath + '/';
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"crawl.js","sourceRoot":"","sources":["../../src/actions/crawl.ts"],"names":[],"mappings":"AAGA,OAAO,EAAE,eAAe,EAAE,MAAM,gBAAgB,CAAC;AACjD,OAAO,EAAE,sBAAsB,EAAE,iBAAiB,EAAE,MAAM,YAAY,CAAC;AAEvE,MAAM,iBAAiB,GAAG;IACxB,MAAM;IACN,MAAM;IACN,OAAO;IACP,MAAM;IACN,OAAO;IACP,MAAM;IACN,MAAM;IACN,MAAM;IACN,KAAK;IACL,OAAO;IACP,QAAQ;IACR,MAAM;IACN,MAAM;IACN,MAAM;IACN,MAAM;IACN,KAAK;IACL,MAAM;IACN,MAAM;IACN,MAAM;IACN,OAAO;IACP,MAAM;IACN,OAAO;IACP,MAAM;IACN,MAAM;IACN,MAAM;IACN,MAAM;IACN,MAAM;IACN,OAAO;IACP,MAAM;CACP,CAAC;AAEF,MAAM,cAAc,GAAG;IACrB,cAAc;IACd,aAAa;IACb,OAAO;IACP,eAAe;IACf,cAAc;IACd,aAAa;IACb,YAAY;IACZ,YAAY;IACZ,eAAe;IACf,YAAY;IACZ,WAAW;IACX,WAAW;CACZ,CAAC;AAEF,MAAM,kBAAkB,GAAG;IACzB,aAAa;IACb,WAAW;IACX,WAAW;IACX,UAAU;IACV,eAAe;IACf,cAAc;IACd,SAAS;IACT,eAAe;IACf,aAAa;IACb,YAAY;IACZ,WAAW;IACX,aAAa;IACb,cAAc;IACd,cAAc;IACd,eAAe;IACf,WAAW;IACX,gBAAgB;CACjB,CAAC;AAEF,SAAS,cAAc,CAAC,GAAW,EAAE,OAAe;IAClD,MAAM,KAAK,GAAG,IAAI,MAAM,CACtB,GAAG;QACD,OAAO;aACJ,OAAO,CAAC,mBAAmB,EAAE,MAAM,CAAC;aACpC,OAAO,CAAC,OAAO,EAAE,gBAAgB,CAAC;aAClC,OAAO,CAAC,KAAK,EAAE,OAAO,CAAC;aACvB,OAAO,CAAC,iBAAiB,EAAE,IAAI,CAAC;aAChC,OAAO,CAAC,KAAK,EAAE,MAAM,CAAC;QACzB,GAAG,CACN,CAAC;IACF,OAAO,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AACzB,CAAC;AAED,SAAS,mBAAmB,CAC1B,GAAW,EACX,eAA0B,EAC1B,eAA0B;IAE1B,IAAI,eAAe,EAAE,MAAM,EAAE,CAAC;QAC5B,KAAK,MAAM,OAAO,IAAI,eAAe,EAAE,CAAC;YACtC,IAAI,cAAc,CAAC,GAAG,EAAE,OAAO,CAAC;gBAAE,OAAO,KAAK,CAAC;QACjD,CAAC;IACH,CAAC;IACD,IAAI,eAAe,EAAE,MAAM,EAAE,CAAC;QAC5B,KAAK,MAAM,OAAO,IAAI,eAAe,EAAE,CAAC;YACtC,IAAI,cAAc,CAAC,GAAG,EAAE,OAAO,CAAC;gBAAE,OAAO,IAAI,CAAC;QAChD,CAAC;QACD,OAAO,KAAK,CAAC;IACf,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,WAAW,CAC/B,OAAqB,EACrB,OAAuB;IAEvB,MAAM,IAAI,GAAG,OAAO,CAAC,OAAO,EAAE,CAAC;IAC/B,IAAI,CAAC,IAAI,EAAE,CAAC;QACV,OAAO;YACL,EAAE,EAAE,OAAO,CAAC,EAAE;YACd,OAAO,EAAE,KAAK;YACd,KAAK,EAAE,8BAA8B;SACtC,CAAC;IACJ,CAAC;IAED,MAAM,QAAQ,GAAG,OAAO,CAAC,KAAK,IAAI,CAAC,CAAC;IACpC,MAAM,QAAQ,GAAG,OAAO,CAAC,KAAK,IAAI,EAAE,CAAC;IACrC,M
AAM,MAAM,GAAG,OAAO,CAAC,MAAM,IAAI,UAAU,CAAC;IAC5C,MAAM,SAAS,GAAG,CAAC,OAAO,CAAC,OAAO,IAAI,EAAE,CAAC,GAAG,IAAI,CAAC;IACjD,MAAM,aAAa,GAAG,OAAO,CAAC,aAAa,IAAI,KAAK,CAAC;IACrD,MAAM,eAAe,GAAG,OAAO,CAAC,eAAe,CAAC;IAChD,MAAM,eAAe,GAAG,OAAO,CAAC,eAAe,CAAC;IAEhD,MAAM,QAAQ,GAAG,YAAY,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;IAC3C,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,QAAQ,CAAC,CAAC;IACtC,MAAM,UAAU,GAAG,WAAW,CAAC,MAAM,CAAC;IACtC,MAAM,YAAY,GAAG,WAAW,CAAC,QAAQ,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;IAChE,MAAM,QAAQ,GAAG,WAAW,CAAC,QAAQ,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;IACzD,MAAM,OAAO,GAAG,IAAI,GAAG,EAAU,CAAC;IAClC,MAAM,KAAK,GAAgB,EAAE,CAAC;IAC9B,MAAM,QAAQ,GAAG,IAAI,GAAG,EAAU,CAAC;IACnC,IAAI,MAAM,GAAG,CAAC,CAAC;IAGf,MAAM,KAAK,GAAiB,CAAC,EAAE,GAAG,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC,EAAE,QAAQ,EAAE,CAAC,EAAE,CAAC,CAAC;IAEvE,SAAS,WAAW,CAAC,GAAW;QAC9B,KAAK,MAAM,CAAC,IAAI,kBAAkB,EAAE,CAAC;YACnC,IAAI,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC;gBAAE,OAAO,EAAE,CAAC;QAC7B,CAAC;QACD,OAAO,CAAC,CAAC;IACX,CAAC;IAED,OAAO,KAAK,CAAC,MAAM,GAAG,CAAC,IAAI,KAAK,CAAC,MAAM,GAAG,QAAQ,EAAE,CAAC;QACnD,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,GAAG,CAAC,CAAC,QAAQ,IAAI,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;QAEnE,MAAM,KAAK,GAAiB,EAAE,CAAC;QAC/B,OAAO,KAAK,CAAC,MAAM,GAAG,CAAC,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,IAAI,KAAK,CAAC,MAAM,GAAG,KAAK,CAAC,MAAM,GAAG,QAAQ,EAAE,CAAC;YACtF,MAAM,KAAK,GAAG,KAAK,CAAC,KAAK,EAAG,CAAC;YAC7B,MAAM,UAAU,GAAG,YAAY,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;YAC3C,IAAI,OAAO,CAAC,GAAG,CAAC,UAAU,CAAC;gBAAE,SAAS;YACtC,OAAO,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;YACxB,KAAK,CAAC,IAAI,CAAC,EAAE,GAAG,KAAK,EAAE,GAAG,EAAE,UAAU,EAAE,CAAC,CAAC;QAC5C,CAAC;QAED,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,UAAU,CACtC,KAAK,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAClB,SAAS,CACP,IAAI,EACJ,KAAK,CAAC,GAAG,EACT,UAAU,EACV,YAAY,EACZ,QAAQ,EACR,MAAM,EACN,OAAO,CAAC,QAAQ,EAChB,SAAS,EACT,aAAa,EACb,eAAe,EACf,eAAe,CAChB,CACF,CACF,CAAC;QAEF,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACxC,MAAM,MAAM,GAAG,OAAO,CAAC,CAAC,CAA
C,CAAC;YAC1B,MAAM,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;YAEvB,IAAI,MAAM,CAAC,MAAM,KAAK,WAAW,IAAI,MAAM,CAAC,KAAK,EAAE,CAAC;gBAClD,MAAM,aAAa,GAAG,MAAM,CAAC,KAAK,CAAC;gBACnC,MAAM,QAAQ,GAAG,YAAY,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC;gBACjD,IAAI,QAAQ,CAAC,GAAG,CAAC,QAAQ,CAAC;oBAAE,SAAS;gBACrC,QAAQ,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;gBACvB,KAAK,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;gBAE1B,IAAI,KAAK,CAAC,KAAK,GAAG,QAAQ,EAAE,CAAC;oBAC3B,KAAK,MAAM,IAAI,IAAI,aAAa,CAAC,KAAK,IAAI,EAAE,EAAE,CAAC;wBAC7C,MAAM,UAAU,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC;wBACtC,IAAI,OAAO,CAAC,GAAG,CAAC,UAAU,CAAC;4BAAE,SAAS;wBACtC,IAAI,CAAC,YAAY,CAAC,UAAU,EAAE,UAAU,EAAE,YAAY,EAAE,QAAQ,EAAE,aAAa,CAAC;4BAC9E,SAAS;wBACX,IAAI,CAAC,mBAAmB,CAAC,UAAU,EAAE,eAAe,EAAE,eAAe,CAAC;4BAAE,SAAS;wBACjF,IAAI,KAAK,CAAC,MAAM,GAAG,KAAK,CAAC,MAAM,IAAI,QAAQ;4BAAE,MAAM;wBACnD,KAAK,CAAC,IAAI,CAAC;4BACT,GAAG,EAAE,UAAU;4BACf,KAAK,EAAE,KAAK,CAAC,KAAK,GAAG,CAAC;4BACtB,QAAQ,EAAE,WAAW,CAAC,UAAU,CAAC;yBAClC,CAAC,CAAC;oBACL,CAAC;gBACH,CAAC;YACH,CAAC;iBAAM,CAAC;gBACN,MAAM,EAAE,CAAC;YACX,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,eAAe,CAAC,OAAO,CAAC,EAAE,EAAE;QACjC,GAAG,EAAE,QAAQ;QACb,KAAK;QACL,KAAK,EAAE,KAAK,CAAC,MAAM;QACnB,OAAO,EAAE,KAAK,CAAC,MAAM;QACrB,MAAM;KACP,CAAC,CAAC;AACL,CAAC;AAED,KAAK,UAAU,SAAS,CACtB,IAAU,EACV,GAAW,EACX,UAAkB,EAClB,YAAoB,EACpB,QAAgB,EAChB,MAAoC,EACpC,QAAiB,EACjB,YAAoB,KAAK,EACzB,gBAAyB,KAAK,EAC9B,eAA0B,EAC1B,eAA0B;IAE1B,IAAI,CAAC;QACH,MAAM,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,EAAE,OAAO,EAAE,SAAS,EAAE,SAAS,EAAE,kBAAkB,EAAE,CAAC,CAAC;QAE5E,MAAM,IAAI,CAAC,gBAAgB,CAAC,aAAa,EAAE,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;QAE9E,IAAI,GAAG,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,GAAG,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;YAC7C,MAAM,IAAI,CAAC,gBAAgB,CAAC,aAAa,EAAE,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;YAC9E,MAAM,iBAAiB,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC;QACtC,CAAC;QAED,IAAI,QAAQ,EAAE,CAAC;YACb,IAAI,CAAC;gBACH,MAAM,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,KAAK,EAAE,CAAC,OAAO,CAAC,EAAE,KAAK,EAAE,UAAU,EAAE,OAAO,EAAE,IAAI,E
AAE,CAAC,CAAC;YACrF,CAAC;YAAC,MAAM,CAAC;gBACP,sCAAsC;YACxC,CAAC;QACH,CAAC;QAED,MAAM,CAAC,KAAK,EAAE,OAAO,EAAE,KAAK,CAAC,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;YAChD,IAAI,CAAC,KAAK,EAAE;YACZ,sBAAsB,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,CAAC;YAC9C,aAAa,CAAC,IAAI,EAAE,UAAU,EAAE,YAAY,EAAE,QAAQ,CAAC;SACxD,CAAC,CAAC;QAEH,OAAO,EAAE,GAAG,EAAE,IAAI,CAAC,GAAG,EAAE,EAAE,KAAK,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC;IACpD,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,MAAM,GAAG,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;QAC7D,IAAI,GAAG,CAAC,QAAQ,CAAC,SAAS,CAAC,IAAI,GAAG,CAAC,QAAQ,CAAC,SAAS,CAAC,EAAE,CAAC;YACvD,IAAI,CAAC;gBACH,MAAM,CAAC,KAAK,EAAE,OAAO,CAAC,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;oBACzC,IAAI,CAAC,KAAK,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC;oBAC7B,sBAAsB,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,EAAE,CAAC;iBAC/D,CAAC,CAAC;gBACH,MAAM,KAAK,GAAG,MAAM,aAAa,CAAC,IAAI,EAAE,UAAU,EAAE,YAAY,EAAE,QAAQ,CAAC,CAAC,KAAK,CAC/E,GAAG,EAAE,CAAC,EAAc,CACrB,CAAC;gBACF,OAAO,EAAE,GAAG,EAAE,IAAI,CAAC,GAAG,EAAE,EAAE,KAAK,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC;YACpD,CAAC;YAAC,MAAM,CAAC;gBACP,OAAO,IAAI,CAAC;YACd,CAAC;QACH,CAAC;QACD,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,IAAU,EACV,UAAkB,EAClB,YAAoB,EACpB,QAAgB;IAEhB,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,CAAC,MAAc,EAAE,EAAE;QACnD,MAAM,OAAO,GAAG,QAAQ,CAAC,gBAAgB,CAAC,SAAS,CAAC,CAAC;QACrD,MAAM,OAAO,GAAa,EAAE,CAAC;QAC7B,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE;YACpB,MAAM,IAAI,GAAG,CAAC,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC;YACpC,IAAI,CAAC,IAAI;gBAAE,OAAO;YAClB,IAAI,CAAC;gBACH,MAAM,OAAO,GAAG,IAAI,GAAG,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC,IAAI,CAAC;gBAC3C,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACxB,CAAC;YAAC,MAAM,CAAC;gBACP,cAAc;YAChB,CAAC;QACH,CAAC,CAAC,CAAC;QACH,OAAO,OAAO,CAAC;IACjB,CAAC,EAAE,UAAU,CAAC,CAAC;IAEf,MAAM,QAAQ,GAAG,IAAI,GAAG,EAAU,CAAC;IAEnC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,CAAC;YACH,MAAM,GAAG,GAAG,IAAI,GAAG,CAAC,IAAI,CAAC,CAAC;YAE1B,IAAI,GAAG,CAAC,QAAQ,KAAK,OAAO,IAAI,GAAG,CAAC,QAAQ,KAAK,QA
AQ;gBAAE,SAAS;YAEpE,MAAM,QAAQ,GAAG,GAAG,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC;YAC5C,IAAI,iBAAiB,CAAC,IAAI,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC;gBAAE,SAAS;YAEtE,MAAM,QAAQ,GAAG,GAAG,CAAC,QAAQ,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;YACpD,IAAI,cAAc,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,QAAQ,KAAK,CAAC,IAAI,QAAQ,CAAC,QAAQ,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC;gBAAE,SAAS;YAEvF,IAAI,QAAQ,KAAK,YAAY,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,GAAG,GAAG,YAAY,CAAC,EAAE,CAAC;gBACxE,IAAI,GAAG,CAAC,MAAM,KAAK,UAAU;oBAAE,SAAS;YAC1C,CAAC;YAED,IAAI,QAAQ,IAAI,QAAQ,KAAK,GAAG,EAAE,CAAC;gBACjC,MAAM,cAAc,GAAG,QAAQ,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,QAAQ,GAAG,GAAG,CAAC;gBAC1E,MAAM,OAAO,GAAG,GAAG,CAAC,QAAQ,CAAC;gBAC7B,IAAI,OAAO,KAAK,QAAQ,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,cAAc,CAAC;oBAAE,SAAS;YAC5E,CAAC;YAED,MAAM,UAAU,GAAG,mBAAmB,CAAC,GAAG,CAAC,CAAC;YAC5C,IAAI,UAAU;gBAAE,QAAQ,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;QAC3C,CAAC;QAAC,MAAM,CAAC;YACP,SAAS;QACX,CAAC;IACH,CAAC;IAED,OAAO,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;AAC9B,CAAC;AAED,SAAS,YAAY,CACnB,GAAW,EACX,UAAkB,EAClB,YAAoB,EACpB,QAAgB,EAChB,gBAAyB,KAAK;IAE9B,IAAI,CAAC;QACH,MAAM,CAAC,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;QACvB,IAAI,CAAC,CAAC,QAAQ,KAAK,OAAO,IAAI,CAAC,CAAC,QAAQ,KAAK,QAAQ;YAAE,OAAO,KAAK,CAAC;QACpE,MAAM,QAAQ,GAAG,CAAC,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC;QAC1C,IAAI,iBAAiB,CAAC,IAAI,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC;YAAE,OAAO,KAAK,CAAC;QAC1E,MAAM,QAAQ,GAAG,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;QAClD,IAAI,cAAc,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,QAAQ,KAAK,CAAC,IAAI,QAAQ,CAAC,QAAQ,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC;YAAE,OAAO,KAAK,CAAC;QAE3F,IAAI,CAAC,aAAa,EAAE,CAAC;YACnB,IACE,QAAQ,KAAK,YAAY;gBACzB,CAAC,QAAQ,CAAC,QAAQ,CAAC,GAAG,GAAG,YAAY,CAAC;gBACtC,CAAC,CAAC,MAAM,KAAK,UAAU,EACvB,CAAC;gBACD,OAAO,KAAK,CAAC;YACf,CAAC;QACH,CAAC;QAED,IAAI,QAAQ,IAAI,QAAQ,KAAK,GAAG,EAAE,CAAC;YACjC,MAAM,cAAc,GAAG,QAAQ,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,QAAQ,GAAG,GAAG,CAAC;YAC1E,MAAM,OA
AO,GAAG,CAAC,CAAC,QAAQ,CAAC;YAC3B,IAAI,OAAO,KAAK,QAAQ,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,cAAc,CAAC,EAAE,CAAC;gBAChE,OAAO,KAAK,CAAC;YACf,CAAC;QACH,CAAC;QAED,OAAO,IAAI,CAAC;IACd,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC;AAED,MAAM,UAAU,YAAY,CAAC,GAAW;IACtC,IAAI,CAAC;QACH,MAAM,CAAC,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;QACvB,OAAO,mBAAmB,CAAC,CAAC,CAAC,CAAC;IAChC,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,GAAG,CAAC;IACb,CAAC;AACH,CAAC;AAED,MAAM,UAAU,mBAAmB,CAAC,CAAM;IACxC,IAAI,QAAQ,GAAG,CAAC,CAAC,QAAQ,CAAC;IAC1B,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,IAAI,QAAQ,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;QAClD,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;IACnC,CAAC;IACD,MAAM,IAAI,GAAG,CAAC,CAAC,IAAI,CAAC;IACpB,IAAI,IAAI,IAAI,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC;QAClC,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QAC/B,MAAM,aAAa,GAAG,QAAQ,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;QAChD,IAAI,CAAC,aAAa,IAAI,aAAa,KAAK,GAAG,EAAE,CAAC;YAC5C,OAAO,GAAG,CAAC,CAAC,MAAM,GAAG,QAAQ,EAAE,CAAC;QAClC,CAAC;QACD,OAAO,GAAG,CAAC,CAAC,MAAM,GAAG,QAAQ,KAAK,aAAa,EAAE,CAAC;IACpD,CAAC;IACD,OAAO,GAAG,CAAC,CAAC,MAAM,GAAG,QAAQ,EAAE,CAAC;AAClC,CAAC"}
|
|
1
|
+
{"version":3,"file":"crawl.js","sourceRoot":"","sources":["../../src/actions/crawl.ts"],"names":[],"mappings":"AAGA,OAAO,EAAE,eAAe,EAAE,MAAM,gBAAgB,CAAC;AACjD,OAAO,EAAE,sBAAsB,EAAE,iBAAiB,EAAE,MAAM,YAAY,CAAC;AAEvE,MAAM,iBAAiB,GAAG;IACxB,MAAM;IACN,MAAM;IACN,OAAO;IACP,MAAM;IACN,MAAM;IACN,MAAM;IACN,OAAO;IACP,OAAO;IACP,MAAM;IACN,OAAO;IACP,MAAM;IACN,MAAM;IACN,MAAM;IACN,MAAM;IACN,MAAM;IACN,MAAM;IACN,OAAO;IACP,MAAM;IACN,MAAM;IACN,OAAO;IACP,QAAQ;IACR,MAAM;IACN,MAAM;IACN,MAAM;IACN,MAAM;IACN,KAAK;IACL,MAAM;IACN,MAAM;IACN,MAAM;IACN,OAAO;IACP,MAAM;IACN,OAAO;IACP,MAAM;IACN,OAAO;IACP,MAAM;IACN,MAAM;IACN,MAAM;IACN,MAAM;IACN,MAAM;IACN,KAAK;IACL,MAAM;IACN,MAAM;IACN,KAAK;IACL,MAAM;IACN,MAAM;IACN,MAAM;IACN,MAAM;IACN,MAAM;IACN,MAAM;IACN,MAAM;IACN,OAAO;IACP,OAAO;IACP,MAAM;IACN,MAAM;IACN,MAAM;IACN,MAAM;IACN,MAAM;IACN,MAAM;IACN,MAAM;IACN,MAAM;IACN,OAAO;CACR,CAAC;AAEF,MAAM,cAAc,GAAG;IACrB,cAAc;IACd,aAAa;IACb,OAAO;IACP,eAAe;IACf,cAAc;IACd,aAAa;IACb,YAAY;IACZ,YAAY;IACZ,eAAe;IACf,YAAY;IACZ,WAAW;IACX,WAAW;CACZ,CAAC;AAEF,MAAM,kBAAkB,GAAG;IACzB,aAAa;IACb,WAAW;IACX,WAAW;IACX,UAAU;IACV,eAAe;IACf,cAAc;IACd,SAAS;IACT,eAAe;IACf,aAAa;IACb,YAAY;IACZ,WAAW;IACX,aAAa;IACb,cAAc;IACd,cAAc;IACd,eAAe;IACf,WAAW;IACX,gBAAgB;CACjB,CAAC;AAEF,SAAS,cAAc,CAAC,GAAW,EAAE,OAAe;IAClD,MAAM,KAAK,GAAG,IAAI,MAAM,CACtB,GAAG;QACD,OAAO;aACJ,OAAO,CAAC,mBAAmB,EAAE,MAAM,CAAC;aACpC,OAAO,CAAC,OAAO,EAAE,gBAAgB,CAAC;aAClC,OAAO,CAAC,KAAK,EAAE,OAAO,CAAC;aACvB,OAAO,CAAC,iBAAiB,EAAE,IAAI,CAAC;aAChC,OAAO,CAAC,KAAK,EAAE,MAAM,CAAC;QACzB,GAAG,CACN,CAAC;IACF,OAAO,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AACzB,CAAC;AAED,SAAS,mBAAmB,CAC1B,GAAW,EACX,eAA0B,EAC1B,eAA0B;IAE1B,IAAI,eAAe,EAAE,MAAM,EAAE,CAAC;QAC5B,KAAK,MAAM,OAAO,IAAI,eAAe,EAAE,CAAC;YACtC,IAAI,cAAc,CAAC,GAAG,EAAE,OAAO,CAAC;gBAAE,OAAO,KAAK,CAAC;QACjD,CAAC;IACH,CAAC;IACD,IAAI,eAAe,EAAE,MAAM,EAAE,CAAC;QAC5B,KAAK,MAAM,OAAO,IAAI,eAAe,EAAE,CAAC;YACtC,IAAI,cAAc,CAAC,GAAG,EAAE,OAAO,CAAC;gBAAE,OAAO,IAAI,CAAC;QAChD,CAAC;QACD,OAAO,KAAK,CAAC;IACf,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAQD,KAAK,UAAU,cAAc,CA
AC,MAAc;IAC1C,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,GAAG,MAAM,aAAa,EAAE;YAC9C,MAAM,EAAE,WAAW,CAAC,OAAO,CAAC,IAAI,CAAC;SAClC,CAAC,CAAC;QACH,OAAO,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;IACxC,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,EAAE,CAAC;IACZ,CAAC;AACH,CAAC;AAED,SAAS,cAAc,CAAC,SAAiB,EAAE,YAAoB,GAAG;IAChE,MAAM,OAAO,GAAa,EAAE,CAAC;IAC7B,MAAM,UAAU,GAAa,EAAE,CAAC;IAChC,IAAI,UAA8B,CAAC;IAEnC,MAAM,KAAK,GAAG,SAAS,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;IACzD,IAAI,UAAU,GAAG,KAAK,CAAC;IAEvB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI;YAAE,SAAS;QAE5C,MAAM,QAAQ,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;QACnC,IAAI,QAAQ,KAAK,CAAC,CAAC;YAAE,SAAS;QAC9B,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,QAAQ,CAAC,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;QACzD,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QAE9C,IAAI,GAAG,KAAK,YAAY,EAAE,CAAC;YACzB,UAAU,GAAG,KAAK,KAAK,GAAG,IAAI,KAAK,CAAC,WAAW,EAAE,KAAK,CAAC,SAAS,IAAI,GAAG,CAAC,CAAC,WAAW,EAAE,CAAC;QACzF,CAAC;aAAM,IAAI,UAAU,EAAE,CAAC;YACtB,IAAI,GAAG,KAAK,UAAU,IAAI,KAAK,EAAE,CAAC;gBAChC,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACzB,CAAC;iBAAM,IAAI,GAAG,KAAK,OAAO,IAAI,KAAK,EAAE,CAAC;gBACpC,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACtB,CAAC;iBAAM,IAAI,GAAG,KAAK,aAAa,IAAI,KAAK,EAAE,CAAC;gBAC1C,UAAU,GAAG,UAAU,CAAC,KAAK,CAAC,CAAC;YACjC,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,EAAE,OAAO,EAAE,UAAU,EAAE,UAAU,EAAE,CAAC;AAC7C,CAAC;AAED,SAAS,oBAAoB,CAAC,QAAgB,EAAE,OAAe;IAC7D,MAAM,KAAK,GAAG,OAAO;SAClB,OAAO,CAAC,mBAAmB,EAAE,MAAM,CAAC;SACpC,OAAO,CAAC,KAAK,EAAE,IAAI,CAAC;SACpB,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;IACtB,IAAI,CAAC;QACH,OAAO,IAAI,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IAChD,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC;AAED,SAAS,iBAAiB,CAAC,QAAgB,EAAE,KAAiB;IAC5D,KAAK,MAAM,OAAO,IAAI,KAAK,CAAC,OAAO,EAAE,CAAC;QACpC,IAAI,oBAAoB,CAAC,QAAQ,EAAE,OAAO,CAAC;YAAE,OAAO,IAAI,CAAC
;IAC3D,CAAC;IACD,KAAK,MAAM,OAAO,IAAI,KAAK,CAAC,UAAU,EAAE,CAAC;QACvC,IAAI,oBAAoB,CAAC,QAAQ,EAAE,OAAO,CAAC;YAAE,OAAO,KAAK,CAAC;IAC5D,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,WAAW,CAC/B,OAAqB,EACrB,OAAuB;IAEvB,MAAM,IAAI,GAAG,OAAO,CAAC,OAAO,EAAE,CAAC;IAC/B,IAAI,CAAC,IAAI,EAAE,CAAC;QACV,OAAO;YACL,EAAE,EAAE,OAAO,CAAC,EAAE;YACd,OAAO,EAAE,KAAK;YACd,KAAK,EAAE,8BAA8B;SACtC,CAAC;IACJ,CAAC;IAED,MAAM,QAAQ,GAAG,OAAO,CAAC,KAAK,IAAI,CAAC,CAAC;IACpC,MAAM,QAAQ,GAAG,OAAO,CAAC,KAAK,IAAI,EAAE,CAAC;IACrC,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,IAAI,UAAU,CAAC;IAC5C,MAAM,SAAS,GAAG,CAAC,OAAO,CAAC,OAAO,IAAI,EAAE,CAAC,GAAG,IAAI,CAAC;IACjD,MAAM,aAAa,GAAG,OAAO,CAAC,aAAa,IAAI,KAAK,CAAC;IACrD,MAAM,eAAe,GAAG,OAAO,CAAC,eAAe,CAAC;IAChD,MAAM,eAAe,GAAG,OAAO,CAAC,eAAe,CAAC;IAChD,MAAM,WAAW,GAAG,OAAO,CAAC,WAAW,IAAI,CAAC,CAAC;IAE7C,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,EAAE,CAAC;IAC/B,IAAI,OAAO,CAAC,OAAO,IAAI,OAAO,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAClD,MAAM,OAAO,CAAC,UAAU,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;IAC5C,CAAC;IAED,MAAM,QAAQ,GAAG,YAAY,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;IAC3C,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,QAAQ,CAAC,CAAC;IACtC,MAAM,UAAU,GAAG,WAAW,CAAC,MAAM,CAAC;IACtC,MAAM,YAAY,GAAG,WAAW,CAAC,QAAQ,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;IAChE,MAAM,QAAQ,GAAG,WAAW,CAAC,QAAQ,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;IACzD,MAAM,SAAS,GAAG,MAAM,cAAc,CAAC,UAAU,CAAC,CAAC;IACnD,MAAM,WAAW,GAAG,cAAc,CAAC,SAAS,CAAC,CAAC;IAC9C,MAAM,UAAU,GAAG,WAAW,CAAC,UAAU,CAAC,CAAC,CAAC,WAAW,CAAC,UAAU,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC;IAC9E,MAAM,OAAO,GAAG,IAAI,GAAG,EAAU,CAAC;IAClC,MAAM,KAAK,GAAgB,EAAE,CAAC;IAC9B,MAAM,QAAQ,GAAG,IAAI,GAAG,EAAU,CAAC;IACnC,IAAI,MAAM,GAAG,CAAC,CAAC;IAGf,MAAM,KAAK,GAAiB,CAAC,EAAE,GAAG,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC,EAAE,QAAQ,EAAE,CAAC,EAAE,CAAC,CAAC;IAEvE,SAAS,uBAAuB,CAAC,GAAW;QAC1C,IAAI,CAAC;YACH,MAAM,CAAC,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;YACvB,MAAM,QAAQ,GAAa,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;YAEpC,IAAI,CAAC,CAAC,QAAQ,CAAC,UAAU,CAAC,MAAM,CAAC,EAAE,CAAC;gBAClC,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,CAAC,CAAC,IAAI,CAA
C,CAAC;gBACjC,QAAQ,CAAC,QAAQ,GAAG,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;gBACxC,QAAQ,CAAC,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;YAC/B,CAAC;iBAAM,CAAC;gBACN,MAAM,OAAO,GAAG,IAAI,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;gBAChC,OAAO,CAAC,QAAQ,GAAG,MAAM,GAAG,CAAC,CAAC,QAAQ,CAAC;gBACvC,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;YAC9B,CAAC;YAED,IAAI,CAAC,CAAC,QAAQ,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;gBAC7B,MAAM,OAAO,GAAG,IAAI,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;gBAChC,OAAO,CAAC,QAAQ,GAAG,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,IAAI,GAAG,CAAC;gBAC9D,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;YAC9B,CAAC;iBAAM,CAAC;gBACN,MAAM,SAAS,GAAG,IAAI,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;gBAClC,SAAS,CAAC,QAAQ,IAAI,GAAG,CAAC;gBAC1B,QAAQ,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;YAChC,CAAC;YAED,MAAM,YAAY,GAAG,mCAAmC,CAAC;YACzD,IAAI,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,QAAQ,CAAC,EAAE,CAAC;gBAClC,MAAM,OAAO,GAAG,IAAI,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;gBAChC,OAAO,CAAC,QAAQ,GAAG,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,YAAY,EAAE,GAAG,CAAC,IAAI,GAAG,CAAC;gBAChE,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;YAC9B,CAAC;YAED,IAAI,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC;gBACnE,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;gBAC/B,MAAM,CAAC,IAAI,GAAG,EAAE,CAAC;gBACjB,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;YAC7B,CAAC;YAED,OAAO,CAAC,GAAG,IAAI,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC;QAChC,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,CAAC,GAAG,CAAC,CAAC;QACf,CAAC;IACH,CAAC;IAED,SAAS,SAAS,CAAC,GAAW;QAC5B,OAAO,uBAAuB,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC;IACxE,CAAC;IAED,SAAS,WAAW,CAAC,GAAW;QAC9B,KAAK,MAAM,IAAI,IAAI,uBAAuB,CAAC,GAAG,CAAC,EAAE,CAAC;YAChD,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QACpB,CAAC;IACH,CAAC;IAED,SAAS,WAAW,CAAC,GAAW;QAC9B,KAAK,MAAM,CAAC,IAAI,kBAAkB,EAAE,CAAC;YACnC,IAAI,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC;gBAAE,OAAO,EAAE,CAAC;QAC7B,CAAC;QACD,OAAO,CAAC,CAAC;IACX,CAAC;IAED,OAAO,KAAK
,CAAC,MAAM,GAAG,CAAC,IAAI,KAAK,CAAC,MAAM,GAAG,QAAQ,EAAE,CAAC;QACnD,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,GAAG,CAAC,CAAC,QAAQ,IAAI,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;QAEnE,MAAM,KAAK,GAAiB,EAAE,CAAC;QAC/B,OACE,KAAK,CAAC,MAAM,GAAG,CAAC;YAChB,KAAK,CAAC,MAAM,GAAG,WAAW;YAC1B,KAAK,CAAC,MAAM,GAAG,KAAK,CAAC,MAAM,GAAG,QAAQ,EACtC,CAAC;YACD,MAAM,KAAK,GAAG,KAAK,CAAC,KAAK,EAAG,CAAC;YAC7B,MAAM,UAAU,GAAG,YAAY,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;YAC3C,IAAI,SAAS,CAAC,UAAU,CAAC;gBAAE,SAAS;YACpC,WAAW,CAAC,UAAU,CAAC,CAAC;YACxB,KAAK,CAAC,IAAI,CAAC,EAAE,GAAG,KAAK,EAAE,GAAG,EAAE,UAAU,EAAE,CAAC,CAAC;QAC5C,CAAC;QAED,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,UAAU,CACtC,KAAK,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAClB,SAAS,CACP,OAAO,EACP,IAAI,EACJ,KAAK,CAAC,GAAG,EACT,UAAU,EACV,YAAY,EACZ,QAAQ,EACR,MAAM,EACN,OAAO,CAAC,QAAQ,EAChB,SAAS,EACT,aAAa,EACb,eAAe,EACf,eAAe,EACf,OAAO,CAAC,iBAAiB,EACzB,WAAW,EACX,UAAU,EACV,WAAW,GAAG,CAAC,CAChB,CACF,CACF,CAAC;QAEF,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACxC,MAAM,MAAM,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC;YAC1B,MAAM,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;YAEvB,IAAI,MAAM,CAAC,MAAM,KAAK,WAAW,IAAI,MAAM,CAAC,KAAK,EAAE,CAAC;gBAClD,MAAM,aAAa,GAAG,MAAM,CAAC,KAAK,CAAC;gBACnC,MAAM,QAAQ,GAAG,YAAY,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC;gBACjD,IAAI,QAAQ,CAAC,GAAG,CAAC,QAAQ,CAAC;oBAAE,SAAS;gBACrC,QAAQ,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;gBACvB,WAAW,CAAC,QAAQ,CAAC,CAAC;gBACtB,KAAK,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;gBAE1B,IAAI,UAAU,GAAG,CAAC,EAAE,CAAC;oBACnB,MAAM,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,UAAU,CAAC,CAAC,CAAC;gBAClE,CAAC;gBAED,IAAI,KAAK,CAAC,KAAK,GAAG,QAAQ,EAAE,CAAC;oBAC3B,KAAK,MAAM,IAAI,IAAI,aAAa,CAAC,KAAK,IAAI,EAAE,EAAE,CAAC;wBAC7C,MAAM,UAAU,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC;wBACtC,IAAI,SAAS,CAAC,UAAU,CAAC;4BAAE,SAAS;wBACpC,IAAI,CAAC,YAAY,CAAC,UAAU,EAAE,UAAU,EAAE,YAAY,EAAE,QAAQ,EAAE,aAAa,CAAC;4BAC9E,SAAS;wBACX,IAAI,CAAC,mBAAmB,CAAC,UAAU,EAAE,eAAe,EAAE,eAAe,CAAC;4BAAE,SAAS;wBACjF,IAAI,KAAK,CAAC,MAAM,GAAG,KAAK,CAAC
,MAAM,IAAI,QAAQ;4BAAE,MAAM;wBACnD,KAAK,CAAC,IAAI,CAAC;4BACT,GAAG,EAAE,UAAU;4BACf,KAAK,EAAE,KAAK,CAAC,KAAK,GAAG,CAAC;4BACtB,QAAQ,EAAE,WAAW,CAAC,UAAU,CAAC;yBAClC,CAAC,CAAC;oBACL,CAAC;gBACH,CAAC;YACH,CAAC;iBAAM,CAAC;gBACN,MAAM,EAAE,CAAC;YACX,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,eAAe,CAAC,OAAO,CAAC,EAAE,EAAE;QACjC,GAAG,EAAE,QAAQ;QACb,KAAK;QACL,KAAK,EAAE,KAAK,CAAC,MAAM;QACnB,OAAO,EAAE,KAAK,CAAC,MAAM;QACrB,MAAM;KACP,CAAC,CAAC;AACL,CAAC;AAED,KAAK,UAAU,SAAS,CACtB,OAAuB,EACvB,QAAc,EACd,GAAW,EACX,UAAkB,EAClB,YAAoB,EACpB,QAAgB,EAChB,MAAoC,EACpC,QAAiB,EACjB,YAAoB,KAAK,EACzB,gBAAyB,KAAK,EAC9B,eAA0B,EAC1B,eAA0B,EAC1B,iBAA2B,EAC3B,WAAwB,EACxB,UAAmB,EACnB,YAAqB,KAAK;IAE1B,IAAI,IAAI,GAAG,QAAQ,CAAC;IACpB,IAAI,UAAU,GAAG,KAAK,CAAC;IAEvB,IAAI,SAAS,EAAE,CAAC;QACd,MAAM,eAAe,GAAG,OAAO,CAAC,UAAU,EAAE,CAAC;QAC7C,IAAI,CAAC,eAAe;YAAE,OAAO,IAAI,CAAC;QAClC,IAAI,CAAC;YACH,IAAI,GAAG,MAAM,eAAe,CAAC,OAAO,CAClC,iBAAiB,KAAK,KAAK,CAAC,CAAC,CAAC,EAAE,iBAAiB,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC,SAAS,CACvE,CAAC;YACF,UAAU,GAAG,IAAI,CAAC;QACpB,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;SAAM,IAAI,iBAAiB,KAAK,KAAK,EAAE,CAAC;QACvC,MAAM,eAAe,GAAG,OAAO,CAAC,UAAU,EAAE,CAAC;QAC7C,IAAI,eAAe,EAAE,CAAC;YACpB,IAAI,GAAG,MAAM,eAAe,CAAC,OAAO,CAAC,EAAE,iBAAiB,EAAE,KAAK,EAAE,CAAC,CAAC;YACnE,UAAU,GAAG,IAAI,CAAC;QACpB,CAAC;IACH,CAAC;IAED,IAAI,CAAC;QACH,IAAI,WAAW,EAAE,CAAC;YAChB,MAAM,OAAO,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC;YACtC,IAAI,CAAC,iBAAiB,CAAC,OAAO,EAAE,WAAW,CAAC,EAAE,CAAC;gBAC7C,OAAO,IAAI,CAAC;YACd,CAAC;QACH,CAAC;QAED,IAAI,CAAC,SAAS,EAAE,CAAC;YACf,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;YAC9B,IAAI,UAAU,KAAK,aAAa,IAAI,UAAU,KAAK,GAAG,EAAE,CAAC;gBACvD,IAAI,CAAC;oBACH,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,UAAU,CAAC,CAAC,QAAQ,CAAC;oBACjD,MAAM,UAAU,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC;oBACzC,IAAI,WAAW,KAAK,UAAU,EAAE,CAAC;wBAC/B,MAAM,IAAI,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;oBACjD,CAAC;gBACH,CAAC;gBAAC,MAAM,CAAC;oBACP,MAAM,IAAI,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC,KAAK,CAAC,GAAG,E
AAE,GAAE,CAAC,CAAC,CAAC;gBACjD,CAAC;YACH,CAAC;QACH,CAAC;QAED,MAAM,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,EAAE,OAAO,EAAE,SAAS,EAAE,SAAS,EAAE,kBAAkB,EAAE,CAAC,CAAC;QAE5E,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,GAAG,EAAE,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,EAAE,CAAC,CAAC;QACpF,IAAI,WAAW,IAAI,CAAC,WAAW,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;YACjD,OAAO,IAAI,CAAC;QACd,CAAC;QAED,MAAM,IAAI,CAAC,gBAAgB,CAAC,aAAa,EAAE,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;QAE9E,IAAI,GAAG,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,GAAG,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;YAC7C,MAAM,IAAI,CAAC,gBAAgB,CAAC,aAAa,EAAE,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;YAC9E,MAAM,iBAAiB,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC;QACtC,CAAC;QAED,IAAI,QAAQ,EAAE,CAAC;YACb,IAAI,CAAC;gBACH,MAAM,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,KAAK,EAAE,CAAC,OAAO,CAAC,EAAE,KAAK,EAAE,UAAU,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC;YACrF,CAAC;YAAC,MAAM,CAAC;gBACP,sCAAsC;YACxC,CAAC;QACH,CAAC;QAED,MAAM,CAAC,KAAK,EAAE,OAAO,EAAE,KAAK,CAAC,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;YAChD,IAAI,CAAC,KAAK,EAAE;YACZ,sBAAsB,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,CAAC;YAC9C,aAAa,CAAC,IAAI,EAAE,UAAU,EAAE,YAAY,EAAE,QAAQ,EAAE,aAAa,CAAC;SACvE,CAAC,CAAC;QAEH,OAAO,EAAE,GAAG,EAAE,IAAI,CAAC,GAAG,EAAE,EAAE,KAAK,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC;IACpD,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,MAAM,GAAG,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;QAC7D,IAAI,GAAG,CAAC,QAAQ,CAAC,SAAS,CAAC,IAAI,GAAG,CAAC,QAAQ,CAAC,SAAS,CAAC,EAAE,CAAC;YACvD,IAAI,CAAC;gBACH,MAAM,CAAC,KAAK,EAAE,OAAO,CAAC,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;oBACzC,IAAI,CAAC,KAAK,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC;oBAC7B,sBAAsB,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,EAAE,CAAC;iBAC/D,CAAC,CAAC;gBACH,MAAM,KAAK,GAAG,MAAM,aAAa,CAC/B,IAAI,EACJ,UAAU,EACV,YAAY,EACZ,QAAQ,EACR,aAAa,CACd,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,EAAc,CAAC,CAAC;gBAC9B,OAAO,EAAE,GAAG,EAAE,IAAI,CAAC,GAAG,EAAE,EAAE,KAAK,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC;YA
CpD,CAAC;YAAC,MAAM,CAAC;gBACP,OAAO,IAAI,CAAC;YACd,CAAC;oBAAS,CAAC;gBACT,IAAI,UAAU;oBAAE,MAAM,IAAI,CAAC,KAAK,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;YACrD,CAAC;QACH,CAAC;QACD,OAAO,IAAI,CAAC;IACd,CAAC;YAAS,CAAC;QACT,IAAI,UAAU;YAAE,MAAM,IAAI,CAAC,KAAK,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;IACrD,CAAC;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,IAAU,EACV,UAAkB,EAClB,YAAoB,EACpB,QAAgB,EAChB,gBAAyB,KAAK;IAE9B,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,GAAG,EAAE;QACrC,MAAM,OAAO,GAAG,QAAQ,CAAC,gBAAgB,CAAC,SAAS,CAAC,CAAC;QACrD,MAAM,OAAO,GAAa,EAAE,CAAC;QAC7B,MAAM,IAAI,GAAG,QAAQ,CAAC,OAAO,CAAC;QAC9B,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE;YACpB,MAAM,IAAI,GAAG,CAAC,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC;YACpC,IAAI,CAAC,IAAI;gBAAE,OAAO;YAClB,IAAI,CAAC;gBACH,MAAM,OAAO,GAAG,IAAI,GAAG,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC,IAAI,CAAC;gBACzC,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACxB,CAAC;YAAC,MAAM,CAAC;gBACP,cAAc;YAChB,CAAC;QACH,CAAC,CAAC,CAAC;QACH,OAAO,OAAO,CAAC;IACjB,CAAC,CAAC,CAAC;IAEH,MAAM,QAAQ,GAAG,IAAI,GAAG,EAAU,CAAC;IAEnC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,CAAC;YACH,MAAM,GAAG,GAAG,IAAI,GAAG,CAAC,IAAI,CAAC,CAAC;YAE1B,IAAI,GAAG,CAAC,QAAQ,KAAK,OAAO,IAAI,GAAG,CAAC,QAAQ,KAAK,QAAQ;gBAAE,SAAS;YAEpE,MAAM,QAAQ,GAAG,GAAG,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC;YAC5C,IAAI,iBAAiB,CAAC,IAAI,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC;gBAAE,SAAS;YAEtE,MAAM,QAAQ,GAAG,GAAG,CAAC,QAAQ,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;YACpD,IAAI,cAAc,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,QAAQ,KAAK,CAAC,IAAI,QAAQ,CAAC,QAAQ,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC;gBAAE,SAAS;YAEvF,IAAI,CAAC,aAAa,EAAE,CAAC;gBACnB,IAAI,QAAQ,KAAK,YAAY,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,GAAG,GAAG,YAAY,CAAC,EAAE,CAAC;oBACxE,IAAI,GAAG,CAAC,MAAM,KAAK,UAAU;wBAAE,SAAS;gBAC1C,CAAC;YACH,CAAC;YAED,IAAI,QAAQ,IAAI,QAAQ,KAAK,GAAG,EAAE,CAAC;gBACjC,MAAM,cAAc,GAAG,QAAQ,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,QAAQ,GAAG,GAAG,CAAC;gBAC1E,MAAM,OAAO,GAAG,GAAG,CAAC,QAAQ,CAAC;gBAC7B,IAAI,OAAO,KAAK,QAAQ,IAAI,CAAC,OAAO,CA
AC,UAAU,CAAC,cAAc,CAAC;oBAAE,SAAS;YAC5E,CAAC;YAED,MAAM,UAAU,GAAG,mBAAmB,CAAC,GAAG,CAAC,CAAC;YAC5C,IAAI,UAAU;gBAAE,QAAQ,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;QAC3C,CAAC;QAAC,MAAM,CAAC;YACP,SAAS;QACX,CAAC;IACH,CAAC;IAED,OAAO,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;AAC9B,CAAC;AAED,SAAS,YAAY,CACnB,GAAW,EACX,UAAkB,EAClB,YAAoB,EACpB,QAAgB,EAChB,gBAAyB,KAAK;IAE9B,IAAI,CAAC;QACH,MAAM,CAAC,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;QACvB,IAAI,CAAC,CAAC,QAAQ,KAAK,OAAO,IAAI,CAAC,CAAC,QAAQ,KAAK,QAAQ;YAAE,OAAO,KAAK,CAAC;QACpE,MAAM,QAAQ,GAAG,CAAC,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC;QAC1C,IAAI,iBAAiB,CAAC,IAAI,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC;YAAE,OAAO,KAAK,CAAC;QAC1E,MAAM,QAAQ,GAAG,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;QAClD,IAAI,cAAc,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,QAAQ,KAAK,CAAC,IAAI,QAAQ,CAAC,QAAQ,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC;YAAE,OAAO,KAAK,CAAC;QAE3F,IAAI,CAAC,aAAa,EAAE,CAAC;YACnB,IACE,QAAQ,KAAK,YAAY;gBACzB,CAAC,QAAQ,CAAC,QAAQ,CAAC,GAAG,GAAG,YAAY,CAAC;gBACtC,CAAC,CAAC,MAAM,KAAK,UAAU,EACvB,CAAC;gBACD,OAAO,KAAK,CAAC;YACf,CAAC;QACH,CAAC;QAED,IAAI,QAAQ,IAAI,QAAQ,KAAK,GAAG,EAAE,CAAC;YACjC,MAAM,cAAc,GAAG,QAAQ,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,QAAQ,GAAG,GAAG,CAAC;YAC1E,MAAM,OAAO,GAAG,CAAC,CAAC,QAAQ,CAAC;YAC3B,IAAI,OAAO,KAAK,QAAQ,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,cAAc,CAAC,EAAE,CAAC;gBAChE,OAAO,KAAK,CAAC;YACf,CAAC;QACH,CAAC;QAED,OAAO,IAAI,CAAC;IACd,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC;AAED,MAAM,UAAU,YAAY,CAAC,GAAW;IACtC,IAAI,CAAC;QACH,MAAM,CAAC,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;QACvB,OAAO,mBAAmB,CAAC,CAAC,CAAC,CAAC;IAChC,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,GAAG,CAAC;IACb,CAAC;AACH,CAAC;AAED,MAAM,UAAU,mBAAmB,CAAC,CAAM;IACxC,IAAI,QAAQ,GAAG,CAAC,CAAC,QAAQ,CAAC;IAC1B,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,IAAI,QAAQ,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;QAClD,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;IACnC,CAAC;IACD,MAAM,IAAI,GAAG,CAAC,CAAC,IAAI,CAAC;IACpB,IAAI,IAAI,IAAI,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC;QA
ClC,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QAC/B,MAAM,aAAa,GAAG,QAAQ,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;QAChD,IAAI,CAAC,aAAa,IAAI,aAAa,KAAK,GAAG,EAAE,CAAC;YAC5C,OAAO,GAAG,CAAC,CAAC,MAAM,GAAG,QAAQ,EAAE,CAAC;QAClC,CAAC;QACD,OAAO,GAAG,CAAC,CAAC,MAAM,GAAG,QAAQ,KAAK,aAAa,EAAE,CAAC;IACpD,CAAC;IACD,OAAO,GAAG,CAAC,CAAC,MAAM,GAAG,QAAQ,EAAE,CAAC;AAClC,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"map.d.ts","sourceRoot":"","sources":["../../src/actions/map.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AAC1D,OAAO,KAAK,EAAE,UAAU,EAAE,SAAS,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;
|
|
1
|
+
{"version":3,"file":"map.d.ts","sourceRoot":"","sources":["../../src/actions/map.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AAC1D,OAAO,KAAK,EAAE,UAAU,EAAE,SAAS,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AA8FnE,wBAAsB,SAAS,CAC7B,OAAO,EAAE,UAAU,EACnB,OAAO,EAAE,cAAc,GACtB,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC,CAuE9B"}
|
package/dist/actions/map.js
CHANGED
|
@@ -1,34 +1,93 @@
|
|
|
1
1
|
import { successResponse } from '../protocol.js';
|
|
2
2
|
import { discoverLinks } from './crawl.js';
|
|
3
|
-
|
|
3
|
+
function parseSitemapXml(xml) {
|
|
4
4
|
const urls = [];
|
|
5
|
+
const urlMatches = xml.matchAll(/<loc>([^<]+)<\/loc>/g);
|
|
6
|
+
for (const match of urlMatches) {
|
|
7
|
+
urls.push(match[1].trim());
|
|
8
|
+
}
|
|
9
|
+
return urls;
|
|
10
|
+
}
|
|
11
|
+
function parseSitemapIndex(xml) {
|
|
12
|
+
const sitemapUrls = [];
|
|
13
|
+
const locRegex = /<sitemap[^>]*>[\s\S]*?<loc>([^<]+)<\/loc>[\s\S]*?<\/sitemap>/gi;
|
|
14
|
+
let match;
|
|
15
|
+
while ((match = locRegex.exec(xml)) !== null) {
|
|
16
|
+
sitemapUrls.push(match[1].trim());
|
|
17
|
+
}
|
|
18
|
+
return sitemapUrls;
|
|
19
|
+
}
|
|
20
|
+
async function discoverSitemapsFromRobots(origin) {
|
|
5
21
|
try {
|
|
6
|
-
const
|
|
7
|
-
const sitemapUrl = `${base.origin}/sitemap.xml`;
|
|
8
|
-
const response = await fetch(sitemapUrl, {
|
|
22
|
+
const res = await fetch(`${origin}/robots.txt`, {
|
|
9
23
|
signal: AbortSignal.timeout(5000),
|
|
10
24
|
});
|
|
11
|
-
if (
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
25
|
+
if (!res.ok)
|
|
26
|
+
return [];
|
|
27
|
+
const text = await res.text();
|
|
28
|
+
return text
|
|
29
|
+
.split('\n')
|
|
30
|
+
.filter((l) => l.toLowerCase().startsWith('sitemap:'))
|
|
31
|
+
.map((l) => l.split(':').slice(1).join(':').trim());
|
|
32
|
+
}
|
|
33
|
+
catch {
|
|
34
|
+
return [];
|
|
35
|
+
}
|
|
36
|
+
}
|
|
37
|
+
async function fetchSitemapUrls(baseUrl) {
|
|
38
|
+
const allUrls = [];
|
|
39
|
+
const base = new URL(baseUrl);
|
|
40
|
+
const robotsSitemaps = await discoverSitemapsFromRobots(base.origin);
|
|
41
|
+
const sitemapUrls = [
|
|
42
|
+
...robotsSitemaps,
|
|
43
|
+
new URL('/sitemap.xml', baseUrl).href,
|
|
44
|
+
new URL('/sitemap_index.xml', baseUrl).href,
|
|
45
|
+
new URL('/sitemap/', baseUrl).href,
|
|
46
|
+
];
|
|
47
|
+
const tried = new Set();
|
|
48
|
+
for (const sitemapUrl of sitemapUrls) {
|
|
49
|
+
if (tried.has(sitemapUrl))
|
|
50
|
+
continue;
|
|
51
|
+
tried.add(sitemapUrl);
|
|
52
|
+
try {
|
|
53
|
+
const res = await fetch(sitemapUrl, { signal: AbortSignal.timeout(10000) });
|
|
54
|
+
if (!res.ok)
|
|
55
|
+
continue;
|
|
56
|
+
const xml = await res.text();
|
|
57
|
+
if (xml.includes('<sitemapindex')) {
|
|
58
|
+
const childUrls = parseSitemapIndex(xml);
|
|
59
|
+
for (const childUrl of childUrls) {
|
|
60
|
+
try {
|
|
61
|
+
const childRes = await fetch(childUrl, { signal: AbortSignal.timeout(10000) });
|
|
62
|
+
if (childRes.ok) {
|
|
63
|
+
const childXml = await childRes.text();
|
|
64
|
+
allUrls.push(...parseSitemapXml(childXml));
|
|
65
|
+
}
|
|
66
|
+
}
|
|
67
|
+
catch {
|
|
68
|
+
// child sitemap fetch failed
|
|
20
69
|
}
|
|
21
|
-
}
|
|
22
|
-
catch {
|
|
23
|
-
// invalid URL
|
|
24
70
|
}
|
|
25
71
|
}
|
|
72
|
+
else {
|
|
73
|
+
allUrls.push(...parseSitemapXml(xml));
|
|
74
|
+
}
|
|
75
|
+
if (allUrls.length > 0)
|
|
76
|
+
break;
|
|
77
|
+
}
|
|
78
|
+
catch {
|
|
79
|
+
// sitemap not available
|
|
26
80
|
}
|
|
27
81
|
}
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
82
|
+
return allUrls.filter((url) => {
|
|
83
|
+
try {
|
|
84
|
+
const parsedUrl = new URL(url);
|
|
85
|
+
return parsedUrl.hostname === base.hostname;
|
|
86
|
+
}
|
|
87
|
+
catch {
|
|
88
|
+
return false;
|
|
89
|
+
}
|
|
90
|
+
});
|
|
32
91
|
}
|
|
33
92
|
export async function handleMap(command, browser) {
|
|
34
93
|
if (!browser.isLaunched()) {
|
|
@@ -43,7 +102,7 @@ export async function handleMap(command, browser) {
|
|
|
43
102
|
const baseUrl = command.url;
|
|
44
103
|
const baseOrigin = new URL(baseUrl).origin;
|
|
45
104
|
const baseHostname = new URL(baseUrl).hostname.replace(/^www\./, '');
|
|
46
|
-
const sitemapUrls = await
|
|
105
|
+
const sitemapUrls = await fetchSitemapUrls(baseUrl);
|
|
47
106
|
const page = browser.getPage();
|
|
48
107
|
await page.goto(baseUrl, {
|
|
49
108
|
timeout: timeout * 1000,
|
package/dist/actions/map.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"map.js","sourceRoot":"","sources":["../../src/actions/map.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,eAAe,EAAE,MAAM,gBAAgB,CAAC;AACjD,OAAO,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAE3C,
|
|
1
|
+
{"version":3,"file":"map.js","sourceRoot":"","sources":["../../src/actions/map.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,eAAe,EAAE,MAAM,gBAAgB,CAAC;AACjD,OAAO,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAE3C,SAAS,eAAe,CAAC,GAAW;IAClC,MAAM,IAAI,GAAa,EAAE,CAAC;IAC1B,MAAM,UAAU,GAAG,GAAG,CAAC,QAAQ,CAAC,sBAAsB,CAAC,CAAC;IACxD,KAAK,MAAM,KAAK,IAAI,UAAU,EAAE,CAAC;QAC/B,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;IAC7B,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,iBAAiB,CAAC,GAAW;IACpC,MAAM,WAAW,GAAa,EAAE,CAAC;IACjC,MAAM,QAAQ,GAAG,gEAAgE,CAAC;IAClF,IAAI,KAAK,CAAC;IACV,OAAO,CAAC,KAAK,GAAG,QAAQ,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QAC7C,WAAW,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;IACpC,CAAC;IACD,OAAO,WAAW,CAAC;AACrB,CAAC;AAED,KAAK,UAAU,0BAA0B,CAAC,MAAc;IACtD,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,GAAG,MAAM,aAAa,EAAE;YAC9C,MAAM,EAAE,WAAW,CAAC,OAAO,CAAC,IAAI,CAAC;SAClC,CAAC,CAAC;QACH,IAAI,CAAC,GAAG,CAAC,EAAE;YAAE,OAAO,EAAE,CAAC;QACvB,MAAM,IAAI,GAAG,MAAM,GAAG,CAAC,IAAI,EAAE,CAAC;QAC9B,OAAO,IAAI;aACR,KAAK,CAAC,IAAI,CAAC;aACX,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,UAAU,CAAC,UAAU,CAAC,CAAC;aACrD,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;IACxD,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,EAAE,CAAC;IACZ,CAAC;AACH,CAAC;AAED,KAAK,UAAU,gBAAgB,CAAC,OAAe;IAC7C,MAAM,OAAO,GAAa,EAAE,CAAC;IAC7B,MAAM,IAAI,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,CAAC;IAE9B,MAAM,cAAc,GAAG,MAAM,0BAA0B,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IAErE,MAAM,WAAW,GAAG;QAClB,GAAG,cAAc;QACjB,IAAI,GAAG,CAAC,cAAc,EAAE,OAAO,CAAC,CAAC,IAAI;QACrC,IAAI,GAAG,CAAC,oBAAoB,EAAE,OAAO,CAAC,CAAC,IAAI;QAC3C,IAAI,GAAG,CAAC,WAAW,EAAE,OAAO,CAAC,CAAC,IAAI;KACnC,CAAC;IAEF,MAAM,KAAK,GAAG,IAAI,GAAG,EAAU,CAAC;IAChC,KAAK,MAAM,UAAU,IAAI,WAAW,EAAE,CAAC;QACrC,IAAI,KAAK,CAAC,GAAG,CAAC,UAAU,CAAC;YAAE,SAAS;QACpC,KAAK,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;QACtB,IAAI,CAAC;YACH,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,UAAU,EAAE,EAAE,MAAM,EAAE
,WAAW,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;YAC5E,IAAI,CAAC,GAAG,CAAC,EAAE;gBAAE,SAAS;YACtB,MAAM,GAAG,GAAG,MAAM,GAAG,CAAC,IAAI,EAAE,CAAC;YAE7B,IAAI,GAAG,CAAC,QAAQ,CAAC,eAAe,CAAC,EAAE,CAAC;gBAClC,MAAM,SAAS,GAAG,iBAAiB,CAAC,GAAG,CAAC,CAAC;gBACzC,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;oBACjC,IAAI,CAAC;wBACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,QAAQ,EAAE,EAAE,MAAM,EAAE,WAAW,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;wBAC/E,IAAI,QAAQ,CAAC,EAAE,EAAE,CAAC;4BAChB,MAAM,QAAQ,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;4BACvC,OAAO,CAAC,IAAI,CAAC,GAAG,eAAe,CAAC,QAAQ,CAAC,CAAC,CAAC;wBAC7C,CAAC;oBACH,CAAC;oBAAC,MAAM,CAAC;wBACP,6BAA6B;oBAC/B,CAAC;gBACH,CAAC;YACH,CAAC;iBAAM,CAAC;gBACN,OAAO,CAAC,IAAI,CAAC,GAAG,eAAe,CAAC,GAAG,CAAC,CAAC,CAAC;YACxC,CAAC;YAED,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC;gBAAE,MAAM;QAChC,CAAC;QAAC,MAAM,CAAC;YACP,wBAAwB;QAC1B,CAAC;IACH,CAAC;IAED,OAAO,OAAO,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,EAAE;QAC5B,IAAI,CAAC;YACH,MAAM,SAAS,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;YAC/B,OAAO,SAAS,CAAC,QAAQ,KAAK,IAAI,CAAC,QAAQ,CAAC;QAC9C,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,KAAK,CAAC;QACf,CAAC;IACH,CAAC,CAAC,CAAC;AACL,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,SAAS,CAC7B,OAAmB,EACnB,OAAuB;IAEvB,IAAI,CAAC,OAAO,CAAC,UAAU,EAAE,EAAE,CAAC;QAC1B,MAAM,OAAO,CAAC,MAAM,CAAC;YACnB,EAAE,EAAE,MAAM;YACV,MAAM,EAAE,QAAQ;YAChB,QAAQ,EAAE,OAAO,CAAC,QAAQ,IAAI,IAAI;SACnC,CAAC,CAAC;IACL,CAAC;IAED,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,IAAI,GAAG,CAAC;IACnC,MAAM,OAAO,GAAG,OAAO,CAAC,OAAO,IAAI,EAAE,CAAC;IACtC,MAAM,OAAO,GAAG,OAAO,CAAC,GAAG,CAAC;IAC5B,MAAM,UAAU,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC;IAC3C,MAAM,YAAY,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;IAErE,MAAM,WAAW,GAAG,MAAM,gBAAgB,CAAC,OAAO,CAAC,CAAC;IAEpD,MAAM,IAAI,GAAG,OAAO,CAAC,OAAO,EAAE,CAAC;IAC/B,MAAM,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE;QACvB,OAAO,EAAE,OAAO,GAAG,IAAI;QACvB,SAAS,EAAE,kBAAkB;KAC9B,CAAC,CAAC;IAEH,MAAM,OAAO,CAAC,IAAI,CAAC;QACjB,IAAI;aACD,gBAAgB,CAAC,aAAa,EAAE,EAAE,OAAO,EAAE,IAAI,CAAC,GAAG,CAAC,OAAO,GAAG,IAAI,EAAE,IAAI,CAAC,EAAE,CAAC;aAC5E,KAAK,CAAC,GAAG,EAAE,GA
AE,CAAC,CAAC;QAClB,IAAI,CAAC,cAAc,CAAC,IAAI,CAAC;KAC1B,CAAC,CAAC;IAEH,MAAM,QAAQ,GAAG,MAAM,aAAa,CAAC,IAAI,EAAE,UAAU,EAAE,YAAY,EAAE,EAAE,CAAC,CAAC;IAEzE,MAAM,OAAO,GAAG,IAAI,GAAG,EAAU,CAAC;IAElC,KAAK,MAAM,GAAG,IAAI,CAAC,GAAG,WAAW,EAAE,GAAG,QAAQ,CAAC,EAAE,CAAC;QAChD,IAAI,CAAC;YACH,MAAM,UAAU,GAAG,IAAI,GAAG,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC,IAAI,CAAC;YAC9C,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,UAAU,CAAC,CAAC;YACnC,IAAI,MAAM,CAAC,MAAM,KAAK,UAAU,EAAE,CAAC;gBACjC,OAAO,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;YAC1B,CAAC;QACH,CAAC;QAAC,MAAM,CAAC;YACP,cAAc;QAChB,CAAC;IACH,CAAC;IAED,MAAM,IAAI,GAAG,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC;IAEjD,MAAM,QAAQ,GACZ,OAAO,CAAC,eAAe,IAAI,OAAO,CAAC,eAAe;QAChD,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,EAAE;YAClB,IAAI,OAAO,CAAC,eAAe,EAAE,MAAM,EAAE,CAAC;gBACpC,KAAK,MAAM,OAAO,IAAI,OAAO,CAAC,eAAe,EAAE,CAAC;oBAC9C,IAAI,SAAS,CAAC,GAAG,EAAE,OAAO,CAAC;wBAAE,OAAO,KAAK,CAAC;gBAC5C,CAAC;YACH,CAAC;YACD,IAAI,OAAO,CAAC,eAAe,EAAE,MAAM,EAAE,CAAC;gBACpC,KAAK,MAAM,OAAO,IAAI,OAAO,CAAC,eAAe,EAAE,CAAC;oBAC9C,IAAI,SAAS,CAAC,GAAG,EAAE,OAAO,CAAC;wBAAE,OAAO,IAAI,CAAC;gBAC3C,CAAC;gBACD,OAAO,KAAK,CAAC;YACf,CAAC;YACD,OAAO,IAAI,CAAC;QACd,CAAC,CAAC;QACJ,CAAC,CAAC,IAAI,CAAC;IAEX,OAAO,eAAe,CAAC,OAAO,CAAC,EAAE,EAAE;QACjC,GAAG,EAAE,OAAO;QACZ,IAAI,EAAE,QAAQ;QACd,KAAK,EAAE,QAAQ,CAAC,MAAM;KACvB,CAAC,CAAC;AACL,CAAC;AAED,SAAS,SAAS,CAAC,GAAW,EAAE,OAAe;IAC7C,MAAM,KAAK,GAAG,IAAI,MAAM,CACtB,GAAG;QACD,OAAO;aACJ,OAAO,CAAC,mBAAmB,EAAE,MAAM,CAAC;aACpC,OAAO,CAAC,OAAO,EAAE,gBAAgB,CAAC;aAClC,OAAO,CAAC,KAAK,EAAE,OAAO,CAAC;aACvB,OAAO,CAAC,iBAAiB,EAAE,IAAI,CAAC;aAChC,OAAO,CAAC,KAAK,EAAE,MAAM,CAAC;QACzB,GAAG,CACN,CAAC;IACF,OAAO,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AACzB,CAAC"}
|
package/dist/actions/scrape.d.ts
CHANGED
|
@@ -1,10 +1,29 @@
|
|
|
1
1
|
import type { BrowserManager } from '../browser/index.js';
|
|
2
2
|
import type { ScrapeCommand, Response } from '../types.js';
|
|
3
|
+
export interface ScrapeMetadata {
|
|
4
|
+
description?: string;
|
|
5
|
+
keywords?: string;
|
|
6
|
+
author?: string;
|
|
7
|
+
robots?: string;
|
|
8
|
+
canonical?: string;
|
|
9
|
+
favicon?: string;
|
|
10
|
+
lang?: string;
|
|
11
|
+
ogTitle?: string;
|
|
12
|
+
ogDescription?: string;
|
|
13
|
+
ogImage?: string;
|
|
14
|
+
ogUrl?: string;
|
|
15
|
+
ogSiteName?: string;
|
|
16
|
+
publishedTime?: string;
|
|
17
|
+
modifiedTime?: string;
|
|
18
|
+
articleTag?: string;
|
|
19
|
+
articleSection?: string;
|
|
20
|
+
}
|
|
3
21
|
export interface ScrapeResult {
|
|
4
22
|
url: string;
|
|
5
23
|
title: string;
|
|
6
24
|
content: string;
|
|
7
25
|
format: 'text' | 'html' | 'markdown';
|
|
26
|
+
metadata?: ScrapeMetadata;
|
|
8
27
|
}
|
|
9
28
|
export declare function handleScrape(command: ScrapeCommand, browser: BrowserManager): Promise<Response<ScrapeResult>>;
|
|
10
29
|
//# sourceMappingURL=scrape.d.ts.map
|