@crawlee/utils 4.0.0-beta.20 → 4.0.0-beta.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/internals/extract-urls.d.ts +5 -0
- package/internals/extract-urls.d.ts.map +1 -1
- package/internals/extract-urls.js +7 -3
- package/internals/extract-urls.js.map +1 -1
- package/internals/robots.d.ts +9 -2
- package/internals/robots.d.ts.map +1 -1
- package/internals/robots.js +21 -29
- package/internals/robots.js.map +1 -1
- package/internals/sitemap.d.ts +9 -6
- package/internals/sitemap.d.ts.map +1 -1
- package/internals/sitemap.js +25 -20
- package/internals/sitemap.js.map +1 -1
- package/internals/url.d.ts +1 -1
- package/internals/url.d.ts.map +1 -1
- package/package.json +4 -4
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import type { BaseHttpClient } from '@crawlee/types';
|
|
1
2
|
export interface DownloadListOfUrlsOptions {
|
|
2
3
|
/**
|
|
3
4
|
* URL to the file
|
|
@@ -16,6 +17,10 @@ export interface DownloadListOfUrlsOptions {
|
|
|
16
17
|
urlRegExp?: RegExp;
|
|
17
18
|
/** Allows to use a proxy for the download request. */
|
|
18
19
|
proxyUrl?: string;
|
|
20
|
+
/**
|
|
21
|
+
* Custom HTTP client to use for downloading the file.
|
|
22
|
+
*/
|
|
23
|
+
httpClient?: BaseHttpClient;
|
|
19
24
|
}
|
|
20
25
|
/**
|
|
21
26
|
* Returns a promise that resolves to an array of urls parsed from the resource available at the provided url.
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"extract-urls.d.ts","sourceRoot":"","sources":["../../src/internals/extract-urls.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"extract-urls.d.ts","sourceRoot":"","sources":["../../src/internals/extract-urls.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAC;AAKrD,MAAM,WAAW,yBAAyB;IACtC;;OAEG;IACH,GAAG,EAAE,MAAM,CAAC;IAEZ;;;OAGG;IACH,QAAQ,CAAC,EAAE,cAAc,CAAC;IAE1B;;;;OAIG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;IAEnB,sDAAsD;IACtD,QAAQ,CAAC,EAAE,MAAM,CAAC;IAElB;;OAEG;IACH,UAAU,CAAC,EAAE,cAAc,CAAC;CAC/B;AAED;;;GAGG;AACH,wBAAsB,kBAAkB,CAAC,OAAO,EAAE,yBAAyB,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,CAkC9F;AAED,MAAM,WAAW,kBAAkB;IAC/B;;OAEG;IACH,MAAM,EAAE,MAAM,CAAC;IAEf;;;OAGG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;CACtB;AAED;;GAEG;AACH,wBAAgB,WAAW,CAAC,OAAO,EAAE,kBAAkB,GAAG,MAAM,EAAE,CAiBjE;AAED;;GAEG;AACH,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,MAAM,GAAG,SAAS,CAMhF"}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { ImpitHttpClient } from '@crawlee/impit-client';
|
|
2
2
|
import ow from 'ow';
|
|
3
3
|
import { URL_NO_COMMAS_REGEX } from './general.js';
|
|
4
4
|
/**
|
|
@@ -11,15 +11,19 @@ export async function downloadListOfUrls(options) {
|
|
|
11
11
|
encoding: ow.optional.string,
|
|
12
12
|
urlRegExp: ow.optional.regExp,
|
|
13
13
|
proxyUrl: ow.optional.string,
|
|
14
|
+
httpClient: ow.optional.object,
|
|
14
15
|
}));
|
|
15
|
-
const { url, encoding = 'utf8', urlRegExp = URL_NO_COMMAS_REGEX, proxyUrl } = options;
|
|
16
|
+
const { url, encoding = 'utf8', urlRegExp = URL_NO_COMMAS_REGEX, proxyUrl, httpClient = new ImpitHttpClient(), } = options;
|
|
16
17
|
// Try to detect wrong urls and fix them. Currently, detects only sharing url instead of csv download one.
|
|
17
18
|
const match = url.match(/^(https:\/\/docs\.google\.com\/spreadsheets\/d\/(?:\w|-)+)\/?/);
|
|
18
19
|
let fixedUrl = url;
|
|
19
20
|
if (match) {
|
|
20
21
|
fixedUrl = `${match[1]}/gviz/tq?tqx=out:csv`;
|
|
21
22
|
}
|
|
22
|
-
const
|
|
23
|
+
const response = await httpClient.sendRequest(new Request(fixedUrl, { method: 'GET' }), {
|
|
24
|
+
proxyUrl,
|
|
25
|
+
});
|
|
26
|
+
const string = new TextDecoder(encoding).decode(new Uint8Array(await response.arrayBuffer()));
|
|
23
27
|
return extractUrls({ string, urlRegExp });
|
|
24
28
|
}
|
|
25
29
|
/**
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"extract-urls.js","sourceRoot":"","sources":["../../src/internals/extract-urls.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,
|
|
1
|
+
{"version":3,"file":"extract-urls.js","sourceRoot":"","sources":["../../src/internals/extract-urls.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAC;AAExD,OAAO,EAAE,MAAM,IAAI,CAAC;AAEpB,OAAO,EAAE,mBAAmB,EAAE,MAAM,cAAc,CAAC;AA8BnD;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,kBAAkB,CAAC,OAAkC;IACvE,EAAE,CACE,OAAc,EACd,EAAE,CAAC,MAAM,CAAC,UAAU,CAAC;QACjB,GAAG,EAAE,EAAE,CAAC,MAAM,CAAC,GAAG;QAClB,QAAQ,EAAE,EAAE,CAAC,QAAQ,CAAC,MAAM;QAC5B,SAAS,EAAE,EAAE,CAAC,QAAQ,CAAC,MAAM;QAC7B,QAAQ,EAAE,EAAE,CAAC,QAAQ,CAAC,MAAM;QAC5B,UAAU,EAAE,EAAE,CAAC,QAAQ,CAAC,MAAM;KACjC,CAAC,CACL,CAAC;IACF,MAAM,EACF,GAAG,EACH,QAAQ,GAAG,MAAM,EACjB,SAAS,GAAG,mBAAmB,EAC/B,QAAQ,EACR,UAAU,GAAG,IAAI,eAAe,EAAE,GACrC,GAAG,OAAO,CAAC;IAEZ,0GAA0G;IAC1G,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,+DAA+D,CAAC,CAAC;IACzF,IAAI,QAAQ,GAAG,GAAG,CAAC;IAEnB,IAAI,KAAK,EAAE,CAAC;QACR,QAAQ,GAAG,GAAG,KAAK,CAAC,CAAC,CAAC,sBAAsB,CAAC;IACjD,CAAC;IAED,MAAM,QAAQ,GAAG,MAAM,UAAU,CAAC,WAAW,CAAC,IAAI,OAAO,CAAC,QAAQ,EAAE,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC,EAAE;QACpF,QAAQ;KACX,CAAC,CAAC;IAEH,MAAM,MAAM,GAAG,IAAI,WAAW,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,IAAI,UAAU,CAAC,MAAM,QAAQ,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC;IAE9F,OAAO,WAAW,CAAC,EAAE,MAAM,EAAE,SAAS,EAAE,CAAC,CAAC;AAC9C,CAAC;AAeD;;GAEG;AACH,MAAM,UAAU,WAAW,CAAC,OAA2B;IACnD,EAAE,CACE,OAAc,EACd,EAAE,CAAC,MAAM,CAAC,UAAU,CAAC;QACjB,MAAM,EAAE,EAAE,CAAC,MAAM;QACjB,SAAS,EAAE,EAAE,CAAC,QAAQ,CAAC,MAAM;KAChC,CAAC,CACL,CAAC;IACF,MAAM,KAAK,GAAG,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IACzC,MAAM,MAAM,GAAa,EAAE,CAAC;IAC5B,MAAM,SAAS,GAAG,OAAO,CAAC,SAAS,IAAI,mBAAmB,CAAC;IAE3D,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACvB,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;IAClD,CAAC;IAED,OAAO,MAAM,CAAC;AAClB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,cAAc,CAAC,IAAY,EAAE,OAAe;IACxD,IAAI,CAAC;QACD,OAAO,IAAI,GAAG,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,IAAI,CAAC;IACvC,CAAC;IAAC,MAAM,CAAC;QACL,OAAO,SAAS,CAAC;IACrB,CAAC;AACL,CAAC"}
|
package/internals/robots.d.ts
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import type { BaseHttpClient } from '@crawlee/types';
|
|
1
2
|
import { Sitemap } from './sitemap.js';
|
|
2
3
|
/**
|
|
3
4
|
* Loads and queries information from a [robots.txt file](https://en.wikipedia.org/wiki/Robots.txt).
|
|
@@ -26,7 +27,10 @@ export declare class RobotsTxtFile {
|
|
|
26
27
|
* @param url the URL to fetch robots.txt for
|
|
27
28
|
* @param [proxyUrl] a proxy to be used for fetching the robots.txt file
|
|
28
29
|
*/
|
|
29
|
-
static find(url: string,
|
|
30
|
+
static find(url: string, options?: {
|
|
31
|
+
proxyUrl?: string;
|
|
32
|
+
httpClient?: BaseHttpClient;
|
|
33
|
+
}): Promise<RobotsTxtFile>;
|
|
30
34
|
/**
|
|
31
35
|
* Allows providing the URL and robots.txt content explicitly instead of loading it from the target site.
|
|
32
36
|
* @param url the URL for robots.txt file
|
|
@@ -34,7 +38,10 @@ export declare class RobotsTxtFile {
|
|
|
34
38
|
* @param [proxyUrl] a proxy to be used for fetching the robots.txt file
|
|
35
39
|
*/
|
|
36
40
|
static from(url: string, content: string, proxyUrl?: string): RobotsTxtFile;
|
|
37
|
-
protected static load(url: string,
|
|
41
|
+
protected static load(url: string, options?: {
|
|
42
|
+
proxyUrl?: string;
|
|
43
|
+
httpClient?: BaseHttpClient;
|
|
44
|
+
}): Promise<RobotsTxtFile>;
|
|
38
45
|
/**
|
|
39
46
|
* Check if a URL should be crawled by robots.
|
|
40
47
|
* @param url the URL to check against the rules in robots.txt
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"robots.d.ts","sourceRoot":"","sources":["../../src/internals/robots.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"robots.d.ts","sourceRoot":"","sources":["../../src/internals/robots.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAC;AAIrD,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AAEvC;;;;;;;;;;;;;;;;;GAiBG;AACH,qBAAa,aAAa;IAElB,OAAO,CAAC,MAAM;IACd,OAAO,CAAC,QAAQ,CAAC;IAFrB,OAAO;IAKP;;;;OAIG;WACU,IAAI,CACb,GAAG,EAAE,MAAM,EACX,OAAO,CAAC,EAAE;QAAE,QAAQ,CAAC,EAAE,MAAM,CAAC;QAAC,UAAU,CAAC,EAAE,cAAc,CAAA;KAAE,GAC7D,OAAO,CAAC,aAAa,CAAC;IAQzB;;;;;OAKG;IACH,MAAM,CAAC,IAAI,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,GAAG,aAAa;qBAKpD,IAAI,CACvB,GAAG,EAAE,MAAM,EACX,OAAO,CAAC,EAAE;QAAE,QAAQ,CAAC,EAAE,MAAM,CAAC;QAAC,UAAU,CAAC,EAAE,cAAc,CAAA;KAAE,GAC7D,OAAO,CAAC,aAAa,CAAC;IA6BzB;;;;OAIG;IACH,SAAS,CAAC,GAAG,EAAE,MAAM,EAAE,SAAS,SAAM,GAAG,OAAO;IAIhD;;OAEG;IACH,WAAW,IAAI,MAAM,EAAE;IAIvB;;OAEG;IACG,aAAa,IAAI,OAAO,CAAC,OAAO,CAAC;IAIvC;;OAEG;IACG,qBAAqB,IAAI,OAAO,CAAC,MAAM,EAAE,CAAC;CAGnD;AAGD,OAAO,EAAE,aAAa,IAAI,UAAU,EAAE,CAAC"}
|
package/internals/robots.js
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { ImpitHttpClient } from '@crawlee/impit-client';
|
|
2
2
|
import robotsParser from 'robots-parser';
|
|
3
3
|
import { Sitemap } from './sitemap.js';
|
|
4
|
-
let HTTPError;
|
|
5
4
|
/**
|
|
6
5
|
* Loads and queries information from a [robots.txt file](https://en.wikipedia.org/wiki/Robots.txt).
|
|
7
6
|
*
|
|
@@ -32,11 +31,11 @@ export class RobotsTxtFile {
|
|
|
32
31
|
* @param url the URL to fetch robots.txt for
|
|
33
32
|
* @param [proxyUrl] a proxy to be used for fetching the robots.txt file
|
|
34
33
|
*/
|
|
35
|
-
static async find(url,
|
|
34
|
+
static async find(url, options) {
|
|
36
35
|
const robotsTxtFileUrl = new URL(url);
|
|
37
36
|
robotsTxtFileUrl.pathname = '/robots.txt';
|
|
38
37
|
robotsTxtFileUrl.search = '';
|
|
39
|
-
return RobotsTxtFile.load(robotsTxtFileUrl.toString(),
|
|
38
|
+
return RobotsTxtFile.load(robotsTxtFileUrl.toString(), options);
|
|
40
39
|
}
|
|
41
40
|
/**
|
|
42
41
|
* Allows providing the URL and robots.txt content explicitly instead of loading it from the target site.
|
|
@@ -48,33 +47,26 @@ export class RobotsTxtFile {
|
|
|
48
47
|
// @ts-ignore
|
|
49
48
|
return new RobotsTxtFile(robotsParser(url, content), proxyUrl);
|
|
50
49
|
}
|
|
51
|
-
static async load(url,
|
|
52
|
-
|
|
53
|
-
|
|
50
|
+
static async load(url, options) {
|
|
51
|
+
const { proxyUrl, httpClient = new ImpitHttpClient({ followRedirects: true }) } = options || {};
|
|
52
|
+
const response = await httpClient.sendRequest(new Request(url, { method: 'GET' }), {
|
|
53
|
+
proxyUrl,
|
|
54
|
+
});
|
|
55
|
+
if (response.status < 200 || response.status >= 300) {
|
|
56
|
+
throw new Error(`Failed to load robots.txt from ${url}: HTTP ${response.status}`);
|
|
54
57
|
}
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
}
|
|
65
|
-
catch (e) {
|
|
66
|
-
if (e instanceof HTTPError && e.response.statusCode === 404) {
|
|
67
|
-
return new RobotsTxtFile({
|
|
68
|
-
isAllowed() {
|
|
69
|
-
return true;
|
|
70
|
-
},
|
|
71
|
-
getSitemaps() {
|
|
72
|
-
return [];
|
|
73
|
-
},
|
|
74
|
-
}, proxyUrl);
|
|
75
|
-
}
|
|
76
|
-
throw e;
|
|
58
|
+
if (response.status === 404) {
|
|
59
|
+
return new RobotsTxtFile({
|
|
60
|
+
isAllowed() {
|
|
61
|
+
return true;
|
|
62
|
+
},
|
|
63
|
+
getSitemaps() {
|
|
64
|
+
return [];
|
|
65
|
+
},
|
|
66
|
+
}, proxyUrl);
|
|
77
67
|
}
|
|
68
|
+
// @ts-ignore
|
|
69
|
+
return new RobotsTxtFile(robotsParser(url.toString(), await response.text()), proxyUrl);
|
|
78
70
|
}
|
|
79
71
|
/**
|
|
80
72
|
* Check if a URL should be crawled by robots.
|
package/internals/robots.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"robots.js","sourceRoot":"","sources":["../../src/internals/robots.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"robots.js","sourceRoot":"","sources":["../../src/internals/robots.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAC;AAGxD,OAAO,YAAY,MAAM,eAAe,CAAC;AAEzC,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AAEvC;;;;;;;;;;;;;;;;;GAiBG;AACH,MAAM,OAAO,aAAa;IAEV;IACA;IAFZ,YACY,MAAgD,EAChD,QAAiB;QADjB,WAAM,GAAN,MAAM,CAA0C;QAChD,aAAQ,GAAR,QAAQ,CAAS;IAC1B,CAAC;IAEJ;;;;OAIG;IACH,MAAM,CAAC,KAAK,CAAC,IAAI,CACb,GAAW,EACX,OAA4D;QAE5D,MAAM,gBAAgB,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;QACtC,gBAAgB,CAAC,QAAQ,GAAG,aAAa,CAAC;QAC1C,gBAAgB,CAAC,MAAM,GAAG,EAAE,CAAC;QAE7B,OAAO,aAAa,CAAC,IAAI,CAAC,gBAAgB,CAAC,QAAQ,EAAE,EAAE,OAAO,CAAC,CAAC;IACpE,CAAC;IAED;;;;;OAKG;IACH,MAAM,CAAC,IAAI,CAAC,GAAW,EAAE,OAAe,EAAE,QAAiB;QACvD,aAAa;QACb,OAAO,IAAI,aAAa,CAAC,YAAY,CAAC,GAAG,EAAE,OAAO,CAAC,EAAE,QAAQ,CAAC,CAAC;IACnE,CAAC;IAES,MAAM,CAAC,KAAK,CAAC,IAAI,CACvB,GAAW,EACX,OAA4D;QAE5D,MAAM,EAAE,QAAQ,EAAE,UAAU,GAAG,IAAI,eAAe,CAAC,EAAE,eAAe,EAAE,IAAI,EAAE,CAAC,EAAE,GAAG,OAAO,IAAI,EAAE,CAAC;QAEhG,MAAM,QAAQ,GAAG,MAAM,UAAU,CAAC,WAAW,CAAC,IAAI,OAAO,CAAC,GAAG,EAAE,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC,EAAE;YAC/E,QAAQ;SACX,CAAC,CAAC;QAEH,IAAI,QAAQ,CAAC,MAAM,GAAG,GAAG,IAAI,QAAQ,CAAC,MAAM,IAAI,GAAG,EAAE,CAAC;YAClD,MAAM,IAAI,KAAK,CAAC,kCAAkC,GAAG,UAAU,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC;QACtF,CAAC;QAED,IAAI,QAAQ,CAAC,MAAM,KAAK,GAAG,EAAE,CAAC;YAC1B,OAAO,IAAI,aAAa,CACpB;gBACI,SAAS;oBACL,OAAO,IAAI,CAAC;gBAChB,CAAC;gBACD,WAAW;oBACP,OAAO,EAAE,CAAC;gBACd,CAAC;aACJ,EACD,QAAQ,CACX,CAAC;QACN,CAAC;QAED,aAAa;QACb,OAAO,IAAI,aAAa,CAAC,YAAY,CAAC,GAAG,CAAC,QAAQ,EAAE,EAAE,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC,EAAE,QAAQ,CAAC,CAAC;IAC5F,CAAC;IAED;;;;OAIG;IACH,SAAS,CAAC,GAAW,EAAE,SAAS,GAAG,GAAG;QAClC,OAAO,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,GAAG,EAAE,SAAS,CAAC,IAAI,IAAI,CAAC,CAAC,+FAA+F;IACzJ,CAAC;IAED;;OAEG;IACH,WAAW;QACP,OAAO,IAAI,CAAC,MAAM,CAAC,WAAW,EAAE,CAAC;IACrC,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,aAAa;QACf,OAAO,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,WAAW,EAAE,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAC;IAClE,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,qBAAqB;QACvB,OAAO,CAAC,MAAM,IAAI,CAAC,aAAa,EAAE,CAAC,CAAC,IAAI,CAAC;IAC7C,CAAC;CACJ;AAED,+BAA+B;AAC/B,OAAO,EAAE,aAAa,IAAI,UAAU,EAAE,CAAC"}
|
package/internals/sitemap.d.ts
CHANGED
|
@@ -1,5 +1,4 @@
|
|
|
1
|
-
|
|
2
|
-
import type { Delays } from 'got-scraping';
|
|
1
|
+
import type { BaseHttpClient } from '@crawlee/types';
|
|
3
2
|
interface SitemapUrlData {
|
|
4
3
|
loc: string;
|
|
5
4
|
lastmod?: Date;
|
|
@@ -36,14 +35,18 @@ export interface ParseSitemapOptions {
|
|
|
36
35
|
*/
|
|
37
36
|
sitemapRetries?: number;
|
|
38
37
|
/**
|
|
39
|
-
*
|
|
38
|
+
* Timeout settings for network requests when fetching sitemaps. By default this is `30000` milliseconds (30 seconds).
|
|
40
39
|
*/
|
|
41
|
-
|
|
40
|
+
timeoutMillis?: number;
|
|
42
41
|
/**
|
|
43
42
|
* If true, the parser will log a warning if it fails to fetch a sitemap due to a network error
|
|
44
43
|
* @default true
|
|
45
44
|
*/
|
|
46
45
|
reportNetworkErrors?: boolean;
|
|
46
|
+
/**
|
|
47
|
+
* Custom HTTP client to be used for fetching sitemaps.
|
|
48
|
+
*/
|
|
49
|
+
httpClient?: BaseHttpClient;
|
|
47
50
|
}
|
|
48
51
|
export declare function parseSitemap<T extends ParseSitemapOptions>(initialSources: SitemapSource[], proxyUrl?: string, options?: T): AsyncIterable<T['emitNestedSitemaps'] extends true ? SitemapUrl | NestedSitemap : SitemapUrl>;
|
|
49
52
|
/**
|
|
@@ -67,7 +70,7 @@ export declare class Sitemap {
|
|
|
67
70
|
* @param url The domain URL to fetch the sitemap for.
|
|
68
71
|
* @param proxyUrl A proxy to be used for fetching the sitemap file.
|
|
69
72
|
*/
|
|
70
|
-
static tryCommonNames(url: string, proxyUrl?: string): Promise<Sitemap>;
|
|
73
|
+
static tryCommonNames(url: string, proxyUrl?: string, parseSitemapOptions?: ParseSitemapOptions): Promise<Sitemap>;
|
|
71
74
|
/**
|
|
72
75
|
* Fetch sitemap content from given URL or URLs and return URLs of referenced pages.
|
|
73
76
|
* @param urls sitemap URL(s)
|
|
@@ -79,7 +82,7 @@ export declare class Sitemap {
|
|
|
79
82
|
* @param content XML sitemap content
|
|
80
83
|
* @param proxyUrl URL of a proxy to be used for fetching sitemap contents
|
|
81
84
|
*/
|
|
82
|
-
static fromXmlString(content: string, proxyUrl?: string): Promise<Sitemap>;
|
|
85
|
+
static fromXmlString(content: string, proxyUrl?: string, parseSitemapOptions?: ParseSitemapOptions): Promise<Sitemap>;
|
|
83
86
|
protected static parse(sources: SitemapSource[], proxyUrl?: string, parseSitemapOptions?: ParseSitemapOptions): Promise<Sitemap>;
|
|
84
87
|
}
|
|
85
88
|
export {};
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"sitemap.d.ts","sourceRoot":"","sources":["../../src/internals/sitemap.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"sitemap.d.ts","sourceRoot":"","sources":["../../src/internals/sitemap.ts"],"names":[],"mappings":"AAOA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAC;AAOrD,UAAU,cAAc;IACpB,GAAG,EAAE,MAAM,CAAC;IACZ,OAAO,CAAC,EAAE,IAAI,CAAC;IACf,UAAU,CAAC,EAAE,QAAQ,GAAG,QAAQ,GAAG,OAAO,GAAG,QAAQ,GAAG,SAAS,GAAG,QAAQ,GAAG,OAAO,CAAC;IACvF,QAAQ,CAAC,EAAE,MAAM,CAAC;CACrB;AAED,MAAM,MAAM,UAAU,GAAG,cAAc,GAAG;IACtC,gBAAgB,EAAE,MAAM,CAAC;CAC5B,CAAC;AAEF,UAAU,aAAa;IACnB,GAAG,EAAE,MAAM,CAAC;IACZ,gBAAgB,EAAE,IAAI,CAAC;CAC1B;AAED,KAAK,aAAa,GAAG,CAAC;IAAE,IAAI,EAAE,KAAK,CAAC;IAAC,GAAG,EAAE,MAAM,CAAA;CAAE,GAAG;IAAE,IAAI,EAAE,KAAK,CAAC;IAAC,OAAO,EAAE,MAAM,CAAA;CAAE,CAAC,GAAG;IAAE,KAAK,CAAC,EAAE,MAAM,CAAA;CAAE,CAAC;AA8I5G,MAAM,WAAW,mBAAmB;IAChC;;OAEG;IACH,kBAAkB,CAAC,EAAE,IAAI,GAAG,KAAK,CAAC;IAClC;;OAEG;IACH,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB;;OAEG;IACH,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB;;OAEG;IACH,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB;;;OAGG;IACH,mBAAmB,CAAC,EAAE,OAAO,CAAC;IAC9B;;OAEG;IACH,UAAU,CAAC,EAAE,cAAc,CAAC;CAC/B;AAED,wBAAuB,YAAY,CAAC,CAAC,SAAS,mBAAmB,EAC7D,cAAc,EAAE,aAAa,EAAE,EAC/B,QAAQ,CAAC,EAAE,MAAM,EACjB,OAAO,CAAC,EAAE,CAAC,GACZ,aAAa,CAAC,CAAC,CAAC,oBAAoB,CAAC,SAAS,IAAI,GAAG,UAAU,GAAG,aAAa,GAAG,UAAU,CAAC,CAkK/F;AAED;;;;;;;;;;;GAWG;AACH,qBAAa,OAAO;IACJ,QAAQ,CAAC,IAAI,EAAE,MAAM,EAAE;gBAAd,IAAI,EAAE,MAAM,EAAE;IAEnC;;;;;OAKG;WACU,cAAc,CACvB,GAAG,EAAE,MAAM,EACX,QAAQ,CAAC,EAAE,MAAM,EACjB,mBAAmB,CAAC,EAAE,mBAAmB,GAC1C,OAAO,CAAC,OAAO,CAAC;IAenB;;;;OAIG;WACU,IAAI,CACb,IAAI,EAAE,MAAM,GAAG,MAAM,EAAE,EACvB,QAAQ,CAAC,EAAE,MAAM,EACjB,mBAAmB,CAAC,EAAE,mBAAmB,GAC1C,OAAO,CAAC,OAAO,CAAC;IAQnB;;;;OAIG;WACU,aAAa,CACtB,OAAO,EAAE,MAAM,EACf,QAAQ,CAAC,EAAE,MAAM,EACjB,mBAAmB,CAAC,EAAE,mBAAmB,GAC1C,OAAO,CAAC,OAAO,CAAC;qBAII,KAAK,CACxB,OAAO,EAAE,aAAa,EAAE,EACxB,QAAQ,CAAC,EAAE,MAAM,EACjB,mBAAmB,CAAC,EAAE,mBAAmB,GAC1C,OAAO,CAAC,OAAO,CAAC;CAatB"}
|
package/internals/sitemap.js
CHANGED
|
@@ -2,6 +2,8 @@ import { createHash } from 'node:crypto';
|
|
|
2
2
|
import { PassThrough, pipeline, Readable, Transform } from 'node:stream';
|
|
3
3
|
import { StringDecoder } from 'node:string_decoder';
|
|
4
4
|
import { createGunzip } from 'node:zlib';
|
|
5
|
+
import { ImpitHttpClient } from '@crawlee/impit-client';
|
|
6
|
+
import { fileTypeStream } from 'file-type';
|
|
5
7
|
import sax from 'sax';
|
|
6
8
|
import MIMEType from 'whatwg-mimetype';
|
|
7
9
|
import log from '@apify/log';
|
|
@@ -121,9 +123,7 @@ class SitemapXmlParser extends Transform {
|
|
|
121
123
|
}
|
|
122
124
|
}
|
|
123
125
|
export async function* parseSitemap(initialSources, proxyUrl, options) {
|
|
124
|
-
const {
|
|
125
|
-
const { fileTypeStream } = await import('file-type');
|
|
126
|
-
const { emitNestedSitemaps = false, maxDepth = Infinity, sitemapRetries = 3, networkTimeouts, reportNetworkErrors = true, } = options ?? {};
|
|
126
|
+
const { httpClient = new ImpitHttpClient(), emitNestedSitemaps = false, maxDepth = Infinity, sitemapRetries = 3, timeoutMillis: timeout = 30000, reportNetworkErrors = true, } = options ?? {};
|
|
127
127
|
const sources = [...initialSources];
|
|
128
128
|
const visitedSitemapUrls = new Set();
|
|
129
129
|
const createParser = (contentType = '', url) => {
|
|
@@ -155,23 +155,28 @@ export async function* parseSitemap(initialSources, proxyUrl, options) {
|
|
|
155
155
|
let retriesLeft = sitemapRetries + 1;
|
|
156
156
|
while (retriesLeft-- > 0) {
|
|
157
157
|
try {
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
proxyUrl,
|
|
158
|
+
let sitemapResponse;
|
|
159
|
+
try {
|
|
160
|
+
sitemapResponse = await httpClient.stream(new Request(sitemapUrl, {
|
|
162
161
|
method: 'GET',
|
|
163
|
-
timeout: networkTimeouts,
|
|
164
162
|
headers: {
|
|
165
163
|
accept: 'text/plain, application/xhtml+xml, application/xml;q=0.9, */*;q=0.8',
|
|
166
164
|
},
|
|
165
|
+
}), {
|
|
166
|
+
proxyUrl,
|
|
167
|
+
timeout,
|
|
167
168
|
});
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
169
|
+
}
|
|
170
|
+
catch (error) {
|
|
171
|
+
sitemapResponse = null;
|
|
172
|
+
}
|
|
171
173
|
let error = null;
|
|
172
|
-
if (
|
|
173
|
-
let contentType =
|
|
174
|
-
|
|
174
|
+
if (sitemapResponse && sitemapResponse.status >= 200 && sitemapResponse.status < 300) {
|
|
175
|
+
let contentType = sitemapResponse.headers.get('content-type');
|
|
176
|
+
if (sitemapResponse.body === null) {
|
|
177
|
+
break;
|
|
178
|
+
}
|
|
179
|
+
const streamWithType = await fileTypeStream(Readable.fromWeb(sitemapResponse.body));
|
|
175
180
|
if (streamWithType.fileType !== undefined) {
|
|
176
181
|
contentType = streamWithType.fileType.mime;
|
|
177
182
|
}
|
|
@@ -184,7 +189,7 @@ export async function* parseSitemap(initialSources, proxyUrl, options) {
|
|
|
184
189
|
sitemapUrl.pathname = sitemapUrl.pathname.substring(0, sitemapUrl.pathname.length - 3);
|
|
185
190
|
}
|
|
186
191
|
}
|
|
187
|
-
items = pipeline(streamWithType, isGzipped ? createGunzip() : new PassThrough(), createParser(contentType, sitemapUrl), (e) => {
|
|
192
|
+
items = pipeline(streamWithType, isGzipped ? createGunzip() : new PassThrough(), createParser(contentType ?? undefined, sitemapUrl), (e) => {
|
|
188
193
|
if (e !== undefined && e !== null) {
|
|
189
194
|
error = { type: 'parser', error: e };
|
|
190
195
|
}
|
|
@@ -193,7 +198,7 @@ export async function* parseSitemap(initialSources, proxyUrl, options) {
|
|
|
193
198
|
else {
|
|
194
199
|
error = {
|
|
195
200
|
type: 'fetch',
|
|
196
|
-
error: new Error(`Failed to fetch sitemap: ${sitemapUrl}, status code: ${
|
|
201
|
+
error: new Error(`Failed to fetch sitemap: ${sitemapUrl}, status code: ${sitemapResponse?.status}`),
|
|
197
202
|
};
|
|
198
203
|
}
|
|
199
204
|
if (error !== null) {
|
|
@@ -262,7 +267,7 @@ export class Sitemap {
|
|
|
262
267
|
* @param url The domain URL to fetch the sitemap for.
|
|
263
268
|
* @param proxyUrl A proxy to be used for fetching the sitemap file.
|
|
264
269
|
*/
|
|
265
|
-
static async tryCommonNames(url, proxyUrl) {
|
|
270
|
+
static async tryCommonNames(url, proxyUrl, parseSitemapOptions) {
|
|
266
271
|
const sitemapUrls = [];
|
|
267
272
|
const sitemapUrl = new URL(url);
|
|
268
273
|
sitemapUrl.search = '';
|
|
@@ -270,7 +275,7 @@ export class Sitemap {
|
|
|
270
275
|
sitemapUrls.push(sitemapUrl.toString());
|
|
271
276
|
sitemapUrl.pathname = '/sitemap.txt';
|
|
272
277
|
sitemapUrls.push(sitemapUrl.toString());
|
|
273
|
-
return Sitemap.load(sitemapUrls, proxyUrl, { reportNetworkErrors: false });
|
|
278
|
+
return Sitemap.load(sitemapUrls, proxyUrl, { reportNetworkErrors: false, ...parseSitemapOptions });
|
|
274
279
|
}
|
|
275
280
|
/**
|
|
276
281
|
* Fetch sitemap content from given URL or URLs and return URLs of referenced pages.
|
|
@@ -285,8 +290,8 @@ export class Sitemap {
|
|
|
285
290
|
* @param content XML sitemap content
|
|
286
291
|
* @param proxyUrl URL of a proxy to be used for fetching sitemap contents
|
|
287
292
|
*/
|
|
288
|
-
static async fromXmlString(content, proxyUrl) {
|
|
289
|
-
return await this.parse([{ type: 'raw', content }], proxyUrl);
|
|
293
|
+
static async fromXmlString(content, proxyUrl, parseSitemapOptions) {
|
|
294
|
+
return await this.parse([{ type: 'raw', content }], proxyUrl, parseSitemapOptions);
|
|
290
295
|
}
|
|
291
296
|
static async parse(sources, proxyUrl, parseSitemapOptions) {
|
|
292
297
|
const urls = [];
|
package/internals/sitemap.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"sitemap.js","sourceRoot":"","sources":["../../src/internals/sitemap.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAEzC,OAAO,EAAE,WAAW,EAAE,QAAQ,EAAE,QAAQ,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AACzE,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AACpD,OAAO,EAAE,YAAY,EAAE,MAAM,WAAW,CAAC;AAGzC,OAAO,GAAG,MAAM,KAAK,CAAC;AACtB,OAAO,QAAQ,MAAM,iBAAiB,CAAC;AAEvC,OAAO,GAAG,MAAM,YAAY,CAAC;AAqB7B,MAAM,gBAAiB,SAAQ,SAAS;IAC5B,OAAO,GAAkB,IAAI,aAAa,CAAC,MAAM,CAAC,CAAC;IACnD,MAAM,GAAG,EAAE,CAAC;IAEpB;QACI,KAAK,CAAC;YACF,kBAAkB,EAAE,IAAI;YACxB,SAAS,EAAE,CAAC,KAAK,EAAE,SAAS,EAAE,QAAQ,EAAE,EAAE;gBACtC,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,EAAE,KAAK,CAAC,CAAC;gBACrD,QAAQ,EAAE,CAAC;YACf,CAAC;YACD,KAAK,EAAE,CAAC,QAAQ,EAAE,EAAE;gBAChB,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,IAAI,CAAC,CAAC;gBAC7C,QAAQ,EAAE,CAAC;YACf,CAAC;SACJ,CAAC,CAAC;IACP,CAAC;IAEO,aAAa,CAAC,KAAa,EAAE,QAAiB;QAClD,IAAI,CAAC,MAAM,IAAI,KAAK,CAAC;QAErB,IAAI,QAAQ,IAAI,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;YACzC,MAAM,KAAK,GAAG,IAAI,CAAC,MAAM;iBACpB,KAAK,CAAC,IAAI,CAAC;iBACX,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;iBAC1B,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;YAEvC,IAAI,QAAQ,EAAE,CAAC;gBACX,KAAK,MAAM,GAAG,IAAI,KAAK,EAAE,CAAC;oBACtB,IAAI,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,GAAG,EAAE,GAAG,EAAwB,CAAC,CAAC;gBAC/D,CAAC;gBAED,IAAI,CAAC,MAAM,GAAG,EAAE,CAAC;YACrB,CAAC;iBAAM,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC1B,KAAK,MAAM,GAAG,IAAI,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;oBACnC,IAAI,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,GAAG,EAAE,GAAG,EAAwB,CAAC,CAAC;gBAC/D,CAAC;gBAED,IAAI,CAAC,MAAM,GAAG,KAAK,CAAC,EAAE,CAAC,CAAC,CAAC,CAAE,CAAC;YAChC,CAAC;QACL,CAAC;IACL,CAAC;CACJ;AAED,MAAM,gBAAiB,SAAQ,SAAS;IAC5B,OAAO,GAAkB,IAAI,aAAa,CAAC,MAAM,CAAC,CAAC;IACnD,MAAM,GAAG,IAAI,GAAG,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;IAEjC,WAAW,CAA6B;IACxC,UAAU,GAAmD,SAAS,CAAC;IACvE,GAAG,GAAwB,EAAE,CAAC;IAEtC;QACI,KAAK,CAAC;YACF,kBAAkB,EAAE,IAAI;YACxB,SAAS,EAAE,CAAC,KAAK,EAAE,SAAS,EAAE,QAAQ,EAAE,EAAE;gBACtC,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC;gBAC7C,QAAQ,EAAE,CAAC;YACf,CAAC;YACD,KAAK,EAAE,CAAC,QAAQ,EAAE,EAAE;gBAChB,MAAM,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,CAAC;gBAChC,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;oBAClB,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;gBAC5B,CAAC;gBAED,IAAI,CAAC,MAAM,CAAC,GAAG,EAAE,CAAC;gBAClB,QAAQ,EAAE,CAAC;YACf,CAAC;SACJ,CAAC,CAAC;QAEH,IAAI,CAAC,MAAM,CAAC,SAAS,GAAG,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAClD,IAAI,CAAC,MAAM,CAAC,UAAU,GAAG,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAEpD,IAAI,CAAC,MAAM,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC5C,IAAI,CAAC,MAAM,CAAC,OAAO,GAAG,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAE7C,IAAI,CAAC,MAAM,CAAC,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAClD,CAAC;IAEO,SAAS,CAAC,IAAgC;QAC9C,IAAI,IAAI,CAAC,WAAW,KAAK,SAAS,EAAE,CAAC;YACjC,IACI,IAAI,CAAC,IAAI,KAAK,KAAK;gBACnB,IAAI,CAAC,IAAI,KAAK,SAAS;gBACvB,IAAI,CAAC,IAAI,KAAK,UAAU;gBACxB,IAAI,CAAC,IAAI,KAAK,YAAY,EAC5B,CAAC;gBACC,IAAI,CAAC,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC;YAChC,CAAC;QACL,CAAC;QACD,IAAI,IAAI,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;YACzB,IAAI,CAAC,WAAW,GAAG,QAAQ,CAAC;QAChC,CAAC;QACD,IAAI,IAAI,CAAC,IAAI,KAAK,cAAc,EAAE,CAAC;YAC/B,IAAI,CAAC,WAAW,GAAG,cAAc,CAAC;QACtC,CAAC;IACL,CAAC;IAEO,UAAU,CAAC,IAAY;QAC3B,IAAI,IAAI,KAAK,KAAK,IAAI,IAAI,KAAK,SAAS,IAAI,IAAI,KAAK,UAAU,IAAI,IAAI,KAAK,YAAY,EAAE,CAAC;YACvF,IAAI,CAAC,UAAU,GAAG,SAAS,CAAC;QAChC,CAAC;QAED,IAAI,IAAI,KAAK,KAAK,IAAI,IAAI,CAAC,GAAG,CAAC,GAAG,KAAK,SAAS,EAAE,CAAC;YAC/C,IAAI,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,EAAE,IAAI,CAAC,GAAG,CAAC,GAAG,EAAwB,CAAC,CAAC;YACjF,IAAI,CAAC,GAAG,GAAG,EAAE,CAAC;QAClB,CAAC;IACL,CAAC;IAEO,MAAM,CAAC,IAAY;QACvB,IAAI,IAAI,CAAC,UAAU,KAAK,KAAK,EAAE,CAAC;YAC5B,IAAI,IAAI,CAAC,WAAW,KAAK,cAAc,EAAE,CAAC;gBACtC,IAAI,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,YAAY,EAAE,GAAG,EAAE,IAAI,CAAC,IAAI,EAAE,EAAwB,CAAC,CAAC;YAC9E,CAAC;YAED,IAAI,IAAI,CAAC,WAAW,KAAK,QAAQ,EAAE,CAAC;gBAChC,IAAI,CAAC,GAAG,KAAK,EAAE,CAAC;gBAChB,IAAI,CAAC,GAAG,CAAC,GAAG,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;YAC/B,CAAC;QACL,CAAC;QAED,IAAI,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;QAEnB,IAAI,IAAI,CAAC,UAAU,KAAK,SAAS,EAAE,CAAC;YAChC,IAAI,CAAC,GAAG,CAAC,OAAO,GAAG,IAAI,IAAI,CAAC,IAAI,CAAC,CAAC;QACtC,CAAC;QAED,IAAI,IAAI,CAAC,UAAU,KAAK,UAAU,EAAE,CAAC;YACjC,IAAI,CAAC,GAAG,CAAC,QAAQ,GAAG,MAAM,CAAC,IAAI,CAAC,CAAC;QACrC,CAAC;QAED,IAAI,IAAI,CAAC,UAAU,KAAK,YAAY,EAAE,CAAC;YACnC,IAAI,CAAC,QAAQ,EAAE,QAAQ,EAAE,OAAO,EAAE,QAAQ,EAAE,SAAS,EAAE,QAAQ,EAAE,OAAO,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;gBACvF,IAAI,CAAC,GAAG,CAAC,UAAU,GAAG,IAAgC,CAAC;YAC3D,CAAC;QACL,CAAC;IACL,CAAC;CACJ;AA0BD,MAAM,CAAC,KAAK,SAAS,CAAC,CAAC,YAAY,CAC/B,cAA+B,EAC/B,QAAiB,EACjB,OAAW;IAEX,MAAM,EAAE,WAAW,EAAE,GAAG,MAAM,MAAM,CAAC,cAAc,CAAC,CAAC;IACrD,MAAM,EAAE,cAAc,EAAE,GAAG,MAAM,MAAM,CAAC,WAAW,CAAC,CAAC;IACrD,MAAM,EACF,kBAAkB,GAAG,KAAK,EAC1B,QAAQ,GAAG,QAAQ,EACnB,cAAc,GAAG,CAAC,EAClB,eAAe,EACf,mBAAmB,GAAG,IAAI,GAC7B,GAAG,OAAO,IAAI,EAAE,CAAC;IAElB,MAAM,OAAO,GAAG,CAAC,GAAG,cAAc,CAAC,CAAC;IACpC,MAAM,kBAAkB,GAAG,IAAI,GAAG,EAAU,CAAC;IAE7C,MAAM,YAAY,GAAG,CAAC,WAAW,GAAG,EAAE,EAAE,GAAS,EAAU,EAAE;QACzD,IAAI,QAAyB,CAAC;QAE9B,IAAI,CAAC;YACD,QAAQ,GAAG,IAAI,QAAQ,CAAC,WAAW,CAAC,CAAC;QACzC,CAAC;QAAC,MAAM,CAAC;YACL,QAAQ,GAAG,IAAI,CAAC;QACpB,CAAC;QAED,IAAI,QAAQ,EAAE,KAAK,EAAE,IAAI,GAAG,EAAE,QAAQ,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;YACtD,OAAO,IAAI,gBAAgB,EAAE,CAAC;QAClC,CAAC;QAED,IAAI,QAAQ,EAAE,OAAO,KAAK,YAAY,IAAI,GAAG,EAAE,QAAQ,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;YACvE,OAAO,IAAI,gBAAgB,EAAE,CAAC;QAClC,CAAC;QAED,MAAM,IAAI,KAAK,CAAC,mDAAmD,WAAW,WAAW,GAAG,EAAE,QAAQ,EAAE,GAAG,CAAC,CAAC;IACjH,CAAC,CAAC;IAEF,OAAO,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACxB,MAAM,MAAM,GAAG,OAAO,CAAC,KAAK,EAAG,CAAC;QAEhC,IAAI,CAAC,MAAM,EAAE,KAAK,IAAI,CAAC,CAAC,GAAG,QAAQ,EAAE,CAAC;YAClC,GAAG,CAAC,KAAK,CACL,oBAAoB,MAAM,CAAC,IAAI,KAAK,KAAK,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,iCAAiC,QAAQ,GAAG,CAC1G,CAAC;YACF,SAAS;QACb,CAAC;QAED,IAAI,KAAK,GAAsC,IAAI,CAAC;QAEpD,IAAI,MAAM,CAAC,IAAI,KAAK,KAAK,EAAE,CAAC;YACxB,MAAM,UAAU,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;YACvC,kBAAkB,CAAC,GAAG,CAAC,UAAU,CAAC,QAAQ,EAAE,CAAC,CAAC;YAC9C,IAAI,WAAW,GAAG,cAAc,GAAG,CAAC,CAAC;YAErC,OAAO,WAAW,EAAE,GAAG,CAAC,EAAE,CAAC;gBACvB,IAAI,CAAC;oBACD,MAAM,aAAa,GAAG,MAAM,IAAI,OAAO,CACnC,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;wBAChB,MAAM,OAAO,GAAG,WAAW,CAAC,MAAM,CAAC;4BAC/B,GAAG,EAAE,UAAU;4BACf,QAAQ;4BACR,MAAM,EAAE,KAAK;4BACb,OAAO,EAAE,eAAe;4BACxB,OAAO,EAAE;gCACL,MAAM,EAAE,qEAAqE;6BAChF;yBACJ,CAAC,CAAC;wBACH,OAAO,CAAC,EAAE,CAAC,UAAU,EAAE,GAAG,EAAE,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC;wBAC/C,OAAO,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;oBAChC,CAAC,CACJ,CAAC;oBAEF,IAAI,KAAK,GAAsD,IAAI,CAAC;oBAEpE,IAAI,aAAa,CAAC,QAAS,CAAC,UAAU,IAAI,GAAG,IAAI,aAAa,CAAC,QAAS,CAAC,UAAU,GAAG,GAAG,EAAE,CAAC;wBACxF,IAAI,WAAW,GAAG,aAAa,CAAC,QAAS,CAAC,OAAO,CAAC,cAAc,CAAC,CAAC;wBAElE,MAAM,cAAc,GAAG,MAAM,cAAc,CAAC,aAAa,CAAC,CAAC;wBAC3D,IAAI,cAAc,CAAC,QAAQ,KAAK,SAAS,EAAE,CAAC;4BACxC,WAAW,GAAG,cAAc,CAAC,QAAQ,CAAC,IAAI,CAAC;wBAC/C,CAAC;wBAED,IAAI,SAAS,GAAG,KAAK,CAAC;wBAEtB,IACI,WAAW,KAAK,SAAS;4BACrB,CAAC,CAAC,WAAW,KAAK,kBAAkB;4BACpC,CAAC,CAAC,UAAU,CAAC,QAAQ,CAAC,QAAQ,CAAC,KAAK,CAAC,EAC3C,CAAC;4BACC,SAAS,GAAG,IAAI,CAAC;4BAEjB,IAAI,UAAU,CAAC,QAAQ,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;gCACtC,UAAU,CAAC,QAAQ,GAAG,UAAU,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC,EAAE,UAAU,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;4BAC3F,CAAC;wBACL,CAAC;wBAED,KAAK,GAAG,QAAQ,CACZ,cAAc,EACd,SAAS,CAAC,CAAC,CAAC,YAAY,EAAE,CAAC,CAAC,CAAC,IAAI,WAAW,EAAE,EAC9C,YAAY,CAAC,WAAW,EAAE,UAAU,CAAC,EACrC,CAAC,CAAC,EAAE,EAAE;4BACF,IAAI,CAAC,KAAK,SAAS,IAAI,CAAC,KAAK,IAAI,EAAE,CAAC;gCAChC,KAAK,GAAG,EAAE,IAAI,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC;4BACzC,CAAC;wBACL,CAAC,CACJ,CAAC;oBACN,CAAC;yBAAM,CAAC;wBACJ,KAAK,GAAG;4BACJ,IAAI,EAAE,OAAO;4BACb,KAAK,EAAE,IAAI,KAAK,CACZ,4BAA4B,UAAU,kBAAkB,aAAa,CAAC,QAAS,CAAC,UAAU,EAAE,CAC/F;yBACJ,CAAC;oBACN,CAAC;oBAED,IAAI,KAAK,KAAK,IAAI,EAAE,CAAC;wBACjB,MAAM,iBAAiB,GAAG,KAAK,CAAC,IAAI,KAAK,OAAO,IAAI,CAAC,mBAAmB,CAAC;wBACzE,IAAI,CAAC,iBAAiB,EAAE,CAAC;4BACrB,MAAM,KAAK,CAAC,KAAK,CAAC;wBACtB,CAAC;oBACL,CAAC;yBAAM,CAAC;wBACJ,MAAM;oBACV,CAAC;gBACL,CAAC;gBAAC,OAAO,CAAC,EAAE,CAAC;oBACT,GAAG,CAAC,OAAO,CACP,8BAA8B,UAAU,KAAK,WAAW,KAAK,CAAC,CAAC,CAAC,CAAC,kBAAkB,CAAC,CAAC,CAAC,aAAa,KAAK,CAAC,GAAG,CAC/G,CAAC;gBACN,CAAC;YACL,CAAC;QACL,CAAC;aAAM,IAAI,MAAM,CAAC,IAAI,KAAK,KAAK,EAAE,CAAC;YAC/B,KAAK,GAAG,QAAQ,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,EAAE,YAAY,CAAC,UAAU,CAAC,EAAE,CAAC,KAAK,EAAE,EAAE;gBAClF,IAAI,KAAK,KAAK,SAAS,EAAE,CAAC;oBACtB,GAAG,CAAC,OAAO,CAAC,8BAA8B,KAAK,EAAE,CAAC,CAAC;gBACvD,CAAC;YACL,CAAC,CAAC,CAAC;QACP,CAAC;QAED,IAAI,KAAK,KAAK,IAAI,EAAE,CAAC;YACjB,SAAS;QACb,CAAC;QAED,IAAI,KAAK,EAAE,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YAC7B,IAAI,IAAI,CAAC,IAAI,KAAK,YAAY,IAAI,CAAC,kBAAkB,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;gBAClE,OAAO,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,GAAG,EAAE,IAAI,CAAC,GAAG,EAAE,KAAK,EAAE,CAAC,MAAM,CAAC,KAAK,IAAI,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;gBAC7E,IAAI,kBAAkB,EAAE,CAAC;oBACrB,MAAM,EAAE,GAAG,EAAE,IAAI,CAAC,GAAG,EAAE,gBAAgB,EAAE,IAAI,EAAS,CAAC;gBAC3D,CAAC;YACL,CAAC;YAED,IAAI,IAAI,CAAC,IAAI,KAAK,KAAK,EAAE,CAAC;gBACtB,MAAM;oBACF,GAAG,IAAI;oBACP,gBAAgB,EACZ,MAAM,CAAC,IAAI,KAAK,KAAK;wBACjB,CAAC,CAAC,MAAM,CAAC,GAAG;wBACZ,CAAC,CAAC,SAAS,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE;iBACpF,CAAC;YACN,CAAC;QACL,CAAC;IACL,CAAC;AACL,CAAC;AAED;;;;;;;;;;;GAWG;AACH,MAAM,OAAO,OAAO;IACK;IAArB,YAAqB,IAAc;QAAd,SAAI,GAAJ,IAAI,CAAU;IAAG,CAAC;IAEvC;;;;;OAKG;IACH,MAAM,CAAC,KAAK,CAAC,cAAc,CAAC,GAAW,EAAE,QAAiB;QACtD,MAAM,WAAW,GAAa,EAAE,CAAC;QAEjC,MAAM,UAAU,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;QAChC,UAAU,CAAC,MAAM,GAAG,EAAE,CAAC;QAEvB,UAAU,CAAC,QAAQ,GAAG,cAAc,CAAC;QACrC,WAAW,CAAC,IAAI,CAAC,UAAU,CAAC,QAAQ,EAAE,CAAC,CAAC;QAExC,UAAU,CAAC,QAAQ,GAAG,cAAc,CAAC;QACrC,WAAW,CAAC,IAAI,CAAC,UAAU,CAAC,QAAQ,EAAE,CAAC,CAAC;QAExC,OAAO,OAAO,CAAC,IAAI,CAAC,WAAW,EAAE,QAAQ,EAAE,EAAE,mBAAmB,EAAE,KAAK,EAAE,CAAC,CAAC;IAC/E,CAAC;IAED;;;;OAIG;IACH,MAAM,CAAC,KAAK,CAAC,IAAI,CACb,IAAuB,EACvB,QAAiB,EACjB,mBAAyC;QAEzC,OAAO,MAAM,IAAI,CAAC,KAAK,CACnB,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,GAAG,EAAE,CAAC,CAAC,EAC1E,QAAQ,EACR,mBAAmB,CACtB,CAAC;IACN,CAAC;IAED;;;;OAIG;IACH,MAAM,CAAC,KAAK,CAAC,aAAa,CAAC,OAAe,EAAE,QAAiB;QACzD,OAAO,MAAM,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,OAAO,EAAE,CAAC,EAAE,QAAQ,CAAC,CAAC;IAClE,CAAC;IAES,MAAM,CAAC,KAAK,CAAC,KAAK,CACxB,OAAwB,EACxB,QAAiB,EACjB,mBAAyC;QAEzC,MAAM,IAAI,GAAa,EAAE,CAAC;QAE1B,IAAI,CAAC;YACD,IAAI,KAAK,EAAE,MAAM,IAAI,IAAI,YAAY,CAAC,OAAO,EAAE,QAAQ,EAAE,mBAAmB,CAAC,EAAE,CAAC;gBAC5E,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YACxB,CAAC;QACL,CAAC;QAAC,MAAM,CAAC;YACL,OAAO,IAAI,OAAO,CAAC,EAAE,CAAC,CAAC;QAC3B,CAAC;QAED,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;IAC7B,CAAC;CACJ"}
|
|
1
|
+
{"version":3,"file":"sitemap.js","sourceRoot":"","sources":["../../src/internals/sitemap.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAEzC,OAAO,EAAE,WAAW,EAAE,QAAQ,EAAE,QAAQ,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AACzE,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AACpD,OAAO,EAAE,YAAY,EAAE,MAAM,WAAW,CAAC;AAEzC,OAAO,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAC;AAExD,OAAO,EAAE,cAAc,EAAE,MAAM,WAAW,CAAC;AAC3C,OAAO,GAAG,MAAM,KAAK,CAAC;AACtB,OAAO,QAAQ,MAAM,iBAAiB,CAAC;AAEvC,OAAO,GAAG,MAAM,YAAY,CAAC;AAqB7B,MAAM,gBAAiB,SAAQ,SAAS;IAC5B,OAAO,GAAkB,IAAI,aAAa,CAAC,MAAM,CAAC,CAAC;IACnD,MAAM,GAAG,EAAE,CAAC;IAEpB;QACI,KAAK,CAAC;YACF,kBAAkB,EAAE,IAAI;YACxB,SAAS,EAAE,CAAC,KAAK,EAAE,SAAS,EAAE,QAAQ,EAAE,EAAE;gBACtC,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,EAAE,KAAK,CAAC,CAAC;gBACrD,QAAQ,EAAE,CAAC;YACf,CAAC;YACD,KAAK,EAAE,CAAC,QAAQ,EAAE,EAAE;gBAChB,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,IAAI,CAAC,CAAC;gBAC7C,QAAQ,EAAE,CAAC;YACf,CAAC;SACJ,CAAC,CAAC;IACP,CAAC;IAEO,aAAa,CAAC,KAAa,EAAE,QAAiB;QAClD,IAAI,CAAC,MAAM,IAAI,KAAK,CAAC;QAErB,IAAI,QAAQ,IAAI,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;YACzC,MAAM,KAAK,GAAG,IAAI,CAAC,MAAM;iBACpB,KAAK,CAAC,IAAI,CAAC;iBACX,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;iBAC1B,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;YAEvC,IAAI,QAAQ,EAAE,CAAC;gBACX,KAAK,MAAM,GAAG,IAAI,KAAK,EAAE,CAAC;oBACtB,IAAI,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,GAAG,EAAE,GAAG,EAAwB,CAAC,CAAC;gBAC/D,CAAC;gBAED,IAAI,CAAC,MAAM,GAAG,EAAE,CAAC;YACrB,CAAC;iBAAM,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC1B,KAAK,MAAM,GAAG,IAAI,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;oBACnC,IAAI,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,GAAG,EAAE,GAAG,EAAwB,CAAC,CAAC;gBAC/D,CAAC;gBAED,IAAI,CAAC,MAAM,GAAG,KAAK,CAAC,EAAE,CAAC,CAAC,CAAC,CAAE,CAAC;YAChC,CAAC;QACL,CAAC;IACL,CAAC;CACJ;AAED,MAAM,gBAAiB,SAAQ,SAAS;IAC5B,OAAO,GAAkB,IAAI,aAAa,CAAC,MAAM,CAAC,CAAC;IACnD,MAAM,GAAG,IAAI,GAAG,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;IAEjC,WAAW,CAA6B;IACxC,UAAU,GAAmD,SAAS,CAAC;IACvE,GAAG,GAAwB,EAAE,CAAC;IAEtC;QACI,KAAK,CAAC;YACF,kBAAkB,EAAE,IAAI;YACxB,SAAS,EAAE,CAAC,KAAK,EAAE,SAAS,EAAE,QAAQ,EAAE,EAAE;gBACtC,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC;gBAC7C,QAAQ,EAAE,CAAC;YACf,CAAC;YACD,KAAK,EAAE,CAAC,QAAQ,EAAE,EAAE;gBAChB,MAAM,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,CAAC;gBAChC,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;oBAClB,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;gBAC5B,CAAC;gBAED,IAAI,CAAC,MAAM,CAAC,GAAG,EAAE,CAAC;gBAClB,QAAQ,EAAE,CAAC;YACf,CAAC;SACJ,CAAC,CAAC;QAEH,IAAI,CAAC,MAAM,CAAC,SAAS,GAAG,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAClD,IAAI,CAAC,MAAM,CAAC,UAAU,GAAG,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAEpD,IAAI,CAAC,MAAM,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC5C,IAAI,CAAC,MAAM,CAAC,OAAO,GAAG,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAE7C,IAAI,CAAC,MAAM,CAAC,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAClD,CAAC;IAEO,SAAS,CAAC,IAAgC;QAC9C,IAAI,IAAI,CAAC,WAAW,KAAK,SAAS,EAAE,CAAC;YACjC,IACI,IAAI,CAAC,IAAI,KAAK,KAAK;gBACnB,IAAI,CAAC,IAAI,KAAK,SAAS;gBACvB,IAAI,CAAC,IAAI,KAAK,UAAU;gBACxB,IAAI,CAAC,IAAI,KAAK,YAAY,EAC5B,CAAC;gBACC,IAAI,CAAC,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC;YAChC,CAAC;QACL,CAAC;QACD,IAAI,IAAI,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;YACzB,IAAI,CAAC,WAAW,GAAG,QAAQ,CAAC;QAChC,CAAC;QACD,IAAI,IAAI,CAAC,IAAI,KAAK,cAAc,EAAE,CAAC;YAC/B,IAAI,CAAC,WAAW,GAAG,cAAc,CAAC;QACtC,CAAC;IACL,CAAC;IAEO,UAAU,CAAC,IAAY;QAC3B,IAAI,IAAI,KAAK,KAAK,IAAI,IAAI,KAAK,SAAS,IAAI,IAAI,KAAK,UAAU,IAAI,IAAI,KAAK,YAAY,EAAE,CAAC;YACvF,IAAI,CAAC,UAAU,GAAG,SAAS,CAAC;QAChC,CAAC;QAED,IAAI,IAAI,KAAK,KAAK,IAAI,IAAI,CAAC,GAAG,CAAC,GAAG,KAAK,SAAS,EAAE,CAAC;YAC/C,IAAI,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,EAAE,IAAI,CAAC,GAAG,CAAC,GAAG,EAAwB,CAAC,CAAC;YACjF,IAAI,CAAC,GAAG,GAAG,EAAE,CAAC;QAClB,CAAC;IACL,CAAC;IAEO,MAAM,CAAC,IAAY;QACvB,IAAI,IAAI,CAAC,UAAU,KAAK,KAAK,EAAE,CAAC;YAC5B,IAAI,IAAI,CAAC,WAAW,KAAK,cAAc,EAAE,CAAC;gBACtC,IAAI,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,YAAY,EAAE,GAAG,EAAE,IAAI,CAAC,IAAI,EAAE,EAAwB,CAAC,CAAC;YAC9E,CAAC;YAED,IAAI,IAAI,CAAC,WAAW,KAAK,QAAQ,EAAE,CAAC;gBAChC,IAAI,CAAC,GAAG,KAAK,EAAE,CAAC;gBAChB,IAAI,CAAC,GAAG,CAAC,GAAG,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;YAC/B,CAAC;QACL,CAAC;QAED,IAAI,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;QAEnB,IAAI,IAAI,CAAC,UAAU,KAAK,SAAS,EAAE,CAAC;YAChC,IAAI,CAAC,GAAG,CAAC,OAAO,GAAG,IAAI,IAAI,CAAC,IAAI,CAAC,CAAC;QACtC,CAAC;QAED,IAAI,IAAI,CAAC,UAAU,KAAK,UAAU,EAAE,CAAC;YACjC,IAAI,CAAC,GAAG,CAAC,QAAQ,GAAG,MAAM,CAAC,IAAI,CAAC,CAAC;QACrC,CAAC;QAED,IAAI,IAAI,CAAC,UAAU,KAAK,YAAY,EAAE,CAAC;YACnC,IAAI,CAAC,QAAQ,EAAE,QAAQ,EAAE,OAAO,EAAE,QAAQ,EAAE,SAAS,EAAE,QAAQ,EAAE,OAAO,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;gBACvF,IAAI,CAAC,GAAG,CAAC,UAAU,GAAG,IAAgC,CAAC;YAC3D,CAAC;QACL,CAAC;IACL,CAAC;CACJ;AA8BD,MAAM,CAAC,KAAK,SAAS,CAAC,CAAC,YAAY,CAC/B,cAA+B,EAC/B,QAAiB,EACjB,OAAW;IAEX,MAAM,EACF,UAAU,GAAG,IAAI,eAAe,EAAE,EAClC,kBAAkB,GAAG,KAAK,EAC1B,QAAQ,GAAG,QAAQ,EACnB,cAAc,GAAG,CAAC,EAClB,aAAa,EAAE,OAAO,GAAG,KAAK,EAC9B,mBAAmB,GAAG,IAAI,GAC7B,GAAG,OAAO,IAAI,EAAE,CAAC;IAElB,MAAM,OAAO,GAAG,CAAC,GAAG,cAAc,CAAC,CAAC;IACpC,MAAM,kBAAkB,GAAG,IAAI,GAAG,EAAU,CAAC;IAE7C,MAAM,YAAY,GAAG,CAAC,WAAW,GAAG,EAAE,EAAE,GAAS,EAAU,EAAE;QACzD,IAAI,QAAyB,CAAC;QAE9B,IAAI,CAAC;YACD,QAAQ,GAAG,IAAI,QAAQ,CAAC,WAAW,CAAC,CAAC;QACzC,CAAC;QAAC,MAAM,CAAC;YACL,QAAQ,GAAG,IAAI,CAAC;QACpB,CAAC;QAED,IAAI,QAAQ,EAAE,KAAK,EAAE,IAAI,GAAG,EAAE,QAAQ,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;YACtD,OAAO,IAAI,gBAAgB,EAAE,CAAC;QAClC,CAAC;QAED,IAAI,QAAQ,EAAE,OAAO,KAAK,YAAY,IAAI,GAAG,EAAE,QAAQ,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;YACvE,OAAO,IAAI,gBAAgB,EAAE,CAAC;QAClC,CAAC;QAED,MAAM,IAAI,KAAK,CAAC,mDAAmD,WAAW,WAAW,GAAG,EAAE,QAAQ,EAAE,GAAG,CAAC,CAAC;IACjH,CAAC,CAAC;IAEF,OAAO,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACxB,MAAM,MAAM,GAAG,OAAO,CAAC,KAAK,EAAG,CAAC;QAEhC,IAAI,CAAC,MAAM,EAAE,KAAK,IAAI,CAAC,CAAC,GAAG,QAAQ,EAAE,CAAC;YAClC,GAAG,CAAC,KAAK,CACL,oBAAoB,MAAM,CAAC,IAAI,KAAK,KAAK,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,iCAAiC,QAAQ,GAAG,CAC1G,CAAC;YACF,SAAS;QACb,CAAC;QAED,IAAI,KAAK,GAAsC,IAAI,CAAC;QAEpD,IAAI,MAAM,CAAC,IAAI,KAAK,KAAK,EAAE,CAAC;YACxB,MAAM,UAAU,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;YACvC,kBAAkB,CAAC,GAAG,CAAC,UAAU,CAAC,QAAQ,EAAE,CAAC,CAAC;YAC9C,IAAI,WAAW,GAAG,cAAc,GAAG,CAAC,CAAC;YAErC,OAAO,WAAW,EAAE,GAAG,CAAC,EAAE,CAAC;gBACvB,IAAI,CAAC;oBACD,IAAI,eAAgC,CAAC;oBAErC,IAAI,CAAC;wBACD,eAAe,GAAG,MAAM,UAAU,CAAC,MAAM,CACrC,IAAI,OAAO,CAAC,UAAU,EAAE;4BACpB,MAAM,EAAE,KAAK;4BACb,OAAO,EAAE;gCACL,MAAM,EAAE,qEAAqE;6BAChF;yBACJ,CAAC,EACF;4BACI,QAAQ;4BACR,OAAO;yBACV,CACJ,CAAC;oBACN,CAAC;oBAAC,OAAO,KAAU,EAAE,CAAC;wBAClB,eAAe,GAAG,IAAI,CAAC;oBAC3B,CAAC;oBAED,IAAI,KAAK,GAAsD,IAAI,CAAC;oBAEpE,IAAI,eAAe,IAAI,eAAe,CAAC,MAAM,IAAI,GAAG,IAAI,eAAe,CAAC,MAAM,GAAG,GAAG,EAAE,CAAC;wBACnF,IAAI,WAAW,GAAG,eAAe,CAAC,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,CAAC;wBAE9D,IAAI,eAAe,CAAC,IAAI,KAAK,IAAI,EAAE,CAAC;4BAChC,MAAM;wBACV,CAAC;wBACD,MAAM,cAAc,GAAG,MAAM,cAAc,CAAC,QAAQ,CAAC,OAAO,CAAC,eAAe,CAAC,IAAW,CAAC,CAAC,CAAC;wBAC3F,IAAI,cAAc,CAAC,QAAQ,KAAK,SAAS,EAAE,CAAC;4BACxC,WAAW,GAAG,cAAc,CAAC,QAAQ,CAAC,IAAI,CAAC;wBAC/C,CAAC;wBAED,IAAI,SAAS,GAAG,KAAK,CAAC;wBAEtB,IACI,WAAW,KAAK,SAAS;4BACrB,CAAC,CAAC,WAAW,KAAK,kBAAkB;4BACpC,CAAC,CAAC,UAAU,CAAC,QAAQ,CAAC,QAAQ,CAAC,KAAK,CAAC,EAC3C,CAAC;4BACC,SAAS,GAAG,IAAI,CAAC;4BAEjB,IAAI,UAAU,CAAC,QAAQ,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;gCACtC,UAAU,CAAC,QAAQ,GAAG,UAAU,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC,EAAE,UAAU,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;4BAC3F,CAAC;wBACL,CAAC;wBAED,KAAK,GAAG,QAAQ,CACZ,cAAc,EACd,SAAS,CAAC,CAAC,CAAC,YAAY,EAAE,CAAC,CAAC,CAAC,IAAI,WAAW,EAAE,EAC9C,YAAY,CAAC,WAAW,IAAI,SAAS,EAAE,UAAU,CAAC,EAClD,CAAC,CAAC,EAAE,EAAE;4BACF,IAAI,CAAC,KAAK,SAAS,IAAI,CAAC,KAAK,IAAI,EAAE,CAAC;gCAChC,KAAK,GAAG,EAAE,IAAI,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC;4BACzC,CAAC;wBACL,CAAC,CACJ,CAAC;oBACN,CAAC;yBAAM,CAAC;wBACJ,KAAK,GAAG;4BACJ,IAAI,EAAE,OAAO;4BACb,KAAK,EAAE,IAAI,KAAK,CACZ,4BAA4B,UAAU,kBAAkB,eAAe,EAAE,MAAM,EAAE,CACpF;yBACJ,CAAC;oBACN,CAAC;oBAED,IAAI,KAAK,KAAK,IAAI,EAAE,CAAC;wBACjB,MAAM,iBAAiB,GAAG,KAAK,CAAC,IAAI,KAAK,OAAO,IAAI,CAAC,mBAAmB,CAAC;wBACzE,IAAI,CAAC,iBAAiB,EAAE,CAAC;4BACrB,MAAM,KAAK,CAAC,KAAK,CAAC;wBACtB,CAAC;oBACL,CAAC;yBAAM,CAAC;wBACJ,MAAM;oBACV,CAAC;gBACL,CAAC;gBAAC,OAAO,CAAC,EAAE,CAAC;oBACT,GAAG,CAAC,OAAO,CACP,8BAA8B,UAAU,KAAK,WAAW,KAAK,CAAC,CAAC,CAAC,CAAC,kBAAkB,CAAC,CAAC,CAAC,aAAa,KAAK,CAAC,GAAG,CAC/G,CAAC;gBACN,CAAC;YACL,CAAC;QACL,CAAC;aAAM,IAAI,MAAM,CAAC,IAAI,KAAK,KAAK,EAAE,CAAC;YAC/B,KAAK,GAAG,QAAQ,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,EAAE,YAAY,CAAC,UAAU,CAAC,EAAE,CAAC,KAAK,EAAE,EAAE;gBAClF,IAAI,KAAK,KAAK,SAAS,EAAE,CAAC;oBACtB,GAAG,CAAC,OAAO,CAAC,8BAA8B,KAAK,EAAE,CAAC,CAAC;gBACvD,CAAC;YACL,CAAC,CAAC,CAAC;QACP,CAAC;QAED,IAAI,KAAK,KAAK,IAAI,EAAE,CAAC;YACjB,SAAS;QACb,CAAC;QAED,IAAI,KAAK,EAAE,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YAC7B,IAAI,IAAI,CAAC,IAAI,KAAK,YAAY,IAAI,CAAC,kBAAkB,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;gBAClE,OAAO,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,GAAG,EAAE,IAAI,CAAC,GAAG,EAAE,KAAK,EAAE,CAAC,MAAM,CAAC,KAAK,IAAI,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;gBAC7E,IAAI,kBAAkB,EAAE,CAAC;oBACrB,MAAM,EAAE,GAAG,EAAE,IAAI,CAAC,GAAG,EAAE,gBAAgB,EAAE,IAAI,EAAS,CAAC;gBAC3D,CAAC;YACL,CAAC;YAED,IAAI,IAAI,CAAC,IAAI,KAAK,KAAK,EAAE,CAAC;gBACtB,MAAM;oBACF,GAAG,IAAI;oBACP,gBAAgB,EACZ,MAAM,CAAC,IAAI,KAAK,KAAK;wBACjB,CAAC,CAAC,MAAM,CAAC,GAAG;wBACZ,CAAC,CAAC,SAAS,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE;iBACpF,CAAC;YACN,CAAC;QACL,CAAC;IACL,CAAC;AACL,CAAC;AAED;;;;;;;;;;;GAWG;AACH,MAAM,OAAO,OAAO;IACK;IAArB,YAAqB,IAAc;QAAd,SAAI,GAAJ,IAAI,CAAU;IAAG,CAAC;IAEvC;;;;;OAKG;IACH,MAAM,CAAC,KAAK,CAAC,cAAc,CACvB,GAAW,EACX,QAAiB,EACjB,mBAAyC;QAEzC,MAAM,WAAW,GAAa,EAAE,CAAC;QAEjC,MAAM,UAAU,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;QAChC,UAAU,CAAC,MAAM,GAAG,EAAE,CAAC;QAEvB,UAAU,CAAC,QAAQ,GAAG,cAAc,CAAC;QACrC,WAAW,CAAC,IAAI,CAAC,UAAU,CAAC,QAAQ,EAAE,CAAC,CAAC;QAExC,UAAU,CAAC,QAAQ,GAAG,cAAc,CAAC;QACrC,WAAW,CAAC,IAAI,CAAC,UAAU,CAAC,QAAQ,EAAE,CAAC,CAAC;QAExC,OAAO,OAAO,CAAC,IAAI,CAAC,WAAW,EAAE,QAAQ,EAAE,EAAE,mBAAmB,EAAE,KAAK,EAAE,GAAG,mBAAmB,EAAE,CAAC,CAAC;IACvG,CAAC;IAED;;;;OAIG;IACH,MAAM,CAAC,KAAK,CAAC,IAAI,CACb,IAAuB,EACvB,QAAiB,EACjB,mBAAyC;QAEzC,OAAO,MAAM,IAAI,CAAC,KAAK,CACnB,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,GAAG,EAAE,CAAC,CAAC,EAC1E,QAAQ,EACR,mBAAmB,CACtB,CAAC;IACN,CAAC;IAED;;;;OAIG;IACH,MAAM,CAAC,KAAK,CAAC,aAAa,CACtB,OAAe,EACf,QAAiB,EACjB,mBAAyC;QAEzC,OAAO,MAAM,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,OAAO,EAAE,CAAC,EAAE,QAAQ,EAAE,mBAAmB,CAAC,CAAC;IACvF,CAAC;IAES,MAAM,CAAC,KAAK,CAAC,KAAK,CACxB,OAAwB,EACxB,QAAiB,EACjB,mBAAyC;QAEzC,MAAM,IAAI,GAAa,EAAE,CAAC;QAE1B,IAAI,CAAC;YACD,IAAI,KAAK,EAAE,MAAM,IAAI,IAAI,YAAY,CAAC,OAAO,EAAE,QAAQ,EAAE,mBAAmB,CAAC,EAAE,CAAC;gBAC5E,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YACxB,CAAC;QACL,CAAC;QAAC,MAAM,CAAC;YACL,OAAO,IAAI,OAAO,CAAC,EAAE,CAAC,CAAC;QAC3B,CAAC;QAED,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;IAC7B,CAAC;CACJ"}
|
package/internals/url.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
|
|
1
|
+
import type { SearchParams } from '@crawlee/types';
|
|
2
2
|
/**
|
|
3
3
|
* Appends search (query string) parameters to a URL, replacing the original value (if any).
|
|
4
4
|
*
|
package/internals/url.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"url.d.ts","sourceRoot":"","sources":["../../src/internals/url.ts"],"names":[],"mappings":"AAAA,
|
|
1
|
+
{"version":3,"file":"url.d.ts","sourceRoot":"","sources":["../../src/internals/url.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAC;AAEnD;;;;;;GAMG;AACH,wBAAgB,iBAAiB,CAAC,GAAG,EAAE,GAAG,EAAE,YAAY,EAAE,YAAY,GAAG,SAAS,GAAG,IAAI,CA4BxF"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@crawlee/utils",
|
|
3
|
-
"version": "4.0.0-beta.
|
|
3
|
+
"version": "4.0.0-beta.22",
|
|
4
4
|
"description": "A set of shared utilities that can be used by crawlers",
|
|
5
5
|
"engines": {
|
|
6
6
|
"node": ">=22.0.0"
|
|
@@ -43,12 +43,12 @@
|
|
|
43
43
|
"dependencies": {
|
|
44
44
|
"@apify/log": "^2.5.18",
|
|
45
45
|
"@apify/ps-tree": "^1.2.0",
|
|
46
|
-
"@crawlee/
|
|
46
|
+
"@crawlee/impit-client": "4.0.0-beta.22",
|
|
47
|
+
"@crawlee/types": "4.0.0-beta.22",
|
|
47
48
|
"@types/sax": "^1.2.7",
|
|
48
49
|
"cheerio": "^1.0.0",
|
|
49
50
|
"domhandler": "^5.0.3",
|
|
50
51
|
"file-type": "^21.0.0",
|
|
51
|
-
"got-scraping": "^4.1.1",
|
|
52
52
|
"ow": "^2.0.0",
|
|
53
53
|
"robots-parser": "^3.0.1",
|
|
54
54
|
"sax": "^1.4.1",
|
|
@@ -62,5 +62,5 @@
|
|
|
62
62
|
}
|
|
63
63
|
}
|
|
64
64
|
},
|
|
65
|
-
"gitHead": "
|
|
65
|
+
"gitHead": "74e5a8dae2b5c919ca40cc3f9135744448aca0ac"
|
|
66
66
|
}
|