@crawlee/core 4.0.0-beta.13 → 4.0.0-beta.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -1
- package/crawlers/crawler_commons.d.ts +0 -3
- package/crawlers/crawler_commons.d.ts.map +1 -1
- package/crawlers/crawler_commons.js +0 -8
- package/crawlers/crawler_commons.js.map +1 -1
- package/enqueue_links/enqueue_links.d.ts +22 -15
- package/enqueue_links/enqueue_links.d.ts.map +1 -1
- package/enqueue_links/enqueue_links.js +33 -14
- package/enqueue_links/enqueue_links.js.map +1 -1
- package/enqueue_links/shared.d.ts +8 -2
- package/enqueue_links/shared.d.ts.map +1 -1
- package/enqueue_links/shared.js +40 -18
- package/enqueue_links/shared.js.map +1 -1
- package/http_clients/got-scraping-http-client.d.ts.map +1 -1
- package/http_clients/got-scraping-http-client.js +4 -1
- package/http_clients/got-scraping-http-client.js.map +1 -1
- package/index.d.ts +1 -0
- package/index.d.ts.map +1 -1
- package/index.js +1 -0
- package/index.js.map +1 -1
- package/log.d.ts +4 -2
- package/log.d.ts.map +1 -1
- package/log.js.map +1 -1
- package/package.json +6 -6
- package/proxy_configuration.d.ts +7 -9
- package/proxy_configuration.d.ts.map +1 -1
- package/proxy_configuration.js +2 -5
- package/proxy_configuration.js.map +1 -1
- package/recoverable_state.d.ts +123 -0
- package/recoverable_state.d.ts.map +1 -0
- package/recoverable_state.js +140 -0
- package/recoverable_state.js.map +1 -0
- package/request.d.ts +23 -0
- package/request.d.ts.map +1 -1
- package/request.js +19 -1
- package/request.js.map +1 -1
- package/storages/dataset.d.ts +7 -0
- package/storages/dataset.d.ts.map +1 -1
- package/storages/dataset.js +10 -1
- package/storages/dataset.js.map +1 -1
- package/storages/index.d.ts +2 -0
- package/storages/index.d.ts.map +1 -1
- package/storages/index.js +2 -0
- package/storages/index.js.map +1 -1
- package/storages/key_value_store.d.ts +8 -0
- package/storages/key_value_store.d.ts.map +1 -1
- package/storages/key_value_store.js +5 -3
- package/storages/key_value_store.js.map +1 -1
- package/storages/request_list.d.ts +0 -6
- package/storages/request_list.d.ts.map +1 -1
- package/storages/request_list.js.map +1 -1
- package/storages/request_list_adapter.d.ts +58 -0
- package/storages/request_list_adapter.d.ts.map +1 -0
- package/storages/request_list_adapter.js +81 -0
- package/storages/request_list_adapter.js.map +1 -0
- package/storages/request_manager_tandem.d.ts +68 -0
- package/storages/request_manager_tandem.d.ts.map +1 -0
- package/storages/request_manager_tandem.js +124 -0
- package/storages/request_manager_tandem.js.map +1 -0
- package/storages/request_provider.d.ts +67 -4
- package/storages/request_provider.d.ts.map +1 -1
- package/storages/request_provider.js +74 -44
- package/storages/request_provider.js.map +1 -1
- package/storages/request_queue_v2.d.ts +2 -2
- package/storages/request_queue_v2.d.ts.map +1 -1
- package/storages/request_queue_v2.js.map +1 -1
- package/storages/sitemap_request_list.d.ts.map +1 -1
- package/storages/sitemap_request_list.js.map +1 -1
- package/storages/storage_manager.d.ts.map +1 -1
- package/storages/storage_manager.js +1 -1
- package/storages/storage_manager.js.map +1 -1
- package/tsconfig.build.tsbuildinfo +0 -1
package/README.md
CHANGED
|
@@ -9,6 +9,10 @@
|
|
|
9
9
|
<small>A web scraping and browser automation library</small>
|
|
10
10
|
</h1>
|
|
11
11
|
|
|
12
|
+
<p align=center>
|
|
13
|
+
<a href="https://trendshift.io/repositories/5179" target="_blank"><img src="https://trendshift.io/api/badge/repositories/5179" alt="apify%2Fcrawlee | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
|
|
14
|
+
</p>
|
|
15
|
+
|
|
12
16
|
<p align=center>
|
|
13
17
|
<a href="https://www.npmjs.com/package/@crawlee/core" rel="nofollow"><img src="https://img.shields.io/npm/v/@crawlee/core.svg" alt="NPM latest version" data-canonical-src="https://img.shields.io/npm/v/@crawlee/core/next.svg" style="max-width: 100%;"></a>
|
|
14
18
|
<a href="https://www.npmjs.com/package/@crawlee/core" rel="nofollow"><img src="https://img.shields.io/npm/dm/@crawlee/core.svg" alt="Downloads" data-canonical-src="https://img.shields.io/npm/dm/@crawlee/core.svg" style="max-width: 100%;"></a>
|
|
@@ -24,7 +28,7 @@ Crawlee is available as the [`crawlee`](https://www.npmjs.com/package/crawlee) N
|
|
|
24
28
|
|
|
25
29
|
> 👉 **View full documentation, guides and examples on the [Crawlee project website](https://crawlee.dev)** 👈
|
|
26
30
|
|
|
27
|
-
>
|
|
31
|
+
> Do you prefer 🐍 Python instead of JavaScript? [👉 Checkout Crawlee for Python 👈](https://github.com/apify/crawlee-python).
|
|
28
32
|
|
|
29
33
|
## Installation
|
|
30
34
|
|
|
@@ -148,7 +148,6 @@ export declare class RequestHandlerResult {
|
|
|
148
148
|
private _keyValueStoreChanges;
|
|
149
149
|
private pushDataCalls;
|
|
150
150
|
private addRequestsCalls;
|
|
151
|
-
private enqueueLinksCalls;
|
|
152
151
|
constructor(config: Configuration, crawleeStateKey: string);
|
|
153
152
|
/**
|
|
154
153
|
* A record of calls to {@link RestrictedCrawlingContext.pushData}, {@link RestrictedCrawlingContext.addRequests}, {@link RestrictedCrawlingContext.enqueueLinks} made by a request handler.
|
|
@@ -156,7 +155,6 @@ export declare class RequestHandlerResult {
|
|
|
156
155
|
get calls(): ReadonlyDeep<{
|
|
157
156
|
pushData: Parameters<RestrictedCrawlingContext['pushData']>[];
|
|
158
157
|
addRequests: Parameters<RestrictedCrawlingContext['addRequests']>[];
|
|
159
|
-
enqueueLinks: Parameters<RestrictedCrawlingContext['enqueueLinks']>[];
|
|
160
158
|
}>;
|
|
161
159
|
/**
|
|
162
160
|
* A record of changes made to key-value stores by a request handler.
|
|
@@ -187,7 +185,6 @@ export declare class RequestHandlerResult {
|
|
|
187
185
|
label?: string;
|
|
188
186
|
}[]>;
|
|
189
187
|
pushData: RestrictedCrawlingContext['pushData'];
|
|
190
|
-
enqueueLinks: RestrictedCrawlingContext['enqueueLinks'];
|
|
191
188
|
addRequests: RestrictedCrawlingContext['addRequests'];
|
|
192
189
|
useState: RestrictedCrawlingContext['useState'];
|
|
193
190
|
getKeyValueStore: RestrictedCrawlingContext['getKeyValueStore'];
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"crawler_commons.d.ts","sourceRoot":"","sources":["../../src/crawlers/crawler_commons.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AACjD,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,cAAc,CAAC;AAChD,OAAO,KAAK,EAAE,YAAY,EAAE,WAAW,EAAE,MAAM,WAAW,CAAC;AAE3D,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AACzD,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,mCAAmC,CAAC;AAC7E,OAAO,KAAK,EAAE,GAAG,EAAE,MAAM,WAAW,CAAC;AACrC,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,2BAA2B,CAAC;AAC3D,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,eAAe,CAAC;AACrD,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,4BAA4B,CAAC;AAC1D,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,wBAAwB,CAAC;AACtD,OAAO,EAAE,aAAa,EAAE,KAAK,aAAa,EAAE,MAAM,gCAAgC,CAAC;AACnF,OAAO,KAAK,EAAE,4BAA4B,EAAE,MAAM,iCAAiC,CAAC;AAEpF,gBAAgB;AAChB,MAAM,MAAM,KAAK,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,GAAG,IAAI,GAAG,KAAK,CAAC;AAEtD,gBAAgB;AAChB,MAAM,MAAM,YAAY,CAAC,CAAC,EAAE,CAAC,SAAS,MAAM,CAAC,IAAI,CAAC,GAAG;KAAG,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;CAAE,CAAC;AAE1E,MAAM,MAAM,aAAa,CAAC,CAAC,SAAS,OAAO,IAAI,YAAY,CAAC,CAAC,EAAE,IAAI,GAAG,WAAW,CAAC,CAAC;AAEnF,gBAAgB;AAChB,MAAM,MAAM,aAAa,CAAC,OAAO,SAAS,yBAAyB,IAAI,KAAK,CAAC,OAAO,CAAC,SAAS,IAAI,GAC5F,OAAO,GACP;IACI,OAAO,EAAE,aAAa,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,CAAC;CAC9C,GAAG,IAAI,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC;AAEnC,MAAM,WAAW,yBAAyB,CAAC,QAAQ,SAAS,UAAU,GAAG,UAAU;IAC/E,EAAE,EAAE,MAAM,CAAC;IACX,OAAO,CAAC,EAAE,OAAO,CAAC;IAElB;;;OAGG;IACH,SAAS,CAAC,EAAE,SAAS,CAAC;IAEtB;;OAEG;IACH,OAAO,EAAE,OAAO,CAAC,QAAQ,CAAC,CAAC;IAE3B;;;;;;OAMG;IACH,QAAQ,CAAC,IAAI,EAAE,YAAY,CAAC,UAAU,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,eAAe,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAE1G;;;;;;;;;;;;;;;;;;;;;;;OAuBG;IACH,YAAY,EAAE,CACV,OAAO,EAAE,YAAY,CAAC,IAAI,CAAC,WAAW,CAAC,mBAAmB,EAAE,MAAM,CAAC,EAAE,cAAc,GAAG,eAAe,CAAC,CAAC,KACtG,OAAO,CAAC,OAAO,CAAC,CAAC;IAEtB;;;;;OAKG;IACH,WAAW,EAAE,CACT,YAAY,EAAE,YAAY,CAAC,CAAC,MAAM,GAAG,MAAM,CAAC,EAAE,CAAC,EAC/C,OAAO,CAAC,EAAE,YAAY,CAAC,4BAA4B,CAAC,KACnD,OAAO,CAAC,IAAI,CAAC,CAAC;IAEnB;;OAEG;IACH,QAAQ,EAAE,CAAC,KAAK,SAAS,UAAU,GAAG,UAAU,EAAE,YAAY,CAAC,EAAE,KAAK,KAAK,OAAO,CAAC,KAAK,CAAC,CAAC;IAE1F;;OAEG;IACH,gBAAgB,EAAE,CACd,QAAQ,CAAC,EAAE,MAAM,KAChB,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,IAAI,GAAG,MAAM,GAAG,UAAU,GAAG,mBAAmB,GAAG,UAAU,GAAG,cAAc,CAAC,CAAC,CAAC;IAElH;;OAEG;IACH,GAAG,EAAE,GAAG,CAAC;CACZ;AAED,MAAM,WAAW,eAAe,CAAC,QAAQ,SAAS,UAAU,GAAG,UAAU,CAAE,SAAQ,yBAAyB,CAAC,QAAQ,CAAC;IAClH;;;;;;;;;;;;;;;;;;;;;;;;OAwBG;IACH,YAAY,CACR,OAAO,EAAE,YAAY,CAAC,IAAI,CAAC,WAAW,CAAC,mBAAmB,EAAE,MAAM,CAAC,EAAE,cAAc,GAAG,eAAe,CAAC,CAAC,GACnG,IAAI,CAAC,mBAAmB,EAAE,cAAc,GAAG,eAAe,CAAC,GAChE,OAAO,CAAC,OAAO,CAAC,CAAC;IAEpB;;;;;;;;;;;;;;;;OAgBG;IACH,WAAW,CAAC,eAAe,CAAC,EAAE,OAAO,CAAC,WAAW,CAAC,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC;IAEvE;;OAEG;IACH,uBAAuB,CAAC,OAAO,EAAE,MAAM,OAAO,CAAC,OAAO,CAAC,GAAG,IAAI,CAAC;CAClE;AAED;;;;GAIG;AACH,qBAAa,oBAAoB;
|
|
1
|
+
{"version":3,"file":"crawler_commons.d.ts","sourceRoot":"","sources":["../../src/crawlers/crawler_commons.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AACjD,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,cAAc,CAAC;AAChD,OAAO,KAAK,EAAE,YAAY,EAAE,WAAW,EAAE,MAAM,WAAW,CAAC;AAE3D,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AACzD,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,mCAAmC,CAAC;AAC7E,OAAO,KAAK,EAAE,GAAG,EAAE,MAAM,WAAW,CAAC;AACrC,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,2BAA2B,CAAC;AAC3D,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,eAAe,CAAC;AACrD,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,4BAA4B,CAAC;AAC1D,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,wBAAwB,CAAC;AACtD,OAAO,EAAE,aAAa,EAAE,KAAK,aAAa,EAAE,MAAM,gCAAgC,CAAC;AACnF,OAAO,KAAK,EAAE,4BAA4B,EAAE,MAAM,iCAAiC,CAAC;AAEpF,gBAAgB;AAChB,MAAM,MAAM,KAAK,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,GAAG,IAAI,GAAG,KAAK,CAAC;AAEtD,gBAAgB;AAChB,MAAM,MAAM,YAAY,CAAC,CAAC,EAAE,CAAC,SAAS,MAAM,CAAC,IAAI,CAAC,GAAG;KAAG,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;CAAE,CAAC;AAE1E,MAAM,MAAM,aAAa,CAAC,CAAC,SAAS,OAAO,IAAI,YAAY,CAAC,CAAC,EAAE,IAAI,GAAG,WAAW,CAAC,CAAC;AAEnF,gBAAgB;AAChB,MAAM,MAAM,aAAa,CAAC,OAAO,SAAS,yBAAyB,IAAI,KAAK,CAAC,OAAO,CAAC,SAAS,IAAI,GAC5F,OAAO,GACP;IACI,OAAO,EAAE,aAAa,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,CAAC;CAC9C,GAAG,IAAI,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC;AAEnC,MAAM,WAAW,yBAAyB,CAAC,QAAQ,SAAS,UAAU,GAAG,UAAU;IAC/E,EAAE,EAAE,MAAM,CAAC;IACX,OAAO,CAAC,EAAE,OAAO,CAAC;IAElB;;;OAGG;IACH,SAAS,CAAC,EAAE,SAAS,CAAC;IAEtB;;OAEG;IACH,OAAO,EAAE,OAAO,CAAC,QAAQ,CAAC,CAAC;IAE3B;;;;;;OAMG;IACH,QAAQ,CAAC,IAAI,EAAE,YAAY,CAAC,UAAU,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,eAAe,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAE1G;;;;;;;;;;;;;;;;;;;;;;;OAuBG;IACH,YAAY,EAAE,CACV,OAAO,EAAE,YAAY,CAAC,IAAI,CAAC,WAAW,CAAC,mBAAmB,EAAE,MAAM,CAAC,EAAE,cAAc,GAAG,eAAe,CAAC,CAAC,KACtG,OAAO,CAAC,OAAO,CAAC,CAAC;IAEtB;;;;;OAKG;IACH,WAAW,EAAE,CACT,YAAY,EAAE,YAAY,CAAC,CAAC,MAAM,GAAG,MAAM,CAAC,EAAE,CAAC,EAC/C,OAAO,CAAC,EAAE,YAAY,CAAC,4BAA4B,CAAC,KACnD,OAAO,CAAC,IAAI,CAAC,CAAC;IAEnB;;OAEG;IACH,QAAQ,EAAE,CAAC,KAAK,SAAS,UAAU,GAAG,UAAU,EAAE,YAAY,CAAC,EAAE,KAAK,KAAK,OAAO,CAAC,KAAK,CAAC,CAAC;IAE1F;;OAEG;IACH,gBAAgB,EAAE,CACd,QAAQ,CAAC,EAAE,MAAM,KAChB,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,IAAI,GAAG,MAAM,GAAG,UAAU,GAAG,mBAAmB,GAAG,UAAU,GAAG,cAAc,CAAC,CAAC,CAAC;IAElH;;OAEG;IACH,GAAG,EAAE,GAAG,CAAC;CACZ;AAED,MAAM,WAAW,eAAe,CAAC,QAAQ,SAAS,UAAU,GAAG,UAAU,CAAE,SAAQ,yBAAyB,CAAC,QAAQ,CAAC;IAClH;;;;;;;;;;;;;;;;;;;;;;;;OAwBG;IACH,YAAY,CACR,OAAO,EAAE,YAAY,CAAC,IAAI,CAAC,WAAW,CAAC,mBAAmB,EAAE,MAAM,CAAC,EAAE,cAAc,GAAG,eAAe,CAAC,CAAC,GACnG,IAAI,CAAC,mBAAmB,EAAE,cAAc,GAAG,eAAe,CAAC,GAChE,OAAO,CAAC,OAAO,CAAC,CAAC;IAEpB;;;;;;;;;;;;;;;;OAgBG;IACH,WAAW,CAAC,eAAe,CAAC,EAAE,OAAO,CAAC,WAAW,CAAC,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC;IAEvE;;OAEG;IACH,uBAAuB,CAAC,OAAO,EAAE,MAAM,OAAO,CAAC,OAAO,CAAC,GAAG,IAAI,CAAC;CAClE;AAED;;;;GAIG;AACH,qBAAa,oBAAoB;IASzB,OAAO,CAAC,MAAM;IACd,OAAO,CAAC,eAAe;IAT3B,OAAO,CAAC,qBAAqB,CACtB;IAEP,OAAO,CAAC,aAAa,CAA2D;IAEhF,OAAO,CAAC,gBAAgB,CAA8D;gBAG1E,MAAM,EAAE,aAAa,EACrB,eAAe,EAAE,MAAM;IAGnC;;OAEG;IACH,IAAI,KAAK,IAAI,YAAY,CAAC;QACtB,QAAQ,EAAE,UAAU,CAAC,yBAAyB,CAAC,UAAU,CAAC,CAAC,EAAE,CAAC;QAC9D,WAAW,EAAE,UAAU,CAAC,yBAAyB,CAAC,aAAa,CAAC,CAAC,EAAE,CAAC;KACvE,CAAC,CAKD;IAED;;OAEG;IACH,IAAI,oBAAoB,IAAI,YAAY,CACpC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE;QAAE,YAAY,EAAE,OAAO,CAAC;QAAC,OAAO,CAAC,EAAE,aAAa,CAAA;KAAE,CAAC,CAAC,CACrF,CAEA;IAED;;OAEG;IACH,IAAI,YAAY,IAAI,YAAY,CAAC;QAAE,IAAI,EAAE,UAAU,CAAC;QAAC,eAAe,CAAC,EAAE,MAAM,CAAA;KAAE,EAAE,CAAC,CAIjF;IAED;;OAEG;IACH,IAAI,YAAY,IAAI,YAAY,CAAC;QAAE,GAAG,EAAE,MAAM,CAAC;QAAC,KAAK,CAAC,EAAE,MAAM,CAAA;KAAE,EAAE,CAAC,CAkBlE;IAED;;OAEG;IACH,IAAI,gBAAgB,IAAI,YAAY,CAAC;QAAE,OAAO,EAAE,MAAM,CAAC;QAAC,KAAK,CAAC,EAAE,MAAM,CAAA;KAAE,EAAE,CAAC,CAgB1E;IAED,QAAQ,EAAE,yBAAyB,CAAC,UAAU,CAAC,CAE7C;IAEF,WAAW,EAAE,yBAAyB,CAAC,aAAa,CAAC,CAEnD;IAEF,QAAQ,EAAE,yBAAyB,CAAC,UAAU,CAAC,CAG7C;IAEF,gBAAgB,EAAE,yBAAyB,CAAC,kBAAkB,CAAC,CAa7D;IAEF,OAAO,CAAC,WAAW,CAAwF;IAE3G,OAAO,CAAC,4BAA4B,CAIlC;IAEF,OAAO,CAAC,4BAA4B,CASlC;CACL"}
|
|
@@ -10,7 +10,6 @@ export class RequestHandlerResult {
|
|
|
10
10
|
_keyValueStoreChanges = {};
|
|
11
11
|
pushDataCalls = [];
|
|
12
12
|
addRequestsCalls = [];
|
|
13
|
-
enqueueLinksCalls = [];
|
|
14
13
|
constructor(config, crawleeStateKey) {
|
|
15
14
|
this.config = config;
|
|
16
15
|
this.crawleeStateKey = crawleeStateKey;
|
|
@@ -22,7 +21,6 @@ export class RequestHandlerResult {
|
|
|
22
21
|
return {
|
|
23
22
|
pushData: this.pushDataCalls,
|
|
24
23
|
addRequests: this.addRequestsCalls,
|
|
25
|
-
enqueueLinks: this.enqueueLinksCalls,
|
|
26
24
|
};
|
|
27
25
|
}
|
|
28
26
|
/**
|
|
@@ -42,9 +40,6 @@ export class RequestHandlerResult {
|
|
|
42
40
|
*/
|
|
43
41
|
get enqueuedUrls() {
|
|
44
42
|
const result = [];
|
|
45
|
-
for (const [options] of this.enqueueLinksCalls) {
|
|
46
|
-
result.push(...(options?.urls?.map((url) => ({ url, label: options?.label })) ?? []));
|
|
47
|
-
}
|
|
48
43
|
for (const [requests] of this.addRequestsCalls) {
|
|
49
44
|
for (const request of requests) {
|
|
50
45
|
if (typeof request === 'object' &&
|
|
@@ -78,9 +73,6 @@ export class RequestHandlerResult {
|
|
|
78
73
|
pushData = async (data, datasetIdOrName) => {
|
|
79
74
|
this.pushDataCalls.push([data, datasetIdOrName]);
|
|
80
75
|
};
|
|
81
|
-
enqueueLinks = async (options) => {
|
|
82
|
-
this.enqueueLinksCalls.push([options]);
|
|
83
|
-
};
|
|
84
76
|
addRequests = async (requests, options = {}) => {
|
|
85
77
|
this.addRequestsCalls.push([requests, options]);
|
|
86
78
|
};
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"crawler_commons.js","sourceRoot":"","sources":["../../src/crawlers/crawler_commons.ts"],"names":[],"mappings":"AAWA,OAAO,EAAE,aAAa,EAAsB,MAAM,gCAAgC,CAAC;AA2JnF;;;;GAIG;AACH,MAAM,OAAO,oBAAoB;
|
|
1
|
+
{"version":3,"file":"crawler_commons.js","sourceRoot":"","sources":["../../src/crawlers/crawler_commons.ts"],"names":[],"mappings":"AAWA,OAAO,EAAE,aAAa,EAAsB,MAAM,gCAAgC,CAAC;AA2JnF;;;;GAIG;AACH,MAAM,OAAO,oBAAoB;IASjB;IACA;IATJ,qBAAqB,GACzB,EAAE,CAAC;IAEC,aAAa,GAAwD,EAAE,CAAC;IAExE,gBAAgB,GAA2D,EAAE,CAAC;IAEtF,YACY,MAAqB,EACrB,eAAuB;QADvB,WAAM,GAAN,MAAM,CAAe;QACrB,oBAAe,GAAf,eAAe,CAAQ;IAChC,CAAC;IAEJ;;OAEG;IACH,IAAI,KAAK;QAIL,OAAO;YACH,QAAQ,EAAE,IAAI,CAAC,aAAa;YAC5B,WAAW,EAAE,IAAI,CAAC,gBAAgB;SACrC,CAAC;IACN,CAAC;IAED;;OAEG;IACH,IAAI,oBAAoB;QAGpB,OAAO,IAAI,CAAC,qBAAqB,CAAC;IACtC,CAAC;IAED;;OAEG;IACH,IAAI,YAAY;QACZ,OAAO,IAAI,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,EAAE,eAAe,CAAC,EAAE,EAAE,CAC1D,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,EAAE,IAAI,EAAE,eAAe,EAAE,CAAC,CAAC,CACnF,CAAC;IACN,CAAC;IAED;;OAEG;IACH,IAAI,YAAY;QACZ,MAAM,MAAM,GAAsC,EAAE,CAAC;QAErD,KAAK,MAAM,CAAC,QAAQ,CAAC,IAAI,IAAI,CAAC,gBAAgB,EAAE,CAAC;YAC7C,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;gBAC7B,IACI,OAAO,OAAO,KAAK,QAAQ;oBAC3B,CAAC,CAAC,CAAC,iBAAiB,IAAI,OAAO,CAAC,IAAI,OAAO,CAAC,eAAe,KAAK,SAAS,CAAC;oBAC1E,OAAO,CAAC,GAAG,KAAK,SAAS,EAC3B,CAAC;oBACC,MAAM,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,OAAO,CAAC,GAAG,EAAE,KAAK,EAAE,OAAO,CAAC,KAAK,EAAE,CAAC,CAAC;gBAC5D,CAAC;qBAAM,IAAI,OAAO,OAAO,KAAK,QAAQ,EAAE,CAAC;oBACrC,MAAM,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,OAAO,EAAE,CAAC,CAAC;gBAClC,CAAC;YACL,CAAC;QACL,CAAC;QAED,OAAO,MAAM,CAAC;IAClB,CAAC;IAED;;OAEG;IACH,IAAI,gBAAgB;QAChB,MAAM,MAAM,GAA0C,EAAE,CAAC;QAEzD,KAAK,MAAM,CAAC,QAAQ,CAAC,IAAI,IAAI,CAAC,gBAAgB,EAAE,CAAC;YAC7C,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;gBAC7B,IACI,OAAO,OAAO,KAAK,QAAQ;oBAC3B,iBAAiB,IAAI,OAAO;oBAC5B,OAAO,CAAC,eAAe,KAAK,SAAS,EACvC,CAAC;oBACC,MAAM,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,OAAO,CAAC,eAAe,EAAE,KAAK,EAAE,OAAO,CAAC,KAAK,EAAE,CAAC,CAAC;gBAC5E,CAAC;YACL,CAAC;QACL,CAAC;QAED,OAAO,MAAM,CAAC;IAClB,CAAC;IAED,QAAQ,GAA0C,KAAK,EAAE,IAAI,EAAE,eAAe,EAAE,EAAE;QAC9E,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,eAAe,CAAC,CAAC,CAAC;IACrD,CAAC,CAAC;IAEF,WAAW,GAA6C,KAAK,EAAE,QAAQ,EAAE,OAAO,GAAG,EAAE,EAAE,EAAE;QACrF,IAAI,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC,CAAC;IACpD,CAAC,CAAC;IAEF,QAAQ,GAA0C,KAAK,EAAE,YAAY,EAAE,EAAE;QACrE,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,gBAAgB,CAAC,SAAS,CAAC,CAAC;QACrD,OAAO,MAAM,KAAK,CAAC,iBAAiB,CAAC,IAAI,CAAC,eAAe,EAAE,YAAY,CAAC,CAAC;IAC7E,CAAC,CAAC;IAEF,gBAAgB,GAAkD,KAAK,EAAE,QAAQ,EAAE,EAAE;QACjF,MAAM,KAAK,GAAG,MAAM,aAAa,CAAC,IAAI,CAAC,QAAQ,EAAE,EAAE,MAAM,EAAE,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC;QAE1E,OAAO;YACH,EAAE,EAAE,IAAI,CAAC,WAAW,CAAC,QAAQ,CAAC;YAC9B,IAAI,EAAE,QAAQ;YACd,QAAQ,EAAE,KAAK,EAAE,GAAG,EAAE,EAAE,CAAC,IAAI,CAAC,4BAA4B,CAAC,QAAQ,EAAE,GAAG,CAAC,IAAI,CAAC,MAAM,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC;YACxG,QAAQ,EAAE,KAAK,EAAE,GAAG,EAAE,KAAK,EAAE,OAAO,EAAE,EAAE;gBACpC,IAAI,CAAC,4BAA4B,CAAC,QAAQ,EAAE,GAAG,EAAE,KAAK,EAAE,OAAO,CAAC,CAAC;YACrE,CAAC;YACD,iBAAiB,EAAE,KAAK,CAAC,iBAAiB,CAAC,IAAI,CAAC,KAAK,CAAC;YACtD,YAAY,EAAE,KAAK,CAAC,YAAY,CAAC,IAAI,CAAC,KAAK,CAAC;SAC/C,CAAC;IACN,CAAC,CAAC;IAEM,WAAW,GAAG,CAAC,QAAiB,EAAU,EAAE,CAAC,QAAQ,IAAI,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,wBAAwB,CAAC,CAAC;IAEnG,4BAA4B,GAAG,CAAC,QAA4B,EAAE,GAAW,EAAE,EAAE;QACjF,MAAM,EAAE,GAAG,IAAI,CAAC,WAAW,CAAC,QAAQ,CAAC,CAAC;QACtC,IAAI,CAAC,qBAAqB,CAAC,EAAE,CAAC,KAAK,EAAE,CAAC;QACtC,OAAO,IAAI,CAAC,oBAAoB,CAAC,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,YAAY,IAAI,IAAI,CAAC;IACpE,CAAC,CAAC;IAEM,4BAA4B,GAAG,CACnC,QAA4B,EAC5B,GAAW,EACX,YAAqB,EACrB,OAAuB,EACzB,EAAE;QACA,MAAM,EAAE,GAAG,IAAI,CAAC,WAAW,CAAC,QAAQ,CAAC,CAAC;QACtC,IAAI,CAAC,qBAAqB,CAAC,EAAE,CAAC,KAAK,EAAE,CAAC;QACtC,IAAI,CAAC,qBAAqB,CAAC,EAAE,CAAC,CAAC,GAAG,CAAC,GAAG,EAAE,YAAY,EAAE,OAAO,EAAE,CAAC;IACpE,CAAC,CAAC;CACL"}
|
|
@@ -1,12 +1,9 @@
|
|
|
1
|
-
import type {
|
|
1
|
+
import type { BatchAddRequestsResult, Dictionary } from '@crawlee/types';
|
|
2
2
|
import { type RobotsTxtFile } from '@crawlee/utils';
|
|
3
3
|
import type { SetRequired } from 'type-fest';
|
|
4
|
-
import type {
|
|
5
|
-
import type {
|
|
6
|
-
|
|
7
|
-
url: string;
|
|
8
|
-
reason: 'robotsTxt';
|
|
9
|
-
}) => Awaitable<void>;
|
|
4
|
+
import type { Request } from '../request.js';
|
|
5
|
+
import type { AddRequestsBatchedOptions, AddRequestsBatchedResult, RequestProvider, RequestQueueOperationOptions } from '../storages/request_provider.js';
|
|
6
|
+
import type { GlobInput, PseudoUrlInput, RegExpInput, RequestTransform, SkippedRequestCallback } from './shared.js';
|
|
10
7
|
export interface EnqueueLinksOptions extends RequestQueueOperationOptions {
|
|
11
8
|
/** Limit the amount of actually enqueued URLs to this number. Useful for testing across the entire crawling scope. */
|
|
12
9
|
limit?: number;
|
|
@@ -18,7 +15,12 @@ export interface EnqueueLinksOptions extends RequestQueueOperationOptions {
|
|
|
18
15
|
selector?: string;
|
|
19
16
|
/** Sets {@link Request.userData} for newly enqueued requests. */
|
|
20
17
|
userData?: Dictionary;
|
|
21
|
-
/**
|
|
18
|
+
/**
|
|
19
|
+
* Sets {@link Request.label} for newly enqueued requests.
|
|
20
|
+
*
|
|
21
|
+
* Note that the request options specified in `globs`, `regexps`, or `pseudoUrls` objects
|
|
22
|
+
* have priority over this option.
|
|
23
|
+
*/
|
|
22
24
|
label?: string;
|
|
23
25
|
/**
|
|
24
26
|
* If set to `true`, tells the crawler to skip navigation and process the request directly.
|
|
@@ -49,7 +51,6 @@ export interface EnqueueLinksOptions extends RequestQueueOperationOptions {
|
|
|
49
51
|
* containing patterns matching URLs that will **never** be enqueued.
|
|
50
52
|
*
|
|
51
53
|
* The plain objects must include either the `glob` property or the `regexp` property.
|
|
52
|
-
* All remaining keys will be used as request options for the corresponding enqueued {@link Request} objects.
|
|
53
54
|
*
|
|
54
55
|
* Glob matching is always case-insensitive.
|
|
55
56
|
* If you need case-sensitive matching, provide a regexp.
|
|
@@ -105,9 +106,8 @@ export interface EnqueueLinksOptions extends RequestQueueOperationOptions {
|
|
|
105
106
|
* }
|
|
106
107
|
* ```
|
|
107
108
|
*
|
|
108
|
-
* Note that
|
|
109
|
-
*
|
|
110
|
-
* and thus some options could be over-written by `transformRequestFunction`.
|
|
109
|
+
* Note that the request options specified in `globs`, `regexps`, or `pseudoUrls` objects
|
|
110
|
+
* have priority over this function. Some request options returned by `transformRequestFunction` may be overwritten by pattern-based options from `globs`, `regexps`, or `pseudoUrls`.
|
|
111
111
|
*/
|
|
112
112
|
transformRequestFunction?: RequestTransform;
|
|
113
113
|
/**
|
|
@@ -138,10 +138,13 @@ export interface EnqueueLinksOptions extends RequestQueueOperationOptions {
|
|
|
138
138
|
* RobotsTxtFile instance for the current request that triggered the `enqueueLinks`.
|
|
139
139
|
* If provided, disallowed URLs will be ignored.
|
|
140
140
|
*/
|
|
141
|
-
robotsTxtFile?: RobotsTxtFile
|
|
141
|
+
robotsTxtFile?: Pick<RobotsTxtFile, 'isAllowed'>;
|
|
142
142
|
/**
|
|
143
143
|
* When a request is skipped for some reason, you can use this callback to act on it.
|
|
144
|
-
* This is currently fired
|
|
144
|
+
* This is currently fired for requests skipped
|
|
145
|
+
* 1. based on robots.txt file,
|
|
146
|
+
* 2. because they don't match enqueueLinks filters,
|
|
147
|
+
* 3. or because the maxRequestsPerCrawl limit has been reached
|
|
145
148
|
*/
|
|
146
149
|
onSkippedRequest?: SkippedRequestCallback;
|
|
147
150
|
}
|
|
@@ -220,7 +223,11 @@ export declare enum EnqueueStrategy {
|
|
|
220
223
|
* @param options All `enqueueLinks()` parameters are passed via an options object.
|
|
221
224
|
* @returns Promise that resolves to {@link BatchAddRequestsResult} object.
|
|
222
225
|
*/
|
|
223
|
-
export declare function enqueueLinks(options: SetRequired<EnqueueLinksOptions, 'requestQueue'
|
|
226
|
+
export declare function enqueueLinks(options: SetRequired<Omit<EnqueueLinksOptions, 'requestQueue'>, 'urls'> & {
|
|
227
|
+
requestQueue: {
|
|
228
|
+
addRequestsBatched: (requests: Request<Dictionary>[], options: AddRequestsBatchedOptions) => Promise<AddRequestsBatchedResult>;
|
|
229
|
+
};
|
|
230
|
+
}): Promise<BatchAddRequestsResult>;
|
|
224
231
|
/**
|
|
225
232
|
* @internal
|
|
226
233
|
* This method helps resolve the baseUrl that will be used for filtering in {@link enqueueLinks}.
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"enqueue_links.d.ts","sourceRoot":"","sources":["../../src/enqueue_links/enqueue_links.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,
|
|
1
|
+
{"version":3,"file":"enqueue_links.d.ts","sourceRoot":"","sources":["../../src/enqueue_links/enqueue_links.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,sBAAsB,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AACzE,OAAO,EAAE,KAAK,aAAa,EAAE,MAAM,gBAAgB,CAAC;AAGpD,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,WAAW,CAAC;AAI7C,OAAO,KAAK,EAAE,OAAO,EAAkB,MAAM,eAAe,CAAC;AAC7D,OAAO,KAAK,EACR,yBAAyB,EACzB,wBAAwB,EACxB,eAAe,EACf,4BAA4B,EAC/B,MAAM,iCAAiC,CAAC;AACzC,OAAO,KAAK,EACR,SAAS,EACT,cAAc,EACd,WAAW,EACX,gBAAgB,EAChB,sBAAsB,EAGzB,MAAM,aAAa,CAAC;AAUrB,MAAM,WAAW,mBAAoB,SAAQ,4BAA4B;IACrE,sHAAsH;IACtH,KAAK,CAAC,EAAE,MAAM,CAAC;IAEf,mCAAmC;IACnC,IAAI,CAAC,EAAE,SAAS,MAAM,EAAE,CAAC;IAEzB,0DAA0D;IAC1D,YAAY,CAAC,EAAE,eAAe,CAAC;IAE/B,oDAAoD;IACpD,QAAQ,CAAC,EAAE,MAAM,CAAC;IAElB,oEAAoE;IACpE,QAAQ,CAAC,EAAE,UAAU,CAAC;IAEtB;;;;;OAKG;IACH,KAAK,CAAC,EAAE,MAAM,CAAC;IAEf;;;OAGG;IACH,cAAc,CAAC,EAAE,OAAO,CAAC;IAEzB;;;OAGG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;IAEjB;;;;;;;;;;;;OAYG;IACH,KAAK,CAAC,EAAE,SAAS,SAAS,EAAE,CAAC;IAE7B;;;;;;;;OAQG;IACH,OAAO,CAAC,EAAE,SAAS,CAAC,SAAS,GAAG,WAAW,CAAC,EAAE,CAAC;IAE/C;;;;;;;;;OASG;IACH,OAAO,CAAC,EAAE,SAAS,WAAW,EAAE,CAAC;IAEjC;;;;;;;;;;;;;;;;;OAiBG;IACH,UAAU,CAAC,EAAE,SAAS,cAAc,EAAE,CAAC;IAEvC;;;;;;;;;;;;;;;;;;;;;;OAsBG;IACH,wBAAwB,CAAC,EAAE,gBAAgB,CAAC;IAE5C;;;;;;;;;;;;;;;;;OAiBG;IACH,QAAQ,CAAC,EAAE,eAAe,GAAG,KAAK,GAAG,aAAa,GAAG,eAAe,GAAG,aAAa,CAAC;IAErF;;;OAGG;IACH,2BAA2B,CAAC,EAAE,OAAO,CAAC;IAEtC;;;OAGG;IACH,aAAa,CAAC,EAAE,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC;IAEjD;;;;;;OAMG;IACH,gBAAgB,CAAC,EAAE,sBAAsB,CAAC;CAC7C;AAED;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,oBAAY,eAAe;IACvB;;OAEG;IACH,GAAG,QAAQ;IAEX;;;;;;OAMG;IACH,YAAY,kBAAkB;IAE9B;;;;;;OAMG;IACH,UAAU,gBAAgB;IAE1B;;;;;;OAMG;IACH,UAAU,gBAAgB;CAC7B;AAED;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,wBAAsB,YAAY,CAC9B,OAAO,EAAE,WAAW,CAAC,IAAI,CAAC,mBAAmB,EAAE,cAAc,CAAC,EAAE,MAAM,CAAC,GAAG;IACtE,YAAY,EAAE;QACV,kBAAkB,EAAE,CAChB,QAAQ,EAAE,OAAO,CAAC,UAAU,CAAC,EAAE,EAC/B,OAAO,EAAE,yBAAyB,KACjC,OAAO,CAAC,wBAAwB,CAAC,CAAC;KAC1C,CAAC;CACL,GACF,OAAO,CAAC,sBAAsB,CAAC,CA4NjC;AAED;;;;;;;GAOG;AACH,wBAAgB,sCAAsC,CAAC,EACnD,eAAe,EACf,eAAe,EACf,kBAAkB,EAClB,mBAAmB,GACtB,EAAE,cAAc,sBA+BhB;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC3B,mBAAmB,CAAC,EAAE,MAAM,CAAC;IAC7B,eAAe,CAAC,EAAE,mBAAmB,CAAC,UAAU,CAAC,CAAC;IAClD,kBAAkB,EAAE,MAAM,CAAC;IAC3B,eAAe,CAAC,EAAE,MAAM,CAAC;CAC5B"}
|
|
@@ -87,7 +87,7 @@ export async function enqueueLinks(options) {
|
|
|
87
87
|
}
|
|
88
88
|
ow(options, ow.object.exactShape({
|
|
89
89
|
urls: ow.array.ofType(ow.string),
|
|
90
|
-
requestQueue: ow.object.hasKeys('
|
|
90
|
+
requestQueue: ow.object.hasKeys('addRequestsBatched'),
|
|
91
91
|
robotsTxtFile: ow.optional.object.hasKeys('isAllowed'),
|
|
92
92
|
onSkippedRequest: ow.optional.function,
|
|
93
93
|
forefront: ow.optional.boolean,
|
|
@@ -167,6 +167,16 @@ export async function enqueueLinks(options) {
|
|
|
167
167
|
break;
|
|
168
168
|
}
|
|
169
169
|
}
|
|
170
|
+
async function reportSkippedRequests(skippedRequests, reason) {
|
|
171
|
+
if (onSkippedRequest && skippedRequests.length > 0) {
|
|
172
|
+
await Promise.all(skippedRequests.map((request) => {
|
|
173
|
+
return onSkippedRequest({
|
|
174
|
+
url: request.url,
|
|
175
|
+
reason: request.skippedReason ?? reason,
|
|
176
|
+
});
|
|
177
|
+
}));
|
|
178
|
+
}
|
|
179
|
+
}
|
|
170
180
|
let requestOptions = createRequestOptions(urls, options);
|
|
171
181
|
if (robotsTxtFile) {
|
|
172
182
|
const skippedRequests = [];
|
|
@@ -177,30 +187,39 @@ export async function enqueueLinks(options) {
|
|
|
177
187
|
skippedRequests.push(request);
|
|
178
188
|
return false;
|
|
179
189
|
});
|
|
180
|
-
|
|
181
|
-
await Promise.all(skippedRequests.map((request) => {
|
|
182
|
-
return onSkippedRequest({ url: request.url, reason: 'robotsTxt' });
|
|
183
|
-
}));
|
|
184
|
-
}
|
|
190
|
+
await reportSkippedRequests(skippedRequests, 'robotsTxt');
|
|
185
191
|
}
|
|
186
192
|
if (transformRequestFunction) {
|
|
193
|
+
const skippedRequests = [];
|
|
187
194
|
requestOptions = requestOptions
|
|
188
|
-
.map((request) =>
|
|
189
|
-
|
|
195
|
+
.map((request) => {
|
|
196
|
+
const transformedRequest = transformRequestFunction(request);
|
|
197
|
+
if (!transformedRequest) {
|
|
198
|
+
skippedRequests.push(request);
|
|
199
|
+
}
|
|
200
|
+
return transformedRequest;
|
|
201
|
+
})
|
|
202
|
+
.filter((r) => Boolean(r));
|
|
203
|
+
await reportSkippedRequests(skippedRequests, 'filters');
|
|
190
204
|
}
|
|
191
|
-
function createFilteredRequests() {
|
|
205
|
+
async function createFilteredRequests() {
|
|
206
|
+
const skippedRequests = [];
|
|
192
207
|
// No user provided patterns means we can skip an extra filtering step
|
|
193
208
|
if (urlPatternObjects.length === 0) {
|
|
194
|
-
return createRequests(requestOptions, enqueueStrategyPatterns, urlExcludePatternObjects, options.strategy);
|
|
209
|
+
return createRequests(requestOptions, enqueueStrategyPatterns, urlExcludePatternObjects, options.strategy, (url) => skippedRequests.push(url));
|
|
195
210
|
}
|
|
196
211
|
// Generate requests based on the user patterns first
|
|
197
|
-
const generatedRequestsFromUserFilters = createRequests(requestOptions, urlPatternObjects, urlExcludePatternObjects, options.strategy);
|
|
212
|
+
const generatedRequestsFromUserFilters = createRequests(requestOptions, urlPatternObjects, urlExcludePatternObjects, options.strategy, (url) => skippedRequests.push(url));
|
|
198
213
|
// ...then filter them by the enqueue links strategy (making this an AND check)
|
|
199
|
-
|
|
214
|
+
const filtered = filterRequestsByPatterns(generatedRequestsFromUserFilters, enqueueStrategyPatterns, (url) => skippedRequests.push(url));
|
|
215
|
+
await reportSkippedRequests(skippedRequests.map((url) => ({ url })), 'filters');
|
|
216
|
+
return filtered;
|
|
200
217
|
}
|
|
201
|
-
let requests = createFilteredRequests();
|
|
202
|
-
if (limit)
|
|
218
|
+
let requests = await createFilteredRequests();
|
|
219
|
+
if (typeof limit === 'number' && limit < requests.length) {
|
|
220
|
+
await reportSkippedRequests(requests.slice(limit), 'enqueueLimit');
|
|
203
221
|
requests = requests.slice(0, limit);
|
|
222
|
+
}
|
|
204
223
|
const { addedRequests } = await requestQueue.addRequestsBatched(requests, {
|
|
205
224
|
forefront,
|
|
206
225
|
waitForAllRequestsToBeAdded,
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"enqueue_links.js","sourceRoot":"","sources":["../../src/enqueue_links/enqueue_links.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,MAAM,IAAI,CAAC;AACpB,OAAO,EAAE,SAAS,EAAE,MAAM,OAAO,CAAC;AAGlC,OAAO,GAAG,MAAM,YAAY,CAAC;
|
|
1
|
+
{"version":3,"file":"enqueue_links.js","sourceRoot":"","sources":["../../src/enqueue_links/enqueue_links.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,MAAM,IAAI,CAAC;AACpB,OAAO,EAAE,SAAS,EAAE,MAAM,OAAO,CAAC;AAGlC,OAAO,GAAG,MAAM,YAAY,CAAC;AAkB7B,OAAO,EACH,6BAA6B,EAC7B,oCAAoC,EACpC,iCAAiC,EACjC,oBAAoB,EACpB,cAAc,EACd,wBAAwB,GAC3B,MAAM,aAAa,CAAC;AAmKrB;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,MAAM,CAAN,IAAY,eAgCX;AAhCD,WAAY,eAAe;IACvB;;OAEG;IACH,8BAAW,CAAA;IAEX;;;;;;OAMG;IACH,iDAA8B,CAAA;IAE9B;;;;;;OAMG;IACH,6CAA0B,CAAA;IAE1B;;;;;;OAMG;IACH,6CAA0B,CAAA;AAC9B,CAAC,EAhCW,eAAe,KAAf,eAAe,QAgC1B;AAED;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,MAAM,CAAC,KAAK,UAAU,YAAY,CAC9B,OAOC;IAED,IAAI,CAAC,OAAO,IAAI,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAChD,MAAM,IAAI,UAAU,CAChB;YACI,4JAA4J;YAC5J,kHAAkH;SACrH,CAAC,IAAI,CAAC,IAAI,CAAC,CACf,CAAC;IACN,CAAC;IAED,EAAE,CACE,OAAc,EACd,EAAE,CAAC,MAAM,CAAC,UAAU,CAAC;QACjB,IAAI,EAAE,EAAE,CAAC,KAAK,CAAC,MAAM,CAAC,EAAE,CAAC,MAAM,CAAC;QAChC,YAAY,EAAE,EAAE,CAAC,MAAM,CAAC,OAAO,CAAC,oBAAoB,CAAC;QACrD,aAAa,EAAE,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,OAAO,CAAC,WAAW,CAAC;QACtD,gBAAgB,EAAE,EAAE,CAAC,QAAQ,CAAC,QAAQ;QACtC,SAAS,EAAE,EAAE,CAAC,QAAQ,CAAC,OAAO;QAC9B,cAAc,EAAE,EAAE,CAAC,QAAQ,CAAC,OAAO;QACnC,KAAK,EAAE,EAAE,CAAC,QAAQ,CAAC,MAAM;QACzB,QAAQ,EAAE,EAAE,CAAC,QAAQ,CAAC,MAAM;QAC5B,OAAO,EAAE,EAAE,CAAC,QAAQ,CAAC,MAAM;QAC3B,QAAQ,EAAE,EAAE,CAAC,QAAQ,CAAC,MAAM;QAC5B,KAAK,EAAE,EAAE,CAAC,QAAQ,CAAC,MAAM;QACzB,UAAU,EAAE,EAAE,CAAC,QAAQ,CAAC,KAAK,CAAC,MAAM,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC;QAClF,KAAK,EAAE,EAAE,CAAC,QAAQ,CAAC,KAAK,CAAC,MAAM,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC;QAC7E,OAAO,EAAE,EAAE,CAAC,QAAQ,CAAC,KAAK,CAAC,MAAM,CAC7B,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,EAAE,CAAC,MAAM,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,CACvF;QACD,OAAO,EAAE,EAAE,CAAC,QAAQ,CAAC,KAAK,CAAC,MAAM,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC;QACjF,wBAAwB,EAAE,EAAE,CAAC,QAAQ,CAAC,QAAQ;QAC9C,QAAQ,EAAE,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,MAAM,CAAC,eAAe,CAAC,CAAC;QAClE,2BAA2B,EAAE,EAAE,CAAC,QAAQ,CAAC,OAAO;KACnD,CAAC,CACL,CAAC;IAEF,MAAM,EACF,YAAY,EACZ,KAAK,EACL,IAAI,EACJ,UAAU,EACV,OAAO,EACP,KAAK,EACL,OAAO,EACP,wBAAwB,EACxB,SAAS,EACT,2BAA2B,EAC3B,aAAa,EACb,gBAAgB,GACnB,GAAG,OAAO,CAAC;IAEZ,MAAM,wBAAwB,GAAuB,EAAE,CAAC;IACxD,MAAM,iBAAiB,GAAuB,EAAE,CAAC;IAEjD,IAAI,OAAO,EAAE,MAAM,EAAE,CAAC;QAClB,KAAK,MAAM,IAAI,IAAI,OAAO,EAAE,CAAC;YACzB,IAAI,OAAO,IAAI,KAAK,QAAQ,IAAI,MAAM,IAAI,IAAI,EAAE,CAAC;gBAC7C,wBAAwB,CAAC,IAAI,CAAC,GAAG,6BAA6B,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YAC5E,CAAC;iBAAM,IAAI,IAAI,YAAY,MAAM,IAAI,QAAQ,IAAI,IAAI,EAAE,CAAC;gBACpD,wBAAwB,CAAC,IAAI,CAAC,GAAG,iCAAiC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YAChF,CAAC;QACL,CAAC;IACL,CAAC;IAED,IAAI,UAAU,EAAE,MAAM,EAAE,CAAC;QACrB,GAAG,CAAC,UAAU,CAAC,qEAAqE,CAAC,CAAC;QACtF,iBAAiB,CAAC,IAAI,CAAC,GAAG,oCAAoC,CAAC,UAAU,CAAC,CAAC,CAAC;IAChF,CAAC;IAED,IAAI,KAAK,EAAE,MAAM,EAAE,CAAC;QAChB,iBAAiB,CAAC,IAAI,CAAC,GAAG,6BAA6B,CAAC,KAAK,CAAC,CAAC,CAAC;IACpE,CAAC;IAED,IAAI,OAAO,EAAE,MAAM,EAAE,CAAC;QAClB,iBAAiB,CAAC,IAAI,CAAC,GAAG,iCAAiC,CAAC,OAAO,CAAC,CAAC,CAAC;IAC1E,CAAC;IAED,IAAI,CAAC,iBAAiB,CAAC,MAAM,EAAE,CAAC;QAC5B,OAAO,CAAC,QAAQ,KAAK,eAAe,CAAC,YAAY,CAAC;IACtD,CAAC;IAED,MAAM,uBAAuB,GAAuB,EAAE,CAAC;IAEvD,IAAI,OAAO,CAAC,OAAO,EAAE,CAAC;QAClB,MAAM,GAAG,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;QAErC,QAAQ,OAAO,CAAC,QAAQ,EAAE,CAAC;YACvB,KAAK,eAAe,CAAC,YAAY;gBAC7B,sFAAsF;gBACtF,uFAAuF;gBACvF,yCAAyC;gBACzC,uBAAuB,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,gBAAgB,CAAC,GAAG,GAAG,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC,CAAC;gBAC7E,MAAM;YACV,KAAK,eAAe,CAAC,UAAU,CAAC,CAAC,CAAC;gBAC9B,4CAA4C;gBAC5C,MAAM,eAAe,GAAG,SAAS,CAAC,GAAG,CAAC,QAAQ,EAAE,EAAE,WAAW,EAAE,KAAK,EAAE,CAAC,CAAC;gBAExE,IAAI,eAAe,EAAE,CAAC;oBAClB,gHAAgH;oBAChH,GAAG,CAAC,QAAQ,GAAG,eAAe,CAAC;oBAC/B,uBAAuB,CAAC,IAAI,CACxB,EAAE,IAAI,EAAE,gBAAgB,CAAC,GAAG,GAAG,CAAC,MAAM,CAAC,OAAO,CAAC,eAAe,EAAE,KAAK,eAAe,EAAE,CAAC,KAAK,CAAC,EAAE,EAC/F,EAAE,IAAI,EAAE,gBAAgB,CAAC,GAAG,GAAG,CAAC,MAAM,KAAK,CAAC,EAAE,CACjD,CAAC;gBACN,CAAC;qBAAM,CAAC;oBACJ,6FAA6F;oBAC7F,4BAA4B;oBAC5B,uBAAuB,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,gBAAgB,CAAC,GAAG,GAAG,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC,CAAC;gBACjF,CAAC;gBAED,MAAM;YACV,CAAC;YACD,KAAK,eAAe,CAAC,UAAU,CAAC,CAAC,CAAC;gBAC9B,4EAA4E;gBAC5E,uBAAuB,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,GAAG,GAAG,CAAC,MAAM,KAAK,EAAE,CAAC,CAAC;gBAC3D,MAAM;YACV,CAAC;YACD,KAAK,eAAe,CAAC,GAAG,CAAC;YACzB;gBACI,uBAAuB,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,eAAe,EAAE,CAAC,CAAC;gBACxD,MAAM;QACd,CAAC;IACL,CAAC;IAED,KAAK,UAAU,qBAAqB,CAChC,eAAwE,EACxE,MAA4B;QAE5B,IAAI,gBAAgB,IAAI,eAAe,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACjD,MAAM,OAAO,CAAC,GAAG,CACb,eAAe,CAAC,GAAG,CAAC,CAAC,OAAO,EAAE,EAAE;gBAC5B,OAAO,gBAAgB,CAAC;oBACpB,GAAG,EAAE,OAAO,CAAC,GAAG;oBAChB,MAAM,EAAE,OAAO,CAAC,aAAa,IAAI,MAAM;iBAC1C,CAAkB,CAAC;YACxB,CAAC,CAAC,CACL,CAAC;QACN,CAAC;IACL,CAAC;IAED,IAAI,cAAc,GAAG,oBAAoB,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;IAEzD,IAAI,aAAa,EAAE,CAAC;QAChB,MAAM,eAAe,GAAqB,EAAE,CAAC;QAE7C,cAAc,GAAG,cAAc,CAAC,MAAM,CAAC,CAAC,OAAO,EAAE,EAAE;YAC/C,IAAI,aAAa,CAAC,SAAS,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC;gBACvC,OAAO,IAAI,CAAC;YAChB,CAAC;YAED,eAAe,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YAC9B,OAAO,KAAK,CAAC;QACjB,CAAC,CAAC,CAAC;QAEH,MAAM,qBAAqB,CAAC,eAAe,EAAE,WAAW,CAAC,CAAC;IAC9D,CAAC;IAED,IAAI,wBAAwB,EAAE,CAAC;QAC3B,MAAM,eAAe,GAAqB,EAAE,CAAC;QAE7C,cAAc,GAAG,cAAc;aAC1B,GAAG,CAAC,CAAC,OAAO,EAAE,EAAE;YACb,MAAM,kBAAkB,GAAG,wBAAwB,CAAC,OAAO,CAAC,CAAC;YAC7D,IAAI,CAAC,kBAAkB,EAAE,CAAC;gBACtB,eAAe,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YAClC,CAAC;YACD,OAAO,kBAAkB,CAAC;QAC9B,CAAC,CAAC;aACD,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,CAAC,CAAC,CAAqB,CAAC;QAEnD,MAAM,qBAAqB,CAAC,eAAe,EAAE,SAAS,CAAC,CAAC;IAC5D,CAAC;IAED,KAAK,UAAU,sBAAsB;QACjC,MAAM,eAAe,GAAa,EAAE,CAAC;QAErC,sEAAsE;QACtE,IAAI,iBAAiB,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACjC,OAAO,cAAc,CACjB,cAAc,EACd,uBAAuB,EACvB,wBAAwB,EACxB,OAAO,CAAC,QAAQ,EAChB,CAAC,GAAG,EAAE,EAAE,CAAC,eAAe,CAAC,IAAI,CAAC,GAAG,CAAC,CACrC,CAAC;QACN,CAAC;QAED,qDAAqD;QACrD,MAAM,gCAAgC,GAAG,cAAc,CACnD,cAAc,EACd,iBAAiB,EACjB,wBAAwB,EACxB,OAAO,CAAC,QAAQ,EAChB,CAAC,GAAG,EAAE,EAAE,CAAC,eAAe,CAAC,IAAI,CAAC,GAAG,CAAC,CACrC,CAAC;QACF,+EAA+E;QAC/E,MAAM,QAAQ,GAAG,wBAAwB,CAAC,gCAAgC,EAAE,uBAAuB,EAAE,CAAC,GAAG,EAAE,EAAE,CACzG,eAAe,CAAC,IAAI,CAAC,GAAG,CAAC,CAC5B,CAAC;QAEF,MAAM,qBAAqB,CACvB,eAAe,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC,EAAE,GAAG,EAAE,CAAC,CAAC,EACvC,SAAS,CACZ,CAAC;QAEF,OAAO,QAAQ,CAAC;IACpB,CAAC;IAED,IAAI,QAAQ,GAAG,MAAM,sBAAsB,EAAE,CAAC;IAC9C,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC;QACvD,MAAM,qBAAqB,CAAC,QAAQ,CAAC,KAAK,CAAC,KAAK,CAAC,EAAE,cAAc,CAAC,CAAC;QACnE,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC;IACxC,CAAC;IAED,MAAM,EAAE,aAAa,EAAE,GAAG,MAAM,YAAY,CAAC,kBAAkB,CAAC,QAAQ,EAAE;QACtE,SAAS;QACT,2BAA2B;KAC9B,CAAC,CAAC;IAEH,OAAO,EAAE,iBAAiB,EAAE,aAAa,EAAE,mBAAmB,EAAE,EAAE,EAAE,CAAC;AACzE,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,UAAU,sCAAsC,CAAC,EACnD,eAAe,EACf,eAAe,EACf,kBAAkB,EAClB,mBAAmB,GACN;IACb,wCAAwC;IACxC,IAAI,mBAAmB,EAAE,CAAC;QACtB,OAAO,mBAAmB,CAAC;IAC/B,CAAC;IAED,MAAM,iBAAiB,GAAG,IAAI,GAAG,CAAC,kBAAkB,CAAC,CAAC,MAAM,CAAC;IAC7D,MAAM,cAAc,GAAG,IAAI,GAAG,CAAC,eAAe,IAAI,kBAAkB,CAAC,CAAC,MAAM,CAAC;IAE7E,6DAA6D;IAC7D,IAAI,eAAe,KAAK,eAAe,CAAC,GAAG,EAAE,CAAC;QAC1C,OAAO,cAAc,CAAC;IAC1B,CAAC;IAED,0HAA0H;IAC1H,+IAA+I;IAC/I,4DAA4D;IAC5D,IAAI,eAAe,KAAK,eAAe,CAAC,UAAU,EAAE,CAAC;QACjD,MAAM,gBAAgB,GAAG,SAAS,CAAC,iBAAiB,EAAE,EAAE,WAAW,EAAE,KAAK,EAAE,CAAE,CAAC;QAC/E,MAAM,aAAa,GAAG,SAAS,CAAC,cAAc,EAAE,EAAE,WAAW,EAAE,KAAK,EAAE,CAAE,CAAC;QAEzE,IAAI,gBAAgB,KAAK,aAAa,EAAE,CAAC;YACrC,OAAO,cAAc,CAAC;QAC1B,CAAC;QAED,OAAO,SAAS,CAAC;IACrB,CAAC;IAED,2JAA2J;IAC3J,mCAAmC;IACnC,OAAO,iBAAiB,CAAC;AAC7B,CAAC;AAYD;;GAEG;AACH,SAAS,gBAAgB,CAAC,OAAe;IACrC,OAAO,OAAO,CAAC,OAAO,CAAC,gBAAgB,EAAE,aAAa,CAAC,CAAC;AAC5D,CAAC"}
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import type { Awaitable } from '@crawlee/types';
|
|
1
2
|
import type { RequestOptions } from '../request.js';
|
|
2
3
|
import { Request } from '../request.js';
|
|
3
4
|
import type { EnqueueLinksOptions } from './enqueue_links.js';
|
|
@@ -18,6 +19,11 @@ export type RegExpObject = {
|
|
|
18
19
|
regexp: RegExp;
|
|
19
20
|
} & Pick<RequestOptions, 'method' | 'payload' | 'label' | 'userData' | 'headers'>;
|
|
20
21
|
export type RegExpInput = RegExp | RegExpObject;
|
|
22
|
+
export type SkippedRequestReason = 'robotsTxt' | 'limit' | 'enqueueLimit' | 'filters' | 'redirect' | 'depth';
|
|
23
|
+
export type SkippedRequestCallback = (args: {
|
|
24
|
+
url: string;
|
|
25
|
+
reason: SkippedRequestReason;
|
|
26
|
+
}) => Awaitable<void>;
|
|
21
27
|
/**
|
|
22
28
|
* @ignore
|
|
23
29
|
*/
|
|
@@ -47,8 +53,8 @@ export declare function constructRegExpObjectsFromRegExps(regexps: readonly RegE
|
|
|
47
53
|
/**
|
|
48
54
|
* @ignore
|
|
49
55
|
*/
|
|
50
|
-
export declare function createRequests(requestOptions: (string | RequestOptions)[], urlPatternObjects?: UrlPatternObject[], excludePatternObjects?: UrlPatternObject[], strategy?: EnqueueLinksOptions['strategy']): Request[];
|
|
51
|
-
export declare function filterRequestsByPatterns(requests: Request[], patterns?: UrlPatternObject[]): Request[];
|
|
56
|
+
export declare function createRequests(requestOptions: (string | RequestOptions)[], urlPatternObjects?: UrlPatternObject[], excludePatternObjects?: UrlPatternObject[], strategy?: EnqueueLinksOptions['strategy'], onSkippedUrl?: (url: string) => void): Request[];
|
|
57
|
+
export declare function filterRequestsByPatterns(requests: Request[], patterns?: UrlPatternObject[], onSkippedUrl?: (url: string) => void): Request[];
|
|
52
58
|
/**
|
|
53
59
|
* @ignore
|
|
54
60
|
*/
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"shared.d.ts","sourceRoot":"","sources":["../../src/enqueue_links/shared.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"shared.d.ts","sourceRoot":"","sources":["../../src/enqueue_links/shared.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,gBAAgB,CAAC;AAKhD,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,eAAe,CAAC;AACpD,OAAO,EAAE,OAAO,EAAE,MAAM,eAAe,CAAC;AACxC,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,oBAAoB,CAAC;AAE9D,OAAO,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAC;AAYhD,MAAM,MAAM,gBAAgB,GAAG;IAC3B,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,MAAM,CAAC,EAAE,MAAM,CAAC;CACnB,GAAG,IAAI,CAAC,cAAc,EAAE,QAAQ,GAAG,SAAS,GAAG,OAAO,GAAG,UAAU,GAAG,SAAS,CAAC,CAAC;AAElF,MAAM,MAAM,eAAe,GAAG;IAAE,IAAI,EAAE,MAAM,CAAA;CAAE,GAAG,IAAI,CACjD,cAAc,EACd,QAAQ,GAAG,SAAS,GAAG,OAAO,GAAG,UAAU,GAAG,SAAS,CAC1D,CAAC;AAEF,MAAM,MAAM,cAAc,GAAG,MAAM,GAAG,eAAe,CAAC;AAEtD,MAAM,MAAM,UAAU,GAAG;IAAE,IAAI,EAAE,MAAM,CAAA;CAAE,GAAG,IAAI,CAC5C,cAAc,EACd,QAAQ,GAAG,SAAS,GAAG,OAAO,GAAG,UAAU,GAAG,SAAS,CAC1D,CAAC;AAEF,MAAM,MAAM,SAAS,GAAG,MAAM,GAAG,UAAU,CAAC;AAE5C,MAAM,MAAM,YAAY,GAAG;IAAE,MAAM,EAAE,MAAM,CAAA;CAAE,GAAG,IAAI,CAChD,cAAc,EACd,QAAQ,GAAG,SAAS,GAAG,OAAO,GAAG,UAAU,GAAG,SAAS,CAC1D,CAAC;AAEF,MAAM,MAAM,WAAW,GAAG,MAAM,GAAG,YAAY,CAAC;AAEhD,MAAM,MAAM,oBAAoB,GAAG,WAAW,GAAG,OAAO,GAAG,cAAc,GAAG,SAAS,GAAG,UAAU,GAAG,OAAO,CAAC;AAE7G,MAAM,MAAM,sBAAsB,GAAG,CAAC,IAAI,EAAE;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,oBAAoB,CAAA;CAAE,KAAK,SAAS,CAAC,IAAI,CAAC,CAAC;AAE9G;;GAEG;AACH,wBAAgB,8BAA8B,CAC1C,IAAI,EAAE,SAAS,GAAG,WAAW,GAAG,cAAc,EAC9C,OAAO,EAAE,YAAY,GAAG,UAAU,GACnC,IAAI,CAMN;AAED;;;;GAIG;AACH,wBAAgB,oCAAoC,CAAC,UAAU,EAAE,SAAS,cAAc,EAAE,GAAG,YAAY,EAAE,CAiB1G;AAED;;;;GAIG;AACH,wBAAgB,6BAA6B,CAAC,KAAK,EAAE,SAAS,SAAS,EAAE,GAAG,UAAU,EAAE,CAkCvF;AAED;;GAEG;AACH,wBAAgB,mBAAmB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAKxD;AAED;;;;GAIG;AACH,wBAAgB,iCAAiC,CAAC,OAAO,EAAE,SAAS,WAAW,EAAE,GAAG,YAAY,EAAE,CAgBjG;AAED;;GAEG;AACH,wBAAgB,cAAc,CAC1B,cAAc,EAAE,CAAC,MAAM,GAAG,cAAc,CAAC,EAAE,EAC3C,iBAAiB,CAAC,EAAE,gBAAgB,EAAE,EACtC,qBAAqB,GAAE,gBAAgB,EAAO,EAC9C,QAAQ,CAAC,EAAE,mBAAmB,CAAC,UAAU,CAAC,EAC1C,YAAY,CAAC,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,IAAI,GACrC,OAAO,EAAE,CAqCX;AAED,wBAAgB,wBAAwB,CACpC,QAAQ,EAAE,OAAO,EAAE,EACnB,QAAQ,CAAC,EAAE,gBAAgB,EAAE,EAC7B,YAAY,CAAC,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,IAAI,GACrC,OAAO,EAAE,CAmBX;AAED;;GAEG;AACH,wBAAgB,oBAAoB,CAChC,OAAO,EAAE,SAAS,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC,EAAE,EACtD,OAAO,GAAE,IAAI,CAAC,mBAAmB,EAAE,OAAO,GAAG,UAAU,GAAG,SAAS,GAAG,gBAAgB,GAAG,UAAU,CAAM,GAC1G,cAAc,EAAE,CA+BlB;AAmBD;;;GAGG;AACH,MAAM,WAAW,gBAAgB;IAC7B;;;OAGG;IACH,CAAC,QAAQ,EAAE,cAAc,GAAG,cAAc,GAAG,KAAK,GAAG,SAAS,GAAG,IAAI,CAAC;CACzE"}
|
package/enqueue_links/shared.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { URL } from 'node:url';
|
|
2
|
-
import {
|
|
2
|
+
import { Minimatch } from 'minimatch';
|
|
3
3
|
import { purlToRegExp } from '@apify/pseudo_url';
|
|
4
4
|
import { Request } from '../request.js';
|
|
5
5
|
export { tryAbsoluteURL } from '@crawlee/utils';
|
|
@@ -112,22 +112,25 @@ export function constructRegExpObjectsFromRegExps(regexps) {
|
|
|
112
112
|
/**
|
|
113
113
|
* @ignore
|
|
114
114
|
*/
|
|
115
|
-
export function createRequests(requestOptions, urlPatternObjects, excludePatternObjects = [], strategy) {
|
|
115
|
+
export function createRequests(requestOptions, urlPatternObjects, excludePatternObjects = [], strategy, onSkippedUrl) {
|
|
116
|
+
const excludePatternObjectMatchers = excludePatternObjects.map(createPatternObjectMatcher);
|
|
117
|
+
const urlPatternObjectMatchers = urlPatternObjects?.map(createPatternObjectMatcher);
|
|
116
118
|
return requestOptions
|
|
117
119
|
.map((opts) => ({ url: typeof opts === 'string' ? opts : opts.url, opts }))
|
|
118
120
|
.filter(({ url }) => {
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
}
|
|
121
|
+
const matchesExcludePatterns = excludePatternObjectMatchers.some(({ match }) => match(url));
|
|
122
|
+
if (matchesExcludePatterns) {
|
|
123
|
+
onSkippedUrl?.(url);
|
|
124
|
+
}
|
|
125
|
+
return !matchesExcludePatterns;
|
|
123
126
|
})
|
|
124
127
|
.map(({ url, opts }) => {
|
|
125
|
-
if (!
|
|
128
|
+
if (!urlPatternObjectMatchers || !urlPatternObjectMatchers.length) {
|
|
126
129
|
return new Request(typeof opts === 'string' ? { url: opts, enqueueStrategy: strategy } : { ...opts });
|
|
127
130
|
}
|
|
128
|
-
for (const urlPatternObject of
|
|
129
|
-
const {
|
|
130
|
-
if (
|
|
131
|
+
for (const urlPatternObject of urlPatternObjectMatchers) {
|
|
132
|
+
const { match, glob, regexp, ...requestRegExpOptions } = urlPatternObject;
|
|
133
|
+
if (match(url)) {
|
|
131
134
|
const request = typeof opts === 'string'
|
|
132
135
|
? { url: opts, ...requestRegExpOptions, enqueueStrategy: strategy }
|
|
133
136
|
: { ...opts, ...requestRegExpOptions, enqueueStrategy: strategy };
|
|
@@ -135,23 +138,24 @@ export function createRequests(requestOptions, urlPatternObjects, excludePattern
|
|
|
135
138
|
}
|
|
136
139
|
}
|
|
137
140
|
// didn't match any positive pattern
|
|
141
|
+
onSkippedUrl?.(url);
|
|
138
142
|
return null;
|
|
139
143
|
})
|
|
140
144
|
.filter((request) => request);
|
|
141
145
|
}
|
|
142
|
-
export function filterRequestsByPatterns(requests, patterns) {
|
|
146
|
+
export function filterRequestsByPatterns(requests, patterns, onSkippedUrl) {
|
|
143
147
|
if (!patterns?.length) {
|
|
144
148
|
return requests;
|
|
145
149
|
}
|
|
146
150
|
const filtered = [];
|
|
151
|
+
const patternMatchers = patterns?.map(createPatternObjectMatcher);
|
|
147
152
|
for (const request of requests) {
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
}
|
|
153
|
+
const matchingPattern = patternMatchers.find(({ match }) => match(request.url));
|
|
154
|
+
if (matchingPattern !== undefined) {
|
|
155
|
+
filtered.push(request);
|
|
156
|
+
}
|
|
157
|
+
else {
|
|
158
|
+
onSkippedUrl?.(request.url);
|
|
155
159
|
}
|
|
156
160
|
}
|
|
157
161
|
return filtered;
|
|
@@ -187,4 +191,22 @@ export function createRequestOptions(sources, options = {}) {
|
|
|
187
191
|
return requestOptions;
|
|
188
192
|
});
|
|
189
193
|
}
|
|
194
|
+
/**
|
|
195
|
+
* @ignore
|
|
196
|
+
*/
|
|
197
|
+
function createPatternObjectMatcher(urlPatternObject) {
|
|
198
|
+
const { regexp, glob } = urlPatternObject;
|
|
199
|
+
let match;
|
|
200
|
+
if (regexp) {
|
|
201
|
+
match = (url) => regexp.test(url);
|
|
202
|
+
}
|
|
203
|
+
else if (glob) {
|
|
204
|
+
const m = new Minimatch(glob, { nocase: true });
|
|
205
|
+
match = (url) => m.match(url);
|
|
206
|
+
}
|
|
207
|
+
else {
|
|
208
|
+
match = () => false;
|
|
209
|
+
}
|
|
210
|
+
return { ...urlPatternObject, match };
|
|
211
|
+
}
|
|
190
212
|
//# sourceMappingURL=shared.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"shared.js","sourceRoot":"","sources":["../../src/enqueue_links/shared.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,GAAG,EAAE,MAAM,UAAU,CAAC;
|
|
1
|
+
{"version":3,"file":"shared.js","sourceRoot":"","sources":["../../src/enqueue_links/shared.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,GAAG,EAAE,MAAM,UAAU,CAAC;AAG/B,OAAO,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AAEtC,OAAO,EAAE,YAAY,EAAE,MAAM,mBAAmB,CAAC;AAGjD,OAAO,EAAE,OAAO,EAAE,MAAM,eAAe,CAAC;AAGxC,OAAO,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAC;AAEhD,MAAM,4BAA4B,GAAG,IAAI,CAAC;AAE1C;;;;;GAKG;AACH,MAAM,wBAAwB,GAAG,IAAI,GAAG,EAAE,CAAC;AAgC3C;;GAEG;AACH,MAAM,UAAU,8BAA8B,CAC1C,IAA8C,EAC9C,OAAkC;IAElC,wBAAwB,CAAC,GAAG,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;IAC5C,IAAI,wBAAwB,CAAC,IAAI,GAAG,4BAA4B,EAAE,CAAC;QAC/D,MAAM,GAAG,GAAG,wBAAwB,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC;QACzD,wBAAwB,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;IACzC,CAAC;AACL,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,oCAAoC,CAAC,UAAqC;IACtF,OAAO,UAAU,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;QAC3B,mCAAmC;QACnC,IAAI,YAAY,GAAG,wBAAwB,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QACtD,IAAI,YAAY;YAAE,OAAO,YAAY,CAAC;QAEtC,IAAI,OAAO,IAAI,KAAK,QAAQ,EAAE,CAAC;YAC3B,YAAY,GAAG,EAAE,MAAM,EAAE,YAAY,CAAC,IAAI,CAAC,EAAE,CAAC;QAClD,CAAC;aAAM,CAAC;YACJ,MAAM,EAAE,IAAI,EAAE,GAAG,cAAc,EAAE,GAAG,IAAI,CAAC;YACzC,YAAY,GAAG,EAAE,MAAM,EAAE,YAAY,CAAC,IAAI,CAAC,EAAE,GAAG,cAAc,EAAE,CAAC;QACrE,CAAC;QAED,8BAA8B,CAAC,IAAI,EAAE,YAAY,CAAC,CAAC;QAEnD,OAAO,YAAY,CAAC;IACxB,CAAC,CAAC,CAAC;AACP,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,6BAA6B,CAAC,KAA2B;IACrE,OAAO,KAAK;SACP,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE;QACb,uCAAuC;QACvC,IAAI,CAAC,IAAI,EAAE,CAAC;YACR,OAAO,KAAK,CAAC;QACjB,CAAC;QAED,IAAI,OAAO,IAAI,KAAK,QAAQ,EAAE,CAAC;YAC3B,OAAO,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CAAC;QAClC,CAAC;QAED,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC;YACZ,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CAAC;QACvC,CAAC;QAED,OAAO,KAAK,CAAC;IACjB,CAAC,CAAC;SACD,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;QACV,8BAA8B;QAC9B,IAAI,UAAU,GAAG,wBAAwB,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QACpD,IAAI,UAAU;YAAE,OAAO,UAAU,CAAC;QAElC,IAAI,OAAO,IAAI,KAAK,QAAQ,EAAE,CAAC;YAC3B,UAAU,GAAG,EAAE,IAAI,EAAE,mBAAmB,CAAC,IAAI,CAAC,EAAE,CAAC;QACrD,CAAC;aAAM,CAAC;YACJ,MAAM,EAAE,IAAI,EAAE,GAAG,cAAc,EAAE,GAAG,IAAI,CAAC;YACzC,UAAU,GAAG,EAAE,IAAI,EAAE,mBAAmB,CAAC,IAAI,CAAC,EAAE,GAAG,cAAc,EAAE,CAAC;QACxE,CAAC;QAED,8BAA8B,CAAC,IAAI,EAAE,UAAU,CAAC,CAAC;QAEjD,OAAO,UAAU,CAAC;IACtB,CAAC,CAAC,CAAC;AACX,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,mBAAmB,CAAC,IAAY;IAC5C,MAAM,WAAW,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;IAChC,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC;QACxB,MAAM,IAAI,KAAK,CAAC,8BAA8B,WAAW,mCAAmC,CAAC,CAAC;IAClG,OAAO,WAAW,CAAC;AACvB,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,iCAAiC,CAAC,OAA+B;IAC7E,OAAO,OAAO,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;QACxB,gCAAgC;QAChC,IAAI,YAAY,GAAG,wBAAwB,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QACtD,IAAI,YAAY;YAAE,OAAO,YAAY,CAAC;QAEtC,IAAI,IAAI,YAAY,MAAM,EAAE,CAAC;YACzB,YAAY,GAAG,EAAE,MAAM,EAAE,IAAI,EAAE,CAAC;QACpC,CAAC;aAAM,CAAC;YACJ,YAAY,GAAG,IAAI,CAAC;QACxB,CAAC;QAED,8BAA8B,CAAC,IAAI,EAAE,YAAY,CAAC,CAAC;QAEnD,OAAO,YAAY,CAAC;IACxB,CAAC,CAAC,CAAC;AACP,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,cAAc,CAC1B,cAA2C,EAC3C,iBAAsC,EACtC,wBAA4C,EAAE,EAC9C,QAA0C,EAC1C,YAAoC;IAEpC,MAAM,4BAA4B,GAAG,qBAAqB,CAAC,GAAG,CAAC,0BAA0B,CAAC,CAAC;IAC3F,MAAM,wBAAwB,GAAG,iBAAiB,EAAE,GAAG,CAAC,0BAA0B,CAAC,CAAC;IAEpF,OAAO,cAAc;SAChB,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,EAAE,GAAG,EAAE,OAAO,IAAI,KAAK,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,EAAE,IAAI,EAAE,CAAC,CAAC;SAC1E,MAAM,CAAC,CAAC,EAAE,GAAG,EAAE,EAAE,EAAE;QAChB,MAAM,sBAAsB,GAAG,4BAA4B,CAAC,IAAI,CAAC,CAAC,EAAE,KAAK,EAAE,EAAE,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC;QAE5F,IAAI,sBAAsB,EAAE,CAAC;YACzB,YAAY,EAAE,CAAC,GAAG,CAAC,CAAC;QACxB,CAAC;QAED,OAAO,CAAC,sBAAsB,CAAC;IACnC,CAAC,CAAC;SACD,GAAG,CAAC,CAAC,EAAE,GAAG,EAAE,IAAI,EAAE,EAAE,EAAE;QACnB,IAAI,CAAC,wBAAwB,IAAI,CAAC,wBAAwB,CAAC,MAAM,EAAE,CAAC;YAChE,OAAO,IAAI,OAAO,CAAC,OAAO,IAAI,KAAK,QAAQ,CAAC,CAAC,CAAC,EAAE,GAAG,EAAE,IAAI,EAAE,eAAe,EAAE,QAAQ,EAAE,CAAC,CAAC,CAAC,EAAE,GAAG,IAAI,EAAE,CAAC,CAAC;QAC1G,CAAC;QAED,KAAK,MAAM,gBAAgB,IAAI,wBAAwB,EAAE,CAAC;YACtD,MAAM,EAAE,KAAK,EAAE,IAAI,EAAE,MAAM,EAAE,GAAG,oBAAoB,EAAE,GAAG,gBAAgB,CAAC;YAC1E,IAAI,KAAK,CAAC,GAAG,CAAC,EAAE,CAAC;gBACb,MAAM,OAAO,GACT,OAAO,IAAI,KAAK,QAAQ;oBACpB,CAAC,CAAC,EAAE,GAAG,EAAE,IAAI,EAAE,GAAG,oBAAoB,EAAE,eAAe,EAAE,QAAQ,EAAE;oBACnE,CAAC,CAAC,EAAE,GAAG,IAAI,EAAE,GAAG,oBAAoB,EAAE,eAAe,EAAE,QAAQ,EAAE,CAAC;gBAE1E,OAAO,IAAI,OAAO,CAAC,OAAO,CAAC,CAAC;YAChC,CAAC;QACL,CAAC;QAED,oCAAoC;QACpC,YAAY,EAAE,CAAC,GAAG,CAAC,CAAC;QACpB,OAAO,IAAI,CAAC;IAChB,CAAC,CAAC;SACD,MAAM,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,OAAO,CAAc,CAAC;AACnD,CAAC;AAED,MAAM,UAAU,wBAAwB,CACpC,QAAmB,EACnB,QAA6B,EAC7B,YAAoC;IAEpC,IAAI,CAAC,QAAQ,EAAE,MAAM,EAAE,CAAC;QACpB,OAAO,QAAQ,CAAC;IACpB,CAAC;IAED,MAAM,QAAQ,GAAc,EAAE,CAAC;IAC/B,MAAM,eAAe,GAAG,QAAQ,EAAE,GAAG,CAAC,0BAA0B,CAAC,CAAC;IAElE,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;QAC7B,MAAM,eAAe,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC,EAAE,KAAK,EAAE,EAAE,EAAE,CAAC,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC;QAEhF,IAAI,eAAe,KAAK,SAAS,EAAE,CAAC;YAChC,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QAC3B,CAAC;aAAM,CAAC;YACJ,YAAY,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;QAChC,CAAC;IACL,CAAC;IAED,OAAO,QAAQ,CAAC;AACpB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,oBAAoB,CAChC,OAAsD,EACtD,UAAuG,EAAE;IAEzG,OAAO,OAAO;SACT,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CACT,OAAO,GAAG,KAAK,QAAQ;QACnB,CAAC,CAAC,EAAE,GAAG,EAAE,GAAG,EAAE,eAAe,EAAE,OAAO,CAAC,QAAQ,EAAE;QACjD,CAAC,CAAE,EAAE,GAAG,GAAG,EAAE,eAAe,EAAE,OAAO,CAAC,QAAQ,EAAqB,CAC1E;SACA,MAAM,CAAC,CAAC,EAAE,GAAG,EAAE,EAAE,EAAE;QAChB,IAAI,CAAC;YACD,OAAO,IAAI,GAAG,CAAC,GAAG,EAAE,OAAO,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC;QAC9C,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACX,OAAO,KAAK,CAAC;QACjB,CAAC;IACL,CAAC,CAAC;SACD,GAAG,CAAC,CAAC,cAAc,EAAE,EAAE;QACpB,cAAc,CAAC,GAAG,GAAG,IAAI,GAAG,CAAC,cAAc,CAAC,GAAG,EAAE,OAAO,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC;QACvE,cAAc,CAAC,QAAQ,KAAK,OAAO,CAAC,QAAQ,IAAI,EAAE,CAAC;QAEnD,IAAI,OAAO,OAAO,CAAC,KAAK,KAAK,QAAQ,EAAE,CAAC;YACpC,cAAc,CAAC,QAAQ,GAAG;gBACtB,GAAG,cAAc,CAAC,QAAQ;gBAC1B,KAAK,EAAE,OAAO,CAAC,KAAK;aACvB,CAAC;QACN,CAAC;QAED,IAAI,OAAO,CAAC,cAAc,EAAE,CAAC;YACzB,cAAc,CAAC,cAAc,GAAG,IAAI,CAAC;QACzC,CAAC;QAED,OAAO,cAAc,CAAC;IAC1B,CAAC,CAAC,CAAC;AACX,CAAC;AAED;;GAEG;AACH,SAAS,0BAA0B,CAAC,gBAAkC;IAClE,MAAM,EAAE,MAAM,EAAE,IAAI,EAAE,GAAG,gBAAgB,CAAC;IAC1C,IAAI,KAAK,CAAC;IACV,IAAI,MAAM,EAAE,CAAC;QACT,KAAK,GAAG,CAAC,GAAW,EAAE,EAAE,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IAC9C,CAAC;SAAM,IAAI,IAAI,EAAE,CAAC;QACd,MAAM,CAAC,GAAG,IAAI,SAAS,CAAC,IAAI,EAAE,EAAE,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC;QAChD,KAAK,GAAG,CAAC,GAAW,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;IAC1C,CAAC;SAAM,CAAC;QACJ,KAAK,GAAG,GAAG,EAAE,CAAC,KAAK,CAAC;IACxB,CAAC;IACD,OAAO,EAAE,GAAG,gBAAgB,EAAE,KAAK,EAAE,CAAC;AAC1C,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"got-scraping-http-client.d.ts","sourceRoot":"","sources":["../../src/http_clients/got-scraping-http-client.ts"],"names":[],"mappings":"AAKA,OAAO,EACH,KAAK,cAAc,EACnB,KAAK,WAAW,EAChB,KAAK,eAAe,EACpB,KAAK,aAAa,EAErB,MAAM,uBAAuB,CAAC;AAE/B;;GAEG;AACH,qBAAa,qBAAsB,YAAW,cAAc;IACxD;;;OAGG;IACH,OAAO,CAAC,eAAe;IAMvB;;OAEG;IACG,WAAW,CAAC,aAAa,SAAS,MAAM,aAAa,EACvD,OAAO,EAAE,WAAW,CAAC,aAAa,CAAC,GACpC,OAAO,CAAC,QAAQ,CAAC;
|
|
1
|
+
{"version":3,"file":"got-scraping-http-client.d.ts","sourceRoot":"","sources":["../../src/http_clients/got-scraping-http-client.ts"],"names":[],"mappings":"AAKA,OAAO,EACH,KAAK,cAAc,EACnB,KAAK,WAAW,EAChB,KAAK,eAAe,EACpB,KAAK,aAAa,EAErB,MAAM,uBAAuB,CAAC;AAE/B;;GAEG;AACH,qBAAa,qBAAsB,YAAW,cAAc;IACxD;;;OAGG;IACH,OAAO,CAAC,eAAe;IAMvB;;OAEG;IACG,WAAW,CAAC,aAAa,SAAS,MAAM,aAAa,EACvD,OAAO,EAAE,WAAW,CAAC,aAAa,CAAC,GACpC,OAAO,CAAC,QAAQ,CAAC;IAoCpB;;OAEG;IACG,MAAM,CAAC,OAAO,EAAE,WAAW,EAAE,cAAc,CAAC,EAAE,eAAe,GAAG,OAAO,CAAC,QAAQ,CAAC;CAsC1F"}
|
|
@@ -21,6 +21,9 @@ export class GotScrapingHttpClient {
|
|
|
21
21
|
}
|
|
22
22
|
const gotResult = await gotScraping({
|
|
23
23
|
...request,
|
|
24
|
+
// `HttpCrawler` reads the cookies beforehand and sets them in `request.gotOptions`.
|
|
25
|
+
// Using the `cookieJar` option directly would override that.
|
|
26
|
+
cookieJar: undefined,
|
|
24
27
|
retry: {
|
|
25
28
|
limit: 0,
|
|
26
29
|
...request.retry,
|
|
@@ -52,7 +55,7 @@ export class GotScrapingHttpClient {
|
|
|
52
55
|
}
|
|
53
56
|
// eslint-disable-next-line no-async-promise-executor
|
|
54
57
|
return new Promise(async (resolve, reject) => {
|
|
55
|
-
const stream = gotScraping({ ...request, isStream: true });
|
|
58
|
+
const stream = gotScraping({ ...request, isStream: true, cookieJar: undefined });
|
|
56
59
|
stream.on('redirect', (updatedOptions, redirectResponse) => {
|
|
57
60
|
const nativeRedirectResponse = new ResponseWithUrl(redirectResponse.rawBody, {
|
|
58
61
|
headers: redirectResponse.headers,
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"got-scraping-http-client.js","sourceRoot":"","sources":["../../src/http_clients/got-scraping-http-client.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAGvC,OAAO,EAAE,WAAW,EAAE,MAAM,cAAc,CAAC;AAE3C,OAAO,EAKH,eAAe,GAClB,MAAM,uBAAuB,CAAC;AAE/B;;GAEG;AACH,MAAM,OAAO,qBAAqB;IAC9B;;;OAGG;IACK,eAAe,CACnB,OAAU;QAEV,OAAO,CAAC,CAAC,SAAS,EAAE,SAAS,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,MAAO,CAAC,CAAC;IAC7D,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,WAAW,CACb,OAAmC;QAEnC,IAAI,CAAC,IAAI,CAAC,eAAe,CAAC,OAAO,CAAC,EAAE,CAAC;YACjC,MAAM,IAAI,KAAK,CAAC,wEAAwE,CAAC,CAAC;QAC9F,CAAC;QAED,MAAM,SAAS,GAAG,MAAM,WAAW,CAAC;YAChC,GAAG,OAAO;YACV,KAAK,EAAE;gBACH,KAAK,EAAE,CAAC;gBACR,GAAI,OAAO,CAAC,KAA6C;aAC5D;SACJ,CAAC,CAAC;QAEH,MAAM,aAAa,GAAG,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,OAAO,CAAC;aAClD,GAAG,CAAC,CAAC,CAAC,GAAG,EAAE,KAAK,CAAC,EAAE,EAAE;YAClB,IAAI,KAAK,KAAK,SAAS;gBAAE,OAAO,EAAE,CAAC;YAEnC,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC;gBACvB,OAAO,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC;YACtC,CAAC;YAED,OAAO,CAAC,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC,CAAC;QAC1B,CAAC,CAAC;aACD,IAAI,EAAwB,CAAC;QAElC,OAAO,IAAI,eAAe,CAAC,IAAI,UAAU,CAAC,SAAS,CAAC,OAAO,CAAC,EAAE;YAC1D,OAAO,EAAE,IAAI,OAAO,CAAC,aAAa,CAAC;YACnC,MAAM,EAAE,SAAS,CAAC,UAAU;YAC5B,UAAU,EAAE,SAAS,CAAC,aAAa,IAAI,EAAE;YACzC,GAAG,EAAE,SAAS,CAAC,GAAG;SACrB,CAAC,CAAC;IACP,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,MAAM,CAAC,OAAoB,EAAE,cAAgC;QAC/D,IAAI,CAAC,IAAI,CAAC,eAAe,CAAC,OAAO,CAAC,EAAE,CAAC;YACjC,MAAM,IAAI,KAAK,CAAC,wEAAwE,CAAC,CAAC;QAC9F,CAAC;QACD,qDAAqD;QACrD,OAAO,IAAI,OAAO,CAAC,KAAK,EAAE,OAAO,EAAE,MAAM,EAAE,EAAE;YACzC,MAAM,MAAM,GAAG,WAAW,CAAC,EAAE,GAAG,OAAO,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC;
|
|
1
|
+
{"version":3,"file":"got-scraping-http-client.js","sourceRoot":"","sources":["../../src/http_clients/got-scraping-http-client.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAGvC,OAAO,EAAE,WAAW,EAAE,MAAM,cAAc,CAAC;AAE3C,OAAO,EAKH,eAAe,GAClB,MAAM,uBAAuB,CAAC;AAE/B;;GAEG;AACH,MAAM,OAAO,qBAAqB;IAC9B;;;OAGG;IACK,eAAe,CACnB,OAAU;QAEV,OAAO,CAAC,CAAC,SAAS,EAAE,SAAS,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,MAAO,CAAC,CAAC;IAC7D,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,WAAW,CACb,OAAmC;QAEnC,IAAI,CAAC,IAAI,CAAC,eAAe,CAAC,OAAO,CAAC,EAAE,CAAC;YACjC,MAAM,IAAI,KAAK,CAAC,wEAAwE,CAAC,CAAC;QAC9F,CAAC;QAED,MAAM,SAAS,GAAG,MAAM,WAAW,CAAC;YAChC,GAAG,OAAO;YACV,oFAAoF;YACpF,6DAA6D;YAC7D,SAAS,EAAE,SAAS;YACpB,KAAK,EAAE;gBACH,KAAK,EAAE,CAAC;gBACR,GAAI,OAAO,CAAC,KAA6C;aAC5D;SACJ,CAAC,CAAC;QAEH,MAAM,aAAa,GAAG,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,OAAO,CAAC;aAClD,GAAG,CAAC,CAAC,CAAC,GAAG,EAAE,KAAK,CAAC,EAAE,EAAE;YAClB,IAAI,KAAK,KAAK,SAAS;gBAAE,OAAO,EAAE,CAAC;YAEnC,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC;gBACvB,OAAO,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC;YACtC,CAAC;YAED,OAAO,CAAC,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC,CAAC;QAC1B,CAAC,CAAC;aACD,IAAI,EAAwB,CAAC;QAElC,OAAO,IAAI,eAAe,CAAC,IAAI,UAAU,CAAC,SAAS,CAAC,OAAO,CAAC,EAAE;YAC1D,OAAO,EAAE,IAAI,OAAO,CAAC,aAAa,CAAC;YACnC,MAAM,EAAE,SAAS,CAAC,UAAU;YAC5B,UAAU,EAAE,SAAS,CAAC,aAAa,IAAI,EAAE;YACzC,GAAG,EAAE,SAAS,CAAC,GAAG;SACrB,CAAC,CAAC;IACP,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,MAAM,CAAC,OAAoB,EAAE,cAAgC;QAC/D,IAAI,CAAC,IAAI,CAAC,eAAe,CAAC,OAAO,CAAC,EAAE,CAAC;YACjC,MAAM,IAAI,KAAK,CAAC,wEAAwE,CAAC,CAAC;QAC9F,CAAC;QACD,qDAAqD;QACrD,OAAO,IAAI,OAAO,CAAC,KAAK,EAAE,OAAO,EAAE,MAAM,EAAE,EAAE;YACzC,MAAM,MAAM,GAAG,WAAW,CAAC,EAAE,GAAG,OAAO,EAAE,QAAQ,EAAE,IAAI,EAAE,SAAS,EAAE,SAAS,EAAE,CAAC,CAAC;YAEjF,MAAM,CAAC,EAAE,CAAC,UAAU,EAAE,CAAC,cAAuB,EAAE,gBAAqB,EAAE,EAAE;gBACrE,MAAM,sBAAsB,GAAG,IAAI,eAAe,CAAC,gBAAgB,CAAC,OAAO,EAAE;oBACzE,OAAO,EAAE,gBAAgB,CAAC,OAAO;oBACjC,MAAM,EAAE,gBAAgB,CAAC,UAAU;oBACnC,UAAU,EAAE,gBAAgB,CAAC,aAAa;oBAC1C,GAAG,EAAE,gBAAgB,CAAC,GAAG;iBAC5B,CAAC,CAAC;gBACH,cAAc,EAAE,CAAC,sBAAsB,EAAE,cAAc,CAAC,CAAC;YAC7D,CAAC,CAAC,CAAC;YAEH,yEAAyE;YACzE,IAAI,OAAO,CAAC,MAAM,IAAI,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC;gBAClE,MAAM,CAAC,GAAG,EAAE,CAAC;YACjB,CAAC;YAED,MAAM,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;YAE3B,MAAM,CAAC,EAAE,CAAC,UAAU,EAAE,CAAC,QAAuB,EAAE,EAAE;gBAC9C,qFAAqF;gBACrF,OAAO,CACH,IAAI,eAAe,CAAC,QAAQ,CAAC,KAAK,CAAC,MAAM,CAAQ,EAAE;oBAC/C,MAAM,EAAE,QAAQ,CAAC,UAAU;oBAC3B,UAAU,EAAE,QAAQ,CAAC,aAAa,IAAI,EAAE;oBACxC,OAAO,EAAE,QAAQ,CAAC,OAAsB;oBACxC,GAAG,EAAE,QAAQ,CAAC,GAAG;iBACpB,CAAC,CACL,CAAC;YACN,CAAC,CAAC,CAAC;QACP,CAAC,CAAC,CAAC;IACP,CAAC;CACJ"}
|
package/index.d.ts
CHANGED
|
@@ -14,6 +14,7 @@ export * from './session_pool/index.js';
|
|
|
14
14
|
export * from './storages/index.js';
|
|
15
15
|
export * from './validators.js';
|
|
16
16
|
export * from './cookie_utils.js';
|
|
17
|
+
export * from './recoverable_state.js';
|
|
17
18
|
export { PseudoUrl } from '@apify/pseudo_url';
|
|
18
19
|
export { Dictionary, Awaitable, Constructor, StorageClient, Cookie, QueueOperationInfo } from '@crawlee/types';
|
|
19
20
|
//# sourceMappingURL=index.d.ts.map
|
package/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,aAAa,CAAC;AAC5B,cAAc,wBAAwB,CAAC;AACvC,cAAc,oBAAoB,CAAC;AACnC,cAAc,qBAAqB,CAAC;AACpC,cAAc,0BAA0B,CAAC;AACzC,cAAc,mBAAmB,CAAC;AAClC,cAAc,yBAAyB,CAAC;AACxC,cAAc,UAAU,CAAC;AACzB,cAAc,0BAA0B,CAAC;AACzC,cAAc,cAAc,CAAC;AAC7B,cAAc,aAAa,CAAC;AAC5B,cAAc,oBAAoB,CAAC;AACnC,cAAc,yBAAyB,CAAC;AACxC,cAAc,qBAAqB,CAAC;AACpC,cAAc,iBAAiB,CAAC;AAChC,cAAc,mBAAmB,CAAC;AAClC,OAAO,EAAE,SAAS,EAAE,MAAM,mBAAmB,CAAC;AAC9C,OAAO,EAAE,UAAU,EAAE,SAAS,EAAE,WAAW,EAAE,aAAa,EAAE,MAAM,EAAE,kBAAkB,EAAE,MAAM,gBAAgB,CAAC"}
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,aAAa,CAAC;AAC5B,cAAc,wBAAwB,CAAC;AACvC,cAAc,oBAAoB,CAAC;AACnC,cAAc,qBAAqB,CAAC;AACpC,cAAc,0BAA0B,CAAC;AACzC,cAAc,mBAAmB,CAAC;AAClC,cAAc,yBAAyB,CAAC;AACxC,cAAc,UAAU,CAAC;AACzB,cAAc,0BAA0B,CAAC;AACzC,cAAc,cAAc,CAAC;AAC7B,cAAc,aAAa,CAAC;AAC5B,cAAc,oBAAoB,CAAC;AACnC,cAAc,yBAAyB,CAAC;AACxC,cAAc,qBAAqB,CAAC;AACpC,cAAc,iBAAiB,CAAC;AAChC,cAAc,mBAAmB,CAAC;AAClC,cAAc,wBAAwB,CAAC;AACvC,OAAO,EAAE,SAAS,EAAE,MAAM,mBAAmB,CAAC;AAC9C,OAAO,EAAE,UAAU,EAAE,SAAS,EAAE,WAAW,EAAE,aAAa,EAAE,MAAM,EAAE,kBAAkB,EAAE,MAAM,gBAAgB,CAAC"}
|
package/index.js
CHANGED
|
@@ -14,5 +14,6 @@ export * from './session_pool/index.js';
|
|
|
14
14
|
export * from './storages/index.js';
|
|
15
15
|
export * from './validators.js';
|
|
16
16
|
export * from './cookie_utils.js';
|
|
17
|
+
export * from './recoverable_state.js';
|
|
17
18
|
export { PseudoUrl } from '@apify/pseudo_url';
|
|
18
19
|
//# sourceMappingURL=index.js.map
|
package/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,aAAa,CAAC;AAC5B,cAAc,wBAAwB,CAAC;AACvC,cAAc,oBAAoB,CAAC;AACnC,cAAc,qBAAqB,CAAC;AACpC,cAAc,0BAA0B,CAAC;AACzC,cAAc,mBAAmB,CAAC;AAClC,cAAc,yBAAyB,CAAC;AACxC,cAAc,UAAU,CAAC;AACzB,cAAc,0BAA0B,CAAC;AACzC,cAAc,cAAc,CAAC;AAC7B,cAAc,aAAa,CAAC;AAC5B,cAAc,oBAAoB,CAAC;AACnC,cAAc,yBAAyB,CAAC;AACxC,cAAc,qBAAqB,CAAC;AACpC,cAAc,iBAAiB,CAAC;AAChC,cAAc,mBAAmB,CAAC;AAClC,OAAO,EAAE,SAAS,EAAE,MAAM,mBAAmB,CAAC"}
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,aAAa,CAAC;AAC5B,cAAc,wBAAwB,CAAC;AACvC,cAAc,oBAAoB,CAAC;AACnC,cAAc,qBAAqB,CAAC;AACpC,cAAc,0BAA0B,CAAC;AACzC,cAAc,mBAAmB,CAAC;AAClC,cAAc,yBAAyB,CAAC;AACxC,cAAc,UAAU,CAAC;AACzB,cAAc,0BAA0B,CAAC;AACzC,cAAc,cAAc,CAAC;AAC7B,cAAc,aAAa,CAAC;AAC5B,cAAc,oBAAoB,CAAC;AACnC,cAAc,yBAAyB,CAAC;AACxC,cAAc,qBAAqB,CAAC;AACpC,cAAc,iBAAiB,CAAC;AAChC,cAAc,mBAAmB,CAAC;AAClC,cAAc,wBAAwB,CAAC;AACvC,OAAO,EAAE,SAAS,EAAE,MAAM,mBAAmB,CAAC"}
|
package/log.d.ts
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
-
import
|
|
2
|
-
|
|
1
|
+
import type { LoggerOptions } from '@apify/log';
|
|
2
|
+
import log, { Log, Logger, LoggerJson, LoggerText, LogLevel } from '@apify/log';
|
|
3
|
+
export { log, Log, LogLevel, Logger, LoggerJson, LoggerText };
|
|
4
|
+
export type { LoggerOptions };
|
|
3
5
|
//# sourceMappingURL=log.d.ts.map
|
package/log.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"log.d.ts","sourceRoot":"","sources":["../src/log.ts"],"names":[],"mappings":"AAAA,OAAO,GAAG,EAAE,EAAE,GAAG,EAAE,MAAM,EAAE,UAAU,EAAE,
|
|
1
|
+
{"version":3,"file":"log.d.ts","sourceRoot":"","sources":["../src/log.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAChD,OAAO,GAAG,EAAE,EAAE,GAAG,EAAE,MAAM,EAAE,UAAU,EAAE,UAAU,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AAEhF,OAAO,EAAE,GAAG,EAAE,GAAG,EAAE,QAAQ,EAAE,MAAM,EAAE,UAAU,EAAE,UAAU,EAAE,CAAC;AAC9D,YAAY,EAAE,aAAa,EAAE,CAAC"}
|
package/log.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"log.js","sourceRoot":"","sources":["../src/log.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"log.js","sourceRoot":"","sources":["../src/log.ts"],"names":[],"mappings":"AACA,OAAO,GAAG,EAAE,EAAE,GAAG,EAAE,MAAM,EAAE,UAAU,EAAE,UAAU,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AAEhF,OAAO,EAAE,GAAG,EAAE,GAAG,EAAE,QAAQ,EAAE,MAAM,EAAE,UAAU,EAAE,UAAU,EAAE,CAAC"}
|