@crawlee/cheerio 4.0.0-beta.64 → 4.0.0-beta.66

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
1
- import type { BasicCrawlingContext, EnqueueLinksOptions, ErrorHandler, GetUserDataFromRequest, HttpCrawlerOptions, InternalHttpCrawlingContext, InternalHttpHook, RequestHandler, RequestProvider, RouterRoutes, SkippedRequestCallback } from '@crawlee/http';
1
+ import type { BasicCrawlingContext, EnqueueLinksOptions, ErrorHandler, GetUserDataFromRequest, HttpCrawlerOptions, InternalHttpCrawlingContext, InternalHttpHook, IRequestManager, RequestHandler, RouterRoutes, SkippedRequestCallback } from '@crawlee/http';
2
2
  import { HttpCrawler } from '@crawlee/http';
3
3
  import type { BatchAddRequestsResult, Dictionary } from '@crawlee/types';
4
4
  import { type CheerioRoot, type RobotsTxtFile } from '@crawlee/utils';
@@ -73,13 +73,15 @@ JSONData extends Dictionary = any> = RequestHandler<CheerioCrawlingContext<UserD
73
73
  * and then invokes the user-provided {@link CheerioCrawlerOptions.requestHandler} to extract page data
74
74
  * using a [jQuery](https://jquery.com/)-like interface to the parsed HTML DOM.
75
75
  *
76
- * The source URLs are represented using {@link Request} objects that are fed from
77
- * {@link RequestList} or {@link RequestQueue} instances provided by the {@link CheerioCrawlerOptions.requestList}
78
- * or {@link CheerioCrawlerOptions.requestQueue} constructor options, respectively.
76
+ * The source URLs are represented using {@link Request} objects that are fed from the
77
+ * {@link IRequestManager|request manager} provided via the {@link CheerioCrawlerOptions.requestManager|`requestManager`}
78
+ * constructor option (a {@link RequestQueue} is itself a request manager). To read from a read-only source such
79
+ * as a {@link RequestList} while still being able to enqueue new requests, combine it with a queue into a
80
+ * {@link RequestManagerTandem} via {@link IRequestLoader.toTandem|`requestLoader.toTandem()`} and pass the
81
+ * result as `requestManager`.
79
82
  *
80
- * If both {@link CheerioCrawlerOptions.requestList} and {@link CheerioCrawlerOptions.requestQueue} are used,
81
- * the instance first processes URLs from the {@link RequestList} and automatically enqueues all of them
82
- * to {@link RequestQueue} before it starts their processing. This ensures that a single URL is not crawled multiple times.
83
+ * > The {@link CheerioCrawlerOptions.requestList|`requestList`} and {@link CheerioCrawlerOptions.requestQueue|`requestQueue`}
84
+ * > options are deprecated; they are still accepted and folded into a single `requestManager` for back-compat.
83
85
  *
84
86
  * The crawler finishes when there are no more {@link Request} objects to crawl.
85
87
  *
@@ -154,7 +156,7 @@ export declare class CheerioCrawler<ContextExtension = Dictionary<never>, Extend
154
156
  interface EnqueueLinksInternalOptions {
155
157
  options?: EnqueueLinksOptions;
156
158
  $: cheerio.CheerioAPI | null;
157
- requestQueue: RequestProvider;
159
+ requestManager: IRequestManager;
158
160
  robotsTxtFile?: RobotsTxtFile;
159
161
  onSkippedRequest?: SkippedRequestCallback;
160
162
  originalRequestUrl: string;
@@ -1 +1 @@
1
- {"version":3,"file":"cheerio-crawler.d.ts","sourceRoot":"","sources":["../../src/internals/cheerio-crawler.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACR,oBAAoB,EACpB,mBAAmB,EACnB,YAAY,EACZ,sBAAsB,EACtB,kBAAkB,EAClB,2BAA2B,EAC3B,gBAAgB,EAChB,cAAc,EACd,eAAe,EACf,YAAY,EACZ,sBAAsB,EACzB,MAAM,eAAe,CAAC;AACvB,OAAO,EAEH,WAAW,EAId,MAAM,eAAe,CAAC;AACvB,OAAO,KAAK,EAAE,sBAAsB,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AACzE,OAAO,EAAE,KAAK,WAAW,EAA0B,KAAK,aAAa,EAAE,MAAM,gBAAgB,CAAC;AAC9F,OAAO,KAAK,EAAE,UAAU,EAAkB,MAAM,SAAS,CAAC;AAC1D,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AAGnC,MAAM,MAAM,mBAAmB,CAC3B,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,UAAU,GAAG,GAAG,IACjC,YAAY,CAAC,sBAAsB,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,CAAC;AAE7D,MAAM,WAAW,qBAAqB,CAClC,gBAAgB,GAAG,UAAU,CAAC,KAAK,CAAC,EACpC,eAAe,SAAS,sBAAsB,GAAG,sBAAsB,GAAG,gBAAgB,EAC1F,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,UAAU,GAAG,GAAG,CACnC,SAAQ,kBAAkB,CAAC,sBAAsB,CAAC,QAAQ,EAAE,QAAQ,CAAC,EAAE,gBAAgB,EAAE,eAAe,CAAC;CAAG;AAE9G,MAAM,MAAM,WAAW,CACnB,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,UAAU,GAAG,GAAG,IACjC,gBAAgB,CAAC,sBAAsB,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,CAAC;AAEjE,MAAM,WAAW,sBAAsB,CACnC,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,UAAU,GAAG,GAAG,CACnC,SAAQ,2BAA2B,CAAC,QAAQ,EAAE,QAAQ,CAAC;IACrD;;OAEG;IACH,IAAI,EAAE,MAAM,CAAC;IAEb;;;OAGG;IACH,CAAC,EAAE,OAAO,CAAC,UAAU,CAAC;IAEtB;;;;;;;;;;;OAWG;IACH,eAAe,CAAC,QAAQ,EAAE,MAAM,EAAE,SAAS,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAErE;;;;;;;;;;;;;OAaG;IACH,gBAAgB,CAAC,QAAQ,CAAC,EAAE,MAAM,EAAE,SAAS,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,CAAC,CAAC;IAE9E;;OAEG;IACH,YAAY,CAAC,OAAO,CAAC,EAAE,mBAAmB,GAAG,OAAO,CAAC,sBAAsB,CAAC,CAAC;CAChF;AAED,MAAM,MAAM,qBAAqB,CAC7B,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,UAAU,GAAG,GAAG,IACjC,cAAc,CAAC,sBAAsB,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,CAAC;AAE/D;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4EG;AACH,qBAAa,cAAc,CACvB,gBAAgB,GAAG,UAAU,CAAC,KAAK,CAAC,EACpC,eAAe,SAAS,sBAAsB,GAAG,sBAAsB,GAAG,gBAAgB,CAC5F,SAAQ,WAAW,CAAC,sBAAsB,EAAE,gBAAgB,EAAE,eAAe,CAAC;IAC5E;;OAEG;gBACS,OAAO,CAAC,EAAE,qBAAqB,CAAC,gBAAgB,EAAE,eAAe,CAAC;cAS3D,oBAAoB;uBA2BX,MAAM;oBAMT,UAAU;;wCAiBW,mBAAmB;oCAYvB,MAAM,eAAe,MAAM;sCAKzB,MAAM,cAAc,MAAM;;YA1DxD,YAAY;YAqCZ,UAAU;CA8B3B;AAED,UAAU,2BAA2B;IACjC,OAAO,CAAC,EAAE,mBAAmB,CAAC;IAC9B,CAAC,EAAE,OAAO,CAAC,UAAU,GAAG,IAAI,CAAC;IAC7B,YAAY,EAAE,eAAe,CAAC;IAC9B,aAAa,CAAC,EAAE,aAAa,CAAC;IAC9B,gBAAgB,CAAC,EAAE,sBAAsB,CAAC;IAC1C,kBAAkB,EAAE,MAAM,CAAC;IAC3B,eAAe,CAAC,EAAE,MAAM,CAAC;CAC5B;AAED,UAAU,gCAAgC;IACtC,YAAY,EAAE,oBAAoB,CAAC,cAAc,CAAC,CAAC;IACnD,OAAO,CAAC,EAAE,mBAAmB,CAAC;IAC9B,CAAC,EAAE,OAAO,CAAC,UAAU,GAAG,IAAI,CAAC;IAC7B,kBAAkB,EAAE,MAAM,CAAC;IAC3B,eAAe,CAAC,EAAE,MAAM,CAAC;CAC5B;AASD,gBAAgB;AAChB,wBAAsB,0BAA0B,CAC5C,OAAO,EAAE,2BAA2B,GAAG,gCAAgC,oBAmC1E;AAED;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,wBAAgB,mBAAmB,CAC/B,OAAO,SAAS,sBAAsB,GAAG,sBAAsB,EAC/D,QAAQ,SAAS,UAAU,GAAG,sBAAsB,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,EAC1E,MAAM,CAAC,EAAE,YAAY,CAAC,OAAO,EAAE,QAAQ,CAAC,kDAEzC"}
1
+ {"version":3,"file":"cheerio-crawler.d.ts","sourceRoot":"","sources":["../../src/internals/cheerio-crawler.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACR,oBAAoB,EACpB,mBAAmB,EACnB,YAAY,EACZ,sBAAsB,EACtB,kBAAkB,EAClB,2BAA2B,EAC3B,gBAAgB,EAChB,eAAe,EACf,cAAc,EACd,YAAY,EACZ,sBAAsB,EACzB,MAAM,eAAe,CAAC;AACvB,OAAO,EAEH,WAAW,EAId,MAAM,eAAe,CAAC;AACvB,OAAO,KAAK,EAAE,sBAAsB,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AACzE,OAAO,EAAE,KAAK,WAAW,EAA0B,KAAK,aAAa,EAAE,MAAM,gBAAgB,CAAC;AAC9F,OAAO,KAAK,EAAE,UAAU,EAAkB,MAAM,SAAS,CAAC;AAC1D,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AAGnC,MAAM,MAAM,mBAAmB,CAC3B,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,UAAU,GAAG,GAAG,IACjC,YAAY,CAAC,sBAAsB,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,CAAC;AAE7D,MAAM,WAAW,qBAAqB,CAClC,gBAAgB,GAAG,UAAU,CAAC,KAAK,CAAC,EACpC,eAAe,SAAS,sBAAsB,GAAG,sBAAsB,GAAG,gBAAgB,EAC1F,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,UAAU,GAAG,GAAG,CACnC,SAAQ,kBAAkB,CAAC,sBAAsB,CAAC,QAAQ,EAAE,QAAQ,CAAC,EAAE,gBAAgB,EAAE,eAAe,CAAC;CAAG;AAE9G,MAAM,MAAM,WAAW,CACnB,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,UAAU,GAAG,GAAG,IACjC,gBAAgB,CAAC,sBAAsB,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,CAAC;AAEjE,MAAM,WAAW,sBAAsB,CACnC,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,UAAU,GAAG,GAAG,CACnC,SAAQ,2BAA2B,CAAC,QAAQ,EAAE,QAAQ,CAAC;IACrD;;OAEG;IACH,IAAI,EAAE,MAAM,CAAC;IAEb;;;OAGG;IACH,CAAC,EAAE,OAAO,CAAC,UAAU,CAAC;IAEtB;;;;;;;;;;;OAWG;IACH,eAAe,CAAC,QAAQ,EAAE,MAAM,EAAE,SAAS,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAErE;;;;;;;;;;;;;OAaG;IACH,gBAAgB,CAAC,QAAQ,CAAC,EAAE,MAAM,EAAE,SAAS,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,CAAC,CAAC;IAE9E;;OAEG;IACH,YAAY,CAAC,OAAO,CAAC,EAAE,mBAAmB,GAAG,OAAO,CAAC,sBAAsB,CAAC,CAAC;CAChF;AAED,MAAM,MAAM,qBAAqB,CAC7B,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,UAAU,GAAG,GAAG,IACjC,cAAc,CAAC,sBAAsB,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,CAAC;AAE/D;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8EG;AACH,qBAAa,cAAc,CACvB,gBAAgB,GAAG,UAAU,CAAC,KAAK,CAAC,EACpC,eAAe,SAAS,sBAAsB,GAAG,sBAAsB,GAAG,gBAAgB,CAC5F,SAAQ,WAAW,CAAC,sBAAsB,EAAE,gBAAgB,EAAE,eAAe,CAAC;IAC5E;;OAEG;gBACS,OAAO,CAAC,EAAE,qBAAqB,CAAC,gBAAgB,EAAE,eAAe,CAAC;cAS3D,oBAAoB;uBA2BX,MAAM;oBAMT,UAAU;;wCAiBW,mBAAmB;oCAevB,MAAM,eAAe,MAAM;sCAKzB,MAAM,cAAc,MAAM;;YA7DxD,YAAY;YAqCZ,UAAU;CAiC3B;AAED,UAAU,2BAA2B;IACjC,OAAO,CAAC,EAAE,mBAAmB,CAAC;IAC9B,CAAC,EAAE,OAAO,CAAC,UAAU,GAAG,IAAI,CAAC;IAC7B,cAAc,EAAE,eAAe,CAAC;IAChC,aAAa,CAAC,EAAE,aAAa,CAAC;IAC9B,gBAAgB,CAAC,EAAE,sBAAsB,CAAC;IAC1C,kBAAkB,EAAE,MAAM,CAAC;IAC3B,eAAe,CAAC,EAAE,MAAM,CAAC;CAC5B;AAED,UAAU,gCAAgC;IACtC,YAAY,EAAE,oBAAoB,CAAC,cAAc,CAAC,CAAC;IACnD,OAAO,CAAC,EAAE,mBAAmB,CAAC;IAC9B,CAAC,EAAE,OAAO,CAAC,UAAU,GAAG,IAAI,CAAC;IAC7B,kBAAkB,EAAE,MAAM,CAAC;IAC3B,eAAe,CAAC,EAAE,MAAM,CAAC;CAC5B;AASD,gBAAgB;AAChB,wBAAsB,0BAA0B,CAC5C,OAAO,EAAE,2BAA2B,GAAG,gCAAgC,oBAmC1E;AAED;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,wBAAgB,mBAAmB,CAC/B,OAAO,SAAS,sBAAsB,GAAG,sBAAsB,EAC/D,QAAQ,SAAS,UAAU,GAAG,sBAAsB,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,EAC1E,MAAM,CAAC,EAAE,YAAY,CAAC,OAAO,EAAE,QAAQ,CAAC,kDAEzC"}
@@ -18,13 +18,15 @@ import { parseDocument } from 'htmlparser2';
18
18
  * and then invokes the user-provided {@link CheerioCrawlerOptions.requestHandler} to extract page data
19
19
  * using a [jQuery](https://jquery.com/)-like interface to the parsed HTML DOM.
20
20
  *
21
- * The source URLs are represented using {@link Request} objects that are fed from
22
- * {@link RequestList} or {@link RequestQueue} instances provided by the {@link CheerioCrawlerOptions.requestList}
23
- * or {@link CheerioCrawlerOptions.requestQueue} constructor options, respectively.
21
+ * The source URLs are represented using {@link Request} objects that are fed from the
22
+ * {@link IRequestManager|request manager} provided via the {@link CheerioCrawlerOptions.requestManager|`requestManager`}
23
+ * constructor option (a {@link RequestQueue} is itself a request manager). To read from a read-only source such
24
+ * as a {@link RequestList} while still being able to enqueue new requests, combine it with a queue into a
25
+ * {@link RequestManagerTandem} via {@link IRequestLoader.toTandem|`requestLoader.toTandem()`} and pass the
26
+ * result as `requestManager`.
24
27
  *
25
- * If both {@link CheerioCrawlerOptions.requestList} and {@link CheerioCrawlerOptions.requestQueue} are used,
26
- * the instance first processes URLs from the {@link RequestList} and automatically enqueues all of them
27
- * to {@link RequestQueue} before it starts their processing. This ensures that a single URL is not crawled multiple times.
28
+ * > The {@link CheerioCrawlerOptions.requestList|`requestList`} and {@link CheerioCrawlerOptions.requestQueue|`requestQueue`}
29
+ * > options are deprecated; they are still accepted and folded into a single `requestManager` for back-compat.
28
30
  *
29
31
  * The crawler finishes when there are no more {@link Request} objects to crawl.
30
32
  *
@@ -132,9 +134,12 @@ export class CheerioCrawler extends HttpCrawler {
132
134
  return {
133
135
  enqueueLinks: async (enqueueOptions) => {
134
136
  return (await cheerioCrawlerEnqueueLinks({
135
- options: { ...enqueueOptions, limit: this.calculateEnqueuedRequestLimit(enqueueOptions?.limit) },
137
+ options: {
138
+ ...enqueueOptions,
139
+ limit: await this.calculateEnqueuedRequestLimit(enqueueOptions?.limit),
140
+ },
136
141
  $: crawlingContext.$,
137
- requestQueue: await this.getRequestQueue(),
142
+ requestManager: await this.getRequestManager(),
138
143
  robotsTxtFile: await this.getRobotsTxtFileForUrl(crawlingContext.request.url),
139
144
  onSkippedRequest: this.handleSkippedRequest,
140
145
  originalRequestUrl: crawlingContext.request.url,
@@ -181,7 +186,7 @@ export async function cheerioCrawlerEnqueueLinks(options) {
181
186
  });
182
187
  }
183
188
  return enqueueLinks({
184
- requestQueue: options.requestQueue,
189
+ requestManager: options.requestManager,
185
190
  robotsTxtFile: options.robotsTxtFile,
186
191
  onSkippedRequest: options.onSkippedRequest,
187
192
  urls,
@@ -1 +1 @@
1
- {"version":3,"file":"cheerio-crawler.js","sourceRoot":"","sources":["../../src/internals/cheerio-crawler.ts"],"names":[],"mappings":"AAaA,OAAO,EACH,YAAY,EACZ,WAAW,EACX,sBAAsB,EACtB,sCAAsC,EACtC,MAAM,GACT,MAAM,eAAe,CAAC;AAEvB,OAAO,EAAoB,sBAAsB,EAAsB,MAAM,gBAAgB,CAAC;AAE9F,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AACnC,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AA2E5C;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4EG;AACH,MAAM,OAAO,cAGX,SAAQ,WAAsE;IAC5E;;OAEG;IACH,YAAY,OAAkE;QAC1E,MAAM,EAAE,sBAAsB,EAAE,GAAG,IAAI,EAAE,GAAG,OAAO,IAAI,EAAE,CAAC;QAE1D,KAAK,CAAC;YACF,GAAG,IAAI;YACP,sBAAsB,EAAE,sBAAsB,IAAI,CAAC,GAAG,EAAE,CAAC,IAAI,CAAC,oBAAoB,EAAE,CAAC;SACxF,CAAC,CAAC;IACP,CAAC;IAEkB,oBAAoB;QACnC,OAAO,KAAK;aACP,oBAAoB,EAAE;aACtB,OAAO,CAAC;YACL,MAAM,EAAE,KAAK,EAAE,OAAO,EAAE,EAAE,CAAC,MAAM,IAAI,CAAC,YAAY,CAAC,OAAO,CAAC;SAC9D,CAAC;aACD,OAAO,CAAC,EAAE,MAAM,EAAE,KAAK,EAAE,OAAO,EAAE,EAAE,CAAC,MAAM,IAAI,CAAC,UAAU,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;IAChF,CAAC;IAEO,KAAK,CAAC,YAAY,CAAC,eAA4C;QACnE,IAAI,CAAC;YACD,MAAM,KAAK,GAAG,eAAe,CAAC,WAAW,CAAC,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;YAC/D,MAAM,IAAI,GAAG,MAAM,CAAC,QAAQ,CAAC,eAAe,CAAC,IAAI,CAAC;gBAC9C,CAAC,CAAC,eAAe,CAAC,IAAI,CAAC,QAAQ,CAAC,eAAe,CAAC,WAAW,CAAC,QAAQ,CAAC;gBACrE,CAAC,CAAC,eAAe,CAAC,IAAI,CAAC;YAC3B,MAAM,GAAG,GAAG,aAAa,CAAC,IAAI,EAAE,EAAE,cAAc,EAAE,IAAI,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC,CAAC;YAC1E,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,GAAG,EAAE;gBACxB,GAAG,EAAE,EAAE,cAAc,EAAE,IAAI,EAAE,OAAO,EAAE,KAAK,EAAE;aAC9B,CAAC,CAAC;YAErB,OAAO;gBACH,CAAC;gBACD,IAAI;aACP,CAAC;QACN,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACX,IAAI,GAAG,YAAY,sBAAsB,EAAE,CAAC;gBACxC,OAAO;oBACH,IAAI,IAAI;wBACJ,MAAM,IAAI,sBAAsB,CAC5B,kEAAkE,EAClE,EAAE,KAAK,EAAE,GAAG,EAAE,CACjB,CAAC;oBACN,CAAC;oBACD,IAAI,CAAC;wBACD,MAAM,IAAI,sBAAsB,CAC5B,+DAA+D,EAC/D,EAAE,KAAK,EAAE,GAAG,EAAE,CACjB,CAAC;oBACN,CAAC;iBACJ,CAAC;YACN,CAAC;YAED,MAAM,GAAG,CAAC;QACd,CAAC;IACL,CAAC;IAEO,KAAK,CAAC,UAAU,CAAC,eAAgE;QACrF,MAAM,oBAAoB,GAAG,eAAe,CAAC,YAAY,CAAC;QAE1D,OAAO;YACH,YAAY,EAAE,KAAK,EAAE,cAAoC,EAAE,EAAE;gBACzD,OAAO,CAAC,MAAM,0BAA0B,CAAC;oBACrC,OAAO,EAAE,EAAE,GAAG,cAAc,EAAE,KAAK,EAAE,IAAI,CAAC,6BAA6B,CAAC,cAAc,EAAE,KAAK,CAAC,EAAE;oBAChG,CAAC,EAAE,eAAe,CAAC,CAAC;oBACpB,YAAY,EAAE,MAAM,IAAI,CAAC,eAAe,EAAE;oBAC1C,aAAa,EAAE,MAAM,IAAI,CAAC,sBAAsB,CAAC,eAAe,CAAC,OAAO,CAAC,GAAG,CAAC;oBAC7E,gBAAgB,EAAE,IAAI,CAAC,oBAAoB;oBAC3C,kBAAkB,EAAE,eAAe,CAAC,OAAO,CAAC,GAAG;oBAC/C,eAAe,EAAE,eAAe,CAAC,OAAO,CAAC,SAAS;oBAClD,YAAY,EAAE,oBAAoB;iBACrC,CAAC,CAA2B,CAAC,CAAC,2BAA2B;YAC9D,CAAC;YACD,eAAe,EAAE,KAAK,EAAE,QAAgB,EAAE,UAAmB,EAAE,EAAE;gBAC7D,IAAI,eAAe,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,GAAG,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;oBACjD,MAAM,IAAI,KAAK,CAAC,aAAa,QAAQ,cAAc,CAAC,CAAC;gBACzD,CAAC;YACL,CAAC;YACD,gBAAgB,EAAE,KAAK,EAAE,QAAiB,EAAE,SAAkB,EAAE,EAAE;gBAC9D,IAAI,QAAQ,EAAE,CAAC;oBACX,MAAM,eAAe,CAAC,eAAe,CAAC,QAAQ,EAAE,SAAS,CAAC,CAAC;gBAC/D,CAAC;gBAED,OAAO,eAAe,CAAC,CAAC,CAAC;YAC7B,CAAC;SACJ,CAAC;IACN,CAAC;CACJ;AAoBD,gBAAgB;AAChB,SAAS,oBAAoB,CACzB,OAAuE;IAEvE,OAAO,CAAC,CAAE,OAA4C,CAAC,YAAY,CAAC;AACxE,CAAC;AAED,gBAAgB;AAChB,MAAM,CAAC,KAAK,UAAU,0BAA0B,CAC5C,OAAuE;IAEvE,MAAM,EAAE,OAAO,EAAE,mBAAmB,EAAE,CAAC,EAAE,kBAAkB,EAAE,eAAe,EAAE,GAAG,OAAO,CAAC;IACzF,IAAI,CAAC,CAAC,EAAE,CAAC;QACL,MAAM,IAAI,KAAK,CAAC,wDAAwD,CAAC,CAAC;IAC9E,CAAC;IAED,MAAM,OAAO,GAAG,sCAAsC,CAAC;QACnD,eAAe,EAAE,mBAAmB,EAAE,QAAQ;QAC9C,eAAe;QACf,kBAAkB;QAClB,mBAAmB,EAAE,mBAAmB,EAAE,OAAO;KACpD,CAAC,CAAC;IAEH,MAAM,IAAI,GAAG,sBAAsB,CAC/B,CAAC,EACD,mBAAmB,EAAE,QAAQ,IAAI,GAAG,EACpC,mBAAmB,EAAE,OAAO,IAAI,eAAe,IAAI,kBAAkB,CACxE,CAAC;IAEF,IAAI,oBAAoB,CAAC,OAAO,CAAC,EAAE,CAAC;QAChC,OAAO,OAAO,CAAC,YAAY,CAAC;YACxB,IAAI;YACJ,OAAO;YACP,GAAG,mBAAmB;SACzB,CAAC,CAAC;IACP,CAAC;IACD,OAAO,YAAY,CAAC;QAChB,YAAY,EAAE,OAAO,CAAC,YAAY;QAClC,aAAa,EAAE,OAAO,CAAC,aAAa;QACpC,gBAAgB,EAAE,OAAO,CAAC,gBAAgB;QAC1C,IAAI;QACJ,OAAO;QACP,GAAG,mBAAmB;KACzB,CAAC,CAAC;AACP,CAAC;AAED;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,MAAM,UAAU,mBAAmB,CAGjC,MAAwC;IACtC,OAAO,MAAM,CAAC,MAAM,CAAU,MAAM,CAAC,CAAC;AAC1C,CAAC"}
1
+ {"version":3,"file":"cheerio-crawler.js","sourceRoot":"","sources":["../../src/internals/cheerio-crawler.ts"],"names":[],"mappings":"AAaA,OAAO,EACH,YAAY,EACZ,WAAW,EACX,sBAAsB,EACtB,sCAAsC,EACtC,MAAM,GACT,MAAM,eAAe,CAAC;AAEvB,OAAO,EAAoB,sBAAsB,EAAsB,MAAM,gBAAgB,CAAC;AAE9F,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AACnC,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AA2E5C;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8EG;AACH,MAAM,OAAO,cAGX,SAAQ,WAAsE;IAC5E;;OAEG;IACH,YAAY,OAAkE;QAC1E,MAAM,EAAE,sBAAsB,EAAE,GAAG,IAAI,EAAE,GAAG,OAAO,IAAI,EAAE,CAAC;QAE1D,KAAK,CAAC;YACF,GAAG,IAAI;YACP,sBAAsB,EAAE,sBAAsB,IAAI,CAAC,GAAG,EAAE,CAAC,IAAI,CAAC,oBAAoB,EAAE,CAAC;SACxF,CAAC,CAAC;IACP,CAAC;IAEkB,oBAAoB;QACnC,OAAO,KAAK;aACP,oBAAoB,EAAE;aACtB,OAAO,CAAC;YACL,MAAM,EAAE,KAAK,EAAE,OAAO,EAAE,EAAE,CAAC,MAAM,IAAI,CAAC,YAAY,CAAC,OAAO,CAAC;SAC9D,CAAC;aACD,OAAO,CAAC,EAAE,MAAM,EAAE,KAAK,EAAE,OAAO,EAAE,EAAE,CAAC,MAAM,IAAI,CAAC,UAAU,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;IAChF,CAAC;IAEO,KAAK,CAAC,YAAY,CAAC,eAA4C;QACnE,IAAI,CAAC;YACD,MAAM,KAAK,GAAG,eAAe,CAAC,WAAW,CAAC,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;YAC/D,MAAM,IAAI,GAAG,MAAM,CAAC,QAAQ,CAAC,eAAe,CAAC,IAAI,CAAC;gBAC9C,CAAC,CAAC,eAAe,CAAC,IAAI,CAAC,QAAQ,CAAC,eAAe,CAAC,WAAW,CAAC,QAAQ,CAAC;gBACrE,CAAC,CAAC,eAAe,CAAC,IAAI,CAAC;YAC3B,MAAM,GAAG,GAAG,aAAa,CAAC,IAAI,EAAE,EAAE,cAAc,EAAE,IAAI,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC,CAAC;YAC1E,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,GAAG,EAAE;gBACxB,GAAG,EAAE,EAAE,cAAc,EAAE,IAAI,EAAE,OAAO,EAAE,KAAK,EAAE;aAC9B,CAAC,CAAC;YAErB,OAAO;gBACH,CAAC;gBACD,IAAI;aACP,CAAC;QACN,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACX,IAAI,GAAG,YAAY,sBAAsB,EAAE,CAAC;gBACxC,OAAO;oBACH,IAAI,IAAI;wBACJ,MAAM,IAAI,sBAAsB,CAC5B,kEAAkE,EAClE,EAAE,KAAK,EAAE,GAAG,EAAE,CACjB,CAAC;oBACN,CAAC;oBACD,IAAI,CAAC;wBACD,MAAM,IAAI,sBAAsB,CAC5B,+DAA+D,EAC/D,EAAE,KAAK,EAAE,GAAG,EAAE,CACjB,CAAC;oBACN,CAAC;iBACJ,CAAC;YACN,CAAC;YAED,MAAM,GAAG,CAAC;QACd,CAAC;IACL,CAAC;IAEO,KAAK,CAAC,UAAU,CAAC,eAAgE;QACrF,MAAM,oBAAoB,GAAG,eAAe,CAAC,YAAY,CAAC;QAE1D,OAAO;YACH,YAAY,EAAE,KAAK,EAAE,cAAoC,EAAE,EAAE;gBACzD,OAAO,CAAC,MAAM,0BAA0B,CAAC;oBACrC,OAAO,EAAE;wBACL,GAAG,cAAc;wBACjB,KAAK,EAAE,MAAM,IAAI,CAAC,6BAA6B,CAAC,cAAc,EAAE,KAAK,CAAC;qBACzE;oBACD,CAAC,EAAE,eAAe,CAAC,CAAC;oBACpB,cAAc,EAAE,MAAM,IAAI,CAAC,iBAAiB,EAAE;oBAC9C,aAAa,EAAE,MAAM,IAAI,CAAC,sBAAsB,CAAC,eAAe,CAAC,OAAO,CAAC,GAAG,CAAC;oBAC7E,gBAAgB,EAAE,IAAI,CAAC,oBAAoB;oBAC3C,kBAAkB,EAAE,eAAe,CAAC,OAAO,CAAC,GAAG;oBAC/C,eAAe,EAAE,eAAe,CAAC,OAAO,CAAC,SAAS;oBAClD,YAAY,EAAE,oBAAoB;iBACrC,CAAC,CAA2B,CAAC,CAAC,2BAA2B;YAC9D,CAAC;YACD,eAAe,EAAE,KAAK,EAAE,QAAgB,EAAE,UAAmB,EAAE,EAAE;gBAC7D,IAAI,eAAe,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,GAAG,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;oBACjD,MAAM,IAAI,KAAK,CAAC,aAAa,QAAQ,cAAc,CAAC,CAAC;gBACzD,CAAC;YACL,CAAC;YACD,gBAAgB,EAAE,KAAK,EAAE,QAAiB,EAAE,SAAkB,EAAE,EAAE;gBAC9D,IAAI,QAAQ,EAAE,CAAC;oBACX,MAAM,eAAe,CAAC,eAAe,CAAC,QAAQ,EAAE,SAAS,CAAC,CAAC;gBAC/D,CAAC;gBAED,OAAO,eAAe,CAAC,CAAC,CAAC;YAC7B,CAAC;SACJ,CAAC;IACN,CAAC;CACJ;AAoBD,gBAAgB;AAChB,SAAS,oBAAoB,CACzB,OAAuE;IAEvE,OAAO,CAAC,CAAE,OAA4C,CAAC,YAAY,CAAC;AACxE,CAAC;AAED,gBAAgB;AAChB,MAAM,CAAC,KAAK,UAAU,0BAA0B,CAC5C,OAAuE;IAEvE,MAAM,EAAE,OAAO,EAAE,mBAAmB,EAAE,CAAC,EAAE,kBAAkB,EAAE,eAAe,EAAE,GAAG,OAAO,CAAC;IACzF,IAAI,CAAC,CAAC,EAAE,CAAC;QACL,MAAM,IAAI,KAAK,CAAC,wDAAwD,CAAC,CAAC;IAC9E,CAAC;IAED,MAAM,OAAO,GAAG,sCAAsC,CAAC;QACnD,eAAe,EAAE,mBAAmB,EAAE,QAAQ;QAC9C,eAAe;QACf,kBAAkB;QAClB,mBAAmB,EAAE,mBAAmB,EAAE,OAAO;KACpD,CAAC,CAAC;IAEH,MAAM,IAAI,GAAG,sBAAsB,CAC/B,CAAC,EACD,mBAAmB,EAAE,QAAQ,IAAI,GAAG,EACpC,mBAAmB,EAAE,OAAO,IAAI,eAAe,IAAI,kBAAkB,CACxE,CAAC;IAEF,IAAI,oBAAoB,CAAC,OAAO,CAAC,EAAE,CAAC;QAChC,OAAO,OAAO,CAAC,YAAY,CAAC;YACxB,IAAI;YACJ,OAAO;YACP,GAAG,mBAAmB;SACzB,CAAC,CAAC;IACP,CAAC;IACD,OAAO,YAAY,CAAC;QAChB,cAAc,EAAE,OAAO,CAAC,cAAc;QACtC,aAAa,EAAE,OAAO,CAAC,aAAa;QACpC,gBAAgB,EAAE,OAAO,CAAC,gBAAgB;QAC1C,IAAI;QACJ,OAAO;QACP,GAAG,mBAAmB;KACzB,CAAC,CAAC;AACP,CAAC;AAED;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,MAAM,UAAU,mBAAmB,CAGjC,MAAwC;IACtC,OAAO,MAAM,CAAC,MAAM,CAAU,MAAM,CAAC,CAAC;AAC1C,CAAC"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@crawlee/cheerio",
3
- "version": "4.0.0-beta.64",
3
+ "version": "4.0.0-beta.66",
4
4
  "description": "The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs (not only) with headless Chrome and Puppeteer.",
5
5
  "engines": {
6
6
  "node": ">=22.0.0"
@@ -47,9 +47,9 @@
47
47
  "access": "public"
48
48
  },
49
49
  "dependencies": {
50
- "@crawlee/http": "4.0.0-beta.64",
51
- "@crawlee/types": "4.0.0-beta.64",
52
- "@crawlee/utils": "4.0.0-beta.64",
50
+ "@crawlee/http": "4.0.0-beta.66",
51
+ "@crawlee/types": "4.0.0-beta.66",
52
+ "@crawlee/utils": "4.0.0-beta.66",
53
53
  "cheerio": "^1.0.0",
54
54
  "htmlparser2": "^10.0.0",
55
55
  "tslib": "^2.8.1"
@@ -61,5 +61,5 @@
61
61
  }
62
62
  }
63
63
  },
64
- "gitHead": "76ba0fe1101b1e14fb5e1962540107775ad4e513"
64
+ "gitHead": "0890633b51caf16d93940be6f7da7881ad839992"
65
65
  }