@crawlee/cheerio 4.0.0-beta.10 → 4.0.0-beta.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,18 +1,21 @@
1
- import type { IncomingMessage } from 'node:http';
2
1
  import type { Configuration, EnqueueLinksOptions, ErrorHandler, GetUserDataFromRequest, HttpCrawlerOptions, InternalHttpCrawlingContext, InternalHttpHook, RequestHandler, RequestProvider, RouterRoutes, SkippedRequestCallback } from '@crawlee/http';
3
2
  import { HttpCrawler } from '@crawlee/http';
4
- import type { Dictionary } from '@crawlee/types';
3
+ import type { BatchAddRequestsResult, Dictionary } from '@crawlee/types';
5
4
  import { type CheerioRoot, type RobotsTxtFile } from '@crawlee/utils';
6
5
  import * as cheerio from 'cheerio';
7
6
  export type CheerioErrorHandler<UserData extends Dictionary = any, // with default to Dictionary we cant use a typed router in untyped crawler
8
7
  JSONData extends Dictionary = any> = ErrorHandler<CheerioCrawlingContext<UserData, JSONData>>;
9
- export interface CheerioCrawlerOptions<UserData extends Dictionary = any, // with default to Dictionary we cant use a typed router in untyped crawler
10
- JSONData extends Dictionary = any> extends HttpCrawlerOptions<CheerioCrawlingContext<UserData, JSONData>> {
8
+ export interface CheerioCrawlerOptions<ExtendedContext extends CheerioCrawlingContext, UserData extends Dictionary = any, // with default to Dictionary we cant use a typed router in untyped crawler
9
+ JSONData extends Dictionary = any> extends HttpCrawlerOptions<CheerioCrawlingContext<UserData, JSONData>, ExtendedContext> {
11
10
  }
12
11
  export type CheerioHook<UserData extends Dictionary = any, // with default to Dictionary we cant use a typed router in untyped crawler
13
12
  JSONData extends Dictionary = any> = InternalHttpHook<CheerioCrawlingContext<UserData, JSONData>>;
14
13
  export interface CheerioCrawlingContext<UserData extends Dictionary = any, // with default to Dictionary we cant use a typed router in untyped crawler
15
- JSONData extends Dictionary = any> extends InternalHttpCrawlingContext<UserData, JSONData, CheerioCrawler> {
14
+ JSONData extends Dictionary = any> extends InternalHttpCrawlingContext<UserData, JSONData> {
15
+ /**
16
+ * The raw HTML content of the web page as a string.
17
+ */
18
+ body: string;
16
19
  /**
17
20
  * The [Cheerio](https://cheerio.js.org/) object with parsed HTML.
18
21
  * Cheerio is available only for HTML and XML content types.
@@ -46,6 +49,10 @@ JSONData extends Dictionary = any> extends InternalHttpCrawlingContext<UserData,
46
49
  * ```
47
50
  */
48
51
  parseWithCheerio(selector?: string, timeoutMs?: number): Promise<CheerioRoot>;
52
+ /**
53
+ * Helper function for extracting URLs from the parsed HTML and adding them to the request queue.
54
+ */
55
+ enqueueLinks(options?: EnqueueLinksOptions): Promise<BatchAddRequestsResult>;
49
56
  }
50
57
  export type CheerioRequestHandler<UserData extends Dictionary = any, // with default to Dictionary we cant use a typed router in untyped crawler
51
58
  JSONData extends Dictionary = any> = RequestHandler<CheerioCrawlingContext<UserData, JSONData>>;
@@ -126,20 +133,13 @@ JSONData extends Dictionary = any> = RequestHandler<CheerioCrawlingContext<UserD
126
133
  * ```
127
134
  * @category Crawlers
128
135
  */
129
- export declare class CheerioCrawler extends HttpCrawler<CheerioCrawlingContext> {
136
+ export declare class CheerioCrawler<ExtendedContext extends CheerioCrawlingContext = CheerioCrawlingContext> extends HttpCrawler<CheerioCrawlingContext, ExtendedContext> {
130
137
  /**
131
138
  * All `CheerioCrawler` parameters are passed via an options object.
132
139
  */
133
- constructor(options?: CheerioCrawlerOptions, config?: Configuration);
134
- protected _parseHTML(response: IncomingMessage, isXml: boolean, crawlingContext: CheerioCrawlingContext): Promise<{
135
- // @ts-ignore optional peer dependency or compatibility with es2022
136
- dom: import("domhandler").Document;
137
- $: cheerio.CheerioAPI;
138
- body: string;
139
- // @ts-ignore optional peer dependency or compatibility with es2022
140
- enqueueLinks: (enqueueOptions?: EnqueueLinksOptions) => Promise<import("@crawlee/types").BatchAddRequestsResult>;
141
- }>;
142
- protected _runRequestHandler(context: CheerioCrawlingContext): Promise<void>;
140
+ constructor(options?: CheerioCrawlerOptions<ExtendedContext>, config?: Configuration);
141
+ private parseContent;
142
+ private addHelpers;
143
143
  }
144
144
  interface EnqueueLinksInternalOptions {
145
145
  options?: EnqueueLinksOptions;
@@ -151,8 +151,7 @@ interface EnqueueLinksInternalOptions {
151
151
  finalRequestUrl?: string;
152
152
  }
153
153
  /** @internal */
154
- // @ts-ignore optional peer dependency or compatibility with es2022
155
- export declare function cheerioCrawlerEnqueueLinks({ options, $, requestQueue, robotsTxtFile, onSkippedRequest, originalRequestUrl, finalRequestUrl, }: EnqueueLinksInternalOptions): Promise<import("@crawlee/types").BatchAddRequestsResult>;
154
+ export declare function cheerioCrawlerEnqueueLinks({ options, $, requestQueue, robotsTxtFile, onSkippedRequest, originalRequestUrl, finalRequestUrl, }: EnqueueLinksInternalOptions): Promise<BatchAddRequestsResult>;
156
155
  /**
157
156
  * Creates new {@link Router} instance that works based on request labels.
158
157
  * This instance can then serve as a `requestHandler` of your {@link CheerioCrawler}.
@@ -1 +1 @@
1
- {"version":3,"file":"cheerio-crawler.d.ts","sourceRoot":"","sources":["../../src/internals/cheerio-crawler.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,WAAW,CAAC;AAGjD,OAAO,KAAK,EACR,aAAa,EACb,mBAAmB,EACnB,YAAY,EACZ,sBAAsB,EACtB,kBAAkB,EAClB,2BAA2B,EAC3B,gBAAgB,EAChB,cAAc,EACd,eAAe,EACf,YAAY,EACZ,sBAAsB,EACzB,MAAM,eAAe,CAAC;AACvB,OAAO,EAAgB,WAAW,EAAkD,MAAM,eAAe,CAAC;AAC1G,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AACjD,OAAO,EAAE,KAAK,WAAW,EAA0B,KAAK,aAAa,EAAE,MAAM,gBAAgB,CAAC;AAE9F,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AAGnC,MAAM,MAAM,mBAAmB,CAC3B,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,UAAU,GAAG,GAAG,IACjC,YAAY,CAAC,sBAAsB,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,CAAC;AAE7D,MAAM,WAAW,qBAAqB,CAClC,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,UAAU,GAAG,GAAG,CACnC,SAAQ,kBAAkB,CAAC,sBAAsB,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC;CAAG;AAE3E,MAAM,MAAM,WAAW,CACnB,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,UAAU,GAAG,GAAG,IACjC,gBAAgB,CAAC,sBAAsB,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,CAAC;AAEjE,MAAM,WAAW,sBAAsB,CACnC,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,UAAU,GAAG,GAAG,CACnC,SAAQ,2BAA2B,CAAC,QAAQ,EAAE,QAAQ,EAAE,cAAc,CAAC;IACrE;;;OAGG;IACH,CAAC,EAAE,OAAO,CAAC,UAAU,CAAC;IAEtB;;;;;;;;;;;OAWG;IACH,eAAe,CAAC,QAAQ,EAAE,MAAM,EAAE,SAAS,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAErE;;;;;;;;;;;;;OAaG;IACH,gBAAgB,CAAC,QAAQ,CAAC,EAAE,MAAM,EAAE,SAAS,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,CAAC,CAAC;CACjF;AAED,MAAM,MAAM,qBAAqB,CAC7B,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,UAAU,GAAG,GAAG,IACjC,cAAc,CAAC,sBAAsB,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,CAAC;AAE/D;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4EG;AACH,qBAAa,cAAe,SAAQ,WAAW,CAAC,sBAAsB,CAAC;IACnE;;OAEG;gBAES,OAAO,CAAC,EAAE,qBAAqB,EAAE,MAAM,CAAC,EAAE,aAAa;cAI1C,UAAU,CAC/B,QAAQ,EAAE,eAAe,EACzB,KAAK,EAAE,OAAO,EACd,eAAe,EAAE,sBAAsB;;;;wCAYG,mBAAmB;;cAcxC,kBAAkB,CAAC,OAAO,EAAE,sBAAsB;CAgB9E;AAED,UAAU,2BAA2B;IACjC,OAAO,CAAC,EAAE,mBAAmB,CAAC;IAC9B,CAAC,EAAE,OAAO,CAAC,UAAU,GAAG,IAAI,CAAC;IAC7B,YAAY,EAAE,eAAe,CAAC;IAC9B,aAAa,CAAC,EAAE,aAAa,CAAC;IAC9B,gBAAgB,CAAC,EAAE,sBAAsB,CAAC;IAC1C,kBAAkB,EAAE,MAAM,CAAC;IAC3B,eAAe,CAAC,EAAE,MAAM,CAAC;CAC5B;AAED,gBAAgB;AAChB,wBAAsB,0BAA0B,CAAC,EAC7C,OAAO,EACP,CAAC,EACD,YAAY,EACZ,aAAa,EACb,gBAAgB,EAChB,kBAAkB,EAClB,eAAe,GAClB,EAAE,2BAA2B,4DA0B7B;AAED;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,wBAAgB,mBAAmB,CAC/B,OAAO,SAAS,sBAAsB,GAAG,sBAAsB,EAC/D,QAAQ,SAAS,UAAU,GAAG,sBAAsB,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,EAC1E,MAAM,CAAC,EAAE,YAAY,CAAC,OAAO,EAAE,QAAQ,CAAC,kDAEzC"}
1
+ {"version":3,"file":"cheerio-crawler.d.ts","sourceRoot":"","sources":["../../src/internals/cheerio-crawler.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACR,aAAa,EACb,mBAAmB,EACnB,YAAY,EACZ,sBAAsB,EACtB,kBAAkB,EAClB,2BAA2B,EAC3B,gBAAgB,EAChB,cAAc,EACd,eAAe,EACf,YAAY,EACZ,sBAAsB,EACzB,MAAM,eAAe,CAAC;AACvB,OAAO,EAAgB,WAAW,EAAkD,MAAM,eAAe,CAAC;AAC1G,OAAO,KAAK,EAAE,sBAAsB,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AACzE,OAAO,EAAE,KAAK,WAAW,EAA0B,KAAK,aAAa,EAAE,MAAM,gBAAgB,CAAC;AAE9F,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AAGnC,MAAM,MAAM,mBAAmB,CAC3B,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,UAAU,GAAG,GAAG,IACjC,YAAY,CAAC,sBAAsB,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,CAAC;AAE7D,MAAM,WAAW,qBAAqB,CAClC,eAAe,SAAS,sBAAsB,EAC9C,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,UAAU,GAAG,GAAG,CACnC,SAAQ,kBAAkB,CAAC,sBAAsB,CAAC,QAAQ,EAAE,QAAQ,CAAC,EAAE,eAAe,CAAC;CAAG;AAE5F,MAAM,MAAM,WAAW,CACnB,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,UAAU,GAAG,GAAG,IACjC,gBAAgB,CAAC,sBAAsB,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,CAAC;AAEjE,MAAM,WAAW,sBAAsB,CACnC,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,UAAU,GAAG,GAAG,CACnC,SAAQ,2BAA2B,CAAC,QAAQ,EAAE,QAAQ,CAAC;IACrD;;OAEG;IACH,IAAI,EAAE,MAAM,CAAC;IAEb;;;OAGG;IACH,CAAC,EAAE,OAAO,CAAC,UAAU,CAAC;IAEtB;;;;;;;;;;;OAWG;IACH,eAAe,CAAC,QAAQ,EAAE,MAAM,EAAE,SAAS,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAErE;;;;;;;;;;;;;OAaG;IACH,gBAAgB,CAAC,QAAQ,CAAC,EAAE,MAAM,EAAE,SAAS,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,CAAC,CAAC;IAE9E;;OAEG;IACH,YAAY,CAAC,OAAO,CAAC,EAAE,mBAAmB,GAAG,OAAO,CAAC,sBAAsB,CAAC,CAAC;CAChF;AAED,MAAM,MAAM,qBAAqB,CAC7B,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,UAAU,GAAG,GAAG,IACjC,cAAc,CAAC,sBAAsB,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,CAAC;AAE/D;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4EG;AACH,qBAAa,cAAc,CACvB,eAAe,SAAS,sBAAsB,GAAG,sBAAsB,CACzE,SAAQ,WAAW,CAAC,sBAAsB,EAAE,eAAe,CAAC;IAC1D;;OAEG;gBACS,OAAO,CAAC,EAAE,qBAAqB,CAAC,eAAe,CAAC,EAAE,MAAM,CAAC,EAAE,aAAa;YAetE,YAAY;YAgBZ,UAAU;CA2B3B;AAED,UAAU,2BAA2B;IACjC,OAAO,CAAC,EAAE,mBAAmB,CAAC;IAC9B,CAAC,EAAE,OAAO,CAAC,UAAU,GAAG,IAAI,CAAC;IAC7B,YAAY,EAAE,eAAe,CAAC;IAC9B,aAAa,CAAC,EAAE,aAAa,CAAC;IAC9B,gBAAgB,CAAC,EAAE,sBAAsB,CAAC;IAC1C,kBAAkB,EAAE,MAAM,CAAC;IAC3B,eAAe,CAAC,EAAE,MAAM,CAAC;CAC5B;AAED,gBAAgB;AAChB,wBAAsB,0BAA0B,CAAC,EAC7C,OAAO,EACP,CAAC,EACD,YAAY,EACZ,aAAa,EACb,gBAAgB,EAChB,kBAAkB,EAClB,eAAe,GAClB,EAAE,2BAA2B,mCA0B7B;AAED;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,wBAAgB,mBAAmB,CAC/B,OAAO,SAAS,sBAAsB,GAAG,sBAAsB,EAC/D,QAAQ,SAAS,UAAU,GAAG,sBAAsB,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,EAC1E,MAAM,CAAC,EAAE,YAAY,CAAC,OAAO,EAAE,QAAQ,CAAC,kDAEzC"}
@@ -1,4 +1,3 @@
1
- import { text as readStreamToString } from 'node:stream/consumers';
2
1
  import { enqueueLinks, HttpCrawler, resolveBaseUrlForEnqueueLinksFiltering, Router } from '@crawlee/http';
3
2
  import { extractUrlsFromCheerio } from '@crawlee/utils';
4
3
  import * as cheerio from 'cheerio';
@@ -84,24 +83,36 @@ export class CheerioCrawler extends HttpCrawler {
84
83
  /**
85
84
  * All `CheerioCrawler` parameters are passed via an options object.
86
85
  */
87
- // eslint-disable-next-line @typescript-eslint/no-useless-constructor
88
86
  constructor(options, config) {
89
- super(options, config);
87
+ super({
88
+ ...options,
89
+ contextPipelineBuilder: () => this.buildContextPipeline()
90
+ .compose({
91
+ action: async (context) => await this.parseContent(context),
92
+ })
93
+ .compose({ action: async (context) => await this.addHelpers(context) }),
94
+ }, config);
90
95
  }
91
- async _parseHTML(response, isXml, crawlingContext) {
92
- const body = await readStreamToString(response);
96
+ async parseContent(crawlingContext) {
97
+ const isXml = crawlingContext.contentType.type.includes('xml');
98
+ const body = Buffer.isBuffer(crawlingContext.body)
99
+ ? crawlingContext.body.toString(crawlingContext.contentType.encoding)
100
+ : crawlingContext.body;
93
101
  const dom = parseDocument(body, { decodeEntities: true, xmlMode: isXml });
94
102
  const $ = cheerio.load(dom, {
95
103
  xml: { decodeEntities: true, xmlMode: isXml },
96
104
  });
97
105
  return {
98
- dom,
99
106
  $,
100
107
  body,
108
+ };
109
+ }
110
+ async addHelpers(crawlingContext) {
111
+ return {
101
112
  enqueueLinks: async (enqueueOptions) => {
102
113
  return cheerioCrawlerEnqueueLinks({
103
114
  options: enqueueOptions,
104
- $,
115
+ $: crawlingContext.$,
105
116
  requestQueue: await this.getRequestQueue(),
106
117
  robotsTxtFile: await this.getRobotsTxtFileForUrl(crawlingContext.request.url),
107
118
  onSkippedRequest: this.onSkippedRequest,
@@ -109,22 +120,19 @@ export class CheerioCrawler extends HttpCrawler {
109
120
  finalRequestUrl: crawlingContext.request.loadedUrl,
110
121
  });
111
122
  },
123
+ waitForSelector: async (selector, _timeoutMs) => {
124
+ if (crawlingContext.$(selector).get().length === 0) {
125
+ throw new Error(`Selector '${selector}' not found.`);
126
+ }
127
+ },
128
+ parseWithCheerio: async (selector, timeoutMs) => {
129
+ if (selector) {
130
+ await crawlingContext.waitForSelector(selector, timeoutMs);
131
+ }
132
+ return crawlingContext.$;
133
+ },
112
134
  };
113
135
  }
114
- async _runRequestHandler(context) {
115
- context.waitForSelector = async (selector, _timeoutMs) => {
116
- if (context.$(selector).get().length === 0) {
117
- throw new Error(`Selector '${selector}' not found.`);
118
- }
119
- };
120
- context.parseWithCheerio = async (selector, timeoutMs) => {
121
- if (selector) {
122
- await context.waitForSelector(selector, timeoutMs);
123
- }
124
- return context.$;
125
- };
126
- await super._runRequestHandler(context);
127
- }
128
136
  }
129
137
  /** @internal */
130
138
  export async function cheerioCrawlerEnqueueLinks({ options, $, requestQueue, robotsTxtFile, onSkippedRequest, originalRequestUrl, finalRequestUrl, }) {
@@ -1 +1 @@
1
- {"version":3,"file":"cheerio-crawler.js","sourceRoot":"","sources":["../../src/internals/cheerio-crawler.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,IAAI,IAAI,kBAAkB,EAAE,MAAM,uBAAuB,CAAC;AAenE,OAAO,EAAE,YAAY,EAAE,WAAW,EAAE,sCAAsC,EAAE,MAAM,EAAE,MAAM,eAAe,CAAC;AAE1G,OAAO,EAAoB,sBAAsB,EAAsB,MAAM,gBAAgB,CAAC;AAE9F,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AACnC,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AA+D5C;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4EG;AACH,MAAM,OAAO,cAAe,SAAQ,WAAmC;IACnE;;OAEG;IACH,qEAAqE;IACrE,YAAY,OAA+B,EAAE,MAAsB;QAC/D,KAAK,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;IAC3B,CAAC;IAEkB,KAAK,CAAC,UAAU,CAC/B,QAAyB,EACzB,KAAc,EACd,eAAuC;QAEvC,MAAM,IAAI,GAAG,MAAM,kBAAkB,CAAC,QAAQ,CAAC,CAAC;QAChD,MAAM,GAAG,GAAG,aAAa,CAAC,IAAI,EAAE,EAAE,cAAc,EAAE,IAAI,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC,CAAC;QAC1E,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,GAAG,EAAE;YACxB,GAAG,EAAE,EAAE,cAAc,EAAE,IAAI,EAAE,OAAO,EAAE,KAAK,EAAE;SAC9B,CAAC,CAAC;QAErB,OAAO;YACH,GAAG;YACH,CAAC;YACD,IAAI;YACJ,YAAY,EAAE,KAAK,EAAE,cAAoC,EAAE,EAAE;gBACzD,OAAO,0BAA0B,CAAC;oBAC9B,OAAO,EAAE,cAAc;oBACvB,CAAC;oBACD,YAAY,EAAE,MAAM,IAAI,CAAC,eAAe,EAAE;oBAC1C,aAAa,EAAE,MAAM,IAAI,CAAC,sBAAsB,CAAC,eAAe,CAAC,OAAO,CAAC,GAAG,CAAC;oBAC7E,gBAAgB,EAAE,IAAI,CAAC,gBAAgB;oBACvC,kBAAkB,EAAE,eAAe,CAAC,OAAO,CAAC,GAAG;oBAC/C,eAAe,EAAE,eAAe,CAAC,OAAO,CAAC,SAAS;iBACrD,CAAC,CAAC;YACP,CAAC;SACJ,CAAC;IACN,CAAC;IAEkB,KAAK,CAAC,kBAAkB,CAAC,OAA+B;QACvE,OAAO,CAAC,eAAe,GAAG,KAAK,EAAE,QAAiB,EAAE,UAAmB,EAAE,EAAE;YACvE,IAAI,OAAO,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,GAAG,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBACzC,MAAM,IAAI,KAAK,CAAC,aAAa,QAAQ,cAAc,CAAC,CAAC;YACzD,CAAC;QACL,CAAC,CAAC;QACF,OAAO,CAAC,gBAAgB,GAAG,KAAK,EAAE,QAAiB,EAAE,SAAkB,EAAE,EAAE;YACvE,IAAI,QAAQ,EAAE,CAAC;gBACX,MAAM,OAAO,CAAC,eAAe,CAAC,QAAQ,EAAE,SAAS,CAAC,CAAC;YACvD,CAAC;YAED,OAAO,OAAO,CAAC,CAAC,CAAC;QACrB,CAAC,CAAC;QAEF,MAAM,KAAK,CAAC,kBAAkB,CAAC,OAAO,CAAC,CAAC;IAC5C,CAAC;CACJ;AAYD,gBAAgB;AAChB,MAAM,CAAC,KAAK,UAAU,0BAA0B,CAAC,EAC7C,OAAO,EACP,CAAC,EACD,YAAY,EACZ,aAAa,EACb,gBAAgB,EAChB,kBAAkB,EAClB,eAAe,GACW;IAC1B,IAAI,CAAC,CAAC,EAAE,CAAC;QACL,MAAM,IAAI,KAAK,CAAC,wDAAwD,CAAC,CAAC;IAC9E,CAAC;IAED,MAAM,OAAO,GAAG,sCAAsC,CAAC;QACnD,eAAe,EAAE,OAAO,EAAE,QAAQ;QAClC,eAAe;QACf,kBAAkB;QAClB,mBAAmB,EAAE,OAAO,EAAE,OAAO;KACxC,CAAC,CAAC;IAEH,MAAM,IAAI,GAAG,sBAAsB,CAC/B,CAAC,EACD,OAAO,EAAE,QAAQ,IAAI,GAAG,EACxB,OAAO,EAAE,OAAO,IAAI,eAAe,IAAI,kBAAkB,CAC5D,CAAC;IAEF,OAAO,YAAY,CAAC;QAChB,YAAY;QACZ,aAAa;QACb,gBAAgB;QAChB,IAAI;QACJ,OAAO;QACP,GAAG,OAAO;KACb,CAAC,CAAC;AACP,CAAC;AAED;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,MAAM,UAAU,mBAAmB,CAGjC,MAAwC;IACtC,OAAO,MAAM,CAAC,MAAM,CAAU,MAAM,CAAC,CAAC;AAC1C,CAAC"}
1
+ {"version":3,"file":"cheerio-crawler.js","sourceRoot":"","sources":["../../src/internals/cheerio-crawler.ts"],"names":[],"mappings":"AAaA,OAAO,EAAE,YAAY,EAAE,WAAW,EAAE,sCAAsC,EAAE,MAAM,EAAE,MAAM,eAAe,CAAC;AAE1G,OAAO,EAAoB,sBAAsB,EAAsB,MAAM,gBAAgB,CAAC;AAE9F,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AACnC,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AA0E5C;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4EG;AACH,MAAM,OAAO,cAEX,SAAQ,WAAoD;IAC1D;;OAEG;IACH,YAAY,OAAgD,EAAE,MAAsB;QAChF,KAAK,CACD;YACI,GAAG,OAAO;YACV,sBAAsB,EAAE,GAAG,EAAE,CACzB,IAAI,CAAC,oBAAoB,EAAE;iBACtB,OAAO,CAAC;gBACL,MAAM,EAAE,KAAK,EAAE,OAAO,EAAE,EAAE,CAAC,MAAM,IAAI,CAAC,YAAY,CAAC,OAAO,CAAC;aAC9D,CAAC;iBACD,OAAO,CAAC,EAAE,MAAM,EAAE,KAAK,EAAE,OAAO,EAAE,EAAE,CAAC,MAAM,IAAI,CAAC,UAAU,CAAC,OAAO,CAAC,EAAE,CAAC;SAClF,EACD,MAAM,CACT,CAAC;IACN,CAAC;IAEO,KAAK,CAAC,YAAY,CAAC,eAA4C;QACnE,MAAM,KAAK,GAAG,eAAe,CAAC,WAAW,CAAC,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;QAC/D,MAAM,IAAI,GAAG,MAAM,CAAC,QAAQ,CAAC,eAAe,CAAC,IAAI,CAAC;YAC9C,CAAC,CAAC,eAAe,CAAC,IAAI,CAAC,QAAQ,CAAC,eAAe,CAAC,WAAW,CAAC,QAAQ,CAAC;YACrE,CAAC,CAAC,eAAe,CAAC,IAAI,CAAC;QAC3B,MAAM,GAAG,GAAG,aAAa,CAAC,IAAI,EAAE,EAAE,cAAc,EAAE,IAAI,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC,CAAC;QAC1E,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,GAAG,EAAE;YACxB,GAAG,EAAE,EAAE,cAAc,EAAE,IAAI,EAAE,OAAO,EAAE,KAAK,EAAE;SAC9B,CAAC,CAAC;QAErB,OAAO;YACH,CAAC;YACD,IAAI;SACP,CAAC;IACN,CAAC;IAEO,KAAK,CAAC,UAAU,CAAC,eAAgE;QACrF,OAAO;YACH,YAAY,EAAE,KAAK,EAAE,cAAoC,EAAE,EAAE;gBACzD,OAAO,0BAA0B,CAAC;oBAC9B,OAAO,EAAE,cAAc;oBACvB,CAAC,EAAE,eAAe,CAAC,CAAC;oBACpB,YAAY,EAAE,MAAM,IAAI,CAAC,eAAe,EAAE;oBAC1C,aAAa,EAAE,MAAM,IAAI,CAAC,sBAAsB,CAAC,eAAe,CAAC,OAAO,CAAC,GAAG,CAAC;oBAC7E,gBAAgB,EAAE,IAAI,CAAC,gBAAgB;oBACvC,kBAAkB,EAAE,eAAe,CAAC,OAAO,CAAC,GAAG;oBAC/C,eAAe,EAAE,eAAe,CAAC,OAAO,CAAC,SAAS;iBACrD,CAAC,CAAC;YACP,CAAC;YACD,eAAe,EAAE,KAAK,EAAE,QAAgB,EAAE,UAAmB,EAAE,EAAE;gBAC7D,IAAI,eAAe,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,GAAG,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;oBACjD,MAAM,IAAI,KAAK,CAAC,aAAa,QAAQ,cAAc,CAAC,CAAC;gBACzD,CAAC;YACL,CAAC;YACD,gBAAgB,EAAE,KAAK,EAAE,QAAiB,EAAE,SAAkB,EAAE,EAAE;gBAC9D,IAAI,QAAQ,EAAE,CAAC;oBACX,MAAM,eAAe,CAAC,eAAe,CAAC,QAAQ,EAAE,SAAS,CAAC,CAAC;gBAC/D,CAAC;gBAED,OAAO,eAAe,CAAC,CAAC,CAAC;YAC7B,CAAC;SACJ,CAAC;IACN,CAAC;CACJ;AAYD,gBAAgB;AAChB,MAAM,CAAC,KAAK,UAAU,0BAA0B,CAAC,EAC7C,OAAO,EACP,CAAC,EACD,YAAY,EACZ,aAAa,EACb,gBAAgB,EAChB,kBAAkB,EAClB,eAAe,GACW;IAC1B,IAAI,CAAC,CAAC,EAAE,CAAC;QACL,MAAM,IAAI,KAAK,CAAC,wDAAwD,CAAC,CAAC;IAC9E,CAAC;IAED,MAAM,OAAO,GAAG,sCAAsC,CAAC;QACnD,eAAe,EAAE,OAAO,EAAE,QAAQ;QAClC,eAAe;QACf,kBAAkB;QAClB,mBAAmB,EAAE,OAAO,EAAE,OAAO;KACxC,CAAC,CAAC;IAEH,MAAM,IAAI,GAAG,sBAAsB,CAC/B,CAAC,EACD,OAAO,EAAE,QAAQ,IAAI,GAAG,EACxB,OAAO,EAAE,OAAO,IAAI,eAAe,IAAI,kBAAkB,CAC5D,CAAC;IAEF,OAAO,YAAY,CAAC;QAChB,YAAY;QACZ,aAAa;QACb,gBAAgB;QAChB,IAAI;QACJ,OAAO;QACP,GAAG,OAAO;KACb,CAAC,CAAC;AACP,CAAC;AAED;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,MAAM,UAAU,mBAAmB,CAGjC,MAAwC;IACtC,OAAO,MAAM,CAAC,MAAM,CAAU,MAAM,CAAC,CAAC;AAC1C,CAAC"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@crawlee/cheerio",
3
- "version": "4.0.0-beta.10",
3
+ "version": "4.0.0-beta.11",
4
4
  "description": "The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs (not only) with headless Chrome and Puppeteer.",
5
5
  "engines": {
6
6
  "node": ">=22.0.0"
@@ -47,9 +47,9 @@
47
47
  "access": "public"
48
48
  },
49
49
  "dependencies": {
50
- "@crawlee/http": "4.0.0-beta.10",
51
- "@crawlee/types": "4.0.0-beta.10",
52
- "@crawlee/utils": "4.0.0-beta.10",
50
+ "@crawlee/http": "4.0.0-beta.11",
51
+ "@crawlee/types": "4.0.0-beta.11",
52
+ "@crawlee/utils": "4.0.0-beta.11",
53
53
  "cheerio": "^1.0.0",
54
54
  "htmlparser2": "^10.0.0",
55
55
  "tslib": "^2.8.1"
@@ -61,5 +61,5 @@
61
61
  }
62
62
  }
63
63
  },
64
- "gitHead": "b35240c5780d87a6d96046a286e0c9cf78be9e4f"
64
+ "gitHead": "790ff75aeb412a1fd306891c85b5cc15c337cfaf"
65
65
  }