@crawlee/cheerio 3.13.6-beta.0 → 4.0.0-beta.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/index.d.ts CHANGED
@@ -1,3 +1,3 @@
1
1
  export * from '@crawlee/http';
2
- export * from './internals/cheerio-crawler';
2
+ export * from './internals/cheerio-crawler.js';
3
3
  //# sourceMappingURL=index.d.ts.map
package/index.d.ts.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,eAAe,CAAC;AAC9B,cAAc,6BAA6B,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,eAAe,CAAC;AAC9B,cAAc,gCAAgC,CAAC"}
package/index.js CHANGED
@@ -1,6 +1,3 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- const tslib_1 = require("tslib");
4
- tslib_1.__exportStar(require("@crawlee/http"), exports);
5
- tslib_1.__exportStar(require("./internals/cheerio-crawler"), exports);
1
+ export * from '@crawlee/http';
2
+ export * from './internals/cheerio-crawler.js';
6
3
  //# sourceMappingURL=index.js.map
package/index.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";;;AAAA,wDAA8B;AAC9B,sEAA4C"}
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,eAAe,CAAC;AAC9B,cAAc,gCAAgC,CAAC"}
@@ -132,13 +132,13 @@ export declare class CheerioCrawler extends HttpCrawler<CheerioCrawlingContext>
132
132
  */
133
133
  constructor(options?: CheerioCrawlerOptions, config?: Configuration);
134
134
  protected _parseHTML(response: IncomingMessage, isXml: boolean, crawlingContext: CheerioCrawlingContext): Promise<{
135
- dom: cheerio.Document;
135
+ // @ts-ignore optional peer dependency or compatibility with es2022
136
+ dom: import("domhandler").Document;
136
137
  $: cheerio.CheerioAPI;
137
138
  body: string;
138
139
  // @ts-ignore optional peer dependency or compatibility with es2022
139
140
  enqueueLinks: (enqueueOptions?: EnqueueLinksOptions) => Promise<import("@crawlee/types").BatchAddRequestsResult>;
140
141
  }>;
141
- protected _parseHtmlToDom(response: IncomingMessage, isXml: boolean): Promise<unknown>;
142
142
  protected _runRequestHandler(context: CheerioCrawlingContext): Promise<void>;
143
143
  }
144
144
  interface EnqueueLinksInternalOptions {
@@ -1 +1 @@
1
- {"version":3,"file":"cheerio-crawler.d.ts","sourceRoot":"","sources":["../../src/internals/cheerio-crawler.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,WAAW,CAAC;AAGjD,OAAO,KAAK,EACR,aAAa,EACb,mBAAmB,EACnB,YAAY,EACZ,sBAAsB,EACtB,kBAAkB,EAClB,2BAA2B,EAC3B,gBAAgB,EAChB,cAAc,EACd,eAAe,EACf,YAAY,EACZ,sBAAsB,EACzB,MAAM,eAAe,CAAC;AACvB,OAAO,EAAgB,WAAW,EAAkD,MAAM,eAAe,CAAC;AAC1G,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AACjD,OAAO,EAAE,KAAK,WAAW,EAA0B,KAAK,aAAa,EAAE,MAAM,gBAAgB,CAAC;AAE9F,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AAInC,MAAM,MAAM,mBAAmB,CAC3B,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,UAAU,GAAG,GAAG,IACjC,YAAY,CAAC,sBAAsB,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,CAAC;AAE7D,MAAM,WAAW,qBAAqB,CAClC,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,UAAU,GAAG,GAAG,CACnC,SAAQ,kBAAkB,CAAC,sBAAsB,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC;CAAG;AAE3E,MAAM,MAAM,WAAW,CACnB,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,UAAU,GAAG,GAAG,IACjC,gBAAgB,CAAC,sBAAsB,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,CAAC;AAEjE,MAAM,WAAW,sBAAsB,CACnC,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,UAAU,GAAG,GAAG,CACnC,SAAQ,2BAA2B,CAAC,QAAQ,EAAE,QAAQ,EAAE,cAAc,CAAC;IACrE;;;OAGG;IACH,CAAC,EAAE,OAAO,CAAC,UAAU,CAAC;IAEtB;;;;;;;;;;;OAWG;IACH,eAAe,CAAC,QAAQ,EAAE,MAAM,EAAE,SAAS,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAErE;;;;;;;;;;;;;OAaG;IACH,gBAAgB,CAAC,QAAQ,CAAC,EAAE,MAAM,EAAE,SAAS,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,CAAC,CAAC;CACjF;AAED,MAAM,MAAM,qBAAqB,CAC7B,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,UAAU,GAAG,GAAG,IACjC,cAAc,CAAC,sBAAsB,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,CAAC;AAE/D;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4EG;AACH,qBAAa,cAAe,SAAQ,WAAW,CAAC,sBAAsB,CAAC;IACnE;;OAEG;gBAES,OAAO,CAAC,EAAE,qBAAqB,EAAE,MAAM,CAAC,EAAE,aAAa;cAI1C,UAAU,CAC/B,QAAQ,EAAE,eAAe,EACzB,KAAK,EAAE,OAAO,EACd,eAAe,EAAE,sBAAsB;;;;wCAiBG,mBAAmB;;cAejD,eAAe,CAAC,QAAQ,EAAE,eAAe,EAAE,KAAK,EAAE,OAAO;cAehD,kBAAkB,CAAC,OAAO,EAAE,sBAAsB;CAgB9E;AAED,UAAU,2BAA2B;IACjC,OAAO,CAAC,EAAE,mBAAmB,CAAC;IAC9B,CAAC,EAAE,OAAO,CAAC,UAAU,GAAG,IAAI,CAAC;IAC7B,YAAY,EAAE,eAAe,CAAC;IAC9B,aAAa,CAAC,EAAE,aAAa,CAAC;IAC9B,gBAAgB,CAAC,EAAE,sBAAsB,CAAC;IAC1C,kBAAkB,EAAE,MAAM,CAAC;IAC3B,eAAe,CAAC,EAAE,MAAM,CAAC;CAC5B;AAED,gBAAgB;AAChB,wBAAsB,0BAA0B,CAAC,EAC7C,OAAO,EACP,CAAC,EACD,YAAY,EACZ,aAAa,EACb,gBAAgB,EAChB,kBAAkB,EAClB,eAAe,GAClB,EAAE,2BAA2B,4DA0B7B;AAED;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,wBAAgB,mBAAmB,CAC/B,OAAO,SAAS,sBAAsB,GAAG,sBAAsB,EAC/D,QAAQ,SAAS,UAAU,GAAG,sBAAsB,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,EAC1E,MAAM,CAAC,EAAE,YAAY,CAAC,OAAO,EAAE,QAAQ,CAAC,kDAEzC"}
1
+ {"version":3,"file":"cheerio-crawler.d.ts","sourceRoot":"","sources":["../../src/internals/cheerio-crawler.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,WAAW,CAAC;AAGjD,OAAO,KAAK,EACR,aAAa,EACb,mBAAmB,EACnB,YAAY,EACZ,sBAAsB,EACtB,kBAAkB,EAClB,2BAA2B,EAC3B,gBAAgB,EAChB,cAAc,EACd,eAAe,EACf,YAAY,EACZ,sBAAsB,EACzB,MAAM,eAAe,CAAC;AACvB,OAAO,EAAgB,WAAW,EAAkD,MAAM,eAAe,CAAC;AAC1G,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AACjD,OAAO,EAAE,KAAK,WAAW,EAA0B,KAAK,aAAa,EAAE,MAAM,gBAAgB,CAAC;AAE9F,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AAGnC,MAAM,MAAM,mBAAmB,CAC3B,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,UAAU,GAAG,GAAG,IACjC,YAAY,CAAC,sBAAsB,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,CAAC;AAE7D,MAAM,WAAW,qBAAqB,CAClC,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,UAAU,GAAG,GAAG,CACnC,SAAQ,kBAAkB,CAAC,sBAAsB,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC;CAAG;AAE3E,MAAM,MAAM,WAAW,CACnB,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,UAAU,GAAG,GAAG,IACjC,gBAAgB,CAAC,sBAAsB,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,CAAC;AAEjE,MAAM,WAAW,sBAAsB,CACnC,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,UAAU,GAAG,GAAG,CACnC,SAAQ,2BAA2B,CAAC,QAAQ,EAAE,QAAQ,EAAE,cAAc,CAAC;IACrE;;;OAGG;IACH,CAAC,EAAE,OAAO,CAAC,UAAU,CAAC;IAEtB;;;;;;;;;;;OAWG;IACH,eAAe,CAAC,QAAQ,EAAE,MAAM,EAAE,SAAS,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAErE;;;;;;;;;;;;;OAaG;IACH,gBAAgB,CAAC,QAAQ,CAAC,EAAE,MAAM,EAAE,SAAS,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,CAAC,CAAC;CACjF;AAED,MAAM,MAAM,qBAAqB,CAC7B,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,UAAU,GAAG,GAAG,IACjC,cAAc,CAAC,sBAAsB,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,CAAC;AAE/D;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4EG;AACH,qBAAa,cAAe,SAAQ,WAAW,CAAC,sBAAsB,CAAC;IACnE;;OAEG;gBAES,OAAO,CAAC,EAAE,qBAAqB,EAAE,MAAM,CAAC,EAAE,aAAa;cAI1C,UAAU,CAC/B,QAAQ,EAAE,eAAe,EACzB,KAAK,EAAE,OAAO,EACd,eAAe,EAAE,sBAAsB;;;;wCAYG,mBAAmB;;cAcxC,kBAAkB,CAAC,OAAO,EAAE,sBAAsB;CAgB9E;AAED,UAAU,2BAA2B;IACjC,OAAO,CAAC,EAAE,mBAAmB,CAAC;IAC9B,CAAC,EAAE,OAAO,CAAC,UAAU,GAAG,IAAI,CAAC;IAC7B,YAAY,EAAE,eAAe,CAAC;IAC9B,aAAa,CAAC,EAAE,aAAa,CAAC;IAC9B,gBAAgB,CAAC,EAAE,sBAAsB,CAAC;IAC1C,kBAAkB,EAAE,MAAM,CAAC;IAC3B,eAAe,CAAC,EAAE,MAAM,CAAC;CAC5B;AAED,gBAAgB;AAChB,wBAAsB,0BAA0B,CAAC,EAC7C,OAAO,EACP,CAAC,EACD,YAAY,EACZ,aAAa,EACb,gBAAgB,EAChB,kBAAkB,EAClB,eAAe,GAClB,EAAE,2BAA2B,4DA0B7B;AAED;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,wBAAgB,mBAAmB,CAC/B,OAAO,SAAS,sBAAsB,GAAG,sBAAsB,EAC/D,QAAQ,SAAS,UAAU,GAAG,sBAAsB,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,EAC1E,MAAM,CAAC,EAAE,YAAY,CAAC,OAAO,EAAE,QAAQ,CAAC,kDAEzC"}
@@ -1,15 +1,8 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.CheerioCrawler = void 0;
4
- exports.cheerioCrawlerEnqueueLinks = cheerioCrawlerEnqueueLinks;
5
- exports.createCheerioRouter = createCheerioRouter;
6
- const tslib_1 = require("tslib");
7
- const consumers_1 = require("node:stream/consumers");
8
- const http_1 = require("@crawlee/http");
9
- const utils_1 = require("@crawlee/utils");
10
- const cheerio = tslib_1.__importStar(require("cheerio"));
11
- const htmlparser2_1 = require("htmlparser2");
12
- const WritableStream_1 = require("htmlparser2/lib/WritableStream");
1
+ import { text as readStreamToString } from 'node:stream/consumers';
2
+ import { enqueueLinks, HttpCrawler, resolveBaseUrlForEnqueueLinksFiltering, Router } from '@crawlee/http';
3
+ import { extractUrlsFromCheerio } from '@crawlee/utils';
4
+ import * as cheerio from 'cheerio';
5
+ import { parseDocument } from 'htmlparser2';
13
6
  /**
14
7
  * Provides a framework for the parallel crawling of web pages using plain HTTP requests and
15
8
  * [cheerio](https://www.npmjs.com/package/cheerio) HTML parser.
@@ -87,7 +80,7 @@ const WritableStream_1 = require("htmlparser2/lib/WritableStream");
87
80
  * ```
88
81
  * @category Crawlers
89
82
  */
90
- class CheerioCrawler extends http_1.HttpCrawler {
83
+ export class CheerioCrawler extends HttpCrawler {
91
84
  /**
92
85
  * All `CheerioCrawler` parameters are passed via an options object.
93
86
  */
@@ -96,14 +89,10 @@ class CheerioCrawler extends http_1.HttpCrawler {
96
89
  super(options, config);
97
90
  }
98
91
  async _parseHTML(response, isXml, crawlingContext) {
99
- const body = await (0, consumers_1.text)(response);
100
- const dom = (0, htmlparser2_1.parseDocument)(body, { decodeEntities: true, xmlMode: isXml });
101
- const $ = cheerio.load(body, {
102
- xmlMode: isXml,
103
- // Recent versions of cheerio use parse5 as the HTML parser/serializer. It's more strict than htmlparser2
104
- // and not good for scraping. It also does not have a great streaming interface.
105
- // Here we tell cheerio to use htmlparser2 for serialization, otherwise the conflict produces weird errors.
106
- _useHtmlParser2: true,
92
+ const body = await readStreamToString(response);
93
+ const dom = parseDocument(body, { decodeEntities: true, xmlMode: isXml });
94
+ const $ = cheerio.load(dom, {
95
+ xml: { decodeEntities: true, xmlMode: isXml },
107
96
  });
108
97
  return {
109
98
  dom,
@@ -122,20 +111,6 @@ class CheerioCrawler extends http_1.HttpCrawler {
122
111
  },
123
112
  };
124
113
  }
125
- // TODO: unused code - remove in 4.0
126
- async _parseHtmlToDom(response, isXml) {
127
- return new Promise((resolve, reject) => {
128
- const domHandler = new htmlparser2_1.DomHandler((err, dom) => {
129
- if (err)
130
- reject(err);
131
- else
132
- resolve(dom);
133
- }, { xmlMode: isXml });
134
- const parser = new WritableStream_1.WritableStream(domHandler, { decodeEntities: true, xmlMode: isXml });
135
- parser.on('error', reject);
136
- response.on('error', reject).pipe(parser);
137
- });
138
- }
139
114
  async _runRequestHandler(context) {
140
115
  context.waitForSelector = async (selector, _timeoutMs) => {
141
116
  if (context.$(selector).get().length === 0) {
@@ -151,20 +126,19 @@ class CheerioCrawler extends http_1.HttpCrawler {
151
126
  await super._runRequestHandler(context);
152
127
  }
153
128
  }
154
- exports.CheerioCrawler = CheerioCrawler;
155
129
  /** @internal */
156
- async function cheerioCrawlerEnqueueLinks({ options, $, requestQueue, robotsTxtFile, onSkippedRequest, originalRequestUrl, finalRequestUrl, }) {
130
+ export async function cheerioCrawlerEnqueueLinks({ options, $, requestQueue, robotsTxtFile, onSkippedRequest, originalRequestUrl, finalRequestUrl, }) {
157
131
  if (!$) {
158
132
  throw new Error('Cannot enqueue links because the DOM is not available.');
159
133
  }
160
- const baseUrl = (0, http_1.resolveBaseUrlForEnqueueLinksFiltering)({
134
+ const baseUrl = resolveBaseUrlForEnqueueLinksFiltering({
161
135
  enqueueStrategy: options?.strategy,
162
136
  finalRequestUrl,
163
137
  originalRequestUrl,
164
138
  userProvidedBaseUrl: options?.baseUrl,
165
139
  });
166
- const urls = (0, utils_1.extractUrlsFromCheerio)($, options?.selector ?? 'a', options?.baseUrl ?? finalRequestUrl ?? originalRequestUrl);
167
- return (0, http_1.enqueueLinks)({
140
+ const urls = extractUrlsFromCheerio($, options?.selector ?? 'a', options?.baseUrl ?? finalRequestUrl ?? originalRequestUrl);
141
+ return enqueueLinks({
168
142
  requestQueue,
169
143
  robotsTxtFile,
170
144
  onSkippedRequest,
@@ -197,7 +171,7 @@ async function cheerioCrawlerEnqueueLinks({ options, $, requestQueue, robotsTxtF
197
171
  * await crawler.run();
198
172
  * ```
199
173
  */
200
- function createCheerioRouter(routes) {
201
- return http_1.Router.create(routes);
174
+ export function createCheerioRouter(routes) {
175
+ return Router.create(routes);
202
176
  }
203
177
  //# sourceMappingURL=cheerio-crawler.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"cheerio-crawler.js","sourceRoot":"","sources":["../../src/internals/cheerio-crawler.ts"],"names":[],"mappings":";;;AA0PA,gEAkCC;AA0BD,kDAKC;;AA1TD,qDAAmE;AAenE,wCAA0G;AAE1G,0CAA8F;AAE9F,yDAAmC;AACnC,6CAAwD;AACxD,mEAAgE;AA+DhE;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4EG;AACH,MAAa,cAAe,SAAQ,kBAAmC;IACnE;;OAEG;IACH,qEAAqE;IACrE,YAAY,OAA+B,EAAE,MAAsB;QAC/D,KAAK,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;IAC3B,CAAC;IAEkB,KAAK,CAAC,UAAU,CAC/B,QAAyB,EACzB,KAAc,EACd,eAAuC;QAEvC,MAAM,IAAI,GAAG,MAAM,IAAA,gBAAkB,EAAC,QAAQ,CAAC,CAAC;QAChD,MAAM,GAAG,GAAG,IAAA,2BAAa,EAAC,IAAI,EAAE,EAAE,cAAc,EAAE,IAAI,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC,CAAC;QAE1E,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,EAAE;YACzB,OAAO,EAAE,KAAK;YACd,yGAAyG;YACzG,gFAAgF;YAChF,2GAA2G;YAC3G,eAAe,EAAE,IAAI;SACN,CAAC,CAAC;QAErB,OAAO;YACH,GAAG;YACH,CAAC;YACD,IAAI;YACJ,YAAY,EAAE,KAAK,EAAE,cAAoC,EAAE,EAAE;gBACzD,OAAO,0BAA0B,CAAC;oBAC9B,OAAO,EAAE,cAAc;oBACvB,CAAC;oBACD,YAAY,EAAE,MAAM,IAAI,CAAC,eAAe,EAAE;oBAC1C,aAAa,EAAE,MAAM,IAAI,CAAC,sBAAsB,CAAC,eAAe,CAAC,OAAO,CAAC,GAAG,CAAC;oBAC7E,gBAAgB,EAAE,IAAI,CAAC,gBAAgB;oBACvC,kBAAkB,EAAE,eAAe,CAAC,OAAO,CAAC,GAAG;oBAC/C,eAAe,EAAE,eAAe,CAAC,OAAO,CAAC,SAAS;iBACrD,CAAC,CAAC;YACP,CAAC;SACJ,CAAC;IACN,CAAC;IAED,oCAAoC;IAC1B,KAAK,CAAC,eAAe,CAAC,QAAyB,EAAE,KAAc;QACrE,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;YACnC,MAAM,UAAU,GAAG,IAAI,wBAAU,CAC7B,CAAC,GAAG,EAAE,GAAG,EAAE,EAAE;gBACT,IAAI,GAAG;oBAAE,MAAM,CAAC,GAAG,CAAC,CAAC;;oBAChB,OAAO,CAAC,GAAG,CAAC,CAAC;YACtB,CAAC,EACD,EAAE,OAAO,EAAE,KAAK,EAAE,CACrB,CAAC;YACF,MAAM,MAAM,GAAG,IAAI,+BAAc,CAAC,UAAU,EAAE,EAAE,cAAc,EAAE,IAAI,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC,CAAC;YACxF,MAAM,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;YAC3B,QAAQ,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAC9C,CAAC,CAAC,CAAC;IACP,CAAC;IAEkB,KAAK,CAAC,kBAAkB,CAAC,OAA+B;QACvE,OAAO,CAAC,eAAe,GAAG,KAAK,EAAE,QAAiB,EAAE,UAAmB,EAAE,EAAE;YACvE,IAAI,OAAO,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,GAAG,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBACzC,MAAM,IAAI,KAAK,CAAC,aAAa,QAAQ,cAAc,CAAC,CAAC;YACzD,CAAC;QACL,CAAC,CAAC;QACF,OAAO,CAAC,gBAAgB,GAAG,KAAK,EAAE,QAAiB,EAAE,SAAkB,EAAE,EAAE;YACvE,IAAI,QAAQ,EAAE,CAAC;gBACX,MAAM,OAAO,CAAC,eAAe,CAAC,QAAQ,EAAE,SAAS,CAAC,CAAC;YACvD,CAAC;YAED,OAAO,OAAO,CAAC,CAAC,CAAC;QACrB,CAAC,CAAC;QAEF,MAAM,KAAK,CAAC,kBAAkB,CAAC,OAAO,CAAC,CAAC;IAC5C,CAAC;CACJ;AA3ED,wCA2EC;AAYD,gBAAgB;AACT,KAAK,UAAU,0BAA0B,CAAC,EAC7C,OAAO,EACP,CAAC,EACD,YAAY,EACZ,aAAa,EACb,gBAAgB,EAChB,kBAAkB,EAClB,eAAe,GACW;IAC1B,IAAI,CAAC,CAAC,EAAE,CAAC;QACL,MAAM,IAAI,KAAK,CAAC,wDAAwD,CAAC,CAAC;IAC9E,CAAC;IAED,MAAM,OAAO,GAAG,IAAA,6CAAsC,EAAC;QACnD,eAAe,EAAE,OAAO,EAAE,QAAQ;QAClC,eAAe;QACf,kBAAkB;QAClB,mBAAmB,EAAE,OAAO,EAAE,OAAO;KACxC,CAAC,CAAC;IAEH,MAAM,IAAI,GAAG,IAAA,8BAAsB,EAC/B,CAAC,EACD,OAAO,EAAE,QAAQ,IAAI,GAAG,EACxB,OAAO,EAAE,OAAO,IAAI,eAAe,IAAI,kBAAkB,CAC5D,CAAC;IAEF,OAAO,IAAA,mBAAY,EAAC;QAChB,YAAY;QACZ,aAAa;QACb,gBAAgB;QAChB,IAAI;QACJ,OAAO;QACP,GAAG,OAAO;KACb,CAAC,CAAC;AACP,CAAC;AAED;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,SAAgB,mBAAmB,CAGjC,MAAwC;IACtC,OAAO,aAAM,CAAC,MAAM,CAAU,MAAM,CAAC,CAAC;AAC1C,CAAC"}
1
+ {"version":3,"file":"cheerio-crawler.js","sourceRoot":"","sources":["../../src/internals/cheerio-crawler.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,IAAI,IAAI,kBAAkB,EAAE,MAAM,uBAAuB,CAAC;AAenE,OAAO,EAAE,YAAY,EAAE,WAAW,EAAE,sCAAsC,EAAE,MAAM,EAAE,MAAM,eAAe,CAAC;AAE1G,OAAO,EAAoB,sBAAsB,EAAsB,MAAM,gBAAgB,CAAC;AAE9F,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AACnC,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AA+D5C;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4EG;AACH,MAAM,OAAO,cAAe,SAAQ,WAAmC;IACnE;;OAEG;IACH,qEAAqE;IACrE,YAAY,OAA+B,EAAE,MAAsB;QAC/D,KAAK,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;IAC3B,CAAC;IAEkB,KAAK,CAAC,UAAU,CAC/B,QAAyB,EACzB,KAAc,EACd,eAAuC;QAEvC,MAAM,IAAI,GAAG,MAAM,kBAAkB,CAAC,QAAQ,CAAC,CAAC;QAChD,MAAM,GAAG,GAAG,aAAa,CAAC,IAAI,EAAE,EAAE,cAAc,EAAE,IAAI,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC,CAAC;QAC1E,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,GAAG,EAAE;YACxB,GAAG,EAAE,EAAE,cAAc,EAAE,IAAI,EAAE,OAAO,EAAE,KAAK,EAAE;SAC9B,CAAC,CAAC;QAErB,OAAO;YACH,GAAG;YACH,CAAC;YACD,IAAI;YACJ,YAAY,EAAE,KAAK,EAAE,cAAoC,EAAE,EAAE;gBACzD,OAAO,0BAA0B,CAAC;oBAC9B,OAAO,EAAE,cAAc;oBACvB,CAAC;oBACD,YAAY,EAAE,MAAM,IAAI,CAAC,eAAe,EAAE;oBAC1C,aAAa,EAAE,MAAM,IAAI,CAAC,sBAAsB,CAAC,eAAe,CAAC,OAAO,CAAC,GAAG,CAAC;oBAC7E,gBAAgB,EAAE,IAAI,CAAC,gBAAgB;oBACvC,kBAAkB,EAAE,eAAe,CAAC,OAAO,CAAC,GAAG;oBAC/C,eAAe,EAAE,eAAe,CAAC,OAAO,CAAC,SAAS;iBACrD,CAAC,CAAC;YACP,CAAC;SACJ,CAAC;IACN,CAAC;IAEkB,KAAK,CAAC,kBAAkB,CAAC,OAA+B;QACvE,OAAO,CAAC,eAAe,GAAG,KAAK,EAAE,QAAiB,EAAE,UAAmB,EAAE,EAAE;YACvE,IAAI,OAAO,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,GAAG,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBACzC,MAAM,IAAI,KAAK,CAAC,aAAa,QAAQ,cAAc,CAAC,CAAC;YACzD,CAAC;QACL,CAAC,CAAC;QACF,OAAO,CAAC,gBAAgB,GAAG,KAAK,EAAE,QAAiB,EAAE,SAAkB,EAAE,EAAE;YACvE,IAAI,QAAQ,EAAE,CAAC;gBACX,MAAM,OAAO,CAAC,eAAe,CAAC,QAAQ,EAAE,SAAS,CAAC,CAAC;YACvD,CAAC;YAED,OAAO,OAAO,CAAC,CAAC,CAAC;QACrB,CAAC,CAAC;QAEF,MAAM,KAAK,CAAC,kBAAkB,CAAC,OAAO,CAAC,CAAC;IAC5C,CAAC;CACJ;AAYD,gBAAgB;AAChB,MAAM,CAAC,KAAK,UAAU,0BAA0B,CAAC,EAC7C,OAAO,EACP,CAAC,EACD,YAAY,EACZ,aAAa,EACb,gBAAgB,EAChB,kBAAkB,EAClB,eAAe,GACW;IAC1B,IAAI,CAAC,CAAC,EAAE,CAAC;QACL,MAAM,IAAI,KAAK,CAAC,wDAAwD,CAAC,CAAC;IAC9E,CAAC;IAED,MAAM,OAAO,GAAG,sCAAsC,CAAC;QACnD,eAAe,EAAE,OAAO,EAAE,QAAQ;QAClC,eAAe;QACf,kBAAkB;QAClB,mBAAmB,EAAE,OAAO,EAAE,OAAO;KACxC,CAAC,CAAC;IAEH,MAAM,IAAI,GAAG,sBAAsB,CAC/B,CAAC,EACD,OAAO,EAAE,QAAQ,IAAI,GAAG,EACxB,OAAO,EAAE,OAAO,IAAI,eAAe,IAAI,kBAAkB,CAC5D,CAAC;IAEF,OAAO,YAAY,CAAC;QAChB,YAAY;QACZ,aAAa;QACb,gBAAgB;QAChB,IAAI;QACJ,OAAO;QACP,GAAG,OAAO;KACb,CAAC,CAAC;AACP,CAAC;AAED;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,MAAM,UAAU,mBAAmB,CAGjC,MAAwC;IACtC,OAAO,MAAM,CAAC,MAAM,CAAU,MAAM,CAAC,CAAC;AAC1C,CAAC"}
package/package.json CHANGED
@@ -1,19 +1,13 @@
1
1
  {
2
2
  "name": "@crawlee/cheerio",
3
- "version": "3.13.6-beta.0",
3
+ "version": "4.0.0-beta.0",
4
4
  "description": "The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs (not only) with headless Chrome and Puppeteer.",
5
5
  "engines": {
6
- "node": ">=16.0.0"
6
+ "node": ">=22.0.0"
7
7
  },
8
- "main": "./index.js",
9
- "module": "./index.mjs",
10
- "types": "./index.d.ts",
8
+ "type": "module",
11
9
  "exports": {
12
- ".": {
13
- "import": "./index.mjs",
14
- "require": "./index.js",
15
- "types": "./index.d.ts"
16
- },
10
+ ".": "./index.js",
17
11
  "./package.json": "./package.json"
18
12
  },
19
13
  "keywords": [
@@ -46,19 +40,19 @@
46
40
  "scripts": {
47
41
  "build": "yarn clean && yarn compile && yarn copy",
48
42
  "clean": "rimraf ./dist",
49
- "compile": "tsc -p tsconfig.build.json && gen-esm-wrapper ./index.js ./index.mjs",
43
+ "compile": "tsc -p tsconfig.build.json",
50
44
  "copy": "tsx ../../scripts/copy.ts"
51
45
  },
52
46
  "publishConfig": {
53
47
  "access": "public"
54
48
  },
55
49
  "dependencies": {
56
- "@crawlee/http": "^3.13.6-beta.0",
57
- "@crawlee/types": "^3.13.6-beta.0",
58
- "@crawlee/utils": "^3.13.6-beta.0",
59
- "cheerio": "1.0.0-rc.12",
60
- "htmlparser2": "^9.0.0",
61
- "tslib": "^2.4.0"
50
+ "@crawlee/http": "4.0.0-beta.0",
51
+ "@crawlee/types": "4.0.0-beta.0",
52
+ "@crawlee/utils": "4.0.0-beta.0",
53
+ "cheerio": "^1.0.0",
54
+ "htmlparser2": "^10.0.0",
55
+ "tslib": "^2.8.1"
62
56
  },
63
57
  "lerna": {
64
58
  "command": {
@@ -67,5 +61,5 @@
67
61
  }
68
62
  }
69
63
  },
70
- "gitHead": "595148da5825258b9c92e49071ae6c76bc824c86"
64
+ "gitHead": "927bdafa403ff347327158b01d20b817378168a7"
71
65
  }