@crawlee/cheerio 3.13.6-beta.0 → 4.0.0-beta.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.d.ts +1 -1
- package/index.d.ts.map +1 -1
- package/index.js +2 -5
- package/index.js.map +1 -1
- package/internals/cheerio-crawler.d.ts +2 -2
- package/internals/cheerio-crawler.d.ts.map +1 -1
- package/internals/cheerio-crawler.js +16 -42
- package/internals/cheerio-crawler.js.map +1 -1
- package/package.json +12 -18
- package/tsconfig.build.tsbuildinfo +1 -1
- package/index.mjs +0 -100
package/index.d.ts
CHANGED
package/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,eAAe,CAAC;AAC9B,cAAc,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,eAAe,CAAC;AAC9B,cAAc,gCAAgC,CAAC"}
|
package/index.js
CHANGED
|
@@ -1,6 +1,3 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
const tslib_1 = require("tslib");
|
|
4
|
-
tslib_1.__exportStar(require("@crawlee/http"), exports);
|
|
5
|
-
tslib_1.__exportStar(require("./internals/cheerio-crawler"), exports);
|
|
1
|
+
export * from '@crawlee/http';
|
|
2
|
+
export * from './internals/cheerio-crawler.js';
|
|
6
3
|
//# sourceMappingURL=index.js.map
|
package/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,eAAe,CAAC;AAC9B,cAAc,gCAAgC,CAAC"}
|
|
@@ -132,13 +132,13 @@ export declare class CheerioCrawler extends HttpCrawler<CheerioCrawlingContext>
|
|
|
132
132
|
*/
|
|
133
133
|
constructor(options?: CheerioCrawlerOptions, config?: Configuration);
|
|
134
134
|
protected _parseHTML(response: IncomingMessage, isXml: boolean, crawlingContext: CheerioCrawlingContext): Promise<{
|
|
135
|
-
|
|
135
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
136
|
+
dom: import("domhandler").Document;
|
|
136
137
|
$: cheerio.CheerioAPI;
|
|
137
138
|
body: string;
|
|
138
139
|
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
139
140
|
enqueueLinks: (enqueueOptions?: EnqueueLinksOptions) => Promise<import("@crawlee/types").BatchAddRequestsResult>;
|
|
140
141
|
}>;
|
|
141
|
-
protected _parseHtmlToDom(response: IncomingMessage, isXml: boolean): Promise<unknown>;
|
|
142
142
|
protected _runRequestHandler(context: CheerioCrawlingContext): Promise<void>;
|
|
143
143
|
}
|
|
144
144
|
interface EnqueueLinksInternalOptions {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"cheerio-crawler.d.ts","sourceRoot":"","sources":["../../src/internals/cheerio-crawler.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,WAAW,CAAC;AAGjD,OAAO,KAAK,EACR,aAAa,EACb,mBAAmB,EACnB,YAAY,EACZ,sBAAsB,EACtB,kBAAkB,EAClB,2BAA2B,EAC3B,gBAAgB,EAChB,cAAc,EACd,eAAe,EACf,YAAY,EACZ,sBAAsB,EACzB,MAAM,eAAe,CAAC;AACvB,OAAO,EAAgB,WAAW,EAAkD,MAAM,eAAe,CAAC;AAC1G,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AACjD,OAAO,EAAE,KAAK,WAAW,EAA0B,KAAK,aAAa,EAAE,MAAM,gBAAgB,CAAC;AAE9F,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;
|
|
1
|
+
{"version":3,"file":"cheerio-crawler.d.ts","sourceRoot":"","sources":["../../src/internals/cheerio-crawler.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,WAAW,CAAC;AAGjD,OAAO,KAAK,EACR,aAAa,EACb,mBAAmB,EACnB,YAAY,EACZ,sBAAsB,EACtB,kBAAkB,EAClB,2BAA2B,EAC3B,gBAAgB,EAChB,cAAc,EACd,eAAe,EACf,YAAY,EACZ,sBAAsB,EACzB,MAAM,eAAe,CAAC;AACvB,OAAO,EAAgB,WAAW,EAAkD,MAAM,eAAe,CAAC;AAC1G,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AACjD,OAAO,EAAE,KAAK,WAAW,EAA0B,KAAK,aAAa,EAAE,MAAM,gBAAgB,CAAC;AAE9F,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AAGnC,MAAM,MAAM,mBAAmB,CAC3B,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,UAAU,GAAG,GAAG,IACjC,YAAY,CAAC,sBAAsB,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,CAAC;AAE7D,MAAM,WAAW,qBAAqB,CAClC,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,UAAU,GAAG,GAAG,CACnC,SAAQ,kBAAkB,CAAC,sBAAsB,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC;CAAG;AAE3E,MAAM,MAAM,WAAW,CACnB,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,UAAU,GAAG,GAAG,IACjC,gBAAgB,CAAC,sBAAsB,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,CAAC;AAEjE,MAAM,WAAW,sBAAsB,CACnC,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,UAAU,GAAG,GAAG,CACnC,SAAQ,2BAA2B,CAAC,QAAQ,EAAE,QAAQ,EAAE,cAAc,CAAC;IACrE;;;OAGG;IACH,CAAC,EAAE,OAAO,CAAC,UAAU,CAAC;IAEtB;;;;;;;;;;;OAWG;IACH,eAAe,CAAC,QAAQ,EAAE,MAAM,EAAE,SAAS,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAErE;;;;;;;;;;;;;OAaG;IACH,gBAAgB,CAAC,QAAQ,CAAC,EAAE,MAAM,EAAE,SAAS,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,CAAC,CAAC;CACjF;AAED,MAAM,MAAM,qBAAqB,CAC7B,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,UAAU,GAAG,GAAG,IACjC,cAAc,CAAC,sBAAsB,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,CAAC;AAE/D;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4EG;AACH,qBAAa,cAAe,SAAQ,WAAW,CAAC,sBAAsB,CAAC;IACnE;;OAEG;gBAES,OAAO,CAAC,EAAE,qBAAqB,EAAE,MAAM,CAAC,EAAE,aAAa;cAI1C,UAAU,CAC/B,QAAQ,EAAE,eAAe,EACzB,KAAK,EAAE,OAAO,EACd,eAAe,EAAE,sBAAsB;;;;wCAYG,mBAAmB;;cAcxC,kBAAkB,CAAC,OAAO,EAAE,sBAAsB;CAgB9E;AAED,UAAU,2BAA2B;IACjC,OAAO,CAAC,EAAE,mBAAmB,CAAC;IAC9B,CAAC,EAAE,OAAO,CAAC,UAAU,GAAG
,IAAI,CAAC;IAC7B,YAAY,EAAE,eAAe,CAAC;IAC9B,aAAa,CAAC,EAAE,aAAa,CAAC;IAC9B,gBAAgB,CAAC,EAAE,sBAAsB,CAAC;IAC1C,kBAAkB,EAAE,MAAM,CAAC;IAC3B,eAAe,CAAC,EAAE,MAAM,CAAC;CAC5B;AAED,gBAAgB;AAChB,wBAAsB,0BAA0B,CAAC,EAC7C,OAAO,EACP,CAAC,EACD,YAAY,EACZ,aAAa,EACb,gBAAgB,EAChB,kBAAkB,EAClB,eAAe,GAClB,EAAE,2BAA2B,4DA0B7B;AAED;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,wBAAgB,mBAAmB,CAC/B,OAAO,SAAS,sBAAsB,GAAG,sBAAsB,EAC/D,QAAQ,SAAS,UAAU,GAAG,sBAAsB,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,EAC1E,MAAM,CAAC,EAAE,YAAY,CAAC,OAAO,EAAE,QAAQ,CAAC,kDAEzC"}
|
|
@@ -1,15 +1,8 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
const tslib_1 = require("tslib");
|
|
7
|
-
const consumers_1 = require("node:stream/consumers");
|
|
8
|
-
const http_1 = require("@crawlee/http");
|
|
9
|
-
const utils_1 = require("@crawlee/utils");
|
|
10
|
-
const cheerio = tslib_1.__importStar(require("cheerio"));
|
|
11
|
-
const htmlparser2_1 = require("htmlparser2");
|
|
12
|
-
const WritableStream_1 = require("htmlparser2/lib/WritableStream");
|
|
1
|
+
import { text as readStreamToString } from 'node:stream/consumers';
|
|
2
|
+
import { enqueueLinks, HttpCrawler, resolveBaseUrlForEnqueueLinksFiltering, Router } from '@crawlee/http';
|
|
3
|
+
import { extractUrlsFromCheerio } from '@crawlee/utils';
|
|
4
|
+
import * as cheerio from 'cheerio';
|
|
5
|
+
import { parseDocument } from 'htmlparser2';
|
|
13
6
|
/**
|
|
14
7
|
* Provides a framework for the parallel crawling of web pages using plain HTTP requests and
|
|
15
8
|
* [cheerio](https://www.npmjs.com/package/cheerio) HTML parser.
|
|
@@ -87,7 +80,7 @@ const WritableStream_1 = require("htmlparser2/lib/WritableStream");
|
|
|
87
80
|
* ```
|
|
88
81
|
* @category Crawlers
|
|
89
82
|
*/
|
|
90
|
-
class CheerioCrawler extends http_1.HttpCrawler {
|
|
83
|
+
export class CheerioCrawler extends HttpCrawler {
|
|
91
84
|
/**
|
|
92
85
|
* All `CheerioCrawler` parameters are passed via an options object.
|
|
93
86
|
*/
|
|
@@ -96,14 +89,10 @@ class CheerioCrawler extends http_1.HttpCrawler {
|
|
|
96
89
|
super(options, config);
|
|
97
90
|
}
|
|
98
91
|
async _parseHTML(response, isXml, crawlingContext) {
|
|
99
|
-
const body = await (
|
|
100
|
-
const dom =
|
|
101
|
-
const $ = cheerio.load(
|
|
102
|
-
xmlMode: isXml,
|
|
103
|
-
// Recent versions of cheerio use parse5 as the HTML parser/serializer. It's more strict than htmlparser2
|
|
104
|
-
// and not good for scraping. It also does not have a great streaming interface.
|
|
105
|
-
// Here we tell cheerio to use htmlparser2 for serialization, otherwise the conflict produces weird errors.
|
|
106
|
-
_useHtmlParser2: true,
|
|
92
|
+
const body = await readStreamToString(response);
|
|
93
|
+
const dom = parseDocument(body, { decodeEntities: true, xmlMode: isXml });
|
|
94
|
+
const $ = cheerio.load(dom, {
|
|
95
|
+
xml: { decodeEntities: true, xmlMode: isXml },
|
|
107
96
|
});
|
|
108
97
|
return {
|
|
109
98
|
dom,
|
|
@@ -122,20 +111,6 @@ class CheerioCrawler extends http_1.HttpCrawler {
|
|
|
122
111
|
},
|
|
123
112
|
};
|
|
124
113
|
}
|
|
125
|
-
// TODO: unused code - remove in 4.0
|
|
126
|
-
async _parseHtmlToDom(response, isXml) {
|
|
127
|
-
return new Promise((resolve, reject) => {
|
|
128
|
-
const domHandler = new htmlparser2_1.DomHandler((err, dom) => {
|
|
129
|
-
if (err)
|
|
130
|
-
reject(err);
|
|
131
|
-
else
|
|
132
|
-
resolve(dom);
|
|
133
|
-
}, { xmlMode: isXml });
|
|
134
|
-
const parser = new WritableStream_1.WritableStream(domHandler, { decodeEntities: true, xmlMode: isXml });
|
|
135
|
-
parser.on('error', reject);
|
|
136
|
-
response.on('error', reject).pipe(parser);
|
|
137
|
-
});
|
|
138
|
-
}
|
|
139
114
|
async _runRequestHandler(context) {
|
|
140
115
|
context.waitForSelector = async (selector, _timeoutMs) => {
|
|
141
116
|
if (context.$(selector).get().length === 0) {
|
|
@@ -151,20 +126,19 @@ class CheerioCrawler extends http_1.HttpCrawler {
|
|
|
151
126
|
await super._runRequestHandler(context);
|
|
152
127
|
}
|
|
153
128
|
}
|
|
154
|
-
exports.CheerioCrawler = CheerioCrawler;
|
|
155
129
|
/** @internal */
|
|
156
|
-
async function cheerioCrawlerEnqueueLinks({ options, $, requestQueue, robotsTxtFile, onSkippedRequest, originalRequestUrl, finalRequestUrl, }) {
|
|
130
|
+
export async function cheerioCrawlerEnqueueLinks({ options, $, requestQueue, robotsTxtFile, onSkippedRequest, originalRequestUrl, finalRequestUrl, }) {
|
|
157
131
|
if (!$) {
|
|
158
132
|
throw new Error('Cannot enqueue links because the DOM is not available.');
|
|
159
133
|
}
|
|
160
|
-
const baseUrl =
|
|
134
|
+
const baseUrl = resolveBaseUrlForEnqueueLinksFiltering({
|
|
161
135
|
enqueueStrategy: options?.strategy,
|
|
162
136
|
finalRequestUrl,
|
|
163
137
|
originalRequestUrl,
|
|
164
138
|
userProvidedBaseUrl: options?.baseUrl,
|
|
165
139
|
});
|
|
166
|
-
const urls =
|
|
167
|
-
return
|
|
140
|
+
const urls = extractUrlsFromCheerio($, options?.selector ?? 'a', options?.baseUrl ?? finalRequestUrl ?? originalRequestUrl);
|
|
141
|
+
return enqueueLinks({
|
|
168
142
|
requestQueue,
|
|
169
143
|
robotsTxtFile,
|
|
170
144
|
onSkippedRequest,
|
|
@@ -197,7 +171,7 @@ async function cheerioCrawlerEnqueueLinks({ options, $, requestQueue, robotsTxtF
|
|
|
197
171
|
* await crawler.run();
|
|
198
172
|
* ```
|
|
199
173
|
*/
|
|
200
|
-
function createCheerioRouter(routes) {
|
|
201
|
-
return
|
|
174
|
+
export function createCheerioRouter(routes) {
|
|
175
|
+
return Router.create(routes);
|
|
202
176
|
}
|
|
203
177
|
//# sourceMappingURL=cheerio-crawler.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"cheerio-crawler.js","sourceRoot":"","sources":["../../src/internals/cheerio-crawler.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"cheerio-crawler.js","sourceRoot":"","sources":["../../src/internals/cheerio-crawler.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,IAAI,IAAI,kBAAkB,EAAE,MAAM,uBAAuB,CAAC;AAenE,OAAO,EAAE,YAAY,EAAE,WAAW,EAAE,sCAAsC,EAAE,MAAM,EAAE,MAAM,eAAe,CAAC;AAE1G,OAAO,EAAoB,sBAAsB,EAAsB,MAAM,gBAAgB,CAAC;AAE9F,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AACnC,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AA+D5C;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4EG;AACH,MAAM,OAAO,cAAe,SAAQ,WAAmC;IACnE;;OAEG;IACH,qEAAqE;IACrE,YAAY,OAA+B,EAAE,MAAsB;QAC/D,KAAK,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;IAC3B,CAAC;IAEkB,KAAK,CAAC,UAAU,CAC/B,QAAyB,EACzB,KAAc,EACd,eAAuC;QAEvC,MAAM,IAAI,GAAG,MAAM,kBAAkB,CAAC,QAAQ,CAAC,CAAC;QAChD,MAAM,GAAG,GAAG,aAAa,CAAC,IAAI,EAAE,EAAE,cAAc,EAAE,IAAI,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC,CAAC;QAC1E,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,GAAG,EAAE;YACxB,GAAG,EAAE,EAAE,cAAc,EAAE,IAAI,EAAE,OAAO,EAAE,KAAK,EAAE;SAC9B,CAAC,CAAC;QAErB,OAAO;YACH,GAAG;YACH,CAAC;YACD,IAAI;YACJ,YAAY,EAAE,KAAK,EAAE,cAAoC,EAAE,EAAE;gBACzD,OAAO,0BAA0B,CAAC;oBAC9B,OAAO,EAAE,cAAc;oBACvB,CAAC;oBACD,YAAY,EAAE,MAAM,IAAI,CAAC,eAAe,EAAE;oBAC1C,aAAa,EAAE,MAAM,IAAI,CAAC,sBAAsB,CAAC,eAAe,CAAC,OAAO,CAAC,GAAG,CAAC;oBAC7E,gBAAgB,EAAE,IAAI,CAAC,gBAAgB;oBACvC,kBAAkB,EAAE,eAAe,CAAC,OAAO,CAAC,GAAG;oBAC/C,eAAe,EAAE,eAAe,CAAC,OAAO,CAAC,SAAS;iBACrD,CAAC,CAAC;YACP,CAAC;SACJ,CAAC;IACN,CAAC;IAEkB,KAAK,CAAC,kBAAkB,CAAC,OAA+B;QACvE,OAAO,CAAC,eAAe,GAAG,KAAK,EAAE,QAAiB,EAAE,UAAmB,EAAE,EAAE;YACvE,IAAI,OAAO,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,GAAG,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBACzC,MAAM,IAAI,KAAK,CAAC,aAAa,QAAQ,cAAc,CAAC,CAAC;YACzD,CAAC;QACL,CAAC,CAAC;QACF,OAAO,CAAC,gBAAgB,GAAG,KAAK,EAAE,QAAiB,EAAE,SAAkB,EAAE,EAAE;YACvE,IAAI,QAAQ,EAAE,CAAC;gBACX,MAAM,OAAO,CAAC,eAAe,CAAC,QAAQ,EAAE,SAAS,CAAC,CAAC;YACvD,CAAC;YAED,OAAO,OAAO,CAAC,CAAC,CAAC;QACrB,CAAC,CAAC;QAEF,MAAM,KAAK,CAAC,kBAAkB,CAAC,OAAO,CAAC,CAAC;IAC5C,CAAC;CACJ;AAYD,gBAAgB;AAChB,MAAM,CAAC,KAAK,UAAU,0BAA0B,CAAC,EAC7C,OAAO,EACP,CAAC,EACD,YAAY,EACZ,aAAa,EACb,gBAAgB,EAChB,k
BAAkB,EAClB,eAAe,GACW;IAC1B,IAAI,CAAC,CAAC,EAAE,CAAC;QACL,MAAM,IAAI,KAAK,CAAC,wDAAwD,CAAC,CAAC;IAC9E,CAAC;IAED,MAAM,OAAO,GAAG,sCAAsC,CAAC;QACnD,eAAe,EAAE,OAAO,EAAE,QAAQ;QAClC,eAAe;QACf,kBAAkB;QAClB,mBAAmB,EAAE,OAAO,EAAE,OAAO;KACxC,CAAC,CAAC;IAEH,MAAM,IAAI,GAAG,sBAAsB,CAC/B,CAAC,EACD,OAAO,EAAE,QAAQ,IAAI,GAAG,EACxB,OAAO,EAAE,OAAO,IAAI,eAAe,IAAI,kBAAkB,CAC5D,CAAC;IAEF,OAAO,YAAY,CAAC;QAChB,YAAY;QACZ,aAAa;QACb,gBAAgB;QAChB,IAAI;QACJ,OAAO;QACP,GAAG,OAAO;KACb,CAAC,CAAC;AACP,CAAC;AAED;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,MAAM,UAAU,mBAAmB,CAGjC,MAAwC;IACtC,OAAO,MAAM,CAAC,MAAM,CAAU,MAAM,CAAC,CAAC;AAC1C,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,19 +1,13 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@crawlee/cheerio",
|
|
3
|
-
"version": "3.13.6-beta.0",
|
|
3
|
+
"version": "4.0.0-beta.0",
|
|
4
4
|
"description": "The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs (not only) with headless Chrome and Puppeteer.",
|
|
5
5
|
"engines": {
|
|
6
|
-
"node": ">=
|
|
6
|
+
"node": ">=22.0.0"
|
|
7
7
|
},
|
|
8
|
-
"
|
|
9
|
-
"module": "./index.mjs",
|
|
10
|
-
"types": "./index.d.ts",
|
|
8
|
+
"type": "module",
|
|
11
9
|
"exports": {
|
|
12
|
-
".":
|
|
13
|
-
"import": "./index.mjs",
|
|
14
|
-
"require": "./index.js",
|
|
15
|
-
"types": "./index.d.ts"
|
|
16
|
-
},
|
|
10
|
+
".": "./index.js",
|
|
17
11
|
"./package.json": "./package.json"
|
|
18
12
|
},
|
|
19
13
|
"keywords": [
|
|
@@ -46,19 +40,19 @@
|
|
|
46
40
|
"scripts": {
|
|
47
41
|
"build": "yarn clean && yarn compile && yarn copy",
|
|
48
42
|
"clean": "rimraf ./dist",
|
|
49
|
-
"compile": "tsc -p tsconfig.build.json
|
|
43
|
+
"compile": "tsc -p tsconfig.build.json",
|
|
50
44
|
"copy": "tsx ../../scripts/copy.ts"
|
|
51
45
|
},
|
|
52
46
|
"publishConfig": {
|
|
53
47
|
"access": "public"
|
|
54
48
|
},
|
|
55
49
|
"dependencies": {
|
|
56
|
-
"@crawlee/http": "
|
|
57
|
-
"@crawlee/types": "
|
|
58
|
-
"@crawlee/utils": "
|
|
59
|
-
"cheerio": "1.0.0
|
|
60
|
-
"htmlparser2": "^
|
|
61
|
-
"tslib": "^2.
|
|
50
|
+
"@crawlee/http": "4.0.0-beta.0",
|
|
51
|
+
"@crawlee/types": "4.0.0-beta.0",
|
|
52
|
+
"@crawlee/utils": "4.0.0-beta.0",
|
|
53
|
+
"cheerio": "^1.0.0",
|
|
54
|
+
"htmlparser2": "^10.0.0",
|
|
55
|
+
"tslib": "^2.8.1"
|
|
62
56
|
},
|
|
63
57
|
"lerna": {
|
|
64
58
|
"command": {
|
|
@@ -67,5 +61,5 @@
|
|
|
67
61
|
}
|
|
68
62
|
}
|
|
69
63
|
},
|
|
70
|
-
"gitHead": "
|
|
64
|
+
"gitHead": "927bdafa403ff347327158b01d20b817378168a7"
|
|
71
65
|
}
|