@crawlee/cheerio 4.0.0-beta.27 → 4.0.0-beta.29
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type { BasicCrawlingContext,
|
|
1
|
+
import type { BasicCrawlingContext, EnqueueLinksOptions, ErrorHandler, GetUserDataFromRequest, HttpCrawlerOptions, InternalHttpCrawlingContext, InternalHttpHook, RequestHandler, RequestProvider, RouterRoutes, SkippedRequestCallback } from '@crawlee/http';
|
|
2
2
|
import { HttpCrawler } from '@crawlee/http';
|
|
3
3
|
import type { BatchAddRequestsResult, Dictionary } from '@crawlee/types';
|
|
4
4
|
import { type CheerioRoot, type RobotsTxtFile } from '@crawlee/utils';
|
|
@@ -137,7 +137,7 @@ export declare class CheerioCrawler<ContextExtension = Dictionary<never>, Extend
|
|
|
137
137
|
/**
|
|
138
138
|
* All `CheerioCrawler` parameters are passed via an options object.
|
|
139
139
|
*/
|
|
140
|
-
constructor(options?: CheerioCrawlerOptions<ContextExtension, ExtendedContext
|
|
140
|
+
constructor(options?: CheerioCrawlerOptions<ContextExtension, ExtendedContext>);
|
|
141
141
|
private parseContent;
|
|
142
142
|
private addHelpers;
|
|
143
143
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"cheerio-crawler.d.ts","sourceRoot":"","sources":["../../src/internals/cheerio-crawler.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACR,oBAAoB,EACpB,
|
|
1
|
+
{"version":3,"file":"cheerio-crawler.d.ts","sourceRoot":"","sources":["../../src/internals/cheerio-crawler.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACR,oBAAoB,EACpB,mBAAmB,EACnB,YAAY,EACZ,sBAAsB,EACtB,kBAAkB,EAClB,2BAA2B,EAC3B,gBAAgB,EAChB,cAAc,EACd,eAAe,EACf,YAAY,EACZ,sBAAsB,EACzB,MAAM,eAAe,CAAC;AACvB,OAAO,EAAgB,WAAW,EAAkD,MAAM,eAAe,CAAC;AAC1G,OAAO,KAAK,EAAE,sBAAsB,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AACzE,OAAO,EAAE,KAAK,WAAW,EAA0B,KAAK,aAAa,EAAE,MAAM,gBAAgB,CAAC;AAE9F,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AAGnC,MAAM,MAAM,mBAAmB,CAC3B,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,UAAU,GAAG,GAAG,IACjC,YAAY,CAAC,sBAAsB,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,CAAC;AAE7D,MAAM,WAAW,qBAAqB,CAClC,gBAAgB,GAAG,UAAU,CAAC,KAAK,CAAC,EACpC,eAAe,SAAS,sBAAsB,GAAG,sBAAsB,GAAG,gBAAgB,EAC1F,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,UAAU,GAAG,GAAG,CACnC,SAAQ,kBAAkB,CAAC,sBAAsB,CAAC,QAAQ,EAAE,QAAQ,CAAC,EAAE,gBAAgB,EAAE,eAAe,CAAC;CAAG;AAE9G,MAAM,MAAM,WAAW,CACnB,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,UAAU,GAAG,GAAG,IACjC,gBAAgB,CAAC,sBAAsB,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,CAAC;AAEjE,MAAM,WAAW,sBAAsB,CACnC,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,UAAU,GAAG,GAAG,CACnC,SAAQ,2BAA2B,CAAC,QAAQ,EAAE,QAAQ,CAAC;IACrD;;OAEG;IACH,IAAI,EAAE,MAAM,CAAC;IAEb;;;OAGG;IACH,CAAC,EAAE,OAAO,CAAC,UAAU,CAAC;IAEtB;;;;;;;;;;;OAWG;IACH,eAAe,CAAC,QAAQ,EAAE,MAAM,EAAE,SAAS,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAErE;;;;;;;;;;;;;OAaG;IACH,gBAAgB,CAAC,QAAQ,CAAC,EAAE,MAAM,EAAE,SAAS,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,CAAC,CAAC;IAE9E;;OAEG;IACH,YAAY,CAAC,OAAO,CAAC,EAAE,mBAAmB,GAAG,OAAO,CAAC,sBAAsB,CAAC,CAAC;CAChF;AAED,MAAM,MAAM,qBAAqB,CAC7B,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,UAAU,GAAG,GAAG,IACjC,cAAc,CAAC,sBAAsB,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,CAAC;AAE/D;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4EG;AACH,qBAAa,cAAc,CACvB,gBAAgB,GAAG,UAAU,CAAC,KAAK,CAAC,EACpC,eAAe,SAAS,sBAAsB,GAAG,sBAAsB,GAAG,gBAAgB,CAC5F,SAAQ,WAAW,CAAC,sBAAsB,EAAE,gBAAgB,EAAE,eAAe,CAAC;IAC5E;;OAEG;gBACS,OAAO,CAAC,EAAE,qBAAqB,CAAC,gBAAgB,EAAE,eAAe,CAAC;YAYhE,YAAY;YAgBZ,UAAU;CA8B3B;AAED,UAAU,2BAA2B;IACjC,OAAO,CAAC,EAAE,mBAAmB,CAAC;IAC9B,CAAC,EAAE,OAAO,CAAC,UAAU,GAAG,IAAI,CAAC;IAC7B,YAAY,EAAE,eAAe,CAAC;IAC9B,aAAa,CAAC,EAAE,aAAa,CAAC;IAC9B,gBAAgB,CAAC,EAAE,sBAAsB,CAAC;IAC1C,kBAAkB,EAAE,MAAM,CAAC;IAC3B,eAAe,CAAC,EAAE,MAAM,CAAC;CAC5B;AAED,UAAU,gCAAgC;IACtC,YAAY,EAAE,oBAAoB,CAAC,cAAc,CAAC,CAAC;IACnD,OAAO,CAAC,EAAE,mBAAmB,CAAC;IAC9B,CAAC,EAAE,OAAO,CAAC,UAAU,GAAG,IAAI,CAAC;IAC7B,kBAAkB,EAAE,MAAM,CAAC;IAC3B,eAAe,CAAC,EAAE,MAAM,CAAC;CAC5B;AASD,gBAAgB;AAChB,wBAAsB,0BAA0B,CAC5C,OAAO,EAAE,2BAA2B,GAAG,gCAAgC,oBAmC1E;AAED;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,wBAAgB,mBAAmB,CAC/B,OAAO,SAAS,sBAAsB,GAAG,sBAAsB,EAC/D,QAAQ,SAAS,UAAU,GAAG,sBAAsB,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,EAC1E,MAAM,CAAC,EAAE,YAAY,CAAC,OAAO,EAAE,QAAQ,CAAC,kDAEzC"}
|
|
@@ -83,7 +83,7 @@ export class CheerioCrawler extends HttpCrawler {
|
|
|
83
83
|
/**
|
|
84
84
|
* All `CheerioCrawler` parameters are passed via an options object.
|
|
85
85
|
*/
|
|
86
|
-
constructor(options
|
|
86
|
+
constructor(options) {
|
|
87
87
|
super({
|
|
88
88
|
...options,
|
|
89
89
|
contextPipelineBuilder: () => this.buildContextPipeline()
|
|
@@ -91,7 +91,7 @@ export class CheerioCrawler extends HttpCrawler {
|
|
|
91
91
|
action: async (context) => await this.parseContent(context),
|
|
92
92
|
})
|
|
93
93
|
.compose({ action: async (context) => await this.addHelpers(context) }),
|
|
94
|
-
}
|
|
94
|
+
});
|
|
95
95
|
}
|
|
96
96
|
async parseContent(crawlingContext) {
|
|
97
97
|
const isXml = crawlingContext.contentType.type.includes('xml');
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"cheerio-crawler.js","sourceRoot":"","sources":["../../src/internals/cheerio-crawler.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"cheerio-crawler.js","sourceRoot":"","sources":["../../src/internals/cheerio-crawler.ts"],"names":[],"mappings":"AAaA,OAAO,EAAE,YAAY,EAAE,WAAW,EAAE,sCAAsC,EAAE,MAAM,EAAE,MAAM,eAAe,CAAC;AAE1G,OAAO,EAAoB,sBAAsB,EAAsB,MAAM,gBAAgB,CAAC;AAE9F,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AACnC,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AA2E5C;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4EG;AACH,MAAM,OAAO,cAGX,SAAQ,WAAsE;IAC5E;;OAEG;IACH,YAAY,OAAkE;QAC1E,KAAK,CAAC;YACF,GAAG,OAAO;YACV,sBAAsB,EAAE,GAAG,EAAE,CACzB,IAAI,CAAC,oBAAoB,EAAE;iBACtB,OAAO,CAAC;gBACL,MAAM,EAAE,KAAK,EAAE,OAAO,EAAE,EAAE,CAAC,MAAM,IAAI,CAAC,YAAY,CAAC,OAAO,CAAC;aAC9D,CAAC;iBACD,OAAO,CAAC,EAAE,MAAM,EAAE,KAAK,EAAE,OAAO,EAAE,EAAE,CAAC,MAAM,IAAI,CAAC,UAAU,CAAC,OAAO,CAAC,EAAE,CAAC;SAClF,CAAC,CAAC;IACP,CAAC;IAEO,KAAK,CAAC,YAAY,CAAC,eAA4C;QACnE,MAAM,KAAK,GAAG,eAAe,CAAC,WAAW,CAAC,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;QAC/D,MAAM,IAAI,GAAG,MAAM,CAAC,QAAQ,CAAC,eAAe,CAAC,IAAI,CAAC;YAC9C,CAAC,CAAC,eAAe,CAAC,IAAI,CAAC,QAAQ,CAAC,eAAe,CAAC,WAAW,CAAC,QAAQ,CAAC;YACrE,CAAC,CAAC,eAAe,CAAC,IAAI,CAAC;QAC3B,MAAM,GAAG,GAAG,aAAa,CAAC,IAAI,EAAE,EAAE,cAAc,EAAE,IAAI,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC,CAAC;QAC1E,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,GAAG,EAAE;YACxB,GAAG,EAAE,EAAE,cAAc,EAAE,IAAI,EAAE,OAAO,EAAE,KAAK,EAAE;SAC9B,CAAC,CAAC;QAErB,OAAO;YACH,CAAC;YACD,IAAI;SACP,CAAC;IACN,CAAC;IAEO,KAAK,CAAC,UAAU,CAAC,eAAgE;QACrF,MAAM,oBAAoB,GAAG,eAAe,CAAC,YAAY,CAAC;QAE1D,OAAO;YACH,YAAY,EAAE,KAAK,EAAE,cAAoC,EAAE,EAAE;gBACzD,OAAO,CAAC,MAAM,0BAA0B,CAAC;oBACrC,OAAO,EAAE,EAAE,GAAG,cAAc,EAAE,KAAK,EAAE,IAAI,CAAC,6BAA6B,CAAC,cAAc,EAAE,KAAK,CAAC,EAAE;oBAChG,CAAC,EAAE,eAAe,CAAC,CAAC;oBACpB,YAAY,EAAE,MAAM,IAAI,CAAC,eAAe,EAAE;oBAC1C,aAAa,EAAE,MAAM,IAAI,CAAC,sBAAsB,CAAC,eAAe,CAAC,OAAO,CAAC,GAAG,CAAC;oBAC7E,gBAAgB,EAAE,IAAI,CAAC,oBAAoB;oBAC3C,kBAAkB,EAAE,eAAe,CAAC,OAAO,CAAC,GAAG;oBAC/C,eAAe,EAAE,eAAe,CAAC,OAAO,CAAC,SAAS;oBAClD,YAAY,EAAE,oBAAoB;iBACrC,CAAC,CAA2B,CAAC,CAAC,2BAA2B;YAC9D,CAAC;YACD,eAAe,EAAE,KAAK,EAAE,QAAgB,EAAE,UAAmB,EAAE,EAAE;gBAC7D,IAAI,eAAe,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,GAAG,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;oBACjD,MAAM,IAAI,KAAK,CAAC,aAAa,QAAQ,cAAc,CAAC,CAAC;gBACzD,CAAC;YACL,CAAC;YACD,gBAAgB,EAAE,KAAK,EAAE,QAAiB,EAAE,SAAkB,EAAE,EAAE;gBAC9D,IAAI,QAAQ,EAAE,CAAC;oBACX,MAAM,eAAe,CAAC,eAAe,CAAC,QAAQ,EAAE,SAAS,CAAC,CAAC;gBAC/D,CAAC;gBAED,OAAO,eAAe,CAAC,CAAC,CAAC;YAC7B,CAAC;SACJ,CAAC;IACN,CAAC;CACJ;AAoBD,gBAAgB;AAChB,SAAS,oBAAoB,CACzB,OAAuE;IAEvE,OAAO,CAAC,CAAE,OAA4C,CAAC,YAAY,CAAC;AACxE,CAAC;AAED,gBAAgB;AAChB,MAAM,CAAC,KAAK,UAAU,0BAA0B,CAC5C,OAAuE;IAEvE,MAAM,EAAE,OAAO,EAAE,mBAAmB,EAAE,CAAC,EAAE,kBAAkB,EAAE,eAAe,EAAE,GAAG,OAAO,CAAC;IACzF,IAAI,CAAC,CAAC,EAAE,CAAC;QACL,MAAM,IAAI,KAAK,CAAC,wDAAwD,CAAC,CAAC;IAC9E,CAAC;IAED,MAAM,OAAO,GAAG,sCAAsC,CAAC;QACnD,eAAe,EAAE,mBAAmB,EAAE,QAAQ;QAC9C,eAAe;QACf,kBAAkB;QAClB,mBAAmB,EAAE,mBAAmB,EAAE,OAAO;KACpD,CAAC,CAAC;IAEH,MAAM,IAAI,GAAG,sBAAsB,CAC/B,CAAC,EACD,mBAAmB,EAAE,QAAQ,IAAI,GAAG,EACpC,mBAAmB,EAAE,OAAO,IAAI,eAAe,IAAI,kBAAkB,CACxE,CAAC;IAEF,IAAI,oBAAoB,CAAC,OAAO,CAAC,EAAE,CAAC;QAChC,OAAO,OAAO,CAAC,YAAY,CAAC;YACxB,IAAI;YACJ,OAAO;YACP,GAAG,mBAAmB;SACzB,CAAC,CAAC;IACP,CAAC;IACD,OAAO,YAAY,CAAC;QAChB,YAAY,EAAE,OAAO,CAAC,YAAY;QAClC,aAAa,EAAE,OAAO,CAAC,aAAa;QACpC,gBAAgB,EAAE,OAAO,CAAC,gBAAgB;QAC1C,IAAI;QACJ,OAAO;QACP,GAAG,mBAAmB;KACzB,CAAC,CAAC;AACP,CAAC;AAED;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,MAAM,UAAU,mBAAmB,CAGjC,MAAwC;IACtC,OAAO,MAAM,CAAC,MAAM,CAAU,MAAM,CAAC,CAAC;AAC1C,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@crawlee/cheerio",
|
|
3
|
-
"version": "4.0.0-beta.
|
|
3
|
+
"version": "4.0.0-beta.29",
|
|
4
4
|
"description": "The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs (not only) with headless Chrome and Puppeteer.",
|
|
5
5
|
"engines": {
|
|
6
6
|
"node": ">=22.0.0"
|
|
@@ -47,9 +47,9 @@
|
|
|
47
47
|
"access": "public"
|
|
48
48
|
},
|
|
49
49
|
"dependencies": {
|
|
50
|
-
"@crawlee/http": "4.0.0-beta.
|
|
51
|
-
"@crawlee/types": "4.0.0-beta.
|
|
52
|
-
"@crawlee/utils": "4.0.0-beta.
|
|
50
|
+
"@crawlee/http": "4.0.0-beta.29",
|
|
51
|
+
"@crawlee/types": "4.0.0-beta.29",
|
|
52
|
+
"@crawlee/utils": "4.0.0-beta.29",
|
|
53
53
|
"cheerio": "^1.0.0",
|
|
54
54
|
"htmlparser2": "^10.0.0",
|
|
55
55
|
"tslib": "^2.8.1"
|
|
@@ -61,5 +61,5 @@
|
|
|
61
61
|
}
|
|
62
62
|
}
|
|
63
63
|
},
|
|
64
|
-
"gitHead": "
|
|
64
|
+
"gitHead": "827042195782864fca26dcdf809aec4b4aa6bd06"
|
|
65
65
|
}
|