@crawlee/linkedom 3.13.8-beta.2 → 3.13.8-beta.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -95,11 +95,11 @@ class LinkeDOMCrawler extends http_1.HttpCrawler {
|
|
|
95
95
|
},
|
|
96
96
|
enqueueLinks: async (enqueueOptions) => {
|
|
97
97
|
return linkedomCrawlerEnqueueLinks({
|
|
98
|
-
options: enqueueOptions,
|
|
98
|
+
options: { ...enqueueOptions, limit: this.calculateEnqueuedRequestLimit(enqueueOptions?.limit) },
|
|
99
99
|
window: document.defaultView,
|
|
100
100
|
requestQueue: await this.getRequestQueue(),
|
|
101
101
|
robotsTxtFile: await this.getRobotsTxtFileForUrl(crawlingContext.request.url),
|
|
102
|
-
onSkippedRequest: this.
|
|
102
|
+
onSkippedRequest: this.handleSkippedRequest,
|
|
103
103
|
originalRequestUrl: crawlingContext.request.url,
|
|
104
104
|
finalRequestUrl: crawlingContext.request.loadedUrl,
|
|
105
105
|
});
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"linkedom-crawler.js","sourceRoot":"","sources":["../../src/internals/linkedom-crawler.ts"],"names":[],"mappings":";;;AA8OA,kEAkCC;AA2CD,oDAKC;;AAlTD,wCAMuB;AAEvB,0CAA6E;AAC7E,yDAAmC;AACnC,mHAAmH;AACnH,4CAA4C;AAE5C,gDAAwD;AAkExD;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAqEG;AAEH,MAAa,eAAgB,SAAQ,kBAAoC;IAGlD,KAAK,CAAC,UAAU,CAC/B,QAAyB,EACzB,KAAc,EACd,eAAwC;QAExC,MAAM,IAAI,GAAG,MAAM,IAAA,gCAAoB,EAAC,QAAQ,CAAC,CAAC;QAElD,MAAM,QAAQ,GAAG,eAAe,CAAC,MAAM,CAAC,eAAe,CAAC,IAAI,CAAC,QAAQ,EAAE,EAAE,KAAK,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC;QAE3G,OAAO;YACH,MAAM,EAAE,QAAQ,CAAC,WAAW;YAC5B,IAAI,IAAI;gBACJ,OAAO,QAAQ,CAAC,eAAe,CAAC,SAAS,CAAC;YAC9C,CAAC;YACD,IAAI,QAAQ;gBACR,iEAAiE;gBACjE,OAAO,QAA+B,CAAC;YAC3C,CAAC;YACD,YAAY,EAAE,KAAK,EAAE,cAAmD,EAAE,EAAE;gBACxE,OAAO,2BAA2B,CAAC;oBAC/B,OAAO,EAAE,cAAc;
|
|
1
|
+
{"version":3,"file":"linkedom-crawler.js","sourceRoot":"","sources":["../../src/internals/linkedom-crawler.ts"],"names":[],"mappings":";;;AA8OA,kEAkCC;AA2CD,oDAKC;;AAlTD,wCAMuB;AAEvB,0CAA6E;AAC7E,yDAAmC;AACnC,mHAAmH;AACnH,4CAA4C;AAE5C,gDAAwD;AAkExD;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAqEG;AAEH,MAAa,eAAgB,SAAQ,kBAAoC;IAGlD,KAAK,CAAC,UAAU,CAC/B,QAAyB,EACzB,KAAc,EACd,eAAwC;QAExC,MAAM,IAAI,GAAG,MAAM,IAAA,gCAAoB,EAAC,QAAQ,CAAC,CAAC;QAElD,MAAM,QAAQ,GAAG,eAAe,CAAC,MAAM,CAAC,eAAe,CAAC,IAAI,CAAC,QAAQ,EAAE,EAAE,KAAK,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC;QAE3G,OAAO;YACH,MAAM,EAAE,QAAQ,CAAC,WAAW;YAC5B,IAAI,IAAI;gBACJ,OAAO,QAAQ,CAAC,eAAe,CAAC,SAAS,CAAC;YAC9C,CAAC;YACD,IAAI,QAAQ;gBACR,iEAAiE;gBACjE,OAAO,QAA+B,CAAC;YAC3C,CAAC;YACD,YAAY,EAAE,KAAK,EAAE,cAAmD,EAAE,EAAE;gBACxE,OAAO,2BAA2B,CAAC;oBAC/B,OAAO,EAAE,EAAE,GAAG,cAAc,EAAE,KAAK,EAAE,IAAI,CAAC,6BAA6B,CAAC,cAAc,EAAE,KAAK,CAAC,EAAE;oBAChG,MAAM,EAAE,QAAQ,CAAC,WAAW;oBAC5B,YAAY,EAAE,MAAM,IAAI,CAAC,eAAe,EAAE;oBAC1C,aAAa,EAAE,MAAM,IAAI,CAAC,sBAAsB,CAAC,eAAe,CAAC,OAAO,CAAC,GAAG,CAAC;oBAC7E,gBAAgB,EAAE,IAAI,CAAC,oBAAoB;oBAC3C,kBAAkB,EAAE,eAAe,CAAC,OAAO,CAAC,GAAG;oBAC/C,eAAe,EAAE,eAAe,CAAC,OAAO,CAAC,SAAS;iBACrD,CAAC,CAAC;YACP,CAAC;SACJ,CAAC;IACN,CAAC;IAEQ,KAAK,CAAC,kBAAkB,CAAC,OAAgC;QAC9D,OAAO,CAAC,eAAe,GAAG,KAAK,EAAE,QAAgB,EAAE,SAAS,GAAG,IAAK,EAAE,EAAE;YACpE,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;YAErC,IAAI,CAAC,CAAC,QAAQ,CAAC,CAAC,GAAG,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBACjC,IAAI,SAAS,EAAE,CAAC;oBACZ,MAAM,IAAA,aAAK,EAAC,EAAE,CAAC,CAAC;oBAChB,MAAM,OAAO,CAAC,eAAe,CAAC,QAAQ,EAAE,IAAI,CAAC,GAAG,CAAC,SAAS,GAAG,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC;oBACrE,OAAO;gBACX,CAAC;gBAED,MAAM,IAAI,KAAK,CAAC,aAAa,QAAQ,cAAc,CAAC,CAAC;YACzD,CAAC;QACL,CAAC,CAAC;QACF,OAAO,CAAC,gBAAgB,GAAG,KAAK,EAAE,QAAiB,EAAE,UAAU,GAAG,IAAK,EAAE,EAAE;YACvE,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;YAErC,IAAI,QAAQ,IAAI,CAAC,CAAC,QAAQ,CAAC,CAAC,GAAG,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBAC7C,MAAM,IAAI,KAAK,CAAC,aAAa,QAAQ,cAAc,CAAC,CAAC;YACzD,CAAC;YAED,OAAO,CAAC,CAAC;QACb,CAAC,CAAC;QAEF,MAAM,KAAK,CAAC,kBAAkB,CAAC,OAAO,CAAC,CAAC;IAC5C,CAAC;;AA5DL,0CA6DC;AA5DkB;;;;WAAS,IAAI,kBAAS,EAAE;GAAC;AAwE5C,gBAAgB;AACT,KAAK,UAAU,2BAA2B,CAAC,EAC9C,OAAO,EACP,MAAM,EACN,YAAY,EACZ,aAAa,EACb,gBAAgB,EAChB,kBAAkB,EAClB,eAAe,GACW;IAC1B,IAAI,CAAC,MAAM,EAAE,CAAC;QACV,MAAM,IAAI,KAAK,CAAC,wDAAwD,CAAC,CAAC;IAC9E,CAAC;IAED,MAAM,OAAO,GAAG,IAAA,6CAAsC,EAAC;QACnD,eAAe,EAAE,OAAO,EAAE,QAAQ;QAClC,eAAe;QACf,kBAAkB;QAClB,mBAAmB,EAAE,OAAO,EAAE,OAAO;KACxC,CAAC,CAAC;IAEH,MAAM,IAAI,GAAG,qBAAqB,CAC9B,MAAM,EACN,OAAO,EAAE,QAAQ,IAAI,GAAG,EACxB,OAAO,EAAE,OAAO,IAAI,eAAe,IAAI,kBAAkB,CAC5D,CAAC;IAEF,OAAO,IAAA,mBAAY,EAAC;QAChB,YAAY;QACZ,aAAa;QACb,gBAAgB;QAChB,IAAI;QACJ,OAAO;QACP,GAAG,OAAO;KACb,CAAC,CAAC;AACP,CAAC;AAED;;;GAGG;AACH,SAAS,qBAAqB,CAAC,MAAc,EAAE,QAAgB,EAAE,OAAe;IAC5E,OAAO,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,gBAAgB,CAAC,QAAQ,CAAC,CAAC;SACxD,GAAG,CAAC,CAAC,CAAM,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;SACvB,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,KAAK,SAAS,IAAI,IAAI,KAAK,EAAE,CAAC;SACnD,GAAG,CAAC,CAAC,IAAwB,EAAE,EAAE;QAC9B,IAAI,IAAI,KAAK,SAAS,EAAE,CAAC;YACrB,OAAO,SAAS,CAAC;QACrB,CAAC;QACD,OAAO,IAAA,qBAAc,EAAC,IAAI,EAAE,OAAO,CAAC,CAAC;IACzC,CAAC,CAAC;SACD,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,KAAK,SAAS,IAAI,IAAI,KAAK,EAAE,CAAa,CAAC;AACzE,CAAC;AAED;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,SAAgB,oBAAoB,CAGlC,MAAwC;IACtC,OAAO,aAAM,CAAC,MAAM,CAAU,MAAM,CAAC,CAAC;AAC1C,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@crawlee/linkedom",
|
|
3
|
-
"version": "3.13.8-beta.
|
|
3
|
+
"version": "3.13.8-beta.3",
|
|
4
4
|
"description": "The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs (not only) with headless Chrome and Puppeteer.",
|
|
5
5
|
"engines": {
|
|
6
6
|
"node": ">=16.0.0"
|
|
@@ -55,8 +55,8 @@
|
|
|
55
55
|
"dependencies": {
|
|
56
56
|
"@apify/timeout": "^0.3.0",
|
|
57
57
|
"@apify/utilities": "^2.7.10",
|
|
58
|
-
"@crawlee/http": "3.13.8-beta.
|
|
59
|
-
"@crawlee/types": "3.13.8-beta.
|
|
58
|
+
"@crawlee/http": "3.13.8-beta.3",
|
|
59
|
+
"@crawlee/types": "3.13.8-beta.3",
|
|
60
60
|
"linkedom": "^0.18.0",
|
|
61
61
|
"ow": "^0.28.2",
|
|
62
62
|
"tslib": "^2.4.0"
|
|
@@ -68,5 +68,5 @@
|
|
|
68
68
|
}
|
|
69
69
|
}
|
|
70
70
|
},
|
|
71
|
-
"gitHead": "
|
|
71
|
+
"gitHead": "267fdd451657f67fd1ae4254e78fa16ddcc44a91"
|
|
72
72
|
}
|