@crawlee/linkedom 3.10.6-beta.2 → 3.10.6-beta.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md
CHANGED
|
@@ -16,17 +16,6 @@
|
|
|
16
16
|
<a href="https://github.com/apify/crawlee/actions/workflows/test-ci.yml"><img src="https://github.com/apify/crawlee/actions/workflows/test-ci.yml/badge.svg?branch=master" alt="Build Status" style="max-width: 100%;"></a>
|
|
17
17
|
</p>
|
|
18
18
|
|
|
19
|
-
<h1 align="center">
|
|
20
|
-
<a href="https://apify.com/resources/scraping-with-crawlee">
|
|
21
|
-
<picture>
|
|
22
|
-
<source media="(prefers-color-scheme: dark)" srcset="https://cdn-cms.apify.com/Scraping_with_Crawlee_101_de11e9de45.png">
|
|
23
|
-
<img alt="Crawlee" src="https://cdn-cms.apify.com/Scraping_with_Crawlee_101_de11e9de45.png" width="500">
|
|
24
|
-
</picture>
|
|
25
|
-
</a>
|
|
26
|
-
<br>
|
|
27
|
-
<small>Join us for a free webinar about Crawlee at <a href="https://apify.com/resources/scraping-with-crawlee"> 9 AM EST on June 12th, 2024</a> to learn how to build reliable web scrapers fast!</small>
|
|
28
|
-
</h1>
|
|
29
|
-
|
|
30
19
|
Crawlee covers your crawling and scraping end-to-end and **helps you build reliable scrapers. Fast.**
|
|
31
20
|
|
|
32
21
|
Your crawlers will appear human-like and fly under the radar of modern bot protections even with the default configuration. Crawlee gives you the tools to crawl the web for links, scrape data, and store it to disk or cloud while staying configurable to suit your project's needs.
|
|
@@ -1,9 +1,3 @@
|
|
|
1
|
-
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
2
|
-
/// <reference types="node" />
|
|
3
|
-
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
4
|
-
/// <reference types="node/http" />
|
|
5
|
-
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
6
|
-
/// <reference types="node_modules/got-scraping/node_modules/got/dist/source/core/timed-out" />
|
|
7
1
|
import type { IncomingMessage } from 'http';
|
|
8
2
|
import type { HttpCrawlerOptions, InternalHttpCrawlingContext, InternalHttpHook, ErrorHandler, RequestHandler, EnqueueLinksOptions, GetUserDataFromRequest, RouterRoutes, RequestProvider } from '@crawlee/http';
|
|
9
3
|
import { HttpCrawler } from '@crawlee/http';
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"linkedom-crawler.d.ts","sourceRoot":"","sources":["../../src/internals/linkedom-crawler.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"linkedom-crawler.d.ts","sourceRoot":"","sources":["../../src/internals/linkedom-crawler.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,MAAM,CAAC;AAG5C,OAAO,KAAK,EACR,kBAAkB,EAClB,2BAA2B,EAC3B,gBAAgB,EAChB,YAAY,EACZ,cAAc,EACd,mBAAmB,EACnB,sBAAsB,EACtB,YAAY,EACZ,eAAe,EAClB,MAAM,eAAe,CAAC;AACvB,OAAO,EACH,WAAW,EAKd,MAAM,eAAe,CAAC;AACvB,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AACjD,OAAO,EAAE,KAAK,WAAW,EAAS,MAAM,gBAAgB,CAAC;AAKzD,MAAM,MAAM,oBAAoB,CAC5B,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,UAAU,GAAG,GAAG,IACjC,YAAY,CAAC,uBAAuB,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,CAAC;AAE9D,MAAM,WAAW,sBAAsB,CACnC,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,UAAU,GAAG,GAAG,CACnC,SAAQ,kBAAkB,CAAC,uBAAuB,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC;CAAG;AAE5E,MAAM,WAAW,kCAAmC,SAAQ,IAAI,CAAC,mBAAmB,EAAE,MAAM,GAAG,cAAc,CAAC;CAAG;AAEjH,MAAM,MAAM,YAAY,CACpB,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,UAAU,GAAG,GAAG,IACjC,gBAAgB,CAAC,uBAAuB,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,CAAC;AAElE,MAAM,WAAW,uBAAuB,CACpC,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,UAAU,GAAG,GAAG,CACnC,SAAQ,2BAA2B,CAAC,QAAQ,EAAE,QAAQ,EAAE,eAAe,CAAC;IACtE,MAAM,EAAE,MAAM,CAAC;IAMf,QAAQ,EAAE,QAAQ,CAAC;IAEnB;;;;;;;;;;;;OAYG;IACH,eAAe,CAAC,QAAQ,EAAE,MAAM,EAAE,SAAS,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAErE;;;;;;;;;;;OAWG;IACH,gBAAgB,CAAC,QAAQ,CAAC,EAAE,MAAM,EAAE,SAAS,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,CAAC,CAAC;CACjF;AAED,MAAM,MAAM,sBAAsB,CAC9B,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,UAAU,GAAG,GAAG,IACjC,cAAc,CAAC,uBAAuB,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,CAAC;AAEhE;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAqEG;AAEH,qBAAa,eAAgB,SAAQ,WAAW,CAAC,uBAAuB,CAAC;IACrE,OAAO,CAAC,MAAM,CAAC,MAAM,CAAmB;cAEf,UAAU,CAC/B,QAAQ,EAAE,eAAe,EACzB,KAAK,EAAE,OAAO,EACd,eAAe,EAAE,uBAAuB;;;2BAaF,QAAQ;wCAEJ,kCAAkC;;IAYjE,kBAAkB,CAAC,OAAO,EAAE,uBAAuB;CAyBrE;AAED,UAAU,2BAA2B;IACjC,OAAO,CAAC,EAAE,kCAAkC,CAAC;IAC7C,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC;IACtB,YAAY,EAAE,eAAe,CAAC;IAC9B,kBAAkB,EAAE,MAAM,CAAC;IAC3B,eAAe,CAAC,EAAE,MAAM,CAAC;CAC5B;AAED,gBAAgB;AAChB,wBAAsB,2BAA2B,CAAC,EAC9C,OAAO,EACP,MAAM,EACN,YAAY,EACZ,kBAAkB,EAClB,eAAe,GAClB,EAAE,2BAA2B,4DAwB7B;AAmBD;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,wBAAgB,oBAAoB,CAChC,OAAO,SAAS,uBAAuB,GAAG,uBAAuB,EACjE,QAAQ,SAAS,UAAU,GAAG,sBAAsB,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,EAC1E,MAAM,CAAC,EAAE,YAAY,CAAC,OAAO,EAAE,QAAQ,CAAC,kDAEzC"}
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.
|
|
3
|
+
exports.LinkeDOMCrawler = void 0;
|
|
4
|
+
exports.linkedomCrawlerEnqueueLinks = linkedomCrawlerEnqueueLinks;
|
|
5
|
+
exports.createLinkeDOMRouter = createLinkeDOMRouter;
|
|
4
6
|
const tslib_1 = require("tslib");
|
|
5
7
|
const utilities_1 = require("@apify/utilities");
|
|
6
8
|
const http_1 = require("@crawlee/http");
|
|
@@ -149,7 +151,6 @@ async function linkedomCrawlerEnqueueLinks({ options, window, requestQueue, orig
|
|
|
149
151
|
...options,
|
|
150
152
|
});
|
|
151
153
|
}
|
|
152
|
-
exports.linkedomCrawlerEnqueueLinks = linkedomCrawlerEnqueueLinks;
|
|
153
154
|
/**
|
|
154
155
|
* Extracts URLs from a given Window object.
|
|
155
156
|
* @ignore
|
|
@@ -193,5 +194,4 @@ function extractUrlsFromWindow(window, selector, baseUrl) {
|
|
|
193
194
|
function createLinkeDOMRouter(routes) {
|
|
194
195
|
return http_1.Router.create(routes);
|
|
195
196
|
}
|
|
196
|
-
exports.createLinkeDOMRouter = createLinkeDOMRouter;
|
|
197
197
|
//# sourceMappingURL=linkedom-crawler.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"linkedom-crawler.js","sourceRoot":"","sources":["../../src/internals/linkedom-crawler.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"linkedom-crawler.js","sourceRoot":"","sources":["../../src/internals/linkedom-crawler.ts"],"names":[],"mappings":";;;AAuOA,kEA8BC;AA2CD,oDAKC;;AAnTD,gDAAwD;AAYxD,wCAMuB;AAEvB,0CAAyD;AACzD,yDAAmC;AACnC,mHAAmH;AACnH,4CAA4C;AAkE5C;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAqEG;AAEH,MAAa,eAAgB,SAAQ,kBAAoC;IAGlD,KAAK,CAAC,UAAU,CAC/B,QAAyB,EACzB,KAAc,EACd,eAAwC;QAExC,MAAM,IAAI,GAAG,MAAM,IAAA,gCAAoB,EAAC,QAAQ,CAAC,CAAC;QAElD,MAAM,QAAQ,GAAG,eAAe,CAAC,MAAM,CAAC,eAAe,CAAC,IAAI,CAAC,QAAQ,EAAE,EAAE,KAAK,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC;QAE3G,OAAO;YACH,MAAM,EAAE,QAAQ,CAAC,WAAW;YAC5B,IAAI,IAAI;gBACJ,OAAO,QAAQ,CAAC,eAAe,CAAC,SAAS,CAAC;YAC9C,CAAC;YACD,IAAI,QAAQ;gBACR,iEAAiE;gBACjE,OAAO,QAA+B,CAAC;YAC3C,CAAC;YACD,YAAY,EAAE,KAAK,EAAE,cAAmD,EAAE,EAAE;gBACxE,OAAO,2BAA2B,CAAC;oBAC/B,OAAO,EAAE,cAAc;oBACvB,MAAM,EAAE,QAAQ,CAAC,WAAW;oBAC5B,YAAY,EAAE,MAAM,IAAI,CAAC,eAAe,EAAE;oBAC1C,kBAAkB,EAAE,eAAe,CAAC,OAAO,CAAC,GAAG;oBAC/C,eAAe,EAAE,eAAe,CAAC,OAAO,CAAC,SAAS;iBACrD,CAAC,CAAC;YACP,CAAC;SACJ,CAAC;IACN,CAAC;IAEQ,KAAK,CAAC,kBAAkB,CAAC,OAAgC;QAC9D,OAAO,CAAC,eAAe,GAAG,KAAK,EAAE,QAAgB,EAAE,SAAS,GAAG,IAAK,EAAE,EAAE;YACpE,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;YAErC,IAAI,CAAC,CAAC,QAAQ,CAAC,CAAC,GAAG,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBACjC,IAAI,SAAS,EAAE,CAAC;oBACZ,MAAM,IAAA,aAAK,EAAC,EAAE,CAAC,CAAC;oBAChB,OAAO,OAAO,CAAC,eAAe,CAAC,QAAQ,EAAE,IAAI,CAAC,GAAG,CAAC,SAAS,GAAG,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC;gBAC1E,CAAC;gBAED,MAAM,IAAI,KAAK,CAAC,aAAa,QAAQ,cAAc,CAAC,CAAC;YACzD,CAAC;QACL,CAAC,CAAC;QACF,OAAO,CAAC,gBAAgB,GAAG,KAAK,EAAE,QAAiB,EAAE,UAAU,GAAG,IAAK,EAAE,EAAE;YACvE,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;YAErC,IAAI,QAAQ,IAAI,CAAC,CAAC,QAAQ,CAAC,CAAC,GAAG,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBAC7C,MAAM,IAAI,KAAK,CAAC,aAAa,QAAQ,cAAc,CAAC,CAAC;YACzD,CAAC;YAED,OAAO,CAAC,CAAC;QACb,CAAC,CAAC;QAEF,MAAM,KAAK,CAAC,kBAAkB,CAAC,OAAO,CAAC,CAAC;IAC5C,CAAC;;AAzDL,0CA0DC;AAzDkB;;;;WAAS,IAAI,kBAAS,EAAE;GAAC;AAmE5C,gBAAgB;AACT,KAAK,UAAU,2BAA2B,CAAC,EAC9C,OAAO,EACP,MAAM,EACN,YAAY,EACZ,kBAAkB,EAClB,eAAe,GACW;IAC1B,IAAI,CAAC,MAAM,EAAE,CAAC;QACV,MAAM,IAAI,KAAK,CAAC,wDAAwD,CAAC,CAAC;IAC9E,CAAC;IAED,MAAM,OAAO,GAAG,IAAA,6CAAsC,EAAC;QACnD,eAAe,EAAE,OAAO,EAAE,QAAQ;QAClC,eAAe;QACf,kBAAkB;QAClB,mBAAmB,EAAE,OAAO,EAAE,OAAO;KACxC,CAAC,CAAC;IAEH,MAAM,IAAI,GAAG,qBAAqB,CAC9B,MAAM,EACN,OAAO,EAAE,QAAQ,IAAI,GAAG,EACxB,OAAO,EAAE,OAAO,IAAI,eAAe,IAAI,kBAAkB,CAC5D,CAAC;IAEF,OAAO,IAAA,mBAAY,EAAC;QAChB,YAAY;QACZ,IAAI;QACJ,OAAO;QACP,GAAG,OAAO;KACb,CAAC,CAAC;AACP,CAAC;AAED;;;GAGG;AACH,SAAS,qBAAqB,CAAC,MAAc,EAAE,QAAgB,EAAE,OAAe;IAC5E,OAAO,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,gBAAgB,CAAC,QAAQ,CAAC,CAAC;SACxD,GAAG,CAAC,CAAC,CAAM,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;SACvB,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,KAAK,SAAS,IAAI,IAAI,KAAK,EAAE,CAAC;SACnD,GAAG,CAAC,CAAC,IAAwB,EAAE,EAAE;QAC9B,IAAI,IAAI,KAAK,SAAS,EAAE,CAAC;YACrB,OAAO,SAAS,CAAC;QACrB,CAAC;QACD,OAAO,IAAA,qBAAc,EAAC,IAAI,EAAE,OAAO,CAAC,CAAC;IACzC,CAAC,CAAC;SACD,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,KAAK,SAAS,IAAI,IAAI,KAAK,EAAE,CAAa,CAAC;AACzE,CAAC;AAED;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,SAAgB,oBAAoB,CAGlC,MAAwC;IACtC,OAAO,aAAM,CAAC,MAAM,CAAU,MAAM,CAAC,CAAC;AAC1C,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@crawlee/linkedom",
|
|
3
|
-
"version": "3.10.6-beta.
|
|
3
|
+
"version": "3.10.6-beta.20",
|
|
4
4
|
"description": "The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs (not only) with headless Chrome and Puppeteer.",
|
|
5
5
|
"engines": {
|
|
6
6
|
"node": ">=16.0.0"
|
|
@@ -55,8 +55,8 @@
|
|
|
55
55
|
"dependencies": {
|
|
56
56
|
"@apify/timeout": "^0.3.0",
|
|
57
57
|
"@apify/utilities": "^2.7.10",
|
|
58
|
-
"@crawlee/http": "3.10.6-beta.
|
|
59
|
-
"@crawlee/types": "3.10.6-beta.
|
|
58
|
+
"@crawlee/http": "3.10.6-beta.20",
|
|
59
|
+
"@crawlee/types": "3.10.6-beta.20",
|
|
60
60
|
"linkedom": "^0.18.0",
|
|
61
61
|
"ow": "^0.28.2",
|
|
62
62
|
"tslib": "^2.4.0"
|
|
@@ -68,5 +68,5 @@
|
|
|
68
68
|
}
|
|
69
69
|
}
|
|
70
70
|
},
|
|
71
|
-
"gitHead": "
|
|
71
|
+
"gitHead": "727094022a36c55e97f9444c88050e9e7c05933e"
|
|
72
72
|
}
|