@crawlee/linkedom 3.13.3-beta.8 → 3.13.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -31,7 +31,7 @@ JSONData extends Dictionary = any> extends InternalHttpCrawlingContext<UserData,
   */
  waitForSelector(selector: string, timeoutMs?: number): Promise<void>;
  /**
- * Returns Cheerio handle, allowing to work with the data same way as with {@apilink CheerioCrawler}.
+ * Returns Cheerio handle, allowing to work with the data same way as with {@link CheerioCrawler}.
  * When provided with the `selector` argument, it will first look for the selector with a 5s timeout.
  *
  * **Example usage:**
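The hunk above touches the JSDoc of the context helpers `waitForSelector` and the Cheerio handle. The following TypeScript sketch (not part of the diff) shows how those helpers are typically combined in a request handler; it assumes the Cheerio handle is exposed as `parseWithCheerio`, as in Crawlee's other crawling contexts, and uses a purely illustrative selector:

import { LinkeDOMCrawler } from '@crawlee/linkedom';

const crawler = new LinkeDOMCrawler({
    async requestHandler({ waitForSelector, parseWithCheerio, pushData }) {
        // Wait up to 5 seconds for an (illustrative) selector to appear in the parsed DOM.
        await waitForSelector('.product-title', 5_000);

        // Query the page the same way as with CheerioCrawler.
        const $ = await parseWithCheerio();
        await pushData({ title: $('.product-title').text() });
    },
});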
@@ -54,7 +54,7 @@ JSONData extends Dictionary = any> = RequestHandler<LinkeDOMCrawlingContext<User
  *
  * Since `LinkeDOMCrawler` uses raw HTTP requests to download web pages,
  * it is very fast and efficient on data bandwidth. However, if the target website requires JavaScript
- * to display the content, you might need to use {@apilink PuppeteerCrawler} or {@apilink PlaywrightCrawler} instead,
+ * to display the content, you might need to use {@link PuppeteerCrawler} or {@link PlaywrightCrawler} instead,
  * because it loads the pages using full-featured headless Chrome browser.
  *
  * **Limitation**:
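For context on the class-level JSDoc being changed here, a minimal `LinkeDOMCrawler` that works with the parsed `window` could look like the sketch below (not part of the diff; the URL and log message are illustrative only):

import { LinkeDOMCrawler } from '@crawlee/linkedom';

const crawler = new LinkeDOMCrawler({
    async requestHandler({ request, window, log }) {
        // The page is fetched with a plain HTTP request and parsed by LinkeDOM,
        // so only server-rendered content is available here.
        log.info(`Title of ${request.url}: ${window.document.title}`);
    },
});

await crawler.run(['https://crawlee.dev']);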
@@ -62,18 +62,18 @@ JSONData extends Dictionary = any> = RequestHandler<LinkeDOMCrawlingContext<User
  *
  * `LinkeDOMCrawler` downloads each URL using a plain HTTP request,
  * parses the HTML content using [LinkeDOM](https://www.npmjs.com/package/linkedom)
- * and then invokes the user-provided {@apilink LinkeDOMCrawlerOptions.requestHandler} to extract page data
+ * and then invokes the user-provided {@link LinkeDOMCrawlerOptions.requestHandler} to extract page data
  * using the `window` object.
  *
- * The source URLs are represented using {@apilink Request} objects that are fed from
- * {@apilink RequestList} or {@apilink RequestQueue} instances provided by the {@apilink LinkeDOMCrawlerOptions.requestList}
- * or {@apilink LinkeDOMCrawlerOptions.requestQueue} constructor options, respectively.
+ * The source URLs are represented using {@link Request} objects that are fed from
+ * {@link RequestList} or {@link RequestQueue} instances provided by the {@link LinkeDOMCrawlerOptions.requestList}
+ * or {@link LinkeDOMCrawlerOptions.requestQueue} constructor options, respectively.
  *
- * If both {@apilink LinkeDOMCrawlerOptions.requestList} and {@apilink LinkeDOMCrawlerOptions.requestQueue} are used,
- * the instance first processes URLs from the {@apilink RequestList} and automatically enqueues all of them
- * to {@apilink RequestQueue} before it starts their processing. This ensures that a single URL is not crawled multiple times.
+ * If both {@link LinkeDOMCrawlerOptions.requestList} and {@link LinkeDOMCrawlerOptions.requestQueue} are used,
+ * the instance first processes URLs from the {@link RequestList} and automatically enqueues all of them
+ * to {@link RequestQueue} before it starts their processing. This ensures that a single URL is not crawled multiple times.
  *
- * The crawler finishes when there are no more {@apilink Request} objects to crawl.
+ * The crawler finishes when there are no more {@link Request} objects to crawl.
  *
  * We can use the `preNavigationHooks` to adjust `gotOptions`:
  *
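This hunk ends right before the JSDoc's own `preNavigationHooks` example, which the diff does not include. A rough TypeScript sketch of the pattern it refers to is given below; the specific `got` option being tweaked is an assumption for illustration, not the original example:

import { LinkeDOMCrawler } from '@crawlee/linkedom';

const crawler = new LinkeDOMCrawler({
    preNavigationHooks: [
        (_crawlingContext, gotOptions) => {
            // Adjust the underlying got-scraping request before it is sent
            // (illustrative value; any got option could be set here).
            gotOptions.timeout = { request: 10_000 };
        },
    ],
    async requestHandler({ window }) {
        console.log(window.document.title);
    },
});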
@@ -88,15 +88,15 @@ JSONData extends Dictionary = any> = RequestHandler<LinkeDOMCrawlingContext<User
  * By default, `LinkeDOMCrawler` only processes web pages with the `text/html`
  * and `application/xhtml+xml` MIME content types (as reported by the `Content-Type` HTTP header),
  * and skips pages with other content types. If you want the crawler to process other content types,
- * use the {@apilink LinkeDOMCrawlerOptions.additionalMimeTypes} constructor option.
+ * use the {@link LinkeDOMCrawlerOptions.additionalMimeTypes} constructor option.
  * Beware that the parsing behavior differs for HTML, XML, JSON and other types of content.
- * For more details, see {@apilink LinkeDOMCrawlerOptions.requestHandler}.
+ * For more details, see {@link LinkeDOMCrawlerOptions.requestHandler}.
  *
  * New requests are only dispatched when there is enough free CPU and memory available,
- * using the functionality provided by the {@apilink AutoscaledPool} class.
- * All {@apilink AutoscaledPool} configuration options can be passed to the `autoscaledPoolOptions`
+ * using the functionality provided by the {@link AutoscaledPool} class.
+ * All {@link AutoscaledPool} configuration options can be passed to the `autoscaledPoolOptions`
  * parameter of the `CheerioCrawler` constructor. For user convenience, the `minConcurrency` and `maxConcurrency`
- * {@apilink AutoscaledPool} options are available directly in the `CheerioCrawler` constructor.
+ * {@link AutoscaledPool} options are available directly in the `CheerioCrawler` constructor.
  *
  * **Example usage:**
  *
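The options referenced in the JSDoc above could be combined on the constructor roughly as in the sketch below (not part of the diff); the MIME type and concurrency numbers are arbitrary examples, and `autoscaledPoolOptions` is the pass-through parameter the JSDoc mentions:

import { LinkeDOMCrawler } from '@crawlee/linkedom';

const crawler = new LinkeDOMCrawler({
    // Process JSON responses in addition to the default text/html and application/xhtml+xml.
    additionalMimeTypes: ['application/json'],
    // Convenience shortcuts for the underlying AutoscaledPool.
    minConcurrency: 5,
    maxConcurrency: 50,
    // Other AutoscaledPool options can be passed through here.
    autoscaledPoolOptions: { desiredConcurrency: 10 },
    async requestHandler({ request, body }) {
        // Handle HTML or JSON payloads depending on Content-Type.
        console.log(request.url, body.length);
    },
});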
@@ -140,9 +140,9 @@ interface EnqueueLinksInternalOptions {
  // @ts-ignore optional peer dependency or compatibility with es2022
  export declare function linkedomCrawlerEnqueueLinks({ options, window, requestQueue, robotsTxtFile, onSkippedRequest, originalRequestUrl, finalRequestUrl, }: EnqueueLinksInternalOptions): Promise<import("@crawlee/types").BatchAddRequestsResult>;
  /**
- * Creates new {@apilink Router} instance that works based on request labels.
- * This instance can then serve as a `requestHandler` of your {@apilink LinkeDOMCrawler}.
- * Defaults to the {@apilink LinkeDOMCrawlingContext}.
+ * Creates new {@link Router} instance that works based on request labels.
+ * This instance can then serve as a `requestHandler` of your {@link LinkeDOMCrawler}.
+ * Defaults to the {@link LinkeDOMCrawlingContext}.
  *
  * > Serves as a shortcut for using `Router.create<LinkeDOMCrawlingContext>()`.
  *
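Since the JSDoc above describes the factory as a shortcut for `Router.create<LinkeDOMCrawlingContext>()`, a label-based handler setup might look like this sketch (not part of the diff); the 'DETAIL' label is hypothetical and the `Router` import assumes the package re-exports it, as other Crawlee packages do:

import { LinkeDOMCrawler, Router } from '@crawlee/linkedom';
import type { LinkeDOMCrawlingContext } from '@crawlee/linkedom';

// Equivalent to the shortcut described in the JSDoc.
const router = Router.create<LinkeDOMCrawlingContext>();

router.addHandler('DETAIL', async ({ request, window, pushData }) => {
    await pushData({ url: request.url, title: window.document.title });
});

router.addDefaultHandler(async ({ enqueueLinks }) => {
    // Requests enqueued with the hypothetical 'DETAIL' label go to the handler above.
    await enqueueLinks({ label: 'DETAIL' });
});

const crawler = new LinkeDOMCrawler({ requestHandler: router });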
@@ -18,7 +18,7 @@ const utilities_1 = require("@apify/utilities");
  *
  * Since `LinkeDOMCrawler` uses raw HTTP requests to download web pages,
  * it is very fast and efficient on data bandwidth. However, if the target website requires JavaScript
- * to display the content, you might need to use {@apilink PuppeteerCrawler} or {@apilink PlaywrightCrawler} instead,
+ * to display the content, you might need to use {@link PuppeteerCrawler} or {@link PlaywrightCrawler} instead,
  * because it loads the pages using full-featured headless Chrome browser.
  *
  * **Limitation**:
@@ -26,18 +26,18 @@ const utilities_1 = require("@apify/utilities");
  *
  * `LinkeDOMCrawler` downloads each URL using a plain HTTP request,
  * parses the HTML content using [LinkeDOM](https://www.npmjs.com/package/linkedom)
- * and then invokes the user-provided {@apilink LinkeDOMCrawlerOptions.requestHandler} to extract page data
+ * and then invokes the user-provided {@link LinkeDOMCrawlerOptions.requestHandler} to extract page data
  * using the `window` object.
  *
- * The source URLs are represented using {@apilink Request} objects that are fed from
- * {@apilink RequestList} or {@apilink RequestQueue} instances provided by the {@apilink LinkeDOMCrawlerOptions.requestList}
- * or {@apilink LinkeDOMCrawlerOptions.requestQueue} constructor options, respectively.
+ * The source URLs are represented using {@link Request} objects that are fed from
+ * {@link RequestList} or {@link RequestQueue} instances provided by the {@link LinkeDOMCrawlerOptions.requestList}
+ * or {@link LinkeDOMCrawlerOptions.requestQueue} constructor options, respectively.
  *
- * If both {@apilink LinkeDOMCrawlerOptions.requestList} and {@apilink LinkeDOMCrawlerOptions.requestQueue} are used,
- * the instance first processes URLs from the {@apilink RequestList} and automatically enqueues all of them
- * to {@apilink RequestQueue} before it starts their processing. This ensures that a single URL is not crawled multiple times.
+ * If both {@link LinkeDOMCrawlerOptions.requestList} and {@link LinkeDOMCrawlerOptions.requestQueue} are used,
+ * the instance first processes URLs from the {@link RequestList} and automatically enqueues all of them
+ * to {@link RequestQueue} before it starts their processing. This ensures that a single URL is not crawled multiple times.
  *
- * The crawler finishes when there are no more {@apilink Request} objects to crawl.
+ * The crawler finishes when there are no more {@link Request} objects to crawl.
  *
  * We can use the `preNavigationHooks` to adjust `gotOptions`:
  *
@@ -52,15 +52,15 @@ const utilities_1 = require("@apify/utilities");
  * By default, `LinkeDOMCrawler` only processes web pages with the `text/html`
  * and `application/xhtml+xml` MIME content types (as reported by the `Content-Type` HTTP header),
  * and skips pages with other content types. If you want the crawler to process other content types,
- * use the {@apilink LinkeDOMCrawlerOptions.additionalMimeTypes} constructor option.
+ * use the {@link LinkeDOMCrawlerOptions.additionalMimeTypes} constructor option.
  * Beware that the parsing behavior differs for HTML, XML, JSON and other types of content.
- * For more details, see {@apilink LinkeDOMCrawlerOptions.requestHandler}.
+ * For more details, see {@link LinkeDOMCrawlerOptions.requestHandler}.
  *
  * New requests are only dispatched when there is enough free CPU and memory available,
- * using the functionality provided by the {@apilink AutoscaledPool} class.
- * All {@apilink AutoscaledPool} configuration options can be passed to the `autoscaledPoolOptions`
+ * using the functionality provided by the {@link AutoscaledPool} class.
+ * All {@link AutoscaledPool} configuration options can be passed to the `autoscaledPoolOptions`
  * parameter of the `CheerioCrawler` constructor. For user convenience, the `minConcurrency` and `maxConcurrency`
- * {@apilink AutoscaledPool} options are available directly in the `CheerioCrawler` constructor.
+ * {@link AutoscaledPool} options are available directly in the `CheerioCrawler` constructor.
  *
  * **Example usage:**
  *
@@ -173,9 +173,9 @@ function extractUrlsFromWindow(window, selector, baseUrl) {
  .filter((href) => href !== undefined && href !== '');
  }
  /**
- * Creates new {@apilink Router} instance that works based on request labels.
- * This instance can then serve as a `requestHandler` of your {@apilink LinkeDOMCrawler}.
- * Defaults to the {@apilink LinkeDOMCrawlingContext}.
+ * Creates new {@link Router} instance that works based on request labels.
+ * This instance can then serve as a `requestHandler` of your {@link LinkeDOMCrawler}.
+ * Defaults to the {@link LinkeDOMCrawlingContext}.
  *
  * > Serves as a shortcut for using `Router.create<LinkeDOMCrawlingContext>()`.
  *
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@crawlee/linkedom",
- "version": "3.13.3-beta.8",
+ "version": "3.13.3",
  "description": "The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs (not only) with headless Chrome and Puppeteer.",
  "engines": {
  "node": ">=16.0.0"
@@ -55,8 +55,8 @@
  "dependencies": {
  "@apify/timeout": "^0.3.0",
  "@apify/utilities": "^2.7.10",
- "@crawlee/http": "3.13.3-beta.8",
- "@crawlee/types": "3.13.3-beta.8",
+ "@crawlee/http": "3.13.3",
+ "@crawlee/types": "3.13.3",
  "linkedom": "^0.18.0",
  "ow": "^0.28.2",
  "tslib": "^2.4.0"
@@ -68,5 +68,5 @@
  }
  }
  },
- "gitHead": "0bcd58dd82d533da05a2bd7a524624fa441e7a71"
+ "gitHead": "279cadbd3cd6342f36cc4d841e07b999e472420d"
  }