apify 2.3.1-beta.4 → 3.0.0-alpha.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -5
- package/package.json +69 -128
- package/build/actor.d.ts +0 -113
- package/build/actor.d.ts.map +0 -1
- package/build/actor.js +0 -582
- package/build/actor.js.map +0 -1
- package/build/apify.d.ts +0 -752
- package/build/apify.d.ts.map +0 -1
- package/build/apify.js +0 -877
- package/build/apify.js.map +0 -1
- package/build/autoscaling/autoscaled_pool.d.ts +0 -384
- package/build/autoscaling/autoscaled_pool.d.ts.map +0 -1
- package/build/autoscaling/autoscaled_pool.js +0 -557
- package/build/autoscaling/autoscaled_pool.js.map +0 -1
- package/build/autoscaling/snapshotter.d.ts +0 -278
- package/build/autoscaling/snapshotter.d.ts.map +0 -1
- package/build/autoscaling/snapshotter.js +0 -447
- package/build/autoscaling/snapshotter.js.map +0 -1
- package/build/autoscaling/system_status.d.ts +0 -224
- package/build/autoscaling/system_status.d.ts.map +0 -1
- package/build/autoscaling/system_status.js +0 -228
- package/build/autoscaling/system_status.js.map +0 -1
- package/build/browser_launchers/browser_launcher.d.ts +0 -154
- package/build/browser_launchers/browser_launcher.d.ts.map +0 -1
- package/build/browser_launchers/browser_launcher.js +0 -160
- package/build/browser_launchers/browser_launcher.js.map +0 -1
- package/build/browser_launchers/browser_plugin.d.ts +0 -23
- package/build/browser_launchers/browser_plugin.d.ts.map +0 -1
- package/build/browser_launchers/browser_plugin.js +0 -25
- package/build/browser_launchers/browser_plugin.js.map +0 -1
- package/build/browser_launchers/playwright_launcher.d.ts +0 -131
- package/build/browser_launchers/playwright_launcher.d.ts.map +0 -1
- package/build/browser_launchers/playwright_launcher.js +0 -150
- package/build/browser_launchers/playwright_launcher.js.map +0 -1
- package/build/browser_launchers/puppeteer_launcher.d.ts +0 -153
- package/build/browser_launchers/puppeteer_launcher.d.ts.map +0 -1
- package/build/browser_launchers/puppeteer_launcher.js +0 -197
- package/build/browser_launchers/puppeteer_launcher.js.map +0 -1
- package/build/cache_container.d.ts +0 -31
- package/build/cache_container.d.ts.map +0 -1
- package/build/cache_container.js +0 -48
- package/build/cache_container.js.map +0 -1
- package/build/configuration.d.ts +0 -226
- package/build/configuration.d.ts.map +0 -1
- package/build/configuration.js +0 -325
- package/build/configuration.js.map +0 -1
- package/build/constants.d.ts +0 -37
- package/build/constants.d.ts.map +0 -1
- package/build/constants.js +0 -41
- package/build/constants.js.map +0 -1
- package/build/crawlers/basic_crawler.d.ts +0 -443
- package/build/crawlers/basic_crawler.d.ts.map +0 -1
- package/build/crawlers/basic_crawler.js +0 -664
- package/build/crawlers/basic_crawler.js.map +0 -1
- package/build/crawlers/browser_crawler.d.ts +0 -512
- package/build/crawlers/browser_crawler.d.ts.map +0 -1
- package/build/crawlers/browser_crawler.js +0 -540
- package/build/crawlers/browser_crawler.js.map +0 -1
- package/build/crawlers/cheerio_crawler.d.ts +0 -931
- package/build/crawlers/cheerio_crawler.d.ts.map +0 -1
- package/build/crawlers/cheerio_crawler.js +0 -913
- package/build/crawlers/cheerio_crawler.js.map +0 -1
- package/build/crawlers/crawler_extension.d.ts +0 -10
- package/build/crawlers/crawler_extension.d.ts.map +0 -1
- package/build/crawlers/crawler_extension.js +0 -19
- package/build/crawlers/crawler_extension.js.map +0 -1
- package/build/crawlers/crawler_utils.d.ts +0 -34
- package/build/crawlers/crawler_utils.d.ts.map +0 -1
- package/build/crawlers/crawler_utils.js +0 -87
- package/build/crawlers/crawler_utils.js.map +0 -1
- package/build/crawlers/playwright_crawler.d.ts +0 -448
- package/build/crawlers/playwright_crawler.d.ts.map +0 -1
- package/build/crawlers/playwright_crawler.js +0 -299
- package/build/crawlers/playwright_crawler.js.map +0 -1
- package/build/crawlers/puppeteer_crawler.d.ts +0 -425
- package/build/crawlers/puppeteer_crawler.d.ts.map +0 -1
- package/build/crawlers/puppeteer_crawler.js +0 -299
- package/build/crawlers/puppeteer_crawler.js.map +0 -1
- package/build/crawlers/statistics.d.ts +0 -185
- package/build/crawlers/statistics.d.ts.map +0 -1
- package/build/crawlers/statistics.js +0 -331
- package/build/crawlers/statistics.js.map +0 -1
- package/build/enqueue_links/click_elements.d.ts +0 -179
- package/build/enqueue_links/click_elements.d.ts.map +0 -1
- package/build/enqueue_links/click_elements.js +0 -434
- package/build/enqueue_links/click_elements.js.map +0 -1
- package/build/enqueue_links/enqueue_links.d.ts +0 -117
- package/build/enqueue_links/enqueue_links.d.ts.map +0 -1
- package/build/enqueue_links/enqueue_links.js +0 -163
- package/build/enqueue_links/enqueue_links.js.map +0 -1
- package/build/enqueue_links/shared.d.ts +0 -42
- package/build/enqueue_links/shared.d.ts.map +0 -1
- package/build/enqueue_links/shared.js +0 -121
- package/build/enqueue_links/shared.js.map +0 -1
- package/build/errors.d.ts +0 -29
- package/build/errors.d.ts.map +0 -1
- package/build/errors.js +0 -38
- package/build/errors.js.map +0 -1
- package/build/events.d.ts +0 -11
- package/build/events.d.ts.map +0 -1
- package/build/events.js +0 -147
- package/build/events.js.map +0 -1
- package/build/index.d.ts +0 -4
- package/build/index.d.ts.map +0 -1
- package/build/index.js +0 -7
- package/build/index.js.map +0 -1
- package/build/main.d.ts +0 -179
- package/build/main.d.ts.map +0 -1
- package/build/main.js +0 -81
- package/build/main.js.map +0 -1
- package/build/playwright_utils.d.ts +0 -9
- package/build/playwright_utils.d.ts.map +0 -1
- package/build/playwright_utils.js +0 -90
- package/build/playwright_utils.js.map +0 -1
- package/build/proxy_configuration.d.ts +0 -411
- package/build/proxy_configuration.d.ts.map +0 -1
- package/build/proxy_configuration.js +0 -517
- package/build/proxy_configuration.js.map +0 -1
- package/build/pseudo_url.d.ts +0 -86
- package/build/pseudo_url.d.ts.map +0 -1
- package/build/pseudo_url.js +0 -153
- package/build/pseudo_url.js.map +0 -1
- package/build/puppeteer_request_interception.d.ts +0 -8
- package/build/puppeteer_request_interception.d.ts.map +0 -1
- package/build/puppeteer_request_interception.js +0 -235
- package/build/puppeteer_request_interception.js.map +0 -1
- package/build/puppeteer_utils.d.ts +0 -250
- package/build/puppeteer_utils.d.ts.map +0 -1
- package/build/puppeteer_utils.js +0 -551
- package/build/puppeteer_utils.js.map +0 -1
- package/build/request.d.ts +0 -180
- package/build/request.d.ts.map +0 -1
- package/build/request.js +0 -261
- package/build/request.js.map +0 -1
- package/build/request_list.d.ts +0 -581
- package/build/request_list.d.ts.map +0 -1
- package/build/request_list.js +0 -826
- package/build/request_list.js.map +0 -1
- package/build/serialization.d.ts +0 -5
- package/build/serialization.d.ts.map +0 -1
- package/build/serialization.js +0 -139
- package/build/serialization.js.map +0 -1
- package/build/session_pool/errors.d.ts +0 -11
- package/build/session_pool/errors.d.ts.map +0 -1
- package/build/session_pool/errors.js +0 -18
- package/build/session_pool/errors.js.map +0 -1
- package/build/session_pool/events.d.ts +0 -5
- package/build/session_pool/events.d.ts.map +0 -1
- package/build/session_pool/events.js +0 -6
- package/build/session_pool/events.js.map +0 -1
- package/build/session_pool/session.d.ts +0 -286
- package/build/session_pool/session.d.ts.map +0 -1
- package/build/session_pool/session.js +0 -355
- package/build/session_pool/session.js.map +0 -1
- package/build/session_pool/session_pool.d.ts +0 -280
- package/build/session_pool/session_pool.d.ts.map +0 -1
- package/build/session_pool/session_pool.js +0 -393
- package/build/session_pool/session_pool.js.map +0 -1
- package/build/session_pool/session_utils.d.ts +0 -4
- package/build/session_pool/session_utils.d.ts.map +0 -1
- package/build/session_pool/session_utils.js +0 -24
- package/build/session_pool/session_utils.js.map +0 -1
- package/build/stealth/hiding_tricks.d.ts +0 -22
- package/build/stealth/hiding_tricks.d.ts.map +0 -1
- package/build/stealth/hiding_tricks.js +0 -308
- package/build/stealth/hiding_tricks.js.map +0 -1
- package/build/stealth/stealth.d.ts +0 -56
- package/build/stealth/stealth.d.ts.map +0 -1
- package/build/stealth/stealth.js +0 -125
- package/build/stealth/stealth.js.map +0 -1
- package/build/storages/dataset.d.ts +0 -288
- package/build/storages/dataset.d.ts.map +0 -1
- package/build/storages/dataset.js +0 -480
- package/build/storages/dataset.js.map +0 -1
- package/build/storages/key_value_store.d.ts +0 -243
- package/build/storages/key_value_store.d.ts.map +0 -1
- package/build/storages/key_value_store.js +0 -462
- package/build/storages/key_value_store.js.map +0 -1
- package/build/storages/request_queue.d.ts +0 -318
- package/build/storages/request_queue.d.ts.map +0 -1
- package/build/storages/request_queue.js +0 -636
- package/build/storages/request_queue.js.map +0 -1
- package/build/storages/storage_manager.d.ts +0 -87
- package/build/storages/storage_manager.d.ts.map +0 -1
- package/build/storages/storage_manager.js +0 -150
- package/build/storages/storage_manager.js.map +0 -1
- package/build/tsconfig.tsbuildinfo +0 -1
- package/build/typedefs.d.ts +0 -146
- package/build/typedefs.d.ts.map +0 -1
- package/build/typedefs.js +0 -88
- package/build/typedefs.js.map +0 -1
- package/build/utils.d.ts +0 -175
- package/build/utils.d.ts.map +0 -1
- package/build/utils.js +0 -731
- package/build/utils.js.map +0 -1
- package/build/utils_log.d.ts +0 -41
- package/build/utils_log.d.ts.map +0 -1
- package/build/utils_log.js +0 -192
- package/build/utils_log.js.map +0 -1
- package/build/utils_request.d.ts +0 -77
- package/build/utils_request.d.ts.map +0 -1
- package/build/utils_request.js +0 -385
- package/build/utils_request.js.map +0 -1
- package/build/utils_social.d.ts +0 -210
- package/build/utils_social.d.ts.map +0 -1
- package/build/utils_social.js +0 -787
- package/build/utils_social.js.map +0 -1
- package/build/validators.d.ts +0 -23
- package/build/validators.d.ts.map +0 -1
- package/build/validators.js +0 -29
- package/build/validators.js.map +0 -1
|
@@ -1,163 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.extractUrlsFromCheerio = exports.extractUrlsFromPage = exports.enqueueLinks = void 0;
|
|
4
|
-
const tslib_1 = require("tslib");
|
|
5
|
-
const ow_1 = (0, tslib_1.__importStar)(require("ow"));
|
|
6
|
-
const url_1 = require("url");
|
|
7
|
-
const utils_log_1 = (0, tslib_1.__importDefault)(require("../utils_log"));
|
|
8
|
-
/* eslint-disable import/no-duplicates */
|
|
9
|
-
const shared_1 = require("./shared");
|
|
10
|
-
const validators_1 = require("../validators");
|
|
11
|
-
/* eslint-enable no-unused-vars,import/named,import/no-duplicates,import/order */
|
|
12
|
-
/**
|
|
13
|
-
* The function finds elements matching a specific CSS selector (HTML anchor (`<a>`) by default)
|
|
14
|
-
* either in a Puppeteer page, or in a Cheerio object (parsed HTML),
|
|
15
|
-
* and enqueues the URLs in their `href` attributes to the provided {@link RequestQueue}.
|
|
16
|
-
* If you're looking to find URLs in JavaScript heavy pages where links are not available
|
|
17
|
-
* in `href` elements, but rather navigations are triggered in click handlers
|
|
18
|
-
* see {@link puppeteer#enqueueLinksByClickingElements}.
|
|
19
|
-
*
|
|
20
|
-
* Optionally, the function allows you to filter the target links' URLs using an array of {@link PseudoUrl} objects
|
|
21
|
-
* and override settings of the enqueued {@link Request} objects.
|
|
22
|
-
*
|
|
23
|
-
* **Example usage**
|
|
24
|
-
*
|
|
25
|
-
* ```javascript
|
|
26
|
-
* await Apify.utils.enqueueLinks({
|
|
27
|
-
* page,
|
|
28
|
-
* requestQueue,
|
|
29
|
-
* selector: 'a.product-detail',
|
|
30
|
-
* pseudoUrls: [
|
|
31
|
-
* 'https://www.example.com/handbags/[.*]',
|
|
32
|
-
* 'https://www.example.com/purses/[.*]'
|
|
33
|
-
* ],
|
|
34
|
-
* });
|
|
35
|
-
* ```
|
|
36
|
-
*
|
|
37
|
-
* @param {object} options
|
|
38
|
-
* All `enqueueLinks()` parameters are passed
|
|
39
|
-
* via an options object with the following keys:
|
|
40
|
-
* @param {PuppeteerPage|PlaywrightPage} [options.page]
|
|
41
|
-
* Puppeteer [`Page`](https://pptr.dev/#?product=Puppeteer&show=api-class-page) object.
|
|
42
|
-
* Either `page` or `$` option must be provided.
|
|
43
|
-
* @param {Number} [options.limit]
|
|
44
|
-
* Limit the count of actually enqueued URLs to this number. Useful for testing across the entire crawling scope.
|
|
45
|
-
* @param {CheerioAPI} [options.$]
|
|
46
|
-
* [`Cheerio`](https://github.com/cheeriojs/cheerio) function with loaded HTML.
|
|
47
|
-
* Either `page` or `$` option must be provided.
|
|
48
|
-
* @param {RequestQueue} options.requestQueue
|
|
49
|
-
* A request queue to which the URLs will be enqueued.
|
|
50
|
-
* @param {string} [options.selector='a']
|
|
51
|
-
* A CSS selector matching links to be enqueued.
|
|
52
|
-
* @param {string} [options.baseUrl]
|
|
53
|
-
* A base URL that will be used to resolve relative URLs when using Cheerio. Ignored when using Puppeteer,
|
|
54
|
-
* since the relative URL resolution is done inside the browser automatically.
|
|
55
|
-
* @param {Array<Object<string, *>>|Array<string>} [options.pseudoUrls]
|
|
56
|
-
* An array of {@link PseudoUrl}s matching the URLs to be enqueued,
|
|
57
|
-
* or an array of strings or RegExps or plain Objects from which the {@link PseudoUrl}s can be constructed.
|
|
58
|
-
*
|
|
59
|
-
* The plain objects must include at least the `purl` property, which holds the pseudo-URL string or RegExp.
|
|
60
|
-
* All remaining keys will be used as the `requestTemplate` argument of the {@link PseudoUrl} constructor,
|
|
61
|
-
* which lets you specify special properties for the enqueued {@link Request} objects.
|
|
62
|
-
*
|
|
63
|
-
* If `pseudoUrls` is an empty array, `null` or `undefined`, then the function
|
|
64
|
-
* enqueues all links found on the page.
|
|
65
|
-
* @param {RequestTransform} [options.transformRequestFunction]
|
|
66
|
-
* Just before a new {@link Request} is constructed and enqueued to the {@link RequestQueue}, this function can be used
|
|
67
|
-
* to remove it or modify its contents such as `userData`, `payload` or, most importantly `uniqueKey`. This is useful
|
|
68
|
-
* when you need to enqueue multiple `Requests` to the queue that share the same URL, but differ in methods or payloads,
|
|
69
|
-
* or to dynamically update or create `userData`.
|
|
70
|
-
*
|
|
71
|
-
* For example: by adding `keepUrlFragment: true` to the `request` object, URL fragments will not be removed
|
|
72
|
-
* when `uniqueKey` is computed.
|
|
73
|
-
*
|
|
74
|
-
* **Example:**
|
|
75
|
-
* ```javascript
|
|
76
|
-
* {
|
|
77
|
-
* transformRequestFunction: (request) => {
|
|
78
|
-
* request.userData.foo = 'bar';
|
|
79
|
-
* request.keepUrlFragment = true;
|
|
80
|
-
* return request;
|
|
81
|
-
* }
|
|
82
|
-
* }
|
|
83
|
-
* ```
|
|
84
|
-
* @return {Promise<Array<QueueOperationInfo>>}
|
|
85
|
-
* Promise that resolves to an array of {@link QueueOperationInfo} objects.
|
|
86
|
-
* @memberOf utils
|
|
87
|
-
* @name enqueueLinks
|
|
88
|
-
* @function
|
|
89
|
-
*/
|
|
90
|
-
async function enqueueLinks(options) {
|
|
91
|
-
const { page, $, requestQueue, limit, selector = 'a', baseUrl, pseudoUrls, transformRequestFunction, } = options;
|
|
92
|
-
if (!page && !$) {
|
|
93
|
-
throw new ow_1.ArgumentError('One of the parameters "options.page" or "options.$" must be provided!', enqueueLinks);
|
|
94
|
-
}
|
|
95
|
-
if (page && $) {
|
|
96
|
-
throw new ow_1.ArgumentError('Only one of the parameters "options.page" or "options.$" must be provided!', enqueueLinks);
|
|
97
|
-
}
|
|
98
|
-
(0, ow_1.default)(options, ow_1.default.object.exactShape({
|
|
99
|
-
page: ow_1.default.optional.object.hasKeys('goto', 'evaluate'),
|
|
100
|
-
$: ow_1.default.optional.function,
|
|
101
|
-
requestQueue: ow_1.default.object.hasKeys('fetchNextRequest', 'addRequest'),
|
|
102
|
-
limit: ow_1.default.optional.number,
|
|
103
|
-
selector: ow_1.default.optional.string,
|
|
104
|
-
baseUrl: ow_1.default.optional.string,
|
|
105
|
-
pseudoUrls: ow_1.default.any(ow_1.default.null, ow_1.default.optional.array.ofType(ow_1.default.any(ow_1.default.string, ow_1.default.regExp, ow_1.default.object.hasKeys('purl'), ow_1.default.object.validate(validators_1.validators.pseudoUrl)))),
|
|
106
|
-
transformRequestFunction: ow_1.default.optional.function,
|
|
107
|
-
}));
|
|
108
|
-
if (baseUrl && page)
|
|
109
|
-
utils_log_1.default.warning('The parameter options.baseUrl can only be used when parsing a Cheerio object. It will be ignored.');
|
|
110
|
-
// Construct pseudoUrls from input where necessary.
|
|
111
|
-
const pseudoUrlInstances = (0, shared_1.constructPseudoUrlInstances)(pseudoUrls || []);
|
|
112
|
-
const urls = page ? await extractUrlsFromPage(page, selector) : extractUrlsFromCheerio($, selector, baseUrl);
|
|
113
|
-
let requestOptions = (0, shared_1.createRequestOptions)(urls);
|
|
114
|
-
if (transformRequestFunction) {
|
|
115
|
-
requestOptions = requestOptions.map(transformRequestFunction).filter((r) => !!r);
|
|
116
|
-
}
|
|
117
|
-
let requests = (0, shared_1.createRequests)(requestOptions, pseudoUrlInstances);
|
|
118
|
-
if (limit)
|
|
119
|
-
requests = requests.slice(0, limit);
|
|
120
|
-
return (0, shared_1.addRequestsToQueueInBatches)(requests, requestQueue);
|
|
121
|
-
}
|
|
122
|
-
exports.enqueueLinks = enqueueLinks;
|
|
123
|
-
/**
|
|
124
|
-
* Extracts URLs from a given Puppeteer Page.
|
|
125
|
-
*
|
|
126
|
-
* @param {PuppeteerPage|PlaywrightPage} page
|
|
127
|
-
* @param {string} selector
|
|
128
|
-
* @return {Promise<Array<string>>}
|
|
129
|
-
* @ignore
|
|
130
|
-
*/
|
|
131
|
-
async function extractUrlsFromPage(page, selector) {
|
|
132
|
-
/* istanbul ignore next */
|
|
133
|
-
return page.$$eval(selector, (linkEls) => linkEls.map((link) => link.href).filter((href) => !!href));
|
|
134
|
-
}
|
|
135
|
-
exports.extractUrlsFromPage = extractUrlsFromPage;
|
|
136
|
-
/**
|
|
137
|
-
* Extracts URLs from a given Cheerio object.
|
|
138
|
-
*
|
|
139
|
-
* @param {CheerioAPI} $
|
|
140
|
-
* @param {string} selector
|
|
141
|
-
* @param {string} baseUrl
|
|
142
|
-
* @return {string[]}
|
|
143
|
-
* @ignore
|
|
144
|
-
*/
|
|
145
|
-
function extractUrlsFromCheerio($, selector, baseUrl) {
|
|
146
|
-
return $(selector)
|
|
147
|
-
.map((i, el) => $(el).attr('href'))
|
|
148
|
-
.get()
|
|
149
|
-
.filter((href) => !!href)
|
|
150
|
-
.map((href) => {
|
|
151
|
-
// Throw a meaningful error when only a relative URL would be extracted instead of waiting for the Request to fail later.
|
|
152
|
-
const isHrefAbsolute = /^[a-z][a-z0-9+.-]*:/.test(href); // Grabbed this in 'is-absolute-url' package.
|
|
153
|
-
if (!isHrefAbsolute && !baseUrl) {
|
|
154
|
-
throw new Error(`An extracted URL: ${href} is relative and options.baseUrl is not set. `
|
|
155
|
-
+ 'Use options.baseUrl in utils.enqueueLinks() to automatically resolve relative URLs.');
|
|
156
|
-
}
|
|
157
|
-
return baseUrl
|
|
158
|
-
? (new url_1.URL(href, baseUrl)).href
|
|
159
|
-
: href;
|
|
160
|
-
});
|
|
161
|
-
}
|
|
162
|
-
exports.extractUrlsFromCheerio = extractUrlsFromCheerio;
|
|
163
|
-
//# sourceMappingURL=enqueue_links.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"enqueue_links.js","sourceRoot":"","sources":["../../src/enqueue_links/enqueue_links.js"],"names":[],"mappings":";;;;AAAA,sDAAuC;AACvC,6BAA0B;AAC1B,0EAA+B;AAC/B,yCAAyC;AACzC,qCAA0H;AAU1H,8CAA2C;AAE3C,iFAAiF;AAEjF;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA6EG;AACI,KAAK,UAAU,YAAY,CAAC,OAAO;IACtC,MAAM,EACF,IAAI,EACJ,CAAC,EACD,YAAY,EACZ,KAAK,EACL,QAAQ,GAAG,GAAG,EACd,OAAO,EACP,UAAU,EACV,wBAAwB,GAC3B,GAAG,OAAO,CAAC;IAEZ,IAAI,CAAC,IAAI,IAAI,CAAC,CAAC,EAAE;QACb,MAAM,IAAI,kBAAa,CAAC,uEAAuE,EAAE,YAAY,CAAC,CAAC;KAClH;IACD,IAAI,IAAI,IAAI,CAAC,EAAE;QACX,MAAM,IAAI,kBAAa,CAAC,4EAA4E,EAAE,YAAY,CAAC,CAAC;KACvH;IACD,IAAA,YAAE,EAAC,OAAO,EAAE,YAAE,CAAC,MAAM,CAAC,UAAU,CAAC;QAC7B,IAAI,EAAE,YAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,OAAO,CAAC,MAAM,EAAE,UAAU,CAAC;QACpD,CAAC,EAAE,YAAE,CAAC,QAAQ,CAAC,QAAQ;QACvB,YAAY,EAAE,YAAE,CAAC,MAAM,CAAC,OAAO,CAAC,kBAAkB,EAAE,YAAY,CAAC;QACjE,KAAK,EAAE,YAAE,CAAC,QAAQ,CAAC,MAAM;QACzB,QAAQ,EAAE,YAAE,CAAC,QAAQ,CAAC,MAAM;QAC5B,OAAO,EAAE,YAAE,CAAC,QAAQ,CAAC,MAAM;QAC3B,UAAU,EAAE,YAAE,CAAC,GAAG,CAAC,YAAE,CAAC,IAAI,EAAE,YAAE,CAAC,QAAQ,CAAC,KAAK,CAAC,MAAM,CAAC,YAAE,CAAC,GAAG,CACvD,YAAE,CAAC,MAAM,EACT,YAAE,CAAC,MAAM,EACT,YAAE,CAAC,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,EACzB,YAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,uBAAU,CAAC,SAAS,CAAC,CAC3C,CAAC,CAAC;QACH,wBAAwB,EAAE,YAAE,CAAC,QAAQ,CAAC,QAAQ;KACjD,CAAC,CAAC,CAAC;IAEJ,IAAI,OAAO,IAAI,IAAI;QAAE,mBAAG,CAAC,OAAO,CAAC,mGAAmG,CAAC,CAAC;IAEtI,mDAAmD;IACnD,MAAM,kBAAkB,GAAG,IAAA,oCAA2B,EAAC,UAAU,IAAI,EAAE,CAAC,CAAC;IAEzE,MAAM,IAAI,GAAG,IAAI,CAAC,CAAC,CAAC,MAAM,mBAAmB,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,sBAAsB,CAAC,CAAC,EAAE,QAAQ,EAAE,OAAO,CAAC,CAAC;IAC7G,IAAI,cAAc,GAAG,IAAA,6BAAoB,EAAC,IAAI,CAAC,CAAC;IAChD,IAAI,wBAAwB,EAAE;QAC1B,cAAc,GAAG,cAAc,CAAC,GAAG,CAAC,wBAAwB,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;KACpF;IACD,IAAI,QAAQ,GAAG,IAAA,uBAAc,EAAC,cAAc,EAAE,kBAAkB,CAAC,CAAC;IAClE,IAAI,KAAK;QAAE,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC;IAE/C,OAAO,IAAA,oCAA2B,EAAC,QAAQ,EAAE,YAAY,CAAC,CAAC;AAC/D,CAAC;AAhDD,oCAgDC;AAED;;;;;;;GAOG;AACI,KAAK,UAAU,mBAAmB,CAAC,IAAI,EAAE,QAAQ;IACpD,0BAA0B;IAC1B,OAAO,IAAI,CAAC,MAAM,CAAC,QAAQ,EAAE,CAAC,OAAO,EAAE,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;AACzG,CAAC;AAHD,kDAGC;AAED;;;;;;;;GAQG;AACH,SAAgB,sBAAsB,CAAC,CAAC,EAAE,QAAQ,EAAE,OAAO;IACvD,OAAO,CAAC,CAAC,QAAQ,CAAC;SACb,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;SAClC,GAAG,EAAE;SACL,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;SACxB,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;QACV,yHAAyH;QACzH,MAAM,cAAc,GAAG,qBAAqB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,6CAA6C;QACtG,IAAI,CAAC,cAAc,IAAI,CAAC,OAAO,EAAE;YAC7B,MAAM,IAAI,KAAK,CAAC,qBAAqB,IAAI,+CAA+C;kBAClF,qFAAqF,CAAC,CAAC;SAChG;QACD,OAAO,OAAO;YACV,CAAC,CAAC,CAAC,IAAI,SAAG,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,CAAC,IAAI;YAC/B,CAAC,CAAC,IAAI,CAAC;IACf,CAAC,CAAC,CAAC;AACX,CAAC;AAhBD,wDAgBC"}
|
|
@@ -1,42 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Helper factory used in the `enqueueLinks()` and enqueueLinksByClickingElements() function.
|
|
3
|
-
* @param {Array<(string|RegExp|{ purl: string|RegExp })>} pseudoUrls
|
|
4
|
-
* @return {Array<PseudoUrl>}
|
|
5
|
-
* @ignore
|
|
6
|
-
*/
|
|
7
|
-
export function constructPseudoUrlInstances(pseudoUrls: Array<(string | RegExp | {
|
|
8
|
-
purl: string | RegExp;
|
|
9
|
-
})>): Array<PseudoUrl>;
|
|
10
|
-
/**
|
|
11
|
-
* @param {Array<(string|Object)>} requestOptions
|
|
12
|
-
* @param {Array<PseudoUrl>} pseudoUrls
|
|
13
|
-
* @return {Array<Request>}
|
|
14
|
-
* @ignore
|
|
15
|
-
*/
|
|
16
|
-
export function createRequests(requestOptions: Array<(string | Object)>, pseudoUrls: Array<PseudoUrl>): Array<Request>;
|
|
17
|
-
/**
|
|
18
|
-
* @param {Array<(string|Object)>} sources
|
|
19
|
-
* @ignore
|
|
20
|
-
*/
|
|
21
|
-
export function createRequestOptions(sources: Array<(string | Object)>): (Object | {
|
|
22
|
-
url: string;
|
|
23
|
-
})[];
|
|
24
|
-
/**
|
|
25
|
-
* @param {Array<Request>} requests
|
|
26
|
-
* @param {RequestQueue} requestQueue
|
|
27
|
-
* @param {number} batchSize
|
|
28
|
-
* @return {Promise<Array<QueueOperationInfo>>}
|
|
29
|
-
* @ignore
|
|
30
|
-
*/
|
|
31
|
-
export function addRequestsToQueueInBatches(requests: Array<Request>, requestQueue: RequestQueue, batchSize?: number): Promise<Array<QueueOperationInfo>>;
|
|
32
|
-
/**
|
|
33
|
-
* Takes an Apify {RequestOptions} object and changes it's attributes in a desired way. This user-function is used
|
|
34
|
-
* {@link utilsenqueueLinks } to modify requests before enqueuing them.
|
|
35
|
-
*/
|
|
36
|
-
export type RequestTransform = (original: RequestOptions) => RequestOptions;
|
|
37
|
-
import PseudoUrl from "../pseudo_url";
|
|
38
|
-
import Request from "../request";
|
|
39
|
-
import { RequestQueue } from "../storages/request_queue";
|
|
40
|
-
import { QueueOperationInfo } from "../storages/request_queue";
|
|
41
|
-
import { RequestOptions } from "../request";
|
|
42
|
-
//# sourceMappingURL=shared.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"shared.d.ts","sourceRoot":"","sources":["../../src/enqueue_links/shared.js"],"names":[],"mappings":"AAqBA;;;;;GAKG;AACH,wDAJW,MAAM,CAAC,MAAM,GAAC,MAAM,GAAC;IAAE,IAAI,EAAE,MAAM,GAAC,MAAM,CAAA;CAAE,CAAC,CAAC,GAC7C,MAAM,SAAS,CAAC,CAyB3B;AACD;;;;;GAKG;AACH,+CALW,MAAM,CAAC,MAAM,GAAC,MAAM,CAAC,CAAC,cACtB,MAAM,SAAS,CAAC,GACf,MAAM,OAAO,CAAC,CAkBzB;AAED;;;GAGG;AACH,8CAHW,MAAM,CAAC,MAAM,GAAC,MAAM,CAAC,CAAC;;KAqBhC;AAED;;;;;;GAMG;AACH,sDANW,MAAM,OAAO,CAAC,gBACd,YAAY,cACZ,MAAM,GACL,QAAQ,MAAM,kBAAkB,CAAC,CAAC,CAU7C;;;;;0CAMU,cAAc,KACb,cAAc"}
|
|
@@ -1,121 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.addRequestsToQueueInBatches = exports.createRequestOptions = exports.createRequests = exports.constructPseudoUrlInstances = void 0;
|
|
4
|
-
const tslib_1 = require("tslib");
|
|
5
|
-
const url_1 = require("url");
|
|
6
|
-
const underscore_1 = (0, tslib_1.__importDefault)(require("underscore"));
|
|
7
|
-
const pseudo_url_1 = (0, tslib_1.__importDefault)(require("../pseudo_url"));
|
|
8
|
-
const request_1 = (0, tslib_1.__importDefault)(require("../request")); // eslint-disable-line import/no-duplicates
|
|
9
|
-
/* eslint-enable */
|
|
10
|
-
const MAX_ENQUEUE_LINKS_CACHE_SIZE = 1000;
|
|
11
|
-
/**
|
|
12
|
-
* To enable direct use of the Actor UI `pseudoUrls` output while keeping high performance,
|
|
13
|
-
* all the pseudoUrls from the output are only constructed once and kept in a cache
|
|
14
|
-
* by the `enqueueLinks()` function.
|
|
15
|
-
* @ignore
|
|
16
|
-
*/
|
|
17
|
-
const enqueueLinksPseudoUrlCache = new Map();
|
|
18
|
-
/**
|
|
19
|
-
* Helper factory used in the `enqueueLinks()` and enqueueLinksByClickingElements() function.
|
|
20
|
-
* @param {Array<(string|RegExp|{ purl: string|RegExp })>} pseudoUrls
|
|
21
|
-
* @return {Array<PseudoUrl>}
|
|
22
|
-
* @ignore
|
|
23
|
-
*/
|
|
24
|
-
function constructPseudoUrlInstances(pseudoUrls) {
|
|
25
|
-
return pseudoUrls.map((item) => {
|
|
26
|
-
// Get pseudoUrl instance from cache.
|
|
27
|
-
let pUrl = enqueueLinksPseudoUrlCache.get(item);
|
|
28
|
-
if (pUrl)
|
|
29
|
-
return pUrl;
|
|
30
|
-
// Nothing in cache, make a new instance.
|
|
31
|
-
// If it's already a PseudoURL, just save it.
|
|
32
|
-
if (item instanceof pseudo_url_1.default)
|
|
33
|
-
pUrl = item;
|
|
34
|
-
// If it's a string or RegExp, construct a PURL from it directly.
|
|
35
|
-
else if (typeof item === 'string' || item instanceof RegExp)
|
|
36
|
-
pUrl = new pseudo_url_1.default(item);
|
|
37
|
-
// If it's an object, look for a purl property and use it and the rest to construct a PURL with a Request template.
|
|
38
|
-
else
|
|
39
|
-
pUrl = new pseudo_url_1.default(item.purl, underscore_1.default.omit(item, 'purl'));
|
|
40
|
-
// Manage cache
|
|
41
|
-
enqueueLinksPseudoUrlCache.set(item, pUrl);
|
|
42
|
-
if (enqueueLinksPseudoUrlCache.size > MAX_ENQUEUE_LINKS_CACHE_SIZE) {
|
|
43
|
-
const key = enqueueLinksPseudoUrlCache.keys().next().value;
|
|
44
|
-
enqueueLinksPseudoUrlCache.delete(key);
|
|
45
|
-
}
|
|
46
|
-
return pUrl;
|
|
47
|
-
});
|
|
48
|
-
}
|
|
49
|
-
exports.constructPseudoUrlInstances = constructPseudoUrlInstances;
|
|
50
|
-
/**
|
|
51
|
-
* @param {Array<(string|Object)>} requestOptions
|
|
52
|
-
* @param {Array<PseudoUrl>} pseudoUrls
|
|
53
|
-
* @return {Array<Request>}
|
|
54
|
-
* @ignore
|
|
55
|
-
*/
|
|
56
|
-
function createRequests(requestOptions, pseudoUrls) {
|
|
57
|
-
if (!(pseudoUrls && pseudoUrls.length)) {
|
|
58
|
-
return requestOptions.map((opts) => new request_1.default(opts));
|
|
59
|
-
}
|
|
60
|
-
const requests = [];
|
|
61
|
-
requestOptions.forEach((opts) => {
|
|
62
|
-
pseudoUrls
|
|
63
|
-
.filter((purl) => purl.matches(opts.url))
|
|
64
|
-
.forEach((purl) => {
|
|
65
|
-
const request = purl.createRequest(opts);
|
|
66
|
-
requests.push(request);
|
|
67
|
-
});
|
|
68
|
-
});
|
|
69
|
-
return requests;
|
|
70
|
-
}
|
|
71
|
-
exports.createRequests = createRequests;
|
|
72
|
-
/**
|
|
73
|
-
* @param {Array<(string|Object)>} sources
|
|
74
|
-
* @ignore
|
|
75
|
-
*/
|
|
76
|
-
function createRequestOptions(sources) {
|
|
77
|
-
return sources
|
|
78
|
-
.map((src) => {
|
|
79
|
-
const reqOpts = typeof src === 'string'
|
|
80
|
-
? { url: src }
|
|
81
|
-
: src;
|
|
82
|
-
// TODO Remove with v1, there are examples
|
|
83
|
-
// which depend on userData existing here.
|
|
84
|
-
reqOpts.userData = { ...reqOpts.userData };
|
|
85
|
-
return reqOpts;
|
|
86
|
-
})
|
|
87
|
-
.filter(({ url }) => {
|
|
88
|
-
try {
|
|
89
|
-
return new url_1.URL(url).href;
|
|
90
|
-
}
|
|
91
|
-
catch (err) {
|
|
92
|
-
return false;
|
|
93
|
-
}
|
|
94
|
-
});
|
|
95
|
-
}
|
|
96
|
-
exports.createRequestOptions = createRequestOptions;
|
|
97
|
-
/**
|
|
98
|
-
* @param {Array<Request>} requests
|
|
99
|
-
* @param {RequestQueue} requestQueue
|
|
100
|
-
* @param {number} batchSize
|
|
101
|
-
* @return {Promise<Array<QueueOperationInfo>>}
|
|
102
|
-
* @ignore
|
|
103
|
-
*/
|
|
104
|
-
async function addRequestsToQueueInBatches(requests, requestQueue, batchSize = 5) {
|
|
105
|
-
const queueOperationInfos = [];
|
|
106
|
-
for (const request of requests) {
|
|
107
|
-
queueOperationInfos.push(requestQueue.addRequest(request));
|
|
108
|
-
if (queueOperationInfos.length % batchSize === 0)
|
|
109
|
-
await Promise.all(queueOperationInfos);
|
|
110
|
-
}
|
|
111
|
-
return Promise.all(queueOperationInfos);
|
|
112
|
-
}
|
|
113
|
-
exports.addRequestsToQueueInBatches = addRequestsToQueueInBatches;
|
|
114
|
-
/**
|
|
115
|
-
* Takes an Apify {RequestOptions} object and changes it's attributes in a desired way. This user-function is used
|
|
116
|
-
* {@link utils#enqueueLinks} to modify requests before enqueuing them.
|
|
117
|
-
* @callback RequestTransform
|
|
118
|
-
* @param {RequestOptions} original Request options to be modified.
|
|
119
|
-
* @return {RequestOptions} The modified request options to enqueue.
|
|
120
|
-
*/
|
|
121
|
-
//# sourceMappingURL=shared.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"shared.js","sourceRoot":"","sources":["../../src/enqueue_links/shared.js"],"names":[],"mappings":";;;;AAAA,6BAA0B;AAC1B,yEAA2B;AAC3B,4EAAsC;AACtC,sEAAiC,CAAC,2CAA2C;AAM7E,mBAAmB;AAEnB,MAAM,4BAA4B,GAAG,IAAI,CAAC;AAE1C;;;;;GAKG;AACH,MAAM,0BAA0B,GAAG,IAAI,GAAG,EAAE,CAAC;AAE7C;;;;;GAKG;AACH,SAAgB,2BAA2B,CAAC,UAAU;IAClD,OAAO,UAAU,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;QAC3B,qCAAqC;QACrC,IAAI,IAAI,GAAG,0BAA0B,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QAChD,IAAI,IAAI;YAAE,OAAO,IAAI,CAAC;QAEtB,yCAAyC;QACzC,6CAA6C;QAC7C,IAAI,IAAI,YAAY,oBAAS;YAAE,IAAI,GAAG,IAAI,CAAC;QAC3C,iEAAiE;aAC5D,IAAI,OAAO,IAAI,KAAK,QAAQ,IAAI,IAAI,YAAY,MAAM;YAAE,IAAI,GAAG,IAAI,oBAAS,CAAC,IAAI,CAAC,CAAC;QACxF,mHAAmH;;YAC9G,IAAI,GAAG,IAAI,oBAAS,CAAC,IAAI,CAAC,IAAI,EAAE,oBAAC,CAAC,IAAI,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC,CAAC;QAE3D,eAAe;QACf,0BAA0B,CAAC,GAAG,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC;QAC3C,IAAI,0BAA0B,CAAC,IAAI,GAAG,4BAA4B,EAAE;YAChE,MAAM,GAAG,GAAG,0BAA0B,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC;YAC3D,0BAA0B,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;SAC1C;QACD,OAAO,IAAI,CAAC;IAChB,CAAC,CAAC,CAAC;AACP,CAAC;AAtBD,kEAsBC;AACD;;;;;GAKG;AACH,SAAgB,cAAc,CAAC,cAAc,EAAE,UAAU;IACrD,IAAI,CAAC,CAAC,UAAU,IAAI,UAAU,CAAC,MAAM,CAAC,EAAE;QACpC,OAAO,cAAc,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,iBAAO,CAAC,IAAI,CAAC,CAAC,CAAC;KAC1D;IAED,MAAM,QAAQ,GAAG,EAAE,CAAC;IACpB,cAAc,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,EAAE;QAC5B,UAAU;aACL,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;aACxC,OAAO,CAAC,CAAC,IAAI,EAAE,EAAE;YACd,MAAM,OAAO,GAAG,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC;YACzC,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QAC3B,CAAC,CAAC,CAAC;IACX,CAAC,CAAC,CAAC;IACH,OAAO,QAAQ,CAAC;AACpB,CAAC;AAfD,wCAeC;AAED;;;GAGG;AACH,SAAgB,oBAAoB,CAAC,OAAO;IACxC,OAAO,OAAO;SACT,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE;QACT,MAAM,OAAO,GAAG,OAAO,GAAG,KAAK,QAAQ;YACnC,CAAC,CAAC,EAAE,GAAG,EAAE,GAAG,EAAE;YACd,CAAC,CAAC,GAAG,CAAC;QACV,0CAA0C;QAC1C,0CAA0C;QAC1C,OAAO,CAAC,QAAQ,GAAG,EAAE,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAC;QAC3C,OAAO,OAAO,CAAC;IACnB,CAAC,CAAC;SACD,MAAM,CAAC,CAAC,EAAE,GAAG,EAAE,EAAE,EAAE;QAChB,IAAI;YACA,OAAO,IAAI,SAAG,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC;SAC5B;QAAC,OAAO,GAAG,EAAE;YACV,OAAO,KAAK,CAAC;SAChB;IACL,CAAC,CAAC,CAAC;AACX,CAAC;AAlBD,oDAkBC;AAED;;;;;;GAMG;AACI,KAAK,UAAU,2BAA2B,CAAC,QAAQ,EAAE,YAAY,EAAE,SAAS,GAAG,CAAC;IACnF,MAAM,mBAAmB,GAAG,EAAE,CAAC;IAC/B,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE;QAC5B,mBAAmB,CAAC,IAAI,CAAC,YAAY,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC;QAC3D,IAAI,mBAAmB,CAAC,MAAM,GAAG,SAAS,KAAK,CAAC;YAAE,MAAM,OAAO,CAAC,GAAG,CAAC,mBAAmB,CAAC,CAAC;KAC5F;IACD,OAAO,OAAO,CAAC,GAAG,CAAC,mBAAmB,CAAC,CAAC;AAC5C,CAAC;AAPD,kEAOC;AAED;;;;;;GAMG"}
|
package/build/errors.d.ts
DELETED
|
@@ -1,29 +0,0 @@
|
|
|
1
|
-
export const APIFY_CALL_ERROR_NAME: "ApifyCallError";
|
|
2
|
-
/**
|
|
3
|
-
* The class represents exceptions thrown
|
|
4
|
-
* by the {@link Apify#call} function.
|
|
5
|
-
*
|
|
6
|
-
* @property {string} message
|
|
7
|
-
* Error message
|
|
8
|
-
* @property {ActorRun} run
|
|
9
|
-
* Object representing the failed actor run.
|
|
10
|
-
* @property {string} name
|
|
11
|
-
* Contains `"ApifyCallError"`
|
|
12
|
-
*/
|
|
13
|
-
export class ApifyCallError extends Error {
|
|
14
|
-
/**
|
|
15
|
-
* @param {ActorRun} run
|
|
16
|
-
* @param {string} [message]
|
|
17
|
-
*/
|
|
18
|
-
constructor(run: ActorRun, message?: string | undefined);
|
|
19
|
-
run: ActorRun;
|
|
20
|
-
}
|
|
21
|
-
/**
|
|
22
|
-
* TimeoutError class.
|
|
23
|
-
* This error should be thrown after request timeout from `requestAsBrowser`.
|
|
24
|
-
* @ignore
|
|
25
|
-
*/
|
|
26
|
-
export class TimeoutError extends Error {
|
|
27
|
-
}
|
|
28
|
-
import { ActorRun } from "./typedefs";
|
|
29
|
-
//# sourceMappingURL=errors.d.ts.map
|
package/build/errors.d.ts.map
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"errors.d.ts","sourceRoot":"","sources":["../src/errors.js"],"names":[],"mappings":"AACA,qDAAsD;AAKtD;;;;;;;;;;GAUG;AACH;IACI;;;OAGG;IACH,iBAHW,QAAQ,gCASlB;IAHG,cAAc;CAIrB;AAED;;;;GAIG;AACH;CAA0C"}
|
package/build/errors.js
DELETED
|
@@ -1,38 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.TimeoutError = exports.ApifyCallError = exports.APIFY_CALL_ERROR_NAME = void 0;
|
|
4
|
-
/* eslint-disable max-classes-per-file */
|
|
5
|
-
exports.APIFY_CALL_ERROR_NAME = 'ApifyCallError';
|
|
6
|
-
/**
|
|
7
|
-
* The class represents exceptions thrown
|
|
8
|
-
* by the {@link Apify#call} function.
|
|
9
|
-
*
|
|
10
|
-
* @property {string} message
|
|
11
|
-
* Error message
|
|
12
|
-
* @property {ActorRun} run
|
|
13
|
-
* Object representing the failed actor run.
|
|
14
|
-
* @property {string} name
|
|
15
|
-
* Contains `"ApifyCallError"`
|
|
16
|
-
*/
|
|
17
|
-
class ApifyCallError extends Error {
|
|
18
|
-
/**
|
|
19
|
-
* @param {ActorRun} run
|
|
20
|
-
* @param {string} [message]
|
|
21
|
-
*/
|
|
22
|
-
constructor(run, message = 'The actor invoked by Apify.call() did not succeed') {
|
|
23
|
-
super(`${message} (run ID: ${run.id})`);
|
|
24
|
-
this.name = exports.APIFY_CALL_ERROR_NAME;
|
|
25
|
-
this.run = run;
|
|
26
|
-
Error.captureStackTrace(this, ApifyCallError);
|
|
27
|
-
}
|
|
28
|
-
}
|
|
29
|
-
exports.ApifyCallError = ApifyCallError;
|
|
30
|
-
/**
|
|
31
|
-
* TimeoutError class.
|
|
32
|
-
* This error should be thrown after request timeout from `requestAsBrowser`.
|
|
33
|
-
* @ignore
|
|
34
|
-
*/
|
|
35
|
-
class TimeoutError extends Error {
|
|
36
|
-
}
|
|
37
|
-
exports.TimeoutError = TimeoutError;
|
|
38
|
-
//# sourceMappingURL=errors.js.map
|
package/build/errors.js.map
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"errors.js","sourceRoot":"","sources":["../src/errors.js"],"names":[],"mappings":";;;AAAA,yCAAyC;AAC5B,QAAA,qBAAqB,GAAG,gBAAgB,CAAC;AAKtD;;;;;;;;;;GAUG;AACH,MAAa,cAAe,SAAQ,KAAK;IACrC;;;OAGG;IACH,YAAY,GAAG,EAAE,OAAO,GAAG,mDAAmD;QAC1E,KAAK,CAAC,GAAG,OAAO,aAAa,GAAG,CAAC,EAAE,GAAG,CAAC,CAAC;QACxC,IAAI,CAAC,IAAI,GAAG,6BAAqB,CAAC;QAClC,IAAI,CAAC,GAAG,GAAG,GAAG,CAAC;QAEf,KAAK,CAAC,iBAAiB,CAAC,IAAI,EAAE,cAAc,CAAC,CAAC;IAClD,CAAC;CACJ;AAZD,wCAYC;AAED;;;;GAIG;AACH,MAAa,YAAa,SAAQ,KAAK;CAAG;AAA1C,oCAA0C"}
|
package/build/events.d.ts
DELETED
|
@@ -1,11 +0,0 @@
|
|
|
1
|
-
export default events;
|
|
2
|
-
export function initializeEvents(config?: Configuration | undefined): void;
|
|
3
|
-
export function stopEvents(): void;
|
|
4
|
-
/**
|
|
5
|
-
* Event emitter providing events from underlying Actor infrastructure and Apify package.
|
|
6
|
-
* @ignore
|
|
7
|
-
*/
|
|
8
|
-
declare const events: EventEmitter;
|
|
9
|
-
import { Configuration } from "./configuration";
|
|
10
|
-
import { EventEmitter } from "events";
|
|
11
|
-
//# sourceMappingURL=events.d.ts.map
|
package/build/events.d.ts.map
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"events.d.ts","sourceRoot":"","sources":["../src/events.js"],"names":[],"mappings":";AAyFO,2EA6CN;AAYM,mCAIN;AA/ID;;;GAGG;AACH,mCAAkC"}
|
package/build/events.js
DELETED
|
@@ -1,147 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.stopEvents = exports.initializeEvents = void 0;
|
|
4
|
-
const tslib_1 = require("tslib");
|
|
5
|
-
const events_1 = require("events");
|
|
6
|
-
const ws_1 = (0, tslib_1.__importDefault)(require("ws"));
|
|
7
|
-
const consts_1 = require("@apify/consts");
|
|
8
|
-
const constants_1 = require("./constants");
|
|
9
|
-
const utils_log_1 = (0, tslib_1.__importDefault)(require("./utils_log"));
|
|
10
|
-
const configuration_1 = require("./configuration");
|
|
11
|
-
/**
|
|
12
|
-
* Event emitter providing events from underlying Actor infrastructure and Apify package.
|
|
13
|
-
* @ignore
|
|
14
|
-
*/
|
|
15
|
-
const events = new events_1.EventEmitter();
|
|
16
|
-
/**
|
|
17
|
-
* Websocket connection to actor events.
|
|
18
|
-
* @type {*}
|
|
19
|
-
* @ignore
|
|
20
|
-
*/
|
|
21
|
-
let eventsWs = null;
|
|
22
|
-
/**
|
|
23
|
-
* Interval that emits persist state events.
|
|
24
|
-
* @type {*}
|
|
25
|
-
* @ignore
|
|
26
|
-
*/
|
|
27
|
-
let persistStateInterval = null;
|
|
28
|
-
/**
|
|
29
|
-
* Gets an instance of a Node.js'
|
|
30
|
-
* [EventEmitter](https://nodejs.org/api/events.html#events_class_eventemitter)
|
|
31
|
-
* class that emits various events from the SDK or the Apify platform.
|
|
32
|
-
* The event emitter is initialized by calling the {@link Apify#main} function.
|
|
33
|
-
*
|
|
34
|
-
* **Example usage:**
|
|
35
|
-
*
|
|
36
|
-
* ```javascript
|
|
37
|
-
* Apify.events.on('cpuInfo', (data) => {
|
|
38
|
-
* if (data.isCpuOverloaded) console.log('Oh no, the CPU is overloaded!');
|
|
39
|
-
* });
|
|
40
|
-
* ```
|
|
41
|
-
*
|
|
42
|
-
* The following events are emitted:
|
|
43
|
-
*
|
|
44
|
-
* - `cpuInfo`: `{ "isCpuOverloaded": Boolean }`
|
|
45
|
-
* The event is emitted approximately every second
|
|
46
|
-
* and it indicates whether the actor is using the maximum of available CPU resources.
|
|
47
|
-
* If that's the case, the actor should not add more workload.
|
|
48
|
-
* For example, this event is used by the {@link AutoscaledPool} class.
|
|
49
|
-
* - `migrating`: `void`
|
|
50
|
-
* Emitted when the actor running on the Apify platform is going to be migrated to another worker server soon.
|
|
51
|
-
* You can use it to persist the state of the actor and abort the run, to speed up migration.
|
|
52
|
-
* For example, this is used by the {@link RequestList} class.
|
|
53
|
-
* - `aborting`: `void`
|
|
54
|
-
* When a user aborts an actor run on the Apify platform, they can choose to abort gracefully to allow
|
|
55
|
-
* the actor some time before getting killed. This graceful abort emits the `aborting` event which the SDK
|
|
56
|
-
* uses to gracefully stop running crawls and you can use it to do your own cleanup as well.
|
|
57
|
-
* - `persistState`: `{ "isMigrating": Boolean }`
|
|
58
|
-
* Emitted in regular intervals (by default 60 seconds) to notify all components of Apify SDK that it is time to persist
|
|
59
|
-
* their state, in order to avoid repeating all work when the actor restarts.
|
|
60
|
-
* This event is automatically emitted together with the `migrating` event,
|
|
61
|
-
* in which case the `isMigrating` flag is set to `true`. Otherwise the flag is `false`.
|
|
62
|
-
* Note that the `persistState` event is provided merely for user convenience,
|
|
63
|
-
* you can achieve the same effect using `setInterval()` and listening for the `migrating` event.
|
|
64
|
-
*
|
|
65
|
-
* @memberof module:Apify
|
|
66
|
-
* @name events
|
|
67
|
-
*/
|
|
68
|
-
exports.default = events;
|
|
69
|
-
/**
|
|
70
|
-
* Emits event telling all components that they should persist their state at regular intervals and also when an actor is being
|
|
71
|
-
* migrated to another worker.
|
|
72
|
-
*
|
|
73
|
-
* @ignore
|
|
74
|
-
*/
|
|
75
|
-
const emitPersistStateEvent = (isMigrating = false) => {
|
|
76
|
-
events.emit(constants_1.ACTOR_EVENT_NAMES_EX.PERSIST_STATE, { isMigrating });
|
|
77
|
-
};
|
|
78
|
-
/**
|
|
79
|
-
* Initializes `Apify.events` event emitter by creating a connection to a websocket that provides them.
|
|
80
|
-
* This is an internal function that is automatically called by `Apify.main()`.
|
|
81
|
-
*
|
|
82
|
-
* @memberof module:Apify
|
|
83
|
-
* @name initializeEvents
|
|
84
|
-
* @param {Configuration} [config]
|
|
85
|
-
* @function
|
|
86
|
-
* @ignore
|
|
87
|
-
*/
|
|
88
|
-
const initializeEvents = (config = configuration_1.Configuration.getGlobalConfig()) => {
|
|
89
|
-
if (eventsWs)
|
|
90
|
-
return;
|
|
91
|
-
const log = utils_log_1.default.child({ prefix: 'Events' });
|
|
92
|
-
if (!persistStateInterval) {
|
|
93
|
-
const intervalMillis = config.get('persistStateIntervalMillis');
|
|
94
|
-
persistStateInterval = setInterval(() => emitPersistStateEvent(), intervalMillis);
|
|
95
|
-
}
|
|
96
|
-
const eventsWsUrl = config.get('actorEventsWsUrl');
|
|
97
|
-
// Locally there is no web socket to connect, so just print a log message.
|
|
98
|
-
if (!eventsWsUrl) {
|
|
99
|
-
log.debug(`Environment variable ${consts_1.ENV_VARS.ACTOR_EVENTS_WS_URL} is not set, no events from Apify platform will be emitted.`);
|
|
100
|
-
return;
|
|
101
|
-
}
|
|
102
|
-
eventsWs = new ws_1.default(eventsWsUrl);
|
|
103
|
-
eventsWs.on('message', (message) => {
|
|
104
|
-
if (!message)
|
|
105
|
-
return;
|
|
106
|
-
try {
|
|
107
|
-
const { name, data } = JSON.parse(message);
|
|
108
|
-
events.emit(name, data);
|
|
109
|
-
if (name === consts_1.ACTOR_EVENT_NAMES.MIGRATING) {
|
|
110
|
-
clearInterval(persistStateInterval); // Don't send any other persist state event.
|
|
111
|
-
emitPersistStateEvent(true);
|
|
112
|
-
}
|
|
113
|
-
}
|
|
114
|
-
catch (err) {
|
|
115
|
-
log.exception(err, 'Cannot parse actor event');
|
|
116
|
-
}
|
|
117
|
-
});
|
|
118
|
-
eventsWs.on('error', (err) => {
|
|
119
|
-
// Don't print this error as this happens in the case of very short Apify.main().
|
|
120
|
-
if (err.message === 'WebSocket was closed before the connection was established')
|
|
121
|
-
return;
|
|
122
|
-
log.exception(err, 'web socket connection failed');
|
|
123
|
-
});
|
|
124
|
-
eventsWs.on('close', () => {
|
|
125
|
-
log.warning('web socket has been closed');
|
|
126
|
-
eventsWs = null;
|
|
127
|
-
});
|
|
128
|
-
};
|
|
129
|
-
exports.initializeEvents = initializeEvents;
|
|
130
|
-
/**
|
|
131
|
-
* Closes websocket providing events from Actor infrastructure and also stops sending internal events
|
|
132
|
-
* of Apify package such as `persistState`.
|
|
133
|
-
* This is automatically called at the end of `Apify.main()`.
|
|
134
|
-
*
|
|
135
|
-
* @memberof module:Apify
|
|
136
|
-
* @name stopEvents
|
|
137
|
-
* @function
|
|
138
|
-
* @ignore
|
|
139
|
-
*/
|
|
140
|
-
const stopEvents = () => {
|
|
141
|
-
if (eventsWs)
|
|
142
|
-
eventsWs.close();
|
|
143
|
-
clearInterval(persistStateInterval);
|
|
144
|
-
persistStateInterval = null;
|
|
145
|
-
};
|
|
146
|
-
exports.stopEvents = stopEvents;
|
|
147
|
-
//# sourceMappingURL=events.js.map
|
package/build/events.js.map
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"events.js","sourceRoot":"","sources":["../src/events.js"],"names":[],"mappings":";;;;AAAA,mCAAsC;AACtC,yDAA2B;AAC3B,0CAA4D;AAC5D,2CAAmD;AACnD,yEAAqC;AACrC,mDAAgD;AAEhD;;;GAGG;AACH,MAAM,MAAM,GAAG,IAAI,qBAAY,EAAE,CAAC;AAElC;;;;GAIG;AACH,IAAI,QAAQ,GAAG,IAAI,CAAC;AAEpB;;;;GAIG;AACH,IAAI,oBAAoB,GAAG,IAAI,CAAC;AAEhC;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAuCG;AACH,kBAAe,MAAM,CAAC;AAEtB;;;;;GAKG;AACH,MAAM,qBAAqB,GAAG,CAAC,WAAW,GAAG,KAAK,EAAE,EAAE;IAClD,MAAM,CAAC,IAAI,CAAC,gCAAoB,CAAC,aAAa,EAAE,EAAE,WAAW,EAAE,CAAC,CAAC;AACrE,CAAC,CAAC;AAEF;;;;;;;;;GASG;AACI,MAAM,gBAAgB,GAAG,CAAC,MAAM,GAAG,6BAAa,CAAC,eAAe,EAAE,EAAE,EAAE;IACzE,IAAI,QAAQ;QAAE,OAAO;IAErB,MAAM,GAAG,GAAG,mBAAU,CAAC,KAAK,CAAC,EAAE,MAAM,EAAE,QAAQ,EAAE,CAAC,CAAC;IAEnD,IAAI,CAAC,oBAAoB,EAAE;QACvB,MAAM,cAAc,GAAG,MAAM,CAAC,GAAG,CAAC,4BAA4B,CAAC,CAAC;QAChE,oBAAoB,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC,qBAAqB,EAAE,EAAE,cAAc,CAAC,CAAC;KACrF;IAED,MAAM,WAAW,GAAG,MAAM,CAAC,GAAG,CAAC,kBAAkB,CAAC,CAAC;IAEnD,0EAA0E;IAC1E,IAAI,CAAC,WAAW,EAAE;QACd,GAAG,CAAC,KAAK,CAAC,wBAAwB,iBAAQ,CAAC,mBAAmB,6DAA6D,CAAC,CAAC;QAC7H,OAAO;KACV;IAED,QAAQ,GAAG,IAAI,YAAS,CAAC,WAAW,CAAC,CAAC;IACtC,QAAQ,CAAC,EAAE,CAAC,SAAS,EAAE,CAAC,OAAO,EAAE,EAAE;QAC/B,IAAI,CAAC,OAAO;YAAE,OAAO;QAErB,IAAI;YACA,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;YAE3C,MAAM,CAAC,IAAI,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC;YAExB,IAAI,IAAI,KAAK,0BAAiB,CAAC,SAAS,EAAE;gBACtC,aAAa,CAAC,oBAAoB,CAAC,CAAC,CAAC,4CAA4C;gBACjF,qBAAqB,CAAC,IAAI,CAAC,CAAC;aAC/B;SACJ;QAAC,OAAO,GAAG,EAAE;YACV,GAAG,CAAC,SAAS,CAAC,GAAG,EAAE,0BAA0B,CAAC,CAAC;SAClD;IACL,CAAC,CAAC,CAAC;IACH,QAAQ,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,GAAG,EAAE,EAAE;QACzB,iFAAiF;QACjF,IAAI,GAAG,CAAC,OAAO,KAAK,4DAA4D;YAAE,OAAO;QAEzF,GAAG,CAAC,SAAS,CAAC,GAAG,EAAE,8BAA8B,CAAC,CAAC;IACvD,CAAC,CAAC,CAAC;IACH,QAAQ,CAAC,EAAE,CAAC,OAAO,EAAE,GAAG,EAAE;QACtB,GAAG,CAAC,OAAO,CAAC,4BAA4B,CAAC,CAAC;QAC1C,QAAQ,GAAG,IAAI,CAAC;IACpB,CAAC,CAAC,CAAC;AACP,CAAC,CAAC;AA7CW,QAAA,gBAAgB,oBA6C3B;AAEF;;;;;;;;;GASG;AACI,MAAM,UAAU,GAAG,GAAG,EAAE;IAC3B,IAAI,QAAQ;QAAE,QAAQ,CAAC,KAAK,EAAE,CAAC;IAC/B,aAAa,CAAC,oBAAoB,CAAC,CAAC;IACpC,oBAAoB,GAAG,IAAI,CAAC;AAChC,CAAC,CAAC;AAJW,QAAA,UAAU,cAIrB"}
|
package/build/index.d.ts
DELETED
package/build/index.d.ts.map
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.js"],"names":[],"mappings":""}
|
package/build/index.js
DELETED
|
@@ -1,7 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
const tslib_1 = require("tslib");
|
|
4
|
-
const Apify = (0, tslib_1.__importStar)(require("./main"));
|
|
5
|
-
exports.default = Apify;
|
|
6
|
-
(0, tslib_1.__exportStar)(require("./main"), exports);
|
|
7
|
-
//# sourceMappingURL=index.js.map
|
package/build/index.js.map
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.js"],"names":[],"mappings":";;;AAAA,2DAAgC;AAEhC,kBAAe,KAAK,CAAC;AACrB,sDAAuB"}
|