apify 2.3.1-beta.4 → 3.0.0-alpha.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -5
- package/package.json +69 -128
- package/build/actor.d.ts +0 -113
- package/build/actor.d.ts.map +0 -1
- package/build/actor.js +0 -582
- package/build/actor.js.map +0 -1
- package/build/apify.d.ts +0 -752
- package/build/apify.d.ts.map +0 -1
- package/build/apify.js +0 -877
- package/build/apify.js.map +0 -1
- package/build/autoscaling/autoscaled_pool.d.ts +0 -384
- package/build/autoscaling/autoscaled_pool.d.ts.map +0 -1
- package/build/autoscaling/autoscaled_pool.js +0 -557
- package/build/autoscaling/autoscaled_pool.js.map +0 -1
- package/build/autoscaling/snapshotter.d.ts +0 -278
- package/build/autoscaling/snapshotter.d.ts.map +0 -1
- package/build/autoscaling/snapshotter.js +0 -447
- package/build/autoscaling/snapshotter.js.map +0 -1
- package/build/autoscaling/system_status.d.ts +0 -224
- package/build/autoscaling/system_status.d.ts.map +0 -1
- package/build/autoscaling/system_status.js +0 -228
- package/build/autoscaling/system_status.js.map +0 -1
- package/build/browser_launchers/browser_launcher.d.ts +0 -154
- package/build/browser_launchers/browser_launcher.d.ts.map +0 -1
- package/build/browser_launchers/browser_launcher.js +0 -160
- package/build/browser_launchers/browser_launcher.js.map +0 -1
- package/build/browser_launchers/browser_plugin.d.ts +0 -23
- package/build/browser_launchers/browser_plugin.d.ts.map +0 -1
- package/build/browser_launchers/browser_plugin.js +0 -25
- package/build/browser_launchers/browser_plugin.js.map +0 -1
- package/build/browser_launchers/playwright_launcher.d.ts +0 -131
- package/build/browser_launchers/playwright_launcher.d.ts.map +0 -1
- package/build/browser_launchers/playwright_launcher.js +0 -150
- package/build/browser_launchers/playwright_launcher.js.map +0 -1
- package/build/browser_launchers/puppeteer_launcher.d.ts +0 -153
- package/build/browser_launchers/puppeteer_launcher.d.ts.map +0 -1
- package/build/browser_launchers/puppeteer_launcher.js +0 -197
- package/build/browser_launchers/puppeteer_launcher.js.map +0 -1
- package/build/cache_container.d.ts +0 -31
- package/build/cache_container.d.ts.map +0 -1
- package/build/cache_container.js +0 -48
- package/build/cache_container.js.map +0 -1
- package/build/configuration.d.ts +0 -226
- package/build/configuration.d.ts.map +0 -1
- package/build/configuration.js +0 -325
- package/build/configuration.js.map +0 -1
- package/build/constants.d.ts +0 -37
- package/build/constants.d.ts.map +0 -1
- package/build/constants.js +0 -41
- package/build/constants.js.map +0 -1
- package/build/crawlers/basic_crawler.d.ts +0 -443
- package/build/crawlers/basic_crawler.d.ts.map +0 -1
- package/build/crawlers/basic_crawler.js +0 -664
- package/build/crawlers/basic_crawler.js.map +0 -1
- package/build/crawlers/browser_crawler.d.ts +0 -512
- package/build/crawlers/browser_crawler.d.ts.map +0 -1
- package/build/crawlers/browser_crawler.js +0 -540
- package/build/crawlers/browser_crawler.js.map +0 -1
- package/build/crawlers/cheerio_crawler.d.ts +0 -931
- package/build/crawlers/cheerio_crawler.d.ts.map +0 -1
- package/build/crawlers/cheerio_crawler.js +0 -913
- package/build/crawlers/cheerio_crawler.js.map +0 -1
- package/build/crawlers/crawler_extension.d.ts +0 -10
- package/build/crawlers/crawler_extension.d.ts.map +0 -1
- package/build/crawlers/crawler_extension.js +0 -19
- package/build/crawlers/crawler_extension.js.map +0 -1
- package/build/crawlers/crawler_utils.d.ts +0 -34
- package/build/crawlers/crawler_utils.d.ts.map +0 -1
- package/build/crawlers/crawler_utils.js +0 -87
- package/build/crawlers/crawler_utils.js.map +0 -1
- package/build/crawlers/playwright_crawler.d.ts +0 -448
- package/build/crawlers/playwright_crawler.d.ts.map +0 -1
- package/build/crawlers/playwright_crawler.js +0 -299
- package/build/crawlers/playwright_crawler.js.map +0 -1
- package/build/crawlers/puppeteer_crawler.d.ts +0 -425
- package/build/crawlers/puppeteer_crawler.d.ts.map +0 -1
- package/build/crawlers/puppeteer_crawler.js +0 -299
- package/build/crawlers/puppeteer_crawler.js.map +0 -1
- package/build/crawlers/statistics.d.ts +0 -185
- package/build/crawlers/statistics.d.ts.map +0 -1
- package/build/crawlers/statistics.js +0 -331
- package/build/crawlers/statistics.js.map +0 -1
- package/build/enqueue_links/click_elements.d.ts +0 -179
- package/build/enqueue_links/click_elements.d.ts.map +0 -1
- package/build/enqueue_links/click_elements.js +0 -434
- package/build/enqueue_links/click_elements.js.map +0 -1
- package/build/enqueue_links/enqueue_links.d.ts +0 -117
- package/build/enqueue_links/enqueue_links.d.ts.map +0 -1
- package/build/enqueue_links/enqueue_links.js +0 -163
- package/build/enqueue_links/enqueue_links.js.map +0 -1
- package/build/enqueue_links/shared.d.ts +0 -42
- package/build/enqueue_links/shared.d.ts.map +0 -1
- package/build/enqueue_links/shared.js +0 -121
- package/build/enqueue_links/shared.js.map +0 -1
- package/build/errors.d.ts +0 -29
- package/build/errors.d.ts.map +0 -1
- package/build/errors.js +0 -38
- package/build/errors.js.map +0 -1
- package/build/events.d.ts +0 -11
- package/build/events.d.ts.map +0 -1
- package/build/events.js +0 -147
- package/build/events.js.map +0 -1
- package/build/index.d.ts +0 -4
- package/build/index.d.ts.map +0 -1
- package/build/index.js +0 -7
- package/build/index.js.map +0 -1
- package/build/main.d.ts +0 -179
- package/build/main.d.ts.map +0 -1
- package/build/main.js +0 -81
- package/build/main.js.map +0 -1
- package/build/playwright_utils.d.ts +0 -9
- package/build/playwright_utils.d.ts.map +0 -1
- package/build/playwright_utils.js +0 -90
- package/build/playwright_utils.js.map +0 -1
- package/build/proxy_configuration.d.ts +0 -411
- package/build/proxy_configuration.d.ts.map +0 -1
- package/build/proxy_configuration.js +0 -517
- package/build/proxy_configuration.js.map +0 -1
- package/build/pseudo_url.d.ts +0 -86
- package/build/pseudo_url.d.ts.map +0 -1
- package/build/pseudo_url.js +0 -153
- package/build/pseudo_url.js.map +0 -1
- package/build/puppeteer_request_interception.d.ts +0 -8
- package/build/puppeteer_request_interception.d.ts.map +0 -1
- package/build/puppeteer_request_interception.js +0 -235
- package/build/puppeteer_request_interception.js.map +0 -1
- package/build/puppeteer_utils.d.ts +0 -250
- package/build/puppeteer_utils.d.ts.map +0 -1
- package/build/puppeteer_utils.js +0 -551
- package/build/puppeteer_utils.js.map +0 -1
- package/build/request.d.ts +0 -180
- package/build/request.d.ts.map +0 -1
- package/build/request.js +0 -261
- package/build/request.js.map +0 -1
- package/build/request_list.d.ts +0 -581
- package/build/request_list.d.ts.map +0 -1
- package/build/request_list.js +0 -826
- package/build/request_list.js.map +0 -1
- package/build/serialization.d.ts +0 -5
- package/build/serialization.d.ts.map +0 -1
- package/build/serialization.js +0 -139
- package/build/serialization.js.map +0 -1
- package/build/session_pool/errors.d.ts +0 -11
- package/build/session_pool/errors.d.ts.map +0 -1
- package/build/session_pool/errors.js +0 -18
- package/build/session_pool/errors.js.map +0 -1
- package/build/session_pool/events.d.ts +0 -5
- package/build/session_pool/events.d.ts.map +0 -1
- package/build/session_pool/events.js +0 -6
- package/build/session_pool/events.js.map +0 -1
- package/build/session_pool/session.d.ts +0 -286
- package/build/session_pool/session.d.ts.map +0 -1
- package/build/session_pool/session.js +0 -355
- package/build/session_pool/session.js.map +0 -1
- package/build/session_pool/session_pool.d.ts +0 -280
- package/build/session_pool/session_pool.d.ts.map +0 -1
- package/build/session_pool/session_pool.js +0 -393
- package/build/session_pool/session_pool.js.map +0 -1
- package/build/session_pool/session_utils.d.ts +0 -4
- package/build/session_pool/session_utils.d.ts.map +0 -1
- package/build/session_pool/session_utils.js +0 -24
- package/build/session_pool/session_utils.js.map +0 -1
- package/build/stealth/hiding_tricks.d.ts +0 -22
- package/build/stealth/hiding_tricks.d.ts.map +0 -1
- package/build/stealth/hiding_tricks.js +0 -308
- package/build/stealth/hiding_tricks.js.map +0 -1
- package/build/stealth/stealth.d.ts +0 -56
- package/build/stealth/stealth.d.ts.map +0 -1
- package/build/stealth/stealth.js +0 -125
- package/build/stealth/stealth.js.map +0 -1
- package/build/storages/dataset.d.ts +0 -288
- package/build/storages/dataset.d.ts.map +0 -1
- package/build/storages/dataset.js +0 -480
- package/build/storages/dataset.js.map +0 -1
- package/build/storages/key_value_store.d.ts +0 -243
- package/build/storages/key_value_store.d.ts.map +0 -1
- package/build/storages/key_value_store.js +0 -462
- package/build/storages/key_value_store.js.map +0 -1
- package/build/storages/request_queue.d.ts +0 -318
- package/build/storages/request_queue.d.ts.map +0 -1
- package/build/storages/request_queue.js +0 -636
- package/build/storages/request_queue.js.map +0 -1
- package/build/storages/storage_manager.d.ts +0 -87
- package/build/storages/storage_manager.d.ts.map +0 -1
- package/build/storages/storage_manager.js +0 -150
- package/build/storages/storage_manager.js.map +0 -1
- package/build/tsconfig.tsbuildinfo +0 -1
- package/build/typedefs.d.ts +0 -146
- package/build/typedefs.d.ts.map +0 -1
- package/build/typedefs.js +0 -88
- package/build/typedefs.js.map +0 -1
- package/build/utils.d.ts +0 -175
- package/build/utils.d.ts.map +0 -1
- package/build/utils.js +0 -731
- package/build/utils.js.map +0 -1
- package/build/utils_log.d.ts +0 -41
- package/build/utils_log.d.ts.map +0 -1
- package/build/utils_log.js +0 -192
- package/build/utils_log.js.map +0 -1
- package/build/utils_request.d.ts +0 -77
- package/build/utils_request.d.ts.map +0 -1
- package/build/utils_request.js +0 -385
- package/build/utils_request.js.map +0 -1
- package/build/utils_social.d.ts +0 -210
- package/build/utils_social.d.ts.map +0 -1
- package/build/utils_social.js +0 -787
- package/build/utils_social.js.map +0 -1
- package/build/validators.d.ts +0 -23
- package/build/validators.d.ts.map +0 -1
- package/build/validators.js +0 -29
- package/build/validators.js.map +0 -1
package/build/utils.js
DELETED
|
@@ -1,731 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.publicUtils = exports.purgeLocalStorage = exports.waitForRunToFinish = exports.parseContentTypeFromResponse = exports.printOutdatedSdkWarning = exports.snakeCaseToCamelCase = exports.sleep = exports.isAtHome = exports.getTypicalChromeExecutablePath = exports.getFirstKey = exports.getMemoryInfo = exports.weightedAvg = exports.isDocker = exports.addCharsetToContentType = exports.apifyClient = exports.logSystemInfo = exports.newClient = void 0;
|
|
4
|
-
const tslib_1 = require("tslib");
|
|
5
|
-
const ps_tree_1 = (0, tslib_1.__importDefault)(require("@apify/ps-tree"));
|
|
6
|
-
const child_process_1 = require("child_process");
|
|
7
|
-
const apify_client_1 = require("apify-client");
|
|
8
|
-
const package_json_1 = require("apify-client/package.json");
|
|
9
|
-
const consts_1 = require("@apify/consts");
|
|
10
|
-
// eslint-disable-next-line import/no-duplicates
|
|
11
|
-
const cheerio_1 = (0, tslib_1.__importDefault)(require("cheerio"));
|
|
12
|
-
const content_type_1 = (0, tslib_1.__importDefault)(require("content-type"));
|
|
13
|
-
const fs_1 = (0, tslib_1.__importDefault)(require("fs"));
|
|
14
|
-
const mime_types_1 = (0, tslib_1.__importDefault)(require("mime-types"));
|
|
15
|
-
const os_1 = (0, tslib_1.__importDefault)(require("os"));
|
|
16
|
-
const ow_1 = (0, tslib_1.__importDefault)(require("ow"));
|
|
17
|
-
const path_1 = (0, tslib_1.__importDefault)(require("path"));
|
|
18
|
-
const semver_1 = (0, tslib_1.__importDefault)(require("semver"));
|
|
19
|
-
const underscore_1 = (0, tslib_1.__importDefault)(require("underscore"));
|
|
20
|
-
const url_1 = require("url");
|
|
21
|
-
const util_1 = (0, tslib_1.__importDefault)(require("util"));
|
|
22
|
-
const rimraf_1 = (0, tslib_1.__importDefault)(require("rimraf"));
|
|
23
|
-
const package_json_2 = require("../package.json");
|
|
24
|
-
const utils_log_1 = (0, tslib_1.__importDefault)(require("./utils_log"));
|
|
25
|
-
const utils_request_1 = require("./utils_request");
|
|
26
|
-
const configuration_1 = require("./configuration");
|
|
27
|
-
/* eslint-enable no-unused-vars,import/named,import/no-duplicates,import/order */
|
|
28
|
-
const rimrafp = util_1.default.promisify(rimraf_1.default);
|
|
29
|
-
/**
 * Default pattern used to pick URLs out of arbitrary text (plain text, JSON, CSV, ...).
 * A comma or a space ends the URL, so comma-separated lists can be parsed.
 * Unicode letters (`\p{L}`) are accepted in addition to the usual URL characters.
 * The expression is global, case-insensitive and unicode-aware.
 * @memberOf utils
 */
const URL_NO_COMMAS_REGEX = new RegExp('https?://(www\\.)?[\\p{L}0-9][-\\p{L}0-9@:%._\\+~#=]{0,254}[\\p{L}0-9]\\.[a-z]{2,63}(:\\d{1,5})?(/[-\\p{L}0-9@:%_\\+.~#?&//=\\(\\)]*)?', 'giu'); // eslint-disable-line
/**
 * Same as `URL_NO_COMMAS_REGEX`, except that commas are also accepted inside the URL path and query.
 * Be aware that URLs taken from comma-delimited lists may then be merged together or come out malformed.
 * @memberOf utils
 */
const URL_WITH_COMMAS_REGEX = new RegExp('https?://(www\\.)?[\\p{L}0-9][-\\p{L}0-9@:%._\\+~#=]{0,254}[\\p{L}0-9]\\.[a-z]{2,63}(:\\d{1,5})?(/[-\\p{L}0-9@:%_\\+,.~#?&//=\\(\\)]*)?', 'giu'); // eslint-disable-line
|
|
41
|
-
// Files holding the memory limit (TOTAL) and current usage (USED) of the container,
// keyed by the cgroups version (V1 / V2) that exposes them.
const MEMORY_FILE_PATHS = {
    TOTAL: {
        V1: '/sys/fs/cgroup/memory/memory.limit_in_bytes',
        V2: '/sys/fs/cgroup/memory.max',
    },
    USED: {
        V1: '/sys/fs/cgroup/memory/memory.usage_in_bytes',
        V2: '/sys/fs/cgroup/memory.current',
    },
};
// With an explicit encoding fs.readFile yields a string rather than a Buffer.
const MEMORY_FILE_ENCODING = 'utf-8';
|
|
53
|
-
const psTreePromised = util_1.default.promisify(ps_tree_1.default);
|
|
54
|
-
/**
 * Returns a new instance of the Apify API client. The `ApifyClient` class is provided
 * by the <a href="https://www.npmjs.com/package/apify-client" target="_blank">apify-client</a>
 * NPM package, and it is automatically configured using the `APIFY_API_BASE_URL`, and `APIFY_TOKEN`
 * environment variables. You can override the base URL and the token via the available options.
 * That's useful if you want to use the client as a different Apify user than the SDK internals are using.
 *
 * The options object is validated strictly: unknown keys or values of a wrong type
 * make the call throw.
 *
 * @param {object} [options]
 * @param {string} [options.baseUrl]
 *   Must be a valid URL. Defaults to the API base URL environment variable.
 * @param {string} [options.token]
 *   Defaults to the token environment variable.
 * @param {number} [options.maxRetries]
 * @param {number} [options.minDelayBetweenRetriesMillis]
 * @memberof module:Apify
 * @function
 * @name newClient
 * @return {ApifyClient}
 */
const newClient = (options = {}) => {
    (0, ow_1.default)(options, ow_1.default.object.exactShape({
        baseUrl: ow_1.default.optional.string.url,
        token: ow_1.default.optional.string,
        maxRetries: ow_1.default.optional.number,
        minDelayBetweenRetriesMillis: ow_1.default.optional.number,
    }));
    // Explicit options win; the environment is only consulted for values that were not passed in.
    const {
        baseUrl = process.env[consts_1.ENV_VARS.API_BASE_URL],
        token = process.env[consts_1.ENV_VARS.TOKEN],
    } = options;
    return new apify_client_1.ApifyClient({
        ...options,
        baseUrl,
        token,
    });
};
|
|
84
|
-
exports.newClient = newClient;
|
|
85
|
-
/**
 * Writes a single info-level log record with the versions of the `apify` and
 * `apify-client` packages, the operating system type and the Node.js version.
 */
const logSystemInfo = () => {
    const systemInfo = {
        apifyVersion: package_json_2.version,
        apifyClientVersion: package_json_1.version,
        osType: os_1.default.type(),
        nodeVersion: process.version,
    };
    utils_log_1.default.info('System info', systemInfo);
};
|
|
96
|
-
exports.logSystemInfo = logSystemInfo;
|
|
97
|
-
/**
|
|
98
|
-
* The default instance of `ApifyClient` used internally
|
|
99
|
-
* by the SDK.
|
|
100
|
-
*
|
|
101
|
-
* @type {*}
|
|
102
|
-
* @ignore
|
|
103
|
-
*/
|
|
104
|
-
exports.apifyClient = configuration_1.Configuration.getGlobalConfig().getClient();
|
|
105
|
-
/**
 * Makes sure a content type carries a charset parameter, appending `charset=utf-8`
 * when none is present. Falsy input and content types that already declare
 * a charset are handed back untouched.
 *
 * @param {string} contentType
 * @returns {string}
 *
 * @ignore
 */
const addCharsetToContentType = (contentType) => {
    if (contentType) {
        const parsedType = content_type_1.default.parse(contentType);
        if (!parsedType.parameters.charset) {
            parsedType.parameters.charset = 'utf-8';
            return content_type_1.default.format(parsedType);
        }
    }
    return contentType;
};
|
|
122
|
-
exports.addCharsetToContentType = addCharsetToContentType;
|
|
123
|
-
// Memoized result of the Docker detection, populated by isDocker().
let isDockerPromiseCache;
/**
 * Runs the two Docker detection probes concurrently and resolves to `true` when at least
 * one of them succeeds: the `/.dockerenv` file exists, or `/proc/self/cgroup` mentions "docker".
 * A probe that fails for any reason simply counts as `false`.
 */
const createIsDockerPromise = () => {
    const statAsync = util_1.default.promisify(fs_1.default.stat);
    const hasDockerEnvFile = statAsync('/.dockerenv')
        .then(() => true)
        .catch(() => false);
    const readFileAsync = util_1.default.promisify(fs_1.default.readFile);
    const cgroupMentionsDocker = readFileAsync('/proc/self/cgroup', 'utf8')
        .then((content) => content.includes('docker'))
        .catch(() => false);
    return Promise
        .all([hasDockerEnvFile, cgroupMentionsDocker])
        .then((probeResults) => probeResults.some(Boolean));
};
|
|
137
|
-
/**
 * Returns a `Promise` that resolves to true if the code is running in a Docker container.
 * The detection runs once; later calls receive the same cached promise.
 *
 * @param {boolean} [forceReset]
 *   Internal, for unit tests only: when truthy the cached promise is discarded and the detection runs again.
 * @return {Promise<boolean>}
 *
 * @memberof utils
 * @name isDocker
 * @function
 */
const isDocker = (forceReset) => {
    const mustDetect = forceReset || !isDockerPromiseCache;
    if (mustDetect) {
        isDockerPromiseCache = createIsDockerPromise();
    }
    return isDockerPromiseCache;
};
|
|
153
|
-
exports.isDocker = isDocker;
|
|
154
|
-
/**
 * Calculates the weighted average of `arrValues`, where `arrWeights[i]` is the weight
 * of `arrValues[i]`. The result is `NaN` when the weights add up to zero (e.g. for empty arrays).
 *
 * @param {number[]} arrValues
 * @param {number[]} arrWeights
 * @return {number}
 *
 * @ignore
 */
const weightedAvg = (arrValues, arrWeights) => {
    let weightedSum = 0;
    let totalWeight = 0;
    arrValues.forEach((value, index) => {
        const weight = arrWeights[index];
        weightedSum += value * weight;
        totalWeight += weight;
    });
    return weightedSum / totalWeight;
};
|
|
171
|
-
exports.weightedAvg = weightedAvg;
|
|
172
|
-
/**
|
|
173
|
-
* Describes memory usage of an Actor.
|
|
174
|
-
*
|
|
175
|
-
* @typedef MemoryInfo
|
|
176
|
-
* @property {number} totalBytes Total memory available in the system or container
|
|
177
|
-
* @property {number} freeBytes Amount of free memory in the system or container
|
|
178
|
-
* @property {number} usedBytes Amount of memory used (= totalBytes - freeBytes)
|
|
179
|
-
* @property {number} mainProcessBytes Amount of memory used by the current Node.js process
|
|
180
|
-
* @property {number} childProcessesBytes Amount of memory used by child processes of the current Node.js process
|
|
181
|
-
*/
|
|
182
|
-
/**
 * Returns memory statistics of the process and the system, see {@link MemoryInfo}.
 *
 * Three environments are distinguished:
 * - AWS Lambda (Linux with `AWS_LAMBDA_FUNCTION_MEMORY_SIZE` set): limits come from that variable,
 *   usage from `process.memoryUsage()` and `/proc/meminfo`.
 * - Docker: container limits are read from the memory cgroup files (V1 or V2), falling back
 *   to the system values when those files cannot be read.
 * - Anything else: system-wide values reported by the `os` module.
 *
 * Beware that the function is quite inefficient because it spawns a new process.
 * Therefore you shouldn't call it too often, like more than once per second.
 *
 * @returns {Promise<MemoryInfo>}
 *
 * @memberof module:Apify
 * @name getMemoryInfo
 * @function
 */
const getMemoryInfo = async () => {
    // Lambda ships without `ps` and the other command line tools
    // needed to extract memory usage, so it gets a dedicated code path.
    const isLambdaEnvironment = process.platform === 'linux'
        && !!process.env.AWS_LAMBDA_FUNCTION_MEMORY_SIZE;
    // Called through module.exports on purpose, so that isDocker() can be mocked.
    const isDockerVar = !isLambdaEnvironment && (await module.exports.isDocker());

    // Step 1: memory used by this process and by its children.
    let mainProcessBytes = -1;
    let childProcessesBytes = 0;
    if (isLambdaEnvironment) {
        // reported in bytes
        mainProcessBytes = process.memoryUsage().rss;
        // https://stackoverflow.com/a/55914335/129415
        const meminfoTokens = (0, child_process_1.execSync)('cat /proc/meminfo')
            .toString()
            .split(/[\n: ]/)
            .filter((val) => val.trim());
        // meminfo reports in kb, not bytes. The value is the total used memory, so the memory
        // of the main node process is subtracted to infer what the child processes use.
        childProcessesBytes = meminfoTokens[19] * 1000 - mainProcessBytes;
    }
    else {
        // Query both root and child processes
        const processes = await psTreePromised(process.pid, true);
        const ownPid = `${process.pid}`;
        for (const rec of processes) {
            // Ignore the 'ps' or 'wmic' commands that ps-tree itself spawns to list the processes
            const isQueryTool = rec.COMMAND === 'ps' || rec.COMMAND === 'WMIC.exe';
            if (!isQueryTool) {
                const bytes = parseInt(rec.RSS, 10);
                // The main process is tracked separately from its children
                if (rec.PID === ownPid) {
                    mainProcessBytes = bytes;
                }
                else {
                    childProcessesBytes += bytes;
                }
            }
        }
    }

    // Step 2: total / used / free memory of the environment.
    let totalBytes;
    let usedBytes;
    let freeBytes;
    const useSystemMemory = () => {
        totalBytes = os_1.default.totalmem();
        freeBytes = os_1.default.freemem();
        usedBytes = totalBytes - freeBytes;
    };
    if (isLambdaEnvironment) {
        // memory size is defined in megabytes
        totalBytes = parseInt(process.env.AWS_LAMBDA_FUNCTION_MEMORY_SIZE, 10) * 1000000;
        usedBytes = mainProcessBytes + childProcessesBytes;
        freeBytes = totalBytes - usedBytes;
        utils_log_1.default.debug(`lambda size of ${totalBytes} with ${freeBytes} free bytes`);
    }
    else if (isDockerVar) {
        // Inside a Docker container the container memory limits apply.
        // The promisified functions are created here (not at module level) so that fs can be mocked.
        const readPromised = util_1.default.promisify(fs_1.default.readFile);
        const accessPromised = util_1.default.promisify(fs_1.default.access);
        // Check whether cgroups V1 or V2 is used
        let cgroupsVersion = 'V1';
        try {
            // If this directory does not exist, assume docker is using cgroups V2
            await accessPromised('/sys/fs/cgroup/memory/', fs_1.default.constants.R_OK);
        }
        catch (err) {
            cgroupsVersion = 'V2';
        }
        try {
            const [rawTotal, rawUsed] = await Promise.all([
                readPromised(MEMORY_FILE_PATHS.TOTAL[cgroupsVersion], MEMORY_FILE_ENCODING),
                readPromised(MEMORY_FILE_PATHS.USED[cgroupsVersion], MEMORY_FILE_ENCODING),
            ]);
            // Cgroups V2 files contain a newline character; strip it (and anything else unexpected) before parsing.
            const totalBytesStr = rawTotal.replace(/[^a-zA-Z0-9 ]/g, '');
            const usedBytesStr = rawUsed.replace(/[^a-zA-Z0-9 ]/g, '');
            // Unlimited memory is signalled in two ways:
            // - Cgroups V2 contains the 'max' string, see "memory.max" in
            //   https://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup.git/tree/Documentation/admin-guide/cgroup-v2.rst
            // - Cgroups V1 is set to a huge number related to platform and page size,
            //   see https://unix.stackexchange.com/q/420906
            const containerRunsWithUnlimitedMemory = totalBytesStr === 'max'
                || parseInt(totalBytesStr, 10) > Number.MAX_SAFE_INTEGER;
            totalBytes = containerRunsWithUnlimitedMemory
                ? os_1.default.totalmem()
                : parseInt(totalBytesStr, 10);
            usedBytes = parseInt(usedBytesStr, 10);
            freeBytes = totalBytes - usedBytes;
        }
        catch (err) {
            // log.deprecated logs a warning only once
            utils_log_1.default.deprecated('Your environment is Docker, but your system does not support memory cgroups. '
                + 'If you\'re running containers with limited memory, memory auto-scaling will not work properly.\n\n'
                + `Cause: ${err.message}`);
            useSystemMemory();
        }
    }
    else {
        useSystemMemory();
    }
    return {
        totalBytes,
        freeBytes,
        usedBytes,
        mainProcessBytes,
        childProcessesBytes,
    };
};
|
|
309
|
-
exports.getMemoryInfo = getMemoryInfo;
|
|
310
|
-
/**
 * Helper function that returns the first own enumerable key of a plain object.
 *
 * Keys inherited through the prototype chain are ignored, so the result only ever
 * names a property stored on `dict` itself.
 *
 * @param {object} [dict] Object to inspect; `null` and `undefined` are tolerated.
 * @returns {(string|undefined)} The first own key, or `undefined` when there is none.
 *
 * @ignore
 */
const getFirstKey = (dict) => {
    // for...in also walks inherited enumerable properties, hence the explicit own-property check.
    for (const key in dict) { // eslint-disable-line
        if (Object.prototype.hasOwnProperty.call(dict, key)) {
            return key;
        }
    }
    return undefined;
};
|
|
320
|
-
exports.getFirstKey = getFirstKey;
|
|
321
|
-
/**
 * Gets a typical path to Chrome executable, depending on the current operating system.
 * The path is a best guess: only on Windows is the file system consulted,
 * and even there a default is returned if no candidate exists.
 *
 * @return {string}
 * @ignore
 */
const getTypicalChromeExecutablePath = () => {
    /**
     * Resolves the Chrome executable through the `ProgramFiles` / `ProgramFiles(x86)` environment
     * variables, which keeps it working on non-english language OS.
     * Also takes into account the old [chrome 380177 issue](https://bugs.chromium.org/p/chromium/issues/detail?id=380177).
     *
     * @returns {string}
     * @ignore
     */
    const getWin32Path = () => {
        const candidates = [
            `${process.env.ProgramFiles}\\Google\\Chrome\\Application\\chrome.exe`,
            `${process.env['ProgramFiles(x86)']}\\Google\\Chrome\\Application\\chrome.exe`,
        ];
        // The first candidate that exists wins; later ones are not even checked.
        const existingPath = candidates.find((candidate) => fs_1.default.existsSync(candidate));
        if (existingPath === undefined) {
            return 'C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe';
        }
        return existingPath;
    };
    const platform = os_1.default.platform();
    if (platform === 'darwin') {
        return '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome';
    }
    if (platform === 'win32') {
        return getWin32Path();
    }
    return '/usr/bin/google-chrome';
};
|
|
353
|
-
exports.getTypicalChromeExecutablePath = getTypicalChromeExecutablePath;
|
|
354
|
-
/**
 * Tells whether the code is running on the Apify platform (`true`) or somewhere else,
 * for example locally (`false`). The decision is based solely on whether
 * the `IS_AT_HOME` environment variable holds a non-empty value.
 *
 * @returns {boolean}
 *
 * @memberof module:Apify
 * @name isAtHome
 * @function
 */
const isAtHome = () => Boolean(process.env[consts_1.ENV_VARS.IS_AT_HOME]);
|
|
364
|
-
exports.isAtHome = isAtHome;
|
|
365
|
-
/**
 * Creates a `Promise` that resolves once the given number of milliseconds has elapsed.
 * Handy for pausing your code, e.g. to avoid overloading the target website
 * or to make bot detection less likely.
 *
 * **Example usage:**
 *
 * ```
 * const Apify = require('apify');
 *
 * ...
 *
 * // Sleep 1.5 seconds
 * await Apify.utils.sleep(1500);
 * ```
 * @param {number} millis Period of time to sleep, in milliseconds. If not a positive number, the returned promise resolves immediately.
 * @memberof utils
 * @name sleep
 * @function
 * @return {Promise<void>}
 */
const sleep = (millis) => new Promise((resolve) => {
    setTimeout(resolve, millis);
});
|
|
388
|
-
exports.sleep = sleep;
|
|
389
|
-
/**
 * Downloads the resource at the given URL and resolves to an array of all URLs found in its body.
 * A custom regular expression and encoding may optionally be provided.
 *
 * A Google Sheets sharing link is transparently replaced by the sheet's CSV export link before downloading.
 *
 * @param {object} options
 * @param {string} options.url URL to the file
 * @param {string} [options.encoding='utf8'] The encoding of the file.
 * @param {string} [options.proxyUrl] The proxy url to be used for the request.
 * @param {RegExp} [options.urlRegExp=URL_NO_COMMAS_REGEX]
 *   Custom regular expression to identify the URLs in the file to extract.
 *   The regular expression should be case-insensitive and have global flag set (i.e. `/something/gi`).
 * @returns {Promise<Array<string>>}
 * @memberOf utils
 */
const downloadListOfUrls = async (options) => {
    (0, ow_1.default)(options, ow_1.default.object.exactShape({
        url: ow_1.default.string.url,
        encoding: ow_1.default.optional.string,
        urlRegExp: ow_1.default.optional.regExp,
        proxyUrl: ow_1.default.optional.string,
    }));
    const { url, encoding = 'utf8', urlRegExp = URL_NO_COMMAS_REGEX, proxyUrl } = options;
    // Detect commonly mistaken URLs and repair them. For now only the Google Sheets
    // sharing URL is recognized; it is swapped for the CSV download one.
    const sheetMatch = url.match(/^(https:\/\/docs\.google\.com\/spreadsheets\/d\/(?:\w|-)+)\/?/);
    const fixedUrl = sheetMatch ? `${sheetMatch[1]}/gviz/tq?tqx=out:csv` : url;
    const response = await (0, utils_request_1.requestAsBrowser)({ url: fixedUrl, encoding, proxyUrl });
    return extractUrls({ string: response.body, urlRegExp });
};
|
|
420
|
-
/**
 * Gathers every URL found in an arbitrary string into an array, optionally using a custom regular expression.
 * An empty array is returned when nothing matches.
 * @param {object} options
 * @param {string} options.string
 * @param {RegExp} [options.urlRegExp=Apify.utils.URL_NO_COMMAS_REGEX]
 * @returns {string[]}
 * @memberOf utils
 */
const extractUrls = (options) => {
    (0, ow_1.default)(options, ow_1.default.object.exactShape({
        string: ow_1.default.string,
        urlRegExp: ow_1.default.optional.regExp,
    }));
    const { string, urlRegExp = URL_NO_COMMAS_REGEX } = options;
    const matches = string.match(urlRegExp);
    if (matches === null) {
        return [];
    }
    return matches;
};
|
|
436
|
-
// NOTE: 'noscript' is skipped because its content is parsed as text rather than as HTML elements, which damages the results.
const SKIP_TAGS_REGEX = /^(script|style|canvas|svg|noscript)$/i;
// Elements whose content must be separated from the surrounding text by newlines.
const BLOCK_TAGS_REGEX = /^(p|h1|h2|h3|h4|h5|h6|ol|ul|li|pre|address|blockquote|dl|div|fieldset|form|table|tr|select|option)$/i;
/**
 * The function converts a HTML document to a plain text.
 *
 * The plain text generated by the function is similar to a text captured
 * by pressing Ctrl+A and Ctrl+C on a page when loaded in a web browser.
 * The function doesn't aspire to preserve the formatting or to be perfectly correct with respect to HTML specifications.
 * However, it attempts to generate newlines and whitespaces in and around HTML elements
 * to avoid merging distinct parts of text and thus enable extraction of data from the text (e.g. phone numbers).
 *
 * Whitespace is collapsed to single spaces everywhere except inside `<pre>` elements,
 * including text nested deeper in a `<pre>` (e.g. `<pre><code>...</code></pre>`).
 * `<br>` produces a newline, every `<td>` is followed by a tab, and comments
 * as well as `script`, `style`, `canvas`, `svg` and `noscript` elements are ignored.
 *
 * **Example usage**
 * ```javascript
 * const text = htmlToText('<html><body>Some text</body></html>');
 * console.log(text);
 * ```
 *
 * Note that the function uses [cheerio](https://www.npmjs.com/package/cheerio) to parse the HTML.
 * Optionally, to avoid duplicate parsing of HTML and thus improve performance, you can pass
 * an existing Cheerio object to the function instead of the HTML text. The HTML should be parsed
 * with the `decodeEntities` option set to `true`. For example:
 *
 * ```javascript
 * const cheerio = require('cheerio');
 * const html = '<html><body>Some text</body></html>';
 * const text = htmlToText(cheerio.load(html, { decodeEntities: true }));
 * ```
 * @param {(string|CheerioAPI)} html HTML text or parsed HTML represented using a
 *   [cheerio](https://www.npmjs.com/package/cheerio) function.
 * @return {string} Plain text; an empty string when `html` is falsy.
 * @memberOf utils
 * @function
 */
const htmlToText = (html) => {
    if (!html)
        return '';
    // TODO: Add support for "html" being a Cheerio element, otherwise the only way
    // to use it is e.g. htmlToText($('p').html())) which is inefficient.
    // Also, it seems this doesn't work well in CheerioScraper, e.g. htmlToText($)
    // produces text with a lot of HTML elements in it. Let's just deprecate this sort of usage,
    // and make the parameter "htmlOrCheerioElement".
    /**
     * @type {CheerioAPI}
     * @ignore
     */
    const $ = typeof html === 'function' ? html : cheerio_1.default.load(html, { decodeEntities: true });
    let text = '';
    // Appends the text of the given nodes (and, recursively, their children) to `text`.
    // `insidePre` is true when any ancestor of the nodes is a <pre> element.
    // (Deliberately not called `process`, to avoid shadowing the Node.js global.)
    const appendNodes = (elems, insidePre) => {
        const len = elems ? elems.length : 0;
        for (let i = 0; i < len; i++) {
            const elem = elems[i];
            if (elem.type === 'text') {
                // Compress spaces, unless we're anywhere inside a <pre> element
                const keepWhitespace = insidePre || (elem.parent && elem.parent.tagName === 'pre');
                let compr = keepWhitespace ? elem.data : elem.data.replace(/\s+/g, ' ');
                // If text is empty or ends with a whitespace, don't add the leading whitespace
                if (compr.startsWith(' ') && /(^|\s)$/.test(text))
                    compr = compr.slice(1);
                text += compr;
            }
            else if (elem.type === 'comment' || SKIP_TAGS_REGEX.test(elem.tagName)) {
                // Skip comments and special elements
            }
            else if (elem.tagName === 'br') {
                text += '\n';
            }
            else if (elem.tagName === 'td') {
                appendNodes(elem.children, insidePre);
                text += '\t';
            }
            else {
                // Block elements must be surrounded by newlines (unless beginning of text)
                const isBlockTag = BLOCK_TAGS_REGEX.test(elem.tagName);
                if (isBlockTag && !/(^|\n)$/.test(text))
                    text += '\n';
                appendNodes(elem.children, insidePre || elem.tagName === 'pre');
                if (isBlockTag && !text.endsWith('\n'))
                    text += '\n';
            }
        }
    };
    // If HTML document has body, only convert that, otherwise convert the entire HTML
    const $body = $('body');
    appendNodes($body.length > 0 ? $body : $.root(), false);
    return text.trim();
};
|
|
526
|
-
/**
 * Creates a standardized debug info from request and response. This info is usually added to dataset under the hidden `#debug` field.
 *
 * @param {(Request|RequestOptions)} request [Apify.Request](https://sdk.apify.com/docs/api/request) object.
 * @param {(*|IncomingMessage|PuppeteerResponse)} [response]
 *   Puppeteer [`Response`](https://pptr.dev/#?product=Puppeteer&version=v1.11.0&show=api-class-response)
 *   or NodeJS [`http.IncomingMessage`](https://nodejs.org/api/http.html#class-httpincomingmessage).
 *   Defaults to an empty object, in which case `statusCode` is `undefined`.
 * @param {Object<string, *>} [additionalFields] Object containing additional fields to be added.
 *   These are spread last, so a field with the same name as a standard one overrides it.
 * @return {Object<string, *>}
 */
const createRequestDebugInfo = (request, response = {}, additionalFields = {}) => {
    (0, ow_1.default)(request, ow_1.default.object);
    (0, ow_1.default)(response, ow_1.default.object);
    (0, ow_1.default)(additionalFields, ow_1.default.object);
    return {
        requestId: request.id,
        url: request.url,
        loadedUrl: request.loadedUrl,
        method: request.method,
        retryCount: request.retryCount,
        errorMessages: request.errorMessages,
        // Puppeteer response has a .status() function, whereas NodeJS response has a .statusCode property.
        statusCode: typeof response.status === 'function' ? response.status() : response.statusCode,
        ...additionalFields,
    };
};
|
|
553
|
-
/**
 * Turns a SNAKE_CASE identifier into its camelCase form.
 *
 * The input is lower-cased and split on underscores; the first segment stays
 * as it is and every following segment gets its first character upper-cased.
 *
 * @param {string} snakeCaseStr
 * @return {string}
 * @ignore
 */
const snakeCaseToCamelCase = (snakeCaseStr) => {
    const [firstWord, ...otherWords] = snakeCaseStr.toLowerCase().split('_');
    return otherWords.reduce(
        (camel, word) => camel + word.charAt(0).toUpperCase() + word.slice(1),
        firstWord,
    );
};
|
|
571
|
-
exports.snakeCaseToCamelCase = snakeCaseToCamelCase;
|
|
572
|
-
/**
 * Logs a warning when the running Apify SDK is older than the latest known version.
 *
 * Nothing is logged when the warning is disabled through the environment, when the
 * latest version is not provided in the environment, or when the installed version
 * is not lower than the latest one.
 *
 * @ignore
 */
const printOutdatedSdkWarning = () => {
    const { env } = process;
    if (env[consts_1.ENV_VARS.DISABLE_OUTDATED_WARNING]) {
        return;
    }
    const latestApifyVersion = env[consts_1.ENV_VARS.SDK_LATEST_VERSION];
    const isOutdated = Boolean(latestApifyVersion) && semver_1.default.lt(package_json_2.version, latestApifyVersion);
    if (!isOutdated) {
        return;
    }
    // eslint-disable-next-line
    utils_log_1.default.warning(`You are using an outdated version (${package_json_2.version}) of Apify SDK. We recommend you to update to the latest version (${latestApifyVersion}).
Read more about Apify SDK versioning at: https://help.apify.com/en/articles/3184510-updates-and-versioning-of-apify-sdk`);
};
|
|
587
|
-
exports.printOutdatedSdkWarning = printOutdatedSdkWarning;
|
|
588
|
-
/**
 * Determines the content type of a response.
 *
 * The `Content-Type` header is used when it is present and parseable. Otherwise the
 * type is derived from the file extension in the response URL, and if that yields
 * nothing either, `application/octet-stream; charset=utf-8` is used.
 *
 * @param {IncomingMessage} response - HTTP response object; must have a `url` string and a `headers` object
 * @return {{ type: string, charset: string }}
 * @ignore
 */
const parseContentTypeFromResponse = (response) => {
    const responseShape = ow_1.default.object.partialShape({
        url: ow_1.default.string.url,
        headers: ow_1.default.object,
    });
    (0, ow_1.default)(response, responseShape);
    const { url, headers } = response;
    const headerValue = headers['content-type'];
    let parsed;
    if (headerValue) {
        try {
            parsed = content_type_1.default.parse(headerValue);
        }
        catch (err) {
            // The header could not be parsed; the file extension fallback below takes over.
        }
    }
    if (!parsed) {
        // Fallback: guess the content type from the extension of the URL path.
        const { pathname } = new url_1.URL(url);
        const extension = path_1.default.extname(pathname);
        const guessedContentType = mime_types_1.default.contentType(extension);
        // Last-resort content type, specified in https://tools.ietf.org/html/rfc7231#section-3.1.1.5
        parsed = content_type_1.default.parse(guessedContentType || 'application/octet-stream; charset=utf-8');
    }
    const { type, parameters } = parsed;
    return { type, charset: parameters.charset };
};
|
|
621
|
-
exports.parseContentTypeFromResponse = parseContentTypeFromResponse;
|
|
622
|
-
/**
 * Returns a promise that resolves with the finished Run object when the provided actor run finishes
 * or with the unfinished Run object when the `waitSecs` timeout lapses. The promise is NOT rejected
 * based on run status. You can inspect the `status` property of the Run object to find out its status.
 *
 * This is useful when you need to chain actor executions. Similar effect can be achieved
 * by using webhooks, so be sure to review which technique fits your use-case better.
 *
 * The options object is validated strictly: only `actorId`, `runId` and `waitSecs` are accepted
 * and any other key (including `token`) makes the validation throw. The run is always fetched
 * with the default ApifyClient instance.
 *
 * @param {object} options
 * @param {string} options.actorId
 *  ID of the actor that started the run.
 * @param {string} options.runId
 *  ID of the run itself.
 * @param {number} [options.waitSecs]
 *  Maximum time to wait for the run to finish, in seconds.
 *  If the limit is reached, the returned promise is resolved to a run object that will have
 *  status `READY` or `RUNNING`. If `waitSecs` is omitted (or `0`), the function waits indefinitely.
 * @returns {Promise<ActorRun>}
 * @throws {Error} When no run details could be fetched from the server before the waiting ended.
 * @memberOf utils
 * @name waitForRunToFinish
 * @function
 * @deprecated
 *  Please use the 'waitForFinish' functions of 'apify-client'.
 * @ignore
 */
const waitForRunToFinish = async (options) => {
    (0, ow_1.default)(options, ow_1.default.object.exactShape({
        actorId: ow_1.default.string,
        runId: ow_1.default.string,
        waitSecs: ow_1.default.optional.number,
    }));
    const { actorId, runId, waitSecs } = options;
    let run;
    const startedAt = Date.now();
    const elapsedSecs = () => (Date.now() - startedAt) / 1000;
    // Keep polling until the time budget (if any) is used up or the run reaches a terminal status.
    const shouldRepeat = () => {
        if (waitSecs && elapsedSecs() >= waitSecs)
            return false;
        if (run && consts_1.ACT_JOB_TERMINAL_STATUSES.includes(run.status))
            return false;
        return true;
    };
    while (shouldRepeat()) {
        // Ask the API to wait only for the time that is still left; without a limit use a very long wait.
        const waitForFinish = waitSecs
            ? Math.round(waitSecs - elapsedSecs())
            : 999999;
        run = await exports.apifyClient.run(runId, actorId).waitForFinish({ waitSecs: waitForFinish });
        // It might take some time for database replicas to get up-to-date,
        // so the call might return null. Wait a little bit and try it again.
        if (!run)
            await (0, exports.sleep)(250);
    }
    if (!run) {
        throw new Error('Waiting for run to finish failed. Cannot fetch actor run details from the server.');
    }
    return run;
};
|
|
682
|
-
exports.waitForRunToFinish = waitForRunToFinish;
|
|
683
|
-
/**
 * Removes the local storage folder that is created when running locally,
 * which is handy while debugging your code on your own machine.
 *
 * Be careful: the given folder is deleted together with everything inside it!
 *
 * @param {string} [folder] The folder to clean up. When omitted (or empty), the folder
 *   set in the local storage dir environment variable is used, and `apify_storage` if that is not set either.
 * @returns {Promise<void>}
 * @memberOf utils
 * @name purgeLocalStorage
 * @function
 */
const purgeLocalStorage = async (folder) => {
    // Explicit argument first, then the environment, then the default folder name.
    const targetFolder = folder || process.env[consts_1.ENV_VARS.LOCAL_STORAGE_DIR] || 'apify_storage';
    await rimrafp(targetFolder);
};
|
|
703
|
-
exports.purgeLocalStorage = purgeLocalStorage;
|
|
704
|
-
/**
|
|
705
|
-
* A namespace that contains various utilities.
|
|
706
|
-
*
|
|
707
|
-
* **Example usage:**
|
|
708
|
-
*
|
|
709
|
-
* ```javascript
|
|
710
|
-
* const Apify = require('apify');
|
|
711
|
-
*
|
|
712
|
-
* ...
|
|
713
|
-
*
|
|
714
|
-
* // Sleep 1.5 seconds
|
|
715
|
-
* await Apify.utils.sleep(1500);
|
|
716
|
-
* ```
|
|
717
|
-
* @namespace utils
|
|
718
|
-
*/
|
|
719
|
-
exports.publicUtils = {
|
|
720
|
-
isDocker: exports.isDocker,
|
|
721
|
-
sleep: exports.sleep,
|
|
722
|
-
downloadListOfUrls,
|
|
723
|
-
extractUrls,
|
|
724
|
-
htmlToText,
|
|
725
|
-
URL_NO_COMMAS_REGEX,
|
|
726
|
-
URL_WITH_COMMAS_REGEX,
|
|
727
|
-
createRequestDebugInfo,
|
|
728
|
-
waitForRunToFinish: exports.waitForRunToFinish,
|
|
729
|
-
purgeLocalStorage: exports.purgeLocalStorage,
|
|
730
|
-
};
|
|
731
|
-
//# sourceMappingURL=utils.js.map
|