apify 2.3.1-beta.4 → 3.0.0-alpha.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (211) hide show
  1. package/README.md +6 -5
  2. package/package.json +69 -128
  3. package/build/actor.d.ts +0 -113
  4. package/build/actor.d.ts.map +0 -1
  5. package/build/actor.js +0 -582
  6. package/build/actor.js.map +0 -1
  7. package/build/apify.d.ts +0 -752
  8. package/build/apify.d.ts.map +0 -1
  9. package/build/apify.js +0 -877
  10. package/build/apify.js.map +0 -1
  11. package/build/autoscaling/autoscaled_pool.d.ts +0 -384
  12. package/build/autoscaling/autoscaled_pool.d.ts.map +0 -1
  13. package/build/autoscaling/autoscaled_pool.js +0 -557
  14. package/build/autoscaling/autoscaled_pool.js.map +0 -1
  15. package/build/autoscaling/snapshotter.d.ts +0 -278
  16. package/build/autoscaling/snapshotter.d.ts.map +0 -1
  17. package/build/autoscaling/snapshotter.js +0 -447
  18. package/build/autoscaling/snapshotter.js.map +0 -1
  19. package/build/autoscaling/system_status.d.ts +0 -224
  20. package/build/autoscaling/system_status.d.ts.map +0 -1
  21. package/build/autoscaling/system_status.js +0 -228
  22. package/build/autoscaling/system_status.js.map +0 -1
  23. package/build/browser_launchers/browser_launcher.d.ts +0 -154
  24. package/build/browser_launchers/browser_launcher.d.ts.map +0 -1
  25. package/build/browser_launchers/browser_launcher.js +0 -160
  26. package/build/browser_launchers/browser_launcher.js.map +0 -1
  27. package/build/browser_launchers/browser_plugin.d.ts +0 -23
  28. package/build/browser_launchers/browser_plugin.d.ts.map +0 -1
  29. package/build/browser_launchers/browser_plugin.js +0 -25
  30. package/build/browser_launchers/browser_plugin.js.map +0 -1
  31. package/build/browser_launchers/playwright_launcher.d.ts +0 -131
  32. package/build/browser_launchers/playwright_launcher.d.ts.map +0 -1
  33. package/build/browser_launchers/playwright_launcher.js +0 -150
  34. package/build/browser_launchers/playwright_launcher.js.map +0 -1
  35. package/build/browser_launchers/puppeteer_launcher.d.ts +0 -153
  36. package/build/browser_launchers/puppeteer_launcher.d.ts.map +0 -1
  37. package/build/browser_launchers/puppeteer_launcher.js +0 -197
  38. package/build/browser_launchers/puppeteer_launcher.js.map +0 -1
  39. package/build/cache_container.d.ts +0 -31
  40. package/build/cache_container.d.ts.map +0 -1
  41. package/build/cache_container.js +0 -48
  42. package/build/cache_container.js.map +0 -1
  43. package/build/configuration.d.ts +0 -226
  44. package/build/configuration.d.ts.map +0 -1
  45. package/build/configuration.js +0 -325
  46. package/build/configuration.js.map +0 -1
  47. package/build/constants.d.ts +0 -37
  48. package/build/constants.d.ts.map +0 -1
  49. package/build/constants.js +0 -41
  50. package/build/constants.js.map +0 -1
  51. package/build/crawlers/basic_crawler.d.ts +0 -443
  52. package/build/crawlers/basic_crawler.d.ts.map +0 -1
  53. package/build/crawlers/basic_crawler.js +0 -664
  54. package/build/crawlers/basic_crawler.js.map +0 -1
  55. package/build/crawlers/browser_crawler.d.ts +0 -512
  56. package/build/crawlers/browser_crawler.d.ts.map +0 -1
  57. package/build/crawlers/browser_crawler.js +0 -540
  58. package/build/crawlers/browser_crawler.js.map +0 -1
  59. package/build/crawlers/cheerio_crawler.d.ts +0 -931
  60. package/build/crawlers/cheerio_crawler.d.ts.map +0 -1
  61. package/build/crawlers/cheerio_crawler.js +0 -913
  62. package/build/crawlers/cheerio_crawler.js.map +0 -1
  63. package/build/crawlers/crawler_extension.d.ts +0 -10
  64. package/build/crawlers/crawler_extension.d.ts.map +0 -1
  65. package/build/crawlers/crawler_extension.js +0 -19
  66. package/build/crawlers/crawler_extension.js.map +0 -1
  67. package/build/crawlers/crawler_utils.d.ts +0 -34
  68. package/build/crawlers/crawler_utils.d.ts.map +0 -1
  69. package/build/crawlers/crawler_utils.js +0 -87
  70. package/build/crawlers/crawler_utils.js.map +0 -1
  71. package/build/crawlers/playwright_crawler.d.ts +0 -448
  72. package/build/crawlers/playwright_crawler.d.ts.map +0 -1
  73. package/build/crawlers/playwright_crawler.js +0 -299
  74. package/build/crawlers/playwright_crawler.js.map +0 -1
  75. package/build/crawlers/puppeteer_crawler.d.ts +0 -425
  76. package/build/crawlers/puppeteer_crawler.d.ts.map +0 -1
  77. package/build/crawlers/puppeteer_crawler.js +0 -299
  78. package/build/crawlers/puppeteer_crawler.js.map +0 -1
  79. package/build/crawlers/statistics.d.ts +0 -185
  80. package/build/crawlers/statistics.d.ts.map +0 -1
  81. package/build/crawlers/statistics.js +0 -331
  82. package/build/crawlers/statistics.js.map +0 -1
  83. package/build/enqueue_links/click_elements.d.ts +0 -179
  84. package/build/enqueue_links/click_elements.d.ts.map +0 -1
  85. package/build/enqueue_links/click_elements.js +0 -434
  86. package/build/enqueue_links/click_elements.js.map +0 -1
  87. package/build/enqueue_links/enqueue_links.d.ts +0 -117
  88. package/build/enqueue_links/enqueue_links.d.ts.map +0 -1
  89. package/build/enqueue_links/enqueue_links.js +0 -163
  90. package/build/enqueue_links/enqueue_links.js.map +0 -1
  91. package/build/enqueue_links/shared.d.ts +0 -42
  92. package/build/enqueue_links/shared.d.ts.map +0 -1
  93. package/build/enqueue_links/shared.js +0 -121
  94. package/build/enqueue_links/shared.js.map +0 -1
  95. package/build/errors.d.ts +0 -29
  96. package/build/errors.d.ts.map +0 -1
  97. package/build/errors.js +0 -38
  98. package/build/errors.js.map +0 -1
  99. package/build/events.d.ts +0 -11
  100. package/build/events.d.ts.map +0 -1
  101. package/build/events.js +0 -147
  102. package/build/events.js.map +0 -1
  103. package/build/index.d.ts +0 -4
  104. package/build/index.d.ts.map +0 -1
  105. package/build/index.js +0 -7
  106. package/build/index.js.map +0 -1
  107. package/build/main.d.ts +0 -179
  108. package/build/main.d.ts.map +0 -1
  109. package/build/main.js +0 -81
  110. package/build/main.js.map +0 -1
  111. package/build/playwright_utils.d.ts +0 -9
  112. package/build/playwright_utils.d.ts.map +0 -1
  113. package/build/playwright_utils.js +0 -90
  114. package/build/playwright_utils.js.map +0 -1
  115. package/build/proxy_configuration.d.ts +0 -411
  116. package/build/proxy_configuration.d.ts.map +0 -1
  117. package/build/proxy_configuration.js +0 -517
  118. package/build/proxy_configuration.js.map +0 -1
  119. package/build/pseudo_url.d.ts +0 -86
  120. package/build/pseudo_url.d.ts.map +0 -1
  121. package/build/pseudo_url.js +0 -153
  122. package/build/pseudo_url.js.map +0 -1
  123. package/build/puppeteer_request_interception.d.ts +0 -8
  124. package/build/puppeteer_request_interception.d.ts.map +0 -1
  125. package/build/puppeteer_request_interception.js +0 -235
  126. package/build/puppeteer_request_interception.js.map +0 -1
  127. package/build/puppeteer_utils.d.ts +0 -250
  128. package/build/puppeteer_utils.d.ts.map +0 -1
  129. package/build/puppeteer_utils.js +0 -551
  130. package/build/puppeteer_utils.js.map +0 -1
  131. package/build/request.d.ts +0 -180
  132. package/build/request.d.ts.map +0 -1
  133. package/build/request.js +0 -261
  134. package/build/request.js.map +0 -1
  135. package/build/request_list.d.ts +0 -581
  136. package/build/request_list.d.ts.map +0 -1
  137. package/build/request_list.js +0 -826
  138. package/build/request_list.js.map +0 -1
  139. package/build/serialization.d.ts +0 -5
  140. package/build/serialization.d.ts.map +0 -1
  141. package/build/serialization.js +0 -139
  142. package/build/serialization.js.map +0 -1
  143. package/build/session_pool/errors.d.ts +0 -11
  144. package/build/session_pool/errors.d.ts.map +0 -1
  145. package/build/session_pool/errors.js +0 -18
  146. package/build/session_pool/errors.js.map +0 -1
  147. package/build/session_pool/events.d.ts +0 -5
  148. package/build/session_pool/events.d.ts.map +0 -1
  149. package/build/session_pool/events.js +0 -6
  150. package/build/session_pool/events.js.map +0 -1
  151. package/build/session_pool/session.d.ts +0 -286
  152. package/build/session_pool/session.d.ts.map +0 -1
  153. package/build/session_pool/session.js +0 -355
  154. package/build/session_pool/session.js.map +0 -1
  155. package/build/session_pool/session_pool.d.ts +0 -280
  156. package/build/session_pool/session_pool.d.ts.map +0 -1
  157. package/build/session_pool/session_pool.js +0 -393
  158. package/build/session_pool/session_pool.js.map +0 -1
  159. package/build/session_pool/session_utils.d.ts +0 -4
  160. package/build/session_pool/session_utils.d.ts.map +0 -1
  161. package/build/session_pool/session_utils.js +0 -24
  162. package/build/session_pool/session_utils.js.map +0 -1
  163. package/build/stealth/hiding_tricks.d.ts +0 -22
  164. package/build/stealth/hiding_tricks.d.ts.map +0 -1
  165. package/build/stealth/hiding_tricks.js +0 -308
  166. package/build/stealth/hiding_tricks.js.map +0 -1
  167. package/build/stealth/stealth.d.ts +0 -56
  168. package/build/stealth/stealth.d.ts.map +0 -1
  169. package/build/stealth/stealth.js +0 -125
  170. package/build/stealth/stealth.js.map +0 -1
  171. package/build/storages/dataset.d.ts +0 -288
  172. package/build/storages/dataset.d.ts.map +0 -1
  173. package/build/storages/dataset.js +0 -480
  174. package/build/storages/dataset.js.map +0 -1
  175. package/build/storages/key_value_store.d.ts +0 -243
  176. package/build/storages/key_value_store.d.ts.map +0 -1
  177. package/build/storages/key_value_store.js +0 -462
  178. package/build/storages/key_value_store.js.map +0 -1
  179. package/build/storages/request_queue.d.ts +0 -318
  180. package/build/storages/request_queue.d.ts.map +0 -1
  181. package/build/storages/request_queue.js +0 -636
  182. package/build/storages/request_queue.js.map +0 -1
  183. package/build/storages/storage_manager.d.ts +0 -87
  184. package/build/storages/storage_manager.d.ts.map +0 -1
  185. package/build/storages/storage_manager.js +0 -150
  186. package/build/storages/storage_manager.js.map +0 -1
  187. package/build/tsconfig.tsbuildinfo +0 -1
  188. package/build/typedefs.d.ts +0 -146
  189. package/build/typedefs.d.ts.map +0 -1
  190. package/build/typedefs.js +0 -88
  191. package/build/typedefs.js.map +0 -1
  192. package/build/utils.d.ts +0 -175
  193. package/build/utils.d.ts.map +0 -1
  194. package/build/utils.js +0 -731
  195. package/build/utils.js.map +0 -1
  196. package/build/utils_log.d.ts +0 -41
  197. package/build/utils_log.d.ts.map +0 -1
  198. package/build/utils_log.js +0 -192
  199. package/build/utils_log.js.map +0 -1
  200. package/build/utils_request.d.ts +0 -77
  201. package/build/utils_request.d.ts.map +0 -1
  202. package/build/utils_request.js +0 -385
  203. package/build/utils_request.js.map +0 -1
  204. package/build/utils_social.d.ts +0 -210
  205. package/build/utils_social.d.ts.map +0 -1
  206. package/build/utils_social.js +0 -787
  207. package/build/utils_social.js.map +0 -1
  208. package/build/validators.d.ts +0 -23
  209. package/build/validators.d.ts.map +0 -1
  210. package/build/validators.js +0 -29
  211. package/build/validators.js.map +0 -1
package/build/utils.js DELETED
@@ -1,731 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.publicUtils = exports.purgeLocalStorage = exports.waitForRunToFinish = exports.parseContentTypeFromResponse = exports.printOutdatedSdkWarning = exports.snakeCaseToCamelCase = exports.sleep = exports.isAtHome = exports.getTypicalChromeExecutablePath = exports.getFirstKey = exports.getMemoryInfo = exports.weightedAvg = exports.isDocker = exports.addCharsetToContentType = exports.apifyClient = exports.logSystemInfo = exports.newClient = void 0;
4
- const tslib_1 = require("tslib");
5
- const ps_tree_1 = (0, tslib_1.__importDefault)(require("@apify/ps-tree"));
6
- const child_process_1 = require("child_process");
7
- const apify_client_1 = require("apify-client");
8
- const package_json_1 = require("apify-client/package.json");
9
- const consts_1 = require("@apify/consts");
10
- // eslint-disable-next-line import/no-duplicates
11
- const cheerio_1 = (0, tslib_1.__importDefault)(require("cheerio"));
12
- const content_type_1 = (0, tslib_1.__importDefault)(require("content-type"));
13
- const fs_1 = (0, tslib_1.__importDefault)(require("fs"));
14
- const mime_types_1 = (0, tslib_1.__importDefault)(require("mime-types"));
15
- const os_1 = (0, tslib_1.__importDefault)(require("os"));
16
- const ow_1 = (0, tslib_1.__importDefault)(require("ow"));
17
- const path_1 = (0, tslib_1.__importDefault)(require("path"));
18
- const semver_1 = (0, tslib_1.__importDefault)(require("semver"));
19
- const underscore_1 = (0, tslib_1.__importDefault)(require("underscore"));
20
- const url_1 = require("url");
21
- const util_1 = (0, tslib_1.__importDefault)(require("util"));
22
- const rimraf_1 = (0, tslib_1.__importDefault)(require("rimraf"));
23
- const package_json_2 = require("../package.json");
24
- const utils_log_1 = (0, tslib_1.__importDefault)(require("./utils_log"));
25
- const utils_request_1 = require("./utils_request");
26
- const configuration_1 = require("./configuration");
27
- /* eslint-enable no-unused-vars,import/named,import/no-duplicates,import/order */
28
- const rimrafp = util_1.default.promisify(rimraf_1.default);
/**
 * Default regular expression for finding URLs inside arbitrary text (plain text, JSON, CSV, ...).
 * It accepts the common URL characters plus Unicode letters (not symbols) and deliberately
 * rejects commas and spaces, so URLs can be pulled out of comma-delimited lists.
 *
 * The expression carries the `g`, `i` and `u` flags. Because of `g`, it keeps `lastIndex`
 * state when used with `.test()` / `.exec()`; `String.prototype.match` is not affected.
 * @memberOf utils
 */
const URL_NO_COMMAS_REGEX = RegExp('https?://(www\\.)?[\\p{L}0-9][-\\p{L}0-9@:%._\\+~#=]{0,254}[\\p{L}0-9]\\.[a-z]{2,63}(:\\d{1,5})?(/[-\\p{L}0-9@:%_\\+.~#?&//=\\(\\)]*)?', 'giu'); // eslint-disable-line

/**
 * Same as `URL_NO_COMMAS_REGEX`, except that commas are additionally allowed in the URL path and query.
 * Note that this may prevent parsing URLs from comma-delimited lists, or produce malformed URLs,
 * because a trailing separator comma is swallowed into the match.
 * @memberOf utils
 */
const URL_WITH_COMMAS_REGEX = RegExp('https?://(www\\.)?[\\p{L}0-9][-\\p{L}0-9@:%._\\+~#=]{0,254}[\\p{L}0-9]\\.[a-z]{2,63}(:\\d{1,5})?(/[-\\p{L}0-9@:%_\\+,.~#?&//=\\(\\)]*)?', 'giu'); // eslint-disable-line
/**
 * Locations of the cgroup files that expose the container memory limit (`TOTAL`)
 * and the current memory usage (`USED`), keyed by cgroups version (`V1` / `V2`).
 */
const MEMORY_FILE_PATHS = {
    TOTAL: {
        V1: '/sys/fs/cgroup/memory/memory.limit_in_bytes',
        V2: '/sys/fs/cgroup/memory.max',
    },
    USED: {
        V1: '/sys/fs/cgroup/memory/memory.usage_in_bytes',
        V2: '/sys/fs/cgroup/memory.current',
    },
};

// Passing an encoding makes fs.readFile return a string instead of a Buffer.
const MEMORY_FILE_ENCODING = 'utf-8';
// Promise-returning wrapper around the callback-style `@apify/ps-tree` export.
const psTreePromised = util_1.default.promisify(ps_tree_1.default);
/**
 * Returns a new instance of the Apify API client. The `ApifyClient` class is provided
 * by the <a href="https://www.npmjs.com/package/apify-client" target="_blank">apify-client</a>
 * NPM package, and it is automatically configured using the `APIFY_API_BASE_URL`, and `APIFY_TOKEN`
 * environment variables. You can override the token via the available options. That's useful
 * if you want to use the client as a different Apify user than the SDK internals are using.
 *
 * The options object is validated strictly: any key other than the ones listed below is rejected.
 *
 * @param {object} [options]
 * @param {string} [options.baseUrl] Must be a valid URL; defaults to the value of the API base URL environment variable.
 * @param {string} [options.token] Defaults to the value of the token environment variable.
 * @param {number} [options.maxRetries]
 * @param {number} [options.minDelayBetweenRetriesMillis]
 * @memberof module:Apify
 * @function
 * @name newClient
 * @return {ApifyClient}
 */
const newClient = (options = {}) => {
    const optionsShape = ow_1.default.object.exactShape({
        baseUrl: ow_1.default.optional.string.url,
        token: ow_1.default.optional.string,
        maxRetries: ow_1.default.optional.number,
        minDelayBetweenRetriesMillis: ow_1.default.optional.number,
    });
    (0, ow_1.default)(options, optionsShape);

    // Destructuring defaults only kick in for `undefined`, so explicitly passed values always win.
    const {
        baseUrl = process.env[consts_1.ENV_VARS.API_BASE_URL],
        token = process.env[consts_1.ENV_VARS.TOKEN],
    } = options;

    return new apify_client_1.ApifyClient({
        ...options,
        baseUrl,
        token,
    });
};
84
- exports.newClient = newClient;
/**
 * Logs an info-level "System info" record containing the apify package version,
 * the apify-client package version, the OS type and the Node.js version.
 */
const logSystemInfo = () => {
    const systemInfo = {
        apifyVersion: package_json_2.version,
        apifyClientVersion: package_json_1.version,
        osType: os_1.default.type(),
        nodeVersion: process.version,
    };
    utils_log_1.default.info('System info', systemInfo);
};
96
- exports.logSystemInfo = logSystemInfo;
/**
 * The default instance of `ApifyClient` used internally by the SDK.
 * It is obtained from the global `Configuration` once, when this module is first loaded.
 *
 * @type {*}
 * @ignore
 */
exports.apifyClient = configuration_1.Configuration.getGlobalConfig().getClient();
/**
 * Adds `charset=utf-8` to the given content type if it has no charset parameter yet.
 * Falsy input and content types that already carry a charset are returned unchanged.
 *
 * @param {string} contentType
 * @returns {string}
 *
 * @ignore
 */
const addCharsetToContentType = (contentType) => {
    if (!contentType) return contentType;

    const parsed = content_type_1.default.parse(contentType);
    if (!parsed.parameters.charset) {
        parsed.parameters.charset = 'utf-8';
        return content_type_1.default.format(parsed);
    }

    // Return the caller's original string rather than a re-formatted one.
    return contentType;
};
122
- exports.addCharsetToContentType = addCharsetToContentType;
// Memoized result of the Docker detection, shared by all isDocker() callers.
let isDockerPromiseCache;

/**
 * Starts both Docker heuristics in parallel: the presence of `/.dockerenv` and
 * the string "docker" appearing in `/proc/self/cgroup`. A failure of either probe
 * (e.g. the file does not exist) simply counts as "not Docker" for that probe.
 *
 * @returns {Promise<boolean>} Resolves to `true` when at least one heuristic matches.
 * @ignore
 */
const createIsDockerPromise = () => {
    const statAsync = util_1.default.promisify(fs_1.default.stat);
    const readFileAsync = util_1.default.promisify(fs_1.default.readFile);

    const hasDockerEnvFile = statAsync('/.dockerenv')
        .then(() => true)
        .catch(() => false);
    const cgroupMentionsDocker = readFileAsync('/proc/self/cgroup', 'utf8')
        .then((content) => content.indexOf('docker') !== -1)
        .catch(() => false);

    return Promise
        .all([hasDockerEnvFile, cgroupMentionsDocker])
        .then(([fromEnvFile, fromCgroup]) => fromEnvFile || fromCgroup);
};

/**
 * Returns a `Promise` that resolves to true if the code is running in a Docker container.
 * The detection runs once; subsequent calls return the same cached promise.
 *
 * @param {boolean} forceReset Internal, for unit tests only: discards the cached result and detects again.
 * @return {Promise<boolean>}
 *
 * @memberof utils
 * @name isDocker
 * @function
 */
const isDocker = (forceReset) => {
    if (forceReset || !isDockerPromiseCache) {
        isDockerPromiseCache = createIsDockerPromise();
    }
    return isDockerPromiseCache;
};
153
- exports.isDocker = isDocker;
/**
 * Computes a weighted average of an array of numbers, complemented by an array of weights.
 * The weight for `arrValues[i]` is taken from `arrWeights[i]`.
 *
 * The result is `NaN` when the weights sum to zero (including when both arrays are empty)
 * or when a value has no matching weight.
 *
 * @param {number[]} arrValues
 * @param {number[]} arrWeights
 * @return {number}
 *
 * @ignore
 */
const weightedAvg = (arrValues, arrWeights) => {
    let weightedSum = 0;
    let totalWeight = 0;
    arrValues.forEach((value, i) => {
        const weight = arrWeights[i];
        weightedSum += value * weight;
        totalWeight += weight;
    });
    return weightedSum / totalWeight;
};
171
- exports.weightedAvg = weightedAvg;
/**
 * Describes memory usage of an Actor.
 *
 * @typedef MemoryInfo
 * @property {number} totalBytes Total memory available in the system or container
 * @property {number} freeBytes Amount of free memory in the system or container
 * @property {number} usedBytes Amount of memory used (= totalBytes - freeBytes)
 * @property {number} mainProcessBytes Amount of memory used by the current Node.js process
 * @property {number} childProcessesBytes Amount of memory used by child processes of the current Node.js process
 */
/**
 * Returns memory statistics of the process and the system, see {@link MemoryInfo}.
 *
 * If the process runs inside of Docker, the `getMemoryInfo` gets container memory limits,
 * otherwise it gets system memory limits. On AWS Lambda (detected via the
 * `AWS_LAMBDA_FUNCTION_MEMORY_SIZE` environment variable on Linux) the limit comes from that variable.
 *
 * Beware that the function is quite inefficient because it spawns a new process.
 * Therefore you shouldn't call it too often, like more than once per second.
 *
 * @returns {Promise<MemoryInfo>}
 *
 * @memberof module:Apify
 * @name getMemoryInfo
 * @function
 */
const getMemoryInfo = async () => {
    // Lambda does *not* have `ps` and the other command line tools
    // required to extract memory usage.
    const isLambdaEnvironment = process.platform === 'linux'
        && !!process.env.AWS_LAMBDA_FUNCTION_MEMORY_SIZE;

    // isDocker is looked up on module.exports (not called directly) so that tests can mock it.
    const isDockerVar = !isLambdaEnvironment && (await module.exports.isDocker());

    // Stays at -1 when the current PID is not found among the process records below.
    let mainProcessBytes = -1;
    let childProcessesBytes = 0;

    if (isLambdaEnvironment) {
        // Reported in bytes.
        mainProcessBytes = process.memoryUsage().rss;

        // https://stackoverflow.com/a/55914335/129415
        const meminfoTokens = (0, child_process_1.execSync)('cat /proc/meminfo')
            .toString()
            .split(/[\n: ]/)
            .filter((val) => val.trim());

        // Token 19 is the numeric value on the 7th line when every line has the `Name: value kB` shape.
        // NOTE(review): the index is positional - confirm which field the target platform reports there.
        // NOTE(review): meminfo values are multiplied by 1000 here; confirm whether 1024 was intended.
        // The figure is treated as total used memory, so the main Node.js process is subtracted
        // in order to infer the memory used by any child processes.
        childProcessesBytes = meminfoTokens[19] * 1000 - mainProcessBytes;
    } else {
        // Query both the root process and its child processes.
        const processes = await psTreePromised(process.pid, true);
        processes.forEach((rec) => {
            // Skip the 'ps' or 'wmic' commands that ps-tree itself spawns to query the processes.
            if (rec.COMMAND === 'ps' || rec.COMMAND === 'WMIC.exe') {
                return;
            }
            const bytes = parseInt(rec.RSS, 10);
            // The main process' memory is tracked separately; PIDs arrive as strings.
            if (rec.PID === `${process.pid}`) {
                mainProcessBytes = bytes;
                return;
            }
            childProcessesBytes += bytes;
        });
    }

    let totalBytes;
    let usedBytes;
    let freeBytes;

    if (isLambdaEnvironment) {
        // Memory size is defined in megabytes.
        // NOTE(review): converted with a factor of 1,000,000 - confirm against the Lambda documentation.
        totalBytes = parseInt(process.env.AWS_LAMBDA_FUNCTION_MEMORY_SIZE, 10) * 1000000;
        usedBytes = mainProcessBytes + childProcessesBytes;
        freeBytes = totalBytes - usedBytes;
        utils_log_1.default.debug(`lambda size of ${totalBytes} with ${freeBytes} free bytes`);
    } else if (isDockerVar) {
        // When running inside a Docker container, use the container memory limits.
        // These are promisified here (not at module level) so that fs can be mocked.
        const readPromised = util_1.default.promisify(fs_1.default.readFile);
        const accessPromised = util_1.default.promisify(fs_1.default.access);

        // Check whether cgroups V1 or V2 is used.
        let cgroupsVersion = 'V1';
        try {
            // If this directory does not exist, assume Docker is using cgroups V2.
            await accessPromised('/sys/fs/cgroup/memory/', fs_1.default.constants.R_OK);
        } catch (err) {
            cgroupsVersion = 'V2';
        }

        try {
            const [rawTotal, rawUsed] = await Promise.all([
                readPromised(MEMORY_FILE_PATHS.TOTAL[cgroupsVersion], MEMORY_FILE_ENCODING),
                readPromised(MEMORY_FILE_PATHS.USED[cgroupsVersion], MEMORY_FILE_ENCODING),
            ]);

            // Cgroups V2 files contain a newline character; strip everything but
            // alphanumerics and spaces to simplify the handling below.
            const totalBytesStr = rawTotal.replace(/[^a-zA-Z0-9 ]/g, '');
            const usedBytesStr = rawUsed.replace(/[^a-zA-Z0-9 ]/g, '');

            if (totalBytesStr === 'max') {
                // Cgroups V2 contains the 'max' string if memory is not limited.
                // See https://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup.git/tree/Documentation/admin-guide/cgroup-v2.rst (see "memory.max")
                totalBytes = os_1.default.totalmem();
            } else {
                // Cgroups V1 is set to a number related to platform and page size if memory is not limited.
                // See https://unix.stackexchange.com/q/420906
                totalBytes = parseInt(totalBytesStr, 10);
                const containerRunsWithUnlimitedMemory = totalBytes > Number.MAX_SAFE_INTEGER;
                if (containerRunsWithUnlimitedMemory) totalBytes = os_1.default.totalmem();
            }

            usedBytes = parseInt(usedBytesStr, 10);
            freeBytes = totalBytes - usedBytes;
        } catch (err) {
            // log.deprecated logs a warning only once.
            utils_log_1.default.deprecated('Your environment is Docker, but your system does not support memory cgroups. '
                + 'If you\'re running containers with limited memory, memory auto-scaling will not work properly.\n\n'
                + `Cause: ${err.message}`);
            // Fall back to the system-wide figures.
            totalBytes = os_1.default.totalmem();
            freeBytes = os_1.default.freemem();
            usedBytes = totalBytes - freeBytes;
        }
    } else {
        totalBytes = os_1.default.totalmem();
        freeBytes = os_1.default.freemem();
        usedBytes = totalBytes - freeBytes;
    }

    return {
        totalBytes,
        freeBytes,
        usedBytes,
        mainProcessBytes,
        childProcessesBytes,
    };
};
309
- exports.getMemoryInfo = getMemoryInfo;
/**
 * Helper function that returns the first key from a plain object.
 * Keys are visited in `for...in` order, so inherited enumerable keys count as well.
 * Returns `undefined` when there is no enumerable key (or `dict` is null/undefined).
 *
 * @ignore
 */
const getFirstKey = (dict) => {
    for (const key in dict) { // eslint-disable-line
        return key;
    }
    return undefined;
};
320
- exports.getFirstKey = getFirstKey;
/**
 * Gets a typical path to Chrome executable, depending on the current operating system.
 * Only the Windows branch touches the file system; the macOS and default paths are
 * returned without checking that the executable exists.
 *
 * @return {string}
 * @ignore
 */
const getTypicalChromeExecutablePath = () => {
    /**
     * Returns the path of the Chrome executable derived from the OS environment variables,
     * to deal with non-English language OS installations.
     * Also takes into account the old [chrome 380177 issue](https://bugs.chromium.org/p/chromium/issues/detail?id=380177).
     *
     * @returns {string}
     * @ignore
     */
    const getWin32Path = () => {
        const path00 = `${process.env.ProgramFiles}\\Google\\Chrome\\Application\\chrome.exe`;
        const path86 = `${process.env['ProgramFiles(x86)']}\\Google\\Chrome\\Application\\chrome.exe`;

        // The first existing candidate wins; the hard-coded location is the last resort.
        const existingPath = [path00, path86].find((candidate) => fs_1.default.existsSync(candidate));
        return existingPath !== undefined
            ? existingPath
            : 'C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe';
    };

    const platform = os_1.default.platform();
    if (platform === 'darwin') return '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome';
    if (platform === 'win32') return getWin32Path();
    return '/usr/bin/google-chrome';
};
353
- exports.getTypicalChromeExecutablePath = getTypicalChromeExecutablePath;
/**
 * Returns `true` when code is running on Apify platform and `false` otherwise (for example locally).
 * The decision is based solely on whether the "is at home" environment variable holds a non-empty value.
 *
 * @returns {boolean}
 *
 * @memberof module:Apify
 * @name isAtHome
 * @function
 */
const isAtHome = () => Boolean(process.env[consts_1.ENV_VARS.IS_AT_HOME]);
364
- exports.isAtHome = isAtHome;
/**
 * Returns a `Promise` that resolves after a specific period of time. This is useful to implement waiting
 * in your code, e.g. to prevent overloading of target website or to avoid bot detection.
 *
 * **Example usage:**
 *
 * ```
 * const Apify = require('apify');
 *
 * ...
 *
 * // Sleep 1.5 seconds
 * await Apify.utils.sleep(1500);
 * ```
 * @param {number} millis Period of time to sleep, in milliseconds. The value is handed to `setTimeout`
 *   unchanged, so if it is not a positive number the promise resolves as soon as the timer queue runs.
 * @memberof utils
 * @name sleep
 * @function
 * @return {Promise<void>}
 */
const sleep = (millis) => new Promise((resolve) => {
    setTimeout(resolve, millis);
});
388
- exports.sleep = sleep;
/**
 * Returns a promise that resolves to an array of urls parsed from the resource available at the provided url.
 * Optionally, custom regular expression and encoding may be provided.
 *
 * A Google Sheets sharing URL (`https://docs.google.com/spreadsheets/d/<id>/...`) is rewritten
 * to the corresponding CSV export URL before downloading.
 *
 * @param {object} options
 * @param {string} options.url URL to the file
 * @param {string} [options.encoding='utf8'] The encoding of the file.
 * @param {string} [options.proxyUrl] The proxy url to be used for the request.
 * @param {RegExp} [options.urlRegExp=URL_NO_COMMAS_REGEX]
 *   Custom regular expression to identify the URLs in the file to extract.
 *   The regular expression should be case-insensitive and have global flag set (i.e. `/something/gi`).
 * @returns {Promise<Array<string>>}
 * @memberOf utils
 */
const downloadListOfUrls = async (options) => {
    (0, ow_1.default)(options, ow_1.default.object.exactShape({
        url: ow_1.default.string.url,
        encoding: ow_1.default.optional.string,
        urlRegExp: ow_1.default.optional.regExp,
        proxyUrl: ow_1.default.optional.string,
    }));
    const { url, encoding = 'utf8', urlRegExp = URL_NO_COMMAS_REGEX, proxyUrl } = options;

    // Try to detect wrong urls and fix them. Currently, only a Google Sheets
    // sharing url used in place of the csv download one is detected.
    const sheetMatch = url.match(/^(https:\/\/docs\.google\.com\/spreadsheets\/d\/(?:\w|-)+)\/?/);
    const fixedUrl = sheetMatch ? `${sheetMatch[1]}/gviz/tq?tqx=out:csv` : url;

    const { body: string } = await (0, utils_request_1.requestAsBrowser)({ url: fixedUrl, encoding, proxyUrl });
    return extractUrls({ string, urlRegExp });
};
/**
 * Collects all URLs in an arbitrary string to an array, optionally using a custom regular expression.
 * Returns an empty array when nothing matches.
 *
 * @param {object} options
 * @param {string} options.string
 * @param {RegExp} [options.urlRegExp=Apify.utils.URL_NO_COMMAS_REGEX]
 * @returns {string[]}
 * @memberOf utils
 */
const extractUrls = (options) => {
    (0, ow_1.default)(options, ow_1.default.object.exactShape({
        string: ow_1.default.string,
        urlRegExp: ow_1.default.optional.regExp,
    }));
    const { string, urlRegExp = URL_NO_COMMAS_REGEX } = options;

    // String.prototype.match returns null (not an empty array) when there is no match.
    const matches = string.match(urlRegExp);
    return matches || [];
};
// Tags whose content is ignored entirely.
// NOTE: 'noscript' is skipped because its content is evaluated as text instead of HTML elements, which damages the results.
const SKIP_TAGS_REGEX = /^(script|style|canvas|svg|noscript)$/i;
// Tag names matched as block-level elements (case-insensitive, whole name only).
const BLOCK_TAGS_REGEX = /^(p|h1|h2|h3|h4|h5|h6|ol|ul|li|pre|address|blockquote|dl|div|fieldset|form|table|tr|select|option)$/i;
439
- /**
440
- * The function converts a HTML document to a plain text.
441
- *
442
- * The plain text generated by the function is similar to a text captured
443
- * by pressing Ctrl+A and Ctrl+C on a page when loaded in a web browser.
444
- * The function doesn't aspire to preserve the formatting or to be perfectly correct with respect to HTML specifications.
445
- * However, it attempts to generate newlines and whitespaces in and around HTML elements
446
- * to avoid merging distinct parts of text and thus enable extraction of data from the text (e.g. phone numbers).
447
- *
448
- * **Example usage**
449
- * ```javascript
450
- * const text = htmlToText('<html><body>Some text</body></html>');
451
- * console.log(text);
452
- * ```
453
- *
454
- * Note that the function uses [cheerio](https://www.npmjs.com/package/cheerio) to parse the HTML.
455
- * Optionally, to avoid duplicate parsing of HTML and thus improve performance, you can pass
456
- * an existing Cheerio object to the function instead of the HTML text. The HTML should be parsed
457
- * with the `decodeEntities` option set to `true`. For example:
458
- *
459
- * ```javascript
460
- * const cheerio = require('cheerio');
461
- * const html = '<html><body>Some text</body></html>';
462
- * const text = htmlToText(cheerio.load(html, { decodeEntities: true }));
463
- * ```
464
- * @param {(string|CheerioAPI)} html HTML text or parsed HTML represented using a
465
- * [cheerio](https://www.npmjs.com/package/cheerio) function.
466
- * @return {string} Plain text
467
- * @memberOf utils
468
- * @function
469
- */
470
- const htmlToText = (html) => {
471
- if (!html)
472
- return '';
473
- // TODO: Add support for "html" being a Cheerio element, otherwise the only way
474
- // to use it is e.g. htmlToText($('p').html())) which is inefficient
475
- // Also, it seems this doesn't work well in CheerioScraper, e.g. htmlToText($)
476
- // produces really text with a lot of HTML elements in it. Let's just deprecate this sort of usage,
477
- // and make the parameter "htmlOrCheerioElement"
478
- /**
479
- * @type {CheerioAPI}
480
- * @ignore
481
- */
482
- const $ = typeof html === 'function' ? html : cheerio_1.default.load(html, { decodeEntities: true });
483
- let text = '';
484
- const process = (elems) => {
485
- const len = elems ? elems.length : 0;
486
- for (let i = 0; i < len; i++) {
487
- const elem = elems[i];
488
- if (elem.type === 'text') {
489
- // Compress spaces, unless we're inside <pre> element
490
- let compr;
491
- if (elem.parent && elem.parent.tagName === 'pre')
492
- compr = elem.data;
493
- else
494
- compr = elem.data.replace(/\s+/g, ' ');
495
- // If text is empty or ends with a whitespace, don't add the leading whitepsace
496
- if (compr.startsWith(' ') && /(^|\s)$/.test(text))
497
- compr = compr.substr(1);
498
- text += compr;
499
- }
500
- else if (elem.type === 'comment' || SKIP_TAGS_REGEX.test(elem.tagName)) {
501
- // Skip comments and special elements
502
- }
503
- else if (elem.tagName === 'br') {
504
- text += '\n';
505
- }
506
- else if (elem.tagName === 'td') {
507
- process(elem.children);
508
- text += '\t';
509
- }
510
- else {
511
- // Block elements must be surrounded by newlines (unless beginning of text)
512
- const isBlockTag = BLOCK_TAGS_REGEX.test(elem.tagName);
513
- if (isBlockTag && !/(^|\n)$/.test(text))
514
- text += '\n';
515
- process(elem.children);
516
- if (isBlockTag && !text.endsWith('\n'))
517
- text += '\n';
518
- }
519
- }
520
- };
521
- // If HTML document has body, only convert that, otherwise convert the entire HTML
522
- const $body = $('body');
523
- process($body.length > 0 ? $body : $.root());
524
- return text.trim();
525
- };
526
/**
 * Builds a standardized debug-info object from a request and its response.
 * Such info is usually stored in a dataset under the hidden `#debug` field.
 *
 * @param {(Request|RequestOptions)} request [Apify.Request](https://sdk.apify.com/docs/api/request) object.
 * @param {(*|IncomingMessage|PuppeteerResponse)} [response]
 *   Puppeteer [`Response`](https://pptr.dev/#?product=Puppeteer&version=v1.11.0&show=api-class-response)
 *   or NodeJS [`http.IncomingMessage`](https://nodejs.org/api/http.html#http_class_http_serverresponse).
 * @param {Object<string, *>} [additionalFields] Object containing additional fields to be added.
 *   These are spread last, so they override the standard fields on a key clash.
 * @return {Object<string, *>}
 */
const createRequestDebugInfo = (request, response = {}, additionalFields = {}) => {
    // All three arguments must be objects.
    (0, ow_1.default)(request, ow_1.default.object);
    (0, ow_1.default)(response, ow_1.default.object);
    (0, ow_1.default)(additionalFields, ow_1.default.object);
    const { id: requestId, url, loadedUrl, method, retryCount, errorMessages } = request;
    // A Puppeteer response exposes a .status() function, a NodeJS response a .statusCode property.
    let statusCode;
    if (underscore_1.default.isFunction(response.status)) {
        statusCode = response.status();
    }
    else {
        statusCode = response.statusCode;
    }
    return {
        requestId,
        url,
        loadedUrl,
        method,
        retryCount,
        errorMessages,
        statusCode,
        ...additionalFields,
    };
};
553
/**
 * Turns an underscore-separated identifier such as `SNAKE_CASE` into `camelCase`.
 *
 * The whole input is lower-cased first; the first segment is kept as is and every
 * following segment gets its initial character upper-cased.
 *
 * @param {string} snakeCaseStr
 * @return {string}
 * @ignore
 */
const snakeCaseToCamelCase = (snakeCaseStr) => {
    // split() always yields at least one element, so `head` is always a string.
    const [head, ...tail] = snakeCaseStr.toLowerCase().split('_');
    let camelCased = head;
    for (const segment of tail) {
        camelCased += segment.charAt(0).toUpperCase() + segment.slice(1);
    }
    return camelCased;
};
571
- exports.snakeCaseToCamelCase = snakeCaseToCamelCase;
572
/**
 * Prints a warning if this version of Apify SDK is outdated.
 *
 * Nothing is printed when the warning is disabled through the
 * `ENV_VARS.DISABLE_OUTDATED_WARNING` environment variable, when the
 * `ENV_VARS.SDK_LATEST_VERSION` environment variable is missing or does not hold
 * a valid semver version, or when the installed version is not lower than it.
 *
 * @ignore
 */
const printOutdatedSdkWarning = () => {
    if (process.env[consts_1.ENV_VARS.DISABLE_OUTDATED_WARNING]) {
        return;
    }
    const latestApifyVersion = process.env[consts_1.ENV_VARS.SDK_LATEST_VERSION];
    // semver.lt() throws a TypeError on an invalid version string. This helper is purely
    // advisory, so a malformed environment variable must not crash the process.
    if (!latestApifyVersion || !semver_1.default.valid(latestApifyVersion)) {
        return;
    }
    if (!semver_1.default.lt(package_json_2.version, latestApifyVersion)) {
        return;
    }
    // eslint-disable-next-line
    utils_log_1.default.warning(`You are using an outdated version (${package_json_2.version}) of Apify SDK. We recommend you to update to the latest version (${latestApifyVersion}).
Read more about Apify SDK versioning at: https://help.apify.com/en/articles/3184510-updates-and-versioning-of-apify-sdk`);
};
587
- exports.printOutdatedSdkWarning = printOutdatedSdkWarning;
588
/**
 * Determines the parsed content type of a response object.
 *
 * The `Content-Type` header is used when it is present and parseable. Otherwise the type
 * is guessed from the file extension in the response URL, with
 * `application/octet-stream; charset=utf-8` as the last resort.
 *
 * @param {IncomingMessage} response - HTTP response object; must have a valid `url` and a `headers` object
 * @return {{ type: string, charset: string }}
 * @ignore
 */
const parseContentTypeFromResponse = (response) => {
    const responseShape = ow_1.default.object.partialShape({
        url: ow_1.default.string.url,
        headers: ow_1.default.object,
    });
    (0, ow_1.default)(response, responseShape);
    const { url: responseUrl, headers: responseHeaders } = response;
    let contentTypeInfo;
    if (responseHeaders['content-type']) {
        try {
            contentTypeInfo = content_type_1.default.parse(responseHeaders['content-type']);
        }
        catch (err) {
            // The Content-Type header can not be parsed. Deliberately ignored:
            // the file-extension fallback below takes over.
        }
    }
    // Parse content type from file extension as fallback
    if (!contentTypeInfo) {
        const { pathname } = new url_1.URL(responseUrl);
        const extension = path_1.default.extname(pathname);
        // Fallback content type, specified in https://tools.ietf.org/html/rfc7231#section-3.1.1.5
        const guessedContentType = mime_types_1.default.contentType(extension)
            || 'application/octet-stream; charset=utf-8';
        contentTypeInfo = content_type_1.default.parse(guessedContentType);
    }
    const { type, parameters } = contentTypeInfo;
    return { type, charset: parameters.charset };
};
621
- exports.parseContentTypeFromResponse = parseContentTypeFromResponse;
622
/**
 * Returns a promise that resolves with the finished Run object when the provided actor run finishes
 * or with the unfinished Run object when the `waitSecs` timeout lapses. The promise is NOT rejected
 * based on run status. You can inspect the `status` property of the Run object to find out its status.
 *
 * This is useful when you need to chain actor executions. Similar effect can be achieved
 * by using webhooks, so be sure to review which technique fits your use-case better.
 *
 * The options object is validated with an exact shape, so only the properties listed below
 * are accepted.
 *
 * @param {object} options
 * @param {string} options.actorId
 *   ID of the actor that started the run.
 * @param {string} options.runId
 *   ID of the run itself.
 * @param {number} [options.waitSecs]
 *   Maximum time to wait for the run to finish, in seconds.
 *   If the limit is reached, the returned promise is resolved to the last fetched run object,
 *   which may not have reached a terminal status yet. If `waitSecs` is omitted (or `0`),
 *   the function keeps waiting until the run reaches a terminal status.
 * @returns {Promise<ActorRun>}
 * @throws {Error} When the wait ends without any run object having been fetched.
 * @memberOf utils
 * @name waitForRunToFinish
 * @function
 * @deprecated
 *   Please use the 'waitForFinish' functions of 'apify-client'.
 * @ignore
 */
const waitForRunToFinish = async (options) => {
    (0, ow_1.default)(options, ow_1.default.object.exactShape({
        actorId: ow_1.default.string,
        runId: ow_1.default.string,
        waitSecs: ow_1.default.optional.number,
    }));
    const { actorId, runId, waitSecs } = options;
    const startedAt = Date.now();
    const elapsedSecs = () => (Date.now() - startedAt) / 1000;
    let run;
    for (;;) {
        // Stop once the time budget is used up ...
        if (waitSecs && elapsedSecs() >= waitSecs) {
            break;
        }
        // ... or once the run reached a terminal status.
        if (run && consts_1.ACT_JOB_TERMINAL_STATUSES.includes(run.status)) {
            break;
        }
        let remainingSecs = 999999;
        if (waitSecs) {
            remainingSecs = Math.round(waitSecs - elapsedSecs());
        }
        run = await exports.apifyClient.run(runId, actorId).waitForFinish({ waitSecs: remainingSecs });
        // It might take some time for database replicas to get up-to-date,
        // so the run might come back empty. Wait a little bit and try it again.
        if (!run) {
            await (0, exports.sleep)(250);
        }
    }
    if (!run) {
        throw new Error('Waiting for run to finish failed. Cannot fetch actor run details from the server.');
    }
    return run;
};
682
- exports.waitForRunToFinish = waitForRunToFinish;
683
/**
 * Removes the local storage folder created when testing locally.
 * Handy while debugging your code on your own machine.
 *
 * Be careful as this will remove the folder you provide and everything in it!
 *
 * @param {string} [folder] The folder to clean up. When omitted, the folder named by the
 *   `ENV_VARS.LOCAL_STORAGE_DIR` environment variable is used, or `apify_storage` if that is unset.
 * @returns {Promise<void>}
 * @memberOf utils
 * @name purgeLocalStorage
 * @function
 */
const purgeLocalStorage = async (folder) => {
    // Explicit argument first, then the environment variable, then the default folder name.
    const targetFolder = folder
        || process.env[consts_1.ENV_VARS.LOCAL_STORAGE_DIR]
        || 'apify_storage';
    // Clear the folder
    await rimrafp(targetFolder);
};
703
- exports.purgeLocalStorage = purgeLocalStorage;
704
- /**
705
- * A namespace that contains various utilities.
706
- *
707
- * **Example usage:**
708
- *
709
- * ```javascript
710
- * const Apify = require('apify');
711
- *
712
- * ...
713
- *
714
- * // Sleep 1.5 seconds
715
- * await Apify.utils.sleep(1500);
716
- * ```
717
- * @namespace utils
718
- */
719
- exports.publicUtils = {
720
- isDocker: exports.isDocker,
721
- sleep: exports.sleep,
722
- downloadListOfUrls,
723
- extractUrls,
724
- htmlToText,
725
- URL_NO_COMMAS_REGEX,
726
- URL_WITH_COMMAS_REGEX,
727
- createRequestDebugInfo,
728
- waitForRunToFinish: exports.waitForRunToFinish,
729
- purgeLocalStorage: exports.purgeLocalStorage,
730
- };
731
- //# sourceMappingURL=utils.js.map