apify 2.3.1-beta.3 → 3.0.0-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (211) hide show
  1. package/README.md +6 -5
  2. package/package.json +69 -128
  3. package/build/actor.d.ts +0 -113
  4. package/build/actor.d.ts.map +0 -1
  5. package/build/actor.js +0 -582
  6. package/build/actor.js.map +0 -1
  7. package/build/apify.d.ts +0 -752
  8. package/build/apify.d.ts.map +0 -1
  9. package/build/apify.js +0 -877
  10. package/build/apify.js.map +0 -1
  11. package/build/autoscaling/autoscaled_pool.d.ts +0 -384
  12. package/build/autoscaling/autoscaled_pool.d.ts.map +0 -1
  13. package/build/autoscaling/autoscaled_pool.js +0 -557
  14. package/build/autoscaling/autoscaled_pool.js.map +0 -1
  15. package/build/autoscaling/snapshotter.d.ts +0 -278
  16. package/build/autoscaling/snapshotter.d.ts.map +0 -1
  17. package/build/autoscaling/snapshotter.js +0 -447
  18. package/build/autoscaling/snapshotter.js.map +0 -1
  19. package/build/autoscaling/system_status.d.ts +0 -224
  20. package/build/autoscaling/system_status.d.ts.map +0 -1
  21. package/build/autoscaling/system_status.js +0 -228
  22. package/build/autoscaling/system_status.js.map +0 -1
  23. package/build/browser_launchers/browser_launcher.d.ts +0 -154
  24. package/build/browser_launchers/browser_launcher.d.ts.map +0 -1
  25. package/build/browser_launchers/browser_launcher.js +0 -160
  26. package/build/browser_launchers/browser_launcher.js.map +0 -1
  27. package/build/browser_launchers/browser_plugin.d.ts +0 -23
  28. package/build/browser_launchers/browser_plugin.d.ts.map +0 -1
  29. package/build/browser_launchers/browser_plugin.js +0 -25
  30. package/build/browser_launchers/browser_plugin.js.map +0 -1
  31. package/build/browser_launchers/playwright_launcher.d.ts +0 -131
  32. package/build/browser_launchers/playwright_launcher.d.ts.map +0 -1
  33. package/build/browser_launchers/playwright_launcher.js +0 -150
  34. package/build/browser_launchers/playwright_launcher.js.map +0 -1
  35. package/build/browser_launchers/puppeteer_launcher.d.ts +0 -153
  36. package/build/browser_launchers/puppeteer_launcher.d.ts.map +0 -1
  37. package/build/browser_launchers/puppeteer_launcher.js +0 -197
  38. package/build/browser_launchers/puppeteer_launcher.js.map +0 -1
  39. package/build/cache_container.d.ts +0 -31
  40. package/build/cache_container.d.ts.map +0 -1
  41. package/build/cache_container.js +0 -48
  42. package/build/cache_container.js.map +0 -1
  43. package/build/configuration.d.ts +0 -226
  44. package/build/configuration.d.ts.map +0 -1
  45. package/build/configuration.js +0 -325
  46. package/build/configuration.js.map +0 -1
  47. package/build/constants.d.ts +0 -37
  48. package/build/constants.d.ts.map +0 -1
  49. package/build/constants.js +0 -41
  50. package/build/constants.js.map +0 -1
  51. package/build/crawlers/basic_crawler.d.ts +0 -443
  52. package/build/crawlers/basic_crawler.d.ts.map +0 -1
  53. package/build/crawlers/basic_crawler.js +0 -664
  54. package/build/crawlers/basic_crawler.js.map +0 -1
  55. package/build/crawlers/browser_crawler.d.ts +0 -512
  56. package/build/crawlers/browser_crawler.d.ts.map +0 -1
  57. package/build/crawlers/browser_crawler.js +0 -540
  58. package/build/crawlers/browser_crawler.js.map +0 -1
  59. package/build/crawlers/cheerio_crawler.d.ts +0 -931
  60. package/build/crawlers/cheerio_crawler.d.ts.map +0 -1
  61. package/build/crawlers/cheerio_crawler.js +0 -913
  62. package/build/crawlers/cheerio_crawler.js.map +0 -1
  63. package/build/crawlers/crawler_extension.d.ts +0 -10
  64. package/build/crawlers/crawler_extension.d.ts.map +0 -1
  65. package/build/crawlers/crawler_extension.js +0 -19
  66. package/build/crawlers/crawler_extension.js.map +0 -1
  67. package/build/crawlers/crawler_utils.d.ts +0 -34
  68. package/build/crawlers/crawler_utils.d.ts.map +0 -1
  69. package/build/crawlers/crawler_utils.js +0 -87
  70. package/build/crawlers/crawler_utils.js.map +0 -1
  71. package/build/crawlers/playwright_crawler.d.ts +0 -448
  72. package/build/crawlers/playwright_crawler.d.ts.map +0 -1
  73. package/build/crawlers/playwright_crawler.js +0 -299
  74. package/build/crawlers/playwright_crawler.js.map +0 -1
  75. package/build/crawlers/puppeteer_crawler.d.ts +0 -425
  76. package/build/crawlers/puppeteer_crawler.d.ts.map +0 -1
  77. package/build/crawlers/puppeteer_crawler.js +0 -299
  78. package/build/crawlers/puppeteer_crawler.js.map +0 -1
  79. package/build/crawlers/statistics.d.ts +0 -185
  80. package/build/crawlers/statistics.d.ts.map +0 -1
  81. package/build/crawlers/statistics.js +0 -331
  82. package/build/crawlers/statistics.js.map +0 -1
  83. package/build/enqueue_links/click_elements.d.ts +0 -179
  84. package/build/enqueue_links/click_elements.d.ts.map +0 -1
  85. package/build/enqueue_links/click_elements.js +0 -434
  86. package/build/enqueue_links/click_elements.js.map +0 -1
  87. package/build/enqueue_links/enqueue_links.d.ts +0 -117
  88. package/build/enqueue_links/enqueue_links.d.ts.map +0 -1
  89. package/build/enqueue_links/enqueue_links.js +0 -163
  90. package/build/enqueue_links/enqueue_links.js.map +0 -1
  91. package/build/enqueue_links/shared.d.ts +0 -42
  92. package/build/enqueue_links/shared.d.ts.map +0 -1
  93. package/build/enqueue_links/shared.js +0 -121
  94. package/build/enqueue_links/shared.js.map +0 -1
  95. package/build/errors.d.ts +0 -29
  96. package/build/errors.d.ts.map +0 -1
  97. package/build/errors.js +0 -38
  98. package/build/errors.js.map +0 -1
  99. package/build/events.d.ts +0 -11
  100. package/build/events.d.ts.map +0 -1
  101. package/build/events.js +0 -147
  102. package/build/events.js.map +0 -1
  103. package/build/index.d.ts +0 -4
  104. package/build/index.d.ts.map +0 -1
  105. package/build/index.js +0 -7
  106. package/build/index.js.map +0 -1
  107. package/build/main.d.ts +0 -179
  108. package/build/main.d.ts.map +0 -1
  109. package/build/main.js +0 -81
  110. package/build/main.js.map +0 -1
  111. package/build/playwright_utils.d.ts +0 -9
  112. package/build/playwright_utils.d.ts.map +0 -1
  113. package/build/playwright_utils.js +0 -90
  114. package/build/playwright_utils.js.map +0 -1
  115. package/build/proxy_configuration.d.ts +0 -411
  116. package/build/proxy_configuration.d.ts.map +0 -1
  117. package/build/proxy_configuration.js +0 -517
  118. package/build/proxy_configuration.js.map +0 -1
  119. package/build/pseudo_url.d.ts +0 -86
  120. package/build/pseudo_url.d.ts.map +0 -1
  121. package/build/pseudo_url.js +0 -153
  122. package/build/pseudo_url.js.map +0 -1
  123. package/build/puppeteer_request_interception.d.ts +0 -8
  124. package/build/puppeteer_request_interception.d.ts.map +0 -1
  125. package/build/puppeteer_request_interception.js +0 -235
  126. package/build/puppeteer_request_interception.js.map +0 -1
  127. package/build/puppeteer_utils.d.ts +0 -250
  128. package/build/puppeteer_utils.d.ts.map +0 -1
  129. package/build/puppeteer_utils.js +0 -551
  130. package/build/puppeteer_utils.js.map +0 -1
  131. package/build/request.d.ts +0 -180
  132. package/build/request.d.ts.map +0 -1
  133. package/build/request.js +0 -261
  134. package/build/request.js.map +0 -1
  135. package/build/request_list.d.ts +0 -581
  136. package/build/request_list.d.ts.map +0 -1
  137. package/build/request_list.js +0 -826
  138. package/build/request_list.js.map +0 -1
  139. package/build/serialization.d.ts +0 -5
  140. package/build/serialization.d.ts.map +0 -1
  141. package/build/serialization.js +0 -139
  142. package/build/serialization.js.map +0 -1
  143. package/build/session_pool/errors.d.ts +0 -11
  144. package/build/session_pool/errors.d.ts.map +0 -1
  145. package/build/session_pool/errors.js +0 -18
  146. package/build/session_pool/errors.js.map +0 -1
  147. package/build/session_pool/events.d.ts +0 -5
  148. package/build/session_pool/events.d.ts.map +0 -1
  149. package/build/session_pool/events.js +0 -6
  150. package/build/session_pool/events.js.map +0 -1
  151. package/build/session_pool/session.d.ts +0 -286
  152. package/build/session_pool/session.d.ts.map +0 -1
  153. package/build/session_pool/session.js +0 -355
  154. package/build/session_pool/session.js.map +0 -1
  155. package/build/session_pool/session_pool.d.ts +0 -280
  156. package/build/session_pool/session_pool.d.ts.map +0 -1
  157. package/build/session_pool/session_pool.js +0 -393
  158. package/build/session_pool/session_pool.js.map +0 -1
  159. package/build/session_pool/session_utils.d.ts +0 -4
  160. package/build/session_pool/session_utils.d.ts.map +0 -1
  161. package/build/session_pool/session_utils.js +0 -24
  162. package/build/session_pool/session_utils.js.map +0 -1
  163. package/build/stealth/hiding_tricks.d.ts +0 -22
  164. package/build/stealth/hiding_tricks.d.ts.map +0 -1
  165. package/build/stealth/hiding_tricks.js +0 -308
  166. package/build/stealth/hiding_tricks.js.map +0 -1
  167. package/build/stealth/stealth.d.ts +0 -56
  168. package/build/stealth/stealth.d.ts.map +0 -1
  169. package/build/stealth/stealth.js +0 -125
  170. package/build/stealth/stealth.js.map +0 -1
  171. package/build/storages/dataset.d.ts +0 -288
  172. package/build/storages/dataset.d.ts.map +0 -1
  173. package/build/storages/dataset.js +0 -480
  174. package/build/storages/dataset.js.map +0 -1
  175. package/build/storages/key_value_store.d.ts +0 -243
  176. package/build/storages/key_value_store.d.ts.map +0 -1
  177. package/build/storages/key_value_store.js +0 -462
  178. package/build/storages/key_value_store.js.map +0 -1
  179. package/build/storages/request_queue.d.ts +0 -318
  180. package/build/storages/request_queue.d.ts.map +0 -1
  181. package/build/storages/request_queue.js +0 -636
  182. package/build/storages/request_queue.js.map +0 -1
  183. package/build/storages/storage_manager.d.ts +0 -87
  184. package/build/storages/storage_manager.d.ts.map +0 -1
  185. package/build/storages/storage_manager.js +0 -150
  186. package/build/storages/storage_manager.js.map +0 -1
  187. package/build/tsconfig.tsbuildinfo +0 -1
  188. package/build/typedefs.d.ts +0 -146
  189. package/build/typedefs.d.ts.map +0 -1
  190. package/build/typedefs.js +0 -88
  191. package/build/typedefs.js.map +0 -1
  192. package/build/utils.d.ts +0 -175
  193. package/build/utils.d.ts.map +0 -1
  194. package/build/utils.js +0 -754
  195. package/build/utils.js.map +0 -1
  196. package/build/utils_log.d.ts +0 -41
  197. package/build/utils_log.d.ts.map +0 -1
  198. package/build/utils_log.js +0 -192
  199. package/build/utils_log.js.map +0 -1
  200. package/build/utils_request.d.ts +0 -77
  201. package/build/utils_request.d.ts.map +0 -1
  202. package/build/utils_request.js +0 -385
  203. package/build/utils_request.js.map +0 -1
  204. package/build/utils_social.d.ts +0 -210
  205. package/build/utils_social.d.ts.map +0 -1
  206. package/build/utils_social.js +0 -787
  207. package/build/utils_social.js.map +0 -1
  208. package/build/validators.d.ts +0 -23
  209. package/build/validators.d.ts.map +0 -1
  210. package/build/validators.js +0 -29
  211. package/build/validators.js.map +0 -1
package/build/utils.js DELETED
@@ -1,754 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.publicUtils = exports.purgeLocalStorage = exports.waitForRunToFinish = exports.parseContentTypeFromResponse = exports.printOutdatedSdkWarning = exports.snakeCaseToCamelCase = exports.sleep = exports.isAtHome = exports.getTypicalChromeExecutablePath = exports.getFirstKey = exports.getMemoryInfo = exports.weightedAvg = exports.isDocker = exports.addCharsetToContentType = exports.apifyClient = exports.logSystemInfo = exports.newClient = void 0;
4
- const tslib_1 = require("tslib");
5
- const ps_tree_1 = (0, tslib_1.__importDefault)(require("@apify/ps-tree"));
6
- const child_process_1 = require("child_process");
7
- const apify_client_1 = require("apify-client");
8
- const package_json_1 = require("apify-client/package.json");
9
- const consts_1 = require("@apify/consts");
10
- // eslint-disable-next-line import/no-duplicates
11
- const cheerio_1 = (0, tslib_1.__importDefault)(require("cheerio"));
12
- const content_type_1 = (0, tslib_1.__importDefault)(require("content-type"));
13
- const fs_1 = (0, tslib_1.__importDefault)(require("fs"));
14
- const mime_types_1 = (0, tslib_1.__importDefault)(require("mime-types"));
15
- const os_1 = (0, tslib_1.__importDefault)(require("os"));
16
- const ow_1 = (0, tslib_1.__importDefault)(require("ow"));
17
- const path_1 = (0, tslib_1.__importDefault)(require("path"));
18
- const semver_1 = (0, tslib_1.__importDefault)(require("semver"));
19
- const underscore_1 = (0, tslib_1.__importDefault)(require("underscore"));
20
- const url_1 = require("url");
21
- const util_1 = (0, tslib_1.__importDefault)(require("util"));
22
- const rimraf_1 = (0, tslib_1.__importDefault)(require("rimraf"));
23
- const package_json_2 = require("../package.json");
24
- const utils_log_1 = (0, tslib_1.__importDefault)(require("./utils_log"));
25
- const utils_request_1 = require("./utils_request");
26
- const configuration_1 = require("./configuration");
27
- /* eslint-enable no-unused-vars,import/named,import/no-duplicates,import/order */
28
// Promise-returning wrappers around the callback-style `rimraf` and `ps-tree` helpers.
const rimrafp = util_1.default.promisify(rimraf_1.default);
const psTreePromised = util_1.default.promisify(ps_tree_1.default);
/**
 * Default pattern used to find URLs inside arbitrary text (plain text, JSON, CSV, ...).
 * It covers common URL characters, including Unicode letters (not symbols),
 * but deliberately does not match URLs containing commas or spaces.
 * @memberOf utils
 */
const URL_NO_COMMAS_REGEX = new RegExp('https?://(www\\.)?[\\p{L}0-9][-\\p{L}0-9@:%._\\+~#=]{0,254}[\\p{L}0-9]\\.[a-z]{2,63}(:\\d{1,5})?(/[-\\p{L}0-9@:%_\\+.~#?&//=\\(\\)]*)?', 'giu'); // eslint-disable-line
/**
 * Same as `URL_NO_COMMAS_REGEX`, except that commas are also accepted in the URL path and query.
 * Beware that this may break extraction of URLs from comma-delimited lists,
 * or produce malformed URLs.
 * @memberOf utils
 */
const URL_WITH_COMMAS_REGEX = new RegExp('https?://(www\\.)?[\\p{L}0-9][-\\p{L}0-9@:%._\\+~#=]{0,254}[\\p{L}0-9]\\.[a-z]{2,63}(:\\d{1,5})?(/[-\\p{L}0-9@:%_\\+,.~#?&//=\\(\\)]*)?', 'giu'); // eslint-disable-line
// Passing an encoding makes fs.readFile return a string instead of a buffer.
const MEMORY_FILE_ENCODING = 'utf-8';
// Locations of the memory limit (TOTAL) and memory usage (USED) files, keyed by cgroups version.
const MEMORY_FILE_PATHS = {
    TOTAL: {
        V1: '/sys/fs/cgroup/memory/memory.limit_in_bytes',
        V2: '/sys/fs/cgroup/memory.max',
    },
    USED: {
        V1: '/sys/fs/cgroup/memory/memory.usage_in_bytes',
        V2: '/sys/fs/cgroup/memory.current',
    },
};
54
/**
 * Returns a new instance of the Apify API client. The `ApifyClient` class is provided
 * by the <a href="https://www.npmjs.com/package/apify-client" target="_blank">apify-client</a>
 * NPM package, and it is automatically configured using the `APIFY_API_BASE_URL`, and `APIFY_TOKEN`
 * environment variables. You can override the token via the available options. That's useful
 * if you want to use the client as a different Apify user than the SDK internals are using.
 *
 * The options object is validated strictly: unknown keys or values of a wrong type
 * make the validation throw.
 *
 * @param {object} [options]
 * @param {string} [options.baseUrl]
 *   Base URL of the API; must be a valid URL. When omitted, the value of the
 *   API base URL environment variable is used.
 * @param {string} [options.token]
 *   API token. When omitted, the value of the token environment variable is used.
 * @param {number} [options.maxRetries]
 * @param {number} [options.minDelayBetweenRetriesMillis]
 * @memberof module:Apify
 * @function
 * @name newClient
 * @return {ApifyClient}
 */
const newClient = (options = {}) => {
    (0, ow_1.default)(options, ow_1.default.object.exactShape({
        baseUrl: ow_1.default.optional.string.url,
        token: ow_1.default.optional.string,
        maxRetries: ow_1.default.optional.number,
        minDelayBetweenRetriesMillis: ow_1.default.optional.number,
    }));
    // Explicit options win; the environment only supplies values that were not provided.
    const {
        baseUrl = process.env[consts_1.ENV_VARS.API_BASE_URL],
        token = process.env[consts_1.ENV_VARS.TOKEN],
    } = options;
    return new apify_client_1.ApifyClient({
        ...options,
        baseUrl,
        token,
    });
};
84
- exports.newClient = newClient;
85
/**
 * Writes an info-level log entry with the apify package version, the apify-client
 * package version, the operating system type and the Node.js version.
 */
const logSystemInfo = () => {
    const systemInfo = {
        apifyVersion: package_json_2.version,
        apifyClientVersion: package_json_1.version,
        osType: os_1.default.type(),
        nodeVersion: process.version,
    };
    utils_log_1.default.info('System info', systemInfo);
};
96
- exports.logSystemInfo = logSystemInfo;
97
- /**
98
- * The default instance of `ApifyClient` used internally
99
- * by the SDK.
100
- *
101
- * @type {*}
102
- * @ignore
103
- */
104
- exports.apifyClient = new Proxy(
105
- /**
106
- * Getting the client directly from the global Configuration invokes storage
107
- * creation right away during the module import. Using the proxy object delays this
108
- * until the first time the client is accessed.
109
- * This allows the user to set the envVars and other options before the client is
110
- * actually used.
111
- */
112
- {}, {
113
- get(target, prop) {
114
- return target[prop] ?? Reflect.get(configuration_1.Configuration.getGlobalConfig().getClient(), prop);
115
- },
116
- // for mocking purpose in tests
117
- set(target, prop, value) {
118
- target[prop] = value;
119
- return true;
120
- },
121
- getOwnPropertyDescriptor(target, prop) {
122
- return Reflect.getOwnPropertyDescriptor(configuration_1.Configuration.getGlobalConfig().getClient(), prop);
123
- },
124
- getPrototypeOf() {
125
- return Object.getPrototypeOf(configuration_1.Configuration.getGlobalConfig().getClient());
126
- },
127
- });
128
/**
 * Ensures that a content type carries a `charset` parameter, adding `charset=utf-8`
 * when none is present. Falsy input and content types that already declare a charset
 * are returned unchanged.
 *
 * @param {string} contentType
 * @returns {string}
 *
 * @ignore
 */
const addCharsetToContentType = (contentType) => {
    if (contentType) {
        const parsedType = content_type_1.default.parse(contentType);
        if (!parsedType.parameters.charset) {
            parsedType.parameters.charset = 'utf-8';
            return content_type_1.default.format(parsedType);
        }
    }
    return contentType;
};
145
- exports.addCharsetToContentType = addCharsetToContentType;
146
// Holds the promise of the last Docker detection so that the checks run only once.
let isDockerPromiseCache;
// Runs two independent checks in parallel: the presence of the `/.dockerenv` file
// and a mention of "docker" in `/proc/self/cgroup`. A failing check counts as `false`.
const createIsDockerPromise = () => {
    const dockerEnvFileExists = util_1.default
        .promisify(fs_1.default.stat)('/.dockerenv')
        .then(() => true)
        .catch(() => false);
    const cgroupMentionsDocker = util_1.default
        .promisify(fs_1.default.readFile)('/proc/self/cgroup', 'utf8')
        .then((cgroupContent) => cgroupContent.includes('docker'))
        .catch(() => false);
    return Promise
        .all([dockerEnvFileExists, cgroupMentionsDocker])
        .then(([byEnvFile, byCgroup]) => byEnvFile || byCgroup);
};
/**
 * Returns a `Promise` that resolves to true if the code is running in a Docker container.
 * The detection result is cached; subsequent calls return the same promise.
 *
 * @param {boolean} forceReset
 *   Internal, for unit tests only: discards the cached result and runs the detection again.
 * @return {Promise<boolean>}
 *
 * @memberof utils
 * @name isDocker
 * @function
 */
const isDocker = (forceReset) => {
    const mustDetect = forceReset || !isDockerPromiseCache;
    if (mustDetect) {
        isDockerPromiseCache = createIsDockerPromise();
    }
    return isDockerPromiseCache;
};
176
- exports.isDocker = isDocker;
177
/**
 * Computes the weighted average of `arrValues`, where the weight of each value
 * is taken from the same index of `arrWeights`.
 *
 * @param {number[]} arrValues
 * @param {number[]} arrWeights
 * @return {number} Sum of `value * weight` divided by the sum of the weights
 *
 * @ignore
 */
const weightedAvg = (arrValues, arrWeights) => {
    let weightedSum = 0;
    let totalWeight = 0;
    arrValues.forEach((value, index) => {
        const weight = arrWeights[index];
        weightedSum += value * weight;
        totalWeight += weight;
    });
    return weightedSum / totalWeight;
};
194
- exports.weightedAvg = weightedAvg;
195
/**
 * Describes memory usage of an Actor.
 *
 * @typedef MemoryInfo
 * @property {number} totalBytes Total memory available in the system or container
 * @property {number} freeBytes Amount of free memory in the system or container
 * @property {number} usedBytes Amount of memory used (= totalBytes - freeBytes)
 * @property {number} mainProcessBytes Amount of memory used by the current Node.js process
 *   (`-1` when the current process was not found in the process list)
 * @property {number} childProcessesBytes Amount of memory used by child processes of the current Node.js process
 */
/**
 * Returns memory statistics of the process and the system, see {@link MemoryInfo}.
 *
 * If the process runs inside of Docker, the `getMemoryInfo` gets container memory limits,
 * otherwise it gets system memory limits. On AWS Lambda (Linux with the
 * `AWS_LAMBDA_FUNCTION_MEMORY_SIZE` environment variable set), the total is taken
 * from that variable instead and the Docker detection is skipped.
 *
 * Beware that the function is quite inefficient because it spawns a new process.
 * Therefore you shouldn't call it too often, like more than once per second.
 *
 * @returns {Promise<MemoryInfo>}
 *
 * @memberof module:Apify
 * @name getMemoryInfo
 * @function
 */
const getMemoryInfo = async () => {
    // lambda does *not* have `ps` and other command line tools
    // required to extract memory usage.
    const isLambdaEnvironment = process.platform === 'linux'
        && !!process.env.AWS_LAMBDA_FUNCTION_MEMORY_SIZE;
    // module.exports must be here so that we can mock it.
    const isDockerVar = !isLambdaEnvironment && (await module.exports.isDocker());
    // -1 is kept when the main process is not found among the listed processes.
    let mainProcessBytes = -1;
    let childProcessesBytes = 0;
    if (isLambdaEnvironment) {
        // reported in bytes
        mainProcessBytes = process.memoryUsage().rss;
        // https://stackoverflow.com/a/55914335/129415
        // NOTE(review): the value is picked by a fixed position (index 19) in the
        // tokenized /proc/meminfo output - confirm which field this is on the target system.
        childProcessesBytes = (0, child_process_1.execSync)('cat /proc/meminfo')
            .toString()
            .split(/[\n: ]/)
            .filter((val) => val.trim())[19]
            // meminfo reports in kb, not bytes
            * 1000
            // the total used memory is reported by meminfo
            // subtract memory used by the main node process
            // in order to infer memory used by any child processes
            - mainProcessBytes;
    }
    else {
        // Query both root and child processes
        const processes = await psTreePromised(process.pid, true);
        processes.forEach((rec) => {
            // Skip the 'ps' or 'wmic' commands used by ps-tree to query the processes
            if (rec.COMMAND === 'ps' || rec.COMMAND === 'WMIC.exe') {
                return;
            }
            // NOTE(review): assumes rec.RSS is already expressed in bytes - confirm against ps-tree.
            const bytes = parseInt(rec.RSS, 10);
            // Obtain main process' memory separately
            if (rec.PID === `${process.pid}`) {
                mainProcessBytes = bytes;
                return;
            }
            childProcessesBytes += bytes;
        });
    }
    let totalBytes;
    let usedBytes;
    let freeBytes;
    if (isLambdaEnvironment) {
        // memory size is defined in megabytes
        totalBytes = parseInt(process.env.AWS_LAMBDA_FUNCTION_MEMORY_SIZE, 10) * 1000000;
        usedBytes = mainProcessBytes + childProcessesBytes;
        freeBytes = totalBytes - usedBytes;
        utils_log_1.default.debug(`lambda size of ${totalBytes} with ${freeBytes} free bytes`);
    }
    else if (isDockerVar) {
        // When running inside Docker container, use container memory limits
        // This must be promisified here so that we can mock it.
        const readPromised = util_1.default.promisify(fs_1.default.readFile);
        const accessPromised = util_1.default.promisify(fs_1.default.access);
        // Check whether cgroups V1 or V2 is used
        let cgroupsVersion = 'V1';
        try {
            // If this directory does not exist, assume docker is using cgroups V2
            await accessPromised('/sys/fs/cgroup/memory/', fs_1.default.constants.R_OK);
        }
        catch (err) {
            cgroupsVersion = 'V2';
        }
        try {
            let [totalBytesStr, usedBytesStr] = await Promise.all([
                readPromised(MEMORY_FILE_PATHS.TOTAL[cgroupsVersion], MEMORY_FILE_ENCODING),
                readPromised(MEMORY_FILE_PATHS.USED[cgroupsVersion], MEMORY_FILE_ENCODING),
            ]);
            // Cgroups V2 files contain a newline character. Getting rid of it for better handling in the later part of the code.
            totalBytesStr = totalBytesStr.replace(/[^a-zA-Z0-9 ]/g, '');
            usedBytesStr = usedBytesStr.replace(/[^a-zA-Z0-9 ]/g, '');
            // Cgroups V2 contains 'max' string if memory is not limited
            // See https://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup.git/tree/Documentation/admin-guide/cgroup-v2.rst (see "memory.max")
            if (totalBytesStr === 'max') {
                totalBytes = os_1.default.totalmem();
                // Cgroups V1 is set to number related to platform and page size if memory is not limited
                // See https://unix.stackexchange.com/q/420906
            }
            else {
                totalBytes = parseInt(totalBytesStr, 10);
                // A limit beyond the safe integer range is treated as "no limit";
                // the total memory of the system is used instead.
                const containerRunsWithUnlimitedMemory = totalBytes > Number.MAX_SAFE_INTEGER;
                if (containerRunsWithUnlimitedMemory)
                    totalBytes = os_1.default.totalmem();
            }
            usedBytes = parseInt(usedBytesStr, 10);
            freeBytes = totalBytes - usedBytes;
        }
        catch (err) {
            // Reading the cgroup files failed: warn and fall back to system-wide numbers.
            // log.deprecated logs a warning only once
            utils_log_1.default.deprecated('Your environment is Docker, but your system does not support memory cgroups. '
                + 'If you\'re running containers with limited memory, memory auto-scaling will not work properly.\n\n'
                + `Cause: ${err.message}`);
            totalBytes = os_1.default.totalmem();
            freeBytes = os_1.default.freemem();
            usedBytes = totalBytes - freeBytes;
        }
    }
    else {
        // Neither Lambda nor Docker: report system-wide memory.
        totalBytes = os_1.default.totalmem();
        freeBytes = os_1.default.freemem();
        usedBytes = totalBytes - freeBytes;
    }
    return {
        totalBytes,
        freeBytes,
        usedBytes,
        mainProcessBytes,
        childProcessesBytes,
    };
};
332
- exports.getMemoryInfo = getMemoryInfo;
333
/**
 * Helper function that returns the first key from plain object.
 *
 * Only the object's own enumerable keys are considered; keys inherited through
 * the prototype chain are skipped, so an object without own keys yields `undefined`
 * even if its prototype has enumerable properties.
 *
 * @param {object} dict Object to inspect; `null` and `undefined` are tolerated.
 * @returns {(string|undefined)} The first own key, or `undefined` when there is none.
 *
 * @ignore
 */
const getFirstKey = (dict) => {
    // A loop with an early return avoids materializing the full list of keys.
    for (const key in dict) { // eslint-disable-line
        // Called through Object.prototype so that objects without a prototype work too.
        if (Object.prototype.hasOwnProperty.call(dict, key)) {
            return key;
        }
    }
    return undefined;
};
343
- exports.getFirstKey = getFirstKey;
344
/**
 * Returns the path where the Chrome executable is typically installed
 * on the current operating system.
 *
 * @return {string}
 * @ignore
 */
const getTypicalChromeExecutablePath = () => {
    /**
     * Resolves the Chrome path on Windows from the OS environment variables, so that
     * non-English language systems are handled too. Also takes into account the old
     * [chrome 380177 issue](https://bugs.chromium.org/p/chromium/issues/detail?id=380177).
     * The first candidate that exists on disk wins; a hard-coded default is returned otherwise.
     *
     * @returns {string}
     * @ignore
     */
    const getWin32Path = () => {
        const candidates = [
            `${process.env.ProgramFiles}\\Google\\Chrome\\Application\\chrome.exe`,
            `${process.env['ProgramFiles(x86)']}\\Google\\Chrome\\Application\\chrome.exe`,
        ];
        const existingPath = candidates.find((candidate) => fs_1.default.existsSync(candidate));
        return existingPath ?? 'C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe';
    };
    const platform = os_1.default.platform();
    if (platform === 'darwin') {
        return '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome';
    }
    if (platform === 'win32') {
        return getWin32Path();
    }
    return '/usr/bin/google-chrome';
};
376
- exports.getTypicalChromeExecutablePath = getTypicalChromeExecutablePath;
377
/**
 * Tells whether the code is running on the Apify platform (`true`)
 * or elsewhere, for example locally (`false`). The decision is based solely
 * on the corresponding environment variable being set to a non-empty value.
 *
 * @returns {boolean}
 *
 * @memberof module:Apify
 * @name isAtHome
 * @function
 */
const isAtHome = () => {
    const flag = process.env[consts_1.ENV_VARS.IS_AT_HOME];
    return Boolean(flag);
};
387
- exports.isAtHome = isAtHome;
388
/**
 * Creates a `Promise` that gets resolved once the given period of time has elapsed.
 * Use it to pause your code, e.g. to avoid overloading the target website
 * or to make bot detection less likely.
 *
 * **Example usage:**
 *
 * ```
 * const Apify = require('apify');
 *
 * ...
 *
 * // Sleep 1.5 seconds
 * await Apify.utils.sleep(1500);
 * ```
 * @param {number} millis Period of time to sleep, in milliseconds. If not a positive number, the returned promise resolves immediately.
 * @memberof utils
 * @name sleep
 * @function
 * @return {Promise<void>}
 */
const sleep = (millis) => {
    return new Promise((resolve) => {
        setTimeout(resolve, millis);
    });
};
411
- exports.sleep = sleep;
412
/**
 * Downloads the resource at the provided URL and returns a promise resolving
 * to an array of all URLs found in its body. A custom regular expression
 * and encoding may be supplied.
 *
 * A Google Sheets sharing URL is automatically replaced with the matching CSV download URL.
 *
 * @param {object} options
 * @param {string} options.url URL to the file
 * @param {string} [options.encoding='utf8'] The encoding of the file.
 * @param {string} [options.proxyUrl] The proxy url to be used for the request.
 * @param {RegExp} [options.urlRegExp=URL_NO_COMMAS_REGEX]
 *   Custom regular expression to identify the URLs in the file to extract.
 *   The regular expression should be case-insensitive and have global flag set (i.e. `/something/gi`).
 * @returns {Promise<Array<string>>}
 * @memberOf utils
 */
const downloadListOfUrls = async (options) => {
    (0, ow_1.default)(options, ow_1.default.object.exactShape({
        url: ow_1.default.string.url,
        encoding: ow_1.default.optional.string,
        urlRegExp: ow_1.default.optional.regExp,
        proxyUrl: ow_1.default.optional.string,
    }));
    const { url, encoding = 'utf8', urlRegExp = URL_NO_COMMAS_REGEX, proxyUrl } = options;
    // Try to detect wrong urls and fix them. Currently, detects only sharing url instead of csv download one.
    const sheetMatch = url.match(/^(https:\/\/docs\.google\.com\/spreadsheets\/d\/(?:\w|-)+)\/?/);
    const fixedUrl = sheetMatch ? `${sheetMatch[1]}/gviz/tq?tqx=out:csv` : url;
    const response = await (0, utils_request_1.requestAsBrowser)({ url: fixedUrl, encoding, proxyUrl });
    return extractUrls({ string: response.body, urlRegExp });
};
443
/**
 * Returns an array of all URLs found in an arbitrary string. The URLs are matched
 * with the default regular expression unless a custom one is provided.
 * An empty array is returned when nothing matches.
 * @param {object} options
 * @param {string} options.string
 * @param {RegExp} [options.urlRegExp=Apify.utils.URL_NO_COMMAS_REGEX]
 * @returns {string[]}
 * @memberOf utils
 */
const extractUrls = (options) => {
    (0, ow_1.default)(options, ow_1.default.object.exactShape({
        string: ow_1.default.string,
        urlRegExp: ow_1.default.optional.regExp,
    }));
    const { string, urlRegExp = URL_NO_COMMAS_REGEX } = options;
    const matches = string.match(urlRegExp);
    return matches === null ? [] : matches;
};
459
- // NOTE: We skip 'noscript' because its content is evaluated as text instead of HTML elements, which damages the results.
460
- const SKIP_TAGS_REGEX = /^(script|style|canvas|svg|noscript)$/i;
461
- const BLOCK_TAGS_REGEX = /^(p|h1|h2|h3|h4|h5|h6|ol|ul|li|pre|address|blockquote|dl|div|fieldset|form|table|tr|select|option)$/i;
462
- /**
463
- * The function converts a HTML document to a plain text.
464
- *
465
- * The plain text generated by the function is similar to a text captured
466
- * by pressing Ctrl+A and Ctrl+C on a page when loaded in a web browser.
467
- * The function doesn't aspire to preserve the formatting or to be perfectly correct with respect to HTML specifications.
468
- * However, it attempts to generate newlines and whitespaces in and around HTML elements
469
- * to avoid merging distinct parts of text and thus enable extraction of data from the text (e.g. phone numbers).
470
- *
471
- * **Example usage**
472
- * ```javascript
473
- * const text = htmlToText('<html><body>Some text</body></html>');
474
- * console.log(text);
475
- * ```
476
- *
477
- * Note that the function uses [cheerio](https://www.npmjs.com/package/cheerio) to parse the HTML.
478
- * Optionally, to avoid duplicate parsing of HTML and thus improve performance, you can pass
479
- * an existing Cheerio object to the function instead of the HTML text. The HTML should be parsed
480
- * with the `decodeEntities` option set to `true`. For example:
481
- *
482
- * ```javascript
483
- * const cheerio = require('cheerio');
484
- * const html = '<html><body>Some text</body></html>';
485
- * const text = htmlToText(cheerio.load(html, { decodeEntities: true }));
486
- * ```
487
- * @param {(string|CheerioAPI)} html HTML text or parsed HTML represented using a
488
- * [cheerio](https://www.npmjs.com/package/cheerio) function.
489
- * @return {string} Plain text
490
- * @memberOf utils
491
- * @function
492
- */
493
- const htmlToText = (html) => {
494
- if (!html)
495
- return '';
496
- // TODO: Add support for "html" being a Cheerio element, otherwise the only way
497
- // to use it is e.g. htmlToText($('p').html())) which is inefficient
498
- // Also, it seems this doesn't work well in CheerioScraper, e.g. htmlToText($)
499
- // produces really text with a lot of HTML elements in it. Let's just deprecate this sort of usage,
500
- // and make the parameter "htmlOrCheerioElement"
501
- /**
502
- * @type {CheerioAPI}
503
- * @ignore
504
- */
505
- const $ = typeof html === 'function' ? html : cheerio_1.default.load(html, { decodeEntities: true });
506
- let text = '';
507
- const process = (elems) => {
508
- const len = elems ? elems.length : 0;
509
- for (let i = 0; i < len; i++) {
510
- const elem = elems[i];
511
- if (elem.type === 'text') {
512
- // Compress spaces, unless we're inside <pre> element
513
- let compr;
514
- if (elem.parent && elem.parent.tagName === 'pre')
515
- compr = elem.data;
516
- else
517
- compr = elem.data.replace(/\s+/g, ' ');
518
- // If text is empty or ends with a whitespace, don't add the leading whitepsace
519
- if (compr.startsWith(' ') && /(^|\s)$/.test(text))
520
- compr = compr.substr(1);
521
- text += compr;
522
- }
523
- else if (elem.type === 'comment' || SKIP_TAGS_REGEX.test(elem.tagName)) {
524
- // Skip comments and special elements
525
- }
526
- else if (elem.tagName === 'br') {
527
- text += '\n';
528
- }
529
- else if (elem.tagName === 'td') {
530
- process(elem.children);
531
- text += '\t';
532
- }
533
- else {
534
- // Block elements must be surrounded by newlines (unless beginning of text)
535
- const isBlockTag = BLOCK_TAGS_REGEX.test(elem.tagName);
536
- if (isBlockTag && !/(^|\n)$/.test(text))
537
- text += '\n';
538
- process(elem.children);
539
- if (isBlockTag && !text.endsWith('\n'))
540
- text += '\n';
541
- }
542
- }
543
- };
544
- // If HTML document has body, only convert that, otherwise convert the entire HTML
545
- const $body = $('body');
546
- process($body.length > 0 ? $body : $.root());
547
- return text.trim();
548
- };
549
/**
 * Builds a standardized debug-info object from a request and its response.
 * This info is usually added to a dataset under the hidden `#debug` field.
 *
 * @param {(Request|RequestOptions)} request [Apify.Request](https://sdk.apify.com/docs/api/request) object.
 * @param {(*|IncomingMessage|PuppeteerResponse)} [response]
 *   Puppeteer [`Response`](https://pptr.dev/#?product=Puppeteer&version=v1.11.0&show=api-class-response)
 *   or NodeJS [`http.IncomingMessage`](https://nodejs.org/api/http.html#http_class_http_serverresponse).
 * @param {Object<string, *>} [additionalFields] Extra fields merged into the result last,
 *   so they override the standard fields on a name clash.
 * @return {Object<string, *>}
 */
const createRequestDebugInfo = (request, response = {}, additionalFields = {}) => {
    (0, ow_1.default)(request, ow_1.default.object);
    (0, ow_1.default)(response, ow_1.default.object);
    (0, ow_1.default)(additionalFields, ow_1.default.object);
    const { id: requestId, url, loadedUrl, method, retryCount, errorMessages } = request;
    // A Puppeteer response exposes a .status() function, a NodeJS response a .statusCode property.
    const hasStatusMethod = underscore_1.default.isFunction(response.status);
    const statusCode = hasStatusMethod ? response.status() : response.statusCode;
    return {
        requestId,
        url,
        loadedUrl,
        method,
        retryCount,
        errorMessages,
        statusCode,
        ...additionalFields,
    };
};
576
/**
 * Turns a SNAKE_CASE identifier into camelCase.
 *
 * The input is lower-cased and split on underscores; every segment after
 * the first gets its first character upper-cased before the segments are
 * glued back together.
 *
 * @param {string} snakeCaseStr
 * @return {string}
 * @ignore
 */
const snakeCaseToCamelCase = (snakeCaseStr) => {
    // `split()` always yields at least one segment, so `firstWord` is never undefined.
    const [firstWord, ...otherWords] = snakeCaseStr.toLowerCase().split('_');
    return otherWords.reduce(
        (camel, word) => camel + word.charAt(0).toUpperCase() + word.slice(1),
        firstWord,
    );
};
594
- exports.snakeCaseToCamelCase = snakeCaseToCamelCase;
595
/**
 * Logs a warning when the running Apify SDK version is lower than the
 * latest version announced through the environment.
 *
 * Nothing is logged when the "disable outdated warning" environment variable
 * is set, when the "latest version" variable is missing, or when the
 * installed version is not lower than the announced one.
 *
 * @ignore
 */
const printOutdatedSdkWarning = () => {
    const warningDisabled = Boolean(process.env[consts_1.ENV_VARS.DISABLE_OUTDATED_WARNING]);
    if (warningDisabled) {
        return;
    }
    const latestVersion = process.env[consts_1.ENV_VARS.SDK_LATEST_VERSION];
    const isOutdated = Boolean(latestVersion) && semver_1.default.lt(package_json_2.version, latestVersion);
    if (!isOutdated) {
        return;
    }
    // eslint-disable-next-line
    utils_log_1.default.warning(`You are using an outdated version (${package_json_2.version}) of Apify SDK. We recommend you to update to the latest version (${latestVersion}).
Read more about Apify SDK versioning at: https://help.apify.com/en/articles/3184510-updates-and-versioning-of-apify-sdk`);
};
exports.printOutdatedSdkWarning = printOutdatedSdkWarning;
611
/**
 * Determines the content type and charset of an HTTP response.
 *
 * The `Content-Type` header is used when present and parseable. Otherwise
 * the type is derived from the file extension of the response URL, falling
 * back to `application/octet-stream; charset=utf-8`.
 *
 * @param {IncomingMessage} response - HTTP response object; must carry a valid `url` and a `headers` object
 * @return {{ type: string, charset: string }}
 * @ignore
 */
const parseContentTypeFromResponse = (response) => {
    (0, ow_1.default)(response, ow_1.default.object.partialShape({
        url: ow_1.default.string.url,
        headers: ow_1.default.object,
    }));
    const { url, headers } = response;
    const parseHeader = () => {
        const headerValue = headers['content-type'];
        if (!headerValue) {
            return undefined;
        }
        try {
            return content_type_1.default.parse(headerValue);
        }
        catch (err) {
            // The Content-Type header can not be parsed. The file extension is tried instead.
            return undefined;
        }
    };
    const parseExtension = () => {
        const { pathname } = new url_1.URL(url);
        const byExtension = mime_types_1.default.contentType(path_1.default.extname(pathname));
        // Fallback content type, specified in https://tools.ietf.org/html/rfc7231#section-3.1.1.5
        return content_type_1.default.parse(byExtension || 'application/octet-stream; charset=utf-8');
    };
    const { type, parameters } = parseHeader() || parseExtension();
    return {
        type,
        charset: parameters.charset,
    };
};
exports.parseContentTypeFromResponse = parseContentTypeFromResponse;
645
/**
 * Returns a promise that resolves with the finished Run object when the provided actor run finishes
 * or with the unfinished Run object when the `waitSecs` timeout lapses. The promise is NOT rejected
 * based on run status. You can inspect the `status` property of the Run object to find out its status.
 * It IS rejected when no Run object could be fetched before the wait ended.
 *
 * This is useful when you need to chain actor executions. Similar effect can be achieved
 * by using webhooks, so be sure to review which technique fits your use-case better.
 *
 * The options are validated with an exact shape: only `actorId`, `runId` and `waitSecs`
 * are accepted, any other property (e.g. a `token`) fails the validation.
 *
 * @param {object} options
 * @param {string} options.actorId
 *   ID of the actor that started the run.
 * @param {string} options.runId
 *   ID of the run itself.
 * @param {number} [options.waitSecs]
 *   Maximum time to wait for the run to finish, in seconds.
 *   If the limit is reached, the returned promise is resolved to a run object that will have
 *   status `READY` or `RUNNING`. If `waitSecs` is omitted (or `0`), the function waits indefinitely.
 * @returns {Promise<ActorRun>}
 * @throws {Error} If the run details could not be fetched from the server.
 * @memberOf utils
 * @name waitForRunToFinish
 * @function
 * @deprecated
 *   Please use the 'waitForFinish' functions of 'apify-client'.
 * @ignore
 */
const waitForRunToFinish = async (options) => {
    (0, ow_1.default)(options, ow_1.default.object.exactShape({
        actorId: ow_1.default.string,
        runId: ow_1.default.string,
        waitSecs: ow_1.default.optional.number,
    }));
    const { actorId, runId, waitSecs } = options;
    const startedAt = Date.now();
    const elapsedSecs = () => (Date.now() - startedAt) / 1000;
    let run;
    for (;;) {
        // Stop once the caller's time budget is used up...
        if (waitSecs && elapsedSecs() >= waitSecs) {
            break;
        }
        // ...or once the run has reached a terminal status.
        if (run && consts_1.ACT_JOB_TERMINAL_STATUSES.includes(run.status)) {
            break;
        }
        const remainingSecs = waitSecs ? Math.round(waitSecs - elapsedSecs()) : 999999;
        run = await exports.apifyClient.run(runId, actorId).waitForFinish({ waitSecs: remainingSecs });
        // It might take some time for database replicas to get up-to-date,
        // so the run might come back empty. Wait a little bit and try it again.
        if (!run) {
            await (0, exports.sleep)(250);
        }
    }
    if (!run) {
        throw new Error('Waiting for run to finish failed. Cannot fetch actor run details from the server.');
    }
    return run;
};
exports.waitForRunToFinish = waitForRunToFinish;
706
/**
 * Removes the local storage folder created when running locally.
 * This is useful in the event you are debugging your code locally.
 *
 * Be careful as this will remove the folder you provide and everything in it!
 *
 * @param {string} [folder] The folder to clean up. When omitted, the folder named by the
 *   local-storage-dir environment variable is used, or `apify_storage` if that is not set either.
 * @returns {Promise<void>}
 * @memberOf utils
 * @name purgeLocalStorage
 * @function
 */
const purgeLocalStorage = async (folder) => {
    // Explicit argument first, then the environment, then the default folder name.
    const targetFolder = folder || process.env[consts_1.ENV_VARS.LOCAL_STORAGE_DIR] || 'apify_storage';
    await rimrafp(targetFolder);
};
exports.purgeLocalStorage = purgeLocalStorage;
727
- /**
728
- * A namespace that contains various utilities.
729
- *
730
- * **Example usage:**
731
- *
732
- * ```javascript
733
- * const Apify = require('apify');
734
- *
735
- * ...
736
- *
737
- * // Sleep 1.5 seconds
738
- * await Apify.utils.sleep(1500);
739
- * ```
740
- * @namespace utils
741
- */
742
- exports.publicUtils = {
743
- isDocker: exports.isDocker,
744
- sleep: exports.sleep,
745
- downloadListOfUrls,
746
- extractUrls,
747
- htmlToText,
748
- URL_NO_COMMAS_REGEX,
749
- URL_WITH_COMMAS_REGEX,
750
- createRequestDebugInfo,
751
- waitForRunToFinish: exports.waitForRunToFinish,
752
- purgeLocalStorage: exports.purgeLocalStorage,
753
- };
754
- //# sourceMappingURL=utils.js.map