apify 2.3.1-beta.4 → 3.0.0-alpha.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (211) hide show
  1. package/README.md +6 -5
  2. package/package.json +69 -128
  3. package/build/actor.d.ts +0 -113
  4. package/build/actor.d.ts.map +0 -1
  5. package/build/actor.js +0 -582
  6. package/build/actor.js.map +0 -1
  7. package/build/apify.d.ts +0 -752
  8. package/build/apify.d.ts.map +0 -1
  9. package/build/apify.js +0 -877
  10. package/build/apify.js.map +0 -1
  11. package/build/autoscaling/autoscaled_pool.d.ts +0 -384
  12. package/build/autoscaling/autoscaled_pool.d.ts.map +0 -1
  13. package/build/autoscaling/autoscaled_pool.js +0 -557
  14. package/build/autoscaling/autoscaled_pool.js.map +0 -1
  15. package/build/autoscaling/snapshotter.d.ts +0 -278
  16. package/build/autoscaling/snapshotter.d.ts.map +0 -1
  17. package/build/autoscaling/snapshotter.js +0 -447
  18. package/build/autoscaling/snapshotter.js.map +0 -1
  19. package/build/autoscaling/system_status.d.ts +0 -224
  20. package/build/autoscaling/system_status.d.ts.map +0 -1
  21. package/build/autoscaling/system_status.js +0 -228
  22. package/build/autoscaling/system_status.js.map +0 -1
  23. package/build/browser_launchers/browser_launcher.d.ts +0 -154
  24. package/build/browser_launchers/browser_launcher.d.ts.map +0 -1
  25. package/build/browser_launchers/browser_launcher.js +0 -160
  26. package/build/browser_launchers/browser_launcher.js.map +0 -1
  27. package/build/browser_launchers/browser_plugin.d.ts +0 -23
  28. package/build/browser_launchers/browser_plugin.d.ts.map +0 -1
  29. package/build/browser_launchers/browser_plugin.js +0 -25
  30. package/build/browser_launchers/browser_plugin.js.map +0 -1
  31. package/build/browser_launchers/playwright_launcher.d.ts +0 -131
  32. package/build/browser_launchers/playwright_launcher.d.ts.map +0 -1
  33. package/build/browser_launchers/playwright_launcher.js +0 -150
  34. package/build/browser_launchers/playwright_launcher.js.map +0 -1
  35. package/build/browser_launchers/puppeteer_launcher.d.ts +0 -153
  36. package/build/browser_launchers/puppeteer_launcher.d.ts.map +0 -1
  37. package/build/browser_launchers/puppeteer_launcher.js +0 -197
  38. package/build/browser_launchers/puppeteer_launcher.js.map +0 -1
  39. package/build/cache_container.d.ts +0 -31
  40. package/build/cache_container.d.ts.map +0 -1
  41. package/build/cache_container.js +0 -48
  42. package/build/cache_container.js.map +0 -1
  43. package/build/configuration.d.ts +0 -226
  44. package/build/configuration.d.ts.map +0 -1
  45. package/build/configuration.js +0 -325
  46. package/build/configuration.js.map +0 -1
  47. package/build/constants.d.ts +0 -37
  48. package/build/constants.d.ts.map +0 -1
  49. package/build/constants.js +0 -41
  50. package/build/constants.js.map +0 -1
  51. package/build/crawlers/basic_crawler.d.ts +0 -443
  52. package/build/crawlers/basic_crawler.d.ts.map +0 -1
  53. package/build/crawlers/basic_crawler.js +0 -664
  54. package/build/crawlers/basic_crawler.js.map +0 -1
  55. package/build/crawlers/browser_crawler.d.ts +0 -512
  56. package/build/crawlers/browser_crawler.d.ts.map +0 -1
  57. package/build/crawlers/browser_crawler.js +0 -540
  58. package/build/crawlers/browser_crawler.js.map +0 -1
  59. package/build/crawlers/cheerio_crawler.d.ts +0 -931
  60. package/build/crawlers/cheerio_crawler.d.ts.map +0 -1
  61. package/build/crawlers/cheerio_crawler.js +0 -913
  62. package/build/crawlers/cheerio_crawler.js.map +0 -1
  63. package/build/crawlers/crawler_extension.d.ts +0 -10
  64. package/build/crawlers/crawler_extension.d.ts.map +0 -1
  65. package/build/crawlers/crawler_extension.js +0 -19
  66. package/build/crawlers/crawler_extension.js.map +0 -1
  67. package/build/crawlers/crawler_utils.d.ts +0 -34
  68. package/build/crawlers/crawler_utils.d.ts.map +0 -1
  69. package/build/crawlers/crawler_utils.js +0 -87
  70. package/build/crawlers/crawler_utils.js.map +0 -1
  71. package/build/crawlers/playwright_crawler.d.ts +0 -448
  72. package/build/crawlers/playwright_crawler.d.ts.map +0 -1
  73. package/build/crawlers/playwright_crawler.js +0 -299
  74. package/build/crawlers/playwright_crawler.js.map +0 -1
  75. package/build/crawlers/puppeteer_crawler.d.ts +0 -425
  76. package/build/crawlers/puppeteer_crawler.d.ts.map +0 -1
  77. package/build/crawlers/puppeteer_crawler.js +0 -299
  78. package/build/crawlers/puppeteer_crawler.js.map +0 -1
  79. package/build/crawlers/statistics.d.ts +0 -185
  80. package/build/crawlers/statistics.d.ts.map +0 -1
  81. package/build/crawlers/statistics.js +0 -331
  82. package/build/crawlers/statistics.js.map +0 -1
  83. package/build/enqueue_links/click_elements.d.ts +0 -179
  84. package/build/enqueue_links/click_elements.d.ts.map +0 -1
  85. package/build/enqueue_links/click_elements.js +0 -434
  86. package/build/enqueue_links/click_elements.js.map +0 -1
  87. package/build/enqueue_links/enqueue_links.d.ts +0 -117
  88. package/build/enqueue_links/enqueue_links.d.ts.map +0 -1
  89. package/build/enqueue_links/enqueue_links.js +0 -163
  90. package/build/enqueue_links/enqueue_links.js.map +0 -1
  91. package/build/enqueue_links/shared.d.ts +0 -42
  92. package/build/enqueue_links/shared.d.ts.map +0 -1
  93. package/build/enqueue_links/shared.js +0 -121
  94. package/build/enqueue_links/shared.js.map +0 -1
  95. package/build/errors.d.ts +0 -29
  96. package/build/errors.d.ts.map +0 -1
  97. package/build/errors.js +0 -38
  98. package/build/errors.js.map +0 -1
  99. package/build/events.d.ts +0 -11
  100. package/build/events.d.ts.map +0 -1
  101. package/build/events.js +0 -147
  102. package/build/events.js.map +0 -1
  103. package/build/index.d.ts +0 -4
  104. package/build/index.d.ts.map +0 -1
  105. package/build/index.js +0 -7
  106. package/build/index.js.map +0 -1
  107. package/build/main.d.ts +0 -179
  108. package/build/main.d.ts.map +0 -1
  109. package/build/main.js +0 -81
  110. package/build/main.js.map +0 -1
  111. package/build/playwright_utils.d.ts +0 -9
  112. package/build/playwright_utils.d.ts.map +0 -1
  113. package/build/playwright_utils.js +0 -90
  114. package/build/playwright_utils.js.map +0 -1
  115. package/build/proxy_configuration.d.ts +0 -411
  116. package/build/proxy_configuration.d.ts.map +0 -1
  117. package/build/proxy_configuration.js +0 -517
  118. package/build/proxy_configuration.js.map +0 -1
  119. package/build/pseudo_url.d.ts +0 -86
  120. package/build/pseudo_url.d.ts.map +0 -1
  121. package/build/pseudo_url.js +0 -153
  122. package/build/pseudo_url.js.map +0 -1
  123. package/build/puppeteer_request_interception.d.ts +0 -8
  124. package/build/puppeteer_request_interception.d.ts.map +0 -1
  125. package/build/puppeteer_request_interception.js +0 -235
  126. package/build/puppeteer_request_interception.js.map +0 -1
  127. package/build/puppeteer_utils.d.ts +0 -250
  128. package/build/puppeteer_utils.d.ts.map +0 -1
  129. package/build/puppeteer_utils.js +0 -551
  130. package/build/puppeteer_utils.js.map +0 -1
  131. package/build/request.d.ts +0 -180
  132. package/build/request.d.ts.map +0 -1
  133. package/build/request.js +0 -261
  134. package/build/request.js.map +0 -1
  135. package/build/request_list.d.ts +0 -581
  136. package/build/request_list.d.ts.map +0 -1
  137. package/build/request_list.js +0 -826
  138. package/build/request_list.js.map +0 -1
  139. package/build/serialization.d.ts +0 -5
  140. package/build/serialization.d.ts.map +0 -1
  141. package/build/serialization.js +0 -139
  142. package/build/serialization.js.map +0 -1
  143. package/build/session_pool/errors.d.ts +0 -11
  144. package/build/session_pool/errors.d.ts.map +0 -1
  145. package/build/session_pool/errors.js +0 -18
  146. package/build/session_pool/errors.js.map +0 -1
  147. package/build/session_pool/events.d.ts +0 -5
  148. package/build/session_pool/events.d.ts.map +0 -1
  149. package/build/session_pool/events.js +0 -6
  150. package/build/session_pool/events.js.map +0 -1
  151. package/build/session_pool/session.d.ts +0 -286
  152. package/build/session_pool/session.d.ts.map +0 -1
  153. package/build/session_pool/session.js +0 -355
  154. package/build/session_pool/session.js.map +0 -1
  155. package/build/session_pool/session_pool.d.ts +0 -280
  156. package/build/session_pool/session_pool.d.ts.map +0 -1
  157. package/build/session_pool/session_pool.js +0 -393
  158. package/build/session_pool/session_pool.js.map +0 -1
  159. package/build/session_pool/session_utils.d.ts +0 -4
  160. package/build/session_pool/session_utils.d.ts.map +0 -1
  161. package/build/session_pool/session_utils.js +0 -24
  162. package/build/session_pool/session_utils.js.map +0 -1
  163. package/build/stealth/hiding_tricks.d.ts +0 -22
  164. package/build/stealth/hiding_tricks.d.ts.map +0 -1
  165. package/build/stealth/hiding_tricks.js +0 -308
  166. package/build/stealth/hiding_tricks.js.map +0 -1
  167. package/build/stealth/stealth.d.ts +0 -56
  168. package/build/stealth/stealth.d.ts.map +0 -1
  169. package/build/stealth/stealth.js +0 -125
  170. package/build/stealth/stealth.js.map +0 -1
  171. package/build/storages/dataset.d.ts +0 -288
  172. package/build/storages/dataset.d.ts.map +0 -1
  173. package/build/storages/dataset.js +0 -480
  174. package/build/storages/dataset.js.map +0 -1
  175. package/build/storages/key_value_store.d.ts +0 -243
  176. package/build/storages/key_value_store.d.ts.map +0 -1
  177. package/build/storages/key_value_store.js +0 -462
  178. package/build/storages/key_value_store.js.map +0 -1
  179. package/build/storages/request_queue.d.ts +0 -318
  180. package/build/storages/request_queue.d.ts.map +0 -1
  181. package/build/storages/request_queue.js +0 -636
  182. package/build/storages/request_queue.js.map +0 -1
  183. package/build/storages/storage_manager.d.ts +0 -87
  184. package/build/storages/storage_manager.d.ts.map +0 -1
  185. package/build/storages/storage_manager.js +0 -150
  186. package/build/storages/storage_manager.js.map +0 -1
  187. package/build/tsconfig.tsbuildinfo +0 -1
  188. package/build/typedefs.d.ts +0 -146
  189. package/build/typedefs.d.ts.map +0 -1
  190. package/build/typedefs.js +0 -88
  191. package/build/typedefs.js.map +0 -1
  192. package/build/utils.d.ts +0 -175
  193. package/build/utils.d.ts.map +0 -1
  194. package/build/utils.js +0 -731
  195. package/build/utils.js.map +0 -1
  196. package/build/utils_log.d.ts +0 -41
  197. package/build/utils_log.d.ts.map +0 -1
  198. package/build/utils_log.js +0 -192
  199. package/build/utils_log.js.map +0 -1
  200. package/build/utils_request.d.ts +0 -77
  201. package/build/utils_request.d.ts.map +0 -1
  202. package/build/utils_request.js +0 -385
  203. package/build/utils_request.js.map +0 -1
  204. package/build/utils_social.d.ts +0 -210
  205. package/build/utils_social.d.ts.map +0 -1
  206. package/build/utils_social.js +0 -787
  207. package/build/utils_social.js.map +0 -1
  208. package/build/validators.d.ts +0 -23
  209. package/build/validators.d.ts.map +0 -1
  210. package/build/validators.js +0 -29
  211. package/build/validators.js.map +0 -1
@@ -1,385 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.requestAsBrowser = void 0;
4
- const tslib_1 = require("tslib");
5
- const got_scraping_1 = require("got-scraping");
6
- const ow_1 = (0, tslib_1.__importDefault)(require("ow"));
7
- const utils_log_1 = (0, tslib_1.__importDefault)(require("./utils_log"));
8
- /* eslint-enable no-unused-vars,import/named,import/order */
9
- /**
10
- * @typedef {(IncomingMessage & { body: string })} RequestAsBrowserResult
11
- */
12
- /**
13
- * @typedef RequestAsBrowserOptions
14
- * @property {string} url
15
- * URL of the target endpoint. Supports both HTTP and HTTPS schemes.
16
- * @property {string} [method="GET"]
17
- * HTTP method.
18
- * @property {Object<string, string>} [headers]
19
- * Additional HTTP headers to add. It's only recommended to use this option,
20
- * with headers that are typically added by websites, such as cookies. Overriding
21
- * default browser headers will remove the masking this function provides.
22
- * @property {string} [proxyUrl]
23
- * An HTTP proxy to be passed down to the HTTP request. Supports proxy authentication with Basic Auth.
24
- * @property {object} [headerGeneratorOptions]
25
- * Configuration to be used for generating correct browser headers.
26
- * See the [`header-generator`](https://github.com/apify/header-generator) library.
27
- * @property {string} [languageCode=en]
28
- * Two-letter ISO 639 language code.
29
- * @property {string} [countryCode=US]
30
- * Two-letter ISO 3166 country code.
31
- * @property {boolean} [useMobileVersion]
32
- * If `true`, the function uses User-Agent of a mobile browser.
33
- * @property {boolean} [ignoreSslErrors=true]
34
- * If set to true, SSL/TLS certificate errors will be ignored.
35
- * @property {boolean} [useInsecureHttpParser=true]
36
- * Node.js' HTTP parser is stricter than parsers used by web browsers, which prevents scraping of websites
37
- * whose servers do not comply with HTTP specs, either by accident or due to some anti-scraping protections,
38
- * causing e.g. the `invalid header value char` error. The `useInsecureHttpParser` option forces
39
- * the HTTP parser to ignore certain errors which lets you scrape such websites.
40
- * However, it will also open your application to some security vulnerabilities,
41
- * although the risk should be negligible as these vulnerabilities mainly relate to server applications, not clients.
42
- * Learn more in this [blog post](https://snyk.io/blog/node-js-release-fixes-a-critical-http-security-vulnerability/).
43
- * @property {AbortFunction} [abortFunction]
44
- * Function accepts `response` object as a single parameter and should return `true` or `false`.
45
- * If function returns true, request gets aborted.
46
- * @property {boolean} [useHttp2=true]
47
- * If set to false, it will prevent use of HTTP2 requests. This is strongly discouraged. Websites
48
- * expect HTTP2 connections, because browsers use HTTP2 by default. It will automatically downgrade
49
- * to HTTP/1.1 for websites that do not support HTTP2.
50
- * @property {object} [sessionToken]
51
- * A unique object used to generate browser headers. By default, new headers are generated on every call.
52
- * Set this option to make these headers persistent.
53
- */
54
- /**
55
- * @callback AbortFunction
56
- * @param {IncomingMessage} response
57
- * @returns {boolean}
58
- */
59
- /**
60
- * **IMPORTANT:** This function uses an insecure version of HTTP parser by default
61
- * and also ignores SSL/TLS errors. This is very useful in scraping, because it allows bypassing
62
- * certain anti-scraping walls, but it also exposes some vulnerability. For other than scraping
63
- * scenarios, please set `useInsecureHttpParser: false` and `ignoreSslErrors: false`.
64
- *
65
- * Sends a HTTP request that looks like a request sent by a web browser,
66
- * fully emulating browser's HTTP headers. It uses HTTP2 by default for Node 12+.
67
- *
68
- * This function is useful for web scraping of websites that send the full HTML in the first response.
69
- * Thanks to this function, the target web server has no simple way to find out the request
70
- * hasn't been sent by a human's web browser. Using a headless browser for such requests
71
- * is an order of magnitude more resource-intensive than this function.
72
- *
73
- * The function emulates the Chrome and Firefox web browsers. If you want more control
74
- * over the browsers and their versions, use the `headerGeneratorOptions` property.
75
- * You can find more info in the readme of the [`header-generator`](https://github.com/apify/header-generator) library.
76
- *
77
- * Internally, the function uses the [`got-scraping`](https://github.com/apify/got-scraping) library to perform the request.
78
- * All `options` not recognized by this function are passed to it so see it for more details.
79
- *
80
- * **Example usage:**
81
- * ```js
82
- * const Apify = require('apify');
83
- *
84
- * const { utils: { requestAsBrowser } } = Apify;
85
- *
86
- * ...
87
- *
88
- * const response = await requestAsBrowser({ url: 'https://www.example.com/' });
89
- *
90
- * const html = response.body;
91
- * const status = response.statusCode;
92
- * const contentType = response.headers['content-type'];
93
- * ```
94
- *
95
- * @param {RequestAsBrowserOptions} options All `requestAsBrowser` configuration options.
96
- *
97
- * @return {Promise<RequestAsBrowserResult>} The result can be various objects, but it will always be like a
98
- * [Node.js HTTP response stream](https://nodejs.org/api/http.html#http_class_http_incomingmessage)
99
- * with a 'body' property for the parsed response body, unless the 'stream' option is used.
100
- * @memberOf utils
101
- * @name requestAsBrowser
102
- * @function
103
- */
104
- const requestAsBrowser = async (options = {}) => {
105
- logDeprecatedOptions(options);
106
- (0, ow_1.default)(options, 'RequestAsBrowserOptions', ow_1.default.object.partialShape({
107
- payload: ow_1.default.optional.any(ow_1.default.string, ow_1.default.buffer),
108
- proxyUrl: ow_1.default.optional.string.url,
109
- languageCode: ow_1.default.optional.string.length(2),
110
- countryCode: ow_1.default.optional.string.length(2),
111
- useMobileVersion: ow_1.default.optional.boolean,
112
- abortFunction: ow_1.default.optional.function,
113
- ignoreSslErrors: ow_1.default.optional.boolean,
114
- useInsecureHttpParser: ow_1.default.optional.boolean,
115
- useHttp2: ow_1.default.optional.boolean,
116
- timeoutSecs: ow_1.default.optional.number,
117
- throwOnHttpErrors: ow_1.default.optional.boolean,
118
- headerGeneratorOptions: ow_1.default.optional.object,
119
- stream: ow_1.default.optional.boolean,
120
- decodeBody: ow_1.default.optional.boolean,
121
- sessionToken: ow_1.default.optional.object,
122
- }));
123
- (0, ow_1.default)(options, 'RequestAsBrowserOptions', ow_1.default.object.validate((opts) => ({
124
- validator: areBodyOptionsCompatible(opts),
125
- message: (label) => `The 'payload', 'body', 'json' and 'form' options of ${label} are mutually exclusive.`,
126
- })));
127
- // We created the `got-scraping` package which replaced underlying @apify/http-request.
128
- // At the same time, we want users to be able to use requestAsBrowser without breaking changes.
129
- // So we do a lot of property mapping here, to make sure that everything works as expected.
130
- // TODO Update this with SDK v3 and use `got-scraping` API directly.
131
- const { payload, // alias for body to allow direct passing of our Request objects
132
- json, headerGeneratorOptions, languageCode = 'en', countryCode = 'US', useMobileVersion = false, abortFunction = () => false, ignoreSslErrors = true, useInsecureHttpParser = true, useHttp2 = true, timeoutSecs = 30, throwOnHttpErrors = false, stream = false, decodeBody = true, forceUrlEncoding, // TODO remove in v3. It's not used, but we keep it here to prevent validation errors in got.
133
- ...gotParams } = options;
134
- const gotScrapingOptions = {
135
- insecureHTTPParser: useInsecureHttpParser,
136
- http2: useHttp2,
137
- timeout: { request: timeoutSecs * 1000 },
138
- throwHttpErrors: throwOnHttpErrors,
139
- isStream: stream,
140
- decompress: decodeBody,
141
- // We overwrite the above arguments because we want to give the official
142
- // got interface a priority over our requestAsBrowser one.
143
- // E.g. { isStream: false, stream: true } should produce { isStream: false }.
144
- ...gotParams,
145
- https: {
146
- ...gotParams.https,
147
- rejectUnauthorized: !ignoreSslErrors,
148
- },
149
- };
150
- // Order is important
151
- normalizePayloadOption(payload, gotScrapingOptions);
152
- normalizeJsonOption(json, gotScrapingOptions);
153
- ensureCorrectHttp2Headers(gotScrapingOptions);
154
- maybeAddAbortHook(abortFunction, gotScrapingOptions);
155
- if (!headerGeneratorOptions) {
156
- // Values that respect old requestAsBrowser user-agents and settings
157
- gotScrapingOptions.headerGeneratorOptions = {
158
- devices: useMobileVersion ? ['mobile'] : ['desktop'],
159
- locales: [`${languageCode}-${countryCode}`],
160
- };
161
- }
162
- else {
163
- gotScrapingOptions.headerGeneratorOptions = headerGeneratorOptions;
164
- }
165
- // Return the promise directly
166
- if (!gotScrapingOptions.isStream) {
167
- return (0, got_scraping_1.gotScraping)(gotScrapingOptions);
168
- }
169
- // abortFunction must be handled separately for streams :(
170
- const duplexStream = (0, got_scraping_1.gotScraping)(gotScrapingOptions);
171
- ensureRequestIsDispatched(duplexStream, gotScrapingOptions);
172
- return new Promise((resolve, reject) => {
173
- duplexStream
174
- .on('error', reject)
175
- .on('response', (res) => {
176
- try {
177
- const shouldAbort = abortFunction(res);
178
- if (shouldAbort) {
179
- const err = new Error(`Request for ${gotScrapingOptions.url} aborted due to abortFunction.`);
180
- duplexStream.destroy(err);
181
- return reject(err);
182
- }
183
- }
184
- catch (e) {
185
- duplexStream.destroy(e);
186
- return reject(e);
187
- }
188
- addResponsePropertiesToStream(duplexStream, res);
189
- return resolve(duplexStream);
190
- });
191
- });
192
- };
193
- exports.requestAsBrowser = requestAsBrowser;
194
- /**
195
- * `got` has a `body` option and 2 helpers, `json` and `form`, to provide specific bodies.
196
- * Those options are mutually exclusive. `requestAsBrowser` also supports `payload` as
197
- * an alias of `body`. It must be exclusive as well.
198
- * @param {RequestAsBrowserOptions} requestAsBrowserOptions
199
- * @return {boolean}
200
- * @private
201
- * @ignore
202
- */
203
- function areBodyOptionsCompatible(requestAsBrowserOptions) {
204
- const { payload, json, body, form } = requestAsBrowserOptions;
205
- // A boolean is old requestAsBrowser interface and not a real "body"
206
- // See the normalizeJsonOption function.
207
- const jsonBody = typeof json === 'boolean' ? undefined : json;
208
- const possibleOpts = [payload, jsonBody, body, form];
209
- const usedOpts = possibleOpts.filter((opt) => opt !== undefined);
210
- // Only a single option out of the 4 can be used.
211
- return usedOpts.length <= 1;
212
- }
213
- /**
214
- * got-scraping uses 'body', but we also support 'payload' from {@link Request}.
215
- * @param {string|Buffer} payload
216
- * @param {GotScrapingOptions} gotScrapingOptions
217
- * @ignore
218
- * @private
219
- */
220
- function normalizePayloadOption(payload, gotScrapingOptions) {
221
- if (payload !== undefined)
222
- gotScrapingOptions.body = payload;
223
- }
224
- /**
225
- * `json` is a boolean flag in `requestAsBrowser`, but a `body` alias that
226
- * adds a 'content-type: application/json' header in got. To stay backwards
227
- * compatible we need to figure out which option the user provided.
228
- * @param {*} json
229
- * @param {GotScrapingOptions} gotScrapingOptions
230
- * @ignore
231
- * @private
232
- */
233
- function normalizeJsonOption(json, gotScrapingOptions) {
234
- // If it's a boolean, then it's the old requestAsBrowser API.
235
- // If it's true, it means the user expects a JSON response.
236
- const deprecationMessage = `"options.json" of type: Boolean is deprecated.`
237
- + 'If you expect a JSON response, use "options.responseType = \'json\'"'
238
- + 'Use "options.json" with a plain object to provide a JSON body.';
239
- if (json === true) {
240
- utils_log_1.default.deprecated(deprecationMessage);
241
- gotScrapingOptions.responseType = 'json';
242
- gotScrapingOptions.https.ciphers = undefined;
243
- }
244
- else if (json === false) {
245
- utils_log_1.default.deprecated(deprecationMessage);
246
- // Do nothing, it means the user expects something else than JSON.
247
- }
248
- else {
249
- // If it's something else, we let `got` handle it as a request body.
250
- gotScrapingOptions.json = json;
251
- }
252
- }
253
- /**
254
- * 'connection' and 'host' headers are forbidden when using HTTP2. We delete
255
- * them from user-provided headers because we switched the default from HTTP1 to 2.
256
- * @param {GotScrapingOptions} gotScrapingOptions
257
- * @ignore
258
- * @private
259
- */
260
- function ensureCorrectHttp2Headers(gotScrapingOptions) {
261
- if (gotScrapingOptions.http2 && gotScrapingOptions.headers) {
262
- gotScrapingOptions.headers = { ...gotScrapingOptions.headers };
263
- // eslint-disable-next-line no-restricted-syntax, guard-for-in
264
- for (const key in gotScrapingOptions.headers) {
265
- const lkey = key.toLowerCase();
266
- if (lkey === 'connection' || lkey === 'host') {
267
- delete gotScrapingOptions.headers[key];
268
- }
269
- }
270
- }
271
- }
272
- /**
273
- * `abortFunction` is an old `requestAsBrowser` interface for aborting requests before
274
- * the response body is read to save bandwidth.
275
- * @param {function} abortFunction
276
- * @param {GotScrapingOptions} gotScrapingOptions
277
- * @ignore
278
- * @private
279
- */
280
- function maybeAddAbortHook(abortFunction, gotScrapingOptions) {
281
- // Stream aborting must be handled on the response object because `got`
282
- // does not execute `afterResponse` hooks for streams :(
283
- if (gotScrapingOptions.isStream)
284
- return;
285
- const abortHook = (response) => {
286
- const shouldAbort = abortFunction(response);
287
- if (shouldAbort) {
288
- throw new Error(`Request for ${gotScrapingOptions.url} aborted due to abortFunction.`);
289
- }
290
- return response;
291
- };
292
- const { hooks } = gotScrapingOptions;
293
- const fixedHooks = {
294
- ...hooks,
295
- afterResponse: [
296
- ...((hooks && hooks.afterResponse) || []),
297
- abortHook,
298
- ],
299
- };
300
- gotScrapingOptions.hooks = fixedHooks;
301
- }
302
- /**
303
- * 'got' will not dispatch non-GET request stream until a body is provided.
304
- * @param {stream.Duplex} duplexStream
305
- * @param {GotScrapingOptions} gotScrapingOptions
306
- */
307
- function ensureRequestIsDispatched(duplexStream, gotScrapingOptions) {
308
- const { method } = gotScrapingOptions;
309
- const bodyIsEmpty = gotScrapingOptions.body === undefined
310
- && gotScrapingOptions.json === undefined
311
- && gotScrapingOptions.form === undefined;
312
- if (method && method.toLowerCase() !== 'get' && bodyIsEmpty) {
313
- duplexStream.end();
314
- }
315
- }
316
- /**
317
- * @param {RequestAsBrowserOptions} options
318
- * @ignore
319
- * @private
320
- */
321
- function logDeprecatedOptions(options) {
322
- const deprecatedOptions = [
323
- // 'json' is handled in the JSON handler, because it has a conflict of types
324
- ['languageCode', 'headerGeneratorOptions.locales'],
325
- ['countryCode', 'headerGeneratorOptions.locales'],
326
- ['useMobileVersion', 'headerGeneratorOptions.devices'],
327
- ['payload', 'body'],
328
- ['useHttp2', 'http2'],
329
- ['stream', 'isStream'],
330
- ['decodeBody', 'decompress'],
331
- ['throwOnHttpErrors', 'throwHttpErrors'],
332
- ['timeoutSecs', 'timeout.request'],
333
- ['ignoreSslErrors', 'https.rejectUnauthorized'],
334
- ['abortFunction'], // custom message below
335
- ];
336
- for (const [deprecatedOption, newOption] of deprecatedOptions) {
337
- if (options[deprecatedOption] !== undefined) {
338
- // This will log only for the first property thanks to log.deprecated logging only once.
339
- const initialMessage = 'requestAsBrowser internal implementation has been replaced with the got-scraping module. '
340
- + 'To make the switch without breaking changes, we mapped all existing options to the got-scraping options. '
341
- + 'This mapping will be removed in SDK v3 and we advise you to update your code using the hints below: ';
342
- utils_log_1.default.deprecated(initialMessage);
343
- if (deprecatedOption === 'abortFunction') {
344
- utils_log_1.default.deprecated(`"options.${deprecatedOption}" is deprecated.`
345
- + 'Use a request cancellation process appropriate for your request type.'
346
- + 'Either a Stream or a Promise. See Got documentation for more info: https://github.com/sindresorhus/got');
347
- }
348
- else {
349
- utils_log_1.default.deprecated(`"options.${deprecatedOption}" is deprecated. Use "options.${newOption}" instead.`);
350
- }
351
- }
352
- }
353
- }
354
- /**
355
- * The stream object returned from got does not have the below properties.
356
- * At the same time, you can't read data directly from the response stream,
357
- * because they won't get emitted unless you also read from the primary
358
- * got stream. To be able to work with only one stream, we move the expected props
359
- * from the response stream to the got stream.
360
- * @param {GotStream} stream
361
- * @param {http.IncomingMessage} response
362
- * @return {GotStream}
363
- * @ignore
364
- * @private
365
- */
366
- function addResponsePropertiesToStream(stream, response) {
367
- const properties = [
368
- 'statusCode', 'statusMessage', 'headers',
369
- 'complete', 'httpVersion', 'rawHeaders',
370
- 'rawTrailers', 'trailers', 'url',
371
- 'request',
372
- ];
373
- response.on('end', () => {
374
- Object.assign(stream.rawTrailers, response.rawTrailers);
375
- Object.assign(stream.trailers, response.trailers);
376
- stream.complete = response.complete;
377
- });
378
- for (const prop of properties) {
379
- if (!(prop in stream)) {
380
- stream[prop] = response[prop];
381
- }
382
- }
383
- return stream;
384
- }
385
- //# sourceMappingURL=utils_request.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"utils_request.js","sourceRoot":"","sources":["../src/utils_request.js"],"names":[],"mappings":";;;;AAAA,+CAA2C;AAC3C,yDAAoB;AACpB,yEAA8B;AAI9B,4DAA4D;AAE5D;;GAEG;AAEH;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAyCG;AAEH;;;;GAIG;AAEH;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4CG;AACI,MAAM,gBAAgB,GAAG,KAAK,EAAE,OAAO,GAAG,EAAE,EAAE,EAAE;IACnD,oBAAoB,CAAC,OAAO,CAAC,CAAC;IAC9B,IAAA,YAAE,EAAC,OAAO,EAAE,yBAAyB,EAAE,YAAE,CAAC,MAAM,CAAC,YAAY,CAAC;QAC1D,OAAO,EAAE,YAAE,CAAC,QAAQ,CAAC,GAAG,CAAC,YAAE,CAAC,MAAM,EAAE,YAAE,CAAC,MAAM,CAAC;QAC9C,QAAQ,EAAE,YAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,GAAG;QAChC,YAAY,EAAE,YAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC;QAC1C,WAAW,EAAE,YAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC;QACzC,gBAAgB,EAAE,YAAE,CAAC,QAAQ,CAAC,OAAO;QACrC,aAAa,EAAE,YAAE,CAAC,QAAQ,CAAC,QAAQ;QACnC,eAAe,EAAE,YAAE,CAAC,QAAQ,CAAC,OAAO;QACpC,qBAAqB,EAAE,YAAE,CAAC,QAAQ,CAAC,OAAO;QAC1C,QAAQ,EAAE,YAAE,CAAC,QAAQ,CAAC,OAAO;QAC7B,WAAW,EAAE,YAAE,CAAC,QAAQ,CAAC,MAAM;QAC/B,iBAAiB,EAAE,YAAE,CAAC,QAAQ,CAAC,OAAO;QACtC,sBAAsB,EAAE,YAAE,CAAC,QAAQ,CAAC,MAAM;QAC1C,MAAM,EAAE,YAAE,CAAC,QAAQ,CAAC,OAAO;QAC3B,UAAU,EAAE,YAAE,CAAC,QAAQ,CAAC,OAAO;QAC/B,YAAY,EAAE,YAAE,CAAC,QAAQ,CAAC,MAAM;KACnC,CAAC,CAAC,CAAC;IAEJ,IAAA,YAAE,EAAC,OAAO,EAAE,yBAAyB,EAAE,YAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;QACjE,SAAS,EAAE,wBAAwB,CAAC,IAAI,CAAC;QACzC,OAAO,EAAE,CAAC,KAAK,EAAE,EAAE,CAAC,uDAAuD,KAAK,0BAA0B;KAC7G,CAAC,CAAC,CAAC,CAAC;IAEL,uFAAuF;IACvF,+FAA+F;IAC/F,2FAA2F;IAC3F,oEAAoE;IACpE,MAAM,EACF,OAAO,EAAE,gEAAgE;IACzE,IAAI,EACJ,sBAAsB,EACtB,YAAY,GAAG,IAAI,EACnB,WAAW,GAAG,IAAI,EAClB,gBAAgB,GAAG,KAAK,EACxB,aAAa,GAAG,GAAG,EAAE,CAAC,KAAK,EAC3B,eAAe,GAAG,IAAI,EACtB,qBAAqB,GAAG,IAAI,EAC5B,QAAQ,GAAG,IAAI,EACf,WAAW,GAAG,EAAE,EAChB,iBAAiB,GAAG,KAAK,EACzB,MAAM,GAAG,KAAK,EACd,UAAU,GAAG,IAAI,EACjB,gBAAgB,EAAE,6FAA6F;IAC/G,GAAG,SAAS,EACf,GAAG,OAAO,CAAC;IAEZ,MAAM,kBAAkB,GAAG;QACvB,kBAAkB,EAAE,qBAAqB;QACzC,KAAK,EAAE,QAAQ;QACf,OAAO,EAAE,EAAE,OAAO,EAAE,WAAW,GAAG,IAAI,EAAE;QACxC,eAAe,EAAE,iBAAiB;
QAClC,QAAQ,EAAE,MAAM;QAChB,UAAU,EAAE,UAAU;QACtB,wEAAwE;QACxE,0DAA0D;QAC1D,6EAA6E;QAC7E,GAAG,SAAS;QACZ,KAAK,EAAE;YACH,GAAG,SAAS,CAAC,KAAK;YAClB,kBAAkB,EAAE,CAAC,eAAe;SACvC;KACJ,CAAC;IAEF,qBAAqB;IACrB,sBAAsB,CAAC,OAAO,EAAE,kBAAkB,CAAC,CAAC;IACpD,mBAAmB,CAAC,IAAI,EAAE,kBAAkB,CAAC,CAAC;IAC9C,yBAAyB,CAAC,kBAAkB,CAAC,CAAC;IAC9C,iBAAiB,CAAC,aAAa,EAAE,kBAAkB,CAAC,CAAC;IACrD,IAAI,CAAC,sBAAsB,EAAE;QACzB,oEAAoE;QACpE,kBAAkB,CAAC,sBAAsB,GAAG;YACxC,OAAO,EAAE,gBAAgB,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;YACpD,OAAO,EAAE,CAAC,GAAG,YAAY,IAAI,WAAW,EAAE,CAAC;SAC9C,CAAC;KACL;SAAM;QACH,kBAAkB,CAAC,sBAAsB,GAAG,sBAAsB,CAAC;KACtE;IAED,8BAA8B;IAC9B,IAAI,CAAC,kBAAkB,CAAC,QAAQ,EAAE;QAC9B,OAAO,IAAA,0BAAW,EAAC,kBAAkB,CAAC,CAAC;KAC1C;IAED,0DAA0D;IAC1D,MAAM,YAAY,GAAG,IAAA,0BAAW,EAAC,kBAAkB,CAAC,CAAC;IAErD,yBAAyB,CAAC,YAAY,EAAE,kBAAkB,CAAC,CAAC;IAE5D,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;QACnC,YAAY;aACP,EAAE,CAAC,OAAO,EAAE,MAAM,CAAC;aACnB,EAAE,CAAC,UAAU,EAAE,CAAC,GAAG,EAAE,EAAE;YACpB,IAAI;gBACA,MAAM,WAAW,GAAG,aAAa,CAAC,GAAG,CAAC,CAAC;gBACvC,IAAI,WAAW,EAAE;oBACb,MAAM,GAAG,GAAG,IAAI,KAAK,CAAC,eAAe,kBAAkB,CAAC,GAAG,gCAAgC,CAAC,CAAC;oBAC7F,YAAY,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;oBAC1B,OAAO,MAAM,CAAC,GAAG,CAAC,CAAC;iBACtB;aACJ;YAAC,OAAO,CAAC,EAAE;gBACR,YAAY,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;gBACxB,OAAO,MAAM,CAAC,CAAC,CAAC,CAAC;aACpB;YAED,6BAA6B,CAAC,YAAY,EAAE,GAAG,CAAC,CAAC;YAEjD,OAAO,OAAO,CAAC,YAAY,CAAC,CAAC;QACjC,CAAC,CAAC,CAAC;IACX,CAAC,CAAC,CAAC;AACP,CAAC,CAAC;AA/GW,QAAA,gBAAgB,oBA+G3B;AAEF;;;;;;;;GAQG;AACH,SAAS,wBAAwB,CAAC,uBAAuB;IACrD,MAAM,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,GAAG,uBAAuB,CAAC;IAC9D,oEAAoE;IACpE,wCAAwC;IACxC,MAAM,QAAQ,GAAG,OAAO,IAAI,KAAK,SAAS,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC;IAE9D,MAAM,YAAY,GAAG,CAAC,OAAO,EAAE,QAAQ,EAAE,IAAI,EAAE,IAAI,CAAC,CAAC;IACrD,MAAM,QAAQ,GAAG,YAAY,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,KAAK,SAAS,CAAC,CAAC;IAEjE,iDAAiD;IACjD,OAAO,QAAQ,CAAC,MAAM,IAAI,CAAC,CAAC;AAChC,CAAC;AAED;;;;;;GAMG;AACH,SAAS,sBAAsB,CAAC,O
AAO,EAAE,kBAAkB;IACvD,IAAI,OAAO,KAAK,SAAS;QAAE,kBAAkB,CAAC,IAAI,GAAG,OAAO,CAAC;AACjE,CAAC;AAED;;;;;;;;GAQG;AACH,SAAS,mBAAmB,CAAC,IAAI,EAAE,kBAAkB;IACjD,6DAA6D;IAC7D,2DAA2D;IAC3D,MAAM,kBAAkB,GAAG,gDAAgD;UACrE,sEAAsE;UACtE,gEAAgE,CAAC;IACvE,IAAI,IAAI,KAAK,IAAI,EAAE;QACf,mBAAG,CAAC,UAAU,CAAC,kBAAkB,CAAC,CAAC;QACnC,kBAAkB,CAAC,YAAY,GAAG,MAAM,CAAC;QACzC,kBAAkB,CAAC,KAAK,CAAC,OAAO,GAAG,SAAS,CAAC;KAChD;SAAM,IAAI,IAAI,KAAK,KAAK,EAAE;QACvB,mBAAG,CAAC,UAAU,CAAC,kBAAkB,CAAC,CAAC;QACnC,kEAAkE;KACrE;SAAM;QACH,oEAAoE;QACpE,kBAAkB,CAAC,IAAI,GAAG,IAAI,CAAC;KAClC;AACL,CAAC;AAED;;;;;;GAMG;AACH,SAAS,yBAAyB,CAAC,kBAAkB;IACjD,IAAI,kBAAkB,CAAC,KAAK,IAAI,kBAAkB,CAAC,OAAO,EAAE;QACxD,kBAAkB,CAAC,OAAO,GAAG,EAAE,GAAG,kBAAkB,CAAC,OAAO,EAAE,CAAC;QAE/D,8DAA8D;QAC9D,KAAK,MAAM,GAAG,IAAI,kBAAkB,CAAC,OAAO,EAAE;YAC1C,MAAM,IAAI,GAAG,GAAG,CAAC,WAAW,EAAE,CAAC;YAE/B,IAAI,IAAI,KAAK,YAAY,IAAI,IAAI,KAAK,MAAM,EAAE;gBAC1C,OAAO,kBAAkB,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;aAC1C;SACJ;KACJ;AACL,CAAC;AAED;;;;;;;GAOG;AACH,SAAS,iBAAiB,CAAC,aAAa,EAAE,kBAAkB;IACxD,uEAAuE;IACvE,wDAAwD;IACxD,IAAI,kBAAkB,CAAC,QAAQ;QAAE,OAAO;IAExC,MAAM,SAAS,GAAG,CAAC,QAAQ,EAAE,EAAE;QAC3B,MAAM,WAAW,GAAG,aAAa,CAAC,QAAQ,CAAC,CAAC;QAC5C,IAAI,WAAW,EAAE;YACb,MAAM,IAAI,KAAK,CAAC,eAAe,kBAAkB,CAAC,GAAG,gCAAgC,CAAC,CAAC;SAC1F;QACD,OAAO,QAAQ,CAAC;IACpB,CAAC,CAAC;IAEF,MAAM,EAAE,KAAK,EAAE,GAAG,kBAAkB,CAAC;IACrC,MAAM,UAAU,GAAG;QACf,GAAG,KAAK;QACR,aAAa,EAAE;YACX,GAAG,CAAC,CAAC,KAAK,IAAI,KAAK,CAAC,aAAa,CAAC,IAAI,EAAE,CAAC;YACzC,SAAS;SACZ;KACJ,CAAC;IAEF,kBAAkB,CAAC,KAAK,GAAG,UAAU,CAAC;AAC1C,CAAC;AAED;;;;GAIG;AACH,SAAS,yBAAyB,CAAC,YAAY,EAAE,kBAAkB;IAC/D,MAAM,EAAE,MAAM,EAAE,GAAG,kBAAkB,CAAC;IACtC,MAAM,WAAW,GAAG,kBAAkB,CAAC,IAAI,KAAK,SAAS;WAClD,kBAAkB,CAAC,IAAI,KAAK,SAAS;WACrC,kBAAkB,CAAC,IAAI,KAAK,SAAS,CAAC;IAE7C,IAAI,MAAM,IAAI,MAAM,CAAC,WAAW,EAAE,KAAK,KAAK,IAAI,WAAW,EAAE;QACzD,YAAY,CAAC,GAAG,EAAE,CAAC;KACtB;AACL,CAAC;AAED;;;;GAIG;AACH,SAAS,oBAAoB,CAAC,OAAO;IACjC,MAAM,iBAAiB,GAAG;QACtB,4EAA4E;QAC5E,CAAC,cAAc,EAAE,gCAAgC,CAAC;QAClD,CAAC,aAAa,EAAE,gCAAgC,CAAC;QACjD,
CAAC,kBAAkB,EAAE,gCAAgC,CAAC;QACtD,CAAC,SAAS,EAAE,MAAM,CAAC;QACnB,CAAC,UAAU,EAAE,OAAO,CAAC;QACrB,CAAC,QAAQ,EAAE,UAAU,CAAC;QACtB,CAAC,YAAY,EAAE,YAAY,CAAC;QAC5B,CAAC,mBAAmB,EAAE,iBAAiB,CAAC;QACxC,CAAC,aAAa,EAAE,iBAAiB,CAAC;QAClC,CAAC,iBAAiB,EAAE,0BAA0B,CAAC;QAC/C,CAAC,eAAe,CAAC,EAAE,uBAAuB;KAC7C,CAAC;IAEF,KAAK,MAAM,CAAC,gBAAgB,EAAE,SAAS,CAAC,IAAI,iBAAiB,EAAE;QAC3D,IAAI,OAAO,CAAC,gBAAgB,CAAC,KAAK,SAAS,EAAE;YACzC,wFAAwF;YACxF,MAAM,cAAc,GAAG,2FAA2F;kBAC5G,2GAA2G;kBAC3G,sGAAsG,CAAC;YAC7G,mBAAG,CAAC,UAAU,CAAC,cAAc,CAAC,CAAC;YAE/B,IAAI,gBAAgB,KAAK,eAAe,EAAE;gBACtC,mBAAG,CAAC,UAAU,CAAC,YAAY,gBAAgB,kBAAkB;sBACvD,uEAAuE;sBACvE,wGAAwG,CAAC,CAAC;aACnH;iBAAM;gBACH,mBAAG,CAAC,UAAU,CAAC,YAAY,gBAAgB,iCAAiC,SAAS,YAAY,CAAC,CAAC;aACtG;SACJ;KACJ;AACL,CAAC;AAED;;;;;;;;;;;GAWG;AACH,SAAS,6BAA6B,CAAC,MAAM,EAAE,QAAQ;IACnD,MAAM,UAAU,GAAG;QACf,YAAY,EAAE,eAAe,EAAE,SAAS;QACxC,UAAU,EAAE,aAAa,EAAE,YAAY;QACvC,aAAa,EAAE,UAAU,EAAE,KAAK;QAChC,SAAS;KACZ,CAAC;IAEF,QAAQ,CAAC,EAAE,CAAC,KAAK,EAAE,GAAG,EAAE;QACpB,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,WAAW,EAAE,QAAQ,CAAC,WAAW,CAAC,CAAC;QACxD,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,QAAQ,EAAE,QAAQ,CAAC,QAAQ,CAAC,CAAC;QAElD,MAAM,CAAC,QAAQ,GAAG,QAAQ,CAAC,QAAQ,CAAC;IACxC,CAAC,CAAC,CAAC;IAEH,KAAK,MAAM,IAAI,IAAI,UAAU,EAAE;QAC3B,IAAI,CAAC,CAAC,IAAI,IAAI,MAAM,CAAC,EAAE;YACnB,MAAM,CAAC,IAAI,CAAC,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC;SACjC;KACJ;IAED,OAAO,MAAM,CAAC;AAClB,CAAC"}
@@ -1,210 +0,0 @@
1
- export namespace socialUtils {
2
- export { emailsFromText };
3
- export { emailsFromUrls };
4
- export { phonesFromText };
5
- export { phonesFromUrls };
6
- export { parseHandlesFromHtml };
7
- export { EMAIL_REGEX };
8
- export { EMAIL_REGEX_GLOBAL };
9
- export { LINKEDIN_REGEX };
10
- export { LINKEDIN_REGEX_GLOBAL };
11
- export { INSTAGRAM_REGEX };
12
- export { INSTAGRAM_REGEX_GLOBAL };
13
- export { TWITTER_REGEX };
14
- export { TWITTER_REGEX_GLOBAL };
15
- export { FACEBOOK_REGEX };
16
- export { FACEBOOK_REGEX_GLOBAL };
17
- export { YOUTUBE_REGEX };
18
- export { YOUTUBE_REGEX_GLOBAL };
19
- export { TIKTOK_REGEX };
20
- export { TIKTOK_REGEX_GLOBAL };
21
- export { PINTEREST_REGEX };
22
- export { PINTEREST_REGEX_GLOBAL };
23
- export { DISCORD_REGEX };
24
- export { DISCORD_REGEX_GLOBAL };
25
- }
26
- /**
27
- * Representation of social handles parsed from an HTML page.
28
- *
29
- * The object has the following structure:
30
- *
31
- * ```
32
- * {
33
- * emails: String[],
34
- * phones: String[],
35
- * phonesUncertain: String[],
36
- * linkedIns: String[],
37
- * twitters: String[],
38
- * instagrams: String[],
39
- * facebooks: String[],
40
- * youtubes: String[],
41
- * tiktoks: String[],
42
- * pinterests: String[],
43
- * discords: String[],
44
- * }
45
- * ```
46
- */
47
- export type SocialHandles = {
48
- emails: string[];
49
- phones: string[];
50
- phonesUncertain: string[];
51
- linkedIns: string[];
52
- twitters: string[];
53
- instagrams: string[];
54
- facebooks: string[];
55
- youtubes: string[];
56
- tiktoks: string[];
57
- pinterests: string[];
58
- discords: string[];
59
- };
60
- /**
61
- * The function extracts email addresses from a plain text.
62
- * Note that the function preserves the order of emails and keeps duplicates.
63
- * @param {string} text Text to search in.
64
- * @return {string[]} Array of email addresses found.
65
- * If no emails are found, the function returns an empty array.
66
- * @memberOf social
67
- */
68
- declare function emailsFromText(text: string): string[];
69
- /**
70
- * The function extracts email addresses from a list of URLs.
71
- * Basically it looks for all `mailto:` URLs and returns valid email addresses from them.
72
- * Note that the function preserves the order of emails and keeps duplicates.
73
- * @param {string[]} urls Array of URLs.
74
- * @return {string[]} Array of email addresses found.
75
- * If no emails are found, the function returns an empty array.
76
- * @memberOf social
77
- */
78
- declare function emailsFromUrls(urls: string[]): string[];
79
- /**
80
- * The function attempts to extract phone numbers from a text. Please note that
81
- * the results might not be accurate, since phone numbers appear in a large variety of formats and conventions.
82
- * If you encounter some problems, please [file an issue](https://github.com/apify/apify-js/issues).
83
- * @param {string} text Text to search the phone numbers in.
84
- * @return {string[]} Array of phone numbers found.
85
- * If no phone numbers are found, the function returns an empty array.
86
- * @memberOf social
87
- */
88
- declare function phonesFromText(text: string): string[];
89
- /**
90
- * Finds phone number links in an array of URLs and extracts the phone numbers from them.
91
- * Note that the phone number links look like `tel://123456789`, `tel:/123456789` or `tel:123456789`.
92
- * @param {string[]} urls Array of URLs.
93
- * @return {string[]} Array of phone numbers found.
94
- * If no phone numbers are found, the function returns an empty array.
95
- * @memberOf social
96
- */
97
- declare function phonesFromUrls(urls: string[]): string[];
98
- /**
99
- * Representation of social handles parsed from an HTML page.
100
- *
101
- * The object has the following structure:
102
- *
103
- * ```
104
- * {
105
- * emails: String[],
106
- * phones: String[],
107
- * phonesUncertain: String[],
108
- * linkedIns: String[],
109
- * twitters: String[],
110
- * instagrams: String[],
111
- * facebooks: String[],
112
- * youtubes: String[],
113
- * tiktoks: String[],
114
- * pinterests: String[],
115
- * discords: String[],
116
- * }
117
- * ```
118
- * @typedef SocialHandles
119
- * @property {string[]} emails
120
- * @property {string[]} phones
121
- * @property {string[]} phonesUncertain
122
- * @property {string[]} linkedIns
123
- * @property {string[]} twitters
124
- * @property {string[]} instagrams
125
- * @property {string[]} facebooks
126
- * @property {string[]} youtubes
127
- * @property {string[]} tiktoks
128
- * @property {string[]} pinterests
129
- * @property {string[]} discords
130
- */
131
- /**
132
- * The function attempts to extract emails, phone numbers and social profile URLs from an HTML document,
133
- * specifically LinkedIn, Twitter, Instagram, Facebook, YouTube, TikTok, Pinterest and Discord profile URLs.
134
- * The function removes duplicates from the resulting arrays and sorts the items alphabetically.
135
- *
136
- * Note that the `phones` field contains phone numbers extracted from the special phone links
137
- * such as `[call us](tel:+1234556789)` (see {@link social#phonesFromUrls})
138
- * and potentially other sources with high certainty, while `phonesUncertain` contains phone numbers
139
- * extracted from the plain text, which might be very inaccurate.
140
- *
141
- * **Example usage:**
142
- * ```javascript
143
- * const Apify = require('apify');
144
- *
145
- * const browser = await Apify.launchPuppeteer();
146
- * const page = await browser.newPage();
147
- * await page.goto('http://www.example.com');
148
- * const html = await page.content();
149
- *
150
- * const result = Apify.utils.social.parseHandlesFromHtml(html);
151
- * console.log('Social handles:');
152
- * console.dir(result);
153
- * ```
154
- *
155
- * @param {string} html HTML text
156
- * @param {*|null} [data] Optional object which will receive the `text` and `$` properties
157
- * that contain the text content of the HTML and the `cheerio` object, respectively. This is an optimization
158
- * so that the caller doesn't need to parse the HTML document again, if needed.
159
- * @return {SocialHandles} An object with the social handles.
160
- *
161
- * @memberOf social
162
- */
163
- declare function parseHandlesFromHtml(html: string, data?: any | null): SocialHandles;
164
- /**
165
- * Regular expression to exactly match a single email address.
166
- * It has the following form: `/^...$/i`.
167
- * @type {RegExp}
168
- * @memberOf social
169
- */
170
- declare const EMAIL_REGEX: RegExp;
171
- /**
172
- * Regular expression to find multiple email addresses in a text.
173
- * It has the following form: `/.../ig`.
174
- * @type {RegExp}
175
- * @memberOf social
176
- */
177
- declare const EMAIL_REGEX_GLOBAL: RegExp;
178
- /** @type RegExp */
179
- declare let LINKEDIN_REGEX: RegExp;
180
- /** @type RegExp */
181
- declare let LINKEDIN_REGEX_GLOBAL: RegExp;
182
- /** @type RegExp */
183
- declare let INSTAGRAM_REGEX: RegExp;
184
- /** @type RegExp */
185
- declare let INSTAGRAM_REGEX_GLOBAL: RegExp;
186
- /** @type RegExp */
187
- declare let TWITTER_REGEX: RegExp;
188
- /** @type RegExp */
189
- declare let TWITTER_REGEX_GLOBAL: RegExp;
190
- /** @type RegExp */
191
- declare let FACEBOOK_REGEX: RegExp;
192
- /** @type RegExp */
193
- declare let FACEBOOK_REGEX_GLOBAL: RegExp;
194
- /** @type RegExp */
195
- declare let YOUTUBE_REGEX: RegExp;
196
- /** @type RegExp */
197
- declare let YOUTUBE_REGEX_GLOBAL: RegExp;
198
- /** @type RegExp */
199
- declare let TIKTOK_REGEX: RegExp;
200
- /** @type RegExp */
201
- declare let TIKTOK_REGEX_GLOBAL: RegExp;
202
- /** @type RegExp */
203
- declare let PINTEREST_REGEX: RegExp;
204
- /** @type RegExp */
205
- declare let PINTEREST_REGEX_GLOBAL: RegExp;
206
- declare let DISCORD_REGEX: any;
207
- /** @type RegExp */
208
- declare let DISCORD_REGEX_GLOBAL: RegExp;
209
- export {};
210
- //# sourceMappingURL=utils_social.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"utils_social.d.ts","sourceRoot":"","sources":["../src/utils_social.js"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;YAgrBc,MAAM,EAAE;YACR,MAAM,EAAE;qBACR,MAAM,EAAE;eACR,MAAM,EAAE;cACR,MAAM,EAAE;gBACR,MAAM,EAAE;eACR,MAAM,EAAE;cACR,MAAM,EAAE;aACR,MAAM,EAAE;gBACR,MAAM,EAAE;cACR,MAAM,EAAE;;AA9pBtB;;;;;;;GAOG;AACH,sCALW,MAAM,GACL,MAAM,EAAE,CAOnB;AAED;;;;;;;;GAQG;AACH,sCALW,MAAM,EAAE,GACP,MAAM,EAAE,CAgBnB;AAgED;;;;;;;;GAQG;AACH,sCALW,MAAM,GACL,MAAM,EAAE,CAqBnB;AAED;;;;;;;GAOG;AACH,sCALW,MAAM,EAAE,GACP,MAAM,EAAE,CAgBnB;AA6eD;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAgCG;AAEH;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA+BG;AACH,4CARW,MAAM,SACN,MAAE,IAAI,GAGL,aAAa,CA4DxB;AA3wBD;;;;;GAKG;AACH,2BAHU,MAAM,CAG+C;AAE/D;;;;;GAKG;AACH,kCAHU,MAAM,CAGgD;AAwLhE,mBAAmB;AACnB,4BADU,MAAM,CACG;AACnB,mBAAmB;AACnB,mCADU,MAAM,CACU;AAC1B,mBAAmB;AACnB,6BADU,MAAM,CACI;AACpB,mBAAmB;AACnB,oCADU,MAAM,CACW;AAC3B,mBAAmB;AACnB,2BADU,MAAM,CACE;AAClB,mBAAmB;AACnB,kCADU,MAAM,CACS;AACzB,mBAAmB;AACnB,4BADU,MAAM,CACG;AACnB,mBAAmB;AACnB,mCADU,MAAM,CACU;AAC1B,mBAAmB;AACnB,2BADU,MAAM,CACE;AAClB,mBAAmB;AACnB,kCADU,MAAM,CACS;AACzB,mBAAmB;AACnB,0BADU,MAAM,CACC;AACjB,mBAAmB;AACnB,iCADU,MAAM,CACQ;AACxB,mBAAmB;AACnB,6BADU,MAAM,CACI;AACpB,mBAAmB;AACnB,oCADU,MAAM,CACW;AAC3B,+BAAkB;AAClB,mBAAmB;AACnB,kCADU,MAAM,CACS"}