apify 2.3.1-beta.4 → 3.0.0-alpha.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -5
- package/package.json +69 -128
- package/build/actor.d.ts +0 -113
- package/build/actor.d.ts.map +0 -1
- package/build/actor.js +0 -582
- package/build/actor.js.map +0 -1
- package/build/apify.d.ts +0 -752
- package/build/apify.d.ts.map +0 -1
- package/build/apify.js +0 -877
- package/build/apify.js.map +0 -1
- package/build/autoscaling/autoscaled_pool.d.ts +0 -384
- package/build/autoscaling/autoscaled_pool.d.ts.map +0 -1
- package/build/autoscaling/autoscaled_pool.js +0 -557
- package/build/autoscaling/autoscaled_pool.js.map +0 -1
- package/build/autoscaling/snapshotter.d.ts +0 -278
- package/build/autoscaling/snapshotter.d.ts.map +0 -1
- package/build/autoscaling/snapshotter.js +0 -447
- package/build/autoscaling/snapshotter.js.map +0 -1
- package/build/autoscaling/system_status.d.ts +0 -224
- package/build/autoscaling/system_status.d.ts.map +0 -1
- package/build/autoscaling/system_status.js +0 -228
- package/build/autoscaling/system_status.js.map +0 -1
- package/build/browser_launchers/browser_launcher.d.ts +0 -154
- package/build/browser_launchers/browser_launcher.d.ts.map +0 -1
- package/build/browser_launchers/browser_launcher.js +0 -160
- package/build/browser_launchers/browser_launcher.js.map +0 -1
- package/build/browser_launchers/browser_plugin.d.ts +0 -23
- package/build/browser_launchers/browser_plugin.d.ts.map +0 -1
- package/build/browser_launchers/browser_plugin.js +0 -25
- package/build/browser_launchers/browser_plugin.js.map +0 -1
- package/build/browser_launchers/playwright_launcher.d.ts +0 -131
- package/build/browser_launchers/playwright_launcher.d.ts.map +0 -1
- package/build/browser_launchers/playwright_launcher.js +0 -150
- package/build/browser_launchers/playwright_launcher.js.map +0 -1
- package/build/browser_launchers/puppeteer_launcher.d.ts +0 -153
- package/build/browser_launchers/puppeteer_launcher.d.ts.map +0 -1
- package/build/browser_launchers/puppeteer_launcher.js +0 -197
- package/build/browser_launchers/puppeteer_launcher.js.map +0 -1
- package/build/cache_container.d.ts +0 -31
- package/build/cache_container.d.ts.map +0 -1
- package/build/cache_container.js +0 -48
- package/build/cache_container.js.map +0 -1
- package/build/configuration.d.ts +0 -226
- package/build/configuration.d.ts.map +0 -1
- package/build/configuration.js +0 -325
- package/build/configuration.js.map +0 -1
- package/build/constants.d.ts +0 -37
- package/build/constants.d.ts.map +0 -1
- package/build/constants.js +0 -41
- package/build/constants.js.map +0 -1
- package/build/crawlers/basic_crawler.d.ts +0 -443
- package/build/crawlers/basic_crawler.d.ts.map +0 -1
- package/build/crawlers/basic_crawler.js +0 -664
- package/build/crawlers/basic_crawler.js.map +0 -1
- package/build/crawlers/browser_crawler.d.ts +0 -512
- package/build/crawlers/browser_crawler.d.ts.map +0 -1
- package/build/crawlers/browser_crawler.js +0 -540
- package/build/crawlers/browser_crawler.js.map +0 -1
- package/build/crawlers/cheerio_crawler.d.ts +0 -931
- package/build/crawlers/cheerio_crawler.d.ts.map +0 -1
- package/build/crawlers/cheerio_crawler.js +0 -913
- package/build/crawlers/cheerio_crawler.js.map +0 -1
- package/build/crawlers/crawler_extension.d.ts +0 -10
- package/build/crawlers/crawler_extension.d.ts.map +0 -1
- package/build/crawlers/crawler_extension.js +0 -19
- package/build/crawlers/crawler_extension.js.map +0 -1
- package/build/crawlers/crawler_utils.d.ts +0 -34
- package/build/crawlers/crawler_utils.d.ts.map +0 -1
- package/build/crawlers/crawler_utils.js +0 -87
- package/build/crawlers/crawler_utils.js.map +0 -1
- package/build/crawlers/playwright_crawler.d.ts +0 -448
- package/build/crawlers/playwright_crawler.d.ts.map +0 -1
- package/build/crawlers/playwright_crawler.js +0 -299
- package/build/crawlers/playwright_crawler.js.map +0 -1
- package/build/crawlers/puppeteer_crawler.d.ts +0 -425
- package/build/crawlers/puppeteer_crawler.d.ts.map +0 -1
- package/build/crawlers/puppeteer_crawler.js +0 -299
- package/build/crawlers/puppeteer_crawler.js.map +0 -1
- package/build/crawlers/statistics.d.ts +0 -185
- package/build/crawlers/statistics.d.ts.map +0 -1
- package/build/crawlers/statistics.js +0 -331
- package/build/crawlers/statistics.js.map +0 -1
- package/build/enqueue_links/click_elements.d.ts +0 -179
- package/build/enqueue_links/click_elements.d.ts.map +0 -1
- package/build/enqueue_links/click_elements.js +0 -434
- package/build/enqueue_links/click_elements.js.map +0 -1
- package/build/enqueue_links/enqueue_links.d.ts +0 -117
- package/build/enqueue_links/enqueue_links.d.ts.map +0 -1
- package/build/enqueue_links/enqueue_links.js +0 -163
- package/build/enqueue_links/enqueue_links.js.map +0 -1
- package/build/enqueue_links/shared.d.ts +0 -42
- package/build/enqueue_links/shared.d.ts.map +0 -1
- package/build/enqueue_links/shared.js +0 -121
- package/build/enqueue_links/shared.js.map +0 -1
- package/build/errors.d.ts +0 -29
- package/build/errors.d.ts.map +0 -1
- package/build/errors.js +0 -38
- package/build/errors.js.map +0 -1
- package/build/events.d.ts +0 -11
- package/build/events.d.ts.map +0 -1
- package/build/events.js +0 -147
- package/build/events.js.map +0 -1
- package/build/index.d.ts +0 -4
- package/build/index.d.ts.map +0 -1
- package/build/index.js +0 -7
- package/build/index.js.map +0 -1
- package/build/main.d.ts +0 -179
- package/build/main.d.ts.map +0 -1
- package/build/main.js +0 -81
- package/build/main.js.map +0 -1
- package/build/playwright_utils.d.ts +0 -9
- package/build/playwright_utils.d.ts.map +0 -1
- package/build/playwright_utils.js +0 -90
- package/build/playwright_utils.js.map +0 -1
- package/build/proxy_configuration.d.ts +0 -411
- package/build/proxy_configuration.d.ts.map +0 -1
- package/build/proxy_configuration.js +0 -517
- package/build/proxy_configuration.js.map +0 -1
- package/build/pseudo_url.d.ts +0 -86
- package/build/pseudo_url.d.ts.map +0 -1
- package/build/pseudo_url.js +0 -153
- package/build/pseudo_url.js.map +0 -1
- package/build/puppeteer_request_interception.d.ts +0 -8
- package/build/puppeteer_request_interception.d.ts.map +0 -1
- package/build/puppeteer_request_interception.js +0 -235
- package/build/puppeteer_request_interception.js.map +0 -1
- package/build/puppeteer_utils.d.ts +0 -250
- package/build/puppeteer_utils.d.ts.map +0 -1
- package/build/puppeteer_utils.js +0 -551
- package/build/puppeteer_utils.js.map +0 -1
- package/build/request.d.ts +0 -180
- package/build/request.d.ts.map +0 -1
- package/build/request.js +0 -261
- package/build/request.js.map +0 -1
- package/build/request_list.d.ts +0 -581
- package/build/request_list.d.ts.map +0 -1
- package/build/request_list.js +0 -826
- package/build/request_list.js.map +0 -1
- package/build/serialization.d.ts +0 -5
- package/build/serialization.d.ts.map +0 -1
- package/build/serialization.js +0 -139
- package/build/serialization.js.map +0 -1
- package/build/session_pool/errors.d.ts +0 -11
- package/build/session_pool/errors.d.ts.map +0 -1
- package/build/session_pool/errors.js +0 -18
- package/build/session_pool/errors.js.map +0 -1
- package/build/session_pool/events.d.ts +0 -5
- package/build/session_pool/events.d.ts.map +0 -1
- package/build/session_pool/events.js +0 -6
- package/build/session_pool/events.js.map +0 -1
- package/build/session_pool/session.d.ts +0 -286
- package/build/session_pool/session.d.ts.map +0 -1
- package/build/session_pool/session.js +0 -355
- package/build/session_pool/session.js.map +0 -1
- package/build/session_pool/session_pool.d.ts +0 -280
- package/build/session_pool/session_pool.d.ts.map +0 -1
- package/build/session_pool/session_pool.js +0 -393
- package/build/session_pool/session_pool.js.map +0 -1
- package/build/session_pool/session_utils.d.ts +0 -4
- package/build/session_pool/session_utils.d.ts.map +0 -1
- package/build/session_pool/session_utils.js +0 -24
- package/build/session_pool/session_utils.js.map +0 -1
- package/build/stealth/hiding_tricks.d.ts +0 -22
- package/build/stealth/hiding_tricks.d.ts.map +0 -1
- package/build/stealth/hiding_tricks.js +0 -308
- package/build/stealth/hiding_tricks.js.map +0 -1
- package/build/stealth/stealth.d.ts +0 -56
- package/build/stealth/stealth.d.ts.map +0 -1
- package/build/stealth/stealth.js +0 -125
- package/build/stealth/stealth.js.map +0 -1
- package/build/storages/dataset.d.ts +0 -288
- package/build/storages/dataset.d.ts.map +0 -1
- package/build/storages/dataset.js +0 -480
- package/build/storages/dataset.js.map +0 -1
- package/build/storages/key_value_store.d.ts +0 -243
- package/build/storages/key_value_store.d.ts.map +0 -1
- package/build/storages/key_value_store.js +0 -462
- package/build/storages/key_value_store.js.map +0 -1
- package/build/storages/request_queue.d.ts +0 -318
- package/build/storages/request_queue.d.ts.map +0 -1
- package/build/storages/request_queue.js +0 -636
- package/build/storages/request_queue.js.map +0 -1
- package/build/storages/storage_manager.d.ts +0 -87
- package/build/storages/storage_manager.d.ts.map +0 -1
- package/build/storages/storage_manager.js +0 -150
- package/build/storages/storage_manager.js.map +0 -1
- package/build/tsconfig.tsbuildinfo +0 -1
- package/build/typedefs.d.ts +0 -146
- package/build/typedefs.d.ts.map +0 -1
- package/build/typedefs.js +0 -88
- package/build/typedefs.js.map +0 -1
- package/build/utils.d.ts +0 -175
- package/build/utils.d.ts.map +0 -1
- package/build/utils.js +0 -731
- package/build/utils.js.map +0 -1
- package/build/utils_log.d.ts +0 -41
- package/build/utils_log.d.ts.map +0 -1
- package/build/utils_log.js +0 -192
- package/build/utils_log.js.map +0 -1
- package/build/utils_request.d.ts +0 -77
- package/build/utils_request.d.ts.map +0 -1
- package/build/utils_request.js +0 -385
- package/build/utils_request.js.map +0 -1
- package/build/utils_social.d.ts +0 -210
- package/build/utils_social.d.ts.map +0 -1
- package/build/utils_social.js +0 -787
- package/build/utils_social.js.map +0 -1
- package/build/validators.d.ts +0 -23
- package/build/validators.d.ts.map +0 -1
- package/build/validators.js +0 -29
- package/build/validators.js.map +0 -1
package/build/utils_request.js
DELETED
|
@@ -1,385 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.requestAsBrowser = void 0;
|
|
4
|
-
const tslib_1 = require("tslib");
|
|
5
|
-
const got_scraping_1 = require("got-scraping");
|
|
6
|
-
const ow_1 = (0, tslib_1.__importDefault)(require("ow"));
|
|
7
|
-
const utils_log_1 = (0, tslib_1.__importDefault)(require("./utils_log"));
|
|
8
|
-
/* eslint-enable no-unused-vars,import/named,import/order */
|
|
9
|
-
/**
|
|
10
|
-
* @typedef {(IncomingMessage & { body: string })} RequestAsBrowserResult
|
|
11
|
-
*/
|
|
12
|
-
/**
|
|
13
|
-
* @typedef RequestAsBrowserOptions
|
|
14
|
-
* @property {string} url
|
|
15
|
-
* URL of the target endpoint. Supports both HTTP and HTTPS schemes.
|
|
16
|
-
* @property {string} [method="GET"]
|
|
17
|
-
* HTTP method.
|
|
18
|
-
* @property {Object<string, string>} [headers]
|
|
19
|
-
* Additional HTTP headers to add. It's only recommended to use this option,
|
|
20
|
-
* with headers that are typically added by websites, such as cookies. Overriding
|
|
21
|
-
* default browser headers will remove the masking this function provides.
|
|
22
|
-
* @property {string} [proxyUrl]
|
|
23
|
-
* An HTTP proxy to be passed down to the HTTP request. Supports proxy authentication with Basic Auth.
|
|
24
|
-
* @property {object} [headerGeneratorOptions]
|
|
25
|
-
* Configuration to be used for generating correct browser headers.
|
|
26
|
-
* See the [`header-generator`](https://github.com/apify/header-generator) library.
|
|
27
|
-
* @property {string} [languageCode=en]
|
|
28
|
-
* Two-letter ISO 639 language code.
|
|
29
|
-
* @property {string} [countryCode=US]
|
|
30
|
-
* Two-letter ISO 3166 country code.
|
|
31
|
-
* @property {boolean} [useMobileVersion]
|
|
32
|
-
* If `true`, the function uses User-Agent of a mobile browser.
|
|
33
|
-
* @property {boolean} [ignoreSslErrors=true]
|
|
34
|
-
* If set to true, SSL/TLS certificate errors will be ignored.
|
|
35
|
-
* @property {boolean} [useInsecureHttpParser=true]
|
|
36
|
-
* Node.js' HTTP parser is stricter than parsers used by web browsers, which prevents scraping of websites
|
|
37
|
-
* whose servers do not comply with HTTP specs, either by accident or due to some anti-scraping protections,
|
|
38
|
-
* causing e.g. the `invalid header value char` error. The `useInsecureHttpParser` option forces
|
|
39
|
-
* the HTTP parser to ignore certain errors which lets you scrape such websites.
|
|
40
|
-
* However, it will also open your application to some security vulnerabilities,
|
|
41
|
-
* although the risk should be negligible as these vulnerabilities mainly relate to server applications, not clients.
|
|
42
|
-
* Learn more in this [blog post](https://snyk.io/blog/node-js-release-fixes-a-critical-http-security-vulnerability/).
|
|
43
|
-
* @property {AbortFunction} [abortFunction]
|
|
44
|
-
* Function accepts `response` object as a single parameter and should return `true` or `false`.
|
|
45
|
-
* If function returns true, request gets aborted.
|
|
46
|
-
* @property {boolean} [useHttp2=true]
|
|
47
|
-
* If set to false, it will prevent use of HTTP2 requests. This is strongly discouraged. Websites
|
|
48
|
-
* expect HTTP2 connections, because browsers use HTTP2 by default. It will automatically downgrade
|
|
49
|
-
* to HTTP/1.1 for websites that do not support HTTP2.
|
|
50
|
-
* @property {object} [sessionToken]
|
|
51
|
-
* A unique object used to generate browser headers. By default, new headers are generated on every call.
|
|
52
|
-
* Set this option to make these headers persistent.
|
|
53
|
-
*/
|
|
54
|
-
/**
|
|
55
|
-
* @callback AbortFunction
|
|
56
|
-
* @param {IncomingMessage} response
|
|
57
|
-
* @returns {boolean}
|
|
58
|
-
*/
|
|
59
|
-
/**
|
|
60
|
-
* **IMPORTANT:** This function uses an insecure version of HTTP parser by default
|
|
61
|
-
* and also ignores SSL/TLS errors. This is very useful in scraping, because it allows bypassing
|
|
62
|
-
* certain anti-scraping walls, but it also exposes some vulnerability. For other than scraping
|
|
63
|
-
* scenarios, please set `useInsecureHttpParser: false` and `ignoreSslErrors: false`.
|
|
64
|
-
*
|
|
65
|
-
* Sends an HTTP request that looks like a request sent by a web browser,
|
|
66
|
-
* fully emulating browser's HTTP headers. It uses HTTP2 by default for Node 12+.
|
|
67
|
-
*
|
|
68
|
-
* This function is useful for web scraping of websites that send the full HTML in the first response.
|
|
69
|
-
* Thanks to this function, the target web server has no simple way to find out the request
|
|
70
|
-
* hasn't been sent by a human's web browser. Using a headless browser for such requests
|
|
71
|
-
* is an order of magnitude more resource-intensive than this function.
|
|
72
|
-
*
|
|
73
|
-
* The function emulates the Chrome and Firefox web browsers. If you want more control
|
|
74
|
-
* over the browsers and their versions, use the `headerGeneratorOptions` property.
|
|
75
|
-
* You can find more info in the readme of the [`header-generator`](https://github.com/apify/header-generator) library.
|
|
76
|
-
*
|
|
77
|
-
* Internally, the function uses the [`got-scraping`](https://github.com/apify/got-scraping) library to perform the request.
|
|
78
|
-
* All `options` not recognized by this function are passed on to `got-scraping`, so see its documentation for more details.
|
|
79
|
-
*
|
|
80
|
-
* **Example usage:**
|
|
81
|
-
* ```js
|
|
82
|
-
* const Apify = require('apify');
|
|
83
|
-
*
|
|
84
|
-
* const { utils: { requestAsBrowser } } = Apify;
|
|
85
|
-
*
|
|
86
|
-
* ...
|
|
87
|
-
*
|
|
88
|
-
* const response = await requestAsBrowser({ url: 'https://www.example.com/' });
|
|
89
|
-
*
|
|
90
|
-
* const html = response.body;
|
|
91
|
-
* const status = response.statusCode;
|
|
92
|
-
* const contentType = response.headers['content-type'];
|
|
93
|
-
* ```
|
|
94
|
-
*
|
|
95
|
-
* @param {RequestAsBrowserOptions} options All `requestAsBrowser` configuration options.
|
|
96
|
-
*
|
|
97
|
-
* @return {Promise<RequestAsBrowserResult>} The result can be various objects, but it will always be like a
|
|
98
|
-
* [Node.js HTTP response stream](https://nodejs.org/api/http.html#http_class_http_incomingmessage)
|
|
99
|
-
* with a 'body' property for the parsed response body, unless the 'stream' option is used.
|
|
100
|
-
* @memberOf utils
|
|
101
|
-
* @name requestAsBrowser
|
|
102
|
-
* @function
|
|
103
|
-
*/
|
|
104
|
-
const requestAsBrowser = async (options = {}) => {
    // Warn about legacy option names before anything else touches `options`.
    logDeprecatedOptions(options);
    // Validate only the legacy requestAsBrowser options; everything else is
    // left for got-scraping to validate.
    (0, ow_1.default)(options, 'RequestAsBrowserOptions', ow_1.default.object.partialShape({
        payload: ow_1.default.optional.any(ow_1.default.string, ow_1.default.buffer),
        proxyUrl: ow_1.default.optional.string.url,
        languageCode: ow_1.default.optional.string.length(2),
        countryCode: ow_1.default.optional.string.length(2),
        useMobileVersion: ow_1.default.optional.boolean,
        abortFunction: ow_1.default.optional.function,
        ignoreSslErrors: ow_1.default.optional.boolean,
        useInsecureHttpParser: ow_1.default.optional.boolean,
        useHttp2: ow_1.default.optional.boolean,
        timeoutSecs: ow_1.default.optional.number,
        throwOnHttpErrors: ow_1.default.optional.boolean,
        headerGeneratorOptions: ow_1.default.optional.object,
        stream: ow_1.default.optional.boolean,
        decodeBody: ow_1.default.optional.boolean,
        sessionToken: ow_1.default.optional.object,
    }));
    (0, ow_1.default)(options, 'RequestAsBrowserOptions', ow_1.default.object.validate((opts) => ({
        validator: areBodyOptionsCompatible(opts),
        message: (label) => `The 'payload', 'body', 'json' and 'form' options of ${label} are mutually exclusive.`,
    })));
    // We created the `got-scraping` package which replaced underlying @apify/http-request.
    // At the same time, we want users to be able to use requestAsBrowser without breaking changes.
    // So we do a lot of property mapping here, to make sure that everything works as expected.
    // TODO Update this with SDK v3 and use `got-scraping` API directly.
    const {
        payload, // alias for body to allow direct passing of our Request objects
        json,
        headerGeneratorOptions,
        languageCode = 'en',
        countryCode = 'US',
        useMobileVersion = false,
        abortFunction = () => false,
        ignoreSslErrors = true,
        useInsecureHttpParser = true,
        useHttp2 = true,
        timeoutSecs = 30,
        throwOnHttpErrors = false,
        stream = false,
        decodeBody = true,
        // eslint-disable-next-line no-unused-vars
        forceUrlEncoding, // TODO remove in v3. It's not used, but we keep it here to prevent validation errors in got.
        ...gotParams
    } = options;
    // The official got option `https.rejectUnauthorized` takes priority over the
    // legacy `ignoreSslErrors` flag, the same way every other got option below
    // overrides its legacy counterpart. Previously the legacy default silently
    // overwrote an explicit `https.rejectUnauthorized`, disabling certificate
    // validation even when the caller asked for it.
    const userHttps = gotParams.https || {};
    const rejectUnauthorized = userHttps.rejectUnauthorized !== undefined
        ? userHttps.rejectUnauthorized
        : !ignoreSslErrors;
    const gotScrapingOptions = {
        insecureHTTPParser: useInsecureHttpParser,
        http2: useHttp2,
        timeout: { request: timeoutSecs * 1000 },
        throwHttpErrors: throwOnHttpErrors,
        isStream: stream,
        decompress: decodeBody,
        // We overwrite the above arguments because we want to give the official
        // got interface a priority over our requestAsBrowser one.
        // E.g. { isStream: false, stream: true } should produce { isStream: false }.
        ...gotParams,
        https: {
            ...userHttps,
            rejectUnauthorized,
        },
    };
    // Order is important
    normalizePayloadOption(payload, gotScrapingOptions);
    normalizeJsonOption(json, gotScrapingOptions);
    ensureCorrectHttp2Headers(gotScrapingOptions);
    maybeAddAbortHook(abortFunction, gotScrapingOptions);
    if (!headerGeneratorOptions) {
        // Values that respect old requestAsBrowser user-agents and settings
        gotScrapingOptions.headerGeneratorOptions = {
            devices: useMobileVersion ? ['mobile'] : ['desktop'],
            locales: [`${languageCode}-${countryCode}`],
        };
    }
    else {
        gotScrapingOptions.headerGeneratorOptions = headerGeneratorOptions;
    }
    // Non-stream requests: hand back the got-scraping promise directly.
    if (!gotScrapingOptions.isStream) {
        return (0, got_scraping_1.gotScraping)(gotScrapingOptions);
    }
    // abortFunction must be handled separately for streams :(
    const duplexStream = (0, got_scraping_1.gotScraping)(gotScrapingOptions);
    ensureRequestIsDispatched(duplexStream, gotScrapingOptions);
    return new Promise((resolve, reject) => {
        duplexStream
            .on('error', reject)
            .on('response', (res) => {
                try {
                    const shouldAbort = abortFunction(res);
                    if (shouldAbort) {
                        const err = new Error(`Request for ${gotScrapingOptions.url} aborted due to abortFunction.`);
                        duplexStream.destroy(err);
                        return reject(err);
                    }
                }
                catch (e) {
                    // A throwing abortFunction also tears the stream down.
                    duplexStream.destroy(e);
                    return reject(e);
                }
                // Expose the response metadata on the stream that is returned to the caller.
                addResponsePropertiesToStream(duplexStream, res);
                return resolve(duplexStream);
            });
    });
};
|
|
193
|
-
exports.requestAsBrowser = requestAsBrowser;
|
|
194
|
-
/**
 * Tells whether the body-like options of a `requestAsBrowser` call can be used together.
 * `got` accepts `body`, `json` and `form`; `requestAsBrowser` additionally accepts
 * `payload` as an alias of `body`. At most one of the four may be defined.
 * @param {RequestAsBrowserOptions} requestAsBrowserOptions
 * @return {boolean} `true` when zero or one body option is defined.
 * @private
 * @ignore
 */
function areBodyOptionsCompatible(requestAsBrowserOptions) {
    let definedCount = 0;
    for (const name of ['payload', 'json', 'body', 'form']) {
        const value = requestAsBrowserOptions[name];
        // A boolean `json` is the legacy "expect a JSON response" flag,
        // not a request body, so it does not count.
        const isLegacyJsonFlag = name === 'json' && typeof value === 'boolean';
        if (!isLegacyJsonFlag && value !== undefined) {
            definedCount += 1;
        }
    }
    return definedCount <= 1;
}
|
|
213
|
-
/**
 * Maps the `payload` option (as used by {@link Request}) onto the `body`
 * option that got-scraping understands. Leaves the options untouched when
 * no payload was given.
 * @param {string|Buffer} payload
 * @param {GotScrapingOptions} gotScrapingOptions
 * @ignore
 * @private
 */
function normalizePayloadOption(payload, gotScrapingOptions) {
    if (payload === undefined) {
        return;
    }
    gotScrapingOptions.body = payload;
}
|
|
224
|
-
/**
 * `json` is a boolean flag in the old `requestAsBrowser` API, but in got it is a
 * `body` alias that adds a 'content-type: application/json' header. To stay
 * backwards compatible this function decides which of the two the user meant:
 *
 * - `true`: deprecated flag; the response is parsed as JSON (`responseType = 'json'`).
 * - `false`: deprecated flag; nothing is changed.
 * - anything else (including `undefined`): passed to got as the JSON request body.
 *
 * @param {*} json
 * @param {GotScrapingOptions} gotScrapingOptions Mutated in place.
 * @ignore
 * @private
 */
function normalizeJsonOption(json, gotScrapingOptions) {
    if (typeof json !== 'boolean') {
        // Not the legacy flag, so let `got` handle it as a request body.
        gotScrapingOptions.json = json;
        return;
    }
    // Sentences are separated explicitly; they used to run into each other.
    const deprecationMessage = '"options.json" of type: Boolean is deprecated. '
        + 'If you expect a JSON response, use "options.responseType = \'json\'". '
        + 'Use "options.json" with a plain object to provide a JSON body.';
    utils_log_1.default.deprecated(deprecationMessage);
    if (json) {
        gotScrapingOptions.responseType = 'json';
        // NOTE(review): the reason for clearing `https.ciphers` is not visible in this file.
        // Guarded so that options without an `https` object do not throw.
        if (gotScrapingOptions.https) {
            gotScrapingOptions.https.ciphers = undefined;
        }
    }
    // `false` means the user expects something else than JSON: nothing to do.
}
|
|
253
|
-
/**
 * Strips the 'connection' and 'host' headers (matched case-insensitively) from
 * user-provided headers when HTTP2 is enabled, because they are forbidden
 * there and HTTP2 became the default. The headers object is replaced by a
 * shallow copy, so the caller's object is not modified.
 * @param {GotScrapingOptions} gotScrapingOptions
 * @ignore
 * @private
 */
function ensureCorrectHttp2Headers(gotScrapingOptions) {
    if (!gotScrapingOptions.http2 || !gotScrapingOptions.headers) {
        return;
    }
    const forbidden = ['connection', 'host'];
    const headersCopy = { ...gotScrapingOptions.headers };
    gotScrapingOptions.headers = headersCopy;
    Object.keys(headersCopy).forEach((headerName) => {
        if (forbidden.includes(headerName.toLowerCase())) {
            delete headersCopy[headerName];
        }
    });
}
|
|
272
|
-
/**
 * Wires the legacy `abortFunction` into got as an `afterResponse` hook, appended
 * after any hooks the user already supplied. The hook throws when
 * `abortFunction(response)` is truthy and otherwise passes the response through.
 * Does nothing for stream requests.
 * @param {function} abortFunction
 * @param {GotScrapingOptions} gotScrapingOptions
 * @ignore
 * @private
 */
function maybeAddAbortHook(abortFunction, gotScrapingOptions) {
    // Streams are aborted on the response object instead, because `got`
    // does not execute `afterResponse` hooks for streams :(
    if (gotScrapingOptions.isStream) {
        return;
    }
    const userHooks = gotScrapingOptions.hooks;
    const userAfterResponse = (userHooks && userHooks.afterResponse) || [];
    const rejectIfAborted = (response) => {
        if (abortFunction(response)) {
            throw new Error(`Request for ${gotScrapingOptions.url} aborted due to abortFunction.`);
        }
        return response;
    };
    gotScrapingOptions.hooks = {
        ...userHooks,
        afterResponse: [...userAfterResponse, rejectIfAborted],
    };
}
|
|
302
|
-
/**
 * 'got' will not dispatch a non-GET request stream until a body is provided.
 * When an explicit non-GET method is used and none of `body`, `json` or `form`
 * is set, the stream is ended so the request goes out.
 * @param {stream.Duplex} duplexStream
 * @param {GotScrapingOptions} gotScrapingOptions
 */
function ensureRequestIsDispatched(duplexStream, gotScrapingOptions) {
    const { method, body, json, form } = gotScrapingOptions;
    // No method given, or GET: got dispatches the request on its own.
    if (!method || method.toLowerCase() === 'get') {
        return;
    }
    const hasBody = [body, json, form].some((part) => part !== undefined);
    if (!hasBody) {
        duplexStream.end();
    }
}
|
|
316
|
-
/**
 * Logs a deprecation notice for every legacy `requestAsBrowser` option that is
 * set (i.e. not `undefined`) on `options`, pointing to its got-scraping
 * replacement. A shared introduction is passed to `log.deprecated` before each
 * notice. `json` is not covered here; it is handled by the JSON handler.
 * @param {RequestAsBrowserOptions} options
 * @ignore
 * @private
 */
function logDeprecatedOptions(options) {
    // [deprecated option, got-scraping replacement]
    const deprecatedOptions = [
        // 'json' is handled in the JSON handler, because it has a conflict of types
        ['languageCode', 'headerGeneratorOptions.locales'],
        ['countryCode', 'headerGeneratorOptions.locales'],
        ['useMobileVersion', 'headerGeneratorOptions.devices'],
        ['payload', 'body'],
        ['useHttp2', 'http2'],
        ['stream', 'isStream'],
        ['decodeBody', 'decompress'],
        ['throwOnHttpErrors', 'throwHttpErrors'],
        ['timeoutSecs', 'timeout.request'],
        ['ignoreSslErrors', 'https.rejectUnauthorized'],
        ['abortFunction'], // custom message below
    ];
    // Loop-invariant, so built once. Per the original note, log.deprecated
    // logs a given message only once, so this shows up for the first property only.
    const initialMessage = 'requestAsBrowser internal implementation has been replaced with the got-scraping module. '
        + 'To make the switch without breaking changes, we mapped all existing options to the got-scraping options. '
        + 'This mapping will be removed in SDK v3 and we advise you to update your code using the hints below: ';
    for (const [deprecatedOption, newOption] of deprecatedOptions) {
        if (options[deprecatedOption] === undefined) {
            continue;
        }
        utils_log_1.default.deprecated(initialMessage);
        if (deprecatedOption === 'abortFunction') {
            // Sentences are separated by spaces; they used to run into each other.
            utils_log_1.default.deprecated(`"options.${deprecatedOption}" is deprecated. `
                + 'Use a request cancellation process appropriate for your request type. '
                + 'Either a Stream or a Promise. See Got documentation for more info: https://github.com/sindresorhus/got');
        }
        else {
            utils_log_1.default.deprecated(`"options.${deprecatedOption}" is deprecated. Use "options.${newOption}" instead.`);
        }
    }
}
|
|
354
|
-
/**
 * The stream object returned from got does not carry the usual response
 * properties, and the response stream cannot be read on its own because it
 * emits nothing unless the primary got stream is read too. To let callers work
 * with a single stream, the expected properties are copied from the response
 * onto the got stream. Properties already present on the stream are kept.
 * When the response emits 'end', the trailers and `complete` flag are refreshed.
 * @param {GotStream} stream
 * @param {http.IncomingMessage} response
 * @return {GotStream} The same `stream` instance.
 * @ignore
 * @private
 */
function addResponsePropertiesToStream(stream, response) {
    const syncOnEnd = () => {
        Object.assign(stream.rawTrailers, response.rawTrailers);
        Object.assign(stream.trailers, response.trailers);
        stream.complete = response.complete;
    };
    response.on('end', syncOnEnd);
    [
        'statusCode', 'statusMessage', 'headers',
        'complete', 'httpVersion', 'rawHeaders',
        'rawTrailers', 'trailers', 'url',
        'request',
    ]
        .filter((name) => !(name in stream))
        .forEach((name) => {
            stream[name] = response[name];
        });
    return stream;
}
|
|
385
|
-
//# sourceMappingURL=utils_request.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"utils_request.js","sourceRoot":"","sources":["../src/utils_request.js"],"names":[],"mappings":";;;;AAAA,+CAA2C;AAC3C,yDAAoB;AACpB,yEAA8B;AAI9B,4DAA4D;AAE5D;;GAEG;AAEH;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAyCG;AAEH;;;;GAIG;AAEH;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4CG;AACI,MAAM,gBAAgB,GAAG,KAAK,EAAE,OAAO,GAAG,EAAE,EAAE,EAAE;IACnD,oBAAoB,CAAC,OAAO,CAAC,CAAC;IAC9B,IAAA,YAAE,EAAC,OAAO,EAAE,yBAAyB,EAAE,YAAE,CAAC,MAAM,CAAC,YAAY,CAAC;QAC1D,OAAO,EAAE,YAAE,CAAC,QAAQ,CAAC,GAAG,CAAC,YAAE,CAAC,MAAM,EAAE,YAAE,CAAC,MAAM,CAAC;QAC9C,QAAQ,EAAE,YAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,GAAG;QAChC,YAAY,EAAE,YAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC;QAC1C,WAAW,EAAE,YAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC;QACzC,gBAAgB,EAAE,YAAE,CAAC,QAAQ,CAAC,OAAO;QACrC,aAAa,EAAE,YAAE,CAAC,QAAQ,CAAC,QAAQ;QACnC,eAAe,EAAE,YAAE,CAAC,QAAQ,CAAC,OAAO;QACpC,qBAAqB,EAAE,YAAE,CAAC,QAAQ,CAAC,OAAO;QAC1C,QAAQ,EAAE,YAAE,CAAC,QAAQ,CAAC,OAAO;QAC7B,WAAW,EAAE,YAAE,CAAC,QAAQ,CAAC,MAAM;QAC/B,iBAAiB,EAAE,YAAE,CAAC,QAAQ,CAAC,OAAO;QACtC,sBAAsB,EAAE,YAAE,CAAC,QAAQ,CAAC,MAAM;QAC1C,MAAM,EAAE,YAAE,CAAC,QAAQ,CAAC,OAAO;QAC3B,UAAU,EAAE,YAAE,CAAC,QAAQ,CAAC,OAAO;QAC/B,YAAY,EAAE,YAAE,CAAC,QAAQ,CAAC,MAAM;KACnC,CAAC,CAAC,CAAC;IAEJ,IAAA,YAAE,EAAC,OAAO,EAAE,yBAAyB,EAAE,YAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;QACjE,SAAS,EAAE,wBAAwB,CAAC,IAAI,CAAC;QACzC,OAAO,EAAE,CAAC,KAAK,EAAE,EAAE,CAAC,uDAAuD,KAAK,0BAA0B;KAC7G,CAAC,CAAC,CAAC,CAAC;IAEL,uFAAuF;IACvF,+FAA+F;IAC/F,2FAA2F;IAC3F,oEAAoE;IACpE,MAAM,EACF,OAAO,EAAE,gEAAgE;IACzE,IAAI,EACJ,sBAAsB,EACtB,YAAY,GAAG,IAAI,EACnB,WAAW,GAAG,IAAI,EAClB,gBAAgB,GAAG,KAAK,EACxB,aAAa,GAAG,GAAG,EAAE,CAAC,KAAK,EAC3B,eAAe,GAAG,IAAI,EACtB,qBAAqB,GAAG,IAAI,EAC5B,QAAQ,GAAG,IAAI,EACf,WAAW,GAAG,EAAE,EAChB,iBAAiB,GAAG,KAAK,EACzB,MAAM,GAAG,KAAK,EACd,UAAU,GAAG,IAAI,EACjB,gBAAgB,EAAE,6FAA6F;IAC/G,GAAG,SAAS,EACf,GAAG,OAAO,CAAC;IAEZ,MAAM,kBAAkB,GAAG;QACvB,kBAAkB,EAAE,qBAAqB;QACzC,KAAK,EAAE,QAAQ;QACf,OAAO,EAAE,EAAE,OAAO,EAAE,WAAW,GAAG,IAAI,EAAE;QACxC,eAAe,EAAE,iBAAiB;QA
ClC,QAAQ,EAAE,MAAM;QAChB,UAAU,EAAE,UAAU;QACtB,wEAAwE;QACxE,0DAA0D;QAC1D,6EAA6E;QAC7E,GAAG,SAAS;QACZ,KAAK,EAAE;YACH,GAAG,SAAS,CAAC,KAAK;YAClB,kBAAkB,EAAE,CAAC,eAAe;SACvC;KACJ,CAAC;IAEF,qBAAqB;IACrB,sBAAsB,CAAC,OAAO,EAAE,kBAAkB,CAAC,CAAC;IACpD,mBAAmB,CAAC,IAAI,EAAE,kBAAkB,CAAC,CAAC;IAC9C,yBAAyB,CAAC,kBAAkB,CAAC,CAAC;IAC9C,iBAAiB,CAAC,aAAa,EAAE,kBAAkB,CAAC,CAAC;IACrD,IAAI,CAAC,sBAAsB,EAAE;QACzB,oEAAoE;QACpE,kBAAkB,CAAC,sBAAsB,GAAG;YACxC,OAAO,EAAE,gBAAgB,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;YACpD,OAAO,EAAE,CAAC,GAAG,YAAY,IAAI,WAAW,EAAE,CAAC;SAC9C,CAAC;KACL;SAAM;QACH,kBAAkB,CAAC,sBAAsB,GAAG,sBAAsB,CAAC;KACtE;IAED,8BAA8B;IAC9B,IAAI,CAAC,kBAAkB,CAAC,QAAQ,EAAE;QAC9B,OAAO,IAAA,0BAAW,EAAC,kBAAkB,CAAC,CAAC;KAC1C;IAED,0DAA0D;IAC1D,MAAM,YAAY,GAAG,IAAA,0BAAW,EAAC,kBAAkB,CAAC,CAAC;IAErD,yBAAyB,CAAC,YAAY,EAAE,kBAAkB,CAAC,CAAC;IAE5D,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;QACnC,YAAY;aACP,EAAE,CAAC,OAAO,EAAE,MAAM,CAAC;aACnB,EAAE,CAAC,UAAU,EAAE,CAAC,GAAG,EAAE,EAAE;YACpB,IAAI;gBACA,MAAM,WAAW,GAAG,aAAa,CAAC,GAAG,CAAC,CAAC;gBACvC,IAAI,WAAW,EAAE;oBACb,MAAM,GAAG,GAAG,IAAI,KAAK,CAAC,eAAe,kBAAkB,CAAC,GAAG,gCAAgC,CAAC,CAAC;oBAC7F,YAAY,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;oBAC1B,OAAO,MAAM,CAAC,GAAG,CAAC,CAAC;iBACtB;aACJ;YAAC,OAAO,CAAC,EAAE;gBACR,YAAY,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;gBACxB,OAAO,MAAM,CAAC,CAAC,CAAC,CAAC;aACpB;YAED,6BAA6B,CAAC,YAAY,EAAE,GAAG,CAAC,CAAC;YAEjD,OAAO,OAAO,CAAC,YAAY,CAAC,CAAC;QACjC,CAAC,CAAC,CAAC;IACX,CAAC,CAAC,CAAC;AACP,CAAC,CAAC;AA/GW,QAAA,gBAAgB,oBA+G3B;AAEF;;;;;;;;GAQG;AACH,SAAS,wBAAwB,CAAC,uBAAuB;IACrD,MAAM,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,GAAG,uBAAuB,CAAC;IAC9D,oEAAoE;IACpE,wCAAwC;IACxC,MAAM,QAAQ,GAAG,OAAO,IAAI,KAAK,SAAS,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC;IAE9D,MAAM,YAAY,GAAG,CAAC,OAAO,EAAE,QAAQ,EAAE,IAAI,EAAE,IAAI,CAAC,CAAC;IACrD,MAAM,QAAQ,GAAG,YAAY,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,KAAK,SAAS,CAAC,CAAC;IAEjE,iDAAiD;IACjD,OAAO,QAAQ,CAAC,MAAM,IAAI,CAAC,CAAC;AAChC,CAAC;AAED;;;;;;GAMG;AACH,SAAS,sBAAsB,CAAC,OAA
O,EAAE,kBAAkB;IACvD,IAAI,OAAO,KAAK,SAAS;QAAE,kBAAkB,CAAC,IAAI,GAAG,OAAO,CAAC;AACjE,CAAC;AAED;;;;;;;;GAQG;AACH,SAAS,mBAAmB,CAAC,IAAI,EAAE,kBAAkB;IACjD,6DAA6D;IAC7D,2DAA2D;IAC3D,MAAM,kBAAkB,GAAG,gDAAgD;UACrE,sEAAsE;UACtE,gEAAgE,CAAC;IACvE,IAAI,IAAI,KAAK,IAAI,EAAE;QACf,mBAAG,CAAC,UAAU,CAAC,kBAAkB,CAAC,CAAC;QACnC,kBAAkB,CAAC,YAAY,GAAG,MAAM,CAAC;QACzC,kBAAkB,CAAC,KAAK,CAAC,OAAO,GAAG,SAAS,CAAC;KAChD;SAAM,IAAI,IAAI,KAAK,KAAK,EAAE;QACvB,mBAAG,CAAC,UAAU,CAAC,kBAAkB,CAAC,CAAC;QACnC,kEAAkE;KACrE;SAAM;QACH,oEAAoE;QACpE,kBAAkB,CAAC,IAAI,GAAG,IAAI,CAAC;KAClC;AACL,CAAC;AAED;;;;;;GAMG;AACH,SAAS,yBAAyB,CAAC,kBAAkB;IACjD,IAAI,kBAAkB,CAAC,KAAK,IAAI,kBAAkB,CAAC,OAAO,EAAE;QACxD,kBAAkB,CAAC,OAAO,GAAG,EAAE,GAAG,kBAAkB,CAAC,OAAO,EAAE,CAAC;QAE/D,8DAA8D;QAC9D,KAAK,MAAM,GAAG,IAAI,kBAAkB,CAAC,OAAO,EAAE;YAC1C,MAAM,IAAI,GAAG,GAAG,CAAC,WAAW,EAAE,CAAC;YAE/B,IAAI,IAAI,KAAK,YAAY,IAAI,IAAI,KAAK,MAAM,EAAE;gBAC1C,OAAO,kBAAkB,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;aAC1C;SACJ;KACJ;AACL,CAAC;AAED;;;;;;;GAOG;AACH,SAAS,iBAAiB,CAAC,aAAa,EAAE,kBAAkB;IACxD,uEAAuE;IACvE,wDAAwD;IACxD,IAAI,kBAAkB,CAAC,QAAQ;QAAE,OAAO;IAExC,MAAM,SAAS,GAAG,CAAC,QAAQ,EAAE,EAAE;QAC3B,MAAM,WAAW,GAAG,aAAa,CAAC,QAAQ,CAAC,CAAC;QAC5C,IAAI,WAAW,EAAE;YACb,MAAM,IAAI,KAAK,CAAC,eAAe,kBAAkB,CAAC,GAAG,gCAAgC,CAAC,CAAC;SAC1F;QACD,OAAO,QAAQ,CAAC;IACpB,CAAC,CAAC;IAEF,MAAM,EAAE,KAAK,EAAE,GAAG,kBAAkB,CAAC;IACrC,MAAM,UAAU,GAAG;QACf,GAAG,KAAK;QACR,aAAa,EAAE;YACX,GAAG,CAAC,CAAC,KAAK,IAAI,KAAK,CAAC,aAAa,CAAC,IAAI,EAAE,CAAC;YACzC,SAAS;SACZ;KACJ,CAAC;IAEF,kBAAkB,CAAC,KAAK,GAAG,UAAU,CAAC;AAC1C,CAAC;AAED;;;;GAIG;AACH,SAAS,yBAAyB,CAAC,YAAY,EAAE,kBAAkB;IAC/D,MAAM,EAAE,MAAM,EAAE,GAAG,kBAAkB,CAAC;IACtC,MAAM,WAAW,GAAG,kBAAkB,CAAC,IAAI,KAAK,SAAS;WAClD,kBAAkB,CAAC,IAAI,KAAK,SAAS;WACrC,kBAAkB,CAAC,IAAI,KAAK,SAAS,CAAC;IAE7C,IAAI,MAAM,IAAI,MAAM,CAAC,WAAW,EAAE,KAAK,KAAK,IAAI,WAAW,EAAE;QACzD,YAAY,CAAC,GAAG,EAAE,CAAC;KACtB;AACL,CAAC;AAED;;;;GAIG;AACH,SAAS,oBAAoB,CAAC,OAAO;IACjC,MAAM,iBAAiB,GAAG;QACtB,4EAA4E;QAC5E,CAAC,cAAc,EAAE,gCAAgC,CAAC;QAClD,CAAC,aAAa,EAAE,gCAAgC,CAAC;QACjD,CA
AC,kBAAkB,EAAE,gCAAgC,CAAC;QACtD,CAAC,SAAS,EAAE,MAAM,CAAC;QACnB,CAAC,UAAU,EAAE,OAAO,CAAC;QACrB,CAAC,QAAQ,EAAE,UAAU,CAAC;QACtB,CAAC,YAAY,EAAE,YAAY,CAAC;QAC5B,CAAC,mBAAmB,EAAE,iBAAiB,CAAC;QACxC,CAAC,aAAa,EAAE,iBAAiB,CAAC;QAClC,CAAC,iBAAiB,EAAE,0BAA0B,CAAC;QAC/C,CAAC,eAAe,CAAC,EAAE,uBAAuB;KAC7C,CAAC;IAEF,KAAK,MAAM,CAAC,gBAAgB,EAAE,SAAS,CAAC,IAAI,iBAAiB,EAAE;QAC3D,IAAI,OAAO,CAAC,gBAAgB,CAAC,KAAK,SAAS,EAAE;YACzC,wFAAwF;YACxF,MAAM,cAAc,GAAG,2FAA2F;kBAC5G,2GAA2G;kBAC3G,sGAAsG,CAAC;YAC7G,mBAAG,CAAC,UAAU,CAAC,cAAc,CAAC,CAAC;YAE/B,IAAI,gBAAgB,KAAK,eAAe,EAAE;gBACtC,mBAAG,CAAC,UAAU,CAAC,YAAY,gBAAgB,kBAAkB;sBACvD,uEAAuE;sBACvE,wGAAwG,CAAC,CAAC;aACnH;iBAAM;gBACH,mBAAG,CAAC,UAAU,CAAC,YAAY,gBAAgB,iCAAiC,SAAS,YAAY,CAAC,CAAC;aACtG;SACJ;KACJ;AACL,CAAC;AAED;;;;;;;;;;;GAWG;AACH,SAAS,6BAA6B,CAAC,MAAM,EAAE,QAAQ;IACnD,MAAM,UAAU,GAAG;QACf,YAAY,EAAE,eAAe,EAAE,SAAS;QACxC,UAAU,EAAE,aAAa,EAAE,YAAY;QACvC,aAAa,EAAE,UAAU,EAAE,KAAK;QAChC,SAAS;KACZ,CAAC;IAEF,QAAQ,CAAC,EAAE,CAAC,KAAK,EAAE,GAAG,EAAE;QACpB,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,WAAW,EAAE,QAAQ,CAAC,WAAW,CAAC,CAAC;QACxD,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,QAAQ,EAAE,QAAQ,CAAC,QAAQ,CAAC,CAAC;QAElD,MAAM,CAAC,QAAQ,GAAG,QAAQ,CAAC,QAAQ,CAAC;IACxC,CAAC,CAAC,CAAC;IAEH,KAAK,MAAM,IAAI,IAAI,UAAU,EAAE;QAC3B,IAAI,CAAC,CAAC,IAAI,IAAI,MAAM,CAAC,EAAE;YACnB,MAAM,CAAC,IAAI,CAAC,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC;SACjC;KACJ;IAED,OAAO,MAAM,CAAC;AAClB,CAAC"}
|
package/build/utils_social.d.ts
DELETED
|
@@ -1,210 +0,0 @@
|
|
|
1
|
-
export namespace socialUtils {
|
|
2
|
-
export { emailsFromText };
|
|
3
|
-
export { emailsFromUrls };
|
|
4
|
-
export { phonesFromText };
|
|
5
|
-
export { phonesFromUrls };
|
|
6
|
-
export { parseHandlesFromHtml };
|
|
7
|
-
export { EMAIL_REGEX };
|
|
8
|
-
export { EMAIL_REGEX_GLOBAL };
|
|
9
|
-
export { LINKEDIN_REGEX };
|
|
10
|
-
export { LINKEDIN_REGEX_GLOBAL };
|
|
11
|
-
export { INSTAGRAM_REGEX };
|
|
12
|
-
export { INSTAGRAM_REGEX_GLOBAL };
|
|
13
|
-
export { TWITTER_REGEX };
|
|
14
|
-
export { TWITTER_REGEX_GLOBAL };
|
|
15
|
-
export { FACEBOOK_REGEX };
|
|
16
|
-
export { FACEBOOK_REGEX_GLOBAL };
|
|
17
|
-
export { YOUTUBE_REGEX };
|
|
18
|
-
export { YOUTUBE_REGEX_GLOBAL };
|
|
19
|
-
export { TIKTOK_REGEX };
|
|
20
|
-
export { TIKTOK_REGEX_GLOBAL };
|
|
21
|
-
export { PINTEREST_REGEX };
|
|
22
|
-
export { PINTEREST_REGEX_GLOBAL };
|
|
23
|
-
export { DISCORD_REGEX };
|
|
24
|
-
export { DISCORD_REGEX_GLOBAL };
|
|
25
|
-
}
|
|
26
|
-
/**
|
|
27
|
-
* Representation of social handles parsed from a HTML page.
|
|
28
|
-
*
|
|
29
|
-
* The object has the following structure:
|
|
30
|
-
*
|
|
31
|
-
* ```
|
|
32
|
-
* {
|
|
33
|
-
* emails: String[],
|
|
34
|
-
* phones: String[],
|
|
35
|
-
* phonesUncertain: String[],
|
|
36
|
-
* linkedIns: String[],
|
|
37
|
-
* twitters: String[],
|
|
38
|
-
* instagrams: String[],
|
|
39
|
-
* facebooks: String[],
|
|
40
|
-
* youtubes: String[],
|
|
41
|
-
* tiktoks: String[],
|
|
42
|
-
* pinterests: String[],
|
|
43
|
-
* discords: String[],
|
|
44
|
-
* }
|
|
45
|
-
* ```
|
|
46
|
-
*/
|
|
47
|
-
export type SocialHandles = {
|
|
48
|
-
emails: string[];
|
|
49
|
-
phones: string[];
|
|
50
|
-
phonesUncertain: string[];
|
|
51
|
-
linkedIns: string[];
|
|
52
|
-
twitters: string[];
|
|
53
|
-
instagrams: string[];
|
|
54
|
-
facebooks: string[];
|
|
55
|
-
youtubes: string[];
|
|
56
|
-
tiktoks: string[];
|
|
57
|
-
pinterests: string[];
|
|
58
|
-
discords: string[];
|
|
59
|
-
};
|
|
60
|
-
/**
|
|
61
|
-
* The function extracts email addresses from a plain text.
|
|
62
|
-
* Note that the function preserves the order of emails and keep duplicates.
|
|
63
|
-
* @param {string} text Text to search in.
|
|
64
|
-
* @return {string[]} Array of emails addresses found.
|
|
65
|
-
* If no emails are found, the function returns an empty array.
|
|
66
|
-
* @memberOf social
|
|
67
|
-
*/
|
|
68
|
-
declare function emailsFromText(text: string): string[];
|
|
69
|
-
/**
|
|
70
|
-
* The function extracts email addresses from a list of URLs.
|
|
71
|
-
* Basically it looks for all `mailto:` URLs and returns valid email addresses from them.
|
|
72
|
-
* Note that the function preserves the order of emails and keep duplicates.
|
|
73
|
-
* @param {string[]} urls Array of URLs.
|
|
74
|
-
* @return {string[]} Array of emails addresses found.
|
|
75
|
-
* If no emails are found, the function returns an empty array.
|
|
76
|
-
* @memberOf social
|
|
77
|
-
*/
|
|
78
|
-
declare function emailsFromUrls(urls: string[]): string[];
|
|
79
|
-
/**
|
|
80
|
-
* The function attempts to extract phone numbers from a text. Please note that
|
|
81
|
-
* the results might not be accurate, since phone numbers appear in a large variety of formats and conventions.
|
|
82
|
-
* If you encounter some problems, please [file an issue](https://github.com/apify/apify-js/issues).
|
|
83
|
-
* @param {string} text Text to search the phone numbers in.
|
|
84
|
-
* @return {string[]} Array of phone numbers found.
|
|
85
|
-
* If no phone numbers are found, the function returns an empty array.
|
|
86
|
-
* @memberOf social
|
|
87
|
-
*/
|
|
88
|
-
declare function phonesFromText(text: string): string[];
|
|
89
|
-
/**
|
|
90
|
-
* Finds phone number links in an array of URLs and extracts the phone numbers from them.
|
|
91
|
-
* Note that the phone number links look like `tel://123456789`, `tel:/123456789` or `tel:123456789`.
|
|
92
|
-
* @param {string[]} urls Array of URLs.
|
|
93
|
-
* @return {string[]} Array of phone numbers found.
|
|
94
|
-
* If no phone numbers are found, the function returns an empty array.
|
|
95
|
-
* @memberOf social
|
|
96
|
-
*/
|
|
97
|
-
declare function phonesFromUrls(urls: string[]): string[];
|
|
98
|
-
/**
|
|
99
|
-
* Representation of social handles parsed from a HTML page.
|
|
100
|
-
*
|
|
101
|
-
* The object has the following structure:
|
|
102
|
-
*
|
|
103
|
-
* ```
|
|
104
|
-
* {
|
|
105
|
-
* emails: String[],
|
|
106
|
-
* phones: String[],
|
|
107
|
-
* phonesUncertain: String[],
|
|
108
|
-
* linkedIns: String[],
|
|
109
|
-
* twitters: String[],
|
|
110
|
-
* instagrams: String[],
|
|
111
|
-
* facebooks: String[],
|
|
112
|
-
* youtubes: String[],
|
|
113
|
-
* tiktoks: String[],
|
|
114
|
-
* pinterests: String[],
|
|
115
|
-
* discords: String[],
|
|
116
|
-
* }
|
|
117
|
-
* ```
|
|
118
|
-
* @typedef SocialHandles
|
|
119
|
-
* @property {string[]} emails
|
|
120
|
-
* @property {string[]} phones
|
|
121
|
-
* @property {string[]} phonesUncertain
|
|
122
|
-
* @property {string[]} linkedIns
|
|
123
|
-
* @property {string[]} twitters
|
|
124
|
-
* @property {string[]} instagrams
|
|
125
|
-
* @property {string[]} facebooks
|
|
126
|
-
* @property {string[]} youtubes
|
|
127
|
-
* @property {string[]} tiktoks
|
|
128
|
-
* @property {string[]} pinterests
|
|
129
|
-
* @property {string[]} discords
|
|
130
|
-
*/
|
|
131
|
-
/**
|
|
132
|
-
* The function attempts to extract emails, phone numbers and social profile URLs from a HTML document,
|
|
133
|
-
* specifically LinkedIn, Twitter, Instagram and Facebook profile URLs.
|
|
134
|
-
* The function removes duplicates from the resulting arrays and sorts the items alphabetically.
|
|
135
|
-
*
|
|
136
|
-
* Note that the `phones` field contains phone numbers extracted from the special phone links
|
|
137
|
-
* such as `[call us](tel:+1234556789)` (see {@link social#phonesFromUrls})
|
|
138
|
-
* and potentially other sources with high certainty, while `phonesUncertain` contains phone numbers
|
|
139
|
-
* extracted from the plain text, which might be very inaccurate.
|
|
140
|
-
*
|
|
141
|
-
* **Example usage:**
|
|
142
|
-
* ```javascript
|
|
143
|
-
* const Apify = require('apify');
|
|
144
|
-
*
|
|
145
|
-
* const browser = await Apify.launchPuppeteer();
|
|
146
|
-
* const page = await browser.newPage();
|
|
147
|
-
* await page.goto('http://www.example.com');
|
|
148
|
-
* const html = await page.content();
|
|
149
|
-
*
|
|
150
|
-
* const result = Apify.utils.social.parseHandlesFromHtml(html);
|
|
151
|
-
* console.log('Social handles:');
|
|
152
|
-
* console.dir(result);
|
|
153
|
-
* ```
|
|
154
|
-
*
|
|
155
|
-
* @param {string} html HTML text
|
|
156
|
-
* @param {*|null} [data] Optional object which will receive the `text` and `$` properties
|
|
157
|
-
* that contain text content of the HTML and `cheerio` object, respectively. This is an optimization
|
|
158
|
-
* so that the caller doesn't need to parse the HTML document again, if needed.
|
|
159
|
-
* @return {SocialHandles} An object with the social handles.
|
|
160
|
-
*
|
|
161
|
-
* @memberOf social
|
|
162
|
-
*/
|
|
163
|
-
declare function parseHandlesFromHtml(html: string, data?: any | null): SocialHandles;
|
|
164
|
-
/**
|
|
165
|
-
* Regular expression to exactly match a single email address.
|
|
166
|
-
* It has the following form: `/^...$/i`.
|
|
167
|
-
* @type {RegExp}
|
|
168
|
-
* @memberOf social
|
|
169
|
-
*/
|
|
170
|
-
declare const EMAIL_REGEX: RegExp;
|
|
171
|
-
/**
|
|
172
|
-
* Regular expression to find multiple email addresses in a text.
|
|
173
|
-
* It has the following form: `/.../ig`.
|
|
174
|
-
* @type {RegExp}
|
|
175
|
-
* @memberOf social
|
|
176
|
-
*/
|
|
177
|
-
declare const EMAIL_REGEX_GLOBAL: RegExp;
|
|
178
|
-
/** @type RegExp */
|
|
179
|
-
declare let LINKEDIN_REGEX: RegExp;
|
|
180
|
-
/** @type RegExp */
|
|
181
|
-
declare let LINKEDIN_REGEX_GLOBAL: RegExp;
|
|
182
|
-
/** @type RegExp */
|
|
183
|
-
declare let INSTAGRAM_REGEX: RegExp;
|
|
184
|
-
/** @type RegExp */
|
|
185
|
-
declare let INSTAGRAM_REGEX_GLOBAL: RegExp;
|
|
186
|
-
/** @type RegExp */
|
|
187
|
-
declare let TWITTER_REGEX: RegExp;
|
|
188
|
-
/** @type RegExp */
|
|
189
|
-
declare let TWITTER_REGEX_GLOBAL: RegExp;
|
|
190
|
-
/** @type RegExp */
|
|
191
|
-
declare let FACEBOOK_REGEX: RegExp;
|
|
192
|
-
/** @type RegExp */
|
|
193
|
-
declare let FACEBOOK_REGEX_GLOBAL: RegExp;
|
|
194
|
-
/** @type RegExp */
|
|
195
|
-
declare let YOUTUBE_REGEX: RegExp;
|
|
196
|
-
/** @type RegExp */
|
|
197
|
-
declare let YOUTUBE_REGEX_GLOBAL: RegExp;
|
|
198
|
-
/** @type RegExp */
|
|
199
|
-
declare let TIKTOK_REGEX: RegExp;
|
|
200
|
-
/** @type RegExp */
|
|
201
|
-
declare let TIKTOK_REGEX_GLOBAL: RegExp;
|
|
202
|
-
/** @type RegExp */
|
|
203
|
-
declare let PINTEREST_REGEX: RegExp;
|
|
204
|
-
/** @type RegExp */
|
|
205
|
-
declare let PINTEREST_REGEX_GLOBAL: RegExp;
|
|
206
|
-
declare let DISCORD_REGEX: any;
|
|
207
|
-
/** @type RegExp */
|
|
208
|
-
declare let DISCORD_REGEX_GLOBAL: RegExp;
|
|
209
|
-
export {};
|
|
210
|
-
//# sourceMappingURL=utils_social.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"utils_social.d.ts","sourceRoot":"","sources":["../src/utils_social.js"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;YAgrBc,MAAM,EAAE;YACR,MAAM,EAAE;qBACR,MAAM,EAAE;eACR,MAAM,EAAE;cACR,MAAM,EAAE;gBACR,MAAM,EAAE;eACR,MAAM,EAAE;cACR,MAAM,EAAE;aACR,MAAM,EAAE;gBACR,MAAM,EAAE;cACR,MAAM,EAAE;;AA9pBtB;;;;;;;GAOG;AACH,sCALW,MAAM,GACL,MAAM,EAAE,CAOnB;AAED;;;;;;;;GAQG;AACH,sCALW,MAAM,EAAE,GACP,MAAM,EAAE,CAgBnB;AAgED;;;;;;;;GAQG;AACH,sCALW,MAAM,GACL,MAAM,EAAE,CAqBnB;AAED;;;;;;;GAOG;AACH,sCALW,MAAM,EAAE,GACP,MAAM,EAAE,CAgBnB;AA6eD;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAgCG;AAEH;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA+BG;AACH,4CARW,MAAM,SACN,MAAE,IAAI,GAGL,aAAa,CA4DxB;AA3wBD;;;;;GAKG;AACH,2BAHU,MAAM,CAG+C;AAE/D;;;;;GAKG;AACH,kCAHU,MAAM,CAGgD;AAwLhE,mBAAmB;AACnB,4BADU,MAAM,CACG;AACnB,mBAAmB;AACnB,mCADU,MAAM,CACU;AAC1B,mBAAmB;AACnB,6BADU,MAAM,CACI;AACpB,mBAAmB;AACnB,oCADU,MAAM,CACW;AAC3B,mBAAmB;AACnB,2BADU,MAAM,CACE;AAClB,mBAAmB;AACnB,kCADU,MAAM,CACS;AACzB,mBAAmB;AACnB,4BADU,MAAM,CACG;AACnB,mBAAmB;AACnB,mCADU,MAAM,CACU;AAC1B,mBAAmB;AACnB,2BADU,MAAM,CACE;AAClB,mBAAmB;AACnB,kCADU,MAAM,CACS;AACzB,mBAAmB;AACnB,0BADU,MAAM,CACC;AACjB,mBAAmB;AACnB,iCADU,MAAM,CACQ;AACxB,mBAAmB;AACnB,6BADU,MAAM,CACI;AACpB,mBAAmB;AACnB,oCADU,MAAM,CACW;AAC3B,+BAAkB;AAClB,mBAAmB;AACnB,kCADU,MAAM,CACS"}
|