apify 2.3.1-beta.4 → 3.0.0-alpha.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -5
- package/package.json +69 -128
- package/build/actor.d.ts +0 -113
- package/build/actor.d.ts.map +0 -1
- package/build/actor.js +0 -582
- package/build/actor.js.map +0 -1
- package/build/apify.d.ts +0 -752
- package/build/apify.d.ts.map +0 -1
- package/build/apify.js +0 -877
- package/build/apify.js.map +0 -1
- package/build/autoscaling/autoscaled_pool.d.ts +0 -384
- package/build/autoscaling/autoscaled_pool.d.ts.map +0 -1
- package/build/autoscaling/autoscaled_pool.js +0 -557
- package/build/autoscaling/autoscaled_pool.js.map +0 -1
- package/build/autoscaling/snapshotter.d.ts +0 -278
- package/build/autoscaling/snapshotter.d.ts.map +0 -1
- package/build/autoscaling/snapshotter.js +0 -447
- package/build/autoscaling/snapshotter.js.map +0 -1
- package/build/autoscaling/system_status.d.ts +0 -224
- package/build/autoscaling/system_status.d.ts.map +0 -1
- package/build/autoscaling/system_status.js +0 -228
- package/build/autoscaling/system_status.js.map +0 -1
- package/build/browser_launchers/browser_launcher.d.ts +0 -154
- package/build/browser_launchers/browser_launcher.d.ts.map +0 -1
- package/build/browser_launchers/browser_launcher.js +0 -160
- package/build/browser_launchers/browser_launcher.js.map +0 -1
- package/build/browser_launchers/browser_plugin.d.ts +0 -23
- package/build/browser_launchers/browser_plugin.d.ts.map +0 -1
- package/build/browser_launchers/browser_plugin.js +0 -25
- package/build/browser_launchers/browser_plugin.js.map +0 -1
- package/build/browser_launchers/playwright_launcher.d.ts +0 -131
- package/build/browser_launchers/playwright_launcher.d.ts.map +0 -1
- package/build/browser_launchers/playwright_launcher.js +0 -150
- package/build/browser_launchers/playwright_launcher.js.map +0 -1
- package/build/browser_launchers/puppeteer_launcher.d.ts +0 -153
- package/build/browser_launchers/puppeteer_launcher.d.ts.map +0 -1
- package/build/browser_launchers/puppeteer_launcher.js +0 -197
- package/build/browser_launchers/puppeteer_launcher.js.map +0 -1
- package/build/cache_container.d.ts +0 -31
- package/build/cache_container.d.ts.map +0 -1
- package/build/cache_container.js +0 -48
- package/build/cache_container.js.map +0 -1
- package/build/configuration.d.ts +0 -226
- package/build/configuration.d.ts.map +0 -1
- package/build/configuration.js +0 -325
- package/build/configuration.js.map +0 -1
- package/build/constants.d.ts +0 -37
- package/build/constants.d.ts.map +0 -1
- package/build/constants.js +0 -41
- package/build/constants.js.map +0 -1
- package/build/crawlers/basic_crawler.d.ts +0 -443
- package/build/crawlers/basic_crawler.d.ts.map +0 -1
- package/build/crawlers/basic_crawler.js +0 -664
- package/build/crawlers/basic_crawler.js.map +0 -1
- package/build/crawlers/browser_crawler.d.ts +0 -512
- package/build/crawlers/browser_crawler.d.ts.map +0 -1
- package/build/crawlers/browser_crawler.js +0 -540
- package/build/crawlers/browser_crawler.js.map +0 -1
- package/build/crawlers/cheerio_crawler.d.ts +0 -931
- package/build/crawlers/cheerio_crawler.d.ts.map +0 -1
- package/build/crawlers/cheerio_crawler.js +0 -913
- package/build/crawlers/cheerio_crawler.js.map +0 -1
- package/build/crawlers/crawler_extension.d.ts +0 -10
- package/build/crawlers/crawler_extension.d.ts.map +0 -1
- package/build/crawlers/crawler_extension.js +0 -19
- package/build/crawlers/crawler_extension.js.map +0 -1
- package/build/crawlers/crawler_utils.d.ts +0 -34
- package/build/crawlers/crawler_utils.d.ts.map +0 -1
- package/build/crawlers/crawler_utils.js +0 -87
- package/build/crawlers/crawler_utils.js.map +0 -1
- package/build/crawlers/playwright_crawler.d.ts +0 -448
- package/build/crawlers/playwright_crawler.d.ts.map +0 -1
- package/build/crawlers/playwright_crawler.js +0 -299
- package/build/crawlers/playwright_crawler.js.map +0 -1
- package/build/crawlers/puppeteer_crawler.d.ts +0 -425
- package/build/crawlers/puppeteer_crawler.d.ts.map +0 -1
- package/build/crawlers/puppeteer_crawler.js +0 -299
- package/build/crawlers/puppeteer_crawler.js.map +0 -1
- package/build/crawlers/statistics.d.ts +0 -185
- package/build/crawlers/statistics.d.ts.map +0 -1
- package/build/crawlers/statistics.js +0 -331
- package/build/crawlers/statistics.js.map +0 -1
- package/build/enqueue_links/click_elements.d.ts +0 -179
- package/build/enqueue_links/click_elements.d.ts.map +0 -1
- package/build/enqueue_links/click_elements.js +0 -434
- package/build/enqueue_links/click_elements.js.map +0 -1
- package/build/enqueue_links/enqueue_links.d.ts +0 -117
- package/build/enqueue_links/enqueue_links.d.ts.map +0 -1
- package/build/enqueue_links/enqueue_links.js +0 -163
- package/build/enqueue_links/enqueue_links.js.map +0 -1
- package/build/enqueue_links/shared.d.ts +0 -42
- package/build/enqueue_links/shared.d.ts.map +0 -1
- package/build/enqueue_links/shared.js +0 -121
- package/build/enqueue_links/shared.js.map +0 -1
- package/build/errors.d.ts +0 -29
- package/build/errors.d.ts.map +0 -1
- package/build/errors.js +0 -38
- package/build/errors.js.map +0 -1
- package/build/events.d.ts +0 -11
- package/build/events.d.ts.map +0 -1
- package/build/events.js +0 -147
- package/build/events.js.map +0 -1
- package/build/index.d.ts +0 -4
- package/build/index.d.ts.map +0 -1
- package/build/index.js +0 -7
- package/build/index.js.map +0 -1
- package/build/main.d.ts +0 -179
- package/build/main.d.ts.map +0 -1
- package/build/main.js +0 -81
- package/build/main.js.map +0 -1
- package/build/playwright_utils.d.ts +0 -9
- package/build/playwright_utils.d.ts.map +0 -1
- package/build/playwright_utils.js +0 -90
- package/build/playwright_utils.js.map +0 -1
- package/build/proxy_configuration.d.ts +0 -411
- package/build/proxy_configuration.d.ts.map +0 -1
- package/build/proxy_configuration.js +0 -517
- package/build/proxy_configuration.js.map +0 -1
- package/build/pseudo_url.d.ts +0 -86
- package/build/pseudo_url.d.ts.map +0 -1
- package/build/pseudo_url.js +0 -153
- package/build/pseudo_url.js.map +0 -1
- package/build/puppeteer_request_interception.d.ts +0 -8
- package/build/puppeteer_request_interception.d.ts.map +0 -1
- package/build/puppeteer_request_interception.js +0 -235
- package/build/puppeteer_request_interception.js.map +0 -1
- package/build/puppeteer_utils.d.ts +0 -250
- package/build/puppeteer_utils.d.ts.map +0 -1
- package/build/puppeteer_utils.js +0 -551
- package/build/puppeteer_utils.js.map +0 -1
- package/build/request.d.ts +0 -180
- package/build/request.d.ts.map +0 -1
- package/build/request.js +0 -261
- package/build/request.js.map +0 -1
- package/build/request_list.d.ts +0 -581
- package/build/request_list.d.ts.map +0 -1
- package/build/request_list.js +0 -826
- package/build/request_list.js.map +0 -1
- package/build/serialization.d.ts +0 -5
- package/build/serialization.d.ts.map +0 -1
- package/build/serialization.js +0 -139
- package/build/serialization.js.map +0 -1
- package/build/session_pool/errors.d.ts +0 -11
- package/build/session_pool/errors.d.ts.map +0 -1
- package/build/session_pool/errors.js +0 -18
- package/build/session_pool/errors.js.map +0 -1
- package/build/session_pool/events.d.ts +0 -5
- package/build/session_pool/events.d.ts.map +0 -1
- package/build/session_pool/events.js +0 -6
- package/build/session_pool/events.js.map +0 -1
- package/build/session_pool/session.d.ts +0 -286
- package/build/session_pool/session.d.ts.map +0 -1
- package/build/session_pool/session.js +0 -355
- package/build/session_pool/session.js.map +0 -1
- package/build/session_pool/session_pool.d.ts +0 -280
- package/build/session_pool/session_pool.d.ts.map +0 -1
- package/build/session_pool/session_pool.js +0 -393
- package/build/session_pool/session_pool.js.map +0 -1
- package/build/session_pool/session_utils.d.ts +0 -4
- package/build/session_pool/session_utils.d.ts.map +0 -1
- package/build/session_pool/session_utils.js +0 -24
- package/build/session_pool/session_utils.js.map +0 -1
- package/build/stealth/hiding_tricks.d.ts +0 -22
- package/build/stealth/hiding_tricks.d.ts.map +0 -1
- package/build/stealth/hiding_tricks.js +0 -308
- package/build/stealth/hiding_tricks.js.map +0 -1
- package/build/stealth/stealth.d.ts +0 -56
- package/build/stealth/stealth.d.ts.map +0 -1
- package/build/stealth/stealth.js +0 -125
- package/build/stealth/stealth.js.map +0 -1
- package/build/storages/dataset.d.ts +0 -288
- package/build/storages/dataset.d.ts.map +0 -1
- package/build/storages/dataset.js +0 -480
- package/build/storages/dataset.js.map +0 -1
- package/build/storages/key_value_store.d.ts +0 -243
- package/build/storages/key_value_store.d.ts.map +0 -1
- package/build/storages/key_value_store.js +0 -462
- package/build/storages/key_value_store.js.map +0 -1
- package/build/storages/request_queue.d.ts +0 -318
- package/build/storages/request_queue.d.ts.map +0 -1
- package/build/storages/request_queue.js +0 -636
- package/build/storages/request_queue.js.map +0 -1
- package/build/storages/storage_manager.d.ts +0 -87
- package/build/storages/storage_manager.d.ts.map +0 -1
- package/build/storages/storage_manager.js +0 -150
- package/build/storages/storage_manager.js.map +0 -1
- package/build/tsconfig.tsbuildinfo +0 -1
- package/build/typedefs.d.ts +0 -146
- package/build/typedefs.d.ts.map +0 -1
- package/build/typedefs.js +0 -88
- package/build/typedefs.js.map +0 -1
- package/build/utils.d.ts +0 -175
- package/build/utils.d.ts.map +0 -1
- package/build/utils.js +0 -731
- package/build/utils.js.map +0 -1
- package/build/utils_log.d.ts +0 -41
- package/build/utils_log.d.ts.map +0 -1
- package/build/utils_log.js +0 -192
- package/build/utils_log.js.map +0 -1
- package/build/utils_request.d.ts +0 -77
- package/build/utils_request.d.ts.map +0 -1
- package/build/utils_request.js +0 -385
- package/build/utils_request.js.map +0 -1
- package/build/utils_social.d.ts +0 -210
- package/build/utils_social.d.ts.map +0 -1
- package/build/utils_social.js +0 -787
- package/build/utils_social.js.map +0 -1
- package/build/validators.d.ts +0 -23
- package/build/validators.d.ts.map +0 -1
- package/build/validators.js +0 -29
- package/build/validators.js.map +0 -1
|
@@ -1,517 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.createProxyConfiguration = exports.ProxyConfiguration = void 0;
|
|
4
|
-
const tslib_1 = require("tslib");
|
|
5
|
-
const consts_1 = require("@apify/consts");
|
|
6
|
-
const ow_1 = (0, tslib_1.__importDefault)(require("ow"));
|
|
7
|
-
const constants_1 = require("./constants");
|
|
8
|
-
const utils_1 = require("./utils");
|
|
9
|
-
const utils_request_1 = require("./utils_request");
|
|
10
|
-
const utils_log_1 = (0, tslib_1.__importDefault)(require("./utils_log"));
|
|
11
|
-
const configuration_1 = require("./configuration");
|
|
12
|
-
// CONSTANTS
|
|
13
|
-
const PROTOCOL = 'http';
|
|
14
|
-
// https://docs.apify.com/proxy/datacenter-proxy#username-parameters
|
|
15
|
-
const MAX_SESSION_ID_LENGTH = 50;
|
|
16
|
-
const CHECK_ACCESS_REQUEST_TIMEOUT_MILLIS = 4000;
|
|
17
|
-
const CHECK_ACCESS_MAX_ATTEMPTS = 2;
|
|
18
|
-
/**
|
|
19
|
-
* @callback ProxyConfigurationFunction
|
|
20
|
-
* @param {string|number} sessionId
|
|
21
|
-
* @returns {string}
|
|
22
|
-
*/
|
|
23
|
-
/**
|
|
24
|
-
* @typedef ProxyConfigurationOptions
|
|
25
|
-
* @property {string} [password]
|
|
26
|
-
* User's password for the proxy. By default, it is taken from the `APIFY_PROXY_PASSWORD`
|
|
27
|
-
* environment variable, which is automatically set by the system when running the actors.
|
|
28
|
-
* @property {string[]} [groups]
|
|
29
|
-
* An array of proxy groups to be used by the [Apify Proxy](https://docs.apify.com/proxy).
|
|
30
|
-
* If not provided, the proxy will select the groups automatically.
|
|
31
|
-
* @property {string} [countryCode]
|
|
32
|
-
* If set and relevant proxies are available in your Apify account, all proxied requests will
|
|
33
|
-
* use IP addresses that are geolocated to the specified country. For example `GB` for IPs
|
|
34
|
-
* from Great Britain. Note that online services often have their own rules for handling
|
|
35
|
-
* geolocation and thus the country selection is a best attempt at geolocation, rather than
|
|
36
|
-
* a guaranteed hit. This parameter is optional, by default, each proxied request is assigned
|
|
37
|
-
* an IP address from a random country. The country code needs to be a two letter ISO country code. See the
|
|
38
|
-
* [full list of available country codes](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2#Officially_assigned_code_elements).
|
|
39
|
-
* This parameter is optional, by default, the proxy uses all available proxy servers from all countries.
|
|
40
|
-
* on the Apify cloud, or when using the [Apify CLI](https://github.com/apify/apify-cli).
|
|
41
|
-
* @property {string[]} [apifyProxyGroups]
|
|
42
|
-
* Same option as `groups` which can be used to
|
|
43
|
-
* configurate the proxy by UI input schema. You should use the `groups` option in your crawler code.
|
|
44
|
-
* @property {string} [apifyProxyCountry]
|
|
45
|
-
* Same option as `countryCode` which can be used to
|
|
46
|
-
* configurate the proxy by UI input schema. You should use the `countryCode` option in your crawler code.
|
|
47
|
-
* @property {string[]} [proxyUrls]
|
|
48
|
-
* An array of custom proxy URLs to be rotated.
|
|
49
|
-
* Custom proxies are not compatible with Apify Proxy and an attempt to use both
|
|
50
|
-
* configuration options will cause an error to be thrown on initialize.
|
|
51
|
-
* @property {ProxyConfigurationFunction} [newUrlFunction]
|
|
52
|
-
* Custom function that allows you to generate the new proxy URL dynamically. It gets the `sessionId` as a parameter
|
|
53
|
-
* and should always return stringified proxy URL.
|
|
54
|
-
* This function is used to generate the URL when {@link ProxyConfiguration.newUrl} or {@link ProxyConfiguration.newProxyInfo} is called.
|
|
55
|
-
*/
|
|
56
|
-
/**
|
|
57
|
-
* The main purpose of the ProxyInfo object is to provide information
|
|
58
|
-
* about the current proxy connection used by the crawler for the request.
|
|
59
|
-
* Outside of crawlers, you can get this object by calling {@link ProxyConfiguration.newProxyInfo}.
|
|
60
|
-
*
|
|
61
|
-
* **Example usage:**
|
|
62
|
-
*
|
|
63
|
-
* ```javascript
|
|
64
|
-
*
|
|
65
|
-
* const proxyConfiguration = await Apify.createProxyConfiguration({
|
|
66
|
-
* groups: ['GROUP1', 'GROUP2'] // List of Apify Proxy groups
|
|
67
|
-
* countryCode: 'US',
|
|
68
|
-
* });
|
|
69
|
-
*
|
|
70
|
-
* // Getting proxyInfo object by calling class method directly
|
|
71
|
-
* const proxyInfo = proxyConfiguration.newProxyInfo();
|
|
72
|
-
*
|
|
73
|
-
* // In crawler
|
|
74
|
-
* const crawler = new Apify.CheerioCrawler({
|
|
75
|
-
* // ...
|
|
76
|
-
* proxyConfiguration,
|
|
77
|
-
* handlePageFunction: ({ proxyInfo }) => {
|
|
78
|
-
* // Getting used proxy URL
|
|
79
|
-
* const proxyUrl = proxyInfo.url;
|
|
80
|
-
*
|
|
81
|
-
* // Getting ID of used Session
|
|
82
|
-
* const sessionIdentifier = proxyInfo.sessionId;
|
|
83
|
-
* }
|
|
84
|
-
* })
|
|
85
|
-
*
|
|
86
|
-
* ```
|
|
87
|
-
* @typedef ProxyInfo
|
|
88
|
-
* @property {string} [sessionId]
|
|
89
|
-
* The identifier of used {@link Session}, if used.
|
|
90
|
-
* @property {string} url
|
|
91
|
-
* The URL of the proxy.
|
|
92
|
-
* @property {string[]} groups
|
|
93
|
-
* An array of proxy groups to be used by the [Apify Proxy](https://docs.apify.com/proxy).
|
|
94
|
-
* If not provided, the proxy will select the groups automatically.
|
|
95
|
-
* @property {string} [countryCode]
|
|
96
|
-
* If set and relevant proxies are available in your Apify account, all proxied requests will
|
|
97
|
-
* use IP addresses that are geolocated to the specified country. For example `GB` for IPs
|
|
98
|
-
* from Great Britain. Note that online services often have their own rules for handling
|
|
99
|
-
* geolocation and thus the country selection is a best attempt at geolocation, rather than
|
|
100
|
-
* a guaranteed hit. This parameter is optional, by default, each proxied request is assigned
|
|
101
|
-
* an IP address from a random country. The country code needs to be a two letter ISO country code. See the
|
|
102
|
-
* [full list of available country codes](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2#Officially_assigned_code_elements).
|
|
103
|
-
* This parameter is optional, by default, the proxy uses all available proxy servers from all countries.
|
|
104
|
-
* @property {string} password
|
|
105
|
-
* User's password for the proxy. By default, it is taken from the `APIFY_PROXY_PASSWORD`
|
|
106
|
-
* environment variable, which is automatically set by the system when running the actors
|
|
107
|
-
* on the Apify cloud, or when using the [Apify CLI](https://github.com/apify/apify-cli).
|
|
108
|
-
* @property {string} hostname
|
|
109
|
-
* Hostname of your proxy.
|
|
110
|
-
* @property {string} port
|
|
111
|
-
* Proxy port.
|
|
112
|
-
*/
|
|
113
|
-
/**
|
|
114
|
-
* Configures connection to a proxy server with the provided options. Proxy servers are used to prevent target websites from blocking
|
|
115
|
-
* your crawlers based on IP address rate limits or blacklists. Setting proxy configuration in your crawlers automatically configures
|
|
116
|
-
* them to use the selected proxies for all connections. You can get information about the currently used proxy by inspecting
|
|
117
|
-
* the {@link ProxyInfo} property in your crawler's page function. There, you can inspect the proxy's URL and other attributes.
|
|
118
|
-
*
|
|
119
|
-
* The proxy servers are managed by [Apify Proxy](https://docs.apify.com/proxy). To be able to use Apify Proxy,
|
|
120
|
-
* you need an Apify account and access to the selected proxies. If you provide no configuration option,
|
|
121
|
-
* the proxies will be managed automatically using a smart algorithm.
|
|
122
|
-
*
|
|
123
|
-
* If you want to use your own proxies, use the {@link ProxyConfigurationOptions.proxyUrls} option. Your list of proxy URLs will
|
|
124
|
-
* be rotated by the configuration if this option is provided.
|
|
125
|
-
*
|
|
126
|
-
* **Example usage:**
|
|
127
|
-
*
|
|
128
|
-
* ```javascript
|
|
129
|
-
*
|
|
130
|
-
* const proxyConfiguration = await Apify.createProxyConfiguration({
|
|
131
|
-
* groups: ['GROUP1', 'GROUP2'] // List of Apify Proxy groups
|
|
132
|
-
* countryCode: 'US',
|
|
133
|
-
* });
|
|
134
|
-
*
|
|
135
|
-
* const crawler = new Apify.CheerioCrawler({
|
|
136
|
-
* // ...
|
|
137
|
-
* proxyConfiguration,
|
|
138
|
-
* handlePageFunction: ({ proxyInfo }) => {
|
|
139
|
-
* const usedProxyUrl = proxyInfo.url; // Getting the proxy URL
|
|
140
|
-
* }
|
|
141
|
-
* })
|
|
142
|
-
*
|
|
143
|
-
* ```
|
|
144
|
-
* @hideconstructor
|
|
145
|
-
*/
|
|
146
|
-
class ProxyConfiguration {
|
|
147
|
-
/**
|
|
148
|
-
* Configuration of proxy.
|
|
149
|
-
*
|
|
150
|
-
* @param {ProxyConfigurationOptions} [options] All `ProxyConfiguration` options.
|
|
151
|
-
* @param {Configuration} [config]
|
|
152
|
-
*/
|
|
153
|
-
constructor(options = {}, config = configuration_1.Configuration.getGlobalConfig()) {
|
|
154
|
-
(0, ow_1.default)(options, ow_1.default.object.exactShape({
|
|
155
|
-
groups: ow_1.default.optional.array.ofType(ow_1.default.string.matches(consts_1.APIFY_PROXY_VALUE_REGEX)),
|
|
156
|
-
apifyProxyGroups: ow_1.default.optional.array.ofType(ow_1.default.string.matches(consts_1.APIFY_PROXY_VALUE_REGEX)),
|
|
157
|
-
countryCode: ow_1.default.optional.string.matches(constants_1.COUNTRY_CODE_REGEX),
|
|
158
|
-
apifyProxyCountry: ow_1.default.optional.string.matches(constants_1.COUNTRY_CODE_REGEX),
|
|
159
|
-
proxyUrls: ow_1.default.optional.array.nonEmpty.ofType(ow_1.default.string.url),
|
|
160
|
-
password: ow_1.default.optional.string,
|
|
161
|
-
newUrlFunction: ow_1.default.optional.function,
|
|
162
|
-
}));
|
|
163
|
-
const { groups = [], apifyProxyGroups = [], countryCode, apifyProxyCountry, proxyUrls, password = config.get('proxyPassword'), newUrlFunction, } = options;
|
|
164
|
-
const groupsToUse = groups.length ? groups : apifyProxyGroups;
|
|
165
|
-
const countryCodeToUse = countryCode || apifyProxyCountry;
|
|
166
|
-
const hostname = config.get('proxyHostname');
|
|
167
|
-
const port = config.get('proxyPort');
|
|
168
|
-
// Validation
|
|
169
|
-
if (((proxyUrls || newUrlFunction) && ((groupsToUse.length) || countryCodeToUse))) {
|
|
170
|
-
this._throwCannotCombineCustomWithApify();
|
|
171
|
-
}
|
|
172
|
-
if (proxyUrls && newUrlFunction)
|
|
173
|
-
this._throwCannotCombineCustomMethods();
|
|
174
|
-
this.groups = groupsToUse;
|
|
175
|
-
this.countryCode = countryCodeToUse;
|
|
176
|
-
this.password = password;
|
|
177
|
-
this.hostname = hostname;
|
|
178
|
-
this.port = port;
|
|
179
|
-
this.nextCustomUrlIndex = 0;
|
|
180
|
-
this.proxyUrls = proxyUrls;
|
|
181
|
-
this.usedProxyUrls = new Map();
|
|
182
|
-
this.newUrlFunction = newUrlFunction;
|
|
183
|
-
this.usesApifyProxy = !this.proxyUrls && !this.newUrlFunction;
|
|
184
|
-
this.log = utils_log_1.default.child({ prefix: 'ProxyConfiguration' });
|
|
185
|
-
this.config = config;
|
|
186
|
-
this.isManInTheMiddle = false;
|
|
187
|
-
if (proxyUrls && proxyUrls.some((url) => url.includes('apify.com'))) {
|
|
188
|
-
this.log.warning('Some Apify proxy features may work incorrectly. Please consider setting up Apify properties instead of `proxyUrls`.\n'
|
|
189
|
-
+ 'See https://sdk.apify.com/docs/guides/proxy-management#apify-proxy-configuration');
|
|
190
|
-
}
|
|
191
|
-
}
|
|
192
|
-
/**
|
|
193
|
-
* Loads proxy password if token is provided and checks access to Apify Proxy and provided proxy groups
|
|
194
|
-
* if Apify Proxy configuration is used.
|
|
195
|
-
* Also checks if country has access to Apify Proxy groups if the country code is provided.
|
|
196
|
-
*
|
|
197
|
-
* You should use the {@link Apify.createProxyConfiguration} function to create a pre-initialized
|
|
198
|
-
* `ProxyConfiguration` instance instead of calling this manually.
|
|
199
|
-
*
|
|
200
|
-
* @returns {Promise<void>}
|
|
201
|
-
*/
|
|
202
|
-
async initialize() {
|
|
203
|
-
if (this.usesApifyProxy) {
|
|
204
|
-
await this._setPasswordIfToken();
|
|
205
|
-
await this._checkAccess();
|
|
206
|
-
}
|
|
207
|
-
}
|
|
208
|
-
/**
|
|
209
|
-
* This function creates a new {@link ProxyInfo} info object.
|
|
210
|
-
* It is used by CheerioCrawler and PuppeteerCrawler to generate proxy URLs and also to allow the user to inspect
|
|
211
|
-
* the currently used proxy via the handlePageFunction parameter: proxyInfo.
|
|
212
|
-
* Use it if you want to work with a rich representation of a proxy URL.
|
|
213
|
-
* If you need the URL string only, use {@link ProxyConfiguration.newUrl}.
|
|
214
|
-
* @param {string|number} [sessionId]
|
|
215
|
-
* Represents the identifier of user {@link Session} that can be managed by the {@link SessionPool} or
|
|
216
|
-
* you can use the Apify Proxy [Session](https://docs.apify.com/proxy#sessions) identifier.
|
|
217
|
-
* When the provided sessionId is a number, it's converted to a string. Property sessionId of
|
|
218
|
-
* {@link ProxyInfo} is always returned as a type string.
|
|
219
|
-
*
|
|
220
|
-
* All the HTTP requests going through the proxy with the same session identifier
|
|
221
|
-
* will use the same target proxy server (i.e. the same IP address).
|
|
222
|
-
* The identifier must not be longer than 50 characters and include only the following: `0-9`, `a-z`, `A-Z`, `"."`, `"_"` and `"~"`.
|
|
223
|
-
* @return {ProxyInfo} represents information about used proxy and its configuration.
|
|
224
|
-
*/
|
|
225
|
-
newProxyInfo(sessionId) {
|
|
226
|
-
if (typeof sessionId === 'number')
|
|
227
|
-
sessionId = `${sessionId}`;
|
|
228
|
-
(0, ow_1.default)(sessionId, ow_1.default.optional.string.maxLength(MAX_SESSION_ID_LENGTH).matches(consts_1.APIFY_PROXY_VALUE_REGEX));
|
|
229
|
-
const url = this.newUrl(sessionId);
|
|
230
|
-
const { groups, countryCode, password, port, hostname } = this.usesApifyProxy ? this : new URL(url);
|
|
231
|
-
return {
|
|
232
|
-
sessionId,
|
|
233
|
-
url,
|
|
234
|
-
groups,
|
|
235
|
-
countryCode,
|
|
236
|
-
password,
|
|
237
|
-
hostname,
|
|
238
|
-
port,
|
|
239
|
-
};
|
|
240
|
-
}
|
|
241
|
-
/**
|
|
242
|
-
* Returns a new proxy URL based on provided configuration options and the `sessionId` parameter.
|
|
243
|
-
* @param {string|number} [sessionId]
|
|
244
|
-
* Represents the identifier of user {@link Session} that can be managed by the {@link SessionPool} or
|
|
245
|
-
* you can use the Apify Proxy [Session](https://docs.apify.com/proxy#sessions) identifier.
|
|
246
|
-
* When the provided sessionId is a number, it's converted to a string.
|
|
247
|
-
*
|
|
248
|
-
* All the HTTP requests going through the proxy with the same session identifier
|
|
249
|
-
* will use the same target proxy server (i.e. the same IP address).
|
|
250
|
-
* The identifier must not be longer than 50 characters and include only the following: `0-9`, `a-z`, `A-Z`, `"."`, `"_"` and `"~"`.
|
|
251
|
-
* @return {string} A string with a proxy URL, including authentication credentials and port number.
|
|
252
|
-
* For example, `http://bob:password123@proxy.example.com:8000`
|
|
253
|
-
*/
|
|
254
|
-
newUrl(sessionId) {
|
|
255
|
-
if (typeof sessionId === 'number')
|
|
256
|
-
sessionId = `${sessionId}`;
|
|
257
|
-
(0, ow_1.default)(sessionId, ow_1.default.optional.string.maxLength(MAX_SESSION_ID_LENGTH).matches(consts_1.APIFY_PROXY_VALUE_REGEX));
|
|
258
|
-
if (this.newUrlFunction) {
|
|
259
|
-
return this._callNewUrlFunction(sessionId);
|
|
260
|
-
}
|
|
261
|
-
if (this.proxyUrls) {
|
|
262
|
-
return this._handleCustomUrl(sessionId);
|
|
263
|
-
}
|
|
264
|
-
const username = this._getUsername(sessionId);
|
|
265
|
-
const { password, hostname, port } = this;
|
|
266
|
-
return `${PROTOCOL}://${username}:${password}@${hostname}:${port}`;
|
|
267
|
-
}
|
|
268
|
-
/**
|
|
269
|
-
* Returns proxy username.
|
|
270
|
-
* @param {string} [sessionId]
|
|
271
|
-
* @return {string} the proxy username
|
|
272
|
-
* @ignore
|
|
273
|
-
* @protected
|
|
274
|
-
* @internal
|
|
275
|
-
*/
|
|
276
|
-
_getUsername(sessionId) {
|
|
277
|
-
let username;
|
|
278
|
-
const { groups, countryCode } = this;
|
|
279
|
-
const parts = [];
|
|
280
|
-
if (groups && groups.length) {
|
|
281
|
-
parts.push(`groups-${groups.join('+')}`);
|
|
282
|
-
}
|
|
283
|
-
if (sessionId) {
|
|
284
|
-
parts.push(`session-${sessionId}`);
|
|
285
|
-
}
|
|
286
|
-
if (countryCode) {
|
|
287
|
-
parts.push(`country-${countryCode}`);
|
|
288
|
-
}
|
|
289
|
-
username = parts.join(',');
|
|
290
|
-
if (parts.length === 0)
|
|
291
|
-
username = 'auto';
|
|
292
|
-
return username;
|
|
293
|
-
}
|
|
294
|
-
/**
|
|
295
|
-
* Checks if Apify Token is provided in env
|
|
296
|
-
* and gets the password via API and sets it to env
|
|
297
|
-
* @returns {Promise<void>}
|
|
298
|
-
* @ignore
|
|
299
|
-
* @protected
|
|
300
|
-
* @internal
|
|
301
|
-
*/
|
|
302
|
-
async _setPasswordIfToken() {
|
|
303
|
-
const token = this.config.get('token');
|
|
304
|
-
if (token) {
|
|
305
|
-
const { proxy: { password } } = await utils_1.apifyClient.user().get();
|
|
306
|
-
if (this.password) {
|
|
307
|
-
if (this.password !== password) {
|
|
308
|
-
this.log.warning('The Apify Proxy password you provided belongs to'
|
|
309
|
-
+ ' a different user than the Apify token you are using. Are you sure this is correct?');
|
|
310
|
-
}
|
|
311
|
-
}
|
|
312
|
-
else {
|
|
313
|
-
this.password = password;
|
|
314
|
-
}
|
|
315
|
-
}
|
|
316
|
-
if (!this.password) {
|
|
317
|
-
throw new Error(`Apify Proxy password must be provided using options.password or the "${consts_1.ENV_VARS.PROXY_PASSWORD}" environment variable.`
|
|
318
|
-
+ `If you add the "${consts_1.ENV_VARS.TOKEN}" environment variable, the password will be automatically inferred.`);
|
|
319
|
-
}
|
|
320
|
-
}
|
|
321
|
-
/**
|
|
322
|
-
* Checks whether the user has access to the proxies specified in the provided ProxyConfigurationOptions.
|
|
323
|
-
* If the check can not be made, it only prints a warning and allows the program to continue. This is to
|
|
324
|
-
* prevent program crashes caused by short downtimes of Proxy.
|
|
325
|
-
*
|
|
326
|
-
* @returns {Promise<void>}
|
|
327
|
-
* @protected
|
|
328
|
-
* @ignore
|
|
329
|
-
* @internal
|
|
330
|
-
*/
|
|
331
|
-
async _checkAccess() {
|
|
332
|
-
const status = await this._fetchStatus();
|
|
333
|
-
if (status) {
|
|
334
|
-
const { connected, connectionError, isManInTheMiddle } = status;
|
|
335
|
-
this.isManInTheMiddle = isManInTheMiddle;
|
|
336
|
-
if (!connected)
|
|
337
|
-
this._throwApifyProxyConnectionError(connectionError);
|
|
338
|
-
}
|
|
339
|
-
else {
|
|
340
|
-
this.log.warning('Apify Proxy access check timed out. Watch out for errors with status code 407. '
|
|
341
|
-
+ 'If you see some, it most likely means you don\'t have access to either all or some of the proxies you\'re trying to use.');
|
|
342
|
-
}
|
|
343
|
-
}
|
|
344
|
-
/**
|
|
345
|
-
* Apify Proxy can be down for a second or a minute, but this should not crash processes.
|
|
346
|
-
*
|
|
347
|
-
* @return {Promise<{ connected: boolean, connectionError: string } | undefined>}
|
|
348
|
-
* @protected
|
|
349
|
-
* @ignore
|
|
350
|
-
* @internal
|
|
351
|
-
*/
|
|
352
|
-
async _fetchStatus() {
|
|
353
|
-
const requestOpts = {
|
|
354
|
-
url: `${this.config.get('proxyStatusUrl')}/?format=json`,
|
|
355
|
-
proxyUrl: this.newUrl(),
|
|
356
|
-
timeout: { request: CHECK_ACCESS_REQUEST_TIMEOUT_MILLIS },
|
|
357
|
-
responseType: 'json',
|
|
358
|
-
};
|
|
359
|
-
for (let attempt = 1; attempt <= CHECK_ACCESS_MAX_ATTEMPTS; attempt++) {
|
|
360
|
-
try {
|
|
361
|
-
const response = await (0, utils_request_1.requestAsBrowser)(requestOpts);
|
|
362
|
-
return response.body;
|
|
363
|
-
}
|
|
364
|
-
catch (err) {
|
|
365
|
-
// retry connection errors
|
|
366
|
-
}
|
|
367
|
-
}
|
|
368
|
-
}
|
|
369
|
-
/**
|
|
370
|
-
* Handles custom url rotation with session
|
|
371
|
-
* @param {string} [sessionId]
|
|
372
|
-
* @returns {string} url
|
|
373
|
-
* @protected
|
|
374
|
-
* @ignore
|
|
375
|
-
* @internal
|
|
376
|
-
*/
|
|
377
|
-
_handleCustomUrl(sessionId) {
|
|
378
|
-
let customUrlToUse;
|
|
379
|
-
if (sessionId) {
|
|
380
|
-
if (this.usedProxyUrls.has(sessionId)) {
|
|
381
|
-
customUrlToUse = this.usedProxyUrls.get(sessionId);
|
|
382
|
-
}
|
|
383
|
-
else {
|
|
384
|
-
customUrlToUse = this.proxyUrls[this.nextCustomUrlIndex++ % this.proxyUrls.length];
|
|
385
|
-
this.usedProxyUrls.set(sessionId, customUrlToUse);
|
|
386
|
-
}
|
|
387
|
-
}
|
|
388
|
-
else {
|
|
389
|
-
customUrlToUse = this.proxyUrls[this.nextCustomUrlIndex++ % this.proxyUrls.length];
|
|
390
|
-
}
|
|
391
|
-
return customUrlToUse;
|
|
392
|
-
}
|
|
393
|
-
/**
|
|
394
|
-
* Calls the custom newUrlFunction and checks format of its return value
|
|
395
|
-
* @param {string} [sessionId]
|
|
396
|
-
* @protected
|
|
397
|
-
* @ignore
|
|
398
|
-
* @internal
|
|
399
|
-
*/
|
|
400
|
-
_callNewUrlFunction(sessionId) {
|
|
401
|
-
let proxyUrl;
|
|
402
|
-
try {
|
|
403
|
-
proxyUrl = this.newUrlFunction(sessionId);
|
|
404
|
-
new URL(proxyUrl); // eslint-disable-line no-new
|
|
405
|
-
}
|
|
406
|
-
catch (err) {
|
|
407
|
-
this._throwNewUrlFunctionInvalid(err);
|
|
408
|
-
}
|
|
409
|
-
return proxyUrl;
|
|
410
|
-
}
|
|
411
|
-
/**
|
|
412
|
-
* Throws invalid custom newUrlFunction return
|
|
413
|
-
* @param {Error} err
|
|
414
|
-
* @protected
|
|
415
|
-
* @ignore
|
|
416
|
-
* @internal
|
|
417
|
-
*/
|
|
418
|
-
_throwNewUrlFunctionInvalid(err) {
|
|
419
|
-
throw new Error(`The provided newUrlFunction did not return a valid URL.\nCause: ${err.message}`);
|
|
420
|
-
}
|
|
421
|
-
/**
|
|
422
|
-
* Throws invalid proxy value error
|
|
423
|
-
* @param {string} param
|
|
424
|
-
* @param {string} value
|
|
425
|
-
* @protected
|
|
426
|
-
* @ignore
|
|
427
|
-
* @internal
|
|
428
|
-
*/
|
|
429
|
-
_throwInvalidProxyValueError(param, value) {
|
|
430
|
-
throw new Error(`The provided proxy ${param} name "${value}" can only contain the following characters: 0-9, a-z, A-Z, ".", "_" and "~"`);
|
|
431
|
-
}
|
|
432
|
-
/**
|
|
433
|
-
* Throws Apify Proxy is not connected
|
|
434
|
-
* @protected
|
|
435
|
-
* @ignore
|
|
436
|
-
* @internal
|
|
437
|
-
*/
|
|
438
|
-
_throwApifyProxyConnectionError(errorMessage) {
|
|
439
|
-
throw new Error(errorMessage);
|
|
440
|
-
}
|
|
441
|
-
/**
|
|
442
|
-
* Throws cannot combine custom proxies with Apify Proxy
|
|
443
|
-
* @protected
|
|
444
|
-
* @ignore
|
|
445
|
-
* @internal
|
|
446
|
-
*/
|
|
447
|
-
_throwCannotCombineCustomWithApify() {
|
|
448
|
-
throw new Error('Cannot combine custom proxies with Apify Proxy!'
|
|
449
|
-
+ 'It is not allowed to set "options.proxyUrls" or "options.newUrlFunction" combined with '
|
|
450
|
-
+ '"options.groups" or "options.apifyProxyGroups" and "options.countryCode" or "options.apifyProxyCountry".');
|
|
451
|
-
}
|
|
452
|
-
/**
|
|
453
|
-
* Throws cannot combine custom 2 custom methods
|
|
454
|
-
* @protected
|
|
455
|
-
* @ignore
|
|
456
|
-
* @internal
|
|
457
|
-
*/
|
|
458
|
-
_throwCannotCombineCustomMethods() {
|
|
459
|
-
throw new Error('Cannot combine custom proxies "options.proxyUrls" with custom generating function "options.newUrlFunction".');
|
|
460
|
-
}
|
|
461
|
-
}
|
|
462
|
-
exports.ProxyConfiguration = ProxyConfiguration;
|
|
463
|
-
/**
|
|
464
|
-
* Creates a proxy configuration and returns a promise resolving to an instance
|
|
465
|
-
* of the {@link ProxyConfiguration} class that is already initialized.
|
|
466
|
-
*
|
|
467
|
-
* Configures connection to a proxy server with the provided options. Proxy servers are used to prevent target websites from blocking
|
|
468
|
-
* your crawlers based on IP address rate limits or blacklists. Setting proxy configuration in your crawlers automatically configures
|
|
469
|
-
* them to use the selected proxies for all connections.
|
|
470
|
-
*
|
|
471
|
-
* For more details and code examples, see the {@link ProxyConfiguration} class.
|
|
472
|
-
*
|
|
473
|
-
* ```javascript
|
|
474
|
-
*
|
|
475
|
-
* // Returns initialized proxy configuration class
|
|
476
|
-
* const proxyConfiguration = await Apify.createProxyConfiguration({
|
|
477
|
-
* groups: ['GROUP1', 'GROUP2'] // List of Apify proxy groups
|
|
478
|
-
* countryCode: 'US'
|
|
479
|
-
* });
|
|
480
|
-
*
|
|
481
|
-
* const crawler = new Apify.CheerioCrawler({
|
|
482
|
-
* // ...
|
|
483
|
-
* proxyConfiguration,
|
|
484
|
-
* handlePageFunction: ({ proxyInfo }) => {
|
|
485
|
-
* const usedProxyUrl = proxyInfo.url; // Getting the proxy URL
|
|
486
|
-
* }
|
|
487
|
-
* })
|
|
488
|
-
*
|
|
489
|
-
* ```
|
|
490
|
-
*
|
|
491
|
-
* For compatibility with existing Actor Input UI (Input Schema), this function
|
|
492
|
-
* returns `undefined` when the following object is passed as `proxyConfigurationOptions`.
|
|
493
|
-
*
|
|
494
|
-
* ```
|
|
495
|
-
* { useApifyProxy: false }
|
|
496
|
-
* ```
|
|
497
|
-
*
|
|
498
|
-
* @param {ProxyConfigurationOptions} [proxyConfigurationOptions]
|
|
499
|
-
* @returns {Promise<ProxyConfiguration|undefined>}
|
|
500
|
-
* @memberof module:Apify
|
|
501
|
-
* @name createProxyConfiguration
|
|
502
|
-
* @function
|
|
503
|
-
*/
|
|
504
|
-
const createProxyConfiguration = async (proxyConfigurationOptions = {}) => {
|
|
505
|
-
// Compatibility fix for Input UI where proxy: None returns { useApifyProxy: false }
|
|
506
|
-
// Without this, it would cause proxy to use the zero config / auto mode.
|
|
507
|
-
const { useApifyProxy, ...options } = proxyConfigurationOptions;
|
|
508
|
-
const dontUseApifyProxy = useApifyProxy === false;
|
|
509
|
-
const dontUseCustomProxies = !proxyConfigurationOptions.proxyUrls;
|
|
510
|
-
if (dontUseApifyProxy && dontUseCustomProxies)
|
|
511
|
-
return undefined;
|
|
512
|
-
const proxyConfiguration = new ProxyConfiguration(options);
|
|
513
|
-
await proxyConfiguration.initialize();
|
|
514
|
-
return proxyConfiguration;
|
|
515
|
-
};
|
|
516
|
-
exports.createProxyConfiguration = createProxyConfiguration;
|
|
517
|
-
//# sourceMappingURL=proxy_configuration.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"proxy_configuration.js","sourceRoot":"","sources":["../src/proxy_configuration.js"],"names":[],"mappings":";;;;AAAA,0CAAkE;AAClE,yDAAoB;AACpB,2CAAiD;AACjD,mCAAsC;AACtC,mDAAmD;AACnD,yEAAqC;AACrC,mDAAgD;AAEhD,YAAY;AACZ,MAAM,QAAQ,GAAG,MAAM,CAAC;AACxB,oEAAoE;AACpE,MAAM,qBAAqB,GAAG,EAAE,CAAC;AACjC,MAAM,mCAAmC,GAAG,IAAK,CAAC;AAClD,MAAM,yBAAyB,GAAG,CAAC,CAAC;AAEpC;;;;GAIG;AAEH;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAgCG;AAEH;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAwDG;AAEH;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAgCG;AACH,MAAa,kBAAkB;IAC3B;;;;;OAKG;IACH,YAAY,OAAO,GAAG,EAAE,EAAE,MAAM,GAAG,6BAAa,CAAC,eAAe,EAAE;QAC9D,IAAA,YAAE,EAAC,OAAO,EAAE,YAAE,CAAC,MAAM,CAAC,UAAU,CAAC;YAC7B,MAAM,EAAE,YAAE,CAAC,QAAQ,CAAC,KAAK,CAAC,MAAM,CAAC,YAAE,CAAC,MAAM,CAAC,OAAO,CAAC,gCAAuB,CAAC,CAAC;YAC5E,gBAAgB,EAAE,YAAE,CAAC,QAAQ,CAAC,KAAK,CAAC,MAAM,CAAC,YAAE,CAAC,MAAM,CAAC,OAAO,CAAC,gCAAuB,CAAC,CAAC;YACtF,WAAW,EAAE,YAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,OAAO,CAAC,8BAAkB,CAAC;YAC3D,iBAAiB,EAAE,YAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,OAAO,CAAC,8BAAkB,CAAC;YACjE,SAAS,EAAE,YAAE,CAAC,QAAQ,CAAC,KAAK,CAAC,QAAQ,CAAC,MAAM,CAAC,YAAE,CAAC,MAAM,CAAC,GAAG,CAAC;YAC3D,QAAQ,EAAE,YAAE,CAAC,QAAQ,CAAC,MAAM;YAC5B,cAAc,EAAE,YAAE,CAAC,QAAQ,CAAC,QAAQ;SACvC,CAAC,CAAC,CAAC;QAEJ,MAAM,EACF,MAAM,GAAG,EAAE,EACX,gBAAgB,GAAG,EAAE,EACrB,WAAW,EACX,iBAAiB,EACjB,SAAS,EACT,QAAQ,GAAG,MAAM,CAAC,GAAG,CAAC,eAAe,CAAC,EACtC,cAAc,GACjB,GAAG,OAAO,CAAC;QAEZ,MAAM,WAAW,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,gBAAgB,CAAC;QAC9D,MAAM,gBAAgB,GAAG,WAAW,IAAI,iBAAiB,CAAC;QAC1D,MAAM,QAAQ,GAAG,MAAM,CAAC,GAAG,CAAC,eAAe,CAAC,CAAC;QAC7C,MAAM,IAAI,GAAG,MAAM,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC;QAErC,aAAa;QACb,IAAI,CAAC,CAAC,SAAS,IAAI,cAAc,CAAC,IAAI,CAAC,CAAC,WAAW,CAAC,MAAM,CAAC,IAAI,gBAAgB,CAAC,CAAC,EAAE;YAC/E,IAAI,CAAC,kCAAkC,EAAE,CAAC;SAC7C;QACD,IAAI,SAAS,IAAI,cAAc;YAAE,IAAI,CAAC,gCAAgC,EAAE,CAAC;QAEzE,IAAI,CAAC,MAAM,GAAG,WAAW,CAAC;QAC1B,IAAI,CAAC,WAAW,GAAG,gBAAgB,CAAC;QACpC,IAAI,CAAC,QAAQ,GAAG,QAAQ,CAAC;QACzB,IAAI,CAAC,QAAQ,GAAG,QAAQ,CAAC;QACzB,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC;QACjB,IAAI,CAAC,kBAAkB,GAAG,CAAC,CAAC;QAC5B,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;QAC3B,IAAI,CAAC,aAAa,GAAG,IAAI,GAAG,EAAE,CAAC;QAC/B,IAAI,CAAC,cAAc,GAAG,cAAc,CAAC;QACrC,IAAI,CAAC,cAAc,GAAG,CAAC,IAAI,CAAC,SAAS,IAAI,CAAC,IAAI,CAAC,cAAc,CAAC;QAC9D,IAAI,CAAC,GAAG,GAAG,mBAAU,CAAC,KAAK,CAAC,EAAE,MAAM,EAAE,oBAAoB,EAAE,CAAC,CAAC;QAC9D,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;QACrB,IAAI,CAAC,gBAAgB,GAAG,KAAK,CAAC;QAE9B,IAAI,SAAS,IAAI,SAAS,CAAC,IAAI,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC,EAAE;YACjE,IAAI,CAAC,GAAG,CAAC,OAAO,CACZ,uHAAuH;kBACrH,kFAAkF,CACvF,CAAC;SACL;IACL,CAAC;IAED;;;;;;;;;OASG;IACH,KAAK,CAAC,UAAU;QACZ,IAAI,IAAI,CAAC,cAAc,EAAE;YACrB,MAAM,IAAI,CAAC,mBAAmB,EAAE,CAAC;YAEjC,MAAM,IAAI,CAAC,YAAY,EAAE,CAAC;SAC7B;IACL,CAAC;IAED;;;;;;;;;;;;;;;;OAgBG;IACH,YAAY,CAAC,SAAS;QAClB,IAAI,OAAO,SAAS,KAAK,QAAQ;YAAE,SAAS,GAAG,GAAG,SAAS,EAAE,CAAC;QAC9D,IAAA,YAAE,EAAC,SAAS,EAAE,YAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,SAAS,CAAC,qBAAqB,CAAC,CAAC,OAAO,CAAC,gCAAuB,CAAC,CAAC,CAAC;QACpG,MAAM,GAAG,GAAG,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;QAEnC,MAAM,EAAE,MAAM,EAAE,WAAW,EAAE,QAAQ,EAAE,IAAI,EAAE,QAAQ,EAAE,GAAG,IAAI,CAAC,cAAc,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;QAEpG,OAAO;YACH,SAAS;YACT,GAAG;YACH,MAAM;YACN,WAAW;YACX,QAAQ;YACR,QAAQ;YACR,IAAI;SACP,CAAC;IACN,CAAC;IAED;;;;;;;;;;;;OAYG;IACH,MAAM,CAAC,SAAS;QACZ,IAAI,OAAO,SAAS,KAAK,QAAQ;YAAE,SAAS,GAAG,GAAG,SAAS,EAAE,CAAC;QAC9D,IAAA,YAAE,EAAC,SAAS,EAAE,YAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,SAAS,CAAC,qBAAqB,CAAC,CAAC,OAAO,CAAC,gCAAuB,CAAC,CAAC,CAAC;QACpG,IAAI,IAAI,CAAC,cAAc,EAAE;YACrB,OAAO,IAAI,CAAC,mBAAmB,CAAC,SAAS,CAAC,CAAC;SAC9C;QACD,IAAI,IAAI,CAAC,SAAS,EAAE;YAChB,OAAO,IAAI,CAAC,gBAAgB,CAAC,SAAS,CAAC,CAAC;SAC3C;QACD,MAAM,QAAQ,GAAG,IAAI,CAAC,YAAY,CAAC,SAAS,CAAC,CAAC;QAC9C,MAAM,EAAE,QAAQ,EAAE,QAAQ,EAAE,IAAI,EAAE,GAAG,IAAI,CAAC;QAE1C,OAAO,GAAG,QAAQ,MAAM,QAAQ,IAAI,QAAQ,IAAI,QAAQ,IAAI,IAAI,EAAE,CAAC;IACvE,CAAC;IAED;;;;;;;OAOG;IACH,YAAY,CAAC,SAAS;QAClB,IAAI,QAAQ,CAAC;QACb,MAAM,EAAE,MAAM,EAAE,WAAW,EAAE,GAAG,IAAI,CAAC;QACrC,MAAM,KAAK,GAAG,EAAE,CAAC;QAEjB,IAAI,MAAM,IAAI,MAAM,CAAC,MAAM,EAAE;YACzB,KAAK,CAAC,IAAI,CAAC,UAAU,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;SAC5C;QACD,IAAI,SAAS,EAAE;YACX,KAAK,CAAC,IAAI,CAAC,WAAW,SAAS,EAAE,CAAC,CAAC;SACtC;QACD,IAAI,WAAW,EAAE;YACb,KAAK,CAAC,IAAI,CAAC,WAAW,WAAW,EAAE,CAAC,CAAC;SACxC;QAED,QAAQ,GAAG,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAE3B,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;YAAE,QAAQ,GAAG,MAAM,CAAC;QAE1C,OAAO,QAAQ,CAAC;IACpB,CAAC;IAED;;;;;;;OAOG;IACH,KAAK,CAAC,mBAAmB;QACrB,MAAM,KAAK,GAAG,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;QACvC,IAAI,KAAK,EAAE;YACP,MAAM,EAAE,KAAK,EAAE,EAAE,QAAQ,EAAE,EAAE,GAAG,MAAM,mBAAW,CAAC,IAAI,EAAE,CAAC,GAAG,EAAE,CAAC;YAC/D,IAAI,IAAI,CAAC,QAAQ,EAAE;gBACf,IAAI,IAAI,CAAC,QAAQ,KAAK,QAAQ,EAAE;oBAC5B,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,kDAAkD;0BACjE,qFAAqF,CAAC,CAAC;iBAC5F;aACJ;iBAAM;gBACH,IAAI,CAAC,QAAQ,GAAG,QAAQ,CAAC;aAC5B;SACJ;QACD,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE;YAChB,MAAM,IAAI,KAAK,CAAC,wEAAwE,iBAAQ,CAAC,cAAc,yBAAyB;kBAClI,mBAAmB,iBAAQ,CAAC,KAAK,sEAAsE,CAAC,CAAC;SAClH;IACL,CAAC;IAED;;;;;;;;;OASG;IACH,KAAK,CAAC,YAAY;QACd,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,YAAY,EAAE,CAAC;QACzC,IAAI,MAAM,EAAE;YACR,MAAM,EAAE,SAAS,EAAE,eAAe,EAAE,gBAAgB,EAAE,GAAG,MAAM,CAAC;YAChE,IAAI,CAAC,gBAAgB,GAAG,gBAAgB,CAAC;YAEzC,IAAI,CAAC,SAAS;gBAAE,IAAI,CAAC,+BAA+B,CAAC,eAAe,CAAC,CAAC;SACzE;aAAM;YACH,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,iFAAiF;kBAC5F,0HAA0H,CAAC,CAAC;SACrI;IACL,CAAC;IAED;;;;;;;OAOG;IACH,KAAK,CAAC,YAAY;QACd,MAAM,WAAW,GAAG;YAChB,GAAG,EAAE,GAAG,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,gBAAgB,CAAC,eAAe;YACxD,QAAQ,EAAE,IAAI,CAAC,MAAM,EAAE;YACvB,OAAO,EAAE,EAAE,OAAO,EAAE,mCAAmC,EAAE;YACzD,YAAY,EAAE,MAAM;SACvB,CAAC;QACF,KAAK,IAAI,OAAO,GAAG,CAAC,EAAE,OAAO,IAAI,yBAAyB,EAAE,OAAO,EAAE,EAAE;YACnE,IAAI;gBACA,MAAM,QAAQ,GAAG,MAAM,IAAA,gCAAgB,EAAC,WAAW,CAAC,CAAC;gBACrD,OAAO,QAAQ,CAAC,IAAI,CAAC;aACxB;YAAC,OAAO,GAAG,EAAE;gBACV,0BAA0B;aAC7B;SACJ;IACL,CAAC;IAED;;;;;;;OAOG;IACH,gBAAgB,CAAC,SAAS;QACtB,IAAI,cAAc,CAAC;QACnB,IAAI,SAAS,EAAE;YACX,IAAI,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,SAAS,CAAC,EAAE;gBACnC,cAAc,GAAG,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;aACtD;iBAAM;gBACH,cAAc,GAAG,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,kBAAkB,EAAE,GAAG,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;gBACnF,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,SAAS,EAAE,cAAc,CAAC,CAAC;aACrD;SACJ;aAAM;YACH,cAAc,GAAG,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,kBAAkB,EAAE,GAAG,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;SACtF;QACD,OAAO,cAAc,CAAC;IAC1B,CAAC;IAED;;;;;;OAMG;IACH,mBAAmB,CAAC,SAAS;QACzB,IAAI,QAAQ,CAAC;QACb,IAAI;YACA,QAAQ,GAAG,IAAI,CAAC,cAAc,CAAC,SAAS,CAAC,CAAC;YAC1C,IAAI,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,6BAA6B;SACnD;QAAC,OAAO,GAAG,EAAE;YACV,IAAI,CAAC,2BAA2B,CAAC,GAAG,CAAC,CAAC;SACzC;QACD,OAAO,QAAQ,CAAC;IACpB,CAAC;IAED;;;;;;OAMG;IACH,2BAA2B,CAAC,GAAG;QAC3B,MAAM,IAAI,KAAK,CAAC,mEAAmE,GAAG,CAAC,OAAO,EAAE,CAAC,CAAC;IACtG,CAAC;IAED;;;;;;;OAOG;IACH,4BAA4B,CAAC,KAAK,EAAE,KAAK;QACrC,MAAM,IAAI,KAAK,CAAC,sBAAsB,KAAK,UAAU,KAAK,8EAA8E,CAAC,CAAC;IAC9I,CAAC;IAED;;;;;OAKG;IACH,+BAA+B,CAAC,YAAY;QACxC,MAAM,IAAI,KAAK,CAAC,YAAY,CAAC,CAAC;IAClC,CAAC;IAED;;;;;OAKG;IACH,kCAAkC;QAC9B,MAAM,IAAI,KAAK,CAAC,iDAAiD;cAC3D,yFAAyF;cACzF,0GAA0G,CAAC,CAAC;IACtH,CAAC;IAED;;;;;OAKG;IACH,gCAAgC;QAC5B,MAAM,IAAI,KAAK,CAAC,6GAA6G,CAAC,CAAC;IACnI,CAAC;CACJ;AAtVD,gDAsVC;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAwCG;AACI,MAAM,wBAAwB,GAAG,KAAK,EAAE,yBAAyB,GAAG,EAAE,EAAE,EAAE;IAC7E,oFAAoF;IACpF,yEAAyE;IACzE,MAAM,EAAE,aAAa,EAAE,GAAG,OAAO,EAAE,GAAG,yBAAyB,CAAC;IAChE,MAAM,iBAAiB,GAAG,aAAa,KAAK,KAAK,CAAC;IAClD,MAAM,oBAAoB,GAAG,CAAC,yBAAyB,CAAC,SAAS,CAAC;IAClE,IAAI,iBAAiB,IAAI,oBAAoB;QAAE,OAAO,SAAS,CAAC;IAEhE,MAAM,kBAAkB,GAAG,IAAI,kBAAkB,CAAC,OAAO,CAAC,CAAC;IAC3D,MAAM,kBAAkB,CAAC,UAAU,EAAE,CAAC;IAEtC,OAAO,kBAAkB,CAAC;AAC9B,CAAC,CAAC;AAZW,QAAA,wBAAwB,4BAYnC"}
|
package/build/pseudo_url.d.ts
DELETED
|
@@ -1,86 +0,0 @@
|
|
|
1
|
-
export default PseudoUrl;
|
|
2
|
-
/**
|
|
3
|
-
* Represents a pseudo-URL (PURL) - an URL pattern used by web crawlers
|
|
4
|
-
* to specify which URLs should the crawler visit.
|
|
5
|
-
* This class is used by the {@link utils#enqueueLinks} function.
|
|
6
|
-
*
|
|
7
|
-
* A PURL is simply a URL with special directives enclosed in `[]` brackets.
|
|
8
|
-
* Currently, the only supported directive is `[RegExp]`,
|
|
9
|
-
* which defines a JavaScript-style regular expression to match against the URL.
|
|
10
|
-
*
|
|
11
|
-
* The `PseudoUrl` class can be constructed either using a pseudo-URL string
|
|
12
|
-
* or a regular expression (an instance of the `RegExp` object).
|
|
13
|
-
* With a pseudo-URL string, the matching is always case-insensitive.
|
|
14
|
-
* If you need case-sensitive matching, use an appropriate `RegExp` object.
|
|
15
|
-
*
|
|
16
|
-
* For example, a PURL `http://www.example.com/pages/[(\w|-)*]` will match all of the following URLs:
|
|
17
|
-
*
|
|
18
|
-
* - `http://www.example.com/pages/`
|
|
19
|
-
* - `http://www.example.com/pages/my-awesome-page`
|
|
20
|
-
* - `http://www.example.com/pages/something`
|
|
21
|
-
*
|
|
22
|
-
* Be careful to correctly escape special characters in the pseudo-URL string.
|
|
23
|
-
* If either `[` or `]` is part of the normal query string, it must be encoded as `[\x5B]` or `[\x5D]`,
|
|
24
|
-
* respectively. For example, the following PURL:
|
|
25
|
-
* ```http
|
|
26
|
-
* http://www.example.com/search?do[\x5B]load[\x5D]=1
|
|
27
|
-
* ```
|
|
28
|
-
* will match the URL:
|
|
29
|
-
* ```http
|
|
30
|
-
* http://www.example.com/search?do[load]=1
|
|
31
|
-
* ```
|
|
32
|
-
*
|
|
33
|
-
* If the regular expression in the pseudo-URL contains a backslash character (\),
|
|
34
|
-
* you need to escape it with another back backslash, as shown in the example below.
|
|
35
|
-
*
|
|
36
|
-
* **Example usage:**
|
|
37
|
-
*
|
|
38
|
-
* ```javascript
|
|
39
|
-
* // Using a pseudo-URL string
|
|
40
|
-
* const purl = new Apify.PseudoUrl('http://www.example.com/pages/[(\\w|-)+]', {
|
|
41
|
-
* userData: { foo: 'bar' },
|
|
42
|
-
* });
|
|
43
|
-
*
|
|
44
|
-
* // Using a regular expression
|
|
45
|
-
* const purl2 = new Apify.PseudoUrl(/http:\/\/www\.example\.com\/pages\/(\w|-)+/);
|
|
46
|
-
*
|
|
47
|
-
* if (purl.matches('http://www.example.com/pages/my-awesome-page')) console.log('Match!');
|
|
48
|
-
* ```
|
|
49
|
-
*/
|
|
50
|
-
declare class PseudoUrl {
|
|
51
|
-
/**
|
|
52
|
-
* @param {(string|RegExp)} purl
|
|
53
|
-
* A pseudo-URL string or a regular expression object.
|
|
54
|
-
* Using a `RegExp` instance enables more granular control,
|
|
55
|
-
* such as making the matching case sensitive.
|
|
56
|
-
* @param {RequestOptions} requestTemplate
|
|
57
|
-
* Options for the new {@link Request} instances created for matching URLs
|
|
58
|
-
* by the {@link utils#enqueueLinks} function.
|
|
59
|
-
*/
|
|
60
|
-
constructor(purl: (string | RegExp), requestTemplate?: RequestOptions);
|
|
61
|
-
regex: RegExp;
|
|
62
|
-
requestTemplate: RequestOptions;
|
|
63
|
-
/**
|
|
64
|
-
* Determines whether a URL matches this pseudo-URL pattern.
|
|
65
|
-
*
|
|
66
|
-
* @param {string} url URL to be matched.
|
|
67
|
-
* @return {boolean} Returns `true` if given URL matches pseudo-URL.
|
|
68
|
-
*/
|
|
69
|
-
matches(url: string): boolean;
|
|
70
|
-
/**
|
|
71
|
-
* Creates a Request object from a provided `requestTemplate` and a given URL
|
|
72
|
-
* or an object that specifies ${@link Request} properties. In case of a collision
|
|
73
|
-
* the properties will override the template, except for `userData`, which will
|
|
74
|
-
* be merged together, with the `userData` property having preference over the template.
|
|
75
|
-
* This enables dynamic overriding of the template.
|
|
76
|
-
*
|
|
77
|
-
* @param {(string|Object<string, *>)} urlOrProps
|
|
78
|
-
* @return {Request}
|
|
79
|
-
*/
|
|
80
|
-
createRequest(urlOrProps: (string | {
|
|
81
|
-
[x: string]: any;
|
|
82
|
-
})): Request;
|
|
83
|
-
}
|
|
84
|
-
import { RequestOptions } from "./request";
|
|
85
|
-
import Request from "./request";
|
|
86
|
-
//# sourceMappingURL=pseudo_url.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"pseudo_url.d.ts","sourceRoot":"","sources":["../src/pseudo_url.js"],"names":[],"mappings":";AAmDA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA+CG;AACH;IACI;;;;;;;;OAQG;IACH,kBARW,CAAC,MAAM,GAAC,MAAM,CAAC,oBAIf,cAAc,EAiBxB;IARO,cAAiB;IAOrB,gCAAsC;IAG1C;;;;;OAKG;IACH,aAHW,MAAM,GACL,OAAO,CAIlB;IAED;;;;;;;;;OASG;IACH,0BAHW,CAAC,MAAM;YAAQ,MAAM;KAAI,CAAC,GACzB,OAAO,CAOlB;CACJ"}
|