apify 2.3.1-beta.4 → 3.0.0-alpha.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (211) hide show
  1. package/README.md +6 -5
  2. package/package.json +69 -128
  3. package/build/actor.d.ts +0 -113
  4. package/build/actor.d.ts.map +0 -1
  5. package/build/actor.js +0 -582
  6. package/build/actor.js.map +0 -1
  7. package/build/apify.d.ts +0 -752
  8. package/build/apify.d.ts.map +0 -1
  9. package/build/apify.js +0 -877
  10. package/build/apify.js.map +0 -1
  11. package/build/autoscaling/autoscaled_pool.d.ts +0 -384
  12. package/build/autoscaling/autoscaled_pool.d.ts.map +0 -1
  13. package/build/autoscaling/autoscaled_pool.js +0 -557
  14. package/build/autoscaling/autoscaled_pool.js.map +0 -1
  15. package/build/autoscaling/snapshotter.d.ts +0 -278
  16. package/build/autoscaling/snapshotter.d.ts.map +0 -1
  17. package/build/autoscaling/snapshotter.js +0 -447
  18. package/build/autoscaling/snapshotter.js.map +0 -1
  19. package/build/autoscaling/system_status.d.ts +0 -224
  20. package/build/autoscaling/system_status.d.ts.map +0 -1
  21. package/build/autoscaling/system_status.js +0 -228
  22. package/build/autoscaling/system_status.js.map +0 -1
  23. package/build/browser_launchers/browser_launcher.d.ts +0 -154
  24. package/build/browser_launchers/browser_launcher.d.ts.map +0 -1
  25. package/build/browser_launchers/browser_launcher.js +0 -160
  26. package/build/browser_launchers/browser_launcher.js.map +0 -1
  27. package/build/browser_launchers/browser_plugin.d.ts +0 -23
  28. package/build/browser_launchers/browser_plugin.d.ts.map +0 -1
  29. package/build/browser_launchers/browser_plugin.js +0 -25
  30. package/build/browser_launchers/browser_plugin.js.map +0 -1
  31. package/build/browser_launchers/playwright_launcher.d.ts +0 -131
  32. package/build/browser_launchers/playwright_launcher.d.ts.map +0 -1
  33. package/build/browser_launchers/playwright_launcher.js +0 -150
  34. package/build/browser_launchers/playwright_launcher.js.map +0 -1
  35. package/build/browser_launchers/puppeteer_launcher.d.ts +0 -153
  36. package/build/browser_launchers/puppeteer_launcher.d.ts.map +0 -1
  37. package/build/browser_launchers/puppeteer_launcher.js +0 -197
  38. package/build/browser_launchers/puppeteer_launcher.js.map +0 -1
  39. package/build/cache_container.d.ts +0 -31
  40. package/build/cache_container.d.ts.map +0 -1
  41. package/build/cache_container.js +0 -48
  42. package/build/cache_container.js.map +0 -1
  43. package/build/configuration.d.ts +0 -226
  44. package/build/configuration.d.ts.map +0 -1
  45. package/build/configuration.js +0 -325
  46. package/build/configuration.js.map +0 -1
  47. package/build/constants.d.ts +0 -37
  48. package/build/constants.d.ts.map +0 -1
  49. package/build/constants.js +0 -41
  50. package/build/constants.js.map +0 -1
  51. package/build/crawlers/basic_crawler.d.ts +0 -443
  52. package/build/crawlers/basic_crawler.d.ts.map +0 -1
  53. package/build/crawlers/basic_crawler.js +0 -664
  54. package/build/crawlers/basic_crawler.js.map +0 -1
  55. package/build/crawlers/browser_crawler.d.ts +0 -512
  56. package/build/crawlers/browser_crawler.d.ts.map +0 -1
  57. package/build/crawlers/browser_crawler.js +0 -540
  58. package/build/crawlers/browser_crawler.js.map +0 -1
  59. package/build/crawlers/cheerio_crawler.d.ts +0 -931
  60. package/build/crawlers/cheerio_crawler.d.ts.map +0 -1
  61. package/build/crawlers/cheerio_crawler.js +0 -913
  62. package/build/crawlers/cheerio_crawler.js.map +0 -1
  63. package/build/crawlers/crawler_extension.d.ts +0 -10
  64. package/build/crawlers/crawler_extension.d.ts.map +0 -1
  65. package/build/crawlers/crawler_extension.js +0 -19
  66. package/build/crawlers/crawler_extension.js.map +0 -1
  67. package/build/crawlers/crawler_utils.d.ts +0 -34
  68. package/build/crawlers/crawler_utils.d.ts.map +0 -1
  69. package/build/crawlers/crawler_utils.js +0 -87
  70. package/build/crawlers/crawler_utils.js.map +0 -1
  71. package/build/crawlers/playwright_crawler.d.ts +0 -448
  72. package/build/crawlers/playwright_crawler.d.ts.map +0 -1
  73. package/build/crawlers/playwright_crawler.js +0 -299
  74. package/build/crawlers/playwright_crawler.js.map +0 -1
  75. package/build/crawlers/puppeteer_crawler.d.ts +0 -425
  76. package/build/crawlers/puppeteer_crawler.d.ts.map +0 -1
  77. package/build/crawlers/puppeteer_crawler.js +0 -299
  78. package/build/crawlers/puppeteer_crawler.js.map +0 -1
  79. package/build/crawlers/statistics.d.ts +0 -185
  80. package/build/crawlers/statistics.d.ts.map +0 -1
  81. package/build/crawlers/statistics.js +0 -331
  82. package/build/crawlers/statistics.js.map +0 -1
  83. package/build/enqueue_links/click_elements.d.ts +0 -179
  84. package/build/enqueue_links/click_elements.d.ts.map +0 -1
  85. package/build/enqueue_links/click_elements.js +0 -434
  86. package/build/enqueue_links/click_elements.js.map +0 -1
  87. package/build/enqueue_links/enqueue_links.d.ts +0 -117
  88. package/build/enqueue_links/enqueue_links.d.ts.map +0 -1
  89. package/build/enqueue_links/enqueue_links.js +0 -163
  90. package/build/enqueue_links/enqueue_links.js.map +0 -1
  91. package/build/enqueue_links/shared.d.ts +0 -42
  92. package/build/enqueue_links/shared.d.ts.map +0 -1
  93. package/build/enqueue_links/shared.js +0 -121
  94. package/build/enqueue_links/shared.js.map +0 -1
  95. package/build/errors.d.ts +0 -29
  96. package/build/errors.d.ts.map +0 -1
  97. package/build/errors.js +0 -38
  98. package/build/errors.js.map +0 -1
  99. package/build/events.d.ts +0 -11
  100. package/build/events.d.ts.map +0 -1
  101. package/build/events.js +0 -147
  102. package/build/events.js.map +0 -1
  103. package/build/index.d.ts +0 -4
  104. package/build/index.d.ts.map +0 -1
  105. package/build/index.js +0 -7
  106. package/build/index.js.map +0 -1
  107. package/build/main.d.ts +0 -179
  108. package/build/main.d.ts.map +0 -1
  109. package/build/main.js +0 -81
  110. package/build/main.js.map +0 -1
  111. package/build/playwright_utils.d.ts +0 -9
  112. package/build/playwright_utils.d.ts.map +0 -1
  113. package/build/playwright_utils.js +0 -90
  114. package/build/playwright_utils.js.map +0 -1
  115. package/build/proxy_configuration.d.ts +0 -411
  116. package/build/proxy_configuration.d.ts.map +0 -1
  117. package/build/proxy_configuration.js +0 -517
  118. package/build/proxy_configuration.js.map +0 -1
  119. package/build/pseudo_url.d.ts +0 -86
  120. package/build/pseudo_url.d.ts.map +0 -1
  121. package/build/pseudo_url.js +0 -153
  122. package/build/pseudo_url.js.map +0 -1
  123. package/build/puppeteer_request_interception.d.ts +0 -8
  124. package/build/puppeteer_request_interception.d.ts.map +0 -1
  125. package/build/puppeteer_request_interception.js +0 -235
  126. package/build/puppeteer_request_interception.js.map +0 -1
  127. package/build/puppeteer_utils.d.ts +0 -250
  128. package/build/puppeteer_utils.d.ts.map +0 -1
  129. package/build/puppeteer_utils.js +0 -551
  130. package/build/puppeteer_utils.js.map +0 -1
  131. package/build/request.d.ts +0 -180
  132. package/build/request.d.ts.map +0 -1
  133. package/build/request.js +0 -261
  134. package/build/request.js.map +0 -1
  135. package/build/request_list.d.ts +0 -581
  136. package/build/request_list.d.ts.map +0 -1
  137. package/build/request_list.js +0 -826
  138. package/build/request_list.js.map +0 -1
  139. package/build/serialization.d.ts +0 -5
  140. package/build/serialization.d.ts.map +0 -1
  141. package/build/serialization.js +0 -139
  142. package/build/serialization.js.map +0 -1
  143. package/build/session_pool/errors.d.ts +0 -11
  144. package/build/session_pool/errors.d.ts.map +0 -1
  145. package/build/session_pool/errors.js +0 -18
  146. package/build/session_pool/errors.js.map +0 -1
  147. package/build/session_pool/events.d.ts +0 -5
  148. package/build/session_pool/events.d.ts.map +0 -1
  149. package/build/session_pool/events.js +0 -6
  150. package/build/session_pool/events.js.map +0 -1
  151. package/build/session_pool/session.d.ts +0 -286
  152. package/build/session_pool/session.d.ts.map +0 -1
  153. package/build/session_pool/session.js +0 -355
  154. package/build/session_pool/session.js.map +0 -1
  155. package/build/session_pool/session_pool.d.ts +0 -280
  156. package/build/session_pool/session_pool.d.ts.map +0 -1
  157. package/build/session_pool/session_pool.js +0 -393
  158. package/build/session_pool/session_pool.js.map +0 -1
  159. package/build/session_pool/session_utils.d.ts +0 -4
  160. package/build/session_pool/session_utils.d.ts.map +0 -1
  161. package/build/session_pool/session_utils.js +0 -24
  162. package/build/session_pool/session_utils.js.map +0 -1
  163. package/build/stealth/hiding_tricks.d.ts +0 -22
  164. package/build/stealth/hiding_tricks.d.ts.map +0 -1
  165. package/build/stealth/hiding_tricks.js +0 -308
  166. package/build/stealth/hiding_tricks.js.map +0 -1
  167. package/build/stealth/stealth.d.ts +0 -56
  168. package/build/stealth/stealth.d.ts.map +0 -1
  169. package/build/stealth/stealth.js +0 -125
  170. package/build/stealth/stealth.js.map +0 -1
  171. package/build/storages/dataset.d.ts +0 -288
  172. package/build/storages/dataset.d.ts.map +0 -1
  173. package/build/storages/dataset.js +0 -480
  174. package/build/storages/dataset.js.map +0 -1
  175. package/build/storages/key_value_store.d.ts +0 -243
  176. package/build/storages/key_value_store.d.ts.map +0 -1
  177. package/build/storages/key_value_store.js +0 -462
  178. package/build/storages/key_value_store.js.map +0 -1
  179. package/build/storages/request_queue.d.ts +0 -318
  180. package/build/storages/request_queue.d.ts.map +0 -1
  181. package/build/storages/request_queue.js +0 -636
  182. package/build/storages/request_queue.js.map +0 -1
  183. package/build/storages/storage_manager.d.ts +0 -87
  184. package/build/storages/storage_manager.d.ts.map +0 -1
  185. package/build/storages/storage_manager.js +0 -150
  186. package/build/storages/storage_manager.js.map +0 -1
  187. package/build/tsconfig.tsbuildinfo +0 -1
  188. package/build/typedefs.d.ts +0 -146
  189. package/build/typedefs.d.ts.map +0 -1
  190. package/build/typedefs.js +0 -88
  191. package/build/typedefs.js.map +0 -1
  192. package/build/utils.d.ts +0 -175
  193. package/build/utils.d.ts.map +0 -1
  194. package/build/utils.js +0 -731
  195. package/build/utils.js.map +0 -1
  196. package/build/utils_log.d.ts +0 -41
  197. package/build/utils_log.d.ts.map +0 -1
  198. package/build/utils_log.js +0 -192
  199. package/build/utils_log.js.map +0 -1
  200. package/build/utils_request.d.ts +0 -77
  201. package/build/utils_request.d.ts.map +0 -1
  202. package/build/utils_request.js +0 -385
  203. package/build/utils_request.js.map +0 -1
  204. package/build/utils_social.d.ts +0 -210
  205. package/build/utils_social.d.ts.map +0 -1
  206. package/build/utils_social.js +0 -787
  207. package/build/utils_social.js.map +0 -1
  208. package/build/validators.d.ts +0 -23
  209. package/build/validators.d.ts.map +0 -1
  210. package/build/validators.js +0 -29
  211. package/build/validators.js.map +0 -1
@@ -1,517 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.createProxyConfiguration = exports.ProxyConfiguration = void 0;
4
- const tslib_1 = require("tslib");
5
- const consts_1 = require("@apify/consts");
6
- const ow_1 = (0, tslib_1.__importDefault)(require("ow"));
7
- const constants_1 = require("./constants");
8
- const utils_1 = require("./utils");
9
- const utils_request_1 = require("./utils_request");
10
- const utils_log_1 = (0, tslib_1.__importDefault)(require("./utils_log"));
11
- const configuration_1 = require("./configuration");
12
- // CONSTANTS
13
- const PROTOCOL = 'http';
14
- // https://docs.apify.com/proxy/datacenter-proxy#username-parameters
15
- const MAX_SESSION_ID_LENGTH = 50;
16
- const CHECK_ACCESS_REQUEST_TIMEOUT_MILLIS = 4000;
17
- const CHECK_ACCESS_MAX_ATTEMPTS = 2;
18
- /**
19
- * @callback ProxyConfigurationFunction
20
- * @param {string|number} sessionId
21
- * @returns {string}
22
- */
23
- /**
24
- * @typedef ProxyConfigurationOptions
25
- * @property {string} [password]
26
- * User's password for the proxy. By default, it is taken from the `APIFY_PROXY_PASSWORD`
27
- * environment variable, which is automatically set by the system when running the actors.
28
- * @property {string[]} [groups]
29
- * An array of proxy groups to be used by the [Apify Proxy](https://docs.apify.com/proxy).
30
- * If not provided, the proxy will select the groups automatically.
31
- * @property {string} [countryCode]
32
- * If set and relevant proxies are available in your Apify account, all proxied requests will
33
- * use IP addresses that are geolocated to the specified country. For example `GB` for IPs
34
- * from Great Britain. Note that online services often have their own rules for handling
35
- * geolocation and thus the country selection is a best attempt at geolocation, rather than
36
- * a guaranteed hit. This parameter is optional, by default, each proxied request is assigned
37
- * an IP address from a random country. The country code needs to be a two letter ISO country code. See the
38
- * [full list of available country codes](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2#Officially_assigned_code_elements).
39
- * This parameter is optional, by default, the proxy uses all available proxy servers from all countries.
40
- * on the Apify cloud, or when using the [Apify CLI](https://github.com/apify/apify-cli).
41
- * @property {string[]} [apifyProxyGroups]
42
- * Same option as `groups` which can be used to
43
- * configurate the proxy by UI input schema. You should use the `groups` option in your crawler code.
44
- * @property {string} [apifyProxyCountry]
45
- * Same option as `countryCode` which can be used to
46
- * configurate the proxy by UI input schema. You should use the `countryCode` option in your crawler code.
47
- * @property {string[]} [proxyUrls]
48
- * An array of custom proxy URLs to be rotated.
49
- * Custom proxies are not compatible with Apify Proxy and an attempt to use both
50
- * configuration options will cause an error to be thrown on initialize.
51
- * @property {ProxyConfigurationFunction} [newUrlFunction]
52
- * Custom function that allows you to generate the new proxy URL dynamically. It gets the `sessionId` as a parameter
53
- * and should always return stringified proxy URL.
54
- * This function is used to generate the URL when {@link ProxyConfiguration.newUrl} or {@link ProxyConfiguration.newProxyInfo} is called.
55
- */
56
- /**
57
- * The main purpose of the ProxyInfo object is to provide information
58
- * about the current proxy connection used by the crawler for the request.
59
- * Outside of crawlers, you can get this object by calling {@link ProxyConfiguration.newProxyInfo}.
60
- *
61
- * **Example usage:**
62
- *
63
- * ```javascript
64
- *
65
- * const proxyConfiguration = await Apify.createProxyConfiguration({
66
- * groups: ['GROUP1', 'GROUP2'] // List of Apify Proxy groups
67
- * countryCode: 'US',
68
- * });
69
- *
70
- * // Getting proxyInfo object by calling class method directly
71
- * const proxyInfo = proxyConfiguration.newProxyInfo();
72
- *
73
- * // In crawler
74
- * const crawler = new Apify.CheerioCrawler({
75
- * // ...
76
- * proxyConfiguration,
77
- * handlePageFunction: ({ proxyInfo }) => {
78
- * // Getting used proxy URL
79
- * const proxyUrl = proxyInfo.url;
80
- *
81
- * // Getting ID of used Session
82
- * const sessionIdentifier = proxyInfo.sessionId;
83
- * }
84
- * })
85
- *
86
- * ```
87
- * @typedef ProxyInfo
88
- * @property {string} [sessionId]
89
- * The identifier of used {@link Session}, if used.
90
- * @property {string} url
91
- * The URL of the proxy.
92
- * @property {string[]} groups
93
- * An array of proxy groups to be used by the [Apify Proxy](https://docs.apify.com/proxy).
94
- * If not provided, the proxy will select the groups automatically.
95
- * @property {string} [countryCode]
96
- * If set and relevant proxies are available in your Apify account, all proxied requests will
97
- * use IP addresses that are geolocated to the specified country. For example `GB` for IPs
98
- * from Great Britain. Note that online services often have their own rules for handling
99
- * geolocation and thus the country selection is a best attempt at geolocation, rather than
100
- * a guaranteed hit. This parameter is optional, by default, each proxied request is assigned
101
- * an IP address from a random country. The country code needs to be a two letter ISO country code. See the
102
- * [full list of available country codes](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2#Officially_assigned_code_elements).
103
- * This parameter is optional, by default, the proxy uses all available proxy servers from all countries.
104
- * @property {string} password
105
- * User's password for the proxy. By default, it is taken from the `APIFY_PROXY_PASSWORD`
106
- * environment variable, which is automatically set by the system when running the actors
107
- * on the Apify cloud, or when using the [Apify CLI](https://github.com/apify/apify-cli).
108
- * @property {string} hostname
109
- * Hostname of your proxy.
110
- * @property {string} port
111
- * Proxy port.
112
- */
113
- /**
114
- * Configures connection to a proxy server with the provided options. Proxy servers are used to prevent target websites from blocking
115
- * your crawlers based on IP address rate limits or blacklists. Setting proxy configuration in your crawlers automatically configures
116
- * them to use the selected proxies for all connections. You can get information about the currently used proxy by inspecting
117
- * the {@link ProxyInfo} property in your crawler's page function. There, you can inspect the proxy's URL and other attributes.
118
- *
119
- * The proxy servers are managed by [Apify Proxy](https://docs.apify.com/proxy). To be able to use Apify Proxy,
120
- * you need an Apify account and access to the selected proxies. If you provide no configuration option,
121
- * the proxies will be managed automatically using a smart algorithm.
122
- *
123
- * If you want to use your own proxies, use the {@link ProxyConfigurationOptions.proxyUrls} option. Your list of proxy URLs will
124
- * be rotated by the configuration if this option is provided.
125
- *
126
- * **Example usage:**
127
- *
128
- * ```javascript
129
- *
130
- * const proxyConfiguration = await Apify.createProxyConfiguration({
131
- * groups: ['GROUP1', 'GROUP2'] // List of Apify Proxy groups
132
- * countryCode: 'US',
133
- * });
134
- *
135
- * const crawler = new Apify.CheerioCrawler({
136
- * // ...
137
- * proxyConfiguration,
138
- * handlePageFunction: ({ proxyInfo }) => {
139
- * const usedProxyUrl = proxyInfo.url; // Getting the proxy URL
140
- * }
141
- * })
142
- *
143
- * ```
144
- * @hideconstructor
145
- */
146
- class ProxyConfiguration {
147
- /**
148
- * Configuration of proxy.
149
- *
150
- * @param {ProxyConfigurationOptions} [options] All `ProxyConfiguration` options.
151
- * @param {Configuration} [config]
152
- */
153
- constructor(options = {}, config = configuration_1.Configuration.getGlobalConfig()) {
154
- (0, ow_1.default)(options, ow_1.default.object.exactShape({
155
- groups: ow_1.default.optional.array.ofType(ow_1.default.string.matches(consts_1.APIFY_PROXY_VALUE_REGEX)),
156
- apifyProxyGroups: ow_1.default.optional.array.ofType(ow_1.default.string.matches(consts_1.APIFY_PROXY_VALUE_REGEX)),
157
- countryCode: ow_1.default.optional.string.matches(constants_1.COUNTRY_CODE_REGEX),
158
- apifyProxyCountry: ow_1.default.optional.string.matches(constants_1.COUNTRY_CODE_REGEX),
159
- proxyUrls: ow_1.default.optional.array.nonEmpty.ofType(ow_1.default.string.url),
160
- password: ow_1.default.optional.string,
161
- newUrlFunction: ow_1.default.optional.function,
162
- }));
163
- const { groups = [], apifyProxyGroups = [], countryCode, apifyProxyCountry, proxyUrls, password = config.get('proxyPassword'), newUrlFunction, } = options;
164
- const groupsToUse = groups.length ? groups : apifyProxyGroups;
165
- const countryCodeToUse = countryCode || apifyProxyCountry;
166
- const hostname = config.get('proxyHostname');
167
- const port = config.get('proxyPort');
168
- // Validation
169
- if (((proxyUrls || newUrlFunction) && ((groupsToUse.length) || countryCodeToUse))) {
170
- this._throwCannotCombineCustomWithApify();
171
- }
172
- if (proxyUrls && newUrlFunction)
173
- this._throwCannotCombineCustomMethods();
174
- this.groups = groupsToUse;
175
- this.countryCode = countryCodeToUse;
176
- this.password = password;
177
- this.hostname = hostname;
178
- this.port = port;
179
- this.nextCustomUrlIndex = 0;
180
- this.proxyUrls = proxyUrls;
181
- this.usedProxyUrls = new Map();
182
- this.newUrlFunction = newUrlFunction;
183
- this.usesApifyProxy = !this.proxyUrls && !this.newUrlFunction;
184
- this.log = utils_log_1.default.child({ prefix: 'ProxyConfiguration' });
185
- this.config = config;
186
- this.isManInTheMiddle = false;
187
- if (proxyUrls && proxyUrls.some((url) => url.includes('apify.com'))) {
188
- this.log.warning('Some Apify proxy features may work incorrectly. Please consider setting up Apify properties instead of `proxyUrls`.\n'
189
- + 'See https://sdk.apify.com/docs/guides/proxy-management#apify-proxy-configuration');
190
- }
191
- }
192
- /**
193
- * Loads proxy password if token is provided and checks access to Apify Proxy and provided proxy groups
194
- * if Apify Proxy configuration is used.
195
- * Also checks if country has access to Apify Proxy groups if the country code is provided.
196
- *
197
- * You should use the {@link Apify.createProxyConfiguration} function to create a pre-initialized
198
- * `ProxyConfiguration` instance instead of calling this manually.
199
- *
200
- * @returns {Promise<void>}
201
- */
202
- async initialize() {
203
- if (this.usesApifyProxy) {
204
- await this._setPasswordIfToken();
205
- await this._checkAccess();
206
- }
207
- }
208
- /**
209
- * This function creates a new {@link ProxyInfo} info object.
210
- * It is used by CheerioCrawler and PuppeteerCrawler to generate proxy URLs and also to allow the user to inspect
211
- * the currently used proxy via the handlePageFunction parameter: proxyInfo.
212
- * Use it if you want to work with a rich representation of a proxy URL.
213
- * If you need the URL string only, use {@link ProxyConfiguration.newUrl}.
214
- * @param {string|number} [sessionId]
215
- * Represents the identifier of user {@link Session} that can be managed by the {@link SessionPool} or
216
- * you can use the Apify Proxy [Session](https://docs.apify.com/proxy#sessions) identifier.
217
- * When the provided sessionId is a number, it's converted to a string. Property sessionId of
218
- * {@link ProxyInfo} is always returned as a type string.
219
- *
220
- * All the HTTP requests going through the proxy with the same session identifier
221
- * will use the same target proxy server (i.e. the same IP address).
222
- * The identifier must not be longer than 50 characters and include only the following: `0-9`, `a-z`, `A-Z`, `"."`, `"_"` and `"~"`.
223
- * @return {ProxyInfo} represents information about used proxy and its configuration.
224
- */
225
- newProxyInfo(sessionId) {
226
- if (typeof sessionId === 'number')
227
- sessionId = `${sessionId}`;
228
- (0, ow_1.default)(sessionId, ow_1.default.optional.string.maxLength(MAX_SESSION_ID_LENGTH).matches(consts_1.APIFY_PROXY_VALUE_REGEX));
229
- const url = this.newUrl(sessionId);
230
- const { groups, countryCode, password, port, hostname } = this.usesApifyProxy ? this : new URL(url);
231
- return {
232
- sessionId,
233
- url,
234
- groups,
235
- countryCode,
236
- password,
237
- hostname,
238
- port,
239
- };
240
- }
241
- /**
242
- * Returns a new proxy URL based on provided configuration options and the `sessionId` parameter.
243
- * @param {string|number} [sessionId]
244
- * Represents the identifier of user {@link Session} that can be managed by the {@link SessionPool} or
245
- * you can use the Apify Proxy [Session](https://docs.apify.com/proxy#sessions) identifier.
246
- * When the provided sessionId is a number, it's converted to a string.
247
- *
248
- * All the HTTP requests going through the proxy with the same session identifier
249
- * will use the same target proxy server (i.e. the same IP address).
250
- * The identifier must not be longer than 50 characters and include only the following: `0-9`, `a-z`, `A-Z`, `"."`, `"_"` and `"~"`.
251
- * @return {string} A string with a proxy URL, including authentication credentials and port number.
252
- * For example, `http://bob:password123@proxy.example.com:8000`
253
- */
254
- newUrl(sessionId) {
255
- if (typeof sessionId === 'number')
256
- sessionId = `${sessionId}`;
257
- (0, ow_1.default)(sessionId, ow_1.default.optional.string.maxLength(MAX_SESSION_ID_LENGTH).matches(consts_1.APIFY_PROXY_VALUE_REGEX));
258
- if (this.newUrlFunction) {
259
- return this._callNewUrlFunction(sessionId);
260
- }
261
- if (this.proxyUrls) {
262
- return this._handleCustomUrl(sessionId);
263
- }
264
- const username = this._getUsername(sessionId);
265
- const { password, hostname, port } = this;
266
- return `${PROTOCOL}://${username}:${password}@${hostname}:${port}`;
267
- }
268
- /**
269
- * Returns proxy username.
270
- * @param {string} [sessionId]
271
- * @return {string} the proxy username
272
- * @ignore
273
- * @protected
274
- * @internal
275
- */
276
- _getUsername(sessionId) {
277
- let username;
278
- const { groups, countryCode } = this;
279
- const parts = [];
280
- if (groups && groups.length) {
281
- parts.push(`groups-${groups.join('+')}`);
282
- }
283
- if (sessionId) {
284
- parts.push(`session-${sessionId}`);
285
- }
286
- if (countryCode) {
287
- parts.push(`country-${countryCode}`);
288
- }
289
- username = parts.join(',');
290
- if (parts.length === 0)
291
- username = 'auto';
292
- return username;
293
- }
294
- /**
295
- * Checks if Apify Token is provided in env
296
- * and gets the password via API and sets it to env
297
- * @returns {Promise<void>}
298
- * @ignore
299
- * @protected
300
- * @internal
301
- */
302
- async _setPasswordIfToken() {
303
- const token = this.config.get('token');
304
- if (token) {
305
- const { proxy: { password } } = await utils_1.apifyClient.user().get();
306
- if (this.password) {
307
- if (this.password !== password) {
308
- this.log.warning('The Apify Proxy password you provided belongs to'
309
- + ' a different user than the Apify token you are using. Are you sure this is correct?');
310
- }
311
- }
312
- else {
313
- this.password = password;
314
- }
315
- }
316
- if (!this.password) {
317
- throw new Error(`Apify Proxy password must be provided using options.password or the "${consts_1.ENV_VARS.PROXY_PASSWORD}" environment variable.`
318
- + `If you add the "${consts_1.ENV_VARS.TOKEN}" environment variable, the password will be automatically inferred.`);
319
- }
320
- }
321
- /**
322
- * Checks whether the user has access to the proxies specified in the provided ProxyConfigurationOptions.
323
- * If the check can not be made, it only prints a warning and allows the program to continue. This is to
324
- * prevent program crashes caused by short downtimes of Proxy.
325
- *
326
- * @returns {Promise<void>}
327
- * @protected
328
- * @ignore
329
- * @internal
330
- */
331
- async _checkAccess() {
332
- const status = await this._fetchStatus();
333
- if (status) {
334
- const { connected, connectionError, isManInTheMiddle } = status;
335
- this.isManInTheMiddle = isManInTheMiddle;
336
- if (!connected)
337
- this._throwApifyProxyConnectionError(connectionError);
338
- }
339
- else {
340
- this.log.warning('Apify Proxy access check timed out. Watch out for errors with status code 407. '
341
- + 'If you see some, it most likely means you don\'t have access to either all or some of the proxies you\'re trying to use.');
342
- }
343
- }
344
- /**
345
- * Apify Proxy can be down for a second or a minute, but this should not crash processes.
346
- *
347
- * @return {Promise<{ connected: boolean, connectionError: string } | undefined>}
348
- * @protected
349
- * @ignore
350
- * @internal
351
- */
352
- async _fetchStatus() {
353
- const requestOpts = {
354
- url: `${this.config.get('proxyStatusUrl')}/?format=json`,
355
- proxyUrl: this.newUrl(),
356
- timeout: { request: CHECK_ACCESS_REQUEST_TIMEOUT_MILLIS },
357
- responseType: 'json',
358
- };
359
- for (let attempt = 1; attempt <= CHECK_ACCESS_MAX_ATTEMPTS; attempt++) {
360
- try {
361
- const response = await (0, utils_request_1.requestAsBrowser)(requestOpts);
362
- return response.body;
363
- }
364
- catch (err) {
365
- // retry connection errors
366
- }
367
- }
368
- }
369
- /**
370
- * Handles custom url rotation with session
371
- * @param {string} [sessionId]
372
- * @returns {string} url
373
- * @protected
374
- * @ignore
375
- * @internal
376
- */
377
- _handleCustomUrl(sessionId) {
378
- let customUrlToUse;
379
- if (sessionId) {
380
- if (this.usedProxyUrls.has(sessionId)) {
381
- customUrlToUse = this.usedProxyUrls.get(sessionId);
382
- }
383
- else {
384
- customUrlToUse = this.proxyUrls[this.nextCustomUrlIndex++ % this.proxyUrls.length];
385
- this.usedProxyUrls.set(sessionId, customUrlToUse);
386
- }
387
- }
388
- else {
389
- customUrlToUse = this.proxyUrls[this.nextCustomUrlIndex++ % this.proxyUrls.length];
390
- }
391
- return customUrlToUse;
392
- }
393
- /**
394
- * Calls the custom newUrlFunction and checks format of its return value
395
- * @param {string} [sessionId]
396
- * @protected
397
- * @ignore
398
- * @internal
399
- */
400
- _callNewUrlFunction(sessionId) {
401
- let proxyUrl;
402
- try {
403
- proxyUrl = this.newUrlFunction(sessionId);
404
- new URL(proxyUrl); // eslint-disable-line no-new
405
- }
406
- catch (err) {
407
- this._throwNewUrlFunctionInvalid(err);
408
- }
409
- return proxyUrl;
410
- }
411
- /**
412
- * Throws invalid custom newUrlFunction return
413
- * @param {Error} err
414
- * @protected
415
- * @ignore
416
- * @internal
417
- */
418
- _throwNewUrlFunctionInvalid(err) {
419
- throw new Error(`The provided newUrlFunction did not return a valid URL.\nCause: ${err.message}`);
420
- }
421
- /**
422
- * Throws invalid proxy value error
423
- * @param {string} param
424
- * @param {string} value
425
- * @protected
426
- * @ignore
427
- * @internal
428
- */
429
- _throwInvalidProxyValueError(param, value) {
430
- throw new Error(`The provided proxy ${param} name "${value}" can only contain the following characters: 0-9, a-z, A-Z, ".", "_" and "~"`);
431
- }
432
- /**
433
- * Throws Apify Proxy is not connected
434
- * @protected
435
- * @ignore
436
- * @internal
437
- */
438
- _throwApifyProxyConnectionError(errorMessage) {
439
- throw new Error(errorMessage);
440
- }
441
- /**
442
- * Throws cannot combine custom proxies with Apify Proxy
443
- * @protected
444
- * @ignore
445
- * @internal
446
- */
447
- _throwCannotCombineCustomWithApify() {
448
- throw new Error('Cannot combine custom proxies with Apify Proxy!'
449
- + 'It is not allowed to set "options.proxyUrls" or "options.newUrlFunction" combined with '
450
- + '"options.groups" or "options.apifyProxyGroups" and "options.countryCode" or "options.apifyProxyCountry".');
451
- }
452
- /**
453
- * Throws cannot combine custom 2 custom methods
454
- * @protected
455
- * @ignore
456
- * @internal
457
- */
458
- _throwCannotCombineCustomMethods() {
459
- throw new Error('Cannot combine custom proxies "options.proxyUrls" with custom generating function "options.newUrlFunction".');
460
- }
461
- }
462
- exports.ProxyConfiguration = ProxyConfiguration;
463
- /**
464
- * Creates a proxy configuration and returns a promise resolving to an instance
465
- * of the {@link ProxyConfiguration} class that is already initialized.
466
- *
467
- * Configures connection to a proxy server with the provided options. Proxy servers are used to prevent target websites from blocking
468
- * your crawlers based on IP address rate limits or blacklists. Setting proxy configuration in your crawlers automatically configures
469
- * them to use the selected proxies for all connections.
470
- *
471
- * For more details and code examples, see the {@link ProxyConfiguration} class.
472
- *
473
- * ```javascript
474
- *
475
- * // Returns initialized proxy configuration class
476
- * const proxyConfiguration = await Apify.createProxyConfiguration({
477
- * groups: ['GROUP1', 'GROUP2'] // List of Apify proxy groups
478
- * countryCode: 'US'
479
- * });
480
- *
481
- * const crawler = new Apify.CheerioCrawler({
482
- * // ...
483
- * proxyConfiguration,
484
- * handlePageFunction: ({ proxyInfo }) => {
485
- * const usedProxyUrl = proxyInfo.url; // Getting the proxy URL
486
- * }
487
- * })
488
- *
489
- * ```
490
- *
491
- * For compatibility with existing Actor Input UI (Input Schema), this function
492
- * returns `undefined` when the following object is passed as `proxyConfigurationOptions`.
493
- *
494
- * ```
495
- * { useApifyProxy: false }
496
- * ```
497
- *
498
- * @param {ProxyConfigurationOptions} [proxyConfigurationOptions]
499
- * @returns {Promise<ProxyConfiguration|undefined>}
500
- * @memberof module:Apify
501
- * @name createProxyConfiguration
502
- * @function
503
- */
504
- const createProxyConfiguration = async (proxyConfigurationOptions = {}) => {
505
- // Compatibility fix for Input UI where proxy: None returns { useApifyProxy: false }
506
- // Without this, it would cause proxy to use the zero config / auto mode.
507
- const { useApifyProxy, ...options } = proxyConfigurationOptions;
508
- const dontUseApifyProxy = useApifyProxy === false;
509
- const dontUseCustomProxies = !proxyConfigurationOptions.proxyUrls;
510
- if (dontUseApifyProxy && dontUseCustomProxies)
511
- return undefined;
512
- const proxyConfiguration = new ProxyConfiguration(options);
513
- await proxyConfiguration.initialize();
514
- return proxyConfiguration;
515
- };
516
- exports.createProxyConfiguration = createProxyConfiguration;
517
- //# sourceMappingURL=proxy_configuration.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"proxy_configuration.js","sourceRoot":"","sources":["../src/proxy_configuration.js"],"names":[],"mappings":";;;;AAAA,0CAAkE;AAClE,yDAAoB;AACpB,2CAAiD;AACjD,mCAAsC;AACtC,mDAAmD;AACnD,yEAAqC;AACrC,mDAAgD;AAEhD,YAAY;AACZ,MAAM,QAAQ,GAAG,MAAM,CAAC;AACxB,oEAAoE;AACpE,MAAM,qBAAqB,GAAG,EAAE,CAAC;AACjC,MAAM,mCAAmC,GAAG,IAAK,CAAC;AAClD,MAAM,yBAAyB,GAAG,CAAC,CAAC;AAEpC;;;;GAIG;AAEH;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAgCG;AAEH;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAwDG;AAEH;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAgCG;AACH,MAAa,kBAAkB;IAC3B;;;;;OAKG;IACH,YAAY,OAAO,GAAG,EAAE,EAAE,MAAM,GAAG,6BAAa,CAAC,eAAe,EAAE;QAC9D,IAAA,YAAE,EAAC,OAAO,EAAE,YAAE,CAAC,MAAM,CAAC,UAAU,CAAC;YAC7B,MAAM,EAAE,YAAE,CAAC,QAAQ,CAAC,KAAK,CAAC,MAAM,CAAC,YAAE,CAAC,MAAM,CAAC,OAAO,CAAC,gCAAuB,CAAC,CAAC;YAC5E,gBAAgB,EAAE,YAAE,CAAC,QAAQ,CAAC,KAAK,CAAC,MAAM,CAAC,YAAE,CAAC,MAAM,CAAC,OAAO,CAAC,gCAAuB,CAAC,CAAC;YACtF,WAAW,EAAE,YAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,OAAO,CAAC,8BAAkB,CAAC;YAC3D,iBAAiB,EAAE,YAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,OAAO,CAAC,8BAAkB,CAAC;YACjE,SAAS,EAAE,YAAE,CAAC,QAAQ,CAAC,KAAK,CAAC,QAAQ,CAAC,MAAM,CAAC,YAAE,CAAC,MAAM,CAAC,GAAG,CAAC;YAC3D,QAAQ,EAAE,YAAE,CAAC,QAAQ,CAAC,MAAM;YAC5B,cAAc,EAAE,YAAE,CAAC,QAAQ,CAAC,QAAQ;SACvC,CAAC,CAAC,CAAC;QAEJ,MAAM,EACF,MAAM,GAAG,EAAE,EACX,gBAAgB,GAAG,EAAE,EACrB,WAAW,EACX,iBAAiB,EACjB,SAAS,EACT,QAAQ,GAAG,MAAM,CAAC,GAAG,CAAC,eAAe,CAAC,EACtC,cAAc,GACjB,GAAG,OAAO,CAAC;QAEZ,MAAM,WAAW,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,gBAAgB,CAAC;QAC9D,MAAM,gBAAgB,GAAG,WAAW,IAAI,iBAAiB,CAAC;QAC1D,MAAM,QAAQ,GAAG,MAAM,CAAC,GAAG,CAAC,eAAe,CAAC,CAAC;QAC7C,MAAM,IAAI,GAAG,MAAM,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC;QAErC,aAAa;QACb,IAAI,CAAC,CAAC,SAAS,IAAI,cAAc,CAAC,IAAI,CAAC,CAAC,WAAW,CAAC,MAAM,CAAC,IAAI,gBAAgB,CAAC,CAAC,EAAE;YAC/E,IAAI,CAAC,kCAAkC,EAAE,CAAC;SAC7C;QACD,IAAI,SAAS,IAAI,cAAc;YAAE,IAAI,CAAC,gCAAgC,EAAE,CAAC;QAEzE,IAAI,CAAC,MAAM,GAAG,WAAW,CAAC;QAC1B,IAAI,CAAC,WAAW,GAAG,gBAAgB,CAAC;QACpC,IAAI,CAAC,QAAQ,GAAG,QAAQ,CAAC;QACzB,IAAI,CAAC,QAAQ,GAAG,QAAQ,CAAC;QAC
zB,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC;QACjB,IAAI,CAAC,kBAAkB,GAAG,CAAC,CAAC;QAC5B,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;QAC3B,IAAI,CAAC,aAAa,GAAG,IAAI,GAAG,EAAE,CAAC;QAC/B,IAAI,CAAC,cAAc,GAAG,cAAc,CAAC;QACrC,IAAI,CAAC,cAAc,GAAG,CAAC,IAAI,CAAC,SAAS,IAAI,CAAC,IAAI,CAAC,cAAc,CAAC;QAC9D,IAAI,CAAC,GAAG,GAAG,mBAAU,CAAC,KAAK,CAAC,EAAE,MAAM,EAAE,oBAAoB,EAAE,CAAC,CAAC;QAC9D,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;QACrB,IAAI,CAAC,gBAAgB,GAAG,KAAK,CAAC;QAE9B,IAAI,SAAS,IAAI,SAAS,CAAC,IAAI,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC,EAAE;YACjE,IAAI,CAAC,GAAG,CAAC,OAAO,CACZ,uHAAuH;kBACrH,kFAAkF,CACvF,CAAC;SACL;IACL,CAAC;IAED;;;;;;;;;OASG;IACH,KAAK,CAAC,UAAU;QACZ,IAAI,IAAI,CAAC,cAAc,EAAE;YACrB,MAAM,IAAI,CAAC,mBAAmB,EAAE,CAAC;YAEjC,MAAM,IAAI,CAAC,YAAY,EAAE,CAAC;SAC7B;IACL,CAAC;IAED;;;;;;;;;;;;;;;;OAgBG;IACH,YAAY,CAAC,SAAS;QAClB,IAAI,OAAO,SAAS,KAAK,QAAQ;YAAE,SAAS,GAAG,GAAG,SAAS,EAAE,CAAC;QAC9D,IAAA,YAAE,EAAC,SAAS,EAAE,YAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,SAAS,CAAC,qBAAqB,CAAC,CAAC,OAAO,CAAC,gCAAuB,CAAC,CAAC,CAAC;QACpG,MAAM,GAAG,GAAG,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;QAEnC,MAAM,EAAE,MAAM,EAAE,WAAW,EAAE,QAAQ,EAAE,IAAI,EAAE,QAAQ,EAAE,GAAG,IAAI,CAAC,cAAc,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;QAEpG,OAAO;YACH,SAAS;YACT,GAAG;YACH,MAAM;YACN,WAAW;YACX,QAAQ;YACR,QAAQ;YACR,IAAI;SACP,CAAC;IACN,CAAC;IAED;;;;;;;;;;;;OAYG;IACH,MAAM,CAAC,SAAS;QACZ,IAAI,OAAO,SAAS,KAAK,QAAQ;YAAE,SAAS,GAAG,GAAG,SAAS,EAAE,CAAC;QAC9D,IAAA,YAAE,EAAC,SAAS,EAAE,YAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,SAAS,CAAC,qBAAqB,CAAC,CAAC,OAAO,CAAC,gCAAuB,CAAC,CAAC,CAAC;QACpG,IAAI,IAAI,CAAC,cAAc,EAAE;YACrB,OAAO,IAAI,CAAC,mBAAmB,CAAC,SAAS,CAAC,CAAC;SAC9C;QACD,IAAI,IAAI,CAAC,SAAS,EAAE;YAChB,OAAO,IAAI,CAAC,gBAAgB,CAAC,SAAS,CAAC,CAAC;SAC3C;QACD,MAAM,QAAQ,GAAG,IAAI,CAAC,YAAY,CAAC,SAAS,CAAC,CAAC;QAC9C,MAAM,EAAE,QAAQ,EAAE,QAAQ,EAAE,IAAI,EAAE,GAAG,IAAI,CAAC;QAE1C,OAAO,GAAG,QAAQ,MAAM,QAAQ,IAAI,QAAQ,IAAI,QAAQ,IAAI,IAAI,EAAE,CAAC;IACvE,CAAC;IAED;;;;;;;OAOG;IACH,YAAY,CAAC,SAAS;QAClB,IAAI,QAAQ,CAAC;QACb,MAAM,EAAE,MAAM,EAAE,WAAW,EAAE,GAAG,
IAAI,CAAC;QACrC,MAAM,KAAK,GAAG,EAAE,CAAC;QAEjB,IAAI,MAAM,IAAI,MAAM,CAAC,MAAM,EAAE;YACzB,KAAK,CAAC,IAAI,CAAC,UAAU,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;SAC5C;QACD,IAAI,SAAS,EAAE;YACX,KAAK,CAAC,IAAI,CAAC,WAAW,SAAS,EAAE,CAAC,CAAC;SACtC;QACD,IAAI,WAAW,EAAE;YACb,KAAK,CAAC,IAAI,CAAC,WAAW,WAAW,EAAE,CAAC,CAAC;SACxC;QAED,QAAQ,GAAG,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAE3B,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;YAAE,QAAQ,GAAG,MAAM,CAAC;QAE1C,OAAO,QAAQ,CAAC;IACpB,CAAC;IAED;;;;;;;OAOG;IACH,KAAK,CAAC,mBAAmB;QACrB,MAAM,KAAK,GAAG,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;QACvC,IAAI,KAAK,EAAE;YACP,MAAM,EAAE,KAAK,EAAE,EAAE,QAAQ,EAAE,EAAE,GAAG,MAAM,mBAAW,CAAC,IAAI,EAAE,CAAC,GAAG,EAAE,CAAC;YAC/D,IAAI,IAAI,CAAC,QAAQ,EAAE;gBACf,IAAI,IAAI,CAAC,QAAQ,KAAK,QAAQ,EAAE;oBAC5B,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,kDAAkD;0BACjE,qFAAqF,CAAC,CAAC;iBAC5F;aACJ;iBAAM;gBACH,IAAI,CAAC,QAAQ,GAAG,QAAQ,CAAC;aAC5B;SACJ;QACD,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE;YAChB,MAAM,IAAI,KAAK,CAAC,wEAAwE,iBAAQ,CAAC,cAAc,yBAAyB;kBAClI,mBAAmB,iBAAQ,CAAC,KAAK,sEAAsE,CAAC,CAAC;SAClH;IACL,CAAC;IAED;;;;;;;;;OASG;IACH,KAAK,CAAC,YAAY;QACd,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,YAAY,EAAE,CAAC;QACzC,IAAI,MAAM,EAAE;YACR,MAAM,EAAE,SAAS,EAAE,eAAe,EAAE,gBAAgB,EAAE,GAAG,MAAM,CAAC;YAChE,IAAI,CAAC,gBAAgB,GAAG,gBAAgB,CAAC;YAEzC,IAAI,CAAC,SAAS;gBAAE,IAAI,CAAC,+BAA+B,CAAC,eAAe,CAAC,CAAC;SACzE;aAAM;YACH,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,iFAAiF;kBAC5F,0HAA0H,CAAC,CAAC;SACrI;IACL,CAAC;IAED;;;;;;;OAOG;IACH,KAAK,CAAC,YAAY;QACd,MAAM,WAAW,GAAG;YAChB,GAAG,EAAE,GAAG,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,gBAAgB,CAAC,eAAe;YACxD,QAAQ,EAAE,IAAI,CAAC,MAAM,EAAE;YACvB,OAAO,EAAE,EAAE,OAAO,EAAE,mCAAmC,EAAE;YACzD,YAAY,EAAE,MAAM;SACvB,CAAC;QACF,KAAK,IAAI,OAAO,GAAG,CAAC,EAAE,OAAO,IAAI,yBAAyB,EAAE,OAAO,EAAE,EAAE;YACnE,IAAI;gBACA,MAAM,QAAQ,GAAG,MAAM,IAAA,gCAAgB,EAAC,WAAW,CAAC,CAAC;gBACrD,OAAO,QAAQ,CAAC,IAAI,CAAC;aACxB;YAAC,OAAO,GAAG,EAAE;gBACV,0BAA0B;aAC7B;SACJ;IACL,CAAC;IAED;;;;;;;OAOG;IACH,gBAAgB,CAAC,SAAS;QACtB,IAAI,cAAc,CAAC;QACnB,IAAI,SAAS,EAAE;YACX,IAAI,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,SAAS,
CAAC,EAAE;gBACnC,cAAc,GAAG,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;aACtD;iBAAM;gBACH,cAAc,GAAG,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,kBAAkB,EAAE,GAAG,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;gBACnF,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,SAAS,EAAE,cAAc,CAAC,CAAC;aACrD;SACJ;aAAM;YACH,cAAc,GAAG,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,kBAAkB,EAAE,GAAG,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;SACtF;QACD,OAAO,cAAc,CAAC;IAC1B,CAAC;IAED;;;;;;OAMG;IACH,mBAAmB,CAAC,SAAS;QACzB,IAAI,QAAQ,CAAC;QACb,IAAI;YACA,QAAQ,GAAG,IAAI,CAAC,cAAc,CAAC,SAAS,CAAC,CAAC;YAC1C,IAAI,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,6BAA6B;SACnD;QAAC,OAAO,GAAG,EAAE;YACV,IAAI,CAAC,2BAA2B,CAAC,GAAG,CAAC,CAAC;SACzC;QACD,OAAO,QAAQ,CAAC;IACpB,CAAC;IAED;;;;;;OAMG;IACH,2BAA2B,CAAC,GAAG;QAC3B,MAAM,IAAI,KAAK,CAAC,mEAAmE,GAAG,CAAC,OAAO,EAAE,CAAC,CAAC;IACtG,CAAC;IAED;;;;;;;OAOG;IACH,4BAA4B,CAAC,KAAK,EAAE,KAAK;QACrC,MAAM,IAAI,KAAK,CAAC,sBAAsB,KAAK,UAAU,KAAK,8EAA8E,CAAC,CAAC;IAC9I,CAAC;IAED;;;;;OAKG;IACH,+BAA+B,CAAC,YAAY;QACxC,MAAM,IAAI,KAAK,CAAC,YAAY,CAAC,CAAC;IAClC,CAAC;IAED;;;;;OAKG;IACH,kCAAkC;QAC9B,MAAM,IAAI,KAAK,CAAC,iDAAiD;cAC3D,yFAAyF;cACzF,0GAA0G,CAAC,CAAC;IACtH,CAAC;IAED;;;;;OAKG;IACH,gCAAgC;QAC5B,MAAM,IAAI,KAAK,CAAC,6GAA6G,CAAC,CAAC;IACnI,CAAC;CACJ;AAtVD,gDAsVC;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAwCG;AACI,MAAM,wBAAwB,GAAG,KAAK,EAAE,yBAAyB,GAAG,EAAE,EAAE,EAAE;IAC7E,oFAAoF;IACpF,yEAAyE;IACzE,MAAM,EAAE,aAAa,EAAE,GAAG,OAAO,EAAE,GAAG,yBAAyB,CAAC;IAChE,MAAM,iBAAiB,GAAG,aAAa,KAAK,KAAK,CAAC;IAClD,MAAM,oBAAoB,GAAG,CAAC,yBAAyB,CAAC,SAAS,CAAC;IAClE,IAAI,iBAAiB,IAAI,oBAAoB;QAAE,OAAO,SAAS,CAAC;IAEhE,MAAM,kBAAkB,GAAG,IAAI,kBAAkB,CAAC,OAAO,CAAC,CAAC;IAC3D,MAAM,kBAAkB,CAAC,UAAU,EAAE,CAAC;IAEtC,OAAO,kBAAkB,CAAC;AAC9B,CAAC,CAAC;AAZW,QAAA,wBAAwB,4BAYnC"}
@@ -1,86 +0,0 @@
1
- export default PseudoUrl;
2
- /**
3
- * Represents a pseudo-URL (PURL) - a URL pattern used by web crawlers
4
- * to specify which URLs the crawler should visit.
5
- * This class is used by the {@link utils#enqueueLinks} function.
6
- *
7
- * A PURL is simply a URL with special directives enclosed in `[]` brackets.
8
- * Currently, the only supported directive is `[RegExp]`,
9
- * which defines a JavaScript-style regular expression to match against the URL.
10
- *
11
- * The `PseudoUrl` class can be constructed either using a pseudo-URL string
12
- * or a regular expression (an instance of the `RegExp` object).
13
- * With a pseudo-URL string, the matching is always case-insensitive.
14
- * If you need case-sensitive matching, use an appropriate `RegExp` object.
15
- *
16
- * For example, a PURL `http://www.example.com/pages/[(\w|-)*]` will match all of the following URLs:
17
- *
18
- * - `http://www.example.com/pages/`
19
- * - `http://www.example.com/pages/my-awesome-page`
20
- * - `http://www.example.com/pages/something`
21
- *
22
- * Be careful to correctly escape special characters in the pseudo-URL string.
23
- * If either `[` or `]` is part of the normal query string, it must be encoded as `[\x5B]` or `[\x5D]`,
24
- * respectively. For example, the following PURL:
25
- * ```http
26
- * http://www.example.com/search?do[\x5B]load[\x5D]=1
27
- * ```
28
- * will match the URL:
29
- * ```http
30
- * http://www.example.com/search?do[load]=1
31
- * ```
32
- *
33
- * If the regular expression in the pseudo-URL contains a backslash character (\),
34
- * you need to escape it with another backslash, as shown in the example below.
35
- *
36
- * **Example usage:**
37
- *
38
- * ```javascript
39
- * // Using a pseudo-URL string
40
- * const purl = new Apify.PseudoUrl('http://www.example.com/pages/[(\\w|-)+]', {
41
- * userData: { foo: 'bar' },
42
- * });
43
- *
44
- * // Using a regular expression
45
- * const purl2 = new Apify.PseudoUrl(/http:\/\/www\.example\.com\/pages\/(\w|-)+/);
46
- *
47
- * if (purl.matches('http://www.example.com/pages/my-awesome-page')) console.log('Match!');
48
- * ```
49
- */
50
- declare class PseudoUrl {
51
- /**
52
- * @param {(string|RegExp)} purl
53
- * A pseudo-URL string or a regular expression object.
54
- * Using a `RegExp` instance enables more granular control,
55
- * such as making the matching case sensitive.
56
- * @param {RequestOptions} requestTemplate
57
- * Options for the new {@link Request} instances created for matching URLs
58
- * by the {@link utils#enqueueLinks} function.
59
- */
60
- constructor(purl: (string | RegExp), requestTemplate?: RequestOptions);
61
- regex: RegExp;
62
- requestTemplate: RequestOptions;
63
- /**
64
- * Determines whether a URL matches this pseudo-URL pattern.
65
- *
66
- * @param {string} url URL to be matched.
67
- * @return {boolean} Returns `true` if given URL matches pseudo-URL.
68
- */
69
- matches(url: string): boolean;
70
- /**
71
- * Creates a Request object from a provided `requestTemplate` and a given URL
72
- * or an object that specifies {@link Request} properties. In case of a collision
73
- * the properties will override the template, except for `userData`, which will
74
- * be merged together, with the `userData` property having preference over the template.
75
- * This enables dynamic overriding of the template.
76
- *
77
- * @param {(string|Object<string, *>)} urlOrProps
78
- * @return {Request}
79
- */
80
- createRequest(urlOrProps: (string | {
81
- [x: string]: any;
82
- })): Request;
83
- }
84
- import { RequestOptions } from "./request";
85
- import Request from "./request";
86
- //# sourceMappingURL=pseudo_url.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"pseudo_url.d.ts","sourceRoot":"","sources":["../src/pseudo_url.js"],"names":[],"mappings":";AAmDA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA+CG;AACH;IACI;;;;;;;;OAQG;IACH,kBARW,CAAC,MAAM,GAAC,MAAM,CAAC,oBAIf,cAAc,EAiBxB;IARO,cAAiB;IAOrB,gCAAsC;IAG1C;;;;;OAKG;IACH,aAHW,MAAM,GACL,OAAO,CAIlB;IAED;;;;;;;;;OASG;IACH,0BAHW,CAAC,MAAM;YAAQ,MAAM;KAAI,CAAC,GACzB,OAAO,CAOlB;CACJ"}