@crawlee/core 3.8.3-beta.4 → 3.8.3-beta.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@crawlee/core",
|
|
3
|
-
"version": "3.8.3-beta.4",
|
|
3
|
+
"version": "3.8.3-beta.6",
|
|
4
4
|
"description": "The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs (not only) with headless Chrome and Puppeteer.",
|
|
5
5
|
"engines": {
|
|
6
6
|
"node": ">=16.0.0"
|
|
@@ -59,9 +59,9 @@
|
|
|
59
59
|
"@apify/pseudo_url": "^2.0.30",
|
|
60
60
|
"@apify/timeout": "^0.3.0",
|
|
61
61
|
"@apify/utilities": "^2.7.10",
|
|
62
|
-
"@crawlee/memory-storage": "3.8.3-beta.4",
|
|
63
|
-
"@crawlee/types": "3.8.3-beta.4",
|
|
64
|
-
"@crawlee/utils": "3.8.3-beta.4",
|
|
62
|
+
"@crawlee/memory-storage": "3.8.3-beta.6",
|
|
63
|
+
"@crawlee/types": "3.8.3-beta.6",
|
|
64
|
+
"@crawlee/utils": "3.8.3-beta.6",
|
|
65
65
|
"@sapphire/async-queue": "^1.5.1",
|
|
66
66
|
"@types/tough-cookie": "^4.0.2",
|
|
67
67
|
"@vladfrangu/async_event_emitter": "^2.2.2",
|
|
@@ -85,5 +85,5 @@
|
|
|
85
85
|
}
|
|
86
86
|
}
|
|
87
87
|
},
|
|
88
|
-
"gitHead": "
|
|
88
|
+
"gitHead": "2fbf1b383ed38288483e3d8ef8c3e86bfd5dcc3d"
|
|
89
89
|
}
|
package/proxy_configuration.d.ts
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import type { Request } from './request';
|
|
1
2
|
export interface ProxyConfigurationFunction {
|
|
2
3
|
(sessionId: string | number): string | Promise<string>;
|
|
3
4
|
}
|
|
@@ -14,6 +15,16 @@ export interface ProxyConfigurationOptions {
|
|
|
14
15
|
* This function is used to generate the URL when {@apilink ProxyConfiguration.newUrl} or {@apilink ProxyConfiguration.newProxyInfo} is called.
|
|
15
16
|
*/
|
|
16
17
|
newUrlFunction?: ProxyConfigurationFunction;
|
|
18
|
+
/**
|
|
19
|
+
* An array of custom proxy URLs to be rotated stratified in tiers.
|
|
20
|
+
* This is a more advanced version of `proxyUrls` that allows you to define a hierarchy of proxy URLs
|
|
21
|
+
* If everything goes well, all the requests will be sent through the first proxy URL in the list.
|
|
22
|
+
* Whenever the crawler encounters a problem with the current proxy on the given domain, it will switch to the higher tier for this domain.
|
|
23
|
+
* The crawler probes lower-level proxies at intervals to check if it can make the tier downshift.
|
|
24
|
+
*
|
|
25
|
+
* This feature is useful when you have a set of proxies with different performance characteristics (speed, price, antibot performance etc.) and you want to use the best one for each domain.
|
|
26
|
+
*/
|
|
27
|
+
tieredProxyUrls?: string[][];
|
|
17
28
|
}
|
|
18
29
|
/**
|
|
19
30
|
* The main purpose of the ProxyInfo object is to provide information
|
|
@@ -71,6 +82,36 @@ export interface ProxyInfo {
|
|
|
71
82
|
*/
|
|
72
83
|
port: number | string;
|
|
73
84
|
}
|
|
85
|
+
interface TieredProxyOptions {
|
|
86
|
+
request?: Request;
|
|
87
|
+
proxyTier?: number;
|
|
88
|
+
}
|
|
89
|
+
/**
|
|
90
|
+
* Internal class for tracking the proxy tier history for a specific domain.
|
|
91
|
+
*
|
|
92
|
+
* Predicts the best proxy tier for the next request based on the error history for different proxy tiers.
|
|
93
|
+
*/
|
|
94
|
+
declare class ProxyTierTracker {
|
|
95
|
+
private histogram;
|
|
96
|
+
private currentTier;
|
|
97
|
+
constructor(tieredProxyUrls: string[][]);
|
|
98
|
+
/**
|
|
99
|
+
* Processes a single step of the algorithm and updates the current tier prediction based on the error history.
|
|
100
|
+
*/
|
|
101
|
+
private processStep;
|
|
102
|
+
/**
|
|
103
|
+
* Increases the error score for the given proxy tier. This raises the chance of picking a different proxy tier for the subsequent requests.
|
|
104
|
+
*
|
|
105
|
+
* The error score is increased by 10 for the given tier. This means that this tier will be disadvantaged for the next 10 requests (every new request prediction decreases the error score by 1).
|
|
106
|
+
* @param tier The proxy tier to mark as problematic.
|
|
107
|
+
*/
|
|
108
|
+
addError(tier: number): void;
|
|
109
|
+
/**
|
|
110
|
+
* Returns the best proxy tier for the next request based on the error history for different proxy tiers.
|
|
111
|
+
* @returns The proxy tier prediction
|
|
112
|
+
*/
|
|
113
|
+
predictTier(): number;
|
|
114
|
+
}
|
|
74
115
|
/**
|
|
75
116
|
* Configures connection to a proxy server with the provided options. Proxy servers are used to prevent target websites from blocking
|
|
76
117
|
* your crawlers based on IP address rate limits or blacklists. Setting proxy configuration in your crawlers automatically configures
|
|
@@ -103,10 +144,12 @@ export declare class ProxyConfiguration {
|
|
|
103
144
|
isManInTheMiddle: boolean;
|
|
104
145
|
protected nextCustomUrlIndex: number;
|
|
105
146
|
protected proxyUrls?: string[];
|
|
147
|
+
protected tieredProxyUrls?: string[][];
|
|
106
148
|
protected usedProxyUrls: Map<string, string>;
|
|
107
149
|
protected newUrlFunction?: ProxyConfigurationFunction;
|
|
108
150
|
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
109
151
|
protected log: import("@apify/log").Log;
|
|
152
|
+
protected domainTiers: Map<string, ProxyTierTracker>;
|
|
110
153
|
/**
|
|
111
154
|
* Creates a {@apilink ProxyConfiguration} instance based on the provided options. Proxy servers are used to prevent target websites from
|
|
112
155
|
* blocking your crawlers based on IP address rate limits or blacklists. Setting proxy configuration in your crawlers automatically configures
|
|
@@ -145,7 +188,20 @@ export declare class ProxyConfiguration {
|
|
|
145
188
|
* The identifier must not be longer than 50 characters and include only the following: `0-9`, `a-z`, `A-Z`, `"."`, `"_"` and `"~"`.
|
|
146
189
|
* @return Represents information about used proxy and its configuration.
|
|
147
190
|
*/
|
|
148
|
-
newProxyInfo(sessionId?: string | number): Promise<ProxyInfo>;
|
|
191
|
+
newProxyInfo(sessionId?: string | number, options?: TieredProxyOptions): Promise<ProxyInfo>;
|
|
192
|
+
/**
|
|
193
|
+
* Given a session identifier and a request / proxy tier, this function returns a new proxy URL based on the provided configuration options.
|
|
194
|
+
* @param _sessionId Session identifier
|
|
195
|
+
* @param options Options for the tiered proxy rotation
|
|
196
|
+
* @returns A string with a proxy URL.
|
|
197
|
+
*/
|
|
198
|
+
protected _handleTieredUrl(_sessionId: string, options?: TieredProxyOptions): string;
|
|
199
|
+
/**
|
|
200
|
+
* Given a `Request` object, this function returns the tier of the proxy that should be used for the request.
|
|
201
|
+
*
|
|
202
|
+
* This returns `null` if `tieredProxyUrls` option is not set.
|
|
203
|
+
*/
|
|
204
|
+
getProxyTier(request: Request): number | null;
|
|
149
205
|
/**
|
|
150
206
|
* Returns a new proxy URL based on provided configuration options and the `sessionId` parameter.
|
|
151
207
|
* @param [sessionId]
|
|
@@ -159,7 +215,7 @@ export declare class ProxyConfiguration {
|
|
|
159
215
|
* @return A string with a proxy URL, including authentication credentials and port number.
|
|
160
216
|
* For example, `http://bob:password123@proxy.example.com:8000`
|
|
161
217
|
*/
|
|
162
|
-
newUrl(sessionId?: string | number): Promise<string>;
|
|
218
|
+
newUrl(sessionId?: string | number, options?: TieredProxyOptions): Promise<string>;
|
|
163
219
|
/**
|
|
164
220
|
* Handles custom url rotation with session
|
|
165
221
|
*/
|
|
@@ -172,4 +228,5 @@ export declare class ProxyConfiguration {
|
|
|
172
228
|
protected _throwCannotCombineCustomMethods(): never;
|
|
173
229
|
protected _throwNoOptionsProvided(): never;
|
|
174
230
|
}
|
|
231
|
+
export {};
|
|
175
232
|
//# sourceMappingURL=proxy_configuration.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"proxy_configuration.d.ts","sourceRoot":"","sources":["../src/proxy_configuration.ts"],"names":[],"mappings":"AAIA,MAAM,WAAW,0BAA0B;IACvC,CAAC,SAAS,EAAE,MAAM,GAAG,MAAM,GAAG,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC;CAC1D;AAED,MAAM,WAAW,yBAAyB;IACtC;;;;OAIG;IACH,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;IAErB;;;;OAIG;IACH,cAAc,CAAC,EAAE,0BAA0B,CAAC;
|
|
1
|
+
{"version":3,"file":"proxy_configuration.d.ts","sourceRoot":"","sources":["../src/proxy_configuration.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAEzC,MAAM,WAAW,0BAA0B;IACvC,CAAC,SAAS,EAAE,MAAM,GAAG,MAAM,GAAG,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC;CAC1D;AAED,MAAM,WAAW,yBAAyB;IACtC;;;;OAIG;IACH,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;IAErB;;;;OAIG;IACH,cAAc,CAAC,EAAE,0BAA0B,CAAC;IAE5C;;;;;;;;OAQG;IACH,eAAe,CAAC,EAAE,MAAM,EAAE,EAAE,CAAC;CAChC;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA6BG;AACH,MAAM,WAAW,SAAS;IACtB;;OAEG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;IAEnB;;OAEG;IACH,GAAG,EAAE,MAAM,CAAC;IAEZ;;OAEG;IACH,QAAQ,CAAC,EAAE,MAAM,CAAC;IAElB;;OAEG;IACH,QAAQ,EAAE,MAAM,CAAC;IAEjB;;OAEG;IACH,QAAQ,EAAE,MAAM,CAAC;IAEjB;;OAEG;IACH,IAAI,EAAE,MAAM,GAAG,MAAM,CAAC;CACzB;AAED,UAAU,kBAAkB;IACxB,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,SAAS,CAAC,EAAE,MAAM,CAAC;CACtB;AAED;;;;GAIG;AACH,cAAM,gBAAgB;IAClB,OAAO,CAAC,SAAS,CAAW;IAC5B,OAAO,CAAC,WAAW,CAAS;gBAEhB,eAAe,EAAE,MAAM,EAAE,EAAE;IAKvC;;OAEG;IACH,OAAO,CAAC,WAAW;IAkBnB;;;;;OAKG;IACH,QAAQ,CAAC,IAAI,EAAE,MAAM;IAIrB;;;OAGG;IACH,WAAW;CAId;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2BG;AACH,qBAAa,kBAAkB;IAC3B,gBAAgB,UAAS;IACzB,SAAS,CAAC,kBAAkB,SAAK;IACjC,SAAS,CAAC,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;IAC/B,SAAS,CAAC,eAAe,CAAC,EAAE,MAAM,EAAE,EAAE,CAAC;IACvC,SAAS,CAAC,aAAa,sBAA6B;IACpD,SAAS,CAAC,cAAc,CAAC,EAAE,0BAA0B,CAAC;IACtD,SAAS,CAAC,GAAG,2BAA+C;IAC5D,SAAS,CAAC,WAAW,gCAAuC;IAE5D;;;;;;;;;;;;;;;;;;;OAmBG;gBACS,OAAO,GAAE,yBAA8B;IAkBnD;;;;;;;;;;;;;;;;OAgBG;IACG,YAAY,CAAC,SAAS,CAAC,EAAE,MAAM,GAAG,MAAM,EAAE,OAAO,CAAC,EAAE,kBAAkB,GAAG,OAAO,CAAC,SAAS,CAAC;IAiBjG;;;;;OAKG;IACH,SAAS,CAAC,gBAAgB,CAAC,UAAU,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,kBAAkB,GAAG,MAAM;IAmBpF;;;;OAIG;IACH,YAAY,CAAC,OAAO,EAAE,OAAO,GAAG,MAAM,GAAG,IAAI;IAwB7C;;;;;;;;;;;;OAYG;IACG,MAAM,CAAC,SAAS,CAAC,EAAE,MAAM,GAAG,MAAM,EAAE,OAAO,CAAC,EAAE,kBAAkB,GAAG,OAAO,CAAC,MAAM,CAAC;IAcxF;;OAEG;IACH,SAAS,CAAC,gBAAgB,CAAC,SAAS,CAAC,EAAE,MAAM,GAAG,MAAM;IAiBtD;;OAEG;cACa,mBAAmB,CAAC,SAAS,CAAC,EAAE,MAAM;IAYtD,SAAS,CAAC,2BAA2B,CAA
C,GAAG,EAAE,KAAK,GAAI,KAAK;IAIzD,SAAS,CAAC,gCAAgC,IAAK,KAAK;IAIpD,SAAS,CAAC,uBAAuB,IAAK,KAAK;CAG9C"}
|
package/proxy_configuration.js
CHANGED
|
@@ -4,6 +4,65 @@ exports.ProxyConfiguration = void 0;
|
|
|
4
4
|
const tslib_1 = require("tslib");
|
|
5
5
|
const log_1 = tslib_1.__importDefault(require("@apify/log"));
|
|
6
6
|
const ow_1 = tslib_1.__importDefault(require("ow"));
|
|
7
|
+
/**
|
|
8
|
+
* Internal class for tracking the proxy tier history for a specific domain.
|
|
9
|
+
*
|
|
10
|
+
* Predicts the best proxy tier for the next request based on the error history for different proxy tiers.
|
|
11
|
+
*/
|
|
12
|
+
class ProxyTierTracker {
|
|
13
|
+
constructor(tieredProxyUrls) {
|
|
14
|
+
Object.defineProperty(this, "histogram", {
|
|
15
|
+
enumerable: true,
|
|
16
|
+
configurable: true,
|
|
17
|
+
writable: true,
|
|
18
|
+
value: void 0
|
|
19
|
+
});
|
|
20
|
+
Object.defineProperty(this, "currentTier", {
|
|
21
|
+
enumerable: true,
|
|
22
|
+
configurable: true,
|
|
23
|
+
writable: true,
|
|
24
|
+
value: void 0
|
|
25
|
+
});
|
|
26
|
+
this.histogram = tieredProxyUrls.map(() => 0);
|
|
27
|
+
this.currentTier = 0;
|
|
28
|
+
}
|
|
29
|
+
/**
|
|
30
|
+
* Processes a single step of the algorithm and updates the current tier prediction based on the error history.
|
|
31
|
+
*/
|
|
32
|
+
processStep() {
|
|
33
|
+
this.histogram.forEach((x, i) => {
|
|
34
|
+
if (this.currentTier === i)
|
|
35
|
+
return;
|
|
36
|
+
if (x > 0)
|
|
37
|
+
this.histogram[i]--;
|
|
38
|
+
});
|
|
39
|
+
const left = this.currentTier > 0 ? this.histogram[this.currentTier - 1] : Infinity;
|
|
40
|
+
const right = this.currentTier < this.histogram.length - 1 ? this.histogram[this.currentTier + 1] : Infinity;
|
|
41
|
+
if (this.histogram[this.currentTier] > Math.min(left, right)) {
|
|
42
|
+
this.currentTier = left <= right ? this.currentTier - 1 : this.currentTier + 1;
|
|
43
|
+
}
|
|
44
|
+
if (this.histogram[this.currentTier] === left) {
|
|
45
|
+
this.currentTier--;
|
|
46
|
+
}
|
|
47
|
+
}
|
|
48
|
+
/**
|
|
49
|
+
* Increases the error score for the given proxy tier. This raises the chance of picking a different proxy tier for the subsequent requests.
|
|
50
|
+
*
|
|
51
|
+
* The error score is increased by 10 for the given tier. This means that this tier will be disadvantaged for the next 10 requests (every new request prediction decreases the error score by 1).
|
|
52
|
+
* @param tier The proxy tier to mark as problematic.
|
|
53
|
+
*/
|
|
54
|
+
addError(tier) {
|
|
55
|
+
this.histogram[tier] += 10;
|
|
56
|
+
}
|
|
57
|
+
/**
|
|
58
|
+
* Returns the best proxy tier for the next request based on the error history for different proxy tiers.
|
|
59
|
+
* @returns The proxy tier prediction
|
|
60
|
+
*/
|
|
61
|
+
predictTier() {
|
|
62
|
+
this.processStep();
|
|
63
|
+
return this.currentTier;
|
|
64
|
+
}
|
|
65
|
+
}
|
|
7
66
|
/**
|
|
8
67
|
* Configures connection to a proxy server with the provided options. Proxy servers are used to prevent target websites from blocking
|
|
9
68
|
* your crawlers based on IP address rate limits or blacklists. Setting proxy configuration in your crawlers automatically configures
|
|
@@ -72,6 +131,12 @@ class ProxyConfiguration {
|
|
|
72
131
|
writable: true,
|
|
73
132
|
value: void 0
|
|
74
133
|
});
|
|
134
|
+
Object.defineProperty(this, "tieredProxyUrls", {
|
|
135
|
+
enumerable: true,
|
|
136
|
+
configurable: true,
|
|
137
|
+
writable: true,
|
|
138
|
+
value: void 0
|
|
139
|
+
});
|
|
75
140
|
Object.defineProperty(this, "usedProxyUrls", {
|
|
76
141
|
enumerable: true,
|
|
77
142
|
configurable: true,
|
|
@@ -90,18 +155,26 @@ class ProxyConfiguration {
|
|
|
90
155
|
writable: true,
|
|
91
156
|
value: log_1.default.child({ prefix: 'ProxyConfiguration' })
|
|
92
157
|
});
|
|
158
|
+
Object.defineProperty(this, "domainTiers", {
|
|
159
|
+
enumerable: true,
|
|
160
|
+
configurable: true,
|
|
161
|
+
writable: true,
|
|
162
|
+
value: new Map()
|
|
163
|
+
});
|
|
93
164
|
const { validateRequired, ...rest } = options;
|
|
94
165
|
(0, ow_1.default)(rest, ow_1.default.object.exactShape({
|
|
95
166
|
proxyUrls: ow_1.default.optional.array.nonEmpty.ofType(ow_1.default.string.url),
|
|
96
167
|
newUrlFunction: ow_1.default.optional.function,
|
|
168
|
+
tieredProxyUrls: ow_1.default.optional.array.nonEmpty.ofType(ow_1.default.array.nonEmpty.ofType(ow_1.default.string.url)),
|
|
97
169
|
}));
|
|
98
|
-
const { proxyUrls, newUrlFunction } = options;
|
|
99
|
-
if (proxyUrls
|
|
170
|
+
const { proxyUrls, newUrlFunction, tieredProxyUrls } = options;
|
|
171
|
+
if ([proxyUrls, newUrlFunction, tieredProxyUrls].filter((x) => x).length > 1)
|
|
100
172
|
this._throwCannotCombineCustomMethods();
|
|
101
173
|
if (!proxyUrls && !newUrlFunction && validateRequired)
|
|
102
174
|
this._throwNoOptionsProvided();
|
|
103
175
|
this.proxyUrls = proxyUrls;
|
|
104
176
|
this.newUrlFunction = newUrlFunction;
|
|
177
|
+
this.tieredProxyUrls = tieredProxyUrls;
|
|
105
178
|
}
|
|
106
179
|
/**
|
|
107
180
|
* This function creates a new {@apilink ProxyInfo} info object.
|
|
@@ -120,10 +193,10 @@ class ProxyConfiguration {
|
|
|
120
193
|
* The identifier must not be longer than 50 characters and include only the following: `0-9`, `a-z`, `A-Z`, `"."`, `"_"` and `"~"`.
|
|
121
194
|
* @return Represents information about used proxy and its configuration.
|
|
122
195
|
*/
|
|
123
|
-
async newProxyInfo(sessionId) {
|
|
196
|
+
async newProxyInfo(sessionId, options) {
|
|
124
197
|
if (typeof sessionId === 'number')
|
|
125
198
|
sessionId = `${sessionId}`;
|
|
126
|
-
const url = await this.newUrl(sessionId);
|
|
199
|
+
const url = await this.newUrl(sessionId, options);
|
|
127
200
|
const { username, password, port, hostname } = new URL(url);
|
|
128
201
|
return {
|
|
129
202
|
sessionId,
|
|
@@ -134,6 +207,49 @@ class ProxyConfiguration {
|
|
|
134
207
|
port: port,
|
|
135
208
|
};
|
|
136
209
|
}
|
|
210
|
+
/**
|
|
211
|
+
* Given a session identifier and a request / proxy tier, this function returns a new proxy URL based on the provided configuration options.
|
|
212
|
+
* @param _sessionId Session identifier
|
|
213
|
+
* @param options Options for the tiered proxy rotation
|
|
214
|
+
* @returns A string with a proxy URL.
|
|
215
|
+
*/
|
|
216
|
+
_handleTieredUrl(_sessionId, options) {
|
|
217
|
+
if (!this.tieredProxyUrls)
|
|
218
|
+
throw new Error('Tiered proxy URLs are not set');
|
|
219
|
+
if (!options || (!options?.request && options?.proxyTier === undefined)) {
|
|
220
|
+
const allProxyUrls = this.tieredProxyUrls.flat();
|
|
221
|
+
return allProxyUrls[this.nextCustomUrlIndex++ % allProxyUrls.length];
|
|
222
|
+
}
|
|
223
|
+
let tierPrediction = options.proxyTier;
|
|
224
|
+
if (typeof tierPrediction !== 'number') {
|
|
225
|
+
tierPrediction = this.getProxyTier(options.request);
|
|
226
|
+
}
|
|
227
|
+
const proxyTier = this.tieredProxyUrls[tierPrediction];
|
|
228
|
+
return proxyTier[this.nextCustomUrlIndex++ % proxyTier.length];
|
|
229
|
+
}
|
|
230
|
+
/**
|
|
231
|
+
* Given a `Request` object, this function returns the tier of the proxy that should be used for the request.
|
|
232
|
+
*
|
|
233
|
+
* This returns `null` if `tieredProxyUrls` option is not set.
|
|
234
|
+
*/
|
|
235
|
+
getProxyTier(request) {
|
|
236
|
+
var _a;
|
|
237
|
+
if (!this.tieredProxyUrls)
|
|
238
|
+
return null;
|
|
239
|
+
const domain = new URL(request.url).hostname;
|
|
240
|
+
if (!this.domainTiers.has(domain)) {
|
|
241
|
+
this.domainTiers.set(domain, new ProxyTierTracker(this.tieredProxyUrls));
|
|
242
|
+
}
|
|
243
|
+
(_a = request.userData).__crawlee ?? (_a.__crawlee = {});
|
|
244
|
+
const tracker = this.domainTiers.get(domain);
|
|
245
|
+
if (typeof request.userData.__crawlee.lastProxyTier === 'number') {
|
|
246
|
+
tracker.addError(request.userData.__crawlee.lastProxyTier);
|
|
247
|
+
}
|
|
248
|
+
const tierPrediction = tracker.predictTier();
|
|
249
|
+
request.userData.__crawlee.lastProxyTier = tierPrediction;
|
|
250
|
+
request.userData.__crawlee.forefront = true;
|
|
251
|
+
return tierPrediction;
|
|
252
|
+
}
|
|
137
253
|
/**
|
|
138
254
|
* Returns a new proxy URL based on provided configuration options and the `sessionId` parameter.
|
|
139
255
|
* @param [sessionId]
|
|
@@ -147,12 +263,15 @@ class ProxyConfiguration {
|
|
|
147
263
|
* @return A string with a proxy URL, including authentication credentials and port number.
|
|
148
264
|
* For example, `http://bob:password123@proxy.example.com:8000`
|
|
149
265
|
*/
|
|
150
|
-
async newUrl(sessionId) {
|
|
266
|
+
async newUrl(sessionId, options) {
|
|
151
267
|
if (typeof sessionId === 'number')
|
|
152
268
|
sessionId = `${sessionId}`;
|
|
153
269
|
if (this.newUrlFunction) {
|
|
154
270
|
return this._callNewUrlFunction(sessionId);
|
|
155
271
|
}
|
|
272
|
+
if (this.tieredProxyUrls) {
|
|
273
|
+
return this._handleTieredUrl(sessionId ?? Math.random().toString().slice(2, 6), options);
|
|
274
|
+
}
|
|
156
275
|
return this._handleCustomUrl(sessionId);
|
|
157
276
|
}
|
|
158
277
|
/**
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"proxy_configuration.js","sourceRoot":"","sources":["../src/proxy_configuration.ts"],"names":[],"mappings":";;;;AAAA,6DAA6B;AAE7B,oDAAoB;
|
|
1
|
+
{"version":3,"file":"proxy_configuration.js","sourceRoot":"","sources":["../src/proxy_configuration.ts"],"names":[],"mappings":";;;;AAAA,6DAA6B;AAE7B,oDAAoB;AAsGpB;;;;GAIG;AACH,MAAM,gBAAgB;IAIlB,YAAY,eAA2B;QAH/B;;;;;WAAoB;QACpB;;;;;WAAoB;QAGxB,IAAI,CAAC,SAAS,GAAG,eAAe,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC;QAC9C,IAAI,CAAC,WAAW,GAAG,CAAC,CAAC;IACzB,CAAC;IAED;;OAEG;IACK,WAAW;QACf,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;YAC5B,IAAI,IAAI,CAAC,WAAW,KAAK,CAAC;gBAAE,OAAO;YACnC,IAAI,CAAC,GAAG,CAAC;gBAAE,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,CAAC;QACnC,CAAC,CAAC,CAAC;QAEH,MAAM,IAAI,GAAG,IAAI,CAAC,WAAW,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,WAAW,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC;QACpF,MAAM,KAAK,GAAG,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,WAAW,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC;QAE7G,IAAI,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,WAAW,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,KAAK,CAAC,EAAE,CAAC;YAC3D,IAAI,CAAC,WAAW,GAAG,IAAI,IAAI,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,GAAG,CAAC,CAAC;QACnF,CAAC;QAED,IAAI,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,WAAW,CAAC,KAAK,IAAI,EAAE,CAAC;YAC5C,IAAI,CAAC,WAAW,EAAE,CAAC;QACvB,CAAC;IACL,CAAC;IAED;;;;;OAKG;IACH,QAAQ,CAAC,IAAY;QACjB,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;IAC/B,CAAC;IAED;;;OAGG;IACH,WAAW;QACP,IAAI,CAAC,WAAW,EAAE,CAAC;QACnB,OAAO,IAAI,CAAC,WAAW,CAAC;IAC5B,CAAC;CACJ;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2BG;AACH,MAAa,kBAAkB;IAU3B;;;;;;;;;;;;;;;;;;;OAmBG;IACH,YAAY,UAAqC,EAAE;QA7BnD;;;;mBAAmB,KAAK;WAAC;QACf;;;;mBAAqB,CAAC;WAAC;QACvB;;;;;WAAqB;QACrB;;;;;WAA6B;QAC7B;;;;mBAAgB,IAAI,GAAG,EAAkB;WAAC;QAC1C;;;;;WAA4C;QAC5C;;;;mBAAM,aAAG,CAAC,KAAK,CAAC,EAAE,MAAM,EAAE,oBAAoB,EAAE,CAAC;WAAC;QAClD;;;;mBAAc,IAAI,GAAG,EAA4B;WAAC;QAuBxD,MAAM,EAAE,gBAAgB,EAAE,GAAG,IAAI,EAAE,GAAG,OAAqB,CAAC;QAC5D,IAAA,YAAE,EAAC,IAAI,EAAE,YAAE,CAAC,MAAM,CAAC,UAAU,CAAC;YAC1B,SAAS,EAAE,YAAE,CAAC,QAAQ,CAAC,KAAK,CAAC,QAAQ,CAAC,MAAM,CAAC,YAAE,CAAC,MAAM,
CAAC,GAAG,CAAC;YAC3D,cAAc,EAAE,YAAE,CAAC,QAAQ,CAAC,QAAQ;YACpC,eAAe,EAAE,YAAE,CAAC,QAAQ,CAAC,KAAK,CAAC,QAAQ,CAAC,MAAM,CAAC,YAAE,CAAC,KAAK,CAAC,QAAQ,CAAC,MAAM,CAAC,YAAE,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;SAC9F,CAAC,CAAC,CAAC;QAEJ,MAAM,EAAE,SAAS,EAAE,cAAc,EAAE,eAAe,EAAE,GAAG,OAAO,CAAC;QAE/D,IAAI,CAAC,SAAS,EAAE,cAAc,EAAE,eAAe,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC;YAAE,IAAI,CAAC,gCAAgC,EAAE,CAAC;QACtH,IAAI,CAAC,SAAS,IAAI,CAAC,cAAc,IAAI,gBAAgB;YAAE,IAAI,CAAC,uBAAuB,EAAE,CAAC;QAEtF,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;QAC3B,IAAI,CAAC,cAAc,GAAG,cAAc,CAAC;QACrC,IAAI,CAAC,eAAe,GAAG,eAAe,CAAC;IAC3C,CAAC;IAED;;;;;;;;;;;;;;;;OAgBG;IACH,KAAK,CAAC,YAAY,CAAC,SAA2B,EAAE,OAA4B;QACxE,IAAI,OAAO,SAAS,KAAK,QAAQ;YAAE,SAAS,GAAG,GAAG,SAAS,EAAE,CAAC;QAE9D,MAAM,GAAG,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC;QAElD,MAAM,EAAE,QAAQ,EAAE,QAAQ,EAAE,IAAI,EAAE,QAAQ,EAAE,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;QAE5D,OAAO;YACH,SAAS;YACT,GAAG;YACH,QAAQ;YACR,QAAQ;YACR,QAAQ;YACR,IAAI,EAAE,IAAK;SACd,CAAC;IACN,CAAC;IAED;;;;;OAKG;IACO,gBAAgB,CAAC,UAAkB,EAAE,OAA4B;QACvE,IAAI,CAAC,IAAI,CAAC,eAAe;YAAE,MAAM,IAAI,KAAK,CAAC,+BAA+B,CAAC,CAAC;QAE5E,IAAI,CAAC,OAAO,IAAI,CAAC,CAAC,OAAO,EAAE,OAAO,IAAI,OAAO,EAAE,SAAS,KAAK,SAAS,CAAC,EAAE,CAAC;YACtE,MAAM,YAAY,GAAG,IAAI,CAAC,eAAe,CAAC,IAAI,EAAE,CAAC;YACjD,OAAO,YAAY,CAAC,IAAI,CAAC,kBAAkB,EAAE,GAAG,YAAY,CAAC,MAAM,CAAC,CAAC;QACzE,CAAC;QAED,IAAI,cAAc,GAAG,OAAO,CAAC,SAAU,CAAC;QAExC,IAAI,OAAO,cAAc,KAAK,QAAQ,EAAE,CAAC;YACrC,cAAc,GAAG,IAAI,CAAC,YAAY,CAAC,OAAO,CAAC,OAAQ,CAAE,CAAC;QAC1D,CAAC;QAED,MAAM,SAAS,GAAG,IAAI,CAAC,eAAgB,CAAC,cAAc,CAAC,CAAC;QAExD,OAAO,SAAS,CAAC,IAAI,CAAC,kBAAkB,EAAE,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC;IACnE,CAAC;IAED;;;;OAIG;IACH,YAAY,CAAC,OAAgB;;QACzB,IAAI,CAAC,IAAI,CAAC,eAAe;YAAE,OAAO,IAAI,CAAC;QAEvC,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC;QAC7C,IAAI,CAAC,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC;YAChC,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,MAAM,EAAE,IAAI,gBAAgB,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC,CAAC;QAC7E,CAAC;Q
AED,MAAA,OAAO,CAAC,QAAQ,EAAC,SAAS,QAAT,SAAS,GAAK,EAAE,EAAC;QAElC,MAAM,OAAO,GAAG,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,MAAM,CAAE,CAAC;QAE9C,IAAI,OAAO,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAC,aAAa,KAAK,QAAQ,EAAE,CAAC;YAC/D,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAC,aAAa,CAAC,CAAC;QAC/D,CAAC;QAED,MAAM,cAAc,GAAG,OAAO,CAAC,WAAW,EAAE,CAAC;QAE7C,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAC,aAAa,GAAG,cAAc,CAAC;QAC1D,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAC,SAAS,GAAG,IAAI,CAAC;QAE5C,OAAO,cAAc,CAAC;IAC1B,CAAC;IAED;;;;;;;;;;;;OAYG;IACH,KAAK,CAAC,MAAM,CAAC,SAA2B,EAAE,OAA4B;QAClE,IAAI,OAAO,SAAS,KAAK,QAAQ;YAAE,SAAS,GAAG,GAAG,SAAS,EAAE,CAAC;QAE9D,IAAI,IAAI,CAAC,cAAc,EAAE,CAAC;YACtB,OAAO,IAAI,CAAC,mBAAmB,CAAC,SAAS,CAAE,CAAC;QAChD,CAAC;QAED,IAAI,IAAI,CAAC,eAAe,EAAE,CAAC;YACvB,OAAO,IAAI,CAAC,gBAAgB,CAAC,SAAS,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;QAC7F,CAAC;QAED,OAAO,IAAI,CAAC,gBAAgB,CAAC,SAAS,CAAC,CAAC;IAC5C,CAAC;IAED;;OAEG;IACO,gBAAgB,CAAC,SAAkB;QACzC,IAAI,cAAsB,CAAC;QAE3B,IAAI,CAAC,SAAS,EAAE,CAAC;YACb,OAAO,IAAI,CAAC,SAAU,CAAC,IAAI,CAAC,kBAAkB,EAAE,GAAG,IAAI,CAAC,SAAU,CAAC,MAAM,CAAC,CAAC;QAC/E,CAAC;QAED,IAAI,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,SAAS,CAAC,EAAE,CAAC;YACpC,cAAc,GAAG,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,SAAS,CAAE,CAAC;QACxD,CAAC;aAAM,CAAC;YACJ,cAAc,GAAG,IAAI,CAAC,SAAU,CAAC,IAAI,CAAC,kBAAkB,EAAE,GAAG,IAAI,CAAC,SAAU,CAAC,MAAM,CAAC,CAAC;YACrF,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,SAAS,EAAE,cAAc,CAAC,CAAC;QACtD,CAAC;QAED,OAAO,cAAc,CAAC;IAC1B,CAAC;IAED;;OAEG;IACO,KAAK,CAAC,mBAAmB,CAAC,SAAkB;QAClD,IAAI,QAAgB,CAAC;QAErB,IAAI,CAAC;YACD,QAAQ,GAAG,MAAM,IAAI,CAAC,cAAe,CAAC,SAAU,CAAC,CAAC;YAClD,IAAI,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,6BAA6B;YAChD,OAAO,QAAQ,CAAC;QACpB,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACX,IAAI,CAAC,2BAA2B,CAAC,GAAY,CAAC,CAAC;QACnD,CAAC;IACL,CAAC;IAES,2BAA2B,CAAC,GAAU;QAC5C,MAAM,IAAI,KAAK,CAAC,mEAAmE,GAAG,CAAC,OAAO,EAAE,CAAC,CAAC;IACtG,CAAC;IAES,gCAAgC;QACtC,MAAM,IAAI,KAAK,CAAC,6GAA6G,CAAC,CAAC;IACnI,CAAC;IAES,uBAAuB;QAC7B,MAAM,IAAI,KAAK,CAAC,8EAA8E,CAAC,CAAC;IACpG,CAAC;CACJ;AA
jND,gDAiNC"}
|