rezo 1.0.41 → 1.0.43
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/adapters/curl.cjs +143 -32
- package/dist/adapters/curl.js +143 -32
- package/dist/adapters/entries/curl.d.ts +65 -0
- package/dist/adapters/entries/fetch.d.ts +65 -0
- package/dist/adapters/entries/http.d.ts +65 -0
- package/dist/adapters/entries/http2.d.ts +65 -0
- package/dist/adapters/entries/react-native.d.ts +65 -0
- package/dist/adapters/entries/xhr.d.ts +65 -0
- package/dist/adapters/fetch.cjs +98 -12
- package/dist/adapters/fetch.js +98 -12
- package/dist/adapters/http.cjs +26 -14
- package/dist/adapters/http.js +26 -14
- package/dist/adapters/http2.cjs +756 -227
- package/dist/adapters/http2.js +756 -227
- package/dist/adapters/index.cjs +6 -6
- package/dist/adapters/xhr.cjs +94 -2
- package/dist/adapters/xhr.js +94 -2
- package/dist/cache/dns-cache.cjs +5 -3
- package/dist/cache/dns-cache.js +5 -3
- package/dist/cache/file-cacher.cjs +7 -1
- package/dist/cache/file-cacher.js +7 -1
- package/dist/cache/index.cjs +15 -13
- package/dist/cache/index.js +1 -0
- package/dist/cache/navigation-history.cjs +298 -0
- package/dist/cache/navigation-history.js +296 -0
- package/dist/cache/url-store.cjs +7 -1
- package/dist/cache/url-store.js +7 -1
- package/dist/core/rezo.cjs +7 -0
- package/dist/core/rezo.js +7 -0
- package/dist/crawler.d.ts +196 -11
- package/dist/entries/crawler.cjs +5 -5
- package/dist/index.cjs +27 -24
- package/dist/index.d.ts +73 -0
- package/dist/index.js +1 -0
- package/dist/internal/agents/base.cjs +113 -0
- package/dist/internal/agents/base.js +110 -0
- package/dist/internal/agents/http-proxy.cjs +89 -0
- package/dist/internal/agents/http-proxy.js +86 -0
- package/dist/internal/agents/https-proxy.cjs +176 -0
- package/dist/internal/agents/https-proxy.js +173 -0
- package/dist/internal/agents/index.cjs +10 -0
- package/dist/internal/agents/index.js +5 -0
- package/dist/internal/agents/socks-client.cjs +571 -0
- package/dist/internal/agents/socks-client.js +567 -0
- package/dist/internal/agents/socks-proxy.cjs +75 -0
- package/dist/internal/agents/socks-proxy.js +72 -0
- package/dist/platform/browser.d.ts +65 -0
- package/dist/platform/bun.d.ts +65 -0
- package/dist/platform/deno.d.ts +65 -0
- package/dist/platform/node.d.ts +65 -0
- package/dist/platform/react-native.d.ts +65 -0
- package/dist/platform/worker.d.ts +65 -0
- package/dist/plugin/crawler-options.cjs +1 -1
- package/dist/plugin/crawler-options.js +1 -1
- package/dist/plugin/crawler.cjs +192 -1
- package/dist/plugin/crawler.js +192 -1
- package/dist/plugin/index.cjs +36 -36
- package/dist/proxy/index.cjs +18 -16
- package/dist/proxy/index.js +17 -12
- package/dist/queue/index.cjs +8 -8
- package/dist/responses/buildError.cjs +11 -2
- package/dist/responses/buildError.js +11 -2
- package/dist/responses/universal/index.cjs +11 -11
- package/dist/utils/agent-pool.cjs +1 -17
- package/dist/utils/agent-pool.js +1 -17
- package/dist/utils/curl.cjs +317 -0
- package/dist/utils/curl.js +314 -0
- package/package.json +1 -1
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
import * as net from "node:net";
|
|
2
|
+
import * as tls from "node:tls";
|
|
3
|
+
import { Agent } from './base.js';
|
|
4
|
+
import { SocksClient } from './socks-client.js';
|
|
5
|
+
/**
 * Convert a SOCKS proxy URL into the plain proxy descriptor consumed by the SOCKS client.
 *
 * @param {URL} url - Parsed proxy URL, e.g. `new URL("socks5://user:pass@127.0.0.1:1080")`.
 * @returns {{ host: string, port: number, type: 4 | 5, userId: (string | undefined), password: (string | undefined) }}
 *   `type` is 4 for the `socks4` / `socks4a` schemes and 5 for every other scheme
 *   (`socks`, `socks5`, `socks5h`, and anything unrecognised). `port` falls back to
 *   1080 when the URL carries no explicit port. Credentials are percent-decoded and
 *   left `undefined` when absent.
 */
function parseSocksURL(url) {
  const scheme = url.protocol.replace(":", "");
  // Only the two SOCKS4 spellings select version 4; everything else is treated as SOCKS5.
  const type = scheme === "socks4" || scheme === "socks4a" ? 4 : 5;

  // WHATWG URL keeps the square brackets around IPv6 literals ("[::1]").
  // A bracketed string is not a usable address for a socket connection,
  // so hand the bare address to the SOCKS client instead.
  const host = url.hostname.replace(/^\[|\]$/g, "");

  const port = url.port ? Number.parseInt(url.port, 10) : 1080;
  const userId = url.username ? decodeURIComponent(url.username) : undefined;
  const password = url.password ? decodeURIComponent(url.password) : undefined;

  return { host, port, type, userId, password };
}
|
|
27
|
+
|
|
28
|
+
/**
 * Agent that tunnels outgoing connections through a SOCKS proxy.
 *
 * The proxy is described by a URL (string or `URL` instance) that is parsed
 * once in the constructor via `parseSocksURL`. For every request, `connect`
 * asks `SocksClient` to open a tunnel to the destination and, for secure
 * endpoints, layers TLS on top of the tunnelled socket.
 */
export class SocksProxyAgent extends Agent {
  /** URL schemes (without the trailing colon) this agent advertises. */
  static protocols = [
    "socks",
    "socks4",
    "socks4a",
    "socks5",
    "socks5h"
  ];

  /** Proxy descriptor produced by `parseSocksURL` (host, port, type, credentials). */
  proxy;
  /** Constructor options, spread into `tls.connect` when upgrading to TLS. */
  tlsConnectionOptions;
  /** Timeout forwarded to the SOCKS client, or `null` when none was given. */
  timeout;

  /**
   * @param {string | URL} proxy - SOCKS proxy URL.
   * @param {object} [opts] - Agent options; also reused as TLS connection options.
   *   `opts.timeout`, when present, is forwarded to the SOCKS client.
   */
  constructor(proxy, opts) {
    super(opts);
    const url = typeof proxy === "string" ? new URL(proxy) : proxy;
    this.proxy = parseSocksURL(url);
    this.timeout = opts?.timeout ?? null;
    this.tlsConnectionOptions = opts ?? {};
  }

  /**
   * Open a socket to the request's destination through the SOCKS proxy.
   *
   * @param {*} _req - The outgoing request (unused).
   * @param {object} opts - Per-connection options; reads `host`, `port`,
   *   `secureEndpoint` and `servername`.
   * @returns {Promise<*>} The tunnelled socket, wrapped in a TLS socket when
   *   `opts.secureEndpoint` is truthy.
   * @throws {Error} When `opts.host` is missing.
   */
  async connect(_req, opts) {
    const { host, port } = opts;
    if (!host) {
      throw new Error('No "host" provided');
    }

    const socksOpts = {
      proxy: this.proxy,
      destination: { host, port },
      command: "connect"
    };
    if (this.timeout !== null) {
      socksOpts.timeout = this.timeout;
    }

    const { socket } = await SocksClient.createConnection(socksOpts);
    if (!opts.secureEndpoint) {
      return socket;
    }

    const servername = opts.servername ?? host;
    return tls.connect({
      ...this.tlsConnectionOptions,
      socket,
      // With `socket` supplied, `host` is not used for dialing; it only names
      // the peer for certificate identity checks. Without it, an IP-literal
      // destination (where `servername` must stay undefined) would be verified
      // against the proxy connection's host instead of the real destination.
      host,
      // SNI must not carry an IP literal, so only send it for hostnames.
      servername: !net.isIP(servername) ? servername : undefined
    });
  }
}
|
|
72
|
+
export default SocksProxyAgent;
|
|
@@ -4355,6 +4355,71 @@ export declare class Rezo {
|
|
|
4355
4355
|
* @see {@link cookieJar} - Access the underlying RezoCookieJar for more control
|
|
4356
4356
|
*/
|
|
4357
4357
|
clearCookies(): void;
|
|
4358
|
+
/**
|
|
4359
|
+
* Convert a Rezo request configuration to a cURL command string.
|
|
4360
|
+
*
|
|
4361
|
+
* Generates a valid cURL command that can be executed in a terminal to
|
|
4362
|
+
* reproduce the same HTTP request. Useful for:
|
|
4363
|
+
* - Debugging and sharing requests
|
|
4364
|
+
* - Documentation and examples
|
|
4365
|
+
* - Testing requests outside of Node.js
|
|
4366
|
+
* - Exporting requests to other tools
|
|
4367
|
+
*
|
|
4368
|
+
* @param config - Request configuration object
|
|
4369
|
+
* @returns A cURL command string
|
|
4370
|
+
*
|
|
4371
|
+
* @example
|
|
4372
|
+
* ```typescript
|
|
4373
|
+
* const curl = Rezo.toCurl({
|
|
4374
|
+
* url: 'https://api.example.com/users',
|
|
4375
|
+
* method: 'POST',
|
|
4376
|
+
* headers: { 'Content-Type': 'application/json' },
|
|
4377
|
+
* body: { name: 'John', email: 'john@example.com' }
|
|
4378
|
+
* });
|
|
4379
|
+
* // Output: curl -X POST -H 'content-type: application/json' --data-raw '{"name":"John","email":"john@example.com"}' -L --compressed 'https://api.example.com/users'
|
|
4380
|
+
* ```
|
|
4381
|
+
*/
|
|
4382
|
+
static toCurl(config: RezoRequestConfig | RezoRequestOptions): string;
|
|
4383
|
+
/**
|
|
4384
|
+
* Parse a cURL command string into a Rezo request configuration.
|
|
4385
|
+
*
|
|
4386
|
+
* Converts a cURL command into a configuration object that can be
|
|
4387
|
+
* passed directly to Rezo request methods. Useful for:
|
|
4388
|
+
* - Importing requests from browser DevTools
|
|
4389
|
+
* - Converting curl examples from API documentation
|
|
4390
|
+
* - Migrating scripts from curl to Rezo
|
|
4391
|
+
*
|
|
4392
|
+
* Supports common cURL options:
|
|
4393
|
+
* - `-X, --request` - HTTP method
|
|
4394
|
+
* - `-H, --header` - Request headers
|
|
4395
|
+
* - `-d, --data, --data-raw, --data-binary` - Request body
|
|
4396
|
+
* - `-u, --user` - Basic authentication
|
|
4397
|
+
* - `-x, --proxy` - Proxy configuration
|
|
4398
|
+
* - `--socks5, --socks4` - SOCKS proxy
|
|
4399
|
+
* - `-L, --location` - Follow redirects
|
|
4400
|
+
* - `--max-redirs` - Maximum redirects
|
|
4401
|
+
* - `--max-time` - Request timeout
|
|
4402
|
+
* - `-k, --insecure` - Skip TLS verification
|
|
4403
|
+
* - `-A, --user-agent` - User agent header
|
|
4404
|
+
*
|
|
4405
|
+
* @param curlCommand - A cURL command string
|
|
4406
|
+
* @returns A request configuration object
|
|
4407
|
+
*
|
|
4408
|
+
* @example
|
|
4409
|
+
* ```typescript
|
|
4410
|
+
* // From browser DevTools "Copy as cURL"
|
|
4411
|
+
* const config = Rezo.fromCurl(`
|
|
4412
|
+
* curl 'https://api.example.com/data' \\
|
|
4413
|
+
* -H 'Authorization: Bearer token123' \\
|
|
4414
|
+
* -H 'Content-Type: application/json'
|
|
4415
|
+
* `);
|
|
4416
|
+
*
|
|
4417
|
+
* // Use with Rezo
|
|
4418
|
+
* const rezo = new Rezo();
|
|
4419
|
+
* const response = await rezo.request(config);
|
|
4420
|
+
* ```
|
|
4421
|
+
*/
|
|
4422
|
+
static fromCurl(curlCommand: string): RezoRequestOptions;
|
|
4358
4423
|
}
|
|
4359
4424
|
/**
|
|
4360
4425
|
* Extended Rezo instance with Axios-compatible static helpers.
|
package/dist/platform/bun.d.ts
CHANGED
|
@@ -4355,6 +4355,71 @@ export declare class Rezo {
|
|
|
4355
4355
|
* @see {@link cookieJar} - Access the underlying RezoCookieJar for more control
|
|
4356
4356
|
*/
|
|
4357
4357
|
clearCookies(): void;
|
|
4358
|
+
/**
|
|
4359
|
+
* Convert a Rezo request configuration to a cURL command string.
|
|
4360
|
+
*
|
|
4361
|
+
* Generates a valid cURL command that can be executed in a terminal to
|
|
4362
|
+
* reproduce the same HTTP request. Useful for:
|
|
4363
|
+
* - Debugging and sharing requests
|
|
4364
|
+
* - Documentation and examples
|
|
4365
|
+
* - Testing requests outside of Node.js
|
|
4366
|
+
* - Exporting requests to other tools
|
|
4367
|
+
*
|
|
4368
|
+
* @param config - Request configuration object
|
|
4369
|
+
* @returns A cURL command string
|
|
4370
|
+
*
|
|
4371
|
+
* @example
|
|
4372
|
+
* ```typescript
|
|
4373
|
+
* const curl = Rezo.toCurl({
|
|
4374
|
+
* url: 'https://api.example.com/users',
|
|
4375
|
+
* method: 'POST',
|
|
4376
|
+
* headers: { 'Content-Type': 'application/json' },
|
|
4377
|
+
* body: { name: 'John', email: 'john@example.com' }
|
|
4378
|
+
* });
|
|
4379
|
+
* // Output: curl -X POST -H 'content-type: application/json' --data-raw '{"name":"John","email":"john@example.com"}' -L --compressed 'https://api.example.com/users'
|
|
4380
|
+
* ```
|
|
4381
|
+
*/
|
|
4382
|
+
static toCurl(config: RezoRequestConfig | RezoRequestOptions): string;
|
|
4383
|
+
/**
|
|
4384
|
+
* Parse a cURL command string into a Rezo request configuration.
|
|
4385
|
+
*
|
|
4386
|
+
* Converts a cURL command into a configuration object that can be
|
|
4387
|
+
* passed directly to Rezo request methods. Useful for:
|
|
4388
|
+
* - Importing requests from browser DevTools
|
|
4389
|
+
* - Converting curl examples from API documentation
|
|
4390
|
+
* - Migrating scripts from curl to Rezo
|
|
4391
|
+
*
|
|
4392
|
+
* Supports common cURL options:
|
|
4393
|
+
* - `-X, --request` - HTTP method
|
|
4394
|
+
* - `-H, --header` - Request headers
|
|
4395
|
+
* - `-d, --data, --data-raw, --data-binary` - Request body
|
|
4396
|
+
* - `-u, --user` - Basic authentication
|
|
4397
|
+
* - `-x, --proxy` - Proxy configuration
|
|
4398
|
+
* - `--socks5, --socks4` - SOCKS proxy
|
|
4399
|
+
* - `-L, --location` - Follow redirects
|
|
4400
|
+
* - `--max-redirs` - Maximum redirects
|
|
4401
|
+
* - `--max-time` - Request timeout
|
|
4402
|
+
* - `-k, --insecure` - Skip TLS verification
|
|
4403
|
+
* - `-A, --user-agent` - User agent header
|
|
4404
|
+
*
|
|
4405
|
+
* @param curlCommand - A cURL command string
|
|
4406
|
+
* @returns A request configuration object
|
|
4407
|
+
*
|
|
4408
|
+
* @example
|
|
4409
|
+
* ```typescript
|
|
4410
|
+
* // From browser DevTools "Copy as cURL"
|
|
4411
|
+
* const config = Rezo.fromCurl(`
|
|
4412
|
+
* curl 'https://api.example.com/data' \\
|
|
4413
|
+
* -H 'Authorization: Bearer token123' \\
|
|
4414
|
+
* -H 'Content-Type: application/json'
|
|
4415
|
+
* `);
|
|
4416
|
+
*
|
|
4417
|
+
* // Use with Rezo
|
|
4418
|
+
* const rezo = new Rezo();
|
|
4419
|
+
* const response = await rezo.request(config);
|
|
4420
|
+
* ```
|
|
4421
|
+
*/
|
|
4422
|
+
static fromCurl(curlCommand: string): RezoRequestOptions;
|
|
4358
4423
|
}
|
|
4359
4424
|
/**
|
|
4360
4425
|
* Extended Rezo instance with Axios-compatible static helpers.
|
package/dist/platform/deno.d.ts
CHANGED
|
@@ -4355,6 +4355,71 @@ export declare class Rezo {
|
|
|
4355
4355
|
* @see {@link cookieJar} - Access the underlying RezoCookieJar for more control
|
|
4356
4356
|
*/
|
|
4357
4357
|
clearCookies(): void;
|
|
4358
|
+
/**
|
|
4359
|
+
* Convert a Rezo request configuration to a cURL command string.
|
|
4360
|
+
*
|
|
4361
|
+
* Generates a valid cURL command that can be executed in a terminal to
|
|
4362
|
+
* reproduce the same HTTP request. Useful for:
|
|
4363
|
+
* - Debugging and sharing requests
|
|
4364
|
+
* - Documentation and examples
|
|
4365
|
+
* - Testing requests outside of Node.js
|
|
4366
|
+
* - Exporting requests to other tools
|
|
4367
|
+
*
|
|
4368
|
+
* @param config - Request configuration object
|
|
4369
|
+
* @returns A cURL command string
|
|
4370
|
+
*
|
|
4371
|
+
* @example
|
|
4372
|
+
* ```typescript
|
|
4373
|
+
* const curl = Rezo.toCurl({
|
|
4374
|
+
* url: 'https://api.example.com/users',
|
|
4375
|
+
* method: 'POST',
|
|
4376
|
+
* headers: { 'Content-Type': 'application/json' },
|
|
4377
|
+
* body: { name: 'John', email: 'john@example.com' }
|
|
4378
|
+
* });
|
|
4379
|
+
* // Output: curl -X POST -H 'content-type: application/json' --data-raw '{"name":"John","email":"john@example.com"}' -L --compressed 'https://api.example.com/users'
|
|
4380
|
+
* ```
|
|
4381
|
+
*/
|
|
4382
|
+
static toCurl(config: RezoRequestConfig | RezoRequestOptions): string;
|
|
4383
|
+
/**
|
|
4384
|
+
* Parse a cURL command string into a Rezo request configuration.
|
|
4385
|
+
*
|
|
4386
|
+
* Converts a cURL command into a configuration object that can be
|
|
4387
|
+
* passed directly to Rezo request methods. Useful for:
|
|
4388
|
+
* - Importing requests from browser DevTools
|
|
4389
|
+
* - Converting curl examples from API documentation
|
|
4390
|
+
* - Migrating scripts from curl to Rezo
|
|
4391
|
+
*
|
|
4392
|
+
* Supports common cURL options:
|
|
4393
|
+
* - `-X, --request` - HTTP method
|
|
4394
|
+
* - `-H, --header` - Request headers
|
|
4395
|
+
* - `-d, --data, --data-raw, --data-binary` - Request body
|
|
4396
|
+
* - `-u, --user` - Basic authentication
|
|
4397
|
+
* - `-x, --proxy` - Proxy configuration
|
|
4398
|
+
* - `--socks5, --socks4` - SOCKS proxy
|
|
4399
|
+
* - `-L, --location` - Follow redirects
|
|
4400
|
+
* - `--max-redirs` - Maximum redirects
|
|
4401
|
+
* - `--max-time` - Request timeout
|
|
4402
|
+
* - `-k, --insecure` - Skip TLS verification
|
|
4403
|
+
* - `-A, --user-agent` - User agent header
|
|
4404
|
+
*
|
|
4405
|
+
* @param curlCommand - A cURL command string
|
|
4406
|
+
* @returns A request configuration object
|
|
4407
|
+
*
|
|
4408
|
+
* @example
|
|
4409
|
+
* ```typescript
|
|
4410
|
+
* // From browser DevTools "Copy as cURL"
|
|
4411
|
+
* const config = Rezo.fromCurl(`
|
|
4412
|
+
* curl 'https://api.example.com/data' \\
|
|
4413
|
+
* -H 'Authorization: Bearer token123' \\
|
|
4414
|
+
* -H 'Content-Type: application/json'
|
|
4415
|
+
* `);
|
|
4416
|
+
*
|
|
4417
|
+
* // Use with Rezo
|
|
4418
|
+
* const rezo = new Rezo();
|
|
4419
|
+
* const response = await rezo.request(config);
|
|
4420
|
+
* ```
|
|
4421
|
+
*/
|
|
4422
|
+
static fromCurl(curlCommand: string): RezoRequestOptions;
|
|
4358
4423
|
}
|
|
4359
4424
|
/**
|
|
4360
4425
|
* Extended Rezo instance with Axios-compatible static helpers.
|
package/dist/platform/node.d.ts
CHANGED
|
@@ -4355,6 +4355,71 @@ export declare class Rezo {
|
|
|
4355
4355
|
* @see {@link cookieJar} - Access the underlying RezoCookieJar for more control
|
|
4356
4356
|
*/
|
|
4357
4357
|
clearCookies(): void;
|
|
4358
|
+
/**
|
|
4359
|
+
* Convert a Rezo request configuration to a cURL command string.
|
|
4360
|
+
*
|
|
4361
|
+
* Generates a valid cURL command that can be executed in a terminal to
|
|
4362
|
+
* reproduce the same HTTP request. Useful for:
|
|
4363
|
+
* - Debugging and sharing requests
|
|
4364
|
+
* - Documentation and examples
|
|
4365
|
+
* - Testing requests outside of Node.js
|
|
4366
|
+
* - Exporting requests to other tools
|
|
4367
|
+
*
|
|
4368
|
+
* @param config - Request configuration object
|
|
4369
|
+
* @returns A cURL command string
|
|
4370
|
+
*
|
|
4371
|
+
* @example
|
|
4372
|
+
* ```typescript
|
|
4373
|
+
* const curl = Rezo.toCurl({
|
|
4374
|
+
* url: 'https://api.example.com/users',
|
|
4375
|
+
* method: 'POST',
|
|
4376
|
+
* headers: { 'Content-Type': 'application/json' },
|
|
4377
|
+
* body: { name: 'John', email: 'john@example.com' }
|
|
4378
|
+
* });
|
|
4379
|
+
* // Output: curl -X POST -H 'content-type: application/json' --data-raw '{"name":"John","email":"john@example.com"}' -L --compressed 'https://api.example.com/users'
|
|
4380
|
+
* ```
|
|
4381
|
+
*/
|
|
4382
|
+
static toCurl(config: RezoRequestConfig | RezoRequestOptions): string;
|
|
4383
|
+
/**
|
|
4384
|
+
* Parse a cURL command string into a Rezo request configuration.
|
|
4385
|
+
*
|
|
4386
|
+
* Converts a cURL command into a configuration object that can be
|
|
4387
|
+
* passed directly to Rezo request methods. Useful for:
|
|
4388
|
+
* - Importing requests from browser DevTools
|
|
4389
|
+
* - Converting curl examples from API documentation
|
|
4390
|
+
* - Migrating scripts from curl to Rezo
|
|
4391
|
+
*
|
|
4392
|
+
* Supports common cURL options:
|
|
4393
|
+
* - `-X, --request` - HTTP method
|
|
4394
|
+
* - `-H, --header` - Request headers
|
|
4395
|
+
* - `-d, --data, --data-raw, --data-binary` - Request body
|
|
4396
|
+
* - `-u, --user` - Basic authentication
|
|
4397
|
+
* - `-x, --proxy` - Proxy configuration
|
|
4398
|
+
* - `--socks5, --socks4` - SOCKS proxy
|
|
4399
|
+
* - `-L, --location` - Follow redirects
|
|
4400
|
+
* - `--max-redirs` - Maximum redirects
|
|
4401
|
+
* - `--max-time` - Request timeout
|
|
4402
|
+
* - `-k, --insecure` - Skip TLS verification
|
|
4403
|
+
* - `-A, --user-agent` - User agent header
|
|
4404
|
+
*
|
|
4405
|
+
* @param curlCommand - A cURL command string
|
|
4406
|
+
* @returns A request configuration object
|
|
4407
|
+
*
|
|
4408
|
+
* @example
|
|
4409
|
+
* ```typescript
|
|
4410
|
+
* // From browser DevTools "Copy as cURL"
|
|
4411
|
+
* const config = Rezo.fromCurl(`
|
|
4412
|
+
* curl 'https://api.example.com/data' \\
|
|
4413
|
+
* -H 'Authorization: Bearer token123' \\
|
|
4414
|
+
* -H 'Content-Type: application/json'
|
|
4415
|
+
* `);
|
|
4416
|
+
*
|
|
4417
|
+
* // Use with Rezo
|
|
4418
|
+
* const rezo = new Rezo();
|
|
4419
|
+
* const response = await rezo.request(config);
|
|
4420
|
+
* ```
|
|
4421
|
+
*/
|
|
4422
|
+
static fromCurl(curlCommand: string): RezoRequestOptions;
|
|
4358
4423
|
}
|
|
4359
4424
|
/**
|
|
4360
4425
|
* Extended Rezo instance with Axios-compatible static helpers.
|
|
@@ -4355,6 +4355,71 @@ export declare class Rezo {
|
|
|
4355
4355
|
* @see {@link cookieJar} - Access the underlying RezoCookieJar for more control
|
|
4356
4356
|
*/
|
|
4357
4357
|
clearCookies(): void;
|
|
4358
|
+
/**
|
|
4359
|
+
* Convert a Rezo request configuration to a cURL command string.
|
|
4360
|
+
*
|
|
4361
|
+
* Generates a valid cURL command that can be executed in a terminal to
|
|
4362
|
+
* reproduce the same HTTP request. Useful for:
|
|
4363
|
+
* - Debugging and sharing requests
|
|
4364
|
+
* - Documentation and examples
|
|
4365
|
+
* - Testing requests outside of Node.js
|
|
4366
|
+
* - Exporting requests to other tools
|
|
4367
|
+
*
|
|
4368
|
+
* @param config - Request configuration object
|
|
4369
|
+
* @returns A cURL command string
|
|
4370
|
+
*
|
|
4371
|
+
* @example
|
|
4372
|
+
* ```typescript
|
|
4373
|
+
* const curl = Rezo.toCurl({
|
|
4374
|
+
* url: 'https://api.example.com/users',
|
|
4375
|
+
* method: 'POST',
|
|
4376
|
+
* headers: { 'Content-Type': 'application/json' },
|
|
4377
|
+
* body: { name: 'John', email: 'john@example.com' }
|
|
4378
|
+
* });
|
|
4379
|
+
* // Output: curl -X POST -H 'content-type: application/json' --data-raw '{"name":"John","email":"john@example.com"}' -L --compressed 'https://api.example.com/users'
|
|
4380
|
+
* ```
|
|
4381
|
+
*/
|
|
4382
|
+
static toCurl(config: RezoRequestConfig | RezoRequestOptions): string;
|
|
4383
|
+
/**
|
|
4384
|
+
* Parse a cURL command string into a Rezo request configuration.
|
|
4385
|
+
*
|
|
4386
|
+
* Converts a cURL command into a configuration object that can be
|
|
4387
|
+
* passed directly to Rezo request methods. Useful for:
|
|
4388
|
+
* - Importing requests from browser DevTools
|
|
4389
|
+
* - Converting curl examples from API documentation
|
|
4390
|
+
* - Migrating scripts from curl to Rezo
|
|
4391
|
+
*
|
|
4392
|
+
* Supports common cURL options:
|
|
4393
|
+
* - `-X, --request` - HTTP method
|
|
4394
|
+
* - `-H, --header` - Request headers
|
|
4395
|
+
* - `-d, --data, --data-raw, --data-binary` - Request body
|
|
4396
|
+
* - `-u, --user` - Basic authentication
|
|
4397
|
+
* - `-x, --proxy` - Proxy configuration
|
|
4398
|
+
* - `--socks5, --socks4` - SOCKS proxy
|
|
4399
|
+
* - `-L, --location` - Follow redirects
|
|
4400
|
+
* - `--max-redirs` - Maximum redirects
|
|
4401
|
+
* - `--max-time` - Request timeout
|
|
4402
|
+
* - `-k, --insecure` - Skip TLS verification
|
|
4403
|
+
* - `-A, --user-agent` - User agent header
|
|
4404
|
+
*
|
|
4405
|
+
* @param curlCommand - A cURL command string
|
|
4406
|
+
* @returns A request configuration object
|
|
4407
|
+
*
|
|
4408
|
+
* @example
|
|
4409
|
+
* ```typescript
|
|
4410
|
+
* // From browser DevTools "Copy as cURL"
|
|
4411
|
+
* const config = Rezo.fromCurl(`
|
|
4412
|
+
* curl 'https://api.example.com/data' \\
|
|
4413
|
+
* -H 'Authorization: Bearer token123' \\
|
|
4414
|
+
* -H 'Content-Type: application/json'
|
|
4415
|
+
* `);
|
|
4416
|
+
*
|
|
4417
|
+
* // Use with Rezo
|
|
4418
|
+
* const rezo = new Rezo();
|
|
4419
|
+
* const response = await rezo.request(config);
|
|
4420
|
+
* ```
|
|
4421
|
+
*/
|
|
4422
|
+
static fromCurl(curlCommand: string): RezoRequestOptions;
|
|
4358
4423
|
}
|
|
4359
4424
|
/**
|
|
4360
4425
|
* Extended Rezo instance with Axios-compatible static helpers.
|
|
@@ -4355,6 +4355,71 @@ export declare class Rezo {
|
|
|
4355
4355
|
* @see {@link cookieJar} - Access the underlying RezoCookieJar for more control
|
|
4356
4356
|
*/
|
|
4357
4357
|
clearCookies(): void;
|
|
4358
|
+
/**
|
|
4359
|
+
* Convert a Rezo request configuration to a cURL command string.
|
|
4360
|
+
*
|
|
4361
|
+
* Generates a valid cURL command that can be executed in a terminal to
|
|
4362
|
+
* reproduce the same HTTP request. Useful for:
|
|
4363
|
+
* - Debugging and sharing requests
|
|
4364
|
+
* - Documentation and examples
|
|
4365
|
+
* - Testing requests outside of Node.js
|
|
4366
|
+
* - Exporting requests to other tools
|
|
4367
|
+
*
|
|
4368
|
+
* @param config - Request configuration object
|
|
4369
|
+
* @returns A cURL command string
|
|
4370
|
+
*
|
|
4371
|
+
* @example
|
|
4372
|
+
* ```typescript
|
|
4373
|
+
* const curl = Rezo.toCurl({
|
|
4374
|
+
* url: 'https://api.example.com/users',
|
|
4375
|
+
* method: 'POST',
|
|
4376
|
+
* headers: { 'Content-Type': 'application/json' },
|
|
4377
|
+
* body: { name: 'John', email: 'john@example.com' }
|
|
4378
|
+
* });
|
|
4379
|
+
* // Output: curl -X POST -H 'content-type: application/json' --data-raw '{"name":"John","email":"john@example.com"}' -L --compressed 'https://api.example.com/users'
|
|
4380
|
+
* ```
|
|
4381
|
+
*/
|
|
4382
|
+
static toCurl(config: RezoRequestConfig | RezoRequestOptions): string;
|
|
4383
|
+
/**
|
|
4384
|
+
* Parse a cURL command string into a Rezo request configuration.
|
|
4385
|
+
*
|
|
4386
|
+
* Converts a cURL command into a configuration object that can be
|
|
4387
|
+
* passed directly to Rezo request methods. Useful for:
|
|
4388
|
+
* - Importing requests from browser DevTools
|
|
4389
|
+
* - Converting curl examples from API documentation
|
|
4390
|
+
* - Migrating scripts from curl to Rezo
|
|
4391
|
+
*
|
|
4392
|
+
* Supports common cURL options:
|
|
4393
|
+
* - `-X, --request` - HTTP method
|
|
4394
|
+
* - `-H, --header` - Request headers
|
|
4395
|
+
* - `-d, --data, --data-raw, --data-binary` - Request body
|
|
4396
|
+
* - `-u, --user` - Basic authentication
|
|
4397
|
+
* - `-x, --proxy` - Proxy configuration
|
|
4398
|
+
* - `--socks5, --socks4` - SOCKS proxy
|
|
4399
|
+
* - `-L, --location` - Follow redirects
|
|
4400
|
+
* - `--max-redirs` - Maximum redirects
|
|
4401
|
+
* - `--max-time` - Request timeout
|
|
4402
|
+
* - `-k, --insecure` - Skip TLS verification
|
|
4403
|
+
* - `-A, --user-agent` - User agent header
|
|
4404
|
+
*
|
|
4405
|
+
* @param curlCommand - A cURL command string
|
|
4406
|
+
* @returns A request configuration object
|
|
4407
|
+
*
|
|
4408
|
+
* @example
|
|
4409
|
+
* ```typescript
|
|
4410
|
+
* // From browser DevTools "Copy as cURL"
|
|
4411
|
+
* const config = Rezo.fromCurl(`
|
|
4412
|
+
* curl 'https://api.example.com/data' \\
|
|
4413
|
+
* -H 'Authorization: Bearer token123' \\
|
|
4414
|
+
* -H 'Content-Type: application/json'
|
|
4415
|
+
* `);
|
|
4416
|
+
*
|
|
4417
|
+
* // Use with Rezo
|
|
4418
|
+
* const rezo = new Rezo();
|
|
4419
|
+
* const response = await rezo.request(config);
|
|
4420
|
+
* ```
|
|
4421
|
+
*/
|
|
4422
|
+
static fromCurl(curlCommand: string): RezoRequestOptions;
|
|
4358
4423
|
}
|
|
4359
4424
|
/**
|
|
4360
4425
|
* Extended Rezo instance with Axios-compatible static helpers.
|
|
@@ -1 +1 @@
|
|
|
1
|
-
/**
 * CommonJS build of the crawler options module.
 *
 * Exposes a single export, `CrawlerOptions`, which stores crawler-wide
 * settings plus per-domain / global headers, proxies, rate limiters and
 * scraping-service adapters (Oxylabs, Decodo).
 */
const { defineProperty, getOwnPropertyNames, getOwnPropertyDescriptor } = Object;
const hasOwn = Object.prototype.hasOwnProperty;

// One interop wrapper per export object.
const commonJsWrappers = new WeakMap();

/**
 * Wraps an export object in an `__esModule`-flagged object whose properties
 * are live getters onto the source object.
 *
 * Only keys that exist on `from` at call time get a getter, so every export
 * key must be declared before this function is called.
 *
 * @param {object|Function} from - Object holding the module's exports.
 * @returns {object} The (cached) interop wrapper.
 */
const toCommonJS = (from) => {
  const cached = commonJsWrappers.get(from);
  if (cached) return cached;

  const wrapper = defineProperty({}, "__esModule", { value: true });
  if ((from && typeof from === "object") || typeof from === "function") {
    for (const key of getOwnPropertyNames(from)) {
      if (hasOwn.call(wrapper, key)) continue;
      const descriptor = getOwnPropertyDescriptor(from, key);
      defineProperty(wrapper, key, {
        get: () => from[key],
        enumerable: !descriptor || descriptor.enumerable,
      });
    }
  }
  commonJsWrappers.set(from, wrapper);
  return wrapper;
};

// The key is declared up front so the wrapper installs a live getter for it;
// the class itself is assigned at the bottom of the module.
const moduleExports = { CrawlerOptions: undefined };
module.exports = toCommonJS(moduleExports);

const { RezoQueue } = require("../queue/queue.cjs");
const { Oxylabs } = require("./addon/oxylabs/index.cjs");
const path = require("node:path");
const os = require("node:os");
const { Decodo } = require("./addon/decodo/index.cjs");

// Dot-separated labels followed by an alphabetic TLD of at least two letters.
const HOSTNAME_PATTERN = /^(?:(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,})$/;

/**
 * Crawler configuration container.
 *
 * Scalar settings are copied from the constructor argument with defaults.
 * Headers, proxies, limiters and service adapters are kept as lists of
 * `{ domain, isGlobal, ... }` entries and looked up per URL.
 */
class CrawlerOptions {
  baseUrl;
  rejectUnauthorized;
  userAgent;
  useRndUserAgent;
  timeout;
  maxRedirects;
  maxRetryAttempts;
  retryDelay;
  retryOnStatusCode;
  forceRevisit;
  retryWithoutProxyOnStatusCode;
  retryOnProxyError;
  maxRetryOnProxyError;
  allowRevisiting;
  enableCache;
  cacheTTL;
  cacheDir;
  throwFatalError;
  debug;
  oxylabs = [];
  decodo = [];
  proxies = [];
  limiters = [];
  requestHeaders = [];
  userAgents = generateUserAgents();

  /**
   * @param {object} [options] - Crawler options; every field is optional.
   */
  constructor(options = {}) {
    this.baseUrl = options.baseUrl || "";
    this.rejectUnauthorized = options.rejectUnauthorized ?? true;
    this.userAgent = options.userAgent;
    this.useRndUserAgent = options.useRndUserAgent ?? false;
    this.timeout = options.timeout ?? 30000;
    this.maxRedirects = options.maxRedirects ?? 10;
    this.maxRetryAttempts = options.maxRetryAttempts ?? 3;
    this.retryDelay = options.retryDelay ?? 0;
    this.retryOnStatusCode = options.retryOnStatusCode ?? [408, 429, 500, 502, 503, 504];
    this.forceRevisit = options.forceRevisit ?? false;
    this.retryWithoutProxyOnStatusCode = options.retryWithoutProxyOnStatusCode ?? [407, 403];
    this.retryOnProxyError = options.retryOnProxyError ?? true;
    this.maxRetryOnProxyError = options.maxRetryOnProxyError ?? 3;
    this.allowRevisiting = options.allowRevisiting ?? false;
    this.enableCache = options.enableCache ?? true;
    this.cacheTTL = options.cacheTTL ?? 604800000;
    this.cacheDir = options.cacheDir ?? path.join(os.tmpdir(), "uiniqhtt_cache");
    this.throwFatalError = options.throwFatalError ?? false;
    this.debug = options.debug ?? false;

    // NOTE(review): Decodo entries are only registered through addDecodo();
    // the constructor does not read an `options.decodo` field — confirm intended.
    this._addHeaders(options.headers);
    this._addOxylabs(options.oxylabs);
    this._addProxies(options.proxy);
    this._addLimiters(options.limiter);
  }

  /**
   * Returns the entry list for a configuration type; any type other than
   * "headers", "limiters", "oxylabs" or "decodo" selects the proxies list.
   */
  _entriesFor(type) {
    switch (type) {
      case "headers":
        return this.requestHeaders;
      case "limiters":
        return this.limiters;
      case "oxylabs":
        return this.oxylabs;
      case "decodo":
        return this.decodo;
      default:
        return this.proxies;
    }
  }

  /** Extracts the value stored in an entry of the given configuration type. */
  _payloadOf(type, entry) {
    switch (type) {
      case "headers":
        return entry.headers;
      case "limiters":
        return entry.pqueue;
      case "oxylabs":
      case "decodo":
        return entry.adaptar;
      default:
        return entry.proxy;
    }
  }

  /**
   * Lists the distinct `domain` values configured for a type.
   *
   * @param {string} type - "headers", "limiters", "oxylabs", "decodo"; anything else means proxies.
   * @returns {Array} Domain values de-duplicated by identity.
   */
  getConfiguredDomains(type) {
    const domains = this._entriesFor(type)
      .filter((entry) => entry.domain)
      .map((entry) => entry.domain);
    return [...new Set(domains)];
  }

  /**
   * Removes every domain-specific entry whose domain equals `domain`
   * from all configuration lists.
   *
   * @param {*} domain - Domain value (or array of values) to remove.
   * @returns {this}
   */
  removeDomain(domain) {
    const keep = (entry) => !entry.domain || !this._domainsEqual(entry.domain, domain);
    this.requestHeaders = this.requestHeaders.filter(keep);
    this.proxies = this.proxies.filter(keep);
    this.limiters = this.limiters.filter(keep);
    this.oxylabs = this.oxylabs.filter(keep);
    this.decodo = this.decodo.filter(keep);
    return this;
  }

  /** Compares two domain values; arrays are compared element by element. */
  _domainsEqual(left, right) {
    if (Array.isArray(left) && Array.isArray(right)) {
      return left.length === right.length && left.every((item, index) => item === right[index]);
    }
    return left === right;
  }

  /**
   * Counts total, global and domain-specific entries per configuration type.
   *
   * @returns {object} `{ headers, proxies, limiters, oxylabs, decodo }`, each
   *   `{ total, global, domainSpecific }`.
   */
  getConfigurationSummary() {
    const summarize = (entries) => ({
      total: entries.length,
      global: entries.filter((entry) => entry.isGlobal).length,
      domainSpecific: entries.filter((entry) => !entry.isGlobal && entry.domain).length,
    });
    return {
      headers: summarize(this.requestHeaders),
      proxies: summarize(this.proxies),
      limiters: summarize(this.limiters),
      oxylabs: summarize(this.oxylabs),
      decodo: summarize(this.decodo),
    };
  }

  /**
   * Registers header entries. Entries without a domain and without
   * `isGlobal`, or with no headers, are skipped. A `Headers` instance is
   * converted to a plain object before it is stored.
   */
  _addHeaders(config) {
    if (!config || !config.enable) return;
    for (const entry of config.httpHeaders ?? []) {
      const { domain, isGlobal, headers } = entry;
      if (!domain && !isGlobal) continue;
      if (!headers) continue;
      const plain = headers instanceof Headers ? Object.fromEntries(headers.entries()) : headers;
      if (Object.keys(plain).length < 1) continue;
      this.requestHeaders.push({ domain, isGlobal, headers: plain });
    }
  }

  /** Registers proxy entries; entries with an empty proxy object are skipped. */
  _addProxies(config) {
    if (!config || !config.enable) return;
    for (const entry of config.proxies ?? []) {
      const { domain, isGlobal, proxy } = entry;
      if (!domain && !isGlobal) continue;
      if (!proxy || Object.keys(proxy).length < 1) continue;
      this.proxies.push({ domain, isGlobal, proxy });
    }
  }

  /** Registers limiter entries, creating one RezoQueue per entry from its options. */
  _addLimiters(config) {
    if (!config || !config.enable) return;
    for (const entry of config.limiters ?? []) {
      const { domain, isGlobal, options } = entry;
      if (!domain && !isGlobal) continue;
      if (!options || Object.keys(options).length < 1) continue;
      this.limiters.push({ domain, isGlobal, pqueue: new RezoQueue(options) });
    }
  }

  /** Registers Oxylabs entries, creating one Oxylabs adapter per entry. */
  _addOxylabs(config) {
    if (!config || !config.enable) return;
    for (const entry of config.labs ?? []) {
      const { domain, isGlobal, options } = entry;
      if (!domain && !isGlobal) continue;
      if (!options || Object.keys(options).length < 1) continue;
      this.oxylabs.push({ domain, isGlobal, adaptar: new Oxylabs(options) });
    }
  }

  /** Registers Decodo entries, creating one Decodo adapter per entry. */
  _addDecodo(config) {
    if (!config || !config.enable) return;
    for (const entry of config.labs ?? []) {
      const { domain, isGlobal, options } = entry;
      if (!domain && !isGlobal) continue;
      if (!options || Object.keys(options).length < 1) continue;
      this.decodo.push({ domain, isGlobal, adaptar: new Decodo(options) });
    }
  }

  /** Adds one header entry. @returns {this} */
  addHeaders(entry) {
    this._addHeaders({ enable: true, httpHeaders: [entry] });
    return this;
  }

  /** Adds one proxy entry. @returns {this} */
  addProxy(entry) {
    this._addProxies({ enable: true, proxies: [entry] });
    return this;
  }

  /** Adds one limiter entry. @returns {this} */
  addLimiter(entry) {
    this._addLimiters({ enable: true, limiters: [entry] });
    return this;
  }

  /** Adds one Oxylabs entry. @returns {this} */
  addOxylabs(entry) {
    this._addOxylabs({ enable: true, labs: [entry] });
    return this;
  }

  /** Adds one Decodo entry. @returns {this} */
  addDecodo(entry) {
    this._addDecodo({ enable: true, labs: [entry] });
    return this;
  }

  /**
   * Drops every entry flagged `isGlobal` from all configuration lists.
   *
   * @returns {this}
   */
  clearGlobalConfigs() {
    const notGlobal = (entry) => !entry.isGlobal;
    if (Array.isArray(this.requestHeaders)) this.requestHeaders = this.requestHeaders.filter(notGlobal);
    if (Array.isArray(this.oxylabs)) this.oxylabs = this.oxylabs.filter(notGlobal);
    if (Array.isArray(this.decodo)) this.decodo = this.decodo.filter(notGlobal);
    if (Array.isArray(this.limiters)) this.limiters = this.limiters.filter(notGlobal);
    if (Array.isArray(this.proxies)) this.proxies = this.proxies.filter(notGlobal);
    return this;
  }

  /**
   * Looks up the configured value (headers object, queue, adapter or proxy)
   * for a URL.
   *
   * Entries whose domain matches the URL win. When none match and
   * `useGlobal` is truthy, entries flagged `isGlobal` are used instead.
   *
   * @param {string} url - URL or bare hostname.
   * @param {string} type - "headers", "limiters", "oxylabs", "decodo"; anything else means proxies.
   * @param {boolean} [useGlobal] - Allow falling back to global entries.
   * @param {boolean} [random] - Pick a random candidate instead of the first.
   * @returns {*} The stored value, or null when nothing applies.
   */
  getAdapter(url, type, useGlobal, random) {
    if (!this.getDomainName(url)) return null;

    const entries = this._entriesFor(type);
    const pick = (candidates) => candidates[random ? this.rnd(0, candidates.length - 1) : 0];

    const matching = entries.filter((entry) => this._hasDomain(url, entry.domain));
    if (matching.length > 0) return this._payloadOf(type, pick(matching));

    if (useGlobal) {
      // Only global entries are eligible here; a non-global entry for another
      // domain must never be returned, nor hide a global entry placed after it.
      const globals = entries.filter((entry) => entry.isGlobal);
      if (globals.length > 0) return this._payloadOf(type, pick(globals));
    }
    return null;
  }

  /** Returns a random integer in the inclusive range [min, max]. */
  rnd(min = 0, max = Number.MAX_VALUE) {
    return Math.floor(Math.random() * (max - min + 1)) + min;
  }

  /**
   * Reports whether any entry of the given type applies to the URL.
   *
   * @param {string} url - URL or bare hostname.
   * @param {string} type - Configuration type (see getAdapter).
   * @param {boolean} [useGlobal] - Also accept any global entry.
   * @returns {boolean}
   */
  hasDomain(url, type, useGlobal) {
    if (!this.getDomainName(url)) return false;
    const entries = this._entriesFor(type);
    if (entries.some((entry) => this._hasDomain(url, entry.domain))) return true;
    return Boolean(useGlobal) && entries.some((entry) => entry.isGlobal);
  }

  /**
   * Builds the request headers for a URL: configured headers first, then the
   * caller's headers on top, then optionally a random user agent.
   *
   * @param {string} url - URL or bare hostname.
   * @param {boolean} [useGlobal] - Allow global header entries.
   * @param {Headers|object} [extraHeaders] - Headers that override configured
   *   ones; for plain objects only string values are copied.
   * @param {boolean} [useRandomUserAgent] - Overwrite "user-agent" with a random one.
   * @returns {object} Plain object of header name/value pairs.
   */
  pickHeaders(url, useGlobal, extraHeaders, useRandomUserAgent) {
    const configured = this.getAdapter(url, "headers", useGlobal);
    const merged = new Headers(configured ?? {});

    if (extraHeaders && extraHeaders instanceof Headers) {
      // Iterate the Headers entries directly; Object.entries() on the
      // iterator object would yield nothing.
      for (const [name, value] of extraHeaders.entries()) merged.set(name, value);
    } else if (extraHeaders && typeof extraHeaders === "object") {
      for (const [name, value] of Object.entries(extraHeaders)) {
        if (typeof value === "string") merged.set(name, value);
      }
    }

    if (useRandomUserAgent) merged.set("user-agent", this.getRandomUserAgent());
    return Object.fromEntries(merged.entries());
  }

  /**
   * Tests whether a URL matches a configured domain value.
   *
   * A domain value may be a RegExp, a string, or an array of either. Strings
   * are tried as: exact hostname (case-insensitive), `*` wildcard pattern,
   * regex source (optionally written as `/source/flags`), and finally a
   * suffix relation between the hostname and the configured string.
   */
  _hasDomain(url, domain) {
    if (!domain) return false;
    const host = this.getDomainName(url);
    if (!host) return false;
    const hostLower = host.toLowerCase();

    // lastIndex is reset before each test so a regex carrying the g or y flag
    // gives the same answer on every call.
    const testBoth = (regex) => {
      regex.lastIndex = 0;
      if (regex.test(host)) return true;
      regex.lastIndex = 0;
      return regex.test(url);
    };

    const looksLikeRegex = (text) =>
      /[\^\$\*\+\?\{\}\[\]\(\)\|\\]/.test(text) ||
      text.startsWith("/") ||
      text.includes(".*") ||
      text.includes(".+");

    const matches = (candidate) => {
      if (candidate instanceof RegExp) return testBoth(candidate);

      const text = candidate.toString().trim();
      const textLower = text.toLowerCase();
      if (hostLower === textLower) return true;

      if (text.includes("*")) {
        // Escape every regex metacharacter, then turn the escaped "*" back
        // into a "match anything" wildcard.
        const source = text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&").replace(/\\\*/g, ".*");
        return testBoth(new RegExp(`^${source}$`, "i"));
      }

      if (looksLikeRegex(text)) {
        try {
          let source = text;
          let flags = "i";
          const literal = text.match(/^\/(.*)\/(\w*)$/);
          if (literal) {
            source = literal[1];
            flags = literal[2] || "i";
          }
          return testBoth(new RegExp(source, flags));
        } catch {
          // Not a valid regex after all: fall back to a substring check.
          return hostLower.includes(textLower);
        }
      }

      return (
        hostLower === textLower ||
        hostLower.endsWith(`.${textLower}`) ||
        textLower.endsWith(`.${hostLower}`)
      );
    };

    if (Array.isArray(domain)) return domain.some((candidate) => matches(candidate));
    return matches(domain);
  }

  /**
   * Returns the hostname of an http(s) URL, the input itself when it is a
   * bare hostname, or null otherwise.
   */
  getDomainName(value) {
    if (this.isValidUrl(value)) return new URL(value).hostname;
    if (this.isHostName(value)) return value;
    return null;
  }

  /** Reports whether the value is a bare hostname of at most 255 characters. */
  isHostName(value) {
    if (!value) return false;
    if (value.length > 255) return false;
    const host = value.trim().toLowerCase();
    return HOSTNAME_PATTERN.test(host) && !host.startsWith("-") && !host.endsWith("-");
  }

  /** Reports whether the value parses as an http(s) URL with a hostname matching HOSTNAME_PATTERN. */
  isValidUrl(value) {
    if (!value) return false;
    try {
      const parsed = new URL(value.trim());
      if (!parsed.protocol || !["http:", "https:"].includes(parsed.protocol.toLowerCase())) return false;
      if (!parsed.hostname) return false;
      return HOSTNAME_PATTERN.test(parsed.hostname);
    } catch {
      return false;
    }
  }

  /** Returns one user agent chosen at random from the generated pool. */
  getRandomUserAgent() {
    return this.userAgents[Math.floor(Math.random() * this.userAgents.length)];
  }
}

/**
 * Generates a pool of 200 user-agent strings by pairing a random browser
 * with a random operating-system token.
 *
 * @returns {string[]}
 */
function generateUserAgents() {
  const browsers = [
    { name: "Chrome", version: "91.0.4472.124", engine: "AppleWebKit/537.36" },
    { name: "Firefox", version: "89.0", engine: "Gecko/20100101" },
    { name: "Safari", version: "14.1.1", engine: "AppleWebKit/605.1.15" },
    { name: "Edge", version: "91.0.864.59", engine: "AppleWebKit/537.36" },
    { name: "Opera", version: "77.0.4054.277", engine: "AppleWebKit/537.36" },
    { name: "Vivaldi", version: "3.8.2259.42", engine: "AppleWebKit/537.36" },
    { name: "Brave", version: "1.26.74", engine: "AppleWebKit/537.36" },
    { name: "Chromium", version: "91.0.4472.101", engine: "AppleWebKit/537.36" },
    { name: "Yandex", version: "21.5.3.742", engine: "AppleWebKit/537.36" },
    { name: "Maxthon", version: "5.3.8.2000", engine: "AppleWebKit/537.36" },
  ];
  const systems = [
    "Windows NT 10.0",
    "Windows NT 6.1",
    "Macintosh; Intel Mac OS X 10_15_7",
    "Macintosh; Intel Mac OS X 11_4_0",
    "X11; Linux x86_64",
    "X11; Ubuntu; Linux x86_64",
  ];

  const pool = [];
  for (let index = 0; index < 200; index++) {
    const browser = browsers[Math.floor(Math.random() * browsers.length)];
    const system = systems[Math.floor(Math.random() * systems.length)];
    let agent = "";
    switch (browser.name) {
      case "Chrome":
        agent = `Mozilla/5.0 (${system}) ${browser.engine} (KHTML, like Gecko) Chrome/${browser.version} Safari/537.36`;
        break;
      case "Firefox":
        agent = `Mozilla/5.0 (${system}; rv:${browser.version}) ${browser.engine} Firefox/${browser.version}`;
        break;
      case "Safari":
        agent = `Mozilla/5.0 (${system}) ${browser.engine} (KHTML, like Gecko) Version/${browser.version} Safari/605.1.15`;
        break;
      case "Edge":
        agent = `Mozilla/5.0 (${system}) ${browser.engine} (KHTML, like Gecko) Chrome/${browser.version} Safari/537.36 Edg/${browser.version}`;
        break;
      case "Opera":
        agent = `Mozilla/5.0 (${system}) ${browser.engine} (KHTML, like Gecko) Chrome/${browser.version} Safari/537.36 OPR/${browser.version}`;
        break;
      case "Vivaldi":
        agent = `Mozilla/5.0 (${system}) ${browser.engine} (KHTML, like Gecko) Chrome/${browser.version} Safari/537.36 Vivaldi/${browser.version}`;
        break;
      case "Brave":
        agent = `Mozilla/5.0 (${system}) ${browser.engine} (KHTML, like Gecko) Chrome/${browser.version} Safari/537.36 Brave/${browser.version}`;
        break;
      case "Chromium":
        agent = `Mozilla/5.0 (${system}) ${browser.engine} (KHTML, like Gecko) Chromium/${browser.version} Chrome/${browser.version} Safari/537.36`;
        break;
      case "Yandex":
        agent = `Mozilla/5.0 (${system}) ${browser.engine} (KHTML, like Gecko) Chrome/${browser.version} YaBrowser/${browser.version} Safari/537.36`;
        break;
      case "Maxthon":
        agent = `Mozilla/5.0 (${system}) ${browser.engine} (KHTML, like Gecko) Chrome/${browser.version} Safari/537.36 Maxthon/${browser.version}`;
        break;
    }
    pool.push(agent);
  }
  return pool;
}

moduleExports.CrawlerOptions = CrawlerOptions;
|
|
1
|
+
/**
 * CommonJS build of the crawler options module.
 *
 * Exposes a single export, `CrawlerOptions`, which stores crawler-wide
 * settings plus per-domain / global headers, proxies, rate limiters and
 * scraping-service adapters (Oxylabs, Decodo).
 */
const { defineProperty, getOwnPropertyNames, getOwnPropertyDescriptor } = Object;
const hasOwn = Object.prototype.hasOwnProperty;

// One interop wrapper per export object.
const commonJsWrappers = new WeakMap();

/**
 * Wraps an export object in an `__esModule`-flagged object whose properties
 * are live getters onto the source object.
 *
 * Only keys that exist on `from` at call time get a getter, so every export
 * key must be declared before this function is called.
 *
 * @param {object|Function} from - Object holding the module's exports.
 * @returns {object} The (cached) interop wrapper.
 */
const toCommonJS = (from) => {
  const cached = commonJsWrappers.get(from);
  if (cached) return cached;

  const wrapper = defineProperty({}, "__esModule", { value: true });
  if ((from && typeof from === "object") || typeof from === "function") {
    for (const key of getOwnPropertyNames(from)) {
      if (hasOwn.call(wrapper, key)) continue;
      const descriptor = getOwnPropertyDescriptor(from, key);
      defineProperty(wrapper, key, {
        get: () => from[key],
        enumerable: !descriptor || descriptor.enumerable,
      });
    }
  }
  commonJsWrappers.set(from, wrapper);
  return wrapper;
};

// The key is declared up front so the wrapper installs a live getter for it;
// the class itself is assigned at the bottom of the module.
const moduleExports = { CrawlerOptions: undefined };
module.exports = toCommonJS(moduleExports);

const { RezoQueue } = require("../queue/queue.cjs");
const { Oxylabs } = require("./addon/oxylabs/index.cjs");
const path = require("node:path");
const os = require("node:os");
const { Decodo } = require("./addon/decodo/index.cjs");

// Dot-separated labels followed by an alphabetic TLD of at least two letters.
const HOSTNAME_PATTERN = /^(?:(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,})$/;

/**
 * Crawler configuration container.
 *
 * Scalar settings are copied from the constructor argument with defaults.
 * Headers, proxies, limiters and service adapters are kept as lists of
 * `{ domain, isGlobal, ... }` entries and looked up per URL.
 */
class CrawlerOptions {
  baseUrl;
  adapter;
  enableNavigationHistory;
  sessionId;
  rejectUnauthorized;
  userAgent;
  useRndUserAgent;
  timeout;
  maxRedirects;
  maxRetryAttempts;
  retryDelay;
  retryOnStatusCode;
  forceRevisit;
  retryWithoutProxyOnStatusCode;
  retryOnProxyError;
  maxRetryOnProxyError;
  allowRevisiting;
  enableCache;
  cacheTTL;
  cacheDir;
  throwFatalError;
  debug;
  oxylabs = [];
  decodo = [];
  proxies = [];
  limiters = [];
  requestHeaders = [];
  userAgents = generateUserAgents();

  /**
   * @param {object} [options] - Crawler options; every field is optional.
   */
  constructor(options = {}) {
    this.baseUrl = options.baseUrl || "";
    this.adapter = options.adapter ?? "http";
    this.enableNavigationHistory = options.enableNavigationHistory ?? false;
    // Without an explicit id, build one from the current time and a short
    // base-36 random suffix.
    this.sessionId =
      options.sessionId ?? `session_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;
    this.rejectUnauthorized = options.rejectUnauthorized ?? true;
    this.userAgent = options.userAgent;
    this.useRndUserAgent = options.useRndUserAgent ?? false;
    this.timeout = options.timeout ?? 30000;
    this.maxRedirects = options.maxRedirects ?? 10;
    this.maxRetryAttempts = options.maxRetryAttempts ?? 3;
    this.retryDelay = options.retryDelay ?? 0;
    this.retryOnStatusCode = options.retryOnStatusCode ?? [408, 429, 500, 502, 503, 504];
    this.forceRevisit = options.forceRevisit ?? false;
    this.retryWithoutProxyOnStatusCode = options.retryWithoutProxyOnStatusCode ?? [407, 403];
    this.retryOnProxyError = options.retryOnProxyError ?? true;
    this.maxRetryOnProxyError = options.maxRetryOnProxyError ?? 3;
    this.allowRevisiting = options.allowRevisiting ?? false;
    this.enableCache = options.enableCache ?? true;
    this.cacheTTL = options.cacheTTL ?? 604800000;
    this.cacheDir = options.cacheDir ?? path.join(os.tmpdir(), "uiniqhtt_cache");
    this.throwFatalError = options.throwFatalError ?? false;
    this.debug = options.debug ?? false;

    // NOTE(review): Decodo entries are only registered through addDecodo();
    // the constructor does not read an `options.decodo` field — confirm intended.
    this._addHeaders(options.headers);
    this._addOxylabs(options.oxylabs);
    this._addProxies(options.proxy);
    this._addLimiters(options.limiter);
  }

  /**
   * Returns the entry list for a configuration type; any type other than
   * "headers", "limiters", "oxylabs" or "decodo" selects the proxies list.
   */
  _entriesFor(type) {
    switch (type) {
      case "headers":
        return this.requestHeaders;
      case "limiters":
        return this.limiters;
      case "oxylabs":
        return this.oxylabs;
      case "decodo":
        return this.decodo;
      default:
        return this.proxies;
    }
  }

  /** Extracts the value stored in an entry of the given configuration type. */
  _payloadOf(type, entry) {
    switch (type) {
      case "headers":
        return entry.headers;
      case "limiters":
        return entry.pqueue;
      case "oxylabs":
      case "decodo":
        return entry.adaptar;
      default:
        return entry.proxy;
    }
  }

  /**
   * Lists the distinct `domain` values configured for a type.
   *
   * @param {string} type - "headers", "limiters", "oxylabs", "decodo"; anything else means proxies.
   * @returns {Array} Domain values de-duplicated by identity.
   */
  getConfiguredDomains(type) {
    const domains = this._entriesFor(type)
      .filter((entry) => entry.domain)
      .map((entry) => entry.domain);
    return [...new Set(domains)];
  }

  /**
   * Removes every domain-specific entry whose domain equals `domain`
   * from all configuration lists.
   *
   * @param {*} domain - Domain value (or array of values) to remove.
   * @returns {this}
   */
  removeDomain(domain) {
    const keep = (entry) => !entry.domain || !this._domainsEqual(entry.domain, domain);
    this.requestHeaders = this.requestHeaders.filter(keep);
    this.proxies = this.proxies.filter(keep);
    this.limiters = this.limiters.filter(keep);
    this.oxylabs = this.oxylabs.filter(keep);
    this.decodo = this.decodo.filter(keep);
    return this;
  }

  /** Compares two domain values; arrays are compared element by element. */
  _domainsEqual(left, right) {
    if (Array.isArray(left) && Array.isArray(right)) {
      return left.length === right.length && left.every((item, index) => item === right[index]);
    }
    return left === right;
  }

  /**
   * Counts total, global and domain-specific entries per configuration type.
   *
   * @returns {object} `{ headers, proxies, limiters, oxylabs, decodo }`, each
   *   `{ total, global, domainSpecific }`.
   */
  getConfigurationSummary() {
    const summarize = (entries) => ({
      total: entries.length,
      global: entries.filter((entry) => entry.isGlobal).length,
      domainSpecific: entries.filter((entry) => !entry.isGlobal && entry.domain).length,
    });
    return {
      headers: summarize(this.requestHeaders),
      proxies: summarize(this.proxies),
      limiters: summarize(this.limiters),
      oxylabs: summarize(this.oxylabs),
      decodo: summarize(this.decodo),
    };
  }

  /**
   * Registers header entries. Entries without a domain and without
   * `isGlobal`, or with no headers, are skipped. A `Headers` instance is
   * converted to a plain object before it is stored.
   */
  _addHeaders(config) {
    if (!config || !config.enable) return;
    for (const entry of config.httpHeaders ?? []) {
      const { domain, isGlobal, headers } = entry;
      if (!domain && !isGlobal) continue;
      if (!headers) continue;
      const plain = headers instanceof Headers ? Object.fromEntries(headers.entries()) : headers;
      if (Object.keys(plain).length < 1) continue;
      this.requestHeaders.push({ domain, isGlobal, headers: plain });
    }
  }

  /** Registers proxy entries; entries with an empty proxy object are skipped. */
  _addProxies(config) {
    if (!config || !config.enable) return;
    for (const entry of config.proxies ?? []) {
      const { domain, isGlobal, proxy } = entry;
      if (!domain && !isGlobal) continue;
      if (!proxy || Object.keys(proxy).length < 1) continue;
      this.proxies.push({ domain, isGlobal, proxy });
    }
  }

  /** Registers limiter entries, creating one RezoQueue per entry from its options. */
  _addLimiters(config) {
    if (!config || !config.enable) return;
    for (const entry of config.limiters ?? []) {
      const { domain, isGlobal, options } = entry;
      if (!domain && !isGlobal) continue;
      if (!options || Object.keys(options).length < 1) continue;
      this.limiters.push({ domain, isGlobal, pqueue: new RezoQueue(options) });
    }
  }

  /** Registers Oxylabs entries, creating one Oxylabs adapter per entry. */
  _addOxylabs(config) {
    if (!config || !config.enable) return;
    for (const entry of config.labs ?? []) {
      const { domain, isGlobal, options } = entry;
      if (!domain && !isGlobal) continue;
      if (!options || Object.keys(options).length < 1) continue;
      this.oxylabs.push({ domain, isGlobal, adaptar: new Oxylabs(options) });
    }
  }

  /** Registers Decodo entries, creating one Decodo adapter per entry. */
  _addDecodo(config) {
    if (!config || !config.enable) return;
    for (const entry of config.labs ?? []) {
      const { domain, isGlobal, options } = entry;
      if (!domain && !isGlobal) continue;
      if (!options || Object.keys(options).length < 1) continue;
      this.decodo.push({ domain, isGlobal, adaptar: new Decodo(options) });
    }
  }

  /** Adds one header entry. @returns {this} */
  addHeaders(entry) {
    this._addHeaders({ enable: true, httpHeaders: [entry] });
    return this;
  }

  /** Adds one proxy entry. @returns {this} */
  addProxy(entry) {
    this._addProxies({ enable: true, proxies: [entry] });
    return this;
  }

  /** Adds one limiter entry. @returns {this} */
  addLimiter(entry) {
    this._addLimiters({ enable: true, limiters: [entry] });
    return this;
  }

  /** Adds one Oxylabs entry. @returns {this} */
  addOxylabs(entry) {
    this._addOxylabs({ enable: true, labs: [entry] });
    return this;
  }

  /** Adds one Decodo entry. @returns {this} */
  addDecodo(entry) {
    this._addDecodo({ enable: true, labs: [entry] });
    return this;
  }

  /**
   * Drops every entry flagged `isGlobal` from all configuration lists.
   *
   * @returns {this}
   */
  clearGlobalConfigs() {
    const notGlobal = (entry) => !entry.isGlobal;
    if (Array.isArray(this.requestHeaders)) this.requestHeaders = this.requestHeaders.filter(notGlobal);
    if (Array.isArray(this.oxylabs)) this.oxylabs = this.oxylabs.filter(notGlobal);
    if (Array.isArray(this.decodo)) this.decodo = this.decodo.filter(notGlobal);
    if (Array.isArray(this.limiters)) this.limiters = this.limiters.filter(notGlobal);
    if (Array.isArray(this.proxies)) this.proxies = this.proxies.filter(notGlobal);
    return this;
  }

  /**
   * Looks up the configured value (headers object, queue, adapter or proxy)
   * for a URL.
   *
   * Entries whose domain matches the URL win. When none match and
   * `useGlobal` is truthy, entries flagged `isGlobal` are used instead.
   *
   * @param {string} url - URL or bare hostname.
   * @param {string} type - "headers", "limiters", "oxylabs", "decodo"; anything else means proxies.
   * @param {boolean} [useGlobal] - Allow falling back to global entries.
   * @param {boolean} [random] - Pick a random candidate instead of the first.
   * @returns {*} The stored value, or null when nothing applies.
   */
  getAdapter(url, type, useGlobal, random) {
    if (!this.getDomainName(url)) return null;

    const entries = this._entriesFor(type);
    const pick = (candidates) => candidates[random ? this.rnd(0, candidates.length - 1) : 0];

    const matching = entries.filter((entry) => this._hasDomain(url, entry.domain));
    if (matching.length > 0) return this._payloadOf(type, pick(matching));

    if (useGlobal) {
      // Only global entries are eligible here; a non-global entry for another
      // domain must never be returned, nor hide a global entry placed after it.
      const globals = entries.filter((entry) => entry.isGlobal);
      if (globals.length > 0) return this._payloadOf(type, pick(globals));
    }
    return null;
  }

  /** Returns a random integer in the inclusive range [min, max]. */
  rnd(min = 0, max = Number.MAX_VALUE) {
    return Math.floor(Math.random() * (max - min + 1)) + min;
  }

  /**
   * Reports whether any entry of the given type applies to the URL.
   *
   * @param {string} url - URL or bare hostname.
   * @param {string} type - Configuration type (see getAdapter).
   * @param {boolean} [useGlobal] - Also accept any global entry.
   * @returns {boolean}
   */
  hasDomain(url, type, useGlobal) {
    if (!this.getDomainName(url)) return false;
    const entries = this._entriesFor(type);
    if (entries.some((entry) => this._hasDomain(url, entry.domain))) return true;
    return Boolean(useGlobal) && entries.some((entry) => entry.isGlobal);
  }

  /**
   * Builds the request headers for a URL: configured headers first, then the
   * caller's headers on top, then optionally a random user agent.
   *
   * @param {string} url - URL or bare hostname.
   * @param {boolean} [useGlobal] - Allow global header entries.
   * @param {Headers|object} [extraHeaders] - Headers that override configured
   *   ones; for plain objects only string values are copied.
   * @param {boolean} [useRandomUserAgent] - Overwrite "user-agent" with a random one.
   * @returns {object} Plain object of header name/value pairs.
   */
  pickHeaders(url, useGlobal, extraHeaders, useRandomUserAgent) {
    const configured = this.getAdapter(url, "headers", useGlobal);
    const merged = new Headers(configured ?? {});

    if (extraHeaders && extraHeaders instanceof Headers) {
      // Iterate the Headers entries directly; Object.entries() on the
      // iterator object would yield nothing.
      for (const [name, value] of extraHeaders.entries()) merged.set(name, value);
    } else if (extraHeaders && typeof extraHeaders === "object") {
      for (const [name, value] of Object.entries(extraHeaders)) {
        if (typeof value === "string") merged.set(name, value);
      }
    }

    if (useRandomUserAgent) merged.set("user-agent", this.getRandomUserAgent());
    return Object.fromEntries(merged.entries());
  }

  /**
   * Tests whether a URL matches a configured domain value.
   *
   * A domain value may be a RegExp, a string, or an array of either. Strings
   * are tried as: exact hostname (case-insensitive), `*` wildcard pattern,
   * regex source (optionally written as `/source/flags`), and finally a
   * suffix relation between the hostname and the configured string.
   */
  _hasDomain(url, domain) {
    if (!domain) return false;
    const host = this.getDomainName(url);
    if (!host) return false;
    const hostLower = host.toLowerCase();

    // lastIndex is reset before each test so a regex carrying the g or y flag
    // gives the same answer on every call.
    const testBoth = (regex) => {
      regex.lastIndex = 0;
      if (regex.test(host)) return true;
      regex.lastIndex = 0;
      return regex.test(url);
    };

    const looksLikeRegex = (text) =>
      /[\^\$\*\+\?\{\}\[\]\(\)\|\\]/.test(text) ||
      text.startsWith("/") ||
      text.includes(".*") ||
      text.includes(".+");

    const matches = (candidate) => {
      if (candidate instanceof RegExp) return testBoth(candidate);

      const text = candidate.toString().trim();
      const textLower = text.toLowerCase();
      if (hostLower === textLower) return true;

      if (text.includes("*")) {
        // Escape every regex metacharacter, then turn the escaped "*" back
        // into a "match anything" wildcard.
        const source = text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&").replace(/\\\*/g, ".*");
        return testBoth(new RegExp(`^${source}$`, "i"));
      }

      if (looksLikeRegex(text)) {
        try {
          let source = text;
          let flags = "i";
          const literal = text.match(/^\/(.*)\/(\w*)$/);
          if (literal) {
            source = literal[1];
            flags = literal[2] || "i";
          }
          return testBoth(new RegExp(source, flags));
        } catch {
          // Not a valid regex after all: fall back to a substring check.
          return hostLower.includes(textLower);
        }
      }

      return (
        hostLower === textLower ||
        hostLower.endsWith(`.${textLower}`) ||
        textLower.endsWith(`.${hostLower}`)
      );
    };

    if (Array.isArray(domain)) return domain.some((candidate) => matches(candidate));
    return matches(domain);
  }

  /**
   * Returns the hostname of an http(s) URL, the input itself when it is a
   * bare hostname, or null otherwise.
   */
  getDomainName(value) {
    if (this.isValidUrl(value)) return new URL(value).hostname;
    if (this.isHostName(value)) return value;
    return null;
  }

  /** Reports whether the value is a bare hostname of at most 255 characters. */
  isHostName(value) {
    if (!value) return false;
    if (value.length > 255) return false;
    const host = value.trim().toLowerCase();
    return HOSTNAME_PATTERN.test(host) && !host.startsWith("-") && !host.endsWith("-");
  }

  /** Reports whether the value parses as an http(s) URL with a hostname matching HOSTNAME_PATTERN. */
  isValidUrl(value) {
    if (!value) return false;
    try {
      const parsed = new URL(value.trim());
      if (!parsed.protocol || !["http:", "https:"].includes(parsed.protocol.toLowerCase())) return false;
      if (!parsed.hostname) return false;
      return HOSTNAME_PATTERN.test(parsed.hostname);
    } catch {
      return false;
    }
  }

  /** Returns one user agent chosen at random from the generated pool. */
  getRandomUserAgent() {
    return this.userAgents[Math.floor(Math.random() * this.userAgents.length)];
  }
}

/**
 * Generates a pool of 200 user-agent strings by pairing a random browser
 * with a random operating-system token.
 *
 * @returns {string[]}
 */
function generateUserAgents() {
  const browsers = [
    { name: "Chrome", version: "91.0.4472.124", engine: "AppleWebKit/537.36" },
    { name: "Firefox", version: "89.0", engine: "Gecko/20100101" },
    { name: "Safari", version: "14.1.1", engine: "AppleWebKit/605.1.15" },
    { name: "Edge", version: "91.0.864.59", engine: "AppleWebKit/537.36" },
    { name: "Opera", version: "77.0.4054.277", engine: "AppleWebKit/537.36" },
    { name: "Vivaldi", version: "3.8.2259.42", engine: "AppleWebKit/537.36" },
    { name: "Brave", version: "1.26.74", engine: "AppleWebKit/537.36" },
    { name: "Chromium", version: "91.0.4472.101", engine: "AppleWebKit/537.36" },
    { name: "Yandex", version: "21.5.3.742", engine: "AppleWebKit/537.36" },
    { name: "Maxthon", version: "5.3.8.2000", engine: "AppleWebKit/537.36" },
  ];
  const systems = [
    "Windows NT 10.0",
    "Windows NT 6.1",
    "Macintosh; Intel Mac OS X 10_15_7",
    "Macintosh; Intel Mac OS X 11_4_0",
    "X11; Linux x86_64",
    "X11; Ubuntu; Linux x86_64",
  ];

  const pool = [];
  for (let index = 0; index < 200; index++) {
    const browser = browsers[Math.floor(Math.random() * browsers.length)];
    const system = systems[Math.floor(Math.random() * systems.length)];
    let agent = "";
    switch (browser.name) {
      case "Chrome":
        agent = `Mozilla/5.0 (${system}) ${browser.engine} (KHTML, like Gecko) Chrome/${browser.version} Safari/537.36`;
        break;
      case "Firefox":
        agent = `Mozilla/5.0 (${system}; rv:${browser.version}) ${browser.engine} Firefox/${browser.version}`;
        break;
      case "Safari":
        agent = `Mozilla/5.0 (${system}) ${browser.engine} (KHTML, like Gecko) Version/${browser.version} Safari/605.1.15`;
        break;
      case "Edge":
        agent = `Mozilla/5.0 (${system}) ${browser.engine} (KHTML, like Gecko) Chrome/${browser.version} Safari/537.36 Edg/${browser.version}`;
        break;
      case "Opera":
        agent = `Mozilla/5.0 (${system}) ${browser.engine} (KHTML, like Gecko) Chrome/${browser.version} Safari/537.36 OPR/${browser.version}`;
        break;
      case "Vivaldi":
        agent = `Mozilla/5.0 (${system}) ${browser.engine} (KHTML, like Gecko) Chrome/${browser.version} Safari/537.36 Vivaldi/${browser.version}`;
        break;
      case "Brave":
        agent = `Mozilla/5.0 (${system}) ${browser.engine} (KHTML, like Gecko) Chrome/${browser.version} Safari/537.36 Brave/${browser.version}`;
        break;
      case "Chromium":
        agent = `Mozilla/5.0 (${system}) ${browser.engine} (KHTML, like Gecko) Chromium/${browser.version} Chrome/${browser.version} Safari/537.36`;
        break;
      case "Yandex":
        agent = `Mozilla/5.0 (${system}) ${browser.engine} (KHTML, like Gecko) Chrome/${browser.version} YaBrowser/${browser.version} Safari/537.36`;
        break;
      case "Maxthon":
        agent = `Mozilla/5.0 (${system}) ${browser.engine} (KHTML, like Gecko) Chrome/${browser.version} Safari/537.36 Maxthon/${browser.version}`;
        break;
    }
    pool.push(agent);
  }
  return pool;
}

moduleExports.CrawlerOptions = CrawlerOptions;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
import { RezoQueue } from "../queue/queue.js";
import { Oxylabs } from "./addon/oxylabs/index.js";
import path from "node:path";
import os from "node:os";
import { Decodo } from "./addon/decodo/index.js";

/**
 * Hostname shape accepted by `isHostName` and `isValidUrl`: one or more
 * dot-terminated labels followed by an alphabetic TLD of at least 2 letters.
 */
const HOSTNAME_PATTERN = /^(?:(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,})$/;

/**
 * Holds crawler settings plus per-domain / global registries of request
 * headers, proxies, rate limiters and Oxylabs / Decodo adapters.
 */
class CrawlerOptions {
  baseUrl;
  rejectUnauthorized;
  userAgent;
  useRndUserAgent;
  timeout;
  maxRedirects;
  maxRetryAttempts;
  retryDelay;
  retryOnStatusCode;
  forceRevisit;
  retryWithoutProxyOnStatusCode;
  retryOnProxyError;
  maxRetryOnProxyError;
  allowRevisiting;
  enableCache;
  cacheTTL;
  cacheDir;
  throwFatalError;
  debug;
  oxylabs = [];
  decodo = [];
  proxies = [];
  limiters = [];
  requestHeaders = [];
  userAgents = generateUserAgents();

  /**
   * @param {object} [options] - Crawler options; every field is optional and
   *   falls back to the default shown below.
   */
  constructor(options = {}) {
    this.baseUrl = options.baseUrl || "";
    this.rejectUnauthorized = options.rejectUnauthorized ?? true;
    this.userAgent = options.userAgent;
    this.useRndUserAgent = options.useRndUserAgent ?? false;
    this.timeout = options.timeout ?? 30000;
    this.maxRedirects = options.maxRedirects ?? 10;
    this.maxRetryAttempts = options.maxRetryAttempts ?? 3;
    this.retryDelay = options.retryDelay ?? 0;
    this.retryOnStatusCode = options.retryOnStatusCode ?? [408, 429, 500, 502, 503, 504];
    this.forceRevisit = options.forceRevisit ?? false;
    this.retryWithoutProxyOnStatusCode = options.retryWithoutProxyOnStatusCode ?? [407, 403];
    this.retryOnProxyError = options.retryOnProxyError ?? true;
    this.maxRetryOnProxyError = options.maxRetryOnProxyError ?? 3;
    this.allowRevisiting = options.allowRevisiting ?? false;
    this.enableCache = options.enableCache ?? true;
    this.cacheTTL = options.cacheTTL ?? 604800000;
    this.cacheDir = options.cacheDir ?? path.join(os.tmpdir(), "uiniqhtt_cache");
    this.throwFatalError = options.throwFatalError ?? false;
    this.debug = options.debug ?? false;
    this._addHeaders(options.headers);
    this._addOxylabs(options.oxylabs);
    this._addProxies(options.proxy);
    this._addLimiters(options.limiter);
  }

  /**
   * Maps a registry type to its backing array. Any type other than
   * "headers", "limiters", "oxylabs" or "decodo" selects the proxies.
   */
  _entriesFor(type) {
    switch (type) {
      case "headers":
        return this.requestHeaders;
      case "limiters":
        return this.limiters;
      case "oxylabs":
        return this.oxylabs;
      case "decodo":
        return this.decodo;
      default:
        return this.proxies;
    }
  }

  /** Extracts the value handed back to callers from a registry entry. */
  _payloadOf(type, entry) {
    switch (type) {
      case "headers":
        return entry.headers;
      case "limiters":
        return entry.pqueue;
      case "oxylabs":
      case "decodo":
        return entry.adaptar;
      default:
        return entry.proxy;
    }
  }

  /**
   * Lists the distinct `domain` values registered for the given type.
   * @param {string} type - "headers", "limiters", "oxylabs", "decodo"; anything else means proxies.
   * @returns {Array} Unique domain values in registration order.
   */
  getConfiguredDomains(type) {
    const domains = this._entriesFor(type)
      .filter((entry) => entry.domain)
      .map((entry) => entry.domain);
    return [...new Set(domains)];
  }

  /**
   * Removes every entry whose `domain` equals the given domain (see
   * `_domainsEqual`) from all registries. Entries without a domain are kept.
   * @returns {this}
   */
  removeDomain(domain) {
    const keep = (entry) => !entry.domain || !this._domainsEqual(entry.domain, domain);
    this.requestHeaders = this.requestHeaders.filter(keep);
    this.proxies = this.proxies.filter(keep);
    this.limiters = this.limiters.filter(keep);
    this.oxylabs = this.oxylabs.filter(keep);
    this.decodo = this.decodo.filter(keep);
    return this;
  }

  /** Two arrays are equal when element-wise identical; anything else uses `===`. */
  _domainsEqual(a, b) {
    if (Array.isArray(a) && Array.isArray(b)) {
      return a.length === b.length && a.every((item, index) => item === b[index]);
    }
    return a === b;
  }

  /**
   * Counts total, global and domain-specific entries per registry.
   * @returns {object} `{ headers, proxies, limiters, oxylabs, decodo }`, each
   *   `{ total, global, domainSpecific }`.
   */
  getConfigurationSummary() {
    const summarize = (entries) => ({
      total: entries.length,
      global: entries.filter((entry) => entry.isGlobal).length,
      domainSpecific: entries.filter((entry) => !entry.isGlobal && entry.domain).length,
    });
    return {
      headers: summarize(this.requestHeaders),
      proxies: summarize(this.proxies),
      limiters: summarize(this.limiters),
      oxylabs: summarize(this.oxylabs),
      decodo: summarize(this.decodo),
    };
  }

  /**
   * Registers header sets. Entries that are neither domain-bound nor global,
   * or whose headers are missing/empty, are skipped. A `Headers` instance is
   * converted to a plain object before it is stored.
   */
  _addHeaders(config) {
    if (!config || !config.enable) return;
    for (const entry of config.httpHeaders ?? []) {
      const { domain, isGlobal } = entry;
      if (!domain && !isGlobal) continue;
      if (!entry.headers) continue;
      // Object.keys() on a Headers instance is always empty, so convert first.
      const headers = entry.headers instanceof Headers
        ? Object.fromEntries(entry.headers.entries())
        : entry.headers;
      if (Object.keys(headers).length < 1) continue;
      this.requestHeaders.push({ domain, isGlobal, headers });
    }
  }

  /** Registers proxies; skips entries without scope or with an empty proxy. */
  _addProxies(config) {
    if (!config || !config.enable) return;
    for (const { domain, isGlobal, proxy } of config.proxies ?? []) {
      if (!domain && !isGlobal) continue;
      if (!proxy || Object.keys(proxy).length < 1) continue;
      this.proxies.push({ domain, isGlobal, proxy });
    }
  }

  /** Registers limiters; each stored entry owns a `RezoQueue` built from `options`. */
  _addLimiters(config) {
    if (!config || !config.enable) return;
    for (const { domain, isGlobal, options } of config.limiters ?? []) {
      if (!domain && !isGlobal) continue;
      if (!options || Object.keys(options).length < 1) continue;
      this.limiters.push({ domain, isGlobal, pqueue: new RezoQueue(options) });
    }
  }

  /** Registers Oxylabs adapters built from each entry's `options`. */
  _addOxylabs(config) {
    if (!config || !config.enable) return;
    for (const { domain, isGlobal, options } of config.labs ?? []) {
      if (!domain && !isGlobal) continue;
      if (!options || Object.keys(options).length < 1) continue;
      // "adaptar" (sic) is the property name read back by getAdapter.
      this.oxylabs.push({ domain, isGlobal, adaptar: new Oxylabs(options) });
    }
  }

  /** Registers Decodo adapters built from each entry's `options`. */
  _addDecodo(config) {
    if (!config || !config.enable) return;
    for (const { domain, isGlobal, options } of config.labs ?? []) {
      if (!domain && !isGlobal) continue;
      if (!options || Object.keys(options).length < 1) continue;
      this.decodo.push({ domain, isGlobal, adaptar: new Decodo(options) });
    }
  }

  /** Adds one header entry. @returns {this} */
  addHeaders(entry) {
    this._addHeaders({ enable: true, httpHeaders: [entry] });
    return this;
  }

  /** Adds one proxy entry. @returns {this} */
  addProxy(entry) {
    this._addProxies({ enable: true, proxies: [entry] });
    return this;
  }

  /** Adds one limiter entry. @returns {this} */
  addLimiter(entry) {
    this._addLimiters({ enable: true, limiters: [entry] });
    return this;
  }

  /** Adds one Oxylabs entry. @returns {this} */
  addOxylabs(entry) {
    this._addOxylabs({ enable: true, labs: [entry] });
    return this;
  }

  /** Adds one Decodo entry. @returns {this} */
  addDecodo(entry) {
    this._addDecodo({ enable: true, labs: [entry] });
    return this;
  }

  /** Drops every entry flagged `isGlobal` from all registries. @returns {this} */
  clearGlobalConfigs() {
    const dropGlobal = (entries) =>
      (Array.isArray(entries) ? entries.filter((entry) => !entry.isGlobal) : entries);
    this.requestHeaders = dropGlobal(this.requestHeaders);
    this.oxylabs = dropGlobal(this.oxylabs);
    this.decodo = dropGlobal(this.decodo);
    this.limiters = dropGlobal(this.limiters);
    this.proxies = dropGlobal(this.proxies);
    return this;
  }

  /**
   * Resolves the configured value for a URL/hostname.
   * Domain-matching entries win; otherwise, when `useGlobal` is truthy, a
   * global entry is used.
   * @param {string} url - URL or bare hostname.
   * @param {string} type - Registry type (see `_entriesFor`).
   * @param {boolean} [useGlobal] - Allow falling back to global entries.
   * @param {boolean} [random] - Pick a random candidate instead of the first.
   * @returns {*} headers object, queue, adapter or proxy; `null` when nothing applies.
   */
  getAdapter(url, type, useGlobal, random) {
    if (!this.getDomainName(url)) return null;
    const entries = this._entriesFor(type);
    const pick = (candidates) =>
      (random ? candidates[this.rnd(0, candidates.length - 1)] : candidates[0]);

    const matching = entries.filter((entry) => this._hasDomain(url, entry.domain));
    if (matching.length) return this._payloadOf(type, pick(matching));

    if (useGlobal) {
      // Only global entries are candidates here, wherever they sit in the list.
      const globals = entries.filter((entry) => entry.isGlobal);
      if (globals.length) return this._payloadOf(type, pick(globals));
    }
    return null;
  }

  /** Random integer in the inclusive range [min, max]. */
  rnd(min = 0, max = Number.MAX_VALUE) {
    return Math.floor(Math.random() * (max - min + 1)) + min;
  }

  /**
   * Tells whether any entry of `type` applies to the URL/hostname.
   * @param {boolean} [useGlobal] - Also accept any global entry.
   * @returns {boolean}
   */
  hasDomain(url, type, useGlobal) {
    if (!this.getDomainName(url)) return false;
    const entries = this._entriesFor(type);
    if (entries.some((entry) => this._hasDomain(url, entry.domain))) return true;
    return Boolean(useGlobal) && entries.some((entry) => entry.isGlobal);
  }

  /**
   * Builds the header object for a request: configured headers for the URL,
   * overridden by `headers`, optionally with a random user-agent.
   * @param {string} url
   * @param {boolean} [useGlobal] - Forwarded to `getAdapter`.
   * @param {Headers|object} [headers] - Overrides; for plain objects only string values are applied.
   * @param {boolean} [randomUserAgent] - Set "user-agent" from the generated pool.
   * @returns {object} Plain object of header names to values.
   */
  pickHeaders(url, useGlobal, headers, randomUserAgent) {
    const configured = this.getAdapter(url, "headers", useGlobal);
    const merged = new Headers(configured ?? {});
    if (headers && headers instanceof Headers) {
      // Iterate the Headers entries directly; Object.entries() of the iterator is empty.
      for (const [name, value] of headers.entries()) merged.set(name, value);
    } else if (headers && typeof headers === "object") {
      for (const [name, value] of Object.entries(headers)) {
        if (typeof value === "string") merged.set(name, value);
      }
    }
    if (randomUserAgent) merged.set("user-agent", this.getRandomUserAgent());
    return Object.fromEntries(merged.entries());
  }

  /**
   * Matches a URL/hostname against a configured domain spec.
   * A spec may be a RegExp, a string (exact host, `*` wildcard, regex source
   * or `/source/flags` form, or a parent/sub-domain), or an array of those.
   * @returns {boolean}
   */
  _hasDomain(url, domainSpec) {
    if (!domainSpec) return false;
    const hostname = this.getDomainName(url);
    if (!hostname) return false;

    const testBoth = (regex) => {
      // Global/sticky regexes carry lastIndex between calls; start from 0 each time.
      const stateful = regex.global || regex.sticky;
      if (stateful) regex.lastIndex = 0;
      if (regex.test(hostname)) return true;
      if (stateful) regex.lastIndex = 0;
      return regex.test(url);
    };

    const looksLikeRegex = (text) =>
      /[\^\$\*\+\?\{\}\[\]\(\)\|\\]/.test(text) ||
      text.startsWith("/") ||
      text.includes(".*") ||
      text.includes(".+");

    const matches = (spec) => {
      if (spec instanceof RegExp) return testBoth(spec);

      const pattern = spec.toString().trim();
      const host = hostname.toLowerCase();
      const wanted = pattern.toLowerCase();
      if (host === wanted) return true;

      if (pattern.includes("*")) {
        // Escape everything, then turn the escaped "*" back into ".*".
        const source = pattern
          .replace(/[.*+?^${}()|[\]\\]/g, "\\$&")
          .replace(/\\\*/g, ".*");
        return testBoth(new RegExp(`^${source}$`, "i"));
      }

      if (looksLikeRegex(pattern)) {
        try {
          let source = pattern;
          let flags = "i";
          const literal = pattern.match(/^\/(.*)\/(\w*)$/);
          if (literal) {
            source = literal[1];
            flags = literal[2] || "i";
          }
          return testBoth(new RegExp(source, flags));
        } catch {
          // Not a valid regex after all: fall back to a substring check.
          return host.includes(wanted);
        }
      }

      return host === wanted || host.endsWith("." + wanted) || wanted.endsWith("." + host);
    };

    if (Array.isArray(domainSpec)) return domainSpec.some((spec) => matches(spec));
    return matches(domainSpec);
  }

  /**
   * @param {string} value - URL or bare hostname.
   * @returns {string|null} The URL's hostname, the hostname itself, or `null` when neither is valid.
   */
  getDomainName(value) {
    if (this.isValidUrl(value)) return new URL(value).hostname;
    if (this.isHostName(value)) return value;
    return null;
  }

  /** True when the value (at most 255 chars) is shaped like `label.label.tld`. */
  isHostName(value) {
    if (!value) return false;
    if (value.length > 255) return false;
    const host = value.trim().toLowerCase();
    return HOSTNAME_PATTERN.test(host) && !host.startsWith("-") && !host.endsWith("-");
  }

  /** True for parseable http/https URLs whose hostname passes the hostname pattern. */
  isValidUrl(value) {
    if (!value) return false;
    try {
      const parsed = new URL(value.trim());
      if (!parsed.protocol || !["http:", "https:"].includes(parsed.protocol.toLowerCase())) {
        return false;
      }
      if (!parsed.hostname) return false;
      return HOSTNAME_PATTERN.test(parsed.hostname);
    } catch {
      return false;
    }
  }

  /** Returns a random entry from the generated user-agent pool. */
  getRandomUserAgent() {
    return this.userAgents[Math.floor(Math.random() * this.userAgents.length)];
  }
}

/**
 * Builds a pool of 200 user-agent strings by pairing a random browser
 * profile with a random platform token.
 * @returns {string[]}
 */
function generateUserAgents() {
  const browsers = [
    { name: "Chrome", version: "91.0.4472.124", engine: "AppleWebKit/537.36" },
    { name: "Firefox", version: "89.0", engine: "Gecko/20100101" },
    { name: "Safari", version: "14.1.1", engine: "AppleWebKit/605.1.15" },
    { name: "Edge", version: "91.0.864.59", engine: "AppleWebKit/537.36" },
    { name: "Opera", version: "77.0.4054.277", engine: "AppleWebKit/537.36" },
    { name: "Vivaldi", version: "3.8.2259.42", engine: "AppleWebKit/537.36" },
    { name: "Brave", version: "1.26.74", engine: "AppleWebKit/537.36" },
    { name: "Chromium", version: "91.0.4472.101", engine: "AppleWebKit/537.36" },
    { name: "Yandex", version: "21.5.3.742", engine: "AppleWebKit/537.36" },
    { name: "Maxthon", version: "5.3.8.2000", engine: "AppleWebKit/537.36" },
  ];
  const platforms = [
    "Windows NT 10.0",
    "Windows NT 6.1",
    "Macintosh; Intel Mac OS X 10_15_7",
    "Macintosh; Intel Mac OS X 11_4_0",
    "X11; Linux x86_64",
    "X11; Ubuntu; Linux x86_64",
  ];
  // Shared "Mozilla/5.0 (...) <engine> (KHTML, like Gecko)" prefix.
  const khtml = (platform, { engine }) => `Mozilla/5.0 (${platform}) ${engine} (KHTML, like Gecko)`;
  const formats = {
    Chrome: (p, b) => `${khtml(p, b)} Chrome/${b.version} Safari/537.36`,
    Firefox: (p, b) => `Mozilla/5.0 (${p}; rv:${b.version}) ${b.engine} Firefox/${b.version}`,
    Safari: (p, b) => `${khtml(p, b)} Version/${b.version} Safari/605.1.15`,
    Edge: (p, b) => `${khtml(p, b)} Chrome/${b.version} Safari/537.36 Edg/${b.version}`,
    Opera: (p, b) => `${khtml(p, b)} Chrome/${b.version} Safari/537.36 OPR/${b.version}`,
    Vivaldi: (p, b) => `${khtml(p, b)} Chrome/${b.version} Safari/537.36 Vivaldi/${b.version}`,
    Brave: (p, b) => `${khtml(p, b)} Chrome/${b.version} Safari/537.36 Brave/${b.version}`,
    Chromium: (p, b) => `${khtml(p, b)} Chromium/${b.version} Chrome/${b.version} Safari/537.36`,
    Yandex: (p, b) => `${khtml(p, b)} Chrome/${b.version} YaBrowser/${b.version} Safari/537.36`,
    Maxthon: (p, b) => `${khtml(p, b)} Chrome/${b.version} Safari/537.36 Maxthon/${b.version}`,
  };

  const pool = [];
  for (let index = 0; index < 200; index++) {
    const browser = browsers[Math.floor(Math.random() * browsers.length)];
    const platform = platforms[Math.floor(Math.random() * platforms.length)];
    const format = formats[browser.name];
    pool.push(format ? format(platform, browser) : "");
  }
  return pool;
}

export { CrawlerOptions };
|
|
1
|
+
import { RezoQueue } from "../queue/queue.js";
import { Oxylabs } from "./addon/oxylabs/index.js";
import path from "node:path";
import os from "node:os";
import { Decodo } from "./addon/decodo/index.js";

/**
 * Hostname shape accepted by `isHostName` and `isValidUrl`: one or more
 * dot-terminated labels followed by an alphabetic TLD of at least 2 letters.
 */
const HOSTNAME_PATTERN = /^(?:(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,})$/;

/**
 * Holds crawler settings plus per-domain / global registries of request
 * headers, proxies, rate limiters and Oxylabs / Decodo adapters.
 */
class CrawlerOptions {
  baseUrl;
  adapter;
  enableNavigationHistory;
  sessionId;
  rejectUnauthorized;
  userAgent;
  useRndUserAgent;
  timeout;
  maxRedirects;
  maxRetryAttempts;
  retryDelay;
  retryOnStatusCode;
  forceRevisit;
  retryWithoutProxyOnStatusCode;
  retryOnProxyError;
  maxRetryOnProxyError;
  allowRevisiting;
  enableCache;
  cacheTTL;
  cacheDir;
  throwFatalError;
  debug;
  oxylabs = [];
  decodo = [];
  proxies = [];
  limiters = [];
  requestHeaders = [];
  userAgents = generateUserAgents();

  /**
   * @param {object} [options] - Crawler options; every field is optional and
   *   falls back to the default shown below.
   */
  constructor(options = {}) {
    this.baseUrl = options.baseUrl || "";
    this.adapter = options.adapter ?? "http";
    this.enableNavigationHistory = options.enableNavigationHistory ?? false;
    // Default id: "session_<timestamp>_<up to 6 base-36 chars>".
    this.sessionId = options.sessionId
      ?? `session_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;
    this.rejectUnauthorized = options.rejectUnauthorized ?? true;
    this.userAgent = options.userAgent;
    this.useRndUserAgent = options.useRndUserAgent ?? false;
    this.timeout = options.timeout ?? 30000;
    this.maxRedirects = options.maxRedirects ?? 10;
    this.maxRetryAttempts = options.maxRetryAttempts ?? 3;
    this.retryDelay = options.retryDelay ?? 0;
    this.retryOnStatusCode = options.retryOnStatusCode ?? [408, 429, 500, 502, 503, 504];
    this.forceRevisit = options.forceRevisit ?? false;
    this.retryWithoutProxyOnStatusCode = options.retryWithoutProxyOnStatusCode ?? [407, 403];
    this.retryOnProxyError = options.retryOnProxyError ?? true;
    this.maxRetryOnProxyError = options.maxRetryOnProxyError ?? 3;
    this.allowRevisiting = options.allowRevisiting ?? false;
    this.enableCache = options.enableCache ?? true;
    this.cacheTTL = options.cacheTTL ?? 604800000;
    this.cacheDir = options.cacheDir ?? path.join(os.tmpdir(), "uiniqhtt_cache");
    this.throwFatalError = options.throwFatalError ?? false;
    this.debug = options.debug ?? false;
    this._addHeaders(options.headers);
    this._addOxylabs(options.oxylabs);
    this._addProxies(options.proxy);
    this._addLimiters(options.limiter);
  }

  /**
   * Maps a registry type to its backing array. Any type other than
   * "headers", "limiters", "oxylabs" or "decodo" selects the proxies.
   */
  _entriesFor(type) {
    switch (type) {
      case "headers":
        return this.requestHeaders;
      case "limiters":
        return this.limiters;
      case "oxylabs":
        return this.oxylabs;
      case "decodo":
        return this.decodo;
      default:
        return this.proxies;
    }
  }

  /** Extracts the value handed back to callers from a registry entry. */
  _payloadOf(type, entry) {
    switch (type) {
      case "headers":
        return entry.headers;
      case "limiters":
        return entry.pqueue;
      case "oxylabs":
      case "decodo":
        return entry.adaptar;
      default:
        return entry.proxy;
    }
  }

  /**
   * Lists the distinct `domain` values registered for the given type.
   * @param {string} type - "headers", "limiters", "oxylabs", "decodo"; anything else means proxies.
   * @returns {Array} Unique domain values in registration order.
   */
  getConfiguredDomains(type) {
    const domains = this._entriesFor(type)
      .filter((entry) => entry.domain)
      .map((entry) => entry.domain);
    return [...new Set(domains)];
  }

  /**
   * Removes every entry whose `domain` equals the given domain (see
   * `_domainsEqual`) from all registries. Entries without a domain are kept.
   * @returns {this}
   */
  removeDomain(domain) {
    const keep = (entry) => !entry.domain || !this._domainsEqual(entry.domain, domain);
    this.requestHeaders = this.requestHeaders.filter(keep);
    this.proxies = this.proxies.filter(keep);
    this.limiters = this.limiters.filter(keep);
    this.oxylabs = this.oxylabs.filter(keep);
    this.decodo = this.decodo.filter(keep);
    return this;
  }

  /** Two arrays are equal when element-wise identical; anything else uses `===`. */
  _domainsEqual(a, b) {
    if (Array.isArray(a) && Array.isArray(b)) {
      return a.length === b.length && a.every((item, index) => item === b[index]);
    }
    return a === b;
  }

  /**
   * Counts total, global and domain-specific entries per registry.
   * @returns {object} `{ headers, proxies, limiters, oxylabs, decodo }`, each
   *   `{ total, global, domainSpecific }`.
   */
  getConfigurationSummary() {
    const summarize = (entries) => ({
      total: entries.length,
      global: entries.filter((entry) => entry.isGlobal).length,
      domainSpecific: entries.filter((entry) => !entry.isGlobal && entry.domain).length,
    });
    return {
      headers: summarize(this.requestHeaders),
      proxies: summarize(this.proxies),
      limiters: summarize(this.limiters),
      oxylabs: summarize(this.oxylabs),
      decodo: summarize(this.decodo),
    };
  }

  /**
   * Registers header sets. Entries that are neither domain-bound nor global,
   * or whose headers are missing/empty, are skipped. A `Headers` instance is
   * converted to a plain object before it is stored.
   */
  _addHeaders(config) {
    if (!config || !config.enable) return;
    for (const entry of config.httpHeaders ?? []) {
      const { domain, isGlobal } = entry;
      if (!domain && !isGlobal) continue;
      if (!entry.headers) continue;
      // Object.keys() on a Headers instance is always empty, so convert first.
      const headers = entry.headers instanceof Headers
        ? Object.fromEntries(entry.headers.entries())
        : entry.headers;
      if (Object.keys(headers).length < 1) continue;
      this.requestHeaders.push({ domain, isGlobal, headers });
    }
  }

  /** Registers proxies; skips entries without scope or with an empty proxy. */
  _addProxies(config) {
    if (!config || !config.enable) return;
    for (const { domain, isGlobal, proxy } of config.proxies ?? []) {
      if (!domain && !isGlobal) continue;
      if (!proxy || Object.keys(proxy).length < 1) continue;
      this.proxies.push({ domain, isGlobal, proxy });
    }
  }

  /** Registers limiters; each stored entry owns a `RezoQueue` built from `options`. */
  _addLimiters(config) {
    if (!config || !config.enable) return;
    for (const { domain, isGlobal, options } of config.limiters ?? []) {
      if (!domain && !isGlobal) continue;
      if (!options || Object.keys(options).length < 1) continue;
      this.limiters.push({ domain, isGlobal, pqueue: new RezoQueue(options) });
    }
  }

  /** Registers Oxylabs adapters built from each entry's `options`. */
  _addOxylabs(config) {
    if (!config || !config.enable) return;
    for (const { domain, isGlobal, options } of config.labs ?? []) {
      if (!domain && !isGlobal) continue;
      if (!options || Object.keys(options).length < 1) continue;
      // "adaptar" (sic) is the property name read back by getAdapter.
      this.oxylabs.push({ domain, isGlobal, adaptar: new Oxylabs(options) });
    }
  }

  /** Registers Decodo adapters built from each entry's `options`. */
  _addDecodo(config) {
    if (!config || !config.enable) return;
    for (const { domain, isGlobal, options } of config.labs ?? []) {
      if (!domain && !isGlobal) continue;
      if (!options || Object.keys(options).length < 1) continue;
      this.decodo.push({ domain, isGlobal, adaptar: new Decodo(options) });
    }
  }

  /** Adds one header entry. @returns {this} */
  addHeaders(entry) {
    this._addHeaders({ enable: true, httpHeaders: [entry] });
    return this;
  }

  /** Adds one proxy entry. @returns {this} */
  addProxy(entry) {
    this._addProxies({ enable: true, proxies: [entry] });
    return this;
  }

  /** Adds one limiter entry. @returns {this} */
  addLimiter(entry) {
    this._addLimiters({ enable: true, limiters: [entry] });
    return this;
  }

  /** Adds one Oxylabs entry. @returns {this} */
  addOxylabs(entry) {
    this._addOxylabs({ enable: true, labs: [entry] });
    return this;
  }

  /** Adds one Decodo entry. @returns {this} */
  addDecodo(entry) {
    this._addDecodo({ enable: true, labs: [entry] });
    return this;
  }

  /** Drops every entry flagged `isGlobal` from all registries. @returns {this} */
  clearGlobalConfigs() {
    const dropGlobal = (entries) =>
      (Array.isArray(entries) ? entries.filter((entry) => !entry.isGlobal) : entries);
    this.requestHeaders = dropGlobal(this.requestHeaders);
    this.oxylabs = dropGlobal(this.oxylabs);
    this.decodo = dropGlobal(this.decodo);
    this.limiters = dropGlobal(this.limiters);
    this.proxies = dropGlobal(this.proxies);
    return this;
  }

  /**
   * Resolves the configured value for a URL/hostname.
   * Domain-matching entries win; otherwise, when `useGlobal` is truthy, a
   * global entry is used.
   * @param {string} url - URL or bare hostname.
   * @param {string} type - Registry type (see `_entriesFor`).
   * @param {boolean} [useGlobal] - Allow falling back to global entries.
   * @param {boolean} [random] - Pick a random candidate instead of the first.
   * @returns {*} headers object, queue, adapter or proxy; `null` when nothing applies.
   */
  getAdapter(url, type, useGlobal, random) {
    if (!this.getDomainName(url)) return null;
    const entries = this._entriesFor(type);
    const pick = (candidates) =>
      (random ? candidates[this.rnd(0, candidates.length - 1)] : candidates[0]);

    const matching = entries.filter((entry) => this._hasDomain(url, entry.domain));
    if (matching.length) return this._payloadOf(type, pick(matching));

    if (useGlobal) {
      // Only global entries are candidates here, wherever they sit in the list.
      const globals = entries.filter((entry) => entry.isGlobal);
      if (globals.length) return this._payloadOf(type, pick(globals));
    }
    return null;
  }

  /** Random integer in the inclusive range [min, max]. */
  rnd(min = 0, max = Number.MAX_VALUE) {
    return Math.floor(Math.random() * (max - min + 1)) + min;
  }

  /**
   * Tells whether any entry of `type` applies to the URL/hostname.
   * @param {boolean} [useGlobal] - Also accept any global entry.
   * @returns {boolean}
   */
  hasDomain(url, type, useGlobal) {
    if (!this.getDomainName(url)) return false;
    const entries = this._entriesFor(type);
    if (entries.some((entry) => this._hasDomain(url, entry.domain))) return true;
    return Boolean(useGlobal) && entries.some((entry) => entry.isGlobal);
  }

  /**
   * Builds the header object for a request: configured headers for the URL,
   * overridden by `headers`, optionally with a random user-agent.
   * @param {string} url
   * @param {boolean} [useGlobal] - Forwarded to `getAdapter`.
   * @param {Headers|object} [headers] - Overrides; for plain objects only string values are applied.
   * @param {boolean} [randomUserAgent] - Set "user-agent" from the generated pool.
   * @returns {object} Plain object of header names to values.
   */
  pickHeaders(url, useGlobal, headers, randomUserAgent) {
    const configured = this.getAdapter(url, "headers", useGlobal);
    const merged = new Headers(configured ?? {});
    if (headers && headers instanceof Headers) {
      // Iterate the Headers entries directly; Object.entries() of the iterator is empty.
      for (const [name, value] of headers.entries()) merged.set(name, value);
    } else if (headers && typeof headers === "object") {
      for (const [name, value] of Object.entries(headers)) {
        if (typeof value === "string") merged.set(name, value);
      }
    }
    if (randomUserAgent) merged.set("user-agent", this.getRandomUserAgent());
    return Object.fromEntries(merged.entries());
  }

  /**
   * Matches a URL/hostname against a configured domain spec.
   * A spec may be a RegExp, a string (exact host, `*` wildcard, regex source
   * or `/source/flags` form, or a parent/sub-domain), or an array of those.
   * @returns {boolean}
   */
  _hasDomain(url, domainSpec) {
    if (!domainSpec) return false;
    const hostname = this.getDomainName(url);
    if (!hostname) return false;

    const testBoth = (regex) => {
      // Global/sticky regexes carry lastIndex between calls; start from 0 each time.
      const stateful = regex.global || regex.sticky;
      if (stateful) regex.lastIndex = 0;
      if (regex.test(hostname)) return true;
      if (stateful) regex.lastIndex = 0;
      return regex.test(url);
    };

    const looksLikeRegex = (text) =>
      /[\^\$\*\+\?\{\}\[\]\(\)\|\\]/.test(text) ||
      text.startsWith("/") ||
      text.includes(".*") ||
      text.includes(".+");

    const matches = (spec) => {
      if (spec instanceof RegExp) return testBoth(spec);

      const pattern = spec.toString().trim();
      const host = hostname.toLowerCase();
      const wanted = pattern.toLowerCase();
      if (host === wanted) return true;

      if (pattern.includes("*")) {
        // Escape everything, then turn the escaped "*" back into ".*".
        const source = pattern
          .replace(/[.*+?^${}()|[\]\\]/g, "\\$&")
          .replace(/\\\*/g, ".*");
        return testBoth(new RegExp(`^${source}$`, "i"));
      }

      if (looksLikeRegex(pattern)) {
        try {
          let source = pattern;
          let flags = "i";
          const literal = pattern.match(/^\/(.*)\/(\w*)$/);
          if (literal) {
            source = literal[1];
            flags = literal[2] || "i";
          }
          return testBoth(new RegExp(source, flags));
        } catch {
          // Not a valid regex after all: fall back to a substring check.
          return host.includes(wanted);
        }
      }

      return host === wanted || host.endsWith("." + wanted) || wanted.endsWith("." + host);
    };

    if (Array.isArray(domainSpec)) return domainSpec.some((spec) => matches(spec));
    return matches(domainSpec);
  }

  /**
   * @param {string} value - URL or bare hostname.
   * @returns {string|null} The URL's hostname, the hostname itself, or `null` when neither is valid.
   */
  getDomainName(value) {
    if (this.isValidUrl(value)) return new URL(value).hostname;
    if (this.isHostName(value)) return value;
    return null;
  }

  /** True when the value (at most 255 chars) is shaped like `label.label.tld`. */
  isHostName(value) {
    if (!value) return false;
    if (value.length > 255) return false;
    const host = value.trim().toLowerCase();
    return HOSTNAME_PATTERN.test(host) && !host.startsWith("-") && !host.endsWith("-");
  }

  /** True for parseable http/https URLs whose hostname passes the hostname pattern. */
  isValidUrl(value) {
    if (!value) return false;
    try {
      const parsed = new URL(value.trim());
      if (!parsed.protocol || !["http:", "https:"].includes(parsed.protocol.toLowerCase())) {
        return false;
      }
      if (!parsed.hostname) return false;
      return HOSTNAME_PATTERN.test(parsed.hostname);
    } catch {
      return false;
    }
  }

  /** Returns a random entry from the generated user-agent pool. */
  getRandomUserAgent() {
    return this.userAgents[Math.floor(Math.random() * this.userAgents.length)];
  }
}

/**
 * Builds a pool of 200 user-agent strings by pairing a random browser
 * profile with a random platform token.
 * @returns {string[]}
 */
function generateUserAgents() {
  const browsers = [
    { name: "Chrome", version: "91.0.4472.124", engine: "AppleWebKit/537.36" },
    { name: "Firefox", version: "89.0", engine: "Gecko/20100101" },
    { name: "Safari", version: "14.1.1", engine: "AppleWebKit/605.1.15" },
    { name: "Edge", version: "91.0.864.59", engine: "AppleWebKit/537.36" },
    { name: "Opera", version: "77.0.4054.277", engine: "AppleWebKit/537.36" },
    { name: "Vivaldi", version: "3.8.2259.42", engine: "AppleWebKit/537.36" },
    { name: "Brave", version: "1.26.74", engine: "AppleWebKit/537.36" },
    { name: "Chromium", version: "91.0.4472.101", engine: "AppleWebKit/537.36" },
    { name: "Yandex", version: "21.5.3.742", engine: "AppleWebKit/537.36" },
    { name: "Maxthon", version: "5.3.8.2000", engine: "AppleWebKit/537.36" },
  ];
  const platforms = [
    "Windows NT 10.0",
    "Windows NT 6.1",
    "Macintosh; Intel Mac OS X 10_15_7",
    "Macintosh; Intel Mac OS X 11_4_0",
    "X11; Linux x86_64",
    "X11; Ubuntu; Linux x86_64",
  ];
  // Shared "Mozilla/5.0 (...) <engine> (KHTML, like Gecko)" prefix.
  const khtml = (platform, { engine }) => `Mozilla/5.0 (${platform}) ${engine} (KHTML, like Gecko)`;
  const formats = {
    Chrome: (p, b) => `${khtml(p, b)} Chrome/${b.version} Safari/537.36`,
    Firefox: (p, b) => `Mozilla/5.0 (${p}; rv:${b.version}) ${b.engine} Firefox/${b.version}`,
    Safari: (p, b) => `${khtml(p, b)} Version/${b.version} Safari/605.1.15`,
    Edge: (p, b) => `${khtml(p, b)} Chrome/${b.version} Safari/537.36 Edg/${b.version}`,
    Opera: (p, b) => `${khtml(p, b)} Chrome/${b.version} Safari/537.36 OPR/${b.version}`,
    Vivaldi: (p, b) => `${khtml(p, b)} Chrome/${b.version} Safari/537.36 Vivaldi/${b.version}`,
    Brave: (p, b) => `${khtml(p, b)} Chrome/${b.version} Safari/537.36 Brave/${b.version}`,
    Chromium: (p, b) => `${khtml(p, b)} Chromium/${b.version} Chrome/${b.version} Safari/537.36`,
    Yandex: (p, b) => `${khtml(p, b)} Chrome/${b.version} YaBrowser/${b.version} Safari/537.36`,
    Maxthon: (p, b) => `${khtml(p, b)} Chrome/${b.version} Safari/537.36 Maxthon/${b.version}`,
  };

  const pool = [];
  for (let index = 0; index < 200; index++) {
    const browser = browsers[Math.floor(Math.random() * browsers.length)];
    const platform = platforms[Math.floor(Math.random() * platforms.length)];
    const format = formats[browser.name];
    pool.push(format ? format(platform, browser) : "");
  }
  return pool;
}

export { CrawlerOptions };
|