rezo 1.0.42 → 1.0.44

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (89)
  1. package/dist/adapters/curl.cjs +131 -29
  2. package/dist/adapters/curl.js +131 -29
  3. package/dist/adapters/entries/curl.d.ts +65 -0
  4. package/dist/adapters/entries/fetch.d.ts +65 -0
  5. package/dist/adapters/entries/http.d.ts +65 -0
  6. package/dist/adapters/entries/http2.d.ts +65 -0
  7. package/dist/adapters/entries/react-native.d.ts +65 -0
  8. package/dist/adapters/entries/xhr.d.ts +65 -0
  9. package/dist/adapters/http2.cjs +209 -22
  10. package/dist/adapters/http2.js +209 -22
  11. package/dist/adapters/index.cjs +6 -6
  12. package/dist/cache/index.cjs +9 -13
  13. package/dist/cache/index.js +0 -2
  14. package/dist/core/rezo.cjs +7 -0
  15. package/dist/core/rezo.js +7 -0
  16. package/dist/crawler/addon/decodo/index.cjs +1 -0
  17. package/dist/crawler/addon/decodo/index.js +1 -0
  18. package/dist/crawler/crawler-options.cjs +1 -0
  19. package/dist/crawler/crawler-options.js +1 -0
  20. package/dist/crawler/crawler.cjs +1070 -0
  21. package/dist/crawler/crawler.js +1068 -0
  22. package/dist/crawler/index.cjs +40 -0
  23. package/dist/{plugin → crawler}/index.js +4 -2
  24. package/dist/crawler/plugin/file-cacher.cjs +19 -0
  25. package/dist/crawler/plugin/file-cacher.js +19 -0
  26. package/dist/crawler/plugin/index.cjs +1 -0
  27. package/dist/crawler/plugin/index.js +1 -0
  28. package/dist/crawler/plugin/navigation-history.cjs +43 -0
  29. package/dist/crawler/plugin/navigation-history.js +43 -0
  30. package/dist/crawler/plugin/robots-txt.cjs +2 -0
  31. package/dist/crawler/plugin/robots-txt.js +2 -0
  32. package/dist/crawler/plugin/url-store.cjs +18 -0
  33. package/dist/crawler/plugin/url-store.js +18 -0
  34. package/dist/crawler.d.ts +511 -183
  35. package/dist/entries/crawler.cjs +5 -5
  36. package/dist/entries/crawler.js +2 -2
  37. package/dist/index.cjs +27 -24
  38. package/dist/index.d.ts +73 -0
  39. package/dist/index.js +1 -0
  40. package/dist/internal/agents/base.cjs +113 -0
  41. package/dist/internal/agents/base.js +110 -0
  42. package/dist/internal/agents/http-proxy.cjs +89 -0
  43. package/dist/internal/agents/http-proxy.js +86 -0
  44. package/dist/internal/agents/https-proxy.cjs +176 -0
  45. package/dist/internal/agents/https-proxy.js +173 -0
  46. package/dist/internal/agents/index.cjs +10 -0
  47. package/dist/internal/agents/index.js +5 -0
  48. package/dist/internal/agents/socks-client.cjs +571 -0
  49. package/dist/internal/agents/socks-client.js +567 -0
  50. package/dist/internal/agents/socks-proxy.cjs +75 -0
  51. package/dist/internal/agents/socks-proxy.js +72 -0
  52. package/dist/platform/browser.d.ts +65 -0
  53. package/dist/platform/bun.d.ts +65 -0
  54. package/dist/platform/deno.d.ts +65 -0
  55. package/dist/platform/node.d.ts +65 -0
  56. package/dist/platform/react-native.d.ts +65 -0
  57. package/dist/platform/worker.d.ts +65 -0
  58. package/dist/proxy/index.cjs +18 -16
  59. package/dist/proxy/index.js +17 -12
  60. package/dist/queue/index.cjs +8 -8
  61. package/dist/responses/buildError.cjs +11 -2
  62. package/dist/responses/buildError.js +11 -2
  63. package/dist/responses/universal/index.cjs +11 -11
  64. package/dist/utils/curl.cjs +317 -0
  65. package/dist/utils/curl.js +314 -0
  66. package/package.json +2 -6
  67. package/dist/cache/file-cacher.cjs +0 -264
  68. package/dist/cache/file-cacher.js +0 -261
  69. package/dist/cache/url-store.cjs +0 -288
  70. package/dist/cache/url-store.js +0 -285
  71. package/dist/plugin/addon/decodo/index.cjs +0 -1
  72. package/dist/plugin/addon/decodo/index.js +0 -1
  73. package/dist/plugin/crawler-options.cjs +0 -1
  74. package/dist/plugin/crawler-options.js +0 -1
  75. package/dist/plugin/crawler.cjs +0 -519
  76. package/dist/plugin/crawler.js +0 -517
  77. package/dist/plugin/index.cjs +0 -36
  78. /package/dist/{plugin → crawler}/addon/decodo/options.cjs +0 -0
  79. /package/dist/{plugin → crawler}/addon/decodo/options.js +0 -0
  80. /package/dist/{plugin → crawler}/addon/decodo/types.cjs +0 -0
  81. /package/dist/{plugin → crawler}/addon/decodo/types.js +0 -0
  82. /package/dist/{plugin → crawler}/addon/oxylabs/index.cjs +0 -0
  83. /package/dist/{plugin → crawler}/addon/oxylabs/index.js +0 -0
  84. /package/dist/{plugin → crawler}/addon/oxylabs/options.cjs +0 -0
  85. /package/dist/{plugin → crawler}/addon/oxylabs/options.js +0 -0
  86. /package/dist/{plugin → crawler}/addon/oxylabs/types.cjs +0 -0
  87. /package/dist/{plugin → crawler}/addon/oxylabs/types.js +0 -0
  88. /package/dist/{plugin → crawler}/scraper.cjs +0 -0
  89. /package/dist/{plugin → crawler}/scraper.js +0 -0
@@ -1,4 +1,5 @@
1
1
  import * as http2 from "node:http2";
2
+ import * as tls from "node:tls";
2
3
  import * as zlib from "node:zlib";
3
4
  import { URL } from "node:url";
4
5
  import { Readable } from "node:stream";
@@ -14,6 +15,8 @@ import { DownloadResponse } from '../responses/download.js';
14
15
  import { UploadResponse } from '../responses/upload.js';
15
16
  import { CompressionUtil } from '../utils/compression.js';
16
17
  import { isSameDomain, RezoPerformance } from '../utils/tools.js';
18
+ import { SocksClient } from '../internal/agents/socks-client.js';
19
+ import * as net from "node:net";
17
20
  import { ResponseCache } from '../cache/response-cache.js';
18
21
  let zstdDecompressSync = null;
19
22
  let zstdChecked = false;
@@ -194,8 +197,9 @@ class Http2SessionPool {
194
197
  this.cleanupInterval.unref();
195
198
  }
196
199
  }
197
- getSessionKey(url, options) {
198
- return `${url.protocol}//${url.host}`;
200
+ getSessionKey(url, options, proxy) {
201
+ const proxyKey = proxy ? typeof proxy === "string" ? proxy : `${proxy.protocol}://${proxy.host}:${proxy.port}` : "";
202
+ return `${url.protocol}//${url.host}${proxyKey ? `@${proxyKey}` : ""}`;
199
203
  }
200
204
  isSessionHealthy(session, entry) {
201
205
  if (session.closed || session.destroyed)
@@ -207,8 +211,8 @@ class Http2SessionPool {
207
211
  return false;
208
212
  return true;
209
213
  }
210
- async getSession(url, options, timeout, forceNew = false) {
211
- const key = this.getSessionKey(url, options);
214
+ async getSession(url, options, timeout, forceNew = false, proxy) {
215
+ const key = this.getSessionKey(url, options, proxy);
212
216
  const existing = this.sessions.get(key);
213
217
  if (!forceNew && existing && this.isSessionHealthy(existing.session, existing)) {
214
218
  existing.lastUsed = Date.now();
@@ -221,12 +225,13 @@ class Http2SessionPool {
221
225
  } catch {}
222
226
  this.sessions.delete(key);
223
227
  }
224
- const session = await this.createSession(url, options, timeout);
228
+ const session = await this.createSession(url, options, timeout, proxy);
225
229
  const entry = {
226
230
  session,
227
231
  lastUsed: Date.now(),
228
232
  refCount: 1,
229
- goawayReceived: false
233
+ goawayReceived: false,
234
+ proxy
230
235
  };
231
236
  this.sessions.set(key, entry);
232
237
  session.on("close", () => {
@@ -240,15 +245,19 @@ class Http2SessionPool {
240
245
  });
241
246
  return session;
242
247
  }
243
- createSession(url, options, timeout) {
248
+ async createSession(url, options, timeout, proxy) {
249
+ const authority = `${url.protocol}//${url.host}`;
250
+ const sessionOptions = {
251
+ ...options,
252
+ rejectUnauthorized: options?.rejectUnauthorized !== false,
253
+ ALPNProtocols: ["h2", "http/1.1"],
254
+ timeout
255
+ };
256
+ if (proxy) {
257
+ const tunnelSocket = await this.createProxyTunnel(url, proxy, timeout, options?.rejectUnauthorized);
258
+ sessionOptions.createConnection = () => tunnelSocket;
259
+ }
244
260
  return new Promise((resolve, reject) => {
245
- const authority = `${url.protocol}//${url.host}`;
246
- const sessionOptions = {
247
- ...options,
248
- rejectUnauthorized: options?.rejectUnauthorized !== false,
249
- ALPNProtocols: ["h2", "http/1.1"],
250
- timeout
251
- };
252
261
  const session = http2.connect(authority, sessionOptions);
253
262
  let settled = false;
254
263
  const timeoutId = timeout ? setTimeout(() => {
@@ -279,8 +288,186 @@ class Http2SessionPool {
279
288
  });
280
289
  });
281
290
  }
282
- releaseSession(url) {
283
- const key = this.getSessionKey(url);
291
+ async createProxyTunnel(url, proxy, timeout, rejectUnauthorized) {
292
+ return new Promise((resolve, reject) => {
293
+ let proxyUrl;
294
+ let proxyAuth;
295
+ if (typeof proxy === "string") {
296
+ proxyUrl = new URL(proxy);
297
+ if (proxyUrl.username || proxyUrl.password) {
298
+ proxyAuth = Buffer.from(`${decodeURIComponent(proxyUrl.username)}:${decodeURIComponent(proxyUrl.password)}`).toString("base64");
299
+ }
300
+ } else {
301
+ const protocol = proxy.protocol || "http";
302
+ let proxyUrlStr = `${protocol}://${proxy.host}:${proxy.port}`;
303
+ if (proxy.auth) {
304
+ const encodedUser = encodeURIComponent(proxy.auth.username);
305
+ const encodedPass = encodeURIComponent(proxy.auth.password);
306
+ proxyUrlStr = `${protocol}://${encodedUser}:${encodedPass}@${proxy.host}:${proxy.port}`;
307
+ proxyAuth = Buffer.from(`${proxy.auth.username}:${proxy.auth.password}`).toString("base64");
308
+ }
309
+ proxyUrl = new URL(proxyUrlStr);
310
+ }
311
+ const targetHost = url.hostname;
312
+ const targetPort = url.port || (url.protocol === "https:" ? "443" : "80");
313
+ if (proxyUrl.protocol.startsWith("socks")) {
314
+ const socksType = proxyUrl.protocol === "socks5:" || proxyUrl.protocol === "socks5h:" ? 5 : 4;
315
+ const socksOpts = {
316
+ proxy: {
317
+ host: proxyUrl.hostname,
318
+ port: parseInt(proxyUrl.port || "1080", 10),
319
+ type: socksType,
320
+ userId: proxyUrl.username ? decodeURIComponent(proxyUrl.username) : undefined,
321
+ password: proxyUrl.password ? decodeURIComponent(proxyUrl.password) : undefined
322
+ },
323
+ destination: {
324
+ host: targetHost,
325
+ port: parseInt(targetPort, 10)
326
+ },
327
+ command: "connect",
328
+ timeout
329
+ };
330
+ SocksClient.createConnection(socksOpts).then(({ socket }) => {
331
+ if (url.protocol === "https:") {
332
+ const tlsSocket = tls.connect({
333
+ socket,
334
+ host: targetHost,
335
+ servername: targetHost,
336
+ rejectUnauthorized: rejectUnauthorized !== false,
337
+ ALPNProtocols: ["h2", "http/1.1"]
338
+ });
339
+ const tlsTimeoutId = timeout ? setTimeout(() => {
340
+ tlsSocket.destroy();
341
+ reject(new Error(`TLS handshake timeout after ${timeout}ms`));
342
+ }, timeout) : null;
343
+ tlsSocket.on("secureConnect", () => {
344
+ if (tlsTimeoutId)
345
+ clearTimeout(tlsTimeoutId);
346
+ const alpn = tlsSocket.alpnProtocol;
347
+ if (alpn && alpn !== "h2") {
348
+ tlsSocket.destroy();
349
+ reject(new Error(`Server does not support HTTP/2 (negotiated: ${alpn})`));
350
+ return;
351
+ }
352
+ resolve(tlsSocket);
353
+ });
354
+ tlsSocket.on("error", (err) => {
355
+ if (tlsTimeoutId)
356
+ clearTimeout(tlsTimeoutId);
357
+ reject(new Error(`TLS handshake failed: ${err.message}`));
358
+ });
359
+ } else {
360
+ resolve(socket);
361
+ }
362
+ }).catch((err) => {
363
+ reject(new Error(`SOCKS proxy connection failed: ${err.message}`));
364
+ });
365
+ return;
366
+ }
367
+ const proxyHost = proxyUrl.hostname;
368
+ const proxyPort = parseInt(proxyUrl.port || (proxyUrl.protocol === "https:" ? "443" : "80"), 10);
369
+ let proxySocket;
370
+ const connectToProxy = () => {
371
+ if (proxyUrl.protocol === "https:") {
372
+ proxySocket = tls.connect({
373
+ host: proxyHost,
374
+ port: proxyPort,
375
+ rejectUnauthorized: rejectUnauthorized !== false
376
+ });
377
+ } else {
378
+ proxySocket = net.connect({
379
+ host: proxyHost,
380
+ port: proxyPort
381
+ });
382
+ }
383
+ let settled = false;
384
+ const timeoutId = timeout ? setTimeout(() => {
385
+ if (!settled) {
386
+ settled = true;
387
+ proxySocket.destroy();
388
+ reject(new Error(`Proxy connection timeout after ${timeout}ms`));
389
+ }
390
+ }, timeout) : null;
391
+ proxySocket.on("error", (err) => {
392
+ if (!settled) {
393
+ settled = true;
394
+ if (timeoutId)
395
+ clearTimeout(timeoutId);
396
+ reject(new Error(`Proxy connection error: ${err.message}`));
397
+ }
398
+ });
399
+ proxySocket.on("connect", () => {
400
+ const connectRequest = [
401
+ `CONNECT ${targetHost}:${targetPort} HTTP/1.1`,
402
+ `Host: ${targetHost}:${targetPort}`,
403
+ proxyAuth ? `Proxy-Authorization: Basic ${proxyAuth}` : "",
404
+ "",
405
+ ""
406
+ ].filter(Boolean).join(`\r
407
+ `);
408
+ proxySocket.write(connectRequest);
409
+ });
410
+ let responseBuffer = "";
411
+ proxySocket.on("data", function onData(data) {
412
+ if (settled)
413
+ return;
414
+ responseBuffer += data.toString();
415
+ const headerEnd = responseBuffer.indexOf(`\r
416
+ \r
417
+ `);
418
+ if (headerEnd !== -1) {
419
+ settled = true;
420
+ if (timeoutId)
421
+ clearTimeout(timeoutId);
422
+ proxySocket.removeListener("data", onData);
423
+ const statusLine = responseBuffer.split(`\r
424
+ `)[0];
425
+ const statusMatch = statusLine.match(/HTTP\/\d\.\d (\d{3})/);
426
+ const statusCode = statusMatch ? parseInt(statusMatch[1], 10) : 0;
427
+ if (statusCode === 200) {
428
+ if (url.protocol === "https:") {
429
+ const tlsSocket = tls.connect({
430
+ socket: proxySocket,
431
+ host: targetHost,
432
+ servername: targetHost,
433
+ rejectUnauthorized: rejectUnauthorized !== false,
434
+ ALPNProtocols: ["h2", "http/1.1"]
435
+ });
436
+ const tlsTimeoutId = timeout ? setTimeout(() => {
437
+ tlsSocket.destroy();
438
+ reject(new Error(`TLS handshake timeout after ${timeout}ms`));
439
+ }, timeout) : null;
440
+ tlsSocket.on("secureConnect", () => {
441
+ if (tlsTimeoutId)
442
+ clearTimeout(tlsTimeoutId);
443
+ const alpn = tlsSocket.alpnProtocol;
444
+ if (alpn && alpn !== "h2") {
445
+ tlsSocket.destroy();
446
+ reject(new Error(`Server does not support HTTP/2 (negotiated: ${alpn})`));
447
+ return;
448
+ }
449
+ resolve(tlsSocket);
450
+ });
451
+ tlsSocket.on("error", (err) => {
452
+ if (tlsTimeoutId)
453
+ clearTimeout(tlsTimeoutId);
454
+ reject(new Error(`TLS handshake failed: ${err.message}`));
455
+ });
456
+ } else {
457
+ resolve(proxySocket);
458
+ }
459
+ } else {
460
+ proxySocket.destroy();
461
+ reject(new Error(`Proxy CONNECT failed with status ${statusCode}: ${statusLine}`));
462
+ }
463
+ }
464
+ });
465
+ };
466
+ connectToProxy();
467
+ });
468
+ }
469
+ releaseSession(url, proxy) {
470
+ const key = this.getSessionKey(url, undefined, proxy);
284
471
  const entry = this.sessions.get(key);
285
472
  if (entry) {
286
473
  entry.refCount = Math.max(0, entry.refCount - 1);
@@ -293,8 +480,8 @@ class Http2SessionPool {
293
480
  }
294
481
  }
295
482
  }
296
- closeSession(url) {
297
- const key = this.getSessionKey(url);
483
+ closeSession(url, proxy) {
484
+ const key = this.getSessionKey(url, undefined, proxy);
298
485
  const entry = this.sessions.get(key);
299
486
  if (entry) {
300
487
  entry.session.close();
@@ -1008,10 +1195,10 @@ async function executeHttp2Stream(config, fetchOptions, requestCount, timing, _s
1008
1195
  const forceNewSession = requestCount > 0;
1009
1196
  let session;
1010
1197
  if (config.debug) {
1011
- console.log(`[Rezo Debug] HTTP/2: Acquiring session for ${url.host}${forceNewSession ? " (forcing new for redirect)" : ""}...`);
1198
+ console.log(`[Rezo Debug] HTTP/2: Acquiring session for ${url.host}${forceNewSession ? " (forcing new for redirect)" : ""}${fetchOptions.proxy ? " (via proxy)" : ""}...`);
1012
1199
  }
1013
1200
  try {
1014
- session = await (sessionPool || Http2SessionPool.getInstance()).getSession(url, sessionOptions, config.timeout !== null ? config.timeout : undefined, forceNewSession);
1201
+ session = await (sessionPool || Http2SessionPool.getInstance()).getSession(url, sessionOptions, config.timeout !== null ? config.timeout : undefined, forceNewSession, fetchOptions.proxy);
1015
1202
  if (config.debug) {
1016
1203
  console.log(`[Rezo Debug] HTTP/2: Session acquired successfully`);
1017
1204
  }
@@ -1240,7 +1427,7 @@ async function executeHttp2Stream(config, fetchOptions, requestCount, timing, _s
1240
1427
  config.transfer.requestSize = Buffer.byteLength(JSON.stringify(body), "utf8");
1241
1428
  }
1242
1429
  }
1243
- (sessionPool || Http2SessionPool.getInstance()).releaseSession(url);
1430
+ (sessionPool || Http2SessionPool.getInstance()).releaseSession(url, fetchOptions.proxy);
1244
1431
  if (isRedirect) {
1245
1432
  _stats.statusOnNext = "redirect";
1246
1433
  const partialResponse = {
@@ -1417,7 +1604,7 @@ async function executeHttp2Stream(config, fetchOptions, requestCount, timing, _s
1417
1604
  if (config.debug) {
1418
1605
  console.log(`[Rezo Debug] HTTP/2: Error in 'end' handler:`, endError.message);
1419
1606
  }
1420
- (sessionPool || Http2SessionPool.getInstance()).releaseSession(url);
1607
+ (sessionPool || Http2SessionPool.getInstance()).releaseSession(url, fetchOptions.proxy);
1421
1608
  const error = buildSmartError(config, fetchOptions, endError);
1422
1609
  _stats.statusOnNext = "error";
1423
1610
  resolve(error);
@@ -1,6 +1,6 @@
1
- const _mod_8y5us8 = require('./picker.cjs');
2
- exports.detectRuntime = _mod_8y5us8.detectRuntime;
3
- exports.getAdapterCapabilities = _mod_8y5us8.getAdapterCapabilities;
4
- exports.buildAdapterContext = _mod_8y5us8.buildAdapterContext;
5
- exports.getAvailableAdapters = _mod_8y5us8.getAvailableAdapters;
6
- exports.selectAdapter = _mod_8y5us8.selectAdapter;;
1
+ const _mod_ggr948 = require('./picker.cjs');
2
+ exports.detectRuntime = _mod_ggr948.detectRuntime;
3
+ exports.getAdapterCapabilities = _mod_ggr948.getAdapterCapabilities;
4
+ exports.buildAdapterContext = _mod_ggr948.buildAdapterContext;
5
+ exports.getAvailableAdapters = _mod_ggr948.getAvailableAdapters;
6
+ exports.selectAdapter = _mod_ggr948.selectAdapter;;
@@ -1,13 +1,9 @@
1
- const _mod_q88avp = require('./lru-cache.cjs');
2
- exports.LRUCache = _mod_q88avp.LRUCache;;
3
- const _mod_ycn5kr = require('./dns-cache.cjs');
4
- exports.DNSCache = _mod_ycn5kr.DNSCache;
5
- exports.getGlobalDNSCache = _mod_ycn5kr.getGlobalDNSCache;
6
- exports.resetGlobalDNSCache = _mod_ycn5kr.resetGlobalDNSCache;;
7
- const _mod_ca0ngi = require('./response-cache.cjs');
8
- exports.ResponseCache = _mod_ca0ngi.ResponseCache;
9
- exports.normalizeResponseCacheConfig = _mod_ca0ngi.normalizeResponseCacheConfig;;
10
- const _mod_pmo9e4 = require('./file-cacher.cjs');
11
- exports.FileCacher = _mod_pmo9e4.FileCacher;;
12
- const _mod_h6jprd = require('./url-store.cjs');
13
- exports.UrlStore = _mod_h6jprd.UrlStore;;
1
+ const _mod_dh1qvy = require('./lru-cache.cjs');
2
+ exports.LRUCache = _mod_dh1qvy.LRUCache;;
3
+ const _mod_2s112k = require('./dns-cache.cjs');
4
+ exports.DNSCache = _mod_2s112k.DNSCache;
5
+ exports.getGlobalDNSCache = _mod_2s112k.getGlobalDNSCache;
6
+ exports.resetGlobalDNSCache = _mod_2s112k.resetGlobalDNSCache;;
7
+ const _mod_xow5u3 = require('./response-cache.cjs');
8
+ exports.ResponseCache = _mod_xow5u3.ResponseCache;
9
+ exports.normalizeResponseCacheConfig = _mod_xow5u3.normalizeResponseCacheConfig;;
@@ -1,5 +1,3 @@
1
1
  export { LRUCache } from './lru-cache.js';
2
2
  export { DNSCache, getGlobalDNSCache, resetGlobalDNSCache } from './dns-cache.js';
3
3
  export { ResponseCache, normalizeResponseCacheConfig } from './response-cache.js';
4
- export { FileCacher } from './file-cacher.js';
5
- export { UrlStore } from './url-store.js';
@@ -7,6 +7,7 @@ const packageJson = require("../../package.json");
7
7
  const { createDefaultHooks, mergeHooks, runVoidHooksSync, runTransformHooks } = require('./hooks.cjs');
8
8
  const { ResponseCache, DNSCache } = require('../cache/index.cjs');
9
9
  const { ProxyManager } = require('../proxy/manager.cjs');
10
+ const { toCurl: toCurlUtil, fromCurl: fromCurlUtil } = require('../utils/curl.cjs');
10
11
  let globalAdapter = null;
11
12
  function setGlobalAdapter(adapter) {
12
13
  globalAdapter = adapter;
@@ -479,6 +480,12 @@ class Rezo {
479
480
  clearCookies() {
480
481
  this.jar?.removeAllCookiesSync();
481
482
  }
483
+ static toCurl(config) {
484
+ return toCurlUtil(config);
485
+ }
486
+ static fromCurl(curlCommand) {
487
+ return fromCurlUtil(curlCommand);
488
+ }
482
489
  }
483
490
  const defaultTransforms = exports.defaultTransforms = {
484
491
  request: [
package/dist/core/rezo.js CHANGED
@@ -7,6 +7,7 @@ import packageJson from "../../package.json" with { type: 'json' };
7
7
  import { createDefaultHooks, mergeHooks, runVoidHooksSync, runTransformHooks } from './hooks.js';
8
8
  import { ResponseCache, DNSCache } from '../cache/index.js';
9
9
  import { ProxyManager } from '../proxy/manager.js';
10
+ import { toCurl as toCurlUtil, fromCurl as fromCurlUtil } from '../utils/curl.js';
10
11
  let globalAdapter = null;
11
12
  export function setGlobalAdapter(adapter) {
12
13
  globalAdapter = adapter;
@@ -479,6 +480,12 @@ export class Rezo {
479
480
  clearCookies() {
480
481
  this.jar?.removeAllCookiesSync();
481
482
  }
483
+ static toCurl(config) {
484
+ return toCurlUtil(config);
485
+ }
486
+ static fromCurl(curlCommand) {
487
+ return fromCurlUtil(curlCommand);
488
+ }
482
489
  }
483
490
  export const defaultTransforms = {
484
491
  request: [
@@ -0,0 +1 @@
1
const { Rezo } = require("../../../core/rezo.cjs");
const decodoOptions = require("./options.cjs");

// Re-export the option constants and helpers from ./options.cjs.
exports.DECODO_DEVICE_TYPES = decodoOptions.DECODO_DEVICE_TYPES;
exports.DECODO_HEADLESS_MODES = decodoOptions.DECODO_HEADLESS_MODES;
exports.DECODO_COMMON_LOCALES = decodoOptions.DECODO_COMMON_LOCALES;
exports.DECODO_COMMON_COUNTRIES = decodoOptions.DECODO_COMMON_COUNTRIES;
exports.DECODO_EUROPEAN_COUNTRIES = decodoOptions.DECODO_EUROPEAN_COUNTRIES;
exports.DECODO_ASIAN_COUNTRIES = decodoOptions.DECODO_ASIAN_COUNTRIES;
exports.DECODO_US_STATES = decodoOptions.DECODO_US_STATES;
exports.DECODO_COMMON_CITIES = decodoOptions.DECODO_COMMON_CITIES;
exports.getRandomDeviceType = decodoOptions.getRandomDeviceType;
exports.getRandomLocale = decodoOptions.getRandomLocale;
exports.getRandomCountry = decodoOptions.getRandomCountry;
exports.getRandomCity = decodoOptions.getRandomCity;
exports.generateSessionId = decodoOptions.generateSessionId;

/** Endpoint every scrape request is POSTed to. */
const DECODO_API_URL = "https://scraper-api.smartproxy.com/v2/scrape";

/**
 * Client for the Decodo scraping API.
 *
 * Authenticates with either `username` + `password` (sent as HTTP Basic) or
 * a pre-built `token` (sent verbatim after `Basic `).
 */
class Decodo {
  config;
  http;
  authHeader;
  // Kept outside `config` so getConfig() never exposes it, while
  // withConfig() can still clone a token-authenticated client.
  #token;

  /**
   * @param {object} config - Client settings. Requires either non-empty
   *   `username` and `password`, or a non-empty `token`. All other fields
   *   are optional and default as shown in the body.
   * @throws {Error} When neither form of credentials is supplied.
   */
  constructor(config) {
    const hasCredentials = "username" in config && "password" in config && config.username && config.password;
    const hasToken = "token" in config && config.token;
    if (!hasCredentials && !hasToken) {
      throw new Error("Decodo requires either username/password or token for authentication");
    }
    this.config = {
      username: config.username ?? "",
      password: config.password ?? "",
      deviceType: config.deviceType ?? "desktop",
      locale: config.locale ?? "en-US",
      country: config.country ?? "",
      state: config.state ?? "",
      city: config.city ?? "",
      headless: config.headless ?? undefined,
      headers: config.headers ?? {},
      sessionId: config.sessionId ?? "",
      sessionDuration: config.sessionDuration ?? 0,
      javascript: config.javascript ?? "",
      javascriptWait: config.javascriptWait ?? 0,
      waitForCss: config.waitForCss ?? "",
      timeout: config.timeout ?? 120000
    };
    this.http = new Rezo({ baseURL: DECODO_API_URL, timeout: this.config.timeout });
    // A token takes precedence over username/password when both are given.
    if (hasToken) {
      this.#token = config.token;
      this.authHeader = `Basic ${config.token}`;
    } else {
      this.authHeader = `Basic ${Buffer.from(`${config.username}:${config.password}`).toString("base64")}`;
    }
  }

  /**
   * Scrapes a single URL.
   *
   * @param {string} url - Page to scrape.
   * @param {object} [options] - Per-call overrides merged over the client
   *   config; `headers` are merged key by key.
   * @returns {Promise<object>} Normalised view of the first API result,
   *   with the raw API payload under `raw`.
   * @throws {Error} When the API reports an error or returns no results.
   */
  async scrape(url, options) {
    const merged = {
      ...this.config,
      ...options,
      headers: { ...this.config.headers, ...(options?.headers || {}) }
    };
    const body = this.buildRequestBody(url, merged);
    const response = await this.http.postJson(DECODO_API_URL, body, {
      headers: { Authorization: this.authHeader, "Content-Type": "application/json" }
    });
    const payload = response.data;
    if (payload.error) {
      throw new Error(`Decodo API error: ${payload.error} (${payload.error_code || "unknown"})`);
    }
    if (!payload.results || payload.results.length === 0) {
      throw new Error("Decodo API returned no results");
    }
    const first = payload.results[0];
    return {
      statusCode: first.status_code,
      url: first.url,
      content: first.body,
      cookies: first.cookies || [],
      headers: first.headers || {},
      taskId: payload.id,
      rendered: !!merged.headless,
      country: merged.country || undefined,
      city: merged.city || undefined,
      state: merged.state || undefined,
      deviceType: merged.deviceType,
      contentType: first.content_type,
      contentLength: first.content_length,
      raw: payload
    };
  }

  /**
   * Scrapes several URLs one after another.
   *
   * @param {string[]} urls - Pages to scrape, in order.
   * @param {object} [options] - Overrides applied to every call.
   * @param {number} [delay=1000] - Milliseconds to wait between calls;
   *   no wait after the last one or when `delay` is not positive.
   * @returns {Promise<object[]>} Results in the same order as `urls`.
   */
  async scrapeMany(urls, options, delay = 1000) {
    const results = [];
    for (let index = 0; index < urls.length; index++) {
      // Sequential on purpose: each request waits for the previous one.
      results.push(await this.scrape(urls[index], options));
      if (index < urls.length - 1 && delay > 0) {
        await new Promise((resolve) => setTimeout(resolve, delay));
      }
    }
    return results;
  }

  /**
   * Scrapes several URLs under one freshly generated session id, with a
   * 500 ms pause between calls.
   *
   * @param {string[]} urls - Pages to scrape, in order.
   * @param {object} [options] - Overrides applied to every call.
   * @param {number} [sessionDuration=10] - Sent as `session_duration`
   *   (unit is defined by the API, not by this code).
   * @returns {Promise<object[]>} Results in the same order as `urls`.
   */
  async scrapeWithSession(urls, options, sessionDuration = 10) {
    const sessionId = `session_${Date.now()}_${Math.random().toString(36).substring(2, 11)}`;
    return this.scrapeMany(urls, { ...options, sessionId, sessionDuration }, 500);
  }

  /**
   * Maps merged client options onto the API's snake_case request body.
   * Falsy options are omitted.
   *
   * @param {string} url - Page to scrape.
   * @param {object} options - Merged options (config + per-call overrides).
   * @returns {object} JSON-serialisable request body.
   */
  buildRequestBody(url, options) {
    const body = { url, return_cookies: true, return_headers: true };
    if (options.deviceType) body.device_type = options.deviceType;
    if (options.headless) body.headless = options.headless;
    if (options.locale) body.locale = options.locale;
    if (options.country) body.country = options.country;
    if (options.state) body.state = options.state;
    if (options.city) body.city = options.city;
    if (options.sessionId) {
      body.session = options.sessionId;
      // A duration is only sent together with a session id.
      if (options.sessionDuration) body.session_duration = options.sessionDuration;
    }
    if (options.headers && Object.keys(options.headers).length > 0) body.headers = options.headers;
    if (options.javascript) {
      body.javascript = options.javascript;
      // The wait is only sent together with a script.
      if (options.javascriptWait) body.javascript_wait = options.javascriptWait;
    }
    if (options.waitForCss) body.wait_for_css = options.waitForCss;
    return body;
  }

  /**
   * @returns {object} Copy of the client config with the password masked.
   */
  getConfig() {
    return { ...this.config, password: "***" };
  }

  /**
   * Creates a new client from this one's config plus `overrides`.
   *
   * The token is carried over unless `overrides` supplies credentials of its
   * own. Previously it was lost, so calling this on a token-authenticated
   * client threw "Decodo requires either username/password or token…".
   *
   * @param {object} [overrides] - Settings to change on the copy.
   * @returns {Decodo} The new client.
   */
  withConfig(overrides) {
    const base = { ...this.config };
    const suppliesCredentials = overrides != null
      && ("token" in overrides || "username" in overrides || "password" in overrides);
    if (this.#token && !suppliesCredentials) {
      base.token = this.#token;
    }
    return new Decodo({ ...base, ...overrides });
  }

  /**
   * Checks the credentials by scraping `https://httpbin.org/ip`.
   *
   * @returns {Promise<true>} Resolves to `true` on success.
   * @throws {Error} Wrapping the underlying failure (available as `cause`).
   */
  async testConnection() {
    try {
      await this.scrape("https://httpbin.org/ip");
      return true;
    } catch (err) {
      throw new Error(`Decodo connection test failed: ${err.message}`, { cause: err });
    }
  }
}

exports.Decodo = Decodo;
exports.default = Decodo;
module.exports = Object.assign(Decodo, exports);
@@ -0,0 +1 @@
1
import { Rezo } from "../../../core/rezo.js";

// Re-export the option constants and helpers from ./options.js.
export {
  DECODO_DEVICE_TYPES,
  DECODO_HEADLESS_MODES,
  DECODO_COMMON_LOCALES,
  DECODO_COMMON_COUNTRIES,
  DECODO_EUROPEAN_COUNTRIES,
  DECODO_ASIAN_COUNTRIES,
  DECODO_US_STATES,
  DECODO_COMMON_CITIES,
  getRandomDeviceType,
  getRandomLocale,
  getRandomCountry,
  getRandomCity,
  generateSessionId
} from "./options.js";

/** Endpoint every scrape request is POSTed to. */
const DECODO_API_URL = "https://scraper-api.smartproxy.com/v2/scrape";

/**
 * Client for the Decodo scraping API.
 *
 * Authenticates with either `username` + `password` (sent as HTTP Basic) or
 * a pre-built `token` (sent verbatim after `Basic `).
 */
class Decodo {
  config;
  http;
  authHeader;
  // Kept outside `config` so getConfig() never exposes it, while
  // withConfig() can still clone a token-authenticated client.
  #token;

  /**
   * @param {object} config - Client settings. Requires either non-empty
   *   `username` and `password`, or a non-empty `token`. All other fields
   *   are optional and default as shown in the body.
   * @throws {Error} When neither form of credentials is supplied.
   */
  constructor(config) {
    const hasCredentials = "username" in config && "password" in config && config.username && config.password;
    const hasToken = "token" in config && config.token;
    if (!hasCredentials && !hasToken) {
      throw new Error("Decodo requires either username/password or token for authentication");
    }
    this.config = {
      username: config.username ?? "",
      password: config.password ?? "",
      deviceType: config.deviceType ?? "desktop",
      locale: config.locale ?? "en-US",
      country: config.country ?? "",
      state: config.state ?? "",
      city: config.city ?? "",
      headless: config.headless ?? undefined,
      headers: config.headers ?? {},
      sessionId: config.sessionId ?? "",
      sessionDuration: config.sessionDuration ?? 0,
      javascript: config.javascript ?? "",
      javascriptWait: config.javascriptWait ?? 0,
      waitForCss: config.waitForCss ?? "",
      timeout: config.timeout ?? 120000
    };
    this.http = new Rezo({ baseURL: DECODO_API_URL, timeout: this.config.timeout });
    // A token takes precedence over username/password when both are given.
    if (hasToken) {
      this.#token = config.token;
      this.authHeader = `Basic ${config.token}`;
    } else {
      this.authHeader = `Basic ${Buffer.from(`${config.username}:${config.password}`).toString("base64")}`;
    }
  }

  /**
   * Scrapes a single URL.
   *
   * @param {string} url - Page to scrape.
   * @param {object} [options] - Per-call overrides merged over the client
   *   config; `headers` are merged key by key.
   * @returns {Promise<object>} Normalised view of the first API result,
   *   with the raw API payload under `raw`.
   * @throws {Error} When the API reports an error or returns no results.
   */
  async scrape(url, options) {
    const merged = {
      ...this.config,
      ...options,
      headers: { ...this.config.headers, ...(options?.headers || {}) }
    };
    const body = this.buildRequestBody(url, merged);
    const response = await this.http.postJson(DECODO_API_URL, body, {
      headers: { Authorization: this.authHeader, "Content-Type": "application/json" }
    });
    const payload = response.data;
    if (payload.error) {
      throw new Error(`Decodo API error: ${payload.error} (${payload.error_code || "unknown"})`);
    }
    if (!payload.results || payload.results.length === 0) {
      throw new Error("Decodo API returned no results");
    }
    const first = payload.results[0];
    return {
      statusCode: first.status_code,
      url: first.url,
      content: first.body,
      cookies: first.cookies || [],
      headers: first.headers || {},
      taskId: payload.id,
      rendered: !!merged.headless,
      country: merged.country || undefined,
      city: merged.city || undefined,
      state: merged.state || undefined,
      deviceType: merged.deviceType,
      contentType: first.content_type,
      contentLength: first.content_length,
      raw: payload
    };
  }

  /**
   * Scrapes several URLs one after another.
   *
   * @param {string[]} urls - Pages to scrape, in order.
   * @param {object} [options] - Overrides applied to every call.
   * @param {number} [delay=1000] - Milliseconds to wait between calls;
   *   no wait after the last one or when `delay` is not positive.
   * @returns {Promise<object[]>} Results in the same order as `urls`.
   */
  async scrapeMany(urls, options, delay = 1000) {
    const results = [];
    for (let index = 0; index < urls.length; index++) {
      // Sequential on purpose: each request waits for the previous one.
      results.push(await this.scrape(urls[index], options));
      if (index < urls.length - 1 && delay > 0) {
        await new Promise((resolve) => setTimeout(resolve, delay));
      }
    }
    return results;
  }

  /**
   * Scrapes several URLs under one freshly generated session id, with a
   * 500 ms pause between calls.
   *
   * @param {string[]} urls - Pages to scrape, in order.
   * @param {object} [options] - Overrides applied to every call.
   * @param {number} [sessionDuration=10] - Sent as `session_duration`
   *   (unit is defined by the API, not by this code).
   * @returns {Promise<object[]>} Results in the same order as `urls`.
   */
  async scrapeWithSession(urls, options, sessionDuration = 10) {
    const sessionId = `session_${Date.now()}_${Math.random().toString(36).substring(2, 11)}`;
    return this.scrapeMany(urls, { ...options, sessionId, sessionDuration }, 500);
  }

  /**
   * Maps merged client options onto the API's snake_case request body.
   * Falsy options are omitted.
   *
   * @param {string} url - Page to scrape.
   * @param {object} options - Merged options (config + per-call overrides).
   * @returns {object} JSON-serialisable request body.
   */
  buildRequestBody(url, options) {
    const body = { url, return_cookies: true, return_headers: true };
    if (options.deviceType) body.device_type = options.deviceType;
    if (options.headless) body.headless = options.headless;
    if (options.locale) body.locale = options.locale;
    if (options.country) body.country = options.country;
    if (options.state) body.state = options.state;
    if (options.city) body.city = options.city;
    if (options.sessionId) {
      body.session = options.sessionId;
      // A duration is only sent together with a session id.
      if (options.sessionDuration) body.session_duration = options.sessionDuration;
    }
    if (options.headers && Object.keys(options.headers).length > 0) body.headers = options.headers;
    if (options.javascript) {
      body.javascript = options.javascript;
      // The wait is only sent together with a script.
      if (options.javascriptWait) body.javascript_wait = options.javascriptWait;
    }
    if (options.waitForCss) body.wait_for_css = options.waitForCss;
    return body;
  }

  /**
   * @returns {object} Copy of the client config with the password masked.
   */
  getConfig() {
    return { ...this.config, password: "***" };
  }

  /**
   * Creates a new client from this one's config plus `overrides`.
   *
   * The token is carried over unless `overrides` supplies credentials of its
   * own. Previously it was lost, so calling this on a token-authenticated
   * client threw "Decodo requires either username/password or token…".
   *
   * @param {object} [overrides] - Settings to change on the copy.
   * @returns {Decodo} The new client.
   */
  withConfig(overrides) {
    const base = { ...this.config };
    const suppliesCredentials = overrides != null
      && ("token" in overrides || "username" in overrides || "password" in overrides);
    if (this.#token && !suppliesCredentials) {
      base.token = this.#token;
    }
    return new Decodo({ ...base, ...overrides });
  }

  /**
   * Checks the credentials by scraping `https://httpbin.org/ip`.
   *
   * @returns {Promise<true>} Resolves to `true` on success.
   * @throws {Error} Wrapping the underlying failure (available as `cause`).
   */
  async testConnection() {
    try {
      await this.scrape("https://httpbin.org/ip");
      return true;
    } catch (err) {
      throw new Error(`Decodo connection test failed: ${err.message}`, { cause: err });
    }
  }
}

export { Decodo, Decodo as default };
@@ -0,0 +1 @@
1
// Export on the real `exports` object. The previous interop helper copied the
// property names of an export table that was still empty when it ran, so
// `CrawlerOptions` never became visible to require() callers.
Object.defineProperty(exports, "__esModule", { value: true });

const { RezoQueue } = require("../queue/queue.cjs");
const { Oxylabs } = require("./addon/oxylabs/index.cjs");
const path = require("node:path");
const os = require("node:os");
const { Decodo } = require("./addon/decodo/index.cjs");

/**
 * Crawler-wide settings plus registries of request headers, proxies, rate
 * limiters and Oxylabs/Decodo adapters. Every registry entry is either bound
 * to a `domain` rule or flagged `isGlobal`.
 */
class CrawlerOptions {
  baseUrl;
  adapter;
  enableNavigationHistory;
  sessionId;
  rejectUnauthorized;
  userAgent;
  useRndUserAgent;
  timeout;
  maxRedirects;
  maxRetryAttempts;
  retryDelay;
  retryOnStatusCode;
  forceRevisit;
  retryWithoutProxyOnStatusCode;
  retryOnProxyError;
  maxRetryOnProxyError;
  allowRevisiting;
  enableCache;
  cacheTTL;
  cacheDir;
  throwFatalError;
  debug;
  maxDepth;
  maxUrls;
  maxResponseSize;
  respectRobotsTxt;
  followNofollow;
  autoThrottle;
  autoThrottleTargetDelay;
  autoThrottleMinDelay;
  autoThrottleMaxDelay;
  maxWaitOn429;
  alwaysWaitOn429;
  oxylabs = [];
  decodo = [];
  proxies = [];
  limiters = [];
  requestHeaders = [];
  userAgents = generateUserAgents();

  /**
   * @param {object} [options] - Any subset of the fields above, plus the
   *   registry seeds `headers`, `oxylabs`, `decodo`, `proxy` and `limiter`
   *   (each `{ enable, <list> }`). Missing scalar fields get the defaults below.
   */
  constructor(options = {}) {
    this.baseUrl = options.baseUrl || "";
    this.adapter = options.adapter ?? "http";
    this.enableNavigationHistory = options.enableNavigationHistory ?? false;
    this.sessionId =
      options.sessionId ?? `session_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;
    this.rejectUnauthorized = options.rejectUnauthorized ?? true;
    this.userAgent = options.userAgent;
    this.useRndUserAgent = options.useRndUserAgent ?? false;
    this.timeout = options.timeout ?? 30000;
    this.maxRedirects = options.maxRedirects ?? 10;
    this.maxRetryAttempts = options.maxRetryAttempts ?? 3;
    this.retryDelay = options.retryDelay ?? 0;
    this.retryOnStatusCode = options.retryOnStatusCode ?? [408, 429, 500, 502, 503, 504];
    this.forceRevisit = options.forceRevisit ?? false;
    this.retryWithoutProxyOnStatusCode = options.retryWithoutProxyOnStatusCode ?? [407, 403];
    this.retryOnProxyError = options.retryOnProxyError ?? true;
    this.maxRetryOnProxyError = options.maxRetryOnProxyError ?? 3;
    this.allowRevisiting = options.allowRevisiting ?? false;
    this.enableCache = options.enableCache ?? true;
    this.cacheTTL = options.cacheTTL ?? 604800000; // 7 days if the unit is milliseconds — TODO confirm
    this.cacheDir = options.cacheDir ?? path.join(os.tmpdir(), "uiniqhtt_cache");
    this.throwFatalError = options.throwFatalError ?? false;
    this.debug = options.debug ?? false;
    this.maxDepth = options.maxDepth ?? 0;
    this.maxUrls = options.maxUrls ?? 0;
    this.maxResponseSize = options.maxResponseSize ?? 0;
    this.respectRobotsTxt = options.respectRobotsTxt ?? false;
    this.followNofollow = options.followNofollow ?? false;
    this.autoThrottle = options.autoThrottle ?? true;
    this.autoThrottleTargetDelay = options.autoThrottleTargetDelay ?? 1000;
    this.autoThrottleMinDelay = options.autoThrottleMinDelay ?? 100;
    this.autoThrottleMaxDelay = options.autoThrottleMaxDelay ?? 60000;
    this.maxWaitOn429 = options.maxWaitOn429 ?? 1800000;
    this.alwaysWaitOn429 = options.alwaysWaitOn429 ?? false;

    this._addHeaders(options.headers);
    this._addOxylabs(options.oxylabs);
    this._addDecodo(options.decodo);
    this._addProxies(options.proxy);
    this._addLimiters(options.limiter);
  }

  /**
   * Maps a registry type name to its backing array. Any unrecognised type
   * resolves to the proxy registry.
   */
  _registryFor(type) {
    switch (type) {
      case "headers":
        return this.requestHeaders;
      case "limiters":
        return this.limiters;
      case "oxylabs":
        return this.oxylabs;
      case "decodo":
        return this.decodo;
      default:
        return this.proxies;
    }
  }

  /** Extracts the value a registry entry carries for the given type. */
  _payloadOf(type, entry) {
    switch (type) {
      case "headers":
        return entry.headers;
      case "limiters":
        return entry.pqueue;
      case "oxylabs":
      case "decodo":
        return entry.adaptar; // property name is spelled this way throughout the registries
      default:
        return entry.proxy;
    }
  }

  /**
   * @param {string} type - "headers" | "limiters" | "oxylabs" | "decodo" | anything else for proxies.
   * @returns {Array} Distinct `domain` values (compared by identity) of that registry.
   */
  getConfiguredDomains(type) {
    const domains = this._registryFor(type)
      .filter((entry) => entry.domain)
      .map((entry) => entry.domain);
    return [...new Set(domains)];
  }

  /**
   * Removes every entry bound to `domain` from all registries.
   *
   * @param {*} domain - Domain rule to remove; arrays are compared element-wise.
   * @returns {this}
   */
  removeDomain(domain) {
    const keep = (entry) => !entry.domain || !this._domainsEqual(entry.domain, domain);
    this.requestHeaders = this.requestHeaders.filter(keep);
    this.proxies = this.proxies.filter(keep);
    this.limiters = this.limiters.filter(keep);
    this.oxylabs = this.oxylabs.filter(keep);
    this.decodo = this.decodo.filter(keep);
    return this;
  }

  /** Strict equality, or element-wise strict equality when both are arrays. */
  _domainsEqual(left, right) {
    if (Array.isArray(left) && Array.isArray(right)) {
      return left.length === right.length && left.every((item, index) => item === right[index]);
    }
    return left === right;
  }

  /**
   * @returns {object} Per-registry counts: `{ total, global, domainSpecific }`.
   */
  getConfigurationSummary() {
    const summarise = (entries) => ({
      total: entries.length,
      global: entries.filter((entry) => entry.isGlobal).length,
      domainSpecific: entries.filter((entry) => !entry.isGlobal && entry.domain).length,
    });
    return {
      headers: summarise(this.requestHeaders),
      proxies: summarise(this.proxies),
      limiters: summarise(this.limiters),
      oxylabs: summarise(this.oxylabs),
      decodo: summarise(this.decodo),
    };
  }

  /**
   * Registers header sets. Entries without a domain and without `isGlobal`,
   * or with no headers, are skipped. `Headers` instances become plain objects.
   */
  _addHeaders(settings) {
    if (!settings || !settings.enable) return;
    for (const item of settings.httpHeaders ?? []) {
      const { domain, isGlobal } = item;
      let { headers } = item;
      if (!domain && !isGlobal) continue;
      if (headers instanceof Headers) {
        headers = Object.fromEntries(headers.entries());
      }
      if (!headers || Object.keys(headers).length < 1) continue;
      this.requestHeaders.push({ domain, isGlobal, headers });
    }
  }

  /** Registers proxies; entries lacking a target or a non-empty proxy are skipped. */
  _addProxies(settings) {
    if (!settings || !settings.enable) return;
    for (const { domain, isGlobal, proxy } of settings.proxies ?? []) {
      if (!domain && !isGlobal) continue;
      if (!proxy || Object.keys(proxy).length < 1) continue;
      this.proxies.push({ domain, isGlobal, proxy });
    }
  }

  /** Registers limiters; each accepted entry gets its own RezoQueue built from `options`. */
  _addLimiters(settings) {
    if (!settings || !settings.enable) return;
    for (const { domain, isGlobal, options } of settings.limiters ?? []) {
      if (!domain && !isGlobal) continue;
      if (!options || Object.keys(options).length < 1) continue;
      this.limiters.push({ domain, isGlobal, pqueue: new RezoQueue(options) });
    }
  }

  /** Registers Oxylabs adapters; each accepted entry gets its own Oxylabs client. */
  _addOxylabs(settings) {
    if (!settings || !settings.enable) return;
    for (const { domain, isGlobal, options } of settings.labs ?? []) {
      if (!domain && !isGlobal) continue;
      if (!options || Object.keys(options).length < 1) continue;
      this.oxylabs.push({ domain, isGlobal, adaptar: new Oxylabs(options) });
    }
  }

  /** Registers Decodo adapters; each accepted entry gets its own Decodo client. */
  _addDecodo(settings) {
    if (!settings || !settings.enable) return;
    for (const { domain, isGlobal, options } of settings.labs ?? []) {
      if (!domain && !isGlobal) continue;
      if (!options || Object.keys(options).length < 1) continue;
      this.decodo.push({ domain, isGlobal, adaptar: new Decodo(options) });
    }
  }

  /** Adds one header entry (`{ domain?, isGlobal?, headers }`). @returns {this} */
  addHeaders(entry) {
    this._addHeaders({ enable: true, httpHeaders: [entry] });
    return this;
  }

  /** Adds one proxy entry (`{ domain?, isGlobal?, proxy }`). @returns {this} */
  addProxy(entry) {
    this._addProxies({ enable: true, proxies: [entry] });
    return this;
  }

  /** Adds one limiter entry (`{ domain?, isGlobal?, options }`). @returns {this} */
  addLimiter(entry) {
    this._addLimiters({ enable: true, limiters: [entry] });
    return this;
  }

  /** Adds one Oxylabs entry (`{ domain?, isGlobal?, options }`). @returns {this} */
  addOxylabs(entry) {
    this._addOxylabs({ enable: true, labs: [entry] });
    return this;
  }

  /** Adds one Decodo entry (`{ domain?, isGlobal?, options }`). @returns {this} */
  addDecodo(entry) {
    this._addDecodo({ enable: true, labs: [entry] });
    return this;
  }

  /**
   * Drops every `isGlobal` entry from all registries.
   * @returns {this}
   */
  clearGlobalConfigs() {
    const notGlobal = (entry) => !entry.isGlobal;
    if (Array.isArray(this.requestHeaders)) this.requestHeaders = this.requestHeaders.filter(notGlobal);
    if (Array.isArray(this.oxylabs)) this.oxylabs = this.oxylabs.filter(notGlobal);
    if (Array.isArray(this.decodo)) this.decodo = this.decodo.filter(notGlobal);
    if (Array.isArray(this.limiters)) this.limiters = this.limiters.filter(notGlobal);
    if (Array.isArray(this.proxies)) this.proxies = this.proxies.filter(notGlobal);
    return this;
  }

  /**
   * Resolves the registry value that applies to a URL.
   *
   * Domain-bound entries win. When none matches and `useGlobal` is truthy the
   * choice is made among the global entries only, so a non-matching
   * domain-bound entry listed first can no longer hide a global one.
   *
   * @param {string} url - URL or bare hostname.
   * @param {string} type - Registry type (see _registryFor).
   * @param {boolean} [useGlobal] - Allow falling back to global entries.
   * @param {boolean} [random] - Pick randomly among candidates instead of the first.
   * @returns {*} Headers object, queue, adapter or proxy; null when nothing applies.
   */
  getAdapter(url, type, useGlobal, random) {
    if (!this.getDomainName(url)) return null;

    const entries = this._registryFor(type);
    const choose = (candidates) =>
      random ? candidates[this.rnd(0, candidates.length - 1)] : candidates[0];

    const matching = entries.filter((entry) => this._hasDomain(url, entry.domain));
    if (matching.length) return this._payloadOf(type, choose(matching));

    if (useGlobal) {
      const globals = entries.filter((entry) => entry.isGlobal);
      if (globals.length) return this._payloadOf(type, choose(globals));
    }
    return null;
  }

  /** @returns {number} Random integer in the inclusive range [min, max]. */
  rnd(min = 0, max = Number.MAX_VALUE) {
    return Math.floor(Math.random() * (max - min + 1)) + min;
  }

  /**
   * @param {string} url - URL or bare hostname.
   * @param {string} type - Registry type (see _registryFor).
   * @param {boolean} [useGlobal] - Also accept any global entry.
   * @returns {boolean} Whether some entry of that registry applies to the URL.
   */
  hasDomain(url, type, useGlobal) {
    if (!this.getDomainName(url)) return false;
    const entries = this._registryFor(type);
    if (entries.some((entry) => this._hasDomain(url, entry.domain))) return true;
    return Boolean(useGlobal) && entries.some((entry) => entry.isGlobal);
  }

  /**
   * Builds the final header map for a request.
   *
   * @param {string} url - URL or bare hostname used to look up registered headers.
   * @param {boolean} [useGlobal] - Allow global registered headers.
   * @param {Headers|object} [extraHeaders] - Overrides; for plain objects only
   *   string values are applied.
   * @param {boolean} [useRandomUserAgent] - Replace `user-agent` with a random one.
   * @returns {object} Plain object produced from a `Headers` instance.
   */
  pickHeaders(url, useGlobal, extraHeaders, useRandomUserAgent) {
    const registered = this.getAdapter(url, "headers", useGlobal);
    const merged = new Headers(registered ?? {});

    if (extraHeaders instanceof Headers) {
      // Iterate the Headers entries directly: wrapping the iterator in
      // Object.entries() yields nothing and silently dropped every override.
      for (const [name, value] of extraHeaders.entries()) merged.set(name, value);
    } else if (extraHeaders && typeof extraHeaders === "object") {
      for (const [name, value] of Object.entries(extraHeaders)) {
        if (typeof value === "string") merged.set(name, value);
      }
    }

    if (useRandomUserAgent) merged.set("user-agent", this.getRandomUserAgent());
    return Object.fromEntries(merged.entries());
  }

  /**
   * Tests a URL against a domain rule (or an array of rules).
   *
   * A rule may be a RegExp, a `*` wildcard string, a regex-looking string
   * (optionally written as `/source/flags`), or a plain domain. Plain domains
   * match case-insensitively on equality or when either side is a dot-separated
   * suffix of the other.
   *
   * @param {string} url - URL or bare hostname.
   * @param {string|RegExp|Array<string|RegExp>} rule - Domain rule(s).
   * @returns {boolean}
   */
  _hasDomain(url, rule) {
    if (!rule) return false;
    const hostname = this.getDomainName(url);
    if (!hostname) return false;

    // Reset lastIndex so /g and /y expressions do not alternate results.
    const matchesEither = (expression) => {
      expression.lastIndex = 0;
      if (expression.test(hostname)) return true;
      expression.lastIndex = 0;
      return expression.test(url);
    };

    const looksLikeRegex = (text) =>
      /[\^\$\*\+\?\{\}\[\]\(\)\|\\]/.test(text) ||
      text.startsWith("/") ||
      text.includes(".*") ||
      text.includes(".+");

    const matchesRule = (candidate) => {
      if (candidate instanceof RegExp) return matchesEither(candidate);

      const text = candidate.toString().trim();
      const host = hostname.toLowerCase();
      const wanted = text.toLowerCase();
      if (host === wanted) return true;

      if (text.includes("*")) {
        // Escape everything, then turn the escaped "*" back into ".*".
        const source = text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&").replace(/\\\*/g, ".*");
        return matchesEither(new RegExp(`^${source}$`, "i"));
      }

      if (looksLikeRegex(text)) {
        try {
          let source = text;
          let flags = "i";
          const literal = text.match(/^\/(.*)\/(\w*)$/);
          if (literal) {
            source = literal[1];
            flags = literal[2] || "i";
          }
          return matchesEither(new RegExp(source, flags));
        } catch {
          // Not a valid expression after all: fall back to a substring test.
          return host.includes(wanted);
        }
      }

      return host.endsWith("." + wanted) || wanted.endsWith("." + host);
    };

    if (Array.isArray(rule)) return rule.some((candidate) => matchesRule(candidate));
    return matchesRule(rule);
  }

  /**
   * @param {string} value - Absolute http(s) URL or bare hostname.
   * @returns {string|null} The URL's hostname, the value itself when it is a
   *   bare hostname, otherwise null.
   */
  getDomainName(value) {
    if (this.isValidUrl(value)) return new URL(value).hostname;
    if (this.isHostName(value)) return value;
    return null;
  }

  /**
   * @param {string} value - Candidate hostname.
   * @returns {boolean} True for dot-separated labels ending in an alphabetic
   *   TLD of at least two letters, at most 255 characters overall.
   */
  isHostName(value) {
    if (!value) return false;
    if (value.length > 255) return false;
    // Same expression as isValidUrl(); the stray space it used to contain made
    // every real hostname fail.
    const pattern = /^(?:(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,})$/;
    const normalized = value.trim().toLowerCase();
    return pattern.test(normalized) && !normalized.startsWith("-") && !normalized.endsWith("-");
  }

  /**
   * @param {string} value - Candidate URL.
   * @returns {boolean} True for parseable http/https URLs whose hostname is a
   *   dotted name with an alphabetic TLD.
   */
  isValidUrl(value) {
    if (!value) return false;
    try {
      const parsed = new URL(value.trim());
      if (!parsed.protocol || !["http:", "https:"].includes(parsed.protocol.toLowerCase())) return false;
      if (!parsed.hostname) return false;
      return /^(?:(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,})$/.test(parsed.hostname);
    } catch {
      return false;
    }
  }

  /** @returns {string} One entry of the pre-generated user-agent pool. */
  getRandomUserAgent() {
    return this.userAgents[Math.floor(Math.random() * this.userAgents.length)];
  }
}

/**
 * Builds a pool of 200 user-agent strings by pairing a random browser with a
 * random platform token for every slot.
 *
 * @returns {string[]}
 */
function generateUserAgents() {
  const browsers = [
    { name: "Chrome", version: "91.0.4472.124", engine: "AppleWebKit/537.36" },
    { name: "Firefox", version: "89.0", engine: "Gecko/20100101" },
    { name: "Safari", version: "14.1.1", engine: "AppleWebKit/605.1.15" },
    { name: "Edge", version: "91.0.864.59", engine: "AppleWebKit/537.36" },
    { name: "Opera", version: "77.0.4054.277", engine: "AppleWebKit/537.36" },
    { name: "Vivaldi", version: "3.8.2259.42", engine: "AppleWebKit/537.36" },
    { name: "Brave", version: "1.26.74", engine: "AppleWebKit/537.36" },
    { name: "Chromium", version: "91.0.4472.101", engine: "AppleWebKit/537.36" },
    { name: "Yandex", version: "21.5.3.742", engine: "AppleWebKit/537.36" },
    { name: "Maxthon", version: "5.3.8.2000", engine: "AppleWebKit/537.36" },
  ];
  const platforms = [
    "Windows NT 10.0",
    "Windows NT 6.1",
    "Macintosh; Intel Mac OS X 10_15_7",
    "Macintosh; Intel Mac OS X 11_4_0",
    "X11; Linux x86_64",
    "X11; Ubuntu; Linux x86_64",
  ];
  // One formatter per browser name.
  const formatters = {
    Chrome: (p, b) => `Mozilla/5.0 (${p}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36`,
    Firefox: (p, b) => `Mozilla/5.0 (${p}; rv:${b.version}) ${b.engine} Firefox/${b.version}`,
    Safari: (p, b) => `Mozilla/5.0 (${p}) ${b.engine} (KHTML, like Gecko) Version/${b.version} Safari/605.1.15`,
    Edge: (p, b) => `Mozilla/5.0 (${p}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36 Edg/${b.version}`,
    Opera: (p, b) => `Mozilla/5.0 (${p}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36 OPR/${b.version}`,
    Vivaldi: (p, b) => `Mozilla/5.0 (${p}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36 Vivaldi/${b.version}`,
    Brave: (p, b) => `Mozilla/5.0 (${p}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36 Brave/${b.version}`,
    Chromium: (p, b) => `Mozilla/5.0 (${p}) ${b.engine} (KHTML, like Gecko) Chromium/${b.version} Chrome/${b.version} Safari/537.36`,
    Yandex: (p, b) => `Mozilla/5.0 (${p}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} YaBrowser/${b.version} Safari/537.36`,
    Maxthon: (p, b) => `Mozilla/5.0 (${p}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36 Maxthon/${b.version}`,
  };

  const pool = [];
  for (let slot = 0; slot < 200; slot++) {
    const browser = browsers[Math.floor(Math.random() * browsers.length)];
    const platform = platforms[Math.floor(Math.random() * platforms.length)];
    pool.push(formatters[browser.name](platform, browser));
  }
  return pool;
}

exports.CrawlerOptions = CrawlerOptions;
@@ -0,0 +1 @@
1
import { RezoQueue } from "../queue/queue.js";
import { Oxylabs } from "./addon/oxylabs/index.js";
import path from "node:path";
import os from "node:os";
import { Decodo } from "./addon/decodo/index.js";

/**
 * Crawler-wide settings plus registries of request headers, proxies, rate
 * limiters and Oxylabs/Decodo adapters. Every registry entry is either bound
 * to a `domain` rule or flagged `isGlobal`.
 */
class CrawlerOptions {
  baseUrl;
  adapter;
  enableNavigationHistory;
  sessionId;
  rejectUnauthorized;
  userAgent;
  useRndUserAgent;
  timeout;
  maxRedirects;
  maxRetryAttempts;
  retryDelay;
  retryOnStatusCode;
  forceRevisit;
  retryWithoutProxyOnStatusCode;
  retryOnProxyError;
  maxRetryOnProxyError;
  allowRevisiting;
  enableCache;
  cacheTTL;
  cacheDir;
  throwFatalError;
  debug;
  maxDepth;
  maxUrls;
  maxResponseSize;
  respectRobotsTxt;
  followNofollow;
  autoThrottle;
  autoThrottleTargetDelay;
  autoThrottleMinDelay;
  autoThrottleMaxDelay;
  maxWaitOn429;
  alwaysWaitOn429;
  oxylabs = [];
  decodo = [];
  proxies = [];
  limiters = [];
  requestHeaders = [];
  userAgents = generateUserAgents();

  /**
   * @param {object} [options] - Any subset of the fields above, plus the
   *   registry seeds `headers`, `oxylabs`, `decodo`, `proxy` and `limiter`
   *   (each `{ enable, <list> }`). Missing scalar fields get the defaults below.
   */
  constructor(options = {}) {
    this.baseUrl = options.baseUrl || "";
    this.adapter = options.adapter ?? "http";
    this.enableNavigationHistory = options.enableNavigationHistory ?? false;
    this.sessionId =
      options.sessionId ?? `session_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;
    this.rejectUnauthorized = options.rejectUnauthorized ?? true;
    this.userAgent = options.userAgent;
    this.useRndUserAgent = options.useRndUserAgent ?? false;
    this.timeout = options.timeout ?? 30000;
    this.maxRedirects = options.maxRedirects ?? 10;
    this.maxRetryAttempts = options.maxRetryAttempts ?? 3;
    this.retryDelay = options.retryDelay ?? 0;
    this.retryOnStatusCode = options.retryOnStatusCode ?? [408, 429, 500, 502, 503, 504];
    this.forceRevisit = options.forceRevisit ?? false;
    this.retryWithoutProxyOnStatusCode = options.retryWithoutProxyOnStatusCode ?? [407, 403];
    this.retryOnProxyError = options.retryOnProxyError ?? true;
    this.maxRetryOnProxyError = options.maxRetryOnProxyError ?? 3;
    this.allowRevisiting = options.allowRevisiting ?? false;
    this.enableCache = options.enableCache ?? true;
    this.cacheTTL = options.cacheTTL ?? 604800000; // 7 days if the unit is milliseconds — TODO confirm
    this.cacheDir = options.cacheDir ?? path.join(os.tmpdir(), "uiniqhtt_cache");
    this.throwFatalError = options.throwFatalError ?? false;
    this.debug = options.debug ?? false;
    this.maxDepth = options.maxDepth ?? 0;
    this.maxUrls = options.maxUrls ?? 0;
    this.maxResponseSize = options.maxResponseSize ?? 0;
    this.respectRobotsTxt = options.respectRobotsTxt ?? false;
    this.followNofollow = options.followNofollow ?? false;
    this.autoThrottle = options.autoThrottle ?? true;
    this.autoThrottleTargetDelay = options.autoThrottleTargetDelay ?? 1000;
    this.autoThrottleMinDelay = options.autoThrottleMinDelay ?? 100;
    this.autoThrottleMaxDelay = options.autoThrottleMaxDelay ?? 60000;
    this.maxWaitOn429 = options.maxWaitOn429 ?? 1800000;
    this.alwaysWaitOn429 = options.alwaysWaitOn429 ?? false;

    this._addHeaders(options.headers);
    this._addOxylabs(options.oxylabs);
    this._addDecodo(options.decodo);
    this._addProxies(options.proxy);
    this._addLimiters(options.limiter);
  }

  /**
   * Maps a registry type name to its backing array. Any unrecognised type
   * resolves to the proxy registry.
   */
  _registryFor(type) {
    switch (type) {
      case "headers":
        return this.requestHeaders;
      case "limiters":
        return this.limiters;
      case "oxylabs":
        return this.oxylabs;
      case "decodo":
        return this.decodo;
      default:
        return this.proxies;
    }
  }

  /** Extracts the value a registry entry carries for the given type. */
  _payloadOf(type, entry) {
    switch (type) {
      case "headers":
        return entry.headers;
      case "limiters":
        return entry.pqueue;
      case "oxylabs":
      case "decodo":
        return entry.adaptar; // property name is spelled this way throughout the registries
      default:
        return entry.proxy;
    }
  }

  /**
   * @param {string} type - "headers" | "limiters" | "oxylabs" | "decodo" | anything else for proxies.
   * @returns {Array} Distinct `domain` values (compared by identity) of that registry.
   */
  getConfiguredDomains(type) {
    const domains = this._registryFor(type)
      .filter((entry) => entry.domain)
      .map((entry) => entry.domain);
    return [...new Set(domains)];
  }

  /**
   * Removes every entry bound to `domain` from all registries.
   *
   * @param {*} domain - Domain rule to remove; arrays are compared element-wise.
   * @returns {this}
   */
  removeDomain(domain) {
    const keep = (entry) => !entry.domain || !this._domainsEqual(entry.domain, domain);
    this.requestHeaders = this.requestHeaders.filter(keep);
    this.proxies = this.proxies.filter(keep);
    this.limiters = this.limiters.filter(keep);
    this.oxylabs = this.oxylabs.filter(keep);
    this.decodo = this.decodo.filter(keep);
    return this;
  }

  /** Strict equality, or element-wise strict equality when both are arrays. */
  _domainsEqual(left, right) {
    if (Array.isArray(left) && Array.isArray(right)) {
      return left.length === right.length && left.every((item, index) => item === right[index]);
    }
    return left === right;
  }

  /**
   * @returns {object} Per-registry counts: `{ total, global, domainSpecific }`.
   */
  getConfigurationSummary() {
    const summarise = (entries) => ({
      total: entries.length,
      global: entries.filter((entry) => entry.isGlobal).length,
      domainSpecific: entries.filter((entry) => !entry.isGlobal && entry.domain).length,
    });
    return {
      headers: summarise(this.requestHeaders),
      proxies: summarise(this.proxies),
      limiters: summarise(this.limiters),
      oxylabs: summarise(this.oxylabs),
      decodo: summarise(this.decodo),
    };
  }

  /**
   * Registers header sets. Entries without a domain and without `isGlobal`,
   * or with no headers, are skipped. `Headers` instances become plain objects.
   */
  _addHeaders(settings) {
    if (!settings || !settings.enable) return;
    for (const item of settings.httpHeaders ?? []) {
      const { domain, isGlobal } = item;
      let { headers } = item;
      if (!domain && !isGlobal) continue;
      if (headers instanceof Headers) {
        headers = Object.fromEntries(headers.entries());
      }
      if (!headers || Object.keys(headers).length < 1) continue;
      this.requestHeaders.push({ domain, isGlobal, headers });
    }
  }

  /** Registers proxies; entries lacking a target or a non-empty proxy are skipped. */
  _addProxies(settings) {
    if (!settings || !settings.enable) return;
    for (const { domain, isGlobal, proxy } of settings.proxies ?? []) {
      if (!domain && !isGlobal) continue;
      if (!proxy || Object.keys(proxy).length < 1) continue;
      this.proxies.push({ domain, isGlobal, proxy });
    }
  }

  /** Registers limiters; each accepted entry gets its own RezoQueue built from `options`. */
  _addLimiters(settings) {
    if (!settings || !settings.enable) return;
    for (const { domain, isGlobal, options } of settings.limiters ?? []) {
      if (!domain && !isGlobal) continue;
      if (!options || Object.keys(options).length < 1) continue;
      this.limiters.push({ domain, isGlobal, pqueue: new RezoQueue(options) });
    }
  }

  /** Registers Oxylabs adapters; each accepted entry gets its own Oxylabs client. */
  _addOxylabs(settings) {
    if (!settings || !settings.enable) return;
    for (const { domain, isGlobal, options } of settings.labs ?? []) {
      if (!domain && !isGlobal) continue;
      if (!options || Object.keys(options).length < 1) continue;
      this.oxylabs.push({ domain, isGlobal, adaptar: new Oxylabs(options) });
    }
  }

  /** Registers Decodo adapters; each accepted entry gets its own Decodo client. */
  _addDecodo(settings) {
    if (!settings || !settings.enable) return;
    for (const { domain, isGlobal, options } of settings.labs ?? []) {
      if (!domain && !isGlobal) continue;
      if (!options || Object.keys(options).length < 1) continue;
      this.decodo.push({ domain, isGlobal, adaptar: new Decodo(options) });
    }
  }

  /** Adds one header entry (`{ domain?, isGlobal?, headers }`). @returns {this} */
  addHeaders(entry) {
    this._addHeaders({ enable: true, httpHeaders: [entry] });
    return this;
  }

  /** Adds one proxy entry (`{ domain?, isGlobal?, proxy }`). @returns {this} */
  addProxy(entry) {
    this._addProxies({ enable: true, proxies: [entry] });
    return this;
  }

  /** Adds one limiter entry (`{ domain?, isGlobal?, options }`). @returns {this} */
  addLimiter(entry) {
    this._addLimiters({ enable: true, limiters: [entry] });
    return this;
  }

  /** Adds one Oxylabs entry (`{ domain?, isGlobal?, options }`). @returns {this} */
  addOxylabs(entry) {
    this._addOxylabs({ enable: true, labs: [entry] });
    return this;
  }

  /** Adds one Decodo entry (`{ domain?, isGlobal?, options }`). @returns {this} */
  addDecodo(entry) {
    this._addDecodo({ enable: true, labs: [entry] });
    return this;
  }

  /**
   * Drops every `isGlobal` entry from all registries.
   * @returns {this}
   */
  clearGlobalConfigs() {
    const notGlobal = (entry) => !entry.isGlobal;
    if (Array.isArray(this.requestHeaders)) this.requestHeaders = this.requestHeaders.filter(notGlobal);
    if (Array.isArray(this.oxylabs)) this.oxylabs = this.oxylabs.filter(notGlobal);
    if (Array.isArray(this.decodo)) this.decodo = this.decodo.filter(notGlobal);
    if (Array.isArray(this.limiters)) this.limiters = this.limiters.filter(notGlobal);
    if (Array.isArray(this.proxies)) this.proxies = this.proxies.filter(notGlobal);
    return this;
  }

  /**
   * Resolves the registry value that applies to a URL.
   *
   * Domain-bound entries win. When none matches and `useGlobal` is truthy the
   * choice is made among the global entries only, so a non-matching
   * domain-bound entry listed first can no longer hide a global one.
   *
   * @param {string} url - URL or bare hostname.
   * @param {string} type - Registry type (see _registryFor).
   * @param {boolean} [useGlobal] - Allow falling back to global entries.
   * @param {boolean} [random] - Pick randomly among candidates instead of the first.
   * @returns {*} Headers object, queue, adapter or proxy; null when nothing applies.
   */
  getAdapter(url, type, useGlobal, random) {
    if (!this.getDomainName(url)) return null;

    const entries = this._registryFor(type);
    const choose = (candidates) =>
      random ? candidates[this.rnd(0, candidates.length - 1)] : candidates[0];

    const matching = entries.filter((entry) => this._hasDomain(url, entry.domain));
    if (matching.length) return this._payloadOf(type, choose(matching));

    if (useGlobal) {
      const globals = entries.filter((entry) => entry.isGlobal);
      if (globals.length) return this._payloadOf(type, choose(globals));
    }
    return null;
  }

  /** @returns {number} Random integer in the inclusive range [min, max]. */
  rnd(min = 0, max = Number.MAX_VALUE) {
    return Math.floor(Math.random() * (max - min + 1)) + min;
  }

  /**
   * @param {string} url - URL or bare hostname.
   * @param {string} type - Registry type (see _registryFor).
   * @param {boolean} [useGlobal] - Also accept any global entry.
   * @returns {boolean} Whether some entry of that registry applies to the URL.
   */
  hasDomain(url, type, useGlobal) {
    if (!this.getDomainName(url)) return false;
    const entries = this._registryFor(type);
    if (entries.some((entry) => this._hasDomain(url, entry.domain))) return true;
    return Boolean(useGlobal) && entries.some((entry) => entry.isGlobal);
  }

  /**
   * Builds the final header map for a request.
   *
   * @param {string} url - URL or bare hostname used to look up registered headers.
   * @param {boolean} [useGlobal] - Allow global registered headers.
   * @param {Headers|object} [extraHeaders] - Overrides; for plain objects only
   *   string values are applied.
   * @param {boolean} [useRandomUserAgent] - Replace `user-agent` with a random one.
   * @returns {object} Plain object produced from a `Headers` instance.
   */
  pickHeaders(url, useGlobal, extraHeaders, useRandomUserAgent) {
    const registered = this.getAdapter(url, "headers", useGlobal);
    const merged = new Headers(registered ?? {});

    if (extraHeaders instanceof Headers) {
      // Iterate the Headers entries directly: wrapping the iterator in
      // Object.entries() yields nothing and silently dropped every override.
      for (const [name, value] of extraHeaders.entries()) merged.set(name, value);
    } else if (extraHeaders && typeof extraHeaders === "object") {
      for (const [name, value] of Object.entries(extraHeaders)) {
        if (typeof value === "string") merged.set(name, value);
      }
    }

    if (useRandomUserAgent) merged.set("user-agent", this.getRandomUserAgent());
    return Object.fromEntries(merged.entries());
  }

  /**
   * Tests a URL against a domain rule (or an array of rules).
   *
   * A rule may be a RegExp, a `*` wildcard string, a regex-looking string
   * (optionally written as `/source/flags`), or a plain domain. Plain domains
   * match case-insensitively on equality or when either side is a dot-separated
   * suffix of the other.
   *
   * @param {string} url - URL or bare hostname.
   * @param {string|RegExp|Array<string|RegExp>} rule - Domain rule(s).
   * @returns {boolean}
   */
  _hasDomain(url, rule) {
    if (!rule) return false;
    const hostname = this.getDomainName(url);
    if (!hostname) return false;

    // Reset lastIndex so /g and /y expressions do not alternate results.
    const matchesEither = (expression) => {
      expression.lastIndex = 0;
      if (expression.test(hostname)) return true;
      expression.lastIndex = 0;
      return expression.test(url);
    };

    const looksLikeRegex = (text) =>
      /[\^\$\*\+\?\{\}\[\]\(\)\|\\]/.test(text) ||
      text.startsWith("/") ||
      text.includes(".*") ||
      text.includes(".+");

    const matchesRule = (candidate) => {
      if (candidate instanceof RegExp) return matchesEither(candidate);

      const text = candidate.toString().trim();
      const host = hostname.toLowerCase();
      const wanted = text.toLowerCase();
      if (host === wanted) return true;

      if (text.includes("*")) {
        // Escape everything, then turn the escaped "*" back into ".*".
        const source = text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&").replace(/\\\*/g, ".*");
        return matchesEither(new RegExp(`^${source}$`, "i"));
      }

      if (looksLikeRegex(text)) {
        try {
          let source = text;
          let flags = "i";
          const literal = text.match(/^\/(.*)\/(\w*)$/);
          if (literal) {
            source = literal[1];
            flags = literal[2] || "i";
          }
          return matchesEither(new RegExp(source, flags));
        } catch {
          // Not a valid expression after all: fall back to a substring test.
          return host.includes(wanted);
        }
      }

      return host.endsWith("." + wanted) || wanted.endsWith("." + host);
    };

    if (Array.isArray(rule)) return rule.some((candidate) => matchesRule(candidate));
    return matchesRule(rule);
  }

  /**
   * @param {string} value - Absolute http(s) URL or bare hostname.
   * @returns {string|null} The URL's hostname, the value itself when it is a
   *   bare hostname, otherwise null.
   */
  getDomainName(value) {
    if (this.isValidUrl(value)) return new URL(value).hostname;
    if (this.isHostName(value)) return value;
    return null;
  }

  /**
   * @param {string} value - Candidate hostname.
   * @returns {boolean} True for dot-separated labels ending in an alphabetic
   *   TLD of at least two letters, at most 255 characters overall.
   */
  isHostName(value) {
    if (!value) return false;
    if (value.length > 255) return false;
    // Same expression as isValidUrl(); the stray space it used to contain made
    // every real hostname fail.
    const pattern = /^(?:(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,})$/;
    const normalized = value.trim().toLowerCase();
    return pattern.test(normalized) && !normalized.startsWith("-") && !normalized.endsWith("-");
  }

  /**
   * @param {string} value - Candidate URL.
   * @returns {boolean} True for parseable http/https URLs whose hostname is a
   *   dotted name with an alphabetic TLD.
   */
  isValidUrl(value) {
    if (!value) return false;
    try {
      const parsed = new URL(value.trim());
      if (!parsed.protocol || !["http:", "https:"].includes(parsed.protocol.toLowerCase())) return false;
      if (!parsed.hostname) return false;
      return /^(?:(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,})$/.test(parsed.hostname);
    } catch {
      return false;
    }
  }

  /** @returns {string} One entry of the pre-generated user-agent pool. */
  getRandomUserAgent() {
    return this.userAgents[Math.floor(Math.random() * this.userAgents.length)];
  }
}

/**
 * Builds a pool of 200 user-agent strings by pairing a random browser with a
 * random platform token for every slot.
 *
 * @returns {string[]}
 */
function generateUserAgents() {
  const browsers = [
    { name: "Chrome", version: "91.0.4472.124", engine: "AppleWebKit/537.36" },
    { name: "Firefox", version: "89.0", engine: "Gecko/20100101" },
    { name: "Safari", version: "14.1.1", engine: "AppleWebKit/605.1.15" },
    { name: "Edge", version: "91.0.864.59", engine: "AppleWebKit/537.36" },
    { name: "Opera", version: "77.0.4054.277", engine: "AppleWebKit/537.36" },
    { name: "Vivaldi", version: "3.8.2259.42", engine: "AppleWebKit/537.36" },
    { name: "Brave", version: "1.26.74", engine: "AppleWebKit/537.36" },
    { name: "Chromium", version: "91.0.4472.101", engine: "AppleWebKit/537.36" },
    { name: "Yandex", version: "21.5.3.742", engine: "AppleWebKit/537.36" },
    { name: "Maxthon", version: "5.3.8.2000", engine: "AppleWebKit/537.36" },
  ];
  const platforms = [
    "Windows NT 10.0",
    "Windows NT 6.1",
    "Macintosh; Intel Mac OS X 10_15_7",
    "Macintosh; Intel Mac OS X 11_4_0",
    "X11; Linux x86_64",
    "X11; Ubuntu; Linux x86_64",
  ];
  // One formatter per browser name.
  const formatters = {
    Chrome: (p, b) => `Mozilla/5.0 (${p}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36`,
    Firefox: (p, b) => `Mozilla/5.0 (${p}; rv:${b.version}) ${b.engine} Firefox/${b.version}`,
    Safari: (p, b) => `Mozilla/5.0 (${p}) ${b.engine} (KHTML, like Gecko) Version/${b.version} Safari/605.1.15`,
    Edge: (p, b) => `Mozilla/5.0 (${p}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36 Edg/${b.version}`,
    Opera: (p, b) => `Mozilla/5.0 (${p}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36 OPR/${b.version}`,
    Vivaldi: (p, b) => `Mozilla/5.0 (${p}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36 Vivaldi/${b.version}`,
    Brave: (p, b) => `Mozilla/5.0 (${p}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36 Brave/${b.version}`,
    Chromium: (p, b) => `Mozilla/5.0 (${p}) ${b.engine} (KHTML, like Gecko) Chromium/${b.version} Chrome/${b.version} Safari/537.36`,
    Yandex: (p, b) => `Mozilla/5.0 (${p}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} YaBrowser/${b.version} Safari/537.36`,
    Maxthon: (p, b) => `Mozilla/5.0 (${p}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36 Maxthon/${b.version}`,
  };

  const pool = [];
  for (let slot = 0; slot < 200; slot++) {
    const browser = browsers[Math.floor(Math.random() * browsers.length)];
    const platform = platforms[Math.floor(Math.random() * platforms.length)];
    pool.push(formatters[browser.name](platform, browser));
  }
  return pool;
}

export { CrawlerOptions };