rezo 1.0.43 → 1.0.45

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. package/dist/adapters/entries/curl.d.ts +115 -0
  2. package/dist/adapters/entries/fetch.d.ts +115 -0
  3. package/dist/adapters/entries/http.d.ts +115 -0
  4. package/dist/adapters/entries/http2.d.ts +115 -0
  5. package/dist/adapters/entries/react-native.d.ts +115 -0
  6. package/dist/adapters/entries/xhr.d.ts +115 -0
  7. package/dist/adapters/fetch.cjs +18 -0
  8. package/dist/adapters/fetch.js +18 -0
  9. package/dist/adapters/http.cjs +18 -0
  10. package/dist/adapters/http.js +18 -0
  11. package/dist/adapters/http2.cjs +18 -0
  12. package/dist/adapters/http2.js +18 -0
  13. package/dist/adapters/index.cjs +6 -6
  14. package/dist/adapters/xhr.cjs +19 -0
  15. package/dist/adapters/xhr.js +19 -0
  16. package/dist/cache/index.cjs +9 -15
  17. package/dist/cache/index.js +0 -3
  18. package/dist/core/hooks.cjs +4 -2
  19. package/dist/core/hooks.js +4 -2
  20. package/dist/crawler/addon/decodo/index.cjs +1 -0
  21. package/dist/crawler/addon/decodo/index.js +1 -0
  22. package/dist/crawler/crawler-options.cjs +1 -0
  23. package/dist/crawler/crawler-options.js +1 -0
  24. package/dist/{plugin → crawler}/crawler.cjs +392 -32
  25. package/dist/{plugin → crawler}/crawler.js +392 -32
  26. package/dist/crawler/index.cjs +40 -0
  27. package/dist/{plugin → crawler}/index.js +4 -2
  28. package/dist/crawler/plugin/file-cacher.cjs +19 -0
  29. package/dist/crawler/plugin/file-cacher.js +19 -0
  30. package/dist/crawler/plugin/index.cjs +1 -0
  31. package/dist/crawler/plugin/index.js +1 -0
  32. package/dist/crawler/plugin/navigation-history.cjs +43 -0
  33. package/dist/crawler/plugin/navigation-history.js +43 -0
  34. package/dist/crawler/plugin/robots-txt.cjs +2 -0
  35. package/dist/crawler/plugin/robots-txt.js +2 -0
  36. package/dist/crawler/plugin/url-store.cjs +18 -0
  37. package/dist/crawler/plugin/url-store.js +18 -0
  38. package/dist/crawler.d.ts +430 -172
  39. package/dist/entries/crawler.cjs +5 -5
  40. package/dist/entries/crawler.js +2 -2
  41. package/dist/index.cjs +27 -27
  42. package/dist/index.d.ts +115 -0
  43. package/dist/internal/agents/index.cjs +10 -10
  44. package/dist/platform/browser.d.ts +115 -0
  45. package/dist/platform/bun.d.ts +115 -0
  46. package/dist/platform/deno.d.ts +115 -0
  47. package/dist/platform/node.d.ts +115 -0
  48. package/dist/platform/react-native.d.ts +115 -0
  49. package/dist/platform/worker.d.ts +115 -0
  50. package/dist/proxy/index.cjs +5 -5
  51. package/dist/proxy/index.js +1 -1
  52. package/dist/queue/index.cjs +8 -8
  53. package/dist/responses/universal/index.cjs +11 -11
  54. package/dist/utils/rate-limit-wait.cjs +217 -0
  55. package/dist/utils/rate-limit-wait.js +208 -0
  56. package/package.json +2 -6
  57. package/dist/cache/file-cacher.cjs +0 -270
  58. package/dist/cache/file-cacher.js +0 -267
  59. package/dist/cache/navigation-history.cjs +0 -298
  60. package/dist/cache/navigation-history.js +0 -296
  61. package/dist/cache/url-store.cjs +0 -294
  62. package/dist/cache/url-store.js +0 -291
  63. package/dist/plugin/addon/decodo/index.cjs +0 -1
  64. package/dist/plugin/addon/decodo/index.js +0 -1
  65. package/dist/plugin/crawler-options.cjs +0 -1
  66. package/dist/plugin/crawler-options.js +0 -1
  67. package/dist/plugin/index.cjs +0 -36
  68. /package/dist/{plugin → crawler}/addon/decodo/options.cjs +0 -0
  69. /package/dist/{plugin → crawler}/addon/decodo/options.js +0 -0
  70. /package/dist/{plugin → crawler}/addon/decodo/types.cjs +0 -0
  71. /package/dist/{plugin → crawler}/addon/decodo/types.js +0 -0
  72. /package/dist/{plugin → crawler}/addon/oxylabs/index.cjs +0 -0
  73. /package/dist/{plugin → crawler}/addon/oxylabs/index.js +0 -0
  74. /package/dist/{plugin → crawler}/addon/oxylabs/options.cjs +0 -0
  75. /package/dist/{plugin → crawler}/addon/oxylabs/options.js +0 -0
  76. /package/dist/{plugin → crawler}/addon/oxylabs/types.cjs +0 -0
  77. /package/dist/{plugin → crawler}/addon/oxylabs/types.js +0 -0
  78. /package/dist/{plugin → crawler}/scraper.cjs +0 -0
  79. /package/dist/{plugin → crawler}/scraper.js +0 -0
@@ -1416,6 +1416,35 @@ export type OnTimeoutHook = (event: TimeoutEvent, config: RezoConfig) => void;
1416
1416
  * Use for cleanup, logging
1417
1417
  */
1418
1418
  export type OnAbortHook = (event: AbortEvent, config: RezoConfig) => void;
1419
+ /**
1420
+ * Rate limit wait event data - fired when waiting due to rate limiting
1421
+ */
1422
+ export interface RateLimitWaitEvent {
1423
+ /** HTTP status code that triggered the wait (e.g., 429, 503) */
1424
+ status: number;
1425
+ /** Time to wait in milliseconds */
1426
+ waitTime: number;
1427
+ /** Current wait attempt number (1-indexed) */
1428
+ attempt: number;
1429
+ /** Maximum wait attempts configured */
1430
+ maxAttempts: number;
1431
+ /** Where the wait time was extracted from */
1432
+ source: "header" | "body" | "function" | "default";
1433
+ /** The header or body path used (if applicable) */
1434
+ sourcePath?: string;
1435
+ /** URL being requested */
1436
+ url: string;
1437
+ /** HTTP method of the request */
1438
+ method: string;
1439
+ /** Timestamp when the wait started */
1440
+ timestamp: number;
1441
+ }
1442
+ /**
1443
+ * Hook called when rate limit wait occurs
1444
+ * Informational only - cannot abort the wait
1445
+ * Use for logging, monitoring, alerting
1446
+ */
1447
+ export type OnRateLimitWaitHook = (event: RateLimitWaitEvent, config: RezoConfig) => void | Promise<void>;
1419
1448
  /**
1420
1449
  * Hook called before a proxy is selected
1421
1450
  * Can return a specific proxy to override selection
@@ -1496,6 +1525,7 @@ export interface RezoHooks {
1496
1525
  onTls: OnTlsHook[];
1497
1526
  onTimeout: OnTimeoutHook[];
1498
1527
  onAbort: OnAbortHook[];
1528
+ onRateLimitWait: OnRateLimitWaitHook[];
1499
1529
  }
1500
1530
  /**
1501
1531
  * Create empty hooks object with all arrays initialized
@@ -2426,6 +2456,91 @@ export interface RezoRequestConfig<D = any> {
2426
2456
  /** Whether to stop or continue retrying when a certain condition is met */
2427
2457
  condition?: (error: RezoError) => boolean | Promise<boolean>;
2428
2458
  };
2459
+ /**
2460
+ * Rate limit wait configuration - wait and retry when receiving rate limit responses.
2461
+ *
2462
+ * This feature runs BEFORE the retry system. When a rate-limiting status code is received,
2463
+ * the client will wait for the specified time and automatically retry the request.
2464
+ *
2465
+ * **Basic Usage:**
2466
+ * - `waitOnStatus: true` - Enable waiting on 429 status (default behavior)
2467
+ * - `waitOnStatus: [429, 503]` - Enable waiting on specific status codes
2468
+ *
2469
+ * **Wait Time Sources:**
2470
+ * - `'retry-after'` - Use standard Retry-After header (default)
2471
+ * - `{ header: 'X-RateLimit-Reset' }` - Use custom header
2472
+ * - `{ body: 'retry_after' }` - Extract from JSON response body
2473
+ * - Custom function for complex logic
2474
+ *
2475
+ * @example
2476
+ * ```typescript
2477
+ * // Wait on 429 using Retry-After header
2478
+ * await rezo.get(url, { waitOnStatus: true });
2479
+ *
2480
+ * // Wait on 429 using custom header
2481
+ * await rezo.get(url, {
2482
+ * waitOnStatus: true,
2483
+ * waitTimeSource: { header: 'X-RateLimit-Reset' }
2484
+ * });
2485
+ *
2486
+ * // Wait on 429 extracting time from JSON body
2487
+ * await rezo.get(url, {
2488
+ * waitOnStatus: true,
2489
+ * waitTimeSource: { body: 'data.retry_after' }
2490
+ * });
2491
+ *
2492
+ * // Custom function for complex APIs
2493
+ * await rezo.get(url, {
2494
+ * waitOnStatus: [429, 503],
2495
+ * waitTimeSource: (response) => {
2496
+ * const reset = response.headers.get('x-ratelimit-reset');
2497
+ * return reset ? parseInt(reset) - Math.floor(Date.now() / 1000) : null;
2498
+ * }
2499
+ * });
2500
+ * ```
2501
+ */
2502
+ waitOnStatus?: boolean | number[];
2503
+ /**
2504
+ * Where to extract the wait time from when rate-limited.
2505
+ *
2506
+ * - `'retry-after'` - Standard Retry-After header (default)
2507
+ * - `{ header: string }` - Custom header name (e.g., 'X-RateLimit-Reset')
2508
+ * - `{ body: string }` - JSON path in response body (e.g., 'data.retry_after', 'wait_seconds')
2509
+ * - Function - Custom logic receiving the response, return seconds to wait or null
2510
+ *
2511
+ * @default 'retry-after'
2512
+ */
2513
+ waitTimeSource?: "retry-after" | {
2514
+ header: string;
2515
+ } | {
2516
+ body: string;
2517
+ } | ((response: {
2518
+ status: number;
2519
+ headers: RezoHeaders;
2520
+ data?: any;
2521
+ }) => number | null);
2522
+ /**
2523
+ * Maximum time to wait for rate limit in milliseconds.
2524
+ * If the extracted wait time exceeds this, the request will fail instead of waiting.
2525
+ * Set to 0 for unlimited wait time.
2526
+ *
2527
+ * @default 60000 (60 seconds)
2528
+ */
2529
+ maxWaitTime?: number;
2530
+ /**
2531
+ * Default wait time in milliseconds if the wait time source returns nothing.
2532
+ * Used as fallback when Retry-After header or body path is not present.
2533
+ *
2534
+ * @default 1000 (1 second)
2535
+ */
2536
+ defaultWaitTime?: number;
2537
+ /**
2538
+ * Maximum number of wait attempts before giving up.
2539
+ * After this many waits, the request will proceed to retry logic or fail.
2540
+ *
2541
+ * @default 3
2542
+ */
2543
+ maxWaitAttempts?: number;
2429
2544
  /** Whether to use a secure context for HTTPS requests */
2430
2545
  useSecureContext?: boolean;
2431
2546
  /** Custom secure context for TLS connections */
@@ -10,6 +10,7 @@ const { DownloadResponse } = require('../responses/universal/download.cjs');
10
10
  const { UploadResponse } = require('../responses/universal/upload.cjs');
11
11
  const { isSameDomain, RezoPerformance } = require('../utils/tools.cjs');
12
12
  const { ResponseCache } = require('../cache/universal-response-cache.cjs');
13
+ const { handleRateLimitWait, shouldWaitOnStatus } = require('../utils/rate-limit-wait.cjs');
13
14
  const Environment = {
14
15
  isNode: typeof process !== "undefined" && process.versions?.node,
15
16
  isBrowser: typeof window !== "undefined" && typeof document !== "undefined",
@@ -555,6 +556,23 @@ async function executeFetchRequest(fetchOptions, config, options, perform, strea
555
556
  return response;
556
557
  }
557
558
  if (statusOnNext === "error") {
559
+ if (shouldWaitOnStatus(response.status, options.waitOnStatus)) {
560
+ const rateLimitWaitAttempt = config._rateLimitWaitAttempt || 0;
561
+ const waitResult = await handleRateLimitWait({
562
+ status: response.status,
563
+ headers: response.headers,
564
+ data: response.data,
565
+ url: fetchOptions.fullUrl || fetchOptions.url?.toString() || "",
566
+ method: fetchOptions.method || "GET",
567
+ config,
568
+ options,
569
+ currentWaitAttempt: rateLimitWaitAttempt
570
+ });
571
+ if (waitResult.shouldRetry) {
572
+ config._rateLimitWaitAttempt = waitResult.waitAttempt;
573
+ continue;
574
+ }
575
+ }
558
576
  const httpError = builErrorFromResponse(`Request failed with status code ${response.status}`, response, config, fetchOptions);
559
577
  if (config.retry && statusCodes?.includes(response.status)) {
560
578
  if (maxRetries > retries) {
@@ -10,6 +10,7 @@ import { DownloadResponse } from '../responses/universal/download.js';
10
10
  import { UploadResponse } from '../responses/universal/upload.js';
11
11
  import { isSameDomain, RezoPerformance } from '../utils/tools.js';
12
12
  import { ResponseCache } from '../cache/universal-response-cache.js';
13
+ import { handleRateLimitWait, shouldWaitOnStatus } from '../utils/rate-limit-wait.js';
13
14
  const Environment = {
14
15
  isNode: typeof process !== "undefined" && process.versions?.node,
15
16
  isBrowser: typeof window !== "undefined" && typeof document !== "undefined",
@@ -555,6 +556,23 @@ async function executeFetchRequest(fetchOptions, config, options, perform, strea
555
556
  return response;
556
557
  }
557
558
  if (statusOnNext === "error") {
559
+ if (shouldWaitOnStatus(response.status, options.waitOnStatus)) {
560
+ const rateLimitWaitAttempt = config._rateLimitWaitAttempt || 0;
561
+ const waitResult = await handleRateLimitWait({
562
+ status: response.status,
563
+ headers: response.headers,
564
+ data: response.data,
565
+ url: fetchOptions.fullUrl || fetchOptions.url?.toString() || "",
566
+ method: fetchOptions.method || "GET",
567
+ config,
568
+ options,
569
+ currentWaitAttempt: rateLimitWaitAttempt
570
+ });
571
+ if (waitResult.shouldRetry) {
572
+ config._rateLimitWaitAttempt = waitResult.waitAttempt;
573
+ continue;
574
+ }
575
+ }
558
576
  const httpError = builErrorFromResponse(`Request failed with status code ${response.status}`, response, config, fetchOptions);
559
577
  if (config.retry && statusCodes?.includes(response.status)) {
560
578
  if (maxRetries > retries) {
@@ -21,6 +21,7 @@ const { getGlobalDNSCache } = require('../cache/dns-cache.cjs');
21
21
  const { ResponseCache } = require('../cache/response-cache.cjs');
22
22
  const { getGlobalAgentPool } = require('../utils/agent-pool.cjs');
23
23
  const { StagedTimeoutManager, parseStagedTimeouts } = require('../utils/staged-timeout.cjs');
24
+ const { handleRateLimitWait, shouldWaitOnStatus } = require('../utils/rate-limit-wait.cjs');
24
25
  const dns = require("node:dns");
25
26
  const debugLog = {
26
27
  requestStart: (config, url, method) => {
@@ -604,6 +605,23 @@ async function executeHttp1Request(fetchOptions, config, options, perform, fs, s
604
605
  continue;
605
606
  }
606
607
  if (statusOnNext === "error") {
608
+ if (shouldWaitOnStatus(response.status, options.waitOnStatus)) {
609
+ const rateLimitWaitAttempt = config._rateLimitWaitAttempt || 0;
610
+ const waitResult = await handleRateLimitWait({
611
+ status: response.status,
612
+ headers: response.headers,
613
+ data: response.data,
614
+ url: fetchOptions.fullUrl || fetchOptions.url?.toString() || "",
615
+ method: fetchOptions.method || "GET",
616
+ config,
617
+ options,
618
+ currentWaitAttempt: rateLimitWaitAttempt
619
+ });
620
+ if (waitResult.shouldRetry) {
621
+ config._rateLimitWaitAttempt = waitResult.waitAttempt;
622
+ continue;
623
+ }
624
+ }
607
625
  const httpError = builErrorFromResponse(`Request failed with status code ${response.status}`, response, config, fetchOptions);
608
626
  if (config.retry && statusCodes?.includes(response.status)) {
609
627
  if (maxRetries > retries) {
@@ -21,6 +21,7 @@ import { getGlobalDNSCache } from '../cache/dns-cache.js';
21
21
  import { ResponseCache } from '../cache/response-cache.js';
22
22
  import { getGlobalAgentPool } from '../utils/agent-pool.js';
23
23
  import { StagedTimeoutManager, parseStagedTimeouts } from '../utils/staged-timeout.js';
24
+ import { handleRateLimitWait, shouldWaitOnStatus } from '../utils/rate-limit-wait.js';
24
25
  import dns from "node:dns";
25
26
  const debugLog = {
26
27
  requestStart: (config, url, method) => {
@@ -604,6 +605,23 @@ async function executeHttp1Request(fetchOptions, config, options, perform, fs, s
604
605
  continue;
605
606
  }
606
607
  if (statusOnNext === "error") {
608
+ if (shouldWaitOnStatus(response.status, options.waitOnStatus)) {
609
+ const rateLimitWaitAttempt = config._rateLimitWaitAttempt || 0;
610
+ const waitResult = await handleRateLimitWait({
611
+ status: response.status,
612
+ headers: response.headers,
613
+ data: response.data,
614
+ url: fetchOptions.fullUrl || fetchOptions.url?.toString() || "",
615
+ method: fetchOptions.method || "GET",
616
+ config,
617
+ options,
618
+ currentWaitAttempt: rateLimitWaitAttempt
619
+ });
620
+ if (waitResult.shouldRetry) {
621
+ config._rateLimitWaitAttempt = waitResult.waitAttempt;
622
+ continue;
623
+ }
624
+ }
607
625
  const httpError = builErrorFromResponse(`Request failed with status code ${response.status}`, response, config, fetchOptions);
608
626
  if (config.retry && statusCodes?.includes(response.status)) {
609
627
  if (maxRetries > retries) {
@@ -18,6 +18,7 @@ const { isSameDomain, RezoPerformance } = require('../utils/tools.cjs');
18
18
  const { SocksClient } = require('../internal/agents/socks-client.cjs');
19
19
  const net = require("node:net");
20
20
  const { ResponseCache } = require('../cache/response-cache.cjs');
21
+ const { handleRateLimitWait, shouldWaitOnStatus } = require('../utils/rate-limit-wait.cjs');
21
22
  let zstdDecompressSync = null;
22
23
  let zstdChecked = false;
23
24
  const debugLog = {
@@ -960,6 +961,23 @@ async function executeHttp2Request(fetchOptions, config, options, perform, fs, s
960
961
  return response;
961
962
  }
962
963
  if (statusOnNext === "error") {
964
+ if (shouldWaitOnStatus(response.status, options.waitOnStatus)) {
965
+ const rateLimitWaitAttempt = config._rateLimitWaitAttempt || 0;
966
+ const waitResult = await handleRateLimitWait({
967
+ status: response.status,
968
+ headers: response.headers,
969
+ data: response.data,
970
+ url: fetchOptions.fullUrl || fetchOptions.url?.toString() || "",
971
+ method: fetchOptions.method || "GET",
972
+ config,
973
+ options,
974
+ currentWaitAttempt: rateLimitWaitAttempt
975
+ });
976
+ if (waitResult.shouldRetry) {
977
+ config._rateLimitWaitAttempt = waitResult.waitAttempt;
978
+ continue;
979
+ }
980
+ }
963
981
  const httpError = builErrorFromResponse(`Request failed with status code ${response.status}`, response, config, fetchOptions);
964
982
  if (config.retry && statusCodes?.includes(response.status)) {
965
983
  if (maxRetries > retries) {
@@ -18,6 +18,7 @@ import { isSameDomain, RezoPerformance } from '../utils/tools.js';
18
18
  import { SocksClient } from '../internal/agents/socks-client.js';
19
19
  import * as net from "node:net";
20
20
  import { ResponseCache } from '../cache/response-cache.js';
21
+ import { handleRateLimitWait, shouldWaitOnStatus } from '../utils/rate-limit-wait.js';
21
22
  let zstdDecompressSync = null;
22
23
  let zstdChecked = false;
23
24
  const debugLog = {
@@ -960,6 +961,23 @@ async function executeHttp2Request(fetchOptions, config, options, perform, fs, s
960
961
  return response;
961
962
  }
962
963
  if (statusOnNext === "error") {
964
+ if (shouldWaitOnStatus(response.status, options.waitOnStatus)) {
965
+ const rateLimitWaitAttempt = config._rateLimitWaitAttempt || 0;
966
+ const waitResult = await handleRateLimitWait({
967
+ status: response.status,
968
+ headers: response.headers,
969
+ data: response.data,
970
+ url: fetchOptions.fullUrl || fetchOptions.url?.toString() || "",
971
+ method: fetchOptions.method || "GET",
972
+ config,
973
+ options,
974
+ currentWaitAttempt: rateLimitWaitAttempt
975
+ });
976
+ if (waitResult.shouldRetry) {
977
+ config._rateLimitWaitAttempt = waitResult.waitAttempt;
978
+ continue;
979
+ }
980
+ }
963
981
  const httpError = builErrorFromResponse(`Request failed with status code ${response.status}`, response, config, fetchOptions);
964
982
  if (config.retry && statusCodes?.includes(response.status)) {
965
983
  if (maxRetries > retries) {
@@ -1,6 +1,6 @@
1
- const _mod_63iyz4 = require('./picker.cjs');
2
- exports.detectRuntime = _mod_63iyz4.detectRuntime;
3
- exports.getAdapterCapabilities = _mod_63iyz4.getAdapterCapabilities;
4
- exports.buildAdapterContext = _mod_63iyz4.buildAdapterContext;
5
- exports.getAvailableAdapters = _mod_63iyz4.getAvailableAdapters;
6
- exports.selectAdapter = _mod_63iyz4.selectAdapter;;
1
+ const _mod_jrw0vw = require('./picker.cjs');
2
+ exports.detectRuntime = _mod_jrw0vw.detectRuntime;
3
+ exports.getAdapterCapabilities = _mod_jrw0vw.getAdapterCapabilities;
4
+ exports.buildAdapterContext = _mod_jrw0vw.buildAdapterContext;
5
+ exports.getAvailableAdapters = _mod_jrw0vw.getAvailableAdapters;
6
+ exports.selectAdapter = _mod_jrw0vw.selectAdapter;;
@@ -10,6 +10,7 @@ const { DownloadResponse } = require('../responses/universal/download.cjs');
10
10
  const { UploadResponse } = require('../responses/universal/upload.cjs');
11
11
  const { RezoPerformance } = require('../utils/tools.cjs');
12
12
  const { ResponseCache } = require('../cache/universal-response-cache.cjs');
13
+ const { handleRateLimitWait, shouldWaitOnStatus } = require('../utils/rate-limit-wait.cjs');
13
14
  const Environment = {
14
15
  isBrowser: typeof window !== "undefined" && typeof document !== "undefined",
15
16
  hasXHR: typeof XMLHttpRequest !== "undefined",
@@ -390,6 +391,24 @@ async function executeXHRRequest(fetchOptions, config, options, perform, streamR
390
391
  try {
391
392
  const response = await executeSingleXHRRequest(config, fetchOptions, timing, streamResult, downloadResult, uploadResult);
392
393
  if (response instanceof RezoError) {
394
+ const errorStatus = response.status || 0;
395
+ if (shouldWaitOnStatus(errorStatus, options.waitOnStatus)) {
396
+ const rateLimitWaitAttempt = config._rateLimitWaitAttempt || 0;
397
+ const waitResult = await handleRateLimitWait({
398
+ status: errorStatus,
399
+ headers: response.response?.headers || new RezoHeaders,
400
+ data: response.response?.data,
401
+ url: fetchOptions.fullUrl || fetchOptions.url?.toString() || "",
402
+ method: fetchOptions.method || "GET",
403
+ config,
404
+ options,
405
+ currentWaitAttempt: rateLimitWaitAttempt
406
+ });
407
+ if (waitResult.shouldRetry) {
408
+ config._rateLimitWaitAttempt = waitResult.waitAttempt;
409
+ continue;
410
+ }
411
+ }
393
412
  config.errors.push({
394
413
  attempt: config.retryAttempts + 1,
395
414
  error: response,
@@ -10,6 +10,7 @@ import { DownloadResponse } from '../responses/universal/download.js';
10
10
  import { UploadResponse } from '../responses/universal/upload.js';
11
11
  import { RezoPerformance } from '../utils/tools.js';
12
12
  import { ResponseCache } from '../cache/universal-response-cache.js';
13
+ import { handleRateLimitWait, shouldWaitOnStatus } from '../utils/rate-limit-wait.js';
13
14
  const Environment = {
14
15
  isBrowser: typeof window !== "undefined" && typeof document !== "undefined",
15
16
  hasXHR: typeof XMLHttpRequest !== "undefined",
@@ -390,6 +391,24 @@ async function executeXHRRequest(fetchOptions, config, options, perform, streamR
390
391
  try {
391
392
  const response = await executeSingleXHRRequest(config, fetchOptions, timing, streamResult, downloadResult, uploadResult);
392
393
  if (response instanceof RezoError) {
394
+ const errorStatus = response.status || 0;
395
+ if (shouldWaitOnStatus(errorStatus, options.waitOnStatus)) {
396
+ const rateLimitWaitAttempt = config._rateLimitWaitAttempt || 0;
397
+ const waitResult = await handleRateLimitWait({
398
+ status: errorStatus,
399
+ headers: response.response?.headers || new RezoHeaders,
400
+ data: response.response?.data,
401
+ url: fetchOptions.fullUrl || fetchOptions.url?.toString() || "",
402
+ method: fetchOptions.method || "GET",
403
+ config,
404
+ options,
405
+ currentWaitAttempt: rateLimitWaitAttempt
406
+ });
407
+ if (waitResult.shouldRetry) {
408
+ config._rateLimitWaitAttempt = waitResult.waitAttempt;
409
+ continue;
410
+ }
411
+ }
393
412
  config.errors.push({
394
413
  attempt: config.retryAttempts + 1,
395
414
  error: response,
@@ -1,15 +1,9 @@
1
- const _mod_6dj0p0 = require('./lru-cache.cjs');
2
- exports.LRUCache = _mod_6dj0p0.LRUCache;;
3
- const _mod_9z4pm1 = require('./dns-cache.cjs');
4
- exports.DNSCache = _mod_9z4pm1.DNSCache;
5
- exports.getGlobalDNSCache = _mod_9z4pm1.getGlobalDNSCache;
6
- exports.resetGlobalDNSCache = _mod_9z4pm1.resetGlobalDNSCache;;
7
- const _mod_5ylf2b = require('./response-cache.cjs');
8
- exports.ResponseCache = _mod_5ylf2b.ResponseCache;
9
- exports.normalizeResponseCacheConfig = _mod_5ylf2b.normalizeResponseCacheConfig;;
10
- const _mod_fk65c2 = require('./file-cacher.cjs');
11
- exports.FileCacher = _mod_fk65c2.FileCacher;;
12
- const _mod_7tzsb5 = require('./url-store.cjs');
13
- exports.UrlStore = _mod_7tzsb5.UrlStore;;
14
- const _mod_04c8wb = require('./navigation-history.cjs');
15
- exports.NavigationHistory = _mod_04c8wb.NavigationHistory;;
1
+ const _mod_0ku4z2 = require('./lru-cache.cjs');
2
+ exports.LRUCache = _mod_0ku4z2.LRUCache;;
3
+ const _mod_odvwch = require('./dns-cache.cjs');
4
+ exports.DNSCache = _mod_odvwch.DNSCache;
5
+ exports.getGlobalDNSCache = _mod_odvwch.getGlobalDNSCache;
6
+ exports.resetGlobalDNSCache = _mod_odvwch.resetGlobalDNSCache;;
7
+ const _mod_b9x6qw = require('./response-cache.cjs');
8
+ exports.ResponseCache = _mod_b9x6qw.ResponseCache;
9
+ exports.normalizeResponseCacheConfig = _mod_b9x6qw.normalizeResponseCacheConfig;;
@@ -1,6 +1,3 @@
1
1
  export { LRUCache } from './lru-cache.js';
2
2
  export { DNSCache, getGlobalDNSCache, resetGlobalDNSCache } from './dns-cache.js';
3
3
  export { ResponseCache, normalizeResponseCacheConfig } from './response-cache.js';
4
- export { FileCacher } from './file-cacher.js';
5
- export { UrlStore } from './url-store.js';
6
- export { NavigationHistory } from './navigation-history.js';
@@ -24,7 +24,8 @@ function createDefaultHooks() {
24
24
  onDns: [],
25
25
  onTls: [],
26
26
  onTimeout: [],
27
- onAbort: []
27
+ onAbort: [],
28
+ onRateLimitWait: []
28
29
  };
29
30
  }
30
31
  function mergeHooks(base, overrides) {
@@ -55,7 +56,8 @@ function mergeHooks(base, overrides) {
55
56
  onDns: [...base.onDns, ...overrides.onDns || []],
56
57
  onTls: [...base.onTls, ...overrides.onTls || []],
57
58
  onTimeout: [...base.onTimeout, ...overrides.onTimeout || []],
58
- onAbort: [...base.onAbort, ...overrides.onAbort || []]
59
+ onAbort: [...base.onAbort, ...overrides.onAbort || []],
60
+ onRateLimitWait: [...base.onRateLimitWait, ...overrides.onRateLimitWait || []]
59
61
  };
60
62
  }
61
63
  function serializeHooks(hooks) {
@@ -24,7 +24,8 @@ export function createDefaultHooks() {
24
24
  onDns: [],
25
25
  onTls: [],
26
26
  onTimeout: [],
27
- onAbort: []
27
+ onAbort: [],
28
+ onRateLimitWait: []
28
29
  };
29
30
  }
30
31
  export function mergeHooks(base, overrides) {
@@ -55,7 +56,8 @@ export function mergeHooks(base, overrides) {
55
56
  onDns: [...base.onDns, ...overrides.onDns || []],
56
57
  onTls: [...base.onTls, ...overrides.onTls || []],
57
58
  onTimeout: [...base.onTimeout, ...overrides.onTimeout || []],
58
- onAbort: [...base.onAbort, ...overrides.onAbort || []]
59
+ onAbort: [...base.onAbort, ...overrides.onAbort || []],
60
+ onRateLimitWait: [...base.onRateLimitWait, ...overrides.onRateLimitWait || []]
59
61
  };
60
62
  }
61
63
  export function serializeHooks(hooks) {
@@ -0,0 +1 @@
1
/**
 * Decodo scraper-API addon (CommonJS build).
 *
 * Re-exports the option constants/helpers from ./options.cjs and exposes the
 * `Decodo` client, which posts scrape jobs to SCRAPE_ENDPOINT through a Rezo
 * HTTP client.
 */
const { Rezo } = require("../../../core/rezo.cjs");
const decodoOptions = require("./options.cjs");

// Pass-through re-exports so consumers can import everything from this module.
exports.DECODO_DEVICE_TYPES = decodoOptions.DECODO_DEVICE_TYPES;
exports.DECODO_HEADLESS_MODES = decodoOptions.DECODO_HEADLESS_MODES;
exports.DECODO_COMMON_LOCALES = decodoOptions.DECODO_COMMON_LOCALES;
exports.DECODO_COMMON_COUNTRIES = decodoOptions.DECODO_COMMON_COUNTRIES;
exports.DECODO_EUROPEAN_COUNTRIES = decodoOptions.DECODO_EUROPEAN_COUNTRIES;
exports.DECODO_ASIAN_COUNTRIES = decodoOptions.DECODO_ASIAN_COUNTRIES;
exports.DECODO_US_STATES = decodoOptions.DECODO_US_STATES;
exports.DECODO_COMMON_CITIES = decodoOptions.DECODO_COMMON_CITIES;
exports.getRandomDeviceType = decodoOptions.getRandomDeviceType;
exports.getRandomLocale = decodoOptions.getRandomLocale;
exports.getRandomCountry = decodoOptions.getRandomCountry;
exports.getRandomCity = decodoOptions.getRandomCity;
exports.generateSessionId = decodoOptions.generateSessionId;

/** Endpoint every scrape request is posted to (also used as the client's baseURL). */
const SCRAPE_ENDPOINT = "https://scraper-api.smartproxy.com/v2/scrape";

class Decodo {
  /** Resolved options with defaults applied; never contains the token. */
  config;
  /** Rezo HTTP client used for all requests. */
  http;
  /** Value sent in the `Authorization` header. */
  authHeader;
  /**
   * Token supplied at construction ("" when username/password auth is used).
   * Kept outside `config` so `getConfig()` does not expose it, but retained so
   * `withConfig()` can clone a token-authenticated instance.
   */
  #token = "";

  /**
   * @param settings Client options. Must contain either a non-empty
   *   `username` + `password` pair or a non-empty `token`.
   * @throws {Error} When neither form of credentials is provided.
   */
  constructor(settings) {
    const hasPasswordAuth =
      "username" in settings &&
      "password" in settings &&
      settings.username &&
      settings.password;
    const token = "token" in settings && settings.token;
    if (!hasPasswordAuth && !token) {
      throw Error("Decodo requires either username/password or token for authentication");
    }

    this.config = {
      username: settings.username ?? "",
      password: settings.password ?? "",
      deviceType: settings.deviceType ?? "desktop",
      locale: settings.locale ?? "en-US",
      country: settings.country ?? "",
      state: settings.state ?? "",
      city: settings.city ?? "",
      headless: settings.headless ?? void 0,
      headers: settings.headers ?? {},
      sessionId: settings.sessionId ?? "",
      sessionDuration: settings.sessionDuration ?? 0,
      javascript: settings.javascript ?? "",
      javascriptWait: settings.javascriptWait ?? 0,
      waitForCss: settings.waitForCss ?? "",
      timeout: settings.timeout ?? 120000
    };
    this.http = new Rezo({ baseURL: SCRAPE_ENDPOINT, timeout: this.config.timeout });

    // A token takes precedence over username/password when both are supplied.
    if (token) {
      this.#token = settings.token;
      this.authHeader = `Basic ${settings.token}`;
    } else {
      const encoded = Buffer.from(`${settings.username}:${settings.password}`).toString("base64");
      this.authHeader = `Basic ${encoded}`;
    }
  }

  /**
   * Scrape a single URL.
   *
   * @param url Target URL.
   * @param overrides Per-request options merged over the instance config
   *   (`headers` are merged key-by-key rather than replaced).
   * @returns Normalized result built from the first entry of the API's
   *   `results` array; the untouched API payload is available as `raw`.
   * @throws {Error} When the API payload carries `error` or has no results.
   */
  async scrape(url, overrides) {
    const effective = {
      ...this.config,
      ...overrides,
      headers: { ...this.config.headers, ...(overrides?.headers || {}) }
    };
    const body = this.buildRequestBody(url, effective);
    const reply = await this.http.postJson(SCRAPE_ENDPOINT, body, {
      headers: { Authorization: this.authHeader, "Content-Type": "application/json" }
    });
    const payload = reply.data;

    if (payload.error) {
      throw Error(`Decodo API error: ${payload.error} (${payload.error_code || "unknown"})`);
    }
    if (!payload.results || payload.results.length === 0) {
      throw Error("Decodo API returned no results");
    }

    const first = payload.results[0];
    return {
      statusCode: first.status_code,
      url: first.url,
      content: first.body,
      cookies: first.cookies || [],
      headers: first.headers || {},
      taskId: payload.id,
      rendered: !!effective.headless,
      country: effective.country || void 0,
      city: effective.city || void 0,
      state: effective.state || void 0,
      deviceType: effective.deviceType,
      contentType: first.content_type,
      contentLength: first.content_length,
      raw: payload
    };
  }

  /**
   * Scrape several URLs one after another, in order.
   *
   * @param urls URLs to scrape.
   * @param overrides Per-request options passed to every `scrape` call.
   * @param delay Milliseconds to pause between consecutive requests; no pause
   *   follows the last one, and a value of 0 or less disables pausing.
   * @returns Results in the same order as `urls`. The first failing scrape
   *   rejects the whole call.
   */
  async scrapeMany(urls, overrides, delay = 1000) {
    const results = [];
    for (let index = 0; index < urls.length; index++) {
      results.push(await this.scrape(urls[index], overrides));
      const isLast = index >= urls.length - 1;
      if (!isLast && delay > 0) {
        await new Promise((resolve) => setTimeout(resolve, delay));
      }
    }
    return results;
  }

  /**
   * Scrape several URLs under one freshly generated session id, with a fixed
   * 500 ms pause between requests.
   *
   * @param urls URLs to scrape.
   * @param overrides Per-request options; `sessionId`/`sessionDuration` are overwritten.
   * @param duration Value sent as the session duration (default 10).
   */
  async scrapeWithSession(urls, overrides, duration = 10) {
    const sessionId = `session_${Date.now()}_${Math.random().toString(36).substring(2, 11)}`;
    return this.scrapeMany(urls, { ...overrides, sessionId, sessionDuration: duration }, 500);
  }

  /**
   * Translate camelCase options into the snake_case request body.
   * Only truthy options are emitted; `session_duration` is sent only together
   * with a session id, and `javascript_wait` only together with `javascript`.
   */
  buildRequestBody(url, opts) {
    const body = { url, return_cookies: true, return_headers: true };
    if (opts.deviceType) body.device_type = opts.deviceType;
    if (opts.headless) body.headless = opts.headless;
    if (opts.locale) body.locale = opts.locale;
    if (opts.country) body.country = opts.country;
    if (opts.state) body.state = opts.state;
    if (opts.city) body.city = opts.city;
    if (opts.sessionId) {
      body.session = opts.sessionId;
      if (opts.sessionDuration) body.session_duration = opts.sessionDuration;
    }
    if (opts.headers && Object.keys(opts.headers).length > 0) body.headers = opts.headers;
    if (opts.javascript) {
      body.javascript = opts.javascript;
      if (opts.javascriptWait) body.javascript_wait = opts.javascriptWait;
    }
    if (opts.waitForCss) body.wait_for_css = opts.waitForCss;
    return body;
  }

  /** Returns a copy of the resolved config with the password masked as "***". */
  getConfig() {
    return { ...this.config, password: "***" };
  }

  /**
   * Create a new client from this instance's config with `overrides` applied.
   *
   * The token is not part of `config`, so a token-authenticated instance used
   * to fail here with "requires either username/password or token". The
   * original token is now reused whenever the merged options contain neither
   * a complete username/password pair nor a token of their own.
   */
  withConfig(overrides) {
    const merged = { ...this.config, ...overrides };
    const hasPasswordAuth = Boolean(merged.username && merged.password);
    if (this.#token && !hasPasswordAuth && !merged.token) {
      merged.token = this.#token;
    }
    return new Decodo(merged);
  }

  /**
   * Issue a scrape of https://httpbin.org/ip to verify connectivity.
   *
   * @returns `true` on success.
   * @throws {Error} Wrapping the underlying failure message.
   */
  async testConnection() {
    try {
      await this.scrape("https://httpbin.org/ip");
      return true;
    } catch (err) {
      throw Error(`Decodo connection test failed: ${err.message}`);
    }
  }
}

exports.Decodo = Decodo;
exports.default = Decodo;
// Make `require()` return the class itself while keeping the named exports on it.
module.exports = Object.assign(Decodo, exports);
@@ -0,0 +1 @@
1
/**
 * Decodo scraper-API addon (ES module build).
 *
 * Re-exports the option constants/helpers from ./options.js and exposes the
 * `Decodo` client, which posts scrape jobs to SCRAPE_ENDPOINT through a Rezo
 * HTTP client.
 */
import { Rezo } from "../../../core/rezo.js";
import {
  DECODO_DEVICE_TYPES,
  DECODO_HEADLESS_MODES,
  DECODO_COMMON_LOCALES,
  DECODO_COMMON_COUNTRIES,
  DECODO_EUROPEAN_COUNTRIES,
  DECODO_ASIAN_COUNTRIES,
  DECODO_US_STATES,
  DECODO_COMMON_CITIES,
  getRandomDeviceType,
  getRandomLocale,
  getRandomCountry,
  getRandomCity,
  generateSessionId
} from "./options.js";

/** Endpoint every scrape request is posted to (also used as the client's baseURL). */
const SCRAPE_ENDPOINT = "https://scraper-api.smartproxy.com/v2/scrape";

class Decodo {
  /** Resolved options with defaults applied; never contains the token. */
  config;
  /** Rezo HTTP client used for all requests. */
  http;
  /** Value sent in the `Authorization` header. */
  authHeader;
  /**
   * Token supplied at construction ("" when username/password auth is used).
   * Kept outside `config` so `getConfig()` does not expose it, but retained so
   * `withConfig()` can clone a token-authenticated instance.
   */
  #token = "";

  /**
   * @param settings Client options. Must contain either a non-empty
   *   `username` + `password` pair or a non-empty `token`.
   * @throws {Error} When neither form of credentials is provided.
   */
  constructor(settings) {
    const hasPasswordAuth =
      "username" in settings &&
      "password" in settings &&
      settings.username &&
      settings.password;
    const token = "token" in settings && settings.token;
    if (!hasPasswordAuth && !token) {
      throw Error("Decodo requires either username/password or token for authentication");
    }

    this.config = {
      username: settings.username ?? "",
      password: settings.password ?? "",
      deviceType: settings.deviceType ?? "desktop",
      locale: settings.locale ?? "en-US",
      country: settings.country ?? "",
      state: settings.state ?? "",
      city: settings.city ?? "",
      headless: settings.headless ?? void 0,
      headers: settings.headers ?? {},
      sessionId: settings.sessionId ?? "",
      sessionDuration: settings.sessionDuration ?? 0,
      javascript: settings.javascript ?? "",
      javascriptWait: settings.javascriptWait ?? 0,
      waitForCss: settings.waitForCss ?? "",
      timeout: settings.timeout ?? 120000
    };
    this.http = new Rezo({ baseURL: SCRAPE_ENDPOINT, timeout: this.config.timeout });

    // A token takes precedence over username/password when both are supplied.
    if (token) {
      this.#token = settings.token;
      this.authHeader = `Basic ${settings.token}`;
    } else {
      const encoded = Buffer.from(`${settings.username}:${settings.password}`).toString("base64");
      this.authHeader = `Basic ${encoded}`;
    }
  }

  /**
   * Scrape a single URL.
   *
   * @param url Target URL.
   * @param overrides Per-request options merged over the instance config
   *   (`headers` are merged key-by-key rather than replaced).
   * @returns Normalized result built from the first entry of the API's
   *   `results` array; the untouched API payload is available as `raw`.
   * @throws {Error} When the API payload carries `error` or has no results.
   */
  async scrape(url, overrides) {
    const effective = {
      ...this.config,
      ...overrides,
      headers: { ...this.config.headers, ...(overrides?.headers || {}) }
    };
    const body = this.buildRequestBody(url, effective);
    const reply = await this.http.postJson(SCRAPE_ENDPOINT, body, {
      headers: { Authorization: this.authHeader, "Content-Type": "application/json" }
    });
    const payload = reply.data;

    if (payload.error) {
      throw Error(`Decodo API error: ${payload.error} (${payload.error_code || "unknown"})`);
    }
    if (!payload.results || payload.results.length === 0) {
      throw Error("Decodo API returned no results");
    }

    const first = payload.results[0];
    return {
      statusCode: first.status_code,
      url: first.url,
      content: first.body,
      cookies: first.cookies || [],
      headers: first.headers || {},
      taskId: payload.id,
      rendered: !!effective.headless,
      country: effective.country || void 0,
      city: effective.city || void 0,
      state: effective.state || void 0,
      deviceType: effective.deviceType,
      contentType: first.content_type,
      contentLength: first.content_length,
      raw: payload
    };
  }

  /**
   * Scrape several URLs one after another, in order.
   *
   * @param urls URLs to scrape.
   * @param overrides Per-request options passed to every `scrape` call.
   * @param delay Milliseconds to pause between consecutive requests; no pause
   *   follows the last one, and a value of 0 or less disables pausing.
   * @returns Results in the same order as `urls`. The first failing scrape
   *   rejects the whole call.
   */
  async scrapeMany(urls, overrides, delay = 1000) {
    const results = [];
    for (let index = 0; index < urls.length; index++) {
      results.push(await this.scrape(urls[index], overrides));
      const isLast = index >= urls.length - 1;
      if (!isLast && delay > 0) {
        await new Promise((resolve) => setTimeout(resolve, delay));
      }
    }
    return results;
  }

  /**
   * Scrape several URLs under one freshly generated session id, with a fixed
   * 500 ms pause between requests.
   *
   * @param urls URLs to scrape.
   * @param overrides Per-request options; `sessionId`/`sessionDuration` are overwritten.
   * @param duration Value sent as the session duration (default 10).
   */
  async scrapeWithSession(urls, overrides, duration = 10) {
    const sessionId = `session_${Date.now()}_${Math.random().toString(36).substring(2, 11)}`;
    return this.scrapeMany(urls, { ...overrides, sessionId, sessionDuration: duration }, 500);
  }

  /**
   * Translate camelCase options into the snake_case request body.
   * Only truthy options are emitted; `session_duration` is sent only together
   * with a session id, and `javascript_wait` only together with `javascript`.
   */
  buildRequestBody(url, opts) {
    const body = { url, return_cookies: true, return_headers: true };
    if (opts.deviceType) body.device_type = opts.deviceType;
    if (opts.headless) body.headless = opts.headless;
    if (opts.locale) body.locale = opts.locale;
    if (opts.country) body.country = opts.country;
    if (opts.state) body.state = opts.state;
    if (opts.city) body.city = opts.city;
    if (opts.sessionId) {
      body.session = opts.sessionId;
      if (opts.sessionDuration) body.session_duration = opts.sessionDuration;
    }
    if (opts.headers && Object.keys(opts.headers).length > 0) body.headers = opts.headers;
    if (opts.javascript) {
      body.javascript = opts.javascript;
      if (opts.javascriptWait) body.javascript_wait = opts.javascriptWait;
    }
    if (opts.waitForCss) body.wait_for_css = opts.waitForCss;
    return body;
  }

  /** Returns a copy of the resolved config with the password masked as "***". */
  getConfig() {
    return { ...this.config, password: "***" };
  }

  /**
   * Create a new client from this instance's config with `overrides` applied.
   *
   * The token is not part of `config`, so a token-authenticated instance used
   * to fail here with "requires either username/password or token". The
   * original token is now reused whenever the merged options contain neither
   * a complete username/password pair nor a token of their own.
   */
  withConfig(overrides) {
    const merged = { ...this.config, ...overrides };
    const hasPasswordAuth = Boolean(merged.username && merged.password);
    if (this.#token && !hasPasswordAuth && !merged.token) {
      merged.token = this.#token;
    }
    return new Decodo(merged);
  }

  /**
   * Issue a scrape of https://httpbin.org/ip to verify connectivity.
   *
   * @returns `true` on success.
   * @throws {Error} Wrapping the underlying failure message.
   */
  async testConnection() {
    try {
      await this.scrape("https://httpbin.org/ip");
      return true;
    } catch (err) {
      throw Error(`Decodo connection test failed: ${err.message}`);
    }
  }
}

export {
  getRandomLocale,
  getRandomDeviceType,
  getRandomCountry,
  getRandomCity,
  generateSessionId,
  Decodo,
  DECODO_US_STATES,
  DECODO_HEADLESS_MODES,
  DECODO_EUROPEAN_COUNTRIES,
  DECODO_DEVICE_TYPES,
  DECODO_COMMON_LOCALES,
  DECODO_COMMON_COUNTRIES,
  DECODO_COMMON_CITIES,
  DECODO_ASIAN_COUNTRIES
};
export default Decodo;
@@ -0,0 +1 @@
1
// Mark the CommonJS exports as a transpiled ES module. The original built
// `module.exports` from a namespace object that was still empty at that
// point, so `CrawlerOptions` never reached `module.exports`; the class is
// now attached directly at the bottom of the file.
Object.defineProperty(module.exports, "__esModule", { value: true });

const { RezoQueue } = require("../queue/queue.cjs");
const { Oxylabs } = require("./addon/oxylabs/index.cjs");
const path = require("node:path");
const os = require("node:os");
const { Decodo } = require("./addon/decodo/index.cjs");

/** Dotted hostname ending in an alphabetic TLD of two or more letters. */
const HOSTNAME_PATTERN = /^(?:(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,})$/;

/**
 * Returns the registry on `options` holding entries of the given kind.
 * Any kind other than "headers", "limiters", "oxylabs" or "decodo"
 * selects the proxy list.
 */
function registryFor(options, kind) {
  switch (kind) {
    case "headers":
      return options.requestHeaders;
    case "limiters":
      return options.limiters;
    case "oxylabs":
      return options.oxylabs;
    case "decodo":
      return options.decodo;
    default:
      return options.proxies;
  }
}

/** Extracts the value stored in a registry entry of the given kind. */
function payloadOf(entry, kind) {
  switch (kind) {
    case "headers":
      return entry.headers;
    case "limiters":
      return entry.pqueue;
    case "oxylabs":
    case "decodo":
      return entry.adaptar;
    default:
      return entry.proxy;
  }
}

/**
 * Resolved crawler configuration plus per-domain registries for request
 * headers, proxies, rate limiters and the Oxylabs / Decodo adapters.
 */
class CrawlerOptions {
  baseUrl;
  adapter;
  enableNavigationHistory;
  sessionId;
  rejectUnauthorized;
  userAgent;
  useRndUserAgent;
  timeout;
  maxRedirects;
  maxRetryAttempts;
  retryDelay;
  retryOnStatusCode;
  forceRevisit;
  retryWithoutProxyOnStatusCode;
  retryOnProxyError;
  maxRetryOnProxyError;
  allowRevisiting;
  enableCache;
  cacheTTL;
  cacheDir;
  throwFatalError;
  debug;
  maxDepth;
  maxUrls;
  maxResponseSize;
  respectRobotsTxt;
  followNofollow;
  autoThrottle;
  autoThrottleTargetDelay;
  autoThrottleMinDelay;
  autoThrottleMaxDelay;
  maxWaitOn429;
  alwaysWaitOn429;
  oxylabs = [];
  decodo = [];
  proxies = [];
  limiters = [];
  requestHeaders = [];
  userAgents = generateUserAgents();

  /**
   * @param options User-supplied settings; every omitted field gets the
   *   default shown below.
   */
  constructor(options = {}) {
    this.baseUrl = options.baseUrl || "";
    this.adapter = options.adapter ?? "http";
    this.enableNavigationHistory = options.enableNavigationHistory ?? false;
    this.sessionId =
      options.sessionId ?? `session_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;
    this.rejectUnauthorized = options.rejectUnauthorized ?? true;
    this.userAgent = options.userAgent;
    this.useRndUserAgent = options.useRndUserAgent ?? false;
    this.timeout = options.timeout ?? 30000;
    this.maxRedirects = options.maxRedirects ?? 10;
    this.maxRetryAttempts = options.maxRetryAttempts ?? 3;
    this.retryDelay = options.retryDelay ?? 0;
    this.retryOnStatusCode = options.retryOnStatusCode ?? [408, 429, 500, 502, 503, 504];
    this.forceRevisit = options.forceRevisit ?? false;
    this.retryWithoutProxyOnStatusCode = options.retryWithoutProxyOnStatusCode ?? [407, 403];
    this.retryOnProxyError = options.retryOnProxyError ?? true;
    this.maxRetryOnProxyError = options.maxRetryOnProxyError ?? 3;
    this.allowRevisiting = options.allowRevisiting ?? false;
    this.enableCache = options.enableCache ?? true;
    this.cacheTTL = options.cacheTTL ?? 604800000;
    this.cacheDir = options.cacheDir ?? path.join(os.tmpdir(), "uiniqhtt_cache");
    this.throwFatalError = options.throwFatalError ?? false;
    this.debug = options.debug ?? false;
    this.maxDepth = options.maxDepth ?? 0;
    this.maxUrls = options.maxUrls ?? 0;
    this.maxResponseSize = options.maxResponseSize ?? 0;
    this.respectRobotsTxt = options.respectRobotsTxt ?? false;
    this.followNofollow = options.followNofollow ?? false;
    this.autoThrottle = options.autoThrottle ?? true;
    this.autoThrottleTargetDelay = options.autoThrottleTargetDelay ?? 1000;
    this.autoThrottleMinDelay = options.autoThrottleMinDelay ?? 100;
    this.autoThrottleMaxDelay = options.autoThrottleMaxDelay ?? 60000;
    this.maxWaitOn429 = options.maxWaitOn429 ?? 1800000;
    this.alwaysWaitOn429 = options.alwaysWaitOn429 ?? false;

    this._addHeaders(options.headers);
    this._addOxylabs(options.oxylabs);
    this._addDecodo(options.decodo);
    this._addProxies(options.proxy);
    this._addLimiters(options.limiter);
  }

  /**
   * Lists the distinct `domain` values registered for one kind of entry.
   *
   * @param type "headers", "limiters", "oxylabs" or "decodo"; anything else
   *   selects proxies.
   */
  getConfiguredDomains(type) {
    const domains = registryFor(this, type)
      .filter((entry) => entry.domain)
      .map((entry) => entry.domain);
    return domains.filter((domain, index, all) => all.indexOf(domain) === index);
  }

  /**
   * Drops every entry whose `domain` equals `domain` (see `_domainsEqual`)
   * from all registries, including Decodo. Entries without a domain stay.
   *
   * @returns This instance, for chaining.
   */
  removeDomain(domain) {
    const keep = (entry) => !entry.domain || !this._domainsEqual(entry.domain, domain);
    this.requestHeaders = this.requestHeaders.filter(keep);
    this.proxies = this.proxies.filter(keep);
    this.limiters = this.limiters.filter(keep);
    this.oxylabs = this.oxylabs.filter(keep);
    this.decodo = this.decodo.filter(keep);
    return this;
  }

  /**
   * Compares two domain specs: arrays element-by-element with `===`,
   * everything else with `===` directly.
   */
  _domainsEqual(left, right) {
    if (Array.isArray(left) && Array.isArray(right)) {
      return left.length === right.length && left.every((item, index) => item === right[index]);
    }
    return left === right;
  }

  /** Counts total, global and domain-specific entries for each registry. */
  getConfigurationSummary() {
    const summarize = (entries) => ({
      total: entries.length,
      global: entries.filter((entry) => entry.isGlobal).length,
      domainSpecific: entries.filter((entry) => !entry.isGlobal && entry.domain).length,
    });
    return {
      headers: summarize(this.requestHeaders),
      proxies: summarize(this.proxies),
      limiters: summarize(this.limiters),
      oxylabs: summarize(this.oxylabs),
      decodo: summarize(this.decodo),
    };
  }

  /**
   * Registers header sets. Entries that are neither domain-bound nor global,
   * or whose headers are empty, are skipped. `Headers` instances are
   * flattened to plain objects.
   */
  _addHeaders(config) {
    if (!config || !config.enable) return;
    for (const item of config.httpHeaders ?? []) {
      const { domain, isGlobal } = item;
      let { headers } = item;
      if (!domain && !isGlobal) continue;
      if (headers instanceof Headers) {
        headers = Object.fromEntries(headers.entries());
      }
      if (!headers || Object.keys(headers).length < 1) continue;
      this.requestHeaders.push({ domain, isGlobal, headers });
    }
  }

  /** Registers proxies; entries without scope or with an empty proxy are skipped. */
  _addProxies(config) {
    if (!config || !config.enable) return;
    for (const { domain, isGlobal, proxy } of config.proxies ?? []) {
      if (!domain && !isGlobal) continue;
      if (!proxy || Object.keys(proxy).length < 1) continue;
      this.proxies.push({ domain, isGlobal, proxy });
    }
  }

  /** Registers limiters, building one `RezoQueue` per entry from its options. */
  _addLimiters(config) {
    if (!config || !config.enable) return;
    for (const { domain, isGlobal, options } of config.limiters ?? []) {
      if (!domain && !isGlobal) continue;
      if (!options || Object.keys(options).length < 1) continue;
      this.limiters.push({ domain, isGlobal, pqueue: new RezoQueue(options) });
    }
  }

  /** Registers Oxylabs adapters, building one `Oxylabs` client per entry. */
  _addOxylabs(config) {
    if (!config || !config.enable) return;
    for (const { domain, isGlobal, options } of config.labs ?? []) {
      if (!domain && !isGlobal) continue;
      if (!options || Object.keys(options).length < 1) continue;
      this.oxylabs.push({ domain, isGlobal, adaptar: new Oxylabs(options) });
    }
  }

  /** Registers Decodo adapters, building one `Decodo` client per entry. */
  _addDecodo(config) {
    if (!config || !config.enable) return;
    for (const { domain, isGlobal, options } of config.labs ?? []) {
      if (!domain && !isGlobal) continue;
      if (!options || Object.keys(options).length < 1) continue;
      this.decodo.push({ domain, isGlobal, adaptar: new Decodo(options) });
    }
  }

  /** Adds one header entry. @returns This instance, for chaining. */
  addHeaders(entry) {
    this._addHeaders({ enable: true, httpHeaders: [entry] });
    return this;
  }

  /** Adds one proxy entry. @returns This instance, for chaining. */
  addProxy(entry) {
    this._addProxies({ enable: true, proxies: [entry] });
    return this;
  }

  /** Adds one limiter entry. @returns This instance, for chaining. */
  addLimiter(entry) {
    this._addLimiters({ enable: true, limiters: [entry] });
    return this;
  }

  /** Adds one Oxylabs entry. @returns This instance, for chaining. */
  addOxylabs(entry) {
    this._addOxylabs({ enable: true, labs: [entry] });
    return this;
  }

  /** Adds one Decodo entry. @returns This instance, for chaining. */
  addDecodo(entry) {
    this._addDecodo({ enable: true, labs: [entry] });
    return this;
  }

  /**
   * Removes every global entry from all registries, including Decodo.
   *
   * @returns This instance, for chaining.
   */
  clearGlobalConfigs() {
    const notGlobal = (entry) => !entry.isGlobal;
    if (Array.isArray(this.requestHeaders)) this.requestHeaders = this.requestHeaders.filter(notGlobal);
    if (Array.isArray(this.oxylabs)) this.oxylabs = this.oxylabs.filter(notGlobal);
    if (Array.isArray(this.decodo)) this.decodo = this.decodo.filter(notGlobal);
    if (Array.isArray(this.limiters)) this.limiters = this.limiters.filter(notGlobal);
    if (Array.isArray(this.proxies)) this.proxies = this.proxies.filter(notGlobal);
    return this;
  }

  /**
   * Resolves the headers / queue / adapter / proxy that applies to `url`.
   *
   * Domain-matching entries take priority. If none match and `useGlobal` is
   * truthy, the choice is made among global entries only. Previously the
   * fallback drew from every entry and returned null whenever the drawn
   * entry was not global.
   *
   * @param url URL or bare hostname.
   * @param type "headers", "limiters", "oxylabs" or "decodo"; anything else
   *   selects proxies.
   * @param useGlobal Allow falling back to global entries.
   * @param random Pick randomly among candidates instead of taking the first.
   * @returns The stored value, or null when nothing applies.
   */
  getAdapter(url, type, useGlobal, random) {
    if (!this.getDomainName(url)) return null;

    const registry = registryFor(this, type);
    const choose = (candidates) =>
      random ? candidates[this.rnd(0, candidates.length - 1)] : candidates[0];

    const matching = registry.filter((entry) => this._hasDomain(url, entry.domain));
    if (matching.length) return payloadOf(choose(matching), type);

    if (useGlobal) {
      const globals = registry.filter((entry) => entry.isGlobal);
      if (globals.length) return payloadOf(choose(globals), type);
    }
    return null;
  }

  /** Random integer in the inclusive range [min, max]. */
  rnd(min = 0, max = Number.MAX_VALUE) {
    return Math.floor(Math.random() * (max - min + 1)) + min;
  }

  /**
   * Tells whether any entry of the given kind applies to `url`, optionally
   * counting global entries as a match.
   */
  hasDomain(url, type, useGlobal) {
    if (!this.getDomainName(url)) return false;
    const registry = registryFor(this, type);
    if (registry.some((entry) => this._hasDomain(url, entry.domain))) return true;
    return Boolean(useGlobal) && registry.some((entry) => entry.isGlobal);
  }

  /**
   * Builds the header object for a request to `url`.
   *
   * Starts from the configured headers for the URL, overlays `extra`, then
   * optionally sets a random user agent.
   *
   * @param url URL or bare hostname.
   * @param useGlobal Allow global header entries as the base.
   * @param extra `Headers` instance or plain object; for plain objects only
   *   string values are copied.
   * @param randomUserAgent Overwrite `user-agent` with a generated one.
   */
  pickHeaders(url, useGlobal, extra, randomUserAgent) {
    const base = this.getAdapter(url, "headers", useGlobal);
    const merged = new Headers(base ?? {});

    if (extra instanceof Headers) {
      // Iterate the Headers directly: Object.entries() of its iterator is
      // always empty, which used to drop these headers silently.
      for (const [name, value] of extra.entries()) merged.set(name, value);
    } else if (extra && typeof extra === "object") {
      for (const [name, value] of Object.entries(extra)) {
        if (typeof value === "string") merged.set(name, value);
      }
    }

    if (randomUserAgent) merged.set("user-agent", this.getRandomUserAgent());
    return Object.fromEntries(merged.entries());
  }

  /**
   * Tests `url` against a domain spec: a RegExp, a string, or an array of
   * either. Strings are tried as an exact hostname, then a `*` wildcard,
   * then a regex-like pattern, then a parent/sub-domain relationship.
   */
  _hasDomain(url, spec) {
    if (!spec) return false;
    const host = this.getDomainName(url);
    if (!host) return false;

    const looksLikeRegex = (text) =>
      /[\^\$\*\+\?\{\}\[\]\(\)\|\\]/.test(text) ||
      text.startsWith("/") ||
      text.includes(".*") ||
      text.includes(".+");

    const matches = (candidate) => {
      if (candidate instanceof RegExp) return candidate.test(host) || candidate.test(url);

      const pattern = candidate.toString().trim();
      const hostLower = host.toLowerCase();
      const patternLower = pattern.toLowerCase();
      if (hostLower === patternLower) return true;

      if (pattern.includes("*")) {
        // Escape regex metacharacters, then turn each escaped `*` into `.*`.
        const source = pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&").replace(/\\\*/g, ".*");
        const wildcard = new RegExp(`^${source}$`, "i");
        return wildcard.test(host) || wildcard.test(url);
      }

      if (looksLikeRegex(pattern)) {
        try {
          let source = pattern;
          let flags = "i";
          const literal = pattern.match(/^\/(.*)\/(\w*)$/);
          if (literal) {
            source = literal[1];
            flags = literal[2] || "i";
          }
          const expression = new RegExp(source, flags);
          return expression.test(host) || expression.test(url);
        } catch {
          // Not a valid regular expression: fall back to a substring test.
          return hostLower.includes(patternLower);
        }
      }

      return (
        hostLower === patternLower ||
        hostLower.endsWith("." + patternLower) ||
        patternLower.endsWith("." + hostLower)
      );
    };

    return Array.isArray(spec) ? spec.some(matches) : matches(spec);
  }

  /**
   * Returns the hostname of an http(s) URL, the input itself when it is
   * already a bare hostname, or null otherwise.
   */
  getDomainName(value) {
    if (this.isValidUrl(value)) return new URL(value).hostname;
    if (this.isHostName(value)) return value;
    return null;
  }

  /**
   * Checks that `value` is a bare hostname of at most 255 characters.
   * The pattern previously contained a stray space, so no hostname matched.
   */
  isHostName(value) {
    if (!value) return false;
    if (value.length > 255) return false;
    const normalized = value.trim().toLowerCase();
    return (
      HOSTNAME_PATTERN.test(normalized) &&
      !normalized.startsWith("-") &&
      !normalized.endsWith("-")
    );
  }

  /** Checks that `value` parses as an http(s) URL with a dotted hostname. */
  isValidUrl(value) {
    if (!value) return false;
    try {
      const parsed = new URL(value.trim());
      if (!parsed.protocol || !["http:", "https:"].includes(parsed.protocol.toLowerCase())) {
        return false;
      }
      if (!parsed.hostname) return false;
      return HOSTNAME_PATTERN.test(parsed.hostname);
    } catch {
      return false;
    }
  }

  /** Returns one of the user agents generated at construction time. */
  getRandomUserAgent() {
    return this.userAgents[Math.floor(Math.random() * this.userAgents.length)];
  }
}

/**
 * Generates 200 user-agent strings by pairing a random browser with a
 * random platform for each one.
 */
function generateUserAgents() {
  const browsers = [
    { name: "Chrome", version: "91.0.4472.124", engine: "AppleWebKit/537.36" },
    { name: "Firefox", version: "89.0", engine: "Gecko/20100101" },
    { name: "Safari", version: "14.1.1", engine: "AppleWebKit/605.1.15" },
    { name: "Edge", version: "91.0.864.59", engine: "AppleWebKit/537.36" },
    { name: "Opera", version: "77.0.4054.277", engine: "AppleWebKit/537.36" },
    { name: "Vivaldi", version: "3.8.2259.42", engine: "AppleWebKit/537.36" },
    { name: "Brave", version: "1.26.74", engine: "AppleWebKit/537.36" },
    { name: "Chromium", version: "91.0.4472.101", engine: "AppleWebKit/537.36" },
    { name: "Yandex", version: "21.5.3.742", engine: "AppleWebKit/537.36" },
    { name: "Maxthon", version: "5.3.8.2000", engine: "AppleWebKit/537.36" },
  ];
  const platforms = [
    "Windows NT 10.0",
    "Windows NT 6.1",
    "Macintosh; Intel Mac OS X 10_15_7",
    "Macintosh; Intel Mac OS X 11_4_0",
    "X11; Linux x86_64",
    "X11; Ubuntu; Linux x86_64",
  ];
  // One template per browser name; `p` is the platform token, `b` the browser.
  const templates = {
    Chrome: (p, b) => `Mozilla/5.0 (${p}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36`,
    Firefox: (p, b) => `Mozilla/5.0 (${p}; rv:${b.version}) ${b.engine} Firefox/${b.version}`,
    Safari: (p, b) => `Mozilla/5.0 (${p}) ${b.engine} (KHTML, like Gecko) Version/${b.version} Safari/605.1.15`,
    Edge: (p, b) => `Mozilla/5.0 (${p}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36 Edg/${b.version}`,
    Opera: (p, b) => `Mozilla/5.0 (${p}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36 OPR/${b.version}`,
    Vivaldi: (p, b) => `Mozilla/5.0 (${p}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36 Vivaldi/${b.version}`,
    Brave: (p, b) => `Mozilla/5.0 (${p}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36 Brave/${b.version}`,
    Chromium: (p, b) => `Mozilla/5.0 (${p}) ${b.engine} (KHTML, like Gecko) Chromium/${b.version} Chrome/${b.version} Safari/537.36`,
    Yandex: (p, b) => `Mozilla/5.0 (${p}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} YaBrowser/${b.version} Safari/537.36`,
    Maxthon: (p, b) => `Mozilla/5.0 (${p}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36 Maxthon/${b.version}`,
  };

  const agents = [];
  for (let count = 0; count < 200; count++) {
    const browser = browsers[Math.floor(Math.random() * browsers.length)];
    const platform = platforms[Math.floor(Math.random() * platforms.length)];
    agents.push(templates[browser.name](platform, browser));
  }
  return agents;
}

module.exports.CrawlerOptions = CrawlerOptions;
@@ -0,0 +1 @@
1
import { RezoQueue } from "../queue/queue.js";
import { Oxylabs } from "./addon/oxylabs/index.js";
import path from "node:path";
import os from "node:os";
import { Decodo } from "./addon/decodo/index.js";

/** Dotted hostname ending in an alphabetic TLD of two or more letters. */
const HOSTNAME_PATTERN = /^(?:(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,})$/;

/**
 * Returns the registry on `options` holding entries of the given kind.
 * Any kind other than "headers", "limiters", "oxylabs" or "decodo"
 * selects the proxy list.
 */
function registryFor(options, kind) {
  switch (kind) {
    case "headers":
      return options.requestHeaders;
    case "limiters":
      return options.limiters;
    case "oxylabs":
      return options.oxylabs;
    case "decodo":
      return options.decodo;
    default:
      return options.proxies;
  }
}

/** Extracts the value stored in a registry entry of the given kind. */
function payloadOf(entry, kind) {
  switch (kind) {
    case "headers":
      return entry.headers;
    case "limiters":
      return entry.pqueue;
    case "oxylabs":
    case "decodo":
      return entry.adaptar;
    default:
      return entry.proxy;
  }
}

/**
 * Resolved crawler configuration plus per-domain registries for request
 * headers, proxies, rate limiters and the Oxylabs / Decodo adapters.
 */
class CrawlerOptions {
  baseUrl;
  adapter;
  enableNavigationHistory;
  sessionId;
  rejectUnauthorized;
  userAgent;
  useRndUserAgent;
  timeout;
  maxRedirects;
  maxRetryAttempts;
  retryDelay;
  retryOnStatusCode;
  forceRevisit;
  retryWithoutProxyOnStatusCode;
  retryOnProxyError;
  maxRetryOnProxyError;
  allowRevisiting;
  enableCache;
  cacheTTL;
  cacheDir;
  throwFatalError;
  debug;
  maxDepth;
  maxUrls;
  maxResponseSize;
  respectRobotsTxt;
  followNofollow;
  autoThrottle;
  autoThrottleTargetDelay;
  autoThrottleMinDelay;
  autoThrottleMaxDelay;
  maxWaitOn429;
  alwaysWaitOn429;
  oxylabs = [];
  decodo = [];
  proxies = [];
  limiters = [];
  requestHeaders = [];
  userAgents = generateUserAgents();

  /**
   * @param options User-supplied settings; every omitted field gets the
   *   default shown below.
   */
  constructor(options = {}) {
    this.baseUrl = options.baseUrl || "";
    this.adapter = options.adapter ?? "http";
    this.enableNavigationHistory = options.enableNavigationHistory ?? false;
    this.sessionId =
      options.sessionId ?? `session_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;
    this.rejectUnauthorized = options.rejectUnauthorized ?? true;
    this.userAgent = options.userAgent;
    this.useRndUserAgent = options.useRndUserAgent ?? false;
    this.timeout = options.timeout ?? 30000;
    this.maxRedirects = options.maxRedirects ?? 10;
    this.maxRetryAttempts = options.maxRetryAttempts ?? 3;
    this.retryDelay = options.retryDelay ?? 0;
    this.retryOnStatusCode = options.retryOnStatusCode ?? [408, 429, 500, 502, 503, 504];
    this.forceRevisit = options.forceRevisit ?? false;
    this.retryWithoutProxyOnStatusCode = options.retryWithoutProxyOnStatusCode ?? [407, 403];
    this.retryOnProxyError = options.retryOnProxyError ?? true;
    this.maxRetryOnProxyError = options.maxRetryOnProxyError ?? 3;
    this.allowRevisiting = options.allowRevisiting ?? false;
    this.enableCache = options.enableCache ?? true;
    this.cacheTTL = options.cacheTTL ?? 604800000;
    this.cacheDir = options.cacheDir ?? path.join(os.tmpdir(), "uiniqhtt_cache");
    this.throwFatalError = options.throwFatalError ?? false;
    this.debug = options.debug ?? false;
    this.maxDepth = options.maxDepth ?? 0;
    this.maxUrls = options.maxUrls ?? 0;
    this.maxResponseSize = options.maxResponseSize ?? 0;
    this.respectRobotsTxt = options.respectRobotsTxt ?? false;
    this.followNofollow = options.followNofollow ?? false;
    this.autoThrottle = options.autoThrottle ?? true;
    this.autoThrottleTargetDelay = options.autoThrottleTargetDelay ?? 1000;
    this.autoThrottleMinDelay = options.autoThrottleMinDelay ?? 100;
    this.autoThrottleMaxDelay = options.autoThrottleMaxDelay ?? 60000;
    this.maxWaitOn429 = options.maxWaitOn429 ?? 1800000;
    this.alwaysWaitOn429 = options.alwaysWaitOn429 ?? false;

    this._addHeaders(options.headers);
    this._addOxylabs(options.oxylabs);
    this._addDecodo(options.decodo);
    this._addProxies(options.proxy);
    this._addLimiters(options.limiter);
  }

  /**
   * Lists the distinct `domain` values registered for one kind of entry.
   *
   * @param type "headers", "limiters", "oxylabs" or "decodo"; anything else
   *   selects proxies.
   */
  getConfiguredDomains(type) {
    const domains = registryFor(this, type)
      .filter((entry) => entry.domain)
      .map((entry) => entry.domain);
    return domains.filter((domain, index, all) => all.indexOf(domain) === index);
  }

  /**
   * Drops every entry whose `domain` equals `domain` (see `_domainsEqual`)
   * from all registries, including Decodo. Entries without a domain stay.
   *
   * @returns This instance, for chaining.
   */
  removeDomain(domain) {
    const keep = (entry) => !entry.domain || !this._domainsEqual(entry.domain, domain);
    this.requestHeaders = this.requestHeaders.filter(keep);
    this.proxies = this.proxies.filter(keep);
    this.limiters = this.limiters.filter(keep);
    this.oxylabs = this.oxylabs.filter(keep);
    this.decodo = this.decodo.filter(keep);
    return this;
  }

  /**
   * Compares two domain specs: arrays element-by-element with `===`,
   * everything else with `===` directly.
   */
  _domainsEqual(left, right) {
    if (Array.isArray(left) && Array.isArray(right)) {
      return left.length === right.length && left.every((item, index) => item === right[index]);
    }
    return left === right;
  }

  /** Counts total, global and domain-specific entries for each registry. */
  getConfigurationSummary() {
    const summarize = (entries) => ({
      total: entries.length,
      global: entries.filter((entry) => entry.isGlobal).length,
      domainSpecific: entries.filter((entry) => !entry.isGlobal && entry.domain).length,
    });
    return {
      headers: summarize(this.requestHeaders),
      proxies: summarize(this.proxies),
      limiters: summarize(this.limiters),
      oxylabs: summarize(this.oxylabs),
      decodo: summarize(this.decodo),
    };
  }

  /**
   * Registers header sets. Entries that are neither domain-bound nor global,
   * or whose headers are empty, are skipped. `Headers` instances are
   * flattened to plain objects.
   */
  _addHeaders(config) {
    if (!config || !config.enable) return;
    for (const item of config.httpHeaders ?? []) {
      const { domain, isGlobal } = item;
      let { headers } = item;
      if (!domain && !isGlobal) continue;
      if (headers instanceof Headers) {
        headers = Object.fromEntries(headers.entries());
      }
      if (!headers || Object.keys(headers).length < 1) continue;
      this.requestHeaders.push({ domain, isGlobal, headers });
    }
  }

  /** Registers proxies; entries without scope or with an empty proxy are skipped. */
  _addProxies(config) {
    if (!config || !config.enable) return;
    for (const { domain, isGlobal, proxy } of config.proxies ?? []) {
      if (!domain && !isGlobal) continue;
      if (!proxy || Object.keys(proxy).length < 1) continue;
      this.proxies.push({ domain, isGlobal, proxy });
    }
  }

  /** Registers limiters, building one `RezoQueue` per entry from its options. */
  _addLimiters(config) {
    if (!config || !config.enable) return;
    for (const { domain, isGlobal, options } of config.limiters ?? []) {
      if (!domain && !isGlobal) continue;
      if (!options || Object.keys(options).length < 1) continue;
      this.limiters.push({ domain, isGlobal, pqueue: new RezoQueue(options) });
    }
  }

  /** Registers Oxylabs adapters, building one `Oxylabs` client per entry. */
  _addOxylabs(config) {
    if (!config || !config.enable) return;
    for (const { domain, isGlobal, options } of config.labs ?? []) {
      if (!domain && !isGlobal) continue;
      if (!options || Object.keys(options).length < 1) continue;
      this.oxylabs.push({ domain, isGlobal, adaptar: new Oxylabs(options) });
    }
  }

  /** Registers Decodo adapters, building one `Decodo` client per entry. */
  _addDecodo(config) {
    if (!config || !config.enable) return;
    for (const { domain, isGlobal, options } of config.labs ?? []) {
      if (!domain && !isGlobal) continue;
      if (!options || Object.keys(options).length < 1) continue;
      this.decodo.push({ domain, isGlobal, adaptar: new Decodo(options) });
    }
  }

  /** Adds one header entry. @returns This instance, for chaining. */
  addHeaders(entry) {
    this._addHeaders({ enable: true, httpHeaders: [entry] });
    return this;
  }

  /** Adds one proxy entry. @returns This instance, for chaining. */
  addProxy(entry) {
    this._addProxies({ enable: true, proxies: [entry] });
    return this;
  }

  /** Adds one limiter entry. @returns This instance, for chaining. */
  addLimiter(entry) {
    this._addLimiters({ enable: true, limiters: [entry] });
    return this;
  }

  /** Adds one Oxylabs entry. @returns This instance, for chaining. */
  addOxylabs(entry) {
    this._addOxylabs({ enable: true, labs: [entry] });
    return this;
  }

  /** Adds one Decodo entry. @returns This instance, for chaining. */
  addDecodo(entry) {
    this._addDecodo({ enable: true, labs: [entry] });
    return this;
  }

  /**
   * Removes every global entry from all registries, including Decodo.
   *
   * @returns This instance, for chaining.
   */
  clearGlobalConfigs() {
    const notGlobal = (entry) => !entry.isGlobal;
    if (Array.isArray(this.requestHeaders)) this.requestHeaders = this.requestHeaders.filter(notGlobal);
    if (Array.isArray(this.oxylabs)) this.oxylabs = this.oxylabs.filter(notGlobal);
    if (Array.isArray(this.decodo)) this.decodo = this.decodo.filter(notGlobal);
    if (Array.isArray(this.limiters)) this.limiters = this.limiters.filter(notGlobal);
    if (Array.isArray(this.proxies)) this.proxies = this.proxies.filter(notGlobal);
    return this;
  }

  /**
   * Resolves the headers / queue / adapter / proxy that applies to `url`.
   *
   * Domain-matching entries take priority. If none match and `useGlobal` is
   * truthy, the choice is made among global entries only. Previously the
   * fallback drew from every entry and returned null whenever the drawn
   * entry was not global.
   *
   * @param url URL or bare hostname.
   * @param type "headers", "limiters", "oxylabs" or "decodo"; anything else
   *   selects proxies.
   * @param useGlobal Allow falling back to global entries.
   * @param random Pick randomly among candidates instead of taking the first.
   * @returns The stored value, or null when nothing applies.
   */
  getAdapter(url, type, useGlobal, random) {
    if (!this.getDomainName(url)) return null;

    const registry = registryFor(this, type);
    const choose = (candidates) =>
      random ? candidates[this.rnd(0, candidates.length - 1)] : candidates[0];

    const matching = registry.filter((entry) => this._hasDomain(url, entry.domain));
    if (matching.length) return payloadOf(choose(matching), type);

    if (useGlobal) {
      const globals = registry.filter((entry) => entry.isGlobal);
      if (globals.length) return payloadOf(choose(globals), type);
    }
    return null;
  }

  /** Random integer in the inclusive range [min, max]. */
  rnd(min = 0, max = Number.MAX_VALUE) {
    return Math.floor(Math.random() * (max - min + 1)) + min;
  }

  /**
   * Tells whether any entry of the given kind applies to `url`, optionally
   * counting global entries as a match.
   */
  hasDomain(url, type, useGlobal) {
    if (!this.getDomainName(url)) return false;
    const registry = registryFor(this, type);
    if (registry.some((entry) => this._hasDomain(url, entry.domain))) return true;
    return Boolean(useGlobal) && registry.some((entry) => entry.isGlobal);
  }

  /**
   * Builds the header object for a request to `url`.
   *
   * Starts from the configured headers for the URL, overlays `extra`, then
   * optionally sets a random user agent.
   *
   * @param url URL or bare hostname.
   * @param useGlobal Allow global header entries as the base.
   * @param extra `Headers` instance or plain object; for plain objects only
   *   string values are copied.
   * @param randomUserAgent Overwrite `user-agent` with a generated one.
   */
  pickHeaders(url, useGlobal, extra, randomUserAgent) {
    const base = this.getAdapter(url, "headers", useGlobal);
    const merged = new Headers(base ?? {});

    if (extra instanceof Headers) {
      // Iterate the Headers directly: Object.entries() of its iterator is
      // always empty, which used to drop these headers silently.
      for (const [name, value] of extra.entries()) merged.set(name, value);
    } else if (extra && typeof extra === "object") {
      for (const [name, value] of Object.entries(extra)) {
        if (typeof value === "string") merged.set(name, value);
      }
    }

    if (randomUserAgent) merged.set("user-agent", this.getRandomUserAgent());
    return Object.fromEntries(merged.entries());
  }

  /**
   * Tests `url` against a domain spec: a RegExp, a string, or an array of
   * either. Strings are tried as an exact hostname, then a `*` wildcard,
   * then a regex-like pattern, then a parent/sub-domain relationship.
   */
  _hasDomain(url, spec) {
    if (!spec) return false;
    const host = this.getDomainName(url);
    if (!host) return false;

    const looksLikeRegex = (text) =>
      /[\^\$\*\+\?\{\}\[\]\(\)\|\\]/.test(text) ||
      text.startsWith("/") ||
      text.includes(".*") ||
      text.includes(".+");

    const matches = (candidate) => {
      if (candidate instanceof RegExp) return candidate.test(host) || candidate.test(url);

      const pattern = candidate.toString().trim();
      const hostLower = host.toLowerCase();
      const patternLower = pattern.toLowerCase();
      if (hostLower === patternLower) return true;

      if (pattern.includes("*")) {
        // Escape regex metacharacters, then turn each escaped `*` into `.*`.
        const source = pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&").replace(/\\\*/g, ".*");
        const wildcard = new RegExp(`^${source}$`, "i");
        return wildcard.test(host) || wildcard.test(url);
      }

      if (looksLikeRegex(pattern)) {
        try {
          let source = pattern;
          let flags = "i";
          const literal = pattern.match(/^\/(.*)\/(\w*)$/);
          if (literal) {
            source = literal[1];
            flags = literal[2] || "i";
          }
          const expression = new RegExp(source, flags);
          return expression.test(host) || expression.test(url);
        } catch {
          // Not a valid regular expression: fall back to a substring test.
          return hostLower.includes(patternLower);
        }
      }

      return (
        hostLower === patternLower ||
        hostLower.endsWith("." + patternLower) ||
        patternLower.endsWith("." + hostLower)
      );
    };

    return Array.isArray(spec) ? spec.some(matches) : matches(spec);
  }

  /**
   * Returns the hostname of an http(s) URL, the input itself when it is
   * already a bare hostname, or null otherwise.
   */
  getDomainName(value) {
    if (this.isValidUrl(value)) return new URL(value).hostname;
    if (this.isHostName(value)) return value;
    return null;
  }

  /**
   * Checks that `value` is a bare hostname of at most 255 characters.
   * The pattern previously contained a stray space, so no hostname matched.
   */
  isHostName(value) {
    if (!value) return false;
    if (value.length > 255) return false;
    const normalized = value.trim().toLowerCase();
    return (
      HOSTNAME_PATTERN.test(normalized) &&
      !normalized.startsWith("-") &&
      !normalized.endsWith("-")
    );
  }

  /** Checks that `value` parses as an http(s) URL with a dotted hostname. */
  isValidUrl(value) {
    if (!value) return false;
    try {
      const parsed = new URL(value.trim());
      if (!parsed.protocol || !["http:", "https:"].includes(parsed.protocol.toLowerCase())) {
        return false;
      }
      if (!parsed.hostname) return false;
      return HOSTNAME_PATTERN.test(parsed.hostname);
    } catch {
      return false;
    }
  }

  /** Returns one of the user agents generated at construction time. */
  getRandomUserAgent() {
    return this.userAgents[Math.floor(Math.random() * this.userAgents.length)];
  }
}

/**
 * Generates 200 user-agent strings by pairing a random browser with a
 * random platform for each one.
 */
function generateUserAgents() {
  const browsers = [
    { name: "Chrome", version: "91.0.4472.124", engine: "AppleWebKit/537.36" },
    { name: "Firefox", version: "89.0", engine: "Gecko/20100101" },
    { name: "Safari", version: "14.1.1", engine: "AppleWebKit/605.1.15" },
    { name: "Edge", version: "91.0.864.59", engine: "AppleWebKit/537.36" },
    { name: "Opera", version: "77.0.4054.277", engine: "AppleWebKit/537.36" },
    { name: "Vivaldi", version: "3.8.2259.42", engine: "AppleWebKit/537.36" },
    { name: "Brave", version: "1.26.74", engine: "AppleWebKit/537.36" },
    { name: "Chromium", version: "91.0.4472.101", engine: "AppleWebKit/537.36" },
    { name: "Yandex", version: "21.5.3.742", engine: "AppleWebKit/537.36" },
    { name: "Maxthon", version: "5.3.8.2000", engine: "AppleWebKit/537.36" },
  ];
  const platforms = [
    "Windows NT 10.0",
    "Windows NT 6.1",
    "Macintosh; Intel Mac OS X 10_15_7",
    "Macintosh; Intel Mac OS X 11_4_0",
    "X11; Linux x86_64",
    "X11; Ubuntu; Linux x86_64",
  ];
  // One template per browser name; `p` is the platform token, `b` the browser.
  const templates = {
    Chrome: (p, b) => `Mozilla/5.0 (${p}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36`,
    Firefox: (p, b) => `Mozilla/5.0 (${p}; rv:${b.version}) ${b.engine} Firefox/${b.version}`,
    Safari: (p, b) => `Mozilla/5.0 (${p}) ${b.engine} (KHTML, like Gecko) Version/${b.version} Safari/605.1.15`,
    Edge: (p, b) => `Mozilla/5.0 (${p}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36 Edg/${b.version}`,
    Opera: (p, b) => `Mozilla/5.0 (${p}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36 OPR/${b.version}`,
    Vivaldi: (p, b) => `Mozilla/5.0 (${p}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36 Vivaldi/${b.version}`,
    Brave: (p, b) => `Mozilla/5.0 (${p}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36 Brave/${b.version}`,
    Chromium: (p, b) => `Mozilla/5.0 (${p}) ${b.engine} (KHTML, like Gecko) Chromium/${b.version} Chrome/${b.version} Safari/537.36`,
    Yandex: (p, b) => `Mozilla/5.0 (${p}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} YaBrowser/${b.version} Safari/537.36`,
    Maxthon: (p, b) => `Mozilla/5.0 (${p}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36 Maxthon/${b.version}`,
  };

  const agents = [];
  for (let count = 0; count < 200; count++) {
    const browser = browsers[Math.floor(Math.random() * browsers.length)];
    const platform = platforms[Math.floor(Math.random() * platforms.length)];
    agents.push(templates[browser.name](platform, browser));
  }
  return agents;
}

export { CrawlerOptions };