rezo 1.0.73 → 1.0.75

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. package/dist/adapters/entries/curl.d.ts +4 -1
  2. package/dist/adapters/entries/fetch.d.ts +4 -1
  3. package/dist/adapters/entries/http.d.ts +4 -1
  4. package/dist/adapters/entries/http2.d.ts +4 -1
  5. package/dist/adapters/entries/react-native.d.ts +4 -1
  6. package/dist/adapters/entries/xhr.d.ts +4 -1
  7. package/dist/adapters/http.cjs +2 -1
  8. package/dist/adapters/http.js +2 -1
  9. package/dist/adapters/index.cjs +6 -6
  10. package/dist/cache/index.cjs +9 -9
  11. package/dist/crawler/crawler-options.cjs +1 -1
  12. package/dist/crawler/crawler-options.js +1 -1
  13. package/dist/crawler/crawler.cjs +92 -11
  14. package/dist/crawler/crawler.js +92 -11
  15. package/dist/crawler/index.cjs +40 -40
  16. package/dist/crawler/plugin/index.cjs +1 -1
  17. package/dist/crawler.d.ts +105 -0
  18. package/dist/entries/crawler.cjs +4 -4
  19. package/dist/errors/rezo-error.cjs +3 -72
  20. package/dist/errors/rezo-error.js +3 -72
  21. package/dist/index.cjs +30 -30
  22. package/dist/index.d.ts +4 -1
  23. package/dist/internal/agents/bun-socks-http.cjs +573 -0
  24. package/dist/internal/agents/bun-socks-http.js +570 -0
  25. package/dist/internal/agents/index.cjs +14 -10
  26. package/dist/internal/agents/index.js +1 -0
  27. package/dist/platform/browser.d.ts +4 -1
  28. package/dist/platform/bun.d.ts +4 -1
  29. package/dist/platform/deno.d.ts +4 -1
  30. package/dist/platform/node.d.ts +4 -1
  31. package/dist/platform/react-native.d.ts +4 -1
  32. package/dist/platform/worker.d.ts +4 -1
  33. package/dist/proxy/index.cjs +4 -4
  34. package/dist/queue/index.cjs +8 -8
  35. package/dist/queue/queue.cjs +4 -1
  36. package/dist/queue/queue.js +4 -1
  37. package/dist/responses/universal/index.cjs +11 -11
  38. package/dist/utils/agent-pool.cjs +35 -0
  39. package/dist/utils/agent-pool.js +35 -0
  40. package/dist/version.cjs +1 -1
  41. package/dist/version.js +1 -1
  42. package/dist/wget/index.cjs +49 -49
  43. package/dist/wget/index.d.ts +3 -0
  44. package/package.json +1 -1
@@ -1997,6 +1997,8 @@ export declare class RezoError<T = any> extends Error {
1997
1997
  * Queue configuration options
1998
1998
  */
1999
1999
  export interface QueueConfig {
2000
+ /** Name of the queue - useful for debugging and logging */
2001
+ name?: string;
2000
2002
  /** Maximum concurrent tasks (default: Infinity) */
2001
2003
  concurrency?: number;
2002
2004
  /** Auto-start processing when tasks are added (default: true) */
@@ -2127,6 +2129,7 @@ declare class RezoQueue<T = any> {
2127
2129
  private isPausedFlag;
2128
2130
  private intervalId?;
2129
2131
  private intervalCount;
2132
+ readonly name: string;
2130
2133
  private intervalStart;
2131
2134
  private eventHandlers;
2132
2135
  private statsData;
@@ -4579,7 +4582,7 @@ export interface RezoInstance extends Rezo {
4579
4582
  *
4580
4583
  * IMPORTANT: Update these values when bumping package version.
4581
4584
  */
4582
- export declare const VERSION = "1.0.73";
4585
+ export declare const VERSION = "1.0.75";
4583
4586
  /**
4584
4587
  * cURL Options Configuration
4585
4588
  *
@@ -1997,6 +1997,8 @@ export declare class RezoError<T = any> extends Error {
1997
1997
  * Queue configuration options
1998
1998
  */
1999
1999
  export interface QueueConfig {
2000
+ /** Name of the queue - useful for debugging and logging */
2001
+ name?: string;
2000
2002
  /** Maximum concurrent tasks (default: Infinity) */
2001
2003
  concurrency?: number;
2002
2004
  /** Auto-start processing when tasks are added (default: true) */
@@ -2127,6 +2129,7 @@ declare class RezoQueue<T = any> {
2127
2129
  private isPausedFlag;
2128
2130
  private intervalId?;
2129
2131
  private intervalCount;
2132
+ readonly name: string;
2130
2133
  private intervalStart;
2131
2134
  private eventHandlers;
2132
2135
  private statsData;
@@ -4579,7 +4582,7 @@ export interface RezoInstance extends Rezo {
4579
4582
  *
4580
4583
  * IMPORTANT: Update these values when bumping package version.
4581
4584
  */
4582
- export declare const VERSION = "1.0.73";
4585
+ export declare const VERSION = "1.0.75";
4583
4586
  export declare const isRezoError: typeof RezoError.isRezoError;
4584
4587
  export declare const Cancel: typeof RezoError;
4585
4588
  export declare const CancelToken: {
@@ -1997,6 +1997,8 @@ export declare class RezoError<T = any> extends Error {
1997
1997
  * Queue configuration options
1998
1998
  */
1999
1999
  export interface QueueConfig {
2000
+ /** Name of the queue - useful for debugging and logging */
2001
+ name?: string;
2000
2002
  /** Maximum concurrent tasks (default: Infinity) */
2001
2003
  concurrency?: number;
2002
2004
  /** Auto-start processing when tasks are added (default: true) */
@@ -2127,6 +2129,7 @@ declare class RezoQueue<T = any> {
2127
2129
  private isPausedFlag;
2128
2130
  private intervalId?;
2129
2131
  private intervalCount;
2132
+ readonly name: string;
2130
2133
  private intervalStart;
2131
2134
  private eventHandlers;
2132
2135
  private statsData;
@@ -4579,7 +4582,7 @@ export interface RezoInstance extends Rezo {
4579
4582
  *
4580
4583
  * IMPORTANT: Update these values when bumping package version.
4581
4584
  */
4582
- export declare const VERSION = "1.0.73";
4585
+ export declare const VERSION = "1.0.75";
4583
4586
  /**
4584
4587
  * Type guard to check if an error is a RezoError instance.
4585
4588
  */
@@ -1997,6 +1997,8 @@ export declare class RezoError<T = any> extends Error {
1997
1997
  * Queue configuration options
1998
1998
  */
1999
1999
  export interface QueueConfig {
2000
+ /** Name of the queue - useful for debugging and logging */
2001
+ name?: string;
2000
2002
  /** Maximum concurrent tasks (default: Infinity) */
2001
2003
  concurrency?: number;
2002
2004
  /** Auto-start processing when tasks are added (default: true) */
@@ -2127,6 +2129,7 @@ declare class RezoQueue<T = any> {
2127
2129
  private isPausedFlag;
2128
2130
  private intervalId?;
2129
2131
  private intervalCount;
2132
+ readonly name: string;
2130
2133
  private intervalStart;
2131
2134
  private eventHandlers;
2132
2135
  private statsData;
@@ -4579,7 +4582,7 @@ export interface RezoInstance extends Rezo {
4579
4582
  *
4580
4583
  * IMPORTANT: Update these values when bumping package version.
4581
4584
  */
4582
- export declare const VERSION = "1.0.73";
4585
+ export declare const VERSION = "1.0.75";
4583
4586
  export declare const isRezoError: typeof RezoError.isRezoError;
4584
4587
  export declare const Cancel: typeof RezoError;
4585
4588
  export declare const CancelToken: {
@@ -1997,6 +1997,8 @@ export declare class RezoError<T = any> extends Error {
1997
1997
  * Queue configuration options
1998
1998
  */
1999
1999
  export interface QueueConfig {
2000
+ /** Name of the queue - useful for debugging and logging */
2001
+ name?: string;
2000
2002
  /** Maximum concurrent tasks (default: Infinity) */
2001
2003
  concurrency?: number;
2002
2004
  /** Auto-start processing when tasks are added (default: true) */
@@ -2127,6 +2129,7 @@ declare class RezoQueue<T = any> {
2127
2129
  private isPausedFlag;
2128
2130
  private intervalId?;
2129
2131
  private intervalCount;
2132
+ readonly name: string;
2130
2133
  private intervalStart;
2131
2134
  private eventHandlers;
2132
2135
  private statsData;
@@ -4579,7 +4582,7 @@ export interface RezoInstance extends Rezo {
4579
4582
  *
4580
4583
  * IMPORTANT: Update these values when bumping package version.
4581
4584
  */
4582
- export declare const VERSION = "1.0.73";
4585
+ export declare const VERSION = "1.0.75";
4583
4586
  export declare const isRezoError: typeof RezoError.isRezoError;
4584
4587
  export declare const Cancel: typeof RezoError;
4585
4588
  export declare const CancelToken: {
@@ -1997,6 +1997,8 @@ export declare class RezoError<T = any> extends Error {
1997
1997
  * Queue configuration options
1998
1998
  */
1999
1999
  export interface QueueConfig {
2000
+ /** Name of the queue - useful for debugging and logging */
2001
+ name?: string;
2000
2002
  /** Maximum concurrent tasks (default: Infinity) */
2001
2003
  concurrency?: number;
2002
2004
  /** Auto-start processing when tasks are added (default: true) */
@@ -2127,6 +2129,7 @@ declare class RezoQueue<T = any> {
2127
2129
  private isPausedFlag;
2128
2130
  private intervalId?;
2129
2131
  private intervalCount;
2132
+ readonly name: string;
2130
2133
  private intervalStart;
2131
2134
  private eventHandlers;
2132
2135
  private statsData;
@@ -4579,7 +4582,7 @@ export interface RezoInstance extends Rezo {
4579
4582
  *
4580
4583
  * IMPORTANT: Update these values when bumping package version.
4581
4584
  */
4582
- export declare const VERSION = "1.0.73";
4585
+ export declare const VERSION = "1.0.75";
4583
4586
  export declare const isRezoError: typeof RezoError.isRezoError;
4584
4587
  export declare const Cancel: typeof RezoError;
4585
4588
  export declare const CancelToken: {
@@ -24,6 +24,7 @@ const { StagedTimeoutManager, parseStagedTimeouts } = require('../utils/staged-t
24
24
  const { handleRateLimitWait, shouldWaitOnStatus } = require('../utils/rate-limit-wait.cjs');
25
25
  const { getSocketTelemetry, beginRequestContext } = require('../utils/socket-telemetry.cjs');
26
26
  const dns = require("node:dns");
27
+ const { bunHttp, isBunRuntime, isBunSocksRequest } = require('../internal/agents.cjs');
27
28
  const debugLog = {
28
29
  requestStart: (config, url, method) => {
29
30
  if (config.debug) {
@@ -661,7 +662,7 @@ async function request(config, fetchOptions, requestCount, timing, _stats, respo
661
662
  const { fullUrl, body, fileName: filename } = fetchOptions;
662
663
  const url = new URL(fullUrl || fetchOptions.url);
663
664
  const isSecure = url.protocol === "https:";
664
- const httpModule = isSecure ? config.isSecure && config.adapter ? config.adapter : https : !config.isSecure && config.adapter ? config.adapter : http;
665
+ const httpModule = isBunRuntime() && isBunSocksRequest(fetchOptions.proxy) ? bunHttp : isSecure ? config.isSecure && config.adapter ? config.adapter : https : !config.isSecure && config.adapter ? config.adapter : http;
665
666
  await setInitialConfig(config, fetchOptions, isSecure, url, httpModule, requestCount, timing.startTime, timing.startTimestamp);
666
667
  const eventEmitter = streamResult || downloadResult || uploadResult;
667
668
  if (eventEmitter && requestCount === 0) {
@@ -24,6 +24,7 @@ import { StagedTimeoutManager, parseStagedTimeouts } from '../utils/staged-timeo
24
24
  import { handleRateLimitWait, shouldWaitOnStatus } from '../utils/rate-limit-wait.js';
25
25
  import { getSocketTelemetry, beginRequestContext } from '../utils/socket-telemetry.js';
26
26
  import dns from "node:dns";
27
+ import { bunHttp, isBunRuntime, isBunSocksRequest } from '../internal/agents.js';
27
28
  const debugLog = {
28
29
  requestStart: (config, url, method) => {
29
30
  if (config.debug) {
@@ -661,7 +662,7 @@ async function request(config, fetchOptions, requestCount, timing, _stats, respo
661
662
  const { fullUrl, body, fileName: filename } = fetchOptions;
662
663
  const url = new URL(fullUrl || fetchOptions.url);
663
664
  const isSecure = url.protocol === "https:";
664
- const httpModule = isSecure ? config.isSecure && config.adapter ? config.adapter : https : !config.isSecure && config.adapter ? config.adapter : http;
665
+ const httpModule = isBunRuntime() && isBunSocksRequest(fetchOptions.proxy) ? bunHttp : isSecure ? config.isSecure && config.adapter ? config.adapter : https : !config.isSecure && config.adapter ? config.adapter : http;
665
666
  await setInitialConfig(config, fetchOptions, isSecure, url, httpModule, requestCount, timing.startTime, timing.startTimestamp);
666
667
  const eventEmitter = streamResult || downloadResult || uploadResult;
667
668
  if (eventEmitter && requestCount === 0) {
@@ -1,6 +1,6 @@
1
- const _mod_vstjof = require('./picker.cjs');
2
- exports.detectRuntime = _mod_vstjof.detectRuntime;
3
- exports.getAdapterCapabilities = _mod_vstjof.getAdapterCapabilities;
4
- exports.buildAdapterContext = _mod_vstjof.buildAdapterContext;
5
- exports.getAvailableAdapters = _mod_vstjof.getAvailableAdapters;
6
- exports.selectAdapter = _mod_vstjof.selectAdapter;;
1
+ const _mod_sfy4jl = require('./picker.cjs');
2
+ exports.detectRuntime = _mod_sfy4jl.detectRuntime;
3
+ exports.getAdapterCapabilities = _mod_sfy4jl.getAdapterCapabilities;
4
+ exports.buildAdapterContext = _mod_sfy4jl.buildAdapterContext;
5
+ exports.getAvailableAdapters = _mod_sfy4jl.getAvailableAdapters;
6
+ exports.selectAdapter = _mod_sfy4jl.selectAdapter;;
@@ -1,9 +1,9 @@
1
- const _mod_4yb0ry = require('./lru-cache.cjs');
2
- exports.LRUCache = _mod_4yb0ry.LRUCache;;
3
- const _mod_lb3zhr = require('./dns-cache.cjs');
4
- exports.DNSCache = _mod_lb3zhr.DNSCache;
5
- exports.getGlobalDNSCache = _mod_lb3zhr.getGlobalDNSCache;
6
- exports.resetGlobalDNSCache = _mod_lb3zhr.resetGlobalDNSCache;;
7
- const _mod_ogn45d = require('./response-cache.cjs');
8
- exports.ResponseCache = _mod_ogn45d.ResponseCache;
9
- exports.normalizeResponseCacheConfig = _mod_ogn45d.normalizeResponseCacheConfig;;
1
+ const _mod_jtrwsr = require('./lru-cache.cjs');
2
+ exports.LRUCache = _mod_jtrwsr.LRUCache;;
3
+ const _mod_v4g8pj = require('./dns-cache.cjs');
4
+ exports.DNSCache = _mod_v4g8pj.DNSCache;
5
+ exports.getGlobalDNSCache = _mod_v4g8pj.getGlobalDNSCache;
6
+ exports.resetGlobalDNSCache = _mod_v4g8pj.resetGlobalDNSCache;;
7
+ const _mod_2ixra6 = require('./response-cache.cjs');
8
+ exports.ResponseCache = _mod_2ixra6.ResponseCache;
9
+ exports.normalizeResponseCacheConfig = _mod_2ixra6.normalizeResponseCacheConfig;;
@@ -1 +1 @@
1
- var{RezoQueue:m}=require("../queue/queue.cjs"),{Oxylabs:x}=require("./addon/oxylabs/index.cjs"),b=require("node:path"),g=require("node:os"),{Decodo:y}=require("./addon/decodo/index.cjs");class f{baseUrl;adapter;enableNavigationHistory;sessionId;rejectUnauthorized;userAgent;useRndUserAgent;timeout;maxRedirects;maxRetryAttempts;retryDelay;retryOnStatusCode;forceRevisit;retryWithoutProxyOnStatusCode;retryOnProxyError;maxRetryOnProxyError;allowRevisiting;enableCache;cacheTTL;cacheDir;throwFatalError;debug;concurrency;scraperConcurrency;maxDepth;maxUrls;maxResponseSize;respectRobotsTxt;followNofollow;autoThrottle;autoThrottleTargetDelay;autoThrottleMinDelay;autoThrottleMaxDelay;maxWaitOn429;alwaysWaitOn429;oxylabs=[];decodo=[];proxies=[];limiters=[];requestHeaders=[];userAgents=A();constructor(e={}){this.baseUrl=e.baseUrl||"",this.adapter=e.adapter??"http",this.enableNavigationHistory=e.enableNavigationHistory??!1,this.sessionId=e.sessionId??`session_${Date.now()}_${Math.random().toString(36).slice(2,8)}`,this.rejectUnauthorized=e.rejectUnauthorized??!0,this.userAgent=e.userAgent,this.useRndUserAgent=e.useRndUserAgent??!1,this.timeout=e.timeout??30000,this.maxRedirects=e.maxRedirects??10,this.maxRetryAttempts=e.maxRetryAttempts??3,this.retryDelay=e.retryDelay??0,this.retryOnStatusCode=e.retryOnStatusCode??[408,429,500,502,503,504],this.forceRevisit=e.forceRevisit??!1,this.retryWithoutProxyOnStatusCode=e.retryWithoutProxyOnStatusCode??[407,403],this.retryOnProxyError=e.retryOnProxyError??!0,this.maxRetryOnProxyError=e.maxRetryOnProxyError??3,this.allowRevisiting=e.allowRevisiting??!1,this.enableCache=e.enableCache??!0,this.cacheTTL=e.cacheTTL??604800000,this.cacheDir=e.cacheDir??b.join(g.tmpdir(),"uiniqhtt_cache"),this.throwFatalError=e.throwFatalError??!1,this.debug=e.debug??!1,this.concurrency=e.concurrency??100,this.scraperConcurrency=e.scraperConcurrency??this.concurrency,this.maxDepth=e.maxDepth??0,this.maxUrls=e.maxUrls??0,this.maxResponseSize=e.maxResponseSize??0
,this.respectRobotsTxt=e.respectRobotsTxt??!1,this.followNofollow=e.followNofollow??!1,this.autoThrottle=e.autoThrottle??!0,this.autoThrottleTargetDelay=e.autoThrottleTargetDelay??1000,this.autoThrottleMinDelay=e.autoThrottleMinDelay??100,this.autoThrottleMaxDelay=e.autoThrottleMaxDelay??60000,this.maxWaitOn429=e.maxWaitOn429??1800000,this.alwaysWaitOn429=e.alwaysWaitOn429??!1,this._addHeaders(e.headers),this._addOxylabs(e.oxylabs),this._addDecodo(e.decodo),this._addProxies(e.proxy),this._addLimiters(e.limiter)}getConfiguredDomains(e){return(e==="headers"?this.requestHeaders:e==="limiters"?this.limiters:e==="oxylabs"?this.oxylabs:this.proxies).filter((i)=>i.domain).map((i)=>i.domain).filter((i,a,r)=>r.indexOf(i)===a)}removeDomain(e){return this.requestHeaders=this.requestHeaders.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this.proxies=this.proxies.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this.limiters=this.limiters.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this.oxylabs=this.oxylabs.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this}_domainsEqual(e,t){if(Array.isArray(e)&&Array.isArray(t))return e.length===t.length&&e.every((i,a)=>i===t[a]);return e===t}getConfigurationSummary(){let e=(t)=>({total:t.length,global:t.filter((i)=>i.isGlobal).length,domainSpecific:t.filter((i)=>!i.isGlobal&&i.domain).length});return{headers:e(this.requestHeaders),proxies:e(this.proxies),limiters:e(this.limiters),oxylabs:e(this.oxylabs)}}_addHeaders(e){if(!e||!e.enable)return;for(let t of e.httpHeaders){let{domain:i,isGlobal:a,headers:r}=t;if(!i&&!a)continue;if(r instanceof Headers){let s=Object.fromEntries(r.entries());if(Object.keys(s).length<1)continue;r=s}else if(!r||Object.keys(r).length<1)continue;this.requestHeaders.push({domain:i,isGlobal:a,headers:r})}}_addProxies(e){if(!e||!e.enable)return;for(let t of 
e.proxies){let{domain:i,isGlobal:a,proxy:r}=t;if(!i&&!a)continue;if(!r||Object.keys(r).length<1)continue;this.proxies.push({domain:i,isGlobal:a,proxy:r})}}_addLimiters(e){if(!e||!e.enable)return;for(let t of e.limiters){let{domain:i,isGlobal:a,options:r}=t;if(!i&&!a)continue;if(!r||Object.keys(r).length<1)continue;this.limiters.push({domain:i,isGlobal:a,pqueue:new m(r)})}}_addOxylabs(e){if(!e||!e.enable)return;for(let t of e.labs){let{domain:i,isGlobal:a,options:r,queueOptions:s}=t;if(!i&&!a)continue;if(!r||Object.keys(r).length<1)continue;this.oxylabs.push({domain:i,isGlobal:a,adaptar:new x(r)})}}_addDecodo(e){if(!e||!e.enable)return;for(let t of e.labs){let{domain:i,isGlobal:a,options:r,queueOptions:s}=t;if(!i&&!a)continue;if(!r||Object.keys(r).length<1)continue;this.decodo.push({domain:i,isGlobal:a,adaptar:new y(r)})}}addHeaders(e){return this._addHeaders({enable:!0,httpHeaders:[e]}),this}addProxy(e){return this._addProxies({enable:!0,proxies:[e]}),this}addLimiter(e){return this._addLimiters({enable:!0,limiters:[e]}),this}addOxylabs(e){return this._addOxylabs({enable:!0,labs:[e]}),this}addDecodo(e){return this._addDecodo({enable:!0,labs:[e]}),this}destroyLimiters(){for(let e of this.limiters)if(e.pqueue&&typeof e.pqueue.destroy==="function")e.pqueue.destroy();this.limiters=[]}clearGlobalConfigs(){if(Array.isArray(this.requestHeaders))this.requestHeaders=this.requestHeaders.filter((e)=>!e.isGlobal);if(Array.isArray(this.oxylabs))this.oxylabs=this.oxylabs.filter((e)=>!e.isGlobal);if(Array.isArray(this.limiters))this.limiters=this.limiters.filter((e)=>!e.isGlobal);if(Array.isArray(this.proxies))this.proxies=this.proxies.filter((e)=>!e.isGlobal);return this}getAdapter(e,t,i,a){if(!this.getDomainName(e))return null;let s=[],o=t==="headers"?this.requestHeaders:t==="limiters"?this.limiters:t==="oxylabs"?this.oxylabs:t==="decodo"?this.decodo:this.proxies;for(let n=0;n<o.length;n++)if(this._hasDomain(e,o[n].domain))s.push(n);if(s.length){let 
n=a?s[this.rnd(0,s.length-1)]:s[0];return t==="headers"?this.requestHeaders[n].headers:t==="limiters"?this.limiters[n].pqueue:t==="oxylabs"?this.oxylabs[n].adaptar:t==="decodo"?this.decodo[n].adaptar:this.proxies[n].proxy}s.length=0;for(let n=0;n<o.length;n++)s.push(n);if(s.length){let n=a?s[this.rnd(0,s.length-1)]:s[0];if(o[n].isGlobal&&i)return t==="headers"?this.requestHeaders[n].headers:t==="limiters"?this.limiters[n].pqueue:t==="oxylabs"?this.oxylabs[n].adaptar:t==="decodo"?this.decodo[n].adaptar:this.proxies[n].proxy}return null}rnd(e=0,t=Number.MAX_VALUE){return Math.floor(Math.random()*(t-e+1))+e}hasDomain(e,t,i){if(!this.getDomainName(e))return!1;let r=t==="headers"?this.requestHeaders:t==="limiters"?this.limiters:t==="oxylabs"?this.oxylabs:t==="decodo"?this.decodo:this.proxies;for(let s=0;s<r.length;s++)if(this._hasDomain(e,r[s].domain))return!0;if(i){for(let s=0;s<r.length;s++)if(r[s].isGlobal)return!0}return!1}pickHeaders(e,t,i,a){let r=this.getAdapter(e,"headers",t),s=new Headers(r??{}),o=s.count;if(i&&i instanceof Headers)for(let[n,l]of Object.entries(i.entries()))s.set(n,l);else if(i&&typeof i==="object"){for(let[n,l]of Object.entries(i))if(typeof l==="string")s.set(n,l)}if(a&&o===0)s.set("user-agent",this.getRandomUserAgent());return Object.fromEntries(s.entries())}_hasDomain(e,t){if(!t)return!1;let i=this.getDomainName(e);if(!i)return!1;let a=(s)=>{return/[\^\$\*\+\?\{\}\[\]\(\)\|\\]/.test(s)||s.startsWith("/")||s.includes(".*")||s.includes(".+")},r=(s)=>{if(s instanceof RegExp)return s.test(i)||s.test(e);let o=s.toString().trim();if(i.toLowerCase()===o.toLowerCase())return!0;if(o.includes("*")){let h=o.replace(/[.*+?^${}()|[\]\\]/g,"\\$&").replace(/\\\*/g,".*"),c=new RegExp(`^${h}$`,"i");return c.test(i)||c.test(e)}if(a(o))try{let h=o,c="i",u=o.match(/^\/(.*)\/(\w*)$/);if(u)h=u[1],c=u[2]||"i";let d=new RegExp(h,c);return d.test(i)||d.test(e)}catch(h){return i.toLowerCase().includes(o.toLowerCase())}let n=i.toLowerCase(),l=o.toLowerCase();return 
n===l||n.endsWith("."+l)||l.endsWith("."+n)};if(Array.isArray(t)){for(let s of t)if(r(s))return!0;return!1}return r(t)}getDomainName(e){if(this.isValidUrl(e))return new URL(e).hostname;else if(this.isHostName(e))return e;return null}isHostName(e){if(!e)return!1;if(e.length>255)return!1;let t=/^(?:(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+ [a-zA-Z]{2,})$/;return e=e.trim().toLowerCase(),t.test(e)&&!e.startsWith("-")&&!e.endsWith("-")}isValidUrl(e){if(!e)return!1;e=e.trim();try{let t=new URL(e);if(!t.protocol||!["http:","https:"].includes(t.protocol.toLowerCase()))return!1;if(!t.hostname)return!1;if(!/^(?:(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,})$/.test(t.hostname))return!1;return!0}catch{return!1}}getRandomUserAgent(){return this.userAgents[Math.floor(Math.random()*this.userAgents.length)]}}function A(){let e=[{name:"Chrome",version:"91.0.4472.124",engine:"AppleWebKit/537.36"},{name:"Firefox",version:"89.0",engine:"Gecko/20100101"},{name:"Safari",version:"14.1.1",engine:"AppleWebKit/605.1.15"},{name:"Edge",version:"91.0.864.59",engine:"AppleWebKit/537.36"},{name:"Opera",version:"77.0.4054.277",engine:"AppleWebKit/537.36"},{name:"Vivaldi",version:"3.8.2259.42",engine:"AppleWebKit/537.36"},{name:"Brave",version:"1.26.74",engine:"AppleWebKit/537.36"},{name:"Chromium",version:"91.0.4472.101",engine:"AppleWebKit/537.36"},{name:"Yandex",version:"21.5.3.742",engine:"AppleWebKit/537.36"},{name:"Maxthon",version:"5.3.8.2000",engine:"AppleWebKit/537.36"}],t=["Windows NT 10.0","Windows NT 6.1","Macintosh; Intel Mac OS X 10_15_7","Macintosh; Intel Mac OS X 11_4_0","X11; Linux x86_64","X11; Ubuntu; Linux x86_64"],i=[];for(let a=0;a<200;a++){let r=e[Math.floor(Math.random()*e.length)],s=t[Math.floor(Math.random()*t.length)],o="";switch(r.name){case"Chrome":o=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36`;break;case"Firefox":o=`Mozilla/5.0 (${s}; rv:${r.version}) ${r.engine} 
Firefox/${r.version}`;break;case"Safari":o=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Version/${r.version} Safari/605.1.15`;break;case"Edge":o=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 Edg/${r.version}`;break;case"Opera":o=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 OPR/${r.version}`;break;case"Vivaldi":o=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 Vivaldi/${r.version}`;break;case"Brave":o=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 Brave/${r.version}`;break;case"Chromium":o=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chromium/${r.version} Chrome/${r.version} Safari/537.36`;break;case"Yandex":o=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} YaBrowser/${r.version} Safari/537.36`;break;case"Maxthon":o=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 Maxthon/${r.version}`;break}i.push(o)}return i}exports.CrawlerOptions=f;
1
+ var{RezoQueue:f}=require("../queue/queue.cjs"),{Oxylabs:x}=require("./addon/oxylabs/index.cjs"),b=require("node:path"),y=require("node:os"),{Decodo:g}=require("./addon/decodo/index.cjs");class m{baseUrl;adapter;enableNavigationHistory;sessionId;rejectUnauthorized;userAgent;useRndUserAgent;timeout;maxRedirects;maxRetryAttempts;retryDelay;retryOnStatusCode;forceRevisit;retryWithoutProxyOnStatusCode;retryOnProxyError;maxRetryOnProxyError;allowRevisiting;enableCache;cacheTTL;cacheDir;throwFatalError;debug;enableSignalHandlers;concurrency;scraperConcurrency;maxDepth;maxUrls;maxResponseSize;respectRobotsTxt;followNofollow;autoThrottle;autoThrottleTargetDelay;autoThrottleMinDelay;autoThrottleMaxDelay;maxWaitOn429;alwaysWaitOn429;oxylabs=[];decodo=[];proxies=[];limiters=[];onLimiterAdded;requestHeaders=[];userAgents=A();constructor(e={}){this.baseUrl=e.baseUrl||"",this.adapter=e.adapter??"http",this.enableNavigationHistory=e.enableNavigationHistory??!1,this.sessionId=e.sessionId??`session_${Date.now()}_${Math.random().toString(36).slice(2,8)}`,this.rejectUnauthorized=e.rejectUnauthorized??!0,this.userAgent=e.userAgent,this.useRndUserAgent=e.useRndUserAgent??!1,this.timeout=e.timeout??30000,this.maxRedirects=e.maxRedirects??10,this.maxRetryAttempts=e.maxRetryAttempts??3,this.retryDelay=e.retryDelay??0,this.retryOnStatusCode=e.retryOnStatusCode??[408,429,500,502,503,504],this.forceRevisit=e.forceRevisit??!1,this.retryWithoutProxyOnStatusCode=e.retryWithoutProxyOnStatusCode??[407,403],this.retryOnProxyError=e.retryOnProxyError??!0,this.maxRetryOnProxyError=e.maxRetryOnProxyError??3,this.allowRevisiting=e.allowRevisiting??!1,this.enableCache=e.enableCache??!0,this.cacheTTL=e.cacheTTL??604800000,this.cacheDir=e.cacheDir??b.join(y.tmpdir(),"uiniqhtt_cache"),this.throwFatalError=e.throwFatalError??!1,this.debug=e.debug??!1,this.enableSignalHandlers=e.enableSignalHandlers??!1,this.concurrency=e.concurrency??100,this.scraperConcurrency=e.scraperConcurrency??this.concurrency,this.m
axDepth=e.maxDepth??0,this.maxUrls=e.maxUrls??0,this.maxResponseSize=e.maxResponseSize??0,this.respectRobotsTxt=e.respectRobotsTxt??!1,this.followNofollow=e.followNofollow??!1,this.autoThrottle=e.autoThrottle??!0,this.autoThrottleTargetDelay=e.autoThrottleTargetDelay??1000,this.autoThrottleMinDelay=e.autoThrottleMinDelay??100,this.autoThrottleMaxDelay=e.autoThrottleMaxDelay??60000,this.maxWaitOn429=e.maxWaitOn429??1800000,this.alwaysWaitOn429=e.alwaysWaitOn429??!1,this._addHeaders(e.headers),this._addOxylabs(e.oxylabs),this._addDecodo(e.decodo),this._addProxies(e.proxy),this._addLimiters(e.limiter)}getConfiguredDomains(e){return(e==="headers"?this.requestHeaders:e==="limiters"?this.limiters:e==="oxylabs"?this.oxylabs:this.proxies).filter((s)=>s.domain).map((s)=>s.domain).filter((s,a,r)=>r.indexOf(s)===a)}removeDomain(e){return this.requestHeaders=this.requestHeaders.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this.proxies=this.proxies.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this.limiters=this.limiters.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this.oxylabs=this.oxylabs.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this}_domainsEqual(e,t){if(Array.isArray(e)&&Array.isArray(t))return e.length===t.length&&e.every((s,a)=>s===t[a]);return e===t}getConfigurationSummary(){let e=(t)=>({total:t.length,global:t.filter((s)=>s.isGlobal).length,domainSpecific:t.filter((s)=>!s.isGlobal&&s.domain).length});return{headers:e(this.requestHeaders),proxies:e(this.proxies),limiters:e(this.limiters),oxylabs:e(this.oxylabs)}}_addHeaders(e){if(!e||!e.enable)return;for(let t of e.httpHeaders){let{domain:s,isGlobal:a,headers:r}=t;if(!s&&!a)continue;if(r instanceof Headers){let i=Object.fromEntries(r.entries());if(Object.keys(i).length<1)continue;r=i}else if(!r||Object.keys(r).length<1)continue;this.requestHeaders.push({domain:s,isGlobal:a,headers:r})}}_addProxies(e){if(!e||!e.enable)return;for(let t of 
e.proxies){let{domain:s,isGlobal:a,proxy:r}=t;if(!s&&!a)continue;if(!r||Object.keys(r).length<1)continue;this.proxies.push({domain:s,isGlobal:a,proxy:r})}}_addLimiters(e){if(!e||!e.enable)return;for(let t of e.limiters){let{domain:s,isGlobal:a,options:r}=t;if(!s&&!a)continue;if(!r||Object.keys(r).length<1)continue;let i=new f(r);if(this.limiters.push({domain:s,isGlobal:a,pqueue:i,randomDelay:r.randomDelay}),this.onLimiterAdded)this.onLimiterAdded(i)}}_addOxylabs(e){if(!e||!e.enable)return;for(let t of e.labs){let{domain:s,isGlobal:a,options:r,queueOptions:i}=t;if(!s&&!a)continue;if(!r||Object.keys(r).length<1)continue;this.oxylabs.push({domain:s,isGlobal:a,adaptar:new x(r)})}}_addDecodo(e){if(!e||!e.enable)return;for(let t of e.labs){let{domain:s,isGlobal:a,options:r,queueOptions:i}=t;if(!s&&!a)continue;if(!r||Object.keys(r).length<1)continue;this.decodo.push({domain:s,isGlobal:a,adaptar:new g(r)})}}addHeaders(e){return this._addHeaders({enable:!0,httpHeaders:[e]}),this}addProxy(e){return this._addProxies({enable:!0,proxies:[e]}),this}addLimiter(e){return this._addLimiters({enable:!0,limiters:[e]}),this}addOxylabs(e){return this._addOxylabs({enable:!0,labs:[e]}),this}addDecodo(e){return this._addDecodo({enable:!0,labs:[e]}),this}destroyLimiters(){for(let e of this.limiters)if(e.pqueue&&typeof e.pqueue.destroy==="function")e.pqueue.destroy();this.limiters=[]}getLimiters(){return this.limiters}getRandomDelay(e,t){if(!this.getDomainName(e))return;for(let a of this.limiters)if(this._hasDomain(e,a.domain)&&a.randomDelay!==void 0)return a.randomDelay;if(t){for(let a of this.limiters)if(a.isGlobal&&a.randomDelay!==void 0)return 
a.randomDelay}return}clearGlobalConfigs(){if(Array.isArray(this.requestHeaders))this.requestHeaders=this.requestHeaders.filter((e)=>!e.isGlobal);if(Array.isArray(this.oxylabs))this.oxylabs=this.oxylabs.filter((e)=>!e.isGlobal);if(Array.isArray(this.limiters))this.limiters=this.limiters.filter((e)=>!e.isGlobal);if(Array.isArray(this.proxies))this.proxies=this.proxies.filter((e)=>!e.isGlobal);return this}getAdapter(e,t,s,a){if(!this.getDomainName(e))return null;let i=[],o=t==="headers"?this.requestHeaders:t==="limiters"?this.limiters:t==="oxylabs"?this.oxylabs:t==="decodo"?this.decodo:this.proxies;for(let n=0;n<o.length;n++)if(this._hasDomain(e,o[n].domain))i.push(n);if(i.length){let n=a?i[this.rnd(0,i.length-1)]:i[0];return t==="headers"?this.requestHeaders[n].headers:t==="limiters"?this.limiters[n].pqueue:t==="oxylabs"?this.oxylabs[n].adaptar:t==="decodo"?this.decodo[n].adaptar:this.proxies[n].proxy}i.length=0;for(let n=0;n<o.length;n++)i.push(n);if(i.length){let n=a?i[this.rnd(0,i.length-1)]:i[0];if(o[n].isGlobal&&s)return t==="headers"?this.requestHeaders[n].headers:t==="limiters"?this.limiters[n].pqueue:t==="oxylabs"?this.oxylabs[n].adaptar:t==="decodo"?this.decodo[n].adaptar:this.proxies[n].proxy}return null}rnd(e=0,t=Number.MAX_VALUE){return Math.floor(Math.random()*(t-e+1))+e}hasDomain(e,t,s){if(!this.getDomainName(e))return!1;let r=t==="headers"?this.requestHeaders:t==="limiters"?this.limiters:t==="oxylabs"?this.oxylabs:t==="decodo"?this.decodo:this.proxies;for(let i=0;i<r.length;i++)if(this._hasDomain(e,r[i].domain))return!0;if(s){for(let i=0;i<r.length;i++)if(r[i].isGlobal)return!0}return!1}pickHeaders(e,t,s,a){let r=this.getAdapter(e,"headers",t),i=new Headers(r??{}),o=i.count;if(s&&s instanceof Headers)for(let[n,l]of Object.entries(s.entries()))i.set(n,l);else if(s&&typeof s==="object"){for(let[n,l]of Object.entries(s))if(typeof l==="string")i.set(n,l)}if(a&&o===0)i.set("user-agent",this.getRandomUserAgent());return 
Object.fromEntries(i.entries())}_hasDomain(e,t){if(!t)return!1;let s=this.getDomainName(e);if(!s)return!1;let a=(i)=>{return/[\^\$\*\+\?\{\}\[\]\(\)\|\\]/.test(i)||i.startsWith("/")||i.includes(".*")||i.includes(".+")},r=(i)=>{if(i instanceof RegExp)return i.test(s)||i.test(e);let o=i.toString().trim();if(s.toLowerCase()===o.toLowerCase())return!0;if(o.includes("*")){let h=o.replace(/[.*+?^${}()|[\]\\]/g,"\\$&").replace(/\\\*/g,".*"),d=new RegExp(`^${h}$`,"i");return d.test(s)||d.test(e)}if(a(o))try{let h=o,d="i",c=o.match(/^\/(.*)\/(\w*)$/);if(c)h=c[1],d=c[2]||"i";let u=new RegExp(h,d);return u.test(s)||u.test(e)}catch(h){return s.toLowerCase().includes(o.toLowerCase())}let n=s.toLowerCase(),l=o.toLowerCase();return n===l||n.endsWith("."+l)||l.endsWith("."+n)};if(Array.isArray(t)){for(let i of t)if(r(i))return!0;return!1}return r(t)}getDomainName(e){if(this.isValidUrl(e))return new URL(e).hostname;else if(this.isHostName(e))return e;return null}isHostName(e){if(!e)return!1;if(e.length>255)return!1;let t=/^(?:(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+ [a-zA-Z]{2,})$/;return e=e.trim().toLowerCase(),t.test(e)&&!e.startsWith("-")&&!e.endsWith("-")}isValidUrl(e){if(!e)return!1;e=e.trim();try{let t=new URL(e);if(!t.protocol||!["http:","https:"].includes(t.protocol.toLowerCase()))return!1;if(!t.hostname)return!1;if(!/^(?:(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,})$/.test(t.hostname))return!1;return!0}catch{return!1}}getRandomUserAgent(){return this.userAgents[Math.floor(Math.random()*this.userAgents.length)]}}function A(){let 
e=[{name:"Chrome",version:"91.0.4472.124",engine:"AppleWebKit/537.36"},{name:"Firefox",version:"89.0",engine:"Gecko/20100101"},{name:"Safari",version:"14.1.1",engine:"AppleWebKit/605.1.15"},{name:"Edge",version:"91.0.864.59",engine:"AppleWebKit/537.36"},{name:"Opera",version:"77.0.4054.277",engine:"AppleWebKit/537.36"},{name:"Vivaldi",version:"3.8.2259.42",engine:"AppleWebKit/537.36"},{name:"Brave",version:"1.26.74",engine:"AppleWebKit/537.36"},{name:"Chromium",version:"91.0.4472.101",engine:"AppleWebKit/537.36"},{name:"Yandex",version:"21.5.3.742",engine:"AppleWebKit/537.36"},{name:"Maxthon",version:"5.3.8.2000",engine:"AppleWebKit/537.36"}],t=["Windows NT 10.0","Windows NT 6.1","Macintosh; Intel Mac OS X 10_15_7","Macintosh; Intel Mac OS X 11_4_0","X11; Linux x86_64","X11; Ubuntu; Linux x86_64"],s=[];for(let a=0;a<200;a++){let r=e[Math.floor(Math.random()*e.length)],i=t[Math.floor(Math.random()*t.length)],o="";switch(r.name){case"Chrome":o=`Mozilla/5.0 (${i}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36`;break;case"Firefox":o=`Mozilla/5.0 (${i}; rv:${r.version}) ${r.engine} Firefox/${r.version}`;break;case"Safari":o=`Mozilla/5.0 (${i}) ${r.engine} (KHTML, like Gecko) Version/${r.version} Safari/605.1.15`;break;case"Edge":o=`Mozilla/5.0 (${i}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 Edg/${r.version}`;break;case"Opera":o=`Mozilla/5.0 (${i}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 OPR/${r.version}`;break;case"Vivaldi":o=`Mozilla/5.0 (${i}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 Vivaldi/${r.version}`;break;case"Brave":o=`Mozilla/5.0 (${i}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 Brave/${r.version}`;break;case"Chromium":o=`Mozilla/5.0 (${i}) ${r.engine} (KHTML, like Gecko) Chromium/${r.version} Chrome/${r.version} Safari/537.36`;break;case"Yandex":o=`Mozilla/5.0 (${i}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} YaBrowser/${r.version} 
Safari/537.36`;break;case"Maxthon":o=`Mozilla/5.0 (${i}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 Maxthon/${r.version}`;break}s.push(o)}return s}exports.CrawlerOptions=m;
@@ -1 +1 @@
1
- import{RezoQueue as m}from"../queue/queue.js";import{Oxylabs as f}from"./addon/oxylabs/index.js";import x from"node:path";import b from"node:os";import{Decodo as g}from"./addon/decodo/index.js";class y{baseUrl;adapter;enableNavigationHistory;sessionId;rejectUnauthorized;userAgent;useRndUserAgent;timeout;maxRedirects;maxRetryAttempts;retryDelay;retryOnStatusCode;forceRevisit;retryWithoutProxyOnStatusCode;retryOnProxyError;maxRetryOnProxyError;allowRevisiting;enableCache;cacheTTL;cacheDir;throwFatalError;debug;concurrency;scraperConcurrency;maxDepth;maxUrls;maxResponseSize;respectRobotsTxt;followNofollow;autoThrottle;autoThrottleTargetDelay;autoThrottleMinDelay;autoThrottleMaxDelay;maxWaitOn429;alwaysWaitOn429;oxylabs=[];decodo=[];proxies=[];limiters=[];requestHeaders=[];userAgents=A();constructor(e={}){this.baseUrl=e.baseUrl||"",this.adapter=e.adapter??"http",this.enableNavigationHistory=e.enableNavigationHistory??!1,this.sessionId=e.sessionId??`session_${Date.now()}_${Math.random().toString(36).slice(2,8)}`,this.rejectUnauthorized=e.rejectUnauthorized??!0,this.userAgent=e.userAgent,this.useRndUserAgent=e.useRndUserAgent??!1,this.timeout=e.timeout??30000,this.maxRedirects=e.maxRedirects??10,this.maxRetryAttempts=e.maxRetryAttempts??3,this.retryDelay=e.retryDelay??0,this.retryOnStatusCode=e.retryOnStatusCode??[408,429,500,502,503,504],this.forceRevisit=e.forceRevisit??!1,this.retryWithoutProxyOnStatusCode=e.retryWithoutProxyOnStatusCode??[407,403],this.retryOnProxyError=e.retryOnProxyError??!0,this.maxRetryOnProxyError=e.maxRetryOnProxyError??3,this.allowRevisiting=e.allowRevisiting??!1,this.enableCache=e.enableCache??!0,this.cacheTTL=e.cacheTTL??604800000,this.cacheDir=e.cacheDir??x.join(b.tmpdir(),"uiniqhtt_cache"),this.throwFatalError=e.throwFatalError??!1,this.debug=e.debug??!1,this.concurrency=e.concurrency??100,this.scraperConcurrency=e.scraperConcurrency??this.concurrency,this.maxDepth=e.maxDepth??0,this.maxUrls=e.maxUrls??0,this.maxResponseSize=e.maxResponse
Size??0,this.respectRobotsTxt=e.respectRobotsTxt??!1,this.followNofollow=e.followNofollow??!1,this.autoThrottle=e.autoThrottle??!0,this.autoThrottleTargetDelay=e.autoThrottleTargetDelay??1000,this.autoThrottleMinDelay=e.autoThrottleMinDelay??100,this.autoThrottleMaxDelay=e.autoThrottleMaxDelay??60000,this.maxWaitOn429=e.maxWaitOn429??1800000,this.alwaysWaitOn429=e.alwaysWaitOn429??!1,this._addHeaders(e.headers),this._addOxylabs(e.oxylabs),this._addDecodo(e.decodo),this._addProxies(e.proxy),this._addLimiters(e.limiter)}getConfiguredDomains(e){return(e==="headers"?this.requestHeaders:e==="limiters"?this.limiters:e==="oxylabs"?this.oxylabs:this.proxies).filter((i)=>i.domain).map((i)=>i.domain).filter((i,a,r)=>r.indexOf(i)===a)}removeDomain(e){return this.requestHeaders=this.requestHeaders.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this.proxies=this.proxies.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this.limiters=this.limiters.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this.oxylabs=this.oxylabs.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this}_domainsEqual(e,t){if(Array.isArray(e)&&Array.isArray(t))return e.length===t.length&&e.every((i,a)=>i===t[a]);return e===t}getConfigurationSummary(){let e=(t)=>({total:t.length,global:t.filter((i)=>i.isGlobal).length,domainSpecific:t.filter((i)=>!i.isGlobal&&i.domain).length});return{headers:e(this.requestHeaders),proxies:e(this.proxies),limiters:e(this.limiters),oxylabs:e(this.oxylabs)}}_addHeaders(e){if(!e||!e.enable)return;for(let t of e.httpHeaders){let{domain:i,isGlobal:a,headers:r}=t;if(!i&&!a)continue;if(r instanceof Headers){let s=Object.fromEntries(r.entries());if(Object.keys(s).length<1)continue;r=s}else if(!r||Object.keys(r).length<1)continue;this.requestHeaders.push({domain:i,isGlobal:a,headers:r})}}_addProxies(e){if(!e||!e.enable)return;for(let t of 
e.proxies){let{domain:i,isGlobal:a,proxy:r}=t;if(!i&&!a)continue;if(!r||Object.keys(r).length<1)continue;this.proxies.push({domain:i,isGlobal:a,proxy:r})}}_addLimiters(e){if(!e||!e.enable)return;for(let t of e.limiters){let{domain:i,isGlobal:a,options:r}=t;if(!i&&!a)continue;if(!r||Object.keys(r).length<1)continue;this.limiters.push({domain:i,isGlobal:a,pqueue:new m(r)})}}_addOxylabs(e){if(!e||!e.enable)return;for(let t of e.labs){let{domain:i,isGlobal:a,options:r,queueOptions:s}=t;if(!i&&!a)continue;if(!r||Object.keys(r).length<1)continue;this.oxylabs.push({domain:i,isGlobal:a,adaptar:new f(r)})}}_addDecodo(e){if(!e||!e.enable)return;for(let t of e.labs){let{domain:i,isGlobal:a,options:r,queueOptions:s}=t;if(!i&&!a)continue;if(!r||Object.keys(r).length<1)continue;this.decodo.push({domain:i,isGlobal:a,adaptar:new g(r)})}}addHeaders(e){return this._addHeaders({enable:!0,httpHeaders:[e]}),this}addProxy(e){return this._addProxies({enable:!0,proxies:[e]}),this}addLimiter(e){return this._addLimiters({enable:!0,limiters:[e]}),this}addOxylabs(e){return this._addOxylabs({enable:!0,labs:[e]}),this}addDecodo(e){return this._addDecodo({enable:!0,labs:[e]}),this}destroyLimiters(){for(let e of this.limiters)if(e.pqueue&&typeof e.pqueue.destroy==="function")e.pqueue.destroy();this.limiters=[]}clearGlobalConfigs(){if(Array.isArray(this.requestHeaders))this.requestHeaders=this.requestHeaders.filter((e)=>!e.isGlobal);if(Array.isArray(this.oxylabs))this.oxylabs=this.oxylabs.filter((e)=>!e.isGlobal);if(Array.isArray(this.limiters))this.limiters=this.limiters.filter((e)=>!e.isGlobal);if(Array.isArray(this.proxies))this.proxies=this.proxies.filter((e)=>!e.isGlobal);return this}getAdapter(e,t,i,a){if(!this.getDomainName(e))return null;let s=[],n=t==="headers"?this.requestHeaders:t==="limiters"?this.limiters:t==="oxylabs"?this.oxylabs:t==="decodo"?this.decodo:this.proxies;for(let o=0;o<n.length;o++)if(this._hasDomain(e,n[o].domain))s.push(o);if(s.length){let 
o=a?s[this.rnd(0,s.length-1)]:s[0];return t==="headers"?this.requestHeaders[o].headers:t==="limiters"?this.limiters[o].pqueue:t==="oxylabs"?this.oxylabs[o].adaptar:t==="decodo"?this.decodo[o].adaptar:this.proxies[o].proxy}s.length=0;for(let o=0;o<n.length;o++)s.push(o);if(s.length){let o=a?s[this.rnd(0,s.length-1)]:s[0];if(n[o].isGlobal&&i)return t==="headers"?this.requestHeaders[o].headers:t==="limiters"?this.limiters[o].pqueue:t==="oxylabs"?this.oxylabs[o].adaptar:t==="decodo"?this.decodo[o].adaptar:this.proxies[o].proxy}return null}rnd(e=0,t=Number.MAX_VALUE){return Math.floor(Math.random()*(t-e+1))+e}hasDomain(e,t,i){if(!this.getDomainName(e))return!1;let r=t==="headers"?this.requestHeaders:t==="limiters"?this.limiters:t==="oxylabs"?this.oxylabs:t==="decodo"?this.decodo:this.proxies;for(let s=0;s<r.length;s++)if(this._hasDomain(e,r[s].domain))return!0;if(i){for(let s=0;s<r.length;s++)if(r[s].isGlobal)return!0}return!1}pickHeaders(e,t,i,a){let r=this.getAdapter(e,"headers",t),s=new Headers(r??{}),n=s.count;if(i&&i instanceof Headers)for(let[o,l]of Object.entries(i.entries()))s.set(o,l);else if(i&&typeof i==="object"){for(let[o,l]of Object.entries(i))if(typeof l==="string")s.set(o,l)}if(a&&n===0)s.set("user-agent",this.getRandomUserAgent());return Object.fromEntries(s.entries())}_hasDomain(e,t){if(!t)return!1;let i=this.getDomainName(e);if(!i)return!1;let a=(s)=>{return/[\^\$\*\+\?\{\}\[\]\(\)\|\\]/.test(s)||s.startsWith("/")||s.includes(".*")||s.includes(".+")},r=(s)=>{if(s instanceof RegExp)return s.test(i)||s.test(e);let n=s.toString().trim();if(i.toLowerCase()===n.toLowerCase())return!0;if(n.includes("*")){let h=n.replace(/[.*+?^${}()|[\]\\]/g,"\\$&").replace(/\\\*/g,".*"),c=new RegExp(`^${h}$`,"i");return c.test(i)||c.test(e)}if(a(n))try{let h=n,c="i",u=n.match(/^\/(.*)\/(\w*)$/);if(u)h=u[1],c=u[2]||"i";let d=new RegExp(h,c);return d.test(i)||d.test(e)}catch(h){return i.toLowerCase().includes(n.toLowerCase())}let o=i.toLowerCase(),l=n.toLowerCase();return 
o===l||o.endsWith("."+l)||l.endsWith("."+o)};if(Array.isArray(t)){for(let s of t)if(r(s))return!0;return!1}return r(t)}getDomainName(e){if(this.isValidUrl(e))return new URL(e).hostname;else if(this.isHostName(e))return e;return null}isHostName(e){if(!e)return!1;if(e.length>255)return!1;let t=/^(?:(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+ [a-zA-Z]{2,})$/;return e=e.trim().toLowerCase(),t.test(e)&&!e.startsWith("-")&&!e.endsWith("-")}isValidUrl(e){if(!e)return!1;e=e.trim();try{let t=new URL(e);if(!t.protocol||!["http:","https:"].includes(t.protocol.toLowerCase()))return!1;if(!t.hostname)return!1;if(!/^(?:(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,})$/.test(t.hostname))return!1;return!0}catch{return!1}}getRandomUserAgent(){return this.userAgents[Math.floor(Math.random()*this.userAgents.length)]}}function A(){let e=[{name:"Chrome",version:"91.0.4472.124",engine:"AppleWebKit/537.36"},{name:"Firefox",version:"89.0",engine:"Gecko/20100101"},{name:"Safari",version:"14.1.1",engine:"AppleWebKit/605.1.15"},{name:"Edge",version:"91.0.864.59",engine:"AppleWebKit/537.36"},{name:"Opera",version:"77.0.4054.277",engine:"AppleWebKit/537.36"},{name:"Vivaldi",version:"3.8.2259.42",engine:"AppleWebKit/537.36"},{name:"Brave",version:"1.26.74",engine:"AppleWebKit/537.36"},{name:"Chromium",version:"91.0.4472.101",engine:"AppleWebKit/537.36"},{name:"Yandex",version:"21.5.3.742",engine:"AppleWebKit/537.36"},{name:"Maxthon",version:"5.3.8.2000",engine:"AppleWebKit/537.36"}],t=["Windows NT 10.0","Windows NT 6.1","Macintosh; Intel Mac OS X 10_15_7","Macintosh; Intel Mac OS X 11_4_0","X11; Linux x86_64","X11; Ubuntu; Linux x86_64"],i=[];for(let a=0;a<200;a++){let r=e[Math.floor(Math.random()*e.length)],s=t[Math.floor(Math.random()*t.length)],n="";switch(r.name){case"Chrome":n=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36`;break;case"Firefox":n=`Mozilla/5.0 (${s}; rv:${r.version}) ${r.engine} 
Firefox/${r.version}`;break;case"Safari":n=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Version/${r.version} Safari/605.1.15`;break;case"Edge":n=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 Edg/${r.version}`;break;case"Opera":n=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 OPR/${r.version}`;break;case"Vivaldi":n=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 Vivaldi/${r.version}`;break;case"Brave":n=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 Brave/${r.version}`;break;case"Chromium":n=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chromium/${r.version} Chrome/${r.version} Safari/537.36`;break;case"Yandex":n=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} YaBrowser/${r.version} Safari/537.36`;break;case"Maxthon":n=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 Maxthon/${r.version}`;break}i.push(n)}return i}export{y as CrawlerOptions};
1
+ import{RezoQueue as m}from"../queue/queue.js";import{Oxylabs as f}from"./addon/oxylabs/index.js";import x from"node:path";import b from"node:os";import{Decodo as y}from"./addon/decodo/index.js";class g{baseUrl;adapter;enableNavigationHistory;sessionId;rejectUnauthorized;userAgent;useRndUserAgent;timeout;maxRedirects;maxRetryAttempts;retryDelay;retryOnStatusCode;forceRevisit;retryWithoutProxyOnStatusCode;retryOnProxyError;maxRetryOnProxyError;allowRevisiting;enableCache;cacheTTL;cacheDir;throwFatalError;debug;enableSignalHandlers;concurrency;scraperConcurrency;maxDepth;maxUrls;maxResponseSize;respectRobotsTxt;followNofollow;autoThrottle;autoThrottleTargetDelay;autoThrottleMinDelay;autoThrottleMaxDelay;maxWaitOn429;alwaysWaitOn429;oxylabs=[];decodo=[];proxies=[];limiters=[];onLimiterAdded;requestHeaders=[];userAgents=A();constructor(e={}){this.baseUrl=e.baseUrl||"",this.adapter=e.adapter??"http",this.enableNavigationHistory=e.enableNavigationHistory??!1,this.sessionId=e.sessionId??`session_${Date.now()}_${Math.random().toString(36).slice(2,8)}`,this.rejectUnauthorized=e.rejectUnauthorized??!0,this.userAgent=e.userAgent,this.useRndUserAgent=e.useRndUserAgent??!1,this.timeout=e.timeout??30000,this.maxRedirects=e.maxRedirects??10,this.maxRetryAttempts=e.maxRetryAttempts??3,this.retryDelay=e.retryDelay??0,this.retryOnStatusCode=e.retryOnStatusCode??[408,429,500,502,503,504],this.forceRevisit=e.forceRevisit??!1,this.retryWithoutProxyOnStatusCode=e.retryWithoutProxyOnStatusCode??[407,403],this.retryOnProxyError=e.retryOnProxyError??!0,this.maxRetryOnProxyError=e.maxRetryOnProxyError??3,this.allowRevisiting=e.allowRevisiting??!1,this.enableCache=e.enableCache??!0,this.cacheTTL=e.cacheTTL??604800000,this.cacheDir=e.cacheDir??x.join(b.tmpdir(),"uiniqhtt_cache"),this.throwFatalError=e.throwFatalError??!1,this.debug=e.debug??!1,this.enableSignalHandlers=e.enableSignalHandlers??!1,this.concurrency=e.concurrency??100,this.scraperConcurrency=e.scraperConcurrency??this.concurrency
,this.maxDepth=e.maxDepth??0,this.maxUrls=e.maxUrls??0,this.maxResponseSize=e.maxResponseSize??0,this.respectRobotsTxt=e.respectRobotsTxt??!1,this.followNofollow=e.followNofollow??!1,this.autoThrottle=e.autoThrottle??!0,this.autoThrottleTargetDelay=e.autoThrottleTargetDelay??1000,this.autoThrottleMinDelay=e.autoThrottleMinDelay??100,this.autoThrottleMaxDelay=e.autoThrottleMaxDelay??60000,this.maxWaitOn429=e.maxWaitOn429??1800000,this.alwaysWaitOn429=e.alwaysWaitOn429??!1,this._addHeaders(e.headers),this._addOxylabs(e.oxylabs),this._addDecodo(e.decodo),this._addProxies(e.proxy),this._addLimiters(e.limiter)}getConfiguredDomains(e){return(e==="headers"?this.requestHeaders:e==="limiters"?this.limiters:e==="oxylabs"?this.oxylabs:this.proxies).filter((s)=>s.domain).map((s)=>s.domain).filter((s,a,r)=>r.indexOf(s)===a)}removeDomain(e){return this.requestHeaders=this.requestHeaders.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this.proxies=this.proxies.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this.limiters=this.limiters.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this.oxylabs=this.oxylabs.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this}_domainsEqual(e,t){if(Array.isArray(e)&&Array.isArray(t))return e.length===t.length&&e.every((s,a)=>s===t[a]);return e===t}getConfigurationSummary(){let e=(t)=>({total:t.length,global:t.filter((s)=>s.isGlobal).length,domainSpecific:t.filter((s)=>!s.isGlobal&&s.domain).length});return{headers:e(this.requestHeaders),proxies:e(this.proxies),limiters:e(this.limiters),oxylabs:e(this.oxylabs)}}_addHeaders(e){if(!e||!e.enable)return;for(let t of e.httpHeaders){let{domain:s,isGlobal:a,headers:r}=t;if(!s&&!a)continue;if(r instanceof Headers){let i=Object.fromEntries(r.entries());if(Object.keys(i).length<1)continue;r=i}else if(!r||Object.keys(r).length<1)continue;this.requestHeaders.push({domain:s,isGlobal:a,headers:r})}}_addProxies(e){if(!e||!e.enable)return;for(let t of 
e.proxies){let{domain:s,isGlobal:a,proxy:r}=t;if(!s&&!a)continue;if(!r||Object.keys(r).length<1)continue;this.proxies.push({domain:s,isGlobal:a,proxy:r})}}_addLimiters(e){if(!e||!e.enable)return;for(let t of e.limiters){let{domain:s,isGlobal:a,options:r}=t;if(!s&&!a)continue;if(!r||Object.keys(r).length<1)continue;let i=new m(r);if(this.limiters.push({domain:s,isGlobal:a,pqueue:i,randomDelay:r.randomDelay}),this.onLimiterAdded)this.onLimiterAdded(i)}}_addOxylabs(e){if(!e||!e.enable)return;for(let t of e.labs){let{domain:s,isGlobal:a,options:r,queueOptions:i}=t;if(!s&&!a)continue;if(!r||Object.keys(r).length<1)continue;this.oxylabs.push({domain:s,isGlobal:a,adaptar:new f(r)})}}_addDecodo(e){if(!e||!e.enable)return;for(let t of e.labs){let{domain:s,isGlobal:a,options:r,queueOptions:i}=t;if(!s&&!a)continue;if(!r||Object.keys(r).length<1)continue;this.decodo.push({domain:s,isGlobal:a,adaptar:new y(r)})}}addHeaders(e){return this._addHeaders({enable:!0,httpHeaders:[e]}),this}addProxy(e){return this._addProxies({enable:!0,proxies:[e]}),this}addLimiter(e){return this._addLimiters({enable:!0,limiters:[e]}),this}addOxylabs(e){return this._addOxylabs({enable:!0,labs:[e]}),this}addDecodo(e){return this._addDecodo({enable:!0,labs:[e]}),this}destroyLimiters(){for(let e of this.limiters)if(e.pqueue&&typeof e.pqueue.destroy==="function")e.pqueue.destroy();this.limiters=[]}getLimiters(){return this.limiters}getRandomDelay(e,t){if(!this.getDomainName(e))return;for(let a of this.limiters)if(this._hasDomain(e,a.domain)&&a.randomDelay!==void 0)return a.randomDelay;if(t){for(let a of this.limiters)if(a.isGlobal&&a.randomDelay!==void 0)return 
a.randomDelay}return}clearGlobalConfigs(){if(Array.isArray(this.requestHeaders))this.requestHeaders=this.requestHeaders.filter((e)=>!e.isGlobal);if(Array.isArray(this.oxylabs))this.oxylabs=this.oxylabs.filter((e)=>!e.isGlobal);if(Array.isArray(this.limiters))this.limiters=this.limiters.filter((e)=>!e.isGlobal);if(Array.isArray(this.proxies))this.proxies=this.proxies.filter((e)=>!e.isGlobal);return this}getAdapter(e,t,s,a){if(!this.getDomainName(e))return null;let i=[],o=t==="headers"?this.requestHeaders:t==="limiters"?this.limiters:t==="oxylabs"?this.oxylabs:t==="decodo"?this.decodo:this.proxies;for(let n=0;n<o.length;n++)if(this._hasDomain(e,o[n].domain))i.push(n);if(i.length){let n=a?i[this.rnd(0,i.length-1)]:i[0];return t==="headers"?this.requestHeaders[n].headers:t==="limiters"?this.limiters[n].pqueue:t==="oxylabs"?this.oxylabs[n].adaptar:t==="decodo"?this.decodo[n].adaptar:this.proxies[n].proxy}i.length=0;for(let n=0;n<o.length;n++)i.push(n);if(i.length){let n=a?i[this.rnd(0,i.length-1)]:i[0];if(o[n].isGlobal&&s)return t==="headers"?this.requestHeaders[n].headers:t==="limiters"?this.limiters[n].pqueue:t==="oxylabs"?this.oxylabs[n].adaptar:t==="decodo"?this.decodo[n].adaptar:this.proxies[n].proxy}return null}rnd(e=0,t=Number.MAX_VALUE){return Math.floor(Math.random()*(t-e+1))+e}hasDomain(e,t,s){if(!this.getDomainName(e))return!1;let r=t==="headers"?this.requestHeaders:t==="limiters"?this.limiters:t==="oxylabs"?this.oxylabs:t==="decodo"?this.decodo:this.proxies;for(let i=0;i<r.length;i++)if(this._hasDomain(e,r[i].domain))return!0;if(s){for(let i=0;i<r.length;i++)if(r[i].isGlobal)return!0}return!1}pickHeaders(e,t,s,a){let r=this.getAdapter(e,"headers",t),i=new Headers(r??{}),o=i.count;if(s&&s instanceof Headers)for(let[n,l]of Object.entries(s.entries()))i.set(n,l);else if(s&&typeof s==="object"){for(let[n,l]of Object.entries(s))if(typeof l==="string")i.set(n,l)}if(a&&o===0)i.set("user-agent",this.getRandomUserAgent());return 
Object.fromEntries(i.entries())}_hasDomain(e,t){if(!t)return!1;let s=this.getDomainName(e);if(!s)return!1;let a=(i)=>{return/[\^\$\*\+\?\{\}\[\]\(\)\|\\]/.test(i)||i.startsWith("/")||i.includes(".*")||i.includes(".+")},r=(i)=>{if(i instanceof RegExp)return i.test(s)||i.test(e);let o=i.toString().trim();if(s.toLowerCase()===o.toLowerCase())return!0;if(o.includes("*")){let h=o.replace(/[.*+?^${}()|[\]\\]/g,"\\$&").replace(/\\\*/g,".*"),d=new RegExp(`^${h}$`,"i");return d.test(s)||d.test(e)}if(a(o))try{let h=o,d="i",c=o.match(/^\/(.*)\/(\w*)$/);if(c)h=c[1],d=c[2]||"i";let u=new RegExp(h,d);return u.test(s)||u.test(e)}catch(h){return s.toLowerCase().includes(o.toLowerCase())}let n=s.toLowerCase(),l=o.toLowerCase();return n===l||n.endsWith("."+l)||l.endsWith("."+n)};if(Array.isArray(t)){for(let i of t)if(r(i))return!0;return!1}return r(t)}getDomainName(e){if(this.isValidUrl(e))return new URL(e).hostname;else if(this.isHostName(e))return e;return null}isHostName(e){if(!e)return!1;if(e.length>255)return!1;let t=/^(?:(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+ [a-zA-Z]{2,})$/;return e=e.trim().toLowerCase(),t.test(e)&&!e.startsWith("-")&&!e.endsWith("-")}isValidUrl(e){if(!e)return!1;e=e.trim();try{let t=new URL(e);if(!t.protocol||!["http:","https:"].includes(t.protocol.toLowerCase()))return!1;if(!t.hostname)return!1;if(!/^(?:(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,})$/.test(t.hostname))return!1;return!0}catch{return!1}}getRandomUserAgent(){return this.userAgents[Math.floor(Math.random()*this.userAgents.length)]}}function A(){let 
e=[{name:"Chrome",version:"91.0.4472.124",engine:"AppleWebKit/537.36"},{name:"Firefox",version:"89.0",engine:"Gecko/20100101"},{name:"Safari",version:"14.1.1",engine:"AppleWebKit/605.1.15"},{name:"Edge",version:"91.0.864.59",engine:"AppleWebKit/537.36"},{name:"Opera",version:"77.0.4054.277",engine:"AppleWebKit/537.36"},{name:"Vivaldi",version:"3.8.2259.42",engine:"AppleWebKit/537.36"},{name:"Brave",version:"1.26.74",engine:"AppleWebKit/537.36"},{name:"Chromium",version:"91.0.4472.101",engine:"AppleWebKit/537.36"},{name:"Yandex",version:"21.5.3.742",engine:"AppleWebKit/537.36"},{name:"Maxthon",version:"5.3.8.2000",engine:"AppleWebKit/537.36"}],t=["Windows NT 10.0","Windows NT 6.1","Macintosh; Intel Mac OS X 10_15_7","Macintosh; Intel Mac OS X 11_4_0","X11; Linux x86_64","X11; Ubuntu; Linux x86_64"],s=[];for(let a=0;a<200;a++){let r=e[Math.floor(Math.random()*e.length)],i=t[Math.floor(Math.random()*t.length)],o="";switch(r.name){case"Chrome":o=`Mozilla/5.0 (${i}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36`;break;case"Firefox":o=`Mozilla/5.0 (${i}; rv:${r.version}) ${r.engine} Firefox/${r.version}`;break;case"Safari":o=`Mozilla/5.0 (${i}) ${r.engine} (KHTML, like Gecko) Version/${r.version} Safari/605.1.15`;break;case"Edge":o=`Mozilla/5.0 (${i}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 Edg/${r.version}`;break;case"Opera":o=`Mozilla/5.0 (${i}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 OPR/${r.version}`;break;case"Vivaldi":o=`Mozilla/5.0 (${i}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 Vivaldi/${r.version}`;break;case"Brave":o=`Mozilla/5.0 (${i}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 Brave/${r.version}`;break;case"Chromium":o=`Mozilla/5.0 (${i}) ${r.engine} (KHTML, like Gecko) Chromium/${r.version} Chrome/${r.version} Safari/537.36`;break;case"Yandex":o=`Mozilla/5.0 (${i}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} YaBrowser/${r.version} 
Safari/537.36`;break;case"Maxthon":o=`Mozilla/5.0 (${i}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 Maxthon/${r.version}`;break}s.push(o)}return s}export{g as CrawlerOptions};
@@ -9,7 +9,7 @@ const { CappedMap } = require('./plugin/capped-map.cjs');
9
9
  const { CappedArray } = require('./plugin/capped-array.cjs');
10
10
  const { parseHTML } = require("linkedom");
11
11
  const path = require("node:path");
12
- const { Rezo } = require('../adapters/entries/http.cjs');
12
+ const rezo = require('../adapters/entries/http.cjs');
13
13
  const { RezoQueue } = require('../queue/queue.cjs');
14
14
  const { Scraper } = require('./scraper.cjs');
15
15
  const { CrawlerOptions } = require('./crawler-options.cjs');
@@ -85,6 +85,7 @@ class Crawler {
85
85
  startHandlers = [];
86
86
  finishHandlers = [];
87
87
  redirectHandlers = [];
88
+ queueChangeHandlers = [];
88
89
  collectedData = new CappedArray({
89
90
  maxSize: 1e5,
90
91
  evictionRatio: 0.1,
@@ -94,20 +95,28 @@ class Crawler {
94
95
  });
95
96
  crawlStarted = false;
96
97
  startHandlersPromise = null;
97
- constructor(crawlerOptions, http = new Rezo) {
98
+ constructor(crawlerOptions, http = rezo.create()) {
98
99
  this.http = http;
99
100
  this.config = new CrawlerOptions(crawlerOptions);
100
101
  this.adapterType = this.config.adapter;
101
102
  const concurrency = this.config.concurrency;
102
103
  this.queue = new RezoQueue({
103
- concurrency,
104
- timeout: 60000
104
+ name: "crawler",
105
+ concurrency
105
106
  });
106
107
  this.originalConcurrency = concurrency;
107
108
  this.scraperQueue = new RezoQueue({
108
- concurrency: this.config.scraperConcurrency,
109
- timeout: 60000
109
+ name: "scraper",
110
+ concurrency: this.config.scraperConcurrency
110
111
  });
112
+ this._subscribeToQueueEvents(this.queue, "crawler");
113
+ this._subscribeToQueueEvents(this.scraperQueue, "scraper");
114
+ this.config.onLimiterAdded = (queue) => {
115
+ if (!this.subscribedLimiterQueues.has(queue)) {
116
+ this._subscribeToQueueEvents(queue, "limiter");
117
+ this.subscribedLimiterQueues.add(queue);
118
+ }
119
+ };
111
120
  this.memoryMonitor = new MemoryMonitor({ warningRatio: 0.7, criticalRatio: 0.85 });
112
121
  this.healthMetrics = new HealthMetrics({ windowSize: 60000 });
113
122
  const enableCache = this.config.enableCache;
@@ -178,7 +187,9 @@ class Crawler {
178
187
  if (this.config.baseUrl) {
179
188
  this.urlDepthMap.set(this.config.baseUrl, 0);
180
189
  }
181
- this.registerShutdownHandlers();
190
+ if (this.config.enableSignalHandlers) {
191
+ this.registerShutdownHandlers();
192
+ }
182
193
  }
183
194
  registerShutdownHandlers() {
184
195
  if (this.shutdownHandler)
@@ -227,6 +238,7 @@ class Crawler {
227
238
  }
228
239
  await this.destroy();
229
240
  console.log("[Crawler] Graceful shutdown complete");
241
+ process.exit(0);
230
242
  }
231
243
  async initializeAdapter() {
232
244
  try {
@@ -595,6 +607,11 @@ class Crawler {
595
607
  this.redirectHandlers.push(handler);
596
608
  return this;
597
609
  }
610
+ onQueueChange(handler) {
611
+ this.queueChangeHandlers.push(handler);
612
+ this._subscribeToLimiterQueues();
613
+ return this;
614
+ }
598
615
  onRawData(handler) {
599
616
  this.rawResponseEvents.push({
600
617
  handler: "_onRawResponse",
@@ -665,6 +682,52 @@ class Crawler {
665
682
  });
666
683
  return this;
667
684
  }
685
+ subscribedLimiterQueues = new Set;
686
+ _subscribeToQueueEvents(queue, queueType) {
687
+ const emitEvent = (event, taskId) => {
688
+ if (this.queueChangeHandlers.length === 0)
689
+ return;
690
+ const state = queue.state;
691
+ const queueChangeEvent = {
692
+ queueName: queue.name,
693
+ queueType,
694
+ event,
695
+ pending: state.pending,
696
+ size: state.size,
697
+ total: state.total,
698
+ isPaused: state.isPaused,
699
+ isIdle: state.isIdle,
700
+ taskId
701
+ };
702
+ for (const handler of this.queueChangeHandlers) {
703
+ try {
704
+ handler(queueChangeEvent);
705
+ } catch (err) {
706
+ if (this.config.debug)
707
+ console.error("[Crawler] onQueueChange handler error:", err);
708
+ }
709
+ }
710
+ };
711
+ queue.on("add", (data) => emitEvent("add", data.id));
712
+ queue.on("start", (data) => emitEvent("start", data.id));
713
+ queue.on("completed", (data) => emitEvent("completed", data.id));
714
+ queue.on("error", (data) => emitEvent("error", data.id));
715
+ queue.on("timeout", (data) => emitEvent("timeout", data.id));
716
+ queue.on("cancelled", (data) => emitEvent("cancelled", data.id));
717
+ queue.on("idle", () => emitEvent("idle"));
718
+ queue.on("active", () => emitEvent("active"));
719
+ queue.on("paused", () => emitEvent("paused"));
720
+ queue.on("resumed", () => emitEvent("resumed"));
721
+ }
722
+ _subscribeToLimiterQueues() {
723
+ const limiters = this.config.getLimiters();
724
+ for (const limiter of limiters) {
725
+ if (!this.subscribedLimiterQueues.has(limiter.pqueue)) {
726
+ this._subscribeToQueueEvents(limiter.pqueue, "limiter");
727
+ this.subscribedLimiterQueues.add(limiter.pqueue);
728
+ }
729
+ }
730
+ }
668
731
  _onBody(handler, document) {
669
732
  this.queue.add(() => handler(document.body));
670
733
  }
@@ -1056,8 +1119,6 @@ class Crawler {
1056
1119
  const headersObj = headers instanceof Headers ? Object.fromEntries(headers.entries()) : headers;
1057
1120
  this.addToNavigationQueue(url, method, body, headersObj);
1058
1121
  }
1059
- if (url.includes(`/www.yellowpages.com/search?`))
1060
- console.log("Visiting: ", url);
1061
1122
  this.crawlStarted = true;
1062
1123
  if (deepEmailFinder) {
1063
1124
  this.execute2(method, url, body, _options, forceRevisit, emailMetadata);
@@ -1120,7 +1181,16 @@ class Crawler {
1120
1181
  }
1121
1182
  this.crawlStats.urlsQueued++;
1122
1183
  const domain = new URL(url).hostname;
1123
- const delay = this.getAutoThrottleDelay(domain);
1184
+ const limiterRandomDelay = this.config.getRandomDelay(url, true);
1185
+ let delay = 0;
1186
+ if (limiterRandomDelay !== undefined && limiterRandomDelay > 0) {
1187
+ delay = Math.floor(Math.random() * limiterRandomDelay);
1188
+ if (this.config.debug) {
1189
+ console.log(`[RandomDelay] ${domain}: ${delay}ms (max: ${limiterRandomDelay}ms)`);
1190
+ }
1191
+ } else {
1192
+ delay = this.getAutoThrottleDelay(domain);
1193
+ }
1124
1194
  if (delay > 0) {
1125
1195
  await new Promise((resolve) => setTimeout(resolve, delay));
1126
1196
  }
@@ -1133,7 +1203,7 @@ class Crawler {
1133
1203
  return;
1134
1204
  }
1135
1205
  const requestStartTime = Date.now();
1136
- const response = cache && method === "GET" && !skipCache ? cache : oxylabsInstanse && oxylabsOptions ? await oxylabsInstanse.scrape(url) : decodoInstanse && decodoOptions ? await decodoInstanse.scrape(url) : await (method === "GET" ? this.http.get(url, options) : method === "PATCH" ? this.http.patch(url, body, options) : method === "POST" ? this.http.post(url, body, options) : this.http.put(url, body, options));
1206
+ const response = cache && method === "GET" && !skipCache ? cache : oxylabsInstanse && oxylabsOptions ? await oxylabsInstanse.scrape(url) : decodoInstanse && decodoOptions ? await decodoInstanse.scrape(url) : method === "GET" ? await this.http.get(url, options) : method === "PATCH" ? await this.http.patch(url, body, options) : method === "POST" ? await this.http.post(url, body, options) : await this.http.put(url, body, options);
1137
1207
  if (!response) {
1138
1208
  this.crawlStats.urlsFailed++;
1139
1209
  this.healthMetrics.recordRequest(Date.now() - requestStartTime, false);
@@ -1268,6 +1338,17 @@ class Crawler {
1268
1338
  }
1269
1339
  }
1270
1340
  async waitForAll() {
1341
+ if (!this.crawlStarted) {
1342
+ await new Promise((resolve) => setImmediate(resolve));
1343
+ const maxWaitForStart = 1000;
1344
+ const startWait = Date.now();
1345
+ while (!this.crawlStarted && Date.now() - startWait < maxWaitForStart) {
1346
+ await new Promise((resolve) => setTimeout(resolve, 10));
1347
+ }
1348
+ if (!this.crawlStarted) {
1349
+ return;
1350
+ }
1351
+ }
1271
1352
  const MIN_DELAY = 50;
1272
1353
  const MAX_DELAY = 500;
1273
1354
  let currentDelay = MIN_DELAY;