rezo 1.0.131 → 1.0.133

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. package/dist/adapters/entries/curl.d.ts +1 -1
  2. package/dist/adapters/entries/fetch.d.ts +1 -1
  3. package/dist/adapters/entries/http.d.ts +1 -1
  4. package/dist/adapters/entries/http2.d.ts +1 -1
  5. package/dist/adapters/entries/react-native.cjs +6 -6
  6. package/dist/adapters/entries/react-native.d.ts +1 -1
  7. package/dist/adapters/entries/xhr.d.ts +1 -1
  8. package/dist/adapters/index.cjs +6 -6
  9. package/dist/cache/index.cjs +9 -9
  10. package/dist/cookies/cookie-jar.cjs +4 -4
  11. package/dist/cookies/index.cjs +10 -10
  12. package/dist/crawler/addon/oxylabs/index.cjs +1 -1
  13. package/dist/crawler/crawler-options.cjs +1 -1
  14. package/dist/crawler/crawler-options.js +1 -1
  15. package/dist/crawler/crawler.cjs +24 -4
  16. package/dist/crawler/crawler.js +24 -4
  17. package/dist/crawler/index.cjs +42 -42
  18. package/dist/crawler/plugin/index.cjs +1 -1
  19. package/dist/crawler/plugin/sqlite-utils.cjs +1 -1
  20. package/dist/crawler/plugin/sqlite-utils.js +1 -1
  21. package/dist/crawler.d.ts +7 -0
  22. package/dist/dom/index.cjs +1 -23
  23. package/dist/dom/index.d.ts +3 -0
  24. package/dist/dom/index.js +1 -20
  25. package/dist/entries/crawler.cjs +6 -6
  26. package/dist/index.cjs +48 -48
  27. package/dist/index.d.ts +1 -1
  28. package/dist/internal/agents/index.cjs +14 -14
  29. package/dist/platform/browser.d.ts +1 -1
  30. package/dist/platform/bun.d.ts +1 -1
  31. package/dist/platform/deno.d.ts +1 -1
  32. package/dist/platform/node.d.ts +1 -1
  33. package/dist/platform/react-native.cjs +6 -6
  34. package/dist/platform/react-native.d.ts +1 -1
  35. package/dist/platform/worker.d.ts +1 -1
  36. package/dist/proxy/index.cjs +4 -4
  37. package/dist/queue/index.cjs +8 -8
  38. package/dist/responses/universal/index.cjs +11 -11
  39. package/dist/stealth/index.cjs +17 -17
  40. package/dist/stealth/profiles/index.cjs +10 -10
  41. package/dist/version.cjs +1 -1
  42. package/dist/version.js +1 -1
  43. package/dist/wget/asset-extractor.cjs +187 -19
  44. package/dist/wget/asset-extractor.js +184 -19
  45. package/dist/wget/downloader.cjs +288 -12
  46. package/dist/wget/downloader.js +288 -12
  47. package/dist/wget/index.cjs +221 -52
  48. package/dist/wget/index.d.ts +219 -9
  49. package/dist/wget/index.js +170 -1
  50. package/dist/wget/progress.cjs +90 -7
  51. package/dist/wget/progress.js +90 -7
  52. package/dist/wget/session.cjs +122 -0
  53. package/dist/wget/session.js +119 -0
  54. package/dist/wget/types.cjs +6 -0
  55. package/dist/wget/types.js +6 -0
  56. package/dist/wget/url-filter.cjs +4 -1
  57. package/dist/wget/url-filter.js +4 -1
  58. package/package.json +23 -22
@@ -6027,7 +6027,7 @@ export interface RezoInstance extends Rezo, RezoCallable {
6027
6027
  *
6028
6028
  * IMPORTANT: Update these values when bumping package version.
6029
6029
  */
6030
- export declare const VERSION = "1.0.131";
6030
+ export declare const VERSION = "1.0.133";
6031
6031
  /**
6032
6032
  * cURL Options Configuration
6033
6033
  *
@@ -6027,7 +6027,7 @@ export interface RezoInstance extends Rezo, RezoCallable {
6027
6027
  *
6028
6028
  * IMPORTANT: Update these values when bumping package version.
6029
6029
  */
6030
- export declare const VERSION = "1.0.131";
6030
+ export declare const VERSION = "1.0.133";
6031
6031
  export declare const isRezoError: typeof RezoError.isRezoError;
6032
6032
  export declare const Cancel: typeof RezoError;
6033
6033
  export declare const CancelToken: {
@@ -6027,7 +6027,7 @@ export interface RezoInstance extends Rezo, RezoCallable {
6027
6027
  *
6028
6028
  * IMPORTANT: Update these values when bumping package version.
6029
6029
  */
6030
- export declare const VERSION = "1.0.131";
6030
+ export declare const VERSION = "1.0.133";
6031
6031
  /**
6032
6032
  * Type guard to check if an error is a RezoError instance.
6033
6033
  */
@@ -6027,7 +6027,7 @@ export interface RezoInstance extends Rezo, RezoCallable {
6027
6027
  *
6028
6028
  * IMPORTANT: Update these values when bumping package version.
6029
6029
  */
6030
- export declare const VERSION = "1.0.131";
6030
+ export declare const VERSION = "1.0.133";
6031
6031
  export declare const isRezoError: typeof RezoError.isRezoError;
6032
6032
  export declare const Cancel: typeof RezoError;
6033
6033
  export declare const CancelToken: {
@@ -6,12 +6,12 @@ const { RezoFormData } = require('../../utils/form-data.cjs');
6
6
  const { RezoCookieJar, Cookie } = require('../../cookies/cookie-jar.cjs');
7
7
  const { createDefaultHooks, mergeHooks } = require('../../core/hooks.cjs');
8
8
  const { VERSION } = require('../../version.cjs');
9
- const _mod_elhp6j = require('../../platform/react-native-providers.cjs');
10
- exports.createFetchStreamTransport = _mod_elhp6j.createFetchStreamTransport;
11
- exports.createExpoFileSystemAdapter = _mod_elhp6j.createExpoFileSystemAdapter;
12
- exports.createReactNativeFsAdapter = _mod_elhp6j.createReactNativeFsAdapter;
13
- exports.createNetInfoProvider = _mod_elhp6j.createNetInfoProvider;
14
- exports.createExpoBackgroundTaskProvider = _mod_elhp6j.createExpoBackgroundTaskProvider;;
9
+ const _mod_6jezic = require('../../platform/react-native-providers.cjs');
10
+ exports.createFetchStreamTransport = _mod_6jezic.createFetchStreamTransport;
11
+ exports.createExpoFileSystemAdapter = _mod_6jezic.createExpoFileSystemAdapter;
12
+ exports.createReactNativeFsAdapter = _mod_6jezic.createReactNativeFsAdapter;
13
+ exports.createNetInfoProvider = _mod_6jezic.createNetInfoProvider;
14
+ exports.createExpoBackgroundTaskProvider = _mod_6jezic.createExpoBackgroundTaskProvider;;
15
15
  exports.Rezo = Rezo;
16
16
  exports.RezoError = RezoError;
17
17
  exports.RezoErrorCode = RezoErrorCode;
@@ -6027,7 +6027,7 @@ export interface RezoInstance extends Rezo, RezoCallable {
6027
6027
  *
6028
6028
  * IMPORTANT: Update these values when bumping package version.
6029
6029
  */
6030
- export declare const VERSION = "1.0.131";
6030
+ export declare const VERSION = "1.0.133";
6031
6031
  export interface ExpoFileSystemFileLike {
6032
6032
  uri?: string;
6033
6033
  size?: number;
@@ -6027,7 +6027,7 @@ export interface RezoInstance extends Rezo, RezoCallable {
6027
6027
  *
6028
6028
  * IMPORTANT: Update these values when bumping package version.
6029
6029
  */
6030
- export declare const VERSION = "1.0.131";
6030
+ export declare const VERSION = "1.0.133";
6031
6031
  export declare const isRezoError: typeof RezoError.isRezoError;
6032
6032
  export declare const Cancel: typeof RezoError;
6033
6033
  export declare const CancelToken: {
@@ -1,6 +1,6 @@
1
- const _mod_ta19rw = require('./picker.cjs');
2
- exports.detectRuntime = _mod_ta19rw.detectRuntime;
3
- exports.getAdapterCapabilities = _mod_ta19rw.getAdapterCapabilities;
4
- exports.buildAdapterContext = _mod_ta19rw.buildAdapterContext;
5
- exports.getAvailableAdapters = _mod_ta19rw.getAvailableAdapters;
6
- exports.selectAdapter = _mod_ta19rw.selectAdapter;;
1
+ const _mod_wl45y9 = require('./picker.cjs');
2
+ exports.detectRuntime = _mod_wl45y9.detectRuntime;
3
+ exports.getAdapterCapabilities = _mod_wl45y9.getAdapterCapabilities;
4
+ exports.buildAdapterContext = _mod_wl45y9.buildAdapterContext;
5
+ exports.getAvailableAdapters = _mod_wl45y9.getAvailableAdapters;
6
+ exports.selectAdapter = _mod_wl45y9.selectAdapter;;
@@ -1,9 +1,9 @@
1
- const _mod_2sir3t = require('./lru-cache.cjs');
2
- exports.LRUCache = _mod_2sir3t.LRUCache;;
3
- const _mod_h1s52p = require('./dns-cache.cjs');
4
- exports.DNSCache = _mod_h1s52p.DNSCache;
5
- exports.getGlobalDNSCache = _mod_h1s52p.getGlobalDNSCache;
6
- exports.resetGlobalDNSCache = _mod_h1s52p.resetGlobalDNSCache;;
7
- const _mod_a5smaj = require('./response-cache.cjs');
8
- exports.ResponseCache = _mod_a5smaj.ResponseCache;
9
- exports.normalizeResponseCacheConfig = _mod_a5smaj.normalizeResponseCacheConfig;;
1
+ const _mod_pwv10q = require('./lru-cache.cjs');
2
+ exports.LRUCache = _mod_pwv10q.LRUCache;;
3
+ const _mod_r89cab = require('./dns-cache.cjs');
4
+ exports.DNSCache = _mod_r89cab.DNSCache;
5
+ exports.getGlobalDNSCache = _mod_r89cab.getGlobalDNSCache;
6
+ exports.resetGlobalDNSCache = _mod_r89cab.resetGlobalDNSCache;;
7
+ const _mod_vz9afj = require('./response-cache.cjs');
8
+ exports.ResponseCache = _mod_vz9afj.ResponseCache;
9
+ exports.normalizeResponseCacheConfig = _mod_vz9afj.normalizeResponseCacheConfig;;
@@ -1,8 +1,8 @@
1
1
  const { CookieJar: TouchCookieJar } = require("tough-cookie");
2
2
  const { Cookie } = require('./cookie.cjs');
3
3
  const { requireNodeModule } = require('../utils/node-runtime.cjs');
4
- const _mod_2xetkm = require('./cookie.cjs');
5
- exports.Cookie = _mod_2xetkm.Cookie;;
4
+ const _mod_egd39a = require('./cookie.cjs');
5
+ exports.Cookie = _mod_egd39a.Cookie;;
6
6
 
7
7
  class RezoCookieJar extends TouchCookieJar {
8
8
  constructor(store, options) {
@@ -481,7 +481,7 @@ class RezoCookieJar extends TouchCookieJar {
481
481
  }
482
482
  }
483
483
  const CookieJar = exports.CookieJar = RezoCookieJar;
484
- const _mod_o2fr98 = require("tough-cookie");
485
- exports.Store = _mod_o2fr98.Store;;
484
+ const _mod_uokl3t = require("tough-cookie");
485
+ exports.Store = _mod_uokl3t.Store;;
486
486
 
487
487
  exports.RezoCookieJar = RezoCookieJar;
@@ -1,10 +1,10 @@
1
- const _mod_dnn0yd = require('./cookie.cjs');
2
- exports.Cookie = _mod_dnn0yd.Cookie;
3
- exports.RezoCookie = _mod_dnn0yd.RezoCookie;;
4
- const _mod_euh49t = require('./cookie-store.cjs');
5
- exports.RezoCookieStore = _mod_euh49t.RezoCookieStore;;
6
- const _mod_n9r664 = require('./cookie-jar.cjs');
7
- exports.RezoCookieJar = _mod_n9r664.RezoCookieJar;
8
- exports.CookieJar = _mod_n9r664.CookieJar;;
9
- const _mod_5ka56g = require("tough-cookie");
10
- exports.Store = _mod_5ka56g.Store;;
1
+ const _mod_3anom3 = require('./cookie.cjs');
2
+ exports.Cookie = _mod_3anom3.Cookie;
3
+ exports.RezoCookie = _mod_3anom3.RezoCookie;;
4
+ const _mod_p2lai5 = require('./cookie-store.cjs');
5
+ exports.RezoCookieStore = _mod_p2lai5.RezoCookieStore;;
6
+ const _mod_dze7qd = require('./cookie-jar.cjs');
7
+ exports.RezoCookieJar = _mod_dze7qd.RezoCookieJar;
8
+ exports.CookieJar = _mod_dze7qd.CookieJar;;
9
+ const _mod_e0fpg1 = require("tough-cookie");
10
+ exports.Store = _mod_e0fpg1.Store;;
@@ -1 +1 @@
1
- var{Rezo:h}=require("../../../core/rezo.cjs"),a=require("./options.cjs");exports.OXYLABS_BROWSER_TYPES=a.OXYLABS_BROWSER_TYPES;exports.OXYLABS_COMMON_LOCALES=a.OXYLABS_COMMON_LOCALES;exports.OXYLABS_COMMON_GEO_LOCATIONS=a.OXYLABS_COMMON_GEO_LOCATIONS;exports.OXYLABS_US_STATES=a.OXYLABS_US_STATES;exports.OXYLABS_EUROPEAN_COUNTRIES=a.OXYLABS_EUROPEAN_COUNTRIES;exports.OXYLABS_ASIAN_COUNTRIES=a.OXYLABS_ASIAN_COUNTRIES;exports.getRandomBrowserType=a.getRandomBrowserType;exports.getRandomLocale=a.getRandomLocale;exports.getRandomGeoLocation=a.getRandomGeoLocation;var i="https://realtime.oxylabs.io/v1/queries";class d{config;http;authHeader;constructor(s){if(!s.username||!s.password)throw Error("Oxylabs username and password are required");this.config={username:s.username,password:s.password,browserType:s.browserType??"desktop",locale:s.locale??"en-US",geoLocation:s.geoLocation??"",render:s.render??!1,context:s.context??{},timeout:s.timeout??120000},this.http=new h({baseURL:i,timeout:this.config.timeout}),this.authHeader=`Basic ${Buffer.from(`${s.username}:${s.password}`).toString("base64")}`}async scrape(s,e){let t={...this.config,...e},r=this.buildRequestBody(s,t),o=(await this.http.postJson(i,r,{headers:{Authorization:this.authHeader,"Content-Type":"application/json"}})).data;if(o.error)throw Error(`Oxylabs API error: ${o.error}`);if(!o.results||o.results.length===0)throw Error("Oxylabs API returned no results");let n=o.results[0],l=n._response?.cookies||[],_=n._response?.headers||{};return{statusCode:n.status_code,url:n.url,content:n.content,cookies:l,headers:_,jobId:o.job_id||n.job_id,rendered:t.render,geoLocation:t.geoLocation||void 0,locale:t.locale,browserType:t.browserType,raw:o}}async scrapeMany(s,e,t=1000){let r=[];for(let u=0;u<s.length;u++){let o=await this.scrape(s[u],e);if(r.push(o),u<s.length-1&&t>0)await new Promise((n)=>setTimeout(n,t))}return r}buildRequestBody(s,e){let 
t=[];if(t.push({key:"return_page_cookies",value:!0}),t.push({key:"return_page_headers",value:!0}),e.context)for(let[u,o]of Object.entries(e.context))t.push({key:u,value:o});if(e.headers&&Object.keys(e.headers).length>0)t.push({key:"headers",value:e.headers});if(e.cookies&&e.cookies.length>0)t.push({key:"cookies",value:e.cookies});if(e.session_id)t.push({key:"session_id",value:e.session_id});if(e.http_method)t.push({key:"http_method",value:e.http_method});if(e.base64Body)t.push({key:"content",value:e.base64Body});if(typeof e.follow_redirects==="boolean")t.push({key:"follow_redirects",value:e.follow_redirects});if(e.successful_status_codes&&e.successful_status_codes.length>0)t.push({key:"successful_status_codes",value:e.successful_status_codes});let r={source:"universal",url:s,context:t};if(e.render||e.javascript_rendering)r.render="html";if(e.browserType)r.user_agent_type=e.browserType;if(e.locale)r.locale=e.locale;if(e.geoLocation)r.geo_location=e.geoLocation;if(e.returnAsBase64)r.content_encoding="base64";return r}getConfig(){return{...this.config,password:"***"}}withConfig(s){return new d({...this.config,...s})}async testConnection(){try{return await this.scrape("https://httpbin.org/ip"),!0}catch(s){throw Error(`Oxylabs connection test failed: ${s.message}`)}}}exports.Oxylabs=d;exports.default=d;module.exports=Object.assign(d,exports);
1
+ var{Rezo:_}=require("../../../core/rezo.cjs"),a=require("./options.cjs");exports.OXYLABS_BROWSER_TYPES=a.OXYLABS_BROWSER_TYPES;exports.OXYLABS_COMMON_LOCALES=a.OXYLABS_COMMON_LOCALES;exports.OXYLABS_COMMON_GEO_LOCATIONS=a.OXYLABS_COMMON_GEO_LOCATIONS;exports.OXYLABS_US_STATES=a.OXYLABS_US_STATES;exports.OXYLABS_EUROPEAN_COUNTRIES=a.OXYLABS_EUROPEAN_COUNTRIES;exports.OXYLABS_ASIAN_COUNTRIES=a.OXYLABS_ASIAN_COUNTRIES;exports.getRandomBrowserType=a.getRandomBrowserType;exports.getRandomLocale=a.getRandomLocale;exports.getRandomGeoLocation=a.getRandomGeoLocation;var i="https://realtime.oxylabs.io/v1/queries";class d{config;http;authHeader;constructor(s){if(!s.username||!s.password)throw Error("Oxylabs username and password are required");this.config={username:s.username,password:s.password,browserType:s.browserType??"desktop",locale:s.locale??"en-US",geoLocation:s.geoLocation??"",render:s.render??!1,context:s.context??{},timeout:s.timeout??120000},this.http=new _({baseURL:i,timeout:this.config.timeout}),this.authHeader=`Basic ${Buffer.from(`${s.username}:${s.password}`).toString("base64")}`}async scrape(s,e){let t={...this.config,...e},r=this.buildRequestBody(s,t),o=(await this.http.postJson(i,r,{headers:{Authorization:this.authHeader,"Content-Type":"application/json"}})).data;if(o.error)throw Error(`Oxylabs API error: ${o.error}`);if(!o.results||o.results.length===0)throw Error("Oxylabs API returned no results");let n=o.results[0],l=n._response?.cookies||[],h=n._response?.headers||{};return{statusCode:n.status_code,url:n.url,content:n.content,cookies:l,headers:h,jobId:o.job_id||n.job_id,rendered:t.render,geoLocation:t.geoLocation||void 0,locale:t.locale,browserType:t.browserType,raw:o}}async scrapeMany(s,e,t=1000){let r=[];for(let u=0;u<s.length;u++){let o=await this.scrape(s[u],e);if(r.push(o),u<s.length-1&&t>0)await new Promise((n)=>setTimeout(n,t))}return r}buildRequestBody(s,e){let 
t=[];if(t.push({key:"return_page_cookies",value:!0}),t.push({key:"return_page_headers",value:!0}),e.context)for(let[u,o]of Object.entries(e.context))t.push({key:u,value:o});if(e.headers&&Object.keys(e.headers).length>0)t.push({key:"headers",value:e.headers});if(e.cookies&&e.cookies.length>0)t.push({key:"cookies",value:e.cookies});if(e.session_id)t.push({key:"session_id",value:e.session_id});if(e.http_method)t.push({key:"http_method",value:e.http_method});if(e.base64Body)t.push({key:"content",value:e.base64Body});if(typeof e.follow_redirects==="boolean")t.push({key:"follow_redirects",value:e.follow_redirects});if(e.successful_status_codes&&e.successful_status_codes.length>0)t.push({key:"successful_status_codes",value:e.successful_status_codes});let r={source:"universal",url:s,context:t};if(e.render||e.javascript_rendering)r.render="html";if(e.browserType)r.user_agent_type=e.browserType;if(e.locale)r.locale=e.locale;if(e.geoLocation)r.geo_location=e.geoLocation;if(e.returnAsBase64)r.content_encoding="base64";return r}getConfig(){return{...this.config,password:"***"}}withConfig(s){return new d({...this.config,...s})}async testConnection(){try{return await this.scrape("https://httpbin.org/ip"),!0}catch(s){throw Error(`Oxylabs connection test failed: ${s.message}`)}}}exports.Oxylabs=d;exports.default=d;module.exports=Object.assign(d,exports);
@@ -1 +1 @@
1
- var{RezoQueue:y}=require("../queue/queue.cjs"),{Oxylabs:x}=require("./addon/oxylabs/index.cjs"),v=require("node:path"),A=require("node:os"),{Decodo:p}=require("./addon/decodo/index.cjs"),{Rezo:D}=require("../core/rezo.cjs");class l{static destroyQueue(e){if(e&&typeof e.destroy==="function")e.destroy()}static destroyConfigQueues(e){for(let r of e)l.destroyQueue(r.pqueue)}static splitConfigs(e,r){let s=[],a=[];for(let t of e)if(r(t))a.push(t);else s.push(t);return{kept:s,removed:a}}static getHostname(e){try{return new URL(e).hostname||void 0}catch{return}}static createStableThroughputOptions(e){let{baseUrl:r,concurrency:s=40,scraperConcurrency:a=10,retryDelay:t=1000,maxRetryAttempts:i=2,retryOnStatusCode:o=[408,500,502,503,504],maxWaitOn429:d=15000,alwaysWaitOn429:n=!1,globalLimiter:h={concurrency:8},domain:u=l.getHostname(r),domainLimiter:c={concurrency:2,interval:1000,intervalCap:2,randomDelay:150},domainRetry:m={enable:!0,max429Retries:2,retryDelay:1000,maxRetryAttempts:2,backoff:!0},extraLimiters:g=[],overrides:b={}}=e,f=[];if(h)f.push({isGlobal:!0,options:h});if(u&&c)f.push({domain:u,options:c,retry:m||void 0});return 
f.push(...g),{...{baseUrl:r,autoThrottle:!1,concurrency:s,scraperConcurrency:a,retryDelay:t,maxRetryAttempts:i,retryOnStatusCode:o,maxWaitOn429:d,alwaysWaitOn429:n,limiter:f.length>0?{enable:!0,limiters:f}:!1},...b,baseUrl:r}}baseUrl;adapter;enableNavigationHistory;sessionId;rejectUnauthorized;userAgent;useRndUserAgent;timeout;maxRedirects;maxRetryAttempts;retryDelay;retryOnStatusCode;forceRevisit;retryWithoutProxyOnStatusCode;retryOnProxyError;maxRetryOnProxyError;allowRevisiting;enableCache;cacheTTL;cacheDir;throwFatalError;debug;enableSignalHandlers;concurrency;scraperConcurrency;maxDepth;maxUrls;maxResponseSize;respectRobotsTxt;followNofollow;autoThrottle;autoThrottleTargetDelay;autoThrottleMinDelay;autoThrottleMaxDelay;maxWaitOn429;alwaysWaitOn429;stealth;oxylabs=[];decodo=[];stealthProfiles=[];proxies=[];limiters=[];onLimiterAdded;onProviderAdded;requestHeaders=[];userAgents=$();constructor(e={}){this.baseUrl=e.baseUrl||"",this.adapter=e.adapter??"http",this.enableNavigationHistory=e.enableNavigationHistory??!1,this.sessionId=e.sessionId??`session_${Date.now()}_${Math.random().toString(36).slice(2,8)}`,this.rejectUnauthorized=e.rejectUnauthorized??!0,this.userAgent=e.userAgent,this.useRndUserAgent=e.useRndUserAgent??!1,this.timeout=e.timeout??30000,this.maxRedirects=e.maxRedirects??10,this.maxRetryAttempts=e.maxRetryAttempts??3,this.retryDelay=e.retryDelay??0,this.retryOnStatusCode=e.retryOnStatusCode??[408,429,500,502,503,504],this.forceRevisit=e.forceRevisit??!1,this.retryWithoutProxyOnStatusCode=e.retryWithoutProxyOnStatusCode??[407,403],this.retryOnProxyError=e.retryOnProxyError??!0,this.maxRetryOnProxyError=e.maxRetryOnProxyError??3,this.allowRevisiting=e.allowRevisiting??!1,this.enableCache=e.enableCache??!0,this.cacheTTL=e.cacheTTL??604800000,this.cacheDir=e.cacheDir??v.join(A.tmpdir(),"rezo_cache"),this.throwFatalError=e.throwFatalError??!1,this.debug=e.debug??!1,this.enableSignalHandlers=e.enableSignalHandlers??!1,this.concurrency=e.concurrency??100,t
his.scraperConcurrency=e.scraperConcurrency??this.concurrency,this.maxDepth=e.maxDepth??0,this.maxUrls=e.maxUrls??0,this.maxResponseSize=e.maxResponseSize??0,this.respectRobotsTxt=e.respectRobotsTxt??!1,this.followNofollow=e.followNofollow??!1,this.autoThrottle=e.autoThrottle??!0,this.autoThrottleTargetDelay=e.autoThrottleTargetDelay??1000,this.autoThrottleMinDelay=e.autoThrottleMinDelay??100,this.autoThrottleMaxDelay=e.autoThrottleMaxDelay??60000,this.maxWaitOn429=e.maxWaitOn429??1800000,this.alwaysWaitOn429=e.alwaysWaitOn429??!1,this._addHeaders(e.headers),this._addOxylabs(e.oxylabs),this._addDecodo(e.decodo),this._addProxies(e.proxy),this._addLimiters(e.limiter),this._addStealths(e.stealths),this.stealth=e.stealth}getConfiguredDomains(e){return(e==="headers"?this.requestHeaders:e==="limiters"?this.limiters:e==="oxylabs"?this.oxylabs:e==="decodo"?this.decodo:e==="stealth"?this.stealthProfiles:this.proxies).filter((s)=>s.domain).map((s)=>s.domain).filter((s,a,t)=>t.indexOf(s)===a)}removeDomain(e){this.requestHeaders=this.requestHeaders.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this.proxies=this.proxies.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e));let r=l.splitConfigs(this.limiters,(t)=>!!t.domain&&this._domainsEqual(t.domain,e));l.destroyConfigQueues(r.removed),this.limiters=r.kept;let s=l.splitConfigs(this.oxylabs,(t)=>!!t.domain&&this._domainsEqual(t.domain,e));l.destroyConfigQueues(s.removed),this.oxylabs=s.kept;let a=l.splitConfigs(this.decodo,(t)=>!!t.domain&&this._domainsEqual(t.domain,e));return l.destroyConfigQueues(a.removed),this.decodo=a.kept,this.stealthProfiles=this.stealthProfiles.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this}_domainsEqual(e,r){if(Array.isArray(e)&&Array.isArray(r))return e.length===r.length&&e.every((s,a)=>s===r[a]);return e===r}getConfigurationSummary(){let 
e=(r)=>({total:r.length,global:r.filter((s)=>s.isGlobal).length,domainSpecific:r.filter((s)=>!s.isGlobal&&s.domain).length});return{headers:e(this.requestHeaders),proxies:e(this.proxies),limiters:e(this.limiters),oxylabs:e(this.oxylabs),decodo:e(this.decodo)}}_addHeaders(e){if(!e||!e.enable)return;for(let r of e.httpHeaders){let{domain:s,isGlobal:a,headers:t}=r;if(!s&&!a)continue;if(t instanceof Headers){let i=Object.fromEntries(t.entries());if(Object.keys(i).length<1)continue;t=i}else if(!t||Object.keys(t).length<1)continue;this.requestHeaders.push({domain:s,isGlobal:a,headers:t})}}_addProxies(e){if(!e||!e.enable)return;for(let r of e.proxies){let{domain:s,isGlobal:a,proxy:t,rotating:i}=r;if(!s&&!a)continue;if(!t||Object.keys(t).length<1)continue;this.proxies.push({domain:s,isGlobal:a,proxy:t,rotating:!!i})}}_addLimiters(e){if(!e||!e.enable)return;for(let r of e.limiters){let{domain:s,isGlobal:a,options:t,retry:i}=r;if(!s&&!a)continue;if(!t&&!i)continue;let o=t?new y(t):new y({name:"limiter"});if(this.limiters.push({domain:s,isGlobal:a,pqueue:o,randomDelay:t?.randomDelay,retry:i}),this.onLimiterAdded)this.onLimiterAdded(o)}}_addOxylabs(e){if(!e||!e.enable)return;for(let r of e.labs){let{domain:s,isGlobal:a,options:t,queueOptions:i}=r;if(!s&&!a)continue;if(!t||Object.keys(t).length<1)continue;let o=i?new y(i):void 0;if(this.oxylabs.push({domain:s,isGlobal:a,adaptar:new x(t),pqueue:o}),o&&this.onProviderAdded)this.onProviderAdded(o)}}_addDecodo(e){if(!e||!e.enable)return;for(let r of e.labs){let{domain:s,isGlobal:a,options:t,queueOptions:i}=r;if(!s&&!a)continue;if(!t||Object.keys(t).length<1)continue;let o=i?new y(i):void 0;if(this.decodo.push({domain:s,isGlobal:a,adaptar:new p(t),pqueue:o}),o&&this.onProviderAdded)this.onProviderAdded(o)}}_addStealths(e){if(!e||!e.enable)return;for(let r of e.profiles){let{domain:s,isGlobal:a,stealth:t}=r;if(!s&&!a)continue;if(!t)continue;this.stealthProfiles.push({domain:s,isGlobal:a,adaptar:new 
D({stealth:t})})}}addHeaders(e){return this._addHeaders({enable:!0,httpHeaders:[e]}),this}addProxy(e){return this._addProxies({enable:!0,proxies:[e]}),this}addProxies(e){return this._addProxies({enable:!0,proxies:e}),this}getProxyConfig(e,r,s){let a=this.selectConfigEntry(e,this.proxies,r,s);if(!a)return null;return{proxy:a.proxy,rotating:!!a.rotating}}addLimiter(e){return this._addLimiters({enable:!0,limiters:[e]}),this}addOxylabs(e){return this._addOxylabs({enable:!0,labs:[e]}),this}addDecodo(e){return this._addDecodo({enable:!0,labs:[e]}),this}addStealth(e){return this._addStealths({enable:!0,profiles:[e]}),this}destroyLimiters(){for(let e of this.limiters)l.destroyQueue(e.pqueue);this.limiters=[]}destroyProviderQueues(){for(let e of this.oxylabs)l.destroyQueue(e.pqueue),delete e.pqueue;for(let e of this.decodo)l.destroyQueue(e.pqueue),delete e.pqueue}getLimiters(){return this.limiters}getRandomDelay(e,r){if(!this.getDomainName(e))return;for(let a of this.limiters)if(this._hasDomain(e,a.domain)&&a.randomDelay!==void 0)return a.randomDelay;if(r){for(let a of this.limiters)if(a.isGlobal&&a.randomDelay!==void 0)return a.randomDelay}return}clearGlobalConfigs(){if(Array.isArray(this.requestHeaders))this.requestHeaders=this.requestHeaders.filter((e)=>!e.isGlobal);if(Array.isArray(this.oxylabs)){let e=l.splitConfigs(this.oxylabs,(r)=>!!r.isGlobal);l.destroyConfigQueues(e.removed),this.oxylabs=e.kept}if(Array.isArray(this.decodo)){let e=l.splitConfigs(this.decodo,(r)=>!!r.isGlobal);l.destroyConfigQueues(e.removed),this.decodo=e.kept}if(Array.isArray(this.limiters)){let e=l.splitConfigs(this.limiters,(r)=>!!r.isGlobal);l.destroyConfigQueues(e.removed),this.limiters=e.kept}if(Array.isArray(this.proxies))this.proxies=this.proxies.filter((e)=>!e.isGlobal);if(Array.isArray(this.stealthProfiles))this.stealthProfiles=this.stealthProfiles.filter((e)=>!e.isGlobal);return this}getAdapter(e,r,s,a){if(!this.getDomainName(e))return null;let 
i=r==="headers"?this.requestHeaders:r==="limiters"?this.limiters:r==="oxylabs"?this.oxylabs:r==="decodo"?this.decodo:r==="stealth"?this.stealthProfiles:this.proxies,o=this.selectConfigEntry(e,i,s,a);if(!o)return null;return r==="headers"?o.headers:r==="limiters"?o.pqueue:r==="oxylabs"?o.adaptar:r==="decodo"?o.adaptar:r==="stealth"?o.adaptar:o.proxy}getRetryOptions(e){if(!this.getDomainName(e))return null;for(let s=0;s<this.limiters.length;s++)if(this._hasDomain(e,this.limiters[s].domain))return this.limiters[s].retry||null;for(let s=0;s<this.limiters.length;s++)if(this.limiters[s].isGlobal&&this.limiters[s].retry)return this.limiters[s].retry;return null}rnd(e=0,r=Number.MAX_VALUE){return Math.floor(Math.random()*(r-e+1))+e}hasDomain(e,r,s){if(!this.getDomainName(e))return!1;let t=r==="headers"?this.requestHeaders:r==="limiters"?this.limiters:r==="oxylabs"?this.oxylabs:r==="decodo"?this.decodo:r==="stealth"?this.stealthProfiles:this.proxies;for(let i=0;i<t.length;i++)if(this._hasDomain(e,t[i].domain))return!0;if(s){for(let i=0;i<t.length;i++)if(t[i].isGlobal)return!0}return!1}pickHeaders(e,r,s,a){let t=this.getAdapter(e,"headers",r),i=new Headers(t??{}),o=i.count;if(s&&s instanceof Headers)for(let[d,n]of Object.entries(s.entries()))i.set(d,n);else if(s&&typeof s==="object"){for(let[d,n]of Object.entries(s))if(typeof n==="string")i.set(d,n)}if(a&&o===0&&!this.stealth)i.set("user-agent",this.getRandomUserAgent());return Object.fromEntries(i.entries())}_hasDomain(e,r){if(!r)return!1;let s=this.getDomainName(e);if(!s)return!1;let a=(i)=>{return/[\^\$\*\+\?\{\}\[\]\(\)\|\\]/.test(i)||i.startsWith("/")||i.includes(".*")||i.includes(".+")},t=(i)=>{if(i instanceof RegExp)return i.test(s)||i.test(e);let o=i.toString().trim();if(s.toLowerCase()===o.toLowerCase())return!0;if(o.includes("*")){let h=o.replace(/[.*+?^${}()|[\]\\]/g,"\\$&").replace(/\\\*/g,".*"),u=new RegExp(`^${h}$`,"i");return u.test(s)||u.test(e)}if(a(o))try{let 
h=o,u="i",c=o.match(/^\/(.*)\/(\w*)$/);if(c)h=c[1],u=c[2]||"i";let m=new RegExp(h,u);return m.test(s)||m.test(e)}catch(h){return s.toLowerCase().includes(o.toLowerCase())}let d=s.toLowerCase(),n=o.toLowerCase();return d===n||d.endsWith("."+n)||n.endsWith("."+d)};if(Array.isArray(r)){for(let i of r)if(t(i))return!0;return!1}return t(r)}selectConfigEntry(e,r,s,a){if(!this.getDomainName(e))return null;let i=(n)=>{if(!n.length)return null;let h=a?this.rnd(0,n.length-1):0;return n[h]},o=[];for(let n of r)if(this._hasDomain(e,n.domain))o.push(n);let d=i(o);if(d)return d;if(s){let n=[];for(let h of r)if(h.isGlobal)n.push(h);return i(n)}return null}getDomainName(e){if(this.isValidUrl(e))return new URL(e).hostname;else if(this.isHostName(e))return e;return null}isHostName(e){if(!e)return!1;if(e.length>255)return!1;let r=/^(?:(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+ [a-zA-Z]{2,})$/;return e=e.trim().toLowerCase(),r.test(e)&&!e.startsWith("-")&&!e.endsWith("-")}isValidUrl(e){if(!e)return!1;e=e.trim();try{let r=new URL(e);if(!r.protocol||!["http:","https:"].includes(r.protocol.toLowerCase()))return!1;if(!r.hostname)return!1;if(!/^(?:(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,})$/.test(r.hostname))return!1;return!0}catch{return!1}}getRandomUserAgent(){return this.userAgents[Math.floor(Math.random()*this.userAgents.length)]}}function $(){let 
e=[{name:"Chrome",version:"91.0.4472.124",engine:"AppleWebKit/537.36"},{name:"Firefox",version:"89.0",engine:"Gecko/20100101"},{name:"Safari",version:"14.1.1",engine:"AppleWebKit/605.1.15"},{name:"Edge",version:"91.0.864.59",engine:"AppleWebKit/537.36"},{name:"Opera",version:"77.0.4054.277",engine:"AppleWebKit/537.36"},{name:"Vivaldi",version:"3.8.2259.42",engine:"AppleWebKit/537.36"},{name:"Brave",version:"1.26.74",engine:"AppleWebKit/537.36"},{name:"Chromium",version:"91.0.4472.101",engine:"AppleWebKit/537.36"},{name:"Yandex",version:"21.5.3.742",engine:"AppleWebKit/537.36"},{name:"Maxthon",version:"5.3.8.2000",engine:"AppleWebKit/537.36"}],r=["Windows NT 10.0","Windows NT 6.1","Macintosh; Intel Mac OS X 10_15_7","Macintosh; Intel Mac OS X 11_4_0","X11; Linux x86_64","X11; Ubuntu; Linux x86_64"],s=[];for(let a=0;a<200;a++){let t=e[Math.floor(Math.random()*e.length)],i=r[Math.floor(Math.random()*r.length)],o="";switch(t.name){case"Chrome":o=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} Safari/537.36`;break;case"Firefox":o=`Mozilla/5.0 (${i}; rv:${t.version}) ${t.engine} Firefox/${t.version}`;break;case"Safari":o=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Version/${t.version} Safari/605.1.15`;break;case"Edge":o=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} Safari/537.36 Edg/${t.version}`;break;case"Opera":o=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} Safari/537.36 OPR/${t.version}`;break;case"Vivaldi":o=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} Safari/537.36 Vivaldi/${t.version}`;break;case"Brave":o=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} Safari/537.36 Brave/${t.version}`;break;case"Chromium":o=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chromium/${t.version} Chrome/${t.version} Safari/537.36`;break;case"Yandex":o=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} YaBrowser/${t.version} 
Safari/537.36`;break;case"Maxthon":o=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} Safari/537.36 Maxthon/${t.version}`;break}s.push(o)}return s}exports.CrawlerOptions=l;
1
const { RezoQueue } = require("../queue/queue.cjs");
const { Oxylabs } = require("./addon/oxylabs/index.cjs");
const path = require("node:path");
const os = require("node:os");
const { Decodo } = require("./addon/decodo/index.cjs");
const { Rezo } = require("../core/rezo.cjs");

/**
 * Dot-separated labels followed by an alphabetic TLD of at least two letters.
 * Shared by `isHostName` and `isValidUrl` so both apply the same rule.
 */
const HOSTNAME_PATTERN = /^(?:(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,})$/;

/** Characters that make a domain string be treated as a regular expression. */
const REGEX_HINT_PATTERN = /[\^\$\*\+\?\{\}\[\]\(\)\|\\]/;

/** Browser descriptors used to synthesize the random user-agent pool. */
const BROWSERS = [
  { name: "Chrome", version: "91.0.4472.124", engine: "AppleWebKit/537.36" },
  { name: "Firefox", version: "89.0", engine: "Gecko/20100101" },
  { name: "Safari", version: "14.1.1", engine: "AppleWebKit/605.1.15" },
  { name: "Edge", version: "91.0.864.59", engine: "AppleWebKit/537.36" },
  { name: "Opera", version: "77.0.4054.277", engine: "AppleWebKit/537.36" },
  { name: "Vivaldi", version: "3.8.2259.42", engine: "AppleWebKit/537.36" },
  { name: "Brave", version: "1.26.74", engine: "AppleWebKit/537.36" },
  { name: "Chromium", version: "91.0.4472.101", engine: "AppleWebKit/537.36" },
  { name: "Yandex", version: "21.5.3.742", engine: "AppleWebKit/537.36" },
  { name: "Maxthon", version: "5.3.8.2000", engine: "AppleWebKit/537.36" },
];

/** Platform tokens placed inside the parenthesised part of the user agent. */
const PLATFORMS = [
  "Windows NT 10.0",
  "Windows NT 6.1",
  "Macintosh; Intel Mac OS X 10_15_7",
  "Macintosh; Intel Mac OS X 11_4_0",
  "X11; Linux x86_64",
  "X11; Ubuntu; Linux x86_64",
];

/** Per-browser user-agent formatters, keyed by `BROWSERS[].name`. */
const USER_AGENT_FORMATS = {
  Chrome: (platform, b) => `Mozilla/5.0 (${platform}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36`,
  Firefox: (platform, b) => `Mozilla/5.0 (${platform}; rv:${b.version}) ${b.engine} Firefox/${b.version}`,
  Safari: (platform, b) => `Mozilla/5.0 (${platform}) ${b.engine} (KHTML, like Gecko) Version/${b.version} Safari/605.1.15`,
  Edge: (platform, b) => `Mozilla/5.0 (${platform}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36 Edg/${b.version}`,
  Opera: (platform, b) => `Mozilla/5.0 (${platform}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36 OPR/${b.version}`,
  Vivaldi: (platform, b) => `Mozilla/5.0 (${platform}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36 Vivaldi/${b.version}`,
  Brave: (platform, b) => `Mozilla/5.0 (${platform}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36 Brave/${b.version}`,
  Chromium: (platform, b) => `Mozilla/5.0 (${platform}) ${b.engine} (KHTML, like Gecko) Chromium/${b.version} Chrome/${b.version} Safari/537.36`,
  Yandex: (platform, b) => `Mozilla/5.0 (${platform}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} YaBrowser/${b.version} Safari/537.36`,
  Maxthon: (platform, b) => `Mozilla/5.0 (${platform}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36 Maxthon/${b.version}`,
};

/** Returns a uniformly random element of a non-empty array. */
function pickRandom(items) {
  return items[Math.floor(Math.random() * items.length)];
}

/**
 * Maps a configuration type name to the matching collection on `options`.
 * Any name other than "headers", "limiters", "oxylabs", "decodo" or "stealth"
 * selects the proxy list.
 */
function selectCollection(options, type) {
  switch (type) {
    case "headers":
      return options.requestHeaders;
    case "limiters":
      return options.limiters;
    case "oxylabs":
      return options.oxylabs;
    case "decodo":
      return options.decodo;
    case "stealth":
      return options.stealthProfiles;
    default:
      return options.proxies;
  }
}

/**
 * Builds a pool of 200 user-agent strings, each combining a random browser
 * descriptor with a random platform token.
 *
 * @returns {string[]} The generated user-agent strings.
 */
function generateUserAgents() {
  const agents = [];
  for (let n = 0; n < 200; n++) {
    const browser = pickRandom(BROWSERS);
    const platform = pickRandom(PLATFORMS);
    const format = USER_AGENT_FORMATS[browser.name];
    agents.push(format ? format(platform, browser) : "");
  }
  return agents;
}

/**
 * Holds crawler settings plus per-domain / global configuration entries
 * (headers, proxies, limiters, Oxylabs, Decodo and stealth profiles) and
 * resolves which entry applies to a given URL or hostname.
 */
class CrawlerOptions {
  /** Calls `queue.destroy()` when the queue exists and exposes that method. */
  static destroyQueue(queue) {
    if (queue && typeof queue.destroy === "function") queue.destroy();
  }

  /** Destroys the `pqueue` of every config entry in `configs`. */
  static destroyConfigQueues(configs) {
    for (const config of configs) CrawlerOptions.destroyQueue(config.pqueue);
  }

  /**
   * Partitions `configs` by a predicate.
   *
   * @param {object[]} configs - Entries to partition.
   * @param {(config: object) => boolean} shouldRemove - True for entries to remove.
   * @returns {{kept: object[], removed: object[]}} Both partitions, order preserved.
   */
  static splitConfigs(configs, shouldRemove) {
    const kept = [];
    const removed = [];
    for (const config of configs) {
      if (shouldRemove(config)) removed.push(config);
      else kept.push(config);
    }
    return { kept, removed };
  }

  /** Returns the hostname of `url`, or `undefined` if it is empty or unparsable. */
  static getHostname(url) {
    try {
      return new URL(url).hostname || undefined;
    } catch {
      return undefined;
    }
  }

  /**
   * Produces a plain options object with auto-throttle disabled and an
   * explicit limiter list (global limiter, base-URL domain limiter, extras).
   * `overrides` are spread last, except that `baseUrl` always wins.
   *
   * @param {object} options - Preset inputs; every field except `baseUrl` has a default.
   * @returns {object} Options suitable for the `CrawlerOptions` constructor.
   */
  static createStableThroughputOptions(options) {
    const {
      baseUrl,
      concurrency = 40,
      scraperConcurrency = 10,
      retryDelay = 1000,
      maxRetryAttempts = 2,
      retryOnStatusCode = [408, 500, 502, 503, 504],
      maxWaitOn429 = 15000,
      alwaysWaitOn429 = false,
      globalLimiter = { concurrency: 8 },
      domain = CrawlerOptions.getHostname(baseUrl),
      domainLimiter = { concurrency: 2, interval: 1000, intervalCap: 2, randomDelay: 150 },
      domainRetry = { enable: true, max429Retries: 2, retryDelay: 1000, maxRetryAttempts: 2, backoff: true },
      extraLimiters = [],
      overrides = {},
    } = options;

    const limiters = [];
    if (globalLimiter) limiters.push({ isGlobal: true, options: globalLimiter });
    if (domain && domainLimiter) {
      limiters.push({ domain, options: domainLimiter, retry: domainRetry || undefined });
    }
    limiters.push(...extraLimiters);

    const defaults = {
      baseUrl,
      autoThrottle: false,
      concurrency,
      scraperConcurrency,
      retryDelay,
      maxRetryAttempts,
      retryOnStatusCode,
      maxWaitOn429,
      alwaysWaitOn429,
      limiter: limiters.length > 0 ? { enable: true, limiters } : false,
    };
    return { ...defaults, ...overrides, baseUrl };
  }

  baseUrl;
  adapter;
  enableNavigationHistory;
  sessionId;
  rejectUnauthorized;
  userAgent;
  useRndUserAgent;
  timeout;
  maxRedirects;
  maxRetryAttempts;
  retryDelay;
  retryOnStatusCode;
  forceRevisit;
  retryWithoutProxyOnStatusCode;
  retryOnProxyError;
  maxRetryOnProxyError;
  allowRevisiting;
  enableCache;
  cacheTTL;
  cacheDir;
  throwFatalError;
  debug;
  enableSignalHandlers;
  concurrency;
  scraperConcurrency;
  maxDepth;
  maxUrls;
  maxResponseSize;
  respectRobotsTxt;
  followNofollow;
  autoThrottle;
  autoThrottleTargetDelay;
  autoThrottleMinDelay;
  autoThrottleMaxDelay;
  maxWaitOn429;
  alwaysWaitOn429;
  stealth;
  oxylabs = [];
  decodo = [];
  stealthProfiles = [];
  proxies = [];
  limiters = [];
  onLimiterAdded;
  onProviderAdded;
  requestHeaders = [];
  userAgents = generateUserAgents();

  /**
   * @param {object} [options] - Crawler settings; omitted (nullish) fields fall
   *   back to the defaults written below.
   */
  constructor(options = {}) {
    this.baseUrl = options.baseUrl || "";
    this.adapter = options.adapter ?? "http";
    this.enableNavigationHistory = options.enableNavigationHistory ?? false;
    this.sessionId = options.sessionId ?? `session_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;
    this.rejectUnauthorized = options.rejectUnauthorized ?? true;
    this.userAgent = options.userAgent;
    this.useRndUserAgent = options.useRndUserAgent ?? false;
    this.timeout = options.timeout ?? 30000;
    this.maxRedirects = options.maxRedirects ?? 10;
    this.maxRetryAttempts = options.maxRetryAttempts ?? 3;
    this.retryDelay = options.retryDelay ?? 0;
    this.retryOnStatusCode = options.retryOnStatusCode ?? [408, 429, 500, 502, 503, 504];
    this.forceRevisit = options.forceRevisit ?? false;
    this.retryWithoutProxyOnStatusCode = options.retryWithoutProxyOnStatusCode ?? [407, 403];
    this.retryOnProxyError = options.retryOnProxyError ?? true;
    this.maxRetryOnProxyError = options.maxRetryOnProxyError ?? 3;
    this.allowRevisiting = options.allowRevisiting ?? false;
    this.enableCache = options.enableCache ?? true;
    this.cacheTTL = options.cacheTTL ?? 604800000;
    this.cacheDir = options.cacheDir ?? path.join(os.tmpdir(), "rezo_cache");
    this.throwFatalError = options.throwFatalError ?? false;
    this.debug = options.debug ?? false;
    this.enableSignalHandlers = options.enableSignalHandlers ?? false;
    this.concurrency = options.concurrency ?? 100;
    this.scraperConcurrency = options.scraperConcurrency ?? this.concurrency;
    this.maxDepth = options.maxDepth ?? 0;
    this.maxUrls = options.maxUrls ?? 0;
    this.maxResponseSize = options.maxResponseSize ?? 0;
    this.respectRobotsTxt = options.respectRobotsTxt ?? false;
    this.followNofollow = options.followNofollow ?? false;
    this.autoThrottle = options.autoThrottle ?? true;
    this.autoThrottleTargetDelay = options.autoThrottleTargetDelay ?? 1000;
    this.autoThrottleMinDelay = options.autoThrottleMinDelay ?? 100;
    this.autoThrottleMaxDelay = options.autoThrottleMaxDelay ?? 60000;
    this.maxWaitOn429 = options.maxWaitOn429 ?? 1800000;
    this.alwaysWaitOn429 = options.alwaysWaitOn429 ?? false;
    this._addHeaders(options.headers);
    this._addOxylabs(options.oxylabs);
    this._addDecodo(options.decodo);
    this._addProxies(options.proxy);
    this._addLimiters(options.limiter);
    this._addStealths(options.stealths);
    this.stealth = options.stealth;
  }

  /**
   * Lists the distinct `domain` values configured for one collection.
   *
   * @param {string} type - "headers", "limiters", "oxylabs", "decodo", "stealth"; anything else means proxies.
   * @returns {Array} Domains in first-seen order, de-duplicated by strict equality.
   */
  getConfiguredDomains(type) {
    return selectCollection(this, type)
      .filter((entry) => entry.domain)
      .map((entry) => entry.domain)
      .filter((domain, index, all) => all.indexOf(domain) === index);
  }

  /**
   * Removes every entry whose `domain` equals `domain` (see `_domainsEqual`)
   * from all collections, destroying the queues of removed limiter and
   * provider entries.
   *
   * @returns {this}
   */
  removeDomain(domain) {
    const matches = (entry) => !!entry.domain && this._domainsEqual(entry.domain, domain);
    const keep = (entry) => !entry.domain || !this._domainsEqual(entry.domain, domain);

    this.requestHeaders = this.requestHeaders.filter(keep);
    this.proxies = this.proxies.filter(keep);

    const limiterSplit = CrawlerOptions.splitConfigs(this.limiters, matches);
    CrawlerOptions.destroyConfigQueues(limiterSplit.removed);
    this.limiters = limiterSplit.kept;

    const oxylabsSplit = CrawlerOptions.splitConfigs(this.oxylabs, matches);
    CrawlerOptions.destroyConfigQueues(oxylabsSplit.removed);
    this.oxylabs = oxylabsSplit.kept;

    const decodoSplit = CrawlerOptions.splitConfigs(this.decodo, matches);
    CrawlerOptions.destroyConfigQueues(decodoSplit.removed);
    this.decodo = decodoSplit.kept;

    this.stealthProfiles = this.stealthProfiles.filter(keep);
    return this;
  }

  /** Arrays compare element-wise with `===`; everything else compares with `===`. */
  _domainsEqual(left, right) {
    if (Array.isArray(left) && Array.isArray(right)) {
      return left.length === right.length && left.every((item, index) => item === right[index]);
    }
    return left === right;
  }

  /**
   * Counts total, global and domain-specific entries for headers, proxies,
   * limiters, oxylabs and decodo (stealth profiles are not included).
   */
  getConfigurationSummary() {
    const summarize = (entries) => ({
      total: entries.length,
      global: entries.filter((entry) => entry.isGlobal).length,
      domainSpecific: entries.filter((entry) => !entry.isGlobal && entry.domain).length,
    });
    return {
      headers: summarize(this.requestHeaders),
      proxies: summarize(this.proxies),
      limiters: summarize(this.limiters),
      oxylabs: summarize(this.oxylabs),
      decodo: summarize(this.decodo),
    };
  }

  /** Registers header entries; skips entries with neither scope nor any header. */
  _addHeaders(config) {
    if (!config || !config.enable) return;
    for (const entry of config.httpHeaders) {
      const { domain, isGlobal } = entry;
      let { headers } = entry;
      if (!domain && !isGlobal) continue;
      if (headers instanceof Headers) {
        // Store a plain object so later reads do not depend on the Headers instance.
        const plain = Object.fromEntries(headers.entries());
        if (Object.keys(plain).length < 1) continue;
        headers = plain;
      } else if (!headers || Object.keys(headers).length < 1) {
        continue;
      }
      this.requestHeaders.push({ domain, isGlobal, headers });
    }
  }

  /** Registers proxy entries; skips entries with neither scope nor a non-empty proxy. */
  _addProxies(config) {
    if (!config || !config.enable) return;
    for (const { domain, isGlobal, proxy, rotating } of config.proxies) {
      if (!domain && !isGlobal) continue;
      if (!proxy || Object.keys(proxy).length < 1) continue;
      this.proxies.push({ domain, isGlobal, proxy, rotating: !!rotating });
    }
  }

  /**
   * Registers limiter entries, creating one queue per entry and notifying
   * `onLimiterAdded` when it is set. Entries need a scope and at least one
   * of `options` / `retry`.
   */
  _addLimiters(config) {
    if (!config || !config.enable) return;
    for (const { domain, isGlobal, options, retry } of config.limiters) {
      if (!domain && !isGlobal) continue;
      if (!options && !retry) continue;
      const pqueue = options ? new RezoQueue(options) : new RezoQueue({ name: "limiter" });
      this.limiters.push({ domain, isGlobal, pqueue, randomDelay: options?.randomDelay, retry });
      if (this.onLimiterAdded) this.onLimiterAdded(pqueue);
    }
  }

  /**
   * Registers Oxylabs entries. A queue is created only when `queueOptions`
   * is given, and `onProviderAdded` is notified for each created queue.
   */
  _addOxylabs(config) {
    if (!config || !config.enable) return;
    for (const { domain, isGlobal, options, queueOptions } of config.labs) {
      if (!domain && !isGlobal) continue;
      if (!options || Object.keys(options).length < 1) continue;
      const pqueue = queueOptions ? new RezoQueue(queueOptions) : undefined;
      // "adaptar" (sic) is the property name the rest of the class reads.
      this.oxylabs.push({ domain, isGlobal, adaptar: new Oxylabs(options), pqueue });
      if (pqueue && this.onProviderAdded) this.onProviderAdded(pqueue);
    }
  }

  /** Registers Decodo entries; same rules as `_addOxylabs`. */
  _addDecodo(config) {
    if (!config || !config.enable) return;
    for (const { domain, isGlobal, options, queueOptions } of config.labs) {
      if (!domain && !isGlobal) continue;
      if (!options || Object.keys(options).length < 1) continue;
      const pqueue = queueOptions ? new RezoQueue(queueOptions) : undefined;
      this.decodo.push({ domain, isGlobal, adaptar: new Decodo(options), pqueue });
      if (pqueue && this.onProviderAdded) this.onProviderAdded(pqueue);
    }
  }

  /** Registers stealth profiles, wrapping each in a `Rezo` instance. */
  _addStealths(config) {
    if (!config || !config.enable) return;
    for (const { domain, isGlobal, stealth } of config.profiles) {
      if (!domain && !isGlobal) continue;
      if (!stealth) continue;
      this.stealthProfiles.push({ domain, isGlobal, adaptar: new Rezo({ stealth }) });
    }
  }

  /** Adds one header entry. @returns {this} */
  addHeaders(entry) {
    this._addHeaders({ enable: true, httpHeaders: [entry] });
    return this;
  }

  /** Adds one proxy entry. @returns {this} */
  addProxy(entry) {
    this._addProxies({ enable: true, proxies: [entry] });
    return this;
  }

  /** Adds several proxy entries. @returns {this} */
  addProxies(entries) {
    this._addProxies({ enable: true, proxies: entries });
    return this;
  }

  /**
   * Resolves the proxy entry for `url`.
   *
   * @param {string} url - URL or hostname.
   * @param {boolean} [useGlobal] - Fall back to global entries when no domain entry matches.
   * @param {boolean} [random] - Pick a random candidate instead of the first.
   * @returns {{proxy: object, rotating: boolean} | null}
   */
  getProxyConfig(url, useGlobal, random) {
    const entry = this.selectConfigEntry(url, this.proxies, useGlobal, random);
    if (!entry) return null;
    return { proxy: entry.proxy, rotating: !!entry.rotating };
  }

  /** Adds one limiter entry. @returns {this} */
  addLimiter(entry) {
    this._addLimiters({ enable: true, limiters: [entry] });
    return this;
  }

  /** Adds one Oxylabs entry. @returns {this} */
  addOxylabs(entry) {
    this._addOxylabs({ enable: true, labs: [entry] });
    return this;
  }

  /** Adds one Decodo entry. @returns {this} */
  addDecodo(entry) {
    this._addDecodo({ enable: true, labs: [entry] });
    return this;
  }

  /** Adds one stealth profile. @returns {this} */
  addStealth(entry) {
    this._addStealths({ enable: true, profiles: [entry] });
    return this;
  }

  /** Destroys every limiter queue and empties the limiter list. */
  destroyLimiters() {
    for (const limiter of this.limiters) CrawlerOptions.destroyQueue(limiter.pqueue);
    this.limiters = [];
  }

  /** Destroys provider queues and removes `pqueue` from each entry, keeping the entries. */
  destroyProviderQueues() {
    for (const entry of this.oxylabs) {
      CrawlerOptions.destroyQueue(entry.pqueue);
      delete entry.pqueue;
    }
    for (const entry of this.decodo) {
      CrawlerOptions.destroyQueue(entry.pqueue);
      delete entry.pqueue;
    }
  }

  /** Returns the live limiter list (not a copy). */
  getLimiters() {
    return this.limiters;
  }

  /**
   * Returns the `randomDelay` of the first limiter matching `url`, optionally
   * falling back to the first global limiter that defines one.
   *
   * @returns {*} The configured value, or `undefined` when none applies.
   */
  getRandomDelay(url, useGlobal) {
    if (!this.getDomainName(url)) return undefined;
    for (const limiter of this.limiters) {
      if (this._hasDomain(url, limiter.domain) && limiter.randomDelay !== undefined) {
        return limiter.randomDelay;
      }
    }
    if (useGlobal) {
      for (const limiter of this.limiters) {
        if (limiter.isGlobal && limiter.randomDelay !== undefined) return limiter.randomDelay;
      }
    }
    return undefined;
  }

  /**
   * Drops every global entry from all collections, destroying the queues of
   * removed limiter and provider entries.
   *
   * @returns {this}
   */
  clearGlobalConfigs() {
    const isGlobal = (entry) => !!entry.isGlobal;
    const isNotGlobal = (entry) => !entry.isGlobal;

    if (Array.isArray(this.requestHeaders)) {
      this.requestHeaders = this.requestHeaders.filter(isNotGlobal);
    }
    if (Array.isArray(this.oxylabs)) {
      const split = CrawlerOptions.splitConfigs(this.oxylabs, isGlobal);
      CrawlerOptions.destroyConfigQueues(split.removed);
      this.oxylabs = split.kept;
    }
    if (Array.isArray(this.decodo)) {
      const split = CrawlerOptions.splitConfigs(this.decodo, isGlobal);
      CrawlerOptions.destroyConfigQueues(split.removed);
      this.decodo = split.kept;
    }
    if (Array.isArray(this.limiters)) {
      const split = CrawlerOptions.splitConfigs(this.limiters, isGlobal);
      CrawlerOptions.destroyConfigQueues(split.removed);
      this.limiters = split.kept;
    }
    if (Array.isArray(this.proxies)) {
      this.proxies = this.proxies.filter(isNotGlobal);
    }
    if (Array.isArray(this.stealthProfiles)) {
      this.stealthProfiles = this.stealthProfiles.filter(isNotGlobal);
    }
    return this;
  }

  /**
   * Resolves the payload of the entry that applies to `url` for one
   * collection: headers object, limiter queue, provider/stealth adapter, or
   * proxy.
   *
   * @param {string} url - URL or hostname.
   * @param {string} type - Collection name (see `getConfiguredDomains`).
   * @param {boolean} [useGlobal] - Fall back to global entries.
   * @param {boolean} [random] - Pick a random candidate instead of the first.
   * @returns {*} The payload, or `null` when nothing applies.
   */
  getAdapter(url, type, useGlobal, random) {
    if (!this.getDomainName(url)) return null;
    const entry = this.selectConfigEntry(url, selectCollection(this, type), useGlobal, random);
    if (!entry) return null;
    switch (type) {
      case "headers":
        return entry.headers;
      case "limiters":
        return entry.pqueue;
      case "oxylabs":
      case "decodo":
      case "stealth":
        return entry.adaptar;
      default:
        return entry.proxy;
    }
  }

  /**
   * Returns retry options for `url`. The first domain-matching limiter
   * decides (yielding `null` if it has no `retry`); only when no limiter
   * matches the domain is the first global limiter with `retry` used.
   */
  getRetryOptions(url) {
    if (!this.getDomainName(url)) return null;
    for (const limiter of this.limiters) {
      if (this._hasDomain(url, limiter.domain)) return limiter.retry || null;
    }
    for (const limiter of this.limiters) {
      if (limiter.isGlobal && limiter.retry) return limiter.retry;
    }
    return null;
  }

  /** Random integer in `[min, max]`, both inclusive. */
  rnd(min = 0, max = Number.MAX_VALUE) {
    return Math.floor(Math.random() * (max - min + 1)) + min;
  }

  /**
   * Tells whether any entry of the collection applies to `url`.
   *
   * @param {string} url - URL or hostname.
   * @param {string} type - Collection name (see `getConfiguredDomains`).
   * @param {boolean} [useGlobal] - Also accept any global entry.
   * @returns {boolean}
   */
  hasDomain(url, type, useGlobal) {
    if (!this.getDomainName(url)) return false;
    const entries = selectCollection(this, type);
    if (entries.some((entry) => this._hasDomain(url, entry.domain))) return true;
    if (useGlobal) return entries.some((entry) => !!entry.isGlobal);
    return false;
  }

  /**
   * Merges the configured headers for `url` with per-request headers.
   *
   * @param {string} url - URL or hostname.
   * @param {boolean} [useGlobal] - Fall back to global header entries.
   * @param {Headers|object} [requestHeaders] - Extra headers; for plain objects only string values are applied.
   * @param {boolean} [addUserAgent] - When no configured headers exist and stealth is off, set a random user agent.
   * @returns {object} Plain object of the merged headers.
   */
  pickHeaders(url, useGlobal, requestHeaders, addUserAgent) {
    const configured = this.getAdapter(url, "headers", useGlobal);
    const merged = new Headers(configured ?? {});
    // Counted before merging: only *configured* headers suppress the random user agent.
    const configuredCount = [...merged.keys()].length;

    if (requestHeaders && requestHeaders instanceof Headers) {
      // Iterate the Headers object directly; Object.entries() on its iterator yields nothing.
      for (const [name, value] of requestHeaders.entries()) merged.set(name, value);
    } else if (requestHeaders && typeof requestHeaders === "object") {
      for (const [name, value] of Object.entries(requestHeaders)) {
        if (typeof value === "string") merged.set(name, value);
      }
    }

    if (addUserAgent && configuredCount === 0 && !this.stealth) {
      merged.set("user-agent", this.getRandomUserAgent());
    }
    return Object.fromEntries(merged.entries());
  }

  /**
   * Tests whether `url` is covered by a configured domain pattern.
   * A pattern may be a RegExp, an exact hostname, a `*` wildcard, a
   * regex-like string (optionally written as `/source/flags`), or a parent /
   * child domain; an array matches when any element matches.
   *
   * @param {string} url - URL or hostname being checked.
   * @param {string|RegExp|Array<string|RegExp>} domain - Configured pattern(s).
   * @returns {boolean}
   */
  _hasDomain(url, domain) {
    if (!domain) return false;
    const hostname = this.getDomainName(url);
    if (!hostname) return false;

    const looksLikeRegex = (text) =>
      REGEX_HINT_PATTERN.test(text) || text.startsWith("/") || text.includes(".*") || text.includes(".+");

    const matches = (pattern) => {
      if (pattern instanceof RegExp) return pattern.test(hostname) || pattern.test(url);

      const text = pattern.toString().trim();
      if (hostname.toLowerCase() === text.toLowerCase()) return true;

      if (text.includes("*")) {
        // Escape everything, then turn the escaped "*" back into ".*".
        const source = text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&").replace(/\\\*/g, ".*");
        const wildcard = new RegExp(`^${source}$`, "i");
        return wildcard.test(hostname) || wildcard.test(url);
      }

      if (looksLikeRegex(text)) {
        try {
          let source = text;
          let flags = "i";
          const literal = text.match(/^\/(.*)\/(\w*)$/);
          if (literal) {
            source = literal[1];
            flags = literal[2] || "i";
          }
          const regex = new RegExp(source, flags);
          return regex.test(hostname) || regex.test(url);
        } catch {
          // Not a valid regular expression: degrade to a substring check.
          return hostname.toLowerCase().includes(text.toLowerCase());
        }
      }

      const host = hostname.toLowerCase();
      const candidate = text.toLowerCase();
      return host === candidate || host.endsWith("." + candidate) || candidate.endsWith("." + host);
    };

    if (Array.isArray(domain)) return domain.some((pattern) => matches(pattern));
    return matches(domain);
  }

  /**
   * Picks the entry that applies to `url`: a domain-matching entry if any,
   * otherwise (when `useGlobal`) a global entry.
   *
   * @param {string} url - URL or hostname.
   * @param {object[]} entries - Collection to search.
   * @param {boolean} [useGlobal] - Allow falling back to global entries.
   * @param {boolean} [random] - Pick a random candidate instead of the first.
   * @returns {object|null}
   */
  selectConfigEntry(url, entries, useGlobal, random) {
    if (!this.getDomainName(url)) return null;

    const choose = (candidates) => {
      if (!candidates.length) return null;
      return candidates[random ? this.rnd(0, candidates.length - 1) : 0];
    };

    const domainMatch = choose(entries.filter((entry) => this._hasDomain(url, entry.domain)));
    if (domainMatch) return domainMatch;
    if (useGlobal) return choose(entries.filter((entry) => entry.isGlobal));
    return null;
  }

  /** Returns the hostname of a valid http(s) URL, the input itself if it is a bare hostname, else `null`. */
  getDomainName(value) {
    if (this.isValidUrl(value)) return new URL(value).hostname;
    if (this.isHostName(value)) return value;
    return null;
  }

  /** True when `value` (trimmed, lower-cased, at most 255 chars) is a dotted hostname with an alphabetic TLD. */
  isHostName(value) {
    if (!value) return false;
    if (value.length > 255) return false;
    const host = value.trim().toLowerCase();
    return HOSTNAME_PATTERN.test(host) && !host.startsWith("-") && !host.endsWith("-");
  }

  /** True when `value` parses as an http/https URL whose hostname passes the hostname pattern. */
  isValidUrl(value) {
    if (!value) return false;
    try {
      const parsed = new URL(value.trim());
      if (!parsed.protocol || !["http:", "https:"].includes(parsed.protocol.toLowerCase())) return false;
      if (!parsed.hostname) return false;
      return HOSTNAME_PATTERN.test(parsed.hostname);
    } catch {
      return false;
    }
  }

  /** Returns a random entry from the pre-generated user-agent pool. */
  getRandomUserAgent() {
    return pickRandom(this.userAgents);
  }
}

exports.CrawlerOptions = CrawlerOptions;
@@ -1 +1 @@
1
import { RezoQueue } from "../queue/queue.js";
import { Oxylabs } from "./addon/oxylabs/index.js";
import path from "node:path";
import os from "node:os";
import { Decodo } from "./addon/decodo/index.js";
import { Rezo } from "../core/rezo.js";

/**
 * Dot-separated labels followed by an alphabetic TLD of at least two letters.
 * Shared by `isHostName` and `isValidUrl` so both apply the same rule.
 */
const HOSTNAME_PATTERN = /^(?:(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,})$/;

/** Characters that make a domain string be treated as a regular expression. */
const REGEX_HINT_PATTERN = /[\^\$\*\+\?\{\}\[\]\(\)\|\\]/;

/** Browser descriptors used to synthesize the random user-agent pool. */
const BROWSERS = [
  { name: "Chrome", version: "91.0.4472.124", engine: "AppleWebKit/537.36" },
  { name: "Firefox", version: "89.0", engine: "Gecko/20100101" },
  { name: "Safari", version: "14.1.1", engine: "AppleWebKit/605.1.15" },
  { name: "Edge", version: "91.0.864.59", engine: "AppleWebKit/537.36" },
  { name: "Opera", version: "77.0.4054.277", engine: "AppleWebKit/537.36" },
  { name: "Vivaldi", version: "3.8.2259.42", engine: "AppleWebKit/537.36" },
  { name: "Brave", version: "1.26.74", engine: "AppleWebKit/537.36" },
  { name: "Chromium", version: "91.0.4472.101", engine: "AppleWebKit/537.36" },
  { name: "Yandex", version: "21.5.3.742", engine: "AppleWebKit/537.36" },
  { name: "Maxthon", version: "5.3.8.2000", engine: "AppleWebKit/537.36" },
];

/** Platform tokens placed inside the parenthesised part of the user agent. */
const PLATFORMS = [
  "Windows NT 10.0",
  "Windows NT 6.1",
  "Macintosh; Intel Mac OS X 10_15_7",
  "Macintosh; Intel Mac OS X 11_4_0",
  "X11; Linux x86_64",
  "X11; Ubuntu; Linux x86_64",
];

/** Per-browser user-agent formatters, keyed by `BROWSERS[].name`. */
const USER_AGENT_FORMATS = {
  Chrome: (platform, b) => `Mozilla/5.0 (${platform}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36`,
  Firefox: (platform, b) => `Mozilla/5.0 (${platform}; rv:${b.version}) ${b.engine} Firefox/${b.version}`,
  Safari: (platform, b) => `Mozilla/5.0 (${platform}) ${b.engine} (KHTML, like Gecko) Version/${b.version} Safari/605.1.15`,
  Edge: (platform, b) => `Mozilla/5.0 (${platform}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36 Edg/${b.version}`,
  Opera: (platform, b) => `Mozilla/5.0 (${platform}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36 OPR/${b.version}`,
  Vivaldi: (platform, b) => `Mozilla/5.0 (${platform}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36 Vivaldi/${b.version}`,
  Brave: (platform, b) => `Mozilla/5.0 (${platform}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36 Brave/${b.version}`,
  Chromium: (platform, b) => `Mozilla/5.0 (${platform}) ${b.engine} (KHTML, like Gecko) Chromium/${b.version} Chrome/${b.version} Safari/537.36`,
  Yandex: (platform, b) => `Mozilla/5.0 (${platform}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} YaBrowser/${b.version} Safari/537.36`,
  Maxthon: (platform, b) => `Mozilla/5.0 (${platform}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36 Maxthon/${b.version}`,
};

/** Returns a uniformly random element of a non-empty array. */
function pickRandom(items) {
  return items[Math.floor(Math.random() * items.length)];
}

/**
 * Maps a configuration type name to the matching collection on `options`.
 * Any name other than "headers", "limiters", "oxylabs", "decodo" or "stealth"
 * selects the proxy list.
 */
function selectCollection(options, type) {
  switch (type) {
    case "headers":
      return options.requestHeaders;
    case "limiters":
      return options.limiters;
    case "oxylabs":
      return options.oxylabs;
    case "decodo":
      return options.decodo;
    case "stealth":
      return options.stealthProfiles;
    default:
      return options.proxies;
  }
}

/**
 * Builds a pool of 200 user-agent strings, each combining a random browser
 * descriptor with a random platform token.
 *
 * @returns {string[]} The generated user-agent strings.
 */
function generateUserAgents() {
  const agents = [];
  for (let n = 0; n < 200; n++) {
    const browser = pickRandom(BROWSERS);
    const platform = pickRandom(PLATFORMS);
    const format = USER_AGENT_FORMATS[browser.name];
    agents.push(format ? format(platform, browser) : "");
  }
  return agents;
}

/**
 * Holds crawler settings plus per-domain / global configuration entries
 * (headers, proxies, limiters, Oxylabs, Decodo and stealth profiles) and
 * resolves which entry applies to a given URL or hostname.
 */
class CrawlerOptions {
  /** Calls `queue.destroy()` when the queue exists and exposes that method. */
  static destroyQueue(queue) {
    if (queue && typeof queue.destroy === "function") queue.destroy();
  }

  /** Destroys the `pqueue` of every config entry in `configs`. */
  static destroyConfigQueues(configs) {
    for (const config of configs) CrawlerOptions.destroyQueue(config.pqueue);
  }

  /**
   * Partitions `configs` by a predicate.
   *
   * @param {object[]} configs - Entries to partition.
   * @param {(config: object) => boolean} shouldRemove - True for entries to remove.
   * @returns {{kept: object[], removed: object[]}} Both partitions, order preserved.
   */
  static splitConfigs(configs, shouldRemove) {
    const kept = [];
    const removed = [];
    for (const config of configs) {
      if (shouldRemove(config)) removed.push(config);
      else kept.push(config);
    }
    return { kept, removed };
  }

  /** Returns the hostname of `url`, or `undefined` if it is empty or unparsable. */
  static getHostname(url) {
    try {
      return new URL(url).hostname || undefined;
    } catch {
      return undefined;
    }
  }

  /**
   * Produces a plain options object with auto-throttle disabled and an
   * explicit limiter list (global limiter, base-URL domain limiter, extras).
   * `overrides` are spread last, except that `baseUrl` always wins.
   *
   * @param {object} options - Preset inputs; every field except `baseUrl` has a default.
   * @returns {object} Options suitable for the `CrawlerOptions` constructor.
   */
  static createStableThroughputOptions(options) {
    const {
      baseUrl,
      concurrency = 40,
      scraperConcurrency = 10,
      retryDelay = 1000,
      maxRetryAttempts = 2,
      retryOnStatusCode = [408, 500, 502, 503, 504],
      maxWaitOn429 = 15000,
      alwaysWaitOn429 = false,
      globalLimiter = { concurrency: 8 },
      domain = CrawlerOptions.getHostname(baseUrl),
      domainLimiter = { concurrency: 2, interval: 1000, intervalCap: 2, randomDelay: 150 },
      domainRetry = { enable: true, max429Retries: 2, retryDelay: 1000, maxRetryAttempts: 2, backoff: true },
      extraLimiters = [],
      overrides = {},
    } = options;

    const limiters = [];
    if (globalLimiter) limiters.push({ isGlobal: true, options: globalLimiter });
    if (domain && domainLimiter) {
      limiters.push({ domain, options: domainLimiter, retry: domainRetry || undefined });
    }
    limiters.push(...extraLimiters);

    const defaults = {
      baseUrl,
      autoThrottle: false,
      concurrency,
      scraperConcurrency,
      retryDelay,
      maxRetryAttempts,
      retryOnStatusCode,
      maxWaitOn429,
      alwaysWaitOn429,
      limiter: limiters.length > 0 ? { enable: true, limiters } : false,
    };
    return { ...defaults, ...overrides, baseUrl };
  }

  baseUrl;
  adapter;
  enableNavigationHistory;
  sessionId;
  rejectUnauthorized;
  userAgent;
  useRndUserAgent;
  timeout;
  maxRedirects;
  maxRetryAttempts;
  retryDelay;
  retryOnStatusCode;
  forceRevisit;
  retryWithoutProxyOnStatusCode;
  retryOnProxyError;
  maxRetryOnProxyError;
  allowRevisiting;
  enableCache;
  cacheTTL;
  cacheDir;
  throwFatalError;
  debug;
  enableSignalHandlers;
  concurrency;
  scraperConcurrency;
  maxDepth;
  maxUrls;
  maxResponseSize;
  respectRobotsTxt;
  followNofollow;
  autoThrottle;
  autoThrottleTargetDelay;
  autoThrottleMinDelay;
  autoThrottleMaxDelay;
  maxWaitOn429;
  alwaysWaitOn429;
  stealth;
  oxylabs = [];
  decodo = [];
  stealthProfiles = [];
  proxies = [];
  limiters = [];
  onLimiterAdded;
  onProviderAdded;
  requestHeaders = [];
  userAgents = generateUserAgents();

  /**
   * @param {object} [options] - Crawler settings; omitted (nullish) fields fall
   *   back to the defaults written below.
   */
  constructor(options = {}) {
    this.baseUrl = options.baseUrl || "";
    this.adapter = options.adapter ?? "http";
    this.enableNavigationHistory = options.enableNavigationHistory ?? false;
    this.sessionId = options.sessionId ?? `session_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;
    this.rejectUnauthorized = options.rejectUnauthorized ?? true;
    this.userAgent = options.userAgent;
    this.useRndUserAgent = options.useRndUserAgent ?? false;
    this.timeout = options.timeout ?? 30000;
    this.maxRedirects = options.maxRedirects ?? 10;
    this.maxRetryAttempts = options.maxRetryAttempts ?? 3;
    this.retryDelay = options.retryDelay ?? 0;
    this.retryOnStatusCode = options.retryOnStatusCode ?? [408, 429, 500, 502, 503, 504];
    this.forceRevisit = options.forceRevisit ?? false;
    this.retryWithoutProxyOnStatusCode = options.retryWithoutProxyOnStatusCode ?? [407, 403];
    this.retryOnProxyError = options.retryOnProxyError ?? true;
    this.maxRetryOnProxyError = options.maxRetryOnProxyError ?? 3;
    this.allowRevisiting = options.allowRevisiting ?? false;
    this.enableCache = options.enableCache ?? true;
    this.cacheTTL = options.cacheTTL ?? 604800000;
    this.cacheDir = options.cacheDir ?? path.join(os.tmpdir(), "rezo_cache");
    this.throwFatalError = options.throwFatalError ?? false;
    this.debug = options.debug ?? false;
    this.enableSignalHandlers = options.enableSignalHandlers ?? false;
    this.concurrency = options.concurrency ?? 100;
    this.scraperConcurrency = options.scraperConcurrency ?? this.concurrency;
    this.maxDepth = options.maxDepth ?? 0;
    this.maxUrls = options.maxUrls ?? 0;
    this.maxResponseSize = options.maxResponseSize ?? 0;
    this.respectRobotsTxt = options.respectRobotsTxt ?? false;
    this.followNofollow = options.followNofollow ?? false;
    this.autoThrottle = options.autoThrottle ?? true;
    this.autoThrottleTargetDelay = options.autoThrottleTargetDelay ?? 1000;
    this.autoThrottleMinDelay = options.autoThrottleMinDelay ?? 100;
    this.autoThrottleMaxDelay = options.autoThrottleMaxDelay ?? 60000;
    this.maxWaitOn429 = options.maxWaitOn429 ?? 1800000;
    this.alwaysWaitOn429 = options.alwaysWaitOn429 ?? false;
    this._addHeaders(options.headers);
    this._addOxylabs(options.oxylabs);
    this._addDecodo(options.decodo);
    this._addProxies(options.proxy);
    this._addLimiters(options.limiter);
    this._addStealths(options.stealths);
    this.stealth = options.stealth;
  }

  /**
   * Lists the distinct `domain` values configured for one collection.
   *
   * @param {string} type - "headers", "limiters", "oxylabs", "decodo", "stealth"; anything else means proxies.
   * @returns {Array} Domains in first-seen order, de-duplicated by strict equality.
   */
  getConfiguredDomains(type) {
    return selectCollection(this, type)
      .filter((entry) => entry.domain)
      .map((entry) => entry.domain)
      .filter((domain, index, all) => all.indexOf(domain) === index);
  }

  /**
   * Removes every entry whose `domain` equals `domain` (see `_domainsEqual`)
   * from all collections, destroying the queues of removed limiter and
   * provider entries.
   *
   * @returns {this}
   */
  removeDomain(domain) {
    const matches = (entry) => !!entry.domain && this._domainsEqual(entry.domain, domain);
    const keep = (entry) => !entry.domain || !this._domainsEqual(entry.domain, domain);

    this.requestHeaders = this.requestHeaders.filter(keep);
    this.proxies = this.proxies.filter(keep);

    const limiterSplit = CrawlerOptions.splitConfigs(this.limiters, matches);
    CrawlerOptions.destroyConfigQueues(limiterSplit.removed);
    this.limiters = limiterSplit.kept;

    const oxylabsSplit = CrawlerOptions.splitConfigs(this.oxylabs, matches);
    CrawlerOptions.destroyConfigQueues(oxylabsSplit.removed);
    this.oxylabs = oxylabsSplit.kept;

    const decodoSplit = CrawlerOptions.splitConfigs(this.decodo, matches);
    CrawlerOptions.destroyConfigQueues(decodoSplit.removed);
    this.decodo = decodoSplit.kept;

    this.stealthProfiles = this.stealthProfiles.filter(keep);
    return this;
  }

  /** Arrays compare element-wise with `===`; everything else compares with `===`. */
  _domainsEqual(left, right) {
    if (Array.isArray(left) && Array.isArray(right)) {
      return left.length === right.length && left.every((item, index) => item === right[index]);
    }
    return left === right;
  }

  /**
   * Counts total, global and domain-specific entries for headers, proxies,
   * limiters, oxylabs and decodo (stealth profiles are not included).
   */
  getConfigurationSummary() {
    const summarize = (entries) => ({
      total: entries.length,
      global: entries.filter((entry) => entry.isGlobal).length,
      domainSpecific: entries.filter((entry) => !entry.isGlobal && entry.domain).length,
    });
    return {
      headers: summarize(this.requestHeaders),
      proxies: summarize(this.proxies),
      limiters: summarize(this.limiters),
      oxylabs: summarize(this.oxylabs),
      decodo: summarize(this.decodo),
    };
  }

  /** Registers header entries; skips entries with neither scope nor any header. */
  _addHeaders(config) {
    if (!config || !config.enable) return;
    for (const entry of config.httpHeaders) {
      const { domain, isGlobal } = entry;
      let { headers } = entry;
      if (!domain && !isGlobal) continue;
      if (headers instanceof Headers) {
        // Store a plain object so later reads do not depend on the Headers instance.
        const plain = Object.fromEntries(headers.entries());
        if (Object.keys(plain).length < 1) continue;
        headers = plain;
      } else if (!headers || Object.keys(headers).length < 1) {
        continue;
      }
      this.requestHeaders.push({ domain, isGlobal, headers });
    }
  }

  /** Registers proxy entries; skips entries with neither scope nor a non-empty proxy. */
  _addProxies(config) {
    if (!config || !config.enable) return;
    for (const { domain, isGlobal, proxy, rotating } of config.proxies) {
      if (!domain && !isGlobal) continue;
      if (!proxy || Object.keys(proxy).length < 1) continue;
      this.proxies.push({ domain, isGlobal, proxy, rotating: !!rotating });
    }
  }

  /**
   * Registers limiter entries, creating one queue per entry and notifying
   * `onLimiterAdded` when it is set. Entries need a scope and at least one
   * of `options` / `retry`.
   */
  _addLimiters(config) {
    if (!config || !config.enable) return;
    for (const { domain, isGlobal, options, retry } of config.limiters) {
      if (!domain && !isGlobal) continue;
      if (!options && !retry) continue;
      const pqueue = options ? new RezoQueue(options) : new RezoQueue({ name: "limiter" });
      this.limiters.push({ domain, isGlobal, pqueue, randomDelay: options?.randomDelay, retry });
      if (this.onLimiterAdded) this.onLimiterAdded(pqueue);
    }
  }

  /**
   * Registers Oxylabs entries. A queue is created only when `queueOptions`
   * is given, and `onProviderAdded` is notified for each created queue.
   */
  _addOxylabs(config) {
    if (!config || !config.enable) return;
    for (const { domain, isGlobal, options, queueOptions } of config.labs) {
      if (!domain && !isGlobal) continue;
      if (!options || Object.keys(options).length < 1) continue;
      const pqueue = queueOptions ? new RezoQueue(queueOptions) : undefined;
      // "adaptar" (sic) is the property name the rest of the class reads.
      this.oxylabs.push({ domain, isGlobal, adaptar: new Oxylabs(options), pqueue });
      if (pqueue && this.onProviderAdded) this.onProviderAdded(pqueue);
    }
  }

  /** Registers Decodo entries; same rules as `_addOxylabs`. */
  _addDecodo(config) {
    if (!config || !config.enable) return;
    for (const { domain, isGlobal, options, queueOptions } of config.labs) {
      if (!domain && !isGlobal) continue;
      if (!options || Object.keys(options).length < 1) continue;
      const pqueue = queueOptions ? new RezoQueue(queueOptions) : undefined;
      this.decodo.push({ domain, isGlobal, adaptar: new Decodo(options), pqueue });
      if (pqueue && this.onProviderAdded) this.onProviderAdded(pqueue);
    }
  }

  /** Registers stealth profiles, wrapping each in a `Rezo` instance. */
  _addStealths(config) {
    if (!config || !config.enable) return;
    for (const { domain, isGlobal, stealth } of config.profiles) {
      if (!domain && !isGlobal) continue;
      if (!stealth) continue;
      this.stealthProfiles.push({ domain, isGlobal, adaptar: new Rezo({ stealth }) });
    }
  }

  /** Adds one header entry. @returns {this} */
  addHeaders(entry) {
    this._addHeaders({ enable: true, httpHeaders: [entry] });
    return this;
  }

  /** Adds one proxy entry. @returns {this} */
  addProxy(entry) {
    this._addProxies({ enable: true, proxies: [entry] });
    return this;
  }

  /** Adds several proxy entries. @returns {this} */
  addProxies(entries) {
    this._addProxies({ enable: true, proxies: entries });
    return this;
  }

  /**
   * Resolves the proxy entry for `url`.
   *
   * @param {string} url - URL or hostname.
   * @param {boolean} [useGlobal] - Fall back to global entries when no domain entry matches.
   * @param {boolean} [random] - Pick a random candidate instead of the first.
   * @returns {{proxy: object, rotating: boolean} | null}
   */
  getProxyConfig(url, useGlobal, random) {
    const entry = this.selectConfigEntry(url, this.proxies, useGlobal, random);
    if (!entry) return null;
    return { proxy: entry.proxy, rotating: !!entry.rotating };
  }

  /** Adds one limiter entry. @returns {this} */
  addLimiter(entry) {
    this._addLimiters({ enable: true, limiters: [entry] });
    return this;
  }

  /** Adds one Oxylabs entry. @returns {this} */
  addOxylabs(entry) {
    this._addOxylabs({ enable: true, labs: [entry] });
    return this;
  }

  /** Adds one Decodo entry. @returns {this} */
  addDecodo(entry) {
    this._addDecodo({ enable: true, labs: [entry] });
    return this;
  }

  /** Adds one stealth profile. @returns {this} */
  addStealth(entry) {
    this._addStealths({ enable: true, profiles: [entry] });
    return this;
  }

  /** Destroys every limiter queue and empties the limiter list. */
  destroyLimiters() {
    for (const limiter of this.limiters) CrawlerOptions.destroyQueue(limiter.pqueue);
    this.limiters = [];
  }

  /** Destroys provider queues and removes `pqueue` from each entry, keeping the entries. */
  destroyProviderQueues() {
    for (const entry of this.oxylabs) {
      CrawlerOptions.destroyQueue(entry.pqueue);
      delete entry.pqueue;
    }
    for (const entry of this.decodo) {
      CrawlerOptions.destroyQueue(entry.pqueue);
      delete entry.pqueue;
    }
  }

  /** Returns the live limiter list (not a copy). */
  getLimiters() {
    return this.limiters;
  }

  /**
   * Returns the `randomDelay` of the first limiter matching `url`, optionally
   * falling back to the first global limiter that defines one.
   *
   * @returns {*} The configured value, or `undefined` when none applies.
   */
  getRandomDelay(url, useGlobal) {
    if (!this.getDomainName(url)) return undefined;
    for (const limiter of this.limiters) {
      if (this._hasDomain(url, limiter.domain) && limiter.randomDelay !== undefined) {
        return limiter.randomDelay;
      }
    }
    if (useGlobal) {
      for (const limiter of this.limiters) {
        if (limiter.isGlobal && limiter.randomDelay !== undefined) return limiter.randomDelay;
      }
    }
    return undefined;
  }

  /**
   * Drops every global entry from all collections, destroying the queues of
   * removed limiter and provider entries.
   *
   * @returns {this}
   */
  clearGlobalConfigs() {
    const isGlobal = (entry) => !!entry.isGlobal;
    const isNotGlobal = (entry) => !entry.isGlobal;

    if (Array.isArray(this.requestHeaders)) {
      this.requestHeaders = this.requestHeaders.filter(isNotGlobal);
    }
    if (Array.isArray(this.oxylabs)) {
      const split = CrawlerOptions.splitConfigs(this.oxylabs, isGlobal);
      CrawlerOptions.destroyConfigQueues(split.removed);
      this.oxylabs = split.kept;
    }
    if (Array.isArray(this.decodo)) {
      const split = CrawlerOptions.splitConfigs(this.decodo, isGlobal);
      CrawlerOptions.destroyConfigQueues(split.removed);
      this.decodo = split.kept;
    }
    if (Array.isArray(this.limiters)) {
      const split = CrawlerOptions.splitConfigs(this.limiters, isGlobal);
      CrawlerOptions.destroyConfigQueues(split.removed);
      this.limiters = split.kept;
    }
    if (Array.isArray(this.proxies)) {
      this.proxies = this.proxies.filter(isNotGlobal);
    }
    if (Array.isArray(this.stealthProfiles)) {
      this.stealthProfiles = this.stealthProfiles.filter(isNotGlobal);
    }
    return this;
  }

  /**
   * Resolves the payload of the entry that applies to `url` for one
   * collection: headers object, limiter queue, provider/stealth adapter, or
   * proxy.
   *
   * @param {string} url - URL or hostname.
   * @param {string} type - Collection name (see `getConfiguredDomains`).
   * @param {boolean} [useGlobal] - Fall back to global entries.
   * @param {boolean} [random] - Pick a random candidate instead of the first.
   * @returns {*} The payload, or `null` when nothing applies.
   */
  getAdapter(url, type, useGlobal, random) {
    if (!this.getDomainName(url)) return null;
    const entry = this.selectConfigEntry(url, selectCollection(this, type), useGlobal, random);
    if (!entry) return null;
    switch (type) {
      case "headers":
        return entry.headers;
      case "limiters":
        return entry.pqueue;
      case "oxylabs":
      case "decodo":
      case "stealth":
        return entry.adaptar;
      default:
        return entry.proxy;
    }
  }

  /**
   * Returns retry options for `url`. The first domain-matching limiter
   * decides (yielding `null` if it has no `retry`); only when no limiter
   * matches the domain is the first global limiter with `retry` used.
   */
  getRetryOptions(url) {
    if (!this.getDomainName(url)) return null;
    for (const limiter of this.limiters) {
      if (this._hasDomain(url, limiter.domain)) return limiter.retry || null;
    }
    for (const limiter of this.limiters) {
      if (limiter.isGlobal && limiter.retry) return limiter.retry;
    }
    return null;
  }

  /** Random integer in `[min, max]`, both inclusive. */
  rnd(min = 0, max = Number.MAX_VALUE) {
    return Math.floor(Math.random() * (max - min + 1)) + min;
  }

  /**
   * Tells whether any entry of the collection applies to `url`.
   *
   * @param {string} url - URL or hostname.
   * @param {string} type - Collection name (see `getConfiguredDomains`).
   * @param {boolean} [useGlobal] - Also accept any global entry.
   * @returns {boolean}
   */
  hasDomain(url, type, useGlobal) {
    if (!this.getDomainName(url)) return false;
    const entries = selectCollection(this, type);
    if (entries.some((entry) => this._hasDomain(url, entry.domain))) return true;
    if (useGlobal) return entries.some((entry) => !!entry.isGlobal);
    return false;
  }

  /**
   * Merges the configured headers for `url` with per-request headers.
   *
   * @param {string} url - URL or hostname.
   * @param {boolean} [useGlobal] - Fall back to global header entries.
   * @param {Headers|object} [requestHeaders] - Extra headers; for plain objects only string values are applied.
   * @param {boolean} [addUserAgent] - When no configured headers exist and stealth is off, set a random user agent.
   * @returns {object} Plain object of the merged headers.
   */
  pickHeaders(url, useGlobal, requestHeaders, addUserAgent) {
    const configured = this.getAdapter(url, "headers", useGlobal);
    const merged = new Headers(configured ?? {});
    // Headers has no `count` property; count the keys explicitly. Counted
    // before merging: only *configured* headers suppress the random user agent.
    const configuredCount = [...merged.keys()].length;

    if (requestHeaders && requestHeaders instanceof Headers) {
      // Iterate the Headers object directly; Object.entries() on its iterator yields nothing.
      for (const [name, value] of requestHeaders.entries()) merged.set(name, value);
    } else if (requestHeaders && typeof requestHeaders === "object") {
      for (const [name, value] of Object.entries(requestHeaders)) {
        if (typeof value === "string") merged.set(name, value);
      }
    }

    if (addUserAgent && configuredCount === 0 && !this.stealth) {
      merged.set("user-agent", this.getRandomUserAgent());
    }
    return Object.fromEntries(merged.entries());
  }

  /**
   * Tests whether `url` is covered by a configured domain pattern.
   * A pattern may be a RegExp, an exact hostname, a `*` wildcard, a
   * regex-like string (optionally written as `/source/flags`), or a parent /
   * child domain; an array matches when any element matches.
   *
   * @param {string} url - URL or hostname being checked.
   * @param {string|RegExp|Array<string|RegExp>} domain - Configured pattern(s).
   * @returns {boolean}
   */
  _hasDomain(url, domain) {
    if (!domain) return false;
    const hostname = this.getDomainName(url);
    if (!hostname) return false;

    const looksLikeRegex = (text) =>
      REGEX_HINT_PATTERN.test(text) || text.startsWith("/") || text.includes(".*") || text.includes(".+");

    const matches = (pattern) => {
      if (pattern instanceof RegExp) return pattern.test(hostname) || pattern.test(url);

      const text = pattern.toString().trim();
      if (hostname.toLowerCase() === text.toLowerCase()) return true;

      if (text.includes("*")) {
        // Escape everything, then turn the escaped "*" back into ".*".
        const source = text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&").replace(/\\\*/g, ".*");
        const wildcard = new RegExp(`^${source}$`, "i");
        return wildcard.test(hostname) || wildcard.test(url);
      }

      if (looksLikeRegex(text)) {
        try {
          let source = text;
          let flags = "i";
          const literal = text.match(/^\/(.*)\/(\w*)$/);
          if (literal) {
            source = literal[1];
            flags = literal[2] || "i";
          }
          const regex = new RegExp(source, flags);
          return regex.test(hostname) || regex.test(url);
        } catch {
          // Not a valid regular expression: degrade to a substring check.
          return hostname.toLowerCase().includes(text.toLowerCase());
        }
      }

      const host = hostname.toLowerCase();
      const candidate = text.toLowerCase();
      return host === candidate || host.endsWith("." + candidate) || candidate.endsWith("." + host);
    };

    if (Array.isArray(domain)) return domain.some((pattern) => matches(pattern));
    return matches(domain);
  }

  /**
   * Picks the entry that applies to `url`: a domain-matching entry if any,
   * otherwise (when `useGlobal`) a global entry.
   *
   * @param {string} url - URL or hostname.
   * @param {object[]} entries - Collection to search.
   * @param {boolean} [useGlobal] - Allow falling back to global entries.
   * @param {boolean} [random] - Pick a random candidate instead of the first.
   * @returns {object|null}
   */
  selectConfigEntry(url, entries, useGlobal, random) {
    if (!this.getDomainName(url)) return null;

    const choose = (candidates) => {
      if (!candidates.length) return null;
      return candidates[random ? this.rnd(0, candidates.length - 1) : 0];
    };

    const domainMatch = choose(entries.filter((entry) => this._hasDomain(url, entry.domain)));
    if (domainMatch) return domainMatch;
    if (useGlobal) return choose(entries.filter((entry) => entry.isGlobal));
    return null;
  }

  /** Returns the hostname of a valid http(s) URL, the input itself if it is a bare hostname, else `null`. */
  getDomainName(value) {
    if (this.isValidUrl(value)) return new URL(value).hostname;
    if (this.isHostName(value)) return value;
    return null;
  }

  /** True when `value` (trimmed, lower-cased, at most 255 chars) is a dotted hostname with an alphabetic TLD. */
  isHostName(value) {
    if (!value) return false;
    if (value.length > 255) return false;
    const host = value.trim().toLowerCase();
    return HOSTNAME_PATTERN.test(host) && !host.startsWith("-") && !host.endsWith("-");
  }

  /** True when `value` parses as an http/https URL whose hostname passes the hostname pattern. */
  isValidUrl(value) {
    if (!value) return false;
    try {
      const parsed = new URL(value.trim());
      if (!parsed.protocol || !["http:", "https:"].includes(parsed.protocol.toLowerCase())) return false;
      if (!parsed.hostname) return false;
      return HOSTNAME_PATTERN.test(parsed.hostname);
    } catch {
      return false;
    }
  }

  /** Returns a random entry from the pre-generated user-agent pool. */
  getRandomUserAgent() {
    return pickRandom(this.userAgents);
  }
}

export { CrawlerOptions };
1
+ import{RezoQueue as y}from"../queue/queue.js";import{Oxylabs as x}from"./addon/oxylabs/index.js";import p from"node:path";import v from"node:os";import{Decodo as A}from"./addon/decodo/index.js";import{Rezo as D}from"../core/rezo.js";class l{static destroyQueue(e){if(e&&typeof e.destroy==="function")e.destroy()}static destroyConfigQueues(e){for(let r of e)l.destroyQueue(r.pqueue)}static splitConfigs(e,r){let i=[],a=[];for(let t of e)if(r(t))a.push(t);else i.push(t);return{kept:i,removed:a}}static getHostname(e){try{return new URL(e).hostname||void 0}catch{return}}static createStableThroughputOptions(e){let{baseUrl:r,concurrency:i=40,scraperConcurrency:a=10,retryDelay:t=1000,maxRetryAttempts:s=2,retryOnStatusCode:o=[408,500,502,503,504],maxWaitOn429:d=15000,alwaysWaitOn429:n=!1,globalLimiter:h={concurrency:8},domain:u=l.getHostname(r),domainLimiter:c={concurrency:2,interval:1000,intervalCap:2,randomDelay:150},domainRetry:m={enable:!0,max429Retries:2,retryDelay:1000,maxRetryAttempts:2,backoff:!0},extraLimiters:g=[],overrides:b={}}=e,f=[];if(h)f.push({isGlobal:!0,options:h});if(u&&c)f.push({domain:u,options:c,retry:m||void 0});return 
f.push(...g),{...{baseUrl:r,autoThrottle:!1,concurrency:i,scraperConcurrency:a,retryDelay:t,maxRetryAttempts:s,retryOnStatusCode:o,maxWaitOn429:d,alwaysWaitOn429:n,limiter:f.length>0?{enable:!0,limiters:f}:!1},...b,baseUrl:r}}baseUrl;adapter;enableNavigationHistory;sessionId;rejectUnauthorized;userAgent;useRndUserAgent;timeout;maxRedirects;maxRetryAttempts;retryDelay;retryOnStatusCode;forceRevisit;retryWithoutProxyOnStatusCode;retryOnProxyError;maxRetryOnProxyError;allowRevisiting;enableCache;cacheTTL;cacheDir;throwFatalError;debug;enableSignalHandlers;concurrency;scraperConcurrency;maxDepth;maxUrls;maxResponseSize;respectRobotsTxt;followNofollow;autoThrottle;autoThrottleTargetDelay;autoThrottleMinDelay;autoThrottleMaxDelay;maxWaitOn429;alwaysWaitOn429;stealth;oxylabs=[];decodo=[];stealthProfiles=[];proxies=[];limiters=[];onLimiterAdded;onProviderAdded;requestHeaders=[];userAgents=$();constructor(e={}){this.baseUrl=e.baseUrl||"",this.adapter=e.adapter??"http",this.enableNavigationHistory=e.enableNavigationHistory??!1,this.sessionId=e.sessionId??`session_${Date.now()}_${Math.random().toString(36).slice(2,8)}`,this.rejectUnauthorized=e.rejectUnauthorized??!0,this.userAgent=e.userAgent,this.useRndUserAgent=e.useRndUserAgent??!1,this.timeout=e.timeout??30000,this.maxRedirects=e.maxRedirects??10,this.maxRetryAttempts=e.maxRetryAttempts??3,this.retryDelay=e.retryDelay??0,this.retryOnStatusCode=e.retryOnStatusCode??[408,429,500,502,503,504],this.forceRevisit=e.forceRevisit??!1,this.retryWithoutProxyOnStatusCode=e.retryWithoutProxyOnStatusCode??[407,403],this.retryOnProxyError=e.retryOnProxyError??!0,this.maxRetryOnProxyError=e.maxRetryOnProxyError??3,this.allowRevisiting=e.allowRevisiting??!1,this.enableCache=e.enableCache??!0,this.cacheTTL=e.cacheTTL??604800000,this.cacheDir=e.cacheDir??p.join(v.tmpdir(),"rezo_cache"),this.throwFatalError=e.throwFatalError??!1,this.debug=e.debug??!1,this.enableSignalHandlers=e.enableSignalHandlers??!1,this.concurrency=e.concurrency??100,t
his.scraperConcurrency=e.scraperConcurrency??this.concurrency,this.maxDepth=e.maxDepth??0,this.maxUrls=e.maxUrls??0,this.maxResponseSize=e.maxResponseSize??0,this.respectRobotsTxt=e.respectRobotsTxt??!1,this.followNofollow=e.followNofollow??!1,this.autoThrottle=e.autoThrottle??!0,this.autoThrottleTargetDelay=e.autoThrottleTargetDelay??1000,this.autoThrottleMinDelay=e.autoThrottleMinDelay??100,this.autoThrottleMaxDelay=e.autoThrottleMaxDelay??60000,this.maxWaitOn429=e.maxWaitOn429??1800000,this.alwaysWaitOn429=e.alwaysWaitOn429??!1,this._addHeaders(e.headers),this._addOxylabs(e.oxylabs),this._addDecodo(e.decodo),this._addProxies(e.proxy),this._addLimiters(e.limiter),this._addStealths(e.stealths),this.stealth=e.stealth}getConfiguredDomains(e){return(e==="headers"?this.requestHeaders:e==="limiters"?this.limiters:e==="oxylabs"?this.oxylabs:e==="decodo"?this.decodo:e==="stealth"?this.stealthProfiles:this.proxies).filter((i)=>i.domain).map((i)=>i.domain).filter((i,a,t)=>t.indexOf(i)===a)}removeDomain(e){this.requestHeaders=this.requestHeaders.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this.proxies=this.proxies.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e));let r=l.splitConfigs(this.limiters,(t)=>!!t.domain&&this._domainsEqual(t.domain,e));l.destroyConfigQueues(r.removed),this.limiters=r.kept;let i=l.splitConfigs(this.oxylabs,(t)=>!!t.domain&&this._domainsEqual(t.domain,e));l.destroyConfigQueues(i.removed),this.oxylabs=i.kept;let a=l.splitConfigs(this.decodo,(t)=>!!t.domain&&this._domainsEqual(t.domain,e));return l.destroyConfigQueues(a.removed),this.decodo=a.kept,this.stealthProfiles=this.stealthProfiles.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this}_domainsEqual(e,r){if(Array.isArray(e)&&Array.isArray(r))return e.length===r.length&&e.every((i,a)=>i===r[a]);return e===r}getConfigurationSummary(){let 
e=(r)=>({total:r.length,global:r.filter((i)=>i.isGlobal).length,domainSpecific:r.filter((i)=>!i.isGlobal&&i.domain).length});return{headers:e(this.requestHeaders),proxies:e(this.proxies),limiters:e(this.limiters),oxylabs:e(this.oxylabs),decodo:e(this.decodo)}}_addHeaders(e){if(!e||!e.enable)return;for(let r of e.httpHeaders){let{domain:i,isGlobal:a,headers:t}=r;if(!i&&!a)continue;if(t instanceof Headers){let s=Object.fromEntries(t.entries());if(Object.keys(s).length<1)continue;t=s}else if(!t||Object.keys(t).length<1)continue;this.requestHeaders.push({domain:i,isGlobal:a,headers:t})}}_addProxies(e){if(!e||!e.enable)return;for(let r of e.proxies){let{domain:i,isGlobal:a,proxy:t,rotating:s}=r;if(!i&&!a)continue;if(!t||Object.keys(t).length<1)continue;this.proxies.push({domain:i,isGlobal:a,proxy:t,rotating:!!s})}}_addLimiters(e){if(!e||!e.enable)return;for(let r of e.limiters){let{domain:i,isGlobal:a,options:t,retry:s}=r;if(!i&&!a)continue;if(!t&&!s)continue;let o=t?new y(t):new y({name:"limiter"});if(this.limiters.push({domain:i,isGlobal:a,pqueue:o,randomDelay:t?.randomDelay,retry:s}),this.onLimiterAdded)this.onLimiterAdded(o)}}_addOxylabs(e){if(!e||!e.enable)return;for(let r of e.labs){let{domain:i,isGlobal:a,options:t,queueOptions:s}=r;if(!i&&!a)continue;if(!t||Object.keys(t).length<1)continue;let o=s?new y(s):void 0;if(this.oxylabs.push({domain:i,isGlobal:a,adaptar:new x(t),pqueue:o}),o&&this.onProviderAdded)this.onProviderAdded(o)}}_addDecodo(e){if(!e||!e.enable)return;for(let r of e.labs){let{domain:i,isGlobal:a,options:t,queueOptions:s}=r;if(!i&&!a)continue;if(!t||Object.keys(t).length<1)continue;let o=s?new y(s):void 0;if(this.decodo.push({domain:i,isGlobal:a,adaptar:new A(t),pqueue:o}),o&&this.onProviderAdded)this.onProviderAdded(o)}}_addStealths(e){if(!e||!e.enable)return;for(let r of e.profiles){let{domain:i,isGlobal:a,stealth:t}=r;if(!i&&!a)continue;if(!t)continue;this.stealthProfiles.push({domain:i,isGlobal:a,adaptar:new 
D({stealth:t})})}}addHeaders(e){return this._addHeaders({enable:!0,httpHeaders:[e]}),this}addProxy(e){return this._addProxies({enable:!0,proxies:[e]}),this}addProxies(e){return this._addProxies({enable:!0,proxies:e}),this}getProxyConfig(e,r,i){let a=this.selectConfigEntry(e,this.proxies,r,i);if(!a)return null;return{proxy:a.proxy,rotating:!!a.rotating}}addLimiter(e){return this._addLimiters({enable:!0,limiters:[e]}),this}addOxylabs(e){return this._addOxylabs({enable:!0,labs:[e]}),this}addDecodo(e){return this._addDecodo({enable:!0,labs:[e]}),this}addStealth(e){return this._addStealths({enable:!0,profiles:[e]}),this}destroyLimiters(){for(let e of this.limiters)l.destroyQueue(e.pqueue);this.limiters=[]}destroyProviderQueues(){for(let e of this.oxylabs)l.destroyQueue(e.pqueue),delete e.pqueue;for(let e of this.decodo)l.destroyQueue(e.pqueue),delete e.pqueue}getLimiters(){return this.limiters}getRandomDelay(e,r){if(!this.getDomainName(e))return;for(let a of this.limiters)if(this._hasDomain(e,a.domain)&&a.randomDelay!==void 0)return a.randomDelay;if(r){for(let a of this.limiters)if(a.isGlobal&&a.randomDelay!==void 0)return a.randomDelay}return}clearGlobalConfigs(){if(Array.isArray(this.requestHeaders))this.requestHeaders=this.requestHeaders.filter((e)=>!e.isGlobal);if(Array.isArray(this.oxylabs)){let e=l.splitConfigs(this.oxylabs,(r)=>!!r.isGlobal);l.destroyConfigQueues(e.removed),this.oxylabs=e.kept}if(Array.isArray(this.decodo)){let e=l.splitConfigs(this.decodo,(r)=>!!r.isGlobal);l.destroyConfigQueues(e.removed),this.decodo=e.kept}if(Array.isArray(this.limiters)){let e=l.splitConfigs(this.limiters,(r)=>!!r.isGlobal);l.destroyConfigQueues(e.removed),this.limiters=e.kept}if(Array.isArray(this.proxies))this.proxies=this.proxies.filter((e)=>!e.isGlobal);if(Array.isArray(this.stealthProfiles))this.stealthProfiles=this.stealthProfiles.filter((e)=>!e.isGlobal);return this}getAdapter(e,r,i,a){if(!this.getDomainName(e))return null;let 
s=r==="headers"?this.requestHeaders:r==="limiters"?this.limiters:r==="oxylabs"?this.oxylabs:r==="decodo"?this.decodo:r==="stealth"?this.stealthProfiles:this.proxies,o=this.selectConfigEntry(e,s,i,a);if(!o)return null;return r==="headers"?o.headers:r==="limiters"?o.pqueue:r==="oxylabs"?o.adaptar:r==="decodo"?o.adaptar:r==="stealth"?o.adaptar:o.proxy}getRetryOptions(e){if(!this.getDomainName(e))return null;for(let i=0;i<this.limiters.length;i++)if(this._hasDomain(e,this.limiters[i].domain))return this.limiters[i].retry||null;for(let i=0;i<this.limiters.length;i++)if(this.limiters[i].isGlobal&&this.limiters[i].retry)return this.limiters[i].retry;return null}rnd(e=0,r=Number.MAX_VALUE){return Math.floor(Math.random()*(r-e+1))+e}hasDomain(e,r,i){if(!this.getDomainName(e))return!1;let t=r==="headers"?this.requestHeaders:r==="limiters"?this.limiters:r==="oxylabs"?this.oxylabs:r==="decodo"?this.decodo:r==="stealth"?this.stealthProfiles:this.proxies;for(let s=0;s<t.length;s++)if(this._hasDomain(e,t[s].domain))return!0;if(i){for(let s=0;s<t.length;s++)if(t[s].isGlobal)return!0}return!1}pickHeaders(e,r,i,a){let t=this.getAdapter(e,"headers",r),s=new Headers(t??{}),o=[...s.keys()].length;if(i&&i instanceof Headers)for(let[d,n]of Object.entries(i.entries()))s.set(d,n);else if(i&&typeof i==="object"){for(let[d,n]of Object.entries(i))if(typeof n==="string")s.set(d,n)}if(a&&o===0&&!this.stealth)s.set("user-agent",this.getRandomUserAgent());return Object.fromEntries(s.entries())}_hasDomain(e,r){if(!r)return!1;let i=this.getDomainName(e);if(!i)return!1;let a=(s)=>{return/[\^\$\*\+\?\{\}\[\]\(\)\|\\]/.test(s)||s.startsWith("/")||s.includes(".*")||s.includes(".+")},t=(s)=>{if(s instanceof RegExp)return s.test(i)||s.test(e);let o=s.toString().trim();if(i.toLowerCase()===o.toLowerCase())return!0;if(o.includes("*")){let h=o.replace(/[.*+?^${}()|[\]\\]/g,"\\$&").replace(/\\\*/g,".*"),u=new RegExp(`^${h}$`,"i");return u.test(i)||u.test(e)}if(a(o))try{let 
h=o,u="i",c=o.match(/^\/(.*)\/(\w*)$/);if(c)h=c[1],u=c[2]||"i";let m=new RegExp(h,u);return m.test(i)||m.test(e)}catch(h){return i.toLowerCase().includes(o.toLowerCase())}let d=i.toLowerCase(),n=o.toLowerCase();return d===n||d.endsWith("."+n)||n.endsWith("."+d)};if(Array.isArray(r)){for(let s of r)if(t(s))return!0;return!1}return t(r)}selectConfigEntry(e,r,i,a){if(!this.getDomainName(e))return null;let s=(n)=>{if(!n.length)return null;let h=a?this.rnd(0,n.length-1):0;return n[h]},o=[];for(let n of r)if(this._hasDomain(e,n.domain))o.push(n);let d=s(o);if(d)return d;if(i){let n=[];for(let h of r)if(h.isGlobal)n.push(h);return s(n)}return null}getDomainName(e){if(this.isValidUrl(e))return new URL(e).hostname;else if(this.isHostName(e))return e;return null}isHostName(e){if(!e)return!1;if(e.length>255)return!1;let r=/^(?:(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+ [a-zA-Z]{2,})$/;return e=e.trim().toLowerCase(),r.test(e)&&!e.startsWith("-")&&!e.endsWith("-")}isValidUrl(e){if(!e)return!1;e=e.trim();try{let r=new URL(e);if(!r.protocol||!["http:","https:"].includes(r.protocol.toLowerCase()))return!1;if(!r.hostname)return!1;if(!/^(?:(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,})$/.test(r.hostname))return!1;return!0}catch{return!1}}getRandomUserAgent(){return this.userAgents[Math.floor(Math.random()*this.userAgents.length)]}}function $(){let 
e=[{name:"Chrome",version:"91.0.4472.124",engine:"AppleWebKit/537.36"},{name:"Firefox",version:"89.0",engine:"Gecko/20100101"},{name:"Safari",version:"14.1.1",engine:"AppleWebKit/605.1.15"},{name:"Edge",version:"91.0.864.59",engine:"AppleWebKit/537.36"},{name:"Opera",version:"77.0.4054.277",engine:"AppleWebKit/537.36"},{name:"Vivaldi",version:"3.8.2259.42",engine:"AppleWebKit/537.36"},{name:"Brave",version:"1.26.74",engine:"AppleWebKit/537.36"},{name:"Chromium",version:"91.0.4472.101",engine:"AppleWebKit/537.36"},{name:"Yandex",version:"21.5.3.742",engine:"AppleWebKit/537.36"},{name:"Maxthon",version:"5.3.8.2000",engine:"AppleWebKit/537.36"}],r=["Windows NT 10.0","Windows NT 6.1","Macintosh; Intel Mac OS X 10_15_7","Macintosh; Intel Mac OS X 11_4_0","X11; Linux x86_64","X11; Ubuntu; Linux x86_64"],i=[];for(let a=0;a<200;a++){let t=e[Math.floor(Math.random()*e.length)],s=r[Math.floor(Math.random()*r.length)],o="";switch(t.name){case"Chrome":o=`Mozilla/5.0 (${s}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} Safari/537.36`;break;case"Firefox":o=`Mozilla/5.0 (${s}; rv:${t.version}) ${t.engine} Firefox/${t.version}`;break;case"Safari":o=`Mozilla/5.0 (${s}) ${t.engine} (KHTML, like Gecko) Version/${t.version} Safari/605.1.15`;break;case"Edge":o=`Mozilla/5.0 (${s}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} Safari/537.36 Edg/${t.version}`;break;case"Opera":o=`Mozilla/5.0 (${s}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} Safari/537.36 OPR/${t.version}`;break;case"Vivaldi":o=`Mozilla/5.0 (${s}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} Safari/537.36 Vivaldi/${t.version}`;break;case"Brave":o=`Mozilla/5.0 (${s}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} Safari/537.36 Brave/${t.version}`;break;case"Chromium":o=`Mozilla/5.0 (${s}) ${t.engine} (KHTML, like Gecko) Chromium/${t.version} Chrome/${t.version} Safari/537.36`;break;case"Yandex":o=`Mozilla/5.0 (${s}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} YaBrowser/${t.version} 
Safari/537.36`;break;case"Maxthon":o=`Mozilla/5.0 (${s}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} Safari/537.36 Maxthon/${t.version}`;break}i.push(o)}return i}export{l as CrawlerOptions};
@@ -927,6 +927,18 @@ class Crawler {
927
927
  }
928
928
  }
929
929
  }
930
+ dispatchErrorEvents(err) {
931
+ for (let i = 0;i < this.errorEvents.length; i++) {
932
+ const event = this.errorEvents[i];
933
+ try {
934
+ this[event.handler](...event.attr, err);
935
+ } catch (handlerErr) {
936
+ if (this.config.debug) {
937
+ console.error("[Crawler] error-event handler threw:", handlerErr);
938
+ }
939
+ }
940
+ }
941
+ }
930
942
  _runHandler(handler, arg) {
931
943
  this.eventCount++;
932
944
  new Promise(async (resolve) => {
@@ -1410,8 +1422,10 @@ class Crawler {
1410
1422
  await this.waitForNavigationHistory();
1411
1423
  await this.executeHttp(method, url, body, options, proxyRotating, isEmail, forceRevisit, oxylabsOptions, oxylabsInstance, oxylabsQueue, decodoInstance, decodoOptions, decodoQueue, 0, undefined, skipCache, emailMetadata, stealthInstance);
1412
1424
  }).catch((err) => {
1413
- if (this.config.debug)
1414
- console.warn("[Crawler] execute() task error:", err?.message);
1425
+ console.error(`[Crawler] execute() task error for ${url}:`, err?.message ?? err);
1426
+ if (this.config.debug && err?.stack)
1427
+ console.error(err.stack);
1428
+ this.dispatchErrorEvents(err);
1415
1429
  });
1416
1430
  }
1417
1431
  async execute2(method, url, body, options = {}, forceRevisit, emailMetadata) {
@@ -1436,8 +1450,10 @@ class Crawler {
1436
1450
  emailMetadata
1437
1451
  }, forceRevisit, true);
1438
1452
  }).catch((err) => {
1439
- if (this.config.debug)
1440
- console.warn("[Crawler] execute2() task error:", err?.message);
1453
+ console.error(`[Crawler] execute2() task error for ${url}:`, err?.message ?? err);
1454
+ if (this.config.debug && err?.stack)
1455
+ console.error(err.stack);
1456
+ this.dispatchErrorEvents(err);
1441
1457
  });
1442
1458
  }
1443
1459
  async executeHttp(method, url, body, options = {}, proxyRotating = false, isEmail, forceRevisit, oxylabsOptions, oxylabsInstance, oxylabsQueue, decodoInstance, decodoOptions, decodoQueue, retryCount = 0, parentUrl, skipCache, emailMetadata, stealthInstance) {
@@ -1726,6 +1742,10 @@ class Crawler {
1726
1742
  this.rawResponseEvents.length = 0;
1727
1743
  this.emailDiscoveredEvents.length = 0;
1728
1744
  this.emailLeadsEvents.length = 0;
1745
+ this.startHandlers.length = 0;
1746
+ this.finishHandlers.length = 0;
1747
+ this.redirectHandlers.length = 0;
1748
+ this.queueChangeHandlers.length = 0;
1729
1749
  this.domainResponseTimes.clear();
1730
1750
  this.domainCurrentDelay.clear();
1731
1751
  this.urlDepthMap.clear();
@@ -927,6 +927,18 @@ export class Crawler {
927
927
  }
928
928
  }
929
929
  }
930
+ dispatchErrorEvents(err) {
931
+ for (let i = 0;i < this.errorEvents.length; i++) {
932
+ const event = this.errorEvents[i];
933
+ try {
934
+ this[event.handler](...event.attr, err);
935
+ } catch (handlerErr) {
936
+ if (this.config.debug) {
937
+ console.error("[Crawler] error-event handler threw:", handlerErr);
938
+ }
939
+ }
940
+ }
941
+ }
930
942
  _runHandler(handler, arg) {
931
943
  this.eventCount++;
932
944
  new Promise(async (resolve) => {
@@ -1410,8 +1422,10 @@ export class Crawler {
1410
1422
  await this.waitForNavigationHistory();
1411
1423
  await this.executeHttp(method, url, body, options, proxyRotating, isEmail, forceRevisit, oxylabsOptions, oxylabsInstance, oxylabsQueue, decodoInstance, decodoOptions, decodoQueue, 0, undefined, skipCache, emailMetadata, stealthInstance);
1412
1424
  }).catch((err) => {
1413
- if (this.config.debug)
1414
- console.warn("[Crawler] execute() task error:", err?.message);
1425
+ console.error(`[Crawler] execute() task error for ${url}:`, err?.message ?? err);
1426
+ if (this.config.debug && err?.stack)
1427
+ console.error(err.stack);
1428
+ this.dispatchErrorEvents(err);
1415
1429
  });
1416
1430
  }
1417
1431
  async execute2(method, url, body, options = {}, forceRevisit, emailMetadata) {
@@ -1436,8 +1450,10 @@ export class Crawler {
1436
1450
  emailMetadata
1437
1451
  }, forceRevisit, true);
1438
1452
  }).catch((err) => {
1439
- if (this.config.debug)
1440
- console.warn("[Crawler] execute2() task error:", err?.message);
1453
+ console.error(`[Crawler] execute2() task error for ${url}:`, err?.message ?? err);
1454
+ if (this.config.debug && err?.stack)
1455
+ console.error(err.stack);
1456
+ this.dispatchErrorEvents(err);
1441
1457
  });
1442
1458
  }
1443
1459
  async executeHttp(method, url, body, options = {}, proxyRotating = false, isEmail, forceRevisit, oxylabsOptions, oxylabsInstance, oxylabsQueue, decodoInstance, decodoOptions, decodoQueue, retryCount = 0, parentUrl, skipCache, emailMetadata, stealthInstance) {
@@ -1726,6 +1742,10 @@ export class Crawler {
1726
1742
  this.rawResponseEvents.length = 0;
1727
1743
  this.emailDiscoveredEvents.length = 0;
1728
1744
  this.emailLeadsEvents.length = 0;
1745
+ this.startHandlers.length = 0;
1746
+ this.finishHandlers.length = 0;
1747
+ this.redirectHandlers.length = 0;
1748
+ this.queueChangeHandlers.length = 0;
1729
1749
  this.domainResponseTimes.clear();
1730
1750
  this.domainCurrentDelay.clear();
1731
1751
  this.urlDepthMap.clear();
@@ -1,42 +1,42 @@
1
- const _mod_22900y = require('./crawler.cjs');
2
- exports.Crawler = _mod_22900y.Crawler;;
3
- const _mod_22ubyp = require('./crawler-options.cjs');
4
- exports.CrawlerOptions = _mod_22ubyp.CrawlerOptions;;
5
- const _mod_h84o2j = require('./plugin/robots-txt.cjs');
6
- exports.RobotsTxt = _mod_h84o2j.RobotsTxt;;
7
- const _mod_jjmsn2 = require('./plugin/file-cacher.cjs');
8
- exports.FileCacher = _mod_jjmsn2.FileCacher;;
9
- const _mod_538aj2 = require('./plugin/url-store.cjs');
10
- exports.UrlStore = _mod_538aj2.UrlStore;;
11
- const _mod_0mnqbz = require('./plugin/navigation-history.cjs');
12
- exports.NavigationHistory = _mod_0mnqbz.NavigationHistory;;
13
- const _mod_euwe7r = require('./addon/oxylabs/index.cjs');
14
- exports.Oxylabs = _mod_euwe7r.Oxylabs;;
15
- const _mod_gp9gpd = require('./addon/oxylabs/options.cjs');
16
- exports.OXYLABS_BROWSER_TYPES = _mod_gp9gpd.OXYLABS_BROWSER_TYPES;
17
- exports.OXYLABS_COMMON_LOCALES = _mod_gp9gpd.OXYLABS_COMMON_LOCALES;
18
- exports.OXYLABS_COMMON_GEO_LOCATIONS = _mod_gp9gpd.OXYLABS_COMMON_GEO_LOCATIONS;
19
- exports.OXYLABS_US_STATES = _mod_gp9gpd.OXYLABS_US_STATES;
20
- exports.OXYLABS_EUROPEAN_COUNTRIES = _mod_gp9gpd.OXYLABS_EUROPEAN_COUNTRIES;
21
- exports.OXYLABS_ASIAN_COUNTRIES = _mod_gp9gpd.OXYLABS_ASIAN_COUNTRIES;
22
- exports.getRandomOxylabsBrowserType = _mod_gp9gpd.getRandomBrowserType;
23
- exports.getRandomOxylabsLocale = _mod_gp9gpd.getRandomLocale;
24
- exports.getRandomOxylabsGeoLocation = _mod_gp9gpd.getRandomGeoLocation;;
25
- const _mod_d6ta58 = require('./scraper.cjs');
26
- exports.isRestrictedDomain = _mod_d6ta58.isRestrictedDomain;;
27
- const _mod_sdriey = require('./addon/decodo/index.cjs');
28
- exports.Decodo = _mod_sdriey.Decodo;;
29
- const _mod_udz2vx = require('./addon/decodo/options.cjs');
30
- exports.DECODO_DEVICE_TYPES = _mod_udz2vx.DECODO_DEVICE_TYPES;
31
- exports.DECODO_HEADLESS_MODES = _mod_udz2vx.DECODO_HEADLESS_MODES;
32
- exports.DECODO_COMMON_LOCALES = _mod_udz2vx.DECODO_COMMON_LOCALES;
33
- exports.DECODO_COMMON_COUNTRIES = _mod_udz2vx.DECODO_COMMON_COUNTRIES;
34
- exports.DECODO_EUROPEAN_COUNTRIES = _mod_udz2vx.DECODO_EUROPEAN_COUNTRIES;
35
- exports.DECODO_ASIAN_COUNTRIES = _mod_udz2vx.DECODO_ASIAN_COUNTRIES;
36
- exports.DECODO_US_STATES = _mod_udz2vx.DECODO_US_STATES;
37
- exports.DECODO_COMMON_CITIES = _mod_udz2vx.DECODO_COMMON_CITIES;
38
- exports.getRandomDecodoDeviceType = _mod_udz2vx.getRandomDeviceType;
39
- exports.getRandomDecodoLocale = _mod_udz2vx.getRandomLocale;
40
- exports.getRandomDecodoCountry = _mod_udz2vx.getRandomCountry;
41
- exports.getRandomDecodoCity = _mod_udz2vx.getRandomCity;
42
- exports.generateDecodoSessionId = _mod_udz2vx.generateSessionId;;
1
+ const _mod_0d9h80 = require('./crawler.cjs');
2
+ exports.Crawler = _mod_0d9h80.Crawler;;
3
+ const _mod_flsl96 = require('./crawler-options.cjs');
4
+ exports.CrawlerOptions = _mod_flsl96.CrawlerOptions;;
5
+ const _mod_hshb8k = require('./plugin/robots-txt.cjs');
6
+ exports.RobotsTxt = _mod_hshb8k.RobotsTxt;;
7
+ const _mod_qmo1zd = require('./plugin/file-cacher.cjs');
8
+ exports.FileCacher = _mod_qmo1zd.FileCacher;;
9
+ const _mod_lktnzz = require('./plugin/url-store.cjs');
10
+ exports.UrlStore = _mod_lktnzz.UrlStore;;
11
+ const _mod_g62vuv = require('./plugin/navigation-history.cjs');
12
+ exports.NavigationHistory = _mod_g62vuv.NavigationHistory;;
13
+ const _mod_dfvupb = require('./addon/oxylabs/index.cjs');
14
+ exports.Oxylabs = _mod_dfvupb.Oxylabs;;
15
+ const _mod_lffstj = require('./addon/oxylabs/options.cjs');
16
+ exports.OXYLABS_BROWSER_TYPES = _mod_lffstj.OXYLABS_BROWSER_TYPES;
17
+ exports.OXYLABS_COMMON_LOCALES = _mod_lffstj.OXYLABS_COMMON_LOCALES;
18
+ exports.OXYLABS_COMMON_GEO_LOCATIONS = _mod_lffstj.OXYLABS_COMMON_GEO_LOCATIONS;
19
+ exports.OXYLABS_US_STATES = _mod_lffstj.OXYLABS_US_STATES;
20
+ exports.OXYLABS_EUROPEAN_COUNTRIES = _mod_lffstj.OXYLABS_EUROPEAN_COUNTRIES;
21
+ exports.OXYLABS_ASIAN_COUNTRIES = _mod_lffstj.OXYLABS_ASIAN_COUNTRIES;
22
+ exports.getRandomOxylabsBrowserType = _mod_lffstj.getRandomBrowserType;
23
+ exports.getRandomOxylabsLocale = _mod_lffstj.getRandomLocale;
24
+ exports.getRandomOxylabsGeoLocation = _mod_lffstj.getRandomGeoLocation;;
25
+ const _mod_6v3fut = require('./scraper.cjs');
26
+ exports.isRestrictedDomain = _mod_6v3fut.isRestrictedDomain;;
27
+ const _mod_jcv8ev = require('./addon/decodo/index.cjs');
28
+ exports.Decodo = _mod_jcv8ev.Decodo;;
29
+ const _mod_quo7cv = require('./addon/decodo/options.cjs');
30
+ exports.DECODO_DEVICE_TYPES = _mod_quo7cv.DECODO_DEVICE_TYPES;
31
+ exports.DECODO_HEADLESS_MODES = _mod_quo7cv.DECODO_HEADLESS_MODES;
32
+ exports.DECODO_COMMON_LOCALES = _mod_quo7cv.DECODO_COMMON_LOCALES;
33
+ exports.DECODO_COMMON_COUNTRIES = _mod_quo7cv.DECODO_COMMON_COUNTRIES;
34
+ exports.DECODO_EUROPEAN_COUNTRIES = _mod_quo7cv.DECODO_EUROPEAN_COUNTRIES;
35
+ exports.DECODO_ASIAN_COUNTRIES = _mod_quo7cv.DECODO_ASIAN_COUNTRIES;
36
+ exports.DECODO_US_STATES = _mod_quo7cv.DECODO_US_STATES;
37
+ exports.DECODO_COMMON_CITIES = _mod_quo7cv.DECODO_COMMON_CITIES;
38
+ exports.getRandomDecodoDeviceType = _mod_quo7cv.getRandomDeviceType;
39
+ exports.getRandomDecodoLocale = _mod_quo7cv.getRandomLocale;
40
+ exports.getRandomDecodoCountry = _mod_quo7cv.getRandomCountry;
41
+ exports.getRandomDecodoCity = _mod_quo7cv.getRandomCity;
42
+ exports.generateDecodoSessionId = _mod_quo7cv.generateSessionId;;
@@ -1 +1 @@
1
- var e=require("./file-cacher.cjs");exports.FileCacher=e.FileCacher;var r=require("./url-store.cjs");exports.UrlStore=r.UrlStore;var o=require("./result-stream.cjs");exports.ResultStream=o.ResultStream;var t=require("./memory-monitor.cjs");exports.MemoryMonitor=t.MemoryMonitor;var a=require("./health-metrics.cjs");exports.HealthMetrics=a.HealthMetrics;var i=require("./capped-map.cjs");exports.CappedMap=i.CappedMap;var m=require("./capped-array.cjs");exports.CappedArray=m.CappedArray;
1
+ var e=require("./file-cacher.cjs");exports.FileCacher=e.FileCacher;var r=require("./url-store.cjs");exports.UrlStore=r.UrlStore;var o=require("./result-stream.cjs");exports.ResultStream=o.ResultStream;var t=require("./memory-monitor.cjs");exports.MemoryMonitor=t.MemoryMonitor;var a=require("./health-metrics.cjs");exports.HealthMetrics=a.HealthMetrics;var i=require("./capped-map.cjs");exports.CappedMap=i.CappedMap;var d=require("./capped-array.cjs");exports.CappedArray=d.CappedArray;
@@ -1 +1 @@
1
- var y=exports.isBun=typeof globalThis.Bun<"u",h={maxRetries:5,initialDelayMs:50,maxDelayMs:1000};function f(s){if(!s)return!1;let t=s.message||s.toString();return t.includes("SQLITE_BUSY")||t.includes("database is locked")||t.includes("database table is locked")}function d(s){return new Promise((t)=>setTimeout(t,s))}function u(s,t={}){let r={...h,...t},l;for(let n=0;n<=r.maxRetries;n++)try{return s()}catch(i){if(l=i,!f(i)||n>=r.maxRetries)throw i;let e=Math.min(r.initialDelayMs*Math.pow(2,n)+Math.random()*50,r.maxDelayMs),a=Date.now();while(Date.now()-a<e);}throw l}async function g(s,t={}){let r={...h,...t},l;for(let n=0;n<=r.maxRetries;n++)try{return await s()}catch(i){if(l=i,!f(i)||n>=r.maxRetries)throw i;let e=Math.min(r.initialDelayMs*Math.pow(2,n)+Math.random()*50,r.maxDelayMs);await d(e)}throw l}async function M(s,t={}){if(y){let{Database:e}=await import("bun:sqlite"),a=new e(s),m=new Map,w=(c)=>{let o=m.get(c);if(!o)o=a.query(c),m.set(c,o);return o};return{run:(c,...o)=>u(()=>{if(o.length===0)a.run(c);else w(c).run(...o)},t),get:(c,...o)=>u(()=>w(c).get(...o),t),all:(c,...o)=>u(()=>w(c).all(...o),t),exec:(c)=>u(()=>a.exec(c),t),close:()=>{m.clear(),a.close()}}}let{DatabaseSync:r}=await import("node:sqlite"),l=new r(s),n=new Map,i=(e)=>{let a=n.get(e);if(!a)a=l.prepare(e),n.set(e,a);return a};return{run:(e,...a)=>u(()=>{if(a.length===0)l.exec(e);else i(e).run(...a)},t),get:(e,...a)=>u(()=>{return i(e).get(...a)},t),all:(e,...a)=>u(()=>{return i(e).all(...a)},t),exec:(e)=>u(()=>l.exec(e),t),close:()=>{n.clear(),l.close()}}}exports.withRetrySync=u;exports.withRetryAsync=g;exports.createDatabase=M;
1
+ var d=exports.isBun=typeof globalThis.Bun<"u",f={maxRetries:5,initialDelayMs:50,maxDelayMs:1000};function y(a){if(!a)return!1;let e=a.message||a.toString();return e.includes("SQLITE_BUSY")||e.includes("database is locked")||e.includes("database table is locked")}function g(a){return new Promise((e)=>setTimeout(e,a))}function M(a){if(a<=0)return;try{let e=m??(m=new Int32Array(new SharedArrayBuffer(4)));Atomics.wait(e,0,0,a)}catch{let e=Date.now();while(Date.now()-e<a);}}var m=null;function u(a,e={}){let c={...f,...e},o;for(let r=0;r<=c.maxRetries;r++)try{return a()}catch(i){if(o=i,!y(i)||r>=c.maxRetries)throw i;let t=Math.min(c.initialDelayMs*Math.pow(2,r)+Math.random()*50,c.maxDelayMs);M(t)}throw o}async function x(a,e={}){let c={...f,...e},o;for(let r=0;r<=c.maxRetries;r++)try{return await a()}catch(i){if(o=i,!y(i)||r>=c.maxRetries)throw i;let t=Math.min(c.initialDelayMs*Math.pow(2,r)+Math.random()*50,c.maxDelayMs);await g(t)}throw o}async function D(a,e={}){if(d){let{Database:t}=await import("bun:sqlite"),n=new t(a),w=new Map,h=(s)=>{let l=w.get(s);if(!l)l=n.query(s),w.set(s,l);return l};return{run:(s,...l)=>u(()=>{if(l.length===0)n.run(s);else h(s).run(...l)},e),get:(s,...l)=>u(()=>h(s).get(...l),e),all:(s,...l)=>u(()=>h(s).all(...l),e),exec:(s)=>u(()=>n.exec(s),e),close:()=>{w.clear(),n.close()}}}let{DatabaseSync:c}=await import("node:sqlite"),o=new c(a),r=new Map,i=(t)=>{let n=r.get(t);if(!n)n=o.prepare(t),r.set(t,n);return n};return{run:(t,...n)=>u(()=>{if(n.length===0)o.exec(t);else i(t).run(...n)},e),get:(t,...n)=>u(()=>{return i(t).get(...n)},e),all:(t,...n)=>u(()=>{return i(t).all(...n)},e),exec:(t)=>u(()=>o.exec(t),e),close:()=>{r.clear(),o.close()}}}exports.withRetrySync=u;exports.withRetryAsync=x;exports.createDatabase=D;
@@ -1 +1 @@
1
- import{createRequire as d}from"node:module";var h=d(import.meta.url);var g=typeof globalThis.Bun<"u",f={maxRetries:5,initialDelayMs:50,maxDelayMs:1000};function x(s){if(!s)return!1;let t=s.message||s.toString();return t.includes("SQLITE_BUSY")||t.includes("database is locked")||t.includes("database table is locked")}function M(s){return new Promise((t)=>setTimeout(t,s))}function u(s,t={}){let c={...f,...t},l;for(let n=0;n<=c.maxRetries;n++)try{return s()}catch(o){if(l=o,!x(o)||n>=c.maxRetries)throw o;let e=Math.min(c.initialDelayMs*Math.pow(2,n)+Math.random()*50,c.maxDelayMs),a=Date.now();while(Date.now()-a<e);}throw l}async function p(s,t={}){let c={...f,...t},l;for(let n=0;n<=c.maxRetries;n++)try{return await s()}catch(o){if(l=o,!x(o)||n>=c.maxRetries)throw o;let e=Math.min(c.initialDelayMs*Math.pow(2,n)+Math.random()*50,c.maxDelayMs);await M(e)}throw l}async function R(s,t={}){if(g){let{Database:e}=await import("bun:sqlite"),a=new e(s),m=new Map,w=(r)=>{let i=m.get(r);if(!i)i=a.query(r),m.set(r,i);return i};return{run:(r,...i)=>u(()=>{if(i.length===0)a.run(r);else w(r).run(...i)},t),get:(r,...i)=>u(()=>w(r).get(...i),t),all:(r,...i)=>u(()=>w(r).all(...i),t),exec:(r)=>u(()=>a.exec(r),t),close:()=>{m.clear(),a.close()}}}let{DatabaseSync:c}=await import("node:sqlite"),l=new c(s),n=new Map,o=(e)=>{let a=n.get(e);if(!a)a=l.prepare(e),n.set(e,a);return a};return{run:(e,...a)=>u(()=>{if(a.length===0)l.exec(e);else o(e).run(...a)},t),get:(e,...a)=>u(()=>{return o(e).get(...a)},t),all:(e,...a)=>u(()=>{return o(e).all(...a)},t),exec:(e)=>u(()=>l.exec(e),t),close:()=>{n.clear(),l.close()}}}export{u as withRetrySync,p as withRetryAsync,g as isBun,R as createDatabase};
1
+ import{createRequire as d}from"node:module";var f=d(import.meta.url);var g=typeof globalThis.Bun<"u",x={maxRetries:5,initialDelayMs:50,maxDelayMs:1000};function y(a){if(!a)return!1;let e=a.message||a.toString();return e.includes("SQLITE_BUSY")||e.includes("database is locked")||e.includes("database table is locked")}function M(a){return new Promise((e)=>setTimeout(e,a))}function D(a){if(a<=0)return;try{let e=h??(h=new Int32Array(new SharedArrayBuffer(4)));Atomics.wait(e,0,0,a)}catch{let e=Date.now();while(Date.now()-e<a);}}var h=null;function u(a,e={}){let c={...x,...e},l;for(let r=0;r<=c.maxRetries;r++)try{return a()}catch(o){if(l=o,!y(o)||r>=c.maxRetries)throw o;let t=Math.min(c.initialDelayMs*Math.pow(2,r)+Math.random()*50,c.maxDelayMs);D(t)}throw l}async function R(a,e={}){let c={...x,...e},l;for(let r=0;r<=c.maxRetries;r++)try{return await a()}catch(o){if(l=o,!y(o)||r>=c.maxRetries)throw o;let t=Math.min(c.initialDelayMs*Math.pow(2,r)+Math.random()*50,c.maxDelayMs);await M(t)}throw l}async function b(a,e={}){if(g){let{Database:t}=await import("bun:sqlite"),n=new t(a),w=new Map,m=(s)=>{let i=w.get(s);if(!i)i=n.query(s),w.set(s,i);return i};return{run:(s,...i)=>u(()=>{if(i.length===0)n.run(s);else m(s).run(...i)},e),get:(s,...i)=>u(()=>m(s).get(...i),e),all:(s,...i)=>u(()=>m(s).all(...i),e),exec:(s)=>u(()=>n.exec(s),e),close:()=>{w.clear(),n.close()}}}let{DatabaseSync:c}=await import("node:sqlite"),l=new c(a),r=new Map,o=(t)=>{let n=r.get(t);if(!n)n=l.prepare(t),r.set(t,n);return n};return{run:(t,...n)=>u(()=>{if(n.length===0)l.exec(t);else o(t).run(...n)},e),get:(t,...n)=>u(()=>{return o(t).get(...n)},e),all:(t,...n)=>u(()=>{return o(t).all(...n)},e),exec:(t)=>u(()=>l.exec(t),e),close:()=>{r.clear(),l.close()}}}export{u as withRetrySync,R as withRetryAsync,g as isBun,b as createDatabase};
package/dist/crawler.d.ts CHANGED
@@ -9372,6 +9372,13 @@ export declare class Crawler {
9372
9372
  * Called when onQueueChange handlers are registered to ensure limiter events are captured.
9373
9373
  */
9374
9374
  private _subscribeToManagedQueues;
9375
+ /**
9376
+ * Invokes every registered error-event handler with the given error.
9377
+ * Swallows per-handler throws so one misbehaving handler can't mask others.
9378
+ * Used from the fire-and-forget execute/execute2 catch blocks so those
9379
+ * errors reach the same handler chain as in-crawl errors.
9380
+ */
9381
+ private dispatchErrorEvents;
9375
9382
  /**
9376
9383
  * Run a handler with event tracking (not through queue).
9377
9384
  * Increments eventCount before running, decrements after completion.
@@ -1,23 +1 @@
1
- let _linkedom = null;
2
- function getLinkedom() {
3
- if (_linkedom)
4
- return _linkedom;
5
- try {
6
- _linkedom = require("linkedom");
7
- return _linkedom;
8
- } catch {
9
- throw new Error(`linkedom is required for DOM parsing but is not installed.
10
- ` + `Install it with: npm install linkedom
11
- ` + "Or: bun add linkedom");
12
- }
13
- }
14
- function parseHTML(html) {
15
- return getLinkedom().parseHTML(html);
16
- }
17
- function createDOMParser() {
18
- const Linkedom = getLinkedom();
19
- return new Linkedom.DOMParser;
20
- }
21
-
22
- exports.parseHTML = parseHTML;
23
- exports.createDOMParser = createDOMParser;
1
+ Object.assign(exports, require("linkedom"));