rezo 1.0.102 → 1.0.103
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/adapters/entries/curl.d.ts +1 -1
- package/dist/adapters/entries/fetch.d.ts +1 -1
- package/dist/adapters/entries/http.d.ts +2 -2
- package/dist/adapters/entries/http2.d.ts +2 -2
- package/dist/adapters/entries/react-native.d.ts +1 -1
- package/dist/adapters/entries/xhr.d.ts +1 -1
- package/dist/adapters/index.cjs +6 -6
- package/dist/cache/index.cjs +9 -9
- package/dist/cookies/cookie-jar.cjs +4 -4
- package/dist/cookies/index.cjs +12 -12
- package/dist/crawler/crawler-options.cjs +1 -1
- package/dist/crawler/crawler-options.js +1 -1
- package/dist/crawler/crawler.cjs +44 -6
- package/dist/crawler/crawler.js +44 -6
- package/dist/crawler/index.cjs +42 -42
- package/dist/crawler/plugin/index.cjs +1 -1
- package/dist/crawler.d.ts +35 -2
- package/dist/entries/crawler.cjs +6 -6
- package/dist/index.cjs +41 -41
- package/dist/index.d.ts +1 -1
- package/dist/internal/agents/index.cjs +14 -14
- package/dist/platform/browser.d.ts +1 -1
- package/dist/platform/bun.d.ts +1 -1
- package/dist/platform/deno.d.ts +1 -1
- package/dist/platform/node.d.ts +1 -1
- package/dist/platform/react-native.d.ts +1 -1
- package/dist/platform/worker.d.ts +1 -1
- package/dist/proxy/index.cjs +4 -4
- package/dist/queue/index.cjs +9 -9
- package/dist/responses/universal/index.cjs +11 -11
- package/dist/stealth/index.cjs +17 -17
- package/dist/stealth/profiles/index.cjs +10 -10
- package/dist/version.cjs +1 -1
- package/dist/version.js +1 -1
- package/dist/wget/index.cjs +51 -51
- package/package.json +1 -1
|
@@ -5514,7 +5514,7 @@ export interface RezoInstance extends Rezo, RezoCallable {
|
|
|
5514
5514
|
*
|
|
5515
5515
|
* IMPORTANT: Update these values when bumping package version.
|
|
5516
5516
|
*/
|
|
5517
|
-
export declare const VERSION = "1.0.
|
|
5517
|
+
export declare const VERSION = "1.0.103";
|
|
5518
5518
|
/**
|
|
5519
5519
|
* cURL Options Configuration
|
|
5520
5520
|
*
|
|
@@ -5514,7 +5514,7 @@ export interface RezoInstance extends Rezo, RezoCallable {
|
|
|
5514
5514
|
*
|
|
5515
5515
|
* IMPORTANT: Update these values when bumping package version.
|
|
5516
5516
|
*/
|
|
5517
|
-
export declare const VERSION = "1.0.
|
|
5517
|
+
export declare const VERSION = "1.0.103";
|
|
5518
5518
|
export declare const isRezoError: typeof RezoError.isRezoError;
|
|
5519
5519
|
export declare const Cancel: typeof RezoError;
|
|
5520
5520
|
export declare const CancelToken: {
|
|
@@ -2664,7 +2664,7 @@ export interface ResolvedStealthProfile {
|
|
|
2664
2664
|
/** Navigator properties for JS environment emulation */
|
|
2665
2665
|
navigator: BrowserProfile["navigator"];
|
|
2666
2666
|
}
|
|
2667
|
-
declare class RezoStealth {
|
|
2667
|
+
export declare class RezoStealth {
|
|
2668
2668
|
private readonly _input;
|
|
2669
2669
|
private _resolved;
|
|
2670
2670
|
/** True when constructed with no args — profile will be detected from request headers */
|
|
@@ -5514,7 +5514,7 @@ export interface RezoInstance extends Rezo, RezoCallable {
|
|
|
5514
5514
|
*
|
|
5515
5515
|
* IMPORTANT: Update these values when bumping package version.
|
|
5516
5516
|
*/
|
|
5517
|
-
export declare const VERSION = "1.0.
|
|
5517
|
+
export declare const VERSION = "1.0.103";
|
|
5518
5518
|
/**
|
|
5519
5519
|
* Type guard to check if an error is a RezoError instance.
|
|
5520
5520
|
*/
|
|
@@ -2664,7 +2664,7 @@ export interface ResolvedStealthProfile {
|
|
|
2664
2664
|
/** Navigator properties for JS environment emulation */
|
|
2665
2665
|
navigator: BrowserProfile["navigator"];
|
|
2666
2666
|
}
|
|
2667
|
-
declare class RezoStealth {
|
|
2667
|
+
export declare class RezoStealth {
|
|
2668
2668
|
private readonly _input;
|
|
2669
2669
|
private _resolved;
|
|
2670
2670
|
/** True when constructed with no args — profile will be detected from request headers */
|
|
@@ -5514,7 +5514,7 @@ export interface RezoInstance extends Rezo, RezoCallable {
|
|
|
5514
5514
|
*
|
|
5515
5515
|
* IMPORTANT: Update these values when bumping package version.
|
|
5516
5516
|
*/
|
|
5517
|
-
export declare const VERSION = "1.0.
|
|
5517
|
+
export declare const VERSION = "1.0.103";
|
|
5518
5518
|
export declare const isRezoError: typeof RezoError.isRezoError;
|
|
5519
5519
|
export declare const Cancel: typeof RezoError;
|
|
5520
5520
|
export declare const CancelToken: {
|
|
@@ -5514,7 +5514,7 @@ export interface RezoInstance extends Rezo, RezoCallable {
|
|
|
5514
5514
|
*
|
|
5515
5515
|
* IMPORTANT: Update these values when bumping package version.
|
|
5516
5516
|
*/
|
|
5517
|
-
export declare const VERSION = "1.0.
|
|
5517
|
+
export declare const VERSION = "1.0.103";
|
|
5518
5518
|
export declare const isRezoError: typeof RezoError.isRezoError;
|
|
5519
5519
|
export declare const Cancel: typeof RezoError;
|
|
5520
5520
|
export declare const CancelToken: {
|
|
@@ -5514,7 +5514,7 @@ export interface RezoInstance extends Rezo, RezoCallable {
|
|
|
5514
5514
|
*
|
|
5515
5515
|
* IMPORTANT: Update these values when bumping package version.
|
|
5516
5516
|
*/
|
|
5517
|
-
export declare const VERSION = "1.0.
|
|
5517
|
+
export declare const VERSION = "1.0.103";
|
|
5518
5518
|
export declare const isRezoError: typeof RezoError.isRezoError;
|
|
5519
5519
|
export declare const Cancel: typeof RezoError;
|
|
5520
5520
|
export declare const CancelToken: {
|
package/dist/adapters/index.cjs
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
const
|
|
2
|
-
exports.detectRuntime =
|
|
3
|
-
exports.getAdapterCapabilities =
|
|
4
|
-
exports.buildAdapterContext =
|
|
5
|
-
exports.getAvailableAdapters =
|
|
6
|
-
exports.selectAdapter =
|
|
1
|
+
const _mod_1q3b5r = require('./picker.cjs');
|
|
2
|
+
exports.detectRuntime = _mod_1q3b5r.detectRuntime;
|
|
3
|
+
exports.getAdapterCapabilities = _mod_1q3b5r.getAdapterCapabilities;
|
|
4
|
+
exports.buildAdapterContext = _mod_1q3b5r.buildAdapterContext;
|
|
5
|
+
exports.getAvailableAdapters = _mod_1q3b5r.getAvailableAdapters;
|
|
6
|
+
exports.selectAdapter = _mod_1q3b5r.selectAdapter;;
|
package/dist/cache/index.cjs
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
const
|
|
2
|
-
exports.LRUCache =
|
|
3
|
-
const
|
|
4
|
-
exports.DNSCache =
|
|
5
|
-
exports.getGlobalDNSCache =
|
|
6
|
-
exports.resetGlobalDNSCache =
|
|
7
|
-
const
|
|
8
|
-
exports.ResponseCache =
|
|
9
|
-
exports.normalizeResponseCacheConfig =
|
|
1
|
+
const _mod_qju1ca = require('./lru-cache.cjs');
|
|
2
|
+
exports.LRUCache = _mod_qju1ca.LRUCache;;
|
|
3
|
+
const _mod_w5a8hm = require('./dns-cache.cjs');
|
|
4
|
+
exports.DNSCache = _mod_w5a8hm.DNSCache;
|
|
5
|
+
exports.getGlobalDNSCache = _mod_w5a8hm.getGlobalDNSCache;
|
|
6
|
+
exports.resetGlobalDNSCache = _mod_w5a8hm.resetGlobalDNSCache;;
|
|
7
|
+
const _mod_jl8e34 = require('./response-cache.cjs');
|
|
8
|
+
exports.ResponseCache = _mod_jl8e34.ResponseCache;
|
|
9
|
+
exports.normalizeResponseCacheConfig = _mod_jl8e34.normalizeResponseCacheConfig;;
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
const { CookieJar: TouchCookieJar } = require("tough-cookie");
|
|
2
2
|
const { Cookie } = require('./cookie.cjs');
|
|
3
|
-
const
|
|
4
|
-
exports.Cookie =
|
|
3
|
+
const _mod_e2fve9 = require('./cookie.cjs');
|
|
4
|
+
exports.Cookie = _mod_e2fve9.Cookie;;
|
|
5
5
|
|
|
6
6
|
class RezoCookieJar extends TouchCookieJar {
|
|
7
7
|
constructor(store, options) {
|
|
@@ -469,7 +469,7 @@ class RezoCookieJar extends TouchCookieJar {
|
|
|
469
469
|
}
|
|
470
470
|
}
|
|
471
471
|
const CookieJar = exports.CookieJar = RezoCookieJar;
|
|
472
|
-
const
|
|
473
|
-
exports.Store =
|
|
472
|
+
const _mod_fmpdbk = require("tough-cookie");
|
|
473
|
+
exports.Store = _mod_fmpdbk.Store;;
|
|
474
474
|
|
|
475
475
|
exports.RezoCookieJar = RezoCookieJar;
|
package/dist/cookies/index.cjs
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
|
-
const
|
|
2
|
-
exports.Cookie =
|
|
3
|
-
exports.RezoCookie =
|
|
4
|
-
const
|
|
5
|
-
exports.RezoCookieStore =
|
|
6
|
-
const
|
|
7
|
-
exports.RezoFileCookieStore =
|
|
8
|
-
const
|
|
9
|
-
exports.RezoCookieJar =
|
|
10
|
-
exports.CookieJar =
|
|
11
|
-
const
|
|
12
|
-
exports.Store =
|
|
1
|
+
const _mod_pbjc3l = require('./cookie.cjs');
|
|
2
|
+
exports.Cookie = _mod_pbjc3l.Cookie;
|
|
3
|
+
exports.RezoCookie = _mod_pbjc3l.RezoCookie;;
|
|
4
|
+
const _mod_9t7p5v = require('./cookie-store.cjs');
|
|
5
|
+
exports.RezoCookieStore = _mod_9t7p5v.RezoCookieStore;;
|
|
6
|
+
const _mod_7nhc7y = require('./file-store.cjs');
|
|
7
|
+
exports.RezoFileCookieStore = _mod_7nhc7y.RezoFileCookieStore;;
|
|
8
|
+
const _mod_atrck9 = require('./cookie-jar.cjs');
|
|
9
|
+
exports.RezoCookieJar = _mod_atrck9.RezoCookieJar;
|
|
10
|
+
exports.CookieJar = _mod_atrck9.CookieJar;;
|
|
11
|
+
const _mod_fbhb47 = require("tough-cookie");
|
|
12
|
+
exports.Store = _mod_fbhb47.Store;;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
var{RezoQueue:m}=require("../queue/queue.cjs"),{Oxylabs:b}=require("./addon/oxylabs/index.cjs"),x=require("node:path"),g=require("node:os"),{Decodo:y}=require("./addon/decodo/index.cjs"),{Rezo:A}=require("../core/rezo.cjs");class f{baseUrl;adapter;enableNavigationHistory;sessionId;rejectUnauthorized;userAgent;useRndUserAgent;timeout;maxRedirects;maxRetryAttempts;retryDelay;retryOnStatusCode;forceRevisit;retryWithoutProxyOnStatusCode;retryOnProxyError;maxRetryOnProxyError;allowRevisiting;enableCache;cacheTTL;cacheDir;throwFatalError;debug;enableSignalHandlers;concurrency;scraperConcurrency;maxDepth;maxUrls;maxResponseSize;respectRobotsTxt;followNofollow;autoThrottle;autoThrottleTargetDelay;autoThrottleMinDelay;autoThrottleMaxDelay;maxWaitOn429;alwaysWaitOn429;stealth;oxylabs=[];decodo=[];stealthProfiles=[];proxies=[];limiters=[];onLimiterAdded;requestHeaders=[];userAgents=v();constructor(e={}){this.baseUrl=e.baseUrl||"",this.adapter=e.adapter??"http",this.enableNavigationHistory=e.enableNavigationHistory??!1,this.sessionId=e.sessionId??`session_${Date.now()}_${Math.random().toString(36).slice(2,8)}`,this.rejectUnauthorized=e.rejectUnauthorized??!0,this.userAgent=e.userAgent,this.useRndUserAgent=e.useRndUserAgent??!1,this.timeout=e.timeout??30000,this.maxRedirects=e.maxRedirects??10,this.maxRetryAttempts=e.maxRetryAttempts??3,this.retryDelay=e.retryDelay??0,this.retryOnStatusCode=e.retryOnStatusCode??[408,429,500,502,503,504],this.forceRevisit=e.forceRevisit??!1,this.retryWithoutProxyOnStatusCode=e.retryWithoutProxyOnStatusCode??[407,403],this.retryOnProxyError=e.retryOnProxyError??!0,this.maxRetryOnProxyError=e.maxRetryOnProxyError??3,this.allowRevisiting=e.allowRevisiting??!1,this.enableCache=e.enableCache??!0,this.cacheTTL=e.cacheTTL??604800000,this.cacheDir=e.cacheDir??x.join(g.tmpdir(),"uiniqhtt_cache"),this.throwFatalError=e.throwFatalError??!1,this.debug=e.debug??!1,this.enableSignalHandlers=e.enableSignalHandlers??!1,this.concurrency=e.concurrency??100,this.sc
raperConcurrency=e.scraperConcurrency??this.concurrency,this.maxDepth=e.maxDepth??0,this.maxUrls=e.maxUrls??0,this.maxResponseSize=e.maxResponseSize??0,this.respectRobotsTxt=e.respectRobotsTxt??!1,this.followNofollow=e.followNofollow??!1,this.autoThrottle=e.autoThrottle??!0,this.autoThrottleTargetDelay=e.autoThrottleTargetDelay??1000,this.autoThrottleMinDelay=e.autoThrottleMinDelay??100,this.autoThrottleMaxDelay=e.autoThrottleMaxDelay??60000,this.maxWaitOn429=e.maxWaitOn429??1800000,this.alwaysWaitOn429=e.alwaysWaitOn429??!1,this._addHeaders(e.headers),this._addOxylabs(e.oxylabs),this._addDecodo(e.decodo),this._addProxies(e.proxy),this._addLimiters(e.limiter),this._addStealths(e.stealths),this.stealth=e.stealth}getConfiguredDomains(e){return(e==="headers"?this.requestHeaders:e==="limiters"?this.limiters:e==="oxylabs"?this.oxylabs:e==="stealth"?this.stealthProfiles:this.proxies).filter((i)=>i.domain).map((i)=>i.domain).filter((i,a,r)=>r.indexOf(i)===a)}removeDomain(e){return this.requestHeaders=this.requestHeaders.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this.proxies=this.proxies.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this.limiters=this.limiters.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this.oxylabs=this.oxylabs.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this.stealthProfiles=this.stealthProfiles.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this}_domainsEqual(e,t){if(Array.isArray(e)&&Array.isArray(t))return e.length===t.length&&e.every((i,a)=>i===t[a]);return e===t}getConfigurationSummary(){let e=(t)=>({total:t.length,global:t.filter((i)=>i.isGlobal).length,domainSpecific:t.filter((i)=>!i.isGlobal&&i.domain).length});return{headers:e(this.requestHeaders),proxies:e(this.proxies),limiters:e(this.limiters),oxylabs:e(this.oxylabs)}}_addHeaders(e){if(!e||!e.enable)return;for(let t of e.httpHeaders){let{domain:i,isGlobal:a,headers:r}=t;if(!i&&!a)continue;if(r instanceof Headers){let 
s=Object.fromEntries(r.entries());if(Object.keys(s).length<1)continue;r=s}else if(!r||Object.keys(r).length<1)continue;this.requestHeaders.push({domain:i,isGlobal:a,headers:r})}}_addProxies(e){if(!e||!e.enable)return;for(let t of e.proxies){let{domain:i,isGlobal:a,proxy:r}=t;if(!i&&!a)continue;if(!r||Object.keys(r).length<1)continue;this.proxies.push({domain:i,isGlobal:a,proxy:r})}}_addLimiters(e){if(!e||!e.enable)return;for(let t of e.limiters){let{domain:i,isGlobal:a,options:r}=t;if(!i&&!a)continue;if(!r||Object.keys(r).length<1)continue;let s=new m(r);if(this.limiters.push({domain:i,isGlobal:a,pqueue:s,randomDelay:r.randomDelay}),this.onLimiterAdded)this.onLimiterAdded(s)}}_addOxylabs(e){if(!e||!e.enable)return;for(let t of e.labs){let{domain:i,isGlobal:a,options:r,queueOptions:s}=t;if(!i&&!a)continue;if(!r||Object.keys(r).length<1)continue;this.oxylabs.push({domain:i,isGlobal:a,adaptar:new b(r)})}}_addDecodo(e){if(!e||!e.enable)return;for(let t of e.labs){let{domain:i,isGlobal:a,options:r,queueOptions:s}=t;if(!i&&!a)continue;if(!r||Object.keys(r).length<1)continue;this.decodo.push({domain:i,isGlobal:a,adaptar:new y(r)})}}_addStealths(e){if(!e||!e.enable)return;for(let t of e.profiles){let{domain:i,isGlobal:a,stealth:r}=t;if(!i&&!a)continue;if(!r)continue;this.stealthProfiles.push({domain:i,isGlobal:a,adaptar:new A({stealth:r})})}}addHeaders(e){return this._addHeaders({enable:!0,httpHeaders:[e]}),this}addProxy(e){return this._addProxies({enable:!0,proxies:[e]}),this}addLimiter(e){return this._addLimiters({enable:!0,limiters:[e]}),this}addOxylabs(e){return this._addOxylabs({enable:!0,labs:[e]}),this}addDecodo(e){return this._addDecodo({enable:!0,labs:[e]}),this}addStealth(e){return this._addStealths({enable:!0,profiles:[e]}),this}destroyLimiters(){for(let e of this.limiters)if(e.pqueue&&typeof e.pqueue.destroy==="function")e.pqueue.destroy();this.limiters=[]}getLimiters(){return this.limiters}getRandomDelay(e,t){if(!this.getDomainName(e))return;for(let a of 
this.limiters)if(this._hasDomain(e,a.domain)&&a.randomDelay!==void 0)return a.randomDelay;if(t){for(let a of this.limiters)if(a.isGlobal&&a.randomDelay!==void 0)return a.randomDelay}return}clearGlobalConfigs(){if(Array.isArray(this.requestHeaders))this.requestHeaders=this.requestHeaders.filter((e)=>!e.isGlobal);if(Array.isArray(this.oxylabs))this.oxylabs=this.oxylabs.filter((e)=>!e.isGlobal);if(Array.isArray(this.limiters))this.limiters=this.limiters.filter((e)=>!e.isGlobal);if(Array.isArray(this.proxies))this.proxies=this.proxies.filter((e)=>!e.isGlobal);if(Array.isArray(this.stealthProfiles))this.stealthProfiles=this.stealthProfiles.filter((e)=>!e.isGlobal);return this}getAdapter(e,t,i,a){if(!this.getDomainName(e))return null;let s=[],o=t==="headers"?this.requestHeaders:t==="limiters"?this.limiters:t==="oxylabs"?this.oxylabs:t==="decodo"?this.decodo:t==="stealth"?this.stealthProfiles:this.proxies;for(let n=0;n<o.length;n++)if(this._hasDomain(e,o[n].domain))s.push(n);if(s.length){let n=a?s[this.rnd(0,s.length-1)]:s[0];return t==="headers"?this.requestHeaders[n].headers:t==="limiters"?this.limiters[n].pqueue:t==="oxylabs"?this.oxylabs[n].adaptar:t==="decodo"?this.decodo[n].adaptar:t==="stealth"?this.stealthProfiles[n].adaptar:this.proxies[n].proxy}s.length=0;for(let n=0;n<o.length;n++)s.push(n);if(s.length){let n=a?s[this.rnd(0,s.length-1)]:s[0];if(o[n].isGlobal&&i)return t==="headers"?this.requestHeaders[n].headers:t==="limiters"?this.limiters[n].pqueue:t==="oxylabs"?this.oxylabs[n].adaptar:t==="decodo"?this.decodo[n].adaptar:t==="stealth"?this.stealthProfiles[n].adaptar:this.proxies[n].proxy}return null}rnd(e=0,t=Number.MAX_VALUE){return Math.floor(Math.random()*(t-e+1))+e}hasDomain(e,t,i){if(!this.getDomainName(e))return!1;let r=t==="headers"?this.requestHeaders:t==="limiters"?this.limiters:t==="oxylabs"?this.oxylabs:t==="decodo"?this.decodo:t==="stealth"?this.stealthProfiles:this.proxies;for(let 
s=0;s<r.length;s++)if(this._hasDomain(e,r[s].domain))return!0;if(i){for(let s=0;s<r.length;s++)if(r[s].isGlobal)return!0}return!1}pickHeaders(e,t,i,a){let r=this.getAdapter(e,"headers",t),s=new Headers(r??{}),o=s.count;if(i&&i instanceof Headers)for(let[n,l]of Object.entries(i.entries()))s.set(n,l);else if(i&&typeof i==="object"){for(let[n,l]of Object.entries(i))if(typeof l==="string")s.set(n,l)}if(a&&o===0&&!this.stealth)s.set("user-agent",this.getRandomUserAgent());return Object.fromEntries(s.entries())}_hasDomain(e,t){if(!t)return!1;let i=this.getDomainName(e);if(!i)return!1;let a=(s)=>{return/[\^\$\*\+\?\{\}\[\]\(\)\|\\]/.test(s)||s.startsWith("/")||s.includes(".*")||s.includes(".+")},r=(s)=>{if(s instanceof RegExp)return s.test(i)||s.test(e);let o=s.toString().trim();if(i.toLowerCase()===o.toLowerCase())return!0;if(o.includes("*")){let h=o.replace(/[.*+?^${}()|[\]\\]/g,"\\$&").replace(/\\\*/g,".*"),d=new RegExp(`^${h}$`,"i");return d.test(i)||d.test(e)}if(a(o))try{let h=o,d="i",u=o.match(/^\/(.*)\/(\w*)$/);if(u)h=u[1],d=u[2]||"i";let c=new RegExp(h,d);return c.test(i)||c.test(e)}catch(h){return i.toLowerCase().includes(o.toLowerCase())}let n=i.toLowerCase(),l=o.toLowerCase();return n===l||n.endsWith("."+l)||l.endsWith("."+n)};if(Array.isArray(t)){for(let s of t)if(r(s))return!0;return!1}return r(t)}getDomainName(e){if(this.isValidUrl(e))return new URL(e).hostname;else if(this.isHostName(e))return e;return null}isHostName(e){if(!e)return!1;if(e.length>255)return!1;let t=/^(?:(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+ [a-zA-Z]{2,})$/;return e=e.trim().toLowerCase(),t.test(e)&&!e.startsWith("-")&&!e.endsWith("-")}isValidUrl(e){if(!e)return!1;e=e.trim();try{let t=new URL(e);if(!t.protocol||!["http:","https:"].includes(t.protocol.toLowerCase()))return!1;if(!t.hostname)return!1;if(!/^(?:(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,})$/.test(t.hostname))return!1;return!0}catch{return!1}}getRandomUserAgent(){return 
this.userAgents[Math.floor(Math.random()*this.userAgents.length)]}}function v(){let e=[{name:"Chrome",version:"91.0.4472.124",engine:"AppleWebKit/537.36"},{name:"Firefox",version:"89.0",engine:"Gecko/20100101"},{name:"Safari",version:"14.1.1",engine:"AppleWebKit/605.1.15"},{name:"Edge",version:"91.0.864.59",engine:"AppleWebKit/537.36"},{name:"Opera",version:"77.0.4054.277",engine:"AppleWebKit/537.36"},{name:"Vivaldi",version:"3.8.2259.42",engine:"AppleWebKit/537.36"},{name:"Brave",version:"1.26.74",engine:"AppleWebKit/537.36"},{name:"Chromium",version:"91.0.4472.101",engine:"AppleWebKit/537.36"},{name:"Yandex",version:"21.5.3.742",engine:"AppleWebKit/537.36"},{name:"Maxthon",version:"5.3.8.2000",engine:"AppleWebKit/537.36"}],t=["Windows NT 10.0","Windows NT 6.1","Macintosh; Intel Mac OS X 10_15_7","Macintosh; Intel Mac OS X 11_4_0","X11; Linux x86_64","X11; Ubuntu; Linux x86_64"],i=[];for(let a=0;a<200;a++){let r=e[Math.floor(Math.random()*e.length)],s=t[Math.floor(Math.random()*t.length)],o="";switch(r.name){case"Chrome":o=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36`;break;case"Firefox":o=`Mozilla/5.0 (${s}; rv:${r.version}) ${r.engine} Firefox/${r.version}`;break;case"Safari":o=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Version/${r.version} Safari/605.1.15`;break;case"Edge":o=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 Edg/${r.version}`;break;case"Opera":o=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 OPR/${r.version}`;break;case"Vivaldi":o=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 Vivaldi/${r.version}`;break;case"Brave":o=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 Brave/${r.version}`;break;case"Chromium":o=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chromium/${r.version} Chrome/${r.version} Safari/537.36`;break;case"Yandex":o=`Mozilla/5.0 
(${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} YaBrowser/${r.version} Safari/537.36`;break;case"Maxthon":o=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 Maxthon/${r.version}`;break}i.push(o)}return i}exports.CrawlerOptions=f;
|
|
1
|
+
var{RezoQueue:f}=require("../queue/queue.cjs"),{Oxylabs:b}=require("./addon/oxylabs/index.cjs"),g=require("node:path"),x=require("node:os"),{Decodo:y}=require("./addon/decodo/index.cjs"),{Rezo:A}=require("../core/rezo.cjs");class m{baseUrl;adapter;enableNavigationHistory;sessionId;rejectUnauthorized;userAgent;useRndUserAgent;timeout;maxRedirects;maxRetryAttempts;retryDelay;retryOnStatusCode;forceRevisit;retryWithoutProxyOnStatusCode;retryOnProxyError;maxRetryOnProxyError;allowRevisiting;enableCache;cacheTTL;cacheDir;throwFatalError;debug;enableSignalHandlers;concurrency;scraperConcurrency;maxDepth;maxUrls;maxResponseSize;respectRobotsTxt;followNofollow;autoThrottle;autoThrottleTargetDelay;autoThrottleMinDelay;autoThrottleMaxDelay;maxWaitOn429;alwaysWaitOn429;stealth;oxylabs=[];decodo=[];stealthProfiles=[];proxies=[];limiters=[];onLimiterAdded;requestHeaders=[];userAgents=D();constructor(e={}){this.baseUrl=e.baseUrl||"",this.adapter=e.adapter??"http",this.enableNavigationHistory=e.enableNavigationHistory??!1,this.sessionId=e.sessionId??`session_${Date.now()}_${Math.random().toString(36).slice(2,8)}`,this.rejectUnauthorized=e.rejectUnauthorized??!0,this.userAgent=e.userAgent,this.useRndUserAgent=e.useRndUserAgent??!1,this.timeout=e.timeout??30000,this.maxRedirects=e.maxRedirects??10,this.maxRetryAttempts=e.maxRetryAttempts??3,this.retryDelay=e.retryDelay??0,this.retryOnStatusCode=e.retryOnStatusCode??[408,429,500,502,503,504],this.forceRevisit=e.forceRevisit??!1,this.retryWithoutProxyOnStatusCode=e.retryWithoutProxyOnStatusCode??[407,403],this.retryOnProxyError=e.retryOnProxyError??!0,this.maxRetryOnProxyError=e.maxRetryOnProxyError??3,this.allowRevisiting=e.allowRevisiting??!1,this.enableCache=e.enableCache??!0,this.cacheTTL=e.cacheTTL??604800000,this.cacheDir=e.cacheDir??g.join(x.tmpdir(),"rezo_cache"),this.throwFatalError=e.throwFatalError??!1,this.debug=e.debug??!1,this.enableSignalHandlers=e.enableSignalHandlers??!1,this.concurrency=e.concurrency??100,this.scrape
rConcurrency=e.scraperConcurrency??this.concurrency,this.maxDepth=e.maxDepth??0,this.maxUrls=e.maxUrls??0,this.maxResponseSize=e.maxResponseSize??0,this.respectRobotsTxt=e.respectRobotsTxt??!1,this.followNofollow=e.followNofollow??!1,this.autoThrottle=e.autoThrottle??!0,this.autoThrottleTargetDelay=e.autoThrottleTargetDelay??1000,this.autoThrottleMinDelay=e.autoThrottleMinDelay??100,this.autoThrottleMaxDelay=e.autoThrottleMaxDelay??60000,this.maxWaitOn429=e.maxWaitOn429??1800000,this.alwaysWaitOn429=e.alwaysWaitOn429??!1,this._addHeaders(e.headers),this._addOxylabs(e.oxylabs),this._addDecodo(e.decodo),this._addProxies(e.proxy),this._addLimiters(e.limiter),this._addStealths(e.stealths),this.stealth=e.stealth}getConfiguredDomains(e){return(e==="headers"?this.requestHeaders:e==="limiters"?this.limiters:e==="oxylabs"?this.oxylabs:e==="stealth"?this.stealthProfiles:this.proxies).filter((i)=>i.domain).map((i)=>i.domain).filter((i,a,r)=>r.indexOf(i)===a)}removeDomain(e){return this.requestHeaders=this.requestHeaders.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this.proxies=this.proxies.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this.limiters=this.limiters.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this.oxylabs=this.oxylabs.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this.stealthProfiles=this.stealthProfiles.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this}_domainsEqual(e,t){if(Array.isArray(e)&&Array.isArray(t))return e.length===t.length&&e.every((i,a)=>i===t[a]);return e===t}getConfigurationSummary(){let e=(t)=>({total:t.length,global:t.filter((i)=>i.isGlobal).length,domainSpecific:t.filter((i)=>!i.isGlobal&&i.domain).length});return{headers:e(this.requestHeaders),proxies:e(this.proxies),limiters:e(this.limiters),oxylabs:e(this.oxylabs)}}_addHeaders(e){if(!e||!e.enable)return;for(let t of e.httpHeaders){let{domain:i,isGlobal:a,headers:r}=t;if(!i&&!a)continue;if(r instanceof Headers){let 
s=Object.fromEntries(r.entries());if(Object.keys(s).length<1)continue;r=s}else if(!r||Object.keys(r).length<1)continue;this.requestHeaders.push({domain:i,isGlobal:a,headers:r})}}_addProxies(e){if(!e||!e.enable)return;for(let t of e.proxies){let{domain:i,isGlobal:a,proxy:r}=t;if(!i&&!a)continue;if(!r||Object.keys(r).length<1)continue;this.proxies.push({domain:i,isGlobal:a,proxy:r})}}_addLimiters(e){if(!e||!e.enable)return;for(let t of e.limiters){let{domain:i,isGlobal:a,options:r,retry:s}=t;if(!i&&!a)continue;if(!r&&!s)continue;let n=r?new f(r):new f({name:"limiter"});if(this.limiters.push({domain:i,isGlobal:a,pqueue:n,randomDelay:r?.randomDelay,retry:s}),this.onLimiterAdded)this.onLimiterAdded(n)}}_addOxylabs(e){if(!e||!e.enable)return;for(let t of e.labs){let{domain:i,isGlobal:a,options:r,queueOptions:s}=t;if(!i&&!a)continue;if(!r||Object.keys(r).length<1)continue;this.oxylabs.push({domain:i,isGlobal:a,adaptar:new b(r)})}}_addDecodo(e){if(!e||!e.enable)return;for(let t of e.labs){let{domain:i,isGlobal:a,options:r,queueOptions:s}=t;if(!i&&!a)continue;if(!r||Object.keys(r).length<1)continue;this.decodo.push({domain:i,isGlobal:a,adaptar:new y(r)})}}_addStealths(e){if(!e||!e.enable)return;for(let t of e.profiles){let{domain:i,isGlobal:a,stealth:r}=t;if(!i&&!a)continue;if(!r)continue;this.stealthProfiles.push({domain:i,isGlobal:a,adaptar:new A({stealth:r})})}}addHeaders(e){return this._addHeaders({enable:!0,httpHeaders:[e]}),this}addProxy(e){return this._addProxies({enable:!0,proxies:[e]}),this}addLimiter(e){return this._addLimiters({enable:!0,limiters:[e]}),this}addOxylabs(e){return this._addOxylabs({enable:!0,labs:[e]}),this}addDecodo(e){return this._addDecodo({enable:!0,labs:[e]}),this}addStealth(e){return this._addStealths({enable:!0,profiles:[e]}),this}destroyLimiters(){for(let e of this.limiters)if(e.pqueue&&typeof e.pqueue.destroy==="function")e.pqueue.destroy();this.limiters=[]}getLimiters(){return 
this.limiters}getRandomDelay(e,t){if(!this.getDomainName(e))return;for(let a of this.limiters)if(this._hasDomain(e,a.domain)&&a.randomDelay!==void 0)return a.randomDelay;if(t){for(let a of this.limiters)if(a.isGlobal&&a.randomDelay!==void 0)return a.randomDelay}return}clearGlobalConfigs(){if(Array.isArray(this.requestHeaders))this.requestHeaders=this.requestHeaders.filter((e)=>!e.isGlobal);if(Array.isArray(this.oxylabs))this.oxylabs=this.oxylabs.filter((e)=>!e.isGlobal);if(Array.isArray(this.limiters))this.limiters=this.limiters.filter((e)=>!e.isGlobal);if(Array.isArray(this.proxies))this.proxies=this.proxies.filter((e)=>!e.isGlobal);if(Array.isArray(this.stealthProfiles))this.stealthProfiles=this.stealthProfiles.filter((e)=>!e.isGlobal);return this}getAdapter(e,t,i,a){if(!this.getDomainName(e))return null;let s=[],n=t==="headers"?this.requestHeaders:t==="limiters"?this.limiters:t==="oxylabs"?this.oxylabs:t==="decodo"?this.decodo:t==="stealth"?this.stealthProfiles:this.proxies;for(let o=0;o<n.length;o++)if(this._hasDomain(e,n[o].domain))s.push(o);if(s.length){let o=a?s[this.rnd(0,s.length-1)]:s[0];return t==="headers"?this.requestHeaders[o].headers:t==="limiters"?this.limiters[o].pqueue:t==="oxylabs"?this.oxylabs[o].adaptar:t==="decodo"?this.decodo[o].adaptar:t==="stealth"?this.stealthProfiles[o].adaptar:this.proxies[o].proxy}s.length=0;for(let o=0;o<n.length;o++)s.push(o);if(s.length){let o=a?s[this.rnd(0,s.length-1)]:s[0];if(n[o].isGlobal&&i)return t==="headers"?this.requestHeaders[o].headers:t==="limiters"?this.limiters[o].pqueue:t==="oxylabs"?this.oxylabs[o].adaptar:t==="decodo"?this.decodo[o].adaptar:t==="stealth"?this.stealthProfiles[o].adaptar:this.proxies[o].proxy}return null}getRetryOptions(e){if(!this.getDomainName(e))return null;for(let i=0;i<this.limiters.length;i++)if(this._hasDomain(e,this.limiters[i].domain))return this.limiters[i].retry||null;for(let i=0;i<this.limiters.length;i++)if(this.limiters[i].isGlobal&&this.limiters[i].retry)return 
this.limiters[i].retry;return null}rnd(e=0,t=Number.MAX_VALUE){return Math.floor(Math.random()*(t-e+1))+e}hasDomain(e,t,i){if(!this.getDomainName(e))return!1;let r=t==="headers"?this.requestHeaders:t==="limiters"?this.limiters:t==="oxylabs"?this.oxylabs:t==="decodo"?this.decodo:t==="stealth"?this.stealthProfiles:this.proxies;for(let s=0;s<r.length;s++)if(this._hasDomain(e,r[s].domain))return!0;if(i){for(let s=0;s<r.length;s++)if(r[s].isGlobal)return!0}return!1}pickHeaders(e,t,i,a){let r=this.getAdapter(e,"headers",t),s=new Headers(r??{}),n=s.count;if(i&&i instanceof Headers)for(let[o,l]of Object.entries(i.entries()))s.set(o,l);else if(i&&typeof i==="object"){for(let[o,l]of Object.entries(i))if(typeof l==="string")s.set(o,l)}if(a&&n===0&&!this.stealth)s.set("user-agent",this.getRandomUserAgent());return Object.fromEntries(s.entries())}_hasDomain(e,t){if(!t)return!1;let i=this.getDomainName(e);if(!i)return!1;let a=(s)=>{return/[\^\$\*\+\?\{\}\[\]\(\)\|\\]/.test(s)||s.startsWith("/")||s.includes(".*")||s.includes(".+")},r=(s)=>{if(s instanceof RegExp)return s.test(i)||s.test(e);let n=s.toString().trim();if(i.toLowerCase()===n.toLowerCase())return!0;if(n.includes("*")){let h=n.replace(/[.*+?^${}()|[\]\\]/g,"\\$&").replace(/\\\*/g,".*"),d=new RegExp(`^${h}$`,"i");return d.test(i)||d.test(e)}if(a(n))try{let h=n,d="i",u=n.match(/^\/(.*)\/(\w*)$/);if(u)h=u[1],d=u[2]||"i";let c=new RegExp(h,d);return c.test(i)||c.test(e)}catch(h){return i.toLowerCase().includes(n.toLowerCase())}let o=i.toLowerCase(),l=n.toLowerCase();return o===l||o.endsWith("."+l)||l.endsWith("."+o)};if(Array.isArray(t)){for(let s of t)if(r(s))return!0;return!1}return r(t)}getDomainName(e){if(this.isValidUrl(e))return new URL(e).hostname;else if(this.isHostName(e))return e;return null}isHostName(e){if(!e)return!1;if(e.length>255)return!1;let t=/^(?:(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+ [a-zA-Z]{2,})$/;return 
e=e.trim().toLowerCase(),t.test(e)&&!e.startsWith("-")&&!e.endsWith("-")}isValidUrl(e){if(!e)return!1;e=e.trim();try{let t=new URL(e);if(!t.protocol||!["http:","https:"].includes(t.protocol.toLowerCase()))return!1;if(!t.hostname)return!1;if(!/^(?:(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,})$/.test(t.hostname))return!1;return!0}catch{return!1}}getRandomUserAgent(){return this.userAgents[Math.floor(Math.random()*this.userAgents.length)]}}function D(){let e=[{name:"Chrome",version:"91.0.4472.124",engine:"AppleWebKit/537.36"},{name:"Firefox",version:"89.0",engine:"Gecko/20100101"},{name:"Safari",version:"14.1.1",engine:"AppleWebKit/605.1.15"},{name:"Edge",version:"91.0.864.59",engine:"AppleWebKit/537.36"},{name:"Opera",version:"77.0.4054.277",engine:"AppleWebKit/537.36"},{name:"Vivaldi",version:"3.8.2259.42",engine:"AppleWebKit/537.36"},{name:"Brave",version:"1.26.74",engine:"AppleWebKit/537.36"},{name:"Chromium",version:"91.0.4472.101",engine:"AppleWebKit/537.36"},{name:"Yandex",version:"21.5.3.742",engine:"AppleWebKit/537.36"},{name:"Maxthon",version:"5.3.8.2000",engine:"AppleWebKit/537.36"}],t=["Windows NT 10.0","Windows NT 6.1","Macintosh; Intel Mac OS X 10_15_7","Macintosh; Intel Mac OS X 11_4_0","X11; Linux x86_64","X11; Ubuntu; Linux x86_64"],i=[];for(let a=0;a<200;a++){let r=e[Math.floor(Math.random()*e.length)],s=t[Math.floor(Math.random()*t.length)],n="";switch(r.name){case"Chrome":n=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36`;break;case"Firefox":n=`Mozilla/5.0 (${s}; rv:${r.version}) ${r.engine} Firefox/${r.version}`;break;case"Safari":n=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Version/${r.version} Safari/605.1.15`;break;case"Edge":n=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 Edg/${r.version}`;break;case"Opera":n=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 
OPR/${r.version}`;break;case"Vivaldi":n=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 Vivaldi/${r.version}`;break;case"Brave":n=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 Brave/${r.version}`;break;case"Chromium":n=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chromium/${r.version} Chrome/${r.version} Safari/537.36`;break;case"Yandex":n=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} YaBrowser/${r.version} Safari/537.36`;break;case"Maxthon":n=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 Maxthon/${r.version}`;break}i.push(n)}return i}exports.CrawlerOptions=m;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
import { RezoQueue } from "../queue/queue.js";
import { Oxylabs } from "./addon/oxylabs/index.js";
import path from "node:path";
import os from "node:os";
import { Decodo } from "./addon/decodo/index.js";
import { Rezo } from "../core/rezo.js";

/** Dotted hostname ending in an alphabetic TLD of at least two letters. */
const HOSTNAME_PATTERN = /^(?:(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,})$/;

/** Metacharacters that make a configured domain string be evaluated as a regular expression. */
const REGEX_HINT = /[\^\$\*\+\?\{\}\[\]\(\)\|\\]/;

/**
 * Crawler configuration: scalar settings with defaults plus per-domain / global
 * registries of headers, proxies, limiters, Oxylabs, Decodo and stealth clients.
 */
class CrawlerOptions {
  baseUrl;
  adapter;
  enableNavigationHistory;
  sessionId;
  rejectUnauthorized;
  userAgent;
  useRndUserAgent;
  timeout;
  maxRedirects;
  maxRetryAttempts;
  retryDelay;
  retryOnStatusCode;
  forceRevisit;
  retryWithoutProxyOnStatusCode;
  retryOnProxyError;
  maxRetryOnProxyError;
  allowRevisiting;
  enableCache;
  cacheTTL;
  cacheDir;
  throwFatalError;
  debug;
  enableSignalHandlers;
  concurrency;
  scraperConcurrency;
  maxDepth;
  maxUrls;
  maxResponseSize;
  respectRobotsTxt;
  followNofollow;
  autoThrottle;
  autoThrottleTargetDelay;
  autoThrottleMinDelay;
  autoThrottleMaxDelay;
  maxWaitOn429;
  alwaysWaitOn429;
  stealth;
  oxylabs = [];
  decodo = [];
  stealthProfiles = [];
  proxies = [];
  limiters = [];
  onLimiterAdded;
  requestHeaders = [];
  userAgents = buildUserAgents();

  /**
   * @param {object} [options] User-supplied settings; every missing field falls back to the default below.
   */
  constructor(options = {}) {
    this.baseUrl = options.baseUrl || "";
    this.adapter = options.adapter ?? "http";
    this.enableNavigationHistory = options.enableNavigationHistory ?? false;
    this.sessionId =
      options.sessionId ?? `session_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;
    this.rejectUnauthorized = options.rejectUnauthorized ?? true;
    this.userAgent = options.userAgent;
    this.useRndUserAgent = options.useRndUserAgent ?? false;
    this.timeout = options.timeout ?? 30000;
    this.maxRedirects = options.maxRedirects ?? 10;
    this.maxRetryAttempts = options.maxRetryAttempts ?? 3;
    this.retryDelay = options.retryDelay ?? 0;
    this.retryOnStatusCode = options.retryOnStatusCode ?? [408, 429, 500, 502, 503, 504];
    this.forceRevisit = options.forceRevisit ?? false;
    this.retryWithoutProxyOnStatusCode = options.retryWithoutProxyOnStatusCode ?? [407, 403];
    this.retryOnProxyError = options.retryOnProxyError ?? true;
    this.maxRetryOnProxyError = options.maxRetryOnProxyError ?? 3;
    this.allowRevisiting = options.allowRevisiting ?? false;
    this.enableCache = options.enableCache ?? true;
    this.cacheTTL = options.cacheTTL ?? 604800000;
    this.cacheDir = options.cacheDir ?? path.join(os.tmpdir(), "uiniqhtt_cache");
    this.throwFatalError = options.throwFatalError ?? false;
    this.debug = options.debug ?? false;
    this.enableSignalHandlers = options.enableSignalHandlers ?? false;
    this.concurrency = options.concurrency ?? 100;
    this.scraperConcurrency = options.scraperConcurrency ?? this.concurrency;
    this.maxDepth = options.maxDepth ?? 0;
    this.maxUrls = options.maxUrls ?? 0;
    this.maxResponseSize = options.maxResponseSize ?? 0;
    this.respectRobotsTxt = options.respectRobotsTxt ?? false;
    this.followNofollow = options.followNofollow ?? false;
    this.autoThrottle = options.autoThrottle ?? true;
    this.autoThrottleTargetDelay = options.autoThrottleTargetDelay ?? 1000;
    this.autoThrottleMinDelay = options.autoThrottleMinDelay ?? 100;
    this.autoThrottleMaxDelay = options.autoThrottleMaxDelay ?? 60000;
    this.maxWaitOn429 = options.maxWaitOn429 ?? 1800000;
    this.alwaysWaitOn429 = options.alwaysWaitOn429 ?? false;
    this._addHeaders(options.headers);
    this._addOxylabs(options.oxylabs);
    this._addDecodo(options.decodo);
    this._addProxies(options.proxy);
    this._addLimiters(options.limiter);
    this._addStealths(options.stealths);
    this.stealth = options.stealth;
  }

  /** Maps a registry type name to its backing array; unknown names resolve to the proxy list. */
  _collectionFor(type) {
    switch (type) {
      case "headers":
        return this.requestHeaders;
      case "limiters":
        return this.limiters;
      case "oxylabs":
        return this.oxylabs;
      case "decodo":
        return this.decodo;
      case "stealth":
        return this.stealthProfiles;
      default:
        return this.proxies;
    }
  }

  /** Extracts the value callers want (headers, queue, client or proxy) from a registry entry. */
  _payloadOf(type, entry) {
    switch (type) {
      case "headers":
        return entry.headers;
      case "limiters":
        return entry.pqueue;
      case "oxylabs":
      case "decodo":
      case "stealth":
        return entry.adaptar;
      default:
        return entry.proxy;
    }
  }

  /**
   * Lists the distinct `domain` values configured for a registry type.
   * @param {string} type "headers" | "limiters" | "oxylabs" | "decodo" | "stealth"; anything else means proxies.
   * @returns {Array} Unique domain values in first-seen order.
   */
  getConfiguredDomains(type) {
    const domains = this._collectionFor(type)
      .filter((entry) => entry.domain)
      .map((entry) => entry.domain);
    return [...new Set(domains)];
  }

  /**
   * Removes every entry whose `domain` equals the given value from all registries
   * (including decodo, which the previous implementation forgot).
   * @returns {this}
   */
  removeDomain(domain) {
    const keep = (entry) => !entry.domain || !this._domainsEqual(entry.domain, domain);
    this.requestHeaders = this.requestHeaders.filter(keep);
    this.proxies = this.proxies.filter(keep);
    this.limiters = this.limiters.filter(keep);
    this.oxylabs = this.oxylabs.filter(keep);
    this.decodo = this.decodo.filter(keep);
    this.stealthProfiles = this.stealthProfiles.filter(keep);
    return this;
  }

  /** Strict equality for scalars; element-wise strict equality for two arrays. */
  _domainsEqual(left, right) {
    if (Array.isArray(left) && Array.isArray(right)) {
      return left.length === right.length && left.every((value, index) => value === right[index]);
    }
    return left === right;
  }

  /**
   * Counts total, global and domain-specific entries per registry.
   * `decodo` and `stealth` are reported in addition to the original four keys.
   */
  getConfigurationSummary() {
    const summarize = (entries) => ({
      total: entries.length,
      global: entries.filter((entry) => entry.isGlobal).length,
      domainSpecific: entries.filter((entry) => !entry.isGlobal && entry.domain).length,
    });
    return {
      headers: summarize(this.requestHeaders),
      proxies: summarize(this.proxies),
      limiters: summarize(this.limiters),
      oxylabs: summarize(this.oxylabs),
      decodo: summarize(this.decodo),
      stealth: summarize(this.stealthProfiles),
    };
  }

  /** Registers header sets; entries without a domain/global flag or without any header are ignored. */
  _addHeaders(config) {
    if (!config || !config.enable) return;
    for (const item of config.httpHeaders ?? []) {
      const { domain, isGlobal } = item;
      let { headers } = item;
      if (!domain && !isGlobal) continue;
      if (headers instanceof Headers) {
        headers = Object.fromEntries(headers.entries());
      }
      if (!headers || Object.keys(headers).length < 1) continue;
      this.requestHeaders.push({ domain, isGlobal, headers });
    }
  }

  /** Registers proxies; entries without a domain/global flag or with an empty proxy are ignored. */
  _addProxies(config) {
    if (!config || !config.enable) return;
    for (const { domain, isGlobal, proxy } of config.proxies ?? []) {
      if (!domain && !isGlobal) continue;
      if (!proxy || Object.keys(proxy).length < 1) continue;
      this.proxies.push({ domain, isGlobal, proxy });
    }
  }

  /** Creates one RezoQueue per limiter entry and notifies `onLimiterAdded` when it is set. */
  _addLimiters(config) {
    if (!config || !config.enable) return;
    for (const { domain, isGlobal, options } of config.limiters ?? []) {
      if (!domain && !isGlobal) continue;
      if (!options || Object.keys(options).length < 1) continue;
      const pqueue = new RezoQueue(options);
      this.limiters.push({ domain, isGlobal, pqueue, randomDelay: options.randomDelay });
      if (this.onLimiterAdded) this.onLimiterAdded(pqueue);
    }
  }

  /** Registers one Oxylabs client per entry that has a domain/global flag and non-empty options. */
  _addOxylabs(config) {
    if (!config || !config.enable) return;
    for (const { domain, isGlobal, options } of config.labs ?? []) {
      if (!domain && !isGlobal) continue;
      if (!options || Object.keys(options).length < 1) continue;
      // `adaptar` (sic) is the property name the lookup code reads; keep the spelling.
      this.oxylabs.push({ domain, isGlobal, adaptar: new Oxylabs(options) });
    }
  }

  /** Registers one Decodo client per entry that has a domain/global flag and non-empty options. */
  _addDecodo(config) {
    if (!config || !config.enable) return;
    for (const { domain, isGlobal, options } of config.labs ?? []) {
      if (!domain && !isGlobal) continue;
      if (!options || Object.keys(options).length < 1) continue;
      this.decodo.push({ domain, isGlobal, adaptar: new Decodo(options) });
    }
  }

  /** Registers one Rezo client (constructed with the given stealth value) per stealth profile entry. */
  _addStealths(config) {
    if (!config || !config.enable) return;
    for (const { domain, isGlobal, stealth } of config.profiles ?? []) {
      if (!domain && !isGlobal) continue;
      if (!stealth) continue;
      this.stealthProfiles.push({ domain, isGlobal, adaptar: new Rezo({ stealth }) });
    }
  }

  addHeaders(entry) {
    this._addHeaders({ enable: true, httpHeaders: [entry] });
    return this;
  }

  addProxy(entry) {
    this._addProxies({ enable: true, proxies: [entry] });
    return this;
  }

  addLimiter(entry) {
    this._addLimiters({ enable: true, limiters: [entry] });
    return this;
  }

  addOxylabs(entry) {
    this._addOxylabs({ enable: true, labs: [entry] });
    return this;
  }

  addDecodo(entry) {
    this._addDecodo({ enable: true, labs: [entry] });
    return this;
  }

  addStealth(entry) {
    this._addStealths({ enable: true, profiles: [entry] });
    return this;
  }

  /** Calls `destroy()` on every limiter queue that exposes it, then empties the limiter registry. */
  destroyLimiters() {
    for (const limiter of this.limiters) {
      if (limiter.pqueue && typeof limiter.pqueue.destroy === "function") {
        limiter.pqueue.destroy();
      }
    }
    this.limiters = [];
  }

  getLimiters() {
    return this.limiters;
  }

  /**
   * Returns the `randomDelay` of the first limiter matching the URL's domain, or,
   * when `useGlobal` is truthy, of the first global limiter that defines one.
   * @returns {*} The configured delay, or undefined when nothing applies.
   */
  getRandomDelay(url, useGlobal) {
    if (!this.getDomainName(url)) return undefined;
    const defined = (limiter) => limiter.randomDelay !== undefined;
    const scoped = this.limiters.find(
      (limiter) => this._hasDomain(url, limiter.domain) && defined(limiter),
    );
    if (scoped) return scoped.randomDelay;
    if (!useGlobal) return undefined;
    return this.limiters.find((limiter) => limiter.isGlobal && defined(limiter))?.randomDelay;
  }

  /** Drops every global entry from all registries (decodo included). @returns {this} */
  clearGlobalConfigs() {
    const withoutGlobals = (entries) =>
      Array.isArray(entries) ? entries.filter((entry) => !entry.isGlobal) : entries;
    this.requestHeaders = withoutGlobals(this.requestHeaders);
    this.oxylabs = withoutGlobals(this.oxylabs);
    this.decodo = withoutGlobals(this.decodo);
    this.limiters = withoutGlobals(this.limiters);
    this.proxies = withoutGlobals(this.proxies);
    this.stealthProfiles = withoutGlobals(this.stealthProfiles);
    return this;
  }

  /**
   * Resolves the configured value for a URL: a domain-specific entry wins; otherwise,
   * when `useGlobal` is truthy, a global entry is used.
   *
   * The previous fallback drew from *all* entries and returned null whenever the drawn
   * entry was not global, so a global entry was missed unless it happened to be picked.
   *
   * @param {string} url URL or bare hostname.
   * @param {string} type Registry type (see `_collectionFor`).
   * @param {boolean} [useGlobal] Allow falling back to global entries.
   * @param {boolean} [random] Pick a random candidate instead of the first.
   * @returns {*} Headers object, queue, client or proxy; null when nothing applies.
   */
  getAdapter(url, type, useGlobal, random) {
    if (!this.getDomainName(url)) return null;
    const entries = this._collectionFor(type);
    const choose = (candidates) =>
      random ? candidates[this.rnd(0, candidates.length - 1)] : candidates[0];

    const scoped = entries.filter((entry) => this._hasDomain(url, entry.domain));
    if (scoped.length > 0) return this._payloadOf(type, choose(scoped));
    if (!useGlobal) return null;

    const globals = entries.filter((entry) => entry.isGlobal);
    return globals.length > 0 ? this._payloadOf(type, choose(globals)) : null;
  }

  /** Random integer in the inclusive range [min, max]. */
  rnd(min = 0, max = Number.MAX_VALUE) {
    return Math.floor(Math.random() * (max - min + 1)) + min;
  }

  /** True when a domain-specific entry (or, with `useGlobal`, any global entry) exists for the URL. */
  hasDomain(url, type, useGlobal) {
    if (!this.getDomainName(url)) return false;
    const entries = this._collectionFor(type);
    if (entries.some((entry) => this._hasDomain(url, entry.domain))) return true;
    return Boolean(useGlobal) && entries.some((entry) => entry.isGlobal);
  }

  /**
   * Builds the request headers for a URL: configured headers first, then caller overrides.
   *
   * Fixes two defects: `Headers` overrides were iterated via `Object.entries(iterator)`
   * (always empty), and the "no configured headers" check read the non-existent
   * `Headers#count`, so the random User-Agent was never applied.
   *
   * @param {string} url URL or bare hostname.
   * @param {boolean} [useGlobal] Allow global header entries.
   * @param {Headers|object} [customHeaders] Overrides; for plain objects only string values are copied.
   * @param {boolean} [useRandomUserAgent] Add a random User-Agent when no headers are configured,
   *   stealth is off, and the caller did not supply a User-Agent of their own.
   * @returns {object} Plain object of header name → value.
   */
  pickHeaders(url, useGlobal, customHeaders, useRandomUserAgent) {
    const configured = this.getAdapter(url, "headers", useGlobal);
    const merged = new Headers(configured ?? {});
    const configuredCount = [...merged.keys()].length;

    if (customHeaders instanceof Headers) {
      for (const [name, value] of customHeaders.entries()) merged.set(name, value);
    } else if (customHeaders && typeof customHeaders === "object") {
      for (const [name, value] of Object.entries(customHeaders)) {
        if (typeof value === "string") merged.set(name, value);
      }
    }

    if (useRandomUserAgent && configuredCount === 0 && !this.stealth && !merged.has("user-agent")) {
      merged.set("user-agent", this.getRandomUserAgent());
    }
    return Object.fromEntries(merged.entries());
  }

  /**
   * Tests whether a URL belongs to a configured domain value.
   * A value may be a RegExp, a string (exact host, `*` glob, regex source or `/source/flags`
   * literal, or a parent/child domain), or an array of those.
   */
  _hasDomain(url, domain) {
    if (!domain) return false;
    const host = this.getDomainName(url);
    if (!host) return false;
    const hostLower = host.toLowerCase();

    const looksLikeRegex = (text) =>
      REGEX_HINT.test(text) || text.startsWith("/") || text.includes(".*") || text.includes(".+");

    const matches = (candidate) => {
      if (candidate instanceof RegExp) return candidate.test(host) || candidate.test(url);

      const text = candidate.toString().trim();
      const textLower = text.toLowerCase();
      if (hostLower === textLower) return true;

      if (text.includes("*")) {
        // Glob: escape everything, then turn the escaped `*` back into `.*`.
        const escaped = text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&").replace(/\\\*/g, ".*");
        const glob = new RegExp(`^${escaped}$`, "i");
        return glob.test(host) || glob.test(url);
      }

      if (looksLikeRegex(text)) {
        try {
          let source = text;
          let flags = "i";
          const literal = text.match(/^\/(.*)\/(\w*)$/);
          if (literal) {
            source = literal[1];
            flags = literal[2] || "i";
          }
          const pattern = new RegExp(source, flags);
          return pattern.test(host) || pattern.test(url);
        } catch {
          // Not a valid pattern after all: fall back to a plain substring test.
          return hostLower.includes(textLower);
        }
      }

      // Parent/child relationship in either direction.
      return hostLower.endsWith(`.${textLower}`) || textLower.endsWith(`.${hostLower}`);
    };

    return Array.isArray(domain) ? domain.some(matches) : matches(domain);
  }

  /** Returns the hostname of an http(s) URL, the input itself when it is a bare hostname, else null. */
  getDomainName(value) {
    if (this.isValidUrl(value)) return new URL(value).hostname;
    if (this.isHostName(value)) return value;
    return null;
  }

  /**
   * True for a dotted hostname of at most 255 characters.
   * The pattern previously contained a stray space before the TLD group, so ordinary
   * hostnames such as "example.com" never matched.
   */
  isHostName(value) {
    if (!value) return false;
    if (value.length > 255) return false;
    const host = value.trim().toLowerCase();
    return HOSTNAME_PATTERN.test(host) && !host.startsWith("-") && !host.endsWith("-");
  }

  /** True for a parseable http/https URL whose hostname matches the dotted-hostname pattern. */
  isValidUrl(value) {
    if (!value) return false;
    try {
      const parsed = new URL(value.trim());
      if (!parsed.protocol || !["http:", "https:"].includes(parsed.protocol.toLowerCase())) {
        return false;
      }
      if (!parsed.hostname) return false;
      return HOSTNAME_PATTERN.test(parsed.hostname);
    } catch {
      return false;
    }
  }

  /** Picks one of the pre-generated User-Agent strings at random. */
  getRandomUserAgent() {
    return this.userAgents[Math.floor(Math.random() * this.userAgents.length)];
  }
}

/** Browser name → User-Agent template. */
const USER_AGENT_FORMATS = {
  Chrome: (platform, b) =>
    `Mozilla/5.0 (${platform}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36`,
  Firefox: (platform, b) =>
    `Mozilla/5.0 (${platform}; rv:${b.version}) ${b.engine} Firefox/${b.version}`,
  Safari: (platform, b) =>
    `Mozilla/5.0 (${platform}) ${b.engine} (KHTML, like Gecko) Version/${b.version} Safari/605.1.15`,
  Edge: (platform, b) =>
    `Mozilla/5.0 (${platform}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36 Edg/${b.version}`,
  Opera: (platform, b) =>
    `Mozilla/5.0 (${platform}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36 OPR/${b.version}`,
  Vivaldi: (platform, b) =>
    `Mozilla/5.0 (${platform}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36 Vivaldi/${b.version}`,
  Brave: (platform, b) =>
    `Mozilla/5.0 (${platform}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36 Brave/${b.version}`,
  Chromium: (platform, b) =>
    `Mozilla/5.0 (${platform}) ${b.engine} (KHTML, like Gecko) Chromium/${b.version} Chrome/${b.version} Safari/537.36`,
  Yandex: (platform, b) =>
    `Mozilla/5.0 (${platform}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} YaBrowser/${b.version} Safari/537.36`,
  Maxthon: (platform, b) =>
    `Mozilla/5.0 (${platform}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36 Maxthon/${b.version}`,
};

/** Generates 200 User-Agent strings from random browser/platform combinations. */
function buildUserAgents() {
  const browsers = [
    { name: "Chrome", version: "91.0.4472.124", engine: "AppleWebKit/537.36" },
    { name: "Firefox", version: "89.0", engine: "Gecko/20100101" },
    { name: "Safari", version: "14.1.1", engine: "AppleWebKit/605.1.15" },
    { name: "Edge", version: "91.0.864.59", engine: "AppleWebKit/537.36" },
    { name: "Opera", version: "77.0.4054.277", engine: "AppleWebKit/537.36" },
    { name: "Vivaldi", version: "3.8.2259.42", engine: "AppleWebKit/537.36" },
    { name: "Brave", version: "1.26.74", engine: "AppleWebKit/537.36" },
    { name: "Chromium", version: "91.0.4472.101", engine: "AppleWebKit/537.36" },
    { name: "Yandex", version: "21.5.3.742", engine: "AppleWebKit/537.36" },
    { name: "Maxthon", version: "5.3.8.2000", engine: "AppleWebKit/537.36" },
  ];
  const platforms = [
    "Windows NT 10.0",
    "Windows NT 6.1",
    "Macintosh; Intel Mac OS X 10_15_7",
    "Macintosh; Intel Mac OS X 11_4_0",
    "X11; Linux x86_64",
    "X11; Ubuntu; Linux x86_64",
  ];
  const agents = [];
  for (let n = 0; n < 200; n++) {
    const browser = browsers[Math.floor(Math.random() * browsers.length)];
    const platform = platforms[Math.floor(Math.random() * platforms.length)];
    agents.push(USER_AGENT_FORMATS[browser.name](platform, browser));
  }
  return agents;
}

export { CrawlerOptions };
|
|
1
|
+
import { RezoQueue } from "../queue/queue.js";
import { Oxylabs } from "./addon/oxylabs/index.js";
import path from "node:path";
import os from "node:os";
import { Decodo } from "./addon/decodo/index.js";
import { Rezo } from "../core/rezo.js";

/** Dotted hostname ending in an alphabetic TLD of at least two letters. */
const HOSTNAME_PATTERN = /^(?:(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,})$/;

/** Metacharacters that make a configured domain string be evaluated as a regular expression. */
const REGEX_HINT = /[\^\$\*\+\?\{\}\[\]\(\)\|\\]/;

/**
 * Crawler configuration: scalar settings with defaults plus per-domain / global
 * registries of headers, proxies, limiters (with optional retry options),
 * Oxylabs, Decodo and stealth clients.
 */
class CrawlerOptions {
  baseUrl;
  adapter;
  enableNavigationHistory;
  sessionId;
  rejectUnauthorized;
  userAgent;
  useRndUserAgent;
  timeout;
  maxRedirects;
  maxRetryAttempts;
  retryDelay;
  retryOnStatusCode;
  forceRevisit;
  retryWithoutProxyOnStatusCode;
  retryOnProxyError;
  maxRetryOnProxyError;
  allowRevisiting;
  enableCache;
  cacheTTL;
  cacheDir;
  throwFatalError;
  debug;
  enableSignalHandlers;
  concurrency;
  scraperConcurrency;
  maxDepth;
  maxUrls;
  maxResponseSize;
  respectRobotsTxt;
  followNofollow;
  autoThrottle;
  autoThrottleTargetDelay;
  autoThrottleMinDelay;
  autoThrottleMaxDelay;
  maxWaitOn429;
  alwaysWaitOn429;
  stealth;
  oxylabs = [];
  decodo = [];
  stealthProfiles = [];
  proxies = [];
  limiters = [];
  onLimiterAdded;
  requestHeaders = [];
  userAgents = buildUserAgents();

  /**
   * @param {object} [options] User-supplied settings; every missing field falls back to the default below.
   */
  constructor(options = {}) {
    this.baseUrl = options.baseUrl || "";
    this.adapter = options.adapter ?? "http";
    this.enableNavigationHistory = options.enableNavigationHistory ?? false;
    this.sessionId =
      options.sessionId ?? `session_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;
    this.rejectUnauthorized = options.rejectUnauthorized ?? true;
    this.userAgent = options.userAgent;
    this.useRndUserAgent = options.useRndUserAgent ?? false;
    this.timeout = options.timeout ?? 30000;
    this.maxRedirects = options.maxRedirects ?? 10;
    this.maxRetryAttempts = options.maxRetryAttempts ?? 3;
    this.retryDelay = options.retryDelay ?? 0;
    this.retryOnStatusCode = options.retryOnStatusCode ?? [408, 429, 500, 502, 503, 504];
    this.forceRevisit = options.forceRevisit ?? false;
    this.retryWithoutProxyOnStatusCode = options.retryWithoutProxyOnStatusCode ?? [407, 403];
    this.retryOnProxyError = options.retryOnProxyError ?? true;
    this.maxRetryOnProxyError = options.maxRetryOnProxyError ?? 3;
    this.allowRevisiting = options.allowRevisiting ?? false;
    this.enableCache = options.enableCache ?? true;
    this.cacheTTL = options.cacheTTL ?? 604800000;
    this.cacheDir = options.cacheDir ?? path.join(os.tmpdir(), "rezo_cache");
    this.throwFatalError = options.throwFatalError ?? false;
    this.debug = options.debug ?? false;
    this.enableSignalHandlers = options.enableSignalHandlers ?? false;
    this.concurrency = options.concurrency ?? 100;
    this.scraperConcurrency = options.scraperConcurrency ?? this.concurrency;
    this.maxDepth = options.maxDepth ?? 0;
    this.maxUrls = options.maxUrls ?? 0;
    this.maxResponseSize = options.maxResponseSize ?? 0;
    this.respectRobotsTxt = options.respectRobotsTxt ?? false;
    this.followNofollow = options.followNofollow ?? false;
    this.autoThrottle = options.autoThrottle ?? true;
    this.autoThrottleTargetDelay = options.autoThrottleTargetDelay ?? 1000;
    this.autoThrottleMinDelay = options.autoThrottleMinDelay ?? 100;
    this.autoThrottleMaxDelay = options.autoThrottleMaxDelay ?? 60000;
    this.maxWaitOn429 = options.maxWaitOn429 ?? 1800000;
    this.alwaysWaitOn429 = options.alwaysWaitOn429 ?? false;
    this._addHeaders(options.headers);
    this._addOxylabs(options.oxylabs);
    this._addDecodo(options.decodo);
    this._addProxies(options.proxy);
    this._addLimiters(options.limiter);
    this._addStealths(options.stealths);
    this.stealth = options.stealth;
  }

  /** Maps a registry type name to its backing array; unknown names resolve to the proxy list. */
  _collectionFor(type) {
    switch (type) {
      case "headers":
        return this.requestHeaders;
      case "limiters":
        return this.limiters;
      case "oxylabs":
        return this.oxylabs;
      case "decodo":
        return this.decodo;
      case "stealth":
        return this.stealthProfiles;
      default:
        return this.proxies;
    }
  }

  /** Extracts the value callers want (headers, queue, client or proxy) from a registry entry. */
  _payloadOf(type, entry) {
    switch (type) {
      case "headers":
        return entry.headers;
      case "limiters":
        return entry.pqueue;
      case "oxylabs":
      case "decodo":
      case "stealth":
        return entry.adaptar;
      default:
        return entry.proxy;
    }
  }

  /**
   * Lists the distinct `domain` values configured for a registry type.
   * @param {string} type "headers" | "limiters" | "oxylabs" | "decodo" | "stealth"; anything else means proxies.
   * @returns {Array} Unique domain values in first-seen order.
   */
  getConfiguredDomains(type) {
    const domains = this._collectionFor(type)
      .filter((entry) => entry.domain)
      .map((entry) => entry.domain);
    return [...new Set(domains)];
  }

  /**
   * Removes every entry whose `domain` equals the given value from all registries
   * (including decodo, which the previous implementation forgot).
   * @returns {this}
   */
  removeDomain(domain) {
    const keep = (entry) => !entry.domain || !this._domainsEqual(entry.domain, domain);
    this.requestHeaders = this.requestHeaders.filter(keep);
    this.proxies = this.proxies.filter(keep);
    this.limiters = this.limiters.filter(keep);
    this.oxylabs = this.oxylabs.filter(keep);
    this.decodo = this.decodo.filter(keep);
    this.stealthProfiles = this.stealthProfiles.filter(keep);
    return this;
  }

  /** Strict equality for scalars; element-wise strict equality for two arrays. */
  _domainsEqual(left, right) {
    if (Array.isArray(left) && Array.isArray(right)) {
      return left.length === right.length && left.every((value, index) => value === right[index]);
    }
    return left === right;
  }

  /**
   * Counts total, global and domain-specific entries per registry.
   * `decodo` and `stealth` are reported in addition to the original four keys.
   */
  getConfigurationSummary() {
    const summarize = (entries) => ({
      total: entries.length,
      global: entries.filter((entry) => entry.isGlobal).length,
      domainSpecific: entries.filter((entry) => !entry.isGlobal && entry.domain).length,
    });
    return {
      headers: summarize(this.requestHeaders),
      proxies: summarize(this.proxies),
      limiters: summarize(this.limiters),
      oxylabs: summarize(this.oxylabs),
      decodo: summarize(this.decodo),
      stealth: summarize(this.stealthProfiles),
    };
  }

  /** Registers header sets; entries without a domain/global flag or without any header are ignored. */
  _addHeaders(config) {
    if (!config || !config.enable) return;
    for (const item of config.httpHeaders ?? []) {
      const { domain, isGlobal } = item;
      let { headers } = item;
      if (!domain && !isGlobal) continue;
      if (headers instanceof Headers) {
        headers = Object.fromEntries(headers.entries());
      }
      if (!headers || Object.keys(headers).length < 1) continue;
      this.requestHeaders.push({ domain, isGlobal, headers });
    }
  }

  /** Registers proxies; entries without a domain/global flag or with an empty proxy are ignored. */
  _addProxies(config) {
    if (!config || !config.enable) return;
    for (const { domain, isGlobal, proxy } of config.proxies ?? []) {
      if (!domain && !isGlobal) continue;
      if (!proxy || Object.keys(proxy).length < 1) continue;
      this.proxies.push({ domain, isGlobal, proxy });
    }
  }

  /**
   * Creates one RezoQueue per limiter entry and notifies `onLimiterAdded` when it is set.
   * An entry needs queue `options`, `retry` options, or both; a retry-only entry gets a
   * queue constructed with just `{ name: "limiter" }`.
   */
  _addLimiters(config) {
    if (!config || !config.enable) return;
    for (const { domain, isGlobal, options, retry } of config.limiters ?? []) {
      if (!domain && !isGlobal) continue;
      if (!options && !retry) continue;
      const pqueue = options ? new RezoQueue(options) : new RezoQueue({ name: "limiter" });
      this.limiters.push({ domain, isGlobal, pqueue, randomDelay: options?.randomDelay, retry });
      if (this.onLimiterAdded) this.onLimiterAdded(pqueue);
    }
  }

  /** Registers one Oxylabs client per entry that has a domain/global flag and non-empty options. */
  _addOxylabs(config) {
    if (!config || !config.enable) return;
    for (const { domain, isGlobal, options } of config.labs ?? []) {
      if (!domain && !isGlobal) continue;
      if (!options || Object.keys(options).length < 1) continue;
      // `adaptar` (sic) is the property name the lookup code reads; keep the spelling.
      this.oxylabs.push({ domain, isGlobal, adaptar: new Oxylabs(options) });
    }
  }

  /** Registers one Decodo client per entry that has a domain/global flag and non-empty options. */
  _addDecodo(config) {
    if (!config || !config.enable) return;
    for (const { domain, isGlobal, options } of config.labs ?? []) {
      if (!domain && !isGlobal) continue;
      if (!options || Object.keys(options).length < 1) continue;
      this.decodo.push({ domain, isGlobal, adaptar: new Decodo(options) });
    }
  }

  /** Registers one Rezo client (constructed with the given stealth value) per stealth profile entry. */
  _addStealths(config) {
    if (!config || !config.enable) return;
    for (const { domain, isGlobal, stealth } of config.profiles ?? []) {
      if (!domain && !isGlobal) continue;
      if (!stealth) continue;
      this.stealthProfiles.push({ domain, isGlobal, adaptar: new Rezo({ stealth }) });
    }
  }

  addHeaders(entry) {
    this._addHeaders({ enable: true, httpHeaders: [entry] });
    return this;
  }

  addProxy(entry) {
    this._addProxies({ enable: true, proxies: [entry] });
    return this;
  }

  addLimiter(entry) {
    this._addLimiters({ enable: true, limiters: [entry] });
    return this;
  }

  addOxylabs(entry) {
    this._addOxylabs({ enable: true, labs: [entry] });
    return this;
  }

  addDecodo(entry) {
    this._addDecodo({ enable: true, labs: [entry] });
    return this;
  }

  addStealth(entry) {
    this._addStealths({ enable: true, profiles: [entry] });
    return this;
  }

  /** Calls `destroy()` on every limiter queue that exposes it, then empties the limiter registry. */
  destroyLimiters() {
    for (const limiter of this.limiters) {
      if (limiter.pqueue && typeof limiter.pqueue.destroy === "function") {
        limiter.pqueue.destroy();
      }
    }
    this.limiters = [];
  }

  getLimiters() {
    return this.limiters;
  }

  /**
   * Returns the `randomDelay` of the first limiter matching the URL's domain, or,
   * when `useGlobal` is truthy, of the first global limiter that defines one.
   * @returns {*} The configured delay, or undefined when nothing applies.
   */
  getRandomDelay(url, useGlobal) {
    if (!this.getDomainName(url)) return undefined;
    const defined = (limiter) => limiter.randomDelay !== undefined;
    const scoped = this.limiters.find(
      (limiter) => this._hasDomain(url, limiter.domain) && defined(limiter),
    );
    if (scoped) return scoped.randomDelay;
    if (!useGlobal) return undefined;
    return this.limiters.find((limiter) => limiter.isGlobal && defined(limiter))?.randomDelay;
  }

  /** Drops every global entry from all registries (decodo included). @returns {this} */
  clearGlobalConfigs() {
    const withoutGlobals = (entries) =>
      Array.isArray(entries) ? entries.filter((entry) => !entry.isGlobal) : entries;
    this.requestHeaders = withoutGlobals(this.requestHeaders);
    this.oxylabs = withoutGlobals(this.oxylabs);
    this.decodo = withoutGlobals(this.decodo);
    this.limiters = withoutGlobals(this.limiters);
    this.proxies = withoutGlobals(this.proxies);
    this.stealthProfiles = withoutGlobals(this.stealthProfiles);
    return this;
  }

  /**
   * Resolves the configured value for a URL: a domain-specific entry wins; otherwise,
   * when `useGlobal` is truthy, a global entry is used.
   *
   * The previous fallback drew from *all* entries and returned null whenever the drawn
   * entry was not global, so a global entry was missed unless it happened to be picked.
   *
   * @param {string} url URL or bare hostname.
   * @param {string} type Registry type (see `_collectionFor`).
   * @param {boolean} [useGlobal] Allow falling back to global entries.
   * @param {boolean} [random] Pick a random candidate instead of the first.
   * @returns {*} Headers object, queue, client or proxy; null when nothing applies.
   */
  getAdapter(url, type, useGlobal, random) {
    if (!this.getDomainName(url)) return null;
    const entries = this._collectionFor(type);
    const choose = (candidates) =>
      random ? candidates[this.rnd(0, candidates.length - 1)] : candidates[0];

    const scoped = entries.filter((entry) => this._hasDomain(url, entry.domain));
    if (scoped.length > 0) return this._payloadOf(type, choose(scoped));
    if (!useGlobal) return null;

    const globals = entries.filter((entry) => entry.isGlobal);
    return globals.length > 0 ? this._payloadOf(type, choose(globals)) : null;
  }

  /**
   * Returns the `retry` options of the first limiter matching the URL's domain
   * (null when that limiter has none), otherwise those of the first global limiter
   * that defines `retry`, otherwise null.
   */
  getRetryOptions(url) {
    if (!this.getDomainName(url)) return null;
    const scoped = this.limiters.find((limiter) => this._hasDomain(url, limiter.domain));
    if (scoped) return scoped.retry || null;
    const fallback = this.limiters.find((limiter) => limiter.isGlobal && limiter.retry);
    return fallback ? fallback.retry : null;
  }

  /** Random integer in the inclusive range [min, max]. */
  rnd(min = 0, max = Number.MAX_VALUE) {
    return Math.floor(Math.random() * (max - min + 1)) + min;
  }

  /** True when a domain-specific entry (or, with `useGlobal`, any global entry) exists for the URL. */
  hasDomain(url, type, useGlobal) {
    if (!this.getDomainName(url)) return false;
    const entries = this._collectionFor(type);
    if (entries.some((entry) => this._hasDomain(url, entry.domain))) return true;
    return Boolean(useGlobal) && entries.some((entry) => entry.isGlobal);
  }

  /**
   * Builds the request headers for a URL: configured headers first, then caller overrides.
   *
   * Fixes two defects: `Headers` overrides were iterated via `Object.entries(iterator)`
   * (always empty), and the "no configured headers" check read the non-existent
   * `Headers#count`, so the random User-Agent was never applied.
   *
   * @param {string} url URL or bare hostname.
   * @param {boolean} [useGlobal] Allow global header entries.
   * @param {Headers|object} [customHeaders] Overrides; for plain objects only string values are copied.
   * @param {boolean} [useRandomUserAgent] Add a random User-Agent when no headers are configured,
   *   stealth is off, and the caller did not supply a User-Agent of their own.
   * @returns {object} Plain object of header name → value.
   */
  pickHeaders(url, useGlobal, customHeaders, useRandomUserAgent) {
    const configured = this.getAdapter(url, "headers", useGlobal);
    const merged = new Headers(configured ?? {});
    const configuredCount = [...merged.keys()].length;

    if (customHeaders instanceof Headers) {
      for (const [name, value] of customHeaders.entries()) merged.set(name, value);
    } else if (customHeaders && typeof customHeaders === "object") {
      for (const [name, value] of Object.entries(customHeaders)) {
        if (typeof value === "string") merged.set(name, value);
      }
    }

    if (useRandomUserAgent && configuredCount === 0 && !this.stealth && !merged.has("user-agent")) {
      merged.set("user-agent", this.getRandomUserAgent());
    }
    return Object.fromEntries(merged.entries());
  }

  /**
   * Tests whether a URL belongs to a configured domain value.
   * A value may be a RegExp, a string (exact host, `*` glob, regex source or `/source/flags`
   * literal, or a parent/child domain), or an array of those.
   */
  _hasDomain(url, domain) {
    if (!domain) return false;
    const host = this.getDomainName(url);
    if (!host) return false;
    const hostLower = host.toLowerCase();

    const looksLikeRegex = (text) =>
      REGEX_HINT.test(text) || text.startsWith("/") || text.includes(".*") || text.includes(".+");

    const matches = (candidate) => {
      if (candidate instanceof RegExp) return candidate.test(host) || candidate.test(url);

      const text = candidate.toString().trim();
      const textLower = text.toLowerCase();
      if (hostLower === textLower) return true;

      if (text.includes("*")) {
        // Glob: escape everything, then turn the escaped `*` back into `.*`.
        const escaped = text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&").replace(/\\\*/g, ".*");
        const glob = new RegExp(`^${escaped}$`, "i");
        return glob.test(host) || glob.test(url);
      }

      if (looksLikeRegex(text)) {
        try {
          let source = text;
          let flags = "i";
          const literal = text.match(/^\/(.*)\/(\w*)$/);
          if (literal) {
            source = literal[1];
            flags = literal[2] || "i";
          }
          const pattern = new RegExp(source, flags);
          return pattern.test(host) || pattern.test(url);
        } catch {
          // Not a valid pattern after all: fall back to a plain substring test.
          return hostLower.includes(textLower);
        }
      }

      // Parent/child relationship in either direction.
      return hostLower.endsWith(`.${textLower}`) || textLower.endsWith(`.${hostLower}`);
    };

    return Array.isArray(domain) ? domain.some(matches) : matches(domain);
  }

  /** Returns the hostname of an http(s) URL, the input itself when it is a bare hostname, else null. */
  getDomainName(value) {
    if (this.isValidUrl(value)) return new URL(value).hostname;
    if (this.isHostName(value)) return value;
    return null;
  }

  /**
   * True for a dotted hostname of at most 255 characters.
   * The pattern previously contained a stray space before the TLD group, so ordinary
   * hostnames such as "example.com" never matched.
   */
  isHostName(value) {
    if (!value) return false;
    if (value.length > 255) return false;
    const host = value.trim().toLowerCase();
    return HOSTNAME_PATTERN.test(host) && !host.startsWith("-") && !host.endsWith("-");
  }

  /** True for a parseable http/https URL whose hostname matches the dotted-hostname pattern. */
  isValidUrl(value) {
    if (!value) return false;
    try {
      const parsed = new URL(value.trim());
      if (!parsed.protocol || !["http:", "https:"].includes(parsed.protocol.toLowerCase())) {
        return false;
      }
      if (!parsed.hostname) return false;
      return HOSTNAME_PATTERN.test(parsed.hostname);
    } catch {
      return false;
    }
  }

  /** Picks one of the pre-generated User-Agent strings at random. */
  getRandomUserAgent() {
    return this.userAgents[Math.floor(Math.random() * this.userAgents.length)];
  }
}

/** Browser name → User-Agent template. */
const USER_AGENT_FORMATS = {
  Chrome: (platform, b) =>
    `Mozilla/5.0 (${platform}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36`,
  Firefox: (platform, b) =>
    `Mozilla/5.0 (${platform}; rv:${b.version}) ${b.engine} Firefox/${b.version}`,
  Safari: (platform, b) =>
    `Mozilla/5.0 (${platform}) ${b.engine} (KHTML, like Gecko) Version/${b.version} Safari/605.1.15`,
  Edge: (platform, b) =>
    `Mozilla/5.0 (${platform}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36 Edg/${b.version}`,
  Opera: (platform, b) =>
    `Mozilla/5.0 (${platform}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36 OPR/${b.version}`,
  Vivaldi: (platform, b) =>
    `Mozilla/5.0 (${platform}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36 Vivaldi/${b.version}`,
  Brave: (platform, b) =>
    `Mozilla/5.0 (${platform}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36 Brave/${b.version}`,
  Chromium: (platform, b) =>
    `Mozilla/5.0 (${platform}) ${b.engine} (KHTML, like Gecko) Chromium/${b.version} Chrome/${b.version} Safari/537.36`,
  Yandex: (platform, b) =>
    `Mozilla/5.0 (${platform}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} YaBrowser/${b.version} Safari/537.36`,
  Maxthon: (platform, b) =>
    `Mozilla/5.0 (${platform}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36 Maxthon/${b.version}`,
};

/** Generates 200 User-Agent strings from random browser/platform combinations. */
function buildUserAgents() {
  const browsers = [
    { name: "Chrome", version: "91.0.4472.124", engine: "AppleWebKit/537.36" },
    { name: "Firefox", version: "89.0", engine: "Gecko/20100101" },
    { name: "Safari", version: "14.1.1", engine: "AppleWebKit/605.1.15" },
    { name: "Edge", version: "91.0.864.59", engine: "AppleWebKit/537.36" },
    { name: "Opera", version: "77.0.4054.277", engine: "AppleWebKit/537.36" },
    { name: "Vivaldi", version: "3.8.2259.42", engine: "AppleWebKit/537.36" },
    { name: "Brave", version: "1.26.74", engine: "AppleWebKit/537.36" },
    { name: "Chromium", version: "91.0.4472.101", engine: "AppleWebKit/537.36" },
    { name: "Yandex", version: "21.5.3.742", engine: "AppleWebKit/537.36" },
    { name: "Maxthon", version: "5.3.8.2000", engine: "AppleWebKit/537.36" },
  ];
  const platforms = [
    "Windows NT 10.0",
    "Windows NT 6.1",
    "Macintosh; Intel Mac OS X 10_15_7",
    "Macintosh; Intel Mac OS X 11_4_0",
    "X11; Linux x86_64",
    "X11; Ubuntu; Linux x86_64",
  ];
  const agents = [];
  for (let n = 0; n < 200; n++) {
    const browser = browsers[Math.floor(Math.random() * browsers.length)];
    const platform = platforms[Math.floor(Math.random() * platforms.length)];
    agents.push(USER_AGENT_FORMATS[browser.name](platform, browser));
  }
  return agents;
}

export { CrawlerOptions };
|
package/dist/crawler/crawler.cjs
CHANGED
|
@@ -9,7 +9,7 @@ const { CappedMap } = require('./plugin/capped-map.cjs');
|
|
|
9
9
|
const { CappedArray } = require('./plugin/capped-array.cjs');
|
|
10
10
|
const { parseHTML } = require("linkedom");
|
|
11
11
|
const path = require("node:path");
|
|
12
|
-
|
|
12
|
+
// This is the CommonJS build (.cjs): ES `import` syntax is a SyntaxError here, so the
// dependency is loaded with require() like the neighbouring modules.
// NOTE(review): tolerates both `module.exports = rezo` and `exports.default = rezo`
// export shapes — confirm which one ../index.cjs actually uses.
const rezoModule = require('../index.cjs');
const rezo = rezoModule.default ?? rezoModule;
const RezoHeaders = rezoModule.RezoHeaders ?? rezo.RezoHeaders;
|
|
13
13
|
const { RezoQueue } = require('../queue/queue.cjs');
|
|
14
14
|
const { Scraper } = require('./scraper.cjs');
|
|
15
15
|
const { CrawlerOptions } = require('./crawler-options.cjs');
|
|
@@ -83,6 +83,7 @@ class Crawler {
|
|
|
83
83
|
healthMetrics;
|
|
84
84
|
originalConcurrency = 100;
|
|
85
85
|
shutdownHandler = null;
|
|
86
|
+
rateLimitedDomains = new Map;
|
|
86
87
|
startHandlers = [];
|
|
87
88
|
finishHandlers = [];
|
|
88
89
|
redirectHandlers = [];
|
|
@@ -101,7 +102,9 @@ class Crawler {
|
|
|
101
102
|
this.config = new CrawlerOptions(crawlerOptions);
|
|
102
103
|
this.adapterType = this.config.adapter;
|
|
103
104
|
if (this.config.stealth) {
|
|
104
|
-
this.http
|
|
105
|
+
this.http = rezo.create({
|
|
106
|
+
stealth: this.config.stealth
|
|
107
|
+
});
|
|
105
108
|
}
|
|
106
109
|
const concurrency = this.config.concurrency;
|
|
107
110
|
this.queue = new RezoQueue({
|
|
@@ -863,7 +866,17 @@ class Crawler {
|
|
|
863
866
|
return 0;
|
|
864
867
|
return this.domainCurrentDelay.get(domain) || this.config.autoThrottleMinDelay;
|
|
865
868
|
}
|
|
866
|
-
async handle429Response(url, response) {
|
|
869
|
+
async handle429Response(url, response, retryCount) {
|
|
870
|
+
const retryOpts = this.config.getRetryOptions(url);
|
|
871
|
+
const max429Retries = retryOpts?.max429Retries ?? 3;
|
|
872
|
+
const baseDelay = retryOpts?.retryDelay ?? 1000;
|
|
873
|
+
const useBackoff = retryOpts?.backoff ?? true;
|
|
874
|
+
if (retryCount >= max429Retries) {
|
|
875
|
+
if (this.config.debug) {
|
|
876
|
+
console.log(`[Crawler] 429 retry cap reached (${max429Retries}) for ${url}, giving up`);
|
|
877
|
+
}
|
|
878
|
+
return { shouldRetry: false, waitTime: 0 };
|
|
879
|
+
}
|
|
867
880
|
let retryAfter = 0;
|
|
868
881
|
const retryAfterHeader = response?.headers?.["retry-after"] || response?.headers?.get?.("retry-after");
|
|
869
882
|
if (retryAfterHeader) {
|
|
@@ -878,7 +891,7 @@ class Crawler {
|
|
|
878
891
|
}
|
|
879
892
|
}
|
|
880
893
|
if (retryAfter <= 0) {
|
|
881
|
-
retryAfter =
|
|
894
|
+
retryAfter = useBackoff ? baseDelay * Math.pow(2, retryCount) : baseDelay;
|
|
882
895
|
}
|
|
883
896
|
const maxWait = this.config.maxWaitOn429;
|
|
884
897
|
const alwaysWait = this.config.alwaysWaitOn429;
|
|
@@ -894,8 +907,12 @@ class Crawler {
|
|
|
894
907
|
const waitMinutes = Math.round(retryAfter / 60000);
|
|
895
908
|
console.warn(`[Crawler] WARNING: Rate limited on ${url}. Server requested ${waitMinutes} minute wait. Waiting because alwaysWaitOn429 is enabled.`);
|
|
896
909
|
}
|
|
910
|
+
try {
|
|
911
|
+
const domain = new URL(url).hostname;
|
|
912
|
+
this.rateLimitedDomains.set(domain, Date.now() + retryAfter);
|
|
913
|
+
} catch {}
|
|
897
914
|
if (this.config.debug) {
|
|
898
|
-
console.log(`[Crawler] 429 Rate Limited: waiting ${Math.round(retryAfter / 1000)}s before retry`);
|
|
915
|
+
console.log(`[Crawler] 429 Rate Limited: waiting ${Math.round(retryAfter / 1000)}s before retry (attempt ${retryCount + 1}/${max429Retries})`);
|
|
899
916
|
}
|
|
900
917
|
return { shouldRetry: true, waitTime: retryAfter };
|
|
901
918
|
}
|
|
@@ -1208,6 +1225,17 @@ class Crawler {
|
|
|
1208
1225
|
if (delay > 0) {
|
|
1209
1226
|
await new Promise((resolve) => setTimeout(resolve, delay));
|
|
1210
1227
|
}
|
|
1228
|
+
const rateLimitUntil = this.rateLimitedDomains.get(domain);
|
|
1229
|
+
if (rateLimitUntil) {
|
|
1230
|
+
const remaining = rateLimitUntil - Date.now();
|
|
1231
|
+
if (remaining > 0) {
|
|
1232
|
+
if (this.config.debug) {
|
|
1233
|
+
console.log(`[Crawler] Domain ${domain} rate-limited, waiting ${Math.round(remaining / 1000)}s`);
|
|
1234
|
+
}
|
|
1235
|
+
await new Promise((resolve) => setTimeout(resolve, remaining));
|
|
1236
|
+
}
|
|
1237
|
+
this.rateLimitedDomains.delete(domain);
|
|
1238
|
+
}
|
|
1211
1239
|
const isVisited = forceRevisit ? false : await this.hasUrlInCache(url);
|
|
1212
1240
|
const cache = method.toLowerCase() === "get" ? await this.getCache(url) : undefined;
|
|
1213
1241
|
if (method.toLowerCase() === "get") {
|
|
@@ -1218,6 +1246,8 @@ class Crawler {
|
|
|
1218
1246
|
}
|
|
1219
1247
|
const requestStartTime = Date.now();
|
|
1220
1248
|
const http = stealthInstanse || this.http;
|
|
1249
|
+
// Leftover debug output: only dump the outgoing headers when debug logging is enabled,
// so normal runs do not print every request's headers to stdout.
if (this.config.debug) {
  console.log(new RezoHeaders(options.headers));
}
|
|
1250
|
+
// Leftover debug output: only dump the client defaults when debug logging is enabled.
if (this.config.debug) {
  console.log(http.defaults);
}
|
|
1221
1251
|
const response = cache && method === "GET" && !skipCache ? cache : oxylabsInstanse && oxylabsOptions ? await oxylabsInstanse.scrape(url) : decodoInstanse && decodoOptions ? await decodoInstanse.scrape(url) : method === "GET" ? await http.get(url, options) : method === "PATCH" ? await http.patch(url, body, options) : method === "POST" ? await http.post(url, body, options) : await http.put(url, body, options);
|
|
1222
1252
|
if (!response) {
|
|
1223
1253
|
this.crawlStats.urlsFailed++;
|
|
@@ -1298,12 +1328,20 @@ class Crawler {
|
|
|
1298
1328
|
}
|
|
1299
1329
|
} catch (e) {
|
|
1300
1330
|
const error = e;
|
|
1331
|
+
// Leftover debug output: the error is still handled below, so only print it when
// debug logging is enabled.
if (this.config.debug) {
  console.log(error);
}
|
|
1301
1332
|
if (error?.response?.status === 429 || error?.status === 429) {
|
|
1302
1333
|
try {
|
|
1303
|
-
const { shouldRetry, waitTime } = await this.handle429Response(url, error.response || error);
|
|
1334
|
+
const { shouldRetry, waitTime } = await this.handle429Response(url, error.response || error, retryCount);
|
|
1304
1335
|
if (shouldRetry) {
|
|
1305
1336
|
await this.sleep(waitTime);
|
|
1306
1337
|
return await this.executeHttp(method, url, body, options, isEmail, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions, retryCount + 1, parentUrl, skipCache, emailMetadata, stealthInstanse);
|
|
1338
|
+
} else {
|
|
1339
|
+
this.crawlStats.urlsFailed++;
|
|
1340
|
+
for (let i = 0;i < this.errorEvents.length; i++) {
|
|
1341
|
+
const event = this.errorEvents[i];
|
|
1342
|
+
this[event.handler](...event.attr, error);
|
|
1343
|
+
}
|
|
1344
|
+
return;
|
|
1307
1345
|
}
|
|
1308
1346
|
} catch (rateLimitError) {
|
|
1309
1347
|
this.crawlStats.urlsFailed++;
|
package/dist/crawler/crawler.js
CHANGED
|
@@ -9,7 +9,7 @@ import { CappedMap } from './plugin/capped-map.js';
|
|
|
9
9
|
import { CappedArray } from './plugin/capped-array.js';
|
|
10
10
|
import { parseHTML } from "linkedom";
|
|
11
11
|
import path from "node:path";
|
|
12
|
-
import rezo from '../
|
|
12
|
+
import rezo, { RezoHeaders } from '../index.js';
|
|
13
13
|
import { RezoQueue } from '../queue/queue.js';
|
|
14
14
|
import { Scraper } from './scraper.js';
|
|
15
15
|
import { CrawlerOptions } from './crawler-options.js';
|
|
@@ -83,6 +83,7 @@ export class Crawler {
|
|
|
83
83
|
healthMetrics;
|
|
84
84
|
originalConcurrency = 100;
|
|
85
85
|
shutdownHandler = null;
|
|
86
|
+
rateLimitedDomains = new Map;
|
|
86
87
|
startHandlers = [];
|
|
87
88
|
finishHandlers = [];
|
|
88
89
|
redirectHandlers = [];
|
|
@@ -101,7 +102,9 @@ export class Crawler {
|
|
|
101
102
|
this.config = new CrawlerOptions(crawlerOptions);
|
|
102
103
|
this.adapterType = this.config.adapter;
|
|
103
104
|
if (this.config.stealth) {
|
|
104
|
-
this.http
|
|
105
|
+
this.http = rezo.create({
|
|
106
|
+
stealth: this.config.stealth
|
|
107
|
+
});
|
|
105
108
|
}
|
|
106
109
|
const concurrency = this.config.concurrency;
|
|
107
110
|
this.queue = new RezoQueue({
|
|
@@ -863,7 +866,17 @@ export class Crawler {
|
|
|
863
866
|
return 0;
|
|
864
867
|
return this.domainCurrentDelay.get(domain) || this.config.autoThrottleMinDelay;
|
|
865
868
|
}
|
|
866
|
-
async handle429Response(url, response) {
|
|
869
|
+
async handle429Response(url, response, retryCount) {
|
|
870
|
+
const retryOpts = this.config.getRetryOptions(url);
|
|
871
|
+
const max429Retries = retryOpts?.max429Retries ?? 3;
|
|
872
|
+
const baseDelay = retryOpts?.retryDelay ?? 1000;
|
|
873
|
+
const useBackoff = retryOpts?.backoff ?? true;
|
|
874
|
+
if (retryCount >= max429Retries) {
|
|
875
|
+
if (this.config.debug) {
|
|
876
|
+
console.log(`[Crawler] 429 retry cap reached (${max429Retries}) for ${url}, giving up`);
|
|
877
|
+
}
|
|
878
|
+
return { shouldRetry: false, waitTime: 0 };
|
|
879
|
+
}
|
|
867
880
|
let retryAfter = 0;
|
|
868
881
|
const retryAfterHeader = response?.headers?.["retry-after"] || response?.headers?.get?.("retry-after");
|
|
869
882
|
if (retryAfterHeader) {
|
|
@@ -878,7 +891,7 @@ export class Crawler {
|
|
|
878
891
|
}
|
|
879
892
|
}
|
|
880
893
|
if (retryAfter <= 0) {
|
|
881
|
-
retryAfter =
|
|
894
|
+
retryAfter = useBackoff ? baseDelay * Math.pow(2, retryCount) : baseDelay;
|
|
882
895
|
}
|
|
883
896
|
const maxWait = this.config.maxWaitOn429;
|
|
884
897
|
const alwaysWait = this.config.alwaysWaitOn429;
|
|
@@ -894,8 +907,12 @@ export class Crawler {
|
|
|
894
907
|
const waitMinutes = Math.round(retryAfter / 60000);
|
|
895
908
|
console.warn(`[Crawler] WARNING: Rate limited on ${url}. Server requested ${waitMinutes} minute wait. Waiting because alwaysWaitOn429 is enabled.`);
|
|
896
909
|
}
|
|
910
|
+
try {
|
|
911
|
+
const domain = new URL(url).hostname;
|
|
912
|
+
this.rateLimitedDomains.set(domain, Date.now() + retryAfter);
|
|
913
|
+
} catch {}
|
|
897
914
|
if (this.config.debug) {
|
|
898
|
-
console.log(`[Crawler] 429 Rate Limited: waiting ${Math.round(retryAfter / 1000)}s before retry`);
|
|
915
|
+
console.log(`[Crawler] 429 Rate Limited: waiting ${Math.round(retryAfter / 1000)}s before retry (attempt ${retryCount + 1}/${max429Retries})`);
|
|
899
916
|
}
|
|
900
917
|
return { shouldRetry: true, waitTime: retryAfter };
|
|
901
918
|
}
|
|
@@ -1208,6 +1225,17 @@ export class Crawler {
|
|
|
1208
1225
|
if (delay > 0) {
|
|
1209
1226
|
await new Promise((resolve) => setTimeout(resolve, delay));
|
|
1210
1227
|
}
|
|
1228
|
+
const rateLimitUntil = this.rateLimitedDomains.get(domain);
|
|
1229
|
+
if (rateLimitUntil) {
|
|
1230
|
+
const remaining = rateLimitUntil - Date.now();
|
|
1231
|
+
if (remaining > 0) {
|
|
1232
|
+
if (this.config.debug) {
|
|
1233
|
+
console.log(`[Crawler] Domain ${domain} rate-limited, waiting ${Math.round(remaining / 1000)}s`);
|
|
1234
|
+
}
|
|
1235
|
+
await new Promise((resolve) => setTimeout(resolve, remaining));
|
|
1236
|
+
}
|
|
1237
|
+
this.rateLimitedDomains.delete(domain);
|
|
1238
|
+
}
|
|
1211
1239
|
const isVisited = forceRevisit ? false : await this.hasUrlInCache(url);
|
|
1212
1240
|
const cache = method.toLowerCase() === "get" ? await this.getCache(url) : undefined;
|
|
1213
1241
|
if (method.toLowerCase() === "get") {
|
|
@@ -1218,6 +1246,8 @@ export class Crawler {
|
|
|
1218
1246
|
}
|
|
1219
1247
|
const requestStartTime = Date.now();
|
|
1220
1248
|
const http = stealthInstanse || this.http;
|
|
1249
|
+
console.log(new RezoHeaders(options.headers));
|
|
1250
|
+
console.log(http.defaults);
|
|
1221
1251
|
const response = cache && method === "GET" && !skipCache ? cache : oxylabsInstanse && oxylabsOptions ? await oxylabsInstanse.scrape(url) : decodoInstanse && decodoOptions ? await decodoInstanse.scrape(url) : method === "GET" ? await http.get(url, options) : method === "PATCH" ? await http.patch(url, body, options) : method === "POST" ? await http.post(url, body, options) : await http.put(url, body, options);
|
|
1222
1252
|
if (!response) {
|
|
1223
1253
|
this.crawlStats.urlsFailed++;
|
|
@@ -1298,12 +1328,20 @@ export class Crawler {
|
|
|
1298
1328
|
}
|
|
1299
1329
|
} catch (e) {
|
|
1300
1330
|
const error = e;
|
|
1331
|
+
console.log(error);
|
|
1301
1332
|
if (error?.response?.status === 429 || error?.status === 429) {
|
|
1302
1333
|
try {
|
|
1303
|
-
const { shouldRetry, waitTime } = await this.handle429Response(url, error.response || error);
|
|
1334
|
+
const { shouldRetry, waitTime } = await this.handle429Response(url, error.response || error, retryCount);
|
|
1304
1335
|
if (shouldRetry) {
|
|
1305
1336
|
await this.sleep(waitTime);
|
|
1306
1337
|
return await this.executeHttp(method, url, body, options, isEmail, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions, retryCount + 1, parentUrl, skipCache, emailMetadata, stealthInstanse);
|
|
1338
|
+
} else {
|
|
1339
|
+
this.crawlStats.urlsFailed++;
|
|
1340
|
+
for (let i = 0;i < this.errorEvents.length; i++) {
|
|
1341
|
+
const event = this.errorEvents[i];
|
|
1342
|
+
this[event.handler](...event.attr, error);
|
|
1343
|
+
}
|
|
1344
|
+
return;
|
|
1307
1345
|
}
|
|
1308
1346
|
} catch (rateLimitError) {
|
|
1309
1347
|
this.crawlStats.urlsFailed++;
|
package/dist/crawler/index.cjs
CHANGED
|
@@ -1,42 +1,42 @@
|
|
|
1
|
-
const
|
|
2
|
-
exports.Crawler =
|
|
3
|
-
const
|
|
4
|
-
exports.CrawlerOptions =
|
|
5
|
-
const
|
|
6
|
-
exports.RobotsTxt =
|
|
7
|
-
const
|
|
8
|
-
exports.FileCacher =
|
|
9
|
-
const
|
|
10
|
-
exports.UrlStore =
|
|
11
|
-
const
|
|
12
|
-
exports.NavigationHistory =
|
|
13
|
-
const
|
|
14
|
-
exports.Oxylabs =
|
|
15
|
-
const
|
|
16
|
-
exports.OXYLABS_BROWSER_TYPES =
|
|
17
|
-
exports.OXYLABS_COMMON_LOCALES =
|
|
18
|
-
exports.OXYLABS_COMMON_GEO_LOCATIONS =
|
|
19
|
-
exports.OXYLABS_US_STATES =
|
|
20
|
-
exports.OXYLABS_EUROPEAN_COUNTRIES =
|
|
21
|
-
exports.OXYLABS_ASIAN_COUNTRIES =
|
|
22
|
-
exports.getRandomOxylabsBrowserType =
|
|
23
|
-
exports.getRandomOxylabsLocale =
|
|
24
|
-
exports.getRandomOxylabsGeoLocation =
|
|
25
|
-
const
|
|
26
|
-
exports.isRestrictedDomain =
|
|
27
|
-
const
|
|
28
|
-
exports.Decodo =
|
|
29
|
-
const
|
|
30
|
-
exports.DECODO_DEVICE_TYPES =
|
|
31
|
-
exports.DECODO_HEADLESS_MODES =
|
|
32
|
-
exports.DECODO_COMMON_LOCALES =
|
|
33
|
-
exports.DECODO_COMMON_COUNTRIES =
|
|
34
|
-
exports.DECODO_EUROPEAN_COUNTRIES =
|
|
35
|
-
exports.DECODO_ASIAN_COUNTRIES =
|
|
36
|
-
exports.DECODO_US_STATES =
|
|
37
|
-
exports.DECODO_COMMON_CITIES =
|
|
38
|
-
exports.getRandomDecodoDeviceType =
|
|
39
|
-
exports.getRandomDecodoLocale =
|
|
40
|
-
exports.getRandomDecodoCountry =
|
|
41
|
-
exports.getRandomDecodoCity =
|
|
42
|
-
exports.generateDecodoSessionId =
|
|
1
|
+
const _mod_8bpcyt = require('./crawler.cjs');
|
|
2
|
+
exports.Crawler = _mod_8bpcyt.Crawler;;
|
|
3
|
+
const _mod_988v6z = require('./crawler-options.cjs');
|
|
4
|
+
exports.CrawlerOptions = _mod_988v6z.CrawlerOptions;;
|
|
5
|
+
const _mod_4plonu = require('./plugin/robots-txt.cjs');
|
|
6
|
+
exports.RobotsTxt = _mod_4plonu.RobotsTxt;;
|
|
7
|
+
const _mod_ev7lys = require('./plugin/file-cacher.cjs');
|
|
8
|
+
exports.FileCacher = _mod_ev7lys.FileCacher;;
|
|
9
|
+
const _mod_jcjmld = require('./plugin/url-store.cjs');
|
|
10
|
+
exports.UrlStore = _mod_jcjmld.UrlStore;;
|
|
11
|
+
const _mod_s1b6ok = require('./plugin/navigation-history.cjs');
|
|
12
|
+
exports.NavigationHistory = _mod_s1b6ok.NavigationHistory;;
|
|
13
|
+
const _mod_ekix3v = require('./addon/oxylabs/index.cjs');
|
|
14
|
+
exports.Oxylabs = _mod_ekix3v.Oxylabs;;
|
|
15
|
+
const _mod_unv0ew = require('./addon/oxylabs/options.cjs');
|
|
16
|
+
exports.OXYLABS_BROWSER_TYPES = _mod_unv0ew.OXYLABS_BROWSER_TYPES;
|
|
17
|
+
exports.OXYLABS_COMMON_LOCALES = _mod_unv0ew.OXYLABS_COMMON_LOCALES;
|
|
18
|
+
exports.OXYLABS_COMMON_GEO_LOCATIONS = _mod_unv0ew.OXYLABS_COMMON_GEO_LOCATIONS;
|
|
19
|
+
exports.OXYLABS_US_STATES = _mod_unv0ew.OXYLABS_US_STATES;
|
|
20
|
+
exports.OXYLABS_EUROPEAN_COUNTRIES = _mod_unv0ew.OXYLABS_EUROPEAN_COUNTRIES;
|
|
21
|
+
exports.OXYLABS_ASIAN_COUNTRIES = _mod_unv0ew.OXYLABS_ASIAN_COUNTRIES;
|
|
22
|
+
exports.getRandomOxylabsBrowserType = _mod_unv0ew.getRandomBrowserType;
|
|
23
|
+
exports.getRandomOxylabsLocale = _mod_unv0ew.getRandomLocale;
|
|
24
|
+
exports.getRandomOxylabsGeoLocation = _mod_unv0ew.getRandomGeoLocation;;
|
|
25
|
+
const _mod_fcrp4b = require('./scraper.cjs');
|
|
26
|
+
exports.isRestrictedDomain = _mod_fcrp4b.isRestrictedDomain;;
|
|
27
|
+
const _mod_wte3po = require('./addon/decodo/index.cjs');
|
|
28
|
+
exports.Decodo = _mod_wte3po.Decodo;;
|
|
29
|
+
const _mod_mllxcw = require('./addon/decodo/options.cjs');
|
|
30
|
+
exports.DECODO_DEVICE_TYPES = _mod_mllxcw.DECODO_DEVICE_TYPES;
|
|
31
|
+
exports.DECODO_HEADLESS_MODES = _mod_mllxcw.DECODO_HEADLESS_MODES;
|
|
32
|
+
exports.DECODO_COMMON_LOCALES = _mod_mllxcw.DECODO_COMMON_LOCALES;
|
|
33
|
+
exports.DECODO_COMMON_COUNTRIES = _mod_mllxcw.DECODO_COMMON_COUNTRIES;
|
|
34
|
+
exports.DECODO_EUROPEAN_COUNTRIES = _mod_mllxcw.DECODO_EUROPEAN_COUNTRIES;
|
|
35
|
+
exports.DECODO_ASIAN_COUNTRIES = _mod_mllxcw.DECODO_ASIAN_COUNTRIES;
|
|
36
|
+
exports.DECODO_US_STATES = _mod_mllxcw.DECODO_US_STATES;
|
|
37
|
+
exports.DECODO_COMMON_CITIES = _mod_mllxcw.DECODO_COMMON_CITIES;
|
|
38
|
+
exports.getRandomDecodoDeviceType = _mod_mllxcw.getRandomDeviceType;
|
|
39
|
+
exports.getRandomDecodoLocale = _mod_mllxcw.getRandomLocale;
|
|
40
|
+
exports.getRandomDecodoCountry = _mod_mllxcw.getRandomCountry;
|
|
41
|
+
exports.getRandomDecodoCity = _mod_mllxcw.getRandomCity;
|
|
42
|
+
exports.generateDecodoSessionId = _mod_mllxcw.generateSessionId;;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
var e=require("./file-cacher.cjs");exports.FileCacher=e.FileCacher;var r=require("./url-store.cjs");exports.UrlStore=r.UrlStore;var o=require("./result-stream.cjs");exports.ResultStream=o.ResultStream;var t=require("./memory-monitor.cjs");exports.MemoryMonitor=t.MemoryMonitor;var a=require("./health-metrics.cjs");exports.HealthMetrics=a.HealthMetrics;var i=require("./capped-map.cjs");exports.CappedMap=i.CappedMap;var
|
|
1
|
+
var e=require("./file-cacher.cjs");exports.FileCacher=e.FileCacher;var r=require("./url-store.cjs");exports.UrlStore=r.UrlStore;var o=require("./result-stream.cjs");exports.ResultStream=o.ResultStream;var t=require("./memory-monitor.cjs");exports.MemoryMonitor=t.MemoryMonitor;var a=require("./health-metrics.cjs");exports.HealthMetrics=a.HealthMetrics;var i=require("./capped-map.cjs");exports.CappedMap=i.CappedMap;var m=require("./capped-array.cjs");exports.CappedArray=m.CappedArray;
|
package/dist/crawler.d.ts
CHANGED
|
@@ -7334,6 +7334,25 @@ export type Domain = string[] | string | RegExp;
|
|
|
7334
7334
|
* - 'fetch': Browser-compatible Fetch API adapter
|
|
7335
7335
|
*/
|
|
7336
7336
|
export type CrawlerAdapterType = "http" | "http2" | "curl" | "fetch";
|
|
7337
|
+
/**
|
|
7338
|
+
* Per-domain retry/rate-limit options, attachable via addLimiter()
|
|
7339
|
+
*/
|
|
7340
|
+
export interface LimiterRetryOptions {
|
|
7341
|
+
/** Enable retry handling for this domain (default: true) */
|
|
7342
|
+
enable: boolean;
|
|
7343
|
+
/** Max retries on 429 before giving up on that URL (default: 3) */
|
|
7344
|
+
max429Retries?: number;
|
|
7345
|
+
/** Base delay in ms for retry (default: 1000) */
|
|
7346
|
+
retryDelay?: number;
|
|
7347
|
+
/** Status codes to retry on (e.g. [500, 502, 503]) */
|
|
7348
|
+
retryOnStatusCode?: number[];
|
|
7349
|
+
/** Status codes that should retry without proxy */
|
|
7350
|
+
retryWithoutProxyOnStatusCode?: number[];
|
|
7351
|
+
/** Max retry attempts for non-429 errors (default: 3) */
|
|
7352
|
+
maxRetryAttempts?: number;
|
|
7353
|
+
/** Use exponential backoff — doubles delay on each retry (default: true) */
|
|
7354
|
+
backoff?: boolean;
|
|
7355
|
+
}
|
|
7337
7356
|
/**
|
|
7338
7357
|
* Configuration interface for the CrawlerOptions class
|
|
7339
7358
|
* @description Defines all available options for configuring web crawler behavior,
|
|
@@ -7823,8 +7842,13 @@ export declare class CrawlerOptions {
|
|
|
7823
7842
|
addLimiter(options: {
|
|
7824
7843
|
domain: Domain;
|
|
7825
7844
|
isGlobal?: boolean;
|
|
7845
|
+
} & ({
|
|
7826
7846
|
options: queueOptions$1;
|
|
7827
|
-
|
|
7847
|
+
retry?: LimiterRetryOptions;
|
|
7848
|
+
} | {
|
|
7849
|
+
retry: LimiterRetryOptions;
|
|
7850
|
+
options?: queueOptions$1;
|
|
7851
|
+
})): this;
|
|
7828
7852
|
/**
|
|
7829
7853
|
* Add Oxylabs proxy service configuration for specific domains or globally
|
|
7830
7854
|
* @param options - Configuration object containing domain pattern, Oxylabs settings, and global flag
|
|
@@ -7964,6 +7988,12 @@ export declare class CrawlerOptions {
|
|
|
7964
7988
|
getAdapter(url: string, type: "decodo", useGlobal?: boolean, random?: boolean): Decodo | null;
|
|
7965
7989
|
getAdapter(url: string, type: "stealth", useGlobal?: boolean, random?: boolean): Rezo | null;
|
|
7966
7990
|
getAdapter(url: string, type: "headers", useGlobal?: boolean, random?: boolean): OutgoingHttpHeaders | null;
|
|
7991
|
+
/**
|
|
7992
|
+
* Get retry options for a specific URL from its domain limiter config
|
|
7993
|
+
* @param url - The URL to look up retry options for
|
|
7994
|
+
* @returns LimiterRetryOptions if configured for this domain, null otherwise
|
|
7995
|
+
*/
|
|
7996
|
+
getRetryOptions(url: string): LimiterRetryOptions | null;
|
|
7967
7997
|
/**
|
|
7968
7998
|
* Generate a random integer between min and max values (inclusive)
|
|
7969
7999
|
* @param min - Minimum value (default: 0)
|
|
@@ -8239,6 +8269,8 @@ export declare class Crawler {
|
|
|
8239
8269
|
private originalConcurrency;
|
|
8240
8270
|
/** Shutdown handler reference for cleanup */
|
|
8241
8271
|
private shutdownHandler;
|
|
8272
|
+
/** Per-domain rate limit tracking: domain → backoff-until timestamp */
|
|
8273
|
+
private rateLimitedDomains;
|
|
8242
8274
|
/** Lifecycle event handlers */
|
|
8243
8275
|
private startHandlers;
|
|
8244
8276
|
private finishHandlers;
|
|
@@ -8772,7 +8804,8 @@ export declare class Crawler {
|
|
|
8772
8804
|
*/
|
|
8773
8805
|
private getAutoThrottleDelay;
|
|
8774
8806
|
/**
|
|
8775
|
-
* Handle 429 Too Many Requests response with
|
|
8807
|
+
* Handle 429 Too Many Requests response with retry cap, exponential backoff, and per-domain tracking.
|
|
8808
|
+
* Uses domain-specific LimiterRetryOptions when configured via addLimiter(), otherwise falls back to global config.
|
|
8776
8809
|
*/
|
|
8777
8810
|
private handle429Response;
|
|
8778
8811
|
/**
|
package/dist/entries/crawler.cjs
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
const
|
|
2
|
-
exports.Crawler =
|
|
3
|
-
const
|
|
4
|
-
exports.CrawlerOptions =
|
|
5
|
-
const
|
|
6
|
-
exports.isRestrictedDomain =
|
|
1
|
+
const _mod_u5d3zp = require('../crawler/crawler.cjs');
|
|
2
|
+
exports.Crawler = _mod_u5d3zp.Crawler;;
|
|
3
|
+
const _mod_w6q6rd = require('../crawler/crawler-options.cjs');
|
|
4
|
+
exports.CrawlerOptions = _mod_w6q6rd.CrawlerOptions;;
|
|
5
|
+
const _mod_rcnka5 = require('../crawler/scraper.cjs');
|
|
6
|
+
exports.isRestrictedDomain = _mod_rcnka5.isRestrictedDomain;;
|
package/dist/index.cjs
CHANGED
|
@@ -1,41 +1,41 @@
|
|
|
1
|
-
const
|
|
2
|
-
exports.Rezo =
|
|
3
|
-
exports.createRezoInstance =
|
|
4
|
-
exports.createDefaultInstance =
|
|
5
|
-
const
|
|
6
|
-
exports.RezoError =
|
|
7
|
-
exports.RezoErrorCode =
|
|
8
|
-
const
|
|
9
|
-
exports.RezoHeaders =
|
|
10
|
-
const
|
|
11
|
-
exports.RezoFormData =
|
|
12
|
-
const
|
|
13
|
-
exports.RezoCookieJar =
|
|
14
|
-
exports.CookieJar =
|
|
15
|
-
exports.Cookie =
|
|
16
|
-
exports.Store =
|
|
17
|
-
const
|
|
18
|
-
exports.RezoCookieStore =
|
|
19
|
-
const
|
|
20
|
-
exports.toCurl =
|
|
21
|
-
exports.fromCurl =
|
|
22
|
-
const
|
|
23
|
-
exports.createDefaultHooks =
|
|
24
|
-
exports.mergeHooks =
|
|
25
|
-
const
|
|
26
|
-
exports.ProxyManager =
|
|
27
|
-
const
|
|
28
|
-
exports.RezoStealth =
|
|
29
|
-
const
|
|
30
|
-
exports.listProfiles =
|
|
31
|
-
exports.getProfile =
|
|
32
|
-
exports.getProfilesByFamily =
|
|
33
|
-
const
|
|
34
|
-
exports.RezoQueue =
|
|
35
|
-
exports.HttpQueue =
|
|
36
|
-
exports.RezoHttpQueue =
|
|
37
|
-
exports.Priority =
|
|
38
|
-
exports.HttpMethodPriority =
|
|
1
|
+
const _mod_4zcq5u = require('./core/rezo.cjs');
|
|
2
|
+
exports.Rezo = _mod_4zcq5u.Rezo;
|
|
3
|
+
exports.createRezoInstance = _mod_4zcq5u.createRezoInstance;
|
|
4
|
+
exports.createDefaultInstance = _mod_4zcq5u.createDefaultInstance;;
|
|
5
|
+
const _mod_370086 = require('./errors/rezo-error.cjs');
|
|
6
|
+
exports.RezoError = _mod_370086.RezoError;
|
|
7
|
+
exports.RezoErrorCode = _mod_370086.RezoErrorCode;;
|
|
8
|
+
const _mod_wc6ukr = require('./utils/headers.cjs');
|
|
9
|
+
exports.RezoHeaders = _mod_wc6ukr.RezoHeaders;;
|
|
10
|
+
const _mod_6492nx = require('./utils/form-data.cjs');
|
|
11
|
+
exports.RezoFormData = _mod_6492nx.RezoFormData;;
|
|
12
|
+
const _mod_fyvxa9 = require('./cookies/cookie-jar.cjs');
|
|
13
|
+
exports.RezoCookieJar = _mod_fyvxa9.RezoCookieJar;
|
|
14
|
+
exports.CookieJar = _mod_fyvxa9.CookieJar;
|
|
15
|
+
exports.Cookie = _mod_fyvxa9.Cookie;
|
|
16
|
+
exports.Store = _mod_fyvxa9.Store;;
|
|
17
|
+
const _mod_kce1ch = require('./cookies/cookie-store.cjs');
|
|
18
|
+
exports.RezoCookieStore = _mod_kce1ch.RezoCookieStore;;
|
|
19
|
+
const _mod_ysqe1g = require('./utils/curl.cjs');
|
|
20
|
+
exports.toCurl = _mod_ysqe1g.toCurl;
|
|
21
|
+
exports.fromCurl = _mod_ysqe1g.fromCurl;;
|
|
22
|
+
const _mod_e1uka3 = require('./core/hooks.cjs');
|
|
23
|
+
exports.createDefaultHooks = _mod_e1uka3.createDefaultHooks;
|
|
24
|
+
exports.mergeHooks = _mod_e1uka3.mergeHooks;;
|
|
25
|
+
const _mod_prmnn3 = require('./proxy/manager.cjs');
|
|
26
|
+
exports.ProxyManager = _mod_prmnn3.ProxyManager;;
|
|
27
|
+
const _mod_jsw5rc = require('./stealth/stealth.cjs');
|
|
28
|
+
exports.RezoStealth = _mod_jsw5rc.RezoStealth;;
|
|
29
|
+
const _mod_brkcii = require('./stealth/profiles/index.cjs');
|
|
30
|
+
exports.listProfiles = _mod_brkcii.listProfiles;
|
|
31
|
+
exports.getProfile = _mod_brkcii.getProfile;
|
|
32
|
+
exports.getProfilesByFamily = _mod_brkcii.getProfilesByFamily;;
|
|
33
|
+
const _mod_sna33b = require('./queue/index.cjs');
|
|
34
|
+
exports.RezoQueue = _mod_sna33b.RezoQueue;
|
|
35
|
+
exports.HttpQueue = _mod_sna33b.HttpQueue;
|
|
36
|
+
exports.RezoHttpQueue = _mod_sna33b.RezoHttpQueue;
|
|
37
|
+
exports.Priority = _mod_sna33b.Priority;
|
|
38
|
+
exports.HttpMethodPriority = _mod_sna33b.HttpMethodPriority;;
|
|
39
39
|
const { RezoError } = require('./errors/rezo-error.cjs');
|
|
40
40
|
const isRezoError = exports.isRezoError = RezoError.isRezoError;
|
|
41
41
|
const Cancel = exports.Cancel = RezoError;
|
|
@@ -45,9 +45,9 @@ const isCancel = exports.isCancel = (error) => {
|
|
|
45
45
|
};
|
|
46
46
|
const all = exports.all = Promise.all.bind(Promise);
|
|
47
47
|
const spread = exports.spread = (callback) => (array) => callback(...array);
|
|
48
|
-
const
|
|
49
|
-
exports.VERSION =
|
|
50
|
-
exports.PACKAGE_NAME =
|
|
48
|
+
const _mod_fi788b = require('./version.cjs');
|
|
49
|
+
exports.VERSION = _mod_fi788b.VERSION;
|
|
50
|
+
exports.PACKAGE_NAME = _mod_fi788b.PACKAGE_NAME;;
|
|
51
51
|
const { executeRequest } = require('./adapters/http.cjs');
|
|
52
52
|
const { setGlobalAdapter, createRezoInstance } = require('./core/rezo.cjs');
|
|
53
53
|
setGlobalAdapter(executeRequest);
|
package/dist/index.d.ts
CHANGED
|
@@ -5736,7 +5736,7 @@ export declare function listProfiles(): string[];
|
|
|
5736
5736
|
*
|
|
5737
5737
|
* IMPORTANT: Update these values when bumping package version.
|
|
5738
5738
|
*/
|
|
5739
|
-
export declare const VERSION = "1.0.102";
|
|
5739
|
+
export declare const VERSION = "1.0.103";
|
|
5740
5740
|
export declare const PACKAGE_NAME = "rezo";
|
|
5741
5741
|
export declare const isRezoError: typeof RezoError.isRezoError;
|
|
5742
5742
|
export declare const Cancel: typeof RezoError;
|
|
@@ -1,14 +1,14 @@
|
|
|
1
|
-
const
|
|
2
|
-
exports.Agent =
|
|
3
|
-
const
|
|
4
|
-
exports.HttpProxyAgent =
|
|
5
|
-
const
|
|
6
|
-
exports.HttpsProxyAgent =
|
|
7
|
-
const
|
|
8
|
-
exports.SocksProxyAgent =
|
|
9
|
-
const
|
|
10
|
-
exports.SocksClient =
|
|
11
|
-
const
|
|
12
|
-
exports.bunHttp =
|
|
13
|
-
exports.isBunRuntime =
|
|
14
|
-
exports.isBunSocksRequest =
|
|
1
|
+
const _mod_y8ufew = require('./base.cjs');
|
|
2
|
+
exports.Agent = _mod_y8ufew.Agent;;
|
|
3
|
+
const _mod_5j5k29 = require('./http-proxy.cjs');
|
|
4
|
+
exports.HttpProxyAgent = _mod_5j5k29.HttpProxyAgent;;
|
|
5
|
+
const _mod_b781z2 = require('./https-proxy.cjs');
|
|
6
|
+
exports.HttpsProxyAgent = _mod_b781z2.HttpsProxyAgent;;
|
|
7
|
+
const _mod_w4vo7h = require('./socks-proxy.cjs');
|
|
8
|
+
exports.SocksProxyAgent = _mod_w4vo7h.SocksProxyAgent;;
|
|
9
|
+
const _mod_xo6q9q = require('./socks-client.cjs');
|
|
10
|
+
exports.SocksClient = _mod_xo6q9q.SocksClient;;
|
|
11
|
+
const _mod_cbcssg = require('./bun-socks-http.cjs');
|
|
12
|
+
exports.bunHttp = _mod_cbcssg.bunHttp;
|
|
13
|
+
exports.isBunRuntime = _mod_cbcssg.isBunRuntime;
|
|
14
|
+
exports.isBunSocksRequest = _mod_cbcssg.isBunSocksRequest;;
|
|
@@ -5514,7 +5514,7 @@ export interface RezoInstance extends Rezo, RezoCallable {
|
|
|
5514
5514
|
*
|
|
5515
5515
|
* IMPORTANT: Update these values when bumping package version.
|
|
5516
5516
|
*/
|
|
5517
|
-
export declare const VERSION = "1.0.102";
|
|
5517
|
+
export declare const VERSION = "1.0.103";
|
|
5518
5518
|
export declare const isRezoError: typeof RezoError.isRezoError;
|
|
5519
5519
|
export declare const Cancel: typeof RezoError;
|
|
5520
5520
|
export declare const CancelToken: {
|
package/dist/platform/bun.d.ts
CHANGED
|
@@ -5514,7 +5514,7 @@ export interface RezoInstance extends Rezo, RezoCallable {
|
|
|
5514
5514
|
*
|
|
5515
5515
|
* IMPORTANT: Update these values when bumping package version.
|
|
5516
5516
|
*/
|
|
5517
|
-
export declare const VERSION = "1.0.102";
|
|
5517
|
+
export declare const VERSION = "1.0.103";
|
|
5518
5518
|
export declare const isRezoError: typeof RezoError.isRezoError;
|
|
5519
5519
|
export declare const Cancel: typeof RezoError;
|
|
5520
5520
|
export declare const CancelToken: {
|
package/dist/platform/deno.d.ts
CHANGED
|
@@ -5514,7 +5514,7 @@ export interface RezoInstance extends Rezo, RezoCallable {
|
|
|
5514
5514
|
*
|
|
5515
5515
|
* IMPORTANT: Update these values when bumping package version.
|
|
5516
5516
|
*/
|
|
5517
|
-
export declare const VERSION = "1.0.102";
|
|
5517
|
+
export declare const VERSION = "1.0.103";
|
|
5518
5518
|
export declare const isRezoError: typeof RezoError.isRezoError;
|
|
5519
5519
|
export declare const Cancel: typeof RezoError;
|
|
5520
5520
|
export declare const CancelToken: {
|
package/dist/platform/node.d.ts
CHANGED
|
@@ -5514,7 +5514,7 @@ export interface RezoInstance extends Rezo, RezoCallable {
|
|
|
5514
5514
|
*
|
|
5515
5515
|
* IMPORTANT: Update these values when bumping package version.
|
|
5516
5516
|
*/
|
|
5517
|
-
export declare const VERSION = "1.0.102";
|
|
5517
|
+
export declare const VERSION = "1.0.103";
|
|
5518
5518
|
export declare const isRezoError: typeof RezoError.isRezoError;
|
|
5519
5519
|
export declare const Cancel: typeof RezoError;
|
|
5520
5520
|
export declare const CancelToken: {
|
|
@@ -5514,7 +5514,7 @@ export interface RezoInstance extends Rezo, RezoCallable {
|
|
|
5514
5514
|
*
|
|
5515
5515
|
* IMPORTANT: Update these values when bumping package version.
|
|
5516
5516
|
*/
|
|
5517
|
-
export declare const VERSION = "1.0.102";
|
|
5517
|
+
export declare const VERSION = "1.0.103";
|
|
5518
5518
|
export declare const isRezoError: typeof RezoError.isRezoError;
|
|
5519
5519
|
export declare const Cancel: typeof RezoError;
|
|
5520
5520
|
export declare const CancelToken: {
|
|
@@ -5514,7 +5514,7 @@ export interface RezoInstance extends Rezo, RezoCallable {
|
|
|
5514
5514
|
*
|
|
5515
5515
|
* IMPORTANT: Update these values when bumping package version.
|
|
5516
5516
|
*/
|
|
5517
|
-
export declare const VERSION = "1.0.102";
|
|
5517
|
+
export declare const VERSION = "1.0.103";
|
|
5518
5518
|
export declare const isRezoError: typeof RezoError.isRezoError;
|
|
5519
5519
|
export declare const Cancel: typeof RezoError;
|
|
5520
5520
|
export declare const CancelToken: {
|
package/dist/proxy/index.cjs
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
const { Agent, HttpProxyAgent, HttpsProxyAgent, SocksProxyAgent } = require('../internal/agents/index.cjs');
|
|
2
2
|
const { parseProxyString } = require('./parse.cjs');
|
|
3
|
-
const
|
|
4
|
-
exports.ProxyManager =
|
|
5
|
-
const
|
|
6
|
-
exports.parseProxyString =
|
|
3
|
+
const _mod_r8nh4v = require('./manager.cjs');
|
|
4
|
+
exports.ProxyManager = _mod_r8nh4v.ProxyManager;;
|
|
5
|
+
const _mod_28d64o = require('./parse.cjs');
|
|
6
|
+
exports.parseProxyString = _mod_28d64o.parseProxyString;;
|
|
7
7
|
function createOptions(uri, opts) {
|
|
8
8
|
if (uri instanceof URL || typeof uri === "string") {
|
|
9
9
|
return {
|
package/dist/queue/index.cjs
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
const
|
|
2
|
-
exports.RezoQueue =
|
|
3
|
-
const
|
|
4
|
-
exports.HttpQueue =
|
|
5
|
-
exports.RezoHttpQueue =
|
|
6
|
-
exports.extractDomain =
|
|
7
|
-
const
|
|
8
|
-
exports.Priority =
|
|
9
|
-
exports.HttpMethodPriority =
|
|
1
|
+
const _mod_2iykbp = require('./queue.cjs');
|
|
2
|
+
exports.RezoQueue = _mod_2iykbp.RezoQueue;;
|
|
3
|
+
const _mod_0gzglg = require('./http-queue.cjs');
|
|
4
|
+
exports.HttpQueue = _mod_0gzglg.HttpQueue;
|
|
5
|
+
exports.RezoHttpQueue = _mod_0gzglg.HttpQueue;
|
|
6
|
+
exports.extractDomain = _mod_0gzglg.extractDomain;;
|
|
7
|
+
const _mod_rst962 = require('./types.cjs');
|
|
8
|
+
exports.Priority = _mod_rst962.Priority;
|
|
9
|
+
exports.HttpMethodPriority = _mod_rst962.HttpMethodPriority;;
|
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
const
|
|
2
|
-
exports.UniversalEventEmitter =
|
|
3
|
-
const
|
|
4
|
-
exports.UniversalStreamResponse =
|
|
5
|
-
exports.StreamResponse =
|
|
6
|
-
const
|
|
7
|
-
exports.UniversalDownloadResponse =
|
|
8
|
-
exports.DownloadResponse =
|
|
9
|
-
const
|
|
10
|
-
exports.UniversalUploadResponse =
|
|
11
|
-
exports.UploadResponse =
|
|
1
|
+
const _mod_0qnk0d = require('./event-emitter.cjs');
|
|
2
|
+
exports.UniversalEventEmitter = _mod_0qnk0d.UniversalEventEmitter;;
|
|
3
|
+
const _mod_y2ec7n = require('./stream.cjs');
|
|
4
|
+
exports.UniversalStreamResponse = _mod_y2ec7n.UniversalStreamResponse;
|
|
5
|
+
exports.StreamResponse = _mod_y2ec7n.StreamResponse;;
|
|
6
|
+
const _mod_6hn0du = require('./download.cjs');
|
|
7
|
+
exports.UniversalDownloadResponse = _mod_6hn0du.UniversalDownloadResponse;
|
|
8
|
+
exports.DownloadResponse = _mod_6hn0du.DownloadResponse;;
|
|
9
|
+
const _mod_hi9myu = require('./upload.cjs');
|
|
10
|
+
exports.UniversalUploadResponse = _mod_hi9myu.UniversalUploadResponse;
|
|
11
|
+
exports.UploadResponse = _mod_hi9myu.UploadResponse;;
|
package/dist/stealth/index.cjs
CHANGED
|
@@ -1,17 +1,17 @@
|
|
|
1
|
-
const
|
|
2
|
-
exports.RezoStealth =
|
|
3
|
-
const
|
|
4
|
-
exports.createSecureContext =
|
|
5
|
-
exports.buildTlsOptions =
|
|
6
|
-
const
|
|
7
|
-
exports.resolveProfile =
|
|
8
|
-
exports.detectProfileFromUserAgent =
|
|
9
|
-
const
|
|
10
|
-
exports.getProfile =
|
|
11
|
-
exports.getProfilesByFamily =
|
|
12
|
-
exports.getProfilesByDevice =
|
|
13
|
-
exports.getRandomProfile =
|
|
14
|
-
exports.getRandomProfileByFamily =
|
|
15
|
-
exports.listProfiles =
|
|
16
|
-
exports.listProfilesByFamily =
|
|
17
|
-
exports.PROFILE_REGISTRY =
|
|
1
|
+
const _mod_lb9c38 = require('./stealth.cjs');
|
|
2
|
+
exports.RezoStealth = _mod_lb9c38.RezoStealth;;
|
|
3
|
+
const _mod_25yxla = require('./tls-fingerprint.cjs');
|
|
4
|
+
exports.createSecureContext = _mod_25yxla.createSecureContext;
|
|
5
|
+
exports.buildTlsOptions = _mod_25yxla.buildTlsOptions;;
|
|
6
|
+
const _mod_iuoyzn = require('./resolver.cjs');
|
|
7
|
+
exports.resolveProfile = _mod_iuoyzn.resolveProfile;
|
|
8
|
+
exports.detectProfileFromUserAgent = _mod_iuoyzn.detectProfileFromUserAgent;;
|
|
9
|
+
const _mod_asoox9 = require('./profiles/index.cjs');
|
|
10
|
+
exports.getProfile = _mod_asoox9.getProfile;
|
|
11
|
+
exports.getProfilesByFamily = _mod_asoox9.getProfilesByFamily;
|
|
12
|
+
exports.getProfilesByDevice = _mod_asoox9.getProfilesByDevice;
|
|
13
|
+
exports.getRandomProfile = _mod_asoox9.getRandomProfile;
|
|
14
|
+
exports.getRandomProfileByFamily = _mod_asoox9.getRandomProfileByFamily;
|
|
15
|
+
exports.listProfiles = _mod_asoox9.listProfiles;
|
|
16
|
+
exports.listProfilesByFamily = _mod_asoox9.listProfilesByFamily;
|
|
17
|
+
exports.PROFILE_REGISTRY = _mod_asoox9.PROFILE_REGISTRY;;
|
|
@@ -43,16 +43,16 @@ function getRandomProfileByFamily(family) {
|
|
|
43
43
|
throw new Error(`No profiles found for family: ${family}`);
|
|
44
44
|
return profiles[Math.floor(Math.random() * profiles.length)];
|
|
45
45
|
}
|
|
46
|
-
const
|
|
47
|
-
exports.expandPseudoOrder =
|
|
48
|
-
const
|
|
49
|
-
exports.CHROME_PROFILES =
|
|
50
|
-
const
|
|
51
|
-
exports.FIREFOX_PROFILES =
|
|
52
|
-
const
|
|
53
|
-
exports.SAFARI_PROFILES =
|
|
54
|
-
const
|
|
55
|
-
exports.EDGE_PROFILES =
|
|
46
|
+
const _mod_eo8qle = require('./constants.cjs');
|
|
47
|
+
exports.expandPseudoOrder = _mod_eo8qle.expandPseudoOrder;;
|
|
48
|
+
const _mod_6wzedx = require('./chrome-profiles.cjs');
|
|
49
|
+
exports.CHROME_PROFILES = _mod_6wzedx.CHROME_PROFILES;;
|
|
50
|
+
const _mod_x85qd3 = require('./firefox-profiles.cjs');
|
|
51
|
+
exports.FIREFOX_PROFILES = _mod_x85qd3.FIREFOX_PROFILES;;
|
|
52
|
+
const _mod_fqnol1 = require('./safari-profiles.cjs');
|
|
53
|
+
exports.SAFARI_PROFILES = _mod_fqnol1.SAFARI_PROFILES;;
|
|
54
|
+
const _mod_zu0qne = require('./edge-profiles.cjs');
|
|
55
|
+
exports.EDGE_PROFILES = _mod_zu0qne.EDGE_PROFILES;;
|
|
56
56
|
|
|
57
57
|
exports.getProfile = getProfile;
|
|
58
58
|
exports.getProfilesByFamily = getProfilesByFamily;
|
package/dist/version.cjs
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
const VERSION = exports.VERSION = "1.0.102";
|
|
1
|
+
const VERSION = exports.VERSION = "1.0.103";
|
|
2
2
|
const PACKAGE_NAME = exports.PACKAGE_NAME = "rezo";
|
package/dist/version.js
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
export const VERSION = "1.0.102";
|
|
1
|
+
export const VERSION = "1.0.103";
|
|
2
2
|
export const PACKAGE_NAME = "rezo";
|
package/dist/wget/index.cjs
CHANGED
|
@@ -1,54 +1,54 @@
|
|
|
1
|
-
const
|
|
2
|
-
exports.WgetError =
|
|
3
|
-
const
|
|
4
|
-
exports.AssetExtractor =
|
|
5
|
-
const
|
|
6
|
-
exports.UrlFilter =
|
|
7
|
-
const
|
|
8
|
-
exports.FileWriter =
|
|
9
|
-
const
|
|
10
|
-
exports.RobotsHandler =
|
|
11
|
-
const
|
|
12
|
-
exports.ResumeHandler =
|
|
13
|
-
const
|
|
14
|
-
exports.ProgressReporter =
|
|
15
|
-
exports.ProgressTracker =
|
|
16
|
-
exports.parseSize =
|
|
17
|
-
const
|
|
18
|
-
exports.LinkConverter =
|
|
19
|
-
const
|
|
20
|
-
exports.StyleExtractor =
|
|
21
|
-
const
|
|
22
|
-
exports.Downloader =
|
|
23
|
-
const
|
|
24
|
-
exports.AssetOrganizer =
|
|
25
|
-
exports.DEFAULT_ASSET_FOLDERS =
|
|
26
|
-
const
|
|
27
|
-
exports.DownloadCache =
|
|
28
|
-
const
|
|
29
|
-
exports.EXECUTABLE_EXTENSIONS =
|
|
30
|
-
exports.ARCHIVE_EXTENSIONS =
|
|
31
|
-
exports.DOCUMENT_EXTENSIONS =
|
|
32
|
-
exports.IMAGE_EXTENSIONS =
|
|
33
|
-
exports.VIDEO_EXTENSIONS =
|
|
34
|
-
exports.AUDIO_EXTENSIONS =
|
|
35
|
-
exports.FONT_EXTENSIONS =
|
|
36
|
-
exports.WEB_ASSET_EXTENSIONS =
|
|
37
|
-
exports.DATA_EXTENSIONS =
|
|
38
|
-
exports.EXECUTABLE_MIME_TYPES =
|
|
39
|
-
exports.ARCHIVE_MIME_TYPES =
|
|
40
|
-
exports.DOCUMENT_MIME_TYPES =
|
|
41
|
-
exports.IMAGE_MIME_TYPES =
|
|
42
|
-
exports.VIDEO_MIME_TYPES =
|
|
43
|
-
exports.AUDIO_MIME_TYPES =
|
|
44
|
-
exports.FONT_MIME_TYPES =
|
|
45
|
-
exports.WEB_ASSET_MIME_TYPES =
|
|
46
|
-
exports.DATA_MIME_TYPES =
|
|
47
|
-
exports.SAFE_WEB_PRESET =
|
|
48
|
-
exports.DOCUMENTS_ONLY_PRESET =
|
|
49
|
-
exports.NO_MEDIA_PRESET =
|
|
50
|
-
exports.MINIMAL_MIRROR_PRESET =
|
|
51
|
-
exports.TEXT_ONLY_PRESET =
|
|
1
|
+
const _mod_4cn6wv = require('./types.cjs');
|
|
2
|
+
exports.WgetError = _mod_4cn6wv.WgetError;;
|
|
3
|
+
const _mod_egjoud = require('./asset-extractor.cjs');
|
|
4
|
+
exports.AssetExtractor = _mod_egjoud.AssetExtractor;;
|
|
5
|
+
const _mod_re94hs = require('./url-filter.cjs');
|
|
6
|
+
exports.UrlFilter = _mod_re94hs.UrlFilter;;
|
|
7
|
+
const _mod_m0jacj = require('./file-writer.cjs');
|
|
8
|
+
exports.FileWriter = _mod_m0jacj.FileWriter;;
|
|
9
|
+
const _mod_3bg6d3 = require('./robots.cjs');
|
|
10
|
+
exports.RobotsHandler = _mod_3bg6d3.RobotsHandler;;
|
|
11
|
+
const _mod_v6zi0p = require('./resume.cjs');
|
|
12
|
+
exports.ResumeHandler = _mod_v6zi0p.ResumeHandler;;
|
|
13
|
+
const _mod_y7oler = require('./progress.cjs');
|
|
14
|
+
exports.ProgressReporter = _mod_y7oler.ProgressReporter;
|
|
15
|
+
exports.ProgressTracker = _mod_y7oler.ProgressTracker;
|
|
16
|
+
exports.parseSize = _mod_y7oler.parseSize;;
|
|
17
|
+
const _mod_mh0crx = require('./link-converter.cjs');
|
|
18
|
+
exports.LinkConverter = _mod_mh0crx.LinkConverter;;
|
|
19
|
+
const _mod_be1ijk = require('./style-extractor.cjs');
|
|
20
|
+
exports.StyleExtractor = _mod_be1ijk.StyleExtractor;;
|
|
21
|
+
const _mod_2jcf4q = require('./downloader.cjs');
|
|
22
|
+
exports.Downloader = _mod_2jcf4q.Downloader;;
|
|
23
|
+
const _mod_v1l9cr = require('./asset-organizer.cjs');
|
|
24
|
+
exports.AssetOrganizer = _mod_v1l9cr.AssetOrganizer;
|
|
25
|
+
exports.DEFAULT_ASSET_FOLDERS = _mod_v1l9cr.DEFAULT_ASSET_FOLDERS;;
|
|
26
|
+
const _mod_zo6uf5 = require('./download-cache.cjs');
|
|
27
|
+
exports.DownloadCache = _mod_zo6uf5.DownloadCache;;
|
|
28
|
+
const _mod_f0yq16 = require('./filter-lists.cjs');
|
|
29
|
+
exports.EXECUTABLE_EXTENSIONS = _mod_f0yq16.EXECUTABLE_EXTENSIONS;
|
|
30
|
+
exports.ARCHIVE_EXTENSIONS = _mod_f0yq16.ARCHIVE_EXTENSIONS;
|
|
31
|
+
exports.DOCUMENT_EXTENSIONS = _mod_f0yq16.DOCUMENT_EXTENSIONS;
|
|
32
|
+
exports.IMAGE_EXTENSIONS = _mod_f0yq16.IMAGE_EXTENSIONS;
|
|
33
|
+
exports.VIDEO_EXTENSIONS = _mod_f0yq16.VIDEO_EXTENSIONS;
|
|
34
|
+
exports.AUDIO_EXTENSIONS = _mod_f0yq16.AUDIO_EXTENSIONS;
|
|
35
|
+
exports.FONT_EXTENSIONS = _mod_f0yq16.FONT_EXTENSIONS;
|
|
36
|
+
exports.WEB_ASSET_EXTENSIONS = _mod_f0yq16.WEB_ASSET_EXTENSIONS;
|
|
37
|
+
exports.DATA_EXTENSIONS = _mod_f0yq16.DATA_EXTENSIONS;
|
|
38
|
+
exports.EXECUTABLE_MIME_TYPES = _mod_f0yq16.EXECUTABLE_MIME_TYPES;
|
|
39
|
+
exports.ARCHIVE_MIME_TYPES = _mod_f0yq16.ARCHIVE_MIME_TYPES;
|
|
40
|
+
exports.DOCUMENT_MIME_TYPES = _mod_f0yq16.DOCUMENT_MIME_TYPES;
|
|
41
|
+
exports.IMAGE_MIME_TYPES = _mod_f0yq16.IMAGE_MIME_TYPES;
|
|
42
|
+
exports.VIDEO_MIME_TYPES = _mod_f0yq16.VIDEO_MIME_TYPES;
|
|
43
|
+
exports.AUDIO_MIME_TYPES = _mod_f0yq16.AUDIO_MIME_TYPES;
|
|
44
|
+
exports.FONT_MIME_TYPES = _mod_f0yq16.FONT_MIME_TYPES;
|
|
45
|
+
exports.WEB_ASSET_MIME_TYPES = _mod_f0yq16.WEB_ASSET_MIME_TYPES;
|
|
46
|
+
exports.DATA_MIME_TYPES = _mod_f0yq16.DATA_MIME_TYPES;
|
|
47
|
+
exports.SAFE_WEB_PRESET = _mod_f0yq16.SAFE_WEB_PRESET;
|
|
48
|
+
exports.DOCUMENTS_ONLY_PRESET = _mod_f0yq16.DOCUMENTS_ONLY_PRESET;
|
|
49
|
+
exports.NO_MEDIA_PRESET = _mod_f0yq16.NO_MEDIA_PRESET;
|
|
50
|
+
exports.MINIMAL_MIRROR_PRESET = _mod_f0yq16.MINIMAL_MIRROR_PRESET;
|
|
51
|
+
exports.TEXT_ONLY_PRESET = _mod_f0yq16.TEXT_ONLY_PRESET;;
|
|
52
52
|
const { Downloader } = require('./downloader.cjs');
|
|
53
53
|
const rezo = require('../index.cjs');
|
|
54
54
|
const { promises: fs } = require("node:fs");
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "rezo",
|
|
3
|
-
"version": "1.0.102",
|
|
3
|
+
"version": "1.0.103",
|
|
4
4
|
"description": "Lightning-fast, enterprise-grade HTTP client for modern JavaScript. Full HTTP/2 support, intelligent cookie management, multiple adapters (HTTP, Fetch, cURL, XHR), streaming, proxy support (HTTP/HTTPS/SOCKS), and cross-environment compatibility.",
|
|
5
5
|
"main": "dist/index.cjs",
|
|
6
6
|
"module": "dist/index.js",
|