rezo 1.0.55 → 1.0.57
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/adapters/index.cjs +6 -6
- package/dist/cache/index.cjs +9 -9
- package/dist/crawler/crawler-options.cjs +1 -1
- package/dist/crawler/crawler-options.js +1 -1
- package/dist/crawler/crawler.cjs +18 -0
- package/dist/crawler/crawler.js +18 -0
- package/dist/crawler/index.cjs +40 -40
- package/dist/crawler.d.ts +23 -1
- package/dist/entries/crawler.cjs +4 -5
- package/dist/entries/crawler.js +1 -1
- package/dist/index.cjs +27 -27
- package/dist/internal/agents/index.cjs +10 -10
- package/dist/proxy/index.cjs +4 -4
- package/dist/queue/index.cjs +8 -8
- package/dist/responses/universal/index.cjs +11 -11
- package/package.json +1 -1
package/dist/adapters/index.cjs
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
const
|
|
2
|
-
exports.detectRuntime =
|
|
3
|
-
exports.getAdapterCapabilities =
|
|
4
|
-
exports.buildAdapterContext =
|
|
5
|
-
exports.getAvailableAdapters =
|
|
6
|
-
exports.selectAdapter =
|
|
1
|
+
const _mod_w274ym = require('./picker.cjs');
|
|
2
|
+
exports.detectRuntime = _mod_w274ym.detectRuntime;
|
|
3
|
+
exports.getAdapterCapabilities = _mod_w274ym.getAdapterCapabilities;
|
|
4
|
+
exports.buildAdapterContext = _mod_w274ym.buildAdapterContext;
|
|
5
|
+
exports.getAvailableAdapters = _mod_w274ym.getAvailableAdapters;
|
|
6
|
+
exports.selectAdapter = _mod_w274ym.selectAdapter;;
|
package/dist/cache/index.cjs
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
const
|
|
2
|
-
exports.LRUCache =
|
|
3
|
-
const
|
|
4
|
-
exports.DNSCache =
|
|
5
|
-
exports.getGlobalDNSCache =
|
|
6
|
-
exports.resetGlobalDNSCache =
|
|
7
|
-
const
|
|
8
|
-
exports.ResponseCache =
|
|
9
|
-
exports.normalizeResponseCacheConfig =
|
|
1
|
+
const _mod_n2z5g1 = require('./lru-cache.cjs');
|
|
2
|
+
exports.LRUCache = _mod_n2z5g1.LRUCache;;
|
|
3
|
+
const _mod_l1roxo = require('./dns-cache.cjs');
|
|
4
|
+
exports.DNSCache = _mod_l1roxo.DNSCache;
|
|
5
|
+
exports.getGlobalDNSCache = _mod_l1roxo.getGlobalDNSCache;
|
|
6
|
+
exports.resetGlobalDNSCache = _mod_l1roxo.resetGlobalDNSCache;;
|
|
7
|
+
const _mod_fbczja = require('./response-cache.cjs');
|
|
8
|
+
exports.ResponseCache = _mod_fbczja.ResponseCache;
|
|
9
|
+
exports.normalizeResponseCacheConfig = _mod_fbczja.normalizeResponseCacheConfig;;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
var{RezoQueue:m}=require("../queue/queue.cjs"),{Oxylabs:x}=require("./addon/oxylabs/index.cjs"),b=require("node:path"),g=require("node:os"),{Decodo:y}=require("./addon/decodo/index.cjs");class f{baseUrl;adapter;enableNavigationHistory;sessionId;rejectUnauthorized;userAgent;useRndUserAgent;timeout;maxRedirects;maxRetryAttempts;retryDelay;retryOnStatusCode;forceRevisit;retryWithoutProxyOnStatusCode;retryOnProxyError;maxRetryOnProxyError;allowRevisiting;enableCache;cacheTTL;cacheDir;throwFatalError;debug;maxDepth;maxUrls;maxResponseSize;respectRobotsTxt;followNofollow;autoThrottle;autoThrottleTargetDelay;autoThrottleMinDelay;autoThrottleMaxDelay;maxWaitOn429;alwaysWaitOn429;oxylabs=[];decodo=[];proxies=[];limiters=[];requestHeaders=[];userAgents=A();constructor(e={}){this.baseUrl=e.baseUrl||"",this.adapter=e.adapter??"http",this.enableNavigationHistory=e.enableNavigationHistory??!1,this.sessionId=e.sessionId??`session_${Date.now()}_${Math.random().toString(36).slice(2,8)}`,this.rejectUnauthorized=e.rejectUnauthorized??!0,this.userAgent=e.userAgent,this.useRndUserAgent=e.useRndUserAgent??!1,this.timeout=e.timeout??30000,this.maxRedirects=e.maxRedirects??10,this.maxRetryAttempts=e.maxRetryAttempts??3,this.retryDelay=e.retryDelay??0,this.retryOnStatusCode=e.retryOnStatusCode??[408,429,500,502,503,504],this.forceRevisit=e.forceRevisit??!1,this.retryWithoutProxyOnStatusCode=e.retryWithoutProxyOnStatusCode??[407,403],this.retryOnProxyError=e.retryOnProxyError??!0,this.maxRetryOnProxyError=e.maxRetryOnProxyError??3,this.allowRevisiting=e.allowRevisiting??!1,this.enableCache=e.enableCache??!0,this.cacheTTL=e.cacheTTL??604800000,this.cacheDir=e.cacheDir??b.join(g.tmpdir(),"uiniqhtt_cache"),this.throwFatalError=e.throwFatalError??!1,this.debug=e.debug??!1,this.maxDepth=e.maxDepth??0,this.maxUrls=e.maxUrls??0,this.maxResponseSize=e.maxResponseSize??0,this.respectRobotsTxt=e.respectRobotsTxt??!1,this.followNofollow=e.followNofollow??!1,this.autoThrottle=e.autoThrottle??!0,this.autoThrottleTargetDelay=e.autoThrottleTargetDelay??1000,this.autoThrottleMinDelay=e.autoThrottleMinDelay??100,this.autoThrottleMaxDelay=e.autoThrottleMaxDelay??60000,this.maxWaitOn429=e.maxWaitOn429??1800000,this.alwaysWaitOn429=e.alwaysWaitOn429??!1,this._addHeaders(e.headers),this._addOxylabs(e.oxylabs),this._addDecodo(e.decodo),this._addProxies(e.proxy),this._addLimiters(e.limiter)}getConfiguredDomains(e){return(e==="headers"?this.requestHeaders:e==="limiters"?this.limiters:e==="oxylabs"?this.oxylabs:this.proxies).filter((i)=>i.domain).map((i)=>i.domain).filter((i,o,r)=>r.indexOf(i)===o)}removeDomain(e){return this.requestHeaders=this.requestHeaders.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this.proxies=this.proxies.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this.limiters=this.limiters.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this.oxylabs=this.oxylabs.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this}_domainsEqual(e,t){if(Array.isArray(e)&&Array.isArray(t))return e.length===t.length&&e.every((i,o)=>i===t[o]);return e===t}getConfigurationSummary(){let e=(t)=>({total:t.length,global:t.filter((i)=>i.isGlobal).length,domainSpecific:t.filter((i)=>!i.isGlobal&&i.domain).length});return{headers:e(this.requestHeaders),proxies:e(this.proxies),limiters:e(this.limiters),oxylabs:e(this.oxylabs)}}_addHeaders(e){if(!e||!e.enable)return;for(let t of e.httpHeaders){let{domain:i,isGlobal:o,headers:r}=t;if(!i&&!o)continue;if(r instanceof Headers){let s=Object.fromEntries(r.entries());if(Object.keys(s).length<1)continue;r=s}else if(!r||Object.keys(r).length<1)continue;this.requestHeaders.push({domain:i,isGlobal:o,headers:r})}}_addProxies(e){if(!e||!e.enable)return;for(let t of e.proxies){let{domain:i,isGlobal:o,proxy:r}=t;if(!i&&!o)continue;if(!r||Object.keys(r).length<1)continue;this.proxies.push({domain:i,isGlobal:o,proxy:r})}}_addLimiters(e){if(!e||!e.enable)return;for(let t of e.limiters){let{domain:i,isGlobal:o,options:r}=t;if(!i&&!o)continue;if(!r||Object.keys(r).length<1)continue;this.limiters.push({domain:i,isGlobal:o,pqueue:new m(r)})}}_addOxylabs(e){if(!e||!e.enable)return;for(let t of e.labs){let{domain:i,isGlobal:o,options:r,queueOptions:s}=t;if(!i&&!o)continue;if(!r||Object.keys(r).length<1)continue;this.oxylabs.push({domain:i,isGlobal:o,adaptar:new x(r)})}}_addDecodo(e){if(!e||!e.enable)return;for(let t of e.labs){let{domain:i,isGlobal:o,options:r,queueOptions:s}=t;if(!i&&!o)continue;if(!r||Object.keys(r).length<1)continue;this.decodo.push({domain:i,isGlobal:o,adaptar:new y(r)})}}addHeaders(e){return this._addHeaders({enable:!0,httpHeaders:[e]}),this}addProxy(e){return this._addProxies({enable:!0,proxies:[e]}),this}addLimiter(e){return this._addLimiters({enable:!0,limiters:[e]}),this}addOxylabs(e){return this._addOxylabs({enable:!0,labs:[e]}),this}addDecodo(e){return this._addDecodo({enable:!0,labs:[e]}),this}clearGlobalConfigs(){if(Array.isArray(this.requestHeaders))this.requestHeaders=this.requestHeaders.filter((e)=>!e.isGlobal);if(Array.isArray(this.oxylabs))this.oxylabs=this.oxylabs.filter((e)=>!e.isGlobal);if(Array.isArray(this.limiters))this.limiters=this.limiters.filter((e)=>!e.isGlobal);if(Array.isArray(this.proxies))this.proxies=this.proxies.filter((e)=>!e.isGlobal);return this}getAdapter(e,t,i,o){if(!this.getDomainName(e))return null;let s=[],a=t==="headers"?this.requestHeaders:t==="limiters"?this.limiters:t==="oxylabs"?this.oxylabs:t==="decodo"?this.decodo:this.proxies;for(let n=0;n<a.length;n++)if(this._hasDomain(e,a[n].domain))s.push(n);if(s.length){let n=o?s[this.rnd(0,s.length-1)]:s[0];return t==="headers"?this.requestHeaders[n].headers:t==="limiters"?this.limiters[n].pqueue:t==="oxylabs"?this.oxylabs[n].adaptar:t==="decodo"?this.decodo[n].adaptar:this.proxies[n].proxy}s.length=0;for(let n=0;n<a.length;n++)s.push(n);if(s.length){let n=o?s[this.rnd(0,s.length-1)]:s[0];if(a[n].isGlobal&&i)return t==="headers"?this.requestHeaders[n].headers:t==="limiters"?this.limiters[n].pqueue:t==="oxylabs"?this.oxylabs[n].adaptar:t==="decodo"?this.decodo[n].adaptar:this.proxies[n].proxy}return null}rnd(e=0,t=Number.MAX_VALUE){return Math.floor(Math.random()*(t-e+1))+e}hasDomain(e,t,i){if(!this.getDomainName(e))return!1;let r=t==="headers"?this.requestHeaders:t==="limiters"?this.limiters:t==="oxylabs"?this.oxylabs:t==="decodo"?this.decodo:this.proxies;for(let s=0;s<r.length;s++)if(this._hasDomain(e,r[s].domain))return!0;if(i){for(let s=0;s<r.length;s++)if(r[s].isGlobal)return!0}return!1}pickHeaders(e,t,i,o){let r=this.getAdapter(e,"headers",t),s=new Headers(r??{});if(i&&i instanceof Headers)for(let[a,n]of Object.entries(i.entries()))s.set(a,n);else if(i&&typeof i==="object"){for(let[a,n]of Object.entries(i))if(typeof n==="string")s.set(a,n)}if(o)s.set("user-agent",this.getRandomUserAgent());return Object.fromEntries(s.entries())}_hasDomain(e,t){if(!t)return!1;let i=this.getDomainName(e);if(!i)return!1;let o=(s)=>{return/[\^\$\*\+\?\{\}\[\]\(\)\|\\]/.test(s)||s.startsWith("/")||s.includes(".*")||s.includes(".+")},r=(s)=>{if(s instanceof RegExp)return s.test(i)||s.test(e);let a=s.toString().trim();if(i.toLowerCase()===a.toLowerCase())return!0;if(a.includes("*")){let l=a.replace(/[.*+?^${}()|[\]\\]/g,"\\$&").replace(/\\\*/g,".*"),h=new RegExp(`^${l}$`,"i");return h.test(i)||h.test(e)}if(o(a))try{let l=a,h="i",u=a.match(/^\/(.*)\/(\w*)$/);if(u)l=u[1],h=u[2]||"i";let c=new RegExp(l,h);return c.test(i)||c.test(e)}catch(l){return i.toLowerCase().includes(a.toLowerCase())}let n=i.toLowerCase(),d=a.toLowerCase();return n===d||n.endsWith("."+d)||d.endsWith("."+n)};if(Array.isArray(t)){for(let s of t)if(r(s))return!0;return!1}return r(t)}getDomainName(e){if(this.isValidUrl(e))return new URL(e).hostname;else if(this.isHostName(e))return e;return null}isHostName(e){if(!e)return!1;if(e.length>255)return!1;let t=/^(?:(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+ [a-zA-Z]{2,})$/;return e=e.trim().toLowerCase(),t.test(e)&&!e.startsWith("-")&&!e.endsWith("-")}isValidUrl(e){if(!e)return!1;e=e.trim();try{let t=new URL(e);if(!t.protocol||!["http:","https:"].includes(t.protocol.toLowerCase()))return!1;if(!t.hostname)return!1;if(!/^(?:(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,})$/.test(t.hostname))return!1;return!0}catch{return!1}}getRandomUserAgent(){return this.userAgents[Math.floor(Math.random()*this.userAgents.length)]}}function A(){let e=[{name:"Chrome",version:"91.0.4472.124",engine:"AppleWebKit/537.36"},{name:"Firefox",version:"89.0",engine:"Gecko/20100101"},{name:"Safari",version:"14.1.1",engine:"AppleWebKit/605.1.15"},{name:"Edge",version:"91.0.864.59",engine:"AppleWebKit/537.36"},{name:"Opera",version:"77.0.4054.277",engine:"AppleWebKit/537.36"},{name:"Vivaldi",version:"3.8.2259.42",engine:"AppleWebKit/537.36"},{name:"Brave",version:"1.26.74",engine:"AppleWebKit/537.36"},{name:"Chromium",version:"91.0.4472.101",engine:"AppleWebKit/537.36"},{name:"Yandex",version:"21.5.3.742",engine:"AppleWebKit/537.36"},{name:"Maxthon",version:"5.3.8.2000",engine:"AppleWebKit/537.36"}],t=["Windows NT 10.0","Windows NT 6.1","Macintosh; Intel Mac OS X 10_15_7","Macintosh; Intel Mac OS X 11_4_0","X11; Linux x86_64","X11; Ubuntu; Linux x86_64"],i=[];for(let o=0;o<200;o++){let r=e[Math.floor(Math.random()*e.length)],s=t[Math.floor(Math.random()*t.length)],a="";switch(r.name){case"Chrome":a=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36`;break;case"Firefox":a=`Mozilla/5.0 (${s}; rv:${r.version}) ${r.engine} Firefox/${r.version}`;break;case"Safari":a=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Version/${r.version} Safari/605.1.15`;break;case"Edge":a=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 Edg/${r.version}`;break;case"Opera":a=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 OPR/${r.version}`;break;case"Vivaldi":a=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 Vivaldi/${r.version}`;break;case"Brave":a=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 Brave/${r.version}`;break;case"Chromium":a=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chromium/${r.version} Chrome/${r.version} Safari/537.36`;break;case"Yandex":a=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} YaBrowser/${r.version} Safari/537.36`;break;case"Maxthon":a=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 Maxthon/${r.version}`;break}i.push(a)}return i}exports.CrawlerOptions=f;
|
|
1
|
+
var{RezoQueue:m}=require("../queue/queue.cjs"),{Oxylabs:x}=require("./addon/oxylabs/index.cjs"),b=require("node:path"),g=require("node:os"),{Decodo:y}=require("./addon/decodo/index.cjs");class f{baseUrl;adapter;enableNavigationHistory;sessionId;rejectUnauthorized;userAgent;useRndUserAgent;timeout;maxRedirects;maxRetryAttempts;retryDelay;retryOnStatusCode;forceRevisit;retryWithoutProxyOnStatusCode;retryOnProxyError;maxRetryOnProxyError;allowRevisiting;enableCache;cacheTTL;cacheDir;throwFatalError;debug;maxDepth;maxUrls;maxResponseSize;respectRobotsTxt;followNofollow;autoThrottle;autoThrottleTargetDelay;autoThrottleMinDelay;autoThrottleMaxDelay;maxWaitOn429;alwaysWaitOn429;oxylabs=[];decodo=[];proxies=[];limiters=[];requestHeaders=[];userAgents=A();constructor(e={}){this.baseUrl=e.baseUrl||"",this.adapter=e.adapter??"http",this.enableNavigationHistory=e.enableNavigationHistory??!1,this.sessionId=e.sessionId??`session_${Date.now()}_${Math.random().toString(36).slice(2,8)}`,this.rejectUnauthorized=e.rejectUnauthorized??!0,this.userAgent=e.userAgent,this.useRndUserAgent=e.useRndUserAgent??!1,this.timeout=e.timeout??30000,this.maxRedirects=e.maxRedirects??10,this.maxRetryAttempts=e.maxRetryAttempts??3,this.retryDelay=e.retryDelay??0,this.retryOnStatusCode=e.retryOnStatusCode??[408,429,500,502,503,504],this.forceRevisit=e.forceRevisit??!1,this.retryWithoutProxyOnStatusCode=e.retryWithoutProxyOnStatusCode??[407,403],this.retryOnProxyError=e.retryOnProxyError??!0,this.maxRetryOnProxyError=e.maxRetryOnProxyError??3,this.allowRevisiting=e.allowRevisiting??!1,this.enableCache=e.enableCache??!0,this.cacheTTL=e.cacheTTL??604800000,this.cacheDir=e.cacheDir??b.join(g.tmpdir(),"uiniqhtt_cache"),this.throwFatalError=e.throwFatalError??!1,this.debug=e.debug??!1,this.maxDepth=e.maxDepth??0,this.maxUrls=e.maxUrls??0,this.maxResponseSize=e.maxResponseSize??0,this.respectRobotsTxt=e.respectRobotsTxt??!1,this.followNofollow=e.followNofollow??!1,this.autoThrottle=e.autoThrottle??!0,this.autoThrottleTargetDelay=e.autoThrottleTargetDelay??1000,this.autoThrottleMinDelay=e.autoThrottleMinDelay??100,this.autoThrottleMaxDelay=e.autoThrottleMaxDelay??60000,this.maxWaitOn429=e.maxWaitOn429??1800000,this.alwaysWaitOn429=e.alwaysWaitOn429??!1,this._addHeaders(e.headers),this._addOxylabs(e.oxylabs),this._addDecodo(e.decodo),this._addProxies(e.proxy),this._addLimiters(e.limiter)}getConfiguredDomains(e){return(e==="headers"?this.requestHeaders:e==="limiters"?this.limiters:e==="oxylabs"?this.oxylabs:this.proxies).filter((i)=>i.domain).map((i)=>i.domain).filter((i,n,r)=>r.indexOf(i)===n)}removeDomain(e){return this.requestHeaders=this.requestHeaders.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this.proxies=this.proxies.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this.limiters=this.limiters.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this.oxylabs=this.oxylabs.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this}_domainsEqual(e,t){if(Array.isArray(e)&&Array.isArray(t))return e.length===t.length&&e.every((i,n)=>i===t[n]);return e===t}getConfigurationSummary(){let e=(t)=>({total:t.length,global:t.filter((i)=>i.isGlobal).length,domainSpecific:t.filter((i)=>!i.isGlobal&&i.domain).length});return{headers:e(this.requestHeaders),proxies:e(this.proxies),limiters:e(this.limiters),oxylabs:e(this.oxylabs)}}_addHeaders(e){if(!e||!e.enable)return;for(let t of e.httpHeaders){let{domain:i,isGlobal:n,headers:r}=t;if(!i&&!n)continue;if(r instanceof Headers){let s=Object.fromEntries(r.entries());if(Object.keys(s).length<1)continue;r=s}else if(!r||Object.keys(r).length<1)continue;this.requestHeaders.push({domain:i,isGlobal:n,headers:r})}}_addProxies(e){if(!e||!e.enable)return;for(let t of e.proxies){let{domain:i,isGlobal:n,proxy:r}=t;if(!i&&!n)continue;if(!r||Object.keys(r).length<1)continue;this.proxies.push({domain:i,isGlobal:n,proxy:r})}}_addLimiters(e){if(!e||!e.enable)return;for(let t of e.limiters){let{domain:i,isGlobal:n,options:r}=t;if(!i&&!n)continue;if(!r||Object.keys(r).length<1)continue;this.limiters.push({domain:i,isGlobal:n,pqueue:new m(r)})}}_addOxylabs(e){if(!e||!e.enable)return;for(let t of e.labs){let{domain:i,isGlobal:n,options:r,queueOptions:s}=t;if(!i&&!n)continue;if(!r||Object.keys(r).length<1)continue;this.oxylabs.push({domain:i,isGlobal:n,adaptar:new x(r)})}}_addDecodo(e){if(!e||!e.enable)return;for(let t of e.labs){let{domain:i,isGlobal:n,options:r,queueOptions:s}=t;if(!i&&!n)continue;if(!r||Object.keys(r).length<1)continue;this.decodo.push({domain:i,isGlobal:n,adaptar:new y(r)})}}addHeaders(e){return this._addHeaders({enable:!0,httpHeaders:[e]}),this}addProxy(e){return this._addProxies({enable:!0,proxies:[e]}),this}addLimiter(e){return this._addLimiters({enable:!0,limiters:[e]}),this}addOxylabs(e){return this._addOxylabs({enable:!0,labs:[e]}),this}addDecodo(e){return this._addDecodo({enable:!0,labs:[e]}),this}destroyLimiters(){for(let e of this.limiters)if(e.pqueue&&typeof e.pqueue.destroy==="function")e.pqueue.destroy();this.limiters=[]}clearGlobalConfigs(){if(Array.isArray(this.requestHeaders))this.requestHeaders=this.requestHeaders.filter((e)=>!e.isGlobal);if(Array.isArray(this.oxylabs))this.oxylabs=this.oxylabs.filter((e)=>!e.isGlobal);if(Array.isArray(this.limiters))this.limiters=this.limiters.filter((e)=>!e.isGlobal);if(Array.isArray(this.proxies))this.proxies=this.proxies.filter((e)=>!e.isGlobal);return this}getAdapter(e,t,i,n){if(!this.getDomainName(e))return null;let s=[],a=t==="headers"?this.requestHeaders:t==="limiters"?this.limiters:t==="oxylabs"?this.oxylabs:t==="decodo"?this.decodo:this.proxies;for(let o=0;o<a.length;o++)if(this._hasDomain(e,a[o].domain))s.push(o);if(s.length){let o=n?s[this.rnd(0,s.length-1)]:s[0];return t==="headers"?this.requestHeaders[o].headers:t==="limiters"?this.limiters[o].pqueue:t==="oxylabs"?this.oxylabs[o].adaptar:t==="decodo"?this.decodo[o].adaptar:this.proxies[o].proxy}s.length=0;for(let o=0;o<a.length;o++)s.push(o);if(s.length){let o=n?s[this.rnd(0,s.length-1)]:s[0];if(a[o].isGlobal&&i)return t==="headers"?this.requestHeaders[o].headers:t==="limiters"?this.limiters[o].pqueue:t==="oxylabs"?this.oxylabs[o].adaptar:t==="decodo"?this.decodo[o].adaptar:this.proxies[o].proxy}return null}rnd(e=0,t=Number.MAX_VALUE){return Math.floor(Math.random()*(t-e+1))+e}hasDomain(e,t,i){if(!this.getDomainName(e))return!1;let r=t==="headers"?this.requestHeaders:t==="limiters"?this.limiters:t==="oxylabs"?this.oxylabs:t==="decodo"?this.decodo:this.proxies;for(let s=0;s<r.length;s++)if(this._hasDomain(e,r[s].domain))return!0;if(i){for(let s=0;s<r.length;s++)if(r[s].isGlobal)return!0}return!1}pickHeaders(e,t,i,n){let r=this.getAdapter(e,"headers",t),s=new Headers(r??{});if(i&&i instanceof Headers)for(let[a,o]of Object.entries(i.entries()))s.set(a,o);else if(i&&typeof i==="object"){for(let[a,o]of Object.entries(i))if(typeof o==="string")s.set(a,o)}if(n)s.set("user-agent",this.getRandomUserAgent());return Object.fromEntries(s.entries())}_hasDomain(e,t){if(!t)return!1;let i=this.getDomainName(e);if(!i)return!1;let n=(s)=>{return/[\^\$\*\+\?\{\}\[\]\(\)\|\\]/.test(s)||s.startsWith("/")||s.includes(".*")||s.includes(".+")},r=(s)=>{if(s instanceof RegExp)return s.test(i)||s.test(e);let a=s.toString().trim();if(i.toLowerCase()===a.toLowerCase())return!0;if(a.includes("*")){let l=a.replace(/[.*+?^${}()|[\]\\]/g,"\\$&").replace(/\\\*/g,".*"),h=new RegExp(`^${l}$`,"i");return h.test(i)||h.test(e)}if(n(a))try{let l=a,h="i",u=a.match(/^\/(.*)\/(\w*)$/);if(u)l=u[1],h=u[2]||"i";let c=new RegExp(l,h);return c.test(i)||c.test(e)}catch(l){return i.toLowerCase().includes(a.toLowerCase())}let o=i.toLowerCase(),d=a.toLowerCase();return o===d||o.endsWith("."+d)||d.endsWith("."+o)};if(Array.isArray(t)){for(let s of t)if(r(s))return!0;return!1}return r(t)}getDomainName(e){if(this.isValidUrl(e))return new URL(e).hostname;else if(this.isHostName(e))return e;return null}isHostName(e){if(!e)return!1;if(e.length>255)return!1;let t=/^(?:(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+ [a-zA-Z]{2,})$/;return e=e.trim().toLowerCase(),t.test(e)&&!e.startsWith("-")&&!e.endsWith("-")}isValidUrl(e){if(!e)return!1;e=e.trim();try{let t=new URL(e);if(!t.protocol||!["http:","https:"].includes(t.protocol.toLowerCase()))return!1;if(!t.hostname)return!1;if(!/^(?:(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,})$/.test(t.hostname))return!1;return!0}catch{return!1}}getRandomUserAgent(){return this.userAgents[Math.floor(Math.random()*this.userAgents.length)]}}function A(){let e=[{name:"Chrome",version:"91.0.4472.124",engine:"AppleWebKit/537.36"},{name:"Firefox",version:"89.0",engine:"Gecko/20100101"},{name:"Safari",version:"14.1.1",engine:"AppleWebKit/605.1.15"},{name:"Edge",version:"91.0.864.59",engine:"AppleWebKit/537.36"},{name:"Opera",version:"77.0.4054.277",engine:"AppleWebKit/537.36"},{name:"Vivaldi",version:"3.8.2259.42",engine:"AppleWebKit/537.36"},{name:"Brave",version:"1.26.74",engine:"AppleWebKit/537.36"},{name:"Chromium",version:"91.0.4472.101",engine:"AppleWebKit/537.36"},{name:"Yandex",version:"21.5.3.742",engine:"AppleWebKit/537.36"},{name:"Maxthon",version:"5.3.8.2000",engine:"AppleWebKit/537.36"}],t=["Windows NT 10.0","Windows NT 6.1","Macintosh; Intel Mac OS X 10_15_7","Macintosh; Intel Mac OS X 11_4_0","X11; Linux x86_64","X11; Ubuntu; Linux x86_64"],i=[];for(let n=0;n<200;n++){let r=e[Math.floor(Math.random()*e.length)],s=t[Math.floor(Math.random()*t.length)],a="";switch(r.name){case"Chrome":a=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36`;break;case"Firefox":a=`Mozilla/5.0 (${s}; rv:${r.version}) ${r.engine} Firefox/${r.version}`;break;case"Safari":a=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Version/${r.version} Safari/605.1.15`;break;case"Edge":a=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 Edg/${r.version}`;break;case"Opera":a=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 OPR/${r.version}`;break;case"Vivaldi":a=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 Vivaldi/${r.version}`;break;case"Brave":a=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 Brave/${r.version}`;break;case"Chromium":a=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chromium/${r.version} Chrome/${r.version} Safari/537.36`;break;case"Yandex":a=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} YaBrowser/${r.version} Safari/537.36`;break;case"Maxthon":a=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 Maxthon/${r.version}`;break}i.push(a)}return i}exports.CrawlerOptions=f;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
import{RezoQueue as m}from"../queue/queue.js";import{Oxylabs as f}from"./addon/oxylabs/index.js";import x from"node:path";import b from"node:os";import{Decodo as g}from"./addon/decodo/index.js";class y{baseUrl;adapter;enableNavigationHistory;sessionId;rejectUnauthorized;userAgent;useRndUserAgent;timeout;maxRedirects;maxRetryAttempts;retryDelay;retryOnStatusCode;forceRevisit;retryWithoutProxyOnStatusCode;retryOnProxyError;maxRetryOnProxyError;allowRevisiting;enableCache;cacheTTL;cacheDir;throwFatalError;debug;maxDepth;maxUrls;maxResponseSize;respectRobotsTxt;followNofollow;autoThrottle;autoThrottleTargetDelay;autoThrottleMinDelay;autoThrottleMaxDelay;maxWaitOn429;alwaysWaitOn429;oxylabs=[];decodo=[];proxies=[];limiters=[];requestHeaders=[];userAgents=A();constructor(e={}){this.baseUrl=e.baseUrl||"",this.adapter=e.adapter??"http",this.enableNavigationHistory=e.enableNavigationHistory??!1,this.sessionId=e.sessionId??`session_${Date.now()}_${Math.random().toString(36).slice(2,8)}`,this.rejectUnauthorized=e.rejectUnauthorized??!0,this.userAgent=e.userAgent,this.useRndUserAgent=e.useRndUserAgent??!1,this.timeout=e.timeout??30000,this.maxRedirects=e.maxRedirects??10,this.maxRetryAttempts=e.maxRetryAttempts??3,this.retryDelay=e.retryDelay??0,this.retryOnStatusCode=e.retryOnStatusCode??[408,429,500,502,503,504],this.forceRevisit=e.forceRevisit??!1,this.retryWithoutProxyOnStatusCode=e.retryWithoutProxyOnStatusCode??[407,403],this.retryOnProxyError=e.retryOnProxyError??!0,this.maxRetryOnProxyError=e.maxRetryOnProxyError??3,this.allowRevisiting=e.allowRevisiting??!1,this.enableCache=e.enableCache??!0,this.cacheTTL=e.cacheTTL??604800000,this.cacheDir=e.cacheDir??x.join(b.tmpdir(),"uiniqhtt_cache"),this.throwFatalError=e.throwFatalError??!1,this.debug=e.debug??!1,this.maxDepth=e.maxDepth??0,this.maxUrls=e.maxUrls??0,this.maxResponseSize=e.maxResponseSize??0,this.respectRobotsTxt=e.respectRobotsTxt??!1,this.followNofollow=e.followNofollow??!1,this.autoThrottle=e.autoThrottle??!0,this.autoThrottleTargetDelay=e.autoThrottleTargetDelay??1000,this.autoThrottleMinDelay=e.autoThrottleMinDelay??100,this.autoThrottleMaxDelay=e.autoThrottleMaxDelay??60000,this.maxWaitOn429=e.maxWaitOn429??1800000,this.alwaysWaitOn429=e.alwaysWaitOn429??!1,this._addHeaders(e.headers),this._addOxylabs(e.oxylabs),this._addDecodo(e.decodo),this._addProxies(e.proxy),this._addLimiters(e.limiter)}getConfiguredDomains(e){return(e==="headers"?this.requestHeaders:e==="limiters"?this.limiters:e==="oxylabs"?this.oxylabs:this.proxies).filter((s)=>s.domain).map((s)=>s.domain).filter((s,n,r)=>r.indexOf(s)===n)}removeDomain(e){return this.requestHeaders=this.requestHeaders.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this.proxies=this.proxies.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this.limiters=this.limiters.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this.oxylabs=this.oxylabs.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this}_domainsEqual(e,t){if(Array.isArray(e)&&Array.isArray(t))return e.length===t.length&&e.every((s,n)=>s===t[n]);return e===t}getConfigurationSummary(){let e=(t)=>({total:t.length,global:t.filter((s)=>s.isGlobal).length,domainSpecific:t.filter((s)=>!s.isGlobal&&s.domain).length});return{headers:e(this.requestHeaders),proxies:e(this.proxies),limiters:e(this.limiters),oxylabs:e(this.oxylabs)}}_addHeaders(e){if(!e||!e.enable)return;for(let t of e.httpHeaders){let{domain:s,isGlobal:n,headers:r}=t;if(!s&&!n)continue;if(r instanceof Headers){let i=Object.fromEntries(r.entries());if(Object.keys(i).length<1)continue;r=i}else if(!r||Object.keys(r).length<1)continue;this.requestHeaders.push({domain:s,isGlobal:n,headers:r})}}_addProxies(e){if(!e||!e.enable)return;for(let t of e.proxies){let{domain:s,isGlobal:n,proxy:r}=t;if(!s&&!n)continue;if(!r||Object.keys(r).length<1)continue;this.proxies.push({domain:s,isGlobal:n,proxy:r})}}_addLimiters(e){if(!e||!e.enable)return;for(let t of e.limiters){let{domain:s,isGlobal:n,options:r}=t;if(!s&&!n)continue;if(!r||Object.keys(r).length<1)continue;this.limiters.push({domain:s,isGlobal:n,pqueue:new m(r)})}}_addOxylabs(e){if(!e||!e.enable)return;for(let t of e.labs){let{domain:s,isGlobal:n,options:r,queueOptions:i}=t;if(!s&&!n)continue;if(!r||Object.keys(r).length<1)continue;this.oxylabs.push({domain:s,isGlobal:n,adaptar:new f(r)})}}_addDecodo(e){if(!e||!e.enable)return;for(let t of e.labs){let{domain:s,isGlobal:n,options:r,queueOptions:i}=t;if(!s&&!n)continue;if(!r||Object.keys(r).length<1)continue;this.decodo.push({domain:s,isGlobal:n,adaptar:new g(r)})}}addHeaders(e){return this._addHeaders({enable:!0,httpHeaders:[e]}),this}addProxy(e){return this._addProxies({enable:!0,proxies:[e]}),this}addLimiter(e){return this._addLimiters({enable:!0,limiters:[e]}),this}addOxylabs(e){return this._addOxylabs({enable:!0,labs:[e]}),this}addDecodo(e){return this._addDecodo({enable:!0,labs:[e]}),this}clearGlobalConfigs(){if(Array.isArray(this.requestHeaders))this.requestHeaders=this.requestHeaders.filter((e)=>!e.isGlobal);if(Array.isArray(this.oxylabs))this.oxylabs=this.oxylabs.filter((e)=>!e.isGlobal);if(Array.isArray(this.limiters))this.limiters=this.limiters.filter((e)=>!e.isGlobal);if(Array.isArray(this.proxies))this.proxies=this.proxies.filter((e)=>!e.isGlobal);return this}getAdapter(e,t,s,n){if(!this.getDomainName(e))return null;let i=[],a=t==="headers"?this.requestHeaders:t==="limiters"?this.limiters:t==="oxylabs"?this.oxylabs:t==="decodo"?this.decodo:this.proxies;for(let o=0;o<a.length;o++)if(this._hasDomain(e,a[o].domain))i.push(o);if(i.length){let o=n?i[this.rnd(0,i.length-1)]:i[0];return t==="headers"?this.requestHeaders[o].headers:t==="limiters"?this.limiters[o].pqueue:t==="oxylabs"?this.oxylabs[o].adaptar:t==="decodo"?this.decodo[o].adaptar:this.proxies[o].proxy}i.length=0;for(let o=0;o<a.length;o++)i.push(o);if(i.length){let o=n?i[this.rnd(0,i.length-1)]:i[0];if(a[o].isGlobal&&s)return t==="headers"?this.requestHeaders[o].headers:t==="limiters"?this.limiters[o].pqueue:t==="oxylabs"?this.oxylabs[o].adaptar:t==="decodo"?this.decodo[o].adaptar:this.proxies[o].proxy}return null}rnd(e=0,t=Number.MAX_VALUE){return Math.floor(Math.random()*(t-e+1))+e}hasDomain(e,t,s){if(!this.getDomainName(e))return!1;let r=t==="headers"?this.requestHeaders:t==="limiters"?this.limiters:t==="oxylabs"?this.oxylabs:t==="decodo"?this.decodo:this.proxies;for(let i=0;i<r.length;i++)if(this._hasDomain(e,r[i].domain))return!0;if(s){for(let i=0;i<r.length;i++)if(r[i].isGlobal)return!0}return!1}pickHeaders(e,t,s,n){let r=this.getAdapter(e,"headers",t),i=new Headers(r??{});if(s&&s instanceof Headers)for(let[a,o]of Object.entries(s.entries()))i.set(a,o);else if(s&&typeof s==="object"){for(let[a,o]of Object.entries(s))if(typeof o==="string")i.set(a,o)}if(n)i.set("user-agent",this.getRandomUserAgent());return Object.fromEntries(i.entries())}_hasDomain(e,t){if(!t)return!1;let s=this.getDomainName(e);if(!s)return!1;let n=(i)=>{return/[\^\$\*\+\?\{\}\[\]\(\)\|\\]/.test(i)||i.startsWith("/")||i.includes(".*")||i.includes(".+")},r=(i)=>{if(i instanceof RegExp)return i.test(s)||i.test(e);let a=i.toString().trim();if(s.toLowerCase()===a.toLowerCase())return!0;if(a.includes("*")){let l=a.replace(/[.*+?^${}()|[\]\\]/g,"\\$&").replace(/\\\*/g,".*"),h=new RegExp(`^${l}$`,"i");return h.test(s)||h.test(e)}if(n(a))try{let l=a,h="i",u=a.match(/^\/(.*)\/(\w*)$/);if(u)l=u[1],h=u[2]||"i";let c=new RegExp(l,h);return c.test(s)||c.test(e)}catch(l){return s.toLowerCase().includes(a.toLowerCase())}let o=s.toLowerCase(),d=a.toLowerCase();return o===d||o.endsWith("."+d)||d.endsWith("."+o)};if(Array.isArray(t)){for(let i of t)if(r(i))return!0;return!1}return r(t)}getDomainName(e){if(this.isValidUrl(e))return new URL(e).hostname;else if(this.isHostName(e))return e;return null}isHostName(e){if(!e)return!1;if(e.length>255)return!1;let t=/^(?:(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+ [a-zA-Z]{2,})$/;return e=e.trim().toLowerCase(),t.test(e)&&!e.startsWith("-")&&!e.endsWith("-")}isValidUrl(e){if(!e)return!1;e=e.trim();try{let t=new URL(e);if(!t.protocol||!["http:","https:"].includes(t.protocol.toLowerCase()))return!1;if(!t.hostname)return!1;if(!/^(?:(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,})$/.test(t.hostname))return!1;return!0}catch{return!1}}getRandomUserAgent(){return this.userAgents[Math.floor(Math.random()*this.userAgents.length)]}}function A(){let e=[{name:"Chrome",version:"91.0.4472.124",engine:"AppleWebKit/537.36"},{name:"Firefox",version:"89.0",engine:"Gecko/20100101"},{name:"Safari",version:"14.1.1",engine:"AppleWebKit/605.1.15"},{name:"Edge",version:"91.0.864.59",engine:"AppleWebKit/537.36"},{name:"Opera",version:"77.0.4054.277",engine:"AppleWebKit/537.36"},{name:"Vivaldi",version:"3.8.2259.42",engine:"AppleWebKit/537.36"},{name:"Brave",version:"1.26.74",engine:"AppleWebKit/537.36"},{name:"Chromium",version:"91.0.4472.101",engine:"AppleWebKit/537.36"},{name:"Yandex",version:"21.5.3.742",engine:"AppleWebKit/537.36"},{name:"Maxthon",version:"5.3.8.2000",engine:"AppleWebKit/537.36"}],t=["Windows NT 10.0","Windows NT 6.1","Macintosh; Intel Mac OS X 10_15_7","Macintosh; Intel Mac OS X 11_4_0","X11; Linux x86_64","X11; Ubuntu; Linux x86_64"],s=[];for(let n=0;n<200;n++){let r=e[Math.floor(Math.random()*e.length)],i=t[Math.floor(Math.random()*t.length)],a="";switch(r.name){case"Chrome":a=`Mozilla/5.0 (${i}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36`;break;case"Firefox":a=`Mozilla/5.0 (${i}; rv:${r.version}) ${r.engine} Firefox/${r.version}`;break;case"Safari":a=`Mozilla/5.0 (${i}) ${r.engine} (KHTML, like Gecko) Version/${r.version} Safari/605.1.15`;break;case"Edge":a=`Mozilla/5.0 (${i}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 Edg/${r.version}`;break;case"Opera":a=`Mozilla/5.0 (${i}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 OPR/${r.version}`;break;case"Vivaldi":a=`Mozilla/5.0 (${i}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 Vivaldi/${r.version}`;break;case"Brave":a=`Mozilla/5.0 (${i}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 Brave/${r.version}`;break;case"Chromium":a=`Mozilla/5.0 (${i}) ${r.engine} (KHTML, like Gecko) Chromium/${r.version} Chrome/${r.version} Safari/537.36`;break;case"Yandex":a=`Mozilla/5.0 (${i}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} YaBrowser/${r.version} Safari/537.36`;break;case"Maxthon":a=`Mozilla/5.0 (${i}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 Maxthon/${r.version}`;break}s.push(a)}return s}export{y as CrawlerOptions};
|
|
1
|
+
import{RezoQueue as m}from"../queue/queue.js";import{Oxylabs as f}from"./addon/oxylabs/index.js";import x from"node:path";import b from"node:os";import{Decodo as g}from"./addon/decodo/index.js";class y{baseUrl;adapter;enableNavigationHistory;sessionId;rejectUnauthorized;userAgent;useRndUserAgent;timeout;maxRedirects;maxRetryAttempts;retryDelay;retryOnStatusCode;forceRevisit;retryWithoutProxyOnStatusCode;retryOnProxyError;maxRetryOnProxyError;allowRevisiting;enableCache;cacheTTL;cacheDir;throwFatalError;debug;maxDepth;maxUrls;maxResponseSize;respectRobotsTxt;followNofollow;autoThrottle;autoThrottleTargetDelay;autoThrottleMinDelay;autoThrottleMaxDelay;maxWaitOn429;alwaysWaitOn429;oxylabs=[];decodo=[];proxies=[];limiters=[];requestHeaders=[];userAgents=A();constructor(e={}){this.baseUrl=e.baseUrl||"",this.adapter=e.adapter??"http",this.enableNavigationHistory=e.enableNavigationHistory??!1,this.sessionId=e.sessionId??`session_${Date.now()}_${Math.random().toString(36).slice(2,8)}`,this.rejectUnauthorized=e.rejectUnauthorized??!0,this.userAgent=e.userAgent,this.useRndUserAgent=e.useRndUserAgent??!1,this.timeout=e.timeout??30000,this.maxRedirects=e.maxRedirects??10,this.maxRetryAttempts=e.maxRetryAttempts??3,this.retryDelay=e.retryDelay??0,this.retryOnStatusCode=e.retryOnStatusCode??[408,429,500,502,503,504],this.forceRevisit=e.forceRevisit??!1,this.retryWithoutProxyOnStatusCode=e.retryWithoutProxyOnStatusCode??[407,403],this.retryOnProxyError=e.retryOnProxyError??!0,this.maxRetryOnProxyError=e.maxRetryOnProxyError??3,this.allowRevisiting=e.allowRevisiting??!1,this.enableCache=e.enableCache??!0,this.cacheTTL=e.cacheTTL??604800000,this.cacheDir=e.cacheDir??x.join(b.tmpdir(),"uiniqhtt_cache"),this.throwFatalError=e.throwFatalError??!1,this.debug=e.debug??!1,this.maxDepth=e.maxDepth??0,this.maxUrls=e.maxUrls??0,this.maxResponseSize=e.maxResponseSize??0,this.respectRobotsTxt=e.respectRobotsTxt??!1,this.followNofollow=e.followNofollow??!1,this.autoThrottle=e.autoThrottle??!0,this.autoThrottleTargetDelay=e.autoThrottleTargetDelay??1000,this.autoThrottleMinDelay=e.autoThrottleMinDelay??100,this.autoThrottleMaxDelay=e.autoThrottleMaxDelay??60000,this.maxWaitOn429=e.maxWaitOn429??1800000,this.alwaysWaitOn429=e.alwaysWaitOn429??!1,this._addHeaders(e.headers),this._addOxylabs(e.oxylabs),this._addDecodo(e.decodo),this._addProxies(e.proxy),this._addLimiters(e.limiter)}getConfiguredDomains(e){return(e==="headers"?this.requestHeaders:e==="limiters"?this.limiters:e==="oxylabs"?this.oxylabs:this.proxies).filter((s)=>s.domain).map((s)=>s.domain).filter((s,n,r)=>r.indexOf(s)===n)}removeDomain(e){return this.requestHeaders=this.requestHeaders.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this.proxies=this.proxies.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this.limiters=this.limiters.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this.oxylabs=this.oxylabs.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this}_domainsEqual(e,t){if(Array.isArray(e)&&Array.isArray(t))return e.length===t.length&&e.every((s,n)=>s===t[n]);return e===t}getConfigurationSummary(){let e=(t)=>({total:t.length,global:t.filter((s)=>s.isGlobal).length,domainSpecific:t.filter((s)=>!s.isGlobal&&s.domain).length});return{headers:e(this.requestHeaders),proxies:e(this.proxies),limiters:e(this.limiters),oxylabs:e(this.oxylabs)}}_addHeaders(e){if(!e||!e.enable)return;for(let t of e.httpHeaders){let{domain:s,isGlobal:n,headers:r}=t;if(!s&&!n)continue;if(r instanceof Headers){let i=Object.fromEntries(r.entries());if(Object.keys(i).length<1)continue;r=i}else if(!r||Object.keys(r).length<1)continue;this.requestHeaders.push({domain:s,isGlobal:n,headers:r})}}_addProxies(e){if(!e||!e.enable)return;for(let t of e.proxies){let{domain:s,isGlobal:n,proxy:r}=t;if(!s&&!n)continue;if(!r||Object.keys(r).length<1)continue;this.proxies.push({domain:s,isGlobal:n,proxy:r})}}_addLimiters(e){if(!e||!e.enable)return;for(let t of e.limiters){let{domain:s,isGlobal:n,options:r}=t;if(!s&&!n)continue;if(!r||Object.keys(r).length<1)continue;this.limiters.push({domain:s,isGlobal:n,pqueue:new m(r)})}}_addOxylabs(e){if(!e||!e.enable)return;for(let t of e.labs){let{domain:s,isGlobal:n,options:r,queueOptions:i}=t;if(!s&&!n)continue;if(!r||Object.keys(r).length<1)continue;this.oxylabs.push({domain:s,isGlobal:n,adaptar:new f(r)})}}_addDecodo(e){if(!e||!e.enable)return;for(let t of e.labs){let{domain:s,isGlobal:n,options:r,queueOptions:i}=t;if(!s&&!n)continue;if(!r||Object.keys(r).length<1)continue;this.decodo.push({domain:s,isGlobal:n,adaptar:new g(r)})}}addHeaders(e){return this._addHeaders({enable:!0,httpHeaders:[e]}),this}addProxy(e){return this._addProxies({enable:!0,proxies:[e]}),this}addLimiter(e){return this._addLimiters({enable:!0,limiters:[e]}),this}addOxylabs(e){return this._addOxylabs({enable:!0,labs:[e]}),this}addDecodo(e){return this._addDecodo({enable:!0,labs:[e]}),this}destroyLimiters(){for(let e of this.limiters)if(e.pqueue&&typeof e.pqueue.destroy==="function")e.pqueue.destroy();this.limiters=[]}clearGlobalConfigs(){if(Array.isArray(this.requestHeaders))this.requestHeaders=this.requestHeaders.filter((e)=>!e.isGlobal);if(Array.isArray(this.oxylabs))this.oxylabs=this.oxylabs.filter((e)=>!e.isGlobal);if(Array.isArray(this.limiters))this.limiters=this.limiters.filter((e)=>!e.isGlobal);if(Array.isArray(this.proxies))this.proxies=this.proxies.filter((e)=>!e.isGlobal);return this}getAdapter(e,t,s,n){if(!this.getDomainName(e))return null;let i=[],a=t==="headers"?this.requestHeaders:t==="limiters"?this.limiters:t==="oxylabs"?this.oxylabs:t==="decodo"?this.decodo:this.proxies;for(let o=0;o<a.length;o++)if(this._hasDomain(e,a[o].domain))i.push(o);if(i.length){let o=n?i[this.rnd(0,i.length-1)]:i[0];return t==="headers"?this.requestHeaders[o].headers:t==="limiters"?this.limiters[o].pqueue:t==="oxylabs"?this.oxylabs[o].adaptar:t==="decodo"?this.decodo[o].adaptar:this.proxies[o].proxy}i.length=0;for(let o=0;o<a.length;o++)i.push(o);if(i.length){let o=n?i[this.rnd(0,i.length-1)]:i[0];if(a[o].isGlobal&&s)return t==="headers"?this.requestHeaders[o].headers:t==="limiters"?this.limiters[o].pqueue:t==="oxylabs"?this.oxylabs[o].adaptar:t==="decodo"?this.decodo[o].adaptar:this.proxies[o].proxy}return null}rnd(e=0,t=Number.MAX_VALUE){return Math.floor(Math.random()*(t-e+1))+e}hasDomain(e,t,s){if(!this.getDomainName(e))return!1;let r=t==="headers"?this.requestHeaders:t==="limiters"?this.limiters:t==="oxylabs"?this.oxylabs:t==="decodo"?this.decodo:this.proxies;for(let i=0;i<r.length;i++)if(this._hasDomain(e,r[i].domain))return!0;if(s){for(let i=0;i<r.length;i++)if(r[i].isGlobal)return!0}return!1}pickHeaders(e,t,s,n){let r=this.getAdapter(e,"headers",t),i=new Headers(r??{});if(s&&s instanceof Headers)for(let[a,o]of Object.entries(s.entries()))i.set(a,o);else if(s&&typeof s==="object"){for(let[a,o]of Object.entries(s))if(typeof o==="string")i.set(a,o)}if(n)i.set("user-agent",this.getRandomUserAgent());return Object.fromEntries(i.entries())}_hasDomain(e,t){if(!t)return!1;let s=this.getDomainName(e);if(!s)return!1;let n=(i)=>{return/[\^\$\*\+\?\{\}\[\]\(\)\|\\]/.test(i)||i.startsWith("/")||i.includes(".*")||i.includes(".+")},r=(i)=>{if(i instanceof RegExp)return i.test(s)||i.test(e);let a=i.toString().trim();if(s.toLowerCase()===a.toLowerCase())return!0;if(a.includes("*")){let l=a.replace(/[.*+?^${}()|[\]\\]/g,"\\$&").replace(/\\\*/g,".*"),h=new RegExp(`^${l}$`,"i");return h.test(s)||h.test(e)}if(n(a))try{let l=a,h="i",u=a.match(/^\/(.*)\/(\w*)$/);if(u)l=u[1],h=u[2]||"i";let c=new RegExp(l,h);return c.test(s)||c.test(e)}catch(l){return s.toLowerCase().includes(a.toLowerCase())}let o=s.toLowerCase(),d=a.toLowerCase();return o===d||o.endsWith("."+d)||d.endsWith("."+o)};if(Array.isArray(t)){for(let i of t)if(r(i))return!0;return!1}return r(t)}getDomainName(e){if(this.isValidUrl(e))return new URL(e).hostname;else if(this.isHostName(e))return e;return null}isHostName(e){if(!e)return!1;if(e.length>255)return!1;let t=/^(?:(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+ [a-zA-Z]{2,})$/;return e=e.trim().toLowerCase(),t.test(e)&&!e.startsWith("-")&&!e.endsWith("-")}isValidUrl(e){if(!e)return!1;e=e.trim();try{let t=new URL(e);if(!t.protocol||!["http:","https:"].includes(t.protocol.toLowerCase()))return!1;if(!t.hostname)return!1;if(!/^(?:(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,})$/.test(t.hostname))return!1;return!0}catch{return!1}}getRandomUserAgent(){return this.userAgents[Math.floor(Math.random()*this.userAgents.length)]}}function A(){let e=[{name:"Chrome",version:"91.0.4472.124",engine:"AppleWebKit/537.36"},{name:"Firefox",version:"89.0",engine:"Gecko/20100101"},{name:"Safari",version:"14.1.1",engine:"AppleWebKit/605.1.15"},{name:"Edge",version:"91.0.864.59",engine:"AppleWebKit/537.36"},{name:"Opera",version:"77.0.4054.277",engine:"AppleWebKit/537.36"},{name:"Vivaldi",version:"3.8.2259.42",engine:"AppleWebKit/537.36"},{name:"Brave",version:"1.26.74",engine:"AppleWebKit/537.36"},{name:"Chromium",version:"91.0.4472.101",engine:"AppleWebKit/537.36"},{name:"Yandex",version:"21.5.3.742",engine:"AppleWebKit/537.36"},{name:"Maxthon",version:"5.3.8.2000",engine:"AppleWebKit/537.36"}],t=["Windows NT 10.0","Windows NT 6.1","Macintosh; Intel Mac OS X 10_15_7","Macintosh; Intel Mac OS X 11_4_0","X11; Linux x86_64","X11; Ubuntu; Linux x86_64"],s=[];for(let n=0;n<200;n++){let r=e[Math.floor(Math.random()*e.length)],i=t[Math.floor(Math.random()*t.length)],a="";switch(r.name){case"Chrome":a=`Mozilla/5.0 (${i}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36`;break;case"Firefox":a=`Mozilla/5.0 (${i}; rv:${r.version}) ${r.engine} Firefox/${r.version}`;break;case"Safari":a=`Mozilla/5.0 (${i}) ${r.engine} (KHTML, like Gecko) Version/${r.version} Safari/605.1.15`;break;case"Edge":a=`Mozilla/5.0 (${i}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 Edg/${r.version}`;break;case"Opera":a=`Mozilla/5.0 (${i}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 OPR/${r.version}`;break;case"Vivaldi":a=`Mozilla/5.0 (${i}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 Vivaldi/${r.version}`;break;case"Brave":a=`Mozilla/5.0 (${i}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 Brave/${r.version}`;break;case"Chromium":a=`Mozilla/5.0 (${i}) ${r.engine} (KHTML, like Gecko) Chromium/${r.version} Chrome/${r.version} Safari/537.36`;break;case"Yandex":a=`Mozilla/5.0 (${i}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} YaBrowser/${r.version} Safari/537.36`;break;case"Maxthon":a=`Mozilla/5.0 (${i}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 Maxthon/${r.version}`;break}s.push(a)}return s}export{y as CrawlerOptions};
|
package/dist/crawler/crawler.cjs
CHANGED
|
@@ -57,6 +57,8 @@ class Crawler {
|
|
|
57
57
|
adapterExecutor = null;
|
|
58
58
|
adapterType;
|
|
59
59
|
pendingExecutions = new Set;
|
|
60
|
+
isDestroyed = false;
|
|
61
|
+
queueOptions = { concurrency: 1000 };
|
|
60
62
|
robotsTxt;
|
|
61
63
|
domainResponseTimes = new Map;
|
|
62
64
|
domainCurrentDelay = new Map;
|
|
@@ -159,6 +161,16 @@ class Crawler {
|
|
|
159
161
|
}
|
|
160
162
|
}
|
|
161
163
|
}
|
|
164
|
+
ensureActive() {
|
|
165
|
+
if (!this.isDestroyed)
|
|
166
|
+
return;
|
|
167
|
+
this.queue = new RezoQueue(this.queueOptions);
|
|
168
|
+
this.pendingExecutions.clear();
|
|
169
|
+
this.isDestroyed = false;
|
|
170
|
+
if (this.config.debug) {
|
|
171
|
+
console.log("[Crawler] Restored from destroyed state");
|
|
172
|
+
}
|
|
173
|
+
}
|
|
162
174
|
async initializeNavigationHistory(navHistoryDir) {
|
|
163
175
|
try {
|
|
164
176
|
const history = await NavigationHistory.create({
|
|
@@ -233,6 +245,7 @@ class Crawler {
|
|
|
233
245
|
return this.config.sessionId;
|
|
234
246
|
}
|
|
235
247
|
async resume(sessionId) {
|
|
248
|
+
this.ensureActive();
|
|
236
249
|
if (!this.config.enableNavigationHistory) {
|
|
237
250
|
throw new Error("Navigation history is not enabled. Set enableNavigationHistory: true in options.");
|
|
238
251
|
}
|
|
@@ -799,6 +812,7 @@ class Crawler {
|
|
|
799
812
|
return url;
|
|
800
813
|
}
|
|
801
814
|
visit(url, options) {
|
|
815
|
+
this.ensureActive();
|
|
802
816
|
if (this.config.baseUrl)
|
|
803
817
|
url = new URL(url, this.config.baseUrl).href;
|
|
804
818
|
if (options?.params && (options.useOxylabsScraperAi || this.config.hasDomain(url, "oxylabs"))) {
|
|
@@ -1077,7 +1091,11 @@ class Crawler {
|
|
|
1077
1091
|
} catch {}
|
|
1078
1092
|
}
|
|
1079
1093
|
async destroy() {
|
|
1094
|
+
if (this.isDestroyed)
|
|
1095
|
+
return;
|
|
1096
|
+
this.isDestroyed = true;
|
|
1080
1097
|
this.queue.destroy();
|
|
1098
|
+
this.config.destroyLimiters();
|
|
1081
1099
|
this.events.length = 0;
|
|
1082
1100
|
this.jsonEvents.length = 0;
|
|
1083
1101
|
this.errorEvents.length = 0;
|
package/dist/crawler/crawler.js
CHANGED
|
@@ -57,6 +57,8 @@ export class Crawler {
|
|
|
57
57
|
adapterExecutor = null;
|
|
58
58
|
adapterType;
|
|
59
59
|
pendingExecutions = new Set;
|
|
60
|
+
isDestroyed = false;
|
|
61
|
+
queueOptions = { concurrency: 1000 };
|
|
60
62
|
robotsTxt;
|
|
61
63
|
domainResponseTimes = new Map;
|
|
62
64
|
domainCurrentDelay = new Map;
|
|
@@ -159,6 +161,16 @@ export class Crawler {
|
|
|
159
161
|
}
|
|
160
162
|
}
|
|
161
163
|
}
|
|
164
|
+
ensureActive() {
|
|
165
|
+
if (!this.isDestroyed)
|
|
166
|
+
return;
|
|
167
|
+
this.queue = new RezoQueue(this.queueOptions);
|
|
168
|
+
this.pendingExecutions.clear();
|
|
169
|
+
this.isDestroyed = false;
|
|
170
|
+
if (this.config.debug) {
|
|
171
|
+
console.log("[Crawler] Restored from destroyed state");
|
|
172
|
+
}
|
|
173
|
+
}
|
|
162
174
|
async initializeNavigationHistory(navHistoryDir) {
|
|
163
175
|
try {
|
|
164
176
|
const history = await NavigationHistory.create({
|
|
@@ -233,6 +245,7 @@ export class Crawler {
|
|
|
233
245
|
return this.config.sessionId;
|
|
234
246
|
}
|
|
235
247
|
async resume(sessionId) {
|
|
248
|
+
this.ensureActive();
|
|
236
249
|
if (!this.config.enableNavigationHistory) {
|
|
237
250
|
throw new Error("Navigation history is not enabled. Set enableNavigationHistory: true in options.");
|
|
238
251
|
}
|
|
@@ -799,6 +812,7 @@ export class Crawler {
|
|
|
799
812
|
return url;
|
|
800
813
|
}
|
|
801
814
|
visit(url, options) {
|
|
815
|
+
this.ensureActive();
|
|
802
816
|
if (this.config.baseUrl)
|
|
803
817
|
url = new URL(url, this.config.baseUrl).href;
|
|
804
818
|
if (options?.params && (options.useOxylabsScraperAi || this.config.hasDomain(url, "oxylabs"))) {
|
|
@@ -1077,7 +1091,11 @@ export class Crawler {
|
|
|
1077
1091
|
} catch {}
|
|
1078
1092
|
}
|
|
1079
1093
|
async destroy() {
|
|
1094
|
+
if (this.isDestroyed)
|
|
1095
|
+
return;
|
|
1096
|
+
this.isDestroyed = true;
|
|
1080
1097
|
this.queue.destroy();
|
|
1098
|
+
this.config.destroyLimiters();
|
|
1081
1099
|
this.events.length = 0;
|
|
1082
1100
|
this.jsonEvents.length = 0;
|
|
1083
1101
|
this.errorEvents.length = 0;
|
package/dist/crawler/index.cjs
CHANGED
|
@@ -1,40 +1,40 @@
|
|
|
1
|
-
const
|
|
2
|
-
exports.Crawler =
|
|
3
|
-
const
|
|
4
|
-
exports.CrawlerOptions =
|
|
5
|
-
const
|
|
6
|
-
exports.RobotsTxt =
|
|
7
|
-
const
|
|
8
|
-
exports.FileCacher =
|
|
9
|
-
const
|
|
10
|
-
exports.UrlStore =
|
|
11
|
-
const
|
|
12
|
-
exports.NavigationHistory =
|
|
13
|
-
const
|
|
14
|
-
exports.Oxylabs =
|
|
15
|
-
const
|
|
16
|
-
exports.OXYLABS_BROWSER_TYPES =
|
|
17
|
-
exports.OXYLABS_COMMON_LOCALES =
|
|
18
|
-
exports.OXYLABS_COMMON_GEO_LOCATIONS =
|
|
19
|
-
exports.OXYLABS_US_STATES =
|
|
20
|
-
exports.OXYLABS_EUROPEAN_COUNTRIES =
|
|
21
|
-
exports.OXYLABS_ASIAN_COUNTRIES =
|
|
22
|
-
exports.getRandomOxylabsBrowserType =
|
|
23
|
-
exports.getRandomOxylabsLocale =
|
|
24
|
-
exports.getRandomOxylabsGeoLocation =
|
|
25
|
-
const
|
|
26
|
-
exports.Decodo =
|
|
27
|
-
const
|
|
28
|
-
exports.DECODO_DEVICE_TYPES =
|
|
29
|
-
exports.DECODO_HEADLESS_MODES =
|
|
30
|
-
exports.DECODO_COMMON_LOCALES =
|
|
31
|
-
exports.DECODO_COMMON_COUNTRIES =
|
|
32
|
-
exports.DECODO_EUROPEAN_COUNTRIES =
|
|
33
|
-
exports.DECODO_ASIAN_COUNTRIES =
|
|
34
|
-
exports.DECODO_US_STATES =
|
|
35
|
-
exports.DECODO_COMMON_CITIES =
|
|
36
|
-
exports.getRandomDecodoDeviceType =
|
|
37
|
-
exports.getRandomDecodoLocale =
|
|
38
|
-
exports.getRandomDecodoCountry =
|
|
39
|
-
exports.getRandomDecodoCity =
|
|
40
|
-
exports.generateDecodoSessionId =
|
|
1
|
+
const _mod_ooxmqc = require('./crawler.cjs');
|
|
2
|
+
exports.Crawler = _mod_ooxmqc.Crawler;;
|
|
3
|
+
const _mod_vzzk69 = require('./crawler-options.cjs');
|
|
4
|
+
exports.CrawlerOptions = _mod_vzzk69.CrawlerOptions;;
|
|
5
|
+
const _mod_gsic6h = require('./plugin/robots-txt.cjs');
|
|
6
|
+
exports.RobotsTxt = _mod_gsic6h.RobotsTxt;;
|
|
7
|
+
const _mod_8jo54n = require('./plugin/file-cacher.cjs');
|
|
8
|
+
exports.FileCacher = _mod_8jo54n.FileCacher;;
|
|
9
|
+
const _mod_hkwhmy = require('./plugin/url-store.cjs');
|
|
10
|
+
exports.UrlStore = _mod_hkwhmy.UrlStore;;
|
|
11
|
+
const _mod_9bidrg = require('./plugin/navigation-history.cjs');
|
|
12
|
+
exports.NavigationHistory = _mod_9bidrg.NavigationHistory;;
|
|
13
|
+
const _mod_8ldzfr = require('./addon/oxylabs/index.cjs');
|
|
14
|
+
exports.Oxylabs = _mod_8ldzfr.Oxylabs;;
|
|
15
|
+
const _mod_x8yxp9 = require('./addon/oxylabs/options.cjs');
|
|
16
|
+
exports.OXYLABS_BROWSER_TYPES = _mod_x8yxp9.OXYLABS_BROWSER_TYPES;
|
|
17
|
+
exports.OXYLABS_COMMON_LOCALES = _mod_x8yxp9.OXYLABS_COMMON_LOCALES;
|
|
18
|
+
exports.OXYLABS_COMMON_GEO_LOCATIONS = _mod_x8yxp9.OXYLABS_COMMON_GEO_LOCATIONS;
|
|
19
|
+
exports.OXYLABS_US_STATES = _mod_x8yxp9.OXYLABS_US_STATES;
|
|
20
|
+
exports.OXYLABS_EUROPEAN_COUNTRIES = _mod_x8yxp9.OXYLABS_EUROPEAN_COUNTRIES;
|
|
21
|
+
exports.OXYLABS_ASIAN_COUNTRIES = _mod_x8yxp9.OXYLABS_ASIAN_COUNTRIES;
|
|
22
|
+
exports.getRandomOxylabsBrowserType = _mod_x8yxp9.getRandomBrowserType;
|
|
23
|
+
exports.getRandomOxylabsLocale = _mod_x8yxp9.getRandomLocale;
|
|
24
|
+
exports.getRandomOxylabsGeoLocation = _mod_x8yxp9.getRandomGeoLocation;;
|
|
25
|
+
const _mod_jeqdt1 = require('./addon/decodo/index.cjs');
|
|
26
|
+
exports.Decodo = _mod_jeqdt1.Decodo;;
|
|
27
|
+
const _mod_p59evs = require('./addon/decodo/options.cjs');
|
|
28
|
+
exports.DECODO_DEVICE_TYPES = _mod_p59evs.DECODO_DEVICE_TYPES;
|
|
29
|
+
exports.DECODO_HEADLESS_MODES = _mod_p59evs.DECODO_HEADLESS_MODES;
|
|
30
|
+
exports.DECODO_COMMON_LOCALES = _mod_p59evs.DECODO_COMMON_LOCALES;
|
|
31
|
+
exports.DECODO_COMMON_COUNTRIES = _mod_p59evs.DECODO_COMMON_COUNTRIES;
|
|
32
|
+
exports.DECODO_EUROPEAN_COUNTRIES = _mod_p59evs.DECODO_EUROPEAN_COUNTRIES;
|
|
33
|
+
exports.DECODO_ASIAN_COUNTRIES = _mod_p59evs.DECODO_ASIAN_COUNTRIES;
|
|
34
|
+
exports.DECODO_US_STATES = _mod_p59evs.DECODO_US_STATES;
|
|
35
|
+
exports.DECODO_COMMON_CITIES = _mod_p59evs.DECODO_COMMON_CITIES;
|
|
36
|
+
exports.getRandomDecodoDeviceType = _mod_p59evs.getRandomDeviceType;
|
|
37
|
+
exports.getRandomDecodoLocale = _mod_p59evs.getRandomLocale;
|
|
38
|
+
exports.getRandomDecodoCountry = _mod_p59evs.getRandomCountry;
|
|
39
|
+
exports.getRandomDecodoCity = _mod_p59evs.getRandomCity;
|
|
40
|
+
exports.generateDecodoSessionId = _mod_p59evs.generateSessionId;;
|
package/dist/crawler.d.ts
CHANGED
|
@@ -6871,6 +6871,18 @@ export declare class CrawlerOptions {
|
|
|
6871
6871
|
options: DecodoOptions;
|
|
6872
6872
|
queueOptions: queueOptions$1;
|
|
6873
6873
|
}): CrawlerOptions;
|
|
6874
|
+
/**
|
|
6875
|
+
* Destroy all limiter queues to release resources and stop intervals
|
|
6876
|
+
* @description Properly destroys all RezoQueue instances created by addLimiter().
|
|
6877
|
+
* This stops any setInterval timers that would otherwise keep the process alive.
|
|
6878
|
+
* Called automatically by Crawler.destroy().
|
|
6879
|
+
* @example
|
|
6880
|
+
* ```typescript
|
|
6881
|
+
* // Clean up all limiters
|
|
6882
|
+
* options.destroyLimiters();
|
|
6883
|
+
* ```
|
|
6884
|
+
*/
|
|
6885
|
+
destroyLimiters(): void;
|
|
6874
6886
|
/**
|
|
6875
6887
|
* Clear all global configurations from headers, proxies, limiters, Decodo, and Oxylabs
|
|
6876
6888
|
* @returns The CrawlerOptions instance for method chaining
|
|
@@ -7072,7 +7084,7 @@ export declare class Crawler {
|
|
|
7072
7084
|
* Uses SQLite as the underlying storage mechanism.
|
|
7073
7085
|
*/
|
|
7074
7086
|
cacher: FileCacher;
|
|
7075
|
-
private
|
|
7087
|
+
private queue;
|
|
7076
7088
|
private readonly isCacheEnabled;
|
|
7077
7089
|
readonly config: CrawlerOptions;
|
|
7078
7090
|
private urlStorage;
|
|
@@ -7090,6 +7102,10 @@ export declare class Crawler {
|
|
|
7090
7102
|
private adapterType;
|
|
7091
7103
|
/** Track pending execute() calls for proper done() behavior */
|
|
7092
7104
|
private pendingExecutions;
|
|
7105
|
+
/** Track if the crawler has been destroyed */
|
|
7106
|
+
private isDestroyed;
|
|
7107
|
+
/** Original queue options for restoration */
|
|
7108
|
+
private readonly queueOptions;
|
|
7093
7109
|
/** robots.txt parser and validator */
|
|
7094
7110
|
private robotsTxt;
|
|
7095
7111
|
/** AutoThrottle: track response times per domain for adaptive rate limiting */
|
|
@@ -7142,6 +7158,12 @@ export declare class Crawler {
|
|
|
7142
7158
|
* Initialize the HTTP adapter based on configuration
|
|
7143
7159
|
*/
|
|
7144
7160
|
private initializeAdapter;
|
|
7161
|
+
/**
|
|
7162
|
+
* Ensures the crawler is active and ready for use.
|
|
7163
|
+
* If the crawler was previously destroyed, this method will restore it.
|
|
7164
|
+
* Called internally before any public operation.
|
|
7165
|
+
*/
|
|
7166
|
+
private ensureActive;
|
|
7145
7167
|
/**
|
|
7146
7168
|
* Initialize navigation history and session
|
|
7147
7169
|
*/
|
package/dist/entries/crawler.cjs
CHANGED
|
@@ -1,5 +1,4 @@
|
|
|
1
|
-
const
|
|
2
|
-
exports.Crawler =
|
|
3
|
-
const
|
|
4
|
-
exports.CrawlerOptions =
|
|
5
|
-
exports.Domain = _mod_qsdeb2.Domain;;
|
|
1
|
+
const _mod_7u46bc = require('../crawler/crawler.cjs');
|
|
2
|
+
exports.Crawler = _mod_7u46bc.Crawler;;
|
|
3
|
+
const _mod_nvz29w = require('../crawler/crawler-options.cjs');
|
|
4
|
+
exports.CrawlerOptions = _mod_nvz29w.CrawlerOptions;;
|
package/dist/entries/crawler.js
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
1
|
export { Crawler } from '../crawler/crawler.js';
|
|
2
|
-
export { CrawlerOptions
|
|
2
|
+
export { CrawlerOptions } from '../crawler/crawler-options.js';
|
package/dist/index.cjs
CHANGED
|
@@ -1,30 +1,30 @@
|
|
|
1
|
-
const
|
|
2
|
-
exports.Rezo =
|
|
3
|
-
exports.createRezoInstance =
|
|
4
|
-
exports.createDefaultInstance =
|
|
5
|
-
const
|
|
6
|
-
exports.RezoError =
|
|
7
|
-
exports.RezoErrorCode =
|
|
8
|
-
const
|
|
9
|
-
exports.RezoHeaders =
|
|
10
|
-
const
|
|
11
|
-
exports.RezoFormData =
|
|
12
|
-
const
|
|
13
|
-
exports.RezoCookieJar =
|
|
14
|
-
exports.Cookie =
|
|
15
|
-
const
|
|
16
|
-
exports.toCurl =
|
|
17
|
-
exports.fromCurl =
|
|
18
|
-
const
|
|
19
|
-
exports.createDefaultHooks =
|
|
20
|
-
exports.mergeHooks =
|
|
21
|
-
const
|
|
22
|
-
exports.ProxyManager =
|
|
23
|
-
const
|
|
24
|
-
exports.RezoQueue =
|
|
25
|
-
exports.HttpQueue =
|
|
26
|
-
exports.Priority =
|
|
27
|
-
exports.HttpMethodPriority =
|
|
1
|
+
const _mod_s9lieh = require('./core/rezo.cjs');
|
|
2
|
+
exports.Rezo = _mod_s9lieh.Rezo;
|
|
3
|
+
exports.createRezoInstance = _mod_s9lieh.createRezoInstance;
|
|
4
|
+
exports.createDefaultInstance = _mod_s9lieh.createDefaultInstance;;
|
|
5
|
+
const _mod_5rlfq9 = require('./errors/rezo-error.cjs');
|
|
6
|
+
exports.RezoError = _mod_5rlfq9.RezoError;
|
|
7
|
+
exports.RezoErrorCode = _mod_5rlfq9.RezoErrorCode;;
|
|
8
|
+
const _mod_6xv93u = require('./utils/headers.cjs');
|
|
9
|
+
exports.RezoHeaders = _mod_6xv93u.RezoHeaders;;
|
|
10
|
+
const _mod_fqgdip = require('./utils/form-data.cjs');
|
|
11
|
+
exports.RezoFormData = _mod_fqgdip.RezoFormData;;
|
|
12
|
+
const _mod_56ytl2 = require('./utils/cookies.cjs');
|
|
13
|
+
exports.RezoCookieJar = _mod_56ytl2.RezoCookieJar;
|
|
14
|
+
exports.Cookie = _mod_56ytl2.Cookie;;
|
|
15
|
+
const _mod_99elmq = require('./utils/curl.cjs');
|
|
16
|
+
exports.toCurl = _mod_99elmq.toCurl;
|
|
17
|
+
exports.fromCurl = _mod_99elmq.fromCurl;;
|
|
18
|
+
const _mod_tvrmqp = require('./core/hooks.cjs');
|
|
19
|
+
exports.createDefaultHooks = _mod_tvrmqp.createDefaultHooks;
|
|
20
|
+
exports.mergeHooks = _mod_tvrmqp.mergeHooks;;
|
|
21
|
+
const _mod_04et4j = require('./proxy/manager.cjs');
|
|
22
|
+
exports.ProxyManager = _mod_04et4j.ProxyManager;;
|
|
23
|
+
const _mod_vxzvia = require('./queue/index.cjs');
|
|
24
|
+
exports.RezoQueue = _mod_vxzvia.RezoQueue;
|
|
25
|
+
exports.HttpQueue = _mod_vxzvia.HttpQueue;
|
|
26
|
+
exports.Priority = _mod_vxzvia.Priority;
|
|
27
|
+
exports.HttpMethodPriority = _mod_vxzvia.HttpMethodPriority;;
|
|
28
28
|
const { RezoError } = require('./errors/rezo-error.cjs');
|
|
29
29
|
const isRezoError = exports.isRezoError = RezoError.isRezoError;
|
|
30
30
|
const Cancel = exports.Cancel = RezoError;
|
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
const
|
|
2
|
-
exports.Agent =
|
|
3
|
-
const
|
|
4
|
-
exports.HttpProxyAgent =
|
|
5
|
-
const
|
|
6
|
-
exports.HttpsProxyAgent =
|
|
7
|
-
const
|
|
8
|
-
exports.SocksProxyAgent =
|
|
9
|
-
const
|
|
10
|
-
exports.SocksClient =
|
|
1
|
+
const _mod_e7qx8k = require('./base.cjs');
|
|
2
|
+
exports.Agent = _mod_e7qx8k.Agent;;
|
|
3
|
+
const _mod_q2hbye = require('./http-proxy.cjs');
|
|
4
|
+
exports.HttpProxyAgent = _mod_q2hbye.HttpProxyAgent;;
|
|
5
|
+
const _mod_bl101b = require('./https-proxy.cjs');
|
|
6
|
+
exports.HttpsProxyAgent = _mod_bl101b.HttpsProxyAgent;;
|
|
7
|
+
const _mod_ad41no = require('./socks-proxy.cjs');
|
|
8
|
+
exports.SocksProxyAgent = _mod_ad41no.SocksProxyAgent;;
|
|
9
|
+
const _mod_5q8p36 = require('./socks-client.cjs');
|
|
10
|
+
exports.SocksClient = _mod_5q8p36.SocksClient;;
|
package/dist/proxy/index.cjs
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
const { Agent, HttpProxyAgent, HttpsProxyAgent, SocksProxyAgent } = require('../internal/agents/index.cjs');
|
|
2
2
|
const { parseProxyString } = require('./parse.cjs');
|
|
3
|
-
const
|
|
4
|
-
exports.ProxyManager =
|
|
5
|
-
const
|
|
6
|
-
exports.parseProxyString =
|
|
3
|
+
const _mod_mucfqi = require('./manager.cjs');
|
|
4
|
+
exports.ProxyManager = _mod_mucfqi.ProxyManager;;
|
|
5
|
+
const _mod_gp9w9o = require('./parse.cjs');
|
|
6
|
+
exports.parseProxyString = _mod_gp9w9o.parseProxyString;;
|
|
7
7
|
function createOptions(uri, opts) {
|
|
8
8
|
if (uri instanceof URL || typeof uri === "string") {
|
|
9
9
|
return {
|
package/dist/queue/index.cjs
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
const
|
|
2
|
-
exports.RezoQueue =
|
|
3
|
-
const
|
|
4
|
-
exports.HttpQueue =
|
|
5
|
-
exports.extractDomain =
|
|
6
|
-
const
|
|
7
|
-
exports.Priority =
|
|
8
|
-
exports.HttpMethodPriority =
|
|
1
|
+
const _mod_w5fj4r = require('./queue.cjs');
|
|
2
|
+
exports.RezoQueue = _mod_w5fj4r.RezoQueue;;
|
|
3
|
+
const _mod_td8atq = require('./http-queue.cjs');
|
|
4
|
+
exports.HttpQueue = _mod_td8atq.HttpQueue;
|
|
5
|
+
exports.extractDomain = _mod_td8atq.extractDomain;;
|
|
6
|
+
const _mod_togvwr = require('./types.cjs');
|
|
7
|
+
exports.Priority = _mod_togvwr.Priority;
|
|
8
|
+
exports.HttpMethodPriority = _mod_togvwr.HttpMethodPriority;;
|
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
const
|
|
2
|
-
exports.UniversalEventEmitter =
|
|
3
|
-
const
|
|
4
|
-
exports.UniversalStreamResponse =
|
|
5
|
-
exports.StreamResponse =
|
|
6
|
-
const
|
|
7
|
-
exports.UniversalDownloadResponse =
|
|
8
|
-
exports.DownloadResponse =
|
|
9
|
-
const
|
|
10
|
-
exports.UniversalUploadResponse =
|
|
11
|
-
exports.UploadResponse =
|
|
1
|
+
const _mod_wh90je = require('./event-emitter.cjs');
|
|
2
|
+
exports.UniversalEventEmitter = _mod_wh90je.UniversalEventEmitter;;
|
|
3
|
+
const _mod_8xyb5w = require('./stream.cjs');
|
|
4
|
+
exports.UniversalStreamResponse = _mod_8xyb5w.UniversalStreamResponse;
|
|
5
|
+
exports.StreamResponse = _mod_8xyb5w.StreamResponse;;
|
|
6
|
+
const _mod_a20qdf = require('./download.cjs');
|
|
7
|
+
exports.UniversalDownloadResponse = _mod_a20qdf.UniversalDownloadResponse;
|
|
8
|
+
exports.DownloadResponse = _mod_a20qdf.DownloadResponse;;
|
|
9
|
+
const _mod_orr5i8 = require('./upload.cjs');
|
|
10
|
+
exports.UniversalUploadResponse = _mod_orr5i8.UniversalUploadResponse;
|
|
11
|
+
exports.UploadResponse = _mod_orr5i8.UploadResponse;;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "rezo",
|
|
3
|
-
"version": "1.0.
|
|
3
|
+
"version": "1.0.57",
|
|
4
4
|
"description": "Lightning-fast, enterprise-grade HTTP client for modern JavaScript. Full HTTP/2 support, intelligent cookie management, multiple adapters (HTTP, Fetch, cURL, XHR), streaming, proxy support (HTTP/HTTPS/SOCKS), and cross-environment compatibility.",
|
|
5
5
|
"main": "dist/index.cjs",
|
|
6
6
|
"module": "dist/index.js",
|