rezo 1.0.56 → 1.0.58
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/adapters/index.cjs +6 -6
- package/dist/cache/index.cjs +9 -9
- package/dist/crawler/crawler-options.cjs +1 -1
- package/dist/crawler/crawler-options.js +1 -1
- package/dist/crawler/crawler.cjs +23 -11
- package/dist/crawler/crawler.js +23 -11
- package/dist/crawler/index.cjs +40 -40
- package/dist/crawler.d.ts +23 -1
- package/dist/entries/crawler.cjs +4 -4
- package/dist/index.cjs +27 -27
- package/dist/internal/agents/index.cjs +10 -10
- package/dist/proxy/index.cjs +4 -4
- package/dist/queue/index.cjs +8 -8
- package/dist/responses/universal/index.cjs +11 -11
- package/package.json +1 -1
package/dist/adapters/index.cjs
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
const
|
|
2
|
-
exports.detectRuntime =
|
|
3
|
-
exports.getAdapterCapabilities =
|
|
4
|
-
exports.buildAdapterContext =
|
|
5
|
-
exports.getAvailableAdapters =
|
|
6
|
-
exports.selectAdapter =
|
|
1
|
+
const _mod_ronpz5 = require('./picker.cjs');
|
|
2
|
+
exports.detectRuntime = _mod_ronpz5.detectRuntime;
|
|
3
|
+
exports.getAdapterCapabilities = _mod_ronpz5.getAdapterCapabilities;
|
|
4
|
+
exports.buildAdapterContext = _mod_ronpz5.buildAdapterContext;
|
|
5
|
+
exports.getAvailableAdapters = _mod_ronpz5.getAvailableAdapters;
|
|
6
|
+
exports.selectAdapter = _mod_ronpz5.selectAdapter;;
|
package/dist/cache/index.cjs
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
const
|
|
2
|
-
exports.LRUCache =
|
|
3
|
-
const
|
|
4
|
-
exports.DNSCache =
|
|
5
|
-
exports.getGlobalDNSCache =
|
|
6
|
-
exports.resetGlobalDNSCache =
|
|
7
|
-
const
|
|
8
|
-
exports.ResponseCache =
|
|
9
|
-
exports.normalizeResponseCacheConfig =
|
|
1
|
+
const _mod_ea4mgu = require('./lru-cache.cjs');
|
|
2
|
+
exports.LRUCache = _mod_ea4mgu.LRUCache;;
|
|
3
|
+
const _mod_9o2noq = require('./dns-cache.cjs');
|
|
4
|
+
exports.DNSCache = _mod_9o2noq.DNSCache;
|
|
5
|
+
exports.getGlobalDNSCache = _mod_9o2noq.getGlobalDNSCache;
|
|
6
|
+
exports.resetGlobalDNSCache = _mod_9o2noq.resetGlobalDNSCache;;
|
|
7
|
+
const _mod_jow0hp = require('./response-cache.cjs');
|
|
8
|
+
exports.ResponseCache = _mod_jow0hp.ResponseCache;
|
|
9
|
+
exports.normalizeResponseCacheConfig = _mod_jow0hp.normalizeResponseCacheConfig;;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
var{RezoQueue:m}=require("../queue/queue.cjs"),{Oxylabs:x}=require("./addon/oxylabs/index.cjs"),b=require("node:path"),g=require("node:os"),{Decodo:y}=require("./addon/decodo/index.cjs");class f{baseUrl;adapter;enableNavigationHistory;sessionId;rejectUnauthorized;userAgent;useRndUserAgent;timeout;maxRedirects;maxRetryAttempts;retryDelay;retryOnStatusCode;forceRevisit;retryWithoutProxyOnStatusCode;retryOnProxyError;maxRetryOnProxyError;allowRevisiting;enableCache;cacheTTL;cacheDir;throwFatalError;debug;maxDepth;maxUrls;maxResponseSize;respectRobotsTxt;followNofollow;autoThrottle;autoThrottleTargetDelay;autoThrottleMinDelay;autoThrottleMaxDelay;maxWaitOn429;alwaysWaitOn429;oxylabs=[];decodo=[];proxies=[];limiters=[];requestHeaders=[];userAgents=A();constructor(e={}){this.baseUrl=e.baseUrl||"",this.adapter=e.adapter??"http",this.enableNavigationHistory=e.enableNavigationHistory??!1,this.sessionId=e.sessionId??`session_${Date.now()}_${Math.random().toString(36).slice(2,8)}`,this.rejectUnauthorized=e.rejectUnauthorized??!0,this.userAgent=e.userAgent,this.useRndUserAgent=e.useRndUserAgent??!1,this.timeout=e.timeout??30000,this.maxRedirects=e.maxRedirects??10,this.maxRetryAttempts=e.maxRetryAttempts??3,this.retryDelay=e.retryDelay??0,this.retryOnStatusCode=e.retryOnStatusCode??[408,429,500,502,503,504],this.forceRevisit=e.forceRevisit??!1,this.retryWithoutProxyOnStatusCode=e.retryWithoutProxyOnStatusCode??[407,403],this.retryOnProxyError=e.retryOnProxyError??!0,this.maxRetryOnProxyError=e.maxRetryOnProxyError??3,this.allowRevisiting=e.allowRevisiting??!1,this.enableCache=e.enableCache??!0,this.cacheTTL=e.cacheTTL??604800000,this.cacheDir=e.cacheDir??b.join(g.tmpdir(),"uiniqhtt_cache"),this.throwFatalError=e.throwFatalError??!1,this.debug=e.debug??!1,this.maxDepth=e.maxDepth??0,this.maxUrls=e.maxUrls??0,this.maxResponseSize=e.maxResponseSize??0,this.respectRobotsTxt=e.respectRobotsTxt??!1,this.followNofollow=e.followNofollow??!1,this.autoThrottle=e.autoThrottle??!0,this.aut
oThrottleTargetDelay=e.autoThrottleTargetDelay??1000,this.autoThrottleMinDelay=e.autoThrottleMinDelay??100,this.autoThrottleMaxDelay=e.autoThrottleMaxDelay??60000,this.maxWaitOn429=e.maxWaitOn429??1800000,this.alwaysWaitOn429=e.alwaysWaitOn429??!1,this._addHeaders(e.headers),this._addOxylabs(e.oxylabs),this._addDecodo(e.decodo),this._addProxies(e.proxy),this._addLimiters(e.limiter)}getConfiguredDomains(e){return(e==="headers"?this.requestHeaders:e==="limiters"?this.limiters:e==="oxylabs"?this.oxylabs:this.proxies).filter((i)=>i.domain).map((i)=>i.domain).filter((i,o,r)=>r.indexOf(i)===o)}removeDomain(e){return this.requestHeaders=this.requestHeaders.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this.proxies=this.proxies.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this.limiters=this.limiters.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this.oxylabs=this.oxylabs.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this}_domainsEqual(e,t){if(Array.isArray(e)&&Array.isArray(t))return e.length===t.length&&e.every((i,o)=>i===t[o]);return e===t}getConfigurationSummary(){let e=(t)=>({total:t.length,global:t.filter((i)=>i.isGlobal).length,domainSpecific:t.filter((i)=>!i.isGlobal&&i.domain).length});return{headers:e(this.requestHeaders),proxies:e(this.proxies),limiters:e(this.limiters),oxylabs:e(this.oxylabs)}}_addHeaders(e){if(!e||!e.enable)return;for(let t of e.httpHeaders){let{domain:i,isGlobal:o,headers:r}=t;if(!i&&!o)continue;if(r instanceof Headers){let s=Object.fromEntries(r.entries());if(Object.keys(s).length<1)continue;r=s}else if(!r||Object.keys(r).length<1)continue;this.requestHeaders.push({domain:i,isGlobal:o,headers:r})}}_addProxies(e){if(!e||!e.enable)return;for(let t of e.proxies){let{domain:i,isGlobal:o,proxy:r}=t;if(!i&&!o)continue;if(!r||Object.keys(r).length<1)continue;this.proxies.push({domain:i,isGlobal:o,proxy:r})}}_addLimiters(e){if(!e||!e.enable)return;for(let t of 
e.limiters){let{domain:i,isGlobal:o,options:r}=t;if(!i&&!o)continue;if(!r||Object.keys(r).length<1)continue;this.limiters.push({domain:i,isGlobal:o,pqueue:new m(r)})}}_addOxylabs(e){if(!e||!e.enable)return;for(let t of e.labs){let{domain:i,isGlobal:o,options:r,queueOptions:s}=t;if(!i&&!o)continue;if(!r||Object.keys(r).length<1)continue;this.oxylabs.push({domain:i,isGlobal:o,adaptar:new x(r)})}}_addDecodo(e){if(!e||!e.enable)return;for(let t of e.labs){let{domain:i,isGlobal:o,options:r,queueOptions:s}=t;if(!i&&!o)continue;if(!r||Object.keys(r).length<1)continue;this.decodo.push({domain:i,isGlobal:o,adaptar:new y(r)})}}addHeaders(e){return this._addHeaders({enable:!0,httpHeaders:[e]}),this}addProxy(e){return this._addProxies({enable:!0,proxies:[e]}),this}addLimiter(e){return this._addLimiters({enable:!0,limiters:[e]}),this}addOxylabs(e){return this._addOxylabs({enable:!0,labs:[e]}),this}addDecodo(e){return this._addDecodo({enable:!0,labs:[e]}),this}clearGlobalConfigs(){if(Array.isArray(this.requestHeaders))this.requestHeaders=this.requestHeaders.filter((e)=>!e.isGlobal);if(Array.isArray(this.oxylabs))this.oxylabs=this.oxylabs.filter((e)=>!e.isGlobal);if(Array.isArray(this.limiters))this.limiters=this.limiters.filter((e)=>!e.isGlobal);if(Array.isArray(this.proxies))this.proxies=this.proxies.filter((e)=>!e.isGlobal);return this}getAdapter(e,t,i,o){if(!this.getDomainName(e))return null;let s=[],a=t==="headers"?this.requestHeaders:t==="limiters"?this.limiters:t==="oxylabs"?this.oxylabs:t==="decodo"?this.decodo:this.proxies;for(let n=0;n<a.length;n++)if(this._hasDomain(e,a[n].domain))s.push(n);if(s.length){let n=o?s[this.rnd(0,s.length-1)]:s[0];return t==="headers"?this.requestHeaders[n].headers:t==="limiters"?this.limiters[n].pqueue:t==="oxylabs"?this.oxylabs[n].adaptar:t==="decodo"?this.decodo[n].adaptar:this.proxies[n].proxy}s.length=0;for(let n=0;n<a.length;n++)s.push(n);if(s.length){let n=o?s[this.rnd(0,s.length-1)]:s[0];if(a[n].isGlobal&&i)return 
t==="headers"?this.requestHeaders[n].headers:t==="limiters"?this.limiters[n].pqueue:t==="oxylabs"?this.oxylabs[n].adaptar:t==="decodo"?this.decodo[n].adaptar:this.proxies[n].proxy}return null}rnd(e=0,t=Number.MAX_VALUE){return Math.floor(Math.random()*(t-e+1))+e}hasDomain(e,t,i){if(!this.getDomainName(e))return!1;let r=t==="headers"?this.requestHeaders:t==="limiters"?this.limiters:t==="oxylabs"?this.oxylabs:t==="decodo"?this.decodo:this.proxies;for(let s=0;s<r.length;s++)if(this._hasDomain(e,r[s].domain))return!0;if(i){for(let s=0;s<r.length;s++)if(r[s].isGlobal)return!0}return!1}pickHeaders(e,t,i,o){let r=this.getAdapter(e,"headers",t),s=new Headers(r??{});if(i&&i instanceof Headers)for(let[a,n]of Object.entries(i.entries()))s.set(a,n);else if(i&&typeof i==="object"){for(let[a,n]of Object.entries(i))if(typeof n==="string")s.set(a,n)}if(o)s.set("user-agent",this.getRandomUserAgent());return Object.fromEntries(s.entries())}_hasDomain(e,t){if(!t)return!1;let i=this.getDomainName(e);if(!i)return!1;let o=(s)=>{return/[\^\$\*\+\?\{\}\[\]\(\)\|\\]/.test(s)||s.startsWith("/")||s.includes(".*")||s.includes(".+")},r=(s)=>{if(s instanceof RegExp)return s.test(i)||s.test(e);let a=s.toString().trim();if(i.toLowerCase()===a.toLowerCase())return!0;if(a.includes("*")){let l=a.replace(/[.*+?^${}()|[\]\\]/g,"\\$&").replace(/\\\*/g,".*"),h=new RegExp(`^${l}$`,"i");return h.test(i)||h.test(e)}if(o(a))try{let l=a,h="i",u=a.match(/^\/(.*)\/(\w*)$/);if(u)l=u[1],h=u[2]||"i";let c=new RegExp(l,h);return c.test(i)||c.test(e)}catch(l){return i.toLowerCase().includes(a.toLowerCase())}let n=i.toLowerCase(),d=a.toLowerCase();return n===d||n.endsWith("."+d)||d.endsWith("."+n)};if(Array.isArray(t)){for(let s of t)if(r(s))return!0;return!1}return r(t)}getDomainName(e){if(this.isValidUrl(e))return new URL(e).hostname;else if(this.isHostName(e))return e;return null}isHostName(e){if(!e)return!1;if(e.length>255)return!1;let t=/^(?:(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+ 
[a-zA-Z]{2,})$/;return e=e.trim().toLowerCase(),t.test(e)&&!e.startsWith("-")&&!e.endsWith("-")}isValidUrl(e){if(!e)return!1;e=e.trim();try{let t=new URL(e);if(!t.protocol||!["http:","https:"].includes(t.protocol.toLowerCase()))return!1;if(!t.hostname)return!1;if(!/^(?:(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,})$/.test(t.hostname))return!1;return!0}catch{return!1}}getRandomUserAgent(){return this.userAgents[Math.floor(Math.random()*this.userAgents.length)]}}function A(){let e=[{name:"Chrome",version:"91.0.4472.124",engine:"AppleWebKit/537.36"},{name:"Firefox",version:"89.0",engine:"Gecko/20100101"},{name:"Safari",version:"14.1.1",engine:"AppleWebKit/605.1.15"},{name:"Edge",version:"91.0.864.59",engine:"AppleWebKit/537.36"},{name:"Opera",version:"77.0.4054.277",engine:"AppleWebKit/537.36"},{name:"Vivaldi",version:"3.8.2259.42",engine:"AppleWebKit/537.36"},{name:"Brave",version:"1.26.74",engine:"AppleWebKit/537.36"},{name:"Chromium",version:"91.0.4472.101",engine:"AppleWebKit/537.36"},{name:"Yandex",version:"21.5.3.742",engine:"AppleWebKit/537.36"},{name:"Maxthon",version:"5.3.8.2000",engine:"AppleWebKit/537.36"}],t=["Windows NT 10.0","Windows NT 6.1","Macintosh; Intel Mac OS X 10_15_7","Macintosh; Intel Mac OS X 11_4_0","X11; Linux x86_64","X11; Ubuntu; Linux x86_64"],i=[];for(let o=0;o<200;o++){let r=e[Math.floor(Math.random()*e.length)],s=t[Math.floor(Math.random()*t.length)],a="";switch(r.name){case"Chrome":a=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36`;break;case"Firefox":a=`Mozilla/5.0 (${s}; rv:${r.version}) ${r.engine} Firefox/${r.version}`;break;case"Safari":a=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Version/${r.version} Safari/605.1.15`;break;case"Edge":a=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 Edg/${r.version}`;break;case"Opera":a=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 
OPR/${r.version}`;break;case"Vivaldi":a=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 Vivaldi/${r.version}`;break;case"Brave":a=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 Brave/${r.version}`;break;case"Chromium":a=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chromium/${r.version} Chrome/${r.version} Safari/537.36`;break;case"Yandex":a=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} YaBrowser/${r.version} Safari/537.36`;break;case"Maxthon":a=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 Maxthon/${r.version}`;break}i.push(a)}return i}exports.CrawlerOptions=f;
|
|
1
|
+
var{RezoQueue:m}=require("../queue/queue.cjs"),{Oxylabs:x}=require("./addon/oxylabs/index.cjs"),b=require("node:path"),g=require("node:os"),{Decodo:y}=require("./addon/decodo/index.cjs");class f{baseUrl;adapter;enableNavigationHistory;sessionId;rejectUnauthorized;userAgent;useRndUserAgent;timeout;maxRedirects;maxRetryAttempts;retryDelay;retryOnStatusCode;forceRevisit;retryWithoutProxyOnStatusCode;retryOnProxyError;maxRetryOnProxyError;allowRevisiting;enableCache;cacheTTL;cacheDir;throwFatalError;debug;maxDepth;maxUrls;maxResponseSize;respectRobotsTxt;followNofollow;autoThrottle;autoThrottleTargetDelay;autoThrottleMinDelay;autoThrottleMaxDelay;maxWaitOn429;alwaysWaitOn429;oxylabs=[];decodo=[];proxies=[];limiters=[];requestHeaders=[];userAgents=A();constructor(e={}){this.baseUrl=e.baseUrl||"",this.adapter=e.adapter??"http",this.enableNavigationHistory=e.enableNavigationHistory??!1,this.sessionId=e.sessionId??`session_${Date.now()}_${Math.random().toString(36).slice(2,8)}`,this.rejectUnauthorized=e.rejectUnauthorized??!0,this.userAgent=e.userAgent,this.useRndUserAgent=e.useRndUserAgent??!1,this.timeout=e.timeout??30000,this.maxRedirects=e.maxRedirects??10,this.maxRetryAttempts=e.maxRetryAttempts??3,this.retryDelay=e.retryDelay??0,this.retryOnStatusCode=e.retryOnStatusCode??[408,429,500,502,503,504],this.forceRevisit=e.forceRevisit??!1,this.retryWithoutProxyOnStatusCode=e.retryWithoutProxyOnStatusCode??[407,403],this.retryOnProxyError=e.retryOnProxyError??!0,this.maxRetryOnProxyError=e.maxRetryOnProxyError??3,this.allowRevisiting=e.allowRevisiting??!1,this.enableCache=e.enableCache??!0,this.cacheTTL=e.cacheTTL??604800000,this.cacheDir=e.cacheDir??b.join(g.tmpdir(),"uiniqhtt_cache"),this.throwFatalError=e.throwFatalError??!1,this.debug=e.debug??!1,this.maxDepth=e.maxDepth??0,this.maxUrls=e.maxUrls??0,this.maxResponseSize=e.maxResponseSize??0,this.respectRobotsTxt=e.respectRobotsTxt??!1,this.followNofollow=e.followNofollow??!1,this.autoThrottle=e.autoThrottle??!0,this.aut
oThrottleTargetDelay=e.autoThrottleTargetDelay??1000,this.autoThrottleMinDelay=e.autoThrottleMinDelay??100,this.autoThrottleMaxDelay=e.autoThrottleMaxDelay??60000,this.maxWaitOn429=e.maxWaitOn429??1800000,this.alwaysWaitOn429=e.alwaysWaitOn429??!1,this._addHeaders(e.headers),this._addOxylabs(e.oxylabs),this._addDecodo(e.decodo),this._addProxies(e.proxy),this._addLimiters(e.limiter)}getConfiguredDomains(e){return(e==="headers"?this.requestHeaders:e==="limiters"?this.limiters:e==="oxylabs"?this.oxylabs:this.proxies).filter((i)=>i.domain).map((i)=>i.domain).filter((i,n,r)=>r.indexOf(i)===n)}removeDomain(e){return this.requestHeaders=this.requestHeaders.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this.proxies=this.proxies.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this.limiters=this.limiters.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this.oxylabs=this.oxylabs.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this}_domainsEqual(e,t){if(Array.isArray(e)&&Array.isArray(t))return e.length===t.length&&e.every((i,n)=>i===t[n]);return e===t}getConfigurationSummary(){let e=(t)=>({total:t.length,global:t.filter((i)=>i.isGlobal).length,domainSpecific:t.filter((i)=>!i.isGlobal&&i.domain).length});return{headers:e(this.requestHeaders),proxies:e(this.proxies),limiters:e(this.limiters),oxylabs:e(this.oxylabs)}}_addHeaders(e){if(!e||!e.enable)return;for(let t of e.httpHeaders){let{domain:i,isGlobal:n,headers:r}=t;if(!i&&!n)continue;if(r instanceof Headers){let s=Object.fromEntries(r.entries());if(Object.keys(s).length<1)continue;r=s}else if(!r||Object.keys(r).length<1)continue;this.requestHeaders.push({domain:i,isGlobal:n,headers:r})}}_addProxies(e){if(!e||!e.enable)return;for(let t of e.proxies){let{domain:i,isGlobal:n,proxy:r}=t;if(!i&&!n)continue;if(!r||Object.keys(r).length<1)continue;this.proxies.push({domain:i,isGlobal:n,proxy:r})}}_addLimiters(e){if(!e||!e.enable)return;for(let t of 
e.limiters){let{domain:i,isGlobal:n,options:r}=t;if(!i&&!n)continue;if(!r||Object.keys(r).length<1)continue;this.limiters.push({domain:i,isGlobal:n,pqueue:new m(r)})}}_addOxylabs(e){if(!e||!e.enable)return;for(let t of e.labs){let{domain:i,isGlobal:n,options:r,queueOptions:s}=t;if(!i&&!n)continue;if(!r||Object.keys(r).length<1)continue;this.oxylabs.push({domain:i,isGlobal:n,adaptar:new x(r)})}}_addDecodo(e){if(!e||!e.enable)return;for(let t of e.labs){let{domain:i,isGlobal:n,options:r,queueOptions:s}=t;if(!i&&!n)continue;if(!r||Object.keys(r).length<1)continue;this.decodo.push({domain:i,isGlobal:n,adaptar:new y(r)})}}addHeaders(e){return this._addHeaders({enable:!0,httpHeaders:[e]}),this}addProxy(e){return this._addProxies({enable:!0,proxies:[e]}),this}addLimiter(e){return this._addLimiters({enable:!0,limiters:[e]}),this}addOxylabs(e){return this._addOxylabs({enable:!0,labs:[e]}),this}addDecodo(e){return this._addDecodo({enable:!0,labs:[e]}),this}destroyLimiters(){for(let e of this.limiters)if(e.pqueue&&typeof e.pqueue.destroy==="function")e.pqueue.destroy();this.limiters=[]}clearGlobalConfigs(){if(Array.isArray(this.requestHeaders))this.requestHeaders=this.requestHeaders.filter((e)=>!e.isGlobal);if(Array.isArray(this.oxylabs))this.oxylabs=this.oxylabs.filter((e)=>!e.isGlobal);if(Array.isArray(this.limiters))this.limiters=this.limiters.filter((e)=>!e.isGlobal);if(Array.isArray(this.proxies))this.proxies=this.proxies.filter((e)=>!e.isGlobal);return this}getAdapter(e,t,i,n){if(!this.getDomainName(e))return null;let s=[],a=t==="headers"?this.requestHeaders:t==="limiters"?this.limiters:t==="oxylabs"?this.oxylabs:t==="decodo"?this.decodo:this.proxies;for(let o=0;o<a.length;o++)if(this._hasDomain(e,a[o].domain))s.push(o);if(s.length){let o=n?s[this.rnd(0,s.length-1)]:s[0];return 
t==="headers"?this.requestHeaders[o].headers:t==="limiters"?this.limiters[o].pqueue:t==="oxylabs"?this.oxylabs[o].adaptar:t==="decodo"?this.decodo[o].adaptar:this.proxies[o].proxy}s.length=0;for(let o=0;o<a.length;o++)s.push(o);if(s.length){let o=n?s[this.rnd(0,s.length-1)]:s[0];if(a[o].isGlobal&&i)return t==="headers"?this.requestHeaders[o].headers:t==="limiters"?this.limiters[o].pqueue:t==="oxylabs"?this.oxylabs[o].adaptar:t==="decodo"?this.decodo[o].adaptar:this.proxies[o].proxy}return null}rnd(e=0,t=Number.MAX_VALUE){return Math.floor(Math.random()*(t-e+1))+e}hasDomain(e,t,i){if(!this.getDomainName(e))return!1;let r=t==="headers"?this.requestHeaders:t==="limiters"?this.limiters:t==="oxylabs"?this.oxylabs:t==="decodo"?this.decodo:this.proxies;for(let s=0;s<r.length;s++)if(this._hasDomain(e,r[s].domain))return!0;if(i){for(let s=0;s<r.length;s++)if(r[s].isGlobal)return!0}return!1}pickHeaders(e,t,i,n){let r=this.getAdapter(e,"headers",t),s=new Headers(r??{});if(i&&i instanceof Headers)for(let[a,o]of Object.entries(i.entries()))s.set(a,o);else if(i&&typeof i==="object"){for(let[a,o]of Object.entries(i))if(typeof o==="string")s.set(a,o)}if(n)s.set("user-agent",this.getRandomUserAgent());return Object.fromEntries(s.entries())}_hasDomain(e,t){if(!t)return!1;let i=this.getDomainName(e);if(!i)return!1;let n=(s)=>{return/[\^\$\*\+\?\{\}\[\]\(\)\|\\]/.test(s)||s.startsWith("/")||s.includes(".*")||s.includes(".+")},r=(s)=>{if(s instanceof RegExp)return s.test(i)||s.test(e);let a=s.toString().trim();if(i.toLowerCase()===a.toLowerCase())return!0;if(a.includes("*")){let l=a.replace(/[.*+?^${}()|[\]\\]/g,"\\$&").replace(/\\\*/g,".*"),h=new RegExp(`^${l}$`,"i");return h.test(i)||h.test(e)}if(n(a))try{let l=a,h="i",u=a.match(/^\/(.*)\/(\w*)$/);if(u)l=u[1],h=u[2]||"i";let c=new RegExp(l,h);return c.test(i)||c.test(e)}catch(l){return i.toLowerCase().includes(a.toLowerCase())}let o=i.toLowerCase(),d=a.toLowerCase();return 
o===d||o.endsWith("."+d)||d.endsWith("."+o)};if(Array.isArray(t)){for(let s of t)if(r(s))return!0;return!1}return r(t)}getDomainName(e){if(this.isValidUrl(e))return new URL(e).hostname;else if(this.isHostName(e))return e;return null}isHostName(e){if(!e)return!1;if(e.length>255)return!1;let t=/^(?:(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+ [a-zA-Z]{2,})$/;return e=e.trim().toLowerCase(),t.test(e)&&!e.startsWith("-")&&!e.endsWith("-")}isValidUrl(e){if(!e)return!1;e=e.trim();try{let t=new URL(e);if(!t.protocol||!["http:","https:"].includes(t.protocol.toLowerCase()))return!1;if(!t.hostname)return!1;if(!/^(?:(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,})$/.test(t.hostname))return!1;return!0}catch{return!1}}getRandomUserAgent(){return this.userAgents[Math.floor(Math.random()*this.userAgents.length)]}}function A(){let e=[{name:"Chrome",version:"91.0.4472.124",engine:"AppleWebKit/537.36"},{name:"Firefox",version:"89.0",engine:"Gecko/20100101"},{name:"Safari",version:"14.1.1",engine:"AppleWebKit/605.1.15"},{name:"Edge",version:"91.0.864.59",engine:"AppleWebKit/537.36"},{name:"Opera",version:"77.0.4054.277",engine:"AppleWebKit/537.36"},{name:"Vivaldi",version:"3.8.2259.42",engine:"AppleWebKit/537.36"},{name:"Brave",version:"1.26.74",engine:"AppleWebKit/537.36"},{name:"Chromium",version:"91.0.4472.101",engine:"AppleWebKit/537.36"},{name:"Yandex",version:"21.5.3.742",engine:"AppleWebKit/537.36"},{name:"Maxthon",version:"5.3.8.2000",engine:"AppleWebKit/537.36"}],t=["Windows NT 10.0","Windows NT 6.1","Macintosh; Intel Mac OS X 10_15_7","Macintosh; Intel Mac OS X 11_4_0","X11; Linux x86_64","X11; Ubuntu; Linux x86_64"],i=[];for(let n=0;n<200;n++){let r=e[Math.floor(Math.random()*e.length)],s=t[Math.floor(Math.random()*t.length)],a="";switch(r.name){case"Chrome":a=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36`;break;case"Firefox":a=`Mozilla/5.0 (${s}; rv:${r.version}) ${r.engine} 
Firefox/${r.version}`;break;case"Safari":a=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Version/${r.version} Safari/605.1.15`;break;case"Edge":a=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 Edg/${r.version}`;break;case"Opera":a=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 OPR/${r.version}`;break;case"Vivaldi":a=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 Vivaldi/${r.version}`;break;case"Brave":a=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 Brave/${r.version}`;break;case"Chromium":a=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chromium/${r.version} Chrome/${r.version} Safari/537.36`;break;case"Yandex":a=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} YaBrowser/${r.version} Safari/537.36`;break;case"Maxthon":a=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 Maxthon/${r.version}`;break}i.push(a)}return i}exports.CrawlerOptions=f;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
import { RezoQueue } from "../queue/queue.js";
import { Oxylabs } from "./addon/oxylabs/index.js";
import path from "node:path";
import os from "node:os";
import { Decodo } from "./addon/decodo/index.js";

/**
 * Shape of a host name accepted by `isHostName` and `isValidUrl`:
 * one or more dot-terminated labels followed by an alphabetic TLD (2+ letters).
 * The previous `isHostName` copy of this pattern contained a stray space and
 * therefore could never match a real host name.
 */
const HOSTNAME_PATTERN = /^(?:(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,})$/;

/** Returns the per-domain entry list of `config` that backs the given type (proxies by default). */
function entriesFor(config, type) {
  switch (type) {
    case "headers":
      return config.requestHeaders;
    case "limiters":
      return config.limiters;
    case "oxylabs":
      return config.oxylabs;
    case "decodo":
      return config.decodo;
    default:
      return config.proxies;
  }
}

/** Returns the value stored in an entry of the given type (headers, queue, adapter or proxy). */
function payloadOf(entry, type) {
  switch (type) {
    case "headers":
      return entry.headers;
    case "limiters":
      return entry.pqueue;
    case "oxylabs":
    case "decodo":
      return entry.adaptar;
    default:
      return entry.proxy;
  }
}

/** Tests `regex` against each candidate, resetting `lastIndex` so `g`/`y` flags cannot leak state. */
function matchesAny(regex, ...candidates) {
  return candidates.some((candidate) => {
    regex.lastIndex = 0;
    return regex.test(candidate);
  });
}

/**
 * Crawler configuration: scalar settings plus per-domain / global registries
 * of request headers, proxies, rate limiters, and Oxylabs / Decodo adapters.
 */
class CrawlerOptions {
  baseUrl;
  adapter;
  enableNavigationHistory;
  sessionId;
  rejectUnauthorized;
  userAgent;
  useRndUserAgent;
  timeout;
  maxRedirects;
  maxRetryAttempts;
  retryDelay;
  retryOnStatusCode;
  forceRevisit;
  retryWithoutProxyOnStatusCode;
  retryOnProxyError;
  maxRetryOnProxyError;
  allowRevisiting;
  enableCache;
  cacheTTL;
  cacheDir;
  throwFatalError;
  debug;
  maxDepth;
  maxUrls;
  maxResponseSize;
  respectRobotsTxt;
  followNofollow;
  autoThrottle;
  autoThrottleTargetDelay;
  autoThrottleMinDelay;
  autoThrottleMaxDelay;
  maxWaitOn429;
  alwaysWaitOn429;
  oxylabs = [];
  decodo = [];
  proxies = [];
  limiters = [];
  requestHeaders = [];
  userAgents = generateUserAgents();

  /**
   * @param {object} [options] - User-supplied settings; every missing key falls back to the default below.
   */
  constructor(options = {}) {
    this.baseUrl = options.baseUrl || "";
    this.adapter = options.adapter ?? "http";
    this.enableNavigationHistory = options.enableNavigationHistory ?? false;
    this.sessionId =
      options.sessionId ?? `session_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;
    this.rejectUnauthorized = options.rejectUnauthorized ?? true;
    this.userAgent = options.userAgent;
    this.useRndUserAgent = options.useRndUserAgent ?? false;
    this.timeout = options.timeout ?? 30000;
    this.maxRedirects = options.maxRedirects ?? 10;
    this.maxRetryAttempts = options.maxRetryAttempts ?? 3;
    this.retryDelay = options.retryDelay ?? 0;
    this.retryOnStatusCode = options.retryOnStatusCode ?? [408, 429, 500, 502, 503, 504];
    this.forceRevisit = options.forceRevisit ?? false;
    this.retryWithoutProxyOnStatusCode = options.retryWithoutProxyOnStatusCode ?? [407, 403];
    this.retryOnProxyError = options.retryOnProxyError ?? true;
    this.maxRetryOnProxyError = options.maxRetryOnProxyError ?? 3;
    this.allowRevisiting = options.allowRevisiting ?? false;
    this.enableCache = options.enableCache ?? true;
    this.cacheTTL = options.cacheTTL ?? 604800000;
    this.cacheDir = options.cacheDir ?? path.join(os.tmpdir(), "uiniqhtt_cache");
    this.throwFatalError = options.throwFatalError ?? false;
    this.debug = options.debug ?? false;
    this.maxDepth = options.maxDepth ?? 0;
    this.maxUrls = options.maxUrls ?? 0;
    this.maxResponseSize = options.maxResponseSize ?? 0;
    this.respectRobotsTxt = options.respectRobotsTxt ?? false;
    this.followNofollow = options.followNofollow ?? false;
    this.autoThrottle = options.autoThrottle ?? true;
    this.autoThrottleTargetDelay = options.autoThrottleTargetDelay ?? 1000;
    this.autoThrottleMinDelay = options.autoThrottleMinDelay ?? 100;
    this.autoThrottleMaxDelay = options.autoThrottleMaxDelay ?? 60000;
    this.maxWaitOn429 = options.maxWaitOn429 ?? 1800000;
    this.alwaysWaitOn429 = options.alwaysWaitOn429 ?? false;

    this._addHeaders(options.headers);
    this._addOxylabs(options.oxylabs);
    this._addDecodo(options.decodo);
    this._addProxies(options.proxy);
    this._addLimiters(options.limiter);
  }

  /**
   * Lists the distinct `domain` values registered for a configuration type.
   * @param {string} type - "headers", "limiters", "oxylabs", "decodo"; anything else selects proxies.
   * @returns {Array} Domains in first-seen order, without duplicates.
   */
  getConfiguredDomains(type) {
    const domains = entriesFor(this, type)
      .filter((entry) => entry.domain)
      .map((entry) => entry.domain);
    return [...new Set(domains)];
  }

  /**
   * Removes every entry registered for exactly `domain` from all registries
   * (including decodo, which was previously left behind).
   * @param {string|Array} domain - Domain value as originally registered.
   * @returns {this}
   */
  removeDomain(domain) {
    const keep = (entry) => !entry.domain || !this._domainsEqual(entry.domain, domain);
    this.requestHeaders = this.requestHeaders.filter(keep);
    this.proxies = this.proxies.filter(keep);
    this.limiters = this.limiters.filter(keep);
    this.oxylabs = this.oxylabs.filter(keep);
    this.decodo = this.decodo.filter(keep);
    return this;
  }

  /** Compares two domain values: element-wise for two arrays, strict equality otherwise. */
  _domainsEqual(left, right) {
    if (Array.isArray(left) && Array.isArray(right)) {
      return left.length === right.length && left.every((item, index) => item === right[index]);
    }
    return left === right;
  }

  /**
   * Counts total / global / domain-specific entries for each registry.
   * @returns {object} Summary keyed by headers, proxies, limiters, oxylabs and decodo.
   */
  getConfigurationSummary() {
    const summarize = (entries) => ({
      total: entries.length,
      global: entries.filter((entry) => entry.isGlobal).length,
      domainSpecific: entries.filter((entry) => !entry.isGlobal && entry.domain).length,
    });
    return {
      headers: summarize(this.requestHeaders),
      proxies: summarize(this.proxies),
      limiters: summarize(this.limiters),
      oxylabs: summarize(this.oxylabs),
      decodo: summarize(this.decodo),
    };
  }

  /** Registers header sets from `{ enable, httpHeaders }`; entries without domain/global scope or without headers are skipped. */
  _addHeaders(config) {
    if (!config || !config.enable) return;
    for (const item of config.httpHeaders ?? []) {
      const { domain, isGlobal } = item;
      let { headers } = item;
      if (!domain && !isGlobal) continue;
      // Headers instances are flattened to a plain object before storing.
      if (headers instanceof Headers) headers = Object.fromEntries(headers.entries());
      if (!headers || Object.keys(headers).length < 1) continue;
      this.requestHeaders.push({ domain, isGlobal, headers });
    }
  }

  /** Registers proxies from `{ enable, proxies }`; entries without scope or with an empty proxy are skipped. */
  _addProxies(config) {
    if (!config || !config.enable) return;
    for (const { domain, isGlobal, proxy } of config.proxies ?? []) {
      if (!domain && !isGlobal) continue;
      if (!proxy || Object.keys(proxy).length < 1) continue;
      this.proxies.push({ domain, isGlobal, proxy });
    }
  }

  /** Registers limiters from `{ enable, limiters }`, creating one RezoQueue per entry from its options. */
  _addLimiters(config) {
    if (!config || !config.enable) return;
    for (const { domain, isGlobal, options } of config.limiters ?? []) {
      if (!domain && !isGlobal) continue;
      if (!options || Object.keys(options).length < 1) continue;
      this.limiters.push({ domain, isGlobal, pqueue: new RezoQueue(options) });
    }
  }

  /** Registers Oxylabs adapters from `{ enable, labs }`, creating one Oxylabs instance per entry. */
  _addOxylabs(config) {
    if (!config || !config.enable) return;
    for (const { domain, isGlobal, options } of config.labs ?? []) {
      if (!domain && !isGlobal) continue;
      if (!options || Object.keys(options).length < 1) continue;
      // `adaptar` (sic) is the property name read back by getAdapter; keep the spelling.
      this.oxylabs.push({ domain, isGlobal, adaptar: new Oxylabs(options) });
    }
  }

  /** Registers Decodo adapters from `{ enable, labs }`, creating one Decodo instance per entry. */
  _addDecodo(config) {
    if (!config || !config.enable) return;
    for (const { domain, isGlobal, options } of config.labs ?? []) {
      if (!domain && !isGlobal) continue;
      if (!options || Object.keys(options).length < 1) continue;
      this.decodo.push({ domain, isGlobal, adaptar: new Decodo(options) });
    }
  }

  /** Adds a single header entry. @returns {this} */
  addHeaders(entry) {
    this._addHeaders({ enable: true, httpHeaders: [entry] });
    return this;
  }

  /** Adds a single proxy entry. @returns {this} */
  addProxy(entry) {
    this._addProxies({ enable: true, proxies: [entry] });
    return this;
  }

  /** Adds a single limiter entry. @returns {this} */
  addLimiter(entry) {
    this._addLimiters({ enable: true, limiters: [entry] });
    return this;
  }

  /** Adds a single Oxylabs entry. @returns {this} */
  addOxylabs(entry) {
    this._addOxylabs({ enable: true, labs: [entry] });
    return this;
  }

  /** Adds a single Decodo entry. @returns {this} */
  addDecodo(entry) {
    this._addDecodo({ enable: true, labs: [entry] });
    return this;
  }

  /**
   * Drops every entry flagged `isGlobal` from all registries (decodo included).
   * @returns {this}
   */
  clearGlobalConfigs() {
    const notGlobal = (entry) => !entry.isGlobal;
    if (Array.isArray(this.requestHeaders)) this.requestHeaders = this.requestHeaders.filter(notGlobal);
    if (Array.isArray(this.oxylabs)) this.oxylabs = this.oxylabs.filter(notGlobal);
    if (Array.isArray(this.decodo)) this.decodo = this.decodo.filter(notGlobal);
    if (Array.isArray(this.limiters)) this.limiters = this.limiters.filter(notGlobal);
    if (Array.isArray(this.proxies)) this.proxies = this.proxies.filter(notGlobal);
    return this;
  }

  /**
   * Resolves the configured value (headers, queue, adapter or proxy) for a URL.
   * Domain-specific entries win; when none match and `useGlobal` is truthy, a
   * global entry is used instead.
   * @param {string} url - URL or bare host name.
   * @param {string} type - "headers", "limiters", "oxylabs", "decodo"; anything else selects proxies.
   * @param {boolean} [useGlobal] - Allow falling back to entries flagged `isGlobal`.
   * @param {boolean} [random] - Pick a random candidate instead of the first one.
   * @returns {*} The stored value, or null when nothing applies or the URL is not recognised.
   */
  getAdapter(url, type, useGlobal, random) {
    if (!this.getDomainName(url)) return null;
    const entries = entriesFor(this, type);
    const pick = (candidates) =>
      random ? candidates[this.rnd(0, candidates.length - 1)] : candidates[0];

    const domainMatches = entries.filter((entry) => this._hasDomain(url, entry.domain));
    if (domainMatches.length) return payloadOf(pick(domainMatches), type);

    if (!useGlobal) return null;
    // Only global entries are eligible here; previously every entry was a
    // candidate, so a non-global pick made the lookup return null.
    const globals = entries.filter((entry) => entry.isGlobal);
    return globals.length ? payloadOf(pick(globals), type) : null;
  }

  /** Returns a random integer in the inclusive range [min, max]. */
  rnd(min = 0, max = Number.MAX_VALUE) {
    return Math.floor(Math.random() * (max - min + 1)) + min;
  }

  /**
   * Tells whether any entry of the given type applies to the URL.
   * @param {string} url - URL or bare host name.
   * @param {string} type - Same type selector as getAdapter.
   * @param {boolean} [useGlobal] - Also accept any entry flagged `isGlobal`.
   * @returns {boolean}
   */
  hasDomain(url, type, useGlobal) {
    if (!this.getDomainName(url)) return false;
    const entries = entriesFor(this, type);
    if (entries.some((entry) => this._hasDomain(url, entry.domain))) return true;
    return Boolean(useGlobal) && entries.some((entry) => entry.isGlobal);
  }

  /**
   * Builds the header object for a request: configured headers for the URL,
   * overridden by `extraHeaders`, optionally with a random user-agent.
   * @param {string} url - URL or bare host name.
   * @param {boolean} [useGlobal] - Allow global header entries.
   * @param {Headers|object} [extraHeaders] - Overrides; for plain objects only string values are applied.
   * @param {boolean} [useRandomUserAgent] - Replace "user-agent" with a generated one.
   * @returns {object} Plain object of header name/value pairs.
   */
  pickHeaders(url, useGlobal, extraHeaders, useRandomUserAgent) {
    const merged = new Headers(this.getAdapter(url, "headers", useGlobal) ?? {});
    if (extraHeaders instanceof Headers) {
      // Iterate the Headers entries directly; Object.entries() of the iterator
      // is always empty, which silently discarded these overrides before.
      for (const [name, value] of extraHeaders.entries()) merged.set(name, value);
    } else if (extraHeaders && typeof extraHeaders === "object") {
      for (const [name, value] of Object.entries(extraHeaders)) {
        if (typeof value === "string") merged.set(name, value);
      }
    }
    if (useRandomUserAgent) merged.set("user-agent", this.getRandomUserAgent());
    return Object.fromEntries(merged.entries());
  }

  /**
   * Tests a URL against a configured domain value. A domain may be a RegExp, a
   * "/source/flags" string, a "*" wildcard string, a plain host name (matching
   * the host itself and parent/sub-domains), or an array of any of these.
   * @param {string} url - URL or bare host name.
   * @param {string|RegExp|Array} domain - Configured domain value.
   * @returns {boolean}
   */
  _hasDomain(url, domain) {
    if (!domain) return false;
    const hostname = this.getDomainName(url);
    if (!hostname) return false;
    const host = hostname.toLowerCase();

    const looksLikeRegex = (text) =>
      /[\^\$\*\+\?\{\}\[\]\(\)\|\\]/.test(text) ||
      text.startsWith("/") ||
      text.includes(".*") ||
      text.includes(".+");

    const matches = (candidate) => {
      if (candidate instanceof RegExp) return matchesAny(candidate, hostname, url);

      const pattern = candidate.toString().trim();
      const needle = pattern.toLowerCase();
      if (host === needle) return true;

      if (pattern.includes("*")) {
        // Escape everything, then turn the escaped "*" back into ".*".
        const escaped = pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&").replace(/\\\*/g, ".*");
        return matchesAny(new RegExp(`^${escaped}$`, "i"), hostname, url);
      }

      if (looksLikeRegex(pattern)) {
        try {
          let source = pattern;
          let flags = "i";
          const literal = pattern.match(/^\/(.*)\/(\w*)$/);
          if (literal) {
            source = literal[1];
            flags = literal[2] || "i";
          }
          return matchesAny(new RegExp(source, flags), hostname, url);
        } catch {
          // Not a valid regular expression: fall back to a substring match.
          return host.includes(needle);
        }
      }

      return host.endsWith(`.${needle}`) || needle.endsWith(`.${host}`);
    };

    return Array.isArray(domain) ? domain.some(matches) : matches(domain);
  }

  /**
   * Extracts the host name from an http(s) URL, or returns the input itself
   * when it already is a bare host name.
   * @param {string} value - URL or host name.
   * @returns {string|null} Host name, or null when the input is neither.
   */
  getDomainName(value) {
    if (this.isValidUrl(value)) return new URL(value).hostname;
    if (this.isHostName(value)) return value;
    return null;
  }

  /**
   * Checks whether the value is a bare host name (at most 255 characters).
   * @param {string} value
   * @returns {boolean}
   */
  isHostName(value) {
    if (!value) return false;
    if (value.length > 255) return false;
    const candidate = value.trim().toLowerCase();
    return HOSTNAME_PATTERN.test(candidate) && !candidate.startsWith("-") && !candidate.endsWith("-");
  }

  /**
   * Checks whether the value parses as an http: or https: URL with a host name
   * of the accepted shape.
   * @param {string} value
   * @returns {boolean}
   */
  isValidUrl(value) {
    if (!value) return false;
    try {
      const parsed = new URL(value.trim());
      if (!parsed.protocol || !["http:", "https:"].includes(parsed.protocol.toLowerCase())) return false;
      if (!parsed.hostname) return false;
      return HOSTNAME_PATTERN.test(parsed.hostname);
    } catch {
      // new URL() throws on unparsable input.
      return false;
    }
  }

  /** Returns one of the pre-generated user-agent strings, chosen at random. */
  getRandomUserAgent() {
    return this.userAgents[Math.floor(Math.random() * this.userAgents.length)];
  }
}

/**
 * Generates 200 user-agent strings by randomly pairing a browser profile
 * with an operating-system token.
 * @returns {string[]}
 */
function generateUserAgents() {
  const browsers = [
    { name: "Chrome", version: "91.0.4472.124", engine: "AppleWebKit/537.36" },
    { name: "Firefox", version: "89.0", engine: "Gecko/20100101" },
    { name: "Safari", version: "14.1.1", engine: "AppleWebKit/605.1.15" },
    { name: "Edge", version: "91.0.864.59", engine: "AppleWebKit/537.36" },
    { name: "Opera", version: "77.0.4054.277", engine: "AppleWebKit/537.36" },
    { name: "Vivaldi", version: "3.8.2259.42", engine: "AppleWebKit/537.36" },
    { name: "Brave", version: "1.26.74", engine: "AppleWebKit/537.36" },
    { name: "Chromium", version: "91.0.4472.101", engine: "AppleWebKit/537.36" },
    { name: "Yandex", version: "21.5.3.742", engine: "AppleWebKit/537.36" },
    { name: "Maxthon", version: "5.3.8.2000", engine: "AppleWebKit/537.36" },
  ];
  const platforms = [
    "Windows NT 10.0",
    "Windows NT 6.1",
    "Macintosh; Intel Mac OS X 10_15_7",
    "Macintosh; Intel Mac OS X 11_4_0",
    "X11; Linux x86_64",
    "X11; Ubuntu; Linux x86_64",
  ];
  // Text that follows "Mozilla/5.0 (<platform>" for each browser family.
  const tails = {
    Chrome: ({ engine, version }) => `) ${engine} (KHTML, like Gecko) Chrome/${version} Safari/537.36`,
    Firefox: ({ engine, version }) => `; rv:${version}) ${engine} Firefox/${version}`,
    Safari: ({ engine, version }) => `) ${engine} (KHTML, like Gecko) Version/${version} Safari/605.1.15`,
    Edge: ({ engine, version }) => `) ${engine} (KHTML, like Gecko) Chrome/${version} Safari/537.36 Edg/${version}`,
    Opera: ({ engine, version }) => `) ${engine} (KHTML, like Gecko) Chrome/${version} Safari/537.36 OPR/${version}`,
    Vivaldi: ({ engine, version }) => `) ${engine} (KHTML, like Gecko) Chrome/${version} Safari/537.36 Vivaldi/${version}`,
    Brave: ({ engine, version }) => `) ${engine} (KHTML, like Gecko) Chrome/${version} Safari/537.36 Brave/${version}`,
    Chromium: ({ engine, version }) => `) ${engine} (KHTML, like Gecko) Chromium/${version} Chrome/${version} Safari/537.36`,
    Yandex: ({ engine, version }) => `) ${engine} (KHTML, like Gecko) Chrome/${version} YaBrowser/${version} Safari/537.36`,
    Maxthon: ({ engine, version }) => `) ${engine} (KHTML, like Gecko) Chrome/${version} Safari/537.36 Maxthon/${version}`,
  };

  const agents = [];
  for (let count = 0; count < 200; count++) {
    const browser = browsers[Math.floor(Math.random() * browsers.length)];
    const platform = platforms[Math.floor(Math.random() * platforms.length)];
    agents.push(`Mozilla/5.0 (${platform}${tails[browser.name](browser)}`);
  }
  return agents;
}

export { CrawlerOptions };
|
|
1
|
+
import { RezoQueue } from "../queue/queue.js";
import { Oxylabs } from "./addon/oxylabs/index.js";
import path from "node:path";
import os from "node:os";
import { Decodo } from "./addon/decodo/index.js";

/**
 * Shape of a host name accepted by `isHostName` and `isValidUrl`:
 * one or more dot-terminated labels followed by an alphabetic TLD (2+ letters).
 * The previous `isHostName` copy of this pattern contained a stray space and
 * therefore could never match a real host name.
 */
const HOSTNAME_PATTERN = /^(?:(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,})$/;

/** Returns the per-domain entry list of `config` that backs the given type (proxies by default). */
function entriesFor(config, type) {
  switch (type) {
    case "headers":
      return config.requestHeaders;
    case "limiters":
      return config.limiters;
    case "oxylabs":
      return config.oxylabs;
    case "decodo":
      return config.decodo;
    default:
      return config.proxies;
  }
}

/** Returns the value stored in an entry of the given type (headers, queue, adapter or proxy). */
function payloadOf(entry, type) {
  switch (type) {
    case "headers":
      return entry.headers;
    case "limiters":
      return entry.pqueue;
    case "oxylabs":
    case "decodo":
      return entry.adaptar;
    default:
      return entry.proxy;
  }
}

/** Tests `regex` against each candidate, resetting `lastIndex` so `g`/`y` flags cannot leak state. */
function matchesAny(regex, ...candidates) {
  return candidates.some((candidate) => {
    regex.lastIndex = 0;
    return regex.test(candidate);
  });
}

/**
 * Crawler configuration: scalar settings plus per-domain / global registries
 * of request headers, proxies, rate limiters, and Oxylabs / Decodo adapters.
 */
class CrawlerOptions {
  baseUrl;
  adapter;
  enableNavigationHistory;
  sessionId;
  rejectUnauthorized;
  userAgent;
  useRndUserAgent;
  timeout;
  maxRedirects;
  maxRetryAttempts;
  retryDelay;
  retryOnStatusCode;
  forceRevisit;
  retryWithoutProxyOnStatusCode;
  retryOnProxyError;
  maxRetryOnProxyError;
  allowRevisiting;
  enableCache;
  cacheTTL;
  cacheDir;
  throwFatalError;
  debug;
  maxDepth;
  maxUrls;
  maxResponseSize;
  respectRobotsTxt;
  followNofollow;
  autoThrottle;
  autoThrottleTargetDelay;
  autoThrottleMinDelay;
  autoThrottleMaxDelay;
  maxWaitOn429;
  alwaysWaitOn429;
  oxylabs = [];
  decodo = [];
  proxies = [];
  limiters = [];
  requestHeaders = [];
  userAgents = generateUserAgents();

  /**
   * @param {object} [options] - User-supplied settings; every missing key falls back to the default below.
   */
  constructor(options = {}) {
    this.baseUrl = options.baseUrl || "";
    this.adapter = options.adapter ?? "http";
    this.enableNavigationHistory = options.enableNavigationHistory ?? false;
    this.sessionId =
      options.sessionId ?? `session_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;
    this.rejectUnauthorized = options.rejectUnauthorized ?? true;
    this.userAgent = options.userAgent;
    this.useRndUserAgent = options.useRndUserAgent ?? false;
    this.timeout = options.timeout ?? 30000;
    this.maxRedirects = options.maxRedirects ?? 10;
    this.maxRetryAttempts = options.maxRetryAttempts ?? 3;
    this.retryDelay = options.retryDelay ?? 0;
    this.retryOnStatusCode = options.retryOnStatusCode ?? [408, 429, 500, 502, 503, 504];
    this.forceRevisit = options.forceRevisit ?? false;
    this.retryWithoutProxyOnStatusCode = options.retryWithoutProxyOnStatusCode ?? [407, 403];
    this.retryOnProxyError = options.retryOnProxyError ?? true;
    this.maxRetryOnProxyError = options.maxRetryOnProxyError ?? 3;
    this.allowRevisiting = options.allowRevisiting ?? false;
    this.enableCache = options.enableCache ?? true;
    this.cacheTTL = options.cacheTTL ?? 604800000;
    this.cacheDir = options.cacheDir ?? path.join(os.tmpdir(), "uiniqhtt_cache");
    this.throwFatalError = options.throwFatalError ?? false;
    this.debug = options.debug ?? false;
    this.maxDepth = options.maxDepth ?? 0;
    this.maxUrls = options.maxUrls ?? 0;
    this.maxResponseSize = options.maxResponseSize ?? 0;
    this.respectRobotsTxt = options.respectRobotsTxt ?? false;
    this.followNofollow = options.followNofollow ?? false;
    this.autoThrottle = options.autoThrottle ?? true;
    this.autoThrottleTargetDelay = options.autoThrottleTargetDelay ?? 1000;
    this.autoThrottleMinDelay = options.autoThrottleMinDelay ?? 100;
    this.autoThrottleMaxDelay = options.autoThrottleMaxDelay ?? 60000;
    this.maxWaitOn429 = options.maxWaitOn429 ?? 1800000;
    this.alwaysWaitOn429 = options.alwaysWaitOn429 ?? false;

    this._addHeaders(options.headers);
    this._addOxylabs(options.oxylabs);
    this._addDecodo(options.decodo);
    this._addProxies(options.proxy);
    this._addLimiters(options.limiter);
  }

  /**
   * Lists the distinct `domain` values registered for a configuration type.
   * @param {string} type - "headers", "limiters", "oxylabs", "decodo"; anything else selects proxies.
   * @returns {Array} Domains in first-seen order, without duplicates.
   */
  getConfiguredDomains(type) {
    const domains = entriesFor(this, type)
      .filter((entry) => entry.domain)
      .map((entry) => entry.domain);
    return [...new Set(domains)];
  }

  /**
   * Removes every entry registered for exactly `domain` from all registries
   * (including decodo, which was previously left behind).
   * @param {string|Array} domain - Domain value as originally registered.
   * @returns {this}
   */
  removeDomain(domain) {
    const keep = (entry) => !entry.domain || !this._domainsEqual(entry.domain, domain);
    this.requestHeaders = this.requestHeaders.filter(keep);
    this.proxies = this.proxies.filter(keep);
    this.limiters = this.limiters.filter(keep);
    this.oxylabs = this.oxylabs.filter(keep);
    this.decodo = this.decodo.filter(keep);
    return this;
  }

  /** Compares two domain values: element-wise for two arrays, strict equality otherwise. */
  _domainsEqual(left, right) {
    if (Array.isArray(left) && Array.isArray(right)) {
      return left.length === right.length && left.every((item, index) => item === right[index]);
    }
    return left === right;
  }

  /**
   * Counts total / global / domain-specific entries for each registry.
   * @returns {object} Summary keyed by headers, proxies, limiters, oxylabs and decodo.
   */
  getConfigurationSummary() {
    const summarize = (entries) => ({
      total: entries.length,
      global: entries.filter((entry) => entry.isGlobal).length,
      domainSpecific: entries.filter((entry) => !entry.isGlobal && entry.domain).length,
    });
    return {
      headers: summarize(this.requestHeaders),
      proxies: summarize(this.proxies),
      limiters: summarize(this.limiters),
      oxylabs: summarize(this.oxylabs),
      decodo: summarize(this.decodo),
    };
  }

  /** Registers header sets from `{ enable, httpHeaders }`; entries without domain/global scope or without headers are skipped. */
  _addHeaders(config) {
    if (!config || !config.enable) return;
    for (const item of config.httpHeaders ?? []) {
      const { domain, isGlobal } = item;
      let { headers } = item;
      if (!domain && !isGlobal) continue;
      // Headers instances are flattened to a plain object before storing.
      if (headers instanceof Headers) headers = Object.fromEntries(headers.entries());
      if (!headers || Object.keys(headers).length < 1) continue;
      this.requestHeaders.push({ domain, isGlobal, headers });
    }
  }

  /** Registers proxies from `{ enable, proxies }`; entries without scope or with an empty proxy are skipped. */
  _addProxies(config) {
    if (!config || !config.enable) return;
    for (const { domain, isGlobal, proxy } of config.proxies ?? []) {
      if (!domain && !isGlobal) continue;
      if (!proxy || Object.keys(proxy).length < 1) continue;
      this.proxies.push({ domain, isGlobal, proxy });
    }
  }

  /** Registers limiters from `{ enable, limiters }`, creating one RezoQueue per entry from its options. */
  _addLimiters(config) {
    if (!config || !config.enable) return;
    for (const { domain, isGlobal, options } of config.limiters ?? []) {
      if (!domain && !isGlobal) continue;
      if (!options || Object.keys(options).length < 1) continue;
      this.limiters.push({ domain, isGlobal, pqueue: new RezoQueue(options) });
    }
  }

  /** Registers Oxylabs adapters from `{ enable, labs }`, creating one Oxylabs instance per entry. */
  _addOxylabs(config) {
    if (!config || !config.enable) return;
    for (const { domain, isGlobal, options } of config.labs ?? []) {
      if (!domain && !isGlobal) continue;
      if (!options || Object.keys(options).length < 1) continue;
      // `adaptar` (sic) is the property name read back by getAdapter; keep the spelling.
      this.oxylabs.push({ domain, isGlobal, adaptar: new Oxylabs(options) });
    }
  }

  /** Registers Decodo adapters from `{ enable, labs }`, creating one Decodo instance per entry. */
  _addDecodo(config) {
    if (!config || !config.enable) return;
    for (const { domain, isGlobal, options } of config.labs ?? []) {
      if (!domain && !isGlobal) continue;
      if (!options || Object.keys(options).length < 1) continue;
      this.decodo.push({ domain, isGlobal, adaptar: new Decodo(options) });
    }
  }

  /** Adds a single header entry. @returns {this} */
  addHeaders(entry) {
    this._addHeaders({ enable: true, httpHeaders: [entry] });
    return this;
  }

  /** Adds a single proxy entry. @returns {this} */
  addProxy(entry) {
    this._addProxies({ enable: true, proxies: [entry] });
    return this;
  }

  /** Adds a single limiter entry. @returns {this} */
  addLimiter(entry) {
    this._addLimiters({ enable: true, limiters: [entry] });
    return this;
  }

  /** Adds a single Oxylabs entry. @returns {this} */
  addOxylabs(entry) {
    this._addOxylabs({ enable: true, labs: [entry] });
    return this;
  }

  /** Adds a single Decodo entry. @returns {this} */
  addDecodo(entry) {
    this._addDecodo({ enable: true, labs: [entry] });
    return this;
  }

  /**
   * Calls `destroy()` on every limiter queue that exposes it, then empties
   * the limiter registry. Returns nothing.
   */
  destroyLimiters() {
    for (const limiter of this.limiters) {
      if (limiter.pqueue && typeof limiter.pqueue.destroy === "function") {
        limiter.pqueue.destroy();
      }
    }
    this.limiters = [];
  }

  /**
   * Drops every entry flagged `isGlobal` from all registries (decodo included).
   * @returns {this}
   */
  clearGlobalConfigs() {
    const notGlobal = (entry) => !entry.isGlobal;
    if (Array.isArray(this.requestHeaders)) this.requestHeaders = this.requestHeaders.filter(notGlobal);
    if (Array.isArray(this.oxylabs)) this.oxylabs = this.oxylabs.filter(notGlobal);
    if (Array.isArray(this.decodo)) this.decodo = this.decodo.filter(notGlobal);
    if (Array.isArray(this.limiters)) this.limiters = this.limiters.filter(notGlobal);
    if (Array.isArray(this.proxies)) this.proxies = this.proxies.filter(notGlobal);
    return this;
  }

  /**
   * Resolves the configured value (headers, queue, adapter or proxy) for a URL.
   * Domain-specific entries win; when none match and `useGlobal` is truthy, a
   * global entry is used instead.
   * @param {string} url - URL or bare host name.
   * @param {string} type - "headers", "limiters", "oxylabs", "decodo"; anything else selects proxies.
   * @param {boolean} [useGlobal] - Allow falling back to entries flagged `isGlobal`.
   * @param {boolean} [random] - Pick a random candidate instead of the first one.
   * @returns {*} The stored value, or null when nothing applies or the URL is not recognised.
   */
  getAdapter(url, type, useGlobal, random) {
    if (!this.getDomainName(url)) return null;
    const entries = entriesFor(this, type);
    const pick = (candidates) =>
      random ? candidates[this.rnd(0, candidates.length - 1)] : candidates[0];

    const domainMatches = entries.filter((entry) => this._hasDomain(url, entry.domain));
    if (domainMatches.length) return payloadOf(pick(domainMatches), type);

    if (!useGlobal) return null;
    // Only global entries are eligible here; previously every entry was a
    // candidate, so a non-global pick made the lookup return null.
    const globals = entries.filter((entry) => entry.isGlobal);
    return globals.length ? payloadOf(pick(globals), type) : null;
  }

  /** Returns a random integer in the inclusive range [min, max]. */
  rnd(min = 0, max = Number.MAX_VALUE) {
    return Math.floor(Math.random() * (max - min + 1)) + min;
  }

  /**
   * Tells whether any entry of the given type applies to the URL.
   * @param {string} url - URL or bare host name.
   * @param {string} type - Same type selector as getAdapter.
   * @param {boolean} [useGlobal] - Also accept any entry flagged `isGlobal`.
   * @returns {boolean}
   */
  hasDomain(url, type, useGlobal) {
    if (!this.getDomainName(url)) return false;
    const entries = entriesFor(this, type);
    if (entries.some((entry) => this._hasDomain(url, entry.domain))) return true;
    return Boolean(useGlobal) && entries.some((entry) => entry.isGlobal);
  }

  /**
   * Builds the header object for a request: configured headers for the URL,
   * overridden by `extraHeaders`, optionally with a random user-agent.
   * @param {string} url - URL or bare host name.
   * @param {boolean} [useGlobal] - Allow global header entries.
   * @param {Headers|object} [extraHeaders] - Overrides; for plain objects only string values are applied.
   * @param {boolean} [useRandomUserAgent] - Replace "user-agent" with a generated one.
   * @returns {object} Plain object of header name/value pairs.
   */
  pickHeaders(url, useGlobal, extraHeaders, useRandomUserAgent) {
    const merged = new Headers(this.getAdapter(url, "headers", useGlobal) ?? {});
    if (extraHeaders instanceof Headers) {
      // Iterate the Headers entries directly; Object.entries() of the iterator
      // is always empty, which silently discarded these overrides before.
      for (const [name, value] of extraHeaders.entries()) merged.set(name, value);
    } else if (extraHeaders && typeof extraHeaders === "object") {
      for (const [name, value] of Object.entries(extraHeaders)) {
        if (typeof value === "string") merged.set(name, value);
      }
    }
    if (useRandomUserAgent) merged.set("user-agent", this.getRandomUserAgent());
    return Object.fromEntries(merged.entries());
  }

  /**
   * Tests a URL against a configured domain value. A domain may be a RegExp, a
   * "/source/flags" string, a "*" wildcard string, a plain host name (matching
   * the host itself and parent/sub-domains), or an array of any of these.
   * @param {string} url - URL or bare host name.
   * @param {string|RegExp|Array} domain - Configured domain value.
   * @returns {boolean}
   */
  _hasDomain(url, domain) {
    if (!domain) return false;
    const hostname = this.getDomainName(url);
    if (!hostname) return false;
    const host = hostname.toLowerCase();

    const looksLikeRegex = (text) =>
      /[\^\$\*\+\?\{\}\[\]\(\)\|\\]/.test(text) ||
      text.startsWith("/") ||
      text.includes(".*") ||
      text.includes(".+");

    const matches = (candidate) => {
      if (candidate instanceof RegExp) return matchesAny(candidate, hostname, url);

      const pattern = candidate.toString().trim();
      const needle = pattern.toLowerCase();
      if (host === needle) return true;

      if (pattern.includes("*")) {
        // Escape everything, then turn the escaped "*" back into ".*".
        const escaped = pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&").replace(/\\\*/g, ".*");
        return matchesAny(new RegExp(`^${escaped}$`, "i"), hostname, url);
      }

      if (looksLikeRegex(pattern)) {
        try {
          let source = pattern;
          let flags = "i";
          const literal = pattern.match(/^\/(.*)\/(\w*)$/);
          if (literal) {
            source = literal[1];
            flags = literal[2] || "i";
          }
          return matchesAny(new RegExp(source, flags), hostname, url);
        } catch {
          // Not a valid regular expression: fall back to a substring match.
          return host.includes(needle);
        }
      }

      return host.endsWith(`.${needle}`) || needle.endsWith(`.${host}`);
    };

    return Array.isArray(domain) ? domain.some(matches) : matches(domain);
  }

  /**
   * Extracts the host name from an http(s) URL, or returns the input itself
   * when it already is a bare host name.
   * @param {string} value - URL or host name.
   * @returns {string|null} Host name, or null when the input is neither.
   */
  getDomainName(value) {
    if (this.isValidUrl(value)) return new URL(value).hostname;
    if (this.isHostName(value)) return value;
    return null;
  }

  /**
   * Checks whether the value is a bare host name (at most 255 characters).
   * @param {string} value
   * @returns {boolean}
   */
  isHostName(value) {
    if (!value) return false;
    if (value.length > 255) return false;
    const candidate = value.trim().toLowerCase();
    return HOSTNAME_PATTERN.test(candidate) && !candidate.startsWith("-") && !candidate.endsWith("-");
  }

  /**
   * Checks whether the value parses as an http: or https: URL with a host name
   * of the accepted shape.
   * @param {string} value
   * @returns {boolean}
   */
  isValidUrl(value) {
    if (!value) return false;
    try {
      const parsed = new URL(value.trim());
      if (!parsed.protocol || !["http:", "https:"].includes(parsed.protocol.toLowerCase())) return false;
      if (!parsed.hostname) return false;
      return HOSTNAME_PATTERN.test(parsed.hostname);
    } catch {
      // new URL() throws on unparsable input.
      return false;
    }
  }

  /** Returns one of the pre-generated user-agent strings, chosen at random. */
  getRandomUserAgent() {
    return this.userAgents[Math.floor(Math.random() * this.userAgents.length)];
  }
}

/**
 * Generates 200 user-agent strings by randomly pairing a browser profile
 * with an operating-system token.
 * @returns {string[]}
 */
function generateUserAgents() {
  const browsers = [
    { name: "Chrome", version: "91.0.4472.124", engine: "AppleWebKit/537.36" },
    { name: "Firefox", version: "89.0", engine: "Gecko/20100101" },
    { name: "Safari", version: "14.1.1", engine: "AppleWebKit/605.1.15" },
    { name: "Edge", version: "91.0.864.59", engine: "AppleWebKit/537.36" },
    { name: "Opera", version: "77.0.4054.277", engine: "AppleWebKit/537.36" },
    { name: "Vivaldi", version: "3.8.2259.42", engine: "AppleWebKit/537.36" },
    { name: "Brave", version: "1.26.74", engine: "AppleWebKit/537.36" },
    { name: "Chromium", version: "91.0.4472.101", engine: "AppleWebKit/537.36" },
    { name: "Yandex", version: "21.5.3.742", engine: "AppleWebKit/537.36" },
    { name: "Maxthon", version: "5.3.8.2000", engine: "AppleWebKit/537.36" },
  ];
  const platforms = [
    "Windows NT 10.0",
    "Windows NT 6.1",
    "Macintosh; Intel Mac OS X 10_15_7",
    "Macintosh; Intel Mac OS X 11_4_0",
    "X11; Linux x86_64",
    "X11; Ubuntu; Linux x86_64",
  ];
  // Text that follows "Mozilla/5.0 (<platform>" for each browser family.
  const tails = {
    Chrome: ({ engine, version }) => `) ${engine} (KHTML, like Gecko) Chrome/${version} Safari/537.36`,
    Firefox: ({ engine, version }) => `; rv:${version}) ${engine} Firefox/${version}`,
    Safari: ({ engine, version }) => `) ${engine} (KHTML, like Gecko) Version/${version} Safari/605.1.15`,
    Edge: ({ engine, version }) => `) ${engine} (KHTML, like Gecko) Chrome/${version} Safari/537.36 Edg/${version}`,
    Opera: ({ engine, version }) => `) ${engine} (KHTML, like Gecko) Chrome/${version} Safari/537.36 OPR/${version}`,
    Vivaldi: ({ engine, version }) => `) ${engine} (KHTML, like Gecko) Chrome/${version} Safari/537.36 Vivaldi/${version}`,
    Brave: ({ engine, version }) => `) ${engine} (KHTML, like Gecko) Chrome/${version} Safari/537.36 Brave/${version}`,
    Chromium: ({ engine, version }) => `) ${engine} (KHTML, like Gecko) Chromium/${version} Chrome/${version} Safari/537.36`,
    Yandex: ({ engine, version }) => `) ${engine} (KHTML, like Gecko) Chrome/${version} YaBrowser/${version} Safari/537.36`,
    Maxthon: ({ engine, version }) => `) ${engine} (KHTML, like Gecko) Chrome/${version} Safari/537.36 Maxthon/${version}`,
  };

  const agents = [];
  for (let count = 0; count < 200; count++) {
    const browser = browsers[Math.floor(Math.random() * browsers.length)];
    const platform = platforms[Math.floor(Math.random() * platforms.length)];
    agents.push(`Mozilla/5.0 (${platform}${tails[browser.name](browser)}`);
  }
  return agents;
}

export { CrawlerOptions };
|
package/dist/crawler/crawler.cjs
CHANGED
|
@@ -57,6 +57,8 @@ class Crawler {
|
|
|
57
57
|
adapterExecutor = null;
|
|
58
58
|
adapterType;
|
|
59
59
|
pendingExecutions = new Set;
|
|
60
|
+
isDestroyed = false;
|
|
61
|
+
queueOptions = { concurrency: 1000 };
|
|
60
62
|
robotsTxt;
|
|
61
63
|
domainResponseTimes = new Map;
|
|
62
64
|
domainCurrentDelay = new Map;
|
|
@@ -159,6 +161,16 @@ class Crawler {
|
|
|
159
161
|
}
|
|
160
162
|
}
|
|
161
163
|
}
|
|
164
|
+
ensureActive() {
|
|
165
|
+
if (!this.isDestroyed)
|
|
166
|
+
return;
|
|
167
|
+
this.queue = new RezoQueue(this.queueOptions);
|
|
168
|
+
this.pendingExecutions.clear();
|
|
169
|
+
this.isDestroyed = false;
|
|
170
|
+
if (this.config.debug) {
|
|
171
|
+
console.log("[Crawler] Restored from destroyed state");
|
|
172
|
+
}
|
|
173
|
+
}
|
|
162
174
|
async initializeNavigationHistory(navHistoryDir) {
|
|
163
175
|
try {
|
|
164
176
|
const history = await NavigationHistory.create({
|
|
@@ -233,6 +245,7 @@ class Crawler {
|
|
|
233
245
|
return this.config.sessionId;
|
|
234
246
|
}
|
|
235
247
|
async resume(sessionId) {
|
|
248
|
+
this.ensureActive();
|
|
236
249
|
if (!this.config.enableNavigationHistory) {
|
|
237
250
|
throw new Error("Navigation history is not enabled. Set enableNavigationHistory: true in options.");
|
|
238
251
|
}
|
|
@@ -550,10 +563,10 @@ class Crawler {
|
|
|
550
563
|
this.queue.add(() => handler(error));
|
|
551
564
|
}
|
|
552
565
|
async _onEmailDiscovered(handler, email) {
|
|
553
|
-
|
|
566
|
+
this.queue.add(() => handler(email));
|
|
554
567
|
}
|
|
555
568
|
async _onEmailLeads(handler, emails) {
|
|
556
|
-
|
|
569
|
+
this.queue.add(() => handler(emails));
|
|
557
570
|
}
|
|
558
571
|
_onRawResponse(handler, rawResponse) {
|
|
559
572
|
this.queue.add(() => handler(rawResponse));
|
|
@@ -757,7 +770,7 @@ class Crawler {
|
|
|
757
770
|
this.crawlStats.startTime = Date.now();
|
|
758
771
|
for (const handler of this.startHandlers) {
|
|
759
772
|
try {
|
|
760
|
-
|
|
773
|
+
this.queue.add(() => handler());
|
|
761
774
|
} catch (error) {
|
|
762
775
|
if (this.config.debug) {
|
|
763
776
|
console.error("[Crawler] onStart handler error:", error);
|
|
@@ -769,7 +782,7 @@ class Crawler {
|
|
|
769
782
|
this.crawlStats.endTime = Date.now();
|
|
770
783
|
for (const handler of this.finishHandlers) {
|
|
771
784
|
try {
|
|
772
|
-
|
|
785
|
+
this.queue.add(() => handler(this.crawlStats));
|
|
773
786
|
} catch (error) {
|
|
774
787
|
if (this.config.debug) {
|
|
775
788
|
console.error("[Crawler] onFinish handler error:", error);
|
|
@@ -780,7 +793,7 @@ class Crawler {
|
|
|
780
793
|
async triggerRedirectHandlers(event) {
|
|
781
794
|
for (const handler of this.redirectHandlers) {
|
|
782
795
|
try {
|
|
783
|
-
|
|
796
|
+
this.queue.add(() => handler(event));
|
|
784
797
|
} catch (error) {
|
|
785
798
|
if (this.config.debug) {
|
|
786
799
|
console.error("[Crawler] onRedirect handler error:", error);
|
|
@@ -799,6 +812,7 @@ class Crawler {
|
|
|
799
812
|
return url;
|
|
800
813
|
}
|
|
801
814
|
visit(url, options) {
|
|
815
|
+
this.ensureActive();
|
|
802
816
|
if (this.config.baseUrl)
|
|
803
817
|
url = new URL(url, this.config.baseUrl).href;
|
|
804
818
|
if (options?.params && (options.useOxylabsScraperAi || this.config.hasDomain(url, "oxylabs"))) {
|
|
@@ -869,15 +883,12 @@ class Crawler {
|
|
|
869
883
|
}
|
|
870
884
|
async execute(method, url, body, options = {}, isEmail, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions) {
|
|
871
885
|
await this.waitForStorage();
|
|
872
|
-
console.log("Waiting for storage...");
|
|
873
886
|
if (this.isCacheEnabled) {
|
|
874
887
|
await this.waitForCache();
|
|
875
888
|
}
|
|
876
|
-
console.log("Waiting for cache...");
|
|
877
889
|
if (this.config.enableNavigationHistory) {
|
|
878
890
|
await this.waitForNavigationHistory();
|
|
879
891
|
}
|
|
880
|
-
console.log("Waiting for navigation history...");
|
|
881
892
|
const task = this.queue.add(() => this.executeHttp(method, url, body, options, isEmail, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions));
|
|
882
893
|
task.finally(() => this.pendingExecutions.delete(task));
|
|
883
894
|
}
|
|
@@ -904,9 +915,7 @@ class Crawler {
|
|
|
904
915
|
}
|
|
905
916
|
async executeHttp(method, url, body, options = {}, isEmail, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions, retryCount = 0, parentUrl) {
|
|
906
917
|
try {
|
|
907
|
-
console.log("Triggering start handlers...");
|
|
908
918
|
await this.triggerStartHandlers();
|
|
909
|
-
console.log("Checking crawl limits...");
|
|
910
919
|
const limitCheck = await this.checkCrawlLimits(url, parentUrl);
|
|
911
920
|
if (!limitCheck.allowed) {
|
|
912
921
|
if (this.config.debug) {
|
|
@@ -930,7 +939,6 @@ class Crawler {
|
|
|
930
939
|
}
|
|
931
940
|
const requestStartTime = Date.now();
|
|
932
941
|
const response = cache && method === "GET" ? cache : oxylabsInstanse && oxylabsOptions ? await oxylabsInstanse.scrape(url) : decodoInstanse && decodoOptions ? await decodoInstanse.scrape(url) : await (method === "GET" ? this.http.get(url, options) : method === "PATCH" ? this.http.patch(url, body, options) : method === "POST" ? this.http.post(url, body, options) : this.http.put(url, body, options));
|
|
933
|
-
console.log("Response received...");
|
|
934
942
|
if (!cache) {
|
|
935
943
|
const responseTime = Date.now() - requestStartTime;
|
|
936
944
|
this.calculateAutoThrottleDelay(domain, responseTime);
|
|
@@ -1077,7 +1085,11 @@ class Crawler {
|
|
|
1077
1085
|
} catch {}
|
|
1078
1086
|
}
|
|
1079
1087
|
async destroy() {
|
|
1088
|
+
if (this.isDestroyed)
|
|
1089
|
+
return;
|
|
1090
|
+
this.isDestroyed = true;
|
|
1080
1091
|
this.queue.destroy();
|
|
1092
|
+
this.config.destroyLimiters();
|
|
1081
1093
|
this.events.length = 0;
|
|
1082
1094
|
this.jsonEvents.length = 0;
|
|
1083
1095
|
this.errorEvents.length = 0;
|
package/dist/crawler/crawler.js
CHANGED
|
@@ -57,6 +57,8 @@ export class Crawler {
|
|
|
57
57
|
adapterExecutor = null;
|
|
58
58
|
adapterType;
|
|
59
59
|
pendingExecutions = new Set;
|
|
60
|
+
isDestroyed = false;
|
|
61
|
+
queueOptions = { concurrency: 1000 };
|
|
60
62
|
robotsTxt;
|
|
61
63
|
domainResponseTimes = new Map;
|
|
62
64
|
domainCurrentDelay = new Map;
|
|
@@ -159,6 +161,16 @@ export class Crawler {
|
|
|
159
161
|
}
|
|
160
162
|
}
|
|
161
163
|
}
|
|
164
|
+
ensureActive() {
|
|
165
|
+
if (!this.isDestroyed)
|
|
166
|
+
return;
|
|
167
|
+
this.queue = new RezoQueue(this.queueOptions);
|
|
168
|
+
this.pendingExecutions.clear();
|
|
169
|
+
this.isDestroyed = false;
|
|
170
|
+
if (this.config.debug) {
|
|
171
|
+
console.log("[Crawler] Restored from destroyed state");
|
|
172
|
+
}
|
|
173
|
+
}
|
|
162
174
|
async initializeNavigationHistory(navHistoryDir) {
|
|
163
175
|
try {
|
|
164
176
|
const history = await NavigationHistory.create({
|
|
@@ -233,6 +245,7 @@ export class Crawler {
|
|
|
233
245
|
return this.config.sessionId;
|
|
234
246
|
}
|
|
235
247
|
async resume(sessionId) {
|
|
248
|
+
this.ensureActive();
|
|
236
249
|
if (!this.config.enableNavigationHistory) {
|
|
237
250
|
throw new Error("Navigation history is not enabled. Set enableNavigationHistory: true in options.");
|
|
238
251
|
}
|
|
@@ -550,10 +563,10 @@ export class Crawler {
|
|
|
550
563
|
this.queue.add(() => handler(error));
|
|
551
564
|
}
|
|
552
565
|
async _onEmailDiscovered(handler, email) {
|
|
553
|
-
|
|
566
|
+
this.queue.add(() => handler(email));
|
|
554
567
|
}
|
|
555
568
|
async _onEmailLeads(handler, emails) {
|
|
556
|
-
|
|
569
|
+
this.queue.add(() => handler(emails));
|
|
557
570
|
}
|
|
558
571
|
_onRawResponse(handler, rawResponse) {
|
|
559
572
|
this.queue.add(() => handler(rawResponse));
|
|
@@ -757,7 +770,7 @@ export class Crawler {
|
|
|
757
770
|
this.crawlStats.startTime = Date.now();
|
|
758
771
|
for (const handler of this.startHandlers) {
|
|
759
772
|
try {
|
|
760
|
-
|
|
773
|
+
this.queue.add(() => handler());
|
|
761
774
|
} catch (error) {
|
|
762
775
|
if (this.config.debug) {
|
|
763
776
|
console.error("[Crawler] onStart handler error:", error);
|
|
@@ -769,7 +782,7 @@ export class Crawler {
|
|
|
769
782
|
this.crawlStats.endTime = Date.now();
|
|
770
783
|
for (const handler of this.finishHandlers) {
|
|
771
784
|
try {
|
|
772
|
-
|
|
785
|
+
this.queue.add(() => handler(this.crawlStats));
|
|
773
786
|
} catch (error) {
|
|
774
787
|
if (this.config.debug) {
|
|
775
788
|
console.error("[Crawler] onFinish handler error:", error);
|
|
@@ -780,7 +793,7 @@ export class Crawler {
|
|
|
780
793
|
async triggerRedirectHandlers(event) {
|
|
781
794
|
for (const handler of this.redirectHandlers) {
|
|
782
795
|
try {
|
|
783
|
-
|
|
796
|
+
this.queue.add(() => handler(event));
|
|
784
797
|
} catch (error) {
|
|
785
798
|
if (this.config.debug) {
|
|
786
799
|
console.error("[Crawler] onRedirect handler error:", error);
|
|
@@ -799,6 +812,7 @@ export class Crawler {
|
|
|
799
812
|
return url;
|
|
800
813
|
}
|
|
801
814
|
visit(url, options) {
|
|
815
|
+
this.ensureActive();
|
|
802
816
|
if (this.config.baseUrl)
|
|
803
817
|
url = new URL(url, this.config.baseUrl).href;
|
|
804
818
|
if (options?.params && (options.useOxylabsScraperAi || this.config.hasDomain(url, "oxylabs"))) {
|
|
@@ -869,15 +883,12 @@ export class Crawler {
|
|
|
869
883
|
}
|
|
870
884
|
async execute(method, url, body, options = {}, isEmail, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions) {
|
|
871
885
|
await this.waitForStorage();
|
|
872
|
-
console.log("Waiting for storage...");
|
|
873
886
|
if (this.isCacheEnabled) {
|
|
874
887
|
await this.waitForCache();
|
|
875
888
|
}
|
|
876
|
-
console.log("Waiting for cache...");
|
|
877
889
|
if (this.config.enableNavigationHistory) {
|
|
878
890
|
await this.waitForNavigationHistory();
|
|
879
891
|
}
|
|
880
|
-
console.log("Waiting for navigation history...");
|
|
881
892
|
const task = this.queue.add(() => this.executeHttp(method, url, body, options, isEmail, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions));
|
|
882
893
|
task.finally(() => this.pendingExecutions.delete(task));
|
|
883
894
|
}
|
|
@@ -904,9 +915,7 @@ export class Crawler {
|
|
|
904
915
|
}
|
|
905
916
|
async executeHttp(method, url, body, options = {}, isEmail, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions, retryCount = 0, parentUrl) {
|
|
906
917
|
try {
|
|
907
|
-
console.log("Triggering start handlers...");
|
|
908
918
|
await this.triggerStartHandlers();
|
|
909
|
-
console.log("Checking crawl limits...");
|
|
910
919
|
const limitCheck = await this.checkCrawlLimits(url, parentUrl);
|
|
911
920
|
if (!limitCheck.allowed) {
|
|
912
921
|
if (this.config.debug) {
|
|
@@ -930,7 +939,6 @@ export class Crawler {
|
|
|
930
939
|
}
|
|
931
940
|
const requestStartTime = Date.now();
|
|
932
941
|
const response = cache && method === "GET" ? cache : oxylabsInstanse && oxylabsOptions ? await oxylabsInstanse.scrape(url) : decodoInstanse && decodoOptions ? await decodoInstanse.scrape(url) : await (method === "GET" ? this.http.get(url, options) : method === "PATCH" ? this.http.patch(url, body, options) : method === "POST" ? this.http.post(url, body, options) : this.http.put(url, body, options));
|
|
933
|
-
console.log("Response received...");
|
|
934
942
|
if (!cache) {
|
|
935
943
|
const responseTime = Date.now() - requestStartTime;
|
|
936
944
|
this.calculateAutoThrottleDelay(domain, responseTime);
|
|
@@ -1077,7 +1085,11 @@ export class Crawler {
|
|
|
1077
1085
|
} catch {}
|
|
1078
1086
|
}
|
|
1079
1087
|
async destroy() {
|
|
1088
|
+
if (this.isDestroyed)
|
|
1089
|
+
return;
|
|
1090
|
+
this.isDestroyed = true;
|
|
1080
1091
|
this.queue.destroy();
|
|
1092
|
+
this.config.destroyLimiters();
|
|
1081
1093
|
this.events.length = 0;
|
|
1082
1094
|
this.jsonEvents.length = 0;
|
|
1083
1095
|
this.errorEvents.length = 0;
|
package/dist/crawler/index.cjs
CHANGED
|
@@ -1,40 +1,40 @@
|
|
|
1
|
-
const
|
|
2
|
-
exports.Crawler =
|
|
3
|
-
const
|
|
4
|
-
exports.CrawlerOptions =
|
|
5
|
-
const
|
|
6
|
-
exports.RobotsTxt =
|
|
7
|
-
const
|
|
8
|
-
exports.FileCacher =
|
|
9
|
-
const
|
|
10
|
-
exports.UrlStore =
|
|
11
|
-
const
|
|
12
|
-
exports.NavigationHistory =
|
|
13
|
-
const
|
|
14
|
-
exports.Oxylabs =
|
|
15
|
-
const
|
|
16
|
-
exports.OXYLABS_BROWSER_TYPES =
|
|
17
|
-
exports.OXYLABS_COMMON_LOCALES =
|
|
18
|
-
exports.OXYLABS_COMMON_GEO_LOCATIONS =
|
|
19
|
-
exports.OXYLABS_US_STATES =
|
|
20
|
-
exports.OXYLABS_EUROPEAN_COUNTRIES =
|
|
21
|
-
exports.OXYLABS_ASIAN_COUNTRIES =
|
|
22
|
-
exports.getRandomOxylabsBrowserType =
|
|
23
|
-
exports.getRandomOxylabsLocale =
|
|
24
|
-
exports.getRandomOxylabsGeoLocation =
|
|
25
|
-
const
|
|
26
|
-
exports.Decodo =
|
|
27
|
-
const
|
|
28
|
-
exports.DECODO_DEVICE_TYPES =
|
|
29
|
-
exports.DECODO_HEADLESS_MODES =
|
|
30
|
-
exports.DECODO_COMMON_LOCALES =
|
|
31
|
-
exports.DECODO_COMMON_COUNTRIES =
|
|
32
|
-
exports.DECODO_EUROPEAN_COUNTRIES =
|
|
33
|
-
exports.DECODO_ASIAN_COUNTRIES =
|
|
34
|
-
exports.DECODO_US_STATES =
|
|
35
|
-
exports.DECODO_COMMON_CITIES =
|
|
36
|
-
exports.getRandomDecodoDeviceType =
|
|
37
|
-
exports.getRandomDecodoLocale =
|
|
38
|
-
exports.getRandomDecodoCountry =
|
|
39
|
-
exports.getRandomDecodoCity =
|
|
40
|
-
exports.generateDecodoSessionId =
|
|
1
|
+
const _mod_6edby9 = require('./crawler.cjs');
|
|
2
|
+
exports.Crawler = _mod_6edby9.Crawler;;
|
|
3
|
+
const _mod_axv8z7 = require('./crawler-options.cjs');
|
|
4
|
+
exports.CrawlerOptions = _mod_axv8z7.CrawlerOptions;;
|
|
5
|
+
const _mod_6wttbe = require('./plugin/robots-txt.cjs');
|
|
6
|
+
exports.RobotsTxt = _mod_6wttbe.RobotsTxt;;
|
|
7
|
+
const _mod_2zjk4f = require('./plugin/file-cacher.cjs');
|
|
8
|
+
exports.FileCacher = _mod_2zjk4f.FileCacher;;
|
|
9
|
+
const _mod_xmrwxh = require('./plugin/url-store.cjs');
|
|
10
|
+
exports.UrlStore = _mod_xmrwxh.UrlStore;;
|
|
11
|
+
const _mod_pl47sn = require('./plugin/navigation-history.cjs');
|
|
12
|
+
exports.NavigationHistory = _mod_pl47sn.NavigationHistory;;
|
|
13
|
+
const _mod_dbnsr9 = require('./addon/oxylabs/index.cjs');
|
|
14
|
+
exports.Oxylabs = _mod_dbnsr9.Oxylabs;;
|
|
15
|
+
const _mod_h2o4d1 = require('./addon/oxylabs/options.cjs');
|
|
16
|
+
exports.OXYLABS_BROWSER_TYPES = _mod_h2o4d1.OXYLABS_BROWSER_TYPES;
|
|
17
|
+
exports.OXYLABS_COMMON_LOCALES = _mod_h2o4d1.OXYLABS_COMMON_LOCALES;
|
|
18
|
+
exports.OXYLABS_COMMON_GEO_LOCATIONS = _mod_h2o4d1.OXYLABS_COMMON_GEO_LOCATIONS;
|
|
19
|
+
exports.OXYLABS_US_STATES = _mod_h2o4d1.OXYLABS_US_STATES;
|
|
20
|
+
exports.OXYLABS_EUROPEAN_COUNTRIES = _mod_h2o4d1.OXYLABS_EUROPEAN_COUNTRIES;
|
|
21
|
+
exports.OXYLABS_ASIAN_COUNTRIES = _mod_h2o4d1.OXYLABS_ASIAN_COUNTRIES;
|
|
22
|
+
exports.getRandomOxylabsBrowserType = _mod_h2o4d1.getRandomBrowserType;
|
|
23
|
+
exports.getRandomOxylabsLocale = _mod_h2o4d1.getRandomLocale;
|
|
24
|
+
exports.getRandomOxylabsGeoLocation = _mod_h2o4d1.getRandomGeoLocation;;
|
|
25
|
+
const _mod_63jx8t = require('./addon/decodo/index.cjs');
|
|
26
|
+
exports.Decodo = _mod_63jx8t.Decodo;;
|
|
27
|
+
const _mod_5y9hg7 = require('./addon/decodo/options.cjs');
|
|
28
|
+
exports.DECODO_DEVICE_TYPES = _mod_5y9hg7.DECODO_DEVICE_TYPES;
|
|
29
|
+
exports.DECODO_HEADLESS_MODES = _mod_5y9hg7.DECODO_HEADLESS_MODES;
|
|
30
|
+
exports.DECODO_COMMON_LOCALES = _mod_5y9hg7.DECODO_COMMON_LOCALES;
|
|
31
|
+
exports.DECODO_COMMON_COUNTRIES = _mod_5y9hg7.DECODO_COMMON_COUNTRIES;
|
|
32
|
+
exports.DECODO_EUROPEAN_COUNTRIES = _mod_5y9hg7.DECODO_EUROPEAN_COUNTRIES;
|
|
33
|
+
exports.DECODO_ASIAN_COUNTRIES = _mod_5y9hg7.DECODO_ASIAN_COUNTRIES;
|
|
34
|
+
exports.DECODO_US_STATES = _mod_5y9hg7.DECODO_US_STATES;
|
|
35
|
+
exports.DECODO_COMMON_CITIES = _mod_5y9hg7.DECODO_COMMON_CITIES;
|
|
36
|
+
exports.getRandomDecodoDeviceType = _mod_5y9hg7.getRandomDeviceType;
|
|
37
|
+
exports.getRandomDecodoLocale = _mod_5y9hg7.getRandomLocale;
|
|
38
|
+
exports.getRandomDecodoCountry = _mod_5y9hg7.getRandomCountry;
|
|
39
|
+
exports.getRandomDecodoCity = _mod_5y9hg7.getRandomCity;
|
|
40
|
+
exports.generateDecodoSessionId = _mod_5y9hg7.generateSessionId;;
|
package/dist/crawler.d.ts
CHANGED
|
@@ -6871,6 +6871,18 @@ export declare class CrawlerOptions {
|
|
|
6871
6871
|
options: DecodoOptions;
|
|
6872
6872
|
queueOptions: queueOptions$1;
|
|
6873
6873
|
}): CrawlerOptions;
|
|
6874
|
+
/**
|
|
6875
|
+
* Destroy all limiter queues to release resources and stop intervals
|
|
6876
|
+
* @description Properly destroys all RezoQueue instances created by addLimiter().
|
|
6877
|
+
* This stops any setInterval timers that would otherwise keep the process alive.
|
|
6878
|
+
* Called automatically by Crawler.destroy().
|
|
6879
|
+
* @example
|
|
6880
|
+
* ```typescript
|
|
6881
|
+
* // Clean up all limiters
|
|
6882
|
+
* options.destroyLimiters();
|
|
6883
|
+
* ```
|
|
6884
|
+
*/
|
|
6885
|
+
destroyLimiters(): void;
|
|
6874
6886
|
/**
|
|
6875
6887
|
* Clear all global configurations from headers, proxies, limiters, Decodo, and Oxylabs
|
|
6876
6888
|
* @returns The CrawlerOptions instance for method chaining
|
|
@@ -7072,7 +7084,7 @@ export declare class Crawler {
|
|
|
7072
7084
|
* Uses SQLite as the underlying storage mechanism.
|
|
7073
7085
|
*/
|
|
7074
7086
|
cacher: FileCacher;
|
|
7075
|
-
private
|
|
7087
|
+
private queue;
|
|
7076
7088
|
private readonly isCacheEnabled;
|
|
7077
7089
|
readonly config: CrawlerOptions;
|
|
7078
7090
|
private urlStorage;
|
|
@@ -7090,6 +7102,10 @@ export declare class Crawler {
|
|
|
7090
7102
|
private adapterType;
|
|
7091
7103
|
/** Track pending execute() calls for proper done() behavior */
|
|
7092
7104
|
private pendingExecutions;
|
|
7105
|
+
/** Track if the crawler has been destroyed */
|
|
7106
|
+
private isDestroyed;
|
|
7107
|
+
/** Original queue options for restoration */
|
|
7108
|
+
private readonly queueOptions;
|
|
7093
7109
|
/** robots.txt parser and validator */
|
|
7094
7110
|
private robotsTxt;
|
|
7095
7111
|
/** AutoThrottle: track response times per domain for adaptive rate limiting */
|
|
@@ -7142,6 +7158,12 @@ export declare class Crawler {
|
|
|
7142
7158
|
* Initialize the HTTP adapter based on configuration
|
|
7143
7159
|
*/
|
|
7144
7160
|
private initializeAdapter;
|
|
7161
|
+
/**
|
|
7162
|
+
* Ensures the crawler is active and ready for use.
|
|
7163
|
+
* If the crawler was previously destroyed, this method will restore it.
|
|
7164
|
+
* Called internally before any public operation.
|
|
7165
|
+
*/
|
|
7166
|
+
private ensureActive;
|
|
7145
7167
|
/**
|
|
7146
7168
|
* Initialize navigation history and session
|
|
7147
7169
|
*/
|
package/dist/entries/crawler.cjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
const
|
|
2
|
-
exports.Crawler =
|
|
3
|
-
const
|
|
4
|
-
exports.CrawlerOptions =
|
|
1
|
+
const _mod_wyq9kz = require('../crawler/crawler.cjs');
|
|
2
|
+
exports.Crawler = _mod_wyq9kz.Crawler;;
|
|
3
|
+
const _mod_w2piwy = require('../crawler/crawler-options.cjs');
|
|
4
|
+
exports.CrawlerOptions = _mod_w2piwy.CrawlerOptions;;
|
package/dist/index.cjs
CHANGED
|
@@ -1,30 +1,30 @@
|
|
|
1
|
-
const
|
|
2
|
-
exports.Rezo =
|
|
3
|
-
exports.createRezoInstance =
|
|
4
|
-
exports.createDefaultInstance =
|
|
5
|
-
const
|
|
6
|
-
exports.RezoError =
|
|
7
|
-
exports.RezoErrorCode =
|
|
8
|
-
const
|
|
9
|
-
exports.RezoHeaders =
|
|
10
|
-
const
|
|
11
|
-
exports.RezoFormData =
|
|
12
|
-
const
|
|
13
|
-
exports.RezoCookieJar =
|
|
14
|
-
exports.Cookie =
|
|
15
|
-
const
|
|
16
|
-
exports.toCurl =
|
|
17
|
-
exports.fromCurl =
|
|
18
|
-
const
|
|
19
|
-
exports.createDefaultHooks =
|
|
20
|
-
exports.mergeHooks =
|
|
21
|
-
const
|
|
22
|
-
exports.ProxyManager =
|
|
23
|
-
const
|
|
24
|
-
exports.RezoQueue =
|
|
25
|
-
exports.HttpQueue =
|
|
26
|
-
exports.Priority =
|
|
27
|
-
exports.HttpMethodPriority =
|
|
1
|
+
const _mod_i5u6sg = require('./core/rezo.cjs');
|
|
2
|
+
exports.Rezo = _mod_i5u6sg.Rezo;
|
|
3
|
+
exports.createRezoInstance = _mod_i5u6sg.createRezoInstance;
|
|
4
|
+
exports.createDefaultInstance = _mod_i5u6sg.createDefaultInstance;;
|
|
5
|
+
const _mod_41z8dt = require('./errors/rezo-error.cjs');
|
|
6
|
+
exports.RezoError = _mod_41z8dt.RezoError;
|
|
7
|
+
exports.RezoErrorCode = _mod_41z8dt.RezoErrorCode;;
|
|
8
|
+
const _mod_n2o0jv = require('./utils/headers.cjs');
|
|
9
|
+
exports.RezoHeaders = _mod_n2o0jv.RezoHeaders;;
|
|
10
|
+
const _mod_imk3ix = require('./utils/form-data.cjs');
|
|
11
|
+
exports.RezoFormData = _mod_imk3ix.RezoFormData;;
|
|
12
|
+
const _mod_5jwled = require('./utils/cookies.cjs');
|
|
13
|
+
exports.RezoCookieJar = _mod_5jwled.RezoCookieJar;
|
|
14
|
+
exports.Cookie = _mod_5jwled.Cookie;;
|
|
15
|
+
const _mod_uk995z = require('./utils/curl.cjs');
|
|
16
|
+
exports.toCurl = _mod_uk995z.toCurl;
|
|
17
|
+
exports.fromCurl = _mod_uk995z.fromCurl;;
|
|
18
|
+
const _mod_b3o4e4 = require('./core/hooks.cjs');
|
|
19
|
+
exports.createDefaultHooks = _mod_b3o4e4.createDefaultHooks;
|
|
20
|
+
exports.mergeHooks = _mod_b3o4e4.mergeHooks;;
|
|
21
|
+
const _mod_wmzjgb = require('./proxy/manager.cjs');
|
|
22
|
+
exports.ProxyManager = _mod_wmzjgb.ProxyManager;;
|
|
23
|
+
const _mod_mhxh9j = require('./queue/index.cjs');
|
|
24
|
+
exports.RezoQueue = _mod_mhxh9j.RezoQueue;
|
|
25
|
+
exports.HttpQueue = _mod_mhxh9j.HttpQueue;
|
|
26
|
+
exports.Priority = _mod_mhxh9j.Priority;
|
|
27
|
+
exports.HttpMethodPriority = _mod_mhxh9j.HttpMethodPriority;;
|
|
28
28
|
const { RezoError } = require('./errors/rezo-error.cjs');
|
|
29
29
|
const isRezoError = exports.isRezoError = RezoError.isRezoError;
|
|
30
30
|
const Cancel = exports.Cancel = RezoError;
|
package/dist/internal/agents/index.cjs
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
const
|
|
2
|
-
exports.Agent =
|
|
3
|
-
const
|
|
4
|
-
exports.HttpProxyAgent =
|
|
5
|
-
const
|
|
6
|
-
exports.HttpsProxyAgent =
|
|
7
|
-
const
|
|
8
|
-
exports.SocksProxyAgent =
|
|
9
|
-
const
|
|
10
|
-
exports.SocksClient =
|
|
1
|
+
const _mod_1twhkh = require('./base.cjs');
|
|
2
|
+
exports.Agent = _mod_1twhkh.Agent;;
|
|
3
|
+
const _mod_6s90bc = require('./http-proxy.cjs');
|
|
4
|
+
exports.HttpProxyAgent = _mod_6s90bc.HttpProxyAgent;;
|
|
5
|
+
const _mod_y3mef2 = require('./https-proxy.cjs');
|
|
6
|
+
exports.HttpsProxyAgent = _mod_y3mef2.HttpsProxyAgent;;
|
|
7
|
+
const _mod_o8zfcz = require('./socks-proxy.cjs');
|
|
8
|
+
exports.SocksProxyAgent = _mod_o8zfcz.SocksProxyAgent;;
|
|
9
|
+
const _mod_ysdaev = require('./socks-client.cjs');
|
|
10
|
+
exports.SocksClient = _mod_ysdaev.SocksClient;;
|
package/dist/proxy/index.cjs
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
const { Agent, HttpProxyAgent, HttpsProxyAgent, SocksProxyAgent } = require('../internal/agents/index.cjs');
|
|
2
2
|
const { parseProxyString } = require('./parse.cjs');
|
|
3
|
-
const
|
|
4
|
-
exports.ProxyManager =
|
|
5
|
-
const
|
|
6
|
-
exports.parseProxyString =
|
|
3
|
+
const _mod_c9uthc = require('./manager.cjs');
|
|
4
|
+
exports.ProxyManager = _mod_c9uthc.ProxyManager;;
|
|
5
|
+
const _mod_nqntdj = require('./parse.cjs');
|
|
6
|
+
exports.parseProxyString = _mod_nqntdj.parseProxyString;;
|
|
7
7
|
function createOptions(uri, opts) {
|
|
8
8
|
if (uri instanceof URL || typeof uri === "string") {
|
|
9
9
|
return {
|
package/dist/queue/index.cjs
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
const
|
|
2
|
-
exports.RezoQueue =
|
|
3
|
-
const
|
|
4
|
-
exports.HttpQueue =
|
|
5
|
-
exports.extractDomain =
|
|
6
|
-
const
|
|
7
|
-
exports.Priority =
|
|
8
|
-
exports.HttpMethodPriority =
|
|
1
|
+
const _mod_uinp36 = require('./queue.cjs');
|
|
2
|
+
exports.RezoQueue = _mod_uinp36.RezoQueue;;
|
|
3
|
+
const _mod_p0ep5c = require('./http-queue.cjs');
|
|
4
|
+
exports.HttpQueue = _mod_p0ep5c.HttpQueue;
|
|
5
|
+
exports.extractDomain = _mod_p0ep5c.extractDomain;;
|
|
6
|
+
const _mod_5o7x2e = require('./types.cjs');
|
|
7
|
+
exports.Priority = _mod_5o7x2e.Priority;
|
|
8
|
+
exports.HttpMethodPriority = _mod_5o7x2e.HttpMethodPriority;;
|
package/dist/responses/universal/index.cjs
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
const
|
|
2
|
-
exports.UniversalEventEmitter =
|
|
3
|
-
const
|
|
4
|
-
exports.UniversalStreamResponse =
|
|
5
|
-
exports.StreamResponse =
|
|
6
|
-
const
|
|
7
|
-
exports.UniversalDownloadResponse =
|
|
8
|
-
exports.DownloadResponse =
|
|
9
|
-
const
|
|
10
|
-
exports.UniversalUploadResponse =
|
|
11
|
-
exports.UploadResponse =
|
|
1
|
+
const _mod_nbsy03 = require('./event-emitter.cjs');
|
|
2
|
+
exports.UniversalEventEmitter = _mod_nbsy03.UniversalEventEmitter;;
|
|
3
|
+
const _mod_q18xhk = require('./stream.cjs');
|
|
4
|
+
exports.UniversalStreamResponse = _mod_q18xhk.UniversalStreamResponse;
|
|
5
|
+
exports.StreamResponse = _mod_q18xhk.StreamResponse;;
|
|
6
|
+
const _mod_yoyvb4 = require('./download.cjs');
|
|
7
|
+
exports.UniversalDownloadResponse = _mod_yoyvb4.UniversalDownloadResponse;
|
|
8
|
+
exports.DownloadResponse = _mod_yoyvb4.DownloadResponse;;
|
|
9
|
+
const _mod_evsbw4 = require('./upload.cjs');
|
|
10
|
+
exports.UniversalUploadResponse = _mod_evsbw4.UniversalUploadResponse;
|
|
11
|
+
exports.UploadResponse = _mod_evsbw4.UploadResponse;;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "rezo",
|
|
3
|
-
"version": "1.0.56",
|
|
3
|
+
"version": "1.0.58",
|
|
4
4
|
"description": "Lightning-fast, enterprise-grade HTTP client for modern JavaScript. Full HTTP/2 support, intelligent cookie management, multiple adapters (HTTP, Fetch, cURL, XHR), streaming, proxy support (HTTP/HTTPS/SOCKS), and cross-environment compatibility.",
|
|
5
5
|
"main": "dist/index.cjs",
|
|
6
6
|
"module": "dist/index.js",
|