rezo 1.0.62 → 1.0.64
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/adapters/index.cjs +6 -6
- package/dist/cache/index.cjs +9 -9
- package/dist/crawler/crawler-options.cjs +1 -1
- package/dist/crawler/crawler-options.js +1 -1
- package/dist/crawler/crawler.cjs +8 -7
- package/dist/crawler/crawler.js +8 -7
- package/dist/crawler/index.cjs +40 -40
- package/dist/crawler/plugin/index.cjs +1 -1
- package/dist/crawler.d.ts +2 -22
- package/dist/entries/crawler.cjs +4 -4
- package/dist/index.cjs +27 -27
- package/dist/internal/agents/index.cjs +10 -10
- package/dist/proxy/index.cjs +4 -4
- package/dist/queue/index.cjs +8 -8
- package/dist/responses/universal/index.cjs +11 -11
- package/package.json +1 -1
package/dist/adapters/index.cjs
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
const
|
|
2
|
-
exports.detectRuntime =
|
|
3
|
-
exports.getAdapterCapabilities =
|
|
4
|
-
exports.buildAdapterContext =
|
|
5
|
-
exports.getAvailableAdapters =
|
|
6
|
-
exports.selectAdapter =
|
|
1
|
+
const _mod_4c0peh = require('./picker.cjs');
|
|
2
|
+
exports.detectRuntime = _mod_4c0peh.detectRuntime;
|
|
3
|
+
exports.getAdapterCapabilities = _mod_4c0peh.getAdapterCapabilities;
|
|
4
|
+
exports.buildAdapterContext = _mod_4c0peh.buildAdapterContext;
|
|
5
|
+
exports.getAvailableAdapters = _mod_4c0peh.getAvailableAdapters;
|
|
6
|
+
exports.selectAdapter = _mod_4c0peh.selectAdapter;;
|
package/dist/cache/index.cjs
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
const
|
|
2
|
-
exports.LRUCache =
|
|
3
|
-
const
|
|
4
|
-
exports.DNSCache =
|
|
5
|
-
exports.getGlobalDNSCache =
|
|
6
|
-
exports.resetGlobalDNSCache =
|
|
7
|
-
const
|
|
8
|
-
exports.ResponseCache =
|
|
9
|
-
exports.normalizeResponseCacheConfig =
|
|
1
|
+
const _mod_z313xk = require('./lru-cache.cjs');
|
|
2
|
+
exports.LRUCache = _mod_z313xk.LRUCache;;
|
|
3
|
+
const _mod_s9k0nm = require('./dns-cache.cjs');
|
|
4
|
+
exports.DNSCache = _mod_s9k0nm.DNSCache;
|
|
5
|
+
exports.getGlobalDNSCache = _mod_s9k0nm.getGlobalDNSCache;
|
|
6
|
+
exports.resetGlobalDNSCache = _mod_s9k0nm.resetGlobalDNSCache;;
|
|
7
|
+
const _mod_mk2k1e = require('./response-cache.cjs');
|
|
8
|
+
exports.ResponseCache = _mod_mk2k1e.ResponseCache;
|
|
9
|
+
exports.normalizeResponseCacheConfig = _mod_mk2k1e.normalizeResponseCacheConfig;;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
var{RezoQueue:m}=require("../queue/queue.cjs"),{Oxylabs:x}=require("./addon/oxylabs/index.cjs"),b=require("node:path"),g=require("node:os"),{Decodo:y}=require("./addon/decodo/index.cjs");class f{baseUrl;adapter;enableNavigationHistory;sessionId;rejectUnauthorized;userAgent;useRndUserAgent;timeout;maxRedirects;maxRetryAttempts;retryDelay;retryOnStatusCode;forceRevisit;retryWithoutProxyOnStatusCode;retryOnProxyError;maxRetryOnProxyError;allowRevisiting;enableCache;cacheTTL;cacheDir;throwFatalError;debug;maxDepth;maxUrls;maxResponseSize;respectRobotsTxt;followNofollow;autoThrottle;autoThrottleTargetDelay;autoThrottleMinDelay;autoThrottleMaxDelay;maxWaitOn429;alwaysWaitOn429;oxylabs=[];decodo=[];proxies=[];limiters=[];requestHeaders=[];userAgents=A();constructor(e={}){this.baseUrl=e.baseUrl||"",this.adapter=e.adapter??"http",this.enableNavigationHistory=e.enableNavigationHistory??!1,this.sessionId=e.sessionId??`session_${Date.now()}_${Math.random().toString(36).slice(2,8)}`,this.rejectUnauthorized=e.rejectUnauthorized??!0,this.userAgent=e.userAgent,this.useRndUserAgent=e.useRndUserAgent??!1,this.timeout=e.timeout??30000,this.maxRedirects=e.maxRedirects??10,this.maxRetryAttempts=e.maxRetryAttempts??3,this.retryDelay=e.retryDelay??0,this.retryOnStatusCode=e.retryOnStatusCode??[408,429,500,502,503,504],this.forceRevisit=e.forceRevisit??!1,this.retryWithoutProxyOnStatusCode=e.retryWithoutProxyOnStatusCode??[407,403],this.retryOnProxyError=e.retryOnProxyError??!0,this.maxRetryOnProxyError=e.maxRetryOnProxyError??3,this.allowRevisiting=e.allowRevisiting??!1,this.enableCache=e.enableCache??!0,this.cacheTTL=e.cacheTTL??604800000,this.cacheDir=e.cacheDir??b.join(g.tmpdir(),"uiniqhtt_cache"),this.throwFatalError=e.throwFatalError??!1,this.debug=e.debug??!1,this.maxDepth=e.maxDepth??0,this.maxUrls=e.maxUrls??0,this.maxResponseSize=e.maxResponseSize??0,this.respectRobotsTxt=e.respectRobotsTxt??!1,this.followNofollow=e.followNofollow??!1,this.autoThrottle=e.autoThrottle??!0,this.autoThrottleTargetDelay=e.autoThrottleTargetDelay??1000,this.autoThrottleMinDelay=e.autoThrottleMinDelay??100,this.autoThrottleMaxDelay=e.autoThrottleMaxDelay??60000,this.maxWaitOn429=e.maxWaitOn429??1800000,this.alwaysWaitOn429=e.alwaysWaitOn429??!1,this._addHeaders(e.headers),this._addOxylabs(e.oxylabs),this._addDecodo(e.decodo),this._addProxies(e.proxy),this._addLimiters(e.limiter)}getConfiguredDomains(e){return(e==="headers"?this.requestHeaders:e==="limiters"?this.limiters:e==="oxylabs"?this.oxylabs:this.proxies).filter((i)=>i.domain).map((i)=>i.domain).filter((i,n,r)=>r.indexOf(i)===n)}removeDomain(e){return this.requestHeaders=this.requestHeaders.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this.proxies=this.proxies.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this.limiters=this.limiters.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this.oxylabs=this.oxylabs.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this}_domainsEqual(e,t){if(Array.isArray(e)&&Array.isArray(t))return e.length===t.length&&e.every((i,n)=>i===t[n]);return e===t}getConfigurationSummary(){let e=(t)=>({total:t.length,global:t.filter((i)=>i.isGlobal).length,domainSpecific:t.filter((i)=>!i.isGlobal&&i.domain).length});return{headers:e(this.requestHeaders),proxies:e(this.proxies),limiters:e(this.limiters),oxylabs:e(this.oxylabs)}}_addHeaders(e){if(!e||!e.enable)return;for(let t of e.httpHeaders){let{domain:i,isGlobal:n,headers:r}=t;if(!i&&!n)continue;if(r instanceof Headers){let s=Object.fromEntries(r.entries());if(Object.keys(s).length<1)continue;r=s}else if(!r||Object.keys(r).length<1)continue;this.requestHeaders.push({domain:i,isGlobal:n,headers:r})}}_addProxies(e){if(!e||!e.enable)return;for(let t of e.proxies){let{domain:i,isGlobal:n,proxy:r}=t;if(!i&&!n)continue;if(!r||Object.keys(r).length<1)continue;this.proxies.push({domain:i,isGlobal:n,proxy:r})}}_addLimiters(e){if(!e||!e.enable)return;for(let t of e.limiters){let{domain:i,isGlobal:n,options:r}=t;if(!i&&!n)continue;if(!r||Object.keys(r).length<1)continue;this.limiters.push({domain:i,isGlobal:n,pqueue:new m(r)})}}_addOxylabs(e){if(!e||!e.enable)return;for(let t of e.labs){let{domain:i,isGlobal:n,options:r,queueOptions:s}=t;if(!i&&!n)continue;if(!r||Object.keys(r).length<1)continue;this.oxylabs.push({domain:i,isGlobal:n,adaptar:new x(r)})}}_addDecodo(e){if(!e||!e.enable)return;for(let t of e.labs){let{domain:i,isGlobal:n,options:r,queueOptions:s}=t;if(!i&&!n)continue;if(!r||Object.keys(r).length<1)continue;this.decodo.push({domain:i,isGlobal:n,adaptar:new y(r)})}}addHeaders(e){return this._addHeaders({enable:!0,httpHeaders:[e]}),this}addProxy(e){return this._addProxies({enable:!0,proxies:[e]}),this}addLimiter(e){return this._addLimiters({enable:!0,limiters:[e]}),this}addOxylabs(e){return this._addOxylabs({enable:!0,labs:[e]}),this}addDecodo(e){return this._addDecodo({enable:!0,labs:[e]}),this}destroyLimiters(){for(let e of this.limiters)if(e.pqueue&&typeof e.pqueue.destroy==="function")e.pqueue.destroy();this.limiters=[]}clearGlobalConfigs(){if(Array.isArray(this.requestHeaders))this.requestHeaders=this.requestHeaders.filter((e)=>!e.isGlobal);if(Array.isArray(this.oxylabs))this.oxylabs=this.oxylabs.filter((e)=>!e.isGlobal);if(Array.isArray(this.limiters))this.limiters=this.limiters.filter((e)=>!e.isGlobal);if(Array.isArray(this.proxies))this.proxies=this.proxies.filter((e)=>!e.isGlobal);return this}getAdapter(e,t,i,n){if(!this.getDomainName(e))return null;let s=[],a=t==="headers"?this.requestHeaders:t==="limiters"?this.limiters:t==="oxylabs"?this.oxylabs:t==="decodo"?this.decodo:this.proxies;for(let o=0;o<a.length;o++)if(this._hasDomain(e,a[o].domain))s.push(o);if(s.length){let o=n?s[this.rnd(0,s.length-1)]:s[0];return t==="headers"?this.requestHeaders[o].headers:t==="limiters"?this.limiters[o].pqueue:t==="oxylabs"?this.oxylabs[o].adaptar:t==="decodo"?this.decodo[o].adaptar:this.proxies[o].proxy}s.length=0;for(let o=0;o<a.length;o++)s.push(o);if(s.length){let o=n?s[this.rnd(0,s.length-1)]:s[0];if(a[o].isGlobal&&i)return t==="headers"?this.requestHeaders[o].headers:t==="limiters"?this.limiters[o].pqueue:t==="oxylabs"?this.oxylabs[o].adaptar:t==="decodo"?this.decodo[o].adaptar:this.proxies[o].proxy}return null}rnd(e=0,t=Number.MAX_VALUE){return Math.floor(Math.random()*(t-e+1))+e}hasDomain(e,t,i){if(!this.getDomainName(e))return!1;let r=t==="headers"?this.requestHeaders:t==="limiters"?this.limiters:t==="oxylabs"?this.oxylabs:t==="decodo"?this.decodo:this.proxies;for(let s=0;s<r.length;s++)if(this._hasDomain(e,r[s].domain))return!0;if(i){for(let s=0;s<r.length;s++)if(r[s].isGlobal)return!0}return!1}pickHeaders(e,t,i,n){let r=this.getAdapter(e,"headers",t),s=new Headers(r??{});if(i&&i instanceof Headers)for(let[a,o]of Object.entries(i.entries()))s.set(a,o);else if(i&&typeof i==="object"){for(let[a,o]of Object.entries(i))if(typeof o==="string")s.set(a,o)}if(n)s.set("user-agent",this.getRandomUserAgent());return Object.fromEntries(s.entries())}_hasDomain(e,t){if(!t)return!1;let i=this.getDomainName(e);if(!i)return!1;let n=(s)=>{return/[\^\$\*\+\?\{\}\[\]\(\)\|\\]/.test(s)||s.startsWith("/")||s.includes(".*")||s.includes(".+")},r=(s)=>{if(s instanceof RegExp)return s.test(i)||s.test(e);let a=s.toString().trim();if(i.toLowerCase()===a.toLowerCase())return!0;if(a.includes("*")){let l=a.replace(/[.*+?^${}()|[\]\\]/g,"\\$&").replace(/\\\*/g,".*"),h=new RegExp(`^${l}$`,"i");return h.test(i)||h.test(e)}if(n(a))try{let l=a,h="i",u=a.match(/^\/(.*)\/(\w*)$/);if(u)l=u[1],h=u[2]||"i";let c=new RegExp(l,h);return c.test(i)||c.test(e)}catch(l){return i.toLowerCase().includes(a.toLowerCase())}let o=i.toLowerCase(),d=a.toLowerCase();return o===d||o.endsWith("."+d)||d.endsWith("."+o)};if(Array.isArray(t)){for(let s of t)if(r(s))return!0;return!1}return r(t)}getDomainName(e){if(this.isValidUrl(e))return new URL(e).hostname;else if(this.isHostName(e))return e;return null}isHostName(e){if(!e)return!1;if(e.length>255)return!1;let t=/^(?:(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+ [a-zA-Z]{2,})$/;return e=e.trim().toLowerCase(),t.test(e)&&!e.startsWith("-")&&!e.endsWith("-")}isValidUrl(e){if(!e)return!1;e=e.trim();try{let t=new URL(e);if(!t.protocol||!["http:","https:"].includes(t.protocol.toLowerCase()))return!1;if(!t.hostname)return!1;if(!/^(?:(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,})$/.test(t.hostname))return!1;return!0}catch{return!1}}getRandomUserAgent(){return this.userAgents[Math.floor(Math.random()*this.userAgents.length)]}}function A(){let e=[{name:"Chrome",version:"91.0.4472.124",engine:"AppleWebKit/537.36"},{name:"Firefox",version:"89.0",engine:"Gecko/20100101"},{name:"Safari",version:"14.1.1",engine:"AppleWebKit/605.1.15"},{name:"Edge",version:"91.0.864.59",engine:"AppleWebKit/537.36"},{name:"Opera",version:"77.0.4054.277",engine:"AppleWebKit/537.36"},{name:"Vivaldi",version:"3.8.2259.42",engine:"AppleWebKit/537.36"},{name:"Brave",version:"1.26.74",engine:"AppleWebKit/537.36"},{name:"Chromium",version:"91.0.4472.101",engine:"AppleWebKit/537.36"},{name:"Yandex",version:"21.5.3.742",engine:"AppleWebKit/537.36"},{name:"Maxthon",version:"5.3.8.2000",engine:"AppleWebKit/537.36"}],t=["Windows NT 10.0","Windows NT 6.1","Macintosh; Intel Mac OS X 10_15_7","Macintosh; Intel Mac OS X 11_4_0","X11; Linux x86_64","X11; Ubuntu; Linux x86_64"],i=[];for(let n=0;n<200;n++){let r=e[Math.floor(Math.random()*e.length)],s=t[Math.floor(Math.random()*t.length)],a="";switch(r.name){case"Chrome":a=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36`;break;case"Firefox":a=`Mozilla/5.0 (${s}; rv:${r.version}) ${r.engine} Firefox/${r.version}`;break;case"Safari":a=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Version/${r.version} Safari/605.1.15`;break;case"Edge":a=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 Edg/${r.version}`;break;case"Opera":a=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 OPR/${r.version}`;break;case"Vivaldi":a=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 Vivaldi/${r.version}`;break;case"Brave":a=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 Brave/${r.version}`;break;case"Chromium":a=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chromium/${r.version} Chrome/${r.version} Safari/537.36`;break;case"Yandex":a=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} YaBrowser/${r.version} Safari/537.36`;break;case"Maxthon":a=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 Maxthon/${r.version}`;break}i.push(a)}return i}exports.CrawlerOptions=f;
|
|
1
|
+
var{RezoQueue:m}=require("../queue/queue.cjs"),{Oxylabs:x}=require("./addon/oxylabs/index.cjs"),b=require("node:path"),g=require("node:os"),{Decodo:y}=require("./addon/decodo/index.cjs");class f{baseUrl;adapter;enableNavigationHistory;sessionId;rejectUnauthorized;userAgent;useRndUserAgent;timeout;maxRedirects;maxRetryAttempts;retryDelay;retryOnStatusCode;forceRevisit;retryWithoutProxyOnStatusCode;retryOnProxyError;maxRetryOnProxyError;allowRevisiting;enableCache;cacheTTL;cacheDir;throwFatalError;debug;maxDepth;maxUrls;maxResponseSize;respectRobotsTxt;followNofollow;autoThrottle;autoThrottleTargetDelay;autoThrottleMinDelay;autoThrottleMaxDelay;maxWaitOn429;alwaysWaitOn429;oxylabs=[];decodo=[];proxies=[];limiters=[];requestHeaders=[];userAgents=A();constructor(e={}){this.baseUrl=e.baseUrl||"",this.adapter=e.adapter??"http",this.enableNavigationHistory=e.enableNavigationHistory??!1,this.sessionId=e.sessionId??`session_${Date.now()}_${Math.random().toString(36).slice(2,8)}`,this.rejectUnauthorized=e.rejectUnauthorized??!0,this.userAgent=e.userAgent,this.useRndUserAgent=e.useRndUserAgent??!1,this.timeout=e.timeout??30000,this.maxRedirects=e.maxRedirects??10,this.maxRetryAttempts=e.maxRetryAttempts??3,this.retryDelay=e.retryDelay??0,this.retryOnStatusCode=e.retryOnStatusCode??[408,429,500,502,503,504],this.forceRevisit=e.forceRevisit??!1,this.retryWithoutProxyOnStatusCode=e.retryWithoutProxyOnStatusCode??[407,403],this.retryOnProxyError=e.retryOnProxyError??!0,this.maxRetryOnProxyError=e.maxRetryOnProxyError??3,this.allowRevisiting=e.allowRevisiting??!1,this.enableCache=e.enableCache??!0,this.cacheTTL=e.cacheTTL??604800000,this.cacheDir=e.cacheDir??b.join(g.tmpdir(),"uiniqhtt_cache"),this.throwFatalError=e.throwFatalError??!1,this.debug=e.debug??!1,this.maxDepth=e.maxDepth??0,this.maxUrls=e.maxUrls??0,this.maxResponseSize=e.maxResponseSize??0,this.respectRobotsTxt=e.respectRobotsTxt??!1,this.followNofollow=e.followNofollow??!1,this.autoThrottle=e.autoThrottle??!0,this.autoThrottleTargetDelay=e.autoThrottleTargetDelay??1000,this.autoThrottleMinDelay=e.autoThrottleMinDelay??100,this.autoThrottleMaxDelay=e.autoThrottleMaxDelay??60000,this.maxWaitOn429=e.maxWaitOn429??1800000,this.alwaysWaitOn429=e.alwaysWaitOn429??!1,this._addHeaders(e.headers),this._addOxylabs(e.oxylabs),this._addDecodo(e.decodo),this._addProxies(e.proxy),this._addLimiters(e.limiter)}getConfiguredDomains(e){return(e==="headers"?this.requestHeaders:e==="limiters"?this.limiters:e==="oxylabs"?this.oxylabs:this.proxies).filter((i)=>i.domain).map((i)=>i.domain).filter((i,a,r)=>r.indexOf(i)===a)}removeDomain(e){return this.requestHeaders=this.requestHeaders.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this.proxies=this.proxies.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this.limiters=this.limiters.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this.oxylabs=this.oxylabs.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this}_domainsEqual(e,t){if(Array.isArray(e)&&Array.isArray(t))return e.length===t.length&&e.every((i,a)=>i===t[a]);return e===t}getConfigurationSummary(){let e=(t)=>({total:t.length,global:t.filter((i)=>i.isGlobal).length,domainSpecific:t.filter((i)=>!i.isGlobal&&i.domain).length});return{headers:e(this.requestHeaders),proxies:e(this.proxies),limiters:e(this.limiters),oxylabs:e(this.oxylabs)}}_addHeaders(e){if(!e||!e.enable)return;for(let t of e.httpHeaders){let{domain:i,isGlobal:a,headers:r}=t;if(!i&&!a)continue;if(r instanceof Headers){let s=Object.fromEntries(r.entries());if(Object.keys(s).length<1)continue;r=s}else if(!r||Object.keys(r).length<1)continue;this.requestHeaders.push({domain:i,isGlobal:a,headers:r})}}_addProxies(e){if(!e||!e.enable)return;for(let t of e.proxies){let{domain:i,isGlobal:a,proxy:r}=t;if(!i&&!a)continue;if(!r||Object.keys(r).length<1)continue;this.proxies.push({domain:i,isGlobal:a,proxy:r})}}_addLimiters(e){if(!e||!e.enable)return;for(let t of e.limiters){let{domain:i,isGlobal:a,options:r}=t;if(!i&&!a)continue;if(!r||Object.keys(r).length<1)continue;this.limiters.push({domain:i,isGlobal:a,pqueue:new m(r)})}}_addOxylabs(e){if(!e||!e.enable)return;for(let t of e.labs){let{domain:i,isGlobal:a,options:r,queueOptions:s}=t;if(!i&&!a)continue;if(!r||Object.keys(r).length<1)continue;this.oxylabs.push({domain:i,isGlobal:a,adaptar:new x(r)})}}_addDecodo(e){if(!e||!e.enable)return;for(let t of e.labs){let{domain:i,isGlobal:a,options:r,queueOptions:s}=t;if(!i&&!a)continue;if(!r||Object.keys(r).length<1)continue;this.decodo.push({domain:i,isGlobal:a,adaptar:new y(r)})}}addHeaders(e){return this._addHeaders({enable:!0,httpHeaders:[e]}),this}addProxy(e){return this._addProxies({enable:!0,proxies:[e]}),this}addLimiter(e){return this._addLimiters({enable:!0,limiters:[e]}),this}addOxylabs(e){return this._addOxylabs({enable:!0,labs:[e]}),this}addDecodo(e){return this._addDecodo({enable:!0,labs:[e]}),this}destroyLimiters(){for(let e of this.limiters)if(e.pqueue&&typeof e.pqueue.destroy==="function")e.pqueue.destroy();this.limiters=[]}clearGlobalConfigs(){if(Array.isArray(this.requestHeaders))this.requestHeaders=this.requestHeaders.filter((e)=>!e.isGlobal);if(Array.isArray(this.oxylabs))this.oxylabs=this.oxylabs.filter((e)=>!e.isGlobal);if(Array.isArray(this.limiters))this.limiters=this.limiters.filter((e)=>!e.isGlobal);if(Array.isArray(this.proxies))this.proxies=this.proxies.filter((e)=>!e.isGlobal);return this}getAdapter(e,t,i,a){if(!this.getDomainName(e))return null;let s=[],n=t==="headers"?this.requestHeaders:t==="limiters"?this.limiters:t==="oxylabs"?this.oxylabs:t==="decodo"?this.decodo:this.proxies;for(let o=0;o<n.length;o++)if(this._hasDomain(e,n[o].domain))s.push(o);if(s.length){let o=a?s[this.rnd(0,s.length-1)]:s[0];return t==="headers"?this.requestHeaders[o].headers:t==="limiters"?this.limiters[o].pqueue:t==="oxylabs"?this.oxylabs[o].adaptar:t==="decodo"?this.decodo[o].adaptar:this.proxies[o].proxy}s.length=0;for(let o=0;o<n.length;o++)s.push(o);if(s.length){let o=a?s[this.rnd(0,s.length-1)]:s[0];if(n[o].isGlobal&&i)return t==="headers"?this.requestHeaders[o].headers:t==="limiters"?this.limiters[o].pqueue:t==="oxylabs"?this.oxylabs[o].adaptar:t==="decodo"?this.decodo[o].adaptar:this.proxies[o].proxy}return null}rnd(e=0,t=Number.MAX_VALUE){return Math.floor(Math.random()*(t-e+1))+e}hasDomain(e,t,i){if(!this.getDomainName(e))return!1;let r=t==="headers"?this.requestHeaders:t==="limiters"?this.limiters:t==="oxylabs"?this.oxylabs:t==="decodo"?this.decodo:this.proxies;for(let s=0;s<r.length;s++)if(this._hasDomain(e,r[s].domain))return!0;if(i){for(let s=0;s<r.length;s++)if(r[s].isGlobal)return!0}return!1}pickHeaders(e,t,i,a){let r=this.getAdapter(e,"headers",t),s=new Headers(r??{}),n=s.count;if(i&&i instanceof Headers)for(let[o,l]of Object.entries(i.entries()))s.set(o,l);else if(i&&typeof i==="object"){for(let[o,l]of Object.entries(i))if(typeof l==="string")s.set(o,l)}if(a&&n===0)s.set("user-agent",this.getRandomUserAgent());return Object.fromEntries(s.entries())}_hasDomain(e,t){if(!t)return!1;let i=this.getDomainName(e);if(!i)return!1;let a=(s)=>{return/[\^\$\*\+\?\{\}\[\]\(\)\|\\]/.test(s)||s.startsWith("/")||s.includes(".*")||s.includes(".+")},r=(s)=>{if(s instanceof RegExp)return s.test(i)||s.test(e);let n=s.toString().trim();if(i.toLowerCase()===n.toLowerCase())return!0;if(n.includes("*")){let h=n.replace(/[.*+?^${}()|[\]\\]/g,"\\$&").replace(/\\\*/g,".*"),d=new RegExp(`^${h}$`,"i");return d.test(i)||d.test(e)}if(a(n))try{let h=n,d="i",u=n.match(/^\/(.*)\/(\w*)$/);if(u)h=u[1],d=u[2]||"i";let c=new RegExp(h,d);return c.test(i)||c.test(e)}catch(h){return i.toLowerCase().includes(n.toLowerCase())}let o=i.toLowerCase(),l=n.toLowerCase();return o===l||o.endsWith("."+l)||l.endsWith("."+o)};if(Array.isArray(t)){for(let s of t)if(r(s))return!0;return!1}return r(t)}getDomainName(e){if(this.isValidUrl(e))return new URL(e).hostname;else if(this.isHostName(e))return e;return null}isHostName(e){if(!e)return!1;if(e.length>255)return!1;let t=/^(?:(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+ [a-zA-Z]{2,})$/;return e=e.trim().toLowerCase(),t.test(e)&&!e.startsWith("-")&&!e.endsWith("-")}isValidUrl(e){if(!e)return!1;e=e.trim();try{let t=new URL(e);if(!t.protocol||!["http:","https:"].includes(t.protocol.toLowerCase()))return!1;if(!t.hostname)return!1;if(!/^(?:(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,})$/.test(t.hostname))return!1;return!0}catch{return!1}}getRandomUserAgent(){return this.userAgents[Math.floor(Math.random()*this.userAgents.length)]}}function A(){let e=[{name:"Chrome",version:"91.0.4472.124",engine:"AppleWebKit/537.36"},{name:"Firefox",version:"89.0",engine:"Gecko/20100101"},{name:"Safari",version:"14.1.1",engine:"AppleWebKit/605.1.15"},{name:"Edge",version:"91.0.864.59",engine:"AppleWebKit/537.36"},{name:"Opera",version:"77.0.4054.277",engine:"AppleWebKit/537.36"},{name:"Vivaldi",version:"3.8.2259.42",engine:"AppleWebKit/537.36"},{name:"Brave",version:"1.26.74",engine:"AppleWebKit/537.36"},{name:"Chromium",version:"91.0.4472.101",engine:"AppleWebKit/537.36"},{name:"Yandex",version:"21.5.3.742",engine:"AppleWebKit/537.36"},{name:"Maxthon",version:"5.3.8.2000",engine:"AppleWebKit/537.36"}],t=["Windows NT 10.0","Windows NT 6.1","Macintosh; Intel Mac OS X 10_15_7","Macintosh; Intel Mac OS X 11_4_0","X11; Linux x86_64","X11; Ubuntu; Linux x86_64"],i=[];for(let a=0;a<200;a++){let r=e[Math.floor(Math.random()*e.length)],s=t[Math.floor(Math.random()*t.length)],n="";switch(r.name){case"Chrome":n=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36`;break;case"Firefox":n=`Mozilla/5.0 (${s}; rv:${r.version}) ${r.engine} Firefox/${r.version}`;break;case"Safari":n=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Version/${r.version} Safari/605.1.15`;break;case"Edge":n=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 Edg/${r.version}`;break;case"Opera":n=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 OPR/${r.version}`;break;case"Vivaldi":n=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 Vivaldi/${r.version}`;break;case"Brave":n=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 Brave/${r.version}`;break;case"Chromium":n=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chromium/${r.version} Chrome/${r.version} Safari/537.36`;break;case"Yandex":n=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} YaBrowser/${r.version} Safari/537.36`;break;case"Maxthon":n=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 Maxthon/${r.version}`;break}i.push(n)}return i}exports.CrawlerOptions=f;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
import{RezoQueue as m}from"../queue/queue.js";import{Oxylabs as f}from"./addon/oxylabs/index.js";import x from"node:path";import b from"node:os";import{Decodo as g}from"./addon/decodo/index.js";class y{baseUrl;adapter;enableNavigationHistory;sessionId;rejectUnauthorized;userAgent;useRndUserAgent;timeout;maxRedirects;maxRetryAttempts;retryDelay;retryOnStatusCode;forceRevisit;retryWithoutProxyOnStatusCode;retryOnProxyError;maxRetryOnProxyError;allowRevisiting;enableCache;cacheTTL;cacheDir;throwFatalError;debug;maxDepth;maxUrls;maxResponseSize;respectRobotsTxt;followNofollow;autoThrottle;autoThrottleTargetDelay;autoThrottleMinDelay;autoThrottleMaxDelay;maxWaitOn429;alwaysWaitOn429;oxylabs=[];decodo=[];proxies=[];limiters=[];requestHeaders=[];userAgents=A();constructor(e={}){this.baseUrl=e.baseUrl||"",this.adapter=e.adapter??"http",this.enableNavigationHistory=e.enableNavigationHistory??!1,this.sessionId=e.sessionId??`session_${Date.now()}_${Math.random().toString(36).slice(2,8)}`,this.rejectUnauthorized=e.rejectUnauthorized??!0,this.userAgent=e.userAgent,this.useRndUserAgent=e.useRndUserAgent??!1,this.timeout=e.timeout??30000,this.maxRedirects=e.maxRedirects??10,this.maxRetryAttempts=e.maxRetryAttempts??3,this.retryDelay=e.retryDelay??0,this.retryOnStatusCode=e.retryOnStatusCode??[408,429,500,502,503,504],this.forceRevisit=e.forceRevisit??!1,this.retryWithoutProxyOnStatusCode=e.retryWithoutProxyOnStatusCode??[407,403],this.retryOnProxyError=e.retryOnProxyError??!0,this.maxRetryOnProxyError=e.maxRetryOnProxyError??3,this.allowRevisiting=e.allowRevisiting??!1,this.enableCache=e.enableCache??!0,this.cacheTTL=e.cacheTTL??604800000,this.cacheDir=e.cacheDir??x.join(b.tmpdir(),"uiniqhtt_cache"),this.throwFatalError=e.throwFatalError??!1,this.debug=e.debug??!1,this.maxDepth=e.maxDepth??0,this.maxUrls=e.maxUrls??0,this.maxResponseSize=e.maxResponseSize??0,this.respectRobotsTxt=e.respectRobotsTxt??!1,this.followNofollow=e.followNofollow??!1,this.autoThrottle=e.autoThrottle??!0,this.autoThrottleTargetDelay=e.autoThrottleTargetDelay??1000,this.autoThrottleMinDelay=e.autoThrottleMinDelay??100,this.autoThrottleMaxDelay=e.autoThrottleMaxDelay??60000,this.maxWaitOn429=e.maxWaitOn429??1800000,this.alwaysWaitOn429=e.alwaysWaitOn429??!1,this._addHeaders(e.headers),this._addOxylabs(e.oxylabs),this._addDecodo(e.decodo),this._addProxies(e.proxy),this._addLimiters(e.limiter)}getConfiguredDomains(e){return(e==="headers"?this.requestHeaders:e==="limiters"?this.limiters:e==="oxylabs"?this.oxylabs:this.proxies).filter((s)=>s.domain).map((s)=>s.domain).filter((s,n,r)=>r.indexOf(s)===n)}removeDomain(e){return this.requestHeaders=this.requestHeaders.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this.proxies=this.proxies.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this.limiters=this.limiters.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this.oxylabs=this.oxylabs.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this}_domainsEqual(e,t){if(Array.isArray(e)&&Array.isArray(t))return e.length===t.length&&e.every((s,n)=>s===t[n]);return e===t}getConfigurationSummary(){let e=(t)=>({total:t.length,global:t.filter((s)=>s.isGlobal).length,domainSpecific:t.filter((s)=>!s.isGlobal&&s.domain).length});return{headers:e(this.requestHeaders),proxies:e(this.proxies),limiters:e(this.limiters),oxylabs:e(this.oxylabs)}}_addHeaders(e){if(!e||!e.enable)return;for(let t of e.httpHeaders){let{domain:s,isGlobal:n,headers:r}=t;if(!s&&!n)continue;if(r instanceof Headers){let i=Object.fromEntries(r.entries());if(Object.keys(i).length<1)continue;r=i}else if(!r||Object.keys(r).length<1)continue;this.requestHeaders.push({domain:s,isGlobal:n,headers:r})}}_addProxies(e){if(!e||!e.enable)return;for(let t of e.proxies){let{domain:s,isGlobal:n,proxy:r}=t;if(!s&&!n)continue;if(!r||Object.keys(r).length<1)continue;this.proxies.push({domain:s,isGlobal:n,proxy:r})}}_addLimiters(e){if(!e||!e.enable)return;for(let t of e.limiters){let{domain:s,isGlobal:n,options:r}=t;if(!s&&!n)continue;if(!r||Object.keys(r).length<1)continue;this.limiters.push({domain:s,isGlobal:n,pqueue:new m(r)})}}_addOxylabs(e){if(!e||!e.enable)return;for(let t of e.labs){let{domain:s,isGlobal:n,options:r,queueOptions:i}=t;if(!s&&!n)continue;if(!r||Object.keys(r).length<1)continue;this.oxylabs.push({domain:s,isGlobal:n,adaptar:new f(r)})}}_addDecodo(e){if(!e||!e.enable)return;for(let t of e.labs){let{domain:s,isGlobal:n,options:r,queueOptions:i}=t;if(!s&&!n)continue;if(!r||Object.keys(r).length<1)continue;this.decodo.push({domain:s,isGlobal:n,adaptar:new g(r)})}}addHeaders(e){return this._addHeaders({enable:!0,httpHeaders:[e]}),this}addProxy(e){return this._addProxies({enable:!0,proxies:[e]}),this}addLimiter(e){return this._addLimiters({enable:!0,limiters:[e]}),this}addOxylabs(e){return this._addOxylabs({enable:!0,labs:[e]}),this}addDecodo(e){return this._addDecodo({enable:!0,labs:[e]}),this}destroyLimiters(){for(let e of this.limiters)if(e.pqueue&&typeof e.pqueue.destroy==="function")e.pqueue.destroy();this.limiters=[]}clearGlobalConfigs(){if(Array.isArray(this.requestHeaders))this.requestHeaders=this.requestHeaders.filter((e)=>!e.isGlobal);if(Array.isArray(this.oxylabs))this.oxylabs=this.oxylabs.filter((e)=>!e.isGlobal);if(Array.isArray(this.limiters))this.limiters=this.limiters.filter((e)=>!e.isGlobal);if(Array.isArray(this.proxies))this.proxies=this.proxies.filter((e)=>!e.isGlobal);return this}getAdapter(e,t,s,n){if(!this.getDomainName(e))return null;let i=[],a=t==="headers"?this.requestHeaders:t==="limiters"?this.limiters:t==="oxylabs"?this.oxylabs:t==="decodo"?this.decodo:this.proxies;for(let o=0;o<a.length;o++)if(this._hasDomain(e,a[o].domain))i.push(o);if(i.length){let o=n?i[this.rnd(0,i.length-1)]:i[0];return t==="headers"?this.requestHeaders[o].headers:t==="limiters"?this.limiters[o].pqueue:t==="oxylabs"?this.oxylabs[o].adaptar:t==="decodo"?this.decodo[o].adaptar:this.proxies[o].proxy}i.length=0;for(let o=0;o<a.length;o++)i.push(o);if(i.length){let o=n?i[this.rnd(0,i.length-1)]:i[0];if(a[o].isGlobal&&s)return t==="headers"?this.requestHeaders[o].headers:t==="limiters"?this.limiters[o].pqueue:t==="oxylabs"?this.oxylabs[o].adaptar:t==="decodo"?this.decodo[o].adaptar:this.proxies[o].proxy}return null}rnd(e=0,t=Number.MAX_VALUE){return Math.floor(Math.random()*(t-e+1))+e}hasDomain(e,t,s){if(!this.getDomainName(e))return!1;let r=t==="headers"?this.requestHeaders:t==="limiters"?this.limiters:t==="oxylabs"?this.oxylabs:t==="decodo"?this.decodo:this.proxies;for(let i=0;i<r.length;i++)if(this._hasDomain(e,r[i].domain))return!0;if(s){for(let i=0;i<r.length;i++)if(r[i].isGlobal)return!0}return!1}pickHeaders(e,t,s,n){let r=this.getAdapter(e,"headers",t),i=new Headers(r??{});if(s&&s instanceof Headers)for(let[a,o]of Object.entries(s.entries()))i.set(a,o);else if(s&&typeof s==="object"){for(let[a,o]of Object.entries(s))if(typeof o==="string")i.set(a,o)}if(n)i.set("user-agent",this.getRandomUserAgent());return Object.fromEntries(i.entries())}_hasDomain(e,t){if(!t)return!1;let s=this.getDomainName(e);if(!s)return!1;let n=(i)=>{return/[\^\$\*\+\?\{\}\[\]\(\)\|\\]/.test(i)||i.startsWith("/")||i.includes(".*")||i.includes(".+")},r=(i)=>{if(i instanceof RegExp)return i.test(s)||i.test(e);let a=i.toString().trim();if(s.toLowerCase()===a.toLowerCase())return!0;if(a.includes("*")){let l=a.replace(/[.*+?^${}()|[\]\\]/g,"\\$&").replace(/\\\*/g,".*"),h=new RegExp(`^${l}$`,"i");return h.test(s)||h.test(e)}if(n(a))try{let l=a,h="i",u=a.match(/^\/(.*)\/(\w*)$/);if(u)l=u[1],h=u[2]||"i";let c=new RegExp(l,h);return c.test(s)||c.test(e)}catch(l){return s.toLowerCase().includes(a.toLowerCase())}let o=s.toLowerCase(),d=a.toLowerCase();return o===d||o.endsWith("."+d)||d.endsWith("."+o)};if(Array.isArray(t)){for(let i of t)if(r(i))return!0;return!1}return r(t)}getDomainName(e){if(this.isValidUrl(e))return new URL(e).hostname;else if(this.isHostName(e))return e;return null}isHostName(e){if(!e)return!1;if(e.length>255)return!1;let t=/^(?:(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+ [a-zA-Z]{2,})$/;return e=e.trim().toLowerCase(),t.test(e)&&!e.startsWith("-")&&!e.endsWith("-")}isValidUrl(e){if(!e)return!1;e=e.trim();try{let t=new URL(e);if(!t.protocol||!["http:","https:"].includes(t.protocol.toLowerCase()))return!1;if(!t.hostname)return!1;if(!/^(?:(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,})$/.test(t.hostname))return!1;return!0}catch{return!1}}getRandomUserAgent(){return this.userAgents[Math.floor(Math.random()*this.userAgents.length)]}}function A(){let e=[{name:"Chrome",version:"91.0.4472.124",engine:"AppleWebKit/537.36"},{name:"Firefox",version:"89.0",engine:"Gecko/20100101"},{name:"Safari",version:"14.1.1",engine:"AppleWebKit/605.1.15"},{name:"Edge",version:"91.0.864.59",engine:"AppleWebKit/537.36"},{name:"Opera",version:"77.0.4054.277",engine:"AppleWebKit/537.36"},{name:"Vivaldi",version:"3.8.2259.42",engine:"AppleWebKit/537.36"},{name:"Brave",version:"1.26.74",engine:"AppleWebKit/537.36"},{name:"Chromium",version:"91.0.4472.101",engine:"AppleWebKit/537.36"},{name:"Yandex",version:"21.5.3.742",engine:"AppleWebKit/537.36"},{name:"Maxthon",version:"5.3.8.2000",engine:"AppleWebKit/537.36"}],t=["Windows NT 10.0","Windows NT 6.1","Macintosh; Intel Mac OS X 10_15_7","Macintosh; Intel Mac OS X 11_4_0","X11; Linux x86_64","X11; Ubuntu; Linux x86_64"],s=[];for(let n=0;n<200;n++){let r=e[Math.floor(Math.random()*e.length)],i=t[Math.floor(Math.random()*t.length)],a="";switch(r.name){case"Chrome":a=`Mozilla/5.0 (${i}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36`;break;case"Firefox":a=`Mozilla/5.0 (${i}; rv:${r.version}) ${r.engine} Firefox/${r.version}`;break;case"Safari":a=`Mozilla/5.0 (${i}) ${r.engine} (KHTML, like Gecko) Version/${r.version} Safari/605.1.15`;break;case"Edge":a=`Mozilla/5.0 (${i}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 Edg/${r.version}`;break;case"Opera":a=`Mozilla/5.0 (${i}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 OPR/${r.version}`;break;case"Vivaldi":a=`Mozilla/5.0 (${i}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 Vivaldi/${r.version}`;break;case"Brave":a=`Mozilla/5.0 (${i}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 Brave/${r.version}`;break;case"Chromium":a=`Mozilla/5.0 (${i}) ${r.engine} (KHTML, like Gecko) Chromium/${r.version} Chrome/${r.version} Safari/537.36`;break;case"Yandex":a=`Mozilla/5.0 (${i}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} YaBrowser/${r.version} Safari/537.36`;break;case"Maxthon":a=`Mozilla/5.0 (${i}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 Maxthon/${r.version}`;break}s.push(a)}return s}export{y as CrawlerOptions};
|
|
1
|
+
import{RezoQueue as m}from"../queue/queue.js";import{Oxylabs as f}from"./addon/oxylabs/index.js";import x from"node:path";import b from"node:os";import{Decodo as g}from"./addon/decodo/index.js";class y{baseUrl;adapter;enableNavigationHistory;sessionId;rejectUnauthorized;userAgent;useRndUserAgent;timeout;maxRedirects;maxRetryAttempts;retryDelay;retryOnStatusCode;forceRevisit;retryWithoutProxyOnStatusCode;retryOnProxyError;maxRetryOnProxyError;allowRevisiting;enableCache;cacheTTL;cacheDir;throwFatalError;debug;maxDepth;maxUrls;maxResponseSize;respectRobotsTxt;followNofollow;autoThrottle;autoThrottleTargetDelay;autoThrottleMinDelay;autoThrottleMaxDelay;maxWaitOn429;alwaysWaitOn429;oxylabs=[];decodo=[];proxies=[];limiters=[];requestHeaders=[];userAgents=A();constructor(e={}){this.baseUrl=e.baseUrl||"",this.adapter=e.adapter??"http",this.enableNavigationHistory=e.enableNavigationHistory??!1,this.sessionId=e.sessionId??`session_${Date.now()}_${Math.random().toString(36).slice(2,8)}`,this.rejectUnauthorized=e.rejectUnauthorized??!0,this.userAgent=e.userAgent,this.useRndUserAgent=e.useRndUserAgent??!1,this.timeout=e.timeout??30000,this.maxRedirects=e.maxRedirects??10,this.maxRetryAttempts=e.maxRetryAttempts??3,this.retryDelay=e.retryDelay??0,this.retryOnStatusCode=e.retryOnStatusCode??[408,429,500,502,503,504],this.forceRevisit=e.forceRevisit??!1,this.retryWithoutProxyOnStatusCode=e.retryWithoutProxyOnStatusCode??[407,403],this.retryOnProxyError=e.retryOnProxyError??!0,this.maxRetryOnProxyError=e.maxRetryOnProxyError??3,this.allowRevisiting=e.allowRevisiting??!1,this.enableCache=e.enableCache??!0,this.cacheTTL=e.cacheTTL??604800000,this.cacheDir=e.cacheDir??x.join(b.tmpdir(),"uiniqhtt_cache"),this.throwFatalError=e.throwFatalError??!1,this.debug=e.debug??!1,this.maxDepth=e.maxDepth??0,this.maxUrls=e.maxUrls??0,this.maxResponseSize=e.maxResponseSize??0,this.respectRobotsTxt=e.respectRobotsTxt??!1,this.followNofollow=e.followNofollow??!1,this.autoThrottle=e.autoThrottle??!0,this.autoThrottleTargetDelay=e.autoThrottleTargetDelay??1000,this.autoThrottleMinDelay=e.autoThrottleMinDelay??100,this.autoThrottleMaxDelay=e.autoThrottleMaxDelay??60000,this.maxWaitOn429=e.maxWaitOn429??1800000,this.alwaysWaitOn429=e.alwaysWaitOn429??!1,this._addHeaders(e.headers),this._addOxylabs(e.oxylabs),this._addDecodo(e.decodo),this._addProxies(e.proxy),this._addLimiters(e.limiter)}getConfiguredDomains(e){return(e==="headers"?this.requestHeaders:e==="limiters"?this.limiters:e==="oxylabs"?this.oxylabs:this.proxies).filter((s)=>s.domain).map((s)=>s.domain).filter((s,a,r)=>r.indexOf(s)===a)}removeDomain(e){return this.requestHeaders=this.requestHeaders.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this.proxies=this.proxies.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this.limiters=this.limiters.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this.oxylabs=this.oxylabs.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this}_domainsEqual(e,t){if(Array.isArray(e)&&Array.isArray(t))return e.length===t.length&&e.every((s,a)=>s===t[a]);return e===t}getConfigurationSummary(){let e=(t)=>({total:t.length,global:t.filter((s)=>s.isGlobal).length,domainSpecific:t.filter((s)=>!s.isGlobal&&s.domain).length});return{headers:e(this.requestHeaders),proxies:e(this.proxies),limiters:e(this.limiters),oxylabs:e(this.oxylabs)}}_addHeaders(e){if(!e||!e.enable)return;for(let t of e.httpHeaders){let{domain:s,isGlobal:a,headers:r}=t;if(!s&&!a)continue;if(r instanceof Headers){let i=Object.fromEntries(r.entries());if(Object.keys(i).length<1)continue;r=i}else if(!r||Object.keys(r).length<1)continue;this.requestHeaders.push({domain:s,isGlobal:a,headers:r})}}_addProxies(e){if(!e||!e.enable)return;for(let t of e.proxies){let{domain:s,isGlobal:a,proxy:r}=t;if(!s&&!a)continue;if(!r||Object.keys(r).length<1)continue;this.proxies.push({domain:s,isGlobal:a,proxy:r})}}_addLimiters(e){if(!e||!e.enable)return;for(let t of e.limiters){let{domain:s,isGlobal:a,options:r}=t;if(!s&&!a)continue;if(!r||Object.keys(r).length<1)continue;this.limiters.push({domain:s,isGlobal:a,pqueue:new m(r)})}}_addOxylabs(e){if(!e||!e.enable)return;for(let t of e.labs){let{domain:s,isGlobal:a,options:r,queueOptions:i}=t;if(!s&&!a)continue;if(!r||Object.keys(r).length<1)continue;this.oxylabs.push({domain:s,isGlobal:a,adaptar:new f(r)})}}_addDecodo(e){if(!e||!e.enable)return;for(let t of e.labs){let{domain:s,isGlobal:a,options:r,queueOptions:i}=t;if(!s&&!a)continue;if(!r||Object.keys(r).length<1)continue;this.decodo.push({domain:s,isGlobal:a,adaptar:new g(r)})}}addHeaders(e){return this._addHeaders({enable:!0,httpHeaders:[e]}),this}addProxy(e){return this._addProxies({enable:!0,proxies:[e]}),this}addLimiter(e){return this._addLimiters({enable:!0,limiters:[e]}),this}addOxylabs(e){return this._addOxylabs({enable:!0,labs:[e]}),this}addDecodo(e){return this._addDecodo({enable:!0,labs:[e]}),this}destroyLimiters(){for(let e of this.limiters)if(e.pqueue&&typeof e.pqueue.destroy==="function")e.pqueue.destroy();this.limiters=[]}clearGlobalConfigs(){if(Array.isArray(this.requestHeaders))this.requestHeaders=this.requestHeaders.filter((e)=>!e.isGlobal);if(Array.isArray(this.oxylabs))this.oxylabs=this.oxylabs.filter((e)=>!e.isGlobal);if(Array.isArray(this.limiters))this.limiters=this.limiters.filter((e)=>!e.isGlobal);if(Array.isArray(this.proxies))this.proxies=this.proxies.filter((e)=>!e.isGlobal);return this}getAdapter(e,t,s,a){if(!this.getDomainName(e))return null;let i=[],n=t==="headers"?this.requestHeaders:t==="limiters"?this.limiters:t==="oxylabs"?this.oxylabs:t==="decodo"?this.decodo:this.proxies;for(let o=0;o<n.length;o++)if(this._hasDomain(e,n[o].domain))i.push(o);if(i.length){let o=a?i[this.rnd(0,i.length-1)]:i[0];return t==="headers"?this.requestHeaders[o].headers:t==="limiters"?this.limiters[o].pqueue:t==="oxylabs"?this.oxylabs[o].adaptar:t==="decodo"?this.decodo[o].adaptar:this.proxies[o].proxy}i.length=0;for(let o=0;o<n.length;o++)i.push(o);if(i.length){let o=a?i[this.rnd(0,i.length-1)]:i[0];if(n[o].isGlobal&&s)return t==="headers"?this.requestHeaders[o].headers:t==="limiters"?this.limiters[o].pqueue:t==="oxylabs"?this.oxylabs[o].adaptar:t==="decodo"?this.decodo[o].adaptar:this.proxies[o].proxy}return null}rnd(e=0,t=Number.MAX_VALUE){return Math.floor(Math.random()*(t-e+1))+e}hasDomain(e,t,s){if(!this.getDomainName(e))return!1;let r=t==="headers"?this.requestHeaders:t==="limiters"?this.limiters:t==="oxylabs"?this.oxylabs:t==="decodo"?this.decodo:this.proxies;for(let i=0;i<r.length;i++)if(this._hasDomain(e,r[i].domain))return!0;if(s){for(let i=0;i<r.length;i++)if(r[i].isGlobal)return!0}return!1}pickHeaders(e,t,s,a){let r=this.getAdapter(e,"headers",t),i=new Headers(r??{}),n=i.count;if(s&&s instanceof Headers)for(let[o,l]of Object.entries(s.entries()))i.set(o,l);else if(s&&typeof s==="object"){for(let[o,l]of Object.entries(s))if(typeof l==="string")i.set(o,l)}if(a&&n===0)i.set("user-agent",this.getRandomUserAgent());return Object.fromEntries(i.entries())}_hasDomain(e,t){if(!t)return!1;let s=this.getDomainName(e);if(!s)return!1;let a=(i)=>{return/[\^\$\*\+\?\{\}\[\]\(\)\|\\]/.test(i)||i.startsWith("/")||i.includes(".*")||i.includes(".+")},r=(i)=>{if(i instanceof RegExp)return i.test(s)||i.test(e);let n=i.toString().trim();if(s.toLowerCase()===n.toLowerCase())return!0;if(n.includes("*")){let h=n.replace(/[.*+?^${}()|[\]\\]/g,"\\$&").replace(/\\\*/g,".*"),d=new RegExp(`^${h}$`,"i");return d.test(s)||d.test(e)}if(a(n))try{let h=n,d="i",u=n.match(/^\/(.*)\/(\w*)$/);if(u)h=u[1],d=u[2]||"i";let c=new RegExp(h,d);return c.test(s)||c.test(e)}catch(h){return s.toLowerCase().includes(n.toLowerCase())}let o=s.toLowerCase(),l=n.toLowerCase();return o===l||o.endsWith("."+l)||l.endsWith("."+o)};if(Array.isArray(t)){for(let i of t)if(r(i))return!0;return!1}return r(t)}getDomainName(e){if(this.isValidUrl(e))return new URL(e).hostname;else if(this.isHostName(e))return e;return null}isHostName(e){if(!e)return!1;if(e.length>255)return!1;let t=/^(?:(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+ [a-zA-Z]{2,})$/;return e=e.trim().toLowerCase(),t.test(e)&&!e.startsWith("-")&&!e.endsWith("-")}isValidUrl(e){if(!e)return!1;e=e.trim();try{let t=new URL(e);if(!t.protocol||!["http:","https:"].includes(t.protocol.toLowerCase()))return!1;if(!t.hostname)return!1;if(!/^(?:(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,})$/.test(t.hostname))return!1;return!0}catch{return!1}}getRandomUserAgent(){return this.userAgents[Math.floor(Math.random()*this.userAgents.length)]}}function A(){let e=[{name:"Chrome",version:"91.0.4472.124",engine:"AppleWebKit/537.36"},{name:"Firefox",version:"89.0",engine:"Gecko/20100101"},{name:"Safari",version:"14.1.1",engine:"AppleWebKit/605.1.15"},{name:"Edge",version:"91.0.864.59",engine:"AppleWebKit/537.36"},{name:"Opera",version:"77.0.4054.277",engine:"AppleWebKit/537.36"},{name:"Vivaldi",version:"3.8.2259.42",engine:"AppleWebKit/537.36"},{name:"Brave",version:"1.26.74",engine:"AppleWebKit/537.36"},{name:"Chromium",version:"91.0.4472.101",engine:"AppleWebKit/537.36"},{name:"Yandex",version:"21.5.3.742",engine:"AppleWebKit/537.36"},{name:"Maxthon",version:"5.3.8.2000",engine:"AppleWebKit/537.36"}],t=["Windows NT 10.0","Windows NT 6.1","Macintosh; Intel Mac OS X 10_15_7","Macintosh; Intel Mac OS X 11_4_0","X11; Linux x86_64","X11; Ubuntu; Linux x86_64"],s=[];for(let a=0;a<200;a++){let r=e[Math.floor(Math.random()*e.length)],i=t[Math.floor(Math.random()*t.length)],n="";switch(r.name){case"Chrome":n=`Mozilla/5.0 (${i}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36`;break;case"Firefox":n=`Mozilla/5.0 (${i}; rv:${r.version}) ${r.engine} Firefox/${r.version}`;break;case"Safari":n=`Mozilla/5.0 (${i}) ${r.engine} (KHTML, like Gecko) Version/${r.version} Safari/605.1.15`;break;case"Edge":n=`Mozilla/5.0 (${i}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 Edg/${r.version}`;break;case"Opera":n=`Mozilla/5.0 (${i}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 OPR/${r.version}`;break;case"Vivaldi":n=`Mozilla/5.0 (${i}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 Vivaldi/${r.version}`;break;case"Brave":n=`Mozilla/5.0 (${i}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 Brave/${r.version}`;break;case"Chromium":n=`Mozilla/5.0 (${i}) ${r.engine} (KHTML, like Gecko) Chromium/${r.version} Chrome/${r.version} Safari/537.36`;break;case"Yandex":n=`Mozilla/5.0 (${i}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} YaBrowser/${r.version} Safari/537.36`;break;case"Maxthon":n=`Mozilla/5.0 (${i}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 Maxthon/${r.version}`;break}s.push(n)}return s}export{y as CrawlerOptions};
|
package/dist/crawler/crawler.cjs
CHANGED
|
@@ -833,7 +833,8 @@ class Crawler {
|
|
|
833
833
|
deepEmailFinder = false,
|
|
834
834
|
useOxylabsScraperAi = false,
|
|
835
835
|
useOxylabsRotation = true,
|
|
836
|
-
useDecodo = false
|
|
836
|
+
useDecodo = false,
|
|
837
|
+
skipCache = false
|
|
837
838
|
} = options || {};
|
|
838
839
|
const _options = {
|
|
839
840
|
headers: this.config.pickHeaders(url, true, headers, true),
|
|
@@ -842,7 +843,7 @@ class Crawler {
|
|
|
842
843
|
params,
|
|
843
844
|
proxy: useProxy ? this.config.getAdapter(url, "proxies", true, true) || undefined : undefined,
|
|
844
845
|
rejectUnauthorized: typeof rejectUnauthorized === "boolean" ? rejectUnauthorized : this.config.rejectUnauthorized,
|
|
845
|
-
|
|
846
|
+
queue: this.config.getAdapter(url, "limiters", useQueue, useQueue) || undefined
|
|
846
847
|
};
|
|
847
848
|
let oxylabsOptions = {};
|
|
848
849
|
let oxylabsInstanse = undefined;
|
|
@@ -876,12 +877,12 @@ class Crawler {
|
|
|
876
877
|
p.finally(() => this.pendingExecutions.delete(p));
|
|
877
878
|
return this;
|
|
878
879
|
}
|
|
879
|
-
const p = this.execute(method, url, body, _options, extractLeads, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions);
|
|
880
|
+
const p = this.execute(method, url, body, _options, extractLeads, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions, skipCache);
|
|
880
881
|
this.pendingExecutions.add(p);
|
|
881
882
|
p.finally(() => this.pendingExecutions.delete(p));
|
|
882
883
|
return this;
|
|
883
884
|
}
|
|
884
|
-
async execute(method, url, body, options = {}, isEmail, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions) {
|
|
885
|
+
async execute(method, url, body, options = {}, isEmail, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions, skipCache) {
|
|
885
886
|
await this.waitForStorage();
|
|
886
887
|
if (this.isCacheEnabled) {
|
|
887
888
|
await this.waitForCache();
|
|
@@ -889,7 +890,7 @@ class Crawler {
|
|
|
889
890
|
if (this.config.enableNavigationHistory) {
|
|
890
891
|
await this.waitForNavigationHistory();
|
|
891
892
|
}
|
|
892
|
-
const task = this.queue.add(() => this.executeHttp(method, url, body, options, isEmail, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions));
|
|
893
|
+
const task = this.queue.add(() => this.executeHttp(method, url, body, options, isEmail, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions, 0, undefined, skipCache));
|
|
893
894
|
task.finally(() => this.pendingExecutions.delete(task));
|
|
894
895
|
}
|
|
895
896
|
async execute2(method, url, body, options = {}, forceRevisit) {
|
|
@@ -913,7 +914,7 @@ class Crawler {
|
|
|
913
914
|
allowCrossDomainTravel: true
|
|
914
915
|
}, forceRevisit, true)).then();
|
|
915
916
|
}
|
|
916
|
-
async executeHttp(method, url, body, options = {}, isEmail, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions, retryCount = 0, parentUrl) {
|
|
917
|
+
async executeHttp(method, url, body, options = {}, isEmail, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions, retryCount = 0, parentUrl, skipCache) {
|
|
917
918
|
try {
|
|
918
919
|
await this.triggerStartHandlers();
|
|
919
920
|
const limitCheck = await this.checkCrawlLimits(url, parentUrl);
|
|
@@ -938,7 +939,7 @@ class Crawler {
|
|
|
938
939
|
return;
|
|
939
940
|
}
|
|
940
941
|
const requestStartTime = Date.now();
|
|
941
|
-
const response = cache && method === "GET" ? cache : oxylabsInstanse && oxylabsOptions ? await oxylabsInstanse.scrape(url) : decodoInstanse && decodoOptions ? await decodoInstanse.scrape(url) : await (method === "GET" ? this.http.get(url, options) : method === "PATCH" ? this.http.patch(url, body, options) : method === "POST" ? this.http.post(url, body, options) : this.http.put(url, body, options));
|
|
942
|
+
const response = cache && method === "GET" && !skipCache ? cache : oxylabsInstanse && oxylabsOptions ? await oxylabsInstanse.scrape(url) : decodoInstanse && decodoOptions ? await decodoInstanse.scrape(url) : await (method === "GET" ? this.http.get(url, options) : method === "PATCH" ? this.http.patch(url, body, options) : method === "POST" ? this.http.post(url, body, options) : this.http.put(url, body, options));
|
|
942
943
|
if (!cache) {
|
|
943
944
|
const responseTime = Date.now() - requestStartTime;
|
|
944
945
|
this.calculateAutoThrottleDelay(domain, responseTime);
|
package/dist/crawler/crawler.js
CHANGED
|
@@ -833,7 +833,8 @@ export class Crawler {
|
|
|
833
833
|
deepEmailFinder = false,
|
|
834
834
|
useOxylabsScraperAi = false,
|
|
835
835
|
useOxylabsRotation = true,
|
|
836
|
-
useDecodo = false
|
|
836
|
+
useDecodo = false,
|
|
837
|
+
skipCache = false
|
|
837
838
|
} = options || {};
|
|
838
839
|
const _options = {
|
|
839
840
|
headers: this.config.pickHeaders(url, true, headers, true),
|
|
@@ -842,7 +843,7 @@ export class Crawler {
|
|
|
842
843
|
params,
|
|
843
844
|
proxy: useProxy ? this.config.getAdapter(url, "proxies", true, true) || undefined : undefined,
|
|
844
845
|
rejectUnauthorized: typeof rejectUnauthorized === "boolean" ? rejectUnauthorized : this.config.rejectUnauthorized,
|
|
845
|
-
|
|
846
|
+
queue: this.config.getAdapter(url, "limiters", useQueue, useQueue) || undefined
|
|
846
847
|
};
|
|
847
848
|
let oxylabsOptions = {};
|
|
848
849
|
let oxylabsInstanse = undefined;
|
|
@@ -876,12 +877,12 @@ export class Crawler {
|
|
|
876
877
|
p.finally(() => this.pendingExecutions.delete(p));
|
|
877
878
|
return this;
|
|
878
879
|
}
|
|
879
|
-
const p = this.execute(method, url, body, _options, extractLeads, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions);
|
|
880
|
+
const p = this.execute(method, url, body, _options, extractLeads, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions, skipCache);
|
|
880
881
|
this.pendingExecutions.add(p);
|
|
881
882
|
p.finally(() => this.pendingExecutions.delete(p));
|
|
882
883
|
return this;
|
|
883
884
|
}
|
|
884
|
-
async execute(method, url, body, options = {}, isEmail, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions) {
|
|
885
|
+
async execute(method, url, body, options = {}, isEmail, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions, skipCache) {
|
|
885
886
|
await this.waitForStorage();
|
|
886
887
|
if (this.isCacheEnabled) {
|
|
887
888
|
await this.waitForCache();
|
|
@@ -889,7 +890,7 @@ export class Crawler {
|
|
|
889
890
|
if (this.config.enableNavigationHistory) {
|
|
890
891
|
await this.waitForNavigationHistory();
|
|
891
892
|
}
|
|
892
|
-
const task = this.queue.add(() => this.executeHttp(method, url, body, options, isEmail, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions));
|
|
893
|
+
const task = this.queue.add(() => this.executeHttp(method, url, body, options, isEmail, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions, 0, undefined, skipCache));
|
|
893
894
|
task.finally(() => this.pendingExecutions.delete(task));
|
|
894
895
|
}
|
|
895
896
|
async execute2(method, url, body, options = {}, forceRevisit) {
|
|
@@ -913,7 +914,7 @@ export class Crawler {
|
|
|
913
914
|
allowCrossDomainTravel: true
|
|
914
915
|
}, forceRevisit, true)).then();
|
|
915
916
|
}
|
|
916
|
-
async executeHttp(method, url, body, options = {}, isEmail, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions, retryCount = 0, parentUrl) {
|
|
917
|
+
async executeHttp(method, url, body, options = {}, isEmail, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions, retryCount = 0, parentUrl, skipCache) {
|
|
917
918
|
try {
|
|
918
919
|
await this.triggerStartHandlers();
|
|
919
920
|
const limitCheck = await this.checkCrawlLimits(url, parentUrl);
|
|
@@ -938,7 +939,7 @@ export class Crawler {
|
|
|
938
939
|
return;
|
|
939
940
|
}
|
|
940
941
|
const requestStartTime = Date.now();
|
|
941
|
-
const response = cache && method === "GET" ? cache : oxylabsInstanse && oxylabsOptions ? await oxylabsInstanse.scrape(url) : decodoInstanse && decodoOptions ? await decodoInstanse.scrape(url) : await (method === "GET" ? this.http.get(url, options) : method === "PATCH" ? this.http.patch(url, body, options) : method === "POST" ? this.http.post(url, body, options) : this.http.put(url, body, options));
|
|
942
|
+
const response = cache && method === "GET" && !skipCache ? cache : oxylabsInstanse && oxylabsOptions ? await oxylabsInstanse.scrape(url) : decodoInstanse && decodoOptions ? await decodoInstanse.scrape(url) : await (method === "GET" ? this.http.get(url, options) : method === "PATCH" ? this.http.patch(url, body, options) : method === "POST" ? this.http.post(url, body, options) : this.http.put(url, body, options));
|
|
942
943
|
if (!cache) {
|
|
943
944
|
const responseTime = Date.now() - requestStartTime;
|
|
944
945
|
this.calculateAutoThrottleDelay(domain, responseTime);
|
package/dist/crawler/index.cjs
CHANGED
|
@@ -1,40 +1,40 @@
|
|
|
1
|
-
const
|
|
2
|
-
exports.Crawler =
|
|
3
|
-
const
|
|
4
|
-
exports.CrawlerOptions =
|
|
5
|
-
const
|
|
6
|
-
exports.RobotsTxt =
|
|
7
|
-
const
|
|
8
|
-
exports.FileCacher =
|
|
9
|
-
const
|
|
10
|
-
exports.UrlStore =
|
|
11
|
-
const
|
|
12
|
-
exports.NavigationHistory =
|
|
13
|
-
const
|
|
14
|
-
exports.Oxylabs =
|
|
15
|
-
const
|
|
16
|
-
exports.OXYLABS_BROWSER_TYPES =
|
|
17
|
-
exports.OXYLABS_COMMON_LOCALES =
|
|
18
|
-
exports.OXYLABS_COMMON_GEO_LOCATIONS =
|
|
19
|
-
exports.OXYLABS_US_STATES =
|
|
20
|
-
exports.OXYLABS_EUROPEAN_COUNTRIES =
|
|
21
|
-
exports.OXYLABS_ASIAN_COUNTRIES =
|
|
22
|
-
exports.getRandomOxylabsBrowserType =
|
|
23
|
-
exports.getRandomOxylabsLocale =
|
|
24
|
-
exports.getRandomOxylabsGeoLocation =
|
|
25
|
-
const
|
|
26
|
-
exports.Decodo =
|
|
27
|
-
const
|
|
28
|
-
exports.DECODO_DEVICE_TYPES =
|
|
29
|
-
exports.DECODO_HEADLESS_MODES =
|
|
30
|
-
exports.DECODO_COMMON_LOCALES =
|
|
31
|
-
exports.DECODO_COMMON_COUNTRIES =
|
|
32
|
-
exports.DECODO_EUROPEAN_COUNTRIES =
|
|
33
|
-
exports.DECODO_ASIAN_COUNTRIES =
|
|
34
|
-
exports.DECODO_US_STATES =
|
|
35
|
-
exports.DECODO_COMMON_CITIES =
|
|
36
|
-
exports.getRandomDecodoDeviceType =
|
|
37
|
-
exports.getRandomDecodoLocale =
|
|
38
|
-
exports.getRandomDecodoCountry =
|
|
39
|
-
exports.getRandomDecodoCity =
|
|
40
|
-
exports.generateDecodoSessionId =
|
|
1
|
+
const _mod_o1i7dy = require('./crawler.cjs');
|
|
2
|
+
exports.Crawler = _mod_o1i7dy.Crawler;;
|
|
3
|
+
const _mod_9ywpl4 = require('./crawler-options.cjs');
|
|
4
|
+
exports.CrawlerOptions = _mod_9ywpl4.CrawlerOptions;;
|
|
5
|
+
const _mod_z2gv2k = require('./plugin/robots-txt.cjs');
|
|
6
|
+
exports.RobotsTxt = _mod_z2gv2k.RobotsTxt;;
|
|
7
|
+
const _mod_hipgel = require('./plugin/file-cacher.cjs');
|
|
8
|
+
exports.FileCacher = _mod_hipgel.FileCacher;;
|
|
9
|
+
const _mod_xk7y1s = require('./plugin/url-store.cjs');
|
|
10
|
+
exports.UrlStore = _mod_xk7y1s.UrlStore;;
|
|
11
|
+
const _mod_x224qe = require('./plugin/navigation-history.cjs');
|
|
12
|
+
exports.NavigationHistory = _mod_x224qe.NavigationHistory;;
|
|
13
|
+
const _mod_3sph6m = require('./addon/oxylabs/index.cjs');
|
|
14
|
+
exports.Oxylabs = _mod_3sph6m.Oxylabs;;
|
|
15
|
+
const _mod_3e4t9e = require('./addon/oxylabs/options.cjs');
|
|
16
|
+
exports.OXYLABS_BROWSER_TYPES = _mod_3e4t9e.OXYLABS_BROWSER_TYPES;
|
|
17
|
+
exports.OXYLABS_COMMON_LOCALES = _mod_3e4t9e.OXYLABS_COMMON_LOCALES;
|
|
18
|
+
exports.OXYLABS_COMMON_GEO_LOCATIONS = _mod_3e4t9e.OXYLABS_COMMON_GEO_LOCATIONS;
|
|
19
|
+
exports.OXYLABS_US_STATES = _mod_3e4t9e.OXYLABS_US_STATES;
|
|
20
|
+
exports.OXYLABS_EUROPEAN_COUNTRIES = _mod_3e4t9e.OXYLABS_EUROPEAN_COUNTRIES;
|
|
21
|
+
exports.OXYLABS_ASIAN_COUNTRIES = _mod_3e4t9e.OXYLABS_ASIAN_COUNTRIES;
|
|
22
|
+
exports.getRandomOxylabsBrowserType = _mod_3e4t9e.getRandomBrowserType;
|
|
23
|
+
exports.getRandomOxylabsLocale = _mod_3e4t9e.getRandomLocale;
|
|
24
|
+
exports.getRandomOxylabsGeoLocation = _mod_3e4t9e.getRandomGeoLocation;;
|
|
25
|
+
const _mod_jbcztc = require('./addon/decodo/index.cjs');
|
|
26
|
+
exports.Decodo = _mod_jbcztc.Decodo;;
|
|
27
|
+
const _mod_zqnknu = require('./addon/decodo/options.cjs');
|
|
28
|
+
exports.DECODO_DEVICE_TYPES = _mod_zqnknu.DECODO_DEVICE_TYPES;
|
|
29
|
+
exports.DECODO_HEADLESS_MODES = _mod_zqnknu.DECODO_HEADLESS_MODES;
|
|
30
|
+
exports.DECODO_COMMON_LOCALES = _mod_zqnknu.DECODO_COMMON_LOCALES;
|
|
31
|
+
exports.DECODO_COMMON_COUNTRIES = _mod_zqnknu.DECODO_COMMON_COUNTRIES;
|
|
32
|
+
exports.DECODO_EUROPEAN_COUNTRIES = _mod_zqnknu.DECODO_EUROPEAN_COUNTRIES;
|
|
33
|
+
exports.DECODO_ASIAN_COUNTRIES = _mod_zqnknu.DECODO_ASIAN_COUNTRIES;
|
|
34
|
+
exports.DECODO_US_STATES = _mod_zqnknu.DECODO_US_STATES;
|
|
35
|
+
exports.DECODO_COMMON_CITIES = _mod_zqnknu.DECODO_COMMON_CITIES;
|
|
36
|
+
exports.getRandomDecodoDeviceType = _mod_zqnknu.getRandomDeviceType;
|
|
37
|
+
exports.getRandomDecodoLocale = _mod_zqnknu.getRandomLocale;
|
|
38
|
+
exports.getRandomDecodoCountry = _mod_zqnknu.getRandomCountry;
|
|
39
|
+
exports.getRandomDecodoCity = _mod_zqnknu.getRandomCity;
|
|
40
|
+
exports.generateDecodoSessionId = _mod_zqnknu.generateSessionId;;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
var
|
|
1
|
+
var r=require("./file-cacher.cjs");exports.FileCacher=r.FileCacher;var e=require("./url-store.cjs");exports.UrlStore=e.UrlStore;
|
package/dist/crawler.d.ts
CHANGED
|
@@ -4540,33 +4540,12 @@ declare class Rezo {
|
|
|
4540
4540
|
*/
|
|
4541
4541
|
static fromCurl(curlCommand: string): RezoRequestOptions;
|
|
4542
4542
|
}
|
|
4543
|
-
/**
|
|
4544
|
-
* Rezo HTTP Client - Core Types
|
|
4545
|
-
*
|
|
4546
|
-
* Shared type definitions used across the crawler module.
|
|
4547
|
-
*
|
|
4548
|
-
* @module types/types
|
|
4549
|
-
* @author Rezo HTTP Client Library
|
|
4550
|
-
*/
|
|
4551
4543
|
/**
|
|
4552
4544
|
* Proxy configuration interface
|
|
4553
4545
|
* @description Defines the structure for proxy server configuration
|
|
4554
4546
|
* supporting HTTP, HTTPS, and SOCKS proxies with authentication.
|
|
4555
4547
|
*/
|
|
4556
|
-
export
|
|
4557
|
-
/** Proxy server hostname or IP address */
|
|
4558
|
-
host: string;
|
|
4559
|
-
/** Proxy server port number */
|
|
4560
|
-
port: number;
|
|
4561
|
-
/** Proxy protocol type */
|
|
4562
|
-
protocol?: "http" | "https" | "socks4" | "socks5";
|
|
4563
|
-
/** Authentication credentials (format: "username:password") */
|
|
4564
|
-
auth?: string;
|
|
4565
|
-
/** Username for proxy authentication */
|
|
4566
|
-
username?: string;
|
|
4567
|
-
/** Password for proxy authentication */
|
|
4568
|
-
password?: string;
|
|
4569
|
-
}
|
|
4548
|
+
export type IProxy = RezoRequestConfig["proxy"];
|
|
4570
4549
|
interface queueOptions$1 {
|
|
4571
4550
|
/** Maximum concurrent requests */
|
|
4572
4551
|
concurrency?: number;
|
|
@@ -7708,6 +7687,7 @@ export declare class Crawler {
|
|
|
7708
7687
|
useOxylabsScraperAi?: boolean;
|
|
7709
7688
|
useOxylabsRotation?: boolean;
|
|
7710
7689
|
useDecodo?: boolean;
|
|
7690
|
+
skipCache?: boolean;
|
|
7711
7691
|
}): Crawler;
|
|
7712
7692
|
private execute;
|
|
7713
7693
|
private execute2;
|
package/dist/entries/crawler.cjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
const
|
|
2
|
-
exports.Crawler =
|
|
3
|
-
const
|
|
4
|
-
exports.CrawlerOptions =
|
|
1
|
+
const _mod_w4kkhv = require('../crawler/crawler.cjs');
|
|
2
|
+
exports.Crawler = _mod_w4kkhv.Crawler;;
|
|
3
|
+
const _mod_fiyx3u = require('../crawler/crawler-options.cjs');
|
|
4
|
+
exports.CrawlerOptions = _mod_fiyx3u.CrawlerOptions;;
|
package/dist/index.cjs
CHANGED
|
@@ -1,30 +1,30 @@
|
|
|
1
|
-
const
|
|
2
|
-
exports.Rezo =
|
|
3
|
-
exports.createRezoInstance =
|
|
4
|
-
exports.createDefaultInstance =
|
|
5
|
-
const
|
|
6
|
-
exports.RezoError =
|
|
7
|
-
exports.RezoErrorCode =
|
|
8
|
-
const
|
|
9
|
-
exports.RezoHeaders =
|
|
10
|
-
const
|
|
11
|
-
exports.RezoFormData =
|
|
12
|
-
const
|
|
13
|
-
exports.RezoCookieJar =
|
|
14
|
-
exports.Cookie =
|
|
15
|
-
const
|
|
16
|
-
exports.toCurl =
|
|
17
|
-
exports.fromCurl =
|
|
18
|
-
const
|
|
19
|
-
exports.createDefaultHooks =
|
|
20
|
-
exports.mergeHooks =
|
|
21
|
-
const
|
|
22
|
-
exports.ProxyManager =
|
|
23
|
-
const
|
|
24
|
-
exports.RezoQueue =
|
|
25
|
-
exports.HttpQueue =
|
|
26
|
-
exports.Priority =
|
|
27
|
-
exports.HttpMethodPriority =
|
|
1
|
+
const _mod_7y7owb = require('./core/rezo.cjs');
|
|
2
|
+
exports.Rezo = _mod_7y7owb.Rezo;
|
|
3
|
+
exports.createRezoInstance = _mod_7y7owb.createRezoInstance;
|
|
4
|
+
exports.createDefaultInstance = _mod_7y7owb.createDefaultInstance;;
|
|
5
|
+
const _mod_m2gp9k = require('./errors/rezo-error.cjs');
|
|
6
|
+
exports.RezoError = _mod_m2gp9k.RezoError;
|
|
7
|
+
exports.RezoErrorCode = _mod_m2gp9k.RezoErrorCode;;
|
|
8
|
+
const _mod_1kei6w = require('./utils/headers.cjs');
|
|
9
|
+
exports.RezoHeaders = _mod_1kei6w.RezoHeaders;;
|
|
10
|
+
const _mod_otsozo = require('./utils/form-data.cjs');
|
|
11
|
+
exports.RezoFormData = _mod_otsozo.RezoFormData;;
|
|
12
|
+
const _mod_i1qoqb = require('./utils/cookies.cjs');
|
|
13
|
+
exports.RezoCookieJar = _mod_i1qoqb.RezoCookieJar;
|
|
14
|
+
exports.Cookie = _mod_i1qoqb.Cookie;;
|
|
15
|
+
const _mod_l234vd = require('./utils/curl.cjs');
|
|
16
|
+
exports.toCurl = _mod_l234vd.toCurl;
|
|
17
|
+
exports.fromCurl = _mod_l234vd.fromCurl;;
|
|
18
|
+
const _mod_4xnhr8 = require('./core/hooks.cjs');
|
|
19
|
+
exports.createDefaultHooks = _mod_4xnhr8.createDefaultHooks;
|
|
20
|
+
exports.mergeHooks = _mod_4xnhr8.mergeHooks;;
|
|
21
|
+
const _mod_mibvuq = require('./proxy/manager.cjs');
|
|
22
|
+
exports.ProxyManager = _mod_mibvuq.ProxyManager;;
|
|
23
|
+
const _mod_cye7iw = require('./queue/index.cjs');
|
|
24
|
+
exports.RezoQueue = _mod_cye7iw.RezoQueue;
|
|
25
|
+
exports.HttpQueue = _mod_cye7iw.HttpQueue;
|
|
26
|
+
exports.Priority = _mod_cye7iw.Priority;
|
|
27
|
+
exports.HttpMethodPriority = _mod_cye7iw.HttpMethodPriority;;
|
|
28
28
|
const { RezoError } = require('./errors/rezo-error.cjs');
|
|
29
29
|
const isRezoError = exports.isRezoError = RezoError.isRezoError;
|
|
30
30
|
const Cancel = exports.Cancel = RezoError;
|
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
const
|
|
2
|
-
exports.Agent =
|
|
3
|
-
const
|
|
4
|
-
exports.HttpProxyAgent =
|
|
5
|
-
const
|
|
6
|
-
exports.HttpsProxyAgent =
|
|
7
|
-
const
|
|
8
|
-
exports.SocksProxyAgent =
|
|
9
|
-
const
|
|
10
|
-
exports.SocksClient =
|
|
1
|
+
const _mod_5wmaq1 = require('./base.cjs');
|
|
2
|
+
exports.Agent = _mod_5wmaq1.Agent;;
|
|
3
|
+
const _mod_rekzdg = require('./http-proxy.cjs');
|
|
4
|
+
exports.HttpProxyAgent = _mod_rekzdg.HttpProxyAgent;;
|
|
5
|
+
const _mod_pstipg = require('./https-proxy.cjs');
|
|
6
|
+
exports.HttpsProxyAgent = _mod_pstipg.HttpsProxyAgent;;
|
|
7
|
+
const _mod_haz0pb = require('./socks-proxy.cjs');
|
|
8
|
+
exports.SocksProxyAgent = _mod_haz0pb.SocksProxyAgent;;
|
|
9
|
+
const _mod_mxslm0 = require('./socks-client.cjs');
|
|
10
|
+
exports.SocksClient = _mod_mxslm0.SocksClient;;
|
package/dist/proxy/index.cjs
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
const { Agent, HttpProxyAgent, HttpsProxyAgent, SocksProxyAgent } = require('../internal/agents/index.cjs');
|
|
2
2
|
const { parseProxyString } = require('./parse.cjs');
|
|
3
|
-
const
|
|
4
|
-
exports.ProxyManager =
|
|
5
|
-
const
|
|
6
|
-
exports.parseProxyString =
|
|
3
|
+
const _mod_0p8pms = require('./manager.cjs');
|
|
4
|
+
exports.ProxyManager = _mod_0p8pms.ProxyManager;;
|
|
5
|
+
const _mod_t2470u = require('./parse.cjs');
|
|
6
|
+
exports.parseProxyString = _mod_t2470u.parseProxyString;;
|
|
7
7
|
function createOptions(uri, opts) {
|
|
8
8
|
if (uri instanceof URL || typeof uri === "string") {
|
|
9
9
|
return {
|
package/dist/queue/index.cjs
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
const
|
|
2
|
-
exports.RezoQueue =
|
|
3
|
-
const
|
|
4
|
-
exports.HttpQueue =
|
|
5
|
-
exports.extractDomain =
|
|
6
|
-
const
|
|
7
|
-
exports.Priority =
|
|
8
|
-
exports.HttpMethodPriority =
|
|
1
|
+
const _mod_sm75pf = require('./queue.cjs');
|
|
2
|
+
exports.RezoQueue = _mod_sm75pf.RezoQueue;;
|
|
3
|
+
const _mod_jkxs9z = require('./http-queue.cjs');
|
|
4
|
+
exports.HttpQueue = _mod_jkxs9z.HttpQueue;
|
|
5
|
+
exports.extractDomain = _mod_jkxs9z.extractDomain;;
|
|
6
|
+
const _mod_3nhfhq = require('./types.cjs');
|
|
7
|
+
exports.Priority = _mod_3nhfhq.Priority;
|
|
8
|
+
exports.HttpMethodPriority = _mod_3nhfhq.HttpMethodPriority;;
|
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
const
|
|
2
|
-
exports.UniversalEventEmitter =
|
|
3
|
-
const
|
|
4
|
-
exports.UniversalStreamResponse =
|
|
5
|
-
exports.StreamResponse =
|
|
6
|
-
const
|
|
7
|
-
exports.UniversalDownloadResponse =
|
|
8
|
-
exports.DownloadResponse =
|
|
9
|
-
const
|
|
10
|
-
exports.UniversalUploadResponse =
|
|
11
|
-
exports.UploadResponse =
|
|
1
|
+
const _mod_ilaezh = require('./event-emitter.cjs');
|
|
2
|
+
exports.UniversalEventEmitter = _mod_ilaezh.UniversalEventEmitter;;
|
|
3
|
+
const _mod_vh2ew1 = require('./stream.cjs');
|
|
4
|
+
exports.UniversalStreamResponse = _mod_vh2ew1.UniversalStreamResponse;
|
|
5
|
+
exports.StreamResponse = _mod_vh2ew1.StreamResponse;;
|
|
6
|
+
const _mod_rsdt23 = require('./download.cjs');
|
|
7
|
+
exports.UniversalDownloadResponse = _mod_rsdt23.UniversalDownloadResponse;
|
|
8
|
+
exports.DownloadResponse = _mod_rsdt23.DownloadResponse;;
|
|
9
|
+
const _mod_15456i = require('./upload.cjs');
|
|
10
|
+
exports.UniversalUploadResponse = _mod_15456i.UniversalUploadResponse;
|
|
11
|
+
exports.UploadResponse = _mod_15456i.UploadResponse;;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "rezo",
|
|
3
|
-
"version": "1.0.
|
|
3
|
+
"version": "1.0.64",
|
|
4
4
|
"description": "Lightning-fast, enterprise-grade HTTP client for modern JavaScript. Full HTTP/2 support, intelligent cookie management, multiple adapters (HTTP, Fetch, cURL, XHR), streaming, proxy support (HTTP/HTTPS/SOCKS), and cross-environment compatibility.",
|
|
5
5
|
"main": "dist/index.cjs",
|
|
6
6
|
"module": "dist/index.js",
|