rezo 1.0.50 → 1.0.52
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/adapters/http.cjs +1 -1
- package/dist/adapters/http.js +1 -1
- package/dist/adapters/http2.cjs +10 -2
- package/dist/adapters/http2.js +10 -2
- package/dist/adapters/index.cjs +6 -6
- package/dist/cache/index.cjs +9 -9
- package/dist/crawler/addon/oxylabs/index.cjs +1 -1
- package/dist/crawler/crawler-options.cjs +1 -1
- package/dist/crawler/crawler.cjs +32 -9
- package/dist/crawler/crawler.js +32 -9
- package/dist/crawler/index.cjs +40 -40
- package/dist/entries/crawler.cjs +5 -5
- package/dist/index.cjs +27 -27
- package/dist/internal/agents/index.cjs +10 -10
- package/dist/proxy/index.cjs +4 -4
- package/dist/queue/index.cjs +8 -8
- package/dist/responses/universal/index.cjs +11 -11
- package/dist/utils/compression.cjs +2 -33
- package/dist/utils/compression.js +2 -33
- package/dist/utils/http-config.cjs +1 -1
- package/dist/utils/http-config.js +1 -1
- package/package.json +1 -1
package/dist/adapters/http.cjs
CHANGED
|
@@ -776,7 +776,7 @@ async function request(config, fetchOptions, requestCount, timing, _stats, respo
|
|
|
776
776
|
if (isRedirected)
|
|
777
777
|
_stats.statusOnNext = "redirect";
|
|
778
778
|
if (isRedirected && location) {
|
|
779
|
-
const redirectUrlObj = new URL(location, url);
|
|
779
|
+
const redirectUrlObj = new URL(typeof location === "string" ? location : location.toString(), url);
|
|
780
780
|
if (!redirectUrlObj.hash && url.hash) {
|
|
781
781
|
redirectUrlObj.hash = url.hash;
|
|
782
782
|
}
|
package/dist/adapters/http.js
CHANGED
|
@@ -776,7 +776,7 @@ async function request(config, fetchOptions, requestCount, timing, _stats, respo
|
|
|
776
776
|
if (isRedirected)
|
|
777
777
|
_stats.statusOnNext = "redirect";
|
|
778
778
|
if (isRedirected && location) {
|
|
779
|
-
const redirectUrlObj = new URL(location, url);
|
|
779
|
+
const redirectUrlObj = new URL(typeof location === "string" ? location : location.toString(), url);
|
|
780
780
|
if (!redirectUrlObj.hash && url.hash) {
|
|
781
781
|
redirectUrlObj.hash = url.hash;
|
|
782
782
|
}
|
package/dist/adapters/http2.cjs
CHANGED
|
@@ -1253,12 +1253,15 @@ async function executeHttp2Stream(config, fetchOptions, requestCount, timing, _s
|
|
|
1253
1253
|
resolve(error);
|
|
1254
1254
|
}
|
|
1255
1255
|
};
|
|
1256
|
-
session.
|
|
1257
|
-
session.
|
|
1256
|
+
session.once("error", sessionErrorHandler);
|
|
1257
|
+
session.once("goaway", (errorCode, lastStreamID) => {
|
|
1258
1258
|
if (config.debug) {
|
|
1259
1259
|
console.log(`[Rezo Debug] HTTP/2: Session GOAWAY received (errorCode: ${errorCode}, lastStreamID: ${lastStreamID})`);
|
|
1260
1260
|
}
|
|
1261
1261
|
});
|
|
1262
|
+
const cleanupSessionListeners = () => {
|
|
1263
|
+
session.removeListener("error", sessionErrorHandler);
|
|
1264
|
+
};
|
|
1262
1265
|
if (config.debug) {
|
|
1263
1266
|
console.log(`[Rezo Debug] HTTP/2: Creating request stream...`);
|
|
1264
1267
|
}
|
|
@@ -1282,6 +1285,7 @@ async function executeHttp2Stream(config, fetchOptions, requestCount, timing, _s
|
|
|
1282
1285
|
timeoutId = setTimeout(() => {
|
|
1283
1286
|
if (!resolved) {
|
|
1284
1287
|
resolved = true;
|
|
1288
|
+
cleanupSessionListeners();
|
|
1285
1289
|
if (config.debug) {
|
|
1286
1290
|
console.log(`[Rezo Debug] HTTP/2: Request timeout after ${requestTimeout}ms (no response received)`);
|
|
1287
1291
|
}
|
|
@@ -1302,6 +1306,7 @@ async function executeHttp2Stream(config, fetchOptions, requestCount, timing, _s
|
|
|
1302
1306
|
if (!resolved && status === 0) {
|
|
1303
1307
|
resolved = true;
|
|
1304
1308
|
clearTimeout(timeoutId);
|
|
1309
|
+
cleanupSessionListeners();
|
|
1305
1310
|
if (config.debug) {
|
|
1306
1311
|
console.log(`[Rezo Debug] HTTP/2: Stream closed without response - retrying with new session`);
|
|
1307
1312
|
}
|
|
@@ -1317,6 +1322,7 @@ async function executeHttp2Stream(config, fetchOptions, requestCount, timing, _s
|
|
|
1317
1322
|
if (!resolved) {
|
|
1318
1323
|
resolved = true;
|
|
1319
1324
|
clearTimeout(timeoutId);
|
|
1325
|
+
cleanupSessionListeners();
|
|
1320
1326
|
const error = buildSmartError(config, fetchOptions, new Error("HTTP/2 stream aborted"));
|
|
1321
1327
|
_stats.statusOnNext = "error";
|
|
1322
1328
|
resolve(error);
|
|
@@ -1329,6 +1335,7 @@ async function executeHttp2Stream(config, fetchOptions, requestCount, timing, _s
|
|
|
1329
1335
|
if (!resolved) {
|
|
1330
1336
|
resolved = true;
|
|
1331
1337
|
clearTimeout(timeoutId);
|
|
1338
|
+
cleanupSessionListeners();
|
|
1332
1339
|
const error = buildSmartError(config, fetchOptions, err);
|
|
1333
1340
|
_stats.statusOnNext = "error";
|
|
1334
1341
|
resolve(error);
|
|
@@ -1427,6 +1434,7 @@ async function executeHttp2Stream(config, fetchOptions, requestCount, timing, _s
|
|
|
1427
1434
|
}
|
|
1428
1435
|
resolved = true;
|
|
1429
1436
|
clearTimeout(timeoutId);
|
|
1437
|
+
cleanupSessionListeners();
|
|
1430
1438
|
try {
|
|
1431
1439
|
updateTiming(config, timing, contentLengthCounter);
|
|
1432
1440
|
if (!config.transfer) {
|
package/dist/adapters/http2.js
CHANGED
|
@@ -1253,12 +1253,15 @@ async function executeHttp2Stream(config, fetchOptions, requestCount, timing, _s
|
|
|
1253
1253
|
resolve(error);
|
|
1254
1254
|
}
|
|
1255
1255
|
};
|
|
1256
|
-
session.
|
|
1257
|
-
session.
|
|
1256
|
+
session.once("error", sessionErrorHandler);
|
|
1257
|
+
session.once("goaway", (errorCode, lastStreamID) => {
|
|
1258
1258
|
if (config.debug) {
|
|
1259
1259
|
console.log(`[Rezo Debug] HTTP/2: Session GOAWAY received (errorCode: ${errorCode}, lastStreamID: ${lastStreamID})`);
|
|
1260
1260
|
}
|
|
1261
1261
|
});
|
|
1262
|
+
const cleanupSessionListeners = () => {
|
|
1263
|
+
session.removeListener("error", sessionErrorHandler);
|
|
1264
|
+
};
|
|
1262
1265
|
if (config.debug) {
|
|
1263
1266
|
console.log(`[Rezo Debug] HTTP/2: Creating request stream...`);
|
|
1264
1267
|
}
|
|
@@ -1282,6 +1285,7 @@ async function executeHttp2Stream(config, fetchOptions, requestCount, timing, _s
|
|
|
1282
1285
|
timeoutId = setTimeout(() => {
|
|
1283
1286
|
if (!resolved) {
|
|
1284
1287
|
resolved = true;
|
|
1288
|
+
cleanupSessionListeners();
|
|
1285
1289
|
if (config.debug) {
|
|
1286
1290
|
console.log(`[Rezo Debug] HTTP/2: Request timeout after ${requestTimeout}ms (no response received)`);
|
|
1287
1291
|
}
|
|
@@ -1302,6 +1306,7 @@ async function executeHttp2Stream(config, fetchOptions, requestCount, timing, _s
|
|
|
1302
1306
|
if (!resolved && status === 0) {
|
|
1303
1307
|
resolved = true;
|
|
1304
1308
|
clearTimeout(timeoutId);
|
|
1309
|
+
cleanupSessionListeners();
|
|
1305
1310
|
if (config.debug) {
|
|
1306
1311
|
console.log(`[Rezo Debug] HTTP/2: Stream closed without response - retrying with new session`);
|
|
1307
1312
|
}
|
|
@@ -1317,6 +1322,7 @@ async function executeHttp2Stream(config, fetchOptions, requestCount, timing, _s
|
|
|
1317
1322
|
if (!resolved) {
|
|
1318
1323
|
resolved = true;
|
|
1319
1324
|
clearTimeout(timeoutId);
|
|
1325
|
+
cleanupSessionListeners();
|
|
1320
1326
|
const error = buildSmartError(config, fetchOptions, new Error("HTTP/2 stream aborted"));
|
|
1321
1327
|
_stats.statusOnNext = "error";
|
|
1322
1328
|
resolve(error);
|
|
@@ -1329,6 +1335,7 @@ async function executeHttp2Stream(config, fetchOptions, requestCount, timing, _s
|
|
|
1329
1335
|
if (!resolved) {
|
|
1330
1336
|
resolved = true;
|
|
1331
1337
|
clearTimeout(timeoutId);
|
|
1338
|
+
cleanupSessionListeners();
|
|
1332
1339
|
const error = buildSmartError(config, fetchOptions, err);
|
|
1333
1340
|
_stats.statusOnNext = "error";
|
|
1334
1341
|
resolve(error);
|
|
@@ -1427,6 +1434,7 @@ async function executeHttp2Stream(config, fetchOptions, requestCount, timing, _s
|
|
|
1427
1434
|
}
|
|
1428
1435
|
resolved = true;
|
|
1429
1436
|
clearTimeout(timeoutId);
|
|
1437
|
+
cleanupSessionListeners();
|
|
1430
1438
|
try {
|
|
1431
1439
|
updateTiming(config, timing, contentLengthCounter);
|
|
1432
1440
|
if (!config.transfer) {
|
package/dist/adapters/index.cjs
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
const
|
|
2
|
-
exports.detectRuntime =
|
|
3
|
-
exports.getAdapterCapabilities =
|
|
4
|
-
exports.buildAdapterContext =
|
|
5
|
-
exports.getAvailableAdapters =
|
|
6
|
-
exports.selectAdapter =
|
|
1
|
+
const _mod_5w28h8 = require('./picker.cjs');
|
|
2
|
+
exports.detectRuntime = _mod_5w28h8.detectRuntime;
|
|
3
|
+
exports.getAdapterCapabilities = _mod_5w28h8.getAdapterCapabilities;
|
|
4
|
+
exports.buildAdapterContext = _mod_5w28h8.buildAdapterContext;
|
|
5
|
+
exports.getAvailableAdapters = _mod_5w28h8.getAvailableAdapters;
|
|
6
|
+
exports.selectAdapter = _mod_5w28h8.selectAdapter;;
|
package/dist/cache/index.cjs
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
const
|
|
2
|
-
exports.LRUCache =
|
|
3
|
-
const
|
|
4
|
-
exports.DNSCache =
|
|
5
|
-
exports.getGlobalDNSCache =
|
|
6
|
-
exports.resetGlobalDNSCache =
|
|
7
|
-
const
|
|
8
|
-
exports.ResponseCache =
|
|
9
|
-
exports.normalizeResponseCacheConfig =
|
|
1
|
+
const _mod_xclwey = require('./lru-cache.cjs');
|
|
2
|
+
exports.LRUCache = _mod_xclwey.LRUCache;;
|
|
3
|
+
const _mod_dqcrie = require('./dns-cache.cjs');
|
|
4
|
+
exports.DNSCache = _mod_dqcrie.DNSCache;
|
|
5
|
+
exports.getGlobalDNSCache = _mod_dqcrie.getGlobalDNSCache;
|
|
6
|
+
exports.resetGlobalDNSCache = _mod_dqcrie.resetGlobalDNSCache;;
|
|
7
|
+
const _mod_oqeitx = require('./response-cache.cjs');
|
|
8
|
+
exports.ResponseCache = _mod_oqeitx.ResponseCache;
|
|
9
|
+
exports.normalizeResponseCacheConfig = _mod_oqeitx.normalizeResponseCacheConfig;;
|
package/dist/crawler/addon/oxylabs/index.cjs
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
var{Rezo:
|
|
1
|
+
var{Rezo:h}=require("../../../core/rezo.cjs"),a=require("./options.cjs");exports.OXYLABS_BROWSER_TYPES=a.OXYLABS_BROWSER_TYPES;exports.OXYLABS_COMMON_LOCALES=a.OXYLABS_COMMON_LOCALES;exports.OXYLABS_COMMON_GEO_LOCATIONS=a.OXYLABS_COMMON_GEO_LOCATIONS;exports.OXYLABS_US_STATES=a.OXYLABS_US_STATES;exports.OXYLABS_EUROPEAN_COUNTRIES=a.OXYLABS_EUROPEAN_COUNTRIES;exports.OXYLABS_ASIAN_COUNTRIES=a.OXYLABS_ASIAN_COUNTRIES;exports.getRandomBrowserType=a.getRandomBrowserType;exports.getRandomLocale=a.getRandomLocale;exports.getRandomGeoLocation=a.getRandomGeoLocation;var c="https://realtime.oxylabs.io/v1/queries";class u{config;http;authHeader;constructor(e){if(!e.username||!e.password)throw Error("Oxylabs username and password are required");this.config={username:e.username,password:e.password,browserType:e.browserType??"desktop",locale:e.locale??"en-US",geoLocation:e.geoLocation??"",render:e.render??!1,context:e.context??{},timeout:e.timeout??120000},this.http=new h({baseURL:c,timeout:this.config.timeout}),this.authHeader=`Basic ${Buffer.from(`${e.username}:${e.password}`).toString("base64")}`}async scrape(e,t){let r={...this.config,...t},s=this.buildRequestBody(e,r),o=(await this.http.postJson(c,s,{headers:{Authorization:this.authHeader,"Content-Type":"application/json"}})).data;if(o.error)throw Error(`Oxylabs API error: ${o.error}`);if(!o.results||o.results.length===0)throw Error("Oxylabs API returned no results");let n=o.results[0],d=n._response?.cookies||[],l=n._response?.headers||{};return{statusCode:n.status_code,url:n.url,content:n.content,cookies:d,headers:l,jobId:o.job_id||n.job_id,rendered:r.render,geoLocation:r.geoLocation||void 0,locale:r.locale,browserType:r.browserType,raw:o}}async scrapeMany(e,t,r=1000){let s=[];for(let i=0;i<e.length;i++){let o=await this.scrape(e[i],t);if(s.push(o),i<e.length-1&&r>0)await new Promise((n)=>setTimeout(n,r))}return s}buildRequestBody(e,t){let 
r=[];if(r.push({key:"return_page_cookies",value:!0}),r.push({key:"return_page_headers",value:!0}),t.context)for(let[i,o]of Object.entries(t.context))r.push({key:i,value:o});let s={source:"universal",url:e,context:r};if(t.render)s.render="html";if(t.browserType)s.user_agent_type=t.browserType;if(t.locale)s.locale=t.locale;if(t.geoLocation)s.geo_location=t.geoLocation;return s}getConfig(){return{...this.config,password:"***"}}withConfig(e){return new u({...this.config,...e})}async testConnection(){try{return await this.scrape("https://httpbin.org/ip"),!0}catch(e){throw Error(`Oxylabs connection test failed: ${e.message}`)}}}exports.Oxylabs=u;exports.default=u;module.exports=Object.assign(u,exports);
|
package/dist/crawler/crawler-options.cjs
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
var{defineProperty:f,getOwnPropertyNames:g,getOwnPropertyDescriptor:y}=Object,A=Object.prototype.hasOwnProperty;var m=new WeakMap,v=(e)=>{var t=m.get(e),s;if(t)return t;if(t=f({},"__esModule",{value:!0}),e&&typeof e==="object"||typeof e==="function")g(e).map((a)=>!A.call(t,a)&&f(t,a,{get:()=>e[a],enumerable:!(s=y(e,a))||s.enumerable}));return m.set(e,t),t};var b={};module.exports=v(b);var{RezoQueue:$}=require("../queue/queue.cjs"),{Oxylabs:M}=require("./addon/oxylabs/index.cjs"),w=require("node:path"),O=require("node:os"),{Decodo:p}=require("./addon/decodo/index.cjs");class x{baseUrl;adapter;enableNavigationHistory;sessionId;rejectUnauthorized;userAgent;useRndUserAgent;timeout;maxRedirects;maxRetryAttempts;retryDelay;retryOnStatusCode;forceRevisit;retryWithoutProxyOnStatusCode;retryOnProxyError;maxRetryOnProxyError;allowRevisiting;enableCache;cacheTTL;cacheDir;throwFatalError;debug;maxDepth;maxUrls;maxResponseSize;respectRobotsTxt;followNofollow;autoThrottle;autoThrottleTargetDelay;autoThrottleMinDelay;autoThrottleMaxDelay;maxWaitOn429;alwaysWaitOn429;oxylabs=[];decodo=[];proxies=[];limiters=[];requestHeaders=[];userAgents=D();constructor(e={}){this.baseUrl=e.baseUrl||"",this.adapter=e.adapter??"http",this.enableNavigationHistory=e.enableNavigationHistory??!1,this.sessionId=e.sessionId??`session_${Date.now()}_${Math.random().toString(36).slice(2,8)}`,this.rejectUnauthorized=e.rejectUnauthorized??!0,this.userAgent=e.userAgent,this.useRndUserAgent=e.useRndUserAgent??!1,this.timeout=e.timeout??30000,this.maxRedirects=e.maxRedirects??10,this.maxRetryAttempts=e.maxRetryAttempts??3,this.retryDelay=e.retryDelay??0,this.retryOnStatusCode=e.retryOnStatusCode??[408,429,500,502,503,504],this.forceRevisit=e.forceRevisit??!1,this.retryWithoutProxyOnStatusCode=e.retryWithoutProxyOnStatusCode??[407,403],this.retryOnProxyError=e.retryOnProxyError??!0,this.maxRetryOnProxyError=e.maxRetryOnProxyError??3,this.allowRevisiting=e.allowRevisiting??!1,this.enableCache=e.enableCache??!0,thi
s.cacheTTL=e.cacheTTL??604800000,this.cacheDir=e.cacheDir??w.join(O.tmpdir(),"uiniqhtt_cache"),this.throwFatalError=e.throwFatalError??!1,this.debug=e.debug??!1,this.maxDepth=e.maxDepth??0,this.maxUrls=e.maxUrls??0,this.maxResponseSize=e.maxResponseSize??0,this.respectRobotsTxt=e.respectRobotsTxt??!1,this.followNofollow=e.followNofollow??!1,this.autoThrottle=e.autoThrottle??!0,this.autoThrottleTargetDelay=e.autoThrottleTargetDelay??1000,this.autoThrottleMinDelay=e.autoThrottleMinDelay??100,this.autoThrottleMaxDelay=e.autoThrottleMaxDelay??60000,this.maxWaitOn429=e.maxWaitOn429??1800000,this.alwaysWaitOn429=e.alwaysWaitOn429??!1,this._addHeaders(e.headers),this._addOxylabs(e.oxylabs),this._addDecodo(e.decodo),this._addProxies(e.proxy),this._addLimiters(e.limiter)}getConfiguredDomains(e){return(e==="headers"?this.requestHeaders:e==="limiters"?this.limiters:e==="oxylabs"?this.oxylabs:this.proxies).filter((s)=>s.domain).map((s)=>s.domain).filter((s,a,r)=>r.indexOf(s)===a)}removeDomain(e){return this.requestHeaders=this.requestHeaders.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this.proxies=this.proxies.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this.limiters=this.limiters.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this.oxylabs=this.oxylabs.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this}_domainsEqual(e,t){if(Array.isArray(e)&&Array.isArray(t))return e.length===t.length&&e.every((s,a)=>s===t[a]);return e===t}getConfigurationSummary(){let e=(t)=>({total:t.length,global:t.filter((s)=>s.isGlobal).length,domainSpecific:t.filter((s)=>!s.isGlobal&&s.domain).length});return{headers:e(this.requestHeaders),proxies:e(this.proxies),limiters:e(this.limiters),oxylabs:e(this.oxylabs)}}_addHeaders(e){if(!e||!e.enable)return;for(let t of e.httpHeaders){let{domain:s,isGlobal:a,headers:r}=t;if(!s&&!a)continue;if(r instanceof Headers){let i=Object.fromEntries(r.entries());if(Object.keys(i).length<1)continue;r=i}else 
if(!r||Object.keys(r).length<1)continue;this.requestHeaders.push({domain:s,isGlobal:a,headers:r})}}_addProxies(e){if(!e||!e.enable)return;for(let t of e.proxies){let{domain:s,isGlobal:a,proxy:r}=t;if(!s&&!a)continue;if(!r||Object.keys(r).length<1)continue;this.proxies.push({domain:s,isGlobal:a,proxy:r})}}_addLimiters(e){if(!e||!e.enable)return;for(let t of e.limiters){let{domain:s,isGlobal:a,options:r}=t;if(!s&&!a)continue;if(!r||Object.keys(r).length<1)continue;this.limiters.push({domain:s,isGlobal:a,pqueue:new $(r)})}}_addOxylabs(e){if(!e||!e.enable)return;for(let t of e.labs){let{domain:s,isGlobal:a,options:r,queueOptions:i}=t;if(!s&&!a)continue;if(!r||Object.keys(r).length<1)continue;this.oxylabs.push({domain:s,isGlobal:a,adaptar:new M(r)})}}_addDecodo(e){if(!e||!e.enable)return;for(let t of e.labs){let{domain:s,isGlobal:a,options:r,queueOptions:i}=t;if(!s&&!a)continue;if(!r||Object.keys(r).length<1)continue;this.decodo.push({domain:s,isGlobal:a,adaptar:new p(r)})}}addHeaders(e){return this._addHeaders({enable:!0,httpHeaders:[e]}),this}addProxy(e){return this._addProxies({enable:!0,proxies:[e]}),this}addLimiter(e){return this._addLimiters({enable:!0,limiters:[e]}),this}addOxylabs(e){return this._addOxylabs({enable:!0,labs:[e]}),this}addDecodo(e){return this._addDecodo({enable:!0,labs:[e]}),this}clearGlobalConfigs(){if(Array.isArray(this.requestHeaders))this.requestHeaders=this.requestHeaders.filter((e)=>!e.isGlobal);if(Array.isArray(this.oxylabs))this.oxylabs=this.oxylabs.filter((e)=>!e.isGlobal);if(Array.isArray(this.limiters))this.limiters=this.limiters.filter((e)=>!e.isGlobal);if(Array.isArray(this.proxies))this.proxies=this.proxies.filter((e)=>!e.isGlobal);return this}getAdapter(e,t,s,a){if(!this.getDomainName(e))return null;let i=[],n=t==="headers"?this.requestHeaders:t==="limiters"?this.limiters:t==="oxylabs"?this.oxylabs:t==="decodo"?this.decodo:this.proxies;for(let o=0;o<n.length;o++)if(this._hasDomain(e,n[o].domain))i.push(o);if(i.length){let 
o=a?i[this.rnd(0,i.length-1)]:i[0];return t==="headers"?this.requestHeaders[o].headers:t==="limiters"?this.limiters[o].pqueue:t==="oxylabs"?this.oxylabs[o].adaptar:t==="decodo"?this.decodo[o].adaptar:this.proxies[o].proxy}i.length=0;for(let o=0;o<n.length;o++)i.push(o);if(i.length){let o=a?i[this.rnd(0,i.length-1)]:i[0];if(n[o].isGlobal&&s)return t==="headers"?this.requestHeaders[o].headers:t==="limiters"?this.limiters[o].pqueue:t==="oxylabs"?this.oxylabs[o].adaptar:t==="decodo"?this.decodo[o].adaptar:this.proxies[o].proxy}return null}rnd(e=0,t=Number.MAX_VALUE){return Math.floor(Math.random()*(t-e+1))+e}hasDomain(e,t,s){if(!this.getDomainName(e))return!1;let r=t==="headers"?this.requestHeaders:t==="limiters"?this.limiters:t==="oxylabs"?this.oxylabs:t==="decodo"?this.decodo:this.proxies;for(let i=0;i<r.length;i++)if(this._hasDomain(e,r[i].domain))return!0;if(s){for(let i=0;i<r.length;i++)if(r[i].isGlobal)return!0}return!1}pickHeaders(e,t,s,a){let r=this.getAdapter(e,"headers",t),i=new Headers(r??{});if(s&&s instanceof Headers)for(let[n,o]of Object.entries(s.entries()))i.set(n,o);else if(s&&typeof s==="object"){for(let[n,o]of Object.entries(s))if(typeof o==="string")i.set(n,o)}if(a)i.set("user-agent",this.getRandomUserAgent());return Object.fromEntries(i.entries())}_hasDomain(e,t){if(!t)return!1;let s=this.getDomainName(e);if(!s)return!1;let a=(i)=>{return/[\^\$\*\+\?\{\}\[\]\(\)\|\\]/.test(i)||i.startsWith("/")||i.includes(".*")||i.includes(".+")},r=(i)=>{if(i instanceof RegExp)return i.test(s)||i.test(e);let n=i.toString().trim();if(s.toLowerCase()===n.toLowerCase())return!0;if(n.includes("*")){let l=n.replace(/[.*+?^${}()|[\]\\]/g,"\\$&").replace(/\\\*/g,".*"),h=new RegExp(`^${l}$`,"i");return h.test(s)||h.test(e)}if(a(n))try{let l=n,h="i",u=n.match(/^\/(.*)\/(\w*)$/);if(u)l=u[1],h=u[2]||"i";let c=new RegExp(l,h);return c.test(s)||c.test(e)}catch(l){return s.toLowerCase().includes(n.toLowerCase())}let o=s.toLowerCase(),d=n.toLowerCase();return 
o===d||o.endsWith("."+d)||d.endsWith("."+o)};if(Array.isArray(t)){for(let i of t)if(r(i))return!0;return!1}return r(t)}getDomainName(e){if(this.isValidUrl(e))return new URL(e).hostname;else if(this.isHostName(e))return e;return null}isHostName(e){if(!e)return!1;if(e.length>255)return!1;let t=/^(?:(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+ [a-zA-Z]{2,})$/;return e=e.trim().toLowerCase(),t.test(e)&&!e.startsWith("-")&&!e.endsWith("-")}isValidUrl(e){if(!e)return!1;e=e.trim();try{let t=new URL(e);if(!t.protocol||!["http:","https:"].includes(t.protocol.toLowerCase()))return!1;if(!t.hostname)return!1;if(!/^(?:(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,})$/.test(t.hostname))return!1;return!0}catch{return!1}}getRandomUserAgent(){return this.userAgents[Math.floor(Math.random()*this.userAgents.length)]}}function D(){let e=[{name:"Chrome",version:"91.0.4472.124",engine:"AppleWebKit/537.36"},{name:"Firefox",version:"89.0",engine:"Gecko/20100101"},{name:"Safari",version:"14.1.1",engine:"AppleWebKit/605.1.15"},{name:"Edge",version:"91.0.864.59",engine:"AppleWebKit/537.36"},{name:"Opera",version:"77.0.4054.277",engine:"AppleWebKit/537.36"},{name:"Vivaldi",version:"3.8.2259.42",engine:"AppleWebKit/537.36"},{name:"Brave",version:"1.26.74",engine:"AppleWebKit/537.36"},{name:"Chromium",version:"91.0.4472.101",engine:"AppleWebKit/537.36"},{name:"Yandex",version:"21.5.3.742",engine:"AppleWebKit/537.36"},{name:"Maxthon",version:"5.3.8.2000",engine:"AppleWebKit/537.36"}],t=["Windows NT 10.0","Windows NT 6.1","Macintosh; Intel Mac OS X 10_15_7","Macintosh; Intel Mac OS X 11_4_0","X11; Linux x86_64","X11; Ubuntu; Linux x86_64"],s=[];for(let a=0;a<200;a++){let r=e[Math.floor(Math.random()*e.length)],i=t[Math.floor(Math.random()*t.length)],n="";switch(r.name){case"Chrome":n=`Mozilla/5.0 (${i}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36`;break;case"Firefox":n=`Mozilla/5.0 (${i}; rv:${r.version}) ${r.engine} 
Firefox/${r.version}`;break;case"Safari":n=`Mozilla/5.0 (${i}) ${r.engine} (KHTML, like Gecko) Version/${r.version} Safari/605.1.15`;break;case"Edge":n=`Mozilla/5.0 (${i}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 Edg/${r.version}`;break;case"Opera":n=`Mozilla/5.0 (${i}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 OPR/${r.version}`;break;case"Vivaldi":n=`Mozilla/5.0 (${i}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 Vivaldi/${r.version}`;break;case"Brave":n=`Mozilla/5.0 (${i}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 Brave/${r.version}`;break;case"Chromium":n=`Mozilla/5.0 (${i}) ${r.engine} (KHTML, like Gecko) Chromium/${r.version} Chrome/${r.version} Safari/537.36`;break;case"Yandex":n=`Mozilla/5.0 (${i}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} YaBrowser/${r.version} Safari/537.36`;break;case"Maxthon":n=`Mozilla/5.0 (${i}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 Maxthon/${r.version}`;break}s.push(n)}return s}b.CrawlerOptions=x;
|
|
1
|
+
var{RezoQueue:m}=require("../queue/queue.cjs"),{Oxylabs:x}=require("./addon/oxylabs/index.cjs"),b=require("node:path"),g=require("node:os"),{Decodo:y}=require("./addon/decodo/index.cjs");class f{baseUrl;adapter;enableNavigationHistory;sessionId;rejectUnauthorized;userAgent;useRndUserAgent;timeout;maxRedirects;maxRetryAttempts;retryDelay;retryOnStatusCode;forceRevisit;retryWithoutProxyOnStatusCode;retryOnProxyError;maxRetryOnProxyError;allowRevisiting;enableCache;cacheTTL;cacheDir;throwFatalError;debug;maxDepth;maxUrls;maxResponseSize;respectRobotsTxt;followNofollow;autoThrottle;autoThrottleTargetDelay;autoThrottleMinDelay;autoThrottleMaxDelay;maxWaitOn429;alwaysWaitOn429;oxylabs=[];decodo=[];proxies=[];limiters=[];requestHeaders=[];userAgents=A();constructor(e={}){this.baseUrl=e.baseUrl||"",this.adapter=e.adapter??"http",this.enableNavigationHistory=e.enableNavigationHistory??!1,this.sessionId=e.sessionId??`session_${Date.now()}_${Math.random().toString(36).slice(2,8)}`,this.rejectUnauthorized=e.rejectUnauthorized??!0,this.userAgent=e.userAgent,this.useRndUserAgent=e.useRndUserAgent??!1,this.timeout=e.timeout??30000,this.maxRedirects=e.maxRedirects??10,this.maxRetryAttempts=e.maxRetryAttempts??3,this.retryDelay=e.retryDelay??0,this.retryOnStatusCode=e.retryOnStatusCode??[408,429,500,502,503,504],this.forceRevisit=e.forceRevisit??!1,this.retryWithoutProxyOnStatusCode=e.retryWithoutProxyOnStatusCode??[407,403],this.retryOnProxyError=e.retryOnProxyError??!0,this.maxRetryOnProxyError=e.maxRetryOnProxyError??3,this.allowRevisiting=e.allowRevisiting??!1,this.enableCache=e.enableCache??!0,this.cacheTTL=e.cacheTTL??604800000,this.cacheDir=e.cacheDir??b.join(g.tmpdir(),"uiniqhtt_cache"),this.throwFatalError=e.throwFatalError??!1,this.debug=e.debug??!1,this.maxDepth=e.maxDepth??0,this.maxUrls=e.maxUrls??0,this.maxResponseSize=e.maxResponseSize??0,this.respectRobotsTxt=e.respectRobotsTxt??!1,this.followNofollow=e.followNofollow??!1,this.autoThrottle=e.autoThrottle??!0,this.aut
oThrottleTargetDelay=e.autoThrottleTargetDelay??1000,this.autoThrottleMinDelay=e.autoThrottleMinDelay??100,this.autoThrottleMaxDelay=e.autoThrottleMaxDelay??60000,this.maxWaitOn429=e.maxWaitOn429??1800000,this.alwaysWaitOn429=e.alwaysWaitOn429??!1,this._addHeaders(e.headers),this._addOxylabs(e.oxylabs),this._addDecodo(e.decodo),this._addProxies(e.proxy),this._addLimiters(e.limiter)}getConfiguredDomains(e){return(e==="headers"?this.requestHeaders:e==="limiters"?this.limiters:e==="oxylabs"?this.oxylabs:this.proxies).filter((i)=>i.domain).map((i)=>i.domain).filter((i,o,r)=>r.indexOf(i)===o)}removeDomain(e){return this.requestHeaders=this.requestHeaders.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this.proxies=this.proxies.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this.limiters=this.limiters.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this.oxylabs=this.oxylabs.filter((t)=>!t.domain||!this._domainsEqual(t.domain,e)),this}_domainsEqual(e,t){if(Array.isArray(e)&&Array.isArray(t))return e.length===t.length&&e.every((i,o)=>i===t[o]);return e===t}getConfigurationSummary(){let e=(t)=>({total:t.length,global:t.filter((i)=>i.isGlobal).length,domainSpecific:t.filter((i)=>!i.isGlobal&&i.domain).length});return{headers:e(this.requestHeaders),proxies:e(this.proxies),limiters:e(this.limiters),oxylabs:e(this.oxylabs)}}_addHeaders(e){if(!e||!e.enable)return;for(let t of e.httpHeaders){let{domain:i,isGlobal:o,headers:r}=t;if(!i&&!o)continue;if(r instanceof Headers){let s=Object.fromEntries(r.entries());if(Object.keys(s).length<1)continue;r=s}else if(!r||Object.keys(r).length<1)continue;this.requestHeaders.push({domain:i,isGlobal:o,headers:r})}}_addProxies(e){if(!e||!e.enable)return;for(let t of e.proxies){let{domain:i,isGlobal:o,proxy:r}=t;if(!i&&!o)continue;if(!r||Object.keys(r).length<1)continue;this.proxies.push({domain:i,isGlobal:o,proxy:r})}}_addLimiters(e){if(!e||!e.enable)return;for(let t of 
e.limiters){let{domain:i,isGlobal:o,options:r}=t;if(!i&&!o)continue;if(!r||Object.keys(r).length<1)continue;this.limiters.push({domain:i,isGlobal:o,pqueue:new m(r)})}}_addOxylabs(e){if(!e||!e.enable)return;for(let t of e.labs){let{domain:i,isGlobal:o,options:r,queueOptions:s}=t;if(!i&&!o)continue;if(!r||Object.keys(r).length<1)continue;this.oxylabs.push({domain:i,isGlobal:o,adaptar:new x(r)})}}_addDecodo(e){if(!e||!e.enable)return;for(let t of e.labs){let{domain:i,isGlobal:o,options:r,queueOptions:s}=t;if(!i&&!o)continue;if(!r||Object.keys(r).length<1)continue;this.decodo.push({domain:i,isGlobal:o,adaptar:new y(r)})}}addHeaders(e){return this._addHeaders({enable:!0,httpHeaders:[e]}),this}addProxy(e){return this._addProxies({enable:!0,proxies:[e]}),this}addLimiter(e){return this._addLimiters({enable:!0,limiters:[e]}),this}addOxylabs(e){return this._addOxylabs({enable:!0,labs:[e]}),this}addDecodo(e){return this._addDecodo({enable:!0,labs:[e]}),this}clearGlobalConfigs(){if(Array.isArray(this.requestHeaders))this.requestHeaders=this.requestHeaders.filter((e)=>!e.isGlobal);if(Array.isArray(this.oxylabs))this.oxylabs=this.oxylabs.filter((e)=>!e.isGlobal);if(Array.isArray(this.limiters))this.limiters=this.limiters.filter((e)=>!e.isGlobal);if(Array.isArray(this.proxies))this.proxies=this.proxies.filter((e)=>!e.isGlobal);return this}getAdapter(e,t,i,o){if(!this.getDomainName(e))return null;let s=[],a=t==="headers"?this.requestHeaders:t==="limiters"?this.limiters:t==="oxylabs"?this.oxylabs:t==="decodo"?this.decodo:this.proxies;for(let n=0;n<a.length;n++)if(this._hasDomain(e,a[n].domain))s.push(n);if(s.length){let n=o?s[this.rnd(0,s.length-1)]:s[0];return t==="headers"?this.requestHeaders[n].headers:t==="limiters"?this.limiters[n].pqueue:t==="oxylabs"?this.oxylabs[n].adaptar:t==="decodo"?this.decodo[n].adaptar:this.proxies[n].proxy}s.length=0;for(let n=0;n<a.length;n++)s.push(n);if(s.length){let n=o?s[this.rnd(0,s.length-1)]:s[0];if(a[n].isGlobal&&i)return 
t==="headers"?this.requestHeaders[n].headers:t==="limiters"?this.limiters[n].pqueue:t==="oxylabs"?this.oxylabs[n].adaptar:t==="decodo"?this.decodo[n].adaptar:this.proxies[n].proxy}return null}rnd(e=0,t=Number.MAX_VALUE){return Math.floor(Math.random()*(t-e+1))+e}hasDomain(e,t,i){if(!this.getDomainName(e))return!1;let r=t==="headers"?this.requestHeaders:t==="limiters"?this.limiters:t==="oxylabs"?this.oxylabs:t==="decodo"?this.decodo:this.proxies;for(let s=0;s<r.length;s++)if(this._hasDomain(e,r[s].domain))return!0;if(i){for(let s=0;s<r.length;s++)if(r[s].isGlobal)return!0}return!1}pickHeaders(e,t,i,o){let r=this.getAdapter(e,"headers",t),s=new Headers(r??{});if(i&&i instanceof Headers)for(let[a,n]of Object.entries(i.entries()))s.set(a,n);else if(i&&typeof i==="object"){for(let[a,n]of Object.entries(i))if(typeof n==="string")s.set(a,n)}if(o)s.set("user-agent",this.getRandomUserAgent());return Object.fromEntries(s.entries())}_hasDomain(e,t){if(!t)return!1;let i=this.getDomainName(e);if(!i)return!1;let o=(s)=>{return/[\^\$\*\+\?\{\}\[\]\(\)\|\\]/.test(s)||s.startsWith("/")||s.includes(".*")||s.includes(".+")},r=(s)=>{if(s instanceof RegExp)return s.test(i)||s.test(e);let a=s.toString().trim();if(i.toLowerCase()===a.toLowerCase())return!0;if(a.includes("*")){let l=a.replace(/[.*+?^${}()|[\]\\]/g,"\\$&").replace(/\\\*/g,".*"),h=new RegExp(`^${l}$`,"i");return h.test(i)||h.test(e)}if(o(a))try{let l=a,h="i",u=a.match(/^\/(.*)\/(\w*)$/);if(u)l=u[1],h=u[2]||"i";let c=new RegExp(l,h);return c.test(i)||c.test(e)}catch(l){return i.toLowerCase().includes(a.toLowerCase())}let n=i.toLowerCase(),d=a.toLowerCase();return n===d||n.endsWith("."+d)||d.endsWith("."+n)};if(Array.isArray(t)){for(let s of t)if(r(s))return!0;return!1}return r(t)}getDomainName(e){if(this.isValidUrl(e))return new URL(e).hostname;else if(this.isHostName(e))return e;return null}isHostName(e){if(!e)return!1;if(e.length>255)return!1;let t=/^(?:(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+ 
[a-zA-Z]{2,})$/;return e=e.trim().toLowerCase(),t.test(e)&&!e.startsWith("-")&&!e.endsWith("-")}isValidUrl(e){if(!e)return!1;e=e.trim();try{let t=new URL(e);if(!t.protocol||!["http:","https:"].includes(t.protocol.toLowerCase()))return!1;if(!t.hostname)return!1;if(!/^(?:(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,})$/.test(t.hostname))return!1;return!0}catch{return!1}}getRandomUserAgent(){return this.userAgents[Math.floor(Math.random()*this.userAgents.length)]}}function A(){let e=[{name:"Chrome",version:"91.0.4472.124",engine:"AppleWebKit/537.36"},{name:"Firefox",version:"89.0",engine:"Gecko/20100101"},{name:"Safari",version:"14.1.1",engine:"AppleWebKit/605.1.15"},{name:"Edge",version:"91.0.864.59",engine:"AppleWebKit/537.36"},{name:"Opera",version:"77.0.4054.277",engine:"AppleWebKit/537.36"},{name:"Vivaldi",version:"3.8.2259.42",engine:"AppleWebKit/537.36"},{name:"Brave",version:"1.26.74",engine:"AppleWebKit/537.36"},{name:"Chromium",version:"91.0.4472.101",engine:"AppleWebKit/537.36"},{name:"Yandex",version:"21.5.3.742",engine:"AppleWebKit/537.36"},{name:"Maxthon",version:"5.3.8.2000",engine:"AppleWebKit/537.36"}],t=["Windows NT 10.0","Windows NT 6.1","Macintosh; Intel Mac OS X 10_15_7","Macintosh; Intel Mac OS X 11_4_0","X11; Linux x86_64","X11; Ubuntu; Linux x86_64"],i=[];for(let o=0;o<200;o++){let r=e[Math.floor(Math.random()*e.length)],s=t[Math.floor(Math.random()*t.length)],a="";switch(r.name){case"Chrome":a=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36`;break;case"Firefox":a=`Mozilla/5.0 (${s}; rv:${r.version}) ${r.engine} Firefox/${r.version}`;break;case"Safari":a=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Version/${r.version} Safari/605.1.15`;break;case"Edge":a=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 Edg/${r.version}`;break;case"Opera":a=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 
OPR/${r.version}`;break;case"Vivaldi":a=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 Vivaldi/${r.version}`;break;case"Brave":a=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 Brave/${r.version}`;break;case"Chromium":a=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chromium/${r.version} Chrome/${r.version} Safari/537.36`;break;case"Yandex":a=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} YaBrowser/${r.version} Safari/537.36`;break;case"Maxthon":a=`Mozilla/5.0 (${s}) ${r.engine} (KHTML, like Gecko) Chrome/${r.version} Safari/537.36 Maxthon/${r.version}`;break}i.push(a)}return i}exports.CrawlerOptions=f;
|
package/dist/crawler/crawler.cjs
CHANGED
|
@@ -5,11 +5,12 @@ const { NavigationHistory } = require('./plugin/navigation-history.cjs');
|
|
|
5
5
|
const { RobotsTxt } = require('./plugin/robots-txt.cjs');
|
|
6
6
|
const { parseHTML } = require("linkedom");
|
|
7
7
|
const path = require("node:path");
|
|
8
|
-
const { Rezo } = require('../
|
|
8
|
+
const { Rezo } = require('../adapters/entries/http.cjs');
|
|
9
9
|
const { RezoQueue } = require('../queue/queue.cjs');
|
|
10
10
|
const { Scraper } = require('./scraper.cjs');
|
|
11
11
|
const { CrawlerOptions } = require('./crawler-options.cjs');
|
|
12
12
|
const { loadAdapter } = require('../adapters/picker.cjs');
|
|
13
|
+
const { resetGlobalAgentPool } = require('../utils/agent-pool.cjs');
|
|
13
14
|
String.prototype.addBaseUrl = function(url) {
|
|
14
15
|
url = url instanceof URL ? url.href : url;
|
|
15
16
|
const html = this.replace(/<base\b[^>]*?>/gi, "");
|
|
@@ -94,6 +95,10 @@ class Crawler {
|
|
|
94
95
|
}).then((storage) => {
|
|
95
96
|
this.cacher = storage;
|
|
96
97
|
this.isCacheReady = true;
|
|
98
|
+
}).catch((err) => {
|
|
99
|
+
if (this.config.debug)
|
|
100
|
+
console.warn("[Crawler] Failed to initialize cache:", err);
|
|
101
|
+
this.isCacheReady = true;
|
|
97
102
|
});
|
|
98
103
|
const dit = path.resolve(cacheDir, "urls");
|
|
99
104
|
if (!fs.existsSync(dit))
|
|
@@ -105,6 +110,10 @@ class Crawler {
|
|
|
105
110
|
}).then((storage) => {
|
|
106
111
|
this.urlStorage = storage;
|
|
107
112
|
this.isStorageReady = true;
|
|
113
|
+
}).catch((err) => {
|
|
114
|
+
if (this.config.debug)
|
|
115
|
+
console.warn("[Crawler] Failed to initialize URL storage:", err);
|
|
116
|
+
this.isStorageReady = true;
|
|
108
117
|
});
|
|
109
118
|
} else {
|
|
110
119
|
const dit = path.resolve(this.config.cacheDir, "./cache/urls");
|
|
@@ -117,6 +126,10 @@ class Crawler {
|
|
|
117
126
|
}).then((storage) => {
|
|
118
127
|
this.urlStorage = storage;
|
|
119
128
|
this.isStorageReady = true;
|
|
129
|
+
}).catch((err) => {
|
|
130
|
+
if (this.config.debug)
|
|
131
|
+
console.warn("[Crawler] Failed to initialize URL storage:", err);
|
|
132
|
+
this.isStorageReady = true;
|
|
120
133
|
});
|
|
121
134
|
}
|
|
122
135
|
if (this.config.enableNavigationHistory) {
|
|
@@ -856,12 +869,15 @@ class Crawler {
|
|
|
856
869
|
}
|
|
857
870
|
async execute(method, url, body, options = {}, isEmail, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions) {
|
|
858
871
|
await this.waitForStorage();
|
|
872
|
+
console.log("Waiting for storage...");
|
|
859
873
|
if (this.isCacheEnabled) {
|
|
860
874
|
await this.waitForCache();
|
|
861
875
|
}
|
|
876
|
+
console.log("Waiting for cache...");
|
|
862
877
|
if (this.config.enableNavigationHistory) {
|
|
863
878
|
await this.waitForNavigationHistory();
|
|
864
879
|
}
|
|
880
|
+
console.log("Waiting for navigation history...");
|
|
865
881
|
const task = this.queue.add(() => this.executeHttp(method, url, body, options, isEmail, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions));
|
|
866
882
|
task.finally(() => this.pendingExecutions.delete(task));
|
|
867
883
|
}
|
|
@@ -888,7 +904,9 @@ class Crawler {
|
|
|
888
904
|
}
|
|
889
905
|
async executeHttp(method, url, body, options = {}, isEmail, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions, retryCount = 0, parentUrl) {
|
|
890
906
|
try {
|
|
907
|
+
console.log("Triggering start handlers...");
|
|
891
908
|
await this.triggerStartHandlers();
|
|
909
|
+
console.log("Checking crawl limits...");
|
|
892
910
|
const limitCheck = await this.checkCrawlLimits(url, parentUrl);
|
|
893
911
|
if (!limitCheck.allowed) {
|
|
894
912
|
if (this.config.debug) {
|
|
@@ -903,13 +921,16 @@ class Crawler {
|
|
|
903
921
|
await new Promise((resolve) => setTimeout(resolve, delay));
|
|
904
922
|
}
|
|
905
923
|
const isVisited = forceRevisit ? false : await this.hasUrlInCache(url);
|
|
906
|
-
const cache = await this.getCache(url);
|
|
907
|
-
if (
|
|
908
|
-
|
|
909
|
-
|
|
910
|
-
|
|
924
|
+
const cache = method.toLowerCase() === "get" ? await this.getCache(url) : undefined;
|
|
925
|
+
if (method.toLowerCase() === "get") {
|
|
926
|
+
if (isVisited && !cache)
|
|
927
|
+
return;
|
|
928
|
+
if (isVisited)
|
|
929
|
+
return;
|
|
930
|
+
}
|
|
911
931
|
const requestStartTime = Date.now();
|
|
912
932
|
const response = cache && method === "GET" ? cache : oxylabsInstanse && oxylabsOptions ? await oxylabsInstanse.scrape(url) : decodoInstanse && decodoOptions ? await decodoInstanse.scrape(url) : await (method === "GET" ? this.http.get(url, options) : method === "PATCH" ? this.http.patch(url, body, options) : method === "POST" ? this.http.post(url, body, options) : this.http.put(url, body, options));
|
|
933
|
+
console.log("Response received...");
|
|
913
934
|
if (!cache) {
|
|
914
935
|
const responseTime = Date.now() - requestStartTime;
|
|
915
936
|
this.calculateAutoThrottleDelay(domain, responseTime);
|
|
@@ -943,9 +964,9 @@ class Crawler {
|
|
|
943
964
|
statusCode: res.status
|
|
944
965
|
});
|
|
945
966
|
}
|
|
946
|
-
if (!cache)
|
|
967
|
+
if (!cache && method === "GET")
|
|
947
968
|
await this.saveCache(url, res);
|
|
948
|
-
if (!isVisited)
|
|
969
|
+
if (!isVisited && method === "GET")
|
|
949
970
|
await this.saveUrl(url);
|
|
950
971
|
await this.markUrlVisited(url, {
|
|
951
972
|
status: res.status,
|
|
@@ -1039,6 +1060,7 @@ class Crawler {
|
|
|
1039
1060
|
}
|
|
1040
1061
|
await this.queue.onIdle();
|
|
1041
1062
|
await this.triggerFinishHandlers();
|
|
1063
|
+
await this.destroy();
|
|
1042
1064
|
}
|
|
1043
1065
|
async done() {
|
|
1044
1066
|
return this.waitForAll();
|
|
@@ -1055,7 +1077,7 @@ class Crawler {
|
|
|
1055
1077
|
} catch {}
|
|
1056
1078
|
}
|
|
1057
1079
|
async destroy() {
|
|
1058
|
-
this.queue.
|
|
1080
|
+
this.queue.destroy();
|
|
1059
1081
|
this.events.length = 0;
|
|
1060
1082
|
this.jsonEvents.length = 0;
|
|
1061
1083
|
this.errorEvents.length = 0;
|
|
@@ -1064,6 +1086,7 @@ class Crawler {
|
|
|
1064
1086
|
this.emailDiscoveredEvents.length = 0;
|
|
1065
1087
|
this.emailLeadsEvents.length = 0;
|
|
1066
1088
|
await this.close();
|
|
1089
|
+
resetGlobalAgentPool();
|
|
1067
1090
|
}
|
|
1068
1091
|
}
|
|
1069
1092
|
|
package/dist/crawler/crawler.js
CHANGED
|
@@ -5,11 +5,12 @@ import { NavigationHistory } from './plugin/navigation-history.js';
|
|
|
5
5
|
import { RobotsTxt } from './plugin/robots-txt.js';
|
|
6
6
|
import { parseHTML } from "linkedom";
|
|
7
7
|
import path from "node:path";
|
|
8
|
-
import { Rezo } from '../
|
|
8
|
+
import { Rezo } from '../adapters/entries/http.js';
|
|
9
9
|
import { RezoQueue } from '../queue/queue.js';
|
|
10
10
|
import { Scraper } from './scraper.js';
|
|
11
11
|
import { CrawlerOptions } from './crawler-options.js';
|
|
12
12
|
import { loadAdapter } from '../adapters/picker.js';
|
|
13
|
+
import { resetGlobalAgentPool } from '../utils/agent-pool.js';
|
|
13
14
|
String.prototype.addBaseUrl = function(url) {
|
|
14
15
|
url = url instanceof URL ? url.href : url;
|
|
15
16
|
const html = this.replace(/<base\b[^>]*?>/gi, "");
|
|
@@ -94,6 +95,10 @@ export class Crawler {
|
|
|
94
95
|
}).then((storage) => {
|
|
95
96
|
this.cacher = storage;
|
|
96
97
|
this.isCacheReady = true;
|
|
98
|
+
}).catch((err) => {
|
|
99
|
+
if (this.config.debug)
|
|
100
|
+
console.warn("[Crawler] Failed to initialize cache:", err);
|
|
101
|
+
this.isCacheReady = true;
|
|
97
102
|
});
|
|
98
103
|
const dit = path.resolve(cacheDir, "urls");
|
|
99
104
|
if (!fs.existsSync(dit))
|
|
@@ -105,6 +110,10 @@ export class Crawler {
|
|
|
105
110
|
}).then((storage) => {
|
|
106
111
|
this.urlStorage = storage;
|
|
107
112
|
this.isStorageReady = true;
|
|
113
|
+
}).catch((err) => {
|
|
114
|
+
if (this.config.debug)
|
|
115
|
+
console.warn("[Crawler] Failed to initialize URL storage:", err);
|
|
116
|
+
this.isStorageReady = true;
|
|
108
117
|
});
|
|
109
118
|
} else {
|
|
110
119
|
const dit = path.resolve(this.config.cacheDir, "./cache/urls");
|
|
@@ -117,6 +126,10 @@ export class Crawler {
|
|
|
117
126
|
}).then((storage) => {
|
|
118
127
|
this.urlStorage = storage;
|
|
119
128
|
this.isStorageReady = true;
|
|
129
|
+
}).catch((err) => {
|
|
130
|
+
if (this.config.debug)
|
|
131
|
+
console.warn("[Crawler] Failed to initialize URL storage:", err);
|
|
132
|
+
this.isStorageReady = true;
|
|
120
133
|
});
|
|
121
134
|
}
|
|
122
135
|
if (this.config.enableNavigationHistory) {
|
|
@@ -856,12 +869,15 @@ export class Crawler {
|
|
|
856
869
|
}
|
|
857
870
|
async execute(method, url, body, options = {}, isEmail, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions) {
|
|
858
871
|
await this.waitForStorage();
|
|
872
|
+
console.log("Waiting for storage...");
|
|
859
873
|
if (this.isCacheEnabled) {
|
|
860
874
|
await this.waitForCache();
|
|
861
875
|
}
|
|
876
|
+
console.log("Waiting for cache...");
|
|
862
877
|
if (this.config.enableNavigationHistory) {
|
|
863
878
|
await this.waitForNavigationHistory();
|
|
864
879
|
}
|
|
880
|
+
console.log("Waiting for navigation history...");
|
|
865
881
|
const task = this.queue.add(() => this.executeHttp(method, url, body, options, isEmail, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions));
|
|
866
882
|
task.finally(() => this.pendingExecutions.delete(task));
|
|
867
883
|
}
|
|
@@ -888,7 +904,9 @@ export class Crawler {
|
|
|
888
904
|
}
|
|
889
905
|
async executeHttp(method, url, body, options = {}, isEmail, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions, retryCount = 0, parentUrl) {
|
|
890
906
|
try {
|
|
907
|
+
console.log("Triggering start handlers...");
|
|
891
908
|
await this.triggerStartHandlers();
|
|
909
|
+
console.log("Checking crawl limits...");
|
|
892
910
|
const limitCheck = await this.checkCrawlLimits(url, parentUrl);
|
|
893
911
|
if (!limitCheck.allowed) {
|
|
894
912
|
if (this.config.debug) {
|
|
@@ -903,13 +921,16 @@ export class Crawler {
|
|
|
903
921
|
await new Promise((resolve) => setTimeout(resolve, delay));
|
|
904
922
|
}
|
|
905
923
|
const isVisited = forceRevisit ? false : await this.hasUrlInCache(url);
|
|
906
|
-
const cache = await this.getCache(url);
|
|
907
|
-
if (
|
|
908
|
-
|
|
909
|
-
|
|
910
|
-
|
|
924
|
+
const cache = method.toLowerCase() === "get" ? await this.getCache(url) : undefined;
|
|
925
|
+
if (method.toLowerCase() === "get") {
|
|
926
|
+
if (isVisited && !cache)
|
|
927
|
+
return;
|
|
928
|
+
if (isVisited)
|
|
929
|
+
return;
|
|
930
|
+
}
|
|
911
931
|
const requestStartTime = Date.now();
|
|
912
932
|
const response = cache && method === "GET" ? cache : oxylabsInstanse && oxylabsOptions ? await oxylabsInstanse.scrape(url) : decodoInstanse && decodoOptions ? await decodoInstanse.scrape(url) : await (method === "GET" ? this.http.get(url, options) : method === "PATCH" ? this.http.patch(url, body, options) : method === "POST" ? this.http.post(url, body, options) : this.http.put(url, body, options));
|
|
933
|
+
console.log("Response received...");
|
|
913
934
|
if (!cache) {
|
|
914
935
|
const responseTime = Date.now() - requestStartTime;
|
|
915
936
|
this.calculateAutoThrottleDelay(domain, responseTime);
|
|
@@ -943,9 +964,9 @@ export class Crawler {
|
|
|
943
964
|
statusCode: res.status
|
|
944
965
|
});
|
|
945
966
|
}
|
|
946
|
-
if (!cache)
|
|
967
|
+
if (!cache && method === "GET")
|
|
947
968
|
await this.saveCache(url, res);
|
|
948
|
-
if (!isVisited)
|
|
969
|
+
if (!isVisited && method === "GET")
|
|
949
970
|
await this.saveUrl(url);
|
|
950
971
|
await this.markUrlVisited(url, {
|
|
951
972
|
status: res.status,
|
|
@@ -1039,6 +1060,7 @@ export class Crawler {
|
|
|
1039
1060
|
}
|
|
1040
1061
|
await this.queue.onIdle();
|
|
1041
1062
|
await this.triggerFinishHandlers();
|
|
1063
|
+
await this.destroy();
|
|
1042
1064
|
}
|
|
1043
1065
|
async done() {
|
|
1044
1066
|
return this.waitForAll();
|
|
@@ -1055,7 +1077,7 @@ export class Crawler {
|
|
|
1055
1077
|
} catch {}
|
|
1056
1078
|
}
|
|
1057
1079
|
async destroy() {
|
|
1058
|
-
this.queue.
|
|
1080
|
+
this.queue.destroy();
|
|
1059
1081
|
this.events.length = 0;
|
|
1060
1082
|
this.jsonEvents.length = 0;
|
|
1061
1083
|
this.errorEvents.length = 0;
|
|
@@ -1064,5 +1086,6 @@ export class Crawler {
|
|
|
1064
1086
|
this.emailDiscoveredEvents.length = 0;
|
|
1065
1087
|
this.emailLeadsEvents.length = 0;
|
|
1066
1088
|
await this.close();
|
|
1089
|
+
resetGlobalAgentPool();
|
|
1067
1090
|
}
|
|
1068
1091
|
}
|
package/dist/crawler/index.cjs
CHANGED
|
@@ -1,40 +1,40 @@
|
|
|
1
|
-
const
|
|
2
|
-
exports.Crawler =
|
|
3
|
-
const
|
|
4
|
-
exports.CrawlerOptions =
|
|
5
|
-
const
|
|
6
|
-
exports.RobotsTxt =
|
|
7
|
-
const
|
|
8
|
-
exports.FileCacher =
|
|
9
|
-
const
|
|
10
|
-
exports.UrlStore =
|
|
11
|
-
const
|
|
12
|
-
exports.NavigationHistory =
|
|
13
|
-
const
|
|
14
|
-
exports.Oxylabs =
|
|
15
|
-
const
|
|
16
|
-
exports.OXYLABS_BROWSER_TYPES =
|
|
17
|
-
exports.OXYLABS_COMMON_LOCALES =
|
|
18
|
-
exports.OXYLABS_COMMON_GEO_LOCATIONS =
|
|
19
|
-
exports.OXYLABS_US_STATES =
|
|
20
|
-
exports.OXYLABS_EUROPEAN_COUNTRIES =
|
|
21
|
-
exports.OXYLABS_ASIAN_COUNTRIES =
|
|
22
|
-
exports.getRandomOxylabsBrowserType =
|
|
23
|
-
exports.getRandomOxylabsLocale =
|
|
24
|
-
exports.getRandomOxylabsGeoLocation =
|
|
25
|
-
const
|
|
26
|
-
exports.Decodo =
|
|
27
|
-
const
|
|
28
|
-
exports.DECODO_DEVICE_TYPES =
|
|
29
|
-
exports.DECODO_HEADLESS_MODES =
|
|
30
|
-
exports.DECODO_COMMON_LOCALES =
|
|
31
|
-
exports.DECODO_COMMON_COUNTRIES =
|
|
32
|
-
exports.DECODO_EUROPEAN_COUNTRIES =
|
|
33
|
-
exports.DECODO_ASIAN_COUNTRIES =
|
|
34
|
-
exports.DECODO_US_STATES =
|
|
35
|
-
exports.DECODO_COMMON_CITIES =
|
|
36
|
-
exports.getRandomDecodoDeviceType =
|
|
37
|
-
exports.getRandomDecodoLocale =
|
|
38
|
-
exports.getRandomDecodoCountry =
|
|
39
|
-
exports.getRandomDecodoCity =
|
|
40
|
-
exports.generateDecodoSessionId =
|
|
1
|
+
const _mod_rvxqq1 = require('./crawler.cjs');
|
|
2
|
+
exports.Crawler = _mod_rvxqq1.Crawler;;
|
|
3
|
+
const _mod_jy5ex9 = require('./crawler-options.cjs');
|
|
4
|
+
exports.CrawlerOptions = _mod_jy5ex9.CrawlerOptions;;
|
|
5
|
+
const _mod_3wi87q = require('./plugin/robots-txt.cjs');
|
|
6
|
+
exports.RobotsTxt = _mod_3wi87q.RobotsTxt;;
|
|
7
|
+
const _mod_fl8p4x = require('./plugin/file-cacher.cjs');
|
|
8
|
+
exports.FileCacher = _mod_fl8p4x.FileCacher;;
|
|
9
|
+
const _mod_mdvmnw = require('./plugin/url-store.cjs');
|
|
10
|
+
exports.UrlStore = _mod_mdvmnw.UrlStore;;
|
|
11
|
+
const _mod_p8gmd0 = require('./plugin/navigation-history.cjs');
|
|
12
|
+
exports.NavigationHistory = _mod_p8gmd0.NavigationHistory;;
|
|
13
|
+
const _mod_e2qt7i = require('./addon/oxylabs/index.cjs');
|
|
14
|
+
exports.Oxylabs = _mod_e2qt7i.Oxylabs;;
|
|
15
|
+
const _mod_5utni8 = require('./addon/oxylabs/options.cjs');
|
|
16
|
+
exports.OXYLABS_BROWSER_TYPES = _mod_5utni8.OXYLABS_BROWSER_TYPES;
|
|
17
|
+
exports.OXYLABS_COMMON_LOCALES = _mod_5utni8.OXYLABS_COMMON_LOCALES;
|
|
18
|
+
exports.OXYLABS_COMMON_GEO_LOCATIONS = _mod_5utni8.OXYLABS_COMMON_GEO_LOCATIONS;
|
|
19
|
+
exports.OXYLABS_US_STATES = _mod_5utni8.OXYLABS_US_STATES;
|
|
20
|
+
exports.OXYLABS_EUROPEAN_COUNTRIES = _mod_5utni8.OXYLABS_EUROPEAN_COUNTRIES;
|
|
21
|
+
exports.OXYLABS_ASIAN_COUNTRIES = _mod_5utni8.OXYLABS_ASIAN_COUNTRIES;
|
|
22
|
+
exports.getRandomOxylabsBrowserType = _mod_5utni8.getRandomBrowserType;
|
|
23
|
+
exports.getRandomOxylabsLocale = _mod_5utni8.getRandomLocale;
|
|
24
|
+
exports.getRandomOxylabsGeoLocation = _mod_5utni8.getRandomGeoLocation;;
|
|
25
|
+
const _mod_gao0f0 = require('./addon/decodo/index.cjs');
|
|
26
|
+
exports.Decodo = _mod_gao0f0.Decodo;;
|
|
27
|
+
const _mod_gzkadc = require('./addon/decodo/options.cjs');
|
|
28
|
+
exports.DECODO_DEVICE_TYPES = _mod_gzkadc.DECODO_DEVICE_TYPES;
|
|
29
|
+
exports.DECODO_HEADLESS_MODES = _mod_gzkadc.DECODO_HEADLESS_MODES;
|
|
30
|
+
exports.DECODO_COMMON_LOCALES = _mod_gzkadc.DECODO_COMMON_LOCALES;
|
|
31
|
+
exports.DECODO_COMMON_COUNTRIES = _mod_gzkadc.DECODO_COMMON_COUNTRIES;
|
|
32
|
+
exports.DECODO_EUROPEAN_COUNTRIES = _mod_gzkadc.DECODO_EUROPEAN_COUNTRIES;
|
|
33
|
+
exports.DECODO_ASIAN_COUNTRIES = _mod_gzkadc.DECODO_ASIAN_COUNTRIES;
|
|
34
|
+
exports.DECODO_US_STATES = _mod_gzkadc.DECODO_US_STATES;
|
|
35
|
+
exports.DECODO_COMMON_CITIES = _mod_gzkadc.DECODO_COMMON_CITIES;
|
|
36
|
+
exports.getRandomDecodoDeviceType = _mod_gzkadc.getRandomDeviceType;
|
|
37
|
+
exports.getRandomDecodoLocale = _mod_gzkadc.getRandomLocale;
|
|
38
|
+
exports.getRandomDecodoCountry = _mod_gzkadc.getRandomCountry;
|
|
39
|
+
exports.getRandomDecodoCity = _mod_gzkadc.getRandomCity;
|
|
40
|
+
exports.generateDecodoSessionId = _mod_gzkadc.generateSessionId;;
|
package/dist/entries/crawler.cjs
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
const
|
|
2
|
-
exports.Crawler =
|
|
3
|
-
const
|
|
4
|
-
exports.CrawlerOptions =
|
|
5
|
-
exports.Domain =
|
|
1
|
+
const _mod_f7dbqn = require('../crawler/crawler.cjs');
|
|
2
|
+
exports.Crawler = _mod_f7dbqn.Crawler;;
|
|
3
|
+
const _mod_h6mpf1 = require('../crawler/crawler-options.cjs');
|
|
4
|
+
exports.CrawlerOptions = _mod_h6mpf1.CrawlerOptions;
|
|
5
|
+
exports.Domain = _mod_h6mpf1.Domain;;
|
package/dist/index.cjs
CHANGED
|
@@ -1,30 +1,30 @@
|
|
|
1
|
-
const
|
|
2
|
-
exports.Rezo =
|
|
3
|
-
exports.createRezoInstance =
|
|
4
|
-
exports.createDefaultInstance =
|
|
5
|
-
const
|
|
6
|
-
exports.RezoError =
|
|
7
|
-
exports.RezoErrorCode =
|
|
8
|
-
const
|
|
9
|
-
exports.RezoHeaders =
|
|
10
|
-
const
|
|
11
|
-
exports.RezoFormData =
|
|
12
|
-
const
|
|
13
|
-
exports.RezoCookieJar =
|
|
14
|
-
exports.Cookie =
|
|
15
|
-
const
|
|
16
|
-
exports.toCurl =
|
|
17
|
-
exports.fromCurl =
|
|
18
|
-
const
|
|
19
|
-
exports.createDefaultHooks =
|
|
20
|
-
exports.mergeHooks =
|
|
21
|
-
const
|
|
22
|
-
exports.ProxyManager =
|
|
23
|
-
const
|
|
24
|
-
exports.RezoQueue =
|
|
25
|
-
exports.HttpQueue =
|
|
26
|
-
exports.Priority =
|
|
27
|
-
exports.HttpMethodPriority =
|
|
1
|
+
const _mod_jh14m9 = require('./core/rezo.cjs');
|
|
2
|
+
exports.Rezo = _mod_jh14m9.Rezo;
|
|
3
|
+
exports.createRezoInstance = _mod_jh14m9.createRezoInstance;
|
|
4
|
+
exports.createDefaultInstance = _mod_jh14m9.createDefaultInstance;;
|
|
5
|
+
const _mod_5th6ta = require('./errors/rezo-error.cjs');
|
|
6
|
+
exports.RezoError = _mod_5th6ta.RezoError;
|
|
7
|
+
exports.RezoErrorCode = _mod_5th6ta.RezoErrorCode;;
|
|
8
|
+
const _mod_45ccpv = require('./utils/headers.cjs');
|
|
9
|
+
exports.RezoHeaders = _mod_45ccpv.RezoHeaders;;
|
|
10
|
+
const _mod_4hcsiy = require('./utils/form-data.cjs');
|
|
11
|
+
exports.RezoFormData = _mod_4hcsiy.RezoFormData;;
|
|
12
|
+
const _mod_4l7ckn = require('./utils/cookies.cjs');
|
|
13
|
+
exports.RezoCookieJar = _mod_4l7ckn.RezoCookieJar;
|
|
14
|
+
exports.Cookie = _mod_4l7ckn.Cookie;;
|
|
15
|
+
const _mod_uz0amh = require('./utils/curl.cjs');
|
|
16
|
+
exports.toCurl = _mod_uz0amh.toCurl;
|
|
17
|
+
exports.fromCurl = _mod_uz0amh.fromCurl;;
|
|
18
|
+
const _mod_yngoam = require('./core/hooks.cjs');
|
|
19
|
+
exports.createDefaultHooks = _mod_yngoam.createDefaultHooks;
|
|
20
|
+
exports.mergeHooks = _mod_yngoam.mergeHooks;;
|
|
21
|
+
const _mod_k1cygh = require('./proxy/manager.cjs');
|
|
22
|
+
exports.ProxyManager = _mod_k1cygh.ProxyManager;;
|
|
23
|
+
const _mod_di8wn9 = require('./queue/index.cjs');
|
|
24
|
+
exports.RezoQueue = _mod_di8wn9.RezoQueue;
|
|
25
|
+
exports.HttpQueue = _mod_di8wn9.HttpQueue;
|
|
26
|
+
exports.Priority = _mod_di8wn9.Priority;
|
|
27
|
+
exports.HttpMethodPriority = _mod_di8wn9.HttpMethodPriority;;
|
|
28
28
|
const { RezoError } = require('./errors/rezo-error.cjs');
|
|
29
29
|
const isRezoError = exports.isRezoError = RezoError.isRezoError;
|
|
30
30
|
const Cancel = exports.Cancel = RezoError;
|
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
const
|
|
2
|
-
exports.Agent =
|
|
3
|
-
const
|
|
4
|
-
exports.HttpProxyAgent =
|
|
5
|
-
const
|
|
6
|
-
exports.HttpsProxyAgent =
|
|
7
|
-
const
|
|
8
|
-
exports.SocksProxyAgent =
|
|
9
|
-
const
|
|
10
|
-
exports.SocksClient =
|
|
1
|
+
const _mod_yzlkcq = require('./base.cjs');
|
|
2
|
+
exports.Agent = _mod_yzlkcq.Agent;;
|
|
3
|
+
const _mod_ebay39 = require('./http-proxy.cjs');
|
|
4
|
+
exports.HttpProxyAgent = _mod_ebay39.HttpProxyAgent;;
|
|
5
|
+
const _mod_xntect = require('./https-proxy.cjs');
|
|
6
|
+
exports.HttpsProxyAgent = _mod_xntect.HttpsProxyAgent;;
|
|
7
|
+
const _mod_ti239q = require('./socks-proxy.cjs');
|
|
8
|
+
exports.SocksProxyAgent = _mod_ti239q.SocksProxyAgent;;
|
|
9
|
+
const _mod_8g5j5m = require('./socks-client.cjs');
|
|
10
|
+
exports.SocksClient = _mod_8g5j5m.SocksClient;;
|
package/dist/proxy/index.cjs
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
const { Agent, HttpProxyAgent, HttpsProxyAgent, SocksProxyAgent } = require('../internal/agents/index.cjs');
|
|
2
2
|
const { parseProxyString } = require('./parse.cjs');
|
|
3
|
-
const
|
|
4
|
-
exports.ProxyManager =
|
|
5
|
-
const
|
|
6
|
-
exports.parseProxyString =
|
|
3
|
+
const _mod_pkrm9n = require('./manager.cjs');
|
|
4
|
+
exports.ProxyManager = _mod_pkrm9n.ProxyManager;;
|
|
5
|
+
const _mod_glpq6p = require('./parse.cjs');
|
|
6
|
+
exports.parseProxyString = _mod_glpq6p.parseProxyString;;
|
|
7
7
|
function createOptions(uri, opts) {
|
|
8
8
|
if (uri instanceof URL || typeof uri === "string") {
|
|
9
9
|
return {
|
package/dist/queue/index.cjs
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
const
|
|
2
|
-
exports.RezoQueue =
|
|
3
|
-
const
|
|
4
|
-
exports.HttpQueue =
|
|
5
|
-
exports.extractDomain =
|
|
6
|
-
const
|
|
7
|
-
exports.Priority =
|
|
8
|
-
exports.HttpMethodPriority =
|
|
1
|
+
const _mod_kc1i35 = require('./queue.cjs');
|
|
2
|
+
exports.RezoQueue = _mod_kc1i35.RezoQueue;;
|
|
3
|
+
const _mod_ajadj5 = require('./http-queue.cjs');
|
|
4
|
+
exports.HttpQueue = _mod_ajadj5.HttpQueue;
|
|
5
|
+
exports.extractDomain = _mod_ajadj5.extractDomain;;
|
|
6
|
+
const _mod_6ghr69 = require('./types.cjs');
|
|
7
|
+
exports.Priority = _mod_6ghr69.Priority;
|
|
8
|
+
exports.HttpMethodPriority = _mod_6ghr69.HttpMethodPriority;;
|
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
const
|
|
2
|
-
exports.UniversalEventEmitter =
|
|
3
|
-
const
|
|
4
|
-
exports.UniversalStreamResponse =
|
|
5
|
-
exports.StreamResponse =
|
|
6
|
-
const
|
|
7
|
-
exports.UniversalDownloadResponse =
|
|
8
|
-
exports.DownloadResponse =
|
|
9
|
-
const
|
|
10
|
-
exports.UniversalUploadResponse =
|
|
11
|
-
exports.UploadResponse =
|
|
1
|
+
const _mod_yo50yl = require('./event-emitter.cjs');
|
|
2
|
+
exports.UniversalEventEmitter = _mod_yo50yl.UniversalEventEmitter;;
|
|
3
|
+
const _mod_l4durm = require('./stream.cjs');
|
|
4
|
+
exports.UniversalStreamResponse = _mod_l4durm.UniversalStreamResponse;
|
|
5
|
+
exports.StreamResponse = _mod_l4durm.StreamResponse;;
|
|
6
|
+
const _mod_0wvtwo = require('./download.cjs');
|
|
7
|
+
exports.UniversalDownloadResponse = _mod_0wvtwo.UniversalDownloadResponse;
|
|
8
|
+
exports.DownloadResponse = _mod_0wvtwo.DownloadResponse;;
|
|
9
|
+
const _mod_c7py98 = require('./upload.cjs');
|
|
10
|
+
exports.UniversalUploadResponse = _mod_c7py98.UniversalUploadResponse;
|
|
11
|
+
exports.UploadResponse = _mod_c7py98.UploadResponse;;
|
|
@@ -1,23 +1,4 @@
|
|
|
1
|
-
const { createGunzip, createInflate, createBrotliDecompress } = require("node:zlib");
|
|
2
|
-
const { Transform } = require("node:stream");
|
|
3
|
-
let createZstdDecompress = null;
|
|
4
|
-
let zstdAvailable = null;
|
|
5
|
-
function getZstdDecompressor() {
|
|
6
|
-
if (zstdAvailable === null) {
|
|
7
|
-
try {
|
|
8
|
-
const zlib = require("node:zlib");
|
|
9
|
-
if (typeof zlib.createZstdDecompress === "function") {
|
|
10
|
-
createZstdDecompress = zlib.createZstdDecompress;
|
|
11
|
-
zstdAvailable = true;
|
|
12
|
-
} else {
|
|
13
|
-
zstdAvailable = false;
|
|
14
|
-
}
|
|
15
|
-
} catch {
|
|
16
|
-
zstdAvailable = false;
|
|
17
|
-
}
|
|
18
|
-
}
|
|
19
|
-
return createZstdDecompress;
|
|
20
|
-
}
|
|
1
|
+
const { createGunzip, createInflate, createBrotliDecompress, createZstdDecompress } = require("node:zlib");
|
|
21
2
|
|
|
22
3
|
class CompressionUtil {
|
|
23
4
|
static decompressStream(response, contentEncoding, config) {
|
|
@@ -41,7 +22,7 @@ class CompressionUtil {
|
|
|
41
22
|
case "brotli":
|
|
42
23
|
return response.pipe(createBrotliDecompress());
|
|
43
24
|
case "zstd":
|
|
44
|
-
return
|
|
25
|
+
return response.pipe(createZstdDecompress());
|
|
45
26
|
default:
|
|
46
27
|
return response;
|
|
47
28
|
}
|
|
@@ -61,18 +42,6 @@ class CompressionUtil {
|
|
|
61
42
|
}
|
|
62
43
|
return true;
|
|
63
44
|
}
|
|
64
|
-
static createZstdDecompressStream(response) {
|
|
65
|
-
const decompressor = getZstdDecompressor();
|
|
66
|
-
if (!decompressor) {
|
|
67
|
-
const passthrough = new Transform({
|
|
68
|
-
transform(chunk, encoding, callback) {
|
|
69
|
-
callback(new Error("zstd decompression not available: requires Node.js 22.15+ with native zstd support"));
|
|
70
|
-
}
|
|
71
|
-
});
|
|
72
|
-
return response.pipe(passthrough);
|
|
73
|
-
}
|
|
74
|
-
return response.pipe(decompressor());
|
|
75
|
-
}
|
|
76
45
|
static getSupportedAlgorithms() {
|
|
77
46
|
return ["gzip", "x-gzip", "deflate", "x-deflate", "gzip-raw", "br", "brotli", "zstd"];
|
|
78
47
|
}
|
|
@@ -1,23 +1,4 @@
|
|
|
1
|
-
import { createGunzip, createInflate, createBrotliDecompress } from "node:zlib";
|
|
2
|
-
import { Transform } from "node:stream";
|
|
3
|
-
let createZstdDecompress = null;
|
|
4
|
-
let zstdAvailable = null;
|
|
5
|
-
function getZstdDecompressor() {
|
|
6
|
-
if (zstdAvailable === null) {
|
|
7
|
-
try {
|
|
8
|
-
const zlib = require("node:zlib");
|
|
9
|
-
if (typeof zlib.createZstdDecompress === "function") {
|
|
10
|
-
createZstdDecompress = zlib.createZstdDecompress;
|
|
11
|
-
zstdAvailable = true;
|
|
12
|
-
} else {
|
|
13
|
-
zstdAvailable = false;
|
|
14
|
-
}
|
|
15
|
-
} catch {
|
|
16
|
-
zstdAvailable = false;
|
|
17
|
-
}
|
|
18
|
-
}
|
|
19
|
-
return createZstdDecompress;
|
|
20
|
-
}
|
|
1
|
+
import { createGunzip, createInflate, createBrotliDecompress, createZstdDecompress } from "node:zlib";
|
|
21
2
|
|
|
22
3
|
export class CompressionUtil {
|
|
23
4
|
static decompressStream(response, contentEncoding, config) {
|
|
@@ -41,7 +22,7 @@ export class CompressionUtil {
|
|
|
41
22
|
case "brotli":
|
|
42
23
|
return response.pipe(createBrotliDecompress());
|
|
43
24
|
case "zstd":
|
|
44
|
-
return
|
|
25
|
+
return response.pipe(createZstdDecompress());
|
|
45
26
|
default:
|
|
46
27
|
return response;
|
|
47
28
|
}
|
|
@@ -61,18 +42,6 @@ export class CompressionUtil {
|
|
|
61
42
|
}
|
|
62
43
|
return true;
|
|
63
44
|
}
|
|
64
|
-
static createZstdDecompressStream(response) {
|
|
65
|
-
const decompressor = getZstdDecompressor();
|
|
66
|
-
if (!decompressor) {
|
|
67
|
-
const passthrough = new Transform({
|
|
68
|
-
transform(chunk, encoding, callback) {
|
|
69
|
-
callback(new Error("zstd decompression not available: requires Node.js 22.15+ with native zstd support"));
|
|
70
|
-
}
|
|
71
|
-
});
|
|
72
|
-
return response.pipe(passthrough);
|
|
73
|
-
}
|
|
74
|
-
return response.pipe(decompressor());
|
|
75
|
-
}
|
|
76
45
|
static getSupportedAlgorithms() {
|
|
77
46
|
return ["gzip", "x-gzip", "deflate", "x-deflate", "gzip-raw", "br", "brotli", "zstd"];
|
|
78
47
|
}
|
|
@@ -173,7 +173,7 @@ function setSignal() {
|
|
|
173
173
|
clearTimeout(this.timeoutClearInstanse);
|
|
174
174
|
if (this.timeout && typeof this.timeout === "number" && this.timeout > 100) {
|
|
175
175
|
const controller = new AbortController;
|
|
176
|
-
const timer = setTimeout(() => controller.abort(), this.timeout);
|
|
176
|
+
const timer = setTimeout(() => controller.abort(), this.timeout).unref();
|
|
177
177
|
this.timeoutClearInstanse = timer;
|
|
178
178
|
this.signal = controller.signal;
|
|
179
179
|
}
|
|
@@ -173,7 +173,7 @@ function setSignal() {
|
|
|
173
173
|
clearTimeout(this.timeoutClearInstanse);
|
|
174
174
|
if (this.timeout && typeof this.timeout === "number" && this.timeout > 100) {
|
|
175
175
|
const controller = new AbortController;
|
|
176
|
-
const timer = setTimeout(() => controller.abort(), this.timeout);
|
|
176
|
+
const timer = setTimeout(() => controller.abort(), this.timeout).unref();
|
|
177
177
|
this.timeoutClearInstanse = timer;
|
|
178
178
|
this.signal = controller.signal;
|
|
179
179
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "rezo",
|
|
3
|
-
"version": "1.0.
|
|
3
|
+
"version": "1.0.52",
|
|
4
4
|
"description": "Lightning-fast, enterprise-grade HTTP client for modern JavaScript. Full HTTP/2 support, intelligent cookie management, multiple adapters (HTTP, Fetch, cURL, XHR), streaming, proxy support (HTTP/HTTPS/SOCKS), and cross-environment compatibility.",
|
|
5
5
|
"main": "dist/index.cjs",
|
|
6
6
|
"module": "dist/index.js",
|