rezo 1.0.63 → 1.0.65

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
- const _mod_g8v6iy = require('./picker.cjs');
2
- exports.detectRuntime = _mod_g8v6iy.detectRuntime;
3
- exports.getAdapterCapabilities = _mod_g8v6iy.getAdapterCapabilities;
4
- exports.buildAdapterContext = _mod_g8v6iy.buildAdapterContext;
5
- exports.getAvailableAdapters = _mod_g8v6iy.getAvailableAdapters;
6
- exports.selectAdapter = _mod_g8v6iy.selectAdapter;;
1
+ const _mod_zprug4 = require('./picker.cjs');
2
+ exports.detectRuntime = _mod_zprug4.detectRuntime;
3
+ exports.getAdapterCapabilities = _mod_zprug4.getAdapterCapabilities;
4
+ exports.buildAdapterContext = _mod_zprug4.buildAdapterContext;
5
+ exports.getAvailableAdapters = _mod_zprug4.getAvailableAdapters;
6
+ exports.selectAdapter = _mod_zprug4.selectAdapter;;
@@ -1,9 +1,9 @@
1
- const _mod_d8k5x3 = require('./lru-cache.cjs');
2
- exports.LRUCache = _mod_d8k5x3.LRUCache;;
3
- const _mod_tyx5sp = require('./dns-cache.cjs');
4
- exports.DNSCache = _mod_tyx5sp.DNSCache;
5
- exports.getGlobalDNSCache = _mod_tyx5sp.getGlobalDNSCache;
6
- exports.resetGlobalDNSCache = _mod_tyx5sp.resetGlobalDNSCache;;
7
- const _mod_1wi3uo = require('./response-cache.cjs');
8
- exports.ResponseCache = _mod_1wi3uo.ResponseCache;
9
- exports.normalizeResponseCacheConfig = _mod_1wi3uo.normalizeResponseCacheConfig;;
1
+ const _mod_0xjclg = require('./lru-cache.cjs');
2
+ exports.LRUCache = _mod_0xjclg.LRUCache;;
3
+ const _mod_epwbuk = require('./dns-cache.cjs');
4
+ exports.DNSCache = _mod_epwbuk.DNSCache;
5
+ exports.getGlobalDNSCache = _mod_epwbuk.getGlobalDNSCache;
6
+ exports.resetGlobalDNSCache = _mod_epwbuk.resetGlobalDNSCache;;
7
+ const _mod_ukilsx = require('./response-cache.cjs');
8
+ exports.ResponseCache = _mod_ukilsx.ResponseCache;
9
+ exports.normalizeResponseCacheConfig = _mod_ukilsx.normalizeResponseCacheConfig;;
@@ -834,7 +834,8 @@ class Crawler {
834
834
  useOxylabsScraperAi = false,
835
835
  useOxylabsRotation = true,
836
836
  useDecodo = false,
837
- skipCache = false
837
+ skipCache = false,
838
+ emailMetadata = {}
838
839
  } = options || {};
839
840
  const _options = {
840
841
  headers: this.config.pickHeaders(url, true, headers, true),
@@ -843,7 +844,7 @@ class Crawler {
843
844
  params,
844
845
  proxy: useProxy ? this.config.getAdapter(url, "proxies", true, true) || undefined : undefined,
845
846
  rejectUnauthorized: typeof rejectUnauthorized === "boolean" ? rejectUnauthorized : this.config.rejectUnauthorized,
846
- pqueue: this.config.getAdapter(url, "limiters", useQueue, useQueue) || undefined
847
+ queue: this.config.getAdapter(url, "limiters", useQueue, useQueue) || undefined
847
848
  };
848
849
  let oxylabsOptions = {};
849
850
  let oxylabsInstanse = undefined;
@@ -872,7 +873,7 @@ class Crawler {
872
873
  this.addToNavigationQueue(url, method, body, headersObj);
873
874
  }
874
875
  if (deepEmailFinder) {
875
- const p = this.execute2(method, url, body, _options, forceRevisit);
876
+ const p = this.execute2(method, url, body, _options, forceRevisit, emailMetadata);
876
877
  this.pendingExecutions.add(p);
877
878
  p.finally(() => this.pendingExecutions.delete(p));
878
879
  return this;
@@ -893,7 +894,7 @@ class Crawler {
893
894
  const task = this.queue.add(() => this.executeHttp(method, url, body, options, isEmail, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions, 0, undefined, skipCache));
894
895
  task.finally(() => this.pendingExecutions.delete(task));
895
896
  }
896
- async execute2(method, url, body, options = {}, forceRevisit) {
897
+ async execute2(method, url, body, options = {}, forceRevisit, emailMetadata) {
897
898
  await this.waitForStorage();
898
899
  if (this.isCacheEnabled) {
899
900
  await this.waitForCache();
@@ -911,10 +912,11 @@ class Crawler {
911
912
  onEmails: this.emailLeadsEvents,
912
913
  queue: this.queue,
913
914
  depth: 1,
914
- allowCrossDomainTravel: true
915
+ allowCrossDomainTravel: true,
916
+ emailMetadata
915
917
  }, forceRevisit, true)).then();
916
918
  }
917
- async executeHttp(method, url, body, options = {}, isEmail, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions, retryCount = 0, parentUrl, skipCache) {
919
+ async executeHttp(method, url, body, options = {}, isEmail, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions, retryCount = 0, parentUrl, skipCache, emailMetadata) {
918
920
  try {
919
921
  await this.triggerStartHandlers();
920
922
  const limitCheck = await this.checkCrawlLimits(url, parentUrl);
@@ -940,7 +942,6 @@ class Crawler {
940
942
  }
941
943
  const requestStartTime = Date.now();
942
944
  const response = cache && method === "GET" && !skipCache ? cache : oxylabsInstanse && oxylabsOptions ? await oxylabsInstanse.scrape(url) : decodoInstanse && decodoOptions ? await decodoInstanse.scrape(url) : await (method === "GET" ? this.http.get(url, options) : method === "PATCH" ? this.http.patch(url, body, options) : method === "POST" ? this.http.post(url, body, options) : this.http.put(url, body, options));
943
- console.log({ response, cache });
944
945
  if (!cache) {
945
946
  const responseTime = Date.now() - requestStartTime;
946
947
  this.calculateAutoThrottleDelay(domain, responseTime);
@@ -985,7 +986,7 @@ class Crawler {
985
986
  });
986
987
  if (res.contentType && res.contentType.includes("/json")) {
987
988
  if (this.emailDiscoveredEvents.length > 0 || this.emailLeadsEvents.length > 0) {
988
- this.leadsFinder.extractEmails(JSON.stringify(res.data), res.finalUrl, this.emailDiscoveredEvents, this.emailLeadsEvents, this.queue);
989
+ this.leadsFinder.extractEmails(JSON.stringify(res.data), res.finalUrl, this.emailDiscoveredEvents, this.emailLeadsEvents, this.queue, emailMetadata);
989
990
  }
990
991
  for (let i = 0;i < this.jsonEvents.length; i++) {
991
992
  const event = this.jsonEvents[i];
@@ -1000,7 +1001,7 @@ class Crawler {
1000
1001
  if (!res.contentType || !res.contentType.includes("/html") || typeof res.data !== "string")
1001
1002
  return;
1002
1003
  if ((this.emailDiscoveredEvents.length > 0 || this.emailLeadsEvents.length > 0) && isEmail) {
1003
- this.leadsFinder.extractEmails(res.data, res.finalUrl, this.emailDiscoveredEvents, this.emailLeadsEvents, this.queue);
1004
+ this.leadsFinder.extractEmails(res.data, res.finalUrl, this.emailDiscoveredEvents, this.emailLeadsEvents, this.queue, emailMetadata);
1004
1005
  }
1005
1006
  const { document } = parseHTML(res.data.addBaseUrl(res.finalUrl));
1006
1007
  document.URL = res.finalUrl;
@@ -834,7 +834,8 @@ export class Crawler {
834
834
  useOxylabsScraperAi = false,
835
835
  useOxylabsRotation = true,
836
836
  useDecodo = false,
837
- skipCache = false
837
+ skipCache = false,
838
+ emailMetadata = {}
838
839
  } = options || {};
839
840
  const _options = {
840
841
  headers: this.config.pickHeaders(url, true, headers, true),
@@ -843,7 +844,7 @@ export class Crawler {
843
844
  params,
844
845
  proxy: useProxy ? this.config.getAdapter(url, "proxies", true, true) || undefined : undefined,
845
846
  rejectUnauthorized: typeof rejectUnauthorized === "boolean" ? rejectUnauthorized : this.config.rejectUnauthorized,
846
- pqueue: this.config.getAdapter(url, "limiters", useQueue, useQueue) || undefined
847
+ queue: this.config.getAdapter(url, "limiters", useQueue, useQueue) || undefined
847
848
  };
848
849
  let oxylabsOptions = {};
849
850
  let oxylabsInstanse = undefined;
@@ -872,7 +873,7 @@ export class Crawler {
872
873
  this.addToNavigationQueue(url, method, body, headersObj);
873
874
  }
874
875
  if (deepEmailFinder) {
875
- const p = this.execute2(method, url, body, _options, forceRevisit);
876
+ const p = this.execute2(method, url, body, _options, forceRevisit, emailMetadata);
876
877
  this.pendingExecutions.add(p);
877
878
  p.finally(() => this.pendingExecutions.delete(p));
878
879
  return this;
@@ -893,7 +894,7 @@ export class Crawler {
893
894
  const task = this.queue.add(() => this.executeHttp(method, url, body, options, isEmail, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions, 0, undefined, skipCache));
894
895
  task.finally(() => this.pendingExecutions.delete(task));
895
896
  }
896
- async execute2(method, url, body, options = {}, forceRevisit) {
897
+ async execute2(method, url, body, options = {}, forceRevisit, emailMetadata) {
897
898
  await this.waitForStorage();
898
899
  if (this.isCacheEnabled) {
899
900
  await this.waitForCache();
@@ -911,10 +912,11 @@ export class Crawler {
911
912
  onEmails: this.emailLeadsEvents,
912
913
  queue: this.queue,
913
914
  depth: 1,
914
- allowCrossDomainTravel: true
915
+ allowCrossDomainTravel: true,
916
+ emailMetadata
915
917
  }, forceRevisit, true)).then();
916
918
  }
917
- async executeHttp(method, url, body, options = {}, isEmail, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions, retryCount = 0, parentUrl, skipCache) {
919
+ async executeHttp(method, url, body, options = {}, isEmail, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions, retryCount = 0, parentUrl, skipCache, emailMetadata) {
918
920
  try {
919
921
  await this.triggerStartHandlers();
920
922
  const limitCheck = await this.checkCrawlLimits(url, parentUrl);
@@ -940,7 +942,6 @@ export class Crawler {
940
942
  }
941
943
  const requestStartTime = Date.now();
942
944
  const response = cache && method === "GET" && !skipCache ? cache : oxylabsInstanse && oxylabsOptions ? await oxylabsInstanse.scrape(url) : decodoInstanse && decodoOptions ? await decodoInstanse.scrape(url) : await (method === "GET" ? this.http.get(url, options) : method === "PATCH" ? this.http.patch(url, body, options) : method === "POST" ? this.http.post(url, body, options) : this.http.put(url, body, options));
943
- console.log({ response, cache });
944
945
  if (!cache) {
945
946
  const responseTime = Date.now() - requestStartTime;
946
947
  this.calculateAutoThrottleDelay(domain, responseTime);
@@ -985,7 +986,7 @@ export class Crawler {
985
986
  });
986
987
  if (res.contentType && res.contentType.includes("/json")) {
987
988
  if (this.emailDiscoveredEvents.length > 0 || this.emailLeadsEvents.length > 0) {
988
- this.leadsFinder.extractEmails(JSON.stringify(res.data), res.finalUrl, this.emailDiscoveredEvents, this.emailLeadsEvents, this.queue);
989
+ this.leadsFinder.extractEmails(JSON.stringify(res.data), res.finalUrl, this.emailDiscoveredEvents, this.emailLeadsEvents, this.queue, emailMetadata);
989
990
  }
990
991
  for (let i = 0;i < this.jsonEvents.length; i++) {
991
992
  const event = this.jsonEvents[i];
@@ -1000,7 +1001,7 @@ export class Crawler {
1000
1001
  if (!res.contentType || !res.contentType.includes("/html") || typeof res.data !== "string")
1001
1002
  return;
1002
1003
  if ((this.emailDiscoveredEvents.length > 0 || this.emailLeadsEvents.length > 0) && isEmail) {
1003
- this.leadsFinder.extractEmails(res.data, res.finalUrl, this.emailDiscoveredEvents, this.emailLeadsEvents, this.queue);
1004
+ this.leadsFinder.extractEmails(res.data, res.finalUrl, this.emailDiscoveredEvents, this.emailLeadsEvents, this.queue, emailMetadata);
1004
1005
  }
1005
1006
  const { document } = parseHTML(res.data.addBaseUrl(res.finalUrl));
1006
1007
  document.URL = res.finalUrl;
@@ -1,40 +1,40 @@
1
- const _mod_3jkbax = require('./crawler.cjs');
2
- exports.Crawler = _mod_3jkbax.Crawler;;
3
- const _mod_5h2jee = require('./crawler-options.cjs');
4
- exports.CrawlerOptions = _mod_5h2jee.CrawlerOptions;;
5
- const _mod_5q2rh4 = require('./plugin/robots-txt.cjs');
6
- exports.RobotsTxt = _mod_5q2rh4.RobotsTxt;;
7
- const _mod_kc9oto = require('./plugin/file-cacher.cjs');
8
- exports.FileCacher = _mod_kc9oto.FileCacher;;
9
- const _mod_aowqnb = require('./plugin/url-store.cjs');
10
- exports.UrlStore = _mod_aowqnb.UrlStore;;
11
- const _mod_5l7136 = require('./plugin/navigation-history.cjs');
12
- exports.NavigationHistory = _mod_5l7136.NavigationHistory;;
13
- const _mod_wfvdzf = require('./addon/oxylabs/index.cjs');
14
- exports.Oxylabs = _mod_wfvdzf.Oxylabs;;
15
- const _mod_p94a8c = require('./addon/oxylabs/options.cjs');
16
- exports.OXYLABS_BROWSER_TYPES = _mod_p94a8c.OXYLABS_BROWSER_TYPES;
17
- exports.OXYLABS_COMMON_LOCALES = _mod_p94a8c.OXYLABS_COMMON_LOCALES;
18
- exports.OXYLABS_COMMON_GEO_LOCATIONS = _mod_p94a8c.OXYLABS_COMMON_GEO_LOCATIONS;
19
- exports.OXYLABS_US_STATES = _mod_p94a8c.OXYLABS_US_STATES;
20
- exports.OXYLABS_EUROPEAN_COUNTRIES = _mod_p94a8c.OXYLABS_EUROPEAN_COUNTRIES;
21
- exports.OXYLABS_ASIAN_COUNTRIES = _mod_p94a8c.OXYLABS_ASIAN_COUNTRIES;
22
- exports.getRandomOxylabsBrowserType = _mod_p94a8c.getRandomBrowserType;
23
- exports.getRandomOxylabsLocale = _mod_p94a8c.getRandomLocale;
24
- exports.getRandomOxylabsGeoLocation = _mod_p94a8c.getRandomGeoLocation;;
25
- const _mod_l9efqh = require('./addon/decodo/index.cjs');
26
- exports.Decodo = _mod_l9efqh.Decodo;;
27
- const _mod_48mva0 = require('./addon/decodo/options.cjs');
28
- exports.DECODO_DEVICE_TYPES = _mod_48mva0.DECODO_DEVICE_TYPES;
29
- exports.DECODO_HEADLESS_MODES = _mod_48mva0.DECODO_HEADLESS_MODES;
30
- exports.DECODO_COMMON_LOCALES = _mod_48mva0.DECODO_COMMON_LOCALES;
31
- exports.DECODO_COMMON_COUNTRIES = _mod_48mva0.DECODO_COMMON_COUNTRIES;
32
- exports.DECODO_EUROPEAN_COUNTRIES = _mod_48mva0.DECODO_EUROPEAN_COUNTRIES;
33
- exports.DECODO_ASIAN_COUNTRIES = _mod_48mva0.DECODO_ASIAN_COUNTRIES;
34
- exports.DECODO_US_STATES = _mod_48mva0.DECODO_US_STATES;
35
- exports.DECODO_COMMON_CITIES = _mod_48mva0.DECODO_COMMON_CITIES;
36
- exports.getRandomDecodoDeviceType = _mod_48mva0.getRandomDeviceType;
37
- exports.getRandomDecodoLocale = _mod_48mva0.getRandomLocale;
38
- exports.getRandomDecodoCountry = _mod_48mva0.getRandomCountry;
39
- exports.getRandomDecodoCity = _mod_48mva0.getRandomCity;
40
- exports.generateDecodoSessionId = _mod_48mva0.generateSessionId;;
1
+ const _mod_slq7h8 = require('./crawler.cjs');
2
+ exports.Crawler = _mod_slq7h8.Crawler;;
3
+ const _mod_ofhuhf = require('./crawler-options.cjs');
4
+ exports.CrawlerOptions = _mod_ofhuhf.CrawlerOptions;;
5
+ const _mod_ec4x0l = require('./plugin/robots-txt.cjs');
6
+ exports.RobotsTxt = _mod_ec4x0l.RobotsTxt;;
7
+ const _mod_84pqvp = require('./plugin/file-cacher.cjs');
8
+ exports.FileCacher = _mod_84pqvp.FileCacher;;
9
+ const _mod_6809kp = require('./plugin/url-store.cjs');
10
+ exports.UrlStore = _mod_6809kp.UrlStore;;
11
+ const _mod_2ns1bx = require('./plugin/navigation-history.cjs');
12
+ exports.NavigationHistory = _mod_2ns1bx.NavigationHistory;;
13
+ const _mod_nhjlyc = require('./addon/oxylabs/index.cjs');
14
+ exports.Oxylabs = _mod_nhjlyc.Oxylabs;;
15
+ const _mod_cn9oad = require('./addon/oxylabs/options.cjs');
16
+ exports.OXYLABS_BROWSER_TYPES = _mod_cn9oad.OXYLABS_BROWSER_TYPES;
17
+ exports.OXYLABS_COMMON_LOCALES = _mod_cn9oad.OXYLABS_COMMON_LOCALES;
18
+ exports.OXYLABS_COMMON_GEO_LOCATIONS = _mod_cn9oad.OXYLABS_COMMON_GEO_LOCATIONS;
19
+ exports.OXYLABS_US_STATES = _mod_cn9oad.OXYLABS_US_STATES;
20
+ exports.OXYLABS_EUROPEAN_COUNTRIES = _mod_cn9oad.OXYLABS_EUROPEAN_COUNTRIES;
21
+ exports.OXYLABS_ASIAN_COUNTRIES = _mod_cn9oad.OXYLABS_ASIAN_COUNTRIES;
22
+ exports.getRandomOxylabsBrowserType = _mod_cn9oad.getRandomBrowserType;
23
+ exports.getRandomOxylabsLocale = _mod_cn9oad.getRandomLocale;
24
+ exports.getRandomOxylabsGeoLocation = _mod_cn9oad.getRandomGeoLocation;;
25
+ const _mod_l5ipzy = require('./addon/decodo/index.cjs');
26
+ exports.Decodo = _mod_l5ipzy.Decodo;;
27
+ const _mod_je5dfu = require('./addon/decodo/options.cjs');
28
+ exports.DECODO_DEVICE_TYPES = _mod_je5dfu.DECODO_DEVICE_TYPES;
29
+ exports.DECODO_HEADLESS_MODES = _mod_je5dfu.DECODO_HEADLESS_MODES;
30
+ exports.DECODO_COMMON_LOCALES = _mod_je5dfu.DECODO_COMMON_LOCALES;
31
+ exports.DECODO_COMMON_COUNTRIES = _mod_je5dfu.DECODO_COMMON_COUNTRIES;
32
+ exports.DECODO_EUROPEAN_COUNTRIES = _mod_je5dfu.DECODO_EUROPEAN_COUNTRIES;
33
+ exports.DECODO_ASIAN_COUNTRIES = _mod_je5dfu.DECODO_ASIAN_COUNTRIES;
34
+ exports.DECODO_US_STATES = _mod_je5dfu.DECODO_US_STATES;
35
+ exports.DECODO_COMMON_CITIES = _mod_je5dfu.DECODO_COMMON_CITIES;
36
+ exports.getRandomDecodoDeviceType = _mod_je5dfu.getRandomDeviceType;
37
+ exports.getRandomDecodoLocale = _mod_je5dfu.getRandomLocale;
38
+ exports.getRandomDecodoCountry = _mod_je5dfu.getRandomCountry;
39
+ exports.getRandomDecodoCity = _mod_je5dfu.getRandomCity;
40
+ exports.generateDecodoSessionId = _mod_je5dfu.generateSessionId;;
@@ -1 +1 @@
1
- var{parseHTML:w}=require("linkedom");class x extends Set{maxSize;constructor(e){super();this.maxSize=e}add(e){if(this.has(e))return this;if(this.size>=this.maxSize){let n=this.values().next().value;if(n)this.delete(n)}return super.add(e)}}class v{http;httpOptions;onEmailLeads;onEmailDiscovered;debug;discoveredEmails=new x(1e4);userAgents=[];fileExtensions=[];restrictedDomains=$();forbiddenProtocols=["mailto:","tel:","javascript:","data:","sms:","ftp:","file:","irc:","blob:","chrome:","about:","intent:"];constructor(e,n,a,o,t=!1){this.http=e,this.httpOptions=n,this.onEmailLeads=a,this.onEmailDiscovered=o,this.debug=t,this.userAgents=y()}sleep(e){return new Promise((n)=>setTimeout(n,e))}async executeHttp(e,n,a,o,t,i=0){let{getCache:r,saveCache:c,hasUrlInCache:u,saveUrl:l,httpConfig:s={}}=o;if(!e||e.length<3||this.forbiddenProtocols.some((d)=>e.startsWith(d)))return;try{let d=t?!1:await u(e),h=await r(e);if(d&&!h)return!1;if(d&&n!=="GET")return!1;let m=h&&n==="GET"?h:await(n==="GET"?this.http.get(e,s):n==="PATCH"?this.http.patch(e,a,s):n==="POST"?this.http.post(e,a,s):this.http.put(e,a,s));if(!h)await c(e,{data:m.data,contentType:m.contentType,finalUrl:m.finalUrl});if(!d)await l(e);if(!m.contentType||!m.contentType.includes("/html")||!m.contentType.includes("text/")||typeof m.data!=="string")return null;return{data:m.data,contentType:m.contentType,finalUrl:m.finalUrl}}catch(d){let h=d,m=this.httpOptions;if(h&&h.response){let g=h.response.status,p=m.retryDelay||100,b=m.maxRetryAttempts||3,f=m.retryWithoutProxyOnStatusCode||void 0,k=m.maxRetryOnProxyError||3;if(f&&s.proxy&&f.includes(g)&&i<b)return await this.sleep(p),delete s.proxy,await this.executeHttp(e,n,a,o,t,i+1);else if(m.retryOnStatusCode&&s.proxy&&m.retryOnStatusCode.includes(g)&&i<b)return await this.sleep(p),await this.executeHttp(e,n,a,o,t,i+1);else if(m.retryOnProxyError&&s.proxy&&i<k)return await this.sleep(p),await this.executeHttp(e,n,a,o,t,i+1)}if(this.debug){if(this.debug)console.log(`Error: unable to ${n} ${e}: ${d.message}`)}return null}}extractEmails(e,n,a,o,t){let i=this.extractEmailsFromContent(e?.replaceAll("mailto:"," ")),r=[];for(let c of i)if(this.handleEmailDiscovery(c,n,a,t))r.push(c);if(o&&o.length>0&&r.length>0)t.add(async()=>Promise.all(o.map((c)=>c(r))));i.length=0,r.length=0}async parseExternalWebsite(e,n,a,o,t,i=!0,r,c){let u=o.httpConfig?.headers?o.httpConfig.headers instanceof Headers?Object.fromEntries(o.httpConfig.headers.entries()):o.httpConfig.headers:{};o.httpConfig=o.httpConfig||{},o.httpConfig.headers={"user-agent":this.getRandomUserAgent(),accept:"text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8","accept-language":"en-US,en;q=0.9","cache-control":"no-cache",pragma:"no-cache",...u},o.httpConfig.timeout=o.httpConfig.timeout||15000,o.depth=o.depth||0,o.allowCrossDomainTravel=o.allowCrossDomainTravel||!1,t=t&&i;let l=[];try{let s=new URL(e),d=this.extractRootDomain(e);if(this.isLinktreeUrl(e))return await this.parseLinktreeProfile(e,o,t);if(this.isRestrictedDomain(e)){if(this.debug)console.warn(`⚠️ Skipped URL (restricted url): ${e}`);return l}let h=await this.executeHttp(e,n,a,o,t);if(!h){if(this.debug&&h===null)console.warn(`⚠️ Failed to fetch page content: ${e}`);if(this.debug&&h===!1)console.warn(`⚠️ Skipped URL (already visited): ${e}`);return l}let m=this.extractEmailsFromContent(h.data?.replaceAll("mailto:"," "));for(let g of m)if(this.handleEmailDiscovery(g,e,o.onEmailDiscovered,o.queue))l.push(g);if(o.depth>0||!r){let g=w(h.data).document,p=this.extractRelevantLinks(g,s,d,o.depth,o.allowCrossDomainTravel);o.depth--;let b=await Promise.allSettled(p.map((f)=>this.parseExternalWebsite(f,"GET",null,{...o,depth:o.depth},t,!1,!0)));for(let f of b)if(f.status==="fulfilled")l.push(...f.value);else if(this.debug)console.warn("⚠️ Failed to parse child URL:",f.reason?.message)}}catch(s){if(this.debug)console.error(`❌ Error parsing external website: ${e}`,s?.message)}if(i){if(o.onEmails&&o.onEmails.length>0)o.queue.add(async()=>Promise.all(o.onEmails.map((s)=>s(l))))}return l}async parseLinktreeProfile(e,n,a){let o=[];try{let t=await this.executeHttp(e,"GET",null,n,a);if(!t){if(this.debug)console.warn(`⚠️ Failed to fetch Linktree profile: ${e}`);return o}let r=w(t).document.getElementById("links-container");if(!r){if(this.debug)console.warn(`\uD83D\uDD0D No links container found in Linktree profile: ${e}`);return o}let c=this.extractLinktreeExternalUrls(r,e);if(c.length===0){if(this.debug)console.info("\uD83D\uDCED No valid external links found in Linktree profile");return o}if(this.debug)console.info(`\uD83C\uDFAF Found ${c.length} external links in Linktree profile`);let u=await Promise.allSettled(c.map((l)=>this.parseExternalWebsite(l,"GET",null,n,a,!1)));for(let l of u)if(l.status==="fulfilled")o.push(...l.value);else if(this.debug)console.warn("⚠️ Failed to parse Linktree external URL:",l.reason?.message)}catch(t){if(this.debug)console.error(`❌ Error parsing Linktree profile: ${e}`,t?.message)}return o}extractLinktreeExternalUrls(e,n){let a=new Set,o=e.querySelectorAll("a[href][target='_blank']");for(let t of o){let i=t.getAttribute("href");if(!i||i.length<3||this.forbiddenProtocols.some((r)=>i.startsWith(r)))continue;try{let r=new URL(i,n).href,c=this.extractRootDomain(r);if(c!=="linktr.ee"&&!this.isRestrictedDomain(r)&&c.length>3)a.add(r)}catch(r){if(this.debug)console.warn(`\uD83D\uDD17 Invalid URL in Linktree: ${i}`)}}return Array.from(a)}handleEmailDiscovery(e,n,a,o){if(!this.discoveredEmails.has(e)){this.discoveredEmails.add(e);let t={email:e,discoveredAt:n,timestamp:new Date};if(a&&a.length>0)o.add(async()=>Promise.all(a.map((i)=>i(t))));if(this.debug)console.info(`\uD83D\uDCE7 New email discovered: ${e} at ${n}`);return!0}return!1}isDomainAccessAllowed(e,n,a,o){if(o)return!0;if(a===0)return e===n;return e===n||e.endsWith(`.${n}`)||n.endsWith(`.${e}`)}extractRelevantLinks(e,n,a,o,t){let i=[],r=["about","contact","help","support","reach","email","mail","message","company","team","staff","info","inquiry","feedback","service","assistance","connect","touch"],c=e.querySelectorAll("a[href]");for(let u of c){let l=u.getAttribute("href");if(!l||l.length<2)continue;try{let s=this.normalizeUrl(l,n),d=this.extractRootDomain(s);if(!this.isDomainAccessAllowed(d,a,o,t))continue;if(r.some((m)=>s.toLowerCase().includes(m))||this.isLinktreeUrl(s))i.push(s)}catch(s){if(this.debug)console.warn(`\uD83D\uDD17 Invalid link found: ${l}`,s?.message)}}return i}extractEmailsFromContent(e){let n=e.replace(/[^\w@.-\s]/g," "),a=/\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/g,o=(c)=>{let u=/^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$/,l=c.split("@")[1]?.toLowerCase(),s=c.split(".").pop()?.toLowerCase();return u.test(c)&&l!==void 0&&s!==void 0&&!this.fileExtensions.includes(`.${s}`)&&!this.isRestrictedDomain(`https://${l}`)},t=(c)=>{return(c.match(a)||[]).filter(o)},i=e.replace(/<[^>]*>/g," "),r=[...t(i),...t(n)];return[...new Set(r)]}isRestrictedDomain(e){try{let n=new URL(e).host.toLowerCase();return this.restrictedDomains.some((a)=>n===a.toLowerCase()||n.endsWith(`.${a.toLowerCase()}`))}catch{return!0}}isLinktreeUrl(e){try{return this.extractRootDomain(e)==="linktr.ee"}catch{return!1}}extractRootDomain(e){try{let a=new URL(e).hostname.toLowerCase();return a.startsWith("www.")?a.slice(4):a}catch{return""}}normalizeUrl(e,n){if(e.startsWith("http://")||e.startsWith("https://"))return e;if(e.startsWith("//"))return`${n.protocol}${e}`;return new URL(e,n.href).href}getRandomUserAgent(){return this.userAgents[Math.floor(Math.random()*this.userAgents.length)]}}function y(){let e=[{name:"Chrome",version:"91.0.4472.124",engine:"AppleWebKit/537.36"},{name:"Firefox",version:"89.0",engine:"Gecko/20100101"},{name:"Safari",version:"14.1.1",engine:"AppleWebKit/605.1.15"},{name:"Edge",version:"91.0.864.59",engine:"AppleWebKit/537.36"},{name:"Opera",version:"77.0.4054.277",engine:"AppleWebKit/537.36"},{name:"Vivaldi",version:"3.8.2259.42",engine:"AppleWebKit/537.36"},{name:"Brave",version:"1.26.74",engine:"AppleWebKit/537.36"},{name:"Chromium",version:"91.0.4472.101",engine:"AppleWebKit/537.36"},{name:"Yandex",version:"21.5.3.742",engine:"AppleWebKit/537.36"},{name:"Maxthon",version:"5.3.8.2000",engine:"AppleWebKit/537.36"}],n=["Windows NT 10.0","Windows NT 6.1","Macintosh; Intel Mac OS X 10_15_7","Macintosh; Intel Mac OS X 11_4_0","X11; Linux x86_64","X11; Ubuntu; Linux x86_64"],a=[];for(let o=0;o<200;o++){let t=e[Math.floor(Math.random()*e.length)],i=n[Math.floor(Math.random()*n.length)],r="";switch(t.name){case"Chrome":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} Safari/537.36`;break;case"Firefox":r=`Mozilla/5.0 (${i}; rv:${t.version}) ${t.engine} Firefox/${t.version}`;break;case"Safari":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Version/${t.version} Safari/605.1.15`;break;case"Edge":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} Safari/537.36 Edg/${t.version}`;break;case"Opera":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} Safari/537.36 OPR/${t.version}`;break;case"Vivaldi":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} Safari/537.36 Vivaldi/${t.version}`;break;case"Brave":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} Safari/537.36 Brave/${t.version}`;break;case"Chromium":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chromium/${t.version} Chrome/${t.version} Safari/537.36`;break;case"Yandex":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} YaBrowser/${t.version} Safari/537.36`;break;case"Maxthon":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} Safari/537.36 Maxthon/${t.version}`;break}a.push(r)}return a}function $(){return["facebook.com","fb.com","messenger.com","instagram.com","threads.net","twitter.com","x.com","linkedin.com","pinterest.com","pin.it","reddit.com","tumblr.com","snapchat.com","tiktok.com","douyin.com","youtube.com","youtu.be","whatsapp.com","telegram.org","t.me","medium.com","quora.com","flickr.com","vimeo.com","vk.com","weibo.com","sina.com.cn","line.me","discord.com","discordapp.com","twitch.tv","meetup.com","nextdoor.com","xing.com","yelp.com","zalo.me","mastodon.social","clubhouse.com","patreon.com","onlyfans.com","douban.com","goodreads.com","soundcloud.com","spotify.com","last.fm","behance.net","dribbble.com","deviantart.com","pixiv.net","slideshare.net","tinder.com","bumble.com","etsy.com","indeed.com","glassdoor.com","monster.com","careerbuilder.com","dice.com","ziprecruiter.com","simplyhired.com","upwork.com","freelancer.com","fiverr.com","stackoverflow.com","stackoverflow.co","angel.co","wellfound.com","quora.com","stackexchange.com","yahoo.com","answers.microsoft.com","askubuntu.com","superuser.com","serverfault.com","mathoverflow.net","xda-developers.com","gamespot.com","ign.com","4chan.org","9gag.com","gizmodo.com","slashdot.org","hacker-news.news","ycombinator.com","producthunt.com","discourse.org","google.com","google.co.uk","google.de","google.fr","google.co.jp","bing.com","yahoo.com","search.yahoo.com","duckduckgo.com","baidu.com","yandex.com","yandex.ru","ask.com","wolframalpha.com","ecosia.org","startpage.com","qwant.com","searx.me","gibiru.com","swisscows.com","gmail.com","googlemail.com","outlook.com","hotmail.com","live.com","msn.com","yahoo.com","ymail.com","aol.com","icloud.com","me.com","mac.com","protonmail.com","pm.me","zoho.com","mail.com","gmx.com","gmx.net","yandex.com","yandex.ru","tutanota.com","tutanota.de","fastmail.com","hushmail.com","mailbox.org","posteo.de","runbox.com","disroot.org","163.com","qq.com","rambler.ru","mail.ru","yelp.com","yelp.ca","yelp.co.uk","yelp.com.au","yellowpages.com","yellowpages.ca","yell.com","tripadvisor.com","tripadvisor.co.uk","tripadvisor.ca","foursquare.com","angieslist.com","bbb.org","manta.com","thumbtack.com","homeadvisor.com","superpages.com","whitepages.com","local.com","citysearch.com","merchantcircle.com","insiderpages.com","kudzu.com","hotfrog.com","buildzoom.com","houzz.com","porch.com","mapquest.com","zagat.com","zomato.com","opentable.com","viator.com","expedia.com","booking.com","airbnb.com","vrbo.com","homeaway.com","craigslist.org","nextdoor.com","patch.com","meetup.com","eventbrite.com","groupon.com","livingsocial.com","gumtree.com","gumtree.com.au","kijiji.ca","leboncoin.fr","finn.no","blocket.se","58.com","dianping.com","tabelog.com","ypcdn.com"]}exports.Scraper=v;exports.CappedSet=x;
1
+ var{parseHTML:w}=require("linkedom");class x extends Set{maxSize;constructor(e){super();this.maxSize=e}add(e){if(this.has(e))return this;if(this.size>=this.maxSize){let a=this.values().next().value;if(a)this.delete(a)}return super.add(e)}}class v{http;httpOptions;onEmailLeads;onEmailDiscovered;debug;discoveredEmails=new x(1e4);userAgents=[];fileExtensions=[];restrictedDomains=$();forbiddenProtocols=["mailto:","tel:","javascript:","data:","sms:","ftp:","file:","irc:","blob:","chrome:","about:","intent:"];constructor(e,a,n,o,t=!1){this.http=e,this.httpOptions=a,this.onEmailLeads=n,this.onEmailDiscovered=o,this.debug=t,this.userAgents=y()}sleep(e){return new Promise((a)=>setTimeout(a,e))}async executeHttp(e,a,n,o,t,i=0){let{getCache:r,saveCache:l,hasUrlInCache:d,saveUrl:c,httpConfig:s={}}=o;if(!e||e.length<3||this.forbiddenProtocols.some((u)=>e.startsWith(u)))return;try{let u=t?!1:await d(e),h=await r(e);if(u&&!h)return!1;if(u&&a!=="GET")return!1;let m=h&&a==="GET"?h:await(a==="GET"?this.http.get(e,s):a==="PATCH"?this.http.patch(e,n,s):a==="POST"?this.http.post(e,n,s):this.http.put(e,n,s));if(!h)await l(e,{data:m.data,contentType:m.contentType,finalUrl:m.finalUrl});if(!u)await c(e);if(!m.contentType||!m.contentType.includes("/html")||!m.contentType.includes("text/")||typeof m.data!=="string")return null;return{data:m.data,contentType:m.contentType,finalUrl:m.finalUrl}}catch(u){let h=u,m=this.httpOptions;if(h&&h.response){let g=h.response.status,p=m.retryDelay||100,b=m.maxRetryAttempts||3,f=m.retryWithoutProxyOnStatusCode||void 0,k=m.maxRetryOnProxyError||3;if(f&&s.proxy&&f.includes(g)&&i<b)return await this.sleep(p),delete s.proxy,await this.executeHttp(e,a,n,o,t,i+1);else if(m.retryOnStatusCode&&s.proxy&&m.retryOnStatusCode.includes(g)&&i<b)return await this.sleep(p),await this.executeHttp(e,a,n,o,t,i+1);else if(m.retryOnProxyError&&s.proxy&&i<k)return await this.sleep(p),await this.executeHttp(e,a,n,o,t,i+1)}if(this.debug){if(this.debug)console.log(`Error: unable to ${a} ${e}: ${u.message}`)}return null}}extractEmails(e,a,n,o,t,i){let r=this.extractEmailsFromContent(e?.replaceAll("mailto:"," ")),l=[];for(let d of r)if(this.handleEmailDiscovery(d,a,n,t,i))l.push(d);if(o&&o.length>0&&l.length>0)t.add(async()=>Promise.all(o.map((d)=>d(l))));r.length=0,l.length=0}async parseExternalWebsite(e,a,n,o,t,i=!0,r,l){let d=o.httpConfig?.headers?o.httpConfig.headers instanceof Headers?Object.fromEntries(o.httpConfig.headers.entries()):o.httpConfig.headers:{};o.httpConfig=o.httpConfig||{},o.httpConfig.headers={"user-agent":this.getRandomUserAgent(),accept:"text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8","accept-language":"en-US,en;q=0.9","cache-control":"no-cache",pragma:"no-cache",...d},o.httpConfig.timeout=o.httpConfig.timeout||15000,o.depth=o.depth||0,o.allowCrossDomainTravel=o.allowCrossDomainTravel||!1,t=t&&i;let c=[];try{let s=new URL(e),u=this.extractRootDomain(e);if(this.isLinktreeUrl(e))return await this.parseLinktreeProfile(e,o,t);if(this.isRestrictedDomain(e)){if(this.debug)console.warn(`⚠️ Skipped URL (restricted url): ${e}`);return c}let h=await this.executeHttp(e,a,n,o,t);if(!h){if(this.debug&&h===null)console.warn(`⚠️ Failed to fetch page content: ${e}`);if(this.debug&&h===!1)console.warn(`⚠️ Skipped URL (already visited): ${e}`);return c}let m=this.extractEmailsFromContent(h.data?.replaceAll("mailto:"," "));for(let g of m)if(this.handleEmailDiscovery(g,e,o.onEmailDiscovered,o.queue,o.emailMetadata))c.push(g);if(o.depth>0||!r){let g=w(h.data).document,p=this.extractRelevantLinks(g,s,u,o.depth,o.allowCrossDomainTravel);o.depth--;let b=await Promise.allSettled(p.map((f)=>this.parseExternalWebsite(f,"GET",null,{...o,depth:o.depth},t,!1,!0)));for(let f of b)if(f.status==="fulfilled")c.push(...f.value);else if(this.debug)console.warn("⚠️ Failed to parse child URL:",f.reason?.message)}}catch(s){if(this.debug)console.error(`❌ Error parsing external website: ${e}`,s?.message)}if(i){if(o.onEmails&&o.onEmails.length>0)o.queue.add(async()=>Promise.all(o.onEmails.map((s)=>s(c))))}return c}async parseLinktreeProfile(e,a,n){let o=[];try{let t=await this.executeHttp(e,"GET",null,a,n);if(!t){if(this.debug)console.warn(`⚠️ Failed to fetch Linktree profile: ${e}`);return o}let r=w(t).document.getElementById("links-container");if(!r){if(this.debug)console.warn(`\uD83D\uDD0D No links container found in Linktree profile: ${e}`);return o}let l=this.extractLinktreeExternalUrls(r,e);if(l.length===0){if(this.debug)console.info("\uD83D\uDCED No valid external links found in Linktree profile");return o}if(this.debug)console.info(`\uD83C\uDFAF Found ${l.length} external links in Linktree profile`);let d=await Promise.allSettled(l.map((c)=>this.parseExternalWebsite(c,"GET",null,a,n,!1)));for(let c of d)if(c.status==="fulfilled")o.push(...c.value);else if(this.debug)console.warn("⚠️ Failed to parse Linktree external URL:",c.reason?.message)}catch(t){if(this.debug)console.error(`❌ Error parsing Linktree profile: ${e}`,t?.message)}return o}extractLinktreeExternalUrls(e,a){let n=new Set,o=e.querySelectorAll("a[href][target='_blank']");for(let t of o){let i=t.getAttribute("href");if(!i||i.length<3||this.forbiddenProtocols.some((r)=>i.startsWith(r)))continue;try{let r=new URL(i,a).href,l=this.extractRootDomain(r);if(l!=="linktr.ee"&&!this.isRestrictedDomain(r)&&l.length>3)n.add(r)}catch(r){if(this.debug)console.warn(`\uD83D\uDD17 Invalid URL in Linktree: ${i}`)}}return Array.from(n)}handleEmailDiscovery(e,a,n,o,t){if(!this.discoveredEmails.has(e)){this.discoveredEmails.add(e);let i={email:e,discoveredAt:a,timestamp:new Date,metadata:t||{}};if(n&&n.length>0)o.add(async()=>Promise.all(n.map((r)=>r(i))));if(this.debug)console.info(`\uD83D\uDCE7 New email discovered: ${e} at ${a}`);return!0}return!1}isDomainAccessAllowed(e,a,n,o){if(o)return!0;if(n===0)return e===a;return e===a||e.endsWith(`.${a}`)||a.endsWith(`.${e}`)}extractRelevantLinks(e,a,n,o,t){let i=[],r=["about","contact","help","support","reach","email","mail","message","company","team","staff","info","inquiry","feedback","service","assistance","connect","touch"],l=e.querySelectorAll("a[href]");for(let d of l){let c=d.getAttribute("href");if(!c||c.length<2)continue;try{let s=this.normalizeUrl(c,a),u=this.extractRootDomain(s);if(!this.isDomainAccessAllowed(u,n,o,t))continue;if(r.some((m)=>s.toLowerCase().includes(m))||this.isLinktreeUrl(s))i.push(s)}catch(s){if(this.debug)console.warn(`\uD83D\uDD17 Invalid link found: ${c}`,s?.message)}}return i}extractEmailsFromContent(e){let a=e.replace(/[^\w@.-\s]/g," "),n=/\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/g,o=(l)=>{let d=/^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$/,c=l.split("@")[1]?.toLowerCase(),s=l.split(".").pop()?.toLowerCase();return d.test(l)&&c!==void 0&&s!==void 0&&!this.fileExtensions.includes(`.${s}`)&&!this.isRestrictedDomain(`https://${c}`)},t=(l)=>{return(l.match(n)||[]).filter(o)},i=e.replace(/<[^>]*>/g," "),r=[...t(i),...t(a)];return[...new Set(r)]}isRestrictedDomain(e){try{let a=new URL(e).host.toLowerCase();return this.restrictedDomains.some((n)=>a===n.toLowerCase()||a.endsWith(`.${n.toLowerCase()}`))}catch{return!0}}isLinktreeUrl(e){try{return this.extractRootDomain(e)==="linktr.ee"}catch{return!1}}extractRootDomain(e){try{let n=new URL(e).hostname.toLowerCase();return n.startsWith("www.")?n.slice(4):n}catch{return""}}normalizeUrl(e,a){if(e.startsWith("http://")||e.startsWith("https://"))return e;if(e.startsWith("//"))return`${a.protocol}${e}`;return new URL(e,a.href).href}getRandomUserAgent(){return this.userAgents[Math.floor(Math.random()*this.userAgents.length)]}}function y(){let e=[{name:"Chrome",version:"91.0.4472.124",engine:"AppleWebKit/537.36"},{name:"Firefox",version:"89.0",engine:"Gecko/20100101"},{name:"Safari",version:"14.1.1",engine:"AppleWebKit/605.1.15"},{name:"Edge",version:"91.0.864.59",engine:"AppleWebKit/537.36"},{name:"Opera",version:"77.0.4054.277",engine:"AppleWebKit/537.36"},{name:"Vivaldi",version:"3.8.2259.42",engine:"AppleWebKit/537.36"},{name:"Brave",version:"1.26.74",engine:"AppleWebKit/537.36"},{name:"Chromium",version:"91.0.4472.101",engine:"AppleWebKit/537.36"},{name:"Yandex",version:"21.5.3.742",engine:"AppleWebKit/537.36"},{name:"Maxthon",version:"5.3.8.2000",engine:"AppleWebKit/537.36"}],a=["Windows NT 10.0","Windows NT 6.1","Macintosh; Intel Mac OS X 10_15_7","Macintosh; Intel Mac OS X 11_4_0","X11; Linux x86_64","X11; Ubuntu; Linux x86_64"],n=[];for(let o=0;o<200;o++){let t=e[Math.floor(Math.random()*e.length)],i=a[Math.floor(Math.random()*a.length)],r="";switch(t.name){case"Chrome":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} Safari/537.36`;break;case"Firefox":r=`Mozilla/5.0 (${i}; rv:${t.version}) ${t.engine} Firefox/${t.version}`;break;case"Safari":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Version/${t.version} Safari/605.1.15`;break;case"Edge":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} Safari/537.36 Edg/${t.version}`;break;case"Opera":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} Safari/537.36 OPR/${t.version}`;break;case"Vivaldi":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} Safari/537.36 Vivaldi/${t.version}`;break;case"Brave":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} Safari/537.36 Brave/${t.version}`;break;case"Chromium":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chromium/${t.version} Chrome/${t.version} Safari/537.36`;break;case"Yandex":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} YaBrowser/${t.version} Safari/537.36`;break;case"Maxthon":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} Safari/537.36 Maxthon/${t.version}`;break}n.push(r)}return n}function $(){return["facebook.com","fb.com","messenger.com","instagram.com","threads.net","twitter.com","x.com","linkedin.com","pinterest.com","pin.it","reddit.com","tumblr.com","snapchat.com","tiktok.com","douyin.com","youtube.com","youtu.be","whatsapp.com","telegram.org","t.me","medium.com","quora.com","flickr.com","vimeo.com","vk.com","weibo.com","sina.com.cn","line.me","discord.com","discordapp.com","twitch.tv","meetup.com","nextdoor.com","xing.com","yelp.com","zalo.me","mastodon.social","clubhouse.com","patreon.com","onlyfans.com","douban.com","goodreads.com","soundcloud.com","spotify.com","last.fm","behance.net","dribbble.com","deviantart.com","pixiv.net","slideshare.net","tinder.com","bumble.com","etsy.com","indeed.com","glassdoor.com","monster.com","careerbuilder.com","dice.com","ziprecruiter.com","simplyhired.com","upwork.com","freelancer.com","fiverr.com","stackoverflow.com","stackoverflow.co","angel.co","wellfound.com","quora.com","stackexchange.com","yahoo.com","answers.microsoft.com","askubuntu.com","superuser.com","serverfault.com","mathoverflow.net","xda-developers.com","gamespot.com","ign.com","4chan.org","9gag.com","gizmodo.com","slashdot.org","hacker-news.news","ycombinator.com","producthunt.com","discourse.org","google.com","google.co.uk","google.de","google.fr","google.co.jp","bing.com","yahoo.com","search.yahoo.com","duckduckgo.com","baidu.com","yandex.com","yandex.ru","ask.com","wolframalpha.com","ecosia.org","startpage.com","qwant.com","searx.me","gibiru.com","swisscows.com","gmail.com","googlemail.com","outlook.com","hotmail.com","live.com","msn.com","yahoo.com","ymail.com","aol.com","icloud.com","me.com","mac.com","protonmail.com","pm.me","zoho.com","mail.com","gmx.com","gmx.net","yandex.com","yandex.ru","tutanota.com","tutanota.de","fastmail.com","hushmail.com","mailbox.org","posteo.de","runbox.com","disroot.org","163.com","qq.com","rambler.ru","mail.ru","yelp.com","yelp.ca","yelp.co.uk","yelp.com.au","yellowpages.com","yellowpages.ca","yell.com","tripadvisor.com","tripadvisor.co.uk","tripadvisor.ca","foursquare.com","angieslist.com","bbb.org","manta.com","thumbtack.com","homeadvisor.com","superpages.com","whitepages.com","local.com","citysearch.com","merchantcircle.com","insiderpages.com","kudzu.com","hotfrog.com","buildzoom.com","houzz.com","porch.com","mapquest.com","zagat.com","zomato.com","opentable.com","viator.com","expedia.com","booking.com","airbnb.com","vrbo.com","homeaway.com","craigslist.org","nextdoor.com","patch.com","meetup.com","eventbrite.com","groupon.com","livingsocial.com","gumtree.com","gumtree.com.au","kijiji.ca","leboncoin.fr","finn.no","blocket.se","58.com","dianping.com","tabelog.com","ypcdn.com"]}exports.Scraper=v;exports.CappedSet=x;
@@ -1 +1 @@
1
- import{parseHTML as x}from"linkedom";class w extends Set{maxSize;constructor(e){super();this.maxSize=e}add(e){if(this.has(e))return this;if(this.size>=this.maxSize){let n=this.values().next().value;if(n)this.delete(n)}return super.add(e)}}class k{http;httpOptions;onEmailLeads;onEmailDiscovered;debug;discoveredEmails=new w(1e4);userAgents=[];fileExtensions=[];restrictedDomains=$();forbiddenProtocols=["mailto:","tel:","javascript:","data:","sms:","ftp:","file:","irc:","blob:","chrome:","about:","intent:"];constructor(e,n,a,o,t=!1){this.http=e,this.httpOptions=n,this.onEmailLeads=a,this.onEmailDiscovered=o,this.debug=t,this.userAgents=y()}sleep(e){return new Promise((n)=>setTimeout(n,e))}async executeHttp(e,n,a,o,t,i=0){let{getCache:r,saveCache:c,hasUrlInCache:u,saveUrl:l,httpConfig:s={}}=o;if(!e||e.length<3||this.forbiddenProtocols.some((d)=>e.startsWith(d)))return;try{let d=t?!1:await u(e),h=await r(e);if(d&&!h)return!1;if(d&&n!=="GET")return!1;let m=h&&n==="GET"?h:await(n==="GET"?this.http.get(e,s):n==="PATCH"?this.http.patch(e,a,s):n==="POST"?this.http.post(e,a,s):this.http.put(e,a,s));if(!h)await c(e,{data:m.data,contentType:m.contentType,finalUrl:m.finalUrl});if(!d)await l(e);if(!m.contentType||!m.contentType.includes("/html")||!m.contentType.includes("text/")||typeof m.data!=="string")return null;return{data:m.data,contentType:m.contentType,finalUrl:m.finalUrl}}catch(d){let h=d,m=this.httpOptions;if(h&&h.response){let g=h.response.status,p=m.retryDelay||100,b=m.maxRetryAttempts||3,f=m.retryWithoutProxyOnStatusCode||void 0,v=m.maxRetryOnProxyError||3;if(f&&s.proxy&&f.includes(g)&&i<b)return await this.sleep(p),delete s.proxy,await this.executeHttp(e,n,a,o,t,i+1);else if(m.retryOnStatusCode&&s.proxy&&m.retryOnStatusCode.includes(g)&&i<b)return await this.sleep(p),await this.executeHttp(e,n,a,o,t,i+1);else if(m.retryOnProxyError&&s.proxy&&i<v)return await this.sleep(p),await this.executeHttp(e,n,a,o,t,i+1)}if(this.debug){if(this.debug)console.log(`Error: unable to ${n} ${e}: ${d.message}`)}return null}}extractEmails(e,n,a,o,t){let i=this.extractEmailsFromContent(e?.replaceAll("mailto:"," ")),r=[];for(let c of i)if(this.handleEmailDiscovery(c,n,a,t))r.push(c);if(o&&o.length>0&&r.length>0)t.add(async()=>Promise.all(o.map((c)=>c(r))));i.length=0,r.length=0}async parseExternalWebsite(e,n,a,o,t,i=!0,r,c){let u=o.httpConfig?.headers?o.httpConfig.headers instanceof Headers?Object.fromEntries(o.httpConfig.headers.entries()):o.httpConfig.headers:{};o.httpConfig=o.httpConfig||{},o.httpConfig.headers={"user-agent":this.getRandomUserAgent(),accept:"text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8","accept-language":"en-US,en;q=0.9","cache-control":"no-cache",pragma:"no-cache",...u},o.httpConfig.timeout=o.httpConfig.timeout||15000,o.depth=o.depth||0,o.allowCrossDomainTravel=o.allowCrossDomainTravel||!1,t=t&&i;let l=[];try{let s=new URL(e),d=this.extractRootDomain(e);if(this.isLinktreeUrl(e))return await this.parseLinktreeProfile(e,o,t);if(this.isRestrictedDomain(e)){if(this.debug)console.warn(`⚠️ Skipped URL (restricted url): ${e}`);return l}let h=await this.executeHttp(e,n,a,o,t);if(!h){if(this.debug&&h===null)console.warn(`⚠️ Failed to fetch page content: ${e}`);if(this.debug&&h===!1)console.warn(`⚠️ Skipped URL (already visited): ${e}`);return l}let m=this.extractEmailsFromContent(h.data?.replaceAll("mailto:"," "));for(let g of m)if(this.handleEmailDiscovery(g,e,o.onEmailDiscovered,o.queue))l.push(g);if(o.depth>0||!r){let g=x(h.data).document,p=this.extractRelevantLinks(g,s,d,o.depth,o.allowCrossDomainTravel);o.depth--;let b=await Promise.allSettled(p.map((f)=>this.parseExternalWebsite(f,"GET",null,{...o,depth:o.depth},t,!1,!0)));for(let f of b)if(f.status==="fulfilled")l.push(...f.value);else if(this.debug)console.warn("⚠️ Failed to parse child URL:",f.reason?.message)}}catch(s){if(this.debug)console.error(`❌ Error parsing external website: ${e}`,s?.message)}if(i){if(o.onEmails&&o.onEmails.length>0)o.queue.add(async()=>Promise.all(o.onEmails.map((s)=>s(l))))}return l}async parseLinktreeProfile(e,n,a){let o=[];try{let t=await this.executeHttp(e,"GET",null,n,a);if(!t){if(this.debug)console.warn(`⚠️ Failed to fetch Linktree profile: ${e}`);return o}let r=x(t).document.getElementById("links-container");if(!r){if(this.debug)console.warn(`\uD83D\uDD0D No links container found in Linktree profile: ${e}`);return o}let c=this.extractLinktreeExternalUrls(r,e);if(c.length===0){if(this.debug)console.info("\uD83D\uDCED No valid external links found in Linktree profile");return o}if(this.debug)console.info(`\uD83C\uDFAF Found ${c.length} external links in Linktree profile`);let u=await Promise.allSettled(c.map((l)=>this.parseExternalWebsite(l,"GET",null,n,a,!1)));for(let l of u)if(l.status==="fulfilled")o.push(...l.value);else if(this.debug)console.warn("⚠️ Failed to parse Linktree external URL:",l.reason?.message)}catch(t){if(this.debug)console.error(`❌ Error parsing Linktree profile: ${e}`,t?.message)}return o}extractLinktreeExternalUrls(e,n){let a=new Set,o=e.querySelectorAll("a[href][target='_blank']");for(let t of o){let i=t.getAttribute("href");if(!i||i.length<3||this.forbiddenProtocols.some((r)=>i.startsWith(r)))continue;try{let r=new URL(i,n).href,c=this.extractRootDomain(r);if(c!=="linktr.ee"&&!this.isRestrictedDomain(r)&&c.length>3)a.add(r)}catch(r){if(this.debug)console.warn(`\uD83D\uDD17 Invalid URL in Linktree: ${i}`)}}return Array.from(a)}handleEmailDiscovery(e,n,a,o){if(!this.discoveredEmails.has(e)){this.discoveredEmails.add(e);let t={email:e,discoveredAt:n,timestamp:new Date};if(a&&a.length>0)o.add(async()=>Promise.all(a.map((i)=>i(t))));if(this.debug)console.info(`\uD83D\uDCE7 New email discovered: ${e} at ${n}`);return!0}return!1}isDomainAccessAllowed(e,n,a,o){if(o)return!0;if(a===0)return e===n;return e===n||e.endsWith(`.${n}`)||n.endsWith(`.${e}`)}extractRelevantLinks(e,n,a,o,t){let i=[],r=["about","contact","help","support","reach","email","mail","message","company","team","staff","info","inquiry","feedback","service","assistance","connect","touch"],c=e.querySelectorAll("a[href]");for(let u of c){let l=u.getAttribute("href");if(!l||l.length<2)continue;try{let s=this.normalizeUrl(l,n),d=this.extractRootDomain(s);if(!this.isDomainAccessAllowed(d,a,o,t))continue;if(r.some((m)=>s.toLowerCase().includes(m))||this.isLinktreeUrl(s))i.push(s)}catch(s){if(this.debug)console.warn(`\uD83D\uDD17 Invalid link found: ${l}`,s?.message)}}return i}extractEmailsFromContent(e){let n=e.replace(/[^\w@.-\s]/g," "),a=/\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/g,o=(c)=>{let u=/^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$/,l=c.split("@")[1]?.toLowerCase(),s=c.split(".").pop()?.toLowerCase();return u.test(c)&&l!==void 0&&s!==void 0&&!this.fileExtensions.includes(`.${s}`)&&!this.isRestrictedDomain(`https://${l}`)},t=(c)=>{return(c.match(a)||[]).filter(o)},i=e.replace(/<[^>]*>/g," "),r=[...t(i),...t(n)];return[...new Set(r)]}isRestrictedDomain(e){try{let n=new URL(e).host.toLowerCase();return this.restrictedDomains.some((a)=>n===a.toLowerCase()||n.endsWith(`.${a.toLowerCase()}`))}catch{return!0}}isLinktreeUrl(e){try{return this.extractRootDomain(e)==="linktr.ee"}catch{return!1}}extractRootDomain(e){try{let a=new URL(e).hostname.toLowerCase();return a.startsWith("www.")?a.slice(4):a}catch{return""}}normalizeUrl(e,n){if(e.startsWith("http://")||e.startsWith("https://"))return e;if(e.startsWith("//"))return`${n.protocol}${e}`;return new URL(e,n.href).href}getRandomUserAgent(){return this.userAgents[Math.floor(Math.random()*this.userAgents.length)]}}function y(){let e=[{name:"Chrome",version:"91.0.4472.124",engine:"AppleWebKit/537.36"},{name:"Firefox",version:"89.0",engine:"Gecko/20100101"},{name:"Safari",version:"14.1.1",engine:"AppleWebKit/605.1.15"},{name:"Edge",version:"91.0.864.59",engine:"AppleWebKit/537.36"},{name:"Opera",version:"77.0.4054.277",engine:"AppleWebKit/537.36"},{name:"Vivaldi",version:"3.8.2259.42",engine:"AppleWebKit/537.36"},{name:"Brave",version:"1.26.74",engine:"AppleWebKit/537.36"},{name:"Chromium",version:"91.0.4472.101",engine:"AppleWebKit/537.36"},{name:"Yandex",version:"21.5.3.742",engine:"AppleWebKit/537.36"},{name:"Maxthon",version:"5.3.8.2000",engine:"AppleWebKit/537.36"}],n=["Windows NT 10.0","Windows NT 6.1","Macintosh; Intel Mac OS X 10_15_7","Macintosh; Intel Mac OS X 11_4_0","X11; Linux x86_64","X11; Ubuntu; Linux x86_64"],a=[];for(let o=0;o<200;o++){let t=e[Math.floor(Math.random()*e.length)],i=n[Math.floor(Math.random()*n.length)],r="";switch(t.name){case"Chrome":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} Safari/537.36`;break;case"Firefox":r=`Mozilla/5.0 (${i}; rv:${t.version}) ${t.engine} Firefox/${t.version}`;break;case"Safari":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Version/${t.version} Safari/605.1.15`;break;case"Edge":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} Safari/537.36 Edg/${t.version}`;break;case"Opera":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} Safari/537.36 OPR/${t.version}`;break;case"Vivaldi":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} Safari/537.36 Vivaldi/${t.version}`;break;case"Brave":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} Safari/537.36 Brave/${t.version}`;break;case"Chromium":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chromium/${t.version} Chrome/${t.version} Safari/537.36`;break;case"Yandex":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} YaBrowser/${t.version} Safari/537.36`;break;case"Maxthon":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} Safari/537.36 Maxthon/${t.version}`;break}a.push(r)}return a}function $(){return["facebook.com","fb.com","messenger.com","instagram.com","threads.net","twitter.com","x.com","linkedin.com","pinterest.com","pin.it","reddit.com","tumblr.com","snapchat.com","tiktok.com","douyin.com","youtube.com","youtu.be","whatsapp.com","telegram.org","t.me","medium.com","quora.com","flickr.com","vimeo.com","vk.com","weibo.com","sina.com.cn","line.me","discord.com","discordapp.com","twitch.tv","meetup.com","nextdoor.com","xing.com","yelp.com","zalo.me","mastodon.social","clubhouse.com","patreon.com","onlyfans.com","douban.com","goodreads.com","soundcloud.com","spotify.com","last.fm","behance.net","dribbble.com","deviantart.com","pixiv.net","slideshare.net","tinder.com","bumble.com","etsy.com","indeed.com","glassdoor.com","monster.com","careerbuilder.com","dice.com","ziprecruiter.com","simplyhired.com","upwork.com","freelancer.com","fiverr.com","stackoverflow.com","stackoverflow.co","angel.co","wellfound.com","quora.com","stackexchange.com","yahoo.com","answers.microsoft.com","askubuntu.com","superuser.com","serverfault.com","mathoverflow.net","xda-developers.com","gamespot.com","ign.com","4chan.org","9gag.com","gizmodo.com","slashdot.org","hacker-news.news","ycombinator.com","producthunt.com","discourse.org","google.com","google.co.uk","google.de","google.fr","google.co.jp","bing.com","yahoo.com","search.yahoo.com","duckduckgo.com","baidu.com","yandex.com","yandex.ru","ask.com","wolframalpha.com","ecosia.org","startpage.com","qwant.com","searx.me","gibiru.com","swisscows.com","gmail.com","googlemail.com","outlook.com","hotmail.com","live.com","msn.com","yahoo.com","ymail.com","aol.com","icloud.com","me.com","mac.com","protonmail.com","pm.me","zoho.com","mail.com","gmx.com","gmx.net","yandex.com","yandex.ru","tutanota.com","tutanota.de","fastmail.com","hushmail.com","mailbox.org","posteo.de","runbox.com","disroot.org","163.com","qq.com","rambler.ru","mail.ru","yelp.com","yelp.ca","yelp.co.uk","yelp.com.au","yellowpages.com","yellowpages.ca","yell.com","tripadvisor.com","tripadvisor.co.uk","tripadvisor.ca","foursquare.com","angieslist.com","bbb.org","manta.com","thumbtack.com","homeadvisor.com","superpages.com","whitepages.com","local.com","citysearch.com","merchantcircle.com","insiderpages.com","kudzu.com","hotfrog.com","buildzoom.com","houzz.com","porch.com","mapquest.com","zagat.com","zomato.com","opentable.com","viator.com","expedia.com","booking.com","airbnb.com","vrbo.com","homeaway.com","craigslist.org","nextdoor.com","patch.com","meetup.com","eventbrite.com","groupon.com","livingsocial.com","gumtree.com","gumtree.com.au","kijiji.ca","leboncoin.fr","finn.no","blocket.se","58.com","dianping.com","tabelog.com","ypcdn.com"]}export{k as Scraper,w as CappedSet};
1
+ import{parseHTML as x}from"linkedom";class w extends Set{maxSize;constructor(e){super();this.maxSize=e}add(e){if(this.has(e))return this;if(this.size>=this.maxSize){let a=this.values().next().value;if(a)this.delete(a)}return super.add(e)}}class k{http;httpOptions;onEmailLeads;onEmailDiscovered;debug;discoveredEmails=new w(1e4);userAgents=[];fileExtensions=[];restrictedDomains=$();forbiddenProtocols=["mailto:","tel:","javascript:","data:","sms:","ftp:","file:","irc:","blob:","chrome:","about:","intent:"];constructor(e,a,n,o,t=!1){this.http=e,this.httpOptions=a,this.onEmailLeads=n,this.onEmailDiscovered=o,this.debug=t,this.userAgents=y()}sleep(e){return new Promise((a)=>setTimeout(a,e))}async executeHttp(e,a,n,o,t,i=0){let{getCache:r,saveCache:l,hasUrlInCache:d,saveUrl:c,httpConfig:s={}}=o;if(!e||e.length<3||this.forbiddenProtocols.some((u)=>e.startsWith(u)))return;try{let u=t?!1:await d(e),h=await r(e);if(u&&!h)return!1;if(u&&a!=="GET")return!1;let m=h&&a==="GET"?h:await(a==="GET"?this.http.get(e,s):a==="PATCH"?this.http.patch(e,n,s):a==="POST"?this.http.post(e,n,s):this.http.put(e,n,s));if(!h)await l(e,{data:m.data,contentType:m.contentType,finalUrl:m.finalUrl});if(!u)await c(e);if(!m.contentType||!m.contentType.includes("/html")||!m.contentType.includes("text/")||typeof m.data!=="string")return null;return{data:m.data,contentType:m.contentType,finalUrl:m.finalUrl}}catch(u){let h=u,m=this.httpOptions;if(h&&h.response){let g=h.response.status,p=m.retryDelay||100,b=m.maxRetryAttempts||3,f=m.retryWithoutProxyOnStatusCode||void 0,v=m.maxRetryOnProxyError||3;if(f&&s.proxy&&f.includes(g)&&i<b)return await this.sleep(p),delete s.proxy,await this.executeHttp(e,a,n,o,t,i+1);else if(m.retryOnStatusCode&&s.proxy&&m.retryOnStatusCode.includes(g)&&i<b)return await this.sleep(p),await this.executeHttp(e,a,n,o,t,i+1);else if(m.retryOnProxyError&&s.proxy&&i<v)return await this.sleep(p),await this.executeHttp(e,a,n,o,t,i+1)}if(this.debug){if(this.debug)console.log(`Error: unable to ${a} ${e}: ${u.message}`)}return null}}extractEmails(e,a,n,o,t,i){let r=this.extractEmailsFromContent(e?.replaceAll("mailto:"," ")),l=[];for(let d of r)if(this.handleEmailDiscovery(d,a,n,t,i))l.push(d);if(o&&o.length>0&&l.length>0)t.add(async()=>Promise.all(o.map((d)=>d(l))));r.length=0,l.length=0}async parseExternalWebsite(e,a,n,o,t,i=!0,r,l){let d=o.httpConfig?.headers?o.httpConfig.headers instanceof Headers?Object.fromEntries(o.httpConfig.headers.entries()):o.httpConfig.headers:{};o.httpConfig=o.httpConfig||{},o.httpConfig.headers={"user-agent":this.getRandomUserAgent(),accept:"text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8","accept-language":"en-US,en;q=0.9","cache-control":"no-cache",pragma:"no-cache",...d},o.httpConfig.timeout=o.httpConfig.timeout||15000,o.depth=o.depth||0,o.allowCrossDomainTravel=o.allowCrossDomainTravel||!1,t=t&&i;let c=[];try{let s=new URL(e),u=this.extractRootDomain(e);if(this.isLinktreeUrl(e))return await this.parseLinktreeProfile(e,o,t);if(this.isRestrictedDomain(e)){if(this.debug)console.warn(`⚠️ Skipped URL (restricted url): ${e}`);return c}let h=await this.executeHttp(e,a,n,o,t);if(!h){if(this.debug&&h===null)console.warn(`⚠️ Failed to fetch page content: ${e}`);if(this.debug&&h===!1)console.warn(`⚠️ Skipped URL (already visited): ${e}`);return c}let m=this.extractEmailsFromContent(h.data?.replaceAll("mailto:"," "));for(let g of m)if(this.handleEmailDiscovery(g,e,o.onEmailDiscovered,o.queue,o.emailMetadata))c.push(g);if(o.depth>0||!r){let g=x(h.data).document,p=this.extractRelevantLinks(g,s,u,o.depth,o.allowCrossDomainTravel);o.depth--;let b=await Promise.allSettled(p.map((f)=>this.parseExternalWebsite(f,"GET",null,{...o,depth:o.depth},t,!1,!0)));for(let f of b)if(f.status==="fulfilled")c.push(...f.value);else if(this.debug)console.warn("⚠️ Failed to parse child URL:",f.reason?.message)}}catch(s){if(this.debug)console.error(`❌ Error parsing external website: ${e}`,s?.message)}if(i){if(o.onEmails&&o.onEmails.length>0)o.queue.add(async()=>Promise.all(o.onEmails.map((s)=>s(c))))}return c}async parseLinktreeProfile(e,a,n){let o=[];try{let t=await this.executeHttp(e,"GET",null,a,n);if(!t){if(this.debug)console.warn(`⚠️ Failed to fetch Linktree profile: ${e}`);return o}let r=x(t).document.getElementById("links-container");if(!r){if(this.debug)console.warn(`\uD83D\uDD0D No links container found in Linktree profile: ${e}`);return o}let l=this.extractLinktreeExternalUrls(r,e);if(l.length===0){if(this.debug)console.info("\uD83D\uDCED No valid external links found in Linktree profile");return o}if(this.debug)console.info(`\uD83C\uDFAF Found ${l.length} external links in Linktree profile`);let d=await Promise.allSettled(l.map((c)=>this.parseExternalWebsite(c,"GET",null,a,n,!1)));for(let c of d)if(c.status==="fulfilled")o.push(...c.value);else if(this.debug)console.warn("⚠️ Failed to parse Linktree external URL:",c.reason?.message)}catch(t){if(this.debug)console.error(`❌ Error parsing Linktree profile: ${e}`,t?.message)}return o}extractLinktreeExternalUrls(e,a){let n=new Set,o=e.querySelectorAll("a[href][target='_blank']");for(let t of o){let i=t.getAttribute("href");if(!i||i.length<3||this.forbiddenProtocols.some((r)=>i.startsWith(r)))continue;try{let r=new URL(i,a).href,l=this.extractRootDomain(r);if(l!=="linktr.ee"&&!this.isRestrictedDomain(r)&&l.length>3)n.add(r)}catch(r){if(this.debug)console.warn(`\uD83D\uDD17 Invalid URL in Linktree: ${i}`)}}return Array.from(n)}handleEmailDiscovery(e,a,n,o,t){if(!this.discoveredEmails.has(e)){this.discoveredEmails.add(e);let i={email:e,discoveredAt:a,timestamp:new Date,metadata:t||{}};if(n&&n.length>0)o.add(async()=>Promise.all(n.map((r)=>r(i))));if(this.debug)console.info(`\uD83D\uDCE7 New email discovered: ${e} at ${a}`);return!0}return!1}isDomainAccessAllowed(e,a,n,o){if(o)return!0;if(n===0)return e===a;return e===a||e.endsWith(`.${a}`)||a.endsWith(`.${e}`)}extractRelevantLinks(e,a,n,o,t){let i=[],r=["about","contact","help","support","reach","email","mail","message","company","team","staff","info","inquiry","feedback","service","assistance","connect","touch"],l=e.querySelectorAll("a[href]");for(let d of l){let c=d.getAttribute("href");if(!c||c.length<2)continue;try{let s=this.normalizeUrl(c,a),u=this.extractRootDomain(s);if(!this.isDomainAccessAllowed(u,n,o,t))continue;if(r.some((m)=>s.toLowerCase().includes(m))||this.isLinktreeUrl(s))i.push(s)}catch(s){if(this.debug)console.warn(`\uD83D\uDD17 Invalid link found: ${c}`,s?.message)}}return i}extractEmailsFromContent(e){let a=e.replace(/[^\w@.-\s]/g," "),n=/\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/g,o=(l)=>{let d=/^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$/,c=l.split("@")[1]?.toLowerCase(),s=l.split(".").pop()?.toLowerCase();return d.test(l)&&c!==void 0&&s!==void 0&&!this.fileExtensions.includes(`.${s}`)&&!this.isRestrictedDomain(`https://${c}`)},t=(l)=>{return(l.match(n)||[]).filter(o)},i=e.replace(/<[^>]*>/g," "),r=[...t(i),...t(a)];return[...new Set(r)]}isRestrictedDomain(e){try{let a=new URL(e).host.toLowerCase();return this.restrictedDomains.some((n)=>a===n.toLowerCase()||a.endsWith(`.${n.toLowerCase()}`))}catch{return!0}}isLinktreeUrl(e){try{return this.extractRootDomain(e)==="linktr.ee"}catch{return!1}}extractRootDomain(e){try{let n=new URL(e).hostname.toLowerCase();return n.startsWith("www.")?n.slice(4):n}catch{return""}}normalizeUrl(e,a){if(e.startsWith("http://")||e.startsWith("https://"))return e;if(e.startsWith("//"))return`${a.protocol}${e}`;return new URL(e,a.href).href}getRandomUserAgent(){return this.userAgents[Math.floor(Math.random()*this.userAgents.length)]}}function y(){let e=[{name:"Chrome",version:"91.0.4472.124",engine:"AppleWebKit/537.36"},{name:"Firefox",version:"89.0",engine:"Gecko/20100101"},{name:"Safari",version:"14.1.1",engine:"AppleWebKit/605.1.15"},{name:"Edge",version:"91.0.864.59",engine:"AppleWebKit/537.36"},{name:"Opera",version:"77.0.4054.277",engine:"AppleWebKit/537.36"},{name:"Vivaldi",version:"3.8.2259.42",engine:"AppleWebKit/537.36"},{name:"Brave",version:"1.26.74",engine:"AppleWebKit/537.36"},{name:"Chromium",version:"91.0.4472.101",engine:"AppleWebKit/537.36"},{name:"Yandex",version:"21.5.3.742",engine:"AppleWebKit/537.36"},{name:"Maxthon",version:"5.3.8.2000",engine:"AppleWebKit/537.36"}],a=["Windows NT 10.0","Windows NT 6.1","Macintosh; Intel Mac OS X 10_15_7","Macintosh; Intel Mac OS X 11_4_0","X11; Linux x86_64","X11; Ubuntu; Linux x86_64"],n=[];for(let o=0;o<200;o++){let t=e[Math.floor(Math.random()*e.length)],i=a[Math.floor(Math.random()*a.length)],r="";switch(t.name){case"Chrome":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} Safari/537.36`;break;case"Firefox":r=`Mozilla/5.0 (${i}; rv:${t.version}) ${t.engine} Firefox/${t.version}`;break;case"Safari":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Version/${t.version} Safari/605.1.15`;break;case"Edge":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} Safari/537.36 Edg/${t.version}`;break;case"Opera":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} Safari/537.36 OPR/${t.version}`;break;case"Vivaldi":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} Safari/537.36 Vivaldi/${t.version}`;break;case"Brave":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} Safari/537.36 Brave/${t.version}`;break;case"Chromium":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chromium/${t.version} Chrome/${t.version} Safari/537.36`;break;case"Yandex":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} YaBrowser/${t.version} Safari/537.36`;break;case"Maxthon":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} Safari/537.36 Maxthon/${t.version}`;break}n.push(r)}return n}function $(){return["facebook.com","fb.com","messenger.com","instagram.com","threads.net","twitter.com","x.com","linkedin.com","pinterest.com","pin.it","reddit.com","tumblr.com","snapchat.com","tiktok.com","douyin.com","youtube.com","youtu.be","whatsapp.com","telegram.org","t.me","medium.com","quora.com","flickr.com","vimeo.com","vk.com","weibo.com","sina.com.cn","line.me","discord.com","discordapp.com","twitch.tv","meetup.com","nextdoor.com","xing.com","yelp.com","zalo.me","mastodon.social","clubhouse.com","patreon.com","onlyfans.com","douban.com","goodreads.com","soundcloud.com","spotify.com","last.fm","behance.net","dribbble.com","deviantart.com","pixiv.net","slideshare.net","tinder.com","bumble.com","etsy.com","indeed.com","glassdoor.com","monster.com","careerbuilder.com","dice.com","ziprecruiter.com","simplyhired.com","upwork.com","freelancer.com","fiverr.com","stackoverflow.com","stackoverflow.co","angel.co","wellfound.com","quora.com","stackexchange.com","yahoo.com","answers.microsoft.com","askubuntu.com","superuser.com","serverfault.com","mathoverflow.net","xda-developers.com","gamespot.com","ign.com","4chan.org","9gag.com","gizmodo.com","slashdot.org","hacker-news.news","ycombinator.com","producthunt.com","discourse.org","google.com","google.co.uk","google.de","google.fr","google.co.jp","bing.com","yahoo.com","search.yahoo.com","duckduckgo.com","baidu.com","yandex.com","yandex.ru","ask.com","wolframalpha.com","ecosia.org","startpage.com","qwant.com","searx.me","gibiru.com","swisscows.com","gmail.com","googlemail.com","outlook.com","hotmail.com","live.com","msn.com","yahoo.com","ymail.com","aol.com","icloud.com","me.com","mac.com","protonmail.com","pm.me","zoho.com","mail.com","gmx.com","gmx.net","yandex.com","yandex.ru","tutanota.com","tutanota.de","fastmail.com","hushmail.com","mailbox.org","posteo.de","runbox.com","disroot.org","163.com","qq.com","rambler.ru","mail.ru","yelp.com","yelp.ca","yelp.co.uk","yelp.com.au","yellowpages.com","yellowpages.ca","yell.com","tripadvisor.com","tripadvisor.co.uk","tripadvisor.ca","foursquare.com","angieslist.com","bbb.org","manta.com","thumbtack.com","homeadvisor.com","superpages.com","whitepages.com","local.com","citysearch.com","merchantcircle.com","insiderpages.com","kudzu.com","hotfrog.com","buildzoom.com","houzz.com","porch.com","mapquest.com","zagat.com","zomato.com","opentable.com","viator.com","expedia.com","booking.com","airbnb.com","vrbo.com","homeaway.com","craigslist.org","nextdoor.com","patch.com","meetup.com","eventbrite.com","groupon.com","livingsocial.com","gumtree.com","gumtree.com.au","kijiji.ca","leboncoin.fr","finn.no","blocket.se","58.com","dianping.com","tabelog.com","ypcdn.com"]}export{k as Scraper,w as CappedSet};
package/dist/crawler.d.ts CHANGED
@@ -6980,10 +6980,11 @@ export declare class CrawlerOptions {
6980
6980
  */
6981
6981
  private getRandomUserAgent;
6982
6982
  }
6983
- export interface EmailDiscoveryEvent {
6983
+ export interface EmailDiscoveryEvent<T = Record<string, any>> {
6984
6984
  email: string;
6985
6985
  discoveredAt: string;
6986
6986
  timestamp: Date;
6987
+ metadata: T;
6987
6988
  }
6988
6989
  interface RedirectEvent$1 {
6989
6990
  originalUrl: string;
@@ -7259,7 +7260,7 @@ export declare class Crawler {
7259
7260
  * });
7260
7261
  * ```
7261
7262
  */
7262
- onEmailDiscovered(handler: (email: EmailDiscoveryEvent) => Promise<void>): Crawler;
7263
+ onEmailDiscovered<T = Record<string, any>>(handler: (email: EmailDiscoveryEvent<T>) => Promise<void>): Crawler;
7263
7264
  /**
7264
7265
  * Registers a handler for bulk email leads discovery.
7265
7266
  * Triggered when multiple email addresses are found and processed.
@@ -7688,6 +7689,7 @@ export declare class Crawler {
7688
7689
  useOxylabsRotation?: boolean;
7689
7690
  useDecodo?: boolean;
7690
7691
  skipCache?: boolean;
7692
+ emailMetadata?: Record<string, any>;
7691
7693
  }): Crawler;
7692
7694
  private execute;
7693
7695
  private execute2;
@@ -1,4 +1,4 @@
1
- const _mod_a4zsgn = require('../crawler/crawler.cjs');
2
- exports.Crawler = _mod_a4zsgn.Crawler;;
3
- const _mod_0lvdun = require('../crawler/crawler-options.cjs');
4
- exports.CrawlerOptions = _mod_0lvdun.CrawlerOptions;;
1
+ const _mod_lowijh = require('../crawler/crawler.cjs');
2
+ exports.Crawler = _mod_lowijh.Crawler;;
3
+ const _mod_b0j8l0 = require('../crawler/crawler-options.cjs');
4
+ exports.CrawlerOptions = _mod_b0j8l0.CrawlerOptions;;
package/dist/index.cjs CHANGED
@@ -1,30 +1,30 @@
1
- const _mod_5tk9fp = require('./core/rezo.cjs');
2
- exports.Rezo = _mod_5tk9fp.Rezo;
3
- exports.createRezoInstance = _mod_5tk9fp.createRezoInstance;
4
- exports.createDefaultInstance = _mod_5tk9fp.createDefaultInstance;;
5
- const _mod_e3e5sj = require('./errors/rezo-error.cjs');
6
- exports.RezoError = _mod_e3e5sj.RezoError;
7
- exports.RezoErrorCode = _mod_e3e5sj.RezoErrorCode;;
8
- const _mod_4cfsre = require('./utils/headers.cjs');
9
- exports.RezoHeaders = _mod_4cfsre.RezoHeaders;;
10
- const _mod_bwbkqa = require('./utils/form-data.cjs');
11
- exports.RezoFormData = _mod_bwbkqa.RezoFormData;;
12
- const _mod_1jxd05 = require('./utils/cookies.cjs');
13
- exports.RezoCookieJar = _mod_1jxd05.RezoCookieJar;
14
- exports.Cookie = _mod_1jxd05.Cookie;;
15
- const _mod_zdro76 = require('./utils/curl.cjs');
16
- exports.toCurl = _mod_zdro76.toCurl;
17
- exports.fromCurl = _mod_zdro76.fromCurl;;
18
- const _mod_epvv3d = require('./core/hooks.cjs');
19
- exports.createDefaultHooks = _mod_epvv3d.createDefaultHooks;
20
- exports.mergeHooks = _mod_epvv3d.mergeHooks;;
21
- const _mod_r3abhc = require('./proxy/manager.cjs');
22
- exports.ProxyManager = _mod_r3abhc.ProxyManager;;
23
- const _mod_o7hgqe = require('./queue/index.cjs');
24
- exports.RezoQueue = _mod_o7hgqe.RezoQueue;
25
- exports.HttpQueue = _mod_o7hgqe.HttpQueue;
26
- exports.Priority = _mod_o7hgqe.Priority;
27
- exports.HttpMethodPriority = _mod_o7hgqe.HttpMethodPriority;;
1
+ const _mod_s4on4w = require('./core/rezo.cjs');
2
+ exports.Rezo = _mod_s4on4w.Rezo;
3
+ exports.createRezoInstance = _mod_s4on4w.createRezoInstance;
4
+ exports.createDefaultInstance = _mod_s4on4w.createDefaultInstance;;
5
+ const _mod_j10648 = require('./errors/rezo-error.cjs');
6
+ exports.RezoError = _mod_j10648.RezoError;
7
+ exports.RezoErrorCode = _mod_j10648.RezoErrorCode;;
8
+ const _mod_cr1p87 = require('./utils/headers.cjs');
9
+ exports.RezoHeaders = _mod_cr1p87.RezoHeaders;;
10
+ const _mod_y6ple5 = require('./utils/form-data.cjs');
11
+ exports.RezoFormData = _mod_y6ple5.RezoFormData;;
12
+ const _mod_mt0dfs = require('./utils/cookies.cjs');
13
+ exports.RezoCookieJar = _mod_mt0dfs.RezoCookieJar;
14
+ exports.Cookie = _mod_mt0dfs.Cookie;;
15
+ const _mod_bivoyi = require('./utils/curl.cjs');
16
+ exports.toCurl = _mod_bivoyi.toCurl;
17
+ exports.fromCurl = _mod_bivoyi.fromCurl;;
18
+ const _mod_h18prc = require('./core/hooks.cjs');
19
+ exports.createDefaultHooks = _mod_h18prc.createDefaultHooks;
20
+ exports.mergeHooks = _mod_h18prc.mergeHooks;;
21
+ const _mod_hr8h18 = require('./proxy/manager.cjs');
22
+ exports.ProxyManager = _mod_hr8h18.ProxyManager;;
23
+ const _mod_x31mfg = require('./queue/index.cjs');
24
+ exports.RezoQueue = _mod_x31mfg.RezoQueue;
25
+ exports.HttpQueue = _mod_x31mfg.HttpQueue;
26
+ exports.Priority = _mod_x31mfg.Priority;
27
+ exports.HttpMethodPriority = _mod_x31mfg.HttpMethodPriority;;
28
28
  const { RezoError } = require('./errors/rezo-error.cjs');
29
29
  const isRezoError = exports.isRezoError = RezoError.isRezoError;
30
30
  const Cancel = exports.Cancel = RezoError;
@@ -1,10 +1,10 @@
1
- const _mod_wz12eh = require('./base.cjs');
2
- exports.Agent = _mod_wz12eh.Agent;;
3
- const _mod_ry2xii = require('./http-proxy.cjs');
4
- exports.HttpProxyAgent = _mod_ry2xii.HttpProxyAgent;;
5
- const _mod_ofw4c7 = require('./https-proxy.cjs');
6
- exports.HttpsProxyAgent = _mod_ofw4c7.HttpsProxyAgent;;
7
- const _mod_v60671 = require('./socks-proxy.cjs');
8
- exports.SocksProxyAgent = _mod_v60671.SocksProxyAgent;;
9
- const _mod_eqgzmz = require('./socks-client.cjs');
10
- exports.SocksClient = _mod_eqgzmz.SocksClient;;
1
+ const _mod_c6cw86 = require('./base.cjs');
2
+ exports.Agent = _mod_c6cw86.Agent;;
3
+ const _mod_j0if6b = require('./http-proxy.cjs');
4
+ exports.HttpProxyAgent = _mod_j0if6b.HttpProxyAgent;;
5
+ const _mod_ze9lyt = require('./https-proxy.cjs');
6
+ exports.HttpsProxyAgent = _mod_ze9lyt.HttpsProxyAgent;;
7
+ const _mod_fewvsu = require('./socks-proxy.cjs');
8
+ exports.SocksProxyAgent = _mod_fewvsu.SocksProxyAgent;;
9
+ const _mod_matq8q = require('./socks-client.cjs');
10
+ exports.SocksClient = _mod_matq8q.SocksClient;;
@@ -1,9 +1,9 @@
1
1
  const { Agent, HttpProxyAgent, HttpsProxyAgent, SocksProxyAgent } = require('../internal/agents/index.cjs');
2
2
  const { parseProxyString } = require('./parse.cjs');
3
- const _mod_1cnwp5 = require('./manager.cjs');
4
- exports.ProxyManager = _mod_1cnwp5.ProxyManager;;
5
- const _mod_f59afy = require('./parse.cjs');
6
- exports.parseProxyString = _mod_f59afy.parseProxyString;;
3
+ const _mod_31ry9y = require('./manager.cjs');
4
+ exports.ProxyManager = _mod_31ry9y.ProxyManager;;
5
+ const _mod_k69lld = require('./parse.cjs');
6
+ exports.parseProxyString = _mod_k69lld.parseProxyString;;
7
7
  function createOptions(uri, opts) {
8
8
  if (uri instanceof URL || typeof uri === "string") {
9
9
  return {
@@ -1,8 +1,8 @@
1
- const _mod_03tvob = require('./queue.cjs');
2
- exports.RezoQueue = _mod_03tvob.RezoQueue;;
3
- const _mod_hxj6ft = require('./http-queue.cjs');
4
- exports.HttpQueue = _mod_hxj6ft.HttpQueue;
5
- exports.extractDomain = _mod_hxj6ft.extractDomain;;
6
- const _mod_fx9et0 = require('./types.cjs');
7
- exports.Priority = _mod_fx9et0.Priority;
8
- exports.HttpMethodPriority = _mod_fx9et0.HttpMethodPriority;;
1
+ const _mod_19g5tr = require('./queue.cjs');
2
+ exports.RezoQueue = _mod_19g5tr.RezoQueue;;
3
+ const _mod_0zl05k = require('./http-queue.cjs');
4
+ exports.HttpQueue = _mod_0zl05k.HttpQueue;
5
+ exports.extractDomain = _mod_0zl05k.extractDomain;;
6
+ const _mod_0r6z2k = require('./types.cjs');
7
+ exports.Priority = _mod_0r6z2k.Priority;
8
+ exports.HttpMethodPriority = _mod_0r6z2k.HttpMethodPriority;;
@@ -1,11 +1,11 @@
1
- const _mod_eszyh6 = require('./event-emitter.cjs');
2
- exports.UniversalEventEmitter = _mod_eszyh6.UniversalEventEmitter;;
3
- const _mod_4jlayk = require('./stream.cjs');
4
- exports.UniversalStreamResponse = _mod_4jlayk.UniversalStreamResponse;
5
- exports.StreamResponse = _mod_4jlayk.StreamResponse;;
6
- const _mod_krwf9o = require('./download.cjs');
7
- exports.UniversalDownloadResponse = _mod_krwf9o.UniversalDownloadResponse;
8
- exports.DownloadResponse = _mod_krwf9o.DownloadResponse;;
9
- const _mod_0dnzr0 = require('./upload.cjs');
10
- exports.UniversalUploadResponse = _mod_0dnzr0.UniversalUploadResponse;
11
- exports.UploadResponse = _mod_0dnzr0.UploadResponse;;
1
+ const _mod_jpnm5w = require('./event-emitter.cjs');
2
+ exports.UniversalEventEmitter = _mod_jpnm5w.UniversalEventEmitter;;
3
+ const _mod_t7q5p9 = require('./stream.cjs');
4
+ exports.UniversalStreamResponse = _mod_t7q5p9.UniversalStreamResponse;
5
+ exports.StreamResponse = _mod_t7q5p9.StreamResponse;;
6
+ const _mod_r1o7zn = require('./download.cjs');
7
+ exports.UniversalDownloadResponse = _mod_r1o7zn.UniversalDownloadResponse;
8
+ exports.DownloadResponse = _mod_r1o7zn.DownloadResponse;;
9
+ const _mod_0lnu3y = require('./upload.cjs');
10
+ exports.UniversalUploadResponse = _mod_0lnu3y.UniversalUploadResponse;
11
+ exports.UploadResponse = _mod_0lnu3y.UploadResponse;;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "rezo",
3
- "version": "1.0.63",
3
+ "version": "1.0.65",
4
4
  "description": "Lightning-fast, enterprise-grade HTTP client for modern JavaScript. Full HTTP/2 support, intelligent cookie management, multiple adapters (HTTP, Fetch, cURL, XHR), streaming, proxy support (HTTP/HTTPS/SOCKS), and cross-environment compatibility.",
5
5
  "main": "dist/index.cjs",
6
6
  "module": "dist/index.js",