rezo 1.0.64 → 1.0.66

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
- const _mod_4c0peh = require('./picker.cjs');
2
- exports.detectRuntime = _mod_4c0peh.detectRuntime;
3
- exports.getAdapterCapabilities = _mod_4c0peh.getAdapterCapabilities;
4
- exports.buildAdapterContext = _mod_4c0peh.buildAdapterContext;
5
- exports.getAvailableAdapters = _mod_4c0peh.getAvailableAdapters;
6
- exports.selectAdapter = _mod_4c0peh.selectAdapter;;
1
+ const _mod_hjzrbn = require('./picker.cjs');
2
+ exports.detectRuntime = _mod_hjzrbn.detectRuntime;
3
+ exports.getAdapterCapabilities = _mod_hjzrbn.getAdapterCapabilities;
4
+ exports.buildAdapterContext = _mod_hjzrbn.buildAdapterContext;
5
+ exports.getAvailableAdapters = _mod_hjzrbn.getAvailableAdapters;
6
+ exports.selectAdapter = _mod_hjzrbn.selectAdapter;;
@@ -1,9 +1,9 @@
1
- const _mod_z313xk = require('./lru-cache.cjs');
2
- exports.LRUCache = _mod_z313xk.LRUCache;;
3
- const _mod_s9k0nm = require('./dns-cache.cjs');
4
- exports.DNSCache = _mod_s9k0nm.DNSCache;
5
- exports.getGlobalDNSCache = _mod_s9k0nm.getGlobalDNSCache;
6
- exports.resetGlobalDNSCache = _mod_s9k0nm.resetGlobalDNSCache;;
7
- const _mod_mk2k1e = require('./response-cache.cjs');
8
- exports.ResponseCache = _mod_mk2k1e.ResponseCache;
9
- exports.normalizeResponseCacheConfig = _mod_mk2k1e.normalizeResponseCacheConfig;;
1
+ const _mod_q7t2ir = require('./lru-cache.cjs');
2
+ exports.LRUCache = _mod_q7t2ir.LRUCache;;
3
+ const _mod_sa1kxt = require('./dns-cache.cjs');
4
+ exports.DNSCache = _mod_sa1kxt.DNSCache;
5
+ exports.getGlobalDNSCache = _mod_sa1kxt.getGlobalDNSCache;
6
+ exports.resetGlobalDNSCache = _mod_sa1kxt.resetGlobalDNSCache;;
7
+ const _mod_r6s1np = require('./response-cache.cjs');
8
+ exports.ResponseCache = _mod_r6s1np.ResponseCache;
9
+ exports.normalizeResponseCacheConfig = _mod_r6s1np.normalizeResponseCacheConfig;;
@@ -834,7 +834,8 @@ class Crawler {
834
834
  useOxylabsScraperAi = false,
835
835
  useOxylabsRotation = true,
836
836
  useDecodo = false,
837
- skipCache = false
837
+ skipCache = false,
838
+ emailMetadata = {}
838
839
  } = options || {};
839
840
  const _options = {
840
841
  headers: this.config.pickHeaders(url, true, headers, true),
@@ -872,17 +873,17 @@ class Crawler {
872
873
  this.addToNavigationQueue(url, method, body, headersObj);
873
874
  }
874
875
  if (deepEmailFinder) {
875
- const p = this.execute2(method, url, body, _options, forceRevisit);
876
+ const p = this.execute2(method, url, body, _options, forceRevisit, emailMetadata);
876
877
  this.pendingExecutions.add(p);
877
878
  p.finally(() => this.pendingExecutions.delete(p));
878
879
  return this;
879
880
  }
880
- const p = this.execute(method, url, body, _options, extractLeads, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions, skipCache);
881
+ const p = this.execute(method, url, body, _options, extractLeads, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions, skipCache, emailMetadata);
881
882
  this.pendingExecutions.add(p);
882
883
  p.finally(() => this.pendingExecutions.delete(p));
883
884
  return this;
884
885
  }
885
- async execute(method, url, body, options = {}, isEmail, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions, skipCache) {
886
+ async execute(method, url, body, options = {}, isEmail, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions, skipCache, emailMetadata) {
886
887
  await this.waitForStorage();
887
888
  if (this.isCacheEnabled) {
888
889
  await this.waitForCache();
@@ -890,10 +891,10 @@ class Crawler {
890
891
  if (this.config.enableNavigationHistory) {
891
892
  await this.waitForNavigationHistory();
892
893
  }
893
- const task = this.queue.add(() => this.executeHttp(method, url, body, options, isEmail, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions, 0, undefined, skipCache));
894
+ const task = this.queue.add(() => this.executeHttp(method, url, body, options, isEmail, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions, 0, undefined, skipCache, emailMetadata));
894
895
  task.finally(() => this.pendingExecutions.delete(task));
895
896
  }
896
- async execute2(method, url, body, options = {}, forceRevisit) {
897
+ async execute2(method, url, body, options = {}, forceRevisit, emailMetadata) {
897
898
  await this.waitForStorage();
898
899
  if (this.isCacheEnabled) {
899
900
  await this.waitForCache();
@@ -911,10 +912,11 @@ class Crawler {
911
912
  onEmails: this.emailLeadsEvents,
912
913
  queue: this.queue,
913
914
  depth: 1,
914
- allowCrossDomainTravel: true
915
+ allowCrossDomainTravel: true,
916
+ emailMetadata
915
917
  }, forceRevisit, true)).then();
916
918
  }
917
- async executeHttp(method, url, body, options = {}, isEmail, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions, retryCount = 0, parentUrl, skipCache) {
919
+ async executeHttp(method, url, body, options = {}, isEmail, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions, retryCount = 0, parentUrl, skipCache, emailMetadata) {
918
920
  try {
919
921
  await this.triggerStartHandlers();
920
922
  const limitCheck = await this.checkCrawlLimits(url, parentUrl);
@@ -984,7 +986,7 @@ class Crawler {
984
986
  });
985
987
  if (res.contentType && res.contentType.includes("/json")) {
986
988
  if (this.emailDiscoveredEvents.length > 0 || this.emailLeadsEvents.length > 0) {
987
- this.leadsFinder.extractEmails(JSON.stringify(res.data), res.finalUrl, this.emailDiscoveredEvents, this.emailLeadsEvents, this.queue);
989
+ this.leadsFinder.extractEmails(JSON.stringify(res.data), res.finalUrl, this.emailDiscoveredEvents, this.emailLeadsEvents, this.queue, emailMetadata);
988
990
  }
989
991
  for (let i = 0;i < this.jsonEvents.length; i++) {
990
992
  const event = this.jsonEvents[i];
@@ -999,7 +1001,7 @@ class Crawler {
999
1001
  if (!res.contentType || !res.contentType.includes("/html") || typeof res.data !== "string")
1000
1002
  return;
1001
1003
  if ((this.emailDiscoveredEvents.length > 0 || this.emailLeadsEvents.length > 0) && isEmail) {
1002
- this.leadsFinder.extractEmails(res.data, res.finalUrl, this.emailDiscoveredEvents, this.emailLeadsEvents, this.queue);
1004
+ this.leadsFinder.extractEmails(res.data, res.finalUrl, this.emailDiscoveredEvents, this.emailLeadsEvents, this.queue, emailMetadata);
1003
1005
  }
1004
1006
  const { document } = parseHTML(res.data.addBaseUrl(res.finalUrl));
1005
1007
  document.URL = res.finalUrl;
@@ -834,7 +834,8 @@ export class Crawler {
834
834
  useOxylabsScraperAi = false,
835
835
  useOxylabsRotation = true,
836
836
  useDecodo = false,
837
- skipCache = false
837
+ skipCache = false,
838
+ emailMetadata = {}
838
839
  } = options || {};
839
840
  const _options = {
840
841
  headers: this.config.pickHeaders(url, true, headers, true),
@@ -872,17 +873,17 @@ export class Crawler {
872
873
  this.addToNavigationQueue(url, method, body, headersObj);
873
874
  }
874
875
  if (deepEmailFinder) {
875
- const p = this.execute2(method, url, body, _options, forceRevisit);
876
+ const p = this.execute2(method, url, body, _options, forceRevisit, emailMetadata);
876
877
  this.pendingExecutions.add(p);
877
878
  p.finally(() => this.pendingExecutions.delete(p));
878
879
  return this;
879
880
  }
880
- const p = this.execute(method, url, body, _options, extractLeads, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions, skipCache);
881
+ const p = this.execute(method, url, body, _options, extractLeads, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions, skipCache, emailMetadata);
881
882
  this.pendingExecutions.add(p);
882
883
  p.finally(() => this.pendingExecutions.delete(p));
883
884
  return this;
884
885
  }
885
- async execute(method, url, body, options = {}, isEmail, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions, skipCache) {
886
+ async execute(method, url, body, options = {}, isEmail, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions, skipCache, emailMetadata) {
886
887
  await this.waitForStorage();
887
888
  if (this.isCacheEnabled) {
888
889
  await this.waitForCache();
@@ -890,10 +891,10 @@ export class Crawler {
890
891
  if (this.config.enableNavigationHistory) {
891
892
  await this.waitForNavigationHistory();
892
893
  }
893
- const task = this.queue.add(() => this.executeHttp(method, url, body, options, isEmail, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions, 0, undefined, skipCache));
894
+ const task = this.queue.add(() => this.executeHttp(method, url, body, options, isEmail, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions, 0, undefined, skipCache, emailMetadata));
894
895
  task.finally(() => this.pendingExecutions.delete(task));
895
896
  }
896
- async execute2(method, url, body, options = {}, forceRevisit) {
897
+ async execute2(method, url, body, options = {}, forceRevisit, emailMetadata) {
897
898
  await this.waitForStorage();
898
899
  if (this.isCacheEnabled) {
899
900
  await this.waitForCache();
@@ -911,10 +912,11 @@ export class Crawler {
911
912
  onEmails: this.emailLeadsEvents,
912
913
  queue: this.queue,
913
914
  depth: 1,
914
- allowCrossDomainTravel: true
915
+ allowCrossDomainTravel: true,
916
+ emailMetadata
915
917
  }, forceRevisit, true)).then();
916
918
  }
917
- async executeHttp(method, url, body, options = {}, isEmail, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions, retryCount = 0, parentUrl, skipCache) {
919
+ async executeHttp(method, url, body, options = {}, isEmail, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions, retryCount = 0, parentUrl, skipCache, emailMetadata) {
918
920
  try {
919
921
  await this.triggerStartHandlers();
920
922
  const limitCheck = await this.checkCrawlLimits(url, parentUrl);
@@ -984,7 +986,7 @@ export class Crawler {
984
986
  });
985
987
  if (res.contentType && res.contentType.includes("/json")) {
986
988
  if (this.emailDiscoveredEvents.length > 0 || this.emailLeadsEvents.length > 0) {
987
- this.leadsFinder.extractEmails(JSON.stringify(res.data), res.finalUrl, this.emailDiscoveredEvents, this.emailLeadsEvents, this.queue);
989
+ this.leadsFinder.extractEmails(JSON.stringify(res.data), res.finalUrl, this.emailDiscoveredEvents, this.emailLeadsEvents, this.queue, emailMetadata);
988
990
  }
989
991
  for (let i = 0;i < this.jsonEvents.length; i++) {
990
992
  const event = this.jsonEvents[i];
@@ -999,7 +1001,7 @@ export class Crawler {
999
1001
  if (!res.contentType || !res.contentType.includes("/html") || typeof res.data !== "string")
1000
1002
  return;
1001
1003
  if ((this.emailDiscoveredEvents.length > 0 || this.emailLeadsEvents.length > 0) && isEmail) {
1002
- this.leadsFinder.extractEmails(res.data, res.finalUrl, this.emailDiscoveredEvents, this.emailLeadsEvents, this.queue);
1004
+ this.leadsFinder.extractEmails(res.data, res.finalUrl, this.emailDiscoveredEvents, this.emailLeadsEvents, this.queue, emailMetadata);
1003
1005
  }
1004
1006
  const { document } = parseHTML(res.data.addBaseUrl(res.finalUrl));
1005
1007
  document.URL = res.finalUrl;
@@ -1,40 +1,40 @@
1
- const _mod_o1i7dy = require('./crawler.cjs');
2
- exports.Crawler = _mod_o1i7dy.Crawler;;
3
- const _mod_9ywpl4 = require('./crawler-options.cjs');
4
- exports.CrawlerOptions = _mod_9ywpl4.CrawlerOptions;;
5
- const _mod_z2gv2k = require('./plugin/robots-txt.cjs');
6
- exports.RobotsTxt = _mod_z2gv2k.RobotsTxt;;
7
- const _mod_hipgel = require('./plugin/file-cacher.cjs');
8
- exports.FileCacher = _mod_hipgel.FileCacher;;
9
- const _mod_xk7y1s = require('./plugin/url-store.cjs');
10
- exports.UrlStore = _mod_xk7y1s.UrlStore;;
11
- const _mod_x224qe = require('./plugin/navigation-history.cjs');
12
- exports.NavigationHistory = _mod_x224qe.NavigationHistory;;
13
- const _mod_3sph6m = require('./addon/oxylabs/index.cjs');
14
- exports.Oxylabs = _mod_3sph6m.Oxylabs;;
15
- const _mod_3e4t9e = require('./addon/oxylabs/options.cjs');
16
- exports.OXYLABS_BROWSER_TYPES = _mod_3e4t9e.OXYLABS_BROWSER_TYPES;
17
- exports.OXYLABS_COMMON_LOCALES = _mod_3e4t9e.OXYLABS_COMMON_LOCALES;
18
- exports.OXYLABS_COMMON_GEO_LOCATIONS = _mod_3e4t9e.OXYLABS_COMMON_GEO_LOCATIONS;
19
- exports.OXYLABS_US_STATES = _mod_3e4t9e.OXYLABS_US_STATES;
20
- exports.OXYLABS_EUROPEAN_COUNTRIES = _mod_3e4t9e.OXYLABS_EUROPEAN_COUNTRIES;
21
- exports.OXYLABS_ASIAN_COUNTRIES = _mod_3e4t9e.OXYLABS_ASIAN_COUNTRIES;
22
- exports.getRandomOxylabsBrowserType = _mod_3e4t9e.getRandomBrowserType;
23
- exports.getRandomOxylabsLocale = _mod_3e4t9e.getRandomLocale;
24
- exports.getRandomOxylabsGeoLocation = _mod_3e4t9e.getRandomGeoLocation;;
25
- const _mod_jbcztc = require('./addon/decodo/index.cjs');
26
- exports.Decodo = _mod_jbcztc.Decodo;;
27
- const _mod_zqnknu = require('./addon/decodo/options.cjs');
28
- exports.DECODO_DEVICE_TYPES = _mod_zqnknu.DECODO_DEVICE_TYPES;
29
- exports.DECODO_HEADLESS_MODES = _mod_zqnknu.DECODO_HEADLESS_MODES;
30
- exports.DECODO_COMMON_LOCALES = _mod_zqnknu.DECODO_COMMON_LOCALES;
31
- exports.DECODO_COMMON_COUNTRIES = _mod_zqnknu.DECODO_COMMON_COUNTRIES;
32
- exports.DECODO_EUROPEAN_COUNTRIES = _mod_zqnknu.DECODO_EUROPEAN_COUNTRIES;
33
- exports.DECODO_ASIAN_COUNTRIES = _mod_zqnknu.DECODO_ASIAN_COUNTRIES;
34
- exports.DECODO_US_STATES = _mod_zqnknu.DECODO_US_STATES;
35
- exports.DECODO_COMMON_CITIES = _mod_zqnknu.DECODO_COMMON_CITIES;
36
- exports.getRandomDecodoDeviceType = _mod_zqnknu.getRandomDeviceType;
37
- exports.getRandomDecodoLocale = _mod_zqnknu.getRandomLocale;
38
- exports.getRandomDecodoCountry = _mod_zqnknu.getRandomCountry;
39
- exports.getRandomDecodoCity = _mod_zqnknu.getRandomCity;
40
- exports.generateDecodoSessionId = _mod_zqnknu.generateSessionId;;
1
+ const _mod_ysn0d5 = require('./crawler.cjs');
2
+ exports.Crawler = _mod_ysn0d5.Crawler;;
3
+ const _mod_9bawe3 = require('./crawler-options.cjs');
4
+ exports.CrawlerOptions = _mod_9bawe3.CrawlerOptions;;
5
+ const _mod_6prluk = require('./plugin/robots-txt.cjs');
6
+ exports.RobotsTxt = _mod_6prluk.RobotsTxt;;
7
+ const _mod_ixpf59 = require('./plugin/file-cacher.cjs');
8
+ exports.FileCacher = _mod_ixpf59.FileCacher;;
9
+ const _mod_bms0ov = require('./plugin/url-store.cjs');
10
+ exports.UrlStore = _mod_bms0ov.UrlStore;;
11
+ const _mod_wuzjow = require('./plugin/navigation-history.cjs');
12
+ exports.NavigationHistory = _mod_wuzjow.NavigationHistory;;
13
+ const _mod_uhf8sk = require('./addon/oxylabs/index.cjs');
14
+ exports.Oxylabs = _mod_uhf8sk.Oxylabs;;
15
+ const _mod_7o8pzc = require('./addon/oxylabs/options.cjs');
16
+ exports.OXYLABS_BROWSER_TYPES = _mod_7o8pzc.OXYLABS_BROWSER_TYPES;
17
+ exports.OXYLABS_COMMON_LOCALES = _mod_7o8pzc.OXYLABS_COMMON_LOCALES;
18
+ exports.OXYLABS_COMMON_GEO_LOCATIONS = _mod_7o8pzc.OXYLABS_COMMON_GEO_LOCATIONS;
19
+ exports.OXYLABS_US_STATES = _mod_7o8pzc.OXYLABS_US_STATES;
20
+ exports.OXYLABS_EUROPEAN_COUNTRIES = _mod_7o8pzc.OXYLABS_EUROPEAN_COUNTRIES;
21
+ exports.OXYLABS_ASIAN_COUNTRIES = _mod_7o8pzc.OXYLABS_ASIAN_COUNTRIES;
22
+ exports.getRandomOxylabsBrowserType = _mod_7o8pzc.getRandomBrowserType;
23
+ exports.getRandomOxylabsLocale = _mod_7o8pzc.getRandomLocale;
24
+ exports.getRandomOxylabsGeoLocation = _mod_7o8pzc.getRandomGeoLocation;;
25
+ const _mod_2w0w24 = require('./addon/decodo/index.cjs');
26
+ exports.Decodo = _mod_2w0w24.Decodo;;
27
+ const _mod_wkuxky = require('./addon/decodo/options.cjs');
28
+ exports.DECODO_DEVICE_TYPES = _mod_wkuxky.DECODO_DEVICE_TYPES;
29
+ exports.DECODO_HEADLESS_MODES = _mod_wkuxky.DECODO_HEADLESS_MODES;
30
+ exports.DECODO_COMMON_LOCALES = _mod_wkuxky.DECODO_COMMON_LOCALES;
31
+ exports.DECODO_COMMON_COUNTRIES = _mod_wkuxky.DECODO_COMMON_COUNTRIES;
32
+ exports.DECODO_EUROPEAN_COUNTRIES = _mod_wkuxky.DECODO_EUROPEAN_COUNTRIES;
33
+ exports.DECODO_ASIAN_COUNTRIES = _mod_wkuxky.DECODO_ASIAN_COUNTRIES;
34
+ exports.DECODO_US_STATES = _mod_wkuxky.DECODO_US_STATES;
35
+ exports.DECODO_COMMON_CITIES = _mod_wkuxky.DECODO_COMMON_CITIES;
36
+ exports.getRandomDecodoDeviceType = _mod_wkuxky.getRandomDeviceType;
37
+ exports.getRandomDecodoLocale = _mod_wkuxky.getRandomLocale;
38
+ exports.getRandomDecodoCountry = _mod_wkuxky.getRandomCountry;
39
+ exports.getRandomDecodoCity = _mod_wkuxky.getRandomCity;
40
+ exports.generateDecodoSessionId = _mod_wkuxky.generateSessionId;;
@@ -1 +1 @@
1
- var r=require("./file-cacher.cjs");exports.FileCacher=r.FileCacher;var e=require("./url-store.cjs");exports.UrlStore=e.UrlStore;
1
+ var e=require("./file-cacher.cjs");exports.FileCacher=e.FileCacher;var r=require("./url-store.cjs");exports.UrlStore=r.UrlStore;
@@ -1 +1 @@
1
- var{parseHTML:w}=require("linkedom");class x extends Set{maxSize;constructor(e){super();this.maxSize=e}add(e){if(this.has(e))return this;if(this.size>=this.maxSize){let n=this.values().next().value;if(n)this.delete(n)}return super.add(e)}}class v{http;httpOptions;onEmailLeads;onEmailDiscovered;debug;discoveredEmails=new x(1e4);userAgents=[];fileExtensions=[];restrictedDomains=$();forbiddenProtocols=["mailto:","tel:","javascript:","data:","sms:","ftp:","file:","irc:","blob:","chrome:","about:","intent:"];constructor(e,n,a,o,t=!1){this.http=e,this.httpOptions=n,this.onEmailLeads=a,this.onEmailDiscovered=o,this.debug=t,this.userAgents=y()}sleep(e){return new Promise((n)=>setTimeout(n,e))}async executeHttp(e,n,a,o,t,i=0){let{getCache:r,saveCache:c,hasUrlInCache:u,saveUrl:l,httpConfig:s={}}=o;if(!e||e.length<3||this.forbiddenProtocols.some((d)=>e.startsWith(d)))return;try{let d=t?!1:await u(e),h=await r(e);if(d&&!h)return!1;if(d&&n!=="GET")return!1;let m=h&&n==="GET"?h:await(n==="GET"?this.http.get(e,s):n==="PATCH"?this.http.patch(e,a,s):n==="POST"?this.http.post(e,a,s):this.http.put(e,a,s));if(!h)await c(e,{data:m.data,contentType:m.contentType,finalUrl:m.finalUrl});if(!d)await l(e);if(!m.contentType||!m.contentType.includes("/html")||!m.contentType.includes("text/")||typeof m.data!=="string")return null;return{data:m.data,contentType:m.contentType,finalUrl:m.finalUrl}}catch(d){let h=d,m=this.httpOptions;if(h&&h.response){let g=h.response.status,p=m.retryDelay||100,b=m.maxRetryAttempts||3,f=m.retryWithoutProxyOnStatusCode||void 0,k=m.maxRetryOnProxyError||3;if(f&&s.proxy&&f.includes(g)&&i<b)return await this.sleep(p),delete s.proxy,await this.executeHttp(e,n,a,o,t,i+1);else if(m.retryOnStatusCode&&s.proxy&&m.retryOnStatusCode.includes(g)&&i<b)return await this.sleep(p),await this.executeHttp(e,n,a,o,t,i+1);else if(m.retryOnProxyError&&s.proxy&&i<k)return await this.sleep(p),await this.executeHttp(e,n,a,o,t,i+1)}if(this.debug){if(this.debug)console.log(`Error: unable 
to ${n} ${e}: ${d.message}`)}return null}}extractEmails(e,n,a,o,t){let i=this.extractEmailsFromContent(e?.replaceAll("mailto:"," ")),r=[];for(let c of i)if(this.handleEmailDiscovery(c,n,a,t))r.push(c);if(o&&o.length>0&&r.length>0)t.add(async()=>Promise.all(o.map((c)=>c(r))));i.length=0,r.length=0}async parseExternalWebsite(e,n,a,o,t,i=!0,r,c){let u=o.httpConfig?.headers?o.httpConfig.headers instanceof Headers?Object.fromEntries(o.httpConfig.headers.entries()):o.httpConfig.headers:{};o.httpConfig=o.httpConfig||{},o.httpConfig.headers={"user-agent":this.getRandomUserAgent(),accept:"text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8","accept-language":"en-US,en;q=0.9","cache-control":"no-cache",pragma:"no-cache",...u},o.httpConfig.timeout=o.httpConfig.timeout||15000,o.depth=o.depth||0,o.allowCrossDomainTravel=o.allowCrossDomainTravel||!1,t=t&&i;let l=[];try{let s=new URL(e),d=this.extractRootDomain(e);if(this.isLinktreeUrl(e))return await this.parseLinktreeProfile(e,o,t);if(this.isRestrictedDomain(e)){if(this.debug)console.warn(`⚠️ Skipped URL (restricted url): ${e}`);return l}let h=await this.executeHttp(e,n,a,o,t);if(!h){if(this.debug&&h===null)console.warn(`⚠️ Failed to fetch page content: ${e}`);if(this.debug&&h===!1)console.warn(`⚠️ Skipped URL (already visited): ${e}`);return l}let m=this.extractEmailsFromContent(h.data?.replaceAll("mailto:"," "));for(let g of m)if(this.handleEmailDiscovery(g,e,o.onEmailDiscovered,o.queue))l.push(g);if(o.depth>0||!r){let g=w(h.data).document,p=this.extractRelevantLinks(g,s,d,o.depth,o.allowCrossDomainTravel);o.depth--;let b=await Promise.allSettled(p.map((f)=>this.parseExternalWebsite(f,"GET",null,{...o,depth:o.depth},t,!1,!0)));for(let f of b)if(f.status==="fulfilled")l.push(...f.value);else if(this.debug)console.warn("⚠️ Failed to parse child URL:",f.reason?.message)}}catch(s){if(this.debug)console.error(`❌ Error parsing external website: 
${e}`,s?.message)}if(i){if(o.onEmails&&o.onEmails.length>0)o.queue.add(async()=>Promise.all(o.onEmails.map((s)=>s(l))))}return l}async parseLinktreeProfile(e,n,a){let o=[];try{let t=await this.executeHttp(e,"GET",null,n,a);if(!t){if(this.debug)console.warn(`⚠️ Failed to fetch Linktree profile: ${e}`);return o}let r=w(t).document.getElementById("links-container");if(!r){if(this.debug)console.warn(`\uD83D\uDD0D No links container found in Linktree profile: ${e}`);return o}let c=this.extractLinktreeExternalUrls(r,e);if(c.length===0){if(this.debug)console.info("\uD83D\uDCED No valid external links found in Linktree profile");return o}if(this.debug)console.info(`\uD83C\uDFAF Found ${c.length} external links in Linktree profile`);let u=await Promise.allSettled(c.map((l)=>this.parseExternalWebsite(l,"GET",null,n,a,!1)));for(let l of u)if(l.status==="fulfilled")o.push(...l.value);else if(this.debug)console.warn("⚠️ Failed to parse Linktree external URL:",l.reason?.message)}catch(t){if(this.debug)console.error(`❌ Error parsing Linktree profile: ${e}`,t?.message)}return o}extractLinktreeExternalUrls(e,n){let a=new Set,o=e.querySelectorAll("a[href][target='_blank']");for(let t of o){let i=t.getAttribute("href");if(!i||i.length<3||this.forbiddenProtocols.some((r)=>i.startsWith(r)))continue;try{let r=new URL(i,n).href,c=this.extractRootDomain(r);if(c!=="linktr.ee"&&!this.isRestrictedDomain(r)&&c.length>3)a.add(r)}catch(r){if(this.debug)console.warn(`\uD83D\uDD17 Invalid URL in Linktree: ${i}`)}}return Array.from(a)}handleEmailDiscovery(e,n,a,o){if(!this.discoveredEmails.has(e)){this.discoveredEmails.add(e);let t={email:e,discoveredAt:n,timestamp:new Date};if(a&&a.length>0)o.add(async()=>Promise.all(a.map((i)=>i(t))));if(this.debug)console.info(`\uD83D\uDCE7 New email discovered: ${e} at ${n}`);return!0}return!1}isDomainAccessAllowed(e,n,a,o){if(o)return!0;if(a===0)return e===n;return e===n||e.endsWith(`.${n}`)||n.endsWith(`.${e}`)}extractRelevantLinks(e,n,a,o,t){let 
i=[],r=["about","contact","help","support","reach","email","mail","message","company","team","staff","info","inquiry","feedback","service","assistance","connect","touch"],c=e.querySelectorAll("a[href]");for(let u of c){let l=u.getAttribute("href");if(!l||l.length<2)continue;try{let s=this.normalizeUrl(l,n),d=this.extractRootDomain(s);if(!this.isDomainAccessAllowed(d,a,o,t))continue;if(r.some((m)=>s.toLowerCase().includes(m))||this.isLinktreeUrl(s))i.push(s)}catch(s){if(this.debug)console.warn(`\uD83D\uDD17 Invalid link found: ${l}`,s?.message)}}return i}extractEmailsFromContent(e){let n=e.replace(/[^\w@.-\s]/g," "),a=/\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/g,o=(c)=>{let u=/^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$/,l=c.split("@")[1]?.toLowerCase(),s=c.split(".").pop()?.toLowerCase();return u.test(c)&&l!==void 0&&s!==void 0&&!this.fileExtensions.includes(`.${s}`)&&!this.isRestrictedDomain(`https://${l}`)},t=(c)=>{return(c.match(a)||[]).filter(o)},i=e.replace(/<[^>]*>/g," "),r=[...t(i),...t(n)];return[...new Set(r)]}isRestrictedDomain(e){try{let n=new URL(e).host.toLowerCase();return this.restrictedDomains.some((a)=>n===a.toLowerCase()||n.endsWith(`.${a.toLowerCase()}`))}catch{return!0}}isLinktreeUrl(e){try{return this.extractRootDomain(e)==="linktr.ee"}catch{return!1}}extractRootDomain(e){try{let a=new URL(e).hostname.toLowerCase();return a.startsWith("www.")?a.slice(4):a}catch{return""}}normalizeUrl(e,n){if(e.startsWith("http://")||e.startsWith("https://"))return e;if(e.startsWith("//"))return`${n.protocol}${e}`;return new URL(e,n.href).href}getRandomUserAgent(){return this.userAgents[Math.floor(Math.random()*this.userAgents.length)]}}function y(){let 
e=[{name:"Chrome",version:"91.0.4472.124",engine:"AppleWebKit/537.36"},{name:"Firefox",version:"89.0",engine:"Gecko/20100101"},{name:"Safari",version:"14.1.1",engine:"AppleWebKit/605.1.15"},{name:"Edge",version:"91.0.864.59",engine:"AppleWebKit/537.36"},{name:"Opera",version:"77.0.4054.277",engine:"AppleWebKit/537.36"},{name:"Vivaldi",version:"3.8.2259.42",engine:"AppleWebKit/537.36"},{name:"Brave",version:"1.26.74",engine:"AppleWebKit/537.36"},{name:"Chromium",version:"91.0.4472.101",engine:"AppleWebKit/537.36"},{name:"Yandex",version:"21.5.3.742",engine:"AppleWebKit/537.36"},{name:"Maxthon",version:"5.3.8.2000",engine:"AppleWebKit/537.36"}],n=["Windows NT 10.0","Windows NT 6.1","Macintosh; Intel Mac OS X 10_15_7","Macintosh; Intel Mac OS X 11_4_0","X11; Linux x86_64","X11; Ubuntu; Linux x86_64"],a=[];for(let o=0;o<200;o++){let t=e[Math.floor(Math.random()*e.length)],i=n[Math.floor(Math.random()*n.length)],r="";switch(t.name){case"Chrome":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} Safari/537.36`;break;case"Firefox":r=`Mozilla/5.0 (${i}; rv:${t.version}) ${t.engine} Firefox/${t.version}`;break;case"Safari":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Version/${t.version} Safari/605.1.15`;break;case"Edge":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} Safari/537.36 Edg/${t.version}`;break;case"Opera":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} Safari/537.36 OPR/${t.version}`;break;case"Vivaldi":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} Safari/537.36 Vivaldi/${t.version}`;break;case"Brave":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} Safari/537.36 Brave/${t.version}`;break;case"Chromium":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chromium/${t.version} Chrome/${t.version} Safari/537.36`;break;case"Yandex":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} YaBrowser/${t.version} 
Safari/537.36`;break;case"Maxthon":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} Safari/537.36 Maxthon/${t.version}`;break}a.push(r)}return a}function $(){return["facebook.com","fb.com","messenger.com","instagram.com","threads.net","twitter.com","x.com","linkedin.com","pinterest.com","pin.it","reddit.com","tumblr.com","snapchat.com","tiktok.com","douyin.com","youtube.com","youtu.be","whatsapp.com","telegram.org","t.me","medium.com","quora.com","flickr.com","vimeo.com","vk.com","weibo.com","sina.com.cn","line.me","discord.com","discordapp.com","twitch.tv","meetup.com","nextdoor.com","xing.com","yelp.com","zalo.me","mastodon.social","clubhouse.com","patreon.com","onlyfans.com","douban.com","goodreads.com","soundcloud.com","spotify.com","last.fm","behance.net","dribbble.com","deviantart.com","pixiv.net","slideshare.net","tinder.com","bumble.com","etsy.com","indeed.com","glassdoor.com","monster.com","careerbuilder.com","dice.com","ziprecruiter.com","simplyhired.com","upwork.com","freelancer.com","fiverr.com","stackoverflow.com","stackoverflow.co","angel.co","wellfound.com","quora.com","stackexchange.com","yahoo.com","answers.microsoft.com","askubuntu.com","superuser.com","serverfault.com","mathoverflow.net","xda-developers.com","gamespot.com","ign.com","4chan.org","9gag.com","gizmodo.com","slashdot.org","hacker-news.news","ycombinator.com","producthunt.com","discourse.org","google.com","google.co.uk","google.de","google.fr","google.co.jp","bing.com","yahoo.com","search.yahoo.com","duckduckgo.com","baidu.com","yandex.com","yandex.ru","ask.com","wolframalpha.com","ecosia.org","startpage.com","qwant.com","searx.me","gibiru.com","swisscows.com","gmail.com","googlemail.com","outlook.com","hotmail.com","live.com","msn.com","yahoo.com","ymail.com","aol.com","icloud.com","me.com","mac.com","protonmail.com","pm.me","zoho.com","mail.com","gmx.com","gmx.net","yandex.com","yandex.ru","tutanota.com","tutanota.de","fastmail.com","hushmail.com","mailbox.org
","posteo.de","runbox.com","disroot.org","163.com","qq.com","rambler.ru","mail.ru","yelp.com","yelp.ca","yelp.co.uk","yelp.com.au","yellowpages.com","yellowpages.ca","yell.com","tripadvisor.com","tripadvisor.co.uk","tripadvisor.ca","foursquare.com","angieslist.com","bbb.org","manta.com","thumbtack.com","homeadvisor.com","superpages.com","whitepages.com","local.com","citysearch.com","merchantcircle.com","insiderpages.com","kudzu.com","hotfrog.com","buildzoom.com","houzz.com","porch.com","mapquest.com","zagat.com","zomato.com","opentable.com","viator.com","expedia.com","booking.com","airbnb.com","vrbo.com","homeaway.com","craigslist.org","nextdoor.com","patch.com","meetup.com","eventbrite.com","groupon.com","livingsocial.com","gumtree.com","gumtree.com.au","kijiji.ca","leboncoin.fr","finn.no","blocket.se","58.com","dianping.com","tabelog.com","ypcdn.com"]}exports.Scraper=v;exports.CappedSet=x;
1
const { parseHTML } = require("linkedom");

/**
 * Set with a fixed capacity: once `maxSize` values are stored, adding a new
 * value first evicts the oldest one (insertion order).
 */
class CappedSet extends Set {
  maxSize;

  /** @param {number} maxSize Maximum number of values retained. */
  constructor(maxSize) {
    super();
    this.maxSize = maxSize;
  }

  /**
   * Adds `value`, evicting the oldest entry when the set is full.
   * @param {*} value
   * @returns {this}
   */
  add(value) {
    if (this.has(value)) return this;
    if (this.size > 0 && this.size >= this.maxSize) {
      // Decide by size, not by the truthiness of the oldest value, so falsy
      // entries such as "" or 0 are evicted as well.
      this.delete(this.values().next().value);
    }
    return super.add(value);
  }
}

/**
 * Crawls external websites (and Linktree profiles) looking for email
 * addresses, reporting each new address to the supplied handlers.
 */
class Scraper {
  http;
  httpOptions;
  onEmailLeads;
  onEmailDiscovered;
  debug;
  discoveredEmails = new CappedSet(1e4);
  userAgents = [];
  fileExtensions = [];
  restrictedDomains = getRestrictedDomains();
  forbiddenProtocols = [
    "mailto:", "tel:", "javascript:", "data:", "sms:", "ftp:",
    "file:", "irc:", "blob:", "chrome:", "about:", "intent:",
  ];

  /**
   * @param {object} http Client exposing `get`, `post`, `put` and `patch`.
   * @param {object} httpOptions Retry settings read by `executeHttp`.
   * @param {Function[]} onEmailLeads Stored on the instance; not read here.
   * @param {Function[]} onEmailDiscovered Stored on the instance; not read here.
   * @param {boolean} [debug=false] Enables console diagnostics.
   */
  constructor(http, httpOptions, onEmailLeads, onEmailDiscovered, debug = false) {
    this.http = http;
    this.httpOptions = httpOptions;
    this.onEmailLeads = onEmailLeads;
    this.onEmailDiscovered = onEmailDiscovered;
    this.debug = debug;
    this.userAgents = generateUserAgents();
  }

  /** Resolves after `ms` milliseconds. */
  sleep(ms) {
    return new Promise((resolve) => setTimeout(resolve, ms));
  }

  /**
   * Fetches `url`, consulting the cache callbacks found on `options`.
   *
   * @param {string} url
   * @param {string} method "GET", "PATCH" or "POST"; anything else uses PUT.
   * @param {*} body Payload for non-GET requests.
   * @param {object} options Provides `getCache`, `saveCache`, `hasUrlInCache`,
   *   `saveUrl` and an optional `httpConfig`.
   * @param {boolean} skipVisitedCheck When truthy, `hasUrlInCache` is not consulted.
   * @param {number} [attempt=0] Retry counter used by the recursive retries.
   * @returns {Promise<{data: string, contentType: string, finalUrl: *}|null|false|undefined>}
   *   `undefined` for a rejected URL, `false` for an already visited URL,
   *   `null` on failure or non-HTML content, otherwise the page.
   */
  async executeHttp(url, method, body, options, skipVisitedCheck, attempt = 0) {
    const { getCache, saveCache, hasUrlInCache, saveUrl, httpConfig = {} } = options;
    if (!url || url.length < 3 || this.forbiddenProtocols.some((protocol) => url.startsWith(protocol))) {
      return undefined;
    }
    try {
      const alreadyVisited = skipVisitedCheck ? false : await hasUrlInCache(url);
      const cached = await getCache(url);
      if (alreadyVisited && (!cached || method !== "GET")) return false;

      let response;
      if (cached && method === "GET") response = cached;
      else if (method === "GET") response = await this.http.get(url, httpConfig);
      else if (method === "PATCH") response = await this.http.patch(url, body, httpConfig);
      else if (method === "POST") response = await this.http.post(url, body, httpConfig);
      else response = await this.http.put(url, body, httpConfig);

      const { data, contentType, finalUrl } = response;
      if (!cached) await saveCache(url, { data, contentType, finalUrl });
      if (!alreadyVisited) await saveUrl(url);

      const isHtmlText =
        Boolean(contentType) &&
        contentType.includes("/html") &&
        contentType.includes("text/") &&
        typeof data === "string";
      return isHtmlText ? { data, contentType, finalUrl } : null;
    } catch (error) {
      const settings = this.httpOptions;
      if (error && error.response) {
        const status = error.response.status;
        const delay = settings.retryDelay || 100;
        const maxAttempts = settings.maxRetryAttempts || 3;
        const noProxyStatuses = settings.retryWithoutProxyOnStatusCode;
        const maxProxyRetries = settings.maxRetryOnProxyError || 3;
        const retry = (retryOptions) =>
          this.executeHttp(url, method, body, retryOptions, skipVisitedCheck, attempt + 1);

        if (noProxyStatuses && httpConfig.proxy && noProxyStatuses.includes(status) && attempt < maxAttempts) {
          await this.sleep(delay);
          // Retry on a copy without the proxy: deleting the key from the shared
          // config would silently disable the proxy for every other request.
          const { proxy: _droppedProxy, ...directConfig } = httpConfig;
          return await retry({ ...options, httpConfig: directConfig });
        }
        if (
          settings.retryOnStatusCode &&
          httpConfig.proxy &&
          settings.retryOnStatusCode.includes(status) &&
          attempt < maxAttempts
        ) {
          await this.sleep(delay);
          return await retry(options);
        }
        if (settings.retryOnProxyError && httpConfig.proxy && attempt < maxProxyRetries) {
          await this.sleep(delay);
          return await retry(options);
        }
      }
      if (this.debug) console.log(`Error: unable to ${method} ${url}: ${error?.message}`);
      return null;
    }
  }

  /**
   * Extracts emails from `content`, reports new ones individually and then
   * queues the bulk `onEmails` handlers with the list of new addresses.
   *
   * @param {string} content Raw page text or HTML.
   * @param {string} discoveredAt URL the content came from.
   * @param {Function[]} onEmailDiscovered Per-email handlers.
   * @param {Function[]} onEmails Bulk handlers.
   * @param {{add: Function}} queue Task queue receiving the handler calls.
   * @param {object} [metadata] Attached to every discovery event.
   */
  extractEmails(content, discoveredAt, onEmailDiscovered, onEmails, queue, metadata) {
    const candidates = this.extractEmailsFromContent(content?.replaceAll("mailto:", " "));
    const fresh = candidates.filter((email) =>
      this.handleEmailDiscovery(email, discoveredAt, onEmailDiscovered, queue, metadata),
    );
    if (onEmails && onEmails.length > 0 && fresh.length > 0) {
      // `fresh` is handed over as-is and never cleared afterwards; emptying it
      // here would leave the queued handlers with an empty list.
      queue.add(async () => Promise.all(onEmails.map((handler) => handler(fresh))));
    }
  }

  /**
   * Scrapes `url` for emails and follows contact-like links.
   *
   * @param {string} url
   * @param {string} method HTTP method for the first request.
   * @param {*} body Payload for non-GET requests.
   * @param {object} options Crawl options; defaults for `httpConfig`, `depth`
   *   and `allowCrossDomainTravel` are written back onto this object.
   * @param {boolean} skipVisitedCheck Only honoured for the root call.
   * @param {boolean} [isRoot=true] Root calls dispatch `options.onEmails`.
   * @param {boolean} [isNested] Set on recursive calls; nested pages follow
   *   links only while `options.depth > 0`.
   * @param {*} [_reserved] Unused; kept for signature compatibility.
   * @returns {Promise<string[]>} Newly discovered emails.
   */
  async parseExternalWebsite(url, method, body, options, skipVisitedCheck, isRoot = true, isNested, _reserved) {
    const configuredHeaders = options.httpConfig?.headers;
    let callerHeaders = {};
    if (configuredHeaders) {
      callerHeaders =
        configuredHeaders instanceof Headers
          ? Object.fromEntries(configuredHeaders.entries())
          : configuredHeaders;
    }
    options.httpConfig = options.httpConfig || {};
    options.httpConfig.headers = {
      "user-agent": this.getRandomUserAgent(),
      accept: "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
      "accept-language": "en-US,en;q=0.9",
      "cache-control": "no-cache",
      pragma: "no-cache",
      ...callerHeaders,
    };
    options.httpConfig.timeout = options.httpConfig.timeout || 15000;
    options.depth = options.depth || 0;
    options.allowCrossDomainTravel = options.allowCrossDomainTravel || false;

    const skipVisited = skipVisitedCheck && isRoot;
    const found = [];
    try {
      const pageUrl = new URL(url);
      const rootDomain = this.extractRootDomain(url);
      if (this.isLinktreeUrl(url)) {
        return await this.parseLinktreeProfile(url, options, skipVisited);
      }
      if (this.isRestrictedDomain(url)) {
        if (this.debug) console.warn(`⚠️ Skipped URL (restricted url): ${url}`);
        return found;
      }
      const page = await this.executeHttp(url, method, body, options, skipVisited);
      if (!page) {
        if (this.debug && page === null) console.warn(`⚠️ Failed to fetch page content: ${url}`);
        if (this.debug && page === false) console.warn(`⚠️ Skipped URL (already visited): ${url}`);
        return found;
      }
      for (const email of this.extractEmailsFromContent(page.data?.replaceAll("mailto:", " "))) {
        if (this.handleEmailDiscovery(email, url, options.onEmailDiscovered, options.queue, options.emailMetadata)) {
          found.push(email);
        }
      }
      if (options.depth > 0 || !isNested) {
        const { document } = parseHTML(page.data);
        const links = this.extractRelevantLinks(
          document,
          pageUrl,
          rootDomain,
          options.depth,
          options.allowCrossDomainTravel,
        );
        // The remaining depth is computed locally: decrementing `options.depth`
        // would corrupt the budget of sibling calls sharing the same object.
        const childDepth = options.depth - 1;
        const outcomes = await Promise.allSettled(
          links.map((link) =>
            this.parseExternalWebsite(link, "GET", null, { ...options, depth: childDepth }, skipVisited, false, true),
          ),
        );
        for (const outcome of outcomes) {
          if (outcome.status === "fulfilled") found.push(...outcome.value);
          else if (this.debug) console.warn("⚠️ Failed to parse child URL:", outcome.reason?.message);
        }
      }
    } catch (error) {
      if (this.debug) console.error(`❌ Error parsing external website: ${url}`, error?.message);
    }
    if (isRoot && options.onEmails && options.onEmails.length > 0) {
      options.queue.add(async () => Promise.all(options.onEmails.map((handler) => handler(found))));
    }
    return found;
  }

  /**
   * Fetches a Linktree profile and scrapes every external site it links to.
   *
   * @param {string} url Linktree profile URL.
   * @param {object} options Crawl options passed through to each site.
   * @param {boolean} skipVisitedCheck Forwarded to `executeHttp`.
   * @returns {Promise<string[]>} Emails found on the linked sites.
   */
  async parseLinktreeProfile(url, options, skipVisitedCheck) {
    const found = [];
    try {
      const page = await this.executeHttp(url, "GET", null, options, skipVisitedCheck);
      if (!page) {
        if (this.debug) console.warn(`⚠️ Failed to fetch Linktree profile: ${url}`);
        return found;
      }
      // parseHTML needs the markup string, not the response wrapper object.
      const container = parseHTML(page.data).document.getElementById("links-container");
      if (!container) {
        if (this.debug) console.warn(`\uD83D\uDD0D No links container found in Linktree profile: ${url}`);
        return found;
      }
      const externalUrls = this.extractLinktreeExternalUrls(container, url);
      if (externalUrls.length === 0) {
        if (this.debug) console.info("\uD83D\uDCED No valid external links found in Linktree profile");
        return found;
      }
      if (this.debug) console.info(`\uD83C\uDFAF Found ${externalUrls.length} external links in Linktree profile`);
      const outcomes = await Promise.allSettled(
        externalUrls.map((link) => this.parseExternalWebsite(link, "GET", null, options, skipVisitedCheck, false)),
      );
      for (const outcome of outcomes) {
        if (outcome.status === "fulfilled") found.push(...outcome.value);
        else if (this.debug) console.warn("⚠️ Failed to parse Linktree external URL:", outcome.reason?.message);
      }
    } catch (error) {
      if (this.debug) console.error(`❌ Error parsing Linktree profile: ${url}`, error?.message);
    }
    return found;
  }

  /**
   * Collects the unique, non-restricted external URLs inside a Linktree
   * links container.
   *
   * @param {Element} container Element holding the profile links.
   * @param {string} baseUrl Base used to resolve relative hrefs.
   * @returns {string[]}
   */
  extractLinktreeExternalUrls(container, baseUrl) {
    const urls = new Set();
    for (const anchor of container.querySelectorAll("a[href][target='_blank']")) {
      const href = anchor.getAttribute("href");
      if (!href || href.length < 3 || this.forbiddenProtocols.some((protocol) => href.startsWith(protocol))) {
        continue;
      }
      try {
        const absolute = new URL(href, baseUrl).href;
        const domain = this.extractRootDomain(absolute);
        if (domain !== "linktr.ee" && !this.isRestrictedDomain(absolute) && domain.length > 3) {
          urls.add(absolute);
        }
      } catch (error) {
        if (this.debug) console.warn(`\uD83D\uDD17 Invalid URL in Linktree: ${href}`);
      }
    }
    return Array.from(urls);
  }

  /**
   * Records `email` if it has not been seen and queues the per-email handlers.
   *
   * @param {string} email
   * @param {string} discoveredAt URL where the email was found.
   * @param {Function[]} handlers Per-email handlers.
   * @param {{add: Function}} queue Task queue receiving the handler calls.
   * @param {object} [metadata] Attached to the event (defaults to `{}`).
   * @returns {boolean} `true` when the email is new.
   */
  handleEmailDiscovery(email, discoveredAt, handlers, queue, metadata) {
    if (this.discoveredEmails.has(email)) return false;
    this.discoveredEmails.add(email);
    const event = { email, discoveredAt, timestamp: new Date(), metadata: metadata || {} };
    if (handlers && handlers.length > 0) {
      queue.add(async () => Promise.all(handlers.map((handler) => handler(event))));
    }
    if (this.debug) console.info(`\uD83D\uDCE7 New email discovered: ${email} at ${discoveredAt}`);
    return true;
  }

  /**
   * Decides whether a link's domain may be visited from `rootDomain`.
   * At depth 0 only the exact domain is allowed; otherwise parent and
   * sub-domains are accepted too.
   */
  isDomainAccessAllowed(domain, rootDomain, depth, allowCrossDomain) {
    if (allowCrossDomain) return true;
    if (depth === 0) return domain === rootDomain;
    return domain === rootDomain || domain.endsWith(`.${rootDomain}`) || rootDomain.endsWith(`.${domain}`);
  }

  /**
   * Returns the unique links of `document` whose URL contains a contact-like
   * keyword (or points at Linktree) and whose domain may be visited.
   *
   * @param {Document} document Parsed page.
   * @param {URL} pageUrl URL of the page, used to resolve relative links.
   * @param {string} rootDomain Domain of the page.
   * @param {number} depth Remaining crawl depth.
   * @param {boolean} allowCrossDomain Whether foreign domains are allowed.
   * @returns {string[]}
   */
  extractRelevantLinks(document, pageUrl, rootDomain, depth, allowCrossDomain) {
    const keywords = [
      "about", "contact", "help", "support", "reach", "email", "mail", "message", "company",
      "team", "staff", "info", "inquiry", "feedback", "service", "assistance", "connect", "touch",
    ];
    // A Set keeps first-seen order while dropping links repeated on the page.
    const links = new Set();
    for (const anchor of document.querySelectorAll("a[href]")) {
      const href = anchor.getAttribute("href");
      if (!href || href.length < 2) continue;
      try {
        const absolute = this.normalizeUrl(href, pageUrl);
        const domain = this.extractRootDomain(absolute);
        if (!this.isDomainAccessAllowed(domain, rootDomain, depth, allowCrossDomain)) continue;
        const lowered = absolute.toLowerCase();
        if (keywords.some((keyword) => lowered.includes(keyword)) || this.isLinktreeUrl(absolute)) {
          links.add(absolute);
        }
      } catch (error) {
        if (this.debug) console.warn(`\uD83D\uDD17 Invalid link found: ${href}`, error?.message);
      }
    }
    return [...links];
  }

  /**
   * Finds unique, plausible email addresses in `content`.
   *
   * @param {string} content Raw text or HTML.
   * @returns {string[]} Empty when `content` is not a string.
   */
  extractEmailsFromContent(content) {
    if (typeof content !== "string") return [];
    const pattern = /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/g;
    const exact = /^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$/;
    const isAcceptable = (candidate) => {
      const domain = candidate.split("@")[1]?.toLowerCase();
      const suffix = candidate.split(".").pop()?.toLowerCase();
      return (
        exact.test(candidate) &&
        domain !== undefined &&
        suffix !== undefined &&
        !this.fileExtensions.includes(`.${suffix}`) &&
        !this.isRestrictedDomain(`https://${domain}`)
      );
    };
    const collect = (text) => (text.match(pattern) || []).filter(isAcceptable);
    // Two passes: one with tags removed, one with every character that cannot
    // be part of an address blanked out.
    const withoutTags = content.replace(/<[^>]*>/g, " ");
    const withoutSymbols = content.replace(/[^\w@.\s-]/g, " ");
    return [...new Set([...collect(withoutTags), ...collect(withoutSymbols)])];
  }

  /**
   * Tells whether `url` belongs to a restricted domain or one of its
   * sub-domains. Unparseable URLs count as restricted.
   */
  isRestrictedDomain(url) {
    try {
      // hostname (not host) so an explicit port cannot bypass the list.
      const hostname = new URL(url).hostname.toLowerCase();
      return this.restrictedDomains.some((entry) => {
        const restricted = entry.toLowerCase();
        return hostname === restricted || hostname.endsWith(`.${restricted}`);
      });
    } catch {
      return true;
    }
  }

  /** Tells whether `url` points at linktr.ee. */
  isLinktreeUrl(url) {
    return this.extractRootDomain(url) === "linktr.ee";
  }

  /** Returns the lower-cased hostname without a leading "www.", or "" when invalid. */
  extractRootDomain(url) {
    try {
      const hostname = new URL(url).hostname.toLowerCase();
      return hostname.startsWith("www.") ? hostname.slice(4) : hostname;
    } catch {
      return "";
    }
  }

  /**
   * Resolves `href` against `base`.
   * @param {string} href
   * @param {URL} base
   * @returns {string}
   */
  normalizeUrl(href, base) {
    if (href.startsWith("http://") || href.startsWith("https://")) return href;
    if (href.startsWith("//")) return `${base.protocol}${href}`;
    return new URL(href, base.href).href;
  }

  /** Picks one of the generated user-agent strings at random. */
  getRandomUserAgent() {
    return this.userAgents[Math.floor(Math.random() * this.userAgents.length)];
  }
}

/**
 * Builds 200 user-agent strings from random browser/platform combinations.
 * @returns {string[]}
 */
function generateUserAgents() {
  const browsers = [
    { name: "Chrome", version: "91.0.4472.124", engine: "AppleWebKit/537.36" },
    { name: "Firefox", version: "89.0", engine: "Gecko/20100101" },
    { name: "Safari", version: "14.1.1", engine: "AppleWebKit/605.1.15" },
    { name: "Edge", version: "91.0.864.59", engine: "AppleWebKit/537.36" },
    { name: "Opera", version: "77.0.4054.277", engine: "AppleWebKit/537.36" },
    { name: "Vivaldi", version: "3.8.2259.42", engine: "AppleWebKit/537.36" },
    { name: "Brave", version: "1.26.74", engine: "AppleWebKit/537.36" },
    { name: "Chromium", version: "91.0.4472.101", engine: "AppleWebKit/537.36" },
    { name: "Yandex", version: "21.5.3.742", engine: "AppleWebKit/537.36" },
    { name: "Maxthon", version: "5.3.8.2000", engine: "AppleWebKit/537.36" },
  ];
  const platforms = [
    "Windows NT 10.0",
    "Windows NT 6.1",
    "Macintosh; Intel Mac OS X 10_15_7",
    "Macintosh; Intel Mac OS X 11_4_0",
    "X11; Linux x86_64",
    "X11; Ubuntu; Linux x86_64",
  ];
  const formats = {
    Chrome: (os, b) => `Mozilla/5.0 (${os}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36`,
    Firefox: (os, b) => `Mozilla/5.0 (${os}; rv:${b.version}) ${b.engine} Firefox/${b.version}`,
    Safari: (os, b) => `Mozilla/5.0 (${os}) ${b.engine} (KHTML, like Gecko) Version/${b.version} Safari/605.1.15`,
    Edge: (os, b) => `Mozilla/5.0 (${os}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36 Edg/${b.version}`,
    Opera: (os, b) => `Mozilla/5.0 (${os}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36 OPR/${b.version}`,
    Vivaldi: (os, b) => `Mozilla/5.0 (${os}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36 Vivaldi/${b.version}`,
    Brave: (os, b) => `Mozilla/5.0 (${os}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36 Brave/${b.version}`,
    Chromium: (os, b) => `Mozilla/5.0 (${os}) ${b.engine} (KHTML, like Gecko) Chromium/${b.version} Chrome/${b.version} Safari/537.36`,
    Yandex: (os, b) => `Mozilla/5.0 (${os}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} YaBrowser/${b.version} Safari/537.36`,
    Maxthon: (os, b) => `Mozilla/5.0 (${os}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36 Maxthon/${b.version}`,
  };
  const pick = (list) => list[Math.floor(Math.random() * list.length)];
  return Array.from({ length: 200 }, () => {
    const browser = pick(browsers);
    const platform = pick(platforms);
    return formats[browser.name](platform, browser);
  });
}

/**
 * Domains that are never crawled and whose mailboxes are ignored: social
 * networks, search engines, public email providers and business directories.
 * @returns {string[]} De-duplicated list in its original order.
 */
function getRestrictedDomains() {
  return [...new Set([
    "facebook.com", "fb.com", "messenger.com", "instagram.com", "threads.net", "twitter.com", "x.com",
    "linkedin.com", "pinterest.com", "pin.it", "reddit.com", "tumblr.com", "snapchat.com", "tiktok.com",
    "douyin.com", "youtube.com", "youtu.be", "whatsapp.com", "telegram.org", "t.me", "medium.com",
    "quora.com", "flickr.com", "vimeo.com", "vk.com", "weibo.com", "sina.com.cn", "line.me",
    "discord.com", "discordapp.com", "twitch.tv", "meetup.com", "nextdoor.com", "xing.com", "yelp.com",
    "zalo.me", "mastodon.social", "clubhouse.com", "patreon.com", "onlyfans.com", "douban.com",
    "goodreads.com", "soundcloud.com", "spotify.com", "last.fm", "behance.net", "dribbble.com",
    "deviantart.com", "pixiv.net", "slideshare.net", "tinder.com", "bumble.com", "etsy.com",
    "indeed.com", "glassdoor.com", "monster.com", "careerbuilder.com", "dice.com", "ziprecruiter.com",
    "simplyhired.com", "upwork.com", "freelancer.com", "fiverr.com", "stackoverflow.com",
    "stackoverflow.co", "angel.co", "wellfound.com", "quora.com", "stackexchange.com", "yahoo.com",
    "answers.microsoft.com", "askubuntu.com", "superuser.com", "serverfault.com", "mathoverflow.net",
    "xda-developers.com", "gamespot.com", "ign.com", "4chan.org", "9gag.com", "gizmodo.com",
    "slashdot.org", "hacker-news.news", "ycombinator.com", "producthunt.com", "discourse.org",
    "google.com", "google.co.uk", "google.de", "google.fr", "google.co.jp", "bing.com", "yahoo.com",
    "search.yahoo.com", "duckduckgo.com", "baidu.com", "yandex.com", "yandex.ru", "ask.com",
    "wolframalpha.com", "ecosia.org", "startpage.com", "qwant.com", "searx.me", "gibiru.com",
    "swisscows.com", "gmail.com", "googlemail.com", "outlook.com", "hotmail.com", "live.com",
    "msn.com", "yahoo.com", "ymail.com", "aol.com", "icloud.com", "me.com", "mac.com",
    "protonmail.com", "pm.me", "zoho.com", "mail.com", "gmx.com", "gmx.net", "yandex.com",
    "yandex.ru", "tutanota.com", "tutanota.de", "fastmail.com", "hushmail.com", "mailbox.org",
    "posteo.de", "runbox.com", "disroot.org", "163.com", "qq.com", "rambler.ru", "mail.ru",
    "yelp.com", "yelp.ca", "yelp.co.uk", "yelp.com.au", "yellowpages.com", "yellowpages.ca",
    "yell.com", "tripadvisor.com", "tripadvisor.co.uk", "tripadvisor.ca", "foursquare.com",
    "angieslist.com", "bbb.org", "manta.com", "thumbtack.com", "homeadvisor.com", "superpages.com",
    "whitepages.com", "local.com", "citysearch.com", "merchantcircle.com", "insiderpages.com",
    "kudzu.com", "hotfrog.com", "buildzoom.com", "houzz.com", "porch.com", "mapquest.com",
    "zagat.com", "zomato.com", "opentable.com", "viator.com", "expedia.com", "booking.com",
    "airbnb.com", "vrbo.com", "homeaway.com", "craigslist.org", "nextdoor.com", "patch.com",
    "meetup.com", "eventbrite.com", "groupon.com", "livingsocial.com", "gumtree.com",
    "gumtree.com.au", "kijiji.ca", "leboncoin.fr", "finn.no", "blocket.se", "58.com",
    "dianping.com", "tabelog.com", "ypcdn.com",
  ])];
}

exports.Scraper = Scraper;
exports.CappedSet = CappedSet;
@@ -1 +1 @@
1
import { parseHTML } from "linkedom";

/**
 * Set with a fixed capacity: once `maxSize` values are stored, adding a new
 * value first evicts the oldest one (insertion order).
 */
class CappedSet extends Set {
  maxSize;

  /** @param {number} maxSize Maximum number of values retained. */
  constructor(maxSize) {
    super();
    this.maxSize = maxSize;
  }

  /**
   * Adds `value`, evicting the oldest entry when the set is full.
   * @param {*} value
   * @returns {this}
   */
  add(value) {
    if (this.has(value)) return this;
    if (this.size > 0 && this.size >= this.maxSize) {
      // Decide by size, not by the truthiness of the oldest value, so falsy
      // entries such as "" or 0 are evicted as well.
      this.delete(this.values().next().value);
    }
    return super.add(value);
  }
}

/**
 * Crawls external websites (and Linktree profiles) looking for email
 * addresses, reporting each new address to the supplied handlers.
 */
class Scraper {
  http;
  httpOptions;
  onEmailLeads;
  onEmailDiscovered;
  debug;
  discoveredEmails = new CappedSet(1e4);
  userAgents = [];
  fileExtensions = [];
  restrictedDomains = getRestrictedDomains();
  forbiddenProtocols = [
    "mailto:", "tel:", "javascript:", "data:", "sms:", "ftp:",
    "file:", "irc:", "blob:", "chrome:", "about:", "intent:",
  ];

  /**
   * @param {object} http Client exposing `get`, `post`, `put` and `patch`.
   * @param {object} httpOptions Retry settings read by `executeHttp`.
   * @param {Function[]} onEmailLeads Stored on the instance; not read here.
   * @param {Function[]} onEmailDiscovered Stored on the instance; not read here.
   * @param {boolean} [debug=false] Enables console diagnostics.
   */
  constructor(http, httpOptions, onEmailLeads, onEmailDiscovered, debug = false) {
    this.http = http;
    this.httpOptions = httpOptions;
    this.onEmailLeads = onEmailLeads;
    this.onEmailDiscovered = onEmailDiscovered;
    this.debug = debug;
    this.userAgents = generateUserAgents();
  }

  /** Resolves after `ms` milliseconds. */
  sleep(ms) {
    return new Promise((resolve) => setTimeout(resolve, ms));
  }

  /**
   * Fetches `url`, consulting the cache callbacks found on `options`.
   *
   * @param {string} url
   * @param {string} method "GET", "PATCH" or "POST"; anything else uses PUT.
   * @param {*} body Payload for non-GET requests.
   * @param {object} options Provides `getCache`, `saveCache`, `hasUrlInCache`,
   *   `saveUrl` and an optional `httpConfig`.
   * @param {boolean} skipVisitedCheck When truthy, `hasUrlInCache` is not consulted.
   * @param {number} [attempt=0] Retry counter used by the recursive retries.
   * @returns {Promise<{data: string, contentType: string, finalUrl: *}|null|false|undefined>}
   *   `undefined` for a rejected URL, `false` for an already visited URL,
   *   `null` on failure or non-HTML content, otherwise the page.
   */
  async executeHttp(url, method, body, options, skipVisitedCheck, attempt = 0) {
    const { getCache, saveCache, hasUrlInCache, saveUrl, httpConfig = {} } = options;
    if (!url || url.length < 3 || this.forbiddenProtocols.some((protocol) => url.startsWith(protocol))) {
      return undefined;
    }
    try {
      const alreadyVisited = skipVisitedCheck ? false : await hasUrlInCache(url);
      const cached = await getCache(url);
      if (alreadyVisited && (!cached || method !== "GET")) return false;

      let response;
      if (cached && method === "GET") response = cached;
      else if (method === "GET") response = await this.http.get(url, httpConfig);
      else if (method === "PATCH") response = await this.http.patch(url, body, httpConfig);
      else if (method === "POST") response = await this.http.post(url, body, httpConfig);
      else response = await this.http.put(url, body, httpConfig);

      const { data, contentType, finalUrl } = response;
      if (!cached) await saveCache(url, { data, contentType, finalUrl });
      if (!alreadyVisited) await saveUrl(url);

      const isHtmlText =
        Boolean(contentType) &&
        contentType.includes("/html") &&
        contentType.includes("text/") &&
        typeof data === "string";
      return isHtmlText ? { data, contentType, finalUrl } : null;
    } catch (error) {
      const settings = this.httpOptions;
      if (error && error.response) {
        const status = error.response.status;
        const delay = settings.retryDelay || 100;
        const maxAttempts = settings.maxRetryAttempts || 3;
        const noProxyStatuses = settings.retryWithoutProxyOnStatusCode;
        const maxProxyRetries = settings.maxRetryOnProxyError || 3;
        const retry = (retryOptions) =>
          this.executeHttp(url, method, body, retryOptions, skipVisitedCheck, attempt + 1);

        if (noProxyStatuses && httpConfig.proxy && noProxyStatuses.includes(status) && attempt < maxAttempts) {
          await this.sleep(delay);
          // Retry on a copy without the proxy: deleting the key from the shared
          // config would silently disable the proxy for every other request.
          const { proxy: _droppedProxy, ...directConfig } = httpConfig;
          return await retry({ ...options, httpConfig: directConfig });
        }
        if (
          settings.retryOnStatusCode &&
          httpConfig.proxy &&
          settings.retryOnStatusCode.includes(status) &&
          attempt < maxAttempts
        ) {
          await this.sleep(delay);
          return await retry(options);
        }
        if (settings.retryOnProxyError && httpConfig.proxy && attempt < maxProxyRetries) {
          await this.sleep(delay);
          return await retry(options);
        }
      }
      if (this.debug) console.log(`Error: unable to ${method} ${url}: ${error?.message}`);
      return null;
    }
  }

  /**
   * Extracts emails from `content`, reports new ones individually and then
   * queues the bulk `onEmails` handlers with the list of new addresses.
   *
   * @param {string} content Raw page text or HTML.
   * @param {string} discoveredAt URL the content came from.
   * @param {Function[]} onEmailDiscovered Per-email handlers.
   * @param {Function[]} onEmails Bulk handlers.
   * @param {{add: Function}} queue Task queue receiving the handler calls.
   */
  extractEmails(content, discoveredAt, onEmailDiscovered, onEmails, queue) {
    const candidates = this.extractEmailsFromContent(content?.replaceAll("mailto:", " "));
    const fresh = candidates.filter((email) =>
      this.handleEmailDiscovery(email, discoveredAt, onEmailDiscovered, queue),
    );
    if (onEmails && onEmails.length > 0 && fresh.length > 0) {
      // `fresh` is handed over as-is and never cleared afterwards; emptying it
      // here would leave the queued handlers with an empty list.
      queue.add(async () => Promise.all(onEmails.map((handler) => handler(fresh))));
    }
  }

  /**
   * Scrapes `url` for emails and follows contact-like links.
   *
   * @param {string} url
   * @param {string} method HTTP method for the first request.
   * @param {*} body Payload for non-GET requests.
   * @param {object} options Crawl options; defaults for `httpConfig`, `depth`
   *   and `allowCrossDomainTravel` are written back onto this object.
   * @param {boolean} skipVisitedCheck Only honoured for the root call.
   * @param {boolean} [isRoot=true] Root calls dispatch `options.onEmails`.
   * @param {boolean} [isNested] Set on recursive calls; nested pages follow
   *   links only while `options.depth > 0`.
   * @param {*} [_reserved] Unused; kept for signature compatibility.
   * @returns {Promise<string[]>} Newly discovered emails.
   */
  async parseExternalWebsite(url, method, body, options, skipVisitedCheck, isRoot = true, isNested, _reserved) {
    const configuredHeaders = options.httpConfig?.headers;
    let callerHeaders = {};
    if (configuredHeaders) {
      callerHeaders =
        configuredHeaders instanceof Headers
          ? Object.fromEntries(configuredHeaders.entries())
          : configuredHeaders;
    }
    options.httpConfig = options.httpConfig || {};
    options.httpConfig.headers = {
      "user-agent": this.getRandomUserAgent(),
      accept: "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
      "accept-language": "en-US,en;q=0.9",
      "cache-control": "no-cache",
      pragma: "no-cache",
      ...callerHeaders,
    };
    options.httpConfig.timeout = options.httpConfig.timeout || 15000;
    options.depth = options.depth || 0;
    options.allowCrossDomainTravel = options.allowCrossDomainTravel || false;

    const skipVisited = skipVisitedCheck && isRoot;
    const found = [];
    try {
      const pageUrl = new URL(url);
      const rootDomain = this.extractRootDomain(url);
      if (this.isLinktreeUrl(url)) {
        return await this.parseLinktreeProfile(url, options, skipVisited);
      }
      if (this.isRestrictedDomain(url)) {
        if (this.debug) console.warn(`⚠️ Skipped URL (restricted url): ${url}`);
        return found;
      }
      const page = await this.executeHttp(url, method, body, options, skipVisited);
      if (!page) {
        if (this.debug && page === null) console.warn(`⚠️ Failed to fetch page content: ${url}`);
        if (this.debug && page === false) console.warn(`⚠️ Skipped URL (already visited): ${url}`);
        return found;
      }
      for (const email of this.extractEmailsFromContent(page.data?.replaceAll("mailto:", " "))) {
        if (this.handleEmailDiscovery(email, url, options.onEmailDiscovered, options.queue)) {
          found.push(email);
        }
      }
      if (options.depth > 0 || !isNested) {
        const { document } = parseHTML(page.data);
        const links = this.extractRelevantLinks(
          document,
          pageUrl,
          rootDomain,
          options.depth,
          options.allowCrossDomainTravel,
        );
        // The remaining depth is computed locally: decrementing `options.depth`
        // would corrupt the budget of sibling calls sharing the same object.
        const childDepth = options.depth - 1;
        const outcomes = await Promise.allSettled(
          links.map((link) =>
            this.parseExternalWebsite(link, "GET", null, { ...options, depth: childDepth }, skipVisited, false, true),
          ),
        );
        for (const outcome of outcomes) {
          if (outcome.status === "fulfilled") found.push(...outcome.value);
          else if (this.debug) console.warn("⚠️ Failed to parse child URL:", outcome.reason?.message);
        }
      }
    } catch (error) {
      if (this.debug) console.error(`❌ Error parsing external website: ${url}`, error?.message);
    }
    if (isRoot && options.onEmails && options.onEmails.length > 0) {
      options.queue.add(async () => Promise.all(options.onEmails.map((handler) => handler(found))));
    }
    return found;
  }

  /**
   * Fetches a Linktree profile and scrapes every external site it links to.
   *
   * @param {string} url Linktree profile URL.
   * @param {object} options Crawl options passed through to each site.
   * @param {boolean} skipVisitedCheck Forwarded to `executeHttp`.
   * @returns {Promise<string[]>} Emails found on the linked sites.
   */
  async parseLinktreeProfile(url, options, skipVisitedCheck) {
    const found = [];
    try {
      const page = await this.executeHttp(url, "GET", null, options, skipVisitedCheck);
      if (!page) {
        if (this.debug) console.warn(`⚠️ Failed to fetch Linktree profile: ${url}`);
        return found;
      }
      // parseHTML needs the markup string, not the response wrapper object.
      const container = parseHTML(page.data).document.getElementById("links-container");
      if (!container) {
        if (this.debug) console.warn(`\uD83D\uDD0D No links container found in Linktree profile: ${url}`);
        return found;
      }
      const externalUrls = this.extractLinktreeExternalUrls(container, url);
      if (externalUrls.length === 0) {
        if (this.debug) console.info("\uD83D\uDCED No valid external links found in Linktree profile");
        return found;
      }
      if (this.debug) console.info(`\uD83C\uDFAF Found ${externalUrls.length} external links in Linktree profile`);
      const outcomes = await Promise.allSettled(
        externalUrls.map((link) => this.parseExternalWebsite(link, "GET", null, options, skipVisitedCheck, false)),
      );
      for (const outcome of outcomes) {
        if (outcome.status === "fulfilled") found.push(...outcome.value);
        else if (this.debug) console.warn("⚠️ Failed to parse Linktree external URL:", outcome.reason?.message);
      }
    } catch (error) {
      if (this.debug) console.error(`❌ Error parsing Linktree profile: ${url}`, error?.message);
    }
    return found;
  }

  /**
   * Collects the unique, non-restricted external URLs inside a Linktree
   * links container.
   *
   * @param {Element} container Element holding the profile links.
   * @param {string} baseUrl Base used to resolve relative hrefs.
   * @returns {string[]}
   */
  extractLinktreeExternalUrls(container, baseUrl) {
    const urls = new Set();
    for (const anchor of container.querySelectorAll("a[href][target='_blank']")) {
      const href = anchor.getAttribute("href");
      if (!href || href.length < 3 || this.forbiddenProtocols.some((protocol) => href.startsWith(protocol))) {
        continue;
      }
      try {
        const absolute = new URL(href, baseUrl).href;
        const domain = this.extractRootDomain(absolute);
        if (domain !== "linktr.ee" && !this.isRestrictedDomain(absolute) && domain.length > 3) {
          urls.add(absolute);
        }
      } catch (error) {
        if (this.debug) console.warn(`\uD83D\uDD17 Invalid URL in Linktree: ${href}`);
      }
    }
    return Array.from(urls);
  }

  /**
   * Records `email` if it has not been seen and queues the per-email handlers.
   *
   * @param {string} email
   * @param {string} discoveredAt URL where the email was found.
   * @param {Function[]} handlers Per-email handlers.
   * @param {{add: Function}} queue Task queue receiving the handler calls.
   * @returns {boolean} `true` when the email is new.
   */
  handleEmailDiscovery(email, discoveredAt, handlers, queue) {
    if (this.discoveredEmails.has(email)) return false;
    this.discoveredEmails.add(email);
    const event = { email, discoveredAt, timestamp: new Date() };
    if (handlers && handlers.length > 0) {
      queue.add(async () => Promise.all(handlers.map((handler) => handler(event))));
    }
    if (this.debug) console.info(`\uD83D\uDCE7 New email discovered: ${email} at ${discoveredAt}`);
    return true;
  }

  /**
   * Decides whether a link's domain may be visited from `rootDomain`.
   * At depth 0 only the exact domain is allowed; otherwise parent and
   * sub-domains are accepted too.
   */
  isDomainAccessAllowed(domain, rootDomain, depth, allowCrossDomain) {
    if (allowCrossDomain) return true;
    if (depth === 0) return domain === rootDomain;
    return domain === rootDomain || domain.endsWith(`.${rootDomain}`) || rootDomain.endsWith(`.${domain}`);
  }

  /**
   * Returns the unique links of `document` whose URL contains a contact-like
   * keyword (or points at Linktree) and whose domain may be visited.
   *
   * @param {Document} document Parsed page.
   * @param {URL} pageUrl URL of the page, used to resolve relative links.
   * @param {string} rootDomain Domain of the page.
   * @param {number} depth Remaining crawl depth.
   * @param {boolean} allowCrossDomain Whether foreign domains are allowed.
   * @returns {string[]}
   */
  extractRelevantLinks(document, pageUrl, rootDomain, depth, allowCrossDomain) {
    const keywords = [
      "about", "contact", "help", "support", "reach", "email", "mail", "message", "company",
      "team", "staff", "info", "inquiry", "feedback", "service", "assistance", "connect", "touch",
    ];
    // A Set keeps first-seen order while dropping links repeated on the page.
    const links = new Set();
    for (const anchor of document.querySelectorAll("a[href]")) {
      const href = anchor.getAttribute("href");
      if (!href || href.length < 2) continue;
      try {
        const absolute = this.normalizeUrl(href, pageUrl);
        const domain = this.extractRootDomain(absolute);
        if (!this.isDomainAccessAllowed(domain, rootDomain, depth, allowCrossDomain)) continue;
        const lowered = absolute.toLowerCase();
        if (keywords.some((keyword) => lowered.includes(keyword)) || this.isLinktreeUrl(absolute)) {
          links.add(absolute);
        }
      } catch (error) {
        if (this.debug) console.warn(`\uD83D\uDD17 Invalid link found: ${href}`, error?.message);
      }
    }
    return [...links];
  }

  /**
   * Finds unique, plausible email addresses in `content`.
   *
   * @param {string} content Raw text or HTML.
   * @returns {string[]} Empty when `content` is not a string.
   */
  extractEmailsFromContent(content) {
    if (typeof content !== "string") return [];
    const pattern = /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/g;
    const exact = /^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$/;
    const isAcceptable = (candidate) => {
      const domain = candidate.split("@")[1]?.toLowerCase();
      const suffix = candidate.split(".").pop()?.toLowerCase();
      return (
        exact.test(candidate) &&
        domain !== undefined &&
        suffix !== undefined &&
        !this.fileExtensions.includes(`.${suffix}`) &&
        !this.isRestrictedDomain(`https://${domain}`)
      );
    };
    const collect = (text) => (text.match(pattern) || []).filter(isAcceptable);
    // Two passes: one with tags removed, one with every character that cannot
    // be part of an address blanked out.
    const withoutTags = content.replace(/<[^>]*>/g, " ");
    const withoutSymbols = content.replace(/[^\w@.\s-]/g, " ");
    return [...new Set([...collect(withoutTags), ...collect(withoutSymbols)])];
  }

  /**
   * Tells whether `url` belongs to a restricted domain or one of its
   * sub-domains. Unparseable URLs count as restricted.
   */
  isRestrictedDomain(url) {
    try {
      // hostname (not host) so an explicit port cannot bypass the list.
      const hostname = new URL(url).hostname.toLowerCase();
      return this.restrictedDomains.some((entry) => {
        const restricted = entry.toLowerCase();
        return hostname === restricted || hostname.endsWith(`.${restricted}`);
      });
    } catch {
      return true;
    }
  }

  /** Tells whether `url` points at linktr.ee. */
  isLinktreeUrl(url) {
    return this.extractRootDomain(url) === "linktr.ee";
  }

  /** Returns the lower-cased hostname without a leading "www.", or "" when invalid. */
  extractRootDomain(url) {
    try {
      const hostname = new URL(url).hostname.toLowerCase();
      return hostname.startsWith("www.") ? hostname.slice(4) : hostname;
    } catch {
      return "";
    }
  }

  /**
   * Resolves `href` against `base`.
   * @param {string} href
   * @param {URL} base
   * @returns {string}
   */
  normalizeUrl(href, base) {
    if (href.startsWith("http://") || href.startsWith("https://")) return href;
    if (href.startsWith("//")) return `${base.protocol}${href}`;
    return new URL(href, base.href).href;
  }

  /** Picks one of the generated user-agent strings at random. */
  getRandomUserAgent() {
    return this.userAgents[Math.floor(Math.random() * this.userAgents.length)];
  }
}

/**
 * Builds 200 user-agent strings from random browser/platform combinations.
 * @returns {string[]}
 */
function generateUserAgents() {
  const browsers = [
    { name: "Chrome", version: "91.0.4472.124", engine: "AppleWebKit/537.36" },
    { name: "Firefox", version: "89.0", engine: "Gecko/20100101" },
    { name: "Safari", version: "14.1.1", engine: "AppleWebKit/605.1.15" },
    { name: "Edge", version: "91.0.864.59", engine: "AppleWebKit/537.36" },
    { name: "Opera", version: "77.0.4054.277", engine: "AppleWebKit/537.36" },
    { name: "Vivaldi", version: "3.8.2259.42", engine: "AppleWebKit/537.36" },
    { name: "Brave", version: "1.26.74", engine: "AppleWebKit/537.36" },
    { name: "Chromium", version: "91.0.4472.101", engine: "AppleWebKit/537.36" },
    { name: "Yandex", version: "21.5.3.742", engine: "AppleWebKit/537.36" },
    { name: "Maxthon", version: "5.3.8.2000", engine: "AppleWebKit/537.36" },
  ];
  const platforms = [
    "Windows NT 10.0",
    "Windows NT 6.1",
    "Macintosh; Intel Mac OS X 10_15_7",
    "Macintosh; Intel Mac OS X 11_4_0",
    "X11; Linux x86_64",
    "X11; Ubuntu; Linux x86_64",
  ];
  const formats = {
    Chrome: (os, b) => `Mozilla/5.0 (${os}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36`,
    Firefox: (os, b) => `Mozilla/5.0 (${os}; rv:${b.version}) ${b.engine} Firefox/${b.version}`,
    Safari: (os, b) => `Mozilla/5.0 (${os}) ${b.engine} (KHTML, like Gecko) Version/${b.version} Safari/605.1.15`,
    Edge: (os, b) => `Mozilla/5.0 (${os}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36 Edg/${b.version}`,
    Opera: (os, b) => `Mozilla/5.0 (${os}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36 OPR/${b.version}`,
    Vivaldi: (os, b) => `Mozilla/5.0 (${os}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36 Vivaldi/${b.version}`,
    Brave: (os, b) => `Mozilla/5.0 (${os}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36 Brave/${b.version}`,
    Chromium: (os, b) => `Mozilla/5.0 (${os}) ${b.engine} (KHTML, like Gecko) Chromium/${b.version} Chrome/${b.version} Safari/537.36`,
    Yandex: (os, b) => `Mozilla/5.0 (${os}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} YaBrowser/${b.version} Safari/537.36`,
    Maxthon: (os, b) => `Mozilla/5.0 (${os}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36 Maxthon/${b.version}`,
  };
  const pick = (list) => list[Math.floor(Math.random() * list.length)];
  return Array.from({ length: 200 }, () => {
    const browser = pick(browsers);
    const platform = pick(platforms);
    return formats[browser.name](platform, browser);
  });
}

/**
 * Domains that are never crawled and whose mailboxes are ignored: social
 * networks, search engines, public email providers and business directories.
 * @returns {string[]} De-duplicated list in its original order.
 */
function getRestrictedDomains() {
  return [...new Set([
    "facebook.com", "fb.com", "messenger.com", "instagram.com", "threads.net", "twitter.com", "x.com",
    "linkedin.com", "pinterest.com", "pin.it", "reddit.com", "tumblr.com", "snapchat.com", "tiktok.com",
    "douyin.com", "youtube.com", "youtu.be", "whatsapp.com", "telegram.org", "t.me", "medium.com",
    "quora.com", "flickr.com", "vimeo.com", "vk.com", "weibo.com", "sina.com.cn", "line.me",
    "discord.com", "discordapp.com", "twitch.tv", "meetup.com", "nextdoor.com", "xing.com", "yelp.com",
    "zalo.me", "mastodon.social", "clubhouse.com", "patreon.com", "onlyfans.com", "douban.com",
    "goodreads.com", "soundcloud.com", "spotify.com", "last.fm", "behance.net", "dribbble.com",
    "deviantart.com", "pixiv.net", "slideshare.net", "tinder.com", "bumble.com", "etsy.com",
    "indeed.com", "glassdoor.com", "monster.com", "careerbuilder.com", "dice.com", "ziprecruiter.com",
    "simplyhired.com", "upwork.com", "freelancer.com", "fiverr.com", "stackoverflow.com",
    "stackoverflow.co", "angel.co", "wellfound.com", "quora.com", "stackexchange.com", "yahoo.com",
    "answers.microsoft.com", "askubuntu.com", "superuser.com", "serverfault.com", "mathoverflow.net",
    "xda-developers.com", "gamespot.com", "ign.com", "4chan.org", "9gag.com", "gizmodo.com",
    "slashdot.org", "hacker-news.news", "ycombinator.com", "producthunt.com", "discourse.org",
    "google.com", "google.co.uk", "google.de", "google.fr", "google.co.jp", "bing.com", "yahoo.com",
    "search.yahoo.com", "duckduckgo.com", "baidu.com", "yandex.com", "yandex.ru", "ask.com",
    "wolframalpha.com", "ecosia.org", "startpage.com", "qwant.com", "searx.me", "gibiru.com",
    "swisscows.com", "gmail.com", "googlemail.com", "outlook.com", "hotmail.com", "live.com",
    "msn.com", "yahoo.com", "ymail.com", "aol.com", "icloud.com", "me.com", "mac.com",
    "protonmail.com", "pm.me", "zoho.com", "mail.com", "gmx.com", "gmx.net", "yandex.com",
    "yandex.ru", "tutanota.com", "tutanota.de", "fastmail.com", "hushmail.com", "mailbox.org",
    "posteo.de", "runbox.com", "disroot.org", "163.com", "qq.com", "rambler.ru", "mail.ru",
    "yelp.com", "yelp.ca", "yelp.co.uk", "yelp.com.au", "yellowpages.com", "yellowpages.ca",
    "yell.com", "tripadvisor.com", "tripadvisor.co.uk", "tripadvisor.ca", "foursquare.com",
    "angieslist.com", "bbb.org", "manta.com", "thumbtack.com", "homeadvisor.com", "superpages.com",
    "whitepages.com", "local.com", "citysearch.com", "merchantcircle.com", "insiderpages.com",
    "kudzu.com", "hotfrog.com", "buildzoom.com", "houzz.com", "porch.com", "mapquest.com",
    "zagat.com", "zomato.com", "opentable.com", "viator.com", "expedia.com", "booking.com",
    "airbnb.com", "vrbo.com", "homeaway.com", "craigslist.org", "nextdoor.com", "patch.com",
    "meetup.com", "eventbrite.com", "groupon.com", "livingsocial.com", "gumtree.com",
    "gumtree.com.au", "kijiji.ca", "leboncoin.fr", "finn.no", "blocket.se", "58.com",
    "dianping.com", "tabelog.com", "ypcdn.com",
  ])];
}

export { Scraper, CappedSet };
1
import { parseHTML } from "linkedom";

/**
 * Set with a fixed capacity: once `maxSize` values are stored, adding a new
 * value first evicts the oldest one (insertion order).
 */
class CappedSet extends Set {
  maxSize;

  /** @param {number} maxSize Maximum number of values retained. */
  constructor(maxSize) {
    super();
    this.maxSize = maxSize;
  }

  /**
   * Adds `value`, evicting the oldest entry when the set is full.
   * @param {*} value
   * @returns {this}
   */
  add(value) {
    if (this.has(value)) return this;
    if (this.size > 0 && this.size >= this.maxSize) {
      // Decide by size, not by the truthiness of the oldest value, so falsy
      // entries such as "" or 0 are evicted as well.
      this.delete(this.values().next().value);
    }
    return super.add(value);
  }
}

/**
 * Crawls external websites (and Linktree profiles) looking for email
 * addresses, reporting each new address to the supplied handlers.
 */
class Scraper {
  http;
  httpOptions;
  onEmailLeads;
  onEmailDiscovered;
  debug;
  discoveredEmails = new CappedSet(1e4);
  userAgents = [];
  fileExtensions = [];
  restrictedDomains = getRestrictedDomains();
  forbiddenProtocols = [
    "mailto:", "tel:", "javascript:", "data:", "sms:", "ftp:",
    "file:", "irc:", "blob:", "chrome:", "about:", "intent:",
  ];

  /**
   * @param {object} http Client exposing `get`, `post`, `put` and `patch`.
   * @param {object} httpOptions Retry settings read by `executeHttp`.
   * @param {Function[]} onEmailLeads Stored on the instance; not read here.
   * @param {Function[]} onEmailDiscovered Stored on the instance; not read here.
   * @param {boolean} [debug=false] Enables console diagnostics.
   */
  constructor(http, httpOptions, onEmailLeads, onEmailDiscovered, debug = false) {
    this.http = http;
    this.httpOptions = httpOptions;
    this.onEmailLeads = onEmailLeads;
    this.onEmailDiscovered = onEmailDiscovered;
    this.debug = debug;
    this.userAgents = generateUserAgents();
  }

  /** Resolves after `ms` milliseconds. */
  sleep(ms) {
    return new Promise((resolve) => setTimeout(resolve, ms));
  }

  /**
   * Fetches `url`, consulting the cache callbacks found on `options`.
   *
   * @param {string} url
   * @param {string} method "GET", "PATCH" or "POST"; anything else uses PUT.
   * @param {*} body Payload for non-GET requests.
   * @param {object} options Provides `getCache`, `saveCache`, `hasUrlInCache`,
   *   `saveUrl` and an optional `httpConfig`.
   * @param {boolean} skipVisitedCheck When truthy, `hasUrlInCache` is not consulted.
   * @param {number} [attempt=0] Retry counter used by the recursive retries.
   * @returns {Promise<{data: string, contentType: string, finalUrl: *}|null|false|undefined>}
   *   `undefined` for a rejected URL, `false` for an already visited URL,
   *   `null` on failure or non-HTML content, otherwise the page.
   */
  async executeHttp(url, method, body, options, skipVisitedCheck, attempt = 0) {
    const { getCache, saveCache, hasUrlInCache, saveUrl, httpConfig = {} } = options;
    if (!url || url.length < 3 || this.forbiddenProtocols.some((protocol) => url.startsWith(protocol))) {
      return undefined;
    }
    try {
      const alreadyVisited = skipVisitedCheck ? false : await hasUrlInCache(url);
      const cached = await getCache(url);
      if (alreadyVisited && (!cached || method !== "GET")) return false;

      let response;
      if (cached && method === "GET") response = cached;
      else if (method === "GET") response = await this.http.get(url, httpConfig);
      else if (method === "PATCH") response = await this.http.patch(url, body, httpConfig);
      else if (method === "POST") response = await this.http.post(url, body, httpConfig);
      else response = await this.http.put(url, body, httpConfig);

      const { data, contentType, finalUrl } = response;
      if (!cached) await saveCache(url, { data, contentType, finalUrl });
      if (!alreadyVisited) await saveUrl(url);

      const isHtmlText =
        Boolean(contentType) &&
        contentType.includes("/html") &&
        contentType.includes("text/") &&
        typeof data === "string";
      return isHtmlText ? { data, contentType, finalUrl } : null;
    } catch (error) {
      const settings = this.httpOptions;
      if (error && error.response) {
        const status = error.response.status;
        const delay = settings.retryDelay || 100;
        const maxAttempts = settings.maxRetryAttempts || 3;
        const noProxyStatuses = settings.retryWithoutProxyOnStatusCode;
        const maxProxyRetries = settings.maxRetryOnProxyError || 3;
        const retry = (retryOptions) =>
          this.executeHttp(url, method, body, retryOptions, skipVisitedCheck, attempt + 1);

        if (noProxyStatuses && httpConfig.proxy && noProxyStatuses.includes(status) && attempt < maxAttempts) {
          await this.sleep(delay);
          // Retry on a copy without the proxy: deleting the key from the shared
          // config would silently disable the proxy for every other request.
          const { proxy: _droppedProxy, ...directConfig } = httpConfig;
          return await retry({ ...options, httpConfig: directConfig });
        }
        if (
          settings.retryOnStatusCode &&
          httpConfig.proxy &&
          settings.retryOnStatusCode.includes(status) &&
          attempt < maxAttempts
        ) {
          await this.sleep(delay);
          return await retry(options);
        }
        if (settings.retryOnProxyError && httpConfig.proxy && attempt < maxProxyRetries) {
          await this.sleep(delay);
          return await retry(options);
        }
      }
      if (this.debug) console.log(`Error: unable to ${method} ${url}: ${error?.message}`);
      return null;
    }
  }

  /**
   * Extracts emails from `content`, reports new ones individually and then
   * queues the bulk `onEmails` handlers with the list of new addresses.
   *
   * @param {string} content Raw page text or HTML.
   * @param {string} discoveredAt URL the content came from.
   * @param {Function[]} onEmailDiscovered Per-email handlers.
   * @param {Function[]} onEmails Bulk handlers.
   * @param {{add: Function}} queue Task queue receiving the handler calls.
   * @param {object} [metadata] Attached to every discovery event.
   */
  extractEmails(content, discoveredAt, onEmailDiscovered, onEmails, queue, metadata) {
    const candidates = this.extractEmailsFromContent(content?.replaceAll("mailto:", " "));
    const fresh = candidates.filter((email) =>
      this.handleEmailDiscovery(email, discoveredAt, onEmailDiscovered, queue, metadata),
    );
    if (onEmails && onEmails.length > 0 && fresh.length > 0) {
      // `fresh` is handed over as-is and never cleared afterwards; emptying it
      // here would leave the queued handlers with an empty list.
      queue.add(async () => Promise.all(onEmails.map((handler) => handler(fresh))));
    }
  }

  /**
   * Scrapes `url` for emails and follows contact-like links.
   *
   * @param {string} url
   * @param {string} method HTTP method for the first request.
   * @param {*} body Payload for non-GET requests.
   * @param {object} options Crawl options; defaults for `httpConfig`, `depth`
   *   and `allowCrossDomainTravel` are written back onto this object.
   * @param {boolean} skipVisitedCheck Only honoured for the root call.
   * @param {boolean} [isRoot=true] Root calls dispatch `options.onEmails`.
   * @param {boolean} [isNested] Set on recursive calls; nested pages follow
   *   links only while `options.depth > 0`.
   * @param {*} [_reserved] Unused; kept for signature compatibility.
   * @returns {Promise<string[]>} Newly discovered emails.
   */
  async parseExternalWebsite(url, method, body, options, skipVisitedCheck, isRoot = true, isNested, _reserved) {
    const configuredHeaders = options.httpConfig?.headers;
    let callerHeaders = {};
    if (configuredHeaders) {
      callerHeaders =
        configuredHeaders instanceof Headers
          ? Object.fromEntries(configuredHeaders.entries())
          : configuredHeaders;
    }
    options.httpConfig = options.httpConfig || {};
    options.httpConfig.headers = {
      "user-agent": this.getRandomUserAgent(),
      accept: "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
      "accept-language": "en-US,en;q=0.9",
      "cache-control": "no-cache",
      pragma: "no-cache",
      ...callerHeaders,
    };
    options.httpConfig.timeout = options.httpConfig.timeout || 15000;
    options.depth = options.depth || 0;
    options.allowCrossDomainTravel = options.allowCrossDomainTravel || false;

    const skipVisited = skipVisitedCheck && isRoot;
    const found = [];
    try {
      const pageUrl = new URL(url);
      const rootDomain = this.extractRootDomain(url);
      if (this.isLinktreeUrl(url)) {
        return await this.parseLinktreeProfile(url, options, skipVisited);
      }
      if (this.isRestrictedDomain(url)) {
        if (this.debug) console.warn(`⚠️ Skipped URL (restricted url): ${url}`);
        return found;
      }
      const page = await this.executeHttp(url, method, body, options, skipVisited);
      if (!page) {
        if (this.debug && page === null) console.warn(`⚠️ Failed to fetch page content: ${url}`);
        if (this.debug && page === false) console.warn(`⚠️ Skipped URL (already visited): ${url}`);
        return found;
      }
      for (const email of this.extractEmailsFromContent(page.data?.replaceAll("mailto:", " "))) {
        if (this.handleEmailDiscovery(email, url, options.onEmailDiscovered, options.queue, options.emailMetadata)) {
          found.push(email);
        }
      }
      if (options.depth > 0 || !isNested) {
        const { document } = parseHTML(page.data);
        const links = this.extractRelevantLinks(
          document,
          pageUrl,
          rootDomain,
          options.depth,
          options.allowCrossDomainTravel,
        );
        // The remaining depth is computed locally: decrementing `options.depth`
        // would corrupt the budget of sibling calls sharing the same object.
        const childDepth = options.depth - 1;
        const outcomes = await Promise.allSettled(
          links.map((link) =>
            this.parseExternalWebsite(link, "GET", null, { ...options, depth: childDepth }, skipVisited, false, true),
          ),
        );
        for (const outcome of outcomes) {
          if (outcome.status === "fulfilled") found.push(...outcome.value);
          else if (this.debug) console.warn("⚠️ Failed to parse child URL:", outcome.reason?.message);
        }
      }
    } catch (error) {
      if (this.debug) console.error(`❌ Error parsing external website: ${url}`, error?.message);
    }
    if (isRoot && options.onEmails && options.onEmails.length > 0) {
      options.queue.add(async () => Promise.all(options.onEmails.map((handler) => handler(found))));
    }
    return found;
  }

  /**
   * Fetches a Linktree profile and scrapes every external site it links to.
   *
   * @param {string} url Linktree profile URL.
   * @param {object} options Crawl options passed through to each site.
   * @param {boolean} skipVisitedCheck Forwarded to `executeHttp`.
   * @returns {Promise<string[]>} Emails found on the linked sites.
   */
  async parseLinktreeProfile(url, options, skipVisitedCheck) {
    const found = [];
    try {
      const page = await this.executeHttp(url, "GET", null, options, skipVisitedCheck);
      if (!page) {
        if (this.debug) console.warn(`⚠️ Failed to fetch Linktree profile: ${url}`);
        return found;
      }
      // parseHTML needs the markup string, not the response wrapper object.
      const container = parseHTML(page.data).document.getElementById("links-container");
      if (!container) {
        if (this.debug) console.warn(`\uD83D\uDD0D No links container found in Linktree profile: ${url}`);
        return found;
      }
      const externalUrls = this.extractLinktreeExternalUrls(container, url);
      if (externalUrls.length === 0) {
        if (this.debug) console.info("\uD83D\uDCED No valid external links found in Linktree profile");
        return found;
      }
      if (this.debug) console.info(`\uD83C\uDFAF Found ${externalUrls.length} external links in Linktree profile`);
      const outcomes = await Promise.allSettled(
        externalUrls.map((link) => this.parseExternalWebsite(link, "GET", null, options, skipVisitedCheck, false)),
      );
      for (const outcome of outcomes) {
        if (outcome.status === "fulfilled") found.push(...outcome.value);
        else if (this.debug) console.warn("⚠️ Failed to parse Linktree external URL:", outcome.reason?.message);
      }
    } catch (error) {
      if (this.debug) console.error(`❌ Error parsing Linktree profile: ${url}`, error?.message);
    }
    return found;
  }

  /**
   * Collects the unique, non-restricted external URLs inside a Linktree
   * links container.
   *
   * @param {Element} container Element holding the profile links.
   * @param {string} baseUrl Base used to resolve relative hrefs.
   * @returns {string[]}
   */
  extractLinktreeExternalUrls(container, baseUrl) {
    const urls = new Set();
    for (const anchor of container.querySelectorAll("a[href][target='_blank']")) {
      const href = anchor.getAttribute("href");
      if (!href || href.length < 3 || this.forbiddenProtocols.some((protocol) => href.startsWith(protocol))) {
        continue;
      }
      try {
        const absolute = new URL(href, baseUrl).href;
        const domain = this.extractRootDomain(absolute);
        if (domain !== "linktr.ee" && !this.isRestrictedDomain(absolute) && domain.length > 3) {
          urls.add(absolute);
        }
      } catch (error) {
        if (this.debug) console.warn(`\uD83D\uDD17 Invalid URL in Linktree: ${href}`);
      }
    }
    return Array.from(urls);
  }

  /**
   * Records `email` if it has not been seen and queues the per-email handlers.
   *
   * @param {string} email
   * @param {string} discoveredAt URL where the email was found.
   * @param {Function[]} handlers Per-email handlers.
   * @param {{add: Function}} queue Task queue receiving the handler calls.
   * @param {object} [metadata] Attached to the event (defaults to `{}`).
   * @returns {boolean} `true` when the email is new.
   */
  handleEmailDiscovery(email, discoveredAt, handlers, queue, metadata) {
    if (this.discoveredEmails.has(email)) return false;
    this.discoveredEmails.add(email);
    const event = { email, discoveredAt, timestamp: new Date(), metadata: metadata || {} };
    if (handlers && handlers.length > 0) {
      queue.add(async () => Promise.all(handlers.map((handler) => handler(event))));
    }
    if (this.debug) console.info(`\uD83D\uDCE7 New email discovered: ${email} at ${discoveredAt}`);
    return true;
  }

  /**
   * Decides whether a link's domain may be visited from `rootDomain`.
   * At depth 0 only the exact domain is allowed; otherwise parent and
   * sub-domains are accepted too.
   */
  isDomainAccessAllowed(domain, rootDomain, depth, allowCrossDomain) {
    if (allowCrossDomain) return true;
    if (depth === 0) return domain === rootDomain;
    return domain === rootDomain || domain.endsWith(`.${rootDomain}`) || rootDomain.endsWith(`.${domain}`);
  }

  /**
   * Returns the unique links of `document` whose URL contains a contact-like
   * keyword (or points at Linktree) and whose domain may be visited.
   *
   * @param {Document} document Parsed page.
   * @param {URL} pageUrl URL of the page, used to resolve relative links.
   * @param {string} rootDomain Domain of the page.
   * @param {number} depth Remaining crawl depth.
   * @param {boolean} allowCrossDomain Whether foreign domains are allowed.
   * @returns {string[]}
   */
  extractRelevantLinks(document, pageUrl, rootDomain, depth, allowCrossDomain) {
    const keywords = [
      "about", "contact", "help", "support", "reach", "email", "mail", "message", "company",
      "team", "staff", "info", "inquiry", "feedback", "service", "assistance", "connect", "touch",
    ];
    // A Set keeps first-seen order while dropping links repeated on the page.
    const links = new Set();
    for (const anchor of document.querySelectorAll("a[href]")) {
      const href = anchor.getAttribute("href");
      if (!href || href.length < 2) continue;
      try {
        const absolute = this.normalizeUrl(href, pageUrl);
        const domain = this.extractRootDomain(absolute);
        if (!this.isDomainAccessAllowed(domain, rootDomain, depth, allowCrossDomain)) continue;
        const lowered = absolute.toLowerCase();
        if (keywords.some((keyword) => lowered.includes(keyword)) || this.isLinktreeUrl(absolute)) {
          links.add(absolute);
        }
      } catch (error) {
        if (this.debug) console.warn(`\uD83D\uDD17 Invalid link found: ${href}`, error?.message);
      }
    }
    return [...links];
  }

  /**
   * Finds unique, plausible email addresses in `content`.
   *
   * @param {string} content Raw text or HTML.
   * @returns {string[]} Empty when `content` is not a string.
   */
  extractEmailsFromContent(content) {
    if (typeof content !== "string") return [];
    const pattern = /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/g;
    const exact = /^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$/;
    const isAcceptable = (candidate) => {
      const domain = candidate.split("@")[1]?.toLowerCase();
      const suffix = candidate.split(".").pop()?.toLowerCase();
      return (
        exact.test(candidate) &&
        domain !== undefined &&
        suffix !== undefined &&
        !this.fileExtensions.includes(`.${suffix}`) &&
        !this.isRestrictedDomain(`https://${domain}`)
      );
    };
    const collect = (text) => (text.match(pattern) || []).filter(isAcceptable);
    // Two passes: one with tags removed, one with every character that cannot
    // be part of an address blanked out.
    const withoutTags = content.replace(/<[^>]*>/g, " ");
    const withoutSymbols = content.replace(/[^\w@.\s-]/g, " ");
    return [...new Set([...collect(withoutTags), ...collect(withoutSymbols)])];
  }

  /**
   * Tells whether `url` belongs to a restricted domain or one of its
   * sub-domains. Unparseable URLs count as restricted.
   */
  isRestrictedDomain(url) {
    try {
      // hostname (not host) so an explicit port cannot bypass the list.
      const hostname = new URL(url).hostname.toLowerCase();
      return this.restrictedDomains.some((entry) => {
        const restricted = entry.toLowerCase();
        return hostname === restricted || hostname.endsWith(`.${restricted}`);
      });
    } catch {
      return true;
    }
  }

  /** Tells whether `url` points at linktr.ee. */
  isLinktreeUrl(url) {
    return this.extractRootDomain(url) === "linktr.ee";
  }

  /** Returns the lower-cased hostname without a leading "www.", or "" when invalid. */
  extractRootDomain(url) {
    try {
      const hostname = new URL(url).hostname.toLowerCase();
      return hostname.startsWith("www.") ? hostname.slice(4) : hostname;
    } catch {
      return "";
    }
  }

  /**
   * Resolves `href` against `base`.
   * @param {string} href
   * @param {URL} base
   * @returns {string}
   */
  normalizeUrl(href, base) {
    if (href.startsWith("http://") || href.startsWith("https://")) return href;
    if (href.startsWith("//")) return `${base.protocol}${href}`;
    return new URL(href, base.href).href;
  }

  /** Picks one of the generated user-agent strings at random. */
  getRandomUserAgent() {
    return this.userAgents[Math.floor(Math.random() * this.userAgents.length)];
  }
}

/**
 * Builds 200 user-agent strings from random browser/platform combinations.
 * @returns {string[]}
 */
function generateUserAgents() {
  const browsers = [
    { name: "Chrome", version: "91.0.4472.124", engine: "AppleWebKit/537.36" },
    { name: "Firefox", version: "89.0", engine: "Gecko/20100101" },
    { name: "Safari", version: "14.1.1", engine: "AppleWebKit/605.1.15" },
    { name: "Edge", version: "91.0.864.59", engine: "AppleWebKit/537.36" },
    { name: "Opera", version: "77.0.4054.277", engine: "AppleWebKit/537.36" },
    { name: "Vivaldi", version: "3.8.2259.42", engine: "AppleWebKit/537.36" },
    { name: "Brave", version: "1.26.74", engine: "AppleWebKit/537.36" },
    { name: "Chromium", version: "91.0.4472.101", engine: "AppleWebKit/537.36" },
    { name: "Yandex", version: "21.5.3.742", engine: "AppleWebKit/537.36" },
    { name: "Maxthon", version: "5.3.8.2000", engine: "AppleWebKit/537.36" },
  ];
  const platforms = [
    "Windows NT 10.0",
    "Windows NT 6.1",
    "Macintosh; Intel Mac OS X 10_15_7",
    "Macintosh; Intel Mac OS X 11_4_0",
    "X11; Linux x86_64",
    "X11; Ubuntu; Linux x86_64",
  ];
  const formats = {
    Chrome: (os, b) => `Mozilla/5.0 (${os}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36`,
    Firefox: (os, b) => `Mozilla/5.0 (${os}; rv:${b.version}) ${b.engine} Firefox/${b.version}`,
    Safari: (os, b) => `Mozilla/5.0 (${os}) ${b.engine} (KHTML, like Gecko) Version/${b.version} Safari/605.1.15`,
    Edge: (os, b) => `Mozilla/5.0 (${os}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36 Edg/${b.version}`,
    Opera: (os, b) => `Mozilla/5.0 (${os}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36 OPR/${b.version}`,
    Vivaldi: (os, b) => `Mozilla/5.0 (${os}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36 Vivaldi/${b.version}`,
    Brave: (os, b) => `Mozilla/5.0 (${os}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36 Brave/${b.version}`,
    Chromium: (os, b) => `Mozilla/5.0 (${os}) ${b.engine} (KHTML, like Gecko) Chromium/${b.version} Chrome/${b.version} Safari/537.36`,
    Yandex: (os, b) => `Mozilla/5.0 (${os}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} YaBrowser/${b.version} Safari/537.36`,
    Maxthon: (os, b) => `Mozilla/5.0 (${os}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36 Maxthon/${b.version}`,
  };
  const pick = (list) => list[Math.floor(Math.random() * list.length)];
  return Array.from({ length: 200 }, () => {
    const browser = pick(browsers);
    const platform = pick(platforms);
    return formats[browser.name](platform, browser);
  });
}

/**
 * Domains that are never crawled and whose mailboxes are ignored: social
 * networks, search engines, public email providers and business directories.
 * @returns {string[]} De-duplicated list in its original order.
 */
function getRestrictedDomains() {
  return [...new Set([
    "facebook.com", "fb.com", "messenger.com", "instagram.com", "threads.net", "twitter.com", "x.com",
    "linkedin.com", "pinterest.com", "pin.it", "reddit.com", "tumblr.com", "snapchat.com", "tiktok.com",
    "douyin.com", "youtube.com", "youtu.be", "whatsapp.com", "telegram.org", "t.me", "medium.com",
    "quora.com", "flickr.com", "vimeo.com", "vk.com", "weibo.com", "sina.com.cn", "line.me",
    "discord.com", "discordapp.com", "twitch.tv", "meetup.com", "nextdoor.com", "xing.com", "yelp.com",
    "zalo.me", "mastodon.social", "clubhouse.com", "patreon.com", "onlyfans.com", "douban.com",
    "goodreads.com", "soundcloud.com", "spotify.com", "last.fm", "behance.net", "dribbble.com",
    "deviantart.com", "pixiv.net", "slideshare.net", "tinder.com", "bumble.com", "etsy.com",
    "indeed.com", "glassdoor.com", "monster.com", "careerbuilder.com", "dice.com", "ziprecruiter.com",
    "simplyhired.com", "upwork.com", "freelancer.com", "fiverr.com", "stackoverflow.com",
    "stackoverflow.co", "angel.co", "wellfound.com", "quora.com", "stackexchange.com", "yahoo.com",
    "answers.microsoft.com", "askubuntu.com", "superuser.com", "serverfault.com", "mathoverflow.net",
    "xda-developers.com", "gamespot.com", "ign.com", "4chan.org", "9gag.com", "gizmodo.com",
    "slashdot.org", "hacker-news.news", "ycombinator.com", "producthunt.com", "discourse.org",
    "google.com", "google.co.uk", "google.de", "google.fr", "google.co.jp", "bing.com", "yahoo.com",
    "search.yahoo.com", "duckduckgo.com", "baidu.com", "yandex.com", "yandex.ru", "ask.com",
    "wolframalpha.com", "ecosia.org", "startpage.com", "qwant.com", "searx.me", "gibiru.com",
    "swisscows.com", "gmail.com", "googlemail.com", "outlook.com", "hotmail.com", "live.com",
    "msn.com", "yahoo.com", "ymail.com", "aol.com", "icloud.com", "me.com", "mac.com",
    "protonmail.com", "pm.me", "zoho.com", "mail.com", "gmx.com", "gmx.net", "yandex.com",
    "yandex.ru", "tutanota.com", "tutanota.de", "fastmail.com", "hushmail.com", "mailbox.org",
    "posteo.de", "runbox.com", "disroot.org", "163.com", "qq.com", "rambler.ru", "mail.ru",
    "yelp.com", "yelp.ca", "yelp.co.uk", "yelp.com.au", "yellowpages.com", "yellowpages.ca",
    "yell.com", "tripadvisor.com", "tripadvisor.co.uk", "tripadvisor.ca", "foursquare.com",
    "angieslist.com", "bbb.org", "manta.com", "thumbtack.com", "homeadvisor.com", "superpages.com",
    "whitepages.com", "local.com", "citysearch.com", "merchantcircle.com", "insiderpages.com",
    "kudzu.com", "hotfrog.com", "buildzoom.com", "houzz.com", "porch.com", "mapquest.com",
    "zagat.com", "zomato.com", "opentable.com", "viator.com", "expedia.com", "booking.com",
    "airbnb.com", "vrbo.com", "homeaway.com", "craigslist.org", "nextdoor.com", "patch.com",
    "meetup.com", "eventbrite.com", "groupon.com", "livingsocial.com", "gumtree.com",
    "gumtree.com.au", "kijiji.ca", "leboncoin.fr", "finn.no", "blocket.se", "58.com",
    "dianping.com", "tabelog.com", "ypcdn.com",
  ])];
}

export { Scraper, CappedSet };
package/dist/crawler.d.ts CHANGED
@@ -6980,10 +6980,11 @@ export declare class CrawlerOptions {
6980
6980
  */
6981
6981
  private getRandomUserAgent;
6982
6982
  }
6983
- export interface EmailDiscoveryEvent {
6983
+ export interface EmailDiscoveryEvent<T = Record<string, any>> {
6984
6984
  email: string;
6985
6985
  discoveredAt: string;
6986
6986
  timestamp: Date;
6987
+ metadata: T;
6987
6988
  }
6988
6989
  interface RedirectEvent$1 {
6989
6990
  originalUrl: string;
@@ -7259,7 +7260,7 @@ export declare class Crawler {
7259
7260
  * });
7260
7261
  * ```
7261
7262
  */
7262
- onEmailDiscovered(handler: (email: EmailDiscoveryEvent) => Promise<void>): Crawler;
7263
+ onEmailDiscovered<T = Record<string, any>>(handler: (email: EmailDiscoveryEvent<T>) => Promise<void>): Crawler;
7263
7264
  /**
7264
7265
  * Registers a handler for bulk email leads discovery.
7265
7266
  * Triggered when multiple email addresses are found and processed.
@@ -7688,6 +7689,7 @@ export declare class Crawler {
7688
7689
  useOxylabsRotation?: boolean;
7689
7690
  useDecodo?: boolean;
7690
7691
  skipCache?: boolean;
7692
+ emailMetadata?: Record<string, any>;
7691
7693
  }): Crawler;
7692
7694
  private execute;
7693
7695
  private execute2;
@@ -1,4 +1,4 @@
1
- const _mod_w4kkhv = require('../crawler/crawler.cjs');
2
- exports.Crawler = _mod_w4kkhv.Crawler;;
3
- const _mod_fiyx3u = require('../crawler/crawler-options.cjs');
4
- exports.CrawlerOptions = _mod_fiyx3u.CrawlerOptions;;
1
+ const _mod_zoj9w3 = require('../crawler/crawler.cjs');
2
+ exports.Crawler = _mod_zoj9w3.Crawler;;
3
+ const _mod_hxjem3 = require('../crawler/crawler-options.cjs');
4
+ exports.CrawlerOptions = _mod_hxjem3.CrawlerOptions;;
package/dist/index.cjs CHANGED
@@ -1,30 +1,30 @@
1
- const _mod_7y7owb = require('./core/rezo.cjs');
2
- exports.Rezo = _mod_7y7owb.Rezo;
3
- exports.createRezoInstance = _mod_7y7owb.createRezoInstance;
4
- exports.createDefaultInstance = _mod_7y7owb.createDefaultInstance;;
5
- const _mod_m2gp9k = require('./errors/rezo-error.cjs');
6
- exports.RezoError = _mod_m2gp9k.RezoError;
7
- exports.RezoErrorCode = _mod_m2gp9k.RezoErrorCode;;
8
- const _mod_1kei6w = require('./utils/headers.cjs');
9
- exports.RezoHeaders = _mod_1kei6w.RezoHeaders;;
10
- const _mod_otsozo = require('./utils/form-data.cjs');
11
- exports.RezoFormData = _mod_otsozo.RezoFormData;;
12
- const _mod_i1qoqb = require('./utils/cookies.cjs');
13
- exports.RezoCookieJar = _mod_i1qoqb.RezoCookieJar;
14
- exports.Cookie = _mod_i1qoqb.Cookie;;
15
- const _mod_l234vd = require('./utils/curl.cjs');
16
- exports.toCurl = _mod_l234vd.toCurl;
17
- exports.fromCurl = _mod_l234vd.fromCurl;;
18
- const _mod_4xnhr8 = require('./core/hooks.cjs');
19
- exports.createDefaultHooks = _mod_4xnhr8.createDefaultHooks;
20
- exports.mergeHooks = _mod_4xnhr8.mergeHooks;;
21
- const _mod_mibvuq = require('./proxy/manager.cjs');
22
- exports.ProxyManager = _mod_mibvuq.ProxyManager;;
23
- const _mod_cye7iw = require('./queue/index.cjs');
24
- exports.RezoQueue = _mod_cye7iw.RezoQueue;
25
- exports.HttpQueue = _mod_cye7iw.HttpQueue;
26
- exports.Priority = _mod_cye7iw.Priority;
27
- exports.HttpMethodPriority = _mod_cye7iw.HttpMethodPriority;;
1
+ const _mod_p3krsk = require('./core/rezo.cjs');
2
+ exports.Rezo = _mod_p3krsk.Rezo;
3
+ exports.createRezoInstance = _mod_p3krsk.createRezoInstance;
4
+ exports.createDefaultInstance = _mod_p3krsk.createDefaultInstance;;
5
+ const _mod_nycqy3 = require('./errors/rezo-error.cjs');
6
+ exports.RezoError = _mod_nycqy3.RezoError;
7
+ exports.RezoErrorCode = _mod_nycqy3.RezoErrorCode;;
8
+ const _mod_0cy8uv = require('./utils/headers.cjs');
9
+ exports.RezoHeaders = _mod_0cy8uv.RezoHeaders;;
10
+ const _mod_70z3jb = require('./utils/form-data.cjs');
11
+ exports.RezoFormData = _mod_70z3jb.RezoFormData;;
12
+ const _mod_nh98e4 = require('./utils/cookies.cjs');
13
+ exports.RezoCookieJar = _mod_nh98e4.RezoCookieJar;
14
+ exports.Cookie = _mod_nh98e4.Cookie;;
15
+ const _mod_8ffkly = require('./utils/curl.cjs');
16
+ exports.toCurl = _mod_8ffkly.toCurl;
17
+ exports.fromCurl = _mod_8ffkly.fromCurl;;
18
+ const _mod_b04yh2 = require('./core/hooks.cjs');
19
+ exports.createDefaultHooks = _mod_b04yh2.createDefaultHooks;
20
+ exports.mergeHooks = _mod_b04yh2.mergeHooks;;
21
+ const _mod_t2dau4 = require('./proxy/manager.cjs');
22
+ exports.ProxyManager = _mod_t2dau4.ProxyManager;;
23
+ const _mod_v8kv6x = require('./queue/index.cjs');
24
+ exports.RezoQueue = _mod_v8kv6x.RezoQueue;
25
+ exports.HttpQueue = _mod_v8kv6x.HttpQueue;
26
+ exports.Priority = _mod_v8kv6x.Priority;
27
+ exports.HttpMethodPriority = _mod_v8kv6x.HttpMethodPriority;;
28
28
  const { RezoError } = require('./errors/rezo-error.cjs');
29
29
  const isRezoError = exports.isRezoError = RezoError.isRezoError;
30
30
  const Cancel = exports.Cancel = RezoError;
@@ -1,10 +1,10 @@
1
- const _mod_5wmaq1 = require('./base.cjs');
2
- exports.Agent = _mod_5wmaq1.Agent;;
3
- const _mod_rekzdg = require('./http-proxy.cjs');
4
- exports.HttpProxyAgent = _mod_rekzdg.HttpProxyAgent;;
5
- const _mod_pstipg = require('./https-proxy.cjs');
6
- exports.HttpsProxyAgent = _mod_pstipg.HttpsProxyAgent;;
7
- const _mod_haz0pb = require('./socks-proxy.cjs');
8
- exports.SocksProxyAgent = _mod_haz0pb.SocksProxyAgent;;
9
- const _mod_mxslm0 = require('./socks-client.cjs');
10
- exports.SocksClient = _mod_mxslm0.SocksClient;;
1
+ const _mod_i74201 = require('./base.cjs');
2
+ exports.Agent = _mod_i74201.Agent;;
3
+ const _mod_zuse17 = require('./http-proxy.cjs');
4
+ exports.HttpProxyAgent = _mod_zuse17.HttpProxyAgent;;
5
+ const _mod_jal5o3 = require('./https-proxy.cjs');
6
+ exports.HttpsProxyAgent = _mod_jal5o3.HttpsProxyAgent;;
7
+ const _mod_qhpd82 = require('./socks-proxy.cjs');
8
+ exports.SocksProxyAgent = _mod_qhpd82.SocksProxyAgent;;
9
+ const _mod_qis1ax = require('./socks-client.cjs');
10
+ exports.SocksClient = _mod_qis1ax.SocksClient;;
@@ -1,9 +1,9 @@
1
1
  const { Agent, HttpProxyAgent, HttpsProxyAgent, SocksProxyAgent } = require('../internal/agents/index.cjs');
2
2
  const { parseProxyString } = require('./parse.cjs');
3
- const _mod_0p8pms = require('./manager.cjs');
4
- exports.ProxyManager = _mod_0p8pms.ProxyManager;;
5
- const _mod_t2470u = require('./parse.cjs');
6
- exports.parseProxyString = _mod_t2470u.parseProxyString;;
3
+ const _mod_8l1pku = require('./manager.cjs');
4
+ exports.ProxyManager = _mod_8l1pku.ProxyManager;;
5
+ const _mod_ampgfr = require('./parse.cjs');
6
+ exports.parseProxyString = _mod_ampgfr.parseProxyString;;
7
7
  function createOptions(uri, opts) {
8
8
  if (uri instanceof URL || typeof uri === "string") {
9
9
  return {
@@ -1,8 +1,8 @@
1
- const _mod_sm75pf = require('./queue.cjs');
2
- exports.RezoQueue = _mod_sm75pf.RezoQueue;;
3
- const _mod_jkxs9z = require('./http-queue.cjs');
4
- exports.HttpQueue = _mod_jkxs9z.HttpQueue;
5
- exports.extractDomain = _mod_jkxs9z.extractDomain;;
6
- const _mod_3nhfhq = require('./types.cjs');
7
- exports.Priority = _mod_3nhfhq.Priority;
8
- exports.HttpMethodPriority = _mod_3nhfhq.HttpMethodPriority;;
1
+ const _mod_3rd9kt = require('./queue.cjs');
2
+ exports.RezoQueue = _mod_3rd9kt.RezoQueue;;
3
+ const _mod_yfrbwq = require('./http-queue.cjs');
4
+ exports.HttpQueue = _mod_yfrbwq.HttpQueue;
5
+ exports.extractDomain = _mod_yfrbwq.extractDomain;;
6
+ const _mod_2qwp27 = require('./types.cjs');
7
+ exports.Priority = _mod_2qwp27.Priority;
8
+ exports.HttpMethodPriority = _mod_2qwp27.HttpMethodPriority;;
@@ -1,11 +1,11 @@
1
- const _mod_ilaezh = require('./event-emitter.cjs');
2
- exports.UniversalEventEmitter = _mod_ilaezh.UniversalEventEmitter;;
3
- const _mod_vh2ew1 = require('./stream.cjs');
4
- exports.UniversalStreamResponse = _mod_vh2ew1.UniversalStreamResponse;
5
- exports.StreamResponse = _mod_vh2ew1.StreamResponse;;
6
- const _mod_rsdt23 = require('./download.cjs');
7
- exports.UniversalDownloadResponse = _mod_rsdt23.UniversalDownloadResponse;
8
- exports.DownloadResponse = _mod_rsdt23.DownloadResponse;;
9
- const _mod_15456i = require('./upload.cjs');
10
- exports.UniversalUploadResponse = _mod_15456i.UniversalUploadResponse;
11
- exports.UploadResponse = _mod_15456i.UploadResponse;;
1
+ const _mod_v16hwz = require('./event-emitter.cjs');
2
+ exports.UniversalEventEmitter = _mod_v16hwz.UniversalEventEmitter;;
3
+ const _mod_495yjz = require('./stream.cjs');
4
+ exports.UniversalStreamResponse = _mod_495yjz.UniversalStreamResponse;
5
+ exports.StreamResponse = _mod_495yjz.StreamResponse;;
6
+ const _mod_rc9q4g = require('./download.cjs');
7
+ exports.UniversalDownloadResponse = _mod_rc9q4g.UniversalDownloadResponse;
8
+ exports.DownloadResponse = _mod_rc9q4g.DownloadResponse;;
9
+ const _mod_q7jgxt = require('./upload.cjs');
10
+ exports.UniversalUploadResponse = _mod_q7jgxt.UniversalUploadResponse;
11
+ exports.UploadResponse = _mod_q7jgxt.UploadResponse;;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "rezo",
3
- "version": "1.0.64",
3
+ "version": "1.0.66",
4
4
  "description": "Lightning-fast, enterprise-grade HTTP client for modern JavaScript. Full HTTP/2 support, intelligent cookie management, multiple adapters (HTTP, Fetch, cURL, XHR), streaming, proxy support (HTTP/HTTPS/SOCKS), and cross-environment compatibility.",
5
5
  "main": "dist/index.cjs",
6
6
  "module": "dist/index.js",