rezo 1.0.64 → 1.0.65

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
- const _mod_4c0peh = require('./picker.cjs');
2
- exports.detectRuntime = _mod_4c0peh.detectRuntime;
3
- exports.getAdapterCapabilities = _mod_4c0peh.getAdapterCapabilities;
4
- exports.buildAdapterContext = _mod_4c0peh.buildAdapterContext;
5
- exports.getAvailableAdapters = _mod_4c0peh.getAvailableAdapters;
6
- exports.selectAdapter = _mod_4c0peh.selectAdapter;;
1
+ const _mod_zprug4 = require('./picker.cjs');
2
+ exports.detectRuntime = _mod_zprug4.detectRuntime;
3
+ exports.getAdapterCapabilities = _mod_zprug4.getAdapterCapabilities;
4
+ exports.buildAdapterContext = _mod_zprug4.buildAdapterContext;
5
+ exports.getAvailableAdapters = _mod_zprug4.getAvailableAdapters;
6
+ exports.selectAdapter = _mod_zprug4.selectAdapter;;
@@ -1,9 +1,9 @@
1
- const _mod_z313xk = require('./lru-cache.cjs');
2
- exports.LRUCache = _mod_z313xk.LRUCache;;
3
- const _mod_s9k0nm = require('./dns-cache.cjs');
4
- exports.DNSCache = _mod_s9k0nm.DNSCache;
5
- exports.getGlobalDNSCache = _mod_s9k0nm.getGlobalDNSCache;
6
- exports.resetGlobalDNSCache = _mod_s9k0nm.resetGlobalDNSCache;;
7
- const _mod_mk2k1e = require('./response-cache.cjs');
8
- exports.ResponseCache = _mod_mk2k1e.ResponseCache;
9
- exports.normalizeResponseCacheConfig = _mod_mk2k1e.normalizeResponseCacheConfig;;
1
+ const _mod_0xjclg = require('./lru-cache.cjs');
2
+ exports.LRUCache = _mod_0xjclg.LRUCache;;
3
+ const _mod_epwbuk = require('./dns-cache.cjs');
4
+ exports.DNSCache = _mod_epwbuk.DNSCache;
5
+ exports.getGlobalDNSCache = _mod_epwbuk.getGlobalDNSCache;
6
+ exports.resetGlobalDNSCache = _mod_epwbuk.resetGlobalDNSCache;;
7
+ const _mod_ukilsx = require('./response-cache.cjs');
8
+ exports.ResponseCache = _mod_ukilsx.ResponseCache;
9
+ exports.normalizeResponseCacheConfig = _mod_ukilsx.normalizeResponseCacheConfig;;
@@ -834,7 +834,8 @@ class Crawler {
834
834
  useOxylabsScraperAi = false,
835
835
  useOxylabsRotation = true,
836
836
  useDecodo = false,
837
- skipCache = false
837
+ skipCache = false,
838
+ emailMetadata = {}
838
839
  } = options || {};
839
840
  const _options = {
840
841
  headers: this.config.pickHeaders(url, true, headers, true),
@@ -872,7 +873,7 @@ class Crawler {
872
873
  this.addToNavigationQueue(url, method, body, headersObj);
873
874
  }
874
875
  if (deepEmailFinder) {
875
- const p = this.execute2(method, url, body, _options, forceRevisit);
876
+ const p = this.execute2(method, url, body, _options, forceRevisit, emailMetadata);
876
877
  this.pendingExecutions.add(p);
877
878
  p.finally(() => this.pendingExecutions.delete(p));
878
879
  return this;
@@ -893,7 +894,7 @@ class Crawler {
893
894
  const task = this.queue.add(() => this.executeHttp(method, url, body, options, isEmail, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions, 0, undefined, skipCache));
894
895
  task.finally(() => this.pendingExecutions.delete(task));
895
896
  }
896
- async execute2(method, url, body, options = {}, forceRevisit) {
897
+ async execute2(method, url, body, options = {}, forceRevisit, emailMetadata) {
897
898
  await this.waitForStorage();
898
899
  if (this.isCacheEnabled) {
899
900
  await this.waitForCache();
@@ -911,10 +912,11 @@ class Crawler {
911
912
  onEmails: this.emailLeadsEvents,
912
913
  queue: this.queue,
913
914
  depth: 1,
914
- allowCrossDomainTravel: true
915
+ allowCrossDomainTravel: true,
916
+ emailMetadata
915
917
  }, forceRevisit, true)).then();
916
918
  }
917
- async executeHttp(method, url, body, options = {}, isEmail, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions, retryCount = 0, parentUrl, skipCache) {
919
+ async executeHttp(method, url, body, options = {}, isEmail, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions, retryCount = 0, parentUrl, skipCache, emailMetadata) {
918
920
  try {
919
921
  await this.triggerStartHandlers();
920
922
  const limitCheck = await this.checkCrawlLimits(url, parentUrl);
@@ -984,7 +986,7 @@ class Crawler {
984
986
  });
985
987
  if (res.contentType && res.contentType.includes("/json")) {
986
988
  if (this.emailDiscoveredEvents.length > 0 || this.emailLeadsEvents.length > 0) {
987
- this.leadsFinder.extractEmails(JSON.stringify(res.data), res.finalUrl, this.emailDiscoveredEvents, this.emailLeadsEvents, this.queue);
989
+ this.leadsFinder.extractEmails(JSON.stringify(res.data), res.finalUrl, this.emailDiscoveredEvents, this.emailLeadsEvents, this.queue, emailMetadata);
988
990
  }
989
991
  for (let i = 0;i < this.jsonEvents.length; i++) {
990
992
  const event = this.jsonEvents[i];
@@ -999,7 +1001,7 @@ class Crawler {
999
1001
  if (!res.contentType || !res.contentType.includes("/html") || typeof res.data !== "string")
1000
1002
  return;
1001
1003
  if ((this.emailDiscoveredEvents.length > 0 || this.emailLeadsEvents.length > 0) && isEmail) {
1002
- this.leadsFinder.extractEmails(res.data, res.finalUrl, this.emailDiscoveredEvents, this.emailLeadsEvents, this.queue);
1004
+ this.leadsFinder.extractEmails(res.data, res.finalUrl, this.emailDiscoveredEvents, this.emailLeadsEvents, this.queue, emailMetadata);
1003
1005
  }
1004
1006
  const { document } = parseHTML(res.data.addBaseUrl(res.finalUrl));
1005
1007
  document.URL = res.finalUrl;
@@ -834,7 +834,8 @@ export class Crawler {
834
834
  useOxylabsScraperAi = false,
835
835
  useOxylabsRotation = true,
836
836
  useDecodo = false,
837
- skipCache = false
837
+ skipCache = false,
838
+ emailMetadata = {}
838
839
  } = options || {};
839
840
  const _options = {
840
841
  headers: this.config.pickHeaders(url, true, headers, true),
@@ -872,7 +873,7 @@ export class Crawler {
872
873
  this.addToNavigationQueue(url, method, body, headersObj);
873
874
  }
874
875
  if (deepEmailFinder) {
875
- const p = this.execute2(method, url, body, _options, forceRevisit);
876
+ const p = this.execute2(method, url, body, _options, forceRevisit, emailMetadata);
876
877
  this.pendingExecutions.add(p);
877
878
  p.finally(() => this.pendingExecutions.delete(p));
878
879
  return this;
@@ -893,7 +894,7 @@ export class Crawler {
893
894
  const task = this.queue.add(() => this.executeHttp(method, url, body, options, isEmail, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions, 0, undefined, skipCache));
894
895
  task.finally(() => this.pendingExecutions.delete(task));
895
896
  }
896
- async execute2(method, url, body, options = {}, forceRevisit) {
897
+ async execute2(method, url, body, options = {}, forceRevisit, emailMetadata) {
897
898
  await this.waitForStorage();
898
899
  if (this.isCacheEnabled) {
899
900
  await this.waitForCache();
@@ -911,10 +912,11 @@ export class Crawler {
911
912
  onEmails: this.emailLeadsEvents,
912
913
  queue: this.queue,
913
914
  depth: 1,
914
- allowCrossDomainTravel: true
915
+ allowCrossDomainTravel: true,
916
+ emailMetadata
915
917
  }, forceRevisit, true)).then();
916
918
  }
917
- async executeHttp(method, url, body, options = {}, isEmail, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions, retryCount = 0, parentUrl, skipCache) {
919
+ async executeHttp(method, url, body, options = {}, isEmail, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions, retryCount = 0, parentUrl, skipCache, emailMetadata) {
918
920
  try {
919
921
  await this.triggerStartHandlers();
920
922
  const limitCheck = await this.checkCrawlLimits(url, parentUrl);
@@ -984,7 +986,7 @@ export class Crawler {
984
986
  });
985
987
  if (res.contentType && res.contentType.includes("/json")) {
986
988
  if (this.emailDiscoveredEvents.length > 0 || this.emailLeadsEvents.length > 0) {
987
- this.leadsFinder.extractEmails(JSON.stringify(res.data), res.finalUrl, this.emailDiscoveredEvents, this.emailLeadsEvents, this.queue);
989
+ this.leadsFinder.extractEmails(JSON.stringify(res.data), res.finalUrl, this.emailDiscoveredEvents, this.emailLeadsEvents, this.queue, emailMetadata);
988
990
  }
989
991
  for (let i = 0;i < this.jsonEvents.length; i++) {
990
992
  const event = this.jsonEvents[i];
@@ -999,7 +1001,7 @@ export class Crawler {
999
1001
  if (!res.contentType || !res.contentType.includes("/html") || typeof res.data !== "string")
1000
1002
  return;
1001
1003
  if ((this.emailDiscoveredEvents.length > 0 || this.emailLeadsEvents.length > 0) && isEmail) {
1002
- this.leadsFinder.extractEmails(res.data, res.finalUrl, this.emailDiscoveredEvents, this.emailLeadsEvents, this.queue);
1004
+ this.leadsFinder.extractEmails(res.data, res.finalUrl, this.emailDiscoveredEvents, this.emailLeadsEvents, this.queue, emailMetadata);
1003
1005
  }
1004
1006
  const { document } = parseHTML(res.data.addBaseUrl(res.finalUrl));
1005
1007
  document.URL = res.finalUrl;
@@ -1,40 +1,40 @@
1
- const _mod_o1i7dy = require('./crawler.cjs');
2
- exports.Crawler = _mod_o1i7dy.Crawler;;
3
- const _mod_9ywpl4 = require('./crawler-options.cjs');
4
- exports.CrawlerOptions = _mod_9ywpl4.CrawlerOptions;;
5
- const _mod_z2gv2k = require('./plugin/robots-txt.cjs');
6
- exports.RobotsTxt = _mod_z2gv2k.RobotsTxt;;
7
- const _mod_hipgel = require('./plugin/file-cacher.cjs');
8
- exports.FileCacher = _mod_hipgel.FileCacher;;
9
- const _mod_xk7y1s = require('./plugin/url-store.cjs');
10
- exports.UrlStore = _mod_xk7y1s.UrlStore;;
11
- const _mod_x224qe = require('./plugin/navigation-history.cjs');
12
- exports.NavigationHistory = _mod_x224qe.NavigationHistory;;
13
- const _mod_3sph6m = require('./addon/oxylabs/index.cjs');
14
- exports.Oxylabs = _mod_3sph6m.Oxylabs;;
15
- const _mod_3e4t9e = require('./addon/oxylabs/options.cjs');
16
- exports.OXYLABS_BROWSER_TYPES = _mod_3e4t9e.OXYLABS_BROWSER_TYPES;
17
- exports.OXYLABS_COMMON_LOCALES = _mod_3e4t9e.OXYLABS_COMMON_LOCALES;
18
- exports.OXYLABS_COMMON_GEO_LOCATIONS = _mod_3e4t9e.OXYLABS_COMMON_GEO_LOCATIONS;
19
- exports.OXYLABS_US_STATES = _mod_3e4t9e.OXYLABS_US_STATES;
20
- exports.OXYLABS_EUROPEAN_COUNTRIES = _mod_3e4t9e.OXYLABS_EUROPEAN_COUNTRIES;
21
- exports.OXYLABS_ASIAN_COUNTRIES = _mod_3e4t9e.OXYLABS_ASIAN_COUNTRIES;
22
- exports.getRandomOxylabsBrowserType = _mod_3e4t9e.getRandomBrowserType;
23
- exports.getRandomOxylabsLocale = _mod_3e4t9e.getRandomLocale;
24
- exports.getRandomOxylabsGeoLocation = _mod_3e4t9e.getRandomGeoLocation;;
25
- const _mod_jbcztc = require('./addon/decodo/index.cjs');
26
- exports.Decodo = _mod_jbcztc.Decodo;;
27
- const _mod_zqnknu = require('./addon/decodo/options.cjs');
28
- exports.DECODO_DEVICE_TYPES = _mod_zqnknu.DECODO_DEVICE_TYPES;
29
- exports.DECODO_HEADLESS_MODES = _mod_zqnknu.DECODO_HEADLESS_MODES;
30
- exports.DECODO_COMMON_LOCALES = _mod_zqnknu.DECODO_COMMON_LOCALES;
31
- exports.DECODO_COMMON_COUNTRIES = _mod_zqnknu.DECODO_COMMON_COUNTRIES;
32
- exports.DECODO_EUROPEAN_COUNTRIES = _mod_zqnknu.DECODO_EUROPEAN_COUNTRIES;
33
- exports.DECODO_ASIAN_COUNTRIES = _mod_zqnknu.DECODO_ASIAN_COUNTRIES;
34
- exports.DECODO_US_STATES = _mod_zqnknu.DECODO_US_STATES;
35
- exports.DECODO_COMMON_CITIES = _mod_zqnknu.DECODO_COMMON_CITIES;
36
- exports.getRandomDecodoDeviceType = _mod_zqnknu.getRandomDeviceType;
37
- exports.getRandomDecodoLocale = _mod_zqnknu.getRandomLocale;
38
- exports.getRandomDecodoCountry = _mod_zqnknu.getRandomCountry;
39
- exports.getRandomDecodoCity = _mod_zqnknu.getRandomCity;
40
- exports.generateDecodoSessionId = _mod_zqnknu.generateSessionId;;
1
+ const _mod_slq7h8 = require('./crawler.cjs');
2
+ exports.Crawler = _mod_slq7h8.Crawler;;
3
+ const _mod_ofhuhf = require('./crawler-options.cjs');
4
+ exports.CrawlerOptions = _mod_ofhuhf.CrawlerOptions;;
5
+ const _mod_ec4x0l = require('./plugin/robots-txt.cjs');
6
+ exports.RobotsTxt = _mod_ec4x0l.RobotsTxt;;
7
+ const _mod_84pqvp = require('./plugin/file-cacher.cjs');
8
+ exports.FileCacher = _mod_84pqvp.FileCacher;;
9
+ const _mod_6809kp = require('./plugin/url-store.cjs');
10
+ exports.UrlStore = _mod_6809kp.UrlStore;;
11
+ const _mod_2ns1bx = require('./plugin/navigation-history.cjs');
12
+ exports.NavigationHistory = _mod_2ns1bx.NavigationHistory;;
13
+ const _mod_nhjlyc = require('./addon/oxylabs/index.cjs');
14
+ exports.Oxylabs = _mod_nhjlyc.Oxylabs;;
15
+ const _mod_cn9oad = require('./addon/oxylabs/options.cjs');
16
+ exports.OXYLABS_BROWSER_TYPES = _mod_cn9oad.OXYLABS_BROWSER_TYPES;
17
+ exports.OXYLABS_COMMON_LOCALES = _mod_cn9oad.OXYLABS_COMMON_LOCALES;
18
+ exports.OXYLABS_COMMON_GEO_LOCATIONS = _mod_cn9oad.OXYLABS_COMMON_GEO_LOCATIONS;
19
+ exports.OXYLABS_US_STATES = _mod_cn9oad.OXYLABS_US_STATES;
20
+ exports.OXYLABS_EUROPEAN_COUNTRIES = _mod_cn9oad.OXYLABS_EUROPEAN_COUNTRIES;
21
+ exports.OXYLABS_ASIAN_COUNTRIES = _mod_cn9oad.OXYLABS_ASIAN_COUNTRIES;
22
+ exports.getRandomOxylabsBrowserType = _mod_cn9oad.getRandomBrowserType;
23
+ exports.getRandomOxylabsLocale = _mod_cn9oad.getRandomLocale;
24
+ exports.getRandomOxylabsGeoLocation = _mod_cn9oad.getRandomGeoLocation;;
25
+ const _mod_l5ipzy = require('./addon/decodo/index.cjs');
26
+ exports.Decodo = _mod_l5ipzy.Decodo;;
27
+ const _mod_je5dfu = require('./addon/decodo/options.cjs');
28
+ exports.DECODO_DEVICE_TYPES = _mod_je5dfu.DECODO_DEVICE_TYPES;
29
+ exports.DECODO_HEADLESS_MODES = _mod_je5dfu.DECODO_HEADLESS_MODES;
30
+ exports.DECODO_COMMON_LOCALES = _mod_je5dfu.DECODO_COMMON_LOCALES;
31
+ exports.DECODO_COMMON_COUNTRIES = _mod_je5dfu.DECODO_COMMON_COUNTRIES;
32
+ exports.DECODO_EUROPEAN_COUNTRIES = _mod_je5dfu.DECODO_EUROPEAN_COUNTRIES;
33
+ exports.DECODO_ASIAN_COUNTRIES = _mod_je5dfu.DECODO_ASIAN_COUNTRIES;
34
+ exports.DECODO_US_STATES = _mod_je5dfu.DECODO_US_STATES;
35
+ exports.DECODO_COMMON_CITIES = _mod_je5dfu.DECODO_COMMON_CITIES;
36
+ exports.getRandomDecodoDeviceType = _mod_je5dfu.getRandomDeviceType;
37
+ exports.getRandomDecodoLocale = _mod_je5dfu.getRandomLocale;
38
+ exports.getRandomDecodoCountry = _mod_je5dfu.getRandomCountry;
39
+ exports.getRandomDecodoCity = _mod_je5dfu.getRandomCity;
40
+ exports.generateDecodoSessionId = _mod_je5dfu.generateSessionId;;
@@ -1 +1 @@
1
- var r=require("./file-cacher.cjs");exports.FileCacher=r.FileCacher;var e=require("./url-store.cjs");exports.UrlStore=e.UrlStore;
1
+ var e=require("./file-cacher.cjs");exports.FileCacher=e.FileCacher;var r=require("./url-store.cjs");exports.UrlStore=r.UrlStore;
@@ -1 +1 @@
1
- var{parseHTML:w}=require("linkedom");class x extends Set{maxSize;constructor(e){super();this.maxSize=e}add(e){if(this.has(e))return this;if(this.size>=this.maxSize){let n=this.values().next().value;if(n)this.delete(n)}return super.add(e)}}class v{http;httpOptions;onEmailLeads;onEmailDiscovered;debug;discoveredEmails=new x(1e4);userAgents=[];fileExtensions=[];restrictedDomains=$();forbiddenProtocols=["mailto:","tel:","javascript:","data:","sms:","ftp:","file:","irc:","blob:","chrome:","about:","intent:"];constructor(e,n,a,o,t=!1){this.http=e,this.httpOptions=n,this.onEmailLeads=a,this.onEmailDiscovered=o,this.debug=t,this.userAgents=y()}sleep(e){return new Promise((n)=>setTimeout(n,e))}async executeHttp(e,n,a,o,t,i=0){let{getCache:r,saveCache:c,hasUrlInCache:u,saveUrl:l,httpConfig:s={}}=o;if(!e||e.length<3||this.forbiddenProtocols.some((d)=>e.startsWith(d)))return;try{let d=t?!1:await u(e),h=await r(e);if(d&&!h)return!1;if(d&&n!=="GET")return!1;let m=h&&n==="GET"?h:await(n==="GET"?this.http.get(e,s):n==="PATCH"?this.http.patch(e,a,s):n==="POST"?this.http.post(e,a,s):this.http.put(e,a,s));if(!h)await c(e,{data:m.data,contentType:m.contentType,finalUrl:m.finalUrl});if(!d)await l(e);if(!m.contentType||!m.contentType.includes("/html")||!m.contentType.includes("text/")||typeof m.data!=="string")return null;return{data:m.data,contentType:m.contentType,finalUrl:m.finalUrl}}catch(d){let h=d,m=this.httpOptions;if(h&&h.response){let g=h.response.status,p=m.retryDelay||100,b=m.maxRetryAttempts||3,f=m.retryWithoutProxyOnStatusCode||void 0,k=m.maxRetryOnProxyError||3;if(f&&s.proxy&&f.includes(g)&&i<b)return await this.sleep(p),delete s.proxy,await this.executeHttp(e,n,a,o,t,i+1);else if(m.retryOnStatusCode&&s.proxy&&m.retryOnStatusCode.includes(g)&&i<b)return await this.sleep(p),await this.executeHttp(e,n,a,o,t,i+1);else if(m.retryOnProxyError&&s.proxy&&i<k)return await this.sleep(p),await this.executeHttp(e,n,a,o,t,i+1)}if(this.debug){if(this.debug)console.log(`Error: unable to ${n} ${e}: ${d.message}`)}return null}}extractEmails(e,n,a,o,t){let i=this.extractEmailsFromContent(e?.replaceAll("mailto:"," ")),r=[];for(let c of i)if(this.handleEmailDiscovery(c,n,a,t))r.push(c);if(o&&o.length>0&&r.length>0)t.add(async()=>Promise.all(o.map((c)=>c(r))));i.length=0,r.length=0}async parseExternalWebsite(e,n,a,o,t,i=!0,r,c){let u=o.httpConfig?.headers?o.httpConfig.headers instanceof Headers?Object.fromEntries(o.httpConfig.headers.entries()):o.httpConfig.headers:{};o.httpConfig=o.httpConfig||{},o.httpConfig.headers={"user-agent":this.getRandomUserAgent(),accept:"text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8","accept-language":"en-US,en;q=0.9","cache-control":"no-cache",pragma:"no-cache",...u},o.httpConfig.timeout=o.httpConfig.timeout||15000,o.depth=o.depth||0,o.allowCrossDomainTravel=o.allowCrossDomainTravel||!1,t=t&&i;let l=[];try{let s=new URL(e),d=this.extractRootDomain(e);if(this.isLinktreeUrl(e))return await this.parseLinktreeProfile(e,o,t);if(this.isRestrictedDomain(e)){if(this.debug)console.warn(`⚠️ Skipped URL (restricted url): ${e}`);return l}let h=await this.executeHttp(e,n,a,o,t);if(!h){if(this.debug&&h===null)console.warn(`⚠️ Failed to fetch page content: ${e}`);if(this.debug&&h===!1)console.warn(`⚠️ Skipped URL (already visited): ${e}`);return l}let m=this.extractEmailsFromContent(h.data?.replaceAll("mailto:"," "));for(let g of m)if(this.handleEmailDiscovery(g,e,o.onEmailDiscovered,o.queue))l.push(g);if(o.depth>0||!r){let g=w(h.data).document,p=this.extractRelevantLinks(g,s,d,o.depth,o.allowCrossDomainTravel);o.depth--;let b=await Promise.allSettled(p.map((f)=>this.parseExternalWebsite(f,"GET",null,{...o,depth:o.depth},t,!1,!0)));for(let f of b)if(f.status==="fulfilled")l.push(...f.value);else if(this.debug)console.warn("⚠️ Failed to parse child URL:",f.reason?.message)}}catch(s){if(this.debug)console.error(`❌ Error parsing external website: ${e}`,s?.message)}if(i){if(o.onEmails&&o.onEmails.length>0)o.queue.add(async()=>Promise.all(o.onEmails.map((s)=>s(l))))}return l}async parseLinktreeProfile(e,n,a){let o=[];try{let t=await this.executeHttp(e,"GET",null,n,a);if(!t){if(this.debug)console.warn(`⚠️ Failed to fetch Linktree profile: ${e}`);return o}let r=w(t).document.getElementById("links-container");if(!r){if(this.debug)console.warn(`\uD83D\uDD0D No links container found in Linktree profile: ${e}`);return o}let c=this.extractLinktreeExternalUrls(r,e);if(c.length===0){if(this.debug)console.info("\uD83D\uDCED No valid external links found in Linktree profile");return o}if(this.debug)console.info(`\uD83C\uDFAF Found ${c.length} external links in Linktree profile`);let u=await Promise.allSettled(c.map((l)=>this.parseExternalWebsite(l,"GET",null,n,a,!1)));for(let l of u)if(l.status==="fulfilled")o.push(...l.value);else if(this.debug)console.warn("⚠️ Failed to parse Linktree external URL:",l.reason?.message)}catch(t){if(this.debug)console.error(`❌ Error parsing Linktree profile: ${e}`,t?.message)}return o}extractLinktreeExternalUrls(e,n){let a=new Set,o=e.querySelectorAll("a[href][target='_blank']");for(let t of o){let i=t.getAttribute("href");if(!i||i.length<3||this.forbiddenProtocols.some((r)=>i.startsWith(r)))continue;try{let r=new URL(i,n).href,c=this.extractRootDomain(r);if(c!=="linktr.ee"&&!this.isRestrictedDomain(r)&&c.length>3)a.add(r)}catch(r){if(this.debug)console.warn(`\uD83D\uDD17 Invalid URL in Linktree: ${i}`)}}return Array.from(a)}handleEmailDiscovery(e,n,a,o){if(!this.discoveredEmails.has(e)){this.discoveredEmails.add(e);let t={email:e,discoveredAt:n,timestamp:new Date};if(a&&a.length>0)o.add(async()=>Promise.all(a.map((i)=>i(t))));if(this.debug)console.info(`\uD83D\uDCE7 New email discovered: ${e} at ${n}`);return!0}return!1}isDomainAccessAllowed(e,n,a,o){if(o)return!0;if(a===0)return e===n;return e===n||e.endsWith(`.${n}`)||n.endsWith(`.${e}`)}extractRelevantLinks(e,n,a,o,t){let i=[],r=["about","contact","help","support","reach","email","mail","message","company","team","staff","info","inquiry","feedback","service","assistance","connect","touch"],c=e.querySelectorAll("a[href]");for(let u of c){let l=u.getAttribute("href");if(!l||l.length<2)continue;try{let s=this.normalizeUrl(l,n),d=this.extractRootDomain(s);if(!this.isDomainAccessAllowed(d,a,o,t))continue;if(r.some((m)=>s.toLowerCase().includes(m))||this.isLinktreeUrl(s))i.push(s)}catch(s){if(this.debug)console.warn(`\uD83D\uDD17 Invalid link found: ${l}`,s?.message)}}return i}extractEmailsFromContent(e){let n=e.replace(/[^\w@.-\s]/g," "),a=/\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/g,o=(c)=>{let u=/^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$/,l=c.split("@")[1]?.toLowerCase(),s=c.split(".").pop()?.toLowerCase();return u.test(c)&&l!==void 0&&s!==void 0&&!this.fileExtensions.includes(`.${s}`)&&!this.isRestrictedDomain(`https://${l}`)},t=(c)=>{return(c.match(a)||[]).filter(o)},i=e.replace(/<[^>]*>/g," "),r=[...t(i),...t(n)];return[...new Set(r)]}isRestrictedDomain(e){try{let n=new URL(e).host.toLowerCase();return this.restrictedDomains.some((a)=>n===a.toLowerCase()||n.endsWith(`.${a.toLowerCase()}`))}catch{return!0}}isLinktreeUrl(e){try{return this.extractRootDomain(e)==="linktr.ee"}catch{return!1}}extractRootDomain(e){try{let a=new URL(e).hostname.toLowerCase();return a.startsWith("www.")?a.slice(4):a}catch{return""}}normalizeUrl(e,n){if(e.startsWith("http://")||e.startsWith("https://"))return e;if(e.startsWith("//"))return`${n.protocol}${e}`;return new URL(e,n.href).href}getRandomUserAgent(){return this.userAgents[Math.floor(Math.random()*this.userAgents.length)]}}function y(){let e=[{name:"Chrome",version:"91.0.4472.124",engine:"AppleWebKit/537.36"},{name:"Firefox",version:"89.0",engine:"Gecko/20100101"},{name:"Safari",version:"14.1.1",engine:"AppleWebKit/605.1.15"},{name:"Edge",version:"91.0.864.59",engine:"AppleWebKit/537.36"},{name:"Opera",version:"77.0.4054.277",engine:"AppleWebKit/537.36"},{name:"Vivaldi",version:"3.8.2259.42",engine:"AppleWebKit/537.36"},{name:"Brave",version:"1.26.74",engine:"AppleWebKit/537.36"},{name:"Chromium",version:"91.0.4472.101",engine:"AppleWebKit/537.36"},{name:"Yandex",version:"21.5.3.742",engine:"AppleWebKit/537.36"},{name:"Maxthon",version:"5.3.8.2000",engine:"AppleWebKit/537.36"}],n=["Windows NT 10.0","Windows NT 6.1","Macintosh; Intel Mac OS X 10_15_7","Macintosh; Intel Mac OS X 11_4_0","X11; Linux x86_64","X11; Ubuntu; Linux x86_64"],a=[];for(let o=0;o<200;o++){let t=e[Math.floor(Math.random()*e.length)],i=n[Math.floor(Math.random()*n.length)],r="";switch(t.name){case"Chrome":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} Safari/537.36`;break;case"Firefox":r=`Mozilla/5.0 (${i}; rv:${t.version}) ${t.engine} Firefox/${t.version}`;break;case"Safari":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Version/${t.version} Safari/605.1.15`;break;case"Edge":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} Safari/537.36 Edg/${t.version}`;break;case"Opera":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} Safari/537.36 OPR/${t.version}`;break;case"Vivaldi":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} Safari/537.36 Vivaldi/${t.version}`;break;case"Brave":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} Safari/537.36 Brave/${t.version}`;break;case"Chromium":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chromium/${t.version} Chrome/${t.version} Safari/537.36`;break;case"Yandex":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} YaBrowser/${t.version} Safari/537.36`;break;case"Maxthon":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} Safari/537.36 Maxthon/${t.version}`;break}a.push(r)}return a}function $(){return["facebook.com","fb.com","messenger.com","instagram.com","threads.net","twitter.com","x.com","linkedin.com","pinterest.com","pin.it","reddit.com","tumblr.com","snapchat.com","tiktok.com","douyin.com","youtube.com","youtu.be","whatsapp.com","telegram.org","t.me","medium.com","quora.com","flickr.com","vimeo.com","vk.com","weibo.com","sina.com.cn","line.me","discord.com","discordapp.com","twitch.tv","meetup.com","nextdoor.com","xing.com","yelp.com","zalo.me","mastodon.social","clubhouse.com","patreon.com","onlyfans.com","douban.com","goodreads.com","soundcloud.com","spotify.com","last.fm","behance.net","dribbble.com","deviantart.com","pixiv.net","slideshare.net","tinder.com","bumble.com","etsy.com","indeed.com","glassdoor.com","monster.com","careerbuilder.com","dice.com","ziprecruiter.com","simplyhired.com","upwork.com","freelancer.com","fiverr.com","stackoverflow.com","stackoverflow.co","angel.co","wellfound.com","quora.com","stackexchange.com","yahoo.com","answers.microsoft.com","askubuntu.com","superuser.com","serverfault.com","mathoverflow.net","xda-developers.com","gamespot.com","ign.com","4chan.org","9gag.com","gizmodo.com","slashdot.org","hacker-news.news","ycombinator.com","producthunt.com","discourse.org","google.com","google.co.uk","google.de","google.fr","google.co.jp","bing.com","yahoo.com","search.yahoo.com","duckduckgo.com","baidu.com","yandex.com","yandex.ru","ask.com","wolframalpha.com","ecosia.org","startpage.com","qwant.com","searx.me","gibiru.com","swisscows.com","gmail.com","googlemail.com","outlook.com","hotmail.com","live.com","msn.com","yahoo.com","ymail.com","aol.com","icloud.com","me.com","mac.com","protonmail.com","pm.me","zoho.com","mail.com","gmx.com","gmx.net","yandex.com","yandex.ru","tutanota.com","tutanota.de","fastmail.com","hushmail.com","mailbox.org","posteo.de","runbox.com","disroot.org","163.com","qq.com","rambler.ru","mail.ru","yelp.com","yelp.ca","yelp.co.uk","yelp.com.au","yellowpages.com","yellowpages.ca","yell.com","tripadvisor.com","tripadvisor.co.uk","tripadvisor.ca","foursquare.com","angieslist.com","bbb.org","manta.com","thumbtack.com","homeadvisor.com","superpages.com","whitepages.com","local.com","citysearch.com","merchantcircle.com","insiderpages.com","kudzu.com","hotfrog.com","buildzoom.com","houzz.com","porch.com","mapquest.com","zagat.com","zomato.com","opentable.com","viator.com","expedia.com","booking.com","airbnb.com","vrbo.com","homeaway.com","craigslist.org","nextdoor.com","patch.com","meetup.com","eventbrite.com","groupon.com","livingsocial.com","gumtree.com","gumtree.com.au","kijiji.ca","leboncoin.fr","finn.no","blocket.se","58.com","dianping.com","tabelog.com","ypcdn.com"]}exports.Scraper=v;exports.CappedSet=x;
1
+ var{parseHTML:w}=require("linkedom");class x extends Set{maxSize;constructor(e){super();this.maxSize=e}add(e){if(this.has(e))return this;if(this.size>=this.maxSize){let a=this.values().next().value;if(a)this.delete(a)}return super.add(e)}}class v{http;httpOptions;onEmailLeads;onEmailDiscovered;debug;discoveredEmails=new x(1e4);userAgents=[];fileExtensions=[];restrictedDomains=$();forbiddenProtocols=["mailto:","tel:","javascript:","data:","sms:","ftp:","file:","irc:","blob:","chrome:","about:","intent:"];constructor(e,a,n,o,t=!1){this.http=e,this.httpOptions=a,this.onEmailLeads=n,this.onEmailDiscovered=o,this.debug=t,this.userAgents=y()}sleep(e){return new Promise((a)=>setTimeout(a,e))}async executeHttp(e,a,n,o,t,i=0){let{getCache:r,saveCache:l,hasUrlInCache:d,saveUrl:c,httpConfig:s={}}=o;if(!e||e.length<3||this.forbiddenProtocols.some((u)=>e.startsWith(u)))return;try{let u=t?!1:await d(e),h=await r(e);if(u&&!h)return!1;if(u&&a!=="GET")return!1;let m=h&&a==="GET"?h:await(a==="GET"?this.http.get(e,s):a==="PATCH"?this.http.patch(e,n,s):a==="POST"?this.http.post(e,n,s):this.http.put(e,n,s));if(!h)await l(e,{data:m.data,contentType:m.contentType,finalUrl:m.finalUrl});if(!u)await c(e);if(!m.contentType||!m.contentType.includes("/html")||!m.contentType.includes("text/")||typeof m.data!=="string")return null;return{data:m.data,contentType:m.contentType,finalUrl:m.finalUrl}}catch(u){let h=u,m=this.httpOptions;if(h&&h.response){let g=h.response.status,p=m.retryDelay||100,b=m.maxRetryAttempts||3,f=m.retryWithoutProxyOnStatusCode||void 0,k=m.maxRetryOnProxyError||3;if(f&&s.proxy&&f.includes(g)&&i<b)return await this.sleep(p),delete s.proxy,await this.executeHttp(e,a,n,o,t,i+1);else if(m.retryOnStatusCode&&s.proxy&&m.retryOnStatusCode.includes(g)&&i<b)return await this.sleep(p),await this.executeHttp(e,a,n,o,t,i+1);else if(m.retryOnProxyError&&s.proxy&&i<k)return await this.sleep(p),await this.executeHttp(e,a,n,o,t,i+1)}if(this.debug){if(this.debug)console.log(`Error: unable to ${a} ${e}: ${u.message}`)}return null}}extractEmails(e,a,n,o,t,i){let r=this.extractEmailsFromContent(e?.replaceAll("mailto:"," ")),l=[];for(let d of r)if(this.handleEmailDiscovery(d,a,n,t,i))l.push(d);if(o&&o.length>0&&l.length>0)t.add(async()=>Promise.all(o.map((d)=>d(l))));r.length=0,l.length=0}async parseExternalWebsite(e,a,n,o,t,i=!0,r,l){let d=o.httpConfig?.headers?o.httpConfig.headers instanceof Headers?Object.fromEntries(o.httpConfig.headers.entries()):o.httpConfig.headers:{};o.httpConfig=o.httpConfig||{},o.httpConfig.headers={"user-agent":this.getRandomUserAgent(),accept:"text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8","accept-language":"en-US,en;q=0.9","cache-control":"no-cache",pragma:"no-cache",...d},o.httpConfig.timeout=o.httpConfig.timeout||15000,o.depth=o.depth||0,o.allowCrossDomainTravel=o.allowCrossDomainTravel||!1,t=t&&i;let c=[];try{let s=new URL(e),u=this.extractRootDomain(e);if(this.isLinktreeUrl(e))return await this.parseLinktreeProfile(e,o,t);if(this.isRestrictedDomain(e)){if(this.debug)console.warn(`⚠️ Skipped URL (restricted url): ${e}`);return c}let h=await this.executeHttp(e,a,n,o,t);if(!h){if(this.debug&&h===null)console.warn(`⚠️ Failed to fetch page content: ${e}`);if(this.debug&&h===!1)console.warn(`⚠️ Skipped URL (already visited): ${e}`);return c}let m=this.extractEmailsFromContent(h.data?.replaceAll("mailto:"," "));for(let g of m)if(this.handleEmailDiscovery(g,e,o.onEmailDiscovered,o.queue,o.emailMetadata))c.push(g);if(o.depth>0||!r){let g=w(h.data).document,p=this.extractRelevantLinks(g,s,u,o.depth,o.allowCrossDomainTravel);o.depth--;let b=await Promise.allSettled(p.map((f)=>this.parseExternalWebsite(f,"GET",null,{...o,depth:o.depth},t,!1,!0)));for(let f of b)if(f.status==="fulfilled")c.push(...f.value);else if(this.debug)console.warn("⚠️ Failed to parse child URL:",f.reason?.message)}}catch(s){if(this.debug)console.error(`❌ Error parsing external website: ${e}`,s?.message)}if(i){if(o.onEmails&&o.onEmails.length>0)o.queue.add(async()=>Promise.all(o.onEmails.map((s)=>s(c))))}return c}async parseLinktreeProfile(e,a,n){let o=[];try{let t=await this.executeHttp(e,"GET",null,a,n);if(!t){if(this.debug)console.warn(`⚠️ Failed to fetch Linktree profile: ${e}`);return o}let r=w(t).document.getElementById("links-container");if(!r){if(this.debug)console.warn(`\uD83D\uDD0D No links container found in Linktree profile: ${e}`);return o}let l=this.extractLinktreeExternalUrls(r,e);if(l.length===0){if(this.debug)console.info("\uD83D\uDCED No valid external links found in Linktree profile");return o}if(this.debug)console.info(`\uD83C\uDFAF Found ${l.length} external links in Linktree profile`);let d=await Promise.allSettled(l.map((c)=>this.parseExternalWebsite(c,"GET",null,a,n,!1)));for(let c of d)if(c.status==="fulfilled")o.push(...c.value);else if(this.debug)console.warn("⚠️ Failed to parse Linktree external URL:",c.reason?.message)}catch(t){if(this.debug)console.error(`❌ Error parsing Linktree profile: ${e}`,t?.message)}return o}extractLinktreeExternalUrls(e,a){let n=new Set,o=e.querySelectorAll("a[href][target='_blank']");for(let t of o){let i=t.getAttribute("href");if(!i||i.length<3||this.forbiddenProtocols.some((r)=>i.startsWith(r)))continue;try{let r=new URL(i,a).href,l=this.extractRootDomain(r);if(l!=="linktr.ee"&&!this.isRestrictedDomain(r)&&l.length>3)n.add(r)}catch(r){if(this.debug)console.warn(`\uD83D\uDD17 Invalid URL in Linktree: ${i}`)}}return Array.from(n)}handleEmailDiscovery(e,a,n,o,t){if(!this.discoveredEmails.has(e)){this.discoveredEmails.add(e);let i={email:e,discoveredAt:a,timestamp:new Date,metadata:t||{}};if(n&&n.length>0)o.add(async()=>Promise.all(n.map((r)=>r(i))));if(this.debug)console.info(`\uD83D\uDCE7 New email discovered: ${e} at ${a}`);return!0}return!1}isDomainAccessAllowed(e,a,n,o){if(o)return!0;if(n===0)return e===a;return e===a||e.endsWith(`.${a}`)||a.endsWith(`.${e}`)}extractRelevantLinks(e,a,n,o,t){let i=[],r=["about","contact","help","support","reach","email","mail","message","company","team","staff","info","inquiry","feedback","service","assistance","connect","touch"],l=e.querySelectorAll("a[href]");for(let d of l){let c=d.getAttribute("href");if(!c||c.length<2)continue;try{let s=this.normalizeUrl(c,a),u=this.extractRootDomain(s);if(!this.isDomainAccessAllowed(u,n,o,t))continue;if(r.some((m)=>s.toLowerCase().includes(m))||this.isLinktreeUrl(s))i.push(s)}catch(s){if(this.debug)console.warn(`\uD83D\uDD17 Invalid link found: ${c}`,s?.message)}}return i}extractEmailsFromContent(e){let a=e.replace(/[^\w@.-\s]/g," "),n=/\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/g,o=(l)=>{let d=/^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$/,c=l.split("@")[1]?.toLowerCase(),s=l.split(".").pop()?.toLowerCase();return d.test(l)&&c!==void 0&&s!==void 0&&!this.fileExtensions.includes(`.${s}`)&&!this.isRestrictedDomain(`https://${c}`)},t=(l)=>{return(l.match(n)||[]).filter(o)},i=e.replace(/<[^>]*>/g," "),r=[...t(i),...t(a)];return[...new Set(r)]}isRestrictedDomain(e){try{let a=new URL(e).host.toLowerCase();return this.restrictedDomains.some((n)=>a===n.toLowerCase()||a.endsWith(`.${n.toLowerCase()}`))}catch{return!0}}isLinktreeUrl(e){try{return this.extractRootDomain(e)==="linktr.ee"}catch{return!1}}extractRootDomain(e){try{let n=new URL(e).hostname.toLowerCase();return n.startsWith("www.")?n.slice(4):n}catch{return""}}normalizeUrl(e,a){if(e.startsWith("http://")||e.startsWith("https://"))return e;if(e.startsWith("//"))return`${a.protocol}${e}`;return new URL(e,a.href).href}getRandomUserAgent(){return this.userAgents[Math.floor(Math.random()*this.userAgents.length)]}}function y(){let e=[{name:"Chrome",version:"91.0.4472.124",engine:"AppleWebKit/537.36"},{name:"Firefox",version:"89.0",engine:"Gecko/20100101"},{name:"Safari",version:"14.1.1",engine:"AppleWebKit/605.1.15"},{name:"Edge",version:"91.0.864.59",engine:"AppleWebKit/537.36"},{name:"Opera",version:"77.0.4054.277",engine:"AppleWebKit/537.36"},{name:"Vivaldi",version:"3.8.2259.42",engine:"AppleWebKit/537.36"},{name:"Brave",version:"1.26.74",engine:"AppleWebKit/537.36"},{name:"Chromium",version:"91.0.4472.101",engine:"AppleWebKit/537.36"},{name:"Yandex",version:"21.5.3.742",engine:"AppleWebKit/537.36"},{name:"Maxthon",version:"5.3.8.2000",engine:"AppleWebKit/537.36"}],a=["Windows NT 10.0","Windows NT 6.1","Macintosh; Intel Mac OS X 10_15_7","Macintosh; Intel Mac OS X 11_4_0","X11; Linux x86_64","X11; Ubuntu; Linux x86_64"],n=[];for(let o=0;o<200;o++){let t=e[Math.floor(Math.random()*e.length)],i=a[Math.floor(Math.random()*a.length)],r="";switch(t.name){case"Chrome":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} Safari/537.36`;break;case"Firefox":r=`Mozilla/5.0 (${i}; rv:${t.version}) ${t.engine} Firefox/${t.version}`;break;case"Safari":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Version/${t.version} Safari/605.1.15`;break;case"Edge":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} Safari/537.36 Edg/${t.version}`;break;case"Opera":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} Safari/537.36 OPR/${t.version}`;break;case"Vivaldi":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} Safari/537.36 Vivaldi/${t.version}`;break;case"Brave":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} Safari/537.36 Brave/${t.version}`;break;case"Chromium":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chromium/${t.version} Chrome/${t.version} Safari/537.36`;break;case"Yandex":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} YaBrowser/${t.version} Safari/537.36`;break;case"Maxthon":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} Safari/537.36 Maxthon/${t.version}`;break}n.push(r)}return n}function $(){return["facebook.com","fb.com","messenger.com","instagram.com","threads.net","twitter.com","x.com","linkedin.com","pinterest.com","pin.it","reddit.com","tumblr.com","snapchat.com","tiktok.com","douyin.com","youtube.com","youtu.be","whatsapp.com","telegram.org","t.me","medium.com","quora.com","flickr.com","vimeo.com","vk.com","weibo.com","sina.com.cn","line.me","discord.com","discordapp.com","twitch.tv","meetup.com","nextdoor.com","xing.com","yelp.com","zalo.me","mastodon.social","clubhouse.com","patreon.com","onlyfans.com","douban.com","goodreads.com","soundcloud.com","spotify.com","last.fm","behance.net","dribbble.com","deviantart.com","pixiv.net","slideshare.net","tinder.com","bumble.com","etsy.com","indeed.com","glassdoor.com","monster.com","careerbuilder.com","dice.com","ziprecruiter.com","simplyhired.com","upwork.com","freelancer.com","fiverr.com","stackoverflow.com","stackoverflow.co","angel.co","wellfound.com","quora.com","stackexchange.com","yahoo.com","answers.microsoft.com","askubuntu.com","superuser.com","serverfault.com","mathoverflow.net","xda-developers.com","gamespot.com","ign.com","4chan.org","9gag.com","gizmodo.com","slashdot.org","hacker-news.news","ycombinator.com","producthunt.com","discourse.org","google.com","google.co.uk","google.de","google.fr","google.co.jp","bing.com","yahoo.com","search.yahoo.com","duckduckgo.com","baidu.com","yandex.com","yandex.ru","ask.com","wolframalpha.com","ecosia.org","startpage.com","qwant.com","searx.me","gibiru.com","swisscows.com","gmail.com","googlemail.com","outlook.com","hotmail.com","live.com","msn.com","yahoo.com","ymail.com","aol.com","icloud.com","me.com","mac.com","protonmail.com","pm.me","zoho.com","mail.com","gmx.com","gmx.net","yandex.com","yandex.ru","tutanota.com","tutanota.de","fastmail.com","hushmail.com","mailbox.org","posteo.de","runbox.com","disroot.org","163.com","qq.com","rambler.ru","mail.ru","yelp.com","yelp.ca","yelp.co.uk","yelp.com.au","yellowpages.com","yellowpages.ca","yell.com","tripadvisor.com","tripadvisor.co.uk","tripadvisor.ca","foursquare.com","angieslist.com","bbb.org","manta.com","thumbtack.com","homeadvisor.com","superpages.com","whitepages.com","local.com","citysearch.com","merchantcircle.com","insiderpages.com","kudzu.com","hotfrog.com","buildzoom.com","houzz.com","porch.com","mapquest.com","zagat.com","zomato.com","opentable.com","viator.com","expedia.com","booking.com","airbnb.com","vrbo.com","homeaway.com","craigslist.org","nextdoor.com","patch.com","meetup.com","eventbrite.com","groupon.com","livingsocial.com","gumtree.com","gumtree.com.au","kijiji.ca","leboncoin.fr","finn.no","blocket.se","58.com","dianping.com","tabelog.com","ypcdn.com"]}exports.Scraper=v;exports.CappedSet=x;
@@ -1 +1 @@
1
- import{parseHTML as x}from"linkedom";class w extends Set{maxSize;constructor(e){super();this.maxSize=e}add(e){if(this.has(e))return this;if(this.size>=this.maxSize){let n=this.values().next().value;if(n)this.delete(n)}return super.add(e)}}class k{http;httpOptions;onEmailLeads;onEmailDiscovered;debug;discoveredEmails=new w(1e4);userAgents=[];fileExtensions=[];restrictedDomains=$();forbiddenProtocols=["mailto:","tel:","javascript:","data:","sms:","ftp:","file:","irc:","blob:","chrome:","about:","intent:"];constructor(e,n,a,o,t=!1){this.http=e,this.httpOptions=n,this.onEmailLeads=a,this.onEmailDiscovered=o,this.debug=t,this.userAgents=y()}sleep(e){return new Promise((n)=>setTimeout(n,e))}async executeHttp(e,n,a,o,t,i=0){let{getCache:r,saveCache:c,hasUrlInCache:u,saveUrl:l,httpConfig:s={}}=o;if(!e||e.length<3||this.forbiddenProtocols.some((d)=>e.startsWith(d)))return;try{let d=t?!1:await u(e),h=await r(e);if(d&&!h)return!1;if(d&&n!=="GET")return!1;let m=h&&n==="GET"?h:await(n==="GET"?this.http.get(e,s):n==="PATCH"?this.http.patch(e,a,s):n==="POST"?this.http.post(e,a,s):this.http.put(e,a,s));if(!h)await c(e,{data:m.data,contentType:m.contentType,finalUrl:m.finalUrl});if(!d)await l(e);if(!m.contentType||!m.contentType.includes("/html")||!m.contentType.includes("text/")||typeof m.data!=="string")return null;return{data:m.data,contentType:m.contentType,finalUrl:m.finalUrl}}catch(d){let h=d,m=this.httpOptions;if(h&&h.response){let g=h.response.status,p=m.retryDelay||100,b=m.maxRetryAttempts||3,f=m.retryWithoutProxyOnStatusCode||void 0,v=m.maxRetryOnProxyError||3;if(f&&s.proxy&&f.includes(g)&&i<b)return await this.sleep(p),delete s.proxy,await this.executeHttp(e,n,a,o,t,i+1);else if(m.retryOnStatusCode&&s.proxy&&m.retryOnStatusCode.includes(g)&&i<b)return await this.sleep(p),await this.executeHttp(e,n,a,o,t,i+1);else if(m.retryOnProxyError&&s.proxy&&i<v)return await this.sleep(p),await this.executeHttp(e,n,a,o,t,i+1)}if(this.debug){if(this.debug)console.log(`Error: unable to ${n} ${e}: ${d.message}`)}return null}}extractEmails(e,n,a,o,t){let i=this.extractEmailsFromContent(e?.replaceAll("mailto:"," ")),r=[];for(let c of i)if(this.handleEmailDiscovery(c,n,a,t))r.push(c);if(o&&o.length>0&&r.length>0)t.add(async()=>Promise.all(o.map((c)=>c(r))));i.length=0,r.length=0}async parseExternalWebsite(e,n,a,o,t,i=!0,r,c){let u=o.httpConfig?.headers?o.httpConfig.headers instanceof Headers?Object.fromEntries(o.httpConfig.headers.entries()):o.httpConfig.headers:{};o.httpConfig=o.httpConfig||{},o.httpConfig.headers={"user-agent":this.getRandomUserAgent(),accept:"text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8","accept-language":"en-US,en;q=0.9","cache-control":"no-cache",pragma:"no-cache",...u},o.httpConfig.timeout=o.httpConfig.timeout||15000,o.depth=o.depth||0,o.allowCrossDomainTravel=o.allowCrossDomainTravel||!1,t=t&&i;let l=[];try{let s=new URL(e),d=this.extractRootDomain(e);if(this.isLinktreeUrl(e))return await this.parseLinktreeProfile(e,o,t);if(this.isRestrictedDomain(e)){if(this.debug)console.warn(`⚠️ Skipped URL (restricted url): ${e}`);return l}let h=await this.executeHttp(e,n,a,o,t);if(!h){if(this.debug&&h===null)console.warn(`⚠️ Failed to fetch page content: ${e}`);if(this.debug&&h===!1)console.warn(`⚠️ Skipped URL (already visited): ${e}`);return l}let m=this.extractEmailsFromContent(h.data?.replaceAll("mailto:"," "));for(let g of m)if(this.handleEmailDiscovery(g,e,o.onEmailDiscovered,o.queue))l.push(g);if(o.depth>0||!r){let g=x(h.data).document,p=this.extractRelevantLinks(g,s,d,o.depth,o.allowCrossDomainTravel);o.depth--;let b=await Promise.allSettled(p.map((f)=>this.parseExternalWebsite(f,"GET",null,{...o,depth:o.depth},t,!1,!0)));for(let f of b)if(f.status==="fulfilled")l.push(...f.value);else if(this.debug)console.warn("⚠️ Failed to parse child URL:",f.reason?.message)}}catch(s){if(this.debug)console.error(`❌ Error parsing external website: ${e}`,s?.message)}if(i){if(o.onEmails&&o.onEmails.length>0)o.queue.add(async()=>Promise.all(o.onEmails.map((s)=>s(l))))}return l}async parseLinktreeProfile(e,n,a){let o=[];try{let t=await this.executeHttp(e,"GET",null,n,a);if(!t){if(this.debug)console.warn(`⚠️ Failed to fetch Linktree profile: ${e}`);return o}let r=x(t).document.getElementById("links-container");if(!r){if(this.debug)console.warn(`\uD83D\uDD0D No links container found in Linktree profile: ${e}`);return o}let c=this.extractLinktreeExternalUrls(r,e);if(c.length===0){if(this.debug)console.info("\uD83D\uDCED No valid external links found in Linktree profile");return o}if(this.debug)console.info(`\uD83C\uDFAF Found ${c.length} external links in Linktree profile`);let u=await Promise.allSettled(c.map((l)=>this.parseExternalWebsite(l,"GET",null,n,a,!1)));for(let l of u)if(l.status==="fulfilled")o.push(...l.value);else if(this.debug)console.warn("⚠️ Failed to parse Linktree external URL:",l.reason?.message)}catch(t){if(this.debug)console.error(`❌ Error parsing Linktree profile: ${e}`,t?.message)}return o}extractLinktreeExternalUrls(e,n){let a=new Set,o=e.querySelectorAll("a[href][target='_blank']");for(let t of o){let i=t.getAttribute("href");if(!i||i.length<3||this.forbiddenProtocols.some((r)=>i.startsWith(r)))continue;try{let r=new URL(i,n).href,c=this.extractRootDomain(r);if(c!=="linktr.ee"&&!this.isRestrictedDomain(r)&&c.length>3)a.add(r)}catch(r){if(this.debug)console.warn(`\uD83D\uDD17 Invalid URL in Linktree: ${i}`)}}return Array.from(a)}handleEmailDiscovery(e,n,a,o){if(!this.discoveredEmails.has(e)){this.discoveredEmails.add(e);let t={email:e,discoveredAt:n,timestamp:new Date};if(a&&a.length>0)o.add(async()=>Promise.all(a.map((i)=>i(t))));if(this.debug)console.info(`\uD83D\uDCE7 New email discovered: ${e} at ${n}`);return!0}return!1}isDomainAccessAllowed(e,n,a,o){if(o)return!0;if(a===0)return e===n;return e===n||e.endsWith(`.${n}`)||n.endsWith(`.${e}`)}extractRelevantLinks(e,n,a,o,t){let i=[],r=["about","contact","help","support","reach","email","mail","message","company","team","staff","info","inquiry","feedback","service","assistance","connect","touch"],c=e.querySelectorAll("a[href]");for(let u of c){let l=u.getAttribute("href");if(!l||l.length<2)continue;try{let s=this.normalizeUrl(l,n),d=this.extractRootDomain(s);if(!this.isDomainAccessAllowed(d,a,o,t))continue;if(r.some((m)=>s.toLowerCase().includes(m))||this.isLinktreeUrl(s))i.push(s)}catch(s){if(this.debug)console.warn(`\uD83D\uDD17 Invalid link found: ${l}`,s?.message)}}return i}extractEmailsFromContent(e){let n=e.replace(/[^\w@.-\s]/g," "),a=/\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/g,o=(c)=>{let u=/^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$/,l=c.split("@")[1]?.toLowerCase(),s=c.split(".").pop()?.toLowerCase();return u.test(c)&&l!==void 0&&s!==void 0&&!this.fileExtensions.includes(`.${s}`)&&!this.isRestrictedDomain(`https://${l}`)},t=(c)=>{return(c.match(a)||[]).filter(o)},i=e.replace(/<[^>]*>/g," "),r=[...t(i),...t(n)];return[...new Set(r)]}isRestrictedDomain(e){try{let n=new URL(e).host.toLowerCase();return this.restrictedDomains.some((a)=>n===a.toLowerCase()||n.endsWith(`.${a.toLowerCase()}`))}catch{return!0}}isLinktreeUrl(e){try{return this.extractRootDomain(e)==="linktr.ee"}catch{return!1}}extractRootDomain(e){try{let a=new URL(e).hostname.toLowerCase();return a.startsWith("www.")?a.slice(4):a}catch{return""}}normalizeUrl(e,n){if(e.startsWith("http://")||e.startsWith("https://"))return e;if(e.startsWith("//"))return`${n.protocol}${e}`;return new URL(e,n.href).href}getRandomUserAgent(){return this.userAgents[Math.floor(Math.random()*this.userAgents.length)]}}function y(){let e=[{name:"Chrome",version:"91.0.4472.124",engine:"AppleWebKit/537.36"},{name:"Firefox",version:"89.0",engine:"Gecko/20100101"},{name:"Safari",version:"14.1.1",engine:"AppleWebKit/605.1.15"},{name:"Edge",version:"91.0.864.59",engine:"AppleWebKit/537.36"},{name:"Opera",version:"77.0.4054.277",engine:"AppleWebKit/537.36"},{name:"Vivaldi",version:"3.8.2259.42",engine:"AppleWebKit/537.36"},{name:"Brave",version:"1.26.74",engine:"AppleWebKit/537.36"},{name:"Chromium",version:"91.0.4472.101",engine:"AppleWebKit/537.36"},{name:"Yandex",version:"21.5.3.742",engine:"AppleWebKit/537.36"},{name:"Maxthon",version:"5.3.8.2000",engine:"AppleWebKit/537.36"}],n=["Windows NT 10.0","Windows NT 6.1","Macintosh; Intel Mac OS X 10_15_7","Macintosh; Intel Mac OS X 11_4_0","X11; Linux x86_64","X11; Ubuntu; Linux x86_64"],a=[];for(let o=0;o<200;o++){let t=e[Math.floor(Math.random()*e.length)],i=n[Math.floor(Math.random()*n.length)],r="";switch(t.name){case"Chrome":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} Safari/537.36`;break;case"Firefox":r=`Mozilla/5.0 (${i}; rv:${t.version}) ${t.engine} Firefox/${t.version}`;break;case"Safari":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Version/${t.version} Safari/605.1.15`;break;case"Edge":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} Safari/537.36 Edg/${t.version}`;break;case"Opera":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} Safari/537.36 OPR/${t.version}`;break;case"Vivaldi":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} Safari/537.36 Vivaldi/${t.version}`;break;case"Brave":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} Safari/537.36 Brave/${t.version}`;break;case"Chromium":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chromium/${t.version} Chrome/${t.version} Safari/537.36`;break;case"Yandex":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} YaBrowser/${t.version} Safari/537.36`;break;case"Maxthon":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} Safari/537.36 Maxthon/${t.version}`;break}a.push(r)}return a}function $(){return["facebook.com","fb.com","messenger.com","instagram.com","threads.net","twitter.com","x.com","linkedin.com","pinterest.com","pin.it","reddit.com","tumblr.com","snapchat.com","tiktok.com","douyin.com","youtube.com","youtu.be","whatsapp.com","telegram.org","t.me","medium.com","quora.com","flickr.com","vimeo.com","vk.com","weibo.com","sina.com.cn","line.me","discord.com","discordapp.com","twitch.tv","meetup.com","nextdoor.com","xing.com","yelp.com","zalo.me","mastodon.social","clubhouse.com","patreon.com","onlyfans.com","douban.com","goodreads.com","soundcloud.com","spotify.com","last.fm","behance.net","dribbble.com","deviantart.com","pixiv.net","slideshare.net","tinder.com","bumble.com","etsy.com","indeed.com","glassdoor.com","monster.com","careerbuilder.com","dice.com","ziprecruiter.com","simplyhired.com","upwork.com","freelancer.com","fiverr.com","stackoverflow.com","stackoverflow.co","angel.co","wellfound.com","quora.com","stackexchange.com","yahoo.com","answers.microsoft.com","askubuntu.com","superuser.com","serverfault.com","mathoverflow.net","xda-developers.com","gamespot.com","ign.com","4chan.org","9gag.com","gizmodo.com","slashdot.org","hacker-news.news","ycombinator.com","producthunt.com","discourse.org","google.com","google.co.uk","google.de","google.fr","google.co.jp","bing.com","yahoo.com","search.yahoo.com","duckduckgo.com","baidu.com","yandex.com","yandex.ru","ask.com","wolframalpha.com","ecosia.org","startpage.com","qwant.com","searx.me","gibiru.com","swisscows.com","gmail.com","googlemail.com","outlook.com","hotmail.com","live.com","msn.com","yahoo.com","ymail.com","aol.com","icloud.com","me.com","mac.com","protonmail.com","pm.me","zoho.com","mail.com","gmx.com","gmx.net","yandex.com","yandex.ru","tutanota.com","tutanota.de","fastmail.com","hushmail.com","mailbox.org","posteo.de","runbox.com","disroot.org","163.com","qq.com","rambler.ru","mail.ru","yelp.com","yelp.ca","yelp.co.uk","yelp.com.au","yellowpages.com","yellowpages.ca","yell.com","tripadvisor.com","tripadvisor.co.uk","tripadvisor.ca","foursquare.com","angieslist.com","bbb.org","manta.com","thumbtack.com","homeadvisor.com","superpages.com","whitepages.com","local.com","citysearch.com","merchantcircle.com","insiderpages.com","kudzu.com","hotfrog.com","buildzoom.com","houzz.com","porch.com","mapquest.com","zagat.com","zomato.com","opentable.com","viator.com","expedia.com","booking.com","airbnb.com","vrbo.com","homeaway.com","craigslist.org","nextdoor.com","patch.com","meetup.com","eventbrite.com","groupon.com","livingsocial.com","gumtree.com","gumtree.com.au","kijiji.ca","leboncoin.fr","finn.no","blocket.se","58.com","dianping.com","tabelog.com","ypcdn.com"]}export{k as Scraper,w as CappedSet};
1
+ import{parseHTML as x}from"linkedom";class w extends Set{maxSize;constructor(e){super();this.maxSize=e}add(e){if(this.has(e))return this;if(this.size>=this.maxSize){let a=this.values().next().value;if(a)this.delete(a)}return super.add(e)}}class k{http;httpOptions;onEmailLeads;onEmailDiscovered;debug;discoveredEmails=new w(1e4);userAgents=[];fileExtensions=[];restrictedDomains=$();forbiddenProtocols=["mailto:","tel:","javascript:","data:","sms:","ftp:","file:","irc:","blob:","chrome:","about:","intent:"];constructor(e,a,n,o,t=!1){this.http=e,this.httpOptions=a,this.onEmailLeads=n,this.onEmailDiscovered=o,this.debug=t,this.userAgents=y()}sleep(e){return new Promise((a)=>setTimeout(a,e))}async executeHttp(e,a,n,o,t,i=0){let{getCache:r,saveCache:l,hasUrlInCache:d,saveUrl:c,httpConfig:s={}}=o;if(!e||e.length<3||this.forbiddenProtocols.some((u)=>e.startsWith(u)))return;try{let u=t?!1:await d(e),h=await r(e);if(u&&!h)return!1;if(u&&a!=="GET")return!1;let m=h&&a==="GET"?h:await(a==="GET"?this.http.get(e,s):a==="PATCH"?this.http.patch(e,n,s):a==="POST"?this.http.post(e,n,s):this.http.put(e,n,s));if(!h)await l(e,{data:m.data,contentType:m.contentType,finalUrl:m.finalUrl});if(!u)await c(e);if(!m.contentType||!m.contentType.includes("/html")||!m.contentType.includes("text/")||typeof m.data!=="string")return null;return{data:m.data,contentType:m.contentType,finalUrl:m.finalUrl}}catch(u){let h=u,m=this.httpOptions;if(h&&h.response){let g=h.response.status,p=m.retryDelay||100,b=m.maxRetryAttempts||3,f=m.retryWithoutProxyOnStatusCode||void 0,v=m.maxRetryOnProxyError||3;if(f&&s.proxy&&f.includes(g)&&i<b)return await this.sleep(p),delete s.proxy,await this.executeHttp(e,a,n,o,t,i+1);else if(m.retryOnStatusCode&&s.proxy&&m.retryOnStatusCode.includes(g)&&i<b)return await this.sleep(p),await this.executeHttp(e,a,n,o,t,i+1);else if(m.retryOnProxyError&&s.proxy&&i<v)return await this.sleep(p),await this.executeHttp(e,a,n,o,t,i+1)}if(this.debug){if(this.debug)console.log(`Error: unable to ${a} ${e}: ${u.message}`)}return null}}extractEmails(e,a,n,o,t,i){let r=this.extractEmailsFromContent(e?.replaceAll("mailto:"," ")),l=[];for(let d of r)if(this.handleEmailDiscovery(d,a,n,t,i))l.push(d);if(o&&o.length>0&&l.length>0)t.add(async()=>Promise.all(o.map((d)=>d(l))));r.length=0,l.length=0}async parseExternalWebsite(e,a,n,o,t,i=!0,r,l){let d=o.httpConfig?.headers?o.httpConfig.headers instanceof Headers?Object.fromEntries(o.httpConfig.headers.entries()):o.httpConfig.headers:{};o.httpConfig=o.httpConfig||{},o.httpConfig.headers={"user-agent":this.getRandomUserAgent(),accept:"text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8","accept-language":"en-US,en;q=0.9","cache-control":"no-cache",pragma:"no-cache",...d},o.httpConfig.timeout=o.httpConfig.timeout||15000,o.depth=o.depth||0,o.allowCrossDomainTravel=o.allowCrossDomainTravel||!1,t=t&&i;let c=[];try{let s=new URL(e),u=this.extractRootDomain(e);if(this.isLinktreeUrl(e))return await this.parseLinktreeProfile(e,o,t);if(this.isRestrictedDomain(e)){if(this.debug)console.warn(`⚠️ Skipped URL (restricted url): ${e}`);return c}let h=await this.executeHttp(e,a,n,o,t);if(!h){if(this.debug&&h===null)console.warn(`⚠️ Failed to fetch page content: ${e}`);if(this.debug&&h===!1)console.warn(`⚠️ Skipped URL (already visited): ${e}`);return c}let m=this.extractEmailsFromContent(h.data?.replaceAll("mailto:"," "));for(let g of m)if(this.handleEmailDiscovery(g,e,o.onEmailDiscovered,o.queue,o.emailMetadata))c.push(g);if(o.depth>0||!r){let g=x(h.data).document,p=this.extractRelevantLinks(g,s,u,o.depth,o.allowCrossDomainTravel);o.depth--;let b=await Promise.allSettled(p.map((f)=>this.parseExternalWebsite(f,"GET",null,{...o,depth:o.depth},t,!1,!0)));for(let f of b)if(f.status==="fulfilled")c.push(...f.value);else if(this.debug)console.warn("⚠️ Failed to parse child URL:",f.reason?.message)}}catch(s){if(this.debug)console.error(`❌ Error parsing external website: ${e}`,s?.message)}if(i){if(o.onEmails&&o.onEmails.length>0)o.queue.add(async()=>Promise.all(o.onEmails.map((s)=>s(c))))}return c}async parseLinktreeProfile(e,a,n){let o=[];try{let t=await this.executeHttp(e,"GET",null,a,n);if(!t){if(this.debug)console.warn(`⚠️ Failed to fetch Linktree profile: ${e}`);return o}let r=x(t).document.getElementById("links-container");if(!r){if(this.debug)console.warn(`\uD83D\uDD0D No links container found in Linktree profile: ${e}`);return o}let l=this.extractLinktreeExternalUrls(r,e);if(l.length===0){if(this.debug)console.info("\uD83D\uDCED No valid external links found in Linktree profile");return o}if(this.debug)console.info(`\uD83C\uDFAF Found ${l.length} external links in Linktree profile`);let d=await Promise.allSettled(l.map((c)=>this.parseExternalWebsite(c,"GET",null,a,n,!1)));for(let c of d)if(c.status==="fulfilled")o.push(...c.value);else if(this.debug)console.warn("⚠️ Failed to parse Linktree external URL:",c.reason?.message)}catch(t){if(this.debug)console.error(`❌ Error parsing Linktree profile: ${e}`,t?.message)}return o}extractLinktreeExternalUrls(e,a){let n=new Set,o=e.querySelectorAll("a[href][target='_blank']");for(let t of o){let i=t.getAttribute("href");if(!i||i.length<3||this.forbiddenProtocols.some((r)=>i.startsWith(r)))continue;try{let r=new URL(i,a).href,l=this.extractRootDomain(r);if(l!=="linktr.ee"&&!this.isRestrictedDomain(r)&&l.length>3)n.add(r)}catch(r){if(this.debug)console.warn(`\uD83D\uDD17 Invalid URL in Linktree: ${i}`)}}return Array.from(n)}handleEmailDiscovery(e,a,n,o,t){if(!this.discoveredEmails.has(e)){this.discoveredEmails.add(e);let i={email:e,discoveredAt:a,timestamp:new Date,metadata:t||{}};if(n&&n.length>0)o.add(async()=>Promise.all(n.map((r)=>r(i))));if(this.debug)console.info(`\uD83D\uDCE7 New email discovered: ${e} at ${a}`);return!0}return!1}isDomainAccessAllowed(e,a,n,o){if(o)return!0;if(n===0)return e===a;return e===a||e.endsWith(`.${a}`)||a.endsWith(`.${e}`)}extractRelevantLinks(e,a,n,o,t){let i=[],r=["about","contact","help","support","reach","email","mail","message","company","team","staff","info","inquiry","feedback","service","assistance","connect","touch"],l=e.querySelectorAll("a[href]");for(let d of l){let c=d.getAttribute("href");if(!c||c.length<2)continue;try{let s=this.normalizeUrl(c,a),u=this.extractRootDomain(s);if(!this.isDomainAccessAllowed(u,n,o,t))continue;if(r.some((m)=>s.toLowerCase().includes(m))||this.isLinktreeUrl(s))i.push(s)}catch(s){if(this.debug)console.warn(`\uD83D\uDD17 Invalid link found: ${c}`,s?.message)}}return i}extractEmailsFromContent(e){let a=e.replace(/[^\w@.-\s]/g," "),n=/\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/g,o=(l)=>{let d=/^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$/,c=l.split("@")[1]?.toLowerCase(),s=l.split(".").pop()?.toLowerCase();return d.test(l)&&c!==void 0&&s!==void 0&&!this.fileExtensions.includes(`.${s}`)&&!this.isRestrictedDomain(`https://${c}`)},t=(l)=>{return(l.match(n)||[]).filter(o)},i=e.replace(/<[^>]*>/g," "),r=[...t(i),...t(a)];return[...new Set(r)]}isRestrictedDomain(e){try{let a=new URL(e).host.toLowerCase();return this.restrictedDomains.some((n)=>a===n.toLowerCase()||a.endsWith(`.${n.toLowerCase()}`))}catch{return!0}}isLinktreeUrl(e){try{return this.extractRootDomain(e)==="linktr.ee"}catch{return!1}}extractRootDomain(e){try{let n=new URL(e).hostname.toLowerCase();return n.startsWith("www.")?n.slice(4):n}catch{return""}}normalizeUrl(e,a){if(e.startsWith("http://")||e.startsWith("https://"))return e;if(e.startsWith("//"))return`${a.protocol}${e}`;return new URL(e,a.href).href}getRandomUserAgent(){return this.userAgents[Math.floor(Math.random()*this.userAgents.length)]}}function y(){let e=[{name:"Chrome",version:"91.0.4472.124",engine:"AppleWebKit/537.36"},{name:"Firefox",version:"89.0",engine:"Gecko/20100101"},{name:"Safari",version:"14.1.1",engine:"AppleWebKit/605.1.15"},{name:"Edge",version:"91.0.864.59",engine:"AppleWebKit/537.36"},{name:"Opera",version:"77.0.4054.277",engine:"AppleWebKit/537.36"},{name:"Vivaldi",version:"3.8.2259.42",engine:"AppleWebKit/537.36"},{name:"Brave",version:"1.26.74",engine:"AppleWebKit/537.36"},{name:"Chromium",version:"91.0.4472.101",engine:"AppleWebKit/537.36"},{name:"Yandex",version:"21.5.3.742",engine:"AppleWebKit/537.36"},{name:"Maxthon",version:"5.3.8.2000",engine:"AppleWebKit/537.36"}],a=["Windows NT 10.0","Windows NT 6.1","Macintosh; Intel Mac OS X 10_15_7","Macintosh; Intel Mac OS X 11_4_0","X11; Linux x86_64","X11; Ubuntu; Linux x86_64"],n=[];for(let o=0;o<200;o++){let t=e[Math.floor(Math.random()*e.length)],i=a[Math.floor(Math.random()*a.length)],r="";switch(t.name){case"Chrome":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} Safari/537.36`;break;case"Firefox":r=`Mozilla/5.0 (${i}; rv:${t.version}) ${t.engine} Firefox/${t.version}`;break;case"Safari":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Version/${t.version} Safari/605.1.15`;break;case"Edge":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} Safari/537.36 Edg/${t.version}`;break;case"Opera":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} Safari/537.36 OPR/${t.version}`;break;case"Vivaldi":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} Safari/537.36 Vivaldi/${t.version}`;break;case"Brave":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} Safari/537.36 Brave/${t.version}`;break;case"Chromium":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chromium/${t.version} Chrome/${t.version} Safari/537.36`;break;case"Yandex":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} YaBrowser/${t.version} Safari/537.36`;break;case"Maxthon":r=`Mozilla/5.0 (${i}) ${t.engine} (KHTML, like Gecko) Chrome/${t.version} Safari/537.36 Maxthon/${t.version}`;break}n.push(r)}return n}function $(){return["facebook.com","fb.com","messenger.com","instagram.com","threads.net","twitter.com","x.com","linkedin.com","pinterest.com","pin.it","reddit.com","tumblr.com","snapchat.com","tiktok.com","douyin.com","youtube.com","youtu.be","whatsapp.com","telegram.org","t.me","medium.com","quora.com","flickr.com","vimeo.com","vk.com","weibo.com","sina.com.cn","line.me","discord.com","discordapp.com","twitch.tv","meetup.com","nextdoor.com","xing.com","yelp.com","zalo.me","mastodon.social","clubhouse.com","patreon.com","onlyfans.com","douban.com","goodreads.com","soundcloud.com","spotify.com","last.fm","behance.net","dribbble.com","deviantart.com","pixiv.net","slideshare.net","tinder.com","bumble.com","etsy.com","indeed.com","glassdoor.com","monster.com","careerbuilder.com","dice.com","ziprecruiter.com","simplyhired.com","upwork.com","freelancer.com","fiverr.com","stackoverflow.com","stackoverflow.co","angel.co","wellfound.com","quora.com","stackexchange.com","yahoo.com","answers.microsoft.com","askubuntu.com","superuser.com","serverfault.com","mathoverflow.net","xda-developers.com","gamespot.com","ign.com","4chan.org","9gag.com","gizmodo.com","slashdot.org","hacker-news.news","ycombinator.com","producthunt.com","discourse.org","google.com","google.co.uk","google.de","google.fr","google.co.jp","bing.com","yahoo.com","search.yahoo.com","duckduckgo.com","baidu.com","yandex.com","yandex.ru","ask.com","wolframalpha.com","ecosia.org","startpage.com","qwant.com","searx.me","gibiru.com","swisscows.com","gmail.com","googlemail.com","outlook.com","hotmail.com","live.com","msn.com","yahoo.com","ymail.com","aol.com","icloud.com","me.com","mac.com","protonmail.com","pm.me","zoho.com","mail.com","gmx.com","gmx.net","yandex.com","yandex.ru","tutanota.com","tutanota.de","fastmail.com","hushmail.com","mailbox.org","posteo.de","runbox.com","disroot.org","163.com","qq.com","rambler.ru","mail.ru","yelp.com","yelp.ca","yelp.co.uk","yelp.com.au","yellowpages.com","yellowpages.ca","yell.com","tripadvisor.com","tripadvisor.co.uk","tripadvisor.ca","foursquare.com","angieslist.com","bbb.org","manta.com","thumbtack.com","homeadvisor.com","superpages.com","whitepages.com","local.com","citysearch.com","merchantcircle.com","insiderpages.com","kudzu.com","hotfrog.com","buildzoom.com","houzz.com","porch.com","mapquest.com","zagat.com","zomato.com","opentable.com","viator.com","expedia.com","booking.com","airbnb.com","vrbo.com","homeaway.com","craigslist.org","nextdoor.com","patch.com","meetup.com","eventbrite.com","groupon.com","livingsocial.com","gumtree.com","gumtree.com.au","kijiji.ca","leboncoin.fr","finn.no","blocket.se","58.com","dianping.com","tabelog.com","ypcdn.com"]}export{k as Scraper,w as CappedSet};
package/dist/crawler.d.ts CHANGED
@@ -6980,10 +6980,11 @@ export declare class CrawlerOptions {
6980
6980
  */
6981
6981
  private getRandomUserAgent;
6982
6982
  }
6983
- export interface EmailDiscoveryEvent {
6983
+ export interface EmailDiscoveryEvent<T = Record<string, any>> {
6984
6984
  email: string;
6985
6985
  discoveredAt: string;
6986
6986
  timestamp: Date;
6987
+ metadata: T;
6987
6988
  }
6988
6989
  interface RedirectEvent$1 {
6989
6990
  originalUrl: string;
@@ -7259,7 +7260,7 @@ export declare class Crawler {
7259
7260
  * });
7260
7261
  * ```
7261
7262
  */
7262
- onEmailDiscovered(handler: (email: EmailDiscoveryEvent) => Promise<void>): Crawler;
7263
+ onEmailDiscovered<T = Record<string, any>>(handler: (email: EmailDiscoveryEvent<T>) => Promise<void>): Crawler;
7263
7264
  /**
7264
7265
  * Registers a handler for bulk email leads discovery.
7265
7266
  * Triggered when multiple email addresses are found and processed.
@@ -7688,6 +7689,7 @@ export declare class Crawler {
7688
7689
  useOxylabsRotation?: boolean;
7689
7690
  useDecodo?: boolean;
7690
7691
  skipCache?: boolean;
7692
+ emailMetadata?: Record<string, any>;
7691
7693
  }): Crawler;
7692
7694
  private execute;
7693
7695
  private execute2;
@@ -1,4 +1,4 @@
1
- const _mod_w4kkhv = require('../crawler/crawler.cjs');
2
- exports.Crawler = _mod_w4kkhv.Crawler;;
3
- const _mod_fiyx3u = require('../crawler/crawler-options.cjs');
4
- exports.CrawlerOptions = _mod_fiyx3u.CrawlerOptions;;
1
+ const _mod_lowijh = require('../crawler/crawler.cjs');
2
+ exports.Crawler = _mod_lowijh.Crawler;;
3
+ const _mod_b0j8l0 = require('../crawler/crawler-options.cjs');
4
+ exports.CrawlerOptions = _mod_b0j8l0.CrawlerOptions;;
package/dist/index.cjs CHANGED
@@ -1,30 +1,30 @@
1
- const _mod_7y7owb = require('./core/rezo.cjs');
2
- exports.Rezo = _mod_7y7owb.Rezo;
3
- exports.createRezoInstance = _mod_7y7owb.createRezoInstance;
4
- exports.createDefaultInstance = _mod_7y7owb.createDefaultInstance;;
5
- const _mod_m2gp9k = require('./errors/rezo-error.cjs');
6
- exports.RezoError = _mod_m2gp9k.RezoError;
7
- exports.RezoErrorCode = _mod_m2gp9k.RezoErrorCode;;
8
- const _mod_1kei6w = require('./utils/headers.cjs');
9
- exports.RezoHeaders = _mod_1kei6w.RezoHeaders;;
10
- const _mod_otsozo = require('./utils/form-data.cjs');
11
- exports.RezoFormData = _mod_otsozo.RezoFormData;;
12
- const _mod_i1qoqb = require('./utils/cookies.cjs');
13
- exports.RezoCookieJar = _mod_i1qoqb.RezoCookieJar;
14
- exports.Cookie = _mod_i1qoqb.Cookie;;
15
- const _mod_l234vd = require('./utils/curl.cjs');
16
- exports.toCurl = _mod_l234vd.toCurl;
17
- exports.fromCurl = _mod_l234vd.fromCurl;;
18
- const _mod_4xnhr8 = require('./core/hooks.cjs');
19
- exports.createDefaultHooks = _mod_4xnhr8.createDefaultHooks;
20
- exports.mergeHooks = _mod_4xnhr8.mergeHooks;;
21
- const _mod_mibvuq = require('./proxy/manager.cjs');
22
- exports.ProxyManager = _mod_mibvuq.ProxyManager;;
23
- const _mod_cye7iw = require('./queue/index.cjs');
24
- exports.RezoQueue = _mod_cye7iw.RezoQueue;
25
- exports.HttpQueue = _mod_cye7iw.HttpQueue;
26
- exports.Priority = _mod_cye7iw.Priority;
27
- exports.HttpMethodPriority = _mod_cye7iw.HttpMethodPriority;;
1
+ const _mod_s4on4w = require('./core/rezo.cjs');
2
+ exports.Rezo = _mod_s4on4w.Rezo;
3
+ exports.createRezoInstance = _mod_s4on4w.createRezoInstance;
4
+ exports.createDefaultInstance = _mod_s4on4w.createDefaultInstance;;
5
+ const _mod_j10648 = require('./errors/rezo-error.cjs');
6
+ exports.RezoError = _mod_j10648.RezoError;
7
+ exports.RezoErrorCode = _mod_j10648.RezoErrorCode;;
8
+ const _mod_cr1p87 = require('./utils/headers.cjs');
9
+ exports.RezoHeaders = _mod_cr1p87.RezoHeaders;;
10
+ const _mod_y6ple5 = require('./utils/form-data.cjs');
11
+ exports.RezoFormData = _mod_y6ple5.RezoFormData;;
12
+ const _mod_mt0dfs = require('./utils/cookies.cjs');
13
+ exports.RezoCookieJar = _mod_mt0dfs.RezoCookieJar;
14
+ exports.Cookie = _mod_mt0dfs.Cookie;;
15
+ const _mod_bivoyi = require('./utils/curl.cjs');
16
+ exports.toCurl = _mod_bivoyi.toCurl;
17
+ exports.fromCurl = _mod_bivoyi.fromCurl;;
18
+ const _mod_h18prc = require('./core/hooks.cjs');
19
+ exports.createDefaultHooks = _mod_h18prc.createDefaultHooks;
20
+ exports.mergeHooks = _mod_h18prc.mergeHooks;;
21
+ const _mod_hr8h18 = require('./proxy/manager.cjs');
22
+ exports.ProxyManager = _mod_hr8h18.ProxyManager;;
23
+ const _mod_x31mfg = require('./queue/index.cjs');
24
+ exports.RezoQueue = _mod_x31mfg.RezoQueue;
25
+ exports.HttpQueue = _mod_x31mfg.HttpQueue;
26
+ exports.Priority = _mod_x31mfg.Priority;
27
+ exports.HttpMethodPriority = _mod_x31mfg.HttpMethodPriority;;
28
28
  const { RezoError } = require('./errors/rezo-error.cjs');
29
29
  const isRezoError = exports.isRezoError = RezoError.isRezoError;
30
30
  const Cancel = exports.Cancel = RezoError;
@@ -1,10 +1,10 @@
1
- const _mod_5wmaq1 = require('./base.cjs');
2
- exports.Agent = _mod_5wmaq1.Agent;;
3
- const _mod_rekzdg = require('./http-proxy.cjs');
4
- exports.HttpProxyAgent = _mod_rekzdg.HttpProxyAgent;;
5
- const _mod_pstipg = require('./https-proxy.cjs');
6
- exports.HttpsProxyAgent = _mod_pstipg.HttpsProxyAgent;;
7
- const _mod_haz0pb = require('./socks-proxy.cjs');
8
- exports.SocksProxyAgent = _mod_haz0pb.SocksProxyAgent;;
9
- const _mod_mxslm0 = require('./socks-client.cjs');
10
- exports.SocksClient = _mod_mxslm0.SocksClient;;
1
+ const _mod_c6cw86 = require('./base.cjs');
2
+ exports.Agent = _mod_c6cw86.Agent;;
3
+ const _mod_j0if6b = require('./http-proxy.cjs');
4
+ exports.HttpProxyAgent = _mod_j0if6b.HttpProxyAgent;;
5
+ const _mod_ze9lyt = require('./https-proxy.cjs');
6
+ exports.HttpsProxyAgent = _mod_ze9lyt.HttpsProxyAgent;;
7
+ const _mod_fewvsu = require('./socks-proxy.cjs');
8
+ exports.SocksProxyAgent = _mod_fewvsu.SocksProxyAgent;;
9
+ const _mod_matq8q = require('./socks-client.cjs');
10
+ exports.SocksClient = _mod_matq8q.SocksClient;;
@@ -1,9 +1,9 @@
1
1
  const { Agent, HttpProxyAgent, HttpsProxyAgent, SocksProxyAgent } = require('../internal/agents/index.cjs');
2
2
  const { parseProxyString } = require('./parse.cjs');
3
- const _mod_0p8pms = require('./manager.cjs');
4
- exports.ProxyManager = _mod_0p8pms.ProxyManager;;
5
- const _mod_t2470u = require('./parse.cjs');
6
- exports.parseProxyString = _mod_t2470u.parseProxyString;;
3
+ const _mod_31ry9y = require('./manager.cjs');
4
+ exports.ProxyManager = _mod_31ry9y.ProxyManager;;
5
+ const _mod_k69lld = require('./parse.cjs');
6
+ exports.parseProxyString = _mod_k69lld.parseProxyString;;
7
7
  function createOptions(uri, opts) {
8
8
  if (uri instanceof URL || typeof uri === "string") {
9
9
  return {
@@ -1,8 +1,8 @@
1
- const _mod_sm75pf = require('./queue.cjs');
2
- exports.RezoQueue = _mod_sm75pf.RezoQueue;;
3
- const _mod_jkxs9z = require('./http-queue.cjs');
4
- exports.HttpQueue = _mod_jkxs9z.HttpQueue;
5
- exports.extractDomain = _mod_jkxs9z.extractDomain;;
6
- const _mod_3nhfhq = require('./types.cjs');
7
- exports.Priority = _mod_3nhfhq.Priority;
8
- exports.HttpMethodPriority = _mod_3nhfhq.HttpMethodPriority;;
1
+ const _mod_19g5tr = require('./queue.cjs');
2
+ exports.RezoQueue = _mod_19g5tr.RezoQueue;;
3
+ const _mod_0zl05k = require('./http-queue.cjs');
4
+ exports.HttpQueue = _mod_0zl05k.HttpQueue;
5
+ exports.extractDomain = _mod_0zl05k.extractDomain;;
6
+ const _mod_0r6z2k = require('./types.cjs');
7
+ exports.Priority = _mod_0r6z2k.Priority;
8
+ exports.HttpMethodPriority = _mod_0r6z2k.HttpMethodPriority;;
@@ -1,11 +1,11 @@
1
- const _mod_ilaezh = require('./event-emitter.cjs');
2
- exports.UniversalEventEmitter = _mod_ilaezh.UniversalEventEmitter;;
3
- const _mod_vh2ew1 = require('./stream.cjs');
4
- exports.UniversalStreamResponse = _mod_vh2ew1.UniversalStreamResponse;
5
- exports.StreamResponse = _mod_vh2ew1.StreamResponse;;
6
- const _mod_rsdt23 = require('./download.cjs');
7
- exports.UniversalDownloadResponse = _mod_rsdt23.UniversalDownloadResponse;
8
- exports.DownloadResponse = _mod_rsdt23.DownloadResponse;;
9
- const _mod_15456i = require('./upload.cjs');
10
- exports.UniversalUploadResponse = _mod_15456i.UniversalUploadResponse;
11
- exports.UploadResponse = _mod_15456i.UploadResponse;;
1
+ const _mod_jpnm5w = require('./event-emitter.cjs');
2
+ exports.UniversalEventEmitter = _mod_jpnm5w.UniversalEventEmitter;;
3
+ const _mod_t7q5p9 = require('./stream.cjs');
4
+ exports.UniversalStreamResponse = _mod_t7q5p9.UniversalStreamResponse;
5
+ exports.StreamResponse = _mod_t7q5p9.StreamResponse;;
6
+ const _mod_r1o7zn = require('./download.cjs');
7
+ exports.UniversalDownloadResponse = _mod_r1o7zn.UniversalDownloadResponse;
8
+ exports.DownloadResponse = _mod_r1o7zn.DownloadResponse;;
9
+ const _mod_0lnu3y = require('./upload.cjs');
10
+ exports.UniversalUploadResponse = _mod_0lnu3y.UniversalUploadResponse;
11
+ exports.UploadResponse = _mod_0lnu3y.UploadResponse;;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "rezo",
3
- "version": "1.0.64",
3
+ "version": "1.0.65",
4
4
  "description": "Lightning-fast, enterprise-grade HTTP client for modern JavaScript. Full HTTP/2 support, intelligent cookie management, multiple adapters (HTTP, Fetch, cURL, XHR), streaming, proxy support (HTTP/HTTPS/SOCKS), and cross-environment compatibility.",
5
5
  "main": "dist/index.cjs",
6
6
  "module": "dist/index.js",