@mgsoftwarebv/mg-dashboard-mcp 2.6.2 → 2.6.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -852,7 +852,19 @@ var NOISE_EMAIL_PATTERNS = [
852
852
  /smith@/i,
853
853
  /doe@/i,
854
854
  /demo@/i,
855
- /sample@/i
855
+ /sample@/i,
856
+ /naam@/i,
857
+ /voorbeeld/i,
858
+ /your-?email/i,
859
+ /email@/i,
860
+ /@domein\./i,
861
+ /@bedrijf\./i,
862
+ /@domain\./i,
863
+ /@sentry/i,
864
+ /@wixpress/i,
865
+ /@lieferkassen/i,
866
+ /john@/i,
867
+ /jane@/i
856
868
  ];
857
869
  var CONTACT_PATH_KEYWORDS = [
858
870
  "contact",
@@ -974,6 +986,21 @@ function guessCommonEmails(domain) {
974
986
  const d = domain.replace(/^www\./, "");
975
987
  return [`info@${d}`, `contact@${d}`, `hello@${d}`, `administratie@${d}`, `verkoop@${d}`];
976
988
  }
989
/**
 * Marker substrings looked for in fetched HTML to recognise an anti-bot
 * challenge / interstitial page. They are matched case-insensitively by
 * isBotChallengePage.
 */
var BOT_CHALLENGE_INDICATORS = [
  "sgcaptcha",
  "challenge-platform",
  "cf-browser-verification",
  "Just a moment",
  "Checking your browser",
  "Enable JavaScript and cookies",
  "Attention Required",
  "DDoS protection by"
];
// Lower-cased once at load time so each call does not re-normalise every indicator.
var BOT_CHALLENGE_INDICATORS_LOWER = BOT_CHALLENGE_INDICATORS.map((ind) => ind.toLowerCase());
/**
 * Decide whether a fetched document looks like a bot-challenge page rather
 * than real site content.
 *
 * @param {string} html - Raw response body to inspect.
 * @param {number} [maxLength=2e3] - Documents longer than this many characters
 *   are never classified as challenge pages (presumably because challenge
 *   stubs are short and real pages may merely mention a marker — confirm).
 * @returns {boolean} true when `html` is a string no longer than `maxLength`
 *   that contains at least one indicator (case-insensitive); false otherwise,
 *   including for non-string input.
 */
function isBotChallengePage(html, maxLength = 2e3) {
  // Guard: a null/undefined/non-string body cannot be a challenge page and
  // must not throw on `.length` / `.toLowerCase()`.
  if (typeof html !== "string") return false;
  if (html.length > maxLength) return false;
  const lower = html.toLowerCase();
  return BOT_CHALLENGE_INDICATORS_LOWER.some((ind) => lower.includes(ind));
}
977
1004
  async function fetchRawHtml(url, timeoutMs = 1e4) {
978
1005
  const controller = new AbortController();
979
1006
  const timer = setTimeout(() => controller.abort(), timeoutMs);
@@ -987,10 +1014,33 @@ async function fetchRawHtml(url, timeoutMs = 1e4) {
987
1014
  redirect: "follow",
988
1015
  signal: controller.signal
989
1016
  });
990
- if (!res.ok) return null;
991
1017
  const ct = res.headers.get("content-type") || "";
992
1018
  if (!ct.includes("text/html") && !ct.includes("text/plain") && !ct.includes("xhtml")) return null;
993
- return await res.text();
1019
+ if (!res.ok && res.status !== 403) return null;
1020
+ const html = await res.text();
1021
+ if (isBotChallengePage(html)) return null;
1022
+ return html;
1023
+ } catch {
1024
+ return null;
1025
+ } finally {
1026
+ clearTimeout(timer);
1027
+ }
1028
+ }
1029
+ async function fetchWaybackHtml(url, timeoutMs = 15e3) {
1030
+ const cleanUrl = url.replace(/^https?:\/\//, "");
1031
+ const wbUrl = `https://web.archive.org/web/2024/${cleanUrl}`;
1032
+ const controller = new AbortController();
1033
+ const timer = setTimeout(() => controller.abort(), timeoutMs);
1034
+ try {
1035
+ const res = await fetch(wbUrl, {
1036
+ headers: { "User-Agent": WEB_USER_AGENT },
1037
+ redirect: "follow",
1038
+ signal: controller.signal
1039
+ });
1040
+ if (!res.ok) return null;
1041
+ const html = await res.text();
1042
+ if (html.length < 500) return null;
1043
+ return html;
994
1044
  } catch {
995
1045
  return null;
996
1046
  } finally {
@@ -1608,18 +1658,23 @@ LinkedIn: ${pageLinkedIn.join(", ")}`;
1608
1658
  if (!fullUrl.includes("www.")) urlsToTry.push(fullUrl.replace("https://", "http://www."));
1609
1659
  }
1610
1660
  let html = null;
1661
+ let usedWayback = false;
1611
1662
  for (const tryUrl of urlsToTry) {
1612
1663
  html = await fetchRawHtml(tryUrl, 12e3);
1613
1664
  if (html) break;
1614
1665
  }
1666
+ if (!html) {
1667
+ html = await fetchWaybackHtml(`https://${domain}`, 15e3);
1668
+ if (html) usedWayback = true;
1669
+ }
1615
1670
  if (!html) throw new Error(`Could not fetch ${fullUrl} (site may be down or blocking)`);
1616
- const contactPages = discoverContactPages(html, fullUrl);
1671
+ const contactPages = usedWayback ? [] : discoverContactPages(html, fullUrl);
1617
1672
  const pagePromises = contactPages.map(async (pageUrl) => {
1618
1673
  const pageHtml = await fetchRawHtml(pageUrl, 8e3);
1619
1674
  return { url: pageUrl, html: pageHtml };
1620
1675
  });
1621
1676
  const pageResults = await Promise.allSettled(pagePromises);
1622
- const successPages = [fullUrl];
1677
+ const successPages = [usedWayback ? `(wayback) ${domain}` : fullUrl];
1623
1678
  const allHtmls = [html];
1624
1679
  for (const result of pageResults) {
1625
1680
  if (result.status === "fulfilled" && result.value.html) {
@@ -1627,6 +1682,20 @@ LinkedIn: ${pageLinkedIn.join(", ")}`;
1627
1682
  successPages.push(result.value.url);
1628
1683
  }
1629
1684
  }
1685
+ if (usedWayback) {
1686
+ const waybackContactPaths = ["/contact", "/over-ons", "/about", "/team"];
1687
+ const wbPromises = waybackContactPaths.map(async (path) => {
1688
+ const wbHtml = await fetchWaybackHtml(`https://${domain}${path}`, 12e3);
1689
+ return { path, html: wbHtml };
1690
+ });
1691
+ const wbResults = await Promise.allSettled(wbPromises);
1692
+ for (const wr of wbResults) {
1693
+ if (wr.status === "fulfilled" && wr.value.html) {
1694
+ allHtmls.push(wr.value.html);
1695
+ successPages.push(`(wayback) ${domain}${wr.value.path}`);
1696
+ }
1697
+ }
1698
+ }
1630
1699
  const allEmails = /* @__PURE__ */ new Set();
1631
1700
  const allPhones = /* @__PURE__ */ new Set();
1632
1701
  const allLinkedIn = /* @__PURE__ */ new Set();