@mgsoftwarebv/mg-dashboard-mcp 2.6.2 → 2.6.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +74 -5
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -852,7 +852,19 @@ var NOISE_EMAIL_PATTERNS = [
|
|
|
852
852
|
/smith@/i,
|
|
853
853
|
/doe@/i,
|
|
854
854
|
/demo@/i,
|
|
855
|
-
/sample@/i
|
|
855
|
+
/sample@/i,
|
|
856
|
+
/naam@/i,
|
|
857
|
+
/voorbeeld/i,
|
|
858
|
+
/your-?email/i,
|
|
859
|
+
/email@/i,
|
|
860
|
+
/@domein\./i,
|
|
861
|
+
/@bedrijf\./i,
|
|
862
|
+
/@domain\./i,
|
|
863
|
+
/@sentry/i,
|
|
864
|
+
/@wixpress/i,
|
|
865
|
+
/@lieferkassen/i,
|
|
866
|
+
/john@/i,
|
|
867
|
+
/jane@/i
|
|
856
868
|
];
|
|
857
869
|
var CONTACT_PATH_KEYWORDS = [
|
|
858
870
|
"contact",
|
|
@@ -974,6 +986,21 @@ function guessCommonEmails(domain) {
|
|
|
974
986
|
const d = domain.replace(/^www\./, "");
|
|
975
987
|
return [`info@${d}`, `contact@${d}`, `hello@${d}`, `administratie@${d}`, `verkoop@${d}`];
|
|
976
988
|
}
|
|
989
|
+
/**
 * Substrings whose presence in a short HTML response marks it as a
 * bot-challenge page. They are compared case-insensitively by
 * isBotChallengePage, so the casing used here is not significant.
 */
var BOT_CHALLENGE_INDICATORS = [
  "sgcaptcha",
  "challenge-platform",
  "cf-browser-verification",
  "Just a moment",
  "Checking your browser",
  "Enable JavaScript and cookies",
  "Attention Required",
  "DDoS protection by"
];

/**
 * Decide whether an HTML document looks like a bot-challenge page rather
 * than real site content.
 *
 * A document is reported as a challenge page only when it is short (at most
 * `maxLength` characters) AND contains at least one of
 * BOT_CHALLENGE_INDICATORS, ignoring case. Longer documents are never
 * flagged, even if they contain an indicator.
 *
 * @param {string} html - Response body to inspect.
 * @param {number} [maxLength=2000] - Longest document that can still be
 *   classified as a challenge page.
 * @returns {boolean} `true` when the document matches; `false` otherwise,
 *   including when `html` is not a string.
 */
function isBotChallengePage(html, maxLength = 2e3) {
  // A missing or non-text body cannot be a challenge page; report false
  // instead of throwing on `.length` / `.toLowerCase()`.
  if (typeof html !== "string") return false;
  if (html.length > maxLength) return false;
  const lower = html.toLowerCase();
  return BOT_CHALLENGE_INDICATORS.some((ind) => lower.includes(ind.toLowerCase()));
}
|
|
977
1004
|
async function fetchRawHtml(url, timeoutMs = 1e4) {
|
|
978
1005
|
const controller = new AbortController();
|
|
979
1006
|
const timer = setTimeout(() => controller.abort(), timeoutMs);
|
|
@@ -987,10 +1014,33 @@ async function fetchRawHtml(url, timeoutMs = 1e4) {
|
|
|
987
1014
|
redirect: "follow",
|
|
988
1015
|
signal: controller.signal
|
|
989
1016
|
});
|
|
990
|
-
if (!res.ok) return null;
|
|
991
1017
|
const ct = res.headers.get("content-type") || "";
|
|
992
1018
|
if (!ct.includes("text/html") && !ct.includes("text/plain") && !ct.includes("xhtml")) return null;
|
|
993
|
-
|
|
1019
|
+
if (!res.ok && res.status !== 403) return null;
|
|
1020
|
+
const html = await res.text();
|
|
1021
|
+
if (isBotChallengePage(html)) return null;
|
|
1022
|
+
return html;
|
|
1023
|
+
} catch {
|
|
1024
|
+
return null;
|
|
1025
|
+
} finally {
|
|
1026
|
+
clearTimeout(timer);
|
|
1027
|
+
}
|
|
1028
|
+
}
|
|
1029
|
+
async function fetchWaybackHtml(url, timeoutMs = 15e3) {
|
|
1030
|
+
const cleanUrl = url.replace(/^https?:\/\//, "");
|
|
1031
|
+
const wbUrl = `https://web.archive.org/web/2024/${cleanUrl}`;
|
|
1032
|
+
const controller = new AbortController();
|
|
1033
|
+
const timer = setTimeout(() => controller.abort(), timeoutMs);
|
|
1034
|
+
try {
|
|
1035
|
+
const res = await fetch(wbUrl, {
|
|
1036
|
+
headers: { "User-Agent": WEB_USER_AGENT },
|
|
1037
|
+
redirect: "follow",
|
|
1038
|
+
signal: controller.signal
|
|
1039
|
+
});
|
|
1040
|
+
if (!res.ok) return null;
|
|
1041
|
+
const html = await res.text();
|
|
1042
|
+
if (html.length < 500) return null;
|
|
1043
|
+
return html;
|
|
994
1044
|
} catch {
|
|
995
1045
|
return null;
|
|
996
1046
|
} finally {
|
|
@@ -1608,18 +1658,23 @@ LinkedIn: ${pageLinkedIn.join(", ")}`;
|
|
|
1608
1658
|
if (!fullUrl.includes("www.")) urlsToTry.push(fullUrl.replace("https://", "http://www."));
|
|
1609
1659
|
}
|
|
1610
1660
|
let html = null;
|
|
1661
|
+
let usedWayback = false;
|
|
1611
1662
|
for (const tryUrl of urlsToTry) {
|
|
1612
1663
|
html = await fetchRawHtml(tryUrl, 12e3);
|
|
1613
1664
|
if (html) break;
|
|
1614
1665
|
}
|
|
1666
|
+
if (!html) {
|
|
1667
|
+
html = await fetchWaybackHtml(`https://${domain}`, 15e3);
|
|
1668
|
+
if (html) usedWayback = true;
|
|
1669
|
+
}
|
|
1615
1670
|
if (!html) throw new Error(`Could not fetch ${fullUrl} (site may be down or blocking)`);
|
|
1616
|
-
const contactPages = discoverContactPages(html, fullUrl);
|
|
1671
|
+
const contactPages = usedWayback ? [] : discoverContactPages(html, fullUrl);
|
|
1617
1672
|
const pagePromises = contactPages.map(async (pageUrl) => {
|
|
1618
1673
|
const pageHtml = await fetchRawHtml(pageUrl, 8e3);
|
|
1619
1674
|
return { url: pageUrl, html: pageHtml };
|
|
1620
1675
|
});
|
|
1621
1676
|
const pageResults = await Promise.allSettled(pagePromises);
|
|
1622
|
-
const successPages = [fullUrl];
|
|
1677
|
+
const successPages = [usedWayback ? `(wayback) ${domain}` : fullUrl];
|
|
1623
1678
|
const allHtmls = [html];
|
|
1624
1679
|
for (const result of pageResults) {
|
|
1625
1680
|
if (result.status === "fulfilled" && result.value.html) {
|
|
@@ -1627,6 +1682,20 @@ LinkedIn: ${pageLinkedIn.join(", ")}`;
|
|
|
1627
1682
|
successPages.push(result.value.url);
|
|
1628
1683
|
}
|
|
1629
1684
|
}
|
|
1685
|
+
if (usedWayback) {
|
|
1686
|
+
const waybackContactPaths = ["/contact", "/over-ons", "/about", "/team"];
|
|
1687
|
+
const wbPromises = waybackContactPaths.map(async (path) => {
|
|
1688
|
+
const wbHtml = await fetchWaybackHtml(`https://${domain}${path}`, 12e3);
|
|
1689
|
+
return { path, html: wbHtml };
|
|
1690
|
+
});
|
|
1691
|
+
const wbResults = await Promise.allSettled(wbPromises);
|
|
1692
|
+
for (const wr of wbResults) {
|
|
1693
|
+
if (wr.status === "fulfilled" && wr.value.html) {
|
|
1694
|
+
allHtmls.push(wr.value.html);
|
|
1695
|
+
successPages.push(`(wayback) ${domain}${wr.value.path}`);
|
|
1696
|
+
}
|
|
1697
|
+
}
|
|
1698
|
+
}
|
|
1630
1699
|
const allEmails = /* @__PURE__ */ new Set();
|
|
1631
1700
|
const allPhones = /* @__PURE__ */ new Set();
|
|
1632
1701
|
const allLinkedIn = /* @__PURE__ */ new Set();
|