@mgsoftwarebv/mg-dashboard-mcp 2.6.0 → 2.6.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +424 -7
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -723,7 +723,7 @@ var AGENT_TOOLS = [
|
|
|
723
723
|
},
|
|
724
724
|
{
|
|
725
725
|
name: "web-fetch",
|
|
726
|
-
description: "Fetch a web page and return its text content (HTML tags stripped).
|
|
726
|
+
description: "Fetch a web page and return its text content (HTML tags stripped). Automatically extracts emails and phone numbers found on the page. Returns at most 15000 characters of cleaned text.",
|
|
727
727
|
inputSchema: {
|
|
728
728
|
type: "object",
|
|
729
729
|
properties: {
|
|
@@ -732,6 +732,28 @@ var AGENT_TOOLS = [
|
|
|
732
732
|
},
|
|
733
733
|
required: ["url"]
|
|
734
734
|
}
|
|
735
|
+
},
|
|
736
|
+
{
|
|
737
|
+
name: "web-find-contacts",
|
|
738
|
+
description: "POWERFUL contact finder. Crawls a company website (homepage + contact/about/team pages), extracts ALL emails, phone numbers, and LinkedIn URLs via regex + mailto/tel parsing. Also searches Google for the company email as fallback. Returns structured contact data. USE THIS for every lead instead of manually browsing contact pages.",
|
|
739
|
+
inputSchema: {
|
|
740
|
+
type: "object",
|
|
741
|
+
properties: {
|
|
742
|
+
url: {
|
|
743
|
+
type: "string",
|
|
744
|
+
description: 'Company website URL or domain (e.g. "https://example.nl" or "example.nl")'
|
|
745
|
+
},
|
|
746
|
+
company_name: {
|
|
747
|
+
type: "string",
|
|
748
|
+
description: "Company name (improves Google search accuracy)"
|
|
749
|
+
},
|
|
750
|
+
include_search: {
|
|
751
|
+
type: "boolean",
|
|
752
|
+
description: "Also search Google for emails (default: true)"
|
|
753
|
+
}
|
|
754
|
+
},
|
|
755
|
+
required: ["url"]
|
|
756
|
+
}
|
|
735
757
|
}
|
|
736
758
|
];
|
|
737
759
|
var AGENT_TOOL_NAMES = new Set(AGENT_TOOLS.map((t) => t.name));
|
|
@@ -747,7 +769,8 @@ var AGENT_TOOL_MODULE_MAP = {
|
|
|
747
769
|
"agent-save-email-draft": "agent_reporting",
|
|
748
770
|
"agent-complete-target": "agent_reporting",
|
|
749
771
|
"web-search": "agent_reporting",
|
|
750
|
-
"web-fetch": "agent_reporting"
|
|
772
|
+
"web-fetch": "agent_reporting",
|
|
773
|
+
"web-find-contacts": "agent_reporting"
|
|
751
774
|
};
|
|
752
775
|
function clamp(val, min, max) {
|
|
753
776
|
return Math.max(min, Math.min(max, val));
|
|
@@ -788,6 +811,242 @@ async function webSearch(query, maxResults) {
|
|
|
788
811
|
}
|
|
789
812
|
return results;
|
|
790
813
|
}
|
|
814
|
+
// Matches a plain e-mail address anywhere in a string (global: use with String#match / matchAll).
var EMAIL_REGEX = /[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,}/g;
// Matches Dutch phone numbers written nationally ("020-123 4567") or internationally ("+31 6 12345678").
// The (?<!\d) / (?!\d) guards keep the pattern from matching a slice of a longer digit run
// (order numbers, IBANs, timestamps), which previously produced bogus "phone numbers".
var PHONE_NL_REGEX = /(?<!\d)(?:\+31|0)[\s\-.]?\(?\d{1,3}\)?[\s\-.]?\d{3,4}[\s\-.]?\d{2,4}(?!\d)/g;
// Matches LinkedIn personal (/in/) and company (/company/) profile URLs.
// Accepts an optional 2-3 letter subdomain so that both "www." and country hosts such as
// "nl.linkedin.com" are recognised.
var LINKEDIN_REGEX = /https?:\/\/(?:[a-z]{2,3}\.)?linkedin\.com\/(?:in|company)\/[a-zA-Z0-9\-_%]+\/?/gi;
|
|
817
|
+
// Case-insensitive patterns for addresses that should never be reported as a contact:
// automated senders, third-party platform/tracker addresses and template placeholders.
// An address is treated as noise when ANY of these patterns matches it.
var NOISE_EMAIL_PATTERNS = [
  // Automated / system mailboxes
  /noreply@/i,
  /no-reply@/i,
  /mailer-daemon@/i,
  /postmaster@/i,
  // Documentation and test addresses
  /@example\./i,
  /test@/i,
  // Third-party platforms, CDNs, trackers and widgets
  /@wix\.com$/i,
  /@sentry\.io$/i,
  /@wordpress\./i,
  /@gravatar\.com$/i,
  /@schema\.org$/i,
  /@w3\.org$/i,
  /@facebook\.com$/i,
  /@google\.com$/i,
  /@twitter\.com$/i,
  /@github\.com$/i,
  /@cloudflare\./i,
  /@vercel\./i,
  /@netlify\./i,
  /@cookiebot\./i,
  /@hotjar\./i,
  /@hubspot\./i,
  /@mailchimp\./i,
  /@googleusercontent/i,
  /@gstatic/i,
  /@youtube/i,
  /@recaptcha/i,
  /@privacy/i,
  /@cookie/i,
  /@gdpr/i,
  /@dynamicweb\./i,
  // Template placeholders (English and Dutch)
  /@placeholder\./i,
  /@yourcompany\./i,
  /@company\./i,
  /smith@/i,
  /doe@/i,
  /demo@/i,
  /sample@/i,
  /naam@/i,
  /voorbeeld/i,
  /your-?email/i,
  /email@/i,
  /@domein\./i,
  /@bedrijf\./i,
  /@domain\./i,
  // Additional vendor hosts and sample first names
  /@sentry/i,
  /@wixpress/i,
  /@lieferkassen/i,
  /john@/i,
  /jane@/i
];
|
|
869
|
+
// Substrings that mark a same-site link path as a likely contact/about/team page
// (English, Dutch and German terms). Matched against the lower-cased URL pathname.
var CONTACT_PATH_KEYWORDS = [
  "contact", "about", "over-ons", "over",
  "team", "medewerker", "impressum", "imprint",
  "wie-zijn", "ons-team", "werknemers", "organisatie",
  "zakelijk", "bedrijfsinfo", "neem-contact", "footer"
];
// Well-known contact/about paths that are probed on the site origin even when
// the homepage does not link to them.
var CONTACT_PATHS_TO_TRY = [
  "/contact", "/over-ons", "/about", "/about-us",
  "/team", "/contact-us", "/over", "/wie-zijn-wij",
  "/ons-team", "/zakelijk/over-ons", "/medewerkers", "/organisatie",
  "/bedrijfsinformatie", "/impressum"
];
|
|
903
|
+
/**
 * Replaces literal `\uXXXX` escape sequences (a backslash, "u" and exactly four
 * hex digits) with the UTF-16 code unit they denote.
 *
 * @param {string} text - Text that may contain escaped characters.
 * @returns {string} The text with every `\uXXXX` sequence decoded.
 */
function decodeUnicodeEscapes(text) {
  const escapePattern = /\\u([0-9a-fA-F]{4})/g;
  const toCharacter = (_match, hexDigits) => {
    const codeUnit = parseInt(hexDigits, 16);
    return String.fromCharCode(codeUnit);
  };
  return text.replace(escapePattern, toCharacter);
}
|
|
909
|
+
/**
 * Decodes numeric HTML character references: decimal (`&#64;`) and
 * hexadecimal (`&#x40;` / `&#X40;`). Named entities are left untouched.
 *
 * Uses String.fromCodePoint so references above U+FFFF (e.g. emoji) decode to
 * the right character instead of being truncated to 16 bits. References that
 * are not valid Unicode code points are left in the text unchanged.
 *
 * @param {string} text - Text that may contain numeric character references.
 * @returns {string} The text with numeric references decoded.
 */
function decodeHtmlEntities(text) {
  const MAX_CODE_POINT = 0x10ffff;
  const toCharacter = (codePoint, original) => {
    // fromCodePoint throws a RangeError outside the Unicode range; keep the raw reference instead.
    if (!Number.isInteger(codePoint) || codePoint < 0 || codePoint > MAX_CODE_POINT) {
      return original;
    }
    return String.fromCodePoint(codePoint);
  };
  return text
    .replace(/&#(\d+);/g, (match, num) => toCharacter(Number.parseInt(num, 10), match))
    .replace(/&#[xX]([0-9a-fA-F]+);/g, (match, hex) => toCharacter(Number.parseInt(hex, 16), match));
}
|
|
915
|
+
/**
 * Collects e-mail addresses from a page's raw HTML.
 *
 * Two sources are combined: `mailto:` links (searched in both the raw and the
 * entity/escape-decoded markup) and plain addresses in the decoded markup with
 * <style> blocks removed. Addresses are lower-cased and de-duplicated, matches
 * that end in an asset file extension are dropped from the plain-text pass,
 * and anything matching NOISE_EMAIL_PATTERNS is filtered out.
 *
 * @param {string} html - Raw HTML of the page.
 * @returns {string[]} Unique lower-cased addresses in first-seen order.
 */
function extractEmailsFromHtml(html) {
  const found = new Set();
  const decodedHtml = decodeHtmlEntities(decodeUnicodeEscapes(html));

  // Pass 1: explicit mailto: links, before and after decoding obfuscated markup.
  const mailtoPattern = /mailto:([a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,})/gi;
  for (const source of [html, decodedHtml]) {
    for (const [, address] of source.matchAll(mailtoPattern)) {
      found.add(address.toLowerCase());
    }
  }

  // Pass 2: anything address-shaped in the decoded markup, ignoring CSS.
  const assetSuffix = /\.(png|jpg|jpeg|gif|svg|webp|css|js|woff|ttf|eot|ico)$/i;
  const withoutStyles = decodedHtml.replace(/<style[\s\S]*?<\/style>/gi, "");
  for (const candidate of withoutStyles.match(EMAIL_REGEX) || []) {
    const address = candidate.toLowerCase();
    // Skips asset names such as "logo@2x.png" that merely look like addresses.
    if (assetSuffix.test(address)) continue;
    found.add(address);
  }

  return [...found].filter(
    (address) => NOISE_EMAIL_PATTERNS.every((pattern) => !pattern.test(address))
  );
}
|
|
933
|
+
/**
 * Collects phone numbers from a page's raw HTML.
 *
 * Numbers come from `href="tel:..."` attributes (raw and decoded markup) and
 * from PHONE_NL_REGEX matches in the visible text (scripts, styles and tags
 * removed). A candidate is kept only when it is 10-14 characters long after
 * stripping whitespace, dashes, dots and parentheses.
 *
 * Candidates are de-duplicated on a normalised form (separators removed and a
 * leading "+31" rewritten to "0"), so the same number written as
 * "+31201234567" in a tel: link and "020 123 4567" in the text is reported
 * once, using the first formatting encountered. Previously each formatting was
 * a separate entry and duplicates could crowd out distinct numbers.
 *
 * @param {string} html - Raw HTML of the page.
 * @returns {string[]} At most 10 distinct phone numbers in first-seen order.
 */
function extractPhonesFromHtml(html) {
  const MAX_PHONES = 10;
  const phonesByKey = new Map();
  const remember = (raw) => {
    const compact = raw.replace(/[\s\-().]/g, "");
    if (compact.length < 10 || compact.length > 14) return;
    // The key is used only for de-duplication; the original formatting is what gets returned.
    const key = compact.replace(/^\+31/, "0");
    if (!phonesByKey.has(key)) phonesByKey.set(key, raw.trim());
  };

  const decoded = decodeHtmlEntities(decodeUnicodeEscapes(html));
  const telRegex = /href="tel:([^"]+)"/gi;
  for (const src of [html, decoded]) {
    for (const [, number] of src.matchAll(telRegex)) remember(number);
  }

  const visibleText = decoded
    .replace(/<script[\s\S]*?<\/script>/gi, "")
    .replace(/<style[\s\S]*?<\/style>/gi, "")
    .replace(/<[^>]+>/g, " ");
  for (const number of visibleText.match(PHONE_NL_REGEX) || []) remember(number);

  return [...phonesByKey.values()].slice(0, MAX_PHONES);
}
|
|
952
|
+
/**
 * Collects the distinct LinkedIn profile/company URLs that LINKEDIN_REGEX
 * finds in the given HTML, with a single trailing slash removed.
 *
 * @param {string} html - Raw HTML of the page.
 * @returns {string[]} Unique URLs in first-seen order.
 */
function extractLinkedInFromHtml(html) {
  const found = html.match(LINKEDIN_REGEX) || [];
  const withoutTrailingSlash = found.map(
    (link) => (link.endsWith("/") ? link.slice(0, -1) : link)
  );
  return [...new Set(withoutTrailingSlash)];
}
|
|
958
|
+
/**
 * Builds the list of candidate contact/about/team page URLs for a site.
 *
 * Candidates come from two sources, in priority order:
 *   1. same-host links found in `html` whose path contains one of
 *      CONTACT_PATH_KEYWORDS (query string and fragment dropped);
 *   2. the well-known paths in CONTACT_PATHS_TO_TRY, resolved on the base origin.
 *
 * Links actually present on the page are added first so they survive the
 * 10-entry cap. Previously the guessed paths were inserted first and, because
 * there are more guesses than the cap allows, discovered links were never
 * returned.
 *
 * @param {string} html - Raw HTML of the page the links are read from.
 * @param {string} baseUrl - URL of that page; used to resolve relative links.
 * @returns {string[]} Up to 10 unique URLs, excluding the base page itself.
 *   Empty when `baseUrl` cannot be parsed.
 */
function discoverContactPages(html, baseUrl) {
  const MAX_PAGES = 10;
  let base;
  try {
    base = new URL(baseUrl);
  } catch {
    return [];
  }
  const pages = new Set();

  // Only double-quoted hrefs are recognised; the fragment part is excluded by the pattern.
  const linkRegex = /<a\s[^>]*href="([^"#]*)"[^>]*>/gi;
  for (const [, href] of html.matchAll(linkRegex)) {
    let url;
    try {
      url = new URL(href, baseUrl);
    } catch {
      // Unparseable href: not a usable link, move on.
      continue;
    }
    if (url.hostname !== base.hostname) continue;
    const path = url.pathname.toLowerCase();
    if (CONTACT_PATH_KEYWORDS.some((kw) => path.includes(kw))) {
      pages.add(url.origin + url.pathname);
    }
  }

  // Blind guesses fill whatever room is left after the discovered links.
  for (const path of CONTACT_PATHS_TO_TRY) {
    pages.add(`${base.origin}${path}`);
  }

  pages.delete(base.origin + base.pathname);
  return [...pages].slice(0, MAX_PAGES);
}
|
|
985
|
+
/**
 * Produces conventional mailbox addresses for a domain, to be offered as
 * unverified suggestions. A leading "www." is removed from the domain first.
 *
 * @param {string} domain - Host name, with or without a "www." prefix.
 * @returns {string[]} Five guessed addresses (info, contact, hello, administratie, verkoop).
 */
function guessCommonEmails(domain) {
  const bareDomain = domain.replace(/^www\./, "");
  const localParts = ["info", "contact", "hello", "administratie", "verkoop"];
  return localParts.map((localPart) => `${localPart}@${bareDomain}`);
}
|
|
989
|
+
// Text fragments that show up on anti-bot / CAPTCHA interstitial pages.
var BOT_CHALLENGE_INDICATORS = [
  "sgcaptcha",
  "challenge-platform",
  "cf-browser-verification",
  "Just a moment",
  "Checking your browser",
  "Enable JavaScript and cookies",
  "Attention Required",
  "DDoS protection by"
];
/**
 * Reports whether a response body looks like a bot-challenge interstitial.
 * Only documents of at most 2000 characters are inspected; longer documents
 * are always treated as real content. Matching is case-insensitive.
 *
 * @param {string} html - Response body to inspect.
 * @returns {boolean} True when a short document contains a challenge indicator.
 */
function isBotChallengePage(html) {
  if (html.length <= 2e3) {
    const haystack = html.toLowerCase();
    for (const indicator of BOT_CHALLENGE_INDICATORS) {
      if (haystack.includes(indicator.toLowerCase())) return true;
    }
  }
  return false;
}
|
|
1004
|
+
/**
 * Fetches a URL and returns its body as text, or null when the page is not usable.
 *
 * The request follows redirects and is aborted after `timeoutMs`. Null is
 * returned when the request fails or times out, when the content type is not
 * HTML/XHTML/plain text, when the status is neither 2xx nor 403, or when the
 * body looks like a bot-challenge page. A 403 is let through so its body can
 * still be inspected.
 *
 * When a response is rejected before its body is read, the body stream is
 * cancelled explicitly so the underlying connection is released right away
 * instead of waiting for garbage collection.
 *
 * @param {string} url - Absolute URL to fetch.
 * @param {number} [timeoutMs=10000] - Abort the request after this many milliseconds.
 * @returns {Promise<string|null>} The response text, or null if unusable.
 */
async function fetchRawHtml(url, timeoutMs = 1e4) {
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeoutMs);
  try {
    const res = await fetch(url, {
      headers: {
        "User-Agent": WEB_USER_AGENT,
        "Accept": "text/html,application/xhtml+xml",
        "Accept-Language": "nl,en;q=0.9"
      },
      redirect: "follow",
      signal: controller.signal
    });
    const ct = res.headers.get("content-type") || "";
    const isTextual = ct.includes("text/html") || ct.includes("text/plain") || ct.includes("xhtml");
    const isAcceptableStatus = res.ok || res.status === 403;
    if (!isTextual || !isAcceptableStatus) {
      // Discard the unread body; a failure here is caught below and also yields null.
      await res.body?.cancel();
      return null;
    }
    const html = await res.text();
    if (isBotChallengePage(html)) return null;
    return html;
  } catch {
    // Best effort: network errors, aborts and decode failures all mean "no usable page".
    return null;
  } finally {
    clearTimeout(timer);
  }
}
|
|
1029
|
+
/**
 * Fetches an archived copy of a page from the Internet Archive Wayback Machine.
 *
 * The scheme is stripped from `url` and the request goes to
 * `https://web.archive.org/web/<timestamp>/<url>`, following redirects. Null
 * is returned when the request fails or times out, the status is not 2xx, or
 * the body is shorter than 500 characters.
 *
 * On a non-2xx response the unread body is cancelled explicitly so the
 * connection is released promptly.
 *
 * @param {string} url - Original page URL, with or without an http(s) scheme.
 * @param {number} [timeoutMs=15000] - Abort the request after this many milliseconds.
 * @param {string} [timestamp="2024"] - Wayback timestamp path segment; the
 *   default keeps the previously hard-coded value.
 * @returns {Promise<string|null>} The archived HTML, or null if unavailable.
 */
async function fetchWaybackHtml(url, timeoutMs = 15e3, timestamp = "2024") {
  const MIN_USEFUL_LENGTH = 500;
  const cleanUrl = url.replace(/^https?:\/\//, "");
  const wbUrl = `https://web.archive.org/web/${timestamp}/${cleanUrl}`;
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeoutMs);
  try {
    const res = await fetch(wbUrl, {
      headers: { "User-Agent": WEB_USER_AGENT },
      redirect: "follow",
      signal: controller.signal
    });
    if (!res.ok) {
      // Discard the unread body; a failure here is caught below and also yields null.
      await res.body?.cancel();
      return null;
    }
    const html = await res.text();
    if (html.length < MIN_USEFUL_LENGTH) return null;
    return html;
  } catch {
    // Best effort: the archive is only a fallback, so any failure means "not available".
    return null;
  } finally {
    clearTimeout(timer);
  }
}
|
|
791
1050
|
async function webFetch(url, extractLinks) {
|
|
792
1051
|
const controller = new AbortController();
|
|
793
1052
|
const timeout = setTimeout(() => controller.abort(), 15e3);
|
|
@@ -824,7 +1083,7 @@ async function webFetch(url, extractLinks) {
|
|
|
824
1083
|
if (text.length > MAX_TEXT) {
|
|
825
1084
|
text = text.slice(0, MAX_TEXT) + "\n\n[... truncated at 15000 chars ...]";
|
|
826
1085
|
}
|
|
827
|
-
return { text, links };
|
|
1086
|
+
return { text, rawHtml: html, links };
|
|
828
1087
|
} finally {
|
|
829
1088
|
clearTimeout(timeout);
|
|
830
1089
|
}
|
|
@@ -1308,15 +1567,14 @@ ${output}` }]
|
|
|
1308
1567
|
const resultsCount = clamp(Number(args2.results_count) || 0, 0, 9999);
|
|
1309
1568
|
if (!targetId) throw new Error("target_id is required");
|
|
1310
1569
|
const { error } = await supabase2.from("lead_generation_target").update({
|
|
1311
|
-
status: "completed",
|
|
1312
1570
|
results_count: resultsCount,
|
|
1313
|
-
|
|
1571
|
+
assigned_workspace_id: null
|
|
1314
1572
|
}).eq("id", targetId);
|
|
1315
|
-
if (error) throw new Error(`Failed to
|
|
1573
|
+
if (error) throw new Error(`Failed to update target: ${error.message}`);
|
|
1316
1574
|
return {
|
|
1317
1575
|
content: [{
|
|
1318
1576
|
type: "text",
|
|
1319
|
-
text: `Target ${targetId}
|
|
1577
|
+
text: `Target ${targetId} updated: ${resultsCount} leads found. Workspace released.`
|
|
1320
1578
|
}]
|
|
1321
1579
|
};
|
|
1322
1580
|
}
|
|
@@ -1362,10 +1620,169 @@ ${result.text}`;
|
|
|
1362
1620
|
|
|
1363
1621
|
--- Links (${result.links.length} found, showing max 50) ---
|
|
1364
1622
|
${linkList}`;
|
|
1623
|
+
}
|
|
1624
|
+
const pageEmails = extractEmailsFromHtml(result.rawHtml);
|
|
1625
|
+
const pagePhones = extractPhonesFromHtml(result.rawHtml);
|
|
1626
|
+
const pageLinkedIn = extractLinkedInFromHtml(result.rawHtml);
|
|
1627
|
+
if (pageEmails.length > 0 || pagePhones.length > 0 || pageLinkedIn.length > 0) {
|
|
1628
|
+
text += "\n\n--- Auto-extracted Contact Info ---";
|
|
1629
|
+
if (pageEmails.length > 0) text += `
|
|
1630
|
+
Emails: ${pageEmails.join(", ")}`;
|
|
1631
|
+
if (pagePhones.length > 0) text += `
|
|
1632
|
+
Phones: ${pagePhones.join(", ")}`;
|
|
1633
|
+
if (pageLinkedIn.length > 0) text += `
|
|
1634
|
+
LinkedIn: ${pageLinkedIn.join(", ")}`;
|
|
1365
1635
|
}
|
|
1366
1636
|
return { content: [{ type: "text", text }] };
|
|
1367
1637
|
}
|
|
1368
1638
|
// -----------------------------------------------------------------
|
|
1639
|
+
// Web Find Contacts — SOTA multi-page crawler + email extraction
|
|
1640
|
+
// -----------------------------------------------------------------
|
|
1641
|
+
case "web-find-contacts": {
|
|
1642
|
+
const inputUrl = sanitizeString(args2.url, 2e3);
|
|
1643
|
+
if (!inputUrl) throw new Error("url is required");
|
|
1644
|
+
const companyName = sanitizeString(args2.company_name, 500);
|
|
1645
|
+
const includeSearch = args2.include_search !== false;
|
|
1646
|
+
const fullUrl = inputUrl.startsWith("http") ? inputUrl : `https://${inputUrl}`;
|
|
1647
|
+
let baseUrl;
|
|
1648
|
+
try {
|
|
1649
|
+
baseUrl = new URL(fullUrl);
|
|
1650
|
+
} catch {
|
|
1651
|
+
throw new Error(`Invalid URL: ${inputUrl}`);
|
|
1652
|
+
}
|
|
1653
|
+
const domain = baseUrl.hostname.replace(/^www\./, "");
|
|
1654
|
+
const urlsToTry = [fullUrl];
|
|
1655
|
+
if (!fullUrl.includes("www.")) urlsToTry.push(fullUrl.replace("://", "://www."));
|
|
1656
|
+
if (fullUrl.startsWith("https")) {
|
|
1657
|
+
urlsToTry.push(fullUrl.replace("https", "http"));
|
|
1658
|
+
if (!fullUrl.includes("www.")) urlsToTry.push(fullUrl.replace("https://", "http://www."));
|
|
1659
|
+
}
|
|
1660
|
+
let html = null;
|
|
1661
|
+
let usedWayback = false;
|
|
1662
|
+
for (const tryUrl of urlsToTry) {
|
|
1663
|
+
html = await fetchRawHtml(tryUrl, 12e3);
|
|
1664
|
+
if (html) break;
|
|
1665
|
+
}
|
|
1666
|
+
if (!html) {
|
|
1667
|
+
html = await fetchWaybackHtml(`https://${domain}`, 15e3);
|
|
1668
|
+
if (html) usedWayback = true;
|
|
1669
|
+
}
|
|
1670
|
+
if (!html) throw new Error(`Could not fetch ${fullUrl} (site may be down or blocking)`);
|
|
1671
|
+
const contactPages = usedWayback ? [] : discoverContactPages(html, fullUrl);
|
|
1672
|
+
const pagePromises = contactPages.map(async (pageUrl) => {
|
|
1673
|
+
const pageHtml = await fetchRawHtml(pageUrl, 8e3);
|
|
1674
|
+
return { url: pageUrl, html: pageHtml };
|
|
1675
|
+
});
|
|
1676
|
+
const pageResults = await Promise.allSettled(pagePromises);
|
|
1677
|
+
const successPages = [usedWayback ? `(wayback) ${domain}` : fullUrl];
|
|
1678
|
+
const allHtmls = [html];
|
|
1679
|
+
for (const result of pageResults) {
|
|
1680
|
+
if (result.status === "fulfilled" && result.value.html) {
|
|
1681
|
+
allHtmls.push(result.value.html);
|
|
1682
|
+
successPages.push(result.value.url);
|
|
1683
|
+
}
|
|
1684
|
+
}
|
|
1685
|
+
if (usedWayback) {
|
|
1686
|
+
const waybackContactPaths = ["/contact", "/over-ons", "/about", "/team"];
|
|
1687
|
+
const wbPromises = waybackContactPaths.map(async (path) => {
|
|
1688
|
+
const wbHtml = await fetchWaybackHtml(`https://${domain}${path}`, 12e3);
|
|
1689
|
+
return { path, html: wbHtml };
|
|
1690
|
+
});
|
|
1691
|
+
const wbResults = await Promise.allSettled(wbPromises);
|
|
1692
|
+
for (const wr of wbResults) {
|
|
1693
|
+
if (wr.status === "fulfilled" && wr.value.html) {
|
|
1694
|
+
allHtmls.push(wr.value.html);
|
|
1695
|
+
successPages.push(`(wayback) ${domain}${wr.value.path}`);
|
|
1696
|
+
}
|
|
1697
|
+
}
|
|
1698
|
+
}
|
|
1699
|
+
const allEmails = /* @__PURE__ */ new Set();
|
|
1700
|
+
const allPhones = /* @__PURE__ */ new Set();
|
|
1701
|
+
const allLinkedIn = /* @__PURE__ */ new Set();
|
|
1702
|
+
for (const pageHtml of allHtmls) {
|
|
1703
|
+
for (const e of extractEmailsFromHtml(pageHtml)) allEmails.add(e);
|
|
1704
|
+
for (const p of extractPhonesFromHtml(pageHtml)) allPhones.add(p);
|
|
1705
|
+
for (const l of extractLinkedInFromHtml(pageHtml)) allLinkedIn.add(l);
|
|
1706
|
+
}
|
|
1707
|
+
let searchNote = "";
|
|
1708
|
+
if (includeSearch) {
|
|
1709
|
+
try {
|
|
1710
|
+
const queries = companyName ? [`"${companyName}" "${domain}" email`, `"@${domain}" email`] : [`"${domain}" email contact`, `"@${domain}" email`, `site:${domain} "@"`];
|
|
1711
|
+
let totalSearchResults = 0;
|
|
1712
|
+
for (const searchQuery of queries) {
|
|
1713
|
+
const searchResults = await webSearch(searchQuery, 8);
|
|
1714
|
+
totalSearchResults += searchResults.length;
|
|
1715
|
+
for (const r of searchResults) {
|
|
1716
|
+
const combined = `${r.title} ${r.snippet}`;
|
|
1717
|
+
const decoded = decodeHtmlEntities(decodeUnicodeEscapes(combined));
|
|
1718
|
+
const snippetEmails = decoded.match(EMAIL_REGEX) || [];
|
|
1719
|
+
for (const e of snippetEmails) {
|
|
1720
|
+
const lower = e.toLowerCase();
|
|
1721
|
+
const domainBase = domain.split(".")[0] ?? domain;
|
|
1722
|
+
if (lower.includes(domainBase) && !NOISE_EMAIL_PATTERNS.some((p) => p.test(lower))) {
|
|
1723
|
+
allEmails.add(lower);
|
|
1724
|
+
}
|
|
1725
|
+
}
|
|
1726
|
+
const snippetPhones = decoded.match(PHONE_NL_REGEX) || [];
|
|
1727
|
+
for (const p of snippetPhones) {
|
|
1728
|
+
const clean = p.replace(/[\s\-().]/g, "");
|
|
1729
|
+
if (clean.length >= 10 && clean.length <= 14) allPhones.add(p.trim());
|
|
1730
|
+
}
|
|
1731
|
+
const snippetLi = decoded.match(LINKEDIN_REGEX) || [];
|
|
1732
|
+
for (const l of snippetLi) allLinkedIn.add(l.replace(/\/$/, ""));
|
|
1733
|
+
}
|
|
1734
|
+
if (allEmails.size > 0) break;
|
|
1735
|
+
}
|
|
1736
|
+
searchNote = ` + ${totalSearchResults} search results`;
|
|
1737
|
+
} catch {
|
|
1738
|
+
searchNote = " (search failed)";
|
|
1739
|
+
}
|
|
1740
|
+
}
|
|
1741
|
+
const foundEmails = [...allEmails];
|
|
1742
|
+
const generalPatterns = /^(info|contact|hello|admin|office|receptie|secretariaat|verkoop|administratie|support|service|boekingen|reserveringen)@/i;
|
|
1743
|
+
const generalEmails = foundEmails.filter((e) => generalPatterns.test(e));
|
|
1744
|
+
const personalEmails = foundEmails.filter((e) => !generalPatterns.test(e));
|
|
1745
|
+
const guessed = guessCommonEmails(domain);
|
|
1746
|
+
const newGuesses = guessed.filter((g) => !allEmails.has(g));
|
|
1747
|
+
const lines = [
|
|
1748
|
+
`=== CONTACT SCAN: ${domain} ===`,
|
|
1749
|
+
`Pages scanned: ${successPages.length}${searchNote}`,
|
|
1750
|
+
""
|
|
1751
|
+
];
|
|
1752
|
+
if (foundEmails.length > 0) {
|
|
1753
|
+
lines.push(`EMAILS FOUND (${foundEmails.length}):`);
|
|
1754
|
+
if (generalEmails.length > 0) {
|
|
1755
|
+
lines.push(" General:");
|
|
1756
|
+
for (const e of generalEmails) lines.push(` * ${e}`);
|
|
1757
|
+
}
|
|
1758
|
+
if (personalEmails.length > 0) {
|
|
1759
|
+
lines.push(" Personal/Department:");
|
|
1760
|
+
for (const e of personalEmails) lines.push(` * ${e}`);
|
|
1761
|
+
}
|
|
1762
|
+
} else {
|
|
1763
|
+
lines.push("EMAILS FOUND: None on website");
|
|
1764
|
+
lines.push("SUGGESTED EMAILS (common patterns, verify before use):");
|
|
1765
|
+
for (const e of newGuesses) lines.push(` ? ${e}`);
|
|
1766
|
+
}
|
|
1767
|
+
lines.push("");
|
|
1768
|
+
if ([...allPhones].length > 0) {
|
|
1769
|
+
lines.push(`PHONE NUMBERS (${allPhones.size}):`);
|
|
1770
|
+
for (const p of allPhones) lines.push(` ${p}`);
|
|
1771
|
+
} else {
|
|
1772
|
+
lines.push("PHONE NUMBERS: None found");
|
|
1773
|
+
}
|
|
1774
|
+
lines.push("");
|
|
1775
|
+
if ([...allLinkedIn].length > 0) {
|
|
1776
|
+
lines.push(`LINKEDIN (${allLinkedIn.size}):`);
|
|
1777
|
+
for (const l of allLinkedIn) lines.push(` ${l}`);
|
|
1778
|
+
}
|
|
1779
|
+
lines.push("", "PAGES CHECKED:");
|
|
1780
|
+
for (const p of successPages) lines.push(` [OK] ${p}`);
|
|
1781
|
+
const failedPages = contactPages.filter((p) => !successPages.includes(p));
|
|
1782
|
+
for (const p of failedPages) lines.push(` [--] ${p}`);
|
|
1783
|
+
return { content: [{ type: "text", text: lines.join("\n") }] };
|
|
1784
|
+
}
|
|
1785
|
+
// -----------------------------------------------------------------
|
|
1369
1786
|
default:
|
|
1370
1787
|
return { content: [{ type: "text", text: `Unknown agent tool: ${name}` }] };
|
|
1371
1788
|
}
|