@mgsoftwarebv/mg-dashboard-mcp 2.6.0 → 2.6.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -723,7 +723,7 @@ var AGENT_TOOLS = [
723
723
  },
724
724
  {
725
725
  name: "web-fetch",
726
- description: "Fetch a web page and return its text content (HTML tags stripped). Use this to read company websites, extract contact info, about pages, etc. Returns at most 15000 characters of cleaned text.",
726
+ description: "Fetch a web page and return its text content (HTML tags stripped). Automatically extracts emails and phone numbers found on the page. Returns at most 15000 characters of cleaned text.",
727
727
  inputSchema: {
728
728
  type: "object",
729
729
  properties: {
@@ -732,6 +732,28 @@ var AGENT_TOOLS = [
732
732
  },
733
733
  required: ["url"]
734
734
  }
735
+ },
736
+ {
737
+ name: "web-find-contacts",
738
+ description: "POWERFUL contact finder. Crawls a company website (homepage + contact/about/team pages), extracts ALL emails, phone numbers, and LinkedIn URLs via regex + mailto/tel parsing. Also searches Google for the company email as fallback. Returns structured contact data. USE THIS for every lead instead of manually browsing contact pages.",
739
+ inputSchema: {
740
+ type: "object",
741
+ properties: {
742
+ url: {
743
+ type: "string",
744
+ description: 'Company website URL or domain (e.g. "https://example.nl" or "example.nl")'
745
+ },
746
+ company_name: {
747
+ type: "string",
748
+ description: "Company name (improves Google search accuracy)"
749
+ },
750
+ include_search: {
751
+ type: "boolean",
752
+ description: "Also search Google for emails (default: true)"
753
+ }
754
+ },
755
+ required: ["url"]
756
+ }
735
757
  }
736
758
  ];
737
759
  var AGENT_TOOL_NAMES = new Set(AGENT_TOOLS.map((t) => t.name));
@@ -747,7 +769,8 @@ var AGENT_TOOL_MODULE_MAP = {
747
769
  "agent-save-email-draft": "agent_reporting",
748
770
  "agent-complete-target": "agent_reporting",
749
771
  "web-search": "agent_reporting",
750
- "web-fetch": "agent_reporting"
772
+ "web-fetch": "agent_reporting",
773
+ "web-find-contacts": "agent_reporting"
751
774
  };
752
775
  function clamp(val, min, max) {
753
776
  return Math.max(min, Math.min(max, val));
@@ -788,6 +811,242 @@ async function webSearch(query, maxResults) {
788
811
  }
789
812
  return results;
790
813
  }
814
/**
 * Matches e-mail-shaped tokens (`local@domain.tld`, TLD of two or more letters).
 * Global flag: intended for `String.prototype.match` / `matchAll`.
 */
var EMAIL_REGEX = /[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,}/g;
/**
 * Matches Dutch-style phone numbers starting with `+31`, `0031` or a leading `0`,
 * with optional space/dash/dot separators and an optional parenthesised area code.
 *
 * The `(?<!\d)` / `(?!\d)` guards keep the pattern from matching a slice of a
 * longer digit run (order numbers, IDs, timestamps), which would otherwise pass
 * the callers' 10-14 character length check as a bogus phone number.
 */
var PHONE_NL_REGEX = /(?<!\d)(?:\+31|0031|0)[\s\-.]?\(?\d{1,3}\)?[\s\-.]?\d{3,4}[\s\-.]?\d{2,4}(?!\d)/g;
/**
 * Matches LinkedIn profile (`/in/...`) and company (`/company/...`) URLs,
 * with or without `www.` and an optional trailing slash. Case-insensitive.
 */
var LINKEDIN_REGEX = /https?:\/\/(?:www\.)?linkedin\.com\/(?:in|company)\/[a-zA-Z0-9\-_%]+\/?/gi;
817
/**
 * Patterns identifying e-mail addresses that should be discarded as noise:
 * automated senders, third-party platform/vendor addresses, and placeholder
 * or example addresses found in templates.
 *
 * An address is treated as noise when ANY pattern matches it. None of the
 * patterns carries the `g` flag, so `.test()` is stateless and safe to reuse.
 *
 * `test@`, `demo@` and `sample@` are anchored to the start of the local part
 * or to a separator (`.`, `_`, `+`, `-`) so that genuine mailboxes such as
 * `contest@` or `latest@` are no longer thrown away by a substring hit.
 */
var NOISE_EMAIL_PATTERNS = [
  // Automated / system senders
  /noreply@/i,
  /no-reply@/i,
  /mailer-daemon@/i,
  /postmaster@/i,
  /@example\./i,
  /(?:^|[._+\-])test@/i,
  // Third-party platforms, trackers and vendors
  /@wix\.com$/i,
  /@sentry\.io$/i,
  /@wordpress\./i,
  /@gravatar\.com$/i,
  /@schema\.org$/i,
  /@w3\.org$/i,
  /@facebook\.com$/i,
  /@google\.com$/i,
  /@twitter\.com$/i,
  /@github\.com$/i,
  /@cloudflare\./i,
  /@vercel\./i,
  /@netlify\./i,
  /@cookiebot\./i,
  /@hotjar\./i,
  /@hubspot\./i,
  /@mailchimp\./i,
  /@googleusercontent/i,
  /@gstatic/i,
  /@youtube/i,
  /@recaptcha/i,
  /@privacy/i,
  /@cookie/i,
  /@gdpr/i,
  /@dynamicweb\./i,
  // Placeholder / template addresses
  /@placeholder\./i,
  /@yourcompany\./i,
  /@company\./i,
  /smith@/i,
  /doe@/i,
  /(?:^|[._+\-])demo@/i,
  /(?:^|[._+\-])sample@/i,
  /naam@/i,
  /voorbeeld/i,
  /your-?email/i,
  /email@/i,
  /@domein\./i,
  /@bedrijf\./i,
  /@domain\./i,
  /@sentry/i,
  /@wixpress/i,
  /@lieferkassen/i,
  /john@/i,
  /jane@/i
];
869
/**
 * Substrings that mark a same-site link as a likely contact/about/team page
 * when they occur anywhere in the link's lower-cased pathname.
 */
var CONTACT_PATH_KEYWORDS = [
  "contact", "about", "over-ons", "over",
  "team", "medewerker", "impressum", "imprint",
  "wie-zijn", "ons-team", "werknemers", "organisatie",
  "zakelijk", "bedrijfsinfo", "neem-contact", "footer"
];
/**
 * Well-known contact/about paths that are probed on the site origin even when
 * the fetched page does not link to them.
 */
var CONTACT_PATHS_TO_TRY = [
  "/contact", "/over-ons", "/about", "/about-us",
  "/team", "/contact-us", "/over", "/wie-zijn-wij",
  "/ons-team", "/zakelijk/over-ons", "/medewerkers", "/organisatie",
  "/bedrijfsinformatie", "/impressum"
];
903
/**
 * Replaces literal `\uXXXX` escape sequences (backslash, `u`, four hex digits)
 * in `text` with the UTF-16 code unit they denote.
 *
 * @param {string} text - Text that may contain JSON/JS-style unicode escapes.
 * @returns {string} The text with every `\uXXXX` sequence decoded.
 */
function decodeUnicodeEscapes(text) {
  const escapePattern = /\\u([0-9a-fA-F]{4})/g;
  const toCodeUnit = (_whole, hexDigits) => {
    const codeUnit = parseInt(hexDigits, 16);
    return String.fromCharCode(codeUnit);
  };
  return text.replace(escapePattern, toCodeUnit);
}
909
/**
 * Decodes numeric HTML character references (`&#64;`, `&#x40;`, `&#X40;`).
 * Named entities such as `&amp;` are left untouched.
 *
 * Decoding happens in a single pass, so the output of one reference is never
 * re-interpreted as part of another (`&#38;#x40;` yields the text `&#x40;`).
 * `String.fromCodePoint` is used so references above U+FFFF (e.g. emoji)
 * decode correctly; references outside the Unicode range are kept verbatim.
 *
 * @param {string} text - Text or HTML containing numeric character references.
 * @returns {string} The text with valid numeric references decoded.
 */
function decodeHtmlEntities(text) {
  const MAX_CODE_POINT = 0x10ffff;
  return text.replace(/&#(?:(\d+)|[xX]([0-9a-fA-F]+));/g, (entity, decimal, hex) => {
    const codePoint = decimal !== undefined
      ? Number.parseInt(decimal, 10)
      : Number.parseInt(hex, 16);
    // Written as a negated <= so Infinity (very long digit runs) is rejected too.
    if (!(codePoint <= MAX_CODE_POINT)) return entity;
    return String.fromCodePoint(codePoint);
  });
}
915
/**
 * Collects e-mail addresses from a page's HTML.
 *
 * Sources, in insertion order:
 *  1. `mailto:` targets in the raw HTML and in the entity/escape-decoded HTML;
 *  2. any e-mail-shaped token in the decoded HTML with `<style>` blocks removed,
 *     skipping tokens that end in an asset extension (e.g. `logo@2x.png`).
 * Addresses are lower-cased, de-duplicated, and finally filtered against
 * `NOISE_EMAIL_PATTERNS`.
 *
 * @param {string} html - Raw HTML of the page.
 * @returns {string[]} Unique lower-cased addresses that are not noise.
 */
function extractEmailsFromHtml(html) {
  const decodedHtml = decodeHtmlEntities(decodeUnicodeEscapes(html));
  const collected = new Set();

  const mailtoPattern = /mailto:([a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,})/gi;
  for (const source of [html, decodedHtml]) {
    for (const hit of source.matchAll(mailtoPattern)) {
      collected.add(hit[1].toLowerCase());
    }
  }

  const assetSuffix = /\.(png|jpg|jpeg|gif|svg|webp|css|js|woff|ttf|eot|ico)$/i;
  const withoutStyles = decodedHtml.replace(/<style[\s\S]*?<\/style>/gi, "");
  const candidates = withoutStyles.match(EMAIL_REGEX) || [];
  for (const candidate of candidates) {
    const address = candidate.toLowerCase();
    if (assetSuffix.test(address)) continue;
    collected.add(address);
  }

  const isNoise = (address) => NOISE_EMAIL_PATTERNS.some((pattern) => pattern.test(address));
  return Array.from(collected).filter((address) => !isNoise(address));
}
933
/**
 * Collects phone numbers from a page's HTML.
 *
 * Sources, in insertion order:
 *  1. `href="tel:..."` / `href='tel:...'` targets in the raw and decoded HTML;
 *  2. `PHONE_NL_REGEX` matches in the decoded text with scripts, styles and
 *     tags removed.
 * A candidate is kept only when, with whitespace, dashes, dots and parentheses
 * removed, it is 10 to 14 characters long. Candidates are de-duplicated on that
 * separator-free form, so the same number written as `+31201234567` and
 * `+31 20 123 4567` is reported once (first-seen formatting wins) instead of
 * using up several of the result slots.
 *
 * @param {string} html - Raw HTML of the page.
 * @returns {string[]} At most 10 distinct phone numbers, trimmed.
 */
function extractPhonesFromHtml(html) {
  const MAX_PHONES = 10;
  const decoded = decodeHtmlEntities(decodeUnicodeEscapes(html));
  // separator-free number -> first formatting seen
  const phones = new Map();

  const remember = (raw) => {
    const key = raw.replace(/[\s\-().]/g, "");
    if (key.length < 10 || key.length > 14) return;
    if (!phones.has(key)) phones.set(key, raw.trim());
  };

  const telRegex = /href=(?:"tel:([^"]+)"|'tel:([^']+)')/gi;
  for (const src of [html, decoded]) {
    for (const m of src.matchAll(telRegex)) remember(m[1] ?? m[2]);
  }

  const visibleText = decoded
    .replace(/<script[\s\S]*?<\/script>/gi, "")
    .replace(/<style[\s\S]*?<\/style>/gi, "")
    .replace(/<[^>]+>/g, " ");
  for (const p of visibleText.match(PHONE_NL_REGEX) ?? []) remember(p);

  return [...phones.values()].slice(0, MAX_PHONES);
}
952
/**
 * Finds LinkedIn profile/company URLs in `html` using `LINKEDIN_REGEX`.
 * A single trailing slash is dropped before de-duplication, so
 * `.../company/acme/` and `.../company/acme` count as one link.
 *
 * @param {string} html - Raw HTML (or any text) to scan.
 * @returns {string[]} Unique LinkedIn URLs in order of first appearance.
 */
function extractLinkedInFromHtml(html) {
  const found = html.match(LINKEDIN_REGEX) || [];
  const withoutTrailingSlash = found.map((link) => link.replace(/\/$/, ""));
  return Array.from(new Set(withoutTrailingSlash));
}
958
/**
 * Builds the list of candidate contact/about/team page URLs for a site.
 *
 * Candidates, in priority order:
 *  1. same-hostname links found in `html` whose lower-cased pathname contains
 *     one of `CONTACT_PATH_KEYWORDS` (query string and fragment are dropped);
 *  2. the guessed paths from `CONTACT_PATHS_TO_TRY` on the base origin.
 * Links that really exist on the page come first so the result cap cannot
 * crowd them out: the guessed list alone is longer than the cap, so adding the
 * guesses first meant discovered links were never returned.
 * The base page itself is removed from the result.
 *
 * @param {string} html - HTML of the page the links are read from.
 * @param {string} baseUrl - Absolute URL of that page; relative hrefs resolve against it.
 * @returns {string[]} At most 10 unique absolute URLs; empty when `baseUrl` is not a valid URL.
 */
function discoverContactPages(html, baseUrl) {
  const MAX_PAGES = 10;
  let base;
  try {
    base = new URL(baseUrl);
  } catch {
    return [];
  }
  const pages = new Set();

  // Accept double- or single-quoted hrefs; an optional #fragment is ignored.
  const linkRegex = /<a\s[^>]*?href=(?:"([^"#]*)(?:#[^"]*)?"|'([^'#]*)(?:#[^']*)?')[^>]*>/gi;
  for (const m of html.matchAll(linkRegex)) {
    let url;
    try {
      url = new URL(m[1] ?? m[2], baseUrl);
    } catch {
      // Malformed href: not a usable link, move on to the next one.
      continue;
    }
    if (url.hostname !== base.hostname) continue;
    const path = url.pathname.toLowerCase();
    if (CONTACT_PATH_KEYWORDS.some((kw) => path.includes(kw))) {
      pages.add(url.origin + url.pathname);
    }
  }

  for (const path of CONTACT_PATHS_TO_TRY) {
    pages.add(`${base.origin}${path}`);
  }

  pages.delete(base.origin + base.pathname);
  return [...pages].slice(0, MAX_PAGES);
}
985
/**
 * Produces common mailbox guesses for a domain; a leading `www.` is dropped.
 * The results are guesses only, not verified addresses.
 *
 * @param {string} domain - Host name such as `example.nl` or `www.example.nl`.
 * @returns {string[]} `info@`, `contact@`, `hello@`, `administratie@` and `verkoop@` on the domain.
 */
function guessCommonEmails(domain) {
  const bareDomain = domain.replace(/^www\./, "");
  const mailboxes = ["info", "contact", "hello", "administratie", "verkoop"];
  return mailboxes.map((mailbox) => `${mailbox}@${bareDomain}`);
}
989
/** Text fragments whose presence marks a short page as an anti-bot challenge. */
var BOT_CHALLENGE_INDICATORS = [
  "sgcaptcha",
  "challenge-platform",
  "cf-browser-verification",
  "Just a moment",
  "Checking your browser",
  "Enable JavaScript and cookies",
  "Attention Required",
  "DDoS protection by"
];
/**
 * Heuristically decides whether `html` is an anti-bot challenge page rather
 * than real content. Only short responses (at most 2000 characters) are
 * considered; the indicator match is case-insensitive.
 *
 * @param {string} html - Response body to inspect.
 * @returns {boolean} `true` when a short page contains any challenge indicator.
 */
function isBotChallengePage(html) {
  if (html.length > 2e3) {
    return false;
  }
  const haystack = html.toLowerCase();
  for (const indicator of BOT_CHALLENGE_INDICATORS) {
    if (haystack.includes(indicator.toLowerCase())) {
      return true;
    }
  }
  return false;
}
1004
/**
 * Fetches a URL and returns its body as text, or `null` when the page is not
 * usable. This is best-effort: every failure (network error, timeout, abort)
 * is reported as `null` rather than thrown.
 *
 * A response is rejected when its content type is not HTML/XHTML/plain text,
 * when its status is neither 2xx nor 403 (a 403 body is still read), or when
 * the body is recognised by `isBotChallengePage`.
 *
 * Rejected responses have their body stream cancelled explicitly, so the
 * connection is released instead of waiting on an unread body.
 *
 * @param {string} url - Absolute URL to fetch.
 * @param {number} [timeoutMs=10000] - Abort the request after this many milliseconds.
 * @returns {Promise<string|null>} The response text, or `null` if unusable or failed.
 */
async function fetchRawHtml(url, timeoutMs = 1e4) {
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeoutMs);
  try {
    const res = await fetch(url, {
      headers: {
        "User-Agent": WEB_USER_AGENT,
        "Accept": "text/html,application/xhtml+xml",
        "Accept-Language": "nl,en;q=0.9"
      },
      redirect: "follow",
      signal: controller.signal
    });
    const contentType = res.headers.get("content-type") || "";
    const isTextual = ["text/html", "text/plain", "xhtml"].some((kind) => contentType.includes(kind));
    const isAcceptedStatus = res.ok || res.status === 403;
    if (!isTextual || !isAcceptedStatus) {
      // Drop the unread body so the underlying connection can be reused.
      await res.body?.cancel();
      return null;
    }
    const html = await res.text();
    return isBotChallengePage(html) ? null : html;
  } catch {
    // Deliberate best-effort: callers fall back to other URLs on null.
    return null;
  } finally {
    clearTimeout(timer);
  }
}
1029
/**
 * Fetches an archived copy of `url` from the Internet Archive Wayback Machine.
 * This is best-effort: every failure (network error, timeout, non-OK status) is
 * reported as `null` rather than thrown.
 *
 * The request goes to `https://web.archive.org/web/<snapshot>/<url without scheme>`.
 * Bodies shorter than 500 characters are treated as unusable. A non-OK response
 * has its body stream cancelled so the connection is released.
 *
 * @param {string} url - Original page URL; a leading `http://` or `https://` is stripped.
 * @param {number} [timeoutMs=15000] - Abort the request after this many milliseconds.
 * @param {string} [snapshot="2024"] - Timestamp segment of the archive URL. The default
 *   keeps the previously hard-coded value, so existing callers are unaffected.
 * @returns {Promise<string|null>} Archived HTML, or `null` if unavailable.
 */
async function fetchWaybackHtml(url, timeoutMs = 15e3, snapshot = "2024") {
  const cleanUrl = url.replace(/^https?:\/\//, "");
  const wbUrl = `https://web.archive.org/web/${snapshot}/${cleanUrl}`;
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeoutMs);
  try {
    const res = await fetch(wbUrl, {
      headers: { "User-Agent": WEB_USER_AGENT },
      redirect: "follow",
      signal: controller.signal
    });
    if (!res.ok) {
      // Drop the unread body so the underlying connection can be reused.
      await res.body?.cancel();
      return null;
    }
    const html = await res.text();
    return html.length < 500 ? null : html;
  } catch {
    // Deliberate best-effort: the archive is only a fallback source.
    return null;
  } finally {
    clearTimeout(timer);
  }
}
791
1050
  async function webFetch(url, extractLinks) {
792
1051
  const controller = new AbortController();
793
1052
  const timeout = setTimeout(() => controller.abort(), 15e3);
@@ -824,7 +1083,7 @@ async function webFetch(url, extractLinks) {
824
1083
  if (text.length > MAX_TEXT) {
825
1084
  text = text.slice(0, MAX_TEXT) + "\n\n[... truncated at 15000 chars ...]";
826
1085
  }
827
- return { text, links };
1086
+ return { text, rawHtml: html, links };
828
1087
  } finally {
829
1088
  clearTimeout(timeout);
830
1089
  }
@@ -1308,15 +1567,14 @@ ${output}` }]
1308
1567
  const resultsCount = clamp(Number(args2.results_count) || 0, 0, 9999);
1309
1568
  if (!targetId) throw new Error("target_id is required");
1310
1569
  const { error } = await supabase2.from("lead_generation_target").update({
1311
- status: "completed",
1312
1570
  results_count: resultsCount,
1313
- completed_at: (/* @__PURE__ */ new Date()).toISOString()
1571
+ assigned_workspace_id: null
1314
1572
  }).eq("id", targetId);
1315
- if (error) throw new Error(`Failed to complete target: ${error.message}`);
1573
+ if (error) throw new Error(`Failed to update target: ${error.message}`);
1316
1574
  return {
1317
1575
  content: [{
1318
1576
  type: "text",
1319
- text: `Target ${targetId} marked completed. ${resultsCount} leads found.`
1577
+ text: `Target ${targetId} updated: ${resultsCount} leads found. Workspace released.`
1320
1578
  }]
1321
1579
  };
1322
1580
  }
@@ -1362,10 +1620,169 @@ ${result.text}`;
1362
1620
 
1363
1621
  --- Links (${result.links.length} found, showing max 50) ---
1364
1622
  ${linkList}`;
1623
+ }
1624
+ const pageEmails = extractEmailsFromHtml(result.rawHtml);
1625
+ const pagePhones = extractPhonesFromHtml(result.rawHtml);
1626
+ const pageLinkedIn = extractLinkedInFromHtml(result.rawHtml);
1627
+ if (pageEmails.length > 0 || pagePhones.length > 0 || pageLinkedIn.length > 0) {
1628
+ text += "\n\n--- Auto-extracted Contact Info ---";
1629
+ if (pageEmails.length > 0) text += `
1630
+ Emails: ${pageEmails.join(", ")}`;
1631
+ if (pagePhones.length > 0) text += `
1632
+ Phones: ${pagePhones.join(", ")}`;
1633
+ if (pageLinkedIn.length > 0) text += `
1634
+ LinkedIn: ${pageLinkedIn.join(", ")}`;
1365
1635
  }
1366
1636
  return { content: [{ type: "text", text }] };
1367
1637
  }
1368
1638
  // -----------------------------------------------------------------
1639
+ // Web Find Contacts — SOTA multi-page crawler + email extraction
1640
+ // -----------------------------------------------------------------
1641
+ case "web-find-contacts": {
1642
+ const inputUrl = sanitizeString(args2.url, 2e3);
1643
+ if (!inputUrl) throw new Error("url is required");
1644
+ const companyName = sanitizeString(args2.company_name, 500);
1645
+ const includeSearch = args2.include_search !== false;
1646
+ const fullUrl = inputUrl.startsWith("http") ? inputUrl : `https://${inputUrl}`;
1647
+ let baseUrl;
1648
+ try {
1649
+ baseUrl = new URL(fullUrl);
1650
+ } catch {
1651
+ throw new Error(`Invalid URL: ${inputUrl}`);
1652
+ }
1653
+ const domain = baseUrl.hostname.replace(/^www\./, "");
1654
+ const urlsToTry = [fullUrl];
1655
+ if (!fullUrl.includes("www.")) urlsToTry.push(fullUrl.replace("://", "://www."));
1656
+ if (fullUrl.startsWith("https")) {
1657
+ urlsToTry.push(fullUrl.replace("https", "http"));
1658
+ if (!fullUrl.includes("www.")) urlsToTry.push(fullUrl.replace("https://", "http://www."));
1659
+ }
1660
+ let html = null;
1661
+ let usedWayback = false;
1662
+ for (const tryUrl of urlsToTry) {
1663
+ html = await fetchRawHtml(tryUrl, 12e3);
1664
+ if (html) break;
1665
+ }
1666
+ if (!html) {
1667
+ html = await fetchWaybackHtml(`https://${domain}`, 15e3);
1668
+ if (html) usedWayback = true;
1669
+ }
1670
+ if (!html) throw new Error(`Could not fetch ${fullUrl} (site may be down or blocking)`);
1671
+ const contactPages = usedWayback ? [] : discoverContactPages(html, fullUrl);
1672
+ const pagePromises = contactPages.map(async (pageUrl) => {
1673
+ const pageHtml = await fetchRawHtml(pageUrl, 8e3);
1674
+ return { url: pageUrl, html: pageHtml };
1675
+ });
1676
+ const pageResults = await Promise.allSettled(pagePromises);
1677
+ const successPages = [usedWayback ? `(wayback) ${domain}` : fullUrl];
1678
+ const allHtmls = [html];
1679
+ for (const result of pageResults) {
1680
+ if (result.status === "fulfilled" && result.value.html) {
1681
+ allHtmls.push(result.value.html);
1682
+ successPages.push(result.value.url);
1683
+ }
1684
+ }
1685
+ if (usedWayback) {
1686
+ const waybackContactPaths = ["/contact", "/over-ons", "/about", "/team"];
1687
+ const wbPromises = waybackContactPaths.map(async (path) => {
1688
+ const wbHtml = await fetchWaybackHtml(`https://${domain}${path}`, 12e3);
1689
+ return { path, html: wbHtml };
1690
+ });
1691
+ const wbResults = await Promise.allSettled(wbPromises);
1692
+ for (const wr of wbResults) {
1693
+ if (wr.status === "fulfilled" && wr.value.html) {
1694
+ allHtmls.push(wr.value.html);
1695
+ successPages.push(`(wayback) ${domain}${wr.value.path}`);
1696
+ }
1697
+ }
1698
+ }
1699
+ const allEmails = /* @__PURE__ */ new Set();
1700
+ const allPhones = /* @__PURE__ */ new Set();
1701
+ const allLinkedIn = /* @__PURE__ */ new Set();
1702
+ for (const pageHtml of allHtmls) {
1703
+ for (const e of extractEmailsFromHtml(pageHtml)) allEmails.add(e);
1704
+ for (const p of extractPhonesFromHtml(pageHtml)) allPhones.add(p);
1705
+ for (const l of extractLinkedInFromHtml(pageHtml)) allLinkedIn.add(l);
1706
+ }
1707
+ let searchNote = "";
1708
+ if (includeSearch) {
1709
+ try {
1710
+ const queries = companyName ? [`"${companyName}" "${domain}" email`, `"@${domain}" email`] : [`"${domain}" email contact`, `"@${domain}" email`, `site:${domain} "@"`];
1711
+ let totalSearchResults = 0;
1712
+ for (const searchQuery of queries) {
1713
+ const searchResults = await webSearch(searchQuery, 8);
1714
+ totalSearchResults += searchResults.length;
1715
+ for (const r of searchResults) {
1716
+ const combined = `${r.title} ${r.snippet}`;
1717
+ const decoded = decodeHtmlEntities(decodeUnicodeEscapes(combined));
1718
+ const snippetEmails = decoded.match(EMAIL_REGEX) || [];
1719
+ for (const e of snippetEmails) {
1720
+ const lower = e.toLowerCase();
1721
+ const domainBase = domain.split(".")[0] ?? domain;
1722
+ if (lower.includes(domainBase) && !NOISE_EMAIL_PATTERNS.some((p) => p.test(lower))) {
1723
+ allEmails.add(lower);
1724
+ }
1725
+ }
1726
+ const snippetPhones = decoded.match(PHONE_NL_REGEX) || [];
1727
+ for (const p of snippetPhones) {
1728
+ const clean = p.replace(/[\s\-().]/g, "");
1729
+ if (clean.length >= 10 && clean.length <= 14) allPhones.add(p.trim());
1730
+ }
1731
+ const snippetLi = decoded.match(LINKEDIN_REGEX) || [];
1732
+ for (const l of snippetLi) allLinkedIn.add(l.replace(/\/$/, ""));
1733
+ }
1734
+ if (allEmails.size > 0) break;
1735
+ }
1736
+ searchNote = ` + ${totalSearchResults} search results`;
1737
+ } catch {
1738
+ searchNote = " (search failed)";
1739
+ }
1740
+ }
1741
+ const foundEmails = [...allEmails];
1742
+ const generalPatterns = /^(info|contact|hello|admin|office|receptie|secretariaat|verkoop|administratie|support|service|boekingen|reserveringen)@/i;
1743
+ const generalEmails = foundEmails.filter((e) => generalPatterns.test(e));
1744
+ const personalEmails = foundEmails.filter((e) => !generalPatterns.test(e));
1745
+ const guessed = guessCommonEmails(domain);
1746
+ const newGuesses = guessed.filter((g) => !allEmails.has(g));
1747
+ const lines = [
1748
+ `=== CONTACT SCAN: ${domain} ===`,
1749
+ `Pages scanned: ${successPages.length}${searchNote}`,
1750
+ ""
1751
+ ];
1752
+ if (foundEmails.length > 0) {
1753
+ lines.push(`EMAILS FOUND (${foundEmails.length}):`);
1754
+ if (generalEmails.length > 0) {
1755
+ lines.push(" General:");
1756
+ for (const e of generalEmails) lines.push(` * ${e}`);
1757
+ }
1758
+ if (personalEmails.length > 0) {
1759
+ lines.push(" Personal/Department:");
1760
+ for (const e of personalEmails) lines.push(` * ${e}`);
1761
+ }
1762
+ } else {
1763
+ lines.push("EMAILS FOUND: None on website");
1764
+ lines.push("SUGGESTED EMAILS (common patterns, verify before use):");
1765
+ for (const e of newGuesses) lines.push(` ? ${e}`);
1766
+ }
1767
+ lines.push("");
1768
+ if ([...allPhones].length > 0) {
1769
+ lines.push(`PHONE NUMBERS (${allPhones.size}):`);
1770
+ for (const p of allPhones) lines.push(` ${p}`);
1771
+ } else {
1772
+ lines.push("PHONE NUMBERS: None found");
1773
+ }
1774
+ lines.push("");
1775
+ if ([...allLinkedIn].length > 0) {
1776
+ lines.push(`LINKEDIN (${allLinkedIn.size}):`);
1777
+ for (const l of allLinkedIn) lines.push(` ${l}`);
1778
+ }
1779
+ lines.push("", "PAGES CHECKED:");
1780
+ for (const p of successPages) lines.push(` [OK] ${p}`);
1781
+ const failedPages = contactPages.filter((p) => !successPages.includes(p));
1782
+ for (const p of failedPages) lines.push(` [--] ${p}`);
1783
+ return { content: [{ type: "text", text: lines.join("\n") }] };
1784
+ }
1785
+ // -----------------------------------------------------------------
1369
1786
  default:
1370
1787
  return { content: [{ type: "text", text: `Unknown agent tool: ${name}` }] };
1371
1788
  }