@mgsoftwarebv/mg-dashboard-mcp 2.5.1 → 2.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -723,7 +723,7 @@ var AGENT_TOOLS = [
723
723
  },
724
724
  {
725
725
  name: "web-fetch",
726
- description: "Fetch a web page and return its text content (HTML tags stripped). Use this to read company websites, extract contact info, about pages, etc. Returns at most 15000 characters of cleaned text.",
726
+ description: "Fetch a web page and return its text content (HTML tags stripped). Automatically extracts emails and phone numbers found on the page. Returns at most 15000 characters of cleaned text.",
727
727
  inputSchema: {
728
728
  type: "object",
729
729
  properties: {
@@ -732,6 +732,28 @@ var AGENT_TOOLS = [
732
732
  },
733
733
  required: ["url"]
734
734
  }
735
+ },
736
+ {
737
+ name: "web-find-contacts",
738
+ description: "POWERFUL contact finder. Crawls a company website (homepage + contact/about/team pages), extracts ALL emails, phone numbers, and LinkedIn URLs via regex + mailto/tel parsing. Also searches Google for the company email as fallback. Returns structured contact data. USE THIS for every lead instead of manually browsing contact pages.",
739
+ inputSchema: {
740
+ type: "object",
741
+ properties: {
742
+ url: {
743
+ type: "string",
744
+ description: 'Company website URL or domain (e.g. "https://example.nl" or "example.nl")'
745
+ },
746
+ company_name: {
747
+ type: "string",
748
+ description: "Company name (improves Google search accuracy)"
749
+ },
750
+ include_search: {
751
+ type: "boolean",
752
+ description: "Also search Google for emails (default: true)"
753
+ }
754
+ },
755
+ required: ["url"]
756
+ }
735
757
  }
736
758
  ];
737
759
  var AGENT_TOOL_NAMES = new Set(AGENT_TOOLS.map((t) => t.name));
@@ -747,7 +769,8 @@ var AGENT_TOOL_MODULE_MAP = {
747
769
  "agent-save-email-draft": "agent_reporting",
748
770
  "agent-complete-target": "agent_reporting",
749
771
  "web-search": "agent_reporting",
750
- "web-fetch": "agent_reporting"
772
+ "web-fetch": "agent_reporting",
773
+ "web-find-contacts": "agent_reporting"
751
774
  };
752
775
  function clamp(val, min, max) {
753
776
  return Math.max(min, Math.min(max, val));
@@ -788,6 +811,192 @@ async function webSearch(query, maxResults) {
788
811
  }
789
812
  return results;
790
813
  }
// Matches e-mail addresses in free text. Global flag so that
// String.prototype.match() returns every occurrence.
var EMAIL_REGEX = /[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,}/g;
// Matches phone numbers that start with "+31" or "0", followed by three digit
// groups that may be separated by a space, dash or dot.
// The (?<!\d) / (?!\d) guards keep the pattern from matching a slice of a
// longer digit run (timestamps, order numbers, IDs), which would otherwise be
// reported as a phone number.
var PHONE_NL_REGEX = /(?<!\d)(?:\+31|0)[\s\-.]?\(?\d{1,3}\)?[\s\-.]?\d{3,4}[\s\-.]?\d{2,4}(?!\d)/g;
// Matches LinkedIn personal (/in/...) and company (/company/...) URLs,
// including an optional trailing slash.
var LINKEDIN_REGEX = /https?:\/\/(?:www\.)?linkedin\.com\/(?:in|company)\/[a-zA-Z0-9\-_%]+\/?/gi;
// Patterns for addresses that should never be reported as a contact e-mail.
// Each pattern is tested against a single, complete e-mail address.
var NOISE_EMAIL_PATTERNS = [
  // Automated / system mailboxes.
  /noreply@/i,
  /no-reply@/i,
  /mailer-daemon@/i,
  /postmaster@/i,
  /@example\./i,
  // Anchored to the start of the local part or to a separator so that real
  // mailboxes that merely end in "test" (e.g. "contest@") are kept.
  /(?:^|[._+\-])test@/i,
  // Third-party platforms, trackers and CDNs whose addresses leak into markup.
  /@wix\.com$/i,
  /@sentry\.io$/i,
  /@wordpress\./i,
  /@gravatar\.com$/i,
  /@schema\.org$/i,
  /@w3\.org$/i,
  /@facebook\.com$/i,
  /@google\.com$/i,
  /@twitter\.com$/i,
  /@github\.com$/i,
  /@cloudflare\./i,
  /@vercel\./i,
  /@netlify\./i,
  /@cookiebot\./i,
  /@hotjar\./i,
  /@hubspot\./i,
  /@mailchimp\./i,
  /@googleusercontent/i,
  /@gstatic/i,
  /@youtube/i,
  /@recaptcha/i,
  /@privacy/i,
  /@cookie/i,
  /@gdpr/i,
  /@dynamicweb\./i,
  // Template / placeholder domains.
  /@placeholder\./i,
  /@yourcompany\./i,
  /@company\./i,
  // Placeholder surnames. NOTE(review): left unanchored on purpose so that
  // forms such as "jsmith@" stay filtered - confirm this is the intent.
  /smith@/i,
  /doe@/i,
  // Same anchoring as "test@" above.
  /(?:^|[._+\-])demo@/i,
  /(?:^|[._+\-])sample@/i
];
// Substrings that mark a same-site link as a likely contact / about / team
// page. Compared against the lower-cased URL path.
var CONTACT_PATH_KEYWORDS = [
  // contact & about
  "contact", "about", "over-ons", "over",
  // people
  "team", "medewerker",
  // legal notice
  "impressum", "imprint",
  // Dutch "who are we" / organisation pages
  "wie-zijn", "ons-team", "werknemers", "organisatie",
  "zakelijk", "bedrijfsinfo", "neem-contact",
  "footer"
];
// Well-known paths that are appended to the site origin and probed even when
// the homepage does not link to them. Order is significant: callers truncate
// the resulting list.
var CONTACT_PATHS_TO_TRY = [
  "/contact", "/over-ons", "/about", "/about-us",
  "/team", "/contact-us", "/over", "/wie-zijn-wij",
  "/ons-team", "/zakelijk/over-ons", "/medewerkers", "/organisatie",
  "/bedrijfsinformatie", "/impressum"
];
/**
 * Replaces literal `\uXXXX` escape sequences (a backslash, "u" and exactly
 * four hex digits) in `text` with the UTF-16 code unit they encode.
 *
 * @param {string} text - Text that may contain escaped characters.
 * @returns {string} The text with every complete escape sequence decoded.
 */
function decodeUnicodeEscapes(text) {
  const escapePattern = /\\u([0-9a-fA-F]{4})/g;
  const toCharacter = (_sequence, hexDigits) => {
    const codeUnit = parseInt(hexDigits, 16);
    return String.fromCharCode(codeUnit);
  };
  return text.replace(escapePattern, toCharacter);
}
/**
 * Decodes numeric HTML character references in `text`: decimal (`&#64;`)
 * first, then hexadecimal (`&#x40;`). Named entities such as `&amp;` are left
 * untouched.
 *
 * Code points above U+FFFF are decoded with String.fromCodePoint so they are
 * not truncated to a single UTF-16 unit. References outside the Unicode range
 * are left as written instead of producing a bogus character.
 *
 * @param {string} text - Text that may contain numeric character references.
 * @returns {string} The text with valid numeric references decoded.
 */
function decodeHtmlEntities(text) {
  const decode = (entity, digits, radix) => {
    const codePoint = Number.parseInt(digits, radix);
    // String.fromCodePoint throws a RangeError above U+10FFFF; a very long
    // digit run can also parse to Infinity.
    if (!Number.isInteger(codePoint) || codePoint > 0x10ffff) return entity;
    return String.fromCodePoint(codePoint);
  };
  return text
    .replace(/&#(\d+);/g, (entity, decimal) => decode(entity, decimal, 10))
    .replace(/&#x([0-9a-fA-F]+);/g, (entity, hex) => decode(entity, hex, 16));
}
/**
 * Collects e-mail addresses from a page's HTML.
 *
 * Two sources are combined: `mailto:` targets (searched in both the raw and
 * the entity/escape-decoded markup) and any address-shaped text in the
 * decoded markup once `<style>` blocks are removed. Results are lower-cased,
 * de-duplicated, and filtered against NOISE_EMAIL_PATTERNS.
 *
 * @param {string} html - Raw HTML of a page.
 * @returns {string[]} Unique lower-cased addresses in first-seen order.
 */
function extractEmailsFromHtml(html) {
  const decoded = decodeHtmlEntities(decodeUnicodeEscapes(html));
  const found = /* @__PURE__ */ new Set();

  // Pass 1: explicit mailto: links.
  const mailtoRegex = /mailto:([a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,})/gi;
  for (const markup of [html, decoded]) {
    for (const hit of markup.matchAll(mailtoRegex)) {
      found.add(hit[1].toLowerCase());
    }
  }

  // Pass 2: anything that looks like an address outside of <style> blocks.
  // Asset names such as "logo@2x.png" match the address shape, so drop them.
  const assetSuffix = /\.(png|jpg|jpeg|gif|svg|webp|css|js|woff|ttf|eot|ico)$/i;
  const withoutStyles = decoded.replace(/<style[\s\S]*?<\/style>/gi, "");
  const candidates = withoutStyles.match(EMAIL_REGEX) || [];
  candidates
    .map((candidate) => candidate.toLowerCase())
    .filter((candidate) => !assetSuffix.test(candidate))
    .forEach((candidate) => found.add(candidate));

  return Array.from(found).filter(
    (email) => NOISE_EMAIL_PATTERNS.every((noise) => !noise.test(email))
  );
}
/**
 * Collects phone numbers from a page's HTML.
 *
 * Numbers come from `href="tel:..."` links (single- or double-quoted, searched
 * in both the raw and the decoded markup) and from PHONE_NL_REGEX matches in
 * the visible text (scripts, styles and tags removed). A candidate is kept
 * only when it is 10-14 characters long after whitespace, dashes, dots and
 * parentheses are stripped.
 *
 * The same number written in different ways ("+31201234567",
 * "020 123 4567", "+31 (0)20 1234567") is reported once, in the form it was
 * first seen.
 *
 * @param {string} html - Raw HTML of a page.
 * @returns {string[]} At most 10 phone numbers, in first-seen order.
 */
function extractPhonesFromHtml(html) {
  const decoded = decodeHtmlEntities(decodeUnicodeEscapes(html));
  // normalised number -> first display form encountered
  const phones = /* @__PURE__ */ new Map();

  const remember = (raw) => {
    const display = raw.trim();
    const compact = display.replace(/[\s\-().]/g, "");
    if (compact.length < 10 || compact.length > 14) return;
    // Fold the Dutch international prefix (+31 / 0031, optionally followed by
    // the trunk "0") into the national "0" so both spellings share one key.
    const key = compact.replace(/^(?:\+31|0031)0?/, "0");
    if (!phones.has(key)) phones.set(key, display);
  };

  const telHref = /href=(["'])tel:([^"']+)\1/gi;
  for (const markup of [html, decoded]) {
    for (const hit of markup.matchAll(telHref)) remember(hit[2]);
  }

  const visibleText = decoded
    .replace(/<script[\s\S]*?<\/script>/gi, "")
    .replace(/<style[\s\S]*?<\/style>/gi, "")
    .replace(/<[^>]+>/g, " ");
  for (const candidate of visibleText.match(PHONE_NL_REGEX) || []) remember(candidate);

  return [...phones.values()].slice(0, 10);
}
/**
 * Returns the distinct LinkedIn profile/company URLs found in `html`, with a
 * trailing slash removed, in first-seen order.
 *
 * @param {string} html - Raw HTML (or any text) to scan.
 * @returns {string[]} Unique LinkedIn URLs.
 */
function extractLinkedInFromHtml(html) {
  const rawMatches = html.match(LINKEDIN_REGEX) || [];
  const withoutTrailingSlash = rawMatches.map((url) => url.replace(/\/$/, ""));
  return Array.from(new Set(withoutTrailingSlash));
}
/**
 * Builds the list of candidate contact/about/team pages to crawl for a site.
 *
 * Links that actually appear in `html` - same site, with a path containing one
 * of CONTACT_PATH_KEYWORDS - are listed first. The well-known paths from
 * CONTACT_PATHS_TO_TRY are then appended as fallbacks. Because the list is
 * truncated, this order matters: putting the guessed paths first would fill
 * the whole quota and discovered links would never be returned.
 *
 * "www." is ignored when comparing hosts, so a page served from
 * www.example.nl still counts as the same site as example.nl. Query strings
 * and fragments are dropped from discovered links.
 *
 * @param {string} html - HTML of the page the links are read from.
 * @param {string} baseUrl - Absolute URL of that page; relative links resolve against it.
 * @param {number} [maxPages=10] - Maximum number of URLs to return.
 * @returns {string[]} Candidate page URLs (without the base page itself), or
 *   an empty array when `baseUrl` is not a valid URL.
 */
function discoverContactPages(html, baseUrl, maxPages = 10) {
  let base;
  try {
    base = new URL(baseUrl);
  } catch {
    return [];
  }
  const bareHost = (hostname) => hostname.replace(/^www\./, "");
  const siteHost = bareHost(base.hostname);
  const pages = /* @__PURE__ */ new Set();

  // Real links first. Accepts single- or double-quoted href values; the
  // fragment/query part is discarded below by using origin + pathname.
  const linkRegex = /<a\s[^>]*?href=(["'])(.*?)\1/gi;
  for (const hit of html.matchAll(linkRegex)) {
    let url;
    try {
      url = new URL(hit[2], base);
    } catch {
      continue; // unparsable href - nothing to crawl
    }
    if (url.protocol !== "http:" && url.protocol !== "https:") continue;
    if (bareHost(url.hostname) !== siteHost) continue;
    const path = url.pathname.toLowerCase();
    if (CONTACT_PATH_KEYWORDS.some((kw) => path.includes(kw))) {
      pages.add(url.origin + url.pathname);
    }
  }

  // Fallback guesses fill whatever quota is left.
  for (const path of CONTACT_PATHS_TO_TRY) {
    pages.add(`${base.origin}${path}`);
  }

  // The base page has already been fetched by the caller.
  pages.delete(base.origin + base.pathname);
  return [...pages].slice(0, maxPages);
}
/**
 * Produces the conventional mailbox addresses for a domain, for use as
 * unverified suggestions when no real address was found.
 *
 * @param {string} domain - Host name; a leading "www." is ignored.
 * @returns {string[]} Five guessed addresses, in a fixed order.
 */
function guessCommonEmails(domain) {
  const bareDomain = domain.replace(/^www\./, "");
  const mailboxes = ["info", "contact", "hello", "administratie", "verkoop"];
  return mailboxes.map((mailbox) => `${mailbox}@${bareDomain}`);
}
/**
 * Downloads `url` and returns the response body as text.
 *
 * Best-effort by design: any failure (network error, abort after `timeoutMs`,
 * non-2xx status, or a content type that is not HTML/XHTML/plain text) yields
 * `null` instead of throwing.
 *
 * @param {string} url - Absolute URL to fetch.
 * @param {number} [timeoutMs=10000] - Abort the request after this many milliseconds.
 * @returns {Promise<string|null>} The body text, or `null` when unavailable.
 */
async function fetchRawHtml(url, timeoutMs = 1e4) {
  const abort = new AbortController();
  const deadline = setTimeout(() => abort.abort(), timeoutMs);
  try {
    const requestHeaders = {
      "User-Agent": WEB_USER_AGENT,
      "Accept": "text/html,application/xhtml+xml",
      "Accept-Language": "nl,en;q=0.9"
    };
    const response = await fetch(url, {
      headers: requestHeaders,
      redirect: "follow",
      signal: abort.signal
    });
    if (!response.ok) {
      return null;
    }
    const contentType = response.headers.get("content-type") || "";
    const isTextual = ["text/html", "text/plain", "xhtml"].some((kind) => contentType.includes(kind));
    if (!isTextual) {
      return null;
    }
    return await response.text();
  } catch {
    // Deliberately swallowed: callers treat null as "page not available".
    return null;
  } finally {
    clearTimeout(deadline);
  }
}
791
1000
  async function webFetch(url, extractLinks) {
792
1001
  const controller = new AbortController();
793
1002
  const timeout = setTimeout(() => controller.abort(), 15e3);
@@ -824,7 +1033,7 @@ async function webFetch(url, extractLinks) {
824
1033
  if (text.length > MAX_TEXT) {
825
1034
  text = text.slice(0, MAX_TEXT) + "\n\n[... truncated at 15000 chars ...]";
826
1035
  }
827
- return { text, links };
1036
+ return { text, rawHtml: html, links };
828
1037
  } finally {
829
1038
  clearTimeout(timeout);
830
1039
  }
@@ -1308,15 +1517,14 @@ ${output}` }]
1308
1517
  const resultsCount = clamp(Number(args2.results_count) || 0, 0, 9999);
1309
1518
  if (!targetId) throw new Error("target_id is required");
1310
1519
  const { error } = await supabase2.from("lead_generation_target").update({
1311
- status: "completed",
1312
1520
  results_count: resultsCount,
1313
- completed_at: (/* @__PURE__ */ new Date()).toISOString()
1521
+ assigned_workspace_id: null
1314
1522
  }).eq("id", targetId);
1315
- if (error) throw new Error(`Failed to complete target: ${error.message}`);
1523
+ if (error) throw new Error(`Failed to update target: ${error.message}`);
1316
1524
  return {
1317
1525
  content: [{
1318
1526
  type: "text",
1319
- text: `Target ${targetId} marked completed. ${resultsCount} leads found.`
1527
+ text: `Target ${targetId} updated: ${resultsCount} leads found. Workspace released.`
1320
1528
  }]
1321
1529
  };
1322
1530
  }
@@ -1362,10 +1570,150 @@ ${result.text}`;
1362
1570
 
1363
1571
  --- Links (${result.links.length} found, showing max 50) ---
1364
1572
  ${linkList}`;
1573
+ }
1574
+ const pageEmails = extractEmailsFromHtml(result.rawHtml);
1575
+ const pagePhones = extractPhonesFromHtml(result.rawHtml);
1576
+ const pageLinkedIn = extractLinkedInFromHtml(result.rawHtml);
1577
+ if (pageEmails.length > 0 || pagePhones.length > 0 || pageLinkedIn.length > 0) {
1578
+ text += "\n\n--- Auto-extracted Contact Info ---";
1579
+ if (pageEmails.length > 0) text += `
1580
+ Emails: ${pageEmails.join(", ")}`;
1581
+ if (pagePhones.length > 0) text += `
1582
+ Phones: ${pagePhones.join(", ")}`;
1583
+ if (pageLinkedIn.length > 0) text += `
1584
+ LinkedIn: ${pageLinkedIn.join(", ")}`;
1365
1585
  }
1366
1586
  return { content: [{ type: "text", text }] };
1367
1587
  }
1368
1588
  // -----------------------------------------------------------------
1589
+ // Web Find Contacts — SOTA multi-page crawler + email extraction
1590
+ // -----------------------------------------------------------------
1591
+ case "web-find-contacts": {
1592
+ const inputUrl = sanitizeString(args2.url, 2e3);
1593
+ if (!inputUrl) throw new Error("url is required");
1594
+ const companyName = sanitizeString(args2.company_name, 500);
1595
+ const includeSearch = args2.include_search !== false;
1596
+ const fullUrl = inputUrl.startsWith("http") ? inputUrl : `https://${inputUrl}`;
1597
+ let baseUrl;
1598
+ try {
1599
+ baseUrl = new URL(fullUrl);
1600
+ } catch {
1601
+ throw new Error(`Invalid URL: ${inputUrl}`);
1602
+ }
1603
+ const domain = baseUrl.hostname.replace(/^www\./, "");
1604
+ const urlsToTry = [fullUrl];
1605
+ if (!fullUrl.includes("www.")) urlsToTry.push(fullUrl.replace("://", "://www."));
1606
+ if (fullUrl.startsWith("https")) {
1607
+ urlsToTry.push(fullUrl.replace("https", "http"));
1608
+ if (!fullUrl.includes("www.")) urlsToTry.push(fullUrl.replace("https://", "http://www."));
1609
+ }
1610
+ let html = null;
1611
+ for (const tryUrl of urlsToTry) {
1612
+ html = await fetchRawHtml(tryUrl, 12e3);
1613
+ if (html) break;
1614
+ }
1615
+ if (!html) throw new Error(`Could not fetch ${fullUrl} (site may be down or blocking)`);
1616
+ const contactPages = discoverContactPages(html, fullUrl);
1617
+ const pagePromises = contactPages.map(async (pageUrl) => {
1618
+ const pageHtml = await fetchRawHtml(pageUrl, 8e3);
1619
+ return { url: pageUrl, html: pageHtml };
1620
+ });
1621
+ const pageResults = await Promise.allSettled(pagePromises);
1622
+ const successPages = [fullUrl];
1623
+ const allHtmls = [html];
1624
+ for (const result of pageResults) {
1625
+ if (result.status === "fulfilled" && result.value.html) {
1626
+ allHtmls.push(result.value.html);
1627
+ successPages.push(result.value.url);
1628
+ }
1629
+ }
1630
+ const allEmails = /* @__PURE__ */ new Set();
1631
+ const allPhones = /* @__PURE__ */ new Set();
1632
+ const allLinkedIn = /* @__PURE__ */ new Set();
1633
+ for (const pageHtml of allHtmls) {
1634
+ for (const e of extractEmailsFromHtml(pageHtml)) allEmails.add(e);
1635
+ for (const p of extractPhonesFromHtml(pageHtml)) allPhones.add(p);
1636
+ for (const l of extractLinkedInFromHtml(pageHtml)) allLinkedIn.add(l);
1637
+ }
1638
+ let searchNote = "";
1639
+ if (includeSearch) {
1640
+ try {
1641
+ const queries = companyName ? [`"${companyName}" "${domain}" email`, `"@${domain}" email`] : [`"${domain}" email contact`, `"@${domain}" email`, `site:${domain} "@"`];
1642
+ let totalSearchResults = 0;
1643
+ for (const searchQuery of queries) {
1644
+ const searchResults = await webSearch(searchQuery, 8);
1645
+ totalSearchResults += searchResults.length;
1646
+ for (const r of searchResults) {
1647
+ const combined = `${r.title} ${r.snippet}`;
1648
+ const decoded = decodeHtmlEntities(decodeUnicodeEscapes(combined));
1649
+ const snippetEmails = decoded.match(EMAIL_REGEX) || [];
1650
+ for (const e of snippetEmails) {
1651
+ const lower = e.toLowerCase();
1652
+ const domainBase = domain.split(".")[0] ?? domain;
1653
+ if (lower.includes(domainBase) && !NOISE_EMAIL_PATTERNS.some((p) => p.test(lower))) {
1654
+ allEmails.add(lower);
1655
+ }
1656
+ }
1657
+ const snippetPhones = decoded.match(PHONE_NL_REGEX) || [];
1658
+ for (const p of snippetPhones) {
1659
+ const clean = p.replace(/[\s\-().]/g, "");
1660
+ if (clean.length >= 10 && clean.length <= 14) allPhones.add(p.trim());
1661
+ }
1662
+ const snippetLi = decoded.match(LINKEDIN_REGEX) || [];
1663
+ for (const l of snippetLi) allLinkedIn.add(l.replace(/\/$/, ""));
1664
+ }
1665
+ if (allEmails.size > 0) break;
1666
+ }
1667
+ searchNote = ` + ${totalSearchResults} search results`;
1668
+ } catch {
1669
+ searchNote = " (search failed)";
1670
+ }
1671
+ }
1672
+ const foundEmails = [...allEmails];
1673
+ const generalPatterns = /^(info|contact|hello|admin|office|receptie|secretariaat|verkoop|administratie|support|service|boekingen|reserveringen)@/i;
1674
+ const generalEmails = foundEmails.filter((e) => generalPatterns.test(e));
1675
+ const personalEmails = foundEmails.filter((e) => !generalPatterns.test(e));
1676
+ const guessed = guessCommonEmails(domain);
1677
+ const newGuesses = guessed.filter((g) => !allEmails.has(g));
1678
+ const lines = [
1679
+ `=== CONTACT SCAN: ${domain} ===`,
1680
+ `Pages scanned: ${successPages.length}${searchNote}`,
1681
+ ""
1682
+ ];
1683
+ if (foundEmails.length > 0) {
1684
+ lines.push(`EMAILS FOUND (${foundEmails.length}):`);
1685
+ if (generalEmails.length > 0) {
1686
+ lines.push(" General:");
1687
+ for (const e of generalEmails) lines.push(` * ${e}`);
1688
+ }
1689
+ if (personalEmails.length > 0) {
1690
+ lines.push(" Personal/Department:");
1691
+ for (const e of personalEmails) lines.push(` * ${e}`);
1692
+ }
1693
+ } else {
1694
+ lines.push("EMAILS FOUND: None on website");
1695
+ lines.push("SUGGESTED EMAILS (common patterns, verify before use):");
1696
+ for (const e of newGuesses) lines.push(` ? ${e}`);
1697
+ }
1698
+ lines.push("");
1699
+ if ([...allPhones].length > 0) {
1700
+ lines.push(`PHONE NUMBERS (${allPhones.size}):`);
1701
+ for (const p of allPhones) lines.push(` ${p}`);
1702
+ } else {
1703
+ lines.push("PHONE NUMBERS: None found");
1704
+ }
1705
+ lines.push("");
1706
+ if ([...allLinkedIn].length > 0) {
1707
+ lines.push(`LINKEDIN (${allLinkedIn.size}):`);
1708
+ for (const l of allLinkedIn) lines.push(` ${l}`);
1709
+ }
1710
+ lines.push("", "PAGES CHECKED:");
1711
+ for (const p of successPages) lines.push(` [OK] ${p}`);
1712
+ const failedPages = contactPages.filter((p) => !successPages.includes(p));
1713
+ for (const p of failedPages) lines.push(` [--] ${p}`);
1714
+ return { content: [{ type: "text", text: lines.join("\n") }] };
1715
+ }
1716
+ // -----------------------------------------------------------------
1369
1717
  default:
1370
1718
  return { content: [{ type: "text", text: `Unknown agent tool: ${name}` }] };
1371
1719
  }