50c 2.7.0 → 2.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/index.js CHANGED
@@ -23,13 +23,15 @@ const cf = require('./packs/cf');
23
23
  const wp = require('./packs/wp');
24
24
  const ux = require('./packs/ux');
25
25
  const promptEngine = require('./packs/prompt_engine');
26
+ const grabr = require('./packs/grabr');
26
27
 
27
28
  // Tool name mappings by pack
28
29
  const TOOL_PACKS = {
29
30
  beacon: ['hints', 'hints_plus', 'roast', 'quick_vibe', 'one_liner', 'name_it', 'price_it', 'compute', 'ide_conversation', 'learning_stats'],
30
31
  labs: ['genius', 'mind_opener', 'idea_fold', 'agent_autopsy', 'prompt_fortress', 'context_health', 'context_compress', 'context_extract', 'context_reposition'],
31
32
  labs_plus: ['bcalc', 'genius_plus', 'bcalc_why', 'discovery_collision', 'cvi_loop', 'cvi_verify', 'chaos_fingerprint', 'resonance', 'prime_residue', 'echo_sequence', 'conversation_diagnostic', 'handoff'],
32
- prompt_engine: ['prompt_extract', 'prompt_phases', 'prompt_refine', 'prompt_expand', 'prompt_categorize']
33
+ prompt_engine: ['prompt_extract', 'prompt_phases', 'prompt_refine', 'prompt_expand', 'prompt_categorize'],
34
+ grabr: ['grabr_scrape', 'grabr_contact', 'grabr_wayback', 'grabr_sitemap', 'grabr_batch', 'grabr_intel']
33
35
  };
34
36
 
35
37
  // Get all available tools based on enabled packs
@@ -56,6 +58,7 @@ async function getTools() {
56
58
  if (config.packs.wp) tools.push(...wp.WP_TOOLS);
57
59
  if (config.packs.ux) tools.push(...ux.UX_TOOLS);
58
60
  if (config.packs.prompt_engine) tools.push(...promptEngine.PROMPT_ENGINE_TOOLS);
61
+ if (config.packs.grabr) tools.push(...grabr.GRABR_TOOLS);
59
62
 
60
63
  // ENTERPRISE tier
61
64
  if (config.packs.labs_plus) tools.push(...labsPlus.LABS_PLUS_TOOLS);
@@ -112,6 +115,14 @@ async function handleTool(name, args = {}) {
112
115
  return promptEngine.handleTool(name, args);
113
116
  }
114
117
 
118
+ // Grabr tools (PRO)
119
+ if (TOOL_PACKS.grabr.includes(name) || name.startsWith('grabr_')) {
120
+ if (!config.packs.grabr) {
121
+ return { error: 'Requires Pro tier ($99/mo). Enable grabr pack or upgrade at sales.50c.ai/50c-pro/' };
122
+ }
123
+ return grabr.handleTool(name, args);
124
+ }
125
+
115
126
  // Labs+ tools (ENTERPRISE)
116
127
  if (TOOL_PACKS.labs_plus.includes(name)) {
117
128
  if (!config.packs.labs_plus) {
@@ -183,5 +194,6 @@ module.exports = {
183
194
  beacon,
184
195
  labs,
185
196
  labsPlus,
186
- promptEngine
197
+ promptEngine,
198
+ grabr
187
199
  };
@@ -0,0 +1,443 @@
1
+ /**
2
+ * 50c Grabr Pack - PRO Tier
3
+ * Web scraping + contact extraction + Wayback Machine
4
+ * Designed for: Lead gen, OSINT, competitive intel, domain recovery
5
+ */
6
+
7
+ const { apiRequest } = require('../config');
8
+
9
// URL validation regex: http(s) scheme + a host that doesn't start with
// whitespace or URL punctuation, then any non-whitespace remainder.
const URL_REGEX = /^https?:\/\/[^\s/$.?#].[^\s]*$/i;
// Bare domain like "example.com": one or more dot-separated alphanumeric labels.
const DOMAIN_REGEX = /^[a-zA-Z0-9][-a-zA-Z0-9]*(\.[a-zA-Z0-9][-a-zA-Z0-9]*)+$/;

// Contact extraction patterns (from Grabr)
// FIX: the TLD class was [A-Z|a-z], which also matched a literal "|" inside
// the TLD (e.g. "x@y.c|o" was accepted); [A-Za-z] accepts letters only.
const EMAIL_REGEX = /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/g;
// North-American phone formats, most specific first: "+1" prefix, bare
// leading "1", then plain 10-digit forms with ()/./-/space separators.
const PHONE_PATTERNS = [
  /\+1[-.\s]?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}\b/g,
  /\b1[-.\s]?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}\b/g,
  /\b\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}\b/g,
];

// Per-platform profile URL patterns; share/intent links are filtered out
// later in extractSocialLinks.
const SOCIAL_PATTERNS = {
  twitter: /https?:\/\/(www\.)?(twitter\.com|x\.com)\/[a-zA-Z0-9_]+/gi,
  facebook: /https?:\/\/(www\.)?facebook\.com\/[a-zA-Z0-9.]+/gi,
  linkedin: /https?:\/\/(www\.)?linkedin\.com\/(in|company)\/[a-zA-Z0-9-]+/gi,
  instagram: /https?:\/\/(www\.)?instagram\.com\/[a-zA-Z0-9_.]+/gi,
  youtube: /https?:\/\/(www\.)?youtube\.com\/(c|channel|user)\/[a-zA-Z0-9_-]+/gi,
};

// Invalid email patterns to filter: asset filenames mistaken for addresses,
// placeholder domains, and telemetry (sentry) addresses.
const INVALID_EMAIL_PATTERNS = [
  /\.(png|jpg|jpeg|gif|svg|webp)$/i,
  /example\.com/i,
  /test\.com/i,
  /sentry\.io/i,
];
36
+
37
/**
 * Heuristic filter for candidate email strings.
 * Rejects anything matching INVALID_EMAIL_PATTERNS (asset filenames,
 * placeholder/telemetry domains), then requires exactly one "@" with a
 * non-empty local part and a dotted domain.
 * @param {string} email - Candidate address from a regex match.
 * @returns {boolean} true when the candidate looks like a real address.
 */
function isValidEmail(email) {
  const normalized = email.toLowerCase();
  if (INVALID_EMAIL_PATTERNS.some((rx) => rx.test(normalized))) return false;
  const atPieces = email.split('@');
  if (atPieces.length !== 2) return false;
  const [localPart, domainPart] = atPieces;
  return Boolean(localPart) && Boolean(domainPart) && domainPart.includes('.');
}
48
+
49
/**
 * Pull unique, lowercased email addresses out of raw text.
 * @param {string} text - HTML or plain text to scan.
 * @returns {string[]} deduped, lowercased addresses that pass isValidEmail,
 *   in first-seen order.
 */
function extractEmails(text) {
  const candidates = text.match(EMAIL_REGEX) ?? [];
  const seen = new Set();
  for (const candidate of candidates) {
    if (isValidEmail(candidate)) seen.add(candidate.toLowerCase());
  }
  return [...seen];
}
53
+
54
/**
 * Extract North-American phone numbers from text.
 * Each hit is reduced to digits, a leading "1" country code is dropped, and
 * the result is kept only when it is a plausible 10-digit NANP number:
 * area code and exchange must not start with 0/1, and a single repeated
 * digit is rejected. Output is normalized to "+1 (AAA) EEE-NNNN".
 * @param {string} text - HTML or plain text to scan.
 * @returns {string[]} unique formatted numbers, in discovery order.
 */
function extractPhones(text) {
  const found = new Set();
  for (const pattern of PHONE_PATTERNS) {
    for (const hit of text.matchAll(pattern)) {
      let digits = hit[0].replace(/\D/g, '');
      if (digits.length === 11 && digits.startsWith('1')) digits = digits.slice(1);
      if (digits.length !== 10) continue;
      // NANP rules: area code and exchange cannot begin with 0 or 1.
      if ('01'.includes(digits[0]) || '01'.includes(digits[3])) continue;
      if (/^(\d)\1+$/.test(digits)) continue; // single repeated digit → junk
      found.add(`+1 (${digits.slice(0, 3)}) ${digits.slice(3, 6)}-${digits.slice(6)}`);
    }
  }
  return [...found];
}
73
+
74
/**
 * Find the first profile link per social platform in the given text.
 * Share/intent/sharer widget URLs are discarded so only real profile or
 * page links survive.
 * @param {string} text - HTML or plain text to scan.
 * @returns {Object<string,string>} platform name -> first valid URL found.
 */
function extractSocialLinks(text) {
  const found = {};
  const isProfileLink = (url) =>
    !url.includes('/share') && !url.includes('/intent') && !url.includes('/sharer');
  for (const [platform, pattern] of Object.entries(SOCIAL_PATTERNS)) {
    const hits = (text.match(pattern) || []).filter(isProfileLink);
    if (hits.length > 0) found[platform] = hits[0];
  }
  return found;
}
90
+
91
/**
 * Find the first US-style street address in text.
 * Matches "123 Some Street, City, ST 12345" with the common street-suffix
 * vocabulary (Street/St/Avenue/... ). Only the first hit is returned.
 * @param {string} text - HTML or plain text to scan.
 * @returns {{street:string,city:string,state:string,zip:string}|null}
 */
function extractAddress(text) {
  const addressRegex = /(\d+\s+[A-Za-z\s]+(?:Street|St|Avenue|Ave|Road|Rd|Boulevard|Blvd|Lane|Ln|Drive|Dr|Court|Ct|Way|Place|Pl)\.?),?\s*([A-Za-z\s]+),?\s*([A-Z]{2})\s*(\d{5})/gi;
  const hit = addressRegex.exec(text);
  if (!hit) return null;
  const [, street, city, state, zip] = hit;
  return {
    street: street.trim(),
    city: city.trim(),
    state,
    zip
  };
}
104
+
105
// Tool implementations

/**
 * Scrape a URL for contact info (emails, phones, address, social links).
 * With depth >= 2 and no emails on the landing page, follows up to two
 * contact/about links found in the HTML and merges their results.
 * @param {string} url - Absolute http(s) URL.
 * @param {number} [depth=1] - 1 = landing page only, >= 2 = follow contact/about links.
 * @returns {Promise<object>} { success, url, depth, contacts, meta } on
 *   success, otherwise { error }.
 */
async function grabrScrape(url, depth = 1) {
  if (!URL_REGEX.test(url)) {
    return { error: 'Invalid URL format' };
  }

  try {
    // Use 50c page_fetch via API
    const result = await apiRequest('page_fetch', { url });
    if (result.error) return { error: result.error };

    // apiRequest may return the page under `content` or `text` — accept both.
    const html = result.content || result.text || '';

    const contacts = {
      emails: extractEmails(html),
      phones: extractPhones(html),
      address: extractAddress(html),
      social: extractSocialLinks(html)
    };

    // Deep scrape - follow contact/about pages when the landing page had no emails
    if (depth >= 2 && contacts.emails.length === 0) {
      const contactLinks = html.match(/href=["']([^"']*(?:contact|about)[^"']*)["']/gi) || [];
      for (const linkMatch of contactLinks.slice(0, 2)) {
        const href = linkMatch.match(/href=["']([^"']+)["']/)?.[1];
        if (href) {
          try {
            const fullUrl = href.startsWith('http') ? href : new URL(href, url).href;
            const subResult = await apiRequest('page_fetch', { url: fullUrl });
            // FIX: use the same content/text fallback as the main fetch so
            // sub-pages served under `text` are not silently skipped.
            const subHtml = subResult.content || subResult.text || '';
            if (subHtml) {
              contacts.emails.push(...extractEmails(subHtml));
              contacts.phones.push(...extractPhones(subHtml));
              if (!contacts.address) contacts.address = extractAddress(subHtml);
              Object.assign(contacts.social, extractSocialLinks(subHtml));
            }
          } catch (e) { /* skip broken links */ }
        }
      }
      // Dedupe after merging sub-page results
      contacts.emails = [...new Set(contacts.emails)];
      contacts.phones = [...new Set(contacts.phones)];
    }

    return {
      success: true,
      url,
      depth,
      contacts,
      meta: {
        title: (html.match(/<title>([^<]+)<\/title>/i) || [])[1] || null,
        description: (html.match(/<meta[^>]*name=["']description["'][^>]*content=["']([^"']+)["']/i) || [])[1] || null
      }
    };
  } catch (e) {
    return { error: e.message || 'Scrape failed' };
  }
}
162
+
163
/**
 * Extract all contact signals (emails, phones, address, social links)
 * from a raw HTML or text string. Purely local — no network access.
 * @param {string} content - HTML or plain text to extract from.
 * @returns {Promise<object>} { success, emails, phones, address, social }
 *   or { error } when content is missing or not a string.
 */
async function grabrContact(content) {
  const usable = typeof content === 'string' && content.length > 0;
  if (!usable) {
    return { error: 'Content required' };
  }

  return {
    success: true,
    emails: extractEmails(content),
    phones: extractPhones(content),
    address: extractAddress(content),
    social: extractSocialLinks(content)
  };
}
176
+
177
/**
 * Look up Wayback Machine snapshots for a URL over the last `years` years.
 * Probes the archive.org "available" API at two timestamps per year
 * (Jan 1 and Jun 1), newest year first, and collects up to 10 unique
 * snapshots. Each probe has a 10 s timeout and the loop sleeps 500 ms
 * between probes to stay polite to archive.org.
 * @param {string} url - Absolute http(s) URL to check.
 * @param {number} [years=5] - How many years back to probe.
 * @returns {Promise<object>} { success, url, snapshots, oldest, newest }
 *   on success (snapshots are newest-first), otherwise { error }.
 */
async function grabrWayback(url, years = 5) {
  if (!URL_REGEX.test(url)) {
    return { error: 'Invalid URL format' };
  }

  try {
    const snapshots = [];
    const currentYear = new Date().getFullYear();
    const startYear = currentYear - years;
    const months = [1, 6]; // Check Jan and June

    for (let year = currentYear; year >= startYear && snapshots.length < 10; year--) {
      for (const month of months) {
        if (snapshots.length >= 10) break;
        // Wayback timestamps are YYYYMMDD (day pinned to 01 here).
        const timestamp = `${year}${String(month).padStart(2, '0')}01`;
        const checkUrl = `https://archive.org/wayback/available?url=${encodeURIComponent(url)}&timestamp=${timestamp}`;

        try {
          const resp = await fetch(checkUrl, {
            headers: { 'User-Agent': '50c-grabr/1.0' },
            signal: AbortSignal.timeout(10000)
          });
          if (resp.ok) {
            const data = await resp.json();
            if (data.archived_snapshots?.closest?.available) {
              const snap = data.archived_snapshots.closest;
              // The API returns the closest snapshot to the requested
              // timestamp, so adjacent probes can resolve to the same
              // capture — dedupe by timestamp.
              if (!snapshots.find(s => s.timestamp === snap.timestamp)) {
                snapshots.push({
                  timestamp: snap.timestamp,
                  url: snap.url,
                  date: `${snap.timestamp.slice(0,4)}-${snap.timestamp.slice(4,6)}-${snap.timestamp.slice(6,8)}`
                });
              }
            }
          }
        } catch (e) { /* skip failed checks */ }

        // Rate limit
        await new Promise(r => setTimeout(r, 500));
      }
    }

    // Probing runs newest-year-first, so index 0 is the newest snapshot
    // and the last element is the oldest.
    return {
      success: true,
      url,
      snapshots,
      oldest: snapshots[snapshots.length - 1]?.date || null,
      newest: snapshots[0]?.date || null
    };
  } catch (e) {
    return { error: e.message || 'Wayback lookup failed' };
  }
}
230
+
231
/**
 * Fetch and parse a site's sitemap, returning the listed page URLs.
 * If `url` does not already point at a sitemap, "/sitemap.xml" is appended.
 * When that fetch fails, robots.txt is consulted for a "Sitemap:" directive
 * as a fallback. URL list is capped at 500 entries; `total` reports the
 * uncapped count.
 * @param {string} url - Site URL or direct sitemap URL.
 * @returns {Promise<object>} { success, sitemapUrl, urls, total } or { error }.
 */
async function grabrSitemap(url) {
  // Normalize to sitemap URL
  let sitemapUrl = url;
  if (!url.includes('sitemap')) {
    const base = url.replace(/\/$/, '');
    sitemapUrl = `${base}/sitemap.xml`;
  }

  try {
    // NOTE(review): assumes apiRequest returns the page body under
    // `content` — confirm against the config module's page_fetch contract.
    const result = await apiRequest('page_fetch', { url: sitemapUrl });
    if (result.error) {
      // Try robots.txt fallback
      const robotsUrl = url.replace(/\/$/, '') + '/robots.txt';
      const robotsResult = await apiRequest('page_fetch', { url: robotsUrl });
      if (robotsResult.content) {
        const sitemapMatch = robotsResult.content.match(/Sitemap:\s*(\S+)/i);
        if (sitemapMatch) {
          const altResult = await apiRequest('page_fetch', { url: sitemapMatch[1] });
          if (altResult.content) {
            // Graft the fallback body onto `result` so the parsing below
            // works the same for both paths.
            result.content = altResult.content;
          }
        }
      }
    }

    if (!result.content) {
      return { error: 'Sitemap not found' };
    }

    // Parse sitemap XML: every <loc> element holds one page URL.
    const urls = [];
    const locMatches = result.content.matchAll(/<loc>([^<]+)<\/loc>/gi);
    for (const match of locMatches) {
      urls.push(match[1]);
    }

    return {
      success: true,
      sitemapUrl,
      urls: urls.slice(0, 500), // Cap at 500
      total: urls.length
    };
  } catch (e) {
    return { error: e.message || 'Sitemap parse failed' };
  }
}
277
+
278
/**
 * Sequentially scrape up to 10 URLs at depth 1, pausing between requests.
 * A positive delayMs is clamped to a 500 ms minimum; 0 disables the pause.
 * Per-URL failures are captured in each result rather than aborting the batch.
 * @param {string[]} urls - 1-10 URLs to scrape.
 * @param {number} [delayMs=1000] - Pause between requests in milliseconds.
 * @returns {Promise<object>} { success, processed, results } or { error }.
 */
async function grabrBatch(urls, delayMs = 1000) {
  if (!Array.isArray(urls) || urls.length === 0) {
    return { error: 'URLs array required' };
  }
  if (urls.length > 10) {
    return { error: 'Maximum 10 URLs per batch' };
  }

  const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
  const results = [];
  for (const url of urls) {
    const scraped = await grabrScrape(url, 1);
    results.push({ url, ...scraped });

    // Rate limit between requests (minimum 500 ms when enabled).
    if (delayMs > 0) await sleep(Math.max(delayMs, 500));
  }

  return {
    success: true,
    processed: results.length,
    results
  };
}
303
+
304
/**
 * Aggregate domain intelligence: contacts from a depth-2 scrape, page list
 * from the sitemap, and Wayback history — all fetched in parallel.
 * Sitemap/wayback failures degrade to empty sections instead of failing
 * the whole report.
 * @param {string} domain - Bare domain name, e.g. "example.com".
 * @returns {Promise<object>} { success, domain, contacts, meta, pages, history }
 *   or { error } for a malformed domain.
 */
async function grabrIntel(domain) {
  if (!DOMAIN_REGEX.test(domain)) {
    return { error: 'Invalid domain format' };
  }

  const url = `https://${domain}`;

  // Kick off all three lookups at once and wait for the lot.
  const scrapePromise = grabrScrape(url, 2);
  const sitemapPromise = grabrSitemap(url).catch(() => ({ urls: [], total: 0 }));
  const waybackPromise = grabrWayback(url, 3).catch(() => ({ snapshots: [] }));
  const [scrapeResult, sitemapResult, waybackResult] = await Promise.all([
    scrapePromise,
    sitemapPromise,
    waybackPromise
  ]);

  const snapshotList = waybackResult.snapshots || [];
  return {
    success: true,
    domain,
    contacts: scrapeResult.contacts || {},
    meta: scrapeResult.meta || {},
    pages: {
      total: sitemapResult.total || 0,
      sample: (sitemapResult.urls || []).slice(0, 10)
    },
    history: {
      snapshots: snapshotList.length,
      oldest: waybackResult.oldest,
      newest: waybackResult.newest
    }
  };
}
334
+
335
// Tool definitions for MCP
// Each entry carries the standard MCP fields (name, description,
// inputSchema) plus 50c-specific metadata: `cost` (USD per call, also
// echoed in the description) and `tier` (subscription tier gating the tool).
const GRABR_TOOLS = [
  {
    name: 'grabr_scrape',
    description: 'Deep scrape URL for contacts (email, phone, address) + social links. $0.05',
    inputSchema: {
      type: 'object',
      properties: {
        url: { type: 'string', description: 'URL to scrape' },
        depth: { type: 'number', description: 'Scrape depth 1-3 (default 1)', default: 1 }
      },
      required: ['url']
    },
    cost: 0.05,
    tier: 'pro'
  },
  {
    name: 'grabr_contact',
    description: 'Extract contacts from HTML/text content. $0.02',
    inputSchema: {
      type: 'object',
      properties: {
        content: { type: 'string', description: 'HTML or text to extract from' }
      },
      required: ['content']
    },
    cost: 0.02,
    tier: 'pro'
  },
  {
    name: 'grabr_wayback',
    description: 'Get Wayback Machine snapshots for URL. $0.02',
    inputSchema: {
      type: 'object',
      properties: {
        url: { type: 'string', description: 'URL to check' },
        years: { type: 'number', description: 'Years to look back (default 5)', default: 5 }
      },
      required: ['url']
    },
    cost: 0.02,
    tier: 'pro'
  },
  {
    name: 'grabr_sitemap',
    description: 'Parse sitemap.xml and return all page URLs. $0.02',
    inputSchema: {
      type: 'object',
      properties: {
        url: { type: 'string', description: 'Site URL or sitemap URL' }
      },
      required: ['url']
    },
    cost: 0.02,
    tier: 'pro'
  },
  {
    name: 'grabr_batch',
    description: 'Scrape up to 10 URLs with rate limiting. $0.10',
    inputSchema: {
      type: 'object',
      properties: {
        urls: { type: 'array', items: { type: 'string' }, description: 'URLs to scrape (max 10)' },
        delayMs: { type: 'number', description: 'Delay between requests in ms (min 500)', default: 1000 }
      },
      required: ['urls']
    },
    cost: 0.10,
    tier: 'pro'
  },
  {
    name: 'grabr_intel',
    description: 'Full domain intel: contacts, pages, social, history. $0.08',
    inputSchema: {
      type: 'object',
      properties: {
        domain: { type: 'string', description: 'Domain name (e.g., example.com)' }
      },
      required: ['domain']
    },
    cost: 0.08,
    tier: 'pro'
  }
];
419
+
420
/**
 * Route a grabr tool invocation to its implementation.
 * Unknown names and thrown errors are converted into { error } results so
 * the MCP layer never sees an exception from this pack.
 * @param {string} name - Tool name, e.g. "grabr_scrape".
 * @param {object} args - Tool arguments matching the tool's inputSchema.
 * @returns {Promise<object>} the tool's result object or { error }.
 */
async function handleTool(name, args) {
  // Thunks keep the sibling implementations unreferenced until invoked.
  const dispatch = {
    grabr_scrape: () => grabrScrape(args.url, args.depth || 1),
    grabr_contact: () => grabrContact(args.content),
    grabr_wayback: () => grabrWayback(args.url, args.years || 5),
    grabr_sitemap: () => grabrSitemap(args.url),
    grabr_batch: () => grabrBatch(args.urls, args.delayMs || 1000),
    grabr_intel: () => grabrIntel(args.domain)
  };

  // Own-property check so inherited keys ("toString", "constructor")
  // fall through to the unknown-tool error, matching the switch default.
  if (!Object.prototype.hasOwnProperty.call(dispatch, name)) {
    return { error: `Unknown grabr tool: ${name}` };
  }

  try {
    return await dispatch[name]();
  } catch (e) {
    return { error: e.message || 'Tool execution failed' };
  }
}
442
+
443
+ module.exports = { GRABR_TOOLS, handleTool };
package/lib/packs.js CHANGED
@@ -87,6 +87,13 @@ const PACKS = {
87
87
  tier: 'pro',
88
88
  highlights: ['domains_expiring', 'writing_draft', 'bookmarks_prune', 'csv_clean']
89
89
  },
90
+ grabr: {
91
+ name: 'grabr',
92
+ description: 'Web scraping - contacts, social links, Wayback, sitemaps',
93
+ tools: 6,
94
+ tier: 'pro',
95
+ highlights: ['grabr_scrape', 'grabr_intel', 'grabr_wayback', 'grabr_batch']
96
+ },
90
97
 
91
98
  // === ENTERPRISE TIER ($499/mo) ===
92
99
  labs_plus: {
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "50c",
3
- "version": "2.7.0",
4
- "description": "AI toolkit with prompt engine. One install, 105+ tools.",
3
+ "version": "2.8.0",
4
+ "description": "AI toolkit with grabr web scraping. One install, 111+ tools.",
5
5
  "main": "lib/index.js",
6
6
  "bin": {
7
7
  "50c": "./bin/50c.js"
@@ -13,17 +13,16 @@
13
13
  "tools",
14
14
  "genius",
15
15
  "prompt-engine",
16
- "agent-autopsy",
17
- "prompt-fortress",
16
+ "grabr",
17
+ "scraper",
18
+ "wayback",
18
19
  "bcalc",
19
20
  "vault",
20
21
  "cloudflare",
21
22
  "whm",
22
23
  "cpanel",
23
24
  "wordpress",
24
- "librarian",
25
- "bookmarks",
26
- "csv"
25
+ "librarian"
27
26
  ],
28
27
  "author": "genxis.com",
29
28
  "license": "MIT",