50c 2.7.0 → 2.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/index.js CHANGED
@@ -23,13 +23,15 @@ const cf = require('./packs/cf');
23
23
  const wp = require('./packs/wp');
24
24
  const ux = require('./packs/ux');
25
25
  const promptEngine = require('./packs/prompt_engine');
26
+ const grabr = require('./packs/grabr');
26
27
 
27
28
  // Tool name mappings by pack
28
29
  const TOOL_PACKS = {
29
30
  beacon: ['hints', 'hints_plus', 'roast', 'quick_vibe', 'one_liner', 'name_it', 'price_it', 'compute', 'ide_conversation', 'learning_stats'],
30
31
  labs: ['genius', 'mind_opener', 'idea_fold', 'agent_autopsy', 'prompt_fortress', 'context_health', 'context_compress', 'context_extract', 'context_reposition'],
31
32
  labs_plus: ['bcalc', 'genius_plus', 'bcalc_why', 'discovery_collision', 'cvi_loop', 'cvi_verify', 'chaos_fingerprint', 'resonance', 'prime_residue', 'echo_sequence', 'conversation_diagnostic', 'handoff'],
32
- prompt_engine: ['prompt_extract', 'prompt_phases', 'prompt_refine', 'prompt_expand', 'prompt_categorize']
33
+ prompt_engine: ['prompt_extract', 'prompt_phases', 'prompt_refine', 'prompt_expand', 'prompt_categorize'],
34
+ grabr: ['grabr_scrape', 'grabr_contact', 'grabr_wayback', 'grabr_sitemap', 'grabr_batch', 'grabr_intel']
33
35
  };
34
36
 
35
37
  // Get all available tools based on enabled packs
@@ -56,6 +58,7 @@ async function getTools() {
56
58
  if (config.packs.wp) tools.push(...wp.WP_TOOLS);
57
59
  if (config.packs.ux) tools.push(...ux.UX_TOOLS);
58
60
  if (config.packs.prompt_engine) tools.push(...promptEngine.PROMPT_ENGINE_TOOLS);
61
+ if (config.packs.grabr) tools.push(...grabr.GRABR_TOOLS);
59
62
 
60
63
  // ENTERPRISE tier
61
64
  if (config.packs.labs_plus) tools.push(...labsPlus.LABS_PLUS_TOOLS);
@@ -112,6 +115,14 @@ async function handleTool(name, args = {}) {
112
115
  return promptEngine.handleTool(name, args);
113
116
  }
114
117
 
118
+ // Grabr tools (PRO)
119
+ if (TOOL_PACKS.grabr.includes(name) || name.startsWith('grabr_')) {
120
+ if (!config.packs.grabr) {
121
+ return { error: 'Requires Pro tier ($99/mo). Enable grabr pack or upgrade at sales.50c.ai/50c-pro/' };
122
+ }
123
+ return grabr.handleTool(name, args);
124
+ }
125
+
115
126
  // Labs+ tools (ENTERPRISE)
116
127
  if (TOOL_PACKS.labs_plus.includes(name)) {
117
128
  if (!config.packs.labs_plus) {
@@ -183,5 +194,6 @@ module.exports = {
183
194
  beacon,
184
195
  labs,
185
196
  labsPlus,
186
- promptEngine
197
+ promptEngine,
198
+ grabr
187
199
  };
@@ -0,0 +1,443 @@
1
+ /**
2
+ * 50c Grabr Pack - PRO Tier
3
+ * Web scraping + contact extraction + Wayback Machine
4
+ * Designed for: Lead gen, OSINT, competitive intel, domain recovery
5
+ */
6
+
7
+ const { apiRequest } = require('../config');
8
+
9
// URL validation regex: http(s) scheme + a host that doesn't start with
// whitespace or URL punctuation, then any non-whitespace remainder.
const URL_REGEX = /^https?:\/\/[^\s/$.?#].[^\s]*$/i;
// Bare domain like "example.com": one or more dot-separated alphanumeric labels.
const DOMAIN_REGEX = /^[a-zA-Z0-9][-a-zA-Z0-9]*(\.[a-zA-Z0-9][-a-zA-Z0-9]*)+$/;

// Contact extraction patterns (from Grabr)
// FIX: the TLD class was [A-Z|a-z], which also matched a literal "|" inside
// the TLD (e.g. "x@y.c|o" was accepted); [A-Za-z] accepts letters only.
const EMAIL_REGEX = /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/g;
// North-American phone formats, most specific first: "+1" prefix, bare
// leading "1", then plain 10-digit forms with ()/./-/space separators.
const PHONE_PATTERNS = [
  /\+1[-.\s]?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}\b/g,
  /\b1[-.\s]?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}\b/g,
  /\b\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}\b/g,
];

// Per-platform profile URL patterns; share/intent links are filtered out
// later in extractSocialLinks.
const SOCIAL_PATTERNS = {
  twitter: /https?:\/\/(www\.)?(twitter\.com|x\.com)\/[a-zA-Z0-9_]+/gi,
  facebook: /https?:\/\/(www\.)?facebook\.com\/[a-zA-Z0-9.]+/gi,
  linkedin: /https?:\/\/(www\.)?linkedin\.com\/(in|company)\/[a-zA-Z0-9-]+/gi,
  instagram: /https?:\/\/(www\.)?instagram\.com\/[a-zA-Z0-9_.]+/gi,
  youtube: /https?:\/\/(www\.)?youtube\.com\/(c|channel|user)\/[a-zA-Z0-9_-]+/gi,
};

// Invalid email patterns to filter: asset filenames mistaken for addresses,
// placeholder domains, and telemetry (sentry) addresses.
const INVALID_EMAIL_PATTERNS = [
  /\.(png|jpg|jpeg|gif|svg|webp)$/i,
  /example\.com/i,
  /test\.com/i,
  /sentry\.io/i,
];
36
+
37
/**
 * Heuristic filter for candidate email strings.
 * Rejects anything matching INVALID_EMAIL_PATTERNS (asset filenames,
 * placeholder/telemetry domains), then requires exactly one "@" with a
 * non-empty local part and a dotted domain.
 * @param {string} email - Candidate address from a regex match.
 * @returns {boolean} true when the candidate looks like a real address.
 */
function isValidEmail(email) {
  const normalized = email.toLowerCase();
  if (INVALID_EMAIL_PATTERNS.some((rx) => rx.test(normalized))) return false;
  const atPieces = email.split('@');
  if (atPieces.length !== 2) return false;
  const [localPart, domainPart] = atPieces;
  return Boolean(localPart) && Boolean(domainPart) && domainPart.includes('.');
}
48
+
49
/**
 * Pull unique, lowercased email addresses out of raw text.
 * @param {string} text - HTML or plain text to scan.
 * @returns {string[]} deduped, lowercased addresses that pass isValidEmail,
 *   in first-seen order.
 */
function extractEmails(text) {
  const candidates = text.match(EMAIL_REGEX) ?? [];
  const seen = new Set();
  for (const candidate of candidates) {
    if (isValidEmail(candidate)) seen.add(candidate.toLowerCase());
  }
  return [...seen];
}
53
+
54
/**
 * Extract North-American phone numbers from text.
 * Each hit is reduced to digits, a leading "1" country code is dropped, and
 * the result is kept only when it is a plausible 10-digit NANP number:
 * area code and exchange must not start with 0/1, and a single repeated
 * digit is rejected. Output is normalized to "+1 (AAA) EEE-NNNN".
 * @param {string} text - HTML or plain text to scan.
 * @returns {string[]} unique formatted numbers, in discovery order.
 */
function extractPhones(text) {
  const found = new Set();
  for (const pattern of PHONE_PATTERNS) {
    for (const hit of text.matchAll(pattern)) {
      let digits = hit[0].replace(/\D/g, '');
      if (digits.length === 11 && digits.startsWith('1')) digits = digits.slice(1);
      if (digits.length !== 10) continue;
      // NANP rules: area code and exchange cannot begin with 0 or 1.
      if ('01'.includes(digits[0]) || '01'.includes(digits[3])) continue;
      if (/^(\d)\1+$/.test(digits)) continue; // single repeated digit → junk
      found.add(`+1 (${digits.slice(0, 3)}) ${digits.slice(3, 6)}-${digits.slice(6)}`);
    }
  }
  return [...found];
}
73
+
74
/**
 * Find the first profile link per social platform in the given text.
 * Share/intent/sharer widget URLs are discarded so only real profile or
 * page links survive.
 * @param {string} text - HTML or plain text to scan.
 * @returns {Object<string,string>} platform name -> first valid URL found.
 */
function extractSocialLinks(text) {
  const found = {};
  const isProfileLink = (url) =>
    !url.includes('/share') && !url.includes('/intent') && !url.includes('/sharer');
  for (const [platform, pattern] of Object.entries(SOCIAL_PATTERNS)) {
    const hits = (text.match(pattern) || []).filter(isProfileLink);
    if (hits.length > 0) found[platform] = hits[0];
  }
  return found;
}
90
+
91
/**
 * Find the first US-style street address in text.
 * Matches "123 Some Street, City, ST 12345" with the common street-suffix
 * vocabulary (Street/St/Avenue/... ). Only the first hit is returned.
 * @param {string} text - HTML or plain text to scan.
 * @returns {{street:string,city:string,state:string,zip:string}|null}
 */
function extractAddress(text) {
  const addressRegex = /(\d+\s+[A-Za-z\s]+(?:Street|St|Avenue|Ave|Road|Rd|Boulevard|Blvd|Lane|Ln|Drive|Dr|Court|Ct|Way|Place|Pl)\.?),?\s*([A-Za-z\s]+),?\s*([A-Z]{2})\s*(\d{5})/gi;
  const hit = addressRegex.exec(text);
  if (!hit) return null;
  const [, street, city, state, zip] = hit;
  return {
    street: street.trim(),
    city: city.trim(),
    state,
    zip
  };
}
104
+
105
// Tool implementations

/**
 * Scrape a URL for contact info (emails, phones, address, social links).
 * With depth >= 2 and no emails on the landing page, follows up to two
 * contact/about links found in the HTML and merges their results.
 * @param {string} url - Absolute http(s) URL.
 * @param {number} [depth=1] - 1 = landing page only, >= 2 = follow contact/about links.
 * @returns {Promise<object>} { success, url, depth, contacts, meta } on
 *   success, otherwise { error }.
 */
async function grabrScrape(url, depth = 1) {
  if (!URL_REGEX.test(url)) {
    return { error: 'Invalid URL format' };
  }

  try {
    // Use 50c page_fetch via API
    const result = await apiRequest('page_fetch', { url });
    if (result.error) return { error: result.error };

    // apiRequest may return the page under `content` or `text` — accept both.
    const html = result.content || result.text || '';

    const contacts = {
      emails: extractEmails(html),
      phones: extractPhones(html),
      address: extractAddress(html),
      social: extractSocialLinks(html)
    };

    // Deep scrape - follow contact/about pages when the landing page had no emails
    if (depth >= 2 && contacts.emails.length === 0) {
      const contactLinks = html.match(/href=["']([^"']*(?:contact|about)[^"']*)["']/gi) || [];
      for (const linkMatch of contactLinks.slice(0, 2)) {
        const href = linkMatch.match(/href=["']([^"']+)["']/)?.[1];
        if (href) {
          try {
            const fullUrl = href.startsWith('http') ? href : new URL(href, url).href;
            const subResult = await apiRequest('page_fetch', { url: fullUrl });
            // FIX: use the same content/text fallback as the main fetch so
            // sub-pages served under `text` are not silently skipped.
            const subHtml = subResult.content || subResult.text || '';
            if (subHtml) {
              contacts.emails.push(...extractEmails(subHtml));
              contacts.phones.push(...extractPhones(subHtml));
              if (!contacts.address) contacts.address = extractAddress(subHtml);
              Object.assign(contacts.social, extractSocialLinks(subHtml));
            }
          } catch (e) { /* skip broken links */ }
        }
      }
      // Dedupe after merging sub-page results
      contacts.emails = [...new Set(contacts.emails)];
      contacts.phones = [...new Set(contacts.phones)];
    }

    return {
      success: true,
      url,
      depth,
      contacts,
      meta: {
        title: (html.match(/<title>([^<]+)<\/title>/i) || [])[1] || null,
        description: (html.match(/<meta[^>]*name=["']description["'][^>]*content=["']([^"']+)["']/i) || [])[1] || null
      }
    };
  } catch (e) {
    return { error: e.message || 'Scrape failed' };
  }
}
162
+
163
/**
 * Extract all contact signals (emails, phones, address, social links)
 * from a raw HTML or text string. Purely local — no network access.
 * @param {string} content - HTML or plain text to extract from.
 * @returns {Promise<object>} { success, emails, phones, address, social }
 *   or { error } when content is missing or not a string.
 */
async function grabrContact(content) {
  const usable = typeof content === 'string' && content.length > 0;
  if (!usable) {
    return { error: 'Content required' };
  }

  return {
    success: true,
    emails: extractEmails(content),
    phones: extractPhones(content),
    address: extractAddress(content),
    social: extractSocialLinks(content)
  };
}
176
+
177
/**
 * Look up Wayback Machine snapshots for a URL over the last `years` years.
 * Probes the archive.org "available" API at two timestamps per year
 * (Jan 1 and Jun 1), newest year first, and collects up to 10 unique
 * snapshots. Each probe has a 10 s timeout and the loop sleeps 500 ms
 * between probes to stay polite to archive.org.
 * @param {string} url - Absolute http(s) URL to check.
 * @param {number} [years=5] - How many years back to probe.
 * @returns {Promise<object>} { success, url, snapshots, oldest, newest }
 *   on success (snapshots are newest-first), otherwise { error }.
 */
async function grabrWayback(url, years = 5) {
  if (!URL_REGEX.test(url)) {
    return { error: 'Invalid URL format' };
  }

  try {
    const snapshots = [];
    const currentYear = new Date().getFullYear();
    const startYear = currentYear - years;
    const months = [1, 6]; // Check Jan and June

    for (let year = currentYear; year >= startYear && snapshots.length < 10; year--) {
      for (const month of months) {
        if (snapshots.length >= 10) break;
        // Wayback timestamps are YYYYMMDD (day pinned to 01 here).
        const timestamp = `${year}${String(month).padStart(2, '0')}01`;
        const checkUrl = `https://archive.org/wayback/available?url=${encodeURIComponent(url)}&timestamp=${timestamp}`;

        try {
          const resp = await fetch(checkUrl, {
            headers: { 'User-Agent': '50c-grabr/1.0' },
            signal: AbortSignal.timeout(10000)
          });
          if (resp.ok) {
            const data = await resp.json();
            if (data.archived_snapshots?.closest?.available) {
              const snap = data.archived_snapshots.closest;
              // The API returns the closest snapshot to the requested
              // timestamp, so adjacent probes can resolve to the same
              // capture — dedupe by timestamp.
              if (!snapshots.find(s => s.timestamp === snap.timestamp)) {
                snapshots.push({
                  timestamp: snap.timestamp,
                  url: snap.url,
                  date: `${snap.timestamp.slice(0,4)}-${snap.timestamp.slice(4,6)}-${snap.timestamp.slice(6,8)}`
                });
              }
            }
          }
        } catch (e) { /* skip failed checks */ }

        // Rate limit
        await new Promise(r => setTimeout(r, 500));
      }
    }

    // Probing runs newest-year-first, so index 0 is the newest snapshot
    // and the last element is the oldest.
    return {
      success: true,
      url,
      snapshots,
      oldest: snapshots[snapshots.length - 1]?.date || null,
      newest: snapshots[0]?.date || null
    };
  } catch (e) {
    return { error: e.message || 'Wayback lookup failed' };
  }
}
230
+
231
/**
 * Fetch and parse a site's sitemap, returning the listed page URLs.
 * If `url` does not already point at a sitemap, "/sitemap.xml" is appended.
 * When that fetch fails, robots.txt is consulted for a "Sitemap:" directive
 * as a fallback. URL list is capped at 500 entries; `total` reports the
 * uncapped count.
 * @param {string} url - Site URL or direct sitemap URL.
 * @returns {Promise<object>} { success, sitemapUrl, urls, total } or { error }.
 */
async function grabrSitemap(url) {
  // Normalize to sitemap URL
  let sitemapUrl = url;
  if (!url.includes('sitemap')) {
    const base = url.replace(/\/$/, '');
    sitemapUrl = `${base}/sitemap.xml`;
  }

  try {
    // NOTE(review): assumes apiRequest returns the page body under
    // `content` — confirm against the config module's page_fetch contract.
    const result = await apiRequest('page_fetch', { url: sitemapUrl });
    if (result.error) {
      // Try robots.txt fallback
      const robotsUrl = url.replace(/\/$/, '') + '/robots.txt';
      const robotsResult = await apiRequest('page_fetch', { url: robotsUrl });
      if (robotsResult.content) {
        const sitemapMatch = robotsResult.content.match(/Sitemap:\s*(\S+)/i);
        if (sitemapMatch) {
          const altResult = await apiRequest('page_fetch', { url: sitemapMatch[1] });
          if (altResult.content) {
            // Graft the fallback body onto `result` so the parsing below
            // works the same for both paths.
            result.content = altResult.content;
          }
        }
      }
    }

    if (!result.content) {
      return { error: 'Sitemap not found' };
    }

    // Parse sitemap XML: every <loc> element holds one page URL.
    const urls = [];
    const locMatches = result.content.matchAll(/<loc>([^<]+)<\/loc>/gi);
    for (const match of locMatches) {
      urls.push(match[1]);
    }

    return {
      success: true,
      sitemapUrl,
      urls: urls.slice(0, 500), // Cap at 500
      total: urls.length
    };
  } catch (e) {
    return { error: e.message || 'Sitemap parse failed' };
  }
}
277
+
278
/**
 * Sequentially scrape up to 10 URLs at depth 1, pausing between requests.
 * A positive delayMs is clamped to a 500 ms minimum; 0 disables the pause.
 * Per-URL failures are captured in each result rather than aborting the batch.
 * @param {string[]} urls - 1-10 URLs to scrape.
 * @param {number} [delayMs=1000] - Pause between requests in milliseconds.
 * @returns {Promise<object>} { success, processed, results } or { error }.
 */
async function grabrBatch(urls, delayMs = 1000) {
  if (!Array.isArray(urls) || urls.length === 0) {
    return { error: 'URLs array required' };
  }
  if (urls.length > 10) {
    return { error: 'Maximum 10 URLs per batch' };
  }

  const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
  const results = [];
  for (const url of urls) {
    const scraped = await grabrScrape(url, 1);
    results.push({ url, ...scraped });

    // Rate limit between requests (minimum 500 ms when enabled).
    if (delayMs > 0) await sleep(Math.max(delayMs, 500));
  }

  return {
    success: true,
    processed: results.length,
    results
  };
}
303
+
304
/**
 * Aggregate domain intelligence: contacts from a depth-2 scrape, page list
 * from the sitemap, and Wayback history — all fetched in parallel.
 * Sitemap/wayback failures degrade to empty sections instead of failing
 * the whole report.
 * @param {string} domain - Bare domain name, e.g. "example.com".
 * @returns {Promise<object>} { success, domain, contacts, meta, pages, history }
 *   or { error } for a malformed domain.
 */
async function grabrIntel(domain) {
  if (!DOMAIN_REGEX.test(domain)) {
    return { error: 'Invalid domain format' };
  }

  const url = `https://${domain}`;

  // Kick off all three lookups at once and wait for the lot.
  const scrapePromise = grabrScrape(url, 2);
  const sitemapPromise = grabrSitemap(url).catch(() => ({ urls: [], total: 0 }));
  const waybackPromise = grabrWayback(url, 3).catch(() => ({ snapshots: [] }));
  const [scrapeResult, sitemapResult, waybackResult] = await Promise.all([
    scrapePromise,
    sitemapPromise,
    waybackPromise
  ]);

  const snapshotList = waybackResult.snapshots || [];
  return {
    success: true,
    domain,
    contacts: scrapeResult.contacts || {},
    meta: scrapeResult.meta || {},
    pages: {
      total: sitemapResult.total || 0,
      sample: (sitemapResult.urls || []).slice(0, 10)
    },
    history: {
      snapshots: snapshotList.length,
      oldest: waybackResult.oldest,
      newest: waybackResult.newest
    }
  };
}
334
+
335
// Tool definitions for MCP
// Each entry carries the standard MCP fields (name, description,
// inputSchema) plus 50c-specific metadata: `cost` (USD per call, also
// echoed in the description) and `tier` (subscription tier gating the tool).
const GRABR_TOOLS = [
  {
    name: 'grabr_scrape',
    description: 'Deep scrape URL for contacts (email, phone, address) + social links. $0.05',
    inputSchema: {
      type: 'object',
      properties: {
        url: { type: 'string', description: 'URL to scrape' },
        depth: { type: 'number', description: 'Scrape depth 1-3 (default 1)', default: 1 }
      },
      required: ['url']
    },
    cost: 0.05,
    tier: 'pro'
  },
  {
    name: 'grabr_contact',
    description: 'Extract contacts from HTML/text content. $0.02',
    inputSchema: {
      type: 'object',
      properties: {
        content: { type: 'string', description: 'HTML or text to extract from' }
      },
      required: ['content']
    },
    cost: 0.02,
    tier: 'pro'
  },
  {
    name: 'grabr_wayback',
    description: 'Get Wayback Machine snapshots for URL. $0.02',
    inputSchema: {
      type: 'object',
      properties: {
        url: { type: 'string', description: 'URL to check' },
        years: { type: 'number', description: 'Years to look back (default 5)', default: 5 }
      },
      required: ['url']
    },
    cost: 0.02,
    tier: 'pro'
  },
  {
    name: 'grabr_sitemap',
    description: 'Parse sitemap.xml and return all page URLs. $0.02',
    inputSchema: {
      type: 'object',
      properties: {
        url: { type: 'string', description: 'Site URL or sitemap URL' }
      },
      required: ['url']
    },
    cost: 0.02,
    tier: 'pro'
  },
  {
    name: 'grabr_batch',
    description: 'Scrape up to 10 URLs with rate limiting. $0.10',
    inputSchema: {
      type: 'object',
      properties: {
        urls: { type: 'array', items: { type: 'string' }, description: 'URLs to scrape (max 10)' },
        delayMs: { type: 'number', description: 'Delay between requests in ms (min 500)', default: 1000 }
      },
      required: ['urls']
    },
    cost: 0.10,
    tier: 'pro'
  },
  {
    name: 'grabr_intel',
    description: 'Full domain intel: contacts, pages, social, history. $0.08',
    inputSchema: {
      type: 'object',
      properties: {
        domain: { type: 'string', description: 'Domain name (e.g., example.com)' }
      },
      required: ['domain']
    },
    cost: 0.08,
    tier: 'pro'
  }
];
419
+
420
/**
 * Route a grabr tool invocation to its implementation.
 * Unknown names and thrown errors are converted into { error } results so
 * the MCP layer never sees an exception from this pack.
 * @param {string} name - Tool name, e.g. "grabr_scrape".
 * @param {object} args - Tool arguments matching the tool's inputSchema.
 * @returns {Promise<object>} the tool's result object or { error }.
 */
async function handleTool(name, args) {
  // Thunks keep the sibling implementations unreferenced until invoked.
  const dispatch = {
    grabr_scrape: () => grabrScrape(args.url, args.depth || 1),
    grabr_contact: () => grabrContact(args.content),
    grabr_wayback: () => grabrWayback(args.url, args.years || 5),
    grabr_sitemap: () => grabrSitemap(args.url),
    grabr_batch: () => grabrBatch(args.urls, args.delayMs || 1000),
    grabr_intel: () => grabrIntel(args.domain)
  };

  // Own-property check so inherited keys ("toString", "constructor")
  // fall through to the unknown-tool error, matching the switch default.
  if (!Object.prototype.hasOwnProperty.call(dispatch, name)) {
    return { error: `Unknown grabr tool: ${name}` };
  }

  try {
    return await dispatch[name]();
  } catch (e) {
    return { error: e.message || 'Tool execution failed' };
  }
}
442
+
443
+ module.exports = { GRABR_TOOLS, handleTool };
package/lib/packs.js CHANGED
@@ -87,6 +87,13 @@ const PACKS = {
87
87
  tier: 'pro',
88
88
  highlights: ['domains_expiring', 'writing_draft', 'bookmarks_prune', 'csv_clean']
89
89
  },
90
+ grabr: {
91
+ name: 'grabr',
92
+ description: 'Web scraping - contacts, social links, Wayback, sitemaps',
93
+ tools: 6,
94
+ tier: 'pro',
95
+ highlights: ['grabr_scrape', 'grabr_intel', 'grabr_wayback', 'grabr_batch']
96
+ },
90
97
 
91
98
  // === ENTERPRISE TIER ($499/mo) ===
92
99
  labs_plus: {
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "50c",
3
- "version": "2.7.0",
4
- "description": "AI toolkit with prompt engine. One install, 105+ tools.",
3
+ "version": "2.8.0",
4
+ "description": "AI toolkit with grabr web scraping. One install, 111+ tools.",
5
5
  "main": "lib/index.js",
6
6
  "bin": {
7
7
  "50c": "./bin/50c.js"
@@ -13,17 +13,16 @@
13
13
  "tools",
14
14
  "genius",
15
15
  "prompt-engine",
16
- "agent-autopsy",
17
- "prompt-fortress",
16
+ "grabr",
17
+ "scraper",
18
+ "wayback",
18
19
  "bcalc",
19
20
  "vault",
20
21
  "cloudflare",
21
22
  "whm",
22
23
  "cpanel",
23
24
  "wordpress",
24
- "librarian",
25
- "bookmarks",
26
- "csv"
25
+ "librarian"
27
26
  ],
28
27
  "author": "genxis.com",
29
28
  "license": "MIT",