50c 2.7.0 → 2.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/50c.js +264 -25
- package/lib/index.js +14 -2
- package/lib/packs/grabr.js +443 -0
- package/lib/packs.js +7 -0
- package/package.json +6 -7
package/bin/50c.js
CHANGED
|
@@ -73,12 +73,243 @@ async function runMCP() {
|
|
|
73
73
|
});
|
|
74
74
|
}
|
|
75
75
|
|
|
76
|
+
// Detect IDE from environment/process
|
|
77
|
+
/**
 * Guess which IDE this process is running under.
 *
 * Environment-variable hints are checked first, in a fixed priority order
 * (cursor, windsurf, vscode, claude-desktop). Only when none of them is set
 * is the current working directory inspected for an IDE-specific substring.
 *
 * @returns {string|null} 'cursor', 'windsurf', 'vscode', 'claude-desktop',
 *   or null when no hint matches.
 */
function detectIDE() {
  const { env } = process;

  // Ordered: the first rule whose probe is truthy wins.
  const envRules = [
    ['cursor', () => env.CURSOR_SESSION || env.CURSOR_TRACE_ID],
    ['windsurf', () => env.WINDSURF_SESSION],
    ['vscode', () => env.VSCODE_PID || env.TERM_PROGRAM === 'vscode'],
    ['claude-desktop', () => env.CLAUDE_DESKTOP],
  ];
  for (const [ideName, probe] of envRules) {
    if (probe()) {
      return ideName;
    }
  }

  // Fall back to substrings of the working directory.
  const workingDir = process.cwd();
  const pathRules = [
    ['.cursor', 'cursor'],
    ['windsurf', 'windsurf'],
  ];
  const hit = pathRules.find(([needle]) => workingDir.includes(needle));
  return hit ? hit[1] : null;
}
|
|
89
|
+
|
|
90
|
+
// Get MCP config path for IDE
|
|
91
|
+
/**
 * Resolve the MCP configuration file path for a supported IDE.
 *
 * All paths are built under the current user's home directory. The
 * claude-desktop entry uses the AppData/Roaming layout on win32 and the
 * Library/Application Support layout on every other platform.
 *
 * @param {string} ide - IDE key: 'cursor', 'windsurf', 'vscode',
 *   'claude-desktop' or 'verdent'.
 * @returns {string|null} Absolute config path, or null for an unknown key.
 */
function getMCPConfigPath(ide) {
  const os = require('os');
  const path = require('path');
  const home = os.homedir();

  const paths = {
    'cursor': path.join(home, '.cursor', 'mcp.json'),
    'windsurf': path.join(home, '.codeium', 'windsurf', 'mcp_config.json'),
    'vscode': path.join(home, '.vscode', 'mcp.json'),
    'claude-desktop': process.platform === 'win32'
      ? path.join(home, 'AppData', 'Roaming', 'Claude', 'claude_desktop_config.json')
      : path.join(home, 'Library', 'Application Support', 'Claude', 'claude_desktop_config.json'),
    'verdent': path.join(home, '.verdent', 'mcp.json')
  };

  // Own-property check: a bare `paths[ide]` lookup also finds inherited
  // members (e.g. 'constructor', 'toString') and would return a function
  // instead of null for those keys.
  return Object.prototype.hasOwnProperty.call(paths, ide) ? paths[ide] : null;
}
|
|
107
|
+
|
|
76
108
|
// CLI Mode
|
|
77
109
|
async function runCLI(args) {
|
|
78
110
|
const cmd = args[0];
|
|
79
111
|
const cmdArgs = args.slice(1);
|
|
112
|
+
const fs = require('fs');
|
|
113
|
+
const path = require('path');
|
|
114
|
+
const os = require('os');
|
|
80
115
|
|
|
81
116
|
switch (cmd) {
|
|
117
|
+
case 'init': {
|
|
118
|
+
console.log('\n 50c init - Quick Setup\n');
|
|
119
|
+
|
|
120
|
+
// Detect IDE
|
|
121
|
+
const ide = detectIDE() || cmdArgs[0];
|
|
122
|
+
const ides = ['cursor', 'windsurf', 'vscode', 'claude-desktop', 'verdent'];
|
|
123
|
+
|
|
124
|
+
if (!ide) {
|
|
125
|
+
console.log(' Detected IDE: none\n');
|
|
126
|
+
console.log(' Which IDE are you using?');
|
|
127
|
+
ides.forEach((i, idx) => console.log(` ${idx + 1}. ${i}`));
|
|
128
|
+
console.log('\n Run: 50c init <ide>');
|
|
129
|
+
console.log(' Example: 50c init cursor\n');
|
|
130
|
+
break;
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
if (!ides.includes(ide)) {
|
|
134
|
+
console.log(` Unknown IDE: ${ide}`);
|
|
135
|
+
console.log(` Supported: ${ides.join(', ')}\n`);
|
|
136
|
+
break;
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
console.log(` IDE: ${ide}`);
|
|
140
|
+
|
|
141
|
+
// Check for API key
|
|
142
|
+
const apiKey = process.env.FIFTYC_API_KEY || process.env.FIFTY_CENT_API_KEY;
|
|
143
|
+
if (apiKey) {
|
|
144
|
+
console.log(` API Key: ${apiKey.slice(0, 8)}...`);
|
|
145
|
+
} else {
|
|
146
|
+
console.log(' API Key: not set');
|
|
147
|
+
console.log('\n Get your key at: https://50c.ai');
|
|
148
|
+
console.log(' Then run: 50c init <ide>\n');
|
|
149
|
+
}
|
|
150
|
+
|
|
151
|
+
// Get config path
|
|
152
|
+
const configPath = getMCPConfigPath(ide);
|
|
153
|
+
if (!configPath) {
|
|
154
|
+
console.log(` Config path: unknown for ${ide}\n`);
|
|
155
|
+
break;
|
|
156
|
+
}
|
|
157
|
+
|
|
158
|
+
console.log(` Config: ${configPath}`);
|
|
159
|
+
|
|
160
|
+
// Generate config
|
|
161
|
+
const mcpConfig = {
|
|
162
|
+
mcpServers: {
|
|
163
|
+
'50c': {
|
|
164
|
+
command: 'npx',
|
|
165
|
+
args: ['-y', '50c'],
|
|
166
|
+
env: apiKey ? { FIFTYC_API_KEY: apiKey } : {}
|
|
167
|
+
}
|
|
168
|
+
}
|
|
169
|
+
};
|
|
170
|
+
|
|
171
|
+
// Check if file exists
|
|
172
|
+
let existingConfig = {};
|
|
173
|
+
try {
|
|
174
|
+
if (fs.existsSync(configPath)) {
|
|
175
|
+
existingConfig = JSON.parse(fs.readFileSync(configPath, 'utf8'));
|
|
176
|
+
console.log(' Status: updating existing config');
|
|
177
|
+
} else {
|
|
178
|
+
console.log(' Status: creating new config');
|
|
179
|
+
}
|
|
180
|
+
} catch (e) {
|
|
181
|
+
console.log(' Status: creating new config');
|
|
182
|
+
}
|
|
183
|
+
|
|
184
|
+
// Merge configs
|
|
185
|
+
const finalConfig = {
|
|
186
|
+
...existingConfig,
|
|
187
|
+
mcpServers: {
|
|
188
|
+
...(existingConfig.mcpServers || {}),
|
|
189
|
+
'50c': mcpConfig.mcpServers['50c']
|
|
190
|
+
}
|
|
191
|
+
};
|
|
192
|
+
|
|
193
|
+
// Create directory if needed
|
|
194
|
+
const configDir = path.dirname(configPath);
|
|
195
|
+
if (!fs.existsSync(configDir)) {
|
|
196
|
+
fs.mkdirSync(configDir, { recursive: true });
|
|
197
|
+
}
|
|
198
|
+
|
|
199
|
+
// Write config
|
|
200
|
+
fs.writeFileSync(configPath, JSON.stringify(finalConfig, null, 2));
|
|
201
|
+
console.log('\n Done! 50c added to ' + ide);
|
|
202
|
+
|
|
203
|
+
// Show next steps
|
|
204
|
+
console.log('\n Next steps:');
|
|
205
|
+
console.log(' 1. Restart ' + ide);
|
|
206
|
+
console.log(' 2. 50c should appear with 117+ tools');
|
|
207
|
+
if (!apiKey) {
|
|
208
|
+
console.log(' 3. Set FIFTYC_API_KEY in the config for full access');
|
|
209
|
+
}
|
|
210
|
+
console.log('\n Try: 50c tools (see all available tools)');
|
|
211
|
+
console.log(' 50c doctor (check setup)\n');
|
|
212
|
+
break;
|
|
213
|
+
}
|
|
214
|
+
|
|
215
|
+
case 'tools': {
|
|
216
|
+
const tools = await lib.getTools();
|
|
217
|
+
const packFilter = cmdArgs.find(a => a.startsWith('--pack='))?.split('=')[1];
|
|
218
|
+
const search = cmdArgs.find(a => !a.startsWith('--'));
|
|
219
|
+
|
|
220
|
+
let filtered = tools;
|
|
221
|
+
if (packFilter) {
|
|
222
|
+
filtered = tools.filter(t => t.name.startsWith(packFilter) || t.name.includes(packFilter));
|
|
223
|
+
}
|
|
224
|
+
if (search) {
|
|
225
|
+
const s = search.toLowerCase();
|
|
226
|
+
filtered = filtered.filter(t =>
|
|
227
|
+
t.name.toLowerCase().includes(s) ||
|
|
228
|
+
(t.description || '').toLowerCase().includes(s)
|
|
229
|
+
);
|
|
230
|
+
}
|
|
231
|
+
|
|
232
|
+
console.log(`\n 50c Tools (${filtered.length}/${tools.length})\n`);
|
|
233
|
+
|
|
234
|
+
// Group by prefix
|
|
235
|
+
const groups = {};
|
|
236
|
+
filtered.forEach(t => {
|
|
237
|
+
const prefix = t.name.split('_')[0];
|
|
238
|
+
if (!groups[prefix]) groups[prefix] = [];
|
|
239
|
+
groups[prefix].push(t);
|
|
240
|
+
});
|
|
241
|
+
|
|
242
|
+
for (const [prefix, groupTools] of Object.entries(groups)) {
|
|
243
|
+
console.log(` ${prefix} (${groupTools.length}):`);
|
|
244
|
+
groupTools.slice(0, 5).forEach(t => {
|
|
245
|
+
const cost = t.cost ? ` $${t.cost.toFixed(2)}` : ' FREE';
|
|
246
|
+
console.log(` ${t.name}${cost}`);
|
|
247
|
+
});
|
|
248
|
+
if (groupTools.length > 5) {
|
|
249
|
+
console.log(` ... and ${groupTools.length - 5} more`);
|
|
250
|
+
}
|
|
251
|
+
console.log('');
|
|
252
|
+
}
|
|
253
|
+
|
|
254
|
+
console.log(' Filter: 50c tools <search>');
|
|
255
|
+
console.log(' 50c tools --pack=grabr\n');
|
|
256
|
+
break;
|
|
257
|
+
}
|
|
258
|
+
|
|
259
|
+
case 'doctor': {
|
|
260
|
+
console.log('\n 50c doctor - System Check\n');
|
|
261
|
+
|
|
262
|
+
// Check API key
|
|
263
|
+
const apiKey = process.env.FIFTYC_API_KEY || process.env.FIFTY_CENT_API_KEY;
|
|
264
|
+
console.log(` API Key: ${apiKey ? 'set (' + apiKey.slice(0, 8) + '...)' : 'not set'}`);
|
|
265
|
+
|
|
266
|
+
// Check vault
|
|
267
|
+
const vaultStatus = await lib.vault.status();
|
|
268
|
+
console.log(` Vault: ${vaultStatus.initialized ? 'initialized' : 'not initialized'}`);
|
|
269
|
+
if (vaultStatus.initialized) {
|
|
270
|
+
console.log(` ${vaultStatus.locked ? 'locked' : 'unlocked'}`);
|
|
271
|
+
}
|
|
272
|
+
|
|
273
|
+
// Check packs
|
|
274
|
+
const packs = await lib.packs.listPacks();
|
|
275
|
+
console.log(` Packs: ${packs.enabled?.length || 0} enabled`);
|
|
276
|
+
|
|
277
|
+
// Check tools
|
|
278
|
+
const tools = await lib.getTools();
|
|
279
|
+
console.log(` Tools: ${tools.length} available`);
|
|
280
|
+
|
|
281
|
+
// Check IDE configs
|
|
282
|
+
console.log('\n IDE Configs:');
|
|
283
|
+
const ides = ['cursor', 'windsurf', 'vscode', 'claude-desktop', 'verdent'];
|
|
284
|
+
for (const ide of ides) {
|
|
285
|
+
const configPath = getMCPConfigPath(ide);
|
|
286
|
+
if (configPath && fs.existsSync(configPath)) {
|
|
287
|
+
try {
|
|
288
|
+
const config = JSON.parse(fs.readFileSync(configPath, 'utf8'));
|
|
289
|
+
const has50c = config.mcpServers?.['50c'] ? 'configured' : 'no 50c';
|
|
290
|
+
console.log(` ${ide}: ${has50c}`);
|
|
291
|
+
} catch (e) {
|
|
292
|
+
console.log(` ${ide}: invalid config`);
|
|
293
|
+
}
|
|
294
|
+
}
|
|
295
|
+
}
|
|
296
|
+
|
|
297
|
+
// Overall status
|
|
298
|
+
console.log('\n Status:');
|
|
299
|
+
if (!apiKey) {
|
|
300
|
+
console.log(' ! Set FIFTYC_API_KEY for API access');
|
|
301
|
+
}
|
|
302
|
+
if (!vaultStatus.initialized) {
|
|
303
|
+
console.log(' ! Run: 50c vault init <passphrase>');
|
|
304
|
+
}
|
|
305
|
+
if (apiKey && vaultStatus.initialized && tools.length > 50) {
|
|
306
|
+
console.log(' All good! 50c is ready.\n');
|
|
307
|
+
} else {
|
|
308
|
+
console.log('\n Run: 50c init <ide> to configure\n');
|
|
309
|
+
}
|
|
310
|
+
break;
|
|
311
|
+
}
|
|
312
|
+
|
|
82
313
|
case 'status':
|
|
83
314
|
console.log(JSON.stringify(await lib.getStatus(), null, 2));
|
|
84
315
|
break;
|
|
@@ -185,36 +416,44 @@ async function runCLI(args) {
|
|
|
185
416
|
break;
|
|
186
417
|
|
|
187
418
|
case 'help':
|
|
419
|
+
case '-h':
|
|
420
|
+
case '--help':
|
|
188
421
|
default:
|
|
189
|
-
console.log(`
|
|
422
|
+
console.log(`
|
|
423
|
+
50c - AI Toolkit (117+ tools)
|
|
190
424
|
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
50c enable <pack> Enable a pack
|
|
196
|
-
50c disable <pack> Disable a pack
|
|
197
|
-
50c packs List packs
|
|
198
|
-
50c vault <cmd> Vault commands
|
|
425
|
+
Quick Start:
|
|
426
|
+
50c init <ide> Auto-configure for your IDE
|
|
427
|
+
50c tools List all available tools
|
|
428
|
+
50c doctor Check setup and diagnose issues
|
|
199
429
|
|
|
200
|
-
Packs:
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
ux UI/UX toolkit (17 tools)
|
|
430
|
+
Packs:
|
|
431
|
+
50c packs List enabled/available packs
|
|
432
|
+
50c enable <pack> Enable a pack
|
|
433
|
+
50c disable <pack> Disable a pack
|
|
434
|
+
50c discover Show pack recommendations
|
|
206
435
|
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
436
|
+
Vault (secure storage):
|
|
437
|
+
50c vault init <pass> Initialize encrypted vault
|
|
438
|
+
50c vault add <k> <v> Store a credential
|
|
439
|
+
50c vault get <key> Retrieve a credential
|
|
440
|
+
50c vault list List stored credentials
|
|
441
|
+
|
|
442
|
+
MCP Server:
|
|
443
|
+
50c Start MCP server (for IDEs)
|
|
444
|
+
50c --mcp Force MCP mode
|
|
445
|
+
|
|
446
|
+
Supported IDEs:
|
|
447
|
+
cursor, windsurf, vscode, claude-desktop, verdent
|
|
448
|
+
|
|
449
|
+
Example:
|
|
450
|
+
$ npx -y 50c init cursor
|
|
451
|
+
$ 50c tools grabr
|
|
452
|
+
$ 50c vault init mysecret
|
|
216
453
|
|
|
217
|
-
|
|
454
|
+
Docs: https://50c.ai/docs
|
|
455
|
+
Help: https://discord.gg/50c
|
|
456
|
+
`);
|
|
218
457
|
}
|
|
219
458
|
}
|
|
220
459
|
|
package/lib/index.js
CHANGED
|
@@ -23,13 +23,15 @@ const cf = require('./packs/cf');
|
|
|
23
23
|
const wp = require('./packs/wp');
|
|
24
24
|
const ux = require('./packs/ux');
|
|
25
25
|
const promptEngine = require('./packs/prompt_engine');
|
|
26
|
+
const grabr = require('./packs/grabr');
|
|
26
27
|
|
|
27
28
|
// Tool name mappings by pack
|
|
28
29
|
const TOOL_PACKS = {
|
|
29
30
|
beacon: ['hints', 'hints_plus', 'roast', 'quick_vibe', 'one_liner', 'name_it', 'price_it', 'compute', 'ide_conversation', 'learning_stats'],
|
|
30
31
|
labs: ['genius', 'mind_opener', 'idea_fold', 'agent_autopsy', 'prompt_fortress', 'context_health', 'context_compress', 'context_extract', 'context_reposition'],
|
|
31
32
|
labs_plus: ['bcalc', 'genius_plus', 'bcalc_why', 'discovery_collision', 'cvi_loop', 'cvi_verify', 'chaos_fingerprint', 'resonance', 'prime_residue', 'echo_sequence', 'conversation_diagnostic', 'handoff'],
|
|
32
|
-
prompt_engine: ['prompt_extract', 'prompt_phases', 'prompt_refine', 'prompt_expand', 'prompt_categorize']
|
|
33
|
+
prompt_engine: ['prompt_extract', 'prompt_phases', 'prompt_refine', 'prompt_expand', 'prompt_categorize'],
|
|
34
|
+
grabr: ['grabr_scrape', 'grabr_contact', 'grabr_wayback', 'grabr_sitemap', 'grabr_batch', 'grabr_intel']
|
|
33
35
|
};
|
|
34
36
|
|
|
35
37
|
// Get all available tools based on enabled packs
|
|
@@ -56,6 +58,7 @@ async function getTools() {
|
|
|
56
58
|
if (config.packs.wp) tools.push(...wp.WP_TOOLS);
|
|
57
59
|
if (config.packs.ux) tools.push(...ux.UX_TOOLS);
|
|
58
60
|
if (config.packs.prompt_engine) tools.push(...promptEngine.PROMPT_ENGINE_TOOLS);
|
|
61
|
+
if (config.packs.grabr) tools.push(...grabr.GRABR_TOOLS);
|
|
59
62
|
|
|
60
63
|
// ENTERPRISE tier
|
|
61
64
|
if (config.packs.labs_plus) tools.push(...labsPlus.LABS_PLUS_TOOLS);
|
|
@@ -112,6 +115,14 @@ async function handleTool(name, args = {}) {
|
|
|
112
115
|
return promptEngine.handleTool(name, args);
|
|
113
116
|
}
|
|
114
117
|
|
|
118
|
+
// Grabr tools (PRO)
|
|
119
|
+
if (TOOL_PACKS.grabr.includes(name) || name.startsWith('grabr_')) {
|
|
120
|
+
if (!config.packs.grabr) {
|
|
121
|
+
return { error: 'Requires Pro tier ($99/mo). Enable grabr pack or upgrade at sales.50c.ai/50c-pro/' };
|
|
122
|
+
}
|
|
123
|
+
return grabr.handleTool(name, args);
|
|
124
|
+
}
|
|
125
|
+
|
|
115
126
|
// Labs+ tools (ENTERPRISE)
|
|
116
127
|
if (TOOL_PACKS.labs_plus.includes(name)) {
|
|
117
128
|
if (!config.packs.labs_plus) {
|
|
@@ -183,5 +194,6 @@ module.exports = {
|
|
|
183
194
|
beacon,
|
|
184
195
|
labs,
|
|
185
196
|
labsPlus,
|
|
186
|
-
promptEngine
|
|
197
|
+
promptEngine,
|
|
198
|
+
grabr
|
|
187
199
|
};
|
|
@@ -0,0 +1,443 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* 50c Grabr Pack - PRO Tier
|
|
3
|
+
* Web scraping + contact extraction + Wayback Machine
|
|
4
|
+
* Designed for: Lead gen, OSINT, competitive intel, domain recovery
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
const { apiRequest } = require('../config');
|
|
8
|
+
|
|
9
|
+
// URL validation regex
|
|
10
|
+
// Absolute http(s) URL with no whitespace anywhere.
const URL_REGEX = /^https?:\/\/[^\s/$.?#].[^\s]*$/i;
// Bare host name: alphanumeric/hyphen labels joined by at least one dot.
const DOMAIN_REGEX = /^[a-zA-Z0-9][-a-zA-Z0-9]*(\.[a-zA-Z0-9][-a-zA-Z0-9]*)+$/;

// Contact extraction patterns (from Grabr)
// The TLD class is [A-Za-z]: inside a character class `|` is a literal pipe,
// not alternation, so the previous [A-Z|a-z] accepted addresses like "a@b.co|m".
const EMAIL_REGEX = /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/g;
// 10-digit phone shapes: "+1" prefix, bare "1" prefix, and no prefix.
const PHONE_PATTERNS = [
  /\+1[-.\s]?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}\b/g,
  /\b1[-.\s]?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}\b/g,
  /\b\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}\b/g,
];

// One profile-URL pattern per social platform.
const SOCIAL_PATTERNS = {
  twitter: /https?:\/\/(www\.)?(twitter\.com|x\.com)\/[a-zA-Z0-9_]+/gi,
  facebook: /https?:\/\/(www\.)?facebook\.com\/[a-zA-Z0-9.]+/gi,
  linkedin: /https?:\/\/(www\.)?linkedin\.com\/(in|company)\/[a-zA-Z0-9-]+/gi,
  instagram: /https?:\/\/(www\.)?instagram\.com\/[a-zA-Z0-9_.]+/gi,
  youtube: /https?:\/\/(www\.)?youtube\.com\/(c|channel|user)\/[a-zA-Z0-9_-]+/gi,
};

// Invalid email patterns to filter: image file names that look like
// addresses (e.g. "logo@2x.png") and placeholder/telemetry domains.
const INVALID_EMAIL_PATTERNS = [
  /\.(png|jpg|jpeg|gif|svg|webp)$/i,
  /example\.com/i,
  /test\.com/i,
  /sentry\.io/i,
];
|
|
36
|
+
|
|
37
|
+
/**
 * Decide whether a regex-matched candidate looks like a real email address.
 *
 * A candidate is rejected when its lowercase form matches any entry of
 * INVALID_EMAIL_PATTERNS, when it does not contain exactly one '@', or when
 * the local part is empty or the domain part is empty / has no dot.
 *
 * @param {string} email - Candidate address.
 * @returns {boolean} true when the candidate passes every check.
 */
function isValidEmail(email) {
  const normalized = email.toLowerCase();
  if (INVALID_EMAIL_PATTERNS.some((blocked) => blocked.test(normalized))) {
    return false;
  }

  const pieces = email.split('@');
  if (pieces.length !== 2) {
    return false;
  }

  const [localPart, host] = pieces;
  return Boolean(localPart) && Boolean(host) && host.includes('.');
}
|
|
48
|
+
|
|
49
|
+
/**
 * Collect the distinct email addresses found in a piece of text.
 *
 * Candidates come from EMAIL_REGEX, are filtered through isValidEmail and
 * are lowercased before de-duplication; first-seen order is kept.
 *
 * @param {string} text - HTML or plain text to scan.
 * @returns {string[]} Unique lowercase addresses.
 */
function extractEmails(text) {
  const unique = new Set();
  for (const candidate of text.match(EMAIL_REGEX) || []) {
    if (isValidEmail(candidate)) {
      unique.add(candidate.toLowerCase());
    }
  }
  return Array.from(unique);
}
|
|
53
|
+
|
|
54
|
+
/**
 * Collect distinct 10-digit phone numbers found in a piece of text.
 *
 * Every PHONE_PATTERNS match is reduced to its digits; a leading '1' on an
 * 11-digit result is dropped. Numbers are discarded unless they have exactly
 * 10 digits, and also when the first or fourth digit is 0 or 1, or when all
 * ten digits are identical. Survivors are formatted as "+1 (AAA) BBB-CCCC".
 *
 * @param {string} text - HTML or plain text to scan.
 * @returns {string[]} Unique formatted numbers in first-seen order.
 */
function extractPhones(text) {
  const seen = new Set();

  for (const pattern of PHONE_PATTERNS) {
    for (const [raw] of text.matchAll(pattern)) {
      const allDigits = raw.replace(/\D/g, '');
      const national = allDigits.length === 11 && allDigits[0] === '1'
        ? allDigits.slice(1)
        : allDigits;

      if (national.length !== 10) continue;
      // Leading digit of the first and second 3-digit groups must be 2-9.
      if ('01'.includes(national[0]) || '01'.includes(national[3])) continue;
      // Reject filler such as 2222222222.
      if (/^(\d)\1+$/.test(national)) continue;

      seen.add(`+1 (${national.slice(0,3)}) ${national.slice(3,6)}-${national.slice(6)}`);
    }
  }

  return [...seen];
}
|
|
73
|
+
|
|
74
|
+
/**
 * Pick one profile link per social platform out of a piece of text.
 *
 * For each SOCIAL_PATTERNS entry the first match that does not contain
 * '/share', '/intent' or '/sharer' is kept; platforms without such a match
 * are left out of the result.
 *
 * @param {string} text - HTML or plain text to scan.
 * @returns {Object<string, string>} Map of platform name to profile URL.
 */
function extractSocialLinks(text) {
  const blockedParts = ['/share', '/intent', '/sharer'];
  const found = {};

  Object.keys(SOCIAL_PATTERNS).forEach((platform) => {
    const candidates = text.match(SOCIAL_PATTERNS[platform]) || [];
    const profile = candidates.find(
      (link) => blockedParts.every((part) => !link.includes(part))
    );
    if (profile !== undefined) {
      found[platform] = profile;
    }
  });

  return found;
}
|
|
90
|
+
|
|
91
|
+
/**
 * Find the first street address of the form
 * "<number> <name> <street suffix>, <city>, <ST> <5-digit zip>" in a text.
 *
 * @param {string} text - HTML or plain text to scan.
 * @returns {{street: string, city: string, state: string, zip: string}|null}
 *   The parsed parts (street and city trimmed), or null when nothing matches.
 */
function extractAddress(text) {
  // Fresh literal per call, so the /g flag's lastIndex always starts at 0.
  const pattern = /(\d+\s+[A-Za-z\s]+(?:Street|St|Avenue|Ave|Road|Rd|Boulevard|Blvd|Lane|Ln|Drive|Dr|Court|Ct|Way|Place|Pl)\.?),?\s*([A-Za-z\s]+),?\s*([A-Z]{2})\s*(\d{5})/gi;
  const found = pattern.exec(text);
  if (found === null) {
    return null;
  }

  const [, street, city, state, zip] = found;
  return { street: street.trim(), city: city.trim(), state, zip };
}
|
|
104
|
+
|
|
105
|
+
// Tool implementations
|
|
106
|
+
/**
 * Fetch a page through the `page_fetch` API tool and extract contact data.
 *
 * With `depth >= 2`, and only when the main page yielded no email address,
 * up to two distinct http(s) links whose href contains "contact" or "about"
 * are fetched as well and their findings merged in. Sub-page failures are
 * skipped (best effort).
 *
 * @param {string} url - Absolute http(s) URL to scrape.
 * @param {number} [depth=1] - 1 scrapes only `url`; 2 or more also follows
 *   contact/about links as described above.
 * @returns {Promise<Object>} `{ success, url, depth, contacts, meta }` on
 *   success, or `{ error }` when the URL is invalid or the fetch fails.
 */
async function grabrScrape(url, depth = 1) {
  if (!URL_REGEX.test(url)) {
    return { error: 'Invalid URL format' };
  }

  try {
    // Use 50c page_fetch via API
    const result = await apiRequest('page_fetch', { url });
    if (result.error) return { error: result.error };

    const html = result.content || result.text || '';

    const contacts = {
      emails: extractEmails(html),
      phones: extractPhones(html),
      address: extractAddress(html),
      social: extractSocialLinks(html)
    };

    // Deep scrape - follow contact/about pages
    if (depth >= 2 && contacts.emails.length === 0) {
      const contactLinks = html.match(/href=["']([^"']*(?:contact|about)[^"']*)["']/gi) || [];

      // Resolve every candidate first so that mailto:/tel:/javascript: links
      // and repeated links do not use up the two-page budget.
      const followUps = new Set();
      for (const linkMatch of contactLinks) {
        // Case-insensitive like the scan above, so HREF="..." is not dropped.
        const href = linkMatch.match(/href=["']([^"']+)["']/i)?.[1];
        if (!href) continue;
        let target;
        try {
          target = new URL(href, url);
        } catch (e) {
          continue; // unparseable href
        }
        if (target.protocol === 'http:' || target.protocol === 'https:') {
          followUps.add(target.href);
        }
      }

      for (const pageUrl of [...followUps].slice(0, 2)) {
        try {
          const subResult = await apiRequest('page_fetch', { url: pageUrl });
          // Same content/text fallback as the main page.
          const subHtml = subResult.content || subResult.text || '';
          if (subHtml) {
            contacts.emails.push(...extractEmails(subHtml));
            contacts.phones.push(...extractPhones(subHtml));
            if (!contacts.address) contacts.address = extractAddress(subHtml);
            Object.assign(contacts.social, extractSocialLinks(subHtml));
          }
        } catch (e) { /* skip broken links */ }
      }

      // Dedupe
      contacts.emails = [...new Set(contacts.emails)];
      contacts.phones = [...new Set(contacts.phones)];
    }

    return {
      success: true,
      url,
      depth,
      contacts,
      meta: {
        title: (html.match(/<title>([^<]+)<\/title>/i) || [])[1] || null,
        description: (html.match(/<meta[^>]*name=["']description["'][^>]*content=["']([^"']+)["']/i) || [])[1] || null
      }
    };
  } catch (e) {
    return { error: e.message || 'Scrape failed' };
  }
}
|
|
162
|
+
|
|
163
|
+
/**
 * Run every contact extractor over caller-supplied HTML or text.
 *
 * @param {string} content - Non-empty HTML or plain text.
 * @returns {Promise<Object>} `{ success, emails, phones, address, social }`,
 *   or `{ error: 'Content required' }` when `content` is empty or not a string.
 */
async function grabrContact(content) {
  const usable = typeof content === 'string' && content.length > 0;
  if (!usable) {
    return { error: 'Content required' };
  }

  const emails = extractEmails(content);
  const phones = extractPhones(content);
  const address = extractAddress(content);
  const social = extractSocialLinks(content);
  return { success: true, emails, phones, address, social };
}
|
|
176
|
+
|
|
177
|
+
/**
 * Look up archived snapshots of a URL via the archive.org availability API.
 *
 * For each year from the current one back to `currentYear - years`, the
 * closest snapshot to 1 January and to 1 June is requested. Distinct
 * snapshots (by timestamp) are collected until 10 are found. Individual
 * lookups that fail are skipped, and there is a 500 ms pause after every
 * request.
 *
 * @param {string} url - Absolute http(s) URL to look up.
 * @param {number} [years=5] - How many years back to probe.
 * @returns {Promise<Object>} `{ success, url, snapshots, oldest, newest }`
 *   where `oldest`/`newest` are the earliest/latest snapshot dates
 *   (YYYY-MM-DD) or null when none was found; `{ error }` on invalid input.
 */
async function grabrWayback(url, years = 5) {
  if (!URL_REGEX.test(url)) {
    return { error: 'Invalid URL format' };
  }

  try {
    const snapshots = [];
    const currentYear = new Date().getFullYear();
    const startYear = currentYear - years;
    const months = [1, 6]; // Check Jan and June

    for (let year = currentYear; year >= startYear && snapshots.length < 10; year--) {
      for (const month of months) {
        if (snapshots.length >= 10) break;
        const timestamp = `${year}${String(month).padStart(2, '0')}01`;

        // Build the query with searchParams so both `url` and `timestamp`
        // are sent as separate, correctly encoded parameters.
        const checkUrl = new URL('https://archive.org/wayback/available');
        checkUrl.searchParams.set('url', url);
        checkUrl.searchParams.set('timestamp', timestamp);

        try {
          const resp = await fetch(checkUrl, {
            headers: { 'User-Agent': '50c-grabr/1.0' },
            signal: AbortSignal.timeout(10000)
          });
          if (resp.ok) {
            const data = await resp.json();
            if (data.archived_snapshots?.closest?.available) {
              const snap = data.archived_snapshots.closest;
              if (!snapshots.find(s => s.timestamp === snap.timestamp)) {
                snapshots.push({
                  timestamp: snap.timestamp,
                  url: snap.url,
                  date: `${snap.timestamp.slice(0,4)}-${snap.timestamp.slice(4,6)}-${snap.timestamp.slice(6,8)}`
                });
              }
            }
          }
        } catch (e) { /* skip failed checks */ }

        // Rate limit
        await new Promise(r => setTimeout(r, 500));
      }
    }

    // Lookups run Jan-then-Jun inside descending years, so array position
    // does not reflect age; derive the extremes from the dates themselves
    // (YYYY-MM-DD strings sort chronologically).
    const dates = snapshots.map(s => s.date).sort();

    return {
      success: true,
      url,
      snapshots,
      oldest: dates[0] ?? null,
      newest: dates[dates.length - 1] ?? null
    };
  } catch (e) {
    return { error: e.message || 'Wayback lookup failed' };
  }
}
|
|
230
|
+
|
|
231
|
+
/**
 * Fetch a sitemap through the `page_fetch` API tool and list its URLs.
 *
 * When `url` does not contain "sitemap", "/sitemap.xml" is appended to it.
 * If that fetch fails or returns nothing, the site's /robots.txt (resolved
 * against the origin of `url`) is consulted and its first `Sitemap:` entry
 * is fetched instead.
 *
 * @param {string} url - Site URL or direct sitemap URL.
 * @returns {Promise<Object>} `{ success, sitemapUrl, urls, total }` where
 *   `sitemapUrl` is the sitemap actually parsed, `urls` holds at most 500
 *   entries and `total` is the full count; `{ error }` otherwise.
 */
async function grabrSitemap(url) {
  if (typeof url !== 'string' || url.length === 0) {
    return { error: 'URL required' };
  }

  // Normalize to sitemap URL
  let sitemapUrl = url;
  if (!url.includes('sitemap')) {
    const base = url.replace(/\/$/, '');
    sitemapUrl = `${base}/sitemap.xml`;
  }

  try {
    const result = await apiRequest('page_fetch', { url: sitemapUrl });
    let xml = result.content || '';

    if (result.error || !xml) {
      // Try robots.txt fallback. robots.txt lives at the site root, so
      // resolve it against the origin rather than appending to the input
      // path (which may itself be a sitemap file).
      let robotsUrl;
      try {
        robotsUrl = new URL('/robots.txt', url).href;
      } catch (e) {
        robotsUrl = url.replace(/\/$/, '') + '/robots.txt';
      }
      const robotsResult = await apiRequest('page_fetch', { url: robotsUrl });
      if (robotsResult.content) {
        const sitemapMatch = robotsResult.content.match(/Sitemap:\s*(\S+)/i);
        if (sitemapMatch) {
          const altResult = await apiRequest('page_fetch', { url: sitemapMatch[1] });
          if (altResult.content) {
            xml = altResult.content;
            sitemapUrl = sitemapMatch[1]; // report the sitemap that was actually parsed
          }
        }
      }
    }

    if (!xml) {
      return { error: 'Sitemap not found' };
    }

    // Sitemap values are XML-escaped; `&amp;` is decoded last so that
    // e.g. "&amp;lt;" is not decoded twice.
    const decodeXml = (value) => value
      .replace(/&lt;/g, '<')
      .replace(/&gt;/g, '>')
      .replace(/&quot;/g, '"')
      .replace(/&apos;/g, "'")
      .replace(/&amp;/g, '&');

    // Parse sitemap XML
    const urls = [];
    for (const match of xml.matchAll(/<loc>([^<]+)<\/loc>/gi)) {
      const loc = decodeXml(match[1].trim());
      if (loc) urls.push(loc);
    }

    return {
      success: true,
      sitemapUrl,
      urls: urls.slice(0, 500), // Cap at 500
      total: urls.length
    };
  } catch (e) {
    return { error: e.message || 'Sitemap parse failed' };
  }
}
|
|
277
|
+
|
|
278
|
+
/**
 * Scrape up to 10 URLs one after another at depth 1.
 *
 * @param {string[]} urls - 1 to 10 URLs to scrape.
 * @param {number} [delayMs=1000] - Pause between consecutive requests in
 *   milliseconds. Positive values are raised to at least 500; 0 or a
 *   negative value disables the pause.
 * @returns {Promise<Object>} `{ success, processed, results }` where each
 *   result is the grabrScrape output for that URL with `url` included;
 *   `{ error }` when `urls` is not an array of 1-10 items.
 */
async function grabrBatch(urls, delayMs = 1000) {
  if (!Array.isArray(urls) || urls.length === 0) {
    return { error: 'URLs array required' };
  }
  if (urls.length > 10) {
    return { error: 'Maximum 10 URLs per batch' };
  }

  const pauseMs = delayMs > 0 ? Math.max(delayMs, 500) : 0;
  const lastIndex = urls.length - 1;

  const results = [];
  for (const [index, url] of urls.entries()) {
    const result = await grabrScrape(url, 1);
    results.push({ url, ...result });

    // Rate limit between requests only: nothing follows the last URL, so
    // sleeping there would just delay the response.
    if (pauseMs > 0 && index < lastIndex) {
      await new Promise(r => setTimeout(r, pauseMs));
    }
  }

  return {
    success: true,
    processed: results.length,
    results
  };
}
|
|
303
|
+
|
|
304
|
+
/**
 * Build a combined report for a domain: contacts and page metadata from a
 * depth-2 scrape of https://<domain>, a sitemap sample, and a 3-year
 * Wayback summary. The three lookups run concurrently.
 *
 * A lookup that fails does not fail the report: its section is left empty
 * and its message is recorded under `errors` (the key is present only when
 * at least one lookup failed).
 *
 * @param {string} domain - Bare domain name, e.g. "example.com".
 * @returns {Promise<Object>} `{ success, domain, contacts, meta, pages,
 *   history[, errors] }`, or `{ error }` for a malformed domain.
 */
async function grabrIntel(domain) {
  if (!DOMAIN_REGEX.test(domain)) {
    return { error: 'Invalid domain format' };
  }

  const url = `https://${domain}`;

  // Turn a rejection into the same `{ error }` shape the lookups return.
  const asError = (fallback) => (e) => ({ error: (e && e.message) || fallback });

  // Parallel fetch main info
  const [scrapeResult, sitemapResult, waybackResult] = await Promise.all([
    grabrScrape(url, 2).catch(asError('Scrape failed')),
    grabrSitemap(url).catch(asError('Sitemap parse failed')),
    grabrWayback(url, 3).catch(asError('Wayback lookup failed'))
  ]);

  const errors = {};
  if (scrapeResult.error) errors.scrape = scrapeResult.error;
  if (sitemapResult.error) errors.sitemap = sitemapResult.error;
  if (waybackResult.error) errors.wayback = waybackResult.error;

  const report = {
    success: true,
    domain,
    contacts: scrapeResult.contacts || {},
    meta: scrapeResult.meta || {},
    pages: {
      total: sitemapResult.total || 0,
      sample: (sitemapResult.urls || []).slice(0, 10)
    },
    history: {
      snapshots: (waybackResult.snapshots || []).length,
      // null rather than undefined so the keys survive JSON serialization.
      oldest: waybackResult.oldest ?? null,
      newest: waybackResult.newest ?? null
    }
  };
  if (Object.keys(errors).length > 0) {
    report.errors = errors;
  }
  return report;
}
|
|
334
|
+
|
|
335
|
+
// Tool definitions for MCP
|
|
336
|
+
// MCP tool definitions for the grabr pack. Each row is
// [name, description, input properties, required property names, cost];
// every tool takes an object input and belongs to the 'pro' tier.
const GRABR_TOOLS = [
  [
    'grabr_scrape',
    'Deep scrape URL for contacts (email, phone, address) + social links. $0.05',
    {
      url: { type: 'string', description: 'URL to scrape' },
      depth: { type: 'number', description: 'Scrape depth 1-3 (default 1)', default: 1 }
    },
    ['url'],
    0.05
  ],
  [
    'grabr_contact',
    'Extract contacts from HTML/text content. $0.02',
    {
      content: { type: 'string', description: 'HTML or text to extract from' }
    },
    ['content'],
    0.02
  ],
  [
    'grabr_wayback',
    'Get Wayback Machine snapshots for URL. $0.02',
    {
      url: { type: 'string', description: 'URL to check' },
      years: { type: 'number', description: 'Years to look back (default 5)', default: 5 }
    },
    ['url'],
    0.02
  ],
  [
    'grabr_sitemap',
    'Parse sitemap.xml and return all page URLs. $0.02',
    {
      url: { type: 'string', description: 'Site URL or sitemap URL' }
    },
    ['url'],
    0.02
  ],
  [
    'grabr_batch',
    'Scrape up to 10 URLs with rate limiting. $0.10',
    {
      urls: { type: 'array', items: { type: 'string' }, description: 'URLs to scrape (max 10)' },
      delayMs: { type: 'number', description: 'Delay between requests in ms (min 500)', default: 1000 }
    },
    ['urls'],
    0.10
  ],
  [
    'grabr_intel',
    'Full domain intel: contacts, pages, social, history. $0.08',
    {
      domain: { type: 'string', description: 'Domain name (e.g., example.com)' }
    },
    ['domain'],
    0.08
  ]
].map(([name, description, properties, required, cost]) => ({
  name,
  description,
  inputSchema: {
    type: 'object',
    properties,
    required
  },
  cost,
  tier: 'pro'
}));
|
|
419
|
+
|
|
420
|
+
/**
 * Dispatch a grabr tool call by name.
 *
 * Missing or falsy optional arguments fall back to depth 1, years 5 and
 * delayMs 1000. Any exception raised while running the tool is converted
 * into an `{ error }` result instead of propagating.
 *
 * @param {string} name - Tool name, e.g. 'grabr_scrape'.
 * @param {Object} args - Tool arguments as described by GRABR_TOOLS.
 * @returns {Promise<Object>} The tool's result, or `{ error }` for an
 *   unknown tool or a failure.
 */
async function handleTool(name, args) {
  // Handlers are lazy so that `args` is only read for the selected tool.
  const routes = new Map([
    ['grabr_scrape', () => grabrScrape(args.url, args.depth || 1)],
    ['grabr_contact', () => grabrContact(args.content)],
    ['grabr_wayback', () => grabrWayback(args.url, args.years || 5)],
    ['grabr_sitemap', () => grabrSitemap(args.url)],
    ['grabr_batch', () => grabrBatch(args.urls, args.delayMs || 1000)],
    ['grabr_intel', () => grabrIntel(args.domain)],
  ]);

  try {
    const run = routes.get(name);
    if (!run) {
      return { error: `Unknown grabr tool: ${name}` };
    }
    return await run();
  } catch (e) {
    return { error: e.message || 'Tool execution failed' };
  }
}
|
|
442
|
+
|
|
443
|
+
module.exports = { GRABR_TOOLS, handleTool };
|
package/lib/packs.js
CHANGED
|
@@ -87,6 +87,13 @@ const PACKS = {
|
|
|
87
87
|
tier: 'pro',
|
|
88
88
|
highlights: ['domains_expiring', 'writing_draft', 'bookmarks_prune', 'csv_clean']
|
|
89
89
|
},
|
|
90
|
+
grabr: {
|
|
91
|
+
name: 'grabr',
|
|
92
|
+
description: 'Web scraping - contacts, social links, Wayback, sitemaps',
|
|
93
|
+
tools: 6,
|
|
94
|
+
tier: 'pro',
|
|
95
|
+
highlights: ['grabr_scrape', 'grabr_intel', 'grabr_wayback', 'grabr_batch']
|
|
96
|
+
},
|
|
90
97
|
|
|
91
98
|
// === ENTERPRISE TIER ($499/mo) ===
|
|
92
99
|
labs_plus: {
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "50c",
|
|
3
|
-
"version": "2.7.0",
|
|
4
|
-
"description": "AI toolkit with
|
|
3
|
+
"version": "2.9.0",
|
|
4
|
+
"description": "AI toolkit with init/doctor CLI. One install, 117+ tools.",
|
|
5
5
|
"main": "lib/index.js",
|
|
6
6
|
"bin": {
|
|
7
7
|
"50c": "./bin/50c.js"
|
|
@@ -13,17 +13,16 @@
|
|
|
13
13
|
"tools",
|
|
14
14
|
"genius",
|
|
15
15
|
"prompt-engine",
|
|
16
|
-
"
|
|
17
|
-
"
|
|
16
|
+
"grabr",
|
|
17
|
+
"scraper",
|
|
18
|
+
"wayback",
|
|
18
19
|
"bcalc",
|
|
19
20
|
"vault",
|
|
20
21
|
"cloudflare",
|
|
21
22
|
"whm",
|
|
22
23
|
"cpanel",
|
|
23
24
|
"wordpress",
|
|
24
|
-
"librarian"
|
|
25
|
-
"bookmarks",
|
|
26
|
-
"csv"
|
|
25
|
+
"librarian"
|
|
27
26
|
],
|
|
28
27
|
"author": "genxis.com",
|
|
29
28
|
"license": "MIT",
|