agent-security-scanner-mcp 3.10.3 → 3.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,518 @@
1
+ #!/usr/bin/env node
2
+
3
+ /**
4
+ * scan-clawhub-full.js
5
+ *
6
+ * FULL ClawHub Implementation Scanning
7
+ *
8
+ * Strategy:
9
+ * 1. Download SKILL.md files from all ClawHub skills (metadata)
10
+ * 2. For each skill, extract GitHub repo URL or npm package name
11
+ * 3. Download actual implementation code (GitHub clone OR npm pack)
12
+ * 4. Scan implementation using full AST + taint + regex engine
13
+ * 5. Generate comprehensive security report
14
+ *
15
+ * This matches Snyk's ToxicSkills methodology but adds:
16
+ * - AST + taint analysis (vs their LLM-powered scanning)
17
+ * - Package hallucination detection
18
+ * - Cross-file taint tracking
19
+ * - Interprocedural dataflow analysis
20
+ */
21
+
22
import { exec, execFile } from 'child_process';
import { existsSync } from 'fs';
import fs from 'fs/promises';
import https from 'https';
import path from 'path';
import { fileURLToPath } from 'url';
import { promisify } from 'util';
import { cloneAndExtract, parseGitHubUrl } from '../utils/github-clone.js';
import { downloadAndExtract, getPackageMetadata } from '../utils/npm-download.js';
31
+
32
// ES modules have no built-in __filename/__dirname, so derive them from the module URL.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// Promise-returning wrapper around child_process.exec.
const execAsync = promisify(exec);

// Output layout: everything lives under <current working directory>/clawhub-scan-full.
const SCAN_DIR = path.resolve(process.cwd(), 'clawhub-scan-full');
const SKILLS_METADATA_DIR = path.resolve(SCAN_DIR, 'skills-metadata');
const SOURCE_CODE_DIR = path.resolve(SCAN_DIR, 'source-code');
const RESULTS_FILE = path.resolve(SCAN_DIR, 'results.json');
const REPORT_FILE = path.resolve(SCAN_DIR, 'report.json');

// Batch sizes for the download and scan phases.
const CONCURRENT_DOWNLOADS = 5;
const CONCURRENT_SCANS = 3;

// Upper bound for scanning a single skill: 3 minutes, in milliseconds.
const SKILL_TIMEOUT_MS = 3 * 60 * 1000;
47
+
48
/**
 * Perform an HTTPS GET request and buffer the whole response body.
 *
 * The promise resolves for every HTTP status code (callers inspect
 * `statusCode` themselves) and rejects only when the request or the
 * response stream emits an error.
 *
 * @param {string|URL} url - Address to request.
 * @returns {Promise<{statusCode: number, body: string}>} Status code and
 *   UTF-8 decoded response body.
 */
function fetchUrl(url) {
  return new Promise((resolve, reject) => {
    https
      .get(url, (res) => {
        // Decode at the stream level: appending raw Buffer chunks to a string
        // converts each chunk on its own and corrupts any multi-byte UTF-8
        // sequence that happens to straddle a chunk boundary.
        res.setEncoding('utf8');

        let body = '';
        res.on('data', (chunk) => {
          body += chunk;
        });
        res.on('end', () => resolve({ statusCode: res.statusCode, body }));
        // A failure while the body is streaming must reject instead of
        // surfacing as an unhandled 'error' event.
        res.on('error', reject);
      })
      .on('error', reject);
  });
}
60
+
61
/**
 * Fetch the skill list from the ClawHub registry.
 *
 * Runs `clawhub explore` once per sort order and merges the results,
 * de-duplicating on "<author>/<slug>". A failure for one sort order is
 * logged and does not stop the remaining ones.
 *
 * @returns {Promise<Array<{skill_slug: string, author_username: (string|undefined)}>>}
 *   One entry per unique skill.
 */
async function fetchAllSkills() {
  console.log('\nšŸ“„ Fetching all skills from ClawHub...\n');

  const allSkills = new Map();
  const sorts = ['newest', 'downloads', 'installsAllTime'];

  for (const [index, sort] of sorts.entries()) {
    try {
      console.log(` Fetching ${sort}...`);

      // Pause between consecutive requests to avoid rate limits. This is keyed
      // on the request index so the pause also happens when earlier requests
      // returned nothing.
      if (index > 0) {
        await new Promise((resolve) => setTimeout(resolve, 3000));
      }

      // `sort` comes from the fixed list above, so interpolating it is safe.
      const { stdout } = await execAsync(
        `clawhub explore --limit 200 --sort ${sort} --json 2>&1`,
        { timeout: 60000 }
      );

      // The CLI prints progress text before the JSON document; skip to the
      // first line that opens a JSON object.
      const lines = stdout.trim().split('\n');
      const jsonStartIndex = lines.findIndex((line) => line.trim().startsWith('{'));

      if (jsonStartIndex === -1) {
        console.log(` āœ— No JSON output for ${sort}`);
        continue;
      }

      const data = JSON.parse(lines.slice(jsonStartIndex).join('\n'));
      const skills = Array.isArray(data.items) ? data.items : [];

      for (const skill of skills) {
        // Resolve each field once so the stored record uses the same
        // fallbacks as the de-duplication key.
        const author = skill.author_username || skill.author;
        const slug = skill.skill_slug || skill.slug;

        // Later stages build paths and commands from the slug, so an entry
        // without one cannot be processed.
        if (!slug) {
          continue;
        }

        const key = `${author}/${slug}`;
        if (!allSkills.has(key)) {
          allSkills.set(key, {
            skill_slug: slug,
            author_username: author,
          });
        }
      }

      console.log(` āœ“ Fetched ${skills.length} skills (sort: ${sort})`);
    } catch (error) {
      console.log(` āœ— Error fetching skills (sort: ${sort}): ${error.message}`);
    }
  }

  const skillsArray = Array.from(allSkills.values());
  console.log(`\nāœ… Total unique skills found: ${skillsArray.length}\n`);

  return skillsArray;
}
119
+
120
/**
 * Download SKILL.md files using `clawhub inspect`.
 *
 * Skills are processed in batches of CONCURRENT_DOWNLOADS. For each skill
 * the owner is read from `clawhub inspect --json`, and the SKILL.md text from
 * `clawhub inspect --file SKILL.md`, which is then written to
 * SKILLS_METADATA_DIR/<slug>/SKILL.md.
 *
 * @param {Array<{skill_slug: string}>} skills - Skills to download.
 * @returns {Promise<Array<{slug: string, author: string, skillMdPath: string,
 *   skillMdContent: string, success: true}>>} Only the successful downloads.
 */
async function downloadSkillMetadata(skills) {
  console.log(`šŸ“¦ Downloading ${skills.length} skill SKILL.md files...\n`);

  await fs.mkdir(SKILLS_METADATA_DIR, { recursive: true });

  // Slugs come from the remote registry and end up in a shell command line and
  // a directory name. Accept only characters that are inert in both, which
  // rules out shell metacharacters and path traversal.
  const safeSlugPattern = /^[A-Za-z0-9][A-Za-z0-9._-]*$/;

  const downloadOne = async (skill) => {
    const slug = skill.skill_slug;

    if (typeof slug !== 'string' || !safeSlugPattern.test(slug)) {
      return { slug, success: false, error: 'Invalid skill slug' };
    }

    try {
      // Kept inside the try so a filesystem error fails this skill only
      // instead of rejecting the whole batch.
      const skillDir = path.join(SKILLS_METADATA_DIR, slug);
      await fs.mkdir(skillDir, { recursive: true });
      const skillFilePath = path.join(skillDir, 'SKILL.md');

      // Step 1: Get metadata (owner info) using --json
      const { stdout: metadataOut } = await execAsync(
        `clawhub inspect ${slug} --json 2>&1`,
        { timeout: 30000 }
      );

      // Skip the progress text printed before the JSON document.
      const lines = metadataOut.trim().split('\n');
      const jsonStartIndex = lines.findIndex((line) => line.trim().startsWith('{'));

      if (jsonStartIndex === -1) {
        return { slug, success: false, error: 'No JSON in inspect output' };
      }

      const metadata = JSON.parse(lines.slice(jsonStartIndex).join('\n'));
      const author = metadata.owner?.handle || metadata.owner?.displayName || 'unknown';

      // Step 2: Get SKILL.md file content using --file
      const { stdout: fileOut } = await execAsync(
        `clawhub inspect ${slug} --file SKILL.md 2>&1`,
        { timeout: 30000 }
      );

      // Skip the "- Fetching skill" progress line: the document starts at the
      // frontmatter delimiter.
      const fileLines = fileOut.trim().split('\n');
      const contentStartIndex = fileLines.findIndex((line) => line.startsWith('---'));

      if (contentStartIndex === -1) {
        return { slug, success: false, error: 'No SKILL.md content found' };
      }

      // Never empty here: the slice begins with the '---' line found above.
      const skillMdContent = fileLines.slice(contentStartIndex).join('\n');

      await fs.writeFile(skillFilePath, skillMdContent, 'utf-8');

      return {
        slug,
        author,
        skillMdPath: skillFilePath,
        skillMdContent,
        success: true,
      };
    } catch (error) {
      return { slug, success: false, error: error.message };
    }
  };

  const results = [];

  for (let i = 0; i < skills.length; i += CONCURRENT_DOWNLOADS) {
    const chunk = skills.slice(i, i + CONCURRENT_DOWNLOADS);
    const chunkResults = await Promise.all(chunk.map((skill) => downloadOne(skill)));
    results.push(...chunkResults);

    const completed = results.length;
    const okCount = results.filter((r) => r.success).length;
    console.log(` [${((completed / skills.length) * 100).toFixed(1)}%] ${completed}/${skills.length} (${okCount} ok)`);
  }

  const successful = results.filter((r) => r.success);
  console.log(`\nāœ… Download complete: ${successful.length} successful, ${results.length - successful.length} failed\n`);

  return successful;
}
210
+
211
/**
 * Extract GitHub or npm source information from a SKILL.md document.
 *
 * Only the leading frontmatter block (delimited by `---` lines) is
 * inspected, and only top-level `homepage:` and `npm:` entries are read.
 *
 * @param {string} skillMdContent - Full text of a SKILL.md file.
 * @returns {{homepage: (string|null), npm: (string|null), github: (string|null),
 *   npmPackage: (string|null)}|null} Source info, or null when the document
 *   has no frontmatter.
 */
function extractSourceInfo(skillMdContent) {
  if (typeof skillMdContent !== 'string') return null;

  // Accept both LF and CRLF line endings.
  const frontmatterMatch = skillMdContent.match(/^---\r?\n([\s\S]*?)\r?\n---/);
  if (!frontmatterMatch) return null;

  const info = {
    homepage: null,
    npm: null,
    github: null,
    npmPackage: null,
  };

  // Value after `key:` with whitespace and one pair of matching surrounding
  // quotes removed (YAML allows `homepage: "https://..."`).
  const readValue = (line, key) =>
    line.slice(key.length).trim().replace(/^(["'])(.*)\1$/, '$2');

  for (const line of frontmatterMatch[1].split(/\r?\n/)) {
    if (line.startsWith('homepage:')) {
      info.homepage = readValue(line, 'homepage:');
    }
    if (line.startsWith('npm:')) {
      info.npm = readValue(line, 'npm:');
    }
  }

  // Parse GitHub URL from homepage
  if (info.homepage && info.homepage.includes('github.com')) {
    const parsed = parseGitHubUrl(info.homepage);
    if (parsed) {
      info.github = `https://github.com/${parsed.owner}/${parsed.repo}`;
    }
  }

  // Extract the npm package name from a registry URL. A scoped name spans two
  // path segments (`@scope/name`), so the optional scope is captured as well.
  if (info.npm && info.npm.includes('npmjs.com/package/')) {
    const match = info.npm.match(/npmjs\.com\/package\/((?:@[^\/?#]+\/)?[^\/?#]+)/);
    // The name comes from untrusted metadata and is handed to the package
    // downloader, so accept only plain package-name characters.
    const validName = /^(?:@[a-z0-9][a-z0-9._~-]*\/)?[a-z0-9][a-z0-9._~-]*$/i;
    if (match && validName.test(match[1])) {
      info.npmPackage = match[1];
    }
  }

  return info;
}
255
+
256
/**
 * Download source code for a skill (GitHub OR npm).
 *
 * The GitHub repository derived from the SKILL.md homepage is tried first;
 * the npm package is used as a fallback. This function never rejects:
 * every failure is reported through the returned object so that one bad skill
 * cannot abort a whole download batch.
 *
 * @param {{slug: string, author: string, skillMdContent: string}} skill -
 *   A successful metadata download result.
 * @returns {Promise<{slug: string, success: boolean, author?: string,
 *   method?: ('github'|'npm'), sourcePath?: string, size?: number,
 *   error?: string}>} Outcome of the download.
 */
async function downloadSkillSource(skill) {
  const sourceInfo = extractSourceInfo(skill.skillMdContent);

  if (!sourceInfo) {
    return {
      slug: skill.slug,
      success: false,
      error: 'No source info found in SKILL.md',
    };
  }

  let skillSourceDir;
  try {
    skillSourceDir = path.join(SOURCE_CODE_DIR, skill.slug);
    await fs.mkdir(skillSourceDir, { recursive: true });
  } catch (error) {
    // Report the failure for this skill instead of rejecting, which would
    // take down every other download running in the same Promise.all batch.
    return {
      slug: skill.slug,
      success: false,
      error: error.message,
    };
  }

  // Try GitHub first (safer)
  if (sourceInfo.github) {
    try {
      const result = await cloneAndExtract(sourceInfo.github, skillSourceDir);

      if (result.success) {
        return {
          slug: skill.slug,
          author: skill.author,
          success: true,
          method: 'github',
          sourcePath: result.sourcePath,
          size: result.repoSize,
        };
      }

      // NOTE(review): assumes an unsuccessful result carries an `error`
      // field — confirm against utils/github-clone.js.
      console.log(` GitHub clone failed for ${skill.slug}: ${result.error ?? 'unknown reason'}`);
    } catch (error) {
      console.log(` GitHub clone failed for ${skill.slug}: ${error.message}`);
    }
  }

  // Fallback to npm
  if (sourceInfo.npmPackage) {
    try {
      const result = await downloadAndExtract(sourceInfo.npmPackage, skillSourceDir);

      if (result.success) {
        return {
          slug: skill.slug,
          author: skill.author,
          success: true,
          method: 'npm',
          sourcePath: result.sourcePath,
          size: result.packageSize,
        };
      }

      // NOTE(review): assumes an unsuccessful result carries an `error`
      // field — confirm against utils/npm-download.js.
      console.log(` npm download failed for ${skill.slug}: ${result.error ?? 'unknown reason'}`);
    } catch (error) {
      console.log(` npm download failed for ${skill.slug}: ${error.message}`);
    }
  }

  return {
    slug: skill.slug,
    success: false,
    error: 'Neither GitHub nor npm source available',
  };
}
319
+
320
/**
 * Fetch the implementation source for every skill, CONCURRENT_DOWNLOADS at a
 * time, printing a progress line after each batch.
 *
 * @param {Array<object>} skillsWithMetadata - Successful metadata downloads.
 * @returns {Promise<Array<object>>} The download results whose `success` flag is set.
 */
async function downloadAllSources(skillsWithMetadata) {
  const total = skillsWithMetadata.length;
  console.log(`\nšŸ” Downloading source code for ${total} skills...\n`);

  // Number of collected results obtained through the given download method.
  const countByMethod = (list, method) =>
    list.filter((entry) => entry.method === method).length;

  const collected = [];

  for (let offset = 0; offset < total; offset += CONCURRENT_DOWNLOADS) {
    const batch = skillsWithMetadata.slice(offset, offset + CONCURRENT_DOWNLOADS);
    const batchResults = await Promise.all(
      batch.map((skill) => downloadSkillSource(skill))
    );
    collected.push(...batchResults);

    const done = collected.length;
    const okSoFar = collected.filter((entry) => entry.success).length;
    const percent = ((done / total) * 100).toFixed(1);

    console.log(` [${percent}%] ${done}/${total} (${okSoFar} ok: ${countByMethod(collected, 'github')} github, ${countByMethod(collected, 'npm')} npm)`);
  }

  const succeeded = collected.filter((entry) => entry.success);

  console.log(`\nāœ… Source download complete: ${succeeded.length}/${collected.length} successful`);
  console.log(` šŸ“¦ GitHub: ${countByMethod(collected, 'github')}, npm: ${countByMethod(collected, 'npm')}\n`);

  return succeeded;
}
355
+
356
/**
 * Scan a single skill's source code.
 *
 * Runs this package's `index.js scan-project <path> --format json` in a child
 * process and normalizes its JSON output. This function never rejects: a
 * failed or timed-out scan is reported as `success: false` with grade 'F'.
 *
 * @param {{slug: string, author: string, method: string, sourcePath: string}} skill -
 *   A successful source download result.
 * @returns {Promise<object>} Scan outcome for the skill.
 */
async function scanSkillSource(skill) {
  try {
    const scannerEntry = path.join(__dirname, '..', '..', 'index.js');

    // Pass arguments as an array with no shell in between. The source path is
    // derived from registry data, and a shell command string would both break
    // on paths containing spaces and expand `$(...)` or backticks inside
    // double quotes. process.execPath is the interpreter already running this
    // script.
    const { stdout } = await promisify(execFile)(
      process.execPath,
      [scannerEntry, 'scan-project', skill.sourcePath, '--format', 'json'],
      {
        timeout: SKILL_TIMEOUT_MS,
        maxBuffer: 10 * 1024 * 1024,
      }
    );

    const scanResult = JSON.parse(stdout);
    const findings = Array.isArray(scanResult.findings) ? scanResult.findings : [];

    return {
      slug: skill.slug,
      author: skill.author,
      method: skill.method,
      grade: scanResult.grade || 'F',
      findings,
      // Fall back to the list length so a report without an explicit count is
      // not treated as clean.
      findingsCount: scanResult.findingsCount || findings.length,
      criticalCount: scanResult.criticalFindings || 0,
      recommendation: scanResult.recommendation || '',
      success: true,
    };
  } catch (error) {
    return {
      slug: skill.slug,
      success: false,
      grade: 'F',
      error: error.message,
    };
  }
}
390
+
391
/**
 * Run the scanner over every downloaded skill, CONCURRENT_SCANS at a time,
 * printing a progress line after each batch.
 *
 * @param {Array<object>} skillsWithSources - Successful source downloads.
 * @returns {Promise<Array<object>>} One scan result per input skill, in input order.
 */
async function scanAllSources(skillsWithSources) {
  const total = skillsWithSources.length;
  console.log(`\nšŸ” Scanning ${total} skills...\n`);

  const scanned = [];

  for (let offset = 0; offset < total; offset += CONCURRENT_SCANS) {
    const batch = skillsWithSources.slice(offset, offset + CONCURRENT_SCANS);
    const batchResults = await Promise.all(
      batch.map((skill) => scanSkillSource(skill))
    );
    scanned.push(...batchResults);

    const percent = ((scanned.length / total) * 100).toFixed(1);
    console.log(` [${percent}%] Scanned ${scanned.length}/${total}`);
  }

  console.log(`\nāœ… Scan complete!\n`);

  return scanned;
}
416
+
417
/**
 * Aggregate per-skill scan results into a summary report.
 *
 * @param {Array<{grade: string, findings?: Array<{severity?: string, rule?: string}>,
 *   findingsCount?: number}>} scanResults - Results from the scan phase.
 * @returns {{summary: {totalSkills: number, vulnerableSkills: number,
 *   vulnerabilityRate: string, gradeDistribution: object, totalFindings: object},
 *   topIssues: Array<{rule: string, count: number}>, scanResults: Array<object>}}
 *   Summary counters, the ten most frequent rules, and the raw results.
 */
function generateReport(scanResults) {
  const gradeDistribution = { A: 0, B: 0, C: 0, D: 0, F: 0 };
  const totalFindings = { critical: 0, high: 0, medium: 0, low: 0 };
  const issuesByRule = new Map();

  for (const result of scanResults) {
    gradeDistribution[result.grade] = (gradeDistribution[result.grade] || 0) + 1;

    for (const finding of (result.findings || [])) {
      // Findings without a severity count as medium. String() keeps a
      // non-string severity from throwing on toLowerCase().
      const severity = String(finding.severity || 'MEDIUM').toLowerCase();
      totalFindings[severity] = (totalFindings[severity] || 0) + 1;

      const ruleId = finding.rule || 'unknown';
      issuesByRule.set(ruleId, (issuesByRule.get(ruleId) || 0) + 1);
    }
  }

  const vulnerableSkills = scanResults.filter((r) => r.findingsCount > 0).length;
  const topIssues = Array.from(issuesByRule.entries())
    .map(([rule, count]) => ({ rule, count }))
    .sort((a, b) => b.count - a.count)
    .slice(0, 10);

  // With no results the ratio is 0/0; report 0% rather than "NaN%".
  const rate = scanResults.length > 0
    ? (vulnerableSkills / scanResults.length) * 100
    : 0;

  return {
    summary: {
      totalSkills: scanResults.length,
      vulnerableSkills,
      vulnerabilityRate: rate.toFixed(1) + '%',
      gradeDistribution,
      totalFindings,
    },
    topIssues,
    scanResults,
  };
}
455
+
456
/**
 * Entry point: runs the five pipeline stages in order (list skills, download
 * SKILL.md files, download sources, scan, build report), writes the results
 * and the report to disk and prints a summary. Any error that escapes a stage
 * is printed and the process exits with code 1.
 */
async function main() {
  const banner = [
    'šŸ›”ļø ClawHub Full Implementation Scanner\n',
    '════════════════════════════════════════════════════════════',
    'SAFE MODE: GitHub clone + npm pack (NO code execution)',
    'DEEP ANALYSIS: AST + taint + regex + hallucination detection',
    '════════════════════════════════════════════════════════════\n',
  ];
  for (const bannerLine of banner) {
    console.log(bannerLine);
  }

  // Prints a blank separator line, a heading, then one indented row per entry.
  const printSection = (heading, rows) => {
    console.log('');
    console.log(heading);
    for (const row of rows) {
      console.log(` ${row}`);
    }
  };

  try {
    // Steps 1-4: each stage consumes the previous stage's output.
    const allSkills = await fetchAllSkills();
    const skillsWithMetadata = await downloadSkillMetadata(allSkills);
    const skillsWithSources = await downloadAllSources(skillsWithMetadata);
    const scanResults = await scanAllSources(skillsWithSources);

    // Step 5: aggregate
    const report = generateReport(scanResults);
    const { summary, topIssues } = report;

    // Persist the raw results and the aggregated report.
    await fs.mkdir(SCAN_DIR, { recursive: true });
    await fs.writeFile(RESULTS_FILE, JSON.stringify(scanResults, null, 2));
    await fs.writeFile(REPORT_FILE, JSON.stringify(report, null, 2));

    // Console summary
    console.log('\nšŸ“Š Results Summary\n');
    console.log('────────────────────────────────────────────────────────────');
    console.log(`Total skills scanned: ${summary.totalSkills}`);
    console.log(`Vulnerable skills: ${summary.vulnerableSkills} (${summary.vulnerabilityRate})`);

    printSection(
      'Grade Distribution:',
      Object.entries(summary.gradeDistribution).map(([grade, count]) => `${grade}: ${count}`)
    );
    printSection(
      'Total Findings:',
      Object.entries(summary.totalFindings).map(([severity, count]) => `${severity}: ${count}`)
    );
    printSection(
      'Top 10 Issues:',
      topIssues.map((issue) => `${issue.rule}: ${issue.count}`)
    );

    console.log('────────────────────────────────────────────────────────────\n');
    console.log(`āœ… Results saved to:`);
    console.log(` ${RESULTS_FILE}`);
    console.log(` ${REPORT_FILE}\n`);
  } catch (error) {
    console.error(`\nāŒ Fatal error: ${error.message}\n`);
    process.exit(1);
  }
}
517
+
518
+ main();