agent-security-scanner-mcp 3.10.3 → 3.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +41 -4
- package/analyzer.py +4 -0
- package/index.js +37 -14
- package/openclaw.plugin.json +3 -3
- package/package.json +3 -1
- package/skills/clawhub/CLAWPROOF.md +448 -0
- package/src/cli/audit.js +10 -3
- package/src/cli/demo.js +3 -13
- package/src/cli/doctor.js +15 -13
- package/src/cli/harden.js +10 -3
- package/src/cli/init.js +11 -5
- package/src/cli/scan-clawhub-full.js +518 -0
- package/src/cli/scan-clawhub-safe.js +393 -0
- package/src/cli/scan-clawhub.js +308 -0
- package/src/config.js +4 -1
- package/src/daemon-client.js +3 -1
- package/src/python.js +54 -0
- package/src/tools/scan-action.js +222 -2
- package/src/tools/scan-prompt.js +34 -0
- package/src/tools/scan-skill-prompt.js +547 -0
- package/src/tools/scan-skill.js +438 -80
- package/src/utils.js +71 -12
package/src/tools/scan-skill.js
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
// supply chain verification, and rug pull detection.
|
|
4
4
|
|
|
5
5
|
import { z } from "zod";
|
|
6
|
-
import { existsSync, readFileSync, readdirSync, statSync, writeFileSync, mkdirSync, unlinkSync } from "fs";
|
|
6
|
+
import { existsSync, readFileSync, readdirSync, statSync, lstatSync, realpathSync, writeFileSync, mkdirSync, unlinkSync, renameSync, chmodSync } from "fs";
|
|
7
7
|
import { resolve, basename, dirname, extname, join, sep } from "path";
|
|
8
8
|
import { createHash } from "crypto";
|
|
9
9
|
import { tmpdir, homedir } from "os";
|
|
@@ -54,8 +54,21 @@ const CODE_FILE_EXTENSIONS = new Set([
|
|
|
54
54
|
'.java', '.php', '.c', '.cpp', '.rs', '.cs', '.h', '.hpp',
|
|
55
55
|
]);
|
|
56
56
|
|
|
57
|
+
// Manifest / dependency files for supply-chain scanning
|
|
58
|
+
const MANIFEST_FILES = new Set([
|
|
59
|
+
'package.json', 'package-lock.json',
|
|
60
|
+
'requirements.txt', 'setup.py', 'setup.cfg', 'pyproject.toml',
|
|
61
|
+
'gemfile', 'gemfile.lock',
|
|
62
|
+
'cargo.toml', 'cargo.lock',
|
|
63
|
+
'go.mod', 'go.sum',
|
|
64
|
+
'composer.json', 'composer.lock',
|
|
65
|
+
]);
|
|
66
|
+
|
|
57
67
|
const MAX_FILE_SIZE = 500 * 1024; // 500 KB
|
|
58
|
-
const
|
|
68
|
+
const MAX_SKILL_MD_SIZE = 1024 * 1024; // 1 MB cap for SKILL.md
|
|
69
|
+
const MAX_SUPPORTING_FILES = 50;
|
|
70
|
+
const MAX_WALK_DEPTH = 5;
|
|
71
|
+
const MAX_TOTAL_WALK_BYTES = 5 * 1024 * 1024; // 5 MB cumulative
|
|
59
72
|
const SCAN_TIMEOUT_MS = 120_000; // 120s total scan timeout
|
|
60
73
|
|
|
61
74
|
const PYTHON_BUILTINS = new Set([
|
|
@@ -107,6 +120,14 @@ const SOURCE_WEIGHTS = {
|
|
|
107
120
|
|
|
108
121
|
const SEVERITY_MULTIPLIER = { CRITICAL: 4, HIGH: 2, MEDIUM: 1 };
|
|
109
122
|
|
|
123
|
+
// On Windows, paths are case-insensitive; normalize for containment checks.
const IS_WIN = process.platform === 'win32';

/**
 * Normalize a path for textual comparison.
 *
 * @param {string} p - Path to normalize.
 * @returns {string} `p` lower-cased on Windows, unchanged on other platforms.
 */
function normPath(p) { return IS_WIN ? p.toLowerCase() : p; }

/**
 * Report whether `child` is the same path as `parent` or lies beneath it.
 *
 * The comparison is purely textual (after `normPath`), so callers should pass
 * already-resolved paths. A separator boundary is required, so "/a" does not
 * contain "/ab".
 *
 * @param {string} child - Candidate path.
 * @param {string} parent - Directory that should contain `child`.
 * @returns {boolean} True when `child` equals `parent` or is nested under it.
 */
function pathStartsWith(child, parent) {
  const normChild = normPath(child);
  const normParent = normPath(parent);
  if (normChild === normParent) return true;
  // A filesystem root ("/" or "C:\") already ends with a separator; appending
  // another one would produce "//" and reject every child of the root.
  const prefix = normParent.endsWith(sep) ? normParent : normParent + sep;
  return normChild.startsWith(prefix);
}
|
|
129
|
+
const MAX_CLAWHAVOC_SCAN_LEN = 2 * 1024 * 1024; // 2 MB cap for regex matching
|
|
130
|
+
|
|
110
131
|
// ---------------------------------------------------------------------------
|
|
111
132
|
// Layer 4: ClawHavoc YAML loader (cached)
|
|
112
133
|
// ---------------------------------------------------------------------------
|
|
@@ -190,8 +211,46 @@ function loadClawHavocRules() {
|
|
|
190
211
|
|
|
191
212
|
async function runPromptScan(content) {
|
|
192
213
|
try {
|
|
193
|
-
|
|
214
|
+
// Strip YAML frontmatter (---\n...\n---) to reduce false positives from
|
|
215
|
+
// benign metadata keys. However, we still scan the frontmatter VALUES
|
|
216
|
+
// separately so that malicious content hidden in metadata is not missed.
|
|
217
|
+
let textToScan = content;
|
|
218
|
+
let frontmatterValues = '';
|
|
219
|
+
if (textToScan.startsWith('---\n') || textToScan.startsWith('---\r\n')) {
|
|
220
|
+
const endMarker = textToScan.match(/\r?\n---\s*(?:\r?\n|$)/);
|
|
221
|
+
if (endMarker) {
|
|
222
|
+
const rawFrontmatter = textToScan.substring(0, endMarker.index + endMarker[0].length);
|
|
223
|
+
textToScan = textToScan.substring(endMarker.index + endMarker[0].length);
|
|
224
|
+
// Extract YAML values (everything after the colon on each line)
|
|
225
|
+
for (const line of rawFrontmatter.split('\n')) {
|
|
226
|
+
const colonIdx = line.indexOf(':');
|
|
227
|
+
if (colonIdx > 0) {
|
|
228
|
+
const val = line.substring(colonIdx + 1).trim().replace(/^["']|["']$/g, '');
|
|
229
|
+
if (val.length > 10) frontmatterValues += val + '\n';
|
|
230
|
+
}
|
|
231
|
+
}
|
|
232
|
+
}
|
|
233
|
+
}
|
|
234
|
+
|
|
235
|
+
// Scan body + frontmatter values together
|
|
236
|
+
const combinedText = frontmatterValues ? textToScan + '\n' + frontmatterValues : textToScan;
|
|
237
|
+
const result = await scanAgentPrompt({ prompt_text: combinedText, verbosity: 'full' });
|
|
194
238
|
const parsed = JSON.parse(result.content[0].text);
|
|
239
|
+
|
|
240
|
+
// Handle oversized-input or error responses from the prompt scanner
|
|
241
|
+
if (parsed.error) {
|
|
242
|
+
return [{
|
|
243
|
+
category: 'prompt_scan_error',
|
|
244
|
+
severity: 'CRITICAL',
|
|
245
|
+
message: parsed.error,
|
|
246
|
+
matched_text: '',
|
|
247
|
+
file: 'SKILL.md',
|
|
248
|
+
source: 'prompt_scanner',
|
|
249
|
+
rule_id: 'prompt_scanner.oversized_or_error',
|
|
250
|
+
confidence: 'HIGH',
|
|
251
|
+
}];
|
|
252
|
+
}
|
|
253
|
+
|
|
195
254
|
return (parsed.findings || []).map(f => ({
|
|
196
255
|
category: f.category || 'prompt_injection',
|
|
197
256
|
severity: f.severity === 'ERROR' ? 'CRITICAL' : f.severity === 'WARNING' ? 'HIGH' : 'MEDIUM',
|
|
@@ -204,7 +263,17 @@ async function runPromptScan(content) {
|
|
|
204
263
|
}));
|
|
205
264
|
} catch (error) {
|
|
206
265
|
console.error("Layer 1 (prompt scan) failed:", error.message);
|
|
207
|
-
|
|
266
|
+
// Fail-closed: a crashed prompt scanner should not silently improve the grade
|
|
267
|
+
return [{
|
|
268
|
+
category: 'prompt_scan_error',
|
|
269
|
+
severity: 'HIGH',
|
|
270
|
+
message: `Prompt scanner failed: ${error.message}`,
|
|
271
|
+
matched_text: '',
|
|
272
|
+
file: 'SKILL.md',
|
|
273
|
+
source: 'prompt_scanner',
|
|
274
|
+
rule_id: 'prompt_scanner.layer_failure',
|
|
275
|
+
confidence: 'MEDIUM',
|
|
276
|
+
}];
|
|
208
277
|
}
|
|
209
278
|
}
|
|
210
279
|
|
|
@@ -214,24 +283,27 @@ async function runPromptScan(content) {
|
|
|
214
283
|
|
|
215
284
|
/**
 * Extract fenced code blocks from markdown text.
 *
 * Both backtick (```) and tilde (~~~) fences are recognised; the closing fence
 * must repeat the opening fence exactly (backreference \1). The language is
 * the first token of the fence's info string, lower-cased; anything after it
 * on the fence line (e.g. `title="x"`) is ignored. Blocks whose body is
 * shorter than 10 characters are dropped.
 *
 * @param {string} content - Markdown source.
 * @returns {{lang: string, code: string}[]} Extracted blocks in document order.
 */
function extractCodeBlocks(content) {
  const blocks = [];
  // Group 1: fence. Group 2: first token of the info string (may be empty,
  // may contain non-word characters such as "c++"). Group 3: block body.
  // The rest of the fence line is skipped so that info strings with
  // attributes do not cause the whole block to be missed by the scanner.
  const codeBlockRegex = /(`{3,}|~{3,})[ \t]*([^\s`]*)[^\r\n]*\r?\n([\s\S]*?)\1/g;
  for (const match of content.matchAll(codeBlockRegex)) {
    const lang = (match[2] || '').toLowerCase();
    const code = match[3];
    if (code.length < 10) continue;
    blocks.push({ lang, code });
  }
  return blocks;
}
|
|
227
298
|
|
|
228
|
-
async function runCodeBlockScan(blocks) {
|
|
299
|
+
async function runCodeBlockScan(blocks, signal) {
|
|
229
300
|
const findings = [];
|
|
230
301
|
|
|
231
302
|
for (const { lang, code } of blocks) {
|
|
303
|
+
if (signal && signal.aborted) break;
|
|
232
304
|
try {
|
|
233
|
-
// Shell blocks -> scanAgentAction
|
|
234
|
-
if (['bash', 'sh', 'shell', 'zsh'].includes(lang)) {
|
|
305
|
+
// Shell-like blocks -> scanAgentAction
|
|
306
|
+
if (['bash', 'sh', 'shell', 'zsh', 'powershell', 'ps1', 'bat', 'cmd', 'fish'].includes(lang)) {
|
|
235
307
|
const result = await scanAgentAction({
|
|
236
308
|
action_type: 'bash',
|
|
237
309
|
action_value: code,
|
|
@@ -262,7 +334,7 @@ async function runCodeBlockScan(blocks) {
|
|
|
262
334
|
|
|
263
335
|
try {
|
|
264
336
|
writeFileSync(tmpPath, code, 'utf-8');
|
|
265
|
-
const issues = await runAnalyzerAsync(tmpPath);
|
|
337
|
+
const issues = await runAnalyzerAsync(tmpPath, 'auto', signal);
|
|
266
338
|
if (Array.isArray(issues)) {
|
|
267
339
|
for (const issue of issues) {
|
|
268
340
|
findings.push({
|
|
@@ -292,31 +364,77 @@ async function runCodeBlockScan(blocks) {
|
|
|
292
364
|
// Layer 3: Supporting Files
|
|
293
365
|
// ---------------------------------------------------------------------------
|
|
294
366
|
|
|
295
|
-
|
|
296
|
-
|
|
367
|
+
/**
 * Recursively collect scannable files under a skill directory, respecting
 * the walk limits.
 *
 * Skipped: symlinks, `node_modules`, `__pycache__`, hidden entries (except
 * `.env*`, `.npmrc` and `.github`), non-regular files, files larger than
 * MAX_FILE_SIZE, the skill file itself, and files that are neither code
 * files, manifest files, nor `.env*` / `.npmrc` dotfiles.
 *
 * The walk stops once depth exceeds MAX_WALK_DEPTH, MAX_SUPPORTING_FILES
 * files have been collected, or MAX_TOTAL_WALK_BYTES has been reached.
 *
 * @param {string} dir - Directory to walk.
 * @param {string} skillFile - Path of the skill file, excluded from results.
 * @param {number} depth - Current recursion depth (0 for the root).
 * @param {{files: {filePath: string, relPath: string, size: number}[], totalBytes: number, rootLen: number}} state
 *   Accumulator mutated in place; `rootLen` is the length of the root
 *   directory path, used to derive each file's relative path.
 * @returns {void}
 */
function collectSupportingFiles(dir, skillFile, depth, state) {
  if (depth > MAX_WALK_DEPTH) return;
  if (state.files.length >= MAX_SUPPORTING_FILES) return;
  if (state.totalBytes >= MAX_TOTAL_WALK_BYTES) return;

  let entries;
  try { entries = readdirSync(dir); } catch { return; }
  // readdirSync order is OS-dependent. Sort so that, when a cap cuts the walk
  // short, the same subset of files is selected on every run and platform.
  entries.sort();

  for (const entry of entries) {
    if (state.files.length >= MAX_SUPPORTING_FILES) break;
    if (state.totalBytes >= MAX_TOTAL_WALK_BYTES) break;

    if (entry === 'node_modules' || entry === '__pycache__') continue;

    // Skip most hidden entries, but let security-sensitive ones through.
    // The match is case-insensitive so it agrees with the acceptance check
    // further down (e.g. ".ENV" is treated like ".env").
    const lowerEntry = entry.toLowerCase();
    const isSecurityDotfile = lowerEntry.startsWith('.env') || lowerEntry === '.npmrc';
    if (entry.startsWith('.') && !isSecurityDotfile && lowerEntry !== '.github') continue;

    const filePath = join(dir, entry);
    let lst;
    try { lst = lstatSync(filePath); } catch { continue; }

    // Reject symlinks
    if (lst.isSymbolicLink()) continue;

    if (lst.isDirectory()) {
      collectSupportingFiles(filePath, skillFile, depth + 1, state);
      continue;
    }

    if (!lst.isFile()) continue;
    if (lst.size > MAX_FILE_SIZE) continue;

    // Skip the skill file itself
    if (resolve(filePath) === resolve(skillFile)) continue;

    // Accept code files, manifest files, and security-sensitive dotfiles
    const ext = extname(entry).toLowerCase();
    if (!CODE_FILE_EXTENSIONS.has(ext) && !MANIFEST_FILES.has(lowerEntry) && !isSecurityDotfile) continue;

    state.totalBytes += lst.size;
    // Path relative to the walk root, with forward slashes and no leading slash
    const relPath = filePath.substring(state.rootLen).replace(/\\/g, '/').replace(/^\//, '');
    state.files.push({ filePath, relPath, size: lst.size });
  }
}
|
|
418
|
+
|
|
419
|
+
async function runSupportingFilesScan(skillDir, skillFile, preCollected, signal) {
|
|
420
|
+
const findings = [];
|
|
311
421
|
|
|
312
|
-
|
|
313
|
-
|
|
422
|
+
try {
|
|
423
|
+
const fileList = preCollected || (() => {
|
|
424
|
+
const state = { files: [], totalBytes: 0, rootLen: skillDir.length };
|
|
425
|
+
collectSupportingFiles(skillDir, skillFile, 0, state);
|
|
426
|
+
return state.files;
|
|
427
|
+
})();
|
|
428
|
+
|
|
429
|
+
for (const { filePath, relPath } of fileList) {
|
|
430
|
+
if (signal && signal.aborted) break;
|
|
431
|
+
try {
|
|
432
|
+
const ext = extname(filePath).toLowerCase();
|
|
314
433
|
|
|
315
|
-
|
|
316
|
-
if (
|
|
434
|
+
// Manifest / dependency files are handled by supply-chain layer — skip code analysis
|
|
435
|
+
if (MANIFEST_FILES.has(basename(filePath).toLowerCase())) continue;
|
|
317
436
|
|
|
318
|
-
const issues = await runAnalyzerAsync(filePath);
|
|
319
|
-
scannedCount++;
|
|
437
|
+
const issues = await runAnalyzerAsync(filePath, 'auto', signal);
|
|
320
438
|
if (Array.isArray(issues)) {
|
|
321
439
|
for (const issue of issues) {
|
|
322
440
|
findings.push({
|
|
@@ -324,7 +442,8 @@ async function runSupportingFilesScan(skillDir, skillFile) {
|
|
|
324
442
|
severity: issue.severity === 'error' ? 'HIGH' : issue.severity === 'warning' ? 'MEDIUM' : 'MEDIUM',
|
|
325
443
|
message: issue.message,
|
|
326
444
|
matched_text: (issue.line_content || '').substring(0, 200),
|
|
327
|
-
file:
|
|
445
|
+
file: relPath,
|
|
446
|
+
line: issue.line ?? undefined,
|
|
328
447
|
source: 'code_analysis',
|
|
329
448
|
rule_id: issue.ruleId || '',
|
|
330
449
|
confidence: 'HIGH',
|
|
@@ -332,7 +451,7 @@ async function runSupportingFilesScan(skillDir, skillFile) {
|
|
|
332
451
|
}
|
|
333
452
|
}
|
|
334
453
|
} catch (error) {
|
|
335
|
-
console.error(`Layer 3 (supporting file) failed for ${
|
|
454
|
+
console.error(`Layer 3 (supporting file) failed for ${relPath}:`, error.message);
|
|
336
455
|
}
|
|
337
456
|
}
|
|
338
457
|
} catch (error) {
|
|
@@ -353,7 +472,9 @@ function runClawHavocScan(content, codeBlocks) {
|
|
|
353
472
|
const rules = loadClawHavocRules();
|
|
354
473
|
// Concatenate all code block content for matching
|
|
355
474
|
const allCode = codeBlocks.map(b => b.code).join('\n');
|
|
356
|
-
|
|
475
|
+
// Cap total text to prevent ReDoS on pathological input
|
|
476
|
+
const raw = content + '\n' + allCode;
|
|
477
|
+
const scanText = raw.length > MAX_CLAWHAVOC_SCAN_LEN ? raw.substring(0, MAX_CLAWHAVOC_SCAN_LEN) : raw;
|
|
357
478
|
|
|
358
479
|
for (const rule of rules) {
|
|
359
480
|
let matched = false;
|
|
@@ -392,27 +513,94 @@ function runClawHavocScan(content, codeBlocks) {
|
|
|
392
513
|
// Layer 5: Package Supply Chain
|
|
393
514
|
// ---------------------------------------------------------------------------
|
|
394
515
|
|
|
395
|
-
|
|
516
|
+
/**
 * Extract declared dependency names from a manifest file.
 *
 * Supported manifests (matched on the lower-cased base name of `filePath`):
 *   - package.json     -> npm      (keys of the four *dependencies objects)
 *   - requirements.txt -> pypi     (one requirement per line)
 *   - Gemfile          -> rubygems (`gem 'name'` lines)
 *   - Cargo.toml       -> crates   (keys in dependency sections and
 *                                   `[dependencies.<name>]` table headers)
 *   - go.mod           -> go       (flagged `unsupported`, no packages)
 *
 * @param {string} filePath - Path of the manifest; only its base name is used.
 * @param {string} content - Text content of the manifest.
 * @returns {{ecosystem: string|null, packages: string[], unsupported?: boolean}}
 *   `ecosystem` is null for unrecognised files or when parsing throws.
 */
function extractPackagesFromManifest(filePath, content) {
  const fileName = basename(filePath).toLowerCase();
  const packages = [];

  try {
    if (fileName === 'package.json') {
      const pkg = JSON.parse(content);
      for (const depKey of ['dependencies', 'devDependencies', 'peerDependencies', 'optionalDependencies']) {
        if (pkg[depKey] && typeof pkg[depKey] === 'object') {
          packages.push(...Object.keys(pkg[depKey]));
        }
      }
      return { ecosystem: 'npm', packages };
    }

    if (fileName === 'requirements.txt') {
      // Plausible distribution name: letters, digits, '.', '_' and '-'.
      const pypiName = /^[A-Za-z0-9](?:[A-Za-z0-9._-]*[A-Za-z0-9])?$/;
      for (const line of content.split('\n')) {
        const trimmed = line.trim();
        // Blank lines, comments and pip options (-r, -e, --index-url, ...)
        if (!trimmed || trimmed.startsWith('#') || trimmed.startsWith('-')) continue;
        // Cut at the first version operator, extras bracket, marker, or
        // direct-reference '@' ("name @ url").
        const pkg = trimmed.split(/[><=!~\[\s;@]/)[0].trim();
        // URL / VCS / path requirements (e.g. "git+https://...") are not
        // registry names; reporting them would yield bogus findings.
        if (pkg && pypiName.test(pkg)) packages.push(pkg);
      }
      return { ecosystem: 'pypi', packages };
    }

    if (fileName === 'gemfile') {
      const gemMatches = content.matchAll(/^\s*gem\s+['"]([^'"]+)['"]/gm);
      for (const m of gemMatches) packages.push(m[1]);
      return { ecosystem: 'rubygems', packages };
    }

    if (fileName === 'cargo.toml') {
      // Section-aware: keys count as packages only under [dependencies],
      // [dev-dependencies], [build-dependencies] or a dotted header ending in
      // one of those (e.g. [target.'cfg(unix)'.dependencies]).
      const depSection = /(?:^|\.)(?:dependencies|dev-dependencies|build-dependencies)$/i;
      // Table form: [dependencies.<name>] declares <name> in the header; the
      // keys inside that table (version, features, ...) are not packages.
      const depTable = /(?:^|\.)(?:dependencies|dev-dependencies|build-dependencies)\.["']?([A-Za-z0-9_-]+)["']?$/i;
      let inDepSection = false;
      for (const line of content.split('\n')) {
        const trimmed = line.trim();
        const sectionMatch = trimmed.match(/^\[([^\]]+)\]/);
        if (sectionMatch) {
          const section = sectionMatch[1].trim();
          const tableMatch = section.match(depTable);
          if (tableMatch) packages.push(tableMatch[1]);
          inDepSection = depSection.test(section);
          continue;
        }
        if (!inDepSection) continue;
        // "name = ..." lines within a dependency section
        const depMatch = trimmed.match(/^([a-zA-Z0-9_-]+)\s*=/);
        if (depMatch) {
          packages.push(depMatch[1]);
        }
      }
      return { ecosystem: 'crates', packages };
    }

    // go.mod: return a sentinel so callers can surface an informational
    // finding instead of silently ignoring the manifest.
    if (fileName === 'go.mod') {
      return { ecosystem: 'go', packages: [], unsupported: true };
    }
  } catch {
    // Parse failed — fall through to the "no ecosystem" result below.
  }

  return { ecosystem: null, packages: [] };
}
|
|
584
|
+
|
|
585
|
+
async function runSupplyChainScan(codeBlocks, skillDir, skillFile, preCollected, signal) {
|
|
396
586
|
const findings = [];
|
|
397
587
|
const checked = new Set();
|
|
398
588
|
|
|
399
589
|
try {
|
|
590
|
+
// 1. Scan code blocks (existing behavior)
|
|
400
591
|
for (const { lang, code } of codeBlocks) {
|
|
592
|
+
if (signal && signal.aborted) break;
|
|
401
593
|
let packages = [];
|
|
402
594
|
let ecosystem = null;
|
|
403
595
|
|
|
404
|
-
// JS/TS imports
|
|
405
596
|
if (['javascript', 'js', 'typescript', 'ts'].includes(lang)) {
|
|
406
597
|
ecosystem = 'npm';
|
|
407
|
-
// require('pkg')
|
|
408
598
|
const requireMatches = code.matchAll(/require\s*\(\s*['"]([^'"]+)['"]\s*\)/g);
|
|
409
599
|
for (const m of requireMatches) packages.push(m[1]);
|
|
410
|
-
// import ... from 'pkg'
|
|
411
600
|
const importFromMatches = code.matchAll(/import\s+(?:[\s\S]*?\s+from\s+)?['"]([^'"]+)['"]/g);
|
|
412
601
|
for (const m of importFromMatches) packages.push(m[1]);
|
|
413
602
|
}
|
|
414
603
|
|
|
415
|
-
// Python imports
|
|
416
604
|
if (['python', 'py'].includes(lang)) {
|
|
417
605
|
ecosystem = 'pypi';
|
|
418
606
|
const importMatches = code.matchAll(/^\s*import\s+(\S+)/gm);
|
|
@@ -424,27 +612,20 @@ async function runSupplyChainScan(codeBlocks) {
|
|
|
424
612
|
if (!ecosystem || packages.length === 0) continue;
|
|
425
613
|
|
|
426
614
|
for (let pkg of packages) {
|
|
427
|
-
// Skip relative imports
|
|
428
615
|
if (pkg.startsWith('.') || pkg.startsWith('/')) continue;
|
|
429
616
|
|
|
430
|
-
// Normalize package names
|
|
431
617
|
if (ecosystem === 'npm') {
|
|
432
|
-
// Scoped packages: @scope/name -> @scope/name
|
|
433
|
-
// Non-scoped: take first segment before /
|
|
434
618
|
if (pkg.startsWith('@')) {
|
|
435
619
|
const parts = pkg.split('/');
|
|
436
620
|
pkg = parts.length >= 2 ? `${parts[0]}/${parts[1]}` : pkg;
|
|
437
621
|
} else {
|
|
438
622
|
pkg = pkg.split('/')[0];
|
|
439
623
|
}
|
|
440
|
-
// Skip Node builtins
|
|
441
624
|
if (NODE_BUILTINS.has(pkg)) continue;
|
|
442
625
|
}
|
|
443
626
|
|
|
444
627
|
if (ecosystem === 'pypi') {
|
|
445
|
-
// Take the top-level module name
|
|
446
628
|
pkg = pkg.split('.')[0];
|
|
447
|
-
// Skip Python builtins
|
|
448
629
|
if (PYTHON_BUILTINS.has(pkg)) continue;
|
|
449
630
|
}
|
|
450
631
|
|
|
@@ -471,6 +652,73 @@ async function runSupplyChainScan(codeBlocks) {
|
|
|
471
652
|
}
|
|
472
653
|
}
|
|
473
654
|
}
|
|
655
|
+
|
|
656
|
+
// 2. Scan manifest / dependency files in skill directory
|
|
657
|
+
const fileList = preCollected || (() => {
|
|
658
|
+
const state = { files: [], totalBytes: 0, rootLen: skillDir.length };
|
|
659
|
+
collectSupportingFiles(skillDir, skillFile, 0, state);
|
|
660
|
+
return state.files;
|
|
661
|
+
})();
|
|
662
|
+
for (const { filePath } of fileList) {
|
|
663
|
+
if (signal && signal.aborted) break;
|
|
664
|
+
const fname = basename(filePath).toLowerCase();
|
|
665
|
+
if (!MANIFEST_FILES.has(fname)) continue;
|
|
666
|
+
|
|
667
|
+
try {
|
|
668
|
+
const content = readFileSync(filePath, 'utf-8');
|
|
669
|
+
const manifest = extractPackagesFromManifest(filePath, content);
|
|
670
|
+
const { ecosystem, packages } = manifest;
|
|
671
|
+
if (!ecosystem) continue;
|
|
672
|
+
|
|
673
|
+
// Surface unsupported ecosystems as informational finding
|
|
674
|
+
if (manifest.unsupported) {
|
|
675
|
+
const relPath = filePath.substring(skillDir.length).replace(/\\/g, '/').replace(/^\//, '');
|
|
676
|
+
findings.push({
|
|
677
|
+
category: 'unsupported_ecosystem',
|
|
678
|
+
severity: 'MEDIUM',
|
|
679
|
+
message: `${fname} found but "${ecosystem}" ecosystem is not yet supported for supply-chain verification`,
|
|
680
|
+
matched_text: fname,
|
|
681
|
+
file: relPath,
|
|
682
|
+
source: 'supply_chain',
|
|
683
|
+
rule_id: `supply_chain.unsupported.${ecosystem}`,
|
|
684
|
+
confidence: 'HIGH',
|
|
685
|
+
});
|
|
686
|
+
continue;
|
|
687
|
+
}
|
|
688
|
+
|
|
689
|
+
if (packages.length === 0) continue;
|
|
690
|
+
|
|
691
|
+
for (let pkg of packages) {
|
|
692
|
+
if (ecosystem === 'npm' && NODE_BUILTINS.has(pkg)) continue;
|
|
693
|
+
if (ecosystem === 'pypi' && PYTHON_BUILTINS.has(pkg)) continue;
|
|
694
|
+
|
|
695
|
+
const key = `${ecosystem}:${pkg}`;
|
|
696
|
+
if (checked.has(key)) continue;
|
|
697
|
+
checked.add(key);
|
|
698
|
+
|
|
699
|
+
try {
|
|
700
|
+
const result = isHallucinated(pkg, ecosystem);
|
|
701
|
+
if (result.hallucinated) {
|
|
702
|
+
const relPath = filePath.substring(skillDir.length).replace(/\\/g, '/').replace(/^\//, '');
|
|
703
|
+
findings.push({
|
|
704
|
+
category: 'hallucinated_package',
|
|
705
|
+
severity: 'CRITICAL',
|
|
706
|
+
message: `Package "${pkg}" not found in ${ecosystem} registry — possible hallucinated or malicious dependency`,
|
|
707
|
+
matched_text: pkg,
|
|
708
|
+
file: relPath,
|
|
709
|
+
source: 'supply_chain',
|
|
710
|
+
rule_id: `supply_chain.hallucinated.${ecosystem}`,
|
|
711
|
+
confidence: result.bloomFilter ? 'MEDIUM' : 'HIGH',
|
|
712
|
+
});
|
|
713
|
+
}
|
|
714
|
+
} catch (error) {
|
|
715
|
+
console.error(`Layer 5 (supply chain) manifest check failed for ${pkg}:`, error.message);
|
|
716
|
+
}
|
|
717
|
+
}
|
|
718
|
+
} catch {
|
|
719
|
+
// Skip unreadable manifests
|
|
720
|
+
}
|
|
721
|
+
}
|
|
474
722
|
} catch (error) {
|
|
475
723
|
console.error("Layer 5 (supply chain scan) failed:", error.message);
|
|
476
724
|
}
|
|
@@ -487,33 +735,57 @@ function getBaselineDir() {
|
|
|
487
735
|
}
|
|
488
736
|
|
|
489
737
|
/**
 * Build the baseline file path for a skill directory.
 *
 * The file name is `<slug>-<hash>.json`, where the slug is the directory's
 * base name with every character outside [a-zA-Z0-9_-] replaced by "_" and
 * truncated to 64 characters, and the hash is the first 12 hex characters of
 * the SHA-256 of the full `skillDir` string. Including the path hash keeps
 * skills that share a folder name in different locations from colliding.
 *
 * @param {string} skillDir - Skill directory path.
 * @returns {string} Path of the baseline JSON file inside the baseline dir.
 */
function getBaselinePath(skillDir) {
  const folderName = basename(skillDir);
  const safeSlug = folderName.replace(/[^a-zA-Z0-9_-]/g, '_').substring(0, 64);
  const digest = createHash('sha256').update(skillDir).digest('hex');
  const fileName = `${safeSlug}-${digest.substring(0, 12)}.json`;
  return join(getBaselineDir(), fileName);
}
|
|
493
744
|
|
|
494
|
-
function
|
|
495
|
-
return createHash('sha256').update(content).digest('hex');
|
|
496
|
-
}
|
|
497
|
-
|
|
498
|
-
function runRugPullCheck(content, skillDir, saveBaseline) {
|
|
745
|
+
function runRugPullCheck(content, skillDir, saveBaseline, collectedFiles) {
|
|
499
746
|
const findings = [];
|
|
500
|
-
|
|
747
|
+
// Hash SKILL.md + all supporting files with path boundaries and sorted
|
|
748
|
+
// order so the hash is canonical and structural changes are detected.
|
|
749
|
+
const hasher = createHash('sha256');
|
|
750
|
+
hasher.update('SKILL.md\0');
|
|
751
|
+
hasher.update(content);
|
|
752
|
+
if (collectedFiles) {
|
|
753
|
+
// Sort by relative path for deterministic ordering (readdirSync order is OS-dependent)
|
|
754
|
+
const sorted = [...collectedFiles].sort((a, b) => a.relPath.localeCompare(b.relPath));
|
|
755
|
+
for (const { filePath, relPath } of sorted) {
|
|
756
|
+
try {
|
|
757
|
+
hasher.update('\0' + relPath + '\0');
|
|
758
|
+
hasher.update(readFileSync(filePath));
|
|
759
|
+
} catch { /* skip unreadable */ }
|
|
760
|
+
}
|
|
761
|
+
}
|
|
762
|
+
const hash = hasher.digest('hex');
|
|
501
763
|
|
|
502
764
|
try {
|
|
503
765
|
const baselinePath = getBaselinePath(skillDir);
|
|
504
766
|
|
|
505
767
|
if (saveBaseline) {
|
|
506
|
-
// Save baseline
|
|
768
|
+
// Save baseline with atomic write (temp + rename) and restrictive perms
|
|
507
769
|
const baselineDir = getBaselineDir();
|
|
508
770
|
if (!existsSync(baselineDir)) {
|
|
509
|
-
mkdirSync(baselineDir, { recursive: true });
|
|
771
|
+
mkdirSync(baselineDir, { recursive: true, mode: 0o700 });
|
|
510
772
|
}
|
|
511
|
-
|
|
773
|
+
const data = JSON.stringify({
|
|
512
774
|
hash,
|
|
513
775
|
skill_path: skillDir,
|
|
514
776
|
saved_at: new Date().toISOString(),
|
|
515
777
|
content_length: content.length,
|
|
516
|
-
}, null, 2)
|
|
778
|
+
}, null, 2);
|
|
779
|
+
const tmpFile = baselinePath + `.tmp.${process.pid}.${Date.now().toString(36)}${Math.random().toString(36).slice(2, 6)}`;
|
|
780
|
+
writeFileSync(tmpFile, data, { encoding: 'utf-8', mode: 0o600 });
|
|
781
|
+
try {
|
|
782
|
+
renameSync(tmpFile, baselinePath);
|
|
783
|
+
} catch (renameErr) {
|
|
784
|
+
try { unlinkSync(tmpFile); } catch { /* best effort cleanup */ }
|
|
785
|
+
throw renameErr;
|
|
786
|
+
}
|
|
787
|
+
// On platforms where rename doesn't preserve mode, enforce it
|
|
788
|
+
try { chmodSync(baselinePath, 0o600); } catch { /* best effort */ }
|
|
517
789
|
} else if (existsSync(baselinePath)) {
|
|
518
790
|
// Compare against baseline
|
|
519
791
|
try {
|
|
@@ -550,7 +822,10 @@ function deduplicateFindings(findings) {
|
|
|
550
822
|
const unique = [];
|
|
551
823
|
|
|
552
824
|
for (const f of findings) {
|
|
553
|
-
|
|
825
|
+
// Include source, line, and normalized matched_text so that distinct
|
|
826
|
+
// findings on different lines are not collapsed.
|
|
827
|
+
const normText = (f.matched_text || '').trim().substring(0, 80).toLowerCase();
|
|
828
|
+
const key = `${f.rule_id || f.message}::${f.source || ''}::${f.file}::${f.line ?? ''}::${normText}`;
|
|
554
829
|
if (seen.has(key)) continue;
|
|
555
830
|
seen.add(key);
|
|
556
831
|
unique.push(f);
|
|
@@ -606,15 +881,20 @@ export async function scanSkill({ skill_path, verbosity, baseline }) {
|
|
|
606
881
|
// Path resolution
|
|
607
882
|
const resolvedPath = resolve(skill_path);
|
|
608
883
|
|
|
609
|
-
// Path containment —
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
const
|
|
613
|
-
|
|
884
|
+
// Path containment — check on resolved path FIRST (before existence)
|
|
885
|
+
// so that invalid external paths get rejected with the right error message.
|
|
886
|
+
// Use raw cwd here (resolvedPath is also non-canonical at this point).
|
|
887
|
+
const rawCwd = process.cwd();
|
|
888
|
+
const allowedSkillRoots = [
|
|
889
|
+
resolve(homedir(), '.openclaw', 'skills'),
|
|
890
|
+
resolve(homedir(), '.openclaw', 'workspace', 'skills'),
|
|
891
|
+
];
|
|
892
|
+
const isAllowed = pathStartsWith(resolvedPath, rawCwd)
|
|
893
|
+
|| allowedSkillRoots.some(root => pathStartsWith(resolvedPath, root));
|
|
614
894
|
if (!isAllowed) {
|
|
615
895
|
return {
|
|
616
896
|
content: [{ type: "text", text: JSON.stringify({
|
|
617
|
-
error: "skill_path must be within the current working directory or ~/.openclaw/skills/",
|
|
897
|
+
error: "skill_path must be within the current working directory or ~/.openclaw/skills/ (or ~/.openclaw/workspace/skills/)",
|
|
618
898
|
skill_path: resolvedPath
|
|
619
899
|
}) }]
|
|
620
900
|
};
|
|
@@ -626,15 +906,45 @@ export async function scanSkill({ skill_path, verbosity, baseline }) {
|
|
|
626
906
|
};
|
|
627
907
|
}
|
|
628
908
|
|
|
629
|
-
|
|
909
|
+
// Reject symlinks at the top level to prevent symlink-based path escapes
|
|
910
|
+
const topStat = lstatSync(resolvedPath);
|
|
911
|
+
if (topStat.isSymbolicLink()) {
|
|
912
|
+
return {
|
|
913
|
+
content: [{ type: "text", text: JSON.stringify({
|
|
914
|
+
error: "Symbolic links are not allowed as skill_path — resolve the real path first",
|
|
915
|
+
skill_path: resolvedPath
|
|
916
|
+
}) }]
|
|
917
|
+
};
|
|
918
|
+
}
|
|
919
|
+
|
|
920
|
+
// Resolve to real path and re-verify containment (defeats symlink escapes)
|
|
921
|
+
// Use canonical cwd here since realPath is also canonical.
|
|
922
|
+
const realPath = realpathSync(resolvedPath);
|
|
923
|
+
let canonCwd;
|
|
924
|
+
try { canonCwd = realpathSync(rawCwd); } catch { canonCwd = rawCwd; }
|
|
925
|
+
const canonRoots = allowedSkillRoots.map(root => {
|
|
926
|
+
try { return realpathSync(root); } catch { return root; }
|
|
927
|
+
});
|
|
928
|
+
const realAllowed = pathStartsWith(realPath, canonCwd)
|
|
929
|
+
|| canonRoots.some(root => pathStartsWith(realPath, root));
|
|
930
|
+
if (!realAllowed) {
|
|
931
|
+
return {
|
|
932
|
+
content: [{ type: "text", text: JSON.stringify({
|
|
933
|
+
error: "skill_path must be within the current working directory or ~/.openclaw/skills/ (or ~/.openclaw/workspace/skills/)",
|
|
934
|
+
skill_path: realPath
|
|
935
|
+
}) }]
|
|
936
|
+
};
|
|
937
|
+
}
|
|
938
|
+
|
|
939
|
+
const stat = statSync(realPath);
|
|
630
940
|
let skillDir, skillFile;
|
|
631
941
|
|
|
632
942
|
if (stat.isDirectory()) {
|
|
633
|
-
skillDir =
|
|
634
|
-
skillFile = resolve(
|
|
943
|
+
skillDir = realPath;
|
|
944
|
+
skillFile = resolve(realPath, 'SKILL.md');
|
|
635
945
|
} else {
|
|
636
|
-
skillDir = dirname(
|
|
637
|
-
skillFile =
|
|
946
|
+
skillDir = dirname(realPath);
|
|
947
|
+
skillFile = realPath;
|
|
638
948
|
}
|
|
639
949
|
|
|
640
950
|
if (!existsSync(skillFile)) {
|
|
@@ -643,6 +953,19 @@ export async function scanSkill({ skill_path, verbosity, baseline }) {
|
|
|
643
953
|
};
|
|
644
954
|
}
|
|
645
955
|
|
|
956
|
+
// Enforce size cap before reading to prevent OOM on adversarial inputs
|
|
957
|
+
const skillStat = statSync(skillFile);
|
|
958
|
+
if (skillStat.size > MAX_SKILL_MD_SIZE) {
|
|
959
|
+
return {
|
|
960
|
+
content: [{ type: "text", text: JSON.stringify({
|
|
961
|
+
error: `SKILL.md exceeds size limit (${(skillStat.size / 1024 / 1024).toFixed(1)} MB > 1 MB)`,
|
|
962
|
+
skill_path: realPath,
|
|
963
|
+
grade: 'F',
|
|
964
|
+
recommendation: 'SKILL.md is abnormally large — possible resource exhaustion attack',
|
|
965
|
+
}, null, 2) }]
|
|
966
|
+
};
|
|
967
|
+
}
|
|
968
|
+
|
|
646
969
|
const content = readFileSync(skillFile, 'utf-8');
|
|
647
970
|
const codeBlocks = extractCodeBlocks(content);
|
|
648
971
|
|
|
@@ -651,25 +974,59 @@ export async function scanSkill({ skill_path, verbosity, baseline }) {
|
|
|
651
974
|
// L1, L2, L3, L5 run in parallel. L4 and L6 are synchronous — run after.
|
|
652
975
|
// ---------------------------------------------------------------------------
|
|
653
976
|
|
|
977
|
+
// Collect supporting files once and share between L3 and L5
|
|
978
|
+
const supportingState = { files: [], totalBytes: 0, rootLen: skillDir.length };
|
|
979
|
+
collectSupportingFiles(skillDir, skillFile, 0, supportingState);
|
|
980
|
+
const collectedFiles = supportingState.files;
|
|
981
|
+
|
|
982
|
+
// AbortController allows layers to check signal.aborted between iterations
|
|
983
|
+
// so they stop starting new work after the timeout fires.
|
|
984
|
+
const abortController = new AbortController();
|
|
985
|
+
const { signal } = abortController;
|
|
986
|
+
|
|
654
987
|
const scanPromise = (async () => {
|
|
988
|
+
const timings = {};
|
|
989
|
+
const wallStart = Date.now();
|
|
990
|
+
|
|
991
|
+
// Timed wrappers: run a scan layer and record its elapsed milliseconds in timings[label]
|
|
992
|
+
/**
 * Await an asynchronous scan layer and record how long it took.
 *
 * @param {string} label - Key under which the elapsed time is stored in `timings`.
 * @param {() => Promise<*>} fn - Thunk that starts the layer and resolves to its result.
 * @returns {Promise<*>} Whatever `fn` resolves to; a rejection from `fn` propagates unchanged.
 */
async function timed(label, fn) {
  const start = Date.now();
  try {
    return await fn();
  } finally {
    // Written in `finally` so a layer that rejects still leaves its duration
    // (in milliseconds) behind instead of silently having no entry.
    timings[label] = Date.now() - start;
  }
}
|
|
998
|
+
/**
 * Run a synchronous scan layer and record how long it took.
 *
 * @param {string} label - Key under which the elapsed time is stored in `timings`.
 * @param {() => *} fn - Thunk that runs the layer and returns its result.
 * @returns {*} Whatever `fn` returns; an exception thrown by `fn` propagates unchanged.
 */
function timedSync(label, fn) {
  const start = Date.now();
  try {
    return fn();
  } finally {
    // Written in `finally` so a layer that throws still leaves its duration
    // (in milliseconds) behind instead of silently having no entry.
    timings[label] = Date.now() - start;
  }
}
|
|
1004
|
+
|
|
655
1005
|
const [promptFindings, codeBlockFindings, supportingFindings, supplyChainFindings] =
|
|
656
1006
|
await Promise.all([
|
|
657
|
-
runPromptScan(content),
|
|
658
|
-
runCodeBlockScan(codeBlocks),
|
|
659
|
-
runSupportingFilesScan(skillDir, skillFile),
|
|
660
|
-
runSupplyChainScan(codeBlocks),
|
|
1007
|
+
timed('prompt_scan', () => runPromptScan(content)), // L1
|
|
1008
|
+
timed('code_blocks', () => runCodeBlockScan(codeBlocks, signal)), // L2
|
|
1009
|
+
timed('supporting_files', () => runSupportingFilesScan(skillDir, skillFile, collectedFiles, signal)), // L3
|
|
1010
|
+
timed('supply_chain', () => runSupplyChainScan(codeBlocks, skillDir, skillFile, collectedFiles, signal)), // L5
|
|
661
1011
|
]);
|
|
662
1012
|
|
|
663
|
-
|
|
1013
|
+
if (signal.aborted) throw new Error('Scan timed out after 120s');
|
|
1014
|
+
|
|
1015
|
+
const clawHavocFindings = timedSync('clawhavoc', () => runClawHavocScan(content, codeBlocks)); // L4
|
|
664
1016
|
const { findings: rugPullFindings, hash: contentHash } =
|
|
665
|
-
runRugPullCheck(content, skillDir, !!baseline);
|
|
1017
|
+
timedSync('rug_pull', () => runRugPullCheck(content, skillDir, !!baseline, collectedFiles)); // L6
|
|
1018
|
+
|
|
1019
|
+
timings.total = Date.now() - wallStart;
|
|
666
1020
|
|
|
667
|
-
return { promptFindings, codeBlockFindings, supportingFindings, clawHavocFindings, supplyChainFindings, rugPullFindings, contentHash };
|
|
1021
|
+
return { promptFindings, codeBlockFindings, supportingFindings, clawHavocFindings, supplyChainFindings, rugPullFindings, contentHash, timings };
|
|
668
1022
|
})();
|
|
669
1023
|
|
|
670
1024
|
let timeoutId;
|
|
671
1025
|
const timeoutPromise = new Promise((_, reject) => {
|
|
672
|
-
timeoutId = setTimeout(() =>
|
|
1026
|
+
timeoutId = setTimeout(() => {
|
|
1027
|
+
abortController.abort();
|
|
1028
|
+
reject(new Error('Scan timed out after 120s'));
|
|
1029
|
+
}, SCAN_TIMEOUT_MS);
|
|
673
1030
|
});
|
|
674
1031
|
|
|
675
1032
|
let layerResults;
|
|
@@ -680,7 +1037,7 @@ export async function scanSkill({ skill_path, verbosity, baseline }) {
|
|
|
680
1037
|
return {
|
|
681
1038
|
content: [{ type: "text", text: JSON.stringify({
|
|
682
1039
|
error: error.message,
|
|
683
|
-
skill_path:
|
|
1040
|
+
skill_path: realPath,
|
|
684
1041
|
grade: 'F',
|
|
685
1042
|
recommendation: 'Scan failed — could not complete analysis within time limit',
|
|
686
1043
|
}, null, 2) }]
|
|
@@ -688,7 +1045,7 @@ export async function scanSkill({ skill_path, verbosity, baseline }) {
|
|
|
688
1045
|
}
|
|
689
1046
|
clearTimeout(timeoutId);
|
|
690
1047
|
|
|
691
|
-
const { promptFindings, codeBlockFindings, supportingFindings, clawHavocFindings, supplyChainFindings, rugPullFindings, contentHash } = layerResults;
|
|
1048
|
+
const { promptFindings, codeBlockFindings, supportingFindings, clawHavocFindings, supplyChainFindings, rugPullFindings, contentHash, timings } = layerResults;
|
|
692
1049
|
|
|
693
1050
|
// ---------------------------------------------------------------------------
|
|
694
1051
|
// Merge, deduplicate, grade
|
|
@@ -722,7 +1079,7 @@ export async function scanSkill({ skill_path, verbosity, baseline }) {
|
|
|
722
1079
|
const level = verbosity || 'compact';
|
|
723
1080
|
|
|
724
1081
|
const result = {
|
|
725
|
-
skill_path:
|
|
1082
|
+
skill_path: realPath,
|
|
726
1083
|
grade,
|
|
727
1084
|
findings_count: allFindings.length,
|
|
728
1085
|
recommendation,
|
|
@@ -731,11 +1088,12 @@ export async function scanSkill({ skill_path, verbosity, baseline }) {
|
|
|
731
1088
|
if (level === 'full') {
|
|
732
1089
|
result.content_hash = contentHash;
|
|
733
1090
|
result.layers_executed = layersExecuted;
|
|
1091
|
+
result.timings_ms = timings;
|
|
734
1092
|
result.findings = allFindings;
|
|
735
1093
|
} else if (level === 'compact') {
|
|
736
1094
|
result.findings = allFindings;
|
|
737
1095
|
}
|
|
738
|
-
// 'minimal' — omit findings array and
|
|
1096
|
+
// 'minimal' — omit findings array, layers_executed, and timings
|
|
739
1097
|
|
|
740
1098
|
return {
|
|
741
1099
|
content: [{ type: "text", text: JSON.stringify(result, null, 2) }]
|