agent-security-scanner-mcp 3.3.0 → 3.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +224 -2
- package/analyzer.py +22 -5
- package/cross_file_analyzer.py +216 -0
- package/index.js +104 -4
- package/package.json +10 -3
- package/pattern_matcher.py +1 -0
- package/regex_fallback.py +199 -1
- package/scripts/postinstall.js +25 -0
- package/src/cli/init-hooks.js +164 -0
- package/src/config.js +181 -0
- package/src/context.js +228 -0
- package/src/dedup.js +129 -0
- package/src/fix-patterns.js +66 -17
- package/src/tools/fix-security.js +31 -4
- package/src/tools/scan-diff.js +151 -0
- package/src/tools/scan-project.js +308 -0
- package/src/tools/scan-security.js +33 -5
- package/src/utils.js +76 -7
package/README.md
CHANGED
|
@@ -14,6 +14,8 @@ Security scanner for AI coding agents and autonomous assistants. Scans code for
|
|
|
14
14
|
|------|-------------|-------------|
|
|
15
15
|
| `scan_security` | Scan code for vulnerabilities (1700+ rules, 12 languages) with AST and taint analysis | After writing or editing any code file |
|
|
16
16
|
| `fix_security` | Auto-fix all detected vulnerabilities (120 fix templates) | After `scan_security` finds issues |
|
|
17
|
+
| `scan_git_diff` | Scan only changed files in git diff | Before commits or in PR reviews |
|
|
18
|
+
| `scan_project` | Scan entire project with A-F security grading | For project-wide security audits |
|
|
17
19
|
| `check_package` | Verify a package name isn't AI-hallucinated (4.3M+ packages) | Before adding any new dependency |
|
|
18
20
|
| `scan_packages` | Bulk-check all imports in a file for hallucinated packages | Before committing code with new imports |
|
|
19
21
|
| `scan_agent_prompt` | Detect prompt injection and malicious instructions (56 rules) | Before acting on external/untrusted input |
|
|
@@ -38,8 +40,18 @@ scan_security → review findings → fix_security → verify fix
|
|
|
38
40
|
|
|
39
41
|
### Before Committing
|
|
40
42
|
```
|
|
43
|
+
scan_git_diff → scan only changed files for fast feedback
|
|
41
44
|
scan_packages → verify all imports are legitimate
|
|
42
|
-
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
### For PR Reviews
|
|
48
|
+
```
|
|
49
|
+
scan_git_diff --base main → scan PR changes against main branch
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
### For Project Audits
|
|
53
|
+
```
|
|
54
|
+
scan_project → get A-F security grade and aggregated metrics
|
|
43
55
|
```
|
|
44
56
|
|
|
45
57
|
### When Processing External Input
|
|
@@ -329,6 +341,105 @@ List all 1700+ security scanning rules and 120 fix templates. Use to understand
|
|
|
329
341
|
|
|
330
342
|
---
|
|
331
343
|
|
|
344
|
+
### `scan_git_diff`
|
|
345
|
+
|
|
346
|
+
Scan only the files changed in a git diff for security vulnerabilities. Use it in PR workflows, in pre-commit hooks, or to check recent changes before pushing. It is significantly faster than a full project scan.
|
|
347
|
+
|
|
348
|
+
**Parameters:**
|
|
349
|
+
|
|
350
|
+
| Parameter | Type | Required | Description |
|
|
351
|
+
|-----------|------|----------|-------------|
|
|
352
|
+
| `base` | string | No | Base commit/branch to diff against (default: `HEAD~1`) |
|
|
353
|
+
| `target` | string | No | Target commit/branch (default: `HEAD`) |
|
|
354
|
+
| `verbosity` | string | No | `"minimal"`, `"compact"` (default), `"full"` |
|
|
355
|
+
|
|
356
|
+
**Example:**
|
|
357
|
+
|
|
358
|
+
```json
|
|
359
|
+
// Input
|
|
360
|
+
{ "base": "main", "target": "HEAD" }
|
|
361
|
+
|
|
362
|
+
// Output
|
|
363
|
+
{
|
|
364
|
+
"base": "main",
|
|
365
|
+
"target": "HEAD",
|
|
366
|
+
"files_scanned": 5,
|
|
367
|
+
"issues_count": 3,
|
|
368
|
+
"issues": [
|
|
369
|
+
{
|
|
370
|
+
"file": "src/auth.js",
|
|
371
|
+
"line": 42,
|
|
372
|
+
"ruleId": "sql-injection",
|
|
373
|
+
"severity": "error",
|
|
374
|
+
"message": "SQL injection vulnerability detected"
|
|
375
|
+
}
|
|
376
|
+
]
|
|
377
|
+
}
|
|
378
|
+
```
|
|
379
|
+
|
|
380
|
+
---
|
|
381
|
+
|
|
382
|
+
### `scan_project`
|
|
383
|
+
|
|
384
|
+
Scan an entire project or directory for security vulnerabilities with aggregated metrics and A-F security grading. Use for security audits, compliance checks, or initial codebase assessment.
|
|
385
|
+
|
|
386
|
+
**Parameters:**
|
|
387
|
+
|
|
388
|
+
| Parameter | Type | Required | Description |
|
|
389
|
+
|-----------|------|----------|-------------|
|
|
390
|
+
| `directory` | string | Yes | Path to project directory to scan |
|
|
391
|
+
| `include_patterns` | array | No | Glob patterns to include (e.g., `["**/*.js", "**/*.py"]`) |
|
|
392
|
+
| `exclude_patterns` | array | No | Glob patterns to exclude (default: `node_modules`, `.git`, etc.) |
|
|
393
|
+
| `verbosity` | string | No | `"minimal"`, `"compact"` (default), `"full"` |
|
|
394
|
+
|
|
395
|
+
**Example:**
|
|
396
|
+
|
|
397
|
+
```json
|
|
398
|
+
// Input
|
|
399
|
+
{ "directory": "./src", "verbosity": "compact" }
|
|
400
|
+
|
|
401
|
+
// Output
|
|
402
|
+
{
|
|
403
|
+
"directory": "/path/to/src",
|
|
404
|
+
"files_scanned": 24,
|
|
405
|
+
"issues_count": 12,
|
|
406
|
+
"grade": "C",
|
|
407
|
+
"by_severity": {
|
|
408
|
+
"error": 3,
|
|
409
|
+
"warning": 7,
|
|
410
|
+
"info": 2
|
|
411
|
+
},
|
|
412
|
+
"by_category": {
|
|
413
|
+
"sql-injection": 2,
|
|
414
|
+
"xss": 3,
|
|
415
|
+
"hardcoded-secret": 1,
|
|
416
|
+
"insecure-crypto": 4,
|
|
417
|
+
"command-injection": 2
|
|
418
|
+
},
|
|
419
|
+
"issues": [
|
|
420
|
+
{
|
|
421
|
+
"file": "auth.js",
|
|
422
|
+
"line": 15,
|
|
423
|
+
"ruleId": "sql-injection",
|
|
424
|
+
"severity": "error",
|
|
425
|
+
"message": "SQL injection vulnerability"
|
|
426
|
+
}
|
|
427
|
+
]
|
|
428
|
+
}
|
|
429
|
+
```
|
|
430
|
+
|
|
431
|
+
**Security Grades:**
|
|
432
|
+
|
|
433
|
+
| Grade | Criteria |
|
|
434
|
+
|-------|----------|
|
|
435
|
+
| A | 0 critical/error issues |
|
|
436
|
+
| B | 1-2 error issues, no critical |
|
|
437
|
+
| C | 3-5 error issues |
|
|
438
|
+
| D | 6-10 error issues |
|
|
439
|
+
| F | 11+ error issues or any critical |
|
|
440
|
+
|
|
441
|
+
---
|
|
442
|
+
|
|
332
443
|
## Supported Languages
|
|
333
444
|
|
|
334
445
|
| Language | Vulnerabilities Detected | Analysis |
|
|
@@ -465,17 +576,113 @@ npx agent-security-scanner-mcp scan-prompt "ignore previous instructions"
|
|
|
465
576
|
# Scan a file for vulnerabilities
|
|
466
577
|
npx agent-security-scanner-mcp scan-security ./app.py --verbosity minimal
|
|
467
578
|
|
|
579
|
+
# Scan git diff (changed files only)
|
|
580
|
+
npx agent-security-scanner-mcp scan-diff --base main --target HEAD
|
|
581
|
+
|
|
582
|
+
# Scan entire project with grading
|
|
583
|
+
npx agent-security-scanner-mcp scan-project ./src
|
|
584
|
+
|
|
468
585
|
# Check if a package is legitimate
|
|
469
586
|
npx agent-security-scanner-mcp check-package flask pypi
|
|
470
587
|
|
|
471
588
|
# Scan file imports for hallucinated packages
|
|
472
589
|
npx agent-security-scanner-mcp scan-packages ./requirements.txt pypi
|
|
590
|
+
|
|
591
|
+
# Install Claude Code hooks for automatic scanning
|
|
592
|
+
npx agent-security-scanner-mcp init-hooks
|
|
473
593
|
```
|
|
474
594
|
|
|
475
595
|
**Exit codes:** `0` = safe, `1` = issues found. Use in scripts to block risky operations.
|
|
476
596
|
|
|
477
597
|
---
|
|
478
598
|
|
|
599
|
+
## Configuration (`.scannerrc`)
|
|
600
|
+
|
|
601
|
+
Create a `.scannerrc.yaml` or `.scannerrc.json` in your project root to customize scanning behavior:
|
|
602
|
+
|
|
603
|
+
```yaml
|
|
604
|
+
# .scannerrc.yaml
|
|
605
|
+
version: 1
|
|
606
|
+
|
|
607
|
+
# Suppress specific rules
|
|
608
|
+
suppress:
|
|
609
|
+
- rule: "insecure-random"
|
|
610
|
+
reason: "Using for non-cryptographic purposes"
|
|
611
|
+
- rule: "detect-disable-mustache-escape"
|
|
612
|
+
paths: ["src/cli/**"]
|
|
613
|
+
|
|
614
|
+
# Exclude paths from scanning
|
|
615
|
+
exclude:
|
|
616
|
+
- "node_modules/**"
|
|
617
|
+
- "dist/**"
|
|
618
|
+
- "**/*.test.js"
|
|
619
|
+
- "**/*.spec.ts"
|
|
620
|
+
|
|
621
|
+
# Minimum severity to report
|
|
622
|
+
severity_threshold: "warning" # "info", "warning", or "error"
|
|
623
|
+
|
|
624
|
+
# Context-aware filtering (enabled by default)
|
|
625
|
+
context_filtering: true
|
|
626
|
+
```
|
|
627
|
+
|
|
628
|
+
**Configuration options:**
|
|
629
|
+
|
|
630
|
+
| Option | Type | Description |
|
|
631
|
+
|--------|------|-------------|
|
|
632
|
+
| `suppress` | array | Rules to suppress, optionally scoped to paths |
|
|
633
|
+
| `exclude` | array | Glob patterns for paths to skip |
|
|
634
|
+
| `severity_threshold` | string | Minimum severity to report (`info`, `warning`, `error`) |
|
|
635
|
+
| `context_filtering` | boolean | Enable/disable safe module filtering (default: `true`) |
|
|
636
|
+
|
|
637
|
+
The scanner automatically loads config from the current directory or any parent directory.
|
|
638
|
+
|
|
639
|
+
---
|
|
640
|
+
|
|
641
|
+
## Claude Code Hooks
|
|
642
|
+
|
|
643
|
+
Automatically scan files after every edit with Claude Code hooks integration.
|
|
644
|
+
|
|
645
|
+
### Install Hooks
|
|
646
|
+
|
|
647
|
+
```bash
|
|
648
|
+
npx agent-security-scanner-mcp init-hooks
|
|
649
|
+
```
|
|
650
|
+
|
|
651
|
+
This installs a `post-tool-use` hook that triggers security scanning after `Write`, `Edit`, or `MultiEdit` operations.
|
|
652
|
+
|
|
653
|
+
### With Prompt Guard
|
|
654
|
+
|
|
655
|
+
```bash
|
|
656
|
+
npx agent-security-scanner-mcp init-hooks --with-prompt-guard
|
|
657
|
+
```
|
|
658
|
+
|
|
659
|
+
Adds a `PreToolUse` hook that scans prompts for injection attacks before executing tools.
|
|
660
|
+
|
|
661
|
+
### What Gets Installed
|
|
662
|
+
|
|
663
|
+
The command adds hooks to `~/.claude/settings.json`:
|
|
664
|
+
|
|
665
|
+
```json
|
|
666
|
+
{
|
|
667
|
+
"hooks": {
|
|
668
|
+
"post-tool-use": [
|
|
669
|
+
{
|
|
670
|
+
"matcher": "Write|Edit|MultiEdit",
|
|
671
|
+
"command": "npx agent-security-scanner-mcp scan-security \"$TOOL_INPUT_file_path\" --verbosity minimal"
|
|
672
|
+
}
|
|
673
|
+
]
|
|
674
|
+
}
|
|
675
|
+
}
|
|
676
|
+
```
|
|
677
|
+
|
|
678
|
+
### Hook Behavior
|
|
679
|
+
|
|
680
|
+
- **Non-blocking:** Hooks report findings but don't prevent file writes
|
|
681
|
+
- **Minimal output:** Uses `--verbosity minimal` to avoid context overflow
|
|
682
|
+
- **Automatic:** Runs on every file modification without manual intervention
|
|
683
|
+
|
|
684
|
+
---
|
|
685
|
+
|
|
479
686
|
## OpenClaw Integration
|
|
480
687
|
|
|
481
688
|
[OpenClaw](https://openclaw.ai) is an autonomous AI assistant with broad system access. This scanner provides security guardrails for OpenClaw users.
|
|
@@ -567,7 +774,7 @@ AI coding agents introduce attack surfaces that traditional security tools weren
|
|
|
567
774
|
|----------|-------|
|
|
568
775
|
| **Transport** | stdio |
|
|
569
776
|
| **Package** | `agent-security-scanner-mcp` (npm) |
|
|
570
|
-
| **Tools** |
|
|
777
|
+
| **Tools** | 8 |
|
|
571
778
|
| **Languages** | 12 |
|
|
572
779
|
| **Ecosystems** | 7 |
|
|
573
780
|
| **Auth** | None required |
|
|
@@ -649,6 +856,21 @@ All MCP tools support a `verbosity` parameter to minimize context window consump
|
|
|
649
856
|
|
|
650
857
|
## Changelog
|
|
651
858
|
|
|
859
|
+
### v3.4.0
|
|
860
|
+
- **Severity Calibration** - 207-rule severity map with HIGH/MEDIUM/LOW confidence scores for more accurate prioritization
|
|
861
|
+
- **Cross-Engine Deduplication** - ~30-50% noise reduction by deduplicating findings across AST, taint, and regex engines
|
|
862
|
+
- **Context-Aware Filtering** - 80+ known safe modules (logging, testing, sanitizers) reduce false positives
|
|
863
|
+
- **`.scannerrc` Configuration** - YAML/JSON project config for suppressing rules, excluding paths, and setting severity thresholds
|
|
864
|
+
- **`scan_git_diff` Tool** - Scan only changed files in git diff for PR workflows and pre-commit hooks
|
|
865
|
+
- **`scan_project` Tool** - Project-level scanning with A-F security grading and aggregated metrics
|
|
866
|
+
- **`init-hooks` CLI** - `npx agent-security-scanner-mcp init-hooks` installs Claude Code post-tool-use hooks for automatic scanning
|
|
867
|
+
- **Safe Fix Validation** - `validateFix()` ensures auto-fixes don't introduce new vulnerabilities
|
|
868
|
+
- **Cross-File Taint Analysis** - Import graph tracking for dataflow analysis across module boundaries
|
|
869
|
+
|
|
870
|
+
### v3.3.0
|
|
871
|
+
- **OpenClaw Integration** - Full support with 30+ rules targeting autonomous AI threats
|
|
872
|
+
- **OpenClaw-Specific Rules** - Data exfiltration, credential theft, messaging abuse, unsafe automation detection
|
|
873
|
+
|
|
652
874
|
### v3.2.0
|
|
653
875
|
- **Token Optimization** - New `verbosity` parameter for all tools reduces context window usage by up to 98%
|
|
654
876
|
- **Three Verbosity Levels** - `minimal` (~50 tokens), `compact` (~200 tokens, default), `full` (~2,500 tokens)
|
package/analyzer.py
CHANGED
|
@@ -11,6 +11,7 @@ import sys
|
|
|
11
11
|
import json
|
|
12
12
|
import os
|
|
13
13
|
import re
|
|
14
|
+
import argparse
|
|
14
15
|
from typing import List, Dict, Any
|
|
15
16
|
|
|
16
17
|
# Add the directory containing this script to the path
|
|
@@ -91,6 +92,7 @@ def analyze_file_regex(file_path):
|
|
|
91
92
|
'column': match.start() + col_offset,
|
|
92
93
|
'length': match.end() - match.start(),
|
|
93
94
|
'severity': rule['severity'],
|
|
95
|
+
'confidence': rule.get('metadata', {}).get('confidence', 'MEDIUM'),
|
|
94
96
|
'metadata': rule.get('metadata', {}),
|
|
95
97
|
'engine': 'regex'
|
|
96
98
|
})
|
|
@@ -191,6 +193,7 @@ def analyze_file_ast(file_path):
|
|
|
191
193
|
'column': f.column,
|
|
192
194
|
'length': length,
|
|
193
195
|
'severity': f.severity,
|
|
196
|
+
'confidence': f.metadata.get('confidence', getattr(f, 'confidence', 'MEDIUM')),
|
|
194
197
|
'metadata': f.metadata,
|
|
195
198
|
'engine': 'taint' if is_taint else 'ast',
|
|
196
199
|
})
|
|
@@ -229,16 +232,30 @@ def analyze_file(file_path):
|
|
|
229
232
|
|
|
230
233
|
|
|
231
234
|
def main():
    """Command-line entry point: analyze one file and print the result as JSON.

    Accepts a positional ``file_path`` and an optional ``--engine`` flag
    (``auto``, ``ast`` or ``regex``). Error conditions are reported as a JSON
    object with an ``error`` key on stdout, followed by exit status 1.
    """
    cli = argparse.ArgumentParser(description='Security Analyzer - AST-based with regex fallback')
    cli.add_argument('file_path', help='Path to the file to analyze')
    cli.add_argument(
        '--engine',
        choices=['auto', 'ast', 'regex'],
        default='auto',
        help='Analysis engine: auto (default), ast (tree-sitter only), regex (regex only)',
    )
    options = cli.parse_args()

    target = options.file_path
    if not os.path.exists(target):
        print(json.dumps({'error': f'File not found: {target}'}))
        sys.exit(1)

    selected = options.engine
    if selected == 'ast' and not HAS_AST_ENGINE:
        # An explicit AST request is never silently downgraded to regex.
        print(json.dumps({'error': 'AST engine requested but tree-sitter is not available. Install dependencies: python3 -m pip install -r requirements.txt'}))
        sys.exit(1)

    if selected == 'regex':
        findings = analyze_file_regex(target)
    elif selected == 'ast':
        findings = analyze_file_ast(target)
    else:
        # auto: use AST if available, otherwise regex
        findings = analyze_file(target)

    print(json.dumps(findings))
|
|
243
260
|
|
|
244
261
|
|
|
@@ -0,0 +1,216 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Cross-file taint analysis for security scanning.
|
|
3
|
+
|
|
4
|
+
Builds an import graph across local files, runs per-file analysis,
|
|
5
|
+
and propagates taint warnings when a file imports from another file
|
|
6
|
+
that has ERROR-severity findings.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
import os
|
|
11
|
+
import re
|
|
12
|
+
import sys
|
|
13
|
+
|
|
14
|
+
# Import the per-file analyzer
|
|
15
|
+
from analyzer import analyze_file
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def extract_js_imports(source):
    """Extract module specifiers imported by JavaScript/TypeScript source.

    This is a regex scan, not a parse, so specifiers that appear inside
    comments or string literals are reported as well.

    Args:
        source: Full text of a JavaScript or TypeScript file.

    Returns:
        A list of module specifier strings (duplicates preserved), grouped by
        form: ``require('...')`` calls first, then ``... from '...'`` clauses
        (imports and re-exports), then side-effect ``import '...'``
        statements, and finally dynamic ``import('...')`` calls.
    """
    # Word boundaries keep identifiers that merely end in a keyword
    # (e.g. ``myrequire(...)``) from being mistaken for an import.
    patterns = (
        r'''\brequire\s*\(\s*['"]([^'"]+)['"]\s*\)''',  # require('...')
        r'''\bfrom\s+['"]([^'"]+)['"]''',                # import/export ... from '...'
        r'''\bimport\s+['"]([^'"]+)['"]''',              # import '...'
        r'''\bimport\s*\(\s*['"]([^'"]+)['"]\s*\)''',    # import('...')
    )
    imports = []
    for pattern in patterns:
        imports.extend(match.group(1) for match in re.finditer(pattern, source))
    return imports
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def extract_py_imports(source):
    """Extract imported module names from Python source.

    Only statements that start at column 0 are considered.

    Args:
        source: Full text of a Python file.

    Returns:
        A list of module names (duplicates preserved): names from ``import``
        statements first, then names from ``from ... import`` statements.
        Absolute imports are reduced to their top-level package
        (``a.b.c`` -> ``a``). Relative imports are returned unchanged,
        leading dots included (``.utils``, ``..pkg.mod``, ``.``), so that a
        resolver can tell them apart from absolute imports.
    """
    imports = []

    # import a, b.c as d
    for match in re.finditer(r'^import[ \t]+([^\n#;]+)', source, re.MULTILINE):
        for clause in match.group(1).split(','):
            tokens = clause.split()
            if tokens:
                # tokens[0] is the dotted module path; "as alias" is ignored.
                imports.append(tokens[0].split('.')[0])

    # from module import ...
    for match in re.finditer(r'^from\s+(\S+)\s+import', source, re.MULTILINE):
        module = match.group(1)
        if module.startswith('.'):
            # Splitting a relative module on '.' would yield '' and lose it.
            imports.append(module)
        else:
            imports.append(module.split('.')[0])

    return imports
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def detect_language(file_path):
    """Return the language name implied by a path's file extension.

    The extension is compared case-insensitively. ``.py`` maps to
    ``'python'``, ``.js``/``.jsx`` to ``'javascript'`` and ``.ts``/``.tsx``
    to ``'typescript'``; anything else yields ``'unknown'``.
    """
    _, suffix = os.path.splitext(file_path)
    suffix = suffix.lower()
    if suffix == '.py':
        return 'python'
    if suffix in ('.js', '.jsx'):
        return 'javascript'
    if suffix in ('.ts', '.tsx'):
        return 'typescript'
    return 'unknown'
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def resolve_local_import(module, base_dir, lang):
    """Resolve a local import to the path of an existing file.

    Args:
        module: The import specifier. For JavaScript/TypeScript this is the
            string inside ``require``/``import``; for Python it is either a
            relative module with leading dots (``.utils``, ``..pkg.mod``) or
            an absolute module name.
        base_dir: Directory of the importing file.
        lang: ``'javascript'``, ``'typescript'`` or ``'python'``.

    Returns:
        The path of the first candidate that exists as a regular file, or
        ``None`` when nothing matches, the specifier is not local, or the
        language is not supported.
    """
    if lang in ('javascript', 'typescript'):
        # Bare specifiers ('lodash') refer to packages, not local files.
        if not module.startswith('.'):
            return None
        extensions = ('.js', '.ts', '.tsx', '.jsx')
        candidates = [module]
        candidates.extend(module + ext for ext in extensions)
        candidates.extend(os.path.join(module, 'index' + ext) for ext in extensions)
        for candidate in candidates:
            full = os.path.normpath(os.path.join(base_dir, candidate))
            if os.path.isfile(full):
                return full
        return None

    if lang == 'python':
        if module.startswith('.'):
            rel = module.lstrip('.')
            # One leading dot means the importing file's own package; each
            # additional dot climbs one directory higher.
            level = len(module) - len(rel)
            if level > 1:
                parents = [os.pardir] * (level - 1)
                package_dir = os.path.normpath(os.path.join(base_dir, *parents))
            else:
                package_dir = base_dir
            if rel:
                stem = os.path.join(package_dir, rel.replace('.', os.sep))
                candidates = [stem + '.py', os.path.join(stem, '__init__.py')]
            else:
                # ``from . import x`` / ``from .. import x``: the package itself.
                candidates = [os.path.join(package_dir, '__init__.py')]
        else:
            # Absolute name: only a sibling module or sibling package counts
            # as local.
            candidates = [
                os.path.join(base_dir, module + '.py'),
                os.path.join(base_dir, module, '__init__.py'),
            ]
        for candidate in candidates:
            if os.path.isfile(candidate):
                return candidate

    return None
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def extract_exports(source, lang):
    """Extract the names a module exports, using a regex scan.

    Args:
        source: Full text of the file.
        lang: ``'javascript'``, ``'typescript'`` or ``'python'``.

    Returns:
        A list of names. For JavaScript/TypeScript: every identifier declared
        with ``export [async] function|class|const|let|var``, followed by one
        ``'default'`` entry per ``module.exports =`` assignment. For Python:
        every ``def``, ``async def`` or ``class`` that starts at column 0.
        Any other language yields an empty list.
    """
    exports = []
    if lang in ('javascript', 'typescript'):
        declaration = r'export\s+(?:async\s+)?(?:function|class|const|let|var)\s+(\w+)'
        exports.extend(match.group(1) for match in re.finditer(declaration, source))
        # CommonJS whole-module export is recorded under the name 'default'.
        exports.extend('default' for _ in re.finditer(r'module\.exports\s*=', source))
    elif lang == 'python':
        # Anchored at column 0, so methods and nested functions are skipped.
        definition = r'^(?:async\s+)?(?:def|class)\s+(\w+)'
        exports.extend(match.group(1) for match in re.finditer(definition, source, re.MULTILINE))
    return exports
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def build_import_graph(file_paths):
    """Build the local import graph for a set of files.

    Args:
        file_paths: Paths of the files under analysis.

    Returns:
        A dict mapping each readable file of a recognized language (by
        absolute path) to a list of edges. Each edge is a dict with
        ``'module'`` (the import specifier as written) and
        ``'resolved_path'`` (absolute path of the imported file). Only
        imports that resolve to another file in ``file_paths`` produce an
        edge, and each imported file appears at most once per importer.
    """
    graph = {}
    file_set = {os.path.abspath(f) for f in file_paths}

    for file_path in file_paths:
        abs_path = os.path.abspath(file_path)
        lang = detect_language(file_path)
        if lang == 'unknown':
            continue

        try:
            # Context manager so the handle is closed even if read() fails.
            with open(file_path, 'r', encoding='utf-8', errors='ignore') as handle:
                source = handle.read()
        except OSError:
            # Unreadable files are skipped and get no entry in the graph.
            continue

        if lang in ('javascript', 'typescript'):
            modules = extract_js_imports(source)
        elif lang == 'python':
            modules = extract_py_imports(source)
        else:
            continue

        base_dir = os.path.dirname(abs_path)
        edges = []
        seen_targets = set()
        for mod in modules:
            resolved = resolve_local_import(mod, base_dir, lang)
            if not resolved:
                continue
            resolved_abs = os.path.abspath(resolved)
            # Ignore self-imports and files outside the analyzed set.
            if resolved_abs == abs_path or resolved_abs not in file_set:
                continue
            # The same file imported twice (or via two spellings) would
            # otherwise yield duplicate edges and duplicate warnings.
            if resolved_abs in seen_targets:
                continue
            seen_targets.add(resolved_abs)
            edges.append({
                'module': mod,
                'resolved_path': resolved_abs,
            })

        graph[abs_path] = edges

    return graph
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def cross_file_analyze(file_paths):
    """Run per-file analysis and add cross-file taint warnings.

    Steps:
      1. Analyze each file independently with ``analyze_file``.
      2. Build the local import graph.
      3. For every import edge whose target file has findings with severity
         ``'error'``, emit one ``cross-file-taint-warning`` for the importer.

    Args:
        file_paths: Paths of the files to analyze.

    Returns:
        A list of finding dicts: all per-file findings (each tagged in place
        with a ``'file'`` key holding the path as given), followed by the
        cross-file warnings (whose ``'file'`` is the importer's absolute
        path, as used by the import graph).
    """
    file_findings = {}
    all_findings = []

    for file_path in file_paths:
        try:
            results = analyze_file(file_path)
            # Non-list results (e.g. an error object) are ignored.
            if isinstance(results, list):
                file_findings[os.path.abspath(file_path)] = results
                for finding in results:
                    finding['file'] = file_path
                all_findings.extend(results)
        except Exception:
            # Deliberately best-effort: one file that fails to analyze must
            # not abort the scan of the remaining files.
            continue

    graph = build_import_graph(file_paths)

    cross_file_warnings = []
    for file_path, edges in graph.items():
        for edge in edges:
            imported_path = edge['resolved_path']
            error_findings = [
                f for f in file_findings.get(imported_path, [])
                if f.get('severity') == 'error'
            ]
            if not error_findings:
                continue

            # Sorted so the message is identical from run to run; joining an
            # unordered set made the rule order depend on string hashing.
            rule_ids = sorted({str(f.get('ruleId', 'unknown')) for f in error_findings})
            cross_file_warnings.append({
                'ruleId': 'cross-file-taint-warning',
                'severity': 'warning',
                'message': f"Imports from '{os.path.basename(imported_path)}' which has {len(error_findings)} critical finding(s): {', '.join(rule_ids)}",
                'file': file_path,
                'line': 0,
                'metadata': {
                    'imported_file': imported_path,
                    'imported_findings_count': len(error_findings),
                }
            })

    # Combine: per-file findings + cross-file warnings
    return all_findings + cross_file_warnings
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def main():
    """CLI entry point: take file paths from argv and print findings as JSON.

    Arguments that are not existing regular files are dropped. When no
    arguments are given, or none of them survive that filter, a JSON object
    with an ``error`` key is printed and the process exits with status 1.
    """
    requested = sys.argv[1:]
    if not requested:
        print(json.dumps({'error': 'Usage: cross_file_analyzer.py file1 file2 ...'}))
        sys.exit(1)

    # Keep only the paths that exist as regular files.
    existing = list(filter(os.path.isfile, requested))
    if not existing:
        print(json.dumps({'error': 'No valid files provided'}))
        sys.exit(1)

    print(json.dumps(cross_file_analyze(existing)))
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
if __name__ == '__main__':
|
|
216
|
+
main()
|