agent-security-scanner-mcp 3.2.0 → 3.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +283 -3
- package/analyzer.py +22 -5
- package/cross_file_analyzer.py +216 -0
- package/index.js +191 -2
- package/package.json +15 -5
- package/pattern_matcher.py +1 -0
- package/regex_fallback.py +199 -1
- package/rules/openclaw.security.yaml +283 -0
- package/scripts/postinstall.js +25 -0
- package/skills/openclaw/SKILL.md +102 -0
- package/skills/security-scan-batch.md +107 -0
- package/skills/security-scanner.md +76 -0
- package/src/cli/init-hooks.js +164 -0
- package/src/cli/init.js +93 -0
- package/src/config.js +181 -0
- package/src/context.js +228 -0
- package/src/dedup.js +129 -0
- package/src/fix-patterns.js +66 -17
- package/src/tools/fix-security.js +31 -4
- package/src/tools/scan-diff.js +151 -0
- package/src/tools/scan-project.js +308 -0
- package/src/tools/scan-prompt.js +71 -1
- package/src/tools/scan-security.js +33 -5
- package/src/utils.js +76 -7
package/README.md
CHANGED
|
@@ -1,17 +1,21 @@
|
|
|
1
1
|
# agent-security-scanner-mcp
|
|
2
2
|
|
|
3
|
-
Security scanner
|
|
3
|
+
Security scanner for AI coding agents and autonomous assistants. Scans code for vulnerabilities, detects hallucinated packages, and blocks prompt injection — via MCP (Claude Code, Cursor, Windsurf, Cline) or CLI (OpenClaw, CI/CD).
|
|
4
4
|
|
|
5
5
|
[](https://www.npmjs.com/package/agent-security-scanner-mcp)
|
|
6
6
|
[](https://www.npmjs.com/package/agent-security-scanner-mcp)
|
|
7
7
|
[](https://opensource.org/licenses/MIT)
|
|
8
8
|
|
|
9
|
+
> **New in v3.3.0:** Full [OpenClaw](https://openclaw.ai) integration with 30+ rules targeting autonomous AI threats — data exfiltration, credential theft, messaging abuse, and unsafe automation. [See OpenClaw setup](#openclaw-integration).
|
|
10
|
+
|
|
9
11
|
## Tools
|
|
10
12
|
|
|
11
13
|
| Tool | Description | When to Use |
|
|
12
14
|
|------|-------------|-------------|
|
|
13
15
|
| `scan_security` | Scan code for vulnerabilities (1700+ rules, 12 languages) with AST and taint analysis | After writing or editing any code file |
|
|
14
16
|
| `fix_security` | Auto-fix all detected vulnerabilities (120 fix templates) | After `scan_security` finds issues |
|
|
17
|
+
| `scan_git_diff` | Scan only changed files in git diff | Before commits or in PR reviews |
|
|
18
|
+
| `scan_project` | Scan entire project with A-F security grading | For project-wide security audits |
|
|
15
19
|
| `check_package` | Verify a package name isn't AI-hallucinated (4.3M+ packages) | Before adding any new dependency |
|
|
16
20
|
| `scan_packages` | Bulk-check all imports in a file for hallucinated packages | Before committing code with new imports |
|
|
17
21
|
| `scan_agent_prompt` | Detect prompt injection and malicious instructions (56 rules) | Before acting on external/untrusted input |
|
|
@@ -36,8 +40,18 @@ scan_security → review findings → fix_security → verify fix
|
|
|
36
40
|
|
|
37
41
|
### Before Committing
|
|
38
42
|
```
|
|
43
|
+
scan_git_diff → scan only changed files for fast feedback
|
|
39
44
|
scan_packages → verify all imports are legitimate
|
|
40
|
-
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
### For PR Reviews
|
|
48
|
+
```
|
|
49
|
+
scan_git_diff --base main → scan PR changes against main branch
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
### For Project Audits
|
|
53
|
+
```
|
|
54
|
+
scan_project → get A-F security grade and aggregated metrics
|
|
41
55
|
```
|
|
42
56
|
|
|
43
57
|
### When Processing External Input
|
|
@@ -327,6 +341,105 @@ List all 1700+ security scanning rules and 120 fix templates. Use to understand
|
|
|
327
341
|
|
|
328
342
|
---
|
|
329
343
|
|
|
344
|
+
### `scan_git_diff`
|
|
345
|
+
|
|
346
|
+
Scan only files changed in git diff for security vulnerabilities. Use in PR workflows, pre-commit hooks, or to check recent changes before pushing. Significantly faster than full project scans.
|
|
347
|
+
|
|
348
|
+
**Parameters:**
|
|
349
|
+
|
|
350
|
+
| Parameter | Type | Required | Description |
|
|
351
|
+
|-----------|------|----------|-------------|
|
|
352
|
+
| `base` | string | No | Base commit/branch to diff against (default: `HEAD~1`) |
|
|
353
|
+
| `target` | string | No | Target commit/branch (default: `HEAD`) |
|
|
354
|
+
| `verbosity` | string | No | `"minimal"`, `"compact"` (default), `"full"` |
|
|
355
|
+
|
|
356
|
+
**Example:**
|
|
357
|
+
|
|
358
|
+
```json
|
|
359
|
+
// Input
|
|
360
|
+
{ "base": "main", "target": "HEAD" }
|
|
361
|
+
|
|
362
|
+
// Output
|
|
363
|
+
{
|
|
364
|
+
"base": "main",
|
|
365
|
+
"target": "HEAD",
|
|
366
|
+
"files_scanned": 5,
|
|
367
|
+
"issues_count": 3,
|
|
368
|
+
"issues": [
|
|
369
|
+
{
|
|
370
|
+
"file": "src/auth.js",
|
|
371
|
+
"line": 42,
|
|
372
|
+
"ruleId": "sql-injection",
|
|
373
|
+
"severity": "error",
|
|
374
|
+
"message": "SQL injection vulnerability detected"
|
|
375
|
+
}
|
|
376
|
+
]
|
|
377
|
+
}
|
|
378
|
+
```
|
|
379
|
+
|
|
380
|
+
---
|
|
381
|
+
|
|
382
|
+
### `scan_project`
|
|
383
|
+
|
|
384
|
+
Scan an entire project or directory for security vulnerabilities with aggregated metrics and A-F security grading. Use for security audits, compliance checks, or initial codebase assessment.
|
|
385
|
+
|
|
386
|
+
**Parameters:**
|
|
387
|
+
|
|
388
|
+
| Parameter | Type | Required | Description |
|
|
389
|
+
|-----------|------|----------|-------------|
|
|
390
|
+
| `directory` | string | Yes | Path to project directory to scan |
|
|
391
|
+
| `include_patterns` | array | No | Glob patterns to include (e.g., `["**/*.js", "**/*.py"]`) |
|
|
392
|
+
| `exclude_patterns` | array | No | Glob patterns to exclude (default: `node_modules`, `.git`, etc.) |
|
|
393
|
+
| `verbosity` | string | No | `"minimal"`, `"compact"` (default), `"full"` |
|
|
394
|
+
|
|
395
|
+
**Example:**
|
|
396
|
+
|
|
397
|
+
```json
|
|
398
|
+
// Input
|
|
399
|
+
{ "directory": "./src", "verbosity": "compact" }
|
|
400
|
+
|
|
401
|
+
// Output
|
|
402
|
+
{
|
|
403
|
+
"directory": "/path/to/src",
|
|
404
|
+
"files_scanned": 24,
|
|
405
|
+
"issues_count": 12,
|
|
406
|
+
"grade": "C",
|
|
407
|
+
"by_severity": {
|
|
408
|
+
"error": 3,
|
|
409
|
+
"warning": 7,
|
|
410
|
+
"info": 2
|
|
411
|
+
},
|
|
412
|
+
"by_category": {
|
|
413
|
+
"sql-injection": 2,
|
|
414
|
+
"xss": 3,
|
|
415
|
+
"hardcoded-secret": 1,
|
|
416
|
+
"insecure-crypto": 4,
|
|
417
|
+
"command-injection": 2
|
|
418
|
+
},
|
|
419
|
+
"issues": [
|
|
420
|
+
{
|
|
421
|
+
"file": "auth.js",
|
|
422
|
+
"line": 15,
|
|
423
|
+
"ruleId": "sql-injection",
|
|
424
|
+
"severity": "error",
|
|
425
|
+
"message": "SQL injection vulnerability"
|
|
426
|
+
}
|
|
427
|
+
]
|
|
428
|
+
}
|
|
429
|
+
```
|
|
430
|
+
|
|
431
|
+
**Security Grades:**
|
|
432
|
+
|
|
433
|
+
| Grade | Criteria |
|
|
434
|
+
|-------|----------|
|
|
435
|
+
| A | 0 critical/error issues |
|
|
436
|
+
| B | 1-2 error issues, no critical |
|
|
437
|
+
| C | 3-5 error issues |
|
|
438
|
+
| D | 6-10 error issues |
|
|
439
|
+
| F | 11+ error issues or any critical |
|
|
440
|
+
|
|
441
|
+
---
|
|
442
|
+
|
|
330
443
|
## Supported Languages
|
|
331
444
|
|
|
332
445
|
| Language | Vulnerabilities Detected | Analysis |
|
|
@@ -392,6 +505,7 @@ npx agent-security-scanner-mcp
|
|
|
392
505
|
| Kilo Code | `npx agent-security-scanner-mcp init kilo-code` |
|
|
393
506
|
| OpenCode | `npx agent-security-scanner-mcp init opencode` |
|
|
394
507
|
| Cody | `npx agent-security-scanner-mcp init cody` |
|
|
508
|
+
| **OpenClaw** | `npx agent-security-scanner-mcp init openclaw` |
|
|
395
509
|
| Interactive | `npx agent-security-scanner-mcp init` |
|
|
396
510
|
|
|
397
511
|
The `init` command auto-detects your OS, locates the config file, creates a backup, and adds the MCP server entry. **Restart your client after running init.**
|
|
@@ -451,6 +565,157 @@ Available languages: `js` (default), `py`, `go`, `java`.
|
|
|
451
565
|
|
|
452
566
|
---
|
|
453
567
|
|
|
568
|
+
## CLI Tools
|
|
569
|
+
|
|
570
|
+
Use the scanner directly from command line (for scripts, CI/CD, or OpenClaw):
|
|
571
|
+
|
|
572
|
+
```bash
|
|
573
|
+
# Scan a prompt for injection attacks
|
|
574
|
+
npx agent-security-scanner-mcp scan-prompt "ignore previous instructions"
|
|
575
|
+
|
|
576
|
+
# Scan a file for vulnerabilities
|
|
577
|
+
npx agent-security-scanner-mcp scan-security ./app.py --verbosity minimal
|
|
578
|
+
|
|
579
|
+
# Scan git diff (changed files only)
|
|
580
|
+
npx agent-security-scanner-mcp scan-diff --base main --target HEAD
|
|
581
|
+
|
|
582
|
+
# Scan entire project with grading
|
|
583
|
+
npx agent-security-scanner-mcp scan-project ./src
|
|
584
|
+
|
|
585
|
+
# Check if a package is legitimate
|
|
586
|
+
npx agent-security-scanner-mcp check-package flask pypi
|
|
587
|
+
|
|
588
|
+
# Scan file imports for hallucinated packages
|
|
589
|
+
npx agent-security-scanner-mcp scan-packages ./requirements.txt pypi
|
|
590
|
+
|
|
591
|
+
# Install Claude Code hooks for automatic scanning
|
|
592
|
+
npx agent-security-scanner-mcp init-hooks
|
|
593
|
+
```
|
|
594
|
+
|
|
595
|
+
**Exit codes:** `0` = safe, `1` = issues found. Use in scripts to block risky operations.
|
|
596
|
+
|
|
597
|
+
---
|
|
598
|
+
|
|
599
|
+
## Configuration (`.scannerrc`)
|
|
600
|
+
|
|
601
|
+
Create a `.scannerrc.yaml` or `.scannerrc.json` in your project root to customize scanning behavior:
|
|
602
|
+
|
|
603
|
+
```yaml
|
|
604
|
+
# .scannerrc.yaml
|
|
605
|
+
version: 1
|
|
606
|
+
|
|
607
|
+
# Suppress specific rules
|
|
608
|
+
suppress:
|
|
609
|
+
- rule: "insecure-random"
|
|
610
|
+
reason: "Using for non-cryptographic purposes"
|
|
611
|
+
- rule: "detect-disable-mustache-escape"
|
|
612
|
+
paths: ["src/cli/**"]
|
|
613
|
+
|
|
614
|
+
# Exclude paths from scanning
|
|
615
|
+
exclude:
|
|
616
|
+
- "node_modules/**"
|
|
617
|
+
- "dist/**"
|
|
618
|
+
- "**/*.test.js"
|
|
619
|
+
- "**/*.spec.ts"
|
|
620
|
+
|
|
621
|
+
# Minimum severity to report
|
|
622
|
+
severity_threshold: "warning" # "info", "warning", or "error"
|
|
623
|
+
|
|
624
|
+
# Context-aware filtering (enabled by default)
|
|
625
|
+
context_filtering: true
|
|
626
|
+
```
|
|
627
|
+
|
|
628
|
+
**Configuration options:**
|
|
629
|
+
|
|
630
|
+
| Option | Type | Description |
|
|
631
|
+
|--------|------|-------------|
|
|
632
|
+
| `suppress` | array | Rules to suppress, optionally scoped to paths |
|
|
633
|
+
| `exclude` | array | Glob patterns for paths to skip |
|
|
634
|
+
| `severity_threshold` | string | Minimum severity to report (`info`, `warning`, `error`) |
|
|
635
|
+
| `context_filtering` | boolean | Enable/disable safe module filtering (default: `true`) |
|
|
636
|
+
|
|
637
|
+
The scanner automatically loads config from the current directory or any parent directory.
|
|
638
|
+
|
|
639
|
+
---
|
|
640
|
+
|
|
641
|
+
## Claude Code Hooks
|
|
642
|
+
|
|
643
|
+
Automatically scan files after every edit with Claude Code hooks integration.
|
|
644
|
+
|
|
645
|
+
### Install Hooks
|
|
646
|
+
|
|
647
|
+
```bash
|
|
648
|
+
npx agent-security-scanner-mcp init-hooks
|
|
649
|
+
```
|
|
650
|
+
|
|
651
|
+
This installs a `post-tool-use` hook that triggers security scanning after `Write`, `Edit`, or `MultiEdit` operations.
|
|
652
|
+
|
|
653
|
+
### With Prompt Guard
|
|
654
|
+
|
|
655
|
+
```bash
|
|
656
|
+
npx agent-security-scanner-mcp init-hooks --with-prompt-guard
|
|
657
|
+
```
|
|
658
|
+
|
|
659
|
+
Adds a `PreToolUse` hook that scans prompts for injection attacks before executing tools.
|
|
660
|
+
|
|
661
|
+
### What Gets Installed
|
|
662
|
+
|
|
663
|
+
The command adds hooks to `~/.claude/settings.json`:
|
|
664
|
+
|
|
665
|
+
```json
|
|
666
|
+
{
|
|
667
|
+
"hooks": {
|
|
668
|
+
"post-tool-use": [
|
|
669
|
+
{
|
|
670
|
+
"matcher": "Write|Edit|MultiEdit",
|
|
671
|
+
"command": "npx agent-security-scanner-mcp scan-security \"$TOOL_INPUT_file_path\" --verbosity minimal"
|
|
672
|
+
}
|
|
673
|
+
]
|
|
674
|
+
}
|
|
675
|
+
}
|
|
676
|
+
```
|
|
677
|
+
|
|
678
|
+
### Hook Behavior
|
|
679
|
+
|
|
680
|
+
- **Non-blocking:** Hooks report findings but don't prevent file writes
|
|
681
|
+
- **Minimal output:** Uses `--verbosity minimal` to avoid context overflow
|
|
682
|
+
- **Automatic:** Runs on every file modification without manual intervention
|
|
683
|
+
|
|
684
|
+
---
|
|
685
|
+
|
|
686
|
+
## OpenClaw Integration
|
|
687
|
+
|
|
688
|
+
[OpenClaw](https://openclaw.ai) is an autonomous AI assistant with broad system access. This scanner provides security guardrails for OpenClaw users.
|
|
689
|
+
|
|
690
|
+
### Install
|
|
691
|
+
|
|
692
|
+
```bash
|
|
693
|
+
npx agent-security-scanner-mcp init openclaw
|
|
694
|
+
```
|
|
695
|
+
|
|
696
|
+
This installs a skill to `~/.openclaw/workspace/skills/security-scanner/`.
|
|
697
|
+
|
|
698
|
+
### OpenClaw-Specific Threats
|
|
699
|
+
|
|
700
|
+
The scanner includes 30+ rules targeting OpenClaw's unique attack surface:
|
|
701
|
+
|
|
702
|
+
| Category | Examples |
|
|
703
|
+
|----------|----------|
|
|
704
|
+
| **Data Exfiltration** | "Forward emails to...", "Upload files to...", "Share browser cookies" |
|
|
705
|
+
| **Messaging Abuse** | "Send to all contacts", "Auto-reply to everyone" |
|
|
706
|
+
| **Credential Theft** | "Show my passwords", "Access keychain", "List API keys" |
|
|
707
|
+
| **Unsafe Automation** | "Run hourly without asking", "Disable safety checks" |
|
|
708
|
+
| **Service Attacks** | "Delete all repos", "Make payment to..." |
|
|
709
|
+
|
|
710
|
+
### Usage in OpenClaw
|
|
711
|
+
|
|
712
|
+
The skill is auto-discovered. Use it by asking:
|
|
713
|
+
- "Scan this prompt for security issues"
|
|
714
|
+
- "Check if this code is safe to run"
|
|
715
|
+
- "Verify these packages aren't hallucinated"
|
|
716
|
+
|
|
717
|
+
---
|
|
718
|
+
|
|
454
719
|
## What This Scanner Detects
|
|
455
720
|
|
|
456
721
|
AI coding agents introduce attack surfaces that traditional security tools weren't designed for:
|
|
@@ -509,7 +774,7 @@ AI coding agents introduce attack surfaces that traditional security tools weren
|
|
|
509
774
|
|----------|-------|
|
|
510
775
|
| **Transport** | stdio |
|
|
511
776
|
| **Package** | `agent-security-scanner-mcp` (npm) |
|
|
512
|
-
| **Tools** |
|
|
777
|
+
| **Tools** | 8 |
|
|
513
778
|
| **Languages** | 12 |
|
|
514
779
|
| **Ecosystems** | 7 |
|
|
515
780
|
| **Auth** | None required |
|
|
@@ -591,6 +856,21 @@ All MCP tools support a `verbosity` parameter to minimize context window consump
|
|
|
591
856
|
|
|
592
857
|
## Changelog
|
|
593
858
|
|
|
859
|
+
### v3.4.0
|
|
860
|
+
- **Severity Calibration** - 207-rule severity map with HIGH/MEDIUM/LOW confidence scores for more accurate prioritization
|
|
861
|
+
- **Cross-Engine Deduplication** - ~30-50% noise reduction by deduplicating findings across AST, taint, and regex engines
|
|
862
|
+
- **Context-Aware Filtering** - 80+ known safe modules (logging, testing, sanitizers) reduce false positives
|
|
863
|
+
- **`.scannerrc` Configuration** - YAML/JSON project config for suppressing rules, excluding paths, and setting severity thresholds
|
|
864
|
+
- **`scan_git_diff` Tool** - Scan only changed files in git diff for PR workflows and pre-commit hooks
|
|
865
|
+
- **`scan_project` Tool** - Project-level scanning with A-F security grading and aggregated metrics
|
|
866
|
+
- **`init-hooks` CLI** - `npx agent-security-scanner-mcp init-hooks` installs Claude Code post-tool-use hooks for automatic scanning
|
|
867
|
+
- **Safe Fix Validation** - `validateFix()` ensures auto-fixes don't introduce new vulnerabilities
|
|
868
|
+
- **Cross-File Taint Analysis** - Import graph tracking for dataflow analysis across module boundaries
|
|
869
|
+
|
|
870
|
+
### v3.3.0
|
|
871
|
+
- **OpenClaw Integration** - Full support with 30+ rules targeting autonomous AI threats
|
|
872
|
+
- **OpenClaw-Specific Rules** - Data exfiltration, credential theft, messaging abuse, unsafe automation detection
|
|
873
|
+
|
|
594
874
|
### v3.2.0
|
|
595
875
|
- **Token Optimization** - New `verbosity` parameter for all tools reduces context window usage by up to 98%
|
|
596
876
|
- **Three Verbosity Levels** - `minimal` (~50 tokens), `compact` (~200 tokens, default), `full` (~2,500 tokens)
|
package/analyzer.py
CHANGED
|
@@ -11,6 +11,7 @@ import sys
|
|
|
11
11
|
import json
|
|
12
12
|
import os
|
|
13
13
|
import re
|
|
14
|
+
import argparse
|
|
14
15
|
from typing import List, Dict, Any
|
|
15
16
|
|
|
16
17
|
# Add the directory containing this script to the path
|
|
@@ -91,6 +92,7 @@ def analyze_file_regex(file_path):
|
|
|
91
92
|
'column': match.start() + col_offset,
|
|
92
93
|
'length': match.end() - match.start(),
|
|
93
94
|
'severity': rule['severity'],
|
|
95
|
+
'confidence': rule.get('metadata', {}).get('confidence', 'MEDIUM'),
|
|
94
96
|
'metadata': rule.get('metadata', {}),
|
|
95
97
|
'engine': 'regex'
|
|
96
98
|
})
|
|
@@ -191,6 +193,7 @@ def analyze_file_ast(file_path):
|
|
|
191
193
|
'column': f.column,
|
|
192
194
|
'length': length,
|
|
193
195
|
'severity': f.severity,
|
|
196
|
+
'confidence': f.metadata.get('confidence', getattr(f, 'confidence', 'MEDIUM')),
|
|
194
197
|
'metadata': f.metadata,
|
|
195
198
|
'engine': 'taint' if is_taint else 'ast',
|
|
196
199
|
})
|
|
@@ -229,16 +232,30 @@ def analyze_file(file_path):
|
|
|
229
232
|
|
|
230
233
|
|
|
231
234
|
def main():
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
+
parser = argparse.ArgumentParser(description='Security Analyzer - AST-based with regex fallback')
|
|
236
|
+
parser.add_argument('file_path', help='Path to the file to analyze')
|
|
237
|
+
parser.add_argument('--engine', choices=['auto', 'ast', 'regex'], default='auto',
|
|
238
|
+
help='Analysis engine: auto (default), ast (tree-sitter only), regex (regex only)')
|
|
239
|
+
args = parser.parse_args()
|
|
235
240
|
|
|
236
|
-
file_path =
|
|
241
|
+
file_path = args.file_path
|
|
237
242
|
if not os.path.exists(file_path):
|
|
238
243
|
print(json.dumps({'error': f'File not found: {file_path}'}))
|
|
239
244
|
sys.exit(1)
|
|
240
245
|
|
|
241
|
-
|
|
246
|
+
engine = args.engine
|
|
247
|
+
|
|
248
|
+
if engine == 'regex':
|
|
249
|
+
results = analyze_file_regex(file_path)
|
|
250
|
+
elif engine == 'ast':
|
|
251
|
+
if not HAS_AST_ENGINE:
|
|
252
|
+
print(json.dumps({'error': 'AST engine requested but tree-sitter is not available. Install dependencies: python3 -m pip install -r requirements.txt'}))
|
|
253
|
+
sys.exit(1)
|
|
254
|
+
results = analyze_file_ast(file_path)
|
|
255
|
+
else:
|
|
256
|
+
# auto: use AST if available, otherwise regex
|
|
257
|
+
results = analyze_file(file_path)
|
|
258
|
+
|
|
242
259
|
print(json.dumps(results))
|
|
243
260
|
|
|
244
261
|
|
|
@@ -0,0 +1,216 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Cross-file taint analysis for security scanning.
|
|
3
|
+
|
|
4
|
+
Builds an import graph across local files, runs per-file analysis,
|
|
5
|
+
and propagates taint warnings when a file imports from another file
|
|
6
|
+
that has ERROR-severity findings.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
import os
|
|
11
|
+
import re
|
|
12
|
+
import sys
|
|
13
|
+
|
|
14
|
+
# Import the per-file analyzer
|
|
15
|
+
from analyzer import analyze_file
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def extract_js_imports(source):
    """Extract module specifiers imported by JavaScript/TypeScript source.

    Matching is purely regex-based (the text is not parsed), so a specifier
    that appears inside a comment or string literal is reported as well.

    Recognized forms:
        * ``require('x')``
        * ``import ... from 'x'`` and ``export ... from 'x'``
        * side-effect imports: ``import 'x'``
        * dynamic imports: ``import('x')``

    Args:
        source: Full text of the source file.

    Returns:
        A list of specifier strings. Results are grouped by form, in the
        order listed above; duplicates are kept.
    """
    patterns = (
        r'''require\s*\(\s*['"]([^'"]+)['"]\s*\)''',
        r'''from\s+['"]([^'"]+)['"]''',
        r'''import\s+['"]([^'"]+)['"]''',
        # Dynamic import() with a literal specifier; previously undetected.
        r'''import\s*\(\s*['"]([^'"]+)['"]\s*\)''',
    )
    imports = []
    for pattern in patterns:
        imports.extend(m.group(1) for m in re.finditer(pattern, source))
    return imports
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def extract_py_imports(source):
    """Extract imported module names from Python source.

    Only statements that start at column 0 are considered (regex-based, the
    source is not parsed).

    * ``import a.b as c, d``      -> ``'a'``, ``'d'`` (top-level package only)
    * ``from pkg.sub import x``   -> ``'pkg'``
    * ``from .utils import x``    -> ``'.utils'`` (leading dots are preserved
      so the caller can recognise the import as relative)
    * ``from . import a, b``      -> ``'.a'``, ``'.b'``; if no names can be
      read from the same line (e.g. a parenthesised multi-line list or
      ``*``), the bare dots are returned instead.

    Args:
        source: Full text of the source file.

    Returns:
        A list of module names: all ``import`` statements first, then all
        ``from ... import`` statements, each group in source order.
    """
    name_re = re.compile(r'[^\W\d][\w.]*')

    def leading_names(clauses):
        # First dotted name of every comma-separated clause; drops "as x".
        names = []
        for clause in clauses.split(','):
            found = name_re.match(clause.strip(' \t()\\'))
            if found:
                names.append(found.group(0))
        return names

    imports = []

    # import a, b.c as d   (trailing comments are excluded by the pattern)
    for m in re.finditer(r'^import[ \t]+([^\n#]+)', source, re.MULTILINE):
        imports.extend(name.split('.')[0] for name in leading_names(m.group(1)))

    # from module import ...
    for m in re.finditer(r'^from[ \t]+(\S+)[ \t]+import\b([^\n#]*)',
                         source, re.MULTILINE):
        module, imported = m.group(1), m.group(2)
        if not module.startswith('.'):
            imports.append(module.split('.')[0])
        elif module.strip('.'):
            # Relative import with a module part: keep it intact. Splitting
            # on '.' here would reduce it to an empty string.
            imports.append(module)
        else:
            # "from . import a, b": each imported name may be a submodule.
            submodules = leading_names(imported)
            if submodules:
                imports.extend(module + name for name in submodules)
            else:
                imports.append(module)
    return imports
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def detect_language(file_path):
    """Map a file path to a language name based on its extension.

    The extension is compared case-insensitively. Besides the classic
    ``.js``/``.jsx``/``.ts``/``.tsx`` extensions, the ES-module and CommonJS
    variants (``.mjs``, ``.cjs``, ``.mts``, ``.cts``) are recognised.

    Args:
        file_path: Path (or bare file name) of the file.

    Returns:
        ``'python'``, ``'javascript'``, ``'typescript'``, or ``'unknown'``
        when the extension is missing or not recognised.
    """
    ext = os.path.splitext(file_path)[1].lower()
    lang_map = {
        '.py': 'python',
        '.js': 'javascript', '.jsx': 'javascript',
        '.mjs': 'javascript', '.cjs': 'javascript',
        '.ts': 'typescript', '.tsx': 'typescript',
        '.mts': 'typescript', '.cts': 'typescript',
    }
    return lang_map.get(ext, 'unknown')
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def resolve_local_import(module, base_dir, lang):
    """Resolve an import specifier to a local file path, if one exists.

    JavaScript/TypeScript: only relative specifiers (starting with ``.``)
    are considered. The specifier is tried as written, then with the
    ``.js``/``.ts``/``.tsx``/``.jsx`` extensions, then as a directory
    containing an ``index`` file with one of those extensions.

    Python:
        * Relative modules (leading dots): one dot means ``base_dir``; every
          additional dot moves one directory up. The remainder is tried as
          ``<path>.py`` and ``<path>/__init__.py``.
        * Absolute modules: tried as a sibling of the importing file, i.e.
          ``base_dir/<path>.py`` or ``base_dir/<path>/__init__.py``.

    Args:
        module: Import specifier as written in the source.
        base_dir: Directory of the importing file.
        lang: Language name as returned by ``detect_language``.

    Returns:
        The normalised path of the first candidate that is an existing file,
        or ``None`` if nothing matches (including unsupported languages and
        an empty ``module``).
    """
    if not module:
        return None

    if lang in ('javascript', 'typescript'):
        # Bare specifiers refer to packages, not local files.
        if not module.startswith('.'):
            return None
        extensions = ('.js', '.ts', '.tsx', '.jsx')
        candidates = [module]
        candidates += [module + ext for ext in extensions]
        candidates += [os.path.join(module, 'index' + ext) for ext in extensions]
    elif lang == 'python':
        if module.startswith('.'):
            rel = module.lstrip('.')
            level = len(module) - len(rel)
            # "." is the current package; each further dot is one parent up.
            start = os.path.join(base_dir, *([os.pardir] * (level - 1)))
        else:
            rel = module
            start = base_dir
        target = os.path.join(start, *rel.split('.')) if rel else start
        candidates = [os.path.join(target, '__init__.py')]
        if rel:
            candidates.insert(0, target + '.py')
    else:
        return None

    for candidate in candidates:
        full = os.path.normpath(os.path.join(base_dir, candidate))
        if os.path.isfile(full):
            return full
    return None
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def extract_exports(source, lang):
    """Extract the names a module exports (regex-based, not parsed).

    JavaScript/TypeScript:
        * ``export [async] function|function*|class|const|let|var NAME``
          contributes ``NAME``.
        * every ``export default ...`` and every ``module.exports =``
          assignment contributes the string ``'default'``.

    Python: every ``def``, ``async def`` or ``class`` that starts at
    column 0 contributes its name.

    Args:
        source: Full text of the source file.
        lang: Language name as returned by ``detect_language``.

    Returns:
        A list of names; named exports come first, followed by the
        ``'default'`` entries. Empty for any other language.
    """
    exports = []
    if lang in ('javascript', 'typescript'):
        named = r'export\s+(?:async\s+)?(?:function\*?|class|const|let|var)\s+(\w+)'
        exports.extend(m.group(1) for m in re.finditer(named, source))
        default_count = len(re.findall(r'export\s+default\b', source))
        default_count += len(re.findall(r'module\.exports\s*=', source))
        exports.extend(['default'] * default_count)
    elif lang == 'python':
        top_level = r'^(?:(?:async\s+)?def|class)\s+(\w+)'
        exports.extend(
            m.group(1) for m in re.finditer(top_level, source, re.MULTILINE)
        )
    return exports
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def build_import_graph(file_paths):
    """Build the local import graph for a set of files.

    For every file whose language is recognised by ``detect_language``, the
    import specifiers are extracted and resolved with
    ``resolve_local_import``. An edge is recorded only when the resolved
    file is itself one of ``file_paths`` and is not the importing file.
    Each imported file appears at most once per importer, even when it is
    imported several times.

    Files that cannot be read, or whose language is unknown, get no entry.

    Args:
        file_paths: Iterable of file paths to include in the graph.

    Returns:
        ``{absolute_path: [{'module': specifier, 'resolved_path': abs_path}]}``
    """
    file_paths = list(file_paths)  # iterated twice below
    file_set = {os.path.abspath(f) for f in file_paths}
    extractors = {
        'javascript': extract_js_imports,
        'typescript': extract_js_imports,
        'python': extract_py_imports,
    }

    graph = {}
    for file_path in file_paths:
        lang = detect_language(file_path)
        extractor = extractors.get(lang)
        if extractor is None:
            continue

        try:
            # Context manager so the handle is closed promptly.
            with open(file_path, 'r', encoding='utf-8', errors='ignore') as handle:
                source = handle.read()
        except OSError:
            continue

        abs_path = os.path.abspath(file_path)
        base_dir = os.path.dirname(abs_path)
        edges = []
        seen = set()
        for mod in extractor(source):
            resolved = resolve_local_import(mod, base_dir, lang)
            if not resolved:
                continue
            resolved_abs = os.path.abspath(resolved)
            if resolved_abs == abs_path or resolved_abs not in file_set:
                continue
            if resolved_abs in seen:
                # Same target imported again; one edge is enough.
                continue
            seen.add(resolved_abs)
            edges.append({
                'module': mod,
                'resolved_path': resolved_abs,
            })

        graph[abs_path] = edges

    return graph
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def cross_file_analyze(file_paths):
    """Run per-file analysis and add cross-file taint warnings.

    Steps:
        1. Run ``analyze_file`` on each path and tag every finding with a
           ``'file'`` key. A file whose analysis raises is skipped
           (best-effort); the failure is reported on stderr so stdout stays
           valid JSON for the CLI.
        2. Build the import graph with ``build_import_graph``.
        3. For each import edge whose target has findings with severity
           ``error`` (compared case-insensitively), emit one
           ``cross-file-taint-warning`` for the importing file.

    Args:
        file_paths: Iterable of file paths to analyze.

    Returns:
        A list containing all per-file findings followed by the cross-file
        warnings.
    """
    file_paths = list(file_paths)  # consumed by both the loop and the graph
    file_findings = {}
    all_findings = []

    for file_path in file_paths:
        try:
            results = analyze_file(file_path)
            if isinstance(results, list):
                file_findings[os.path.abspath(file_path)] = results
                for finding in results:
                    finding['file'] = file_path
                all_findings.extend(results)
        except Exception as exc:
            # Deliberately best-effort: one bad file must not abort the run.
            print(f'cross_file_analyzer: skipping {file_path}: {exc}',
                  file=sys.stderr)
            continue

    graph = build_import_graph(file_paths)

    cross_file_warnings = []
    for file_path, edges in graph.items():
        for edge in edges:
            imported_path = edge['resolved_path']
            imported_findings = file_findings.get(imported_path, [])

            # NOTE(review): the casing of 'severity' produced by the per-file
            # analyzer is not visible here, so compare case-insensitively.
            error_findings = [
                f for f in imported_findings
                if str(f.get('severity', '')).lower() == 'error'
            ]
            if not error_findings:
                continue

            # Sorted so the message is stable across runs (set iteration
            # order of strings is not).
            rule_ids = sorted({f.get('ruleId', 'unknown') for f in error_findings})
            cross_file_warnings.append({
                'ruleId': 'cross-file-taint-warning',
                'severity': 'warning',
                'message': f"Imports from '{os.path.basename(imported_path)}' which has {len(error_findings)} critical finding(s): {', '.join(rule_ids)}",
                'file': file_path,
                'line': 0,
                'metadata': {
                    'imported_file': imported_path,
                    'imported_findings_count': len(error_findings),
                }
            })

    # Per-file findings first, then the cross-file warnings.
    return all_findings + cross_file_warnings
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def main():
    """Command-line entry point.

    Takes file paths from ``sys.argv``, drops those that are not existing
    files, runs ``cross_file_analyze`` on the rest and prints the result as
    JSON on stdout. When no arguments are given, or none of them is an
    existing file, a JSON object with an ``error`` key is printed and the
    process exits with status 1.
    """
    def fail(message):
        # Errors go to stdout as JSON, like regular results.
        print(json.dumps({'error': message}))
        sys.exit(1)

    requested = sys.argv[1:]
    if not requested:
        fail('Usage: cross_file_analyzer.py file1 file2 ...')

    # Keep only arguments that point at existing regular files.
    existing = list(filter(os.path.isfile, requested))
    if not existing:
        fail('No valid files provided')

    print(json.dumps(cross_file_analyze(existing)))


if __name__ == '__main__':
    main()
|