redteam-mcp 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- redteam_mcp-0.2.0/.gitignore +20 -0
- redteam_mcp-0.2.0/LICENSE +21 -0
- redteam_mcp-0.2.0/PKG-INFO +238 -0
- redteam_mcp-0.2.0/README.md +206 -0
- redteam_mcp-0.2.0/mcp_redteam/__init__.py +2 -0
- redteam_mcp-0.2.0/mcp_redteam/cli.py +132 -0
- redteam_mcp-0.2.0/mcp_redteam/engine/__init__.py +0 -0
- redteam_mcp-0.2.0/mcp_redteam/engine/config_scanner.py +551 -0
- redteam_mcp-0.2.0/mcp_redteam/engine/semgrep_runner.py +182 -0
- redteam_mcp-0.2.0/mcp_redteam/formatters/__init__.py +13 -0
- redteam_mcp-0.2.0/mcp_redteam/formatters/json_fmt.py +15 -0
- redteam_mcp-0.2.0/mcp_redteam/formatters/sarif.py +208 -0
- redteam_mcp-0.2.0/mcp_redteam/formatters/terminal.py +113 -0
- redteam_mcp-0.2.0/mcp_redteam/llm/__init__.py +0 -0
- redteam_mcp-0.2.0/mcp_redteam/llm/analyzer.py +252 -0
- redteam_mcp-0.2.0/mcp_redteam/models.py +156 -0
- redteam_mcp-0.2.0/pyproject.toml +52 -0
- redteam_mcp-0.2.0/rules/javascript/mcp-js-command-injection.yaml +79 -0
- redteam_mcp-0.2.0/rules/javascript/mcp-js-eval.yaml +68 -0
- redteam_mcp-0.2.0/rules/javascript/mcp-js-path-traversal.yaml +76 -0
- redteam_mcp-0.2.0/rules/javascript/mcp-js-secrets.yaml +88 -0
- redteam_mcp-0.2.0/rules/javascript/mcp-js-ssrf.yaml +85 -0
- redteam_mcp-0.2.0/rules/javascript/mcp-js-stdout.yaml +29 -0
- redteam_mcp-0.2.0/rules/python/mcp-credential-in-code.yaml +38 -0
- redteam_mcp-0.2.0/rules/python/mcp-credential-in-response.yaml +36 -0
- redteam_mcp-0.2.0/rules/python/mcp-eval-injection.yaml +49 -0
- redteam_mcp-0.2.0/rules/python/mcp-missing-error-handling.yaml +89 -0
- redteam_mcp-0.2.0/rules/python/mcp-path-traversal.yaml +48 -0
- redteam_mcp-0.2.0/rules/python/mcp-shell-injection.yaml +59 -0
- redteam_mcp-0.2.0/rules/python/mcp-ssrf.yaml +62 -0
- redteam_mcp-0.2.0/rules/python/mcp-stdout-pollution.yaml +16 -0
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
reports/*.html
|
|
2
|
+
!examples/sample-report.html
|
|
3
|
+
.DS_Store
|
|
4
|
+
docs/research-synthesis.md
|
|
5
|
+
docs/playbook-gaps.md
|
|
6
|
+
docs/agent-architecture-draft.md
|
|
7
|
+
docs/research-*.md
|
|
8
|
+
docs/cto-analysis.md
|
|
9
|
+
docs/documentation-audit.md
|
|
10
|
+
docs/competitive-analysis.md
|
|
11
|
+
docs/overnight-notes.md
|
|
12
|
+
docs/playbook-gaps.md
|
|
13
|
+
docs/agent-architecture-draft.md
|
|
14
|
+
__pycache__/
|
|
15
|
+
*.pyc
|
|
16
|
+
.venv/
|
|
17
|
+
dist/
|
|
18
|
+
build/
|
|
19
|
+
*.egg-info/
|
|
20
|
+
.pytest_cache/
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 m0rvayne
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: redteam-mcp
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: MCP server security auditor — deterministic engine + AI-native behavioral analysis
|
|
5
|
+
Project-URL: Homepage, https://github.com/m0rvayne/mcp-redteam
|
|
6
|
+
Project-URL: Repository, https://github.com/m0rvayne/mcp-redteam
|
|
7
|
+
Author-email: m0rvayne <m0rvayne@proton.me>
|
|
8
|
+
License-Expression: MIT
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Keywords: audit,mcp,pentest,red-team,security
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
19
|
+
Classifier: Topic :: Security
|
|
20
|
+
Classifier: Typing :: Typed
|
|
21
|
+
Requires-Python: >=3.10
|
|
22
|
+
Requires-Dist: pydantic>=2.0
|
|
23
|
+
Requires-Dist: rich>=13.0
|
|
24
|
+
Requires-Dist: typer>=0.12
|
|
25
|
+
Provides-Extra: dev
|
|
26
|
+
Requires-Dist: hypothesis>=6.0; extra == 'dev'
|
|
27
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
28
|
+
Provides-Extra: llm
|
|
29
|
+
Requires-Dist: anthropic>=0.40; extra == 'llm'
|
|
30
|
+
Requires-Dist: instructor>=1.0; extra == 'llm'
|
|
31
|
+
Description-Content-Type: text/markdown
|
|
32
|
+
|
|
33
|
+
<div align="center">
|
|
34
|
+
|
|
35
|
+
<img src="assets/logo.svg" alt="mcp-redteam" width="700">
|
|
36
|
+
|
|
37
|
+
**It doesn't tell you where your walls are thin. It walks through them.**
|
|
38
|
+
|
|
39
|
+
[](https://github.com/m0rvayne/mcp-redteam/actions/workflows/test.yml)
|
|
40
|
+
[](LICENSE)
|
|
41
|
+
[](https://owasp.org/www-project-mcp-top-10/)
|
|
42
|
+
[](https://claude.ai/code)
|
|
43
|
+
|
|
44
|
+
</div>
|
|
45
|
+
|
|
46
|
+
---
|
|
47
|
+
|
|
48
|
+
MCP server security auditor. Two modes of operation:
|
|
49
|
+
|
|
50
|
+
- **Claude Code plugin** — AI-native deep audit. Reads source code semantically, probes tools, detects behavioral mismatches, maps cross-server attack chains. Interactive HTML report.
|
|
51
|
+
- **Standalone CLI** — deterministic scan. 14 Semgrep rules, config health checks, SARIF output. Works in CI/CD without Claude.
|
|
52
|
+
|
|
53
|
+
## What works today
|
|
54
|
+
|
|
55
|
+
| Feature | Status | How |
|
|
56
|
+
|---------|--------|-----|
|
|
57
|
+
| Config health scanner | Working | Dead servers, scope conflicts, credential exposure, supply chain, CVE checks |
|
|
58
|
+
| Semgrep code analysis | Working | 14 rules (Python + JS/TS): injection, traversal, SSRF, eval, secrets, stdout |
|
|
59
|
+
| SARIF output | Working | GitHub Security tab integration |
|
|
60
|
+
| JSON output | Working | Machine-readable for CI/CD |
|
|
61
|
+
| Terminal output | Working | Rich colored tables with risk scores |
|
|
62
|
+
| CI exit codes | Working | `--fail-on critical` returns exit 1 |
|
|
63
|
+
| LLM behavioral analysis | Working | Anthropic SDK, behavioral mismatch detection (optional) |
|
|
64
|
+
| Audit history | Working | JSONL log, cross-run comparison (new/confirmed/fixed) |
|
|
65
|
+
| Self-security audit | Working | 10 vulnerabilities found and fixed in own code |
|
|
66
|
+
| Claude Code plugin | Working | AI-driven deep audit with HTML report |
|
|
67
|
+
| 75+ tests | Passing | Unit, security, stress, edge cases, Hypothesis fuzzing |
|
|
68
|
+
|
|
69
|
+
## What doesn't work yet
|
|
70
|
+
|
|
71
|
+
- Cross-server chain detection in CLI (exists in Claude Code plugin only)
|
|
72
|
+
- Auto-fix in CLI (exists in Claude Code plugin only)
|
|
73
|
+
- HTML report generation in CLI
|
|
74
|
+
- MCPTox benchmark validation
|
|
75
|
+
- Community rule contributions
|
|
76
|
+
|
|
77
|
+
## Install
|
|
78
|
+
|
|
79
|
+
**Claude Code plugin** (deep AI-native audit):
|
|
80
|
+
```bash
|
|
81
|
+
claude plugin marketplace add m0rvayne/mcp-redteam
|
|
82
|
+
claude plugin install mcp-redteam
|
|
83
|
+
/mcp-redteam
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
**Standalone CLI** (deterministic, CI/CD ready):
|
|
87
|
+
```bash
|
|
88
|
+
pip install redteam-mcp
|
|
89
|
+
mcp-redteam scan ./your-mcp-server --no-llm
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
Requires Python 3.10+. Semgrep must be installed separately for code analysis: `pip install semgrep`.
|
|
93
|
+
|
|
94
|
+
## What it checks
|
|
95
|
+
|
|
96
|
+
### Config Health (deterministic)
|
|
97
|
+
|
|
98
|
+
Dead/disconnected servers, scope conflicts (same server in multiple scopes), credentials in git-tracked config files (CVE-2025-59536), unpinned npx/uvx packages (supply chain), enableAllProjectMcpServers bypass (CVE-2026-21852), orphaned MCP processes.
|
|
99
|
+
|
|
100
|
+
### Code Security (Semgrep, 14 rules)
|
|
101
|
+
|
|
102
|
+
| Rule | What it detects | Languages |
|
|
103
|
+
|------|----------------|-----------|
|
|
104
|
+
| Shell injection | subprocess + shell=True with user input | Python |
|
|
105
|
+
| Path traversal | open()/Path() without realpath check | Python, JS/TS |
|
|
106
|
+
| SSRF | HTTP requests with user-controlled URL | Python, JS/TS |
|
|
107
|
+
| Eval injection | eval()/exec()/new Function() with user input | Python, JS/TS |
|
|
108
|
+
| Hardcoded secrets | API keys, tokens, passwords in source | Python, JS/TS |
|
|
109
|
+
| Stdout pollution | print()/console.log() in MCP handlers | Python, JS/TS |
|
|
110
|
+
| Missing error handling | Tool functions without try/except | Python |
|
|
111
|
+
| Credential in response | API keys/tokens in tool return values | Python |
|
|
112
|
+
|
|
113
|
+
Based on 48+ CVEs, OWASP MCP Top 10, and research from Invariant Labs, Trail of Bits, Palo Alto Unit 42, OX Security, and Snyk.
|
|
114
|
+
|
|
115
|
+
### LLM Behavioral Analysis (optional, requires API key)
|
|
116
|
+
|
|
117
|
+
- **Behavioral mismatch**: tool description claims X, code does Y
|
|
118
|
+
- **Hidden operations**: undeclared network requests, file writes, subprocess calls
|
|
119
|
+
- **Credential mishandling**: secrets logged, leaked in errors, stored insecurely
|
|
120
|
+
|
|
121
|
+
## How it compares
|
|
122
|
+
|
|
123
|
+
| | mcp-scan (Snyk) | Cisco MCP Scanner | **mcp-redteam** |
|
|
124
|
+
|---|---|---|---|
|
|
125
|
+
| Approach | Static description scan | YARA + LLM-as-judge | **Semgrep taint + LLM behavioral** |
|
|
126
|
+
| Reads source code | No | Python only | **Yes — Python + JS/TS** |
|
|
127
|
+
| Config validation | No | Config discovery | **Yes — 6 checks, CVE detection** |
|
|
128
|
+
| Behavioral mismatch | No | No | **Yes (LLM layer)** |
|
|
129
|
+
| SARIF output | No | No | **Yes** |
|
|
130
|
+
| CI exit codes | Yes | No | **Yes** |
|
|
131
|
+
| Self-tested | Unknown | Unknown | **75+ tests, self-security audit** |
|
|
132
|
+
| Cloud dependency | Snyk API required | Cisco API (optional) | **No — fully local** |
|
|
133
|
+
|
|
134
|
+
### Why not just use mcp-scan?
|
|
135
|
+
|
|
136
|
+
mcp-scan reads what a server **says about itself** — tool descriptions. mcp-redteam checks what a server **actually does** — source code analysis + behavioral analysis.
|
|
137
|
+
|
|
138
|
+
A server with clean descriptions but leaky code: mcp-scan passes it. We catch it.
|
|
139
|
+
|
|
140
|
+
Real findings mcp-scan cannot detect (they live in code, not descriptions):
|
|
141
|
+
- Trello API keys in `.env` committed to git
|
|
142
|
+
- Instagram session cookies stored in plaintext
|
|
143
|
+
- AppleScript injection via unescaped clipboard input
|
|
144
|
+
- Google OAuth tokens with permissions `644`
|
|
145
|
+
|
|
146
|
+
## Audit History
|
|
147
|
+
|
|
148
|
+
Each audit saves a compact JSONL log to `~/Desktop/redteam-results/`. On the next run, mcp-redteam reads previous results and compares:
|
|
149
|
+
|
|
150
|
+
- **confirmed** — found again, higher confidence
|
|
151
|
+
- **new** — first time seeing this
|
|
152
|
+
- **fixed** — was in previous audit, now gone
|
|
153
|
+
|
|
154
|
+
This turns LLM non-determinism into an advantage: each run is a new perspective, the intersection is ground truth.
|
|
155
|
+
|
|
156
|
+
## Architecture
|
|
157
|
+
|
|
158
|
+
```
|
|
159
|
+
/mcp-redteam
|
|
160
|
+
|
|
|
161
|
+
+-----------------+
|
|
162
|
+
| Phase 0: Config |
|
|
163
|
+
+-----------------+
|
|
164
|
+
|
|
|
165
|
+
+-----------+
|
|
166
|
+
| Discovery |
|
|
167
|
+
+-----------+
|
|
168
|
+
|
|
|
169
|
+
| 1 server = 1 agent
|
|
170
|
+
|
|
|
171
|
+
+----------+ +----------+ +----------+ +----------+
|
|
172
|
+
| Agent-01 | | Agent-02 | | Agent-03 | | Agent-N |
|
|
173
|
+
| youtube | | trello | | instagram| | server-N |
|
|
174
|
+
| health | | health | | health | | health |
|
|
175
|
+
| arch | | arch | | arch | | arch |
|
|
176
|
+
| complete | | complete | | complete | | complete |
|
|
177
|
+
| security | | security | | security | | security |
|
|
178
|
+
+----+-----+ +----+-----+ +----+-----+ +----+-----+
|
|
179
|
+
| | | |
|
|
180
|
+
+------+-----+-----+------+
|
|
181
|
+
|
|
|
182
|
+
+-------------------------+
|
|
183
|
+
| Chain analysis + report |
|
|
184
|
+
+-------------------------+
|
|
185
|
+
|
|
|
186
|
+
+----------------+
|
|
187
|
+
| HTML + Fix |
|
|
188
|
+
+----------------+
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
## Tests
|
|
192
|
+
|
|
193
|
+
75+ tests across 7 test files:
|
|
194
|
+
|
|
195
|
+
- **test_semgrep.py** — each vulnerable fixture detected, each benign fixture clean
|
|
196
|
+
- **test_self_security.py** — 21 tests: our own code audited for vulnerabilities
|
|
197
|
+
- **test_stress.py** — 1000/10000 findings, concurrent scans, unicode
|
|
198
|
+
- **test_fuzzing.py** — Hypothesis property-based: any input, no crash
|
|
199
|
+
- **test_edge_cases.py** — corrupt JSON, missing files, null bytes, timeouts
|
|
200
|
+
- **test_models.py** + **test_formatters.py** — unit tests for core logic
|
|
201
|
+
|
|
202
|
+
## Current Limitations
|
|
203
|
+
|
|
204
|
+
- Plugin requires Claude Code with connected MCP servers
|
|
205
|
+
- CLI requires semgrep for code analysis (graceful skip if not installed)
|
|
206
|
+
- LLM analysis requires ANTHROPIC_API_KEY
|
|
207
|
+
- Destructive tests intentionally skipped — read-only probing only
|
|
208
|
+
- Source code analysis works for local servers; pip/npm packages may have limited access
|
|
209
|
+
- Plugin report quality scales with model capability (Opus > Sonnet > Haiku)
|
|
210
|
+
- False positive rate not yet measured on production MCP servers
|
|
211
|
+
|
|
212
|
+
### Known False Positive Patterns
|
|
213
|
+
|
|
214
|
+
- SSRF rule triggers on `httpx.get()` with URL built from config, not user input
|
|
215
|
+
- Path traversal rule triggers on `open()` where path is validated but validation isn't recognized as sanitizer
|
|
216
|
+
- Stdout pollution flags `print()` in `__main__` block (safe, not in MCP handler)
|
|
217
|
+
|
|
218
|
+
## Docs
|
|
219
|
+
|
|
220
|
+
The `docs/` folder is useful independently:
|
|
221
|
+
|
|
222
|
+
- **[attack-playbook.md](docs/attack-playbook.md)** — 18 attack categories, 48+ CVEs, payloads and detection methods
|
|
223
|
+
- **[best-practices.md](docs/best-practices.md)** — MCP server security checklist
|
|
224
|
+
- **[reference-server.md](docs/reference-server.md)** — secure server templates (Python + Node.js)
|
|
225
|
+
|
|
226
|
+
## References
|
|
227
|
+
|
|
228
|
+
- [OWASP MCP Top 10](https://owasp.org/www-project-mcp-top-10/)
|
|
229
|
+
- [Invariant Labs — Tool Poisoning Attacks](https://invariantlabs.ai/blog/mcp-security-notification-tool-poisoning-attacks)
|
|
230
|
+
- [Trail of Bits — MCP Security Layer](https://blog.trailofbits.com/2025/07/28/we-built-the-security-layer-mcp-always-needed/)
|
|
231
|
+
- [Palo Alto Unit 42 — MCP Attack Vectors](https://unit42.paloaltonetworks.com/model-context-protocol-attack-vectors/)
|
|
232
|
+
- [OX Security — STDIO Design Flaw](https://www.ox.security/blog/the-mother-of-all-ai-supply-chains-critical-systemic-vulnerability-at-the-core-of-the-mcp/)
|
|
233
|
+
- [NSA — MCP Security Guidance](https://www.nsa.gov/Portals/75/documents/Cybersecurity/CSI_MCP_SECURITY.pdf)
|
|
234
|
+
- [Vulnerable MCP Project](https://vulnerablemcp.info/)
|
|
235
|
+
|
|
236
|
+
## License
|
|
237
|
+
|
|
238
|
+
[MIT](LICENSE)
|
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
<div align="center">
|
|
2
|
+
|
|
3
|
+
<img src="assets/logo.svg" alt="mcp-redteam" width="700">
|
|
4
|
+
|
|
5
|
+
**It doesn't tell you where your walls are thin. It walks through them.**
|
|
6
|
+
|
|
7
|
+
[](https://github.com/m0rvayne/mcp-redteam/actions/workflows/test.yml)
|
|
8
|
+
[](LICENSE)
|
|
9
|
+
[](https://owasp.org/www-project-mcp-top-10/)
|
|
10
|
+
[](https://claude.ai/code)
|
|
11
|
+
|
|
12
|
+
</div>
|
|
13
|
+
|
|
14
|
+
---
|
|
15
|
+
|
|
16
|
+
MCP server security auditor. Two modes of operation:
|
|
17
|
+
|
|
18
|
+
- **Claude Code plugin** — AI-native deep audit. Reads source code semantically, probes tools, detects behavioral mismatches, maps cross-server attack chains. Interactive HTML report.
|
|
19
|
+
- **Standalone CLI** — deterministic scan. 14 Semgrep rules, config health checks, SARIF output. Works in CI/CD without Claude.
|
|
20
|
+
|
|
21
|
+
## What works today
|
|
22
|
+
|
|
23
|
+
| Feature | Status | How |
|
|
24
|
+
|---------|--------|-----|
|
|
25
|
+
| Config health scanner | Working | Dead servers, scope conflicts, credential exposure, supply chain, CVE checks |
|
|
26
|
+
| Semgrep code analysis | Working | 14 rules (Python + JS/TS): injection, traversal, SSRF, eval, secrets, stdout |
|
|
27
|
+
| SARIF output | Working | GitHub Security tab integration |
|
|
28
|
+
| JSON output | Working | Machine-readable for CI/CD |
|
|
29
|
+
| Terminal output | Working | Rich colored tables with risk scores |
|
|
30
|
+
| CI exit codes | Working | `--fail-on critical` returns exit 1 |
|
|
31
|
+
| LLM behavioral analysis | Working | Anthropic SDK, behavioral mismatch detection (optional) |
|
|
32
|
+
| Audit history | Working | JSONL log, cross-run comparison (new/confirmed/fixed) |
|
|
33
|
+
| Self-security audit | Working | 10 vulnerabilities found and fixed in own code |
|
|
34
|
+
| Claude Code plugin | Working | AI-driven deep audit with HTML report |
|
|
35
|
+
| 75+ tests | Passing | Unit, security, stress, edge cases, Hypothesis fuzzing |
|
|
36
|
+
|
|
37
|
+
## What doesn't work yet
|
|
38
|
+
|
|
39
|
+
- Cross-server chain detection in CLI (exists in Claude Code plugin only)
|
|
40
|
+
- Auto-fix in CLI (exists in Claude Code plugin only)
|
|
41
|
+
- HTML report generation in CLI
|
|
42
|
+
- MCPTox benchmark validation
|
|
43
|
+
- Community rule contributions
|
|
44
|
+
|
|
45
|
+
## Install
|
|
46
|
+
|
|
47
|
+
**Claude Code plugin** (deep AI-native audit):
|
|
48
|
+
```bash
|
|
49
|
+
claude plugin marketplace add m0rvayne/mcp-redteam
|
|
50
|
+
claude plugin install mcp-redteam
|
|
51
|
+
/mcp-redteam
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
**Standalone CLI** (deterministic, CI/CD ready):
|
|
55
|
+
```bash
|
|
56
|
+
pip install redteam-mcp
|
|
57
|
+
mcp-redteam scan ./your-mcp-server --no-llm
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
Requires Python 3.10+. Semgrep must be installed separately for code analysis: `pip install semgrep`.
|
|
61
|
+
|
|
62
|
+
## What it checks
|
|
63
|
+
|
|
64
|
+
### Config Health (deterministic)
|
|
65
|
+
|
|
66
|
+
Dead/disconnected servers, scope conflicts (same server in multiple scopes), credentials in git-tracked config files (CVE-2025-59536), unpinned npx/uvx packages (supply chain), enableAllProjectMcpServers bypass (CVE-2026-21852), orphaned MCP processes.
|
|
67
|
+
|
|
68
|
+
### Code Security (Semgrep, 14 rules)
|
|
69
|
+
|
|
70
|
+
| Rule | What it detects | Languages |
|
|
71
|
+
|------|----------------|-----------|
|
|
72
|
+
| Shell injection | subprocess + shell=True with user input | Python |
|
|
73
|
+
| Path traversal | open()/Path() without realpath check | Python, JS/TS |
|
|
74
|
+
| SSRF | HTTP requests with user-controlled URL | Python, JS/TS |
|
|
75
|
+
| Eval injection | eval()/exec()/new Function() with user input | Python, JS/TS |
|
|
76
|
+
| Hardcoded secrets | API keys, tokens, passwords in source | Python, JS/TS |
|
|
77
|
+
| Stdout pollution | print()/console.log() in MCP handlers | Python, JS/TS |
|
|
78
|
+
| Missing error handling | Tool functions without try/except | Python |
|
|
79
|
+
| Credential in response | API keys/tokens in tool return values | Python |
|
|
80
|
+
|
|
81
|
+
Based on 48+ CVEs, OWASP MCP Top 10, and research from Invariant Labs, Trail of Bits, Palo Alto Unit 42, OX Security, and Snyk.
|
|
82
|
+
|
|
83
|
+
### LLM Behavioral Analysis (optional, requires API key)
|
|
84
|
+
|
|
85
|
+
- **Behavioral mismatch**: tool description claims X, code does Y
|
|
86
|
+
- **Hidden operations**: undeclared network requests, file writes, subprocess calls
|
|
87
|
+
- **Credential mishandling**: secrets logged, leaked in errors, stored insecurely
|
|
88
|
+
|
|
89
|
+
## How it compares
|
|
90
|
+
|
|
91
|
+
| | mcp-scan (Snyk) | Cisco MCP Scanner | **mcp-redteam** |
|
|
92
|
+
|---|---|---|---|
|
|
93
|
+
| Approach | Static description scan | YARA + LLM-as-judge | **Semgrep taint + LLM behavioral** |
|
|
94
|
+
| Reads source code | No | Python only | **Yes — Python + JS/TS** |
|
|
95
|
+
| Config validation | No | Config discovery | **Yes — 6 checks, CVE detection** |
|
|
96
|
+
| Behavioral mismatch | No | No | **Yes (LLM layer)** |
|
|
97
|
+
| SARIF output | No | No | **Yes** |
|
|
98
|
+
| CI exit codes | Yes | No | **Yes** |
|
|
99
|
+
| Self-tested | Unknown | Unknown | **75+ tests, self-security audit** |
|
|
100
|
+
| Cloud dependency | Snyk API required | Cisco API (optional) | **No — fully local** |
|
|
101
|
+
|
|
102
|
+
### Why not just use mcp-scan?
|
|
103
|
+
|
|
104
|
+
mcp-scan reads what a server **says about itself** — tool descriptions. mcp-redteam checks what a server **actually does** — source code analysis + behavioral analysis.
|
|
105
|
+
|
|
106
|
+
A server with clean descriptions but leaky code: mcp-scan passes it. We catch it.
|
|
107
|
+
|
|
108
|
+
Real findings mcp-scan cannot detect (they live in code, not descriptions):
|
|
109
|
+
- Trello API keys in `.env` committed to git
|
|
110
|
+
- Instagram session cookies stored in plaintext
|
|
111
|
+
- AppleScript injection via unescaped clipboard input
|
|
112
|
+
- Google OAuth tokens with permissions `644`
|
|
113
|
+
|
|
114
|
+
## Audit History
|
|
115
|
+
|
|
116
|
+
Each audit saves a compact JSONL log to `~/Desktop/redteam-results/`. On the next run, mcp-redteam reads previous results and compares:
|
|
117
|
+
|
|
118
|
+
- **confirmed** — found again, higher confidence
|
|
119
|
+
- **new** — first time seeing this
|
|
120
|
+
- **fixed** — was in previous audit, now gone
|
|
121
|
+
|
|
122
|
+
This turns LLM non-determinism into an advantage: each run is a new perspective, the intersection is ground truth.
|
|
123
|
+
|
|
124
|
+
## Architecture
|
|
125
|
+
|
|
126
|
+
```
|
|
127
|
+
/mcp-redteam
|
|
128
|
+
|
|
|
129
|
+
+-----------------+
|
|
130
|
+
| Phase 0: Config |
|
|
131
|
+
+-----------------+
|
|
132
|
+
|
|
|
133
|
+
+-----------+
|
|
134
|
+
| Discovery |
|
|
135
|
+
+-----------+
|
|
136
|
+
|
|
|
137
|
+
| 1 server = 1 agent
|
|
138
|
+
|
|
|
139
|
+
+----------+ +----------+ +----------+ +----------+
|
|
140
|
+
| Agent-01 | | Agent-02 | | Agent-03 | | Agent-N |
|
|
141
|
+
| youtube | | trello | | instagram| | server-N |
|
|
142
|
+
| health | | health | | health | | health |
|
|
143
|
+
| arch | | arch | | arch | | arch |
|
|
144
|
+
| complete | | complete | | complete | | complete |
|
|
145
|
+
| security | | security | | security | | security |
|
|
146
|
+
+----+-----+ +----+-----+ +----+-----+ +----+-----+
|
|
147
|
+
| | | |
|
|
148
|
+
+------+-----+-----+------+
|
|
149
|
+
|
|
|
150
|
+
+-------------------------+
|
|
151
|
+
| Chain analysis + report |
|
|
152
|
+
+-------------------------+
|
|
153
|
+
|
|
|
154
|
+
+----------------+
|
|
155
|
+
| HTML + Fix |
|
|
156
|
+
+----------------+
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
## Tests
|
|
160
|
+
|
|
161
|
+
75+ tests across 7 test files:
|
|
162
|
+
|
|
163
|
+
- **test_semgrep.py** — each vulnerable fixture detected, each benign fixture clean
|
|
164
|
+
- **test_self_security.py** — 21 tests: our own code audited for vulnerabilities
|
|
165
|
+
- **test_stress.py** — 1000/10000 findings, concurrent scans, unicode
|
|
166
|
+
- **test_fuzzing.py** — Hypothesis property-based: any input, no crash
|
|
167
|
+
- **test_edge_cases.py** — corrupt JSON, missing files, null bytes, timeouts
|
|
168
|
+
- **test_models.py** + **test_formatters.py** — unit tests for core logic
|
|
169
|
+
|
|
170
|
+
## Current Limitations
|
|
171
|
+
|
|
172
|
+
- Plugin requires Claude Code with connected MCP servers
|
|
173
|
+
- CLI requires semgrep for code analysis (graceful skip if not installed)
|
|
174
|
+
- LLM analysis requires ANTHROPIC_API_KEY
|
|
175
|
+
- Destructive tests intentionally skipped — read-only probing only
|
|
176
|
+
- Source code analysis works for local servers; pip/npm packages may have limited access
|
|
177
|
+
- Plugin report quality scales with model capability (Opus > Sonnet > Haiku)
|
|
178
|
+
- False positive rate not yet measured on production MCP servers
|
|
179
|
+
|
|
180
|
+
### Known False Positive Patterns
|
|
181
|
+
|
|
182
|
+
- SSRF rule triggers on `httpx.get()` with URL built from config, not user input
|
|
183
|
+
- Path traversal rule triggers on `open()` where path is validated but validation isn't recognized as sanitizer
|
|
184
|
+
- Stdout pollution flags `print()` in `__main__` block (safe, not in MCP handler)
|
|
185
|
+
|
|
186
|
+
## Docs
|
|
187
|
+
|
|
188
|
+
The `docs/` folder is useful independently:
|
|
189
|
+
|
|
190
|
+
- **[attack-playbook.md](docs/attack-playbook.md)** — 18 attack categories, 48+ CVEs, payloads and detection methods
|
|
191
|
+
- **[best-practices.md](docs/best-practices.md)** — MCP server security checklist
|
|
192
|
+
- **[reference-server.md](docs/reference-server.md)** — secure server templates (Python + Node.js)
|
|
193
|
+
|
|
194
|
+
## References
|
|
195
|
+
|
|
196
|
+
- [OWASP MCP Top 10](https://owasp.org/www-project-mcp-top-10/)
|
|
197
|
+
- [Invariant Labs — Tool Poisoning Attacks](https://invariantlabs.ai/blog/mcp-security-notification-tool-poisoning-attacks)
|
|
198
|
+
- [Trail of Bits — MCP Security Layer](https://blog.trailofbits.com/2025/07/28/we-built-the-security-layer-mcp-always-needed/)
|
|
199
|
+
- [Palo Alto Unit 42 — MCP Attack Vectors](https://unit42.paloaltonetworks.com/model-context-protocol-attack-vectors/)
|
|
200
|
+
- [OX Security — STDIO Design Flaw](https://www.ox.security/blog/the-mother-of-all-ai-supply-chains-critical-systemic-vulnerability-at-the-core-of-the-mcp/)
|
|
201
|
+
- [NSA — MCP Security Guidance](https://www.nsa.gov/Portals/75/documents/Cybersecurity/CSI_MCP_SECURITY.pdf)
|
|
202
|
+
- [Vulnerable MCP Project](https://vulnerablemcp.info/)
|
|
203
|
+
|
|
204
|
+
## License
|
|
205
|
+
|
|
206
|
+
[MIT](LICENSE)
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
import sys
|
|
2
|
+
import typer
|
|
3
|
+
from rich.console import Console
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Optional
|
|
6
|
+
from enum import Enum
|
|
7
|
+
from datetime import datetime
|
|
8
|
+
|
|
9
|
+
# Module-wide Rich console used by every command for human-readable output.
console = Console()

# Root Typer application. Invoking the CLI with no arguments prints the help
# text instead of failing with a "missing command" error.
app = typer.Typer(
    no_args_is_help=True,
    name="mcp-redteam",
    help="MCP server security auditor — deterministic engine + AI-native behavioral analysis",
)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class OutputFormat(str, Enum):
    """Report formats selectable through the ``--format`` / ``-f`` option.

    Members are also plain strings (``str`` mixin), so a member compares equal
    to its lowercase name.
    """

    # Human-readable report rendered on the console.
    terminal = "terminal"
    # Machine-readable JSON document.
    json = "json"
    # SARIF document.
    sarif = "sarif"
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@app.command()
def scan(
    path: Path = typer.Argument(..., help="Path to MCP server source code"),
    format: OutputFormat = typer.Option(OutputFormat.terminal, "--format", "-f", help="Output format"),
    no_llm: bool = typer.Option(False, "--no-llm", help="Deterministic checks only (no LLM)"),
    config: bool = typer.Option(True, "--config/--no-config", help="Run config health checks"),
    output: Optional[Path] = typer.Option(None, "--output", "-o", help="Output file path"),
    fail_on: Optional[str] = typer.Option(None, "--fail-on", help="Exit 1 if findings at this severity or above (critical, high, medium, low, info)"),
):
    """Scan MCP server for security vulnerabilities."""
    # NOTE: the docstring above doubles as the Typer help text for this
    # command, so the developer-facing documentation lives in comments.
    #
    # Pipeline: optional config health checks (phase 0), Semgrep scan when
    # semgrep is available (phase 1), LLM behavioral analysis unless --no-llm
    # (phase 2). The collected findings are wrapped in a ScanResult and
    # rendered in the requested format.
    #
    # Exit codes: 2 for a missing path or an invalid --fail-on value,
    # 1 when --fail-on is given and a finding reaches that severity.
    from mcp_redteam.models import ScanResult, ScanMetadata, Severity
    from mcp_redteam.engine.semgrep_runner import run_semgrep, is_semgrep_available
    from mcp_redteam.engine.config_scanner import scan_config
    from mcp_redteam.formatters import format_sarif, format_json, format_terminal
    from mcp_redteam.formatters.sarif import write_sarif
    from mcp_redteam.formatters.json_fmt import write_json

    scan_start = datetime.now()

    # Progress and diagnostics must not pollute stdout when the report itself
    # is machine-readable (JSON/SARIF may be piped or redirected), so route
    # them to stderr for those formats.
    status = console if format == OutputFormat.terminal else Console(stderr=True)

    # Validate --fail-on before doing any work: silently ignoring an unknown
    # value would make a CI gate pass no matter what the scan finds.
    threshold = None
    if fail_on:
        thresholds = {
            "critical": Severity.CRITICAL,
            "high": Severity.HIGH,
            "medium": Severity.MEDIUM,
            "low": Severity.LOW,
            "info": Severity.INFO,
        }
        threshold = thresholds.get(fail_on.strip().lower())
        if threshold is None:
            status.print(
                f"[red]Error:[/red] invalid --fail-on value {fail_on!r}; "
                f"expected one of: {', '.join(thresholds)}"
            )
            raise typer.Exit(code=2)

    path = path.resolve()  # VULN-08 fix: canonicalize path
    if not path.exists():
        status.print(f"[red]Error:[/red] path {path} does not exist")
        raise typer.Exit(code=2)

    findings = []

    # Phase 0: Config health
    if config:
        status.print("[bold cyan]Phase 0:[/bold cyan] Config validation...")
        config_findings = scan_config()
        findings.extend(config_findings)
        status.print(f"  {len(config_findings)} config issues found")

    # Phase 1: Semgrep deterministic scan
    semgrep_available = is_semgrep_available()
    if semgrep_available:
        status.print("[bold cyan]Phase 1:[/bold cyan] Semgrep analysis...")
        semgrep_findings = run_semgrep(path)
        findings.extend(semgrep_findings)
        status.print(f"  {len(semgrep_findings)} code findings")
    else:
        status.print("[yellow]Phase 1:[/yellow] Semgrep not installed — skipping deterministic code scan")
        status.print("  Install: [dim]pip install semgrep[/dim]")

    # Phase 2: LLM behavioral analysis
    if not no_llm:
        from mcp_redteam.llm.analyzer import analyze_behavioral, is_llm_available
        if is_llm_available():
            status.print("[bold cyan]Phase 2:[/bold cyan] LLM behavioral analysis...")
            llm_findings = analyze_behavioral(path)
            findings.extend(llm_findings)
            status.print(f"  {len(llm_findings)} behavioral findings")
        else:
            status.print("[yellow]Phase 2:[/yellow] No ANTHROPIC_API_KEY — skipping LLM analysis")
            status.print("  Set: [dim]export ANTHROPIC_API_KEY=sk-...[/dim]")

    # Count candidate source files (.py/.ts/.js) under a directory target;
    # a single-file target counts as one.
    if path.is_dir():
        files_scanned = sum(
            1 for pattern in ("*.py", "*.ts", "*.js") for _ in path.rglob(pattern)
        )
    else:
        files_scanned = 1

    # Build result
    result = ScanResult(
        metadata=ScanMetadata(
            scan_start=scan_start,
            scan_end=datetime.now(),
            target_path=str(path),
            mode="deterministic" if no_llm else "hybrid",
            llm_enabled=not no_llm,
            semgrep_available=semgrep_available,
            files_scanned=files_scanned,
        ),
        findings=findings,
    )

    # Output
    if format == OutputFormat.terminal:
        if output:
            # Previously ignored without notice; tell the user why no file appears.
            status.print("[yellow]Warning:[/yellow] --output is ignored for terminal format")
        format_terminal(result, console)
    elif format == OutputFormat.sarif:
        if output:
            write_sarif(result, output)
            status.print(f"[green]SARIF written to {output}[/green]")
        else:
            # Plain print keeps the document free of Rich markup/wrapping.
            print(format_sarif(result))
    elif format == OutputFormat.json:
        if output:
            write_json(result, output)
            status.print(f"[green]JSON written to {output}[/green]")
        else:
            print(format_json(result))

    # Exit code for CI
    if threshold is not None:
        floor = _severity_rank(threshold)
        if any(_severity_rank(f.severity) >= floor for f in findings):
            raise typer.Exit(code=1)
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def _severity_rank(severity) -> int:
    """Return the ordinal rank of ``severity`` for threshold comparisons.

    Ranks ascend INFO=0, LOW=1, MEDIUM=2, HIGH=3, CRITICAL=4. A value that is
    not one of these ``Severity`` members ranks 0.
    """
    # Imported lazily, matching the rest of this module's command functions.
    from mcp_redteam.models import Severity

    ascending = (
        Severity.INFO,
        Severity.LOW,
        Severity.MEDIUM,
        Severity.HIGH,
        Severity.CRITICAL,
    )
    rank_of = {level: position for position, level in enumerate(ascending)}
    return rank_of.get(severity, 0)
|
|
124
|
+
|
|
125
|
+
@app.command()
def version():
    """Show version."""
    # The docstring above is the CLI help text for this command; keep it short.
    from mcp_redteam import __version__

    banner = f"mcp-redteam {__version__}"
    console.print(banner)
|
|
130
|
+
|
|
131
|
+
# Allow running this module directly as a script; it dispatches to the Typer
# application defined above.
if __name__ == "__main__":
    app()
|
|
File without changes
|