mcp-scan-safe 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,5 @@
1
+ include README.md
2
+ include LICENSE
3
+ prune tests
4
+ prune .github
5
+ prune .hermes
@@ -0,0 +1,117 @@
1
+ Metadata-Version: 2.4
2
+ Name: mcp-scan-safe
3
+ Version: 0.1.0
4
+ Summary: MCP supply chain security scanner
5
+ License: MIT
6
+ Requires-Python: >=3.11
7
+ Description-Content-Type: text/markdown
8
+ Requires-Dist: click>=8.0
9
+ Requires-Dist: rich>=13.0
10
+ Provides-Extra: dev
11
+ Requires-Dist: pytest>=8.0; extra == "dev"
12
+ Requires-Dist: ruff>=0.5.0; extra == "dev"
13
+
14
+ # MCPCheck 🔒
15
+
16
+ [![PyPI version](https://img.shields.io/pypi/v/mcp-scan-safe)](https://pypi.org/project/mcp-scan-safe/)
17
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE)
18
+ [![Python 3.11+](https://img.shields.io/badge/python-3.11%2B-brightgreen.svg)](https://python.org)
19
+
20
+ **MCP supply chain security scanner.** Detect tool poisoning, prompt injection, data exfiltration, and other attacks in MCP server definitions.
21
+
22
+ ## Installation
23
+
24
+ ```bash
25
+ pip install mcp-scan-safe
26
+ ```
27
+
28
+ ## Usage
29
+
30
+ ### Basic scan
31
+
32
+ ```bash
33
+ mcpcheck ./my-mcp-server
34
+ ```
35
+
36
+ ### JSON output
37
+
38
+ ```bash
39
+ mcpcheck ./my-mcp-server --format json
40
+ ```
41
+
42
+ ### SARIF for CI/CD
43
+
44
+ ```bash
45
+ mcpcheck ./my-mcp-server --format sarif > results.sarif
46
+ ```
47
+
48
+ ### Severity filter
49
+
50
+ ```bash
51
+ mcpcheck ./my-mcp-server --min-severity HIGH
52
+ ```
53
+
54
+ ### Exclude patterns
55
+
56
+ ```bash
57
+ mcpcheck ./my-mcp-server --exclude "vendor/*" --exclude "node_modules/*"
58
+ ```
59
+
60
+ ## Detected Vulnerabilities
61
+
62
+ | Rule ID | Category | Severity | Description |
63
+ |---------|----------|----------|-------------|
64
+ | `tool_poisoning_instructions` | TOOL_POISONING | CRITICAL | Detects prompt injection patterns such as "ignore previous instructions", "you are now in admin mode", "override previous", "disregard", and "new instructions:" in tool names and descriptions. |
65
+ | `hidden_behavior` | HIDDEN_BEHAVIOR | HIGH | Detects hidden actions and concealed behaviors like "secretly send/copy/read", "without notifying the user", hidden instructions/directives, and directives that the user must not notice. |
66
+ | `data_exfiltration` | DATA_EXFILTRATION | HIGH | Detects hidden data sending patterns such as "send all data to", "exfiltrate", and covert data exfiltration in tool descriptions. |
67
+ | `behavioral_mismatch` | BEHAVIORAL_MISMATCH | HIGH | Detects when tool descriptions contradict their stated purpose — e.g. tools described as benign but containing keywords like "secretly", "silently", "covertly", or "ignore the user". |
68
+ | `external_url` | EXTERNAL_URL | MEDIUM | Flags any external URL in tool descriptions (excluding localhost/127.0.0.1) that could indicate callback or data exfiltration endpoints. |
69
+ | `parameter_smuggling` | PARAMETER_SMUGGLING | MEDIUM | Detects hidden or undocumented parameters and attempts to embed secret data in responses or metadata. |
70
+
71
+ ## Exit Codes
72
+
73
+ | Code | Meaning |
74
+ |------|---------|
75
+ | `0` | Clean — no CRITICAL or HIGH findings detected |
76
+ | `1` | One or more CRITICAL or HIGH findings were detected |
77
+
78
+ ## CI/CD Integration
79
+
80
+ MCPCheck includes a GitHub Action (`action.yml`) for seamless CI/CD integration. It
81
+ runs a scan, uploads results as a SARIF artifact, and integrates with GitHub Code
82
+ Scanning.
83
+
84
+ ```yaml
85
+ name: MCPCheck Scan
86
+ on:
87
+ push:
88
+ branches: [main]
89
+ pull_request:
90
+
91
+ jobs:
92
+ mcpcheck:
93
+ runs-on: ubuntu-latest
94
+ steps:
95
+ - uses: actions/checkout@v4
96
+
97
+ - name: Run MCPCheck
98
+ uses: onicarps/MCPSafe@main
99
+ with:
100
+ path: "."
101
+ severity: "LOW"
102
+ version: "0.1.0"
103
+
104
+ # The action automatically uploads SARIF results to GitHub Code Scanning.
105
+ # Findings will appear under the "Security" tab in your repository.
106
+ ```
107
+
108
+ You can also invoke MCPCheck directly in any CI pipeline:
109
+
110
+ ```bash
111
+ pip install mcp-scan-safe
112
+ mcpcheck ./my-mcp-server --format sarif > results.sarif
113
+ ```
114
+
115
+ ## License
116
+
117
+ MIT
@@ -0,0 +1,104 @@
1
+ # MCPCheck 🔒
2
+
3
+ [![PyPI version](https://img.shields.io/pypi/v/mcp-scan-safe)](https://pypi.org/project/mcp-scan-safe/)
4
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE)
5
+ [![Python 3.11+](https://img.shields.io/badge/python-3.11%2B-brightgreen.svg)](https://python.org)
6
+
7
+ **MCP supply chain security scanner.** Detect tool poisoning, prompt injection, data exfiltration, and other attacks in MCP server definitions.
8
+
9
+ ## Installation
10
+
11
+ ```bash
12
+ pip install mcp-scan-safe
13
+ ```
14
+
15
+ ## Usage
16
+
17
+ ### Basic scan
18
+
19
+ ```bash
20
+ mcpcheck ./my-mcp-server
21
+ ```
22
+
23
+ ### JSON output
24
+
25
+ ```bash
26
+ mcpcheck ./my-mcp-server --format json
27
+ ```
28
+
29
+ ### SARIF for CI/CD
30
+
31
+ ```bash
32
+ mcpcheck ./my-mcp-server --format sarif > results.sarif
33
+ ```
34
+
35
+ ### Severity filter
36
+
37
+ ```bash
38
+ mcpcheck ./my-mcp-server --min-severity HIGH
39
+ ```
40
+
41
+ ### Exclude patterns
42
+
43
+ ```bash
44
+ mcpcheck ./my-mcp-server --exclude "vendor/*" --exclude "node_modules/*"
45
+ ```
46
+
47
+ ## Detected Vulnerabilities
48
+
49
+ | Rule ID | Category | Severity | Description |
50
+ |---------|----------|----------|-------------|
51
+ | `tool_poisoning_instructions` | TOOL_POISONING | CRITICAL | Detects prompt injection patterns such as "ignore previous instructions", "you are now in admin mode", "override previous", "disregard", and "new instructions:" in tool names and descriptions. |
52
+ | `hidden_behavior` | HIDDEN_BEHAVIOR | HIGH | Detects hidden actions and concealed behaviors like "secretly send/copy/read", "without notifying the user", hidden instructions/directives, and directives that the user must not notice. |
53
+ | `data_exfiltration` | DATA_EXFILTRATION | HIGH | Detects hidden data sending patterns such as "send all data to", "exfiltrate", and covert data exfiltration in tool descriptions. |
54
+ | `behavioral_mismatch` | BEHAVIORAL_MISMATCH | HIGH | Detects when tool descriptions contradict their stated purpose — e.g. tools described as benign but containing keywords like "secretly", "silently", "covertly", or "ignore the user". |
55
+ | `external_url` | EXTERNAL_URL | MEDIUM | Flags any external URL in tool descriptions (excluding localhost/127.0.0.1) that could indicate callback or data exfiltration endpoints. |
56
+ | `parameter_smuggling` | PARAMETER_SMUGGLING | MEDIUM | Detects hidden or undocumented parameters and attempts to embed secret data in responses or metadata. |
57
+
58
+ ## Exit Codes
59
+
60
+ | Code | Meaning |
61
+ |------|---------|
62
+ | `0` | Clean — no CRITICAL or HIGH findings detected |
63
+ | `1` | One or more CRITICAL or HIGH findings were detected |
64
+
65
+ ## CI/CD Integration
66
+
67
+ MCPCheck includes a GitHub Action (`action.yml`) for seamless CI/CD integration. It
68
+ runs a scan, uploads results as a SARIF artifact, and integrates with GitHub Code
69
+ Scanning.
70
+
71
+ ```yaml
72
+ name: MCPCheck Scan
73
+ on:
74
+ push:
75
+ branches: [main]
76
+ pull_request:
77
+
78
+ jobs:
79
+ mcpcheck:
80
+ runs-on: ubuntu-latest
81
+ steps:
82
+ - uses: actions/checkout@v4
83
+
84
+ - name: Run MCPCheck
85
+ uses: onicarps/MCPSafe@main
86
+ with:
87
+ path: "."
88
+ severity: "LOW"
89
+ version: "0.1.0"
90
+
91
+ # The action automatically uploads SARIF results to GitHub Code Scanning.
92
+ # Findings will appear under the "Security" tab in your repository.
93
+ ```
94
+
95
+ You can also invoke MCPCheck directly in any CI pipeline:
96
+
97
+ ```bash
98
+ pip install mcp-scan-safe
99
+ mcpcheck ./my-mcp-server --format sarif > results.sarif
100
+ ```
101
+
102
+ ## License
103
+
104
+ MIT
@@ -0,0 +1,38 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68.0", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "mcp-scan-safe"
7
+ version = "0.1.0"
8
+ description = "MCP supply chain security scanner"
9
+ readme = "README.md"
10
+ license = {text = "MIT"}
11
+ requires-python = ">=3.11"
12
+ dependencies = [
13
+ "click>=8.0",
14
+ "rich>=13.0",
15
+ ]
16
+
17
+ [project.optional-dependencies]
18
+ dev = [
19
+ "pytest>=8.0",
20
+ "ruff>=0.5.0",
21
+ ]
22
+
23
+ [project.scripts]
24
+ mcpcheck = "mcpsafe.cli:main"
25
+ mcpsafe = "mcpsafe.cli:main"
26
+
27
+ [tool.setuptools.packages.find]
28
+ where = ["src"]
29
+
30
+ [tool.ruff]
31
+ target-version = "py311"
32
+ line-length = 100
33
+
34
+ [tool.ruff.lint]
35
+ select = ["E", "F", "I", "N", "W", "UP"]
36
+
37
+ [tool.pytest.ini_options]
38
+ testpaths = ["tests"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,117 @@
1
+ Metadata-Version: 2.4
2
+ Name: mcp-scan-safe
3
+ Version: 0.1.0
4
+ Summary: MCP supply chain security scanner
5
+ License: MIT
6
+ Requires-Python: >=3.11
7
+ Description-Content-Type: text/markdown
8
+ Requires-Dist: click>=8.0
9
+ Requires-Dist: rich>=13.0
10
+ Provides-Extra: dev
11
+ Requires-Dist: pytest>=8.0; extra == "dev"
12
+ Requires-Dist: ruff>=0.5.0; extra == "dev"
13
+
14
+ # MCPCheck 🔒
15
+
16
+ [![PyPI version](https://img.shields.io/pypi/v/mcp-scan-safe)](https://pypi.org/project/mcp-scan-safe/)
17
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE)
18
+ [![Python 3.11+](https://img.shields.io/badge/python-3.11%2B-brightgreen.svg)](https://python.org)
19
+
20
+ **MCP supply chain security scanner.** Detect tool poisoning, prompt injection, data exfiltration, and other attacks in MCP server definitions.
21
+
22
+ ## Installation
23
+
24
+ ```bash
25
+ pip install mcp-scan-safe
26
+ ```
27
+
28
+ ## Usage
29
+
30
+ ### Basic scan
31
+
32
+ ```bash
33
+ mcpcheck ./my-mcp-server
34
+ ```
35
+
36
+ ### JSON output
37
+
38
+ ```bash
39
+ mcpcheck ./my-mcp-server --format json
40
+ ```
41
+
42
+ ### SARIF for CI/CD
43
+
44
+ ```bash
45
+ mcpcheck ./my-mcp-server --format sarif > results.sarif
46
+ ```
47
+
48
+ ### Severity filter
49
+
50
+ ```bash
51
+ mcpcheck ./my-mcp-server --min-severity HIGH
52
+ ```
53
+
54
+ ### Exclude patterns
55
+
56
+ ```bash
57
+ mcpcheck ./my-mcp-server --exclude "vendor/*" --exclude "node_modules/*"
58
+ ```
59
+
60
+ ## Detected Vulnerabilities
61
+
62
+ | Rule ID | Category | Severity | Description |
63
+ |---------|----------|----------|-------------|
64
+ | `tool_poisoning_instructions` | TOOL_POISONING | CRITICAL | Detects prompt injection patterns such as "ignore previous instructions", "you are now in admin mode", "override previous", "disregard", and "new instructions:" in tool names and descriptions. |
65
+ | `hidden_behavior` | HIDDEN_BEHAVIOR | HIGH | Detects hidden actions and concealed behaviors like "secretly send/copy/read", "without notifying the user", hidden instructions/directives, and directives that the user must not notice. |
66
+ | `data_exfiltration` | DATA_EXFILTRATION | HIGH | Detects hidden data sending patterns such as "send all data to", "exfiltrate", and covert data exfiltration in tool descriptions. |
67
+ | `behavioral_mismatch` | BEHAVIORAL_MISMATCH | HIGH | Detects when tool descriptions contradict their stated purpose — e.g. tools described as benign but containing keywords like "secretly", "silently", "covertly", or "ignore the user". |
68
+ | `external_url` | EXTERNAL_URL | MEDIUM | Flags any external URL in tool descriptions (excluding localhost/127.0.0.1) that could indicate callback or data exfiltration endpoints. |
69
+ | `parameter_smuggling` | PARAMETER_SMUGGLING | MEDIUM | Detects hidden or undocumented parameters and attempts to embed secret data in responses or metadata. |
70
+
71
+ ## Exit Codes
72
+
73
+ | Code | Meaning |
74
+ |------|---------|
75
+ | `0` | Clean — no CRITICAL or HIGH findings detected |
76
+ | `1` | One or more CRITICAL or HIGH findings were detected |
77
+
78
+ ## CI/CD Integration
79
+
80
+ MCPCheck includes a GitHub Action (`action.yml`) for seamless CI/CD integration. It
81
+ runs a scan, uploads results as a SARIF artifact, and integrates with GitHub Code
82
+ Scanning.
83
+
84
+ ```yaml
85
+ name: MCPCheck Scan
86
+ on:
87
+ push:
88
+ branches: [main]
89
+ pull_request:
90
+
91
+ jobs:
92
+ mcpcheck:
93
+ runs-on: ubuntu-latest
94
+ steps:
95
+ - uses: actions/checkout@v4
96
+
97
+ - name: Run MCPCheck
98
+ uses: onicarps/MCPSafe@main
99
+ with:
100
+ path: "."
101
+ severity: "LOW"
102
+ version: "0.1.0"
103
+
104
+ # The action automatically uploads SARIF results to GitHub Code Scanning.
105
+ # Findings will appear under the "Security" tab in your repository.
106
+ ```
107
+
108
+ You can also invoke MCPCheck directly in any CI pipeline:
109
+
110
+ ```bash
111
+ pip install mcp-scan-safe
112
+ mcpcheck ./my-mcp-server --format sarif > results.sarif
113
+ ```
114
+
115
+ ## License
116
+
117
+ MIT
@@ -0,0 +1,14 @@
1
+ MANIFEST.in
2
+ README.md
3
+ pyproject.toml
4
+ src/mcp_scan_safe.egg-info/PKG-INFO
5
+ src/mcp_scan_safe.egg-info/SOURCES.txt
6
+ src/mcp_scan_safe.egg-info/dependency_links.txt
7
+ src/mcp_scan_safe.egg-info/entry_points.txt
8
+ src/mcp_scan_safe.egg-info/requires.txt
9
+ src/mcp_scan_safe.egg-info/top_level.txt
10
+ src/mcpsafe/__init__.py
11
+ src/mcpsafe/cli.py
12
+ src/mcpsafe/formatters.py
13
+ src/mcpsafe/parser.py
14
+ src/mcpsafe/rules.py
@@ -0,0 +1,3 @@
1
+ [console_scripts]
2
+ mcpcheck = mcpsafe.cli:main
3
+ mcpsafe = mcpsafe.cli:main
@@ -0,0 +1,6 @@
1
+ click>=8.0
2
+ rich>=13.0
3
+
4
+ [dev]
5
+ pytest>=8.0
6
+ ruff>=0.5.0
@@ -0,0 +1,3 @@
1
"""MCPSafe — MCP supply chain security scanner."""

# Package version string; the CLI imports it for its --version option.
__version__ = "0.1.0"
@@ -0,0 +1,81 @@
1
+ """Click CLI entry point for MCPSafe."""
2
+
3
+ from pathlib import Path
4
+
5
+ import click
6
+
7
+ from mcpsafe import __version__
8
+ from mcpsafe.formatters import format_json, format_sarif, format_text
9
+ from mcpsafe.parser import scan_directory
10
+ from mcpsafe.rules import scan_tool
11
+
12
# Maps each accepted --format choice to the callable that renders the findings.
FORMATTERS = dict(
    text=format_text,
    json=format_json,
    sarif=format_sarif,
)
17
+
18
# Rank of every severity level; a lower rank means a more severe finding.
SEVERITY_ORDER = {
    level: rank
    for rank, level in enumerate(("CRITICAL", "HIGH", "MEDIUM", "LOW"))
}
24
+
25
+
26
@click.command()
@click.argument("path", required=False)
@click.option(
    "--format",
    "fmt",
    type=click.Choice(["text", "json", "sarif"]),
    default="text",
    help="Output format.",
)
@click.option(
    "--min-severity",
    type=click.Choice(["CRITICAL", "HIGH", "MEDIUM", "LOW"]),
    default="LOW",
    help="Minimum severity to report.",
)
@click.option(
    "--exclude",
    multiple=True,
    help="Glob pattern(s) to exclude.",
)
@click.version_option(version=__version__, prog_name="mcpsafe")
@click.pass_context
def main(ctx, path, fmt, min_severity, exclude):
    """Scan MCP server source code for security vulnerabilities."""
    # NOTE: the docstring above doubles as the click help text, so it is runtime output.
    # PATH is declared optional so that a missing value yields this explicit usage error.
    if path is None:
        raise click.UsageError("PATH is required.")

    target = Path(path)
    if not target.exists():
        raise click.BadParameter(f"Path does not exist: {path}")

    # User-supplied --exclude patterns replace (not extend) the built-in defaults.
    patterns = exclude or ("node_modules/*", ".git/*", "__pycache__/*", "*.egg-info/*")

    # Run every rule against every discovered tool definition.
    findings = [
        finding
        for tool in scan_directory(path, exclude=patterns)
        for finding in scan_tool(tool)
    ]

    # Keep findings at least as severe as the threshold; unknown severities rank last (99).
    threshold = SEVERITY_ORDER[min_severity]
    reported = [
        finding
        for finding in findings
        if SEVERITY_ORDER.get(finding["severity"], 99) <= threshold
    ]

    # "." has an empty name, so fall back to the resolved absolute path.
    label = target.name or str(target.resolve())
    render = FORMATTERS[fmt]
    click.echo(render(reported, label))

    # A non-zero exit status signals that a reported finding is CRITICAL or HIGH.
    blocking = ("CRITICAL", "HIGH")
    if any(finding["severity"] in blocking for finding in reported):
        ctx.exit(1)
@@ -0,0 +1,141 @@
1
+ """Output formatters — text, JSON, and SARIF output for scan findings."""
2
+
3
+ import json
4
+ from datetime import UTC, datetime
5
+
6
+ from mcpsafe.rules import RULES as ALL_RULES
7
+
8
+ # ---------------------------------------------------------------------------
9
+ # Severity ordering + emoji mapping
10
+ # ---------------------------------------------------------------------------
11
+
12
# Severity levels in the order they are printed, most severe first.
_SEVERITY_ORDER = ("CRITICAL", "HIGH", "MEDIUM", "LOW")

# Marker printed in front of each severity heading.
_SEVERITY_EMOJI = {
    "CRITICAL": "\U0001F534",  # red circle
    "HIGH": "\U0001F7E0",  # orange circle
    "MEDIUM": "\U0001F7E1",  # yellow circle
    "LOW": "\U0001F535",  # blue circle
}


# ---------------------------------------------------------------------------
# Text formatter
# ---------------------------------------------------------------------------


def format_text(findings: list[dict], server_name: str) -> str:
    """Render findings as a plain-text report grouped by severity.

    Args:
        findings: Finding dicts; each one is read for its ``severity``,
            ``category``, ``description``, ``file`` and ``line`` keys.
        server_name: Label shown in the report header.

    Returns:
        The report as one newline-joined string. With no findings the report
        is just the header followed by a "no issues" line.
    """
    header = f"MCPSafe Scan: {server_name}"
    if not findings:
        return "\n".join([header, "✅ No security issues found"])

    report = [header]
    for severity in _SEVERITY_ORDER:
        # Findings whose severity is not a known level are never listed.
        matching = [item for item in findings if item["severity"] == severity]
        if not matching:
            continue
        marker = _SEVERITY_EMOJI.get(severity, "")
        report.append(f"\n{marker} {severity}")
        for item in matching:
            report.append(f"- [{item['category']}] {item['description']}")
            report.append(f" File: {item['file']}:{item['line']}")

    # The total counts every finding passed in, listed or not.
    report.append(f"\nTotal: {len(findings)} finding(s)")
    return "\n".join(report)
54
+
55
+
56
+ # ---------------------------------------------------------------------------
57
+ # JSON formatter
58
+ # ---------------------------------------------------------------------------
59
+
60
+
61
+ def format_json(findings: list[dict], server_name: str) -> str:
62
+ """Format findings as a JSON string."""
63
+ output = {
64
+ "server": server_name,
65
+ "scan_time": datetime.now(UTC).isoformat(),
66
+ "findings_count": len(findings),
67
+ "findings": findings,
68
+ }
69
+ return json.dumps(output, indent=2)
70
+
71
+
72
+ # ---------------------------------------------------------------------------
73
+ # SARIF formatter
74
+ # ---------------------------------------------------------------------------
75
+
76
+ _SARIF_SCHEMA = (
77
+ "https://raw.githubusercontent.com/oasis-tcs/sarif-spec"
78
+ "/master/Schemata/sarif-schema-2.1.0.json"
79
+ )
80
+
81
+
82
+ _SARIF_LEVEL = {
83
+ "CRITICAL": "error",
84
+ "HIGH": "warning",
85
+ "MEDIUM": "warning",
86
+ "LOW": "note",
87
+ }
88
+
89
+
90
def _build_sarif_rules() -> list[dict]:
    """Describe every entry of ALL_RULES as a SARIF rule object.

    Returns:
        One dict per rule, in ALL_RULES order. The full description quotes at
        most the first two regex patterns of the rule.

    Raises:
        KeyError: If a rule's severity has no entry in ``_SARIF_LEVEL``.
    """

    def describe(rule) -> dict:
        sample = ", ".join(rule.patterns[:2])
        return {
            "id": rule.rule_id,
            "name": rule.category,
            "shortDescription": {"text": f"{rule.category} detection rule"},
            "fullDescription": {
                "text": f"Detects {rule.category} patterns: {sample}..."
            },
            "defaultConfiguration": {"level": _SARIF_LEVEL[rule.severity]},
        }

    return [describe(rule) for rule in ALL_RULES]
104
+
105
+
106
def _severity_to_sarif_level(severity: str) -> str:
    """Translate an internal severity into a SARIF level, defaulting to "warning"."""
    try:
        return _SARIF_LEVEL[severity]
    except KeyError:
        # Unknown severities are reported as warnings rather than rejected.
        return "warning"
109
+
110
+
111
def format_sarif(findings: list[dict], server_name: str) -> str:
    """Format findings as a SARIF 2.1.0 JSON string.

    Args:
        findings: Finding dicts; each one is read for its ``rule``,
            ``severity``, ``description``, ``file`` and ``line`` keys.
        server_name: Accepted so all formatters share one signature; it is
            not written into the SARIF document.

    Returns:
        An indented JSON string holding a single SARIF run whose driver lists
        every known rule and whose results mirror ``findings`` in order.
    """
    # Imported locally so the reported driver version always tracks the
    # installed package instead of a literal that can drift on release.
    from mcpsafe import __version__

    results = [
        {
            "ruleId": finding["rule"],
            "level": _severity_to_sarif_level(finding["severity"]),
            "message": {"text": finding["description"]},
            "locations": [{
                "physicalLocation": {
                    "artifactLocation": {"uri": finding["file"]},
                    "region": {"startLine": finding["line"]},
                }
            }],
        }
        for finding in findings
    ]

    document = {
        "$schema": _SARIF_SCHEMA,
        "version": "2.1.0",  # SARIF specification version, not the tool version
        "runs": [{
            "tool": {
                "driver": {
                    "name": "MCPSafe",
                    "version": __version__,
                    "rules": _build_sarif_rules(),
                }
            },
            "results": results,
        }],
    }
    return json.dumps(document, indent=2)
@@ -0,0 +1,303 @@
1
+ """MCP tool definition parser — AST + regex strategies.
2
+
3
+ Two parsing strategies:
4
+ 1. Decorator-based: @mcp.tool() / @app.tool() decorated functions
5
+ 2. Explicit: types.Tool(name=..., description=...) calls
6
+ """
7
+
8
+ import ast
9
+ import fnmatch
10
+ import os
11
+ import re
12
+ from dataclasses import dataclass
13
+ from pathlib import Path
14
+
15
+
16
@dataclass
class ToolDefinition:
    """One MCP tool discovered in a source file."""

    # Tool name: the function name, or the ``name=`` keyword of an explicit call.
    name: str
    # Tool description: the docstring, or the ``description=`` keyword; "" if absent.
    description: str
    # Parameter names collected for the tool.
    parameters: list[str]
    # Path of the file in which the tool was found.
    source_file: str
    # How the tool was declared: "decorator" | "explicit".
    source_type: str
    # 1-based line on which the definition starts.
    line_number: int
26
+
27
+
28
+ # ---------------------------------------------------------------------------
29
+ # Decorator-based parsing (AST)
30
+ # ---------------------------------------------------------------------------
31
+
32
+ # Decorator names we recognise as MCP tool decorators
33
+ _TOOL_DECORATOR_NAMES = {"tool"}
34
+ _TOOL_DECORATOR_ATTRS = {"mcp.tool", "app.tool", "server.tool", "mcp_server.tool"}
35
+
36
+
37
+ def _is_tool_decorator(node: ast.expr) -> bool:
38
+ """Check if an AST decorator node is an MCP tool decorator."""
39
+ # Plain name: @tool or @tool()
40
+ if isinstance(node, ast.Name) and node.id in _TOOL_DECORATOR_NAMES:
41
+ return True
42
+ # Attribute: @mcp.tool or @mcp.tool()
43
+ if isinstance(node, ast.Attribute) and node.attr == "tool":
44
+ return True
45
+ # Call: @mcp.tool() or @app.tool() — unwrap to check the func
46
+ if isinstance(node, ast.Call):
47
+ return _is_tool_decorator(node.func)
48
+ return False
49
+
50
+
51
def _has_tool_decorator(func_node: ast.FunctionDef | ast.AsyncFunctionDef) -> bool:
    """Report whether at least one decorator of the function marks it as a tool."""
    return any(_is_tool_decorator(decorator) for decorator in func_node.decorator_list)
57
+
58
+
59
+ def _extract_parameters(func_node: ast.FunctionDef | ast.AsyncFunctionDef) -> list[str]:
60
+ """Extract argument names from a function definition (skip 'self', 'cls')."""
61
+ params: list[str] = []
62
+ # Positional-only args (Python 3.8+)
63
+ for arg in func_node.args.posonlyargs:
64
+ if arg.arg not in ("self", "cls"):
65
+ params.append(arg.arg)
66
+ # Regular args
67
+ for arg in func_node.args.args:
68
+ if arg.arg not in ("self", "cls"):
69
+ params.append(arg.arg)
70
+ # *args
71
+ if func_node.args.vararg:
72
+ params.append(func_node.args.vararg.arg)
73
+ # Keyword-only args
74
+ for arg in func_node.args.kwonlyargs:
75
+ params.append(arg.arg)
76
+ # **kwargs
77
+ if func_node.args.kwarg:
78
+ params.append(func_node.args.kwarg.arg)
79
+ return params
80
+
81
+
82
+ def _extract_description(func_node: ast.FunctionDef | ast.AsyncFunctionDef) -> str:
83
+ """Extract the docstring from a function definition."""
84
+ return ast.get_docstring(func_node) or ""
85
+
86
+
87
def _parse_decorator_tools(source: str, source_file: str) -> list[ToolDefinition]:
    """Parse decorator-based tool definitions from source code.

    The whole syntax tree is searched, so tools registered inside a factory
    function or a class body are found as well as module-level ones.

    Args:
        source: Python source text to analyse.
        source_file: Path recorded on every returned definition.

    Returns:
        One ToolDefinition per tool-decorated (async) function, ordered by
        line number. Empty when the source cannot be parsed.
    """
    try:
        tree = ast.parse(source)
    except (SyntaxError, ValueError):
        # ValueError: some Python versions raise it for NUL bytes in the
        # source; one unparsable file must not abort the whole scan.
        return []

    decorated = [
        node
        for node in ast.walk(tree)
        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef))
        and _has_tool_decorator(node)
    ]
    # ast.walk is breadth-first; restore source order for stable reports.
    decorated.sort(key=lambda node: node.lineno)

    return [
        ToolDefinition(
            name=node.name,
            description=_extract_description(node),
            parameters=_extract_parameters(node),
            source_file=source_file,
            source_type="decorator",
            line_number=node.lineno,
        )
        for node in decorated
    ]
111
+
112
+
113
+ # ---------------------------------------------------------------------------
114
+ # Explicit types.Tool() parsing (regex with balanced parens)
115
+ # ---------------------------------------------------------------------------
116
+
117
+ # Regex to find types.Tool( or similar explicit Tool() calls
118
+ # We look for patterns like: types.Tool( ... ) or Tool( ... )
119
+ _EXPLICIT_TOOL_RE = re.compile(
120
+ r"\b(?:types\.Tool|mcp\.types\.Tool)\s*\(",
121
+ )
122
+
123
+
124
+ def _extract_balanced_parens(source: str, start: int) -> str | None:
125
+ """Extract content inside balanced parentheses starting at position `start`.
126
+
127
+ `start` should point to the opening '(' character.
128
+ Returns the content between the parens (exclusive), or None if unbalanced.
129
+ Handles single, double, and triple-quoted strings.
130
+ """
131
+ if start >= len(source) or source[start] != "(":
132
+ return None
133
+
134
+ depth = 0
135
+ i = start
136
+ while i < len(source):
137
+ ch = source[i]
138
+ if ch == "(":
139
+ depth += 1
140
+ elif ch == ")":
141
+ depth -= 1
142
+ if depth == 0:
143
+ return source[start + 1 : i]
144
+ elif ch in ('"', "'"):
145
+ # Check for triple-quoted strings
146
+ if source[i : i + 3] == '"""' or source[i : i + 3] == "'''":
147
+ quote = source[i : i + 3]
148
+ i += 3
149
+ while i < len(source):
150
+ if source[i] == "\\":
151
+ i += 2
152
+ continue
153
+ if source[i : i + 3] == quote:
154
+ i += 3
155
+ break
156
+ i += 1
157
+ else:
158
+ # Single/double-quoted string
159
+ quote = ch
160
+ i += 1
161
+ while i < len(source):
162
+ if source[i] == "\\":
163
+ i += 2
164
+ continue
165
+ if source[i] == quote:
166
+ break
167
+ i += 1
168
+ i += 1
169
+ return None
170
+
171
+
172
+ def _extract_keyword_string(content: str, key: str) -> str | None:
173
+ """Extract a string value for a keyword argument like name="foo"."""
174
+ # Match key="value" or key='value', handling backslash-escaped quotes
175
+ quoted_pattern = re.compile(rf'\b{key}\s*=\s*([\'"])((?:[^\\\'\"]|\\.)*)\1')
176
+ qm = quoted_pattern.search(content)
177
+ if qm:
178
+ return qm.group(2)
179
+ return None
180
+
181
+
182
+ def _extract_keyword_list(content: str, key: str) -> list[str] | None:
183
+ """Extract a list value for a keyword argument like parameters=["a", "b"]."""
184
+ pattern = re.compile(rf'\b{key}\s*=\s*\[(.*?)\]', re.DOTALL)
185
+ m = pattern.search(content)
186
+ if m:
187
+ inner = m.group(1)
188
+ # Extract all string items
189
+ items = re.findall(r'[\'"]([^\'"]+)[\'"]', inner)
190
+ return items
191
+ return None
192
+
193
+
194
def _parse_explicit_tools(source: str, source_file: str) -> list[ToolDefinition]:
    """Find explicit Tool(...) constructor calls in the source via regex.

    A call is reported only when its parentheses balance and it carries a
    non-empty ``name=`` string; description and parameters default to empty.
    """
    found: list[ToolDefinition] = []

    for match in _EXPLICIT_TOOL_RE.finditer(source):
        open_at = source.index("(", match.start())
        arguments = _extract_balanced_parens(source, open_at)
        if arguments is None:
            continue

        tool_name = _extract_keyword_string(arguments, "name")
        if not tool_name:
            continue

        found.append(
            ToolDefinition(
                name=tool_name,
                description=_extract_keyword_string(arguments, "description") or "",
                parameters=_extract_keyword_list(arguments, "parameters") or [],
                source_file=source_file,
                source_type="explicit",
                # 1-based line of the call: newlines before the match, plus one.
                line_number=source.count("\n", 0, match.start()) + 1,
            )
        )

    return found
224
+
225
+
226
+ # ---------------------------------------------------------------------------
227
+ # Public API
228
+ # ---------------------------------------------------------------------------
229
+
230
def parse_file(file_path: str | Path) -> list[ToolDefinition]:
    """Collect the MCP tool definitions declared in one Python file.

    Decorator-based definitions are listed first, followed by explicit
    constructor calls. A symlinked file is not read and yields an empty list.
    """
    target = Path(file_path)
    if target.is_symlink():
        return []

    # Undecodable bytes are replaced instead of raising.
    text = target.read_text(encoding="utf-8", errors="replace")
    origin = str(target)

    return [
        *_parse_decorator_tools(text, origin),
        *_parse_explicit_tools(text, origin),
    ]
249
+
250
+
251
+ def _glob_matches(path: str, patterns: list[str] | tuple[str, ...]) -> bool:
252
+ """Check if a path matches any of the given glob patterns."""
253
+ name = os.path.basename(path)
254
+ for pattern in patterns:
255
+ if fnmatch.fnmatch(name, pattern) or fnmatch.fnmatch(path, pattern):
256
+ return True
257
+ # Also match against path suffixes for patterns like "vendor/*"
258
+ # e.g., /tmp/xxx/vendor/bad.py should match vendor/*
259
+ parts = Path(path).parts
260
+ for i in range(len(parts)):
261
+ subpath = str(Path(*parts[i:]))
262
+ if fnmatch.fnmatch(subpath, pattern):
263
+ return True
264
+ return False
265
+
266
+
267
def scan_directory(
    directory: str | Path,
    exclude: list[str] | tuple[str, ...] | None = None,
) -> list[ToolDefinition]:
    """Find the tool definitions in every .py file below a directory.

    A path that points at a single file is parsed directly, so scanning one
    file no longer produces an empty (and misleadingly clean) result.

    Args:
        directory: Directory to walk, or a single file to parse.
        exclude: Glob patterns; files matching any of them are skipped.

    Returns:
        The definitions found. Directories and files are visited in sorted
        name order so the result is the same on every run and platform.
        Symlinked files are skipped.
    """
    patterns = () if exclude is None else exclude
    base = Path(directory)

    if base.is_file():
        # os.walk yields nothing for a file; parse_file skips symlinks itself.
        if _glob_matches(str(base), patterns):
            return []
        return parse_file(base)

    tools: list[ToolDefinition] = []
    for root, dirs, files in os.walk(base):
        # Sorting in place also fixes the order in which os.walk descends.
        dirs.sort()
        for fname in sorted(files):
            if not fname.endswith(".py"):
                continue

            full_path = Path(root) / fname
            if full_path.is_symlink():
                continue
            if _glob_matches(str(full_path), patterns):
                continue

            tools.extend(parse_file(full_path))

    return tools
299
+
300
+
301
def parse_directory(directory: str | Path) -> list[ToolDefinition]:
    """Scan ``directory`` via scan_directory without excluding any file."""
    return scan_directory(directory, exclude=None)
@@ -0,0 +1,154 @@
1
+ """Security rule engine - 6 categories of detection rules.
2
+
3
+ Uses regex-based pattern matching against tool name + description.
4
+ """
5
+
6
+ import re
7
+ from dataclasses import dataclass, field
8
+
9
+ from mcpsafe.parser import ToolDefinition
10
+
11
+
12
@dataclass
class Rule:
    """Definition of one detection rule: identity, classification and patterns."""

    # Stable identifier, reported as the "rule" of a finding.
    rule_id: str
    # Category label, e.g. "TOOL_POISONING".
    category: str
    # Severity level: "CRITICAL", "HIGH", "MEDIUM" or "LOW".
    severity: str
    # Regex sources; each instance gets its own list when none is supplied.
    patterns: list[str] = field(default_factory=list)
20
+
21
+
22
+ # ---------------------------------------------------------------------------
23
+ # Rule definitions
24
+ # ---------------------------------------------------------------------------
25
+
26
# All detection rules, evaluated in this order. Patterns are regex sources
# that are compiled case-insensitively before use.
RULES: list[Rule] = [
    # 1. TOOL_POISONING (CRITICAL)
    Rule(
        rule_id="tool_poisoning_instructions",
        category="TOOL_POISONING",
        severity="CRITICAL",
        patterns=[
            # Accepts "ignore previous ...", "ignore all ..." and the combined
            # "ignore all previous ..." wording.
            r"ignore\s+(?:(?:all\s+)?(?:previous|prior)|all)\s+(?:instructions|rules|commands|prompts)",
            r"you\s+are\s+now\s+in\s+(?:admin|developer|debug|root|system)\s+mode",
            r"override\s+(?:previous|prior|system)\s+(?:all\s+)?(?:instructions|rules|behavior)",
            # Same widening as above for "disregard all previous ...".
            r"disregard\s+(?:(?:all\s+)?(?:previous|prior)|all)\s+(?:the\s+)?(?:instructions|rules|prompt|system)",
            r"(?:ignore|override|disregard|replace).*new\s+instructions\s*:",
        ],
    ),
    # 2. HIDDEN_BEHAVIOR (HIGH)
    Rule(
        rule_id="hidden_behavior",
        category="HIDDEN_BEHAVIOR",
        severity="HIGH",
        patterns=[
            r"secretly\s+(?:send|copy|read|exfiltrate|embed|hide|bcc)",
            r"without\s+(?:notifying|informing)\s+(?:the\s+)?user",
            r"hidden\s+(?:instruction|directive|command|behavior|parameter)",
            r"(?:must\s+not|shouldn'?t|don'?t)\s+(?:know|notice|see|detect|be\s+aware)",
        ],
    ),
    # 3. DATA_EXFILTRATION (HIGH)
    Rule(
        rule_id="data_exfiltration",
        category="DATA_EXFILTRATION",
        severity="HIGH",
        patterns=[
            r"(?:send|exfiltrate|copy|upload|post|transmit)\s+(?:all|every|any)\s+(?:data|files|credentials|tokens|secrets)\s+(?:to|at)",
            r"(?:secretly|silently|covertly|hiddenly)\s+(?:send|copy|read|upload)",
        ],
    ),
    # 4. EXTERNAL_URL (MEDIUM)
    Rule(
        rule_id="external_url",
        category="EXTERNAL_URL",
        severity="MEDIUM",
        patterns=[
            # Any http(s) URL whose host is not a loopback/unspecified address.
            r"https?://(?!(?:localhost|127\.0\.0\.1|0\.0\.0\.0|\[::1\]|::1)(?:[/:]|\Z))",
        ],
    ),
    # 5. BEHAVIORAL_MISMATCH (HIGH)
    Rule(
        rule_id="behavioral_mismatch",
        category="BEHAVIORAL_MISMATCH",
        severity="HIGH",
        patterns=[
            r"(?:secretly|silently|covertly)\s+(?:send|copy|read|exfiltrate|leak|embed|hide|log|store)",
            r"(?:ignore|override|bypass)\s+(?:the\s+)?(?:user|their|them)",
        ],
    ),
    # 6. PARAMETER_SMUGGLING (MEDIUM)
    Rule(
        rule_id="parameter_smuggling",
        category="PARAMETER_SMUGGLING",
        severity="MEDIUM",
        patterns=[
            r"(?:hidden|secret|undocumented)\s+(?:parameter|field|input|argument)",
            r"also\s+(?:embed|include|add)\s+(?:in|to)\s+(?:response|output|metadata|header)",
        ],
    ),
]
92
+
93
+
94
+ # ---------------------------------------------------------------------------
95
+ # Compiled regex cache
96
+ # ---------------------------------------------------------------------------
97
+
98
# Lazily built cache of (rule, compiled patterns) pairs; None until first use.
_COMPILED_RULES: tuple | None = None


def _get_compiled_rules() -> tuple:
    """Return the compiled form of RULES, building it on the first call.

    Returns:
        A tuple of ``(rule, patterns)`` pairs in RULES order, where
        ``patterns`` is a tuple of case-insensitive compiled regexes.
    """
    global _COMPILED_RULES
    if _COMPILED_RULES is not None:
        return _COMPILED_RULES

    compiled = []
    for rule in RULES:
        regexes = tuple(re.compile(source, re.IGNORECASE) for source in rule.patterns)
        compiled.append((rule, regexes))

    # Publish the finished cache with a single assignment.
    _COMPILED_RULES = tuple(compiled)
    return _COMPILED_RULES
111
+
112
+
113
+ # ---------------------------------------------------------------------------
114
+ # Public API
115
+ # ---------------------------------------------------------------------------
116
+
117
+
118
def scan_tool(tool: ToolDefinition) -> list[dict]:
    """Check one tool definition against every security rule.

    The tool's name and description are searched for each rule's patterns;
    the parameter_smuggling rule additionally searches the parameter names.

    Returns:
        At most one finding per rule, in rule order. Each finding is a dict
        with the keys severity, category, tool, description, file, line, rule.
    """
    base_text = f"{tool.name} {tool.description}"
    findings: list[dict] = []

    for rule, regexes in _get_compiled_rules():
        haystack = base_text
        if rule.rule_id == "parameter_smuggling":
            all_params = " ".join(tool.parameters)
            # Names that look private or privileged are appended a second time.
            flagged = " ".join(
                param
                for param in tool.parameters
                if param.startswith("_")
                or any(marker in param.lower() for marker in ("secret", "internal", "admin"))
            )
            haystack = f"{base_text} {all_params} {flagged}"

        # A single matching pattern is enough to report the rule once.
        if any(regex.search(haystack) for regex in regexes):
            findings.append({
                "severity": rule.severity,
                "category": rule.category,
                "tool": tool.name,
                "description": tool.description,
                "file": tool.source_file,
                "line": tool.line_number,
                "rule": rule.rule_id,
            })

    return findings