shadow-mcp 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- shadow_mcp-0.1.0/.github/workflows/ci.yml +38 -0
- shadow_mcp-0.1.0/.github/workflows/publish.yml +36 -0
- shadow_mcp-0.1.0/.gitignore +15 -0
- shadow_mcp-0.1.0/PKG-INFO +154 -0
- shadow_mcp-0.1.0/README.md +140 -0
- shadow_mcp-0.1.0/docs/future-network-scan.md +48 -0
- shadow_mcp-0.1.0/docs/risk-model.md +132 -0
- shadow_mcp-0.1.0/pyproject.toml +52 -0
- shadow_mcp-0.1.0/server.json +22 -0
- shadow_mcp-0.1.0/src/shadow_mcp/__init__.py +9 -0
- shadow_mcp-0.1.0/src/shadow_mcp/cli.py +268 -0
- shadow_mcp-0.1.0/src/shadow_mcp/collectors/__init__.py +5 -0
- shadow_mcp-0.1.0/src/shadow_mcp/collectors/_common.py +120 -0
- shadow_mcp-0.1.0/src/shadow_mcp/collectors/base.py +72 -0
- shadow_mcp-0.1.0/src/shadow_mcp/collectors/claude_cli.py +100 -0
- shadow_mcp-0.1.0/src/shadow_mcp/collectors/claude_code.py +43 -0
- shadow_mcp-0.1.0/src/shadow_mcp/collectors/claude_desktop.py +27 -0
- shadow_mcp-0.1.0/src/shadow_mcp/collectors/codex.py +84 -0
- shadow_mcp-0.1.0/src/shadow_mcp/collectors/dxt.py +61 -0
- shadow_mcp-0.1.0/src/shadow_mcp/collectors/processes.py +269 -0
- shadow_mcp-0.1.0/src/shadow_mcp/collectors/project_mcp.py +68 -0
- shadow_mcp-0.1.0/src/shadow_mcp/config.py +59 -0
- shadow_mcp-0.1.0/src/shadow_mcp/grading/__init__.py +63 -0
- shadow_mcp-0.1.0/src/shadow_mcp/grading/combine.py +106 -0
- shadow_mcp-0.1.0/src/shadow_mcp/grading/mcpaudit.py +118 -0
- shadow_mcp-0.1.0/src/shadow_mcp/grading/mcpaudit_connect.py +82 -0
- shadow_mcp-0.1.0/src/shadow_mcp/grading/mcptrust.py +136 -0
- shadow_mcp-0.1.0/src/shadow_mcp/grading/mcptrust_compute.py +50 -0
- shadow_mcp-0.1.0/src/shadow_mcp/identity.py +166 -0
- shadow_mcp-0.1.0/src/shadow_mcp/inventory.py +118 -0
- shadow_mcp-0.1.0/src/shadow_mcp/mcp_server.py +113 -0
- shadow_mcp-0.1.0/src/shadow_mcp/models.py +189 -0
- shadow_mcp-0.1.0/src/shadow_mcp/redact.py +70 -0
- shadow_mcp-0.1.0/src/shadow_mcp/report.py +155 -0
- shadow_mcp-0.1.0/src/shadow_mcp/shadow.py +101 -0
- shadow_mcp-0.1.0/tests/conftest.py +99 -0
- shadow_mcp-0.1.0/tests/test_cli.py +95 -0
- shadow_mcp-0.1.0/tests/test_collectors.py +143 -0
- shadow_mcp-0.1.0/tests/test_grading.py +194 -0
- shadow_mcp-0.1.0/tests/test_identity.py +87 -0
- shadow_mcp-0.1.0/tests/test_integration_mcpaudit.py +37 -0
- shadow_mcp-0.1.0/tests/test_inventory.py +143 -0
- shadow_mcp-0.1.0/tests/test_mcp_server.py +85 -0
- shadow_mcp-0.1.0/tests/test_redact.py +44 -0
- shadow_mcp-0.1.0/tests/test_report.py +119 -0
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
|
|
8
|
+
concurrency:
|
|
9
|
+
group: ci-${{ github.ref }}
|
|
10
|
+
cancel-in-progress: true
|
|
11
|
+
|
|
12
|
+
jobs:
|
|
13
|
+
test:
|
|
14
|
+
runs-on: ubuntu-latest
|
|
15
|
+
steps:
|
|
16
|
+
- uses: actions/checkout@v5
|
|
17
|
+
|
|
18
|
+
- name: Install uv
|
|
19
|
+
uses: astral-sh/setup-uv@v8.2.0
|
|
20
|
+
with:
|
|
21
|
+
enable-cache: true
|
|
22
|
+
|
|
23
|
+
- name: Set up Python
|
|
24
|
+
run: uv python install 3.11
|
|
25
|
+
|
|
26
|
+
# All deps (incl. the mcp-audits + mcp-trust grading engines) now resolve
|
|
27
|
+
# from PyPI, so a plain sync installs the full grading path. The engine-backed
|
|
28
|
+
# tests run for real here — config-only scans with no live targets. (The one
|
|
29
|
+
# real-spawn connected test is gated behind SHADOW_MCP_RUN_CONNECT and stays
|
|
30
|
+
# skipped.)
|
|
31
|
+
- name: Install
|
|
32
|
+
run: uv sync
|
|
33
|
+
|
|
34
|
+
- name: Lint
|
|
35
|
+
run: uv run --no-sync ruff check .
|
|
36
|
+
|
|
37
|
+
- name: Test
|
|
38
|
+
run: uv run --no-sync pytest
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
name: Publish to PyPI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
tags:
|
|
6
|
+
- 'v*'
|
|
7
|
+
|
|
8
|
+
jobs:
|
|
9
|
+
build-and-publish:
|
|
10
|
+
name: Build and publish to PyPI
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
environment: pypi
|
|
13
|
+
permissions:
|
|
14
|
+
contents: read # checkout needs read on this (private) repo
|
|
15
|
+
id-token: write # OIDC trusted publishing
|
|
16
|
+
|
|
17
|
+
steps:
|
|
18
|
+
- uses: actions/checkout@v7
|
|
19
|
+
|
|
20
|
+
- uses: astral-sh/setup-uv@v7
|
|
21
|
+
with:
|
|
22
|
+
python-version: "3.11"
|
|
23
|
+
|
|
24
|
+
# Gate the release on the test suite: a broken tag must not publish. The
|
|
25
|
+
# grading engines resolve from PyPI now, so the full suite (engine-backed
|
|
26
|
+
# tests included) runs as the gate.
|
|
27
|
+
- name: Test gate
|
|
28
|
+
run: uv run pytest -q
|
|
29
|
+
|
|
30
|
+
- name: Build wheel and sdist
|
|
31
|
+
run: uv build
|
|
32
|
+
|
|
33
|
+
- name: Publish to PyPI
|
|
34
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
35
|
+
with:
|
|
36
|
+
packages-dir: dist/
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
__pycache__/
|
|
2
|
+
*.pyc
|
|
3
|
+
.venv/
|
|
4
|
+
# shadow-mcp is a published library: consumers (and `uvx`) resolve deps fresh
|
|
5
|
+
# from PyPI, so we don't pin a lock. The old committed lock encoded editable
|
|
6
|
+
# local path-sources (../MCPAudit) that only resolved on one machine anyway.
|
|
7
|
+
uv.lock
|
|
8
|
+
.pytest_cache/
|
|
9
|
+
*.egg-info/
|
|
10
|
+
dist/
|
|
11
|
+
build/
|
|
12
|
+
.DS_Store
|
|
13
|
+
# never commit a captured inventory of a real machine
|
|
14
|
+
/out/
|
|
15
|
+
*.inventory.json
|
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: shadow-mcp
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Discover and risk-grade the MCP servers actually present on this machine. Local-first shadow-MCP inventory.
|
|
5
|
+
Author: Saagar Patel
|
|
6
|
+
License: MIT
|
|
7
|
+
Requires-Python: >=3.11
|
|
8
|
+
Requires-Dist: mcp-audits>=2.2.3
|
|
9
|
+
Requires-Dist: mcp-trust>=0.1.0
|
|
10
|
+
Requires-Dist: mcp<2,>=1.27.0
|
|
11
|
+
Requires-Dist: pydantic>=2
|
|
12
|
+
Requires-Dist: rich>=13
|
|
13
|
+
Description-Content-Type: text/markdown
|
|
14
|
+
|
|
15
|
+
# shadow-mcp
|
|
16
|
+
<!-- mcp-name: io.github.saagpatel/shadow-mcp -->
|
|
17
|
+
|
|
18
|
+
Discover and risk-grade the MCP servers actually present on **this** machine.
|
|
19
|
+
|
|
20
|
+
Most MCP security tooling assumes you already have a list of servers to audit.
|
|
21
|
+
On a real developer machine you don't: servers are scattered across Claude Code,
|
|
22
|
+
Codex, Claude Desktop, project-local `.mcp.json` files, DXT extensions, and live
|
|
23
|
+
processes that bind no port. shadow-mcp finds them first, then grades them.
|
|
24
|
+
|
|
25
|
+
This is the local-first answer to **OWASP MCP09:2025 — Shadow MCP Servers**.
|
|
26
|
+
|
|
27
|
+
## What it does
|
|
28
|
+
|
|
29
|
+
```
|
|
30
|
+
discover -> inventory -> risk-grade -> report
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
1. **Discover** (read-only) every place an MCP server is declared or running:
|
|
34
|
+
Claude Code (`~/.claude.json`, user + project scope), `claude mcp list`
|
|
35
|
+
(catches remote + plugin servers no file contains), Codex
|
|
36
|
+
(`~/.codex/config.toml` + profiles), project `.mcp.json`, Claude Desktop
|
|
37
|
+
config + DXT extension manifests, and the live process table.
|
|
38
|
+
2. **Inventory**: merge sightings into one entry per logical server, even when a
|
|
39
|
+
server appears under different names across hosts (`personal-ops` vs
|
|
40
|
+
`personal_ops`), tracking every provenance.
|
|
41
|
+
3. **Risk-grade** by **delegating** to the existing engines rather than
|
|
42
|
+
reimplementing them:
|
|
43
|
+
- [MCPAudit](../MCPAudit) for a 0-10 capability composite + injection findings
|
|
44
|
+
- [mcp-trust](../mcp-trust) for an authoritative A-F danger grade (when known)
|
|
45
|
+
- a thin local layer for the config-shaped OWASP dimensions the engines under-cover
|
|
46
|
+
(secrets/MCP01, supply-chain provenance/MCP04, transport exposure/MCP07).
|
|
47
|
+
4. **Report**: a ranked terminal table, a machine-readable JSON inventory, or
|
|
48
|
+
markdown — plus a **Shadow & attention** section for the deltas that matter
|
|
49
|
+
(running-but-unconfigured, broad blast radius, capable-but-ungraded).
|
|
50
|
+
|
|
51
|
+
The risk model and its OWASP mapping live in [docs/risk-model.md](docs/risk-model.md).
|
|
52
|
+
|
|
53
|
+
## Install
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
uv sync # installs deps incl. the mcp-audits + mcp-trust grading engines from PyPI
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
shadow-mcp grades against your local checkouts of MCPAudit (`../MCPAudit`) and
|
|
60
|
+
mcp-trust (`../mcp-trust/registry.db`). Override with `SHADOW_MCP_MCPTRUST_DB`
|
|
61
|
+
or `--registry-db`.
|
|
62
|
+
|
|
63
|
+
## Use
|
|
64
|
+
|
|
65
|
+
```bash
|
|
66
|
+
uv run shadow-mcp scan # full pipeline, terminal report
|
|
67
|
+
uv run shadow-mcp scan --json out.json # machine-readable inventory
|
|
68
|
+
uv run shadow-mcp scan --format markdown # markdown report
|
|
69
|
+
uv run shadow-mcp discover # inventory only, no grading
|
|
70
|
+
uv run shadow-mcp sources # per-collector counts
|
|
71
|
+
uv run shadow-mcp grade-missing # A-F for servers the registry hasn't scanned
|
|
72
|
+
uv run shadow-mcp deep-scan cost-tracker # connect to a server, grade its real tools
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
Useful flags: `--no-processes` (skip the live process scan), `--no-cli` (skip
|
|
76
|
+
`claude mcp list`), `--no-mcpaudit` (inventory + mcp-trust only), `--home PATH`
|
|
77
|
+
(point discovery at a fixture tree).
|
|
78
|
+
|
|
79
|
+
### Static vs connected grading
|
|
80
|
+
|
|
81
|
+
By default grading is **static** (config-only): no server is spawned, so grades
|
|
82
|
+
reflect what's visible in the config. That's safe but coarse — a server's real
|
|
83
|
+
capability only shows once you connect and list its tools.
|
|
84
|
+
|
|
85
|
+
`shadow-mcp scan --connect` (or `deep-scan [names...]`) **spawns** each stdio
|
|
86
|
+
server and enumerates its real tools, delegating to MCPAudit's connected engine
|
|
87
|
+
for a capability grade that actually differentiates (a filesystem server jumps
|
|
88
|
+
from a static `A` to a connected `D`). This is **opt-in** because connecting
|
|
89
|
+
executes the server; remote endpoints are never spawned (that's the network-scan
|
|
90
|
+
tier), and a server that needs real secrets to start falls back to its static
|
|
91
|
+
grade.
|
|
92
|
+
|
|
93
|
+
## Development
|
|
94
|
+
|
|
95
|
+
```bash
|
|
96
|
+
uv sync # dev tools + grading engines (the default groups)
|
|
97
|
+
uv run pytest # full suite (61 + engine-backed tests)
|
|
98
|
+
uv run ruff check . # lint
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
The grading engines (`mcp-audits` and `mcp-trust`) are regular dependencies resolved
|
|
102
|
+
from PyPI, so a plain `uv sync` installs the full grading path. Both are imported
|
|
103
|
+
lazily, so a partial install degrades to discovery-only (engine-backed tests skip
|
|
104
|
+
cleanly) rather than crashing. CI runs the same plain sync:
|
|
105
|
+
|
|
106
|
+
```bash
|
|
107
|
+
uv sync # full grading path, engines resolved from PyPI (what CI runs)
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
## Safety
|
|
111
|
+
|
|
112
|
+
- **Read-only discovery.** Collectors parse configs and list processes; nothing
|
|
113
|
+
they find is ever mutated. (`--connect`/`deep-scan` is the one path that
|
|
114
|
+
*executes* servers, and only when you explicitly ask.)
|
|
115
|
+
- **Secrets stay out.** We record env variable *names* (to flag secret-bearing
|
|
116
|
+
servers per MCP01) but never their values. A captured inventory still contains
|
|
117
|
+
real local paths and hostnames, so treat `*.inventory.json` as private (it is
|
|
118
|
+
git-ignored by default).
|
|
119
|
+
|
|
120
|
+
## Use as an MCP server
|
|
121
|
+
|
|
122
|
+
shadow-mcp can serve its own inventory tools as an MCP server so an agent can
|
|
123
|
+
query your local MCP surface without leaving the conversation.
|
|
124
|
+
|
|
125
|
+
### Tools
|
|
126
|
+
|
|
127
|
+
| Tool | Description |
|
|
128
|
+
|---|---|
|
|
129
|
+
| `scan_local` | Full pipeline (discover → inventory → grade → report). Returns JSON. |
|
|
130
|
+
| `discover_local` | Inventory every MCP server without grading. Returns JSON. |
|
|
131
|
+
| `deep_scan` | Grade only the named servers (static, no spawning). Accepts `names: list[str]`. Returns JSON. |
|
|
132
|
+
| `list_sources` | Per-collector source counts from a discover run. Returns JSON. |
|
|
133
|
+
|
|
134
|
+
### Run the server
|
|
135
|
+
|
|
136
|
+
```bash
|
|
137
|
+
# directly from a local checkout
|
|
138
|
+
shadow-mcp mcp-serve
|
|
139
|
+
|
|
140
|
+
# via uvx (once published to PyPI)
|
|
141
|
+
uvx shadow-mcp mcp-serve
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
**LOCAL only.** The MCP server never connects to hosted MCP endpoints — all
|
|
145
|
+
grading is static (config-based). `connect=False` is enforced unconditionally;
|
|
146
|
+
no server is ever spawned from an MCP tool call.
|
|
147
|
+
|
|
148
|
+
## Scope
|
|
149
|
+
|
|
150
|
+
This is the **local-first** tool: it inventories one machine from its configs
|
|
151
|
+
and processes. A later network-scan expansion (probing hosts/ports for remote
|
|
152
|
+
MCP endpoints, org-wide fleet inventory, typosquat-distance provenance checks)
|
|
153
|
+
is deliberately out of scope here — see `docs/future-network-scan.md` and
|
|
154
|
+
the project notes for what that would add.
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
# shadow-mcp
|
|
2
|
+
<!-- mcp-name: io.github.saagpatel/shadow-mcp -->
|
|
3
|
+
|
|
4
|
+
Discover and risk-grade the MCP servers actually present on **this** machine.
|
|
5
|
+
|
|
6
|
+
Most MCP security tooling assumes you already have a list of servers to audit.
|
|
7
|
+
On a real developer machine you don't: servers are scattered across Claude Code,
|
|
8
|
+
Codex, Claude Desktop, project-local `.mcp.json` files, DXT extensions, and live
|
|
9
|
+
processes that bind no port. shadow-mcp finds them first, then grades them.
|
|
10
|
+
|
|
11
|
+
This is the local-first answer to **OWASP MCP09:2025 — Shadow MCP Servers**.
|
|
12
|
+
|
|
13
|
+
## What it does
|
|
14
|
+
|
|
15
|
+
```
|
|
16
|
+
discover -> inventory -> risk-grade -> report
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
1. **Discover** (read-only) every place an MCP server is declared or running:
|
|
20
|
+
Claude Code (`~/.claude.json`, user + project scope), `claude mcp list`
|
|
21
|
+
(catches remote + plugin servers no file contains), Codex
|
|
22
|
+
(`~/.codex/config.toml` + profiles), project `.mcp.json`, Claude Desktop
|
|
23
|
+
config + DXT extension manifests, and the live process table.
|
|
24
|
+
2. **Inventory**: merge sightings into one entry per logical server, even when a
|
|
25
|
+
server appears under different names across hosts (`personal-ops` vs
|
|
26
|
+
`personal_ops`), tracking every provenance.
|
|
27
|
+
3. **Risk-grade** by **delegating** to the existing engines rather than
|
|
28
|
+
reimplementing them:
|
|
29
|
+
- [MCPAudit](../MCPAudit) for a 0-10 capability composite + injection findings
|
|
30
|
+
- [mcp-trust](../mcp-trust) for an authoritative A-F danger grade (when known)
|
|
31
|
+
- a thin local layer for the config-shaped OWASP dimensions the engines under-cover
|
|
32
|
+
(secrets/MCP01, supply-chain provenance/MCP04, transport exposure/MCP07).
|
|
33
|
+
4. **Report**: a ranked terminal table, a machine-readable JSON inventory, or
|
|
34
|
+
markdown — plus a **Shadow & attention** section for the deltas that matter
|
|
35
|
+
(running-but-unconfigured, broad blast radius, capable-but-ungraded).
|
|
36
|
+
|
|
37
|
+
The risk model and its OWASP mapping live in [docs/risk-model.md](docs/risk-model.md).
|
|
38
|
+
|
|
39
|
+
## Install
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
uv sync # installs deps incl. the mcp-audits + mcp-trust grading engines from PyPI
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
shadow-mcp grades against your local checkouts of MCPAudit (`../MCPAudit`) and
|
|
46
|
+
mcp-trust (`../mcp-trust/registry.db`). Override with `SHADOW_MCP_MCPTRUST_DB`
|
|
47
|
+
or `--registry-db`.
|
|
48
|
+
|
|
49
|
+
## Use
|
|
50
|
+
|
|
51
|
+
```bash
|
|
52
|
+
uv run shadow-mcp scan # full pipeline, terminal report
|
|
53
|
+
uv run shadow-mcp scan --json out.json # machine-readable inventory
|
|
54
|
+
uv run shadow-mcp scan --format markdown # markdown report
|
|
55
|
+
uv run shadow-mcp discover # inventory only, no grading
|
|
56
|
+
uv run shadow-mcp sources # per-collector counts
|
|
57
|
+
uv run shadow-mcp grade-missing # A-F for servers the registry hasn't scanned
|
|
58
|
+
uv run shadow-mcp deep-scan cost-tracker # connect to a server, grade its real tools
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
Useful flags: `--no-processes` (skip the live process scan), `--no-cli` (skip
|
|
62
|
+
`claude mcp list`), `--no-mcpaudit` (inventory + mcp-trust only), `--home PATH`
|
|
63
|
+
(point discovery at a fixture tree).
|
|
64
|
+
|
|
65
|
+
### Static vs connected grading
|
|
66
|
+
|
|
67
|
+
By default grading is **static** (config-only): no server is spawned, so grades
|
|
68
|
+
reflect what's visible in the config. That's safe but coarse — a server's real
|
|
69
|
+
capability only shows once you connect and list its tools.
|
|
70
|
+
|
|
71
|
+
`shadow-mcp scan --connect` (or `deep-scan [names...]`) **spawns** each stdio
|
|
72
|
+
server and enumerates its real tools, delegating to MCPAudit's connected engine
|
|
73
|
+
for a capability grade that actually differentiates (a filesystem server jumps
|
|
74
|
+
from a static `A` to a connected `D`). This is **opt-in** because connecting
|
|
75
|
+
executes the server; remote endpoints are never spawned (that's the network-scan
|
|
76
|
+
tier), and a server that needs real secrets to start falls back to its static
|
|
77
|
+
grade.
|
|
78
|
+
|
|
79
|
+
## Development
|
|
80
|
+
|
|
81
|
+
```bash
|
|
82
|
+
uv sync # dev tools + grading engines (the default groups)
|
|
83
|
+
uv run pytest # full suite (61 + engine-backed tests)
|
|
84
|
+
uv run ruff check . # lint
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
The grading engines (`mcp-audits` and `mcp-trust`) are regular dependencies resolved
|
|
88
|
+
from PyPI, so a plain `uv sync` installs the full grading path. Both are imported
|
|
89
|
+
lazily, so a partial install degrades to discovery-only (engine-backed tests skip
|
|
90
|
+
cleanly) rather than crashing. CI runs the same plain sync:
|
|
91
|
+
|
|
92
|
+
```bash
|
|
93
|
+
uv sync # full grading path, engines resolved from PyPI (what CI runs)
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
## Safety
|
|
97
|
+
|
|
98
|
+
- **Read-only discovery.** Collectors parse configs and list processes; nothing
|
|
99
|
+
they find is ever mutated. (`--connect`/`deep-scan` is the one path that
|
|
100
|
+
*executes* servers, and only when you explicitly ask.)
|
|
101
|
+
- **Secrets stay out.** We record env variable *names* (to flag secret-bearing
|
|
102
|
+
servers per MCP01) but never their values. A captured inventory still contains
|
|
103
|
+
real local paths and hostnames, so treat `*.inventory.json` as private (it is
|
|
104
|
+
git-ignored by default).
|
|
105
|
+
|
|
106
|
+
## Use as an MCP server
|
|
107
|
+
|
|
108
|
+
shadow-mcp can serve its own inventory tools as an MCP server so an agent can
|
|
109
|
+
query your local MCP surface without leaving the conversation.
|
|
110
|
+
|
|
111
|
+
### Tools
|
|
112
|
+
|
|
113
|
+
| Tool | Description |
|
|
114
|
+
|---|---|
|
|
115
|
+
| `scan_local` | Full pipeline (discover → inventory → grade → report). Returns JSON. |
|
|
116
|
+
| `discover_local` | Inventory every MCP server without grading. Returns JSON. |
|
|
117
|
+
| `deep_scan` | Grade only the named servers (static, no spawning). Accepts `names: list[str]`. Returns JSON. |
|
|
118
|
+
| `list_sources` | Per-collector source counts from a discover run. Returns JSON. |
|
|
119
|
+
|
|
120
|
+
### Run the server
|
|
121
|
+
|
|
122
|
+
```bash
|
|
123
|
+
# directly from a local checkout
|
|
124
|
+
shadow-mcp mcp-serve
|
|
125
|
+
|
|
126
|
+
# via uvx (once published to PyPI)
|
|
127
|
+
uvx shadow-mcp mcp-serve
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
**LOCAL only.** The MCP server never connects to hosted MCP endpoints — all
|
|
131
|
+
grading is static (config-based). `connect=False` is enforced unconditionally;
|
|
132
|
+
no server is ever spawned from an MCP tool call.
|
|
133
|
+
|
|
134
|
+
## Scope
|
|
135
|
+
|
|
136
|
+
This is the **local-first** tool: it inventories one machine from its configs
|
|
137
|
+
and processes. A later network-scan expansion (probing hosts/ports for remote
|
|
138
|
+
MCP endpoints, org-wide fleet inventory, typosquat-distance provenance checks)
|
|
139
|
+
is deliberately out of scope here — see `docs/future-network-scan.md` and
|
|
140
|
+
the project notes for what that would add.
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
# Future: the network-scan expansion
|
|
2
|
+
|
|
3
|
+
shadow-mcp is deliberately **local-first**. It inventories one machine by reading
|
|
4
|
+
its configs and process table, and it is immediately useful and dogfoodable for
|
|
5
|
+
exactly that. This note records what a later **network-scan** tier would add, so
|
|
6
|
+
the boundary is a decision and not an accident. Apart from the opt-in connected grading of local stdio servers (`--connect` / `deep-scan`), none of this is built yet.
|
|
7
|
+
|
|
8
|
+
## What local-first already covers
|
|
9
|
+
|
|
10
|
+
- Every config host on this machine (Claude Code, Codex, Claude Desktop, DXT,
|
|
11
|
+
project `.mcp.json`) plus the live process table.
|
|
12
|
+
- Per-server capability grade (MCPAudit), A-F danger grade (mcp-trust), and the
|
|
13
|
+
config-shaped OWASP layer: secrets (MCP01), transport exposure (MCP07),
|
|
14
|
+
blast radius and shadow deltas (MCP09).
|
|
15
|
+
|
|
16
|
+
## What a network-scan tier would add
|
|
17
|
+
|
|
18
|
+
1. **Remote/host discovery (the org dimension).** Probe a host list or CIDR
|
|
19
|
+
range for listening MCP endpoints (HTTP/SSE) that no local config references.
|
|
20
|
+
This is the true "shadow IT" inventory across many machines, vs one machine's
|
|
21
|
+
configs. Needs: a target list, an async port/endpoint prober, an MCP
|
|
22
|
+
handshake probe (`initialize`) to confirm a port is really MCP.
|
|
23
|
+
2. **Live capability enumeration.** Connect to each server and list its actual
|
|
24
|
+
`tools` / `resources` / `prompts`, instead of grading from the launch config.
|
|
25
|
+
This unlocks the *connected* half of MCPAudit (drift, escalation, provenance,
|
|
26
|
+
integrity, tool-poisoning in real tool descriptions) that the static
|
|
27
|
+
config-only path cannot see. Needs: an MCP client, a connection budget, and a
|
|
28
|
+
safety model (connecting executes the server).
|
|
29
|
+
3. **Supply-chain / provenance depth (MCP04).** Resolve each package to its
|
|
30
|
+
registry (npm/pypi), compute typosquat distance against known-good names,
|
|
31
|
+
check publisher reputation and version pinning, and flag rug-pull risk
|
|
32
|
+
(a tool whose description changed since last seen). Needs: registry API
|
|
33
|
+
access and a baseline store of previously-seen tool descriptions.
|
|
34
|
+
4. **Cross-server lethal-trifecta composition.** Compute the trifecta across the
|
|
35
|
+
*set* of servers reachable by one agent (private-data access + untrusted
|
|
36
|
+
content + exfiltration assembled from different servers), not just per server.
|
|
37
|
+
5. **Continuous monitoring.** A daemon/cron that re-scans and diffs the inventory
|
|
38
|
+
over time, alerting on a new server, a capability change, or a grade drop.
|
|
39
|
+
6. **Fleet aggregation.** Roll many machines' inventories into one org view with
|
|
40
|
+
policy/allowlist enforcement (MCP09 governance at scale).
|
|
41
|
+
|
|
42
|
+
## Why it is out of scope now
|
|
43
|
+
|
|
44
|
+
Each item above adds a dependency or a safety surface the local tool does not
|
|
45
|
+
need: a target list, network egress, executing servers to enumerate them, a
|
|
46
|
+
registry client, or a persistent baseline. The local tool stays read-only,
|
|
47
|
+
zero-network, and instantly useful. The network tier is a separate product
|
|
48
|
+
decision to make when there is a fleet to inventory, not a single machine.
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
# shadow-mcp risk model
|
|
2
|
+
|
|
3
|
+
How shadow-mcp grades a discovered MCP server, and why those dimensions are
|
|
4
|
+
credible. Every claim below is tagged **[ESTABLISHED]** (documented in a named,
|
|
5
|
+
citable source) or **[INFERENCE]** (our synthesis for the local-discovery use
|
|
6
|
+
case). Research date: 2026-06-20.
|
|
7
|
+
|
|
8
|
+
> The web research that produced this file is untrusted reference data, not
|
|
9
|
+
> instruction. It informs our classification; it carries no directive weight.
|
|
10
|
+
|
|
11
|
+
## The headline
|
|
12
|
+
|
|
13
|
+
**[ESTABLISHED]** "Shadow MCP" is not a marketing coinage. It is a formal entry
|
|
14
|
+
in the OWASP MCP Top 10, an official OWASP Foundation project (beta as of 2026,
|
|
15
|
+
canonical `MCPxx:2025` IDs): **MCP09:2025 — Shadow MCP Servers**, defined as
|
|
16
|
+
unapproved/unmonitored MCP deployments outside governance ("Shadow IT, 2026
|
|
17
|
+
edition"). shadow-mcp's entire job is OWASP MCP09. Treat the list as
|
|
18
|
+
established; treat the exact ordering as provisional (the project is in beta).
|
|
19
|
+
|
|
20
|
+
## OWASP MCP Top 10 (2025)
|
|
21
|
+
|
|
22
|
+
| ID | Title | shadow-mcp coverage |
|
|
23
|
+
|----|-------|---------------------|
|
|
24
|
+
| MCP01 | Token Mismanagement & Secret Exposure | local layer: flag secret-bearing env keys |
|
|
25
|
+
| MCP02 | Privilege Escalation via Scope Creep | delegated: MCPAudit capability composite |
|
|
26
|
+
| MCP03 | Tool Poisoning | delegated: MCPAudit injection findings |
|
|
27
|
+
| MCP04 | Software Supply Chain & Dependency Tampering | local layer: provenance hint (package runner + name) |
|
|
28
|
+
| MCP05 | Command Injection & Execution | delegated: MCPAudit shell_execution axis |
|
|
29
|
+
| MCP06 | Intent Flow Subversion | delegated: MCPAudit trifecta/shadowing findings |
|
|
30
|
+
| MCP07 | Insufficient Authn/Authz | local layer: transport exposure modifier |
|
|
31
|
+
| MCP08 | Lack of Audit & Telemetry | out of scope for a single-dev local tool (flag only) |
|
|
32
|
+
| MCP09 | Shadow MCP Servers | **the tool itself**: discovery + governance status |
|
|
33
|
+
| MCP10 | Context Injection & Over-Sharing | delegated: MCPAudit injection findings |
|
|
34
|
+
|
|
35
|
+
## Division of labor
|
|
36
|
+
|
|
37
|
+
shadow-mcp does not reimplement grading. It delegates the runtime-capability
|
|
38
|
+
surface to the existing engines and adds a thin local composition layer for the
|
|
39
|
+
config-shaped dimensions those engines do not see.
|
|
40
|
+
|
|
41
|
+
**Delegated (the engines already do this well):**
|
|
42
|
+
- **MCPAudit** grades a 0-10 capability composite over `{file_read, file_write,
|
|
43
|
+
network, shell_execution, destructive, exfiltration}` plus injection / SSRF /
|
|
44
|
+
trifecta findings. Covers MCP02, MCP03, MCP05, MCP06, MCP10 and the capability
|
|
45
|
+
half of the lethal trifecta. **[ESTABLISHED]**
|
|
46
|
+
- **mcp-trust** gives an A-F danger grade weighting shell + file access highest.
|
|
47
|
+
Authoritative letter grade when the server is in its registry. **[ESTABLISHED]**
|
|
48
|
+
|
|
49
|
+
**Local composition layer (what the engines under-cover, and what a local tool
|
|
50
|
+
is best placed to inspect):**
|
|
51
|
+
1. **Secret / token handling (MCP01).** **[INFERENCE]** The engines grade tool
|
|
52
|
+
*capability*, not launch *config*. shadow-mcp reads the server's declared env
|
|
53
|
+
var **names** (never values) and flags secret-bearing keys. OWASP ranks this #1.
|
|
54
|
+
2. **Provenance / supply chain (MCP04).** **[INFERENCE]** Neither engine grades
|
|
55
|
+
where the package came from. shadow-mcp records the package-runner + package
|
|
56
|
+
name as a provenance hint (a full typosquat-distance check is future work; see
|
|
57
|
+
the network-scan expansion note, `future-network-scan.md`).
|
|
58
|
+
3. **Transport exposure (MCP07).** **[INFERENCE]** stdio (local-only, pipe-attached)
|
|
59
|
+
vs HTTP/SSE (network-reachable, confused-deputy + session-hijack surface)
|
|
60
|
+
materially changes blast radius. Used as a risk modifier, not a base score.
|
|
61
|
+
4. **Governance / inventory status (MCP09).** **[ESTABLISHED]** the category;
|
|
62
|
+
**[INFERENCE]** that it should be surfaced explicitly. shadow-mcp reports
|
|
63
|
+
blast radius (how many hosts declare a server) and the shadow deltas
|
|
64
|
+
(running-but-unconfigured, configured-everywhere, capable-but-ungraded).
|
|
65
|
+
|
|
66
|
+
## The lethal trifecta
|
|
67
|
+
|
|
68
|
+
**[ESTABLISHED]** Origin: Simon Willison, 2025-06-16. A system is acutely
|
|
69
|
+
dangerous when it simultaneously has private-data access + untrusted-content
|
|
70
|
+
exposure + an exfiltration channel. **[INFERENCE]** For shadow-mcp the important
|
|
71
|
+
adaptation is that the trifecta is frequently assembled across *several* servers
|
|
72
|
+
on one client, not within one server. MCPAudit emits per-server trifecta
|
|
73
|
+
findings; composing the trifecta across the installed set is future work.
|
|
74
|
+
|
|
75
|
+
## How the band is computed
|
|
76
|
+
|
|
77
|
+
The sortable band (critical / high / medium / low / unknown) is derived from the
|
|
78
|
+
MCPAudit composite as the base, then adjusted:
|
|
79
|
+
- base: composite >= 7 critical, >= 5 high, >= 3.5 medium, > 0 low, else unknown
|
|
80
|
+
- mcp-trust grade of F or D raises the band by one step (authoritative danger signal)
|
|
81
|
+
- an HTTP/SSE transport raises a low/medium band by one step (MCP07 exposure)
|
|
82
|
+
- the assessment's `reasons` cite the OWASP ID behind each contribution
|
|
83
|
+
|
|
84
|
+
This keeps the numeric grade delegated and explainable while letting the local
|
|
85
|
+
layer (secrets, transport, provenance, governance) shape the final verdict with
|
|
86
|
+
cited justification.
|
|
87
|
+
|
|
88
|
+
## Registry vs computed A-F grades
|
|
89
|
+
|
|
90
|
+
mcp-trust only stores grades for servers someone has already scanned and seeded
|
|
91
|
+
(a handful). Every other discovered server gets a **computed** A-F letter: we
|
|
92
|
+
feed MCPAudit's static dimensions into mcp-trust's own `grade()` (its danger
|
|
93
|
+
weighting + critical cap), so the letter comes from mcp-trust's logic, not a
|
|
94
|
+
reimplementation, and without writing to its database. Computed grades are
|
|
95
|
+
marked with a trailing `~` and flagged `computed: true`.
|
|
96
|
+
|
|
97
|
+
**Honest caveat (the transparency axis).** A computed grade is derived from
|
|
98
|
+
*static config only* (no connection, no live tool enumeration), so for stdio
|
|
99
|
+
servers launched via wrappers/npx, MCPAudit sees little and most letters land at
|
|
100
|
+
**A**. Per mcp-trust's own transparency model, that is `transparency: low` —
|
|
101
|
+
"cannot verify safe", **not** "verified safe". So a computed `A~` means "no
|
|
102
|
+
capability risk detectable from config alone." The **band** (not the letter)
|
|
103
|
+
carries the differentiated signal, because the band folds in the local OWASP
|
|
104
|
+
layer (transport/MCP07, secrets/MCP01, blast radius/MCP09).
|
|
105
|
+
|
|
106
|
+
**Connected grading (`--connect` / `deep-scan`).** To populate the capability
|
|
107
|
+
dimensions for real, shadow-mcp can spawn a stdio server and enumerate its tools,
|
|
108
|
+
delegating to MCPAudit's connected engine. This moves a server off the static
|
|
109
|
+
`A` floor (a filesystem server connected-grades to `D`), so the computed letter
|
|
110
|
+
becomes meaningful. It is opt-in because connecting executes the server; remote
|
|
111
|
+
endpoints are never spawned (that is the network-scan tier), and a server that
|
|
112
|
+
needs real secrets to start falls back to its static grade. A connected grade is
|
|
113
|
+
flagged `connected: true` with the tool count in its reason line.
|
|
114
|
+
|
|
115
|
+
## Scale context (vendor-sourced, treat as directional)
|
|
116
|
+
|
|
117
|
+
**[ESTABLISHED, vendor-sourced]** Wallarm 2026 reported 315 MCP-related vulns in
|
|
118
|
+
2025; Palo Alto Unit 42 reported a 78.3% attack-success rate against one
|
|
119
|
+
compromised server among five connected; CVE-2025-49596 (CVSS 9.4) was
|
|
120
|
+
unauthenticated RCE via MCP Inspector. Much of the scale framing comes from
|
|
121
|
+
vendors selling MCP-security products, so treat specific percentages as
|
|
122
|
+
directional rather than precise.
|
|
123
|
+
|
|
124
|
+
## Citations
|
|
125
|
+
|
|
126
|
+
- OWASP MCP Top 10: https://owasp.org/www-project-mcp-top-10/
|
|
127
|
+
- OWASP MCP09 Shadow MCP Servers: https://owasp.org/www-project-mcp-top-10/2025/MCP09-2025%E2%80%93Shadow-MCP-Servers
|
|
128
|
+
- OWASP MCP Security Cheat Sheet: https://cheatsheetseries.owasp.org/cheatsheets/MCP_Security_Cheat_Sheet.html
|
|
129
|
+
- Lethal trifecta (Willison, 2025-06-16): https://simonwillison.net/2025/Jun/16/the-lethal-trifecta/
|
|
130
|
+
- MCP Spec security best practices (2025-06-18): https://modelcontextprotocol.io/specification/2025-06-18/basic/security_best_practices
|
|
131
|
+
- Invariant Labs, tool poisoning: https://invariantlabs.ai/blog/mcp-security-notification-tool-poisoning-attacks
|
|
132
|
+
- Mend.io, Shadow MCP: https://www.mend.io/blog/shadow-mcp-unauthorized-ai-connectivity-in-your-codebase/
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "shadow-mcp"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "Discover and risk-grade the MCP servers actually present on this machine. Local-first shadow-MCP inventory."
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
requires-python = ">=3.11"
|
|
7
|
+
license = { text = "MIT" }
|
|
8
|
+
authors = [{ name = "Saagar Patel" }]
|
|
9
|
+
dependencies = [
|
|
10
|
+
"pydantic>=2",
|
|
11
|
+
"rich>=13",
|
|
12
|
+
"mcp>=1.27.0,<2",
|
|
13
|
+
# Grading engines, resolved from PyPI. mcp-audits provides the static
|
|
14
|
+
# capability analyzer (mcp_audit.api.scan_config_only_dict); mcp-trust
|
|
15
|
+
# provides the A-F danger-grade logic (mcp_trust.core.grading.grade). Both
|
|
16
|
+
# are imported lazily (try/except ImportError) so a partial install degrades
|
|
17
|
+
# to discovery-only rather than crashing — but a normal `uvx shadow-mcp`
|
|
18
|
+
# install pulls them from PyPI and grades fully. The mcp cap stays <2 so a
|
|
19
|
+
# fresh uvx resolve can't pull a v2 alpha that breaks the FastMCP API.
|
|
20
|
+
"mcp-audits>=2.2.3",
|
|
21
|
+
"mcp-trust>=0.1.0",
|
|
22
|
+
]
|
|
23
|
+
|
|
24
|
+
[dependency-groups]
|
|
25
|
+
dev = ["pytest>=8", "ruff>=0.6"]
|
|
26
|
+
|
|
27
|
+
[project.scripts]
|
|
28
|
+
shadow-mcp = "shadow_mcp.cli:main"
|
|
29
|
+
|
|
30
|
+
[build-system]
|
|
31
|
+
requires = ["hatchling"]
|
|
32
|
+
build-backend = "hatchling.build"
|
|
33
|
+
|
|
34
|
+
[tool.hatch.build.targets.wheel]
|
|
35
|
+
packages = ["src/shadow_mcp"]
|
|
36
|
+
|
|
37
|
+
[tool.pytest.ini_options]
|
|
38
|
+
testpaths = ["tests"]
|
|
39
|
+
addopts = "-q"
|
|
40
|
+
|
|
41
|
+
[tool.ruff]
|
|
42
|
+
line-length = 100
|
|
43
|
+
target-version = "py311"
|
|
44
|
+
src = ["src", "tests"]
|
|
45
|
+
|
|
46
|
+
[tool.ruff.lint]
|
|
47
|
+
select = ["E", "F", "I", "UP", "B"]
|
|
48
|
+
|
|
49
|
+
[tool.ruff.lint.per-file-ignores]
|
|
50
|
+
# Test fixtures reproduce real `ps`/CLI output lines verbatim; wrapping them
|
|
51
|
+
# would corrupt the data under test.
|
|
52
|
+
"tests/**" = ["E501"]
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://static.modelcontextprotocol.io/schemas/2025-12-11/server.schema.json",
|
|
3
|
+
"name": "io.github.saagpatel/shadow-mcp",
|
|
4
|
+
"title": "shadow-mcp",
|
|
5
|
+
"description": "Local MCP server that inventories and risk-grades MCP servers configured on this machine.",
|
|
6
|
+
"repository": {
|
|
7
|
+
"url": "https://github.com/saagpatel/shadow-mcp",
|
|
8
|
+
"source": "github"
|
|
9
|
+
},
|
|
10
|
+
"version": "0.1.0",
|
|
11
|
+
"packages": [
|
|
12
|
+
{
|
|
13
|
+
"registryType": "pypi",
|
|
14
|
+
"registryBaseUrl": "https://pypi.org",
|
|
15
|
+
"identifier": "shadow-mcp",
|
|
16
|
+
"version": "0.1.0",
|
|
17
|
+
"runtimeHint": "uvx",
|
|
18
|
+
"transport": { "type": "stdio" },
|
|
19
|
+
"packageArguments": [{ "type": "positional", "value": "mcp-serve" }]
|
|
20
|
+
}
|
|
21
|
+
]
|
|
22
|
+
}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
"""shadow-mcp: discover and risk-grade the MCP servers present on this machine.
|
|
2
|
+
|
|
3
|
+
Discovery is strictly read-only: collectors parse configs and list processes,
|
|
4
|
+
and never mutate anything they find. Risk-grading delegates to the existing
|
|
5
|
+
engines (MCPAudit for a 0-10 capability composite, mcp-trust for an A-F danger
|
|
6
|
+
grade) rather than reimplementing them.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
__version__ = "0.1.0"
|