mcp-warden-cli 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mcp_warden/__init__.py +25 -0
- mcp_warden/capture.py +197 -0
- mcp_warden/check_core.py +98 -0
- mcp_warden/checks.py +163 -0
- mcp_warden/checks_secret.py +129 -0
- mcp_warden/checks_supply.py +146 -0
- mcp_warden/cli.py +367 -0
- mcp_warden/cli_diff.py +245 -0
- mcp_warden/cli_guard.py +301 -0
- mcp_warden/cli_lock.py +124 -0
- mcp_warden/cli_sign.py +304 -0
- mcp_warden/drift.py +272 -0
- mcp_warden/emit_res.py +102 -0
- mcp_warden/emitters.py +169 -0
- mcp_warden/framing.py +338 -0
- mcp_warden/guard.py +397 -0
- mcp_warden/guard_banner.py +211 -0
- mcp_warden/guard_io.py +70 -0
- mcp_warden/guard_lifecycle.py +320 -0
- mcp_warden/guard_list_gate.py +95 -0
- mcp_warden/guard_loop.py +332 -0
- mcp_warden/guard_result.py +249 -0
- mcp_warden/guard_strict.py +159 -0
- mcp_warden/hashing.py +102 -0
- mcp_warden/inspector.py +169 -0
- mcp_warden/lockfile.py +299 -0
- mcp_warden/models.py +300 -0
- mcp_warden/net_rules.py +80 -0
- mcp_warden/policy_eval.py +264 -0
- mcp_warden/policy_model.py +261 -0
- mcp_warden/precommit.py +259 -0
- mcp_warden/provenance.py +199 -0
- mcp_warden/redact.py +52 -0
- mcp_warden/res_catalog.py +198 -0
- mcp_warden/res_net.py +227 -0
- mcp_warden/res_rules.py +176 -0
- mcp_warden/result_inspection.py +201 -0
- mcp_warden/schema_diff.py +548 -0
- mcp_warden/signing.py +291 -0
- mcp_warden/tokenizer.py +199 -0
- mcp_warden/wire_block.py +258 -0
- mcp_warden_cli-1.0.0.dist-info/METADATA +517 -0
- mcp_warden_cli-1.0.0.dist-info/RECORD +46 -0
- mcp_warden_cli-1.0.0.dist-info/WHEEL +4 -0
- mcp_warden_cli-1.0.0.dist-info/entry_points.txt +3 -0
- mcp_warden_cli-1.0.0.dist-info/licenses/LICENSE +21 -0
mcp_warden/__init__.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
"""mcp-warden — CI-first MCP supply-chain integrity gate.
|
|
2
|
+
|
|
3
|
+
mcp-warden pins and verifies the *declared* tool/resource/prompt surface of an
|
|
4
|
+
MCP server (the ``(name, description, inputSchema)`` metadata returned by
|
|
5
|
+
``tools/list`` / ``resources/list`` / ``prompts/list``), then fails CI when that
|
|
6
|
+
surface drifts from an approved baseline. It operates on **definitions**, never
|
|
7
|
+
on runtime tool behavior or tool results. See ``docs/THREAT_MODEL.md``.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
__version__ = "1.0.0"

#: Lock schema version. It went from 2 to 3 for #29, which added in-document
#: ``$ref`` resolution to ``schema_diff.extract_skeleton``. Resolving refs alters
#: the skeleton of every ref-using tool, and therefore its ``entry_digest`` and
#: the ``overall_digest`` (``schema_version`` is embedded there, lockfile.py:167).
#: Bumping the number turns that digest change into a declared schema-format
#: migration instead of a silent surface change; drift.py adds a
#: ``schema-version-migrated`` advisory next to (never instead of) the
#: ``unapproved-change`` finding, so re-attestation is still required.
SCHEMA_VERSION = 3

#: Provenance-block version (#19). It is stored INSIDE the ``pin`` block and
#: OUTSIDE the ``overall_digest`` payload, which lets it evolve for #16/#23
#: without changing any server's digest. It is intentionally separate from
#: ``SCHEMA_VERSION``: that one is part of the digest payload, so bumping it
#: would falsely trip drift on v2 baselines.
PROVENANCE_VERSION = 1

__all__ = ["__version__", "SCHEMA_VERSION", "PROVENANCE_VERSION"]
|
mcp_warden/capture.py
ADDED
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
"""MCP stdio capture client.
|
|
2
|
+
|
|
3
|
+
Spawns the target MCP server **over stdio as an argv array, never via a shell**
|
|
4
|
+
(WARDEN_LOCK_SCHEMA.md §10.4), runs ``initialize`` + ``tools/list`` +
|
|
5
|
+
``resources/list`` + ``prompts/list``, and captures the declared surface.
|
|
6
|
+
|
|
7
|
+
A server that hangs, crashes, or exits nonzero must produce a clear
|
|
8
|
+
``CaptureError``, not a traceback.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import logging
|
|
14
|
+
from typing import Any
|
|
15
|
+
|
|
16
|
+
import anyio
|
|
17
|
+
from mcp import ClientSession, StdioServerParameters
|
|
18
|
+
from mcp.client.stdio import stdio_client
|
|
19
|
+
|
|
20
|
+
from .models import (
|
|
21
|
+
CapturedPrompt,
|
|
22
|
+
CapturedResource,
|
|
23
|
+
CapturedSurface,
|
|
24
|
+
CapturedTool,
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
#: Logger used by the capture client.
logger = logging.getLogger("mcp_warden.capture")

#: Default hard wall-clock limit, in seconds, for the whole capture handshake.
DEFAULT_TIMEOUT_S = 30.0
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class CaptureError(Exception):
    """Signal that the MCP server's surface could not be captured cleanly.

    The message is written for direct display by the CLI; it is a
    human-readable explanation, never the child process's raw traceback.
    """
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _model_dump(obj: Any) -> dict[str, Any]:
|
|
42
|
+
"""Best-effort dict view of an MCP SDK model across pydantic versions."""
|
|
43
|
+
if hasattr(obj, "model_dump"):
|
|
44
|
+
return obj.model_dump() # pydantic v2
|
|
45
|
+
if hasattr(obj, "dict"):
|
|
46
|
+
return obj.dict() # pydantic v1 fallback
|
|
47
|
+
return dict(obj)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
async def _capture_async(command: str, args: list[str], timeout_s: float) -> CapturedSurface:
    """Perform the capture handshake; :func:`capture_surface` applies the timeout.

    ``timeout_s`` is accepted for signature parity with the caller but is not
    used here; the deadline is enforced by the wrapper.
    """
    # The §10.4 guarantee: command + args reach the OS as an argv array via
    # StdioServerParameters; the MCP SDK does NOT spawn through a shell.
    server = StdioServerParameters(command=command, args=list(args))

    async with stdio_client(server) as (reader, writer), ClientSession(reader, writer) as session:
        handshake = await session.initialize()
        negotiated = getattr(handshake, "protocolVersion", "")

        # Keyword arguments are evaluated left to right, so the three list
        # requests are still issued in tools -> resources -> prompts order.
        return CapturedSurface(
            command=command,
            args=list(args),
            protocol_version=str(negotiated or ""),
            tools=await _list_tools(session),
            resources=await _list_resources(session),
            prompts=await _list_prompts(session),
        )
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
async def _list_tools(session: ClientSession) -> list[CapturedTool]:
    """Run ``tools/list`` and normalize every page of results.

    ``tools/list`` is a paginated request: a result may carry ``nextCursor``
    pointing at further tools. Reading only the first page would leave later
    tools unpinned and unchecked, so pages are followed until the server stops
    returning a cursor.

    Args:
        session: An initialized MCP client session.

    Returns:
        All declared tools in server order. Empty list if the very first
        request fails (the server may not declare the tools capability).

    Raises:
        CaptureError: If a follow-up page cannot be fetched or the server
            repeats a cursor; a silently truncated surface is never returned.
    """
    out: list[CapturedTool] = []
    cursor: str | None = None
    seen_cursors: set[str] = set()

    while True:
        try:
            # The first request is issued without arguments, exactly as before,
            # so SDK versions without a ``cursor`` parameter keep working for
            # unpaginated servers.
            if cursor is None:
                result = await session.list_tools()
            else:
                result = await session.list_tools(cursor=cursor)
        except Exception as exc:
            if cursor is None:
                # server may not declare the tools capability
                logger.info("tools/list unavailable: %s", exc)
                return []
            # Part of the surface is already known to exist; fail closed rather
            # than report an incomplete tool list as the full surface.
            raise CaptureError(
                f"tools/list pagination failed after {len(out)} tool(s): "
                f"{type(exc).__name__}: {exc}"
            ) from exc

        for tool in getattr(result, "tools", []) or []:
            data = _model_dump(tool)
            out.append(
                CapturedTool(
                    name=str(data.get("name", "")),
                    description=data.get("description"),
                    input_schema=data.get("inputSchema"),
                )
            )

        cursor = getattr(result, "nextCursor", None) or None
        if cursor is None:
            return out
        if cursor in seen_cursors:
            # A repeated cursor would otherwise loop until the outer timeout.
            raise CaptureError("tools/list returned a repeated pagination cursor; refusing to loop.")
        seen_cursors.add(cursor)
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
async def _list_resources(session: ClientSession) -> list[CapturedResource]:
    """Run ``resources/list`` and normalize every page of results.

    ``resources/list`` is a paginated request: a result may carry
    ``nextCursor`` pointing at further resources. Pages are followed until the
    server stops returning a cursor so the captured surface is complete.

    Args:
        session: An initialized MCP client session.

    Returns:
        All declared resources in server order. Empty list if the very first
        request fails (treated as "resources unsupported").

    Raises:
        CaptureError: If a follow-up page cannot be fetched or the server
            repeats a cursor; a silently truncated surface is never returned.
    """
    out: list[CapturedResource] = []
    cursor: str | None = None
    seen_cursors: set[str] = set()

    while True:
        try:
            # First request without arguments, exactly as before, so SDK
            # versions lacking a ``cursor`` parameter still work when the
            # server does not paginate.
            if cursor is None:
                result = await session.list_resources()
            else:
                result = await session.list_resources(cursor=cursor)
        except Exception as exc:
            if cursor is None:
                logger.info("resources/list unavailable: %s", exc)
                return []
            # Fail closed: more resources are known to exist but are unreadable.
            raise CaptureError(
                f"resources/list pagination failed after {len(out)} resource(s): "
                f"{type(exc).__name__}: {exc}"
            ) from exc

        for res in getattr(result, "resources", []) or []:
            data = _model_dump(res)
            out.append(
                CapturedResource(
                    uri=str(data.get("uri", "")),
                    name=data.get("name"),
                    description=data.get("description"),
                    mime_type=data.get("mimeType"),
                )
            )

        cursor = getattr(result, "nextCursor", None) or None
        if cursor is None:
            return out
        if cursor in seen_cursors:
            # A repeated cursor would otherwise loop until the outer timeout.
            raise CaptureError(
                "resources/list returned a repeated pagination cursor; refusing to loop."
            )
        seen_cursors.add(cursor)
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
async def _list_prompts(session: ClientSession) -> list[CapturedPrompt]:
    """Run ``prompts/list`` and normalize every page of results.

    ``prompts/list`` is a paginated request: a result may carry ``nextCursor``
    pointing at further prompts. Pages are followed until the server stops
    returning a cursor so the captured surface is complete.

    Args:
        session: An initialized MCP client session.

    Returns:
        All declared prompts in server order. Empty list if the very first
        request fails (treated as "prompts unsupported").

    Raises:
        CaptureError: If a follow-up page cannot be fetched or the server
            repeats a cursor; a silently truncated surface is never returned.
    """
    out: list[CapturedPrompt] = []
    cursor: str | None = None
    seen_cursors: set[str] = set()

    while True:
        try:
            # First request without arguments, exactly as before, so SDK
            # versions lacking a ``cursor`` parameter still work when the
            # server does not paginate.
            if cursor is None:
                result = await session.list_prompts()
            else:
                result = await session.list_prompts(cursor=cursor)
        except Exception as exc:
            if cursor is None:
                logger.info("prompts/list unavailable: %s", exc)
                return []
            # Fail closed: more prompts are known to exist but are unreadable.
            raise CaptureError(
                f"prompts/list pagination failed after {len(out)} prompt(s): "
                f"{type(exc).__name__}: {exc}"
            ) from exc

        for prompt in getattr(result, "prompts", []) or []:
            data = _model_dump(prompt)
            arguments = data.get("arguments")
            # ``arguments`` stays None unless the server sent a list; entries
            # that are not already dicts are dumped to dicts.
            norm_args: list[dict[str, Any]] | None = None
            if isinstance(arguments, list):
                norm_args = [a if isinstance(a, dict) else _model_dump(a) for a in arguments]
            out.append(
                CapturedPrompt(
                    name=str(data.get("name", "")),
                    description=data.get("description"),
                    arguments=norm_args,
                )
            )

        cursor = getattr(result, "nextCursor", None) or None
        if cursor is None:
            return out
        if cursor in seen_cursors:
            # A repeated cursor would otherwise loop until the outer timeout.
            raise CaptureError(
                "prompts/list returned a repeated pagination cursor; refusing to loop."
            )
        seen_cursors.add(cursor)
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
async def capture_surface(
    command: str,
    args: list[str],
    timeout_s: float = DEFAULT_TIMEOUT_S,
) -> CapturedSurface:
    """Launch an MCP server over stdio and record the surface it declares.

    Args:
        command: ``argv[0]`` of the server launch (no shell expansion performed).
        args: The rest of argv, in the given order.
        timeout_s: Wall-clock limit applied to the entire handshake.

    Returns:
        A :class:`CapturedSurface` holding the tools, resources and prompts.

    Raises:
        CaptureError: On timeout (hung server), a missing command, or any other
            failure during spawn or handshake. The message is CLI-safe.
    """
    logger.debug("spawning MCP server: command=%r args=%r", command, args)
    try:
        with anyio.fail_after(timeout_s):
            return await _capture_async(command, args, timeout_s)
    except CaptureError:
        # Already CLI-safe; pass it through untouched.
        raise
    except FileNotFoundError as exc:
        not_found_msg = f"MCP server command not found: '{command}' ({exc})."
        raise CaptureError(not_found_msg) from exc
    except TimeoutError as exc:
        hang_msg = (
            f"MCP server '{command}' did not complete the handshake within {timeout_s:.0f}s "
            f"(it may be hung or waiting on input)."
        )
        raise CaptureError(hang_msg) from exc
    except Exception as exc:
        # Everything else: nonzero exit, broken pipe, protocol error, decode
        # failure, and so on.
        generic_msg = f"Failed to capture MCP server '{command}': {type(exc).__name__}: {exc}"
        raise CaptureError(generic_msg) from exc
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def capture_surface_sync(
    command: str,
    args: list[str],
    timeout_s: float = DEFAULT_TIMEOUT_S,
) -> CapturedSurface:
    """Blocking entry point for the CLI; drives :func:`capture_surface` to completion.

    Args:
        command: ``argv[0]`` of the server launch.
        args: The rest of argv.
        timeout_s: Wall-clock limit for the capture.

    Returns:
        The captured surface.

    Raises:
        CaptureError: On any capture failure (see :func:`capture_surface`).
    """
    # anyio.run forwards the positional arguments to the coroutine function.
    surface = anyio.run(capture_surface, command, args, timeout_s)
    return surface
|
mcp_warden/check_core.py
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
"""Shared check core: the single source of truth for the ``check`` verdict.
|
|
2
|
+
|
|
3
|
+
Both ``cli.py:check`` and the pre-commit wrapper (``precommit.py``) call
|
|
4
|
+
:func:`run_check` so a local hook and CI can never disagree on a drift verdict
|
|
5
|
+
(issue: "a hook that disagrees with CI is worse than no hook").
|
|
6
|
+
|
|
7
|
+
The sequence here mirrors what ``check`` has always done:
|
|
8
|
+
``read_lock`` -> ``capture_surface_sync`` -> ``run_checks`` -> ``build_lock``
|
|
9
|
+
(an in-memory CURRENT lock, never persisted) -> ``compute_drift``.
|
|
10
|
+
|
|
11
|
+
# INTERNAL STABILITY NOTE: the pre-commit wrapper (precommit.py) depends on this
|
|
12
|
+
# function's signature and exception contract (CaptureError for spawn/timeout
|
|
13
|
+
# failures; FileNotFoundError / ValueError for a missing/invalid lock). Do not
|
|
14
|
+
# change either without updating precommit.py.
|
|
15
|
+
#
|
|
16
|
+
# DETERMINISM: this shared verdict path MUST stay free of environment-dependent
|
|
17
|
+
# behavior (cwd-, time-, locale-, or env-var-conditioned branches). The local
|
|
18
|
+
# pre-commit hook and CI both reach the drift verdict through this exact code, so
|
|
19
|
+
# any non-deterministic branch here would let a local hook verdict diverge from
|
|
20
|
+
# CI — the precise failure ("a hook that disagrees with CI") this module exists
|
|
21
|
+
# to prevent.
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
from __future__ import annotations
|
|
25
|
+
|
|
26
|
+
from dataclasses import dataclass
|
|
27
|
+
from pathlib import Path
|
|
28
|
+
|
|
29
|
+
from .capture import capture_surface_sync
|
|
30
|
+
from .checks import run_checks
|
|
31
|
+
from .drift import DriftItem, compute_drift
|
|
32
|
+
from .lockfile import build_lock, read_lock
|
|
33
|
+
from .models import Finding
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@dataclass(frozen=True)
class CheckResult:
    """Everything a check run produced, for callers that want more than drift.

    ``findings`` holds the static-check findings for the current surface (the
    CLI's SARIF/JSON emitters need them); ``drift`` holds the verdict set.
    """

    # Static-check findings on the current surface.
    findings: list[Finding]
    # Drift items; the verdict set.
    drift: list[DriftItem]
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def run_check_full(
    command: str,
    args: list[str],
    lock_path: Path,
    timeout_s: float,
) -> CheckResult:
    """Execute the complete verdict path: read lock -> capture -> checks -> drift.

    This function is the single source of truth for the ``check`` verdict.
    ``cli.py:check`` calls it and uses ``findings`` for SARIF/JSON output; the
    pre-commit wrapper goes through the thinner :func:`run_check`, which drops
    ``findings``.

    Args:
        command: The MCP server launch command (argv[0]).
        args: The remaining server launch argv.
        lock_path: Location of the baseline ``warden.lock``.
        timeout_s: Capture timeout in seconds.

    Returns:
        A :class:`CheckResult`; an empty ``drift`` means clean.

    Raises:
        FileNotFoundError: The lock file does not exist.
        ValueError: The lock file is invalid JSON or fails schema validation.
        CaptureError: The server could not be spawned or did not respond in time.
    """
    # The baseline is read before the server is spawned.
    baseline_lock = read_lock(lock_path)
    captured = capture_surface_sync(command, args, timeout_s=timeout_s)
    static_findings = run_checks(captured)
    # The current lock exists only IN MEMORY for the diff; the check path never
    # writes it to disk.
    drift_items = compute_drift(baseline_lock, build_lock(captured, static_findings))
    return CheckResult(findings=static_findings, drift=drift_items)
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def run_check(
    command: str,
    args: list[str],
    lock_path: Path,
    timeout_s: float,
) -> list[DriftItem]:
    """Run the check path and hand back just the drift set (the verdict).

    Thin wrapper over :func:`run_check_full` for callers such as the pre-commit
    hook that need only the drift verdict, never the static findings.

    Raises:
        FileNotFoundError, ValueError, CaptureError: see :func:`run_check_full`.
    """
    full_result = run_check_full(command, args, lock_path, timeout_s)
    return full_result.drift
|
mcp_warden/checks.py
ADDED
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
"""Static-check engine orchestrator (CHECKS.md).
|
|
2
|
+
|
|
3
|
+
Runs the full ``WRD-*`` catalog over a captured surface:
|
|
4
|
+
- capability checks ``WRD-CAP-*`` (via the shared tokenizer),
|
|
5
|
+
- secret checks ``WRD-SEC-*`` (checks_secret),
|
|
6
|
+
- supply-chain checks ``WRD-SUP-*`` (checks_supply),
|
|
7
|
+
- robustness ``WRD-SCHEMA-MALFORMED``.
|
|
8
|
+
|
|
9
|
+
Findings are returned sorted by ``(target, rule_id)`` for deterministic output
|
|
10
|
+
(CHECKS.md §5.1). CUT items (fuzzy/NLP, result scanning, etc.) are NOT here.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
from typing import Any
|
|
16
|
+
|
|
17
|
+
from .checks_secret import scan_field
|
|
18
|
+
from .checks_supply import check_launch_command
|
|
19
|
+
from .models import CapturedSurface, Finding
|
|
20
|
+
from .tokenizer import capability_evidence, derive_capabilities
|
|
21
|
+
|
|
22
|
+
# Capability flag -> (rule_id, severity) per CHECKS.md §4.1.
|
|
23
|
+
_CAP_RULES: dict[str, tuple[str, str]] = {
|
|
24
|
+
"shell-exec": ("WRD-CAP-SHELL", "critical"),
|
|
25
|
+
"fs-write": ("WRD-CAP-FS-WRITE", "high"),
|
|
26
|
+
"fs-read": ("WRD-CAP-FS-READ", "medium"),
|
|
27
|
+
"http-request": ("WRD-CAP-HTTP", "high"),
|
|
28
|
+
"sql-query": ("WRD-CAP-SQL", "high"),
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _string_values_from_schema(schema: dict[str, Any]) -> list[str]:
|
|
33
|
+
"""Collect string ``default``/``enum``/``examples`` values from a JSON Schema.
|
|
34
|
+
|
|
35
|
+
Recurses nested schemas (``properties``, ``items``, ``$defs``, etc.). Property
|
|
36
|
+
*keys* are intentionally NOT scanned (CHECKS.md §4.2 — a key named ``api_key``
|
|
37
|
+
is not a leak).
|
|
38
|
+
|
|
39
|
+
Args:
|
|
40
|
+
schema: A JSON Schema fragment.
|
|
41
|
+
|
|
42
|
+
Returns:
|
|
43
|
+
Flat list of candidate string values to run secret scans over.
|
|
44
|
+
"""
|
|
45
|
+
out: list[str] = []
|
|
46
|
+
|
|
47
|
+
def walk(node: Any) -> None:
|
|
48
|
+
if isinstance(node, dict):
|
|
49
|
+
if isinstance(node.get("default"), str):
|
|
50
|
+
out.append(node["default"])
|
|
51
|
+
enum = node.get("enum")
|
|
52
|
+
if isinstance(enum, list):
|
|
53
|
+
out.extend(v for v in enum if isinstance(v, str))
|
|
54
|
+
examples = node.get("examples")
|
|
55
|
+
if isinstance(examples, list):
|
|
56
|
+
out.extend(v for v in examples if isinstance(v, str))
|
|
57
|
+
for key, val in node.items():
|
|
58
|
+
if key in ("default", "enum", "examples"):
|
|
59
|
+
continue
|
|
60
|
+
walk(val)
|
|
61
|
+
elif isinstance(node, list):
|
|
62
|
+
for item in node:
|
|
63
|
+
walk(item)
|
|
64
|
+
|
|
65
|
+
walk(schema)
|
|
66
|
+
return out
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _schema_is_malformed(schema: Any) -> bool:
|
|
70
|
+
"""Return True if an inputSchema is present but not analyzable (not an object)."""
|
|
71
|
+
return schema is not None and not isinstance(schema, dict)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def run_checks(surface: CapturedSurface) -> list[Finding]:
    """Apply the whole static-check catalog to a captured surface.

    Args:
        surface: The captured declared surface.

    Returns:
        Findings de-duplicated and sorted deterministically by ``target`` and
        then ``rule_id``. Secret snippets arrive already redacted from the
        scanners.
    """
    collected: list[Finding] = []

    # --- Launch / supply-chain (target = launch/command) ---
    collected += check_launch_command(surface.command, surface.args)
    for argv_piece in [surface.command, *surface.args]:
        collected += scan_field(argv_piece, "launch/command")

    # --- Tools ---
    for tool in surface.tools:
        tool_target = f"tools/{tool.name}"

        usable_schema: dict[str, Any] | None = tool.input_schema
        if _schema_is_malformed(tool.input_schema):
            # Record the malformed schema and analyze the tool without one.
            usable_schema = None
            collected.append(
                Finding(
                    rule_id="WRD-SCHEMA-MALFORMED",
                    severity="low",
                    target=tool_target,
                    message="inputSchema is present but not a JSON object; capability analysis skipped",
                    snippet=f"inputSchema type={type(tool.input_schema).__name__}",
                )
            )

        # Capability checks via the shared tokenizer.
        for flag in derive_capabilities(tool.name, usable_schema):
            rule_id, severity = _CAP_RULES[flag]
            evidence = capability_evidence(tool.name, usable_schema, flag)
            collected.append(
                Finding(
                    rule_id=rule_id,
                    severity=severity,
                    target=tool_target,
                    message=f"Tool derives capability '{flag}' ({evidence})",
                    snippet=evidence,
                )
            )

        # Secret checks: name, then description, then schema string values.
        secret_inputs = [tool.name]
        if tool.description:
            secret_inputs.append(tool.description)
        if isinstance(usable_schema, dict):
            secret_inputs.extend(_string_values_from_schema(usable_schema))
        for text in secret_inputs:
            collected += scan_field(text, tool_target)

    # --- Resources ---
    for res in surface.resources:
        res_target = f"resources/{res.uri}"
        # filter(None, ...) drops empty / missing fields.
        for text in filter(None, (res.uri, res.name, res.description)):
            collected += scan_field(text, res_target)

    # --- Prompts ---
    for prompt in surface.prompts:
        prompt_target = f"prompts/{prompt.name}"
        collected += scan_field(prompt.name, prompt_target)
        if prompt.description:
            collected += scan_field(prompt.description, prompt_target)

    return _dedupe_and_sort(collected)
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def _dedupe_and_sort(findings: list[Finding]) -> list[Finding]:
|
|
149
|
+
"""Collapse duplicate (rule_id, target, snippet) and sort by (target, rule_id).
|
|
150
|
+
|
|
151
|
+
CHECKS.md §5.1/§5.2: one finding per (rule_id, target, match-location);
|
|
152
|
+
emitted sorted by ``(target, rule_id)``.
|
|
153
|
+
"""
|
|
154
|
+
seen: set[tuple[str, str, str]] = set()
|
|
155
|
+
unique: list[Finding] = []
|
|
156
|
+
for f in findings:
|
|
157
|
+
key = (f.rule_id, f.target, f.snippet)
|
|
158
|
+
if key in seen:
|
|
159
|
+
continue
|
|
160
|
+
seen.add(key)
|
|
161
|
+
unique.append(f)
|
|
162
|
+
unique.sort(key=lambda f: (f.target, f.rule_id, f.snippet))
|
|
163
|
+
return unique
|
mcp_warden/checks_secret.py
ADDED
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
"""Secret-leakage checks (MCP-SECRET) — ``WRD-SEC-*`` (CHECKS.md §4.2).
|
|
2
|
+
|
|
3
|
+
Deterministic regex + entropy heuristics over the declared surface's string
|
|
4
|
+
fields. Snippets are ALWAYS redacted (CHECKS.md §8.2).
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import math
|
|
10
|
+
import re
|
|
11
|
+
from collections import Counter
|
|
12
|
+
|
|
13
|
+
from .models import Finding
|
|
14
|
+
from .redact import redact_secret
|
|
15
|
+
|
|
16
|
+
# --- Vendor patterns (CHECKS.md §4.2; case-sensitive unless noted) -----------
|
|
17
|
+
|
|
18
|
+
_VENDOR_PATTERNS: list[tuple[str, str, re.Pattern[str]]] = [
|
|
19
|
+
("WRD-SEC-OPENAI", "critical", re.compile(r"\bsk-[A-Za-z0-9]{20,}\b")),
|
|
20
|
+
# GitHub: ghp_ (36) plus gho_/ghu_/ghs_/ghr_ OAuth/app tokens.
|
|
21
|
+
("WRD-SEC-GITHUB", "critical", re.compile(r"\bgh[pousr]_[A-Za-z0-9]{36}\b")),
|
|
22
|
+
("WRD-SEC-AWS-AKID", "critical", re.compile(r"\bAKIA[0-9A-Z]{16}\b")),
|
|
23
|
+
("WRD-SEC-SLACK", "critical", re.compile(r"\bxox[baprs]-[A-Za-z0-9-]{10,}\b")),
|
|
24
|
+
(
|
|
25
|
+
"WRD-SEC-PRIVKEY",
|
|
26
|
+
"critical",
|
|
27
|
+
re.compile(r"-----BEGIN (RSA |EC |OPENSSH |DSA |PGP )?PRIVATE KEY-----"),
|
|
28
|
+
),
|
|
29
|
+
(
|
|
30
|
+
"WRD-SEC-JWT",
|
|
31
|
+
"high",
|
|
32
|
+
re.compile(r"\beyJ[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\b"),
|
|
33
|
+
),
|
|
34
|
+
]
|
|
35
|
+
|
|
36
|
+
#: Entropy candidate token pattern (CHECKS.md §4.2).
|
|
37
|
+
_ENTROPY_TOKEN = re.compile(r"[A-Za-z0-9+/_=-]{20,}")
|
|
38
|
+
|
|
39
|
+
#: Splitter for the entropy pass: whitespace + chars outside the candidate set.
|
|
40
|
+
_ENTROPY_SPLIT = re.compile(r"[^A-Za-z0-9+/_=.-]+")
|
|
41
|
+
|
|
42
|
+
ENTROPY_THRESHOLD = 4.0
|
|
43
|
+
ENTROPY_MIN_LEN = 24
|
|
44
|
+
ALNUM_DOMINANCE = 0.80
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def shannon_entropy(token: str) -> float:
    """Return the Shannon entropy, in bits per character, of ``token``.

    The entropy is taken over the token's own character distribution:
    ``H = -Σ p_i log2 p_i``.

    Args:
        token: The candidate string.

    Returns:
        Entropy in bits per character; ``0.0`` for an empty string.
    """
    if not token:
        return 0.0
    length = len(token)
    # One probability per distinct character, in first-appearance order.
    probabilities = [count / length for count in Counter(token).values()]
    return -sum(p * math.log2(p) for p in probabilities)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def _alnum_ratio(token: str) -> float:
|
|
66
|
+
"""Fraction of characters in ``[A-Za-z0-9]``."""
|
|
67
|
+
if not token:
|
|
68
|
+
return 0.0
|
|
69
|
+
alnum = sum(1 for ch in token if ch.isalnum() and ch.isascii())
|
|
70
|
+
return alnum / len(token)
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def scan_field(value: str, target: str) -> list[Finding]:
    """Scan one string field for secret patterns; return redacted findings.

    Applies the vendor patterns first, then the entropy heuristic de-duped
    against any token already matched by a vendor rule.

    Leading and trailing ``.`` are trimmed from each entropy candidate. The
    splitter keeps ``.`` inside tokens so dotted strings stay whole and are
    rejected as candidates, but that also swallowed a sentence-ending period:
    a secret written as ``"... <token>."`` was skipped entirely. Tokens with
    interior dots are still rejected exactly as before.

    Args:
        value: The string field content to scan.
        target: The finding target, e.g. ``"tools/<name>"`` or ``"launch/command"``.

    Returns:
        A list of :class:`Finding` with redacted snippets. May be empty.
    """
    if not value:
        return []

    findings: list[Finding] = []
    # Raw text already reported, used to suppress overlapping entropy hits.
    matched_spans: set[str] = set()

    # 1) Explicit vendor patterns.
    for rule_id, severity, pattern in _VENDOR_PATTERNS:
        for m in pattern.finditer(value):
            raw = m.group(0)
            matched_spans.add(raw)
            findings.append(
                Finding(
                    rule_id=rule_id,
                    severity=severity,
                    target=target,
                    message=f"{rule_id}: possible secret in field",
                    snippet=redact_secret(raw),
                )
            )

    # 2) Entropy heuristic, de-duped against vendor matches.
    for piece in _ENTROPY_SPLIT.split(value):
        # Drop surrounding punctuation dots; interior dots are left in place so
        # the fullmatch below still rejects dotted strings.
        token = piece.strip(".")
        if len(token) < ENTROPY_MIN_LEN:
            continue
        if not _ENTROPY_TOKEN.fullmatch(token):
            continue
        if any(token in span or span in token for span in matched_spans):
            continue  # already covered by a vendor rule
        if _alnum_ratio(token) < ALNUM_DOMINANCE:
            continue
        if shannon_entropy(token) >= ENTROPY_THRESHOLD:
            findings.append(
                Finding(
                    rule_id="WRD-SEC-ENTROPY",
                    severity="high",
                    target=target,
                    message="WRD-SEC-ENTROPY: high-entropy token (possible secret)",
                    snippet=redact_secret(token),
                )
            )
            # Remember it so a repeat of the same token is reported only once.
            matched_spans.add(token)

    return findings
|