sast-mcp-server 0.6.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sast_mcp_server-0.6.0/.github/workflows/ci.yml +47 -0
- sast_mcp_server-0.6.0/.gitignore +42 -0
- sast_mcp_server-0.6.0/CHANGELOG.md +126 -0
- sast_mcp_server-0.6.0/CLAUDE.md +171 -0
- sast_mcp_server-0.6.0/Dockerfile +26 -0
- sast_mcp_server-0.6.0/GEMINI.md +100 -0
- sast_mcp_server-0.6.0/LICENSE +21 -0
- sast_mcp_server-0.6.0/PKG-INFO +494 -0
- sast_mcp_server-0.6.0/README.md +449 -0
- sast_mcp_server-0.6.0/docs/claude-desktop.md +82 -0
- sast_mcp_server-0.6.0/docs/cursor.md +67 -0
- sast_mcp_server-0.6.0/docs/openai.md +53 -0
- sast_mcp_server-0.6.0/gemini-extension.json +15 -0
- sast_mcp_server-0.6.0/pyproject.toml +78 -0
- sast_mcp_server-0.6.0/sast_mcp_server/__init__.py +3 -0
- sast_mcp_server-0.6.0/sast_mcp_server/__main__.py +6 -0
- sast_mcp_server-0.6.0/sast_mcp_server/aggregator.py +118 -0
- sast_mcp_server-0.6.0/sast_mcp_server/auth.py +234 -0
- sast_mcp_server-0.6.0/sast_mcp_server/cache.py +377 -0
- sast_mcp_server-0.6.0/sast_mcp_server/enrichment/__init__.py +1 -0
- sast_mcp_server-0.6.0/sast_mcp_server/enrichment/ast_context.py +182 -0
- sast_mcp_server-0.6.0/sast_mcp_server/enrichment/git_diff.py +42 -0
- sast_mcp_server-0.6.0/sast_mcp_server/enrichment/ignore_manager.py +109 -0
- sast_mcp_server-0.6.0/sast_mcp_server/enrichment/patch_apply.py +77 -0
- sast_mcp_server-0.6.0/sast_mcp_server/enrichment/patch_prompt.py +124 -0
- sast_mcp_server-0.6.0/sast_mcp_server/integrations/__init__.py +17 -0
- sast_mcp_server-0.6.0/sast_mcp_server/integrations/defectdojo.py +97 -0
- sast_mcp_server-0.6.0/sast_mcp_server/integrations/github.py +99 -0
- sast_mcp_server-0.6.0/sast_mcp_server/models.py +100 -0
- sast_mcp_server-0.6.0/sast_mcp_server/prompts.py +272 -0
- sast_mcp_server-0.6.0/sast_mcp_server/resources.py +194 -0
- sast_mcp_server-0.6.0/sast_mcp_server/sarif.py +290 -0
- sast_mcp_server-0.6.0/sast_mcp_server/scanners/__init__.py +28 -0
- sast_mcp_server-0.6.0/sast_mcp_server/scanners/bandit.py +125 -0
- sast_mcp_server-0.6.0/sast_mcp_server/scanners/base.py +68 -0
- sast_mcp_server-0.6.0/sast_mcp_server/scanners/bearer.py +168 -0
- sast_mcp_server-0.6.0/sast_mcp_server/scanners/checkov.py +227 -0
- sast_mcp_server-0.6.0/sast_mcp_server/scanners/codeql.py +301 -0
- sast_mcp_server-0.6.0/sast_mcp_server/scanners/factory.py +119 -0
- sast_mcp_server-0.6.0/sast_mcp_server/scanners/gitleaks.py +186 -0
- sast_mcp_server-0.6.0/sast_mcp_server/scanners/njsscan.py +172 -0
- sast_mcp_server-0.6.0/sast_mcp_server/scanners/osv_scanner.py +180 -0
- sast_mcp_server-0.6.0/sast_mcp_server/scanners/semgrep.py +187 -0
- sast_mcp_server-0.6.0/sast_mcp_server/scanners/trivy.py +345 -0
- sast_mcp_server-0.6.0/sast_mcp_server/scanners/zap.py +249 -0
- sast_mcp_server-0.6.0/sast_mcp_server/server.py +1123 -0
- sast_mcp_server-0.6.0/smithery.yaml +41 -0
- sast_mcp_server-0.6.0/tests/__init__.py +1 -0
- sast_mcp_server-0.6.0/tests/fixtures/bandit_output.json +32 -0
- sast_mcp_server-0.6.0/tests/fixtures/bearer_output.json +24 -0
- sast_mcp_server-0.6.0/tests/fixtures/checkov_output.json +57 -0
- sast_mcp_server-0.6.0/tests/fixtures/codeql_output.sarif +96 -0
- sast_mcp_server-0.6.0/tests/fixtures/njsscan_output.json +33 -0
- sast_mcp_server-0.6.0/tests/fixtures/semgrep_output.json +37 -0
- sast_mcp_server-0.6.0/tests/fixtures/test_target.py +10 -0
- sast_mcp_server-0.6.0/tests/fixtures/trivy_output.json +87 -0
- sast_mcp_server-0.6.0/tests/fixtures/vulnerable_app.js +16 -0
- sast_mcp_server-0.6.0/tests/fixtures/zap_output.json +64 -0
- sast_mcp_server-0.6.0/tests/test_aggregator.py +221 -0
- sast_mcp_server-0.6.0/tests/test_auth.py +100 -0
- sast_mcp_server-0.6.0/tests/test_cache.py +165 -0
- sast_mcp_server-0.6.0/tests/test_enrichment.py +195 -0
- sast_mcp_server-0.6.0/tests/test_integrations.py +263 -0
- sast_mcp_server-0.6.0/tests/test_new_scanners.py +444 -0
- sast_mcp_server-0.6.0/tests/test_prompts.py +21 -0
- sast_mcp_server-0.6.0/tests/test_resources.py +48 -0
- sast_mcp_server-0.6.0/tests/test_sarif.py +192 -0
- sast_mcp_server-0.6.0/tests/test_scanners.py +320 -0
- sast_mcp_server-0.6.0/tests/test_server.py +567 -0
- sast_mcp_server-0.6.0/uv.lock +2127 -0
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
tags: ["v*"]
|
|
7
|
+
pull_request:
|
|
8
|
+
branches: [main]
|
|
9
|
+
|
|
10
|
+
jobs:
|
|
11
|
+
lint:
|
|
12
|
+
runs-on: ubuntu-latest
|
|
13
|
+
steps:
|
|
14
|
+
- uses: actions/checkout@v4
|
|
15
|
+
- uses: actions/setup-python@v5
|
|
16
|
+
with:
|
|
17
|
+
python-version: "3.12"
|
|
18
|
+
- run: pip install ruff
|
|
19
|
+
- run: ruff check sast_mcp_server/
|
|
20
|
+
|
|
21
|
+
test:
|
|
22
|
+
runs-on: ubuntu-latest
|
|
23
|
+
strategy:
|
|
24
|
+
matrix:
|
|
25
|
+
python-version: ["3.10", "3.11", "3.12", "3.13"]
|
|
26
|
+
steps:
|
|
27
|
+
- uses: actions/checkout@v4
|
|
28
|
+
- uses: actions/setup-python@v5
|
|
29
|
+
with:
|
|
30
|
+
python-version: ${{ matrix.python-version }}
|
|
31
|
+
- run: pip install -e ".[dev]"
|
|
32
|
+
- run: pytest tests/ -v --tb=short
|
|
33
|
+
|
|
34
|
+
publish:
|
|
35
|
+
needs: [lint, test]
|
|
36
|
+
runs-on: ubuntu-latest
|
|
37
|
+
if: startsWith(github.ref, 'refs/tags/v')
|
|
38
|
+
permissions:
|
|
39
|
+
id-token: write
|
|
40
|
+
steps:
|
|
41
|
+
- uses: actions/checkout@v4
|
|
42
|
+
- uses: actions/setup-python@v5
|
|
43
|
+
with:
|
|
44
|
+
python-version: "3.12"
|
|
45
|
+
- run: pip install build
|
|
46
|
+
- run: python -m build
|
|
47
|
+
- uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
*.so
|
|
6
|
+
|
|
7
|
+
# Distribution / packaging
|
|
8
|
+
dist/
|
|
9
|
+
build/
|
|
10
|
+
*.egg-info/
|
|
11
|
+
*.egg
|
|
12
|
+
|
|
13
|
+
# Virtual environments
|
|
14
|
+
.venv/
|
|
15
|
+
venv/
|
|
16
|
+
env/
|
|
17
|
+
|
|
18
|
+
# IDE
|
|
19
|
+
.vscode/
|
|
20
|
+
.idea/
|
|
21
|
+
*.swp
|
|
22
|
+
*.swo
|
|
23
|
+
*~
|
|
24
|
+
|
|
25
|
+
# Testing
|
|
26
|
+
.pytest_cache/
|
|
27
|
+
.coverage
|
|
28
|
+
htmlcov/
|
|
29
|
+
.mypy_cache/
|
|
30
|
+
|
|
31
|
+
# OS
|
|
32
|
+
.DS_Store
|
|
33
|
+
Thumbs.db
|
|
34
|
+
|
|
35
|
+
# Project-specific
|
|
36
|
+
.sast-mcp-ignore.json
|
|
37
|
+
|
|
38
|
+
# Testing
|
|
39
|
+
.pytest_cache/
|
|
40
|
+
.coverage
|
|
41
|
+
htmlcov/
|
|
42
|
+
.mypy_cache/
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file.
|
|
4
|
+
|
|
5
|
+
## [0.6.0] - 2026-06-12
|
|
6
|
+
### Added
|
|
7
|
+
- **DefectDojo integration**: New `upload_to_defectdojo` tool imports a SARIF
|
|
8
|
+
export into a DefectDojo engagement (`/api/v2/import-scan/`). Configured via
|
|
9
|
+
`DEFECTDOJO_URL` / `DEFECTDOJO_API_KEY` environment variables.
|
|
10
|
+
- **GitHub Code Scanning integration**: New `upload_to_github` tool pushes a
|
|
11
|
+
gzipped+base64 SARIF report to `/repos/{owner}/{repo}/code-scanning/sarifs`.
|
|
12
|
+
Configured via the `GITHUB_TOKEN` environment variable.
|
|
13
|
+
- **AI patch generation**: New `generate_fix_prompt` tool packages a cached
|
|
14
|
+
finding's vulnerable code and context into an LLM-ready prompt that asks for
|
|
15
|
+
a strict unified diff; the new `apply_patch` tool applies an agent-supplied
|
|
16
|
+
diff to disk via `git apply` (rejecting paths that escape the target).
|
|
17
|
+
- **`integrations/` package** (`defectdojo.py`, `github.py`) and
|
|
18
|
+
**`enrichment/patch_prompt.py`** / **`enrichment/patch_apply.py`** modules.
|
|
19
|
+
- `ScanCache.find_finding_by_hash()` to recover full finding details by hash.
|
|
20
|
+
|
|
21
|
+
### Changed
|
|
22
|
+
- Standardized a `name` class attribute on every scanner via `BaseScanner`,
|
|
23
|
+
replacing the inconsistent `name` property on the newer scanners.
|
|
24
|
+
- Added `httpx` as an explicit runtime dependency.
|
|
25
|
+
- The `fix_vulnerability` MCP prompt now drives the
|
|
26
|
+
`generate_fix_prompt` → `apply_patch` workflow.
|
|
27
|
+
|
|
28
|
+
### Fixed
|
|
29
|
+
- Resolved all `ruff` lint violations across the scanner modules so CI lint
|
|
30
|
+
passes again; cleaned up dead CVSS-parsing code in the OSV scanner and gave
|
|
31
|
+
it an honest `database_specific.severity` mapping.
|
|
32
|
+
- Aligned scanner `scan()` signatures with the base class
|
|
33
|
+
(`modified_files: set[str] | None`), fixing Liskov/type violations.
|
|
34
|
+
- Eliminated `coroutine was never awaited` warnings in the scanner test suite.
|
|
35
|
+
|
|
36
|
+
## [0.5.0] - 2026-06-12
|
|
37
|
+
### Added
|
|
38
|
+
- **Gitleaks Integration**: Deep secret scanning across entire git histories to detect leaked API keys and tokens from past commits.
|
|
39
|
+
- **OSV-Scanner Integration**: Advanced Software Composition Analysis (SCA) mapped to the Google OSV database.
|
|
40
|
+
- **OWASP ZAP Integration (DAST)**: Dynamic Application Security Testing capability via Docker Compose orchestration (`run_active_scan` tool).
|
|
41
|
+
|
|
42
|
+
## [0.4.0] - 2026-06-11
|
|
43
|
+
### Added
|
|
44
|
+
- **Streamable HTTP Transport**: Added `--transport streamable-http` for production remote deployments (MCP 2026 standard).
|
|
45
|
+
- **JWT Authentication**: Replaced static API key with JWT scoped tokens (`SAST_MCP_JWT_SECRET`).
|
|
46
|
+
- **Baseline Caching**: Scans are automatically cached. New tools `save_baseline` and `compare_baseline` to track security trends and regressions over time.
|
|
47
|
+
- **MCP Resources**: Expose read-only security dashboards, cache metadata, configuration, and scanner status via `sast://` resource URIs.
|
|
48
|
+
- **MCP Prompts**: Added pre-built security workflows for agents: `security_review`, `fix_vulnerability`, `pr_security_check`, and `compliance_report`.
|
|
49
|
+
|
|
50
|
+
### Changed
|
|
51
|
+
- SSE transport is now deprecated in favor of Streamable HTTP.
|
|
52
|
+
- Dependency `pyjwt` added for authentication capabilities.
|
|
53
|
+
|
|
54
|
+
## [0.3.0] - 2026-06-09
|
|
55
|
+
|
|
56
|
+
### Added
|
|
57
|
+
- **Trivy scanner** — 5th scanner covering dependency CVEs (SCA), secret detection, and IaC misconfigurations (Terraform, Kubernetes, Docker, CloudFormation)
|
|
58
|
+
- **CodeQL scanner** — 6th scanner providing deep semantic SAST with data-flow and taint tracking (by GitHub); supports database caching for faster re-scans
|
|
59
|
+
- **Checkov scanner** — 7th scanner for Infrastructure-as-Code policy enforcement with 1000+ built-in checks
|
|
60
|
+
- **`scan_all` tool** — run ALL installed scanners in parallel with automatic content-based deduplication; shows which scanners were used vs. skipped
|
|
61
|
+
- **`export_sarif` tool** — export scan results in SARIF 2.1.0 format for CI/CD integration (GitHub Code Scanning, GitLab SAST, Azure DevOps)
|
|
62
|
+
- **SARIF module** (`sarif.py`) — bidirectional SARIF 2.1.0 conversion: Finding→SARIF for export, SARIF→Finding for CodeQL import
|
|
63
|
+
- **Multi-scanner aggregator** (`aggregator.py`) — concurrent scanner execution with `asyncio.gather()`, hash-based deduplication keeping highest severity, severity-sorted output
|
|
64
|
+
- **CodeQL language auto-detection** — counts file extensions in target directory to pick the dominant supported language
|
|
65
|
+
- **CodeQL database caching** — stores database in `.sast-mcp-codeql-db/` to avoid rebuilding on every scan
|
|
66
|
+
- **Trivy multi-type parsing** — unified parser for three different Trivy JSON output structures (Vulnerabilities, Secrets, Misconfigurations)
|
|
67
|
+
- **Checkov multi-block parsing** — handles both single-dict and list-of-dicts output formats from Checkov
|
|
68
|
+
- **IaC language detection** — detects Terraform, Kubernetes YAML, Dockerfile, CloudFormation, Bicep, and Helm files
|
|
69
|
+
- **SARIF data flow support** — serializes `DataFlowStep` traces into SARIF `codeFlows` / `threadFlows`
|
|
70
|
+
- **SARIF fingerprint preservation** — round-trips finding hashes through SARIF `fingerprints.primaryLocationHash`
|
|
71
|
+
- **Comprehensive test suite** — 25+ new tests covering all three new scanners, SARIF export/parse, aggregator deduplication, and new server tools
|
|
72
|
+
|
|
73
|
+
### Changed
|
|
74
|
+
- Scanner factory now registers 7 scanners (was 4)
|
|
75
|
+
- MCP server instructions updated to describe all 7 scanners and new tools
|
|
76
|
+
- Updated `pyproject.toml` keywords to include trivy, codeql, checkov, sarif
|
|
77
|
+
- Version bumped to 0.3.0
|
|
78
|
+
|
|
79
|
+
## [0.2.0] - 2026-06-07
|
|
80
|
+
|
|
81
|
+
### Added
|
|
82
|
+
- **Semgrep scanner** — 4th scanner supporting 30+ languages with community rule registry
|
|
83
|
+
- **SSE/HTTP transport** — remote deployments via `--transport sse --port 8080`
|
|
84
|
+
- **API key authentication** — `SAST_MCP_API_KEY` env var for secure remote access
|
|
85
|
+
- **`list_scanners` tool** — check available scanners and their installation status
|
|
86
|
+
- **`unignore_vulnerability` tool** — reverse a previous ignore action
|
|
87
|
+
- **`list_ignored_vulnerabilities` tool** — view all suppressed findings
|
|
88
|
+
- **Typed data models** — `Finding`, `SeverityLevel`, `ConfidenceLevel` enums
|
|
89
|
+
- **Tree-sitter JavaScript/TypeScript support** — AST-aware context for JS/TS files
|
|
90
|
+
- **Configurable scan timeout** — `SAST_MCP_TIMEOUT` env var (default 300s)
|
|
91
|
+
- **Structured logging** — logs to stderr only (prevents stdio transport corruption)
|
|
92
|
+
- **Gemini CLI extension** — `gemini-extension.json` + `GEMINI.md` context file
|
|
93
|
+
- **Cross-platform docs** — setup guides for Claude Desktop, OpenAI, Cursor
|
|
94
|
+
- **Smithery.ai manifest** — marketplace listing for discovery
|
|
95
|
+
- **Docker support** — Dockerfile with all scanners pre-installed
|
|
96
|
+
- **CI/CD pipeline** — GitHub Actions for lint, test, and PyPI publish
|
|
97
|
+
- **Comprehensive test suite** — 30+ tests covering all scanners, enrichment, and server logic
|
|
98
|
+
- **Atomic ignore file writes** — prevents corruption on crash
|
|
99
|
+
- **Timestamps in ignore entries** — audit trail for suppressed findings
|
|
100
|
+
- **Backward-compatible ignore format** — migrates old `hash → reason` format
|
|
101
|
+
|
|
102
|
+
### Fixed
|
|
103
|
+
- **Bearer confidence hardcoded to HIGH** — now parsed from actual output
|
|
104
|
+
- **njsscan severity hardcoded to HIGH** — now parsed from metadata
|
|
105
|
+
- **njsscan only processed first file** — now handles all files in multi-file findings
|
|
106
|
+
- **Scanner errors silently swallowed** — stderr now propagated in error messages
|
|
107
|
+
- **Hash instability** — finding hashes no longer depend on line numbers
|
|
108
|
+
- **tree-sitter deps declared but unused** — now actually used for JS/TS context
|
|
109
|
+
- **Invalid severity/confidence accepted** — now validated against enum values
|
|
110
|
+
- **Logging to stdout corrupted stdio transport** — fixed to log to stderr only
|
|
111
|
+
|
|
112
|
+
### Changed
|
|
113
|
+
- Restructured to proper Python package layout (`sast_mcp_server/`)
|
|
114
|
+
- Updated `pyproject.toml` with full PyPI metadata, entry points, and tool configs
|
|
115
|
+
- Scanner factory now supports dynamic registration and metadata queries
|
|
116
|
+
- Replaced raw `Dict` returns with typed `Finding` dataclass
|
|
117
|
+
|
|
118
|
+
## [0.1.0] - Initial Release
|
|
119
|
+
|
|
120
|
+
### Added
|
|
121
|
+
- Basic MCP server with Bandit, njsscan, and Bearer scanners
|
|
122
|
+
- AST context enrichment for Python files
|
|
123
|
+
- Git diff filtering
|
|
124
|
+
- Finding ignore list
|
|
125
|
+
- Severity and confidence filtering
|
|
126
|
+
- Pagination support
|
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
# CLAUDE.md
|
|
2
|
+
|
|
3
|
+
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
|
|
4
|
+
|
|
5
|
+
## Project Overview
|
|
6
|
+
|
|
7
|
+
SAST MCP Server — a [FastMCP](https://github.com/jlowin/fastmcp)-based MCP server that exposes
|
|
8
|
+
10 static/dynamic security scanners (Bandit, njsscan, Bearer, Semgrep, Trivy, CodeQL, Checkov,
|
|
9
|
+
Gitleaks, OSV-Scanner, OWASP ZAP) as tools for AI agents (Gemini CLI, Claude Desktop, Cursor,
|
|
10
|
+
OpenAI Agents, etc.). Findings are normalized into a typed `Finding` model, enriched with
|
|
11
|
+
AST-aware code context, deduplicated across scanners, and can be exported as SARIF 2.1.0 for
|
|
12
|
+
CI/CD.
|
|
13
|
+
|
|
14
|
+
## Development Commands
|
|
15
|
+
|
|
16
|
+
```bash
|
|
17
|
+
# Install with dev dependencies (editable)
|
|
18
|
+
pip install -e ".[dev]"
|
|
19
|
+
|
|
20
|
+
# Run the full test suite
|
|
21
|
+
pytest tests/ -v
|
|
22
|
+
|
|
23
|
+
# Run a single test file
|
|
24
|
+
pytest tests/test_scanners.py -v
|
|
25
|
+
|
|
26
|
+
# Run a single test
|
|
27
|
+
pytest tests/test_scanners.py::TestBanditScanner::test_parse_findings -v
|
|
28
|
+
|
|
29
|
+
# Lint (this is what CI runs)
|
|
30
|
+
ruff check sast_mcp_server/
|
|
31
|
+
|
|
32
|
+
# Type check (configured in pyproject.toml, not enforced in CI)
|
|
33
|
+
mypy sast_mcp_server/
|
|
34
|
+
|
|
35
|
+
# Run the server locally (stdio transport, for MCP client integration)
|
|
36
|
+
python -m sast_mcp_server
|
|
37
|
+
# or, once installed:
|
|
38
|
+
sast-mcp-server
|
|
39
|
+
|
|
40
|
+
# Run as a remote HTTP server (Streamable HTTP is the current standard; SSE is deprecated)
|
|
41
|
+
sast-mcp-server --transport streamable-http --host 0.0.0.0 --port 8080
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
CI (`.github/workflows/ci.yml`) runs `ruff check sast_mcp_server/` (on Python 3.12 only) and
|
|
45
|
+
`pytest tests/ -v --tb=short` across Python 3.10–3.13, then publishes to PyPI on `v*` tags.
|
|
46
|
+
|
|
47
|
+
## Architecture
|
|
48
|
+
|
|
49
|
+
### Entry point and server registration
|
|
50
|
+
|
|
51
|
+
`sast_mcp_server/server.py` builds the `FastMCP` instance and registers all `@mcp.tool()`
|
|
52
|
+
functions (`scan_vulnerabilities`, `scan_all`, `scan_git_history`, `run_active_scan`,
|
|
53
|
+
`export_sarif`, `save_baseline`, `compare_baseline`, `list_scanners`, `ignore_vulnerability`,
|
|
54
|
+
`unignore_vulnerability`, `list_ignored_vulnerabilities`, plus the v0.6.0 tools
|
|
55
|
+
`upload_to_defectdojo`, `upload_to_github`, `generate_fix_prompt`, `apply_patch`).
|
|
56
|
+
`register_resources(mcp)` and `register_prompts(mcp)` wire up MCP Resources and Prompts from
|
|
57
|
+
`resources.py` / `prompts.py`. The decorated tool functions stay directly callable, so tests
|
|
58
|
+
import and `await` them as plain functions.
|
|
59
|
+
|
|
60
|
+
**Critical constraint:** all logging is configured to go to `stderr`
|
|
61
|
+
(`logging.basicConfig(..., stream=sys.stderr)`). Never write to stdout — it corrupts the stdio
|
|
62
|
+
MCP transport.
|
|
63
|
+
|
|
64
|
+
### Scanner plugin architecture
|
|
65
|
+
|
|
66
|
+
Each scanner lives in `sast_mcp_server/scanners/<name>.py` and subclasses `BaseScanner`
|
|
67
|
+
(`scanners/base.py`), implementing:
|
|
68
|
+
- `check_dependency()` — raises `RuntimeError` if the underlying CLI/binary isn't available.
|
|
69
|
+
- `scan(target_path, min_severity, min_confidence, modified_files, timeout)` — runs the tool
|
|
70
|
+
(usually via `asyncio.create_subprocess_exec`, parsing JSON/SARIF stdout or a report file) and
|
|
71
|
+
returns `list[Finding]`.
|
|
72
|
+
|
|
73
|
+
`scanners/factory.py` is the single source of truth for scanner registration:
|
|
74
|
+
- `_SCANNER_REGISTRY` maps scanner name → class (used by `ScannerFactory.get_scanner()` /
|
|
75
|
+
`list_scanners()`).
|
|
76
|
+
- `SCANNER_INFO` provides display metadata (description, languages, install command) for the
|
|
77
|
+
`list_scanners` tool and `sast://scanners` resource.
|
|
78
|
+
|
|
79
|
+
Every scanner sets a `name` class attribute (declared as `ClassVar[str]` on `BaseScanner`) equal
|
|
80
|
+
to its registry key; it is used both as `Finding.scanner` and by callers that persist results
|
|
81
|
+
(e.g. `cache.save_scan(scanner.name, ...)`). To add a new scanner: implement the `BaseScanner`
|
|
82
|
+
subclass (including `name`), register it in both `_SCANNER_REGISTRY` and `SCANNER_INFO` in
|
|
83
|
+
`factory.py`, and export it from `scanners/__init__.py`.
|
|
84
|
+
|
|
85
|
+
ZAP is the only DAST scanner — its `scan()` raises `NotImplementedError` and instead exposes
|
|
86
|
+
`run_dynamic_scan()`, invoked via the dedicated `run_active_scan` tool, which orchestrates
|
|
87
|
+
`docker compose up` → wait for the URL → run the `zap-baseline.py` container → `docker compose down`.
|
|
88
|
+
|
|
89
|
+
### Finding pipeline
|
|
90
|
+
|
|
91
|
+
1. Scanner produces raw `Finding` objects (`models.py`): `scanner`, `title`, `message`, `file`,
|
|
92
|
+
`line_start/end`, `severity` (`SeverityLevel`), `confidence` (`ConfidenceLevel`), `context`,
|
|
93
|
+
`language`, `finding_hash`, optional `data_flow: list[DataFlowStep]`.
|
|
94
|
+
2. `finding_hash` is computed by `BaseScanner.generate_hash(file_path, title, code_snippet)` —
|
|
95
|
+
a SHA-256 of `file:title:normalized_snippet`, designed to stay stable across line-number
|
|
96
|
+
shifts but change if the vulnerable code moves to a different file. This hash is the key used for
|
|
97
|
+
ignore-list entries, dedup, and baseline diffing.
|
|
98
|
+
3. `enrichment/ast_context.get_function_context()` expands a bare line number into the full
|
|
99
|
+
enclosing function/class: native `ast` for Python, tree-sitter for JS/TS (if installed),
|
|
100
|
+
falling back to ±10 lines for everything else.
|
|
101
|
+
4. `enrichment/ignore_manager.IgnoreManager` filters out findings whose hash is in
|
|
102
|
+
`<target>/.sast-mcp-ignore.json` (atomic write via temp-file + `os.replace`).
|
|
103
|
+
5. `enrichment/git_diff.get_modified_files()` supports `git_diff_only=True` by intersecting
|
|
104
|
+
findings with `git diff --name-only HEAD`.
|
|
105
|
+
|
|
106
|
+
### Multi-scanner aggregation (`aggregator.py`)
|
|
107
|
+
|
|
108
|
+
`run_all_scanners()` checks `check_dependency()` for every registered scanner, runs the
|
|
109
|
+
available ones concurrently with `asyncio.gather`, then `_deduplicate_findings()` collapses
|
|
110
|
+
findings sharing a `finding_hash` (keeping the higher-severity / data-flow-bearing copy), and
|
|
111
|
+
sorts the result by `SEVERITY_ORDER` (CRITICAL first). Returns
|
|
112
|
+
`(findings, used_scanners, skipped_scanners)`.
|
|
113
|
+
|
|
114
|
+
### Caching & baselines (`cache.py`)
|
|
115
|
+
|
|
116
|
+
`ScanCache` persists scan results as JSON under `<target_path>/.sast-mcp-cache/`:
|
|
117
|
+
- `save_scan(..., tag=...)` writes a timestamped file and, if `tag` is set, also overwrites
|
|
118
|
+
`<scanner>_tag_<tag>.json` (used as a named baseline).
|
|
119
|
+
- `compare(baseline, current_findings)` returns a `ScanDiff` (new/fixed/unchanged findings +
|
|
120
|
+
per-severity trend), driving the `compare_baseline` tool and `pr_security_check` prompt.
|
|
121
|
+
- `cleanup_expired()` removes non-tagged scans older than `SAST_MCP_CACHE_TTL` (default 24h).
|
|
122
|
+
|
|
123
|
+
### SARIF (`sarif.py`)
|
|
124
|
+
|
|
125
|
+
Bidirectional SARIF 2.1.0 conversion: `findings_to_sarif()` powers `export_sarif`;
|
|
126
|
+
`parse_sarif()` is used by the CodeQL scanner to ingest CodeQL's native SARIF output and
|
|
127
|
+
re-enrich it with AST context and stable hashes.
|
|
128
|
+
|
|
129
|
+
### Dashboard integrations (`integrations/`)
|
|
130
|
+
|
|
131
|
+
`integrations/defectdojo.py` and `integrations/github.py` push a SARIF export to external
|
|
132
|
+
platforms via `httpx.AsyncClient`. Credentials come **only** from environment variables
|
|
133
|
+
(`DEFECTDOJO_URL`/`DEFECTDOJO_API_KEY`, `GITHUB_TOKEN`) so secrets never land in tool arguments
|
|
134
|
+
or transcripts. Both raise the shared `IntegrationError` (from `integrations/__init__.py`) on
|
|
135
|
+
missing config, unreadable files, or non-2xx responses; the server tools catch it and return a
|
|
136
|
+
friendly message. GitHub's endpoint requires the SARIF be gzip-compressed then base64-encoded.
|
|
137
|
+
|
|
138
|
+
### AI patch generation (`enrichment/patch_prompt.py`, `enrichment/patch_apply.py`)
|
|
139
|
+
|
|
140
|
+
Division of responsibility: the **server packages context**, the **agent generates the diff**,
|
|
141
|
+
and `apply_patch` writes it. `build_patch_prompt()` recovers a finding by hash via
|
|
142
|
+
`ScanCache.find_finding_by_hash()`, reads a line-numbered window of the source, and renders a
|
|
143
|
+
prompt instructing an LLM to emit a strict unified diff. `apply_unified_diff()` shells out to
|
|
144
|
+
`git apply` (works without a repo; rejects paths escaping the target unless `--unsafe-paths` is given,
|
|
145
|
+
which we never pass) and supports `check_only` for a dry run.
|
|
146
|
+
|
|
147
|
+
### Auth (`auth.py`)
|
|
148
|
+
|
|
149
|
+
`AuthProvider` supports three modes selected by env vars: no auth (stdio/local — all scopes
|
|
150
|
+
granted), static `SAST_MCP_API_KEY` (legacy), or `SAST_MCP_JWT_SECRET` (HMAC-SHA256 JWT with
|
|
151
|
+
`scopes`/`exp` claims, scopes drawn from `SCOPE_SCAN_READ`/`SCOPE_SCAN_WRITE`/`SCOPE_CONFIG_WRITE`/
|
|
152
|
+
`SCOPE_ADMIN`). `create_jwt()` is a helper for minting test tokens.
|
|
153
|
+
|
|
154
|
+
### MCP Resources & Prompts
|
|
155
|
+
|
|
156
|
+
- `resources.py` exposes read-only `sast://` URIs (`sast://scanners`, `sast://config`,
|
|
157
|
+
`sast://cache/{target_path}/latest`, `sast://cache/{target_path}/baseline/{tag}`,
|
|
158
|
+
`sast://ignore/{target_path}`, `sast://dashboard/{target_path}`) backed by `ScanCache` /
|
|
159
|
+
`IgnoreManager` / `ScannerFactory` — no scans are run.
|
|
160
|
+
- `prompts.py` defines reusable workflow templates (`security_review`, `fix_vulnerability`,
|
|
161
|
+
`pr_security_check`, `compliance_report`) that return instruction strings guiding an agent
|
|
162
|
+
through a sequence of tool calls.
|
|
163
|
+
|
|
164
|
+
## Testing conventions
|
|
165
|
+
|
|
166
|
+
- Tests live in `tests/`, fixtures (recorded scanner JSON/SARIF output) in `tests/fixtures/`.
|
|
167
|
+
- Subprocess-based scanners are tested by patching `asyncio.create_subprocess_exec` and
|
|
168
|
+
`asyncio.wait_for` to return a mocked process whose `communicate()` resolves to fixture bytes
|
|
169
|
+
(see the `_mock_subprocess` helper duplicated in `test_scanners.py` / `test_new_scanners.py`).
|
|
170
|
+
- `pytest-asyncio` is in `auto` mode (`pyproject.toml`), so `async def test_*` functions need no
|
|
171
|
+
extra decorator; `@pytest.mark.asyncio` is still applied throughout for consistency.
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
FROM python:3.12-slim AS base
|
|
2
|
+
|
|
3
|
+
# Install system dependencies for Bearer
|
|
4
|
+
RUN apt-get update && apt-get install -y --no-install-recommends \
|
|
5
|
+
git \
|
|
6
|
+
curl \
|
|
7
|
+
&& rm -rf /var/lib/apt/lists/*
|
|
8
|
+
|
|
9
|
+
# Install Bearer CLI
|
|
10
|
+
RUN curl -sfL https://raw.githubusercontent.com/Bearer/bearer/main/contrib/install.sh | sh
|
|
11
|
+
|
|
12
|
+
# Set up Python environment
|
|
13
|
+
WORKDIR /app
|
|
14
|
+
|
|
15
|
+
# Install Python package and scanner dependencies
|
|
16
|
+
COPY pyproject.toml README.md LICENSE ./
|
|
17
|
+
COPY sast_mcp_server/ ./sast_mcp_server/
|
|
18
|
+
|
|
19
|
+
RUN pip install --no-cache-dir . bandit njsscan
|
|
20
|
+
|
|
21
|
+
# Create non-root user for security
|
|
22
|
+
RUN useradd --create-home --shell /bin/bash scanner
|
|
23
|
+
USER scanner
|
|
24
|
+
|
|
25
|
+
# Default: run the MCP server in stdio mode
|
|
26
|
+
ENTRYPOINT ["sast-mcp-server"]
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
## Tool Purpose
|
|
2
|
+
|
|
3
|
+
This extension provides **SAST (Static Application Security Testing)** scanning capabilities.
|
|
4
|
+
It can scan codebases for security vulnerabilities using industry-standard tools:
|
|
5
|
+
Bandit (Python), njsscan (JavaScript/Node.js), Bearer (multi-language with data-flow analysis),
|
|
6
|
+
Semgrep (30+ languages with community rule registry), Trivy (vulnerabilities, secrets, IaC),
|
|
7
|
+
CodeQL (deep semantic analysis with taint tracking), Checkov (Infrastructure-as-Code policies),
|
|
8
|
+
Gitleaks (deep secret scanning in git history), OSV-Scanner (Software Composition Analysis),
|
|
9
|
+
and OWASP ZAP (Dynamic Application Security Testing via Docker orchestration).
|
|
10
|
+
|
|
11
|
+
## Available Tools
|
|
12
|
+
|
|
13
|
+
1. **`scan_vulnerabilities`** — Scan a directory for security vulnerabilities (SAST)
|
|
14
|
+
2. **`scan_all`** — Run ALL installed scanners in parallel with deduplication
|
|
15
|
+
3. **`scan_git_history`** — Scan the entire `.git` history for leaked secrets (Gitleaks)
|
|
16
|
+
4. **`run_active_scan`** — Run a dynamic (DAST) baseline scan by orchestrating Docker Compose (ZAP)
|
|
17
|
+
5. **`export_sarif`** — Export scan results in SARIF 2.1.0 format for CI/CD
|
|
18
|
+
6. **`list_scanners`** — Check which scanners are installed and available
|
|
19
|
+
7. **`ignore_vulnerability`** — Suppress a known false positive
|
|
20
|
+
8. **`unignore_vulnerability`** — Re-enable a previously ignored finding
|
|
21
|
+
9. **`list_ignored_vulnerabilities`** — Show all currently suppressed findings
|
|
22
|
+
10. **`save_baseline`** — Save a scan result as a baseline for future comparison
|
|
23
|
+
11. **`compare_baseline`** — Compare current codebase against a saved baseline
|
|
24
|
+
12. **`upload_to_defectdojo`** — Import a SARIF export into a DefectDojo engagement
|
|
25
|
+
13. **`upload_to_github`** — Upload a SARIF report to GitHub Code Scanning
|
|
26
|
+
14. **`generate_fix_prompt`** — Build an LLM prompt to fix a finding as a unified diff
|
|
27
|
+
15. **`apply_patch`** — Apply an agent-generated unified diff to disk (via `git apply`)
|
|
28
|
+
|
|
29
|
+
## Usage Patterns
|
|
30
|
+
|
|
31
|
+
### Initial Setup
|
|
32
|
+
Always start by calling `list_scanners` to verify which tools are available on the user's system.
|
|
33
|
+
|
|
34
|
+
### Choosing a Scanner
|
|
35
|
+
- **Python projects** → use `scanner_name="bandit"` or `scanner_name="semgrep"`
|
|
36
|
+
- **JavaScript/Node.js projects** → use `scanner_name="njsscan"` or `scanner_name="semgrep"`
|
|
37
|
+
- **Multi-language or uncertain** → use `scanner_name="semgrep"` (broadest coverage)
|
|
38
|
+
- **Data-flow analysis needed** → use `scanner_name="bearer"`
|
|
39
|
+
- **Dependency CVEs / supply chain** → use `scanner_name="osv-scanner"` or `trivy`
|
|
40
|
+
- **Secrets in git history** → use `scan_git_history`
|
|
41
|
+
- **Deep semantic analysis** → use `scanner_name="codeql"` (slower but finds complex flaws)
|
|
42
|
+
- **Infrastructure-as-Code policies** → use `scanner_name="checkov"`
|
|
43
|
+
- **Dynamic Application Testing (DAST)** → use `run_active_scan` (orchestrates ZAP via Docker)
|
|
44
|
+
- **Comprehensive scan** → use `scan_all` to run everything installed at once
|
|
45
|
+
|
|
46
|
+
### Scanning Workflow
|
|
47
|
+
1. Call `scan_vulnerabilities` (or `scan_all` for comprehensive coverage) with the project root path
|
|
48
|
+
2. Review findings — focus on HIGH and CRITICAL severity first
|
|
49
|
+
3. For each finding, suggest a fix based on the provided code context
|
|
50
|
+
4. If the user confirms a false positive, use `ignore_vulnerability` with a clear reason
|
|
51
|
+
|
|
52
|
+
### Regression & Trend Tracking (Baselines)
|
|
53
|
+
When establishing a security posture or before a major refactor:
|
|
54
|
+
1. Call `save_baseline(tag="main")` to snapshot the current state
|
|
55
|
+
2. After changes, call `compare_baseline(tag="main")` to see newly introduced vs. fixed vulnerabilities
|
|
56
|
+
|
|
57
|
+
### Using MCP Prompts (Workflows)
|
|
58
|
+
You have access to pre-built security workflows via MCP prompts. You should invoke these when asked to perform a broad security task:
|
|
59
|
+
- `security_review`: For a comprehensive codebase assessment
|
|
60
|
+
- `fix_vulnerability`: For deep-dive remediation of a specific finding hash
|
|
61
|
+
- `pr_security_check`: When reviewing a Pull Request or git diff
|
|
62
|
+
- `compliance_report`: When asked about OWASP Top 10 or PCI-DSS compliance
|
|
63
|
+
|
|
64
|
+
### Using MCP Resources (Context)
|
|
65
|
+
You can access read-only contextual data without running a scan by requesting these resources:
|
|
66
|
+
- `sast://dashboard/{target_path}`: View a high-level severity distribution and overall security posture
|
|
67
|
+
- `sast://cache/{target_path}/latest`: View metadata of the most recent scans without waiting for execution
|
|
68
|
+
|
|
69
|
+
### CI/CD Integration
|
|
70
|
+
Use `export_sarif` to generate SARIF 2.1.0 output that can be uploaded to:
|
|
71
|
+
- GitHub Code Scanning
|
|
72
|
+
- GitLab SAST
|
|
73
|
+
- Azure DevOps
|
|
74
|
+
- Any SARIF-compatible security dashboard
|
|
75
|
+
|
|
76
|
+
### Dashboard Upload & AI Remediation (v0.6.0)
|
|
77
|
+
- After `export_sarif(output_path=...)`, push the report to a dashboard with
|
|
78
|
+
`upload_to_defectdojo` or `upload_to_github`. Credentials come from
|
|
79
|
+
environment variables only (`DEFECTDOJO_URL`/`DEFECTDOJO_API_KEY`,
|
|
80
|
+
`GITHUB_TOKEN`) — never ask the user to paste secrets as arguments.
|
|
81
|
+
- To remediate a finding: call `generate_fix_prompt(target_path, finding_hash)`
|
|
82
|
+
to get an LLM-ready prompt, produce a unified diff, then `apply_patch` with
|
|
83
|
+
`check_only=True` before applying for real. The finding must come from a
|
|
84
|
+
cached scan (`scan_all`/`save_baseline`).
|
|
85
|
+
|
|
86
|
+
### Incremental Scanning
|
|
87
|
+
When reviewing a pull request or recent changes, use `git_diff_only=True` to only scan modified files.
|
|
88
|
+
|
|
89
|
+
### Pagination
|
|
90
|
+
For large codebases, use `min_severity="HIGH"` to focus on critical issues first,
|
|
91
|
+
or use `limit` and `offset` for pagination.
|
|
92
|
+
|
|
93
|
+
## Best Practices
|
|
94
|
+
- Always explain findings in plain language, not just technical jargon
|
|
95
|
+
- Provide concrete code fix suggestions alongside each finding
|
|
96
|
+
- Group related findings together when presenting results
|
|
97
|
+
- Use `min_severity="MEDIUM"` as a sensible default for most scans
|
|
98
|
+
- When a scan returns many results, summarize the top issues before showing details
|
|
99
|
+
- Use `scan_all` for the most comprehensive security assessment
|
|
100
|
+
- **New for v0.4.0**: Use `compare_baseline` to highlight newly introduced vulnerabilities instead of overwhelming the user with legacy issues.
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Abdellah
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|