sast-mcp-server 0.6.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. sast_mcp_server-0.6.0/.github/workflows/ci.yml +47 -0
  2. sast_mcp_server-0.6.0/.gitignore +42 -0
  3. sast_mcp_server-0.6.0/CHANGELOG.md +126 -0
  4. sast_mcp_server-0.6.0/CLAUDE.md +171 -0
  5. sast_mcp_server-0.6.0/Dockerfile +26 -0
  6. sast_mcp_server-0.6.0/GEMINI.md +100 -0
  7. sast_mcp_server-0.6.0/LICENSE +21 -0
  8. sast_mcp_server-0.6.0/PKG-INFO +494 -0
  9. sast_mcp_server-0.6.0/README.md +449 -0
  10. sast_mcp_server-0.6.0/docs/claude-desktop.md +82 -0
  11. sast_mcp_server-0.6.0/docs/cursor.md +67 -0
  12. sast_mcp_server-0.6.0/docs/openai.md +53 -0
  13. sast_mcp_server-0.6.0/gemini-extension.json +15 -0
  14. sast_mcp_server-0.6.0/pyproject.toml +78 -0
  15. sast_mcp_server-0.6.0/sast_mcp_server/__init__.py +3 -0
  16. sast_mcp_server-0.6.0/sast_mcp_server/__main__.py +6 -0
  17. sast_mcp_server-0.6.0/sast_mcp_server/aggregator.py +118 -0
  18. sast_mcp_server-0.6.0/sast_mcp_server/auth.py +234 -0
  19. sast_mcp_server-0.6.0/sast_mcp_server/cache.py +377 -0
  20. sast_mcp_server-0.6.0/sast_mcp_server/enrichment/__init__.py +1 -0
  21. sast_mcp_server-0.6.0/sast_mcp_server/enrichment/ast_context.py +182 -0
  22. sast_mcp_server-0.6.0/sast_mcp_server/enrichment/git_diff.py +42 -0
  23. sast_mcp_server-0.6.0/sast_mcp_server/enrichment/ignore_manager.py +109 -0
  24. sast_mcp_server-0.6.0/sast_mcp_server/enrichment/patch_apply.py +77 -0
  25. sast_mcp_server-0.6.0/sast_mcp_server/enrichment/patch_prompt.py +124 -0
  26. sast_mcp_server-0.6.0/sast_mcp_server/integrations/__init__.py +17 -0
  27. sast_mcp_server-0.6.0/sast_mcp_server/integrations/defectdojo.py +97 -0
  28. sast_mcp_server-0.6.0/sast_mcp_server/integrations/github.py +99 -0
  29. sast_mcp_server-0.6.0/sast_mcp_server/models.py +100 -0
  30. sast_mcp_server-0.6.0/sast_mcp_server/prompts.py +272 -0
  31. sast_mcp_server-0.6.0/sast_mcp_server/resources.py +194 -0
  32. sast_mcp_server-0.6.0/sast_mcp_server/sarif.py +290 -0
  33. sast_mcp_server-0.6.0/sast_mcp_server/scanners/__init__.py +28 -0
  34. sast_mcp_server-0.6.0/sast_mcp_server/scanners/bandit.py +125 -0
  35. sast_mcp_server-0.6.0/sast_mcp_server/scanners/base.py +68 -0
  36. sast_mcp_server-0.6.0/sast_mcp_server/scanners/bearer.py +168 -0
  37. sast_mcp_server-0.6.0/sast_mcp_server/scanners/checkov.py +227 -0
  38. sast_mcp_server-0.6.0/sast_mcp_server/scanners/codeql.py +301 -0
  39. sast_mcp_server-0.6.0/sast_mcp_server/scanners/factory.py +119 -0
  40. sast_mcp_server-0.6.0/sast_mcp_server/scanners/gitleaks.py +186 -0
  41. sast_mcp_server-0.6.0/sast_mcp_server/scanners/njsscan.py +172 -0
  42. sast_mcp_server-0.6.0/sast_mcp_server/scanners/osv_scanner.py +180 -0
  43. sast_mcp_server-0.6.0/sast_mcp_server/scanners/semgrep.py +187 -0
  44. sast_mcp_server-0.6.0/sast_mcp_server/scanners/trivy.py +345 -0
  45. sast_mcp_server-0.6.0/sast_mcp_server/scanners/zap.py +249 -0
  46. sast_mcp_server-0.6.0/sast_mcp_server/server.py +1123 -0
  47. sast_mcp_server-0.6.0/smithery.yaml +41 -0
  48. sast_mcp_server-0.6.0/tests/__init__.py +1 -0
  49. sast_mcp_server-0.6.0/tests/fixtures/bandit_output.json +32 -0
  50. sast_mcp_server-0.6.0/tests/fixtures/bearer_output.json +24 -0
  51. sast_mcp_server-0.6.0/tests/fixtures/checkov_output.json +57 -0
  52. sast_mcp_server-0.6.0/tests/fixtures/codeql_output.sarif +96 -0
  53. sast_mcp_server-0.6.0/tests/fixtures/njsscan_output.json +33 -0
  54. sast_mcp_server-0.6.0/tests/fixtures/semgrep_output.json +37 -0
  55. sast_mcp_server-0.6.0/tests/fixtures/test_target.py +10 -0
  56. sast_mcp_server-0.6.0/tests/fixtures/trivy_output.json +87 -0
  57. sast_mcp_server-0.6.0/tests/fixtures/vulnerable_app.js +16 -0
  58. sast_mcp_server-0.6.0/tests/fixtures/zap_output.json +64 -0
  59. sast_mcp_server-0.6.0/tests/test_aggregator.py +221 -0
  60. sast_mcp_server-0.6.0/tests/test_auth.py +100 -0
  61. sast_mcp_server-0.6.0/tests/test_cache.py +165 -0
  62. sast_mcp_server-0.6.0/tests/test_enrichment.py +195 -0
  63. sast_mcp_server-0.6.0/tests/test_integrations.py +263 -0
  64. sast_mcp_server-0.6.0/tests/test_new_scanners.py +444 -0
  65. sast_mcp_server-0.6.0/tests/test_prompts.py +21 -0
  66. sast_mcp_server-0.6.0/tests/test_resources.py +48 -0
  67. sast_mcp_server-0.6.0/tests/test_sarif.py +192 -0
  68. sast_mcp_server-0.6.0/tests/test_scanners.py +320 -0
  69. sast_mcp_server-0.6.0/tests/test_server.py +567 -0
  70. sast_mcp_server-0.6.0/uv.lock +2127 -0
@@ -0,0 +1,47 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ tags: ["v*"]
7
+ pull_request:
8
+ branches: [main]
9
+
10
+ jobs:
11
+ lint:
12
+ runs-on: ubuntu-latest
13
+ steps:
14
+ - uses: actions/checkout@v4
15
+ - uses: actions/setup-python@v5
16
+ with:
17
+ python-version: "3.12"
18
+ - run: pip install ruff
19
+ - run: ruff check sast_mcp_server/
20
+
21
+ test:
22
+ runs-on: ubuntu-latest
23
+ strategy:
24
+ matrix:
25
+ python-version: ["3.10", "3.11", "3.12", "3.13"]
26
+ steps:
27
+ - uses: actions/checkout@v4
28
+ - uses: actions/setup-python@v5
29
+ with:
30
+ python-version: ${{ matrix.python-version }}
31
+ - run: pip install -e ".[dev]"
32
+ - run: pytest tests/ -v --tb=short
33
+
34
+ publish:
35
+ needs: [lint, test]
36
+ runs-on: ubuntu-latest
37
+ if: startsWith(github.ref, 'refs/tags/v')
38
+ permissions:
39
+ id-token: write
40
+ steps:
41
+ - uses: actions/checkout@v4
42
+ - uses: actions/setup-python@v5
43
+ with:
44
+ python-version: "3.12"
45
+ - run: pip install build
46
+ - run: python -m build
47
+ - uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,42 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+
7
+ # Distribution / packaging
8
+ dist/
9
+ build/
10
+ *.egg-info/
11
+ *.egg
12
+
13
+ # Virtual environments
14
+ .venv/
15
+ venv/
16
+ env/
17
+
18
+ # IDE
19
+ .vscode/
20
+ .idea/
21
+ *.swp
22
+ *.swo
23
+ *~
24
+
25
+ # Testing
26
+ .pytest_cache/
27
+ .coverage
28
+ htmlcov/
29
+ .mypy_cache/
30
+
31
+ # OS
32
+ .DS_Store
33
+ Thumbs.db
34
+
35
+ # Project-specific
36
+ .sast-mcp-ignore.json
37
+
38
+ # Testing
39
+ .pytest_cache/
40
+ .coverage
41
+ htmlcov/
42
+ .mypy_cache/
@@ -0,0 +1,126 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+
5
+ ## [0.6.0] - 2026-06-12
6
+ ### Added
7
+ - **DefectDojo integration**: New `upload_to_defectdojo` tool imports a SARIF
8
+ export into a DefectDojo engagement (`/api/v2/import-scan/`). Configured via
9
+ `DEFECTDOJO_URL` / `DEFECTDOJO_API_KEY` environment variables.
10
+ - **GitHub Code Scanning integration**: New `upload_to_github` tool pushes a
11
+ gzipped+base64 SARIF report to `/repos/{owner}/{repo}/code-scanning/sarifs`.
12
+ Configured via the `GITHUB_TOKEN` environment variable.
13
+ - **AI patch generation**: New `generate_fix_prompt` tool packages a cached
14
+ finding's vulnerable code and context into an LLM-ready prompt that asks for
15
+ a strict unified diff; the new `apply_patch` tool applies an agent-supplied
16
+ diff to disk via `git apply` (rejecting paths that escape the target).
17
+ - **`integrations/` package** (`defectdojo.py`, `github.py`) and
18
+ **`enrichment/patch_prompt.py`** / **`enrichment/patch_apply.py`** modules.
19
+ - `ScanCache.find_finding_by_hash()` to recover full finding details by hash.
20
+
21
+ ### Changed
22
+ - Standardized a `name` class attribute on every scanner via `BaseScanner`,
23
+ replacing the inconsistent `name` property on the newer scanners.
24
+ - Added `httpx` as an explicit runtime dependency.
25
+ - The `fix_vulnerability` MCP prompt now drives the
26
+ `generate_fix_prompt` → `apply_patch` workflow.
27
+
28
+ ### Fixed
29
+ - Resolved all `ruff` lint violations across the scanner modules so CI lint
30
+ passes again; cleaned up dead CVSS-parsing code in the OSV scanner and gave
31
+ it an honest `database_specific.severity` mapping.
32
+ - Aligned scanner `scan()` signatures with the base class
33
+ (`modified_files: set[str] | None`), fixing Liskov/type violations.
34
+ - Eliminated `coroutine was never awaited` warnings in the scanner test suite.
35
+
36
+ ## [0.5.0] - 2026-06-12
37
+ ### Added
38
+ - **Gitleaks Integration**: Deep secret scanning across entire git histories to detect leaked API keys and tokens from past commits.
39
+ - **OSV-Scanner Integration**: Advanced Software Composition Analysis (SCA) mapped to the Google OSV database.
40
+ - **OWASP ZAP Integration (DAST)**: Dynamic Application Security Testing capability via Docker Compose orchestration (`run_active_scan` tool).
41
+
42
+ ## [0.4.0] - 2026-06-11
43
+ ### Added
44
+ - **Streamable HTTP Transport**: Added `--transport streamable-http` for production remote deployments (MCP 2026 standard).
45
+ - **JWT Authentication**: Added JWT scoped tokens (`SAST_MCP_JWT_SECRET`) as the preferred auth mode; the static API key (`SAST_MCP_API_KEY`) remains available as a legacy option.
46
+ - **Baseline Caching**: Scans are automatically cached. New tools `save_baseline` and `compare_baseline` to track security trends and regressions over time.
47
+ - **MCP Resources**: Expose read-only security dashboards, cache metadata, configuration, and scanner status via `sast://` resource URIs.
48
+ - **MCP Prompts**: Added pre-built security workflows for agents: `security_review`, `fix_vulnerability`, `pr_security_check`, and `compliance_report`.
49
+
50
+ ### Changed
51
+ - SSE transport is now deprecated in favor of Streamable HTTP.
52
+ - Dependency `pyjwt` added for authentication capabilities.
53
+
54
+ ## [0.3.0] - 2026-06-09
55
+
56
+ ### Added
57
+ - **Trivy scanner** — 5th scanner covering dependency CVEs (SCA), secret detection, and IaC misconfigurations (Terraform, Kubernetes, Docker, CloudFormation)
58
+ - **CodeQL scanner** — 6th scanner providing deep semantic SAST with data-flow and taint tracking (by GitHub); supports database caching for faster re-scans
59
+ - **Checkov scanner** — 7th scanner for Infrastructure-as-Code policy enforcement with 1000+ built-in checks
60
+ - **`scan_all` tool** — run ALL installed scanners in parallel with automatic content-based deduplication; shows which scanners were used vs. skipped
61
+ - **`export_sarif` tool** — export scan results in SARIF 2.1.0 format for CI/CD integration (GitHub Code Scanning, GitLab SAST, Azure DevOps)
62
+ - **SARIF module** (`sarif.py`) — bidirectional SARIF 2.1.0 conversion: Finding→SARIF for export, SARIF→Finding for CodeQL import
63
+ - **Multi-scanner aggregator** (`aggregator.py`) — concurrent scanner execution with `asyncio.gather()`, hash-based deduplication keeping highest severity, severity-sorted output
64
+ - **CodeQL language auto-detection** — counts file extensions in target directory to pick the dominant supported language
65
+ - **CodeQL database caching** — stores database in `.sast-mcp-codeql-db/` to avoid rebuilding on every scan
66
+ - **Trivy multi-type parsing** — unified parser for three different Trivy JSON output structures (Vulnerabilities, Secrets, Misconfigurations)
67
+ - **Checkov multi-block parsing** — handles both single-dict and list-of-dicts output formats from Checkov
68
+ - **IaC language detection** — detects Terraform, Kubernetes YAML, Dockerfile, CloudFormation, Bicep, and Helm files
69
+ - **SARIF data flow support** — serializes `DataFlowStep` traces into SARIF `codeFlows` / `threadFlows`
70
+ - **SARIF fingerprint preservation** — round-trips finding hashes through SARIF `fingerprints.primaryLocationHash`
71
+ - **Comprehensive test suite** — 25+ new tests covering all three new scanners, SARIF export/parse, aggregator deduplication, and new server tools
72
+
73
+ ### Changed
74
+ - Scanner factory now registers 7 scanners (was 4)
75
+ - MCP server instructions updated to describe all 7 scanners and new tools
76
+ - Updated `pyproject.toml` keywords to include trivy, codeql, checkov, sarif
77
+ - Version bumped to 0.3.0
78
+
79
+ ## [0.2.0] - 2026-06-07
80
+
81
+ ### Added
82
+ - **Semgrep scanner** — 4th scanner supporting 30+ languages with community rule registry
83
+ - **SSE/HTTP transport** — remote deployments via `--transport sse --port 8080`
84
+ - **API key authentication** — `SAST_MCP_API_KEY` env var for secure remote access
85
+ - **`list_scanners` tool** — check available scanners and their installation status
86
+ - **`unignore_vulnerability` tool** — reverse a previous ignore action
87
+ - **`list_ignored_vulnerabilities` tool** — view all suppressed findings
88
+ - **Typed data models** — `Finding`, `SeverityLevel`, `ConfidenceLevel` enums
89
+ - **Tree-sitter JavaScript/TypeScript support** — AST-aware context for JS/TS files
90
+ - **Configurable scan timeout** — `SAST_MCP_TIMEOUT` env var (default 300s)
91
+ - **Structured logging** — logs to stderr only (prevents stdio transport corruption)
92
+ - **Gemini CLI extension** — `gemini-extension.json` + `GEMINI.md` context file
93
+ - **Cross-platform docs** — setup guides for Claude Desktop, OpenAI, Cursor
94
+ - **Smithery.ai manifest** — marketplace listing for discovery
95
+ - **Docker support** — Dockerfile with all scanners pre-installed
96
+ - **CI/CD pipeline** — GitHub Actions for lint, test, and PyPI publish
97
+ - **Comprehensive test suite** — 30+ tests covering all scanners, enrichment, and server logic
98
+ - **Atomic ignore file writes** — prevents corruption on crash
99
+ - **Timestamps in ignore entries** — audit trail for suppressed findings
100
+ - **Backward-compatible ignore format** — migrates old `hash → reason` format
101
+
102
+ ### Fixed
103
+ - **Bearer confidence hardcoded to HIGH** — now parsed from actual output
104
+ - **njsscan severity hardcoded to HIGH** — now parsed from metadata
105
+ - **njsscan only processed first file** — now handles all files in multi-file findings
106
+ - **Scanner errors silently swallowed** — stderr now propagated in error messages
107
+ - **Hash instability** — finding hashes no longer depend on line numbers
108
+ - **tree-sitter deps declared but unused** — now actually used for JS/TS context
109
+ - **Invalid severity/confidence accepted** — now validated against enum values
110
+ - **Logging to stdout corrupted stdio transport** — fixed to log to stderr only
111
+
112
+ ### Changed
113
+ - Restructured to proper Python package layout (`sast_mcp_server/`)
114
+ - Updated `pyproject.toml` with full PyPI metadata, entry points, and tool configs
115
+ - Scanner factory now supports dynamic registration and metadata queries
116
+ - Replaced raw `Dict` returns with typed `Finding` dataclass
117
+
118
+ ## [0.1.0] - Initial Release
119
+
120
+ ### Added
121
+ - Basic MCP server with Bandit, njsscan, and Bearer scanners
122
+ - AST context enrichment for Python files
123
+ - Git diff filtering
124
+ - Finding ignore list
125
+ - Severity and confidence filtering
126
+ - Pagination support
@@ -0,0 +1,171 @@
1
+ # CLAUDE.md
2
+
3
+ This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
4
+
5
+ ## Project Overview
6
+
7
+ SAST MCP Server — a [FastMCP](https://github.com/jlowin/fastmcp)-based MCP server that exposes
8
+ 10 static/dynamic security scanners (Bandit, njsscan, Bearer, Semgrep, Trivy, CodeQL, Checkov,
9
+ Gitleaks, OSV-Scanner, OWASP ZAP) as tools for AI agents (Gemini CLI, Claude Desktop, Cursor,
10
+ OpenAI Agents, etc.). Findings are normalized into a typed `Finding` model, enriched with
11
+ AST-aware code context, deduplicated across scanners, and can be exported as SARIF 2.1.0 for
12
+ CI/CD.
13
+
14
+ ## Development Commands
15
+
16
+ ```bash
17
+ # Install with dev dependencies (editable)
18
+ pip install -e ".[dev]"
19
+
20
+ # Run the full test suite
21
+ pytest tests/ -v
22
+
23
+ # Run a single test file
24
+ pytest tests/test_scanners.py -v
25
+
26
+ # Run a single test
27
+ pytest tests/test_scanners.py::TestBanditScanner::test_parse_findings -v
28
+
29
+ # Lint (this is what CI runs)
30
+ ruff check sast_mcp_server/
31
+
32
+ # Type check (configured in pyproject.toml, not enforced in CI)
33
+ mypy sast_mcp_server/
34
+
35
+ # Run the server locally (stdio transport, for MCP client integration)
36
+ python -m sast_mcp_server
37
+ # or, once installed:
38
+ sast-mcp-server
39
+
40
+ # Run as a remote HTTP server (Streamable HTTP is the current standard; SSE is deprecated)
41
+ sast-mcp-server --transport streamable-http --host 0.0.0.0 --port 8080
42
+ ```
43
+
44
+ CI (`.github/workflows/ci.yml`) runs `ruff check sast_mcp_server/` and
45
+ `pytest tests/ -v --tb=short` across Python 3.10–3.13, then publishes to PyPI on `v*` tags.
46
+
47
+ ## Architecture
48
+
49
+ ### Entry point and server registration
50
+
51
+ `sast_mcp_server/server.py` builds the `FastMCP` instance and registers all `@mcp.tool()`
52
+ functions (`scan_vulnerabilities`, `scan_all`, `scan_git_history`, `run_active_scan`,
53
+ `export_sarif`, `save_baseline`, `compare_baseline`, `list_scanners`, `ignore_vulnerability`,
54
+ `unignore_vulnerability`, `list_ignored_vulnerabilities`, plus the v0.6.0 tools
55
+ `upload_to_defectdojo`, `upload_to_github`, `generate_fix_prompt`, `apply_patch`).
56
+ `register_resources(mcp)` and `register_prompts(mcp)` wire up MCP Resources and Prompts from
57
+ `resources.py` / `prompts.py`. The decorated tool functions stay directly callable, so tests
58
+ import and `await` them as plain functions.
59
+
60
+ **Critical constraint:** all logging is configured to go to `stderr`
61
+ (`logging.basicConfig(..., stream=sys.stderr)`). Never write to stdout — it corrupts the stdio
62
+ MCP transport.
63
+
64
+ ### Scanner plugin architecture
65
+
66
+ Each scanner lives in `sast_mcp_server/scanners/<name>.py` and subclasses `BaseScanner`
67
+ (`scanners/base.py`), implementing:
68
+ - `check_dependency()` — raises `RuntimeError` if the underlying CLI/binary isn't available.
69
+ - `scan(target_path, min_severity, min_confidence, modified_files, timeout)` — runs the tool
70
+ (usually via `asyncio.create_subprocess_exec`, parsing JSON/SARIF stdout or a report file) and
71
+ returns `list[Finding]`.
72
+
73
+ `scanners/factory.py` is the single source of truth for scanner registration:
74
+ - `_SCANNER_REGISTRY` maps scanner name → class (used by `ScannerFactory.get_scanner()` /
75
+ `list_scanners()`).
76
+ - `SCANNER_INFO` provides display metadata (description, languages, install command) for the
77
+ `list_scanners` tool and `sast://scanners` resource.
78
+
79
+ Every scanner sets a `name` class attribute (declared as `ClassVar[str]` on `BaseScanner`) equal
80
+ to its registry key; it is used both as `Finding.scanner` and by callers that persist results
81
+ (e.g. `cache.save_scan(scanner.name, ...)`). To add a new scanner: implement the `BaseScanner`
82
+ subclass (including `name`), register it in both `_SCANNER_REGISTRY` and `SCANNER_INFO` in
83
+ `factory.py`, and export it from `scanners/__init__.py`.
84
+
85
+ ZAP is the only DAST scanner — its `scan()` raises `NotImplementedError` and instead exposes
86
+ `run_dynamic_scan()`, invoked via the dedicated `run_active_scan` tool, which orchestrates
87
+ `docker compose up` → wait for the URL → run the `zap-baseline.py` container → `docker compose down`.
88
+
89
+ ### Finding pipeline
90
+
91
+ 1. Scanner produces raw `Finding` objects (`models.py`): `scanner`, `title`, `message`, `file`,
92
+ `line_start/end`, `severity` (`SeverityLevel`), `confidence` (`ConfidenceLevel`), `context`,
93
+ `language`, `finding_hash`, optional `data_flow: list[DataFlowStep]`.
94
+ 2. `finding_hash` is computed by `BaseScanner.generate_hash(file_path, title, code_snippet)` —
95
+ a SHA-256 of `file:title:normalized_snippet`, designed to stay stable across line-number
96
+ shifts but change if the vulnerable code moves to another file or the snippet itself is edited. This hash is the key used for
97
+ ignore-list entries, dedup, and baseline diffing.
98
+ 3. `enrichment/ast_context.get_function_context()` expands a bare line number into the full
99
+ enclosing function/class: native `ast` for Python, tree-sitter for JS/TS (if installed),
100
+ falling back to ±10 lines for everything else.
101
+ 4. `enrichment/ignore_manager.IgnoreManager` filters out findings whose hash is in
102
+ `<target>/.sast-mcp-ignore.json` (atomic write via temp-file + `os.replace`).
103
+ 5. `enrichment/git_diff.get_modified_files()` supports `git_diff_only=True` by intersecting
104
+ findings with `git diff --name-only HEAD`.
105
+
106
+ ### Multi-scanner aggregation (`aggregator.py`)
107
+
108
+ `run_all_scanners()` checks `check_dependency()` for every registered scanner, runs the
109
+ available ones concurrently with `asyncio.gather`, then `_deduplicate_findings()` collapses
110
+ findings sharing a `finding_hash` (keeping the higher-severity / data-flow-bearing copy), and
111
+ sorts the result by `SEVERITY_ORDER` (CRITICAL first). Returns
112
+ `(findings, used_scanners, skipped_scanners)`.
113
+
114
+ ### Caching & baselines (`cache.py`)
115
+
116
+ `ScanCache` persists scan results as JSON under `<target_path>/.sast-mcp-cache/`:
117
+ - `save_scan(..., tag=...)` writes a timestamped file and, if `tag` is set, also overwrites
118
+ `<scanner>_tag_<tag>.json` (used as a named baseline).
119
+ - `compare(baseline, current_findings)` returns a `ScanDiff` (new/fixed/unchanged findings +
120
+ per-severity trend), driving the `compare_baseline` tool and `pr_security_check` prompt.
121
+ - `cleanup_expired()` removes non-tagged scans older than `SAST_MCP_CACHE_TTL` (default 24h).
122
+
123
+ ### SARIF (`sarif.py`)
124
+
125
+ Bidirectional SARIF 2.1.0 conversion: `findings_to_sarif()` powers `export_sarif`;
126
+ `parse_sarif()` is used by the CodeQL scanner to ingest CodeQL's native SARIF output and
127
+ re-enrich it with AST context and stable hashes.
128
+
129
+ ### Dashboard integrations (`integrations/`)
130
+
131
+ `integrations/defectdojo.py` and `integrations/github.py` push a SARIF export to external
132
+ platforms via `httpx.AsyncClient`. Credentials come **only** from environment variables
133
+ (`DEFECTDOJO_URL`/`DEFECTDOJO_API_KEY`, `GITHUB_TOKEN`) so secrets never land in tool arguments
134
+ or transcripts. Both raise the shared `IntegrationError` (from `integrations/__init__.py`) on
135
+ missing config, unreadable files, or non-2xx responses; the server tools catch it and return a
136
+ friendly message. GitHub's endpoint requires the SARIF be gzip-compressed then base64-encoded.
137
+
138
+ ### AI patch generation (`enrichment/patch_prompt.py`, `enrichment/patch_apply.py`)
139
+
140
+ Division of responsibility: the **server packages context**, the **agent generates the diff**,
141
+ and `apply_patch` writes it. `build_patch_prompt()` recovers a finding by hash via
142
+ `ScanCache.find_finding_by_hash()`, reads a line-numbered window of the source, and renders a
143
+ prompt instructing an LLM to emit a strict unified diff. `apply_unified_diff()` shells out to
144
+ `git apply` (works without a repo; rejects paths escaping the target unless `--unsafe-paths`,
145
+ which we never pass) and supports `check_only` for a dry run.
146
+
147
+ ### Auth (`auth.py`)
148
+
149
+ `AuthProvider` supports three modes selected by env vars: no auth (stdio/local — all scopes
150
+ granted), static `SAST_MCP_API_KEY` (legacy), or `SAST_MCP_JWT_SECRET` (HMAC-SHA256 JWT with
151
+ `scopes`/`exp` claims, scopes drawn from `SCOPE_SCAN_READ`/`SCOPE_SCAN_WRITE`/`SCOPE_CONFIG_WRITE`/
152
+ `SCOPE_ADMIN`). `create_jwt()` is a helper for minting test tokens.
153
+
154
+ ### MCP Resources & Prompts
155
+
156
+ - `resources.py` exposes read-only `sast://` URIs (`sast://scanners`, `sast://config`,
157
+ `sast://cache/{target_path}/latest`, `sast://cache/{target_path}/baseline/{tag}`,
158
+ `sast://ignore/{target_path}`, `sast://dashboard/{target_path}`) backed by `ScanCache` /
159
+ `IgnoreManager` / `ScannerFactory` — no scans are run.
160
+ - `prompts.py` defines reusable workflow templates (`security_review`, `fix_vulnerability`,
161
+ `pr_security_check`, `compliance_report`) that return instruction strings guiding an agent
162
+ through a sequence of tool calls.
163
+
164
+ ## Testing conventions
165
+
166
+ - Tests live in `tests/`, fixtures (recorded scanner JSON/SARIF output) in `tests/fixtures/`.
167
+ - Subprocess-based scanners are tested by patching `asyncio.create_subprocess_exec` and
168
+ `asyncio.wait_for` to return a mocked process whose `communicate()` resolves to fixture bytes
169
+ (see the `_mock_subprocess` helper duplicated in `test_scanners.py` / `test_new_scanners.py`).
170
+ - `pytest-asyncio` is in `auto` mode (`pyproject.toml`), so `async def test_*` functions need no
171
+ decorator; the explicit `@pytest.mark.asyncio` marker is redundant in this mode but is kept throughout for consistency.
@@ -0,0 +1,26 @@
1
+ FROM python:3.12-slim AS base
2
+
3
+ # Install system dependencies for Bearer
4
+ RUN apt-get update && apt-get install -y --no-install-recommends \
5
+ git \
6
+ curl \
7
+ && rm -rf /var/lib/apt/lists/*
8
+
9
+ # Install Bearer CLI
10
+ RUN curl -sfL https://raw.githubusercontent.com/Bearer/bearer/main/contrib/install.sh | sh
11
+
12
+ # Set up Python environment
13
+ WORKDIR /app
14
+
15
+ # Install Python package and scanner dependencies
16
+ COPY pyproject.toml README.md LICENSE ./
17
+ COPY sast_mcp_server/ ./sast_mcp_server/
18
+
19
+ RUN pip install --no-cache-dir . bandit njsscan
20
+
21
+ # Create non-root user for security
22
+ RUN useradd --create-home --shell /bin/bash scanner
23
+ USER scanner
24
+
25
+ # Default: run the MCP server in stdio mode
26
+ ENTRYPOINT ["sast-mcp-server"]
@@ -0,0 +1,100 @@
1
+ ## Tool Purpose
2
+
3
+ This extension provides **SAST (Static Application Security Testing)** scanning capabilities.
4
+ It can scan codebases for security vulnerabilities using industry-standard tools:
5
+ Bandit (Python), njsscan (JavaScript/Node.js), Bearer (multi-language with data-flow analysis),
6
+ Semgrep (30+ languages with community rule registry), Trivy (vulnerabilities, secrets, IaC),
7
+ CodeQL (deep semantic analysis with taint tracking), Checkov (Infrastructure-as-Code policies),
8
+ Gitleaks (deep secret scanning in git history), OSV-Scanner (Software Composition Analysis),
9
+ and OWASP ZAP (Dynamic Application Security Testing via Docker orchestration).
10
+
11
+ ## Available Tools
12
+
13
+ 1. **`scan_vulnerabilities`** — Scan a directory for security vulnerabilities (SAST)
14
+ 2. **`scan_all`** — Run ALL installed scanners in parallel with deduplication
15
+ 3. **`scan_git_history`** — Scan the entire `.git` history for leaked secrets (Gitleaks)
16
+ 4. **`run_active_scan`** — Run a dynamic (DAST) baseline scan by orchestrating Docker Compose (ZAP)
17
+ 5. **`export_sarif`** — Export scan results in SARIF 2.1.0 format for CI/CD
18
+ 6. **`list_scanners`** — Check which scanners are installed and available
19
+ 7. **`ignore_vulnerability`** — Suppress a known false positive
20
+ 8. **`unignore_vulnerability`** — Re-enable a previously ignored finding
21
+ 9. **`list_ignored_vulnerabilities`** — Show all currently suppressed findings
22
+ 10. **`save_baseline`** — Save a scan result as a baseline for future comparison
23
+ 11. **`compare_baseline`** — Compare current codebase against a saved baseline
24
+ 12. **`upload_to_defectdojo`** — Import a SARIF export into a DefectDojo engagement
25
+ 13. **`upload_to_github`** — Upload a SARIF report to GitHub Code Scanning
26
+ 14. **`generate_fix_prompt`** — Build an LLM prompt to fix a finding as a unified diff
27
+ 15. **`apply_patch`** — Apply an agent-generated unified diff to disk (via `git apply`)
28
+
29
+ ## Usage Patterns
30
+
31
+ ### Initial Setup
32
+ Always start by calling `list_scanners` to verify which tools are available on the user's system.
33
+
34
+ ### Choosing a Scanner
35
+ - **Python projects** → use `scanner_name="bandit"` or `scanner_name="semgrep"`
36
+ - **JavaScript/Node.js projects** → use `scanner_name="njsscan"` or `scanner_name="semgrep"`
37
+ - **Multi-language or uncertain** → use `scanner_name="semgrep"` (broadest coverage)
38
+ - **Data-flow analysis needed** → use `scanner_name="bearer"`
39
+ - **Dependency CVEs / supply chain** → use `scanner_name="osv-scanner"` or `trivy`
40
+ - **Secrets in git history** → use `scan_git_history`
41
+ - **Deep semantic analysis** → use `scanner_name="codeql"` (slower but finds complex flaws)
42
+ - **Infrastructure-as-Code policies** → use `scanner_name="checkov"`
43
+ - **Dynamic Application Testing (DAST)** → use `run_active_scan` (orchestrates ZAP via Docker)
44
+ - **Comprehensive scan** → use `scan_all` to run everything installed at once
45
+
46
+ ### Scanning Workflow
47
+ 1. Call `scan_vulnerabilities` (or `scan_all` for comprehensive coverage) with the project root path
48
+ 2. Review findings — focus on HIGH and CRITICAL severity first
49
+ 3. For each finding, suggest a fix based on the provided code context
50
+ 4. If the user confirms a false positive, use `ignore_vulnerability` with a clear reason
51
+
52
+ ### Regression & Trend Tracking (Baselines)
53
+ When establishing a security posture or before a major refactor:
54
+ 1. Call `save_baseline(tag="main")` to snapshot the current state
55
+ 2. After changes, call `compare_baseline(tag="main")` to see newly introduced vs. fixed vulnerabilities
56
+
57
+ ### Using MCP Prompts (Workflows)
58
+ You have access to pre-built security workflows via MCP prompts. You should invoke these when asked to perform a broad security task:
59
+ - `security_review`: For a comprehensive codebase assessment
60
+ - `fix_vulnerability`: For deep-dive remediation of a specific finding hash
61
+ - `pr_security_check`: When reviewing a Pull Request or git diff
62
+ - `compliance_report`: When asked about OWASP Top 10 or PCI-DSS compliance
63
+
64
+ ### Using MCP Resources (Context)
65
+ You can access read-only contextual data without running a scan by requesting these resources:
66
+ - `sast://dashboard/{target_path}`: View a high-level severity distribution and overall security posture
67
+ - `sast://cache/{target_path}/latest`: View metadata of the most recent scans without waiting for execution
68
+
69
+ ### CI/CD Integration
70
+ Use `export_sarif` to generate SARIF 2.1.0 output that can be uploaded to:
71
+ - GitHub Code Scanning
72
+ - GitLab SAST
73
+ - Azure DevOps
74
+ - Any SARIF-compatible security dashboard
75
+
76
+ ### Dashboard Upload & AI Remediation (v0.6.0)
77
+ - After `export_sarif(output_path=...)`, push the report to a dashboard with
78
+ `upload_to_defectdojo` or `upload_to_github`. Credentials come from
79
+ environment variables only (`DEFECTDOJO_URL`/`DEFECTDOJO_API_KEY`,
80
+ `GITHUB_TOKEN`) — never ask the user to paste secrets as arguments.
81
+ - To remediate a finding: call `generate_fix_prompt(target_path, finding_hash)`
82
+ to get an LLM-ready prompt, produce a unified diff, then `apply_patch` with
83
+ `check_only=True` before applying for real. The finding must come from a
84
+ cached scan (`scan_all`/`save_baseline`).
85
+
86
+ ### Incremental Scanning
87
+ When reviewing a pull request or recent changes, use `git_diff_only=True` to only scan modified files.
88
+
89
+ ### Pagination
90
+ For large codebases, use `min_severity="HIGH"` to focus on critical issues first,
91
+ or use `limit` and `offset` for pagination.
92
+
93
+ ## Best Practices
94
+ - Always explain findings in plain language, not just technical jargon
95
+ - Provide concrete code fix suggestions alongside each finding
96
+ - Group related findings together when presenting results
97
+ - Use `min_severity="MEDIUM"` as a sensible default for most scans
98
+ - When a scan returns many results, summarize the top issues before showing details
99
+ - Use `scan_all` for the most comprehensive security assessment
100
+ - **Since v0.4.0**: Use `compare_baseline` to highlight newly introduced vulnerabilities instead of overwhelming the user with legacy issues.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Abdellah
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.