security-controls-mcp 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- security_controls_mcp/__init__.py +3 -0
- security_controls_mcp/__main__.py +8 -0
- security_controls_mcp/cli.py +255 -0
- security_controls_mcp/config.py +145 -0
- security_controls_mcp/data/framework-to-scf.json +13986 -0
- security_controls_mcp/data/scf-controls.json +50162 -0
- security_controls_mcp/data_loader.py +180 -0
- security_controls_mcp/extractors/__init__.py +5 -0
- security_controls_mcp/extractors/pdf_extractor.py +248 -0
- security_controls_mcp/http_server.py +477 -0
- security_controls_mcp/legal_notice.py +82 -0
- security_controls_mcp/providers.py +238 -0
- security_controls_mcp/registry.py +132 -0
- security_controls_mcp/server.py +613 -0
- security_controls_mcp-0.2.0.dist-info/METADATA +467 -0
- security_controls_mcp-0.2.0.dist-info/RECORD +21 -0
- security_controls_mcp-0.2.0.dist-info/WHEEL +5 -0
- security_controls_mcp-0.2.0.dist-info/entry_points.txt +2 -0
- security_controls_mcp-0.2.0.dist-info/licenses/LICENSE +17 -0
- security_controls_mcp-0.2.0.dist-info/licenses/LICENSE-DATA.md +61 -0
- security_controls_mcp-0.2.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
"""Data loader for SCF controls and framework mappings."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class SCFData:
|
|
9
|
+
"""Loads and provides access to SCF control data."""
|
|
10
|
+
|
|
11
|
+
def __init__(self):
|
|
12
|
+
self.controls: list[dict[str, Any]] = []
|
|
13
|
+
self.controls_by_id: dict[str, dict[str, Any]] = {}
|
|
14
|
+
self.framework_to_scf: dict[str, dict[str, list[str]]] = {}
|
|
15
|
+
self.frameworks: dict[str, dict[str, Any]] = {}
|
|
16
|
+
self._load_data()
|
|
17
|
+
|
|
18
|
+
def _load_data(self):
|
|
19
|
+
"""Load SCF controls and reverse index from JSON files."""
|
|
20
|
+
data_dir = Path(__file__).parent / "data"
|
|
21
|
+
|
|
22
|
+
# Load controls
|
|
23
|
+
with open(data_dir / "scf-controls.json", "r", encoding="utf-8") as f:
|
|
24
|
+
data = json.load(f)
|
|
25
|
+
self.controls = data["controls"]
|
|
26
|
+
|
|
27
|
+
# Build ID index
|
|
28
|
+
self.controls_by_id = {ctrl["id"]: ctrl for ctrl in self.controls}
|
|
29
|
+
|
|
30
|
+
# Load reverse index
|
|
31
|
+
with open(data_dir / "framework-to-scf.json", "r", encoding="utf-8") as f:
|
|
32
|
+
self.framework_to_scf = json.load(f)
|
|
33
|
+
|
|
34
|
+
# Build framework metadata
|
|
35
|
+
self._build_framework_metadata()
|
|
36
|
+
|
|
37
|
+
def _build_framework_metadata(self):
|
|
38
|
+
"""Build framework metadata from controls."""
|
|
39
|
+
# Framework display names (keys must match actual data which uses dots in version numbers)
|
|
40
|
+
framework_names = {
|
|
41
|
+
"nist_csf_2.0": "NIST Cybersecurity Framework 2.0",
|
|
42
|
+
"nist_800_53_r5": "NIST SP 800-53 Revision 5",
|
|
43
|
+
"iso_27001_2022": "ISO/IEC 27001:2022",
|
|
44
|
+
"iso_27002_2022": "ISO/IEC 27002:2022",
|
|
45
|
+
"cis_csc_8.1": "CIS Critical Security Controls v8.1",
|
|
46
|
+
"pci_dss_4.0.1": "PCI DSS v4.0.1",
|
|
47
|
+
"cmmc_2.0_level_1": "CMMC 2.0 Level 1",
|
|
48
|
+
"cmmc_2.0_level_2": "CMMC 2.0 Level 2",
|
|
49
|
+
"soc_2_tsc": "SOC 2 (TSC 2017:2022)",
|
|
50
|
+
"dora": "Digital Operational Resilience Act (DORA)",
|
|
51
|
+
"nis2": "Network and Information Security Directive (NIS2)",
|
|
52
|
+
"gdpr": "General Data Protection Regulation (GDPR)",
|
|
53
|
+
"ncsc_caf_4.0": "NCSC Cyber Assessment Framework 4.0",
|
|
54
|
+
"uk_cyber_essentials": "UK Cyber Essentials",
|
|
55
|
+
"fedramp_r5_moderate": "FedRAMP Revision 5 (Moderate)",
|
|
56
|
+
"hipaa_security_rule": "HIPAA Security Rule",
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
# Count controls per framework
|
|
60
|
+
for fw_key, fw_name in framework_names.items():
|
|
61
|
+
count = sum(1 for ctrl in self.controls if ctrl["framework_mappings"].get(fw_key))
|
|
62
|
+
self.frameworks[fw_key] = {
|
|
63
|
+
"key": fw_key,
|
|
64
|
+
"name": fw_name,
|
|
65
|
+
"controls_mapped": count,
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
def get_control(self, control_id: str) -> dict[str, Any] | None:
|
|
69
|
+
"""Get control by SCF ID."""
|
|
70
|
+
return self.controls_by_id.get(control_id)
|
|
71
|
+
|
|
72
|
+
def search_controls(
|
|
73
|
+
self, query: str, frameworks: list[str] | None = None, limit: int = 10
|
|
74
|
+
) -> list[dict[str, Any]]:
|
|
75
|
+
"""Search controls by description. Case-insensitive string matching for v1."""
|
|
76
|
+
query_lower = query.lower()
|
|
77
|
+
results = []
|
|
78
|
+
|
|
79
|
+
for ctrl in self.controls:
|
|
80
|
+
# Check if query matches name or description (case-insensitive)
|
|
81
|
+
name_lower = ctrl["name"].lower() if ctrl["name"] else ""
|
|
82
|
+
desc_lower = ctrl["description"].lower() if ctrl["description"] else ""
|
|
83
|
+
|
|
84
|
+
if query_lower in name_lower or query_lower in desc_lower:
|
|
85
|
+
# Filter by frameworks if specified
|
|
86
|
+
if frameworks:
|
|
87
|
+
has_mapping = any(ctrl["framework_mappings"].get(fw) for fw in frameworks)
|
|
88
|
+
if not has_mapping:
|
|
89
|
+
continue
|
|
90
|
+
|
|
91
|
+
# Get mapped frameworks for response
|
|
92
|
+
mapped_frameworks = [
|
|
93
|
+
fw for fw, mappings in ctrl["framework_mappings"].items() if mappings
|
|
94
|
+
]
|
|
95
|
+
|
|
96
|
+
# Create snippet (simple version - first 150 chars with highlight)
|
|
97
|
+
desc = ctrl["description"]
|
|
98
|
+
idx = desc.lower().find(query_lower)
|
|
99
|
+
if idx >= 0:
|
|
100
|
+
start = max(0, idx - 50)
|
|
101
|
+
end = min(len(desc), idx + len(query) + 100)
|
|
102
|
+
snippet = desc[start:end]
|
|
103
|
+
if start > 0:
|
|
104
|
+
snippet = "..." + snippet
|
|
105
|
+
if end < len(desc):
|
|
106
|
+
snippet = snippet + "..."
|
|
107
|
+
else:
|
|
108
|
+
snippet = desc[:150] + "..." if len(desc) > 150 else desc
|
|
109
|
+
|
|
110
|
+
results.append(
|
|
111
|
+
{
|
|
112
|
+
"control_id": ctrl["id"],
|
|
113
|
+
"name": ctrl["name"],
|
|
114
|
+
"snippet": snippet,
|
|
115
|
+
"relevance": 1.0, # Simple scoring for v1
|
|
116
|
+
"mapped_frameworks": mapped_frameworks,
|
|
117
|
+
}
|
|
118
|
+
)
|
|
119
|
+
|
|
120
|
+
if len(results) >= limit:
|
|
121
|
+
break
|
|
122
|
+
|
|
123
|
+
return results
|
|
124
|
+
|
|
125
|
+
def get_framework_controls(
|
|
126
|
+
self, framework: str, include_descriptions: bool = False
|
|
127
|
+
) -> list[dict[str, Any]]:
|
|
128
|
+
"""Get all controls that map to a framework."""
|
|
129
|
+
results = []
|
|
130
|
+
|
|
131
|
+
for ctrl in self.controls:
|
|
132
|
+
fw_mappings = ctrl["framework_mappings"].get(framework)
|
|
133
|
+
if fw_mappings:
|
|
134
|
+
result = {
|
|
135
|
+
"scf_id": ctrl["id"],
|
|
136
|
+
"scf_name": ctrl["name"],
|
|
137
|
+
"framework_control_ids": fw_mappings,
|
|
138
|
+
"weight": ctrl["weight"],
|
|
139
|
+
}
|
|
140
|
+
|
|
141
|
+
if include_descriptions:
|
|
142
|
+
result["description"] = ctrl["description"]
|
|
143
|
+
|
|
144
|
+
results.append(result)
|
|
145
|
+
|
|
146
|
+
return results
|
|
147
|
+
|
|
148
|
+
def map_frameworks(
|
|
149
|
+
self,
|
|
150
|
+
source_framework: str,
|
|
151
|
+
target_framework: str,
|
|
152
|
+
source_control: str | None = None,
|
|
153
|
+
) -> list[dict[str, Any]]:
|
|
154
|
+
"""Map controls between two frameworks via SCF."""
|
|
155
|
+
results = []
|
|
156
|
+
|
|
157
|
+
# If source_control specified, filter to only controls with that mapping
|
|
158
|
+
for ctrl in self.controls:
|
|
159
|
+
source_mappings = ctrl["framework_mappings"].get(source_framework)
|
|
160
|
+
target_mappings = ctrl["framework_mappings"].get(target_framework)
|
|
161
|
+
|
|
162
|
+
# Skip if no source mapping
|
|
163
|
+
if not source_mappings:
|
|
164
|
+
continue
|
|
165
|
+
|
|
166
|
+
# Filter by source_control if specified
|
|
167
|
+
if source_control and source_control not in source_mappings:
|
|
168
|
+
continue
|
|
169
|
+
|
|
170
|
+
results.append(
|
|
171
|
+
{
|
|
172
|
+
"scf_id": ctrl["id"],
|
|
173
|
+
"scf_name": ctrl["name"],
|
|
174
|
+
"source_controls": source_mappings,
|
|
175
|
+
"target_controls": target_mappings or [],
|
|
176
|
+
"weight": ctrl["weight"],
|
|
177
|
+
}
|
|
178
|
+
)
|
|
179
|
+
|
|
180
|
+
return results
|
|
@@ -0,0 +1,248 @@
|
|
|
1
|
+
"""PDF extraction for security standards."""
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any, Dict, List
|
|
7
|
+
|
|
8
|
+
import pdfplumber
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def extract_standard(
    pdf_path: Path,
    standard_id: str,
    title: str,
    version: str,
    purchased_from: str,
    purchase_date: str,
) -> Dict[str, Any]:
    """Extract a standard from PDF.

    Args:
        pdf_path: Path to PDF file
        standard_id: Unique identifier for the standard
        title: Full title of the standard
        version: Version string
        purchased_from: Where it was purchased
        purchase_date: When it was purchased

    Returns:
        Dictionary with metadata and structure
    """
    # Pull the raw text out of every page while the PDF is open.
    with pdfplumber.open(pdf_path) as pdf:
        total_pages = len(pdf.pages)
        pages_text = [
            {"page": number, "text": page.extract_text() or ""}
            for number, page in enumerate(pdf.pages, start=1)
        ]

    # Heuristic structure detection over the extracted page text.
    sections = _detect_sections(pages_text)
    annexes = _detect_annexes(pages_text)

    metadata = {
        "standard_id": standard_id,
        "title": title,
        "version": version,
        "purchased_from": purchased_from,
        "purchase_date": purchase_date,
        "imported_date": datetime.now().isoformat(),
        "license": "Proprietary - Licensed to individual user",
        "pages": total_pages,
        "restrictions": [
            "Personal use only",
            "No redistribution",
            "No derivative works without permission",
        ],
    }

    structure = {
        "metadata": metadata,
        "sections": sections,
        "annexes": annexes,
    }

    # A "clause" is any top-level section plus every annex control.
    total_clauses = len(sections) + sum(len(annex.get("controls", [])) for annex in annexes)

    stats = {
        "pages": total_pages,
        "sections": len(sections),
        "annexes": len(annexes),
        "total_clauses": total_clauses,
    }

    return {"metadata": metadata, "structure": structure, "stats": stats}
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def _detect_sections(pages_text: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Detect main sections in the document.

    This uses heuristics to identify section headings like:
    - "1 Scope"
    - "5.1.2 Cryptographic controls"
    - "Chapter 3: Requirements"
    """
    # Numbered heading alone on a line: "1 Title", "1.2 Title", "1.2.3 Title".
    heading_re = re.compile(r"^(\d+(?:\.\d+)*)\s+([A-Z][^\n]{5,80})$", re.MULTILINE)

    flat: List[Dict[str, Any]] = []

    for entry in pages_text:
        page_number = entry["page"]
        text = entry["text"]

        for heading in heading_re.finditer(text):
            # Body runs from this heading to the next one on the same page,
            # or to the end of the page.
            body_start = heading.end()
            following = heading_re.search(text, body_start)
            body = (
                text[body_start : following.start()] if following else text[body_start:]
            ).strip()

            # Skip matches with no real body — likely TOC or header noise.
            if body and len(body) > 20:
                flat.append(
                    {
                        "id": heading.group(1),
                        "title": heading.group(2).strip(),
                        "page": page_number,
                        "content": body[:2000],  # Limit length
                        "subsections": [],
                    }
                )

    # Nest subsections under their parents by dotted-ID depth.
    return _build_hierarchy(flat)
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def _detect_annexes(pages_text: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
|
138
|
+
"""Detect annexes (like Annex A in ISO 27001).
|
|
139
|
+
|
|
140
|
+
Annexes often contain control listings with IDs like:
|
|
141
|
+
- "A.5.15 Access control"
|
|
142
|
+
- "Annex B.2.1 Requirements"
|
|
143
|
+
"""
|
|
144
|
+
annexes = []
|
|
145
|
+
|
|
146
|
+
# Pattern for annex headers
|
|
147
|
+
annex_pattern = re.compile(r"^Annex\s+([A-Z])[:\s]+([^\n]+)$", re.MULTILINE | re.IGNORECASE)
|
|
148
|
+
|
|
149
|
+
# Pattern for controls within annexes
|
|
150
|
+
control_pattern = re.compile(r"^([A-Z]\.\d+(?:\.\d+)*)\s+([A-Z][^\n]{5,80})$", re.MULTILINE)
|
|
151
|
+
|
|
152
|
+
current_annex = None
|
|
153
|
+
|
|
154
|
+
for page_info in pages_text:
|
|
155
|
+
page_num = page_info["page"]
|
|
156
|
+
text = page_info["text"]
|
|
157
|
+
|
|
158
|
+
# Check for new annex
|
|
159
|
+
annex_match = annex_pattern.search(text)
|
|
160
|
+
if annex_match:
|
|
161
|
+
# Save previous annex if exists
|
|
162
|
+
if current_annex:
|
|
163
|
+
annexes.append(current_annex)
|
|
164
|
+
|
|
165
|
+
# Start new annex
|
|
166
|
+
annex_id = annex_match.group(1)
|
|
167
|
+
annex_title = annex_match.group(2).strip()
|
|
168
|
+
current_annex = {
|
|
169
|
+
"id": annex_id,
|
|
170
|
+
"title": annex_title,
|
|
171
|
+
"page": page_num,
|
|
172
|
+
"controls": [],
|
|
173
|
+
}
|
|
174
|
+
|
|
175
|
+
# If we're in an annex, look for controls
|
|
176
|
+
if current_annex:
|
|
177
|
+
control_matches = control_pattern.finditer(text)
|
|
178
|
+
|
|
179
|
+
for match in control_matches:
|
|
180
|
+
control_id = match.group(1)
|
|
181
|
+
control_title = match.group(2).strip()
|
|
182
|
+
|
|
183
|
+
# Extract content
|
|
184
|
+
start_pos = match.end()
|
|
185
|
+
next_match = control_pattern.search(text, start_pos)
|
|
186
|
+
|
|
187
|
+
if next_match:
|
|
188
|
+
content = text[start_pos : next_match.start()].strip()
|
|
189
|
+
else:
|
|
190
|
+
content = text[start_pos:].strip()
|
|
191
|
+
|
|
192
|
+
if content and len(content) > 10:
|
|
193
|
+
current_annex["controls"].append(
|
|
194
|
+
{
|
|
195
|
+
"id": control_id,
|
|
196
|
+
"title": control_title,
|
|
197
|
+
"content": content[:1000],
|
|
198
|
+
"page": page_num,
|
|
199
|
+
"category": f"Annex {current_annex['id']}",
|
|
200
|
+
"type": "normative",
|
|
201
|
+
}
|
|
202
|
+
)
|
|
203
|
+
|
|
204
|
+
# Add final annex
|
|
205
|
+
if current_annex and current_annex["controls"]:
|
|
206
|
+
annexes.append(current_annex)
|
|
207
|
+
|
|
208
|
+
return annexes
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
def _build_hierarchy(sections: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
|
212
|
+
"""Build hierarchical structure from flat section list.
|
|
213
|
+
|
|
214
|
+
Converts:
|
|
215
|
+
[{"id": "1"}, {"id": "1.1"}, {"id": "1.2"}, {"id": "2"}]
|
|
216
|
+
Into:
|
|
217
|
+
[{"id": "1", "subsections": [{"id": "1.1"}, {"id": "1.2"}]}, {"id": "2"}]
|
|
218
|
+
"""
|
|
219
|
+
if not sections:
|
|
220
|
+
return []
|
|
221
|
+
|
|
222
|
+
# Build a tree structure
|
|
223
|
+
root = []
|
|
224
|
+
stack = [] # Stack of (section, level)
|
|
225
|
+
|
|
226
|
+
for section in sections:
|
|
227
|
+
section_id = section["id"]
|
|
228
|
+
level = section_id.count(".")
|
|
229
|
+
|
|
230
|
+
# Remove subsections key to avoid duplication
|
|
231
|
+
section = {k: v for k, v in section.items() if k != "subsections"}
|
|
232
|
+
section["subsections"] = []
|
|
233
|
+
|
|
234
|
+
# Pop stack until we find the parent level
|
|
235
|
+
while stack and stack[-1][1] >= level:
|
|
236
|
+
stack.pop()
|
|
237
|
+
|
|
238
|
+
if not stack:
|
|
239
|
+
# Top level section
|
|
240
|
+
root.append(section)
|
|
241
|
+
stack.append((section, level))
|
|
242
|
+
else:
|
|
243
|
+
# Add as subsection of parent
|
|
244
|
+
parent = stack[-1][0]
|
|
245
|
+
parent["subsections"].append(section)
|
|
246
|
+
stack.append((section, level))
|
|
247
|
+
|
|
248
|
+
return root
|