fedramp-20x-mcp 0.4.8 (fedramp_20x_mcp-0.4.8-py3-none-any.whl)
This diff represents the content of a publicly available package version released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fedramp_20x_mcp/__init__.py +14 -0
- fedramp_20x_mcp/__main__.py +12 -0
- fedramp_20x_mcp/data_loader.py +673 -0
- fedramp_20x_mcp/prompts/__init__.py +62 -0
- fedramp_20x_mcp/prompts/api_design_guide.txt +432 -0
- fedramp_20x_mcp/prompts/ato_package_checklist.txt +75 -0
- fedramp_20x_mcp/prompts/audit_preparation.txt +592 -0
- fedramp_20x_mcp/prompts/authorization_boundary_review.txt +76 -0
- fedramp_20x_mcp/prompts/azure_ksi_automation.txt +997 -0
- fedramp_20x_mcp/prompts/continuous_monitoring_setup.txt +61 -0
- fedramp_20x_mcp/prompts/documentation_generator.txt +499 -0
- fedramp_20x_mcp/prompts/gap_analysis.txt +25 -0
- fedramp_20x_mcp/prompts/initial_assessment_roadmap.txt +202 -0
- fedramp_20x_mcp/prompts/ksi_implementation_priorities.txt +283 -0
- fedramp_20x_mcp/prompts/migration_from_rev5.txt +440 -0
- fedramp_20x_mcp/prompts/quarterly_review_checklist.txt +231 -0
- fedramp_20x_mcp/prompts/significant_change_assessment.txt +50 -0
- fedramp_20x_mcp/prompts/vendor_evaluation.txt +349 -0
- fedramp_20x_mcp/prompts/vulnerability_remediation_timeline.txt +45 -0
- fedramp_20x_mcp/server.py +270 -0
- fedramp_20x_mcp/templates/__init__.py +75 -0
- fedramp_20x_mcp/templates/bicep/afr.txt +33 -0
- fedramp_20x_mcp/templates/bicep/cna.txt +48 -0
- fedramp_20x_mcp/templates/bicep/generic.txt +47 -0
- fedramp_20x_mcp/templates/bicep/iam.txt +211 -0
- fedramp_20x_mcp/templates/bicep/mla.txt +82 -0
- fedramp_20x_mcp/templates/bicep/rpl.txt +44 -0
- fedramp_20x_mcp/templates/bicep/svc.txt +54 -0
- fedramp_20x_mcp/templates/code/generic_csharp.txt +65 -0
- fedramp_20x_mcp/templates/code/generic_powershell.txt +65 -0
- fedramp_20x_mcp/templates/code/generic_python.txt +63 -0
- fedramp_20x_mcp/templates/code/iam_csharp.txt +150 -0
- fedramp_20x_mcp/templates/code/iam_powershell.txt +162 -0
- fedramp_20x_mcp/templates/code/iam_python.txt +224 -0
- fedramp_20x_mcp/templates/code/mla_python.txt +124 -0
- fedramp_20x_mcp/templates/terraform/afr.txt +29 -0
- fedramp_20x_mcp/templates/terraform/cna.txt +50 -0
- fedramp_20x_mcp/templates/terraform/generic.txt +40 -0
- fedramp_20x_mcp/templates/terraform/iam.txt +219 -0
- fedramp_20x_mcp/templates/terraform/mla.txt +29 -0
- fedramp_20x_mcp/templates/terraform/rpl.txt +32 -0
- fedramp_20x_mcp/templates/terraform/svc.txt +46 -0
- fedramp_20x_mcp/tools/__init__.py +167 -0
- fedramp_20x_mcp/tools/definitions.py +154 -0
- fedramp_20x_mcp/tools/documentation.py +155 -0
- fedramp_20x_mcp/tools/enhancements.py +2256 -0
- fedramp_20x_mcp/tools/evidence.py +701 -0
- fedramp_20x_mcp/tools/export.py +753 -0
- fedramp_20x_mcp/tools/ksi.py +90 -0
- fedramp_20x_mcp/tools/requirements.py +163 -0
- fedramp_20x_mcp-0.4.8.dist-info/METADATA +877 -0
- fedramp_20x_mcp-0.4.8.dist-info/RECORD +55 -0
- fedramp_20x_mcp-0.4.8.dist-info/WHEEL +4 -0
- fedramp_20x_mcp-0.4.8.dist-info/entry_points.txt +2 -0
- fedramp_20x_mcp-0.4.8.dist-info/licenses/LICENSE +27 -0
fedramp_20x_mcp/data_loader.py
@@ -0,0 +1,673 @@
"""
FedRAMP Data Loader

This module handles fetching and caching FedRAMP 20x requirements data
from the official GitHub repository.
"""

import json
import logging
import os
from pathlib import Path
from typing import Any, Dict, List, Optional
from datetime import datetime, timedelta

import httpx

logger = logging.getLogger(__name__)

# GitHub API configuration
GITHUB_API_BASE = "https://api.github.com"
GITHUB_RAW_BASE = "https://raw.githubusercontent.com"
FEDRAMP_REPO = "FedRAMP/docs"
FEDRAMP_BRANCH = "main"
DATA_PATH = "data"
DOCS_PATH = "docs"

# Cache configuration
CACHE_DIR = Path(__file__).parent / "__fedramp_cache__"
CACHE_DURATION = timedelta(hours=1)


class FedRAMPDataLoader:
    """Loads and caches FedRAMP 20x requirements data."""

    def __init__(self):
        """Initialize the data loader."""
        self.cache_dir = CACHE_DIR
        self.cache_dir.mkdir(exist_ok=True)
        self._data_cache: Optional[Dict[str, Any]] = None
        self._cache_timestamp: Optional[datetime] = None
        self._docs_cache: Optional[Dict[str, str]] = None
        self._docs_cache_timestamp: Optional[datetime] = None

    def _get_cache_file(self) -> Path:
        """Get the cache file path."""
        return self.cache_dir / "fedramp_controls.json"

    def _get_docs_cache_file(self) -> Path:
        """Get the documentation cache file path."""
        return self.cache_dir / "fedramp_docs.json"

    def _is_cache_valid(self) -> bool:
        """Check if the cache is still valid."""
        if not self._cache_timestamp:
            return False
        return datetime.now() - self._cache_timestamp < CACHE_DURATION

    def _load_from_cache(self) -> Optional[Dict[str, Any]]:
        """Load data from local cache if available and valid."""
        cache_file = self._get_cache_file()

        if not cache_file.exists():
            logger.info("No cache file found")
            return None

        try:
            with open(cache_file, "r", encoding="utf-8") as f:
                data = json.load(f)
            logger.info("Loaded data from cache")
            return data
        except Exception as e:
            logger.error(f"Failed to load cache: {e}")
            return None

    def _save_to_cache(self, data: Dict[str, Any]) -> None:
        """Save data to local cache."""
        cache_file = self._get_cache_file()

        try:
            with open(cache_file, "w", encoding="utf-8") as f:
                json.dump(data, f, indent=2)
            logger.info("Saved data to cache")
        except Exception as e:
            logger.error(f"Failed to save cache: {e}")

    async def _fetch_file_list(self) -> List[Dict[str, Any]]:
        """Fetch the list of JSON files from the GitHub repository."""
        url = f"{GITHUB_API_BASE}/repos/{FEDRAMP_REPO}/contents/{DATA_PATH}"

        # Use GITHUB_TOKEN if available to avoid rate limits
        headers = {}
        github_token = os.environ.get("GITHUB_TOKEN")
        if github_token:
            headers["Authorization"] = f"Bearer {github_token}"

        async with httpx.AsyncClient() as client:
            try:
                response = await client.get(url, headers=headers)
                response.raise_for_status()
                files = response.json()

                # Filter for JSON files
                json_files = [
                    f for f in files
                    if isinstance(f, dict) and f.get("name", "").endswith(".json")
                ]

                logger.info(f"Found {len(json_files)} JSON files in repository")
                return json_files
            except Exception as e:
                logger.error(f"Failed to fetch file list: {e}")
                return []

    async def _fetch_json_file(self, filename: str) -> Optional[Dict[str, Any]]:
        """Fetch a single JSON file from the repository."""
        url = f"{GITHUB_RAW_BASE}/{FEDRAMP_REPO}/{FEDRAMP_BRANCH}/{DATA_PATH}/{filename}"

        async with httpx.AsyncClient() as client:
            try:
                response = await client.get(url)
                response.raise_for_status()
                data = response.json()
                logger.info(f"Fetched {filename}")
                return data
            except Exception as e:
                logger.error(f"Failed to fetch {filename}: {e}")
                return None

    async def load_data(self, force_refresh: bool = False) -> Dict[str, Any]:
        """
        Load FedRAMP requirements data.

        Args:
            force_refresh: Force refresh from remote source

        Returns:
            Dictionary containing all FedRAMP 20x requirement data
        """
        # Check memory cache first
        if not force_refresh and self._data_cache and self._is_cache_valid():
            logger.info("Using in-memory cache")
            return self._data_cache

        # Try to load from disk cache
        if not force_refresh:
            cached_data = self._load_from_cache()
            if cached_data:
                self._data_cache = cached_data
                self._cache_timestamp = datetime.now()
                return cached_data

        # Fetch from remote
        logger.info("Fetching data from GitHub repository")

        files = await self._fetch_file_list()
        if not files:
            # If fetch fails and we have cache, use it even if old
            cached_data = self._load_from_cache()
            if cached_data:
                logger.warning("Using stale cache due to fetch failure")
                return cached_data
            raise Exception("Failed to fetch data and no cache available")

        # Fetch all JSON files
        all_data: Dict[str, Any] = {
            "requirements": {},  # All requirements by ID
            "documents": {},     # Documents by short_name
            "families": {},      # Requirements grouped by family
            "definitions": {},   # FedRAMP definitions (FRD)
            "ksi": {},           # Key Security Indicators (KSI)
            "metadata": {
                "last_updated": datetime.now().isoformat(),
                "source": f"{FEDRAMP_REPO}/{DATA_PATH}",
            }
        }

        for file_info in files:
            filename = file_info.get("name", "")
            data = await self._fetch_json_file(filename)

            if not data:
                continue

            # Extract document info
            info = data.get("info", {})
            short_name = info.get("short_name", filename.replace(".json", ""))

            # Store the document
            all_data["documents"][short_name] = {
                "name": info.get("name", ""),
                "short_name": short_name,
                "effective": info.get("effective", {}),
                "releases": info.get("releases", []),
                "filename": filename,
            }

            # Extract requirements from all sections
            for section_key, section_data in data.items():
                if section_key in ["$schema", "$id", "info"]:
                    continue

                # Each section can have subsections with requirements
                if isinstance(section_data, dict):
                    for subsection_key, subsection_data in section_data.items():
                        # Handle KSI special structure: categories with 'indicators' lists
                        if short_name == "KSI" and isinstance(subsection_data, dict) and "indicators" in subsection_data:
                            indicator_list = subsection_data.get("indicators", [])
                            category_name = subsection_data.get("name", subsection_key)

                            for indicator in indicator_list:
                                if isinstance(indicator, dict) and "id" in indicator:
                                    ksi_id = indicator["id"]

                                    # Add document and category context
                                    indicator["document"] = short_name
                                    indicator["document_name"] = info.get("name", "")
                                    indicator["section"] = f"{section_key}-{subsection_key}"
                                    indicator["category"] = category_name
                                    indicator["category_id"] = subsection_key

                                    # Store in requirements and KSI
                                    all_data["requirements"][ksi_id] = indicator
                                    all_data["ksi"][ksi_id] = indicator

                                    # Extract family from ID
                                    family = ksi_id.split("-")[0] if "-" in ksi_id else "OTHER"
                                    if family not in all_data["families"]:
                                        all_data["families"][family] = []
                                    all_data["families"][family].append(ksi_id)

                        # Handle nested dict structure: check if it contains sub-dicts with 'requirements' key
                        # This handles structures like FRR -> MAS -> base/application/exceptions -> requirements[]
                        elif isinstance(subsection_data, dict) and "requirements" not in subsection_data:
                            # Check if any nested values have 'requirements' key
                            has_nested_requirements = any(
                                isinstance(v, dict) and "requirements" in v
                                for v in subsection_data.values()
                            )

                            if has_nested_requirements:
                                # Iterate over nested sections (base, application, exceptions, etc.)
                                for nested_key, nested_data in subsection_data.items():
                                    if isinstance(nested_data, dict) and "requirements" in nested_data:
                                        req_list = nested_data.get("requirements", [])
                                        nested_name = nested_data.get("name", nested_key)
                                        nested_id = nested_data.get("id", f"{section_key}-{subsection_key}-{nested_key}")

                                        for req in req_list:
                                            if isinstance(req, dict) and "id" in req:
                                                req_id = req["id"]

                                                # Add document context
                                                req["document"] = short_name
                                                req["document_name"] = info.get("name", "")
                                                req["section"] = f"{section_key}-{subsection_key}-{nested_key}"
                                                req["subsection_name"] = nested_name
                                                req["subsection_id"] = nested_id
                                                req["category"] = subsection_key

                                                # Store by ID
                                                all_data["requirements"][req_id] = req

                                                # Extract family from ID
                                                family = req_id.split("-")[0] if "-" in req_id else "OTHER"
                                                if family not in all_data["families"]:
                                                    all_data["families"][family] = []
                                                all_data["families"][family].append(req_id)

                        # Handle direct dict structure with 'requirements' key
                        elif isinstance(subsection_data, dict) and "requirements" in subsection_data:
                            req_list = subsection_data.get("requirements", [])
                            subsection_name = subsection_data.get("name", subsection_key)
                            subsection_id = subsection_data.get("id", f"{section_key}-{subsection_key}")

                            for req in req_list:
                                if isinstance(req, dict) and "id" in req:
                                    req_id = req["id"]

                                    # Add document context
                                    req["document"] = short_name
                                    req["document_name"] = info.get("name", "")
                                    req["section"] = f"{section_key}-{subsection_key}"
                                    req["subsection_name"] = subsection_name
                                    req["subsection_id"] = subsection_id

                                    # Store by ID
                                    all_data["requirements"][req_id] = req

                                    # Extract family from ID
                                    family = req_id.split("-")[0] if "-" in req_id else "OTHER"
                                    if family not in all_data["families"]:
                                        all_data["families"][family] = []
                                    all_data["families"][family].append(req_id)

                                    # Track definitions (FRD) separately
                                    if short_name == "FRD" and "term" in req:
                                        all_data["definitions"][req.get("term", req_id)] = req

                        # Handle regular list-based requirements
                        elif isinstance(subsection_data, list):
                            for req in subsection_data:
                                if isinstance(req, dict) and "id" in req:
                                    req_id = req["id"]

                                    # Add document context
                                    req["document"] = short_name
                                    req["document_name"] = info.get("name", "")
                                    req["section"] = f"{section_key}-{subsection_key}"

                                    # Store by ID
                                    all_data["requirements"][req_id] = req

                                    # Extract family from ID (e.g., "AC" from "AC-1")
                                    family = req_id.split("-")[0] if "-" in req_id else "OTHER"
                                    if family not in all_data["families"]:
                                        all_data["families"][family] = []
                                    all_data["families"][family].append(req_id)

                                    # Track definitions (FRD) separately
                                    if short_name == "FRD" and "term" in req:
                                        all_data["definitions"][req.get("term", req_id)] = req

        # Save to cache
        self._save_to_cache(all_data)
        self._data_cache = all_data
        self._cache_timestamp = datetime.now()

        logger.info(f"Loaded {len(all_data['requirements'])} requirements from {len(all_data['documents'])} documents")
        return all_data

    def get_control(self, control_id: str) -> Optional[Dict[str, Any]]:
        """
        Get a specific requirement by ID.

        Args:
            control_id: The requirement identifier

        Returns:
            Requirement data or None if not found
        """
        if not self._data_cache:
            return None

        return self._data_cache["requirements"].get(control_id.upper())

    def get_family_controls(self, family: str) -> List[Dict[str, Any]]:
        """
        Get all requirements in a specific family.

        Args:
            family: The family identifier

        Returns:
            List of requirements in the family
        """
        if not self._data_cache:
            return []

        family_upper = family.upper()
        req_ids = self._data_cache["families"].get(family_upper, [])

        return [
            self._data_cache["requirements"][req_id]
            for req_id in req_ids
            if req_id in self._data_cache["requirements"]
        ]

    def search_controls(self, keywords: str) -> List[Dict[str, Any]]:
        """
        Search requirements by keywords.

        Args:
            keywords: Keywords to search for

        Returns:
            List of matching requirements
        """
        if not self._data_cache:
            return []

        keywords_lower = keywords.lower()
        results = []

        for req_id, req in self._data_cache["requirements"].items():
            # Search in requirement text fields
            searchable_text = json.dumps(req).lower()
            if keywords_lower in searchable_text:
                results.append(req)

        return results

    def get_definition(self, term: str) -> Optional[Dict[str, Any]]:
        """
        Get a FedRAMP definition by term.

        Args:
            term: The term to look up (case-insensitive)

        Returns:
            Definition data or None if not found
        """
        if not self._data_cache:
            return None

        # Try exact match first
        for key, definition in self._data_cache["definitions"].items():
            if key.lower() == term.lower():
                return definition
            # Check alternatives
            if "alts" in definition:
                for alt in definition["alts"]:
                    if alt.lower() == term.lower():
                        return definition

        return None

    def list_all_definitions(self) -> List[Dict[str, Any]]:
        """
        List all FedRAMP definitions.

        Returns:
            List of all definition entries
        """
        if not self._data_cache:
            return []

        return list(self._data_cache["definitions"].values())

    def get_ksi(self, ksi_id: str) -> Optional[Dict[str, Any]]:
        """
        Get a Key Security Indicator by ID.

        Args:
            ksi_id: The KSI identifier

        Returns:
            KSI data or None if not found
        """
        if not self._data_cache:
            return None

        return self._data_cache["ksi"].get(ksi_id.upper())

    def list_all_ksi(self) -> List[Dict[str, Any]]:
        """
        List all Key Security Indicators.

        Returns:
            List of all KSI entries
        """
        if not self._data_cache:
            return []

        return list(self._data_cache["ksi"].values())

    def search_definitions(self, keywords: str) -> List[Dict[str, Any]]:
        """
        Search FedRAMP definitions by keywords.

        Args:
            keywords: Keywords to search for

        Returns:
            List of matching definitions
        """
        if not self._data_cache:
            return []

        keywords_lower = keywords.lower()
        results = []

        for term, definition in self._data_cache["definitions"].items():
            # Search in definition text
            searchable_text = json.dumps(definition).lower()
            if keywords_lower in searchable_text:
                results.append(definition)

        return results

    async def _fetch_docs_file_list(self) -> List[Dict[str, Any]]:
        """Fetch the list of markdown files from the docs directory."""
        url = f"{GITHUB_API_BASE}/repos/{FEDRAMP_REPO}/contents/{DOCS_PATH}"

        # Use GITHUB_TOKEN if available to avoid rate limits
        headers = {}
        github_token = os.environ.get("GITHUB_TOKEN")
        if github_token:
            headers["Authorization"] = f"Bearer {github_token}"

        async with httpx.AsyncClient() as client:
            try:
                response = await client.get(url, headers=headers)
                response.raise_for_status()
                files = response.json()

                # Filter for markdown files only
                markdown_files = [
                    f for f in files
                    if isinstance(f, dict) and f.get("name", "").endswith(".md") and f.get("type") == "file"
                ]

                logger.info(f"Found {len(markdown_files)} markdown files in docs directory")
                return markdown_files
            except Exception as e:
                logger.error(f"Failed to fetch docs file list: {e}")
                return []

    async def _fetch_markdown_file(self, filename: str) -> Optional[str]:
        """Fetch a single markdown file from the docs directory."""
        url = f"{GITHUB_RAW_BASE}/{FEDRAMP_REPO}/{FEDRAMP_BRANCH}/{DOCS_PATH}/{filename}"

        async with httpx.AsyncClient() as client:
            try:
                response = await client.get(url)
                response.raise_for_status()
                content = response.text
                logger.info(f"Fetched {filename}")
                return content
            except Exception as e:
                logger.error(f"Failed to fetch {filename}: {e}")
                return None

    async def load_documentation(self, force_refresh: bool = False) -> Dict[str, str]:
        """
        Load FedRAMP documentation markdown files.

        Args:
            force_refresh: Force refresh from remote source

        Returns:
            Dictionary mapping filenames to their markdown content
        """
        # Check memory cache first
        if not force_refresh and self._docs_cache and self._docs_cache_timestamp:
            if datetime.now() - self._docs_cache_timestamp < CACHE_DURATION:
                logger.info("Using in-memory docs cache")
                return self._docs_cache

        # Try to load from disk cache
        if not force_refresh:
            cache_file = self._get_docs_cache_file()
            if cache_file.exists():
                try:
                    with open(cache_file, "r", encoding="utf-8") as f:
                        cached_data = json.load(f)
                    logger.info("Loaded documentation from cache")
                    self._docs_cache = cached_data
                    self._docs_cache_timestamp = datetime.now()
                    return cached_data
                except Exception as e:
                    logger.error(f"Failed to load docs cache: {e}")

        # Fetch from remote
        logger.info("Fetching documentation from GitHub repository")

        # Get list of markdown files
        files = await self._fetch_docs_file_list()
        if not files:
            # If fetch fails and we have cache, use it even if old
            cache_file = self._get_docs_cache_file()
            if cache_file.exists():
                try:
                    with open(cache_file, "r", encoding="utf-8") as f:
                        cached_data = json.load(f)
                    logger.warning("Using stale docs cache due to fetch failure")
                    return cached_data
                except Exception as e:
                    logger.error(f"Failed to load stale cache: {e}")
            raise Exception("Failed to fetch documentation and no cache available")

        # Fetch all markdown files
        docs_data: Dict[str, str] = {}

        for file_info in files:
            filename = file_info.get("name", "")
            content = await self._fetch_markdown_file(filename)

            if content:
                docs_data[filename] = content

        # Save to cache
        cache_file = self._get_docs_cache_file()
        try:
            with open(cache_file, "w", encoding="utf-8") as f:
                json.dump(docs_data, f, indent=2)
            logger.info("Saved documentation to cache")
        except Exception as e:
            logger.error(f"Failed to save docs cache: {e}")

        self._docs_cache = docs_data
        self._docs_cache_timestamp = datetime.now()

        logger.info(f"Loaded {len(docs_data)} documentation files")
        return docs_data

    def search_documentation(self, keywords: str) -> List[Dict[str, Any]]:
        """
        Search FedRAMP documentation by keywords.

        Args:
            keywords: Keywords to search for

        Returns:
            List of matching documentation sections with context
        """
        if not self._docs_cache:
            return []

        keywords_lower = keywords.lower()
        results = []

        for filename, content in self._docs_cache.items():
            content_lower = content.lower()

            # Check if keywords appear in the document
            if keywords_lower in content_lower:
                # Find all occurrences with context
                lines = content.split('\n')
                for i, line in enumerate(lines):
                    if keywords_lower in line.lower():
                        # Get context (3 lines before and after)
                        start = max(0, i - 3)
                        end = min(len(lines), i + 4)
                        context_lines = lines[start:end]

                        results.append({
                            "filename": filename,
                            "line_number": i + 1,
                            "match": line.strip(),
                            "context": '\n'.join(context_lines)
                        })

        return results

    def get_documentation_file(self, filename: str) -> Optional[str]:
        """
        Get the full content of a specific documentation file.

        Args:
            filename: The markdown filename (e.g., "overview.md")

        Returns:
            Full markdown content or None if not found
        """
        if not self._docs_cache:
            return None

        return self._docs_cache.get(filename)

    def list_documentation_files(self) -> List[str]:
        """
        List all available documentation files.

        Returns:
            List of documentation filenames
        """
        if not self._docs_cache:
            return []

        return list(self._docs_cache.keys())


# Global data loader instance
_data_loader: Optional[FedRAMPDataLoader] = None


def get_data_loader() -> FedRAMPDataLoader:
    """Get or create the global data loader instance."""
    global _data_loader
    if _data_loader is None:
        _data_loader = FedRAMPDataLoader()
    return _data_loader