fedramp_20x_mcp-0.4.8-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. fedramp_20x_mcp/__init__.py +14 -0
  2. fedramp_20x_mcp/__main__.py +12 -0
  3. fedramp_20x_mcp/data_loader.py +673 -0
  4. fedramp_20x_mcp/prompts/__init__.py +62 -0
  5. fedramp_20x_mcp/prompts/api_design_guide.txt +432 -0
  6. fedramp_20x_mcp/prompts/ato_package_checklist.txt +75 -0
  7. fedramp_20x_mcp/prompts/audit_preparation.txt +592 -0
  8. fedramp_20x_mcp/prompts/authorization_boundary_review.txt +76 -0
  9. fedramp_20x_mcp/prompts/azure_ksi_automation.txt +997 -0
  10. fedramp_20x_mcp/prompts/continuous_monitoring_setup.txt +61 -0
  11. fedramp_20x_mcp/prompts/documentation_generator.txt +499 -0
  12. fedramp_20x_mcp/prompts/gap_analysis.txt +25 -0
  13. fedramp_20x_mcp/prompts/initial_assessment_roadmap.txt +202 -0
  14. fedramp_20x_mcp/prompts/ksi_implementation_priorities.txt +283 -0
  15. fedramp_20x_mcp/prompts/migration_from_rev5.txt +440 -0
  16. fedramp_20x_mcp/prompts/quarterly_review_checklist.txt +231 -0
  17. fedramp_20x_mcp/prompts/significant_change_assessment.txt +50 -0
  18. fedramp_20x_mcp/prompts/vendor_evaluation.txt +349 -0
  19. fedramp_20x_mcp/prompts/vulnerability_remediation_timeline.txt +45 -0
  20. fedramp_20x_mcp/server.py +270 -0
  21. fedramp_20x_mcp/templates/__init__.py +75 -0
  22. fedramp_20x_mcp/templates/bicep/afr.txt +33 -0
  23. fedramp_20x_mcp/templates/bicep/cna.txt +48 -0
  24. fedramp_20x_mcp/templates/bicep/generic.txt +47 -0
  25. fedramp_20x_mcp/templates/bicep/iam.txt +211 -0
  26. fedramp_20x_mcp/templates/bicep/mla.txt +82 -0
  27. fedramp_20x_mcp/templates/bicep/rpl.txt +44 -0
  28. fedramp_20x_mcp/templates/bicep/svc.txt +54 -0
  29. fedramp_20x_mcp/templates/code/generic_csharp.txt +65 -0
  30. fedramp_20x_mcp/templates/code/generic_powershell.txt +65 -0
  31. fedramp_20x_mcp/templates/code/generic_python.txt +63 -0
  32. fedramp_20x_mcp/templates/code/iam_csharp.txt +150 -0
  33. fedramp_20x_mcp/templates/code/iam_powershell.txt +162 -0
  34. fedramp_20x_mcp/templates/code/iam_python.txt +224 -0
  35. fedramp_20x_mcp/templates/code/mla_python.txt +124 -0
  36. fedramp_20x_mcp/templates/terraform/afr.txt +29 -0
  37. fedramp_20x_mcp/templates/terraform/cna.txt +50 -0
  38. fedramp_20x_mcp/templates/terraform/generic.txt +40 -0
  39. fedramp_20x_mcp/templates/terraform/iam.txt +219 -0
  40. fedramp_20x_mcp/templates/terraform/mla.txt +29 -0
  41. fedramp_20x_mcp/templates/terraform/rpl.txt +32 -0
  42. fedramp_20x_mcp/templates/terraform/svc.txt +46 -0
  43. fedramp_20x_mcp/tools/__init__.py +167 -0
  44. fedramp_20x_mcp/tools/definitions.py +154 -0
  45. fedramp_20x_mcp/tools/documentation.py +155 -0
  46. fedramp_20x_mcp/tools/enhancements.py +2256 -0
  47. fedramp_20x_mcp/tools/evidence.py +701 -0
  48. fedramp_20x_mcp/tools/export.py +753 -0
  49. fedramp_20x_mcp/tools/ksi.py +90 -0
  50. fedramp_20x_mcp/tools/requirements.py +163 -0
  51. fedramp_20x_mcp-0.4.8.dist-info/METADATA +877 -0
  52. fedramp_20x_mcp-0.4.8.dist-info/RECORD +55 -0
  53. fedramp_20x_mcp-0.4.8.dist-info/WHEEL +4 -0
  54. fedramp_20x_mcp-0.4.8.dist-info/entry_points.txt +2 -0
  55. fedramp_20x_mcp-0.4.8.dist-info/licenses/LICENSE +27 -0
fedramp_20x_mcp/data_loader.py
@@ -0,0 +1,673 @@
+ """
+ FedRAMP Data Loader
+
+ This module handles fetching and caching FedRAMP 20x requirements data
+ from the official GitHub repository.
+ """
+
+ import json
+ import logging
+ import os
+ from pathlib import Path
+ from typing import Any, Dict, List, Optional
+ from datetime import datetime, timedelta
+
+ import httpx
+
+ logger = logging.getLogger(__name__)
+
+ # GitHub API configuration
+ GITHUB_API_BASE = "https://api.github.com"
+ GITHUB_RAW_BASE = "https://raw.githubusercontent.com"
+ FEDRAMP_REPO = "FedRAMP/docs"
+ FEDRAMP_BRANCH = "main"
+ DATA_PATH = "data"
+ DOCS_PATH = "docs"
+
+ # Cache configuration
+ CACHE_DIR = Path(__file__).parent / "__fedramp_cache__"
+ CACHE_DURATION = timedelta(hours=1)
+
+
+ class FedRAMPDataLoader:
+     """Loads and caches FedRAMP 20x requirements data."""
+
+     def __init__(self):
+         """Initialize the data loader."""
+         self.cache_dir = CACHE_DIR
+         self.cache_dir.mkdir(exist_ok=True)
+         self._data_cache: Optional[Dict[str, Any]] = None
+         self._cache_timestamp: Optional[datetime] = None
+         self._docs_cache: Optional[Dict[str, str]] = None
+         self._docs_cache_timestamp: Optional[datetime] = None
+
+     def _get_cache_file(self) -> Path:
+         """Get the cache file path."""
+         return self.cache_dir / "fedramp_controls.json"
+
+     def _get_docs_cache_file(self) -> Path:
+         """Get the documentation cache file path."""
+         return self.cache_dir / "fedramp_docs.json"
+
+     def _is_cache_valid(self) -> bool:
+         """Check if the in-memory cache is still within its TTL."""
+         if not self._cache_timestamp:
+             return False
+         return datetime.now() - self._cache_timestamp < CACHE_DURATION
+
+     def _load_from_cache(self) -> Optional[Dict[str, Any]]:
+         """Load data from the local disk cache, if present and parseable."""
+         cache_file = self._get_cache_file()
+
+         if not cache_file.exists():
+             logger.info("No cache file found")
+             return None
+
+         try:
+             with open(cache_file, "r", encoding="utf-8") as f:
+                 data = json.load(f)
+             logger.info("Loaded data from cache")
+             return data
+         except Exception as e:
+             logger.error(f"Failed to load cache: {e}")
+             return None
+
+     def _save_to_cache(self, data: Dict[str, Any]) -> None:
+         """Save data to the local disk cache."""
+         cache_file = self._get_cache_file()
+
+         try:
+             with open(cache_file, "w", encoding="utf-8") as f:
+                 json.dump(data, f, indent=2)
+             logger.info("Saved data to cache")
+         except Exception as e:
+             logger.error(f"Failed to save cache: {e}")
+
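# --- Editorial note on the two cache tiers above (not part of the wheel) ---
# The in-memory cache expires after CACHE_DURATION (one hour); the JSON file
# under __fedramp_cache__/ has no age check of its own, so a stale disk cache
# is still accepted on start-up. Passing force_refresh=True to load_data(),
# or deleting the cache file, forces a cold fetch:
#
#   from fedramp_20x_mcp.data_loader import CACHE_DIR
#   (CACHE_DIR / "fedramp_controls.json").unlink(missing_ok=True)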
+     async def _fetch_file_list(self) -> List[Dict[str, Any]]:
+         """Fetch the list of JSON files from the GitHub repository."""
+         url = f"{GITHUB_API_BASE}/repos/{FEDRAMP_REPO}/contents/{DATA_PATH}"
+
+         # Use GITHUB_TOKEN if available to avoid rate limits
+         headers = {}
+         github_token = os.environ.get("GITHUB_TOKEN")
+         if github_token:
+             headers["Authorization"] = f"Bearer {github_token}"
+
+         async with httpx.AsyncClient() as client:
+             try:
+                 response = await client.get(url, headers=headers)
+                 response.raise_for_status()
+                 files = response.json()
+
+                 # Filter for JSON files
+                 json_files = [
+                     f for f in files
+                     if isinstance(f, dict) and f.get("name", "").endswith(".json")
+                 ]
+
+                 logger.info(f"Found {len(json_files)} JSON files in repository")
+                 return json_files
+             except Exception as e:
+                 logger.error(f"Failed to fetch file list: {e}")
+                 return []
+
+     async def _fetch_json_file(self, filename: str) -> Optional[Dict[str, Any]]:
+         """Fetch a single JSON file from the repository."""
+         url = f"{GITHUB_RAW_BASE}/{FEDRAMP_REPO}/{FEDRAMP_BRANCH}/{DATA_PATH}/{filename}"
+
+         async with httpx.AsyncClient() as client:
+             try:
+                 response = await client.get(url)
+                 response.raise_for_status()
+                 data = response.json()
+                 logger.info(f"Fetched {filename}")
+                 return data
+             except Exception as e:
+                 logger.error(f"Failed to fetch {filename}: {e}")
+                 return None
+
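# --- Editorial note (not part of the wheel) ---
# Unauthenticated calls to api.github.com are rate-limited to roughly 60
# requests per hour per IP, while authenticated calls get a far larger quota.
# Because the fetch helpers above read GITHUB_TOKEN from the environment,
# exporting a personal access token before launching the server avoids most
# rate-limit failures:
#
#   export GITHUB_TOKEN=<your-token>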
+     async def load_data(self, force_refresh: bool = False) -> Dict[str, Any]:
+         """
+         Load FedRAMP requirements data.
+
+         Args:
+             force_refresh: Force refresh from remote source
+
+         Returns:
+             Dictionary containing all FedRAMP 20x requirement data
+         """
+         # Check memory cache first
+         if not force_refresh and self._data_cache and self._is_cache_valid():
+             logger.info("Using in-memory cache")
+             return self._data_cache
+
+         # Try to load from disk cache
+         if not force_refresh:
+             cached_data = self._load_from_cache()
+             if cached_data:
+                 self._data_cache = cached_data
+                 self._cache_timestamp = datetime.now()
+                 return cached_data
+
+         # Fetch from remote
+         logger.info("Fetching data from GitHub repository")
+
+         files = await self._fetch_file_list()
+         if not files:
+             # If fetch fails and we have cache, use it even if old
+             cached_data = self._load_from_cache()
+             if cached_data:
+                 logger.warning("Using stale cache due to fetch failure")
+                 return cached_data
+             raise Exception("Failed to fetch data and no cache available")
+
+         # Fetch all JSON files
+         all_data: Dict[str, Any] = {
+             "requirements": {},  # All requirements by ID
+             "documents": {},     # Documents by short_name
+             "families": {},      # Requirements grouped by family
+             "definitions": {},   # FedRAMP definitions (FRD)
+             "ksi": {},           # Key Security Indicators (KSI)
+             "metadata": {
+                 "last_updated": datetime.now().isoformat(),
+                 "source": f"{FEDRAMP_REPO}/{DATA_PATH}",
+             },
+         }
+
+         for file_info in files:
+             filename = file_info.get("name", "")
+             data = await self._fetch_json_file(filename)
+
+             if not data:
+                 continue
+
+             # Extract document info
+             info = data.get("info", {})
+             short_name = info.get("short_name", filename.replace(".json", ""))
+
+             # Store the document
+             all_data["documents"][short_name] = {
+                 "name": info.get("name", ""),
+                 "short_name": short_name,
+                 "effective": info.get("effective", {}),
+                 "releases": info.get("releases", []),
+                 "filename": filename,
+             }
+
+             # Extract requirements from all sections
+             for section_key, section_data in data.items():
+                 if section_key in ["$schema", "$id", "info"]:
+                     continue
+
+                 # Each section can have subsections with requirements
+                 if isinstance(section_data, dict):
+                     for subsection_key, subsection_data in section_data.items():
+                         # Handle KSI special structure: categories with 'indicators' lists
+                         if short_name == "KSI" and isinstance(subsection_data, dict) and "indicators" in subsection_data:
+                             indicator_list = subsection_data.get("indicators", [])
+                             category_name = subsection_data.get("name", subsection_key)
+
+                             for indicator in indicator_list:
+                                 if isinstance(indicator, dict) and "id" in indicator:
+                                     ksi_id = indicator["id"]
+
+                                     # Add document and category context
+                                     indicator["document"] = short_name
+                                     indicator["document_name"] = info.get("name", "")
+                                     indicator["section"] = f"{section_key}-{subsection_key}"
+                                     indicator["category"] = category_name
+                                     indicator["category_id"] = subsection_key
+
+                                     # Store in requirements and KSI
+                                     all_data["requirements"][ksi_id] = indicator
+                                     all_data["ksi"][ksi_id] = indicator
+
+                                     # Extract family from ID
+                                     family = ksi_id.split("-")[0] if "-" in ksi_id else "OTHER"
+                                     if family not in all_data["families"]:
+                                         all_data["families"][family] = []
+                                     all_data["families"][family].append(ksi_id)
+
+                         # Handle nested dict structure: check if it contains sub-dicts with 'requirements' key
+                         # This handles structures like FRR -> MAS -> base/application/exceptions -> requirements[]
+                         elif isinstance(subsection_data, dict) and "requirements" not in subsection_data:
+                             # Check if any nested values have 'requirements' key
+                             has_nested_requirements = any(
+                                 isinstance(v, dict) and "requirements" in v
+                                 for v in subsection_data.values()
+                             )
+
+                             if has_nested_requirements:
+                                 # Iterate over nested sections (base, application, exceptions, etc.)
+                                 for nested_key, nested_data in subsection_data.items():
+                                     if isinstance(nested_data, dict) and "requirements" in nested_data:
+                                         req_list = nested_data.get("requirements", [])
+                                         nested_name = nested_data.get("name", nested_key)
+                                         nested_id = nested_data.get("id", f"{section_key}-{subsection_key}-{nested_key}")
+
+                                         for req in req_list:
+                                             if isinstance(req, dict) and "id" in req:
+                                                 req_id = req["id"]
+
+                                                 # Add document context
+                                                 req["document"] = short_name
+                                                 req["document_name"] = info.get("name", "")
+                                                 req["section"] = f"{section_key}-{subsection_key}-{nested_key}"
+                                                 req["subsection_name"] = nested_name
+                                                 req["subsection_id"] = nested_id
+                                                 req["category"] = subsection_key
+
+                                                 # Store by ID
+                                                 all_data["requirements"][req_id] = req
+
+                                                 # Extract family from ID
+                                                 family = req_id.split("-")[0] if "-" in req_id else "OTHER"
+                                                 if family not in all_data["families"]:
+                                                     all_data["families"][family] = []
+                                                 all_data["families"][family].append(req_id)
+
+                         # Handle direct dict structure with 'requirements' key
+                         elif isinstance(subsection_data, dict) and "requirements" in subsection_data:
+                             req_list = subsection_data.get("requirements", [])
+                             subsection_name = subsection_data.get("name", subsection_key)
+                             subsection_id = subsection_data.get("id", f"{section_key}-{subsection_key}")
+
+                             for req in req_list:
+                                 if isinstance(req, dict) and "id" in req:
+                                     req_id = req["id"]
+
+                                     # Add document context
+                                     req["document"] = short_name
+                                     req["document_name"] = info.get("name", "")
+                                     req["section"] = f"{section_key}-{subsection_key}"
+                                     req["subsection_name"] = subsection_name
+                                     req["subsection_id"] = subsection_id
+
+                                     # Store by ID
+                                     all_data["requirements"][req_id] = req
+
+                                     # Extract family from ID
+                                     family = req_id.split("-")[0] if "-" in req_id else "OTHER"
+                                     if family not in all_data["families"]:
+                                         all_data["families"][family] = []
+                                     all_data["families"][family].append(req_id)
+
+                                     # Track definitions (FRD) separately
+                                     if short_name == "FRD" and "term" in req:
+                                         all_data["definitions"][req.get("term", req_id)] = req
+
+                         # Handle regular list-based requirements
+                         elif isinstance(subsection_data, list):
+                             for req in subsection_data:
+                                 if isinstance(req, dict) and "id" in req:
+                                     req_id = req["id"]
+
+                                     # Add document context
+                                     req["document"] = short_name
+                                     req["document_name"] = info.get("name", "")
+                                     req["section"] = f"{section_key}-{subsection_key}"
+
+                                     # Store by ID
+                                     all_data["requirements"][req_id] = req
+
+                                     # Extract family from ID (e.g., "AC" from "AC-1")
+                                     family = req_id.split("-")[0] if "-" in req_id else "OTHER"
+                                     if family not in all_data["families"]:
+                                         all_data["families"][family] = []
+                                     all_data["families"][family].append(req_id)
+
+                                     # Track definitions (FRD) separately
+                                     if short_name == "FRD" and "term" in req:
+                                         all_data["definitions"][req.get("term", req_id)] = req
+
+         # Save to cache
+         self._save_to_cache(all_data)
+         self._data_cache = all_data
+         self._cache_timestamp = datetime.now()
+
+         logger.info(f"Loaded {len(all_data['requirements'])} requirements from {len(all_data['documents'])} documents")
+         return all_data
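# --- Illustrative usage sketch (editorial addition, not part of the wheel) ---
# load_data() is a coroutine, so it must run inside an event loop. The keys
# below ("requirements", "families", "metadata") are the ones built above.

import asyncio

from fedramp_20x_mcp.data_loader import FedRAMPDataLoader

async def main() -> None:
    loader = FedRAMPDataLoader()
    data = await loader.load_data()                # cache-aware fetch
    print(len(data["requirements"]), "requirements loaded")
    print("families:", sorted(data["families"]))   # prefixes parsed from IDs
    print("updated:", data["metadata"]["last_updated"])

asyncio.run(main())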
+
+     def get_control(self, control_id: str) -> Optional[Dict[str, Any]]:
+         """
+         Get a specific requirement by ID.
+
+         Args:
+             control_id: The requirement identifier
+
+         Returns:
+             Requirement data or None if not found
+         """
+         if not self._data_cache:
+             return None
+
+         return self._data_cache["requirements"].get(control_id.upper())
+
+     def get_family_controls(self, family: str) -> List[Dict[str, Any]]:
+         """
+         Get all requirements in a specific family.
+
+         Args:
+             family: The family identifier
+
+         Returns:
+             List of requirements in the family
+         """
+         if not self._data_cache:
+             return []
+
+         family_upper = family.upper()
+         req_ids = self._data_cache["families"].get(family_upper, [])
+
+         return [
+             self._data_cache["requirements"][req_id]
+             for req_id in req_ids
+             if req_id in self._data_cache["requirements"]
+         ]
+
+     def search_controls(self, keywords: str) -> List[Dict[str, Any]]:
+         """
+         Search requirements by keywords.
+
+         Args:
+             keywords: Keywords to search for
+
+         Returns:
+             List of matching requirements
+         """
+         if not self._data_cache:
+             return []
+
+         keywords_lower = keywords.lower()
+         results = []
+
+         for req in self._data_cache["requirements"].values():
+             # Search in requirement text fields
+             searchable_text = json.dumps(req).lower()
+             if keywords_lower in searchable_text:
+                 results.append(req)
+
+         return results
+
+     def get_definition(self, term: str) -> Optional[Dict[str, Any]]:
+         """
+         Get a FedRAMP definition by term.
+
+         Args:
+             term: The term to look up (case-insensitive)
+
+         Returns:
+             Definition data or None if not found
+         """
+         if not self._data_cache:
+             return None
+
+         # Try exact match first
+         for key, definition in self._data_cache["definitions"].items():
+             if key.lower() == term.lower():
+                 return definition
+             # Check alternatives
+             if "alts" in definition:
+                 for alt in definition["alts"]:
+                     if alt.lower() == term.lower():
+                         return definition
+
+         return None
+
+     def list_all_definitions(self) -> List[Dict[str, Any]]:
+         """
+         List all FedRAMP definitions.
+
+         Returns:
+             List of all definition entries
+         """
+         if not self._data_cache:
+             return []
+
+         return list(self._data_cache["definitions"].values())
+
+     def get_ksi(self, ksi_id: str) -> Optional[Dict[str, Any]]:
+         """
+         Get a Key Security Indicator by ID.
+
+         Args:
+             ksi_id: The KSI identifier
+
+         Returns:
+             KSI data or None if not found
+         """
+         if not self._data_cache:
+             return None
+
+         return self._data_cache["ksi"].get(ksi_id.upper())
+
+     def list_all_ksi(self) -> List[Dict[str, Any]]:
+         """
+         List all Key Security Indicators.
+
+         Returns:
+             List of all KSI entries
+         """
+         if not self._data_cache:
+             return []
+
+         return list(self._data_cache["ksi"].values())
+
+     def search_definitions(self, keywords: str) -> List[Dict[str, Any]]:
+         """
+         Search FedRAMP definitions by keywords.
+
+         Args:
+             keywords: Keywords to search for
+
+         Returns:
+             List of matching definitions
+         """
+         if not self._data_cache:
+             return []
+
+         keywords_lower = keywords.lower()
+         results = []
+
+         for definition in self._data_cache["definitions"].values():
+             # Search in definition text
+             searchable_text = json.dumps(definition).lower()
+             if keywords_lower in searchable_text:
+                 results.append(definition)
+
+         return results
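# --- Illustrative sketch of the synchronous query helpers (editorial) ---
# These methods only read the in-memory cache, so load_data() must have
# completed first; otherwise they return None or []. The IDs and terms below
# are hypothetical examples, not values confirmed by this diff.

from fedramp_20x_mcp.data_loader import get_data_loader  # defined at module end

loader = get_data_loader()
control = loader.get_control("frr-mas-01")     # lookup is upper-cased internally
family = loader.get_family_controls("KSI")     # family prefix parsed from IDs
hits = loader.search_controls("encryption")    # substring match over JSON text
term = loader.get_definition("authorization")  # also checks 'alts' spellings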
+
+     async def _fetch_docs_file_list(self) -> List[Dict[str, Any]]:
+         """Fetch the list of markdown files from the docs directory."""
+         url = f"{GITHUB_API_BASE}/repos/{FEDRAMP_REPO}/contents/{DOCS_PATH}"
+
+         # Use GITHUB_TOKEN if available to avoid rate limits
+         headers = {}
+         github_token = os.environ.get("GITHUB_TOKEN")
+         if github_token:
+             headers["Authorization"] = f"Bearer {github_token}"
+
+         async with httpx.AsyncClient() as client:
+             try:
+                 response = await client.get(url, headers=headers)
+                 response.raise_for_status()
+                 files = response.json()
+
+                 # Filter for markdown files only
+                 markdown_files = [
+                     f for f in files
+                     if isinstance(f, dict) and f.get("name", "").endswith(".md") and f.get("type") == "file"
+                 ]
+
+                 logger.info(f"Found {len(markdown_files)} markdown files in docs directory")
+                 return markdown_files
+             except Exception as e:
+                 logger.error(f"Failed to fetch docs file list: {e}")
+                 return []
+
+     async def _fetch_markdown_file(self, filename: str) -> Optional[str]:
+         """Fetch a single markdown file from the docs directory."""
+         url = f"{GITHUB_RAW_BASE}/{FEDRAMP_REPO}/{FEDRAMP_BRANCH}/{DOCS_PATH}/{filename}"
+
+         async with httpx.AsyncClient() as client:
+             try:
+                 response = await client.get(url)
+                 response.raise_for_status()
+                 content = response.text
+                 logger.info(f"Fetched {filename}")
+                 return content
+             except Exception as e:
+                 logger.error(f"Failed to fetch {filename}: {e}")
+                 return None
+
+     async def load_documentation(self, force_refresh: bool = False) -> Dict[str, str]:
+         """
+         Load FedRAMP documentation markdown files.
+
+         Args:
+             force_refresh: Force refresh from remote source
+
+         Returns:
+             Dictionary mapping filenames to their markdown content
+         """
+         # Check memory cache first
+         if not force_refresh and self._docs_cache and self._docs_cache_timestamp:
+             if datetime.now() - self._docs_cache_timestamp < CACHE_DURATION:
+                 logger.info("Using in-memory docs cache")
+                 return self._docs_cache
+
+         # Try to load from disk cache
+         if not force_refresh:
+             cache_file = self._get_docs_cache_file()
+             if cache_file.exists():
+                 try:
+                     with open(cache_file, "r", encoding="utf-8") as f:
+                         cached_data = json.load(f)
+                     logger.info("Loaded documentation from cache")
+                     self._docs_cache = cached_data
+                     self._docs_cache_timestamp = datetime.now()
+                     return cached_data
+                 except Exception as e:
+                     logger.error(f"Failed to load docs cache: {e}")
+
+         # Fetch from remote
+         logger.info("Fetching documentation from GitHub repository")
+
+         # Get list of markdown files
+         files = await self._fetch_docs_file_list()
+         if not files:
+             # If fetch fails and we have cache, use it even if old
+             cache_file = self._get_docs_cache_file()
+             if cache_file.exists():
+                 try:
+                     with open(cache_file, "r", encoding="utf-8") as f:
+                         cached_data = json.load(f)
+                     logger.warning("Using stale docs cache due to fetch failure")
+                     return cached_data
+                 except Exception as e:
+                     logger.error(f"Failed to load stale cache: {e}")
+             raise Exception("Failed to fetch documentation and no cache available")
+
+         # Fetch all markdown files
+         docs_data: Dict[str, str] = {}
+
+         for file_info in files:
+             filename = file_info.get("name", "")
+             content = await self._fetch_markdown_file(filename)
+
+             if content:
+                 docs_data[filename] = content
+
+         # Save to cache
+         cache_file = self._get_docs_cache_file()
+         try:
+             with open(cache_file, "w", encoding="utf-8") as f:
+                 json.dump(docs_data, f, indent=2)
+             logger.info("Saved documentation to cache")
+         except Exception as e:
+             logger.error(f"Failed to save docs cache: {e}")
+
+         self._docs_cache = docs_data
+         self._docs_cache_timestamp = datetime.now()
+
+         logger.info(f"Loaded {len(docs_data)} documentation files")
+         return docs_data
+
+     def search_documentation(self, keywords: str) -> List[Dict[str, Any]]:
+         """
+         Search FedRAMP documentation by keywords.
+
+         Args:
+             keywords: Keywords to search for
+
+         Returns:
+             List of matching documentation sections with context
+         """
+         if not self._docs_cache:
+             return []
+
+         keywords_lower = keywords.lower()
+         results = []
+
+         for filename, content in self._docs_cache.items():
+             content_lower = content.lower()
+
+             # Check if keywords appear in the document
+             if keywords_lower in content_lower:
+                 # Find all occurrences with context
+                 lines = content.split('\n')
+                 for i, line in enumerate(lines):
+                     if keywords_lower in line.lower():
+                         # Get context (3 lines before and after)
+                         start = max(0, i - 3)
+                         end = min(len(lines), i + 4)
+                         context_lines = lines[start:end]
+
+                         results.append({
+                             "filename": filename,
+                             "line_number": i + 1,
+                             "match": line.strip(),
+                             "context": '\n'.join(context_lines)
+                         })
+
+         return results
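# --- Illustrative usage sketch (editorial addition) ---
# The documentation API mirrors the requirements API: an async loader plus
# synchronous helpers over the in-memory cache. The search term below is an
# arbitrary example.

import asyncio

from fedramp_20x_mcp.data_loader import get_data_loader

async def show_docs() -> None:
    loader = get_data_loader()
    await loader.load_documentation()
    for hit in loader.search_documentation("continuous monitoring")[:3]:
        print(hit["filename"], hit["line_number"], hit["match"])

asyncio.run(show_docs())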
+
+     def get_documentation_file(self, filename: str) -> Optional[str]:
+         """
+         Get the full content of a specific documentation file.
+
+         Args:
+             filename: The markdown filename (e.g., "overview.md")
+
+         Returns:
+             Full markdown content or None if not found
+         """
+         if not self._docs_cache:
+             return None
+
+         return self._docs_cache.get(filename)
+
+     def list_documentation_files(self) -> List[str]:
+         """
+         List all available documentation files.
+
+         Returns:
+             List of documentation filenames
+         """
+         if not self._docs_cache:
+             return []
+
+         return list(self._docs_cache.keys())
+
+
+ # Global data loader instance
+ _data_loader: Optional[FedRAMPDataLoader] = None
+
+
+ def get_data_loader() -> FedRAMPDataLoader:
+     """Get or create the global data loader instance."""
+     global _data_loader
+     if _data_loader is None:
+         _data_loader = FedRAMPDataLoader()
+     return _data_loader
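# --- Editorial note on the singleton above ---
# get_data_loader() memoises one FedRAMPDataLoader per process, so every tool
# module in the package shares the same in-memory caches. A minimal warm-up at
# server start might look like this (a sketch, assuming the entry point runs
# its own event loop):

import asyncio

from fedramp_20x_mcp.data_loader import get_data_loader

async def warm_up() -> None:
    loader = get_data_loader()
    await loader.load_data()           # requirements, KSIs, definitions
    await loader.load_documentation()  # markdown guidance

asyncio.run(warm_up())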