regscale-cli 6.27.2.0__py3-none-any.whl → 6.28.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (140)
  1. regscale/_version.py +1 -1
  2. regscale/core/app/application.py +1 -0
  3. regscale/core/app/internal/control_editor.py +73 -21
  4. regscale/core/app/internal/login.py +4 -1
  5. regscale/core/app/internal/model_editor.py +219 -64
  6. regscale/core/app/utils/app_utils.py +11 -2
  7. regscale/core/login.py +21 -4
  8. regscale/core/utils/date.py +77 -1
  9. regscale/dev/cli.py +26 -0
  10. regscale/dev/version.py +72 -0
  11. regscale/integrations/commercial/__init__.py +15 -1
  12. regscale/integrations/commercial/amazon/amazon/__init__.py +0 -0
  13. regscale/integrations/commercial/amazon/amazon/common.py +204 -0
  14. regscale/integrations/commercial/amazon/common.py +48 -58
  15. regscale/integrations/commercial/aws/audit_manager_compliance.py +2671 -0
  16. regscale/integrations/commercial/aws/cli.py +3093 -55
  17. regscale/integrations/commercial/aws/cloudtrail_control_mappings.py +333 -0
  18. regscale/integrations/commercial/aws/cloudtrail_evidence.py +501 -0
  19. regscale/integrations/commercial/aws/cloudwatch_control_mappings.py +357 -0
  20. regscale/integrations/commercial/aws/cloudwatch_evidence.py +490 -0
  21. regscale/integrations/commercial/aws/config_compliance.py +914 -0
  22. regscale/integrations/commercial/aws/conformance_pack_mappings.py +198 -0
  23. regscale/integrations/commercial/aws/evidence_generator.py +283 -0
  24. regscale/integrations/commercial/aws/guardduty_control_mappings.py +340 -0
  25. regscale/integrations/commercial/aws/guardduty_evidence.py +1053 -0
  26. regscale/integrations/commercial/aws/iam_control_mappings.py +368 -0
  27. regscale/integrations/commercial/aws/iam_evidence.py +574 -0
  28. regscale/integrations/commercial/aws/inventory/__init__.py +223 -22
  29. regscale/integrations/commercial/aws/inventory/base.py +107 -5
  30. regscale/integrations/commercial/aws/inventory/resources/audit_manager.py +513 -0
  31. regscale/integrations/commercial/aws/inventory/resources/cloudtrail.py +315 -0
  32. regscale/integrations/commercial/aws/inventory/resources/cloudtrail_logs_metadata.py +476 -0
  33. regscale/integrations/commercial/aws/inventory/resources/cloudwatch.py +191 -0
  34. regscale/integrations/commercial/aws/inventory/resources/compute.py +66 -9
  35. regscale/integrations/commercial/aws/inventory/resources/config.py +464 -0
  36. regscale/integrations/commercial/aws/inventory/resources/containers.py +74 -9
  37. regscale/integrations/commercial/aws/inventory/resources/database.py +106 -31
  38. regscale/integrations/commercial/aws/inventory/resources/guardduty.py +286 -0
  39. regscale/integrations/commercial/aws/inventory/resources/iam.py +470 -0
  40. regscale/integrations/commercial/aws/inventory/resources/inspector.py +476 -0
  41. regscale/integrations/commercial/aws/inventory/resources/integration.py +175 -61
  42. regscale/integrations/commercial/aws/inventory/resources/kms.py +447 -0
  43. regscale/integrations/commercial/aws/inventory/resources/networking.py +103 -67
  44. regscale/integrations/commercial/aws/inventory/resources/s3.py +394 -0
  45. regscale/integrations/commercial/aws/inventory/resources/security.py +268 -72
  46. regscale/integrations/commercial/aws/inventory/resources/securityhub.py +473 -0
  47. regscale/integrations/commercial/aws/inventory/resources/storage.py +53 -29
  48. regscale/integrations/commercial/aws/inventory/resources/systems_manager.py +657 -0
  49. regscale/integrations/commercial/aws/inventory/resources/vpc.py +655 -0
  50. regscale/integrations/commercial/aws/kms_control_mappings.py +288 -0
  51. regscale/integrations/commercial/aws/kms_evidence.py +879 -0
  52. regscale/integrations/commercial/aws/ocsf/__init__.py +7 -0
  53. regscale/integrations/commercial/aws/ocsf/constants.py +115 -0
  54. regscale/integrations/commercial/aws/ocsf/mapper.py +435 -0
  55. regscale/integrations/commercial/aws/org_control_mappings.py +286 -0
  56. regscale/integrations/commercial/aws/org_evidence.py +666 -0
  57. regscale/integrations/commercial/aws/s3_control_mappings.py +356 -0
  58. regscale/integrations/commercial/aws/s3_evidence.py +632 -0
  59. regscale/integrations/commercial/aws/scanner.py +853 -205
  60. regscale/integrations/commercial/aws/security_hub.py +319 -0
  61. regscale/integrations/commercial/aws/session_manager.py +282 -0
  62. regscale/integrations/commercial/aws/ssm_control_mappings.py +291 -0
  63. regscale/integrations/commercial/aws/ssm_evidence.py +492 -0
  64. regscale/integrations/commercial/synqly/query_builder.py +4 -1
  65. regscale/integrations/compliance_integration.py +308 -38
  66. regscale/integrations/control_matcher.py +78 -23
  67. regscale/integrations/due_date_handler.py +3 -0
  68. regscale/integrations/public/csam/csam.py +572 -763
  69. regscale/integrations/public/csam/csam_agency_defined.py +179 -0
  70. regscale/integrations/public/csam/csam_common.py +154 -0
  71. regscale/integrations/public/csam/csam_controls.py +432 -0
  72. regscale/integrations/public/csam/csam_poam.py +124 -0
  73. regscale/integrations/public/fedramp/click.py +17 -4
  74. regscale/integrations/public/fedramp/fedramp_cis_crm.py +271 -62
  75. regscale/integrations/public/fedramp/poam/scanner.py +74 -7
  76. regscale/integrations/scanner_integration.py +415 -85
  77. regscale/models/integration_models/cisa_kev_data.json +80 -20
  78. regscale/models/integration_models/synqly_models/capabilities.json +1 -1
  79. regscale/models/integration_models/synqly_models/connectors/vulnerabilities.py +44 -3
  80. regscale/models/integration_models/synqly_models/ocsf_mapper.py +41 -12
  81. regscale/models/platform.py +3 -0
  82. regscale/models/regscale_models/__init__.py +5 -0
  83. regscale/models/regscale_models/assessment.py +2 -1
  84. regscale/models/regscale_models/component.py +1 -1
  85. regscale/models/regscale_models/control_implementation.py +55 -24
  86. regscale/models/regscale_models/control_objective.py +74 -5
  87. regscale/models/regscale_models/file.py +2 -0
  88. regscale/models/regscale_models/issue.py +2 -5
  89. regscale/models/regscale_models/organization.py +3 -0
  90. regscale/models/regscale_models/regscale_model.py +17 -5
  91. regscale/models/regscale_models/security_plan.py +1 -0
  92. regscale/regscale.py +11 -1
  93. {regscale_cli-6.27.2.0.dist-info → regscale_cli-6.28.0.0.dist-info}/METADATA +1 -1
  94. {regscale_cli-6.27.2.0.dist-info → regscale_cli-6.28.0.0.dist-info}/RECORD +140 -57
  95. tests/regscale/core/test_login.py +171 -4
  96. tests/regscale/integrations/commercial/aws/__init__.py +0 -0
  97. tests/regscale/integrations/commercial/aws/test_audit_manager_compliance.py +1304 -0
  98. tests/regscale/integrations/commercial/aws/test_audit_manager_evidence_aggregation.py +341 -0
  99. tests/regscale/integrations/commercial/aws/test_aws_audit_manager_collector.py +1155 -0
  100. tests/regscale/integrations/commercial/aws/test_aws_cloudtrail_collector.py +534 -0
  101. tests/regscale/integrations/commercial/aws/test_aws_config_collector.py +400 -0
  102. tests/regscale/integrations/commercial/aws/test_aws_guardduty_collector.py +315 -0
  103. tests/regscale/integrations/commercial/aws/test_aws_iam_collector.py +458 -0
  104. tests/regscale/integrations/commercial/aws/test_aws_inspector_collector.py +353 -0
  105. tests/regscale/integrations/commercial/aws/test_aws_inventory_integration.py +530 -0
  106. tests/regscale/integrations/commercial/aws/test_aws_kms_collector.py +919 -0
  107. tests/regscale/integrations/commercial/aws/test_aws_s3_collector.py +722 -0
  108. tests/regscale/integrations/commercial/aws/test_aws_scanner_integration.py +722 -0
  109. tests/regscale/integrations/commercial/aws/test_aws_securityhub_collector.py +792 -0
  110. tests/regscale/integrations/commercial/aws/test_aws_systems_manager_collector.py +918 -0
  111. tests/regscale/integrations/commercial/aws/test_aws_vpc_collector.py +996 -0
  112. tests/regscale/integrations/commercial/aws/test_cli_evidence.py +431 -0
  113. tests/regscale/integrations/commercial/aws/test_cloudtrail_control_mappings.py +452 -0
  114. tests/regscale/integrations/commercial/aws/test_cloudtrail_evidence.py +788 -0
  115. tests/regscale/integrations/commercial/aws/test_config_compliance.py +298 -0
  116. tests/regscale/integrations/commercial/aws/test_conformance_pack_mappings.py +200 -0
  117. tests/regscale/integrations/commercial/aws/test_evidence_generator.py +386 -0
  118. tests/regscale/integrations/commercial/aws/test_guardduty_control_mappings.py +564 -0
  119. tests/regscale/integrations/commercial/aws/test_guardduty_evidence.py +1041 -0
  120. tests/regscale/integrations/commercial/aws/test_iam_control_mappings.py +718 -0
  121. tests/regscale/integrations/commercial/aws/test_iam_evidence.py +1375 -0
  122. tests/regscale/integrations/commercial/aws/test_kms_control_mappings.py +656 -0
  123. tests/regscale/integrations/commercial/aws/test_kms_evidence.py +1163 -0
  124. tests/regscale/integrations/commercial/aws/test_ocsf_mapper.py +370 -0
  125. tests/regscale/integrations/commercial/aws/test_org_control_mappings.py +546 -0
  126. tests/regscale/integrations/commercial/aws/test_org_evidence.py +1240 -0
  127. tests/regscale/integrations/commercial/aws/test_s3_control_mappings.py +672 -0
  128. tests/regscale/integrations/commercial/aws/test_s3_evidence.py +987 -0
  129. tests/regscale/integrations/commercial/aws/test_scanner_evidence.py +373 -0
  130. tests/regscale/integrations/commercial/aws/test_security_hub_config_filtering.py +539 -0
  131. tests/regscale/integrations/commercial/aws/test_session_manager.py +516 -0
  132. tests/regscale/integrations/commercial/aws/test_ssm_control_mappings.py +588 -0
  133. tests/regscale/integrations/commercial/aws/test_ssm_evidence.py +735 -0
  134. tests/regscale/integrations/commercial/test_aws.py +55 -56
  135. tests/regscale/integrations/test_control_matcher.py +24 -0
  136. tests/regscale/models/test_control_implementation.py +118 -3
  137. {regscale_cli-6.27.2.0.dist-info → regscale_cli-6.28.0.0.dist-info}/LICENSE +0 -0
  138. {regscale_cli-6.27.2.0.dist-info → regscale_cli-6.28.0.0.dist-info}/WHEEL +0 -0
  139. {regscale_cli-6.27.2.0.dist-info → regscale_cli-6.28.0.0.dist-info}/entry_points.txt +0 -0
  140. {regscale_cli-6.27.2.0.dist-info → regscale_cli-6.28.0.0.dist-info}/top_level.txt +0 -0
@@ -120,17 +120,36 @@ def _build_potential_oscal_ids(variation: str) -> List[str]:
     """
     Build potential OSCAL ID formats from a control ID variation.
 
-    :param str variation: Control ID variation (e.g., "AC-1", "AC-01")
+    :param str variation: Control ID variation (e.g., "AC-1", "AC-01", "AC-1.a")
     :return: List of potential OSCAL IDs
     :rtype: List[str]
     """
     variation_lower = variation.lower()
-    return [
-        f"{variation_lower}_smt",
-        f"{variation_lower}_smt.a",
-        f"{variation_lower}_smt.b",
-        f"{variation_lower}_smt.c",
-    ]
+    oscal_ids = []
+
+    # Check if this is a control with a letter part (e.g., "ac-1.a")
+    if re.match(r"^[a-z]+-\d+\.[a-z]$", variation_lower):
+        # For letter parts, map to OSCAL format: ac-1.a -> ac-1_smt.a
+        base_control = variation_lower.rsplit(".", 1)[0]  # Get "ac-1" from "ac-1.a"
+        letter_part = variation_lower.rsplit(".", 1)[1]  # Get "a" from "ac-1.a"
+        oscal_ids.extend(
+            [
+                f"{base_control}_smt.{letter_part}",  # ac-1_smt.a (primary format)
+                f"{variation_lower}_smt",  # ac-1.a_smt (alternative format)
+            ]
+        )
+    else:
+        # Base control without letter part - include all potential letter variations
+        oscal_ids.extend(
+            [
+                f"{variation_lower}_smt",
+                f"{variation_lower}_smt.a",
+                f"{variation_lower}_smt.b",
+                f"{variation_lower}_smt.c",
+            ]
+        )
+
+    return oscal_ids
 
 
 def _matches_oscal_id(obj_id: str, variation: str) -> bool:
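In practice, the new branch means a statement-level variation such as "AC-1.a" now maps to its own OSCAL statement IDs rather than the generic letter fan-out. A standalone sketch of that mapping, assuming nothing beyond the regex shown in the hunk above (the function name here is illustrative, not the shipped helper):

import re
from typing import List


def sketch_build_oscal_ids(variation: str) -> List[str]:
    """Illustrative only: mirrors the branching in the hunk above."""
    v = variation.lower()
    if re.match(r"^[a-z]+-\d+\.[a-z]$", v):  # letter part, e.g. "ac-1.a"
        base, letter = v.rsplit(".", 1)
        return [f"{base}_smt.{letter}", f"{v}_smt"]
    # Base control: keep the original fan-out over statement letters
    return [f"{v}_smt", f"{v}_smt.a", f"{v}_smt.b", f"{v}_smt.c"]


print(sketch_build_oscal_ids("AC-1.a"))  # ['ac-1_smt.a', 'ac-1.a_smt']
print(sketch_build_oscal_ids("AC-1"))    # ['ac-1_smt', 'ac-1_smt.a', 'ac-1_smt.b', 'ac-1_smt.c']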
@@ -213,15 +232,14 @@ def transform_control(control: str) -> str:
     :rtype: str
     """
     # Use regex to match the pattern and capture the parts (handle extra spaces)
-    match = re.match(r"([A-Za-z]+)-(\d+)\s*\(\s*(\d+|[a-z])\s*\)", control)
-    if match:
+    # Now handles both uppercase and lowercase letters in parentheses
+    if match := re.match(r"([A-Z]+)-(\d+)\s*\(\s*(\d+|[A-Z])\s*\)", control, re.IGNORECASE):
         control_name = match.group(1).lower()
         control_number = match.group(2)
-        sub_control = match.group(3)
-
-        if sub_control.isdigit():
+        try:
+            sub_control = match.group(3).lower()  # Normalize to lowercase
             transformed_control = f"{control_name}-{control_number}.{sub_control}"
-        else:
+        except IndexError:
             transformed_control = f"{control_name}-{control_number}"
 
     return transformed_control
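The effect of the rewrite is that the parenthesized part is matched case-insensitively and lower-cased, so "AC-2 ( B )" now normalizes the same way as "AC-2 (b)". A hedged sketch of the intended transformation (not the shipped function; the unmatched-input fallback here is an assumption):

import re


def sketch_transform_control(control: str) -> str:
    """Illustrative only: dot-notation form of a control with a parenthesized part."""
    if match := re.match(r"([A-Z]+)-(\d+)\s*\(\s*(\d+|[A-Z])\s*\)", control, re.IGNORECASE):
        name, number, sub = match.group(1).lower(), match.group(2), match.group(3).lower()
        return f"{name}-{number}.{sub}"
    return control  # assumption: unmatched IDs pass through unchanged


print(sketch_transform_control("AC-2 ( B )"))  # ac-2.b
print(sketch_transform_control("AC-6(1)"))     # ac-6.1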
@@ -262,29 +280,76 @@ def new_leveraged_auth(
     return new_leveraged_auth_id.id
 
 
-def gen_key(control_id: str):
+def gen_key(control_id: str) -> str:
     """
-    Function to generate a key for the control ID
+    Function to generate a key for the control ID by stripping letter-based parts.
+    Handles both parentheses notation (AC-1(a)) and dot notation (ac-1.a).
+
+    Examples:
+    - AC-1 (a) -> AC-1
+    - ac-1.a -> ac-1
+    - AC-2(1) -> AC-2(1) (numeric enhancement preserved)
+    - AC-17.2 -> AC-17.2 (numeric enhancement preserved)
 
     :param str control_id: The control ID to generate a key for
-    :return: The generated key
+    :return: The generated key with letter parts stripped
     :rtype: str
     """
-    # Match pattern: captures everything up to either:
-    # 1. The last (number) if it exists
+    # First, try parentheses notation: ALPHA-NUM(LETTER) -> ALPHA-NUM
+    # Captures everything up to either:
+    # 1. The last (number) if it exists (preserved)
     # 2. The main control number if no enhancement exists
-    # And excludes any trailing (letter) - handles extra spaces like AC-6 ( 1 ) ( a )
-    pattern = r"^(\w+-\d+(?:\s*\(\s*\d+\s*\))?)(?:\s*\(\s*[a-zA-Z]\s*\))?$"
+    # Excludes trailing (letter) - handles extra spaces like AC-6 ( 1 ) ( a )
+    pattern_paren = r"^(\w+-\d+(?:\s*\(\s*\d+\s*\))?)(?:\s*\(\s*[a-zA-Z]\s*\))?$"
+    if match := re.match(pattern_paren, control_id):
+        return match.group(1)
 
-    match = re.match(pattern, control_id)
-    if match:
+    # Try dot notation: alpha-num.letter -> alpha-num
+    # Preserves numeric enhancements (ac-17.2) but strips letter parts (ac-1.a)
+    pattern_dot = r"^([a-z]+-\d+)\.([a-z])$"
+    if match := re.match(pattern_dot, control_id, re.IGNORECASE):
+        # Check if the part after dot is a single letter (not a number)
        return match.group(1)
+
+    # No match, return as-is
     return control_id
 
 
+def _is_letter_based_control_part(control_id: str) -> bool:
+    """
+    Check if a control ID is a letter-based part (e.g., AC-1(a), ac-1.a).
+    Returns True for ALPHA-NUMERIC(ALPHA) or alpha-numeric.alpha patterns.
+    Returns False for numeric enhancements (AC-1(1), ac-17.2).
+
+    :param str control_id: The control ID to check
+    :return: True if it's a letter-based control part
+    :rtype: bool
+    """
+    # Pattern 1: Parentheses notation - ALPHA-NUMERIC(ALPHA) like AC-1(a), AC-2(B)
+    pattern_paren = r"^[A-Za-z]+-\d+\s*\(\s*[a-zA-Z]\s*\)$"
+    if re.match(pattern_paren, control_id):
+        return True
+
+    # Pattern 2: Dot notation - alpha-numeric.alpha like ac-1.a, ac-2.b
+    # Exclude numeric enhancements like ac-17.2
+    pattern_dot = r"^[a-z]+-\d+\.([a-z])$"
+    match = re.match(pattern_dot, control_id, re.IGNORECASE)
+    if match and match.group(1).isalpha():
+        return True
+
+    return False
+
+
 def map_implementation_status(control_id: str, cis_data: dict) -> str:
     """
-    Function to map the selected implementation status on the CIS worksheet to a RegScale status
+    Function to map the selected implementation status on the CIS worksheet to a RegScale status.
+    Aggregates letter-based control parts (AC-1(a), AC-1(b), AC-1(c)) into base control (AC-1).
+
+    Aggregation logic for letter-based parts:
+    - All "Implemented" → "Fully Implemented"
+    - Mix with at least one "Implemented" → "Partially Implemented"
+    - All "Not Implemented" or empty → "Not Implemented"
+    - Any "Planned" (no implemented) → "Planned"
 
     :param str control_id: The control ID from RegScale
     :param dict cis_data: Data from the CIS worksheet to map the status from
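The practical effect of the gen_key rewrite above is that letter-based sub-parts collapse onto their parent control while numeric enhancements are preserved. A quick standalone illustration of the two patterns shown in that hunk (the helper name is hypothetical):

import re


def sketch_gen_key(control_id: str) -> str:
    """Illustrative only: strip a trailing letter part, keep numeric enhancements."""
    if m := re.match(r"^(\w+-\d+(?:\s*\(\s*\d+\s*\))?)(?:\s*\(\s*[a-zA-Z]\s*\))?$", control_id):
        return m.group(1)
    if m := re.match(r"^([a-z]+-\d+)\.([a-z])$", control_id, re.IGNORECASE):
        return m.group(1)
    return control_id


for cid in ["AC-1 (a)", "ac-1.a", "AC-6 ( 1 ) ( a )", "AC-2(1)", "AC-17.2"]:
    print(f"{cid} -> {sketch_gen_key(cid)}")
# AC-1 (a) -> AC-1, ac-1.a -> ac-1, AC-6 ( 1 ) ( a ) -> AC-6 ( 1 ),
# AC-2(1) -> AC-2(1), AC-17.2 -> AC-17.2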
@@ -292,7 +357,7 @@ def map_implementation_status(control_id: str, cis_data: dict) -> str:
     :rtype: str
     """
 
-    # Extract matching records
+    # Extract matching records (gen_key strips letter parts to match base control)
     cis_records = [
         value
         for value in cis_data.values()
@@ -308,28 +373,45 @@ def map_implementation_status(control_id: str, cis_data: dict) -> str:
         logger.warning(f"No CIS records found for control {control_id}")
         return status_ret
 
+    # Check if these are letter-based control parts that need aggregation
+    has_letter_parts = any(_is_letter_based_control_part(rec.get("control_id", "")) for rec in cis_records)
+
     # Count implementation statuses
     status_counts = Counter(record.get("implementation_status", "") for record in cis_records)
-    logger.debug("Status distribution for %s: %s", control_id, dict(status_counts))
+    logger.debug("Status distribution for %s: %s (letter parts: %s)", control_id, dict(status_counts), has_letter_parts)
 
-    # Early returns for simple cases
+    # Early return for simple case: all same status
     if len(status_counts) == 1:
         status = next(iter(status_counts))
-        return STATUS_MAPPING.get(status, ControlImplementationStatus.NotImplemented)
+        mapped_status = STATUS_MAPPING.get(status, ControlImplementationStatus.NotImplemented)
+        # If all letter parts have same status and it's "Implemented", return FullyImplemented
+        if has_letter_parts and status == "Implemented":
+            return ControlImplementationStatus.FullyImplemented
+        return mapped_status
 
+    # Aggregate statuses for letter-based control parts or multiple records
     implemented_count = status_counts.get("Implemented", 0)
+    not_implemented_count = status_counts.get("", 0)  # Empty status counts as not implemented
+    partially_implemented_count = status_counts.get("Partially Implemented", 0)
+    planned_count = status_counts.get("Planned", 0)
     total_count = sum(status_counts.values())
 
+    # Aggregation logic
     if implemented_count == total_count:
+        # All parts are implemented
         return ControlImplementationStatus.FullyImplemented
-    elif implemented_count > 0 or any(status == "Partially Implemented" for status in status_counts):
-        status_ret = ControlImplementationStatus.PartiallyImplemented
-    elif any(status == "Planned" for status in status_counts):
-        status_ret = ControlImplementationStatus.Planned
+    elif implemented_count > 0 or partially_implemented_count > 0:
+        # Mix of implemented and other statuses, or any partially implemented
+        return ControlImplementationStatus.PartiallyImplemented
+    elif planned_count > 0 and not_implemented_count == 0:
+        # All are planned (no not-implemented)
+        return ControlImplementationStatus.Planned
     elif any(status in ["N/A", ALTERNATIVE_IMPLEMENTATION] for status in status_counts):
-        status_ret = ControlImplementationStatus.NA
-
-    return status_ret
+        # Any N/A or Alternative
+        return ControlImplementationStatus.NA
+    else:
+        # Default: not implemented
+        return ControlImplementationStatus.NotImplemented
 
 
 def map_origination(control_id: str, cis_data: dict) -> dict:
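Taken together, the rewritten branches aggregate the per-part worksheet statuses into a single RegScale status. A condensed sketch of just that counting logic, with plain strings standing in for the ControlImplementationStatus enum (an assumption made for readability):

from collections import Counter
from typing import List


def sketch_aggregate(statuses: List[str]) -> str:
    """Illustrative only: per-part CIS statuses -> one aggregated status."""
    counts = Counter(statuses)
    total = sum(counts.values())
    implemented = counts.get("Implemented", 0)
    if implemented == total:
        return "Fully Implemented"
    if implemented > 0 or counts.get("Partially Implemented", 0) > 0:
        return "Partially Implemented"
    if counts.get("Planned", 0) > 0 and counts.get("", 0) == 0:
        return "Planned"
    if any(s in ("N/A", "Alternative Implementation") for s in counts):
        return "N/A"
    return "Not Implemented"


print(sketch_aggregate(["Implemented", "Implemented", "Implemented"]))  # Fully Implemented
print(sketch_aggregate(["Implemented", "", "Planned"]))                 # Partially Implemented
print(sketch_aggregate(["Planned", "Planned"]))                         # Planned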
@@ -905,6 +987,26 @@ def process_implementation(
     return errors, processed_objectives
 
 
+def _extract_base_control_id(control_id: str) -> str:
+    """
+    Extract the base control ID from a control ID that may have a letter part.
+
+    Examples:
+    - "AC-1.a" -> "AC-1"
+    - "AC-17.2" -> "AC-17.2" (numeric parts are preserved)
+    - "AC-1" -> "AC-1"
+
+    :param str control_id: Control ID that may have a letter part
+    :return: Base control ID without letter part
+    :rtype: str
+    """
+    # Check if the control has a letter part (e.g., AC-1.a)
+    match = re.match(r"^([A-Z]+-\d+)\.[A-Z]$", control_id, re.IGNORECASE)
+    if match:
+        return match.group(1)
+    return control_id
+
+
 def gen_filtered_records(
     implementation: ControlImplementation, sheet_data: dict, control_matcher: ControlMatcher
 ) -> Tuple[List[ImplementationObjective], List[Dict[str, str]]]:
@@ -930,11 +1032,22 @@ def gen_filtered_records(
         record_control_id = record["cis"].get("regscale_control_id", "")
         # Parse the record's control ID
         parsed_record_id = control_matcher.parse_control_id(record_control_id)
-        if parsed_record_id:
-            # Check if the parsed record control ID matches any variation
-            # pylint: disable=protected-access # Using internal method for control ID variation matching
-            if control_variations & control_matcher._get_control_id_variations(parsed_record_id):
-                filtered_records.append(record)
+        if not parsed_record_id:
+            continue
+        # Get variations for the parsed record ID
+        # pylint: disable=protected-access # Using internal method for control ID variation matching
+        record_variations = control_matcher._get_control_id_variations(parsed_record_id)
+
+        # Check if the parsed record control ID matches any variation
+        if control_variations & record_variations:
+            filtered_records.append(record)
+        else:
+            # If no direct match and record has a letter part, try matching the base control
+            base_control_id = _extract_base_control_id(parsed_record_id)
+            if base_control_id != parsed_record_id:
+                base_variations = control_matcher._get_control_id_variations(base_control_id)
+                if control_variations & base_variations:
+                    filtered_records.append(record)
 
     return existing_objectives, filtered_records
 
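The added fallback means a worksheet row keyed as "AC-1.a" can still be matched to an implementation whose variations only contain "AC-1". A toy illustration with plain sets standing in for ControlMatcher, whose internals are not part of this hunk:

import re
from typing import Set


def sketch_base_control(control_id: str) -> str:
    # Strip a single trailing letter part; leave numeric enhancements alone
    m = re.match(r"^([A-Z]+-\d+)\.[A-Z]$", control_id, re.IGNORECASE)
    return m.group(1) if m else control_id


def sketch_matches(variations: Set[str], record_id: str) -> bool:
    """Illustrative only: direct match first, then retry on the base control."""
    if record_id in variations:
        return True
    base = sketch_base_control(record_id)
    return base != record_id and base in variations


print(sketch_matches({"AC-1", "ac-1"}, "AC-1.a"))   # True (matched via the base control)
print(sketch_matches({"AC-1", "ac-1"}, "AC-17.2"))  # False (numeric part, no letter fallback)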
@@ -1114,46 +1227,73 @@ def parse_crm_worksheet(file_path: click.Path, crm_sheet_name: str, version: Lit
 def _get_expected_cis_columns() -> List[str]:
     """
     Get the expected column names for CIS worksheet in order.
+    These match the FedRAMP Rev 5 CIS worksheet format.
 
     :return: List of expected column names
     :rtype: List[str]
     """
     return [
-        CONTROL_ID,
+        CONTROL_ID,  # "Control ID"
         "Implemented",
-        ControlImplementationStatus.PartiallyImplemented,
+        ControlImplementationStatus.PartiallyImplemented,  # "Partially Implemented"
         "Planned",
-        ALT_IMPLEMENTATION,
-        ControlImplementationStatus.NA,
+        ALTERNATIVE_IMPLEMENTATION,  # "Alternative Implementation"
+        ControlImplementationStatus.NA,  # "N/A"
         SERVICE_PROVIDER_CORPORATE,
         SERVICE_PROVIDER_SYSTEM_SPECIFIC,
         SERVICE_PROVIDER_HYBRID,
         CONFIGURED_BY_CUSTOMER,
         PROVIDED_BY_CUSTOMER,
         SHARED,
-        INHERITED,
+        INHERITED,  # "Inherited from pre-existing FedRAMP Authorization"
     ]
 
 
-def _normalize_cis_columns(cis_df, expected_columns: List[str]):
+def _normalize_cis_columns(cis_df: "pd.DataFrame", expected_columns: List[str]) -> "pd.DataFrame":
     """
     Normalize CIS dataframe columns by matching expected columns and handling missing ones.
+    Uses fuzzy matching to handle truncated column names from merged cells.
 
-    :param cis_df: The CIS dataframe
+    :param pd.DataFrame cis_df: The CIS dataframe
     :param List[str] expected_columns: List of expected column names
     :return: Normalized dataframe with standardized column names
+    :rtype: pd.DataFrame
     """
     available_columns = cis_df.columns.tolist()
     columns_to_keep = []
 
+    logger.debug(f"Available CIS columns: {available_columns}")
+
     for expected_col in expected_columns:
+        matching_col = None
+
+        # Try exact match first (case-insensitive)
         matching_col = next(
             (col for col in available_columns if str(col).strip().lower() == expected_col.lower()), None
         )
+
+        # If no exact match, try partial/fuzzy match for truncated column names
+        if matching_col is None:
+            # Create a simplified version for matching (first few significant words)
+            # Filter out common words and take first 3 significant words
+            skip_words = {"from", "by", "to", "the", "and", "or", "a", "an"}
+            expected_words = [w for w in expected_col.lower().split() if w not in skip_words][:3]
+
+            for col in available_columns:
+                col_str = str(col).lower()
+                # Check if at least 2 of the significant words are in the column name (handles truncation & variations)
+                matches = sum(1 for word in expected_words if word in col_str)
+                if matches >= min(2, len(expected_words)):  # Need at least 2 matches, or all if less than 2 words
+                    matching_col = col
+                    logger.debug(
+                        f"Fuzzy matched '{expected_col}' to '{col}' (matched {matches}/{len(expected_words)} words)"
+                    )
+                    break
+
         if matching_col is not None:
             columns_to_keep.append(matching_col)
         else:
-            logger.warning(f"Expected column '{expected_col}' not found in CIS worksheet. Using empty values.")
+            logger.info(f"Expected column '{expected_col}' not found in CIS worksheet. Using empty values.")
            cis_df[expected_col] = ""
            columns_to_keep.append(expected_col)
 
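The fuzzy pass only requires two of the first three significant words of an expected header to appear in the worksheet column, which is what lets truncated merged-cell headers still match. A standalone sketch of that rule (the helper name and sample headers are illustrative):

from typing import List, Optional


def sketch_match_column(expected: str, available: List[str]) -> Optional[str]:
    """Illustrative only: exact match first, then a >=2 significant-word overlap."""
    skip_words = {"from", "by", "to", "the", "and", "or", "a", "an"}
    for col in available:
        if col.strip().lower() == expected.lower():
            return col
    words = [w for w in expected.lower().split() if w not in skip_words][:3]
    for col in available:
        hits = sum(1 for w in words if w in col.lower())
        if hits >= min(2, len(words)):
            return col
    return None


cols = ["Control ID", "Inherited from pre-existing FedRAMP Auth"]  # truncated header
print(sketch_match_column("Inherited from pre-existing FedRAMP Authorization", cols))
# Inherited from pre-existing FedRAMP Auth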
@@ -1162,6 +1302,69 @@ def _normalize_cis_columns(cis_df, expected_columns: List[str]):
     return cis_df.fillna("")
 
 
+def _find_control_id_row_index(df: "pd.DataFrame") -> Optional[int]:
+    """
+    Find the row index containing 'Control ID' in the first column.
+
+    :param pd.DataFrame df: The dataframe to search
+    :return: Row index if found, None otherwise
+    :rtype: Optional[int]
+    """
+    for idx, row in df.iterrows():
+        if row.iloc[0] == CONTROL_ID:
+            return idx
+    return None
+
+
+def _merge_header_rows(header_row, sub_header_row) -> List[str]:
+    """
+    Merge two header rows into a single list of column names.
+
+    FedRAMP Rev5 has a two-row header structure where main headers span multiple columns
+    and sub-headers provide specific column names.
+
+    :param header_row: The main header row (categories)
+    :param sub_header_row: The sub-header row (specific columns)
+    :return: List of merged column names
+    :rtype: List[str]
+    """
+    pd = get_pandas()
+    merged_headers = []
+    current_category = None
+
+    for i, (main, sub) in enumerate(zip(header_row, sub_header_row)):
+        # Update current category if main header has a value
+        if pd.notna(main) and main and str(main).strip():
+            current_category = str(main)
+
+        # Determine which header value to use
+        header_value = _select_header_value(pd, main, sub, current_category, i)
+        merged_headers.append(header_value)
+
+    return merged_headers
+
+
+def _select_header_value(pd: "pd.DataFrame", main, sub, current_category: Optional[str], index: int) -> str:
+    """
+    Select the appropriate header value based on priority: sub-header > main header > category > unnamed.
+
+    :param pd.DataFrame pd: The pandas dataframe
+    :param main: Main header value
+    :param sub: Sub-header value
+    :param Optional[str] current_category: Current category from merged cells
+    :param int index: Column index for fallback naming
+    :return: Selected header value
+    :rtype: str
+    """
+    if pd.notna(sub) and sub and str(sub).strip():
+        return str(sub)
+    if pd.notna(main) and main and str(main).strip():
+        return str(main)
+    if current_category:
+        return f"{current_category}_{index}"
+    return f"Unnamed_{index}"
+
+
 def _load_and_prepare_cis_dataframe(file_path: click.Path, cis_sheet_name: str, skip_rows: int):
     """
     Load and prepare the CIS dataframe from the workbook.
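The merge rule in _merge_header_rows/_select_header_value above is essentially: prefer the sub-header cell, fall back to the spanning main header, then to the remembered category, then to a positional placeholder. A small pandas-free sketch of that priority, with None standing in for the NaN cells the real code checks via pd.notna:

from typing import List, Optional


def sketch_merge_headers(main_row: List[Optional[str]], sub_row: List[Optional[str]]) -> List[str]:
    """Illustrative only: sub-header > main header > current category > Unnamed_<i>."""
    merged, category = [], None
    for i, (main, sub) in enumerate(zip(main_row, sub_row)):
        if main and str(main).strip():
            category = str(main)
        if sub and str(sub).strip():
            merged.append(str(sub))
        elif main and str(main).strip():
            merged.append(str(main))
        elif category:
            merged.append(f"{category}_{i}")
        else:
            merged.append(f"Unnamed_{i}")
    return merged


main = ["Control ID", "Implementation Status", None, None]
sub = [None, "Implemented", "Partially Implemented", "Planned"]
print(sketch_merge_headers(main, sub))
# ['Control ID', 'Implemented', 'Partially Implemented', 'Planned']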
@@ -1171,26 +1374,32 @@ def _load_and_prepare_cis_dataframe(file_path: click.Path, cis_sheet_name: str,
     :param int skip_rows: Number of rows to skip
     :return: Tuple of (prepared dataframe, updated skip_rows) or (None, skip_rows) if empty
     """
-    validator = ImportValidater(
-        file_path=file_path,
-        disable_mapping=True,
-        required_headers=[],
-        mapping_file_path=gettempdir(),
-        prompt=False,
-        ignore_unnamed=True,
-        worksheet_name=cis_sheet_name,
-        warn_extra_headers=False,
-    )
-    if validator.data.empty:
+    # Read the Excel file directly with pandas to preserve "N/A" as string
+    pd = get_pandas()
+    df = pd.read_excel(file_path, sheet_name=cis_sheet_name, header=None, keep_default_na=False)
+
+    if df.empty:
         return None, skip_rows
 
-    skip_rows = determine_skip_row(original_df=validator.data, text_to_find=CONTROL_ID, original_skip=skip_rows)
+    # Find the row with "Control ID"
+    control_id_row_idx = _find_control_id_row_index(df)
+    if control_id_row_idx is None:
+        logger.error("Could not find 'Control ID' in CIS worksheet")
+        return None, skip_rows
 
-    cis_df = validator.data.iloc[skip_rows:].reset_index(drop=True)
-    cis_df.columns = cis_df.iloc[0]
+    # Extract and merge the two header rows
+    header_row = df.iloc[control_id_row_idx]
+    sub_header_row = df.iloc[control_id_row_idx + 1]
+    merged_headers = _merge_header_rows(header_row, sub_header_row)
+
+    # Get data starting from two rows after the main header row
+    cis_df = df.iloc[control_id_row_idx + 2 :].reset_index(drop=True)
+    cis_df.columns = merged_headers
     cis_df.dropna(how="all", inplace=True)
     cis_df.reset_index(drop=True, inplace=True)
 
+    skip_rows = control_id_row_idx + 2
+
     return cis_df, skip_rows
 
 
@@ -1207,7 +1416,7 @@ def _extract_status(data_row) -> str:
         "Implemented",
         ControlImplementationStatus.PartiallyImplemented,
         "Planned",
-        ALT_IMPLEMENTATION,
+        ALTERNATIVE_IMPLEMENTATION,  # Use the correct constant
         ControlImplementationStatus.NA,
     ]:
         if data_row[col]:
@@ -108,7 +108,7 @@ class FedrampPoamIntegration(ScannerIntegration):
                 error_and_exit(FILE_PATH_ERROR)
             self.workbook = self.workbook or load_workbook(filename=self.file_path, data_only=True, read_only=True)
             self.poam_sheets = kwargs.get("poam_sheets") or [
-                sheet for sheet in self.workbook.sheetnames if re.search("POA&M Items", sheet)
+                sheet for sheet in self.workbook.sheetnames if re.search("POA&M Items|Configuration Findings", sheet)
             ]
         except (FileNotFoundError, InvalidFileException, KeyError) as e:
             logger.error(f"Failed to load workbook: {e}")
@@ -338,7 +338,9 @@ class FedrampPoamIntegration(ScannerIntegration):
                 yield from findings
 
             if not poam_id or not poam_id.upper():
-                print(weakness_name, poam_id)
+                logger.debug(
+                    f"Invalid POAM ID on row {index}, sheet {sheet}: weakness_name={weakness_name}, poam_id={poam_id}"
+                )
                 logger.warning(f"Invalid POAM ID on row {index}, sheet {sheet}. Skipping.")
                 yield from findings
 
@@ -403,6 +405,10 @@ class FedrampPoamIntegration(ScannerIntegration):
             if not status_date:
                 continue
 
+            # Extract Controls field (Column B) for Configuration Findings
+            controls = val_mapping.get_value(data, "Controls")
+            affected_controls = str(controls) if controls else None
+
             # Validate pluginText
             finding = IntegrationFinding(
                 control_labels=[],
@@ -436,6 +442,7 @@ class FedrampPoamIntegration(ScannerIntegration):
                 risk_adjustment=self.determine_risk_adjustment(val_mapping.get_value(data, "Risk Adjustment")),
                 operational_requirements=str(val_mapping.get_value(data, "Operational Requirement")),
                 deviation_rationale=str(val_mapping.get_value(data, "Deviation Rationale")),
+                affected_controls=affected_controls,
                 poam_id=poam_id,
             )
             if finding.is_valid():
@@ -498,6 +505,24 @@ class FedrampPoamIntegration(ScannerIntegration):
         asset_ids = val_mapping.get_value(data, ASSET_IDENTIFIER)
         if not asset_ids:
             return row_assets
+
+        # Skip rows where asset identifier contains date/description text (header rows)
+        asset_ids_str = str(asset_ids).lower()
+        if any(
+            keyword in asset_ids_str
+            for keyword in [
+                "date the weakness",
+                "aka discovery",
+                "permanent column",
+                "date of intended",
+                "last changed or closed",
+                "port/protocol",
+                "specified in the inventory",
+            ]
+        ):
+            logger.debug(f"Skipping row with header/description text in asset identifier: {str(asset_ids)[:100]}")
+            return row_assets
+
         asset_id_list = self.gen_asset_list(asset_ids)
 
         if not asset_id_list:
@@ -559,6 +584,9 @@ class FedrampPoamIntegration(ScannerIntegration):
             return raw_type
 
         for asset_id in asset_id_list:
+            # Handle long asset names
+            asset_name, asset_notes = self._handle_long_asset_name(asset_id)
+
             # Get raw values and clean them
             raw_values = {
                 "ip": asset_id if validate_ip_address(asset_id) else "",
@@ -571,8 +599,8 @@ class FedrampPoamIntegration(ScannerIntegration):
             asset_type = determine_asset_type(asset_id, raw_values["type"])
 
             res = IntegrationAsset(
-                name=asset_id,
-                identifier=asset_id,
+                name=asset_name,  # Use shortened name if needed
+                identifier=asset_name,  # Use shortened name as identifier
                 asset_type=asset_type,  # Use determined asset type
                 asset_category=regscale_models.AssetCategory.Hardware,
                 parent_id=self.plan_id,
@@ -581,6 +609,7 @@ class FedrampPoamIntegration(ScannerIntegration):
                 ip_address=raw_values["ip"],
                 fqdn=raw_values["fqdn"],
                 mac_address=raw_values["mac"],
+                notes=asset_notes,  # Store full name if truncated
                 date_last_updated=get_current_datetime(),
             )
             row_assets.append(res)
@@ -591,15 +620,52 @@ class FedrampPoamIntegration(ScannerIntegration):
 
         return row_assets
 
+    def _handle_long_asset_name(self, asset_id: str, max_length: int = 450) -> tuple[str, str]:
+        """
+        Handle asset names that exceed database field limits.
+        Generates a hash-based identifier for long names and preserves full name in notes.
+
+        :param str asset_id: The asset identifier
+        :param int max_length: Maximum allowed length (default: 450)
+        :return: Tuple of (shortened_name, notes)
+        :rtype: tuple[str, str]
+        """
+        if len(asset_id) <= max_length:
+            return asset_id, ""
+
+        # Generate hash-based identifier
+        import hashlib
+
+        hash_suffix = hashlib.sha256(asset_id.encode()).hexdigest()[:8]
+        truncated = asset_id[: max_length - 9]  # Leave room for underscore and hash
+        short_name = f"{truncated}_{hash_suffix}"
+        notes = f"Full identifier: {asset_id}"
+
+        logger.warning(f"Asset identifier exceeds {max_length} chars, truncated to: {short_name[:100]}...")
+        return short_name, notes
+
     def gen_asset_list(self, asset_ids: str):
         """
         Generate a list of asset identifiers from a string.
+        Handles multiple separator types: commas, semicolons, pipes, tabs, newlines, single/multiple spaces.
+        Also removes surrounding brackets that might wrap the list.
 
         :param str asset_ids: The asset identifier string
         :return: The list of asset identifiers
         :rtype: List[str]
         """
-        return [aid.strip() for aid in re.split(r"[,\n\r]+", asset_ids) if isinstance(aid, str) and aid.strip()]
+        # Remove surrounding brackets if present (handles cases like "[10.10.1.1 10.10.1.2]")
+        asset_ids = asset_ids.strip()
+        if asset_ids.startswith("[") and asset_ids.endswith("]"):
+            asset_ids = asset_ids[1:-1].strip()
+
+        # Split on: commas, semicolons, pipes, tabs, newlines, carriage returns, and ANY whitespace (including single spaces)
+        # Changed from \s{2,} to \s+ to handle single spaces between IPs
+        return [
+            aid.strip()
+            for aid in re.split(r"[,;\|\t\n\r]+|\s+", asset_ids)
+            if isinstance(aid, str) and aid.strip() and len(aid.strip()) > 0
+        ]
 
     @staticmethod
     def empty(string: Optional[str]) -> Optional[str]:
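Two behaviors from this hunk are worth calling out: over-long asset identifiers are shortened to a deterministic hash-suffixed name with the full value kept in notes, and the identifier cell is now split on any whitespace as well as punctuation. A standalone sketch of both, using the 450-character limit shown above (helper names are illustrative):

import hashlib
import re
from typing import List, Tuple


def sketch_shorten(asset_id: str, max_length: int = 450) -> Tuple[str, str]:
    """Illustrative only: deterministic truncation with an 8-char sha256 suffix."""
    if len(asset_id) <= max_length:
        return asset_id, ""
    suffix = hashlib.sha256(asset_id.encode()).hexdigest()[:8]
    return f"{asset_id[: max_length - 9]}_{suffix}", f"Full identifier: {asset_id}"


def sketch_split(asset_ids: str) -> List[str]:
    """Illustrative only: strip optional brackets, then split on separators or whitespace."""
    asset_ids = asset_ids.strip()
    if asset_ids.startswith("[") and asset_ids.endswith("]"):
        asset_ids = asset_ids[1:-1].strip()
    return [a.strip() for a in re.split(r"[,;\|\t\n\r]+|\s+", asset_ids) if a.strip()]


print(sketch_split("[10.10.1.1 10.10.1.2, host-a;host-b]"))
# ['10.10.1.1', '10.10.1.2', 'host-a', 'host-b']
name, notes = sketch_shorten("x" * 500)
print(len(name))  # 450 (441 chars + "_" + 8-char hash); full value preserved in notes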
@@ -625,9 +691,10 @@ class FedrampPoamIntegration(ScannerIntegration):
         :return: The status (Open/Closed) or None
         :rtype: Optional[str]
         """
-        if "closed" in sheet.lower():
+        sheet_lower = sheet.lower()
+        if "closed" in sheet_lower:
             return "Closed"
-        elif "open" in sheet.lower():
+        elif "open" in sheet_lower or "configuration findings" in sheet_lower:
             return "Open"
         return None