regscale-cli 6.27.2.0__py3-none-any.whl → 6.27.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of regscale-cli might be problematic. Click here for more details.
- regscale/_version.py +1 -1
- regscale/core/app/application.py +1 -0
- regscale/core/app/internal/control_editor.py +73 -21
- regscale/core/app/internal/login.py +4 -1
- regscale/core/app/internal/model_editor.py +219 -64
- regscale/core/login.py +21 -4
- regscale/core/utils/date.py +77 -1
- regscale/integrations/commercial/aws/scanner.py +4 -1
- regscale/integrations/commercial/synqly/query_builder.py +4 -1
- regscale/integrations/control_matcher.py +78 -23
- regscale/integrations/public/csam/csam.py +572 -763
- regscale/integrations/public/csam/csam_agency_defined.py +179 -0
- regscale/integrations/public/csam/csam_common.py +154 -0
- regscale/integrations/public/csam/csam_controls.py +432 -0
- regscale/integrations/public/csam/csam_poam.py +124 -0
- regscale/integrations/public/fedramp/click.py +17 -4
- regscale/integrations/public/fedramp/fedramp_cis_crm.py +271 -62
- regscale/integrations/public/fedramp/poam/scanner.py +74 -7
- regscale/integrations/scanner_integration.py +16 -1
- regscale/models/integration_models/cisa_kev_data.json +49 -19
- regscale/models/integration_models/synqly_models/capabilities.json +1 -1
- regscale/models/integration_models/synqly_models/connectors/vulnerabilities.py +35 -2
- regscale/models/integration_models/synqly_models/ocsf_mapper.py +41 -12
- regscale/models/platform.py +3 -0
- regscale/models/regscale_models/__init__.py +5 -0
- regscale/models/regscale_models/component.py +1 -1
- regscale/models/regscale_models/control_implementation.py +55 -24
- regscale/models/regscale_models/organization.py +3 -0
- regscale/models/regscale_models/regscale_model.py +17 -5
- regscale/models/regscale_models/security_plan.py +1 -0
- regscale/regscale.py +11 -1
- {regscale_cli-6.27.2.0.dist-info → regscale_cli-6.27.3.0.dist-info}/METADATA +1 -1
- {regscale_cli-6.27.2.0.dist-info → regscale_cli-6.27.3.0.dist-info}/RECORD +40 -36
- tests/regscale/core/test_login.py +171 -4
- tests/regscale/integrations/test_control_matcher.py +24 -0
- tests/regscale/models/test_control_implementation.py +118 -3
- {regscale_cli-6.27.2.0.dist-info → regscale_cli-6.27.3.0.dist-info}/LICENSE +0 -0
- {regscale_cli-6.27.2.0.dist-info → regscale_cli-6.27.3.0.dist-info}/WHEEL +0 -0
- {regscale_cli-6.27.2.0.dist-info → regscale_cli-6.27.3.0.dist-info}/entry_points.txt +0 -0
- {regscale_cli-6.27.2.0.dist-info → regscale_cli-6.27.3.0.dist-info}/top_level.txt +0 -0
|
@@ -120,17 +120,36 @@ def _build_potential_oscal_ids(variation: str) -> List[str]:
|
|
|
120
120
|
"""
|
|
121
121
|
Build potential OSCAL ID formats from a control ID variation.
|
|
122
122
|
|
|
123
|
-
:param str variation: Control ID variation (e.g., "AC-1", "AC-01")
|
|
123
|
+
:param str variation: Control ID variation (e.g., "AC-1", "AC-01", "AC-1.a")
|
|
124
124
|
:return: List of potential OSCAL IDs
|
|
125
125
|
:rtype: List[str]
|
|
126
126
|
"""
|
|
127
127
|
variation_lower = variation.lower()
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
128
|
+
oscal_ids = []
|
|
129
|
+
|
|
130
|
+
# Check if this is a control with a letter part (e.g., "ac-1.a")
|
|
131
|
+
if re.match(r"^[a-z]+-\d+\.[a-z]$", variation_lower):
|
|
132
|
+
# For letter parts, map to OSCAL format: ac-1.a -> ac-1_smt.a
|
|
133
|
+
base_control = variation_lower.rsplit(".", 1)[0] # Get "ac-1" from "ac-1.a"
|
|
134
|
+
letter_part = variation_lower.rsplit(".", 1)[1] # Get "a" from "ac-1.a"
|
|
135
|
+
oscal_ids.extend(
|
|
136
|
+
[
|
|
137
|
+
f"{base_control}_smt.{letter_part}", # ac-1_smt.a (primary format)
|
|
138
|
+
f"{variation_lower}_smt", # ac-1.a_smt (alternative format)
|
|
139
|
+
]
|
|
140
|
+
)
|
|
141
|
+
else:
|
|
142
|
+
# Base control without letter part - include all potential letter variations
|
|
143
|
+
oscal_ids.extend(
|
|
144
|
+
[
|
|
145
|
+
f"{variation_lower}_smt",
|
|
146
|
+
f"{variation_lower}_smt.a",
|
|
147
|
+
f"{variation_lower}_smt.b",
|
|
148
|
+
f"{variation_lower}_smt.c",
|
|
149
|
+
]
|
|
150
|
+
)
|
|
151
|
+
|
|
152
|
+
return oscal_ids
|
|
134
153
|
|
|
135
154
|
|
|
136
155
|
def _matches_oscal_id(obj_id: str, variation: str) -> bool:
|
|
@@ -213,15 +232,14 @@ def transform_control(control: str) -> str:
|
|
|
213
232
|
:rtype: str
|
|
214
233
|
"""
|
|
215
234
|
# Use regex to match the pattern and capture the parts (handle extra spaces)
|
|
216
|
-
|
|
217
|
-
if match:
|
|
235
|
+
# Now handles both uppercase and lowercase letters in parentheses
|
|
236
|
+
if match := re.match(r"([A-Z]+)-(\d+)\s*\(\s*(\d+|[A-Z])\s*\)", control, re.IGNORECASE):
|
|
218
237
|
control_name = match.group(1).lower()
|
|
219
238
|
control_number = match.group(2)
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
if sub_control.isdigit():
|
|
239
|
+
try:
|
|
240
|
+
sub_control = match.group(3).lower() # Normalize to lowercase
|
|
223
241
|
transformed_control = f"{control_name}-{control_number}.{sub_control}"
|
|
224
|
-
|
|
242
|
+
except IndexError:
|
|
225
243
|
transformed_control = f"{control_name}-{control_number}"
|
|
226
244
|
|
|
227
245
|
return transformed_control
|
|
@@ -262,29 +280,76 @@ def new_leveraged_auth(
|
|
|
262
280
|
return new_leveraged_auth_id.id
|
|
263
281
|
|
|
264
282
|
|
|
265
|
-
def gen_key(control_id: str):
|
|
283
|
+
def gen_key(control_id: str) -> str:
|
|
266
284
|
"""
|
|
267
|
-
Function to generate a key for the control ID
|
|
285
|
+
Function to generate a key for the control ID by stripping letter-based parts.
|
|
286
|
+
Handles both parentheses notation (AC-1(a)) and dot notation (ac-1.a).
|
|
287
|
+
|
|
288
|
+
Examples:
|
|
289
|
+
- AC-1 (a) -> AC-1
|
|
290
|
+
- ac-1.a -> ac-1
|
|
291
|
+
- AC-2(1) -> AC-2(1) (numeric enhancement preserved)
|
|
292
|
+
- AC-17.2 -> AC-17.2 (numeric enhancement preserved)
|
|
268
293
|
|
|
269
294
|
:param str control_id: The control ID to generate a key for
|
|
270
|
-
:return: The generated key
|
|
295
|
+
:return: The generated key with letter parts stripped
|
|
271
296
|
:rtype: str
|
|
272
297
|
"""
|
|
273
|
-
#
|
|
274
|
-
#
|
|
298
|
+
# First, try parentheses notation: ALPHA-NUM(LETTER) -> ALPHA-NUM
|
|
299
|
+
# Captures everything up to either:
|
|
300
|
+
# 1. The last (number) if it exists (preserved)
|
|
275
301
|
# 2. The main control number if no enhancement exists
|
|
276
|
-
#
|
|
277
|
-
|
|
302
|
+
# Excludes trailing (letter) - handles extra spaces like AC-6 ( 1 ) ( a )
|
|
303
|
+
pattern_paren = r"^(\w+-\d+(?:\s*\(\s*\d+\s*\))?)(?:\s*\(\s*[a-zA-Z]\s*\))?$"
|
|
304
|
+
if match := re.match(pattern_paren, control_id):
|
|
305
|
+
return match.group(1)
|
|
278
306
|
|
|
279
|
-
|
|
280
|
-
|
|
307
|
+
# Try dot notation: alpha-num.letter -> alpha-num
|
|
308
|
+
# Preserves numeric enhancements (ac-17.2) but strips letter parts (ac-1.a)
|
|
309
|
+
pattern_dot = r"^([a-z]+-\d+)\.([a-z])$"
|
|
310
|
+
if match := re.match(pattern_dot, control_id, re.IGNORECASE):
|
|
311
|
+
# Check if the part after dot is a single letter (not a number)
|
|
281
312
|
return match.group(1)
|
|
313
|
+
|
|
314
|
+
# No match, return as-is
|
|
282
315
|
return control_id
|
|
283
316
|
|
|
284
317
|
|
|
318
|
+
def _is_letter_based_control_part(control_id: str) -> bool:
|
|
319
|
+
"""
|
|
320
|
+
Check if a control ID is a letter-based part (e.g., AC-1(a), ac-1.a).
|
|
321
|
+
Returns True for ALPHA-NUMERIC(ALPHA) or alpha-numeric.alpha patterns.
|
|
322
|
+
Returns False for numeric enhancements (AC-1(1), ac-17.2).
|
|
323
|
+
|
|
324
|
+
:param str control_id: The control ID to check
|
|
325
|
+
:return: True if it's a letter-based control part
|
|
326
|
+
:rtype: bool
|
|
327
|
+
"""
|
|
328
|
+
# Pattern 1: Parentheses notation - ALPHA-NUMERIC(ALPHA) like AC-1(a), AC-2(B)
|
|
329
|
+
pattern_paren = r"^[A-Za-z]+-\d+\s*\(\s*[a-zA-Z]\s*\)$"
|
|
330
|
+
if re.match(pattern_paren, control_id):
|
|
331
|
+
return True
|
|
332
|
+
|
|
333
|
+
# Pattern 2: Dot notation - alpha-numeric.alpha like ac-1.a, ac-2.b
|
|
334
|
+
# Exclude numeric enhancements like ac-17.2
|
|
335
|
+
pattern_dot = r"^[a-z]+-\d+\.([a-z])$"
|
|
336
|
+
match = re.match(pattern_dot, control_id, re.IGNORECASE)
|
|
337
|
+
if match and match.group(1).isalpha():
|
|
338
|
+
return True
|
|
339
|
+
|
|
340
|
+
return False
|
|
341
|
+
|
|
342
|
+
|
|
285
343
|
def map_implementation_status(control_id: str, cis_data: dict) -> str:
|
|
286
344
|
"""
|
|
287
|
-
Function to map the selected implementation status on the CIS worksheet to a RegScale status
|
|
345
|
+
Function to map the selected implementation status on the CIS worksheet to a RegScale status.
|
|
346
|
+
Aggregates letter-based control parts (AC-1(a), AC-1(b), AC-1(c)) into base control (AC-1).
|
|
347
|
+
|
|
348
|
+
Aggregation logic for letter-based parts:
|
|
349
|
+
- All "Implemented" → "Fully Implemented"
|
|
350
|
+
- Mix with at least one "Implemented" → "Partially Implemented"
|
|
351
|
+
- All "Not Implemented" or empty → "Not Implemented"
|
|
352
|
+
- Any "Planned" (no implemented) → "Planned"
|
|
288
353
|
|
|
289
354
|
:param str control_id: The control ID from RegScale
|
|
290
355
|
:param dict cis_data: Data from the CIS worksheet to map the status from
|
|
@@ -292,7 +357,7 @@ def map_implementation_status(control_id: str, cis_data: dict) -> str:
|
|
|
292
357
|
:rtype: str
|
|
293
358
|
"""
|
|
294
359
|
|
|
295
|
-
# Extract matching records
|
|
360
|
+
# Extract matching records (gen_key strips letter parts to match base control)
|
|
296
361
|
cis_records = [
|
|
297
362
|
value
|
|
298
363
|
for value in cis_data.values()
|
|
@@ -308,28 +373,45 @@ def map_implementation_status(control_id: str, cis_data: dict) -> str:
|
|
|
308
373
|
logger.warning(f"No CIS records found for control {control_id}")
|
|
309
374
|
return status_ret
|
|
310
375
|
|
|
376
|
+
# Check if these are letter-based control parts that need aggregation
|
|
377
|
+
has_letter_parts = any(_is_letter_based_control_part(rec.get("control_id", "")) for rec in cis_records)
|
|
378
|
+
|
|
311
379
|
# Count implementation statuses
|
|
312
380
|
status_counts = Counter(record.get("implementation_status", "") for record in cis_records)
|
|
313
|
-
logger.debug("Status distribution for %s: %s", control_id, dict(status_counts))
|
|
381
|
+
logger.debug("Status distribution for %s: %s (letter parts: %s)", control_id, dict(status_counts), has_letter_parts)
|
|
314
382
|
|
|
315
|
-
# Early
|
|
383
|
+
# Early return for simple case: all same status
|
|
316
384
|
if len(status_counts) == 1:
|
|
317
385
|
status = next(iter(status_counts))
|
|
318
|
-
|
|
386
|
+
mapped_status = STATUS_MAPPING.get(status, ControlImplementationStatus.NotImplemented)
|
|
387
|
+
# If all letter parts have same status and it's "Implemented", return FullyImplemented
|
|
388
|
+
if has_letter_parts and status == "Implemented":
|
|
389
|
+
return ControlImplementationStatus.FullyImplemented
|
|
390
|
+
return mapped_status
|
|
319
391
|
|
|
392
|
+
# Aggregate statuses for letter-based control parts or multiple records
|
|
320
393
|
implemented_count = status_counts.get("Implemented", 0)
|
|
394
|
+
not_implemented_count = status_counts.get("", 0) # Empty status counts as not implemented
|
|
395
|
+
partially_implemented_count = status_counts.get("Partially Implemented", 0)
|
|
396
|
+
planned_count = status_counts.get("Planned", 0)
|
|
321
397
|
total_count = sum(status_counts.values())
|
|
322
398
|
|
|
399
|
+
# Aggregation logic
|
|
323
400
|
if implemented_count == total_count:
|
|
401
|
+
# All parts are implemented
|
|
324
402
|
return ControlImplementationStatus.FullyImplemented
|
|
325
|
-
elif implemented_count > 0 or
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
403
|
+
elif implemented_count > 0 or partially_implemented_count > 0:
|
|
404
|
+
# Mix of implemented and other statuses, or any partially implemented
|
|
405
|
+
return ControlImplementationStatus.PartiallyImplemented
|
|
406
|
+
elif planned_count > 0 and not_implemented_count == 0:
|
|
407
|
+
# All are planned (no not-implemented)
|
|
408
|
+
return ControlImplementationStatus.Planned
|
|
329
409
|
elif any(status in ["N/A", ALTERNATIVE_IMPLEMENTATION] for status in status_counts):
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
410
|
+
# Any N/A or Alternative
|
|
411
|
+
return ControlImplementationStatus.NA
|
|
412
|
+
else:
|
|
413
|
+
# Default: not implemented
|
|
414
|
+
return ControlImplementationStatus.NotImplemented
|
|
333
415
|
|
|
334
416
|
|
|
335
417
|
def map_origination(control_id: str, cis_data: dict) -> dict:
|
|
@@ -905,6 +987,26 @@ def process_implementation(
|
|
|
905
987
|
return errors, processed_objectives
|
|
906
988
|
|
|
907
989
|
|
|
990
|
+
def _extract_base_control_id(control_id: str) -> str:
|
|
991
|
+
"""
|
|
992
|
+
Extract the base control ID from a control ID that may have a letter part.
|
|
993
|
+
|
|
994
|
+
Examples:
|
|
995
|
+
- "AC-1.a" -> "AC-1"
|
|
996
|
+
- "AC-17.2" -> "AC-17.2" (numeric parts are preserved)
|
|
997
|
+
- "AC-1" -> "AC-1"
|
|
998
|
+
|
|
999
|
+
:param str control_id: Control ID that may have a letter part
|
|
1000
|
+
:return: Base control ID without letter part
|
|
1001
|
+
:rtype: str
|
|
1002
|
+
"""
|
|
1003
|
+
# Check if the control has a letter part (e.g., AC-1.a)
|
|
1004
|
+
match = re.match(r"^([A-Z]+-\d+)\.[A-Z]$", control_id, re.IGNORECASE)
|
|
1005
|
+
if match:
|
|
1006
|
+
return match.group(1)
|
|
1007
|
+
return control_id
|
|
1008
|
+
|
|
1009
|
+
|
|
908
1010
|
def gen_filtered_records(
|
|
909
1011
|
implementation: ControlImplementation, sheet_data: dict, control_matcher: ControlMatcher
|
|
910
1012
|
) -> Tuple[List[ImplementationObjective], List[Dict[str, str]]]:
|
|
@@ -930,11 +1032,22 @@ def gen_filtered_records(
|
|
|
930
1032
|
record_control_id = record["cis"].get("regscale_control_id", "")
|
|
931
1033
|
# Parse the record's control ID
|
|
932
1034
|
parsed_record_id = control_matcher.parse_control_id(record_control_id)
|
|
933
|
-
if parsed_record_id:
|
|
934
|
-
|
|
935
|
-
|
|
936
|
-
|
|
937
|
-
|
|
1035
|
+
if not parsed_record_id:
|
|
1036
|
+
continue
|
|
1037
|
+
# Get variations for the parsed record ID
|
|
1038
|
+
# pylint: disable=protected-access # Using internal method for control ID variation matching
|
|
1039
|
+
record_variations = control_matcher._get_control_id_variations(parsed_record_id)
|
|
1040
|
+
|
|
1041
|
+
# Check if the parsed record control ID matches any variation
|
|
1042
|
+
if control_variations & record_variations:
|
|
1043
|
+
filtered_records.append(record)
|
|
1044
|
+
else:
|
|
1045
|
+
# If no direct match and record has a letter part, try matching the base control
|
|
1046
|
+
base_control_id = _extract_base_control_id(parsed_record_id)
|
|
1047
|
+
if base_control_id != parsed_record_id:
|
|
1048
|
+
base_variations = control_matcher._get_control_id_variations(base_control_id)
|
|
1049
|
+
if control_variations & base_variations:
|
|
1050
|
+
filtered_records.append(record)
|
|
938
1051
|
|
|
939
1052
|
return existing_objectives, filtered_records
|
|
940
1053
|
|
|
@@ -1114,46 +1227,73 @@ def parse_crm_worksheet(file_path: click.Path, crm_sheet_name: str, version: Lit
|
|
|
1114
1227
|
def _get_expected_cis_columns() -> List[str]:
|
|
1115
1228
|
"""
|
|
1116
1229
|
Get the expected column names for CIS worksheet in order.
|
|
1230
|
+
These match the FedRAMP Rev 5 CIS worksheet format.
|
|
1117
1231
|
|
|
1118
1232
|
:return: List of expected column names
|
|
1119
1233
|
:rtype: List[str]
|
|
1120
1234
|
"""
|
|
1121
1235
|
return [
|
|
1122
|
-
CONTROL_ID,
|
|
1236
|
+
CONTROL_ID, # "Control ID"
|
|
1123
1237
|
"Implemented",
|
|
1124
|
-
ControlImplementationStatus.PartiallyImplemented,
|
|
1238
|
+
ControlImplementationStatus.PartiallyImplemented, # "Partially Implemented"
|
|
1125
1239
|
"Planned",
|
|
1126
|
-
|
|
1127
|
-
ControlImplementationStatus.NA,
|
|
1240
|
+
ALTERNATIVE_IMPLEMENTATION, # "Alternative Implementation"
|
|
1241
|
+
ControlImplementationStatus.NA, # "N/A"
|
|
1128
1242
|
SERVICE_PROVIDER_CORPORATE,
|
|
1129
1243
|
SERVICE_PROVIDER_SYSTEM_SPECIFIC,
|
|
1130
1244
|
SERVICE_PROVIDER_HYBRID,
|
|
1131
1245
|
CONFIGURED_BY_CUSTOMER,
|
|
1132
1246
|
PROVIDED_BY_CUSTOMER,
|
|
1133
1247
|
SHARED,
|
|
1134
|
-
INHERITED,
|
|
1248
|
+
INHERITED, # "Inherited from pre-existing FedRAMP Authorization"
|
|
1135
1249
|
]
|
|
1136
1250
|
|
|
1137
1251
|
|
|
1138
|
-
def _normalize_cis_columns(cis_df, expected_columns: List[str]):
|
|
1252
|
+
def _normalize_cis_columns(cis_df: "pd.DataFrame", expected_columns: List[str]) -> "pd.DataFrame":
|
|
1139
1253
|
"""
|
|
1140
1254
|
Normalize CIS dataframe columns by matching expected columns and handling missing ones.
|
|
1255
|
+
Uses fuzzy matching to handle truncated column names from merged cells.
|
|
1141
1256
|
|
|
1142
|
-
:param cis_df: The CIS dataframe
|
|
1257
|
+
:param pd.DataFrame cis_df: The CIS dataframe
|
|
1143
1258
|
:param List[str] expected_columns: List of expected column names
|
|
1144
1259
|
:return: Normalized dataframe with standardized column names
|
|
1260
|
+
:rtype: pd.DataFrame
|
|
1145
1261
|
"""
|
|
1146
1262
|
available_columns = cis_df.columns.tolist()
|
|
1147
1263
|
columns_to_keep = []
|
|
1148
1264
|
|
|
1265
|
+
logger.debug(f"Available CIS columns: {available_columns}")
|
|
1266
|
+
|
|
1149
1267
|
for expected_col in expected_columns:
|
|
1268
|
+
matching_col = None
|
|
1269
|
+
|
|
1270
|
+
# Try exact match first (case-insensitive)
|
|
1150
1271
|
matching_col = next(
|
|
1151
1272
|
(col for col in available_columns if str(col).strip().lower() == expected_col.lower()), None
|
|
1152
1273
|
)
|
|
1274
|
+
|
|
1275
|
+
# If no exact match, try partial/fuzzy match for truncated column names
|
|
1276
|
+
if matching_col is None:
|
|
1277
|
+
# Create a simplified version for matching (first few significant words)
|
|
1278
|
+
# Filter out common words and take first 3 significant words
|
|
1279
|
+
skip_words = {"from", "by", "to", "the", "and", "or", "a", "an"}
|
|
1280
|
+
expected_words = [w for w in expected_col.lower().split() if w not in skip_words][:3]
|
|
1281
|
+
|
|
1282
|
+
for col in available_columns:
|
|
1283
|
+
col_str = str(col).lower()
|
|
1284
|
+
# Check if at least 2 of the significant words are in the column name (handles truncation & variations)
|
|
1285
|
+
matches = sum(1 for word in expected_words if word in col_str)
|
|
1286
|
+
if matches >= min(2, len(expected_words)): # Need at least 2 matches, or all if less than 2 words
|
|
1287
|
+
matching_col = col
|
|
1288
|
+
logger.debug(
|
|
1289
|
+
f"Fuzzy matched '{expected_col}' to '{col}' (matched {matches}/{len(expected_words)} words)"
|
|
1290
|
+
)
|
|
1291
|
+
break
|
|
1292
|
+
|
|
1153
1293
|
if matching_col is not None:
|
|
1154
1294
|
columns_to_keep.append(matching_col)
|
|
1155
1295
|
else:
|
|
1156
|
-
logger.
|
|
1296
|
+
logger.info(f"Expected column '{expected_col}' not found in CIS worksheet. Using empty values.")
|
|
1157
1297
|
cis_df[expected_col] = ""
|
|
1158
1298
|
columns_to_keep.append(expected_col)
|
|
1159
1299
|
|
|
@@ -1162,6 +1302,69 @@ def _normalize_cis_columns(cis_df, expected_columns: List[str]):
|
|
|
1162
1302
|
return cis_df.fillna("")
|
|
1163
1303
|
|
|
1164
1304
|
|
|
1305
|
+
def _find_control_id_row_index(df: "pd.DataFrame") -> Optional[int]:
|
|
1306
|
+
"""
|
|
1307
|
+
Find the row index containing 'Control ID' in the first column.
|
|
1308
|
+
|
|
1309
|
+
:param pd.DataFrame df: The dataframe to search
|
|
1310
|
+
:return: Row index if found, None otherwise
|
|
1311
|
+
:rtype: Optional[int]
|
|
1312
|
+
"""
|
|
1313
|
+
for idx, row in df.iterrows():
|
|
1314
|
+
if row.iloc[0] == CONTROL_ID:
|
|
1315
|
+
return idx
|
|
1316
|
+
return None
|
|
1317
|
+
|
|
1318
|
+
|
|
1319
|
+
def _merge_header_rows(header_row, sub_header_row) -> List[str]:
|
|
1320
|
+
"""
|
|
1321
|
+
Merge two header rows into a single list of column names.
|
|
1322
|
+
|
|
1323
|
+
FedRAMP Rev5 has a two-row header structure where main headers span multiple columns
|
|
1324
|
+
and sub-headers provide specific column names.
|
|
1325
|
+
|
|
1326
|
+
:param header_row: The main header row (categories)
|
|
1327
|
+
:param sub_header_row: The sub-header row (specific columns)
|
|
1328
|
+
:return: List of merged column names
|
|
1329
|
+
:rtype: List[str]
|
|
1330
|
+
"""
|
|
1331
|
+
pd = get_pandas()
|
|
1332
|
+
merged_headers = []
|
|
1333
|
+
current_category = None
|
|
1334
|
+
|
|
1335
|
+
for i, (main, sub) in enumerate(zip(header_row, sub_header_row)):
|
|
1336
|
+
# Update current category if main header has a value
|
|
1337
|
+
if pd.notna(main) and main and str(main).strip():
|
|
1338
|
+
current_category = str(main)
|
|
1339
|
+
|
|
1340
|
+
# Determine which header value to use
|
|
1341
|
+
header_value = _select_header_value(pd, main, sub, current_category, i)
|
|
1342
|
+
merged_headers.append(header_value)
|
|
1343
|
+
|
|
1344
|
+
return merged_headers
|
|
1345
|
+
|
|
1346
|
+
|
|
1347
|
+
def _select_header_value(pd: "pd.DataFrame", main, sub, current_category: Optional[str], index: int) -> str:
|
|
1348
|
+
"""
|
|
1349
|
+
Select the appropriate header value based on priority: sub-header > main header > category > unnamed.
|
|
1350
|
+
|
|
1351
|
+
:param pd.DataFrame pd: The pandas dataframe
|
|
1352
|
+
:param main: Main header value
|
|
1353
|
+
:param sub: Sub-header value
|
|
1354
|
+
:param Optional[str] current_category: Current category from merged cells
|
|
1355
|
+
:param int index: Column index for fallback naming
|
|
1356
|
+
:return: Selected header value
|
|
1357
|
+
:rtype: str
|
|
1358
|
+
"""
|
|
1359
|
+
if pd.notna(sub) and sub and str(sub).strip():
|
|
1360
|
+
return str(sub)
|
|
1361
|
+
if pd.notna(main) and main and str(main).strip():
|
|
1362
|
+
return str(main)
|
|
1363
|
+
if current_category:
|
|
1364
|
+
return f"{current_category}_{index}"
|
|
1365
|
+
return f"Unnamed_{index}"
|
|
1366
|
+
|
|
1367
|
+
|
|
1165
1368
|
def _load_and_prepare_cis_dataframe(file_path: click.Path, cis_sheet_name: str, skip_rows: int):
|
|
1166
1369
|
"""
|
|
1167
1370
|
Load and prepare the CIS dataframe from the workbook.
|
|
@@ -1171,26 +1374,32 @@ def _load_and_prepare_cis_dataframe(file_path: click.Path, cis_sheet_name: str,
|
|
|
1171
1374
|
:param int skip_rows: Number of rows to skip
|
|
1172
1375
|
:return: Tuple of (prepared dataframe, updated skip_rows) or (None, skip_rows) if empty
|
|
1173
1376
|
"""
|
|
1174
|
-
|
|
1175
|
-
|
|
1176
|
-
|
|
1177
|
-
|
|
1178
|
-
|
|
1179
|
-
prompt=False,
|
|
1180
|
-
ignore_unnamed=True,
|
|
1181
|
-
worksheet_name=cis_sheet_name,
|
|
1182
|
-
warn_extra_headers=False,
|
|
1183
|
-
)
|
|
1184
|
-
if validator.data.empty:
|
|
1377
|
+
# Read the Excel file directly with pandas to preserve "N/A" as string
|
|
1378
|
+
pd = get_pandas()
|
|
1379
|
+
df = pd.read_excel(file_path, sheet_name=cis_sheet_name, header=None, keep_default_na=False)
|
|
1380
|
+
|
|
1381
|
+
if df.empty:
|
|
1185
1382
|
return None, skip_rows
|
|
1186
1383
|
|
|
1187
|
-
|
|
1384
|
+
# Find the row with "Control ID"
|
|
1385
|
+
control_id_row_idx = _find_control_id_row_index(df)
|
|
1386
|
+
if control_id_row_idx is None:
|
|
1387
|
+
logger.error("Could not find 'Control ID' in CIS worksheet")
|
|
1388
|
+
return None, skip_rows
|
|
1188
1389
|
|
|
1189
|
-
|
|
1190
|
-
|
|
1390
|
+
# Extract and merge the two header rows
|
|
1391
|
+
header_row = df.iloc[control_id_row_idx]
|
|
1392
|
+
sub_header_row = df.iloc[control_id_row_idx + 1]
|
|
1393
|
+
merged_headers = _merge_header_rows(header_row, sub_header_row)
|
|
1394
|
+
|
|
1395
|
+
# Get data starting from two rows after the main header row
|
|
1396
|
+
cis_df = df.iloc[control_id_row_idx + 2 :].reset_index(drop=True)
|
|
1397
|
+
cis_df.columns = merged_headers
|
|
1191
1398
|
cis_df.dropna(how="all", inplace=True)
|
|
1192
1399
|
cis_df.reset_index(drop=True, inplace=True)
|
|
1193
1400
|
|
|
1401
|
+
skip_rows = control_id_row_idx + 2
|
|
1402
|
+
|
|
1194
1403
|
return cis_df, skip_rows
|
|
1195
1404
|
|
|
1196
1405
|
|
|
@@ -1207,7 +1416,7 @@ def _extract_status(data_row) -> str:
|
|
|
1207
1416
|
"Implemented",
|
|
1208
1417
|
ControlImplementationStatus.PartiallyImplemented,
|
|
1209
1418
|
"Planned",
|
|
1210
|
-
|
|
1419
|
+
ALTERNATIVE_IMPLEMENTATION, # Use the correct constant
|
|
1211
1420
|
ControlImplementationStatus.NA,
|
|
1212
1421
|
]:
|
|
1213
1422
|
if data_row[col]:
|
|
@@ -108,7 +108,7 @@ class FedrampPoamIntegration(ScannerIntegration):
|
|
|
108
108
|
error_and_exit(FILE_PATH_ERROR)
|
|
109
109
|
self.workbook = self.workbook or load_workbook(filename=self.file_path, data_only=True, read_only=True)
|
|
110
110
|
self.poam_sheets = kwargs.get("poam_sheets") or [
|
|
111
|
-
sheet for sheet in self.workbook.sheetnames if re.search("POA&M Items", sheet)
|
|
111
|
+
sheet for sheet in self.workbook.sheetnames if re.search("POA&M Items|Configuration Findings", sheet)
|
|
112
112
|
]
|
|
113
113
|
except (FileNotFoundError, InvalidFileException, KeyError) as e:
|
|
114
114
|
logger.error(f"Failed to load workbook: {e}")
|
|
@@ -338,7 +338,9 @@ class FedrampPoamIntegration(ScannerIntegration):
|
|
|
338
338
|
yield from findings
|
|
339
339
|
|
|
340
340
|
if not poam_id or not poam_id.upper():
|
|
341
|
-
|
|
341
|
+
logger.debug(
|
|
342
|
+
f"Invalid POAM ID on row {index}, sheet {sheet}: weakness_name={weakness_name}, poam_id={poam_id}"
|
|
343
|
+
)
|
|
342
344
|
logger.warning(f"Invalid POAM ID on row {index}, sheet {sheet}. Skipping.")
|
|
343
345
|
yield from findings
|
|
344
346
|
|
|
@@ -403,6 +405,10 @@ class FedrampPoamIntegration(ScannerIntegration):
|
|
|
403
405
|
if not status_date:
|
|
404
406
|
continue
|
|
405
407
|
|
|
408
|
+
# Extract Controls field (Column B) for Configuration Findings
|
|
409
|
+
controls = val_mapping.get_value(data, "Controls")
|
|
410
|
+
affected_controls = str(controls) if controls else None
|
|
411
|
+
|
|
406
412
|
# Validate pluginText
|
|
407
413
|
finding = IntegrationFinding(
|
|
408
414
|
control_labels=[],
|
|
@@ -436,6 +442,7 @@ class FedrampPoamIntegration(ScannerIntegration):
|
|
|
436
442
|
risk_adjustment=self.determine_risk_adjustment(val_mapping.get_value(data, "Risk Adjustment")),
|
|
437
443
|
operational_requirements=str(val_mapping.get_value(data, "Operational Requirement")),
|
|
438
444
|
deviation_rationale=str(val_mapping.get_value(data, "Deviation Rationale")),
|
|
445
|
+
affected_controls=affected_controls,
|
|
439
446
|
poam_id=poam_id,
|
|
440
447
|
)
|
|
441
448
|
if finding.is_valid():
|
|
@@ -498,6 +505,24 @@ class FedrampPoamIntegration(ScannerIntegration):
|
|
|
498
505
|
asset_ids = val_mapping.get_value(data, ASSET_IDENTIFIER)
|
|
499
506
|
if not asset_ids:
|
|
500
507
|
return row_assets
|
|
508
|
+
|
|
509
|
+
# Skip rows where asset identifier contains date/description text (header rows)
|
|
510
|
+
asset_ids_str = str(asset_ids).lower()
|
|
511
|
+
if any(
|
|
512
|
+
keyword in asset_ids_str
|
|
513
|
+
for keyword in [
|
|
514
|
+
"date the weakness",
|
|
515
|
+
"aka discovery",
|
|
516
|
+
"permanent column",
|
|
517
|
+
"date of intended",
|
|
518
|
+
"last changed or closed",
|
|
519
|
+
"port/protocol",
|
|
520
|
+
"specified in the inventory",
|
|
521
|
+
]
|
|
522
|
+
):
|
|
523
|
+
logger.debug(f"Skipping row with header/description text in asset identifier: {str(asset_ids)[:100]}")
|
|
524
|
+
return row_assets
|
|
525
|
+
|
|
501
526
|
asset_id_list = self.gen_asset_list(asset_ids)
|
|
502
527
|
|
|
503
528
|
if not asset_id_list:
|
|
@@ -559,6 +584,9 @@ class FedrampPoamIntegration(ScannerIntegration):
|
|
|
559
584
|
return raw_type
|
|
560
585
|
|
|
561
586
|
for asset_id in asset_id_list:
|
|
587
|
+
# Handle long asset names
|
|
588
|
+
asset_name, asset_notes = self._handle_long_asset_name(asset_id)
|
|
589
|
+
|
|
562
590
|
# Get raw values and clean them
|
|
563
591
|
raw_values = {
|
|
564
592
|
"ip": asset_id if validate_ip_address(asset_id) else "",
|
|
@@ -571,8 +599,8 @@ class FedrampPoamIntegration(ScannerIntegration):
|
|
|
571
599
|
asset_type = determine_asset_type(asset_id, raw_values["type"])
|
|
572
600
|
|
|
573
601
|
res = IntegrationAsset(
|
|
574
|
-
name=
|
|
575
|
-
identifier=
|
|
602
|
+
name=asset_name, # Use shortened name if needed
|
|
603
|
+
identifier=asset_name, # Use shortened name as identifier
|
|
576
604
|
asset_type=asset_type, # Use determined asset type
|
|
577
605
|
asset_category=regscale_models.AssetCategory.Hardware,
|
|
578
606
|
parent_id=self.plan_id,
|
|
@@ -581,6 +609,7 @@ class FedrampPoamIntegration(ScannerIntegration):
|
|
|
581
609
|
ip_address=raw_values["ip"],
|
|
582
610
|
fqdn=raw_values["fqdn"],
|
|
583
611
|
mac_address=raw_values["mac"],
|
|
612
|
+
notes=asset_notes, # Store full name if truncated
|
|
584
613
|
date_last_updated=get_current_datetime(),
|
|
585
614
|
)
|
|
586
615
|
row_assets.append(res)
|
|
@@ -591,15 +620,52 @@ class FedrampPoamIntegration(ScannerIntegration):
|
|
|
591
620
|
|
|
592
621
|
return row_assets
|
|
593
622
|
|
|
623
|
+
def _handle_long_asset_name(self, asset_id: str, max_length: int = 450) -> tuple[str, str]:
|
|
624
|
+
"""
|
|
625
|
+
Handle asset names that exceed database field limits.
|
|
626
|
+
Generates a hash-based identifier for long names and preserves full name in notes.
|
|
627
|
+
|
|
628
|
+
:param str asset_id: The asset identifier
|
|
629
|
+
:param int max_length: Maximum allowed length (default: 450)
|
|
630
|
+
:return: Tuple of (shortened_name, notes)
|
|
631
|
+
:rtype: tuple[str, str]
|
|
632
|
+
"""
|
|
633
|
+
if len(asset_id) <= max_length:
|
|
634
|
+
return asset_id, ""
|
|
635
|
+
|
|
636
|
+
# Generate hash-based identifier
|
|
637
|
+
import hashlib
|
|
638
|
+
|
|
639
|
+
hash_suffix = hashlib.sha256(asset_id.encode()).hexdigest()[:8]
|
|
640
|
+
truncated = asset_id[: max_length - 9] # Leave room for underscore and hash
|
|
641
|
+
short_name = f"{truncated}_{hash_suffix}"
|
|
642
|
+
notes = f"Full identifier: {asset_id}"
|
|
643
|
+
|
|
644
|
+
logger.warning(f"Asset identifier exceeds {max_length} chars, truncated to: {short_name[:100]}...")
|
|
645
|
+
return short_name, notes
|
|
646
|
+
|
|
594
647
|
def gen_asset_list(self, asset_ids: str):
|
|
595
648
|
"""
|
|
596
649
|
Generate a list of asset identifiers from a string.
|
|
650
|
+
Handles multiple separator types: commas, semicolons, pipes, tabs, newlines, single/multiple spaces.
|
|
651
|
+
Also removes surrounding brackets that might wrap the list.
|
|
597
652
|
|
|
598
653
|
:param str asset_ids: The asset identifier string
|
|
599
654
|
:return: The list of asset identifiers
|
|
600
655
|
:rtype: List[str]
|
|
601
656
|
"""
|
|
602
|
-
|
|
657
|
+
# Remove surrounding brackets if present (handles cases like "[10.10.1.1 10.10.1.2]")
|
|
658
|
+
asset_ids = asset_ids.strip()
|
|
659
|
+
if asset_ids.startswith("[") and asset_ids.endswith("]"):
|
|
660
|
+
asset_ids = asset_ids[1:-1].strip()
|
|
661
|
+
|
|
662
|
+
# Split on: commas, semicolons, pipes, tabs, newlines, carriage returns, and ANY whitespace (including single spaces)
|
|
663
|
+
# Changed from \s{2,} to \s+ to handle single spaces between IPs
|
|
664
|
+
return [
|
|
665
|
+
aid.strip()
|
|
666
|
+
for aid in re.split(r"[,;\|\t\n\r]+|\s+", asset_ids)
|
|
667
|
+
if isinstance(aid, str) and aid.strip() and len(aid.strip()) > 0
|
|
668
|
+
]
|
|
603
669
|
|
|
604
670
|
@staticmethod
|
|
605
671
|
def empty(string: Optional[str]) -> Optional[str]:
|
|
@@ -625,9 +691,10 @@ class FedrampPoamIntegration(ScannerIntegration):
|
|
|
625
691
|
:return: The status (Open/Closed) or None
|
|
626
692
|
:rtype: Optional[str]
|
|
627
693
|
"""
|
|
628
|
-
|
|
694
|
+
sheet_lower = sheet.lower()
|
|
695
|
+
if "closed" in sheet_lower:
|
|
629
696
|
return "Closed"
|
|
630
|
-
elif "open" in
|
|
697
|
+
elif "open" in sheet_lower or "configuration findings" in sheet_lower:
|
|
631
698
|
return "Open"
|
|
632
699
|
return None
|
|
633
700
|
|
|
@@ -2283,12 +2283,27 @@ class ScannerIntegration(ABC):
|
|
|
2283
2283
|
def _create_property_safe(self, issue: regscale_models.Issue, key: str, value: str, property_type: str) -> None:
|
|
2284
2284
|
"""
|
|
2285
2285
|
Safely create a property with error handling.
|
|
2286
|
+
Validates that the issue has a valid ID before attempting to create the property.
|
|
2286
2287
|
|
|
2287
2288
|
:param regscale_models.Issue issue: The issue to create property for
|
|
2288
2289
|
:param str key: The property key
|
|
2289
2290
|
:param str value: The property value
|
|
2290
2291
|
:param str property_type: Description for logging purposes
|
|
2291
2292
|
"""
|
|
2293
|
+
# Validate that the issue has a valid ID, if not, create the issue
|
|
2294
|
+
if not issue or not issue.id or issue.id == 0:
|
|
2295
|
+
issue = issue.create_or_update()
|
|
2296
|
+
|
|
2297
|
+
# Validate that the issue has a valid ID, if not, skip the property creation
|
|
2298
|
+
if not issue or not issue.id or issue.id == 0:
|
|
2299
|
+
logger.debug(
|
|
2300
|
+
"Skipping %s creation: issue ID is invalid (issue=%s, id=%s)",
|
|
2301
|
+
property_type,
|
|
2302
|
+
"None" if not issue else "present",
|
|
2303
|
+
issue.id if issue else "N/A",
|
|
2304
|
+
)
|
|
2305
|
+
return
|
|
2306
|
+
|
|
2292
2307
|
try:
|
|
2293
2308
|
regscale_models.Property(
|
|
2294
2309
|
key=key,
|
|
@@ -2298,7 +2313,7 @@ class ScannerIntegration(ABC):
|
|
|
2298
2313
|
).create_or_update()
|
|
2299
2314
|
logger.debug("Added %s %s to issue %s", property_type, value, issue.id)
|
|
2300
2315
|
except Exception as e:
|
|
2301
|
-
logger.warning("Failed to create %s: %s", property_type, str(e))
|
|
2316
|
+
logger.warning("Failed to create %s for issue %s: %s", property_type, issue.id, str(e))
|
|
2302
2317
|
|
|
2303
2318
|
def _create_issue_milestones(
|
|
2304
2319
|
self,
|