regscale-cli 6.19.2.0__py3-none-any.whl → 6.20.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of regscale-cli might be problematic. Click here for more details.
- regscale/__init__.py +1 -1
- regscale/airflow/config.py +2 -0
- regscale/airflow/tasks/groups.py +11 -47
- regscale/core/app/internal/login.py +49 -43
- regscale/core/app/internal/model_editor.py +2 -1
- regscale/dev/code_gen.py +2 -5
- regscale/integrations/commercial/synqly/assets.py +26 -0
- regscale/integrations/public/fedramp/appendix_parser.py +499 -104
- regscale/integrations/public/fedramp/fedramp_cis_crm.py +5 -3
- regscale/integrations/public/fedramp/fedramp_five.py +89 -43
- regscale/models/integration_models/cisa_kev_data.json +277 -22
- regscale/models/integration_models/synqly_models/capabilities.json +1 -1
- regscale/models/regscale_models/__init__.py +7 -0
- regscale/models/regscale_models/business_impact_assessment.py +71 -0
- regscale/models/regscale_models/control_implementation.py +15 -0
- regscale/models/regscale_models/evidence.py +72 -4
- regscale/models/regscale_models/evidence_mapping.py +1 -1
- regscale/models/regscale_models/master_assessment.py +19 -0
- regscale/models/regscale_models/policy.py +90 -0
- regscale/models/regscale_models/question.py +30 -2
- regscale/models/regscale_models/questionnaire.py +4 -3
- regscale/models/regscale_models/questionnaire_instance.py +37 -14
- regscale/models/regscale_models/rbac.py +0 -1
- regscale/models/regscale_models/risk_trend.py +67 -0
- regscale/models/regscale_models/task.py +14 -1
- {regscale_cli-6.19.2.0.dist-info → regscale_cli-6.20.1.0.dist-info}/METADATA +114 -55
- {regscale_cli-6.19.2.0.dist-info → regscale_cli-6.20.1.0.dist-info}/RECORD +31 -28
- {regscale_cli-6.19.2.0.dist-info → regscale_cli-6.20.1.0.dist-info}/LICENSE +0 -0
- {regscale_cli-6.19.2.0.dist-info → regscale_cli-6.20.1.0.dist-info}/WHEEL +0 -0
- {regscale_cli-6.19.2.0.dist-info → regscale_cli-6.20.1.0.dist-info}/entry_points.txt +0 -0
- {regscale_cli-6.19.2.0.dist-info → regscale_cli-6.20.1.0.dist-info}/top_level.txt +0 -0
|
@@ -44,7 +44,30 @@ ORIGINATIONS = [
|
|
|
44
44
|
]
|
|
45
45
|
LOWER_ORIGINATIONS = [origin.lower() for origin in ORIGINATIONS]
|
|
46
46
|
DEFAULT_ORIGINATION = "Service Provider Corporate"
|
|
47
|
-
POSITIVE_KEYWORDS = [
|
|
47
|
+
POSITIVE_KEYWORDS = [
|
|
48
|
+
"yes",
|
|
49
|
+
"true",
|
|
50
|
+
"1",
|
|
51
|
+
"☒",
|
|
52
|
+
"True",
|
|
53
|
+
"Yes",
|
|
54
|
+
"☑",
|
|
55
|
+
"☑️",
|
|
56
|
+
"✓",
|
|
57
|
+
"✔",
|
|
58
|
+
"✔️",
|
|
59
|
+
"✅",
|
|
60
|
+
"⬜",
|
|
61
|
+
"▣",
|
|
62
|
+
"■",
|
|
63
|
+
"□",
|
|
64
|
+
"⊠",
|
|
65
|
+
"⊗",
|
|
66
|
+
"×",
|
|
67
|
+
"checked",
|
|
68
|
+
"selected",
|
|
69
|
+
"chosen",
|
|
70
|
+
]
|
|
48
71
|
|
|
49
72
|
# Define your keywords or phrases that map to each status
|
|
50
73
|
STATUS_KEYWORDS = {
|
|
@@ -125,23 +148,135 @@ class AppendixAParser:
|
|
|
125
148
|
|
|
126
149
|
@staticmethod
|
|
127
150
|
def determine_origination(text: str) -> Optional[str]:
|
|
151
|
+
"""
|
|
152
|
+
Determine the origination from the text. Multiple originations may be found and
|
|
153
|
+
returned as a comma-separated string.
|
|
154
|
+
|
|
155
|
+
:param str text: The text to analyze for origination values
|
|
156
|
+
:return: Comma-separated string of origination values or None if none found
|
|
157
|
+
:rtype: Optional[str]
|
|
158
|
+
"""
|
|
159
|
+
if CONTROL_ORIGIN_KEY not in text:
|
|
160
|
+
return None
|
|
161
|
+
|
|
162
|
+
# Clean and standardize the text for processing
|
|
163
|
+
lower_text = AppendixAParser._clean_text_for_processing(text)
|
|
164
|
+
|
|
165
|
+
# Find all matching originations
|
|
166
|
+
found_originations = AppendixAParser._find_originations_in_text(lower_text)
|
|
167
|
+
|
|
168
|
+
if found_originations:
|
|
169
|
+
return ",".join(found_originations)
|
|
170
|
+
return None
|
|
171
|
+
|
|
172
|
+
@staticmethod
|
|
173
|
+
def _clean_text_for_processing(text: str) -> str:
|
|
174
|
+
"""
|
|
175
|
+
Clean and standardize text for processing.
|
|
176
|
+
|
|
177
|
+
:param str text: The text to clean
|
|
178
|
+
:return: Cleaned and standardized text
|
|
179
|
+
:rtype: str
|
|
180
|
+
"""
|
|
128
181
|
tokens = text.split()
|
|
129
182
|
rejoined_text = " ".join(tokens) # this removes any newlines or spaces
|
|
130
183
|
rejoined_text = rejoined_text.replace("( ", "(")
|
|
131
184
|
rejoined_text = rejoined_text.replace(" )", ")")
|
|
185
|
+
return rejoined_text.lower()
|
|
186
|
+
|
|
187
|
+
@staticmethod
|
|
188
|
+
def _find_originations_in_text(lower_text: str) -> List[str]:
|
|
189
|
+
"""
|
|
190
|
+
Find all originations in the text.
|
|
191
|
+
|
|
192
|
+
:param str lower_text: The lowercase text to search for originations
|
|
193
|
+
:return: List of found originations
|
|
194
|
+
:rtype: List[str]
|
|
195
|
+
"""
|
|
196
|
+
# Common checkbox characters in various fonts and styles
|
|
197
|
+
checkbox_chars = ["☒", "☑", "☑️", "✓", "✔", "✔️", "✅", "⬜", "▣", "■", "□", "⊠", "⊗", "×"]
|
|
198
|
+
|
|
199
|
+
found_originations = []
|
|
132
200
|
|
|
133
|
-
if CONTROL_ORIGIN_KEY not in text:
|
|
134
|
-
return None
|
|
135
201
|
for origin in ORIGINATIONS:
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
202
|
+
if AppendixAParser._check_origin_with_keywords(origin, lower_text):
|
|
203
|
+
found_originations.append(origin)
|
|
204
|
+
continue
|
|
205
|
+
|
|
206
|
+
if AppendixAParser._check_origin_with_checkbox_chars(origin, lower_text, checkbox_chars):
|
|
207
|
+
found_originations.append(origin)
|
|
208
|
+
continue
|
|
209
|
+
|
|
210
|
+
if AppendixAParser._check_origin_with_text_patterns(origin, lower_text):
|
|
211
|
+
found_originations.append(origin)
|
|
212
|
+
|
|
213
|
+
return found_originations
|
|
214
|
+
|
|
215
|
+
@staticmethod
|
|
216
|
+
def _check_origin_with_keywords(origin: str, lower_text: str) -> bool:
|
|
217
|
+
"""
|
|
218
|
+
Check if origin is indicated with known keywords.
|
|
219
|
+
|
|
220
|
+
:param str origin: The origin to check for
|
|
221
|
+
:param str lower_text: The text to search in
|
|
222
|
+
:return: True if origin is found with keywords, False otherwise
|
|
223
|
+
:rtype: bool
|
|
224
|
+
"""
|
|
225
|
+
for keyword in POSITIVE_KEYWORDS:
|
|
226
|
+
# Check with space between checkbox and origin
|
|
227
|
+
valid_option_with_space = f"{keyword} {origin}".lower()
|
|
228
|
+
# Check without space between checkbox and origin
|
|
229
|
+
valid_option_without_space = f"{keyword}{origin}".lower()
|
|
230
|
+
|
|
231
|
+
if valid_option_with_space in lower_text or valid_option_without_space in lower_text:
|
|
232
|
+
return True
|
|
233
|
+
return False
|
|
234
|
+
|
|
235
|
+
@staticmethod
|
|
236
|
+
def _check_origin_with_checkbox_chars(origin: str, lower_text: str, checkbox_chars: List[str]) -> bool:
|
|
237
|
+
"""
|
|
238
|
+
Check if origin is indicated with checkbox characters.
|
|
239
|
+
|
|
240
|
+
:param str origin: The origin to check for
|
|
241
|
+
:param str lower_text: The text to search in
|
|
242
|
+
:param List[str] checkbox_chars: List of checkbox characters to check for
|
|
243
|
+
:return: True if origin is found with checkbox characters, False otherwise
|
|
244
|
+
:rtype: bool
|
|
245
|
+
"""
|
|
246
|
+
for char in checkbox_chars:
|
|
247
|
+
# Check with and without space
|
|
248
|
+
if f"{char} {origin}".lower() in lower_text or f"{char}{origin}".lower() in lower_text:
|
|
249
|
+
return True
|
|
250
|
+
return False
|
|
251
|
+
|
|
252
|
+
@staticmethod
|
|
253
|
+
def _check_origin_with_text_patterns(origin: str, lower_text: str) -> bool:
|
|
254
|
+
"""
|
|
255
|
+
Check if origin is indicated with text patterns.
|
|
256
|
+
|
|
257
|
+
:param str origin: The origin to check for
|
|
258
|
+
:param str lower_text: The text to search in
|
|
259
|
+
:return: True if origin is found with text patterns, False otherwise
|
|
260
|
+
:rtype: bool
|
|
261
|
+
"""
|
|
262
|
+
# Look for patterns like "X is checked" or "X is selected"
|
|
263
|
+
check_patterns = [
|
|
264
|
+
f"{origin.lower()} is checked",
|
|
265
|
+
f"{origin.lower()} is selected",
|
|
266
|
+
f"{origin.lower()} (checked)",
|
|
267
|
+
f"{origin.lower()} (selected)",
|
|
268
|
+
f"selected: {origin.lower()}",
|
|
269
|
+
]
|
|
270
|
+
return any(pattern in lower_text for pattern in check_patterns)
|
|
142
271
|
|
|
143
272
|
@staticmethod
|
|
144
273
|
def determine_status(text: str) -> str:
|
|
274
|
+
"""
|
|
275
|
+
Determine the implementation status from the text.
|
|
276
|
+
:param str text: The text to analyze for implementation status
|
|
277
|
+
:return: The determined implementation status
|
|
278
|
+
:rtype: str
|
|
279
|
+
"""
|
|
145
280
|
# Tokenize the input text
|
|
146
281
|
tokens = text.split()
|
|
147
282
|
|
|
@@ -150,23 +285,54 @@ class AppendixAParser:
|
|
|
150
285
|
|
|
151
286
|
matches = []
|
|
152
287
|
|
|
288
|
+
# Common checkbox characters in various fonts and styles
|
|
289
|
+
checkbox_chars = ["☒", "☑", "☑️", "✓", "✔", "✔️", "✅", "⬜", "▣", "■", "□", "⊠", "⊗", "×"]
|
|
290
|
+
|
|
153
291
|
# Search for keywords in the tokenized text to determine the status
|
|
154
292
|
for status, keywords in STATUS_KEYWORDS.items():
|
|
155
293
|
for keyword in keywords:
|
|
156
|
-
|
|
294
|
+
# Check patterns with space: "1 keyword" or "☒ keyword" or any other checkbox char
|
|
295
|
+
if f"1 {keyword}" in token_string or any(
|
|
296
|
+
f"{char} {keyword}" in token_string for char in checkbox_chars
|
|
297
|
+
):
|
|
157
298
|
matches.append(status)
|
|
299
|
+
break
|
|
300
|
+
|
|
301
|
+
# Check patterns without space: "1keyword" or "☒keyword" or any other checkbox char
|
|
302
|
+
elif f"1{keyword}" in token_string or any(
|
|
303
|
+
f"{char}{keyword}" in token_string for char in checkbox_chars
|
|
304
|
+
):
|
|
305
|
+
matches.append(status)
|
|
306
|
+
break
|
|
307
|
+
|
|
308
|
+
# Also check for direct True/Yes values next to keywords
|
|
309
|
+
elif any(pos + keyword in token_string for pos in ["true", "yes"]):
|
|
310
|
+
matches.append(status)
|
|
311
|
+
break
|
|
158
312
|
|
|
159
313
|
# Determine the status to return
|
|
160
314
|
if len(matches) > 1:
|
|
161
315
|
# More than one match found
|
|
162
|
-
#
|
|
163
|
-
if
|
|
164
|
-
return
|
|
316
|
+
# Not applicable takes precedence over planned/partially implemented (only 2 valid multi select statuses for fedramp)
|
|
317
|
+
if NA_STATUS in matches:
|
|
318
|
+
return NA_STATUS
|
|
165
319
|
else:
|
|
166
320
|
return matches[0]
|
|
167
321
|
elif matches:
|
|
168
322
|
return matches[0] # Return the first match if only one
|
|
169
323
|
else:
|
|
324
|
+
# Extra fallback for unusual checkbox patterns
|
|
325
|
+
# Look for any checkbox-like character anywhere in the text without keywords
|
|
326
|
+
for status, keywords in STATUS_KEYWORDS.items():
|
|
327
|
+
for keyword in keywords:
|
|
328
|
+
# Skip the checkbox characters themselves (already checked above)
|
|
329
|
+
if keyword in checkbox_chars:
|
|
330
|
+
continue
|
|
331
|
+
|
|
332
|
+
# Check if any checkbox character is present in the text alongside common implementation terms
|
|
333
|
+
if any(char in token_string for char in checkbox_chars) and keyword in token_string:
|
|
334
|
+
return status
|
|
335
|
+
|
|
170
336
|
return DEFAULT_STATUS # No matches found
|
|
171
337
|
|
|
172
338
|
@staticmethod
|
|
@@ -192,28 +358,100 @@ class AppendixAParser:
|
|
|
192
358
|
:return: The state of the checkbox.
|
|
193
359
|
:rtype: bool
|
|
194
360
|
"""
|
|
195
|
-
#
|
|
361
|
+
# Try different methods to determine checkbox state
|
|
362
|
+
methods = [
|
|
363
|
+
AppendixAParser._check_direct_val_attribute,
|
|
364
|
+
AppendixAParser._check_checked_element,
|
|
365
|
+
AppendixAParser._check_default_element,
|
|
366
|
+
AppendixAParser._check_child_elements,
|
|
367
|
+
AppendixAParser._check_attributes,
|
|
368
|
+
AppendixAParser._check_namespace_attributes,
|
|
369
|
+
]
|
|
370
|
+
|
|
371
|
+
for method in methods:
|
|
372
|
+
result = method(checkbox_element)
|
|
373
|
+
if result is not None:
|
|
374
|
+
return result
|
|
375
|
+
|
|
376
|
+
# If none of the methods worked, return False
|
|
377
|
+
return False
|
|
378
|
+
|
|
379
|
+
@staticmethod
|
|
380
|
+
def _check_direct_val_attribute(element: Any) -> Optional[bool]:
|
|
381
|
+
"""Check if element has a direct 'val' attribute."""
|
|
196
382
|
val = "{%s}%s" % (SCHEMA, "val")
|
|
197
|
-
|
|
198
|
-
default = "{%s}%s" % (SCHEMA, "default")
|
|
199
|
-
state = checkbox_element.get(val)
|
|
383
|
+
state = element.get(val)
|
|
200
384
|
if state is not None:
|
|
201
385
|
return state == "1"
|
|
386
|
+
return None
|
|
202
387
|
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
if
|
|
206
|
-
|
|
207
|
-
|
|
388
|
+
@staticmethod
|
|
389
|
+
def _check_checked_element(element: Any) -> Optional[bool]:
|
|
390
|
+
"""Check if element has a 'checked' child with a 'val' attribute."""
|
|
391
|
+
val = "{%s}%s" % (SCHEMA, "val")
|
|
392
|
+
checked = "{%s}%s" % (SCHEMA, "checked")
|
|
393
|
+
return AppendixAParser._check_element_with_val(element, checked, val)
|
|
208
394
|
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
if
|
|
212
|
-
|
|
213
|
-
|
|
395
|
+
@staticmethod
|
|
396
|
+
def _check_default_element(element: Any) -> Optional[bool]:
|
|
397
|
+
"""Check if element has a 'default' child with a 'val' attribute."""
|
|
398
|
+
val = "{%s}%s" % (SCHEMA, "val")
|
|
399
|
+
default = "{%s}%s" % (SCHEMA, "default")
|
|
400
|
+
return AppendixAParser._check_element_with_val(element, default, val)
|
|
214
401
|
|
|
215
|
-
|
|
216
|
-
|
|
402
|
+
@staticmethod
|
|
403
|
+
def _check_element_with_val(parent: Any, child_tag: str, val_tag: str) -> Optional[bool]:
|
|
404
|
+
"""
|
|
405
|
+
Check if a child element has a 'val' attribute.
|
|
406
|
+
|
|
407
|
+
:param Any parent: The parent element
|
|
408
|
+
:param str child_tag: The child element tag
|
|
409
|
+
:param str val_tag: The value attribute tag
|
|
410
|
+
:return: True if val is "1", False if val is not "1", None if element or val not found
|
|
411
|
+
:rtype: Optional[bool]
|
|
412
|
+
"""
|
|
413
|
+
child_element = parent.find(child_tag)
|
|
414
|
+
if child_element is not None:
|
|
415
|
+
state = child_element.get(val_tag)
|
|
416
|
+
if state is not None:
|
|
417
|
+
return state == "1"
|
|
418
|
+
return None
|
|
419
|
+
|
|
420
|
+
@staticmethod
|
|
421
|
+
def _check_child_elements(element: Any) -> Optional[bool]:
|
|
422
|
+
"""Check all child elements for a 'val' attribute."""
|
|
423
|
+
val = "{%s}%s" % (SCHEMA, "val")
|
|
424
|
+
try:
|
|
425
|
+
for child in element.getchildren():
|
|
426
|
+
if child.get(val) is not None:
|
|
427
|
+
return child.get(val) == "1"
|
|
428
|
+
except (AttributeError, TypeError):
|
|
429
|
+
pass
|
|
430
|
+
return None
|
|
431
|
+
|
|
432
|
+
@staticmethod
|
|
433
|
+
def _check_attributes(element: Any) -> Optional[bool]:
|
|
434
|
+
"""Check all attributes for check-related names."""
|
|
435
|
+
try:
|
|
436
|
+
for attr_name, attr_value in element.attrib.items():
|
|
437
|
+
if "checked" in attr_name.lower() or "val" in attr_name.lower() or "state" in attr_name.lower():
|
|
438
|
+
return attr_value in ["1", "true", "checked", "on"]
|
|
439
|
+
except (AttributeError, TypeError):
|
|
440
|
+
pass
|
|
441
|
+
return None
|
|
442
|
+
|
|
443
|
+
@staticmethod
|
|
444
|
+
def _check_namespace_attributes(element: Any) -> Optional[bool]:
|
|
445
|
+
"""Check attributes in all namespaces."""
|
|
446
|
+
try:
|
|
447
|
+
for ns, uri in element.nsmap.items():
|
|
448
|
+
for attr_name in ["val", "checked", "state", "default"]:
|
|
449
|
+
attr_with_ns = "{%s}%s" % (uri, attr_name)
|
|
450
|
+
if element.get(attr_with_ns) is not None:
|
|
451
|
+
return element.get(attr_with_ns) in ["1", "true", "checked", "on"]
|
|
452
|
+
except (AttributeError, TypeError):
|
|
453
|
+
pass
|
|
454
|
+
return None
|
|
217
455
|
|
|
218
456
|
def get_implementation_statuses(self) -> Dict:
|
|
219
457
|
"""
|
|
@@ -268,25 +506,58 @@ class AppendixAParser:
|
|
|
268
506
|
:param Dict control_dict: The dictionary containing the control implementation data.
|
|
269
507
|
:param str check: The check string to exclude from the part value.
|
|
270
508
|
"""
|
|
509
|
+
part_list = control_dict.get("parts", [])
|
|
510
|
+
|
|
271
511
|
if cell_count > 1:
|
|
272
|
-
|
|
273
|
-
value = self.get_cell_text(cells[1])
|
|
274
|
-
part_list = control_dict.get("parts", [])
|
|
275
|
-
val_dict = {"name": name, "value": value}
|
|
276
|
-
if check not in value.lower() and val_dict not in part_list:
|
|
277
|
-
part_list.append(val_dict)
|
|
278
|
-
control_dict["parts"] = part_list
|
|
512
|
+
self._handle_multicolumn_part(cells, part_list, check)
|
|
279
513
|
else:
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
514
|
+
self._handle_single_column_part(cells[0], part_list, check)
|
|
515
|
+
|
|
516
|
+
control_dict["parts"] = part_list
|
|
517
|
+
|
|
518
|
+
def _handle_multicolumn_part(self, cells: Any, part_list: List, check: str):
|
|
519
|
+
"""
|
|
520
|
+
Handle a part with multiple columns.
|
|
521
|
+
|
|
522
|
+
:param Any cells: The cells in the row.
|
|
523
|
+
:param List part_list: List to add parts to.
|
|
524
|
+
:param str check: The check string to exclude from part value.
|
|
525
|
+
"""
|
|
526
|
+
name = self.get_cell_text(cells[0]) if cells[0].text else DEFAULT_PART
|
|
527
|
+
value = self.get_cell_text(cells[1])
|
|
528
|
+
val_dict = {"name": name, "value": value}
|
|
529
|
+
if check not in value.lower() and val_dict not in part_list:
|
|
530
|
+
part_list.append(val_dict)
|
|
531
|
+
|
|
532
|
+
def _handle_single_column_part(self, cell: Any, part_list: List, check: str):
|
|
533
|
+
"""
|
|
534
|
+
Handle a part with a single column.
|
|
535
|
+
|
|
536
|
+
:param Any cell: The cell to process.
|
|
537
|
+
:param List part_list: List to add parts to.
|
|
538
|
+
:param str check: The check string to exclude from part value.
|
|
539
|
+
"""
|
|
540
|
+
value = self.get_cell_text(cell)
|
|
541
|
+
value_lower = value.lower()
|
|
542
|
+
|
|
543
|
+
# Find part name using regex pattern
|
|
544
|
+
name = self._extract_part_name(value_lower)
|
|
545
|
+
|
|
546
|
+
val_dict = {"name": name, "value": value}
|
|
547
|
+
if check.lower() not in value_lower and val_dict not in part_list:
|
|
548
|
+
part_list.append(val_dict)
|
|
549
|
+
|
|
550
|
+
def _extract_part_name(self, text: str) -> str:
|
|
551
|
+
"""
|
|
552
|
+
Extract part name from text using regex.
|
|
553
|
+
|
|
554
|
+
:param str text: The text to extract from.
|
|
555
|
+
:return: The extracted part name or default part name.
|
|
556
|
+
:rtype: str
|
|
557
|
+
"""
|
|
558
|
+
pattern = re.compile(r"\b(" + "|".join(re.escape(part) for part in self.parts_set) + r")\b", re.IGNORECASE)
|
|
559
|
+
match = pattern.search(text)
|
|
560
|
+
return match.group(1) if match else DEFAULT_PART
|
|
290
561
|
|
|
291
562
|
def set_cell_text(self, cell: Any):
|
|
292
563
|
"""
|
|
@@ -330,6 +601,8 @@ class AppendixAParser:
|
|
|
330
601
|
self._handle_implementation_status()
|
|
331
602
|
self._handle_implementation_origination()
|
|
332
603
|
self._handle_implementation_statement()
|
|
604
|
+
# Comment out the implementation parts handling as it requires parameters not available in this context
|
|
605
|
+
# We'll rely on the handle_row_parts method to handle parts instead
|
|
333
606
|
# self._handle_implementation_parts(cell_index, cells)
|
|
334
607
|
self._handle_responsibility()
|
|
335
608
|
|
|
@@ -363,23 +636,44 @@ class AppendixAParser:
|
|
|
363
636
|
"""
|
|
364
637
|
Handle the origination of the control implementation.
|
|
365
638
|
"""
|
|
639
|
+
origination_values = []
|
|
640
|
+
|
|
641
|
+
# Check if we're in a Control Summary section and have Control Origination text
|
|
366
642
|
if (
|
|
367
|
-
self.
|
|
368
|
-
and any(
|
|
369
|
-
[self.score_similarity(self.cell_data_status.lower(), origin) > 90 for origin in LOWER_ORIGINATIONS]
|
|
370
|
-
)
|
|
371
|
-
and CONTROL_SUMMARY_KEY.lower() in self.header_row_text.lower()
|
|
643
|
+
CONTROL_SUMMARY_KEY.lower() in self.header_row_text.lower()
|
|
372
644
|
and CONTROL_ORIGIN_KEY.lower() in self.joined_processed_texts.lower()
|
|
373
|
-
and self.
|
|
645
|
+
and self.control_id in self.controls_implementations
|
|
646
|
+
and self.controls_implementations[self.control_id] is not None
|
|
374
647
|
):
|
|
375
|
-
|
|
648
|
+
# Method 1: Check cell_data for origination values based on checkbox states
|
|
649
|
+
for key, value in self.cell_data.items():
|
|
650
|
+
if value and any(origin.lower() in key.lower() for origin in ORIGINATIONS):
|
|
651
|
+
# Find the matching origination from the known list
|
|
652
|
+
for origin in ORIGINATIONS:
|
|
653
|
+
if origin.lower() in key.lower():
|
|
654
|
+
logger.debug(f"Found origination from checkbox: {origin}")
|
|
655
|
+
if origin not in origination_values:
|
|
656
|
+
origination_values.append(origin)
|
|
657
|
+
break
|
|
658
|
+
|
|
659
|
+
# Method 2: Try determine_origination as backup
|
|
660
|
+
if orig := self.determine_origination(self.joined_processed_texts):
|
|
661
|
+
logger.debug(f"Found origination from text: {orig}")
|
|
662
|
+
# Handle multiple comma-separated values in the determine_origination result
|
|
663
|
+
for origin in orig.split(","):
|
|
664
|
+
if origin.strip() and origin.strip() not in origination_values:
|
|
665
|
+
origination_values.append(origin.strip())
|
|
666
|
+
|
|
667
|
+
# Save all origination values as comma-delimited string
|
|
668
|
+
if origination_values:
|
|
376
669
|
control_dict = self.controls_implementations[self.control_id]
|
|
377
|
-
control_dict["origination"] =
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
if
|
|
381
|
-
|
|
382
|
-
|
|
670
|
+
control_dict["origination"] = ",".join(origination_values)
|
|
671
|
+
logger.debug(f"Setting origination for {self.control_id}: {control_dict['origination']}")
|
|
672
|
+
elif DEFAULT_ORIGINATION:
|
|
673
|
+
# Set default if none found
|
|
674
|
+
control_dict = self.controls_implementations[self.control_id]
|
|
675
|
+
control_dict["origination"] = DEFAULT_ORIGINATION
|
|
676
|
+
logger.debug(f"Setting default origination for {self.control_id}: {DEFAULT_ORIGINATION}")
|
|
383
677
|
|
|
384
678
|
def _handle_implementation_status(self):
|
|
385
679
|
"""
|
|
@@ -439,38 +733,80 @@ class AppendixAParser:
|
|
|
439
733
|
"""
|
|
440
734
|
value_check = f"{self.control_id} What is the solution and how is it implemented?"
|
|
441
735
|
generic_value_check = "What is the solution and how is it implemented".lower()
|
|
442
|
-
|
|
736
|
+
|
|
737
|
+
# Skip processing if conditions aren't met
|
|
738
|
+
if not self._should_process_parts(value_check, generic_value_check):
|
|
739
|
+
return
|
|
740
|
+
|
|
741
|
+
part_value = self.joined_processed_texts.strip()
|
|
742
|
+
control_dict = self.controls_implementations.get(self.control_id, {})
|
|
743
|
+
part_list = control_dict.get("parts", [])
|
|
744
|
+
|
|
745
|
+
# Check if this is a part declaration
|
|
746
|
+
if not self._is_part_declaration(part_value):
|
|
747
|
+
return
|
|
748
|
+
|
|
749
|
+
part_name = part_value.strip() or DEFAULT_PART
|
|
750
|
+
part_value = self._combine_part_text(part_name, part_value, cell_index, cells)
|
|
751
|
+
|
|
752
|
+
# Build the part dictionary
|
|
753
|
+
self.build_part_dict(
|
|
754
|
+
part_name=part_name,
|
|
755
|
+
part_value=part_value,
|
|
756
|
+
control_dict=control_dict,
|
|
757
|
+
part_list=part_list,
|
|
758
|
+
generic_value_check=generic_value_check,
|
|
759
|
+
)
|
|
760
|
+
|
|
761
|
+
def _should_process_parts(self, value_check: str, generic_value_check: str) -> bool:
|
|
762
|
+
"""
|
|
763
|
+
Determine if parts processing should continue.
|
|
764
|
+
|
|
765
|
+
:param str value_check: Value check string for this specific control
|
|
766
|
+
:param str generic_value_check: Generic value check string
|
|
767
|
+
:return: True if processing should continue, False otherwise
|
|
768
|
+
:rtype: bool
|
|
769
|
+
"""
|
|
770
|
+
return (
|
|
443
771
|
generic_value_check in self.header_row_text.lower()
|
|
444
772
|
and value_check.lower() != self.joined_processed_texts.lower()
|
|
445
773
|
and self.control_id in self.controls_implementations
|
|
446
|
-
)
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
774
|
+
)
|
|
775
|
+
|
|
776
|
+
def _is_part_declaration(self, part_value: str) -> bool:
|
|
777
|
+
"""
|
|
778
|
+
Check if the value is a part declaration.
|
|
779
|
+
|
|
780
|
+
:param str part_value: The value to check
|
|
781
|
+
:return: True if it's a part declaration, False otherwise
|
|
782
|
+
:rtype: bool
|
|
783
|
+
"""
|
|
784
|
+
return any(
|
|
785
|
+
[
|
|
786
|
+
part_value.strip().lower() == p.lower() or part_value.strip().lower() == f"{p.lower()}:"
|
|
787
|
+
for p in self.parts
|
|
788
|
+
]
|
|
789
|
+
)
|
|
790
|
+
|
|
791
|
+
def _combine_part_text(self, part_name: str, part_value: str, cell_index: int, cells: Any) -> str:
|
|
792
|
+
"""
|
|
793
|
+
Combine part text from potentially multiple cells.
|
|
794
|
+
|
|
795
|
+
:param str part_name: Name of the part
|
|
796
|
+
:param str part_value: Current value text
|
|
797
|
+
:param int cell_index: Current cell index
|
|
798
|
+
:param Any cells: All cells in the row
|
|
799
|
+
:return: Combined part text
|
|
800
|
+
:rtype: str
|
|
801
|
+
"""
|
|
802
|
+
next_cell_text = self.get_cell_text(cells[cell_index + 1])
|
|
803
|
+
|
|
804
|
+
if ":" not in part_value:
|
|
805
|
+
# If part_value doesn't have a colon, add the next cell's text after a colon
|
|
806
|
+
return ": ".join([part_value.strip(), next_cell_text.strip()])
|
|
807
|
+
else:
|
|
808
|
+
# If part_value already has a colon, just add the next cell's text
|
|
809
|
+
return " ".join([part_value.strip(), next_cell_text.strip()])
|
|
474
810
|
|
|
475
811
|
def build_part_dict(
|
|
476
812
|
self, part_name: str, part_value: str, control_dict: Dict, part_list: List, generic_value_check: str
|
|
@@ -484,24 +820,42 @@ class AppendixAParser:
|
|
|
484
820
|
:param str generic_value_check: The generic value check string.
|
|
485
821
|
"""
|
|
486
822
|
if part_value.lower().startswith("part"):
|
|
487
|
-
|
|
488
|
-
part_dict = {"name": part_name, "value": DEFAULT_PART}
|
|
489
|
-
if len(parts) == 2 and parts[1].strip() != "":
|
|
490
|
-
part_dict["name"] = parts[0].strip()
|
|
491
|
-
part_dict["value"] = parts[1].strip()
|
|
492
|
-
logger.debug(f"Part: {part_dict}")
|
|
493
|
-
self.add_to_list(new_dict=part_dict, the_list=part_list)
|
|
494
|
-
elif part_value.strip() != "" and generic_value_check not in part_value.lower():
|
|
495
|
-
part_dict["value"] = part_value.strip()
|
|
496
|
-
self.add_to_list(new_dict=part_dict, the_list=part_list)
|
|
823
|
+
self._handle_part_value_starting_with_part(part_name, part_value, part_list, generic_value_check)
|
|
497
824
|
elif generic_value_check not in part_value.lower():
|
|
825
|
+
# For values that don't start with "part" but are valid
|
|
498
826
|
pdict = {
|
|
499
827
|
"name": DEFAULT_PART,
|
|
500
828
|
"value": part_value.strip(),
|
|
501
829
|
}
|
|
502
830
|
self.add_to_list(new_dict=pdict, the_list=part_list)
|
|
831
|
+
|
|
503
832
|
control_dict["parts"] = part_list
|
|
504
833
|
|
|
834
|
+
def _handle_part_value_starting_with_part(
|
|
835
|
+
self, part_name: str, part_value: str, part_list: List, generic_value_check: str
|
|
836
|
+
):
|
|
837
|
+
"""
|
|
838
|
+
Handle part values that start with "part".
|
|
839
|
+
|
|
840
|
+
:param str part_name: The name of the part
|
|
841
|
+
:param str part_value: The value of the part
|
|
842
|
+
:param List part_list: The list to add parts to
|
|
843
|
+
:param str generic_value_check: The generic value check string
|
|
844
|
+
"""
|
|
845
|
+
parts = part_value.split(":", 1)
|
|
846
|
+
part_dict = {"name": part_name, "value": DEFAULT_PART}
|
|
847
|
+
|
|
848
|
+
if len(parts) == 2 and parts[1].strip() != "":
|
|
849
|
+
# If part value has a colon and content after it
|
|
850
|
+
part_dict["name"] = parts[0].strip()
|
|
851
|
+
part_dict["value"] = parts[1].strip()
|
|
852
|
+
logger.debug(f"Part: {part_dict}")
|
|
853
|
+
self.add_to_list(new_dict=part_dict, the_list=part_list)
|
|
854
|
+
elif part_value.strip() != "" and generic_value_check not in part_value.lower():
|
|
855
|
+
# If part value has no colon but is not empty and not the generic check
|
|
856
|
+
part_dict["value"] = part_value.strip()
|
|
857
|
+
self.add_to_list(new_dict=part_dict, the_list=part_list)
|
|
858
|
+
|
|
505
859
|
@staticmethod
|
|
506
860
|
def add_to_list(new_dict: Dict, the_list: List):
|
|
507
861
|
"""
|
|
@@ -530,14 +884,55 @@ class AppendixAParser:
|
|
|
530
884
|
"""
|
|
531
885
|
Handle the checkbox states in the DOCX table.
|
|
532
886
|
"""
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
887
|
+
try:
|
|
888
|
+
# Get checkbox states
|
|
889
|
+
updated_checkbox_states = []
|
|
890
|
+
for checkbox in self.checkbox_states:
|
|
891
|
+
try:
|
|
892
|
+
is_checked = self._get_checkbox_state(checkbox)
|
|
893
|
+
updated_checkbox_states.append(is_checked)
|
|
894
|
+
logger.debug(f"Checkbox state: {is_checked}")
|
|
895
|
+
except Exception as e:
|
|
896
|
+
# If we can't determine the state, assume it's not checked
|
|
897
|
+
logger.debug(f"Error getting checkbox state: {e}")
|
|
898
|
+
updated_checkbox_states.append(False)
|
|
899
|
+
|
|
900
|
+
# Log total checkboxes found
|
|
901
|
+
logger.debug(f"Found {len(updated_checkbox_states)} checkbox states: {updated_checkbox_states}")
|
|
902
|
+
|
|
903
|
+
# First handle any dictionary items in processed_texts
|
|
904
|
+
for item in self.processed_texts:
|
|
905
|
+
if isinstance(item, dict):
|
|
906
|
+
self.cell_data.update(item)
|
|
907
|
+
|
|
908
|
+
# Handle text items with corresponding checkbox states
|
|
909
|
+
text_items = [item for item in self.processed_texts if not isinstance(item, dict)]
|
|
910
|
+
|
|
911
|
+
# Match checkbox states to text items
|
|
912
|
+
for i, item in enumerate(text_items):
|
|
913
|
+
if i < len(updated_checkbox_states):
|
|
914
|
+
self.cell_data[item.strip()] = updated_checkbox_states[i]
|
|
915
|
+
else:
|
|
916
|
+
# If we have more text items than checkbox states, assume unchecked
|
|
917
|
+
self.cell_data[item.strip()] = False
|
|
918
|
+
|
|
919
|
+
# Also check for checkbox character directly in text
|
|
920
|
+
for key in list(self.cell_data.keys()):
|
|
921
|
+
# If text contains a checkbox character and state is False, try to determine true state from text
|
|
922
|
+
if not self.cell_data[key]:
|
|
923
|
+
checkbox_chars = ["☒", "☑", "☑️", "✓", "✔", "✔️", "✅", "⬜", "▣", "■", "□", "⊠", "⊗", "×"]
|
|
924
|
+
if any(char in key for char in checkbox_chars):
|
|
925
|
+
self.cell_data[key] = True
|
|
926
|
+
|
|
927
|
+
# Update cell data status
|
|
539
928
|
self._get_cell_data_status()
|
|
540
929
|
|
|
930
|
+
except Exception as e:
|
|
931
|
+
logger.debug(f"Error in _handle_checkbox_states: {e}")
|
|
932
|
+
# Ensure we don't leave checkbox_states empty
|
|
933
|
+
if not hasattr(self, "cell_data") or self.cell_data is None:
|
|
934
|
+
self.cell_data = {}
|
|
935
|
+
|
|
541
936
|
def _get_cell_data_status(self):
|
|
542
937
|
"""
|
|
543
938
|
Get the status of the cell data.
|