regscale-cli 6.24.0.1__py3-none-any.whl → 6.25.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of regscale-cli might be problematic. Click here for more details.

Files changed (31)
  1. regscale/_version.py +1 -1
  2. regscale/core/app/api.py +1 -1
  3. regscale/core/app/application.py +5 -3
  4. regscale/core/app/internal/evidence.py +308 -202
  5. regscale/dev/code_gen.py +84 -3
  6. regscale/integrations/commercial/__init__.py +2 -0
  7. regscale/integrations/commercial/jira.py +4 -4
  8. regscale/integrations/commercial/microsoft_defender/defender.py +326 -5
  9. regscale/integrations/commercial/microsoft_defender/defender_api.py +348 -14
  10. regscale/integrations/commercial/microsoft_defender/defender_constants.py +157 -0
  11. regscale/integrations/commercial/synqly/assets.py +99 -16
  12. regscale/integrations/commercial/synqly/query_builder.py +533 -0
  13. regscale/integrations/commercial/synqly/vulnerabilities.py +134 -14
  14. regscale/integrations/commercial/wizv2/compliance_report.py +22 -0
  15. regscale/integrations/compliance_integration.py +17 -0
  16. regscale/integrations/scanner_integration.py +16 -0
  17. regscale/models/integration_models/synqly_models/capabilities.json +1 -1
  18. regscale/models/integration_models/synqly_models/connectors/vulnerabilities.py +12 -2
  19. regscale/models/integration_models/synqly_models/filter_parser.py +332 -0
  20. regscale/models/integration_models/synqly_models/synqly_model.py +47 -3
  21. regscale/models/regscale_models/compliance_settings.py +28 -0
  22. regscale/models/regscale_models/component.py +1 -0
  23. regscale/models/regscale_models/control_implementation.py +130 -1
  24. regscale/regscale.py +1 -1
  25. regscale/validation/record.py +23 -1
  26. {regscale_cli-6.24.0.1.dist-info → regscale_cli-6.25.0.1.dist-info}/METADATA +1 -1
  27. {regscale_cli-6.24.0.1.dist-info → regscale_cli-6.25.0.1.dist-info}/RECORD +31 -29
  28. {regscale_cli-6.24.0.1.dist-info → regscale_cli-6.25.0.1.dist-info}/LICENSE +0 -0
  29. {regscale_cli-6.24.0.1.dist-info → regscale_cli-6.25.0.1.dist-info}/WHEEL +0 -0
  30. {regscale_cli-6.24.0.1.dist-info → regscale_cli-6.25.0.1.dist-info}/entry_points.txt +0 -0
  31. {regscale_cli-6.24.0.1.dist-info → regscale_cli-6.25.0.1.dist-info}/top_level.txt +0 -0
@@ -11,21 +11,23 @@ import os
11
11
  import shutil
12
12
  import zipfile
13
13
  from datetime import datetime
14
+ from logging import getLogger
15
+ from pathlib import Path
14
16
  from typing import Tuple
15
17
 
16
18
  import click # type: ignore
17
19
  import pdfplumber # type: ignore
18
20
  from docx import Document # type: ignore
19
- from pathlib import Path
20
21
  from rich.progress import Progress, TaskID
21
22
 
22
23
  from regscale.core.app.api import Api
23
24
  from regscale.core.app.application import Application
24
- from regscale.core.app.logz import create_logger
25
25
  from regscale.core.app.utils.app_utils import check_file_path, create_progress_object, error_and_exit
26
26
  from regscale.models.app_models.click import regscale_ssp_id
27
27
  from regscale.models.regscale_models import Assessment, File, Project, SecurityPlan
28
28
 
29
+ logger = getLogger("regscale")
30
+
29
31
 
30
32
  @click.group()
31
33
  def evidence():
@@ -64,74 +66,60 @@ def run_evidence_collection():
64
66
  app = Application()
65
67
  api = Api()
66
68
  config = app.config
69
+
70
+ logger.info("Starting evidence collection process")
71
+
67
72
  check_file_path("./static")
68
73
  progress = create_progress_object()
69
74
  with progress:
70
- task0 = progress.add_task("[white]Setting evidence folder directory variables...", total=3)
75
+ task1 = progress.add_task("[white]Initializing evidence collection...", total=4)
71
76
  # call function to define variable for use outside of function
72
77
  evidence_folder, dir_name, new_cwd = set_directory_variables(
73
- task=task0, evidence_folder=config["evidenceFolder"], progress=progress
78
+ task=task1, evidence_folder=config["evidenceFolder"], progress=progress
74
79
  )
75
80
 
76
- task1 = progress.add_task("[white]Building a required documents list from config.json...", total=3)
77
81
  # call function to define variable for use outside of function
78
82
  required_docs, document_list = parse_required_docs(
79
83
  evidence_folder=evidence_folder, task=task1, progress=progress
80
84
  )
81
85
 
82
- task2 = progress.add_task("[white]Calculating files last modified times...", total=5)
83
86
  # call function to define variable for use outside of function
84
- times = get_doc_timestamps(evidence_folder=new_cwd, directory=dir_name, task=task2, progress=progress)
87
+ times = get_doc_timestamps(evidence_folder=new_cwd, directory=dir_name, task=task1, progress=progress)
85
88
 
86
- task3 = progress.add_task("[white]Building a required texts list from config.json...", total=3)
87
89
  # call function to define variable for use outside of function
88
- texts = set_required_texts(evidence_folder=evidence_folder, task=task3, progress=progress)
89
-
90
- task4 = progress.add_task("[white]Searching evidence folder for required files...", total=4)
90
+ texts = set_required_texts(evidence_folder=evidence_folder, task=task1, progress=progress)
91
91
 
92
92
  # call function to define variable for use outside of function
93
- folders = find_required_files_in_folder(evidence_folder=new_cwd, task=task4, progress=progress)
93
+ folders = find_required_files_in_folder(evidence_folder=new_cwd, task=task1, progress=progress)
94
94
 
95
- task5 = progress.add_task("[white]Searching for digital signatures in documents...", total=2)
95
+ task2 = progress.add_task("[white]Analyzing documents and content...", total=6)
96
96
 
97
97
  # call function to define variable for use outside of function
98
98
  sig_results = signature_assessment_results(
99
- directory=folders, r_docs=required_docs, task=task5, progress=progress
99
+ directory=folders, r_docs=required_docs, task=task2, progress=progress
100
100
  )
101
101
 
102
- task6 = progress.add_task("[white]Testing if required documents are present...", total=2)
103
-
104
102
  # call function to define variable for use outside of function
105
103
  doc_results = document_assessment_results(
106
- directory=folders, documents=document_list, task=task6, progress=progress
104
+ directory=folders, documents=document_list, task=task2, progress=progress
107
105
  )
108
106
 
109
- task7 = progress.add_task("[white]Extracting texts from required files...", total=4)
110
-
111
107
  # call function to define variable for use outside of function
112
- file_texts = parse_required_text_from_files(evidence_folder=new_cwd, task=task7, progress=progress)
113
-
114
- task8 = progress.add_task("[white]Searching for required text in parsed documents...", total=2)
108
+ file_texts = parse_required_text_from_files(evidence_folder=new_cwd, task=task2, progress=progress)
115
109
 
116
110
  # call function to define variable for use outside of function
117
- search_results = text_string_search(f_texts=file_texts, req_texts=texts, task=task8, progress=progress)
118
-
119
- task9 = progress.add_task("[white]Testing if required texts are present", total=2)
111
+ search_results = text_string_search(f_texts=file_texts, req_texts=texts, task=task2, progress=progress)
120
112
 
121
113
  # call function to define variable for use outside of function
122
- text_results = text_assessment_results(searches=search_results, r_texts=texts, task=task9, progress=progress)
114
+ text_results = text_assessment_results(searches=search_results, r_texts=texts, task=task2, progress=progress)
123
115
 
124
- task10 = progress.add_task("[white]Retrieving data from the evidence test projects...", total=3)
116
+ task3 = progress.add_task("[white]Processing assessment data...", total=4)
125
117
 
126
118
  # call function to define variable for use outside of function
127
- data = gather_test_project_data(api=api, evidence_folder=evidence_folder, task=task10, progress=progress)
128
-
129
- task11 = progress.add_task("[white]Testing file modification times...", total=2)
119
+ data = gather_test_project_data(api=api, evidence_folder=evidence_folder, task=task3, progress=progress)
130
120
 
131
121
  # call function to define variable to use outside of function
132
- time_results = assess_doc_timestamps(timestamps=times, documents=required_docs, task=task11, progress=progress)
133
-
134
- task12 = progress.add_task("[white]Building assessment report...", total=4)
122
+ time_results = assess_doc_timestamps(timestamps=times, documents=required_docs, task=task3, progress=progress)
135
123
 
136
124
  # call function to define variable to use outside of function
137
125
  report = assessments_report(
@@ -139,32 +127,66 @@ def run_evidence_collection():
139
127
  textres=text_results,
140
128
  timeres=time_results,
141
129
  sigres=sig_results,
142
- task=task12,
130
+ task=task3,
143
131
  progress=progress,
144
132
  )
145
133
 
146
- task13 = progress.add_task("[white]Building assessment results dataframe...", total=4)
147
-
148
134
  # call function to define variable to use outside of function
149
- results = build_assessment_dataframe(assessments=report, task=task13, progress=progress)
150
-
151
- task14 = progress.add_task("[white]Calculating assessment score...", total=1)
135
+ results = build_assessment_dataframe(assessments=report, task=task3, progress=progress)
152
136
 
153
137
  # call function to define variable for use outside of function
154
- score_data = build_score_data(assessments=results, task=task14, progress=progress)
155
-
156
- task15 = progress.add_task("[white]Building a table for the assessment report...", total=4)
138
+ score_data = build_score_data(assessments=results, task=task3, progress=progress)
157
139
 
158
140
  # call function to define variable for use outside of function
159
- html_output = build_html_table(assessments=report, task=task15, progress=progress)
160
-
161
- task16 = progress.add_task("[white]Creating child assessment based on test results...", total=2)
141
+ html_output = build_html_table(assessments=report, task=task3, progress=progress)
162
142
 
163
143
  # call function to create child assessment via POST request
164
144
  create_child_assessments(
165
- api=api, project_data=data, output=html_output, score_data=score_data, task=task16, progress=progress
145
+ api=api, project_data=data, output=html_output, score_data=score_data, task=task3, progress=progress
166
146
  )
167
147
 
148
+ # Display collected files summary
149
+ display_collected_files(folders, evidence_folder)
150
+
151
+
152
+ def display_collected_files(folders: list[dict], evidence_folder: str) -> None:
153
+ """
154
+ Display a summary of collected files to the user
155
+
156
+ :param list[dict] folders: List of files found in evidence folder
157
+ :param str evidence_folder: Path to evidence folder
158
+ :rtype: None
159
+ """
160
+ if not folders:
161
+ logger.info("No files were collected from the evidence folder.")
162
+ return
163
+
164
+ logger.info("=" * 60)
165
+ logger.info("EVIDENCE COLLECTION SUMMARY")
166
+ logger.info("=" * 60)
167
+ logger.info(f"Evidence folder: {evidence_folder}")
168
+ logger.info(f"Total files collected: {len(folders)}")
169
+ logger.info("")
170
+
171
+ # Group files by program/folder
172
+ programs = {}
173
+ for file_info in folders:
174
+ program = file_info.get("program", "unknown")
175
+ filename = file_info.get("file", "unknown")
176
+ if program not in programs:
177
+ programs[program] = []
178
+ programs[program].append(filename)
179
+
180
+ # Display files by program
181
+ for program, files in programs.items():
182
+ logger.info(f"Program: {program}")
183
+ logger.info("-" * 40)
184
+ for file in sorted(files):
185
+ logger.info(f" • {file}")
186
+ logger.info("")
187
+
188
+ logger.info("=" * 60)
189
+
168
190
 
169
191
  def package_builder(ssp_id: int, path: Path):
170
192
  """Function to build a directory of evidence and produce a zip file for extraction and use
@@ -374,7 +396,7 @@ def remove_directory(directory_path: Path) -> None:
374
396
  :rtype: None
375
397
  """
376
398
  shutil.rmtree(directory_path.absolute())
377
- create_logger().info("Temporary Evidence directory removed successfully!")
399
+ logger.info("Temporary Evidence directory removed successfully!")
378
400
 
379
401
 
380
402
  def zip_folder(folder_path: Path, zip_path: Path) -> None:
@@ -397,7 +419,7 @@ def zip_folder(folder_path: Path, zip_path: Path) -> None:
397
419
  # Add the file to the ZIP archive using its relative path
398
420
  zipf.write(file_path, relative_path) # type: ignore
399
421
 
400
- create_logger().info("Folder zipped successfully!")
422
+ logger.info("Folder zipped successfully!")
401
423
 
402
424
 
403
425
  def remove(list_to_review: list) -> list:
@@ -462,7 +484,6 @@ def find_signatures(file: str) -> int:
462
484
  import pymupdf
463
485
 
464
486
  number = 0
465
- logger = create_logger()
466
487
  # if the file is a pdf document
467
488
  if file.endswith(".pdf"):
468
489
  try:
@@ -507,20 +528,30 @@ def set_directory_variables(task: TaskID, evidence_folder: str, progress: Progre
507
528
  # set evidence folder variable to init.yaml value
508
529
  # if evidence folder does not exist then create it so tests will pass
509
530
  check_file_path(evidence_folder)
531
+
510
532
  # if evidence folder does not exist or if it is empty then error out
511
- if evidence_folder is None or len(os.listdir(evidence_folder)) <= 1:
533
+ evidence_items = os.listdir(evidence_folder)
534
+
535
+ if evidence_folder is None or len(evidence_items) == 0:
512
536
  error_and_exit("The directory set to evidenceFolder cannot be found or is empty.")
513
537
  else:
514
538
  # otherwise change directory to the evidence folder
515
539
  os.chdir(evidence_folder)
516
540
  progress.update(task, advance=1)
517
- # include RegScale projects folder
518
- dir_name = [filename for filename in os.listdir(os.getcwd()) if os.path.isdir(os.path.join(os.getcwd(), filename))][
519
- 0
520
- ]
521
- progress.update(task, advance=1)
522
- # pick up subdirectory under the evidence folder
523
- new_cwd = os.getcwd() + os.sep + dir_name
541
+
542
+ # include RegScale projects folder or use current directory if no subdirs
543
+ subdirs = [filename for filename in os.listdir(os.getcwd()) if os.path.isdir(os.path.join(os.getcwd(), filename))]
544
+
545
+ if subdirs:
546
+ # Prefer 'project' directory if it exists, otherwise use the first one
547
+ if "project" in subdirs:
548
+ dir_name = "project"
549
+ else:
550
+ dir_name = subdirs[0]
551
+ new_cwd = os.getcwd() + os.sep + dir_name
552
+ else:
553
+ dir_name = "evidence"
554
+ new_cwd = os.getcwd()
524
555
  progress.update(task, advance=1)
525
556
  # return variables for use outside local scope
526
557
  return evidence_folder, dir_name, new_cwd
@@ -543,23 +574,41 @@ def parse_required_docs(evidence_folder: str, task: TaskID, progress: Progress)
543
574
  document_list = set()
544
575
  progress.update(task, advance=1)
545
576
  # open app//evidence//config.json file and read contents
546
- with open(f"{evidence_folder}{os.sep}config.json", "r", encoding="utf-8") as json_file:
547
- # load json object into a readable dictionary
548
- rules = json.load(json_file)
577
+ config_file = f"{evidence_folder}{os.sep}config.json"
578
+ if os.path.exists(config_file):
579
+ with open(config_file, "r", encoding="utf-8") as json_file:
580
+ # load json object into a readable dictionary
581
+ rules = json.load(json_file)
582
+ progress.update(task, advance=1)
583
+ # loop through required document dicts
584
+ for i in range(len(rules.get("required-documents", []))):
585
+ # add to a list of dictionaries for parsing
586
+ required_docs.append(
587
+ {
588
+ "file-name": rules["required-documents"][i].get("file-name"),
589
+ "last-updated-by": rules["required-documents"][i].get("last-updated-by"),
590
+ "signatures-required": rules["required-documents"][i].get("signatures-required"),
591
+ "signature-count": rules["required-documents"][i].get("signature-count"),
592
+ }
593
+ )
594
+ # update contents of list if it does not already exist
595
+ document_list.add(rules["required-documents"][i].get("file-name"))
596
+ else:
597
+ # No config file, use default requirements for any files found
549
598
  progress.update(task, advance=1)
550
- # loop through required document dicts
551
- for i in range(len(rules["required-documents"])):
552
- # add to a list of dictionaries for parsing
553
- required_docs.append(
554
- {
555
- "file-name": rules["required-documents"][i].get("file-name"),
556
- "last-updated-by": rules["required-documents"][i].get("last-updated-by"),
557
- "signatures-required": rules["required-documents"][i].get("signatures-required"),
558
- "signature-count": rules["required-documents"][i].get("signature-count"),
559
- }
560
- )
561
- # update contents of list if it does not already exist
562
- document_list.add(rules["required-documents"][i].get("file-name"))
599
+ # Get all files in evidence folder and subfolders
600
+ for root, dirs, files in os.walk(evidence_folder):
601
+ for file in files:
602
+ if not file.startswith(".") and file.lower().endswith((".pdf", ".docx", ".doc", ".txt")):
603
+ required_docs.append(
604
+ {
605
+ "file-name": file,
606
+ "last-updated-by": 365,
607
+ "signatures-required": False,
608
+ "signature-count": 0,
609
+ }
610
+ )
611
+ document_list.add(file)
563
612
  progress.update(task, advance=1)
564
613
  # return variables for use outside of local scope
565
614
  return required_docs, document_list
@@ -585,20 +634,37 @@ def get_doc_timestamps(evidence_folder: str, directory: str, task: TaskID, progr
585
634
  # remove any child folders that start with '.'
586
635
  new_folders = remove(list_to_review=folders_list)
587
636
  progress.update(task, advance=1)
588
- # loop through directory listing
589
- for folder in new_folders:
590
- # get list of files in each folder
591
- filelist = os.listdir(os.path.join(evidence_folder, folder))
592
- # remove any files that start with '.'
593
- remove(list_to_review=filelist)
594
- # loop through list of files in each folder
637
+
638
+ # Check if there are subdirectories
639
+ subdirs = [f for f in new_folders if os.path.isdir(os.path.join(evidence_folder, f))]
640
+
641
+ if subdirs:
642
+ # loop through directory listing
643
+ for folder in subdirs:
644
+ # get list of files in each folder
645
+ filelist = os.listdir(os.path.join(evidence_folder, folder))
646
+ # remove any files that start with '.'
647
+ filelist = remove(filelist)
648
+ # loop through list of files in each folder
649
+ modified_times.extend(
650
+ {
651
+ "program": folder,
652
+ "file": filename,
653
+ "last-modified": os.path.getmtime(os.path.join(directory, folder, filename)),
654
+ }
655
+ for filename in filelist
656
+ )
657
+ else:
658
+ # No subdirectories, process files directly in evidence folder
659
+ files = [f for f in new_folders if os.path.isfile(os.path.join(evidence_folder, f))]
660
+ files = remove(files)
595
661
  modified_times.extend(
596
662
  {
597
- "program": folder,
663
+ "program": "evidence",
598
664
  "file": filename,
599
- "last-modified": os.path.getmtime(os.path.join(directory, folder, filename)),
665
+ "last-modified": os.path.getmtime(os.path.join(evidence_folder, filename)),
600
666
  }
601
- for filename in filelist
667
+ for filename in files
602
668
  )
603
669
  progress.update(task, advance=1)
604
670
  # loop through the list of timestamps
@@ -624,17 +690,22 @@ def set_required_texts(evidence_folder: str, task: TaskID, progress: Progress) -
624
690
  required_text = set()
625
691
  progress.update(task, advance=1)
626
692
  # open app//evidence//config.json file and read contents
627
- with open(f"{evidence_folder}{os.sep}config.json", "r", encoding="utf-8") as json_file:
628
- # load json object into a readable dictionary
629
- rules = json.load(json_file)
630
- progress.update(task, advance=1)
631
- # create iterator to traverse dictionary
632
- for i in range(len(rules["rules-engine"])):
633
- # pull out required text to look for from config
634
- for items in rules["rules-engine"][i]["text-to-find"]:
635
- # exclude duplicate text to search from required text
636
- required_text.add(items)
693
+ config_file = f"{evidence_folder}{os.sep}config.json"
694
+ if os.path.exists(config_file):
695
+ with open(config_file, "r", encoding="utf-8") as json_file:
696
+ # load json object into a readable dictionary
697
+ rules = json.load(json_file)
698
+ progress.update(task, advance=1)
699
+ # create iterator to traverse dictionary
700
+ for i in range(len(rules.get("rules-engine", []))):
701
+ # pull out required text to look for from config
702
+ for items in rules["rules-engine"][i].get("text-to-find", []):
703
+ # exclude duplicate text to search from required text
704
+ required_text.add(items)
705
+ else:
706
+ # No config file, use default text requirements
637
707
  progress.update(task, advance=1)
708
+ required_text = {"security policy", "risk assessment", "compliance", "control", "audit"}
638
709
  # return variable for use outside of local scope
639
710
  return required_text
640
711
 
@@ -658,17 +729,53 @@ def find_required_files_in_folder(evidence_folder: str, task: TaskID, progress:
658
729
  # remove any folders starting with '.' from list
659
730
  new_folders_list = remove(folder_list)
660
731
  progress.update(task, advance=1)
661
- for folder in new_folders_list:
662
- # build a list of all files contained in sub-directories
663
- filelist = os.listdir(evidence_folder + os.sep + folder)
664
- # remove folders and file names that start with a .
665
- remove(filelist)
666
- dir_list.extend({"program": folder, "file": filename} for filename in filelist)
732
+
733
+ # Check if there are subdirectories
734
+ subdirs = [f for f in new_folders_list if os.path.isdir(os.path.join(evidence_folder, f))]
735
+
736
+ if subdirs:
737
+ for folder in subdirs:
738
+ # build a list of all files contained in sub-directories
739
+ filelist = os.listdir(evidence_folder + os.sep + folder)
740
+ # remove folders and file names that start with a .
741
+ filelist = remove(filelist)
742
+ dir_list.extend({"program": folder, "file": filename} for filename in filelist)
743
+ else:
744
+ # No subdirectories, process files directly in evidence folder
745
+ files = [f for f in new_folders_list if os.path.isfile(os.path.join(evidence_folder, f))]
746
+ files = remove(files)
747
+ dir_list.extend({"program": "evidence", "file": filename} for filename in files)
667
748
  progress.update(task, advance=1)
668
749
  # return variable for use outside of local scope
669
750
  return dir_list
670
751
 
671
752
 
753
+ def _create_signature_result(program: str, filename: str, test_name: str, result: bool) -> dict:
754
+ """Helper function to create signature assessment result"""
755
+ return {
756
+ "program": program,
757
+ "file": filename,
758
+ "test": test_name,
759
+ "result": result,
760
+ }
761
+
762
+
763
+ def _assess_signature_requirement(doc_file: dict, required: dict) -> list[dict]:
764
+ """Helper function to assess signature requirements for a document"""
765
+ results = []
766
+
767
+ if required["signatures-required"] is True:
768
+ sig_result = find_signatures(doc_file["file"])
769
+ test_name = "signature-required"
770
+ result = sig_result == 3
771
+ results.append(_create_signature_result(doc_file["program"], doc_file["file"], test_name, result))
772
+ elif required["signatures-required"] is False:
773
+ test_name = "signature-required (not required)"
774
+ results.append(_create_signature_result(doc_file["program"], doc_file["file"], test_name, True))
775
+
776
+ return results
777
+
778
+
672
779
  def signature_assessment_results(
673
780
  directory: list[dict], r_docs: list[dict], task: TaskID, progress: Progress
674
781
  ) -> list[dict]:
@@ -682,52 +789,15 @@ def signature_assessment_results(
682
789
  :return: Assessment of signatures
683
790
  :rtype: list[dict]
684
791
  """
685
- # create empty list to hold assessment results
686
792
  sig_assessments: list[dict] = []
687
793
  progress.update(task, advance=1)
688
- # loop through list of found documents in each sub-folder
794
+
689
795
  for doc_file in directory:
690
796
  for required in r_docs:
691
797
  if doc_file["file"] == required["file-name"]:
692
- # if the signatures-required field is set to true
693
- if required["signatures-required"] is True:
694
- # run the signature detection function for the file
695
- sig_result = find_signatures(doc_file["file"])
696
- # if the return value is 3 pass the test
697
- if sig_result == 3:
698
- # append a true result for each document tested
699
- sig_assessments.append(
700
- {
701
- "program": doc_file["program"],
702
- "file": doc_file["file"],
703
- "test": "signature-required",
704
- "result": True,
705
- }
706
- )
707
- # if the return value is 1, -1 or 0 fail the test
708
- else:
709
- # append a false result for each document tested
710
- sig_assessments.append(
711
- {
712
- "program": doc_file["program"],
713
- "file": doc_file["file"],
714
- "test": "signature-required",
715
- "result": False,
716
- }
717
- )
718
- # if the signatures-required field is set to false
719
- if required["signatures-required"] is False:
720
- # append a true result for each document not requiring a signature
721
- sig_assessments.append(
722
- {
723
- "program": doc_file["program"],
724
- "file": doc_file["file"],
725
- "test": "signature-required (not required)",
726
- "result": True,
727
- }
728
- )
798
+ sig_assessments.extend(_assess_signature_requirement(doc_file, required))
799
+
729
800
  progress.update(task, advance=1)
730
- # return variable for use outside of local scope
731
801
  return sig_assessments
732
802
 
733
803
 
@@ -775,6 +845,50 @@ def document_assessment_results(
775
845
  return doc_assessments
776
846
 
777
847
 
848
+ def _extract_docx_text(file_path: str) -> list[str]:
849
+ """Helper function to extract text from DOCX files"""
850
+ document = Document(file_path)
851
+ return [para.text for para in document.paragraphs]
852
+
853
+
854
+ def _extract_pdf_text(file_path: str) -> list[str]:
855
+ """Helper function to extract text from PDF files"""
856
+ output_text_list: list[str] = []
857
+ with pdfplumber.open(file_path) as pdf:
858
+ for page in pdf.pages:
859
+ text = page.extract_text()
860
+ if text: # Only append non-None text
861
+ output_text_list.append(text)
862
+ return output_text_list
863
+
864
+
865
+ def _process_file_for_text(filename: str, file_path: str, program: str) -> dict | None:
866
+ """Helper function to process a single file and extract text"""
867
+ if filename.endswith(".docx"):
868
+ text = _extract_docx_text(file_path)
869
+ elif filename.endswith(".pdf"):
870
+ text = _extract_pdf_text(file_path)
871
+ else:
872
+ return None
873
+
874
+ return {"program": program, "file": filename, "text": text}
875
+
876
+
877
+ def _process_files_in_folder(folder_path: str, program: str) -> list[dict]:
878
+ """Helper function to process all files in a specific folder"""
879
+ results = []
880
+ file_list = os.listdir(folder_path)
881
+ file_list = remove(file_list)
882
+
883
+ for filename in file_list:
884
+ file_path = os.path.join(folder_path, filename)
885
+ result = _process_file_for_text(filename, file_path, program)
886
+ if result:
887
+ results.append(result)
888
+
889
+ return results
890
+
891
+
778
892
  def parse_required_text_from_files(evidence_folder: str, task: TaskID, progress: Progress) -> list[dict]:
779
893
  """
780
894
  Parse text from docx/pdf file and hold strings representing required text to test
@@ -785,51 +899,26 @@ def parse_required_text_from_files(evidence_folder: str, task: TaskID, progress:
785
899
  :return: Results of text found for the files
786
900
  :rtype: list[dict]
787
901
  """
788
- # create an empty list to hold all strings from parsed documents
789
902
  full_text: list[dict] = []
790
903
  progress.update(task, advance=1)
791
- # build a list of files in the folder
904
+
792
905
  folder_list = os.listdir(evidence_folder)
793
906
  progress.update(task, advance=1)
794
- # remove all folders that start with '.'
795
907
  removed_folders_list = remove(folder_list)
796
908
  progress.update(task, advance=1)
797
- for folder in removed_folders_list:
798
- # create a list of files to iterate through for parsing
799
- file_list = os.listdir((os.path.join(evidence_folder, folder)))
800
- remove(file_list)
801
- # iterate through all files in the list
802
- for filename in file_list:
803
- # if the filename is a .docx file
804
- if filename.endswith(".docx"):
805
- # open the Word document to enable parsing
806
- document = Document(os.path.join(evidence_folder, folder, filename))
807
- output: list[str] = [para.text for para in document.paragraphs]
808
- # add each file and the requisite text to the dictionary to test
809
- full_text.append({"program": folder, "file": filename, "text": output})
810
- elif filename.endswith(".pdf"):
811
- # create empty list to hold text per file
812
- output_text_list: list[str] = []
813
- # open filename with pdfplumber
814
- with pdfplumber.open(filename) as pdf:
815
- # set number of pages
816
- pages = pdf.pages
817
- # for each page in the pdf document
818
- for page in pages:
819
- # extract the text
820
- text = page.extract_text()
821
- # write the text to a list
822
- output_text_list.append(text)
823
- # add each file and the requisite text to the dictionary to test
824
- full_text.append(
825
- {
826
- "program": folder,
827
- "file": filename,
828
- "text": output_text_list,
829
- }
830
- )
909
+
910
+ # Check if there are subdirectories
911
+ subdirs = [f for f in removed_folders_list if os.path.isdir(os.path.join(evidence_folder, f))]
912
+
913
+ if subdirs:
914
+ for folder in subdirs:
915
+ folder_path = os.path.join(evidence_folder, folder)
916
+ full_text.extend(_process_files_in_folder(folder_path, folder))
917
+ else:
918
+ # No subdirectories, process files directly in evidence folder
919
+ full_text.extend(_process_files_in_folder(evidence_folder, "evidence"))
920
+
831
921
  progress.update(task, advance=1)
832
- # return variable for use outside of local scope
833
922
  return full_text
834
923
 
835
924
 
@@ -924,17 +1013,22 @@ def gather_test_project_data(api: Api, evidence_folder: str, task: TaskID, progr
924
1013
  test_data: list[dict] = []
925
1014
  progress.update(task, advance=1)
926
1015
  # test project information created in RegScale UI
927
- with open(evidence_folder + os.sep + "list.json", "r", encoding="utf-8") as json_file:
928
- # load json object into a readable dictionary
929
- lists = json.load(json_file)
930
- # loop through projects in the list.json
931
- test_data.extend(
932
- {
933
- "id": lists["parser-list"][i].get("id"),
934
- "program": lists["parser-list"][i].get("folder-name"),
935
- }
936
- for i in range(len(lists["parser-list"]))
937
- )
1016
+ list_file = evidence_folder + os.sep + "list.json"
1017
+ if os.path.exists(list_file):
1018
+ with open(list_file, "r", encoding="utf-8") as json_file:
1019
+ # load json object into a readable dictionary
1020
+ lists = json.load(json_file)
1021
+ # loop through projects in the list.json
1022
+ test_data.extend(
1023
+ {
1024
+ "id": lists["parser-list"][i].get("id"),
1025
+ "program": lists["parser-list"][i].get("folder-name"),
1026
+ }
1027
+ for i in range(len(lists.get("parser-list", [])))
1028
+ )
1029
+ else:
1030
+ # No list.json, skip project data - evidence collection can work without it
1031
+ test_data = []
938
1032
  progress.update(task, advance=1)
939
1033
  # create empty list to hold json response data for each project
940
1034
  test_info: list[dict] = []
@@ -956,7 +1050,7 @@ def gather_test_project_data(api: Api, evidence_folder: str, task: TaskID, progr
956
1050
  }
957
1051
  )
958
1052
  else:
959
- api.logger.error("Project data retrieval was unsuccessful.")
1053
+ api.logger.warning(f"Project data retrieval was unsuccessful for ID {item['id']}, skipping this project.")
960
1054
  progress.update(task, advance=1)
961
1055
  # return variables for use outside of local scope
962
1056
  return test_info
@@ -1031,19 +1125,8 @@ def assessments_report(
1031
1125
  :rtype: list[dict]
1032
1126
  """
1033
1127
  progress.update(task, advance=1)
1034
- assessment_report: list[dict] = list(docres)
1035
- progress.update(task, advance=1)
1036
- # append all results to 1 master list
1037
- assessment_report.extend(iter(textres))
1038
- progress.update(task, advance=1)
1039
- # append all results to 1 master list
1040
- assessment_report.extend(iter(timeres))
1041
- progress.update(task, advance=1)
1042
- # append all results to 1 master list
1043
- assessment_report.extend(iter(sigres))
1044
- progress.update(task, advance=1)
1045
- # return variable for use outside of local scope
1046
- return assessment_report
1128
+ # combine all results into one master list
1129
+ return docres + textres + timeres + sigres
1047
1130
 
1048
1131
 
1049
1132
  def build_assessment_dataframe(assessments: list[dict], task: TaskID, progress: Progress) -> list[dict]:
@@ -1061,6 +1144,11 @@ def build_assessment_dataframe(assessments: list[dict], task: TaskID, progress:
1061
1144
 
1062
1145
  result_df = pd.DataFrame(assessments)
1063
1146
  progress.update(task, advance=1)
1147
+
1148
+ # Check if dataframe is empty
1149
+ if result_df.empty:
1150
+ return []
1151
+
1064
1152
  # fill in NaN cells
1065
1153
  result_df = result_df.fillna(" ")
1066
1154
  progress.update(task, advance=1)
@@ -1148,9 +1236,21 @@ def build_html_table(assessments: list[dict], task: TaskID, progress: Progress)
1148
1236
  import pandas as pd # Optimize import performance
1149
1237
 
1150
1238
  output_list: list[dict] = []
1239
+
1240
+ # Check if assessments is empty
1241
+ if not assessments:
1242
+ progress.update(task, advance=4) # Skip all remaining progress updates
1243
+ return output_list
1244
+
1151
1245
  # create a dataframe of a list of dicts
1152
1246
  table_df = pd.DataFrame(data=assessments)
1153
1247
  progress.update(task, advance=1)
1248
+
1249
+ # Check if dataframe is empty or missing required columns
1250
+ if table_df.empty or "program" not in table_df.columns:
1251
+ progress.update(task, advance=3) # Skip remaining progress updates
1252
+ return output_list
1253
+
1154
1254
  # fill in N/A cells with blank string
1155
1255
  table_df = table_df.fillna(" ")
1156
1256
  progress.update(task, advance=1)
@@ -1197,6 +1297,12 @@ def create_child_assessments(
1197
1297
  # set completion datetime to required format
1198
1298
  completion_date = datetime.now().strftime("%Y-%m-%dT%H:%M:%S")
1199
1299
  progress.update(task, advance=1)
1300
+
1301
+ # Check if we have project data to work with
1302
+ if not project_data:
1303
+ progress.update(task, advance=1)
1304
+ return
1305
+
1200
1306
  # loop through test projects and make an API call for each
1201
1307
  for i, project in enumerate(project_data):
1202
1308
  # call score calculation function