regscale-cli 6.24.0.0__py3-none-any.whl → 6.25.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of regscale-cli might be problematic. Click here for more details.
- regscale/_version.py +1 -1
- regscale/core/app/api.py +1 -1
- regscale/core/app/application.py +5 -3
- regscale/core/app/internal/evidence.py +308 -202
- regscale/dev/code_gen.py +84 -3
- regscale/integrations/commercial/__init__.py +2 -0
- regscale/integrations/commercial/jira.py +95 -22
- regscale/integrations/commercial/microsoft_defender/defender.py +326 -5
- regscale/integrations/commercial/microsoft_defender/defender_api.py +348 -14
- regscale/integrations/commercial/microsoft_defender/defender_constants.py +157 -0
- regscale/integrations/commercial/synqly/assets.py +99 -16
- regscale/integrations/commercial/synqly/query_builder.py +533 -0
- regscale/integrations/commercial/synqly/vulnerabilities.py +134 -14
- regscale/integrations/commercial/wizv2/click.py +23 -0
- regscale/integrations/commercial/wizv2/compliance_report.py +137 -26
- regscale/integrations/compliance_integration.py +247 -5
- regscale/integrations/scanner_integration.py +16 -0
- regscale/models/integration_models/synqly_models/capabilities.json +1 -1
- regscale/models/integration_models/synqly_models/connectors/vulnerabilities.py +12 -2
- regscale/models/integration_models/synqly_models/filter_parser.py +332 -0
- regscale/models/integration_models/synqly_models/synqly_model.py +47 -3
- regscale/models/regscale_models/compliance_settings.py +28 -0
- regscale/models/regscale_models/component.py +1 -0
- regscale/models/regscale_models/control_implementation.py +143 -4
- regscale/regscale.py +1 -1
- regscale/validation/record.py +23 -1
- {regscale_cli-6.24.0.0.dist-info → regscale_cli-6.25.0.0.dist-info}/METADATA +9 -9
- {regscale_cli-6.24.0.0.dist-info → regscale_cli-6.25.0.0.dist-info}/RECORD +32 -30
- {regscale_cli-6.24.0.0.dist-info → regscale_cli-6.25.0.0.dist-info}/LICENSE +0 -0
- {regscale_cli-6.24.0.0.dist-info → regscale_cli-6.25.0.0.dist-info}/WHEEL +0 -0
- {regscale_cli-6.24.0.0.dist-info → regscale_cli-6.25.0.0.dist-info}/entry_points.txt +0 -0
- {regscale_cli-6.24.0.0.dist-info → regscale_cli-6.25.0.0.dist-info}/top_level.txt +0 -0
|
@@ -11,21 +11,23 @@ import os
|
|
|
11
11
|
import shutil
|
|
12
12
|
import zipfile
|
|
13
13
|
from datetime import datetime
|
|
14
|
+
from logging import getLogger
|
|
15
|
+
from pathlib import Path
|
|
14
16
|
from typing import Tuple
|
|
15
17
|
|
|
16
18
|
import click # type: ignore
|
|
17
19
|
import pdfplumber # type: ignore
|
|
18
20
|
from docx import Document # type: ignore
|
|
19
|
-
from pathlib import Path
|
|
20
21
|
from rich.progress import Progress, TaskID
|
|
21
22
|
|
|
22
23
|
from regscale.core.app.api import Api
|
|
23
24
|
from regscale.core.app.application import Application
|
|
24
|
-
from regscale.core.app.logz import create_logger
|
|
25
25
|
from regscale.core.app.utils.app_utils import check_file_path, create_progress_object, error_and_exit
|
|
26
26
|
from regscale.models.app_models.click import regscale_ssp_id
|
|
27
27
|
from regscale.models.regscale_models import Assessment, File, Project, SecurityPlan
|
|
28
28
|
|
|
29
|
+
logger = getLogger("regscale")
|
|
30
|
+
|
|
29
31
|
|
|
30
32
|
@click.group()
|
|
31
33
|
def evidence():
|
|
@@ -64,74 +66,60 @@ def run_evidence_collection():
|
|
|
64
66
|
app = Application()
|
|
65
67
|
api = Api()
|
|
66
68
|
config = app.config
|
|
69
|
+
|
|
70
|
+
logger.info("Starting evidence collection process")
|
|
71
|
+
|
|
67
72
|
check_file_path("./static")
|
|
68
73
|
progress = create_progress_object()
|
|
69
74
|
with progress:
|
|
70
|
-
|
|
75
|
+
task1 = progress.add_task("[white]Initializing evidence collection...", total=4)
|
|
71
76
|
# call function to define variable for use outside of function
|
|
72
77
|
evidence_folder, dir_name, new_cwd = set_directory_variables(
|
|
73
|
-
task=
|
|
78
|
+
task=task1, evidence_folder=config["evidenceFolder"], progress=progress
|
|
74
79
|
)
|
|
75
80
|
|
|
76
|
-
task1 = progress.add_task("[white]Building a required documents list from config.json...", total=3)
|
|
77
81
|
# call function to define variable for use outside of function
|
|
78
82
|
required_docs, document_list = parse_required_docs(
|
|
79
83
|
evidence_folder=evidence_folder, task=task1, progress=progress
|
|
80
84
|
)
|
|
81
85
|
|
|
82
|
-
task2 = progress.add_task("[white]Calculating files last modified times...", total=5)
|
|
83
86
|
# call function to define variable for use outside of function
|
|
84
|
-
times = get_doc_timestamps(evidence_folder=new_cwd, directory=dir_name, task=
|
|
87
|
+
times = get_doc_timestamps(evidence_folder=new_cwd, directory=dir_name, task=task1, progress=progress)
|
|
85
88
|
|
|
86
|
-
task3 = progress.add_task("[white]Building a required texts list from config.json...", total=3)
|
|
87
89
|
# call function to define variable for use outside of function
|
|
88
|
-
texts = set_required_texts(evidence_folder=evidence_folder, task=
|
|
89
|
-
|
|
90
|
-
task4 = progress.add_task("[white]Searching evidence folder for required files...", total=4)
|
|
90
|
+
texts = set_required_texts(evidence_folder=evidence_folder, task=task1, progress=progress)
|
|
91
91
|
|
|
92
92
|
# call function to define variable for use outside of function
|
|
93
|
-
folders = find_required_files_in_folder(evidence_folder=new_cwd, task=
|
|
93
|
+
folders = find_required_files_in_folder(evidence_folder=new_cwd, task=task1, progress=progress)
|
|
94
94
|
|
|
95
|
-
|
|
95
|
+
task2 = progress.add_task("[white]Analyzing documents and content...", total=6)
|
|
96
96
|
|
|
97
97
|
# call function to define variable for use outside of function
|
|
98
98
|
sig_results = signature_assessment_results(
|
|
99
|
-
directory=folders, r_docs=required_docs, task=
|
|
99
|
+
directory=folders, r_docs=required_docs, task=task2, progress=progress
|
|
100
100
|
)
|
|
101
101
|
|
|
102
|
-
task6 = progress.add_task("[white]Testing if required documents are present...", total=2)
|
|
103
|
-
|
|
104
102
|
# call function to define variable for use outside of function
|
|
105
103
|
doc_results = document_assessment_results(
|
|
106
|
-
directory=folders, documents=document_list, task=
|
|
104
|
+
directory=folders, documents=document_list, task=task2, progress=progress
|
|
107
105
|
)
|
|
108
106
|
|
|
109
|
-
task7 = progress.add_task("[white]Extracting texts from required files...", total=4)
|
|
110
|
-
|
|
111
107
|
# call function to define variable for use outside of function
|
|
112
|
-
file_texts = parse_required_text_from_files(evidence_folder=new_cwd, task=
|
|
113
|
-
|
|
114
|
-
task8 = progress.add_task("[white]Searching for required text in parsed documents...", total=2)
|
|
108
|
+
file_texts = parse_required_text_from_files(evidence_folder=new_cwd, task=task2, progress=progress)
|
|
115
109
|
|
|
116
110
|
# call function to define variable for use outside of function
|
|
117
|
-
search_results = text_string_search(f_texts=file_texts, req_texts=texts, task=
|
|
118
|
-
|
|
119
|
-
task9 = progress.add_task("[white]Testing if required texts are present", total=2)
|
|
111
|
+
search_results = text_string_search(f_texts=file_texts, req_texts=texts, task=task2, progress=progress)
|
|
120
112
|
|
|
121
113
|
# call function to define variable for use outside of function
|
|
122
|
-
text_results = text_assessment_results(searches=search_results, r_texts=texts, task=
|
|
114
|
+
text_results = text_assessment_results(searches=search_results, r_texts=texts, task=task2, progress=progress)
|
|
123
115
|
|
|
124
|
-
|
|
116
|
+
task3 = progress.add_task("[white]Processing assessment data...", total=4)
|
|
125
117
|
|
|
126
118
|
# call function to define variable for use outside of function
|
|
127
|
-
data = gather_test_project_data(api=api, evidence_folder=evidence_folder, task=
|
|
128
|
-
|
|
129
|
-
task11 = progress.add_task("[white]Testing file modification times...", total=2)
|
|
119
|
+
data = gather_test_project_data(api=api, evidence_folder=evidence_folder, task=task3, progress=progress)
|
|
130
120
|
|
|
131
121
|
# call function to define variable to use outside of function
|
|
132
|
-
time_results = assess_doc_timestamps(timestamps=times, documents=required_docs, task=
|
|
133
|
-
|
|
134
|
-
task12 = progress.add_task("[white]Building assessment report...", total=4)
|
|
122
|
+
time_results = assess_doc_timestamps(timestamps=times, documents=required_docs, task=task3, progress=progress)
|
|
135
123
|
|
|
136
124
|
# call function to define variable to use outside of function
|
|
137
125
|
report = assessments_report(
|
|
@@ -139,32 +127,66 @@ def run_evidence_collection():
|
|
|
139
127
|
textres=text_results,
|
|
140
128
|
timeres=time_results,
|
|
141
129
|
sigres=sig_results,
|
|
142
|
-
task=
|
|
130
|
+
task=task3,
|
|
143
131
|
progress=progress,
|
|
144
132
|
)
|
|
145
133
|
|
|
146
|
-
task13 = progress.add_task("[white]Building assessment results dataframe...", total=4)
|
|
147
|
-
|
|
148
134
|
# call function to define variable to use outside of function
|
|
149
|
-
results = build_assessment_dataframe(assessments=report, task=
|
|
150
|
-
|
|
151
|
-
task14 = progress.add_task("[white]Calculating assessment score...", total=1)
|
|
135
|
+
results = build_assessment_dataframe(assessments=report, task=task3, progress=progress)
|
|
152
136
|
|
|
153
137
|
# call function to define variable for use outside of function
|
|
154
|
-
score_data = build_score_data(assessments=results, task=
|
|
155
|
-
|
|
156
|
-
task15 = progress.add_task("[white]Building a table for the assessment report...", total=4)
|
|
138
|
+
score_data = build_score_data(assessments=results, task=task3, progress=progress)
|
|
157
139
|
|
|
158
140
|
# call function to define variable for use outside of function
|
|
159
|
-
html_output = build_html_table(assessments=report, task=
|
|
160
|
-
|
|
161
|
-
task16 = progress.add_task("[white]Creating child assessment based on test results...", total=2)
|
|
141
|
+
html_output = build_html_table(assessments=report, task=task3, progress=progress)
|
|
162
142
|
|
|
163
143
|
# call function to create child assessment via POST request
|
|
164
144
|
create_child_assessments(
|
|
165
|
-
api=api, project_data=data, output=html_output, score_data=score_data, task=
|
|
145
|
+
api=api, project_data=data, output=html_output, score_data=score_data, task=task3, progress=progress
|
|
166
146
|
)
|
|
167
147
|
|
|
148
|
+
# Display collected files summary
|
|
149
|
+
display_collected_files(folders, evidence_folder)
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def display_collected_files(folders: list[dict], evidence_folder: str) -> None:
|
|
153
|
+
"""
|
|
154
|
+
Display a summary of collected files to the user
|
|
155
|
+
|
|
156
|
+
:param list[dict] folders: List of files found in evidence folder
|
|
157
|
+
:param str evidence_folder: Path to evidence folder
|
|
158
|
+
:rtype: None
|
|
159
|
+
"""
|
|
160
|
+
if not folders:
|
|
161
|
+
logger.info("No files were collected from the evidence folder.")
|
|
162
|
+
return
|
|
163
|
+
|
|
164
|
+
logger.info("=" * 60)
|
|
165
|
+
logger.info("EVIDENCE COLLECTION SUMMARY")
|
|
166
|
+
logger.info("=" * 60)
|
|
167
|
+
logger.info(f"Evidence folder: {evidence_folder}")
|
|
168
|
+
logger.info(f"Total files collected: {len(folders)}")
|
|
169
|
+
logger.info("")
|
|
170
|
+
|
|
171
|
+
# Group files by program/folder
|
|
172
|
+
programs = {}
|
|
173
|
+
for file_info in folders:
|
|
174
|
+
program = file_info.get("program", "unknown")
|
|
175
|
+
filename = file_info.get("file", "unknown")
|
|
176
|
+
if program not in programs:
|
|
177
|
+
programs[program] = []
|
|
178
|
+
programs[program].append(filename)
|
|
179
|
+
|
|
180
|
+
# Display files by program
|
|
181
|
+
for program, files in programs.items():
|
|
182
|
+
logger.info(f"Program: {program}")
|
|
183
|
+
logger.info("-" * 40)
|
|
184
|
+
for file in sorted(files):
|
|
185
|
+
logger.info(f" • {file}")
|
|
186
|
+
logger.info("")
|
|
187
|
+
|
|
188
|
+
logger.info("=" * 60)
|
|
189
|
+
|
|
168
190
|
|
|
169
191
|
def package_builder(ssp_id: int, path: Path):
|
|
170
192
|
"""Function to build a directory of evidence and produce a zip file for extraction and use
|
|
@@ -374,7 +396,7 @@ def remove_directory(directory_path: Path) -> None:
|
|
|
374
396
|
:rtype: None
|
|
375
397
|
"""
|
|
376
398
|
shutil.rmtree(directory_path.absolute())
|
|
377
|
-
|
|
399
|
+
logger.info("Temporary Evidence directory removed successfully!")
|
|
378
400
|
|
|
379
401
|
|
|
380
402
|
def zip_folder(folder_path: Path, zip_path: Path) -> None:
|
|
@@ -397,7 +419,7 @@ def zip_folder(folder_path: Path, zip_path: Path) -> None:
|
|
|
397
419
|
# Add the file to the ZIP archive using its relative path
|
|
398
420
|
zipf.write(file_path, relative_path) # type: ignore
|
|
399
421
|
|
|
400
|
-
|
|
422
|
+
logger.info("Folder zipped successfully!")
|
|
401
423
|
|
|
402
424
|
|
|
403
425
|
def remove(list_to_review: list) -> list:
|
|
@@ -462,7 +484,6 @@ def find_signatures(file: str) -> int:
|
|
|
462
484
|
import pymupdf
|
|
463
485
|
|
|
464
486
|
number = 0
|
|
465
|
-
logger = create_logger()
|
|
466
487
|
# if the file is a pdf document
|
|
467
488
|
if file.endswith(".pdf"):
|
|
468
489
|
try:
|
|
@@ -507,20 +528,30 @@ def set_directory_variables(task: TaskID, evidence_folder: str, progress: Progre
|
|
|
507
528
|
# set evidence folder variable to init.yaml value
|
|
508
529
|
# if evidence folder does not exist then create it so tests will pass
|
|
509
530
|
check_file_path(evidence_folder)
|
|
531
|
+
|
|
510
532
|
# if evidence folder does not exist or if it is empty then error out
|
|
511
|
-
|
|
533
|
+
evidence_items = os.listdir(evidence_folder)
|
|
534
|
+
|
|
535
|
+
if evidence_folder is None or len(evidence_items) == 0:
|
|
512
536
|
error_and_exit("The directory set to evidenceFolder cannot be found or is empty.")
|
|
513
537
|
else:
|
|
514
538
|
# otherwise change directory to the evidence folder
|
|
515
539
|
os.chdir(evidence_folder)
|
|
516
540
|
progress.update(task, advance=1)
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
541
|
+
|
|
542
|
+
# include RegScale projects folder or use current directory if no subdirs
|
|
543
|
+
subdirs = [filename for filename in os.listdir(os.getcwd()) if os.path.isdir(os.path.join(os.getcwd(), filename))]
|
|
544
|
+
|
|
545
|
+
if subdirs:
|
|
546
|
+
# Prefer 'project' directory if it exists, otherwise use the first one
|
|
547
|
+
if "project" in subdirs:
|
|
548
|
+
dir_name = "project"
|
|
549
|
+
else:
|
|
550
|
+
dir_name = subdirs[0]
|
|
551
|
+
new_cwd = os.getcwd() + os.sep + dir_name
|
|
552
|
+
else:
|
|
553
|
+
dir_name = "evidence"
|
|
554
|
+
new_cwd = os.getcwd()
|
|
524
555
|
progress.update(task, advance=1)
|
|
525
556
|
# return variables for use outside local scope
|
|
526
557
|
return evidence_folder, dir_name, new_cwd
|
|
@@ -543,23 +574,41 @@ def parse_required_docs(evidence_folder: str, task: TaskID, progress: Progress)
|
|
|
543
574
|
document_list = set()
|
|
544
575
|
progress.update(task, advance=1)
|
|
545
576
|
# open app//evidence//config.json file and read contents
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
577
|
+
config_file = f"{evidence_folder}{os.sep}config.json"
|
|
578
|
+
if os.path.exists(config_file):
|
|
579
|
+
with open(config_file, "r", encoding="utf-8") as json_file:
|
|
580
|
+
# load json object into a readable dictionary
|
|
581
|
+
rules = json.load(json_file)
|
|
582
|
+
progress.update(task, advance=1)
|
|
583
|
+
# loop through required document dicts
|
|
584
|
+
for i in range(len(rules.get("required-documents", []))):
|
|
585
|
+
# add to a list of dictionaries for parsing
|
|
586
|
+
required_docs.append(
|
|
587
|
+
{
|
|
588
|
+
"file-name": rules["required-documents"][i].get("file-name"),
|
|
589
|
+
"last-updated-by": rules["required-documents"][i].get("last-updated-by"),
|
|
590
|
+
"signatures-required": rules["required-documents"][i].get("signatures-required"),
|
|
591
|
+
"signature-count": rules["required-documents"][i].get("signature-count"),
|
|
592
|
+
}
|
|
593
|
+
)
|
|
594
|
+
# update contents of list if it does not already exist
|
|
595
|
+
document_list.add(rules["required-documents"][i].get("file-name"))
|
|
596
|
+
else:
|
|
597
|
+
# No config file, use default requirements for any files found
|
|
549
598
|
progress.update(task, advance=1)
|
|
550
|
-
#
|
|
551
|
-
for
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
599
|
+
# Get all files in evidence folder and subfolders
|
|
600
|
+
for root, dirs, files in os.walk(evidence_folder):
|
|
601
|
+
for file in files:
|
|
602
|
+
if not file.startswith(".") and file.lower().endswith((".pdf", ".docx", ".doc", ".txt")):
|
|
603
|
+
required_docs.append(
|
|
604
|
+
{
|
|
605
|
+
"file-name": file,
|
|
606
|
+
"last-updated-by": 365,
|
|
607
|
+
"signatures-required": False,
|
|
608
|
+
"signature-count": 0,
|
|
609
|
+
}
|
|
610
|
+
)
|
|
611
|
+
document_list.add(file)
|
|
563
612
|
progress.update(task, advance=1)
|
|
564
613
|
# return variables for use outside of local scope
|
|
565
614
|
return required_docs, document_list
|
|
@@ -585,20 +634,37 @@ def get_doc_timestamps(evidence_folder: str, directory: str, task: TaskID, progr
|
|
|
585
634
|
# remove any child folders that start with '.'
|
|
586
635
|
new_folders = remove(list_to_review=folders_list)
|
|
587
636
|
progress.update(task, advance=1)
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
637
|
+
|
|
638
|
+
# Check if there are subdirectories
|
|
639
|
+
subdirs = [f for f in new_folders if os.path.isdir(os.path.join(evidence_folder, f))]
|
|
640
|
+
|
|
641
|
+
if subdirs:
|
|
642
|
+
# loop through directory listing
|
|
643
|
+
for folder in subdirs:
|
|
644
|
+
# get list of files in each folder
|
|
645
|
+
filelist = os.listdir(os.path.join(evidence_folder, folder))
|
|
646
|
+
# remove any files that start with '.'
|
|
647
|
+
filelist = remove(filelist)
|
|
648
|
+
# loop through list of files in each folder
|
|
649
|
+
modified_times.extend(
|
|
650
|
+
{
|
|
651
|
+
"program": folder,
|
|
652
|
+
"file": filename,
|
|
653
|
+
"last-modified": os.path.getmtime(os.path.join(directory, folder, filename)),
|
|
654
|
+
}
|
|
655
|
+
for filename in filelist
|
|
656
|
+
)
|
|
657
|
+
else:
|
|
658
|
+
# No subdirectories, process files directly in evidence folder
|
|
659
|
+
files = [f for f in new_folders if os.path.isfile(os.path.join(evidence_folder, f))]
|
|
660
|
+
files = remove(files)
|
|
595
661
|
modified_times.extend(
|
|
596
662
|
{
|
|
597
|
-
"program":
|
|
663
|
+
"program": "evidence",
|
|
598
664
|
"file": filename,
|
|
599
|
-
"last-modified": os.path.getmtime(os.path.join(
|
|
665
|
+
"last-modified": os.path.getmtime(os.path.join(evidence_folder, filename)),
|
|
600
666
|
}
|
|
601
|
-
for filename in
|
|
667
|
+
for filename in files
|
|
602
668
|
)
|
|
603
669
|
progress.update(task, advance=1)
|
|
604
670
|
# loop through the list of timestamps
|
|
@@ -624,17 +690,22 @@ def set_required_texts(evidence_folder: str, task: TaskID, progress: Progress) -
|
|
|
624
690
|
required_text = set()
|
|
625
691
|
progress.update(task, advance=1)
|
|
626
692
|
# open app//evidence//config.json file and read contents
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
#
|
|
634
|
-
for
|
|
635
|
-
#
|
|
636
|
-
|
|
693
|
+
config_file = f"{evidence_folder}{os.sep}config.json"
|
|
694
|
+
if os.path.exists(config_file):
|
|
695
|
+
with open(config_file, "r", encoding="utf-8") as json_file:
|
|
696
|
+
# load json object into a readable dictionary
|
|
697
|
+
rules = json.load(json_file)
|
|
698
|
+
progress.update(task, advance=1)
|
|
699
|
+
# create iterator to traverse dictionary
|
|
700
|
+
for i in range(len(rules.get("rules-engine", []))):
|
|
701
|
+
# pull out required text to look for from config
|
|
702
|
+
for items in rules["rules-engine"][i].get("text-to-find", []):
|
|
703
|
+
# exclude duplicate text to search from required text
|
|
704
|
+
required_text.add(items)
|
|
705
|
+
else:
|
|
706
|
+
# No config file, use default text requirements
|
|
637
707
|
progress.update(task, advance=1)
|
|
708
|
+
required_text = {"security policy", "risk assessment", "compliance", "control", "audit"}
|
|
638
709
|
# return variable for use outside of local scope
|
|
639
710
|
return required_text
|
|
640
711
|
|
|
@@ -658,17 +729,53 @@ def find_required_files_in_folder(evidence_folder: str, task: TaskID, progress:
|
|
|
658
729
|
# remove any folders starting with '.' from list
|
|
659
730
|
new_folders_list = remove(folder_list)
|
|
660
731
|
progress.update(task, advance=1)
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
732
|
+
|
|
733
|
+
# Check if there are subdirectories
|
|
734
|
+
subdirs = [f for f in new_folders_list if os.path.isdir(os.path.join(evidence_folder, f))]
|
|
735
|
+
|
|
736
|
+
if subdirs:
|
|
737
|
+
for folder in subdirs:
|
|
738
|
+
# build a list of all files contained in sub-directories
|
|
739
|
+
filelist = os.listdir(evidence_folder + os.sep + folder)
|
|
740
|
+
# remove folders and file names that start with a .
|
|
741
|
+
filelist = remove(filelist)
|
|
742
|
+
dir_list.extend({"program": folder, "file": filename} for filename in filelist)
|
|
743
|
+
else:
|
|
744
|
+
# No subdirectories, process files directly in evidence folder
|
|
745
|
+
files = [f for f in new_folders_list if os.path.isfile(os.path.join(evidence_folder, f))]
|
|
746
|
+
files = remove(files)
|
|
747
|
+
dir_list.extend({"program": "evidence", "file": filename} for filename in files)
|
|
667
748
|
progress.update(task, advance=1)
|
|
668
749
|
# return variable for use outside of local scope
|
|
669
750
|
return dir_list
|
|
670
751
|
|
|
671
752
|
|
|
753
|
+
def _create_signature_result(program: str, filename: str, test_name: str, result: bool) -> dict:
|
|
754
|
+
"""Helper function to create signature assessment result"""
|
|
755
|
+
return {
|
|
756
|
+
"program": program,
|
|
757
|
+
"file": filename,
|
|
758
|
+
"test": test_name,
|
|
759
|
+
"result": result,
|
|
760
|
+
}
|
|
761
|
+
|
|
762
|
+
|
|
763
|
+
def _assess_signature_requirement(doc_file: dict, required: dict) -> list[dict]:
|
|
764
|
+
"""Helper function to assess signature requirements for a document"""
|
|
765
|
+
results = []
|
|
766
|
+
|
|
767
|
+
if required["signatures-required"] is True:
|
|
768
|
+
sig_result = find_signatures(doc_file["file"])
|
|
769
|
+
test_name = "signature-required"
|
|
770
|
+
result = sig_result == 3
|
|
771
|
+
results.append(_create_signature_result(doc_file["program"], doc_file["file"], test_name, result))
|
|
772
|
+
elif required["signatures-required"] is False:
|
|
773
|
+
test_name = "signature-required (not required)"
|
|
774
|
+
results.append(_create_signature_result(doc_file["program"], doc_file["file"], test_name, True))
|
|
775
|
+
|
|
776
|
+
return results
|
|
777
|
+
|
|
778
|
+
|
|
672
779
|
def signature_assessment_results(
|
|
673
780
|
directory: list[dict], r_docs: list[dict], task: TaskID, progress: Progress
|
|
674
781
|
) -> list[dict]:
|
|
@@ -682,52 +789,15 @@ def signature_assessment_results(
|
|
|
682
789
|
:return: Assessment of signatures
|
|
683
790
|
:rtype: list[dict]
|
|
684
791
|
"""
|
|
685
|
-
# create empty list to hold assessment results
|
|
686
792
|
sig_assessments: list[dict] = []
|
|
687
793
|
progress.update(task, advance=1)
|
|
688
|
-
|
|
794
|
+
|
|
689
795
|
for doc_file in directory:
|
|
690
796
|
for required in r_docs:
|
|
691
797
|
if doc_file["file"] == required["file-name"]:
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
# run the signature detection function for the file
|
|
695
|
-
sig_result = find_signatures(doc_file["file"])
|
|
696
|
-
# if the return value is 3 pass the test
|
|
697
|
-
if sig_result == 3:
|
|
698
|
-
# append a true result for each document tested
|
|
699
|
-
sig_assessments.append(
|
|
700
|
-
{
|
|
701
|
-
"program": doc_file["program"],
|
|
702
|
-
"file": doc_file["file"],
|
|
703
|
-
"test": "signature-required",
|
|
704
|
-
"result": True,
|
|
705
|
-
}
|
|
706
|
-
)
|
|
707
|
-
# if the return value is 1, -1 or 0 fail the test
|
|
708
|
-
else:
|
|
709
|
-
# append a false result for each document tested
|
|
710
|
-
sig_assessments.append(
|
|
711
|
-
{
|
|
712
|
-
"program": doc_file["program"],
|
|
713
|
-
"file": doc_file["file"],
|
|
714
|
-
"test": "signature-required",
|
|
715
|
-
"result": False,
|
|
716
|
-
}
|
|
717
|
-
)
|
|
718
|
-
# if the signatures-required field is set to false
|
|
719
|
-
if required["signatures-required"] is False:
|
|
720
|
-
# append a true result for each document not requiring a signature
|
|
721
|
-
sig_assessments.append(
|
|
722
|
-
{
|
|
723
|
-
"program": doc_file["program"],
|
|
724
|
-
"file": doc_file["file"],
|
|
725
|
-
"test": "signature-required (not required)",
|
|
726
|
-
"result": True,
|
|
727
|
-
}
|
|
728
|
-
)
|
|
798
|
+
sig_assessments.extend(_assess_signature_requirement(doc_file, required))
|
|
799
|
+
|
|
729
800
|
progress.update(task, advance=1)
|
|
730
|
-
# return variable for use outside of local scope
|
|
731
801
|
return sig_assessments
|
|
732
802
|
|
|
733
803
|
|
|
@@ -775,6 +845,50 @@ def document_assessment_results(
|
|
|
775
845
|
return doc_assessments
|
|
776
846
|
|
|
777
847
|
|
|
848
|
+
def _extract_docx_text(file_path: str) -> list[str]:
|
|
849
|
+
"""Helper function to extract text from DOCX files"""
|
|
850
|
+
document = Document(file_path)
|
|
851
|
+
return [para.text for para in document.paragraphs]
|
|
852
|
+
|
|
853
|
+
|
|
854
|
+
def _extract_pdf_text(file_path: str) -> list[str]:
|
|
855
|
+
"""Helper function to extract text from PDF files"""
|
|
856
|
+
output_text_list: list[str] = []
|
|
857
|
+
with pdfplumber.open(file_path) as pdf:
|
|
858
|
+
for page in pdf.pages:
|
|
859
|
+
text = page.extract_text()
|
|
860
|
+
if text: # Only append non-None text
|
|
861
|
+
output_text_list.append(text)
|
|
862
|
+
return output_text_list
|
|
863
|
+
|
|
864
|
+
|
|
865
|
+
def _process_file_for_text(filename: str, file_path: str, program: str) -> dict | None:
|
|
866
|
+
"""Helper function to process a single file and extract text"""
|
|
867
|
+
if filename.endswith(".docx"):
|
|
868
|
+
text = _extract_docx_text(file_path)
|
|
869
|
+
elif filename.endswith(".pdf"):
|
|
870
|
+
text = _extract_pdf_text(file_path)
|
|
871
|
+
else:
|
|
872
|
+
return None
|
|
873
|
+
|
|
874
|
+
return {"program": program, "file": filename, "text": text}
|
|
875
|
+
|
|
876
|
+
|
|
877
|
+
def _process_files_in_folder(folder_path: str, program: str) -> list[dict]:
|
|
878
|
+
"""Helper function to process all files in a specific folder"""
|
|
879
|
+
results = []
|
|
880
|
+
file_list = os.listdir(folder_path)
|
|
881
|
+
file_list = remove(file_list)
|
|
882
|
+
|
|
883
|
+
for filename in file_list:
|
|
884
|
+
file_path = os.path.join(folder_path, filename)
|
|
885
|
+
result = _process_file_for_text(filename, file_path, program)
|
|
886
|
+
if result:
|
|
887
|
+
results.append(result)
|
|
888
|
+
|
|
889
|
+
return results
|
|
890
|
+
|
|
891
|
+
|
|
778
892
|
def parse_required_text_from_files(evidence_folder: str, task: TaskID, progress: Progress) -> list[dict]:
|
|
779
893
|
"""
|
|
780
894
|
Parse text from docx/pdf file and hold strings representing required text to test
|
|
@@ -785,51 +899,26 @@ def parse_required_text_from_files(evidence_folder: str, task: TaskID, progress:
|
|
|
785
899
|
:return: Results of text found for the files
|
|
786
900
|
:rtype: list[dict]
|
|
787
901
|
"""
|
|
788
|
-
# create an empty list to hold all strings from parsed documents
|
|
789
902
|
full_text: list[dict] = []
|
|
790
903
|
progress.update(task, advance=1)
|
|
791
|
-
|
|
904
|
+
|
|
792
905
|
folder_list = os.listdir(evidence_folder)
|
|
793
906
|
progress.update(task, advance=1)
|
|
794
|
-
# remove all folders that start with '.'
|
|
795
907
|
removed_folders_list = remove(folder_list)
|
|
796
908
|
progress.update(task, advance=1)
|
|
797
|
-
|
|
798
|
-
|
|
799
|
-
|
|
800
|
-
|
|
801
|
-
|
|
802
|
-
for
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
|
|
807
|
-
|
|
808
|
-
|
|
809
|
-
full_text.append({"program": folder, "file": filename, "text": output})
|
|
810
|
-
elif filename.endswith(".pdf"):
|
|
811
|
-
# create empty list to hold text per file
|
|
812
|
-
output_text_list: list[str] = []
|
|
813
|
-
# open filename with pdfplumber
|
|
814
|
-
with pdfplumber.open(filename) as pdf:
|
|
815
|
-
# set number of pages
|
|
816
|
-
pages = pdf.pages
|
|
817
|
-
# for each page in the pdf document
|
|
818
|
-
for page in pages:
|
|
819
|
-
# extract the text
|
|
820
|
-
text = page.extract_text()
|
|
821
|
-
# write the text to a list
|
|
822
|
-
output_text_list.append(text)
|
|
823
|
-
# add each file and the requisite text to the dictionary to test
|
|
824
|
-
full_text.append(
|
|
825
|
-
{
|
|
826
|
-
"program": folder,
|
|
827
|
-
"file": filename,
|
|
828
|
-
"text": output_text_list,
|
|
829
|
-
}
|
|
830
|
-
)
|
|
909
|
+
|
|
910
|
+
# Check if there are subdirectories
|
|
911
|
+
subdirs = [f for f in removed_folders_list if os.path.isdir(os.path.join(evidence_folder, f))]
|
|
912
|
+
|
|
913
|
+
if subdirs:
|
|
914
|
+
for folder in subdirs:
|
|
915
|
+
folder_path = os.path.join(evidence_folder, folder)
|
|
916
|
+
full_text.extend(_process_files_in_folder(folder_path, folder))
|
|
917
|
+
else:
|
|
918
|
+
# No subdirectories, process files directly in evidence folder
|
|
919
|
+
full_text.extend(_process_files_in_folder(evidence_folder, "evidence"))
|
|
920
|
+
|
|
831
921
|
progress.update(task, advance=1)
|
|
832
|
-
# return variable for use outside of local scope
|
|
833
922
|
return full_text
|
|
834
923
|
|
|
835
924
|
|
|
@@ -924,17 +1013,22 @@ def gather_test_project_data(api: Api, evidence_folder: str, task: TaskID, progr
|
|
|
924
1013
|
test_data: list[dict] = []
|
|
925
1014
|
progress.update(task, advance=1)
|
|
926
1015
|
# test project information created in RegScale UI
|
|
927
|
-
|
|
928
|
-
|
|
929
|
-
|
|
930
|
-
|
|
931
|
-
|
|
932
|
-
|
|
933
|
-
|
|
934
|
-
|
|
935
|
-
|
|
936
|
-
|
|
937
|
-
|
|
1016
|
+
list_file = evidence_folder + os.sep + "list.json"
|
|
1017
|
+
if os.path.exists(list_file):
|
|
1018
|
+
with open(list_file, "r", encoding="utf-8") as json_file:
|
|
1019
|
+
# load json object into a readable dictionary
|
|
1020
|
+
lists = json.load(json_file)
|
|
1021
|
+
# loop through projects in the list.json
|
|
1022
|
+
test_data.extend(
|
|
1023
|
+
{
|
|
1024
|
+
"id": lists["parser-list"][i].get("id"),
|
|
1025
|
+
"program": lists["parser-list"][i].get("folder-name"),
|
|
1026
|
+
}
|
|
1027
|
+
for i in range(len(lists.get("parser-list", [])))
|
|
1028
|
+
)
|
|
1029
|
+
else:
|
|
1030
|
+
# No list.json, skip project data - evidence collection can work without it
|
|
1031
|
+
test_data = []
|
|
938
1032
|
progress.update(task, advance=1)
|
|
939
1033
|
# create empty list to hold json response data for each project
|
|
940
1034
|
test_info: list[dict] = []
|
|
@@ -956,7 +1050,7 @@ def gather_test_project_data(api: Api, evidence_folder: str, task: TaskID, progr
|
|
|
956
1050
|
}
|
|
957
1051
|
)
|
|
958
1052
|
else:
|
|
959
|
-
api.logger.
|
|
1053
|
+
api.logger.warning(f"Project data retrieval was unsuccessful for ID {item['id']}, skipping this project.")
|
|
960
1054
|
progress.update(task, advance=1)
|
|
961
1055
|
# return variables for use outside of local scope
|
|
962
1056
|
return test_info
|
|
@@ -1031,19 +1125,8 @@ def assessments_report(
|
|
|
1031
1125
|
:rtype: list[dict]
|
|
1032
1126
|
"""
|
|
1033
1127
|
progress.update(task, advance=1)
|
|
1034
|
-
|
|
1035
|
-
|
|
1036
|
-
# append all results to 1 master list
|
|
1037
|
-
assessment_report.extend(iter(textres))
|
|
1038
|
-
progress.update(task, advance=1)
|
|
1039
|
-
# append all results to 1 master list
|
|
1040
|
-
assessment_report.extend(iter(timeres))
|
|
1041
|
-
progress.update(task, advance=1)
|
|
1042
|
-
# append all results to 1 master list
|
|
1043
|
-
assessment_report.extend(iter(sigres))
|
|
1044
|
-
progress.update(task, advance=1)
|
|
1045
|
-
# return variable for use outside of local scope
|
|
1046
|
-
return assessment_report
|
|
1128
|
+
# combine all results into one master list
|
|
1129
|
+
return docres + textres + timeres + sigres
|
|
1047
1130
|
|
|
1048
1131
|
|
|
1049
1132
|
def build_assessment_dataframe(assessments: list[dict], task: TaskID, progress: Progress) -> list[dict]:
|
|
@@ -1061,6 +1144,11 @@ def build_assessment_dataframe(assessments: list[dict], task: TaskID, progress:
|
|
|
1061
1144
|
|
|
1062
1145
|
result_df = pd.DataFrame(assessments)
|
|
1063
1146
|
progress.update(task, advance=1)
|
|
1147
|
+
|
|
1148
|
+
# Check if dataframe is empty
|
|
1149
|
+
if result_df.empty:
|
|
1150
|
+
return []
|
|
1151
|
+
|
|
1064
1152
|
# fill in NaN cells
|
|
1065
1153
|
result_df = result_df.fillna(" ")
|
|
1066
1154
|
progress.update(task, advance=1)
|
|
@@ -1148,9 +1236,21 @@ def build_html_table(assessments: list[dict], task: TaskID, progress: Progress)
|
|
|
1148
1236
|
import pandas as pd # Optimize import performance
|
|
1149
1237
|
|
|
1150
1238
|
output_list: list[dict] = []
|
|
1239
|
+
|
|
1240
|
+
# Check if assessments is empty
|
|
1241
|
+
if not assessments:
|
|
1242
|
+
progress.update(task, advance=4) # Skip all remaining progress updates
|
|
1243
|
+
return output_list
|
|
1244
|
+
|
|
1151
1245
|
# create a dataframe of a list of dicts
|
|
1152
1246
|
table_df = pd.DataFrame(data=assessments)
|
|
1153
1247
|
progress.update(task, advance=1)
|
|
1248
|
+
|
|
1249
|
+
# Check if dataframe is empty or missing required columns
|
|
1250
|
+
if table_df.empty or "program" not in table_df.columns:
|
|
1251
|
+
progress.update(task, advance=3) # Skip remaining progress updates
|
|
1252
|
+
return output_list
|
|
1253
|
+
|
|
1154
1254
|
# fill in N/A cells with blank string
|
|
1155
1255
|
table_df = table_df.fillna(" ")
|
|
1156
1256
|
progress.update(task, advance=1)
|
|
@@ -1197,6 +1297,12 @@ def create_child_assessments(
|
|
|
1197
1297
|
# set completion datetime to required format
|
|
1198
1298
|
completion_date = datetime.now().strftime("%Y-%m-%dT%H:%M:%S")
|
|
1199
1299
|
progress.update(task, advance=1)
|
|
1300
|
+
|
|
1301
|
+
# Check if we have project data to work with
|
|
1302
|
+
if not project_data:
|
|
1303
|
+
progress.update(task, advance=1)
|
|
1304
|
+
return
|
|
1305
|
+
|
|
1200
1306
|
# loop through test projects and make an API call for each
|
|
1201
1307
|
for i, project in enumerate(project_data):
|
|
1202
1308
|
# call score calculation function
|