ethyca-fides 2.63.1b0__py2.py3-none-any.whl → 2.63.1b1__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ethyca_fides-2.63.1b0.dist-info → ethyca_fides-2.63.1b1.dist-info}/METADATA +1 -1
- {ethyca_fides-2.63.1b0.dist-info → ethyca_fides-2.63.1b1.dist-info}/RECORD +107 -104
- fides/_version.py +3 -3
- fides/api/alembic/migrations/versions/bf713b5a021d_staged_resource_ancestor_link_data_.py +20 -11
- fides/api/migrations/post_upgrade_index_creation.py +3 -3
- fides/api/models/attachment.py +36 -23
- fides/api/service/privacy_request/attachment_handling.py +132 -0
- fides/api/service/privacy_request/dsr_package/dsr_report_builder.py +264 -46
- fides/api/service/privacy_request/dsr_package/templates/attachments_index.html +33 -0
- fides/api/service/privacy_request/dsr_package/templates/collection_index.html +34 -9
- fides/api/service/privacy_request/dsr_package/templates/main.css +45 -2
- fides/api/service/privacy_request/dsr_package/templates/welcome.html +12 -8
- fides/api/service/privacy_request/request_runner_service.py +258 -139
- fides/api/service/storage/gcs.py +15 -3
- fides/api/service/storage/s3.py +28 -14
- fides/api/service/storage/util.py +45 -7
- fides/api/tasks/csv_utils.py +170 -0
- fides/api/tasks/encryption_utils.py +42 -0
- fides/api/tasks/storage.py +85 -91
- fides/service/messaging/aws_ses_service.py +5 -1
- fides/ui-build/static/admin/404.html +1 -1
- fides/ui-build/static/admin/add-systems/manual.html +1 -1
- fides/ui-build/static/admin/add-systems/multiple.html +1 -1
- fides/ui-build/static/admin/add-systems.html +1 -1
- fides/ui-build/static/admin/consent/configure/add-vendors.html +1 -1
- fides/ui-build/static/admin/consent/configure.html +1 -1
- fides/ui-build/static/admin/consent/privacy-experience/[id].html +1 -1
- fides/ui-build/static/admin/consent/privacy-experience/new.html +1 -1
- fides/ui-build/static/admin/consent/privacy-experience.html +1 -1
- fides/ui-build/static/admin/consent/privacy-notices/[id].html +1 -1
- fides/ui-build/static/admin/consent/privacy-notices/new.html +1 -1
- fides/ui-build/static/admin/consent/privacy-notices.html +1 -1
- fides/ui-build/static/admin/consent/properties.html +1 -1
- fides/ui-build/static/admin/consent/reporting.html +1 -1
- fides/ui-build/static/admin/consent.html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/projects/[projectUrn]/[resourceUrn].html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/projects/[projectUrn].html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/projects.html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/resources/[resourceUrn].html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/resources.html +1 -1
- fides/ui-build/static/admin/data-catalog.html +1 -1
- fides/ui-build/static/admin/data-discovery/action-center/[monitorId]/[systemId].html +1 -1
- fides/ui-build/static/admin/data-discovery/action-center/[monitorId].html +1 -1
- fides/ui-build/static/admin/data-discovery/action-center.html +1 -1
- fides/ui-build/static/admin/data-discovery/activity.html +1 -1
- fides/ui-build/static/admin/data-discovery/detection/[resourceUrn].html +1 -1
- fides/ui-build/static/admin/data-discovery/detection.html +1 -1
- fides/ui-build/static/admin/data-discovery/discovery/[resourceUrn].html +1 -1
- fides/ui-build/static/admin/data-discovery/discovery.html +1 -1
- fides/ui-build/static/admin/datamap.html +1 -1
- fides/ui-build/static/admin/dataset/[datasetId]/[collectionName]/[...subfieldNames].html +1 -1
- fides/ui-build/static/admin/dataset/[datasetId]/[collectionName].html +1 -1
- fides/ui-build/static/admin/dataset/[datasetId].html +1 -1
- fides/ui-build/static/admin/dataset/new.html +1 -1
- fides/ui-build/static/admin/dataset.html +1 -1
- fides/ui-build/static/admin/datastore-connection/[id].html +1 -1
- fides/ui-build/static/admin/datastore-connection/new.html +1 -1
- fides/ui-build/static/admin/datastore-connection.html +1 -1
- fides/ui-build/static/admin/index.html +1 -1
- fides/ui-build/static/admin/integrations/[id].html +1 -1
- fides/ui-build/static/admin/integrations.html +1 -1
- fides/ui-build/static/admin/login/[provider].html +1 -1
- fides/ui-build/static/admin/login.html +1 -1
- fides/ui-build/static/admin/messaging/[id].html +1 -1
- fides/ui-build/static/admin/messaging/add-template.html +1 -1
- fides/ui-build/static/admin/messaging.html +1 -1
- fides/ui-build/static/admin/poc/ant-components.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/AntForm.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/FormikAntFormItem.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/FormikControlled.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/FormikField.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/FormikSpreadField.html +1 -1
- fides/ui-build/static/admin/poc/forms.html +1 -1
- fides/ui-build/static/admin/poc/table-migration.html +1 -1
- fides/ui-build/static/admin/privacy-requests/[id].html +1 -1
- fides/ui-build/static/admin/privacy-requests/configure/messaging.html +1 -1
- fides/ui-build/static/admin/privacy-requests/configure/storage.html +1 -1
- fides/ui-build/static/admin/privacy-requests/configure.html +1 -1
- fides/ui-build/static/admin/privacy-requests.html +1 -1
- fides/ui-build/static/admin/properties/[id].html +1 -1
- fides/ui-build/static/admin/properties/add-property.html +1 -1
- fides/ui-build/static/admin/properties.html +1 -1
- fides/ui-build/static/admin/reporting/datamap.html +1 -1
- fides/ui-build/static/admin/settings/about/alpha.html +1 -1
- fides/ui-build/static/admin/settings/about.html +1 -1
- fides/ui-build/static/admin/settings/consent/[configuration_id]/[purpose_id].html +1 -1
- fides/ui-build/static/admin/settings/consent.html +1 -1
- fides/ui-build/static/admin/settings/custom-fields.html +1 -1
- fides/ui-build/static/admin/settings/domain-records.html +1 -1
- fides/ui-build/static/admin/settings/domains.html +1 -1
- fides/ui-build/static/admin/settings/email-templates.html +1 -1
- fides/ui-build/static/admin/settings/locations.html +1 -1
- fides/ui-build/static/admin/settings/organization.html +1 -1
- fides/ui-build/static/admin/settings/regulations.html +1 -1
- fides/ui-build/static/admin/systems/configure/[id]/test-datasets.html +1 -1
- fides/ui-build/static/admin/systems/configure/[id].html +1 -1
- fides/ui-build/static/admin/systems.html +1 -1
- fides/ui-build/static/admin/taxonomy.html +1 -1
- fides/ui-build/static/admin/user-management/new.html +1 -1
- fides/ui-build/static/admin/user-management/profile/[id].html +1 -1
- fides/ui-build/static/admin/user-management.html +1 -1
- fides/api/service/privacy_request/dsr_package/templates/item.html +0 -37
- {ethyca_fides-2.63.1b0.dist-info → ethyca_fides-2.63.1b1.dist-info}/WHEEL +0 -0
- {ethyca_fides-2.63.1b0.dist-info → ethyca_fides-2.63.1b1.dist-info}/entry_points.txt +0 -0
- {ethyca_fides-2.63.1b0.dist-info → ethyca_fides-2.63.1b1.dist-info}/licenses/LICENSE +0 -0
- {ethyca_fides-2.63.1b0.dist-info → ethyca_fides-2.63.1b1.dist-info}/top_level.txt +0 -0
- /fides/ui-build/static/admin/_next/static/{j0v5qPG9TaezfK2WMkHhI → 74KgkHM2cEVIXGgJPlTZ3}/_buildManifest.js +0 -0
- /fides/ui-build/static/admin/_next/static/{j0v5qPG9TaezfK2WMkHhI → 74KgkHM2cEVIXGgJPlTZ3}/_ssgManifest.js +0 -0
@@ -0,0 +1,132 @@
|
|
1
|
+
import time as time_module
|
2
|
+
from dataclasses import dataclass
|
3
|
+
from typing import Any, Dict, Iterator, List, Optional, Tuple
|
4
|
+
|
5
|
+
from loguru import logger
|
6
|
+
|
7
|
+
from fides.api.models.attachment import Attachment, AttachmentType
|
8
|
+
from fides.api.schemas.storage.storage import StorageDetails
|
9
|
+
|
10
|
+
|
11
|
+
@dataclass
|
12
|
+
class AttachmentData:
|
13
|
+
"""Data structure for attachment metadata and content.
|
14
|
+
Using a dataclass rather than a Pydantic model here for the following reasons:
|
15
|
+
- The data structure is simple and doesn't need complex validation.
|
16
|
+
- The fields being used have already been validated and are properly typed.
|
17
|
+
- The class is used internally for data transfer, not for API serialization.
|
18
|
+
- Performance is important since this is used in a data processing pipeline.
|
19
|
+
"""
|
20
|
+
|
21
|
+
file_name: str
|
22
|
+
file_size: Optional[int]
|
23
|
+
download_url: Optional[str]
|
24
|
+
content_type: str
|
25
|
+
bucket_name: str
|
26
|
+
file_key: str
|
27
|
+
storage_key: str
|
28
|
+
|
29
|
+
def to_upload_dict(self) -> Dict[str, Any]:
|
30
|
+
"""Convert to dictionary for upload, including presigned URL."""
|
31
|
+
return {
|
32
|
+
"file_name": self.file_name,
|
33
|
+
"file_size": self.file_size,
|
34
|
+
"download_url": self.download_url,
|
35
|
+
"content_type": self.content_type,
|
36
|
+
}
|
37
|
+
|
38
|
+
def to_storage_dict(self) -> Dict[str, Any]:
|
39
|
+
"""Convert to dictionary for storage, including the elements needed to recreated the presigned URL."""
|
40
|
+
return {
|
41
|
+
"file_name": self.file_name,
|
42
|
+
"file_size": self.file_size,
|
43
|
+
"content_type": self.content_type,
|
44
|
+
"bucket_name": self.bucket_name,
|
45
|
+
"file_key": self.file_key,
|
46
|
+
"storage_key": self.storage_key,
|
47
|
+
}
|
48
|
+
|
49
|
+
|
50
|
+
def get_attachments_content(
|
51
|
+
loaded_attachments: List[Attachment],
|
52
|
+
) -> Iterator[AttachmentData]:
|
53
|
+
"""
|
54
|
+
Retrieves all attachments associated with a privacy request that are marked to be included with the access package.
|
55
|
+
Yields AttachmentData objects containing attachment metadata and download urls.
|
56
|
+
Uses generators to minimize memory usage.
|
57
|
+
|
58
|
+
Args:
|
59
|
+
loaded_attachments: List of Attachment objects to process
|
60
|
+
|
61
|
+
Yields:
|
62
|
+
AttachmentData object containing attachment metadata and url
|
63
|
+
"""
|
64
|
+
start_time = time_module.time()
|
65
|
+
processed_count = 0
|
66
|
+
skipped_count = 0
|
67
|
+
error_count = 0
|
68
|
+
total_size = 0
|
69
|
+
|
70
|
+
for attachment in loaded_attachments:
|
71
|
+
if attachment.attachment_type != AttachmentType.include_with_access_package:
|
72
|
+
skipped_count += 1
|
73
|
+
continue
|
74
|
+
|
75
|
+
try:
|
76
|
+
# Get size and download URL using retrieve_attachment
|
77
|
+
size, url = attachment.retrieve_attachment()
|
78
|
+
total_size += size if size else 0
|
79
|
+
if url is None:
|
80
|
+
logger.warning(
|
81
|
+
"No download URL retrieved for attachment {}", attachment.file_name
|
82
|
+
)
|
83
|
+
skipped_count += 1
|
84
|
+
continue
|
85
|
+
|
86
|
+
processed_count += 1
|
87
|
+
yield AttachmentData(
|
88
|
+
file_name=attachment.file_name,
|
89
|
+
file_size=size,
|
90
|
+
download_url=str(url) if url else None,
|
91
|
+
content_type=attachment.content_type,
|
92
|
+
bucket_name=attachment.config.details[StorageDetails.BUCKET.value],
|
93
|
+
file_key=attachment.file_key,
|
94
|
+
storage_key=attachment.storage_key,
|
95
|
+
)
|
96
|
+
|
97
|
+
except Exception as e:
|
98
|
+
error_count += 1
|
99
|
+
logger.error(
|
100
|
+
"Error processing attachment {}: {}", attachment.file_name, str(e)
|
101
|
+
)
|
102
|
+
continue
|
103
|
+
|
104
|
+
# Log final metrics
|
105
|
+
time_taken = time_module.time() - start_time
|
106
|
+
logger.bind(
|
107
|
+
time_to_process=time_taken,
|
108
|
+
total_attachments=len(loaded_attachments),
|
109
|
+
processed_attachments=processed_count,
|
110
|
+
skipped_attachments=skipped_count,
|
111
|
+
error_attachments=error_count,
|
112
|
+
total_size_bytes=total_size,
|
113
|
+
).info("Attachment processing complete")
|
114
|
+
|
115
|
+
|
116
|
+
def process_attachments_for_upload(
|
117
|
+
attachments: Iterator[AttachmentData],
|
118
|
+
) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
|
119
|
+
"""
|
120
|
+
Process attachments into separate upload and storage formats.
|
121
|
+
Returns both formats:
|
122
|
+
- upload_attachments: Used for uploading to access packages
|
123
|
+
- storage_attachments: Used for saving filtered access results
|
124
|
+
"""
|
125
|
+
upload_attachments = []
|
126
|
+
storage_attachments = []
|
127
|
+
|
128
|
+
for attachment in attachments:
|
129
|
+
storage_attachments.append(attachment.to_storage_dict())
|
130
|
+
upload_attachments.append(attachment.to_upload_dict())
|
131
|
+
|
132
|
+
return upload_attachments, storage_attachments
|
@@ -1,13 +1,15 @@
|
|
1
1
|
import json
|
2
2
|
import os
|
3
|
+
import time as time_module
|
3
4
|
import zipfile
|
4
5
|
from collections import defaultdict
|
5
6
|
from io import BytesIO
|
6
7
|
from pathlib import Path
|
7
|
-
from typing import Any,
|
8
|
+
from typing import Any, Optional
|
8
9
|
|
9
10
|
import jinja2
|
10
11
|
from jinja2 import Environment, FileSystemLoader
|
12
|
+
from loguru import logger
|
11
13
|
|
12
14
|
from fides.api.models.privacy_request import PrivacyRequest
|
13
15
|
from fides.api.schemas.policy import ActionType
|
@@ -22,54 +24,79 @@ BORDER_COLOR = "#E2E8F0"
|
|
22
24
|
|
23
25
|
# pylint: disable=too-many-instance-attributes
|
24
26
|
class DsrReportBuilder:
|
27
|
+
"""
|
28
|
+
Manages populating HTML templates from the given data and adding the generated
|
29
|
+
pages to a zip file in a way that the pages can be navigated between.
|
30
|
+
|
31
|
+
The zip file is structured as follows:
|
32
|
+
- welcome.html: the main index page
|
33
|
+
- data/dataset_name/index.html: the index page for the dataset
|
34
|
+
- data/dataset_name/collection_name/index.html: the index page for the collection
|
35
|
+
- data/dataset_name/collection_name/item_index.html: the detail page for the item
|
36
|
+
- attachments/index.html: the index page for the attachments
|
37
|
+
|
38
|
+
Args:
|
39
|
+
privacy_request: the privacy request object
|
40
|
+
dsr_data: the DSR data
|
41
|
+
"""
|
42
|
+
|
25
43
|
def __init__(
|
26
44
|
self,
|
27
45
|
privacy_request: PrivacyRequest,
|
28
|
-
dsr_data:
|
46
|
+
dsr_data: dict[str, Any],
|
29
47
|
):
|
30
48
|
"""
|
31
|
-
|
32
|
-
pages to a zip file in a way that the pages can be navigated between.
|
49
|
+
Initializes the DSR report builder.
|
33
50
|
"""
|
51
|
+
# Define pretty_print function for Jinja templates
|
52
|
+
jinja2.filters.FILTERS["pretty_print"] = lambda value, indent=4: json.dumps(
|
53
|
+
value, indent=indent, cls=StorageJSONEncoder
|
54
|
+
)
|
34
55
|
|
35
|
-
# zip file variables
|
56
|
+
# Initialize instance zip file variables
|
36
57
|
self.baos = BytesIO()
|
37
58
|
|
38
59
|
# we close this in the finally block of generate()
|
39
60
|
# pylint: disable=consider-using-with
|
40
61
|
self.out = zipfile.ZipFile(self.baos, "w")
|
41
|
-
|
42
|
-
# Jinja template environment initialization
|
43
|
-
def pretty_print(value: str, indent: int = 4) -> str:
|
44
|
-
return json.dumps(
|
45
|
-
value, indent=indent, default=StorageJSONEncoder().default
|
46
|
-
)
|
47
|
-
|
48
|
-
jinja2.filters.FILTERS["pretty_print"] = pretty_print
|
49
62
|
self.template_loader = Environment(
|
50
63
|
loader=FileSystemLoader(DSR_DIRECTORY), autoescape=True
|
51
64
|
)
|
52
65
|
|
53
66
|
# to pass in custom colors in the future
|
54
|
-
self.template_data:
|
67
|
+
self.template_data: dict[str, Any] = {
|
55
68
|
"text_color": TEXT_COLOR,
|
56
69
|
"header_color": HEADER_COLOR,
|
57
70
|
"border_color": BORDER_COLOR,
|
58
71
|
}
|
59
|
-
self.main_links:
|
72
|
+
self.main_links: dict[str, Any] = {} # used to track the generated pages
|
60
73
|
|
61
74
|
# report data to populate the templates
|
62
75
|
self.request_data = _map_privacy_request(privacy_request)
|
63
76
|
self.dsr_data = dsr_data
|
64
77
|
|
78
|
+
# Track used filenames across all attachments
|
79
|
+
self.used_filenames: set[str] = set()
|
80
|
+
|
65
81
|
def _populate_template(
|
66
82
|
self,
|
67
83
|
template_path: str,
|
68
84
|
heading: Optional[str] = None,
|
69
85
|
description: Optional[str] = None,
|
70
|
-
data: Optional[
|
86
|
+
data: Optional[dict[str, Any]] = None,
|
71
87
|
) -> str:
|
72
|
-
"""
|
88
|
+
"""
|
89
|
+
Populates the template with the given data.
|
90
|
+
|
91
|
+
Args:
|
92
|
+
template_path: the path to the template to populate
|
93
|
+
heading: the heading to display on the template
|
94
|
+
description: the description to display on the template
|
95
|
+
data: the data to populate the template with
|
96
|
+
|
97
|
+
Returns:
|
98
|
+
The rendered template as a string.
|
99
|
+
"""
|
73
100
|
report_data = {
|
74
101
|
"heading": heading,
|
75
102
|
"description": description,
|
@@ -82,14 +109,24 @@ class DsrReportBuilder:
|
|
82
109
|
return rendered_template
|
83
110
|
|
84
111
|
def _add_file(self, filename: str, contents: str) -> None:
|
85
|
-
"""
|
112
|
+
"""
|
113
|
+
Adds a file to the zip file.
|
114
|
+
|
115
|
+
Args:
|
116
|
+
filename: the name of the file to add
|
117
|
+
contents: the contents of the file to add
|
118
|
+
"""
|
86
119
|
if filename and contents:
|
87
120
|
self.out.writestr(f"{filename}", contents.encode("utf-8"))
|
88
121
|
|
89
|
-
def _add_dataset(self, dataset_name: str, collections:
|
122
|
+
def _add_dataset(self, dataset_name: str, collections: dict[str, Any]) -> None:
|
90
123
|
"""
|
91
124
|
Generates a page for each collection in the dataset and an index page for the dataset.
|
92
125
|
Tracks the generated links to build a root level index after each collection has been processed.
|
126
|
+
|
127
|
+
Args:
|
128
|
+
dataset_name: the name of the dataset to add
|
129
|
+
collections: the collections to add to the dataset
|
93
130
|
"""
|
94
131
|
# track links to collection indexes
|
95
132
|
collection_links = {}
|
@@ -109,40 +146,203 @@ class DsrReportBuilder:
|
|
109
146
|
),
|
110
147
|
)
|
111
148
|
|
149
|
+
def _get_unique_filename(self, filename: str) -> str:
|
150
|
+
"""
|
151
|
+
Generates a unique filename by appending a counter if the file already exists.
|
152
|
+
Now tracks filenames across all directories to ensure global uniqueness.
|
153
|
+
|
154
|
+
Args:
|
155
|
+
filename: The original filename
|
156
|
+
|
157
|
+
Returns:
|
158
|
+
A unique filename that won't conflict with existing files
|
159
|
+
"""
|
160
|
+
base_name, extension = os.path.splitext(filename)
|
161
|
+
counter = 1
|
162
|
+
unique_filename = filename
|
163
|
+
|
164
|
+
# Check if file exists in used_filenames set
|
165
|
+
while unique_filename in self.used_filenames:
|
166
|
+
unique_filename = f"{base_name}_{counter}{extension}"
|
167
|
+
counter += 1
|
168
|
+
|
169
|
+
# Add the new filename to the set
|
170
|
+
self.used_filenames.add(unique_filename)
|
171
|
+
return unique_filename
|
172
|
+
|
173
|
+
def _write_attachment_content(
|
174
|
+
self,
|
175
|
+
attachments: list[dict[str, Any]],
|
176
|
+
directory: str,
|
177
|
+
) -> dict[str, dict[str, str]]:
|
178
|
+
"""
|
179
|
+
Processes attachments and returns a dictionary mapping filenames to their download URLs and sizes.
|
180
|
+
|
181
|
+
Args:
|
182
|
+
attachments: The attachments to process
|
183
|
+
directory: The directory path (unused for presigned URLs)
|
184
|
+
|
185
|
+
Returns:
|
186
|
+
Dictionary mapping filenames to dictionaries containing url and size
|
187
|
+
"""
|
188
|
+
# First process all attachments into a list of tuples (filename, data)
|
189
|
+
processed_attachments = []
|
190
|
+
|
191
|
+
for attachment in attachments:
|
192
|
+
if not isinstance(attachment, dict):
|
193
|
+
continue
|
194
|
+
|
195
|
+
file_name = attachment.get("file_name")
|
196
|
+
if not file_name:
|
197
|
+
logger.warning("Skipping attachment with no file name")
|
198
|
+
continue
|
199
|
+
|
200
|
+
download_url = attachment.get("download_url")
|
201
|
+
if not download_url:
|
202
|
+
logger.warning("Skipping attachment with no download URL")
|
203
|
+
continue
|
204
|
+
|
205
|
+
file_size = attachment.get("file_size")
|
206
|
+
if isinstance(file_size, (int, float)):
|
207
|
+
file_size = self._format_size(float(file_size))
|
208
|
+
else:
|
209
|
+
file_size = "Unknown"
|
210
|
+
|
211
|
+
# Get a unique filename to prevent duplicates
|
212
|
+
unique_filename = self._get_unique_filename(file_name)
|
213
|
+
|
214
|
+
# Add to processed attachments
|
215
|
+
processed_attachments.append(
|
216
|
+
(unique_filename, {"url": download_url, "size": file_size})
|
217
|
+
)
|
218
|
+
|
219
|
+
# Convert list of tuples to dictionary
|
220
|
+
return dict(processed_attachments)
|
221
|
+
|
112
222
|
def _add_collection(
|
113
|
-
self, rows:
|
223
|
+
self, rows: list[dict[str, Any]], dataset_name: str, collection_name: str
|
114
224
|
) -> None:
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
225
|
+
"""
|
226
|
+
Adds a collection to the zip file.
|
227
|
+
|
228
|
+
Args:
|
229
|
+
rows: the rows to add to the collection
|
230
|
+
dataset_name: the name of the dataset to add the collection to
|
231
|
+
collection_name: the name of the collection to add
|
232
|
+
"""
|
233
|
+
items_content = []
|
234
|
+
|
235
|
+
for index, collection_item in enumerate(rows, 1):
|
236
|
+
# Create a copy of the item data to avoid modifying the original
|
237
|
+
item_data = collection_item.copy()
|
238
|
+
|
239
|
+
# Process any attachments in the item
|
240
|
+
if "attachments" in item_data and isinstance(
|
241
|
+
item_data["attachments"], list
|
242
|
+
):
|
243
|
+
# Process attachments and get their URLs
|
244
|
+
attachment_links = self._write_attachment_content(
|
245
|
+
item_data["attachments"],
|
246
|
+
f"data/{dataset_name}/{collection_name}",
|
247
|
+
)
|
248
|
+
# Add the attachment URLs to the item data
|
249
|
+
item_data["attachments"] = attachment_links
|
250
|
+
|
251
|
+
# Add item content to the list
|
252
|
+
items_content.append(
|
253
|
+
{
|
254
|
+
"index": index,
|
255
|
+
"heading": f"{collection_name} (item #{index})",
|
256
|
+
"data": item_data,
|
257
|
+
}
|
127
258
|
)
|
128
|
-
detail_links[f"item #{index}"] = detail_url
|
129
259
|
|
130
|
-
#
|
260
|
+
# Generate the collection index page
|
131
261
|
self._add_file(
|
132
262
|
f"data/{dataset_name}/{collection_name}/index.html",
|
133
263
|
self._populate_template(
|
134
264
|
"templates/collection_index.html",
|
135
265
|
collection_name,
|
136
266
|
None,
|
137
|
-
|
267
|
+
{"collection_items": items_content},
|
268
|
+
),
|
269
|
+
)
|
270
|
+
|
271
|
+
def _add_attachments(self, attachments: list[dict[str, Any]]) -> None:
|
272
|
+
"""
|
273
|
+
Adds top-level attachments to the zip file.
|
274
|
+
|
275
|
+
Args:
|
276
|
+
attachments: the attachments to add
|
277
|
+
"""
|
278
|
+
if not attachments or not isinstance(attachments, list):
|
279
|
+
return
|
280
|
+
|
281
|
+
# Process attachments and get the links
|
282
|
+
attachment_links = self._write_attachment_content(attachments, "attachments")
|
283
|
+
|
284
|
+
# Generate attachments index page using the attachments index template
|
285
|
+
self._add_file(
|
286
|
+
"attachments/index.html",
|
287
|
+
self._populate_template(
|
288
|
+
"templates/attachments_index.html",
|
289
|
+
"Attachments",
|
290
|
+
"Files attached to this privacy request",
|
291
|
+
attachment_links,
|
138
292
|
),
|
139
293
|
)
|
140
294
|
|
295
|
+
def _get_datasets_from_dsr_data(self) -> dict[str, Any]:
|
296
|
+
"""
|
297
|
+
Returns the datasets from the DSR data.
|
298
|
+
"""
|
299
|
+
# pre-process data to split the dataset:collection keys
|
300
|
+
datasets: dict[str, Any] = defaultdict(lambda: defaultdict(list))
|
301
|
+
for key, rows in self.dsr_data.items():
|
302
|
+
|
303
|
+
# we handle attachments separately
|
304
|
+
if key == "attachments":
|
305
|
+
continue
|
306
|
+
|
307
|
+
parts = key.split(":", 1)
|
308
|
+
if len(parts) > 1:
|
309
|
+
dataset_name, collection_name = parts
|
310
|
+
else:
|
311
|
+
for row in rows:
|
312
|
+
if "system_name" in row:
|
313
|
+
dataset_name = row["system_name"]
|
314
|
+
collection_name = parts[0]
|
315
|
+
break
|
316
|
+
else:
|
317
|
+
dataset_name = "manual"
|
318
|
+
collection_name = parts[0]
|
319
|
+
|
320
|
+
datasets[dataset_name][collection_name].extend(rows)
|
321
|
+
|
322
|
+
return datasets
|
323
|
+
|
324
|
+
def _format_size(self, size_bytes: float) -> str:
|
325
|
+
"""
|
326
|
+
Format size in bytes to human readable format.
|
327
|
+
|
328
|
+
Args:
|
329
|
+
size_bytes: Size in bytes
|
330
|
+
|
331
|
+
Returns:
|
332
|
+
Formatted string with appropriate unit (B, KB, MB, GB)
|
333
|
+
"""
|
334
|
+
for unit in ["B", "KB", "MB", "GB"]:
|
335
|
+
if size_bytes < 1024.0:
|
336
|
+
return f"{size_bytes:.1f} {unit}"
|
337
|
+
size_bytes /= 1024.0
|
338
|
+
return f"{size_bytes:.1f} TB"
|
339
|
+
|
141
340
|
def generate(self) -> BytesIO:
|
142
341
|
"""
|
143
342
|
Processes the request and DSR data to build zip file containing the DSR report.
|
144
343
|
Returns the zip file as an in-memory byte array.
|
145
344
|
"""
|
345
|
+
start_time = time_module.time()
|
146
346
|
try:
|
147
347
|
# all the css for the pages is in main.css
|
148
348
|
self._add_file(
|
@@ -157,18 +357,28 @@ class DsrReportBuilder:
|
|
157
357
|
)
|
158
358
|
|
159
359
|
# pre-process data to split the dataset:collection keys
|
160
|
-
datasets:
|
161
|
-
for key, rows in self.dsr_data.items():
|
162
|
-
parts = key.split(":", 1)
|
163
|
-
dataset_name, collection_name = (
|
164
|
-
parts if len(parts) > 1 else ("manual", parts[0])
|
165
|
-
)
|
166
|
-
datasets[dataset_name][collection_name].extend(rows)
|
360
|
+
datasets: dict[str, Any] = self._get_datasets_from_dsr_data()
|
167
361
|
|
168
|
-
|
169
|
-
|
362
|
+
# Sort datasets alphabetically, excluding special cases
|
363
|
+
regular_datasets = [
|
364
|
+
name for name in sorted(datasets.keys()) if name != "dataset"
|
365
|
+
] # pylint: disable=invalid-name
|
366
|
+
|
367
|
+
# Add regular datasets in alphabetical order
|
368
|
+
for dataset_name in regular_datasets:
|
369
|
+
self._add_dataset(dataset_name, datasets[dataset_name])
|
170
370
|
self.main_links[dataset_name] = f"data/{dataset_name}/index.html"
|
171
371
|
|
372
|
+
# Add Additional Data if it exists
|
373
|
+
if "dataset" in datasets:
|
374
|
+
self._add_dataset("dataset", datasets["dataset"])
|
375
|
+
self.main_links["Additional Data"] = "data/dataset/index.html"
|
376
|
+
|
377
|
+
# Add Additional Attachments last if it exists
|
378
|
+
if "attachments" in self.dsr_data:
|
379
|
+
self._add_attachments(self.dsr_data["attachments"])
|
380
|
+
self.main_links["Additional Attachments"] = "attachments/index.html"
|
381
|
+
|
172
382
|
# create the main index once all the datasets have been added
|
173
383
|
self._add_file(
|
174
384
|
"welcome.html",
|
@@ -182,12 +392,20 @@ class DsrReportBuilder:
|
|
182
392
|
|
183
393
|
# reset the file pointer so the file can be fully read by the caller
|
184
394
|
self.baos.seek(0)
|
395
|
+
|
396
|
+
# Calculate time taken and file size
|
397
|
+
time_taken = time_module.time() - start_time
|
398
|
+
file_size = self._format_size(float(len(self.baos.getvalue())))
|
399
|
+
|
400
|
+
logger.bind(time_to_generate=time_taken, dsr_package_size=file_size).info(
|
401
|
+
"DSR report generation complete."
|
402
|
+
)
|
185
403
|
return self.baos
|
186
404
|
|
187
405
|
|
188
|
-
def _map_privacy_request(privacy_request: PrivacyRequest) ->
|
406
|
+
def _map_privacy_request(privacy_request: PrivacyRequest) -> dict[str, Any]:
|
189
407
|
"""Creates a map with a subset of values from the privacy request"""
|
190
|
-
request_data:
|
408
|
+
request_data: dict[str, Any] = {}
|
191
409
|
request_data["id"] = privacy_request.id
|
192
410
|
|
193
411
|
action_type: Optional[ActionType] = privacy_request.policy.get_action_type()
|
@@ -0,0 +1,33 @@
|
|
1
|
+
<html>
|
2
|
+
<head>
|
3
|
+
<link rel="stylesheet" href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600">
|
4
|
+
<link rel="stylesheet" href="../data/main.css">
|
5
|
+
</head>
|
6
|
+
<body>
|
7
|
+
<div class="container">
|
8
|
+
<div class="header"></div>
|
9
|
+
<div class="content">
|
10
|
+
<div class="button-container">
|
11
|
+
<a href="../welcome.html">
|
12
|
+
<div class="button"><img src="../data/back.svg"></div>
|
13
|
+
<span>Back to main page</span>
|
14
|
+
</a>
|
15
|
+
</div>
|
16
|
+
<h1>Attachments</h1>
|
17
|
+
<p class="expiration-notice">Note: All download links will expire in 7 days.</p>
|
18
|
+
<div class="table table-hover">
|
19
|
+
<div class="table-row">
|
20
|
+
<div class="table-cell" style="text-align: left;">File Name</div>
|
21
|
+
<div class="table-cell" style="text-align: left;">Size</div>
|
22
|
+
</div>
|
23
|
+
{% for name, info in data.items() %}
|
24
|
+
<a href="{{ info.url }}" class="table-row" target="_blank">
|
25
|
+
<div class="table-cell" style="text-align: left;">{{ name }}</div>
|
26
|
+
<div class="table-cell" style="text-align: left;">{{ info.size }}</div>
|
27
|
+
</a>
|
28
|
+
{% endfor %}
|
29
|
+
</div>
|
30
|
+
</div>
|
31
|
+
</div>
|
32
|
+
</body>
|
33
|
+
</html>
|
@@ -14,17 +14,42 @@
|
|
14
14
|
</a>
|
15
15
|
</div>
|
16
16
|
<h1>{{ heading }}</h1>
|
17
|
-
|
18
|
-
|
19
|
-
|
17
|
+
{% for item in data.collection_items %}
|
18
|
+
<div class="item-section">
|
19
|
+
<h2>{{ item.heading }}</h2>
|
20
|
+
<div class="table">
|
21
|
+
<div class="table-row">
|
22
|
+
<div class="table-cell">Field</div>
|
23
|
+
<div class="table-cell">Value</div>
|
24
|
+
</div>
|
25
|
+
{% for field, value in item.data.items() %}
|
26
|
+
<div class="table-row">
|
27
|
+
<div class="table-cell">{{ field }}</div>
|
28
|
+
<div class="table-cell">
|
29
|
+
{% if field == "attachments" and value is mapping and value|length > 0 %}
|
30
|
+
<p class="expiration-notice">Note: All download links will expire in 7 days.</p>
|
31
|
+
<div class="table table-hover">
|
32
|
+
<div class="table-row">
|
33
|
+
<div class="table-cell" style="text-align: left;">File Name</div>
|
34
|
+
<div class="table-cell" style="text-align: left;">Size</div>
|
35
|
+
</div>
|
36
|
+
{% for attachment_name, attachment_info in value.items() %}
|
37
|
+
<a href="{{ attachment_info.url }}" class="table-row" target="_blank">
|
38
|
+
<div class="table-cell" style="text-align: left;">{{ attachment_name }}</div>
|
39
|
+
<div class="table-cell" style="text-align: left;">{{ attachment_info.size }}</div>
|
40
|
+
</a>
|
41
|
+
{% endfor %}
|
42
|
+
</div>
|
43
|
+
{% else %}
|
44
|
+
<pre>{{ value | pretty_print }}</pre>
|
45
|
+
{% endif %}
|
46
|
+
</div>
|
47
|
+
</div>
|
48
|
+
{% endfor %}
|
20
49
|
</div>
|
21
|
-
{% for name, link in data.items() %}
|
22
|
-
<a href="{{ link }}" class="table-row">
|
23
|
-
<div class="table-cell">{{ name }}</div>
|
24
|
-
</a>
|
25
|
-
{% endfor %}
|
26
50
|
</div>
|
51
|
+
{% endfor %}
|
27
52
|
</div>
|
28
53
|
</div>
|
29
54
|
</body>
|
30
|
-
</html>
|
55
|
+
</html>
|