ethyca-fides 2.69.0rc9__py2.py3-none-any.whl → 2.69.0rc10__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ethyca-fides might be problematic. Click here for more details.
- {ethyca_fides-2.69.0rc9.dist-info → ethyca_fides-2.69.0rc10.dist-info}/METADATA +1 -1
- {ethyca_fides-2.69.0rc9.dist-info → ethyca_fides-2.69.0rc10.dist-info}/RECORD +98 -98
- fides/_version.py +3 -3
- fides/api/service/privacy_request/dsr_package/dsr_report_builder.py +253 -71
- fides/api/service/privacy_request/dsr_package/templates/attachments_index.html +4 -2
- fides/api/service/privacy_request/dsr_package/templates/collection_index.html +3 -1
- fides/api/service/privacy_request/dsr_package/templates/dataset_index.html +1 -1
- fides/api/service/privacy_request/request_runner_service.py +8 -2
- fides/api/service/storage/streaming/smart_open_streaming_storage.py +106 -169
- fides/api/service/storage/util.py +579 -0
- fides/api/task/manual/manual_task_graph_task.py +11 -9
- fides/ui-build/static/admin/404.html +1 -1
- fides/ui-build/static/admin/add-systems/manual.html +1 -1
- fides/ui-build/static/admin/add-systems/multiple.html +1 -1
- fides/ui-build/static/admin/add-systems.html +1 -1
- fides/ui-build/static/admin/consent/configure/add-vendors.html +1 -1
- fides/ui-build/static/admin/consent/configure.html +1 -1
- fides/ui-build/static/admin/consent/privacy-experience/[id].html +1 -1
- fides/ui-build/static/admin/consent/privacy-experience/new.html +1 -1
- fides/ui-build/static/admin/consent/privacy-experience.html +1 -1
- fides/ui-build/static/admin/consent/privacy-notices/[id].html +1 -1
- fides/ui-build/static/admin/consent/privacy-notices/new.html +1 -1
- fides/ui-build/static/admin/consent/privacy-notices.html +1 -1
- fides/ui-build/static/admin/consent/properties.html +1 -1
- fides/ui-build/static/admin/consent/reporting.html +1 -1
- fides/ui-build/static/admin/consent.html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/projects/[projectUrn]/[resourceUrn].html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/projects/[projectUrn].html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/projects.html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/resources/[resourceUrn].html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/resources.html +1 -1
- fides/ui-build/static/admin/data-catalog.html +1 -1
- fides/ui-build/static/admin/data-discovery/action-center/[monitorId]/[systemId].html +1 -1
- fides/ui-build/static/admin/data-discovery/action-center/[monitorId].html +1 -1
- fides/ui-build/static/admin/data-discovery/action-center.html +1 -1
- fides/ui-build/static/admin/data-discovery/activity.html +1 -1
- fides/ui-build/static/admin/data-discovery/detection/[resourceUrn].html +1 -1
- fides/ui-build/static/admin/data-discovery/detection.html +1 -1
- fides/ui-build/static/admin/data-discovery/discovery/[resourceUrn].html +1 -1
- fides/ui-build/static/admin/data-discovery/discovery.html +1 -1
- fides/ui-build/static/admin/datamap.html +1 -1
- fides/ui-build/static/admin/dataset/[datasetId]/[collectionName]/[...subfieldNames].html +1 -1
- fides/ui-build/static/admin/dataset/[datasetId]/[collectionName].html +1 -1
- fides/ui-build/static/admin/dataset/[datasetId].html +1 -1
- fides/ui-build/static/admin/dataset/new.html +1 -1
- fides/ui-build/static/admin/dataset.html +1 -1
- fides/ui-build/static/admin/datastore-connection/[id].html +1 -1
- fides/ui-build/static/admin/datastore-connection/new.html +1 -1
- fides/ui-build/static/admin/datastore-connection.html +1 -1
- fides/ui-build/static/admin/index.html +1 -1
- fides/ui-build/static/admin/integrations/[id].html +1 -1
- fides/ui-build/static/admin/integrations.html +1 -1
- fides/ui-build/static/admin/login/[provider].html +1 -1
- fides/ui-build/static/admin/login.html +1 -1
- fides/ui-build/static/admin/messaging/[id].html +1 -1
- fides/ui-build/static/admin/messaging/add-template.html +1 -1
- fides/ui-build/static/admin/messaging.html +1 -1
- fides/ui-build/static/admin/poc/ant-components.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/AntForm.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/FormikAntFormItem.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/FormikControlled.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/FormikField.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/FormikSpreadField.html +1 -1
- fides/ui-build/static/admin/poc/forms.html +1 -1
- fides/ui-build/static/admin/poc/table-migration.html +1 -1
- fides/ui-build/static/admin/privacy-requests/[id].html +1 -1
- fides/ui-build/static/admin/privacy-requests/configure/messaging.html +1 -1
- fides/ui-build/static/admin/privacy-requests/configure/storage.html +1 -1
- fides/ui-build/static/admin/privacy-requests/configure.html +1 -1
- fides/ui-build/static/admin/privacy-requests.html +1 -1
- fides/ui-build/static/admin/properties/[id].html +1 -1
- fides/ui-build/static/admin/properties/add-property.html +1 -1
- fides/ui-build/static/admin/properties.html +1 -1
- fides/ui-build/static/admin/reporting/datamap.html +1 -1
- fides/ui-build/static/admin/settings/about/alpha.html +1 -1
- fides/ui-build/static/admin/settings/about.html +1 -1
- fides/ui-build/static/admin/settings/consent/[configuration_id]/[purpose_id].html +1 -1
- fides/ui-build/static/admin/settings/consent.html +1 -1
- fides/ui-build/static/admin/settings/custom-fields.html +1 -1
- fides/ui-build/static/admin/settings/domain-records.html +1 -1
- fides/ui-build/static/admin/settings/domains.html +1 -1
- fides/ui-build/static/admin/settings/email-templates.html +1 -1
- fides/ui-build/static/admin/settings/locations.html +1 -1
- fides/ui-build/static/admin/settings/organization.html +1 -1
- fides/ui-build/static/admin/settings/regulations.html +1 -1
- fides/ui-build/static/admin/systems/configure/[id]/test-datasets.html +1 -1
- fides/ui-build/static/admin/systems/configure/[id].html +1 -1
- fides/ui-build/static/admin/systems.html +1 -1
- fides/ui-build/static/admin/taxonomy.html +1 -1
- fides/ui-build/static/admin/user-management/new.html +1 -1
- fides/ui-build/static/admin/user-management/profile/[id].html +1 -1
- fides/ui-build/static/admin/user-management.html +1 -1
- {ethyca_fides-2.69.0rc9.dist-info → ethyca_fides-2.69.0rc10.dist-info}/WHEEL +0 -0
- {ethyca_fides-2.69.0rc9.dist-info → ethyca_fides-2.69.0rc10.dist-info}/entry_points.txt +0 -0
- {ethyca_fides-2.69.0rc9.dist-info → ethyca_fides-2.69.0rc10.dist-info}/licenses/LICENSE +0 -0
- {ethyca_fides-2.69.0rc9.dist-info → ethyca_fides-2.69.0rc10.dist-info}/top_level.txt +0 -0
- /fides/ui-build/static/admin/_next/static/{XiHm-6CdVChTC5rbN9GtT → 8qfO1Ol3G3QbcXpHAnPlU}/_buildManifest.js +0 -0
- /fides/ui-build/static/admin/_next/static/{XiHm-6CdVChTC5rbN9GtT → 8qfO1Ol3G3QbcXpHAnPlU}/_ssgManifest.js +0 -0
|
@@ -1,8 +1,8 @@
|
|
|
1
|
+
import copy
|
|
1
2
|
import json
|
|
2
3
|
import os
|
|
3
4
|
import time as time_module
|
|
4
5
|
import zipfile
|
|
5
|
-
from collections import defaultdict
|
|
6
6
|
from io import BytesIO
|
|
7
7
|
from pathlib import Path
|
|
8
8
|
from typing import TYPE_CHECKING, Any, Optional
|
|
@@ -12,7 +12,18 @@ from jinja2 import Environment, FileSystemLoader
|
|
|
12
12
|
from loguru import logger
|
|
13
13
|
|
|
14
14
|
from fides.api.schemas.policy import ActionType
|
|
15
|
+
from fides.api.service.storage.util import (
|
|
16
|
+
_get_datasets_from_dsr_data,
|
|
17
|
+
create_attachment_info_dict,
|
|
18
|
+
format_attachment_size,
|
|
19
|
+
generate_attachment_url_from_storage_path,
|
|
20
|
+
is_attachment_field,
|
|
21
|
+
process_attachment_naming,
|
|
22
|
+
process_attachments_contextually,
|
|
23
|
+
resolve_directory_from_context,
|
|
24
|
+
)
|
|
15
25
|
from fides.api.util.storage_util import StorageJSONEncoder, format_size
|
|
26
|
+
from fides.config import CONFIG
|
|
16
27
|
|
|
17
28
|
DSR_DIRECTORY = Path(__file__).parent.resolve()
|
|
18
29
|
|
|
@@ -46,6 +57,7 @@ class DsrReportBuilder:
|
|
|
46
57
|
self,
|
|
47
58
|
privacy_request: "PrivacyRequest",
|
|
48
59
|
dsr_data: dict[str, Any],
|
|
60
|
+
enable_streaming: bool = False,
|
|
49
61
|
):
|
|
50
62
|
"""
|
|
51
63
|
Initializes the DSR report builder.
|
|
@@ -54,7 +66,6 @@ class DsrReportBuilder:
|
|
|
54
66
|
jinja2.filters.FILTERS["pretty_print"] = lambda value, indent=4: json.dumps(
|
|
55
67
|
value, indent=indent, cls=StorageJSONEncoder
|
|
56
68
|
)
|
|
57
|
-
|
|
58
69
|
# Initialize instance zip file variables
|
|
59
70
|
self.baos = BytesIO()
|
|
60
71
|
|
|
@@ -70,15 +81,29 @@ class DsrReportBuilder:
|
|
|
70
81
|
"text_color": TEXT_COLOR,
|
|
71
82
|
"header_color": HEADER_COLOR,
|
|
72
83
|
"border_color": BORDER_COLOR,
|
|
84
|
+
"download_link_ttl_days": self._get_download_link_ttl_days(),
|
|
85
|
+
"enable_streaming": enable_streaming,
|
|
73
86
|
}
|
|
74
87
|
self.main_links: dict[str, Any] = {} # used to track the generated pages
|
|
75
88
|
|
|
76
89
|
# report data to populate the templates
|
|
77
90
|
self.request_data = _map_privacy_request(privacy_request)
|
|
78
91
|
self.dsr_data = dsr_data
|
|
92
|
+
self.enable_streaming = enable_streaming
|
|
93
|
+
|
|
94
|
+
# Track used filenames per dataset to prevent conflicts within the same dataset
|
|
95
|
+
# Maps dataset_name -> set of used filenames
|
|
96
|
+
self.used_filenames_per_dataset: dict[str, set[str]] = {}
|
|
97
|
+
|
|
98
|
+
# Track attachments by their unique identifier to prevent duplicate processing
|
|
99
|
+
# Maps (download_url, file_name) -> unique_filename
|
|
100
|
+
self.processed_attachments: dict[tuple[str, str], str] = {}
|
|
101
|
+
# Track which attachments were processed as dataset attachments (not top-level)
|
|
102
|
+
self.dataset_processed_attachments: set[tuple[str, str]] = set()
|
|
79
103
|
|
|
80
|
-
|
|
81
|
-
|
|
104
|
+
def _get_download_link_ttl_days(self) -> int:
|
|
105
|
+
"""Get the download link TTL in days from the security configuration."""
|
|
106
|
+
return int(CONFIG.security.subject_request_download_link_ttl_seconds / 86400)
|
|
82
107
|
|
|
83
108
|
def _populate_template(
|
|
84
109
|
self,
|
|
@@ -148,34 +173,12 @@ class DsrReportBuilder:
|
|
|
148
173
|
),
|
|
149
174
|
)
|
|
150
175
|
|
|
151
|
-
|
|
152
|
-
"""
|
|
153
|
-
Generates a unique filename by appending a counter if the file already exists.
|
|
154
|
-
Now tracks filenames across all directories to ensure global uniqueness.
|
|
155
|
-
|
|
156
|
-
Args:
|
|
157
|
-
filename: The original filename
|
|
158
|
-
|
|
159
|
-
Returns:
|
|
160
|
-
A unique filename that won't conflict with existing files
|
|
161
|
-
"""
|
|
162
|
-
base_name, extension = os.path.splitext(filename)
|
|
163
|
-
counter = 1
|
|
164
|
-
unique_filename = filename
|
|
165
|
-
|
|
166
|
-
# Check if file exists in used_filenames set
|
|
167
|
-
while unique_filename in self.used_filenames:
|
|
168
|
-
unique_filename = f"{base_name}_{counter}{extension}"
|
|
169
|
-
counter += 1
|
|
170
|
-
|
|
171
|
-
# Add the new filename to the set
|
|
172
|
-
self.used_filenames.add(unique_filename)
|
|
173
|
-
return unique_filename
|
|
174
|
-
|
|
176
|
+
# pylint: disable=too-many-branches
|
|
175
177
|
def _write_attachment_content(
|
|
176
178
|
self,
|
|
177
179
|
attachments: list[dict[str, Any]],
|
|
178
180
|
directory: str,
|
|
181
|
+
dataset_name: str = "attachments",
|
|
179
182
|
) -> dict[str, dict[str, str]]:
|
|
180
183
|
"""
|
|
181
184
|
Processes attachments and returns a dictionary mapping filenames to their download URLs and sizes.
|
|
@@ -194,33 +197,133 @@ class DsrReportBuilder:
|
|
|
194
197
|
if not isinstance(attachment, dict):
|
|
195
198
|
continue
|
|
196
199
|
|
|
197
|
-
|
|
198
|
-
if not
|
|
199
|
-
|
|
200
|
+
# Get or create the used_filenames set for this dataset
|
|
201
|
+
if dataset_name not in self.used_filenames_per_dataset:
|
|
202
|
+
self.used_filenames_per_dataset[dataset_name] = set()
|
|
203
|
+
used_filenames = self.used_filenames_per_dataset[dataset_name]
|
|
204
|
+
|
|
205
|
+
# Process attachment naming using shared utility
|
|
206
|
+
result = process_attachment_naming(
|
|
207
|
+
attachment, used_filenames, self.processed_attachments, dataset_name
|
|
208
|
+
)
|
|
209
|
+
|
|
210
|
+
if result is None: # Skip if processing failed
|
|
200
211
|
continue
|
|
201
212
|
|
|
213
|
+
unique_filename, attachment_key = result
|
|
214
|
+
# Track that this attachment was processed as a dataset attachment
|
|
215
|
+
self.dataset_processed_attachments.add(attachment_key)
|
|
216
|
+
|
|
217
|
+
# Format file size using shared utility
|
|
218
|
+
file_size = format_attachment_size(attachment.get("file_size"))
|
|
219
|
+
|
|
220
|
+
# Determine the actual directory for this attachment based on its context
|
|
221
|
+
actual_directory = resolve_directory_from_context(attachment, directory)
|
|
222
|
+
|
|
223
|
+
# Generate attachment URL using shared utility with actual storage path
|
|
202
224
|
download_url = attachment.get("download_url")
|
|
203
225
|
if not download_url:
|
|
204
|
-
logger.warning("Skipping attachment with no download URL")
|
|
205
226
|
continue
|
|
206
227
|
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
228
|
+
attachment_url = generate_attachment_url_from_storage_path(
|
|
229
|
+
download_url,
|
|
230
|
+
unique_filename,
|
|
231
|
+
actual_directory, # This is the base_path where the file will be stored
|
|
232
|
+
actual_directory, # This is the HTML template directory
|
|
233
|
+
self.enable_streaming,
|
|
234
|
+
)
|
|
212
235
|
|
|
213
|
-
#
|
|
214
|
-
|
|
236
|
+
# Create attachment info dictionary using shared utility
|
|
237
|
+
file_name = attachment.get("file_name")
|
|
238
|
+
if not file_name:
|
|
239
|
+
continue
|
|
215
240
|
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
(unique_filename, {"url": download_url, "size": file_size})
|
|
241
|
+
attachment_info = create_attachment_info_dict(
|
|
242
|
+
attachment_url, file_size, file_name
|
|
219
243
|
)
|
|
220
244
|
|
|
245
|
+
processed_attachments.append((unique_filename, attachment_info))
|
|
246
|
+
|
|
221
247
|
# Convert list of tuples to dictionary
|
|
222
248
|
return dict(processed_attachments)
|
|
223
249
|
|
|
250
|
+
def _get_processed_attachments_list(
|
|
251
|
+
self, data: dict[str, Any]
|
|
252
|
+
) -> list[dict[str, Any]]:
|
|
253
|
+
"""Get all processed attachments using shared contextual logic.
|
|
254
|
+
|
|
255
|
+
Args:
|
|
256
|
+
data: The DSR data dictionary
|
|
257
|
+
|
|
258
|
+
Returns:
|
|
259
|
+
List of processed attachment dictionaries
|
|
260
|
+
"""
|
|
261
|
+
# Create temporary sets for compatibility with the shared function
|
|
262
|
+
used_filenames_data = set()
|
|
263
|
+
used_filenames_attachments = set()
|
|
264
|
+
|
|
265
|
+
# Populate the temporary sets from our per-dataset tracking
|
|
266
|
+
for dataset_name, filenames in self.used_filenames_per_dataset.items():
|
|
267
|
+
if dataset_name == "attachments":
|
|
268
|
+
used_filenames_attachments.update(filenames)
|
|
269
|
+
else:
|
|
270
|
+
used_filenames_data.update(filenames)
|
|
271
|
+
|
|
272
|
+
processed_attachments_list = process_attachments_contextually(
|
|
273
|
+
data,
|
|
274
|
+
used_filenames_data,
|
|
275
|
+
used_filenames_attachments,
|
|
276
|
+
self.processed_attachments,
|
|
277
|
+
enable_streaming=self.enable_streaming,
|
|
278
|
+
)
|
|
279
|
+
|
|
280
|
+
# Trust the contextual processing completely - it already correctly determines
|
|
281
|
+
# context based on the attachment's location in the DSR data structure
|
|
282
|
+
filtered_list = processed_attachments_list
|
|
283
|
+
|
|
284
|
+
return filtered_list
|
|
285
|
+
|
|
286
|
+
def _generate_attachment_url_from_index(
|
|
287
|
+
self, context: dict[str, Any], unique_filename: str
|
|
288
|
+
) -> str:
|
|
289
|
+
"""Generate the correct URL from attachments/index.html to an attachment file.
|
|
290
|
+
|
|
291
|
+
Args:
|
|
292
|
+
context: The attachment context information
|
|
293
|
+
unique_filename: The unique filename of the attachment
|
|
294
|
+
|
|
295
|
+
Returns:
|
|
296
|
+
The relative URL from attachments/index.html to the attachment file
|
|
297
|
+
"""
|
|
298
|
+
if context.get("type") == "top_level":
|
|
299
|
+
# Top-level attachments are in the same directory as the index
|
|
300
|
+
return unique_filename
|
|
301
|
+
if context.get("type") in ["direct", "nested"]:
|
|
302
|
+
# Dataset attachments are in data/dataset/collection/attachments/
|
|
303
|
+
# From attachments/index.html, we need to go to ../data/dataset/collection/attachments/filename
|
|
304
|
+
dataset = context.get("dataset", "unknown")
|
|
305
|
+
collection = context.get("collection", "unknown")
|
|
306
|
+
return f"../data/{dataset}/{collection}/attachments/{unique_filename}"
|
|
307
|
+
# Fallback for other cases - return just the filename
|
|
308
|
+
return unique_filename
|
|
309
|
+
|
|
310
|
+
def _create_attachment_info_with_corrected_url(
|
|
311
|
+
self, attachment_info: dict[str, str], correct_url: str
|
|
312
|
+
) -> dict[str, str]:
|
|
313
|
+
"""Create attachment info with corrected URL.
|
|
314
|
+
|
|
315
|
+
Args:
|
|
316
|
+
attachment_info: The original attachment info
|
|
317
|
+
correct_url: The corrected URL
|
|
318
|
+
|
|
319
|
+
Returns:
|
|
320
|
+
New attachment info with corrected URL and safe_url
|
|
321
|
+
"""
|
|
322
|
+
corrected_attachment_info = attachment_info.copy()
|
|
323
|
+
corrected_attachment_info["url"] = correct_url
|
|
324
|
+
corrected_attachment_info["safe_url"] = correct_url
|
|
325
|
+
return corrected_attachment_info
|
|
326
|
+
|
|
224
327
|
def _add_collection(
|
|
225
328
|
self, rows: list[dict[str, Any]], dataset_name: str, collection_name: str
|
|
226
329
|
) -> None:
|
|
@@ -235,10 +338,11 @@ class DsrReportBuilder:
|
|
|
235
338
|
items_content = []
|
|
236
339
|
|
|
237
340
|
for index, collection_item in enumerate(rows, 1):
|
|
238
|
-
# Create a copy of the item data to avoid modifying the original
|
|
239
|
-
|
|
341
|
+
# Create a deep copy of the item data to avoid modifying the original DSR data
|
|
342
|
+
# This ensures the comprehensive attachments index can access unmodified attachments
|
|
343
|
+
item_data = copy.deepcopy(collection_item)
|
|
240
344
|
|
|
241
|
-
# Process any attachments in the item
|
|
345
|
+
# Process any attachments in the item - First check for direct attachments key
|
|
242
346
|
if "attachments" in item_data and isinstance(
|
|
243
347
|
item_data["attachments"], list
|
|
244
348
|
):
|
|
@@ -246,9 +350,32 @@ class DsrReportBuilder:
|
|
|
246
350
|
attachment_links = self._write_attachment_content(
|
|
247
351
|
item_data["attachments"],
|
|
248
352
|
f"data/{dataset_name}/{collection_name}",
|
|
353
|
+
dataset_name,
|
|
249
354
|
)
|
|
250
355
|
# Add the attachment URLs to the item data
|
|
251
356
|
item_data["attachments"] = attachment_links
|
|
357
|
+
else:
|
|
358
|
+
# Check for nested attachment fields (ManualTask format)
|
|
359
|
+
attachment_fields_found = []
|
|
360
|
+
for field_name, field_value in item_data.items():
|
|
361
|
+
if isinstance(field_value, list) and field_value:
|
|
362
|
+
# Check if this field contains attachment-like data
|
|
363
|
+
first_item = field_value[0]
|
|
364
|
+
if isinstance(first_item, dict) and all(
|
|
365
|
+
key in first_item
|
|
366
|
+
for key in ["file_name", "download_url", "file_size"]
|
|
367
|
+
):
|
|
368
|
+
attachment_fields_found.append(field_name)
|
|
369
|
+
|
|
370
|
+
# Process attachments and get their URLs
|
|
371
|
+
attachment_links = self._write_attachment_content(
|
|
372
|
+
field_value,
|
|
373
|
+
f"data/{dataset_name}/{collection_name}",
|
|
374
|
+
dataset_name,
|
|
375
|
+
)
|
|
376
|
+
|
|
377
|
+
# Replace the field value with processed attachment links
|
|
378
|
+
item_data[field_name] = attachment_links
|
|
252
379
|
|
|
253
380
|
# Add item content to the list
|
|
254
381
|
items_content.append(
|
|
@@ -294,34 +421,67 @@ class DsrReportBuilder:
|
|
|
294
421
|
),
|
|
295
422
|
)
|
|
296
423
|
|
|
297
|
-
def
|
|
424
|
+
def _add_comprehensive_attachments_index(self) -> None:
|
|
298
425
|
"""
|
|
299
|
-
|
|
426
|
+
Creates a comprehensive attachments index that includes ALL attachments
|
|
427
|
+
from all datasets and top-level attachments, with links pointing to their
|
|
428
|
+
actual storage locations.
|
|
300
429
|
"""
|
|
301
|
-
#
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
430
|
+
# Get all processed attachments using shared logic on original DSR data
|
|
431
|
+
processed_attachments_list = self._get_processed_attachments_list(self.dsr_data)
|
|
432
|
+
|
|
433
|
+
# Create a comprehensive attachment links dictionary with deduplication
|
|
434
|
+
all_attachment_links = {}
|
|
435
|
+
seen_attachment_keys = set()
|
|
436
|
+
|
|
437
|
+
for processed_attachment in processed_attachments_list:
|
|
438
|
+
unique_filename = processed_attachment["unique_filename"]
|
|
439
|
+
attachment_info = processed_attachment["attachment_info"]
|
|
440
|
+
context = processed_attachment["context"]
|
|
441
|
+
attachment = processed_attachment["attachment"]
|
|
442
|
+
|
|
443
|
+
# Create a unique key based on download_url to avoid duplicates
|
|
444
|
+
attachment_key = attachment.get("download_url")
|
|
445
|
+
if attachment_key in seen_attachment_keys:
|
|
307
446
|
continue
|
|
447
|
+
seen_attachment_keys.add(attachment_key)
|
|
308
448
|
|
|
309
|
-
|
|
310
|
-
if
|
|
311
|
-
|
|
449
|
+
# Generate the correct URL based on streaming settings
|
|
450
|
+
if self.enable_streaming:
|
|
451
|
+
# For streaming mode, use local attachment references
|
|
452
|
+
correct_url = self._generate_attachment_url_from_index(
|
|
453
|
+
context, unique_filename
|
|
454
|
+
)
|
|
312
455
|
else:
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
456
|
+
# For non-streaming mode, use original download URLs
|
|
457
|
+
correct_url = attachment.get("download_url", unique_filename)
|
|
458
|
+
|
|
459
|
+
# Create a descriptive key that includes the source location
|
|
460
|
+
if context.get("type") == "top_level":
|
|
461
|
+
key = f"Top-level: {unique_filename}"
|
|
462
|
+
elif context.get("type") in ["direct", "nested"]:
|
|
463
|
+
dataset = context.get("dataset", "unknown")
|
|
464
|
+
collection = context.get("collection", "unknown")
|
|
465
|
+
key = f"{dataset}/{collection}: {unique_filename}"
|
|
466
|
+
else:
|
|
467
|
+
key = unique_filename
|
|
321
468
|
|
|
322
|
-
|
|
469
|
+
# Create new attachment info with the correct URL
|
|
470
|
+
corrected_attachment_info = self._create_attachment_info_with_corrected_url(
|
|
471
|
+
attachment_info, correct_url
|
|
472
|
+
)
|
|
473
|
+
all_attachment_links[key] = corrected_attachment_info
|
|
323
474
|
|
|
324
|
-
|
|
475
|
+
# Generate comprehensive attachments index page
|
|
476
|
+
self._add_file(
|
|
477
|
+
"attachments/index.html",
|
|
478
|
+
self._populate_template(
|
|
479
|
+
"templates/attachments_index.html",
|
|
480
|
+
"All Attachments",
|
|
481
|
+
"All files attached to this privacy request",
|
|
482
|
+
all_attachment_links,
|
|
483
|
+
),
|
|
484
|
+
)
|
|
325
485
|
|
|
326
486
|
def generate(self) -> BytesIO:
|
|
327
487
|
"""
|
|
@@ -343,7 +503,7 @@ class DsrReportBuilder:
|
|
|
343
503
|
)
|
|
344
504
|
|
|
345
505
|
# pre-process data to split the dataset:collection keys
|
|
346
|
-
datasets: dict[str, Any] = self.
|
|
506
|
+
datasets: dict[str, Any] = _get_datasets_from_dsr_data(self.dsr_data)
|
|
347
507
|
|
|
348
508
|
# Sort datasets alphabetically, excluding special cases
|
|
349
509
|
regular_datasets = [
|
|
@@ -360,10 +520,32 @@ class DsrReportBuilder:
|
|
|
360
520
|
self._add_dataset("dataset", datasets["dataset"])
|
|
361
521
|
self.main_links["Additional Data"] = "data/dataset/index.html"
|
|
362
522
|
|
|
363
|
-
# Add
|
|
364
|
-
if
|
|
365
|
-
|
|
366
|
-
self.
|
|
523
|
+
# Add comprehensive attachments index that includes ALL attachments
|
|
524
|
+
# Check if there are any attachments at all (top-level or in datasets)
|
|
525
|
+
has_top_level_attachments = (
|
|
526
|
+
"attachments" in self.dsr_data and self.dsr_data["attachments"]
|
|
527
|
+
)
|
|
528
|
+
has_dataset_attachments = any(
|
|
529
|
+
any(
|
|
530
|
+
"attachments" in item
|
|
531
|
+
or any(
|
|
532
|
+
is_attachment_field(field_value)
|
|
533
|
+
for field_value in item.values()
|
|
534
|
+
if isinstance(field_value, list)
|
|
535
|
+
)
|
|
536
|
+
for item in collection_items
|
|
537
|
+
if isinstance(item, dict)
|
|
538
|
+
)
|
|
539
|
+
for collection in datasets.values()
|
|
540
|
+
if isinstance(collection, dict)
|
|
541
|
+
for collection_items in collection.values()
|
|
542
|
+
if isinstance(collection_items, list)
|
|
543
|
+
)
|
|
544
|
+
has_attachments = has_top_level_attachments or has_dataset_attachments
|
|
545
|
+
|
|
546
|
+
if has_attachments:
|
|
547
|
+
self._add_comprehensive_attachments_index()
|
|
548
|
+
self.main_links["All Attachments"] = "attachments/index.html"
|
|
367
549
|
|
|
368
550
|
# create the main index once all the datasets have been added
|
|
369
551
|
self._add_file(
|
|
@@ -14,7 +14,9 @@
|
|
|
14
14
|
</a>
|
|
15
15
|
</div>
|
|
16
16
|
<h1>Attachments</h1>
|
|
17
|
-
|
|
17
|
+
{% if not enable_streaming %}
|
|
18
|
+
<p class="expiration-notice">Note: All download links will expire in {{ download_link_ttl_days }} days.</p>
|
|
19
|
+
{% endif %}
|
|
18
20
|
<div class="table table-hover">
|
|
19
21
|
<div class="table-row">
|
|
20
22
|
<div class="table-cell" style="text-align: left;">File Name</div>
|
|
@@ -22,7 +24,7 @@
|
|
|
22
24
|
</div>
|
|
23
25
|
{% for name, info in data.items() %}
|
|
24
26
|
<a href="{{ info.url }}" class="table-row" target="_blank">
|
|
25
|
-
<div class="table-cell" style="text-align: left;">{{ name }}</div>
|
|
27
|
+
<div class="table-cell" style="text-align: left;">{% if enable_streaming %}{{ name }}{% else %}{{ info.original_name }}{% endif %}</div>
|
|
26
28
|
<div class="table-cell" style="text-align: left;">{{ info.size }}</div>
|
|
27
29
|
</a>
|
|
28
30
|
{% endfor %}
|
|
@@ -35,7 +35,9 @@
|
|
|
35
35
|
{% endif %}
|
|
36
36
|
|
|
37
37
|
{% if _is_attachment_block %}
|
|
38
|
-
|
|
38
|
+
{% if not enable_streaming %}
|
|
39
|
+
<p class="expiration-notice">Note: All download links will expire in {{ download_link_ttl_days }} days.</p>
|
|
40
|
+
{% endif %}
|
|
39
41
|
<div class="table table-hover">
|
|
40
42
|
<div class="table-row">
|
|
41
43
|
<div class="table-cell" style="text-align: left;">File Name</div>
|
|
@@ -312,8 +312,14 @@ def upload_and_save_access_results( # pylint: disable=R0912
|
|
|
312
312
|
loaded_attachments = [
|
|
313
313
|
attachment
|
|
314
314
|
for attachment in privacy_request.attachments
|
|
315
|
-
if
|
|
316
|
-
|
|
315
|
+
if not any(
|
|
316
|
+
ref.reference_type
|
|
317
|
+
in [
|
|
318
|
+
AttachmentReferenceType.access_manual_webhook,
|
|
319
|
+
AttachmentReferenceType.manual_task_submission,
|
|
320
|
+
]
|
|
321
|
+
for ref in attachment.references
|
|
322
|
+
)
|
|
317
323
|
]
|
|
318
324
|
attachments = get_attachments_content(loaded_attachments)
|
|
319
325
|
# Process attachments once for both upload and storage
|