ethyca-fides 2.69.0rc9__py2.py3-none-any.whl → 2.69.0rc10__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ethyca-fides might be problematic. Click here for more details.

Files changed (98) hide show
  1. {ethyca_fides-2.69.0rc9.dist-info → ethyca_fides-2.69.0rc10.dist-info}/METADATA +1 -1
  2. {ethyca_fides-2.69.0rc9.dist-info → ethyca_fides-2.69.0rc10.dist-info}/RECORD +98 -98
  3. fides/_version.py +3 -3
  4. fides/api/service/privacy_request/dsr_package/dsr_report_builder.py +253 -71
  5. fides/api/service/privacy_request/dsr_package/templates/attachments_index.html +4 -2
  6. fides/api/service/privacy_request/dsr_package/templates/collection_index.html +3 -1
  7. fides/api/service/privacy_request/dsr_package/templates/dataset_index.html +1 -1
  8. fides/api/service/privacy_request/request_runner_service.py +8 -2
  9. fides/api/service/storage/streaming/smart_open_streaming_storage.py +106 -169
  10. fides/api/service/storage/util.py +579 -0
  11. fides/api/task/manual/manual_task_graph_task.py +11 -9
  12. fides/ui-build/static/admin/404.html +1 -1
  13. fides/ui-build/static/admin/add-systems/manual.html +1 -1
  14. fides/ui-build/static/admin/add-systems/multiple.html +1 -1
  15. fides/ui-build/static/admin/add-systems.html +1 -1
  16. fides/ui-build/static/admin/consent/configure/add-vendors.html +1 -1
  17. fides/ui-build/static/admin/consent/configure.html +1 -1
  18. fides/ui-build/static/admin/consent/privacy-experience/[id].html +1 -1
  19. fides/ui-build/static/admin/consent/privacy-experience/new.html +1 -1
  20. fides/ui-build/static/admin/consent/privacy-experience.html +1 -1
  21. fides/ui-build/static/admin/consent/privacy-notices/[id].html +1 -1
  22. fides/ui-build/static/admin/consent/privacy-notices/new.html +1 -1
  23. fides/ui-build/static/admin/consent/privacy-notices.html +1 -1
  24. fides/ui-build/static/admin/consent/properties.html +1 -1
  25. fides/ui-build/static/admin/consent/reporting.html +1 -1
  26. fides/ui-build/static/admin/consent.html +1 -1
  27. fides/ui-build/static/admin/data-catalog/[systemId]/projects/[projectUrn]/[resourceUrn].html +1 -1
  28. fides/ui-build/static/admin/data-catalog/[systemId]/projects/[projectUrn].html +1 -1
  29. fides/ui-build/static/admin/data-catalog/[systemId]/projects.html +1 -1
  30. fides/ui-build/static/admin/data-catalog/[systemId]/resources/[resourceUrn].html +1 -1
  31. fides/ui-build/static/admin/data-catalog/[systemId]/resources.html +1 -1
  32. fides/ui-build/static/admin/data-catalog.html +1 -1
  33. fides/ui-build/static/admin/data-discovery/action-center/[monitorId]/[systemId].html +1 -1
  34. fides/ui-build/static/admin/data-discovery/action-center/[monitorId].html +1 -1
  35. fides/ui-build/static/admin/data-discovery/action-center.html +1 -1
  36. fides/ui-build/static/admin/data-discovery/activity.html +1 -1
  37. fides/ui-build/static/admin/data-discovery/detection/[resourceUrn].html +1 -1
  38. fides/ui-build/static/admin/data-discovery/detection.html +1 -1
  39. fides/ui-build/static/admin/data-discovery/discovery/[resourceUrn].html +1 -1
  40. fides/ui-build/static/admin/data-discovery/discovery.html +1 -1
  41. fides/ui-build/static/admin/datamap.html +1 -1
  42. fides/ui-build/static/admin/dataset/[datasetId]/[collectionName]/[...subfieldNames].html +1 -1
  43. fides/ui-build/static/admin/dataset/[datasetId]/[collectionName].html +1 -1
  44. fides/ui-build/static/admin/dataset/[datasetId].html +1 -1
  45. fides/ui-build/static/admin/dataset/new.html +1 -1
  46. fides/ui-build/static/admin/dataset.html +1 -1
  47. fides/ui-build/static/admin/datastore-connection/[id].html +1 -1
  48. fides/ui-build/static/admin/datastore-connection/new.html +1 -1
  49. fides/ui-build/static/admin/datastore-connection.html +1 -1
  50. fides/ui-build/static/admin/index.html +1 -1
  51. fides/ui-build/static/admin/integrations/[id].html +1 -1
  52. fides/ui-build/static/admin/integrations.html +1 -1
  53. fides/ui-build/static/admin/login/[provider].html +1 -1
  54. fides/ui-build/static/admin/login.html +1 -1
  55. fides/ui-build/static/admin/messaging/[id].html +1 -1
  56. fides/ui-build/static/admin/messaging/add-template.html +1 -1
  57. fides/ui-build/static/admin/messaging.html +1 -1
  58. fides/ui-build/static/admin/poc/ant-components.html +1 -1
  59. fides/ui-build/static/admin/poc/form-experiments/AntForm.html +1 -1
  60. fides/ui-build/static/admin/poc/form-experiments/FormikAntFormItem.html +1 -1
  61. fides/ui-build/static/admin/poc/form-experiments/FormikControlled.html +1 -1
  62. fides/ui-build/static/admin/poc/form-experiments/FormikField.html +1 -1
  63. fides/ui-build/static/admin/poc/form-experiments/FormikSpreadField.html +1 -1
  64. fides/ui-build/static/admin/poc/forms.html +1 -1
  65. fides/ui-build/static/admin/poc/table-migration.html +1 -1
  66. fides/ui-build/static/admin/privacy-requests/[id].html +1 -1
  67. fides/ui-build/static/admin/privacy-requests/configure/messaging.html +1 -1
  68. fides/ui-build/static/admin/privacy-requests/configure/storage.html +1 -1
  69. fides/ui-build/static/admin/privacy-requests/configure.html +1 -1
  70. fides/ui-build/static/admin/privacy-requests.html +1 -1
  71. fides/ui-build/static/admin/properties/[id].html +1 -1
  72. fides/ui-build/static/admin/properties/add-property.html +1 -1
  73. fides/ui-build/static/admin/properties.html +1 -1
  74. fides/ui-build/static/admin/reporting/datamap.html +1 -1
  75. fides/ui-build/static/admin/settings/about/alpha.html +1 -1
  76. fides/ui-build/static/admin/settings/about.html +1 -1
  77. fides/ui-build/static/admin/settings/consent/[configuration_id]/[purpose_id].html +1 -1
  78. fides/ui-build/static/admin/settings/consent.html +1 -1
  79. fides/ui-build/static/admin/settings/custom-fields.html +1 -1
  80. fides/ui-build/static/admin/settings/domain-records.html +1 -1
  81. fides/ui-build/static/admin/settings/domains.html +1 -1
  82. fides/ui-build/static/admin/settings/email-templates.html +1 -1
  83. fides/ui-build/static/admin/settings/locations.html +1 -1
  84. fides/ui-build/static/admin/settings/organization.html +1 -1
  85. fides/ui-build/static/admin/settings/regulations.html +1 -1
  86. fides/ui-build/static/admin/systems/configure/[id]/test-datasets.html +1 -1
  87. fides/ui-build/static/admin/systems/configure/[id].html +1 -1
  88. fides/ui-build/static/admin/systems.html +1 -1
  89. fides/ui-build/static/admin/taxonomy.html +1 -1
  90. fides/ui-build/static/admin/user-management/new.html +1 -1
  91. fides/ui-build/static/admin/user-management/profile/[id].html +1 -1
  92. fides/ui-build/static/admin/user-management.html +1 -1
  93. {ethyca_fides-2.69.0rc9.dist-info → ethyca_fides-2.69.0rc10.dist-info}/WHEEL +0 -0
  94. {ethyca_fides-2.69.0rc9.dist-info → ethyca_fides-2.69.0rc10.dist-info}/entry_points.txt +0 -0
  95. {ethyca_fides-2.69.0rc9.dist-info → ethyca_fides-2.69.0rc10.dist-info}/licenses/LICENSE +0 -0
  96. {ethyca_fides-2.69.0rc9.dist-info → ethyca_fides-2.69.0rc10.dist-info}/top_level.txt +0 -0
  97. /fides/ui-build/static/admin/_next/static/{XiHm-6CdVChTC5rbN9GtT → 8qfO1Ol3G3QbcXpHAnPlU}/_buildManifest.js +0 -0
  98. /fides/ui-build/static/admin/_next/static/{XiHm-6CdVChTC5rbN9GtT → 8qfO1Ol3G3QbcXpHAnPlU}/_ssgManifest.js +0 -0
@@ -1,8 +1,13 @@
1
1
  import os
2
+ from collections import defaultdict
2
3
  from enum import Enum as EnumType
4
+ from typing import Any, Callable, Optional
5
+ from urllib.parse import quote
3
6
 
4
7
  from loguru import logger
5
8
 
9
+ from fides.api.util.storage_util import format_size
10
+
6
11
  # This is the max file size for downloading the content of an attachment.
7
12
  # This is an industry standard used by companies like Google and Microsoft.
8
13
  LARGE_FILE_THRESHOLD = 25 * 1024 * 1024 # 25 MB
@@ -84,3 +89,577 @@ def get_allowed_file_type_or_raise(file_key: str) -> str:
84
89
  return AllowedFileType[file_type].value
85
90
  except KeyError:
86
91
  raise ValueError(error_msg)
92
+
93
+
94
def get_unique_filename(filename: str, used_filenames: set[str]) -> str:
    """
    Return a filename that does not collide with any name in ``used_filenames``.

    When ``filename`` is already taken, a numeric suffix is inserted between
    the stem and the extension (``report.pdf`` -> ``report_1.pdf`` ->
    ``report_2.pdf`` ...) until a free name is found. The set is only read
    here; the caller is responsible for recording the returned name.

    Args:
        filename: The original filename
        used_filenames: Set of filenames that have already been used

    Returns:
        ``filename`` itself if it is free, otherwise the first suffixed
        variant that is not present in ``used_filenames``
    """
    if filename not in used_filenames:
        return filename

    # os.path.splitext only peels off the last extension, so
    # "archive.tar.gz" becomes "archive.tar_1.gz".
    stem, suffix = os.path.splitext(filename)
    attempt = 1
    while (candidate := f"{stem}_{attempt}{suffix}") in used_filenames:
        attempt += 1
    return candidate
116
+
117
+
118
def determine_dataset_name_from_path(base_path: str) -> str:
    """
    Determine the dataset name from a base path.

    The literal path ``"attachments"`` maps to the dataset ``"attachments"``.
    A path of the form ``"data/<dataset>/..."`` maps to ``<dataset>``.
    Everything else, including ``"data/"`` or ``"data//x"`` where the dataset
    segment is empty, maps to ``"unknown"``.

    Args:
        base_path: The base path (e.g., "attachments", "data/manualtask/manual_data")

    Returns:
        The dataset name extracted from the path, or "unknown"
    """
    if base_path == "attachments":
        return "attachments"

    # Extract dataset name from path like "data/manualtask/manual_data"
    path_parts = base_path.split("/")
    # The trailing truthiness check rejects an empty dataset segment so that
    # callers never receive "" as a dataset name.
    if len(path_parts) >= 2 and path_parts[0] == "data" and path_parts[1]:
        return path_parts[1]  # e.g., "manualtask"

    return "unknown"
137
+
138
+
139
def resolve_attachment_storage_path(
    unique_filename: str,
    base_path: str,
) -> str:
    """
    Resolve the storage path for an attachment file inside the package.

    The result is ``<base_path>/<unique_filename>``. Trailing slashes on
    ``base_path`` are dropped so the join never yields ``//``, and an empty
    ``base_path`` yields just the filename rather than a path with a leading
    slash.

    Args:
        unique_filename: The unique filename for the attachment
        base_path: The base path for the attachment (e.g., "attachments", "data/dataset/collection")

    Returns:
        The full storage path for the attachment file
    """
    normalized_base = base_path.rstrip("/")
    if not normalized_base:
        # No directory component: avoid returning "/<filename>".
        return unique_filename
    return f"{normalized_base}/{unique_filename}"
158
+
159
+
160
def generate_attachment_url_from_storage_path(
    download_url: str,
    unique_filename: str,
    base_path: str,
    html_directory: str,
    enable_streaming: bool = False,
) -> str:
    """
    Pick the URL an HTML page should use to link to an attachment.

    Without streaming the original ``download_url`` is returned untouched.
    With streaming a relative link is produced instead:

    * page and file both in ``"attachments"``: the bare filename
    * page and file both under ``"data/"``: ``attachments/<filename>``
    * anything else: ``../`` followed by the path returned by
      ``resolve_attachment_storage_path()``

    Args:
        download_url: The original download URL
        unique_filename: The unique filename for the attachment
        base_path: The base path where the attachment is stored (e.g., "attachments", "data/dataset/collection")
        html_directory: The directory where the HTML template is located
        enable_streaming: Whether streaming mode is enabled

    Returns:
        The appropriate attachment URL
    """
    if not enable_streaming:
        return download_url

    # attachments/index.html -> attachments/<file>: same directory.
    same_attachments_dir = (
        html_directory == "attachments" and base_path == "attachments"
    )
    if same_attachments_dir:
        return unique_filename

    # data/<dataset>/<collection>/index.html -> its attachments/ subdirectory.
    both_under_data = html_directory.startswith("data/") and base_path.startswith(
        "data/"
    )
    if both_under_data:
        return f"attachments/{unique_filename}"

    # Simplified fallback: step up one level, then follow the storage path.
    return "../" + resolve_attachment_storage_path(unique_filename, base_path)
206
+
207
+
208
def process_attachment_naming(
    attachment: dict[str, Any],
    used_filenames: set[str],
    processed_attachments: dict[tuple[str, str], str],
    dataset_name: str = "attachments",
) -> Optional[tuple[str, tuple[str, str]]]:
    """
    Assign a unique filename to an attachment and return it with its key.

    Attachments are identified by ``(download_url, file_name)``. The first
    time a key is seen a unique name is generated and cached in
    ``processed_attachments``; later calls reuse the cached name. Either way
    the name is recorded in ``used_filenames``.

    Args:
        attachment: The attachment dictionary
        used_filenames: Set of used filenames for this dataset (mutated)
        processed_attachments: Dictionary mapping attachment keys to unique filenames (mutated)
        dataset_name: The dataset name for context (accepted but not used by this function)

    Returns:
        Tuple of (unique_filename, attachment_key) where attachment_key is
        (download_url, file_name), or None when the attachment lacks a file
        name or a download URL
    """
    file_name = attachment.get("file_name")
    download_url = attachment.get("download_url")

    if not (file_name and download_url):
        missing_part = "file name" if not file_name else "download URL"
        logger.warning(f"Skipping attachment with missing {missing_part}")
        return None

    attachment_key = (download_url, file_name)
    if attachment_key in processed_attachments:
        unique_filename = processed_attachments[attachment_key]
    else:
        unique_filename = get_unique_filename(file_name, used_filenames)
        processed_attachments[attachment_key] = unique_filename

    # Register the name for both new and cached entries so later attachments
    # handled with this set cannot collide with it.
    used_filenames.add(unique_filename)

    return unique_filename, attachment_key
248
+
249
+
250
def format_attachment_size(file_size: Any) -> str:
    """
    Turn a raw file size into a display string.

    Args:
        file_size: The file size (int, float, or other)

    Returns:
        The result of ``format_size`` for int/float input, otherwise the
        literal string "Unknown"
    """
    if not isinstance(file_size, (int, float)):
        return "Unknown"
    return format_size(float(file_size))
265
+
266
+
267
def create_attachment_info_dict(
    attachment_url: str, file_size: str, file_name: str
) -> dict[str, str]:
    """
    Create attachment info dictionary for templates.

    ``safe_url`` is a percent-encoded form of ``attachment_url``:

    * Absolute ``http(s)://`` URLs keep their query/fragment delimiters and
      existing ``%XX`` escapes, so a download URL carrying a query string is
      not corrupted or double-encoded; only other unsafe characters
      (spaces, quotes, angle brackets, ...) are escaped.
    * Anything else is treated as a relative path and every character except
      ``/`` and ``:`` is escaped, so ``#`` or ``?`` inside a filename cannot
      be misread as a fragment or query.

    Args:
        attachment_url: The attachment URL
        file_size: The formatted file size
        file_name: The original file name

    Returns:
        Dictionary with keys "url", "safe_url", "size" and "original_name"
    """
    is_absolute_url = attachment_url.lower().startswith(("http://", "https://"))
    # "%" stays safe for absolute URLs so already-encoded sequences survive.
    safe_chars = "/:?&=#%+;,@" if is_absolute_url else "/:"
    safe_url = quote(attachment_url, safe=safe_chars)

    return {
        "url": attachment_url,
        "safe_url": safe_url,
        "size": file_size,
        "original_name": file_name,
    }
290
+
291
+
292
def is_attachment_field(field_value: Any) -> bool:
    """
    Tell whether a field value looks like a list of attachments.

    Only the first element is inspected: the value qualifies when it is a
    non-empty list whose first element is a dict containing the keys
    "file_name", "download_url" and "file_size".

    Args:
        field_value: The field value to check

    Returns:
        True if the field contains attachment-like data
    """
    if isinstance(field_value, list) and field_value:
        head = field_value[0]
        if isinstance(head, dict):
            return {"file_name", "download_url", "file_size"} <= head.keys()
    return False
311
+
312
+
313
def process_attachments_contextually(
    data: dict[str, Any],
    used_filenames_data: set[str],
    used_filenames_attachments: set[str],
    processed_attachments: dict[tuple[str, str], str],
    enable_streaming: bool = False,
    callback: Optional[Callable] = None,
) -> list[dict[str, Any]]:
    """
    Walk the DSR data and process every attachment in the place it occurs.

    Three sources are visited, in this order:

    1. "direct": an item's own ``"attachments"`` list
    2. "nested": any other item field whose value passes
       ``is_attachment_field()`` (ManualTask format)
    3. "top_level": the list stored under ``data["attachments"]``

    An item's ``"attachments"`` field is handled only as "direct"; the
    nested-field scan skips it so the same list is not emitted twice.
    ``"attachments"`` values that are not lists are ignored at both levels.

    Args:
        data: The DSR data dictionary
        used_filenames_data: Set of used filenames for data datasets
        used_filenames_attachments: Set of used filenames for attachments
        processed_attachments: Dictionary mapping attachment keys to unique filenames
        enable_streaming: Whether streaming mode is enabled
        callback: Optional callback function to process each attachment
            Signature: callback(attachment, unique_filename, attachment_info, context)

    Returns:
        List of processed attachment dictionaries with context information
    """
    processed_attachments_list: list[dict[str, Any]] = []

    def _collect(
        attachments: list[dict[str, Any]],
        directory: str,
        dataset_name: str,
        context: dict[str, Any],
    ) -> None:
        # Single call site for the shared arguments of _process_attachment_list.
        processed_attachments_list.extend(
            _process_attachment_list(
                attachments,
                directory,
                dataset_name,
                used_filenames_data,
                used_filenames_attachments,
                processed_attachments,
                enable_streaming,
                callback,
                context,
            )
        )

    # Process datasets (the top-level "attachments" key is excluded here)
    datasets = _get_datasets_from_dsr_data(data)

    for dataset_name, collections in datasets.items():
        for collection_name, items in collections.items():
            directory = f"data/{dataset_name}/{collection_name}"
            for item in items:
                if not isinstance(item, dict):
                    continue

                # Direct attachments on the item itself
                direct_attachments = item.get("attachments")
                if isinstance(direct_attachments, list):
                    _collect(
                        direct_attachments,
                        directory,
                        dataset_name,
                        {
                            "dataset": dataset_name,
                            "collection": collection_name,
                            "type": "direct",
                        },
                    )

                # Nested attachment fields (ManualTask format)
                for field_name, field_value in item.items():
                    if field_name == "attachments":
                        # Already handled as "direct" above; scanning it again
                        # would duplicate entries and callback invocations.
                        continue
                    if is_attachment_field(field_value):
                        _collect(
                            field_value,
                            directory,
                            dataset_name,
                            {
                                "dataset": dataset_name,
                                "collection": collection_name,
                                "field": field_name,
                                "type": "nested",
                            },
                        )

    # Top-level attachments from the "attachments" key.
    # These are legitimate top-level attachments, not duplicates of dataset attachments.
    top_level_attachments = data.get("attachments")
    if isinstance(top_level_attachments, list):
        _collect(
            top_level_attachments,
            "attachments",
            "attachments",
            {"type": "top_level"},
        )

    return processed_attachments_list
409
+
410
+
411
def _get_datasets_from_dsr_data(dsr_data: dict[str, Any]) -> dict[str, Any]:
    """
    Group DSR rows into a ``dataset -> collection -> rows`` mapping.

    Keys of the form ``"<dataset>:<collection>"`` are split at the first
    colon. Keys without a colon are treated as a collection name; their
    dataset is the ``system_name`` of the first dict row that has one, or
    ``"manual"`` when no row does. The ``"attachments"`` key is skipped.

    Args:
        dsr_data: The DSR data dictionary

    Returns:
        Nested ``defaultdict`` of datasets with collections
    """
    grouped: dict[str, Any] = defaultdict(lambda: defaultdict(list))

    for key, rows in dsr_data.items():
        # Attachments are handled separately by the caller
        if key == "attachments":
            continue

        dataset_name, separator, remainder = key.partition(":")
        if separator:
            collection_name = remainder
        else:
            collection_name = key
            # Fall back to "manual" unless a row names its system.
            dataset_name = next(
                (
                    row["system_name"]
                    for row in rows
                    if isinstance(row, dict) and "system_name" in row
                ),
                "manual",
            )

        grouped[dataset_name][collection_name].extend(rows)

    return grouped
445
+
446
+
447
def _process_attachment_list(
    attachments: list[dict[str, Any]],
    directory: str,
    dataset_name: str,
    used_filenames_data: set[str],
    used_filenames_attachments: set[str],
    processed_attachments: dict[tuple[str, str], str],
    enable_streaming: bool,
    callback: Optional[Callable],
    context: dict[str, Any],
) -> list[dict[str, Any]]:
    """
    Name, size-format and link each attachment in a list.

    Entries that are not dicts, or that lack a file name or download URL,
    are skipped. For every remaining entry a record is built with the keys
    "attachment", "unique_filename", "attachment_info", "context",
    "directory" and "dataset_name"; the optional callback is invoked before
    the record is appended to the result.

    Args:
        attachments: List of attachment dictionaries
        directory: Directory path for the attachments
        dataset_name: Name of the dataset
        used_filenames_data: Set of used filenames for data datasets
        used_filenames_attachments: Set of used filenames for attachments
        processed_attachments: Dictionary mapping attachment keys to unique filenames
        enable_streaming: Whether streaming mode is enabled
        callback: Optional callback function to process each attachment
        context: Context information about where the attachment was found

    Returns:
        List of processed attachment dictionaries
    """
    # The filename namespace depends only on the dataset, so choose it once.
    if dataset_name == "attachments":
        used_filenames = used_filenames_attachments
    else:
        used_filenames = used_filenames_data

    results = []

    for attachment in attachments:
        if not isinstance(attachment, dict):
            continue

        naming = process_attachment_naming(
            attachment, used_filenames, processed_attachments, dataset_name
        )
        if naming is None:  # Missing file name or download URL
            continue
        unique_filename = naming[0]

        file_size = format_attachment_size(attachment.get("file_size"))

        download_url = attachment.get("download_url")
        if not download_url:
            continue

        # `directory` is passed twice: once as the storage base path and once
        # as the location of the HTML page that links to the file.
        attachment_url = generate_attachment_url_from_storage_path(
            download_url,
            unique_filename,
            directory,
            directory,
            enable_streaming,
        )

        file_name = attachment.get("file_name")
        if not file_name:
            continue

        attachment_info = create_attachment_info_dict(
            attachment_url, file_size, file_name
        )

        record = {
            "attachment": attachment,
            "unique_filename": unique_filename,
            "attachment_info": attachment_info,
            "context": context,
            "directory": directory,
            "dataset_name": dataset_name,
        }

        if callback:
            callback(attachment, unique_filename, attachment_info, context)

        results.append(record)

    return results
540
+
541
+
542
def extract_storage_key_from_attachment(attachment: dict[str, Any]) -> str:
    """
    Pick the storage key for an attachment, trying fields in priority order.

    The first truthy value of "original_download_url" and "download_url" is
    returned. If neither is set, "file_name" is returned, with a missing or
    None file name mapped to the empty string.

    Args:
        attachment: The attachment dictionary

    Returns:
        The storage key (URL or filename) for the attachment
    """
    for url_field in ("original_download_url", "download_url"):
        candidate = attachment.get(url_field)
        if candidate:
            return candidate

    fallback_name = attachment.get("file_name")
    if fallback_name is None:
        return ""
    return fallback_name
563
+
564
+
565
def resolve_base_path_from_context(
    attachment: dict[str, Any], default_base_path: str = "attachments"
) -> str:
    """
    Derive an attachment's base path from its ``_context`` entry.

    Resolution rules:

    * no (or empty) ``_context``: ``default_base_path``
    * type "direct" or "nested": ``data/<dataset>/<collection>/attachments``
    * type "top_level": ``attachments``
    * old context format with truthy "key" and "item_id":
      ``<key>/<item_id>/attachments``
    * anything else: ``unknown/unknown/attachments``

    Args:
        attachment: The attachment dictionary
        default_base_path: Default base path if no context is found

    Returns:
        The resolved base path for the attachment
    """
    context = attachment.get("_context")
    if not context:
        return default_base_path

    context_type = context.get("type")

    if context_type in ("direct", "nested"):
        return f"data/{context['dataset']}/{context['collection']}/attachments"
    if context_type == "top_level":
        return "attachments"

    # Handle old context format
    legacy_key = context.get("key")
    legacy_item_id = context.get("item_id")
    if legacy_key and legacy_item_id:
        return f"{legacy_key}/{legacy_item_id}/attachments"

    # Fallback for unknown context types
    return "unknown/unknown/attachments"
598
+
599
+
600
def resolve_directory_from_context(
    attachment: dict[str, Any], default_directory: str = "attachments"
) -> str:
    """
    Derive the directory for an attachment from its ``_context`` entry.

    Resolution rules:

    * no (or empty) ``_context``: ``default_directory``
    * type "direct" or "nested": ``data/<dataset>/<collection>``
    * type "top_level": ``attachments``
    * context with truthy "key" and "item_id": ``<key>/<item_id>``
    * anything else: ``default_directory``

    Args:
        attachment: The attachment dictionary
        default_directory: Default directory if no context is found

    Returns:
        The resolved directory path for the attachment
    """
    context = attachment.get("_context")
    if not context:
        return default_directory

    context_type = context.get("type")

    if context_type in ("direct", "nested"):
        return f"data/{context['dataset']}/{context['collection']}"
    if context_type == "top_level":
        return "attachments"

    legacy_key = context.get("key")
    legacy_item_id = context.get("item_id")
    if legacy_key and legacy_item_id:
        return f"{legacy_key}/{legacy_item_id}"

    return default_directory
631
+
632
+
633
def convert_processed_attachments_to_attachment_processing_info(
    processed_attachments_list: list[dict[str, Any]], validate_attachment_func: Callable
) -> list[Any]:
    """
    Run each processed attachment through a validator and keep the results.

    For every entry, a shallow copy of its "attachment" dict is made, the
    entry's "context" is stored on the copy under "_context", and the copy
    is handed to ``validate_attachment_func``. Truthy return values are
    collected; falsy ones are dropped. The original attachment dicts are
    not modified.

    Args:
        processed_attachments_list: List of processed attachment dictionaries
        validate_attachment_func: Function to validate individual attachments
            Signature: validate_attachment_func(attachment_with_context) -> AttachmentProcessingInfo | None

    Returns:
        List of the truthy values returned by the validator (empty when the
        validator is None)
    """
    validated_attachments = []

    for entry in processed_attachments_list:
        # Copy so the caller's attachment dict never gains a "_context" key.
        attachment_with_context = {
            **entry["attachment"],
            "_context": entry["context"],
        }

        if validate_attachment_func is None:
            continue

        validated = validate_attachment_func(attachment_with_context)
        if validated:
            validated_attachments.append(validated)

    return validated_attachments
@@ -33,7 +33,6 @@ from fides.api.task.manual.manual_task_utils import (
33
33
  )
34
34
  from fides.api.task.task_resources import TaskResources
35
35
  from fides.api.util.collection_util import Row
36
- from fides.api.util.storage_util import format_size
37
36
 
38
37
 
39
38
  class ManualTaskGraphTask(GraphTask):
@@ -398,9 +397,9 @@ class ManualTaskGraphTask(GraphTask):
398
397
 
399
398
  def _process_attachment_field(
400
399
  self, submission: ManualTaskSubmission
401
- ) -> Optional[dict[str, dict[str, Any]]]:
402
- """Process attachment field and return attachment map or None."""
403
- attachment_map: dict[str, dict[str, Any]] = {}
400
+ ) -> Optional[list[dict[str, Any]]]:
401
+ """Process attachment field and return attachment list or None."""
402
+ attachment_list: list[dict[str, Any]] = []
404
403
 
405
404
  for attachment in filter(
406
405
  lambda a: a.attachment_type == AttachmentType.include_with_access_package,
@@ -408,15 +407,18 @@ class ManualTaskGraphTask(GraphTask):
408
407
  ):
409
408
  try:
410
409
  size, url = attachment.retrieve_attachment()
411
- attachment_map[attachment.file_name] = {
412
- "url": str(url) if url else None,
413
- "size": (format_size(size) if size else "Unknown"),
414
- }
410
+ attachment_list.append(
411
+ {
412
+ "file_name": attachment.file_name,
413
+ "download_url": str(url) if url else None,
414
+ "file_size": size,
415
+ }
416
+ )
415
417
  except Exception as exc: # pylint: disable=broad-exception-caught
416
418
  logger.warning(
417
419
  f"Error retrieving attachment {attachment.file_name}: {str(exc)}"
418
420
  )
419
- return attachment_map or None
421
+ return attachment_list or None
420
422
 
421
423
  def _cleanup_manual_task_instances(
422
424
  self, manual_task: ManualTask, privacy_request: PrivacyRequest
@@ -1 +1 @@
1
- <!DOCTYPE html><html lang="en"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link data-next-font="" rel="preconnect" href="/" crossorigin="anonymous"/><link rel="preload" href="/_next/static/css/650df9c348000a26.css" as="style"/><link rel="stylesheet" href="/_next/static/css/650df9c348000a26.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/_next/static/chunks/polyfills-42372ed130431b0a.js"></script><script src="/_next/static/chunks/webpack-678e89d68dbcd94f.js" defer=""></script><script src="/_next/static/chunks/framework-c92fc3344e6fd165.js" defer=""></script><script src="/_next/static/chunks/main-090643377c8254e6.js" defer=""></script><script src="/_next/static/chunks/pages/_app-ef8e1c986bc5b795.js" defer=""></script><script src="/_next/static/chunks/pages/404-471a6b18e712f050.js" defer=""></script><script src="/_next/static/XiHm-6CdVChTC5rbN9GtT/_buildManifest.js" defer=""></script><script src="/_next/static/XiHm-6CdVChTC5rbN9GtT/_ssgManifest.js" defer=""></script><style>.data-ant-cssinjs-cache-path{content:"";}</style></head><body><div id="__next"><div style="height:100%;display:flex"></div></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/404","query":{},"buildId":"XiHm-6CdVChTC5rbN9GtT","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
1
+ <!DOCTYPE html><html lang="en"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link data-next-font="" rel="preconnect" href="/" crossorigin="anonymous"/><link rel="preload" href="/_next/static/css/650df9c348000a26.css" as="style"/><link rel="stylesheet" href="/_next/static/css/650df9c348000a26.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/_next/static/chunks/polyfills-42372ed130431b0a.js"></script><script src="/_next/static/chunks/webpack-678e89d68dbcd94f.js" defer=""></script><script src="/_next/static/chunks/framework-c92fc3344e6fd165.js" defer=""></script><script src="/_next/static/chunks/main-090643377c8254e6.js" defer=""></script><script src="/_next/static/chunks/pages/_app-ef8e1c986bc5b795.js" defer=""></script><script src="/_next/static/chunks/pages/404-471a6b18e712f050.js" defer=""></script><script src="/_next/static/8qfO1Ol3G3QbcXpHAnPlU/_buildManifest.js" defer=""></script><script src="/_next/static/8qfO1Ol3G3QbcXpHAnPlU/_ssgManifest.js" defer=""></script><style>.data-ant-cssinjs-cache-path{content:"";}</style></head><body><div id="__next"><div style="height:100%;display:flex"></div></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/404","query":{},"buildId":"8qfO1Ol3G3QbcXpHAnPlU","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
@@ -1 +1 @@
1
- <!DOCTYPE html><html lang="en"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link data-next-font="" rel="preconnect" href="/" crossorigin="anonymous"/><link rel="preload" href="/_next/static/css/650df9c348000a26.css" as="style"/><link rel="stylesheet" href="/_next/static/css/650df9c348000a26.css" data-n-g=""/><link rel="preload" href="/_next/static/css/304c6f148886a8d4.css" as="style"/><link rel="stylesheet" href="/_next/static/css/304c6f148886a8d4.css" data-n-p=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/_next/static/chunks/polyfills-42372ed130431b0a.js"></script><script src="/_next/static/chunks/webpack-678e89d68dbcd94f.js" defer=""></script><script src="/_next/static/chunks/framework-c92fc3344e6fd165.js" defer=""></script><script src="/_next/static/chunks/main-090643377c8254e6.js" defer=""></script><script src="/_next/static/chunks/pages/_app-ef8e1c986bc5b795.js" defer=""></script><script src="/_next/static/chunks/431-86ad2beeb93c95c9.js" defer=""></script><script src="/_next/static/chunks/8765-f622a35b40a7ec63.js" defer=""></script><script src="/_next/static/chunks/9278-08cc704317fe535e.js" defer=""></script><script src="/_next/static/chunks/5163-e682273cd76a7d07.js" defer=""></script><script src="/_next/static/chunks/699-8ca44b0de9fa20f0.js" defer=""></script><script src="/_next/static/chunks/5277-e8a036319456127f.js" defer=""></script><script src="/_next/static/chunks/4164-355644b916ae0094.js" defer=""></script><script src="/_next/static/chunks/401-4af0a912e249d30f.js" defer=""></script><script src="/_next/static/chunks/9951-a88367a129b724ba.js" defer=""></script><script src="/_next/static/chunks/4808-78ca630f2d2503cd.js" defer=""></script><script src="/_next/static/chunks/4844-46324c3d848b8b6a.js" defer=""></script><script src="/_next/static/chunks/4786-0827aae7aceadd22.js" defer=""></script><script 
src="/_next/static/chunks/pages/add-systems/manual-9dc7e70ab5b05723.js" defer=""></script><script src="/_next/static/XiHm-6CdVChTC5rbN9GtT/_buildManifest.js" defer=""></script><script src="/_next/static/XiHm-6CdVChTC5rbN9GtT/_ssgManifest.js" defer=""></script><style>.data-ant-cssinjs-cache-path{content:"";}</style></head><body><div id="__next"><div style="height:100%;display:flex"></div></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/add-systems/manual","query":{},"buildId":"XiHm-6CdVChTC5rbN9GtT","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
1
+ <!DOCTYPE html><html lang="en"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link data-next-font="" rel="preconnect" href="/" crossorigin="anonymous"/><link rel="preload" href="/_next/static/css/650df9c348000a26.css" as="style"/><link rel="stylesheet" href="/_next/static/css/650df9c348000a26.css" data-n-g=""/><link rel="preload" href="/_next/static/css/304c6f148886a8d4.css" as="style"/><link rel="stylesheet" href="/_next/static/css/304c6f148886a8d4.css" data-n-p=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/_next/static/chunks/polyfills-42372ed130431b0a.js"></script><script src="/_next/static/chunks/webpack-678e89d68dbcd94f.js" defer=""></script><script src="/_next/static/chunks/framework-c92fc3344e6fd165.js" defer=""></script><script src="/_next/static/chunks/main-090643377c8254e6.js" defer=""></script><script src="/_next/static/chunks/pages/_app-ef8e1c986bc5b795.js" defer=""></script><script src="/_next/static/chunks/431-86ad2beeb93c95c9.js" defer=""></script><script src="/_next/static/chunks/8765-f622a35b40a7ec63.js" defer=""></script><script src="/_next/static/chunks/9278-08cc704317fe535e.js" defer=""></script><script src="/_next/static/chunks/5163-e682273cd76a7d07.js" defer=""></script><script src="/_next/static/chunks/699-8ca44b0de9fa20f0.js" defer=""></script><script src="/_next/static/chunks/5277-e8a036319456127f.js" defer=""></script><script src="/_next/static/chunks/4164-355644b916ae0094.js" defer=""></script><script src="/_next/static/chunks/401-4af0a912e249d30f.js" defer=""></script><script src="/_next/static/chunks/9951-a88367a129b724ba.js" defer=""></script><script src="/_next/static/chunks/4808-78ca630f2d2503cd.js" defer=""></script><script src="/_next/static/chunks/4844-46324c3d848b8b6a.js" defer=""></script><script src="/_next/static/chunks/4786-0827aae7aceadd22.js" defer=""></script><script 
src="/_next/static/chunks/pages/add-systems/manual-9dc7e70ab5b05723.js" defer=""></script><script src="/_next/static/8qfO1Ol3G3QbcXpHAnPlU/_buildManifest.js" defer=""></script><script src="/_next/static/8qfO1Ol3G3QbcXpHAnPlU/_ssgManifest.js" defer=""></script><style>.data-ant-cssinjs-cache-path{content:"";}</style></head><body><div id="__next"><div style="height:100%;display:flex"></div></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/add-systems/manual","query":{},"buildId":"8qfO1Ol3G3QbcXpHAnPlU","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
@@ -1 +1 @@
1
- <!DOCTYPE html><html lang="en"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link data-next-font="" rel="preconnect" href="/" crossorigin="anonymous"/><link rel="preload" href="/_next/static/css/650df9c348000a26.css" as="style"/><link rel="stylesheet" href="/_next/static/css/650df9c348000a26.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/_next/static/chunks/polyfills-42372ed130431b0a.js"></script><script src="/_next/static/chunks/webpack-678e89d68dbcd94f.js" defer=""></script><script src="/_next/static/chunks/framework-c92fc3344e6fd165.js" defer=""></script><script src="/_next/static/chunks/main-090643377c8254e6.js" defer=""></script><script src="/_next/static/chunks/pages/_app-ef8e1c986bc5b795.js" defer=""></script><script src="/_next/static/chunks/401-4af0a912e249d30f.js" defer=""></script><script src="/_next/static/chunks/3923-bb2417b8dcade7a4.js" defer=""></script><script src="/_next/static/chunks/796-db1e30119ea973c7.js" defer=""></script><script src="/_next/static/chunks/pages/add-systems/multiple-4b79a1652297ed9a.js" defer=""></script><script src="/_next/static/XiHm-6CdVChTC5rbN9GtT/_buildManifest.js" defer=""></script><script src="/_next/static/XiHm-6CdVChTC5rbN9GtT/_ssgManifest.js" defer=""></script><style>.data-ant-cssinjs-cache-path{content:"";}</style></head><body><div id="__next"><div style="height:100%;display:flex"></div></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/add-systems/multiple","query":{},"buildId":"XiHm-6CdVChTC5rbN9GtT","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
1
+ <!DOCTYPE html><html lang="en"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link data-next-font="" rel="preconnect" href="/" crossorigin="anonymous"/><link rel="preload" href="/_next/static/css/650df9c348000a26.css" as="style"/><link rel="stylesheet" href="/_next/static/css/650df9c348000a26.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/_next/static/chunks/polyfills-42372ed130431b0a.js"></script><script src="/_next/static/chunks/webpack-678e89d68dbcd94f.js" defer=""></script><script src="/_next/static/chunks/framework-c92fc3344e6fd165.js" defer=""></script><script src="/_next/static/chunks/main-090643377c8254e6.js" defer=""></script><script src="/_next/static/chunks/pages/_app-ef8e1c986bc5b795.js" defer=""></script><script src="/_next/static/chunks/401-4af0a912e249d30f.js" defer=""></script><script src="/_next/static/chunks/3923-bb2417b8dcade7a4.js" defer=""></script><script src="/_next/static/chunks/796-db1e30119ea973c7.js" defer=""></script><script src="/_next/static/chunks/pages/add-systems/multiple-4b79a1652297ed9a.js" defer=""></script><script src="/_next/static/8qfO1Ol3G3QbcXpHAnPlU/_buildManifest.js" defer=""></script><script src="/_next/static/8qfO1Ol3G3QbcXpHAnPlU/_ssgManifest.js" defer=""></script><style>.data-ant-cssinjs-cache-path{content:"";}</style></head><body><div id="__next"><div style="height:100%;display:flex"></div></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/add-systems/multiple","query":{},"buildId":"8qfO1Ol3G3QbcXpHAnPlU","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>