ethyca-fides 2.69.0rc9__py2.py3-none-any.whl → 2.69.0rc10__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ethyca-fides might be problematic. Click here for more details.

Files changed (98)
  1. {ethyca_fides-2.69.0rc9.dist-info → ethyca_fides-2.69.0rc10.dist-info}/METADATA +1 -1
  2. {ethyca_fides-2.69.0rc9.dist-info → ethyca_fides-2.69.0rc10.dist-info}/RECORD +98 -98
  3. fides/_version.py +3 -3
  4. fides/api/service/privacy_request/dsr_package/dsr_report_builder.py +253 -71
  5. fides/api/service/privacy_request/dsr_package/templates/attachments_index.html +4 -2
  6. fides/api/service/privacy_request/dsr_package/templates/collection_index.html +3 -1
  7. fides/api/service/privacy_request/dsr_package/templates/dataset_index.html +1 -1
  8. fides/api/service/privacy_request/request_runner_service.py +8 -2
  9. fides/api/service/storage/streaming/smart_open_streaming_storage.py +106 -169
  10. fides/api/service/storage/util.py +579 -0
  11. fides/api/task/manual/manual_task_graph_task.py +11 -9
  12. fides/ui-build/static/admin/404.html +1 -1
  13. fides/ui-build/static/admin/add-systems/manual.html +1 -1
  14. fides/ui-build/static/admin/add-systems/multiple.html +1 -1
  15. fides/ui-build/static/admin/add-systems.html +1 -1
  16. fides/ui-build/static/admin/consent/configure/add-vendors.html +1 -1
  17. fides/ui-build/static/admin/consent/configure.html +1 -1
  18. fides/ui-build/static/admin/consent/privacy-experience/[id].html +1 -1
  19. fides/ui-build/static/admin/consent/privacy-experience/new.html +1 -1
  20. fides/ui-build/static/admin/consent/privacy-experience.html +1 -1
  21. fides/ui-build/static/admin/consent/privacy-notices/[id].html +1 -1
  22. fides/ui-build/static/admin/consent/privacy-notices/new.html +1 -1
  23. fides/ui-build/static/admin/consent/privacy-notices.html +1 -1
  24. fides/ui-build/static/admin/consent/properties.html +1 -1
  25. fides/ui-build/static/admin/consent/reporting.html +1 -1
  26. fides/ui-build/static/admin/consent.html +1 -1
  27. fides/ui-build/static/admin/data-catalog/[systemId]/projects/[projectUrn]/[resourceUrn].html +1 -1
  28. fides/ui-build/static/admin/data-catalog/[systemId]/projects/[projectUrn].html +1 -1
  29. fides/ui-build/static/admin/data-catalog/[systemId]/projects.html +1 -1
  30. fides/ui-build/static/admin/data-catalog/[systemId]/resources/[resourceUrn].html +1 -1
  31. fides/ui-build/static/admin/data-catalog/[systemId]/resources.html +1 -1
  32. fides/ui-build/static/admin/data-catalog.html +1 -1
  33. fides/ui-build/static/admin/data-discovery/action-center/[monitorId]/[systemId].html +1 -1
  34. fides/ui-build/static/admin/data-discovery/action-center/[monitorId].html +1 -1
  35. fides/ui-build/static/admin/data-discovery/action-center.html +1 -1
  36. fides/ui-build/static/admin/data-discovery/activity.html +1 -1
  37. fides/ui-build/static/admin/data-discovery/detection/[resourceUrn].html +1 -1
  38. fides/ui-build/static/admin/data-discovery/detection.html +1 -1
  39. fides/ui-build/static/admin/data-discovery/discovery/[resourceUrn].html +1 -1
  40. fides/ui-build/static/admin/data-discovery/discovery.html +1 -1
  41. fides/ui-build/static/admin/datamap.html +1 -1
  42. fides/ui-build/static/admin/dataset/[datasetId]/[collectionName]/[...subfieldNames].html +1 -1
  43. fides/ui-build/static/admin/dataset/[datasetId]/[collectionName].html +1 -1
  44. fides/ui-build/static/admin/dataset/[datasetId].html +1 -1
  45. fides/ui-build/static/admin/dataset/new.html +1 -1
  46. fides/ui-build/static/admin/dataset.html +1 -1
  47. fides/ui-build/static/admin/datastore-connection/[id].html +1 -1
  48. fides/ui-build/static/admin/datastore-connection/new.html +1 -1
  49. fides/ui-build/static/admin/datastore-connection.html +1 -1
  50. fides/ui-build/static/admin/index.html +1 -1
  51. fides/ui-build/static/admin/integrations/[id].html +1 -1
  52. fides/ui-build/static/admin/integrations.html +1 -1
  53. fides/ui-build/static/admin/login/[provider].html +1 -1
  54. fides/ui-build/static/admin/login.html +1 -1
  55. fides/ui-build/static/admin/messaging/[id].html +1 -1
  56. fides/ui-build/static/admin/messaging/add-template.html +1 -1
  57. fides/ui-build/static/admin/messaging.html +1 -1
  58. fides/ui-build/static/admin/poc/ant-components.html +1 -1
  59. fides/ui-build/static/admin/poc/form-experiments/AntForm.html +1 -1
  60. fides/ui-build/static/admin/poc/form-experiments/FormikAntFormItem.html +1 -1
  61. fides/ui-build/static/admin/poc/form-experiments/FormikControlled.html +1 -1
  62. fides/ui-build/static/admin/poc/form-experiments/FormikField.html +1 -1
  63. fides/ui-build/static/admin/poc/form-experiments/FormikSpreadField.html +1 -1
  64. fides/ui-build/static/admin/poc/forms.html +1 -1
  65. fides/ui-build/static/admin/poc/table-migration.html +1 -1
  66. fides/ui-build/static/admin/privacy-requests/[id].html +1 -1
  67. fides/ui-build/static/admin/privacy-requests/configure/messaging.html +1 -1
  68. fides/ui-build/static/admin/privacy-requests/configure/storage.html +1 -1
  69. fides/ui-build/static/admin/privacy-requests/configure.html +1 -1
  70. fides/ui-build/static/admin/privacy-requests.html +1 -1
  71. fides/ui-build/static/admin/properties/[id].html +1 -1
  72. fides/ui-build/static/admin/properties/add-property.html +1 -1
  73. fides/ui-build/static/admin/properties.html +1 -1
  74. fides/ui-build/static/admin/reporting/datamap.html +1 -1
  75. fides/ui-build/static/admin/settings/about/alpha.html +1 -1
  76. fides/ui-build/static/admin/settings/about.html +1 -1
  77. fides/ui-build/static/admin/settings/consent/[configuration_id]/[purpose_id].html +1 -1
  78. fides/ui-build/static/admin/settings/consent.html +1 -1
  79. fides/ui-build/static/admin/settings/custom-fields.html +1 -1
  80. fides/ui-build/static/admin/settings/domain-records.html +1 -1
  81. fides/ui-build/static/admin/settings/domains.html +1 -1
  82. fides/ui-build/static/admin/settings/email-templates.html +1 -1
  83. fides/ui-build/static/admin/settings/locations.html +1 -1
  84. fides/ui-build/static/admin/settings/organization.html +1 -1
  85. fides/ui-build/static/admin/settings/regulations.html +1 -1
  86. fides/ui-build/static/admin/systems/configure/[id]/test-datasets.html +1 -1
  87. fides/ui-build/static/admin/systems/configure/[id].html +1 -1
  88. fides/ui-build/static/admin/systems.html +1 -1
  89. fides/ui-build/static/admin/taxonomy.html +1 -1
  90. fides/ui-build/static/admin/user-management/new.html +1 -1
  91. fides/ui-build/static/admin/user-management/profile/[id].html +1 -1
  92. fides/ui-build/static/admin/user-management.html +1 -1
  93. {ethyca_fides-2.69.0rc9.dist-info → ethyca_fides-2.69.0rc10.dist-info}/WHEEL +0 -0
  94. {ethyca_fides-2.69.0rc9.dist-info → ethyca_fides-2.69.0rc10.dist-info}/entry_points.txt +0 -0
  95. {ethyca_fides-2.69.0rc9.dist-info → ethyca_fides-2.69.0rc10.dist-info}/licenses/LICENSE +0 -0
  96. {ethyca_fides-2.69.0rc9.dist-info → ethyca_fides-2.69.0rc10.dist-info}/top_level.txt +0 -0
  97. /fides/ui-build/static/admin/_next/static/{XiHm-6CdVChTC5rbN9GtT → 8qfO1Ol3G3QbcXpHAnPlU}/_buildManifest.js +0 -0
  98. /fides/ui-build/static/admin/_next/static/{XiHm-6CdVChTC5rbN9GtT → 8qfO1Ol3G3QbcXpHAnPlU}/_ssgManifest.js +0 -0
@@ -1,5 +1,4 @@
1
- """Smart-open based streaming storage for efficient cloud-to-cloud data transfer."""
2
-
1
+ # pylint: disable=too-many-lines
3
2
  from __future__ import annotations
4
3
 
5
4
  import csv
@@ -34,6 +33,15 @@ from fides.api.service.storage.streaming.schemas import (
34
33
  StreamingBufferConfig,
35
34
  )
36
35
  from fides.api.service.storage.streaming.smart_open_client import SmartOpenStorageClient
36
+ from fides.api.service.storage.util import (
37
+ convert_processed_attachments_to_attachment_processing_info,
38
+ determine_dataset_name_from_path,
39
+ extract_storage_key_from_attachment,
40
+ get_unique_filename,
41
+ process_attachments_contextually,
42
+ resolve_attachment_storage_path,
43
+ resolve_base_path_from_context,
44
+ )
37
45
 
38
46
  DEFAULT_ATTACHMENT_NAME = "attachment"
39
47
  DEFAULT_FILE_MODE = 0o644
@@ -68,6 +76,9 @@ class SmartOpenStreamingStorage:
68
76
  """
69
77
  self.storage_client = storage_client
70
78
  self.chunk_size = chunk_size
79
+ # Track used filenames per dataset to match DSR report builder behavior
80
+ # Maps dataset_name -> set of used filenames
81
+ self.used_filenames_per_dataset: dict[str, set[str]] = {}
71
82
 
72
83
  def _parse_storage_url(self, storage_key: str) -> tuple[str, str]:
73
84
  """Parse storage URL and return (bucket, key).
@@ -229,138 +240,6 @@ class SmartOpenStreamingStorage:
229
240
 
230
241
  return packages
231
242
 
232
- def _collect_attachments(self, data: dict) -> list[dict]:
233
- """Collect all attachment data from the input data structure.
234
-
235
- This method handles both direct attachments (under 'attachments' key) and
236
- nested attachments within items. It returns raw attachment data without validation.
237
-
238
- Args:
239
- data: The data dictionary containing items with attachments
240
-
241
- Returns:
242
- List of raw attachment dictionaries with metadata
243
- """
244
- all_attachments = []
245
-
246
- for key, value in data.items():
247
-
248
- if not isinstance(value, list) or not value:
249
- continue
250
-
251
- # Collect direct attachments if this key is "attachments"
252
- if key == "attachments":
253
- all_attachments.extend(self._collect_direct_attachments(value))
254
-
255
- # Collect nested attachments from items
256
- all_attachments.extend(self._collect_nested_attachments(key, value))
257
-
258
- logger.debug(f"Collected {len(all_attachments)} raw attachments")
259
- return all_attachments
260
-
261
- def _collect_direct_attachments(self, attachments_list: list) -> list[dict]:
262
- """Collect attachments from a direct attachments list.
263
-
264
- Args:
265
- attachments_list: List of attachment dictionaries
266
-
267
- Returns:
268
- List of attachment data dictionaries with metadata
269
- """
270
- direct_attachments = []
271
-
272
- for idx, attachment in enumerate(attachments_list):
273
- if not isinstance(attachment, dict):
274
- continue
275
-
276
- # Check if this looks like an attachment (has file_name or download_url)
277
- if "file_name" in attachment or "download_url" in attachment:
278
- # Transform download_url to internal access package URL for access package display
279
- if "download_url" in attachment:
280
- attachment["original_download_url"] = attachment["download_url"]
281
- attachment["download_url"] = (
282
- f"attachments/{attachment.get('file_name', f'attachment_{idx}')}"
283
- )
284
-
285
- direct_attachments.append(attachment)
286
-
287
- return direct_attachments
288
-
289
- def _collect_nested_attachments(self, key: str, items: list) -> list[dict]:
290
- """Collect attachments from nested items.
291
-
292
- Args:
293
- key: The key for the items list
294
- items: List of items that may contain attachments
295
-
296
- Returns:
297
- List of attachment data dictionaries with metadata
298
- """
299
- nested_attachments = []
300
-
301
- for item in items:
302
- if not isinstance(item, dict):
303
- continue
304
-
305
- # Recursively search for attachments in nested structures
306
- item_attachments = self._find_attachments_recursive(item, key)
307
- nested_attachments.extend(item_attachments)
308
-
309
- return nested_attachments
310
-
311
- def _find_attachments_recursive(
312
- self, item: dict, context_key: str, path: str = ""
313
- ) -> list[dict]:
314
- """Recursively find attachments in nested dictionary structures.
315
-
316
- Args:
317
- item: Dictionary item to search
318
- context_key: The top-level key for context
319
- path: Current path in the nested structure
320
-
321
- Returns:
322
- List of attachment data dictionaries with metadata
323
- """
324
- attachments = []
325
-
326
- # Check if this item has direct attachments
327
- if "attachments" in item and isinstance(item["attachments"], list):
328
- for attachment in item["attachments"]:
329
- if not isinstance(attachment, dict):
330
- continue
331
-
332
- # Check if this looks like an attachment
333
- if "file_name" in attachment or "download_url" in attachment:
334
- # Add context about which item this attachment belongs to
335
- attachment_with_context = attachment.copy()
336
- attachment_with_context["_context"] = {
337
- "key": context_key,
338
- "item_id": item.get("id", "unknown"),
339
- "path": path,
340
- }
341
-
342
- # Transform download_url to internal access package URL
343
- if "download_url" in attachment:
344
- attachment_with_context["original_download_url"] = attachment[
345
- "download_url"
346
- ]
347
- attachment_with_context["download_url"] = (
348
- f"attachments/{attachment.get('file_name', 'attachment')}"
349
- )
350
-
351
- attachments.append(attachment_with_context)
352
-
353
- # Recursively search nested dictionaries
354
- for key, value in item.items():
355
- if isinstance(value, dict):
356
- current_path = f"{path}.{key}" if path else key
357
- nested_attachments = self._find_attachments_recursive(
358
- value, context_key, current_path
359
- )
360
- attachments.extend(nested_attachments)
361
-
362
- return attachments
363
-
364
243
  def _validate_attachment(
365
244
  self, attachment: dict
366
245
  ) -> Optional[AttachmentProcessingInfo]:
@@ -373,12 +252,8 @@ class SmartOpenStreamingStorage:
373
252
  AttachmentProcessingInfo if valid, None otherwise
374
253
  """
375
254
  try:
376
- # Extract required fields - use original_download_url for storage operations
377
- storage_key = (
378
- attachment.get("original_download_url")
379
- or attachment.get("download_url")
380
- or attachment.get("file_name", "")
381
- )
255
+ # Extract storage key using shared utility
256
+ storage_key = extract_storage_key_from_attachment(attachment)
382
257
  if not storage_key:
383
258
  return None
384
259
 
@@ -390,11 +265,8 @@ class SmartOpenStreamingStorage:
390
265
  content_type=attachment.get("content_type"),
391
266
  )
392
267
 
393
- # Create base path for the attachment in the zip
394
- base_path = "attachments"
395
- if attachment.get("_context"):
396
- context = attachment["_context"]
397
- base_path = f"{context['key']}/{context['item_id']}/attachments"
268
+ # Resolve base path using shared utility
269
+ base_path = resolve_base_path_from_context(attachment)
398
270
 
399
271
  # Create AttachmentProcessingInfo
400
272
  processing_info = AttachmentProcessingInfo(
@@ -403,9 +275,6 @@ class SmartOpenStreamingStorage:
403
275
  item=attachment,
404
276
  )
405
277
 
406
- logger.debug(
407
- f"Successfully validated attachment: {attachment_info.storage_key}"
408
- )
409
278
  return processing_info
410
279
 
411
280
  except (ValueError, TypeError, KeyError) as e:
@@ -438,9 +307,6 @@ class SmartOpenStreamingStorage:
438
307
  total_bytes += len(chunk)
439
308
  yield chunk
440
309
 
441
- logger.debug(
442
- f"Completed streaming {chunk_count} chunks ({total_bytes} bytes) for {storage_key}"
443
- )
444
310
  except Exception as e:
445
311
  logger.warning(f"Failed to stream attachment {storage_key}: {e}")
446
312
  # Yield empty content on failure
@@ -449,10 +315,10 @@ class SmartOpenStreamingStorage:
449
315
  def _collect_and_validate_attachments(
450
316
  self, data: dict
451
317
  ) -> list[AttachmentProcessingInfo]:
452
- """Collect and validate all attachments from the data.
318
+ """Collect and validate attachments using the same contextual approach as DSR report builder.
453
319
 
454
- This method now delegates to _collect_attachments and _validate_attachment
455
- for better separation of concerns and readability.
320
+ This method uses the shared contextual processing logic to ensure consistency
321
+ between DSR report builder and streaming storage.
456
322
 
457
323
  Args:
458
324
  data: The data dictionary containing items with attachments
@@ -460,17 +326,64 @@ class SmartOpenStreamingStorage:
460
326
  Returns:
461
327
  List of validated AttachmentProcessingInfo objects
462
328
  """
463
- # Collect raw attachment data
464
- raw_attachments = self._collect_attachments(data)
329
+ # Initialize tracking structures (similar to DSR report builder)
330
+ used_filenames_data: set[str] = set()
331
+ used_filenames_attachments: set[str] = set()
332
+ processed_attachments: dict[tuple[str, str], str] = {}
333
+
334
+ # Use the shared contextual processing function
335
+ processed_attachments_list = process_attachments_contextually(
336
+ data,
337
+ used_filenames_data,
338
+ used_filenames_attachments,
339
+ processed_attachments,
340
+ enable_streaming=True, # Always use streaming mode for storage
341
+ )
342
+
343
+ # Convert to AttachmentProcessingInfo objects using shared utility
344
+ return convert_processed_attachments_to_attachment_processing_info(
345
+ processed_attachments_list, self._validate_attachment
346
+ )
347
+
348
+ def _collect_and_validate_attachments_from_dsr_builder(
349
+ self, data: dict, dsr_builder: "DsrReportBuilder"
350
+ ) -> list[AttachmentProcessingInfo]:
351
+ """Collect and validate attachments using the DSR report builder's processed attachments.
352
+
353
+ This method reuses the DSR report builder's processed attachments to avoid
354
+ duplicate processing and ensure consistency.
465
355
 
466
- # Validate and convert each attachment
467
- validated_attachments = []
468
- for attachment_data in raw_attachments:
469
- validated = self._validate_attachment(attachment_data)
470
- if validated:
471
- validated_attachments.append(validated)
356
+ Args:
357
+ data: The data dictionary containing items with attachments
358
+ dsr_builder: The DSR report builder instance that has already processed attachments
359
+
360
+ Returns:
361
+ List of validated AttachmentProcessingInfo objects
362
+ """
363
+ # Use the DSR report builder's processed attachments
364
+ # Create temporary sets for compatibility with the shared function
365
+ used_filenames_data = set()
366
+ used_filenames_attachments = set()
367
+
368
+ # Populate the temporary sets from the DSR builder's per-dataset tracking
369
+ for dataset_name, filenames in dsr_builder.used_filenames_per_dataset.items():
370
+ if dataset_name == "attachments":
371
+ used_filenames_attachments.update(filenames)
372
+ else:
373
+ used_filenames_data.update(filenames)
374
+
375
+ processed_attachments_list = process_attachments_contextually(
376
+ data,
377
+ used_filenames_data,
378
+ used_filenames_attachments,
379
+ dsr_builder.processed_attachments,
380
+ enable_streaming=True, # Always use streaming mode for storage
381
+ )
472
382
 
473
- return validated_attachments
383
+ # Convert to AttachmentProcessingInfo objects using shared utility
384
+ return convert_processed_attachments_to_attachment_processing_info(
385
+ processed_attachments_list, self._validate_attachment
386
+ )
474
387
 
475
388
  @retry_cloud_storage_operation(
476
389
  provider="smart_open_streaming",
@@ -514,6 +427,9 @@ class SmartOpenStreamingStorage:
514
427
  if not privacy_request:
515
428
  raise ValueError("Privacy request must be provided")
516
429
 
430
+ # Reset used filenames for this upload operation
431
+ self.used_filenames_per_dataset.clear()
432
+
517
433
  # Use default buffer config if none provided
518
434
  if buffer_config is None:
519
435
  buffer_config = StreamingBufferConfig()
@@ -628,18 +544,22 @@ class SmartOpenStreamingStorage:
628
544
  """
629
545
  # Generate the DSR report first
630
546
  try:
631
- dsr_buffer = DsrReportBuilder(
547
+ dsr_builder = DsrReportBuilder(
632
548
  privacy_request=privacy_request,
633
549
  dsr_data=data,
634
- ).generate()
550
+ enable_streaming=True,
551
+ )
552
+ dsr_buffer = dsr_builder.generate()
635
553
  # Reset buffer position to ensure it can be read multiple times
636
554
  dsr_buffer.seek(0)
637
555
  except Exception as e:
638
556
  logger.error(f"Failed to generate DSR report: {e}")
639
557
  raise StorageUploadError(f"Failed to generate DSR report: {e}") from e
640
558
 
641
- # Check if there are attachments to include
642
- all_attachments = self._collect_and_validate_attachments(data)
559
+ # Use the DSR report builder's processed attachments to avoid duplicates
560
+ all_attachments = self._collect_and_validate_attachments_from_dsr_builder(
561
+ data, dsr_builder
562
+ )
643
563
 
644
564
  if not all_attachments:
645
565
  # No attachments, just upload the DSR report
@@ -733,7 +653,7 @@ class SmartOpenStreamingStorage:
733
653
  batch_size: Number of attachments to process in each batch
734
654
  resp_format: Response format (csv, json)
735
655
  """
736
- # Collect and validate all attachments
656
+ # Collect and validate all attachments using shared contextual processing
737
657
  all_attachments = self._collect_and_validate_attachments(data)
738
658
 
739
659
  if not all_attachments:
@@ -943,7 +863,24 @@ class SmartOpenStreamingStorage:
943
863
  f"Could not parse storage URL: {storage_key} - {e}"
944
864
  ) from e
945
865
 
946
- file_path = f"{attachment_info.base_path}/{attachment_info.attachment.file_name or DEFAULT_ATTACHMENT_NAME}"
866
+ # Generate unique filename using same logic as DSR report builder
867
+ original_filename = (
868
+ attachment_info.attachment.file_name or DEFAULT_ATTACHMENT_NAME
869
+ )
870
+
871
+ # Determine dataset name from base_path using shared utility
872
+ dataset_name = determine_dataset_name_from_path(attachment_info.base_path)
873
+
874
+ if dataset_name not in self.used_filenames_per_dataset:
875
+ self.used_filenames_per_dataset[dataset_name] = set()
876
+
877
+ unique_filename = get_unique_filename(
878
+ original_filename, self.used_filenames_per_dataset[dataset_name]
879
+ )
880
+ self.used_filenames_per_dataset[dataset_name].add(unique_filename)
881
+ file_path = resolve_attachment_storage_path(
882
+ unique_filename, attachment_info.base_path
883
+ )
947
884
 
948
885
  try:
949
886
  content_stream = self._create_attachment_content_stream(