ethyca-fides 2.69.0rc9__py2.py3-none-any.whl → 2.69.0rc10__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ethyca-fides might be problematic. Click here for more details.
- {ethyca_fides-2.69.0rc9.dist-info → ethyca_fides-2.69.0rc10.dist-info}/METADATA +1 -1
- {ethyca_fides-2.69.0rc9.dist-info → ethyca_fides-2.69.0rc10.dist-info}/RECORD +98 -98
- fides/_version.py +3 -3
- fides/api/service/privacy_request/dsr_package/dsr_report_builder.py +253 -71
- fides/api/service/privacy_request/dsr_package/templates/attachments_index.html +4 -2
- fides/api/service/privacy_request/dsr_package/templates/collection_index.html +3 -1
- fides/api/service/privacy_request/dsr_package/templates/dataset_index.html +1 -1
- fides/api/service/privacy_request/request_runner_service.py +8 -2
- fides/api/service/storage/streaming/smart_open_streaming_storage.py +106 -169
- fides/api/service/storage/util.py +579 -0
- fides/api/task/manual/manual_task_graph_task.py +11 -9
- fides/ui-build/static/admin/404.html +1 -1
- fides/ui-build/static/admin/add-systems/manual.html +1 -1
- fides/ui-build/static/admin/add-systems/multiple.html +1 -1
- fides/ui-build/static/admin/add-systems.html +1 -1
- fides/ui-build/static/admin/consent/configure/add-vendors.html +1 -1
- fides/ui-build/static/admin/consent/configure.html +1 -1
- fides/ui-build/static/admin/consent/privacy-experience/[id].html +1 -1
- fides/ui-build/static/admin/consent/privacy-experience/new.html +1 -1
- fides/ui-build/static/admin/consent/privacy-experience.html +1 -1
- fides/ui-build/static/admin/consent/privacy-notices/[id].html +1 -1
- fides/ui-build/static/admin/consent/privacy-notices/new.html +1 -1
- fides/ui-build/static/admin/consent/privacy-notices.html +1 -1
- fides/ui-build/static/admin/consent/properties.html +1 -1
- fides/ui-build/static/admin/consent/reporting.html +1 -1
- fides/ui-build/static/admin/consent.html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/projects/[projectUrn]/[resourceUrn].html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/projects/[projectUrn].html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/projects.html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/resources/[resourceUrn].html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/resources.html +1 -1
- fides/ui-build/static/admin/data-catalog.html +1 -1
- fides/ui-build/static/admin/data-discovery/action-center/[monitorId]/[systemId].html +1 -1
- fides/ui-build/static/admin/data-discovery/action-center/[monitorId].html +1 -1
- fides/ui-build/static/admin/data-discovery/action-center.html +1 -1
- fides/ui-build/static/admin/data-discovery/activity.html +1 -1
- fides/ui-build/static/admin/data-discovery/detection/[resourceUrn].html +1 -1
- fides/ui-build/static/admin/data-discovery/detection.html +1 -1
- fides/ui-build/static/admin/data-discovery/discovery/[resourceUrn].html +1 -1
- fides/ui-build/static/admin/data-discovery/discovery.html +1 -1
- fides/ui-build/static/admin/datamap.html +1 -1
- fides/ui-build/static/admin/dataset/[datasetId]/[collectionName]/[...subfieldNames].html +1 -1
- fides/ui-build/static/admin/dataset/[datasetId]/[collectionName].html +1 -1
- fides/ui-build/static/admin/dataset/[datasetId].html +1 -1
- fides/ui-build/static/admin/dataset/new.html +1 -1
- fides/ui-build/static/admin/dataset.html +1 -1
- fides/ui-build/static/admin/datastore-connection/[id].html +1 -1
- fides/ui-build/static/admin/datastore-connection/new.html +1 -1
- fides/ui-build/static/admin/datastore-connection.html +1 -1
- fides/ui-build/static/admin/index.html +1 -1
- fides/ui-build/static/admin/integrations/[id].html +1 -1
- fides/ui-build/static/admin/integrations.html +1 -1
- fides/ui-build/static/admin/login/[provider].html +1 -1
- fides/ui-build/static/admin/login.html +1 -1
- fides/ui-build/static/admin/messaging/[id].html +1 -1
- fides/ui-build/static/admin/messaging/add-template.html +1 -1
- fides/ui-build/static/admin/messaging.html +1 -1
- fides/ui-build/static/admin/poc/ant-components.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/AntForm.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/FormikAntFormItem.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/FormikControlled.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/FormikField.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/FormikSpreadField.html +1 -1
- fides/ui-build/static/admin/poc/forms.html +1 -1
- fides/ui-build/static/admin/poc/table-migration.html +1 -1
- fides/ui-build/static/admin/privacy-requests/[id].html +1 -1
- fides/ui-build/static/admin/privacy-requests/configure/messaging.html +1 -1
- fides/ui-build/static/admin/privacy-requests/configure/storage.html +1 -1
- fides/ui-build/static/admin/privacy-requests/configure.html +1 -1
- fides/ui-build/static/admin/privacy-requests.html +1 -1
- fides/ui-build/static/admin/properties/[id].html +1 -1
- fides/ui-build/static/admin/properties/add-property.html +1 -1
- fides/ui-build/static/admin/properties.html +1 -1
- fides/ui-build/static/admin/reporting/datamap.html +1 -1
- fides/ui-build/static/admin/settings/about/alpha.html +1 -1
- fides/ui-build/static/admin/settings/about.html +1 -1
- fides/ui-build/static/admin/settings/consent/[configuration_id]/[purpose_id].html +1 -1
- fides/ui-build/static/admin/settings/consent.html +1 -1
- fides/ui-build/static/admin/settings/custom-fields.html +1 -1
- fides/ui-build/static/admin/settings/domain-records.html +1 -1
- fides/ui-build/static/admin/settings/domains.html +1 -1
- fides/ui-build/static/admin/settings/email-templates.html +1 -1
- fides/ui-build/static/admin/settings/locations.html +1 -1
- fides/ui-build/static/admin/settings/organization.html +1 -1
- fides/ui-build/static/admin/settings/regulations.html +1 -1
- fides/ui-build/static/admin/systems/configure/[id]/test-datasets.html +1 -1
- fides/ui-build/static/admin/systems/configure/[id].html +1 -1
- fides/ui-build/static/admin/systems.html +1 -1
- fides/ui-build/static/admin/taxonomy.html +1 -1
- fides/ui-build/static/admin/user-management/new.html +1 -1
- fides/ui-build/static/admin/user-management/profile/[id].html +1 -1
- fides/ui-build/static/admin/user-management.html +1 -1
- {ethyca_fides-2.69.0rc9.dist-info → ethyca_fides-2.69.0rc10.dist-info}/WHEEL +0 -0
- {ethyca_fides-2.69.0rc9.dist-info → ethyca_fides-2.69.0rc10.dist-info}/entry_points.txt +0 -0
- {ethyca_fides-2.69.0rc9.dist-info → ethyca_fides-2.69.0rc10.dist-info}/licenses/LICENSE +0 -0
- {ethyca_fides-2.69.0rc9.dist-info → ethyca_fides-2.69.0rc10.dist-info}/top_level.txt +0 -0
- /fides/ui-build/static/admin/_next/static/{XiHm-6CdVChTC5rbN9GtT → 8qfO1Ol3G3QbcXpHAnPlU}/_buildManifest.js +0 -0
- /fides/ui-build/static/admin/_next/static/{XiHm-6CdVChTC5rbN9GtT → 8qfO1Ol3G3QbcXpHAnPlU}/_ssgManifest.js +0 -0
|
@@ -1,5 +1,4 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
1
|
+
# pylint: disable=too-many-lines
|
|
3
2
|
from __future__ import annotations
|
|
4
3
|
|
|
5
4
|
import csv
|
|
@@ -34,6 +33,15 @@ from fides.api.service.storage.streaming.schemas import (
|
|
|
34
33
|
StreamingBufferConfig,
|
|
35
34
|
)
|
|
36
35
|
from fides.api.service.storage.streaming.smart_open_client import SmartOpenStorageClient
|
|
36
|
+
from fides.api.service.storage.util import (
|
|
37
|
+
convert_processed_attachments_to_attachment_processing_info,
|
|
38
|
+
determine_dataset_name_from_path,
|
|
39
|
+
extract_storage_key_from_attachment,
|
|
40
|
+
get_unique_filename,
|
|
41
|
+
process_attachments_contextually,
|
|
42
|
+
resolve_attachment_storage_path,
|
|
43
|
+
resolve_base_path_from_context,
|
|
44
|
+
)
|
|
37
45
|
|
|
38
46
|
DEFAULT_ATTACHMENT_NAME = "attachment"
|
|
39
47
|
DEFAULT_FILE_MODE = 0o644
|
|
@@ -68,6 +76,9 @@ class SmartOpenStreamingStorage:
|
|
|
68
76
|
"""
|
|
69
77
|
self.storage_client = storage_client
|
|
70
78
|
self.chunk_size = chunk_size
|
|
79
|
+
# Track used filenames per dataset to match DSR report builder behavior
|
|
80
|
+
# Maps dataset_name -> set of used filenames
|
|
81
|
+
self.used_filenames_per_dataset: dict[str, set[str]] = {}
|
|
71
82
|
|
|
72
83
|
def _parse_storage_url(self, storage_key: str) -> tuple[str, str]:
|
|
73
84
|
"""Parse storage URL and return (bucket, key).
|
|
@@ -229,138 +240,6 @@ class SmartOpenStreamingStorage:
|
|
|
229
240
|
|
|
230
241
|
return packages
|
|
231
242
|
|
|
232
|
-
def _collect_attachments(self, data: dict) -> list[dict]:
|
|
233
|
-
"""Collect all attachment data from the input data structure.
|
|
234
|
-
|
|
235
|
-
This method handles both direct attachments (under 'attachments' key) and
|
|
236
|
-
nested attachments within items. It returns raw attachment data without validation.
|
|
237
|
-
|
|
238
|
-
Args:
|
|
239
|
-
data: The data dictionary containing items with attachments
|
|
240
|
-
|
|
241
|
-
Returns:
|
|
242
|
-
List of raw attachment dictionaries with metadata
|
|
243
|
-
"""
|
|
244
|
-
all_attachments = []
|
|
245
|
-
|
|
246
|
-
for key, value in data.items():
|
|
247
|
-
|
|
248
|
-
if not isinstance(value, list) or not value:
|
|
249
|
-
continue
|
|
250
|
-
|
|
251
|
-
# Collect direct attachments if this key is "attachments"
|
|
252
|
-
if key == "attachments":
|
|
253
|
-
all_attachments.extend(self._collect_direct_attachments(value))
|
|
254
|
-
|
|
255
|
-
# Collect nested attachments from items
|
|
256
|
-
all_attachments.extend(self._collect_nested_attachments(key, value))
|
|
257
|
-
|
|
258
|
-
logger.debug(f"Collected {len(all_attachments)} raw attachments")
|
|
259
|
-
return all_attachments
|
|
260
|
-
|
|
261
|
-
def _collect_direct_attachments(self, attachments_list: list) -> list[dict]:
|
|
262
|
-
"""Collect attachments from a direct attachments list.
|
|
263
|
-
|
|
264
|
-
Args:
|
|
265
|
-
attachments_list: List of attachment dictionaries
|
|
266
|
-
|
|
267
|
-
Returns:
|
|
268
|
-
List of attachment data dictionaries with metadata
|
|
269
|
-
"""
|
|
270
|
-
direct_attachments = []
|
|
271
|
-
|
|
272
|
-
for idx, attachment in enumerate(attachments_list):
|
|
273
|
-
if not isinstance(attachment, dict):
|
|
274
|
-
continue
|
|
275
|
-
|
|
276
|
-
# Check if this looks like an attachment (has file_name or download_url)
|
|
277
|
-
if "file_name" in attachment or "download_url" in attachment:
|
|
278
|
-
# Transform download_url to internal access package URL for access package display
|
|
279
|
-
if "download_url" in attachment:
|
|
280
|
-
attachment["original_download_url"] = attachment["download_url"]
|
|
281
|
-
attachment["download_url"] = (
|
|
282
|
-
f"attachments/{attachment.get('file_name', f'attachment_{idx}')}"
|
|
283
|
-
)
|
|
284
|
-
|
|
285
|
-
direct_attachments.append(attachment)
|
|
286
|
-
|
|
287
|
-
return direct_attachments
|
|
288
|
-
|
|
289
|
-
def _collect_nested_attachments(self, key: str, items: list) -> list[dict]:
|
|
290
|
-
"""Collect attachments from nested items.
|
|
291
|
-
|
|
292
|
-
Args:
|
|
293
|
-
key: The key for the items list
|
|
294
|
-
items: List of items that may contain attachments
|
|
295
|
-
|
|
296
|
-
Returns:
|
|
297
|
-
List of attachment data dictionaries with metadata
|
|
298
|
-
"""
|
|
299
|
-
nested_attachments = []
|
|
300
|
-
|
|
301
|
-
for item in items:
|
|
302
|
-
if not isinstance(item, dict):
|
|
303
|
-
continue
|
|
304
|
-
|
|
305
|
-
# Recursively search for attachments in nested structures
|
|
306
|
-
item_attachments = self._find_attachments_recursive(item, key)
|
|
307
|
-
nested_attachments.extend(item_attachments)
|
|
308
|
-
|
|
309
|
-
return nested_attachments
|
|
310
|
-
|
|
311
|
-
def _find_attachments_recursive(
|
|
312
|
-
self, item: dict, context_key: str, path: str = ""
|
|
313
|
-
) -> list[dict]:
|
|
314
|
-
"""Recursively find attachments in nested dictionary structures.
|
|
315
|
-
|
|
316
|
-
Args:
|
|
317
|
-
item: Dictionary item to search
|
|
318
|
-
context_key: The top-level key for context
|
|
319
|
-
path: Current path in the nested structure
|
|
320
|
-
|
|
321
|
-
Returns:
|
|
322
|
-
List of attachment data dictionaries with metadata
|
|
323
|
-
"""
|
|
324
|
-
attachments = []
|
|
325
|
-
|
|
326
|
-
# Check if this item has direct attachments
|
|
327
|
-
if "attachments" in item and isinstance(item["attachments"], list):
|
|
328
|
-
for attachment in item["attachments"]:
|
|
329
|
-
if not isinstance(attachment, dict):
|
|
330
|
-
continue
|
|
331
|
-
|
|
332
|
-
# Check if this looks like an attachment
|
|
333
|
-
if "file_name" in attachment or "download_url" in attachment:
|
|
334
|
-
# Add context about which item this attachment belongs to
|
|
335
|
-
attachment_with_context = attachment.copy()
|
|
336
|
-
attachment_with_context["_context"] = {
|
|
337
|
-
"key": context_key,
|
|
338
|
-
"item_id": item.get("id", "unknown"),
|
|
339
|
-
"path": path,
|
|
340
|
-
}
|
|
341
|
-
|
|
342
|
-
# Transform download_url to internal access package URL
|
|
343
|
-
if "download_url" in attachment:
|
|
344
|
-
attachment_with_context["original_download_url"] = attachment[
|
|
345
|
-
"download_url"
|
|
346
|
-
]
|
|
347
|
-
attachment_with_context["download_url"] = (
|
|
348
|
-
f"attachments/{attachment.get('file_name', 'attachment')}"
|
|
349
|
-
)
|
|
350
|
-
|
|
351
|
-
attachments.append(attachment_with_context)
|
|
352
|
-
|
|
353
|
-
# Recursively search nested dictionaries
|
|
354
|
-
for key, value in item.items():
|
|
355
|
-
if isinstance(value, dict):
|
|
356
|
-
current_path = f"{path}.{key}" if path else key
|
|
357
|
-
nested_attachments = self._find_attachments_recursive(
|
|
358
|
-
value, context_key, current_path
|
|
359
|
-
)
|
|
360
|
-
attachments.extend(nested_attachments)
|
|
361
|
-
|
|
362
|
-
return attachments
|
|
363
|
-
|
|
364
243
|
def _validate_attachment(
|
|
365
244
|
self, attachment: dict
|
|
366
245
|
) -> Optional[AttachmentProcessingInfo]:
|
|
@@ -373,12 +252,8 @@ class SmartOpenStreamingStorage:
|
|
|
373
252
|
AttachmentProcessingInfo if valid, None otherwise
|
|
374
253
|
"""
|
|
375
254
|
try:
|
|
376
|
-
# Extract
|
|
377
|
-
storage_key = (
|
|
378
|
-
attachment.get("original_download_url")
|
|
379
|
-
or attachment.get("download_url")
|
|
380
|
-
or attachment.get("file_name", "")
|
|
381
|
-
)
|
|
255
|
+
# Extract storage key using shared utility
|
|
256
|
+
storage_key = extract_storage_key_from_attachment(attachment)
|
|
382
257
|
if not storage_key:
|
|
383
258
|
return None
|
|
384
259
|
|
|
@@ -390,11 +265,8 @@ class SmartOpenStreamingStorage:
|
|
|
390
265
|
content_type=attachment.get("content_type"),
|
|
391
266
|
)
|
|
392
267
|
|
|
393
|
-
#
|
|
394
|
-
base_path =
|
|
395
|
-
if attachment.get("_context"):
|
|
396
|
-
context = attachment["_context"]
|
|
397
|
-
base_path = f"{context['key']}/{context['item_id']}/attachments"
|
|
268
|
+
# Resolve base path using shared utility
|
|
269
|
+
base_path = resolve_base_path_from_context(attachment)
|
|
398
270
|
|
|
399
271
|
# Create AttachmentProcessingInfo
|
|
400
272
|
processing_info = AttachmentProcessingInfo(
|
|
@@ -403,9 +275,6 @@ class SmartOpenStreamingStorage:
|
|
|
403
275
|
item=attachment,
|
|
404
276
|
)
|
|
405
277
|
|
|
406
|
-
logger.debug(
|
|
407
|
-
f"Successfully validated attachment: {attachment_info.storage_key}"
|
|
408
|
-
)
|
|
409
278
|
return processing_info
|
|
410
279
|
|
|
411
280
|
except (ValueError, TypeError, KeyError) as e:
|
|
@@ -438,9 +307,6 @@ class SmartOpenStreamingStorage:
|
|
|
438
307
|
total_bytes += len(chunk)
|
|
439
308
|
yield chunk
|
|
440
309
|
|
|
441
|
-
logger.debug(
|
|
442
|
-
f"Completed streaming {chunk_count} chunks ({total_bytes} bytes) for {storage_key}"
|
|
443
|
-
)
|
|
444
310
|
except Exception as e:
|
|
445
311
|
logger.warning(f"Failed to stream attachment {storage_key}: {e}")
|
|
446
312
|
# Yield empty content on failure
|
|
@@ -449,10 +315,10 @@ class SmartOpenStreamingStorage:
|
|
|
449
315
|
def _collect_and_validate_attachments(
|
|
450
316
|
self, data: dict
|
|
451
317
|
) -> list[AttachmentProcessingInfo]:
|
|
452
|
-
"""Collect and validate
|
|
318
|
+
"""Collect and validate attachments using the same contextual approach as DSR report builder.
|
|
453
319
|
|
|
454
|
-
This method
|
|
455
|
-
|
|
320
|
+
This method uses the shared contextual processing logic to ensure consistency
|
|
321
|
+
between DSR report builder and streaming storage.
|
|
456
322
|
|
|
457
323
|
Args:
|
|
458
324
|
data: The data dictionary containing items with attachments
|
|
@@ -460,17 +326,64 @@ class SmartOpenStreamingStorage:
|
|
|
460
326
|
Returns:
|
|
461
327
|
List of validated AttachmentProcessingInfo objects
|
|
462
328
|
"""
|
|
463
|
-
#
|
|
464
|
-
|
|
329
|
+
# Initialize tracking structures (similar to DSR report builder)
|
|
330
|
+
used_filenames_data: set[str] = set()
|
|
331
|
+
used_filenames_attachments: set[str] = set()
|
|
332
|
+
processed_attachments: dict[tuple[str, str], str] = {}
|
|
333
|
+
|
|
334
|
+
# Use the shared contextual processing function
|
|
335
|
+
processed_attachments_list = process_attachments_contextually(
|
|
336
|
+
data,
|
|
337
|
+
used_filenames_data,
|
|
338
|
+
used_filenames_attachments,
|
|
339
|
+
processed_attachments,
|
|
340
|
+
enable_streaming=True, # Always use streaming mode for storage
|
|
341
|
+
)
|
|
342
|
+
|
|
343
|
+
# Convert to AttachmentProcessingInfo objects using shared utility
|
|
344
|
+
return convert_processed_attachments_to_attachment_processing_info(
|
|
345
|
+
processed_attachments_list, self._validate_attachment
|
|
346
|
+
)
|
|
347
|
+
|
|
348
|
+
def _collect_and_validate_attachments_from_dsr_builder(
|
|
349
|
+
self, data: dict, dsr_builder: "DsrReportBuilder"
|
|
350
|
+
) -> list[AttachmentProcessingInfo]:
|
|
351
|
+
"""Collect and validate attachments using the DSR report builder's processed attachments.
|
|
352
|
+
|
|
353
|
+
This method reuses the DSR report builder's processed attachments to avoid
|
|
354
|
+
duplicate processing and ensure consistency.
|
|
465
355
|
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
356
|
+
Args:
|
|
357
|
+
data: The data dictionary containing items with attachments
|
|
358
|
+
dsr_builder: The DSR report builder instance that has already processed attachments
|
|
359
|
+
|
|
360
|
+
Returns:
|
|
361
|
+
List of validated AttachmentProcessingInfo objects
|
|
362
|
+
"""
|
|
363
|
+
# Use the DSR report builder's processed attachments
|
|
364
|
+
# Create temporary sets for compatibility with the shared function
|
|
365
|
+
used_filenames_data = set()
|
|
366
|
+
used_filenames_attachments = set()
|
|
367
|
+
|
|
368
|
+
# Populate the temporary sets from the DSR builder's per-dataset tracking
|
|
369
|
+
for dataset_name, filenames in dsr_builder.used_filenames_per_dataset.items():
|
|
370
|
+
if dataset_name == "attachments":
|
|
371
|
+
used_filenames_attachments.update(filenames)
|
|
372
|
+
else:
|
|
373
|
+
used_filenames_data.update(filenames)
|
|
374
|
+
|
|
375
|
+
processed_attachments_list = process_attachments_contextually(
|
|
376
|
+
data,
|
|
377
|
+
used_filenames_data,
|
|
378
|
+
used_filenames_attachments,
|
|
379
|
+
dsr_builder.processed_attachments,
|
|
380
|
+
enable_streaming=True, # Always use streaming mode for storage
|
|
381
|
+
)
|
|
472
382
|
|
|
473
|
-
|
|
383
|
+
# Convert to AttachmentProcessingInfo objects using shared utility
|
|
384
|
+
return convert_processed_attachments_to_attachment_processing_info(
|
|
385
|
+
processed_attachments_list, self._validate_attachment
|
|
386
|
+
)
|
|
474
387
|
|
|
475
388
|
@retry_cloud_storage_operation(
|
|
476
389
|
provider="smart_open_streaming",
|
|
@@ -514,6 +427,9 @@ class SmartOpenStreamingStorage:
|
|
|
514
427
|
if not privacy_request:
|
|
515
428
|
raise ValueError("Privacy request must be provided")
|
|
516
429
|
|
|
430
|
+
# Reset used filenames for this upload operation
|
|
431
|
+
self.used_filenames_per_dataset.clear()
|
|
432
|
+
|
|
517
433
|
# Use default buffer config if none provided
|
|
518
434
|
if buffer_config is None:
|
|
519
435
|
buffer_config = StreamingBufferConfig()
|
|
@@ -628,18 +544,22 @@ class SmartOpenStreamingStorage:
|
|
|
628
544
|
"""
|
|
629
545
|
# Generate the DSR report first
|
|
630
546
|
try:
|
|
631
|
-
|
|
547
|
+
dsr_builder = DsrReportBuilder(
|
|
632
548
|
privacy_request=privacy_request,
|
|
633
549
|
dsr_data=data,
|
|
634
|
-
|
|
550
|
+
enable_streaming=True,
|
|
551
|
+
)
|
|
552
|
+
dsr_buffer = dsr_builder.generate()
|
|
635
553
|
# Reset buffer position to ensure it can be read multiple times
|
|
636
554
|
dsr_buffer.seek(0)
|
|
637
555
|
except Exception as e:
|
|
638
556
|
logger.error(f"Failed to generate DSR report: {e}")
|
|
639
557
|
raise StorageUploadError(f"Failed to generate DSR report: {e}") from e
|
|
640
558
|
|
|
641
|
-
#
|
|
642
|
-
all_attachments = self.
|
|
559
|
+
# Use the DSR report builder's processed attachments to avoid duplicates
|
|
560
|
+
all_attachments = self._collect_and_validate_attachments_from_dsr_builder(
|
|
561
|
+
data, dsr_builder
|
|
562
|
+
)
|
|
643
563
|
|
|
644
564
|
if not all_attachments:
|
|
645
565
|
# No attachments, just upload the DSR report
|
|
@@ -733,7 +653,7 @@ class SmartOpenStreamingStorage:
|
|
|
733
653
|
batch_size: Number of attachments to process in each batch
|
|
734
654
|
resp_format: Response format (csv, json)
|
|
735
655
|
"""
|
|
736
|
-
# Collect and validate all attachments
|
|
656
|
+
# Collect and validate all attachments using shared contextual processing
|
|
737
657
|
all_attachments = self._collect_and_validate_attachments(data)
|
|
738
658
|
|
|
739
659
|
if not all_attachments:
|
|
@@ -943,7 +863,24 @@ class SmartOpenStreamingStorage:
|
|
|
943
863
|
f"Could not parse storage URL: {storage_key} - {e}"
|
|
944
864
|
) from e
|
|
945
865
|
|
|
946
|
-
|
|
866
|
+
# Generate unique filename using same logic as DSR report builder
|
|
867
|
+
original_filename = (
|
|
868
|
+
attachment_info.attachment.file_name or DEFAULT_ATTACHMENT_NAME
|
|
869
|
+
)
|
|
870
|
+
|
|
871
|
+
# Determine dataset name from base_path using shared utility
|
|
872
|
+
dataset_name = determine_dataset_name_from_path(attachment_info.base_path)
|
|
873
|
+
|
|
874
|
+
if dataset_name not in self.used_filenames_per_dataset:
|
|
875
|
+
self.used_filenames_per_dataset[dataset_name] = set()
|
|
876
|
+
|
|
877
|
+
unique_filename = get_unique_filename(
|
|
878
|
+
original_filename, self.used_filenames_per_dataset[dataset_name]
|
|
879
|
+
)
|
|
880
|
+
self.used_filenames_per_dataset[dataset_name].add(unique_filename)
|
|
881
|
+
file_path = resolve_attachment_storage_path(
|
|
882
|
+
unique_filename, attachment_info.base_path
|
|
883
|
+
)
|
|
947
884
|
|
|
948
885
|
try:
|
|
949
886
|
content_stream = self._create_attachment_content_stream(
|