ethyca-fides 2.68.1b2__py2.py3-none-any.whl → 2.68.1b4__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ethyca-fides might be problematic. Click here for more details.
- {ethyca_fides-2.68.1b2.dist-info → ethyca_fides-2.68.1b4.dist-info}/METADATA +3 -1
- {ethyca_fides-2.68.1b2.dist-info → ethyca_fides-2.68.1b4.dist-info}/RECORD +253 -232
- fides/_version.py +3 -3
- fides/api/alembic/migrations/versions/90502bcda282_update_request_tasks_add_polling_async.py +35 -0
- fides/api/alembic/migrations/versions/b1a2c3d4e5f6_add_location_to_privacy_request.py +26 -0
- fides/api/api/v1/api.py +2 -0
- fides/api/api/v1/endpoints/dsr_package_link.py +167 -0
- fides/api/api/v1/endpoints/privacy_request_endpoints.py +31 -1
- fides/api/api/v1/endpoints/user_endpoints.py +4 -0
- fides/api/common_exceptions.py +12 -3
- fides/api/models/detection_discovery/core.py +6 -0
- fides/api/models/privacy_request/privacy_request.py +1 -0
- fides/api/models/privacy_request/request_task.py +25 -0
- fides/api/models/privacy_request/webhook.py +33 -1
- fides/api/oauth/utils.py +122 -57
- fides/api/schemas/application_config.py +7 -0
- fides/api/schemas/connection_configuration/connection_type_system_map.py +6 -0
- fides/api/schemas/enums/__init__.py +0 -0
- fides/api/schemas/enums/connection_category.py +20 -0
- fides/api/schemas/enums/integration_feature.py +23 -0
- fides/api/schemas/external_https.py +9 -0
- fides/api/schemas/privacy_center_config.py +48 -19
- fides/api/schemas/privacy_request.py +16 -0
- fides/api/schemas/saas/display_info.py +19 -0
- fides/api/schemas/saas/saas_config.py +2 -0
- fides/api/schemas/storage/storage.py +4 -0
- fides/api/service/async_dsr/__init__.py +0 -0
- fides/api/service/async_dsr/async_dsr_service.py +75 -0
- fides/api/service/connectors/saas_connector.py +5 -6
- fides/api/service/privacy_request/dsr_package/dsr_report_builder.py +6 -4
- fides/api/service/privacy_request/request_runner_service.py +41 -4
- fides/api/service/privacy_request/request_service.py +50 -2
- fides/api/service/storage/storage_uploader_service.py +80 -5
- fides/api/service/storage/streaming/__init__.py +42 -0
- fides/api/service/storage/streaming/base_storage_client.py +61 -0
- fides/api/service/storage/streaming/dsr_storage.py +98 -0
- fides/api/service/storage/streaming/retry.py +282 -0
- fides/api/service/storage/streaming/s3/__init__.py +5 -0
- fides/api/service/storage/streaming/s3/s3_storage_client.py +113 -0
- fides/api/service/storage/streaming/s3/streaming_s3.py +196 -0
- fides/api/service/storage/streaming/schemas.py +173 -0
- fides/api/service/storage/streaming/smart_open_client.py +265 -0
- fides/api/service/storage/streaming/smart_open_streaming_storage.py +998 -0
- fides/api/service/storage/streaming/storage_client_factory.py +60 -0
- fides/api/task/graph_task.py +4 -4
- fides/api/task/manual/manual_task_graph_task.py +3 -4
- fides/api/util/connection_type.py +20 -0
- fides/api/util/text.py +51 -0
- fides/common/api/v1/urn_registry.py +3 -0
- fides/config/execution_settings.py +4 -0
- fides/service/privacy_request/privacy_request_service.py +84 -9
- fides/ui-build/static/admin/404.html +1 -1
- fides/ui-build/static/admin/_next/static/LRCvfOqg1kP5kGnkD84G4/_buildManifest.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/1099-b973dfdfc5c3de90.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/1345-b60d1f3442379c73.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/{1817-c90365325f8a3d75.js → 1817-74692de5d760a664.js} +1 -1
- fides/ui-build/static/admin/_next/static/chunks/{1975.e5cc7a1ccd477671.js → 1975.78e719130cfe3fd6.js} +1 -1
- fides/ui-build/static/admin/_next/static/chunks/{2921-46f9465c2852a46b.js → 2921-2d9261e8e2e127c0.js} +1 -1
- fides/ui-build/static/admin/_next/static/chunks/3620-ebd89f91b82661e8.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/3729-ccf90cdaae158f39.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/3847-2759bf1f47a1d29e.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/3855-4174a4d4c205d6e8.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/3872-660aba76572c811b.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/{3923-a33633feba5e655e.js → 3923-c6cdc2e5278ae9a7.js} +1 -1
- fides/ui-build/static/admin/_next/static/chunks/{401-741bb31b586b7c96.js → 401-8bc2c6c84172c096.js} +1 -1
- fides/ui-build/static/admin/_next/static/chunks/{4121-94354b50a41f8497.js → 4121-9a4ebceff9accb7f.js} +1 -1
- fides/ui-build/static/admin/_next/static/chunks/431-86ad2beeb93c95c9.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/4608-4d31340b0d0157c1.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/4786-aaef673b30c19e2e.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/4808-a654c7f7a1ca62c8.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/4844-cd7e1d0c7bb94094.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/5258-bc4a25d43e4aa07d.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/5487-37c78c4799ba5223.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/549-2213dc1c34143cda.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/{6084-02abe12327fc3dbc.js → 6084-55cc66e7c94f0686.js} +1 -1
- fides/ui-build/static/admin/_next/static/chunks/{6853-270261ef5537a106.js → 6853-313ce974d33432fb.js} +1 -1
- fides/ui-build/static/admin/_next/static/chunks/6954-021bd06d0ab59c3c.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/7476-2fc286c2a9125eb8.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/7630-b9a41262a69edf5e.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/768-034e121688a3bbdd.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/{787-5ba991cad1f7664a.js → 787-8df7118742e84908.js} +1 -1
- fides/ui-build/static/admin/_next/static/chunks/79-d2ace89108ead8ae.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/796-2de6dac5f311d54a.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/8002-cfdc6574bd841892.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/9046-c44e41da49338c6c.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/9676.b7d5d1d90b9da224.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/{9826-8c81c97a72510fcf.js → 9826-d9addbd5ac990fa4.js} +1 -1
- fides/ui-build/static/admin/_next/static/chunks/9951-6ee5c0a23951a07f.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/pages/{404-9174cdb70126c2c5.js → 404-9644eb282f2dcd71.js} +1 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/{_app-2c10f6b217b7978b.js → _app-284cba7174fa1f16.js} +136 -135
- fides/ui-build/static/admin/_next/static/chunks/pages/add-systems/{manual-621416493c89ef01.js → manual-42b7fd34712f49bd.js} +1 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/add-systems/{multiple-0b9908c3e1dfe49e.js → multiple-4f164eab0960bbe0.js} +1 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/add-systems-985d3c9179e69d7f.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/pages/consent/configure/{add-vendors-5bb1b31ae8752250.js → add-vendors-61090926e5f98a5d.js} +1 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/consent/configure-17ffe691b91cee2e.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/pages/consent/privacy-experience/{[id]-4e4d9426743b5cb4.js → [id]-95c13bca5c1e575e.js} +1 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/consent/{privacy-experience-d72460348fadcab8.js → privacy-experience-609399510a60beb9.js} +1 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/consent/privacy-notices/{[id]-3e7ddc252da00c98.js → [id]-d7d8f228ac74b26e.js} +1 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/consent/privacy-notices/{new-35a7c305beee9428.js → new-821c0f82d5a2b7d4.js} +1 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/consent/privacy-notices-8365782543cf6ab9.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/pages/consent/{properties-ab96939421639153.js → properties-40a7aa65f4d13cf9.js} +1 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/consent/reporting-e4bacfc5c2ed2324.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/pages/{consent-13240e3ca77acfeb.js → consent-70c5c6aa5389d99f.js} +1 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/data-catalog/[systemId]/projects/[projectUrn]/{[resourceUrn]-aad6047a4604b945.js → [resourceUrn]-adc500a03e239857.js} +1 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/data-catalog/[systemId]/projects/{[projectUrn]-bd37b407c80c6986.js → [projectUrn]-3207f62e5012611b.js} +1 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/data-catalog/[systemId]/projects-7b42dee0fb696658.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/pages/data-catalog/[systemId]/resources/{[resourceUrn]-b6b98cea25dd94fa.js → [resourceUrn]-c8b3d090e4ba60d3.js} +1 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/data-catalog-31a45ea2ca2a7f04.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/pages/data-discovery/action-center/[monitorId]/[systemId]-6172c2eb539319fd.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/pages/data-discovery/action-center/[monitorId]-945d354ff057fb03.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/pages/data-discovery/action-center-d9795e00f39cf4e9.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/pages/data-discovery/activity-657833fd8528280f.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/pages/data-discovery/detection/{[resourceUrn]-31e6c54794a9883e.js → [resourceUrn]-22eec362dfbb1d2a.js} +1 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/data-discovery/{detection-2822a423a7ad0550.js → detection-4decce5ef996e563.js} +1 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/data-discovery/discovery/{[resourceUrn]-f98dd251babb7e28.js → [resourceUrn]-01acdd1ad492fd89.js} +1 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/data-discovery/{discovery-56eb4c014f0d96a3.js → discovery-85fdbf4cde60d910.js} +1 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/{datamap-8f88dc31c5144ea8.js → datamap-3a4b89fb21d14753.js} +1 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/dataset/[datasetId]/[collectionName]/[...subfieldNames]-cb8d303f56091bd5.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/pages/dataset/[datasetId]/[collectionName]-401c8be76d9daec7.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/pages/dataset/[datasetId]-97e2d375b21cfe43.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/pages/dataset/new-40ef544ca1f2c9b9.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/pages/dataset-e3c763f8e71f8e24.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/pages/datastore-connection/{[id]-67a7fe58b96ea739.js → [id]-152e5d15705ec072.js} +1 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/datastore-connection/new-651b10cae0e99a05.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/pages/datastore-connection-03c54bc9fb18d2b0.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/pages/{index-876bfd7210040cec.js → index-3d19b9ffa15a928a.js} +1 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/integrations/[id]-4b0bb4ccfb237d41.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/pages/integrations-78d4e0c14654148b.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/pages/messaging/{[id]-6e796c3fe632280b.js → [id]-72cb360a6d14e701.js} +1 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/messaging/{add-template-fa0f3841c5bdfdeb.js → add-template-0ed67cf774d5cbf5.js} +1 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/messaging-b06a2204e2a5b667.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/pages/poc/ant-components-7050899b3f792129.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/pages/poc/form-experiments/{AntForm-11503454a62d8d7b.js → AntForm-7c3466f4d5797e55.js} +1 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/poc/form-experiments/{FormikAntFormItem-a504941807bdb7f1.js → FormikAntFormItem-8de252f25871bab9.js} +1 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/poc/form-experiments/{FormikControlled-0119403c8ff97f83.js → FormikControlled-cd6de0da47f980cf.js} +1 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/poc/form-experiments/{FormikField-94f6d57d6c94ddf7.js → FormikField-7c238a881fe30e28.js} +1 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/poc/{forms-ed1a3ae09d72df89.js → forms-d4f3e8f67f76f146.js} +1 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/poc/table-migration-c9220e20c1d93758.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/pages/privacy-requests/[id]-b9d6886a3f157120.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/pages/privacy-requests/configure/messaging-9c1fd7867b2d80d7.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/pages/privacy-requests/configure/storage-fc959ed21dbce38c.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/pages/privacy-requests/configure-44a4a638dcb2722a.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/pages/privacy-requests-1433c9f9501a884f.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/pages/properties/{[id]-41976b28503623cd.js → [id]-16e0b42cb342aa5f.js} +1 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/properties/{add-property-cb438d8f5ec6007a.js → add-property-ebd114a86b809391.js} +1 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/{properties-b6db7036993709b3.js → properties-901be5fa4a48f48c.js} +1 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/reporting/datamap-da9ced1e20681154.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/pages/settings/about/alpha-0174554c0ac5958f.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/pages/settings/about-6f45ddbf675e66d2.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/pages/settings/consent/[configuration_id]/[purpose_id]-275c49e6089c5c9f.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/pages/settings/consent-1a8d05e19f06d857.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/pages/settings/custom-fields-49d86b9ca4523ca6.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/pages/settings/{domain-records-386368bf7cb31771.js → domain-records-f71b4b95d91db926.js} +1 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/settings/domains-a595cad18cf04673.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/pages/settings/email-templates-6f7f9751689b042c.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/pages/settings/{locations-b41fb5ad277088ab.js → locations-e2c88d7f779fe604.js} +1 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/settings/organization-c65acd2b7ab04753.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/pages/settings/{regulations-a94dfeea43fbca7d.js → regulations-c1c699eeb40a9dc0.js} +1 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/systems/configure/[id]/test-datasets-a274e2191b87e315.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/pages/systems/configure/{[id]-18b316e2dad73731.js → [id]-4a48b4f996a64957.js} +1 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/systems-30debc87925634d9.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/pages/taxonomy-ca625b1296a029f0.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/pages/user-management/profile/{[id]-3237881945acc0ee.js → [id]-7a3180b235eb8846.js} +1 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/{user-management-a3a50d9d79066935.js → user-management-5e2d0acf575252ca.js} +1 -1
- fides/ui-build/static/admin/_next/static/chunks/{webpack-69658aeaf6155d89.js → webpack-4502d4d67006b48f.js} +1 -1
- fides/ui-build/static/admin/_next/static/css/43d0c0fc207767eb.css +1 -0
- fides/ui-build/static/admin/add-systems/manual.html +1 -1
- fides/ui-build/static/admin/add-systems/multiple.html +1 -1
- fides/ui-build/static/admin/add-systems.html +1 -1
- fides/ui-build/static/admin/consent/configure/add-vendors.html +1 -1
- fides/ui-build/static/admin/consent/configure.html +1 -1
- fides/ui-build/static/admin/consent/privacy-experience/[id].html +1 -1
- fides/ui-build/static/admin/consent/privacy-experience/new.html +1 -1
- fides/ui-build/static/admin/consent/privacy-experience.html +1 -1
- fides/ui-build/static/admin/consent/privacy-notices/[id].html +1 -1
- fides/ui-build/static/admin/consent/privacy-notices/new.html +1 -1
- fides/ui-build/static/admin/consent/privacy-notices.html +1 -1
- fides/ui-build/static/admin/consent/properties.html +1 -1
- fides/ui-build/static/admin/consent/reporting.html +1 -1
- fides/ui-build/static/admin/consent.html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/projects/[projectUrn]/[resourceUrn].html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/projects/[projectUrn].html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/projects.html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/resources/[resourceUrn].html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/resources.html +1 -1
- fides/ui-build/static/admin/data-catalog.html +1 -1
- fides/ui-build/static/admin/data-discovery/action-center/[monitorId]/[systemId].html +1 -1
- fides/ui-build/static/admin/data-discovery/action-center/[monitorId].html +1 -1
- fides/ui-build/static/admin/data-discovery/action-center.html +1 -1
- fides/ui-build/static/admin/data-discovery/activity.html +1 -1
- fides/ui-build/static/admin/data-discovery/detection/[resourceUrn].html +1 -1
- fides/ui-build/static/admin/data-discovery/detection.html +1 -1
- fides/ui-build/static/admin/data-discovery/discovery/[resourceUrn].html +1 -1
- fides/ui-build/static/admin/data-discovery/discovery.html +1 -1
- fides/ui-build/static/admin/datamap.html +1 -1
- fides/ui-build/static/admin/dataset/[datasetId]/[collectionName]/[...subfieldNames].html +1 -1
- fides/ui-build/static/admin/dataset/[datasetId]/[collectionName].html +1 -1
- fides/ui-build/static/admin/dataset/[datasetId].html +1 -1
- fides/ui-build/static/admin/dataset/new.html +1 -1
- fides/ui-build/static/admin/dataset.html +1 -1
- fides/ui-build/static/admin/datastore-connection/[id].html +1 -1
- fides/ui-build/static/admin/datastore-connection/new.html +1 -1
- fides/ui-build/static/admin/datastore-connection.html +1 -1
- fides/ui-build/static/admin/index.html +1 -1
- fides/ui-build/static/admin/integrations/[id].html +1 -1
- fides/ui-build/static/admin/integrations.html +1 -1
- fides/ui-build/static/admin/lib/fides-preview.js +1 -1
- fides/ui-build/static/admin/lib/fides-tcf.js +2 -2
- fides/ui-build/static/admin/login/[provider].html +1 -1
- fides/ui-build/static/admin/login.html +1 -1
- fides/ui-build/static/admin/messaging/[id].html +1 -1
- fides/ui-build/static/admin/messaging/add-template.html +1 -1
- fides/ui-build/static/admin/messaging.html +1 -1
- fides/ui-build/static/admin/poc/ant-components.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/AntForm.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/FormikAntFormItem.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/FormikControlled.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/FormikField.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/FormikSpreadField.html +1 -1
- fides/ui-build/static/admin/poc/forms.html +1 -1
- fides/ui-build/static/admin/poc/table-migration.html +1 -1
- fides/ui-build/static/admin/privacy-requests/[id].html +1 -1
- fides/ui-build/static/admin/privacy-requests/configure/messaging.html +1 -1
- fides/ui-build/static/admin/privacy-requests/configure/storage.html +1 -1
- fides/ui-build/static/admin/privacy-requests/configure.html +1 -1
- fides/ui-build/static/admin/privacy-requests.html +1 -1
- fides/ui-build/static/admin/properties/[id].html +1 -1
- fides/ui-build/static/admin/properties/add-property.html +1 -1
- fides/ui-build/static/admin/properties.html +1 -1
- fides/ui-build/static/admin/reporting/datamap.html +1 -1
- fides/ui-build/static/admin/settings/about/alpha.html +1 -1
- fides/ui-build/static/admin/settings/about.html +1 -1
- fides/ui-build/static/admin/settings/consent/[configuration_id]/[purpose_id].html +1 -1
- fides/ui-build/static/admin/settings/consent.html +1 -1
- fides/ui-build/static/admin/settings/custom-fields.html +1 -1
- fides/ui-build/static/admin/settings/domain-records.html +1 -1
- fides/ui-build/static/admin/settings/domains.html +1 -1
- fides/ui-build/static/admin/settings/email-templates.html +1 -1
- fides/ui-build/static/admin/settings/locations.html +1 -1
- fides/ui-build/static/admin/settings/organization.html +1 -1
- fides/ui-build/static/admin/settings/regulations.html +1 -1
- fides/ui-build/static/admin/systems/configure/[id]/test-datasets.html +1 -1
- fides/ui-build/static/admin/systems/configure/[id].html +1 -1
- fides/ui-build/static/admin/systems.html +1 -1
- fides/ui-build/static/admin/taxonomy.html +1 -1
- fides/ui-build/static/admin/user-management/new.html +1 -1
- fides/ui-build/static/admin/user-management/profile/[id].html +1 -1
- fides/ui-build/static/admin/user-management.html +1 -1
- fides/ui-build/static/admin/_next/static/chunks/203-0c6cadcda98bdd33.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/3450-9314e1b15df8a8da.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/3855-4267fd8193e7f525.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/3872-ac5feefd40b61ae3.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/409-5bc4369b80a8c11d.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/4230-1ebc8c0ab293a077.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/431-a34d7ceff17c2169.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/4608-557fb24665b2e4bf.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/5309-ffdec884eec79d29.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/5574-831167a8da90e2e6.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/6662-499c189f932a35aa.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/6780-7d28e030f6516e5d.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/6882-7cc1d14e27a80c10.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/6954-7784e8d5ad6b8110.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/7476-4de465016d3433b4.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/7630-2a5c57787632693d.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/7725-c79513b04113112b.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/79-98cfab20bb831137.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/796-0b768155bf20505f.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/8735-f84afcc50885883c.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/9046-97a972cc8a8ed24d.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/9226-318dadf1c050ecda.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/9676.9e6828b42ef05e06.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/9951-4df2b67e0def5500.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/add-systems-18e96ce81dab51a4.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/consent/configure-54d7c7310763c66d.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/consent/privacy-notices-6bc3b73a21576869.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/consent/reporting-fe3d6887fecf0f86.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/data-catalog/[systemId]/projects-e4770acf7044e2f5.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/data-catalog-0db635c3483c9da8.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/data-discovery/action-center/[monitorId]/[systemId]-0c0e0a7798345541.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/data-discovery/action-center/[monitorId]-3c56e5fe072a44c6.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/data-discovery/action-center-58827eb86516931f.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/data-discovery/activity-6a90131dcecd694c.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/dataset/[datasetId]/[collectionName]/[...subfieldNames]-145fe9e4cfcb231d.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/dataset/[datasetId]/[collectionName]-8a1e5d140785c1e9.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/dataset/[datasetId]-227b5db4b472a6a7.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/dataset/new-8401f17fe5d9a1dc.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/dataset-7d77b3ad069be268.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/datastore-connection/new-90a8df230cb89877.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/datastore-connection-cfb25b02abb8da71.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/integrations/[id]-4e286a1e501a0c73.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/integrations-3fdc55d4c129e618.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/messaging-8f9c006b6166f002.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/poc/ant-components-6ba7ae4f26c06cb0.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/poc/table-migration-e8db3ad525e7ddbd.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/privacy-requests/[id]-c14dd24592369467.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/privacy-requests/configure/messaging-100d7d03930629a8.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/privacy-requests/configure/storage-6f8d1b3ec83cfcf0.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/privacy-requests/configure-3ce15577435d47cb.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/privacy-requests-709bcb0bc6a5382d.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/reporting/datamap-4bc3e281409265cc.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/settings/about/alpha-1ea40fcd6b4268bf.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/settings/about-65c7600fadc6e55a.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/settings/consent/[configuration_id]/[purpose_id]-33dab986141b3663.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/settings/consent-1195042727c399ed.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/settings/custom-fields-71b98858ecb4e097.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/settings/domains-cf427e04f862b5d2.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/settings/email-templates-eabeeec5bf2773c6.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/settings/organization-ee56698ae3a6a78b.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/systems/configure/[id]/test-datasets-0e2e98cc38ee5499.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/systems-c32589c86081b750.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/taxonomy-a8f09bf8f3204ca7.js +0 -1
- fides/ui-build/static/admin/_next/static/css/a72179b1754aadd3.css +0 -1
- fides/ui-build/static/admin/_next/static/qvk5eMANVfwYkdURE7fgG/_buildManifest.js +0 -1
- {ethyca_fides-2.68.1b2.dist-info → ethyca_fides-2.68.1b4.dist-info}/WHEEL +0 -0
- {ethyca_fides-2.68.1b2.dist-info → ethyca_fides-2.68.1b4.dist-info}/entry_points.txt +0 -0
- {ethyca_fides-2.68.1b2.dist-info → ethyca_fides-2.68.1b4.dist-info}/licenses/LICENSE +0 -0
- {ethyca_fides-2.68.1b2.dist-info → ethyca_fides-2.68.1b4.dist-info}/top_level.txt +0 -0
- /fides/ui-build/static/admin/_next/static/{qvk5eMANVfwYkdURE7fgG → LRCvfOqg1kP5kGnkD84G4}/_ssgManifest.js +0 -0
|
@@ -0,0 +1,998 @@
|
|
|
1
|
+
"""Smart-open based streaming storage for efficient cloud-to-cloud data transfer."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import csv
|
|
6
|
+
import json
|
|
7
|
+
from datetime import datetime
|
|
8
|
+
from io import BytesIO, StringIO
|
|
9
|
+
from itertools import chain
|
|
10
|
+
from typing import Any, Generator, Iterable, Optional, Tuple
|
|
11
|
+
from urllib.parse import urlparse
|
|
12
|
+
|
|
13
|
+
from fideslang.validation import AnyHttpUrlString
|
|
14
|
+
from loguru import logger
|
|
15
|
+
from stream_zip import _ZIP_32_TYPE, stream_zip
|
|
16
|
+
|
|
17
|
+
from fides.api.common_exceptions import StorageUploadError
|
|
18
|
+
from fides.api.models.privacy_request import PrivacyRequest
|
|
19
|
+
from fides.api.schemas.storage.storage import ResponseFormat
|
|
20
|
+
from fides.api.service.privacy_request.dsr_package.dsr_report_builder import (
|
|
21
|
+
DsrReportBuilder,
|
|
22
|
+
)
|
|
23
|
+
from fides.api.service.storage.streaming.dsr_storage import (
|
|
24
|
+
create_dsr_report_files_generator,
|
|
25
|
+
stream_dsr_buffer_to_storage,
|
|
26
|
+
)
|
|
27
|
+
from fides.api.service.storage.streaming.retry import retry_cloud_storage_operation
|
|
28
|
+
from fides.api.service.storage.streaming.schemas import (
|
|
29
|
+
CHUNK_SIZE_THRESHOLD,
|
|
30
|
+
AttachmentInfo,
|
|
31
|
+
AttachmentProcessingInfo,
|
|
32
|
+
PackageSplitConfig,
|
|
33
|
+
StorageUploadConfig,
|
|
34
|
+
StreamingBufferConfig,
|
|
35
|
+
)
|
|
36
|
+
from fides.api.service.storage.streaming.smart_open_client import SmartOpenStorageClient
|
|
37
|
+
|
|
38
|
+
DEFAULT_ATTACHMENT_NAME = "attachment"
|
|
39
|
+
DEFAULT_FILE_MODE = 0o644
|
|
40
|
+
S3_AMAZONAWS_COM_DOMAIN = ".s3.amazonaws.com"
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class SmartOpenStreamingStorage:
|
|
44
|
+
"""Streaming storage implementation using smart-open for efficient cloud-to-cloud data streaming.
|
|
45
|
+
|
|
46
|
+
This class maintains our DSR-specific business logic (package splitting, attachment processing)
|
|
47
|
+
while leveraging smart-open's mature streaming capabilities for storage operations.
|
|
48
|
+
|
|
49
|
+
Key streaming features:
|
|
50
|
+
- Data files (JSON/CSV): Small files loaded into memory for ZIP creation
|
|
51
|
+
- Attachment files: Streamed in chunks (8KB) without loading entire files to memory
|
|
52
|
+
- ZIP creation: Uses stream_zip for memory-efficient ZIP generation
|
|
53
|
+
- Upload: Streams ZIP chunks directly to destination storage
|
|
54
|
+
|
|
55
|
+
This ensures true cloud-to-cloud streaming with minimal memory usage and no local file storage.
|
|
56
|
+
"""
|
|
57
|
+
|
|
58
|
+
def __init__(
    self,
    storage_client: SmartOpenStorageClient,
    chunk_size: int = CHUNK_SIZE_THRESHOLD,
):
    """Bind the storage client and streaming chunk size to this instance.

    Args:
        storage_client: Smart-open based storage client used for all
            storage operations performed by this object.
        chunk_size: Size of chunks for streaming attachments; defaults to
            ``CHUNK_SIZE_THRESHOLD``.
    """
    # The two attributes are independent; nothing else is initialised here.
    self.chunk_size = chunk_size
    self.storage_client = storage_client
|
|
71
|
+
|
|
72
|
+
def _parse_storage_url(self, storage_key: str) -> tuple[str, str]:
|
|
73
|
+
"""Parse storage URL and return (bucket, key).
|
|
74
|
+
|
|
75
|
+
Supports multiple URL formats:
|
|
76
|
+
- s3://bucket/path
|
|
77
|
+
- https://bucket.s3.amazonaws.com/path
|
|
78
|
+
- http://bucket.s3.amazonaws.com/path
|
|
79
|
+
- Generic HTTP(S) URLs (returns domain as bucket, path as key)
|
|
80
|
+
|
|
81
|
+
Args:
|
|
82
|
+
storage_key: Storage key or URL
|
|
83
|
+
|
|
84
|
+
Returns:
|
|
85
|
+
Tuple of (bucket_name, object_key)
|
|
86
|
+
|
|
87
|
+
Raises:
|
|
88
|
+
ValueError: If URL cannot be parsed
|
|
89
|
+
"""
|
|
90
|
+
if storage_key.startswith("s3://"):
|
|
91
|
+
# Extract bucket from S3 URL: s3://bucket/path
|
|
92
|
+
parts = storage_key.split("/")
|
|
93
|
+
if len(parts) < 4:
|
|
94
|
+
raise ValueError(f"Invalid S3 URL format: {storage_key}")
|
|
95
|
+
return parts[2], "/".join(parts[3:])
|
|
96
|
+
|
|
97
|
+
if S3_AMAZONAWS_COM_DOMAIN in storage_key:
|
|
98
|
+
# Extract bucket and key from HTTP(S) S3 URL
|
|
99
|
+
clean_url = storage_key.split("?")[0]
|
|
100
|
+
parts = clean_url.split(S3_AMAZONAWS_COM_DOMAIN)
|
|
101
|
+
if len(parts) == 2:
|
|
102
|
+
bucket = parts[0].replace("https://", "").replace("http://", "")
|
|
103
|
+
key = parts[1].lstrip(
|
|
104
|
+
"/"
|
|
105
|
+
) # Strip leading forward slash for S3 compatibility
|
|
106
|
+
return bucket, key
|
|
107
|
+
|
|
108
|
+
# Handle generic HTTP(S) URLs
|
|
109
|
+
if storage_key.startswith(("http://", "https://")):
|
|
110
|
+
parsed = urlparse(storage_key)
|
|
111
|
+
bucket = parsed.netloc
|
|
112
|
+
key = parsed.path.lstrip("/")
|
|
113
|
+
return bucket, key
|
|
114
|
+
|
|
115
|
+
raise ValueError(f"Could not parse storage URL: {storage_key}")
|
|
116
|
+
|
|
117
|
+
def _convert_to_stream_zip_format(
    self, generator: Generator[Tuple[str, BytesIO, dict[str, Any]], None, None]
) -> Generator[Tuple[str, datetime, int, Any, Iterable[bytes]], None, None]:
    """Adapt (filename, BytesIO, metadata) entries to the tuples passed to stream_zip.

    Each incoming entry is re-emitted as
    (filename, datetime, mode, method, content_iter). The metadata element is
    ignored. The buffer is read in full, which is acceptable for data files
    because they are typically small JSON/CSV documents, and the content is
    handed over as a one-element iterator.
    """
    for entry_name, buffer, _metadata in generator:
        buffer.seek(0)
        payload = buffer.read()
        buffer.seek(0)  # leave the buffer rewound for potential reuse

        yield (
            entry_name,
            datetime.now(),
            DEFAULT_FILE_MODE,
            _ZIP_32_TYPE(),
            iter([payload]),
        )
|
|
134
|
+
|
|
135
|
+
def build_attachments_list(
    self, data: dict, config: PackageSplitConfig
) -> list[tuple[str, dict, int]]:
    """
    Flatten the data into (key, item, attachment_count) entries for package splitting.

    Only list values are inspected, and only dict items carrying a non-empty
    "attachments" list produce entries. An item holding more attachments than
    config.max_attachments is emitted as several shallow copies, each with one
    slice of at most config.max_attachments attachments.

    Args:
        data: The data to build the attachments list from; non-list values
            are ignored
        config: The configuration for package splitting (max_attachments is read)

    Returns:
        A list of (key, item, attachment_count) tuples
    """
    attachments_list: list[tuple[str, dict, int]] = []

    for key, value in data.items():
        if not isinstance(value, list):
            continue

        for item in value:
            # Non-dict entries cannot carry attachments. Skipping them avoids an
            # AttributeError on .get() and mirrors the isinstance guard used by
            # the attachment collectors in this class.
            if not isinstance(item, dict):
                continue

            attachments = item.get("attachments", [])
            # Only include items that have attachments
            if not isinstance(attachments, list) or not attachments:
                continue

            attachment_count = len(attachments)
            if attachment_count <= config.max_attachments:
                attachments_list.append((key, item, attachment_count))
                continue

            # A single item exceeds the limit: split it into multiple sub-items
            for start in range(0, attachment_count, config.max_attachments):
                sub_attachments = attachments[start : start + config.max_attachments]
                sub_item = item.copy()
                sub_item["attachments"] = sub_attachments
                attachments_list.append((key, sub_item, len(sub_attachments)))

    return attachments_list
|
|
178
|
+
|
|
179
|
+
def split_data_into_packages(
    self, data: dict, config: Optional[PackageSplitConfig] = None
) -> list[dict]:
    """Distribute items with attachments over packages bounded by max_attachments.

    Procedure:
    1. Build (key, item, attachment_count) entries via build_attachments_list,
       which also splits items that exceed the limit on their own
    2. Order the entries by attachment count, largest first
    3. Place each entry into the first existing package that still has room
    4. Open a new package only when no existing package can take the entry

    Args:
        data: The data to split
        config: Configuration for package splitting (defaults to PackageSplitConfig())

    Returns:
        List of data packages, each mapping a key to the list of items placed in it
    """
    split_config = PackageSplitConfig() if config is None else config

    # Largest entries first, so smaller ones can fill the remaining room
    ranked_entries = sorted(
        self.build_attachments_list(data, split_config),
        key=lambda entry: entry[2],
        reverse=True,
    )

    packages: list[dict[str, Any]] = []
    loads: list[int] = []  # attachments already placed in packages[i]

    for key, item, attachment_count in ranked_entries:
        target = next(
            (
                index
                for index, used in enumerate(loads)
                if used + attachment_count <= split_config.max_attachments
            ),
            None,
        )

        if target is None:
            # No existing package can take this entry
            packages.append({key: [item]})
            loads.append(attachment_count)
        else:
            packages[target].setdefault(key, []).append(item)
            loads[target] += attachment_count

    return packages
|
|
231
|
+
|
|
232
|
+
def _collect_attachments(self, data: dict) -> list[dict]:
    """Gather every raw attachment dictionary found in the input data.

    Only non-empty list values are examined. A list stored under the
    "attachments" key is treated as a list of attachments in its own right;
    every examined list, including that one, is additionally searched for
    attachments nested inside its items. No validation is performed here.

    Args:
        data: The data dictionary containing items with attachments

    Returns:
        List of raw attachment dictionaries with metadata
    """
    all_attachments: list[dict] = []

    for key, value in data.items():
        logger.debug(f"Processing key '{key}' with value type: {type(value)}")

        if isinstance(value, list) and value:
            if key == "attachments":
                # Direct attachments stored at the top level
                all_attachments += self._collect_direct_attachments(value)

            # Attachments nested inside the items of this list
            all_attachments += self._collect_nested_attachments(key, value)

    logger.debug(f"Collected {len(all_attachments)} raw attachments")
    return all_attachments
|
|
261
|
+
|
|
262
|
+
def _collect_direct_attachments(self, attachments_list: list) -> list[dict]:
    """Collect attachments from a direct attachments list.

    Entries that are not dicts, or that have neither a "file_name" nor a
    "download_url", are skipped. Accepted dicts are updated in place: the
    download URL is recorded under "original_download_url" and "download_url"
    is replaced by the path "attachments/<file_name>" ("attachment_<index>"
    is used when there is no file name).

    Args:
        attachments_list: List of attachment dictionaries

    Returns:
        List of attachment data dictionaries with metadata (the same dict
        objects that were passed in)
    """
    direct_attachments = []

    logger.debug(
        f"Found 'attachments' key with {len(attachments_list)} items - processing as direct attachments"
    )

    for idx, attachment in enumerate(attachments_list):
        if not isinstance(attachment, dict):
            continue

        # Only dicts that look like an attachment are collected
        if "file_name" not in attachment and "download_url" not in attachment:
            continue

        if "download_url" in attachment:
            # The dict is mutated in place, so a second pass over the same data
            # (for example a retried upload) sees the already rewritten URL.
            # Keep the first recorded source URL instead of overwriting it
            # with the internal "attachments/..." path.
            attachment.setdefault("original_download_url", attachment["download_url"])
            attachment["download_url"] = (
                f"attachments/{attachment.get('file_name', f'attachment_{idx}')}"
            )

        direct_attachments.append(attachment)

    return direct_attachments
|
|
293
|
+
|
|
294
|
+
def _collect_nested_attachments(self, key: str, items: list) -> list[dict]:
|
|
295
|
+
"""Collect attachments from nested items.
|
|
296
|
+
|
|
297
|
+
Args:
|
|
298
|
+
key: The key for the items list
|
|
299
|
+
items: List of items that may contain attachments
|
|
300
|
+
|
|
301
|
+
Returns:
|
|
302
|
+
List of attachment data dictionaries with metadata
|
|
303
|
+
"""
|
|
304
|
+
nested_attachments = []
|
|
305
|
+
|
|
306
|
+
for item in items:
|
|
307
|
+
if not isinstance(item, dict):
|
|
308
|
+
continue
|
|
309
|
+
|
|
310
|
+
# Recursively search for attachments in nested structures
|
|
311
|
+
item_attachments = self._find_attachments_recursive(item, key)
|
|
312
|
+
nested_attachments.extend(item_attachments)
|
|
313
|
+
|
|
314
|
+
return nested_attachments
|
|
315
|
+
|
|
316
|
+
def _find_attachments_recursive(
|
|
317
|
+
self, item: dict, context_key: str, path: str = ""
|
|
318
|
+
) -> list[dict]:
|
|
319
|
+
"""Recursively find attachments in nested dictionary structures.
|
|
320
|
+
|
|
321
|
+
Args:
|
|
322
|
+
item: Dictionary item to search
|
|
323
|
+
context_key: The top-level key for context
|
|
324
|
+
path: Current path in the nested structure
|
|
325
|
+
|
|
326
|
+
Returns:
|
|
327
|
+
List of attachment data dictionaries with metadata
|
|
328
|
+
"""
|
|
329
|
+
attachments = []
|
|
330
|
+
|
|
331
|
+
# Check if this item has direct attachments
|
|
332
|
+
if "attachments" in item and isinstance(item["attachments"], list):
|
|
333
|
+
for attachment in item["attachments"]:
|
|
334
|
+
if not isinstance(attachment, dict):
|
|
335
|
+
continue
|
|
336
|
+
|
|
337
|
+
# Check if this looks like an attachment
|
|
338
|
+
if "file_name" in attachment or "download_url" in attachment:
|
|
339
|
+
# Add context about which item this attachment belongs to
|
|
340
|
+
attachment_with_context = attachment.copy()
|
|
341
|
+
attachment_with_context["_context"] = {
|
|
342
|
+
"key": context_key,
|
|
343
|
+
"item_id": item.get("id", "unknown"),
|
|
344
|
+
"path": path,
|
|
345
|
+
}
|
|
346
|
+
|
|
347
|
+
# Transform download_url to internal access package URL
|
|
348
|
+
if "download_url" in attachment:
|
|
349
|
+
attachment_with_context["original_download_url"] = attachment[
|
|
350
|
+
"download_url"
|
|
351
|
+
]
|
|
352
|
+
attachment_with_context["download_url"] = (
|
|
353
|
+
f"attachments/{attachment.get('file_name', 'attachment')}"
|
|
354
|
+
)
|
|
355
|
+
|
|
356
|
+
attachments.append(attachment_with_context)
|
|
357
|
+
|
|
358
|
+
# Recursively search nested dictionaries
|
|
359
|
+
for key, value in item.items():
|
|
360
|
+
if isinstance(value, dict):
|
|
361
|
+
current_path = f"{path}.{key}" if path else key
|
|
362
|
+
nested_attachments = self._find_attachments_recursive(
|
|
363
|
+
value, context_key, current_path
|
|
364
|
+
)
|
|
365
|
+
attachments.extend(nested_attachments)
|
|
366
|
+
|
|
367
|
+
return attachments
|
|
368
|
+
|
|
369
|
+
def _validate_attachment(
    self, attachment: dict
) -> Optional[AttachmentProcessingInfo]:
    """Turn one raw attachment dictionary into an AttachmentProcessingInfo.

    The storage key is the first non-empty value among
    "original_download_url", "download_url" and "file_name". The base path
    inside the ZIP is "attachments", or "<key>/<item_id>/attachments" when
    the attachment carries a "_context" entry.

    Args:
        attachment: Raw attachment data dictionary

    Returns:
        AttachmentProcessingInfo if valid, None when no storage key can be
        determined or when building the info objects raises ValueError,
        TypeError or KeyError
    """
    try:
        # The original download URL takes precedence for storage operations
        storage_key = ""
        for field in ("original_download_url", "download_url", "file_name"):
            storage_key = attachment.get(field)
            if storage_key:
                break
        if not storage_key:
            return None

        attachment_info = AttachmentInfo(
            storage_key=storage_key,
            file_name=attachment.get("file_name"),
            size=attachment.get("size"),
            content_type=attachment.get("content_type"),
        )

        # Location of the attachment inside the ZIP
        context = attachment.get("_context")
        base_path = (
            f"{context['key']}/{context['item_id']}/attachments"
            if context
            else "attachments"
        )

        processing_info = AttachmentProcessingInfo(
            attachment=attachment_info,
            base_path=base_path,
            item=attachment,
        )
    except (ValueError, TypeError, KeyError) as e:
        logger.debug(f"Failed to validate attachment: {attachment}, error: {e}")
        return None

    logger.debug(f"Successfully validated attachment: {attachment_info.storage_key}")
    return processing_info
|
|
419
|
+
|
|
420
|
+
def _create_attachment_content_stream(
    self, bucket: str, key: str, storage_key: str
) -> Iterable[bytes]:
    """Yield the content of a stored attachment in chunks of self.chunk_size.

    The object is opened through the storage client's stream_read and read
    chunk by chunk until a read returns nothing. If anything fails, a warning
    is logged and a single empty bytes object is yielded instead of raising;
    chunks already yielded before the failure are not retracted.

    Args:
        bucket: Source bucket name
        key: Source key/path
        storage_key: Original storage key for logging

    Returns:
        Iterator that yields chunks of the attachment content
    """
    try:
        logger.debug(
            f"Starting streaming read of {storage_key} from bucket: {bucket}, key: {key}"
        )
        with self.storage_client.stream_read(bucket, key) as content_stream:
            chunk_count = total_bytes = 0
            # An empty read marks the end of the stream
            while chunk := content_stream.read(self.chunk_size):
                chunk_count += 1
                total_bytes += len(chunk)
                yield chunk

            logger.debug(
                f"Completed streaming {chunk_count} chunks ({total_bytes} bytes) for {storage_key}"
            )
    except Exception as e:
        logger.warning(f"Failed to stream attachment {storage_key}: {e}")
        # Best effort: hand back empty content on failure
        yield b""
|
|
456
|
+
|
|
457
|
+
def _collect_and_validate_attachments(
    self, data: dict
) -> list[AttachmentProcessingInfo]:
    """Collect the raw attachments in the data and keep those that validate.

    Collection is delegated to _collect_attachments and conversion to
    _validate_attachment; attachments for which validation returns nothing
    are dropped.

    Args:
        data: The data dictionary containing items with attachments

    Returns:
        List of validated AttachmentProcessingInfo objects
    """
    raw_attachments = self._collect_attachments(data)

    validated_attachments = [
        info for info in map(self._validate_attachment, raw_attachments) if info
    ]

    logger.debug(
        f"Successfully validated {len(validated_attachments)} out of {len(raw_attachments)} attachments"
    )
    return validated_attachments
|
|
485
|
+
|
|
486
|
+
@retry_cloud_storage_operation(
    provider="smart_open_streaming",
    operation_name="upload_to_storage_streaming",
    max_retries=2,
    base_delay=2.0,
    max_delay=30.0,
)
def upload_to_storage_streaming(
    self,
    data: dict,
    config: StorageUploadConfig,
    privacy_request: Optional[PrivacyRequest],
    document: Optional[Any] = None,
    buffer_config: Optional[StreamingBufferConfig] = None,
    batch_size: int = 10,
) -> Optional[AnyHttpUrlString]:
    """Upload DSR data to cloud storage and return a presigned URL for it.

    The inputs are validated first, then the upload is dispatched on
    config.resp_format: CSV and JSON go through _handle_data_format_upload,
    HTML goes through _handle_html_format_upload.

    Args:
        data: Data to upload
        config: Upload configuration
        privacy_request: Privacy request object
        document: Optional document (not yet implemented)
        buffer_config: Buffer configuration (defaults to StreamingBufferConfig())
        batch_size: Number of attachments to process in each batch

    Returns:
        The presigned URL produced by the format-specific handler

    Raises:
        ValueError: If privacy_request is not provided or the response
            format is unsupported
        NotImplementedError: If document-only upload is attempted
        StorageUploadError: If upload fails; unexpected errors are wrapped
            in this type
    """
    self._validate_upload_inputs(privacy_request, document)
    if not privacy_request:
        raise ValueError("Privacy request must be provided")

    buffer_config = StreamingBufferConfig() if buffer_config is None else buffer_config

    try:
        if config.resp_format in (
            ResponseFormat.csv.value,
            ResponseFormat.json.value,
        ):
            handler = self._handle_data_format_upload
        elif config.resp_format == ResponseFormat.html.value:
            handler = self._handle_html_format_upload
        else:
            raise ValueError(f"Unsupported response format: {config.resp_format}")

        return handler(config, data, privacy_request, buffer_config, batch_size)

    except (ValueError, NotImplementedError, StorageUploadError):
        # Validation errors and storage errors propagate unchanged
        raise
    except Exception as e:
        # Anything else is logged and wrapped in StorageUploadError
        logger.error(f"Unexpected error during storage upload: {e}", exc_info=True)
        raise StorageUploadError(
            f"Storage upload failed due to unexpected error: {e}"
        ) from e
|
|
558
|
+
|
|
559
|
+
def _validate_upload_inputs(
|
|
560
|
+
self, privacy_request: Optional[PrivacyRequest], document: Optional[Any]
|
|
561
|
+
) -> None:
|
|
562
|
+
"""Validate upload input parameters.
|
|
563
|
+
|
|
564
|
+
Args:
|
|
565
|
+
privacy_request: Privacy request object
|
|
566
|
+
document: Optional document
|
|
567
|
+
|
|
568
|
+
Raises:
|
|
569
|
+
ValueError: If privacy_request is not provided
|
|
570
|
+
NotImplementedError: If document-only upload is attempted
|
|
571
|
+
"""
|
|
572
|
+
if not privacy_request:
|
|
573
|
+
raise ValueError("Privacy request must be provided")
|
|
574
|
+
|
|
575
|
+
if document:
|
|
576
|
+
raise NotImplementedError("Document-only uploads not yet implemented")
|
|
577
|
+
|
|
578
|
+
def _handle_data_format_upload(
    self,
    config: StorageUploadConfig,
    data: dict,
    privacy_request: PrivacyRequest,
    buffer_config: StreamingBufferConfig,
    batch_size: int,
) -> Optional[AnyHttpUrlString]:
    """Upload CSV/JSON data as a ZIP and return a presigned URL for it.

    Args:
        config: Upload configuration
        data: Data to upload
        privacy_request: Privacy request object
        buffer_config: Buffer configuration
        batch_size: Number of attachments to process in each batch

    Returns:
        The presigned URL of the uploaded file

    Raises:
        StorageUploadError: If the presigned URL cannot be generated
    """
    zip_upload_args = (
        config.bucket_name,
        config.file_key,
        data,
        privacy_request,
        config.max_workers,
        buffer_config,
        batch_size,
        config.resp_format,
    )
    self._stream_attachments_to_storage_zip(*zip_upload_args)

    # The file is in place; any failure while signing is reported as a storage error
    try:
        presigned_url = self.storage_client.generate_presigned_url(
            config.bucket_name, config.file_key
        )
    except Exception as e:
        logger.error(
            f"Failed to generate presigned URL for {config.bucket_name}/{config.file_key}: {e}"
        )
        raise StorageUploadError(f"Failed to generate presigned URL: {e}") from e
    return presigned_url
|
|
619
|
+
|
|
620
|
+
def _handle_html_format_upload(
    self,
    config: StorageUploadConfig,
    data: dict,
    privacy_request: PrivacyRequest,
    buffer_config: StreamingBufferConfig,
    batch_size: int,
) -> Optional[AnyHttpUrlString]:
    """Build the HTML DSR report, upload it, and return a presigned URL.

    Without attachments the report buffer is uploaded as-is. With attachments,
    the attachment files and the report files are streamed together into one
    ZIP at the destination.

    Args:
        config: Upload configuration
        data: Data to upload
        privacy_request: Privacy request object
        buffer_config: Buffer configuration
        batch_size: Number of attachments to process in each batch

    Returns:
        The presigned URL of the uploaded file

    Raises:
        StorageUploadError: If the report cannot be generated or the
            presigned URL cannot be created
    """

    def presigned_url_for_upload() -> Optional[AnyHttpUrlString]:
        # Shared by both upload paths: sign the uploaded object, reporting
        # any failure as a storage error
        try:
            return self.storage_client.generate_presigned_url(
                config.bucket_name, config.file_key
            )
        except Exception as e:
            logger.error(
                f"Failed to generate presigned URL for {config.bucket_name}/{config.file_key}: {e}"
            )
            raise StorageUploadError(f"Failed to generate presigned URL: {e}") from e

    # The report is generated before anything is uploaded
    try:
        report_builder = DsrReportBuilder(
            privacy_request=privacy_request,
            dsr_data=data,
        )
        dsr_buffer = report_builder.generate()
        # Rewind so the buffer can be read from the start
        dsr_buffer.seek(0)
    except Exception as e:
        logger.error(f"Failed to generate DSR report: {e}")
        raise StorageUploadError(f"Failed to generate DSR report: {e}") from e

    all_attachments = self._collect_and_validate_attachments(data)

    if not all_attachments:
        # No attachments: the report itself is the upload
        stream_dsr_buffer_to_storage(
            self.storage_client,
            config.bucket_name,
            config.file_key,
            dsr_buffer,
        )
        return presigned_url_for_upload()

    logger.debug(
        f"Creating HTML DSR report ZIP with {len(all_attachments)} attachments"
    )

    # Entries for the report files, then entries for the attachment files
    dsr_files_generator = create_dsr_report_files_generator(
        dsr_buffer,
        all_attachments,
        config.bucket_name,
        config.max_workers,
        batch_size,
    )
    attachment_files_generator = self._create_attachment_files(all_attachments)

    # Attachments are written to the ZIP first, followed by the report files
    zip_chunks_source = chain(attachment_files_generator, dsr_files_generator)
    with self.storage_client.stream_upload(
        config.bucket_name,
        config.file_key,
        content_type="application/zip",
    ) as upload_stream:
        for chunk in stream_zip(zip_chunks_source):
            upload_stream.write(chunk)

    logger.debug(
        f"Successfully uploaded HTML DSR report ZIP with attachments: {config.file_key}"
    )

    return presigned_url_for_upload()
|
|
715
|
+
|
|
716
|
+
@retry_cloud_storage_operation(
    provider="smart_open_streaming",
    operation_name="stream_attachments_to_storage_zip",
    max_retries=2,
    base_delay=2.0,
    max_delay=30.0,
)
def _stream_attachments_to_storage_zip(
    self,
    bucket_name: str,
    file_key: str,
    data: dict,
    privacy_request: PrivacyRequest,
    max_workers: int,
    buffer_config: StreamingBufferConfig,
    batch_size: int,
    resp_format: str,
) -> None:
    """Write the data and its attachments to storage as a single ZIP file.

    When the data holds no valid attachments, a data-only ZIP is uploaded
    instead. Otherwise the ZIP entries come from _create_zip_generator and
    the ZIP chunks are written to the upload stream as they are produced.

    Args:
        bucket_name: Storage bucket name
        file_key: File key in storage
        data: Data to upload
        privacy_request: Privacy request object
        max_workers: Maximum parallel workers
        buffer_config: Buffer configuration
        batch_size: Number of attachments to process in each batch
        resp_format: Response format (csv, json)
    """
    all_attachments = self._collect_and_validate_attachments(data)

    if all_attachments:
        logger.debug(
            f"Starting streaming ZIP creation with {len(all_attachments)} attachments in batches of {batch_size}"
        )

        # Entries for the data files and for every attachment
        zip_entries = self._create_zip_generator(
            data,
            all_attachments,
            bucket_name,
            max_workers,
            batch_size,
            resp_format,
        )

        # Forward each ZIP chunk to the destination as soon as it is produced
        with self.storage_client.stream_upload(
            bucket_name, file_key, content_type="application/zip"
        ) as upload_stream:
            for chunk in stream_zip(zip_entries):
                upload_stream.write(chunk)

        logger.debug(
            f"Successfully created memory-efficient streaming ZIP using smart-open: {file_key}"
        )
        return

    # No attachments, just upload the data
    self._upload_data_only_zip(bucket_name, file_key, data, resp_format)
|
|
782
|
+
|
|
783
|
+
def _upload_data_only_zip(
    self, bucket_name: str, file_key: str, data: dict, resp_format: str
) -> None:
    """Upload a ZIP that contains only the data files (no attachments).

    Args:
        bucket_name: Storage bucket name
        file_key: File key in storage
        data: Data to upload
        resp_format: Response format
    """
    logger.debug("Creating data-only ZIP file (no attachments)")

    # Data files, adapted to the entry tuples passed to stream_zip
    zip_entries = self._convert_to_stream_zip_format(
        self._create_data_files(data, resp_format)
    )

    upload_target = self.storage_client.stream_upload(
        bucket_name, file_key, content_type="application/zip"
    )
    with upload_target as upload_stream:
        for chunk in stream_zip(zip_entries):
            upload_stream.write(chunk)

    logger.debug(f"Successfully uploaded data-only ZIP: {file_key}")
|
|
810
|
+
|
|
811
|
+
def _create_zip_generator(
    self,
    data: dict,
    all_attachments: list[AttachmentProcessingInfo],
    bucket_name: str,
    max_workers: int,
    batch_size: int,
    resp_format: str,
) -> Generator[Tuple[str, datetime, int, Any, Iterable[bytes]], None, None]:
    """Yield the ZIP entries for the data files followed by the attachment files.

    Data files are skipped when the response format is HTML, because the DSR
    report already contains the HTML content.

    Args:
        data: Data to include in the ZIP
        all_attachments: List of validated attachments
        bucket_name: Storage bucket name
        max_workers: Maximum parallel workers
        batch_size: Number of attachments to process in each batch
        resp_format: Response format

    Returns:
        Generator yielding ZIP file entries in stream_zip format
    """
    logger.debug(f"Creating ZIP generator with {len(all_attachments)} attachments")

    include_data_files = resp_format.lower() != "html"
    if include_data_files:
        # Data files first, adapted to the stream_zip entry format
        data_files_generator = self._create_data_files(
            data, resp_format, all_attachments
        )
        logger.debug("Yielding data files for ZIP")
        yield from self._convert_to_stream_zip_format(data_files_generator)

    # Attachment entries need no conversion
    attachment_entries = self._create_attachment_files(all_attachments)
    logger.debug("Yielding attachment files for ZIP")
    yield from attachment_entries
|
|
848
|
+
|
|
849
|
+
def _create_data_files(
    self,
    data: dict,
    resp_format: str = "json",
    all_attachments: Optional[list[AttachmentProcessingInfo]] = None,
) -> Generator[Tuple[str, BytesIO, dict[str, Any]], None, None]:
    """Serialize each non-empty list in ``data`` into one in-memory data file.

    Args:
        data: Mapping of name -> value. Only values that are non-empty lists
            produce a file; every other entry is skipped.
        resp_format: Requested format, matched case-insensitively. "csv"
            produces CSV when every item of the list is a dict; any other
            format (including "json" and "html") produces JSON.
        all_attachments: When non-empty, ``data`` is first passed through
            ``_transform_data_for_access_package`` with these attachments.

    Yields:
        ``(file_name, content, metadata)`` tuples where ``file_name`` is
        ``"<key>.csv"`` or ``"<key>.json"``, ``content`` is a ``BytesIO`` of
        UTF-8 encoded text and ``metadata`` is an empty dict.
    """
    # Transform data to use internal access package URLs if attachments are provided
    if all_attachments:
        data = self._transform_data_for_access_package(data, all_attachments)

    # The format does not change between entries, so normalize it once.
    wants_csv = resp_format.lower() == "csv"

    for key, value in data.items():
        if not (isinstance(value, list) and value):
            continue

        # CSV needs every row to be a mapping; a list containing any
        # non-dict item falls back to JSON instead of failing mid-write.
        if wants_csv and all(isinstance(row, dict) for row in value):
            # Use the union of keys across all rows (first-seen order) so a
            # row carrying a key absent from the first row no longer makes
            # DictWriter raise ValueError. Missing cells are written empty.
            fieldnames = list(dict.fromkeys(name for row in value for name in row))
            csv_buffer = StringIO()
            writer = csv.DictWriter(csv_buffer, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(value)
            data_content = csv_buffer.getvalue().encode("utf-8")
            yield f"{key}.csv", BytesIO(data_content), {}
        else:
            # default=str stringifies values json cannot encode natively.
            data_content = json.dumps(value, default=str).encode("utf-8")
            yield f"{key}.json", BytesIO(data_content), {}
|
|
887
|
+
|
|
888
|
+
def _create_attachment_files(
    self,
    all_attachments: list[AttachmentProcessingInfo],
) -> Generator[Tuple[str, datetime, int, Any, Iterable[bytes]], None, None]:
    """Yield one ZIP entry per attachment, in input order.

    Each entry is whatever ``_process_attachment_safely`` returns for that
    attachment. Entries are produced lazily: an attachment is only
    processed when the consumer asks for the next item, and any exception
    raised while processing propagates to the consumer at that point.

    Args:
        all_attachments: Attachments to turn into ZIP entries.

    Yields:
        The 5-tuple entry produced for each attachment.
    """
    yield from map(self._process_attachment_safely, all_attachments)
|
|
907
|
+
|
|
908
|
+
def _transform_data_for_access_package(
    self, data: dict[str, Any], all_attachments: list[AttachmentProcessingInfo]
) -> dict[str, Any]:
    """
    Return ``data`` with attachment download URLs swapped for in-package paths.

    Only attachments whose ``storage_key`` starts with "http://" or
    "https://" take part. Every string value in ``data`` (searched through
    nested dicts and lists) that equals such a storage key exactly is
    replaced by ``"attachments/<file_name>"``. Dict keys are never
    rewritten. When there is nothing to replace, the original ``data``
    object is returned as-is; otherwise a rebuilt copy is returned.
    """
    if not all_attachments:
        return data

    # storage_key (an external URL) -> path of the file inside the package
    internal_paths = {}
    for info in all_attachments:
        source = info.attachment
        if not source.storage_key.startswith(("http://", "https://")):
            continue
        # NOTE(review): the id()-based fallback name changes between runs and
        # differs from the default name used when the ZIP entry itself is
        # built -- confirm this is intended.
        entry_name = source.file_name or f"attachment_{id(source)}"
        internal_paths[source.storage_key] = f"attachments/{entry_name}"

    if not internal_paths:
        return data

    def _rewrite(node: Any) -> Any:
        # Walk containers depth-first; leaves that are known URLs get swapped.
        if isinstance(node, dict):
            return {name: _rewrite(child) for name, child in node.items()}
        if isinstance(node, list):
            return [_rewrite(child) for child in node]
        if isinstance(node, str):
            return internal_paths.get(node, node)
        return node

    return _rewrite(data)
|
|
940
|
+
|
|
941
|
+
def _process_attachment_safely(
    self,
    attachment_info: AttachmentProcessingInfo,
) -> tuple[str, datetime, int, Any, Iterable[bytes]]:
    """Build the ZIP entry for one attachment, reporting any failure uniformly.

    Steps: parse the attachment's ``storage_key`` with ``_parse_storage_url``,
    derive the in-archive path from ``base_path`` and the file name (or
    ``DEFAULT_ATTACHMENT_NAME`` when the name is empty), then obtain the
    content stream from ``_create_attachment_content_stream``.

    Args:
        attachment_info: Attachment processing information.

    Returns:
        ``(file_path, timestamp, DEFAULT_FILE_MODE, _ZIP_32_TYPE(), content_stream)``
        where ``timestamp`` is ``datetime.now()`` at the time of the call.

    Raises:
        StorageUploadError: If any step fails. Errors raised by the inner
            steps pass through the outer handler as well, so the final
            message is always prefixed with "Failed to process attachment"
            (assuming StorageUploadError derives from Exception).
    """
    try:
        storage_key = attachment_info.attachment.storage_key

        # Step 1: split the storage key into bucket and object key.
        try:
            source_bucket, source_key = self._parse_storage_url(storage_key)
            logger.debug(
                f"Parsed storage URL - bucket: {source_bucket}, key: {source_key}"
            )
        except ValueError as parse_error:
            parse_message = (
                f"Could not parse storage URL: {storage_key} - {parse_error}"
            )
            logger.error(parse_message)
            raise StorageUploadError(parse_message) from parse_error

        # Step 2: work out where the file lives inside the archive.
        base_path = attachment_info.base_path
        file_name = attachment_info.attachment.file_name or DEFAULT_ATTACHMENT_NAME
        file_path = f"{base_path}/{file_name}"

        # Step 3: obtain the content stream and assemble the entry.
        try:
            content_stream = self._create_attachment_content_stream(
                source_bucket, source_key, storage_key
            )
            zip_entry = (
                file_path,
                datetime.now(),
                DEFAULT_FILE_MODE,
                _ZIP_32_TYPE(),
                content_stream,
            )
            return zip_entry
        except Exception as stream_error:
            logger.error(
                f"Failed to create content stream for attachment {storage_key}: {stream_error}"
            )
            raise StorageUploadError(
                f"Failed to create content stream for attachment: {stream_error}"
            ) from stream_error

    except Exception as error:
        # Catch-all boundary: also receives the StorageUploadErrors raised
        # above, which are logged again here and wrapped once more.
        failure_message = (
            f"Failed to process attachment {attachment_info.attachment.storage_key}: {error}"
        )
        logger.error(failure_message, exc_info=True)
        raise StorageUploadError(failure_message) from error
|