ethyca-fides 2.68.1b1__py2.py3-none-any.whl → 2.68.1b3__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ethyca-fides might be problematic. Click here for more details.

Files changed (309)
  1. {ethyca_fides-2.68.1b1.dist-info → ethyca_fides-2.68.1b3.dist-info}/METADATA +3 -1
  2. {ethyca_fides-2.68.1b1.dist-info → ethyca_fides-2.68.1b3.dist-info}/RECORD +247 -224
  3. fides/_version.py +3 -3
  4. fides/api/alembic/migrations/versions/3baf42d251a6_add_generic_taxonomy_models.py +239 -0
  5. fides/api/alembic/migrations/versions/90502bcda282_update_request_tasks_add_polling_async.py +35 -0
  6. fides/api/api/v1/endpoints/generic_overrides.py +64 -167
  7. fides/api/api/v1/endpoints/privacy_request_endpoints.py +1 -1
  8. fides/api/common_exceptions.py +12 -3
  9. fides/api/db/base.py +6 -0
  10. fides/api/models/detection_discovery/core.py +6 -0
  11. fides/api/models/privacy_request/request_task.py +25 -0
  12. fides/api/models/taxonomy.py +275 -0
  13. fides/api/schemas/privacy_center_config.py +48 -19
  14. fides/api/schemas/storage/storage.py +2 -0
  15. fides/api/service/async_dsr/__init__.py +0 -0
  16. fides/api/service/async_dsr/async_dsr_service.py +75 -0
  17. fides/api/service/connectors/saas_connector.py +5 -6
  18. fides/api/service/deps.py +5 -0
  19. fides/api/service/privacy_request/dsr_package/dsr_report_builder.py +6 -4
  20. fides/api/service/privacy_request/request_service.py +56 -3
  21. fides/api/service/storage/storage_uploader_service.py +80 -5
  22. fides/api/service/storage/streaming/__init__.py +42 -0
  23. fides/api/service/storage/streaming/base_storage_client.py +61 -0
  24. fides/api/service/storage/streaming/dsr_storage.py +98 -0
  25. fides/api/service/storage/streaming/retry.py +282 -0
  26. fides/api/service/storage/streaming/s3/__init__.py +5 -0
  27. fides/api/service/storage/streaming/s3/s3_storage_client.py +113 -0
  28. fides/api/service/storage/streaming/s3/streaming_s3.py +196 -0
  29. fides/api/service/storage/streaming/schemas.py +173 -0
  30. fides/api/service/storage/streaming/smart_open_client.py +265 -0
  31. fides/api/service/storage/streaming/smart_open_streaming_storage.py +998 -0
  32. fides/api/service/storage/streaming/storage_client_factory.py +60 -0
  33. fides/api/task/graph_task.py +4 -4
  34. fides/api/task/manual/manual_task_graph_task.py +14 -4
  35. fides/api/util/connection_type.py +68 -33
  36. fides/config/execution_settings.py +4 -0
  37. fides/data/sample_project/docker-compose.yml +3 -3
  38. fides/service/privacy_request/privacy_request_service.py +1 -9
  39. fides/service/taxonomy/__init__.py +0 -0
  40. fides/service/taxonomy/handlers/__init__.py +11 -0
  41. fides/service/taxonomy/handlers/base.py +42 -0
  42. fides/service/taxonomy/handlers/legacy_handler.py +95 -0
  43. fides/service/taxonomy/taxonomy_service.py +261 -0
  44. fides/service/taxonomy/utils.py +160 -0
  45. fides/ui-build/static/admin/404.html +1 -1
  46. fides/ui-build/static/admin/_next/static/_BLI2ArqQzY5XnXbrcxa2/_buildManifest.js +1 -0
  47. fides/ui-build/static/admin/_next/static/chunks/1099-7b2085a3931da9e4.js +1 -0
  48. fides/ui-build/static/admin/_next/static/chunks/1138-0d846ffef62c580f.js +1 -0
  49. fides/ui-build/static/admin/_next/static/chunks/1345-ab756811e19ff4fc.js +1 -0
  50. fides/ui-build/static/admin/_next/static/chunks/{1817-c90365325f8a3d75.js → 1817-fd21f1f5ef0faffa.js} +1 -1
  51. fides/ui-build/static/admin/_next/static/chunks/{1975.e5cc7a1ccd477671.js → 1975.16126463309143e3.js} +1 -1
  52. fides/ui-build/static/admin/_next/static/chunks/{2921-46f9465c2852a46b.js → 2921-0e5cc63a82e31830.js} +1 -1
  53. fides/ui-build/static/admin/_next/static/chunks/3620-6cceae71bae5b531.js +1 -0
  54. fides/ui-build/static/admin/_next/static/chunks/3729-7d2d52400f1f7413.js +1 -0
  55. fides/ui-build/static/admin/_next/static/chunks/3855-64541570e2f838fb.js +1 -0
  56. fides/ui-build/static/admin/_next/static/chunks/3872-7a18d18a5e287e4e.js +1 -0
  57. fides/ui-build/static/admin/_next/static/chunks/{3923-a33633feba5e655e.js → 3923-5c87b3d7f1626678.js} +1 -1
  58. fides/ui-build/static/admin/_next/static/chunks/{401-741bb31b586b7c96.js → 401-3902e3e98790d401.js} +1 -1
  59. fides/ui-build/static/admin/_next/static/chunks/{4121-94354b50a41f8497.js → 4121-64ef70ef906bbdd0.js} +1 -1
  60. fides/ui-build/static/admin/_next/static/chunks/431-86ad2beeb93c95c9.js +1 -0
  61. fides/ui-build/static/admin/_next/static/chunks/4608-70521532195124de.js +1 -0
  62. fides/ui-build/static/admin/_next/static/chunks/4786-53ef1662f2d0d98c.js +1 -0
  63. fides/ui-build/static/admin/_next/static/chunks/4808-8713433c84a62efe.js +1 -0
  64. fides/ui-build/static/admin/_next/static/chunks/4844-351f99b6644b654e.js +1 -0
  65. fides/ui-build/static/admin/_next/static/chunks/5258-c6f96dc740eb5fb1.js +1 -0
  66. fides/ui-build/static/admin/_next/static/chunks/5487-338800277d36b8d7.js +1 -0
  67. fides/ui-build/static/admin/_next/static/chunks/549-e6453a3526023e85.js +1 -0
  68. fides/ui-build/static/admin/_next/static/chunks/602-80d113e801d7407d.js +1 -0
  69. fides/ui-build/static/admin/_next/static/chunks/{6084-02abe12327fc3dbc.js → 6084-da63f20d9416a982.js} +1 -1
  70. fides/ui-build/static/admin/_next/static/chunks/{6853-270261ef5537a106.js → 6853-1d947b75eb07188c.js} +1 -1
  71. fides/ui-build/static/admin/_next/static/chunks/6954-24f9a4f27d67b732.js +1 -0
  72. fides/ui-build/static/admin/_next/static/chunks/7476-a0dd03bfccf60d0c.js +1 -0
  73. fides/ui-build/static/admin/_next/static/chunks/7630-9fbe06cfb98266fe.js +1 -0
  74. fides/ui-build/static/admin/_next/static/chunks/{787-5ba991cad1f7664a.js → 787-3dd31844cf7fec55.js} +1 -1
  75. fides/ui-build/static/admin/_next/static/chunks/79-dcd20e8b09501c17.js +1 -0
  76. fides/ui-build/static/admin/_next/static/chunks/796-8773e04b64ce2260.js +1 -0
  77. fides/ui-build/static/admin/_next/static/chunks/8002-dcd02da6e5649a1c.js +1 -0
  78. fides/ui-build/static/admin/_next/static/chunks/9046-57eab238570b8bf4.js +1 -0
  79. fides/ui-build/static/admin/_next/static/chunks/9676.bf0a8a6ff6dfd2af.js +1 -0
  80. fides/ui-build/static/admin/_next/static/chunks/{9826-8c81c97a72510fcf.js → 9826-756c958aecab59a2.js} +1 -1
  81. fides/ui-build/static/admin/_next/static/chunks/9951-cdf73904a3adb27b.js +1 -0
  82. fides/ui-build/static/admin/_next/static/chunks/pages/{404-9174cdb70126c2c5.js → 404-dd625a559ada46ca.js} +1 -1
  83. fides/ui-build/static/admin/_next/static/chunks/pages/{_app-65723cd4b8fc36ac.js → _app-b6b09b2878b77b21.js} +136 -135
  84. fides/ui-build/static/admin/_next/static/chunks/pages/add-systems/{manual-621416493c89ef01.js → manual-92cf5e313be1f9e2.js} +1 -1
  85. fides/ui-build/static/admin/_next/static/chunks/pages/add-systems/{multiple-0b9908c3e1dfe49e.js → multiple-d6c525ee731a2993.js} +1 -1
  86. fides/ui-build/static/admin/_next/static/chunks/pages/add-systems-5664a3ea796e5ffb.js +1 -0
  87. fides/ui-build/static/admin/_next/static/chunks/pages/consent/configure/{add-vendors-5bb1b31ae8752250.js → add-vendors-78f13de90111fd80.js} +1 -1
  88. fides/ui-build/static/admin/_next/static/chunks/pages/consent/configure-0fc678f3d6d2fcec.js +1 -0
  89. fides/ui-build/static/admin/_next/static/chunks/pages/consent/privacy-experience/{[id]-4e4d9426743b5cb4.js → [id]-126db59dc25ca326.js} +1 -1
  90. fides/ui-build/static/admin/_next/static/chunks/pages/consent/{privacy-experience-d72460348fadcab8.js → privacy-experience-289605267d6cce7e.js} +1 -1
  91. fides/ui-build/static/admin/_next/static/chunks/pages/consent/privacy-notices/{[id]-3e7ddc252da00c98.js → [id]-e9fd9b28ac9705af.js} +1 -1
  92. fides/ui-build/static/admin/_next/static/chunks/pages/consent/privacy-notices/{new-35a7c305beee9428.js → new-28c003b6043bd16c.js} +1 -1
  93. fides/ui-build/static/admin/_next/static/chunks/pages/consent/privacy-notices-c643eff04525298e.js +1 -0
  94. fides/ui-build/static/admin/_next/static/chunks/pages/consent/{properties-ab96939421639153.js → properties-3ef5d01779a26455.js} +1 -1
  95. fides/ui-build/static/admin/_next/static/chunks/pages/consent/reporting-baa4a2f8f08ac224.js +1 -0
  96. fides/ui-build/static/admin/_next/static/chunks/pages/{consent-13240e3ca77acfeb.js → consent-8d4be9e7ec7d2a35.js} +1 -1
  97. fides/ui-build/static/admin/_next/static/chunks/pages/data-catalog/[systemId]/projects/[projectUrn]/{[resourceUrn]-aad6047a4604b945.js → [resourceUrn]-f27ec4578c674181.js} +1 -1
  98. fides/ui-build/static/admin/_next/static/chunks/pages/data-catalog/[systemId]/projects/{[projectUrn]-bd37b407c80c6986.js → [projectUrn]-27b6c255bd9e73b6.js} +1 -1
  99. fides/ui-build/static/admin/_next/static/chunks/pages/data-catalog/[systemId]/projects-0f66dac32040519c.js +1 -0
  100. fides/ui-build/static/admin/_next/static/chunks/pages/data-catalog/[systemId]/resources/{[resourceUrn]-b6b98cea25dd94fa.js → [resourceUrn]-3b938562df81c4b0.js} +1 -1
  101. fides/ui-build/static/admin/_next/static/chunks/pages/data-catalog-ebf5e7fa4e2ffb49.js +1 -0
  102. fides/ui-build/static/admin/_next/static/chunks/pages/data-discovery/action-center/[monitorId]/[systemId]-b27c660039d951c9.js +1 -0
  103. fides/ui-build/static/admin/_next/static/chunks/pages/data-discovery/action-center/[monitorId]-8ce5d24af470888e.js +1 -0
  104. fides/ui-build/static/admin/_next/static/chunks/pages/data-discovery/action-center-8e35e33928abbcdc.js +1 -0
  105. fides/ui-build/static/admin/_next/static/chunks/pages/data-discovery/activity-21c141279e66237a.js +1 -0
  106. fides/ui-build/static/admin/_next/static/chunks/pages/data-discovery/detection/{[resourceUrn]-31e6c54794a9883e.js → [resourceUrn]-3bc6a207693fd175.js} +1 -1
  107. fides/ui-build/static/admin/_next/static/chunks/pages/data-discovery/{detection-2822a423a7ad0550.js → detection-da16e73df395ad1d.js} +1 -1
  108. fides/ui-build/static/admin/_next/static/chunks/pages/data-discovery/discovery/{[resourceUrn]-f98dd251babb7e28.js → [resourceUrn]-04b242632a114405.js} +1 -1
  109. fides/ui-build/static/admin/_next/static/chunks/pages/data-discovery/{discovery-56eb4c014f0d96a3.js → discovery-900fe50183a40d72.js} +1 -1
  110. fides/ui-build/static/admin/_next/static/chunks/pages/{datamap-8f88dc31c5144ea8.js → datamap-4f1f7c3a9531a8f4.js} +1 -1
  111. fides/ui-build/static/admin/_next/static/chunks/pages/dataset/[datasetId]/[collectionName]/[...subfieldNames]-343294dcb10d9532.js +1 -0
  112. fides/ui-build/static/admin/_next/static/chunks/pages/dataset/[datasetId]/[collectionName]-1c097a0809fa5b6f.js +1 -0
  113. fides/ui-build/static/admin/_next/static/chunks/pages/dataset/[datasetId]-b47fa2498b534719.js +1 -0
  114. fides/ui-build/static/admin/_next/static/chunks/pages/dataset/new-a31f881cab25704a.js +1 -0
  115. fides/ui-build/static/admin/_next/static/chunks/pages/dataset-858c59c9e67e318d.js +1 -0
  116. fides/ui-build/static/admin/_next/static/chunks/pages/datastore-connection/{[id]-67a7fe58b96ea739.js → [id]-16c28d272225afb6.js} +1 -1
  117. fides/ui-build/static/admin/_next/static/chunks/pages/datastore-connection/{new-90a8df230cb89877.js → new-68f502d8b0b5792c.js} +1 -1
  118. fides/ui-build/static/admin/_next/static/chunks/pages/datastore-connection-1eb9acb17b133fd1.js +1 -0
  119. fides/ui-build/static/admin/_next/static/chunks/pages/{index-876bfd7210040cec.js → index-fec557d99211f577.js} +1 -1
  120. fides/ui-build/static/admin/_next/static/chunks/pages/integrations/{[id]-766e57bcf38b5b1e.js → [id]-e613543818d6cbd2.js} +1 -1
  121. fides/ui-build/static/admin/_next/static/chunks/pages/integrations-8069f7c33695fd45.js +1 -0
  122. fides/ui-build/static/admin/_next/static/chunks/pages/messaging/{[id]-6e796c3fe632280b.js → [id]-4a08ca7762a19700.js} +1 -1
  123. fides/ui-build/static/admin/_next/static/chunks/pages/messaging/{add-template-fa0f3841c5bdfdeb.js → add-template-343a965dcdb3d11e.js} +1 -1
  124. fides/ui-build/static/admin/_next/static/chunks/pages/messaging-3ade4c54b1c8a11e.js +1 -0
  125. fides/ui-build/static/admin/_next/static/chunks/pages/poc/ant-components-9103bfb854f71410.js +1 -0
  126. fides/ui-build/static/admin/_next/static/chunks/pages/poc/form-experiments/{AntForm-11503454a62d8d7b.js → AntForm-3b97029bd4d3c3ea.js} +1 -1
  127. fides/ui-build/static/admin/_next/static/chunks/pages/poc/form-experiments/{FormikAntFormItem-a504941807bdb7f1.js → FormikAntFormItem-9d9beb8f0d8a278c.js} +1 -1
  128. fides/ui-build/static/admin/_next/static/chunks/pages/poc/form-experiments/{FormikControlled-0119403c8ff97f83.js → FormikControlled-84a4d8fc60f839ed.js} +1 -1
  129. fides/ui-build/static/admin/_next/static/chunks/pages/poc/form-experiments/{FormikField-94f6d57d6c94ddf7.js → FormikField-1fccf542ab2e33bf.js} +1 -1
  130. fides/ui-build/static/admin/_next/static/chunks/pages/poc/{forms-ed1a3ae09d72df89.js → forms-aa75263ae1ba67bb.js} +1 -1
  131. fides/ui-build/static/admin/_next/static/chunks/pages/poc/table-migration-db334a1cbb102255.js +1 -0
  132. fides/ui-build/static/admin/_next/static/chunks/pages/privacy-requests/[id]-11c1e4545c8f528c.js +1 -0
  133. fides/ui-build/static/admin/_next/static/chunks/pages/privacy-requests/configure/messaging-192a986f61c23268.js +1 -0
  134. fides/ui-build/static/admin/_next/static/chunks/pages/privacy-requests/configure/storage-9216ac993d71387e.js +1 -0
  135. fides/ui-build/static/admin/_next/static/chunks/pages/privacy-requests/configure-e55ec84d5380401d.js +1 -0
  136. fides/ui-build/static/admin/_next/static/chunks/pages/privacy-requests-48f447b31c786b80.js +1 -0
  137. fides/ui-build/static/admin/_next/static/chunks/pages/properties/{[id]-41976b28503623cd.js → [id]-a74b51b704b80cb2.js} +1 -1
  138. fides/ui-build/static/admin/_next/static/chunks/pages/properties/{add-property-cb438d8f5ec6007a.js → add-property-8d23f0c55ff6510a.js} +1 -1
  139. fides/ui-build/static/admin/_next/static/chunks/pages/{properties-b6db7036993709b3.js → properties-77acceac4f99e7af.js} +1 -1
  140. fides/ui-build/static/admin/_next/static/chunks/pages/reporting/{datamap-4bc3e281409265cc.js → datamap-e60d398e255f4e00.js} +1 -1
  141. fides/ui-build/static/admin/_next/static/chunks/pages/settings/about/alpha-6aad3f563ed03b3f.js +1 -0
  142. fides/ui-build/static/admin/_next/static/chunks/pages/settings/about-c1b8f3606d160bb1.js +1 -0
  143. fides/ui-build/static/admin/_next/static/chunks/pages/settings/consent/[configuration_id]/[purpose_id]-d9f7f78810d58d08.js +1 -0
  144. fides/ui-build/static/admin/_next/static/chunks/pages/settings/consent-ee2c7dde99b1dafb.js +1 -0
  145. fides/ui-build/static/admin/_next/static/chunks/pages/settings/custom-fields-a4dad8ca9de2d07b.js +1 -0
  146. fides/ui-build/static/admin/_next/static/chunks/pages/settings/{domain-records-386368bf7cb31771.js → domain-records-31c270d228e00581.js} +1 -1
  147. fides/ui-build/static/admin/_next/static/chunks/pages/settings/domains-996b3f250dd3ea1f.js +1 -0
  148. fides/ui-build/static/admin/_next/static/chunks/pages/settings/email-templates-ee94981326ddcbf4.js +1 -0
  149. fides/ui-build/static/admin/_next/static/chunks/pages/settings/{locations-b41fb5ad277088ab.js → locations-0b831c58966782b8.js} +1 -1
  150. fides/ui-build/static/admin/_next/static/chunks/pages/settings/organization-94271ba4a224a353.js +1 -0
  151. fides/ui-build/static/admin/_next/static/chunks/pages/settings/{regulations-a94dfeea43fbca7d.js → regulations-41b8136e50320fd3.js} +1 -1
  152. fides/ui-build/static/admin/_next/static/chunks/pages/systems/configure/[id]/test-datasets-52b45569cbc82e60.js +1 -0
  153. fides/ui-build/static/admin/_next/static/chunks/pages/systems/configure/{[id]-18b316e2dad73731.js → [id]-36d74e93e54aabaf.js} +1 -1
  154. fides/ui-build/static/admin/_next/static/chunks/pages/systems-24dfc8e2279ced2e.js +1 -0
  155. fides/ui-build/static/admin/_next/static/chunks/pages/taxonomy-d9675cf5e6083b27.js +1 -0
  156. fides/ui-build/static/admin/_next/static/chunks/pages/user-management/profile/{[id]-3237881945acc0ee.js → [id]-866826d7959df487.js} +1 -1
  157. fides/ui-build/static/admin/_next/static/chunks/pages/{user-management-a3a50d9d79066935.js → user-management-e63b61a8f99ccd57.js} +1 -1
  158. fides/ui-build/static/admin/_next/static/chunks/{webpack-69658aeaf6155d89.js → webpack-6d0a487039bcf30c.js} +1 -1
  159. fides/ui-build/static/admin/_next/static/css/92441453b27e9c34.css +1 -0
  160. fides/ui-build/static/admin/add-systems/manual.html +1 -1
  161. fides/ui-build/static/admin/add-systems/multiple.html +1 -1
  162. fides/ui-build/static/admin/add-systems.html +1 -1
  163. fides/ui-build/static/admin/consent/configure/add-vendors.html +1 -1
  164. fides/ui-build/static/admin/consent/configure.html +1 -1
  165. fides/ui-build/static/admin/consent/privacy-experience/[id].html +1 -1
  166. fides/ui-build/static/admin/consent/privacy-experience/new.html +1 -1
  167. fides/ui-build/static/admin/consent/privacy-experience.html +1 -1
  168. fides/ui-build/static/admin/consent/privacy-notices/[id].html +1 -1
  169. fides/ui-build/static/admin/consent/privacy-notices/new.html +1 -1
  170. fides/ui-build/static/admin/consent/privacy-notices.html +1 -1
  171. fides/ui-build/static/admin/consent/properties.html +1 -1
  172. fides/ui-build/static/admin/consent/reporting.html +1 -1
  173. fides/ui-build/static/admin/consent.html +1 -1
  174. fides/ui-build/static/admin/data-catalog/[systemId]/projects/[projectUrn]/[resourceUrn].html +1 -1
  175. fides/ui-build/static/admin/data-catalog/[systemId]/projects/[projectUrn].html +1 -1
  176. fides/ui-build/static/admin/data-catalog/[systemId]/projects.html +1 -1
  177. fides/ui-build/static/admin/data-catalog/[systemId]/resources/[resourceUrn].html +1 -1
  178. fides/ui-build/static/admin/data-catalog/[systemId]/resources.html +1 -1
  179. fides/ui-build/static/admin/data-catalog.html +1 -1
  180. fides/ui-build/static/admin/data-discovery/action-center/[monitorId]/[systemId].html +1 -1
  181. fides/ui-build/static/admin/data-discovery/action-center/[monitorId].html +1 -1
  182. fides/ui-build/static/admin/data-discovery/action-center.html +1 -1
  183. fides/ui-build/static/admin/data-discovery/activity.html +1 -1
  184. fides/ui-build/static/admin/data-discovery/detection/[resourceUrn].html +1 -1
  185. fides/ui-build/static/admin/data-discovery/detection.html +1 -1
  186. fides/ui-build/static/admin/data-discovery/discovery/[resourceUrn].html +1 -1
  187. fides/ui-build/static/admin/data-discovery/discovery.html +1 -1
  188. fides/ui-build/static/admin/datamap.html +1 -1
  189. fides/ui-build/static/admin/dataset/[datasetId]/[collectionName]/[...subfieldNames].html +1 -1
  190. fides/ui-build/static/admin/dataset/[datasetId]/[collectionName].html +1 -1
  191. fides/ui-build/static/admin/dataset/[datasetId].html +1 -1
  192. fides/ui-build/static/admin/dataset/new.html +1 -1
  193. fides/ui-build/static/admin/dataset.html +1 -1
  194. fides/ui-build/static/admin/datastore-connection/[id].html +1 -1
  195. fides/ui-build/static/admin/datastore-connection/new.html +1 -1
  196. fides/ui-build/static/admin/datastore-connection.html +1 -1
  197. fides/ui-build/static/admin/index.html +1 -1
  198. fides/ui-build/static/admin/integrations/[id].html +1 -1
  199. fides/ui-build/static/admin/integrations.html +1 -1
  200. fides/ui-build/static/admin/lib/fides-preview.js +1 -1
  201. fides/ui-build/static/admin/lib/fides-tcf.js +2 -2
  202. fides/ui-build/static/admin/lib/fides.js +1 -1
  203. fides/ui-build/static/admin/login/[provider].html +1 -1
  204. fides/ui-build/static/admin/login.html +1 -1
  205. fides/ui-build/static/admin/messaging/[id].html +1 -1
  206. fides/ui-build/static/admin/messaging/add-template.html +1 -1
  207. fides/ui-build/static/admin/messaging.html +1 -1
  208. fides/ui-build/static/admin/poc/ant-components.html +1 -1
  209. fides/ui-build/static/admin/poc/form-experiments/AntForm.html +1 -1
  210. fides/ui-build/static/admin/poc/form-experiments/FormikAntFormItem.html +1 -1
  211. fides/ui-build/static/admin/poc/form-experiments/FormikControlled.html +1 -1
  212. fides/ui-build/static/admin/poc/form-experiments/FormikField.html +1 -1
  213. fides/ui-build/static/admin/poc/form-experiments/FormikSpreadField.html +1 -1
  214. fides/ui-build/static/admin/poc/forms.html +1 -1
  215. fides/ui-build/static/admin/poc/table-migration.html +1 -1
  216. fides/ui-build/static/admin/privacy-requests/[id].html +1 -1
  217. fides/ui-build/static/admin/privacy-requests/configure/messaging.html +1 -1
  218. fides/ui-build/static/admin/privacy-requests/configure/storage.html +1 -1
  219. fides/ui-build/static/admin/privacy-requests/configure.html +1 -1
  220. fides/ui-build/static/admin/privacy-requests.html +1 -1
  221. fides/ui-build/static/admin/properties/[id].html +1 -1
  222. fides/ui-build/static/admin/properties/add-property.html +1 -1
  223. fides/ui-build/static/admin/properties.html +1 -1
  224. fides/ui-build/static/admin/reporting/datamap.html +1 -1
  225. fides/ui-build/static/admin/settings/about/alpha.html +1 -1
  226. fides/ui-build/static/admin/settings/about.html +1 -1
  227. fides/ui-build/static/admin/settings/consent/[configuration_id]/[purpose_id].html +1 -1
  228. fides/ui-build/static/admin/settings/consent.html +1 -1
  229. fides/ui-build/static/admin/settings/custom-fields.html +1 -1
  230. fides/ui-build/static/admin/settings/domain-records.html +1 -1
  231. fides/ui-build/static/admin/settings/domains.html +1 -1
  232. fides/ui-build/static/admin/settings/email-templates.html +1 -1
  233. fides/ui-build/static/admin/settings/locations.html +1 -1
  234. fides/ui-build/static/admin/settings/organization.html +1 -1
  235. fides/ui-build/static/admin/settings/regulations.html +1 -1
  236. fides/ui-build/static/admin/systems/configure/[id]/test-datasets.html +1 -1
  237. fides/ui-build/static/admin/systems/configure/[id].html +1 -1
  238. fides/ui-build/static/admin/systems.html +1 -1
  239. fides/ui-build/static/admin/taxonomy.html +1 -1
  240. fides/ui-build/static/admin/user-management/new.html +1 -1
  241. fides/ui-build/static/admin/user-management/profile/[id].html +1 -1
  242. fides/ui-build/static/admin/user-management.html +1 -1
  243. fides/ui-build/static/admin/_next/static/chunks/203-0c6cadcda98bdd33.js +0 -1
  244. fides/ui-build/static/admin/_next/static/chunks/3450-9314e1b15df8a8da.js +0 -1
  245. fides/ui-build/static/admin/_next/static/chunks/3855-4267fd8193e7f525.js +0 -1
  246. fides/ui-build/static/admin/_next/static/chunks/3872-ac5feefd40b61ae3.js +0 -1
  247. fides/ui-build/static/admin/_next/static/chunks/409-5bc4369b80a8c11d.js +0 -1
  248. fides/ui-build/static/admin/_next/static/chunks/4230-1ebc8c0ab293a077.js +0 -1
  249. fides/ui-build/static/admin/_next/static/chunks/431-a34d7ceff17c2169.js +0 -1
  250. fides/ui-build/static/admin/_next/static/chunks/4608-557fb24665b2e4bf.js +0 -1
  251. fides/ui-build/static/admin/_next/static/chunks/5309-ffdec884eec79d29.js +0 -1
  252. fides/ui-build/static/admin/_next/static/chunks/5574-831167a8da90e2e6.js +0 -1
  253. fides/ui-build/static/admin/_next/static/chunks/6662-499c189f932a35aa.js +0 -1
  254. fides/ui-build/static/admin/_next/static/chunks/6780-7d28e030f6516e5d.js +0 -1
  255. fides/ui-build/static/admin/_next/static/chunks/6882-7cc1d14e27a80c10.js +0 -1
  256. fides/ui-build/static/admin/_next/static/chunks/6954-7784e8d5ad6b8110.js +0 -1
  257. fides/ui-build/static/admin/_next/static/chunks/7476-4de465016d3433b4.js +0 -1
  258. fides/ui-build/static/admin/_next/static/chunks/7630-2a5c57787632693d.js +0 -1
  259. fides/ui-build/static/admin/_next/static/chunks/7725-c79513b04113112b.js +0 -1
  260. fides/ui-build/static/admin/_next/static/chunks/79-98cfab20bb831137.js +0 -1
  261. fides/ui-build/static/admin/_next/static/chunks/796-0b768155bf20505f.js +0 -1
  262. fides/ui-build/static/admin/_next/static/chunks/8735-f84afcc50885883c.js +0 -1
  263. fides/ui-build/static/admin/_next/static/chunks/9046-97a972cc8a8ed24d.js +0 -1
  264. fides/ui-build/static/admin/_next/static/chunks/9226-318dadf1c050ecda.js +0 -1
  265. fides/ui-build/static/admin/_next/static/chunks/9676.9e6828b42ef05e06.js +0 -1
  266. fides/ui-build/static/admin/_next/static/chunks/9951-4df2b67e0def5500.js +0 -1
  267. fides/ui-build/static/admin/_next/static/chunks/pages/add-systems-18e96ce81dab51a4.js +0 -1
  268. fides/ui-build/static/admin/_next/static/chunks/pages/consent/configure-54d7c7310763c66d.js +0 -1
  269. fides/ui-build/static/admin/_next/static/chunks/pages/consent/privacy-notices-6bc3b73a21576869.js +0 -1
  270. fides/ui-build/static/admin/_next/static/chunks/pages/consent/reporting-fe3d6887fecf0f86.js +0 -1
  271. fides/ui-build/static/admin/_next/static/chunks/pages/data-catalog/[systemId]/projects-e4770acf7044e2f5.js +0 -1
  272. fides/ui-build/static/admin/_next/static/chunks/pages/data-catalog-0db635c3483c9da8.js +0 -1
  273. fides/ui-build/static/admin/_next/static/chunks/pages/data-discovery/action-center/[monitorId]/[systemId]-0c0e0a7798345541.js +0 -1
  274. fides/ui-build/static/admin/_next/static/chunks/pages/data-discovery/action-center/[monitorId]-3c56e5fe072a44c6.js +0 -1
  275. fides/ui-build/static/admin/_next/static/chunks/pages/data-discovery/action-center-53a763e49ce34a74.js +0 -1
  276. fides/ui-build/static/admin/_next/static/chunks/pages/data-discovery/activity-6a90131dcecd694c.js +0 -1
  277. fides/ui-build/static/admin/_next/static/chunks/pages/dataset/[datasetId]/[collectionName]/[...subfieldNames]-145fe9e4cfcb231d.js +0 -1
  278. fides/ui-build/static/admin/_next/static/chunks/pages/dataset/[datasetId]/[collectionName]-8a1e5d140785c1e9.js +0 -1
  279. fides/ui-build/static/admin/_next/static/chunks/pages/dataset/[datasetId]-227b5db4b472a6a7.js +0 -1
  280. fides/ui-build/static/admin/_next/static/chunks/pages/dataset/new-8401f17fe5d9a1dc.js +0 -1
  281. fides/ui-build/static/admin/_next/static/chunks/pages/dataset-7d77b3ad069be268.js +0 -1
  282. fides/ui-build/static/admin/_next/static/chunks/pages/datastore-connection-cfb25b02abb8da71.js +0 -1
  283. fides/ui-build/static/admin/_next/static/chunks/pages/integrations-3fdc55d4c129e618.js +0 -1
  284. fides/ui-build/static/admin/_next/static/chunks/pages/messaging-8f9c006b6166f002.js +0 -1
  285. fides/ui-build/static/admin/_next/static/chunks/pages/poc/ant-components-6ba7ae4f26c06cb0.js +0 -1
  286. fides/ui-build/static/admin/_next/static/chunks/pages/poc/table-migration-e8db3ad525e7ddbd.js +0 -1
  287. fides/ui-build/static/admin/_next/static/chunks/pages/privacy-requests/[id]-c14dd24592369467.js +0 -1
  288. fides/ui-build/static/admin/_next/static/chunks/pages/privacy-requests/configure/messaging-100d7d03930629a8.js +0 -1
  289. fides/ui-build/static/admin/_next/static/chunks/pages/privacy-requests/configure/storage-6f8d1b3ec83cfcf0.js +0 -1
  290. fides/ui-build/static/admin/_next/static/chunks/pages/privacy-requests/configure-3ce15577435d47cb.js +0 -1
  291. fides/ui-build/static/admin/_next/static/chunks/pages/privacy-requests-f43a988542813110.js +0 -1
  292. fides/ui-build/static/admin/_next/static/chunks/pages/settings/about/alpha-1ea40fcd6b4268bf.js +0 -1
  293. fides/ui-build/static/admin/_next/static/chunks/pages/settings/about-65c7600fadc6e55a.js +0 -1
  294. fides/ui-build/static/admin/_next/static/chunks/pages/settings/consent/[configuration_id]/[purpose_id]-33dab986141b3663.js +0 -1
  295. fides/ui-build/static/admin/_next/static/chunks/pages/settings/consent-1195042727c399ed.js +0 -1
  296. fides/ui-build/static/admin/_next/static/chunks/pages/settings/custom-fields-71b98858ecb4e097.js +0 -1
  297. fides/ui-build/static/admin/_next/static/chunks/pages/settings/domains-cf427e04f862b5d2.js +0 -1
  298. fides/ui-build/static/admin/_next/static/chunks/pages/settings/email-templates-eabeeec5bf2773c6.js +0 -1
  299. fides/ui-build/static/admin/_next/static/chunks/pages/settings/organization-ee56698ae3a6a78b.js +0 -1
  300. fides/ui-build/static/admin/_next/static/chunks/pages/systems/configure/[id]/test-datasets-0e2e98cc38ee5499.js +0 -1
  301. fides/ui-build/static/admin/_next/static/chunks/pages/systems-c32589c86081b750.js +0 -1
  302. fides/ui-build/static/admin/_next/static/chunks/pages/taxonomy-a8f09bf8f3204ca7.js +0 -1
  303. fides/ui-build/static/admin/_next/static/css/e1628f15dd5f019b.css +0 -1
  304. fides/ui-build/static/admin/_next/static/tzF4yti8NslASlGnxnZ8m/_buildManifest.js +0 -1
  305. {ethyca_fides-2.68.1b1.dist-info → ethyca_fides-2.68.1b3.dist-info}/WHEEL +0 -0
  306. {ethyca_fides-2.68.1b1.dist-info → ethyca_fides-2.68.1b3.dist-info}/entry_points.txt +0 -0
  307. {ethyca_fides-2.68.1b1.dist-info → ethyca_fides-2.68.1b3.dist-info}/licenses/LICENSE +0 -0
  308. {ethyca_fides-2.68.1b1.dist-info → ethyca_fides-2.68.1b3.dist-info}/top_level.txt +0 -0
  309. fides/ui-build/static/admin/_next/static/{tzF4yti8NslASlGnxnZ8m → _BLI2ArqQzY5XnXbrcxa2}/_ssgManifest.js +0 -0
@@ -0,0 +1,998 @@
1
+ """Smart-open based streaming storage for efficient cloud-to-cloud data transfer."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import csv
6
+ import json
7
+ from datetime import datetime
8
+ from io import BytesIO, StringIO
9
+ from itertools import chain
10
+ from typing import Any, Generator, Iterable, Optional, Tuple
11
+ from urllib.parse import urlparse
12
+
13
+ from fideslang.validation import AnyHttpUrlString
14
+ from loguru import logger
15
+ from stream_zip import _ZIP_32_TYPE, stream_zip
16
+
17
+ from fides.api.common_exceptions import StorageUploadError
18
+ from fides.api.models.privacy_request import PrivacyRequest
19
+ from fides.api.schemas.storage.storage import ResponseFormat
20
+ from fides.api.service.privacy_request.dsr_package.dsr_report_builder import (
21
+ DsrReportBuilder,
22
+ )
23
+ from fides.api.service.storage.streaming.dsr_storage import (
24
+ create_dsr_report_files_generator,
25
+ stream_dsr_buffer_to_storage,
26
+ )
27
+ from fides.api.service.storage.streaming.retry import retry_cloud_storage_operation
28
+ from fides.api.service.storage.streaming.schemas import (
29
+ CHUNK_SIZE_THRESHOLD,
30
+ AttachmentInfo,
31
+ AttachmentProcessingInfo,
32
+ PackageSplitConfig,
33
+ StorageUploadConfig,
34
+ StreamingBufferConfig,
35
+ )
36
+ from fides.api.service.storage.streaming.smart_open_client import SmartOpenStorageClient
37
+
38
+ DEFAULT_ATTACHMENT_NAME = "attachment"
39
+ DEFAULT_FILE_MODE = 0o644
40
+ S3_AMAZONAWS_COM_DOMAIN = ".s3.amazonaws.com"
41
+
42
+
43
+ class SmartOpenStreamingStorage:
44
+ """Streaming storage implementation using smart-open for efficient cloud-to-cloud data streaming.
45
+
46
+ This class maintains our DSR-specific business logic (package splitting, attachment processing)
47
+ while leveraging smart-open's mature streaming capabilities for storage operations.
48
+
49
+ Key streaming features:
50
+ - Data files (JSON/CSV): Small files loaded into memory for ZIP creation
51
+ - Attachment files: Streamed in chunks (8KB) without loading entire files to memory
52
+ - ZIP creation: Uses stream_zip for memory-efficient ZIP generation
53
+ - Upload: Streams ZIP chunks directly to destination storage
54
+
55
+ This ensures true cloud-to-cloud streaming with minimal memory usage and no local file storage.
56
+ """
57
+
58
def __init__(
    self,
    storage_client: SmartOpenStorageClient,
    chunk_size: int = CHUNK_SIZE_THRESHOLD,
):
    """Bind the storage backend and the attachment read size to this instance.

    Args:
        storage_client: Smart-open based client used for every read, upload
            and presigned-URL call made by this class.
        chunk_size: Number of bytes requested per read when streaming an
            attachment (defaults to CHUNK_SIZE_THRESHOLD).
    """
    self.chunk_size = chunk_size
    self.storage_client = storage_client
71
+
72
+ def _parse_storage_url(self, storage_key: str) -> tuple[str, str]:
73
+ """Parse storage URL and return (bucket, key).
74
+
75
+ Supports multiple URL formats:
76
+ - s3://bucket/path
77
+ - https://bucket.s3.amazonaws.com/path
78
+ - http://bucket.s3.amazonaws.com/path
79
+ - Generic HTTP(S) URLs (returns domain as bucket, path as key)
80
+
81
+ Args:
82
+ storage_key: Storage key or URL
83
+
84
+ Returns:
85
+ Tuple of (bucket_name, object_key)
86
+
87
+ Raises:
88
+ ValueError: If URL cannot be parsed
89
+ """
90
+ if storage_key.startswith("s3://"):
91
+ # Extract bucket from S3 URL: s3://bucket/path
92
+ parts = storage_key.split("/")
93
+ if len(parts) < 4:
94
+ raise ValueError(f"Invalid S3 URL format: {storage_key}")
95
+ return parts[2], "/".join(parts[3:])
96
+
97
+ if S3_AMAZONAWS_COM_DOMAIN in storage_key:
98
+ # Extract bucket and key from HTTP(S) S3 URL
99
+ clean_url = storage_key.split("?")[0]
100
+ parts = clean_url.split(S3_AMAZONAWS_COM_DOMAIN)
101
+ if len(parts) == 2:
102
+ bucket = parts[0].replace("https://", "").replace("http://", "")
103
+ key = parts[1].lstrip(
104
+ "/"
105
+ ) # Strip leading forward slash for S3 compatibility
106
+ return bucket, key
107
+
108
+ # Handle generic HTTP(S) URLs
109
+ if storage_key.startswith(("http://", "https://")):
110
+ parsed = urlparse(storage_key)
111
+ bucket = parsed.netloc
112
+ key = parsed.path.lstrip("/")
113
+ return bucket, key
114
+
115
+ raise ValueError(f"Could not parse storage URL: {storage_key}")
116
+
117
def _convert_to_stream_zip_format(
    self, generator: Generator[Tuple[str, BytesIO, dict[str, Any]], None, None]
) -> Generator[Tuple[str, datetime, int, Any, Iterable[bytes]], None, None]:
    """Adapt (filename, BytesIO, metadata) entries to stream_zip member tuples.

    Each incoming buffer is read in full and emitted as a one-chunk iterable,
    which is acceptable for the small JSON/CSV data files this is used for.
    The metadata element of every entry is ignored.

    Yields:
        (filename, modified_at, mode, method, content_iter) tuples.
    """
    for name, buffer, _metadata in generator:
        # Read from the start regardless of where the producer left the cursor
        buffer.seek(0)
        payload = buffer.read()
        # Rewind again so the buffer can be consumed a second time if needed
        buffer.seek(0)

        member = (
            name,
            datetime.now(),
            DEFAULT_FILE_MODE,
            _ZIP_32_TYPE(),
            iter([payload]),
        )
        yield member
134
+
135
def build_attachments_list(
    self, data: dict, config: PackageSplitConfig
) -> list[tuple[str, dict, int]]:
    """
    Build a flat list of the items in ``data`` that carry attachments.

    Only list values of ``data`` are inspected. Entries that are not
    dictionaries are skipped (as in _collect_nested_attachments), because they
    cannot hold an "attachments" list. An item with more attachments than
    ``config.max_attachments`` is split into several shallow copies, each
    holding one slice of the original attachment list.

    Args:
        data: The data to build the attachments list from
        config: The configuration for package splitting

    Returns:
        A list of (key, item, attachment_count) tuples, one per item (or
        per slice of an oversized item) that has at least one attachment
    """
    limit = config.max_attachments
    attachments_list: list[tuple[str, dict, int]] = []

    for key, value in data.items():
        if not isinstance(value, list):
            continue

        for item in value:
            # Non-dict entries cannot carry attachments; skip instead of crashing
            if not isinstance(item, dict):
                continue

            attachments = item.get("attachments", [])
            # Only include items that actually have an attachment list with content
            if not isinstance(attachments, list) or not attachments:
                continue

            attachment_count = len(attachments)
            if attachment_count <= limit:
                attachments_list.append((key, item, attachment_count))
                continue

            # Oversized item: emit one shallow copy per slice of at most `limit`
            for start in range(0, attachment_count, limit):
                sub_attachments = attachments[start : start + limit]
                sub_item = item.copy()
                sub_item["attachments"] = sub_attachments
                attachments_list.append((key, sub_item, len(sub_attachments)))

    return attachments_list
178
+
179
def split_data_into_packages(
    self, data: dict, config: Optional[PackageSplitConfig] = None
) -> list[dict]:
    """Distribute items with attachments over packages of bounded size.

    The placement is first-fit on a descending sort:
    1. Items (as produced by build_attachments_list) are ordered by
       attachment count, largest first.
    2. Each item goes into the first existing package whose attachment total
       stays within ``config.max_attachments``.
    3. A new package is opened only when no existing package can take it.

    Items exceeding the limit have already been split by
    build_attachments_list, and only items it returns end up in a package.

    Args:
        data: The data to split
        config: Configuration for package splitting (defaults to PackageSplitConfig())

    Returns:
        List of data packages, each mapping a key to the items placed in it
    """
    if config is None:
        config = PackageSplitConfig()

    # Largest first gives the later, smaller items a chance to fill the gaps
    ranked_items = sorted(
        self.build_attachments_list(data, config),
        key=lambda entry: entry[2],
        reverse=True,
    )

    packages: list[dict[str, Any]] = []
    package_loads: list[int] = []

    for key, item, attachment_count in ranked_items:
        for index, load in enumerate(package_loads):
            if load + attachment_count <= config.max_attachments:
                packages[index].setdefault(key, []).append(item)
                package_loads[index] = load + attachment_count
                break
        else:
            # No existing package has room left for this item
            packages.append({key: [item]})
            package_loads.append(attachment_count)

    return packages
231
+
232
+ def _collect_attachments(self, data: dict) -> list[dict]:
233
+ """Collect all attachment data from the input data structure.
234
+
235
+ This method handles both direct attachments (under 'attachments' key) and
236
+ nested attachments within items. It returns raw attachment data without validation.
237
+
238
+ Args:
239
+ data: The data dictionary containing items with attachments
240
+
241
+ Returns:
242
+ List of raw attachment dictionaries with metadata
243
+ """
244
+ all_attachments = []
245
+
246
+ for key, value in data.items():
247
+ logger.debug(f"Processing key '{key}' with value type: {type(value)}")
248
+
249
+ if not isinstance(value, list) or not value:
250
+ continue
251
+
252
+ # Collect direct attachments if this key is "attachments"
253
+ if key == "attachments":
254
+ all_attachments.extend(self._collect_direct_attachments(value))
255
+
256
+ # Collect nested attachments from items
257
+ all_attachments.extend(self._collect_nested_attachments(key, value))
258
+
259
+ logger.debug(f"Collected {len(all_attachments)} raw attachments")
260
+ return all_attachments
261
+
262
+ def _collect_direct_attachments(self, attachments_list: list) -> list[dict]:
263
+ """Collect attachments from a direct attachments list.
264
+
265
+ Args:
266
+ attachments_list: List of attachment dictionaries
267
+
268
+ Returns:
269
+ List of attachment data dictionaries with metadata
270
+ """
271
+ direct_attachments = []
272
+
273
+ logger.debug(
274
+ f"Found 'attachments' key with {len(attachments_list)} items - processing as direct attachments"
275
+ )
276
+
277
+ for idx, attachment in enumerate(attachments_list):
278
+ if not isinstance(attachment, dict):
279
+ continue
280
+
281
+ # Check if this looks like an attachment (has file_name or download_url)
282
+ if "file_name" in attachment or "download_url" in attachment:
283
+ # Transform download_url to internal access package URL for access package display
284
+ if "download_url" in attachment:
285
+ attachment["original_download_url"] = attachment["download_url"]
286
+ attachment["download_url"] = (
287
+ f"attachments/{attachment.get('file_name', f'attachment_{idx}')}"
288
+ )
289
+
290
+ direct_attachments.append(attachment)
291
+
292
+ return direct_attachments
293
+
294
+ def _collect_nested_attachments(self, key: str, items: list) -> list[dict]:
295
+ """Collect attachments from nested items.
296
+
297
+ Args:
298
+ key: The key for the items list
299
+ items: List of items that may contain attachments
300
+
301
+ Returns:
302
+ List of attachment data dictionaries with metadata
303
+ """
304
+ nested_attachments = []
305
+
306
+ for item in items:
307
+ if not isinstance(item, dict):
308
+ continue
309
+
310
+ # Recursively search for attachments in nested structures
311
+ item_attachments = self._find_attachments_recursive(item, key)
312
+ nested_attachments.extend(item_attachments)
313
+
314
+ return nested_attachments
315
+
316
+ def _find_attachments_recursive(
317
+ self, item: dict, context_key: str, path: str = ""
318
+ ) -> list[dict]:
319
+ """Recursively find attachments in nested dictionary structures.
320
+
321
+ Args:
322
+ item: Dictionary item to search
323
+ context_key: The top-level key for context
324
+ path: Current path in the nested structure
325
+
326
+ Returns:
327
+ List of attachment data dictionaries with metadata
328
+ """
329
+ attachments = []
330
+
331
+ # Check if this item has direct attachments
332
+ if "attachments" in item and isinstance(item["attachments"], list):
333
+ for attachment in item["attachments"]:
334
+ if not isinstance(attachment, dict):
335
+ continue
336
+
337
+ # Check if this looks like an attachment
338
+ if "file_name" in attachment or "download_url" in attachment:
339
+ # Add context about which item this attachment belongs to
340
+ attachment_with_context = attachment.copy()
341
+ attachment_with_context["_context"] = {
342
+ "key": context_key,
343
+ "item_id": item.get("id", "unknown"),
344
+ "path": path,
345
+ }
346
+
347
+ # Transform download_url to internal access package URL
348
+ if "download_url" in attachment:
349
+ attachment_with_context["original_download_url"] = attachment[
350
+ "download_url"
351
+ ]
352
+ attachment_with_context["download_url"] = (
353
+ f"attachments/{attachment.get('file_name', 'attachment')}"
354
+ )
355
+
356
+ attachments.append(attachment_with_context)
357
+
358
+ # Recursively search nested dictionaries
359
+ for key, value in item.items():
360
+ if isinstance(value, dict):
361
+ current_path = f"{path}.{key}" if path else key
362
+ nested_attachments = self._find_attachments_recursive(
363
+ value, context_key, current_path
364
+ )
365
+ attachments.extend(nested_attachments)
366
+
367
+ return attachments
368
+
369
def _validate_attachment(
    self, attachment: dict
) -> Optional[AttachmentProcessingInfo]:
    """Turn one raw attachment dictionary into an AttachmentProcessingInfo.

    The storage key is the first non-empty value among
    "original_download_url", "download_url" and "file_name". Attachments that
    carry a "_context" entry are placed under
    "<key>/<item_id>/attachments"; all others under "attachments".

    Args:
        attachment: Raw attachment data dictionary

    Returns:
        AttachmentProcessingInfo if valid, None when no storage key can be
        determined or a ValueError/TypeError/KeyError occurs while building it
    """
    try:
        # Prefer the original URL: download_url may already be an internal path
        storage_key = (
            attachment.get("original_download_url")
            or attachment.get("download_url")
            or attachment.get("file_name", "")
        )
        if not storage_key:
            return None

        info = AttachmentInfo(
            storage_key=storage_key,
            file_name=attachment.get("file_name"),
            size=attachment.get("size"),
            content_type=attachment.get("content_type"),
        )

        # Location of the attachment inside the zip
        context = attachment.get("_context")
        if context:
            base_path = f"{context['key']}/{context['item_id']}/attachments"
        else:
            base_path = "attachments"

        processing_info = AttachmentProcessingInfo(
            attachment=info,
            base_path=base_path,
            item=attachment,
        )
    except (ValueError, TypeError, KeyError) as e:
        logger.debug(f"Failed to validate attachment: {attachment}, error: {e}")
        return None

    logger.debug(f"Successfully validated attachment: {info.storage_key}")
    return processing_info
419
+
420
+ def _create_attachment_content_stream(
421
+ self, bucket: str, key: str, storage_key: str
422
+ ) -> Iterable[bytes]:
423
+ """Create a streaming iterator for attachment content without loading entire file to memory.
424
+
425
+ Args:
426
+ bucket: Source bucket name
427
+ key: Source key/path
428
+ storage_key: Original storage key for logging
429
+
430
+ Returns:
431
+ Iterator that yields chunks of the attachment content
432
+ """
433
+ try:
434
+ logger.debug(
435
+ f"Starting streaming read of {storage_key} from bucket: {bucket}, key: {key}"
436
+ )
437
+ with self.storage_client.stream_read(bucket, key) as content_stream:
438
+ # Stream in chunks instead of reading entire file
439
+ chunk_count = 0
440
+ total_bytes = 0
441
+ while True:
442
+ chunk = content_stream.read(self.chunk_size)
443
+ if not chunk:
444
+ break
445
+ chunk_count += 1
446
+ total_bytes += len(chunk)
447
+ yield chunk
448
+
449
+ logger.debug(
450
+ f"Completed streaming {chunk_count} chunks ({total_bytes} bytes) for {storage_key}"
451
+ )
452
+ except Exception as e:
453
+ logger.warning(f"Failed to stream attachment {storage_key}: {e}")
454
+ # Yield empty content on failure
455
+ yield b""
456
+
457
+ def _collect_and_validate_attachments(
458
+ self, data: dict
459
+ ) -> list[AttachmentProcessingInfo]:
460
+ """Collect and validate all attachments from the data.
461
+
462
+ This method now delegates to _collect_attachments and _validate_attachment
463
+ for better separation of concerns and readability.
464
+
465
+ Args:
466
+ data: The data dictionary containing items with attachments
467
+
468
+ Returns:
469
+ List of validated AttachmentProcessingInfo objects
470
+ """
471
+ # Collect raw attachment data
472
+ raw_attachments = self._collect_attachments(data)
473
+
474
+ # Validate and convert each attachment
475
+ validated_attachments = []
476
+ for attachment_data in raw_attachments:
477
+ validated = self._validate_attachment(attachment_data)
478
+ if validated:
479
+ validated_attachments.append(validated)
480
+
481
+ logger.debug(
482
+ f"Successfully validated {len(validated_attachments)} out of {len(raw_attachments)} attachments"
483
+ )
484
+ return validated_attachments
485
+
486
@retry_cloud_storage_operation(
    provider="smart_open_streaming",
    operation_name="upload_to_storage_streaming",
    max_retries=2,
    base_delay=2.0,
    max_delay=30.0,
)
def upload_to_storage_streaming(
    self,
    data: dict,
    config: StorageUploadConfig,
    privacy_request: Optional[PrivacyRequest],
    document: Optional[Any] = None,
    buffer_config: Optional[StreamingBufferConfig] = None,
    batch_size: int = 10,
) -> Optional[AnyHttpUrlString]:
    """Upload data to cloud storage using smart-open streaming for memory efficiency.

    Dispatches on ``config.resp_format``: CSV and JSON go through
    _handle_data_format_upload, HTML through _handle_html_format_upload.

    Args:
        data: Data to upload
        config: Upload configuration
        privacy_request: Privacy request object
        document: Optional document (not yet implemented)
        buffer_config: Buffer configuration (defaults to StreamingBufferConfig())
        batch_size: Number of attachments to process in each batch

    Returns:
        presigned_url as returned by the format-specific handler

    Raises:
        ValueError: If privacy_request is not provided or the response
            format is not supported
        NotImplementedError: If document-only upload is attempted
        StorageUploadError: If upload fails
    """
    self._validate_upload_inputs(privacy_request, document)
    # Already enforced by _validate_upload_inputs; repeated so the value is
    # visibly non-None from here on
    if not privacy_request:
        raise ValueError("Privacy request must be provided")

    # Use default buffer config if none provided
    if buffer_config is None:
        buffer_config = StreamingBufferConfig()

    try:
        if config.resp_format in [
            ResponseFormat.csv.value,
            ResponseFormat.json.value,
        ]:
            return self._handle_data_format_upload(
                config, data, privacy_request, buffer_config, batch_size
            )
        if config.resp_format == ResponseFormat.html.value:
            return self._handle_html_format_upload(
                config, data, privacy_request, buffer_config, batch_size
            )
        raise ValueError(f"Unsupported response format: {config.resp_format}")

    except (ValueError, NotImplementedError, StorageUploadError):
        # Validation errors are user errors and storage errors are already
        # in their final form - re-raise both as-is
        raise
    except Exception as e:
        # loguru attaches the traceback through opt(exception=True); it has no
        # exc_info flag, and passing extra keyword arguments would make it
        # run str.format on the already-interpolated message
        logger.opt(exception=True).error(
            f"Unexpected error during storage upload: {e}"
        )
        raise StorageUploadError(
            f"Storage upload failed due to unexpected error: {e}"
        ) from e
558
+
559
+ def _validate_upload_inputs(
560
+ self, privacy_request: Optional[PrivacyRequest], document: Optional[Any]
561
+ ) -> None:
562
+ """Validate upload input parameters.
563
+
564
+ Args:
565
+ privacy_request: Privacy request object
566
+ document: Optional document
567
+
568
+ Raises:
569
+ ValueError: If privacy_request is not provided
570
+ NotImplementedError: If document-only upload is attempted
571
+ """
572
+ if not privacy_request:
573
+ raise ValueError("Privacy request must be provided")
574
+
575
+ if document:
576
+ raise NotImplementedError("Document-only uploads not yet implemented")
577
+
578
def _handle_data_format_upload(
    self,
    config: StorageUploadConfig,
    data: dict,
    privacy_request: PrivacyRequest,
    buffer_config: StreamingBufferConfig,
    batch_size: int,
) -> Optional[AnyHttpUrlString]:
    """Upload a CSV/JSON package as a ZIP and return a presigned URL for it.

    Args:
        config: Upload configuration
        data: Data to upload
        privacy_request: Privacy request object
        buffer_config: Buffer configuration
        batch_size: Number of attachments to process in each batch

    Returns:
        presigned_url for the uploaded object

    Raises:
        StorageUploadError: If the presigned URL cannot be generated
    """
    bucket_name = config.bucket_name
    file_key = config.file_key

    self._stream_attachments_to_storage_zip(
        bucket_name,
        file_key,
        data,
        privacy_request,
        config.max_workers,
        buffer_config,
        batch_size,
        config.resp_format,
    )

    # The object now exists in storage; hand back a link to it
    try:
        presigned_url = self.storage_client.generate_presigned_url(
            bucket_name, file_key
        )
    except Exception as e:
        logger.error(
            f"Failed to generate presigned URL for {config.bucket_name}/{config.file_key}: {e}"
        )
        raise StorageUploadError(f"Failed to generate presigned URL: {e}") from e

    return presigned_url
619
+
620
def _handle_html_format_upload(
    self,
    config: StorageUploadConfig,
    data: dict,
    privacy_request: PrivacyRequest,
    buffer_config: StreamingBufferConfig,
    batch_size: int,
) -> Optional[AnyHttpUrlString]:
    """Upload an HTML DSR report, bundled with its attachments when present.

    The report is generated first. Without attachments the report buffer is
    uploaded as-is; with attachments a ZIP containing the attachment files
    followed by the report files is streamed to storage. Either way a
    presigned URL for the uploaded object is returned.

    Args:
        config: Upload configuration
        data: Data to upload
        privacy_request: Privacy request object
        buffer_config: Buffer configuration
        batch_size: Number of attachments to process in each batch

    Returns:
        presigned_url for the uploaded object

    Raises:
        StorageUploadError: If report generation or presigned URL
            generation fails
    """
    try:
        dsr_buffer = DsrReportBuilder(
            privacy_request=privacy_request,
            dsr_data=data,
        ).generate()
        # Rewind so the buffer is read from its beginning by the uploader
        dsr_buffer.seek(0)
    except Exception as e:
        logger.error(f"Failed to generate DSR report: {e}")
        raise StorageUploadError(f"Failed to generate DSR report: {e}") from e

    all_attachments = self._collect_and_validate_attachments(data)

    if all_attachments:
        logger.debug(
            f"Creating HTML DSR report ZIP with {len(all_attachments)} attachments"
        )

        report_entries = create_dsr_report_files_generator(
            dsr_buffer,
            all_attachments,
            config.bucket_name,
            config.max_workers,
            batch_size,
        )
        attachment_entries = self._create_attachment_files(all_attachments)

        # Attachment files first, report files after, in one streamed ZIP
        zip_members = chain(attachment_entries, report_entries)
        with self.storage_client.stream_upload(
            config.bucket_name,
            config.file_key,
            content_type="application/zip",
        ) as upload_stream:
            for chunk in stream_zip(zip_members):
                upload_stream.write(chunk)

        logger.debug(
            f"Successfully uploaded HTML DSR report ZIP with attachments: {config.file_key}"
        )
    else:
        # No attachments, just upload the DSR report
        stream_dsr_buffer_to_storage(
            self.storage_client,
            config.bucket_name,
            config.file_key,
            dsr_buffer,
        )

    # Both branches end with the object in storage; hand back a link to it
    try:
        return self.storage_client.generate_presigned_url(
            config.bucket_name, config.file_key
        )
    except Exception as e:
        logger.error(
            f"Failed to generate presigned URL for {config.bucket_name}/{config.file_key}: {e}"
        )
        raise StorageUploadError(f"Failed to generate presigned URL: {e}") from e
715
+
716
@retry_cloud_storage_operation(
    provider="smart_open_streaming",
    operation_name="stream_attachments_to_storage_zip",
    max_retries=2,
    base_delay=2.0,
    max_delay=30.0,
)
def _stream_attachments_to_storage_zip(
    self,
    bucket_name: str,
    file_key: str,
    data: dict,
    privacy_request: PrivacyRequest,
    max_workers: int,
    buffer_config: StreamingBufferConfig,
    batch_size: int,
    resp_format: str,
) -> None:
    """Write the data files and attachments to storage as one ZIP object.

    When ``data`` holds no valid attachment the work is handed to
    _upload_data_only_zip. Otherwise the entries produced by
    _create_zip_generator are fed through stream_zip and written chunk by
    chunk to the upload stream opened on the storage client.

    Args:
        bucket_name: Storage bucket name
        file_key: File key in storage
        data: Data to upload
        privacy_request: Privacy request object (not used by this method)
        max_workers: Maximum parallel workers
        buffer_config: Buffer configuration (not used by this method)
        batch_size: Number of attachments to process in each batch
        resp_format: Response format (csv, json)
    """
    all_attachments = self._collect_and_validate_attachments(data)

    if all_attachments:
        logger.debug(
            f"Starting streaming ZIP creation with {len(all_attachments)} attachments in batches of {batch_size}"
        )

        zip_members = self._create_zip_generator(
            data,
            all_attachments,
            bucket_name,
            max_workers,
            batch_size,
            resp_format,
        )

        with self.storage_client.stream_upload(
            bucket_name, file_key, content_type="application/zip"
        ) as upload_stream:
            for chunk in stream_zip(zip_members):
                upload_stream.write(chunk)

        logger.debug(
            f"Successfully created memory-efficient streaming ZIP using smart-open: {file_key}"
        )
    else:
        # Nothing to attach: the archive holds the data files only
        self._upload_data_only_zip(bucket_name, file_key, data, resp_format)
782
+
783
def _upload_data_only_zip(
    self, bucket_name: str, file_key: str, data: dict, resp_format: str
) -> None:
    """Stream a ZIP that contains only the data files (no attachments).

    Args:
        bucket_name: Storage bucket name
        file_key: File key in storage
        data: Data to upload
        resp_format: Response format
    """
    logger.debug("Creating data-only ZIP file (no attachments)")

    # Data files, adapted to the member tuples stream_zip consumes
    zip_members = self._convert_to_stream_zip_format(
        self._create_data_files(data, resp_format)
    )

    with self.storage_client.stream_upload(
        bucket_name, file_key, content_type="application/zip"
    ) as upload_stream:
        for chunk in stream_zip(zip_members):
            upload_stream.write(chunk)

    logger.debug(f"Successfully uploaded data-only ZIP: {file_key}")
810
+
811
+ def _create_zip_generator(
812
+ self,
813
+ data: dict,
814
+ all_attachments: list[AttachmentProcessingInfo],
815
+ bucket_name: str,
816
+ max_workers: int,
817
+ batch_size: int,
818
+ resp_format: str,
819
+ ) -> Generator[Tuple[str, datetime, int, Any, Iterable[bytes]], None, None]:
820
+ """Create a generator for ZIP file contents including data and attachments.
821
+
822
+ Args:
823
+ data: Data to include in the ZIP
824
+ all_attachments: List of validated attachments
825
+ bucket_name: Storage bucket name
826
+ max_workers: Maximum parallel workers
827
+ batch_size: Number of attachments to process in each batch
828
+ resp_format: Response format
829
+
830
+ Returns:
831
+ Generator yielding ZIP file entries in stream_zip format
832
+ """
833
+ logger.debug(f"Creating ZIP generator with {len(all_attachments)} attachments")
834
+
835
+ # For HTML format, data files are not needed as the DSR report contains the HTML content
836
+ if resp_format.lower() != "html":
837
+ # First, yield data files (convert to stream_zip format and stream directly)
838
+ data_files_generator = self._create_data_files(
839
+ data, resp_format, all_attachments
840
+ )
841
+ logger.debug("Yielding data files for ZIP")
842
+ yield from self._convert_to_stream_zip_format(data_files_generator)
843
+
844
+ # Then, yield attachment files (already in stream_zip format, stream directly)
845
+ attachment_files_generator = self._create_attachment_files(all_attachments)
846
+ logger.debug("Yielding attachment files for ZIP")
847
+ yield from attachment_files_generator
848
+
849
+ def _create_data_files(
850
+ self,
851
+ data: dict,
852
+ resp_format: str = "json",
853
+ all_attachments: Optional[list[AttachmentProcessingInfo]] = None,
854
+ ) -> Generator[Tuple[str, BytesIO, dict[str, Any]], None, None]:
855
+ """Create data files (JSON/CSV) from the input data based on resp_format configuration."""
856
+
857
+ # Transform data to use internal access package URLs if attachments are provided
858
+ if all_attachments:
859
+ data = self._transform_data_for_access_package(data, all_attachments)
860
+
861
+ for key, value in data.items():
862
+ if isinstance(value, list) and value:
863
+ # Use the configured response format instead of making decisions based on content
864
+ if resp_format.lower() == "json":
865
+ data_content = json.dumps(value, default=str).encode("utf-8")
866
+ yield f"{key}.json", BytesIO(data_content), {}
867
+ elif resp_format.lower() == "csv":
868
+ csv_buffer = StringIO()
869
+ if value and isinstance(value[0], dict):
870
+ writer = csv.DictWriter(csv_buffer, fieldnames=value[0].keys())
871
+ writer.writeheader()
872
+ writer.writerows(value)
873
+ data_content = csv_buffer.getvalue().encode("utf-8")
874
+ yield f"{key}.csv", BytesIO(data_content), {}
875
+ else:
876
+ # Fallback to JSON for non-dict list items when CSV is requested
877
+ data_content = json.dumps(value, default=str).encode("utf-8")
878
+ yield f"{key}.json", BytesIO(data_content), {}
879
+ elif resp_format.lower() == "html":
880
+ # HTML format typically uses JSON for data files since HTML is for the report itself
881
+ data_content = json.dumps(value, default=str).encode("utf-8")
882
+ yield f"{key}.json", BytesIO(data_content), {}
883
+ else:
884
+ # Default to JSON for unsupported formats
885
+ data_content = json.dumps(value, default=str).encode("utf-8")
886
+ yield f"{key}.json", BytesIO(data_content), {}
887
+
888
+ def _create_attachment_files(
889
+ self,
890
+ all_attachments: list[AttachmentProcessingInfo],
891
+ ) -> Generator[Tuple[str, datetime, int, Any, Iterable[bytes]], None, None]:
892
+ """Create attachment files for the ZIP using true cloud-to-cloud streaming.
893
+
894
+ This method yields stream_zip format entries without loading entire files to memory.
895
+ Each attachment is processed as a streaming iterator that yields chunks directly
896
+ from source storage to ZIP generation.
897
+
898
+ Args:
899
+ all_attachments: List of validated attachments
900
+
901
+ Returns:
902
+ Generator yielding attachment file entries in stream_zip format
903
+ """
904
+ for attachment_info in all_attachments:
905
+ result = self._process_attachment_safely(attachment_info)
906
+ yield result
907
+
908
+ def _transform_data_for_access_package(
909
+ self, data: dict[str, Any], all_attachments: list[AttachmentProcessingInfo]
910
+ ) -> dict[str, Any]:
911
+ """
912
+ Transform the data structure to replace download URLs with internal access package paths.
913
+ This ensures that when data is serialized to JSON/CSV, it contains internal references
914
+ instead of external download URLs.
915
+ """
916
+ if not all_attachments:
917
+ return data
918
+
919
+ # Create a simple mapping of original URLs to internal paths
920
+ url_mapping = {
921
+ attachment.attachment.storage_key: f"attachments/{attachment.attachment.file_name or f'attachment_{id(attachment.attachment)}'}"
922
+ for attachment in all_attachments
923
+ if attachment.attachment.storage_key.startswith(("http://", "https://"))
924
+ }
925
+
926
+ if not url_mapping:
927
+ return data
928
+
929
+ # Simple recursive replacement
930
+ def replace_urls(obj: Any) -> Any:
931
+ if isinstance(obj, dict):
932
+ return {k: replace_urls(v) for k, v in obj.items()}
933
+ if isinstance(obj, list):
934
+ return [replace_urls(item) for item in obj]
935
+ if isinstance(obj, str) and obj in url_mapping:
936
+ return url_mapping[obj]
937
+ return obj
938
+
939
+ return replace_urls(data)
940
+
941
def _process_attachment_safely(
    self,
    attachment_info: AttachmentProcessingInfo,
) -> tuple[str, datetime, int, Any, Iterable[bytes]]:
    """Process attachment with consistent error handling.

    Parses the attachment's storage key into a bucket/key pair, builds the
    entry path ``{base_path}/{file_name or DEFAULT_ATTACHMENT_NAME}`` and
    creates the content stream for the attachment.

    Args:
        attachment_info: Attachment processing information

    Returns:
        Stream ZIP format tuple: (entry path, modification time, file mode,
        ZIP method object, content stream)

    Raises:
        StorageUploadError: If attachment processing fails for any reason.
            Each failure is logged and wrapped exactly once.
    """
    # Bound before the try so the outer handler can always format its
    # message, even when reading ``storage_key`` is itself what failed.
    storage_key: Any = None
    try:
        storage_key = attachment_info.attachment.storage_key

        try:
            source_bucket, source_key = self._parse_storage_url(storage_key)
            logger.debug(
                f"Parsed storage URL - bucket: {source_bucket}, key: {source_key}"
            )
        except ValueError as e:
            logger.error(f"Could not parse storage URL: {storage_key} - {e}")
            raise StorageUploadError(
                f"Could not parse storage URL: {storage_key} - {e}"
            ) from e

        file_path = f"{attachment_info.base_path}/{attachment_info.attachment.file_name or DEFAULT_ATTACHMENT_NAME}"

        try:
            content_stream = self._create_attachment_content_stream(
                source_bucket, source_key, storage_key
            )
            return (
                file_path,
                datetime.now(),
                DEFAULT_FILE_MODE,
                _ZIP_32_TYPE(),
                content_stream,
            )
        except Exception as e:
            logger.error(
                f"Failed to create content stream for attachment {storage_key}: {e}"
            )
            raise StorageUploadError(
                f"Failed to create content stream for attachment: {e}"
            ) from e

    except StorageUploadError:
        # Already logged and wrapped by one of the inner handlers; re-raise
        # untouched rather than logging and wrapping it a second time.
        raise
    except Exception as e:
        logger.error(
            f"Failed to process attachment {storage_key}: {e}",
            exc_info=True,
        )
        raise StorageUploadError(
            f"Failed to process attachment {storage_key}: {e}"
        ) from e