ethyca-fides 2.69.0rc9__py2.py3-none-any.whl → 2.69.1__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ethyca-fides might be problematic. Click here for more details.
- {ethyca_fides-2.69.0rc9.dist-info → ethyca_fides-2.69.1.dist-info}/METADATA +2 -2
- {ethyca_fides-2.69.0rc9.dist-info → ethyca_fides-2.69.1.dist-info}/RECORD +204 -195
- fides/_version.py +3 -3
- fides/api/alembic/migrations/versions/78dbe23d8204_adding_privacy_request_redaction_patterns.py +52 -0
- fides/api/api/v1/api.py +2 -0
- fides/api/api/v1/endpoints/dsr_package_link.py +2 -2
- fides/api/api/v1/endpoints/oauth_endpoints.py +20 -6
- fides/api/api/v1/endpoints/privacy_request_redaction_patterns_endpoints.py +95 -0
- fides/api/api/v1/endpoints/user_endpoints.py +28 -1
- fides/api/app_setup.py +16 -2
- fides/api/db/base.py +3 -0
- fides/api/main.py +22 -0
- fides/api/models/client.py +1 -0
- fides/api/models/privacy_request_redaction_pattern.py +64 -0
- fides/api/oauth/utils.py +117 -6
- fides/api/schemas/privacy_request_redaction_patterns.py +55 -0
- fides/api/service/privacy_request/dsr_package/dsr_data_preprocessor.py +231 -0
- fides/api/service/privacy_request/dsr_package/dsr_report_builder.py +286 -120
- fides/api/service/privacy_request/dsr_package/templates/attachments_index.html +4 -2
- fides/api/service/privacy_request/dsr_package/templates/collection_index.html +3 -1
- fides/api/service/privacy_request/dsr_package/templates/dataset_index.html +1 -1
- fides/api/service/privacy_request/dsr_package/utils.py +268 -0
- fides/api/service/privacy_request/request_runner_service.py +8 -2
- fides/api/service/storage/streaming/smart_open_streaming_storage.py +107 -170
- fides/api/service/storage/util.py +579 -0
- fides/api/task/manual/manual_task_graph_task.py +11 -9
- fides/api/tasks/storage.py +2 -2
- fides/api/util/endpoint_utils.py +0 -13
- fides/api/util/rate_limit.py +194 -0
- fides/common/api/scope_registry.py +8 -0
- fides/common/api/v1/urn_registry.py +3 -0
- fides/config/redis_settings.py +27 -3
- fides/config/security_settings.py +31 -9
- fides/ui-build/static/admin/404.html +1 -1
- fides/ui-build/static/admin/_next/static/1TigfgzjzHeoVqRLNIMYa/_buildManifest.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/4831-fd99c0b3784de128.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/pages/{_app-ef8e1c986bc5b795.js → _app-fcdad91f6f66292b.js} +1 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/settings/privacy-requests-2ecc073f41628f62.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/pages/user-management/{new-de8cb3739ab99c09.js → new-92f52c43f522a350.js} +1 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/user-management/profile/{[id]-05d61c80a556b2d5.js → [id]-64452dfae2c5e614.js} +1 -1
- fides/ui-build/static/admin/add-systems/manual.html +1 -1
- fides/ui-build/static/admin/add-systems/multiple.html +1 -1
- fides/ui-build/static/admin/add-systems.html +1 -1
- fides/ui-build/static/admin/consent/configure/add-vendors.html +1 -1
- fides/ui-build/static/admin/consent/configure.html +1 -1
- fides/ui-build/static/admin/consent/privacy-experience/[id].html +1 -1
- fides/ui-build/static/admin/consent/privacy-experience/new.html +1 -1
- fides/ui-build/static/admin/consent/privacy-experience.html +1 -1
- fides/ui-build/static/admin/consent/privacy-notices/[id].html +1 -1
- fides/ui-build/static/admin/consent/privacy-notices/new.html +1 -1
- fides/ui-build/static/admin/consent/privacy-notices.html +1 -1
- fides/ui-build/static/admin/consent/properties.html +1 -1
- fides/ui-build/static/admin/consent/reporting.html +1 -1
- fides/ui-build/static/admin/consent.html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/projects/[projectUrn]/[resourceUrn].html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/projects/[projectUrn].html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/projects.html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/resources/[resourceUrn].html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/resources.html +1 -1
- fides/ui-build/static/admin/data-catalog.html +1 -1
- fides/ui-build/static/admin/data-discovery/action-center/[monitorId]/[systemId].html +1 -1
- fides/ui-build/static/admin/data-discovery/action-center/[monitorId].html +1 -1
- fides/ui-build/static/admin/data-discovery/action-center.html +1 -1
- fides/ui-build/static/admin/data-discovery/activity.html +1 -1
- fides/ui-build/static/admin/data-discovery/detection/[resourceUrn].html +1 -1
- fides/ui-build/static/admin/data-discovery/detection.html +1 -1
- fides/ui-build/static/admin/data-discovery/discovery/[resourceUrn].html +1 -1
- fides/ui-build/static/admin/data-discovery/discovery.html +1 -1
- fides/ui-build/static/admin/datamap.html +1 -1
- fides/ui-build/static/admin/dataset/[datasetId]/[collectionName]/[...subfieldNames].html +1 -1
- fides/ui-build/static/admin/dataset/[datasetId]/[collectionName].html +1 -1
- fides/ui-build/static/admin/dataset/[datasetId].html +1 -1
- fides/ui-build/static/admin/dataset/new.html +1 -1
- fides/ui-build/static/admin/dataset.html +1 -1
- fides/ui-build/static/admin/datastore-connection/[id].html +1 -1
- fides/ui-build/static/admin/datastore-connection/new.html +1 -1
- fides/ui-build/static/admin/datastore-connection.html +1 -1
- fides/ui-build/static/admin/index.html +1 -1
- fides/ui-build/static/admin/integrations/[id].html +1 -1
- fides/ui-build/static/admin/integrations.html +1 -1
- fides/ui-build/static/admin/login/[provider].html +1 -1
- fides/ui-build/static/admin/login.html +1 -1
- fides/ui-build/static/admin/messaging/[id].html +1 -1
- fides/ui-build/static/admin/messaging/add-template.html +1 -1
- fides/ui-build/static/admin/messaging.html +1 -1
- fides/ui-build/static/admin/poc/ant-components.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/AntForm.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/FormikAntFormItem.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/FormikControlled.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/FormikField.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/FormikSpreadField.html +1 -1
- fides/ui-build/static/admin/poc/forms.html +1 -1
- fides/ui-build/static/admin/poc/table-migration.html +1 -1
- fides/ui-build/static/admin/privacy-requests/[id].html +1 -1
- fides/ui-build/static/admin/privacy-requests/configure/messaging.html +1 -1
- fides/ui-build/static/admin/privacy-requests/configure/storage.html +1 -1
- fides/ui-build/static/admin/privacy-requests/configure.html +1 -1
- fides/ui-build/static/admin/privacy-requests.html +1 -1
- fides/ui-build/static/admin/properties/[id].html +1 -1
- fides/ui-build/static/admin/properties/add-property.html +1 -1
- fides/ui-build/static/admin/properties.html +1 -1
- fides/ui-build/static/admin/reporting/datamap.html +1 -1
- fides/ui-build/static/admin/settings/about/alpha.html +1 -1
- fides/ui-build/static/admin/settings/about.html +1 -1
- fides/ui-build/static/admin/settings/consent/[configuration_id]/[purpose_id].html +1 -1
- fides/ui-build/static/admin/settings/consent.html +1 -1
- fides/ui-build/static/admin/settings/custom-fields.html +1 -1
- fides/ui-build/static/admin/settings/domain-records.html +1 -1
- fides/ui-build/static/admin/settings/domains.html +1 -1
- fides/ui-build/static/admin/settings/email-templates.html +1 -1
- fides/ui-build/static/admin/settings/locations.html +1 -1
- fides/ui-build/static/admin/settings/organization.html +1 -1
- fides/ui-build/static/admin/settings/privacy-requests.html +1 -0
- fides/ui-build/static/admin/settings/regulations.html +1 -1
- fides/ui-build/static/admin/systems/configure/[id]/test-datasets.html +1 -1
- fides/ui-build/static/admin/systems/configure/[id].html +1 -1
- fides/ui-build/static/admin/systems.html +1 -1
- fides/ui-build/static/admin/taxonomy.html +1 -1
- fides/ui-build/static/admin/user-management/new.html +1 -1
- fides/ui-build/static/admin/user-management/profile/[id].html +1 -1
- fides/ui-build/static/admin/user-management.html +1 -1
- fides/ui-build/static/admin/_next/static/XiHm-6CdVChTC5rbN9GtT/_buildManifest.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/4121-c8d5d717e31899e1.js +0 -1
- {ethyca_fides-2.69.0rc9.dist-info → ethyca_fides-2.69.1.dist-info}/WHEEL +0 -0
- {ethyca_fides-2.69.0rc9.dist-info → ethyca_fides-2.69.1.dist-info}/entry_points.txt +0 -0
- {ethyca_fides-2.69.0rc9.dist-info → ethyca_fides-2.69.1.dist-info}/licenses/LICENSE +0 -0
- {ethyca_fides-2.69.0rc9.dist-info → ethyca_fides-2.69.1.dist-info}/top_level.txt +0 -0
- /fides/ui-build/static/admin/_next/static/{XiHm-6CdVChTC5rbN9GtT → 1TigfgzjzHeoVqRLNIMYa}/_ssgManifest.js +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/{1817-3d9e110e007853f0.js → 1817-0ca16d288fad916d.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/{3620-31ebb43dba84cbbd.js → 3620-602eb74dc896d556.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/{3729-a1ca1608efc11ac4.js → 3729-c17ac8031a4c4fd1.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/{3872-a91143aa35fa8ef8.js → 3872-f78dec02f0d959ae.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/{4608-23bbd4c3c4a59f42.js → 4608-be8cba73f5d7c326.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/{4786-0827aae7aceadd22.js → 4786-61154adf88e448e1.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/{4808-78ca630f2d2503cd.js → 4808-dd4157aa72648068.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/{5487-8c635883dcaa9c2a.js → 5487-02d00bad7c6830e0.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/{6084-0096d7de64ef8015.js → 6084-c153669d5567e242.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/{6954-9d46e2276c461c26.js → 6954-5296188c19d7d0ac.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/{7476-d1b0af9ade392e5b.js → 7476-45c5088baa8b66af.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/{7630-da0a7ce4e3a0d62c.js → 7630-7ed6c6117775dffe.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/{787-3499983fa346b380.js → 787-a8c7eab617e2fceb.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/{79-f197fc4db8d530e5.js → 79-65674011d455af4d.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/{796-db1e30119ea973c7.js → 796-9e1ca1a4030707c5.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/{8002-971e29181f72edd1.js → 8002-24af20d679efc04e.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/{9826-b0b3d3cfb13bfbc1.js → 9826-dbae8dee941a7fac.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/pages/add-systems/{manual-9dc7e70ab5b05723.js → manual-ace203dfacacbdc4.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/pages/add-systems/{multiple-4b79a1652297ed9a.js → multiple-920fb469e0dda1d2.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/pages/{add-systems-1632a59203fe8eab.js → add-systems-bd0d82078e67cac3.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/pages/consent/configure/{add-vendors-1ca9df7ca91bd101.js → add-vendors-406170eaae4329c6.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/pages/consent/{configure-07bdbc9ae4137db4.js → configure-7207ab23bdb36ce8.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/pages/consent/{privacy-experience-2795cd4115a77c94.js → privacy-experience-9dda4de5ec580279.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/pages/consent/privacy-notices/{[id]-e02921dc82dccbb1.js → [id]-b378576cba255609.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/pages/consent/privacy-notices/{new-98f9e4ba3610628a.js → new-2ca1de7b88094ab0.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/pages/consent/{privacy-notices-17ed82777810d1c6.js → privacy-notices-0d4844d0b808e6e4.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/pages/{consent-09610b10923d9268.js → consent-3e8bdefe714254ec.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/pages/data-catalog/[systemId]/projects/[projectUrn]/{[resourceUrn]-da1a48336daff6f8.js → [resourceUrn]-2c29ff7a01198f30.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/pages/data-catalog/[systemId]/projects/{[projectUrn]-d8e776f1e64e4ba8.js → [projectUrn]-04cfe2cfba7b7cd8.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/pages/data-catalog/[systemId]/{projects-75b9629b0d9cdf96.js → projects-5f2d7b24804f861f.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/pages/data-catalog/[systemId]/resources/{[resourceUrn]-470da05db63767cd.js → [resourceUrn]-8eb581024bc0172f.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/pages/data-catalog/[systemId]/{resources-6c3714ee97a718c1.js → resources-de704de849960f01.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/pages/{data-catalog-6984c033b8fe3a13.js → data-catalog-30108b00ac769fc3.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/pages/data-discovery/action-center/[monitorId]/{[systemId]-2f0a33ef9ba1f1da.js → [systemId]-e1ba213fb666b3f4.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/pages/data-discovery/action-center/{[monitorId]-e9d4f25b20ff6781.js → [monitorId]-6d133580045abdda.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/pages/data-discovery/{action-center-9c428d3ef0985915.js → action-center-9a81d42a474e1e48.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/pages/data-discovery/detection/{[resourceUrn]-c3a97e6721ca0abe.js → [resourceUrn]-8f736b078e9842da.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/pages/data-discovery/{detection-a0a7de552ef71f5b.js → detection-eb814e3c22807871.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/pages/data-discovery/discovery/{[resourceUrn]-109754fec0755339.js → [resourceUrn]-6875b7783fcfda2f.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/pages/data-discovery/{discovery-88654783b06b3b21.js → discovery-172dbd7740e212ca.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/pages/{datamap-89136e6800dc9369.js → datamap-c7390e046b2e2b7f.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/pages/dataset/[datasetId]/[collectionName]/{[...subfieldNames]-8f58192dcb54883d.js → [...subfieldNames]-dfd71c1e9c458b89.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/pages/dataset/[datasetId]/{[collectionName]-dcb4ab380a77aa1e.js → [collectionName]-7cdc42ec5493b83d.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/pages/dataset/{[datasetId]-6f16d43071fb9c11.js → [datasetId]-e12b11ba15bc3fc1.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/pages/dataset/{new-97f06e21580f1f6a.js → new-e32fccc4ca520d2b.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/pages/{dataset-674bb3940f088ecc.js → dataset-7c59a6abf6ba6207.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/pages/datastore-connection/{[id]-6f77d8647fca71e0.js → [id]-927b7e476c4b47d0.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/pages/datastore-connection/{new-821dd1269834cfa2.js → new-cbe100d50df34285.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/pages/{datastore-connection-23e4caf79faa8106.js → datastore-connection-cce20440b177050b.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/pages/{index-23eb64eed81dcb69.js → index-6cd8708106331b8d.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/pages/integrations/{[id]-3a4cd3fe9094fba3.js → [id]-4c3c413a2668df53.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/pages/{integrations-57e618d7b16ac69a.js → integrations-95402b5001c07ef2.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/pages/messaging/{[id]-c9a323eb6a929476.js → [id]-3c6dc2f6e6bae960.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/pages/messaging/{add-template-b9bb09e46921a590.js → add-template-4a6d4023a7791be8.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/pages/{messaging-82c631a12b5a008c.js → messaging-76b204c9b98d656f.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/pages/poc/{table-migration-38360083348c3d6c.js → table-migration-48500551fd6a7602.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/pages/privacy-requests/{[id]-0d0bb9eb004a3336.js → [id]-0f25a76dd18c5e20.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/pages/privacy-requests/configure/{messaging-f9320a58f489f5b7.js → messaging-ad6ad3e5bd72765d.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/pages/privacy-requests/configure/{storage-d0cfa8aeddd43a40.js → storage-6032d82f0fc2893d.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/pages/privacy-requests/{configure-72ca94ec5ed85733.js → configure-d83e5bd52a638234.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/pages/{privacy-requests-5a5edc8a4aa7c30a.js → privacy-requests-baf31c3e4b081046.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/pages/properties/{[id]-5ec775c4904fdbfe.js → [id]-e784c05d056b2371.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/pages/properties/{add-property-a6812c0916f2949e.js → add-property-0a7a2db148a7561a.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/pages/settings/about/{alpha-3e72e9f91991c119.js → alpha-a82f3df840d5c1b5.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/pages/settings/{about-6aab092f4871cecb.js → about-d06fb16487705b9d.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/pages/settings/{consent-be47008304106395.js → consent-93a978443bf299db.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/pages/settings/{custom-fields-ae1b57589da7b175.js → custom-fields-9ecb803099082bf4.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/pages/settings/{domain-records-23a6d7a921150188.js → domain-records-16fdd91a81074dd1.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/pages/settings/{domains-2a9e8859ab4d9de6.js → domains-4cdd6001e7cb9aee.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/pages/settings/{email-templates-4f9f0fdf9925ae90.js → email-templates-1914de830ce5cfc4.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/pages/settings/{locations-46f7af35cee4a8bb.js → locations-2e635dcd11b78224.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/pages/settings/{organization-a596a96cb8d0aa8e.js → organization-f547f1f33c12faf3.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/pages/settings/{regulations-6ed5fc2410e00857.js → regulations-7c02e469d8c5bd74.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/pages/systems/configure/[id]/{test-datasets-86811e3cda277e77.js → test-datasets-20b1193ed76c56b0.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/pages/systems/configure/{[id]-5a43f108d8047d5b.js → [id]-6e15332935f6b538.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/pages/{systems-045a841e22e85ea8.js → systems-fbc8761ef4d55516.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/pages/{taxonomy-1b3f2d4bcb0e164d.js → taxonomy-4d7827fc9c46b6b8.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/pages/{user-management-2cab41659f1ee7da.js → user-management-9cec020f89544426.js} +0 -0
|
@@ -0,0 +1,268 @@
|
|
|
1
|
+
from typing import Any, List, Optional
|
|
2
|
+
|
|
3
|
+
from fideslang.models import Dataset, DatasetField
|
|
4
|
+
from loguru import logger
|
|
5
|
+
from sqlalchemy import text
|
|
6
|
+
from sqlalchemy.orm import Session
|
|
7
|
+
|
|
8
|
+
from fides.api.models.datasetconfig import DatasetConfig
|
|
9
|
+
from fides.api.models.privacy_request.privacy_request import PrivacyRequest
|
|
10
|
+
from fides.api.schemas.policy import ActionType
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
# TODO: keeping this for a bit to help with development and testing
|
|
14
|
+
def get_redaction_entities_map(db: Session) -> set[str]:
    """
    Collect the hierarchical keys of every entity whose fides_meta.redact is "name".

    Every dataset configuration returned by ``DatasetConfig.all`` is inspected.
    Configurations without a linked ctl_dataset are skipped; the rest are validated
    into a fideslang ``Dataset`` and checked at three levels:

    - dataset:    ``<fides_key>``
    - collection: ``<fides_key>.<collection_name>``
    - field:      ``<fides_key>.<collection_name>.<field_name>[.<nested_field_name>...]``

    Field-level checks (including nested fields) are delegated to
    ``_traverse_fields_for_redaction``.

    Args:
        db: Database session

    Returns:
        Set of hierarchical entity keys that should be redacted. If anything raises
        while the datasets are being processed, a warning is logged and the keys
        collected up to that point are returned.
    """
    entities = set()

    try:
        for config in DatasetConfig.all(db=db):
            if not config.ctl_dataset:
                continue

            dataset = Dataset.model_validate(config.ctl_dataset)
            # The fides_key is used rather than the name because it is always provided
            dataset_key = dataset.fides_key

            # Dataset level
            dataset_meta = dataset.fides_meta
            if dataset_meta and dataset_meta.redact == "name":
                entities.add(dataset_key)

            for collection in dataset.collections:
                # Collections without a name cannot be addressed by a path
                if not collection.name:
                    continue

                path = f"{dataset_key}.{collection.name}"

                # Collection level
                collection_meta = collection.fides_meta
                if collection_meta and collection_meta.redact == "name":
                    entities.add(path)

                # Field level, nested fields included
                _traverse_fields_for_redaction(collection.fields, path, entities)

    except Exception as exc:
        # Best effort: log and fall through so the caller still gets a set back
        logger.warning(f"Error extracting redaction configurations: {exc}")

    return entities
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def get_redaction_entities_map_db(db: Session) -> set[str]:
    """
    Create a set of hierarchical entity keys that should be redacted based on fides_meta.redact: name.

    This function works directly in SQL against the JSON stored on ctl_datasets,
    using a two-step approach:
    1. A pre-filter query finds the datasets that contain redaction metadata at
       ANY level (dataset, collection or field).
    2. Three targeted queries, restricted to those candidate datasets, produce the
       dataset-level, collection-level and (recursively nested) field-level keys.

    Keys have the form ``fides_key``, ``fides_key.collection`` or
    ``fides_key.collection.field[.nested_field...]``.

    Args:
        db: Database session

    Returns:
        Set of hierarchical entity keys that should be redacted. If any query
        raises, a warning is logged and the keys gathered before the failure are
        returned (an empty set if the first query failed).
    """
    redaction_entities: set[str] = set()

    try:
        # Step 1: Pre-filter to find datasets with ANY redaction metadata.
        # Simple existence check - no paths needed, just check if redaction exists anywhere.
        # Wrapped in text() like the other queries: executing a raw string is
        # deprecated in SQLAlchemy 1.4 and rejected in 2.0, and the resulting error
        # would be swallowed by the except below, silently disabling redaction.
        pre_filter_query = text(
            """
            SELECT DISTINCT dc.ctl_dataset_id
            FROM datasetconfig dc
            JOIN ctl_datasets ds ON dc.ctl_dataset_id = ds.id
            WHERE
                -- Dataset-level redaction
                ds.fides_meta->>'redact' = 'name'
                OR
                -- Collection-level redaction
                EXISTS (
                    SELECT 1 FROM jsonb_array_elements(ds.collections::jsonb) AS collection
                    WHERE collection->'fides_meta'->>'redact' = 'name'
                    LIMIT 1
                )
                OR
                -- Field-level redaction using jsonb_path_query
                EXISTS (
                    SELECT 1
                    FROM jsonb_path_query(ds.collections::jsonb, '$.**.fides_meta') AS fides_meta
                    WHERE fides_meta->>'redact' = 'name'
                    LIMIT 1
                )
            """
        )

        candidate_datasets = db.execute(pre_filter_query).fetchall()

        if not candidate_datasets:
            logger.debug("No datasets found with redaction metadata")
            return redaction_entities

        logger.debug(
            f"Pre-filtered to {len(candidate_datasets)} datasets with redaction metadata"
        )

        # Step 2: Process only the candidate datasets with targeted queries.
        # Convert to a format we can use in SQL ANY clause.
        dataset_ids = [row[0] for row in candidate_datasets]

        # Dataset-level redactions (only on candidate datasets)
        dataset_query = text(
            """
            SELECT ds.fides_key as entity_path
            FROM datasetconfig dc
            JOIN ctl_datasets ds ON dc.ctl_dataset_id = ds.id
            WHERE ds.id = ANY(:dataset_ids)
            AND ds.fides_meta->>'redact' = 'name'
            """
        )

        # Collection-level redactions (only on candidate datasets)
        collection_query = text(
            """
            SELECT ds.fides_key || '.' || (collection->>'name') as entity_path
            FROM datasetconfig dc
            JOIN ctl_datasets ds ON dc.ctl_dataset_id = ds.id
            CROSS JOIN LATERAL jsonb_array_elements(ds.collections::jsonb) AS collection
            WHERE ds.id = ANY(:dataset_ids)
            AND collection->'fides_meta'->>'redact' = 'name'
            AND collection->>'name' IS NOT NULL
            """
        )

        # Field-level redactions (including nested fields).
        # This uses a recursive CTE to handle arbitrary nesting levels.
        field_query = text(
            """
            WITH RECURSIVE field_hierarchy AS (
                -- Base case: top-level fields in collections (only candidate datasets)
                SELECT
                    ds.fides_key || '.' ||
                    (collection->>'name') || '.' ||
                    (field->>'name') as entity_path,
                    field->'fields' as nested_fields,
                    field->'fides_meta'->>'redact' as redact_value
                FROM datasetconfig dc
                JOIN ctl_datasets ds ON dc.ctl_dataset_id = ds.id
                CROSS JOIN LATERAL jsonb_array_elements(ds.collections::jsonb) AS collection
                CROSS JOIN LATERAL jsonb_array_elements(collection->'fields') AS field
                WHERE ds.id = ANY(:dataset_ids)
                AND collection->>'name' IS NOT NULL
                AND field->>'name' IS NOT NULL

                UNION ALL

                -- Recursive case: nested fields
                SELECT
                    fh.entity_path || '.' || (nested_field->>'name') as entity_path,
                    nested_field->'fields' as nested_fields,
                    nested_field->'fides_meta'->>'redact' as redact_value
                FROM field_hierarchy fh
                CROSS JOIN LATERAL jsonb_array_elements(fh.nested_fields) AS nested_field
                WHERE jsonb_typeof(fh.nested_fields) = 'array'
                AND nested_field->>'name' IS NOT NULL
            )
            SELECT DISTINCT entity_path
            FROM field_hierarchy
            WHERE redact_value = 'name'
            """
        )

        # All three queries take the same bind parameter and return one
        # entity_path column, so they are executed in order by a single loop.
        params = {"dataset_ids": dataset_ids}
        for query in (dataset_query, collection_query, field_query):
            rows = db.execute(query, params).fetchall()
            redaction_entities.update(row[0] for row in rows)

        logger.debug(f"Found {len(redaction_entities)} entities requiring redaction")

    except Exception as exc:
        # Best effort: log and return whatever has been gathered so far
        logger.warning(
            f"Error extracting redaction configurations from database: {exc}"
        )

    return redaction_entities
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
def map_privacy_request(privacy_request: PrivacyRequest) -> dict[str, Any]:
    """
    Build a dictionary holding a subset of values from the privacy request.

    Keys in the result:
        id: always present, taken from ``privacy_request.id``.
        type: the ``value`` of the policy's action type; omitted when the policy
            reports no action type.
        identity: the labeled persisted identity entries (default labels included)
            whose ``"value"`` is not None.
        requested_at: ``requested_at`` formatted as ``MM/DD/YYYY HH:MM TZ``; omitted
            when ``requested_at`` is not set.

    Args:
        privacy_request: The privacy request to summarize

    Returns:
        The summary dictionary described above
    """
    summary: dict[str, Any] = {"id": privacy_request.id}

    policy_action: Optional[ActionType] = privacy_request.policy.get_action_type()
    if policy_action:
        summary["type"] = policy_action.value

    labeled_identity = privacy_request.get_persisted_identity().labeled_dict(
        include_default_labels=True
    )
    # Keep only the identity entries that actually carry a value
    populated_identity = {}
    for label, entry in labeled_identity.items():
        if entry["value"] is not None:
            populated_identity[label] = entry
    summary["identity"] = populated_identity

    requested_at = privacy_request.requested_at
    if requested_at:
        summary["requested_at"] = requested_at.strftime("%m/%d/%Y %H:%M %Z")

    return summary
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
def _traverse_fields_for_redaction(
    fields: List[DatasetField], current_path: str, redaction_entities: set[str]
) -> None:
    """
    Walk a tree of (possibly nested) fields and record those marked for redaction.

    A field is recorded when its ``fides_meta.redact`` equals "name". The recorded
    key is ``<current_path>.<field_name>``, extended with one more ``.<name>``
    segment per nesting level. Fields without a name are skipped together with
    everything nested beneath them, since no path can be built for them.

    The walk uses an explicit work stack instead of recursion, so very deep nesting
    cannot hit the interpreter recursion limit. A RecursionError raised here would
    otherwise be swallowed by the caller's broad ``except`` and silently drop
    redaction entries.

    Args:
        fields: Field objects to traverse; each exposes ``name``, ``fides_meta``
            and ``fields`` attributes. An empty or None value is treated as
            "nothing to traverse".
        current_path: Current hierarchical path (e.g., "dataset.collection")
        redaction_entities: Set to add redacted field paths to (mutated in place)
    """
    # Each entry pairs a list of sibling fields with the path of their parent
    pending = [(fields, current_path)]

    while pending:
        sibling_fields, parent_path = pending.pop()

        for field in sibling_fields or ():
            field_name = field.name
            if not field_name:
                continue

            field_path = f"{parent_path}.{field_name}"
            field_fides_meta = field.fides_meta

            if field_fides_meta and field_fides_meta.redact == "name":
                redaction_entities.add(field_path)

            # Queue nested fields; the visiting order is irrelevant because the
            # results are accumulated in a set
            if field.fields:
                pending.append((field.fields, field_path))
|
|
@@ -312,8 +312,14 @@ def upload_and_save_access_results( # pylint: disable=R0912
|
|
|
312
312
|
loaded_attachments = [
|
|
313
313
|
attachment
|
|
314
314
|
for attachment in privacy_request.attachments
|
|
315
|
-
if
|
|
316
|
-
|
|
315
|
+
if not any(
|
|
316
|
+
ref.reference_type
|
|
317
|
+
in [
|
|
318
|
+
AttachmentReferenceType.access_manual_webhook,
|
|
319
|
+
AttachmentReferenceType.manual_task_submission,
|
|
320
|
+
]
|
|
321
|
+
for ref in attachment.references
|
|
322
|
+
)
|
|
317
323
|
]
|
|
318
324
|
attachments = get_attachments_content(loaded_attachments)
|
|
319
325
|
# Process attachments once for both upload and storage
|
|
@@ -1,5 +1,4 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
1
|
+
# pylint: disable=too-many-lines
|
|
3
2
|
from __future__ import annotations
|
|
4
3
|
|
|
5
4
|
import csv
|
|
@@ -18,7 +17,7 @@ from fides.api.common_exceptions import StorageUploadError
|
|
|
18
17
|
from fides.api.models.privacy_request import PrivacyRequest
|
|
19
18
|
from fides.api.schemas.storage.storage import ResponseFormat
|
|
20
19
|
from fides.api.service.privacy_request.dsr_package.dsr_report_builder import (
|
|
21
|
-
|
|
20
|
+
DSRReportBuilder,
|
|
22
21
|
)
|
|
23
22
|
from fides.api.service.storage.streaming.dsr_storage import (
|
|
24
23
|
create_dsr_report_files_generator,
|
|
@@ -34,6 +33,15 @@ from fides.api.service.storage.streaming.schemas import (
|
|
|
34
33
|
StreamingBufferConfig,
|
|
35
34
|
)
|
|
36
35
|
from fides.api.service.storage.streaming.smart_open_client import SmartOpenStorageClient
|
|
36
|
+
from fides.api.service.storage.util import (
|
|
37
|
+
convert_processed_attachments_to_attachment_processing_info,
|
|
38
|
+
determine_dataset_name_from_path,
|
|
39
|
+
extract_storage_key_from_attachment,
|
|
40
|
+
get_unique_filename,
|
|
41
|
+
process_attachments_contextually,
|
|
42
|
+
resolve_attachment_storage_path,
|
|
43
|
+
resolve_base_path_from_context,
|
|
44
|
+
)
|
|
37
45
|
|
|
38
46
|
DEFAULT_ATTACHMENT_NAME = "attachment"
|
|
39
47
|
DEFAULT_FILE_MODE = 0o644
|
|
@@ -68,6 +76,9 @@ class SmartOpenStreamingStorage:
|
|
|
68
76
|
"""
|
|
69
77
|
self.storage_client = storage_client
|
|
70
78
|
self.chunk_size = chunk_size
|
|
79
|
+
# Track used filenames per dataset to match DSR report builder behavior
|
|
80
|
+
# Maps dataset_name -> set of used filenames
|
|
81
|
+
self.used_filenames_per_dataset: dict[str, set[str]] = {}
|
|
71
82
|
|
|
72
83
|
def _parse_storage_url(self, storage_key: str) -> tuple[str, str]:
|
|
73
84
|
"""Parse storage URL and return (bucket, key).
|
|
@@ -229,138 +240,6 @@ class SmartOpenStreamingStorage:
|
|
|
229
240
|
|
|
230
241
|
return packages
|
|
231
242
|
|
|
232
|
-
def _collect_attachments(self, data: dict) -> list[dict]:
    """Gather every raw attachment dictionary reachable from ``data``.

    Only entries of ``data`` whose value is a non-empty list are inspected.
    A list stored under the ``"attachments"`` key is additionally treated as
    a list of direct attachments; every inspected list (including that one)
    is searched for attachments nested inside its items. No validation is
    performed here.

    Args:
        data: The data dictionary containing items with attachments

    Returns:
        List of raw attachment dictionaries with metadata
    """
    collected: list[dict] = []

    for name, entries in data.items():
        if isinstance(entries, list) and entries:
            # Top-level attachments are picked up as-is ...
            if name == "attachments":
                collected += self._collect_direct_attachments(entries)

            # ... and every list is also scanned for item-level attachments.
            collected += self._collect_nested_attachments(name, entries)

    logger.debug(f"Collected {len(collected)} raw attachments")
    return collected
|
|
260
|
-
|
|
261
|
-
def _collect_direct_attachments(self, attachments_list: list) -> list[dict]:
|
|
262
|
-
"""Collect attachments from a direct attachments list.
|
|
263
|
-
|
|
264
|
-
Args:
|
|
265
|
-
attachments_list: List of attachment dictionaries
|
|
266
|
-
|
|
267
|
-
Returns:
|
|
268
|
-
List of attachment data dictionaries with metadata
|
|
269
|
-
"""
|
|
270
|
-
direct_attachments = []
|
|
271
|
-
|
|
272
|
-
for idx, attachment in enumerate(attachments_list):
|
|
273
|
-
if not isinstance(attachment, dict):
|
|
274
|
-
continue
|
|
275
|
-
|
|
276
|
-
# Check if this looks like an attachment (has file_name or download_url)
|
|
277
|
-
if "file_name" in attachment or "download_url" in attachment:
|
|
278
|
-
# Transform download_url to internal access package URL for access package display
|
|
279
|
-
if "download_url" in attachment:
|
|
280
|
-
attachment["original_download_url"] = attachment["download_url"]
|
|
281
|
-
attachment["download_url"] = (
|
|
282
|
-
f"attachments/{attachment.get('file_name', f'attachment_{idx}')}"
|
|
283
|
-
)
|
|
284
|
-
|
|
285
|
-
direct_attachments.append(attachment)
|
|
286
|
-
|
|
287
|
-
return direct_attachments
|
|
288
|
-
|
|
289
|
-
def _collect_nested_attachments(self, key: str, items: list) -> list[dict]:
|
|
290
|
-
"""Collect attachments from nested items.
|
|
291
|
-
|
|
292
|
-
Args:
|
|
293
|
-
key: The key for the items list
|
|
294
|
-
items: List of items that may contain attachments
|
|
295
|
-
|
|
296
|
-
Returns:
|
|
297
|
-
List of attachment data dictionaries with metadata
|
|
298
|
-
"""
|
|
299
|
-
nested_attachments = []
|
|
300
|
-
|
|
301
|
-
for item in items:
|
|
302
|
-
if not isinstance(item, dict):
|
|
303
|
-
continue
|
|
304
|
-
|
|
305
|
-
# Recursively search for attachments in nested structures
|
|
306
|
-
item_attachments = self._find_attachments_recursive(item, key)
|
|
307
|
-
nested_attachments.extend(item_attachments)
|
|
308
|
-
|
|
309
|
-
return nested_attachments
|
|
310
|
-
|
|
311
|
-
def _find_attachments_recursive(
|
|
312
|
-
self, item: dict, context_key: str, path: str = ""
|
|
313
|
-
) -> list[dict]:
|
|
314
|
-
"""Recursively find attachments in nested dictionary structures.
|
|
315
|
-
|
|
316
|
-
Args:
|
|
317
|
-
item: Dictionary item to search
|
|
318
|
-
context_key: The top-level key for context
|
|
319
|
-
path: Current path in the nested structure
|
|
320
|
-
|
|
321
|
-
Returns:
|
|
322
|
-
List of attachment data dictionaries with metadata
|
|
323
|
-
"""
|
|
324
|
-
attachments = []
|
|
325
|
-
|
|
326
|
-
# Check if this item has direct attachments
|
|
327
|
-
if "attachments" in item and isinstance(item["attachments"], list):
|
|
328
|
-
for attachment in item["attachments"]:
|
|
329
|
-
if not isinstance(attachment, dict):
|
|
330
|
-
continue
|
|
331
|
-
|
|
332
|
-
# Check if this looks like an attachment
|
|
333
|
-
if "file_name" in attachment or "download_url" in attachment:
|
|
334
|
-
# Add context about which item this attachment belongs to
|
|
335
|
-
attachment_with_context = attachment.copy()
|
|
336
|
-
attachment_with_context["_context"] = {
|
|
337
|
-
"key": context_key,
|
|
338
|
-
"item_id": item.get("id", "unknown"),
|
|
339
|
-
"path": path,
|
|
340
|
-
}
|
|
341
|
-
|
|
342
|
-
# Transform download_url to internal access package URL
|
|
343
|
-
if "download_url" in attachment:
|
|
344
|
-
attachment_with_context["original_download_url"] = attachment[
|
|
345
|
-
"download_url"
|
|
346
|
-
]
|
|
347
|
-
attachment_with_context["download_url"] = (
|
|
348
|
-
f"attachments/{attachment.get('file_name', 'attachment')}"
|
|
349
|
-
)
|
|
350
|
-
|
|
351
|
-
attachments.append(attachment_with_context)
|
|
352
|
-
|
|
353
|
-
# Recursively search nested dictionaries
|
|
354
|
-
for key, value in item.items():
|
|
355
|
-
if isinstance(value, dict):
|
|
356
|
-
current_path = f"{path}.{key}" if path else key
|
|
357
|
-
nested_attachments = self._find_attachments_recursive(
|
|
358
|
-
value, context_key, current_path
|
|
359
|
-
)
|
|
360
|
-
attachments.extend(nested_attachments)
|
|
361
|
-
|
|
362
|
-
return attachments
|
|
363
|
-
|
|
364
243
|
def _validate_attachment(
|
|
365
244
|
self, attachment: dict
|
|
366
245
|
) -> Optional[AttachmentProcessingInfo]:
|
|
@@ -373,12 +252,8 @@ class SmartOpenStreamingStorage:
|
|
|
373
252
|
AttachmentProcessingInfo if valid, None otherwise
|
|
374
253
|
"""
|
|
375
254
|
try:
|
|
376
|
-
# Extract
|
|
377
|
-
storage_key = (
|
|
378
|
-
attachment.get("original_download_url")
|
|
379
|
-
or attachment.get("download_url")
|
|
380
|
-
or attachment.get("file_name", "")
|
|
381
|
-
)
|
|
255
|
+
# Extract storage key using shared utility
|
|
256
|
+
storage_key = extract_storage_key_from_attachment(attachment)
|
|
382
257
|
if not storage_key:
|
|
383
258
|
return None
|
|
384
259
|
|
|
@@ -390,11 +265,8 @@ class SmartOpenStreamingStorage:
|
|
|
390
265
|
content_type=attachment.get("content_type"),
|
|
391
266
|
)
|
|
392
267
|
|
|
393
|
-
#
|
|
394
|
-
base_path =
|
|
395
|
-
if attachment.get("_context"):
|
|
396
|
-
context = attachment["_context"]
|
|
397
|
-
base_path = f"{context['key']}/{context['item_id']}/attachments"
|
|
268
|
+
# Resolve base path using shared utility
|
|
269
|
+
base_path = resolve_base_path_from_context(attachment)
|
|
398
270
|
|
|
399
271
|
# Create AttachmentProcessingInfo
|
|
400
272
|
processing_info = AttachmentProcessingInfo(
|
|
@@ -403,9 +275,6 @@ class SmartOpenStreamingStorage:
|
|
|
403
275
|
item=attachment,
|
|
404
276
|
)
|
|
405
277
|
|
|
406
|
-
logger.debug(
|
|
407
|
-
f"Successfully validated attachment: {attachment_info.storage_key}"
|
|
408
|
-
)
|
|
409
278
|
return processing_info
|
|
410
279
|
|
|
411
280
|
except (ValueError, TypeError, KeyError) as e:
|
|
@@ -438,9 +307,6 @@ class SmartOpenStreamingStorage:
|
|
|
438
307
|
total_bytes += len(chunk)
|
|
439
308
|
yield chunk
|
|
440
309
|
|
|
441
|
-
logger.debug(
|
|
442
|
-
f"Completed streaming {chunk_count} chunks ({total_bytes} bytes) for {storage_key}"
|
|
443
|
-
)
|
|
444
310
|
except Exception as e:
|
|
445
311
|
logger.warning(f"Failed to stream attachment {storage_key}: {e}")
|
|
446
312
|
# Yield empty content on failure
|
|
@@ -449,10 +315,10 @@ class SmartOpenStreamingStorage:
|
|
|
449
315
|
def _collect_and_validate_attachments(
    self, data: dict
) -> list[AttachmentProcessingInfo]:
    """Collect and validate attachments via the shared contextual processing.

    Runs ``process_attachments_contextually`` over ``data`` with fresh, empty
    filename-tracking sets and an empty processed-attachments mapping, with
    streaming enabled, then converts the result through
    ``convert_processed_attachments_to_attachment_processing_info`` using
    ``self._validate_attachment`` as the validator.

    Args:
        data: The data dictionary containing items with attachments

    Returns:
        List of validated AttachmentProcessingInfo objects
    """
    # Fresh tracking state for this call; nothing is carried over.
    filenames_for_data: set[str] = set()
    filenames_for_attachments: set[str] = set()
    seen_attachments: dict[tuple[str, str], str] = {}

    contextual_attachments = process_attachments_contextually(
        data,
        filenames_for_data,
        filenames_for_attachments,
        seen_attachments,
        enable_streaming=True,  # Always use streaming mode for storage
    )

    validated = convert_processed_attachments_to_attachment_processing_info(
        contextual_attachments, self._validate_attachment
    )
    return validated
|
|
347
|
+
|
|
348
|
+
def _collect_and_validate_attachments_from_dsr_builder(
    self, data: dict, dsr_builder: "DSRReportBuilder"
) -> list[AttachmentProcessingInfo]:
    """Collect and validate attachments, seeded from a DSR report builder.

    The builder's ``used_filenames_per_dataset`` mapping is flattened into
    two sets: names recorded under the ``"attachments"`` dataset, and names
    recorded under every other dataset. Those sets and the builder's
    ``processed_attachments`` are passed to
    ``process_attachments_contextually`` (streaming enabled), and the result
    is converted with ``self._validate_attachment`` as the validator.

    Args:
        data: The data dictionary containing items with attachments
        dsr_builder: The DSR report builder instance that has already processed attachments

    Returns:
        List of validated AttachmentProcessingInfo objects
    """
    attachment_filenames: set[str] = set()
    data_filenames: set[str] = set()

    # Split the builder's per-dataset tracking into the two sets the shared
    # function expects.
    for dataset_name, filenames in dsr_builder.used_filenames_per_dataset.items():
        target = (
            attachment_filenames if dataset_name == "attachments" else data_filenames
        )
        target.update(filenames)

    contextual_attachments = process_attachments_contextually(
        data,
        data_filenames,
        attachment_filenames,
        dsr_builder.processed_attachments,
        enable_streaming=True,  # Always use streaming mode for storage
    )

    return convert_processed_attachments_to_attachment_processing_info(
        contextual_attachments, self._validate_attachment
    )
|
|
474
387
|
|
|
475
388
|
@retry_cloud_storage_operation(
|
|
476
389
|
provider="smart_open_streaming",
|
|
@@ -514,6 +427,9 @@ class SmartOpenStreamingStorage:
|
|
|
514
427
|
if not privacy_request:
|
|
515
428
|
raise ValueError("Privacy request must be provided")
|
|
516
429
|
|
|
430
|
+
# Reset used filenames for this upload operation
|
|
431
|
+
self.used_filenames_per_dataset.clear()
|
|
432
|
+
|
|
517
433
|
# Use default buffer config if none provided
|
|
518
434
|
if buffer_config is None:
|
|
519
435
|
buffer_config = StreamingBufferConfig()
|
|
@@ -628,18 +544,22 @@ class SmartOpenStreamingStorage:
|
|
|
628
544
|
"""
|
|
629
545
|
# Generate the DSR report first
|
|
630
546
|
try:
|
|
631
|
-
|
|
547
|
+
dsr_builder = DSRReportBuilder(
|
|
632
548
|
privacy_request=privacy_request,
|
|
633
549
|
dsr_data=data,
|
|
634
|
-
|
|
550
|
+
enable_streaming=True,
|
|
551
|
+
)
|
|
552
|
+
dsr_buffer = dsr_builder.generate()
|
|
635
553
|
# Reset buffer position to ensure it can be read multiple times
|
|
636
554
|
dsr_buffer.seek(0)
|
|
637
555
|
except Exception as e:
|
|
638
556
|
logger.error(f"Failed to generate DSR report: {e}")
|
|
639
557
|
raise StorageUploadError(f"Failed to generate DSR report: {e}") from e
|
|
640
558
|
|
|
641
|
-
#
|
|
642
|
-
all_attachments = self.
|
|
559
|
+
# Use the DSR report builder's processed attachments to avoid duplicates
|
|
560
|
+
all_attachments = self._collect_and_validate_attachments_from_dsr_builder(
|
|
561
|
+
data, dsr_builder
|
|
562
|
+
)
|
|
643
563
|
|
|
644
564
|
if not all_attachments:
|
|
645
565
|
# No attachments, just upload the DSR report
|
|
@@ -733,7 +653,7 @@ class SmartOpenStreamingStorage:
|
|
|
733
653
|
batch_size: Number of attachments to process in each batch
|
|
734
654
|
resp_format: Response format (csv, json)
|
|
735
655
|
"""
|
|
736
|
-
# Collect and validate all attachments
|
|
656
|
+
# Collect and validate all attachments using shared contextual processing
|
|
737
657
|
all_attachments = self._collect_and_validate_attachments(data)
|
|
738
658
|
|
|
739
659
|
if not all_attachments:
|
|
@@ -943,7 +863,24 @@ class SmartOpenStreamingStorage:
|
|
|
943
863
|
f"Could not parse storage URL: {storage_key} - {e}"
|
|
944
864
|
) from e
|
|
945
865
|
|
|
946
|
-
|
|
866
|
+
# Generate unique filename using same logic as DSR report builder
|
|
867
|
+
original_filename = (
|
|
868
|
+
attachment_info.attachment.file_name or DEFAULT_ATTACHMENT_NAME
|
|
869
|
+
)
|
|
870
|
+
|
|
871
|
+
# Determine dataset name from base_path using shared utility
|
|
872
|
+
dataset_name = determine_dataset_name_from_path(attachment_info.base_path)
|
|
873
|
+
|
|
874
|
+
if dataset_name not in self.used_filenames_per_dataset:
|
|
875
|
+
self.used_filenames_per_dataset[dataset_name] = set()
|
|
876
|
+
|
|
877
|
+
unique_filename = get_unique_filename(
|
|
878
|
+
original_filename, self.used_filenames_per_dataset[dataset_name]
|
|
879
|
+
)
|
|
880
|
+
self.used_filenames_per_dataset[dataset_name].add(unique_filename)
|
|
881
|
+
file_path = resolve_attachment_storage_path(
|
|
882
|
+
unique_filename, attachment_info.base_path
|
|
883
|
+
)
|
|
947
884
|
|
|
948
885
|
try:
|
|
949
886
|
content_stream = self._create_attachment_content_stream(
|