ethyca-fides 2.63.1b4__py2.py3-none-any.whl → 2.63.1rc0__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ethyca_fides-2.63.1b4.dist-info → ethyca_fides-2.63.1rc0.dist-info}/METADATA +1 -1
- {ethyca_fides-2.63.1b4.dist-info → ethyca_fides-2.63.1rc0.dist-info}/RECORD +114 -126
- fides/_version.py +3 -3
- fides/api/db/base.py +0 -2
- fides/api/main.py +0 -1
- fides/api/models/attachment.py +23 -36
- fides/api/service/privacy_request/dsr_package/dsr_report_builder.py +46 -264
- fides/api/service/privacy_request/dsr_package/templates/collection_index.html +9 -34
- fides/api/service/privacy_request/dsr_package/templates/item.html +37 -0
- fides/api/service/privacy_request/dsr_package/templates/main.css +2 -45
- fides/api/service/privacy_request/dsr_package/templates/welcome.html +8 -12
- fides/api/service/privacy_request/request_runner_service.py +139 -258
- fides/api/service/storage/gcs.py +3 -15
- fides/api/service/storage/s3.py +14 -28
- fides/api/service/storage/util.py +7 -45
- fides/api/tasks/storage.py +91 -85
- fides/api/util/cache.py +1 -77
- fides/config/redis_settings.py +8 -99
- fides/service/messaging/aws_ses_service.py +1 -5
- fides/ui-build/static/admin/404.html +1 -1
- fides/ui-build/static/admin/_next/static/{X2nvWLg2_-vsCTkhSWpzw → PEElhfUdgE5bJjiyu5QCD}/_buildManifest.js +1 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/privacy-requests/configure/messaging-8cab04871908cfeb.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/pages/privacy-requests/configure/storage-150d40428245ee0c.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/pages/privacy-requests-20cdb2c8a03deae1.js +1 -0
- fides/ui-build/static/admin/add-systems/manual.html +1 -1
- fides/ui-build/static/admin/add-systems/multiple.html +1 -1
- fides/ui-build/static/admin/add-systems.html +1 -1
- fides/ui-build/static/admin/consent/configure/add-vendors.html +1 -1
- fides/ui-build/static/admin/consent/configure.html +1 -1
- fides/ui-build/static/admin/consent/privacy-experience/[id].html +1 -1
- fides/ui-build/static/admin/consent/privacy-experience/new.html +1 -1
- fides/ui-build/static/admin/consent/privacy-experience.html +1 -1
- fides/ui-build/static/admin/consent/privacy-notices/[id].html +1 -1
- fides/ui-build/static/admin/consent/privacy-notices/new.html +1 -1
- fides/ui-build/static/admin/consent/privacy-notices.html +1 -1
- fides/ui-build/static/admin/consent/properties.html +1 -1
- fides/ui-build/static/admin/consent/reporting.html +1 -1
- fides/ui-build/static/admin/consent.html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/projects/[projectUrn]/[resourceUrn].html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/projects/[projectUrn].html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/projects.html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/resources/[resourceUrn].html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/resources.html +1 -1
- fides/ui-build/static/admin/data-catalog.html +1 -1
- fides/ui-build/static/admin/data-discovery/action-center/[monitorId]/[systemId].html +1 -1
- fides/ui-build/static/admin/data-discovery/action-center/[monitorId].html +1 -1
- fides/ui-build/static/admin/data-discovery/action-center.html +1 -1
- fides/ui-build/static/admin/data-discovery/activity.html +1 -1
- fides/ui-build/static/admin/data-discovery/detection/[resourceUrn].html +1 -1
- fides/ui-build/static/admin/data-discovery/detection.html +1 -1
- fides/ui-build/static/admin/data-discovery/discovery/[resourceUrn].html +1 -1
- fides/ui-build/static/admin/data-discovery/discovery.html +1 -1
- fides/ui-build/static/admin/datamap.html +1 -1
- fides/ui-build/static/admin/dataset/[datasetId]/[collectionName]/[...subfieldNames].html +1 -1
- fides/ui-build/static/admin/dataset/[datasetId]/[collectionName].html +1 -1
- fides/ui-build/static/admin/dataset/[datasetId].html +1 -1
- fides/ui-build/static/admin/dataset/new.html +1 -1
- fides/ui-build/static/admin/dataset.html +1 -1
- fides/ui-build/static/admin/datastore-connection/[id].html +1 -1
- fides/ui-build/static/admin/datastore-connection/new.html +1 -1
- fides/ui-build/static/admin/datastore-connection.html +1 -1
- fides/ui-build/static/admin/index.html +1 -1
- fides/ui-build/static/admin/integrations/[id].html +1 -1
- fides/ui-build/static/admin/integrations.html +1 -1
- fides/ui-build/static/admin/lib/fides-ext-gpp.js +1 -1
- fides/ui-build/static/admin/lib/fides-headless.js +1 -1
- fides/ui-build/static/admin/lib/fides-preview.js +1 -1
- fides/ui-build/static/admin/lib/fides-tcf.js +2 -2
- fides/ui-build/static/admin/lib/fides.js +2 -2
- fides/ui-build/static/admin/login/[provider].html +1 -1
- fides/ui-build/static/admin/login.html +1 -1
- fides/ui-build/static/admin/messaging/[id].html +1 -1
- fides/ui-build/static/admin/messaging/add-template.html +1 -1
- fides/ui-build/static/admin/messaging.html +1 -1
- fides/ui-build/static/admin/poc/ant-components.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/AntForm.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/FormikAntFormItem.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/FormikControlled.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/FormikField.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/FormikSpreadField.html +1 -1
- fides/ui-build/static/admin/poc/forms.html +1 -1
- fides/ui-build/static/admin/poc/table-migration.html +1 -1
- fides/ui-build/static/admin/privacy-requests/[id].html +1 -1
- fides/ui-build/static/admin/privacy-requests/configure/messaging.html +1 -1
- fides/ui-build/static/admin/privacy-requests/configure/storage.html +1 -1
- fides/ui-build/static/admin/privacy-requests/configure.html +1 -1
- fides/ui-build/static/admin/privacy-requests.html +1 -1
- fides/ui-build/static/admin/properties/[id].html +1 -1
- fides/ui-build/static/admin/properties/add-property.html +1 -1
- fides/ui-build/static/admin/properties.html +1 -1
- fides/ui-build/static/admin/reporting/datamap.html +1 -1
- fides/ui-build/static/admin/settings/about/alpha.html +1 -1
- fides/ui-build/static/admin/settings/about.html +1 -1
- fides/ui-build/static/admin/settings/consent/[configuration_id]/[purpose_id].html +1 -1
- fides/ui-build/static/admin/settings/consent.html +1 -1
- fides/ui-build/static/admin/settings/custom-fields.html +1 -1
- fides/ui-build/static/admin/settings/domain-records.html +1 -1
- fides/ui-build/static/admin/settings/domains.html +1 -1
- fides/ui-build/static/admin/settings/email-templates.html +1 -1
- fides/ui-build/static/admin/settings/locations.html +1 -1
- fides/ui-build/static/admin/settings/organization.html +1 -1
- fides/ui-build/static/admin/settings/regulations.html +1 -1
- fides/ui-build/static/admin/systems/configure/[id]/test-datasets.html +1 -1
- fides/ui-build/static/admin/systems/configure/[id].html +1 -1
- fides/ui-build/static/admin/systems.html +1 -1
- fides/ui-build/static/admin/taxonomy.html +1 -1
- fides/ui-build/static/admin/user-management/new.html +1 -1
- fides/ui-build/static/admin/user-management/profile/[id].html +1 -1
- fides/ui-build/static/admin/user-management.html +1 -1
- fides/api/alembic/migrations/versions/5efcdf18438e_add_manual_task_tables.py +0 -160
- fides/api/models/manual_tasks/__init__.py +0 -8
- fides/api/models/manual_tasks/manual_task.py +0 -110
- fides/api/models/manual_tasks/manual_task_log.py +0 -100
- fides/api/schemas/manual_tasks/__init__.py +0 -0
- fides/api/schemas/manual_tasks/manual_task_schemas.py +0 -79
- fides/api/schemas/manual_tasks/manual_task_status.py +0 -151
- fides/api/service/privacy_request/attachment_handling.py +0 -132
- fides/api/service/privacy_request/dsr_package/templates/attachments_index.html +0 -33
- fides/api/tasks/csv_utils.py +0 -170
- fides/api/tasks/encryption_utils.py +0 -42
- fides/service/manual_tasks/__init__.py +0 -0
- fides/service/manual_tasks/manual_task_service.py +0 -150
- fides/ui-build/static/admin/_next/static/chunks/pages/privacy-requests/configure/messaging-c583a61302f02add.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/privacy-requests/configure/storage-20d20a8d1736f7c4.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/privacy-requests-0e557d79e1e43c2b.js +0 -1
- {ethyca_fides-2.63.1b4.dist-info → ethyca_fides-2.63.1rc0.dist-info}/WHEEL +0 -0
- {ethyca_fides-2.63.1b4.dist-info → ethyca_fides-2.63.1rc0.dist-info}/entry_points.txt +0 -0
- {ethyca_fides-2.63.1b4.dist-info → ethyca_fides-2.63.1rc0.dist-info}/licenses/LICENSE +0 -0
- {ethyca_fides-2.63.1b4.dist-info → ethyca_fides-2.63.1rc0.dist-info}/top_level.txt +0 -0
- /fides/ui-build/static/admin/_next/static/{X2nvWLg2_-vsCTkhSWpzw → PEElhfUdgE5bJjiyu5QCD}/_ssgManifest.js +0 -0
fides/api/service/storage/s3.py
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
|
-
from
|
4
|
-
from typing import IO, Any, Dict, Optional, Tuple, Union
|
3
|
+
from typing import IO, Any, Dict, Tuple, Union
|
5
4
|
|
6
5
|
from boto3.s3.transfer import TransferConfig
|
7
6
|
from botocore.exceptions import ClientError, ParamValidationError
|
@@ -35,7 +34,7 @@ def maybe_get_s3_client(
|
|
35
34
|
|
36
35
|
|
37
36
|
def create_presigned_url_for_s3(
|
38
|
-
s3_client: Any, bucket_name: str, file_key: str
|
37
|
+
s3_client: Any, bucket_name: str, file_key: str
|
39
38
|
) -> AnyHttpUrlString:
|
40
39
|
"""
|
41
40
|
Generates a presigned URL to share an S3 object
|
@@ -46,16 +45,10 @@ def create_presigned_url_for_s3(
|
|
46
45
|
:return: Presigned URL as string.
|
47
46
|
"""
|
48
47
|
params = {"Bucket": bucket_name, "Key": file_key}
|
49
|
-
if ttl_seconds:
|
50
|
-
if ttl_seconds > 604800:
|
51
|
-
raise ValueError("TTL must be less than 7 days")
|
52
|
-
expires_in = ttl_seconds
|
53
|
-
else:
|
54
|
-
expires_in = CONFIG.security.subject_request_download_link_ttl_seconds
|
55
48
|
response = s3_client.generate_presigned_url(
|
56
49
|
"get_object",
|
57
50
|
Params=params,
|
58
|
-
ExpiresIn=
|
51
|
+
ExpiresIn=CONFIG.security.subject_request_download_link_ttl_seconds,
|
59
52
|
)
|
60
53
|
|
61
54
|
# The response contains the presigned URL
|
@@ -75,7 +68,7 @@ def generic_upload_to_s3( # pylint: disable=R0913
|
|
75
68
|
file_key: str,
|
76
69
|
auth_method: str,
|
77
70
|
document: IO[bytes],
|
78
|
-
size_threshold: int = LARGE_FILE_THRESHOLD, #
|
71
|
+
size_threshold: int = LARGE_FILE_THRESHOLD, # 5 MB threshold
|
79
72
|
) -> Tuple[int, AnyHttpUrlString]:
|
80
73
|
"""
|
81
74
|
Uploads file like objects to S3.
|
@@ -137,8 +130,7 @@ def generic_retrieve_from_s3(
|
|
137
130
|
file_key: str,
|
138
131
|
auth_method: str,
|
139
132
|
get_content: bool = False,
|
140
|
-
|
141
|
-
) -> Tuple[int, Union[str, IO[bytes]]]:
|
133
|
+
) -> Tuple[int, Union[str, bytes]]:
|
142
134
|
"""
|
143
135
|
Retrieves a file from S3 and returns its size and either a presigned URL or the actual content.
|
144
136
|
|
@@ -156,23 +148,17 @@ def generic_retrieve_from_s3(
|
|
156
148
|
s3_client = get_s3_client(auth_method, storage_secrets)
|
157
149
|
|
158
150
|
try:
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
file_obj = BytesIO()
|
165
|
-
s3_client.download_fileobj(
|
166
|
-
Bucket=bucket_name, Key=file_key, Fileobj=file_obj
|
167
|
-
)
|
168
|
-
file_obj.seek(0) # Reset file pointer to beginning
|
169
|
-
return int(size_response["ContentLength"]), file_obj
|
151
|
+
if get_content:
|
152
|
+
# Get the actual content
|
153
|
+
response = s3_client.get_object(Bucket=bucket_name, Key=file_key)
|
154
|
+
content = response["Body"].read()
|
155
|
+
return response["ContentLength"], content
|
170
156
|
|
171
157
|
# Get presigned URL
|
172
|
-
presigned_url = create_presigned_url_for_s3(
|
173
|
-
|
174
|
-
)
|
175
|
-
return int(
|
158
|
+
presigned_url = create_presigned_url_for_s3(s3_client, bucket_name, file_key)
|
159
|
+
# Get file size
|
160
|
+
response = s3_client.head_object(Bucket=bucket_name, Key=file_key)
|
161
|
+
return int(response["ContentLength"]), str(presigned_url)
|
176
162
|
except ClientError as e:
|
177
163
|
logger.error(f"Error retrieving file from S3: {e}")
|
178
164
|
raise e
|
@@ -3,10 +3,6 @@ from enum import Enum as EnumType
|
|
3
3
|
|
4
4
|
from loguru import logger
|
5
5
|
|
6
|
-
# This is the max file size for downloading the content of an attachment.
|
7
|
-
# This is an industry standard used by companies like Google and Microsoft.
|
8
|
-
LARGE_FILE_THRESHOLD = 25 * 1024 * 1024 # 25 MB
|
9
|
-
|
10
6
|
|
11
7
|
class AllowedFileType(EnumType):
|
12
8
|
"""
|
@@ -28,49 +24,15 @@ class AllowedFileType(EnumType):
|
|
28
24
|
|
29
25
|
LOCAL_FIDES_UPLOAD_DIRECTORY = "fides_uploads"
|
30
26
|
|
27
|
+
# Default to 10MB if not specified in environment
|
28
|
+
LARGE_FILE_THRESHOLD = 10 * 1024 * 1024 # 10 MB threshold
|
31
29
|
|
32
|
-
def get_local_filename(file_key: str) -> str:
|
33
|
-
"""Verifies that the local storage directory exists and returns the local filepath.
|
34
|
-
|
35
|
-
This extra security checks are to prevent directory traversal attacks and "complete business and technical destruction".
|
36
|
-
Thanks Claude.
|
37
|
-
|
38
|
-
Args:
|
39
|
-
file_key: The key/path for the file
|
40
|
-
|
41
|
-
Returns:
|
42
|
-
The full local filepath
|
43
|
-
|
44
|
-
Raises:
|
45
|
-
ValueError: If the file_key is invalid or would result in a path outside the upload directory
|
46
|
-
"""
|
47
|
-
# Basic validation
|
48
|
-
if not file_key:
|
49
|
-
raise ValueError("File key cannot be empty")
|
50
|
-
|
51
|
-
# Security checks before normalization
|
52
|
-
if file_key.startswith("/"):
|
53
|
-
raise ValueError("Invalid file key: cannot start with '/'")
|
54
|
-
|
55
|
-
# Normalize the path to handle any path separators consistently
|
56
|
-
# First normalize using os.path.normpath to handle any redundant separators
|
57
|
-
normalized_key = os.path.normpath(file_key)
|
58
|
-
# Then convert all separators to forward slashes for consistency
|
59
|
-
normalized_key = normalized_key.replace("\\", "/")
|
60
30
|
|
61
|
-
|
62
|
-
|
63
|
-
if not os.path.
|
64
|
-
os.
|
65
|
-
|
66
|
-
raise ValueError(
|
67
|
-
"Invalid file key: would result in path outside upload directory"
|
68
|
-
)
|
69
|
-
|
70
|
-
# Create all necessary directories
|
71
|
-
os.makedirs(os.path.dirname(final_path), exist_ok=True)
|
72
|
-
|
73
|
-
return final_path
|
31
|
+
def get_local_filename(file_key: str) -> str:
|
32
|
+
"""Verifies that the local storage directory exists and returns the local filepath"""
|
33
|
+
if not os.path.exists(LOCAL_FIDES_UPLOAD_DIRECTORY):
|
34
|
+
os.makedirs(LOCAL_FIDES_UPLOAD_DIRECTORY)
|
35
|
+
return f"{LOCAL_FIDES_UPLOAD_DIRECTORY}/{file_key}"
|
74
36
|
|
75
37
|
|
76
38
|
def get_allowed_file_type_or_raise(file_key: str) -> str:
|
fides/api/tasks/storage.py
CHANGED
@@ -1,20 +1,23 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
3
|
import json
|
4
|
+
import secrets
|
4
5
|
import zipfile
|
5
6
|
from io import BytesIO
|
6
|
-
from typing import TYPE_CHECKING, Any, Optional
|
7
|
+
from typing import TYPE_CHECKING, Any, Dict, Optional, Union
|
7
8
|
|
9
|
+
import pandas as pd
|
8
10
|
from botocore.exceptions import ClientError, ParamValidationError
|
9
11
|
from fideslang.validation import AnyHttpUrlString
|
10
12
|
from loguru import logger
|
11
13
|
|
12
14
|
from fides.api.common_exceptions import StorageUploadError
|
15
|
+
from fides.api.cryptography.cryptographic_util import bytes_to_b64_str
|
13
16
|
from fides.api.schemas.storage.storage import ResponseFormat, StorageSecrets
|
14
17
|
from fides.api.service.privacy_request.dsr_package.dsr_report_builder import (
|
15
18
|
DsrReportBuilder,
|
16
19
|
)
|
17
|
-
from fides.api.service.storage.gcs import
|
20
|
+
from fides.api.service.storage.gcs import get_gcs_client
|
18
21
|
from fides.api.service.storage.s3 import (
|
19
22
|
create_presigned_url_for_s3,
|
20
23
|
generic_upload_to_s3,
|
@@ -23,9 +26,11 @@ from fides.api.service.storage.util import (
|
|
23
26
|
LOCAL_FIDES_UPLOAD_DIRECTORY,
|
24
27
|
get_local_filename,
|
25
28
|
)
|
26
|
-
from fides.api.tasks.csv_utils import write_csv_to_zip
|
27
|
-
from fides.api.tasks.encryption_utils import encrypt_access_request_results
|
28
29
|
from fides.api.util.aws_util import get_s3_client
|
30
|
+
from fides.api.util.cache import get_cache, get_encryption_cache_key
|
31
|
+
from fides.api.util.encryption.aes_gcm_encryption_scheme import (
|
32
|
+
encrypt_to_bytes_verify_secrets_length,
|
33
|
+
)
|
29
34
|
from fides.api.util.storage_util import StorageJSONEncoder
|
30
35
|
from fides.config import CONFIG
|
31
36
|
|
@@ -33,8 +38,33 @@ if TYPE_CHECKING:
|
|
33
38
|
from fides.api.models.privacy_request import PrivacyRequest
|
34
39
|
|
35
40
|
|
41
|
+
def encrypt_access_request_results(data: Union[str, bytes], request_id: str) -> str:
|
42
|
+
"""Encrypt data with encryption key if provided, otherwise return unencrypted data"""
|
43
|
+
cache = get_cache()
|
44
|
+
encryption_cache_key = get_encryption_cache_key(
|
45
|
+
privacy_request_id=request_id,
|
46
|
+
encryption_attr="key",
|
47
|
+
)
|
48
|
+
if isinstance(data, bytes):
|
49
|
+
data = data.decode(CONFIG.security.encoding)
|
50
|
+
|
51
|
+
encryption_key: str | None = cache.get(encryption_cache_key)
|
52
|
+
if not encryption_key:
|
53
|
+
return data
|
54
|
+
|
55
|
+
bytes_encryption_key: bytes = encryption_key.encode(
|
56
|
+
encoding=CONFIG.security.encoding
|
57
|
+
)
|
58
|
+
nonce: bytes = secrets.token_bytes(CONFIG.security.aes_gcm_nonce_length)
|
59
|
+
# b64encode the entire nonce and the encrypted message together
|
60
|
+
return bytes_to_b64_str(
|
61
|
+
nonce
|
62
|
+
+ encrypt_to_bytes_verify_secrets_length(data, bytes_encryption_key, nonce)
|
63
|
+
)
|
64
|
+
|
65
|
+
|
36
66
|
def write_to_in_memory_buffer(
|
37
|
-
resp_format: str, data:
|
67
|
+
resp_format: str, data: Dict[str, Any], privacy_request: PrivacyRequest
|
38
68
|
) -> BytesIO:
|
39
69
|
"""Write JSON/CSV data to in-memory file-like object to be passed to S3 or GCS. Encrypt data if encryption key/nonce
|
40
70
|
has been cached for the given privacy request id
|
@@ -43,62 +73,46 @@ def write_to_in_memory_buffer(
|
|
43
73
|
:param data: Dict
|
44
74
|
:param request_id: str, The privacy request id
|
45
75
|
"""
|
46
|
-
|
47
76
|
logger.debug("Writing data to in-memory buffer")
|
48
|
-
try:
|
49
|
-
if resp_format == ResponseFormat.html.value:
|
50
|
-
return DsrReportBuilder(
|
51
|
-
privacy_request=privacy_request,
|
52
|
-
dsr_data=data,
|
53
|
-
).generate()
|
54
|
-
|
55
|
-
if resp_format == ResponseFormat.json.value:
|
56
|
-
return convert_dict_to_encrypted_json(data, privacy_request.id)
|
57
|
-
|
58
|
-
if resp_format == ResponseFormat.csv.value:
|
59
|
-
zipped_csvs = BytesIO()
|
60
|
-
with zipfile.ZipFile(zipped_csvs, "w") as f:
|
61
|
-
write_csv_to_zip(f, data, privacy_request.id)
|
62
|
-
zipped_csvs.seek(0)
|
63
|
-
return zipped_csvs
|
64
|
-
except Exception as e:
|
65
|
-
logger.error(f"Error writing data to in-memory buffer: {str(e)}")
|
66
|
-
raise e
|
67
|
-
|
68
|
-
raise NotImplementedError(f"No handling for response format {resp_format}.")
|
69
|
-
|
70
|
-
|
71
|
-
def convert_dict_to_encrypted_json(
|
72
|
-
data: dict[str, Any], privacy_request_id: str
|
73
|
-
) -> BytesIO:
|
74
|
-
"""Convert data to JSON and encrypt it.
|
75
|
-
|
76
|
-
Args:
|
77
|
-
data: The data to convert and encrypt
|
78
|
-
privacy_request_id: The ID of the privacy request for encryption
|
79
77
|
|
80
|
-
|
81
|
-
BytesIO: A file-like object containing the encrypted JSON data
|
82
|
-
|
83
|
-
Raises:
|
84
|
-
Exception: If JSON conversion fails
|
85
|
-
"""
|
86
|
-
try:
|
78
|
+
if resp_format == ResponseFormat.json.value:
|
87
79
|
json_str = json.dumps(data, indent=2, default=StorageJSONEncoder().default)
|
88
80
|
return BytesIO(
|
89
|
-
encrypt_access_request_results(json_str,
|
81
|
+
encrypt_access_request_results(json_str, privacy_request.id).encode(
|
90
82
|
CONFIG.security.encoding
|
91
83
|
)
|
92
84
|
)
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
85
|
+
|
86
|
+
if resp_format == ResponseFormat.csv.value:
|
87
|
+
zipped_csvs = BytesIO()
|
88
|
+
with zipfile.ZipFile(zipped_csvs, "w") as f:
|
89
|
+
for key in data:
|
90
|
+
df = pd.json_normalize(data[key])
|
91
|
+
buffer = BytesIO()
|
92
|
+
df.to_csv(buffer, index=False, encoding=CONFIG.security.encoding)
|
93
|
+
buffer.seek(0)
|
94
|
+
f.writestr(
|
95
|
+
f"{key}.csv",
|
96
|
+
encrypt_access_request_results(
|
97
|
+
buffer.getvalue(), privacy_request.id
|
98
|
+
),
|
99
|
+
)
|
100
|
+
|
101
|
+
zipped_csvs.seek(0)
|
102
|
+
return zipped_csvs
|
103
|
+
|
104
|
+
if resp_format == ResponseFormat.html.value:
|
105
|
+
return DsrReportBuilder(
|
106
|
+
privacy_request=privacy_request,
|
107
|
+
dsr_data=data,
|
108
|
+
).generate()
|
109
|
+
|
110
|
+
raise NotImplementedError(f"No handling for response format {resp_format}.")
|
97
111
|
|
98
112
|
|
99
113
|
def upload_to_s3( # pylint: disable=R0913
|
100
|
-
storage_secrets:
|
101
|
-
data:
|
114
|
+
storage_secrets: Dict[StorageSecrets, Any],
|
115
|
+
data: Dict,
|
102
116
|
bucket_name: str,
|
103
117
|
file_key: str,
|
104
118
|
resp_format: str,
|
@@ -126,22 +140,18 @@ def upload_to_s3( # pylint: disable=R0913
|
|
126
140
|
"storage", {}
|
127
141
|
).get("aws_s3_assume_role_arn"),
|
128
142
|
)
|
129
|
-
except (ClientError, ParamValidationError) as e:
|
130
|
-
logger.error(f"Error getting s3 client: {str(e)}")
|
131
|
-
raise StorageUploadError(f"Error getting s3 client: {str(e)}")
|
132
143
|
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
144
|
+
# handles file chunking
|
145
|
+
try:
|
146
|
+
s3_client.upload_fileobj(
|
147
|
+
Fileobj=write_to_in_memory_buffer(resp_format, data, privacy_request),
|
148
|
+
Bucket=bucket_name,
|
149
|
+
Key=file_key,
|
150
|
+
)
|
151
|
+
except Exception as e:
|
152
|
+
logger.error("Encountered error while uploading s3 object: {}", e)
|
153
|
+
raise e
|
143
154
|
|
144
|
-
try:
|
145
155
|
presigned_url: AnyHttpUrlString = create_presigned_url_for_s3(
|
146
156
|
s3_client, bucket_name, file_key
|
147
157
|
)
|
@@ -152,11 +162,13 @@ def upload_to_s3( # pylint: disable=R0913
|
|
152
162
|
"Encountered error while uploading and generating link for s3 object: {}", e
|
153
163
|
)
|
154
164
|
raise StorageUploadError(f"Error uploading to S3: {e}")
|
165
|
+
except ParamValidationError as e:
|
166
|
+
raise StorageUploadError(f"The parameters you provided are incorrect: {e}")
|
155
167
|
|
156
168
|
|
157
169
|
def upload_to_gcs(
|
158
|
-
storage_secrets:
|
159
|
-
data:
|
170
|
+
storage_secrets: Dict,
|
171
|
+
data: Dict,
|
160
172
|
bucket_name: str,
|
161
173
|
file_key: str,
|
162
174
|
resp_format: str,
|
@@ -165,30 +177,24 @@ def upload_to_gcs(
|
|
165
177
|
) -> str:
|
166
178
|
"""Uploads access request data to a Google Cloud Storage bucket"""
|
167
179
|
logger.info("Starting Google Cloud Storage upload of {}", file_key)
|
168
|
-
content_type = {
|
169
|
-
ResponseFormat.json.value: "application/json",
|
170
|
-
ResponseFormat.csv.value: "application/zip",
|
171
|
-
ResponseFormat.html.value: "application/zip",
|
172
|
-
}
|
173
|
-
|
174
|
-
blob = get_gcs_blob(auth_method, storage_secrets, bucket_name, file_key)
|
175
|
-
in_memory_file = write_to_in_memory_buffer(resp_format, data, privacy_request)
|
176
180
|
|
177
181
|
try:
|
182
|
+
storage_client = get_gcs_client(auth_method, storage_secrets)
|
183
|
+
bucket = storage_client.bucket(bucket_name)
|
184
|
+
|
185
|
+
blob = bucket.blob(file_key)
|
186
|
+
in_memory_file = write_to_in_memory_buffer(resp_format, data, privacy_request)
|
187
|
+
content_type = {
|
188
|
+
ResponseFormat.json.value: "application/json",
|
189
|
+
ResponseFormat.csv.value: "application/zip",
|
190
|
+
ResponseFormat.html.value: "application/zip",
|
191
|
+
}
|
178
192
|
blob.upload_from_string(
|
179
193
|
in_memory_file.getvalue(), content_type=content_type[resp_format]
|
180
194
|
)
|
181
|
-
except Exception as e:
|
182
|
-
logger.error("Error uploading to GCS: {}", str(e))
|
183
|
-
logger.error(
|
184
|
-
"Encountered error while uploading and generating link for Google Cloud Storage object: {}",
|
185
|
-
e,
|
186
|
-
)
|
187
|
-
raise
|
188
195
|
|
189
|
-
|
196
|
+
logger.info("File {} uploaded to {}", file_key, blob.public_url)
|
190
197
|
|
191
|
-
try:
|
192
198
|
presigned_url = blob.generate_signed_url(
|
193
199
|
version="v4",
|
194
200
|
expiration=CONFIG.security.subject_request_download_link_ttl_seconds,
|
@@ -204,7 +210,7 @@ def upload_to_gcs(
|
|
204
210
|
|
205
211
|
|
206
212
|
def upload_to_local(
|
207
|
-
data:
|
213
|
+
data: Dict,
|
208
214
|
file_key: str,
|
209
215
|
privacy_request: PrivacyRequest,
|
210
216
|
resp_format: str = ResponseFormat.json.value,
|
fides/api/util/cache.py
CHANGED
@@ -1,5 +1,4 @@
|
|
1
1
|
import json
|
2
|
-
import os
|
3
2
|
from typing import Any, Dict, List, Optional, Union
|
4
3
|
from urllib.parse import unquote_to_bytes
|
5
4
|
|
@@ -28,7 +27,6 @@ from fides.config import CONFIG
|
|
28
27
|
RedisValue = Union[bytes, float, int, str]
|
29
28
|
|
30
29
|
_connection = None
|
31
|
-
_read_only_connection = None
|
32
30
|
|
33
31
|
|
34
32
|
class FidesopsRedis(Redis):
|
@@ -159,36 +157,6 @@ class FidesopsRedis(Redis):
|
|
159
157
|
return list_length
|
160
158
|
|
161
159
|
|
162
|
-
# FIXME: Ideally we don't want our code to be aware of the way tests are run,
|
163
|
-
# e.g that we run them in parallel with pytest-xdist. We need to find a way
|
164
|
-
# to change the pytest_configure_node hook to set the correct environment variable
|
165
|
-
# like we do for the readonly database. It wasn't working so we're using this workaround for now.
|
166
|
-
def _determine_redis_db_index(
|
167
|
-
read_only: Optional[bool] = False,
|
168
|
-
) -> int: # pragma: no cover
|
169
|
-
"""Return the Redis DB index that should be used for the current process.
|
170
|
-
|
171
|
-
Behavior:
|
172
|
-
1. Test mode:
|
173
|
-
- If running under xdist, map `gwN` → DB `N + 1` (reserve DB 0).
|
174
|
-
- If *not* running under xdist, always use DB 1.
|
175
|
-
|
176
|
-
2. Non-test mode: return the value already present in `CONFIG.redis.db_index`
|
177
|
-
"""
|
178
|
-
|
179
|
-
# 1. Test mode logic
|
180
|
-
if CONFIG.test_mode:
|
181
|
-
worker_id = os.getenv("PYTEST_XDIST_WORKER")
|
182
|
-
if worker_id and worker_id.startswith("gw"):
|
183
|
-
suffix = worker_id[2:]
|
184
|
-
if suffix.isdigit():
|
185
|
-
return int(suffix) + 1 # gw0 -> 1, gw1 -> 2, etc.
|
186
|
-
return CONFIG.redis.test_db_index
|
187
|
-
|
188
|
-
# 2. Non-test mode
|
189
|
-
return CONFIG.redis.read_only_db_index if read_only else CONFIG.redis.db_index
|
190
|
-
|
191
|
-
|
192
160
|
def get_cache(should_log: Optional[bool] = False) -> FidesopsRedis:
|
193
161
|
"""Return a singleton connection to our Redis cache"""
|
194
162
|
|
@@ -205,7 +173,7 @@ def get_cache(should_log: Optional[bool] = False) -> FidesopsRedis:
|
|
205
173
|
decode_responses=CONFIG.redis.decode_responses,
|
206
174
|
host=CONFIG.redis.host,
|
207
175
|
port=CONFIG.redis.port,
|
208
|
-
db=
|
176
|
+
db=CONFIG.redis.db_index,
|
209
177
|
username=CONFIG.redis.user,
|
210
178
|
password=CONFIG.redis.password,
|
211
179
|
ssl=CONFIG.redis.ssl,
|
@@ -234,50 +202,6 @@ def get_cache(should_log: Optional[bool] = False) -> FidesopsRedis:
|
|
234
202
|
return _connection
|
235
203
|
|
236
204
|
|
237
|
-
def get_read_only_cache() -> FidesopsRedis:
|
238
|
-
"""
|
239
|
-
Return a singleton connection to the read-only Redis cache.
|
240
|
-
If read-only is not enabled, return the regular cache.
|
241
|
-
"""
|
242
|
-
# If read-only is not enabled, return the regular cache
|
243
|
-
if not CONFIG.redis.read_only_enabled:
|
244
|
-
logger.debug(
|
245
|
-
"Read-only Redis is not enabled. Returning writeable cache connection instead."
|
246
|
-
)
|
247
|
-
return get_cache()
|
248
|
-
|
249
|
-
global _read_only_connection # pylint: disable=W0603
|
250
|
-
if _read_only_connection is None:
|
251
|
-
logger.debug("Creating new read-only Redis connection...")
|
252
|
-
_read_only_connection = FidesopsRedis( # type: ignore[call-overload]
|
253
|
-
charset=CONFIG.redis.charset,
|
254
|
-
decode_responses=CONFIG.redis.decode_responses,
|
255
|
-
host=CONFIG.redis.read_only_host,
|
256
|
-
port=CONFIG.redis.read_only_port,
|
257
|
-
db=_determine_redis_db_index(read_only=True),
|
258
|
-
username=CONFIG.redis.read_only_user,
|
259
|
-
password=CONFIG.redis.read_only_password,
|
260
|
-
ssl=CONFIG.redis.read_only_ssl,
|
261
|
-
ssl_ca_certs=CONFIG.redis.read_only_ssl_ca_certs,
|
262
|
-
ssl_cert_reqs=CONFIG.redis.read_only_ssl_cert_reqs,
|
263
|
-
)
|
264
|
-
logger.debug("New read-only Redis connection created.")
|
265
|
-
|
266
|
-
try:
|
267
|
-
connected = _read_only_connection.ping()
|
268
|
-
logger.debug("Read-only Redis connection succeeded.")
|
269
|
-
except ConnectionErrorFromRedis:
|
270
|
-
connected = False
|
271
|
-
|
272
|
-
if not connected:
|
273
|
-
logger.error(
|
274
|
-
"Unable to establish read-only Redis connection. Returning writeable cache connection instead."
|
275
|
-
)
|
276
|
-
return get_cache()
|
277
|
-
|
278
|
-
return _read_only_connection
|
279
|
-
|
280
|
-
|
281
205
|
def get_identity_cache_key(privacy_request_id: str, identity_attribute: str) -> str:
|
282
206
|
"""Return the key at which to save this PrivacyRequest's identity for the passed in attribute"""
|
283
207
|
# TODO: Remove this prefix
|
fides/config/redis_settings.py
CHANGED
@@ -20,10 +20,6 @@ class RedisSettings(FidesSettings):
|
|
20
20
|
default=0,
|
21
21
|
description="The application will use this index in the Redis cache to cache data.",
|
22
22
|
)
|
23
|
-
test_db_index: int = Field(
|
24
|
-
default=1,
|
25
|
-
description="The application will use this index in the Redis cache to cache data for testing.",
|
26
|
-
)
|
27
23
|
decode_responses: bool = Field(
|
28
24
|
default=True,
|
29
25
|
description="Whether or not to automatically decode the values fetched from Redis. Decodes using the `charset` configuration value.",
|
@@ -68,57 +64,14 @@ class RedisSettings(FidesSettings):
|
|
68
64
|
default="", description="The user with which to login to the Redis cache."
|
69
65
|
)
|
70
66
|
|
71
|
-
# Read-only Redis settings
|
72
|
-
read_only_enabled: bool = Field(
|
73
|
-
default=False,
|
74
|
-
description="Whether a read-only Redis cache is enabled.",
|
75
|
-
)
|
76
|
-
read_only_host: str = Field(
|
77
|
-
default="",
|
78
|
-
description="The network address for the read-only Redis cache.",
|
79
|
-
)
|
80
|
-
read_only_port: int = Field(
|
81
|
-
default=6379,
|
82
|
-
description="The port at which the read-only Redis cache will be accessible.",
|
83
|
-
)
|
84
|
-
read_only_user: str = Field(
|
85
|
-
default="",
|
86
|
-
description="The user with which to login to the read-only Redis cache.",
|
87
|
-
)
|
88
|
-
read_only_password: str = Field(
|
89
|
-
default="",
|
90
|
-
description="The password with which to login to the read-only Redis cache.",
|
91
|
-
)
|
92
|
-
read_only_db_index: int = Field(
|
93
|
-
default=0,
|
94
|
-
description="The application will use this index in the read-only Redis cache to cache data.",
|
95
|
-
)
|
96
|
-
read_only_ssl: bool = Field(
|
97
|
-
default=False,
|
98
|
-
description="Whether the application's connections to the read-only cache should be encrypted using TLS.",
|
99
|
-
)
|
100
|
-
read_only_ssl_cert_reqs: Optional[str] = Field(
|
101
|
-
default="required",
|
102
|
-
description="If using TLS encryption, set this to 'required' if you wish to enforce the read-only Redis cache to provide a certificate. Note that not all cache providers support this without setting ssl_ca_certs (e.g. AWS Elasticache).",
|
103
|
-
)
|
104
|
-
read_only_ssl_ca_certs: str = Field(
|
105
|
-
default="",
|
106
|
-
description="If using TLS encryption rooted with a custom Certificate Authority, set this to the path of the CA certificate.",
|
107
|
-
)
|
108
|
-
|
109
67
|
# This relies on other values to get built so must be last
|
110
68
|
connection_url: Optional[str] = Field(
|
111
69
|
default=None,
|
112
70
|
description="A full connection URL to the Redis cache. If not specified, this URL is automatically assembled from the host, port, password and db_index specified above.",
|
113
71
|
exclude=True,
|
114
72
|
)
|
115
|
-
read_only_connection_url: Optional[str] = Field(
|
116
|
-
default=None,
|
117
|
-
description="A full connection URL to the read-only Redis cache. If not specified, this URL is automatically assembled from the read_only_host, read_only_port, read_only_password and read_only_db_index specified above.",
|
118
|
-
exclude=True,
|
119
|
-
)
|
120
73
|
|
121
|
-
@field_validator("connection_url",
|
74
|
+
@field_validator("connection_url", mode="before")
|
122
75
|
@classmethod
|
123
76
|
def assemble_connection_url(
|
124
77
|
cls,
|
@@ -130,50 +83,22 @@ class RedisSettings(FidesSettings):
|
|
130
83
|
# If the whole URL is provided via the config, preference that
|
131
84
|
return v
|
132
85
|
|
133
|
-
is_read_only = info.field_name == "read_only_connection_url"
|
134
|
-
|
135
86
|
connection_protocol = "redis"
|
136
87
|
params_str = ""
|
137
|
-
use_tls = (
|
138
|
-
info.data.get("read_only_ssl")
|
139
|
-
if is_read_only
|
140
|
-
else info.data.get("ssl", False)
|
141
|
-
)
|
88
|
+
use_tls = info.data.get("ssl", False)
|
142
89
|
|
143
90
|
# These vars are intentionally fetched with `or ""` as the default to account
|
144
91
|
# for the edge case where `None` is explicitly set in `values` by Pydantic because
|
145
92
|
# it is not overridden by the config file or an env var
|
146
|
-
user = (
|
147
|
-
|
148
|
-
|
149
|
-
else info.data.get("user", "")
|
150
|
-
)
|
151
|
-
password = (
|
152
|
-
info.data.get("read_only_password", "")
|
153
|
-
if is_read_only
|
154
|
-
else info.data.get("password", "")
|
155
|
-
)
|
156
|
-
db_index = (
|
157
|
-
info.data.get("read_only_db_index", "")
|
158
|
-
if is_read_only
|
159
|
-
else info.data.get("db_index", "")
|
160
|
-
)
|
93
|
+
user = info.data.get("user") or ""
|
94
|
+
password = info.data.get("password") or ""
|
95
|
+
db_index = info.data.get("db_index") or ""
|
161
96
|
if use_tls:
|
162
97
|
# If using TLS update the connection URL format
|
163
98
|
connection_protocol = "rediss"
|
164
|
-
cert_reqs = (
|
165
|
-
info.data.get("read_only_ssl_cert_reqs", "none")
|
166
|
-
if is_read_only
|
167
|
-
else info.data.get("ssl_cert_reqs", "none")
|
168
|
-
)
|
99
|
+
cert_reqs = info.data.get("ssl_cert_reqs", "none")
|
169
100
|
params = {"ssl_cert_reqs": quote_plus(cert_reqs)}
|
170
|
-
|
171
|
-
ssl_ca_certs = (
|
172
|
-
info.data.get("read_only_ssl_ca_certs", "")
|
173
|
-
if is_read_only
|
174
|
-
else info.data.get("ssl_ca_certs", "")
|
175
|
-
)
|
176
|
-
if ssl_ca_certs:
|
101
|
+
if ssl_ca_certs := info.data.get("ssl_ca_certs", ""):
|
177
102
|
params["ssl_ca_certs"] = quote(ssl_ca_certs, safe="/")
|
178
103
|
params_str = "?" + urlencode(params, quote_via=quote, safe="/")
|
179
104
|
|
@@ -183,23 +108,7 @@ class RedisSettings(FidesSettings):
|
|
183
108
|
if password or user:
|
184
109
|
auth_prefix = f"{quote_plus(user)}:{quote_plus(password)}@"
|
185
110
|
|
186
|
-
|
187
|
-
info.data.get("read_only_host", "")
|
188
|
-
if is_read_only
|
189
|
-
else info.data.get("host", "")
|
190
|
-
)
|
191
|
-
port = (
|
192
|
-
info.data.get("read_only_port", "")
|
193
|
-
if is_read_only
|
194
|
-
else info.data.get("port", "")
|
195
|
-
)
|
196
|
-
|
197
|
-
# Only include database index in URL if it's not the default (0)
|
198
|
-
db_path = f"{db_index}" if db_index != 0 else ""
|
199
|
-
|
200
|
-
connection_url = (
|
201
|
-
f"{connection_protocol}://{auth_prefix}{host}:{port}/{db_path}{params_str}"
|
202
|
-
)
|
111
|
+
connection_url = f"{connection_protocol}://{auth_prefix}{info.data.get('host', '')}:{info.data.get('port', '')}/{db_index}{params_str}"
|
203
112
|
return connection_url
|
204
113
|
|
205
114
|
model_config = SettingsConfigDict(env_prefix=ENV_PREFIX)
|