ethyca-fides 2.63.1b3__py2.py3-none-any.whl → 2.63.1rc0__py2.py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the packages as they appear in that public registry.
- {ethyca_fides-2.63.1b3.dist-info → ethyca_fides-2.63.1rc0.dist-info}/METADATA +1 -1
- {ethyca_fides-2.63.1b3.dist-info → ethyca_fides-2.63.1rc0.dist-info}/RECORD +139 -141
- fides/_version.py +3 -3
- fides/api/alembic/migrations/versions/29e56fa1fdb3_add_monitor_tasks.py +147 -0
- fides/api/api/v1/endpoints/privacy_request_endpoints.py +4 -4
- fides/api/db/base.py +5 -3
- fides/api/main.py +0 -1
- fides/api/models/attachment.py +23 -36
- fides/api/models/connectionconfig.py +1 -1
- fides/api/models/detection_discovery/__init__.py +35 -0
- fides/api/models/detection_discovery/monitor_task.py +161 -0
- fides/api/models/field_types/__init__.py +5 -0
- fides/api/models/field_types/encrypted_large_data.py +151 -0
- fides/api/models/privacy_preference.py +1 -1
- fides/api/models/privacy_request/execution_log.py +3 -31
- fides/api/models/privacy_request/privacy_request.py +16 -3
- fides/api/models/privacy_request/request_task.py +36 -25
- fides/api/models/worker_task.py +96 -0
- fides/api/schemas/external_storage.py +22 -0
- fides/api/schemas/privacy_request.py +1 -12
- fides/api/service/connectors/base_erasure_email_connector.py +1 -1
- fides/api/service/connectors/consent_email_connector.py +2 -1
- fides/api/service/connectors/dynamic_erasure_email_connector.py +2 -1
- fides/api/service/connectors/erasure_email_connector.py +1 -1
- fides/api/service/external_data_storage.py +371 -0
- fides/api/service/privacy_request/dsr_package/dsr_report_builder.py +46 -264
- fides/api/service/privacy_request/dsr_package/templates/collection_index.html +9 -34
- fides/api/service/privacy_request/dsr_package/templates/item.html +37 -0
- fides/api/service/privacy_request/dsr_package/templates/main.css +2 -45
- fides/api/service/privacy_request/dsr_package/templates/welcome.html +8 -12
- fides/api/service/privacy_request/request_runner_service.py +139 -258
- fides/api/service/privacy_request/request_service.py +1 -1
- fides/api/service/storage/gcs.py +3 -15
- fides/api/service/storage/s3.py +14 -28
- fides/api/service/storage/util.py +7 -45
- fides/api/task/create_request_tasks.py +1 -1
- fides/api/task/execute_request_tasks.py +9 -8
- fides/api/task/graph_task.py +22 -10
- fides/api/tasks/storage.py +91 -85
- fides/api/util/cache.py +1 -77
- fides/api/util/consent_util.py +1 -1
- fides/api/util/data_size.py +102 -0
- fides/api/util/encryption/aes_gcm_encryption_util.py +271 -0
- fides/config/redis_settings.py +8 -99
- fides/service/messaging/aws_ses_service.py +1 -5
- fides/service/privacy_request/privacy_request_service.py +1 -1
- fides/ui-build/static/admin/404.html +1 -1
- fides/ui-build/static/admin/add-systems/manual.html +1 -1
- fides/ui-build/static/admin/add-systems/multiple.html +1 -1
- fides/ui-build/static/admin/add-systems.html +1 -1
- fides/ui-build/static/admin/consent/configure/add-vendors.html +1 -1
- fides/ui-build/static/admin/consent/configure.html +1 -1
- fides/ui-build/static/admin/consent/privacy-experience/[id].html +1 -1
- fides/ui-build/static/admin/consent/privacy-experience/new.html +1 -1
- fides/ui-build/static/admin/consent/privacy-experience.html +1 -1
- fides/ui-build/static/admin/consent/privacy-notices/[id].html +1 -1
- fides/ui-build/static/admin/consent/privacy-notices/new.html +1 -1
- fides/ui-build/static/admin/consent/privacy-notices.html +1 -1
- fides/ui-build/static/admin/consent/properties.html +1 -1
- fides/ui-build/static/admin/consent/reporting.html +1 -1
- fides/ui-build/static/admin/consent.html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/projects/[projectUrn]/[resourceUrn].html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/projects/[projectUrn].html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/projects.html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/resources/[resourceUrn].html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/resources.html +1 -1
- fides/ui-build/static/admin/data-catalog.html +1 -1
- fides/ui-build/static/admin/data-discovery/action-center/[monitorId]/[systemId].html +1 -1
- fides/ui-build/static/admin/data-discovery/action-center/[monitorId].html +1 -1
- fides/ui-build/static/admin/data-discovery/action-center.html +1 -1
- fides/ui-build/static/admin/data-discovery/activity.html +1 -1
- fides/ui-build/static/admin/data-discovery/detection/[resourceUrn].html +1 -1
- fides/ui-build/static/admin/data-discovery/detection.html +1 -1
- fides/ui-build/static/admin/data-discovery/discovery/[resourceUrn].html +1 -1
- fides/ui-build/static/admin/data-discovery/discovery.html +1 -1
- fides/ui-build/static/admin/datamap.html +1 -1
- fides/ui-build/static/admin/dataset/[datasetId]/[collectionName]/[...subfieldNames].html +1 -1
- fides/ui-build/static/admin/dataset/[datasetId]/[collectionName].html +1 -1
- fides/ui-build/static/admin/dataset/[datasetId].html +1 -1
- fides/ui-build/static/admin/dataset/new.html +1 -1
- fides/ui-build/static/admin/dataset.html +1 -1
- fides/ui-build/static/admin/datastore-connection/[id].html +1 -1
- fides/ui-build/static/admin/datastore-connection/new.html +1 -1
- fides/ui-build/static/admin/datastore-connection.html +1 -1
- fides/ui-build/static/admin/index.html +1 -1
- fides/ui-build/static/admin/integrations/[id].html +1 -1
- fides/ui-build/static/admin/integrations.html +1 -1
- fides/ui-build/static/admin/lib/fides-ext-gpp.js +1 -1
- fides/ui-build/static/admin/lib/fides-headless.js +1 -1
- fides/ui-build/static/admin/lib/fides-preview.js +1 -1
- fides/ui-build/static/admin/lib/fides-tcf.js +2 -2
- fides/ui-build/static/admin/lib/fides.js +2 -2
- fides/ui-build/static/admin/login/[provider].html +1 -1
- fides/ui-build/static/admin/login.html +1 -1
- fides/ui-build/static/admin/messaging/[id].html +1 -1
- fides/ui-build/static/admin/messaging/add-template.html +1 -1
- fides/ui-build/static/admin/messaging.html +1 -1
- fides/ui-build/static/admin/poc/ant-components.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/AntForm.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/FormikAntFormItem.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/FormikControlled.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/FormikField.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/FormikSpreadField.html +1 -1
- fides/ui-build/static/admin/poc/forms.html +1 -1
- fides/ui-build/static/admin/poc/table-migration.html +1 -1
- fides/ui-build/static/admin/privacy-requests/[id].html +1 -1
- fides/ui-build/static/admin/privacy-requests/configure/messaging.html +1 -1
- fides/ui-build/static/admin/privacy-requests/configure/storage.html +1 -1
- fides/ui-build/static/admin/privacy-requests/configure.html +1 -1
- fides/ui-build/static/admin/privacy-requests.html +1 -1
- fides/ui-build/static/admin/properties/[id].html +1 -1
- fides/ui-build/static/admin/properties/add-property.html +1 -1
- fides/ui-build/static/admin/properties.html +1 -1
- fides/ui-build/static/admin/reporting/datamap.html +1 -1
- fides/ui-build/static/admin/settings/about/alpha.html +1 -1
- fides/ui-build/static/admin/settings/about.html +1 -1
- fides/ui-build/static/admin/settings/consent/[configuration_id]/[purpose_id].html +1 -1
- fides/ui-build/static/admin/settings/consent.html +1 -1
- fides/ui-build/static/admin/settings/custom-fields.html +1 -1
- fides/ui-build/static/admin/settings/domain-records.html +1 -1
- fides/ui-build/static/admin/settings/domains.html +1 -1
- fides/ui-build/static/admin/settings/email-templates.html +1 -1
- fides/ui-build/static/admin/settings/locations.html +1 -1
- fides/ui-build/static/admin/settings/organization.html +1 -1
- fides/ui-build/static/admin/settings/regulations.html +1 -1
- fides/ui-build/static/admin/systems/configure/[id]/test-datasets.html +1 -1
- fides/ui-build/static/admin/systems/configure/[id].html +1 -1
- fides/ui-build/static/admin/systems.html +1 -1
- fides/ui-build/static/admin/taxonomy.html +1 -1
- fides/ui-build/static/admin/user-management/new.html +1 -1
- fides/ui-build/static/admin/user-management/profile/[id].html +1 -1
- fides/ui-build/static/admin/user-management.html +1 -1
- fides/api/alembic/migrations/versions/5efcdf18438e_add_manual_task_tables.py +0 -160
- fides/api/models/manual_tasks/__init__.py +0 -8
- fides/api/models/manual_tasks/manual_task.py +0 -110
- fides/api/models/manual_tasks/manual_task_log.py +0 -100
- fides/api/schemas/manual_tasks/__init__.py +0 -0
- fides/api/schemas/manual_tasks/manual_task_schemas.py +0 -79
- fides/api/schemas/manual_tasks/manual_task_status.py +0 -151
- fides/api/service/privacy_request/attachment_handling.py +0 -132
- fides/api/service/privacy_request/dsr_package/templates/attachments_index.html +0 -33
- fides/api/tasks/csv_utils.py +0 -170
- fides/api/tasks/encryption_utils.py +0 -42
- fides/service/manual_tasks/__init__.py +0 -0
- fides/service/manual_tasks/manual_task_service.py +0 -150
- {ethyca_fides-2.63.1b3.dist-info → ethyca_fides-2.63.1rc0.dist-info}/WHEEL +0 -0
- {ethyca_fides-2.63.1b3.dist-info → ethyca_fides-2.63.1rc0.dist-info}/entry_points.txt +0 -0
- {ethyca_fides-2.63.1b3.dist-info → ethyca_fides-2.63.1rc0.dist-info}/licenses/LICENSE +0 -0
- {ethyca_fides-2.63.1b3.dist-info → ethyca_fides-2.63.1rc0.dist-info}/top_level.txt +0 -0
- /fides/api/models/{detection_discovery.py → detection_discovery/core.py} +0 -0
- /fides/ui-build/static/admin/_next/static/{ycPcko8qnif6BlkQ6MN4D → PEElhfUdgE5bJjiyu5QCD}/_buildManifest.js +0 -0
- /fides/ui-build/static/admin/_next/static/{ycPcko8qnif6BlkQ6MN4D → PEElhfUdgE5bJjiyu5QCD}/_ssgManifest.js +0 -0
fides/api/service/storage/s3.py
CHANGED
@@ -1,7 +1,6 @@
 from __future__ import annotations
 
-from
-from typing import IO, Any, Dict, Optional, Tuple, Union
+from typing import IO, Any, Dict, Tuple, Union
 
 from boto3.s3.transfer import TransferConfig
 from botocore.exceptions import ClientError, ParamValidationError
@@ -35,7 +34,7 @@ def maybe_get_s3_client(
 
 
 def create_presigned_url_for_s3(
-    s3_client: Any, bucket_name: str, file_key: str
+    s3_client: Any, bucket_name: str, file_key: str
 ) -> AnyHttpUrlString:
     """
     Generates a presigned URL to share an S3 object
@@ -46,16 +45,10 @@ def create_presigned_url_for_s3(
     :return: Presigned URL as string.
     """
     params = {"Bucket": bucket_name, "Key": file_key}
-    if ttl_seconds:
-        if ttl_seconds > 604800:
-            raise ValueError("TTL must be less than 7 days")
-        expires_in = ttl_seconds
-    else:
-        expires_in = CONFIG.security.subject_request_download_link_ttl_seconds
     response = s3_client.generate_presigned_url(
         "get_object",
         Params=params,
-        ExpiresIn=
+        ExpiresIn=CONFIG.security.subject_request_download_link_ttl_seconds,
     )
 
     # The response contains the presigned URL
@@ -75,7 +68,7 @@ def generic_upload_to_s3(  # pylint: disable=R0913
     file_key: str,
     auth_method: str,
     document: IO[bytes],
-    size_threshold: int = LARGE_FILE_THRESHOLD,  #
+    size_threshold: int = LARGE_FILE_THRESHOLD,  # 5 MB threshold
 ) -> Tuple[int, AnyHttpUrlString]:
     """
     Uploads file like objects to S3.
@@ -137,8 +130,7 @@ def generic_retrieve_from_s3(
     file_key: str,
     auth_method: str,
     get_content: bool = False,
-
-) -> Tuple[int, Union[str, IO[bytes]]]:
+) -> Tuple[int, Union[str, bytes]]:
     """
     Retrieves a file from S3 and returns its size and either a presigned URL or the actual content.
 
@@ -156,23 +148,17 @@ def generic_retrieve_from_s3(
     s3_client = get_s3_client(auth_method, storage_secrets)
 
     try:
-
-
-
-
-
-        file_obj = BytesIO()
-        s3_client.download_fileobj(
-            Bucket=bucket_name, Key=file_key, Fileobj=file_obj
-        )
-        file_obj.seek(0)  # Reset file pointer to beginning
-        return int(size_response["ContentLength"]), file_obj
+        if get_content:
+            # Get the actual content
+            response = s3_client.get_object(Bucket=bucket_name, Key=file_key)
+            content = response["Body"].read()
+            return response["ContentLength"], content
 
         # Get presigned URL
-        presigned_url = create_presigned_url_for_s3(
-
-        )
-        return int(
+        presigned_url = create_presigned_url_for_s3(s3_client, bucket_name, file_key)
+        # Get file size
+        response = s3_client.head_object(Bucket=bucket_name, Key=file_key)
+        return int(response["ContentLength"]), str(presigned_url)
     except ClientError as e:
         logger.error(f"Error retrieving file from S3: {e}")
         raise e
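A minimal caller sketch for the revised generic_retrieve_from_s3 (the secrets dict, bucket, key, and auth-method value below are illustrative placeholders, not values from this release): with get_content=True the helper now returns the object body as bytes, otherwise it returns a presigned URL whose lifetime comes from CONFIG.security.subject_request_download_link_ttl_seconds.

from typing import Union

from fides.api.service.storage.s3 import generic_retrieve_from_s3

def fetch_stored_file(
    storage_secrets: dict,
    bucket_name: str,
    file_key: str,
    download_body: bool,
) -> Union[bytes, str]:
    # The second element is raw bytes when download_body is True,
    # otherwise a presigned URL string; the size is returned either way.
    size, result = generic_retrieve_from_s3(
        storage_secrets=storage_secrets,
        bucket_name=bucket_name,
        file_key=file_key,
        auth_method="secret_keys",  # illustrative auth method value
        get_content=download_body,
    )
    return result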
fides/api/service/storage/util.py
CHANGED
@@ -3,10 +3,6 @@ from enum import Enum as EnumType
 
 from loguru import logger
 
-# This is the max file size for downloading the content of an attachment.
-# This is an industry standard used by companies like Google and Microsoft.
-LARGE_FILE_THRESHOLD = 25 * 1024 * 1024  # 25 MB
-
 
 class AllowedFileType(EnumType):
     """
@@ -28,49 +24,15 @@ class AllowedFileType(EnumType):
 
 LOCAL_FIDES_UPLOAD_DIRECTORY = "fides_uploads"
 
+# Default to 10MB if not specified in environment
+LARGE_FILE_THRESHOLD = 10 * 1024 * 1024  # 10 MB threshold
 
-def get_local_filename(file_key: str) -> str:
-    """Verifies that the local storage directory exists and returns the local filepath.
-
-    This extra security checks are to prevent directory traversal attacks and "complete business and technical destruction".
-    Thanks Claude.
-
-    Args:
-        file_key: The key/path for the file
-
-    Returns:
-        The full local filepath
-
-    Raises:
-        ValueError: If the file_key is invalid or would result in a path outside the upload directory
-    """
-    # Basic validation
-    if not file_key:
-        raise ValueError("File key cannot be empty")
-
-    # Security checks before normalization
-    if file_key.startswith("/"):
-        raise ValueError("Invalid file key: cannot start with '/'")
-
-    # Normalize the path to handle any path separators consistently
-    # First normalize using os.path.normpath to handle any redundant separators
-    normalized_key = os.path.normpath(file_key)
-    # Then convert all separators to forward slashes for consistency
-    normalized_key = normalized_key.replace("\\", "/")
 
-
-
-    if not os.path.
-        os.
-
-        raise ValueError(
-            "Invalid file key: would result in path outside upload directory"
-        )
-
-    # Create all necessary directories
-    os.makedirs(os.path.dirname(final_path), exist_ok=True)
-
-    return final_path
+def get_local_filename(file_key: str) -> str:
+    """Verifies that the local storage directory exists and returns the local filepath"""
+    if not os.path.exists(LOCAL_FIDES_UPLOAD_DIRECTORY):
+        os.makedirs(LOCAL_FIDES_UPLOAD_DIRECTORY)
+    return f"{LOCAL_FIDES_UPLOAD_DIRECTORY}/{file_key}"
 
 
 def get_allowed_file_type_or_raise(file_key: str) -> str:
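For reference, a small illustration of the simplified helper's behaviour after this change (the file name is made up): it only ensures the upload directory exists and joins the key onto it, dropping the earlier normalisation and traversal checks.

from fides.api.service.storage.util import LOCAL_FIDES_UPLOAD_DIRECTORY, get_local_filename

# Creates "fides_uploads/" on first use, then returns the joined path.
path = get_local_filename("example_privacy_request.json")
assert path == f"{LOCAL_FIDES_UPLOAD_DIRECTORY}/example_privacy_request.json"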
fides/api/task/create_request_tasks.py
CHANGED
@@ -29,8 +29,8 @@ from fides.api.models.privacy_request import (
     RequestTask,
     TraversalDetails,
 )
+from fides.api.models.worker_task import ExecutionLogStatus
 from fides.api.schemas.policy import ActionType
-from fides.api.schemas.privacy_request import ExecutionLogStatus
 from fides.api.task.deprecated_graph_task import format_data_use_map_for_caching
 from fides.api.task.execute_request_tasks import log_task_queued, queue_request_task
 from fides.api.util.logger_context_utils import log_context
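The substantive change here, repeated in several files below, is the new home of ExecutionLogStatus. A hedged before/after sketch; the enum member used is the one referenced later in this diff:

# Old location (removed):
# from fides.api.schemas.privacy_request import ExecutionLogStatus

# New location:
from fides.api.models.worker_task import ExecutionLogStatus

status = ExecutionLogStatus.pending  # member referenced in execute_request_tasks.py below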
fides/api/task/execute_request_tasks.py
CHANGED
@@ -22,8 +22,9 @@ from fides.api.common_exceptions import (
 from fides.api.graph.config import TERMINATOR_ADDRESS, CollectionAddress
 from fides.api.models.connectionconfig import ConnectionConfig
 from fides.api.models.privacy_request import ExecutionLog, PrivacyRequest, RequestTask
+from fides.api.models.worker_task import ExecutionLogStatus
 from fides.api.schemas.policy import ActionType, CurrentStep
-from fides.api.schemas.privacy_request import
+from fides.api.schemas.privacy_request import PrivacyRequestStatus
 from fides.api.task.graph_task import (
     GraphTask,
     mark_current_and_downstream_nodes_as_failed,
@@ -145,7 +146,7 @@ def can_run_task_body(
     if request_task.is_terminator_task:
         logger.info(
             "Terminator {} task reached.",
-            request_task.action_type
+            request_task.action_type,
         )
         return False
     if request_task.is_root_task:
@@ -154,7 +155,7 @@ def can_run_task_body(
     if request_task.status != ExecutionLogStatus.pending:
         logger_method(request_task)(
             "Skipping {} task {} with status {}.",
-            request_task.action_type
+            request_task.action_type,
             request_task.collection_address,
             request_task.status.value,
         )
@@ -449,7 +450,7 @@ def log_task_complete(request_task: RequestTask) -> None:
     """Convenience method for logging task completion"""
     logger.info(
         "{} task {} is {}.",
-        request_task.action_type.
+        request_task.action_type.capitalize(),
         request_task.collection_address,
         request_task.status.value,
     )
@@ -478,9 +479,9 @@ def _order_tasks_by_input_key(
 
 
     mapping = {
-        ActionType.access: run_access_node,
-        ActionType.erasure: run_erasure_node,
-        ActionType.consent: run_consent_node,
+        ActionType.access.value: run_access_node,
+        ActionType.erasure.value: run_erasure_node,
+        ActionType.consent.value: run_consent_node,
     }
 
 
@@ -504,7 +505,7 @@ def log_task_queued(request_task: RequestTask, location: str) -> None:
     """Helper for logging that tasks are queued"""
     logger_method(request_task)(
         "Queuing {} task {} from {}.",
-        request_task.action_type
+        request_task.action_type,
        request_task.collection_address,
        location,
    )
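A hedged reading of the mapping change above: the dispatch table is now keyed by ActionType.<member>.value rather than the enum member itself, so a plain-string action type can be used for lookup directly. A minimal sketch; the runner names are placeholders standing in for the real task callables:

from fides.api.schemas.policy import ActionType

def pick_runner(action_type_value: str, mapping: dict):
    # Keys are the enum *values* (plain strings), so no coercion back to the
    # enum member is needed before the lookup.
    return mapping[action_type_value]

mapping = {
    ActionType.access.value: "run_access_node",   # placeholders for the real callables
    ActionType.erasure.value: "run_erasure_node",
    ActionType.consent.value: "run_consent_node",
}
runner = pick_runner(ActionType.access.value, mapping)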
fides/api/task/graph_task.py
CHANGED
@@ -39,8 +39,8 @@ from fides.api.models.datasetconfig import DatasetConfig
 from fides.api.models.policy import Policy, Rule
 from fides.api.models.privacy_preference import PrivacyPreferenceHistory
 from fides.api.models.privacy_request import ExecutionLog, PrivacyRequest, RequestTask
+from fides.api.models.worker_task import ExecutionLogStatus
 from fides.api.schemas.policy import ActionType, CurrentStep
-from fides.api.schemas.privacy_request import ExecutionLogStatus
 from fides.api.service.connectors.base_connector import BaseConnector
 from fides.api.task.consolidate_query_matches import consolidate_query_matches
 from fides.api.task.filter_element_match import filter_element_match
@@ -503,12 +503,20 @@ class GraphTask(ABC):  # pylint: disable=too-many-instance-attributes
             self.post_process_input_data(formatted_input_data)
         )
 
-        # For erasures:
-
-
+        # For erasures: build placeholder version incrementally to avoid holding two full
+        # copies of the data in memory simultaneously.
+        placeholder_output: List[Row] = []
+        for original_row in output:
+            # Create a deep copy of the *single* row, transform it, then append to
+            # the placeholder list. Peak memory at any point is one extra row rather
+            # than an entire dataset.
+            row_copy = copy.deepcopy(original_row)
             filter_element_match(
-
+                row_copy,
+                query_paths=post_processed_node_input_data,
+                delete_elements=False,
             )
+            placeholder_output.append(row_copy)
 
         # For DSR 3.0, save data to build masking requests directly
         # on the Request Task.
@@ -519,11 +527,14 @@ class GraphTask(ABC):  # pylint: disable=too-many-instance-attributes
         # TODO Remove when we stop support for DSR 2.0
         # Save data to build masking requests for DSR 2.0 in Redis.
         # Results saved with matching array elements preserved
-
-
-
+        if not CONFIG.execution.use_dsr_3_0:
+            self.resources.cache_results_with_placeholders(
+                f"access_request__{self.key}", placeholder_output
+            )
 
-        # For access request results,
+        # For access request results, mutate rows in-place to remove non-matching
+        # array elements. We already iterated over `output` above, so reuse the same
+        # loop structure to keep cache locality.
         for row in output:
             logger.info(
                 "Filtering row in {} for matching array elements.",
@@ -537,7 +548,8 @@ class GraphTask(ABC):  # pylint: disable=too-many-instance-attributes
 
         # TODO Remove when we stop support for DSR 2.0
         # Saves intermediate access results for DSR 2.0 in Redis
-
+        if not CONFIG.execution.use_dsr_3_0:
+            self.resources.cache_object(f"access_request__{self.key}", output)
 
         # Return filtered rows with non-matched array data removed.
         return output
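The erasure-path change above replaces a wholesale deep copy of the result set with a per-row copy-and-transform loop. A standalone sketch of the pattern, independent of the Fides internals (the per-row transform is stubbed out as a comment):

import copy
from typing import Any, Dict, List

Row = Dict[str, Any]

def build_placeholder_rows(output: List[Row]) -> List[Row]:
    """Copy and transform one row at a time so peak memory is one extra row,
    not a second full copy of the dataset."""
    placeholder_output: List[Row] = []
    for original_row in output:
        row_copy = copy.deepcopy(original_row)
        # ...apply the per-row transform here, e.g. filter_element_match(row_copy, ...)
        placeholder_output.append(row_copy)
    return placeholder_output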
fides/api/tasks/storage.py
CHANGED
@@ -1,20 +1,23 @@
 from __future__ import annotations
 
 import json
+import secrets
 import zipfile
 from io import BytesIO
-from typing import TYPE_CHECKING, Any, Optional
+from typing import TYPE_CHECKING, Any, Dict, Optional, Union
 
+import pandas as pd
 from botocore.exceptions import ClientError, ParamValidationError
 from fideslang.validation import AnyHttpUrlString
 from loguru import logger
 
 from fides.api.common_exceptions import StorageUploadError
+from fides.api.cryptography.cryptographic_util import bytes_to_b64_str
 from fides.api.schemas.storage.storage import ResponseFormat, StorageSecrets
 from fides.api.service.privacy_request.dsr_package.dsr_report_builder import (
     DsrReportBuilder,
 )
-from fides.api.service.storage.gcs import
+from fides.api.service.storage.gcs import get_gcs_client
 from fides.api.service.storage.s3 import (
     create_presigned_url_for_s3,
     generic_upload_to_s3,
@@ -23,9 +26,11 @@ from fides.api.service.storage.util import (
     LOCAL_FIDES_UPLOAD_DIRECTORY,
     get_local_filename,
 )
-from fides.api.tasks.csv_utils import write_csv_to_zip
-from fides.api.tasks.encryption_utils import encrypt_access_request_results
 from fides.api.util.aws_util import get_s3_client
+from fides.api.util.cache import get_cache, get_encryption_cache_key
+from fides.api.util.encryption.aes_gcm_encryption_scheme import (
+    encrypt_to_bytes_verify_secrets_length,
+)
 from fides.api.util.storage_util import StorageJSONEncoder
 from fides.config import CONFIG
 
@@ -33,8 +38,33 @@ if TYPE_CHECKING:
     from fides.api.models.privacy_request import PrivacyRequest
 
 
+def encrypt_access_request_results(data: Union[str, bytes], request_id: str) -> str:
+    """Encrypt data with encryption key if provided, otherwise return unencrypted data"""
+    cache = get_cache()
+    encryption_cache_key = get_encryption_cache_key(
+        privacy_request_id=request_id,
+        encryption_attr="key",
+    )
+    if isinstance(data, bytes):
+        data = data.decode(CONFIG.security.encoding)
+
+    encryption_key: str | None = cache.get(encryption_cache_key)
+    if not encryption_key:
+        return data
+
+    bytes_encryption_key: bytes = encryption_key.encode(
+        encoding=CONFIG.security.encoding
+    )
+    nonce: bytes = secrets.token_bytes(CONFIG.security.aes_gcm_nonce_length)
+    # b64encode the entire nonce and the encrypted message together
+    return bytes_to_b64_str(
+        nonce
+        + encrypt_to_bytes_verify_secrets_length(data, bytes_encryption_key, nonce)
+    )
+
+
 def write_to_in_memory_buffer(
-    resp_format: str, data:
+    resp_format: str, data: Dict[str, Any], privacy_request: PrivacyRequest
 ) -> BytesIO:
     """Write JSON/CSV data to in-memory file-like object to be passed to S3 or GCS. Encrypt data if encryption key/nonce
     has been cached for the given privacy request id
@@ -43,62 +73,46 @@ def write_to_in_memory_buffer(
     :param data: Dict
     :param request_id: str, The privacy request id
     """
-
     logger.debug("Writing data to in-memory buffer")
-    try:
-        if resp_format == ResponseFormat.html.value:
-            return DsrReportBuilder(
-                privacy_request=privacy_request,
-                dsr_data=data,
-            ).generate()
-
-        if resp_format == ResponseFormat.json.value:
-            return convert_dict_to_encrypted_json(data, privacy_request.id)
-
-        if resp_format == ResponseFormat.csv.value:
-            zipped_csvs = BytesIO()
-            with zipfile.ZipFile(zipped_csvs, "w") as f:
-                write_csv_to_zip(f, data, privacy_request.id)
-            zipped_csvs.seek(0)
-            return zipped_csvs
-    except Exception as e:
-        logger.error(f"Error writing data to in-memory buffer: {str(e)}")
-        raise e
-
-    raise NotImplementedError(f"No handling for response format {resp_format}.")
-
-
-def convert_dict_to_encrypted_json(
-    data: dict[str, Any], privacy_request_id: str
-) -> BytesIO:
-    """Convert data to JSON and encrypt it.
-
-    Args:
-        data: The data to convert and encrypt
-        privacy_request_id: The ID of the privacy request for encryption
 
-
-        BytesIO: A file-like object containing the encrypted JSON data
-
-    Raises:
-        Exception: If JSON conversion fails
-    """
-    try:
+    if resp_format == ResponseFormat.json.value:
         json_str = json.dumps(data, indent=2, default=StorageJSONEncoder().default)
         return BytesIO(
-            encrypt_access_request_results(json_str,
+            encrypt_access_request_results(json_str, privacy_request.id).encode(
                 CONFIG.security.encoding
             )
        )
-
-
-
-
+
+    if resp_format == ResponseFormat.csv.value:
+        zipped_csvs = BytesIO()
+        with zipfile.ZipFile(zipped_csvs, "w") as f:
+            for key in data:
+                df = pd.json_normalize(data[key])
+                buffer = BytesIO()
+                df.to_csv(buffer, index=False, encoding=CONFIG.security.encoding)
+                buffer.seek(0)
+                f.writestr(
+                    f"{key}.csv",
+                    encrypt_access_request_results(
+                        buffer.getvalue(), privacy_request.id
+                    ),
+                )
+
+        zipped_csvs.seek(0)
+        return zipped_csvs
+
+    if resp_format == ResponseFormat.html.value:
+        return DsrReportBuilder(
+            privacy_request=privacy_request,
+            dsr_data=data,
+        ).generate()
+
+    raise NotImplementedError(f"No handling for response format {resp_format}.")
 
 
 def upload_to_s3(  # pylint: disable=R0913
-    storage_secrets:
-    data:
+    storage_secrets: Dict[StorageSecrets, Any],
+    data: Dict,
     bucket_name: str,
     file_key: str,
     resp_format: str,
@@ -126,22 +140,18 @@ def upload_to_s3(  # pylint: disable=R0913
                 "storage", {}
            ).get("aws_s3_assume_role_arn"),
        )
-    except (ClientError, ParamValidationError) as e:
-        logger.error(f"Error getting s3 client: {str(e)}")
-        raise StorageUploadError(f"Error getting s3 client: {str(e)}")
 
-
-
-
-
-
-
-
-
-
-
+        # handles file chunking
+        try:
+            s3_client.upload_fileobj(
+                Fileobj=write_to_in_memory_buffer(resp_format, data, privacy_request),
+                Bucket=bucket_name,
+                Key=file_key,
+            )
+        except Exception as e:
+            logger.error("Encountered error while uploading s3 object: {}", e)
+            raise e
 
-    try:
         presigned_url: AnyHttpUrlString = create_presigned_url_for_s3(
             s3_client, bucket_name, file_key
        )
@@ -152,11 +162,13 @@ def upload_to_s3(  # pylint: disable=R0913
             "Encountered error while uploading and generating link for s3 object: {}", e
        )
        raise StorageUploadError(f"Error uploading to S3: {e}")
+    except ParamValidationError as e:
+        raise StorageUploadError(f"The parameters you provided are incorrect: {e}")
 
 
 def upload_to_gcs(
-    storage_secrets:
-    data:
+    storage_secrets: Dict,
+    data: Dict,
     bucket_name: str,
     file_key: str,
     resp_format: str,
@@ -165,30 +177,24 @@ def upload_to_gcs(
 ) -> str:
     """Uploads access request data to a Google Cloud Storage bucket"""
     logger.info("Starting Google Cloud Storage upload of {}", file_key)
-    content_type = {
-        ResponseFormat.json.value: "application/json",
-        ResponseFormat.csv.value: "application/zip",
-        ResponseFormat.html.value: "application/zip",
-    }
-
-    blob = get_gcs_blob(auth_method, storage_secrets, bucket_name, file_key)
-    in_memory_file = write_to_in_memory_buffer(resp_format, data, privacy_request)
 
     try:
+        storage_client = get_gcs_client(auth_method, storage_secrets)
+        bucket = storage_client.bucket(bucket_name)
+
+        blob = bucket.blob(file_key)
+        in_memory_file = write_to_in_memory_buffer(resp_format, data, privacy_request)
+        content_type = {
+            ResponseFormat.json.value: "application/json",
+            ResponseFormat.csv.value: "application/zip",
+            ResponseFormat.html.value: "application/zip",
+        }
        blob.upload_from_string(
            in_memory_file.getvalue(), content_type=content_type[resp_format]
        )
-    except Exception as e:
-        logger.error("Error uploading to GCS: {}", str(e))
-        logger.error(
-            "Encountered error while uploading and generating link for Google Cloud Storage object: {}",
-            e,
-        )
-        raise
 
-
+        logger.info("File {} uploaded to {}", file_key, blob.public_url)
 
-    try:
        presigned_url = blob.generate_signed_url(
            version="v4",
            expiration=CONFIG.security.subject_request_download_link_ttl_seconds,
@@ -204,7 +210,7 @@ def upload_to_gcs(
 
 
 def upload_to_local(
-    data:
+    data: Dict,
     file_key: str,
     privacy_request: PrivacyRequest,
     resp_format: str = ResponseFormat.json.value,
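A hedged usage sketch of the reworked write_to_in_memory_buffer: for the CSV format it now builds one CSV per top-level key with pandas.json_normalize and zips them, encrypting each entry when an encryption key is cached for the request. The sample data and helper name below are made up; privacy_request stands in for a real PrivacyRequest instance.

from fides.api.schemas.storage.storage import ResponseFormat
from fides.api.tasks.storage import write_to_in_memory_buffer

def package_csv_for_request(privacy_request) -> bytes:
    dsr_data = {"orders": [{"id": 1, "email": "jane@example.com"}]}  # made-up sample
    buffer = write_to_in_memory_buffer(
        ResponseFormat.csv.value,  # one <key>.csv per top-level key, zipped together
        dsr_data,
        privacy_request,
    )
    # Bytes of the zip archive, ready to upload or write to disk.
    return buffer.getvalue()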
fides/api/util/cache.py
CHANGED
@@ -1,5 +1,4 @@
 import json
-import os
 from typing import Any, Dict, List, Optional, Union
 from urllib.parse import unquote_to_bytes
 
@@ -28,7 +27,6 @@ from fides.config import CONFIG
 RedisValue = Union[bytes, float, int, str]
 
 _connection = None
-_read_only_connection = None
 
 
 class FidesopsRedis(Redis):
@@ -159,36 +157,6 @@ class FidesopsRedis(Redis):
         return list_length
 
 
-# FIXME: Ideally we don't want our code to be aware of the way tests are run,
-# e.g that we run them in parallel with pytest-xdist. We need to find a way
-# to change the pytest_configure_node hook to set the correct environment variable
-# like we do for the readonly database. It wasn't working so we're using this workaround for now.
-def _determine_redis_db_index(
-    read_only: Optional[bool] = False,
-) -> int:  # pragma: no cover
-    """Return the Redis DB index that should be used for the current process.
-
-    Behavior:
-    1. Test mode:
-       - If running under xdist, map `gwN` → DB `N + 1` (reserve DB 0).
-       - If *not* running under xdist, always use DB 1.
-
-    2. Non-test mode: return the value already present in `CONFIG.redis.db_index`
-    """
-
-    # 1. Test mode logic
-    if CONFIG.test_mode:
-        worker_id = os.getenv("PYTEST_XDIST_WORKER")
-        if worker_id and worker_id.startswith("gw"):
-            suffix = worker_id[2:]
-            if suffix.isdigit():
-                return int(suffix) + 1  # gw0 -> 1, gw1 -> 2, etc.
-        return CONFIG.redis.test_db_index
-
-    # 2. Non-test mode
-    return CONFIG.redis.read_only_db_index if read_only else CONFIG.redis.db_index
-
-
 def get_cache(should_log: Optional[bool] = False) -> FidesopsRedis:
     """Return a singleton connection to our Redis cache"""
 
@@ -205,7 +173,7 @@ def get_cache(should_log: Optional[bool] = False) -> FidesopsRedis:
             decode_responses=CONFIG.redis.decode_responses,
             host=CONFIG.redis.host,
             port=CONFIG.redis.port,
-            db=
+            db=CONFIG.redis.db_index,
             username=CONFIG.redis.user,
             password=CONFIG.redis.password,
             ssl=CONFIG.redis.ssl,
@@ -234,50 +202,6 @@ def get_cache(should_log: Optional[bool] = False) -> FidesopsRedis:
     return _connection
 
 
-def get_read_only_cache() -> FidesopsRedis:
-    """
-    Return a singleton connection to the read-only Redis cache.
-    If read-only is not enabled, return the regular cache.
-    """
-    # If read-only is not enabled, return the regular cache
-    if not CONFIG.redis.read_only_enabled:
-        logger.debug(
-            "Read-only Redis is not enabled. Returning writeable cache connection instead."
-        )
-        return get_cache()
-
-    global _read_only_connection  # pylint: disable=W0603
-    if _read_only_connection is None:
-        logger.debug("Creating new read-only Redis connection...")
-        _read_only_connection = FidesopsRedis(  # type: ignore[call-overload]
-            charset=CONFIG.redis.charset,
-            decode_responses=CONFIG.redis.decode_responses,
-            host=CONFIG.redis.read_only_host,
-            port=CONFIG.redis.read_only_port,
-            db=_determine_redis_db_index(read_only=True),
-            username=CONFIG.redis.read_only_user,
-            password=CONFIG.redis.read_only_password,
-            ssl=CONFIG.redis.read_only_ssl,
-            ssl_ca_certs=CONFIG.redis.read_only_ssl_ca_certs,
-            ssl_cert_reqs=CONFIG.redis.read_only_ssl_cert_reqs,
-        )
-        logger.debug("New read-only Redis connection created.")
-
-        try:
-            connected = _read_only_connection.ping()
-            logger.debug("Read-only Redis connection succeeded.")
-        except ConnectionErrorFromRedis:
-            connected = False
-
-        if not connected:
-            logger.error(
-                "Unable to establish read-only Redis connection. Returning writeable cache connection instead."
-            )
-            return get_cache()
-
-    return _read_only_connection
-
-
 def get_identity_cache_key(privacy_request_id: str, identity_attribute: str) -> str:
     """Return the key at which to save this PrivacyRequest's identity for the passed in attribute"""
     # TODO: Remove this prefix
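After this change the module exposes a single writeable connection again: the read-only connection helper and the pytest-xdist DB-index workaround are gone, and get_cache always connects to CONFIG.redis.db_index. A trivial sketch of the remaining entry point (the key and TTL below are arbitrary; set is the standard redis-py method inherited by the subclass):

from fides.api.util.cache import get_cache

cache = get_cache()  # singleton FidesopsRedis connection, always writeable
cache.set("example-key", "example-value", ex=3600)  # arbitrary key/value with a 1-hour TTL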
fides/api/util/consent_util.py
CHANGED
@@ -18,7 +18,7 @@ from fides.api.models.privacy_request import (
 )
 from fides.api.models.sql_models import System  # type: ignore[attr-defined]
 from fides.api.models.tcf_purpose_overrides import TCFPurposeOverride
-from fides.api.
+from fides.api.models.worker_task import ExecutionLogStatus
 from fides.api.schemas.redis_cache import Identity
 
 