ethyca-fides 2.63.0rc3__py2.py3-none-any.whl → 2.63.1__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ethyca_fides-2.63.0rc3.dist-info → ethyca_fides-2.63.1.dist-info}/METADATA +1 -1
- {ethyca_fides-2.63.0rc3.dist-info → ethyca_fides-2.63.1.dist-info}/RECORD +129 -110
- fides/_version.py +3 -3
- fides/api/alembic/migrations/versions/29e56fa1fdb3_add_monitor_tasks.py +147 -0
- fides/api/alembic/migrations/versions/5efcdf18438e_add_manual_task_tables.py +160 -0
- fides/api/api/v1/endpoints/privacy_request_endpoints.py +4 -4
- fides/api/db/base.py +7 -1
- fides/api/models/connectionconfig.py +1 -1
- fides/api/models/detection_discovery/__init__.py +35 -0
- fides/api/models/detection_discovery/monitor_task.py +162 -0
- fides/api/models/field_types/__init__.py +5 -0
- fides/api/models/field_types/encrypted_large_data.py +151 -0
- fides/api/models/manual_tasks/__init__.py +8 -0
- fides/api/models/manual_tasks/manual_task.py +110 -0
- fides/api/models/manual_tasks/manual_task_log.py +100 -0
- fides/api/models/privacy_preference.py +1 -1
- fides/api/models/privacy_request/execution_log.py +3 -31
- fides/api/models/privacy_request/privacy_request.py +16 -3
- fides/api/models/privacy_request/request_task.py +36 -25
- fides/api/models/worker_task.py +96 -0
- fides/api/schemas/external_storage.py +22 -0
- fides/api/schemas/manual_tasks/__init__.py +0 -0
- fides/api/schemas/manual_tasks/manual_task_schemas.py +79 -0
- fides/api/schemas/manual_tasks/manual_task_status.py +151 -0
- fides/api/schemas/privacy_request.py +1 -12
- fides/api/service/connectors/base_erasure_email_connector.py +1 -1
- fides/api/service/connectors/consent_email_connector.py +2 -1
- fides/api/service/connectors/dynamic_erasure_email_connector.py +2 -1
- fides/api/service/connectors/erasure_email_connector.py +1 -1
- fides/api/service/external_data_storage.py +371 -0
- fides/api/service/privacy_request/request_runner_service.py +5 -5
- fides/api/service/privacy_request/request_service.py +1 -1
- fides/api/task/create_request_tasks.py +1 -1
- fides/api/task/execute_request_tasks.py +9 -8
- fides/api/task/graph_task.py +22 -10
- fides/api/util/consent_util.py +1 -1
- fides/api/util/data_size.py +102 -0
- fides/api/util/encryption/aes_gcm_encryption_util.py +271 -0
- fides/service/manual_tasks/__init__.py +0 -0
- fides/service/manual_tasks/manual_task_service.py +150 -0
- fides/service/privacy_request/privacy_request_service.py +1 -1
- fides/ui-build/static/admin/404.html +1 -1
- fides/ui-build/static/admin/add-systems/manual.html +1 -1
- fides/ui-build/static/admin/add-systems/multiple.html +1 -1
- fides/ui-build/static/admin/add-systems.html +1 -1
- fides/ui-build/static/admin/consent/configure/add-vendors.html +1 -1
- fides/ui-build/static/admin/consent/configure.html +1 -1
- fides/ui-build/static/admin/consent/privacy-experience/[id].html +1 -1
- fides/ui-build/static/admin/consent/privacy-experience/new.html +1 -1
- fides/ui-build/static/admin/consent/privacy-experience.html +1 -1
- fides/ui-build/static/admin/consent/privacy-notices/[id].html +1 -1
- fides/ui-build/static/admin/consent/privacy-notices/new.html +1 -1
- fides/ui-build/static/admin/consent/privacy-notices.html +1 -1
- fides/ui-build/static/admin/consent/properties.html +1 -1
- fides/ui-build/static/admin/consent/reporting.html +1 -1
- fides/ui-build/static/admin/consent.html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/projects/[projectUrn]/[resourceUrn].html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/projects/[projectUrn].html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/projects.html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/resources/[resourceUrn].html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/resources.html +1 -1
- fides/ui-build/static/admin/data-catalog.html +1 -1
- fides/ui-build/static/admin/data-discovery/action-center/[monitorId]/[systemId].html +1 -1
- fides/ui-build/static/admin/data-discovery/action-center/[monitorId].html +1 -1
- fides/ui-build/static/admin/data-discovery/action-center.html +1 -1
- fides/ui-build/static/admin/data-discovery/activity.html +1 -1
- fides/ui-build/static/admin/data-discovery/detection/[resourceUrn].html +1 -1
- fides/ui-build/static/admin/data-discovery/detection.html +1 -1
- fides/ui-build/static/admin/data-discovery/discovery/[resourceUrn].html +1 -1
- fides/ui-build/static/admin/data-discovery/discovery.html +1 -1
- fides/ui-build/static/admin/datamap.html +1 -1
- fides/ui-build/static/admin/dataset/[datasetId]/[collectionName]/[...subfieldNames].html +1 -1
- fides/ui-build/static/admin/dataset/[datasetId]/[collectionName].html +1 -1
- fides/ui-build/static/admin/dataset/[datasetId].html +1 -1
- fides/ui-build/static/admin/dataset/new.html +1 -1
- fides/ui-build/static/admin/dataset.html +1 -1
- fides/ui-build/static/admin/datastore-connection/[id].html +1 -1
- fides/ui-build/static/admin/datastore-connection/new.html +1 -1
- fides/ui-build/static/admin/datastore-connection.html +1 -1
- fides/ui-build/static/admin/index.html +1 -1
- fides/ui-build/static/admin/integrations/[id].html +1 -1
- fides/ui-build/static/admin/integrations.html +1 -1
- fides/ui-build/static/admin/login/[provider].html +1 -1
- fides/ui-build/static/admin/login.html +1 -1
- fides/ui-build/static/admin/messaging/[id].html +1 -1
- fides/ui-build/static/admin/messaging/add-template.html +1 -1
- fides/ui-build/static/admin/messaging.html +1 -1
- fides/ui-build/static/admin/poc/ant-components.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/AntForm.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/FormikAntFormItem.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/FormikControlled.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/FormikField.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/FormikSpreadField.html +1 -1
- fides/ui-build/static/admin/poc/forms.html +1 -1
- fides/ui-build/static/admin/poc/table-migration.html +1 -1
- fides/ui-build/static/admin/privacy-requests/[id].html +1 -1
- fides/ui-build/static/admin/privacy-requests/configure/messaging.html +1 -1
- fides/ui-build/static/admin/privacy-requests/configure/storage.html +1 -1
- fides/ui-build/static/admin/privacy-requests/configure.html +1 -1
- fides/ui-build/static/admin/privacy-requests.html +1 -1
- fides/ui-build/static/admin/properties/[id].html +1 -1
- fides/ui-build/static/admin/properties/add-property.html +1 -1
- fides/ui-build/static/admin/properties.html +1 -1
- fides/ui-build/static/admin/reporting/datamap.html +1 -1
- fides/ui-build/static/admin/settings/about/alpha.html +1 -1
- fides/ui-build/static/admin/settings/about.html +1 -1
- fides/ui-build/static/admin/settings/consent/[configuration_id]/[purpose_id].html +1 -1
- fides/ui-build/static/admin/settings/consent.html +1 -1
- fides/ui-build/static/admin/settings/custom-fields.html +1 -1
- fides/ui-build/static/admin/settings/domain-records.html +1 -1
- fides/ui-build/static/admin/settings/domains.html +1 -1
- fides/ui-build/static/admin/settings/email-templates.html +1 -1
- fides/ui-build/static/admin/settings/locations.html +1 -1
- fides/ui-build/static/admin/settings/organization.html +1 -1
- fides/ui-build/static/admin/settings/regulations.html +1 -1
- fides/ui-build/static/admin/systems/configure/[id]/test-datasets.html +1 -1
- fides/ui-build/static/admin/systems/configure/[id].html +1 -1
- fides/ui-build/static/admin/systems.html +1 -1
- fides/ui-build/static/admin/taxonomy.html +1 -1
- fides/ui-build/static/admin/user-management/new.html +1 -1
- fides/ui-build/static/admin/user-management/profile/[id].html +1 -1
- fides/ui-build/static/admin/user-management.html +1 -1
- {ethyca_fides-2.63.0rc3.dist-info → ethyca_fides-2.63.1.dist-info}/WHEEL +0 -0
- {ethyca_fides-2.63.0rc3.dist-info → ethyca_fides-2.63.1.dist-info}/entry_points.txt +0 -0
- {ethyca_fides-2.63.0rc3.dist-info → ethyca_fides-2.63.1.dist-info}/licenses/LICENSE +0 -0
- {ethyca_fides-2.63.0rc3.dist-info → ethyca_fides-2.63.1.dist-info}/top_level.txt +0 -0
- /fides/api/models/{detection_discovery.py → detection_discovery/core.py} +0 -0
- /fides/ui-build/static/admin/_next/static/{XobHpfndIH7IpV30u2vGV → SZn_Fpr_qG1COMjkdloep}/_buildManifest.js +0 -0
- /fides/ui-build/static/admin/_next/static/{XobHpfndIH7IpV30u2vGV → SZn_Fpr_qG1COMjkdloep}/_ssgManifest.js +0 -0
@@ -262,14 +262,14 @@ def upload_access_results( # pylint: disable=R0912
|
|
262
262
|
privacy_request.add_success_execution_log(
|
263
263
|
session,
|
264
264
|
connection_key=None,
|
265
|
-
dataset_name="Access
|
265
|
+
dataset_name="Access package upload",
|
266
266
|
collection_name=None,
|
267
|
-
message="Access
|
267
|
+
message="Access package upload successful for privacy request.",
|
268
268
|
action_type=ActionType.access,
|
269
269
|
)
|
270
270
|
logger.bind(
|
271
271
|
time_taken=time.time() - start_time,
|
272
|
-
).info("Access
|
272
|
+
).info("Access package upload successful for privacy request.")
|
273
273
|
except common_exceptions.StorageUploadError as exc:
|
274
274
|
logger.bind(
|
275
275
|
policy_key=policy.key,
|
@@ -279,9 +279,9 @@ def upload_access_results( # pylint: disable=R0912
|
|
279
279
|
privacy_request.add_error_execution_log(
|
280
280
|
session,
|
281
281
|
connection_key=None,
|
282
|
-
dataset_name="Access
|
282
|
+
dataset_name="Access package upload",
|
283
283
|
collection_name=None,
|
284
|
-
message="Access
|
284
|
+
message="Access package upload failed for privacy request.",
|
285
285
|
action_type=ActionType.access,
|
286
286
|
)
|
287
287
|
privacy_request.status = PrivacyRequestStatus.error
|
@@ -17,11 +17,11 @@ from fides.api.models.privacy_request import (
|
|
17
17
|
PrivacyRequest,
|
18
18
|
RequestTask,
|
19
19
|
)
|
20
|
+
from fides.api.models.worker_task import ExecutionLogStatus
|
20
21
|
from fides.api.schemas.drp_privacy_request import DrpPrivacyRequestCreate
|
21
22
|
from fides.api.schemas.masking.masking_secrets import MaskingSecretCache
|
22
23
|
from fides.api.schemas.policy import ActionType
|
23
24
|
from fides.api.schemas.privacy_request import (
|
24
|
-
ExecutionLogStatus,
|
25
25
|
PrivacyRequestResponse,
|
26
26
|
PrivacyRequestStatus,
|
27
27
|
)
|
@@ -29,8 +29,8 @@ from fides.api.models.privacy_request import (
|
|
29
29
|
RequestTask,
|
30
30
|
TraversalDetails,
|
31
31
|
)
|
32
|
+
from fides.api.models.worker_task import ExecutionLogStatus
|
32
33
|
from fides.api.schemas.policy import ActionType
|
33
|
-
from fides.api.schemas.privacy_request import ExecutionLogStatus
|
34
34
|
from fides.api.task.deprecated_graph_task import format_data_use_map_for_caching
|
35
35
|
from fides.api.task.execute_request_tasks import log_task_queued, queue_request_task
|
36
36
|
from fides.api.util.logger_context_utils import log_context
|
@@ -22,8 +22,9 @@ from fides.api.common_exceptions import (
|
|
22
22
|
from fides.api.graph.config import TERMINATOR_ADDRESS, CollectionAddress
|
23
23
|
from fides.api.models.connectionconfig import ConnectionConfig
|
24
24
|
from fides.api.models.privacy_request import ExecutionLog, PrivacyRequest, RequestTask
|
25
|
+
from fides.api.models.worker_task import ExecutionLogStatus
|
25
26
|
from fides.api.schemas.policy import ActionType, CurrentStep
|
26
|
-
from fides.api.schemas.privacy_request import
|
27
|
+
from fides.api.schemas.privacy_request import PrivacyRequestStatus
|
27
28
|
from fides.api.task.graph_task import (
|
28
29
|
GraphTask,
|
29
30
|
mark_current_and_downstream_nodes_as_failed,
|
@@ -145,7 +146,7 @@ def can_run_task_body(
|
|
145
146
|
if request_task.is_terminator_task:
|
146
147
|
logger.info(
|
147
148
|
"Terminator {} task reached.",
|
148
|
-
request_task.action_type
|
149
|
+
request_task.action_type,
|
149
150
|
)
|
150
151
|
return False
|
151
152
|
if request_task.is_root_task:
|
@@ -154,7 +155,7 @@ def can_run_task_body(
|
|
154
155
|
if request_task.status != ExecutionLogStatus.pending:
|
155
156
|
logger_method(request_task)(
|
156
157
|
"Skipping {} task {} with status {}.",
|
157
|
-
request_task.action_type
|
158
|
+
request_task.action_type,
|
158
159
|
request_task.collection_address,
|
159
160
|
request_task.status.value,
|
160
161
|
)
|
@@ -449,7 +450,7 @@ def log_task_complete(request_task: RequestTask) -> None:
|
|
449
450
|
"""Convenience method for logging task completion"""
|
450
451
|
logger.info(
|
451
452
|
"{} task {} is {}.",
|
452
|
-
request_task.action_type.
|
453
|
+
request_task.action_type.capitalize(),
|
453
454
|
request_task.collection_address,
|
454
455
|
request_task.status.value,
|
455
456
|
)
|
@@ -478,9 +479,9 @@ def _order_tasks_by_input_key(
|
|
478
479
|
|
479
480
|
|
480
481
|
mapping = {
|
481
|
-
ActionType.access: run_access_node,
|
482
|
-
ActionType.erasure: run_erasure_node,
|
483
|
-
ActionType.consent: run_consent_node,
|
482
|
+
ActionType.access.value: run_access_node,
|
483
|
+
ActionType.erasure.value: run_erasure_node,
|
484
|
+
ActionType.consent.value: run_consent_node,
|
484
485
|
}
|
485
486
|
|
486
487
|
|
@@ -504,7 +505,7 @@ def log_task_queued(request_task: RequestTask, location: str) -> None:
|
|
504
505
|
"""Helper for logging that tasks are queued"""
|
505
506
|
logger_method(request_task)(
|
506
507
|
"Queuing {} task {} from {}.",
|
507
|
-
request_task.action_type
|
508
|
+
request_task.action_type,
|
508
509
|
request_task.collection_address,
|
509
510
|
location,
|
510
511
|
)
|
fides/api/task/graph_task.py
CHANGED
@@ -39,8 +39,8 @@ from fides.api.models.datasetconfig import DatasetConfig
|
|
39
39
|
from fides.api.models.policy import Policy, Rule
|
40
40
|
from fides.api.models.privacy_preference import PrivacyPreferenceHistory
|
41
41
|
from fides.api.models.privacy_request import ExecutionLog, PrivacyRequest, RequestTask
|
42
|
+
from fides.api.models.worker_task import ExecutionLogStatus
|
42
43
|
from fides.api.schemas.policy import ActionType, CurrentStep
|
43
|
-
from fides.api.schemas.privacy_request import ExecutionLogStatus
|
44
44
|
from fides.api.service.connectors.base_connector import BaseConnector
|
45
45
|
from fides.api.task.consolidate_query_matches import consolidate_query_matches
|
46
46
|
from fides.api.task.filter_element_match import filter_element_match
|
@@ -503,12 +503,20 @@ class GraphTask(ABC): # pylint: disable=too-many-instance-attributes
|
|
503
503
|
self.post_process_input_data(formatted_input_data)
|
504
504
|
)
|
505
505
|
|
506
|
-
# For erasures:
|
507
|
-
|
508
|
-
|
506
|
+
# For erasures: build placeholder version incrementally to avoid holding two full
|
507
|
+
# copies of the data in memory simultaneously.
|
508
|
+
placeholder_output: List[Row] = []
|
509
|
+
for original_row in output:
|
510
|
+
# Create a deep copy of the *single* row, transform it, then append to
|
511
|
+
# the placeholder list. Peak memory at any point is one extra row rather
|
512
|
+
# than an entire dataset.
|
513
|
+
row_copy = copy.deepcopy(original_row)
|
509
514
|
filter_element_match(
|
510
|
-
|
515
|
+
row_copy,
|
516
|
+
query_paths=post_processed_node_input_data,
|
517
|
+
delete_elements=False,
|
511
518
|
)
|
519
|
+
placeholder_output.append(row_copy)
|
512
520
|
|
513
521
|
# For DSR 3.0, save data to build masking requests directly
|
514
522
|
# on the Request Task.
|
@@ -519,11 +527,14 @@ class GraphTask(ABC): # pylint: disable=too-many-instance-attributes
|
|
519
527
|
# TODO Remove when we stop support for DSR 2.0
|
520
528
|
# Save data to build masking requests for DSR 2.0 in Redis.
|
521
529
|
# Results saved with matching array elements preserved
|
522
|
-
|
523
|
-
|
524
|
-
|
530
|
+
if not CONFIG.execution.use_dsr_3_0:
|
531
|
+
self.resources.cache_results_with_placeholders(
|
532
|
+
f"access_request__{self.key}", placeholder_output
|
533
|
+
)
|
525
534
|
|
526
|
-
# For access request results,
|
535
|
+
# For access request results, mutate rows in-place to remove non-matching
|
536
|
+
# array elements. We already iterated over `output` above, so reuse the same
|
537
|
+
# loop structure to keep cache locality.
|
527
538
|
for row in output:
|
528
539
|
logger.info(
|
529
540
|
"Filtering row in {} for matching array elements.",
|
@@ -537,7 +548,8 @@ class GraphTask(ABC): # pylint: disable=too-many-instance-attributes
|
|
537
548
|
|
538
549
|
# TODO Remove when we stop support for DSR 2.0
|
539
550
|
# Saves intermediate access results for DSR 2.0 in Redis
|
540
|
-
|
551
|
+
if not CONFIG.execution.use_dsr_3_0:
|
552
|
+
self.resources.cache_object(f"access_request__{self.key}", output)
|
541
553
|
|
542
554
|
# Return filtered rows with non-matched array data removed.
|
543
555
|
return output
|
fides/api/util/consent_util.py
CHANGED
@@ -18,7 +18,7 @@ from fides.api.models.privacy_request import (
|
|
18
18
|
)
|
19
19
|
from fides.api.models.sql_models import System # type: ignore[attr-defined]
|
20
20
|
from fides.api.models.tcf_purpose_overrides import TCFPurposeOverride
|
21
|
-
from fides.api.
|
21
|
+
from fides.api.models.worker_task import ExecutionLogStatus
|
22
22
|
from fides.api.schemas.redis_cache import Identity
|
23
23
|
|
24
24
|
|
@@ -0,0 +1,102 @@
|
|
1
|
+
"""
|
2
|
+
Helpers for estimating the size of large collections of access data.
|
3
|
+
"""
|
4
|
+
|
5
|
+
from __future__ import annotations
|
6
|
+
|
7
|
+
import json
|
8
|
+
import sys
|
9
|
+
from typing import List, Optional
|
10
|
+
|
11
|
+
from loguru import logger
|
12
|
+
|
13
|
+
from fides.api.util.collection_util import Row
|
14
|
+
from fides.api.util.custom_json_encoder import CustomJSONEncoder
|
15
|
+
|
16
|
+
# 640MB threshold for external storage
|
17
|
+
# We only generate an estimated size for large datasets so we want to be conservative
|
18
|
+
# and fallback to external storage even if we haven't hit the 1GB max limit.
|
19
|
+
# We also want to pad for encryption and base64 encoding.
|
20
|
+
LARGE_DATA_THRESHOLD_BYTES = 640 * 1024 * 1024 # 640MB
|
21
|
+
|
22
|
+
|
23
|
+
def calculate_data_size(data: List[Row]) -> int: # noqa: D401 – utility function
|
24
|
+
"""Return an approximate JSON-serialized size (in bytes) for a list of *Row*.
|
25
|
+
|
26
|
+
The implementation purposefully avoids serializing the entire payload when
|
27
|
+
*data* is large. For collections >1000 rows we sample a subset, measure the
|
28
|
+
encoded size, then extrapolate. This keeps memory usage bounded while still
|
29
|
+
giving us an order-of-magnitude estimate suitable for "should I stream this
|
30
|
+
out to S3?" decisions.
|
31
|
+
"""
|
32
|
+
|
33
|
+
if not data:
|
34
|
+
return 0
|
35
|
+
|
36
|
+
try:
|
37
|
+
data_count = len(data)
|
38
|
+
|
39
|
+
# For very large datasets, estimate size from a sample to avoid memory issues
|
40
|
+
if data_count > 1000:
|
41
|
+
logger.debug(
|
42
|
+
f"Calculating size for large dataset ({data_count} rows) using sampling"
|
43
|
+
)
|
44
|
+
|
45
|
+
sample_size = min(500, max(100, data_count // 20)) # 5 % capped at 500
|
46
|
+
|
47
|
+
# stratified sampling – take items spaced across the set when possible
|
48
|
+
if data_count > sample_size * 3:
|
49
|
+
step = data_count // sample_size
|
50
|
+
sample_indices = list(range(0, data_count, step))[:sample_size]
|
51
|
+
sample = [data[i] for i in sample_indices]
|
52
|
+
else:
|
53
|
+
sample = data[:sample_size]
|
54
|
+
|
55
|
+
sample_json = json.dumps(
|
56
|
+
sample, cls=CustomJSONEncoder, separators=(",", ":")
|
57
|
+
)
|
58
|
+
sample_bytes = len(sample_json.encode("utf-8"))
|
59
|
+
|
60
|
+
avg_record_size = sample_bytes / sample_size
|
61
|
+
content_size = int(avg_record_size * data_count)
|
62
|
+
|
63
|
+
# overhead: 2 bytes for [] plus a comma between every record plus 1 % slack
|
64
|
+
structure_overhead = 2 + (data_count - 1) + int(content_size * 0.01)
|
65
|
+
return content_size + structure_overhead
|
66
|
+
|
67
|
+
# small datasets – just measure
|
68
|
+
json_str = json.dumps(data, cls=CustomJSONEncoder, separators=(",", ":"))
|
69
|
+
return len(json_str.encode("utf-8"))
|
70
|
+
|
71
|
+
except (TypeError, ValueError) as exc:
|
72
|
+
logger.warning(
|
73
|
+
f"Failed to calculate JSON size, falling back to sys.getsizeof: {exc}"
|
74
|
+
)
|
75
|
+
return sys.getsizeof(data)
|
76
|
+
|
77
|
+
|
78
|
+
def is_large_data(
|
79
|
+
data: List[Row], threshold_bytes: Optional[int] = None
|
80
|
+
) -> bool: # noqa: D401
|
81
|
+
"""Return *True* if *data* is likely to exceed *threshold_bytes* when serialized."""
|
82
|
+
|
83
|
+
if not data:
|
84
|
+
return False
|
85
|
+
|
86
|
+
threshold = (
|
87
|
+
threshold_bytes if threshold_bytes is not None else LARGE_DATA_THRESHOLD_BYTES
|
88
|
+
)
|
89
|
+
size = calculate_data_size(data)
|
90
|
+
if size > threshold:
|
91
|
+
logger.info(
|
92
|
+
f"Data size ({size:,} bytes) exceeds threshold ({threshold:,} bytes) – using external storage"
|
93
|
+
)
|
94
|
+
return True
|
95
|
+
return False
|
96
|
+
|
97
|
+
|
98
|
+
__all__ = [
|
99
|
+
"calculate_data_size",
|
100
|
+
"is_large_data",
|
101
|
+
"LARGE_DATA_THRESHOLD_BYTES",
|
102
|
+
]
|
@@ -0,0 +1,271 @@
|
|
1
|
+
"""
|
2
|
+
AES GCM encryption utilities with SQLAlchemy-Utils and cryptography library implementations.
|
3
|
+
|
4
|
+
This module provides simplified encrypt/decrypt functions using two approaches:
|
5
|
+
1. SQLAlchemy-Utils AesGcmEngine (compatible with existing database encryption)
|
6
|
+
2. Cryptography library with chunked processing (better performance, standard library)
|
7
|
+
"""
|
8
|
+
|
9
|
+
import base64
|
10
|
+
import hashlib
|
11
|
+
import json
|
12
|
+
import os
|
13
|
+
from typing import Any, List, Optional, Union
|
14
|
+
|
15
|
+
from cryptography.hazmat.backends import default_backend
|
16
|
+
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
|
17
|
+
from loguru import logger
|
18
|
+
from sqlalchemy_utils.types.encrypted.encrypted_type import AesGcmEngine
|
19
|
+
|
20
|
+
from fides.api.util.collection_util import Row
|
21
|
+
from fides.api.util.custom_json_encoder import CustomJSONEncoder, _custom_decoder
|
22
|
+
from fides.config import CONFIG
|
23
|
+
|
24
|
+
|
25
|
+
class EncryptionError(Exception):
|
26
|
+
"""Raised when encryption/decryption operations fail"""
|
27
|
+
|
28
|
+
|
29
|
+
# SQLAlchemy-Utils Implementation (for compatibility with existing database encryption)
|
30
|
+
def encrypt_with_sqlalchemy_utils(data: List[Row]) -> bytes:
|
31
|
+
"""
|
32
|
+
Serialize and encrypt data using CustomJSONEncoder and SQLAlchemy-Utils AesGcmEngine.
|
33
|
+
|
34
|
+
This approach is compatible with existing database encryption but has lower performance.
|
35
|
+
|
36
|
+
Args:
|
37
|
+
data: Raw data to serialize and encrypt
|
38
|
+
|
39
|
+
Returns:
|
40
|
+
Encrypted bytes
|
41
|
+
|
42
|
+
Raises:
|
43
|
+
EncryptionError: If serialization or encryption fails
|
44
|
+
"""
|
45
|
+
try:
|
46
|
+
# Serialize using CustomJSONEncoder for consistent ObjectId handling
|
47
|
+
serialized_data = json.dumps(data, cls=CustomJSONEncoder, separators=(",", ":"))
|
48
|
+
data_bytes = serialized_data.encode("utf-8")
|
49
|
+
|
50
|
+
# Encrypt using SQLAlchemy-Utils AesGcmEngine
|
51
|
+
engine = AesGcmEngine()
|
52
|
+
key = CONFIG.security.app_encryption_key
|
53
|
+
engine._update_key(key) # pylint: disable=protected-access
|
54
|
+
|
55
|
+
# AesGcmEngine expects string input
|
56
|
+
data_str = data_bytes.decode("utf-8")
|
57
|
+
encrypted_data = engine.encrypt(data_str)
|
58
|
+
encrypted_bytes = encrypted_data.encode("utf-8")
|
59
|
+
|
60
|
+
logger.debug(
|
61
|
+
f"SQLAlchemy-Utils: Encrypted {len(data_bytes)} bytes to {len(encrypted_bytes)} bytes"
|
62
|
+
)
|
63
|
+
return encrypted_bytes
|
64
|
+
|
65
|
+
except Exception as e:
|
66
|
+
logger.error(f"SQLAlchemy-Utils encryption failed: {e}")
|
67
|
+
raise EncryptionError(f"SQLAlchemy-Utils encryption failed: {str(e)}")
|
68
|
+
|
69
|
+
|
70
|
+
def decrypt_with_sqlalchemy_utils(encrypted_bytes: bytes) -> List[Row]:
|
71
|
+
"""
|
72
|
+
Decrypt and deserialize data using SQLAlchemy-Utils AesGcmEngine and _custom_decoder.
|
73
|
+
|
74
|
+
Args:
|
75
|
+
encrypted_bytes: Encrypted data bytes to decrypt
|
76
|
+
|
77
|
+
Returns:
|
78
|
+
Deserialized data
|
79
|
+
|
80
|
+
Raises:
|
81
|
+
EncryptionError: If decryption or deserialization fails
|
82
|
+
"""
|
83
|
+
try:
|
84
|
+
# Decrypt using SQLAlchemy-Utils AesGcmEngine
|
85
|
+
engine = AesGcmEngine()
|
86
|
+
key = CONFIG.security.app_encryption_key
|
87
|
+
engine._update_key(key) # pylint: disable=protected-access
|
88
|
+
|
89
|
+
# AesGcmEngine expects string input
|
90
|
+
encrypted_str = encrypted_bytes.decode("utf-8")
|
91
|
+
decrypted_data = engine.decrypt(encrypted_str)
|
92
|
+
|
93
|
+
# Deserialize using _custom_decoder for consistent ObjectId handling
|
94
|
+
data = json.loads(decrypted_data, object_hook=_custom_decoder)
|
95
|
+
|
96
|
+
logger.debug(
|
97
|
+
f"SQLAlchemy-Utils: Decrypted {len(encrypted_bytes)} bytes to {len(data)} records"
|
98
|
+
)
|
99
|
+
return data
|
100
|
+
|
101
|
+
except Exception as e:
|
102
|
+
logger.error(f"SQLAlchemy-Utils decryption failed: {e}")
|
103
|
+
raise EncryptionError(f"SQLAlchemy-Utils decryption failed: {str(e)}")
|
104
|
+
|
105
|
+
|
106
|
+
# Cryptography Library Implementation (standard, chunked processing)
|
107
|
+
def encrypt_with_cryptography(
|
108
|
+
data: Union[List[Row], Any], chunk_size: Optional[int] = None
|
109
|
+
) -> bytes:
|
110
|
+
"""
|
111
|
+
Serialize and encrypt data using the standard cryptography library with chunked processing.
|
112
|
+
|
113
|
+
This provides fast performance and memory efficiency for large datasets.
|
114
|
+
|
115
|
+
Args:
|
116
|
+
data: Raw data to serialize and encrypt
|
117
|
+
chunk_size: Size of chunks for processing (default 4MB)
|
118
|
+
|
119
|
+
Returns:
|
120
|
+
Encrypted bytes (base64-encoded string as bytes)
|
121
|
+
|
122
|
+
Raises:
|
123
|
+
EncryptionError: If serialization or encryption fails
|
124
|
+
"""
|
125
|
+
try:
|
126
|
+
# Set default chunk size
|
127
|
+
if chunk_size is None:
|
128
|
+
chunk_size = 4 * 1024 * 1024 # 4MB chunks
|
129
|
+
|
130
|
+
# Serialize using CustomJSONEncoder for consistent handling
|
131
|
+
serialized_data = json.dumps(data, cls=CustomJSONEncoder, separators=(",", ":"))
|
132
|
+
plaintext = serialized_data.encode("utf-8")
|
133
|
+
|
134
|
+
data_size_mb = len(plaintext) / (1024 * 1024)
|
135
|
+
chunk_size_mb = chunk_size / (1024 * 1024)
|
136
|
+
estimated_chunks = len(plaintext) // chunk_size + (
|
137
|
+
1 if len(plaintext) % chunk_size else 0
|
138
|
+
)
|
139
|
+
record_count = len(data) if isinstance(data, list) else "N/A"
|
140
|
+
|
141
|
+
logger.info(
|
142
|
+
f"Cryptography: Encrypting {record_count} records ({data_size_mb:.1f} MB) "
|
143
|
+
f"using {chunk_size_mb:.0f}MB chunks (~{estimated_chunks} chunks)"
|
144
|
+
)
|
145
|
+
|
146
|
+
# Use SQLAlchemy-Utils compatible key (SHA256 hash of app key)
|
147
|
+
key = _get_sqlalchemy_compatible_key()
|
148
|
+
nonce = os.urandom(12) # 96-bit nonce for AES-GCM
|
149
|
+
|
150
|
+
# Create cipher
|
151
|
+
cipher = Cipher(
|
152
|
+
algorithms.AES(key), modes.GCM(nonce), backend=default_backend()
|
153
|
+
)
|
154
|
+
encryptor = cipher.encryptor()
|
155
|
+
|
156
|
+
# Process in chunks for memory efficiency
|
157
|
+
ciphertext_chunks = []
|
158
|
+
for i in range(0, len(plaintext), chunk_size):
|
159
|
+
chunk = plaintext[i : i + chunk_size]
|
160
|
+
ciphertext_chunks.append(encryptor.update(chunk))
|
161
|
+
|
162
|
+
# Finalize and get tag
|
163
|
+
encryptor.finalize()
|
164
|
+
tag = encryptor.tag
|
165
|
+
|
166
|
+
# Combine in same format as SQLAlchemy-Utils: [nonce/iv][tag][ciphertext]
|
167
|
+
ciphertext = b"".join(ciphertext_chunks)
|
168
|
+
binary_result = nonce + tag + ciphertext
|
169
|
+
|
170
|
+
# Base64 encode to match SQLAlchemy-Utils format
|
171
|
+
base64_result = base64.b64encode(binary_result).decode("utf-8")
|
172
|
+
result_bytes = base64_result.encode("utf-8")
|
173
|
+
|
174
|
+
encrypted_size_mb = len(result_bytes) / (1024 * 1024)
|
175
|
+
logger.info(
|
176
|
+
f"Cryptography: Encrypted successfully - "
|
177
|
+
f"{len(ciphertext_chunks)} chunks, {encrypted_size_mb:.1f} MB output (base64)"
|
178
|
+
)
|
179
|
+
|
180
|
+
return result_bytes
|
181
|
+
|
182
|
+
except Exception as e:
|
183
|
+
logger.error(f"Cryptography encryption failed: {e}")
|
184
|
+
raise EncryptionError(f"Cryptography encryption failed: {str(e)}")
|
185
|
+
|
186
|
+
|
187
|
+
def decrypt_with_cryptography(
|
188
|
+
encrypted_bytes: bytes, chunk_size: Optional[int] = None
|
189
|
+
) -> Union[List[Row], Any]:
|
190
|
+
"""
|
191
|
+
Decrypt and deserialize data using the cryptography library with chunked processing.
|
192
|
+
|
193
|
+
Args:
|
194
|
+
encrypted_bytes: Encrypted data (base64-encoded string as bytes)
|
195
|
+
chunk_size: Size of chunks for processing (default 4MB)
|
196
|
+
|
197
|
+
Returns:
|
198
|
+
Deserialized data
|
199
|
+
|
200
|
+
Raises:
|
201
|
+
EncryptionError: If decryption or deserialization fails
|
202
|
+
"""
|
203
|
+
try:
|
204
|
+
# Set default chunk size
|
205
|
+
if chunk_size is None:
|
206
|
+
chunk_size = 4 * 1024 * 1024 # 4MB chunks
|
207
|
+
|
208
|
+
# Decode from base64
|
209
|
+
encrypted_str = encrypted_bytes.decode("utf-8")
|
210
|
+
binary_data = base64.b64decode(encrypted_str)
|
211
|
+
|
212
|
+
# Extract components in SQLAlchemy-Utils format: [nonce/iv][tag][ciphertext]
|
213
|
+
if len(binary_data) < 28: # 12 (nonce) + 16 (tag)
|
214
|
+
raise ValueError("Encrypted data too short")
|
215
|
+
|
216
|
+
nonce = binary_data[:12] # First 12 bytes: nonce/IV
|
217
|
+
tag = binary_data[12:28] # Next 16 bytes: tag
|
218
|
+
ciphertext = binary_data[28:] # Remaining bytes: ciphertext
|
219
|
+
|
220
|
+
encrypted_size_mb = len(encrypted_bytes) / (1024 * 1024)
|
221
|
+
chunk_size_mb = chunk_size / (1024 * 1024)
|
222
|
+
estimated_chunks = len(ciphertext) // chunk_size + (
|
223
|
+
1 if len(ciphertext) % chunk_size else 0
|
224
|
+
)
|
225
|
+
|
226
|
+
logger.info(
|
227
|
+
f"Cryptography: Decrypting {encrypted_size_mb:.1f} MB "
|
228
|
+
f"using {chunk_size_mb:.0f}MB chunks (~{estimated_chunks} chunks)"
|
229
|
+
)
|
230
|
+
|
231
|
+
# Use SQLAlchemy-Utils compatible key
|
232
|
+
key = _get_sqlalchemy_compatible_key()
|
233
|
+
cipher = Cipher(
|
234
|
+
algorithms.AES(key), modes.GCM(nonce, tag), backend=default_backend()
|
235
|
+
)
|
236
|
+
decryptor = cipher.decryptor()
|
237
|
+
|
238
|
+
# Process in chunks for memory efficiency
|
239
|
+
plaintext_chunks = []
|
240
|
+
for i in range(0, len(ciphertext), chunk_size):
|
241
|
+
chunk = ciphertext[i : i + chunk_size]
|
242
|
+
plaintext_chunks.append(decryptor.update(chunk))
|
243
|
+
|
244
|
+
# Finalize
|
245
|
+
decryptor.finalize()
|
246
|
+
|
247
|
+
# Combine and deserialize
|
248
|
+
plaintext = b"".join(plaintext_chunks)
|
249
|
+
decrypted_json = plaintext.decode("utf-8")
|
250
|
+
data = json.loads(decrypted_json, object_hook=_custom_decoder)
|
251
|
+
|
252
|
+
record_count = len(data) if isinstance(data, list) else "N/A"
|
253
|
+
logger.info(f"Cryptography: Successfully decrypted {record_count} records")
|
254
|
+
|
255
|
+
return data
|
256
|
+
|
257
|
+
except Exception as e:
|
258
|
+
logger.error(f"Cryptography decryption failed: {e}")
|
259
|
+
raise EncryptionError(f"Cryptography decryption failed: {str(e)}")
|
260
|
+
|
261
|
+
|
262
|
+
def _get_sqlalchemy_compatible_key() -> bytes:
|
263
|
+
"""Get 32-byte encryption key compatible with SQLAlchemy-Utils AesGcmEngine."""
|
264
|
+
app_key = CONFIG.security.app_encryption_key.encode(CONFIG.security.encoding)
|
265
|
+
# SQLAlchemy-Utils always uses SHA256 hash of the key
|
266
|
+
return hashlib.sha256(app_key).digest()
|
267
|
+
|
268
|
+
|
269
|
+
# Public API - Use cryptography by default for new operations
|
270
|
+
encrypt_data = encrypt_with_cryptography
|
271
|
+
decrypt_data = decrypt_with_cryptography
|
File without changes
|