ethyca-fides 2.63.0rc3__py2.py3-none-any.whl → 2.63.1__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (129)
  1. {ethyca_fides-2.63.0rc3.dist-info → ethyca_fides-2.63.1.dist-info}/METADATA +1 -1
  2. {ethyca_fides-2.63.0rc3.dist-info → ethyca_fides-2.63.1.dist-info}/RECORD +129 -110
  3. fides/_version.py +3 -3
  4. fides/api/alembic/migrations/versions/29e56fa1fdb3_add_monitor_tasks.py +147 -0
  5. fides/api/alembic/migrations/versions/5efcdf18438e_add_manual_task_tables.py +160 -0
  6. fides/api/api/v1/endpoints/privacy_request_endpoints.py +4 -4
  7. fides/api/db/base.py +7 -1
  8. fides/api/models/connectionconfig.py +1 -1
  9. fides/api/models/detection_discovery/__init__.py +35 -0
  10. fides/api/models/detection_discovery/monitor_task.py +162 -0
  11. fides/api/models/field_types/__init__.py +5 -0
  12. fides/api/models/field_types/encrypted_large_data.py +151 -0
  13. fides/api/models/manual_tasks/__init__.py +8 -0
  14. fides/api/models/manual_tasks/manual_task.py +110 -0
  15. fides/api/models/manual_tasks/manual_task_log.py +100 -0
  16. fides/api/models/privacy_preference.py +1 -1
  17. fides/api/models/privacy_request/execution_log.py +3 -31
  18. fides/api/models/privacy_request/privacy_request.py +16 -3
  19. fides/api/models/privacy_request/request_task.py +36 -25
  20. fides/api/models/worker_task.py +96 -0
  21. fides/api/schemas/external_storage.py +22 -0
  22. fides/api/schemas/manual_tasks/__init__.py +0 -0
  23. fides/api/schemas/manual_tasks/manual_task_schemas.py +79 -0
  24. fides/api/schemas/manual_tasks/manual_task_status.py +151 -0
  25. fides/api/schemas/privacy_request.py +1 -12
  26. fides/api/service/connectors/base_erasure_email_connector.py +1 -1
  27. fides/api/service/connectors/consent_email_connector.py +2 -1
  28. fides/api/service/connectors/dynamic_erasure_email_connector.py +2 -1
  29. fides/api/service/connectors/erasure_email_connector.py +1 -1
  30. fides/api/service/external_data_storage.py +371 -0
  31. fides/api/service/privacy_request/request_runner_service.py +5 -5
  32. fides/api/service/privacy_request/request_service.py +1 -1
  33. fides/api/task/create_request_tasks.py +1 -1
  34. fides/api/task/execute_request_tasks.py +9 -8
  35. fides/api/task/graph_task.py +22 -10
  36. fides/api/util/consent_util.py +1 -1
  37. fides/api/util/data_size.py +102 -0
  38. fides/api/util/encryption/aes_gcm_encryption_util.py +271 -0
  39. fides/service/manual_tasks/__init__.py +0 -0
  40. fides/service/manual_tasks/manual_task_service.py +150 -0
  41. fides/service/privacy_request/privacy_request_service.py +1 -1
  42. fides/ui-build/static/admin/404.html +1 -1
  43. fides/ui-build/static/admin/add-systems/manual.html +1 -1
  44. fides/ui-build/static/admin/add-systems/multiple.html +1 -1
  45. fides/ui-build/static/admin/add-systems.html +1 -1
  46. fides/ui-build/static/admin/consent/configure/add-vendors.html +1 -1
  47. fides/ui-build/static/admin/consent/configure.html +1 -1
  48. fides/ui-build/static/admin/consent/privacy-experience/[id].html +1 -1
  49. fides/ui-build/static/admin/consent/privacy-experience/new.html +1 -1
  50. fides/ui-build/static/admin/consent/privacy-experience.html +1 -1
  51. fides/ui-build/static/admin/consent/privacy-notices/[id].html +1 -1
  52. fides/ui-build/static/admin/consent/privacy-notices/new.html +1 -1
  53. fides/ui-build/static/admin/consent/privacy-notices.html +1 -1
  54. fides/ui-build/static/admin/consent/properties.html +1 -1
  55. fides/ui-build/static/admin/consent/reporting.html +1 -1
  56. fides/ui-build/static/admin/consent.html +1 -1
  57. fides/ui-build/static/admin/data-catalog/[systemId]/projects/[projectUrn]/[resourceUrn].html +1 -1
  58. fides/ui-build/static/admin/data-catalog/[systemId]/projects/[projectUrn].html +1 -1
  59. fides/ui-build/static/admin/data-catalog/[systemId]/projects.html +1 -1
  60. fides/ui-build/static/admin/data-catalog/[systemId]/resources/[resourceUrn].html +1 -1
  61. fides/ui-build/static/admin/data-catalog/[systemId]/resources.html +1 -1
  62. fides/ui-build/static/admin/data-catalog.html +1 -1
  63. fides/ui-build/static/admin/data-discovery/action-center/[monitorId]/[systemId].html +1 -1
  64. fides/ui-build/static/admin/data-discovery/action-center/[monitorId].html +1 -1
  65. fides/ui-build/static/admin/data-discovery/action-center.html +1 -1
  66. fides/ui-build/static/admin/data-discovery/activity.html +1 -1
  67. fides/ui-build/static/admin/data-discovery/detection/[resourceUrn].html +1 -1
  68. fides/ui-build/static/admin/data-discovery/detection.html +1 -1
  69. fides/ui-build/static/admin/data-discovery/discovery/[resourceUrn].html +1 -1
  70. fides/ui-build/static/admin/data-discovery/discovery.html +1 -1
  71. fides/ui-build/static/admin/datamap.html +1 -1
  72. fides/ui-build/static/admin/dataset/[datasetId]/[collectionName]/[...subfieldNames].html +1 -1
  73. fides/ui-build/static/admin/dataset/[datasetId]/[collectionName].html +1 -1
  74. fides/ui-build/static/admin/dataset/[datasetId].html +1 -1
  75. fides/ui-build/static/admin/dataset/new.html +1 -1
  76. fides/ui-build/static/admin/dataset.html +1 -1
  77. fides/ui-build/static/admin/datastore-connection/[id].html +1 -1
  78. fides/ui-build/static/admin/datastore-connection/new.html +1 -1
  79. fides/ui-build/static/admin/datastore-connection.html +1 -1
  80. fides/ui-build/static/admin/index.html +1 -1
  81. fides/ui-build/static/admin/integrations/[id].html +1 -1
  82. fides/ui-build/static/admin/integrations.html +1 -1
  83. fides/ui-build/static/admin/login/[provider].html +1 -1
  84. fides/ui-build/static/admin/login.html +1 -1
  85. fides/ui-build/static/admin/messaging/[id].html +1 -1
  86. fides/ui-build/static/admin/messaging/add-template.html +1 -1
  87. fides/ui-build/static/admin/messaging.html +1 -1
  88. fides/ui-build/static/admin/poc/ant-components.html +1 -1
  89. fides/ui-build/static/admin/poc/form-experiments/AntForm.html +1 -1
  90. fides/ui-build/static/admin/poc/form-experiments/FormikAntFormItem.html +1 -1
  91. fides/ui-build/static/admin/poc/form-experiments/FormikControlled.html +1 -1
  92. fides/ui-build/static/admin/poc/form-experiments/FormikField.html +1 -1
  93. fides/ui-build/static/admin/poc/form-experiments/FormikSpreadField.html +1 -1
  94. fides/ui-build/static/admin/poc/forms.html +1 -1
  95. fides/ui-build/static/admin/poc/table-migration.html +1 -1
  96. fides/ui-build/static/admin/privacy-requests/[id].html +1 -1
  97. fides/ui-build/static/admin/privacy-requests/configure/messaging.html +1 -1
  98. fides/ui-build/static/admin/privacy-requests/configure/storage.html +1 -1
  99. fides/ui-build/static/admin/privacy-requests/configure.html +1 -1
  100. fides/ui-build/static/admin/privacy-requests.html +1 -1
  101. fides/ui-build/static/admin/properties/[id].html +1 -1
  102. fides/ui-build/static/admin/properties/add-property.html +1 -1
  103. fides/ui-build/static/admin/properties.html +1 -1
  104. fides/ui-build/static/admin/reporting/datamap.html +1 -1
  105. fides/ui-build/static/admin/settings/about/alpha.html +1 -1
  106. fides/ui-build/static/admin/settings/about.html +1 -1
  107. fides/ui-build/static/admin/settings/consent/[configuration_id]/[purpose_id].html +1 -1
  108. fides/ui-build/static/admin/settings/consent.html +1 -1
  109. fides/ui-build/static/admin/settings/custom-fields.html +1 -1
  110. fides/ui-build/static/admin/settings/domain-records.html +1 -1
  111. fides/ui-build/static/admin/settings/domains.html +1 -1
  112. fides/ui-build/static/admin/settings/email-templates.html +1 -1
  113. fides/ui-build/static/admin/settings/locations.html +1 -1
  114. fides/ui-build/static/admin/settings/organization.html +1 -1
  115. fides/ui-build/static/admin/settings/regulations.html +1 -1
  116. fides/ui-build/static/admin/systems/configure/[id]/test-datasets.html +1 -1
  117. fides/ui-build/static/admin/systems/configure/[id].html +1 -1
  118. fides/ui-build/static/admin/systems.html +1 -1
  119. fides/ui-build/static/admin/taxonomy.html +1 -1
  120. fides/ui-build/static/admin/user-management/new.html +1 -1
  121. fides/ui-build/static/admin/user-management/profile/[id].html +1 -1
  122. fides/ui-build/static/admin/user-management.html +1 -1
  123. {ethyca_fides-2.63.0rc3.dist-info → ethyca_fides-2.63.1.dist-info}/WHEEL +0 -0
  124. {ethyca_fides-2.63.0rc3.dist-info → ethyca_fides-2.63.1.dist-info}/entry_points.txt +0 -0
  125. {ethyca_fides-2.63.0rc3.dist-info → ethyca_fides-2.63.1.dist-info}/licenses/LICENSE +0 -0
  126. {ethyca_fides-2.63.0rc3.dist-info → ethyca_fides-2.63.1.dist-info}/top_level.txt +0 -0
  127. /fides/api/models/{detection_discovery.py → detection_discovery/core.py} +0 -0
  128. /fides/ui-build/static/admin/_next/static/{XobHpfndIH7IpV30u2vGV → SZn_Fpr_qG1COMjkdloep}/_buildManifest.js +0 -0
  129. /fides/ui-build/static/admin/_next/static/{XobHpfndIH7IpV30u2vGV → SZn_Fpr_qG1COMjkdloep}/_ssgManifest.js +0 -0
@@ -262,14 +262,14 @@ def upload_access_results( # pylint: disable=R0912
         privacy_request.add_success_execution_log(
             session,
             connection_key=None,
-            dataset_name="Access Package Upload",
+            dataset_name="Access package upload",
             collection_name=None,
-            message="Access Package Upload successful for privacy request.",
+            message="Access package upload successful for privacy request.",
             action_type=ActionType.access,
         )
         logger.bind(
             time_taken=time.time() - start_time,
-        ).info("Access Package Upload successful for privacy request.")
+        ).info("Access package upload successful for privacy request.")
     except common_exceptions.StorageUploadError as exc:
         logger.bind(
             policy_key=policy.key,
@@ -279,9 +279,9 @@ def upload_access_results( # pylint: disable=R0912
         privacy_request.add_error_execution_log(
             session,
             connection_key=None,
-            dataset_name="Access Package Upload",
+            dataset_name="Access package upload",
            collection_name=None,
-            message="Access Package Upload failed for privacy request.",
+            message="Access package upload failed for privacy request.",
             action_type=ActionType.access,
         )
         privacy_request.status = PrivacyRequestStatus.error
@@ -17,11 +17,11 @@ from fides.api.models.privacy_request import (
     PrivacyRequest,
     RequestTask,
 )
+from fides.api.models.worker_task import ExecutionLogStatus
 from fides.api.schemas.drp_privacy_request import DrpPrivacyRequestCreate
 from fides.api.schemas.masking.masking_secrets import MaskingSecretCache
 from fides.api.schemas.policy import ActionType
 from fides.api.schemas.privacy_request import (
-    ExecutionLogStatus,
     PrivacyRequestResponse,
     PrivacyRequestStatus,
 )
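Note: this hunk and several of the hunks below make the same mechanical change, moving ExecutionLogStatus out of the privacy request schemas and into the shared worker task model added in this release (fides/api/models/worker_task.py). Call sites now import it from the new location:

    # previously: from fides.api.schemas.privacy_request import ExecutionLogStatus
    from fides.api.models.worker_task import ExecutionLogStatus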
@@ -29,8 +29,8 @@ from fides.api.models.privacy_request import (
     RequestTask,
     TraversalDetails,
 )
+from fides.api.models.worker_task import ExecutionLogStatus
 from fides.api.schemas.policy import ActionType
-from fides.api.schemas.privacy_request import ExecutionLogStatus
 from fides.api.task.deprecated_graph_task import format_data_use_map_for_caching
 from fides.api.task.execute_request_tasks import log_task_queued, queue_request_task
 from fides.api.util.logger_context_utils import log_context
@@ -22,8 +22,9 @@ from fides.api.common_exceptions import (
 from fides.api.graph.config import TERMINATOR_ADDRESS, CollectionAddress
 from fides.api.models.connectionconfig import ConnectionConfig
 from fides.api.models.privacy_request import ExecutionLog, PrivacyRequest, RequestTask
+from fides.api.models.worker_task import ExecutionLogStatus
 from fides.api.schemas.policy import ActionType, CurrentStep
-from fides.api.schemas.privacy_request import ExecutionLogStatus, PrivacyRequestStatus
+from fides.api.schemas.privacy_request import PrivacyRequestStatus
 from fides.api.task.graph_task import (
     GraphTask,
     mark_current_and_downstream_nodes_as_failed,
@@ -145,7 +146,7 @@ def can_run_task_body(
     if request_task.is_terminator_task:
         logger.info(
             "Terminator {} task reached.",
-            request_task.action_type.value,
+            request_task.action_type,
         )
         return False
     if request_task.is_root_task:
@@ -154,7 +155,7 @@ def can_run_task_body(
     if request_task.status != ExecutionLogStatus.pending:
         logger_method(request_task)(
             "Skipping {} task {} with status {}.",
-            request_task.action_type.value,
+            request_task.action_type,
             request_task.collection_address,
             request_task.status.value,
         )
@@ -449,7 +450,7 @@ def log_task_complete(request_task: RequestTask) -> None:
     """Convenience method for logging task completion"""
     logger.info(
         "{} task {} is {}.",
-        request_task.action_type.value.capitalize(),
+        request_task.action_type.capitalize(),
         request_task.collection_address,
         request_task.status.value,
     )
@@ -478,9 +479,9 @@ def _order_tasks_by_input_key(
 
 
 mapping = {
-    ActionType.access: run_access_node,
-    ActionType.erasure: run_erasure_node,
-    ActionType.consent: run_consent_node,
+    ActionType.access.value: run_access_node,
+    ActionType.erasure.value: run_erasure_node,
+    ActionType.consent.value: run_consent_node,
 }
 
 
@@ -504,7 +505,7 @@ def log_task_queued(request_task: RequestTask, location: str) -> None:
     """Helper for logging that tasks are queued"""
     logger_method(request_task)(
         "Queuing {} task {} from {}.",
-        request_task.action_type.value,
+        request_task.action_type,
         request_task.collection_address,
         location,
     )
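Note: the hunks above stop calling .value when logging request_task.action_type and instead key the node-runner mapping by each action's string value. A minimal sketch of the keying change, assuming ActionType is a string-backed enum (it is imported from fides.api.schemas.policy in these hunks); the runner below is a placeholder, not the real Celery task:

    from enum import Enum


    class ActionType(str, Enum):  # assumption: mirrors fides.api.schemas.policy.ActionType
        access = "access"
        erasure = "erasure"
        consent = "consent"


    def run_access_node() -> str:  # placeholder runner, not the fides task
        return "access node ran"


    # Keying by .value lets a plain-string action type (for example, one read straight
    # from the database) be used as the lookup key without converting back to the enum.
    mapping = {ActionType.access.value: run_access_node}

    assert mapping["access"]() == "access node ran"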
@@ -39,8 +39,8 @@ from fides.api.models.datasetconfig import DatasetConfig
 from fides.api.models.policy import Policy, Rule
 from fides.api.models.privacy_preference import PrivacyPreferenceHistory
 from fides.api.models.privacy_request import ExecutionLog, PrivacyRequest, RequestTask
+from fides.api.models.worker_task import ExecutionLogStatus
 from fides.api.schemas.policy import ActionType, CurrentStep
-from fides.api.schemas.privacy_request import ExecutionLogStatus
 from fides.api.service.connectors.base_connector import BaseConnector
 from fides.api.task.consolidate_query_matches import consolidate_query_matches
 from fides.api.task.filter_element_match import filter_element_match
@@ -503,12 +503,20 @@ class GraphTask(ABC): # pylint: disable=too-many-instance-attributes
             self.post_process_input_data(formatted_input_data)
         )
 
-        # For erasures: cache results with non-matching array elements *replaced* with placeholder text
-        placeholder_output: List[Row] = copy.deepcopy(output)
-        for row in placeholder_output:
+        # For erasures: build placeholder version incrementally to avoid holding two full
+        # copies of the data in memory simultaneously.
+        placeholder_output: List[Row] = []
+        for original_row in output:
+            # Create a deep copy of the *single* row, transform it, then append to
+            # the placeholder list. Peak memory at any point is one extra row rather
+            # than an entire dataset.
+            row_copy = copy.deepcopy(original_row)
             filter_element_match(
-                row, query_paths=post_processed_node_input_data, delete_elements=False
+                row_copy,
+                query_paths=post_processed_node_input_data,
+                delete_elements=False,
             )
+            placeholder_output.append(row_copy)
 
         # For DSR 3.0, save data to build masking requests directly
         # on the Request Task.
@@ -519,11 +527,14 @@ class GraphTask(ABC): # pylint: disable=too-many-instance-attributes
         # TODO Remove when we stop support for DSR 2.0
         # Save data to build masking requests for DSR 2.0 in Redis.
         # Results saved with matching array elements preserved
-        self.resources.cache_results_with_placeholders(
-            f"access_request__{self.key}", placeholder_output
-        )
+        if not CONFIG.execution.use_dsr_3_0:
+            self.resources.cache_results_with_placeholders(
+                f"access_request__{self.key}", placeholder_output
+            )
 
-        # For access request results, cache results with non-matching array elements *removed*
+        # For access request results, mutate rows in-place to remove non-matching
+        # array elements. We already iterated over `output` above, so reuse the same
+        # loop structure to keep cache locality.
         for row in output:
             logger.info(
                 "Filtering row in {} for matching array elements.",
@@ -537,7 +548,8 @@ class GraphTask(ABC): # pylint: disable=too-many-instance-attributes
 
         # TODO Remove when we stop support for DSR 2.0
         # Saves intermediate access results for DSR 2.0 in Redis
-        self.resources.cache_object(f"access_request__{self.key}", output)
+        if not CONFIG.execution.use_dsr_3_0:
+            self.resources.cache_object(f"access_request__{self.key}", output)
 
         # Return filtered rows with non-matched array data removed.
         return output
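Note: the GraphTask hunks above replace a single copy.deepcopy of the entire result list with a per-row copy inside the loop, so (per the new inline comments) the transient cost of copying is bounded by one row at a time. A minimal standalone sketch of the pattern, not the fides source; Row and the transform callback are stand-ins:

    import copy
    from typing import Any, Callable, Dict, List

    Row = Dict[str, Any]  # assumption: stands in for fides.api.util.collection_util.Row


    def build_placeholder_rows(
        output: List[Row], transform: Callable[[Row], None]
    ) -> List[Row]:
        """Deep-copy and transform one row at a time instead of deep-copying the whole list."""
        placeholder_rows: List[Row] = []
        for original_row in output:
            row_copy = copy.deepcopy(original_row)  # copy a single row
            transform(row_copy)  # e.g. filter_element_match(row_copy, ..., delete_elements=False)
            placeholder_rows.append(row_copy)
        return placeholder_rows


    # The originals are left untouched; only the copies are transformed.
    rows = [{"email": "a@example.com"}, {"email": "b@example.com"}]
    masked = build_placeholder_rows(rows, lambda r: r.update(email="MASKED"))
    assert rows[0]["email"] == "a@example.com" and masked[0]["email"] == "MASKED"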
@@ -18,7 +18,7 @@ from fides.api.models.privacy_request import (
 )
 from fides.api.models.sql_models import System  # type: ignore[attr-defined]
 from fides.api.models.tcf_purpose_overrides import TCFPurposeOverride
-from fides.api.schemas.privacy_request import ExecutionLogStatus
+from fides.api.models.worker_task import ExecutionLogStatus
 from fides.api.schemas.redis_cache import Identity
 
 
@@ -0,0 +1,102 @@
+"""
+Helpers for estimating the size of large collections of access data.
+"""
+
+from __future__ import annotations
+
+import json
+import sys
+from typing import List, Optional
+
+from loguru import logger
+
+from fides.api.util.collection_util import Row
+from fides.api.util.custom_json_encoder import CustomJSONEncoder
+
+# 640MB threshold for external storage
+# We only generate an estimated size for large datasets so we want to be conservative
+# and fallback to external storage even if we haven't hit the 1GB max limit.
+# We also want to pad for encryption and base64 encoding.
+LARGE_DATA_THRESHOLD_BYTES = 640 * 1024 * 1024  # 640MB
+
+
+def calculate_data_size(data: List[Row]) -> int:  # noqa: D401 – utility function
+    """Return an approximate JSON-serialized size (in bytes) for a list of *Row*.
+
+    The implementation purposefully avoids serializing the entire payload when
+    *data* is large. For collections >1000 rows we sample a subset, measure the
+    encoded size, then extrapolate. This keeps memory usage bounded while still
+    giving us an order-of-magnitude estimate suitable for "should I stream this
+    out to S3?" decisions.
+    """
+
+    if not data:
+        return 0
+
+    try:
+        data_count = len(data)
+
+        # For very large datasets, estimate size from a sample to avoid memory issues
+        if data_count > 1000:
+            logger.debug(
+                f"Calculating size for large dataset ({data_count} rows) using sampling"
+            )
+
+            sample_size = min(500, max(100, data_count // 20))  # 5 % capped at 500
+
+            # stratified sampling – take items spaced across the set when possible
+            if data_count > sample_size * 3:
+                step = data_count // sample_size
+                sample_indices = list(range(0, data_count, step))[:sample_size]
+                sample = [data[i] for i in sample_indices]
+            else:
+                sample = data[:sample_size]
+
+            sample_json = json.dumps(
+                sample, cls=CustomJSONEncoder, separators=(",", ":")
+            )
+            sample_bytes = len(sample_json.encode("utf-8"))
+
+            avg_record_size = sample_bytes / sample_size
+            content_size = int(avg_record_size * data_count)
+
+            # overhead: 2 bytes for [] plus a comma between every record plus 1 % slack
+            structure_overhead = 2 + (data_count - 1) + int(content_size * 0.01)
+            return content_size + structure_overhead
+
+        # small datasets – just measure
+        json_str = json.dumps(data, cls=CustomJSONEncoder, separators=(",", ":"))
+        return len(json_str.encode("utf-8"))
+
+    except (TypeError, ValueError) as exc:
+        logger.warning(
+            f"Failed to calculate JSON size, falling back to sys.getsizeof: {exc}"
+        )
+        return sys.getsizeof(data)
+
+
+def is_large_data(
+    data: List[Row], threshold_bytes: Optional[int] = None
+) -> bool:  # noqa: D401
+    """Return *True* if *data* is likely to exceed *threshold_bytes* when serialized."""
+
+    if not data:
+        return False
+
+    threshold = (
+        threshold_bytes if threshold_bytes is not None else LARGE_DATA_THRESHOLD_BYTES
+    )
+    size = calculate_data_size(data)
+    if size > threshold:
+        logger.info(
+            f"Data size ({size:,} bytes) exceeds threshold ({threshold:,} bytes) – using external storage"
+        )
+        return True
+    return False
+
+
+__all__ = [
+    "calculate_data_size",
+    "is_large_data",
+    "LARGE_DATA_THRESHOLD_BYTES",
+]
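A short usage sketch for the new module above, assuming it is importable as fides.api.util.data_size (the +102 new-file hunk matches fides/api/util/data_size.py in the file list); the row contents and the lowered threshold are illustrative only:

    from fides.api.util.data_size import (
        LARGE_DATA_THRESHOLD_BYTES,
        calculate_data_size,
        is_large_data,
    )

    rows = [{"id": i, "email": f"user{i}@example.com"} for i in range(2_000)]

    # More than 1000 rows, so the size is extrapolated from a sample rather than
    # serializing every row.
    print(f"estimated size: {calculate_data_size(rows):,} bytes")
    print(f"default threshold: {LARGE_DATA_THRESHOLD_BYTES:,} bytes")

    # A deliberately tiny threshold is passed here so the example trips the
    # "use external storage" branch; production callers rely on the 640MB default.
    if is_large_data(rows, threshold_bytes=64 * 1024):
        print("route access results to external storage")
    else:
        print("keep access results inline")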
@@ -0,0 +1,271 @@
+"""
+AES GCM encryption utilities with SQLAlchemy-Utils and cryptography library implementations.
+
+This module provides simplified encrypt/decrypt functions using two approaches:
+1. SQLAlchemy-Utils AesGcmEngine (compatible with existing database encryption)
+2. Cryptography library with chunked processing (better performance, standard library)
+"""
+
+import base64
+import hashlib
+import json
+import os
+from typing import Any, List, Optional, Union
+
+from cryptography.hazmat.backends import default_backend
+from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
+from loguru import logger
+from sqlalchemy_utils.types.encrypted.encrypted_type import AesGcmEngine
+
+from fides.api.util.collection_util import Row
+from fides.api.util.custom_json_encoder import CustomJSONEncoder, _custom_decoder
+from fides.config import CONFIG
+
+
+class EncryptionError(Exception):
+    """Raised when encryption/decryption operations fail"""
+
+
+# SQLAlchemy-Utils Implementation (for compatibility with existing database encryption)
+def encrypt_with_sqlalchemy_utils(data: List[Row]) -> bytes:
+    """
+    Serialize and encrypt data using CustomJSONEncoder and SQLAlchemy-Utils AesGcmEngine.
+
+    This approach is compatible with existing database encryption but has lower performance.
+
+    Args:
+        data: Raw data to serialize and encrypt
+
+    Returns:
+        Encrypted bytes
+
+    Raises:
+        EncryptionError: If serialization or encryption fails
+    """
+    try:
+        # Serialize using CustomJSONEncoder for consistent ObjectId handling
+        serialized_data = json.dumps(data, cls=CustomJSONEncoder, separators=(",", ":"))
+        data_bytes = serialized_data.encode("utf-8")
+
+        # Encrypt using SQLAlchemy-Utils AesGcmEngine
+        engine = AesGcmEngine()
+        key = CONFIG.security.app_encryption_key
+        engine._update_key(key)  # pylint: disable=protected-access
+
+        # AesGcmEngine expects string input
+        data_str = data_bytes.decode("utf-8")
+        encrypted_data = engine.encrypt(data_str)
+        encrypted_bytes = encrypted_data.encode("utf-8")
+
+        logger.debug(
+            f"SQLAlchemy-Utils: Encrypted {len(data_bytes)} bytes to {len(encrypted_bytes)} bytes"
+        )
+        return encrypted_bytes
+
+    except Exception as e:
+        logger.error(f"SQLAlchemy-Utils encryption failed: {e}")
+        raise EncryptionError(f"SQLAlchemy-Utils encryption failed: {str(e)}")
+
+
+def decrypt_with_sqlalchemy_utils(encrypted_bytes: bytes) -> List[Row]:
+    """
+    Decrypt and deserialize data using SQLAlchemy-Utils AesGcmEngine and _custom_decoder.
+
+    Args:
+        encrypted_bytes: Encrypted data bytes to decrypt
+
+    Returns:
+        Deserialized data
+
+    Raises:
+        EncryptionError: If decryption or deserialization fails
+    """
+    try:
+        # Decrypt using SQLAlchemy-Utils AesGcmEngine
+        engine = AesGcmEngine()
+        key = CONFIG.security.app_encryption_key
+        engine._update_key(key)  # pylint: disable=protected-access
+
+        # AesGcmEngine expects string input
+        encrypted_str = encrypted_bytes.decode("utf-8")
+        decrypted_data = engine.decrypt(encrypted_str)
+
+        # Deserialize using _custom_decoder for consistent ObjectId handling
+        data = json.loads(decrypted_data, object_hook=_custom_decoder)
+
+        logger.debug(
+            f"SQLAlchemy-Utils: Decrypted {len(encrypted_bytes)} bytes to {len(data)} records"
+        )
+        return data
+
+    except Exception as e:
+        logger.error(f"SQLAlchemy-Utils decryption failed: {e}")
+        raise EncryptionError(f"SQLAlchemy-Utils decryption failed: {str(e)}")
+
+
+# Cryptography Library Implementation (standard, chunked processing)
+def encrypt_with_cryptography(
+    data: Union[List[Row], Any], chunk_size: Optional[int] = None
+) -> bytes:
+    """
+    Serialize and encrypt data using the standard cryptography library with chunked processing.
+
+    This provides fast performance and memory efficiency for large datasets.
+
+    Args:
+        data: Raw data to serialize and encrypt
+        chunk_size: Size of chunks for processing (default 4MB)
+
+    Returns:
+        Encrypted bytes (base64-encoded string as bytes)
+
+    Raises:
+        EncryptionError: If serialization or encryption fails
+    """
+    try:
+        # Set default chunk size
+        if chunk_size is None:
+            chunk_size = 4 * 1024 * 1024  # 4MB chunks
+
+        # Serialize using CustomJSONEncoder for consistent handling
+        serialized_data = json.dumps(data, cls=CustomJSONEncoder, separators=(",", ":"))
+        plaintext = serialized_data.encode("utf-8")
+
+        data_size_mb = len(plaintext) / (1024 * 1024)
+        chunk_size_mb = chunk_size / (1024 * 1024)
+        estimated_chunks = len(plaintext) // chunk_size + (
+            1 if len(plaintext) % chunk_size else 0
+        )
+        record_count = len(data) if isinstance(data, list) else "N/A"
+
+        logger.info(
+            f"Cryptography: Encrypting {record_count} records ({data_size_mb:.1f} MB) "
+            f"using {chunk_size_mb:.0f}MB chunks (~{estimated_chunks} chunks)"
+        )
+
+        # Use SQLAlchemy-Utils compatible key (SHA256 hash of app key)
+        key = _get_sqlalchemy_compatible_key()
+        nonce = os.urandom(12)  # 96-bit nonce for AES-GCM
+
+        # Create cipher
+        cipher = Cipher(
+            algorithms.AES(key), modes.GCM(nonce), backend=default_backend()
+        )
+        encryptor = cipher.encryptor()
+
+        # Process in chunks for memory efficiency
+        ciphertext_chunks = []
+        for i in range(0, len(plaintext), chunk_size):
+            chunk = plaintext[i : i + chunk_size]
+            ciphertext_chunks.append(encryptor.update(chunk))
+
+        # Finalize and get tag
+        encryptor.finalize()
+        tag = encryptor.tag
+
+        # Combine in same format as SQLAlchemy-Utils: [nonce/iv][tag][ciphertext]
+        ciphertext = b"".join(ciphertext_chunks)
+        binary_result = nonce + tag + ciphertext
+
+        # Base64 encode to match SQLAlchemy-Utils format
+        base64_result = base64.b64encode(binary_result).decode("utf-8")
+        result_bytes = base64_result.encode("utf-8")
+
+        encrypted_size_mb = len(result_bytes) / (1024 * 1024)
+        logger.info(
+            f"Cryptography: Encrypted successfully - "
+            f"{len(ciphertext_chunks)} chunks, {encrypted_size_mb:.1f} MB output (base64)"
+        )
+
+        return result_bytes
+
+    except Exception as e:
+        logger.error(f"Cryptography encryption failed: {e}")
+        raise EncryptionError(f"Cryptography encryption failed: {str(e)}")
+
+
+def decrypt_with_cryptography(
+    encrypted_bytes: bytes, chunk_size: Optional[int] = None
+) -> Union[List[Row], Any]:
+    """
+    Decrypt and deserialize data using the cryptography library with chunked processing.
+
+    Args:
+        encrypted_bytes: Encrypted data (base64-encoded string as bytes)
+        chunk_size: Size of chunks for processing (default 4MB)
+
+    Returns:
+        Deserialized data
+
+    Raises:
+        EncryptionError: If decryption or deserialization fails
+    """
+    try:
+        # Set default chunk size
+        if chunk_size is None:
+            chunk_size = 4 * 1024 * 1024  # 4MB chunks
+
+        # Decode from base64
+        encrypted_str = encrypted_bytes.decode("utf-8")
+        binary_data = base64.b64decode(encrypted_str)
+
+        # Extract components in SQLAlchemy-Utils format: [nonce/iv][tag][ciphertext]
+        if len(binary_data) < 28:  # 12 (nonce) + 16 (tag)
+            raise ValueError("Encrypted data too short")
+
+        nonce = binary_data[:12]  # First 12 bytes: nonce/IV
+        tag = binary_data[12:28]  # Next 16 bytes: tag
+        ciphertext = binary_data[28:]  # Remaining bytes: ciphertext
+
+        encrypted_size_mb = len(encrypted_bytes) / (1024 * 1024)
+        chunk_size_mb = chunk_size / (1024 * 1024)
+        estimated_chunks = len(ciphertext) // chunk_size + (
+            1 if len(ciphertext) % chunk_size else 0
+        )
+
+        logger.info(
+            f"Cryptography: Decrypting {encrypted_size_mb:.1f} MB "
+            f"using {chunk_size_mb:.0f}MB chunks (~{estimated_chunks} chunks)"
+        )
+
+        # Use SQLAlchemy-Utils compatible key
+        key = _get_sqlalchemy_compatible_key()
+        cipher = Cipher(
+            algorithms.AES(key), modes.GCM(nonce, tag), backend=default_backend()
+        )
+        decryptor = cipher.decryptor()
+
+        # Process in chunks for memory efficiency
+        plaintext_chunks = []
+        for i in range(0, len(ciphertext), chunk_size):
+            chunk = ciphertext[i : i + chunk_size]
+            plaintext_chunks.append(decryptor.update(chunk))
+
+        # Finalize
+        decryptor.finalize()
+
+        # Combine and deserialize
+        plaintext = b"".join(plaintext_chunks)
+        decrypted_json = plaintext.decode("utf-8")
+        data = json.loads(decrypted_json, object_hook=_custom_decoder)
+
+        record_count = len(data) if isinstance(data, list) else "N/A"
+        logger.info(f"Cryptography: Successfully decrypted {record_count} records")
+
+        return data
+
+    except Exception as e:
+        logger.error(f"Cryptography decryption failed: {e}")
+        raise EncryptionError(f"Cryptography decryption failed: {str(e)}")
+
+
+def _get_sqlalchemy_compatible_key() -> bytes:
+    """Get 32-byte encryption key compatible with SQLAlchemy-Utils AesGcmEngine."""
+    app_key = CONFIG.security.app_encryption_key.encode(CONFIG.security.encoding)
+    # SQLAlchemy-Utils always uses SHA256 hash of the key
+    return hashlib.sha256(app_key).digest()
+
+
+# Public API - Use cryptography by default for new operations
+encrypt_data = encrypt_with_cryptography
+decrypt_data = decrypt_with_cryptography
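The module above documents its output layout as base64([nonce][tag][ciphertext]) with a 12-byte nonce and a 16-byte tag, matching the SQLAlchemy-Utils AesGcmEngine format. A self-contained sketch of a round trip through that layout using only the cryptography library; a random key stands in here for the SHA-256 of CONFIG.security.app_encryption_key:

    import base64
    import os

    from cryptography.hazmat.backends import default_backend
    from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes

    key = os.urandom(32)  # stand-in for sha256(app_encryption_key)
    plaintext = b'[{"id": 1, "email": "user@example.com"}]'

    # Encrypt: AES-GCM with a 96-bit nonce, then pack as [nonce][tag][ciphertext] and base64-encode.
    nonce = os.urandom(12)
    encryptor = Cipher(
        algorithms.AES(key), modes.GCM(nonce), backend=default_backend()
    ).encryptor()
    ciphertext = encryptor.update(plaintext)
    encryptor.finalize()
    blob = base64.b64encode(nonce + encryptor.tag + ciphertext)

    # Decrypt: unpack the same layout; finalize() verifies the authentication tag.
    raw = base64.b64decode(blob)
    nonce2, tag, body = raw[:12], raw[12:28], raw[28:]
    decryptor = Cipher(
        algorithms.AES(key), modes.GCM(nonce2, tag), backend=default_backend()
    ).decryptor()
    assert decryptor.update(body) + decryptor.finalize() == plaintext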