ethyca-fides 2.63.1b3__py2.py3-none-any.whl → 2.63.1rc0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (152)
  1. {ethyca_fides-2.63.1b3.dist-info → ethyca_fides-2.63.1rc0.dist-info}/METADATA +1 -1
  2. {ethyca_fides-2.63.1b3.dist-info → ethyca_fides-2.63.1rc0.dist-info}/RECORD +139 -141
  3. fides/_version.py +3 -3
  4. fides/api/alembic/migrations/versions/29e56fa1fdb3_add_monitor_tasks.py +147 -0
  5. fides/api/api/v1/endpoints/privacy_request_endpoints.py +4 -4
  6. fides/api/db/base.py +5 -3
  7. fides/api/main.py +0 -1
  8. fides/api/models/attachment.py +23 -36
  9. fides/api/models/connectionconfig.py +1 -1
  10. fides/api/models/detection_discovery/__init__.py +35 -0
  11. fides/api/models/detection_discovery/monitor_task.py +161 -0
  12. fides/api/models/field_types/__init__.py +5 -0
  13. fides/api/models/field_types/encrypted_large_data.py +151 -0
  14. fides/api/models/privacy_preference.py +1 -1
  15. fides/api/models/privacy_request/execution_log.py +3 -31
  16. fides/api/models/privacy_request/privacy_request.py +16 -3
  17. fides/api/models/privacy_request/request_task.py +36 -25
  18. fides/api/models/worker_task.py +96 -0
  19. fides/api/schemas/external_storage.py +22 -0
  20. fides/api/schemas/privacy_request.py +1 -12
  21. fides/api/service/connectors/base_erasure_email_connector.py +1 -1
  22. fides/api/service/connectors/consent_email_connector.py +2 -1
  23. fides/api/service/connectors/dynamic_erasure_email_connector.py +2 -1
  24. fides/api/service/connectors/erasure_email_connector.py +1 -1
  25. fides/api/service/external_data_storage.py +371 -0
  26. fides/api/service/privacy_request/dsr_package/dsr_report_builder.py +46 -264
  27. fides/api/service/privacy_request/dsr_package/templates/collection_index.html +9 -34
  28. fides/api/service/privacy_request/dsr_package/templates/item.html +37 -0
  29. fides/api/service/privacy_request/dsr_package/templates/main.css +2 -45
  30. fides/api/service/privacy_request/dsr_package/templates/welcome.html +8 -12
  31. fides/api/service/privacy_request/request_runner_service.py +139 -258
  32. fides/api/service/privacy_request/request_service.py +1 -1
  33. fides/api/service/storage/gcs.py +3 -15
  34. fides/api/service/storage/s3.py +14 -28
  35. fides/api/service/storage/util.py +7 -45
  36. fides/api/task/create_request_tasks.py +1 -1
  37. fides/api/task/execute_request_tasks.py +9 -8
  38. fides/api/task/graph_task.py +22 -10
  39. fides/api/tasks/storage.py +91 -85
  40. fides/api/util/cache.py +1 -77
  41. fides/api/util/consent_util.py +1 -1
  42. fides/api/util/data_size.py +102 -0
  43. fides/api/util/encryption/aes_gcm_encryption_util.py +271 -0
  44. fides/config/redis_settings.py +8 -99
  45. fides/service/messaging/aws_ses_service.py +1 -5
  46. fides/service/privacy_request/privacy_request_service.py +1 -1
  47. fides/ui-build/static/admin/404.html +1 -1
  48. fides/ui-build/static/admin/add-systems/manual.html +1 -1
  49. fides/ui-build/static/admin/add-systems/multiple.html +1 -1
  50. fides/ui-build/static/admin/add-systems.html +1 -1
  51. fides/ui-build/static/admin/consent/configure/add-vendors.html +1 -1
  52. fides/ui-build/static/admin/consent/configure.html +1 -1
  53. fides/ui-build/static/admin/consent/privacy-experience/[id].html +1 -1
  54. fides/ui-build/static/admin/consent/privacy-experience/new.html +1 -1
  55. fides/ui-build/static/admin/consent/privacy-experience.html +1 -1
  56. fides/ui-build/static/admin/consent/privacy-notices/[id].html +1 -1
  57. fides/ui-build/static/admin/consent/privacy-notices/new.html +1 -1
  58. fides/ui-build/static/admin/consent/privacy-notices.html +1 -1
  59. fides/ui-build/static/admin/consent/properties.html +1 -1
  60. fides/ui-build/static/admin/consent/reporting.html +1 -1
  61. fides/ui-build/static/admin/consent.html +1 -1
  62. fides/ui-build/static/admin/data-catalog/[systemId]/projects/[projectUrn]/[resourceUrn].html +1 -1
  63. fides/ui-build/static/admin/data-catalog/[systemId]/projects/[projectUrn].html +1 -1
  64. fides/ui-build/static/admin/data-catalog/[systemId]/projects.html +1 -1
  65. fides/ui-build/static/admin/data-catalog/[systemId]/resources/[resourceUrn].html +1 -1
  66. fides/ui-build/static/admin/data-catalog/[systemId]/resources.html +1 -1
  67. fides/ui-build/static/admin/data-catalog.html +1 -1
  68. fides/ui-build/static/admin/data-discovery/action-center/[monitorId]/[systemId].html +1 -1
  69. fides/ui-build/static/admin/data-discovery/action-center/[monitorId].html +1 -1
  70. fides/ui-build/static/admin/data-discovery/action-center.html +1 -1
  71. fides/ui-build/static/admin/data-discovery/activity.html +1 -1
  72. fides/ui-build/static/admin/data-discovery/detection/[resourceUrn].html +1 -1
  73. fides/ui-build/static/admin/data-discovery/detection.html +1 -1
  74. fides/ui-build/static/admin/data-discovery/discovery/[resourceUrn].html +1 -1
  75. fides/ui-build/static/admin/data-discovery/discovery.html +1 -1
  76. fides/ui-build/static/admin/datamap.html +1 -1
  77. fides/ui-build/static/admin/dataset/[datasetId]/[collectionName]/[...subfieldNames].html +1 -1
  78. fides/ui-build/static/admin/dataset/[datasetId]/[collectionName].html +1 -1
  79. fides/ui-build/static/admin/dataset/[datasetId].html +1 -1
  80. fides/ui-build/static/admin/dataset/new.html +1 -1
  81. fides/ui-build/static/admin/dataset.html +1 -1
  82. fides/ui-build/static/admin/datastore-connection/[id].html +1 -1
  83. fides/ui-build/static/admin/datastore-connection/new.html +1 -1
  84. fides/ui-build/static/admin/datastore-connection.html +1 -1
  85. fides/ui-build/static/admin/index.html +1 -1
  86. fides/ui-build/static/admin/integrations/[id].html +1 -1
  87. fides/ui-build/static/admin/integrations.html +1 -1
  88. fides/ui-build/static/admin/lib/fides-ext-gpp.js +1 -1
  89. fides/ui-build/static/admin/lib/fides-headless.js +1 -1
  90. fides/ui-build/static/admin/lib/fides-preview.js +1 -1
  91. fides/ui-build/static/admin/lib/fides-tcf.js +2 -2
  92. fides/ui-build/static/admin/lib/fides.js +2 -2
  93. fides/ui-build/static/admin/login/[provider].html +1 -1
  94. fides/ui-build/static/admin/login.html +1 -1
  95. fides/ui-build/static/admin/messaging/[id].html +1 -1
  96. fides/ui-build/static/admin/messaging/add-template.html +1 -1
  97. fides/ui-build/static/admin/messaging.html +1 -1
  98. fides/ui-build/static/admin/poc/ant-components.html +1 -1
  99. fides/ui-build/static/admin/poc/form-experiments/AntForm.html +1 -1
  100. fides/ui-build/static/admin/poc/form-experiments/FormikAntFormItem.html +1 -1
  101. fides/ui-build/static/admin/poc/form-experiments/FormikControlled.html +1 -1
  102. fides/ui-build/static/admin/poc/form-experiments/FormikField.html +1 -1
  103. fides/ui-build/static/admin/poc/form-experiments/FormikSpreadField.html +1 -1
  104. fides/ui-build/static/admin/poc/forms.html +1 -1
  105. fides/ui-build/static/admin/poc/table-migration.html +1 -1
  106. fides/ui-build/static/admin/privacy-requests/[id].html +1 -1
  107. fides/ui-build/static/admin/privacy-requests/configure/messaging.html +1 -1
  108. fides/ui-build/static/admin/privacy-requests/configure/storage.html +1 -1
  109. fides/ui-build/static/admin/privacy-requests/configure.html +1 -1
  110. fides/ui-build/static/admin/privacy-requests.html +1 -1
  111. fides/ui-build/static/admin/properties/[id].html +1 -1
  112. fides/ui-build/static/admin/properties/add-property.html +1 -1
  113. fides/ui-build/static/admin/properties.html +1 -1
  114. fides/ui-build/static/admin/reporting/datamap.html +1 -1
  115. fides/ui-build/static/admin/settings/about/alpha.html +1 -1
  116. fides/ui-build/static/admin/settings/about.html +1 -1
  117. fides/ui-build/static/admin/settings/consent/[configuration_id]/[purpose_id].html +1 -1
  118. fides/ui-build/static/admin/settings/consent.html +1 -1
  119. fides/ui-build/static/admin/settings/custom-fields.html +1 -1
  120. fides/ui-build/static/admin/settings/domain-records.html +1 -1
  121. fides/ui-build/static/admin/settings/domains.html +1 -1
  122. fides/ui-build/static/admin/settings/email-templates.html +1 -1
  123. fides/ui-build/static/admin/settings/locations.html +1 -1
  124. fides/ui-build/static/admin/settings/organization.html +1 -1
  125. fides/ui-build/static/admin/settings/regulations.html +1 -1
  126. fides/ui-build/static/admin/systems/configure/[id]/test-datasets.html +1 -1
  127. fides/ui-build/static/admin/systems/configure/[id].html +1 -1
  128. fides/ui-build/static/admin/systems.html +1 -1
  129. fides/ui-build/static/admin/taxonomy.html +1 -1
  130. fides/ui-build/static/admin/user-management/new.html +1 -1
  131. fides/ui-build/static/admin/user-management/profile/[id].html +1 -1
  132. fides/ui-build/static/admin/user-management.html +1 -1
  133. fides/api/alembic/migrations/versions/5efcdf18438e_add_manual_task_tables.py +0 -160
  134. fides/api/models/manual_tasks/__init__.py +0 -8
  135. fides/api/models/manual_tasks/manual_task.py +0 -110
  136. fides/api/models/manual_tasks/manual_task_log.py +0 -100
  137. fides/api/schemas/manual_tasks/__init__.py +0 -0
  138. fides/api/schemas/manual_tasks/manual_task_schemas.py +0 -79
  139. fides/api/schemas/manual_tasks/manual_task_status.py +0 -151
  140. fides/api/service/privacy_request/attachment_handling.py +0 -132
  141. fides/api/service/privacy_request/dsr_package/templates/attachments_index.html +0 -33
  142. fides/api/tasks/csv_utils.py +0 -170
  143. fides/api/tasks/encryption_utils.py +0 -42
  144. fides/service/manual_tasks/__init__.py +0 -0
  145. fides/service/manual_tasks/manual_task_service.py +0 -150
  146. {ethyca_fides-2.63.1b3.dist-info → ethyca_fides-2.63.1rc0.dist-info}/WHEEL +0 -0
  147. {ethyca_fides-2.63.1b3.dist-info → ethyca_fides-2.63.1rc0.dist-info}/entry_points.txt +0 -0
  148. {ethyca_fides-2.63.1b3.dist-info → ethyca_fides-2.63.1rc0.dist-info}/licenses/LICENSE +0 -0
  149. {ethyca_fides-2.63.1b3.dist-info → ethyca_fides-2.63.1rc0.dist-info}/top_level.txt +0 -0
  150. /fides/api/models/{detection_discovery.py → detection_discovery/core.py} +0 -0
  151. /fides/ui-build/static/admin/_next/static/{ycPcko8qnif6BlkQ6MN4D → PEElhfUdgE5bJjiyu5QCD}/_buildManifest.js +0 -0
  152. /fides/ui-build/static/admin/_next/static/{ycPcko8qnif6BlkQ6MN4D → PEElhfUdgE5bJjiyu5QCD}/_ssgManifest.js +0 -0
@@ -1,7 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
- from io import BytesIO
4
- from typing import IO, Any, Dict, Optional, Tuple, Union
3
+ from typing import IO, Any, Dict, Tuple, Union
5
4
 
6
5
  from boto3.s3.transfer import TransferConfig
7
6
  from botocore.exceptions import ClientError, ParamValidationError
@@ -35,7 +34,7 @@ def maybe_get_s3_client(
35
34
 
36
35
 
37
36
  def create_presigned_url_for_s3(
38
- s3_client: Any, bucket_name: str, file_key: str, ttl_seconds: Optional[int] = None
37
+ s3_client: Any, bucket_name: str, file_key: str
39
38
  ) -> AnyHttpUrlString:
40
39
  """
41
40
  Generates a presigned URL to share an S3 object
@@ -46,16 +45,10 @@ def create_presigned_url_for_s3(
46
45
  :return: Presigned URL as string.
47
46
  """
48
47
  params = {"Bucket": bucket_name, "Key": file_key}
49
- if ttl_seconds:
50
- if ttl_seconds > 604800:
51
- raise ValueError("TTL must be less than 7 days")
52
- expires_in = ttl_seconds
53
- else:
54
- expires_in = CONFIG.security.subject_request_download_link_ttl_seconds
55
48
  response = s3_client.generate_presigned_url(
56
49
  "get_object",
57
50
  Params=params,
58
- ExpiresIn=expires_in,
51
+ ExpiresIn=CONFIG.security.subject_request_download_link_ttl_seconds,
59
52
  )
60
53
 
61
54
  # The response contains the presigned URL
@@ -75,7 +68,7 @@ def generic_upload_to_s3( # pylint: disable=R0913
75
68
  file_key: str,
76
69
  auth_method: str,
77
70
  document: IO[bytes],
78
- size_threshold: int = LARGE_FILE_THRESHOLD, # 25 MB threshold
71
+ size_threshold: int = LARGE_FILE_THRESHOLD, # 5 MB threshold
79
72
  ) -> Tuple[int, AnyHttpUrlString]:
80
73
  """
81
74
  Uploads file like objects to S3.
@@ -137,8 +130,7 @@ def generic_retrieve_from_s3(
137
130
  file_key: str,
138
131
  auth_method: str,
139
132
  get_content: bool = False,
140
- ttl_seconds: Optional[int] = None,
141
- ) -> Tuple[int, Union[str, IO[bytes]]]:
133
+ ) -> Tuple[int, Union[str, bytes]]:
142
134
  """
143
135
  Retrieves a file from S3 and returns its size and either a presigned URL or the actual content.
144
136
 
@@ -156,23 +148,17 @@ def generic_retrieve_from_s3(
156
148
  s3_client = get_s3_client(auth_method, storage_secrets)
157
149
 
158
150
  try:
159
- # Get file size using head_object
160
- size_response = s3_client.head_object(Bucket=bucket_name, Key=file_key)
161
- # If the file is less than 25MB, we can get the content otherwise return the presigned URL
162
- if get_content and size_response["ContentLength"] <= LARGE_FILE_THRESHOLD:
163
- # Get the actual content using download_fileobj
164
- file_obj = BytesIO()
165
- s3_client.download_fileobj(
166
- Bucket=bucket_name, Key=file_key, Fileobj=file_obj
167
- )
168
- file_obj.seek(0) # Reset file pointer to beginning
169
- return int(size_response["ContentLength"]), file_obj
151
+ if get_content:
152
+ # Get the actual content
153
+ response = s3_client.get_object(Bucket=bucket_name, Key=file_key)
154
+ content = response["Body"].read()
155
+ return response["ContentLength"], content
170
156
 
171
157
  # Get presigned URL
172
- presigned_url = create_presigned_url_for_s3(
173
- s3_client, bucket_name, file_key, ttl_seconds
174
- )
175
- return int(size_response["ContentLength"]), str(presigned_url)
158
+ presigned_url = create_presigned_url_for_s3(s3_client, bucket_name, file_key)
159
+ # Get file size
160
+ response = s3_client.head_object(Bucket=bucket_name, Key=file_key)
161
+ return int(response["ContentLength"]), str(presigned_url)
176
162
  except ClientError as e:
177
163
  logger.error(f"Error retrieving file from S3: {e}")
178
164
  raise e
@@ -3,10 +3,6 @@ from enum import Enum as EnumType
3
3
 
4
4
  from loguru import logger
5
5
 
6
- # This is the max file size for downloading the content of an attachment.
7
- # This is an industry standard used by companies like Google and Microsoft.
8
- LARGE_FILE_THRESHOLD = 25 * 1024 * 1024 # 25 MB
9
-
10
6
 
11
7
  class AllowedFileType(EnumType):
12
8
  """
@@ -28,49 +24,15 @@ class AllowedFileType(EnumType):
28
24
 
29
25
  LOCAL_FIDES_UPLOAD_DIRECTORY = "fides_uploads"
30
26
 
27
+ # Default to 10MB if not specified in environment
28
+ LARGE_FILE_THRESHOLD = 10 * 1024 * 1024 # 10 MB threshold
31
29
 
32
- def get_local_filename(file_key: str) -> str:
33
- """Verifies that the local storage directory exists and returns the local filepath.
34
-
35
- This extra security checks are to prevent directory traversal attacks and "complete business and technical destruction".
36
- Thanks Claude.
37
-
38
- Args:
39
- file_key: The key/path for the file
40
-
41
- Returns:
42
- The full local filepath
43
-
44
- Raises:
45
- ValueError: If the file_key is invalid or would result in a path outside the upload directory
46
- """
47
- # Basic validation
48
- if not file_key:
49
- raise ValueError("File key cannot be empty")
50
-
51
- # Security checks before normalization
52
- if file_key.startswith("/"):
53
- raise ValueError("Invalid file key: cannot start with '/'")
54
-
55
- # Normalize the path to handle any path separators consistently
56
- # First normalize using os.path.normpath to handle any redundant separators
57
- normalized_key = os.path.normpath(file_key)
58
- # Then convert all separators to forward slashes for consistency
59
- normalized_key = normalized_key.replace("\\", "/")
60
30
 
61
- # Additional security: ensure the final path is within the upload directory
62
- final_path = os.path.join(LOCAL_FIDES_UPLOAD_DIRECTORY, normalized_key)
63
- if not os.path.abspath(final_path).startswith(
64
- os.path.abspath(LOCAL_FIDES_UPLOAD_DIRECTORY)
65
- ):
66
- raise ValueError(
67
- "Invalid file key: would result in path outside upload directory"
68
- )
69
-
70
- # Create all necessary directories
71
- os.makedirs(os.path.dirname(final_path), exist_ok=True)
72
-
73
- return final_path
31
+ def get_local_filename(file_key: str) -> str:
32
+ """Verifies that the local storage directory exists and returns the local filepath"""
33
+ if not os.path.exists(LOCAL_FIDES_UPLOAD_DIRECTORY):
34
+ os.makedirs(LOCAL_FIDES_UPLOAD_DIRECTORY)
35
+ return f"{LOCAL_FIDES_UPLOAD_DIRECTORY}/{file_key}"
74
36
 
75
37
 
76
38
  def get_allowed_file_type_or_raise(file_key: str) -> str:
@@ -29,8 +29,8 @@ from fides.api.models.privacy_request import (
29
29
  RequestTask,
30
30
  TraversalDetails,
31
31
  )
32
+ from fides.api.models.worker_task import ExecutionLogStatus
32
33
  from fides.api.schemas.policy import ActionType
33
- from fides.api.schemas.privacy_request import ExecutionLogStatus
34
34
  from fides.api.task.deprecated_graph_task import format_data_use_map_for_caching
35
35
  from fides.api.task.execute_request_tasks import log_task_queued, queue_request_task
36
36
  from fides.api.util.logger_context_utils import log_context
@@ -22,8 +22,9 @@ from fides.api.common_exceptions import (
22
22
  from fides.api.graph.config import TERMINATOR_ADDRESS, CollectionAddress
23
23
  from fides.api.models.connectionconfig import ConnectionConfig
24
24
  from fides.api.models.privacy_request import ExecutionLog, PrivacyRequest, RequestTask
25
+ from fides.api.models.worker_task import ExecutionLogStatus
25
26
  from fides.api.schemas.policy import ActionType, CurrentStep
26
- from fides.api.schemas.privacy_request import ExecutionLogStatus, PrivacyRequestStatus
27
+ from fides.api.schemas.privacy_request import PrivacyRequestStatus
27
28
  from fides.api.task.graph_task import (
28
29
  GraphTask,
29
30
  mark_current_and_downstream_nodes_as_failed,
@@ -145,7 +146,7 @@ def can_run_task_body(
145
146
  if request_task.is_terminator_task:
146
147
  logger.info(
147
148
  "Terminator {} task reached.",
148
- request_task.action_type.value,
149
+ request_task.action_type,
149
150
  )
150
151
  return False
151
152
  if request_task.is_root_task:
@@ -154,7 +155,7 @@ def can_run_task_body(
154
155
  if request_task.status != ExecutionLogStatus.pending:
155
156
  logger_method(request_task)(
156
157
  "Skipping {} task {} with status {}.",
157
- request_task.action_type.value,
158
+ request_task.action_type,
158
159
  request_task.collection_address,
159
160
  request_task.status.value,
160
161
  )
@@ -449,7 +450,7 @@ def log_task_complete(request_task: RequestTask) -> None:
449
450
  """Convenience method for logging task completion"""
450
451
  logger.info(
451
452
  "{} task {} is {}.",
452
- request_task.action_type.value.capitalize(),
453
+ request_task.action_type.capitalize(),
453
454
  request_task.collection_address,
454
455
  request_task.status.value,
455
456
  )
@@ -478,9 +479,9 @@ def _order_tasks_by_input_key(
478
479
 
479
480
 
480
481
  mapping = {
481
- ActionType.access: run_access_node,
482
- ActionType.erasure: run_erasure_node,
483
- ActionType.consent: run_consent_node,
482
+ ActionType.access.value: run_access_node,
483
+ ActionType.erasure.value: run_erasure_node,
484
+ ActionType.consent.value: run_consent_node,
484
485
  }
485
486
 
486
487
 
@@ -504,7 +505,7 @@ def log_task_queued(request_task: RequestTask, location: str) -> None:
504
505
  """Helper for logging that tasks are queued"""
505
506
  logger_method(request_task)(
506
507
  "Queuing {} task {} from {}.",
507
- request_task.action_type.value,
508
+ request_task.action_type,
508
509
  request_task.collection_address,
509
510
  location,
510
511
  )
@@ -39,8 +39,8 @@ from fides.api.models.datasetconfig import DatasetConfig
39
39
  from fides.api.models.policy import Policy, Rule
40
40
  from fides.api.models.privacy_preference import PrivacyPreferenceHistory
41
41
  from fides.api.models.privacy_request import ExecutionLog, PrivacyRequest, RequestTask
42
+ from fides.api.models.worker_task import ExecutionLogStatus
42
43
  from fides.api.schemas.policy import ActionType, CurrentStep
43
- from fides.api.schemas.privacy_request import ExecutionLogStatus
44
44
  from fides.api.service.connectors.base_connector import BaseConnector
45
45
  from fides.api.task.consolidate_query_matches import consolidate_query_matches
46
46
  from fides.api.task.filter_element_match import filter_element_match
@@ -503,12 +503,20 @@ class GraphTask(ABC): # pylint: disable=too-many-instance-attributes
503
503
  self.post_process_input_data(formatted_input_data)
504
504
  )
505
505
 
506
- # For erasures: cache results with non-matching array elements *replaced* with placeholder text
507
- placeholder_output: List[Row] = copy.deepcopy(output)
508
- for row in placeholder_output:
506
+ # For erasures: build placeholder version incrementally to avoid holding two full
507
+ # copies of the data in memory simultaneously.
508
+ placeholder_output: List[Row] = []
509
+ for original_row in output:
510
+ # Create a deep copy of the *single* row, transform it, then append to
511
+ # the placeholder list. Peak memory at any point is one extra row rather
512
+ # than an entire dataset.
513
+ row_copy = copy.deepcopy(original_row)
509
514
  filter_element_match(
510
- row, query_paths=post_processed_node_input_data, delete_elements=False
515
+ row_copy,
516
+ query_paths=post_processed_node_input_data,
517
+ delete_elements=False,
511
518
  )
519
+ placeholder_output.append(row_copy)
512
520
 
513
521
  # For DSR 3.0, save data to build masking requests directly
514
522
  # on the Request Task.
@@ -519,11 +527,14 @@ class GraphTask(ABC): # pylint: disable=too-many-instance-attributes
519
527
  # TODO Remove when we stop support for DSR 2.0
520
528
  # Save data to build masking requests for DSR 2.0 in Redis.
521
529
  # Results saved with matching array elements preserved
522
- self.resources.cache_results_with_placeholders(
523
- f"access_request__{self.key}", placeholder_output
524
- )
530
+ if not CONFIG.execution.use_dsr_3_0:
531
+ self.resources.cache_results_with_placeholders(
532
+ f"access_request__{self.key}", placeholder_output
533
+ )
525
534
 
526
- # For access request results, cache results with non-matching array elements *removed*
535
+ # For access request results, mutate rows in-place to remove non-matching
536
+ # array elements. We already iterated over `output` above, so reuse the same
537
+ # loop structure to keep cache locality.
527
538
  for row in output:
528
539
  logger.info(
529
540
  "Filtering row in {} for matching array elements.",
@@ -537,7 +548,8 @@ class GraphTask(ABC): # pylint: disable=too-many-instance-attributes
537
548
 
538
549
  # TODO Remove when we stop support for DSR 2.0
539
550
  # Saves intermediate access results for DSR 2.0 in Redis
540
- self.resources.cache_object(f"access_request__{self.key}", output)
551
+ if not CONFIG.execution.use_dsr_3_0:
552
+ self.resources.cache_object(f"access_request__{self.key}", output)
541
553
 
542
554
  # Return filtered rows with non-matched array data removed.
543
555
  return output
@@ -1,20 +1,23 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import json
4
+ import secrets
4
5
  import zipfile
5
6
  from io import BytesIO
6
- from typing import TYPE_CHECKING, Any, Optional
7
+ from typing import TYPE_CHECKING, Any, Dict, Optional, Union
7
8
 
9
+ import pandas as pd
8
10
  from botocore.exceptions import ClientError, ParamValidationError
9
11
  from fideslang.validation import AnyHttpUrlString
10
12
  from loguru import logger
11
13
 
12
14
  from fides.api.common_exceptions import StorageUploadError
15
+ from fides.api.cryptography.cryptographic_util import bytes_to_b64_str
13
16
  from fides.api.schemas.storage.storage import ResponseFormat, StorageSecrets
14
17
  from fides.api.service.privacy_request.dsr_package.dsr_report_builder import (
15
18
  DsrReportBuilder,
16
19
  )
17
- from fides.api.service.storage.gcs import get_gcs_blob
20
+ from fides.api.service.storage.gcs import get_gcs_client
18
21
  from fides.api.service.storage.s3 import (
19
22
  create_presigned_url_for_s3,
20
23
  generic_upload_to_s3,
@@ -23,9 +26,11 @@ from fides.api.service.storage.util import (
23
26
  LOCAL_FIDES_UPLOAD_DIRECTORY,
24
27
  get_local_filename,
25
28
  )
26
- from fides.api.tasks.csv_utils import write_csv_to_zip
27
- from fides.api.tasks.encryption_utils import encrypt_access_request_results
28
29
  from fides.api.util.aws_util import get_s3_client
30
+ from fides.api.util.cache import get_cache, get_encryption_cache_key
31
+ from fides.api.util.encryption.aes_gcm_encryption_scheme import (
32
+ encrypt_to_bytes_verify_secrets_length,
33
+ )
29
34
  from fides.api.util.storage_util import StorageJSONEncoder
30
35
  from fides.config import CONFIG
31
36
 
@@ -33,8 +38,33 @@ if TYPE_CHECKING:
33
38
  from fides.api.models.privacy_request import PrivacyRequest
34
39
 
35
40
 
41
+ def encrypt_access_request_results(data: Union[str, bytes], request_id: str) -> str:
42
+ """Encrypt data with encryption key if provided, otherwise return unencrypted data"""
43
+ cache = get_cache()
44
+ encryption_cache_key = get_encryption_cache_key(
45
+ privacy_request_id=request_id,
46
+ encryption_attr="key",
47
+ )
48
+ if isinstance(data, bytes):
49
+ data = data.decode(CONFIG.security.encoding)
50
+
51
+ encryption_key: str | None = cache.get(encryption_cache_key)
52
+ if not encryption_key:
53
+ return data
54
+
55
+ bytes_encryption_key: bytes = encryption_key.encode(
56
+ encoding=CONFIG.security.encoding
57
+ )
58
+ nonce: bytes = secrets.token_bytes(CONFIG.security.aes_gcm_nonce_length)
59
+ # b64encode the entire nonce and the encrypted message together
60
+ return bytes_to_b64_str(
61
+ nonce
62
+ + encrypt_to_bytes_verify_secrets_length(data, bytes_encryption_key, nonce)
63
+ )
64
+
65
+
36
66
  def write_to_in_memory_buffer(
37
- resp_format: str, data: dict[str, Any], privacy_request: PrivacyRequest
67
+ resp_format: str, data: Dict[str, Any], privacy_request: PrivacyRequest
38
68
  ) -> BytesIO:
39
69
  """Write JSON/CSV data to in-memory file-like object to be passed to S3 or GCS. Encrypt data if encryption key/nonce
40
70
  has been cached for the given privacy request id
@@ -43,62 +73,46 @@ def write_to_in_memory_buffer(
43
73
  :param data: Dict
44
74
  :param request_id: str, The privacy request id
45
75
  """
46
-
47
76
  logger.debug("Writing data to in-memory buffer")
48
- try:
49
- if resp_format == ResponseFormat.html.value:
50
- return DsrReportBuilder(
51
- privacy_request=privacy_request,
52
- dsr_data=data,
53
- ).generate()
54
-
55
- if resp_format == ResponseFormat.json.value:
56
- return convert_dict_to_encrypted_json(data, privacy_request.id)
57
-
58
- if resp_format == ResponseFormat.csv.value:
59
- zipped_csvs = BytesIO()
60
- with zipfile.ZipFile(zipped_csvs, "w") as f:
61
- write_csv_to_zip(f, data, privacy_request.id)
62
- zipped_csvs.seek(0)
63
- return zipped_csvs
64
- except Exception as e:
65
- logger.error(f"Error writing data to in-memory buffer: {str(e)}")
66
- raise e
67
-
68
- raise NotImplementedError(f"No handling for response format {resp_format}.")
69
-
70
-
71
- def convert_dict_to_encrypted_json(
72
- data: dict[str, Any], privacy_request_id: str
73
- ) -> BytesIO:
74
- """Convert data to JSON and encrypt it.
75
-
76
- Args:
77
- data: The data to convert and encrypt
78
- privacy_request_id: The ID of the privacy request for encryption
79
77
 
80
- Returns:
81
- BytesIO: A file-like object containing the encrypted JSON data
82
-
83
- Raises:
84
- Exception: If JSON conversion fails
85
- """
86
- try:
78
+ if resp_format == ResponseFormat.json.value:
87
79
  json_str = json.dumps(data, indent=2, default=StorageJSONEncoder().default)
88
80
  return BytesIO(
89
- encrypt_access_request_results(json_str, privacy_request_id).encode(
81
+ encrypt_access_request_results(json_str, privacy_request.id).encode(
90
82
  CONFIG.security.encoding
91
83
  )
92
84
  )
93
- except Exception as e:
94
- logger.error(f"Error converting data to JSON: {str(e)}")
95
- logger.error(f"Data that failed to convert: {data}")
96
- raise
85
+
86
+ if resp_format == ResponseFormat.csv.value:
87
+ zipped_csvs = BytesIO()
88
+ with zipfile.ZipFile(zipped_csvs, "w") as f:
89
+ for key in data:
90
+ df = pd.json_normalize(data[key])
91
+ buffer = BytesIO()
92
+ df.to_csv(buffer, index=False, encoding=CONFIG.security.encoding)
93
+ buffer.seek(0)
94
+ f.writestr(
95
+ f"{key}.csv",
96
+ encrypt_access_request_results(
97
+ buffer.getvalue(), privacy_request.id
98
+ ),
99
+ )
100
+
101
+ zipped_csvs.seek(0)
102
+ return zipped_csvs
103
+
104
+ if resp_format == ResponseFormat.html.value:
105
+ return DsrReportBuilder(
106
+ privacy_request=privacy_request,
107
+ dsr_data=data,
108
+ ).generate()
109
+
110
+ raise NotImplementedError(f"No handling for response format {resp_format}.")
97
111
 
98
112
 
99
113
  def upload_to_s3( # pylint: disable=R0913
100
- storage_secrets: dict[StorageSecrets, Any],
101
- data: dict,
114
+ storage_secrets: Dict[StorageSecrets, Any],
115
+ data: Dict,
102
116
  bucket_name: str,
103
117
  file_key: str,
104
118
  resp_format: str,
@@ -126,22 +140,18 @@ def upload_to_s3( # pylint: disable=R0913
126
140
  "storage", {}
127
141
  ).get("aws_s3_assume_role_arn"),
128
142
  )
129
- except (ClientError, ParamValidationError) as e:
130
- logger.error(f"Error getting s3 client: {str(e)}")
131
- raise StorageUploadError(f"Error getting s3 client: {str(e)}")
132
143
 
133
- # handles file chunking
134
- try:
135
- s3_client.upload_fileobj(
136
- Fileobj=write_to_in_memory_buffer(resp_format, data, privacy_request),
137
- Bucket=bucket_name,
138
- Key=file_key,
139
- )
140
- except ClientError as e:
141
- logger.error("Encountered error while uploading s3 object: {}", e)
142
- raise StorageUploadError(f"Error uploading to S3: {e}")
144
+ # handles file chunking
145
+ try:
146
+ s3_client.upload_fileobj(
147
+ Fileobj=write_to_in_memory_buffer(resp_format, data, privacy_request),
148
+ Bucket=bucket_name,
149
+ Key=file_key,
150
+ )
151
+ except Exception as e:
152
+ logger.error("Encountered error while uploading s3 object: {}", e)
153
+ raise e
143
154
 
144
- try:
145
155
  presigned_url: AnyHttpUrlString = create_presigned_url_for_s3(
146
156
  s3_client, bucket_name, file_key
147
157
  )
@@ -152,11 +162,13 @@ def upload_to_s3( # pylint: disable=R0913
152
162
  "Encountered error while uploading and generating link for s3 object: {}", e
153
163
  )
154
164
  raise StorageUploadError(f"Error uploading to S3: {e}")
165
+ except ParamValidationError as e:
166
+ raise StorageUploadError(f"The parameters you provided are incorrect: {e}")
155
167
 
156
168
 
157
169
  def upload_to_gcs(
158
- storage_secrets: dict,
159
- data: dict,
170
+ storage_secrets: Dict,
171
+ data: Dict,
160
172
  bucket_name: str,
161
173
  file_key: str,
162
174
  resp_format: str,
@@ -165,30 +177,24 @@ def upload_to_gcs(
165
177
  ) -> str:
166
178
  """Uploads access request data to a Google Cloud Storage bucket"""
167
179
  logger.info("Starting Google Cloud Storage upload of {}", file_key)
168
- content_type = {
169
- ResponseFormat.json.value: "application/json",
170
- ResponseFormat.csv.value: "application/zip",
171
- ResponseFormat.html.value: "application/zip",
172
- }
173
-
174
- blob = get_gcs_blob(auth_method, storage_secrets, bucket_name, file_key)
175
- in_memory_file = write_to_in_memory_buffer(resp_format, data, privacy_request)
176
180
 
177
181
  try:
182
+ storage_client = get_gcs_client(auth_method, storage_secrets)
183
+ bucket = storage_client.bucket(bucket_name)
184
+
185
+ blob = bucket.blob(file_key)
186
+ in_memory_file = write_to_in_memory_buffer(resp_format, data, privacy_request)
187
+ content_type = {
188
+ ResponseFormat.json.value: "application/json",
189
+ ResponseFormat.csv.value: "application/zip",
190
+ ResponseFormat.html.value: "application/zip",
191
+ }
178
192
  blob.upload_from_string(
179
193
  in_memory_file.getvalue(), content_type=content_type[resp_format]
180
194
  )
181
- except Exception as e:
182
- logger.error("Error uploading to GCS: {}", str(e))
183
- logger.error(
184
- "Encountered error while uploading and generating link for Google Cloud Storage object: {}",
185
- e,
186
- )
187
- raise
188
195
 
189
- logger.info("File {} uploaded to {}", file_key, blob.public_url)
196
+ logger.info("File {} uploaded to {}", file_key, blob.public_url)
190
197
 
191
- try:
192
198
  presigned_url = blob.generate_signed_url(
193
199
  version="v4",
194
200
  expiration=CONFIG.security.subject_request_download_link_ttl_seconds,
@@ -204,7 +210,7 @@ def upload_to_gcs(
204
210
 
205
211
 
206
212
  def upload_to_local(
207
- data: dict,
213
+ data: Dict,
208
214
  file_key: str,
209
215
  privacy_request: PrivacyRequest,
210
216
  resp_format: str = ResponseFormat.json.value,
fides/api/util/cache.py CHANGED
@@ -1,5 +1,4 @@
1
1
  import json
2
- import os
3
2
  from typing import Any, Dict, List, Optional, Union
4
3
  from urllib.parse import unquote_to_bytes
5
4
 
@@ -28,7 +27,6 @@ from fides.config import CONFIG
28
27
  RedisValue = Union[bytes, float, int, str]
29
28
 
30
29
  _connection = None
31
- _read_only_connection = None
32
30
 
33
31
 
34
32
  class FidesopsRedis(Redis):
@@ -159,36 +157,6 @@ class FidesopsRedis(Redis):
159
157
  return list_length
160
158
 
161
159
 
162
- # FIXME: Ideally we don't want our code to be aware of the way tests are run,
163
- # e.g that we run them in parallel with pytest-xdist. We need to find a way
164
- # to change the pytest_configure_node hook to set the correct environment variable
165
- # like we do for the readonly database. It wasn't working so we're using this workaround for now.
166
- def _determine_redis_db_index(
167
- read_only: Optional[bool] = False,
168
- ) -> int: # pragma: no cover
169
- """Return the Redis DB index that should be used for the current process.
170
-
171
- Behavior:
172
- 1. Test mode:
173
- - If running under xdist, map `gwN` → DB `N + 1` (reserve DB 0).
174
- - If *not* running under xdist, always use DB 1.
175
-
176
- 2. Non-test mode: return the value already present in `CONFIG.redis.db_index`
177
- """
178
-
179
- # 1. Test mode logic
180
- if CONFIG.test_mode:
181
- worker_id = os.getenv("PYTEST_XDIST_WORKER")
182
- if worker_id and worker_id.startswith("gw"):
183
- suffix = worker_id[2:]
184
- if suffix.isdigit():
185
- return int(suffix) + 1 # gw0 -> 1, gw1 -> 2, etc.
186
- return CONFIG.redis.test_db_index
187
-
188
- # 2. Non-test mode
189
- return CONFIG.redis.read_only_db_index if read_only else CONFIG.redis.db_index
190
-
191
-
192
160
  def get_cache(should_log: Optional[bool] = False) -> FidesopsRedis:
193
161
  """Return a singleton connection to our Redis cache"""
194
162
 
@@ -205,7 +173,7 @@ def get_cache(should_log: Optional[bool] = False) -> FidesopsRedis:
205
173
  decode_responses=CONFIG.redis.decode_responses,
206
174
  host=CONFIG.redis.host,
207
175
  port=CONFIG.redis.port,
208
- db=_determine_redis_db_index(),
176
+ db=CONFIG.redis.db_index,
209
177
  username=CONFIG.redis.user,
210
178
  password=CONFIG.redis.password,
211
179
  ssl=CONFIG.redis.ssl,
@@ -234,50 +202,6 @@ def get_cache(should_log: Optional[bool] = False) -> FidesopsRedis:
234
202
  return _connection
235
203
 
236
204
 
237
- def get_read_only_cache() -> FidesopsRedis:
238
- """
239
- Return a singleton connection to the read-only Redis cache.
240
- If read-only is not enabled, return the regular cache.
241
- """
242
- # If read-only is not enabled, return the regular cache
243
- if not CONFIG.redis.read_only_enabled:
244
- logger.debug(
245
- "Read-only Redis is not enabled. Returning writeable cache connection instead."
246
- )
247
- return get_cache()
248
-
249
- global _read_only_connection # pylint: disable=W0603
250
- if _read_only_connection is None:
251
- logger.debug("Creating new read-only Redis connection...")
252
- _read_only_connection = FidesopsRedis( # type: ignore[call-overload]
253
- charset=CONFIG.redis.charset,
254
- decode_responses=CONFIG.redis.decode_responses,
255
- host=CONFIG.redis.read_only_host,
256
- port=CONFIG.redis.read_only_port,
257
- db=_determine_redis_db_index(read_only=True),
258
- username=CONFIG.redis.read_only_user,
259
- password=CONFIG.redis.read_only_password,
260
- ssl=CONFIG.redis.read_only_ssl,
261
- ssl_ca_certs=CONFIG.redis.read_only_ssl_ca_certs,
262
- ssl_cert_reqs=CONFIG.redis.read_only_ssl_cert_reqs,
263
- )
264
- logger.debug("New read-only Redis connection created.")
265
-
266
- try:
267
- connected = _read_only_connection.ping()
268
- logger.debug("Read-only Redis connection succeeded.")
269
- except ConnectionErrorFromRedis:
270
- connected = False
271
-
272
- if not connected:
273
- logger.error(
274
- "Unable to establish read-only Redis connection. Returning writeable cache connection instead."
275
- )
276
- return get_cache()
277
-
278
- return _read_only_connection
279
-
280
-
281
205
  def get_identity_cache_key(privacy_request_id: str, identity_attribute: str) -> str:
282
206
  """Return the key at which to save this PrivacyRequest's identity for the passed in attribute"""
283
207
  # TODO: Remove this prefix
@@ -18,7 +18,7 @@ from fides.api.models.privacy_request import (
18
18
  )
19
19
  from fides.api.models.sql_models import System # type: ignore[attr-defined]
20
20
  from fides.api.models.tcf_purpose_overrides import TCFPurposeOverride
21
- from fides.api.schemas.privacy_request import ExecutionLogStatus
21
+ from fides.api.models.worker_task import ExecutionLogStatus
22
22
  from fides.api.schemas.redis_cache import Identity
23
23
 
24
24