ethyca-fides 2.63.0rc3__py2.py3-none-any.whl → 2.63.1__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (129)
  1. {ethyca_fides-2.63.0rc3.dist-info → ethyca_fides-2.63.1.dist-info}/METADATA +1 -1
  2. {ethyca_fides-2.63.0rc3.dist-info → ethyca_fides-2.63.1.dist-info}/RECORD +129 -110
  3. fides/_version.py +3 -3
  4. fides/api/alembic/migrations/versions/29e56fa1fdb3_add_monitor_tasks.py +147 -0
  5. fides/api/alembic/migrations/versions/5efcdf18438e_add_manual_task_tables.py +160 -0
  6. fides/api/api/v1/endpoints/privacy_request_endpoints.py +4 -4
  7. fides/api/db/base.py +7 -1
  8. fides/api/models/connectionconfig.py +1 -1
  9. fides/api/models/detection_discovery/__init__.py +35 -0
  10. fides/api/models/detection_discovery/monitor_task.py +162 -0
  11. fides/api/models/field_types/__init__.py +5 -0
  12. fides/api/models/field_types/encrypted_large_data.py +151 -0
  13. fides/api/models/manual_tasks/__init__.py +8 -0
  14. fides/api/models/manual_tasks/manual_task.py +110 -0
  15. fides/api/models/manual_tasks/manual_task_log.py +100 -0
  16. fides/api/models/privacy_preference.py +1 -1
  17. fides/api/models/privacy_request/execution_log.py +3 -31
  18. fides/api/models/privacy_request/privacy_request.py +16 -3
  19. fides/api/models/privacy_request/request_task.py +36 -25
  20. fides/api/models/worker_task.py +96 -0
  21. fides/api/schemas/external_storage.py +22 -0
  22. fides/api/schemas/manual_tasks/__init__.py +0 -0
  23. fides/api/schemas/manual_tasks/manual_task_schemas.py +79 -0
  24. fides/api/schemas/manual_tasks/manual_task_status.py +151 -0
  25. fides/api/schemas/privacy_request.py +1 -12
  26. fides/api/service/connectors/base_erasure_email_connector.py +1 -1
  27. fides/api/service/connectors/consent_email_connector.py +2 -1
  28. fides/api/service/connectors/dynamic_erasure_email_connector.py +2 -1
  29. fides/api/service/connectors/erasure_email_connector.py +1 -1
  30. fides/api/service/external_data_storage.py +371 -0
  31. fides/api/service/privacy_request/request_runner_service.py +5 -5
  32. fides/api/service/privacy_request/request_service.py +1 -1
  33. fides/api/task/create_request_tasks.py +1 -1
  34. fides/api/task/execute_request_tasks.py +9 -8
  35. fides/api/task/graph_task.py +22 -10
  36. fides/api/util/consent_util.py +1 -1
  37. fides/api/util/data_size.py +102 -0
  38. fides/api/util/encryption/aes_gcm_encryption_util.py +271 -0
  39. fides/service/manual_tasks/__init__.py +0 -0
  40. fides/service/manual_tasks/manual_task_service.py +150 -0
  41. fides/service/privacy_request/privacy_request_service.py +1 -1
  42. fides/ui-build/static/admin/404.html +1 -1
  43. fides/ui-build/static/admin/add-systems/manual.html +1 -1
  44. fides/ui-build/static/admin/add-systems/multiple.html +1 -1
  45. fides/ui-build/static/admin/add-systems.html +1 -1
  46. fides/ui-build/static/admin/consent/configure/add-vendors.html +1 -1
  47. fides/ui-build/static/admin/consent/configure.html +1 -1
  48. fides/ui-build/static/admin/consent/privacy-experience/[id].html +1 -1
  49. fides/ui-build/static/admin/consent/privacy-experience/new.html +1 -1
  50. fides/ui-build/static/admin/consent/privacy-experience.html +1 -1
  51. fides/ui-build/static/admin/consent/privacy-notices/[id].html +1 -1
  52. fides/ui-build/static/admin/consent/privacy-notices/new.html +1 -1
  53. fides/ui-build/static/admin/consent/privacy-notices.html +1 -1
  54. fides/ui-build/static/admin/consent/properties.html +1 -1
  55. fides/ui-build/static/admin/consent/reporting.html +1 -1
  56. fides/ui-build/static/admin/consent.html +1 -1
  57. fides/ui-build/static/admin/data-catalog/[systemId]/projects/[projectUrn]/[resourceUrn].html +1 -1
  58. fides/ui-build/static/admin/data-catalog/[systemId]/projects/[projectUrn].html +1 -1
  59. fides/ui-build/static/admin/data-catalog/[systemId]/projects.html +1 -1
  60. fides/ui-build/static/admin/data-catalog/[systemId]/resources/[resourceUrn].html +1 -1
  61. fides/ui-build/static/admin/data-catalog/[systemId]/resources.html +1 -1
  62. fides/ui-build/static/admin/data-catalog.html +1 -1
  63. fides/ui-build/static/admin/data-discovery/action-center/[monitorId]/[systemId].html +1 -1
  64. fides/ui-build/static/admin/data-discovery/action-center/[monitorId].html +1 -1
  65. fides/ui-build/static/admin/data-discovery/action-center.html +1 -1
  66. fides/ui-build/static/admin/data-discovery/activity.html +1 -1
  67. fides/ui-build/static/admin/data-discovery/detection/[resourceUrn].html +1 -1
  68. fides/ui-build/static/admin/data-discovery/detection.html +1 -1
  69. fides/ui-build/static/admin/data-discovery/discovery/[resourceUrn].html +1 -1
  70. fides/ui-build/static/admin/data-discovery/discovery.html +1 -1
  71. fides/ui-build/static/admin/datamap.html +1 -1
  72. fides/ui-build/static/admin/dataset/[datasetId]/[collectionName]/[...subfieldNames].html +1 -1
  73. fides/ui-build/static/admin/dataset/[datasetId]/[collectionName].html +1 -1
  74. fides/ui-build/static/admin/dataset/[datasetId].html +1 -1
  75. fides/ui-build/static/admin/dataset/new.html +1 -1
  76. fides/ui-build/static/admin/dataset.html +1 -1
  77. fides/ui-build/static/admin/datastore-connection/[id].html +1 -1
  78. fides/ui-build/static/admin/datastore-connection/new.html +1 -1
  79. fides/ui-build/static/admin/datastore-connection.html +1 -1
  80. fides/ui-build/static/admin/index.html +1 -1
  81. fides/ui-build/static/admin/integrations/[id].html +1 -1
  82. fides/ui-build/static/admin/integrations.html +1 -1
  83. fides/ui-build/static/admin/login/[provider].html +1 -1
  84. fides/ui-build/static/admin/login.html +1 -1
  85. fides/ui-build/static/admin/messaging/[id].html +1 -1
  86. fides/ui-build/static/admin/messaging/add-template.html +1 -1
  87. fides/ui-build/static/admin/messaging.html +1 -1
  88. fides/ui-build/static/admin/poc/ant-components.html +1 -1
  89. fides/ui-build/static/admin/poc/form-experiments/AntForm.html +1 -1
  90. fides/ui-build/static/admin/poc/form-experiments/FormikAntFormItem.html +1 -1
  91. fides/ui-build/static/admin/poc/form-experiments/FormikControlled.html +1 -1
  92. fides/ui-build/static/admin/poc/form-experiments/FormikField.html +1 -1
  93. fides/ui-build/static/admin/poc/form-experiments/FormikSpreadField.html +1 -1
  94. fides/ui-build/static/admin/poc/forms.html +1 -1
  95. fides/ui-build/static/admin/poc/table-migration.html +1 -1
  96. fides/ui-build/static/admin/privacy-requests/[id].html +1 -1
  97. fides/ui-build/static/admin/privacy-requests/configure/messaging.html +1 -1
  98. fides/ui-build/static/admin/privacy-requests/configure/storage.html +1 -1
  99. fides/ui-build/static/admin/privacy-requests/configure.html +1 -1
  100. fides/ui-build/static/admin/privacy-requests.html +1 -1
  101. fides/ui-build/static/admin/properties/[id].html +1 -1
  102. fides/ui-build/static/admin/properties/add-property.html +1 -1
  103. fides/ui-build/static/admin/properties.html +1 -1
  104. fides/ui-build/static/admin/reporting/datamap.html +1 -1
  105. fides/ui-build/static/admin/settings/about/alpha.html +1 -1
  106. fides/ui-build/static/admin/settings/about.html +1 -1
  107. fides/ui-build/static/admin/settings/consent/[configuration_id]/[purpose_id].html +1 -1
  108. fides/ui-build/static/admin/settings/consent.html +1 -1
  109. fides/ui-build/static/admin/settings/custom-fields.html +1 -1
  110. fides/ui-build/static/admin/settings/domain-records.html +1 -1
  111. fides/ui-build/static/admin/settings/domains.html +1 -1
  112. fides/ui-build/static/admin/settings/email-templates.html +1 -1
  113. fides/ui-build/static/admin/settings/locations.html +1 -1
  114. fides/ui-build/static/admin/settings/organization.html +1 -1
  115. fides/ui-build/static/admin/settings/regulations.html +1 -1
  116. fides/ui-build/static/admin/systems/configure/[id]/test-datasets.html +1 -1
  117. fides/ui-build/static/admin/systems/configure/[id].html +1 -1
  118. fides/ui-build/static/admin/systems.html +1 -1
  119. fides/ui-build/static/admin/taxonomy.html +1 -1
  120. fides/ui-build/static/admin/user-management/new.html +1 -1
  121. fides/ui-build/static/admin/user-management/profile/[id].html +1 -1
  122. fides/ui-build/static/admin/user-management.html +1 -1
  123. {ethyca_fides-2.63.0rc3.dist-info → ethyca_fides-2.63.1.dist-info}/WHEEL +0 -0
  124. {ethyca_fides-2.63.0rc3.dist-info → ethyca_fides-2.63.1.dist-info}/entry_points.txt +0 -0
  125. {ethyca_fides-2.63.0rc3.dist-info → ethyca_fides-2.63.1.dist-info}/licenses/LICENSE +0 -0
  126. {ethyca_fides-2.63.0rc3.dist-info → ethyca_fides-2.63.1.dist-info}/top_level.txt +0 -0
  127. /fides/api/models/{detection_discovery.py → detection_discovery/core.py} +0 -0
  128. /fides/ui-build/static/admin/_next/static/{XobHpfndIH7IpV30u2vGV → SZn_Fpr_qG1COMjkdloep}/_buildManifest.js +0 -0
  129. /fides/ui-build/static/admin/_next/static/{XobHpfndIH7IpV30u2vGV → SZn_Fpr_qG1COMjkdloep}/_ssgManifest.js +0 -0
@@ -0,0 +1,79 @@
1
+ from datetime import datetime
2
+ from enum import Enum
3
+ from typing import Annotated, Any, Optional
4
+
5
+ from pydantic import ConfigDict, Field
6
+
7
+ from fides.api.schemas.base_class import FidesSchema
8
+
9
+
10
class ManualTaskType(str, Enum):
    """String-valued enum of manual task types.

    The ``str`` mixin makes each member compare equal to its raw string value.
    """

    # The single task type declared so far.
    privacy_request = "privacy_request"
    # Add more task types as needed
15
+
16
+
17
class ManualTaskParentEntityType(str, Enum):
    """String-valued enum of manual task parent entity types."""

    # used for access and erasure privacy requests
    connection_config = "connection_config"
    # Add more parent entity types as needed
24
+
25
+
26
class ManualTaskReferenceType(str, Enum):
    """String-valued enum of manual task reference types.

    Every member's value is identical to its name.
    """

    privacy_request = "privacy_request"
    connection_config = "connection_config"
    manual_task_config = "manual_task_config"
    # Reference to the user assigned to the task
    assigned_user = "assigned_user"
    # Add more reference types as needed
34
+
35
+
36
class ManualTaskLogStatus(str, Enum):
    """String-valued enum of the statuses a manual task log entry can carry.

    Every member's value is identical to its name.
    """

    created = "created"
    updated = "updated"
    in_processing = "in_processing"
    complete = "complete"
    error = "error"
    retrying = "retrying"
    paused = "paused"
    awaiting_input = "awaiting_input"
47
+
48
+
49
class ManualTaskLogCreate(FidesSchema):
    """Request payload used to create a manual task log entry.

    ``task_id`` and ``status`` are required; every other field defaults to
    ``None``. Unknown keys are rejected (``extra="forbid"``).
    """

    model_config = ConfigDict(extra="forbid")

    # Required fields
    task_id: Annotated[str, Field(..., description="ID of the task")]
    status: Annotated[ManualTaskLogStatus, Field(..., description="Log status")]

    # Optional fields
    message: Annotated[
        Optional[str], Field(default=None, description="Log message")
    ]
    details: Annotated[
        Optional[dict[str, Any]],
        Field(default=None, description="Additional details"),
    ]
    config_id: Annotated[
        Optional[str], Field(default=None, description="Configuration ID")
    ]
    instance_id: Annotated[
        Optional[str], Field(default=None, description="Instance ID")
    ]
62
+
63
+
64
class ManualTaskLogResponse(FidesSchema):
    """Response payload describing a stored manual task log entry.

    Carries the same fields as the create schema plus the log ``id`` and the
    ``created_at`` / ``updated_at`` timestamps. Unknown keys are rejected
    (``extra="forbid"``).
    """

    model_config = ConfigDict(extra="forbid")

    # Required identifiers and status
    id: Annotated[str, Field(..., description="Log ID")]
    task_id: Annotated[str, Field(..., description="Task ID")]
    status: Annotated[ManualTaskLogStatus, Field(..., description="Log status")]

    # Optional fields
    message: Annotated[
        Optional[str], Field(default=None, description="Log message")
    ]
    details: Annotated[
        Optional[dict[str, Any]],
        Field(default=None, description="Additional details"),
    ]
    config_id: Annotated[
        Optional[str], Field(default=None, description="Configuration ID")
    ]
    instance_id: Annotated[
        Optional[str], Field(default=None, description="Instance ID")
    ]

    # Required timestamps
    created_at: Annotated[datetime, Field(..., description="Creation timestamp")]
    updated_at: Annotated[datetime, Field(..., description="Last update timestamp")]
@@ -0,0 +1,151 @@
1
+ from datetime import datetime, timezone
2
+ from enum import Enum as EnumType
3
+ from typing import Optional
4
+
5
+ from sqlalchemy.orm import Session
6
+
7
+
8
class StatusTransitionNotAllowed(Exception):
    """Raised when a requested status transition is not permitted.

    The human-readable explanation is available both as ``str(exc)`` and as
    the ``message`` attribute.
    """

    def __init__(self, message: str):
        super().__init__(message)
        self.message = message
14
+
15
+
16
class StatusType(str, EnumType):
    """String-valued enum of manual task statuses and their allowed successors."""

    pending = "pending"
    in_progress = "in_progress"
    completed = "completed"
    failed = "failed"

    @classmethod
    def get_valid_transitions(cls, current_status: "StatusType") -> list["StatusType"]:
        """Return the statuses that may directly follow ``current_status``.

        Args:
            current_status: The status to transition from.

        Returns:
            list[StatusType]: The allowed next statuses. ``completed`` has none,
            and any value that matches no member also yields an empty list.
        """
        # Built inside the method: a dict or tuple assigned in the class body
        # would itself be turned into an enum member.
        transition_table = (
            (cls.pending, [cls.in_progress, cls.failed]),
            (cls.in_progress, [cls.completed, cls.failed]),
            (cls.completed, []),
            (cls.failed, [cls.pending, cls.in_progress]),
        )
        # Scan with ``==`` (not a hash lookup) so that plain strings and
        # unhashable inputs are treated exactly as an if-chain would treat them.
        for source_status, successors in transition_table:
            if current_status == source_status:
                return successors
        return []
43
+
44
+
45
class StatusTransitionMixin:
    """Status handling shared by models that carry a ``StatusType`` status.

    Provides transition validation, completion bookkeeping, convenience
    ``mark_*`` / ``start_progress`` / ``reset_to_pending`` helpers and
    read-only ``is_*`` flags.
    """

    # Annotations only: the implementing class is expected to supply these.
    status: StatusType
    completed_at: Optional[datetime]
    completed_by_id: Optional[str]

    def _get_valid_transitions(self) -> list[StatusType]:
        """Return the statuses reachable from the current ``self.status``.

        Returns:
            list[StatusType]: Result of ``StatusType.get_valid_transitions``.
        """
        return StatusType.get_valid_transitions(self.status)

    def _validate_status_transition(self, new_status: StatusType) -> None:
        """Ensure that moving to ``new_status`` is allowed; return silently if so.

        Args:
            new_status: The status the caller wants to move to.

        Raises:
            StatusTransitionNotAllowed: If ``new_status`` equals the current
                status or is not among the valid transitions.
        """
        # A "transition" to the status we are already in is rejected outright.
        if new_status == self.status:
            raise StatusTransitionNotAllowed(
                f"Invalid status transition: already in status {new_status}"
            )

        allowed = self._get_valid_transitions()
        if new_status in allowed:
            return

        raise StatusTransitionNotAllowed(
            f"Invalid status transition from {self.status} to {new_status}. "
            f"Valid transitions are: {allowed}"
        )

    def update_status(
        self, db: Session, new_status: StatusType, user_id: Optional[str] = None
    ) -> None:
        """Validate the transition, update completion fields, then persist.

        Args:
            db: Database session used to add and commit this object.
            new_status: Status to move to.
            user_id: Optional ID of the user making the change; recorded as
                ``completed_by_id`` only when moving to ``completed``.

        Raises:
            StatusTransitionNotAllowed: If the transition is not allowed.
        """
        self._validate_status_transition(new_status)

        # Work out the new completion bookkeeping (if any) for this transition.
        completion_fields = None
        if new_status == StatusType.completed:
            completion_fields = (datetime.now(timezone.utc), user_id)
        elif new_status == StatusType.pending:
            # Going back to pending clears any earlier completion record.
            completion_fields = (None, None)

        if completion_fields is not None:
            self.completed_at, self.completed_by_id = completion_fields

        self.status = new_status
        db.add(self)
        db.commit()

    def mark_completed(self, db: Session, user_id: str) -> None:
        """Move to ``completed`` and record who completed it.

        Args:
            db: Database session.
            user_id: ID of the user who completed the task.
        """
        self.update_status(db, StatusType.completed, user_id)

    def mark_failed(self, db: Session) -> None:
        """Move to ``failed``."""
        self.update_status(db, StatusType.failed)

    def start_progress(self, db: Session) -> None:
        """Move to ``in_progress``."""
        self.update_status(db, StatusType.in_progress)

    def reset_to_pending(self, db: Session) -> None:
        """Move back to ``pending``."""
        self.update_status(db, StatusType.pending)

    @property
    def is_completed(self) -> bool:
        """Whether the current status is ``completed``."""
        return self.status == StatusType.completed

    @property
    def is_failed(self) -> bool:
        """Whether the current status is ``failed``."""
        return self.status == StatusType.failed

    @property
    def is_in_progress(self) -> bool:
        """Whether the current status is ``in_progress``."""
        return self.status == StatusType.in_progress

    @property
    def is_pending(self) -> bool:
        """Whether the current status is ``pending``."""
        return self.status == StatusType.pending
@@ -8,6 +8,7 @@ from pydantic import ConfigDict, Field, field_serializer, field_validator
8
8
  from fides.api.custom_types import SafeStr
9
9
  from fides.api.graph.config import CollectionAddress
10
10
  from fides.api.models.audit_log import AuditLogAction
11
+ from fides.api.models.worker_task import ExecutionLogStatus
11
12
  from fides.api.schemas.api import BulkResponse, BulkUpdateFailed
12
13
  from fides.api.schemas.base_class import FidesSchema
13
14
  from fides.api.schemas.policy import ActionType, CurrentStep
@@ -141,18 +142,6 @@ class FieldsAffectedResponse(FidesSchema):
141
142
  model_config = ConfigDict(from_attributes=True, use_enum_values=True)
142
143
 
143
144
 
144
- class ExecutionLogStatus(EnumType):
145
- """Enum for execution log statuses, reflecting where they are in their workflow"""
146
-
147
- in_processing = "in_processing"
148
- pending = "pending"
149
- complete = "complete"
150
- error = "error"
151
- awaiting_processing = "paused" # "paused" in the database to avoid a migration, but use "awaiting_processing" in the app
152
- retrying = "retrying"
153
- skipped = "skipped"
154
-
155
-
156
145
  class ExecutionLogStatusSerializeOverride(FidesSchema):
157
146
  """Override to serialize "paused" Execution Logs as awaiting_processing instead"""
158
147
 
@@ -5,6 +5,7 @@ from sqlalchemy.orm import Session
5
5
  from fides.api.models.connectionconfig import ConnectionConfig, ConnectionType
6
6
  from fides.api.models.policy import Rule
7
7
  from fides.api.models.privacy_request import ExecutionLog, PrivacyRequest
8
+ from fides.api.models.worker_task import ExecutionLogStatus
8
9
  from fides.api.schemas.connection_configuration.connection_secrets_email import (
9
10
  AdvancedSettings,
10
11
  BaseEmailSchema,
@@ -15,7 +16,6 @@ from fides.api.schemas.messaging.messaging import (
15
16
  MessagingActionType,
16
17
  )
17
18
  from fides.api.schemas.policy import ActionType
18
- from fides.api.schemas.privacy_request import ExecutionLogStatus
19
19
  from fides.api.schemas.redis_cache import Identity
20
20
  from fides.api.service.connectors.base_email_connector import (
21
21
  BaseEmailConnector,
@@ -16,6 +16,7 @@ from fides.api.models.privacy_notice import (
16
16
  )
17
17
  from fides.api.models.privacy_preference import PrivacyPreferenceHistory
18
18
  from fides.api.models.privacy_request import ExecutionLog, PrivacyRequest
19
+ from fides.api.models.worker_task import ExecutionLogStatus
19
20
  from fides.api.schemas.connection_configuration.connection_secrets_email import (
20
21
  AdvancedSettingsWithExtendedIdentityTypes,
21
22
  ExtendedEmailSchema,
@@ -29,7 +30,7 @@ from fides.api.schemas.messaging.messaging import (
29
30
  from fides.api.schemas.policy import ActionType
30
31
  from fides.api.schemas.privacy_notice import PrivacyNoticeHistorySchema
31
32
  from fides.api.schemas.privacy_preference import MinimalPrivacyPreferenceHistorySchema
32
- from fides.api.schemas.privacy_request import Consent, ExecutionLogStatus
33
+ from fides.api.schemas.privacy_request import Consent
33
34
  from fides.api.schemas.redis_cache import Identity
34
35
  from fides.api.service.connectors.base_email_connector import (
35
36
  BaseEmailConnector,
@@ -17,11 +17,12 @@ from fides.api.models.privacy_request import (
17
17
  RequestTask,
18
18
  TraversalDetails,
19
19
  )
20
+ from fides.api.models.worker_task import ExecutionLogStatus
20
21
  from fides.api.schemas.connection_configuration.connection_secrets_dynamic_erasure_email import (
21
22
  DynamicErasureEmailSchema,
22
23
  )
23
24
  from fides.api.schemas.policy import ActionType
24
- from fides.api.schemas.privacy_request import ExecutionLogStatus, PrivacyRequestStatus
25
+ from fides.api.schemas.privacy_request import PrivacyRequestStatus
25
26
  from fides.api.service.connectors.base_connector import BaseConnector
26
27
  from fides.api.service.connectors.base_erasure_email_connector import (
27
28
  BaseErasureEmailConnector,
@@ -10,9 +10,9 @@ from fides.api.models.connectionconfig import (
10
10
  ConnectionType,
11
11
  )
12
12
  from fides.api.models.privacy_request import ExecutionLog
13
+ from fides.api.models.worker_task import ExecutionLogStatus
13
14
  from fides.api.schemas.connection_configuration import EmailSchema
14
15
  from fides.api.schemas.policy import ActionType
15
- from fides.api.schemas.privacy_request import ExecutionLogStatus
16
16
  from fides.api.service.connectors.base_erasure_email_connector import (
17
17
  BaseErasureEmailConnector,
18
18
  filter_user_identities_for_connector,
@@ -0,0 +1,371 @@
1
+ """
2
+ Service for handling external storage of large encrypted data.
3
+
4
+ This service provides a generic interface for storing large data that would
5
+ otherwise exceed database column size limits or impact performance.
6
+ """
7
+
8
+ import os
9
+ from io import BytesIO
10
+ from typing import Any, Optional
11
+
12
+ from loguru import logger
13
+ from sqlalchemy.orm import Session
14
+
15
+ from fides.api.models.storage import StorageConfig, get_active_default_storage_config
16
+ from fides.api.schemas.external_storage import ExternalStorageMetadata
17
+ from fides.api.schemas.storage.storage import StorageDetails, StorageType
18
+ from fides.api.service.storage.gcs import get_gcs_client
19
+ from fides.api.service.storage.s3 import generic_delete_from_s3, generic_upload_to_s3
20
+ from fides.api.service.storage.util import get_local_filename
21
+ from fides.api.util.aws_util import get_s3_client
22
+ from fides.api.util.encryption.aes_gcm_encryption_util import decrypt_data, encrypt_data
23
+
24
+
25
class ExternalDataStorageError(Exception):
    """Signals that an external data storage operation failed."""
27
+
28
+
29
class ExternalDataStorageService:
    """
    Service for storing large encrypted data externally.

    Handles:
    - Automatic encryption/decryption
    - Multiple storage backends (S3, GCS and the local filesystem)
    - Consistent file organization
    - Cleanup operations

    Every method is a ``staticmethod``; the class holds no state of its own.
    """

    @staticmethod
    def _get_storage_config(db: Session, storage_key: Optional[str]) -> "StorageConfig":
        """Resolve and return the StorageConfig to use.

        Preference order:

        1. If *storage_key* is provided, fetch that specific configuration.
        2. Otherwise, fall back to the *active* default storage configuration.

        Raises ExternalDataStorageError when no suitable configuration is found.
        """

        if storage_key:
            storage_config = (
                db.query(StorageConfig).filter(StorageConfig.key == storage_key).first()
            )
            if not storage_config:
                msg = f"Storage configuration with key '{storage_key}' not found"
                logger.error(msg)
                raise ExternalDataStorageError(msg)
            return storage_config

        # No explicit key – use the active default
        storage_config = get_active_default_storage_config(db)
        if not storage_config:
            msg = "No active default storage configuration available for large data"
            logger.error(msg)
            raise ExternalDataStorageError(msg)

        return storage_config

    @staticmethod
    def _get_bucket_and_auth(config: StorageConfig) -> tuple[Any, Any]:
        """Return ``(bucket_name, auth_method)`` read from ``config.details``."""
        return (
            config.details[StorageDetails.BUCKET.value],
            config.details[StorageDetails.AUTH_METHOD.value],
        )

    @staticmethod
    def _storage_type_value(metadata: ExternalStorageMetadata) -> Any:
        """Return the metadata's storage type as its raw value.

        ``metadata.storage_type`` may be a ``StorageType`` member or already a
        raw value; members are unwrapped so both compare against
        ``StorageType.<x>.value`` the same way.
        """
        storage_type = metadata.storage_type
        if isinstance(storage_type, StorageType):
            return storage_type.value
        return storage_type

    @staticmethod
    def store_data(
        db: Session,
        storage_path: str,
        data: Any,
        storage_key: Optional[str] = None,
    ) -> ExternalStorageMetadata:
        """
        Store data in external storage with encryption.

        Args:
            db: Database session
            storage_path: Path where data should be stored (e.g., "model/id/field/timestamp")
            data: The data to store (will be serialized and encrypted)
            storage_key: Optional specific storage config key to use

        Returns:
            ExternalStorageMetadata with storage details; ``filesize`` is the
            length of the encrypted payload.

        Raises:
            ExternalDataStorageError: If storage operation fails
        """
        try:
            storage_config = ExternalDataStorageService._get_storage_config(
                db, storage_key
            )

            # Serialize and encrypt the data
            encrypted_data = encrypt_data(data)
            file_size = len(encrypted_data)

            # Store to external storage based on type
            if storage_config.type == StorageType.s3:
                ExternalDataStorageService._store_to_s3(
                    storage_config, storage_path, encrypted_data
                )
            elif storage_config.type == StorageType.gcs:
                ExternalDataStorageService._store_to_gcs(
                    storage_config, storage_path, encrypted_data
                )
            elif storage_config.type == StorageType.local:
                ExternalDataStorageService._store_to_local(storage_path, encrypted_data)
            else:
                raise ExternalDataStorageError(
                    f"Unsupported storage type: {storage_config.type}"
                )

            # Create and return metadata
            metadata = ExternalStorageMetadata(
                storage_type=StorageType(storage_config.type.value),
                file_key=storage_path,
                filesize=file_size,
                storage_key=storage_config.key,
            )

            logger.info(
                f"Stored {file_size:,} bytes to {storage_config.type} storage "
                f"at path: {storage_path}"
            )

            return metadata

        except ExternalDataStorageError:
            # Already the public error type with a specific message (missing
            # config, unsupported backend). Re-raise it untouched, exactly as
            # retrieve_data does, instead of wrapping it a second time.
            raise
        except Exception as e:
            logger.error(f"Failed to store data externally: {str(e)}")
            raise ExternalDataStorageError(f"Failed to store data: {str(e)}") from e

    @staticmethod
    def retrieve_data(
        db: Session,
        metadata: ExternalStorageMetadata,
    ) -> Any:
        """
        Retrieve and decrypt data from external storage.

        Args:
            db: Database session
            metadata: Storage metadata containing location and details

        Returns:
            Decrypted and deserialized data

        Raises:
            ExternalDataStorageError: If retrieval operation fails
        """
        try:
            storage_config = ExternalDataStorageService._get_storage_config(
                db, metadata.storage_key
            )

            # Retrieve encrypted data based on storage type
            storage_type_value = ExternalDataStorageService._storage_type_value(
                metadata
            )

            if storage_type_value == StorageType.s3.value:
                encrypted_data = ExternalDataStorageService._retrieve_from_s3(
                    storage_config, metadata
                )
            elif storage_type_value == StorageType.gcs.value:
                encrypted_data = ExternalDataStorageService._retrieve_from_gcs(
                    storage_config, metadata
                )
            elif storage_type_value == StorageType.local.value:
                encrypted_data = ExternalDataStorageService._retrieve_from_local(
                    metadata
                )
            else:
                raise ExternalDataStorageError(
                    f"Unsupported storage type: {storage_type_value}"
                )

            # Handle case where download returns None
            if encrypted_data is None:
                raise ExternalDataStorageError(
                    f"No data found at path: {metadata.file_key}"
                )

            # Decrypt and deserialize
            data = decrypt_data(encrypted_data)

            # The size logged is the one recorded in the metadata, not a
            # re-measurement of the downloaded payload.
            logger.info(
                f"Retrieved {metadata.filesize:,} bytes from {storage_type_value} storage "
                f"at path: {metadata.file_key}"
            )

            return data

        except ExternalDataStorageError:
            raise
        except Exception as e:
            logger.error(f"Failed to retrieve data from external storage: {str(e)}")
            raise ExternalDataStorageError(f"Failed to retrieve data: {str(e)}") from e

    @staticmethod
    def delete_data(
        db: Session,
        metadata: ExternalStorageMetadata,
    ) -> None:
        """
        Delete data from external storage.

        Args:
            db: Database session
            metadata: Storage metadata containing location

        Note:
            This operation is best-effort and will log warnings on failure
            rather than raising exceptions, to support cleanup scenarios.
        """
        try:
            storage_config = ExternalDataStorageService._get_storage_config(
                db, metadata.storage_key
            )

            # Delete from external storage based on type
            storage_type_value = ExternalDataStorageService._storage_type_value(
                metadata
            )

            if storage_type_value == StorageType.s3.value:
                ExternalDataStorageService._delete_from_s3(storage_config, metadata)
            elif storage_type_value == StorageType.gcs.value:
                ExternalDataStorageService._delete_from_gcs(storage_config, metadata)
            elif storage_type_value == StorageType.local.value:
                ExternalDataStorageService._delete_from_local(metadata)
            else:
                logger.warning(
                    f"Unsupported storage type for cleanup: {storage_type_value}"
                )
                return

            logger.info(
                f"Deleted external storage file from {storage_type_value} storage "
                f"at path: {metadata.file_key}"
            )

        except Exception as e:
            # Log but don't raise - cleanup should be best effort
            logger.warning(
                f"Failed to delete external storage file at {metadata.file_key}: {str(e)}"
            )

    # Private helper methods for each storage type

    @staticmethod
    def _store_to_s3(config: StorageConfig, file_key: str, data: bytes) -> None:
        """Store data to S3 using existing generic_upload_to_s3"""
        bucket_name, auth_method = ExternalDataStorageService._get_bucket_and_auth(
            config
        )

        document = BytesIO(data)
        generic_upload_to_s3(
            storage_secrets=config.secrets,
            bucket_name=bucket_name,
            file_key=file_key,
            auth_method=auth_method,
            document=document,
        )

    @staticmethod
    def _store_to_gcs(config: StorageConfig, file_key: str, data: bytes) -> None:
        """Store data to GCS using existing get_gcs_client"""
        bucket_name, auth_method = ExternalDataStorageService._get_bucket_and_auth(
            config
        )

        storage_client = get_gcs_client(auth_method, config.secrets)
        bucket = storage_client.bucket(bucket_name)
        blob = bucket.blob(file_key)

        blob.upload_from_string(data, content_type="application/octet-stream")

    @staticmethod
    def _store_to_local(file_key: str, data: bytes) -> None:
        """Store data to local filesystem using existing get_local_filename"""
        file_path = get_local_filename(file_key)
        parent_dir = os.path.dirname(file_path)
        if parent_dir:
            # os.makedirs("") raises FileNotFoundError, so only create the
            # parent when the resolved path actually has a directory part.
            os.makedirs(parent_dir, exist_ok=True)
        with open(file_path, "wb") as f:
            f.write(data)

    @staticmethod
    def _retrieve_from_s3(
        config: StorageConfig, metadata: ExternalStorageMetadata
    ) -> bytes:
        """Retrieve data from S3 by downloading the whole object into memory.

        Uses the S3 client directly so content is downloaded regardless of
        file size. Any client error is logged and re-raised unchanged.
        """
        bucket_name, auth_method = ExternalDataStorageService._get_bucket_and_auth(
            config
        )

        # Get S3 client directly and download content regardless of file size
        s3_client = get_s3_client(auth_method, config.secrets)

        try:
            # Download content directly to BytesIO buffer
            file_obj = BytesIO()
            s3_client.download_fileobj(
                Bucket=bucket_name, Key=metadata.file_key, Fileobj=file_obj
            )
            # getvalue() returns the full buffer irrespective of the stream
            # position, so no seek(0) is required.
            return file_obj.getvalue()
        except Exception as e:
            logger.error(f"Error retrieving file from S3: {e}")
            # Bare raise keeps the original traceback intact.
            raise

    @staticmethod
    def _retrieve_from_gcs(
        config: StorageConfig, metadata: ExternalStorageMetadata
    ) -> bytes:
        """Retrieve data from GCS using existing get_gcs_client"""
        bucket_name, auth_method = ExternalDataStorageService._get_bucket_and_auth(
            config
        )

        storage_client = get_gcs_client(auth_method, config.secrets)
        bucket = storage_client.bucket(bucket_name)
        blob = bucket.blob(metadata.file_key)
        return blob.download_as_bytes()

    @staticmethod
    def _retrieve_from_local(metadata: ExternalStorageMetadata) -> bytes:
        """Retrieve data from local filesystem"""
        file_path = get_local_filename(metadata.file_key)
        with open(file_path, "rb") as f:
            return f.read()

    @staticmethod
    def _delete_from_s3(
        config: StorageConfig, metadata: ExternalStorageMetadata
    ) -> None:
        """Delete data from S3 using existing generic_delete_from_s3"""
        bucket_name, auth_method = ExternalDataStorageService._get_bucket_and_auth(
            config
        )

        generic_delete_from_s3(
            storage_secrets=config.secrets,
            bucket_name=bucket_name,
            file_key=metadata.file_key,
            auth_method=auth_method,
        )

    @staticmethod
    def _delete_from_gcs(
        config: StorageConfig, metadata: ExternalStorageMetadata
    ) -> None:
        """Delete data from GCS using existing get_gcs_client"""
        bucket_name, auth_method = ExternalDataStorageService._get_bucket_and_auth(
            config
        )

        storage_client = get_gcs_client(auth_method, config.secrets)
        bucket = storage_client.bucket(bucket_name)
        blob = bucket.blob(metadata.file_key)
        blob.delete()

    @staticmethod
    def _delete_from_local(metadata: ExternalStorageMetadata) -> None:
        """Delete data from local filesystem; a missing file is not an error."""
        file_path = get_local_filename(metadata.file_key)
        try:
            os.remove(file_path)
        except FileNotFoundError:
            # Already gone (never written, or removed concurrently) - the
            # desired end state holds, so there is nothing to do.
            pass