ethyca-fides 2.63.0rc3__py2.py3-none-any.whl → 2.63.1__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ethyca_fides-2.63.0rc3.dist-info → ethyca_fides-2.63.1.dist-info}/METADATA +1 -1
- {ethyca_fides-2.63.0rc3.dist-info → ethyca_fides-2.63.1.dist-info}/RECORD +129 -110
- fides/_version.py +3 -3
- fides/api/alembic/migrations/versions/29e56fa1fdb3_add_monitor_tasks.py +147 -0
- fides/api/alembic/migrations/versions/5efcdf18438e_add_manual_task_tables.py +160 -0
- fides/api/api/v1/endpoints/privacy_request_endpoints.py +4 -4
- fides/api/db/base.py +7 -1
- fides/api/models/connectionconfig.py +1 -1
- fides/api/models/detection_discovery/__init__.py +35 -0
- fides/api/models/detection_discovery/monitor_task.py +162 -0
- fides/api/models/field_types/__init__.py +5 -0
- fides/api/models/field_types/encrypted_large_data.py +151 -0
- fides/api/models/manual_tasks/__init__.py +8 -0
- fides/api/models/manual_tasks/manual_task.py +110 -0
- fides/api/models/manual_tasks/manual_task_log.py +100 -0
- fides/api/models/privacy_preference.py +1 -1
- fides/api/models/privacy_request/execution_log.py +3 -31
- fides/api/models/privacy_request/privacy_request.py +16 -3
- fides/api/models/privacy_request/request_task.py +36 -25
- fides/api/models/worker_task.py +96 -0
- fides/api/schemas/external_storage.py +22 -0
- fides/api/schemas/manual_tasks/__init__.py +0 -0
- fides/api/schemas/manual_tasks/manual_task_schemas.py +79 -0
- fides/api/schemas/manual_tasks/manual_task_status.py +151 -0
- fides/api/schemas/privacy_request.py +1 -12
- fides/api/service/connectors/base_erasure_email_connector.py +1 -1
- fides/api/service/connectors/consent_email_connector.py +2 -1
- fides/api/service/connectors/dynamic_erasure_email_connector.py +2 -1
- fides/api/service/connectors/erasure_email_connector.py +1 -1
- fides/api/service/external_data_storage.py +371 -0
- fides/api/service/privacy_request/request_runner_service.py +5 -5
- fides/api/service/privacy_request/request_service.py +1 -1
- fides/api/task/create_request_tasks.py +1 -1
- fides/api/task/execute_request_tasks.py +9 -8
- fides/api/task/graph_task.py +22 -10
- fides/api/util/consent_util.py +1 -1
- fides/api/util/data_size.py +102 -0
- fides/api/util/encryption/aes_gcm_encryption_util.py +271 -0
- fides/service/manual_tasks/__init__.py +0 -0
- fides/service/manual_tasks/manual_task_service.py +150 -0
- fides/service/privacy_request/privacy_request_service.py +1 -1
- fides/ui-build/static/admin/404.html +1 -1
- fides/ui-build/static/admin/add-systems/manual.html +1 -1
- fides/ui-build/static/admin/add-systems/multiple.html +1 -1
- fides/ui-build/static/admin/add-systems.html +1 -1
- fides/ui-build/static/admin/consent/configure/add-vendors.html +1 -1
- fides/ui-build/static/admin/consent/configure.html +1 -1
- fides/ui-build/static/admin/consent/privacy-experience/[id].html +1 -1
- fides/ui-build/static/admin/consent/privacy-experience/new.html +1 -1
- fides/ui-build/static/admin/consent/privacy-experience.html +1 -1
- fides/ui-build/static/admin/consent/privacy-notices/[id].html +1 -1
- fides/ui-build/static/admin/consent/privacy-notices/new.html +1 -1
- fides/ui-build/static/admin/consent/privacy-notices.html +1 -1
- fides/ui-build/static/admin/consent/properties.html +1 -1
- fides/ui-build/static/admin/consent/reporting.html +1 -1
- fides/ui-build/static/admin/consent.html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/projects/[projectUrn]/[resourceUrn].html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/projects/[projectUrn].html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/projects.html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/resources/[resourceUrn].html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/resources.html +1 -1
- fides/ui-build/static/admin/data-catalog.html +1 -1
- fides/ui-build/static/admin/data-discovery/action-center/[monitorId]/[systemId].html +1 -1
- fides/ui-build/static/admin/data-discovery/action-center/[monitorId].html +1 -1
- fides/ui-build/static/admin/data-discovery/action-center.html +1 -1
- fides/ui-build/static/admin/data-discovery/activity.html +1 -1
- fides/ui-build/static/admin/data-discovery/detection/[resourceUrn].html +1 -1
- fides/ui-build/static/admin/data-discovery/detection.html +1 -1
- fides/ui-build/static/admin/data-discovery/discovery/[resourceUrn].html +1 -1
- fides/ui-build/static/admin/data-discovery/discovery.html +1 -1
- fides/ui-build/static/admin/datamap.html +1 -1
- fides/ui-build/static/admin/dataset/[datasetId]/[collectionName]/[...subfieldNames].html +1 -1
- fides/ui-build/static/admin/dataset/[datasetId]/[collectionName].html +1 -1
- fides/ui-build/static/admin/dataset/[datasetId].html +1 -1
- fides/ui-build/static/admin/dataset/new.html +1 -1
- fides/ui-build/static/admin/dataset.html +1 -1
- fides/ui-build/static/admin/datastore-connection/[id].html +1 -1
- fides/ui-build/static/admin/datastore-connection/new.html +1 -1
- fides/ui-build/static/admin/datastore-connection.html +1 -1
- fides/ui-build/static/admin/index.html +1 -1
- fides/ui-build/static/admin/integrations/[id].html +1 -1
- fides/ui-build/static/admin/integrations.html +1 -1
- fides/ui-build/static/admin/login/[provider].html +1 -1
- fides/ui-build/static/admin/login.html +1 -1
- fides/ui-build/static/admin/messaging/[id].html +1 -1
- fides/ui-build/static/admin/messaging/add-template.html +1 -1
- fides/ui-build/static/admin/messaging.html +1 -1
- fides/ui-build/static/admin/poc/ant-components.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/AntForm.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/FormikAntFormItem.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/FormikControlled.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/FormikField.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/FormikSpreadField.html +1 -1
- fides/ui-build/static/admin/poc/forms.html +1 -1
- fides/ui-build/static/admin/poc/table-migration.html +1 -1
- fides/ui-build/static/admin/privacy-requests/[id].html +1 -1
- fides/ui-build/static/admin/privacy-requests/configure/messaging.html +1 -1
- fides/ui-build/static/admin/privacy-requests/configure/storage.html +1 -1
- fides/ui-build/static/admin/privacy-requests/configure.html +1 -1
- fides/ui-build/static/admin/privacy-requests.html +1 -1
- fides/ui-build/static/admin/properties/[id].html +1 -1
- fides/ui-build/static/admin/properties/add-property.html +1 -1
- fides/ui-build/static/admin/properties.html +1 -1
- fides/ui-build/static/admin/reporting/datamap.html +1 -1
- fides/ui-build/static/admin/settings/about/alpha.html +1 -1
- fides/ui-build/static/admin/settings/about.html +1 -1
- fides/ui-build/static/admin/settings/consent/[configuration_id]/[purpose_id].html +1 -1
- fides/ui-build/static/admin/settings/consent.html +1 -1
- fides/ui-build/static/admin/settings/custom-fields.html +1 -1
- fides/ui-build/static/admin/settings/domain-records.html +1 -1
- fides/ui-build/static/admin/settings/domains.html +1 -1
- fides/ui-build/static/admin/settings/email-templates.html +1 -1
- fides/ui-build/static/admin/settings/locations.html +1 -1
- fides/ui-build/static/admin/settings/organization.html +1 -1
- fides/ui-build/static/admin/settings/regulations.html +1 -1
- fides/ui-build/static/admin/systems/configure/[id]/test-datasets.html +1 -1
- fides/ui-build/static/admin/systems/configure/[id].html +1 -1
- fides/ui-build/static/admin/systems.html +1 -1
- fides/ui-build/static/admin/taxonomy.html +1 -1
- fides/ui-build/static/admin/user-management/new.html +1 -1
- fides/ui-build/static/admin/user-management/profile/[id].html +1 -1
- fides/ui-build/static/admin/user-management.html +1 -1
- {ethyca_fides-2.63.0rc3.dist-info → ethyca_fides-2.63.1.dist-info}/WHEEL +0 -0
- {ethyca_fides-2.63.0rc3.dist-info → ethyca_fides-2.63.1.dist-info}/entry_points.txt +0 -0
- {ethyca_fides-2.63.0rc3.dist-info → ethyca_fides-2.63.1.dist-info}/licenses/LICENSE +0 -0
- {ethyca_fides-2.63.0rc3.dist-info → ethyca_fides-2.63.1.dist-info}/top_level.txt +0 -0
- /fides/api/models/{detection_discovery.py → detection_discovery/core.py} +0 -0
- /fides/ui-build/static/admin/_next/static/{XobHpfndIH7IpV30u2vGV → SZn_Fpr_qG1COMjkdloep}/_buildManifest.js +0 -0
- /fides/ui-build/static/admin/_next/static/{XobHpfndIH7IpV30u2vGV → SZn_Fpr_qG1COMjkdloep}/_ssgManifest.js +0 -0
@@ -0,0 +1,79 @@
|
|
1
|
+
from datetime import datetime
|
2
|
+
from enum import Enum
|
3
|
+
from typing import Annotated, Any, Optional
|
4
|
+
|
5
|
+
from pydantic import ConfigDict, Field
|
6
|
+
|
7
|
+
from fides.api.schemas.base_class import FidesSchema
|
8
|
+
|
9
|
+
|
10
|
+
class ManualTaskType(str, Enum):
    """Closed set of manual task types.

    Members also subclass ``str``, so each member compares equal to its
    raw string value.
    """

    privacy_request = "privacy_request"
    # Add more task types as needed
|
15
|
+
|
16
|
+
|
17
|
+
class ManualTaskParentEntityType(str, Enum):
    """Closed set of entity types that can be the parent of a manual task.

    Members also subclass ``str``, so each member compares equal to its
    raw string value.
    """

    # used for access and erasure privacy requests
    connection_config = "connection_config"
    # Add more parent entity types as needed
|
24
|
+
|
25
|
+
|
26
|
+
class ManualTaskReferenceType(str, Enum):
    """Closed set of reference types for a manual task.

    Members also subclass ``str``, so each member compares equal to its
    raw string value.
    """

    privacy_request = "privacy_request"
    connection_config = "connection_config"
    manual_task_config = "manual_task_config"
    # Reference to the user assigned to the task
    assigned_user = "assigned_user"
    # Add more reference types as needed
|
34
|
+
|
35
|
+
|
36
|
+
class ManualTaskLogStatus(str, Enum):
    """Closed set of statuses that a manual task log entry can carry.

    Members also subclass ``str``, so each member compares equal to its
    raw string value.
    """

    created = "created"
    updated = "updated"
    in_processing = "in_processing"
    complete = "complete"
    error = "error"
    retrying = "retrying"
    paused = "paused"
    awaiting_input = "awaiting_input"
|
47
|
+
|
48
|
+
|
49
|
+
class ManualTaskLogCreate(FidesSchema):
    """Input schema for creating a manual task log entry.

    ``task_id`` and ``status`` are required; every other field defaults to
    ``None``. Fields not declared here are rejected (``extra="forbid"``).
    """

    model_config = ConfigDict(extra="forbid")

    # Required fields
    task_id: Annotated[str, Field(..., description="ID of the task")]
    status: Annotated[ManualTaskLogStatus, Field(..., description="Log status")]

    # Optional fields
    message: Annotated[Optional[str], Field(default=None, description="Log message")]
    details: Annotated[
        Optional[dict[str, Any]],
        Field(default=None, description="Additional details"),
    ]
    config_id: Annotated[
        Optional[str], Field(default=None, description="Configuration ID")
    ]
    instance_id: Annotated[
        Optional[str], Field(default=None, description="Instance ID")
    ]
|
62
|
+
|
63
|
+
|
64
|
+
class ManualTaskLogResponse(FidesSchema):
    """Output schema describing a manual task log entry.

    ``id``, ``task_id``, ``status`` and both timestamps are required; the
    remaining fields default to ``None``. Fields not declared here are
    rejected (``extra="forbid"``).
    """

    model_config = ConfigDict(extra="forbid")

    # Identity and status (required)
    id: Annotated[str, Field(..., description="Log ID")]
    task_id: Annotated[str, Field(..., description="Task ID")]
    status: Annotated[ManualTaskLogStatus, Field(..., description="Log status")]

    # Optional fields
    message: Annotated[Optional[str], Field(default=None, description="Log message")]
    details: Annotated[
        Optional[dict[str, Any]],
        Field(default=None, description="Additional details"),
    ]
    config_id: Annotated[
        Optional[str], Field(default=None, description="Configuration ID")
    ]
    instance_id: Annotated[
        Optional[str], Field(default=None, description="Instance ID")
    ]

    # Timestamps (required)
    created_at: Annotated[datetime, Field(..., description="Creation timestamp")]
    updated_at: Annotated[datetime, Field(..., description="Last update timestamp")]
|
@@ -0,0 +1,151 @@
|
|
1
|
+
from datetime import datetime, timezone
|
2
|
+
from enum import Enum as EnumType
|
3
|
+
from typing import Optional
|
4
|
+
|
5
|
+
from sqlalchemy.orm import Session
|
6
|
+
|
7
|
+
|
8
|
+
class StatusTransitionNotAllowed(Exception):
    """Signals that a requested status change is not permitted.

    The explanation is available both as ``str(exc)`` and as the
    ``message`` attribute.
    """

    def __init__(self, message: str):
        """Store *message* and forward it to ``Exception``."""
        super().__init__(message)
        self.message = message
|
14
|
+
|
15
|
+
|
16
|
+
class StatusType(str, EnumType):
    """Statuses of a manual task, plus the allowed moves between them."""

    pending = "pending"
    in_progress = "in_progress"
    completed = "completed"
    failed = "failed"

    @classmethod
    def get_valid_transitions(cls, current_status: "StatusType") -> list["StatusType"]:
        """Return the statuses reachable from *current_status*.

        Args:
            current_status: The status to transition away from.

        Returns:
            A new list of permitted target statuses. The list is empty for
            ``completed`` and for any value that matches no known status.
        """
        # Ordered (source, targets) pairs. Matching uses ``==`` so that plain
        # strings equal to a member's value are recognised as well.
        transition_table = (
            (cls.pending, (cls.in_progress, cls.failed)),
            (cls.in_progress, (cls.completed, cls.failed)),
            (cls.completed, ()),
            (cls.failed, (cls.pending, cls.in_progress)),
        )
        for source, targets in transition_table:
            if current_status == source:
                # Fresh list each call so callers may mutate the result.
                return list(targets)
        return []
|
43
|
+
|
44
|
+
|
45
|
+
class StatusTransitionMixin:
    """Reusable status-management behaviour for models.

    Provides validated status changes, completion bookkeeping
    (``completed_at`` / ``completed_by_id``) and convenience predicates.
    The host class is expected to provide the three attributes declared
    below.
    """

    # These should be overridden by the implementing class
    status: StatusType
    completed_at: Optional[datetime]
    completed_by_id: Optional[str]

    def _get_valid_transitions(self) -> list[StatusType]:
        """Return the statuses reachable from the current ``self.status``."""
        return StatusType.get_valid_transitions(self.status)

    def _validate_status_transition(self, new_status: StatusType) -> None:
        """Check that moving to *new_status* is permitted.

        Args:
            new_status: The status the caller wants to move to.

        Raises:
            StatusTransitionNotAllowed: If *new_status* equals the current
                status, or is not among the valid transitions from it.
        """
        # A "transition" to the current status is rejected outright.
        if new_status == self.status:
            raise StatusTransitionNotAllowed(
                f"Invalid status transition: already in status {new_status}"
            )

        allowed = self._get_valid_transitions()
        if new_status in allowed:
            return

        raise StatusTransitionNotAllowed(
            f"Invalid status transition from {self.status} to {new_status}. "
            f"Valid transitions are: {allowed}"
        )

    def update_status(
        self, db: Session, new_status: StatusType, user_id: Optional[str] = None
    ) -> None:
        """Validate, apply and commit a status change.

        Moving to ``completed`` stamps ``completed_at`` with the current UTC
        time and records *user_id* in ``completed_by_id``; moving to
        ``pending`` clears both fields. Other targets leave them untouched.

        Args:
            db: Database session used to add and commit this object.
            new_status: The status to move to.
            user_id: Optional ID of the user making the change.

        Raises:
            StatusTransitionNotAllowed: If the transition is not permitted.
        """
        self._validate_status_transition(new_status)

        if new_status == StatusType.pending:
            # Going back to pending wipes any earlier completion record.
            self.completed_at = None
            self.completed_by_id = None
        elif new_status == StatusType.completed:
            self.completed_at = datetime.now(timezone.utc)
            self.completed_by_id = user_id

        self.status = new_status
        db.add(self)
        db.commit()

    def mark_completed(self, db: Session, user_id: str) -> None:
        """Move to ``completed``, recording *user_id* as the completer.

        Args:
            db: Database session.
            user_id: ID of the user who completed the task.
        """
        self.update_status(db, StatusType.completed, user_id)

    def mark_failed(self, db: Session) -> None:
        """Move to ``failed``."""
        self.update_status(db, StatusType.failed)

    def start_progress(self, db: Session) -> None:
        """Move to ``in_progress``."""
        self.update_status(db, StatusType.in_progress)

    def reset_to_pending(self, db: Session) -> None:
        """Move back to ``pending``."""
        self.update_status(db, StatusType.pending)

    @property
    def is_completed(self) -> bool:
        """Whether the current status is ``completed``."""
        return self.status == StatusType.completed

    @property
    def is_failed(self) -> bool:
        """Whether the current status is ``failed``."""
        return self.status == StatusType.failed

    @property
    def is_in_progress(self) -> bool:
        """Whether the current status is ``in_progress``."""
        return self.status == StatusType.in_progress

    @property
    def is_pending(self) -> bool:
        """Whether the current status is ``pending``."""
        return self.status == StatusType.pending
|
@@ -8,6 +8,7 @@ from pydantic import ConfigDict, Field, field_serializer, field_validator
|
|
8
8
|
from fides.api.custom_types import SafeStr
|
9
9
|
from fides.api.graph.config import CollectionAddress
|
10
10
|
from fides.api.models.audit_log import AuditLogAction
|
11
|
+
from fides.api.models.worker_task import ExecutionLogStatus
|
11
12
|
from fides.api.schemas.api import BulkResponse, BulkUpdateFailed
|
12
13
|
from fides.api.schemas.base_class import FidesSchema
|
13
14
|
from fides.api.schemas.policy import ActionType, CurrentStep
|
@@ -141,18 +142,6 @@ class FieldsAffectedResponse(FidesSchema):
|
|
141
142
|
model_config = ConfigDict(from_attributes=True, use_enum_values=True)
|
142
143
|
|
143
144
|
|
144
|
-
class ExecutionLogStatus(EnumType):
|
145
|
-
"""Enum for execution log statuses, reflecting where they are in their workflow"""
|
146
|
-
|
147
|
-
in_processing = "in_processing"
|
148
|
-
pending = "pending"
|
149
|
-
complete = "complete"
|
150
|
-
error = "error"
|
151
|
-
awaiting_processing = "paused" # "paused" in the database to avoid a migration, but use "awaiting_processing" in the app
|
152
|
-
retrying = "retrying"
|
153
|
-
skipped = "skipped"
|
154
|
-
|
155
|
-
|
156
145
|
class ExecutionLogStatusSerializeOverride(FidesSchema):
|
157
146
|
"""Override to serialize "paused" Execution Logs as awaiting_processing instead"""
|
158
147
|
|
@@ -5,6 +5,7 @@ from sqlalchemy.orm import Session
|
|
5
5
|
from fides.api.models.connectionconfig import ConnectionConfig, ConnectionType
|
6
6
|
from fides.api.models.policy import Rule
|
7
7
|
from fides.api.models.privacy_request import ExecutionLog, PrivacyRequest
|
8
|
+
from fides.api.models.worker_task import ExecutionLogStatus
|
8
9
|
from fides.api.schemas.connection_configuration.connection_secrets_email import (
|
9
10
|
AdvancedSettings,
|
10
11
|
BaseEmailSchema,
|
@@ -15,7 +16,6 @@ from fides.api.schemas.messaging.messaging import (
|
|
15
16
|
MessagingActionType,
|
16
17
|
)
|
17
18
|
from fides.api.schemas.policy import ActionType
|
18
|
-
from fides.api.schemas.privacy_request import ExecutionLogStatus
|
19
19
|
from fides.api.schemas.redis_cache import Identity
|
20
20
|
from fides.api.service.connectors.base_email_connector import (
|
21
21
|
BaseEmailConnector,
|
@@ -16,6 +16,7 @@ from fides.api.models.privacy_notice import (
|
|
16
16
|
)
|
17
17
|
from fides.api.models.privacy_preference import PrivacyPreferenceHistory
|
18
18
|
from fides.api.models.privacy_request import ExecutionLog, PrivacyRequest
|
19
|
+
from fides.api.models.worker_task import ExecutionLogStatus
|
19
20
|
from fides.api.schemas.connection_configuration.connection_secrets_email import (
|
20
21
|
AdvancedSettingsWithExtendedIdentityTypes,
|
21
22
|
ExtendedEmailSchema,
|
@@ -29,7 +30,7 @@ from fides.api.schemas.messaging.messaging import (
|
|
29
30
|
from fides.api.schemas.policy import ActionType
|
30
31
|
from fides.api.schemas.privacy_notice import PrivacyNoticeHistorySchema
|
31
32
|
from fides.api.schemas.privacy_preference import MinimalPrivacyPreferenceHistorySchema
|
32
|
-
from fides.api.schemas.privacy_request import Consent, ExecutionLogStatus
|
33
|
+
from fides.api.schemas.privacy_request import Consent
|
33
34
|
from fides.api.schemas.redis_cache import Identity
|
34
35
|
from fides.api.service.connectors.base_email_connector import (
|
35
36
|
BaseEmailConnector,
|
@@ -17,11 +17,12 @@ from fides.api.models.privacy_request import (
|
|
17
17
|
RequestTask,
|
18
18
|
TraversalDetails,
|
19
19
|
)
|
20
|
+
from fides.api.models.worker_task import ExecutionLogStatus
|
20
21
|
from fides.api.schemas.connection_configuration.connection_secrets_dynamic_erasure_email import (
|
21
22
|
DynamicErasureEmailSchema,
|
22
23
|
)
|
23
24
|
from fides.api.schemas.policy import ActionType
|
24
|
-
from fides.api.schemas.privacy_request import ExecutionLogStatus, PrivacyRequestStatus
|
25
|
+
from fides.api.schemas.privacy_request import PrivacyRequestStatus
|
25
26
|
from fides.api.service.connectors.base_connector import BaseConnector
|
26
27
|
from fides.api.service.connectors.base_erasure_email_connector import (
|
27
28
|
BaseErasureEmailConnector,
|
@@ -10,9 +10,9 @@ from fides.api.models.connectionconfig import (
|
|
10
10
|
ConnectionType,
|
11
11
|
)
|
12
12
|
from fides.api.models.privacy_request import ExecutionLog
|
13
|
+
from fides.api.models.worker_task import ExecutionLogStatus
|
13
14
|
from fides.api.schemas.connection_configuration import EmailSchema
|
14
15
|
from fides.api.schemas.policy import ActionType
|
15
|
-
from fides.api.schemas.privacy_request import ExecutionLogStatus
|
16
16
|
from fides.api.service.connectors.base_erasure_email_connector import (
|
17
17
|
BaseErasureEmailConnector,
|
18
18
|
filter_user_identities_for_connector,
|
@@ -0,0 +1,371 @@
|
|
1
|
+
"""
|
2
|
+
Service for handling external storage of large encrypted data.
|
3
|
+
|
4
|
+
This service provides a generic interface for storing large data that would
|
5
|
+
otherwise exceed database column size limits or impact performance.
|
6
|
+
"""
|
7
|
+
|
8
|
+
import os
|
9
|
+
from io import BytesIO
|
10
|
+
from typing import Any, Optional
|
11
|
+
|
12
|
+
from loguru import logger
|
13
|
+
from sqlalchemy.orm import Session
|
14
|
+
|
15
|
+
from fides.api.models.storage import StorageConfig, get_active_default_storage_config
|
16
|
+
from fides.api.schemas.external_storage import ExternalStorageMetadata
|
17
|
+
from fides.api.schemas.storage.storage import StorageDetails, StorageType
|
18
|
+
from fides.api.service.storage.gcs import get_gcs_client
|
19
|
+
from fides.api.service.storage.s3 import generic_delete_from_s3, generic_upload_to_s3
|
20
|
+
from fides.api.service.storage.util import get_local_filename
|
21
|
+
from fides.api.util.aws_util import get_s3_client
|
22
|
+
from fides.api.util.encryption.aes_gcm_encryption_util import decrypt_data, encrypt_data
|
23
|
+
|
24
|
+
|
25
|
+
class ExternalDataStorageError(Exception):
    """Error type for failed external data storage operations."""
|
27
|
+
|
28
|
+
|
29
|
+
class ExternalDataStorageService:
|
30
|
+
"""
|
31
|
+
Service for storing large encrypted data externally.
|
32
|
+
|
33
|
+
Handles:
|
34
|
+
- Automatic encryption/decryption
|
35
|
+
- Multiple storage backends (S3, local, GCS, etc.)
|
36
|
+
- Consistent file organization
|
37
|
+
- Cleanup operations
|
38
|
+
"""
|
39
|
+
|
40
|
+
@staticmethod
def _get_storage_config(db: Session, storage_key: Optional[str]) -> "StorageConfig":
    """Look up the StorageConfig that should back an operation.

    A truthy *storage_key* selects the configuration with that exact key;
    otherwise the active default storage configuration is used.

    Args:
        db: Database session used for the lookup.
        storage_key: Key of a specific storage configuration, or a falsy
            value to use the active default.

    Returns:
        The resolved StorageConfig.

    Raises:
        ExternalDataStorageError: If no matching (or no active default)
            configuration exists. The message is logged at error level
            before raising.
    """
    if not storage_key:
        # No explicit key – use the active default
        default_config = get_active_default_storage_config(db)
        if default_config:
            return default_config

        msg = "No active default storage configuration available for large data"
        logger.error(msg)
        raise ExternalDataStorageError(msg)

    keyed_config = (
        db.query(StorageConfig).filter(StorageConfig.key == storage_key).first()
    )
    if keyed_config:
        return keyed_config

    msg = f"Storage configuration with key '{storage_key}' not found"
    logger.error(msg)
    raise ExternalDataStorageError(msg)
|
70
|
+
|
71
|
+
@staticmethod
def store_data(
    db: Session,
    storage_path: str,
    data: Any,
    storage_key: Optional[str] = None,
) -> ExternalStorageMetadata:
    """
    Store data in external storage with encryption.

    The payload is passed through ``encrypt_data`` and the resulting bytes
    are written to the backend selected by the resolved storage
    configuration (S3, GCS or local).

    Args:
        db: Database session
        storage_path: Path where data should be stored (e.g., "model/id/field/timestamp")
        data: The data to store (will be serialized and encrypted)
        storage_key: Optional specific storage config key to use

    Returns:
        ExternalStorageMetadata with the storage type, file key, size of the
        encrypted payload in bytes, and the key of the storage config used

    Raises:
        ExternalDataStorageError: If the storage configuration cannot be
            resolved, the storage type is unsupported, or the storage
            operation fails
    """
    try:
        storage_config = ExternalDataStorageService._get_storage_config(
            db, storage_key
        )

        # Serialize and encrypt the data
        encrypted_data = encrypt_data(data)
        file_size = len(encrypted_data)

        # Store to external storage based on type
        if storage_config.type == StorageType.s3:
            ExternalDataStorageService._store_to_s3(
                storage_config, storage_path, encrypted_data
            )
        elif storage_config.type == StorageType.gcs:
            ExternalDataStorageService._store_to_gcs(
                storage_config, storage_path, encrypted_data
            )
        elif storage_config.type == StorageType.local:
            ExternalDataStorageService._store_to_local(storage_path, encrypted_data)
        else:
            raise ExternalDataStorageError(
                f"Unsupported storage type: {storage_config.type}"
            )

        # Create and return metadata
        metadata = ExternalStorageMetadata(
            storage_type=StorageType(storage_config.type.value),
            file_key=storage_path,
            filesize=file_size,
            storage_key=storage_config.key,
        )

        logger.info(
            f"Stored {file_size:,} bytes to {storage_config.type} storage "
            f"at path: {storage_path}"
        )

        return metadata

    except ExternalDataStorageError:
        # Already the domain error (missing config, unsupported type):
        # pass it through unchanged, as retrieve_data does, instead of
        # wrapping and logging it a second time.
        raise
    except Exception as e:
        logger.error(f"Failed to store data externally: {str(e)}")
        raise ExternalDataStorageError(f"Failed to store data: {str(e)}") from e
|
136
|
+
|
137
|
+
@staticmethod
def retrieve_data(
    db: Session,
    metadata: ExternalStorageMetadata,
) -> Any:
    """
    Fetch a payload from external storage and decrypt it.

    The backend is chosen from ``metadata.storage_type``; the storage
    configuration is resolved from ``metadata.storage_key``.

    Args:
        db: Database session
        metadata: Storage metadata describing where the payload lives

    Returns:
        The result of ``decrypt_data`` applied to the downloaded bytes

    Raises:
        ExternalDataStorageError: If the configuration cannot be resolved,
            the storage type is unsupported, nothing is found at the path,
            or any other error occurs during retrieval
    """
    try:
        config = ExternalDataStorageService._get_storage_config(
            db, metadata.storage_key
        )

        # storage_type may be an enum member or already a raw value;
        # compare on the raw value either way.
        if isinstance(metadata.storage_type, StorageType):
            backend = metadata.storage_type.value
        else:
            backend = metadata.storage_type

        if backend == StorageType.s3.value:
            payload = ExternalDataStorageService._retrieve_from_s3(config, metadata)
        elif backend == StorageType.gcs.value:
            payload = ExternalDataStorageService._retrieve_from_gcs(config, metadata)
        elif backend == StorageType.local.value:
            payload = ExternalDataStorageService._retrieve_from_local(metadata)
        else:
            raise ExternalDataStorageError(f"Unsupported storage type: {backend}")

        # A backend helper may hand back None when nothing was downloaded
        if payload is None:
            raise ExternalDataStorageError(
                f"No data found at path: {metadata.file_key}"
            )

        # Decrypt and deserialize
        result = decrypt_data(payload)

        logger.info(
            f"Retrieved {metadata.filesize:,} bytes from {backend} storage "
            f"at path: {metadata.file_key}"
        )
        return result

    except ExternalDataStorageError:
        # Domain errors raised above propagate unchanged
        raise
    except Exception as e:
        logger.error(f"Failed to retrieve data from external storage: {str(e)}")
        raise ExternalDataStorageError(f"Failed to retrieve data: {str(e)}") from e
|
205
|
+
|
206
|
+
@staticmethod
def delete_data(
    db: Session,
    metadata: ExternalStorageMetadata,
) -> None:
    """
    Remove a payload from external storage on a best-effort basis.

    Args:
        db: Database session
        metadata: Storage metadata describing where the payload lives

    Note:
        Nothing is raised from this method: an unsupported storage type and
        any exception during deletion are logged as warnings instead, so
        that cleanup code can call it unconditionally.
    """
    try:
        config = ExternalDataStorageService._get_storage_config(
            db, metadata.storage_key
        )

        # storage_type may be an enum member or already a raw value;
        # compare on the raw value either way.
        if isinstance(metadata.storage_type, StorageType):
            backend = metadata.storage_type.value
        else:
            backend = metadata.storage_type

        if backend == StorageType.s3.value:
            ExternalDataStorageService._delete_from_s3(config, metadata)
        elif backend == StorageType.gcs.value:
            ExternalDataStorageService._delete_from_gcs(config, metadata)
        elif backend == StorageType.local.value:
            ExternalDataStorageService._delete_from_local(metadata)
        else:
            logger.warning(f"Unsupported storage type for cleanup: {backend}")
            return

        logger.info(
            f"Deleted external storage file from {backend} storage "
            f"at path: {metadata.file_key}"
        )

    except Exception as e:
        # Log but don't raise - cleanup should be best effort
        logger.warning(
            f"Failed to delete external storage file at {metadata.file_key}: {str(e)}"
        )
|
256
|
+
|
257
|
+
# Private helper methods for each storage type
|
258
|
+
|
259
|
+
@staticmethod
def _store_to_s3(config: StorageConfig, file_key: str, data: bytes) -> None:
    """Upload *data* to S3 under *file_key* via ``generic_upload_to_s3``.

    The bucket name and auth method are read from ``config.details``; the
    bytes are wrapped in a ``BytesIO`` for the upload helper.
    """
    details = config.details
    generic_upload_to_s3(
        storage_secrets=config.secrets,
        bucket_name=details[StorageDetails.BUCKET.value],
        file_key=file_key,
        auth_method=details[StorageDetails.AUTH_METHOD.value],
        document=BytesIO(data),
    )
|
273
|
+
|
274
|
+
@staticmethod
def _store_to_gcs(config: StorageConfig, file_key: str, data: bytes) -> None:
    """Upload ``data`` to GCS under ``file_key`` using a ``get_gcs_client`` client.

    The bucket name and auth method are read from ``config.details``; the
    client is built from the auth method and ``config.secrets``. The blob is
    written with content type ``application/octet-stream``.

    Raises:
        KeyError: if ``config.details`` lacks the bucket or auth-method entry.
    """
    details = config.details
    target_bucket = details[StorageDetails.BUCKET.value]
    gcs_auth = details[StorageDetails.AUTH_METHOD.value]

    gcs = get_gcs_client(gcs_auth, config.secrets)
    gcs.bucket(target_bucket).blob(file_key).upload_from_string(
        data, content_type="application/octet-stream"
    )
|
285
|
+
|
286
|
+
@staticmethod
def _store_to_local(file_key: str, data: bytes) -> None:
    """Write ``data`` to the local path that ``get_local_filename`` returns.

    The parent directory of the resolved path is created first if it does
    not exist; the file is then opened in binary write mode, which replaces
    any existing content at that path.
    """
    target_path = get_local_filename(file_key)
    parent_dir = os.path.dirname(target_path)
    os.makedirs(parent_dir, exist_ok=True)
    with open(target_path, "wb") as out_file:
        out_file.write(data)
|
293
|
+
|
294
|
+
@staticmethod
def _retrieve_from_s3(
    config: StorageConfig, metadata: ExternalStorageMetadata
) -> bytes:
    """Download the object at ``metadata.file_key`` from S3 and return its bytes.

    An S3 client is obtained directly from ``get_s3_client`` and the whole
    object is streamed into an in-memory buffer; no size check is applied
    here. Any error raised by the download is logged and then re-raised.

    Raises:
        KeyError: if ``config.details`` lacks the bucket or auth-method entry.
        Exception: whatever the S3 download raises, after logging it.
    """
    details = config.details
    source_bucket = details[StorageDetails.BUCKET.value]
    s3_auth = details[StorageDetails.AUTH_METHOD.value]

    client = get_s3_client(s3_auth, config.secrets)

    buffer = BytesIO()
    try:
        client.download_fileobj(
            Bucket=source_bucket, Key=metadata.file_key, Fileobj=buffer
        )
    except Exception as e:
        logger.error(f"Error retrieving file from S3: {e}")
        raise e

    # getvalue() returns the full buffer regardless of the current position,
    # so no explicit rewind is needed after the download.
    return buffer.getvalue()
|
317
|
+
|
318
|
+
@staticmethod
def _retrieve_from_gcs(
    config: StorageConfig, metadata: ExternalStorageMetadata
) -> bytes:
    """Download the blob at ``metadata.file_key`` from GCS and return its bytes.

    The bucket name and auth method are read from ``config.details``; the
    client is built by ``get_gcs_client`` from the auth method and
    ``config.secrets``.

    Raises:
        KeyError: if ``config.details`` lacks the bucket or auth-method entry.
    """
    details = config.details
    source_bucket = details[StorageDetails.BUCKET.value]
    gcs_auth = details[StorageDetails.AUTH_METHOD.value]

    gcs = get_gcs_client(gcs_auth, config.secrets)
    return gcs.bucket(source_bucket).blob(metadata.file_key).download_as_bytes()
|
330
|
+
|
331
|
+
@staticmethod
def _retrieve_from_local(metadata: ExternalStorageMetadata) -> bytes:
    """Read and return the bytes of the local file for ``metadata.file_key``.

    The path is resolved with ``get_local_filename`` and the file is opened
    in binary read mode; errors from ``open`` propagate to the caller.
    """
    source_path = get_local_filename(metadata.file_key)
    with open(source_path, "rb") as in_file:
        contents = in_file.read()
    return contents
|
337
|
+
|
338
|
+
@staticmethod
def _delete_from_s3(
    config: StorageConfig, metadata: ExternalStorageMetadata
) -> None:
    """Delete the object at ``metadata.file_key`` from S3 via ``generic_delete_from_s3``.

    The bucket name and auth method are read from ``config.details``; the
    credentials come from ``config.secrets``.

    Raises:
        KeyError: if ``config.details`` lacks the bucket or auth-method entry.
    """
    details = config.details
    # Collect the keyword arguments up front so the delete call itself is a
    # single, easily scannable line.
    delete_kwargs = {
        "bucket_name": details[StorageDetails.BUCKET.value],
        "auth_method": details[StorageDetails.AUTH_METHOD.value],
        "storage_secrets": config.secrets,
        "file_key": metadata.file_key,
    }
    generic_delete_from_s3(**delete_kwargs)
|
352
|
+
|
353
|
+
@staticmethod
def _delete_from_gcs(
    config: StorageConfig, metadata: ExternalStorageMetadata
) -> None:
    """Delete the blob at ``metadata.file_key`` from GCS.

    The bucket name and auth method are read from ``config.details``; the
    client is built by ``get_gcs_client`` from the auth method and
    ``config.secrets``.

    Raises:
        KeyError: if ``config.details`` lacks the bucket or auth-method entry.
    """
    details = config.details
    target_bucket = details[StorageDetails.BUCKET.value]
    gcs_auth = details[StorageDetails.AUTH_METHOD.value]

    gcs = get_gcs_client(gcs_auth, config.secrets)
    gcs.bucket(target_bucket).blob(metadata.file_key).delete()
|
365
|
+
|
366
|
+
@staticmethod
def _delete_from_local(metadata: ExternalStorageMetadata) -> None:
    """Delete the local file for ``metadata.file_key`` if it is present.

    The path is resolved with ``get_local_filename``. A file that is already
    gone is treated as success; any other ``OSError`` (for example a
    permission problem) still propagates to the caller.
    """
    file_path = get_local_filename(metadata.file_key)
    try:
        # Attempt the removal directly instead of checking existence first:
        # a check-then-remove sequence can fail if another process deletes
        # the file between the two calls.
        os.remove(file_path)
    except FileNotFoundError:
        # Nothing to delete - the goal (file absent) is already met.
        pass
|