ethyca-fides 2.63.1b3__py2.py3-none-any.whl → 2.63.1rc0__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ethyca_fides-2.63.1b3.dist-info → ethyca_fides-2.63.1rc0.dist-info}/METADATA +1 -1
- {ethyca_fides-2.63.1b3.dist-info → ethyca_fides-2.63.1rc0.dist-info}/RECORD +139 -141
- fides/_version.py +3 -3
- fides/api/alembic/migrations/versions/29e56fa1fdb3_add_monitor_tasks.py +147 -0
- fides/api/api/v1/endpoints/privacy_request_endpoints.py +4 -4
- fides/api/db/base.py +5 -3
- fides/api/main.py +0 -1
- fides/api/models/attachment.py +23 -36
- fides/api/models/connectionconfig.py +1 -1
- fides/api/models/detection_discovery/__init__.py +35 -0
- fides/api/models/detection_discovery/monitor_task.py +161 -0
- fides/api/models/field_types/__init__.py +5 -0
- fides/api/models/field_types/encrypted_large_data.py +151 -0
- fides/api/models/privacy_preference.py +1 -1
- fides/api/models/privacy_request/execution_log.py +3 -31
- fides/api/models/privacy_request/privacy_request.py +16 -3
- fides/api/models/privacy_request/request_task.py +36 -25
- fides/api/models/worker_task.py +96 -0
- fides/api/schemas/external_storage.py +22 -0
- fides/api/schemas/privacy_request.py +1 -12
- fides/api/service/connectors/base_erasure_email_connector.py +1 -1
- fides/api/service/connectors/consent_email_connector.py +2 -1
- fides/api/service/connectors/dynamic_erasure_email_connector.py +2 -1
- fides/api/service/connectors/erasure_email_connector.py +1 -1
- fides/api/service/external_data_storage.py +371 -0
- fides/api/service/privacy_request/dsr_package/dsr_report_builder.py +46 -264
- fides/api/service/privacy_request/dsr_package/templates/collection_index.html +9 -34
- fides/api/service/privacy_request/dsr_package/templates/item.html +37 -0
- fides/api/service/privacy_request/dsr_package/templates/main.css +2 -45
- fides/api/service/privacy_request/dsr_package/templates/welcome.html +8 -12
- fides/api/service/privacy_request/request_runner_service.py +139 -258
- fides/api/service/privacy_request/request_service.py +1 -1
- fides/api/service/storage/gcs.py +3 -15
- fides/api/service/storage/s3.py +14 -28
- fides/api/service/storage/util.py +7 -45
- fides/api/task/create_request_tasks.py +1 -1
- fides/api/task/execute_request_tasks.py +9 -8
- fides/api/task/graph_task.py +22 -10
- fides/api/tasks/storage.py +91 -85
- fides/api/util/cache.py +1 -77
- fides/api/util/consent_util.py +1 -1
- fides/api/util/data_size.py +102 -0
- fides/api/util/encryption/aes_gcm_encryption_util.py +271 -0
- fides/config/redis_settings.py +8 -99
- fides/service/messaging/aws_ses_service.py +1 -5
- fides/service/privacy_request/privacy_request_service.py +1 -1
- fides/ui-build/static/admin/404.html +1 -1
- fides/ui-build/static/admin/add-systems/manual.html +1 -1
- fides/ui-build/static/admin/add-systems/multiple.html +1 -1
- fides/ui-build/static/admin/add-systems.html +1 -1
- fides/ui-build/static/admin/consent/configure/add-vendors.html +1 -1
- fides/ui-build/static/admin/consent/configure.html +1 -1
- fides/ui-build/static/admin/consent/privacy-experience/[id].html +1 -1
- fides/ui-build/static/admin/consent/privacy-experience/new.html +1 -1
- fides/ui-build/static/admin/consent/privacy-experience.html +1 -1
- fides/ui-build/static/admin/consent/privacy-notices/[id].html +1 -1
- fides/ui-build/static/admin/consent/privacy-notices/new.html +1 -1
- fides/ui-build/static/admin/consent/privacy-notices.html +1 -1
- fides/ui-build/static/admin/consent/properties.html +1 -1
- fides/ui-build/static/admin/consent/reporting.html +1 -1
- fides/ui-build/static/admin/consent.html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/projects/[projectUrn]/[resourceUrn].html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/projects/[projectUrn].html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/projects.html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/resources/[resourceUrn].html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/resources.html +1 -1
- fides/ui-build/static/admin/data-catalog.html +1 -1
- fides/ui-build/static/admin/data-discovery/action-center/[monitorId]/[systemId].html +1 -1
- fides/ui-build/static/admin/data-discovery/action-center/[monitorId].html +1 -1
- fides/ui-build/static/admin/data-discovery/action-center.html +1 -1
- fides/ui-build/static/admin/data-discovery/activity.html +1 -1
- fides/ui-build/static/admin/data-discovery/detection/[resourceUrn].html +1 -1
- fides/ui-build/static/admin/data-discovery/detection.html +1 -1
- fides/ui-build/static/admin/data-discovery/discovery/[resourceUrn].html +1 -1
- fides/ui-build/static/admin/data-discovery/discovery.html +1 -1
- fides/ui-build/static/admin/datamap.html +1 -1
- fides/ui-build/static/admin/dataset/[datasetId]/[collectionName]/[...subfieldNames].html +1 -1
- fides/ui-build/static/admin/dataset/[datasetId]/[collectionName].html +1 -1
- fides/ui-build/static/admin/dataset/[datasetId].html +1 -1
- fides/ui-build/static/admin/dataset/new.html +1 -1
- fides/ui-build/static/admin/dataset.html +1 -1
- fides/ui-build/static/admin/datastore-connection/[id].html +1 -1
- fides/ui-build/static/admin/datastore-connection/new.html +1 -1
- fides/ui-build/static/admin/datastore-connection.html +1 -1
- fides/ui-build/static/admin/index.html +1 -1
- fides/ui-build/static/admin/integrations/[id].html +1 -1
- fides/ui-build/static/admin/integrations.html +1 -1
- fides/ui-build/static/admin/lib/fides-ext-gpp.js +1 -1
- fides/ui-build/static/admin/lib/fides-headless.js +1 -1
- fides/ui-build/static/admin/lib/fides-preview.js +1 -1
- fides/ui-build/static/admin/lib/fides-tcf.js +2 -2
- fides/ui-build/static/admin/lib/fides.js +2 -2
- fides/ui-build/static/admin/login/[provider].html +1 -1
- fides/ui-build/static/admin/login.html +1 -1
- fides/ui-build/static/admin/messaging/[id].html +1 -1
- fides/ui-build/static/admin/messaging/add-template.html +1 -1
- fides/ui-build/static/admin/messaging.html +1 -1
- fides/ui-build/static/admin/poc/ant-components.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/AntForm.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/FormikAntFormItem.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/FormikControlled.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/FormikField.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/FormikSpreadField.html +1 -1
- fides/ui-build/static/admin/poc/forms.html +1 -1
- fides/ui-build/static/admin/poc/table-migration.html +1 -1
- fides/ui-build/static/admin/privacy-requests/[id].html +1 -1
- fides/ui-build/static/admin/privacy-requests/configure/messaging.html +1 -1
- fides/ui-build/static/admin/privacy-requests/configure/storage.html +1 -1
- fides/ui-build/static/admin/privacy-requests/configure.html +1 -1
- fides/ui-build/static/admin/privacy-requests.html +1 -1
- fides/ui-build/static/admin/properties/[id].html +1 -1
- fides/ui-build/static/admin/properties/add-property.html +1 -1
- fides/ui-build/static/admin/properties.html +1 -1
- fides/ui-build/static/admin/reporting/datamap.html +1 -1
- fides/ui-build/static/admin/settings/about/alpha.html +1 -1
- fides/ui-build/static/admin/settings/about.html +1 -1
- fides/ui-build/static/admin/settings/consent/[configuration_id]/[purpose_id].html +1 -1
- fides/ui-build/static/admin/settings/consent.html +1 -1
- fides/ui-build/static/admin/settings/custom-fields.html +1 -1
- fides/ui-build/static/admin/settings/domain-records.html +1 -1
- fides/ui-build/static/admin/settings/domains.html +1 -1
- fides/ui-build/static/admin/settings/email-templates.html +1 -1
- fides/ui-build/static/admin/settings/locations.html +1 -1
- fides/ui-build/static/admin/settings/organization.html +1 -1
- fides/ui-build/static/admin/settings/regulations.html +1 -1
- fides/ui-build/static/admin/systems/configure/[id]/test-datasets.html +1 -1
- fides/ui-build/static/admin/systems/configure/[id].html +1 -1
- fides/ui-build/static/admin/systems.html +1 -1
- fides/ui-build/static/admin/taxonomy.html +1 -1
- fides/ui-build/static/admin/user-management/new.html +1 -1
- fides/ui-build/static/admin/user-management/profile/[id].html +1 -1
- fides/ui-build/static/admin/user-management.html +1 -1
- fides/api/alembic/migrations/versions/5efcdf18438e_add_manual_task_tables.py +0 -160
- fides/api/models/manual_tasks/__init__.py +0 -8
- fides/api/models/manual_tasks/manual_task.py +0 -110
- fides/api/models/manual_tasks/manual_task_log.py +0 -100
- fides/api/schemas/manual_tasks/__init__.py +0 -0
- fides/api/schemas/manual_tasks/manual_task_schemas.py +0 -79
- fides/api/schemas/manual_tasks/manual_task_status.py +0 -151
- fides/api/service/privacy_request/attachment_handling.py +0 -132
- fides/api/service/privacy_request/dsr_package/templates/attachments_index.html +0 -33
- fides/api/tasks/csv_utils.py +0 -170
- fides/api/tasks/encryption_utils.py +0 -42
- fides/service/manual_tasks/__init__.py +0 -0
- fides/service/manual_tasks/manual_task_service.py +0 -150
- {ethyca_fides-2.63.1b3.dist-info → ethyca_fides-2.63.1rc0.dist-info}/WHEEL +0 -0
- {ethyca_fides-2.63.1b3.dist-info → ethyca_fides-2.63.1rc0.dist-info}/entry_points.txt +0 -0
- {ethyca_fides-2.63.1b3.dist-info → ethyca_fides-2.63.1rc0.dist-info}/licenses/LICENSE +0 -0
- {ethyca_fides-2.63.1b3.dist-info → ethyca_fides-2.63.1rc0.dist-info}/top_level.txt +0 -0
- /fides/api/models/{detection_discovery.py → detection_discovery/core.py} +0 -0
- /fides/ui-build/static/admin/_next/static/{ycPcko8qnif6BlkQ6MN4D → PEElhfUdgE5bJjiyu5QCD}/_buildManifest.js +0 -0
- /fides/ui-build/static/admin/_next/static/{ycPcko8qnif6BlkQ6MN4D → PEElhfUdgE5bJjiyu5QCD}/_ssgManifest.js +0 -0
@@ -0,0 +1,151 @@
|
|
1
|
+
from datetime import datetime
|
2
|
+
from typing import Any, Optional, Type
|
3
|
+
|
4
|
+
from loguru import logger
|
5
|
+
|
6
|
+
from fides.api.api.deps import get_autoclose_db_session
|
7
|
+
from fides.api.schemas.external_storage import ExternalStorageMetadata
|
8
|
+
from fides.api.service.external_data_storage import (
|
9
|
+
ExternalDataStorageError,
|
10
|
+
ExternalDataStorageService,
|
11
|
+
)
|
12
|
+
from fides.api.util.data_size import LARGE_DATA_THRESHOLD_BYTES, calculate_data_size
|
13
|
+
|
14
|
+
|
15
|
+
class EncryptedLargeDataDescriptor:
|
16
|
+
"""
|
17
|
+
A Python descriptor for database fields with encrypted external storage fallback.
|
18
|
+
|
19
|
+
See the original implementation for detailed docstrings.
|
20
|
+
"""
|
21
|
+
|
22
|
+
def __init__(
|
23
|
+
self,
|
24
|
+
field_name: str,
|
25
|
+
empty_default: Optional[Any] = None,
|
26
|
+
threshold_bytes: Optional[int] = None,
|
27
|
+
):
|
28
|
+
self.field_name = field_name
|
29
|
+
self.private_field = f"_{field_name}"
|
30
|
+
self.empty_default = empty_default if empty_default is not None else []
|
31
|
+
self.threshold_bytes = threshold_bytes or LARGE_DATA_THRESHOLD_BYTES
|
32
|
+
self.model_class: Optional[str] = None
|
33
|
+
self.name: Optional[str] = None
|
34
|
+
|
35
|
+
# Descriptor protocol helpers
|
36
|
+
|
37
|
+
def __set_name__(
|
38
|
+
self, owner: Type, name: str
|
39
|
+
) -> None: # noqa: D401 (docstring in orig file)
|
40
|
+
self.name = name
|
41
|
+
self.model_class = owner.__name__
|
42
|
+
|
43
|
+
def _generate_storage_path(self, instance: Any) -> str:
|
44
|
+
instance_id = getattr(instance, "id", None)
|
45
|
+
if not instance_id:
|
46
|
+
raise ValueError(f"Instance {instance} must have an 'id' attribute")
|
47
|
+
timestamp = datetime.utcnow().strftime("%Y%m%d-%H%M%S-%f")
|
48
|
+
return f"{self.model_class}/{instance_id}/{self.field_name}/{timestamp}.txt"
|
49
|
+
|
50
|
+
def __get__(self, instance: Any, owner: Type) -> Any: # noqa: D401
|
51
|
+
if instance is None:
|
52
|
+
return self
|
53
|
+
raw_data = getattr(instance, self.private_field)
|
54
|
+
if raw_data is None:
|
55
|
+
return None
|
56
|
+
if isinstance(raw_data, dict) and "storage_type" in raw_data:
|
57
|
+
logger.info(
|
58
|
+
f"Reading {self.model_class}.{self.field_name} from external storage "
|
59
|
+
f"({raw_data.get('storage_type')})"
|
60
|
+
)
|
61
|
+
try:
|
62
|
+
metadata = ExternalStorageMetadata.model_validate(raw_data)
|
63
|
+
data = self._retrieve_external_data(metadata)
|
64
|
+
record_count = len(data) if isinstance(data, list) else "N/A"
|
65
|
+
logger.info(
|
66
|
+
f"Successfully retrieved {self.model_class}.{self.field_name} "
|
67
|
+
f"from external storage (records: {record_count})"
|
68
|
+
)
|
69
|
+
return data if data is not None else self.empty_default
|
70
|
+
except Exception as e: # pylint: disable=broad-except
|
71
|
+
logger.error(
|
72
|
+
f"Failed to retrieve {self.model_class}.{self.field_name} "
|
73
|
+
f"from external storage: {str(e)}"
|
74
|
+
)
|
75
|
+
raise ExternalDataStorageError(
|
76
|
+
f"Failed to retrieve {self.field_name}: {str(e)}"
|
77
|
+
) from e
|
78
|
+
else:
|
79
|
+
return raw_data
|
80
|
+
|
81
|
+
def __set__(self, instance: Any, value: Any) -> None: # noqa: D401
|
82
|
+
if not value:
|
83
|
+
self._cleanup_external_data(instance)
|
84
|
+
setattr(instance, self.private_field, self.empty_default)
|
85
|
+
return
|
86
|
+
try:
|
87
|
+
current_data = self.__get__(instance, type(instance))
|
88
|
+
if current_data == value:
|
89
|
+
return
|
90
|
+
except Exception: # pylint: disable=broad-except
|
91
|
+
pass
|
92
|
+
|
93
|
+
data_size = calculate_data_size(value)
|
94
|
+
if data_size > self.threshold_bytes:
|
95
|
+
logger.info(
|
96
|
+
f"{self.model_class}.{self.field_name}: Data size ({data_size:,} bytes) "
|
97
|
+
f"exceeds threshold ({self.threshold_bytes:,} bytes), storing externally"
|
98
|
+
)
|
99
|
+
self._cleanup_external_data(instance)
|
100
|
+
metadata = self._store_external_data(instance, value)
|
101
|
+
setattr(instance, self.private_field, metadata.model_dump())
|
102
|
+
else:
|
103
|
+
self._cleanup_external_data(instance)
|
104
|
+
setattr(instance, self.private_field, value)
|
105
|
+
|
106
|
+
# External storage helpers
|
107
|
+
|
108
|
+
def _store_external_data(self, instance: Any, data: Any) -> ExternalStorageMetadata:
|
109
|
+
storage_path = self._generate_storage_path(instance)
|
110
|
+
with get_autoclose_db_session() as session:
|
111
|
+
metadata = ExternalDataStorageService.store_data(
|
112
|
+
db=session,
|
113
|
+
storage_path=storage_path,
|
114
|
+
data=data,
|
115
|
+
)
|
116
|
+
logger.info(
|
117
|
+
f"Stored {self.model_class}.{self.field_name} to external storage: {storage_path}"
|
118
|
+
)
|
119
|
+
return metadata
|
120
|
+
|
121
|
+
@staticmethod
|
122
|
+
def _retrieve_external_data(metadata: ExternalStorageMetadata) -> Any: # noqa: D401
|
123
|
+
with get_autoclose_db_session() as session:
|
124
|
+
return ExternalDataStorageService.retrieve_data(
|
125
|
+
db=session,
|
126
|
+
metadata=metadata,
|
127
|
+
)
|
128
|
+
|
129
|
+
def _cleanup_external_data(self, instance: Any) -> None: # noqa: D401
|
130
|
+
raw_data = getattr(instance, self.private_field, None)
|
131
|
+
if isinstance(raw_data, dict) and "storage_type" in raw_data:
|
132
|
+
try:
|
133
|
+
metadata = ExternalStorageMetadata.model_validate(raw_data)
|
134
|
+
with get_autoclose_db_session() as session:
|
135
|
+
ExternalDataStorageService.delete_data(
|
136
|
+
db=session,
|
137
|
+
metadata=metadata,
|
138
|
+
)
|
139
|
+
logger.info(
|
140
|
+
f"Cleaned up external storage for {self.model_class}.{self.field_name}: "
|
141
|
+
f"{metadata.file_key}"
|
142
|
+
)
|
143
|
+
except Exception as e: # pylint: disable=broad-except
|
144
|
+
logger.warning(
|
145
|
+
f"Failed to cleanup external {self.field_name}: {str(e)}"
|
146
|
+
)
|
147
|
+
|
148
|
+
# Public helper
|
149
|
+
|
150
|
+
def cleanup(self, instance: Any) -> None: # noqa: D401
|
151
|
+
self._cleanup_external_data(instance)
|
@@ -22,8 +22,8 @@ from fides.api.models.privacy_notice import (
|
|
22
22
|
UserConsentPreference,
|
23
23
|
)
|
24
24
|
from fides.api.models.privacy_request import PrivacyRequest, ProvidedIdentity
|
25
|
+
from fides.api.models.worker_task import ExecutionLogStatus
|
25
26
|
from fides.api.schemas.language import SupportedLanguage
|
26
|
-
from fides.api.schemas.privacy_request import ExecutionLogStatus
|
27
27
|
from fides.api.schemas.redis_cache import MultiValue
|
28
28
|
from fides.config import CONFIG
|
29
29
|
|
@@ -4,15 +4,14 @@ from __future__ import annotations
|
|
4
4
|
|
5
5
|
from typing import Optional
|
6
6
|
|
7
|
-
from sqlalchemy import Column, DateTime, String
|
7
|
+
from sqlalchemy import Column, String
|
8
8
|
from sqlalchemy.dialects.postgresql import JSONB
|
9
9
|
from sqlalchemy.ext.mutable import MutableList
|
10
|
-
from sqlalchemy.sql import text
|
11
10
|
|
12
11
|
from fides.api.db.base_class import Base # type: ignore[attr-defined]
|
13
12
|
from fides.api.db.util import EnumColumn
|
13
|
+
from fides.api.models.worker_task import ExecutionLogStatus, TaskExecutionLog
|
14
14
|
from fides.api.schemas.policy import ActionType, CurrentStep
|
15
|
-
from fides.api.schemas.privacy_request import ExecutionLogStatus
|
16
15
|
|
17
16
|
# Locations from which privacy request execution can be resumed, in order.
|
18
17
|
EXECUTION_CHECKPOINTS = [
|
@@ -53,7 +52,7 @@ def can_run_checkpoint(
|
|
53
52
|
) >= EXECUTION_CHECKPOINTS.index(from_checkpoint)
|
54
53
|
|
55
54
|
|
56
|
-
class ExecutionLog(Base):
|
55
|
+
class ExecutionLog(TaskExecutionLog, Base):
|
57
56
|
"""
|
58
57
|
Stores the individual execution logs associated with a PrivacyRequest.
|
59
58
|
|
@@ -68,41 +67,14 @@ class ExecutionLog(Base):
|
|
68
67
|
collection_name = Column(String, index=True)
|
69
68
|
# A JSON Array describing affected fields along with their data categories and paths
|
70
69
|
fields_affected = Column(MutableList.as_mutable(JSONB), nullable=True)
|
71
|
-
# Contains info, warning, or error messages
|
72
|
-
message = Column(String)
|
73
70
|
action_type = Column(
|
74
71
|
EnumColumn(ActionType),
|
75
72
|
index=True,
|
76
73
|
nullable=False,
|
77
74
|
)
|
78
|
-
status = Column(
|
79
|
-
EnumColumn(
|
80
|
-
ExecutionLogStatus,
|
81
|
-
native_enum=True,
|
82
|
-
values_callable=lambda x: [
|
83
|
-
i.value for i in x
|
84
|
-
], # Using ExecutionLogStatus values in database, even though app is using the names.
|
85
|
-
),
|
86
|
-
index=True,
|
87
|
-
nullable=False,
|
88
|
-
)
|
89
75
|
|
90
76
|
privacy_request_id = Column(
|
91
77
|
String,
|
92
78
|
nullable=False,
|
93
79
|
index=True,
|
94
80
|
)
|
95
|
-
|
96
|
-
# Use clock_timestamp() instead of NOW() to get the actual current time at row creation,
|
97
|
-
# regardless of transaction state. This prevents timestamp caching within transactions
|
98
|
-
# and ensures more accurate creation times.
|
99
|
-
# https://www.postgresql.org/docs/current/functions-datetime.html#FUNCTIONS-DATETIME-CURRENT
|
100
|
-
|
101
|
-
created_at = Column(
|
102
|
-
DateTime(timezone=True), server_default=text("clock_timestamp()")
|
103
|
-
)
|
104
|
-
updated_at = Column(
|
105
|
-
DateTime(timezone=True),
|
106
|
-
server_default=text("clock_timestamp()"),
|
107
|
-
onupdate=text("clock_timestamp()"),
|
108
|
-
)
|
@@ -48,6 +48,7 @@ from fides.api.models.audit_log import AuditLog
|
|
48
48
|
from fides.api.models.client import ClientDetail
|
49
49
|
from fides.api.models.comment import Comment, CommentReference, CommentReferenceType
|
50
50
|
from fides.api.models.fides_user import FidesUser
|
51
|
+
from fides.api.models.field_types import EncryptedLargeDataDescriptor
|
51
52
|
from fides.api.models.manual_webhook import AccessManualWebhook
|
52
53
|
from fides.api.models.policy import (
|
53
54
|
Policy,
|
@@ -72,13 +73,13 @@ from fides.api.models.privacy_request.webhook import (
|
|
72
73
|
generate_request_callback_pre_approval_jwe,
|
73
74
|
generate_request_callback_resume_jwe,
|
74
75
|
)
|
76
|
+
from fides.api.models.worker_task import ExecutionLogStatus
|
75
77
|
from fides.api.schemas.drp_privacy_request import DrpPrivacyRequestCreate
|
76
78
|
from fides.api.schemas.external_https import SecondPartyResponseFormat
|
77
79
|
from fides.api.schemas.masking.masking_secrets import MaskingSecretCache
|
78
80
|
from fides.api.schemas.policy import ActionType, CurrentStep
|
79
81
|
from fides.api.schemas.privacy_request import (
|
80
82
|
CheckpointActionRequired,
|
81
|
-
ExecutionLogStatus,
|
82
83
|
ManualAction,
|
83
84
|
PrivacyRequestSource,
|
84
85
|
PrivacyRequestStatus,
|
@@ -251,7 +252,8 @@ class PrivacyRequest(
|
|
251
252
|
awaiting_email_send_at = Column(DateTime(timezone=True), nullable=True)
|
252
253
|
|
253
254
|
# Encrypted filtered access results saved for later retrieval
|
254
|
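-
filtered_final_upload = Column( # An encrypted JSON String - Dict[Dict[str, List[Row]]] - rule keys mapped to the filtered access results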
|
255
|
+
_filtered_final_upload = Column( # An encrypted JSON String - Dict[Dict[str, List[Row]]] - rule keys mapped to the filtered access results
|
256
|
+
"filtered_final_upload",
|
255
257
|
StringEncryptedType(
|
256
258
|
type_in=JSONTypeOverride,
|
257
259
|
key=CONFIG.security.app_encryption_key,
|
@@ -260,6 +262,11 @@ class PrivacyRequest(
|
|
260
262
|
),
|
261
263
|
)
|
262
264
|
|
265
|
+
# Use descriptor for automatic external storage handling
|
266
|
+
filtered_final_upload = EncryptedLargeDataDescriptor(
|
267
|
+
field_name="filtered_final_upload", empty_default={}
|
268
|
+
)
|
269
|
+
|
263
270
|
# Encrypted filtered access results saved for later retrieval
|
264
271
|
access_result_urls = Column( # An encrypted JSON String - Dict[Dict[str, List[Row]]] - rule keys mapped to the filtered access results
|
265
272
|
StringEncryptedType(
|
@@ -334,6 +341,7 @@ class PrivacyRequest(
|
|
334
341
|
deleting this object from the database
|
335
342
|
"""
|
336
343
|
self.clear_cached_values()
|
344
|
+
self.cleanup_external_storage()
|
337
345
|
Attachment.delete_attachments_for_reference_and_type(
|
338
346
|
db, self.id, AttachmentReferenceType.privacy_request
|
339
347
|
)
|
@@ -1257,6 +1265,11 @@ class PrivacyRequest(
|
|
1257
1265
|
# DSR 2.0 does not cache the results so nothing to do here
|
1258
1266
|
return {}
|
1259
1267
|
|
1268
|
+
def cleanup_external_storage(self) -> None:
|
1269
|
+
"""Clean up all external storage files for this privacy request"""
|
1270
|
+
# Access the descriptor from the class to call cleanup
|
1271
|
+
PrivacyRequest.filtered_final_upload.cleanup(self)
|
1272
|
+
|
1260
1273
|
def save_filtered_access_results(
|
1261
1274
|
self, db: Session, results: Dict[str, Dict[str, List[Row]]]
|
1262
1275
|
) -> None:
|
@@ -1544,7 +1557,7 @@ def get_action_required_details(
|
|
1544
1557
|
|
1545
1558
|
|
1546
1559
|
def _parse_cache_to_checkpoint_action_required(
|
1547
|
-
cache: dict[str, Any]
|
1560
|
+
cache: dict[str, Any],
|
1548
1561
|
) -> CheckpointActionRequired:
|
1549
1562
|
collection = (
|
1550
1563
|
CollectionAddress(
|
@@ -14,20 +14,19 @@ from sqlalchemy_utils.types.encrypted.encrypted_type import (
|
|
14
14
|
StringEncryptedType,
|
15
15
|
)
|
16
16
|
|
17
|
-
from fides.api.db.base_class import Base # type: ignore[attr-defined]
|
18
|
-
from fides.api.db.base_class import JSONTypeOverride
|
19
|
-
from fides.api.db.util import EnumColumn
|
17
|
+
from fides.api.db.base_class import Base, JSONTypeOverride # type: ignore[attr-defined]
|
20
18
|
from fides.api.graph.config import (
|
21
19
|
ROOT_COLLECTION_ADDRESS,
|
22
20
|
TERMINATOR_ADDRESS,
|
23
21
|
CollectionAddress,
|
24
22
|
)
|
23
|
+
from fides.api.models.field_types import EncryptedLargeDataDescriptor
|
25
24
|
from fides.api.models.privacy_request.execution_log import (
|
26
25
|
COMPLETED_EXECUTION_LOG_STATUSES,
|
27
26
|
)
|
27
|
+
from fides.api.models.worker_task import ExecutionLogStatus, WorkerTask
|
28
28
|
from fides.api.schemas.base_class import FidesSchema
|
29
29
|
from fides.api.schemas.policy import ActionType
|
30
|
-
from fides.api.schemas.privacy_request import ExecutionLogStatus
|
31
30
|
from fides.api.util.cache import (
|
32
31
|
FidesopsRedis,
|
33
32
|
celery_tasks_in_flight,
|
@@ -68,7 +67,8 @@ class TraversalDetails(FidesSchema):
|
|
68
67
|
)
|
69
68
|
|
70
69
|
|
71
|
-
class RequestTask(Base):
|
70
|
+
# TODO: At some point we will refactor this model to store all task types in a common table that links to tables with specific task attributes.
|
71
|
+
class RequestTask(WorkerTask, Base):
|
72
72
|
"""
|
73
73
|
An individual Task for a Privacy Request.
|
74
74
|
|
@@ -91,21 +91,6 @@ class RequestTask(Base):
|
|
91
91
|
) # Of the format dataset_name:collection_name for convenience
|
92
92
|
dataset_name = Column(String, nullable=False, index=True)
|
93
93
|
collection_name = Column(String, nullable=False, index=True)
|
94
|
-
action_type = Column(EnumColumn(ActionType), nullable=False, index=True)
|
95
|
-
|
96
|
-
# Note that RequestTasks share statuses with ExecutionLogs. When a RequestTask changes state, an ExecutionLog
|
97
|
-
# is also created with that state. These are tied tightly together in GraphTask.
|
98
|
-
status = Column(
|
99
|
-
EnumColumn(
|
100
|
-
ExecutionLogStatus,
|
101
|
-
native_enum=False,
|
102
|
-
values_callable=lambda x: [
|
103
|
-
i.value for i in x
|
104
|
-
], # Using ExecutionLogStatus values in database, even though app is using the names.
|
105
|
-
), # character varying in database
|
106
|
-
index=True,
|
107
|
-
nullable=False,
|
108
|
-
)
|
109
94
|
|
110
95
|
upstream_tasks = Column(
|
111
96
|
MutableList.as_mutable(JSONB)
|
@@ -121,7 +106,8 @@ class RequestTask(Base):
|
|
121
106
|
# Raw data retrieved from an access request is stored here. This contains all of the
|
122
107
|
# intermediate data we retrieved, needed for downstream tasks, but hasn't been filtered
|
123
108
|
# by data category for the end user.
|
124
|
-
access_data = Column( # An encrypted JSON String - saved as a list of Rows
|
109
|
+
_access_data = Column( # An encrypted JSON String - saved as a list of Rows
|
110
|
+
"access_data",
|
125
111
|
StringEncryptedType(
|
126
112
|
type_in=JSONTypeOverride,
|
127
113
|
key=CONFIG.security.app_encryption_key,
|
@@ -132,7 +118,8 @@ class RequestTask(Base):
|
|
132
118
|
|
133
119
|
# This is the raw access data saved in erasure format (with placeholders preserved) to perform a masking request.
|
134
120
|
# First saved on the access node, and then copied to the corresponding erasure node.
|
135
|
-
data_for_erasures = Column( # An encrypted JSON String - saved as a list of rows
|
121
|
+
_data_for_erasures = Column( # An encrypted JSON String - saved as a list of rows
|
122
|
+
"data_for_erasures",
|
136
123
|
StringEncryptedType(
|
137
124
|
type_in=JSONTypeOverride,
|
138
125
|
key=CONFIG.security.app_encryption_key,
|
@@ -141,6 +128,15 @@ class RequestTask(Base):
|
|
141
128
|
),
|
142
129
|
)
|
143
130
|
|
131
|
+
# Use descriptors for automatic external storage handling
|
132
|
+
access_data = EncryptedLargeDataDescriptor(
|
133
|
+
field_name="access_data", empty_default=[]
|
134
|
+
)
|
135
|
+
|
136
|
+
data_for_erasures = EncryptedLargeDataDescriptor(
|
137
|
+
field_name="data_for_erasures", empty_default=[]
|
138
|
+
)
|
139
|
+
|
144
140
|
# Written after an erasure is completed
|
145
141
|
rows_masked = Column(Integer)
|
146
142
|
# Written after a consent request is completed - not all consent
|
@@ -177,12 +173,22 @@ class RequestTask(Base):
|
|
177
173
|
"""Convenience helper for asserting whether the task is a terminator task"""
|
178
174
|
return self.request_task_address == TERMINATOR_ADDRESS
|
179
175
|
|
176
|
+
@classmethod
|
177
|
+
def allowed_action_types(cls) -> List[str]:
|
178
|
+
return [e.value for e in ActionType]
|
179
|
+
|
180
180
|
def get_cached_task_id(self) -> Optional[str]:
|
181
181
|
"""Gets the cached celery task ID for this request task."""
|
182
182
|
cache: FidesopsRedis = get_cache()
|
183
183
|
task_id = cache.get(get_async_task_tracking_cache_key(self.id))
|
184
184
|
return task_id
|
185
185
|
|
186
|
+
def cleanup_external_storage(self) -> None:
|
187
|
+
"""Clean up all external storage files for this request task"""
|
188
|
+
# Access the descriptor from the class to call cleanup
|
189
|
+
RequestTask.access_data.cleanup(self)
|
190
|
+
RequestTask.data_for_erasures.cleanup(self)
|
191
|
+
|
186
192
|
def get_access_data(self) -> List[Row]:
|
187
193
|
"""Helper to retrieve access data or default to empty list"""
|
188
194
|
return self.access_data or []
|
@@ -191,6 +197,11 @@ class RequestTask(Base):
|
|
191
197
|
"""Helper to retrieve erasure data needed to build masking requests or default to empty list"""
|
192
198
|
return self.data_for_erasures or []
|
193
199
|
|
200
|
+
def delete(self, db: Session) -> None:
|
201
|
+
"""Override delete to cleanup external storage first"""
|
202
|
+
self.cleanup_external_storage()
|
203
|
+
super().delete(db)
|
204
|
+
|
194
205
|
def update_status(self, db: Session, status: ExecutionLogStatus) -> None:
|
195
206
|
"""Helper method to update a task's status"""
|
196
207
|
self.status = status
|
@@ -236,7 +247,7 @@ class RequestTask(Base):
|
|
236
247
|
if not tasks_complete and should_log:
|
237
248
|
logger.debug(
|
238
249
|
"Upstream tasks incomplete for {} task {}.",
|
239
|
-
self.action_type.value,
|
250
|
+
self.action_type,
|
240
251
|
self.collection_address,
|
241
252
|
)
|
242
253
|
|
@@ -267,7 +278,7 @@ class RequestTask(Base):
|
|
267
278
|
logger.debug(
|
268
279
|
"Celery Task ID {} found for {} task {}.",
|
269
280
|
celery_task_id,
|
270
|
-
self.action_type.value,
|
281
|
+
self.action_type,
|
271
282
|
self.collection_address,
|
272
283
|
)
|
273
284
|
|
@@ -277,7 +288,7 @@ class RequestTask(Base):
|
|
277
288
|
logger.debug(
|
278
289
|
"Celery Task {} already processing for {} task {}.",
|
279
290
|
celery_task_id,
|
280
|
-
self.action_type.value,
|
291
|
+
self.action_type,
|
281
292
|
self.collection_address,
|
282
293
|
)
|
283
294
|
|
@@ -0,0 +1,96 @@
|
|
1
|
+
import enum
|
2
|
+
from typing import Any, List
|
3
|
+
|
4
|
+
from sqlalchemy import Column, DateTime, String
|
5
|
+
from sqlalchemy.sql import text
|
6
|
+
|
7
|
+
from fides.api.db.util import EnumColumn
|
8
|
+
|
9
|
+
|
10
|
+
class ExecutionLogStatus(enum.Enum):
|
11
|
+
"""Enum for task execution log statuses, reflecting where they are in their workflow"""
|
12
|
+
|
13
|
+
in_processing = "in_processing"
|
14
|
+
pending = "pending"
|
15
|
+
complete = "complete"
|
16
|
+
error = "error"
|
17
|
+
awaiting_processing = "paused" # "paused" in the database to avoid a migration, but use "awaiting_processing" in the app
|
18
|
+
retrying = "retrying"
|
19
|
+
skipped = "skipped"
|
20
|
+
|
21
|
+
|
22
|
+
class WorkerTask:
    """
    Describes a task that a worker executes.

    An ``action_type`` passed to the constructor is checked against the list
    returned by ``allowed_action_types``, which subclasses must provide.
    """

    # The field is named action_type so that RequestTask needs no migration when this model is created
    action_type = Column(String, nullable=False, index=True)

    # WorkerTask shares its statuses with ExecutionLogs: whenever a WorkerTask changes state,
    # an ExecutionLog with that same state is created. GraphTask ties the two tightly together.
    status = Column(
        EnumColumn(
            ExecutionLogStatus,
            native_enum=False,  # character varying in database
            # The database stores the ExecutionLogStatus values, even though the app uses the names.
            values_callable=lambda members: [member.value for member in members],
        ),
        index=True,
        nullable=False,
    )

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        """
        Validate a supplied ``action_type`` and then defer to the next initializer.

        Raises:
            ValueError: if ``action_type`` is given and is not an allowed action type.
        """
        requested_type = kwargs.get("action_type")
        if requested_type is not None:
            self.validate_action_type(requested_type)
        super().__init__(*args, **kwargs)

    @classmethod
    def validate_action_type(cls, action_type: str) -> None:
        """
        Ensure ``action_type`` is one of the action types allowed for this worker task.

        Raises:
            ValueError: if ``action_type`` is not in ``allowed_action_types()``.
        """
        if action_type in cls.allowed_action_types():
            return
        raise ValueError(f"Invalid action_type '{action_type}' for {cls.__name__}")

    @classmethod
    def allowed_action_types(cls) -> List[str]:
        """
        Return the list of allowed action types; subclasses must override this.

        Raises:
            NotImplementedError: always, in this base implementation.
        """
        raise NotImplementedError("Subclasses must implement allowed_action_types")
|
63
|
+
|
64
|
+
|
65
|
+
class TaskExecutionLog:
    """
    Holds an individual execution log entry associated with a WorkerTask.
    """

    status = Column(
        EnumColumn(
            ExecutionLogStatus,
            native_enum=True,
            # The database stores the ExecutionLogStatus values, even though the app uses the names.
            values_callable=lambda members: [member.value for member in members],
        ),
        index=True,
        nullable=False,
    )

    # Info, warning, or error messages
    message = Column(String)

    # clock_timestamp() is used rather than NOW() so the value is the actual current time
    # when the row is created, independent of transaction state. This avoids timestamps
    # being cached within a transaction and gives more accurate creation times.
    # https://www.postgresql.org/docs/current/functions-datetime.html#FUNCTIONS-DATETIME-CURRENT
    created_at = Column(
        DateTime(timezone=True),
        server_default=text("clock_timestamp()"),
    )
    updated_at = Column(
        DateTime(timezone=True),
        server_default=text("clock_timestamp()"),
        onupdate=text("clock_timestamp()"),
    )
|
@@ -0,0 +1,22 @@
|
|
1
|
+
"""Schema for external storage metadata."""
|
2
|
+
|
3
|
+
from typing import Optional

from pydantic import ConfigDict, Field

from fides.api.schemas.base_class import FidesSchema
from fides.api.schemas.storage.storage import StorageType
|
9
|
+
|
10
|
+
|
11
|
+
class ExternalStorageMetadata(FidesSchema):
    """Metadata for externally stored encrypted data."""

    # Pydantic v2 style configuration (the nested ``class Config`` form is
    # deprecated in v2 and emits a deprecation warning). With
    # ``use_enum_values`` the ``storage_type`` field holds the enum's value
    # rather than the enum member.
    model_config = ConfigDict(use_enum_values=True)

    storage_type: StorageType
    file_key: str = Field(description="Path/key of the file in external storage")
    filesize: int = Field(description="Size of the stored file in bytes", ge=0)
    storage_key: Optional[str] = Field(
        default=None, description="Storage configuration key used"
    )
|
@@ -8,6 +8,7 @@ from pydantic import ConfigDict, Field, field_serializer, field_validator
|
|
8
8
|
from fides.api.custom_types import SafeStr
|
9
9
|
from fides.api.graph.config import CollectionAddress
|
10
10
|
from fides.api.models.audit_log import AuditLogAction
|
11
|
+
from fides.api.models.worker_task import ExecutionLogStatus
|
11
12
|
from fides.api.schemas.api import BulkResponse, BulkUpdateFailed
|
12
13
|
from fides.api.schemas.base_class import FidesSchema
|
13
14
|
from fides.api.schemas.policy import ActionType, CurrentStep
|
@@ -141,18 +142,6 @@ class FieldsAffectedResponse(FidesSchema):
|
|
141
142
|
model_config = ConfigDict(from_attributes=True, use_enum_values=True)
|
142
143
|
|
143
144
|
|
144
|
-
class ExecutionLogStatus(EnumType):
|
145
|
-
"""Enum for execution log statuses, reflecting where they are in their workflow"""
|
146
|
-
|
147
|
-
in_processing = "in_processing"
|
148
|
-
pending = "pending"
|
149
|
-
complete = "complete"
|
150
|
-
error = "error"
|
151
|
-
awaiting_processing = "paused" # "paused" in the database to avoid a migration, but use "awaiting_processing" in the app
|
152
|
-
retrying = "retrying"
|
153
|
-
skipped = "skipped"
|
154
|
-
|
155
|
-
|
156
145
|
class ExecutionLogStatusSerializeOverride(FidesSchema):
|
157
146
|
"""Override to serialize "paused" Execution Logs as awaiting_processing instead"""
|
158
147
|
|
@@ -5,6 +5,7 @@ from sqlalchemy.orm import Session
|
|
5
5
|
from fides.api.models.connectionconfig import ConnectionConfig, ConnectionType
|
6
6
|
from fides.api.models.policy import Rule
|
7
7
|
from fides.api.models.privacy_request import ExecutionLog, PrivacyRequest
|
8
|
+
from fides.api.models.worker_task import ExecutionLogStatus
|
8
9
|
from fides.api.schemas.connection_configuration.connection_secrets_email import (
|
9
10
|
AdvancedSettings,
|
10
11
|
BaseEmailSchema,
|
@@ -15,7 +16,6 @@ from fides.api.schemas.messaging.messaging import (
|
|
15
16
|
MessagingActionType,
|
16
17
|
)
|
17
18
|
from fides.api.schemas.policy import ActionType
|
18
|
-
from fides.api.schemas.privacy_request import ExecutionLogStatus
|
19
19
|
from fides.api.schemas.redis_cache import Identity
|
20
20
|
from fides.api.service.connectors.base_email_connector import (
|
21
21
|
BaseEmailConnector,
|
@@ -16,6 +16,7 @@ from fides.api.models.privacy_notice import (
|
|
16
16
|
)
|
17
17
|
from fides.api.models.privacy_preference import PrivacyPreferenceHistory
|
18
18
|
from fides.api.models.privacy_request import ExecutionLog, PrivacyRequest
|
19
|
+
from fides.api.models.worker_task import ExecutionLogStatus
|
19
20
|
from fides.api.schemas.connection_configuration.connection_secrets_email import (
|
20
21
|
AdvancedSettingsWithExtendedIdentityTypes,
|
21
22
|
ExtendedEmailSchema,
|
@@ -29,7 +30,7 @@ from fides.api.schemas.messaging.messaging import (
|
|
29
30
|
from fides.api.schemas.policy import ActionType
|
30
31
|
from fides.api.schemas.privacy_notice import PrivacyNoticeHistorySchema
|
31
32
|
from fides.api.schemas.privacy_preference import MinimalPrivacyPreferenceHistorySchema
|
32
|
-
from fides.api.schemas.privacy_request import Consent, ExecutionLogStatus
|
33
|
+
from fides.api.schemas.privacy_request import Consent
|
33
34
|
from fides.api.schemas.redis_cache import Identity
|
34
35
|
from fides.api.service.connectors.base_email_connector import (
|
35
36
|
BaseEmailConnector,
|
@@ -17,11 +17,12 @@ from fides.api.models.privacy_request import (
|
|
17
17
|
RequestTask,
|
18
18
|
TraversalDetails,
|
19
19
|
)
|
20
|
+
from fides.api.models.worker_task import ExecutionLogStatus
|
20
21
|
from fides.api.schemas.connection_configuration.connection_secrets_dynamic_erasure_email import (
|
21
22
|
DynamicErasureEmailSchema,
|
22
23
|
)
|
23
24
|
from fides.api.schemas.policy import ActionType
|
24
|
-
from fides.api.schemas.privacy_request import ExecutionLogStatus, PrivacyRequestStatus
|
25
|
+
from fides.api.schemas.privacy_request import PrivacyRequestStatus
|
25
26
|
from fides.api.service.connectors.base_connector import BaseConnector
|
26
27
|
from fides.api.service.connectors.base_erasure_email_connector import (
|
27
28
|
BaseErasureEmailConnector,
|
@@ -10,9 +10,9 @@ from fides.api.models.connectionconfig import (
|
|
10
10
|
ConnectionType,
|
11
11
|
)
|
12
12
|
from fides.api.models.privacy_request import ExecutionLog
|
13
|
+
from fides.api.models.worker_task import ExecutionLogStatus
|
13
14
|
from fides.api.schemas.connection_configuration import EmailSchema
|
14
15
|
from fides.api.schemas.policy import ActionType
|
15
|
-
from fides.api.schemas.privacy_request import ExecutionLogStatus
|
16
16
|
from fides.api.service.connectors.base_erasure_email_connector import (
|
17
17
|
BaseErasureEmailConnector,
|
18
18
|
filter_user_identities_for_connector,
|