ethyca-fides 2.63.0rc2__py2.py3-none-any.whl → 2.63.1__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ethyca_fides-2.63.0rc2.dist-info → ethyca_fides-2.63.1.dist-info}/METADATA +1 -1
- {ethyca_fides-2.63.0rc2.dist-info → ethyca_fides-2.63.1.dist-info}/RECORD +131 -112
- fides/_version.py +3 -3
- fides/api/alembic/migrations/versions/29e56fa1fdb3_add_monitor_tasks.py +147 -0
- fides/api/alembic/migrations/versions/5efcdf18438e_add_manual_task_tables.py +160 -0
- fides/api/alembic/migrations/versions/bf713b5a021d_staged_resource_ancestor_link_data_.py +20 -11
- fides/api/api/v1/endpoints/privacy_request_endpoints.py +4 -4
- fides/api/db/base.py +7 -1
- fides/api/migrations/post_upgrade_index_creation.py +3 -3
- fides/api/models/connectionconfig.py +1 -1
- fides/api/models/detection_discovery/__init__.py +35 -0
- fides/api/models/detection_discovery/monitor_task.py +162 -0
- fides/api/models/field_types/__init__.py +5 -0
- fides/api/models/field_types/encrypted_large_data.py +151 -0
- fides/api/models/manual_tasks/__init__.py +8 -0
- fides/api/models/manual_tasks/manual_task.py +110 -0
- fides/api/models/manual_tasks/manual_task_log.py +100 -0
- fides/api/models/privacy_preference.py +1 -1
- fides/api/models/privacy_request/execution_log.py +3 -31
- fides/api/models/privacy_request/privacy_request.py +16 -3
- fides/api/models/privacy_request/request_task.py +36 -25
- fides/api/models/worker_task.py +96 -0
- fides/api/schemas/external_storage.py +22 -0
- fides/api/schemas/manual_tasks/__init__.py +0 -0
- fides/api/schemas/manual_tasks/manual_task_schemas.py +79 -0
- fides/api/schemas/manual_tasks/manual_task_status.py +151 -0
- fides/api/schemas/privacy_request.py +1 -12
- fides/api/service/connectors/base_erasure_email_connector.py +1 -1
- fides/api/service/connectors/consent_email_connector.py +2 -1
- fides/api/service/connectors/dynamic_erasure_email_connector.py +2 -1
- fides/api/service/connectors/erasure_email_connector.py +1 -1
- fides/api/service/external_data_storage.py +371 -0
- fides/api/service/privacy_request/request_runner_service.py +5 -5
- fides/api/service/privacy_request/request_service.py +1 -1
- fides/api/task/create_request_tasks.py +1 -1
- fides/api/task/execute_request_tasks.py +9 -8
- fides/api/task/graph_task.py +22 -10
- fides/api/util/consent_util.py +1 -1
- fides/api/util/data_size.py +102 -0
- fides/api/util/encryption/aes_gcm_encryption_util.py +271 -0
- fides/service/manual_tasks/__init__.py +0 -0
- fides/service/manual_tasks/manual_task_service.py +150 -0
- fides/service/privacy_request/privacy_request_service.py +1 -1
- fides/ui-build/static/admin/404.html +1 -1
- fides/ui-build/static/admin/add-systems/manual.html +1 -1
- fides/ui-build/static/admin/add-systems/multiple.html +1 -1
- fides/ui-build/static/admin/add-systems.html +1 -1
- fides/ui-build/static/admin/consent/configure/add-vendors.html +1 -1
- fides/ui-build/static/admin/consent/configure.html +1 -1
- fides/ui-build/static/admin/consent/privacy-experience/[id].html +1 -1
- fides/ui-build/static/admin/consent/privacy-experience/new.html +1 -1
- fides/ui-build/static/admin/consent/privacy-experience.html +1 -1
- fides/ui-build/static/admin/consent/privacy-notices/[id].html +1 -1
- fides/ui-build/static/admin/consent/privacy-notices/new.html +1 -1
- fides/ui-build/static/admin/consent/privacy-notices.html +1 -1
- fides/ui-build/static/admin/consent/properties.html +1 -1
- fides/ui-build/static/admin/consent/reporting.html +1 -1
- fides/ui-build/static/admin/consent.html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/projects/[projectUrn]/[resourceUrn].html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/projects/[projectUrn].html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/projects.html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/resources/[resourceUrn].html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/resources.html +1 -1
- fides/ui-build/static/admin/data-catalog.html +1 -1
- fides/ui-build/static/admin/data-discovery/action-center/[monitorId]/[systemId].html +1 -1
- fides/ui-build/static/admin/data-discovery/action-center/[monitorId].html +1 -1
- fides/ui-build/static/admin/data-discovery/action-center.html +1 -1
- fides/ui-build/static/admin/data-discovery/activity.html +1 -1
- fides/ui-build/static/admin/data-discovery/detection/[resourceUrn].html +1 -1
- fides/ui-build/static/admin/data-discovery/detection.html +1 -1
- fides/ui-build/static/admin/data-discovery/discovery/[resourceUrn].html +1 -1
- fides/ui-build/static/admin/data-discovery/discovery.html +1 -1
- fides/ui-build/static/admin/datamap.html +1 -1
- fides/ui-build/static/admin/dataset/[datasetId]/[collectionName]/[...subfieldNames].html +1 -1
- fides/ui-build/static/admin/dataset/[datasetId]/[collectionName].html +1 -1
- fides/ui-build/static/admin/dataset/[datasetId].html +1 -1
- fides/ui-build/static/admin/dataset/new.html +1 -1
- fides/ui-build/static/admin/dataset.html +1 -1
- fides/ui-build/static/admin/datastore-connection/[id].html +1 -1
- fides/ui-build/static/admin/datastore-connection/new.html +1 -1
- fides/ui-build/static/admin/datastore-connection.html +1 -1
- fides/ui-build/static/admin/index.html +1 -1
- fides/ui-build/static/admin/integrations/[id].html +1 -1
- fides/ui-build/static/admin/integrations.html +1 -1
- fides/ui-build/static/admin/login/[provider].html +1 -1
- fides/ui-build/static/admin/login.html +1 -1
- fides/ui-build/static/admin/messaging/[id].html +1 -1
- fides/ui-build/static/admin/messaging/add-template.html +1 -1
- fides/ui-build/static/admin/messaging.html +1 -1
- fides/ui-build/static/admin/poc/ant-components.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/AntForm.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/FormikAntFormItem.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/FormikControlled.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/FormikField.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/FormikSpreadField.html +1 -1
- fides/ui-build/static/admin/poc/forms.html +1 -1
- fides/ui-build/static/admin/poc/table-migration.html +1 -1
- fides/ui-build/static/admin/privacy-requests/[id].html +1 -1
- fides/ui-build/static/admin/privacy-requests/configure/messaging.html +1 -1
- fides/ui-build/static/admin/privacy-requests/configure/storage.html +1 -1
- fides/ui-build/static/admin/privacy-requests/configure.html +1 -1
- fides/ui-build/static/admin/privacy-requests.html +1 -1
- fides/ui-build/static/admin/properties/[id].html +1 -1
- fides/ui-build/static/admin/properties/add-property.html +1 -1
- fides/ui-build/static/admin/properties.html +1 -1
- fides/ui-build/static/admin/reporting/datamap.html +1 -1
- fides/ui-build/static/admin/settings/about/alpha.html +1 -1
- fides/ui-build/static/admin/settings/about.html +1 -1
- fides/ui-build/static/admin/settings/consent/[configuration_id]/[purpose_id].html +1 -1
- fides/ui-build/static/admin/settings/consent.html +1 -1
- fides/ui-build/static/admin/settings/custom-fields.html +1 -1
- fides/ui-build/static/admin/settings/domain-records.html +1 -1
- fides/ui-build/static/admin/settings/domains.html +1 -1
- fides/ui-build/static/admin/settings/email-templates.html +1 -1
- fides/ui-build/static/admin/settings/locations.html +1 -1
- fides/ui-build/static/admin/settings/organization.html +1 -1
- fides/ui-build/static/admin/settings/regulations.html +1 -1
- fides/ui-build/static/admin/systems/configure/[id]/test-datasets.html +1 -1
- fides/ui-build/static/admin/systems/configure/[id].html +1 -1
- fides/ui-build/static/admin/systems.html +1 -1
- fides/ui-build/static/admin/taxonomy.html +1 -1
- fides/ui-build/static/admin/user-management/new.html +1 -1
- fides/ui-build/static/admin/user-management/profile/[id].html +1 -1
- fides/ui-build/static/admin/user-management.html +1 -1
- {ethyca_fides-2.63.0rc2.dist-info → ethyca_fides-2.63.1.dist-info}/WHEEL +0 -0
- {ethyca_fides-2.63.0rc2.dist-info → ethyca_fides-2.63.1.dist-info}/entry_points.txt +0 -0
- {ethyca_fides-2.63.0rc2.dist-info → ethyca_fides-2.63.1.dist-info}/licenses/LICENSE +0 -0
- {ethyca_fides-2.63.0rc2.dist-info → ethyca_fides-2.63.1.dist-info}/top_level.txt +0 -0
- /fides/api/models/{detection_discovery.py → detection_discovery/core.py} +0 -0
- /fides/ui-build/static/admin/_next/static/{Fb70i-8GI-owNAvgEJWhA → SZn_Fpr_qG1COMjkdloep}/_buildManifest.js +0 -0
- /fides/ui-build/static/admin/_next/static/{Fb70i-8GI-owNAvgEJWhA → SZn_Fpr_qG1COMjkdloep}/_ssgManifest.js +0 -0
@@ -0,0 +1,151 @@
|
|
1
|
+
from datetime import datetime
|
2
|
+
from typing import Any, Optional, Type
|
3
|
+
|
4
|
+
from loguru import logger
|
5
|
+
|
6
|
+
from fides.api.api.deps import get_autoclose_db_session
|
7
|
+
from fides.api.schemas.external_storage import ExternalStorageMetadata
|
8
|
+
from fides.api.service.external_data_storage import (
|
9
|
+
ExternalDataStorageError,
|
10
|
+
ExternalDataStorageService,
|
11
|
+
)
|
12
|
+
from fides.api.util.data_size import LARGE_DATA_THRESHOLD_BYTES, calculate_data_size
|
13
|
+
|
14
|
+
|
15
|
+
class EncryptedLargeDataDescriptor:
    """
    Descriptor that transparently moves oversized field values to external storage.

    The descriptor fronts a private attribute named ``_<field_name>`` on the
    owning instance. On assignment, a value whose size (as reported by
    ``calculate_data_size``) exceeds ``threshold_bytes`` is handed to
    ``ExternalDataStorageService`` and only the returned metadata dict is kept in
    the private attribute; smaller values are kept in the private attribute
    directly. On read, a metadata dict (recognised by its ``storage_type`` key)
    is resolved back to the stored payload.
    """

    def __init__(
        self,
        field_name: str,
        empty_default: Optional[Any] = None,
        threshold_bytes: Optional[int] = None,
    ):
        """
        Args:
            field_name: Logical field name; the backing attribute is ``_<field_name>``.
            empty_default: Value used when the field is assigned something falsy.
                Defaults to an empty list.
            threshold_bytes: Size above which values are stored externally. A falsy
                value (``None`` or ``0``) selects ``LARGE_DATA_THRESHOLD_BYTES``.
        """
        self.field_name = field_name
        self.private_field = f"_{field_name}"
        self.empty_default = empty_default if empty_default is not None else []
        self.threshold_bytes = threshold_bytes or LARGE_DATA_THRESHOLD_BYTES
        self.model_class: Optional[str] = None
        self.name: Optional[str] = None

    # Descriptor protocol helpers

    def __set_name__(
        self, owner: Type, name: str
    ) -> None:  # noqa: D401 (docstring in orig file)
        """Record the attribute name and owning class name at class-creation time."""
        self.name = name
        self.model_class = owner.__name__

    def _fresh_empty_default(self) -> Any:
        """Return an independent copy of ``empty_default``.

        One descriptor object serves every instance of the owning class, so
        handing out ``empty_default`` itself would let an in-place mutation made
        through one instance show up on all the others.
        """
        import copy  # pylint: disable=import-outside-toplevel

        return copy.deepcopy(self.empty_default)

    def _generate_storage_path(self, instance: Any) -> str:
        """Build the external storage path for ``instance``.

        The path has the form ``<model>/<instance id>/<field>/<UTC timestamp>.txt``.

        Raises:
            ValueError: If ``instance`` has no truthy ``id`` attribute.
        """
        instance_id = getattr(instance, "id", None)
        if not instance_id:
            raise ValueError(f"Instance {instance} must have an 'id' attribute")
        timestamp = datetime.utcnow().strftime("%Y%m%d-%H%M%S-%f")
        return f"{self.model_class}/{instance_id}/{self.field_name}/{timestamp}.txt"

    def __get__(self, instance: Any, owner: Type) -> Any:  # noqa: D401
        """Return the field value, fetching it from external storage when needed.

        Returns:
            The descriptor itself for class-level access, ``None`` when the
            private attribute is ``None``, the externally stored payload when the
            private attribute holds storage metadata, otherwise the private
            attribute's value unchanged.

        Raises:
            ExternalDataStorageError: If the metadata cannot be validated or the
                external payload cannot be retrieved.
        """
        if instance is None:
            return self
        raw_data = getattr(instance, self.private_field)
        if raw_data is None:
            return None
        if isinstance(raw_data, dict) and "storage_type" in raw_data:
            logger.info(
                f"Reading {self.model_class}.{self.field_name} from external storage "
                f"({raw_data.get('storage_type')})"
            )
            try:
                metadata = ExternalStorageMetadata.model_validate(raw_data)
                data = self._retrieve_external_data(metadata)
                record_count = len(data) if isinstance(data, list) else "N/A"
                logger.info(
                    f"Successfully retrieved {self.model_class}.{self.field_name} "
                    f"from external storage (records: {record_count})"
                )
                # Copy the default so callers never share one mutable object.
                return data if data is not None else self._fresh_empty_default()
            except Exception as e:  # pylint: disable=broad-except
                logger.error(
                    f"Failed to retrieve {self.model_class}.{self.field_name} "
                    f"from external storage: {str(e)}"
                )
                raise ExternalDataStorageError(
                    f"Failed to retrieve {self.field_name}: {str(e)}"
                ) from e
        else:
            return raw_data

    def __set__(self, instance: Any, value: Any) -> None:  # noqa: D401
        """Assign ``value``, choosing inline or external storage by size.

        A falsy value clears any external data and resets the field to a fresh
        copy of ``empty_default``. A value equal to the current one is a no-op.
        """
        if not value:
            self._cleanup_external_data(instance)
            setattr(instance, self.private_field, self._fresh_empty_default())
            return
        try:
            current_data = self.__get__(instance, type(instance))
            if current_data == value:
                return
        except Exception as e:  # pylint: disable=broad-except
            # Best effort only: if the current value cannot be read or compared,
            # fall through and overwrite it, but leave a trace of why.
            logger.debug(
                f"Could not compare current {self.model_class}.{self.field_name} "
                f"with new value, overwriting: {str(e)}"
            )

        data_size = calculate_data_size(value)
        if data_size > self.threshold_bytes:
            logger.info(
                f"{self.model_class}.{self.field_name}: Data size ({data_size:,} bytes) "
                f"exceeds threshold ({self.threshold_bytes:,} bytes), storing externally"
            )
            # NOTE(review): existing external data is removed before the
            # replacement is stored; if the store call raises, the private field
            # still holds the old metadata. Confirm this ordering is intended.
            self._cleanup_external_data(instance)
            metadata = self._store_external_data(instance, value)
            setattr(instance, self.private_field, metadata.model_dump())
        else:
            self._cleanup_external_data(instance)
            setattr(instance, self.private_field, value)

    # External storage helpers

    def _store_external_data(
        self, instance: Any, data: Any
    ) -> "ExternalStorageMetadata":
        """Store ``data`` externally under a newly generated path and return its metadata."""
        storage_path = self._generate_storage_path(instance)
        with get_autoclose_db_session() as session:
            metadata = ExternalDataStorageService.store_data(
                db=session,
                storage_path=storage_path,
                data=data,
            )
            logger.info(
                f"Stored {self.model_class}.{self.field_name} to external storage: {storage_path}"
            )
            return metadata

    @staticmethod
    def _retrieve_external_data(
        metadata: "ExternalStorageMetadata",
    ) -> Any:  # noqa: D401
        """Fetch the payload described by ``metadata`` using a short-lived DB session."""
        with get_autoclose_db_session() as session:
            return ExternalDataStorageService.retrieve_data(
                db=session,
                metadata=metadata,
            )

    def _cleanup_external_data(self, instance: Any) -> None:  # noqa: D401
        """Delete the external payload referenced by the instance, if there is one.

        Failures are logged as warnings and never raised, so cleanup cannot
        block the assignment or deletion that triggered it.
        """
        raw_data = getattr(instance, self.private_field, None)
        if isinstance(raw_data, dict) and "storage_type" in raw_data:
            try:
                metadata = ExternalStorageMetadata.model_validate(raw_data)
                with get_autoclose_db_session() as session:
                    ExternalDataStorageService.delete_data(
                        db=session,
                        metadata=metadata,
                    )
                    logger.info(
                        f"Cleaned up external storage for {self.model_class}.{self.field_name}: "
                        f"{metadata.file_key}"
                    )
            except Exception as e:  # pylint: disable=broad-except
                logger.warning(
                    f"Failed to cleanup external {self.field_name}: {str(e)}"
                )

    # Public helper

    def cleanup(self, instance: Any) -> None:  # noqa: D401
        """Public entry point for removing the instance's external payload."""
        self._cleanup_external_data(instance)
|
@@ -0,0 +1,110 @@
|
|
1
|
+
from typing import Any
|
2
|
+
|
3
|
+
from sqlalchemy import Column, DateTime, ForeignKey, String
|
4
|
+
from sqlalchemy.ext.declarative import declared_attr
|
5
|
+
from sqlalchemy.orm import Session, relationship
|
6
|
+
|
7
|
+
from fides.api.db.base_class import Base
|
8
|
+
from fides.api.db.util import EnumColumn
|
9
|
+
from fides.api.models.manual_tasks.manual_task_log import ManualTaskLog
|
10
|
+
from fides.api.schemas.manual_tasks.manual_task_schemas import (
|
11
|
+
ManualTaskLogStatus,
|
12
|
+
ManualTaskParentEntityType,
|
13
|
+
ManualTaskReferenceType,
|
14
|
+
ManualTaskType,
|
15
|
+
)
|
16
|
+
|
17
|
+
|
18
|
+
class ManualTask(Base):
    """Model for storing manual tasks.

    This model can be used for both privacy request tasks and general tasks.
    For privacy requests, it replaces the functionality of manual webhooks.
    For other use cases, it provides a flexible task management system.

    There can only be one ManualTask per parent entity.
    You can create multiple Configs for the same ManualTask.
    """

    @declared_attr
    def __tablename__(cls) -> str:
        """Overriding base class method to set the table name."""
        return "manual_task"

    # Database columns
    task_type = Column(
        EnumColumn(ManualTaskType),
        nullable=False,
        default=ManualTaskType.privacy_request,
    )
    parent_entity_id = Column(String, nullable=False)
    parent_entity_type = Column(
        EnumColumn(ManualTaskParentEntityType),
        nullable=False,
        default=ManualTaskParentEntityType.connection_config,
    )
    due_date = Column(DateTime, nullable=True)

    # Relationships
    references = relationship(
        "ManualTaskReference",
        back_populates="task",
        uselist=True,
        cascade="all, delete-orphan",
    )
    # Read-only view of this task's log entries, ordered by creation time.
    logs = relationship(
        "ManualTaskLog",
        back_populates="task",
        primaryjoin="and_(ManualTask.id == ManualTaskLog.task_id)",
        viewonly=True,
        order_by="ManualTaskLog.created_at",
    )

    # Properties
    @property
    def assigned_users(self) -> list[str]:
        """Get the reference ids of all users assigned to this task."""
        if not self.references:
            return []
        return [
            ref.reference_id
            for ref in self.references
            if ref.reference_type == ManualTaskReferenceType.assigned_user
        ]

    # CRUD Operations
    @classmethod
    def create(
        cls, db: Session, *, data: dict[str, Any], check_name: bool = True
    ) -> "ManualTask":
        """Create a new manual task and record a ``created`` log entry for it.

        Args:
            db: Database session.
            data: Column values for the new task, passed to the base class ``create``.
            check_name: Forwarded unchanged to the base class ``create``.

        Returns:
            The newly created task.
        """
        task = super().create(db=db, data=data, check_name=check_name)
        # ``task_type`` has a column default, so callers may legitimately omit it
        # from ``data``; fall back to the value on the created row instead of
        # raising KeyError after the task already exists.
        task_type = data.get("task_type", task.task_type)
        ManualTaskLog.create_log(
            db=db,
            task_id=task.id,
            status=ManualTaskLogStatus.created,
            message=f"Created manual task for {task_type}",
        )
        return task
|
89
|
+
|
90
|
+
|
91
|
+
class ManualTaskReference(Base):
    """Association rows linking a manual task to the things it references.

    Every row ties one ``manual_task`` record to a single reference, identified
    by ``reference_id`` and classified by ``reference_type``. A task may own many
    such rows (privacy requests, configurations, assigned users, ...).
    """

    @declared_attr
    def __tablename__(cls) -> str:
        """Set the table name explicitly, overriding the base class method."""
        return "manual_task_reference"

    # Columns
    task_id = Column(
        String,
        ForeignKey("manual_task.id", ondelete="CASCADE"),
        nullable=False,
    )
    reference_id = Column(
        String,
        nullable=False,
    )
    reference_type = Column(
        EnumColumn(ManualTaskReferenceType),
        nullable=False,
    )

    # Back-reference to the owning task
    task = relationship(
        "ManualTask",
        back_populates="references",
    )
|
@@ -0,0 +1,100 @@
|
|
1
|
+
from typing import TYPE_CHECKING, Any, Optional
|
2
|
+
|
3
|
+
from sqlalchemy import Column, ForeignKey, String
|
4
|
+
from sqlalchemy.dialects.postgresql import JSONB
|
5
|
+
from sqlalchemy.ext.declarative import declared_attr
|
6
|
+
from sqlalchemy.orm import Session, relationship
|
7
|
+
|
8
|
+
from fides.api.db.base_class import Base
|
9
|
+
from fides.api.schemas.manual_tasks.manual_task_schemas import ManualTaskLogStatus
|
10
|
+
|
11
|
+
if TYPE_CHECKING:
|
12
|
+
from fides.api.models.manual_tasks.manual_task import ManualTask
|
13
|
+
|
14
|
+
|
15
|
+
class ManualTaskLog(Base):
    """Log entry recording an event in the life of a manual task."""

    @declared_attr
    def __tablename__(cls) -> str:
        """Set the table name explicitly, overriding the base class method."""
        return "manual_task_log"

    task_id = Column(
        String,
        ForeignKey("manual_task.id", ondelete="CASCADE"),
        nullable=False,
    )
    # TODO: Add foreign key constraints when config and instance are implemented
    config_id = Column(String, nullable=True)
    instance_id = Column(String, nullable=True)
    status = Column(String, nullable=False)
    message = Column(String, nullable=True)
    details = Column(JSONB, nullable=True)

    # Relationships - using string references to avoid circular imports
    task = relationship(
        "ManualTask",
        back_populates="logs",
        foreign_keys=[task_id],
    )
    # TODO: Add config and instance relationships when they are implemented
    # config = relationship("ManualTaskConfig", back_populates="logs")
    # instance = relationship("ManualTaskInstance", back_populates="logs")

    @classmethod
    def create_log(
        cls,
        db: Session,
        status: ManualTaskLogStatus,
        task_id: str,
        config_id: Optional[str] = None,
        instance_id: Optional[str] = None,
        message: Optional[str] = None,
        details: Optional[dict[str, Any]] = None,
    ) -> "ManualTaskLog":
        """Create a new task log entry.

        Args:
            db: Database session
            status: Status of the log entry
            task_id: ID of the task
            config_id: Optional ID of the configuration
            instance_id: Optional ID of the instance
            message: Optional message describing the event
            details: Optional additional details about the event

        Returns:
            The created log entry
        """
        return cls.create(
            db=db,
            data=dict(
                task_id=task_id,
                config_id=config_id,
                instance_id=instance_id,
                status=status,
                message=message,
                details=details,
            ),
        )

    @classmethod
    def create_error_log(
        cls,
        db: Session,
        task_id: str,
        message: str,
        config_id: Optional[str] = None,
        instance_id: Optional[str] = None,
        details: Optional[dict[str, Any]] = None,
    ) -> "ManualTaskLog":
        """Create a new log entry with the ``error`` status.

        Args:
            db: Database session
            task_id: ID of the task
            message: Error message describing what went wrong
            config_id: Optional ID of the configuration
            instance_id: Optional ID of the instance
            details: Optional additional details about the error

        Returns:
            The created error log entry
        """
        error_status = ManualTaskLogStatus.error
        return cls.create_log(
            db=db,
            task_id=task_id,
            status=error_status,
            message=message,
            details=details,
            config_id=config_id,
            instance_id=instance_id,
        )
|
@@ -22,8 +22,8 @@ from fides.api.models.privacy_notice import (
|
|
22
22
|
UserConsentPreference,
|
23
23
|
)
|
24
24
|
from fides.api.models.privacy_request import PrivacyRequest, ProvidedIdentity
|
25
|
+
from fides.api.models.worker_task import ExecutionLogStatus
|
25
26
|
from fides.api.schemas.language import SupportedLanguage
|
26
|
-
from fides.api.schemas.privacy_request import ExecutionLogStatus
|
27
27
|
from fides.api.schemas.redis_cache import MultiValue
|
28
28
|
from fides.config import CONFIG
|
29
29
|
|
@@ -4,15 +4,14 @@ from __future__ import annotations
|
|
4
4
|
|
5
5
|
from typing import Optional
|
6
6
|
|
7
|
-
from sqlalchemy import Column,
|
7
|
+
from sqlalchemy import Column, String
|
8
8
|
from sqlalchemy.dialects.postgresql import JSONB
|
9
9
|
from sqlalchemy.ext.mutable import MutableList
|
10
|
-
from sqlalchemy.sql import text
|
11
10
|
|
12
11
|
from fides.api.db.base_class import Base # type: ignore[attr-defined]
|
13
12
|
from fides.api.db.util import EnumColumn
|
13
|
+
from fides.api.models.worker_task import ExecutionLogStatus, TaskExecutionLog
|
14
14
|
from fides.api.schemas.policy import ActionType, CurrentStep
|
15
|
-
from fides.api.schemas.privacy_request import ExecutionLogStatus
|
16
15
|
|
17
16
|
# Locations from which privacy request execution can be resumed, in order.
|
18
17
|
EXECUTION_CHECKPOINTS = [
|
@@ -53,7 +52,7 @@ def can_run_checkpoint(
|
|
53
52
|
) >= EXECUTION_CHECKPOINTS.index(from_checkpoint)
|
54
53
|
|
55
54
|
|
56
|
-
class ExecutionLog(Base):
|
55
|
+
class ExecutionLog(TaskExecutionLog, Base):
|
57
56
|
"""
|
58
57
|
Stores the individual execution logs associated with a PrivacyRequest.
|
59
58
|
|
@@ -68,41 +67,14 @@ class ExecutionLog(Base):
|
|
68
67
|
collection_name = Column(String, index=True)
|
69
68
|
# A JSON Array describing affected fields along with their data categories and paths
|
70
69
|
fields_affected = Column(MutableList.as_mutable(JSONB), nullable=True)
|
71
|
-
# Contains info, warning, or error messages
|
72
|
-
message = Column(String)
|
73
70
|
action_type = Column(
|
74
71
|
EnumColumn(ActionType),
|
75
72
|
index=True,
|
76
73
|
nullable=False,
|
77
74
|
)
|
78
|
-
status = Column(
|
79
|
-
EnumColumn(
|
80
|
-
ExecutionLogStatus,
|
81
|
-
native_enum=True,
|
82
|
-
values_callable=lambda x: [
|
83
|
-
i.value for i in x
|
84
|
-
], # Using ExecutionLogStatus values in database, even though app is using the names.
|
85
|
-
),
|
86
|
-
index=True,
|
87
|
-
nullable=False,
|
88
|
-
)
|
89
75
|
|
90
76
|
privacy_request_id = Column(
|
91
77
|
String,
|
92
78
|
nullable=False,
|
93
79
|
index=True,
|
94
80
|
)
|
95
|
-
|
96
|
-
# Use clock_timestamp() instead of NOW() to get the actual current time at row creation,
|
97
|
-
# regardless of transaction state. This prevents timestamp caching within transactions
|
98
|
-
# and ensures more accurate creation times.
|
99
|
-
# https://www.postgresql.org/docs/current/functions-datetime.html#FUNCTIONS-DATETIME-CURRENT
|
100
|
-
|
101
|
-
created_at = Column(
|
102
|
-
DateTime(timezone=True), server_default=text("clock_timestamp()")
|
103
|
-
)
|
104
|
-
updated_at = Column(
|
105
|
-
DateTime(timezone=True),
|
106
|
-
server_default=text("clock_timestamp()"),
|
107
|
-
onupdate=text("clock_timestamp()"),
|
108
|
-
)
|
@@ -48,6 +48,7 @@ from fides.api.models.audit_log import AuditLog
|
|
48
48
|
from fides.api.models.client import ClientDetail
|
49
49
|
from fides.api.models.comment import Comment, CommentReference, CommentReferenceType
|
50
50
|
from fides.api.models.fides_user import FidesUser
|
51
|
+
from fides.api.models.field_types import EncryptedLargeDataDescriptor
|
51
52
|
from fides.api.models.manual_webhook import AccessManualWebhook
|
52
53
|
from fides.api.models.policy import (
|
53
54
|
Policy,
|
@@ -72,13 +73,13 @@ from fides.api.models.privacy_request.webhook import (
|
|
72
73
|
generate_request_callback_pre_approval_jwe,
|
73
74
|
generate_request_callback_resume_jwe,
|
74
75
|
)
|
76
|
+
from fides.api.models.worker_task import ExecutionLogStatus
|
75
77
|
from fides.api.schemas.drp_privacy_request import DrpPrivacyRequestCreate
|
76
78
|
from fides.api.schemas.external_https import SecondPartyResponseFormat
|
77
79
|
from fides.api.schemas.masking.masking_secrets import MaskingSecretCache
|
78
80
|
from fides.api.schemas.policy import ActionType, CurrentStep
|
79
81
|
from fides.api.schemas.privacy_request import (
|
80
82
|
CheckpointActionRequired,
|
81
|
-
ExecutionLogStatus,
|
82
83
|
ManualAction,
|
83
84
|
PrivacyRequestSource,
|
84
85
|
PrivacyRequestStatus,
|
@@ -251,7 +252,8 @@ class PrivacyRequest(
|
|
251
252
|
awaiting_email_send_at = Column(DateTime(timezone=True), nullable=True)
|
252
253
|
|
253
254
|
# Encrypted filtered access results saved for later retrieval
|
254
|
-
|
255
|
+
_filtered_final_upload = Column( # An encrypted JSON String - Dict[Dict[str, List[Row]]] - rule keys mapped to the filtered access results
|
256
|
+
"filtered_final_upload",
|
255
257
|
StringEncryptedType(
|
256
258
|
type_in=JSONTypeOverride,
|
257
259
|
key=CONFIG.security.app_encryption_key,
|
@@ -260,6 +262,11 @@ class PrivacyRequest(
|
|
260
262
|
),
|
261
263
|
)
|
262
264
|
|
265
|
+
# Use descriptor for automatic external storage handling
|
266
|
+
filtered_final_upload = EncryptedLargeDataDescriptor(
|
267
|
+
field_name="filtered_final_upload", empty_default={}
|
268
|
+
)
|
269
|
+
|
263
270
|
# Encrypted filtered access results saved for later retrieval
|
264
271
|
access_result_urls = Column( # An encrypted JSON String - Dict[Dict[str, List[Row]]] - rule keys mapped to the filtered access results
|
265
272
|
StringEncryptedType(
|
@@ -334,6 +341,7 @@ class PrivacyRequest(
|
|
334
341
|
deleting this object from the database
|
335
342
|
"""
|
336
343
|
self.clear_cached_values()
|
344
|
+
self.cleanup_external_storage()
|
337
345
|
Attachment.delete_attachments_for_reference_and_type(
|
338
346
|
db, self.id, AttachmentReferenceType.privacy_request
|
339
347
|
)
|
@@ -1257,6 +1265,11 @@ class PrivacyRequest(
|
|
1257
1265
|
# DSR 2.0 does not cache the results so nothing to do here
|
1258
1266
|
return {}
|
1259
1267
|
|
1268
|
+
def cleanup_external_storage(self) -> None:
    """Remove the external storage files held for this privacy request."""
    # Look the descriptor up on the class: instance access would invoke its
    # __get__ and return the field value rather than the descriptor object.
    upload_descriptor = PrivacyRequest.filtered_final_upload
    upload_descriptor.cleanup(self)
|
1272
|
+
|
1260
1273
|
def save_filtered_access_results(
|
1261
1274
|
self, db: Session, results: Dict[str, Dict[str, List[Row]]]
|
1262
1275
|
) -> None:
|
@@ -1544,7 +1557,7 @@ def get_action_required_details(
|
|
1544
1557
|
|
1545
1558
|
|
1546
1559
|
def _parse_cache_to_checkpoint_action_required(
|
1547
|
-
cache: dict[str, Any]
|
1560
|
+
cache: dict[str, Any],
|
1548
1561
|
) -> CheckpointActionRequired:
|
1549
1562
|
collection = (
|
1550
1563
|
CollectionAddress(
|
@@ -14,20 +14,19 @@ from sqlalchemy_utils.types.encrypted.encrypted_type import (
|
|
14
14
|
StringEncryptedType,
|
15
15
|
)
|
16
16
|
|
17
|
-
from fides.api.db.base_class import Base # type: ignore[attr-defined]
|
18
|
-
from fides.api.db.base_class import JSONTypeOverride
|
19
|
-
from fides.api.db.util import EnumColumn
|
17
|
+
from fides.api.db.base_class import Base, JSONTypeOverride # type: ignore[attr-defined]
|
20
18
|
from fides.api.graph.config import (
|
21
19
|
ROOT_COLLECTION_ADDRESS,
|
22
20
|
TERMINATOR_ADDRESS,
|
23
21
|
CollectionAddress,
|
24
22
|
)
|
23
|
+
from fides.api.models.field_types import EncryptedLargeDataDescriptor
|
25
24
|
from fides.api.models.privacy_request.execution_log import (
|
26
25
|
COMPLETED_EXECUTION_LOG_STATUSES,
|
27
26
|
)
|
27
|
+
from fides.api.models.worker_task import ExecutionLogStatus, WorkerTask
|
28
28
|
from fides.api.schemas.base_class import FidesSchema
|
29
29
|
from fides.api.schemas.policy import ActionType
|
30
|
-
from fides.api.schemas.privacy_request import ExecutionLogStatus
|
31
30
|
from fides.api.util.cache import (
|
32
31
|
FidesopsRedis,
|
33
32
|
celery_tasks_in_flight,
|
@@ -68,7 +67,8 @@ class TraversalDetails(FidesSchema):
|
|
68
67
|
)
|
69
68
|
|
70
69
|
|
71
|
-
|
70
|
+
# TODO: At some point we will refactor this model to store all task types in a common table that links to tables with specific task attributes.
|
71
|
+
class RequestTask(WorkerTask, Base):
|
72
72
|
"""
|
73
73
|
An individual Task for a Privacy Request.
|
74
74
|
|
@@ -91,21 +91,6 @@ class RequestTask(Base):
|
|
91
91
|
) # Of the format dataset_name:collection_name for convenience
|
92
92
|
dataset_name = Column(String, nullable=False, index=True)
|
93
93
|
collection_name = Column(String, nullable=False, index=True)
|
94
|
-
action_type = Column(EnumColumn(ActionType), nullable=False, index=True)
|
95
|
-
|
96
|
-
# Note that RequestTasks share statuses with ExecutionLogs. When a RequestTask changes state, an ExecutionLog
|
97
|
-
# is also created with that state. These are tied tightly together in GraphTask.
|
98
|
-
status = Column(
|
99
|
-
EnumColumn(
|
100
|
-
ExecutionLogStatus,
|
101
|
-
native_enum=False,
|
102
|
-
values_callable=lambda x: [
|
103
|
-
i.value for i in x
|
104
|
-
], # Using ExecutionLogStatus values in database, even though app is using the names.
|
105
|
-
), # character varying in database
|
106
|
-
index=True,
|
107
|
-
nullable=False,
|
108
|
-
)
|
109
94
|
|
110
95
|
upstream_tasks = Column(
|
111
96
|
MutableList.as_mutable(JSONB)
|
@@ -121,7 +106,8 @@ class RequestTask(Base):
|
|
121
106
|
# Raw data retrieved from an access request is stored here. This contains all of the
|
122
107
|
# intermediate data we retrieved, needed for downstream tasks, but hasn't been filtered
|
123
108
|
# by data category for the end user.
|
124
|
-
|
109
|
+
_access_data = Column( # An encrypted JSON String - saved as a list of Rows
|
110
|
+
"access_data",
|
125
111
|
StringEncryptedType(
|
126
112
|
type_in=JSONTypeOverride,
|
127
113
|
key=CONFIG.security.app_encryption_key,
|
@@ -132,7 +118,8 @@ class RequestTask(Base):
|
|
132
118
|
|
133
119
|
# This is the raw access data saved in erasure format (with placeholders preserved) to perform a masking request.
|
134
120
|
# First saved on the access node, and then copied to the corresponding erasure node.
|
135
|
-
|
121
|
+
_data_for_erasures = Column( # An encrypted JSON String - saved as a list of rows
|
122
|
+
"data_for_erasures",
|
136
123
|
StringEncryptedType(
|
137
124
|
type_in=JSONTypeOverride,
|
138
125
|
key=CONFIG.security.app_encryption_key,
|
@@ -141,6 +128,15 @@ class RequestTask(Base):
|
|
141
128
|
),
|
142
129
|
)
|
143
130
|
|
131
|
+
# Use descriptors for automatic external storage handling
|
132
|
+
access_data = EncryptedLargeDataDescriptor(
|
133
|
+
field_name="access_data", empty_default=[]
|
134
|
+
)
|
135
|
+
|
136
|
+
data_for_erasures = EncryptedLargeDataDescriptor(
|
137
|
+
field_name="data_for_erasures", empty_default=[]
|
138
|
+
)
|
139
|
+
|
144
140
|
# Written after an erasure is completed
|
145
141
|
rows_masked = Column(Integer)
|
146
142
|
# Written after a consent request is completed - not all consent
|
@@ -177,12 +173,22 @@ class RequestTask(Base):
|
|
177
173
|
"""Convenience helper for asserting whether the task is a terminator task"""
|
178
174
|
return self.request_task_address == TERMINATOR_ADDRESS
|
179
175
|
|
176
|
+
@classmethod
|
177
|
+
def allowed_action_types(cls) -> List[str]:
|
178
|
+
return [e.value for e in ActionType]
|
179
|
+
|
180
180
|
def get_cached_task_id(self) -> Optional[str]:
|
181
181
|
"""Gets the cached celery task ID for this request task."""
|
182
182
|
cache: FidesopsRedis = get_cache()
|
183
183
|
task_id = cache.get(get_async_task_tracking_cache_key(self.id))
|
184
184
|
return task_id
|
185
185
|
|
186
|
+
def cleanup_external_storage(self) -> None:
|
187
|
+
"""Clean up all external storage files for this request task"""
|
188
|
+
# Access the descriptor from the class to call cleanup
|
189
|
+
RequestTask.access_data.cleanup(self)
|
190
|
+
RequestTask.data_for_erasures.cleanup(self)
|
191
|
+
|
186
192
|
def get_access_data(self) -> List[Row]:
|
187
193
|
"""Helper to retrieve access data or default to empty list"""
|
188
194
|
return self.access_data or []
|
@@ -191,6 +197,11 @@ class RequestTask(Base):
|
|
191
197
|
"""Helper to retrieve erasure data needed to build masking requests or default to empty list"""
|
192
198
|
return self.data_for_erasures or []
|
193
199
|
|
200
|
+
def delete(self, db: Session) -> None:
|
201
|
+
"""Override delete to cleanup external storage first"""
|
202
|
+
self.cleanup_external_storage()
|
203
|
+
super().delete(db)
|
204
|
+
|
194
205
|
def update_status(self, db: Session, status: ExecutionLogStatus) -> None:
|
195
206
|
"""Helper method to update a task's status"""
|
196
207
|
self.status = status
|
@@ -236,7 +247,7 @@ class RequestTask(Base):
|
|
236
247
|
if not tasks_complete and should_log:
|
237
248
|
logger.debug(
|
238
249
|
"Upstream tasks incomplete for {} task {}.",
|
239
|
-
self.action_type
|
250
|
+
self.action_type,
|
240
251
|
self.collection_address,
|
241
252
|
)
|
242
253
|
|
@@ -267,7 +278,7 @@ class RequestTask(Base):
|
|
267
278
|
logger.debug(
|
268
279
|
"Celery Task ID {} found for {} task {}.",
|
269
280
|
celery_task_id,
|
270
|
-
self.action_type
|
281
|
+
self.action_type,
|
271
282
|
self.collection_address,
|
272
283
|
)
|
273
284
|
|
@@ -277,7 +288,7 @@ class RequestTask(Base):
|
|
277
288
|
logger.debug(
|
278
289
|
"Celery Task {} already processing for {} task {}.",
|
279
290
|
celery_task_id,
|
280
|
-
self.action_type
|
291
|
+
self.action_type,
|
281
292
|
self.collection_address,
|
282
293
|
)
|
283
294
|
|