ethyca-fides 2.63.0rc2__py2.py3-none-any.whl → 2.63.1__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. {ethyca_fides-2.63.0rc2.dist-info → ethyca_fides-2.63.1.dist-info}/METADATA +1 -1
  2. {ethyca_fides-2.63.0rc2.dist-info → ethyca_fides-2.63.1.dist-info}/RECORD +131 -112
  3. fides/_version.py +3 -3
  4. fides/api/alembic/migrations/versions/29e56fa1fdb3_add_monitor_tasks.py +147 -0
  5. fides/api/alembic/migrations/versions/5efcdf18438e_add_manual_task_tables.py +160 -0
  6. fides/api/alembic/migrations/versions/bf713b5a021d_staged_resource_ancestor_link_data_.py +20 -11
  7. fides/api/api/v1/endpoints/privacy_request_endpoints.py +4 -4
  8. fides/api/db/base.py +7 -1
  9. fides/api/migrations/post_upgrade_index_creation.py +3 -3
  10. fides/api/models/connectionconfig.py +1 -1
  11. fides/api/models/detection_discovery/__init__.py +35 -0
  12. fides/api/models/detection_discovery/monitor_task.py +162 -0
  13. fides/api/models/field_types/__init__.py +5 -0
  14. fides/api/models/field_types/encrypted_large_data.py +151 -0
  15. fides/api/models/manual_tasks/__init__.py +8 -0
  16. fides/api/models/manual_tasks/manual_task.py +110 -0
  17. fides/api/models/manual_tasks/manual_task_log.py +100 -0
  18. fides/api/models/privacy_preference.py +1 -1
  19. fides/api/models/privacy_request/execution_log.py +3 -31
  20. fides/api/models/privacy_request/privacy_request.py +16 -3
  21. fides/api/models/privacy_request/request_task.py +36 -25
  22. fides/api/models/worker_task.py +96 -0
  23. fides/api/schemas/external_storage.py +22 -0
  24. fides/api/schemas/manual_tasks/__init__.py +0 -0
  25. fides/api/schemas/manual_tasks/manual_task_schemas.py +79 -0
  26. fides/api/schemas/manual_tasks/manual_task_status.py +151 -0
  27. fides/api/schemas/privacy_request.py +1 -12
  28. fides/api/service/connectors/base_erasure_email_connector.py +1 -1
  29. fides/api/service/connectors/consent_email_connector.py +2 -1
  30. fides/api/service/connectors/dynamic_erasure_email_connector.py +2 -1
  31. fides/api/service/connectors/erasure_email_connector.py +1 -1
  32. fides/api/service/external_data_storage.py +371 -0
  33. fides/api/service/privacy_request/request_runner_service.py +5 -5
  34. fides/api/service/privacy_request/request_service.py +1 -1
  35. fides/api/task/create_request_tasks.py +1 -1
  36. fides/api/task/execute_request_tasks.py +9 -8
  37. fides/api/task/graph_task.py +22 -10
  38. fides/api/util/consent_util.py +1 -1
  39. fides/api/util/data_size.py +102 -0
  40. fides/api/util/encryption/aes_gcm_encryption_util.py +271 -0
  41. fides/service/manual_tasks/__init__.py +0 -0
  42. fides/service/manual_tasks/manual_task_service.py +150 -0
  43. fides/service/privacy_request/privacy_request_service.py +1 -1
  44. fides/ui-build/static/admin/404.html +1 -1
  45. fides/ui-build/static/admin/add-systems/manual.html +1 -1
  46. fides/ui-build/static/admin/add-systems/multiple.html +1 -1
  47. fides/ui-build/static/admin/add-systems.html +1 -1
  48. fides/ui-build/static/admin/consent/configure/add-vendors.html +1 -1
  49. fides/ui-build/static/admin/consent/configure.html +1 -1
  50. fides/ui-build/static/admin/consent/privacy-experience/[id].html +1 -1
  51. fides/ui-build/static/admin/consent/privacy-experience/new.html +1 -1
  52. fides/ui-build/static/admin/consent/privacy-experience.html +1 -1
  53. fides/ui-build/static/admin/consent/privacy-notices/[id].html +1 -1
  54. fides/ui-build/static/admin/consent/privacy-notices/new.html +1 -1
  55. fides/ui-build/static/admin/consent/privacy-notices.html +1 -1
  56. fides/ui-build/static/admin/consent/properties.html +1 -1
  57. fides/ui-build/static/admin/consent/reporting.html +1 -1
  58. fides/ui-build/static/admin/consent.html +1 -1
  59. fides/ui-build/static/admin/data-catalog/[systemId]/projects/[projectUrn]/[resourceUrn].html +1 -1
  60. fides/ui-build/static/admin/data-catalog/[systemId]/projects/[projectUrn].html +1 -1
  61. fides/ui-build/static/admin/data-catalog/[systemId]/projects.html +1 -1
  62. fides/ui-build/static/admin/data-catalog/[systemId]/resources/[resourceUrn].html +1 -1
  63. fides/ui-build/static/admin/data-catalog/[systemId]/resources.html +1 -1
  64. fides/ui-build/static/admin/data-catalog.html +1 -1
  65. fides/ui-build/static/admin/data-discovery/action-center/[monitorId]/[systemId].html +1 -1
  66. fides/ui-build/static/admin/data-discovery/action-center/[monitorId].html +1 -1
  67. fides/ui-build/static/admin/data-discovery/action-center.html +1 -1
  68. fides/ui-build/static/admin/data-discovery/activity.html +1 -1
  69. fides/ui-build/static/admin/data-discovery/detection/[resourceUrn].html +1 -1
  70. fides/ui-build/static/admin/data-discovery/detection.html +1 -1
  71. fides/ui-build/static/admin/data-discovery/discovery/[resourceUrn].html +1 -1
  72. fides/ui-build/static/admin/data-discovery/discovery.html +1 -1
  73. fides/ui-build/static/admin/datamap.html +1 -1
  74. fides/ui-build/static/admin/dataset/[datasetId]/[collectionName]/[...subfieldNames].html +1 -1
  75. fides/ui-build/static/admin/dataset/[datasetId]/[collectionName].html +1 -1
  76. fides/ui-build/static/admin/dataset/[datasetId].html +1 -1
  77. fides/ui-build/static/admin/dataset/new.html +1 -1
  78. fides/ui-build/static/admin/dataset.html +1 -1
  79. fides/ui-build/static/admin/datastore-connection/[id].html +1 -1
  80. fides/ui-build/static/admin/datastore-connection/new.html +1 -1
  81. fides/ui-build/static/admin/datastore-connection.html +1 -1
  82. fides/ui-build/static/admin/index.html +1 -1
  83. fides/ui-build/static/admin/integrations/[id].html +1 -1
  84. fides/ui-build/static/admin/integrations.html +1 -1
  85. fides/ui-build/static/admin/login/[provider].html +1 -1
  86. fides/ui-build/static/admin/login.html +1 -1
  87. fides/ui-build/static/admin/messaging/[id].html +1 -1
  88. fides/ui-build/static/admin/messaging/add-template.html +1 -1
  89. fides/ui-build/static/admin/messaging.html +1 -1
  90. fides/ui-build/static/admin/poc/ant-components.html +1 -1
  91. fides/ui-build/static/admin/poc/form-experiments/AntForm.html +1 -1
  92. fides/ui-build/static/admin/poc/form-experiments/FormikAntFormItem.html +1 -1
  93. fides/ui-build/static/admin/poc/form-experiments/FormikControlled.html +1 -1
  94. fides/ui-build/static/admin/poc/form-experiments/FormikField.html +1 -1
  95. fides/ui-build/static/admin/poc/form-experiments/FormikSpreadField.html +1 -1
  96. fides/ui-build/static/admin/poc/forms.html +1 -1
  97. fides/ui-build/static/admin/poc/table-migration.html +1 -1
  98. fides/ui-build/static/admin/privacy-requests/[id].html +1 -1
  99. fides/ui-build/static/admin/privacy-requests/configure/messaging.html +1 -1
  100. fides/ui-build/static/admin/privacy-requests/configure/storage.html +1 -1
  101. fides/ui-build/static/admin/privacy-requests/configure.html +1 -1
  102. fides/ui-build/static/admin/privacy-requests.html +1 -1
  103. fides/ui-build/static/admin/properties/[id].html +1 -1
  104. fides/ui-build/static/admin/properties/add-property.html +1 -1
  105. fides/ui-build/static/admin/properties.html +1 -1
  106. fides/ui-build/static/admin/reporting/datamap.html +1 -1
  107. fides/ui-build/static/admin/settings/about/alpha.html +1 -1
  108. fides/ui-build/static/admin/settings/about.html +1 -1
  109. fides/ui-build/static/admin/settings/consent/[configuration_id]/[purpose_id].html +1 -1
  110. fides/ui-build/static/admin/settings/consent.html +1 -1
  111. fides/ui-build/static/admin/settings/custom-fields.html +1 -1
  112. fides/ui-build/static/admin/settings/domain-records.html +1 -1
  113. fides/ui-build/static/admin/settings/domains.html +1 -1
  114. fides/ui-build/static/admin/settings/email-templates.html +1 -1
  115. fides/ui-build/static/admin/settings/locations.html +1 -1
  116. fides/ui-build/static/admin/settings/organization.html +1 -1
  117. fides/ui-build/static/admin/settings/regulations.html +1 -1
  118. fides/ui-build/static/admin/systems/configure/[id]/test-datasets.html +1 -1
  119. fides/ui-build/static/admin/systems/configure/[id].html +1 -1
  120. fides/ui-build/static/admin/systems.html +1 -1
  121. fides/ui-build/static/admin/taxonomy.html +1 -1
  122. fides/ui-build/static/admin/user-management/new.html +1 -1
  123. fides/ui-build/static/admin/user-management/profile/[id].html +1 -1
  124. fides/ui-build/static/admin/user-management.html +1 -1
  125. {ethyca_fides-2.63.0rc2.dist-info → ethyca_fides-2.63.1.dist-info}/WHEEL +0 -0
  126. {ethyca_fides-2.63.0rc2.dist-info → ethyca_fides-2.63.1.dist-info}/entry_points.txt +0 -0
  127. {ethyca_fides-2.63.0rc2.dist-info → ethyca_fides-2.63.1.dist-info}/licenses/LICENSE +0 -0
  128. {ethyca_fides-2.63.0rc2.dist-info → ethyca_fides-2.63.1.dist-info}/top_level.txt +0 -0
  129. /fides/api/models/{detection_discovery.py → detection_discovery/core.py} +0 -0
  130. /fides/ui-build/static/admin/_next/static/{Fb70i-8GI-owNAvgEJWhA → SZn_Fpr_qG1COMjkdloep}/_buildManifest.js +0 -0
  131. /fides/ui-build/static/admin/_next/static/{Fb70i-8GI-owNAvgEJWhA → SZn_Fpr_qG1COMjkdloep}/_ssgManifest.js +0 -0
@@ -0,0 +1,151 @@
1
+ from datetime import datetime
2
+ from typing import Any, Optional, Type
3
+
4
+ from loguru import logger
5
+
6
+ from fides.api.api.deps import get_autoclose_db_session
7
+ from fides.api.schemas.external_storage import ExternalStorageMetadata
8
+ from fides.api.service.external_data_storage import (
9
+ ExternalDataStorageError,
10
+ ExternalDataStorageService,
11
+ )
12
+ from fides.api.util.data_size import LARGE_DATA_THRESHOLD_BYTES, calculate_data_size
13
+
14
+
15
class EncryptedLargeDataDescriptor:
    """
    A Python descriptor for database fields with encrypted external storage fallback.

    The descriptor fronts a private attribute named ``_<field_name>`` on the
    owning instance. On assignment, values whose calculated size exceeds
    ``threshold_bytes`` are handed to ``ExternalDataStorageService`` and only
    the returned storage metadata (as a dict) is kept in the private
    attribute; smaller values are assigned to the private attribute directly.
    On read, a dict containing a ``"storage_type"`` key is treated as such
    metadata and resolved back into the stored payload.
    """

    def __init__(
        self,
        field_name: str,
        empty_default: Optional[Any] = None,
        threshold_bytes: Optional[int] = None,
    ):
        """
        Args:
            field_name: Public field name; the backing attribute is ``_<field_name>``.
            empty_default: Value used when the field is set to something falsy
                or external retrieval yields ``None``. Defaults to ``[]``.
            threshold_bytes: Size above which data is stored externally. Any
                falsy value (``None`` or ``0``) selects ``LARGE_DATA_THRESHOLD_BYTES``.
        """
        self.field_name = field_name
        self.private_field = f"_{field_name}"
        self.empty_default = empty_default if empty_default is not None else []
        self.threshold_bytes = threshold_bytes or LARGE_DATA_THRESHOLD_BYTES
        self.model_class: Optional[str] = None
        self.name: Optional[str] = None

    # Descriptor protocol helpers

    def __set_name__(
        self, owner: Type, name: str
    ) -> None:  # noqa: D401 (docstring in orig file)
        """Record the attribute name and owning class name for paths and log messages."""
        self.name = name
        self.model_class = owner.__name__

    def _fresh_empty_default(self) -> Any:
        """Return a shallow copy of ``empty_default``.

        The descriptor is a single class-level object, so handing out
        ``empty_default`` itself would make every instance share (and be able
        to mutate) the same list/dict.
        """
        # Local import keeps this block self-contained; ``copy`` is stdlib.
        from copy import copy

        return copy(self.empty_default)

    def _generate_storage_path(self, instance: Any) -> str:
        """Build ``<model>/<id>/<field>/<UTC timestamp>.txt`` for ``instance``.

        Raises:
            ValueError: if ``instance`` has no truthy ``id`` attribute.
        """
        # Local import keeps this block self-contained; ``timezone`` is stdlib.
        from datetime import timezone

        instance_id = getattr(instance, "id", None)
        if not instance_id:
            raise ValueError(f"Instance {instance} must have an 'id' attribute")
        # datetime.utcnow() is deprecated; an aware UTC "now" formats identically.
        timestamp = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S-%f")
        return f"{self.model_class}/{instance_id}/{self.field_name}/{timestamp}.txt"

    def __get__(self, instance: Any, owner: Type) -> Any:  # noqa: D401
        """Return the field value, loading it from external storage when needed.

        Returns the descriptor itself for class-level access, ``None`` when the
        backing attribute is ``None``, and the backing value unchanged when it
        is not external-storage metadata.

        Raises:
            ExternalDataStorageError: if the metadata cannot be validated or the
                external payload cannot be retrieved.
        """
        if instance is None:
            return self
        raw_data = getattr(instance, self.private_field)
        if raw_data is None:
            return None
        if isinstance(raw_data, dict) and "storage_type" in raw_data:
            logger.info(
                f"Reading {self.model_class}.{self.field_name} from external storage "
                f"({raw_data.get('storage_type')})"
            )
            try:
                metadata = ExternalStorageMetadata.model_validate(raw_data)
                data = self._retrieve_external_data(metadata)
                record_count = len(data) if isinstance(data, list) else "N/A"
                logger.info(
                    f"Successfully retrieved {self.model_class}.{self.field_name} "
                    f"from external storage (records: {record_count})"
                )
                return data if data is not None else self._fresh_empty_default()
            except Exception as e:  # pylint: disable=broad-except
                logger.error(
                    f"Failed to retrieve {self.model_class}.{self.field_name} "
                    f"from external storage: {str(e)}"
                )
                raise ExternalDataStorageError(
                    f"Failed to retrieve {self.field_name}: {str(e)}"
                ) from e
        else:
            return raw_data

    def __set__(self, instance: Any, value: Any) -> None:  # noqa: D401
        """Assign ``value``, storing it externally when it exceeds the threshold.

        Falsy values reset the field to a copy of ``empty_default``. Assigning a
        value equal to the current one is a no-op. Any previously stored
        external payload is cleaned up before the new value is recorded.
        """
        if not value:
            self._cleanup_external_data(instance)
            setattr(instance, self.private_field, self._fresh_empty_default())
            return
        try:
            current_data = self.__get__(instance, type(instance))
            if current_data == value:
                return
        except Exception as exc:  # pylint: disable=broad-except
            # Best effort: if the current value cannot be read or compared we
            # simply overwrite it, but leave a trace instead of failing silently.
            logger.debug(
                f"Could not compare current {self.model_class}.{self.field_name} "
                f"with new value, overwriting: {str(exc)}"
            )

        data_size = calculate_data_size(value)
        if data_size > self.threshold_bytes:
            logger.info(
                f"{self.model_class}.{self.field_name}: Data size ({data_size:,} bytes) "
                f"exceeds threshold ({self.threshold_bytes:,} bytes), storing externally"
            )
            # NOTE(review): the old external payload is removed before the new
            # one is stored; if the store below fails, the previous file is
            # already gone while the field still holds its metadata.
            self._cleanup_external_data(instance)
            metadata = self._store_external_data(instance, value)
            setattr(instance, self.private_field, metadata.model_dump())
        else:
            self._cleanup_external_data(instance)
            setattr(instance, self.private_field, value)

    # External storage helpers

    def _store_external_data(
        self, instance: Any, data: Any
    ) -> "ExternalStorageMetadata":
        """Store ``data`` under a freshly generated path and return its metadata."""
        storage_path = self._generate_storage_path(instance)
        with get_autoclose_db_session() as session:
            metadata = ExternalDataStorageService.store_data(
                db=session,
                storage_path=storage_path,
                data=data,
            )
            logger.info(
                f"Stored {self.model_class}.{self.field_name} to external storage: {storage_path}"
            )
            return metadata

    @staticmethod
    def _retrieve_external_data(
        metadata: "ExternalStorageMetadata",
    ) -> Any:  # noqa: D401
        """Fetch the payload described by ``metadata`` using a short-lived session."""
        with get_autoclose_db_session() as session:
            return ExternalDataStorageService.retrieve_data(
                db=session,
                metadata=metadata,
            )

    def _cleanup_external_data(self, instance: Any) -> None:  # noqa: D401
        """Delete the external payload referenced by the backing attribute, if any.

        Best effort: failures are logged as warnings and never raised.
        """
        raw_data = getattr(instance, self.private_field, None)
        if isinstance(raw_data, dict) and "storage_type" in raw_data:
            try:
                metadata = ExternalStorageMetadata.model_validate(raw_data)
                with get_autoclose_db_session() as session:
                    ExternalDataStorageService.delete_data(
                        db=session,
                        metadata=metadata,
                    )
                    logger.info(
                        f"Cleaned up external storage for {self.model_class}.{self.field_name}: "
                        f"{metadata.file_key}"
                    )
            except Exception as e:  # pylint: disable=broad-except
                logger.warning(
                    f"Failed to cleanup external {self.field_name}: {str(e)}"
                )

    # Public helper

    def cleanup(self, instance: Any) -> None:  # noqa: D401
        """Public entry point for removing ``instance``'s external payload."""
        self._cleanup_external_data(instance)
@@ -0,0 +1,8 @@
1
+ from fides.api.models.manual_tasks.manual_task import ManualTask, ManualTaskReference
2
+ from fides.api.models.manual_tasks.manual_task_log import ManualTaskLog
3
+
4
+ __all__ = [
5
+ "ManualTask",
6
+ "ManualTaskReference",
7
+ "ManualTaskLog",
8
+ ]
@@ -0,0 +1,110 @@
1
+ from typing import Any
2
+
3
+ from sqlalchemy import Column, DateTime, ForeignKey, String
4
+ from sqlalchemy.ext.declarative import declared_attr
5
+ from sqlalchemy.orm import Session, relationship
6
+
7
+ from fides.api.db.base_class import Base
8
+ from fides.api.db.util import EnumColumn
9
+ from fides.api.models.manual_tasks.manual_task_log import ManualTaskLog
10
+ from fides.api.schemas.manual_tasks.manual_task_schemas import (
11
+ ManualTaskLogStatus,
12
+ ManualTaskParentEntityType,
13
+ ManualTaskReferenceType,
14
+ ManualTaskType,
15
+ )
16
+
17
+
18
class ManualTask(Base):
    """Model for storing manual tasks.

    This model can be used for both privacy request tasks and general tasks.
    For privacy requests, it replaces the functionality of manual webhooks.
    For other use cases, it provides a flexible task management system.

    There can only be one ManualTask per parent entity.
    You can create multiple Configs for the same ManualTask.
    """

    @declared_attr
    def __tablename__(cls) -> str:
        """Overriding base class method to set the table name."""
        return "manual_task"

    # Database columns
    task_type = Column(
        EnumColumn(ManualTaskType),
        nullable=False,
        default=ManualTaskType.privacy_request,
    )
    parent_entity_id = Column(String, nullable=False)
    parent_entity_type = Column(
        EnumColumn(ManualTaskParentEntityType),
        nullable=False,
        default=ManualTaskParentEntityType.connection_config,
    )
    due_date = Column(DateTime, nullable=True)

    # Relationships
    # References are owned by the task: removing one from the collection or
    # deleting the task deletes the reference rows ("all, delete-orphan").
    references = relationship(
        "ManualTaskReference",
        back_populates="task",
        uselist=True,
        cascade="all, delete-orphan",
    )
    # Read-only view of the task's logs ordered by creation time; logs are
    # written through ManualTaskLog.create_log rather than this collection.
    logs = relationship(
        "ManualTaskLog",
        back_populates="task",
        primaryjoin="and_(ManualTask.id == ManualTaskLog.task_id)",
        viewonly=True,
        order_by="ManualTaskLog.created_at",
    )

    # Properties
    @property
    def assigned_users(self) -> list[str]:
        """Get all users assigned to this task.

        Returns:
            The ``reference_id`` of every reference whose type is
            ``ManualTaskReferenceType.assigned_user``; empty when there are none.
        """
        if not self.references:
            return []
        return [
            ref.reference_id
            for ref in self.references
            if ref.reference_type == ManualTaskReferenceType.assigned_user
        ]

    # CRUD Operations
    @classmethod
    def create(
        cls, db: Session, *, data: dict[str, Any], check_name: bool = True
    ) -> "ManualTask":
        """Create a new manual task and record a ``created`` log entry for it.

        Args:
            db: Database session
            data: Column values for the new task
            check_name: Forwarded unchanged to the base class ``create``

        Returns:
            The newly created task
        """
        task = super().create(db=db, data=data, check_name=check_name)
        # ``task_type`` has a column default, so callers may omit it from
        # ``data``; fall back to the persisted value instead of raising
        # KeyError after the task row has already been created.
        task_type = data.get("task_type", task.task_type)
        ManualTaskLog.create_log(
            db=db,
            task_id=task.id,
            status=ManualTaskLogStatus.created,
            message=f"Created manual task for {task_type}",
        )
        return task
89
+
90
+
91
class ManualTaskReference(Base):
    """Join table to associate manual tasks with multiple references.

    A single task may have many references including privacy requests, configurations, and assigned users.
    """

    @declared_attr
    def __tablename__(cls) -> str:
        """Overriding base class method to set the table name."""
        return "manual_task_reference"

    # Database columns
    # Owning task; rows are removed by the database when the task is deleted
    # (ON DELETE CASCADE).
    task_id = Column(
        String, ForeignKey("manual_task.id", ondelete="CASCADE"), nullable=False
    )
    # Identifier of the referenced entity. It is a plain string with no
    # foreign key; ``reference_type`` says what kind of entity it points at.
    reference_id = Column(String, nullable=False)
    reference_type = Column(EnumColumn(ManualTaskReferenceType), nullable=False)

    # Relationships
    # Inverse side of ``ManualTask.references``.
    task = relationship("ManualTask", back_populates="references")
@@ -0,0 +1,100 @@
1
+ from typing import TYPE_CHECKING, Any, Optional
2
+
3
+ from sqlalchemy import Column, ForeignKey, String
4
+ from sqlalchemy.dialects.postgresql import JSONB
5
+ from sqlalchemy.ext.declarative import declared_attr
6
+ from sqlalchemy.orm import Session, relationship
7
+
8
+ from fides.api.db.base_class import Base
9
+ from fides.api.schemas.manual_tasks.manual_task_schemas import ManualTaskLogStatus
10
+
11
+ if TYPE_CHECKING:
12
+ from fides.api.models.manual_tasks.manual_task import ManualTask
13
+
14
+
15
class ManualTaskLog(Base):
    """Log entries recorded against a manual task."""

    @declared_attr
    def __tablename__(cls) -> str:
        """Overriding base class method to set the table name."""
        return "manual_task_log"

    task_id = Column(
        String, ForeignKey("manual_task.id", ondelete="CASCADE"), nullable=False
    )
    # TODO: Add foreign key constraints when config and instance are implemented
    config_id = Column(String, nullable=True)
    instance_id = Column(String, nullable=True)
    status = Column(String, nullable=False)
    message = Column(String, nullable=True)
    details = Column(JSONB, nullable=True)

    # Relationships - using string references to avoid circular imports
    task = relationship("ManualTask", back_populates="logs", foreign_keys=[task_id])
    # TODO: Add config and instance relationships when they are implemented
    # config = relationship("ManualTaskConfig", back_populates="logs")
    # instance = relationship("ManualTaskInstance", back_populates="logs")

    @classmethod
    def create_log(
        cls,
        db: Session,
        status: ManualTaskLogStatus,
        task_id: str,
        config_id: Optional[str] = None,
        instance_id: Optional[str] = None,
        message: Optional[str] = None,
        details: Optional[dict[str, Any]] = None,
    ) -> "ManualTaskLog":
        """Persist a log entry for a task.

        Args:
            db: Database session
            status: Status of the log entry
            task_id: ID of the task
            config_id: Optional ID of the configuration
            instance_id: Optional ID of the instance
            message: Optional message describing the event
            details: Optional additional details about the event

        Returns:
            The created log entry
        """
        return cls.create(
            db=db,
            data={
                "task_id": task_id,
                "config_id": config_id,
                "instance_id": instance_id,
                "status": status,
                "message": message,
                "details": details,
            },
        )

    @classmethod
    def create_error_log(
        cls,
        db: Session,
        task_id: str,
        message: str,
        config_id: Optional[str] = None,
        instance_id: Optional[str] = None,
        details: Optional[dict[str, Any]] = None,
    ) -> "ManualTaskLog":
        """Persist a log entry with status ``ManualTaskLogStatus.error``.

        Thin wrapper around ``create_log`` that fixes the status.

        Args:
            db: Database session
            task_id: ID of the task
            message: Error message describing what went wrong
            config_id: Optional ID of the configuration
            instance_id: Optional ID of the instance
            details: Optional additional details about the error

        Returns:
            The created error log entry
        """
        return cls.create_log(
            db=db,
            task_id=task_id,
            status=ManualTaskLogStatus.error,
            message=message,
            config_id=config_id,
            instance_id=instance_id,
            details=details,
        )
@@ -22,8 +22,8 @@ from fides.api.models.privacy_notice import (
22
22
  UserConsentPreference,
23
23
  )
24
24
  from fides.api.models.privacy_request import PrivacyRequest, ProvidedIdentity
25
+ from fides.api.models.worker_task import ExecutionLogStatus
25
26
  from fides.api.schemas.language import SupportedLanguage
26
- from fides.api.schemas.privacy_request import ExecutionLogStatus
27
27
  from fides.api.schemas.redis_cache import MultiValue
28
28
  from fides.config import CONFIG
29
29
 
@@ -4,15 +4,14 @@ from __future__ import annotations
4
4
 
5
5
  from typing import Optional
6
6
 
7
- from sqlalchemy import Column, DateTime, String
7
+ from sqlalchemy import Column, String
8
8
  from sqlalchemy.dialects.postgresql import JSONB
9
9
  from sqlalchemy.ext.mutable import MutableList
10
- from sqlalchemy.sql import text
11
10
 
12
11
  from fides.api.db.base_class import Base # type: ignore[attr-defined]
13
12
  from fides.api.db.util import EnumColumn
13
+ from fides.api.models.worker_task import ExecutionLogStatus, TaskExecutionLog
14
14
  from fides.api.schemas.policy import ActionType, CurrentStep
15
- from fides.api.schemas.privacy_request import ExecutionLogStatus
16
15
 
17
16
  # Locations from which privacy request execution can be resumed, in order.
18
17
  EXECUTION_CHECKPOINTS = [
@@ -53,7 +52,7 @@ def can_run_checkpoint(
53
52
  ) >= EXECUTION_CHECKPOINTS.index(from_checkpoint)
54
53
 
55
54
 
56
- class ExecutionLog(Base):
55
+ class ExecutionLog(TaskExecutionLog, Base):
57
56
  """
58
57
  Stores the individual execution logs associated with a PrivacyRequest.
59
58
 
@@ -68,41 +67,14 @@ class ExecutionLog(Base):
68
67
  collection_name = Column(String, index=True)
69
68
  # A JSON Array describing affected fields along with their data categories and paths
70
69
  fields_affected = Column(MutableList.as_mutable(JSONB), nullable=True)
71
- # Contains info, warning, or error messages
72
- message = Column(String)
73
70
  action_type = Column(
74
71
  EnumColumn(ActionType),
75
72
  index=True,
76
73
  nullable=False,
77
74
  )
78
- status = Column(
79
- EnumColumn(
80
- ExecutionLogStatus,
81
- native_enum=True,
82
- values_callable=lambda x: [
83
- i.value for i in x
84
- ], # Using ExecutionLogStatus values in database, even though app is using the names.
85
- ),
86
- index=True,
87
- nullable=False,
88
- )
89
75
 
90
76
  privacy_request_id = Column(
91
77
  String,
92
78
  nullable=False,
93
79
  index=True,
94
80
  )
95
-
96
- # Use clock_timestamp() instead of NOW() to get the actual current time at row creation,
97
- # regardless of transaction state. This prevents timestamp caching within transactions
98
- # and ensures more accurate creation times.
99
- # https://www.postgresql.org/docs/current/functions-datetime.html#FUNCTIONS-DATETIME-CURRENT
100
-
101
- created_at = Column(
102
- DateTime(timezone=True), server_default=text("clock_timestamp()")
103
- )
104
- updated_at = Column(
105
- DateTime(timezone=True),
106
- server_default=text("clock_timestamp()"),
107
- onupdate=text("clock_timestamp()"),
108
- )
@@ -48,6 +48,7 @@ from fides.api.models.audit_log import AuditLog
48
48
  from fides.api.models.client import ClientDetail
49
49
  from fides.api.models.comment import Comment, CommentReference, CommentReferenceType
50
50
  from fides.api.models.fides_user import FidesUser
51
+ from fides.api.models.field_types import EncryptedLargeDataDescriptor
51
52
  from fides.api.models.manual_webhook import AccessManualWebhook
52
53
  from fides.api.models.policy import (
53
54
  Policy,
@@ -72,13 +73,13 @@ from fides.api.models.privacy_request.webhook import (
72
73
  generate_request_callback_pre_approval_jwe,
73
74
  generate_request_callback_resume_jwe,
74
75
  )
76
+ from fides.api.models.worker_task import ExecutionLogStatus
75
77
  from fides.api.schemas.drp_privacy_request import DrpPrivacyRequestCreate
76
78
  from fides.api.schemas.external_https import SecondPartyResponseFormat
77
79
  from fides.api.schemas.masking.masking_secrets import MaskingSecretCache
78
80
  from fides.api.schemas.policy import ActionType, CurrentStep
79
81
  from fides.api.schemas.privacy_request import (
80
82
  CheckpointActionRequired,
81
- ExecutionLogStatus,
82
83
  ManualAction,
83
84
  PrivacyRequestSource,
84
85
  PrivacyRequestStatus,
@@ -251,7 +252,8 @@ class PrivacyRequest(
251
252
  awaiting_email_send_at = Column(DateTime(timezone=True), nullable=True)
252
253
 
253
254
  # Encrypted filtered access results saved for later retrieval
254
- filtered_final_upload = Column( # An encrypted JSON String - Dict[Dict[str, List[Row]]] - rule keys mapped to the filtered access results
255
+ _filtered_final_upload = Column( # An encrypted JSON String - Dict[Dict[str, List[Row]]] - rule keys mapped to the filtered access results
256
+ "filtered_final_upload",
255
257
  StringEncryptedType(
256
258
  type_in=JSONTypeOverride,
257
259
  key=CONFIG.security.app_encryption_key,
@@ -260,6 +262,11 @@ class PrivacyRequest(
260
262
  ),
261
263
  )
262
264
 
265
+ # Use descriptor for automatic external storage handling
266
+ filtered_final_upload = EncryptedLargeDataDescriptor(
267
+ field_name="filtered_final_upload", empty_default={}
268
+ )
269
+
263
270
  # Encrypted filtered access results saved for later retrieval
264
271
  access_result_urls = Column( # An encrypted JSON String - Dict[Dict[str, List[Row]]] - rule keys mapped to the filtered access results
265
272
  StringEncryptedType(
@@ -334,6 +341,7 @@ class PrivacyRequest(
334
341
  deleting this object from the database
335
342
  """
336
343
  self.clear_cached_values()
344
+ self.cleanup_external_storage()
337
345
  Attachment.delete_attachments_for_reference_and_type(
338
346
  db, self.id, AttachmentReferenceType.privacy_request
339
347
  )
@@ -1257,6 +1265,11 @@ class PrivacyRequest(
1257
1265
  # DSR 2.0 does not cache the results so nothing to do here
1258
1266
  return {}
1259
1267
 
1268
+ def cleanup_external_storage(self) -> None:
1269
+ """Clean up all external storage files for this privacy request"""
1270
+ # Access the descriptor from the class to call cleanup
1271
+ PrivacyRequest.filtered_final_upload.cleanup(self)
1272
+
1260
1273
  def save_filtered_access_results(
1261
1274
  self, db: Session, results: Dict[str, Dict[str, List[Row]]]
1262
1275
  ) -> None:
@@ -1544,7 +1557,7 @@ def get_action_required_details(
1544
1557
 
1545
1558
 
1546
1559
  def _parse_cache_to_checkpoint_action_required(
1547
- cache: dict[str, Any]
1560
+ cache: dict[str, Any],
1548
1561
  ) -> CheckpointActionRequired:
1549
1562
  collection = (
1550
1563
  CollectionAddress(
@@ -14,20 +14,19 @@ from sqlalchemy_utils.types.encrypted.encrypted_type import (
14
14
  StringEncryptedType,
15
15
  )
16
16
 
17
- from fides.api.db.base_class import Base # type: ignore[attr-defined]
18
- from fides.api.db.base_class import JSONTypeOverride
19
- from fides.api.db.util import EnumColumn
17
+ from fides.api.db.base_class import Base, JSONTypeOverride # type: ignore[attr-defined]
20
18
  from fides.api.graph.config import (
21
19
  ROOT_COLLECTION_ADDRESS,
22
20
  TERMINATOR_ADDRESS,
23
21
  CollectionAddress,
24
22
  )
23
+ from fides.api.models.field_types import EncryptedLargeDataDescriptor
25
24
  from fides.api.models.privacy_request.execution_log import (
26
25
  COMPLETED_EXECUTION_LOG_STATUSES,
27
26
  )
27
+ from fides.api.models.worker_task import ExecutionLogStatus, WorkerTask
28
28
  from fides.api.schemas.base_class import FidesSchema
29
29
  from fides.api.schemas.policy import ActionType
30
- from fides.api.schemas.privacy_request import ExecutionLogStatus
31
30
  from fides.api.util.cache import (
32
31
  FidesopsRedis,
33
32
  celery_tasks_in_flight,
@@ -68,7 +67,8 @@ class TraversalDetails(FidesSchema):
68
67
  )
69
68
 
70
69
 
71
- class RequestTask(Base):
70
+ # TODO: At some point we will refactor this model to store all task types in a common table that links to tables with specific task attributes.
71
+ class RequestTask(WorkerTask, Base):
72
72
  """
73
73
  An individual Task for a Privacy Request.
74
74
 
@@ -91,21 +91,6 @@ class RequestTask(Base):
91
91
  ) # Of the format dataset_name:collection_name for convenience
92
92
  dataset_name = Column(String, nullable=False, index=True)
93
93
  collection_name = Column(String, nullable=False, index=True)
94
- action_type = Column(EnumColumn(ActionType), nullable=False, index=True)
95
-
96
- # Note that RequestTasks share statuses with ExecutionLogs. When a RequestTask changes state, an ExecutionLog
97
- # is also created with that state. These are tied tightly together in GraphTask.
98
- status = Column(
99
- EnumColumn(
100
- ExecutionLogStatus,
101
- native_enum=False,
102
- values_callable=lambda x: [
103
- i.value for i in x
104
- ], # Using ExecutionLogStatus values in database, even though app is using the names.
105
- ), # character varying in database
106
- index=True,
107
- nullable=False,
108
- )
109
94
 
110
95
  upstream_tasks = Column(
111
96
  MutableList.as_mutable(JSONB)
@@ -121,7 +106,8 @@ class RequestTask(Base):
121
106
  # Raw data retrieved from an access request is stored here. This contains all of the
122
107
  # intermediate data we retrieved, needed for downstream tasks, but hasn't been filtered
123
108
  # by data category for the end user.
124
- access_data = Column( # An encrypted JSON String - saved as a list of Rows
109
+ _access_data = Column( # An encrypted JSON String - saved as a list of Rows
110
+ "access_data",
125
111
  StringEncryptedType(
126
112
  type_in=JSONTypeOverride,
127
113
  key=CONFIG.security.app_encryption_key,
@@ -132,7 +118,8 @@ class RequestTask(Base):
132
118
 
133
119
  # This is the raw access data saved in erasure format (with placeholders preserved) to perform a masking request.
134
120
  # First saved on the access node, and then copied to the corresponding erasure node.
135
- data_for_erasures = Column( # An encrypted JSON String - saved as a list of rows
121
+ _data_for_erasures = Column( # An encrypted JSON String - saved as a list of rows
122
+ "data_for_erasures",
136
123
  StringEncryptedType(
137
124
  type_in=JSONTypeOverride,
138
125
  key=CONFIG.security.app_encryption_key,
@@ -141,6 +128,15 @@ class RequestTask(Base):
141
128
  ),
142
129
  )
143
130
 
131
+ # Use descriptors for automatic external storage handling
132
+ access_data = EncryptedLargeDataDescriptor(
133
+ field_name="access_data", empty_default=[]
134
+ )
135
+
136
+ data_for_erasures = EncryptedLargeDataDescriptor(
137
+ field_name="data_for_erasures", empty_default=[]
138
+ )
139
+
144
140
  # Written after an erasure is completed
145
141
  rows_masked = Column(Integer)
146
142
  # Written after a consent request is completed - not all consent
@@ -177,12 +173,22 @@ class RequestTask(Base):
177
173
  """Convenience helper for asserting whether the task is a terminator task"""
178
174
  return self.request_task_address == TERMINATOR_ADDRESS
179
175
 
176
+ @classmethod
177
+ def allowed_action_types(cls) -> List[str]:
178
+ return [e.value for e in ActionType]
179
+
180
180
  def get_cached_task_id(self) -> Optional[str]:
181
181
  """Gets the cached celery task ID for this request task."""
182
182
  cache: FidesopsRedis = get_cache()
183
183
  task_id = cache.get(get_async_task_tracking_cache_key(self.id))
184
184
  return task_id
185
185
 
186
+ def cleanup_external_storage(self) -> None:
187
+ """Clean up all external storage files for this request task"""
188
+ # Access the descriptor from the class to call cleanup
189
+ RequestTask.access_data.cleanup(self)
190
+ RequestTask.data_for_erasures.cleanup(self)
191
+
186
192
  def get_access_data(self) -> List[Row]:
187
193
  """Helper to retrieve access data or default to empty list"""
188
194
  return self.access_data or []
@@ -191,6 +197,11 @@ class RequestTask(Base):
191
197
  """Helper to retrieve erasure data needed to build masking requests or default to empty list"""
192
198
  return self.data_for_erasures or []
193
199
 
200
+ def delete(self, db: Session) -> None:
201
+ """Override delete to cleanup external storage first"""
202
+ self.cleanup_external_storage()
203
+ super().delete(db)
204
+
194
205
  def update_status(self, db: Session, status: ExecutionLogStatus) -> None:
195
206
  """Helper method to update a task's status"""
196
207
  self.status = status
@@ -236,7 +247,7 @@ class RequestTask(Base):
236
247
  if not tasks_complete and should_log:
237
248
  logger.debug(
238
249
  "Upstream tasks incomplete for {} task {}.",
239
- self.action_type.value,
250
+ self.action_type,
240
251
  self.collection_address,
241
252
  )
242
253
 
@@ -267,7 +278,7 @@ class RequestTask(Base):
267
278
  logger.debug(
268
279
  "Celery Task ID {} found for {} task {}.",
269
280
  celery_task_id,
270
- self.action_type.value,
281
+ self.action_type,
271
282
  self.collection_address,
272
283
  )
273
284
 
@@ -277,7 +288,7 @@ class RequestTask(Base):
277
288
  logger.debug(
278
289
  "Celery Task {} already processing for {} task {}.",
279
290
  celery_task_id,
280
- self.action_type.value,
291
+ self.action_type,
281
292
  self.collection_address,
282
293
  )
283
294