ethyca-fides 2.63.0rc3__py2.py3-none-any.whl → 2.63.1__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (129) hide show
  1. {ethyca_fides-2.63.0rc3.dist-info → ethyca_fides-2.63.1.dist-info}/METADATA +1 -1
  2. {ethyca_fides-2.63.0rc3.dist-info → ethyca_fides-2.63.1.dist-info}/RECORD +129 -110
  3. fides/_version.py +3 -3
  4. fides/api/alembic/migrations/versions/29e56fa1fdb3_add_monitor_tasks.py +147 -0
  5. fides/api/alembic/migrations/versions/5efcdf18438e_add_manual_task_tables.py +160 -0
  6. fides/api/api/v1/endpoints/privacy_request_endpoints.py +4 -4
  7. fides/api/db/base.py +7 -1
  8. fides/api/models/connectionconfig.py +1 -1
  9. fides/api/models/detection_discovery/__init__.py +35 -0
  10. fides/api/models/detection_discovery/monitor_task.py +162 -0
  11. fides/api/models/field_types/__init__.py +5 -0
  12. fides/api/models/field_types/encrypted_large_data.py +151 -0
  13. fides/api/models/manual_tasks/__init__.py +8 -0
  14. fides/api/models/manual_tasks/manual_task.py +110 -0
  15. fides/api/models/manual_tasks/manual_task_log.py +100 -0
  16. fides/api/models/privacy_preference.py +1 -1
  17. fides/api/models/privacy_request/execution_log.py +3 -31
  18. fides/api/models/privacy_request/privacy_request.py +16 -3
  19. fides/api/models/privacy_request/request_task.py +36 -25
  20. fides/api/models/worker_task.py +96 -0
  21. fides/api/schemas/external_storage.py +22 -0
  22. fides/api/schemas/manual_tasks/__init__.py +0 -0
  23. fides/api/schemas/manual_tasks/manual_task_schemas.py +79 -0
  24. fides/api/schemas/manual_tasks/manual_task_status.py +151 -0
  25. fides/api/schemas/privacy_request.py +1 -12
  26. fides/api/service/connectors/base_erasure_email_connector.py +1 -1
  27. fides/api/service/connectors/consent_email_connector.py +2 -1
  28. fides/api/service/connectors/dynamic_erasure_email_connector.py +2 -1
  29. fides/api/service/connectors/erasure_email_connector.py +1 -1
  30. fides/api/service/external_data_storage.py +371 -0
  31. fides/api/service/privacy_request/request_runner_service.py +5 -5
  32. fides/api/service/privacy_request/request_service.py +1 -1
  33. fides/api/task/create_request_tasks.py +1 -1
  34. fides/api/task/execute_request_tasks.py +9 -8
  35. fides/api/task/graph_task.py +22 -10
  36. fides/api/util/consent_util.py +1 -1
  37. fides/api/util/data_size.py +102 -0
  38. fides/api/util/encryption/aes_gcm_encryption_util.py +271 -0
  39. fides/service/manual_tasks/__init__.py +0 -0
  40. fides/service/manual_tasks/manual_task_service.py +150 -0
  41. fides/service/privacy_request/privacy_request_service.py +1 -1
  42. fides/ui-build/static/admin/404.html +1 -1
  43. fides/ui-build/static/admin/add-systems/manual.html +1 -1
  44. fides/ui-build/static/admin/add-systems/multiple.html +1 -1
  45. fides/ui-build/static/admin/add-systems.html +1 -1
  46. fides/ui-build/static/admin/consent/configure/add-vendors.html +1 -1
  47. fides/ui-build/static/admin/consent/configure.html +1 -1
  48. fides/ui-build/static/admin/consent/privacy-experience/[id].html +1 -1
  49. fides/ui-build/static/admin/consent/privacy-experience/new.html +1 -1
  50. fides/ui-build/static/admin/consent/privacy-experience.html +1 -1
  51. fides/ui-build/static/admin/consent/privacy-notices/[id].html +1 -1
  52. fides/ui-build/static/admin/consent/privacy-notices/new.html +1 -1
  53. fides/ui-build/static/admin/consent/privacy-notices.html +1 -1
  54. fides/ui-build/static/admin/consent/properties.html +1 -1
  55. fides/ui-build/static/admin/consent/reporting.html +1 -1
  56. fides/ui-build/static/admin/consent.html +1 -1
  57. fides/ui-build/static/admin/data-catalog/[systemId]/projects/[projectUrn]/[resourceUrn].html +1 -1
  58. fides/ui-build/static/admin/data-catalog/[systemId]/projects/[projectUrn].html +1 -1
  59. fides/ui-build/static/admin/data-catalog/[systemId]/projects.html +1 -1
  60. fides/ui-build/static/admin/data-catalog/[systemId]/resources/[resourceUrn].html +1 -1
  61. fides/ui-build/static/admin/data-catalog/[systemId]/resources.html +1 -1
  62. fides/ui-build/static/admin/data-catalog.html +1 -1
  63. fides/ui-build/static/admin/data-discovery/action-center/[monitorId]/[systemId].html +1 -1
  64. fides/ui-build/static/admin/data-discovery/action-center/[monitorId].html +1 -1
  65. fides/ui-build/static/admin/data-discovery/action-center.html +1 -1
  66. fides/ui-build/static/admin/data-discovery/activity.html +1 -1
  67. fides/ui-build/static/admin/data-discovery/detection/[resourceUrn].html +1 -1
  68. fides/ui-build/static/admin/data-discovery/detection.html +1 -1
  69. fides/ui-build/static/admin/data-discovery/discovery/[resourceUrn].html +1 -1
  70. fides/ui-build/static/admin/data-discovery/discovery.html +1 -1
  71. fides/ui-build/static/admin/datamap.html +1 -1
  72. fides/ui-build/static/admin/dataset/[datasetId]/[collectionName]/[...subfieldNames].html +1 -1
  73. fides/ui-build/static/admin/dataset/[datasetId]/[collectionName].html +1 -1
  74. fides/ui-build/static/admin/dataset/[datasetId].html +1 -1
  75. fides/ui-build/static/admin/dataset/new.html +1 -1
  76. fides/ui-build/static/admin/dataset.html +1 -1
  77. fides/ui-build/static/admin/datastore-connection/[id].html +1 -1
  78. fides/ui-build/static/admin/datastore-connection/new.html +1 -1
  79. fides/ui-build/static/admin/datastore-connection.html +1 -1
  80. fides/ui-build/static/admin/index.html +1 -1
  81. fides/ui-build/static/admin/integrations/[id].html +1 -1
  82. fides/ui-build/static/admin/integrations.html +1 -1
  83. fides/ui-build/static/admin/login/[provider].html +1 -1
  84. fides/ui-build/static/admin/login.html +1 -1
  85. fides/ui-build/static/admin/messaging/[id].html +1 -1
  86. fides/ui-build/static/admin/messaging/add-template.html +1 -1
  87. fides/ui-build/static/admin/messaging.html +1 -1
  88. fides/ui-build/static/admin/poc/ant-components.html +1 -1
  89. fides/ui-build/static/admin/poc/form-experiments/AntForm.html +1 -1
  90. fides/ui-build/static/admin/poc/form-experiments/FormikAntFormItem.html +1 -1
  91. fides/ui-build/static/admin/poc/form-experiments/FormikControlled.html +1 -1
  92. fides/ui-build/static/admin/poc/form-experiments/FormikField.html +1 -1
  93. fides/ui-build/static/admin/poc/form-experiments/FormikSpreadField.html +1 -1
  94. fides/ui-build/static/admin/poc/forms.html +1 -1
  95. fides/ui-build/static/admin/poc/table-migration.html +1 -1
  96. fides/ui-build/static/admin/privacy-requests/[id].html +1 -1
  97. fides/ui-build/static/admin/privacy-requests/configure/messaging.html +1 -1
  98. fides/ui-build/static/admin/privacy-requests/configure/storage.html +1 -1
  99. fides/ui-build/static/admin/privacy-requests/configure.html +1 -1
  100. fides/ui-build/static/admin/privacy-requests.html +1 -1
  101. fides/ui-build/static/admin/properties/[id].html +1 -1
  102. fides/ui-build/static/admin/properties/add-property.html +1 -1
  103. fides/ui-build/static/admin/properties.html +1 -1
  104. fides/ui-build/static/admin/reporting/datamap.html +1 -1
  105. fides/ui-build/static/admin/settings/about/alpha.html +1 -1
  106. fides/ui-build/static/admin/settings/about.html +1 -1
  107. fides/ui-build/static/admin/settings/consent/[configuration_id]/[purpose_id].html +1 -1
  108. fides/ui-build/static/admin/settings/consent.html +1 -1
  109. fides/ui-build/static/admin/settings/custom-fields.html +1 -1
  110. fides/ui-build/static/admin/settings/domain-records.html +1 -1
  111. fides/ui-build/static/admin/settings/domains.html +1 -1
  112. fides/ui-build/static/admin/settings/email-templates.html +1 -1
  113. fides/ui-build/static/admin/settings/locations.html +1 -1
  114. fides/ui-build/static/admin/settings/organization.html +1 -1
  115. fides/ui-build/static/admin/settings/regulations.html +1 -1
  116. fides/ui-build/static/admin/systems/configure/[id]/test-datasets.html +1 -1
  117. fides/ui-build/static/admin/systems/configure/[id].html +1 -1
  118. fides/ui-build/static/admin/systems.html +1 -1
  119. fides/ui-build/static/admin/taxonomy.html +1 -1
  120. fides/ui-build/static/admin/user-management/new.html +1 -1
  121. fides/ui-build/static/admin/user-management/profile/[id].html +1 -1
  122. fides/ui-build/static/admin/user-management.html +1 -1
  123. {ethyca_fides-2.63.0rc3.dist-info → ethyca_fides-2.63.1.dist-info}/WHEEL +0 -0
  124. {ethyca_fides-2.63.0rc3.dist-info → ethyca_fides-2.63.1.dist-info}/entry_points.txt +0 -0
  125. {ethyca_fides-2.63.0rc3.dist-info → ethyca_fides-2.63.1.dist-info}/licenses/LICENSE +0 -0
  126. {ethyca_fides-2.63.0rc3.dist-info → ethyca_fides-2.63.1.dist-info}/top_level.txt +0 -0
  127. /fides/api/models/{detection_discovery.py → detection_discovery/core.py} +0 -0
  128. /fides/ui-build/static/admin/_next/static/{XobHpfndIH7IpV30u2vGV → SZn_Fpr_qG1COMjkdloep}/_buildManifest.js +0 -0
  129. /fides/ui-build/static/admin/_next/static/{XobHpfndIH7IpV30u2vGV → SZn_Fpr_qG1COMjkdloep}/_ssgManifest.js +0 -0
@@ -0,0 +1,110 @@
1
+ from typing import Any
2
+
3
+ from sqlalchemy import Column, DateTime, ForeignKey, String
4
+ from sqlalchemy.ext.declarative import declared_attr
5
+ from sqlalchemy.orm import Session, relationship
6
+
7
+ from fides.api.db.base_class import Base
8
+ from fides.api.db.util import EnumColumn
9
+ from fides.api.models.manual_tasks.manual_task_log import ManualTaskLog
10
+ from fides.api.schemas.manual_tasks.manual_task_schemas import (
11
+ ManualTaskLogStatus,
12
+ ManualTaskParentEntityType,
13
+ ManualTaskReferenceType,
14
+ ManualTaskType,
15
+ )
16
+
17
+
18
+ class ManualTask(Base):
19
+ """Model for storing manual tasks.
20
+
21
+ This model can be used for both privacy request tasks and general tasks.
22
+ For privacy requests, it replaces the functionality of manual webhooks.
23
+ For other use cases, it provides a flexible task management system.
24
+
25
+ There can only be one ManualTask per parent entity.
26
+ You can create multiple Configs for the same ManualTask.
27
+ """
28
+
29
+ @declared_attr
30
+ def __tablename__(cls) -> str:
31
+ """Overriding base class method to set the table name."""
32
+ return "manual_task"
33
+
34
+ # Database columns
35
+ task_type = Column(
36
+ EnumColumn(ManualTaskType),
37
+ nullable=False,
38
+ default=ManualTaskType.privacy_request,
39
+ )
40
+ parent_entity_id = Column(String, nullable=False)
41
+ parent_entity_type = Column(
42
+ EnumColumn(ManualTaskParentEntityType),
43
+ nullable=False,
44
+ default=ManualTaskParentEntityType.connection_config,
45
+ )
46
+ due_date = Column(DateTime, nullable=True)
47
+
48
+ # Relationships
49
+ references = relationship(
50
+ "ManualTaskReference",
51
+ back_populates="task",
52
+ uselist=True,
53
+ cascade="all, delete-orphan",
54
+ )
55
+ logs = relationship(
56
+ "ManualTaskLog",
57
+ back_populates="task",
58
+ primaryjoin="and_(ManualTask.id == ManualTaskLog.task_id)",
59
+ viewonly=True,
60
+ order_by="ManualTaskLog.created_at",
61
+ )
62
+
63
+ # Properties
64
+ @property
65
+ def assigned_users(self) -> list[str]:
66
+ """Get all users assigned to this task."""
67
+ if not self.references:
68
+ return []
69
+ return [
70
+ ref.reference_id
71
+ for ref in self.references
72
+ if ref.reference_type == ManualTaskReferenceType.assigned_user
73
+ ]
74
+
75
+ # CRUD Operations
76
+ @classmethod
77
+ def create(
78
+ cls, db: Session, *, data: dict[str, Any], check_name: bool = True
79
+ ) -> "ManualTask":
80
+ """Create a new manual task."""
81
+ task = super().create(db=db, data=data, check_name=check_name)
82
+ ManualTaskLog.create_log(
83
+ db=db,
84
+ task_id=task.id,
85
+ status=ManualTaskLogStatus.created,
86
+ message=f"Created manual task for {data['task_type']}",
87
+ )
88
+ return task
89
+
90
+
91
+ class ManualTaskReference(Base):
92
+ """Join table to associate manual tasks with multiple references.
93
+
94
+ A single task may have many references including privacy requests, configurations, and assigned users.
95
+ """
96
+
97
+ @declared_attr
98
+ def __tablename__(cls) -> str:
99
+ """Overriding base class method to set the table name."""
100
+ return "manual_task_reference"
101
+
102
+ # Database columns
103
+ task_id = Column(
104
+ String, ForeignKey("manual_task.id", ondelete="CASCADE"), nullable=False
105
+ )
106
+ reference_id = Column(String, nullable=False)
107
+ reference_type = Column(EnumColumn(ManualTaskReferenceType), nullable=False)
108
+
109
+ # Relationships
110
+ task = relationship("ManualTask", back_populates="references")
@@ -0,0 +1,100 @@
1
+ from typing import TYPE_CHECKING, Any, Optional
2
+
3
+ from sqlalchemy import Column, ForeignKey, String
4
+ from sqlalchemy.dialects.postgresql import JSONB
5
+ from sqlalchemy.ext.declarative import declared_attr
6
+ from sqlalchemy.orm import Session, relationship
7
+
8
+ from fides.api.db.base_class import Base
9
+ from fides.api.schemas.manual_tasks.manual_task_schemas import ManualTaskLogStatus
10
+
11
+ if TYPE_CHECKING:
12
+ from fides.api.models.manual_tasks.manual_task import ManualTask
13
+
14
+
15
+ class ManualTaskLog(Base):
16
+ """Model for storing manual task execution logs."""
17
+
18
+ @declared_attr
19
+ def __tablename__(cls) -> str:
20
+ """Overriding base class method to set the table name."""
21
+ return "manual_task_log"
22
+
23
+ task_id = Column(
24
+ String, ForeignKey("manual_task.id", ondelete="CASCADE"), nullable=False
25
+ )
26
+ # TODO: Add foreign key constraints when config and instance are implemented
27
+ config_id = Column(String, nullable=True)
28
+ instance_id = Column(String, nullable=True)
29
+ status = Column(String, nullable=False)
30
+ message = Column(String, nullable=True)
31
+ details = Column(JSONB, nullable=True)
32
+
33
+ # Relationships - using string references to avoid circular imports
34
+ task = relationship("ManualTask", back_populates="logs", foreign_keys=[task_id])
35
+ # TODO: Add config and instance relationships when they are implemented
36
+ # config = relationship("ManualTaskConfig", back_populates="logs")
37
+ # instance = relationship("ManualTaskInstance", back_populates="logs")
38
+
39
+ @classmethod
40
+ def create_log(
41
+ cls,
42
+ db: Session,
43
+ status: ManualTaskLogStatus,
44
+ task_id: str,
45
+ config_id: Optional[str] = None,
46
+ instance_id: Optional[str] = None,
47
+ message: Optional[str] = None,
48
+ details: Optional[dict[str, Any]] = None,
49
+ ) -> "ManualTaskLog":
50
+ """Create a new task log entry.
51
+
52
+ Args:
53
+ db: Database session
54
+ task_id: ID of the task
55
+ status: Status of the log entry
56
+ message: Optional message describing the event
57
+ details: Optional additional details about the event
58
+ """
59
+ data = {
60
+ "task_id": task_id,
61
+ "config_id": config_id,
62
+ "instance_id": instance_id,
63
+ "status": status,
64
+ "message": message,
65
+ "details": details,
66
+ }
67
+ return cls.create(db=db, data=data)
68
+
69
+ @classmethod
70
+ def create_error_log(
71
+ cls,
72
+ db: Session,
73
+ task_id: str,
74
+ message: str,
75
+ config_id: Optional[str] = None,
76
+ instance_id: Optional[str] = None,
77
+ details: Optional[dict[str, Any]] = None,
78
+ ) -> "ManualTaskLog":
79
+ """Create a new error log entry.
80
+
81
+ Args:
82
+ db: Database session
83
+ task_id: ID of the task
84
+ message: Error message describing what went wrong
85
+ config_id: Optional ID of the configuration
86
+ instance_id: Optional ID of the instance
87
+ details: Optional additional details about the error
88
+
89
+ Returns:
90
+ The created error log entry
91
+ """
92
+ return cls.create_log(
93
+ db=db,
94
+ status=ManualTaskLogStatus.error,
95
+ task_id=task_id,
96
+ config_id=config_id,
97
+ instance_id=instance_id,
98
+ message=message,
99
+ details=details,
100
+ )
@@ -22,8 +22,8 @@ from fides.api.models.privacy_notice import (
22
22
  UserConsentPreference,
23
23
  )
24
24
  from fides.api.models.privacy_request import PrivacyRequest, ProvidedIdentity
25
+ from fides.api.models.worker_task import ExecutionLogStatus
25
26
  from fides.api.schemas.language import SupportedLanguage
26
- from fides.api.schemas.privacy_request import ExecutionLogStatus
27
27
  from fides.api.schemas.redis_cache import MultiValue
28
28
  from fides.config import CONFIG
29
29
 
@@ -4,15 +4,14 @@ from __future__ import annotations
4
4
 
5
5
  from typing import Optional
6
6
 
7
- from sqlalchemy import Column, DateTime, String
7
+ from sqlalchemy import Column, String
8
8
  from sqlalchemy.dialects.postgresql import JSONB
9
9
  from sqlalchemy.ext.mutable import MutableList
10
- from sqlalchemy.sql import text
11
10
 
12
11
  from fides.api.db.base_class import Base # type: ignore[attr-defined]
13
12
  from fides.api.db.util import EnumColumn
13
+ from fides.api.models.worker_task import ExecutionLogStatus, TaskExecutionLog
14
14
  from fides.api.schemas.policy import ActionType, CurrentStep
15
- from fides.api.schemas.privacy_request import ExecutionLogStatus
16
15
 
17
16
  # Locations from which privacy request execution can be resumed, in order.
18
17
  EXECUTION_CHECKPOINTS = [
@@ -53,7 +52,7 @@ def can_run_checkpoint(
53
52
  ) >= EXECUTION_CHECKPOINTS.index(from_checkpoint)
54
53
 
55
54
 
56
- class ExecutionLog(Base):
55
+ class ExecutionLog(TaskExecutionLog, Base):
57
56
  """
58
57
  Stores the individual execution logs associated with a PrivacyRequest.
59
58
 
@@ -68,41 +67,14 @@ class ExecutionLog(Base):
68
67
  collection_name = Column(String, index=True)
69
68
  # A JSON Array describing affected fields along with their data categories and paths
70
69
  fields_affected = Column(MutableList.as_mutable(JSONB), nullable=True)
71
- # Contains info, warning, or error messages
72
- message = Column(String)
73
70
  action_type = Column(
74
71
  EnumColumn(ActionType),
75
72
  index=True,
76
73
  nullable=False,
77
74
  )
78
- status = Column(
79
- EnumColumn(
80
- ExecutionLogStatus,
81
- native_enum=True,
82
- values_callable=lambda x: [
83
- i.value for i in x
84
- ], # Using ExecutionLogStatus values in database, even though app is using the names.
85
- ),
86
- index=True,
87
- nullable=False,
88
- )
89
75
 
90
76
  privacy_request_id = Column(
91
77
  String,
92
78
  nullable=False,
93
79
  index=True,
94
80
  )
95
-
96
- # Use clock_timestamp() instead of NOW() to get the actual current time at row creation,
97
- # regardless of transaction state. This prevents timestamp caching within transactions
98
- # and ensures more accurate creation times.
99
- # https://www.postgresql.org/docs/current/functions-datetime.html#FUNCTIONS-DATETIME-CURRENT
100
-
101
- created_at = Column(
102
- DateTime(timezone=True), server_default=text("clock_timestamp()")
103
- )
104
- updated_at = Column(
105
- DateTime(timezone=True),
106
- server_default=text("clock_timestamp()"),
107
- onupdate=text("clock_timestamp()"),
108
- )
@@ -48,6 +48,7 @@ from fides.api.models.audit_log import AuditLog
48
48
  from fides.api.models.client import ClientDetail
49
49
  from fides.api.models.comment import Comment, CommentReference, CommentReferenceType
50
50
  from fides.api.models.fides_user import FidesUser
51
+ from fides.api.models.field_types import EncryptedLargeDataDescriptor
51
52
  from fides.api.models.manual_webhook import AccessManualWebhook
52
53
  from fides.api.models.policy import (
53
54
  Policy,
@@ -72,13 +73,13 @@ from fides.api.models.privacy_request.webhook import (
72
73
  generate_request_callback_pre_approval_jwe,
73
74
  generate_request_callback_resume_jwe,
74
75
  )
76
+ from fides.api.models.worker_task import ExecutionLogStatus
75
77
  from fides.api.schemas.drp_privacy_request import DrpPrivacyRequestCreate
76
78
  from fides.api.schemas.external_https import SecondPartyResponseFormat
77
79
  from fides.api.schemas.masking.masking_secrets import MaskingSecretCache
78
80
  from fides.api.schemas.policy import ActionType, CurrentStep
79
81
  from fides.api.schemas.privacy_request import (
80
82
  CheckpointActionRequired,
81
- ExecutionLogStatus,
82
83
  ManualAction,
83
84
  PrivacyRequestSource,
84
85
  PrivacyRequestStatus,
@@ -251,7 +252,8 @@ class PrivacyRequest(
251
252
  awaiting_email_send_at = Column(DateTime(timezone=True), nullable=True)
252
253
 
253
254
  # Encrypted filtered access results saved for later retrieval
254
- filtered_final_upload = Column( # An encrypted JSON String - Dict[Dict[str, List[Row]]] - rule keys mapped to the filtered access results
255
+ _filtered_final_upload = Column( # An encrypted JSON String - Dict[Dict[str, List[Row]]] - rule keys mapped to the filtered access results
256
+ "filtered_final_upload",
255
257
  StringEncryptedType(
256
258
  type_in=JSONTypeOverride,
257
259
  key=CONFIG.security.app_encryption_key,
@@ -260,6 +262,11 @@ class PrivacyRequest(
260
262
  ),
261
263
  )
262
264
 
265
+ # Use descriptor for automatic external storage handling
266
+ filtered_final_upload = EncryptedLargeDataDescriptor(
267
+ field_name="filtered_final_upload", empty_default={}
268
+ )
269
+
263
270
  # Encrypted filtered access results saved for later retrieval
264
271
  access_result_urls = Column( # An encrypted JSON String - Dict[Dict[str, List[Row]]] - rule keys mapped to the filtered access results
265
272
  StringEncryptedType(
@@ -334,6 +341,7 @@ class PrivacyRequest(
334
341
  deleting this object from the database
335
342
  """
336
343
  self.clear_cached_values()
344
+ self.cleanup_external_storage()
337
345
  Attachment.delete_attachments_for_reference_and_type(
338
346
  db, self.id, AttachmentReferenceType.privacy_request
339
347
  )
@@ -1257,6 +1265,11 @@ class PrivacyRequest(
1257
1265
  # DSR 2.0 does not cache the results so nothing to do here
1258
1266
  return {}
1259
1267
 
1268
+ def cleanup_external_storage(self) -> None:
1269
+ """Clean up all external storage files for this privacy request"""
1270
+ # Access the descriptor from the class to call cleanup
1271
+ PrivacyRequest.filtered_final_upload.cleanup(self)
1272
+
1260
1273
  def save_filtered_access_results(
1261
1274
  self, db: Session, results: Dict[str, Dict[str, List[Row]]]
1262
1275
  ) -> None:
@@ -1544,7 +1557,7 @@ def get_action_required_details(
1544
1557
 
1545
1558
 
1546
1559
  def _parse_cache_to_checkpoint_action_required(
1547
- cache: dict[str, Any]
1560
+ cache: dict[str, Any],
1548
1561
  ) -> CheckpointActionRequired:
1549
1562
  collection = (
1550
1563
  CollectionAddress(
@@ -14,20 +14,19 @@ from sqlalchemy_utils.types.encrypted.encrypted_type import (
14
14
  StringEncryptedType,
15
15
  )
16
16
 
17
- from fides.api.db.base_class import Base # type: ignore[attr-defined]
18
- from fides.api.db.base_class import JSONTypeOverride
19
- from fides.api.db.util import EnumColumn
17
+ from fides.api.db.base_class import Base, JSONTypeOverride # type: ignore[attr-defined]
20
18
  from fides.api.graph.config import (
21
19
  ROOT_COLLECTION_ADDRESS,
22
20
  TERMINATOR_ADDRESS,
23
21
  CollectionAddress,
24
22
  )
23
+ from fides.api.models.field_types import EncryptedLargeDataDescriptor
25
24
  from fides.api.models.privacy_request.execution_log import (
26
25
  COMPLETED_EXECUTION_LOG_STATUSES,
27
26
  )
27
+ from fides.api.models.worker_task import ExecutionLogStatus, WorkerTask
28
28
  from fides.api.schemas.base_class import FidesSchema
29
29
  from fides.api.schemas.policy import ActionType
30
- from fides.api.schemas.privacy_request import ExecutionLogStatus
31
30
  from fides.api.util.cache import (
32
31
  FidesopsRedis,
33
32
  celery_tasks_in_flight,
@@ -68,7 +67,8 @@ class TraversalDetails(FidesSchema):
68
67
  )
69
68
 
70
69
 
71
- class RequestTask(Base):
70
+ # TODO: At some point we will refactor this model to store all task types in a common table that links to tables with specific task attributes.
71
+ class RequestTask(WorkerTask, Base):
72
72
  """
73
73
  An individual Task for a Privacy Request.
74
74
 
@@ -91,21 +91,6 @@ class RequestTask(Base):
91
91
  ) # Of the format dataset_name:collection_name for convenience
92
92
  dataset_name = Column(String, nullable=False, index=True)
93
93
  collection_name = Column(String, nullable=False, index=True)
94
- action_type = Column(EnumColumn(ActionType), nullable=False, index=True)
95
-
96
- # Note that RequestTasks share statuses with ExecutionLogs. When a RequestTask changes state, an ExecutionLog
97
- # is also created with that state. These are tied tightly together in GraphTask.
98
- status = Column(
99
- EnumColumn(
100
- ExecutionLogStatus,
101
- native_enum=False,
102
- values_callable=lambda x: [
103
- i.value for i in x
104
- ], # Using ExecutionLogStatus values in database, even though app is using the names.
105
- ), # character varying in database
106
- index=True,
107
- nullable=False,
108
- )
109
94
 
110
95
  upstream_tasks = Column(
111
96
  MutableList.as_mutable(JSONB)
@@ -121,7 +106,8 @@ class RequestTask(Base):
121
106
  # Raw data retrieved from an access request is stored here. This contains all of the
122
107
  # intermediate data we retrieved, needed for downstream tasks, but hasn't been filtered
123
108
  # by data category for the end user.
124
- access_data = Column( # An encrypted JSON String - saved as a list of Rows
109
+ _access_data = Column( # An encrypted JSON String - saved as a list of Rows
110
+ "access_data",
125
111
  StringEncryptedType(
126
112
  type_in=JSONTypeOverride,
127
113
  key=CONFIG.security.app_encryption_key,
@@ -132,7 +118,8 @@ class RequestTask(Base):
132
118
 
133
119
  # This is the raw access data saved in erasure format (with placeholders preserved) to perform a masking request.
134
120
  # First saved on the access node, and then copied to the corresponding erasure node.
135
- data_for_erasures = Column( # An encrypted JSON String - saved as a list of rows
121
+ _data_for_erasures = Column( # An encrypted JSON String - saved as a list of rows
122
+ "data_for_erasures",
136
123
  StringEncryptedType(
137
124
  type_in=JSONTypeOverride,
138
125
  key=CONFIG.security.app_encryption_key,
@@ -141,6 +128,15 @@ class RequestTask(Base):
141
128
  ),
142
129
  )
143
130
 
131
+ # Use descriptors for automatic external storage handling
132
+ access_data = EncryptedLargeDataDescriptor(
133
+ field_name="access_data", empty_default=[]
134
+ )
135
+
136
+ data_for_erasures = EncryptedLargeDataDescriptor(
137
+ field_name="data_for_erasures", empty_default=[]
138
+ )
139
+
144
140
  # Written after an erasure is completed
145
141
  rows_masked = Column(Integer)
146
142
  # Written after a consent request is completed - not all consent
@@ -177,12 +173,22 @@ class RequestTask(Base):
177
173
  """Convenience helper for asserting whether the task is a terminator task"""
178
174
  return self.request_task_address == TERMINATOR_ADDRESS
179
175
 
176
+ @classmethod
177
+ def allowed_action_types(cls) -> List[str]:
178
+ return [e.value for e in ActionType]
179
+
180
180
  def get_cached_task_id(self) -> Optional[str]:
181
181
  """Gets the cached celery task ID for this request task."""
182
182
  cache: FidesopsRedis = get_cache()
183
183
  task_id = cache.get(get_async_task_tracking_cache_key(self.id))
184
184
  return task_id
185
185
 
186
+ def cleanup_external_storage(self) -> None:
187
+ """Clean up all external storage files for this request task"""
188
+ # Access the descriptor from the class to call cleanup
189
+ RequestTask.access_data.cleanup(self)
190
+ RequestTask.data_for_erasures.cleanup(self)
191
+
186
192
  def get_access_data(self) -> List[Row]:
187
193
  """Helper to retrieve access data or default to empty list"""
188
194
  return self.access_data or []
@@ -191,6 +197,11 @@ class RequestTask(Base):
191
197
  """Helper to retrieve erasure data needed to build masking requests or default to empty list"""
192
198
  return self.data_for_erasures or []
193
199
 
200
+ def delete(self, db: Session) -> None:
201
+ """Override delete to cleanup external storage first"""
202
+ self.cleanup_external_storage()
203
+ super().delete(db)
204
+
194
205
  def update_status(self, db: Session, status: ExecutionLogStatus) -> None:
195
206
  """Helper method to update a task's status"""
196
207
  self.status = status
@@ -236,7 +247,7 @@ class RequestTask(Base):
236
247
  if not tasks_complete and should_log:
237
248
  logger.debug(
238
249
  "Upstream tasks incomplete for {} task {}.",
239
- self.action_type.value,
250
+ self.action_type,
240
251
  self.collection_address,
241
252
  )
242
253
 
@@ -267,7 +278,7 @@ class RequestTask(Base):
267
278
  logger.debug(
268
279
  "Celery Task ID {} found for {} task {}.",
269
280
  celery_task_id,
270
- self.action_type.value,
281
+ self.action_type,
271
282
  self.collection_address,
272
283
  )
273
284
 
@@ -277,7 +288,7 @@ class RequestTask(Base):
277
288
  logger.debug(
278
289
  "Celery Task {} already processing for {} task {}.",
279
290
  celery_task_id,
280
- self.action_type.value,
291
+ self.action_type,
281
292
  self.collection_address,
282
293
  )
283
294
 
@@ -0,0 +1,96 @@
1
+ import enum
2
+ from typing import Any, List
3
+
4
+ from sqlalchemy import Column, DateTime, String
5
+ from sqlalchemy.sql import text
6
+
7
+ from fides.api.db.util import EnumColumn
8
+
9
+
10
+ class ExecutionLogStatus(enum.Enum):
11
+ """Enum for task execution log statuses, reflecting where they are in their workflow"""
12
+
13
+ in_processing = "in_processing"
14
+ pending = "pending"
15
+ complete = "complete"
16
+ error = "error"
17
+ awaiting_processing = "paused" # "paused" in the database to avoid a migration, but use "awaiting_processing" in the app
18
+ retrying = "retrying"
19
+ skipped = "skipped"
20
+
21
+
22
+ class WorkerTask:
23
+ """
24
+ A task for a worker to execute.
25
+ """
26
+
27
+ # Field called action_type to avoid migrations in RequestTask when creating this model
28
+ action_type = Column(String, nullable=False, index=True)
29
+ # Note that WorkerTask share statuses with ExecutionLogs. When a WorkerTask changes state, an ExecutionLog
30
+ # is also created with that state. These are tied tightly together in GraphTask.
31
+ status = Column(
32
+ EnumColumn(
33
+ ExecutionLogStatus,
34
+ native_enum=False,
35
+ values_callable=lambda x: [
36
+ i.value for i in x
37
+ ], # Using ExecutionLogStatus values in database, even though app is using the names.
38
+ ), # character varying in database
39
+ index=True,
40
+ nullable=False,
41
+ )
42
+
43
+ def __init__(self, *args: Any, **kwargs: Any) -> None:
44
+ action_type = kwargs.get("action_type")
45
+ if action_type is not None:
46
+ self.validate_action_type(action_type)
47
+ super().__init__(*args, **kwargs)
48
+
49
+ @classmethod
50
+ def validate_action_type(cls, action_type: str) -> None:
51
+ """
52
+ Validates that the action type is allowed for the worker task.
53
+ """
54
+ if action_type not in cls.allowed_action_types():
55
+ raise ValueError(f"Invalid action_type '{action_type}' for {cls.__name__}")
56
+
57
+ @classmethod
58
+ def allowed_action_types(cls) -> List[str]:
59
+ """
60
+ Subclasses must implement this method to return a list of allowed action types.
61
+ """
62
+ raise NotImplementedError("Subclasses must implement allowed_action_types")
63
+
64
+
65
+ class TaskExecutionLog:
66
+ """
67
+ Stores the individual execution logs associated with a WorkerTask.
68
+ """
69
+
70
+ status = Column(
71
+ EnumColumn(
72
+ ExecutionLogStatus,
73
+ native_enum=True,
74
+ values_callable=lambda x: [
75
+ i.value for i in x
76
+ ], # Using ExecutionLogStatus values in database, even though app is using the names.
77
+ ),
78
+ index=True,
79
+ nullable=False,
80
+ )
81
+ # Contains info, warning, or error messages
82
+ message = Column(String)
83
+
84
+ # Use clock_timestamp() instead of NOW() to get the actual current time at row creation,
85
+ # regardless of transaction state. This prevents timestamp caching within transactions
86
+ # and ensures more accurate creation times.
87
+ # https://www.postgresql.org/docs/current/functions-datetime.html#FUNCTIONS-DATETIME-CURRENT
88
+
89
+ created_at = Column(
90
+ DateTime(timezone=True), server_default=text("clock_timestamp()")
91
+ )
92
+ updated_at = Column(
93
+ DateTime(timezone=True),
94
+ server_default=text("clock_timestamp()"),
95
+ onupdate=text("clock_timestamp()"),
96
+ )
@@ -0,0 +1,22 @@
1
+ """Schema for external storage metadata."""
2
+
3
+ from typing import Optional
4
+
5
+ from pydantic import Field
6
+
7
+ from fides.api.schemas.base_class import FidesSchema
8
+ from fides.api.schemas.storage.storage import StorageType
9
+
10
+
11
+ class ExternalStorageMetadata(FidesSchema):
12
+ """Metadata for externally stored encrypted data."""
13
+
14
+ storage_type: StorageType
15
+ file_key: str = Field(description="Path/key of the file in external storage")
16
+ filesize: int = Field(description="Size of the stored file in bytes", ge=0)
17
+ storage_key: Optional[str] = Field(
18
+ default=None, description="Storage configuration key used"
19
+ )
20
+
21
+ class Config:
22
+ use_enum_values = True
File without changes