ethyca-fides 2.63.1b3__py2.py3-none-any.whl → 2.63.1b4__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (130) hide show
  1. {ethyca_fides-2.63.1b3.dist-info → ethyca_fides-2.63.1b4.dist-info}/METADATA +1 -1
  2. {ethyca_fides-2.63.1b3.dist-info → ethyca_fides-2.63.1b4.dist-info}/RECORD +127 -117
  3. fides/_version.py +3 -3
  4. fides/api/alembic/migrations/versions/29e56fa1fdb3_add_monitor_tasks.py +147 -0
  5. fides/api/api/v1/endpoints/privacy_request_endpoints.py +4 -4
  6. fides/api/db/base.py +5 -1
  7. fides/api/models/connectionconfig.py +1 -1
  8. fides/api/models/detection_discovery/__init__.py +35 -0
  9. fides/api/models/detection_discovery/monitor_task.py +161 -0
  10. fides/api/models/field_types/__init__.py +5 -0
  11. fides/api/models/field_types/encrypted_large_data.py +151 -0
  12. fides/api/models/privacy_preference.py +1 -1
  13. fides/api/models/privacy_request/execution_log.py +3 -31
  14. fides/api/models/privacy_request/privacy_request.py +16 -3
  15. fides/api/models/privacy_request/request_task.py +36 -25
  16. fides/api/models/worker_task.py +96 -0
  17. fides/api/schemas/external_storage.py +22 -0
  18. fides/api/schemas/privacy_request.py +1 -12
  19. fides/api/service/connectors/base_erasure_email_connector.py +1 -1
  20. fides/api/service/connectors/consent_email_connector.py +2 -1
  21. fides/api/service/connectors/dynamic_erasure_email_connector.py +2 -1
  22. fides/api/service/connectors/erasure_email_connector.py +1 -1
  23. fides/api/service/external_data_storage.py +371 -0
  24. fides/api/service/privacy_request/request_runner_service.py +5 -5
  25. fides/api/service/privacy_request/request_service.py +1 -1
  26. fides/api/task/create_request_tasks.py +1 -1
  27. fides/api/task/execute_request_tasks.py +9 -8
  28. fides/api/task/graph_task.py +22 -10
  29. fides/api/util/consent_util.py +1 -1
  30. fides/api/util/data_size.py +102 -0
  31. fides/api/util/encryption/aes_gcm_encryption_util.py +271 -0
  32. fides/service/privacy_request/privacy_request_service.py +1 -1
  33. fides/ui-build/static/admin/404.html +1 -1
  34. fides/ui-build/static/admin/_next/static/{ycPcko8qnif6BlkQ6MN4D → X2nvWLg2_-vsCTkhSWpzw}/_buildManifest.js +1 -1
  35. fides/ui-build/static/admin/_next/static/chunks/pages/privacy-requests/configure/messaging-c583a61302f02add.js +1 -0
  36. fides/ui-build/static/admin/_next/static/chunks/pages/privacy-requests/configure/storage-20d20a8d1736f7c4.js +1 -0
  37. fides/ui-build/static/admin/_next/static/chunks/pages/privacy-requests-0e557d79e1e43c2b.js +1 -0
  38. fides/ui-build/static/admin/add-systems/manual.html +1 -1
  39. fides/ui-build/static/admin/add-systems/multiple.html +1 -1
  40. fides/ui-build/static/admin/add-systems.html +1 -1
  41. fides/ui-build/static/admin/consent/configure/add-vendors.html +1 -1
  42. fides/ui-build/static/admin/consent/configure.html +1 -1
  43. fides/ui-build/static/admin/consent/privacy-experience/[id].html +1 -1
  44. fides/ui-build/static/admin/consent/privacy-experience/new.html +1 -1
  45. fides/ui-build/static/admin/consent/privacy-experience.html +1 -1
  46. fides/ui-build/static/admin/consent/privacy-notices/[id].html +1 -1
  47. fides/ui-build/static/admin/consent/privacy-notices/new.html +1 -1
  48. fides/ui-build/static/admin/consent/privacy-notices.html +1 -1
  49. fides/ui-build/static/admin/consent/properties.html +1 -1
  50. fides/ui-build/static/admin/consent/reporting.html +1 -1
  51. fides/ui-build/static/admin/consent.html +1 -1
  52. fides/ui-build/static/admin/data-catalog/[systemId]/projects/[projectUrn]/[resourceUrn].html +1 -1
  53. fides/ui-build/static/admin/data-catalog/[systemId]/projects/[projectUrn].html +1 -1
  54. fides/ui-build/static/admin/data-catalog/[systemId]/projects.html +1 -1
  55. fides/ui-build/static/admin/data-catalog/[systemId]/resources/[resourceUrn].html +1 -1
  56. fides/ui-build/static/admin/data-catalog/[systemId]/resources.html +1 -1
  57. fides/ui-build/static/admin/data-catalog.html +1 -1
  58. fides/ui-build/static/admin/data-discovery/action-center/[monitorId]/[systemId].html +1 -1
  59. fides/ui-build/static/admin/data-discovery/action-center/[monitorId].html +1 -1
  60. fides/ui-build/static/admin/data-discovery/action-center.html +1 -1
  61. fides/ui-build/static/admin/data-discovery/activity.html +1 -1
  62. fides/ui-build/static/admin/data-discovery/detection/[resourceUrn].html +1 -1
  63. fides/ui-build/static/admin/data-discovery/detection.html +1 -1
  64. fides/ui-build/static/admin/data-discovery/discovery/[resourceUrn].html +1 -1
  65. fides/ui-build/static/admin/data-discovery/discovery.html +1 -1
  66. fides/ui-build/static/admin/datamap.html +1 -1
  67. fides/ui-build/static/admin/dataset/[datasetId]/[collectionName]/[...subfieldNames].html +1 -1
  68. fides/ui-build/static/admin/dataset/[datasetId]/[collectionName].html +1 -1
  69. fides/ui-build/static/admin/dataset/[datasetId].html +1 -1
  70. fides/ui-build/static/admin/dataset/new.html +1 -1
  71. fides/ui-build/static/admin/dataset.html +1 -1
  72. fides/ui-build/static/admin/datastore-connection/[id].html +1 -1
  73. fides/ui-build/static/admin/datastore-connection/new.html +1 -1
  74. fides/ui-build/static/admin/datastore-connection.html +1 -1
  75. fides/ui-build/static/admin/index.html +1 -1
  76. fides/ui-build/static/admin/integrations/[id].html +1 -1
  77. fides/ui-build/static/admin/integrations.html +1 -1
  78. fides/ui-build/static/admin/lib/fides-headless.js +1 -1
  79. fides/ui-build/static/admin/lib/fides-preview.js +1 -1
  80. fides/ui-build/static/admin/lib/fides-tcf.js +2 -2
  81. fides/ui-build/static/admin/lib/fides.js +2 -2
  82. fides/ui-build/static/admin/login/[provider].html +1 -1
  83. fides/ui-build/static/admin/login.html +1 -1
  84. fides/ui-build/static/admin/messaging/[id].html +1 -1
  85. fides/ui-build/static/admin/messaging/add-template.html +1 -1
  86. fides/ui-build/static/admin/messaging.html +1 -1
  87. fides/ui-build/static/admin/poc/ant-components.html +1 -1
  88. fides/ui-build/static/admin/poc/form-experiments/AntForm.html +1 -1
  89. fides/ui-build/static/admin/poc/form-experiments/FormikAntFormItem.html +1 -1
  90. fides/ui-build/static/admin/poc/form-experiments/FormikControlled.html +1 -1
  91. fides/ui-build/static/admin/poc/form-experiments/FormikField.html +1 -1
  92. fides/ui-build/static/admin/poc/form-experiments/FormikSpreadField.html +1 -1
  93. fides/ui-build/static/admin/poc/forms.html +1 -1
  94. fides/ui-build/static/admin/poc/table-migration.html +1 -1
  95. fides/ui-build/static/admin/privacy-requests/[id].html +1 -1
  96. fides/ui-build/static/admin/privacy-requests/configure/messaging.html +1 -1
  97. fides/ui-build/static/admin/privacy-requests/configure/storage.html +1 -1
  98. fides/ui-build/static/admin/privacy-requests/configure.html +1 -1
  99. fides/ui-build/static/admin/privacy-requests.html +1 -1
  100. fides/ui-build/static/admin/properties/[id].html +1 -1
  101. fides/ui-build/static/admin/properties/add-property.html +1 -1
  102. fides/ui-build/static/admin/properties.html +1 -1
  103. fides/ui-build/static/admin/reporting/datamap.html +1 -1
  104. fides/ui-build/static/admin/settings/about/alpha.html +1 -1
  105. fides/ui-build/static/admin/settings/about.html +1 -1
  106. fides/ui-build/static/admin/settings/consent/[configuration_id]/[purpose_id].html +1 -1
  107. fides/ui-build/static/admin/settings/consent.html +1 -1
  108. fides/ui-build/static/admin/settings/custom-fields.html +1 -1
  109. fides/ui-build/static/admin/settings/domain-records.html +1 -1
  110. fides/ui-build/static/admin/settings/domains.html +1 -1
  111. fides/ui-build/static/admin/settings/email-templates.html +1 -1
  112. fides/ui-build/static/admin/settings/locations.html +1 -1
  113. fides/ui-build/static/admin/settings/organization.html +1 -1
  114. fides/ui-build/static/admin/settings/regulations.html +1 -1
  115. fides/ui-build/static/admin/systems/configure/[id]/test-datasets.html +1 -1
  116. fides/ui-build/static/admin/systems/configure/[id].html +1 -1
  117. fides/ui-build/static/admin/systems.html +1 -1
  118. fides/ui-build/static/admin/taxonomy.html +1 -1
  119. fides/ui-build/static/admin/user-management/new.html +1 -1
  120. fides/ui-build/static/admin/user-management/profile/[id].html +1 -1
  121. fides/ui-build/static/admin/user-management.html +1 -1
  122. fides/ui-build/static/admin/_next/static/chunks/pages/privacy-requests/configure/messaging-8cab04871908cfeb.js +0 -1
  123. fides/ui-build/static/admin/_next/static/chunks/pages/privacy-requests/configure/storage-150d40428245ee0c.js +0 -1
  124. fides/ui-build/static/admin/_next/static/chunks/pages/privacy-requests-20cdb2c8a03deae1.js +0 -1
  125. {ethyca_fides-2.63.1b3.dist-info → ethyca_fides-2.63.1b4.dist-info}/WHEEL +0 -0
  126. {ethyca_fides-2.63.1b3.dist-info → ethyca_fides-2.63.1b4.dist-info}/entry_points.txt +0 -0
  127. {ethyca_fides-2.63.1b3.dist-info → ethyca_fides-2.63.1b4.dist-info}/licenses/LICENSE +0 -0
  128. {ethyca_fides-2.63.1b3.dist-info → ethyca_fides-2.63.1b4.dist-info}/top_level.txt +0 -0
  129. /fides/api/models/{detection_discovery.py → detection_discovery/core.py} +0 -0
  130. /fides/ui-build/static/admin/_next/static/{ycPcko8qnif6BlkQ6MN4D → X2nvWLg2_-vsCTkhSWpzw}/_ssgManifest.js +0 -0
@@ -75,6 +75,7 @@ from fides.api.models.privacy_request import (
75
75
  ProvidedIdentity,
76
76
  RequestTask,
77
77
  )
78
+ from fides.api.models.worker_task import ExecutionLogStatus
78
79
  from fides.api.oauth.utils import (
79
80
  verify_callback_oauth_policy_pre_webhook,
80
81
  verify_callback_oauth_pre_approval_webhook,
@@ -91,7 +92,6 @@ from fides.api.schemas.privacy_request import (
91
92
  CheckpointActionRequired,
92
93
  DenyPrivacyRequests,
93
94
  ExecutionLogDetailResponse,
94
- ExecutionLogStatus,
95
95
  FilteredPrivacyRequestResults,
96
96
  LogEntry,
97
97
  ManualWebhookData,
@@ -1940,16 +1940,16 @@ def request_task_async_callback(
1940
1940
  ]:
1941
1941
  raise HTTPException(
1942
1942
  status_code=HTTP_400_BAD_REQUEST,
1943
- detail=f"Callback failed. Cannot queue {request_task.action_type.value} task '{request_task.id}' with privacy request status '{privacy_request.status.value}'",
1943
+ detail=f"Callback failed. Cannot queue {request_task.action_type} task '{request_task.id}' with privacy request status '{privacy_request.status.value}'",
1944
1944
  )
1945
1945
  if request_task.status != ExecutionLogStatus.awaiting_processing:
1946
1946
  raise HTTPException(
1947
1947
  status_code=HTTP_400_BAD_REQUEST,
1948
- detail=f"Callback failed. Cannot queue {request_task.action_type.value} task '{request_task.id}' with request task status '{request_task.status.value}'",
1948
+ detail=f"Callback failed. Cannot queue {request_task.action_type} task '{request_task.id}' with request task status '{request_task.status.value}'",
1949
1949
  )
1950
1950
  logger.info(
1951
1951
  "Callback received for {} task {} {}",
1952
- request_task.action_type.value,
1952
+ request_task.action_type,
1953
1953
  request_task.collection_address,
1954
1954
  request_task.id,
1955
1955
  )
fides/api/db/base.py CHANGED
@@ -16,7 +16,11 @@ from fides.api.models.custom_connector_template import CustomConnectorTemplate
16
16
  from fides.api.models.custom_report import CustomReport
17
17
  from fides.api.models.datasetconfig import DatasetConfig
18
18
  from fides.api.models.db_cache import DBCache
19
- from fides.api.models.detection_discovery import MonitorConfig, StagedResource
19
+ from fides.api.models.detection_discovery.core import MonitorConfig, StagedResource
20
+ from fides.api.models.detection_discovery.monitor_task import (
21
+ MonitorTask,
22
+ MonitorTaskExecutionLog,
23
+ )
20
24
  from fides.api.models.experience_notices import ExperienceNotices
21
25
  from fides.api.models.fides_cloud import FidesCloud
22
26
  from fides.api.models.fides_user import FidesUser
@@ -23,7 +23,7 @@ from fides.api.schemas.saas.saas_config import SaaSConfig
23
23
  from fides.config import CONFIG
24
24
 
25
25
  if TYPE_CHECKING:
26
- from fides.api.models.detection_discovery import MonitorConfig
26
+ from fides.api.models.detection_discovery.core import MonitorConfig
27
27
  from fides.api.schemas.connection_configuration.enums.system_type import SystemType
28
28
 
29
29
 
@@ -0,0 +1,35 @@
1
+ from .core import (
2
+ DiffStatus,
3
+ MonitorConfig,
4
+ MonitorExecution,
5
+ MonitorFrequency,
6
+ SharedMonitorConfig,
7
+ StagedResource,
8
+ StagedResourceAncestor,
9
+ fetch_staged_resources_by_type_query,
10
+ )
11
+ from .monitor_task import (
12
+ MonitorTask,
13
+ MonitorTaskExecutionLog,
14
+ MonitorTaskType,
15
+ TaskRunType,
16
+ create_monitor_task_with_execution_log,
17
+ update_monitor_task_with_execution_log,
18
+ )
19
+
20
+ __all__ = [
21
+ "DiffStatus",
22
+ "MonitorConfig",
23
+ "MonitorExecution",
24
+ "MonitorFrequency",
25
+ "SharedMonitorConfig",
26
+ "StagedResource",
27
+ "StagedResourceAncestor",
28
+ "fetch_staged_resources_by_type_query",
29
+ "MonitorTask",
30
+ "MonitorTaskExecutionLog",
31
+ "MonitorTaskType",
32
+ "TaskRunType",
33
+ "create_monitor_task_with_execution_log",
34
+ "update_monitor_task_with_execution_log",
35
+ ]
@@ -0,0 +1,161 @@
1
+ from __future__ import annotations
2
+
3
+ from enum import Enum
4
+ from typing import List, Optional
5
+
6
+ from sqlalchemy import ARRAY, Column
7
+ from sqlalchemy import Enum as SQLAlchemyEnum
8
+ from sqlalchemy import ForeignKey, String
9
+ from sqlalchemy.dialects.postgresql import JSONB
10
+ from sqlalchemy.orm import Session, relationship
11
+
12
+ from fides.api.db.base_class import Base, FidesBase # type: ignore[attr-defined]
13
+ from fides.api.models.detection_discovery.core import MonitorConfig
14
+ from fides.api.models.worker_task import (
15
+ ExecutionLogStatus,
16
+ TaskExecutionLog,
17
+ WorkerTask,
18
+ )
19
+
20
+
class MonitorTaskType(Enum):
    """
    Kinds of monitor work that a worker can be asked to execute.

    The string values are what ``MonitorTask.allowed_action_types`` returns.
    """

    DETECTION = "detection"
    CLASSIFICATION = "classification"
    PROMOTION = "promotion"
29
+
30
+
class MonitorTask(WorkerTask, Base):
    """
    Database record of a unit of monitor work handed to a worker.

    Every task references exactly one MonitorConfig (non-nullable foreign key)
    and keeps its execution history in ``execution_logs``.
    """

    # Identifier of the *current* execution. MonitorTask.id never changes, while
    # celery_id is replaced on each run or retry, so the pair gives a stable
    # handle on the task plus a pointer to its latest attempt.
    celery_id = Column(
        String(255),
        default=FidesBase.generate_uuid,
        nullable=False,
        unique=True,
    )
    # Arguments the task was started with, kept so the task can be rerun
    task_arguments = Column(JSONB, nullable=True)
    # Info, warning, or error text
    message = Column(String)
    monitor_config_id = Column(
        String,
        ForeignKey(MonitorConfig.id_field_path, ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    staged_resource_urns = Column(ARRAY(String), nullable=True)
    child_resource_urns = Column(ARRAY(String), nullable=True)

    # NOTE(review): this is the many-to-one side (the foreign key lives on this
    # table) and the cascade includes "delete", so deleting a task through the
    # ORM session would presumably cascade to its MonitorConfig - confirm that
    # this is intended.
    monitor_config = relationship(MonitorConfig, cascade="all, delete")
    execution_logs = relationship(
        "MonitorTaskExecutionLog",
        cascade="all, delete",
        back_populates="monitor_task",
    )

    @classmethod
    def allowed_action_types(cls) -> List[str]:
        """Return the string value of every ``MonitorTaskType`` member."""
        return [task_type.value for task_type in MonitorTaskType]
62
+
63
+
class TaskRunType(Enum):
    """
    How a task run was started.

    ``SYSTEM`` is the default used by ``MonitorTaskExecutionLog.run_type`` and
    by ``update_monitor_task_with_execution_log``.
    """

    MANUAL = "manual"
    SYSTEM = "system"
71
+
72
+
class MonitorTaskExecutionLog(TaskExecutionLog, Base):
    """
    A single entry in the execution history of a MonitorTask.
    """

    # Snapshot of the celery_id for this particular attempt. MonitorTask.celery_id
    # is overwritten on every execution, so the value is copied here to keep a
    # complete history of all attempts.
    celery_id = Column(String(255), nullable=False)
    monitor_task_id = Column(
        String,
        ForeignKey(MonitorTask.id_field_path, ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    run_type = Column(
        SQLAlchemyEnum(TaskRunType),
        default=TaskRunType.SYSTEM,
        nullable=False,
    )

    monitor_task = relationship("MonitorTask", back_populates="execution_logs")
93
+
94
+
def create_monitor_task_with_execution_log(
    db: Session, monitor_task_data: dict
) -> MonitorTask:
    """
    Persist a new MonitorTask together with its first execution log.

    Both the task and the log start in the ``pending`` status. The contents of
    ``monitor_task_data`` are passed to the MonitorTask constructor as keyword
    arguments. The session is committed and the refreshed task is returned.
    """
    initial_status = ExecutionLogStatus.pending

    new_task = MonitorTask(  # type: ignore
        status=initial_status.value,
        **monitor_task_data,
    )
    db.add(new_task)
    # Flush before building the log; presumably this is what makes the
    # column-level default for celery_id available on new_task.
    db.flush()

    first_log = MonitorTaskExecutionLog(  # type: ignore
        monitor_task=new_task,
        celery_id=new_task.celery_id,
        status=initial_status,
    )
    db.add(first_log)

    db.commit()
    db.refresh(new_task)
    return new_task
118
+
119
+
def update_monitor_task_with_execution_log(
    db: Session,
    status: ExecutionLogStatus,
    task_record: Optional[MonitorTask] = None,
    celery_id: Optional[str] = None,
    message: Optional[str] = None,
    run_type: TaskRunType = TaskRunType.SYSTEM,
) -> MonitorTask:
    """
    Update a monitor task's status/message and record a new execution log.

    At least one of ``celery_id`` or ``task_record`` must be supplied:

    - Without a ``celery_id`` a new run is assumed: a fresh celery_id is
      generated and stored on the task.
    - With a ``celery_id`` the status of an existing run is being updated.
      ``task_record`` may be passed alongside it to avoid looking the task up
      again; in that case the task's stored celery_id is left untouched.

    Args:
        db: Session used for the lookup and the commit.
        status: Status written to both the task and the new log entry.
        task_record: The task to update, if the caller already has it.
        celery_id: Execution id used to find the task and stamped on the log.
        message: Optional text written to both the task and the log entry.
        run_type: Whether the run was started manually or by the system.

    Returns:
        The refreshed MonitorTask.

    Raises:
        ValueError: If neither identifier is given, or no task matches
            ``celery_id``.
    """
    if not celery_id and not task_record:
        raise ValueError("Either celery_id or task_record must be provided")

    if celery_id and not task_record:
        task_record = MonitorTask.get_by(db=db, field="celery_id", value=celery_id)
        if not task_record:
            raise ValueError(f"Could not find MonitorTask with celery_id {celery_id}")

    assert task_record is not None  # help type checker understand the control flow

    if not celery_id:
        celery_id = task_record.generate_uuid()
        task_record.celery_id = celery_id

    task_record.status = status.value  # type: ignore
    task_record.message = message

    execution_log = MonitorTaskExecutionLog(  # type: ignore
        monitor_task=task_record,
        status=status,
        message=message,
        celery_id=celery_id,
        run_type=run_type,
    )
    # Add the log explicitly, as create_monitor_task_with_execution_log does,
    # instead of relying on the backref cascade to pull it into the session
    # (that implicit behavior is removed in SQLAlchemy 2.0).
    db.add(execution_log)

    db.commit()
    db.refresh(task_record)
    return task_record
@@ -0,0 +1,5 @@
1
+ from .encrypted_large_data import EncryptedLargeDataDescriptor
2
+
3
+ __all__ = [
4
+ "EncryptedLargeDataDescriptor",
5
+ ]
@@ -0,0 +1,151 @@
1
+ from datetime import datetime
2
+ from typing import Any, Optional, Type
3
+
4
+ from loguru import logger
5
+
6
+ from fides.api.api.deps import get_autoclose_db_session
7
+ from fides.api.schemas.external_storage import ExternalStorageMetadata
8
+ from fides.api.service.external_data_storage import (
9
+ ExternalDataStorageError,
10
+ ExternalDataStorageService,
11
+ )
12
+ from fides.api.util.data_size import LARGE_DATA_THRESHOLD_BYTES, calculate_data_size
13
+
14
+
class EncryptedLargeDataDescriptor:
    """
    Descriptor for model fields that spill over to external storage when large.

    The descriptor sits in front of a backing attribute named ``_<field_name>``
    on the owning instance:

    - On write, values whose calculated size exceeds ``threshold_bytes`` are
      handed to ``ExternalDataStorageService`` and only the returned metadata
      (as a dict) is kept in the backing attribute; smaller values are stored
      in the backing attribute directly.
    - On read, a backing value that is a dict containing ``"storage_type"`` is
      treated as such metadata and the real data is fetched from external
      storage; anything else is returned as-is.
    """

    def __init__(
        self,
        field_name: str,
        empty_default: Optional[Any] = None,
        threshold_bytes: Optional[int] = None,
    ):
        """
        Args:
            field_name: Logical field name; the backing attribute is
                ``_<field_name>``.
            empty_default: Value used when the field is set to something falsy
                or external storage yields ``None``. Defaults to an empty list.
            threshold_bytes: Size above which data is stored externally. A
                falsy value falls back to ``LARGE_DATA_THRESHOLD_BYTES``.
        """
        self.field_name = field_name
        self.private_field = f"_{field_name}"
        self.empty_default = empty_default if empty_default is not None else []
        self.threshold_bytes = threshold_bytes or LARGE_DATA_THRESHOLD_BYTES
        self.model_class: Optional[str] = None
        self.name: Optional[str] = None

    # Descriptor protocol helpers

    def __set_name__(self, owner: Type, name: str) -> None:
        """Record the attribute name and the owning class name (used in logs and paths)."""
        self.name = name
        self.model_class = owner.__name__

    def _fresh_empty_default(self) -> Any:
        """
        Return an independent copy of ``empty_default``.

        The descriptor is shared by every instance of the owning class, so
        handing out the same mutable object would let an in-place change on one
        instance leak into the default and into all other instances.
        """
        from copy import deepcopy  # local import keeps the module's import block untouched

        return deepcopy(self.empty_default)

    def _generate_storage_path(self, instance: Any) -> str:
        """
        Build ``<model>/<instance id>/<field>/<UTC timestamp>.txt``.

        Raises:
            ValueError: If the instance has no truthy ``id``.
        """
        instance_id = getattr(instance, "id", None)
        if not instance_id:
            raise ValueError(f"Instance {instance} must have an 'id' attribute")
        timestamp = datetime.utcnow().strftime("%Y%m%d-%H%M%S-%f")
        return f"{self.model_class}/{instance_id}/{self.field_name}/{timestamp}.txt"

    def __get__(self, instance: Any, owner: Type) -> Any:
        """
        Return the field value, loading it from external storage if needed.

        Class-level access returns the descriptor itself. A backing value of
        ``None`` is returned as ``None``.

        Raises:
            ExternalDataStorageError: If the externally stored data cannot be
                retrieved.
        """
        if instance is None:
            return self
        raw_data = getattr(instance, self.private_field)
        if raw_data is None:
            return None
        if isinstance(raw_data, dict) and "storage_type" in raw_data:
            logger.info(
                f"Reading {self.model_class}.{self.field_name} from external storage "
                f"({raw_data.get('storage_type')})"
            )
            try:
                metadata = ExternalStorageMetadata.model_validate(raw_data)
                data = self._retrieve_external_data(metadata)
                record_count = len(data) if isinstance(data, list) else "N/A"
                logger.info(
                    f"Successfully retrieved {self.model_class}.{self.field_name} "
                    f"from external storage (records: {record_count})"
                )
                return data if data is not None else self._fresh_empty_default()
            except Exception as e:  # pylint: disable=broad-except
                logger.error(
                    f"Failed to retrieve {self.model_class}.{self.field_name} "
                    f"from external storage: {str(e)}"
                )
                raise ExternalDataStorageError(
                    f"Failed to retrieve {self.field_name}: {str(e)}"
                ) from e
        else:
            return raw_data

    def __set__(self, instance: Any, value: Any) -> None:
        """
        Store ``value``, choosing between the backing attribute and external storage.

        Falsy values reset the field to a copy of ``empty_default``. Setting a
        value equal to the current one is a no-op. In every other case any
        previously stored external data is cleaned up first.
        """
        if not value:
            self._cleanup_external_data(instance)
            setattr(instance, self.private_field, self._fresh_empty_default())
            return
        try:
            current_data = self.__get__(instance, type(instance))
            if current_data == value:
                return
        except Exception as exc:  # pylint: disable=broad-except
            # Best effort only: if the current value cannot be read or compared,
            # fall through and overwrite it.
            logger.debug(
                f"Could not compare current {self.model_class}.{self.field_name}: {str(exc)}"
            )

        data_size = calculate_data_size(value)
        if data_size > self.threshold_bytes:
            logger.info(
                f"{self.model_class}.{self.field_name}: Data size ({data_size:,} bytes) "
                f"exceeds threshold ({self.threshold_bytes:,} bytes), storing externally"
            )
            # NOTE(review): the old external copy is removed before the new one
            # is written; if the store call fails the backing attribute still
            # holds the old metadata - confirm this ordering is acceptable.
            self._cleanup_external_data(instance)
            metadata = self._store_external_data(instance, value)
            setattr(instance, self.private_field, metadata.model_dump())
        else:
            self._cleanup_external_data(instance)
            setattr(instance, self.private_field, value)

    # External storage helpers

    def _store_external_data(
        self, instance: Any, data: Any
    ) -> "ExternalStorageMetadata":
        """Write ``data`` to external storage under a newly generated path and return its metadata."""
        storage_path = self._generate_storage_path(instance)
        with get_autoclose_db_session() as session:
            metadata = ExternalDataStorageService.store_data(
                db=session,
                storage_path=storage_path,
                data=data,
            )
            logger.info(
                f"Stored {self.model_class}.{self.field_name} to external storage: {storage_path}"
            )
            return metadata

    @staticmethod
    def _retrieve_external_data(metadata: "ExternalStorageMetadata") -> Any:
        """Fetch the data described by ``metadata`` from external storage."""
        with get_autoclose_db_session() as session:
            return ExternalDataStorageService.retrieve_data(
                db=session,
                metadata=metadata,
            )

    def _cleanup_external_data(self, instance: Any) -> None:
        """
        Delete the external copy referenced by the backing attribute, if any.

        Failures are logged as warnings and never raised.
        """
        raw_data = getattr(instance, self.private_field, None)
        if isinstance(raw_data, dict) and "storage_type" in raw_data:
            try:
                metadata = ExternalStorageMetadata.model_validate(raw_data)
                with get_autoclose_db_session() as session:
                    ExternalDataStorageService.delete_data(
                        db=session,
                        metadata=metadata,
                    )
                    logger.info(
                        f"Cleaned up external storage for {self.model_class}.{self.field_name}: "
                        f"{metadata.file_key}"
                    )
            except Exception as e:  # pylint: disable=broad-except
                logger.warning(
                    f"Failed to cleanup external {self.field_name}: {str(e)}"
                )

    # Public helper

    def cleanup(self, instance: Any) -> None:
        """Delete any external copy held for ``instance`` (best effort)."""
        self._cleanup_external_data(instance)
@@ -22,8 +22,8 @@ from fides.api.models.privacy_notice import (
22
22
  UserConsentPreference,
23
23
  )
24
24
  from fides.api.models.privacy_request import PrivacyRequest, ProvidedIdentity
25
+ from fides.api.models.worker_task import ExecutionLogStatus
25
26
  from fides.api.schemas.language import SupportedLanguage
26
- from fides.api.schemas.privacy_request import ExecutionLogStatus
27
27
  from fides.api.schemas.redis_cache import MultiValue
28
28
  from fides.config import CONFIG
29
29
 
@@ -4,15 +4,14 @@ from __future__ import annotations
4
4
 
5
5
  from typing import Optional
6
6
 
7
- from sqlalchemy import Column, DateTime, String
7
+ from sqlalchemy import Column, String
8
8
  from sqlalchemy.dialects.postgresql import JSONB
9
9
  from sqlalchemy.ext.mutable import MutableList
10
- from sqlalchemy.sql import text
11
10
 
12
11
  from fides.api.db.base_class import Base # type: ignore[attr-defined]
13
12
  from fides.api.db.util import EnumColumn
13
+ from fides.api.models.worker_task import ExecutionLogStatus, TaskExecutionLog
14
14
  from fides.api.schemas.policy import ActionType, CurrentStep
15
- from fides.api.schemas.privacy_request import ExecutionLogStatus
16
15
 
17
16
  # Locations from which privacy request execution can be resumed, in order.
18
17
  EXECUTION_CHECKPOINTS = [
@@ -53,7 +52,7 @@ def can_run_checkpoint(
53
52
  ) >= EXECUTION_CHECKPOINTS.index(from_checkpoint)
54
53
 
55
54
 
56
- class ExecutionLog(Base):
55
+ class ExecutionLog(TaskExecutionLog, Base):
57
56
  """
58
57
  Stores the individual execution logs associated with a PrivacyRequest.
59
58
 
@@ -68,41 +67,14 @@ class ExecutionLog(Base):
68
67
  collection_name = Column(String, index=True)
69
68
  # A JSON Array describing affected fields along with their data categories and paths
70
69
  fields_affected = Column(MutableList.as_mutable(JSONB), nullable=True)
71
- # Contains info, warning, or error messages
72
- message = Column(String)
73
70
  action_type = Column(
74
71
  EnumColumn(ActionType),
75
72
  index=True,
76
73
  nullable=False,
77
74
  )
78
- status = Column(
79
- EnumColumn(
80
- ExecutionLogStatus,
81
- native_enum=True,
82
- values_callable=lambda x: [
83
- i.value for i in x
84
- ], # Using ExecutionLogStatus values in database, even though app is using the names.
85
- ),
86
- index=True,
87
- nullable=False,
88
- )
89
75
 
90
76
  privacy_request_id = Column(
91
77
  String,
92
78
  nullable=False,
93
79
  index=True,
94
80
  )
95
-
96
- # Use clock_timestamp() instead of NOW() to get the actual current time at row creation,
97
- # regardless of transaction state. This prevents timestamp caching within transactions
98
- # and ensures more accurate creation times.
99
- # https://www.postgresql.org/docs/current/functions-datetime.html#FUNCTIONS-DATETIME-CURRENT
100
-
101
- created_at = Column(
102
- DateTime(timezone=True), server_default=text("clock_timestamp()")
103
- )
104
- updated_at = Column(
105
- DateTime(timezone=True),
106
- server_default=text("clock_timestamp()"),
107
- onupdate=text("clock_timestamp()"),
108
- )
@@ -48,6 +48,7 @@ from fides.api.models.audit_log import AuditLog
48
48
  from fides.api.models.client import ClientDetail
49
49
  from fides.api.models.comment import Comment, CommentReference, CommentReferenceType
50
50
  from fides.api.models.fides_user import FidesUser
51
+ from fides.api.models.field_types import EncryptedLargeDataDescriptor
51
52
  from fides.api.models.manual_webhook import AccessManualWebhook
52
53
  from fides.api.models.policy import (
53
54
  Policy,
@@ -72,13 +73,13 @@ from fides.api.models.privacy_request.webhook import (
72
73
  generate_request_callback_pre_approval_jwe,
73
74
  generate_request_callback_resume_jwe,
74
75
  )
76
+ from fides.api.models.worker_task import ExecutionLogStatus
75
77
  from fides.api.schemas.drp_privacy_request import DrpPrivacyRequestCreate
76
78
  from fides.api.schemas.external_https import SecondPartyResponseFormat
77
79
  from fides.api.schemas.masking.masking_secrets import MaskingSecretCache
78
80
  from fides.api.schemas.policy import ActionType, CurrentStep
79
81
  from fides.api.schemas.privacy_request import (
80
82
  CheckpointActionRequired,
81
- ExecutionLogStatus,
82
83
  ManualAction,
83
84
  PrivacyRequestSource,
84
85
  PrivacyRequestStatus,
@@ -251,7 +252,8 @@ class PrivacyRequest(
251
252
  awaiting_email_send_at = Column(DateTime(timezone=True), nullable=True)
252
253
 
253
254
  # Encrypted filtered access results saved for later retrieval
254
- filtered_final_upload = Column( # An encrypted JSON String - Dict[Dict[str, List[Row]]] - rule keys mapped to the filtered access results
255
+ _filtered_final_upload = Column( # An encrypted JSON String - Dict[Dict[str, List[Row]]] - rule keys mapped to the filtered access results
256
+ "filtered_final_upload",
255
257
  StringEncryptedType(
256
258
  type_in=JSONTypeOverride,
257
259
  key=CONFIG.security.app_encryption_key,
@@ -260,6 +262,11 @@ class PrivacyRequest(
260
262
  ),
261
263
  )
262
264
 
265
+ # Use descriptor for automatic external storage handling
266
+ filtered_final_upload = EncryptedLargeDataDescriptor(
267
+ field_name="filtered_final_upload", empty_default={}
268
+ )
269
+
263
270
  # Encrypted filtered access results saved for later retrieval
264
271
  access_result_urls = Column( # An encrypted JSON String - Dict[Dict[str, List[Row]]] - rule keys mapped to the filtered access results
265
272
  StringEncryptedType(
@@ -334,6 +341,7 @@ class PrivacyRequest(
334
341
  deleting this object from the database
335
342
  """
336
343
  self.clear_cached_values()
344
+ self.cleanup_external_storage()
337
345
  Attachment.delete_attachments_for_reference_and_type(
338
346
  db, self.id, AttachmentReferenceType.privacy_request
339
347
  )
@@ -1257,6 +1265,11 @@ class PrivacyRequest(
1257
1265
  # DSR 2.0 does not cache the results so nothing to do here
1258
1266
  return {}
1259
1267
 
def cleanup_external_storage(self) -> None:
    """Remove the externally stored data held for this privacy request's descriptor field."""
    # Looking the attribute up on the class (not the instance) returns the
    # descriptor object itself, which exposes the public cleanup helper.
    upload_descriptor = PrivacyRequest.filtered_final_upload
    upload_descriptor.cleanup(self)
1272
+
1260
1273
  def save_filtered_access_results(
1261
1274
  self, db: Session, results: Dict[str, Dict[str, List[Row]]]
1262
1275
  ) -> None:
@@ -1544,7 +1557,7 @@ def get_action_required_details(
1544
1557
 
1545
1558
 
1546
1559
  def _parse_cache_to_checkpoint_action_required(
1547
- cache: dict[str, Any]
1560
+ cache: dict[str, Any],
1548
1561
  ) -> CheckpointActionRequired:
1549
1562
  collection = (
1550
1563
  CollectionAddress(