ethyca-fides 2.63.1b3__py2.py3-none-any.whl → 2.63.1b4__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ethyca_fides-2.63.1b3.dist-info → ethyca_fides-2.63.1b4.dist-info}/METADATA +1 -1
- {ethyca_fides-2.63.1b3.dist-info → ethyca_fides-2.63.1b4.dist-info}/RECORD +127 -117
- fides/_version.py +3 -3
- fides/api/alembic/migrations/versions/29e56fa1fdb3_add_monitor_tasks.py +147 -0
- fides/api/api/v1/endpoints/privacy_request_endpoints.py +4 -4
- fides/api/db/base.py +5 -1
- fides/api/models/connectionconfig.py +1 -1
- fides/api/models/detection_discovery/__init__.py +35 -0
- fides/api/models/detection_discovery/monitor_task.py +161 -0
- fides/api/models/field_types/__init__.py +5 -0
- fides/api/models/field_types/encrypted_large_data.py +151 -0
- fides/api/models/privacy_preference.py +1 -1
- fides/api/models/privacy_request/execution_log.py +3 -31
- fides/api/models/privacy_request/privacy_request.py +16 -3
- fides/api/models/privacy_request/request_task.py +36 -25
- fides/api/models/worker_task.py +96 -0
- fides/api/schemas/external_storage.py +22 -0
- fides/api/schemas/privacy_request.py +1 -12
- fides/api/service/connectors/base_erasure_email_connector.py +1 -1
- fides/api/service/connectors/consent_email_connector.py +2 -1
- fides/api/service/connectors/dynamic_erasure_email_connector.py +2 -1
- fides/api/service/connectors/erasure_email_connector.py +1 -1
- fides/api/service/external_data_storage.py +371 -0
- fides/api/service/privacy_request/request_runner_service.py +5 -5
- fides/api/service/privacy_request/request_service.py +1 -1
- fides/api/task/create_request_tasks.py +1 -1
- fides/api/task/execute_request_tasks.py +9 -8
- fides/api/task/graph_task.py +22 -10
- fides/api/util/consent_util.py +1 -1
- fides/api/util/data_size.py +102 -0
- fides/api/util/encryption/aes_gcm_encryption_util.py +271 -0
- fides/service/privacy_request/privacy_request_service.py +1 -1
- fides/ui-build/static/admin/404.html +1 -1
- fides/ui-build/static/admin/_next/static/{ycPcko8qnif6BlkQ6MN4D → X2nvWLg2_-vsCTkhSWpzw}/_buildManifest.js +1 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/privacy-requests/configure/messaging-c583a61302f02add.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/pages/privacy-requests/configure/storage-20d20a8d1736f7c4.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/pages/privacy-requests-0e557d79e1e43c2b.js +1 -0
- fides/ui-build/static/admin/add-systems/manual.html +1 -1
- fides/ui-build/static/admin/add-systems/multiple.html +1 -1
- fides/ui-build/static/admin/add-systems.html +1 -1
- fides/ui-build/static/admin/consent/configure/add-vendors.html +1 -1
- fides/ui-build/static/admin/consent/configure.html +1 -1
- fides/ui-build/static/admin/consent/privacy-experience/[id].html +1 -1
- fides/ui-build/static/admin/consent/privacy-experience/new.html +1 -1
- fides/ui-build/static/admin/consent/privacy-experience.html +1 -1
- fides/ui-build/static/admin/consent/privacy-notices/[id].html +1 -1
- fides/ui-build/static/admin/consent/privacy-notices/new.html +1 -1
- fides/ui-build/static/admin/consent/privacy-notices.html +1 -1
- fides/ui-build/static/admin/consent/properties.html +1 -1
- fides/ui-build/static/admin/consent/reporting.html +1 -1
- fides/ui-build/static/admin/consent.html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/projects/[projectUrn]/[resourceUrn].html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/projects/[projectUrn].html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/projects.html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/resources/[resourceUrn].html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/resources.html +1 -1
- fides/ui-build/static/admin/data-catalog.html +1 -1
- fides/ui-build/static/admin/data-discovery/action-center/[monitorId]/[systemId].html +1 -1
- fides/ui-build/static/admin/data-discovery/action-center/[monitorId].html +1 -1
- fides/ui-build/static/admin/data-discovery/action-center.html +1 -1
- fides/ui-build/static/admin/data-discovery/activity.html +1 -1
- fides/ui-build/static/admin/data-discovery/detection/[resourceUrn].html +1 -1
- fides/ui-build/static/admin/data-discovery/detection.html +1 -1
- fides/ui-build/static/admin/data-discovery/discovery/[resourceUrn].html +1 -1
- fides/ui-build/static/admin/data-discovery/discovery.html +1 -1
- fides/ui-build/static/admin/datamap.html +1 -1
- fides/ui-build/static/admin/dataset/[datasetId]/[collectionName]/[...subfieldNames].html +1 -1
- fides/ui-build/static/admin/dataset/[datasetId]/[collectionName].html +1 -1
- fides/ui-build/static/admin/dataset/[datasetId].html +1 -1
- fides/ui-build/static/admin/dataset/new.html +1 -1
- fides/ui-build/static/admin/dataset.html +1 -1
- fides/ui-build/static/admin/datastore-connection/[id].html +1 -1
- fides/ui-build/static/admin/datastore-connection/new.html +1 -1
- fides/ui-build/static/admin/datastore-connection.html +1 -1
- fides/ui-build/static/admin/index.html +1 -1
- fides/ui-build/static/admin/integrations/[id].html +1 -1
- fides/ui-build/static/admin/integrations.html +1 -1
- fides/ui-build/static/admin/lib/fides-headless.js +1 -1
- fides/ui-build/static/admin/lib/fides-preview.js +1 -1
- fides/ui-build/static/admin/lib/fides-tcf.js +2 -2
- fides/ui-build/static/admin/lib/fides.js +2 -2
- fides/ui-build/static/admin/login/[provider].html +1 -1
- fides/ui-build/static/admin/login.html +1 -1
- fides/ui-build/static/admin/messaging/[id].html +1 -1
- fides/ui-build/static/admin/messaging/add-template.html +1 -1
- fides/ui-build/static/admin/messaging.html +1 -1
- fides/ui-build/static/admin/poc/ant-components.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/AntForm.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/FormikAntFormItem.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/FormikControlled.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/FormikField.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/FormikSpreadField.html +1 -1
- fides/ui-build/static/admin/poc/forms.html +1 -1
- fides/ui-build/static/admin/poc/table-migration.html +1 -1
- fides/ui-build/static/admin/privacy-requests/[id].html +1 -1
- fides/ui-build/static/admin/privacy-requests/configure/messaging.html +1 -1
- fides/ui-build/static/admin/privacy-requests/configure/storage.html +1 -1
- fides/ui-build/static/admin/privacy-requests/configure.html +1 -1
- fides/ui-build/static/admin/privacy-requests.html +1 -1
- fides/ui-build/static/admin/properties/[id].html +1 -1
- fides/ui-build/static/admin/properties/add-property.html +1 -1
- fides/ui-build/static/admin/properties.html +1 -1
- fides/ui-build/static/admin/reporting/datamap.html +1 -1
- fides/ui-build/static/admin/settings/about/alpha.html +1 -1
- fides/ui-build/static/admin/settings/about.html +1 -1
- fides/ui-build/static/admin/settings/consent/[configuration_id]/[purpose_id].html +1 -1
- fides/ui-build/static/admin/settings/consent.html +1 -1
- fides/ui-build/static/admin/settings/custom-fields.html +1 -1
- fides/ui-build/static/admin/settings/domain-records.html +1 -1
- fides/ui-build/static/admin/settings/domains.html +1 -1
- fides/ui-build/static/admin/settings/email-templates.html +1 -1
- fides/ui-build/static/admin/settings/locations.html +1 -1
- fides/ui-build/static/admin/settings/organization.html +1 -1
- fides/ui-build/static/admin/settings/regulations.html +1 -1
- fides/ui-build/static/admin/systems/configure/[id]/test-datasets.html +1 -1
- fides/ui-build/static/admin/systems/configure/[id].html +1 -1
- fides/ui-build/static/admin/systems.html +1 -1
- fides/ui-build/static/admin/taxonomy.html +1 -1
- fides/ui-build/static/admin/user-management/new.html +1 -1
- fides/ui-build/static/admin/user-management/profile/[id].html +1 -1
- fides/ui-build/static/admin/user-management.html +1 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/privacy-requests/configure/messaging-8cab04871908cfeb.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/privacy-requests/configure/storage-150d40428245ee0c.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/privacy-requests-20cdb2c8a03deae1.js +0 -1
- {ethyca_fides-2.63.1b3.dist-info → ethyca_fides-2.63.1b4.dist-info}/WHEEL +0 -0
- {ethyca_fides-2.63.1b3.dist-info → ethyca_fides-2.63.1b4.dist-info}/entry_points.txt +0 -0
- {ethyca_fides-2.63.1b3.dist-info → ethyca_fides-2.63.1b4.dist-info}/licenses/LICENSE +0 -0
- {ethyca_fides-2.63.1b3.dist-info → ethyca_fides-2.63.1b4.dist-info}/top_level.txt +0 -0
- /fides/api/models/{detection_discovery.py → detection_discovery/core.py} +0 -0
- /fides/ui-build/static/admin/_next/static/{ycPcko8qnif6BlkQ6MN4D → X2nvWLg2_-vsCTkhSWpzw}/_ssgManifest.js +0 -0
@@ -75,6 +75,7 @@ from fides.api.models.privacy_request import (
|
|
75
75
|
ProvidedIdentity,
|
76
76
|
RequestTask,
|
77
77
|
)
|
78
|
+
from fides.api.models.worker_task import ExecutionLogStatus
|
78
79
|
from fides.api.oauth.utils import (
|
79
80
|
verify_callback_oauth_policy_pre_webhook,
|
80
81
|
verify_callback_oauth_pre_approval_webhook,
|
@@ -91,7 +92,6 @@ from fides.api.schemas.privacy_request import (
|
|
91
92
|
CheckpointActionRequired,
|
92
93
|
DenyPrivacyRequests,
|
93
94
|
ExecutionLogDetailResponse,
|
94
|
-
ExecutionLogStatus,
|
95
95
|
FilteredPrivacyRequestResults,
|
96
96
|
LogEntry,
|
97
97
|
ManualWebhookData,
|
@@ -1940,16 +1940,16 @@ def request_task_async_callback(
|
|
1940
1940
|
]:
|
1941
1941
|
raise HTTPException(
|
1942
1942
|
status_code=HTTP_400_BAD_REQUEST,
|
1943
|
-
detail=f"Callback failed. Cannot queue {request_task.action_type
|
1943
|
+
detail=f"Callback failed. Cannot queue {request_task.action_type} task '{request_task.id}' with privacy request status '{privacy_request.status.value}'",
|
1944
1944
|
)
|
1945
1945
|
if request_task.status != ExecutionLogStatus.awaiting_processing:
|
1946
1946
|
raise HTTPException(
|
1947
1947
|
status_code=HTTP_400_BAD_REQUEST,
|
1948
|
-
detail=f"Callback failed. Cannot queue {request_task.action_type
|
1948
|
+
detail=f"Callback failed. Cannot queue {request_task.action_type} task '{request_task.id}' with request task status '{request_task.status.value}'",
|
1949
1949
|
)
|
1950
1950
|
logger.info(
|
1951
1951
|
"Callback received for {} task {} {}",
|
1952
|
-
request_task.action_type
|
1952
|
+
request_task.action_type,
|
1953
1953
|
request_task.collection_address,
|
1954
1954
|
request_task.id,
|
1955
1955
|
)
|
fides/api/db/base.py
CHANGED
@@ -16,7 +16,11 @@ from fides.api.models.custom_connector_template import CustomConnectorTemplate
|
|
16
16
|
from fides.api.models.custom_report import CustomReport
|
17
17
|
from fides.api.models.datasetconfig import DatasetConfig
|
18
18
|
from fides.api.models.db_cache import DBCache
|
19
|
-
from fides.api.models.detection_discovery import MonitorConfig, StagedResource
|
19
|
+
from fides.api.models.detection_discovery.core import MonitorConfig, StagedResource
|
20
|
+
from fides.api.models.detection_discovery.monitor_task import (
|
21
|
+
MonitorTask,
|
22
|
+
MonitorTaskExecutionLog,
|
23
|
+
)
|
20
24
|
from fides.api.models.experience_notices import ExperienceNotices
|
21
25
|
from fides.api.models.fides_cloud import FidesCloud
|
22
26
|
from fides.api.models.fides_user import FidesUser
|
@@ -23,7 +23,7 @@ from fides.api.schemas.saas.saas_config import SaaSConfig
|
|
23
23
|
from fides.config import CONFIG
|
24
24
|
|
25
25
|
if TYPE_CHECKING:
|
26
|
-
from fides.api.models.detection_discovery import MonitorConfig
|
26
|
+
from fides.api.models.detection_discovery.core import MonitorConfig
|
27
27
|
from fides.api.schemas.connection_configuration.enums.system_type import SystemType
|
28
28
|
|
29
29
|
|
@@ -0,0 +1,35 @@
|
|
1
|
+
from .core import (
|
2
|
+
DiffStatus,
|
3
|
+
MonitorConfig,
|
4
|
+
MonitorExecution,
|
5
|
+
MonitorFrequency,
|
6
|
+
SharedMonitorConfig,
|
7
|
+
StagedResource,
|
8
|
+
StagedResourceAncestor,
|
9
|
+
fetch_staged_resources_by_type_query,
|
10
|
+
)
|
11
|
+
from .monitor_task import (
|
12
|
+
MonitorTask,
|
13
|
+
MonitorTaskExecutionLog,
|
14
|
+
MonitorTaskType,
|
15
|
+
TaskRunType,
|
16
|
+
create_monitor_task_with_execution_log,
|
17
|
+
update_monitor_task_with_execution_log,
|
18
|
+
)
|
19
|
+
|
20
|
+
__all__ = [
|
21
|
+
"DiffStatus",
|
22
|
+
"MonitorConfig",
|
23
|
+
"MonitorExecution",
|
24
|
+
"MonitorFrequency",
|
25
|
+
"SharedMonitorConfig",
|
26
|
+
"StagedResource",
|
27
|
+
"StagedResourceAncestor",
|
28
|
+
"fetch_staged_resources_by_type_query",
|
29
|
+
"MonitorTask",
|
30
|
+
"MonitorTaskExecutionLog",
|
31
|
+
"MonitorTaskType",
|
32
|
+
"TaskRunType",
|
33
|
+
"create_monitor_task_with_execution_log",
|
34
|
+
"update_monitor_task_with_execution_log",
|
35
|
+
]
|
@@ -0,0 +1,161 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
from enum import Enum
|
4
|
+
from typing import List, Optional
|
5
|
+
|
6
|
+
from sqlalchemy import ARRAY, Column
|
7
|
+
from sqlalchemy import Enum as SQLAlchemyEnum
|
8
|
+
from sqlalchemy import ForeignKey, String
|
9
|
+
from sqlalchemy.dialects.postgresql import JSONB
|
10
|
+
from sqlalchemy.orm import Session, relationship
|
11
|
+
|
12
|
+
from fides.api.db.base_class import Base, FidesBase # type: ignore[attr-defined]
|
13
|
+
from fides.api.models.detection_discovery.core import MonitorConfig
|
14
|
+
from fides.api.models.worker_task import (
|
15
|
+
ExecutionLogStatus,
|
16
|
+
TaskExecutionLog,
|
17
|
+
WorkerTask,
|
18
|
+
)
|
19
|
+
|
20
|
+
|
21
|
+
class MonitorTaskType(Enum):
    """
    Closed set of monitor task kinds.

    The member values are the lowercase strings returned by
    ``MonitorTask.allowed_action_types``.
    """

    DETECTION = "detection"
    CLASSIFICATION = "classification"
    PROMOTION = "promotion"
|
29
|
+
|
30
|
+
|
31
|
+
class MonitorTask(WorkerTask, Base):
    """
    Database record for a monitor task that is executed by a worker.

    Each task belongs to exactly one ``MonitorConfig`` and owns a list of
    ``MonitorTaskExecutionLog`` rows describing its individual runs.
    """

    # Identifier of the *current* execution. MonitorTask.id stays the same for
    # the lifetime of the task, whereas celery_id is replaced on every execution
    # or retry, so the row keeps a stable identity while still pointing at the
    # run that is currently in flight.
    celery_id = Column(
        String(255),
        unique=True,
        nullable=False,
        default=FidesBase.generate_uuid,
    )
    # Arguments the task was started with, kept so the task can be rerun
    task_arguments = Column(JSONB, nullable=True)
    # Contains info, warning, or error messages
    message = Column(String)
    monitor_config_id = Column(
        String,
        ForeignKey(MonitorConfig.id_field_path, ondelete="CASCADE"),
        index=True,
        nullable=False,
    )
    staged_resource_urns = Column(ARRAY(String), nullable=True)
    child_resource_urns = Column(ARRAY(String), nullable=True)

    # NOTE(review): this is the many-to-one side (the foreign key lives on this
    # table). With "all, delete" an ORM-level delete of a MonitorTask presumably
    # cascades to its MonitorConfig as well - confirm that this is intended.
    monitor_config = relationship(MonitorConfig, cascade="all, delete")
    execution_logs = relationship(
        "MonitorTaskExecutionLog",
        back_populates="monitor_task",
        cascade="all, delete",
    )

    @classmethod
    def allowed_action_types(cls) -> List[str]:
        """Return the string value of every ``MonitorTaskType`` member."""
        return [task_type.value for task_type in MonitorTaskType]
|
62
|
+
|
63
|
+
|
64
|
+
class TaskRunType(Enum):
    """
    How a task run was started: ``MANUAL`` or ``SYSTEM``.

    ``SYSTEM`` is the default used by ``MonitorTaskExecutionLog.run_type``.
    """

    MANUAL = "manual"
    SYSTEM = "system"
|
71
|
+
|
72
|
+
|
73
|
+
class MonitorTaskExecutionLog(TaskExecutionLog, Base):
    """
    One execution-log entry belonging to a ``MonitorTask``.
    """

    # Execution ID this log entry was written for. MonitorTask.celery_id is
    # overwritten on every new execution, while the value stored here is kept,
    # so the log rows together form the full history of execution attempts.
    celery_id = Column(String(255), nullable=False)
    monitor_task_id = Column(
        String,
        ForeignKey(MonitorTask.id_field_path, ondelete="CASCADE"),
        index=True,
        nullable=False,
    )
    run_type = Column(
        SQLAlchemyEnum(TaskRunType),
        nullable=False,
        default=TaskRunType.SYSTEM,
    )

    monitor_task = relationship("MonitorTask", back_populates="execution_logs")
|
93
|
+
|
94
|
+
|
95
|
+
def create_monitor_task_with_execution_log(
    db: Session, monitor_task_data: dict
) -> MonitorTask:
    """
    Create a ``MonitorTask`` together with its first execution log.

    Both the task and the log start out with the ``pending`` status.

    Args:
        db: Session used to persist both rows; it is committed here.
        monitor_task_data: Keyword arguments forwarded to ``MonitorTask``.

    Returns:
        The newly created task, refreshed from the database.
    """
    initial_status = ExecutionLogStatus.pending

    new_task = MonitorTask(  # type: ignore
        status=initial_status.value,
        **monitor_task_data,
    )
    db.add(new_task)
    # Flush before building the log so that insert-time column defaults
    # (e.g. celery_id) are available on the task.
    db.flush()

    first_log = MonitorTaskExecutionLog(  # type: ignore
        monitor_task=new_task,
        celery_id=new_task.celery_id,
        status=initial_status,
    )
    db.add(first_log)

    db.commit()
    db.refresh(new_task)
    return new_task
|
118
|
+
|
119
|
+
|
120
|
+
def update_monitor_task_with_execution_log(
    db: Session,
    status: ExecutionLogStatus,
    task_record: Optional[MonitorTask] = None,
    celery_id: Optional[str] = None,
    message: Optional[str] = None,
    run_type: TaskRunType = TaskRunType.SYSTEM,
) -> MonitorTask:
    """
    Update a monitor task's status/message and record a matching execution log.

    At least one of ``celery_id`` or ``task_record`` must be given:

    * Without a ``celery_id`` a new run is assumed to be starting, so a fresh
      ID is generated and stored on the task.
    * With a ``celery_id`` only the status of that existing run is updated.
      ``task_record`` may be passed as well to avoid looking the task up again.

    Args:
        db: Session used for the lookup and for persisting; it is committed here.
        status: New status, stored on the task (as its value) and on the log.
        task_record: The task to update, if the caller already has it.
        celery_id: ID of the execution being updated, if one already exists.
        message: Optional message stored on both the task and the log.
        run_type: Run type recorded on the execution log.

    Returns:
        The updated task, refreshed from the database.

    Raises:
        ValueError: If neither ``celery_id`` nor ``task_record`` is provided,
            or if no task exists for the given ``celery_id``.
    """
    if task_record is None:
        if not celery_id:
            raise ValueError("Either celery_id or task_record must be provided")
        task_record = MonitorTask.get_by(db=db, field="celery_id", value=celery_id)
        if task_record is None:
            raise ValueError(f"Could not find MonitorTask with celery_id {celery_id}")

    if not celery_id:
        # A new run is starting: give it its own execution ID.
        celery_id = task_record.generate_uuid()
        task_record.celery_id = celery_id

    task_record.status = status.value  # type: ignore
    task_record.message = message

    execution_log = MonitorTaskExecutionLog(  # type: ignore
        monitor_task=task_record,
        status=status,
        message=message,
        celery_id=celery_id,
        run_type=run_type,
    )
    # Add the log explicitly (as create_monitor_task_with_execution_log does)
    # instead of relying on a backref cascade to pull it into the session.
    db.add(execution_log)

    db.commit()
    db.refresh(task_record)
    return task_record
|
@@ -0,0 +1,151 @@
|
|
1
|
+
from datetime import datetime
|
2
|
+
from typing import Any, Optional, Type
|
3
|
+
|
4
|
+
from loguru import logger
|
5
|
+
|
6
|
+
from fides.api.api.deps import get_autoclose_db_session
|
7
|
+
from fides.api.schemas.external_storage import ExternalStorageMetadata
|
8
|
+
from fides.api.service.external_data_storage import (
|
9
|
+
ExternalDataStorageError,
|
10
|
+
ExternalDataStorageService,
|
11
|
+
)
|
12
|
+
from fides.api.util.data_size import LARGE_DATA_THRESHOLD_BYTES, calculate_data_size
|
13
|
+
|
14
|
+
|
15
|
+
class EncryptedLargeDataDescriptor:
    """
    Descriptor for a model field that falls back to external storage when large.

    The descriptor reads and writes the owning instance's private attribute
    ``_<field_name>``. That attribute holds either the value itself or, when
    the value's calculated size exceeded ``threshold_bytes``, the dumped
    ``ExternalStorageMetadata`` (a dict containing a ``storage_type`` key)
    describing where the value was stored externally.
    """

    def __init__(
        self,
        field_name: str,
        empty_default: Optional[Any] = None,
        threshold_bytes: Optional[int] = None,
    ):
        """
        Args:
            field_name: Logical field name; the backing attribute is
                ``_<field_name>``.
            empty_default: Value used for "empty"; defaults to an empty list.
            threshold_bytes: Size above which values are stored externally.
                A falsy value selects ``LARGE_DATA_THRESHOLD_BYTES``.
        """
        self.field_name = field_name
        self.private_field = f"_{field_name}"
        self.empty_default = empty_default if empty_default is not None else []
        self.threshold_bytes = threshold_bytes or LARGE_DATA_THRESHOLD_BYTES
        self.model_class: Optional[str] = None
        self.name: Optional[str] = None

    # Descriptor protocol helpers

    def __set_name__(self, owner: Type, name: str) -> None:
        """Record the attribute name and the owning class name."""
        self.name = name
        self.model_class = owner.__name__

    def _new_empty_default(self) -> Any:
        """
        Return an independent copy of ``empty_default``.

        The descriptor object is shared by every instance of the owning class,
        so handing out ``empty_default`` itself would let a mutation made
        through one instance leak into all the others.
        """
        import copy  # local import: only needed here

        return copy.deepcopy(self.empty_default)

    def _generate_storage_path(self, instance: Any) -> str:
        """
        Build ``<model>/<instance id>/<field>/<UTC timestamp>.txt``.

        Raises:
            ValueError: If the instance has no truthy ``id`` attribute.
        """
        instance_id = getattr(instance, "id", None)
        if not instance_id:
            raise ValueError(f"Instance {instance} must have an 'id' attribute")
        timestamp = datetime.utcnow().strftime("%Y%m%d-%H%M%S-%f")
        return f"{self.model_class}/{instance_id}/{self.field_name}/{timestamp}.txt"

    def __get__(self, instance: Any, owner: Type) -> Any:
        """
        Return the field value, fetching it from external storage if needed.

        Accessed on the class, the descriptor itself is returned. A stored
        ``None`` is returned as ``None``.

        Raises:
            ExternalDataStorageError: If the externally stored value cannot be
                validated or retrieved.
        """
        if instance is None:
            return self
        raw_data = getattr(instance, self.private_field)
        if raw_data is None:
            return None
        if not (isinstance(raw_data, dict) and "storage_type" in raw_data):
            # Value is stored inline.
            return raw_data

        logger.info(
            f"Reading {self.model_class}.{self.field_name} from external storage "
            f"({raw_data.get('storage_type')})"
        )
        try:
            metadata = ExternalStorageMetadata.model_validate(raw_data)
            data = self._retrieve_external_data(metadata)
            record_count = len(data) if isinstance(data, list) else "N/A"
            logger.info(
                f"Successfully retrieved {self.model_class}.{self.field_name} "
                f"from external storage (records: {record_count})"
            )
            return data if data is not None else self._new_empty_default()
        except Exception as e:  # pylint: disable=broad-except
            logger.error(
                f"Failed to retrieve {self.model_class}.{self.field_name} "
                f"from external storage: {str(e)}"
            )
            raise ExternalDataStorageError(
                f"Failed to retrieve {self.field_name}: {str(e)}"
            ) from e

    def __set__(self, instance: Any, value: Any) -> None:
        """
        Store ``value`` inline or externally, depending on its size.

        A falsy value resets the field to a copy of ``empty_default``. A value
        equal to the current one is a no-op. Any previously stored external
        payload is cleaned up once it is no longer referenced.
        """
        if not value:
            self._cleanup_external_data(instance)
            setattr(instance, self.private_field, self._new_empty_default())
            return

        try:
            current_data = self.__get__(instance, type(instance))
            if current_data == value:
                return
        except Exception as e:  # pylint: disable=broad-except
            # Best effort only: if the current value cannot be read or
            # compared, carry on and overwrite it.
            logger.debug(
                f"Could not compare current {self.model_class}.{self.field_name} "
                f"with new value, overwriting: {str(e)}"
            )

        data_size = calculate_data_size(value)
        if data_size > self.threshold_bytes:
            logger.info(
                f"{self.model_class}.{self.field_name}: Data size ({data_size:,} bytes) "
                f"exceeds threshold ({self.threshold_bytes:,} bytes), storing externally"
            )
            # Store the new payload first so that a failed upload does not
            # leave the field pointing at an already deleted external file.
            metadata = self._store_external_data(instance, value)
            self._cleanup_external_data(instance)
            setattr(instance, self.private_field, metadata.model_dump())
        else:
            self._cleanup_external_data(instance)
            setattr(instance, self.private_field, value)

    # External storage helpers

    def _store_external_data(self, instance: Any, data: Any) -> ExternalStorageMetadata:
        """Store ``data`` under a newly generated path and return its metadata."""
        storage_path = self._generate_storage_path(instance)
        with get_autoclose_db_session() as session:
            metadata = ExternalDataStorageService.store_data(
                db=session,
                storage_path=storage_path,
                data=data,
            )
            logger.info(
                f"Stored {self.model_class}.{self.field_name} to external storage: {storage_path}"
            )
            return metadata

    @staticmethod
    def _retrieve_external_data(metadata: ExternalStorageMetadata) -> Any:
        """Fetch the payload described by ``metadata`` from external storage."""
        with get_autoclose_db_session() as session:
            return ExternalDataStorageService.retrieve_data(
                db=session,
                metadata=metadata,
            )

    def _cleanup_external_data(self, instance: Any) -> None:
        """
        Delete the external payload currently referenced by the instance, if any.

        Failures are logged as warnings and never raised.
        """
        raw_data = getattr(instance, self.private_field, None)
        if isinstance(raw_data, dict) and "storage_type" in raw_data:
            try:
                metadata = ExternalStorageMetadata.model_validate(raw_data)
                with get_autoclose_db_session() as session:
                    ExternalDataStorageService.delete_data(
                        db=session,
                        metadata=metadata,
                    )
                    logger.info(
                        f"Cleaned up external storage for {self.model_class}.{self.field_name}: "
                        f"{metadata.file_key}"
                    )
            except Exception as e:  # pylint: disable=broad-except
                logger.warning(
                    f"Failed to cleanup external {self.field_name}: {str(e)}"
                )

    # Public helper

    def cleanup(self, instance: Any) -> None:
        """Delete any external payload referenced by ``instance`` (best effort)."""
        self._cleanup_external_data(instance)
|
@@ -22,8 +22,8 @@ from fides.api.models.privacy_notice import (
|
|
22
22
|
UserConsentPreference,
|
23
23
|
)
|
24
24
|
from fides.api.models.privacy_request import PrivacyRequest, ProvidedIdentity
|
25
|
+
from fides.api.models.worker_task import ExecutionLogStatus
|
25
26
|
from fides.api.schemas.language import SupportedLanguage
|
26
|
-
from fides.api.schemas.privacy_request import ExecutionLogStatus
|
27
27
|
from fides.api.schemas.redis_cache import MultiValue
|
28
28
|
from fides.config import CONFIG
|
29
29
|
|
@@ -4,15 +4,14 @@ from __future__ import annotations
|
|
4
4
|
|
5
5
|
from typing import Optional
|
6
6
|
|
7
|
-
from sqlalchemy import Column,
|
7
|
+
from sqlalchemy import Column, String
|
8
8
|
from sqlalchemy.dialects.postgresql import JSONB
|
9
9
|
from sqlalchemy.ext.mutable import MutableList
|
10
|
-
from sqlalchemy.sql import text
|
11
10
|
|
12
11
|
from fides.api.db.base_class import Base # type: ignore[attr-defined]
|
13
12
|
from fides.api.db.util import EnumColumn
|
13
|
+
from fides.api.models.worker_task import ExecutionLogStatus, TaskExecutionLog
|
14
14
|
from fides.api.schemas.policy import ActionType, CurrentStep
|
15
|
-
from fides.api.schemas.privacy_request import ExecutionLogStatus
|
16
15
|
|
17
16
|
# Locations from which privacy request execution can be resumed, in order.
|
18
17
|
EXECUTION_CHECKPOINTS = [
|
@@ -53,7 +52,7 @@ def can_run_checkpoint(
|
|
53
52
|
) >= EXECUTION_CHECKPOINTS.index(from_checkpoint)
|
54
53
|
|
55
54
|
|
56
|
-
class ExecutionLog(Base):
|
55
|
+
class ExecutionLog(TaskExecutionLog, Base):
|
57
56
|
"""
|
58
57
|
Stores the individual execution logs associated with a PrivacyRequest.
|
59
58
|
|
@@ -68,41 +67,14 @@ class ExecutionLog(Base):
|
|
68
67
|
collection_name = Column(String, index=True)
|
69
68
|
# A JSON Array describing affected fields along with their data categories and paths
|
70
69
|
fields_affected = Column(MutableList.as_mutable(JSONB), nullable=True)
|
71
|
-
# Contains info, warning, or error messages
|
72
|
-
message = Column(String)
|
73
70
|
action_type = Column(
|
74
71
|
EnumColumn(ActionType),
|
75
72
|
index=True,
|
76
73
|
nullable=False,
|
77
74
|
)
|
78
|
-
status = Column(
|
79
|
-
EnumColumn(
|
80
|
-
ExecutionLogStatus,
|
81
|
-
native_enum=True,
|
82
|
-
values_callable=lambda x: [
|
83
|
-
i.value for i in x
|
84
|
-
], # Using ExecutionLogStatus values in database, even though app is using the names.
|
85
|
-
),
|
86
|
-
index=True,
|
87
|
-
nullable=False,
|
88
|
-
)
|
89
75
|
|
90
76
|
privacy_request_id = Column(
|
91
77
|
String,
|
92
78
|
nullable=False,
|
93
79
|
index=True,
|
94
80
|
)
|
95
|
-
|
96
|
-
# Use clock_timestamp() instead of NOW() to get the actual current time at row creation,
|
97
|
-
# regardless of transaction state. This prevents timestamp caching within transactions
|
98
|
-
# and ensures more accurate creation times.
|
99
|
-
# https://www.postgresql.org/docs/current/functions-datetime.html#FUNCTIONS-DATETIME-CURRENT
|
100
|
-
|
101
|
-
created_at = Column(
|
102
|
-
DateTime(timezone=True), server_default=text("clock_timestamp()")
|
103
|
-
)
|
104
|
-
updated_at = Column(
|
105
|
-
DateTime(timezone=True),
|
106
|
-
server_default=text("clock_timestamp()"),
|
107
|
-
onupdate=text("clock_timestamp()"),
|
108
|
-
)
|
@@ -48,6 +48,7 @@ from fides.api.models.audit_log import AuditLog
|
|
48
48
|
from fides.api.models.client import ClientDetail
|
49
49
|
from fides.api.models.comment import Comment, CommentReference, CommentReferenceType
|
50
50
|
from fides.api.models.fides_user import FidesUser
|
51
|
+
from fides.api.models.field_types import EncryptedLargeDataDescriptor
|
51
52
|
from fides.api.models.manual_webhook import AccessManualWebhook
|
52
53
|
from fides.api.models.policy import (
|
53
54
|
Policy,
|
@@ -72,13 +73,13 @@ from fides.api.models.privacy_request.webhook import (
|
|
72
73
|
generate_request_callback_pre_approval_jwe,
|
73
74
|
generate_request_callback_resume_jwe,
|
74
75
|
)
|
76
|
+
from fides.api.models.worker_task import ExecutionLogStatus
|
75
77
|
from fides.api.schemas.drp_privacy_request import DrpPrivacyRequestCreate
|
76
78
|
from fides.api.schemas.external_https import SecondPartyResponseFormat
|
77
79
|
from fides.api.schemas.masking.masking_secrets import MaskingSecretCache
|
78
80
|
from fides.api.schemas.policy import ActionType, CurrentStep
|
79
81
|
from fides.api.schemas.privacy_request import (
|
80
82
|
CheckpointActionRequired,
|
81
|
-
ExecutionLogStatus,
|
82
83
|
ManualAction,
|
83
84
|
PrivacyRequestSource,
|
84
85
|
PrivacyRequestStatus,
|
@@ -251,7 +252,8 @@ class PrivacyRequest(
|
|
251
252
|
awaiting_email_send_at = Column(DateTime(timezone=True), nullable=True)
|
252
253
|
|
253
254
|
# Encrypted filtered access results saved for later retrieval
|
254
|
-
|
255
|
+
_filtered_final_upload = Column( # An encrypted JSON String - Dict[Dict[str, List[Row]]] - rule keys mapped to the filtered access results
|
256
|
+
"filtered_final_upload",
|
255
257
|
StringEncryptedType(
|
256
258
|
type_in=JSONTypeOverride,
|
257
259
|
key=CONFIG.security.app_encryption_key,
|
@@ -260,6 +262,11 @@ class PrivacyRequest(
|
|
260
262
|
),
|
261
263
|
)
|
262
264
|
|
265
|
+
# Use descriptor for automatic external storage handling
|
266
|
+
filtered_final_upload = EncryptedLargeDataDescriptor(
|
267
|
+
field_name="filtered_final_upload", empty_default={}
|
268
|
+
)
|
269
|
+
|
263
270
|
# Encrypted filtered access results saved for later retrieval
|
264
271
|
access_result_urls = Column( # An encrypted JSON String - Dict[Dict[str, List[Row]]] - rule keys mapped to the filtered access results
|
265
272
|
StringEncryptedType(
|
@@ -334,6 +341,7 @@ class PrivacyRequest(
|
|
334
341
|
deleting this object from the database
|
335
342
|
"""
|
336
343
|
self.clear_cached_values()
|
344
|
+
self.cleanup_external_storage()
|
337
345
|
Attachment.delete_attachments_for_reference_and_type(
|
338
346
|
db, self.id, AttachmentReferenceType.privacy_request
|
339
347
|
)
|
@@ -1257,6 +1265,11 @@ class PrivacyRequest(
|
|
1257
1265
|
# DSR 2.0 does not cache the results so nothing to do here
|
1258
1266
|
return {}
|
1259
1267
|
|
1268
|
+
def cleanup_external_storage(self) -> None:
    """Remove the externally stored files that belong to this privacy request."""
    # Look the attribute up on the class: that yields the descriptor object
    # itself rather than the field's value, so its cleanup() can be called.
    upload_descriptor = PrivacyRequest.filtered_final_upload
    upload_descriptor.cleanup(self)
|
1272
|
+
|
1260
1273
|
def save_filtered_access_results(
|
1261
1274
|
self, db: Session, results: Dict[str, Dict[str, List[Row]]]
|
1262
1275
|
) -> None:
|
@@ -1544,7 +1557,7 @@ def get_action_required_details(
|
|
1544
1557
|
|
1545
1558
|
|
1546
1559
|
def _parse_cache_to_checkpoint_action_required(
|
1547
|
-
cache: dict[str, Any]
|
1560
|
+
cache: dict[str, Any],
|
1548
1561
|
) -> CheckpointActionRequired:
|
1549
1562
|
collection = (
|
1550
1563
|
CollectionAddress(
|