ethyca-fides 2.67.0rc0__py2.py3-none-any.whl → 2.67.1b0__py2.py3-none-any.whl
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ethyca-fides might be problematic.
- {ethyca_fides-2.67.0rc0.dist-info → ethyca_fides-2.67.1b0.dist-info}/METADATA +1 -1
- {ethyca_fides-2.67.0rc0.dist-info → ethyca_fides-2.67.1b0.dist-info}/RECORD +138 -136
- fides/_version.py +3 -3
- fides/api/common_exceptions.py +4 -0
- fides/api/graph/execution.py +16 -0
- fides/api/models/privacy_request/privacy_request.py +33 -13
- fides/api/schemas/application_config.py +1 -0
- fides/api/schemas/connection_configuration/connection_secrets_datahub.py +10 -1
- fides/api/service/connectors/base_connector.py +14 -0
- fides/api/service/connectors/bigquery_connector.py +5 -0
- fides/api/service/connectors/query_configs/bigquery_query_config.py +4 -4
- fides/api/service/connectors/query_configs/snowflake_query_config.py +3 -3
- fides/api/service/connectors/snowflake_connector.py +55 -2
- fides/api/service/connectors/sql_connector.py +107 -9
- fides/api/service/privacy_request/request_runner_service.py +3 -2
- fides/api/service/privacy_request/request_service.py +173 -32
- fides/api/task/execute_request_tasks.py +4 -0
- fides/api/task/graph_task.py +48 -2
- fides/api/util/cache.py +56 -0
- fides/api/util/memory_watchdog.py +286 -0
- fides/config/execution_settings.py +8 -0
- fides/config/utils.py +1 -0
- fides/ui-build/static/admin/404.html +1 -1
- fides/ui-build/static/admin/_next/static/chunks/5309-d9a488457898263b.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/{6780-fc7d9ddb1a03e7b3.js → 6780-b42a27e72707936d.js} +1 -1
- fides/ui-build/static/admin/_next/static/chunks/7725-539d3a906f627531.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/8735-40caf91800a3610c.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/9046-7085a401297c5520.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/pages/{_app-4b5bff46158a19a3.js → _app-750d6bd16c971bb9.js} +2 -2
- fides/ui-build/static/admin/_next/static/chunks/pages/data-discovery/action-center/[monitorId]/[systemId]-a286affa43687eb5.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/pages/data-discovery/action-center/[monitorId]-92b0bd97d8e79340.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/pages/data-discovery/action-center-ee3c0a103346fc06.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/pages/integrations/[id]-8e346fb36e8034d2.js +1 -0
- fides/ui-build/static/admin/_next/static/v1eqRIfzld3di00TTnVM9/_buildManifest.js +1 -0
- fides/ui-build/static/admin/add-systems/manual.html +1 -1
- fides/ui-build/static/admin/add-systems/multiple.html +1 -1
- fides/ui-build/static/admin/add-systems.html +1 -1
- fides/ui-build/static/admin/consent/configure/add-vendors.html +1 -1
- fides/ui-build/static/admin/consent/configure.html +1 -1
- fides/ui-build/static/admin/consent/privacy-experience/[id].html +1 -1
- fides/ui-build/static/admin/consent/privacy-experience/new.html +1 -1
- fides/ui-build/static/admin/consent/privacy-experience.html +1 -1
- fides/ui-build/static/admin/consent/privacy-notices/[id].html +1 -1
- fides/ui-build/static/admin/consent/privacy-notices/new.html +1 -1
- fides/ui-build/static/admin/consent/privacy-notices.html +1 -1
- fides/ui-build/static/admin/consent/properties.html +1 -1
- fides/ui-build/static/admin/consent/reporting.html +1 -1
- fides/ui-build/static/admin/consent.html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/projects/[projectUrn]/[resourceUrn].html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/projects/[projectUrn].html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/projects.html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/resources/[resourceUrn].html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/resources.html +1 -1
- fides/ui-build/static/admin/data-catalog.html +1 -1
- fides/ui-build/static/admin/data-discovery/action-center/[monitorId]/[systemId].html +1 -1
- fides/ui-build/static/admin/data-discovery/action-center/[monitorId].html +1 -1
- fides/ui-build/static/admin/data-discovery/action-center.html +1 -1
- fides/ui-build/static/admin/data-discovery/activity.html +1 -1
- fides/ui-build/static/admin/data-discovery/detection/[resourceUrn].html +1 -1
- fides/ui-build/static/admin/data-discovery/detection.html +1 -1
- fides/ui-build/static/admin/data-discovery/discovery/[resourceUrn].html +1 -1
- fides/ui-build/static/admin/data-discovery/discovery.html +1 -1
- fides/ui-build/static/admin/datamap.html +1 -1
- fides/ui-build/static/admin/dataset/[datasetId]/[collectionName]/[...subfieldNames].html +1 -1
- fides/ui-build/static/admin/dataset/[datasetId]/[collectionName].html +1 -1
- fides/ui-build/static/admin/dataset/[datasetId].html +1 -1
- fides/ui-build/static/admin/dataset/new.html +1 -1
- fides/ui-build/static/admin/dataset.html +1 -1
- fides/ui-build/static/admin/datastore-connection/[id].html +1 -1
- fides/ui-build/static/admin/datastore-connection/new.html +1 -1
- fides/ui-build/static/admin/datastore-connection.html +1 -1
- fides/ui-build/static/admin/index.html +1 -1
- fides/ui-build/static/admin/integrations/[id].html +1 -1
- fides/ui-build/static/admin/integrations.html +1 -1
- fides/ui-build/static/admin/lib/fides-ext-gpp.js +1 -1
- fides/ui-build/static/admin/lib/fides-headless.js +1 -1
- fides/ui-build/static/admin/lib/fides-preview.js +1 -1
- fides/ui-build/static/admin/lib/fides-tcf.js +4 -4
- fides/ui-build/static/admin/lib/fides.js +4 -4
- fides/ui-build/static/admin/login/[provider].html +1 -1
- fides/ui-build/static/admin/login.html +1 -1
- fides/ui-build/static/admin/messaging/[id].html +1 -1
- fides/ui-build/static/admin/messaging/add-template.html +1 -1
- fides/ui-build/static/admin/messaging.html +1 -1
- fides/ui-build/static/admin/poc/ant-components.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/AntForm.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/FormikAntFormItem.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/FormikControlled.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/FormikField.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/FormikSpreadField.html +1 -1
- fides/ui-build/static/admin/poc/forms.html +1 -1
- fides/ui-build/static/admin/poc/table-migration.html +1 -1
- fides/ui-build/static/admin/privacy-requests/[id].html +1 -1
- fides/ui-build/static/admin/privacy-requests/configure/messaging.html +1 -1
- fides/ui-build/static/admin/privacy-requests/configure/storage.html +1 -1
- fides/ui-build/static/admin/privacy-requests/configure.html +1 -1
- fides/ui-build/static/admin/privacy-requests.html +1 -1
- fides/ui-build/static/admin/properties/[id].html +1 -1
- fides/ui-build/static/admin/properties/add-property.html +1 -1
- fides/ui-build/static/admin/properties.html +1 -1
- fides/ui-build/static/admin/reporting/datamap.html +1 -1
- fides/ui-build/static/admin/settings/about/alpha.html +1 -1
- fides/ui-build/static/admin/settings/about.html +1 -1
- fides/ui-build/static/admin/settings/consent/[configuration_id]/[purpose_id].html +1 -1
- fides/ui-build/static/admin/settings/consent.html +1 -1
- fides/ui-build/static/admin/settings/custom-fields.html +1 -1
- fides/ui-build/static/admin/settings/domain-records.html +1 -1
- fides/ui-build/static/admin/settings/domains.html +1 -1
- fides/ui-build/static/admin/settings/email-templates.html +1 -1
- fides/ui-build/static/admin/settings/locations.html +1 -1
- fides/ui-build/static/admin/settings/organization.html +1 -1
- fides/ui-build/static/admin/settings/regulations.html +1 -1
- fides/ui-build/static/admin/systems/configure/[id]/test-datasets.html +1 -1
- fides/ui-build/static/admin/systems/configure/[id].html +1 -1
- fides/ui-build/static/admin/systems.html +1 -1
- fides/ui-build/static/admin/taxonomy.html +1 -1
- fides/ui-build/static/admin/user-management/new.html +1 -1
- fides/ui-build/static/admin/user-management/profile/[id].html +1 -1
- fides/ui-build/static/admin/user-management.html +1 -1
- fides/ui-build/static/admin/_next/static/GKmhMPa_1gMto8JZO8ENy/_buildManifest.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/5309-ce5702b9faeaff55.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/8237-841439bef6682177.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/9046-04a8c092fef1cd83.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/data-discovery/action-center/[monitorId]/[systemId]-7caea7bb58c1f153.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/data-discovery/action-center/[monitorId]-a9a70856f7be1542.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/data-discovery/action-center-1f0ea5c92ae9a2b4.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/integrations/[id]-f53fe1f2cbebda7c.js +0 -1
- {ethyca_fides-2.67.0rc0.dist-info → ethyca_fides-2.67.1b0.dist-info}/WHEEL +0 -0
- {ethyca_fides-2.67.0rc0.dist-info → ethyca_fides-2.67.1b0.dist-info}/entry_points.txt +0 -0
- {ethyca_fides-2.67.0rc0.dist-info → ethyca_fides-2.67.1b0.dist-info}/licenses/LICENSE +0 -0
- {ethyca_fides-2.67.0rc0.dist-info → ethyca_fides-2.67.1b0.dist-info}/top_level.txt +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/{3450-ca4ba70da999f264.js → 3450-69f4e16978971bb8.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/{4608-43acf39319177bee.js → 4608-f16f281f2d05d963.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/{6662-4392ba1e4c254ef7.js → 6662-a9e54ead3dc53644.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/{6954-9c4912fbce87c4df.js → 6954-ba98e778a5b45ebf.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/pages/{data-catalog-d5b01abcb76792ce.js → data-catalog-7770a8dc34bd0fc0.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/pages/datastore-connection/{new-8446418c7ad28f77.js → new-c6614583b14dc9f2.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/pages/{datastore-connection-0f29b47402292070.js → datastore-connection-3bd77864da523d41.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/pages/{integrations-142abe3e3e3e5bf7.js → integrations-7f15cd8538cdc24d.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/pages/privacy-requests/{[id]-1b6b0d703cf59389.js → [id]-79f1576b1126975c.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/pages/{privacy-requests-ccd8d9e06cf2d278.js → privacy-requests-96a08c4431b5462c.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/pages/reporting/{datamap-fd1a67892056830a.js → datamap-9d1840f8309b706e.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/pages/settings/about/{alpha-8f98a4895e74725e.js → alpha-1066f0c202ef744c.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/pages/settings/{about-8155a35a62fdb5ae.js → about-37ba24a72a06862e.js} +0 -0
- /fides/ui-build/static/admin/_next/static/chunks/pages/user-management/{new-bc4eb541906781e6.js → new-de8cb3739ab99c09.js} +0 -0
- /fides/ui-build/static/admin/_next/static/{GKmhMPa_1gMto8JZO8ENy → v1eqRIfzld3di00TTnVM9}/_ssgManifest.js +0 -0
fides/api/graph/execution.py
CHANGED

@@ -4,6 +4,7 @@ from fideslang.validation import FidesKey
 from loguru import logger
 
 from fides.api.graph.config import (
+    TERMINATOR_ADDRESS,
     Collection,
     CollectionAddress,
     Field,
@@ -157,3 +158,18 @@ class ExecutionNode(Contextualizable): # pylint: disable=too-many-instance-attr
         )
 
         return out
+
+    def has_outgoing_dependencies(self) -> bool:
+        """
+        Check if this node has outgoing edges to collections other than the terminal node.
+
+        Returns:
+            bool: True if the node has dependencies on other collections, False otherwise
+        """
+
+        for edge in self.outgoing_edges:
+            # Check if the outgoing edge points to a collection other than the terminal node
+            target_collection = edge.f2.collection_address()
+            if target_collection != TERMINATOR_ADDRESS:
+                return True
+        return False
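The new helper is what the connector layer uses to decide whether a missing table can be skipped (see the sql_connector.py hunks further down). A compact restatement of that decision, with a hypothetical helper name and imports taken from the hunks below:

from fides.api.common_exceptions import ConnectionException, TableNotFound
from fides.api.graph.execution import ExecutionNode


def decide_missing_table_action(node: ExecutionNode, table_name: str) -> None:
    # Hypothetical helper mirroring SQLConnector.handle_table_not_found further down in this diff.
    if node.has_outgoing_dependencies():
        # Downstream collections consume this node's output, so skipping would break the graph run.
        raise ConnectionException(f"Table '{table_name}' is missing and '{node.address}' has dependents")
    # Only the terminator node depends on this collection, so it is safe to skip.
    raise TableNotFound(f"Table '{table_name}' did not exist; skipping '{node.address}'")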
fides/api/models/privacy_request/privacy_request.py
CHANGED

@@ -1009,6 +1009,38 @@ class PrivacyRequest(
             request_task_celery_ids.append(request_task_id)
         return request_task_celery_ids
 
+    def cancel_celery_tasks(self) -> None:
+        """Cancel all Celery tasks associated with this privacy request.
+
+        This includes both the main privacy request task and any sub-tasks (Request Tasks).
+        """
+        task_ids: List[str] = []
+
+        # Add the main privacy request task ID
+        parent_task_id = self.get_cached_task_id()
+        if parent_task_id:
+            task_ids.append(parent_task_id)
+
+        # Add all request task IDs
+        request_task_celery_ids = self.get_request_task_celery_task_ids()
+        task_ids.extend(request_task_celery_ids)
+
+        if not task_ids:
+            return
+
+        # Revoke all Celery tasks in batch
+        logger.info(f"Revoking {len(task_ids)} tasks for privacy request {self.id}")
+        try:
+            # Use terminate=False to allow graceful shutdown if already running
+            celery_app.control.revoke(task_ids, terminate=False)
+            logger.info(
+                f"Successfully revoked {len(task_ids)} tasks for privacy request {self.id}"
+            )
+        except Exception as exc:
+            logger.warning(
+                f"Failed to revoke {len(task_ids)} tasks for privacy request {self.id}: {exc}"
+            )
+
     def cancel_processing(self, db: Session, cancel_reason: Optional[str]) -> None:
         """Cancels a privacy request. Currently should only cancel 'pending' tasks
 
@@ -1021,19 +1053,7 @@ class PrivacyRequest(
         self.canceled_at = datetime.utcnow()
         self.save(db)
 
-        task_ids = (
-            self.get_request_task_celery_task_ids()
-        )  # Celery tasks for sub tasks (DSR 3.0 Request Tasks)
-        parent_task_id = (
-            self.get_cached_task_id()
-        )  # Celery task for current Privacy Request
-        if parent_task_id:
-            task_ids.append(parent_task_id)
-
-        for celery_task_id in task_ids:
-            logger.info("Revoking task {} for request {}", celery_task_id, self.id)
-            # Only revokes if execution is not already in progress.
-            celery_app.control.revoke(celery_task_id, terminate=False)
+        self.cancel_celery_tasks()
 
     def error_processing(self, db: Session) -> None:
         """Mark privacy request as errored, and note time processing was finished"""
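The public cancellation entry point is unchanged; cancel_processing now delegates revocation to the new method. A minimal usage sketch, assuming an open SQLAlchemy Session and an already-loaded, still-pending PrivacyRequest:

# `db` and `privacy_request` are assumed to exist in the surrounding code.
privacy_request.cancel_processing(db=db, cancel_reason="Requested by the data subject")
# cancel_processing marks the request as canceled, saves it, and calls cancel_celery_tasks(),
# which revokes the parent Celery task and all DSR 3.0 request tasks in a single batch.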
fides/api/schemas/application_config.py
CHANGED

@@ -70,6 +70,7 @@ class ExecutionApplicationConfig(FidesSchema):
     subject_identity_verification_required: Optional[bool] = None
     disable_consent_identity_verification: Optional[bool] = None
     require_manual_request_approval: Optional[bool] = None
+    memory_watchdog_enabled: Optional[bool] = None
     sql_dry_run: Optional[SqlDryRunMode] = None
 
     model_config = ConfigDict(use_enum_values=True, extra="forbid")
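The new flag is toggled like any other execution setting. A sketch of an application-settings payload follows; the endpoint and payload envelope are assumptions, only the field name comes from this diff:

# Hypothetical settings payload; only the "execution" section is relevant to this change.
settings_update = {
    "execution": {
        "memory_watchdog_enabled": True,  # new in 2.67.1b0
    }
}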
fides/api/schemas/connection_configuration/connection_secrets_datahub.py
CHANGED

@@ -16,6 +16,15 @@ class PeriodicIntegrationFrequency(Enum):
     daily = "daily"
     weekly = "weekly"
     monthly = "monthly"
+    not_scheduled = "not scheduled"
+
+
+PERIODIC_INTEGRATION_FREQUENCY_TO_DAYS = {
+    PeriodicIntegrationFrequency.daily.value: 1,
+    PeriodicIntegrationFrequency.weekly.value: 7,
+    PeriodicIntegrationFrequency.monthly.value: 30,
+    PeriodicIntegrationFrequency.not_scheduled.value: -1,  # negative value to indicate that the integration is not scheduled
+}
 
 
 class DatahubSchema(ConnectionConfigSecretsSchema):
@@ -30,7 +39,7 @@ class DatahubSchema(ConnectionConfigSecretsSchema):
     )
     frequency: PeriodicIntegrationFrequency = Field(
         title="Frequency",
-        description="The frequency at which the integration should run. Available options are daily, weekly, and monthly.",
+        description="The frequency at which the integration should run. Available options are daily, weekly, monthly, and not scheduled.",
     )
     glossary_node: str = Field(
         title="Glossary node",
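A small sketch of how scheduling code might consume the new mapping (the module path comes from the file list above; the surrounding scheduler is assumed):

from fides.api.schemas.connection_configuration.connection_secrets_datahub import (
    PERIODIC_INTEGRATION_FREQUENCY_TO_DAYS,
    PeriodicIntegrationFrequency,
)

frequency = PeriodicIntegrationFrequency.not_scheduled
interval_days = PERIODIC_INTEGRATION_FREQUENCY_TO_DAYS[frequency.value]
if interval_days < 0:
    # -1 is the sentinel for "not scheduled": do not queue a periodic DataHub sync.
    print("DataHub integration is not scheduled")
else:
    print(f"Schedule the DataHub integration every {interval_days} day(s)")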
fides/api/service/connectors/base_connector.py
CHANGED

@@ -144,3 +144,17 @@ class BaseConnector(Generic[DB_CONNECTOR_TYPE], ABC):
         # Defaulting to true for now so we can keep the default behavior and
         # incrementally determine the need for primary keys across all connectors
         return True
+
+    def get_qualified_table_name(self, node: ExecutionNode) -> str:
+        """
+        Get the fully qualified table name for the given execution node.
+        """
+        raise NotImplementedError(
+            "get_qualified_table_name is not implemented by this connector"
+        )
+
+    def table_exists(self, qualified_table_name: str) -> bool:
+        """
+        Check if a table exists in the datastore.
+        """
+        raise NotImplementedError("table_exists is not implemented by this connector")
fides/api/service/connectors/bigquery_connector.py
CHANGED

@@ -70,6 +70,11 @@ class BigQueryConnector(SQLConnector):
             node, SQLConnector.get_namespace_meta(db, node.address.dataset)
         )
 
+    def get_qualified_table_name(self, node: ExecutionNode) -> str:
+        """Get fully qualified BigQuery table name using existing query config logic"""
+        query_config = self.query_config(node)
+        return query_config.generate_table_name()
+
     def partitioned_retrieval(
         self,
         query_config: SQLQueryConfig,
fides/api/service/connectors/query_configs/bigquery_query_config.py
CHANGED

@@ -93,7 +93,7 @@ class BigQueryQueryConfig(QueryStringWithoutTuplesOverrideQueryConfig):
 
         return where_clauses
 
-    def
+    def generate_table_name(self) -> str:
         """
         Prepends the dataset ID and project ID to the base table name
         if the BigQuery namespace meta is provided.
@@ -116,7 +116,7 @@ class BigQueryQueryConfig(QueryStringWithoutTuplesOverrideQueryConfig):
         Returns a query string with backtick formatting for tables that have the same names as
         BigQuery reserved words.
         """
-        return f'SELECT {field_list} FROM `{self.
+        return f'SELECT {field_list} FROM `{self.generate_table_name()}` WHERE ({" OR ".join(clauses)})'
 
     def generate_masking_stmt(
         self,
@@ -197,7 +197,7 @@ class BigQueryQueryConfig(QueryStringWithoutTuplesOverrideQueryConfig):
             )
             return []
 
-        table = Table(self.
+        table = Table(self.generate_table_name(), MetaData(bind=client), autoload=True)
         where_clauses: List[ColumnElement] = [
             table.c[k] == v for k, v in non_empty_reference_field_keys.items()
         ]
@@ -256,7 +256,7 @@ class BigQueryQueryConfig(QueryStringWithoutTuplesOverrideQueryConfig):
             )
             return []
 
-        table = Table(self.
+        table = Table(self.generate_table_name(), MetaData(bind=client), autoload=True)
 
         # Build individual reference clauses
         where_clauses: List[ColumnElement] = []
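For orientation, when BigQuery namespace metadata supplies a project and dataset, the generated access query takes roughly this shape (identifiers are illustrative, not taken from the diff):

# Illustrative only: generate_table_name() prepends project and dataset when namespace meta is present.
field_list = "name, email"
qualified_table = "my-project.my_dataset.customer"  # project.dataset.table
query = f'SELECT {field_list} FROM `{qualified_table}` WHERE (email = :email)'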
fides/api/service/connectors/query_configs/snowflake_query_config.py
CHANGED

@@ -30,7 +30,7 @@ class SnowflakeQueryConfig(SQLQueryConfig):
         """Returns field names in clauses surrounded by quotation marks as required by Snowflake syntax."""
         return f'"{string_path}" {operator} (:{operand})'
 
-    def
+    def generate_table_name(self) -> str:
         """
         Prepends the dataset name and schema to the base table name
         if the Snowflake namespace meta is provided.
@@ -57,7 +57,7 @@ class SnowflakeQueryConfig(SQLQueryConfig):
         clauses: List[str],
     ) -> str:
         """Returns a query string with double quotation mark formatting as required by Snowflake syntax."""
-        return f'SELECT {field_list} FROM {self.
+        return f'SELECT {field_list} FROM {self.generate_table_name()} WHERE ({" OR ".join(clauses)})'
 
     def format_key_map_for_update_stmt(self, param_map: Dict[str, Any]) -> List[str]:
         """Adds the appropriate formatting for update statements in this datastore."""
@@ -69,4 +69,4 @@ class SnowflakeQueryConfig(SQLQueryConfig):
         where_clauses: List[str],
     ) -> str:
         """Returns a parameterized update statement in Snowflake dialect."""
-        return f'UPDATE {self.
+        return f'UPDATE {self.generate_table_name()} SET {", ".join(update_clauses)} WHERE {" AND ".join(where_clauses)}'
fides/api/service/connectors/snowflake_connector.py
CHANGED

@@ -3,11 +3,11 @@ from typing import Any, Dict, Union
 from cryptography.hazmat.backends import default_backend
 from cryptography.hazmat.primitives import serialization
 from snowflake.sqlalchemy import URL as Snowflake_URL
+from sqlalchemy import text
 from sqlalchemy.orm import Session
 
 from fides.api.graph.execution import ExecutionNode
 from fides.api.schemas.connection_configuration import SnowflakeSchema
-from fides.api.service.connectors.query_configs.query_config import SQLQueryConfig
 from fides.api.service.connectors.query_configs.snowflake_query_config import (
     SnowflakeQueryConfig,
 )
@@ -69,10 +69,63 @@ class SnowflakeConnector(SQLConnector):
             connect_args["private_key"] = private_key
         return connect_args
 
-    def query_config(self, node: ExecutionNode) ->
+    def query_config(self, node: ExecutionNode) -> SnowflakeQueryConfig:
         """Query wrapper corresponding to the input execution_node."""
 
         db: Session = Session.object_session(self.configuration)
         return SnowflakeQueryConfig(
             node, SQLConnector.get_namespace_meta(db, node.address.dataset)
         )
+
+    def get_qualified_table_name(self, node: ExecutionNode) -> str:
+        """Get fully qualified Snowflake table name using existing query config logic"""
+        query_config = self.query_config(node)
+        return query_config.generate_table_name()
+
+    def table_exists(self, qualified_table_name: str) -> bool:
+        """
+        Check if table exists in Snowflake using the proper three-part naming convention.
+
+        Snowflake supports database.schema.table naming, and the generic SQLConnector
+        table_exists method doesn't handle quoted identifiers properly.
+        """
+        try:
+            client = self.create_client()
+            with client.connect() as connection:
+                # Remove quotes and split the parts
+                clean_name = qualified_table_name.replace('"', "")
+                parts = clean_name.split(".")
+
+                if len(parts) == 1:
+                    # Simple table name - use current schema
+                    table_name = parts[0]
+                    result = connection.execute(text(f'DESC TABLE "{table_name}"'))
+                elif len(parts) == 2:
+                    # schema.table format
+                    schema_name, table_name = parts
+                    result = connection.execute(
+                        text(f'DESC TABLE "{schema_name}"."{table_name}"')
+                    )
+                elif len(parts) >= 3:
+                    # database.schema.table format
+                    database_name, schema_name, table_name = (
+                        parts[-3],
+                        parts[-2],
+                        parts[-1],
+                    )
+                    # Use the database.schema.table format
+                    result = connection.execute(
+                        text(
+                            f'DESC TABLE "{database_name}"."{schema_name}"."{table_name}"'
+                        )
+                    )
+                else:
+                    return False
+
+                # If we get here without an exception, the table exists
+                result.close()
+                return True
+
+        except Exception:
+            # Table doesn't exist or other error
+            return False
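The quoting logic above reduces to a simple normalization before issuing DESC TABLE. A worked example of the string handling it performs:

# Mirrors the name handling in SnowflakeConnector.table_exists above.
qualified = '"ANALYTICS"."PUBLIC"."CUSTOMERS"'
parts = qualified.replace('"', "").split(".")
# parts == ["ANALYTICS", "PUBLIC", "CUSTOMERS"], so the probe issued is:
#   DESC TABLE "ANALYTICS"."PUBLIC"."CUSTOMERS"
print(parts)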
fides/api/service/connectors/sql_connector.py
CHANGED

@@ -6,7 +6,7 @@ import paramiko
 import sshtunnel  # type: ignore
 from aiohttp.client_exceptions import ClientResponseError
 from loguru import logger
-from sqlalchemy import Column, select
+from sqlalchemy import Column, inspect, select
 from sqlalchemy.dialects.postgresql import JSONB
 from sqlalchemy.engine import (  # type: ignore
     Connection,
@@ -22,6 +22,7 @@ from sqlalchemy.sql.elements import TextClause
 from fides.api.common_exceptions import (
     ConnectionException,
     SSHTunnelConfigNotFoundException,
+    TableNotFound,
 )
 from fides.api.graph.execution import ExecutionNode
 from fides.api.models.connectionconfig import ConnectionConfig, ConnectionTestStatus
@@ -189,14 +190,28 @@ class SQLConnector(BaseConnector[Engine]):
 
         logger.info("Starting data retrieval for {}", node.address)
         with client.connect() as connection:
-            self.set_schema(connection)
-            if (
-                query_config.partitioning
-            ):  # only BigQuery supports partitioning, for now
-                return self.partitioned_retrieval(query_config, connection, stmt)
-
-            results = connection.execute(stmt)
-            return self.cursor_result_to_rows(results)
+            try:
+                self.set_schema(connection)
+                if (
+                    query_config.partitioning
+                ):  # only BigQuery supports partitioning, for now
+                    return self.partitioned_retrieval(query_config, connection, stmt)
+
+                results = connection.execute(stmt)
+                return self.cursor_result_to_rows(results)
+            except Exception as exc:
+                # Check if table exists using qualified table name
+                qualified_table_name = self.get_qualified_table_name(node)
+                if not self.table_exists(qualified_table_name):
+                    # Central decision point - will raise TableNotFound or ConnectionException
+                    self.handle_table_not_found(
+                        node=node,
+                        table_name=qualified_table_name,
+                        operation_context="data retrieval",
+                        original_exception=exc,
+                    )
+                # Table exists or can't check - re-raise original exception
+                raise
 
     def mask_data(
         self,
@@ -290,3 +305,86 @@ class SQLConnector(BaseConnector[Engine]):
         raise NotImplementedError(
             "Partitioned retrieval is only supported for BigQuery currently!"
         )
+
+    def get_qualified_table_name(self, node: ExecutionNode) -> str:
+        """
+        Get the fully qualified table name for this database.
+
+        Default: Returns the simple collection name
+        Override: Database-specific connectors can implement namespace resolution
+        """
+        return node.collection.name
+
+    def table_exists(self, qualified_table_name: str) -> bool:
+        """
+        Check if table exists using SQLAlchemy introspection.
+
+        This is a generic implementation that should work for most SQL databases.
+        Override: Connectors can implement database-specific table existence checking
+        """
+        try:
+            client = self.create_client()
+            with client.connect() as connection:
+                inspector = inspect(connection)
+
+                # For simple table names
+                if "." not in qualified_table_name:
+                    return inspector.has_table(qualified_table_name)
+
+                # For qualified names like schema.table or database.schema.table
+                parts = qualified_table_name.split(".")
+
+                if len(parts) == 2:
+                    # schema.table format
+                    schema_name, table_name = parts
+                    return inspector.has_table(table_name, schema=schema_name)
+
+                if len(parts) >= 3:
+                    # database.schema.table format (use schema.table)
+                    schema_name, table_name = parts[-2], parts[-1]
+                    return inspector.has_table(table_name, schema=schema_name)
+
+                # Fallback for unexpected format
+                return inspector.has_table(qualified_table_name)
+
+        except Exception as exc:
+            # Graceful fallback - if we can't check, assume table exists
+            # to preserve existing behavior for connectors that don't implement this
+            logger.error("Unable to check if table exists, assuming it does: {}", exc)
+            return True
+
+    def handle_table_not_found(
+        self,
+        node: ExecutionNode,
+        table_name: str,
+        operation_context: str,
+        original_exception: Optional[Exception] = None,
+    ) -> None:
+        """
+        Central decision point for table-not-found scenarios.
+
+        Raises TableNotFound (for collection skipping) or ConnectionException (for hard errors).
+        The raised exception will be caught by the @retry decorator in graph_task.py.
+
+        Args:
+            node: The ExecutionNode being processed
+            table_name: Name of the missing table
+            operation_context: Context like "data retrieval" or "data masking"
+            original_exception: The original exception that triggered this check
+        """
+        if node.has_outgoing_dependencies():
+            # Collection has dependencies - cannot skip safely
+            error_msg = (
+                f"Table '{table_name}' did not exist during {operation_context}. "
+                f"Cannot skip collection '{node.address}' because other collections depend on it."
+            )
+            if original_exception:
+                raise ConnectionException(error_msg) from original_exception
+            raise ConnectionException(error_msg)
+
+        # Safe to skip - raise TableNotFound for @retry decorator to catch
+        skip_msg = f"Table '{table_name}' did not exist during {operation_context}."
+        if original_exception:
+            raise TableNotFound(skip_msg) from original_exception
+
+        raise TableNotFound(skip_msg)
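The TableNotFound branch is meant to be caught by the task layer; the corresponding graph_task.py and execute_request_tasks.py changes are not included in this excerpt. A hedged sketch of a caller that skips a collection when the exception surfaces, with the retrieve_data signature assumed:

from loguru import logger

from fides.api.common_exceptions import TableNotFound

# Hypothetical caller; the real handling lives behind the @retry decorator in graph_task.py (not shown).
try:
    rows = connector.retrieve_data(node, policy, privacy_request, request_task, input_data)
except TableNotFound as exc:
    logger.warning("Skipping collection {} because its table is missing: {}", node.address, exc)
    rows = []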
fides/api/service/privacy_request/request_runner_service.py
CHANGED

@@ -80,6 +80,7 @@ from fides.api.util.cache import get_all_masking_secret_keys
 from fides.api.util.collection_util import Row
 from fides.api.util.logger import Pii, _log_exception, _log_warning
 from fides.api.util.logger_context_utils import LoggerContextKeys, log_context
+from fides.api.util.memory_watchdog import memory_limiter
 from fides.common.api.v1.urn_registry import (
     PRIVACY_REQUEST_TRANSFER_TO_PARENT,
     V1_URL_PREFIX,
@@ -358,8 +359,8 @@ def upload_and_save_access_results( # pylint: disable=R0912
 
 
 @celery_app.task(base=DatabaseTask, bind=True)
-
-
+@memory_limiter
+@log_context(capture_args={"privacy_request_id": LoggerContextKeys.privacy_request_id})
 def run_privacy_request(
     self: DatabaseTask,
     privacy_request_id: str,