ethyca-fides 2.67.0rc1__py2.py3-none-any.whl → 2.67.1b0__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ethyca_fides-2.67.0rc1.dist-info → ethyca_fides-2.67.1b0.dist-info}/METADATA +1 -1
- {ethyca_fides-2.67.0rc1.dist-info → ethyca_fides-2.67.1b0.dist-info}/RECORD +110 -109
- fides/_version.py +3 -3
- fides/api/common_exceptions.py +4 -0
- fides/api/graph/execution.py +16 -0
- fides/api/models/privacy_request/privacy_request.py +33 -13
- fides/api/schemas/application_config.py +1 -0
- fides/api/schemas/connection_configuration/connection_secrets_datahub.py +10 -1
- fides/api/service/connectors/base_connector.py +14 -0
- fides/api/service/connectors/bigquery_connector.py +5 -0
- fides/api/service/connectors/query_configs/bigquery_query_config.py +4 -4
- fides/api/service/connectors/query_configs/snowflake_query_config.py +3 -3
- fides/api/service/connectors/snowflake_connector.py +55 -2
- fides/api/service/connectors/sql_connector.py +107 -9
- fides/api/service/privacy_request/request_runner_service.py +3 -2
- fides/api/service/privacy_request/request_service.py +173 -32
- fides/api/task/execute_request_tasks.py +4 -0
- fides/api/task/graph_task.py +48 -2
- fides/api/util/cache.py +56 -0
- fides/api/util/memory_watchdog.py +286 -0
- fides/config/execution_settings.py +8 -0
- fides/config/utils.py +1 -0
- fides/ui-build/static/admin/404.html +1 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/{_app-5c3a63bb1697f34c.js → _app-750d6bd16c971bb9.js} +1 -1
- fides/ui-build/static/admin/add-systems/manual.html +1 -1
- fides/ui-build/static/admin/add-systems/multiple.html +1 -1
- fides/ui-build/static/admin/add-systems.html +1 -1
- fides/ui-build/static/admin/consent/configure/add-vendors.html +1 -1
- fides/ui-build/static/admin/consent/configure.html +1 -1
- fides/ui-build/static/admin/consent/privacy-experience/[id].html +1 -1
- fides/ui-build/static/admin/consent/privacy-experience/new.html +1 -1
- fides/ui-build/static/admin/consent/privacy-experience.html +1 -1
- fides/ui-build/static/admin/consent/privacy-notices/[id].html +1 -1
- fides/ui-build/static/admin/consent/privacy-notices/new.html +1 -1
- fides/ui-build/static/admin/consent/privacy-notices.html +1 -1
- fides/ui-build/static/admin/consent/properties.html +1 -1
- fides/ui-build/static/admin/consent/reporting.html +1 -1
- fides/ui-build/static/admin/consent.html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/projects/[projectUrn]/[resourceUrn].html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/projects/[projectUrn].html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/projects.html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/resources/[resourceUrn].html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/resources.html +1 -1
- fides/ui-build/static/admin/data-catalog.html +1 -1
- fides/ui-build/static/admin/data-discovery/action-center/[monitorId]/[systemId].html +1 -1
- fides/ui-build/static/admin/data-discovery/action-center/[monitorId].html +1 -1
- fides/ui-build/static/admin/data-discovery/action-center.html +1 -1
- fides/ui-build/static/admin/data-discovery/activity.html +1 -1
- fides/ui-build/static/admin/data-discovery/detection/[resourceUrn].html +1 -1
- fides/ui-build/static/admin/data-discovery/detection.html +1 -1
- fides/ui-build/static/admin/data-discovery/discovery/[resourceUrn].html +1 -1
- fides/ui-build/static/admin/data-discovery/discovery.html +1 -1
- fides/ui-build/static/admin/datamap.html +1 -1
- fides/ui-build/static/admin/dataset/[datasetId]/[collectionName]/[...subfieldNames].html +1 -1
- fides/ui-build/static/admin/dataset/[datasetId]/[collectionName].html +1 -1
- fides/ui-build/static/admin/dataset/[datasetId].html +1 -1
- fides/ui-build/static/admin/dataset/new.html +1 -1
- fides/ui-build/static/admin/dataset.html +1 -1
- fides/ui-build/static/admin/datastore-connection/[id].html +1 -1
- fides/ui-build/static/admin/datastore-connection/new.html +1 -1
- fides/ui-build/static/admin/datastore-connection.html +1 -1
- fides/ui-build/static/admin/index.html +1 -1
- fides/ui-build/static/admin/integrations/[id].html +1 -1
- fides/ui-build/static/admin/integrations.html +1 -1
- fides/ui-build/static/admin/login/[provider].html +1 -1
- fides/ui-build/static/admin/login.html +1 -1
- fides/ui-build/static/admin/messaging/[id].html +1 -1
- fides/ui-build/static/admin/messaging/add-template.html +1 -1
- fides/ui-build/static/admin/messaging.html +1 -1
- fides/ui-build/static/admin/poc/ant-components.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/AntForm.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/FormikAntFormItem.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/FormikControlled.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/FormikField.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/FormikSpreadField.html +1 -1
- fides/ui-build/static/admin/poc/forms.html +1 -1
- fides/ui-build/static/admin/poc/table-migration.html +1 -1
- fides/ui-build/static/admin/privacy-requests/[id].html +1 -1
- fides/ui-build/static/admin/privacy-requests/configure/messaging.html +1 -1
- fides/ui-build/static/admin/privacy-requests/configure/storage.html +1 -1
- fides/ui-build/static/admin/privacy-requests/configure.html +1 -1
- fides/ui-build/static/admin/privacy-requests.html +1 -1
- fides/ui-build/static/admin/properties/[id].html +1 -1
- fides/ui-build/static/admin/properties/add-property.html +1 -1
- fides/ui-build/static/admin/properties.html +1 -1
- fides/ui-build/static/admin/reporting/datamap.html +1 -1
- fides/ui-build/static/admin/settings/about/alpha.html +1 -1
- fides/ui-build/static/admin/settings/about.html +1 -1
- fides/ui-build/static/admin/settings/consent/[configuration_id]/[purpose_id].html +1 -1
- fides/ui-build/static/admin/settings/consent.html +1 -1
- fides/ui-build/static/admin/settings/custom-fields.html +1 -1
- fides/ui-build/static/admin/settings/domain-records.html +1 -1
- fides/ui-build/static/admin/settings/domains.html +1 -1
- fides/ui-build/static/admin/settings/email-templates.html +1 -1
- fides/ui-build/static/admin/settings/locations.html +1 -1
- fides/ui-build/static/admin/settings/organization.html +1 -1
- fides/ui-build/static/admin/settings/regulations.html +1 -1
- fides/ui-build/static/admin/systems/configure/[id]/test-datasets.html +1 -1
- fides/ui-build/static/admin/systems/configure/[id].html +1 -1
- fides/ui-build/static/admin/systems.html +1 -1
- fides/ui-build/static/admin/taxonomy.html +1 -1
- fides/ui-build/static/admin/user-management/new.html +1 -1
- fides/ui-build/static/admin/user-management/profile/[id].html +1 -1
- fides/ui-build/static/admin/user-management.html +1 -1
- {ethyca_fides-2.67.0rc1.dist-info → ethyca_fides-2.67.1b0.dist-info}/WHEEL +0 -0
- {ethyca_fides-2.67.0rc1.dist-info → ethyca_fides-2.67.1b0.dist-info}/entry_points.txt +0 -0
- {ethyca_fides-2.67.0rc1.dist-info → ethyca_fides-2.67.1b0.dist-info}/licenses/LICENSE +0 -0
- {ethyca_fides-2.67.0rc1.dist-info → ethyca_fides-2.67.1b0.dist-info}/top_level.txt +0 -0
- /fides/ui-build/static/admin/_next/static/{ZIM71ZcqBBeTYHc-MN9_n → v1eqRIfzld3di00TTnVM9}/_buildManifest.js +0 -0
- /fides/ui-build/static/admin/_next/static/{ZIM71ZcqBBeTYHc-MN9_n → v1eqRIfzld3di00TTnVM9}/_ssgManifest.js +0 -0
fides/api/service/connectors/query_configs/bigquery_query_config.py

@@ -93,7 +93,7 @@ class BigQueryQueryConfig(QueryStringWithoutTuplesOverrideQueryConfig):
 
         return where_clauses
 
-    def
+    def generate_table_name(self) -> str:
         """
         Prepends the dataset ID and project ID to the base table name
         if the BigQuery namespace meta is provided.

@@ -116,7 +116,7 @@ class BigQueryQueryConfig(QueryStringWithoutTuplesOverrideQueryConfig):
         Returns a query string with backtick formatting for tables that have the same names as
         BigQuery reserved words.
         """
-        return f'SELECT {field_list} FROM `{self.
+        return f'SELECT {field_list} FROM `{self.generate_table_name()}` WHERE ({" OR ".join(clauses)})'
 
     def generate_masking_stmt(
         self,

@@ -197,7 +197,7 @@ class BigQueryQueryConfig(QueryStringWithoutTuplesOverrideQueryConfig):
             )
             return []
 
-        table = Table(self.
+        table = Table(self.generate_table_name(), MetaData(bind=client), autoload=True)
         where_clauses: List[ColumnElement] = [
             table.c[k] == v for k, v in non_empty_reference_field_keys.items()
         ]

@@ -256,7 +256,7 @@ class BigQueryQueryConfig(QueryStringWithoutTuplesOverrideQueryConfig):
             )
             return []
 
-        table = Table(self.
+        table = Table(self.generate_table_name(), MetaData(bind=client), autoload=True)
 
         # Build individual reference clauses
         where_clauses: List[ColumnElement] = []
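The diff shows only the new `generate_table_name` signature; its body is not included. As a rough sketch of what the docstring describes — assuming a namespace-meta dict with `project_id` and `dataset_id` keys, which is an assumption, not the shown code:

```python
# Hypothetical sketch; the real method body is not shown in this diff.
# Assumes namespace meta carries "project_id" and "dataset_id", mirroring
# the docstring "Prepends the dataset ID and project ID ...".
from typing import Optional


def generate_table_name(table: str, namespace_meta: Optional[dict] = None) -> str:
    """Build `project.dataset.table` when BigQuery namespace meta is present."""
    if not namespace_meta:
        return table
    dataset = namespace_meta.get("dataset_id")
    project = namespace_meta.get("project_id")
    qualified = f"{dataset}.{table}" if dataset else table
    return f"{project}.{qualified}" if project else qualified


assert generate_table_name("users") == "users"
assert generate_table_name("users", {"project_id": "p", "dataset_id": "d"}) == "p.d.users"
```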
fides/api/service/connectors/query_configs/snowflake_query_config.py

@@ -30,7 +30,7 @@ class SnowflakeQueryConfig(SQLQueryConfig):
         """Returns field names in clauses surrounded by quotation marks as required by Snowflake syntax."""
         return f'"{string_path}" {operator} (:{operand})'
 
-    def
+    def generate_table_name(self) -> str:
         """
         Prepends the dataset name and schema to the base table name
         if the Snowflake namespace meta is provided.

@@ -57,7 +57,7 @@ class SnowflakeQueryConfig(SQLQueryConfig):
         clauses: List[str],
     ) -> str:
         """Returns a query string with double quotation mark formatting as required by Snowflake syntax."""
-        return f'SELECT {field_list} FROM {self.
+        return f'SELECT {field_list} FROM {self.generate_table_name()} WHERE ({" OR ".join(clauses)})'
 
     def format_key_map_for_update_stmt(self, param_map: Dict[str, Any]) -> List[str]:
         """Adds the appropriate formatting for update statements in this datastore."""

@@ -69,4 +69,4 @@ class SnowflakeQueryConfig(SQLQueryConfig):
         where_clauses: List[str],
     ) -> str:
         """Returns a parameterized update statement in Snowflake dialect."""
-        return f'UPDATE {self.
+        return f'UPDATE {self.generate_table_name()} SET {", ".join(update_clauses)} WHERE {" AND ".join(where_clauses)}'
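For orientation, this is the kind of SQL the updated Snowflake query config emits, assuming `generate_table_name()` returns a fully quoted `"DB"."SCHEMA"."USERS"` identifier (illustrative values, not taken from the diff):

```python
# Illustrative only: plausible output of the changed methods above.
table = '"DB"."SCHEMA"."USERS"'
field_list = '"email", "name"'
clauses = ['"email" = (:email)']

select_stmt = f'SELECT {field_list} FROM {table} WHERE ({" OR ".join(clauses)})'
print(select_stmt)
# SELECT "email", "name" FROM "DB"."SCHEMA"."USERS" WHERE ("email" = (:email))

update_clauses = ['"name" = :masked_name']
where_clauses = ['"email" = :email']
update_stmt = (
    f'UPDATE {table} SET {", ".join(update_clauses)} '
    f'WHERE {" AND ".join(where_clauses)}'
)
print(update_stmt)
# UPDATE "DB"."SCHEMA"."USERS" SET "name" = :masked_name WHERE "email" = :email
```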
fides/api/service/connectors/snowflake_connector.py

@@ -3,11 +3,12 @@ from typing import Any, Dict, Union
 from cryptography.hazmat.backends import default_backend
 from cryptography.hazmat.primitives import serialization
 from snowflake.sqlalchemy import URL as Snowflake_URL
+from sqlalchemy import text
 from sqlalchemy.orm import Session
 
 from fides.api.graph.execution import ExecutionNode
 from fides.api.schemas.connection_configuration import SnowflakeSchema
-from fides.api.service.connectors.query_configs.query_config import SQLQueryConfig
 from fides.api.service.connectors.query_configs.snowflake_query_config import (
     SnowflakeQueryConfig,
 )

@@ -69,10 +69,63 @@ class SnowflakeConnector(SQLConnector):
             connect_args["private_key"] = private_key
         return connect_args
 
-    def query_config(self, node: ExecutionNode) ->
+    def query_config(self, node: ExecutionNode) -> SnowflakeQueryConfig:
         """Query wrapper corresponding to the input execution_node."""
 
         db: Session = Session.object_session(self.configuration)
         return SnowflakeQueryConfig(
             node, SQLConnector.get_namespace_meta(db, node.address.dataset)
         )
+
+    def get_qualified_table_name(self, node: ExecutionNode) -> str:
+        """Get fully qualified Snowflake table name using existing query config logic"""
+        query_config = self.query_config(node)
+        return query_config.generate_table_name()
+
+    def table_exists(self, qualified_table_name: str) -> bool:
+        """
+        Check if table exists in Snowflake using the proper three-part naming convention.
+
+        Snowflake supports database.schema.table naming, and the generic SQLConnector
+        table_exists method doesn't handle quoted identifiers properly.
+        """
+        try:
+            client = self.create_client()
+            with client.connect() as connection:
+                # Remove quotes and split the parts
+                clean_name = qualified_table_name.replace('"', "")
+                parts = clean_name.split(".")
+
+                if len(parts) == 1:
+                    # Simple table name - use current schema
+                    table_name = parts[0]
+                    result = connection.execute(text(f'DESC TABLE "{table_name}"'))
+                elif len(parts) == 2:
+                    # schema.table format
+                    schema_name, table_name = parts
+                    result = connection.execute(
+                        text(f'DESC TABLE "{schema_name}"."{table_name}"')
+                    )
+                elif len(parts) >= 3:
+                    # database.schema.table format
+                    database_name, schema_name, table_name = (
+                        parts[-3],
+                        parts[-2],
+                        parts[-1],
+                    )
+                    # Use the database.schema.table format
+                    result = connection.execute(
+                        text(
+                            f'DESC TABLE "{database_name}"."{schema_name}"."{table_name}"'
+                        )
+                    )
+                else:
+                    return False
+
+                # If we get here without an exception, the table exists
+                result.close()
+                return True
+
+        except Exception:
+            # Table doesn't exist or other error
+            return False
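The branching in the new `table_exists` is driven entirely by how many dot-separated parts survive after stripping quotes. A standalone sketch of just that step (the helper name is hypothetical):

```python
# Standalone sketch of the name-splitting logic used by the new
# SnowflakeConnector.table_exists above. The helper name is hypothetical.
def split_qualified_name(qualified_table_name: str) -> list[str]:
    """Strip quotes, then split into (database, schema, table) parts."""
    return qualified_table_name.replace('"', "").split(".")


assert split_qualified_name('"ANALYTICS"."PUBLIC"."USERS"') == [
    "ANALYTICS",
    "PUBLIC",
    "USERS",
]
# One part      -> DESC TABLE "USERS"
# Two parts     -> DESC TABLE "PUBLIC"."USERS"
# Three or more -> DESC TABLE "ANALYTICS"."PUBLIC"."USERS" (last three parts)
```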
fides/api/service/connectors/sql_connector.py

@@ -6,7 +6,7 @@ import paramiko
 import sshtunnel  # type: ignore
 from aiohttp.client_exceptions import ClientResponseError
 from loguru import logger
-from sqlalchemy import Column, select
+from sqlalchemy import Column, inspect, select
 from sqlalchemy.dialects.postgresql import JSONB
 from sqlalchemy.engine import (  # type: ignore
     Connection,

@@ -22,6 +22,7 @@ from sqlalchemy.sql.elements import TextClause
 from fides.api.common_exceptions import (
     ConnectionException,
     SSHTunnelConfigNotFoundException,
+    TableNotFound,
 )
 from fides.api.graph.execution import ExecutionNode
 from fides.api.models.connectionconfig import ConnectionConfig, ConnectionTestStatus
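`TableNotFound` is imported from fides.api.common_exceptions, which the file list shows growing by four lines; its definition is not in this diff. A plausible minimal sketch (the base class is an assumption):

```python
# Hypothetical sketch of the new exception in fides/api/common_exceptions.py
# (the file list shows only "+4 -0" for that file; Exception as the base is a guess).
class TableNotFound(Exception):
    """Raised when a queried table does not exist in the datastore."""
```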
@@ -189,14 +190,28 @@ class SQLConnector(BaseConnector[Engine]):
 
         logger.info("Starting data retrieval for {}", node.address)
         with client.connect() as connection:
-
-
-
-
-
-
-
-
+            try:
+                self.set_schema(connection)
+                if (
+                    query_config.partitioning
+                ):  # only BigQuery supports partitioning, for now
+                    return self.partitioned_retrieval(query_config, connection, stmt)
+
+                results = connection.execute(stmt)
+                return self.cursor_result_to_rows(results)
+            except Exception as exc:
+                # Check if table exists using qualified table name
+                qualified_table_name = self.get_qualified_table_name(node)
+                if not self.table_exists(qualified_table_name):
+                    # Central decision point - will raise TableNotFound or ConnectionException
+                    self.handle_table_not_found(
+                        node=node,
+                        table_name=qualified_table_name,
+                        operation_context="data retrieval",
+                        original_exception=exc,
+                    )
+                # Table exists or can't check - re-raise original exception
+                raise
 
     def mask_data(
         self,

@@ -290,3 +305,86 @@ class SQLConnector(BaseConnector[Engine]):
         raise NotImplementedError(
             "Partitioned retrieval is only supported for BigQuery currently!"
         )
+
+    def get_qualified_table_name(self, node: ExecutionNode) -> str:
+        """
+        Get the fully qualified table name for this database.
+
+        Default: Returns the simple collection name
+        Override: Database-specific connectors can implement namespace resolution
+        """
+        return node.collection.name
+
+    def table_exists(self, qualified_table_name: str) -> bool:
+        """
+        Check if table exists using SQLAlchemy introspection.
+
+        This is a generic implementation that should work for most SQL databases.
+        Override: Connectors can implement database-specific table existence checking
+        """
+        try:
+            client = self.create_client()
+            with client.connect() as connection:
+                inspector = inspect(connection)
+
+                # For simple table names
+                if "." not in qualified_table_name:
+                    return inspector.has_table(qualified_table_name)
+
+                # For qualified names like schema.table or database.schema.table
+                parts = qualified_table_name.split(".")
+
+                if len(parts) == 2:
+                    # schema.table format
+                    schema_name, table_name = parts
+                    return inspector.has_table(table_name, schema=schema_name)
+
+                if len(parts) >= 3:
+                    # database.schema.table format (use schema.table)
+                    schema_name, table_name = parts[-2], parts[-1]
+                    return inspector.has_table(table_name, schema=schema_name)
+
+                # Fallback for unexpected format
+                return inspector.has_table(qualified_table_name)
+
+        except Exception as exc:
+            # Graceful fallback - if we can't check, assume table exists
+            # to preserve existing behavior for connectors that don't implement this
+            logger.error("Unable to check if table exists, assuming it does: {}", exc)
+            return True
+
+    def handle_table_not_found(
+        self,
+        node: ExecutionNode,
+        table_name: str,
+        operation_context: str,
+        original_exception: Optional[Exception] = None,
+    ) -> None:
+        """
+        Central decision point for table-not-found scenarios.
+
+        Raises TableNotFound (for collection skipping) or ConnectionException (for hard errors).
+        The raised exception will be caught by the @retry decorator in graph_task.py.
+
+        Args:
+            node: The ExecutionNode being processed
+            table_name: Name of the missing table
+            operation_context: Context like "data retrieval" or "data masking"
+            original_exception: The original exception that triggered this check
+        """
+        if node.has_outgoing_dependencies():
+            # Collection has dependencies - cannot skip safely
+            error_msg = (
+                f"Table '{table_name}' did not exist during {operation_context}. "
+                f"Cannot skip collection '{node.address}' because other collections depend on it."
+            )
+            if original_exception:
+                raise ConnectionException(error_msg) from original_exception
+            raise ConnectionException(error_msg)
+
+        # Safe to skip - raise TableNotFound for @retry decorator to catch
+        skip_msg = f"Table '{table_name}' did not exist during {operation_context}."
+        if original_exception:
+            raise TableNotFound(skip_msg) from original_exception
+
+        raise TableNotFound(skip_msg)
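To make the skip-versus-error split in `handle_table_not_found` concrete, here is a self-contained sketch with stub classes; fides' real ExecutionNode and exception types are richer than these stand-ins:

```python
# Minimal sketch of the skip-vs-error decision, using stub classes.
class TableNotFound(Exception): ...
class ConnectionException(Exception): ...


class StubNode:
    def __init__(self, address: str, has_deps: bool) -> None:
        self.address = address
        self._has_deps = has_deps

    def has_outgoing_dependencies(self) -> bool:
        return self._has_deps


def handle_table_not_found(node: StubNode, table_name: str, context: str) -> None:
    if node.has_outgoing_dependencies():
        # Downstream collections need this data: hard error.
        raise ConnectionException(
            f"Table '{table_name}' did not exist during {context}. "
            f"Cannot skip collection '{node.address}'."
        )
    # No dependents: safe to skip this collection.
    raise TableNotFound(f"Table '{table_name}' did not exist during {context}.")


try:
    handle_table_not_found(StubNode("dataset:users", has_deps=False), "users", "data retrieval")
except TableNotFound as exc:
    print(f"skipped: {exc}")
```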
fides/api/service/privacy_request/request_runner_service.py

@@ -80,6 +80,7 @@ from fides.api.util.cache import get_all_masking_secret_keys
 from fides.api.util.collection_util import Row
 from fides.api.util.logger import Pii, _log_exception, _log_warning
 from fides.api.util.logger_context_utils import LoggerContextKeys, log_context
+from fides.api.util.memory_watchdog import memory_limiter
 from fides.common.api.v1.urn_registry import (
     PRIVACY_REQUEST_TRANSFER_TO_PARENT,
     V1_URL_PREFIX,

@@ -358,8 +359,8 @@ def upload_and_save_access_results(  # pylint: disable=R0912
 
 
 @celery_app.task(base=DatabaseTask, bind=True)
-
-
+@memory_limiter
+@log_context(capture_args={"privacy_request_id": LoggerContextKeys.privacy_request_id})
 def run_privacy_request(
     self: DatabaseTask,
     privacy_request_id: str,
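`memory_limiter` comes from the new fides/api/util/memory_watchdog.py (286 lines, not shown in this diff). A toy sketch of the general watchdog-decorator pattern, assuming a background thread that samples process memory; none of this is the actual fides implementation:

```python
# Toy sketch only - the real memory_watchdog module's behavior is unknown.
# Illustrates the pattern: sample memory in a background thread while the
# wrapped task runs, and react when a threshold is exceeded.
import functools
import resource  # Unix-only; the real module's dependencies are unknown
import threading


def memory_limiter(func):
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        stop = threading.Event()

        def watch() -> None:
            while not stop.wait(0.5):
                # ru_maxrss is KiB on Linux (bytes on macOS)
                peak_kib = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
                if peak_kib > 2 * 1024 * 1024:  # e.g. 2 GiB, illustrative
                    print(f"memory watchdog: peak RSS {peak_kib} KiB")

        t = threading.Thread(target=watch, daemon=True)
        t.start()
        try:
            return func(*args, **kwargs)
        finally:
            stop.set()
            t.join()

    return wrapper
```

In the diff, `@memory_limiter` sits between `@celery_app.task(...)` and `@log_context(...)`, so it wraps the task body itself while Celery registration stays outermost.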
fides/api/service/privacy_request/request_service.py

@@ -3,11 +3,12 @@ from __future__ import annotations
 import json
 from asyncio import sleep
 from datetime import datetime, timedelta
-from typing import Any, Dict, Optional, Set
+from typing import Any, Dict, List, Optional, Set
 
 from httpx import AsyncClient
 from loguru import logger
 from sqlalchemy import text
+from sqlalchemy.orm import Session
 from sqlalchemy.sql.elements import TextClause
 
 from fides.api.common_exceptions import PrivacyRequestNotFound

@@ -31,6 +32,9 @@ from fides.api.util.cache import (
     celery_tasks_in_flight,
     get_async_task_tracking_cache_key,
     get_cache,
+    get_privacy_request_retry_count,
+    increment_privacy_request_retry_count,
+    reset_privacy_request_retry_count,
 )
 from fides.api.util.lock import redis_lock
 from fides.common.api.v1.urn_registry import PRIVACY_REQUESTS, V1_URL_PREFIX
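The three retry-count helpers imported above live in fides/api/util/cache.py (+56 lines in the file list, not shown here). A hypothetical Redis-backed sketch; the key format and the atomic-increment choice are assumptions:

```python
# Hypothetical sketch of the retry-count helpers; not the fides implementation.
import redis

r = redis.Redis()
RETRY_KEY = "privacy_request_requeue_retry_count__{}"  # hypothetical key format


def get_privacy_request_retry_count(privacy_request_id: str) -> int:
    value = r.get(RETRY_KEY.format(privacy_request_id))
    return int(value) if value else 0


def increment_privacy_request_retry_count(privacy_request_id: str) -> int:
    # INCR is atomic, so concurrent schedulers cannot double-count
    return int(r.incr(RETRY_KEY.format(privacy_request_id)))


def reset_privacy_request_retry_count(privacy_request_id: str) -> None:
    r.delete(RETRY_KEY.format(privacy_request_id))
```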
@@ -350,10 +354,17 @@ def initiate_interrupted_task_requeue_poll() -> None:
 
 
 def get_cached_task_id(entity_id: str) -> Optional[str]:
-    """Gets the cached task ID for a privacy request or request task by ID.
+    """Gets the cached task ID for a privacy request or request task by ID.
+
+    Raises Exception if cache operations fail, allowing callers to handle cache failures appropriately.
+    """
     cache: FidesopsRedis = get_cache()
-
-
+    try:
+        task_id = cache.get(get_async_task_tracking_cache_key(entity_id))
+        return task_id
+    except Exception as exc:
+        logger.error(f"Failed to get cached task ID for entity {entity_id}: {exc}")
+        raise
 
 
 REQUEUE_INTERRUPTED_TASKS_LOCK = "requeue_interrupted_tasks_lock"

@@ -393,6 +404,115 @@ def _get_task_ids_from_dsr_queue(
     return queued_tasks_ids
 
 
+def _cancel_interrupted_tasks_and_error_privacy_request(
+    db: Session, privacy_request: PrivacyRequest, error_message: Optional[str] = None
+) -> None:
+    """
+    Cancel all tasks associated with an interrupted privacy request and set the privacy request to error state.
+
+    This function:
+    1. Logs the error message (either provided or default)
+    2. Revokes the main privacy request task and all associated request tasks
+    3. Sets the privacy request status to error
+    4. Creates an error log entry
+
+    Args:
+        db: Database session
+        privacy_request: The privacy request to cancel and error
+        error_message: Optional error message to log. If not provided, uses default message.
+    """
+    if error_message:
+        logger.error(error_message)
+    else:
+        logger.error(
+            f"Canceling interrupted tasks and marking privacy request {privacy_request.id} as error"
+        )
+
+    # Cancel all associated Celery tasks
+    privacy_request.cancel_celery_tasks()
+
+    # Set privacy request to error state using the existing method
+    try:
+        privacy_request.error_processing(db)
+        logger.info(
+            f"Privacy request {privacy_request.id} marked as error due to task interruption"
+        )
+    except Exception as exc:
+        logger.error(
+            f"Failed to mark privacy request {privacy_request.id} as error: {exc}"
+        )
+
+
+def _handle_privacy_request_requeue(
+    db: Session, privacy_request: PrivacyRequest
+) -> None:
+    """Handle retry logic for a privacy request - either requeue or cancel based on retry count."""
+    try:
+        # Check retry count and either requeue or cancel based on limit
+        current_retry_count = get_privacy_request_retry_count(privacy_request.id)
+        max_retries = CONFIG.execution.privacy_request_requeue_retry_count
+
+        if current_retry_count < max_retries:
+            # Increment retry count and attempt requeue
+            new_retry_count = increment_privacy_request_retry_count(privacy_request.id)
+            logger.info(
+                f"Requeuing privacy request {privacy_request.id} "
+                f"(attempt {new_retry_count}/{max_retries})"
+            )
+
+            from fides.service.privacy_request.privacy_request_service import (  # pylint: disable=cyclic-import
+                PrivacyRequestError,
+                _requeue_privacy_request,
+            )
+
+            try:
+                _requeue_privacy_request(db, privacy_request)
+            except PrivacyRequestError as exc:
+                # If requeue fails, cancel tasks and set to error state
+                _cancel_interrupted_tasks_and_error_privacy_request(
+                    db, privacy_request, exc.message
+                )
+        else:
+            # Exceeded retry limit, cancel tasks and set to error state
+            _cancel_interrupted_tasks_and_error_privacy_request(
+                db,
+                privacy_request,
+                f"Privacy request {privacy_request.id} exceeded max retry attempts "
+                f"({max_retries}), canceling tasks and setting to error state",
+            )
+            # Reset retry count since we're giving up
+            reset_privacy_request_retry_count(privacy_request.id)
+
+    except Exception as cache_exc:
+        # If cache operations fail (Redis down, network issues, etc.), fail safe by canceling
+        _cancel_interrupted_tasks_and_error_privacy_request(
+            db,
+            privacy_request,
+            f"Cache operation failed for privacy request {privacy_request.id}, "
+            f"failing safe by canceling tasks: {cache_exc}",
+        )
+
+
+def _get_request_task_ids_in_progress(
+    db: Session, privacy_request_id: str
+) -> List[str]:
+    """Get the IDs of request tasks that are currently in progress for a privacy request."""
+    request_tasks_in_progress = (
+        db.query(RequestTask.id)
+        .filter(RequestTask.privacy_request_id == privacy_request_id)
+        .filter(
+            RequestTask.status.in_(
+                [
+                    ExecutionLogStatus.in_processing,
+                    ExecutionLogStatus.pending,
+                ]
+            )
+        )
+        .all()
+    )
+    return [task[0] for task in request_tasks_in_progress]
+
+
 # pylint: disable=too-many-branches
 @celery_app.task(base=DatabaseTask, bind=True)
 def requeue_interrupted_tasks(self: DatabaseTask) -> None:
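The retry gate in `_handle_privacy_request_requeue` is bounded by `CONFIG.execution.privacy_request_requeue_retry_count` (fides/config/execution_settings.py gains 8 lines in this release). A small simulation of the decision, assuming a limit of 3 (the real default is not shown in this diff):

```python
# Simulation of the retry gate above, assuming
# privacy_request_requeue_retry_count = 3.
MAX_RETRIES = 3


def decide(current_retry_count: int) -> str:
    if current_retry_count < MAX_RETRIES:
        return f"requeue (attempt {current_retry_count + 1}/{MAX_RETRIES})"
    return "cancel tasks, mark request as error, reset retry count"


for count in range(5):
    print(count, "->", decide(count))
# 0 -> requeue (attempt 1/3)
# 1 -> requeue (attempt 2/3)
# 2 -> requeue (attempt 3/3)
# 3 -> cancel tasks, mark request as error, reset retry count
# 4 -> cancel tasks, mark request as error, reset retry count
```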
@@ -442,17 +562,40 @@ def requeue_interrupted_tasks(self: DatabaseTask) -> None:
         )
 
         # Get task IDs from the queue in a memory-efficient way
-
+        try:
+            queued_tasks_ids = _get_task_ids_from_dsr_queue(redis_conn)
+        except Exception as queue_exc:
+            logger.warning(
+                f"Failed to get task IDs from queue, skipping queue state checks: {queue_exc}"
+            )
+            return
 
         # Check each privacy request
         for privacy_request in in_progress_requests:
             should_requeue = False
             logger.debug(f"Checking tasks for privacy request {privacy_request.id}")
 
-
+            try:
+                task_id = get_cached_task_id(privacy_request.id)
+            except Exception as cache_exc:
+                # If we can't get the task ID due to cache failure, fail safe by canceling
+                _cancel_interrupted_tasks_and_error_privacy_request(
+                    db,
+                    privacy_request,
+                    f"Cache failure when getting task ID for privacy request {privacy_request.id}, "
+                    f"failing safe by canceling tasks: {cache_exc}",
+                )
+                continue
 
             # If the task ID is not cached, we can't check if it's running
+            # This means the request is stuck - cancel it
             if not task_id:
+                _cancel_interrupted_tasks_and_error_privacy_request(
+                    db,
+                    privacy_request,
+                    f"No task ID found for privacy request {privacy_request.id}, "
+                    f"request is stuck without a running task - canceling",
+                )
                 continue
 
             # Check if the main privacy request task is active

@@ -470,30 +613,36 @@ def requeue_interrupted_tasks(self: DatabaseTask) -> None:
             )
             should_requeue = True
 
-
-                db.
-                .filter(RequestTask.privacy_request_id == privacy_request.id)
-                .filter(
-                    RequestTask.status.in_(
-                        [
-                            ExecutionLogStatus.in_processing,
-                            ExecutionLogStatus.pending,
-                        ]
-                    )
-                )
-                .all()
+            request_task_ids_in_progress = _get_request_task_ids_in_progress(
+                db, privacy_request.id
             )
-            request_task_ids_in_progress = [
-                task[0] for task in request_tasks_in_progress
-            ]
 
             # Check each individual request task
             for request_task_id in request_task_ids_in_progress:
-
+                try:
+                    subtask_id = get_cached_task_id(request_task_id)
+                except Exception as cache_exc:
+                    # If we can't get the subtask ID due to cache failure, fail safe by canceling
+                    _cancel_interrupted_tasks_and_error_privacy_request(
+                        db,
+                        privacy_request,
+                        f"Cache failure when getting subtask ID for request task {request_task_id} "
+                        f"(privacy request {privacy_request.id}), failing safe by canceling tasks: {cache_exc}",
+                    )
+                    should_requeue = False
+                    break
 
                 # If the task ID is not cached, we can't check if it's running
+                # This means the subtask is stuck - cancel the entire privacy request
                 if not subtask_id:
-
+                    _cancel_interrupted_tasks_and_error_privacy_request(
+                        db,
+                        privacy_request,
+                        f"No task ID found for request task {request_task_id} "
+                        f"(privacy request {privacy_request.id}), subtask is stuck - canceling privacy request",
+                    )
+                    should_requeue = False
+                    break
 
                 if (
                     subtask_id not in queued_tasks_ids

@@ -507,12 +656,4 @@ def requeue_interrupted_tasks(self: DatabaseTask) -> None:
 
         # Requeue the privacy request if needed
         if should_requeue:
-
-                PrivacyRequestError,
-                _requeue_privacy_request,
-            )
-
-            try:
-                _requeue_privacy_request(db, privacy_request)
-            except PrivacyRequestError as exc:
-                logger.error(exc.message)
+            _handle_privacy_request_requeue(db, privacy_request)
fides/api/task/execute_request_tasks.py

@@ -36,6 +36,7 @@ from fides.api.tasks import DSR_QUEUE_NAME, DatabaseTask, celery_app
 from fides.api.util.cache import cache_task_tracking_key
 from fides.api.util.collection_util import Row
 from fides.api.util.logger_context_utils import LoggerContextKeys, log_context
+from fides.api.util.memory_watchdog import memory_limiter
 
 # DSR 3.0 task functions
 

@@ -255,6 +256,7 @@ def queue_downstream_tasks(
 
 
 @celery_app.task(base=DatabaseTask, bind=True)
+@memory_limiter
 @log_context(
     capture_args={
         "privacy_request_id": LoggerContextKeys.privacy_request_id,

@@ -319,6 +321,7 @@ def run_access_node(
 
 
 @celery_app.task(base=DatabaseTask, bind=True)
+@memory_limiter
 @log_context(
     capture_args={
         "privacy_request_id": LoggerContextKeys.privacy_request_id,

@@ -391,6 +394,7 @@ def run_erasure_node(
 
 
 @celery_app.task(base=DatabaseTask, bind=True)
+@memory_limiter
 @log_context(
     capture_args={
         "privacy_request_id": LoggerContextKeys.privacy_request_id,