ethyca-fides 2.67.0rc1__py2.py3-none-any.whl → 2.67.1b0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ethyca-fides might be problematic. See the package registry's advisory page for more details.

Files changed (110)
  1. {ethyca_fides-2.67.0rc1.dist-info → ethyca_fides-2.67.1b0.dist-info}/METADATA +1 -1
  2. {ethyca_fides-2.67.0rc1.dist-info → ethyca_fides-2.67.1b0.dist-info}/RECORD +110 -109
  3. fides/_version.py +3 -3
  4. fides/api/common_exceptions.py +4 -0
  5. fides/api/graph/execution.py +16 -0
  6. fides/api/models/privacy_request/privacy_request.py +33 -13
  7. fides/api/schemas/application_config.py +1 -0
  8. fides/api/schemas/connection_configuration/connection_secrets_datahub.py +10 -1
  9. fides/api/service/connectors/base_connector.py +14 -0
  10. fides/api/service/connectors/bigquery_connector.py +5 -0
  11. fides/api/service/connectors/query_configs/bigquery_query_config.py +4 -4
  12. fides/api/service/connectors/query_configs/snowflake_query_config.py +3 -3
  13. fides/api/service/connectors/snowflake_connector.py +55 -2
  14. fides/api/service/connectors/sql_connector.py +107 -9
  15. fides/api/service/privacy_request/request_runner_service.py +3 -2
  16. fides/api/service/privacy_request/request_service.py +173 -32
  17. fides/api/task/execute_request_tasks.py +4 -0
  18. fides/api/task/graph_task.py +48 -2
  19. fides/api/util/cache.py +56 -0
  20. fides/api/util/memory_watchdog.py +286 -0
  21. fides/config/execution_settings.py +8 -0
  22. fides/config/utils.py +1 -0
  23. fides/ui-build/static/admin/404.html +1 -1
  24. fides/ui-build/static/admin/_next/static/chunks/pages/{_app-5c3a63bb1697f34c.js → _app-750d6bd16c971bb9.js} +1 -1
  25. fides/ui-build/static/admin/add-systems/manual.html +1 -1
  26. fides/ui-build/static/admin/add-systems/multiple.html +1 -1
  27. fides/ui-build/static/admin/add-systems.html +1 -1
  28. fides/ui-build/static/admin/consent/configure/add-vendors.html +1 -1
  29. fides/ui-build/static/admin/consent/configure.html +1 -1
  30. fides/ui-build/static/admin/consent/privacy-experience/[id].html +1 -1
  31. fides/ui-build/static/admin/consent/privacy-experience/new.html +1 -1
  32. fides/ui-build/static/admin/consent/privacy-experience.html +1 -1
  33. fides/ui-build/static/admin/consent/privacy-notices/[id].html +1 -1
  34. fides/ui-build/static/admin/consent/privacy-notices/new.html +1 -1
  35. fides/ui-build/static/admin/consent/privacy-notices.html +1 -1
  36. fides/ui-build/static/admin/consent/properties.html +1 -1
  37. fides/ui-build/static/admin/consent/reporting.html +1 -1
  38. fides/ui-build/static/admin/consent.html +1 -1
  39. fides/ui-build/static/admin/data-catalog/[systemId]/projects/[projectUrn]/[resourceUrn].html +1 -1
  40. fides/ui-build/static/admin/data-catalog/[systemId]/projects/[projectUrn].html +1 -1
  41. fides/ui-build/static/admin/data-catalog/[systemId]/projects.html +1 -1
  42. fides/ui-build/static/admin/data-catalog/[systemId]/resources/[resourceUrn].html +1 -1
  43. fides/ui-build/static/admin/data-catalog/[systemId]/resources.html +1 -1
  44. fides/ui-build/static/admin/data-catalog.html +1 -1
  45. fides/ui-build/static/admin/data-discovery/action-center/[monitorId]/[systemId].html +1 -1
  46. fides/ui-build/static/admin/data-discovery/action-center/[monitorId].html +1 -1
  47. fides/ui-build/static/admin/data-discovery/action-center.html +1 -1
  48. fides/ui-build/static/admin/data-discovery/activity.html +1 -1
  49. fides/ui-build/static/admin/data-discovery/detection/[resourceUrn].html +1 -1
  50. fides/ui-build/static/admin/data-discovery/detection.html +1 -1
  51. fides/ui-build/static/admin/data-discovery/discovery/[resourceUrn].html +1 -1
  52. fides/ui-build/static/admin/data-discovery/discovery.html +1 -1
  53. fides/ui-build/static/admin/datamap.html +1 -1
  54. fides/ui-build/static/admin/dataset/[datasetId]/[collectionName]/[...subfieldNames].html +1 -1
  55. fides/ui-build/static/admin/dataset/[datasetId]/[collectionName].html +1 -1
  56. fides/ui-build/static/admin/dataset/[datasetId].html +1 -1
  57. fides/ui-build/static/admin/dataset/new.html +1 -1
  58. fides/ui-build/static/admin/dataset.html +1 -1
  59. fides/ui-build/static/admin/datastore-connection/[id].html +1 -1
  60. fides/ui-build/static/admin/datastore-connection/new.html +1 -1
  61. fides/ui-build/static/admin/datastore-connection.html +1 -1
  62. fides/ui-build/static/admin/index.html +1 -1
  63. fides/ui-build/static/admin/integrations/[id].html +1 -1
  64. fides/ui-build/static/admin/integrations.html +1 -1
  65. fides/ui-build/static/admin/login/[provider].html +1 -1
  66. fides/ui-build/static/admin/login.html +1 -1
  67. fides/ui-build/static/admin/messaging/[id].html +1 -1
  68. fides/ui-build/static/admin/messaging/add-template.html +1 -1
  69. fides/ui-build/static/admin/messaging.html +1 -1
  70. fides/ui-build/static/admin/poc/ant-components.html +1 -1
  71. fides/ui-build/static/admin/poc/form-experiments/AntForm.html +1 -1
  72. fides/ui-build/static/admin/poc/form-experiments/FormikAntFormItem.html +1 -1
  73. fides/ui-build/static/admin/poc/form-experiments/FormikControlled.html +1 -1
  74. fides/ui-build/static/admin/poc/form-experiments/FormikField.html +1 -1
  75. fides/ui-build/static/admin/poc/form-experiments/FormikSpreadField.html +1 -1
  76. fides/ui-build/static/admin/poc/forms.html +1 -1
  77. fides/ui-build/static/admin/poc/table-migration.html +1 -1
  78. fides/ui-build/static/admin/privacy-requests/[id].html +1 -1
  79. fides/ui-build/static/admin/privacy-requests/configure/messaging.html +1 -1
  80. fides/ui-build/static/admin/privacy-requests/configure/storage.html +1 -1
  81. fides/ui-build/static/admin/privacy-requests/configure.html +1 -1
  82. fides/ui-build/static/admin/privacy-requests.html +1 -1
  83. fides/ui-build/static/admin/properties/[id].html +1 -1
  84. fides/ui-build/static/admin/properties/add-property.html +1 -1
  85. fides/ui-build/static/admin/properties.html +1 -1
  86. fides/ui-build/static/admin/reporting/datamap.html +1 -1
  87. fides/ui-build/static/admin/settings/about/alpha.html +1 -1
  88. fides/ui-build/static/admin/settings/about.html +1 -1
  89. fides/ui-build/static/admin/settings/consent/[configuration_id]/[purpose_id].html +1 -1
  90. fides/ui-build/static/admin/settings/consent.html +1 -1
  91. fides/ui-build/static/admin/settings/custom-fields.html +1 -1
  92. fides/ui-build/static/admin/settings/domain-records.html +1 -1
  93. fides/ui-build/static/admin/settings/domains.html +1 -1
  94. fides/ui-build/static/admin/settings/email-templates.html +1 -1
  95. fides/ui-build/static/admin/settings/locations.html +1 -1
  96. fides/ui-build/static/admin/settings/organization.html +1 -1
  97. fides/ui-build/static/admin/settings/regulations.html +1 -1
  98. fides/ui-build/static/admin/systems/configure/[id]/test-datasets.html +1 -1
  99. fides/ui-build/static/admin/systems/configure/[id].html +1 -1
  100. fides/ui-build/static/admin/systems.html +1 -1
  101. fides/ui-build/static/admin/taxonomy.html +1 -1
  102. fides/ui-build/static/admin/user-management/new.html +1 -1
  103. fides/ui-build/static/admin/user-management/profile/[id].html +1 -1
  104. fides/ui-build/static/admin/user-management.html +1 -1
  105. {ethyca_fides-2.67.0rc1.dist-info → ethyca_fides-2.67.1b0.dist-info}/WHEEL +0 -0
  106. {ethyca_fides-2.67.0rc1.dist-info → ethyca_fides-2.67.1b0.dist-info}/entry_points.txt +0 -0
  107. {ethyca_fides-2.67.0rc1.dist-info → ethyca_fides-2.67.1b0.dist-info}/licenses/LICENSE +0 -0
  108. {ethyca_fides-2.67.0rc1.dist-info → ethyca_fides-2.67.1b0.dist-info}/top_level.txt +0 -0
  109. /fides/ui-build/static/admin/_next/static/{ZIM71ZcqBBeTYHc-MN9_n → v1eqRIfzld3di00TTnVM9}/_buildManifest.js +0 -0
  110. /fides/ui-build/static/admin/_next/static/{ZIM71ZcqBBeTYHc-MN9_n → v1eqRIfzld3di00TTnVM9}/_ssgManifest.js +0 -0
@@ -93,7 +93,7 @@ class BigQueryQueryConfig(QueryStringWithoutTuplesOverrideQueryConfig):
93
93
 
94
94
  return where_clauses
95
95
 
96
- def _generate_table_name(self) -> str:
96
+ def generate_table_name(self) -> str:
97
97
  """
98
98
  Prepends the dataset ID and project ID to the base table name
99
99
  if the BigQuery namespace meta is provided.
@@ -116,7 +116,7 @@ class BigQueryQueryConfig(QueryStringWithoutTuplesOverrideQueryConfig):
116
116
  Returns a query string with backtick formatting for tables that have the same names as
117
117
  BigQuery reserved words.
118
118
  """
119
- return f'SELECT {field_list} FROM `{self._generate_table_name()}` WHERE ({" OR ".join(clauses)})'
119
+ return f'SELECT {field_list} FROM `{self.generate_table_name()}` WHERE ({" OR ".join(clauses)})'
120
120
 
121
121
  def generate_masking_stmt(
122
122
  self,
@@ -197,7 +197,7 @@ class BigQueryQueryConfig(QueryStringWithoutTuplesOverrideQueryConfig):
197
197
  )
198
198
  return []
199
199
 
200
- table = Table(self._generate_table_name(), MetaData(bind=client), autoload=True)
200
+ table = Table(self.generate_table_name(), MetaData(bind=client), autoload=True)
201
201
  where_clauses: List[ColumnElement] = [
202
202
  table.c[k] == v for k, v in non_empty_reference_field_keys.items()
203
203
  ]
@@ -256,7 +256,7 @@ class BigQueryQueryConfig(QueryStringWithoutTuplesOverrideQueryConfig):
256
256
  )
257
257
  return []
258
258
 
259
- table = Table(self._generate_table_name(), MetaData(bind=client), autoload=True)
259
+ table = Table(self.generate_table_name(), MetaData(bind=client), autoload=True)
260
260
 
261
261
  # Build individual reference clauses
262
262
  where_clauses: List[ColumnElement] = []
@@ -30,7 +30,7 @@ class SnowflakeQueryConfig(SQLQueryConfig):
30
30
  """Returns field names in clauses surrounded by quotation marks as required by Snowflake syntax."""
31
31
  return f'"{string_path}" {operator} (:{operand})'
32
32
 
33
- def _generate_table_name(self) -> str:
33
+ def generate_table_name(self) -> str:
34
34
  """
35
35
  Prepends the dataset name and schema to the base table name
36
36
  if the Snowflake namespace meta is provided.
@@ -57,7 +57,7 @@ class SnowflakeQueryConfig(SQLQueryConfig):
57
57
  clauses: List[str],
58
58
  ) -> str:
59
59
  """Returns a query string with double quotation mark formatting as required by Snowflake syntax."""
60
- return f'SELECT {field_list} FROM {self._generate_table_name()} WHERE ({" OR ".join(clauses)})'
60
+ return f'SELECT {field_list} FROM {self.generate_table_name()} WHERE ({" OR ".join(clauses)})'
61
61
 
62
62
  def format_key_map_for_update_stmt(self, param_map: Dict[str, Any]) -> List[str]:
63
63
  """Adds the appropriate formatting for update statements in this datastore."""
@@ -69,4 +69,4 @@ class SnowflakeQueryConfig(SQLQueryConfig):
69
69
  where_clauses: List[str],
70
70
  ) -> str:
71
71
  """Returns a parameterized update statement in Snowflake dialect."""
72
- return f'UPDATE {self._generate_table_name()} SET {", ".join(update_clauses)} WHERE {" AND ".join(where_clauses)}'
72
+ return f'UPDATE {self.generate_table_name()} SET {", ".join(update_clauses)} WHERE {" AND ".join(where_clauses)}'
@@ -3,11 +3,11 @@ from typing import Any, Dict, Union
3
3
  from cryptography.hazmat.backends import default_backend
4
4
  from cryptography.hazmat.primitives import serialization
5
5
  from snowflake.sqlalchemy import URL as Snowflake_URL
6
+ from sqlalchemy import text
6
7
  from sqlalchemy.orm import Session
7
8
 
8
9
  from fides.api.graph.execution import ExecutionNode
9
10
  from fides.api.schemas.connection_configuration import SnowflakeSchema
10
- from fides.api.service.connectors.query_configs.query_config import SQLQueryConfig
11
11
  from fides.api.service.connectors.query_configs.snowflake_query_config import (
12
12
  SnowflakeQueryConfig,
13
13
  )
@@ -69,10 +69,63 @@ class SnowflakeConnector(SQLConnector):
69
69
  connect_args["private_key"] = private_key
70
70
  return connect_args
71
71
 
72
- def query_config(self, node: ExecutionNode) -> SQLQueryConfig:
72
+ def query_config(self, node: ExecutionNode) -> SnowflakeQueryConfig:
73
73
  """Query wrapper corresponding to the input execution_node."""
74
74
 
75
75
  db: Session = Session.object_session(self.configuration)
76
76
  return SnowflakeQueryConfig(
77
77
  node, SQLConnector.get_namespace_meta(db, node.address.dataset)
78
78
  )
79
+
80
+ def get_qualified_table_name(self, node: ExecutionNode) -> str:
81
+ """Get fully qualified Snowflake table name using existing query config logic"""
82
+ query_config = self.query_config(node)
83
+ return query_config.generate_table_name()
84
+
85
+ def table_exists(self, qualified_table_name: str) -> bool:
86
+ """
87
+ Check if table exists in Snowflake using the proper three-part naming convention.
88
+
89
+ Snowflake supports database.schema.table naming, and the generic SQLConnector
90
+ table_exists method doesn't handle quoted identifiers properly.
91
+ """
92
+ try:
93
+ client = self.create_client()
94
+ with client.connect() as connection:
95
+ # Remove quotes and split the parts
96
+ clean_name = qualified_table_name.replace('"', "")
97
+ parts = clean_name.split(".")
98
+
99
+ if len(parts) == 1:
100
+ # Simple table name - use current schema
101
+ table_name = parts[0]
102
+ result = connection.execute(text(f'DESC TABLE "{table_name}"'))
103
+ elif len(parts) == 2:
104
+ # schema.table format
105
+ schema_name, table_name = parts
106
+ result = connection.execute(
107
+ text(f'DESC TABLE "{schema_name}"."{table_name}"')
108
+ )
109
+ elif len(parts) >= 3:
110
+ # database.schema.table format
111
+ database_name, schema_name, table_name = (
112
+ parts[-3],
113
+ parts[-2],
114
+ parts[-1],
115
+ )
116
+ # Use the database.schema.table format
117
+ result = connection.execute(
118
+ text(
119
+ f'DESC TABLE "{database_name}"."{schema_name}"."{table_name}"'
120
+ )
121
+ )
122
+ else:
123
+ return False
124
+
125
+ # If we get here without an exception, the table exists
126
+ result.close()
127
+ return True
128
+
129
+ except Exception:
130
+ # Table doesn't exist or other error
131
+ return False
@@ -6,7 +6,7 @@ import paramiko
6
6
  import sshtunnel # type: ignore
7
7
  from aiohttp.client_exceptions import ClientResponseError
8
8
  from loguru import logger
9
- from sqlalchemy import Column, select
9
+ from sqlalchemy import Column, inspect, select
10
10
  from sqlalchemy.dialects.postgresql import JSONB
11
11
  from sqlalchemy.engine import ( # type: ignore
12
12
  Connection,
@@ -22,6 +22,7 @@ from sqlalchemy.sql.elements import TextClause
22
22
  from fides.api.common_exceptions import (
23
23
  ConnectionException,
24
24
  SSHTunnelConfigNotFoundException,
25
+ TableNotFound,
25
26
  )
26
27
  from fides.api.graph.execution import ExecutionNode
27
28
  from fides.api.models.connectionconfig import ConnectionConfig, ConnectionTestStatus
@@ -189,14 +190,28 @@ class SQLConnector(BaseConnector[Engine]):
189
190
 
190
191
  logger.info("Starting data retrieval for {}", node.address)
191
192
  with client.connect() as connection:
192
- self.set_schema(connection)
193
- if (
194
- query_config.partitioning
195
- ): # only BigQuery supports partitioning, for now
196
- return self.partitioned_retrieval(query_config, connection, stmt)
197
-
198
- results = connection.execute(stmt)
199
- return self.cursor_result_to_rows(results)
193
+ try:
194
+ self.set_schema(connection)
195
+ if (
196
+ query_config.partitioning
197
+ ): # only BigQuery supports partitioning, for now
198
+ return self.partitioned_retrieval(query_config, connection, stmt)
199
+
200
+ results = connection.execute(stmt)
201
+ return self.cursor_result_to_rows(results)
202
+ except Exception as exc:
203
+ # Check if table exists using qualified table name
204
+ qualified_table_name = self.get_qualified_table_name(node)
205
+ if not self.table_exists(qualified_table_name):
206
+ # Central decision point - will raise TableNotFound or ConnectionException
207
+ self.handle_table_not_found(
208
+ node=node,
209
+ table_name=qualified_table_name,
210
+ operation_context="data retrieval",
211
+ original_exception=exc,
212
+ )
213
+ # Table exists or can't check - re-raise original exception
214
+ raise
200
215
 
201
216
  def mask_data(
202
217
  self,
@@ -290,3 +305,86 @@ class SQLConnector(BaseConnector[Engine]):
290
305
  raise NotImplementedError(
291
306
  "Partitioned retrieval is only supported for BigQuery currently!"
292
307
  )
308
+
309
+ def get_qualified_table_name(self, node: ExecutionNode) -> str:
310
+ """
311
+ Get the fully qualified table name for this database.
312
+
313
+ Default: Returns the simple collection name
314
+ Override: Database-specific connectors can implement namespace resolution
315
+ """
316
+ return node.collection.name
317
+
318
+ def table_exists(self, qualified_table_name: str) -> bool:
319
+ """
320
+ Check if table exists using SQLAlchemy introspection.
321
+
322
+ This is a generic implementation that should work for most SQL databases.
323
+ Override: Connectors can implement database-specific table existence checking
324
+ """
325
+ try:
326
+ client = self.create_client()
327
+ with client.connect() as connection:
328
+ inspector = inspect(connection)
329
+
330
+ # For simple table names
331
+ if "." not in qualified_table_name:
332
+ return inspector.has_table(qualified_table_name)
333
+
334
+ # For qualified names like schema.table or database.schema.table
335
+ parts = qualified_table_name.split(".")
336
+
337
+ if len(parts) == 2:
338
+ # schema.table format
339
+ schema_name, table_name = parts
340
+ return inspector.has_table(table_name, schema=schema_name)
341
+
342
+ if len(parts) >= 3:
343
+ # database.schema.table format (use schema.table)
344
+ schema_name, table_name = parts[-2], parts[-1]
345
+ return inspector.has_table(table_name, schema=schema_name)
346
+
347
+ # Fallback for unexpected format
348
+ return inspector.has_table(qualified_table_name)
349
+
350
+ except Exception as exc:
351
+ # Graceful fallback - if we can't check, assume table exists
352
+ # to preserve existing behavior for connectors that don't implement this
353
+ logger.error("Unable to check if table exists, assuming it does: {}", exc)
354
+ return True
355
+
356
+ def handle_table_not_found(
357
+ self,
358
+ node: ExecutionNode,
359
+ table_name: str,
360
+ operation_context: str,
361
+ original_exception: Optional[Exception] = None,
362
+ ) -> None:
363
+ """
364
+ Central decision point for table-not-found scenarios.
365
+
366
+ Raises TableNotFound (for collection skipping) or ConnectionException (for hard errors).
367
+ The raised exception will be caught by the @retry decorator in graph_task.py.
368
+
369
+ Args:
370
+ node: The ExecutionNode being processed
371
+ table_name: Name of the missing table
372
+ operation_context: Context like "data retrieval" or "data masking"
373
+ original_exception: The original exception that triggered this check
374
+ """
375
+ if node.has_outgoing_dependencies():
376
+ # Collection has dependencies - cannot skip safely
377
+ error_msg = (
378
+ f"Table '{table_name}' did not exist during {operation_context}. "
379
+ f"Cannot skip collection '{node.address}' because other collections depend on it."
380
+ )
381
+ if original_exception:
382
+ raise ConnectionException(error_msg) from original_exception
383
+ raise ConnectionException(error_msg)
384
+
385
+ # Safe to skip - raise TableNotFound for @retry decorator to catch
386
+ skip_msg = f"Table '{table_name}' did not exist during {operation_context}."
387
+ if original_exception:
388
+ raise TableNotFound(skip_msg) from original_exception
389
+
390
+ raise TableNotFound(skip_msg)
@@ -80,6 +80,7 @@ from fides.api.util.cache import get_all_masking_secret_keys
80
80
  from fides.api.util.collection_util import Row
81
81
  from fides.api.util.logger import Pii, _log_exception, _log_warning
82
82
  from fides.api.util.logger_context_utils import LoggerContextKeys, log_context
83
+ from fides.api.util.memory_watchdog import memory_limiter
83
84
  from fides.common.api.v1.urn_registry import (
84
85
  PRIVACY_REQUEST_TRANSFER_TO_PARENT,
85
86
  V1_URL_PREFIX,
@@ -358,8 +359,8 @@ def upload_and_save_access_results( # pylint: disable=R0912
358
359
 
359
360
 
360
361
  @celery_app.task(base=DatabaseTask, bind=True)
361
- # TODO: Add log_context back in, this is just for some temporary testing
362
- # @log_context(capture_args={"privacy_request_id": LoggerContextKeys.privacy_request_id})
362
+ @memory_limiter
363
+ @log_context(capture_args={"privacy_request_id": LoggerContextKeys.privacy_request_id})
363
364
  def run_privacy_request(
364
365
  self: DatabaseTask,
365
366
  privacy_request_id: str,
@@ -3,11 +3,12 @@ from __future__ import annotations
3
3
  import json
4
4
  from asyncio import sleep
5
5
  from datetime import datetime, timedelta
6
- from typing import Any, Dict, Optional, Set
6
+ from typing import Any, Dict, List, Optional, Set
7
7
 
8
8
  from httpx import AsyncClient
9
9
  from loguru import logger
10
10
  from sqlalchemy import text
11
+ from sqlalchemy.orm import Session
11
12
  from sqlalchemy.sql.elements import TextClause
12
13
 
13
14
  from fides.api.common_exceptions import PrivacyRequestNotFound
@@ -31,6 +32,9 @@ from fides.api.util.cache import (
31
32
  celery_tasks_in_flight,
32
33
  get_async_task_tracking_cache_key,
33
34
  get_cache,
35
+ get_privacy_request_retry_count,
36
+ increment_privacy_request_retry_count,
37
+ reset_privacy_request_retry_count,
34
38
  )
35
39
  from fides.api.util.lock import redis_lock
36
40
  from fides.common.api.v1.urn_registry import PRIVACY_REQUESTS, V1_URL_PREFIX
@@ -350,10 +354,17 @@ def initiate_interrupted_task_requeue_poll() -> None:
350
354
 
351
355
 
352
356
  def get_cached_task_id(entity_id: str) -> Optional[str]:
353
- """Gets the cached task ID for a privacy request or request task by ID."""
357
+ """Gets the cached task ID for a privacy request or request task by ID.
358
+
359
+ Raises Exception if cache operations fail, allowing callers to handle cache failures appropriately.
360
+ """
354
361
  cache: FidesopsRedis = get_cache()
355
- task_id = cache.get(get_async_task_tracking_cache_key(entity_id))
356
- return task_id
362
+ try:
363
+ task_id = cache.get(get_async_task_tracking_cache_key(entity_id))
364
+ return task_id
365
+ except Exception as exc:
366
+ logger.error(f"Failed to get cached task ID for entity {entity_id}: {exc}")
367
+ raise
357
368
 
358
369
 
359
370
  REQUEUE_INTERRUPTED_TASKS_LOCK = "requeue_interrupted_tasks_lock"
@@ -393,6 +404,115 @@ def _get_task_ids_from_dsr_queue(
393
404
  return queued_tasks_ids
394
405
 
395
406
 
407
+ def _cancel_interrupted_tasks_and_error_privacy_request(
408
+ db: Session, privacy_request: PrivacyRequest, error_message: Optional[str] = None
409
+ ) -> None:
410
+ """
411
+ Cancel all tasks associated with an interrupted privacy request and set the privacy request to error state.
412
+
413
+ This function:
414
+ 1. Logs the error message (either provided or default)
415
+ 2. Revokes the main privacy request task and all associated request tasks
416
+ 3. Sets the privacy request status to error
417
+ 4. Creates an error log entry
418
+
419
+ Args:
420
+ db: Database session
421
+ privacy_request: The privacy request to cancel and error
422
+ error_message: Optional error message to log. If not provided, uses default message.
423
+ """
424
+ if error_message:
425
+ logger.error(error_message)
426
+ else:
427
+ logger.error(
428
+ f"Canceling interrupted tasks and marking privacy request {privacy_request.id} as error"
429
+ )
430
+
431
+ # Cancel all associated Celery tasks
432
+ privacy_request.cancel_celery_tasks()
433
+
434
+ # Set privacy request to error state using the existing method
435
+ try:
436
+ privacy_request.error_processing(db)
437
+ logger.info(
438
+ f"Privacy request {privacy_request.id} marked as error due to task interruption"
439
+ )
440
+ except Exception as exc:
441
+ logger.error(
442
+ f"Failed to mark privacy request {privacy_request.id} as error: {exc}"
443
+ )
444
+
445
+
446
+ def _handle_privacy_request_requeue(
447
+ db: Session, privacy_request: PrivacyRequest
448
+ ) -> None:
449
+ """Handle retry logic for a privacy request - either requeue or cancel based on retry count."""
450
+ try:
451
+ # Check retry count and either requeue or cancel based on limit
452
+ current_retry_count = get_privacy_request_retry_count(privacy_request.id)
453
+ max_retries = CONFIG.execution.privacy_request_requeue_retry_count
454
+
455
+ if current_retry_count < max_retries:
456
+ # Increment retry count and attempt requeue
457
+ new_retry_count = increment_privacy_request_retry_count(privacy_request.id)
458
+ logger.info(
459
+ f"Requeuing privacy request {privacy_request.id} "
460
+ f"(attempt {new_retry_count}/{max_retries})"
461
+ )
462
+
463
+ from fides.service.privacy_request.privacy_request_service import ( # pylint: disable=cyclic-import
464
+ PrivacyRequestError,
465
+ _requeue_privacy_request,
466
+ )
467
+
468
+ try:
469
+ _requeue_privacy_request(db, privacy_request)
470
+ except PrivacyRequestError as exc:
471
+ # If requeue fails, cancel tasks and set to error state
472
+ _cancel_interrupted_tasks_and_error_privacy_request(
473
+ db, privacy_request, exc.message
474
+ )
475
+ else:
476
+ # Exceeded retry limit, cancel tasks and set to error state
477
+ _cancel_interrupted_tasks_and_error_privacy_request(
478
+ db,
479
+ privacy_request,
480
+ f"Privacy request {privacy_request.id} exceeded max retry attempts "
481
+ f"({max_retries}), canceling tasks and setting to error state",
482
+ )
483
+ # Reset retry count since we're giving up
484
+ reset_privacy_request_retry_count(privacy_request.id)
485
+
486
+ except Exception as cache_exc:
487
+ # If cache operations fail (Redis down, network issues, etc.), fail safe by canceling
488
+ _cancel_interrupted_tasks_and_error_privacy_request(
489
+ db,
490
+ privacy_request,
491
+ f"Cache operation failed for privacy request {privacy_request.id}, "
492
+ f"failing safe by canceling tasks: {cache_exc}",
493
+ )
494
+
495
+
496
+ def _get_request_task_ids_in_progress(
497
+ db: Session, privacy_request_id: str
498
+ ) -> List[str]:
499
+ """Get the IDs of request tasks that are currently in progress for a privacy request."""
500
+ request_tasks_in_progress = (
501
+ db.query(RequestTask.id)
502
+ .filter(RequestTask.privacy_request_id == privacy_request_id)
503
+ .filter(
504
+ RequestTask.status.in_(
505
+ [
506
+ ExecutionLogStatus.in_processing,
507
+ ExecutionLogStatus.pending,
508
+ ]
509
+ )
510
+ )
511
+ .all()
512
+ )
513
+ return [task[0] for task in request_tasks_in_progress]
514
+
515
+
396
516
  # pylint: disable=too-many-branches
397
517
  @celery_app.task(base=DatabaseTask, bind=True)
398
518
  def requeue_interrupted_tasks(self: DatabaseTask) -> None:
@@ -442,17 +562,40 @@ def requeue_interrupted_tasks(self: DatabaseTask) -> None:
442
562
  )
443
563
 
444
564
  # Get task IDs from the queue in a memory-efficient way
445
- queued_tasks_ids = _get_task_ids_from_dsr_queue(redis_conn)
565
+ try:
566
+ queued_tasks_ids = _get_task_ids_from_dsr_queue(redis_conn)
567
+ except Exception as queue_exc:
568
+ logger.warning(
569
+ f"Failed to get task IDs from queue, skipping queue state checks: {queue_exc}"
570
+ )
571
+ return
446
572
 
447
573
  # Check each privacy request
448
574
  for privacy_request in in_progress_requests:
449
575
  should_requeue = False
450
576
  logger.debug(f"Checking tasks for privacy request {privacy_request.id}")
451
577
 
452
- task_id = get_cached_task_id(privacy_request.id)
578
+ try:
579
+ task_id = get_cached_task_id(privacy_request.id)
580
+ except Exception as cache_exc:
581
+ # If we can't get the task ID due to cache failure, fail safe by canceling
582
+ _cancel_interrupted_tasks_and_error_privacy_request(
583
+ db,
584
+ privacy_request,
585
+ f"Cache failure when getting task ID for privacy request {privacy_request.id}, "
586
+ f"failing safe by canceling tasks: {cache_exc}",
587
+ )
588
+ continue
453
589
 
454
590
  # If the task ID is not cached, we can't check if it's running
591
+ # This means the request is stuck - cancel it
455
592
  if not task_id:
593
+ _cancel_interrupted_tasks_and_error_privacy_request(
594
+ db,
595
+ privacy_request,
596
+ f"No task ID found for privacy request {privacy_request.id}, "
597
+ f"request is stuck without a running task - canceling",
598
+ )
456
599
  continue
457
600
 
458
601
  # Check if the main privacy request task is active
@@ -470,30 +613,36 @@ def requeue_interrupted_tasks(self: DatabaseTask) -> None:
470
613
  )
471
614
  should_requeue = True
472
615
 
473
- request_tasks_in_progress = (
474
- db.query(RequestTask.id)
475
- .filter(RequestTask.privacy_request_id == privacy_request.id)
476
- .filter(
477
- RequestTask.status.in_(
478
- [
479
- ExecutionLogStatus.in_processing,
480
- ExecutionLogStatus.pending,
481
- ]
482
- )
483
- )
484
- .all()
616
+ request_task_ids_in_progress = _get_request_task_ids_in_progress(
617
+ db, privacy_request.id
485
618
  )
486
- request_task_ids_in_progress = [
487
- task[0] for task in request_tasks_in_progress
488
- ]
489
619
 
490
620
  # Check each individual request task
491
621
  for request_task_id in request_task_ids_in_progress:
492
- subtask_id = get_cached_task_id(request_task_id)
622
+ try:
623
+ subtask_id = get_cached_task_id(request_task_id)
624
+ except Exception as cache_exc:
625
+ # If we can't get the subtask ID due to cache failure, fail safe by canceling
626
+ _cancel_interrupted_tasks_and_error_privacy_request(
627
+ db,
628
+ privacy_request,
629
+ f"Cache failure when getting subtask ID for request task {request_task_id} "
630
+ f"(privacy request {privacy_request.id}), failing safe by canceling tasks: {cache_exc}",
631
+ )
632
+ should_requeue = False
633
+ break
493
634
 
494
635
  # If the task ID is not cached, we can't check if it's running
636
+ # This means the subtask is stuck - cancel the entire privacy request
495
637
  if not subtask_id:
496
- continue
638
+ _cancel_interrupted_tasks_and_error_privacy_request(
639
+ db,
640
+ privacy_request,
641
+ f"No task ID found for request task {request_task_id} "
642
+ f"(privacy request {privacy_request.id}), subtask is stuck - canceling privacy request",
643
+ )
644
+ should_requeue = False
645
+ break
497
646
 
498
647
  if (
499
648
  subtask_id not in queued_tasks_ids
@@ -507,12 +656,4 @@ def requeue_interrupted_tasks(self: DatabaseTask) -> None:
507
656
 
508
657
  # Requeue the privacy request if needed
509
658
  if should_requeue:
510
- from fides.service.privacy_request.privacy_request_service import ( # pylint: disable=cyclic-import
511
- PrivacyRequestError,
512
- _requeue_privacy_request,
513
- )
514
-
515
- try:
516
- _requeue_privacy_request(db, privacy_request)
517
- except PrivacyRequestError as exc:
518
- logger.error(exc.message)
659
+ _handle_privacy_request_requeue(db, privacy_request)
@@ -36,6 +36,7 @@ from fides.api.tasks import DSR_QUEUE_NAME, DatabaseTask, celery_app
36
36
  from fides.api.util.cache import cache_task_tracking_key
37
37
  from fides.api.util.collection_util import Row
38
38
  from fides.api.util.logger_context_utils import LoggerContextKeys, log_context
39
+ from fides.api.util.memory_watchdog import memory_limiter
39
40
 
40
41
  # DSR 3.0 task functions
41
42
 
@@ -255,6 +256,7 @@ def queue_downstream_tasks(
255
256
 
256
257
 
257
258
  @celery_app.task(base=DatabaseTask, bind=True)
259
+ @memory_limiter
258
260
  @log_context(
259
261
  capture_args={
260
262
  "privacy_request_id": LoggerContextKeys.privacy_request_id,
@@ -319,6 +321,7 @@ def run_access_node(
319
321
 
320
322
 
321
323
  @celery_app.task(base=DatabaseTask, bind=True)
324
+ @memory_limiter
322
325
  @log_context(
323
326
  capture_args={
324
327
  "privacy_request_id": LoggerContextKeys.privacy_request_id,
@@ -391,6 +394,7 @@ def run_erasure_node(
391
394
 
392
395
 
393
396
  @celery_app.task(base=DatabaseTask, bind=True)
397
+ @memory_limiter
394
398
  @log_context(
395
399
  capture_args={
396
400
  "privacy_request_id": LoggerContextKeys.privacy_request_id,