ethyca-fides 2.67.0rc2__py2.py3-none-any.whl → 2.67.0rc4__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ethyca-fides might be problematic. Click here for more details.
- {ethyca_fides-2.67.0rc2.dist-info → ethyca_fides-2.67.0rc4.dist-info}/METADATA +1 -1
- {ethyca_fides-2.67.0rc2.dist-info → ethyca_fides-2.67.0rc4.dist-info}/RECORD +101 -100
- fides/_version.py +3 -3
- fides/api/models/privacy_request/privacy_request.py +33 -13
- fides/api/schemas/application_config.py +1 -0
- fides/api/service/privacy_request/request_runner_service.py +3 -2
- fides/api/service/privacy_request/request_service.py +173 -32
- fides/api/task/execute_request_tasks.py +4 -0
- fides/api/task/graph_task.py +46 -2
- fides/api/util/cache.py +56 -0
- fides/api/util/memory_watchdog.py +286 -0
- fides/config/execution_settings.py +8 -0
- fides/config/utils.py +1 -0
- fides/ui-build/static/admin/404.html +1 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/data-discovery/action-center/[monitorId]/[systemId]-3e5725cd06d7fe6c.js +1 -0
- fides/ui-build/static/admin/_next/static/{5x65uIwZtfTiu6ITZ4wqq → mGce9yxLktePdBkapyLNl}/_buildManifest.js +1 -1
- fides/ui-build/static/admin/add-systems/manual.html +1 -1
- fides/ui-build/static/admin/add-systems/multiple.html +1 -1
- fides/ui-build/static/admin/add-systems.html +1 -1
- fides/ui-build/static/admin/consent/configure/add-vendors.html +1 -1
- fides/ui-build/static/admin/consent/configure.html +1 -1
- fides/ui-build/static/admin/consent/privacy-experience/[id].html +1 -1
- fides/ui-build/static/admin/consent/privacy-experience/new.html +1 -1
- fides/ui-build/static/admin/consent/privacy-experience.html +1 -1
- fides/ui-build/static/admin/consent/privacy-notices/[id].html +1 -1
- fides/ui-build/static/admin/consent/privacy-notices/new.html +1 -1
- fides/ui-build/static/admin/consent/privacy-notices.html +1 -1
- fides/ui-build/static/admin/consent/properties.html +1 -1
- fides/ui-build/static/admin/consent/reporting.html +1 -1
- fides/ui-build/static/admin/consent.html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/projects/[projectUrn]/[resourceUrn].html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/projects/[projectUrn].html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/projects.html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/resources/[resourceUrn].html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/resources.html +1 -1
- fides/ui-build/static/admin/data-catalog.html +1 -1
- fides/ui-build/static/admin/data-discovery/action-center/[monitorId]/[systemId].html +1 -1
- fides/ui-build/static/admin/data-discovery/action-center/[monitorId].html +1 -1
- fides/ui-build/static/admin/data-discovery/action-center.html +1 -1
- fides/ui-build/static/admin/data-discovery/activity.html +1 -1
- fides/ui-build/static/admin/data-discovery/detection/[resourceUrn].html +1 -1
- fides/ui-build/static/admin/data-discovery/detection.html +1 -1
- fides/ui-build/static/admin/data-discovery/discovery/[resourceUrn].html +1 -1
- fides/ui-build/static/admin/data-discovery/discovery.html +1 -1
- fides/ui-build/static/admin/datamap.html +1 -1
- fides/ui-build/static/admin/dataset/[datasetId]/[collectionName]/[...subfieldNames].html +1 -1
- fides/ui-build/static/admin/dataset/[datasetId]/[collectionName].html +1 -1
- fides/ui-build/static/admin/dataset/[datasetId].html +1 -1
- fides/ui-build/static/admin/dataset/new.html +1 -1
- fides/ui-build/static/admin/dataset.html +1 -1
- fides/ui-build/static/admin/datastore-connection/[id].html +1 -1
- fides/ui-build/static/admin/datastore-connection/new.html +1 -1
- fides/ui-build/static/admin/datastore-connection.html +1 -1
- fides/ui-build/static/admin/index.html +1 -1
- fides/ui-build/static/admin/integrations/[id].html +1 -1
- fides/ui-build/static/admin/integrations.html +1 -1
- fides/ui-build/static/admin/login/[provider].html +1 -1
- fides/ui-build/static/admin/login.html +1 -1
- fides/ui-build/static/admin/messaging/[id].html +1 -1
- fides/ui-build/static/admin/messaging/add-template.html +1 -1
- fides/ui-build/static/admin/messaging.html +1 -1
- fides/ui-build/static/admin/poc/ant-components.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/AntForm.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/FormikAntFormItem.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/FormikControlled.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/FormikField.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/FormikSpreadField.html +1 -1
- fides/ui-build/static/admin/poc/forms.html +1 -1
- fides/ui-build/static/admin/poc/table-migration.html +1 -1
- fides/ui-build/static/admin/privacy-requests/[id].html +1 -1
- fides/ui-build/static/admin/privacy-requests/configure/messaging.html +1 -1
- fides/ui-build/static/admin/privacy-requests/configure/storage.html +1 -1
- fides/ui-build/static/admin/privacy-requests/configure.html +1 -1
- fides/ui-build/static/admin/privacy-requests.html +1 -1
- fides/ui-build/static/admin/properties/[id].html +1 -1
- fides/ui-build/static/admin/properties/add-property.html +1 -1
- fides/ui-build/static/admin/properties.html +1 -1
- fides/ui-build/static/admin/reporting/datamap.html +1 -1
- fides/ui-build/static/admin/settings/about/alpha.html +1 -1
- fides/ui-build/static/admin/settings/about.html +1 -1
- fides/ui-build/static/admin/settings/consent/[configuration_id]/[purpose_id].html +1 -1
- fides/ui-build/static/admin/settings/consent.html +1 -1
- fides/ui-build/static/admin/settings/custom-fields.html +1 -1
- fides/ui-build/static/admin/settings/domain-records.html +1 -1
- fides/ui-build/static/admin/settings/domains.html +1 -1
- fides/ui-build/static/admin/settings/email-templates.html +1 -1
- fides/ui-build/static/admin/settings/locations.html +1 -1
- fides/ui-build/static/admin/settings/organization.html +1 -1
- fides/ui-build/static/admin/settings/regulations.html +1 -1
- fides/ui-build/static/admin/systems/configure/[id]/test-datasets.html +1 -1
- fides/ui-build/static/admin/systems/configure/[id].html +1 -1
- fides/ui-build/static/admin/systems.html +1 -1
- fides/ui-build/static/admin/taxonomy.html +1 -1
- fides/ui-build/static/admin/user-management/new.html +1 -1
- fides/ui-build/static/admin/user-management/profile/[id].html +1 -1
- fides/ui-build/static/admin/user-management.html +1 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/data-discovery/action-center/[monitorId]/[systemId]-a286affa43687eb5.js +0 -1
- {ethyca_fides-2.67.0rc2.dist-info → ethyca_fides-2.67.0rc4.dist-info}/WHEEL +0 -0
- {ethyca_fides-2.67.0rc2.dist-info → ethyca_fides-2.67.0rc4.dist-info}/entry_points.txt +0 -0
- {ethyca_fides-2.67.0rc2.dist-info → ethyca_fides-2.67.0rc4.dist-info}/licenses/LICENSE +0 -0
- {ethyca_fides-2.67.0rc2.dist-info → ethyca_fides-2.67.0rc4.dist-info}/top_level.txt +0 -0
- /fides/ui-build/static/admin/_next/static/{5x65uIwZtfTiu6ITZ4wqq → mGce9yxLktePdBkapyLNl}/_ssgManifest.js +0 -0
|
@@ -3,11 +3,12 @@ from __future__ import annotations
|
|
|
3
3
|
import json
|
|
4
4
|
from asyncio import sleep
|
|
5
5
|
from datetime import datetime, timedelta
|
|
6
|
-
from typing import Any, Dict, Optional, Set
|
|
6
|
+
from typing import Any, Dict, List, Optional, Set
|
|
7
7
|
|
|
8
8
|
from httpx import AsyncClient
|
|
9
9
|
from loguru import logger
|
|
10
10
|
from sqlalchemy import text
|
|
11
|
+
from sqlalchemy.orm import Session
|
|
11
12
|
from sqlalchemy.sql.elements import TextClause
|
|
12
13
|
|
|
13
14
|
from fides.api.common_exceptions import PrivacyRequestNotFound
|
|
@@ -31,6 +32,9 @@ from fides.api.util.cache import (
|
|
|
31
32
|
celery_tasks_in_flight,
|
|
32
33
|
get_async_task_tracking_cache_key,
|
|
33
34
|
get_cache,
|
|
35
|
+
get_privacy_request_retry_count,
|
|
36
|
+
increment_privacy_request_retry_count,
|
|
37
|
+
reset_privacy_request_retry_count,
|
|
34
38
|
)
|
|
35
39
|
from fides.api.util.lock import redis_lock
|
|
36
40
|
from fides.common.api.v1.urn_registry import PRIVACY_REQUESTS, V1_URL_PREFIX
|
|
@@ -350,10 +354,17 @@ def initiate_interrupted_task_requeue_poll() -> None:
|
|
|
350
354
|
|
|
351
355
|
|
|
352
356
|
def get_cached_task_id(entity_id: str) -> Optional[str]:
|
|
353
|
-
"""Gets the cached task ID for a privacy request or request task by ID.
|
|
357
|
+
"""Gets the cached task ID for a privacy request or request task by ID.
|
|
358
|
+
|
|
359
|
+
Raises Exception if cache operations fail, allowing callers to handle cache failures appropriately.
|
|
360
|
+
"""
|
|
354
361
|
cache: FidesopsRedis = get_cache()
|
|
355
|
-
|
|
356
|
-
|
|
362
|
+
try:
|
|
363
|
+
task_id = cache.get(get_async_task_tracking_cache_key(entity_id))
|
|
364
|
+
return task_id
|
|
365
|
+
except Exception as exc:
|
|
366
|
+
logger.error(f"Failed to get cached task ID for entity {entity_id}: {exc}")
|
|
367
|
+
raise
|
|
357
368
|
|
|
358
369
|
|
|
359
370
|
REQUEUE_INTERRUPTED_TASKS_LOCK = "requeue_interrupted_tasks_lock"
|
|
@@ -393,6 +404,115 @@ def _get_task_ids_from_dsr_queue(
|
|
|
393
404
|
return queued_tasks_ids
|
|
394
405
|
|
|
395
406
|
|
|
407
|
+
def _cancel_interrupted_tasks_and_error_privacy_request(
|
|
408
|
+
db: Session, privacy_request: PrivacyRequest, error_message: Optional[str] = None
|
|
409
|
+
) -> None:
|
|
410
|
+
"""
|
|
411
|
+
Cancel all tasks associated with an interrupted privacy request and set the privacy request to error state.
|
|
412
|
+
|
|
413
|
+
This function:
|
|
414
|
+
1. Logs the error message (either provided or default)
|
|
415
|
+
2. Revokes the main privacy request task and all associated request tasks
|
|
416
|
+
3. Sets the privacy request status to error
|
|
417
|
+
4. Creates an error log entry
|
|
418
|
+
|
|
419
|
+
Args:
|
|
420
|
+
db: Database session
|
|
421
|
+
privacy_request: The privacy request to cancel and error
|
|
422
|
+
error_message: Optional error message to log. If not provided, uses default message.
|
|
423
|
+
"""
|
|
424
|
+
if error_message:
|
|
425
|
+
logger.error(error_message)
|
|
426
|
+
else:
|
|
427
|
+
logger.error(
|
|
428
|
+
f"Canceling interrupted tasks and marking privacy request {privacy_request.id} as error"
|
|
429
|
+
)
|
|
430
|
+
|
|
431
|
+
# Cancel all associated Celery tasks
|
|
432
|
+
privacy_request.cancel_celery_tasks()
|
|
433
|
+
|
|
434
|
+
# Set privacy request to error state using the existing method
|
|
435
|
+
try:
|
|
436
|
+
privacy_request.error_processing(db)
|
|
437
|
+
logger.info(
|
|
438
|
+
f"Privacy request {privacy_request.id} marked as error due to task interruption"
|
|
439
|
+
)
|
|
440
|
+
except Exception as exc:
|
|
441
|
+
logger.error(
|
|
442
|
+
f"Failed to mark privacy request {privacy_request.id} as error: {exc}"
|
|
443
|
+
)
|
|
444
|
+
|
|
445
|
+
|
|
446
|
+
def _handle_privacy_request_requeue(
|
|
447
|
+
db: Session, privacy_request: PrivacyRequest
|
|
448
|
+
) -> None:
|
|
449
|
+
"""Handle retry logic for a privacy request - either requeue or cancel based on retry count."""
|
|
450
|
+
try:
|
|
451
|
+
# Check retry count and either requeue or cancel based on limit
|
|
452
|
+
current_retry_count = get_privacy_request_retry_count(privacy_request.id)
|
|
453
|
+
max_retries = CONFIG.execution.privacy_request_requeue_retry_count
|
|
454
|
+
|
|
455
|
+
if current_retry_count < max_retries:
|
|
456
|
+
# Increment retry count and attempt requeue
|
|
457
|
+
new_retry_count = increment_privacy_request_retry_count(privacy_request.id)
|
|
458
|
+
logger.info(
|
|
459
|
+
f"Requeuing privacy request {privacy_request.id} "
|
|
460
|
+
f"(attempt {new_retry_count}/{max_retries})"
|
|
461
|
+
)
|
|
462
|
+
|
|
463
|
+
from fides.service.privacy_request.privacy_request_service import ( # pylint: disable=cyclic-import
|
|
464
|
+
PrivacyRequestError,
|
|
465
|
+
_requeue_privacy_request,
|
|
466
|
+
)
|
|
467
|
+
|
|
468
|
+
try:
|
|
469
|
+
_requeue_privacy_request(db, privacy_request)
|
|
470
|
+
except PrivacyRequestError as exc:
|
|
471
|
+
# If requeue fails, cancel tasks and set to error state
|
|
472
|
+
_cancel_interrupted_tasks_and_error_privacy_request(
|
|
473
|
+
db, privacy_request, exc.message
|
|
474
|
+
)
|
|
475
|
+
else:
|
|
476
|
+
# Exceeded retry limit, cancel tasks and set to error state
|
|
477
|
+
_cancel_interrupted_tasks_and_error_privacy_request(
|
|
478
|
+
db,
|
|
479
|
+
privacy_request,
|
|
480
|
+
f"Privacy request {privacy_request.id} exceeded max retry attempts "
|
|
481
|
+
f"({max_retries}), canceling tasks and setting to error state",
|
|
482
|
+
)
|
|
483
|
+
# Reset retry count since we're giving up
|
|
484
|
+
reset_privacy_request_retry_count(privacy_request.id)
|
|
485
|
+
|
|
486
|
+
except Exception as cache_exc:
|
|
487
|
+
# If cache operations fail (Redis down, network issues, etc.), fail safe by canceling
|
|
488
|
+
_cancel_interrupted_tasks_and_error_privacy_request(
|
|
489
|
+
db,
|
|
490
|
+
privacy_request,
|
|
491
|
+
f"Cache operation failed for privacy request {privacy_request.id}, "
|
|
492
|
+
f"failing safe by canceling tasks: {cache_exc}",
|
|
493
|
+
)
|
|
494
|
+
|
|
495
|
+
|
|
496
|
+
def _get_request_task_ids_in_progress(
|
|
497
|
+
db: Session, privacy_request_id: str
|
|
498
|
+
) -> List[str]:
|
|
499
|
+
"""Get the IDs of request tasks that are currently in progress for a privacy request."""
|
|
500
|
+
request_tasks_in_progress = (
|
|
501
|
+
db.query(RequestTask.id)
|
|
502
|
+
.filter(RequestTask.privacy_request_id == privacy_request_id)
|
|
503
|
+
.filter(
|
|
504
|
+
RequestTask.status.in_(
|
|
505
|
+
[
|
|
506
|
+
ExecutionLogStatus.in_processing,
|
|
507
|
+
ExecutionLogStatus.pending,
|
|
508
|
+
]
|
|
509
|
+
)
|
|
510
|
+
)
|
|
511
|
+
.all()
|
|
512
|
+
)
|
|
513
|
+
return [task[0] for task in request_tasks_in_progress]
|
|
514
|
+
|
|
515
|
+
|
|
396
516
|
# pylint: disable=too-many-branches
|
|
397
517
|
@celery_app.task(base=DatabaseTask, bind=True)
|
|
398
518
|
def requeue_interrupted_tasks(self: DatabaseTask) -> None:
|
|
@@ -442,17 +562,40 @@ def requeue_interrupted_tasks(self: DatabaseTask) -> None:
|
|
|
442
562
|
)
|
|
443
563
|
|
|
444
564
|
# Get task IDs from the queue in a memory-efficient way
|
|
445
|
-
|
|
565
|
+
try:
|
|
566
|
+
queued_tasks_ids = _get_task_ids_from_dsr_queue(redis_conn)
|
|
567
|
+
except Exception as queue_exc:
|
|
568
|
+
logger.warning(
|
|
569
|
+
f"Failed to get task IDs from queue, skipping queue state checks: {queue_exc}"
|
|
570
|
+
)
|
|
571
|
+
return
|
|
446
572
|
|
|
447
573
|
# Check each privacy request
|
|
448
574
|
for privacy_request in in_progress_requests:
|
|
449
575
|
should_requeue = False
|
|
450
576
|
logger.debug(f"Checking tasks for privacy request {privacy_request.id}")
|
|
451
577
|
|
|
452
|
-
|
|
578
|
+
try:
|
|
579
|
+
task_id = get_cached_task_id(privacy_request.id)
|
|
580
|
+
except Exception as cache_exc:
|
|
581
|
+
# If we can't get the task ID due to cache failure, fail safe by canceling
|
|
582
|
+
_cancel_interrupted_tasks_and_error_privacy_request(
|
|
583
|
+
db,
|
|
584
|
+
privacy_request,
|
|
585
|
+
f"Cache failure when getting task ID for privacy request {privacy_request.id}, "
|
|
586
|
+
f"failing safe by canceling tasks: {cache_exc}",
|
|
587
|
+
)
|
|
588
|
+
continue
|
|
453
589
|
|
|
454
590
|
# If the task ID is not cached, we can't check if it's running
|
|
591
|
+
# This means the request is stuck - cancel it
|
|
455
592
|
if not task_id:
|
|
593
|
+
_cancel_interrupted_tasks_and_error_privacy_request(
|
|
594
|
+
db,
|
|
595
|
+
privacy_request,
|
|
596
|
+
f"No task ID found for privacy request {privacy_request.id}, "
|
|
597
|
+
f"request is stuck without a running task - canceling",
|
|
598
|
+
)
|
|
456
599
|
continue
|
|
457
600
|
|
|
458
601
|
# Check if the main privacy request task is active
|
|
@@ -470,30 +613,36 @@ def requeue_interrupted_tasks(self: DatabaseTask) -> None:
|
|
|
470
613
|
)
|
|
471
614
|
should_requeue = True
|
|
472
615
|
|
|
473
|
-
|
|
474
|
-
db.
|
|
475
|
-
.filter(RequestTask.privacy_request_id == privacy_request.id)
|
|
476
|
-
.filter(
|
|
477
|
-
RequestTask.status.in_(
|
|
478
|
-
[
|
|
479
|
-
ExecutionLogStatus.in_processing,
|
|
480
|
-
ExecutionLogStatus.pending,
|
|
481
|
-
]
|
|
482
|
-
)
|
|
483
|
-
)
|
|
484
|
-
.all()
|
|
616
|
+
request_task_ids_in_progress = _get_request_task_ids_in_progress(
|
|
617
|
+
db, privacy_request.id
|
|
485
618
|
)
|
|
486
|
-
request_task_ids_in_progress = [
|
|
487
|
-
task[0] for task in request_tasks_in_progress
|
|
488
|
-
]
|
|
489
619
|
|
|
490
620
|
# Check each individual request task
|
|
491
621
|
for request_task_id in request_task_ids_in_progress:
|
|
492
|
-
|
|
622
|
+
try:
|
|
623
|
+
subtask_id = get_cached_task_id(request_task_id)
|
|
624
|
+
except Exception as cache_exc:
|
|
625
|
+
# If we can't get the subtask ID due to cache failure, fail safe by canceling
|
|
626
|
+
_cancel_interrupted_tasks_and_error_privacy_request(
|
|
627
|
+
db,
|
|
628
|
+
privacy_request,
|
|
629
|
+
f"Cache failure when getting subtask ID for request task {request_task_id} "
|
|
630
|
+
f"(privacy request {privacy_request.id}), failing safe by canceling tasks: {cache_exc}",
|
|
631
|
+
)
|
|
632
|
+
should_requeue = False
|
|
633
|
+
break
|
|
493
634
|
|
|
494
635
|
# If the task ID is not cached, we can't check if it's running
|
|
636
|
+
# This means the subtask is stuck - cancel the entire privacy request
|
|
495
637
|
if not subtask_id:
|
|
496
|
-
|
|
638
|
+
_cancel_interrupted_tasks_and_error_privacy_request(
|
|
639
|
+
db,
|
|
640
|
+
privacy_request,
|
|
641
|
+
f"No task ID found for request task {request_task_id} "
|
|
642
|
+
f"(privacy request {privacy_request.id}), subtask is stuck - canceling privacy request",
|
|
643
|
+
)
|
|
644
|
+
should_requeue = False
|
|
645
|
+
break
|
|
497
646
|
|
|
498
647
|
if (
|
|
499
648
|
subtask_id not in queued_tasks_ids
|
|
@@ -507,12 +656,4 @@ def requeue_interrupted_tasks(self: DatabaseTask) -> None:
|
|
|
507
656
|
|
|
508
657
|
# Requeue the privacy request if needed
|
|
509
658
|
if should_requeue:
|
|
510
|
-
|
|
511
|
-
PrivacyRequestError,
|
|
512
|
-
_requeue_privacy_request,
|
|
513
|
-
)
|
|
514
|
-
|
|
515
|
-
try:
|
|
516
|
-
_requeue_privacy_request(db, privacy_request)
|
|
517
|
-
except PrivacyRequestError as exc:
|
|
518
|
-
logger.error(exc.message)
|
|
659
|
+
_handle_privacy_request_requeue(db, privacy_request)
|
|
@@ -36,6 +36,7 @@ from fides.api.tasks import DSR_QUEUE_NAME, DatabaseTask, celery_app
|
|
|
36
36
|
from fides.api.util.cache import cache_task_tracking_key
|
|
37
37
|
from fides.api.util.collection_util import Row
|
|
38
38
|
from fides.api.util.logger_context_utils import LoggerContextKeys, log_context
|
|
39
|
+
from fides.api.util.memory_watchdog import memory_limiter
|
|
39
40
|
|
|
40
41
|
# DSR 3.0 task functions
|
|
41
42
|
|
|
@@ -255,6 +256,7 @@ def queue_downstream_tasks(
|
|
|
255
256
|
|
|
256
257
|
|
|
257
258
|
@celery_app.task(base=DatabaseTask, bind=True)
|
|
259
|
+
@memory_limiter
|
|
258
260
|
@log_context(
|
|
259
261
|
capture_args={
|
|
260
262
|
"privacy_request_id": LoggerContextKeys.privacy_request_id,
|
|
@@ -319,6 +321,7 @@ def run_access_node(
|
|
|
319
321
|
|
|
320
322
|
|
|
321
323
|
@celery_app.task(base=DatabaseTask, bind=True)
|
|
324
|
+
@memory_limiter
|
|
322
325
|
@log_context(
|
|
323
326
|
capture_args={
|
|
324
327
|
"privacy_request_id": LoggerContextKeys.privacy_request_id,
|
|
@@ -391,6 +394,7 @@ def run_erasure_node(
|
|
|
391
394
|
|
|
392
395
|
|
|
393
396
|
@celery_app.task(base=DatabaseTask, bind=True)
|
|
397
|
+
@memory_limiter
|
|
394
398
|
@log_context(
|
|
395
399
|
capture_args={
|
|
396
400
|
"privacy_request_id": LoggerContextKeys.privacy_request_id,
|
fides/api/task/graph_task.py
CHANGED
|
@@ -62,6 +62,7 @@ from fides.api.util.consent_util import (
|
|
|
62
62
|
)
|
|
63
63
|
from fides.api.util.logger import Pii
|
|
64
64
|
from fides.api.util.logger_context_utils import LoggerContextKeys
|
|
65
|
+
from fides.api.util.memory_watchdog import MemoryLimitExceeded
|
|
65
66
|
from fides.api.util.saas_util import FIDESOPS_GROUPED_INPUTS
|
|
66
67
|
from fides.config import CONFIG
|
|
67
68
|
|
|
@@ -71,6 +72,16 @@ EMPTY_REQUEST = PrivacyRequest()
|
|
|
71
72
|
EMPTY_REQUEST_TASK = RequestTask()
|
|
72
73
|
|
|
73
74
|
|
|
75
|
+
def _is_memory_limit_exceeded(exception: BaseException) -> bool:
|
|
76
|
+
"""Check if the exception or any exception in its chain is a MemoryLimitExceeded."""
|
|
77
|
+
current_exception: Optional[BaseException] = exception
|
|
78
|
+
while current_exception:
|
|
79
|
+
if isinstance(current_exception, MemoryLimitExceeded):
|
|
80
|
+
return True
|
|
81
|
+
current_exception = current_exception.__cause__ or current_exception.__context__
|
|
82
|
+
return False
|
|
83
|
+
|
|
84
|
+
|
|
74
85
|
def retry(
|
|
75
86
|
action_type: ActionType,
|
|
76
87
|
default_return: Any,
|
|
@@ -146,7 +157,31 @@ def retry(
|
|
|
146
157
|
self.log_skipped(action_type, exc)
|
|
147
158
|
self.cache_system_status_for_preferences()
|
|
148
159
|
return default_return
|
|
160
|
+
except MemoryLimitExceeded as ex:
|
|
161
|
+
# Hard failure – mark task & downstream as errored and abort.
|
|
162
|
+
logger.error(
|
|
163
|
+
"Memory watchdog exceeded ({}%). Aborting {} {} without retry.",
|
|
164
|
+
ex.memory_percent,
|
|
165
|
+
method_name,
|
|
166
|
+
self.execution_node.address,
|
|
167
|
+
)
|
|
168
|
+
# Persist error status and create execution logs before raising
|
|
169
|
+
self.log_end(action_type, ex)
|
|
170
|
+
self.add_error_status_for_consent_reporting()
|
|
171
|
+
raise
|
|
149
172
|
except BaseException as ex: # pylint: disable=W0703
|
|
173
|
+
# Check if this exception was caused by memory limit exceeded
|
|
174
|
+
if _is_memory_limit_exceeded(ex):
|
|
175
|
+
logger.error(
|
|
176
|
+
"Memory watchdog exceeded (wrapped exception). Aborting {} {} without retry.",
|
|
177
|
+
method_name,
|
|
178
|
+
self.execution_node.address,
|
|
179
|
+
)
|
|
180
|
+
# Persist error status and create execution logs before raising
|
|
181
|
+
self.log_end(action_type, ex)
|
|
182
|
+
self.add_error_status_for_consent_reporting()
|
|
183
|
+
raise
|
|
184
|
+
|
|
150
185
|
traceback.print_exc()
|
|
151
186
|
func_delay *= CONFIG.execution.task_retry_backoff
|
|
152
187
|
logger.warning(
|
|
@@ -555,12 +590,21 @@ class GraphTask(ABC): # pylint: disable=too-many-instance-attributes
|
|
|
555
590
|
# For access request results, mutate rows in-place to remove non-matching
|
|
556
591
|
# array elements. We already iterated over `output` above, so reuse the same
|
|
557
592
|
# loop structure to keep cache locality.
|
|
593
|
+
logger.info(
|
|
594
|
+
"Filtering {} rows in {} for matching array elements.",
|
|
595
|
+
len(output),
|
|
596
|
+
self.execution_node.address,
|
|
597
|
+
)
|
|
558
598
|
for row in output:
|
|
599
|
+
filter_element_match(row, post_processed_node_input_data)
|
|
600
|
+
|
|
601
|
+
if len(output) > 0:
|
|
559
602
|
logger.info(
|
|
560
|
-
"Filtering
|
|
603
|
+
"Filtering completed for {} rows in {}. Post-processed node size: {}",
|
|
604
|
+
len(output),
|
|
561
605
|
self.execution_node.address,
|
|
606
|
+
len(post_processed_node_input_data),
|
|
562
607
|
)
|
|
563
|
-
filter_element_match(row, post_processed_node_input_data)
|
|
564
608
|
|
|
565
609
|
if self.request_task.id:
|
|
566
610
|
# Saves intermediate access results for DSR 3.0 directly on the Request Task
|
fides/api/util/cache.py
CHANGED
|
@@ -334,6 +334,62 @@ def cache_task_tracking_key(request_id: str, celery_task_id: str) -> None:
|
|
|
334
334
|
)
|
|
335
335
|
|
|
336
336
|
|
|
337
|
+
def get_privacy_request_retry_cache_key(privacy_request_id: str) -> str:
|
|
338
|
+
"""Get cache key for tracking privacy request requeue retry attempts."""
|
|
339
|
+
return f"id-{privacy_request_id}-privacy-request-retry-count"
|
|
340
|
+
|
|
341
|
+
|
|
342
|
+
def get_privacy_request_retry_count(privacy_request_id: str) -> int:
|
|
343
|
+
"""Get the current retry count for a privacy request requeue attempts.
|
|
344
|
+
|
|
345
|
+
Raises Exception if cache operations fail, allowing callers to handle cache failures appropriately.
|
|
346
|
+
"""
|
|
347
|
+
cache: FidesopsRedis = get_cache()
|
|
348
|
+
try:
|
|
349
|
+
retry_count = cache.get(get_privacy_request_retry_cache_key(privacy_request_id))
|
|
350
|
+
return int(retry_count) if retry_count else 0
|
|
351
|
+
except Exception as exc:
|
|
352
|
+
logger.error(
|
|
353
|
+
f"Failed to get retry count for privacy request {privacy_request_id}: {exc}"
|
|
354
|
+
)
|
|
355
|
+
raise
|
|
356
|
+
|
|
357
|
+
|
|
358
|
+
def increment_privacy_request_retry_count(privacy_request_id: str) -> int:
|
|
359
|
+
"""Increment and return the retry count for a privacy request requeue attempts.
|
|
360
|
+
|
|
361
|
+
Raises Exception if cache operations fail, allowing callers to handle cache failures appropriately.
|
|
362
|
+
"""
|
|
363
|
+
cache: FidesopsRedis = get_cache()
|
|
364
|
+
cache_key = get_privacy_request_retry_cache_key(privacy_request_id)
|
|
365
|
+
|
|
366
|
+
try:
|
|
367
|
+
# Increment the counter, will be 1 if key doesn't exist
|
|
368
|
+
new_count = cache.incr(cache_key)
|
|
369
|
+
# Set expiry to prevent cache buildup (24 hours)
|
|
370
|
+
cache.expire(cache_key, 86400)
|
|
371
|
+
return new_count
|
|
372
|
+
except Exception as exc:
|
|
373
|
+
logger.error(
|
|
374
|
+
f"Failed to increment retry count for privacy request {privacy_request_id}: {exc}"
|
|
375
|
+
)
|
|
376
|
+
raise
|
|
377
|
+
|
|
378
|
+
|
|
379
|
+
def reset_privacy_request_retry_count(privacy_request_id: str) -> None:
|
|
380
|
+
"""Reset the retry count for a privacy request requeue attempts.
|
|
381
|
+
|
|
382
|
+
Silently fails if cache operations fail since this is cleanup.
|
|
383
|
+
"""
|
|
384
|
+
cache: FidesopsRedis = get_cache()
|
|
385
|
+
try:
|
|
386
|
+
cache.delete(get_privacy_request_retry_cache_key(privacy_request_id))
|
|
387
|
+
except Exception as exc:
|
|
388
|
+
logger.warning(
|
|
389
|
+
f"Failed to reset retry count for privacy request {privacy_request_id}: {exc}"
|
|
390
|
+
)
|
|
391
|
+
|
|
392
|
+
|
|
337
393
|
def celery_tasks_in_flight(celery_task_ids: List[str]) -> bool:
|
|
338
394
|
"""Returns True if supplied Celery Tasks appear to be in-flight"""
|
|
339
395
|
if not celery_task_ids:
|