ethyca-fides 2.63.1b1__py2.py3-none-any.whl → 2.63.1b4__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ethyca_fides-2.63.1b1.dist-info → ethyca_fides-2.63.1b4.dist-info}/METADATA +1 -1
- {ethyca_fides-2.63.1b1.dist-info → ethyca_fides-2.63.1b4.dist-info}/RECORD +139 -120
- fides/_version.py +3 -3
- fides/api/alembic/migrations/versions/29e56fa1fdb3_add_monitor_tasks.py +147 -0
- fides/api/alembic/migrations/versions/5efcdf18438e_add_manual_task_tables.py +160 -0
- fides/api/api/v1/endpoints/privacy_request_endpoints.py +4 -4
- fides/api/db/base.py +7 -1
- fides/api/models/connectionconfig.py +1 -1
- fides/api/models/detection_discovery/__init__.py +35 -0
- fides/api/models/detection_discovery/monitor_task.py +161 -0
- fides/api/models/field_types/__init__.py +5 -0
- fides/api/models/field_types/encrypted_large_data.py +151 -0
- fides/api/models/manual_tasks/__init__.py +8 -0
- fides/api/models/manual_tasks/manual_task.py +110 -0
- fides/api/models/manual_tasks/manual_task_log.py +100 -0
- fides/api/models/privacy_preference.py +1 -1
- fides/api/models/privacy_request/execution_log.py +3 -31
- fides/api/models/privacy_request/privacy_request.py +16 -3
- fides/api/models/privacy_request/request_task.py +36 -25
- fides/api/models/worker_task.py +96 -0
- fides/api/schemas/external_storage.py +22 -0
- fides/api/schemas/manual_tasks/__init__.py +0 -0
- fides/api/schemas/manual_tasks/manual_task_schemas.py +79 -0
- fides/api/schemas/manual_tasks/manual_task_status.py +151 -0
- fides/api/schemas/privacy_request.py +1 -12
- fides/api/service/connectors/base_erasure_email_connector.py +1 -1
- fides/api/service/connectors/consent_email_connector.py +2 -1
- fides/api/service/connectors/dynamic_erasure_email_connector.py +2 -1
- fides/api/service/connectors/erasure_email_connector.py +1 -1
- fides/api/service/external_data_storage.py +371 -0
- fides/api/service/privacy_request/request_runner_service.py +5 -5
- fides/api/service/privacy_request/request_service.py +1 -1
- fides/api/task/create_request_tasks.py +1 -1
- fides/api/task/execute_request_tasks.py +9 -8
- fides/api/task/graph_task.py +22 -10
- fides/api/util/cache.py +77 -1
- fides/api/util/consent_util.py +1 -1
- fides/api/util/data_size.py +102 -0
- fides/api/util/encryption/aes_gcm_encryption_util.py +271 -0
- fides/config/redis_settings.py +99 -8
- fides/service/manual_tasks/__init__.py +0 -0
- fides/service/manual_tasks/manual_task_service.py +150 -0
- fides/service/privacy_request/privacy_request_service.py +1 -1
- fides/ui-build/static/admin/404.html +1 -1
- fides/ui-build/static/admin/_next/static/{74KgkHM2cEVIXGgJPlTZ3 → X2nvWLg2_-vsCTkhSWpzw}/_buildManifest.js +1 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/privacy-requests/configure/messaging-c583a61302f02add.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/pages/privacy-requests/configure/storage-20d20a8d1736f7c4.js +1 -0
- fides/ui-build/static/admin/_next/static/chunks/pages/privacy-requests-0e557d79e1e43c2b.js +1 -0
- fides/ui-build/static/admin/add-systems/manual.html +1 -1
- fides/ui-build/static/admin/add-systems/multiple.html +1 -1
- fides/ui-build/static/admin/add-systems.html +1 -1
- fides/ui-build/static/admin/consent/configure/add-vendors.html +1 -1
- fides/ui-build/static/admin/consent/configure.html +1 -1
- fides/ui-build/static/admin/consent/privacy-experience/[id].html +1 -1
- fides/ui-build/static/admin/consent/privacy-experience/new.html +1 -1
- fides/ui-build/static/admin/consent/privacy-experience.html +1 -1
- fides/ui-build/static/admin/consent/privacy-notices/[id].html +1 -1
- fides/ui-build/static/admin/consent/privacy-notices/new.html +1 -1
- fides/ui-build/static/admin/consent/privacy-notices.html +1 -1
- fides/ui-build/static/admin/consent/properties.html +1 -1
- fides/ui-build/static/admin/consent/reporting.html +1 -1
- fides/ui-build/static/admin/consent.html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/projects/[projectUrn]/[resourceUrn].html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/projects/[projectUrn].html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/projects.html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/resources/[resourceUrn].html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/resources.html +1 -1
- fides/ui-build/static/admin/data-catalog.html +1 -1
- fides/ui-build/static/admin/data-discovery/action-center/[monitorId]/[systemId].html +1 -1
- fides/ui-build/static/admin/data-discovery/action-center/[monitorId].html +1 -1
- fides/ui-build/static/admin/data-discovery/action-center.html +1 -1
- fides/ui-build/static/admin/data-discovery/activity.html +1 -1
- fides/ui-build/static/admin/data-discovery/detection/[resourceUrn].html +1 -1
- fides/ui-build/static/admin/data-discovery/detection.html +1 -1
- fides/ui-build/static/admin/data-discovery/discovery/[resourceUrn].html +1 -1
- fides/ui-build/static/admin/data-discovery/discovery.html +1 -1
- fides/ui-build/static/admin/datamap.html +1 -1
- fides/ui-build/static/admin/dataset/[datasetId]/[collectionName]/[...subfieldNames].html +1 -1
- fides/ui-build/static/admin/dataset/[datasetId]/[collectionName].html +1 -1
- fides/ui-build/static/admin/dataset/[datasetId].html +1 -1
- fides/ui-build/static/admin/dataset/new.html +1 -1
- fides/ui-build/static/admin/dataset.html +1 -1
- fides/ui-build/static/admin/datastore-connection/[id].html +1 -1
- fides/ui-build/static/admin/datastore-connection/new.html +1 -1
- fides/ui-build/static/admin/datastore-connection.html +1 -1
- fides/ui-build/static/admin/index.html +1 -1
- fides/ui-build/static/admin/integrations/[id].html +1 -1
- fides/ui-build/static/admin/integrations.html +1 -1
- fides/ui-build/static/admin/lib/fides-ext-gpp.js +1 -1
- fides/ui-build/static/admin/lib/fides-headless.js +1 -1
- fides/ui-build/static/admin/lib/fides-preview.js +1 -1
- fides/ui-build/static/admin/lib/fides-tcf.js +2 -2
- fides/ui-build/static/admin/lib/fides.js +2 -2
- fides/ui-build/static/admin/login/[provider].html +1 -1
- fides/ui-build/static/admin/login.html +1 -1
- fides/ui-build/static/admin/messaging/[id].html +1 -1
- fides/ui-build/static/admin/messaging/add-template.html +1 -1
- fides/ui-build/static/admin/messaging.html +1 -1
- fides/ui-build/static/admin/poc/ant-components.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/AntForm.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/FormikAntFormItem.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/FormikControlled.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/FormikField.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/FormikSpreadField.html +1 -1
- fides/ui-build/static/admin/poc/forms.html +1 -1
- fides/ui-build/static/admin/poc/table-migration.html +1 -1
- fides/ui-build/static/admin/privacy-requests/[id].html +1 -1
- fides/ui-build/static/admin/privacy-requests/configure/messaging.html +1 -1
- fides/ui-build/static/admin/privacy-requests/configure/storage.html +1 -1
- fides/ui-build/static/admin/privacy-requests/configure.html +1 -1
- fides/ui-build/static/admin/privacy-requests.html +1 -1
- fides/ui-build/static/admin/properties/[id].html +1 -1
- fides/ui-build/static/admin/properties/add-property.html +1 -1
- fides/ui-build/static/admin/properties.html +1 -1
- fides/ui-build/static/admin/reporting/datamap.html +1 -1
- fides/ui-build/static/admin/settings/about/alpha.html +1 -1
- fides/ui-build/static/admin/settings/about.html +1 -1
- fides/ui-build/static/admin/settings/consent/[configuration_id]/[purpose_id].html +1 -1
- fides/ui-build/static/admin/settings/consent.html +1 -1
- fides/ui-build/static/admin/settings/custom-fields.html +1 -1
- fides/ui-build/static/admin/settings/domain-records.html +1 -1
- fides/ui-build/static/admin/settings/domains.html +1 -1
- fides/ui-build/static/admin/settings/email-templates.html +1 -1
- fides/ui-build/static/admin/settings/locations.html +1 -1
- fides/ui-build/static/admin/settings/organization.html +1 -1
- fides/ui-build/static/admin/settings/regulations.html +1 -1
- fides/ui-build/static/admin/systems/configure/[id]/test-datasets.html +1 -1
- fides/ui-build/static/admin/systems/configure/[id].html +1 -1
- fides/ui-build/static/admin/systems.html +1 -1
- fides/ui-build/static/admin/taxonomy.html +1 -1
- fides/ui-build/static/admin/user-management/new.html +1 -1
- fides/ui-build/static/admin/user-management/profile/[id].html +1 -1
- fides/ui-build/static/admin/user-management.html +1 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/privacy-requests/configure/messaging-8cab04871908cfeb.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/privacy-requests/configure/storage-150d40428245ee0c.js +0 -1
- fides/ui-build/static/admin/_next/static/chunks/pages/privacy-requests-20cdb2c8a03deae1.js +0 -1
- {ethyca_fides-2.63.1b1.dist-info → ethyca_fides-2.63.1b4.dist-info}/WHEEL +0 -0
- {ethyca_fides-2.63.1b1.dist-info → ethyca_fides-2.63.1b4.dist-info}/entry_points.txt +0 -0
- {ethyca_fides-2.63.1b1.dist-info → ethyca_fides-2.63.1b4.dist-info}/licenses/LICENSE +0 -0
- {ethyca_fides-2.63.1b1.dist-info → ethyca_fides-2.63.1b4.dist-info}/top_level.txt +0 -0
- /fides/api/models/{detection_discovery.py → detection_discovery/core.py} +0 -0
- /fides/ui-build/static/admin/_next/static/{74KgkHM2cEVIXGgJPlTZ3 → X2nvWLg2_-vsCTkhSWpzw}/_ssgManifest.js +0 -0
@@ -0,0 +1,102 @@
|
|
1
|
+
"""
|
2
|
+
Helpers for estimating the size of large collections of access data.
|
3
|
+
"""
|
4
|
+
|
5
|
+
from __future__ import annotations
|
6
|
+
|
7
|
+
import json
|
8
|
+
import sys
|
9
|
+
from typing import List, Optional
|
10
|
+
|
11
|
+
from loguru import logger
|
12
|
+
|
13
|
+
from fides.api.util.collection_util import Row
|
14
|
+
from fides.api.util.custom_json_encoder import CustomJSONEncoder
|
15
|
+
|
16
|
+
# 640MB threshold for external storage
|
17
|
+
# We only generate an estimated size for large datasets so we want to be conservative
|
18
|
+
# and fallback to external storage even if we haven't hit the 1GB max limit.
|
19
|
+
# We also want to pad for encryption and base64 encoding.
|
20
|
+
LARGE_DATA_THRESHOLD_BYTES = 640 * 1024 * 1024 # 640MB
|
21
|
+
|
22
|
+
|
23
|
+
def calculate_data_size(data: List[Row]) -> int:  # noqa: D401 – utility function
    """Return an approximate JSON-serialized size (in bytes) for a list of *Row*.

    The implementation purposefully avoids serializing the entire payload when
    *data* is large. For collections >1000 rows we sample a subset, measure the
    encoded size, then extrapolate. This keeps memory usage bounded while still
    giving us an order-of-magnitude estimate suitable for "should I stream this
    out to S3?" decisions.

    Args:
        data: Rows to measure.

    Returns:
        Exact serialized size for small inputs (<=1000 rows); an extrapolated
        estimate for larger inputs. Falls back to ``sys.getsizeof`` if the
        payload is not JSON-serializable.
    """
    if not data:
        return 0

    try:
        data_count = len(data)

        # For very large datasets, estimate size from a sample to avoid memory issues
        if data_count > 1000:
            logger.debug(
                f"Calculating size for large dataset ({data_count} rows) using sampling"
            )

            sample_size = min(500, max(100, data_count // 20))  # 5 % capped at 500

            # stratified sampling – take items spaced across the set when possible
            if data_count > sample_size * 3:
                step = data_count // sample_size
                sample_indices = list(range(0, data_count, step))[:sample_size]
                sample = [data[i] for i in sample_indices]
            else:
                sample = data[:sample_size]

            sample_json = json.dumps(
                sample, cls=CustomJSONEncoder, separators=(",", ":")
            )
            sample_bytes = len(sample_json.encode("utf-8"))

            # Divide by the *actual* number of sampled rows, not the requested
            # sample_size, so the per-record average stays correct even if the
            # sampling strategy ever collects fewer rows than requested.
            avg_record_size = sample_bytes / len(sample)
            content_size = int(avg_record_size * data_count)

            # overhead: 2 bytes for [] plus a comma between every record plus 1 % slack
            structure_overhead = 2 + (data_count - 1) + int(content_size * 0.01)
            return content_size + structure_overhead

        # small datasets – just measure
        json_str = json.dumps(data, cls=CustomJSONEncoder, separators=(",", ":"))
        return len(json_str.encode("utf-8"))

    except (TypeError, ValueError) as exc:
        logger.warning(
            f"Failed to calculate JSON size, falling back to sys.getsizeof: {exc}"
        )
        # sys.getsizeof is shallow but better than nothing for unserializable data
        return sys.getsizeof(data)
|
76
|
+
|
77
|
+
|
78
|
+
def is_large_data(
    data: List[Row], threshold_bytes: Optional[int] = None
) -> bool:  # noqa: D401
    """Return *True* if *data* is likely to exceed *threshold_bytes* when serialized.

    Args:
        data: Rows to check.
        threshold_bytes: Optional override; defaults to LARGE_DATA_THRESHOLD_BYTES.
    """
    if not data:
        return False

    limit = LARGE_DATA_THRESHOLD_BYTES if threshold_bytes is None else threshold_bytes
    estimated = calculate_data_size(data)
    if estimated <= limit:
        return False

    logger.info(
        f"Data size ({estimated:,} bytes) exceeds threshold ({limit:,} bytes) – using external storage"
    )
    return True
|
96
|
+
|
97
|
+
|
98
|
+
# Explicit public API of this module.
__all__ = [
    "calculate_data_size",
    "is_large_data",
    "LARGE_DATA_THRESHOLD_BYTES",
]
|
@@ -0,0 +1,271 @@
|
|
1
|
+
"""
|
2
|
+
AES GCM encryption utilities with SQLAlchemy-Utils and cryptography library implementations.
|
3
|
+
|
4
|
+
This module provides simplified encrypt/decrypt functions using two approaches:
|
5
|
+
1. SQLAlchemy-Utils AesGcmEngine (compatible with existing database encryption)
|
6
|
+
2. Cryptography library with chunked processing (better performance, standard library)
|
7
|
+
"""
|
8
|
+
|
9
|
+
import base64
|
10
|
+
import hashlib
|
11
|
+
import json
|
12
|
+
import os
|
13
|
+
from typing import Any, List, Optional, Union
|
14
|
+
|
15
|
+
from cryptography.hazmat.backends import default_backend
|
16
|
+
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
|
17
|
+
from loguru import logger
|
18
|
+
from sqlalchemy_utils.types.encrypted.encrypted_type import AesGcmEngine
|
19
|
+
|
20
|
+
from fides.api.util.collection_util import Row
|
21
|
+
from fides.api.util.custom_json_encoder import CustomJSONEncoder, _custom_decoder
|
22
|
+
from fides.config import CONFIG
|
23
|
+
|
24
|
+
|
25
|
+
class EncryptionError(Exception):
    """Raised when encryption/decryption operations fail.

    Used by this module to wrap lower-level serialization, cipher, and decoding
    errors so callers can catch a single exception type.
    """
|
27
|
+
|
28
|
+
|
29
|
+
# SQLAlchemy-Utils Implementation (for compatibility with existing database encryption)
|
30
|
+
def encrypt_with_sqlalchemy_utils(data: List[Row]) -> bytes:
    """
    Serialize and encrypt data using CustomJSONEncoder and SQLAlchemy-Utils AesGcmEngine.

    This approach is compatible with existing database encryption but has lower performance.

    Args:
        data: Raw data to serialize and encrypt

    Returns:
        Encrypted bytes

    Raises:
        EncryptionError: If serialization or encryption fails
    """
    try:
        # Serialize using CustomJSONEncoder for consistent ObjectId handling
        serialized_data = json.dumps(data, cls=CustomJSONEncoder, separators=(",", ":"))
        data_bytes = serialized_data.encode("utf-8")

        # Encrypt using SQLAlchemy-Utils AesGcmEngine
        engine = AesGcmEngine()
        key = CONFIG.security.app_encryption_key
        engine._update_key(key)  # pylint: disable=protected-access

        # AesGcmEngine expects string input
        data_str = data_bytes.decode("utf-8")
        encrypted_data = engine.encrypt(data_str)
        encrypted_bytes = encrypted_data.encode("utf-8")

        logger.debug(
            f"SQLAlchemy-Utils: Encrypted {len(data_bytes)} bytes to {len(encrypted_bytes)} bytes"
        )
        return encrypted_bytes

    except Exception as e:
        logger.error(f"SQLAlchemy-Utils encryption failed: {e}")
        # Chain the original exception so the root cause survives in tracebacks.
        raise EncryptionError(f"SQLAlchemy-Utils encryption failed: {str(e)}") from e
|
68
|
+
|
69
|
+
|
70
|
+
def decrypt_with_sqlalchemy_utils(encrypted_bytes: bytes) -> List[Row]:
    """
    Decrypt and deserialize data using SQLAlchemy-Utils AesGcmEngine and _custom_decoder.

    Args:
        encrypted_bytes: Encrypted data bytes to decrypt

    Returns:
        Deserialized data

    Raises:
        EncryptionError: If decryption or deserialization fails
    """
    try:
        # Decrypt using SQLAlchemy-Utils AesGcmEngine
        engine = AesGcmEngine()
        key = CONFIG.security.app_encryption_key
        engine._update_key(key)  # pylint: disable=protected-access

        # AesGcmEngine expects string input
        encrypted_str = encrypted_bytes.decode("utf-8")
        decrypted_data = engine.decrypt(encrypted_str)

        # Deserialize using _custom_decoder for consistent ObjectId handling
        data = json.loads(decrypted_data, object_hook=_custom_decoder)

        logger.debug(
            f"SQLAlchemy-Utils: Decrypted {len(encrypted_bytes)} bytes to {len(data)} records"
        )
        return data

    except Exception as e:
        logger.error(f"SQLAlchemy-Utils decryption failed: {e}")
        # Chain the original exception so the root cause survives in tracebacks.
        raise EncryptionError(f"SQLAlchemy-Utils decryption failed: {str(e)}") from e
|
104
|
+
|
105
|
+
|
106
|
+
# Cryptography Library Implementation (standard, chunked processing)
|
107
|
+
def encrypt_with_cryptography(
    data: Union[List[Row], Any], chunk_size: Optional[int] = None
) -> bytes:
    """
    Serialize and encrypt data using the standard cryptography library with chunked processing.

    This provides fast performance and memory efficiency for large datasets.

    Args:
        data: Raw data to serialize and encrypt
        chunk_size: Size of chunks for processing (default 4MB)

    Returns:
        Encrypted bytes (base64-encoded string as bytes)

    Raises:
        EncryptionError: If serialization or encryption fails
    """
    try:
        # Set default chunk size
        if chunk_size is None:
            chunk_size = 4 * 1024 * 1024  # 4MB chunks

        # Serialize using CustomJSONEncoder for consistent handling
        serialized_data = json.dumps(data, cls=CustomJSONEncoder, separators=(",", ":"))
        plaintext = serialized_data.encode("utf-8")

        data_size_mb = len(plaintext) / (1024 * 1024)
        chunk_size_mb = chunk_size / (1024 * 1024)
        estimated_chunks = len(plaintext) // chunk_size + (
            1 if len(plaintext) % chunk_size else 0
        )
        record_count = len(data) if isinstance(data, list) else "N/A"

        logger.info(
            f"Cryptography: Encrypting {record_count} records ({data_size_mb:.1f} MB) "
            f"using {chunk_size_mb:.0f}MB chunks (~{estimated_chunks} chunks)"
        )

        # Use SQLAlchemy-Utils compatible key (SHA256 hash of app key)
        key = _get_sqlalchemy_compatible_key()
        nonce = os.urandom(12)  # 96-bit nonce for AES-GCM

        # Create cipher
        cipher = Cipher(
            algorithms.AES(key), modes.GCM(nonce), backend=default_backend()
        )
        encryptor = cipher.encryptor()

        # Process in chunks for memory efficiency
        ciphertext_chunks = []
        for i in range(0, len(plaintext), chunk_size):
            chunk = plaintext[i : i + chunk_size]
            ciphertext_chunks.append(encryptor.update(chunk))

        # Finalize and get tag. GCM is a stream mode so finalize() normally
        # returns b"", but append it anyway so no trailing bytes are dropped.
        final_chunk = encryptor.finalize()
        if final_chunk:
            ciphertext_chunks.append(final_chunk)
        tag = encryptor.tag

        # Combine in same format as SQLAlchemy-Utils: [nonce/iv][tag][ciphertext]
        ciphertext = b"".join(ciphertext_chunks)
        binary_result = nonce + tag + ciphertext

        # Base64 encode to match SQLAlchemy-Utils format
        base64_result = base64.b64encode(binary_result).decode("utf-8")
        result_bytes = base64_result.encode("utf-8")

        encrypted_size_mb = len(result_bytes) / (1024 * 1024)
        logger.info(
            f"Cryptography: Encrypted successfully - "
            f"{len(ciphertext_chunks)} chunks, {encrypted_size_mb:.1f} MB output (base64)"
        )

        return result_bytes

    except Exception as e:
        logger.error(f"Cryptography encryption failed: {e}")
        # Chain the original exception so the root cause survives in tracebacks.
        raise EncryptionError(f"Cryptography encryption failed: {str(e)}") from e
|
185
|
+
|
186
|
+
|
187
|
+
def decrypt_with_cryptography(
    encrypted_bytes: bytes, chunk_size: Optional[int] = None
) -> Union[List[Row], Any]:
    """
    Decrypt and deserialize data using the cryptography library with chunked processing.

    Args:
        encrypted_bytes: Encrypted data (base64-encoded string as bytes)
        chunk_size: Size of chunks for processing (default 4MB)

    Returns:
        Deserialized data

    Raises:
        EncryptionError: If decryption or deserialization fails
    """
    try:
        # Set default chunk size
        if chunk_size is None:
            chunk_size = 4 * 1024 * 1024  # 4MB chunks

        # Decode from base64
        encrypted_str = encrypted_bytes.decode("utf-8")
        binary_data = base64.b64decode(encrypted_str)

        # Extract components in SQLAlchemy-Utils format: [nonce/iv][tag][ciphertext]
        if len(binary_data) < 28:  # 12 (nonce) + 16 (tag)
            raise ValueError("Encrypted data too short")

        nonce = binary_data[:12]  # First 12 bytes: nonce/IV
        tag = binary_data[12:28]  # Next 16 bytes: tag
        ciphertext = binary_data[28:]  # Remaining bytes: ciphertext

        encrypted_size_mb = len(encrypted_bytes) / (1024 * 1024)
        chunk_size_mb = chunk_size / (1024 * 1024)
        estimated_chunks = len(ciphertext) // chunk_size + (
            1 if len(ciphertext) % chunk_size else 0
        )

        logger.info(
            f"Cryptography: Decrypting {encrypted_size_mb:.1f} MB "
            f"using {chunk_size_mb:.0f}MB chunks (~{estimated_chunks} chunks)"
        )

        # Use SQLAlchemy-Utils compatible key
        key = _get_sqlalchemy_compatible_key()
        cipher = Cipher(
            algorithms.AES(key), modes.GCM(nonce, tag), backend=default_backend()
        )
        decryptor = cipher.decryptor()

        # Process in chunks for memory efficiency
        plaintext_chunks = []
        for i in range(0, len(ciphertext), chunk_size):
            chunk = ciphertext[i : i + chunk_size]
            plaintext_chunks.append(decryptor.update(chunk))

        # Finalize (verifies the GCM tag). GCM is a stream mode so finalize()
        # normally returns b"", but append it anyway so no bytes are dropped.
        final_chunk = decryptor.finalize()
        if final_chunk:
            plaintext_chunks.append(final_chunk)

        # Combine and deserialize
        plaintext = b"".join(plaintext_chunks)
        decrypted_json = plaintext.decode("utf-8")
        data = json.loads(decrypted_json, object_hook=_custom_decoder)

        record_count = len(data) if isinstance(data, list) else "N/A"
        logger.info(f"Cryptography: Successfully decrypted {record_count} records")

        return data

    except Exception as e:
        logger.error(f"Cryptography decryption failed: {e}")
        # Chain the original exception so the root cause survives in tracebacks.
        raise EncryptionError(f"Cryptography decryption failed: {str(e)}") from e
|
260
|
+
|
261
|
+
|
262
|
+
def _get_sqlalchemy_compatible_key() -> bytes:
    """Get 32-byte encryption key compatible with SQLAlchemy-Utils AesGcmEngine."""
    raw_key = CONFIG.security.app_encryption_key.encode(CONFIG.security.encoding)
    # SQLAlchemy-Utils always uses SHA256 hash of the key
    digest = hashlib.sha256(raw_key)
    return digest.digest()
|
267
|
+
|
268
|
+
|
269
|
+
# Public API - Use cryptography by default for new operations
|
270
|
+
encrypt_data = encrypt_with_cryptography
|
271
|
+
decrypt_data = decrypt_with_cryptography
|
fides/config/redis_settings.py
CHANGED
@@ -20,6 +20,10 @@ class RedisSettings(FidesSettings):
|
|
20
20
|
default=0,
|
21
21
|
description="The application will use this index in the Redis cache to cache data.",
|
22
22
|
)
|
23
|
+
test_db_index: int = Field(
|
24
|
+
default=1,
|
25
|
+
description="The application will use this index in the Redis cache to cache data for testing.",
|
26
|
+
)
|
23
27
|
decode_responses: bool = Field(
|
24
28
|
default=True,
|
25
29
|
description="Whether or not to automatically decode the values fetched from Redis. Decodes using the `charset` configuration value.",
|
@@ -64,14 +68,57 @@ class RedisSettings(FidesSettings):
|
|
64
68
|
default="", description="The user with which to login to the Redis cache."
|
65
69
|
)
|
66
70
|
|
71
|
+
# Read-only Redis settings
|
72
|
+
read_only_enabled: bool = Field(
|
73
|
+
default=False,
|
74
|
+
description="Whether a read-only Redis cache is enabled.",
|
75
|
+
)
|
76
|
+
read_only_host: str = Field(
|
77
|
+
default="",
|
78
|
+
description="The network address for the read-only Redis cache.",
|
79
|
+
)
|
80
|
+
read_only_port: int = Field(
|
81
|
+
default=6379,
|
82
|
+
description="The port at which the read-only Redis cache will be accessible.",
|
83
|
+
)
|
84
|
+
read_only_user: str = Field(
|
85
|
+
default="",
|
86
|
+
description="The user with which to login to the read-only Redis cache.",
|
87
|
+
)
|
88
|
+
read_only_password: str = Field(
|
89
|
+
default="",
|
90
|
+
description="The password with which to login to the read-only Redis cache.",
|
91
|
+
)
|
92
|
+
read_only_db_index: int = Field(
|
93
|
+
default=0,
|
94
|
+
description="The application will use this index in the read-only Redis cache to cache data.",
|
95
|
+
)
|
96
|
+
read_only_ssl: bool = Field(
|
97
|
+
default=False,
|
98
|
+
description="Whether the application's connections to the read-only cache should be encrypted using TLS.",
|
99
|
+
)
|
100
|
+
read_only_ssl_cert_reqs: Optional[str] = Field(
|
101
|
+
default="required",
|
102
|
+
description="If using TLS encryption, set this to 'required' if you wish to enforce the read-only Redis cache to provide a certificate. Note that not all cache providers support this without setting ssl_ca_certs (e.g. AWS Elasticache).",
|
103
|
+
)
|
104
|
+
read_only_ssl_ca_certs: str = Field(
|
105
|
+
default="",
|
106
|
+
description="If using TLS encryption rooted with a custom Certificate Authority, set this to the path of the CA certificate.",
|
107
|
+
)
|
108
|
+
|
67
109
|
# This relies on other values to get built so must be last
|
68
110
|
connection_url: Optional[str] = Field(
|
69
111
|
default=None,
|
70
112
|
description="A full connection URL to the Redis cache. If not specified, this URL is automatically assembled from the host, port, password and db_index specified above.",
|
71
113
|
exclude=True,
|
72
114
|
)
|
115
|
+
read_only_connection_url: Optional[str] = Field(
|
116
|
+
default=None,
|
117
|
+
description="A full connection URL to the read-only Redis cache. If not specified, this URL is automatically assembled from the read_only_host, read_only_port, read_only_password and read_only_db_index specified above.",
|
118
|
+
exclude=True,
|
119
|
+
)
|
73
120
|
|
74
|
-
@field_validator("connection_url", mode="before")
|
121
|
+
@field_validator("connection_url", "read_only_connection_url", mode="before")
|
75
122
|
@classmethod
|
76
123
|
def assemble_connection_url(
|
77
124
|
cls,
|
@@ -83,22 +130,50 @@ class RedisSettings(FidesSettings):
|
|
83
130
|
# If the whole URL is provided via the config, preference that
|
84
131
|
return v
|
85
132
|
|
133
|
+
is_read_only = info.field_name == "read_only_connection_url"
|
134
|
+
|
86
135
|
connection_protocol = "redis"
|
87
136
|
params_str = ""
|
88
|
-
use_tls =
|
137
|
+
use_tls = (
|
138
|
+
info.data.get("read_only_ssl")
|
139
|
+
if is_read_only
|
140
|
+
else info.data.get("ssl", False)
|
141
|
+
)
|
89
142
|
|
90
143
|
# These vars are intentionally fetched with `or ""` as the default to account
|
91
144
|
# for the edge case where `None` is explicitly set in `values` by Pydantic because
|
92
145
|
# it is not overridden by the config file or an env var
|
93
|
-
user =
|
94
|
-
|
95
|
-
|
146
|
+
user = (
|
147
|
+
info.data.get("read_only_user", "")
|
148
|
+
if is_read_only
|
149
|
+
else info.data.get("user", "")
|
150
|
+
)
|
151
|
+
password = (
|
152
|
+
info.data.get("read_only_password", "")
|
153
|
+
if is_read_only
|
154
|
+
else info.data.get("password", "")
|
155
|
+
)
|
156
|
+
db_index = (
|
157
|
+
info.data.get("read_only_db_index", "")
|
158
|
+
if is_read_only
|
159
|
+
else info.data.get("db_index", "")
|
160
|
+
)
|
96
161
|
if use_tls:
|
97
162
|
# If using TLS update the connection URL format
|
98
163
|
connection_protocol = "rediss"
|
99
|
-
cert_reqs =
|
164
|
+
cert_reqs = (
|
165
|
+
info.data.get("read_only_ssl_cert_reqs", "none")
|
166
|
+
if is_read_only
|
167
|
+
else info.data.get("ssl_cert_reqs", "none")
|
168
|
+
)
|
100
169
|
params = {"ssl_cert_reqs": quote_plus(cert_reqs)}
|
101
|
-
|
170
|
+
|
171
|
+
ssl_ca_certs = (
|
172
|
+
info.data.get("read_only_ssl_ca_certs", "")
|
173
|
+
if is_read_only
|
174
|
+
else info.data.get("ssl_ca_certs", "")
|
175
|
+
)
|
176
|
+
if ssl_ca_certs:
|
102
177
|
params["ssl_ca_certs"] = quote(ssl_ca_certs, safe="/")
|
103
178
|
params_str = "?" + urlencode(params, quote_via=quote, safe="/")
|
104
179
|
|
@@ -108,7 +183,23 @@ class RedisSettings(FidesSettings):
|
|
108
183
|
if password or user:
|
109
184
|
auth_prefix = f"{quote_plus(user)}:{quote_plus(password)}@"
|
110
185
|
|
111
|
-
|
186
|
+
host = (
|
187
|
+
info.data.get("read_only_host", "")
|
188
|
+
if is_read_only
|
189
|
+
else info.data.get("host", "")
|
190
|
+
)
|
191
|
+
port = (
|
192
|
+
info.data.get("read_only_port", "")
|
193
|
+
if is_read_only
|
194
|
+
else info.data.get("port", "")
|
195
|
+
)
|
196
|
+
|
197
|
+
# Only include database index in URL if it's not the default (0)
|
198
|
+
db_path = f"{db_index}" if db_index != 0 else ""
|
199
|
+
|
200
|
+
connection_url = (
|
201
|
+
f"{connection_protocol}://{auth_prefix}{host}:{port}/{db_path}{params_str}"
|
202
|
+
)
|
112
203
|
return connection_url
|
113
204
|
|
114
205
|
model_config = SettingsConfigDict(env_prefix=ENV_PREFIX)
|
File without changes
|
@@ -0,0 +1,150 @@
|
|
1
|
+
from typing import Optional
|
2
|
+
|
3
|
+
from loguru import logger
|
4
|
+
from sqlalchemy import select
|
5
|
+
from sqlalchemy.orm import Session
|
6
|
+
|
7
|
+
from fides.api.models.fides_user import FidesUser
|
8
|
+
from fides.api.models.manual_tasks.manual_task import ManualTask, ManualTaskReference
|
9
|
+
from fides.api.models.manual_tasks.manual_task_log import ManualTaskLog
|
10
|
+
from fides.api.schemas.manual_tasks.manual_task_schemas import (
|
11
|
+
ManualTaskLogStatus,
|
12
|
+
ManualTaskParentEntityType,
|
13
|
+
ManualTaskReferenceType,
|
14
|
+
ManualTaskType,
|
15
|
+
)
|
16
|
+
|
17
|
+
|
18
|
+
class ManualTaskService:
|
19
|
+
def __init__(self, db: Session):
    """Store the SQLAlchemy session used by all service queries."""
    self.db = db
|
21
|
+
|
22
|
+
def get_task(
|
23
|
+
self,
|
24
|
+
task_id: Optional[str] = None,
|
25
|
+
parent_entity_id: Optional[str] = None,
|
26
|
+
parent_entity_type: Optional[ManualTaskParentEntityType] = None,
|
27
|
+
task_type: Optional[ManualTaskType] = None,
|
28
|
+
) -> Optional[ManualTask]:
|
29
|
+
"""Get the manual task using provided filters.
|
30
|
+
|
31
|
+
Args:
|
32
|
+
task_id: The task ID
|
33
|
+
parent_entity_id: The parent entity ID
|
34
|
+
parent_entity_type: The parent entity type
|
35
|
+
task_type: The task type
|
36
|
+
|
37
|
+
Returns:
|
38
|
+
Optional[ManualTask]: The manual task for the connection, if it exists
|
39
|
+
"""
|
40
|
+
if not any([task_id, parent_entity_id, parent_entity_type, task_type]):
|
41
|
+
logger.warning("No filters provided to get_task. Returning None.")
|
42
|
+
return None
|
43
|
+
|
44
|
+
stmt = select(ManualTask) # type: ignore[arg-type]
|
45
|
+
if task_id:
|
46
|
+
stmt = stmt.where(ManualTask.id == task_id)
|
47
|
+
if parent_entity_id:
|
48
|
+
stmt = stmt.where(ManualTask.parent_entity_id == parent_entity_id)
|
49
|
+
if parent_entity_type:
|
50
|
+
stmt = stmt.where(ManualTask.parent_entity_type == parent_entity_type)
|
51
|
+
if task_type:
|
52
|
+
stmt = stmt.where(ManualTask.task_type == task_type)
|
53
|
+
return self.db.execute(stmt).scalar_one_or_none()
|
54
|
+
|
55
|
+
# User Management
|
56
|
+
def assign_users_to_task(
|
57
|
+
self, db: Session, task: ManualTask, user_ids: list[str]
|
58
|
+
) -> None:
|
59
|
+
"""Assigns users to this task. We can assign one or more users to a task.
|
60
|
+
|
61
|
+
Args:
|
62
|
+
db: Database session
|
63
|
+
task: The task to assign users to
|
64
|
+
user_ids: List of user IDs to assign
|
65
|
+
"""
|
66
|
+
user_ids = list(set(user_ids))
|
67
|
+
if not user_ids:
|
68
|
+
raise ValueError("User ID is required for assignment")
|
69
|
+
|
70
|
+
# Create new user assignment
|
71
|
+
for user_id in user_ids:
|
72
|
+
# if user is already assigned, skip
|
73
|
+
if user_id in task.assigned_users:
|
74
|
+
continue
|
75
|
+
# verify user exists
|
76
|
+
user = db.query(FidesUser).filter_by(id=user_id).first()
|
77
|
+
if not user:
|
78
|
+
ManualTaskLog.create_error_log(
|
79
|
+
db=db,
|
80
|
+
task_id=task.id,
|
81
|
+
message=f"Failed to add user {user_id} to task {task.id}: user does not exist",
|
82
|
+
details={"user_id": user_id},
|
83
|
+
)
|
84
|
+
continue
|
85
|
+
|
86
|
+
ManualTaskReference.create(
|
87
|
+
db=db,
|
88
|
+
data={
|
89
|
+
"task_id": task.id,
|
90
|
+
"reference_id": user_id,
|
91
|
+
"reference_type": ManualTaskReferenceType.assigned_user,
|
92
|
+
},
|
93
|
+
)
|
94
|
+
|
95
|
+
# Log the user assignment
|
96
|
+
ManualTaskLog.create_log(
|
97
|
+
db=db,
|
98
|
+
task_id=task.id,
|
99
|
+
status=ManualTaskLogStatus.updated,
|
100
|
+
message=f"User {user_id} assigned to task",
|
101
|
+
details={"assigned_user_id": user_id},
|
102
|
+
)
|
103
|
+
|
104
|
+
def unassign_users_from_task(
|
105
|
+
self, db: Session, task: ManualTask, user_ids: list[str]
|
106
|
+
) -> None:
|
107
|
+
"""Remove the user assignment from this task.
|
108
|
+
|
109
|
+
Args:
|
110
|
+
db: Database session
|
111
|
+
task: The task to unassign users from
|
112
|
+
user_ids: List of user IDs to unassign
|
113
|
+
"""
|
114
|
+
user_ids = list(set(user_ids))
|
115
|
+
if not user_ids:
|
116
|
+
raise ValueError("User ID is required for unassignment")
|
117
|
+
|
118
|
+
# Get references to unassign
|
119
|
+
references_to_unassign = (
|
120
|
+
db.query(ManualTaskReference)
|
121
|
+
.filter(
|
122
|
+
ManualTaskReference.task_id == task.id,
|
123
|
+
ManualTaskReference.reference_type
|
124
|
+
== ManualTaskReferenceType.assigned_user,
|
125
|
+
ManualTaskReference.reference_id.in_(user_ids),
|
126
|
+
)
|
127
|
+
.all()
|
128
|
+
)
|
129
|
+
|
130
|
+
# Delete references and log unassignments
|
131
|
+
for ref in references_to_unassign:
|
132
|
+
ref.delete(db)
|
133
|
+
ManualTaskLog.create_log(
|
134
|
+
db=db,
|
135
|
+
task_id=task.id,
|
136
|
+
status=ManualTaskLogStatus.updated,
|
137
|
+
message=f"User {ref.reference_id} unassigned from task",
|
138
|
+
details={"unassigned_user_id": ref.reference_id},
|
139
|
+
)
|
140
|
+
|
141
|
+
# Check if any users weren't unassigned
|
142
|
+
unassigned_user_ids = [ref.reference_id for ref in references_to_unassign]
|
143
|
+
left_over_user_ids = [
|
144
|
+
user_id for user_id in user_ids if user_id not in unassigned_user_ids
|
145
|
+
]
|
146
|
+
if left_over_user_ids:
|
147
|
+
logger.warning(
|
148
|
+
f"Failed to unassign users {left_over_user_ids} from task {task.id}: "
|
149
|
+
"users were not assigned to the task"
|
150
|
+
)
|
@@ -20,6 +20,7 @@ from fides.api.models.privacy_request import (
|
|
20
20
|
RequestTask,
|
21
21
|
)
|
22
22
|
from fides.api.models.property import Property
|
23
|
+
from fides.api.models.worker_task import ExecutionLogStatus
|
23
24
|
from fides.api.schemas.api import BulkUpdateFailed
|
24
25
|
from fides.api.schemas.messaging.messaging import MessagingActionType
|
25
26
|
from fides.api.schemas.policy import ActionType, CurrentStep
|
@@ -27,7 +28,6 @@ from fides.api.schemas.privacy_request import (
|
|
27
28
|
BulkPostPrivacyRequests,
|
28
29
|
BulkReviewResponse,
|
29
30
|
CheckpointActionRequired,
|
30
|
-
ExecutionLogStatus,
|
31
31
|
PrivacyRequestCreate,
|
32
32
|
PrivacyRequestResponse,
|
33
33
|
PrivacyRequestResubmit,
|