PyPI - ethyca-fides - Versions diffs - 2.56.3b0__py2.py3-none-any.whl → 2.56.3b2__py2.py3-none-any.whl - Mend

ethyca-fides 2.56.3b0__py2.py3-none-any.whl → 2.56.3b2__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (107) hide show

fides/api/task/graph_task.py CHANGED Viewed

@@ -36,7 +36,8 @@ from fides.api.models.connectionconfig import (
     ConnectionType,
 )
 from fides.api.models.datasetconfig import DatasetConfig
-from fides.api.models.policy import Policy
+from fides.api.models.policy import Policy, Rule
+from fides.api.models.privacy_preference import PrivacyPreferenceHistory
 from fides.api.models.privacy_request import ExecutionLog, PrivacyRequest, RequestTask
 from fides.api.schemas.policy import ActionType, CurrentStep
 from fides.api.schemas.privacy_request import ExecutionLogStatus
@@ -54,7 +55,9 @@ from fides.api.util.collection_util import (
     make_immutable,
     make_mutable,
 )
-from fides.api.util.consent_util import add_errored_system_status_for_consent_reporting
+from fides.api.util.consent_util import (
+    add_errored_system_status_for_consent_reporting_on_preferences,
+)
 from fides.api.util.logger import Pii
 from fides.api.util.logger_context_utils import LoggerContextKeys
 from fides.api.util.saas_util import FIDESOPS_GROUPED_INPUTS
@@ -138,13 +141,7 @@ def retry(
                         self.resources.request.id,
                     )
                     self.log_skipped(action_type, exc)
-                    for pref in self.resources.request.privacy_preferences:
-                        # For consent reporting, also caching the given system as skipped for all historical privacy preferences.
-                        pref.cache_system_status(
-                            self.resources.session,
-                            self.connector.configuration.system_key,
-                            ExecutionLogStatus.skipped,
-                        )
+                    self.cache_system_status_for_preferences()
                     return default_return
                 except BaseException as ex:  # pylint: disable=W0703
                     traceback.print_exc()
@@ -164,11 +161,7 @@ def retry(
                     action_type.value
                 ]  # Convert ActionType into a CurrentStep, no longer coerced with Pydantic V2
             )
-            add_errored_system_status_for_consent_reporting(
-                self.resources.session,
-                self.resources.request,
-                self.connector.configuration,
-            )
+            self.add_error_status_for_consent_reporting()
             if not self.request_task.id:
                 # TODO Remove when we stop support for DSR 2.0
                 # Re-raise to stop privacy request execution on failure for
@@ -730,6 +723,48 @@ class GraphTask(ABC):  # pylint: disable=too-many-instance-attributes
         self.log_end(ActionType.consent)
         return output
+    def cache_system_status_for_preferences(self) -> None:
+        """
+        Calls cache_system_status for all historical privacy preferences for the given request.
+        Purposely uses a new session.
+        """
+        privacy_request_id = self.resources.request.id
+        with get_db() as db:
+            privacy_preferences = db.query(PrivacyPreferenceHistory).filter(
+                PrivacyPreferenceHistory.privacy_request_id == privacy_request_id
+            )
+            for pref in privacy_preferences:
+                # For consent reporting, also caching the given system as skipped for all historical privacy preferences.
+                pref.cache_system_status(
+                    db,
+                    self.connector.configuration.system_key,  # type: ignore[arg-type]
+                    ExecutionLogStatus.skipped,
+                )
+    def add_error_status_for_consent_reporting(self) -> None:
+        """
+        Adds the errored system status for all historical privacy preferences for the given request that
+        are deemed relevant for the connector failure (i.e if they had a "pending" log added to them).
+        Purposely uses a new session.
+        """
+        privacy_request_id = self.resources.request.id
+        with get_db() as db:
+            privacy_preferences = (
+                db.query(PrivacyPreferenceHistory)
+                .filter(
+                    PrivacyPreferenceHistory.privacy_request_id == privacy_request_id
+                )
+                .all()
+            )
+            add_errored_system_status_for_consent_reporting_on_preferences(
+                db, privacy_preferences, self.connector.configuration
+            )
 def collect_queries(
     traversal: Traversal, resources: TaskResources
@@ -816,39 +851,45 @@ def build_affected_field_logs(
     }]
     """
-    targeted_field_paths: Dict[FieldAddress, str] = {}
+    policy_id = policy.id
-    for rule in policy.rules:  # type: ignore[attr-defined]
-        if rule.action_type != action_type:
-            continue
-        rule_categories: List[str] = rule.get_target_data_categories()
-        if not rule_categories:
-            continue
+    with get_db() as db:
-        collection_categories: Dict[
-            str, List[FieldPath]
-        ] = node.collection.field_paths_by_category  # type: ignore
-        for rule_cat in rule_categories:
-            for collection_cat, field_paths in collection_categories.items():
-                if collection_cat.startswith(rule_cat):
-                    targeted_field_paths.update(
-                        {
-                            node.address.field_address(field_path): collection_cat
-                            for field_path in field_paths
-                        }
-                    )
+        rules = db.query(Rule).filter(Rule.policy_id == policy_id)
-    ret: List[Dict[str, Any]] = []
-    for field_address, data_categories in targeted_field_paths.items():
-        ret.append(
-            {
-                "path": field_address.value,
-                "field_name": field_address.field_path.string_path,
-                "data_categories": [data_categories],
-            }
-        )
+        targeted_field_paths: Dict[FieldAddress, str] = {}
+        for rule in rules:  # type: ignore[attr-defined]
+            if rule.action_type != action_type:
+                continue
+            rule_categories: List[str] = rule.get_target_data_categories()
+            if not rule_categories:
+                continue
+            collection_categories: Dict[
+                str, List[FieldPath]
+            ] = node.collection.field_paths_by_category  # type: ignore
+            for rule_cat in rule_categories:
+                for collection_cat, field_paths in collection_categories.items():
+                    if collection_cat.startswith(rule_cat):
+                        targeted_field_paths.update(
+                            {
+                                node.address.field_address(field_path): collection_cat
+                                for field_path in field_paths
+                            }
+                        )
+        ret: List[Dict[str, Any]] = []
+        for field_address, data_categories in targeted_field_paths.items():
+            ret.append(
+                {
+                    "path": field_address.value,
+                    "field_name": field_address.field_path.string_path,
+                    "data_categories": [data_categories],
+                }
+            )
-    return ret
+        return ret
 def build_consent_dataset_graph(datasets: List[DatasetConfig]) -> DatasetGraph:

fides/api/tasks/storage.py CHANGED Viewed

@@ -5,20 +5,20 @@ import os
 import secrets
 import zipfile
 from io import BytesIO
-from typing import Any, Dict, Optional, Set, Union
+from typing import Any, Dict, Optional, Union
 import pandas as pd
 from botocore.exceptions import ClientError, ParamValidationError
+from fideslang.validation import AnyHttpUrlString
 from loguru import logger
 from fides.api.cryptography.cryptographic_util import bytes_to_b64_str
-from fides.api.graph.graph import DataCategoryFieldMapping
 from fides.api.models.privacy_request import PrivacyRequest
 from fides.api.schemas.storage.storage import ResponseFormat, StorageSecrets
 from fides.api.service.privacy_request.dsr_package.dsr_report_builder import (
     DsrReportBuilder,
 )
-from fides.api.util.aws_util import get_aws_session
+from fides.api.util.aws_util import get_s3_client
 from fides.api.util.cache import get_cache, get_encryption_cache_key
 from fides.api.util.encryption.aes_gcm_encryption_scheme import (
     encrypt_to_bytes_verify_secrets_length,
@@ -101,7 +101,9 @@ def write_to_in_memory_buffer(
     raise NotImplementedError(f"No handling for response format {resp_format}.")
-def create_presigned_url_for_s3(s3_client: Any, bucket_name: str, file_key: str) -> str:
+def create_presigned_url_for_s3(
+    s3_client: Any, bucket_name: str, file_key: str
+) -> AnyHttpUrlString:
     """ "Generate a presigned URL to share an S3 object
     :param s3_client: s3 base client
@@ -119,23 +121,108 @@ def create_presigned_url_for_s3(s3_client: Any, bucket_name: str, file_key: str)
     return response
+def generic_upload_to_s3(  # pylint: disable=R0913
+    storage_secrets: Dict[StorageSecrets, Any],
+    bucket_name: str,
+    file_key: str,
+    auth_method: str,
+    document: bytes,
+) -> Optional[AnyHttpUrlString]:
+    """Uploads arbitrary data to s3 returned from an access request"""
+    logger.info("Starting S3 Upload of {}", file_key)
+    try:
+        s3_client = get_s3_client(auth_method, storage_secrets)
+        try:
+            s3_client.put_object(Bucket=bucket_name, Key=file_key, Body=document)
+        except Exception as e:
+            logger.error("Encountered error while uploading s3 object: {}", e)
+            raise e
+        presigned_url: AnyHttpUrlString = create_presigned_url_for_s3(
+            s3_client, bucket_name, file_key
+        )
+        return presigned_url
+    except ClientError as e:
+        logger.error(
+            "Encountered error while uploading and generating link for s3 object: {}", e
+        )
+        raise e
+    except ParamValidationError as e:
+        raise ValueError(f"The parameters you provided are incorrect: {e}")
+def generic_retrieve_from_s3(
+    storage_secrets: Dict[StorageSecrets, Any],
+    bucket_name: str,
+    file_key: str,
+    auth_method: str,
+) -> Optional[bytes]:
+    """Retrieves arbitrary data from s3"""
+    logger.info("Starting S3 Retrieve of {}", file_key)
+    try:
+        s3_client = get_s3_client(auth_method, storage_secrets)
+        try:
+            response = s3_client.get_object(Bucket=bucket_name, Key=file_key)
+            return response["Body"].read()
+        except Exception as e:
+            logger.error("Encountered error while retrieving s3 object: {}", e)
+            raise e
+    except ClientError as e:
+        logger.error("Encountered error while retrieving s3 object: {}", e)
+        raise e
+    except ParamValidationError as e:
+        raise ValueError(f"The parameters you provided are incorrect: {e}")
+def generic_delete_from_s3(
+    storage_secrets: Dict[StorageSecrets, Any],
+    bucket_name: str,
+    file_key: str,
+    auth_method: str,
+) -> None:
+    """Deletes arbitrary data from s3"""
+    logger.info("Starting S3 Delete of {}", file_key)
+    try:
+        s3_client = get_s3_client(auth_method, storage_secrets)
+        try:
+            s3_client.delete_object(Bucket=bucket_name, Key=file_key)
+        except Exception as e:
+            logger.error("Encountered error while deleting s3 object: {}", e)
+            raise e
+    except ClientError as e:
+        logger.error("Encountered error while deleting s3 object: {}", e)
+        raise e
+    except ParamValidationError as e:
+        raise ValueError(f"The parameters you provided are incorrect: {e}")
 def upload_to_s3(  # pylint: disable=R0913
     storage_secrets: Dict[StorageSecrets, Any],
     data: Dict,
     bucket_name: str,
     file_key: str,
     resp_format: str,
-    privacy_request: PrivacyRequest,
+    privacy_request: Optional[PrivacyRequest],
+    document: Optional[bytes],
     auth_method: str,
-    data_category_field_mapping: Optional[DataCategoryFieldMapping] = None,
-    data_use_map: Optional[Dict[str, Set[str]]] = None,
-) -> str:
+) -> Optional[AnyHttpUrlString]:
     """Uploads arbitrary data to s3 returned from an access request"""
     logger.info("Starting S3 Upload of {}", file_key)
+    if privacy_request is None and document is not None:
+        return generic_upload_to_s3(
+            storage_secrets, bucket_name, file_key, auth_method, document
+        )
+    if privacy_request is None:
+        raise ValueError("Privacy request must be provided")
     try:
-        my_session = get_aws_session(auth_method, storage_secrets)
-        s3_client = my_session.client("s3")
+        s3_client = get_s3_client(auth_method, storage_secrets)
         # handles file chunking
         try:
@@ -148,7 +235,7 @@ def upload_to_s3(  # pylint: disable=R0913
             logger.error("Encountered error while uploading s3 object: {}", e)
             raise e
-        presigned_url: str = create_presigned_url_for_s3(
+        presigned_url: AnyHttpUrlString = create_presigned_url_for_s3(
             s3_client, bucket_name, file_key
         )
@@ -162,17 +249,21 @@ def upload_to_s3(  # pylint: disable=R0913
         raise ValueError(f"The parameters you provided are incorrect: {e}")
+def get_local_filename(file_key: str) -> str:
+    """Verifies that the local storage directory exists"""
+    if not os.path.exists(LOCAL_FIDES_UPLOAD_DIRECTORY):
+        os.makedirs(LOCAL_FIDES_UPLOAD_DIRECTORY)
+    return f"{LOCAL_FIDES_UPLOAD_DIRECTORY}/{file_key}"
 def upload_to_local(
     data: Dict,
     file_key: str,
     privacy_request: PrivacyRequest,
     resp_format: str = ResponseFormat.json.value,
-    data_category_field_mapping: Optional[DataCategoryFieldMapping] = None,
-    data_use_map: Optional[Dict[str, Set[str]]] = None,
 ) -> str:
     """Uploads access request data to a local folder - for testing/demo purposes only"""
-    if not os.path.exists(LOCAL_FIDES_UPLOAD_DIRECTORY):
-        os.makedirs(LOCAL_FIDES_UPLOAD_DIRECTORY)
+    get_local_filename(file_key)
     filename = f"{LOCAL_FIDES_UPLOAD_DIRECTORY}/{file_key}"
     in_memory_file = write_to_in_memory_buffer(resp_format, data, privacy_request)

fides/api/util/aws_util.py CHANGED Viewed

@@ -70,3 +70,22 @@ def get_aws_session(
             raise
     else:
         return session
+def get_s3_client(
+    auth_method: str,
+    storage_secrets: Optional[Dict[StorageSecrets, Any]],
+    assume_role_arn: Optional[str] = None,
+) -> Session:
+    """
+    Abstraction to retrieve an AWS S3 client using secrets.
+    If an `assume_role_arn` is provided, the secrets will be used to
+    assume that role and return a Session instantiated with that role.
+    """
+    session = get_aws_session(
+        auth_method=auth_method,
+        storage_secrets=storage_secrets,
+        assume_role_arn=assume_role_arn,
+    )
+    return session.client("s3")

fides/api/util/collection_util.py CHANGED Viewed

@@ -1,3 +1,4 @@
+from collections import deque
 from functools import reduce
 from typing import Any, Callable, Dict, Iterable, List, Optional, TypeVar, Union
@@ -119,3 +120,119 @@ def extract_key_for_address(
     request_id_dataset, collection = full_request_id.split(":")
     dataset = request_id_dataset.split("__", number_of_leading_strings_to_exclude)[-1]
     return f"{dataset}:{collection}"
+def unflatten_dict(flat_dict: Dict[str, Any], separator: str = ".") -> Dict[str, Any]:
+    """
+    Converts a dictionary of paths/values into a nested dictionary
+    example:
+    {"A.B": "1", "A.C": "2"}
+    becomes
+    {
+        "A": {
+            "B": "1",
+            "C": "2"
+        }
+    }
+    """
+    output: Dict[Any, Any] = {}
+    queue = deque(flat_dict.items())
+    while queue:
+        path, value = queue.popleft()
+        keys = path.split(separator)
+        target = output
+        for i, current_key in enumerate(keys[:-1]):
+            next_key = keys[i + 1]
+            if next_key.isdigit():
+                target = target.setdefault(current_key, [])
+            else:
+                if isinstance(target, dict):
+                    target = target.setdefault(current_key, {})
+                elif isinstance(target, list):
+                    while len(target) <= int(current_key):
+                        target.append({})
+                    target = target[int(current_key)]
+        try:
+            if isinstance(target, list):
+                target.append(value)
+            else:
+                # If the value is a dictionary, add its components to the queue for processing
+                if isinstance(value, dict):
+                    target = target.setdefault(keys[-1], {})
+                    for inner_key, inner_value in value.items():
+                        new_key = f"{path}{separator}{inner_key}"
+                        queue.append((new_key, inner_value))
+                else:
+                    target[keys[-1]] = value
+        except TypeError as exc:
+            raise ValueError(
+                f"Error unflattening dictionary, conflicting levels detected: {exc}"
+            )
+    return output
+def flatten_dict(data: Any, prefix: str = "", separator: str = ".") -> Dict[str, Any]:
+    """
+    Recursively flatten a dictionary or list into a flat dictionary with dot-notation keys.
+    Handles nested dictionaries and arrays with proper indices.
+    example:
+    {
+        "A": {
+            "B": "1",
+            "C": "2"
+        },
+        "D": [
+            {"E": "3"},
+            {"E": "4"}
+        ]
+    }
+    becomes
+    {
+        "A.B": "1",
+        "A.C": "2",
+        "D.0.E": "3",
+        "D.1.E": "4"
+    }
+    Args:
+        data: The data to flatten (must be a dict or list)
+        prefix: The current key prefix (used in recursion)
+        separator: The separator to use between key segments (default: ".")
+    Returns:
+        A flattened dictionary with dot-notation keys
+    Raises:
+        FidesopsException: If input is not a dict or list
+    """
+    items = {}
+    if isinstance(data, dict):
+        for k, v in data.items():
+            new_key = f"{prefix}{separator}{k}" if prefix else k
+            if isinstance(v, (dict, list)):
+                items.update(flatten_dict(v, new_key, separator))
+            else:
+                items[new_key] = v
+    elif isinstance(data, list):
+        for i, v in enumerate(data):
+            new_key = f"{prefix}{separator}{i}"
+            if isinstance(v, (dict, list)):
+                items.update(flatten_dict(v, new_key, separator))
+            else:
+                items[new_key] = v
+    else:
+        raise ValueError(
+            f"Input to flatten_dict must be a dict or list, got {type(data).__name__}"
+        )
+    return items

fides/api/util/consent_util.py CHANGED Viewed

@@ -214,15 +214,30 @@ def add_errored_system_status_for_consent_reporting(
     Deeming them relevant if they already had a "pending" log added to them.
     """
-    for pref in privacy_request.privacy_preferences:  # type: ignore[attr-defined]
+    add_errored_system_status_for_consent_reporting_on_preferences(db, privacy_request.privacy_preferences, connection_config)  # type: ignore[attr-defined]
+def add_errored_system_status_for_consent_reporting_on_preferences(
+    db: Session,
+    privacy_preferences: List[PrivacyPreferenceHistory],
+    connection_config: ConnectionConfig,
+) -> None:
+    """
+    Cache an errored system status for consent reporting on just the subset
+    of preferences that were deemed relevant for the connector on failure,
+    from the provided list of preferences.
+    Deeming them relevant if they already had a "pending" log added to them.
+    """
+    for preference in privacy_preferences:
         if (
-            pref.affected_system_status
-            and pref.affected_system_status.get(connection_config.system_key)
+            preference.affected_system_status
+            and preference.affected_system_status.get(connection_config.system_key)
             == ExecutionLogStatus.pending.value
         ):
-            pref.cache_system_status(
+            preference.cache_system_status(
                 db,
-                connection_config.system_key,
+                connection_config.system_key,  # type: ignore[arg-type]
                 ExecutionLogStatus.error,
             )

fides/api/util/saas_util.py CHANGED Viewed

@@ -3,7 +3,7 @@ from __future__ import annotations
 import json
 import re
 import socket
-from collections import defaultdict, deque
+from collections import defaultdict
 from ipaddress import IPv4Address, IPv6Address, ip_address
 from typing import Any, Dict, List, Optional, Set, Tuple, Union
@@ -256,60 +256,6 @@ def merge_datasets(dataset: GraphDataset, config_dataset: GraphDataset) -> Graph
     )
-def unflatten_dict(flat_dict: Dict[str, Any], separator: str = ".") -> Dict[str, Any]:
-    """
-    Converts a dictionary of paths/values into a nested dictionary
-    example:
-    {"A.B": "1", "A.C": "2"}
-    becomes
-    {
-        "A": {
-            "B": "1",
-            "C": "2"
-        }
-    }
-    """
-    output: Dict[Any, Any] = {}
-    queue = deque(flat_dict.items())
-    while queue:
-        path, value = queue.popleft()
-        keys = path.split(separator)
-        target = output
-        for i, current_key in enumerate(keys[:-1]):
-            next_key = keys[i + 1]
-            if next_key.isdigit():
-                target = target.setdefault(current_key, [])
-            else:
-                if isinstance(target, dict):
-                    target = target.setdefault(current_key, {})
-                elif isinstance(target, list):
-                    while len(target) <= int(current_key):
-                        target.append({})
-                    target = target[int(current_key)]
-        try:
-            if isinstance(target, list):
-                target.append(value)
-            else:
-                # If the value is a dictionary, add its components to the queue for processing
-                if isinstance(value, dict):
-                    target = target.setdefault(keys[-1], {})
-                    for inner_key, inner_value in value.items():
-                        new_key = f"{path}{separator}{inner_key}"
-                        queue.append((new_key, inner_value))
-                else:
-                    target[keys[-1]] = value
-        except TypeError as exc:
-            raise FidesopsException(
-                f"Error unflattening dictionary, conflicting levels detected: {exc}"
-            )
-    return output
 def format_body(
     headers: Dict[str, Any],
     body: Optional[str],
@@ -339,7 +285,7 @@ def format_body(
     if content_type == "application/json":
         output = body
     elif content_type == "application/x-www-form-urlencoded":
-        output = multidimensional_urlencode(json.loads(body))
+        output = nullsafe_urlencode(json.loads(body))
     elif content_type == "text/plain":
         output = body
     else:
@@ -470,3 +416,33 @@ def replace_version(saas_config: str, new_version: str) -> str:
         version_pattern, f"version: {new_version}", saas_config, count=1
     )
     return updated_config
+def nullsafe_urlencode(data: Any) -> str:
+    """
+    Wrapper around multidimensional_urlencode that preserves null values as empty strings.
+    This is useful for APIs that expect keys with empty values (e.g., "name=") to represent
+    null values, rather than omitting the field entirely.
+    Args:
+        data: The data to encode (can be a dict, list, or other nested structure)
+    Returns:
+        URL-encoded string with null values properly handled
+    """
+    def prepare_null_values(data: Any) -> Any:
+        """
+        Recursively process data for URL encoding, converting None values to empty strings.
+        """
+        if data is None:
+            return ""
+        if isinstance(data, dict):
+            return {k: prepare_null_values(v) for k, v in data.items()}
+        if isinstance(data, list):
+            return [prepare_null_values(item) for item in data]
+        return data
+    processed_data = prepare_null_values(data)
+    return multidimensional_urlencode(processed_data)

fides/config/security_settings.py CHANGED Viewed

@@ -3,7 +3,6 @@
 # pylint: disable=C0115,C0116, E0213
 from typing import List, Optional, Pattern, Tuple, Union
-import validators
 from pydantic import Field, SerializeAsAny, ValidationInfo, field_validator
 from pydantic_settings import SettingsConfigDict
 from slowapi.wrappers import parse_many  # type: ignore
@@ -176,22 +175,17 @@ class SecuritySettings(FidesSettings):
     @field_validator("cors_origins", mode="before")
     @classmethod
     def assemble_cors_origins(cls, v: Union[str, List[str]]) -> Union[List[str], str]:
-        """Return a list of valid origins for CORS requests"""
-        def validate(values: List[str]) -> None:
-            for value in values:
-                if value != "*":
-                    if not validators.url(value):
-                        raise ValueError(f"{value} is not a valid url")
+        """
+        Return a list of origins for CORS requests.
+        This validator allows us to parse a comma-separated string of origins
+        into a list of origins, since the `cors_origins` field can be set
+        as a comma-separated string or a list of strings.
+        """
         if isinstance(v, str) and not v.startswith("["):
             values = [i.strip() for i in v.split(",")]
-            validate(values)
             return values
-        if isinstance(v, (list, str)):
-            validate(v)  # type: ignore
+        if isinstance(v, list):
             return v
         raise ValueError(v)

ethyca-fides 2.56.3b0__py2.py3-none-any.whl → 2.56.3b2__py2.py3-none-any.whl

ethyca-fides 2.56.3b0__py2.py3-none-any.whl → 2.56.3b2__py2.py3-none-any.whl