ethyca-fides 2.69.0rc8__py2.py3-none-any.whl → 2.69.0rc10__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ethyca-fides might be problematic. Click here for more details.
- {ethyca_fides-2.69.0rc8.dist-info → ethyca_fides-2.69.0rc10.dist-info}/METADATA +1 -1
- {ethyca_fides-2.69.0rc8.dist-info → ethyca_fides-2.69.0rc10.dist-info}/RECORD +101 -101
- fides/_version.py +3 -3
- fides/api/api/v1/endpoints/oauth_endpoints.py +0 -2
- fides/api/api/v1/endpoints/user_endpoints.py +1 -5
- fides/api/service/privacy_request/dsr_package/dsr_report_builder.py +253 -71
- fides/api/service/privacy_request/dsr_package/templates/attachments_index.html +4 -2
- fides/api/service/privacy_request/dsr_package/templates/collection_index.html +3 -1
- fides/api/service/privacy_request/dsr_package/templates/dataset_index.html +1 -1
- fides/api/service/privacy_request/request_runner_service.py +8 -2
- fides/api/service/storage/streaming/smart_open_streaming_storage.py +106 -169
- fides/api/service/storage/util.py +579 -0
- fides/api/task/manual/manual_task_graph_task.py +11 -9
- fides/config/security_settings.py +3 -7
- fides/ui-build/static/admin/404.html +1 -1
- fides/ui-build/static/admin/add-systems/manual.html +1 -1
- fides/ui-build/static/admin/add-systems/multiple.html +1 -1
- fides/ui-build/static/admin/add-systems.html +1 -1
- fides/ui-build/static/admin/consent/configure/add-vendors.html +1 -1
- fides/ui-build/static/admin/consent/configure.html +1 -1
- fides/ui-build/static/admin/consent/privacy-experience/[id].html +1 -1
- fides/ui-build/static/admin/consent/privacy-experience/new.html +1 -1
- fides/ui-build/static/admin/consent/privacy-experience.html +1 -1
- fides/ui-build/static/admin/consent/privacy-notices/[id].html +1 -1
- fides/ui-build/static/admin/consent/privacy-notices/new.html +1 -1
- fides/ui-build/static/admin/consent/privacy-notices.html +1 -1
- fides/ui-build/static/admin/consent/properties.html +1 -1
- fides/ui-build/static/admin/consent/reporting.html +1 -1
- fides/ui-build/static/admin/consent.html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/projects/[projectUrn]/[resourceUrn].html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/projects/[projectUrn].html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/projects.html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/resources/[resourceUrn].html +1 -1
- fides/ui-build/static/admin/data-catalog/[systemId]/resources.html +1 -1
- fides/ui-build/static/admin/data-catalog.html +1 -1
- fides/ui-build/static/admin/data-discovery/action-center/[monitorId]/[systemId].html +1 -1
- fides/ui-build/static/admin/data-discovery/action-center/[monitorId].html +1 -1
- fides/ui-build/static/admin/data-discovery/action-center.html +1 -1
- fides/ui-build/static/admin/data-discovery/activity.html +1 -1
- fides/ui-build/static/admin/data-discovery/detection/[resourceUrn].html +1 -1
- fides/ui-build/static/admin/data-discovery/detection.html +1 -1
- fides/ui-build/static/admin/data-discovery/discovery/[resourceUrn].html +1 -1
- fides/ui-build/static/admin/data-discovery/discovery.html +1 -1
- fides/ui-build/static/admin/datamap.html +1 -1
- fides/ui-build/static/admin/dataset/[datasetId]/[collectionName]/[...subfieldNames].html +1 -1
- fides/ui-build/static/admin/dataset/[datasetId]/[collectionName].html +1 -1
- fides/ui-build/static/admin/dataset/[datasetId].html +1 -1
- fides/ui-build/static/admin/dataset/new.html +1 -1
- fides/ui-build/static/admin/dataset.html +1 -1
- fides/ui-build/static/admin/datastore-connection/[id].html +1 -1
- fides/ui-build/static/admin/datastore-connection/new.html +1 -1
- fides/ui-build/static/admin/datastore-connection.html +1 -1
- fides/ui-build/static/admin/index.html +1 -1
- fides/ui-build/static/admin/integrations/[id].html +1 -1
- fides/ui-build/static/admin/integrations.html +1 -1
- fides/ui-build/static/admin/login/[provider].html +1 -1
- fides/ui-build/static/admin/login.html +1 -1
- fides/ui-build/static/admin/messaging/[id].html +1 -1
- fides/ui-build/static/admin/messaging/add-template.html +1 -1
- fides/ui-build/static/admin/messaging.html +1 -1
- fides/ui-build/static/admin/poc/ant-components.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/AntForm.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/FormikAntFormItem.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/FormikControlled.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/FormikField.html +1 -1
- fides/ui-build/static/admin/poc/form-experiments/FormikSpreadField.html +1 -1
- fides/ui-build/static/admin/poc/forms.html +1 -1
- fides/ui-build/static/admin/poc/table-migration.html +1 -1
- fides/ui-build/static/admin/privacy-requests/[id].html +1 -1
- fides/ui-build/static/admin/privacy-requests/configure/messaging.html +1 -1
- fides/ui-build/static/admin/privacy-requests/configure/storage.html +1 -1
- fides/ui-build/static/admin/privacy-requests/configure.html +1 -1
- fides/ui-build/static/admin/privacy-requests.html +1 -1
- fides/ui-build/static/admin/properties/[id].html +1 -1
- fides/ui-build/static/admin/properties/add-property.html +1 -1
- fides/ui-build/static/admin/properties.html +1 -1
- fides/ui-build/static/admin/reporting/datamap.html +1 -1
- fides/ui-build/static/admin/settings/about/alpha.html +1 -1
- fides/ui-build/static/admin/settings/about.html +1 -1
- fides/ui-build/static/admin/settings/consent/[configuration_id]/[purpose_id].html +1 -1
- fides/ui-build/static/admin/settings/consent.html +1 -1
- fides/ui-build/static/admin/settings/custom-fields.html +1 -1
- fides/ui-build/static/admin/settings/domain-records.html +1 -1
- fides/ui-build/static/admin/settings/domains.html +1 -1
- fides/ui-build/static/admin/settings/email-templates.html +1 -1
- fides/ui-build/static/admin/settings/locations.html +1 -1
- fides/ui-build/static/admin/settings/organization.html +1 -1
- fides/ui-build/static/admin/settings/regulations.html +1 -1
- fides/ui-build/static/admin/systems/configure/[id]/test-datasets.html +1 -1
- fides/ui-build/static/admin/systems/configure/[id].html +1 -1
- fides/ui-build/static/admin/systems.html +1 -1
- fides/ui-build/static/admin/taxonomy.html +1 -1
- fides/ui-build/static/admin/user-management/new.html +1 -1
- fides/ui-build/static/admin/user-management/profile/[id].html +1 -1
- fides/ui-build/static/admin/user-management.html +1 -1
- {ethyca_fides-2.69.0rc8.dist-info → ethyca_fides-2.69.0rc10.dist-info}/WHEEL +0 -0
- {ethyca_fides-2.69.0rc8.dist-info → ethyca_fides-2.69.0rc10.dist-info}/entry_points.txt +0 -0
- {ethyca_fides-2.69.0rc8.dist-info → ethyca_fides-2.69.0rc10.dist-info}/licenses/LICENSE +0 -0
- {ethyca_fides-2.69.0rc8.dist-info → ethyca_fides-2.69.0rc10.dist-info}/top_level.txt +0 -0
- /fides/ui-build/static/admin/_next/static/{_uK_VdayMLILH3d3WqL9g → 8qfO1Ol3G3QbcXpHAnPlU}/_buildManifest.js +0 -0
- /fides/ui-build/static/admin/_next/static/{_uK_VdayMLILH3d3WqL9g → 8qfO1Ol3G3QbcXpHAnPlU}/_ssgManifest.js +0 -0
|
@@ -1,8 +1,13 @@
|
|
|
1
1
|
import os
|
|
2
|
+
from collections import defaultdict
|
|
2
3
|
from enum import Enum as EnumType
|
|
4
|
+
from typing import Any, Callable, Optional
|
|
5
|
+
from urllib.parse import quote
|
|
3
6
|
|
|
4
7
|
from loguru import logger
|
|
5
8
|
|
|
9
|
+
from fides.api.util.storage_util import format_size
|
|
10
|
+
|
|
6
11
|
# This is the max file size for downloading the content of an attachment.
|
|
7
12
|
# This is an industry standard used by companies like Google and Microsoft.
|
|
8
13
|
LARGE_FILE_THRESHOLD = 25 * 1024 * 1024 # 25 MB
|
|
@@ -84,3 +89,577 @@ def get_allowed_file_type_or_raise(file_key: str) -> str:
|
|
|
84
89
|
return AllowedFileType[file_type].value
|
|
85
90
|
except KeyError:
|
|
86
91
|
raise ValueError(error_msg)
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def get_unique_filename(filename: str, used_filenames: set[str]) -> str:
    """
    Return a filename guaranteed not to collide with any name in *used_filenames*.

    Collisions are resolved by inserting ``_<counter>`` between the stem and
    the extension (``report.pdf`` -> ``report_1.pdf`` -> ``report_2.pdf`` ...).
    The per-dataset *used_filenames* set mirrors DSR report builder behavior.

    Args:
        filename: The originally requested filename.
        used_filenames: Names already assigned within the same dataset.

    Returns:
        A filename unique with respect to *used_filenames*.
    """
    if filename not in used_filenames:
        return filename

    stem, extension = os.path.splitext(filename)
    counter = 1
    candidate = f"{stem}_{counter}{extension}"
    while candidate in used_filenames:
        counter += 1
        candidate = f"{stem}_{counter}{extension}"
    return candidate
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def determine_dataset_name_from_path(base_path: str) -> str:
    """
    Derive a dataset name from an attachment base path.

    ``"attachments"`` maps to itself; ``"data/<dataset>/..."`` yields the
    dataset segment; anything else yields ``"unknown"``.

    Args:
        base_path: The base path (e.g. "attachments", "data/manualtask/manual_data").

    Returns:
        The dataset name extracted from the path.
    """
    if base_path == "attachments":
        return "attachments"

    segments = base_path.split("/")
    # Only paths shaped like data/<dataset>/... carry a dataset name.
    if len(segments) >= 2 and segments[0] == "data":
        return segments[1]
    return "unknown"
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def resolve_attachment_storage_path(
    unique_filename: str,
    base_path: str,
) -> str:
    """
    Build the canonical in-ZIP storage path for an attachment.

    Single source of truth for attachment placement so the DSR report builder
    and the streaming storage components always agree on where a file lives.

    Args:
        unique_filename: The unique filename for the attachment.
        base_path: Base path (e.g. "attachments", "data/dataset/collection").

    Returns:
        The full storage path for the attachment file.
    """
    return "/".join((base_path, unique_filename))
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def generate_attachment_url_from_storage_path(
    download_url: str,
    unique_filename: str,
    base_path: str,
    html_directory: str,
    enable_streaming: bool = False,
) -> str:
    """
    Produce the URL an HTML template should use for an attachment.

    When streaming is disabled the remote *download_url* is returned as-is.
    When streaming is enabled the attachment is packaged inside the ZIP, so a
    relative link from the HTML template's directory to the stored file is
    computed instead:

    - template and file both in ``attachments/`` -> bare filename
    - template and file both under ``data/``      -> ``attachments/<name>``
    - anything else                               -> ``../<base_path>/<name>``

    Used by ``_process_attachment_list()`` here and by
    ``_write_attachment_content()`` in dsr_report_builder.py.

    Args:
        download_url: The original download URL.
        unique_filename: The unique filename for the attachment.
        base_path: Where the attachment is stored (e.g. "attachments",
            "data/dataset/collection").
        html_directory: Directory containing the referencing HTML template.
        enable_streaming: Whether streaming mode is enabled.

    Returns:
        The appropriate attachment URL.
    """
    if not enable_streaming:
        return download_url

    # Same flat attachments directory: link by filename alone.
    if html_directory == "attachments" and base_path == "attachments":
        return unique_filename

    # Both inside the data/ tree: the file sits in an attachments/ subdir
    # next to the template.
    if html_directory.startswith("data/") and base_path.startswith("data/"):
        return f"attachments/{unique_filename}"

    # Fallback: step out of the template directory and address the stored
    # path directly (simplified relative-path resolution).
    return f"../{base_path}/{unique_filename}"
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def process_attachment_naming(
    attachment: dict[str, Any],
    used_filenames: set[str],
    processed_attachments: dict[tuple[str, str], str],
    dataset_name: str = "attachments",
) -> Optional[tuple[str, tuple[str, str]]]:
    """
    Assign (or look up) a collision-free filename for an attachment.

    Attachments are deduplicated by ``(download_url, file_name)``: the first
    sighting reserves a unique name, later sightings reuse it. Either way the
    name is recorded in *used_filenames* so subsequent attachments in the same
    dataset cannot collide with it.

    Args:
        attachment: The attachment dictionary.
        used_filenames: Set of names already used within this dataset.
        processed_attachments: Map of attachment keys to assigned filenames.
        dataset_name: Dataset name, accepted for caller context / API symmetry.

    Returns:
        ``(unique_filename, (download_url, file_name))``, or ``None`` when the
        attachment lacks a file name or download URL.
    """
    file_name = attachment.get("file_name")
    download_url = attachment.get("download_url")

    if not file_name or not download_url:
        logger.warning(
            f"Skipping attachment with missing {'file name' if not file_name else 'download URL'}"
        )
        return None

    attachment_key = (download_url, file_name)
    previously_assigned = processed_attachments.get(attachment_key)
    if previously_assigned is None:
        unique_filename = get_unique_filename(file_name, used_filenames)
        processed_attachments[attachment_key] = unique_filename
    else:
        unique_filename = previously_assigned
    # Register the name in the current dataset's set in both branches so a
    # reused filename still blocks later conflicts in this dataset.
    used_filenames.add(unique_filename)

    return (unique_filename, attachment_key)
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
def format_attachment_size(file_size: Any) -> str:
    """
    Render a file size for display.

    Args:
        file_size: The file size; only int/float values are formattable.

    Returns:
        A human-readable size string, or "Unknown" for non-numeric input.
    """
    if not isinstance(file_size, (int, float)):
        return "Unknown"
    return format_size(float(file_size))
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
def create_attachment_info_dict(
    attachment_url: str, file_size: str, file_name: str
) -> dict[str, str]:
    """
    Build the attachment-info mapping consumed by the HTML templates.

    Args:
        attachment_url: The attachment URL.
        file_size: The pre-formatted file size string.
        file_name: The original file name.

    Returns:
        Dict with "url", "safe_url" (percent-encoded, keeping "/" and ":"),
        "size", and "original_name".
    """
    return {
        "url": attachment_url,
        # Encoded variant for safe embedding in template hrefs.
        "safe_url": quote(attachment_url, safe="/:"),
        "size": file_size,
        "original_name": file_name,
    }
|
|
290
|
+
|
|
291
|
+
|
|
292
|
+
def is_attachment_field(field_value: Any) -> bool:
    """
    Report whether a field value looks like a list of attachments.

    A non-empty list whose first element is a dict carrying "file_name",
    "download_url", and "file_size" counts as attachment data; only the
    first element is inspected.

    Args:
        field_value: The field value to check.

    Returns:
        True if the field contains attachment-like data.
    """
    if not (isinstance(field_value, list) and field_value):
        return False

    first = field_value[0]
    return isinstance(first, dict) and all(
        required in first for required in ("file_name", "download_url", "file_size")
    )
|
|
311
|
+
|
|
312
|
+
|
|
313
|
+
def process_attachments_contextually(
    data: dict[str, Any],
    used_filenames_data: set[str],
    used_filenames_attachments: set[str],
    processed_attachments: dict[tuple[str, str], str],
    enable_streaming: bool = False,
    callback: Optional[Callable] = None,
) -> list[dict[str, Any]]:
    """
    Walk the DSR data structure and process every attachment it contains,
    mirroring the DSR report builder's traversal order.

    Three sources are handled: direct ``item["attachments"]`` lists, nested
    attachment-shaped fields (ManualTask format), and the top-level
    ``data["attachments"]`` list.

    Args:
        data: The DSR data dictionary.
        used_filenames_data: Set of used filenames for data datasets.
        used_filenames_attachments: Set of used filenames for attachments.
        processed_attachments: Map of attachment keys to unique filenames.
        enable_streaming: Whether streaming mode is enabled.
        callback: Optional per-attachment callback.
            Signature: callback(attachment, unique_filename, attachment_info, context)

    Returns:
        List of processed attachment dictionaries with context information.
    """
    processed_attachments_list: list[dict[str, Any]] = []

    # Process datasets (excluding the top-level "attachments" key).
    datasets = _get_datasets_from_dsr_data(data)

    for dataset_name, collections in datasets.items():
        for collection_name, items in collections.items():
            directory = f"data/{dataset_name}/{collection_name}"
            for item in items:
                if not isinstance(item, dict):
                    continue

                # Direct attachments stored under the item's "attachments" key.
                if "attachments" in item and isinstance(item["attachments"], list):
                    processed_attachments_list.extend(
                        _process_attachment_list(
                            item["attachments"],
                            directory,
                            dataset_name,
                            used_filenames_data,
                            used_filenames_attachments,
                            processed_attachments,
                            enable_streaming,
                            callback,
                            {
                                "dataset": dataset_name,
                                "collection": collection_name,
                                "type": "direct",
                            },
                        )
                    )

                # Nested attachment fields (ManualTask format).
                for field_name, field_value in item.items():
                    # Bug fix: the "attachments" key was already handled by the
                    # direct branch above; re-processing it here produced
                    # duplicate result entries and duplicate callback calls.
                    if field_name == "attachments":
                        continue
                    if is_attachment_field(field_value):
                        processed_attachments_list.extend(
                            _process_attachment_list(
                                field_value,
                                directory,
                                dataset_name,
                                used_filenames_data,
                                used_filenames_attachments,
                                processed_attachments,
                                enable_streaming,
                                callback,
                                {
                                    "dataset": dataset_name,
                                    "collection": collection_name,
                                    "field": field_name,
                                    "type": "nested",
                                },
                            )
                        )

    # Process top-level attachments from the "attachments" key.
    # These are legitimate top-level attachments, not duplicates of dataset attachments.
    if "attachments" in data:
        processed_attachments_list.extend(
            _process_attachment_list(
                data["attachments"],
                "attachments",
                "attachments",
                used_filenames_data,
                used_filenames_attachments,
                processed_attachments,
                enable_streaming,
                callback,
                {"type": "top_level"},
            )
        )

    return processed_attachments_list
|
|
409
|
+
|
|
410
|
+
|
|
411
|
+
def _get_datasets_from_dsr_data(dsr_data: dict[str, Any]) -> dict[str, Any]:
|
|
412
|
+
"""
|
|
413
|
+
Extract datasets from DSR data using the same logic as DSR report builder.
|
|
414
|
+
|
|
415
|
+
Args:
|
|
416
|
+
dsr_data: The DSR data dictionary
|
|
417
|
+
|
|
418
|
+
Returns:
|
|
419
|
+
Dictionary of datasets with collections
|
|
420
|
+
"""
|
|
421
|
+
|
|
422
|
+
datasets: dict[str, Any] = defaultdict(lambda: defaultdict(list))
|
|
423
|
+
|
|
424
|
+
for key, rows in dsr_data.items():
|
|
425
|
+
# Skip attachments - they're handled separately
|
|
426
|
+
if key == "attachments":
|
|
427
|
+
continue
|
|
428
|
+
|
|
429
|
+
parts = key.split(":", 1)
|
|
430
|
+
if len(parts) > 1:
|
|
431
|
+
dataset_name, collection_name = parts
|
|
432
|
+
else:
|
|
433
|
+
# Try to determine dataset name from system_name in rows
|
|
434
|
+
dataset_name = "manual"
|
|
435
|
+
collection_name = parts[0]
|
|
436
|
+
|
|
437
|
+
for row in rows:
|
|
438
|
+
if isinstance(row, dict) and "system_name" in row:
|
|
439
|
+
dataset_name = row["system_name"]
|
|
440
|
+
break
|
|
441
|
+
|
|
442
|
+
datasets[dataset_name][collection_name].extend(rows)
|
|
443
|
+
|
|
444
|
+
return datasets
|
|
445
|
+
|
|
446
|
+
|
|
447
|
+
def _process_attachment_list(
    attachments: list[dict[str, Any]],
    directory: str,
    dataset_name: str,
    used_filenames_data: set[str],
    used_filenames_attachments: set[str],
    processed_attachments: dict[tuple[str, str], str],
    enable_streaming: bool,
    callback: Optional[Callable],
    context: dict[str, Any],
) -> list[dict[str, Any]]:
    """
    Process one list of attachments the way the DSR report builder does:
    assign unique names, format sizes, build template URLs/info, invoke the
    optional callback, and collect the enriched results.

    Args:
        attachments: List of attachment dictionaries.
        directory: Directory path for the attachments.
        dataset_name: Name of the dataset.
        used_filenames_data: Set of used filenames for data datasets.
        used_filenames_attachments: Set of used filenames for attachments.
        processed_attachments: Map of attachment keys to unique filenames.
        enable_streaming: Whether streaming mode is enabled.
        callback: Optional per-attachment callback.
        context: Context describing where the attachments were found.

    Returns:
        List of processed attachment dictionaries.
    """
    results: list[dict[str, Any]] = []

    # The "attachments" pseudo-dataset tracks names separately from the
    # real data datasets; the choice depends only on dataset_name, so it
    # is made once, outside the loop.
    name_registry = (
        used_filenames_attachments
        if dataset_name == "attachments"
        else used_filenames_data
    )

    for attachment in attachments:
        if not isinstance(attachment, dict):
            continue

        # Shared utility assigns (or reuses) a collision-free filename.
        naming = process_attachment_naming(
            attachment, name_registry, processed_attachments, dataset_name
        )
        if naming is None:
            # Missing file name or download URL — already logged upstream.
            continue
        unique_filename = naming[0]

        display_size = format_attachment_size(attachment.get("file_size"))

        download_url = attachment.get("download_url")
        if not download_url:
            continue

        attachment_url = generate_attachment_url_from_storage_path(
            download_url,
            unique_filename,
            directory,  # base_path: where the file is stored
            directory,  # html_directory: where the template lives
            enable_streaming,
        )

        file_name = attachment.get("file_name")
        if not file_name:
            continue

        attachment_info = create_attachment_info_dict(
            attachment_url, display_size, file_name
        )

        entry = {
            "attachment": attachment,
            "unique_filename": unique_filename,
            "attachment_info": attachment_info,
            "context": context,
            "directory": directory,
            "dataset_name": dataset_name,
        }

        if callback:
            callback(attachment, unique_filename, attachment_info, context)

        results.append(entry)

    return results
|
|
540
|
+
|
|
541
|
+
|
|
542
|
+
def extract_storage_key_from_attachment(attachment: dict[str, Any]) -> str:
    """
    Pick the storage key for an attachment with fallback logic.

    Preference order: "original_download_url" (if truthy), then
    "download_url" (if truthy), then "file_name", then the empty string.

    Args:
        attachment: The attachment dictionary.

    Returns:
        The storage key (URL or filename) for the attachment.
    """
    for url_key in ("original_download_url", "download_url"):
        value = attachment.get(url_key)
        if value:
            return value

    file_name = attachment.get("file_name")
    if file_name is None:
        return ""
    return file_name
|
|
563
|
+
|
|
564
|
+
|
|
565
|
+
def resolve_base_path_from_context(
    attachment: dict[str, Any], default_base_path: str = "attachments"
) -> str:
    """
    Resolve an attachment's storage base path from its "_context" metadata,
    keeping base-path logic consistent across storage components.

    Args:
        attachment: The attachment dictionary.
        default_base_path: Base path used when no context is present.

    Returns:
        The resolved base path for the attachment.
    """
    context = attachment.get("_context")
    if not context:
        return default_base_path

    context_type = context.get("type")
    if context_type in ("direct", "nested"):
        # Both live under the collection's attachments subdirectory.
        return f"data/{context['dataset']}/{context['collection']}/attachments"
    if context_type == "top_level":
        return "attachments"

    # Legacy context shape: {"key": ..., "item_id": ...}
    if context.get("key") and context.get("item_id"):
        return f"{context['key']}/{context['item_id']}/attachments"

    # Unknown context types get a sentinel path rather than failing.
    return "unknown/unknown/attachments"
|
|
598
|
+
|
|
599
|
+
|
|
600
|
+
def resolve_directory_from_context(
    attachment: dict[str, Any], default_directory: str = "attachments"
) -> str:
    """
    Resolve an attachment's directory path from its "_context" metadata,
    for use by the DSR report builder.

    Unlike resolve_base_path_from_context, this returns the collection
    directory itself (no trailing "/attachments"), and unknown context
    types fall back to *default_directory*.

    Args:
        attachment: The attachment dictionary.
        default_directory: Directory used when context is absent or unknown.

    Returns:
        The resolved directory path for the attachment.
    """
    context = attachment.get("_context")
    if not context:
        return default_directory

    context_type = context.get("type")
    if context_type in ("direct", "nested"):
        return f"data/{context['dataset']}/{context['collection']}"
    if context_type == "top_level":
        return "attachments"

    # Legacy context shape: {"key": ..., "item_id": ...}
    if context.get("key") and context.get("item_id"):
        return f"{context['key']}/{context['item_id']}"

    return default_directory
|
|
631
|
+
|
|
632
|
+
|
|
633
|
+
def convert_processed_attachments_to_attachment_processing_info(
    processed_attachments_list: list[dict[str, Any]], validate_attachment_func: Callable
) -> list[Any]:
    """
    Turn processed-attachment dicts into validated AttachmentProcessingInfo
    objects; shared between the different attachment collection methods.

    Each entry's attachment dict is copied (the input is not mutated) and
    enriched with its context under "_context" before validation. Entries the
    validator rejects (falsy result) are dropped.

    Args:
        processed_attachments_list: List of processed attachment dictionaries.
        validate_attachment_func: Validator for individual attachments.
            Signature: validate_attachment_func(attachment_with_context) -> AttachmentProcessingInfo | None

    Returns:
        List of validated AttachmentProcessingInfo objects.
    """
    # Without a validator nothing can be converted.
    if validate_attachment_func is None:
        return []

    validated: list[Any] = []
    for entry in processed_attachments_list:
        # Copy so the caller's attachment dict is left untouched.
        enriched = entry["attachment"].copy()
        enriched["_context"] = entry["context"]

        info = validate_attachment_func(enriched)
        if info:
            validated.append(info)

    return validated
|
|
@@ -33,7 +33,6 @@ from fides.api.task.manual.manual_task_utils import (
|
|
|
33
33
|
)
|
|
34
34
|
from fides.api.task.task_resources import TaskResources
|
|
35
35
|
from fides.api.util.collection_util import Row
|
|
36
|
-
from fides.api.util.storage_util import format_size
|
|
37
36
|
|
|
38
37
|
|
|
39
38
|
class ManualTaskGraphTask(GraphTask):
|
|
@@ -398,9 +397,9 @@ class ManualTaskGraphTask(GraphTask):
|
|
|
398
397
|
|
|
399
398
|
def _process_attachment_field(
|
|
400
399
|
self, submission: ManualTaskSubmission
|
|
401
|
-
) -> Optional[
|
|
402
|
-
"""Process attachment field and return attachment
|
|
403
|
-
|
|
400
|
+
) -> Optional[list[dict[str, Any]]]:
|
|
401
|
+
"""Process attachment field and return attachment list or None."""
|
|
402
|
+
attachment_list: list[dict[str, Any]] = []
|
|
404
403
|
|
|
405
404
|
for attachment in filter(
|
|
406
405
|
lambda a: a.attachment_type == AttachmentType.include_with_access_package,
|
|
@@ -408,15 +407,18 @@ class ManualTaskGraphTask(GraphTask):
|
|
|
408
407
|
):
|
|
409
408
|
try:
|
|
410
409
|
size, url = attachment.retrieve_attachment()
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
410
|
+
attachment_list.append(
|
|
411
|
+
{
|
|
412
|
+
"file_name": attachment.file_name,
|
|
413
|
+
"download_url": str(url) if url else None,
|
|
414
|
+
"file_size": size,
|
|
415
|
+
}
|
|
416
|
+
)
|
|
415
417
|
except Exception as exc: # pylint: disable=broad-exception-caught
|
|
416
418
|
logger.warning(
|
|
417
419
|
f"Error retrieving attachment {attachment.file_name}: {str(exc)}"
|
|
418
420
|
)
|
|
419
|
-
return
|
|
421
|
+
return attachment_list or None
|
|
420
422
|
|
|
421
423
|
def _cleanup_manual_task_instances(
|
|
422
424
|
self, manual_task: ManualTask, privacy_request: PrivacyRequest
|
|
@@ -101,10 +101,6 @@ class SecuritySettings(FidesSettings):
|
|
|
101
101
|
default="1000/minute",
|
|
102
102
|
description="The number of requests from a single IP address allowed to hit an endpoint within a rolling 60 second period.",
|
|
103
103
|
)
|
|
104
|
-
auth_rate_limit: str = Field(
|
|
105
|
-
default="10/minute",
|
|
106
|
-
description="The number of authentication requests from a single IP address allowed to hit authentication endpoints (login, OAuth token) within the specified time period.",
|
|
107
|
-
)
|
|
108
104
|
root_user_scopes: List[str] = Field(
|
|
109
105
|
default=SCOPE_REGISTRY,
|
|
110
106
|
description="The list of scopes that are given to the root user.",
|
|
@@ -217,13 +213,13 @@ class SecuritySettings(FidesSettings):
|
|
|
217
213
|
oauth_root_client_secret_hash = (hashed_client_id, salt.encode(encoding)) # type: ignore
|
|
218
214
|
return oauth_root_client_secret_hash
|
|
219
215
|
|
|
220
|
-
@field_validator("request_rate_limit"
|
|
216
|
+
@field_validator("request_rate_limit")
|
|
221
217
|
@classmethod
|
|
222
|
-
def
|
|
218
|
+
def validate_request_rate_limit(
|
|
223
219
|
cls,
|
|
224
220
|
v: str,
|
|
225
221
|
) -> str:
|
|
226
|
-
"""Validate the formatting of
|
|
222
|
+
"""Validate the formatting of `request_rate_limit`"""
|
|
227
223
|
try:
|
|
228
224
|
# Defer to `limits.parse_many` https://limits.readthedocs.io/en/stable/api.html#limits.parse_many
|
|
229
225
|
parse_many(v)
|
|
@@ -1 +1 @@
|
|
|
1
|
-
<!DOCTYPE html><html lang="en"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link data-next-font="" rel="preconnect" href="/" crossorigin="anonymous"/><link rel="preload" href="/_next/static/css/650df9c348000a26.css" as="style"/><link rel="stylesheet" href="/_next/static/css/650df9c348000a26.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/_next/static/chunks/polyfills-42372ed130431b0a.js"></script><script src="/_next/static/chunks/webpack-678e89d68dbcd94f.js" defer=""></script><script src="/_next/static/chunks/framework-c92fc3344e6fd165.js" defer=""></script><script src="/_next/static/chunks/main-090643377c8254e6.js" defer=""></script><script src="/_next/static/chunks/pages/_app-ef8e1c986bc5b795.js" defer=""></script><script src="/_next/static/chunks/pages/404-471a6b18e712f050.js" defer=""></script><script src="/_next/static/
|
|
1
|
+
<!DOCTYPE html><html lang="en"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link data-next-font="" rel="preconnect" href="/" crossorigin="anonymous"/><link rel="preload" href="/_next/static/css/650df9c348000a26.css" as="style"/><link rel="stylesheet" href="/_next/static/css/650df9c348000a26.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/_next/static/chunks/polyfills-42372ed130431b0a.js"></script><script src="/_next/static/chunks/webpack-678e89d68dbcd94f.js" defer=""></script><script src="/_next/static/chunks/framework-c92fc3344e6fd165.js" defer=""></script><script src="/_next/static/chunks/main-090643377c8254e6.js" defer=""></script><script src="/_next/static/chunks/pages/_app-ef8e1c986bc5b795.js" defer=""></script><script src="/_next/static/chunks/pages/404-471a6b18e712f050.js" defer=""></script><script src="/_next/static/8qfO1Ol3G3QbcXpHAnPlU/_buildManifest.js" defer=""></script><script src="/_next/static/8qfO1Ol3G3QbcXpHAnPlU/_ssgManifest.js" defer=""></script><style>.data-ant-cssinjs-cache-path{content:"";}</style></head><body><div id="__next"><div style="height:100%;display:flex"></div></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/404","query":{},"buildId":"8qfO1Ol3G3QbcXpHAnPlU","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
|