PyPI - ethyca-fides - Versions diffs - 2.63.1b0__py2.py3-none-any.whl → 2.63.1b1__py2.py3-none-any.whl - Mend

ethyca-fides 2.63.1b0__py2.py3-none-any.whl → 2.63.1b1__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (108) hide show

fides/api/service/privacy_request/attachment_handling.py ADDED Viewed

@@ -0,0 +1,132 @@
+import time as time_module
+from dataclasses import dataclass
+from typing import Any, Dict, Iterator, List, Optional, Tuple
+from loguru import logger
+from fides.api.models.attachment import Attachment, AttachmentType
+from fides.api.schemas.storage.storage import StorageDetails
+@dataclass
+class AttachmentData:
+    """Data structure for attachment metadata and content.
+    Using a dataclass rather than a Pydantic model here for the following reasons:
+    - The data structure is simple and doesn't need complex validation.
+    - The fields being used have already been validated and are properly typed.
+    - The class is used internally for data transfer, not for API serialization.
+    - Performance is important since this is used in a data processing pipeline.
+    """
+    file_name: str
+    file_size: Optional[int]
+    download_url: Optional[str]
+    content_type: str
+    bucket_name: str
+    file_key: str
+    storage_key: str
+    def to_upload_dict(self) -> Dict[str, Any]:
+        """Convert to dictionary for upload, including presigned URL."""
+        return {
+            "file_name": self.file_name,
+            "file_size": self.file_size,
+            "download_url": self.download_url,
+            "content_type": self.content_type,
+        }
+    def to_storage_dict(self) -> Dict[str, Any]:
+        """Convert to dictionary for storage, including the elements needed to recreated the presigned URL."""
+        return {
+            "file_name": self.file_name,
+            "file_size": self.file_size,
+            "content_type": self.content_type,
+            "bucket_name": self.bucket_name,
+            "file_key": self.file_key,
+            "storage_key": self.storage_key,
+        }
+def get_attachments_content(
+    loaded_attachments: List[Attachment],
+) -> Iterator[AttachmentData]:
+    """
+    Retrieves all attachments associated with a privacy request that are marked to be included with the access package.
+    Yields AttachmentData objects containing attachment metadata and download urls.
+    Uses generators to minimize memory usage.
+    Args:
+        loaded_attachments: List of Attachment objects to process
+    Yields:
+        AttachmentData object containing attachment metadata and url
+    """
+    start_time = time_module.time()
+    processed_count = 0
+    skipped_count = 0
+    error_count = 0
+    total_size = 0
+    for attachment in loaded_attachments:
+        if attachment.attachment_type != AttachmentType.include_with_access_package:
+            skipped_count += 1
+            continue
+        try:
+            # Get size and download URL using retrieve_attachment
+            size, url = attachment.retrieve_attachment()
+            total_size += size if size else 0
+            if url is None:
+                logger.warning(
+                    "No download URL retrieved for attachment {}", attachment.file_name
+                )
+                skipped_count += 1
+                continue
+            processed_count += 1
+            yield AttachmentData(
+                file_name=attachment.file_name,
+                file_size=size,
+                download_url=str(url) if url else None,
+                content_type=attachment.content_type,
+                bucket_name=attachment.config.details[StorageDetails.BUCKET.value],
+                file_key=attachment.file_key,
+                storage_key=attachment.storage_key,
+            )
+        except Exception as e:
+            error_count += 1
+            logger.error(
+                "Error processing attachment {}: {}", attachment.file_name, str(e)
+            )
+            continue
+    # Log final metrics
+    time_taken = time_module.time() - start_time
+    logger.bind(
+        time_to_process=time_taken,
+        total_attachments=len(loaded_attachments),
+        processed_attachments=processed_count,
+        skipped_attachments=skipped_count,
+        error_attachments=error_count,
+        total_size_bytes=total_size,
+    ).info("Attachment processing complete")
+def process_attachments_for_upload(
+    attachments: Iterator[AttachmentData],
+) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
+    """
+    Process attachments into separate upload and storage formats.
+    Returns both formats:
+    - upload_attachments: Used for uploading to access packages
+    - storage_attachments: Used for saving filtered access results
+    """
+    upload_attachments = []
+    storage_attachments = []
+    for attachment in attachments:
+        storage_attachments.append(attachment.to_storage_dict())
+        upload_attachments.append(attachment.to_upload_dict())
+    return upload_attachments, storage_attachments

fides/api/service/privacy_request/dsr_package/dsr_report_builder.py CHANGED Viewed

@@ -1,13 +1,15 @@
 import json
 import os
+import time as time_module
 import zipfile
 from collections import defaultdict
 from io import BytesIO
 from pathlib import Path
-from typing import Any, Dict, List, Optional
+from typing import Any, Optional
 import jinja2
 from jinja2 import Environment, FileSystemLoader
+from loguru import logger
 from fides.api.models.privacy_request import PrivacyRequest
 from fides.api.schemas.policy import ActionType
@@ -22,54 +24,79 @@ BORDER_COLOR = "#E2E8F0"
 # pylint: disable=too-many-instance-attributes
 class DsrReportBuilder:
+    """
+    Manages populating HTML templates from the given data and adding the generated
+    pages to a zip file in a way that the pages can be navigated between.
+    The zip file is structured as follows:
+    - welcome.html: the main index page
+    - data/dataset_name/index.html: the index page for the dataset
+    - data/dataset_name/collection_name/index.html: the index page for the collection
+    - data/dataset_name/collection_name/item_index.html: the detail page for the item
+    - attachments/index.html: the index page for the attachments
+    Args:
+        privacy_request: the privacy request object
+        dsr_data: the DSR data
+    """
     def __init__(
         self,
         privacy_request: PrivacyRequest,
-        dsr_data: Dict[str, Any],
+        dsr_data: dict[str, Any],
     ):
         """
-        Manages populating HTML templates from the given data and adding the generated
-        pages to a zip file in a way that the pages can be navigated between.
+        Initializes the DSR report builder.
         """
+        # Define pretty_print function for Jinja templates
+        jinja2.filters.FILTERS["pretty_print"] = lambda value, indent=4: json.dumps(
+            value, indent=indent, cls=StorageJSONEncoder
+        )
-        # zip file variables
+        # Initialize instance zip file variables
         self.baos = BytesIO()
         # we close this in the finally block of generate()
         # pylint: disable=consider-using-with
         self.out = zipfile.ZipFile(self.baos, "w")
-        # Jinja template environment initialization
-        def pretty_print(value: str, indent: int = 4) -> str:
-            return json.dumps(
-                value, indent=indent, default=StorageJSONEncoder().default
-            )
-        jinja2.filters.FILTERS["pretty_print"] = pretty_print
         self.template_loader = Environment(
             loader=FileSystemLoader(DSR_DIRECTORY), autoescape=True
         )
         # to pass in custom colors in the future
-        self.template_data: Dict[str, Any] = {
+        self.template_data: dict[str, Any] = {
             "text_color": TEXT_COLOR,
             "header_color": HEADER_COLOR,
             "border_color": BORDER_COLOR,
         }
-        self.main_links: Dict[str, Any] = {}  # used to track the generated pages
+        self.main_links: dict[str, Any] = {}  # used to track the generated pages
         # report data to populate the templates
         self.request_data = _map_privacy_request(privacy_request)
         self.dsr_data = dsr_data
+        # Track used filenames across all attachments
+        self.used_filenames: set[str] = set()
     def _populate_template(
         self,
         template_path: str,
         heading: Optional[str] = None,
         description: Optional[str] = None,
-        data: Optional[Dict[str, Any]] = None,
+        data: Optional[dict[str, Any]] = None,
     ) -> str:
-        """Generates a file from the template and data"""
+        """
+        Populates the template with the given data.
+        Args:
+            template_path: the path to the template to populate
+            heading: the heading to display on the template
+            description: the description to display on the template
+            data: the data to populate the template with
+        Returns:
+            The rendered template as a string.
+        """
         report_data = {
             "heading": heading,
             "description": description,
@@ -82,14 +109,24 @@ class DsrReportBuilder:
         return rendered_template
     def _add_file(self, filename: str, contents: str) -> None:
-        """Helper to add a file to the zip archive"""
+        """
+        Adds a file to the zip file.
+        Args:
+            filename: the name of the file to add
+            contents: the contents of the file to add
+        """
         if filename and contents:
             self.out.writestr(f"{filename}", contents.encode("utf-8"))
-    def _add_dataset(self, dataset_name: str, collections: Dict[str, Any]) -> None:
+    def _add_dataset(self, dataset_name: str, collections: dict[str, Any]) -> None:
         """
         Generates a page for each collection in the dataset and an index page for the dataset.
         Tracks the generated links to build a root level index after each collection has been processed.
+        Args:
+            dataset_name: the name of the dataset to add
+            collections: the collections to add to the dataset
         """
         # track links to collection indexes
         collection_links = {}
@@ -109,40 +146,203 @@ class DsrReportBuilder:
             ),
         )
+    def _get_unique_filename(self, filename: str) -> str:
+        """
+        Generates a unique filename by appending a counter if the file already exists.
+        Now tracks filenames across all directories to ensure global uniqueness.
+        Args:
+            filename: The original filename
+        Returns:
+            A unique filename that won't conflict with existing files
+        """
+        base_name, extension = os.path.splitext(filename)
+        counter = 1
+        unique_filename = filename
+        # Check if file exists in used_filenames set
+        while unique_filename in self.used_filenames:
+            unique_filename = f"{base_name}_{counter}{extension}"
+            counter += 1
+        # Add the new filename to the set
+        self.used_filenames.add(unique_filename)
+        return unique_filename
+    def _write_attachment_content(
+        self,
+        attachments: list[dict[str, Any]],
+        directory: str,
+    ) -> dict[str, dict[str, str]]:
+        """
+        Processes attachments and returns a dictionary mapping filenames to their download URLs and sizes.
+        Args:
+            attachments: The attachments to process
+            directory: The directory path (unused for presigned URLs)
+        Returns:
+            Dictionary mapping filenames to dictionaries containing url and size
+        """
+        # First process all attachments into a list of tuples (filename, data)
+        processed_attachments = []
+        for attachment in attachments:
+            if not isinstance(attachment, dict):
+                continue
+            file_name = attachment.get("file_name")
+            if not file_name:
+                logger.warning("Skipping attachment with no file name")
+                continue
+            download_url = attachment.get("download_url")
+            if not download_url:
+                logger.warning("Skipping attachment with no download URL")
+                continue
+            file_size = attachment.get("file_size")
+            if isinstance(file_size, (int, float)):
+                file_size = self._format_size(float(file_size))
+            else:
+                file_size = "Unknown"
+            # Get a unique filename to prevent duplicates
+            unique_filename = self._get_unique_filename(file_name)
+            # Add to processed attachments
+            processed_attachments.append(
+                (unique_filename, {"url": download_url, "size": file_size})
+            )
+        # Convert list of tuples to dictionary
+        return dict(processed_attachments)
     def _add_collection(
-        self, rows: List[Dict[str, Any]], dataset_name: str, collection_name: str
+        self, rows: list[dict[str, Any]], dataset_name: str, collection_name: str
     ) -> None:
-        # track links to detail pages
-        detail_links = {}
-        for index, item in enumerate(rows, 1):
-            detail_url = f"{index}.html"
-            self._add_file(
-                f"data/{dataset_name}/{collection_name}/{index}.html",
-                self._populate_template(
-                    "templates/item.html",
-                    f"{collection_name} (item #{index})",
-                    None,
-                    item,
-                ),
+        """
+        Adds a collection to the zip file.
+        Args:
+            rows: the rows to add to the collection
+            dataset_name: the name of the dataset to add the collection to
+            collection_name: the name of the collection to add
+        """
+        items_content = []
+        for index, collection_item in enumerate(rows, 1):
+            # Create a copy of the item data to avoid modifying the original
+            item_data = collection_item.copy()
+            # Process any attachments in the item
+            if "attachments" in item_data and isinstance(
+                item_data["attachments"], list
+            ):
+                # Process attachments and get their URLs
+                attachment_links = self._write_attachment_content(
+                    item_data["attachments"],
+                    f"data/{dataset_name}/{collection_name}",
+                )
+                # Add the attachment URLs to the item data
+                item_data["attachments"] = attachment_links
+            # Add item content to the list
+            items_content.append(
+                {
+                    "index": index,
+                    "heading": f"{collection_name} (item #{index})",
+                    "data": item_data,
+                }
             )
-            detail_links[f"item #{index}"] = detail_url
-        # generate detail index page
+        # Generate the collection index page
         self._add_file(
             f"data/{dataset_name}/{collection_name}/index.html",
             self._populate_template(
                 "templates/collection_index.html",
                 collection_name,
                 None,
-                detail_links,
+                {"collection_items": items_content},
+            ),
+        )
+    def _add_attachments(self, attachments: list[dict[str, Any]]) -> None:
+        """
+        Adds top-level attachments to the zip file.
+        Args:
+            attachments: the attachments to add
+        """
+        if not attachments or not isinstance(attachments, list):
+            return
+        # Process attachments and get the links
+        attachment_links = self._write_attachment_content(attachments, "attachments")
+        # Generate attachments index page using the attachments index template
+        self._add_file(
+            "attachments/index.html",
+            self._populate_template(
+                "templates/attachments_index.html",
+                "Attachments",
+                "Files attached to this privacy request",
+                attachment_links,
             ),
         )
+    def _get_datasets_from_dsr_data(self) -> dict[str, Any]:
+        """
+        Returns the datasets from the DSR data.
+        """
+        # pre-process data to split the dataset:collection keys
+        datasets: dict[str, Any] = defaultdict(lambda: defaultdict(list))
+        for key, rows in self.dsr_data.items():
+            # we handle attachments separately
+            if key == "attachments":
+                continue
+            parts = key.split(":", 1)
+            if len(parts) > 1:
+                dataset_name, collection_name = parts
+            else:
+                for row in rows:
+                    if "system_name" in row:
+                        dataset_name = row["system_name"]
+                        collection_name = parts[0]
+                        break
+                else:
+                    dataset_name = "manual"
+                    collection_name = parts[0]
+            datasets[dataset_name][collection_name].extend(rows)
+        return datasets
+    def _format_size(self, size_bytes: float) -> str:
+        """
+        Format size in bytes to human readable format.
+        Args:
+            size_bytes: Size in bytes
+        Returns:
+            Formatted string with appropriate unit (B, KB, MB, GB)
+        """
+        for unit in ["B", "KB", "MB", "GB"]:
+            if size_bytes < 1024.0:
+                return f"{size_bytes:.1f} {unit}"
+            size_bytes /= 1024.0
+        return f"{size_bytes:.1f} TB"
     def generate(self) -> BytesIO:
         """
         Processes the request and DSR data to build zip file containing the DSR report.
         Returns the zip file as an in-memory byte array.
         """
+        start_time = time_module.time()
         try:
             # all the css for the pages is in main.css
             self._add_file(
@@ -157,18 +357,28 @@ class DsrReportBuilder:
             )
             # pre-process data to split the dataset:collection keys
-            datasets: Dict[str, Any] = defaultdict(lambda: defaultdict(list))
-            for key, rows in self.dsr_data.items():
-                parts = key.split(":", 1)
-                dataset_name, collection_name = (
-                    parts if len(parts) > 1 else ("manual", parts[0])
-                )
-                datasets[dataset_name][collection_name].extend(rows)
+            datasets: dict[str, Any] = self._get_datasets_from_dsr_data()
-            for dataset_name, collections in datasets.items():
-                self._add_dataset(dataset_name, collections)
+            # Sort datasets alphabetically, excluding special cases
+            regular_datasets = [
+                name for name in sorted(datasets.keys()) if name != "dataset"
+            ]  # pylint: disable=invalid-name
+            # Add regular datasets in alphabetical order
+            for dataset_name in regular_datasets:
+                self._add_dataset(dataset_name, datasets[dataset_name])
                 self.main_links[dataset_name] = f"data/{dataset_name}/index.html"
+            # Add Additional Data if it exists
+            if "dataset" in datasets:
+                self._add_dataset("dataset", datasets["dataset"])
+                self.main_links["Additional Data"] = "data/dataset/index.html"
+            # Add Additional Attachments last if it exists
+            if "attachments" in self.dsr_data:
+                self._add_attachments(self.dsr_data["attachments"])
+                self.main_links["Additional Attachments"] = "attachments/index.html"
             # create the main index once all the datasets have been added
             self._add_file(
                 "welcome.html",
@@ -182,12 +392,20 @@ class DsrReportBuilder:
         # reset the file pointer so the file can be fully read by the caller
         self.baos.seek(0)
+        # Calculate time taken and file size
+        time_taken = time_module.time() - start_time
+        file_size = self._format_size(float(len(self.baos.getvalue())))
+        logger.bind(time_to_generate=time_taken, dsr_package_size=file_size).info(
+            "DSR report generation complete."
+        )
         return self.baos
-def _map_privacy_request(privacy_request: PrivacyRequest) -> Dict[str, Any]:
+def _map_privacy_request(privacy_request: PrivacyRequest) -> dict[str, Any]:
     """Creates a map with a subset of values from the privacy request"""
-    request_data: Dict[str, Any] = {}
+    request_data: dict[str, Any] = {}
     request_data["id"] = privacy_request.id
     action_type: Optional[ActionType] = privacy_request.policy.get_action_type()

fides/api/service/privacy_request/dsr_package/templates/attachments_index.html ADDED Viewed

@@ -0,0 +1,33 @@
+<html>
+   <head>
+      <link rel="stylesheet" href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600">
+      <link rel="stylesheet" href="../data/main.css">
+   </head>
+   <body>
+      <div class="container">
+         <div class="header"></div>
+         <div class="content">
+            <div class="button-container">
+               <a href="../welcome.html">
+                  <div class="button"><img src="../data/back.svg"></div>
+                  <span>Back to main page</span>
+               </a>
+            </div>
+            <h1>Attachments</h1>
+            <p class="expiration-notice">Note: All download links will expire in 7 days.</p>
+            <div class="table table-hover">
+               <div class="table-row">
+                  <div class="table-cell" style="text-align: left;">File Name</div>
+                  <div class="table-cell" style="text-align: left;">Size</div>
+               </div>
+               {% for name, info in data.items() %}
+               <a href="{{ info.url }}" class="table-row" target="_blank">
+                  <div class="table-cell" style="text-align: left;">{{ name }}</div>
+                  <div class="table-cell" style="text-align: left;">{{ info.size }}</div>
+               </a>
+               {% endfor %}
+            </div>
+         </div>
+      </div>
+   </body>
+</html>

fides/api/service/privacy_request/dsr_package/templates/collection_index.html CHANGED Viewed

@@ -14,17 +14,42 @@
                </a>
             </div>
             <h1>{{ heading }}</h1>
-            <div class="table table-hover">
-               <div class="table-row">
-                  <div class="table-cell">Items</div>
+            {% for item in data.collection_items %}
+            <div class="item-section">
+               <h2>{{ item.heading }}</h2>
+               <div class="table">
+                  <div class="table-row">
+                     <div class="table-cell">Field</div>
+                     <div class="table-cell">Value</div>
+                  </div>
+                  {% for field, value in item.data.items() %}
+                  <div class="table-row">
+                     <div class="table-cell">{{ field }}</div>
+                     <div class="table-cell">
+                        {% if field == "attachments" and value is mapping and value|length > 0 %}
+                           <p class="expiration-notice">Note: All download links will expire in 7 days.</p>
+                           <div class="table table-hover">
+                              <div class="table-row">
+                                 <div class="table-cell" style="text-align: left;">File Name</div>
+                                 <div class="table-cell" style="text-align: left;">Size</div>
+                              </div>
+                              {% for attachment_name, attachment_info in value.items() %}
+                              <a href="{{ attachment_info.url }}" class="table-row" target="_blank">
+                                 <div class="table-cell" style="text-align: left;">{{ attachment_name }}</div>
+                                 <div class="table-cell" style="text-align: left;">{{ attachment_info.size }}</div>
+                              </a>
+                              {% endfor %}
+                           </div>
+                        {% else %}
+                           <pre>{{ value | pretty_print }}</pre>
+                        {% endif %}
+                     </div>
+                  </div>
+                  {% endfor %}
                </div>
-               {% for name, link in data.items() %}
-               <a href="{{ link }}" class="table-row">
-                  <div class="table-cell">{{ name }}</div>
-               </a>
-               {% endfor %}
             </div>
+            {% endfor %}
          </div>
       </div>
    </body>
-</html>
+</html>

ethyca-fides 2.63.1b0__py2.py3-none-any.whl → 2.63.1b1__py2.py3-none-any.whl

ethyca-fides 2.63.1b0__py2.py3-none-any.whl → 2.63.1b1__py2.py3-none-any.whl